path: root/contrib
author    Anton Samokhvalov <pg83@yandex.ru>    2022-02-10 16:45:17 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>    2022-02-10 16:45:17 +0300
commit    d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree      dd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib
parent    72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download  ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib')
-rw-r--r--contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make2
-rw-r--r--contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make4
-rw-r--r--contrib/libs/base64/plain32/codec_plain.c2
-rw-r--r--contrib/libs/base64/plain32/dec_uint32.c8
-rw-r--r--contrib/libs/base64/plain32/enc_uint32.c8
-rw-r--r--contrib/libs/base64/plain64/codec_plain.c2
-rw-r--r--contrib/libs/base64/plain64/dec_uint64.c8
-rw-r--r--contrib/libs/base64/plain64/enc_uint64.c8
-rw-r--r--contrib/libs/brotli/LICENSE6
-rw-r--r--contrib/libs/brotli/README.md22
-rw-r--r--contrib/libs/brotli/common/ya.make2
-rw-r--r--contrib/libs/brotli/dec/bit_reader.c44
-rw-r--r--contrib/libs/brotli/dec/bit_reader.h232
-rw-r--r--contrib/libs/brotli/dec/decode.c1044
-rw-r--r--contrib/libs/brotli/dec/huffman.c312
-rw-r--r--contrib/libs/brotli/dec/huffman.h72
-rw-r--r--contrib/libs/brotli/dec/prefix.h1474
-rw-r--r--contrib/libs/brotli/dec/state.c168
-rw-r--r--contrib/libs/brotli/dec/state.h288
-rw-r--r--contrib/libs/brotli/dec/ya.make30
-rw-r--r--contrib/libs/brotli/enc/backward_references.h18
-rw-r--r--contrib/libs/brotli/enc/bit_cost.h60
-rw-r--r--contrib/libs/brotli/enc/block_splitter.h24
-rw-r--r--contrib/libs/brotli/enc/brotli_bit_stream.h30
-rw-r--r--contrib/libs/brotli/enc/cluster.h32
-rw-r--r--contrib/libs/brotli/enc/command.h80
-rw-r--r--contrib/libs/brotli/enc/dictionary_hash.h16
-rw-r--r--contrib/libs/brotli/enc/entropy_encode.h28
-rw-r--r--contrib/libs/brotli/enc/fast_log.h224
-rw-r--r--contrib/libs/brotli/enc/find_match_length.h78
-rw-r--r--contrib/libs/brotli/enc/hash.h124
-rw-r--r--contrib/libs/brotli/enc/histogram.h24
-rw-r--r--contrib/libs/brotli/enc/literal_cost.h16
-rw-r--r--contrib/libs/brotli/enc/metablock.h34
-rw-r--r--contrib/libs/brotli/enc/prefix.h26
-rw-r--r--contrib/libs/brotli/enc/ringbuffer.h34
-rw-r--r--contrib/libs/brotli/enc/static_dict.h18
-rw-r--r--contrib/libs/brotli/enc/static_dict_lut.h26
-rw-r--r--contrib/libs/brotli/enc/write_bits.h46
-rw-r--r--contrib/libs/brotli/enc/ya.make30
-rw-r--r--contrib/libs/brotli/ya.make10
-rw-r--r--contrib/libs/c-ares/ares_build.h4
-rw-r--r--contrib/libs/c-ares/ares_config.h36
-rw-r--r--contrib/libs/c-ares/ares_setup.h2
-rw-r--r--contrib/libs/cctz/tzdata/ya.make2
-rw-r--r--contrib/libs/crcutil/ya.make30
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt70
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt192
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt8
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT22
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt114
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt114
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt2
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt240
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt240
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt32
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt164
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt24
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt2
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt164
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt24
-rw-r--r--contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt2
-rw-r--r--contrib/libs/cxxsupp/builtins/README.txt690
-rw-r--r--contrib/libs/cxxsupp/builtins/absvdi2.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/absvsi2.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/absvti2.c68
-rw-r--r--contrib/libs/cxxsupp/builtins/adddf3.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/addsf3.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/addtf3.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/addvdi3.c72
-rw-r--r--contrib/libs/cxxsupp/builtins/addvsi3.c72
-rw-r--r--contrib/libs/cxxsupp/builtins/addvti3.c80
-rw-r--r--contrib/libs/cxxsupp/builtins/apple_versioning.c700
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/Makefile.mk40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S192
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c32
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S182
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c32
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S80
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c86
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c38
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S80
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c38
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S56
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S62
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S68
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S62
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/bswapdi2.S94
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/bswapsi2.S78
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/clzdi2.S194
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/clzsi2.S152
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/comparesf2.S296
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/divmodsi4.S148
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/divsi3.S130
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S54
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S54
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/modsi3.S126
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S46
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S46
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S66
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S66
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S54
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/switch16.S88
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/switch32.S88
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/switch8.S84
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/switchu8.S84
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync-ops.h128
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S48
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S38
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S46
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S48
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S48
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S48
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S42
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S40
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S48
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S70
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S52
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S368
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/udivsi3.S340
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/umodsi3.S322
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S58
-rw-r--r--contrib/libs/cxxsupp/builtins/arm64/Makefile.mk40
-rw-r--r--contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk40
-rw-r--r--contrib/libs/cxxsupp/builtins/ashldi3.c86
-rw-r--r--contrib/libs/cxxsupp/builtins/ashlti3.c90
-rw-r--r--contrib/libs/cxxsupp/builtins/ashrdi3.c88
-rw-r--r--contrib/libs/cxxsupp/builtins/ashrti3.c92
-rw-r--r--contrib/libs/cxxsupp/builtins/assembly.h316
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic.c658
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic_flag_clear.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c56
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c56
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic_signal_fence.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/atomic_thread_fence.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/clear_cache.c316
-rw-r--r--contrib/libs/cxxsupp/builtins/clzdi2.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/clzsi2.c106
-rw-r--r--contrib/libs/cxxsupp/builtins/clzti2.c66
-rw-r--r--contrib/libs/cxxsupp/builtins/cmpdi2.c102
-rw-r--r--contrib/libs/cxxsupp/builtins/cmpti2.c84
-rw-r--r--contrib/libs/cxxsupp/builtins/comparedf2.c292
-rw-r--r--contrib/libs/cxxsupp/builtins/comparesf2.c290
-rw-r--r--contrib/libs/cxxsupp/builtins/comparetf2.c276
-rw-r--r--contrib/libs/cxxsupp/builtins/ctzdi2.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/ctzsi2.c114
-rw-r--r--contrib/libs/cxxsupp/builtins/ctzti2.c66
-rw-r--r--contrib/libs/cxxsupp/builtins/divdc3.c120
-rw-r--r--contrib/libs/cxxsupp/builtins/divdf3.c368
-rw-r--r--contrib/libs/cxxsupp/builtins/divdi3.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/divmoddi4.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/divmodsi4.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/divsc3.c120
-rw-r--r--contrib/libs/cxxsupp/builtins/divsf3.c336
-rw-r--r--contrib/libs/cxxsupp/builtins/divsi3.c74
-rw-r--r--contrib/libs/cxxsupp/builtins/divtc3.c120
-rw-r--r--contrib/libs/cxxsupp/builtins/divtf3.c406
-rw-r--r--contrib/libs/cxxsupp/builtins/divti3.c66
-rw-r--r--contrib/libs/cxxsupp/builtins/divxc3.c126
-rw-r--r--contrib/libs/cxxsupp/builtins/emutls.c366
-rw-r--r--contrib/libs/cxxsupp/builtins/enable_execute_stack.c144
-rw-r--r--contrib/libs/cxxsupp/builtins/eprintf.c70
-rw-r--r--contrib/libs/cxxsupp/builtins/extenddftf2.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/extendhfsf2.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/extendsfdf2.c38
-rw-r--r--contrib/libs/cxxsupp/builtins/extendsftf2.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/ffsdi2.c66
-rw-r--r--contrib/libs/cxxsupp/builtins/ffsti2.c74
-rw-r--r--contrib/libs/cxxsupp/builtins/fixdfdi.c92
-rw-r--r--contrib/libs/cxxsupp/builtins/fixdfsi.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/fixdfti.c52
-rw-r--r--contrib/libs/cxxsupp/builtins/fixsfdi.c94
-rw-r--r--contrib/libs/cxxsupp/builtins/fixsfsi.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/fixsfti.c52
-rw-r--r--contrib/libs/cxxsupp/builtins/fixtfdi.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/fixtfsi.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/fixtfti.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunsdfdi.c88
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunsdfsi.c42
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunsdfti.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunssfdi.c90
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunssfsi.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunssfti.c52
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunstfdi.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunstfsi.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunstfti.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunsxfdi.c92
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunsxfsi.c90
-rw-r--r--contrib/libs/cxxsupp/builtins/fixunsxfti.c100
-rw-r--r--contrib/libs/cxxsupp/builtins/fixxfdi.c96
-rw-r--r--contrib/libs/cxxsupp/builtins/fixxfti.c102
-rw-r--r--contrib/libs/cxxsupp/builtins/floatdidf.c214
-rw-r--r--contrib/libs/cxxsupp/builtins/floatdisf.c158
-rw-r--r--contrib/libs/cxxsupp/builtins/floatditf.c100
-rw-r--r--contrib/libs/cxxsupp/builtins/floatdixf.c90
-rw-r--r--contrib/libs/cxxsupp/builtins/floatsidf.c106
-rw-r--r--contrib/libs/cxxsupp/builtins/floatsisf.c118
-rw-r--r--contrib/libs/cxxsupp/builtins/floatsitf.c100
-rw-r--r--contrib/libs/cxxsupp/builtins/floattidf.c164
-rw-r--r--contrib/libs/cxxsupp/builtins/floattisf.c164
-rw-r--r--contrib/libs/cxxsupp/builtins/floattixf.c168
-rw-r--r--contrib/libs/cxxsupp/builtins/floatundidf.c210
-rw-r--r--contrib/libs/cxxsupp/builtins/floatundisf.c154
-rw-r--r--contrib/libs/cxxsupp/builtins/floatunditf.c80
-rw-r--r--contrib/libs/cxxsupp/builtins/floatundixf.c84
-rw-r--r--contrib/libs/cxxsupp/builtins/floatunsidf.c84
-rw-r--r--contrib/libs/cxxsupp/builtins/floatunsisf.c100
-rw-r--r--contrib/libs/cxxsupp/builtins/floatunsitf.c80
-rw-r--r--contrib/libs/cxxsupp/builtins/floatuntidf.c160
-rw-r--r--contrib/libs/cxxsupp/builtins/floatuntisf.c158
-rw-r--r--contrib/libs/cxxsupp/builtins/floatuntixf.c162
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_add_impl.inc288
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_extend.h178
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_extend_impl.inc216
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc82
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc78
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_lib.h540
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_mul_impl.inc232
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_trunc.h152
-rw-r--r--contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc270
-rw-r--r--contrib/libs/cxxsupp/builtins/gcc_personality_v0.c418
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/Makefile.mk40
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/ashldi3.S116
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/ashrdi3.S138
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/chkstk.S68
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/chkstk2.S80
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/divdi3.S322
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/floatdidf.S78
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/floatdisf.S64
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/floatdixf.S60
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/floatundidf.S104
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/floatundisf.S210
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/floatundixf.S86
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/lshrdi3.S118
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/moddi3.S330
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/muldi3.S60
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/udivdi3.S228
-rw-r--r--contrib/libs/cxxsupp/builtins/i386/umoddi3.S250
-rw-r--r--contrib/libs/cxxsupp/builtins/int_endianness.h232
-rw-r--r--contrib/libs/cxxsupp/builtins/int_lib.h266
-rw-r--r--contrib/libs/cxxsupp/builtins/int_math.h228
-rw-r--r--contrib/libs/cxxsupp/builtins/int_types.h328
-rw-r--r--contrib/libs/cxxsupp/builtins/int_util.c122
-rw-r--r--contrib/libs/cxxsupp/builtins/int_util.h66
-rw-r--r--contrib/libs/cxxsupp/builtins/lshrdi3.c86
-rw-r--r--contrib/libs/cxxsupp/builtins/lshrti3.c90
-rw-r--r--contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt8
-rw-r--r--contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt32
-rw-r--r--contrib/libs/cxxsupp/builtins/macho_embedded/common.txt184
-rw-r--r--contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt14
-rw-r--r--contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt20
-rw-r--r--contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt28
-rw-r--r--contrib/libs/cxxsupp/builtins/moddi3.c60
-rw-r--r--contrib/libs/cxxsupp/builtins/modsi3.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/modti3.c68
-rw-r--r--contrib/libs/cxxsupp/builtins/muldc3.c146
-rw-r--r--contrib/libs/cxxsupp/builtins/muldf3.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/muldi3.c112
-rw-r--r--contrib/libs/cxxsupp/builtins/mulodi4.c116
-rw-r--r--contrib/libs/cxxsupp/builtins/mulosi4.c116
-rw-r--r--contrib/libs/cxxsupp/builtins/muloti4.c124
-rw-r--r--contrib/libs/cxxsupp/builtins/mulsc3.c146
-rw-r--r--contrib/libs/cxxsupp/builtins/mulsf3.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/multc3.c136
-rw-r--r--contrib/libs/cxxsupp/builtins/multf3.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/multi3.c116
-rw-r--r--contrib/libs/cxxsupp/builtins/mulvdi3.c112
-rw-r--r--contrib/libs/cxxsupp/builtins/mulvsi3.c112
-rw-r--r--contrib/libs/cxxsupp/builtins/mulvti3.c120
-rw-r--r--contrib/libs/cxxsupp/builtins/mulxc3.c154
-rw-r--r--contrib/libs/cxxsupp/builtins/negdf2.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/negdi2.c52
-rw-r--r--contrib/libs/cxxsupp/builtins/negsf2.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/negti2.c60
-rw-r--r--contrib/libs/cxxsupp/builtins/negvdi2.c56
-rw-r--r--contrib/libs/cxxsupp/builtins/negvsi2.c56
-rw-r--r--contrib/libs/cxxsupp/builtins/negvti2.c64
-rw-r--r--contrib/libs/cxxsupp/builtins/paritydi2.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/paritysi2.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/parityti2.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/popcountdi2.c72
-rw-r--r--contrib/libs/cxxsupp/builtins/popcountsi2.c66
-rw-r--r--contrib/libs/cxxsupp/builtins/popcountti2.c88
-rw-r--r--contrib/libs/cxxsupp/builtins/powidf2.c68
-rw-r--r--contrib/libs/cxxsupp/builtins/powisf2.c68
-rw-r--r--contrib/libs/cxxsupp/builtins/powitf2.c76
-rw-r--r--contrib/libs/cxxsupp/builtins/powixf2.c76
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/DD.h88
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/Makefile.mk40
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/divtc3.c182
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c206
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c118
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/floatditf.c72
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/floatunditf.c82
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c152
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c110
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c106
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c152
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/multc3.c180
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/restFP.S86
-rw-r--r--contrib/libs/cxxsupp/builtins/ppc/saveFP.S80
-rw-r--r--contrib/libs/cxxsupp/builtins/subdf3.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/subsf3.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/subtf3.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/subvdi3.c72
-rw-r--r--contrib/libs/cxxsupp/builtins/subvsi3.c72
-rw-r--r--contrib/libs/cxxsupp/builtins/subvti3.c80
-rw-r--r--contrib/libs/cxxsupp/builtins/trampoline_setup.c96
-rw-r--r--contrib/libs/cxxsupp/builtins/truncdfhf2.c36
-rw-r--r--contrib/libs/cxxsupp/builtins/truncdfsf2.c36
-rw-r--r--contrib/libs/cxxsupp/builtins/truncsfhf2.c48
-rw-r--r--contrib/libs/cxxsupp/builtins/trunctfdf2.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/trunctfsf2.c44
-rw-r--r--contrib/libs/cxxsupp/builtins/ucmpdi2.c102
-rw-r--r--contrib/libs/cxxsupp/builtins/ucmpti2.c84
-rw-r--r--contrib/libs/cxxsupp/builtins/udivdi3.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/udivmoddi4.c460
-rw-r--r--contrib/libs/cxxsupp/builtins/udivmodsi4.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/udivmodti4.c474
-rw-r--r--contrib/libs/cxxsupp/builtins/udivsi3.c132
-rw-r--r--contrib/libs/cxxsupp/builtins/udivti3.c54
-rw-r--r--contrib/libs/cxxsupp/builtins/umoddi3.c50
-rw-r--r--contrib/libs/cxxsupp/builtins/umodsi3.c46
-rw-r--r--contrib/libs/cxxsupp/builtins/umodti3.c58
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk40
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/chkstk.S78
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S84
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c32
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c28
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c32
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S98
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S70
-rw-r--r--contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S136
-rw-r--r--contrib/libs/cxxsupp/builtins/ya.make22
-rw-r--r--contrib/libs/cxxsupp/libcxx/include/deque6
-rw-r--r--contrib/libs/cxxsupp/libcxx/ya.make4
-rw-r--r--contrib/libs/cxxsupp/libcxxrt/exception.cc224
-rw-r--r--contrib/libs/cxxsupp/libcxxrt/ya.make8
-rw-r--r--contrib/libs/cxxsupp/libsan/ya.make2
-rw-r--r--contrib/libs/cxxsupp/openmp/asm.S2
-rw-r--r--contrib/libs/cxxsupp/openmp/extractExternal.cpp994
-rw-r--r--contrib/libs/cxxsupp/openmp/i18n/en_US.txt950
-rw-r--r--contrib/libs/cxxsupp/openmp/include/30/omp.h.var328
-rw-r--r--contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var1266
-rw-r--r--contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var716
-rw-r--r--contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var1276
-rw-r--r--contrib/libs/cxxsupp/openmp/include/30/ompt.h.var974
-rw-r--r--contrib/libs/cxxsupp/openmp/include/40/omp.h.var320
-rw-r--r--contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var1516
-rw-r--r--contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var896
-rw-r--r--contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var1116
-rw-r--r--contrib/libs/cxxsupp/openmp/include/40/ompt.h.var974
-rw-r--r--contrib/libs/cxxsupp/openmp/include/41/omp.h.var352
-rw-r--r--contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var1576
-rw-r--r--contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var940
-rw-r--r--contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var1168
-rw-r--r--contrib/libs/cxxsupp/openmp/include/41/ompt.h.var974
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp.h7116
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_affinity.cpp9470
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_affinity.h638
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_alloc.c4094
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_atomic.c5814
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_atomic.h2074
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_barrier.cpp3302
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_cancel.cpp562
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_config.h198
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_csupport.c6092
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_debug.c284
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_debug.h260
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_debugger.c628
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_debugger.h102
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp5302
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_environment.c1192
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_environment.h162
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_error.c1046
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_error.h114
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c70
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h2506
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c66
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_ftn_os.h1064
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c70
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_global.c942
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_gsupport.c3208
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_i18n.c1948
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_i18n.h386
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc828
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc762
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_import.c84
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_io.c494
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_io.h88
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_itt.c288
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_itt.h618
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_itt.inl2260
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_lock.cpp8408
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_lock.h2546
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_omp.h466
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_os.h1452
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_platform.h336
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_runtime.c15306
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h124
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_sched.cpp1828
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_settings.c10938
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_settings.h100
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_stats.cpp1218
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_stats.h1496
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp336
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_stats_timing.h220
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_str.c1766
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_str.h238
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_stub.c504
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_stub.h122
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp1026
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_tasking.c5720
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_taskq.c4064
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_threadprivate.c1466
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_utility.c866
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_version.c422
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_version.h136
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp100
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_wait_release.h1118
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h112
-rw-r--r--contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h410
-rw-r--r--contrib/libs/cxxsupp/openmp/libomp.rc.var140
-rw-r--r--contrib/libs/cxxsupp/openmp/omp.h352
-rw-r--r--contrib/libs/cxxsupp/openmp/ompt-event-specific.h288
-rw-r--r--contrib/libs/cxxsupp/openmp/ompt-general.c1070
-rw-r--r--contrib/libs/cxxsupp/openmp/ompt-internal.h158
-rw-r--r--contrib/libs/cxxsupp/openmp/ompt-specific.c664
-rw-r--r--contrib/libs/cxxsupp/openmp/ompt-specific.h180
-rw-r--r--contrib/libs/cxxsupp/openmp/test-touch.c62
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h58
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h7600
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h954
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c2094
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h632
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h134
-rw-r--r--contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h1942
-rw-r--r--contrib/libs/cxxsupp/openmp/ya.make110
-rw-r--r--contrib/libs/cxxsupp/openmp/z_Linux_asm.s2890
-rw-r--r--contrib/libs/cxxsupp/openmp/z_Linux_util.c5412
-rw-r--r--contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm2804
-rw-r--r--contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c326
-rw-r--r--contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c3862
-rw-r--r--contrib/libs/cxxsupp/system_stl/ya.make2
-rw-r--r--contrib/libs/cxxsupp/ya.make6
-rw-r--r--contrib/libs/double-conversion/bignum-dtoa.cc1280
-rw-r--r--contrib/libs/double-conversion/bignum-dtoa.h168
-rw-r--r--contrib/libs/double-conversion/bignum.cc1524
-rw-r--r--contrib/libs/double-conversion/bignum.h286
-rw-r--r--contrib/libs/double-conversion/cached-powers.cc344
-rw-r--r--contrib/libs/double-conversion/cached-powers.h128
-rw-r--r--contrib/libs/double-conversion/diy-fp.cc114
-rw-r--r--contrib/libs/double-conversion/diy-fp.h236
-rw-r--r--contrib/libs/double-conversion/double-conversion.cc1872
-rw-r--r--contrib/libs/double-conversion/double-conversion.h1068
-rw-r--r--contrib/libs/double-conversion/fast-dtoa.cc1330
-rw-r--r--contrib/libs/double-conversion/fast-dtoa.h176
-rw-r--r--contrib/libs/double-conversion/fixed-dtoa.cc808
-rw-r--r--contrib/libs/double-conversion/fixed-dtoa.h112
-rw-r--r--contrib/libs/double-conversion/ieee.h800
-rw-r--r--contrib/libs/double-conversion/strtod.cc1092
-rw-r--r--contrib/libs/double-conversion/strtod.h90
-rw-r--r--contrib/libs/double-conversion/utils.h642
-rw-r--r--contrib/libs/double-conversion/ya.make36
-rw-r--r--contrib/libs/expat/ya.make2
-rw-r--r--contrib/libs/farmhash/arch/sse41/ya.make4
-rw-r--r--contrib/libs/farmhash/arch/sse42/ya.make4
-rw-r--r--contrib/libs/farmhash/arch/sse42_aesni/ya.make4
-rw-r--r--contrib/libs/fastlz/fastlz.c1104
-rw-r--r--contrib/libs/fastlz/fastlz.h204
-rw-r--r--contrib/libs/fastlz/rename.h16
-rw-r--r--contrib/libs/fastlz/ya.make18
-rw-r--r--contrib/libs/fmt/test/ya.make4
-rw-r--r--contrib/libs/grpc/grpc++/ya.make4
-rw-r--r--contrib/libs/grpc/grpc++_error_details/ya.make4
-rw-r--r--contrib/libs/grpc/grpc++_reflection/ya.make4
-rw-r--r--contrib/libs/grpc/grpc++_unsecure/ya.make4
-rw-r--r--contrib/libs/grpc/grpc/ya.make4
-rw-r--r--contrib/libs/grpc/grpc_unsecure/ya.make4
-rw-r--r--contrib/libs/grpc/grpcpp_channelz/ya.make4
-rw-r--r--contrib/libs/grpc/python/ya.make4
-rw-r--r--contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make4
-rw-r--r--contrib/libs/grpc/src/core/lib/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/channelz/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/core/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/health/v1/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/status/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make4
-rw-r--r--contrib/libs/grpc/src/proto/grpc/testing/ya.make4
-rw-r--r--contrib/libs/grpc/src/python/grpcio_channelz/ya.make4
-rw-r--r--contrib/libs/grpc/src/python/grpcio_health_checking/ya.make4
-rw-r--r--contrib/libs/grpc/src/python/grpcio_reflection/ya.make4
-rw-r--r--contrib/libs/grpc/src/python/grpcio_status/ya.make4
-rw-r--r--contrib/libs/grpc/test/core/util/ya.make4
-rw-r--r--contrib/libs/grpc/test/cpp/end2end/ya.make4
-rw-r--r--contrib/libs/grpc/test/cpp/util/ya.make4
-rw-r--r--contrib/libs/grpc/ya.make2
-rw-r--r--contrib/libs/hdr_histogram/ya.make2
-rw-r--r--contrib/libs/highwayhash/arch/avx2/ya.make4
-rw-r--r--contrib/libs/highwayhash/arch/sse41/ya.make4
-rw-r--r--contrib/libs/hyperscan/ya.make2
-rw-r--r--contrib/libs/jemalloc/hack.cpp28
-rw-r--r--contrib/libs/jemalloc/hack.h30
-rw-r--r--contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h4
-rw-r--r--contrib/libs/jemalloc/reg_zone.cpp10
-rw-r--r--contrib/libs/jemalloc/spinlock.h30
-rw-r--r--contrib/libs/jemalloc/ya.make4
-rw-r--r--contrib/libs/jwt-cpp/ya.make2
-rw-r--r--contrib/libs/libaio/static/ya.make2
-rw-r--r--contrib/libs/libaio/ya.make2
-rw-r--r--contrib/libs/libbz2/blocksort.c2170
-rw-r--r--contrib/libs/libbz2/bzlib.c3122
-rw-r--r--contrib/libs/libbz2/bzlib.h376
-rw-r--r--contrib/libs/libbz2/bzlib_private.h986
-rw-r--r--contrib/libs/libbz2/compress.c1324
-rw-r--r--contrib/libs/libbz2/crctable.c202
-rw-r--r--contrib/libs/libbz2/decompress.c1242
-rw-r--r--contrib/libs/libbz2/huffman.c406
-rw-r--r--contrib/libs/libbz2/randtable.c60
-rw-r--r--contrib/libs/libbz2/ya.make26
-rw-r--r--contrib/libs/libc_compat/ifaddrs.c1326
-rw-r--r--contrib/libs/libc_compat/ubuntu_14/ya.make4
-rw-r--r--contrib/libs/libevent/event_core/ya.make4
-rw-r--r--contrib/libs/libevent/event_extra/ya.make4
-rw-r--r--contrib/libs/libevent/event_openssl/ya.make4
-rw-r--r--contrib/libs/libevent/event_thread/ya.make4
-rw-r--r--contrib/libs/libidn/static/ya.make22
-rw-r--r--contrib/libs/libidn/unix/config.h8
-rw-r--r--contrib/libs/libidn/win/ac-stdint.h2
-rw-r--r--contrib/libs/libidn/ya.make30
-rw-r--r--contrib/libs/libunwind/include/__libunwind_config.h24
-rw-r--r--contrib/libs/libunwind/include/libunwind.h846
-rw-r--r--contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h914
-rw-r--r--contrib/libs/libunwind/include/unwind.h344
-rw-r--r--contrib/libs/libunwind/include/unwind_arm_ehabi.h300
-rw-r--r--contrib/libs/libunwind/include/unwind_itanium.h100
-rw-r--r--contrib/libs/libunwind/src/AddressSpace.hpp568
-rw-r--r--contrib/libs/libunwind/src/CompactUnwinder.hpp1292
-rw-r--r--contrib/libs/libunwind/src/DwarfInstructions.hpp1462
-rw-r--r--contrib/libs/libunwind/src/DwarfParser.hpp622
-rw-r--r--contrib/libs/libunwind/src/EHHeaderParser.hpp296
-rw-r--r--contrib/libs/libunwind/src/Registers.hpp3484
-rw-r--r--contrib/libs/libunwind/src/Unwind-EHABI.cpp1664
-rw-r--r--contrib/libs/libunwind/src/Unwind-EHABI.h88
-rw-r--r--contrib/libs/libunwind/src/Unwind-sjlj.c768
-rw-r--r--contrib/libs/libunwind/src/UnwindCursor.hpp2400
-rw-r--r--contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c478
-rw-r--r--contrib/libs/libunwind/src/UnwindLevel1.c758
-rw-r--r--contrib/libs/libunwind/src/UnwindRegistersRestore.S650
-rw-r--r--contrib/libs/libunwind/src/UnwindRegistersSave.S542
-rw-r--r--contrib/libs/libunwind/src/Unwind_AppleExtras.cpp208
-rw-r--r--contrib/libs/libunwind/src/assembly.h122
-rw-r--r--contrib/libs/libunwind/src/config.h98
-rw-r--r--contrib/libs/libunwind/src/dwarf2.h462
-rw-r--r--contrib/libs/libunwind/src/libunwind.cpp394
-rw-r--r--contrib/libs/libunwind/src/libunwind_ext.h70
-rw-r--r--contrib/libs/libunwind/ya.make36
-rw-r--r--contrib/libs/linuxvdso/fake.cpp10
-rw-r--r--contrib/libs/linuxvdso/interface.cpp56
-rw-r--r--contrib/libs/linuxvdso/interface.h48
-rw-r--r--contrib/libs/linuxvdso/original/config.h22
-rw-r--r--contrib/libs/linuxvdso/original/elf_mem_image.cc870
-rw-r--r--contrib/libs/linuxvdso/original/elf_mem_image.h270
-rw-r--r--contrib/libs/linuxvdso/original/linux_syscall_support.h4282
-rw-r--r--contrib/libs/linuxvdso/original/logging.h308
-rw-r--r--contrib/libs/linuxvdso/original/vdso_support.cc256
-rw-r--r--contrib/libs/linuxvdso/original/vdso_support.h260
-rw-r--r--contrib/libs/linuxvdso/original/ya.make20
-rw-r--r--contrib/libs/linuxvdso/ya.make30
-rw-r--r--contrib/libs/lz4/generated/gen.py8
-rw-r--r--contrib/libs/lz4/generated/iface.h30
-rw-r--r--contrib/libs/lz4/generated/lz4_ns.h8
-rw-r--r--contrib/libs/lz4/generated/ya.make4
-rw-r--r--contrib/libs/lz4/lz4.c2
-rw-r--r--contrib/libs/lzmasdk/7zStream.c352
-rw-r--r--contrib/libs/lzmasdk/7zTypes.h742
-rw-r--r--contrib/libs/lzmasdk/Alloc.c246
-rw-r--r--contrib/libs/lzmasdk/Alloc.h72
-rw-r--r--contrib/libs/lzmasdk/Compiler.h66
-rw-r--r--contrib/libs/lzmasdk/LzFind.c2032
-rw-r--r--contrib/libs/lzmasdk/LzFind.h242
-rw-r--r--contrib/libs/lzmasdk/LzHash.h114
-rw-r--r--contrib/libs/lzmasdk/LzmaDec.c1876
-rw-r--r--contrib/libs/lzmasdk/LzmaDec.h420
-rw-r--r--contrib/libs/lzmasdk/LzmaEnc.c2846
-rw-r--r--contrib/libs/lzmasdk/LzmaEnc.h152
-rw-r--r--contrib/libs/lzmasdk/LzmaLib.c80
-rw-r--r--contrib/libs/lzmasdk/LzmaLib.h260
-rw-r--r--contrib/libs/lzmasdk/Precomp.h20
-rw-r--r--contrib/libs/lzmasdk/ya.make30
-rw-r--r--contrib/libs/nayuki_md5/md5-fast-x8664.S342
-rw-r--r--contrib/libs/nayuki_md5/md5.c268
-rw-r--r--contrib/libs/nayuki_md5/md5.h18
-rw-r--r--contrib/libs/nayuki_md5/ya.make16
-rw-r--r--contrib/libs/nghttp2/ya.make2
-rw-r--r--contrib/libs/openssl/crypto/ya.make2
-rw-r--r--contrib/libs/openssl/ya.make2
-rw-r--r--contrib/libs/pcre/pcre.h418
-rw-r--r--contrib/libs/pcre/pcre_chartables.c388
-rw-r--r--contrib/libs/pcre/pcre_compile.c7712
-rw-r--r--contrib/libs/pcre/pcre_config.c242
-rw-r--r--contrib/libs/pcre/pcre_config.h186
-rw-r--r--contrib/libs/pcre/pcre_dfa_exec.c4396
-rw-r--r--contrib/libs/pcre/pcre_exec.c6544
-rw-r--r--contrib/libs/pcre/pcre_fullinfo.c288
-rw-r--r--contrib/libs/pcre/pcre_get.c826
-rw-r--r--contrib/libs/pcre/pcre_globals.c106
-rw-r--r--contrib/libs/pcre/pcre_internal.h1344
-rw-r--r--contrib/libs/pcre/pcre_maketables.c254
-rw-r--r--contrib/libs/pcre/pcre_newline.c254
-rw-r--r--contrib/libs/pcre/pcre_ord2utf8.c144
-rw-r--r--contrib/libs/pcre/pcre_refcount.c156
-rw-r--r--contrib/libs/pcre/pcre_study.c834
-rw-r--r--contrib/libs/pcre/pcre_tables.c188
-rw-r--r--contrib/libs/pcre/pcre_valid_utf8.c220
-rw-r--r--contrib/libs/pcre/pcre_version.c174
-rw-r--r--contrib/libs/pcre/pcre_xclass.c252
-rw-r--r--contrib/libs/pcre/pcrecpp/ya.make4
-rw-r--r--contrib/libs/pcre/pcreposix.c576
-rw-r--r--contrib/libs/pcre/pcreposix.h228
-rw-r--r--contrib/libs/pcre/ucp.h248
-rw-r--r--contrib/libs/pcre/ya.make50
-rw-r--r--contrib/libs/pdqsort/ya.make4
-rw-r--r--contrib/libs/pire/pire/extra/count.cpp16
-rw-r--r--contrib/libs/pire/pire/extra/count.h14
-rw-r--r--contrib/libs/pire/pire/scanners/loaded.h20
-rw-r--r--contrib/libs/pire/pire/scanners/multi.h74
-rw-r--r--contrib/libs/pire/pire/scanners/simple.h12
-rw-r--r--contrib/libs/pire/pire/scanners/slow.h26
-rw-r--r--contrib/libs/pire/pire/stub/stl.h12
-rw-r--r--contrib/libs/pire/pire/stub/utf8.h6
-rw-r--r--contrib/libs/poco/Crypto/ya.make2
-rw-r--r--contrib/libs/poco/Foundation/ya.make2
-rw-r--r--contrib/libs/poco/JSON/ya.make2
-rw-r--r--contrib/libs/poco/Net/ya.make2
-rw-r--r--contrib/libs/poco/NetSSL_OpenSSL/ya.make2
-rw-r--r--contrib/libs/poco/Util/ya.make2
-rw-r--r--contrib/libs/poco/XML/ya.make2
-rw-r--r--contrib/libs/python/Include/ya.make2
-rw-r--r--contrib/libs/python/ut/lib/ya.make8
-rw-r--r--contrib/libs/python/ya.make2
-rw-r--r--contrib/libs/re2/re2/parse.cc22
-rw-r--r--contrib/libs/re2/re2/perl_groups.cc42
-rw-r--r--contrib/libs/re2/re2/re2.cc2
-rw-r--r--contrib/libs/re2/re2/unicode_casefold.h4
-rw-r--r--contrib/libs/re2/re2/unicode_groups.h16
-rw-r--r--contrib/libs/re2/util/utf.h2
-rw-r--r--contrib/libs/re2/ya.make2
-rw-r--r--contrib/libs/snappy/snappy-c.cc180
-rw-r--r--contrib/libs/snappy/snappy-c.h268
-rw-r--r--contrib/libs/snappy/snappy-internal.h232
-rw-r--r--contrib/libs/snappy/snappy-sinksource.cc142
-rw-r--r--contrib/libs/snappy/snappy-sinksource.h262
-rw-r--r--contrib/libs/snappy/snappy-stubs-internal.cc78
-rw-r--r--contrib/libs/snappy/snappy-stubs-internal.h720
-rw-r--r--contrib/libs/snappy/snappy-stubs-public.h92
-rw-r--r--contrib/libs/snappy/snappy.cc1374
-rw-r--r--contrib/libs/snappy/snappy.h266
-rw-r--r--contrib/libs/snappy/ya.make18
-rw-r--r--contrib/libs/sqlite3/ya.make2
-rw-r--r--contrib/libs/tcmalloc/tcmalloc/libc_override.h2
-rw-r--r--contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h6
-rw-r--r--contrib/libs/tcmalloc/ya.make2
-rw-r--r--contrib/libs/utf8proc/ya.make2
-rw-r--r--contrib/libs/xz/common/ya.make2
-rw-r--r--contrib/libs/xz/liblzma/ya.make2
-rw-r--r--contrib/libs/ya.make32
-rw-r--r--contrib/libs/yaml-cpp/ya.make4
-rw-r--r--contrib/libs/zlib/deflate.c2
-rwxr-xr-xcontrib/libs/zstd06/LICENSE52
-rw-r--r--contrib/libs/zstd06/common/bitstream.h778
-rw-r--r--contrib/libs/zstd06/common/error_private.h230
-rw-r--r--contrib/libs/zstd06/common/error_public.h142
-rw-r--r--contrib/libs/zstd06/common/fse.h558
-rw-r--r--contrib/libs/zstd06/common/fse_static.h682
-rw-r--r--contrib/libs/zstd06/common/mem.h522
-rw-r--r--contrib/libs/zstd06/common/zbuff.h332
-rw-r--r--contrib/libs/zstd06/common/zbuff_static.h126
-rw-r--r--contrib/libs/zstd06/common/zstd.h292
-rw-r--r--contrib/libs/zstd06/common/zstd_internal.h488
-rw-r--r--contrib/libs/zstd06/common/zstd_static.h534
-rw-r--r--contrib/libs/zstd06/compress/zstd_compress.c5094
-rw-r--r--contrib/libs/zstd06/decompress/zstd_decompress.c2124
-rw-r--r--contrib/libs/zstd06/dictBuilder/divsufsort.c3826
-rw-r--r--contrib/libs/zstd06/dictBuilder/divsufsort.h134
-rw-r--r--contrib/libs/zstd06/dictBuilder/zdict.c1890
-rw-r--r--contrib/libs/zstd06/dictBuilder/zdict.h134
-rw-r--r--contrib/libs/zstd06/dictBuilder/zdict_static.h160
-rw-r--r--contrib/libs/zstd06/legacy/zstd_legacy.h144
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v01.c4352
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v01.h200
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v02.c7490
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v02.h198
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v03.c6772
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v03.h198
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v04.c8070
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v04.h296
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v05.c8618
-rw-r--r--contrib/libs/zstd06/legacy/zstd_v05.h294
-rwxr-xr-xcontrib/libs/zstd06/libzstd.pc.in28
-rw-r--r--contrib/libs/zstd06/ya.make38
-rw-r--r--contrib/python/Jinja2/py2/ya.make2
-rw-r--r--contrib/python/Jinja2/py3/ya.make2
-rw-r--r--contrib/python/Jinja2/ya.make2
-rw-r--r--contrib/python/MarkupSafe/py2/ya.make4
-rw-r--r--contrib/python/MarkupSafe/py3/ya.make2
-rw-r--r--contrib/python/MarkupSafe/ya.make2
-rw-r--r--contrib/python/PyHamcrest/tests/ya.make2
-rw-r--r--contrib/python/PyHamcrest/ya.make136
-rw-r--r--contrib/python/PyYAML/py2/ya.make4
-rw-r--r--contrib/python/PyYAML/py3/ya.make4
-rw-r--r--contrib/python/PyYAML/ya.make4
-rw-r--r--contrib/python/Pygments/py2/ya.make2
-rw-r--r--contrib/python/Pygments/py3/ya.make2
-rw-r--r--contrib/python/Pygments/ya.make2
-rw-r--r--contrib/python/attrs/ya.make4
-rw-r--r--contrib/python/boto3/ya.make2
-rw-r--r--contrib/python/botocore/ya.make2
-rw-r--r--contrib/python/certifi/ya.make2
-rw-r--r--contrib/python/cffi/gen/lib/ya.make4
-rw-r--r--contrib/python/cffi/ya.make6
-rw-r--r--contrib/python/cryptography/ya.make20
-rw-r--r--contrib/python/dateutil/ya.make10
-rw-r--r--contrib/python/decorator/ya.make4
-rw-r--r--contrib/python/future/ya.make4
-rw-r--r--contrib/python/idna/ya.make2
-rw-r--r--contrib/python/ipdb/ya.make4
-rw-r--r--contrib/python/ipython/py2/ya.make2
-rw-r--r--contrib/python/ipython/ya.make2
-rw-r--r--contrib/python/jedi/ya.make4
-rw-r--r--contrib/python/parso/py3/ya.make4
-rw-r--r--contrib/python/parso/ya.make2
-rw-r--r--contrib/python/pexpect/ya.make4
-rw-r--r--contrib/python/pickleshare/ya.make2
-rw-r--r--contrib/python/pluggy/ya.make2
-rw-r--r--contrib/python/prompt-toolkit/ya.make2
-rw-r--r--contrib/python/ptyprocess/ya.make2
-rw-r--r--contrib/python/py/ya.make2
-rw-r--r--contrib/python/pycparser/ya.make2
-rw-r--r--contrib/python/requests/ya.make2
-rw-r--r--contrib/python/s3transfer/py2/ya.make2
-rw-r--r--contrib/python/s3transfer/py3/ya.make2
-rw-r--r--contrib/python/s3transfer/ya.make2
-rw-r--r--contrib/python/six/ya.make4
-rw-r--r--contrib/python/toml/ya.make4
-rw-r--r--contrib/python/traitlets/ya.make2
-rw-r--r--contrib/python/wcwidth/ya.make4
-rw-r--r--contrib/python/ya.make4
-rw-r--r--contrib/restricted/libffi/include/ffi_common.h236
-rw-r--r--contrib/restricted/libffi/src/closures.c1172
-rw-r--r--contrib/restricted/libffi/src/dlmalloc.c10296
-rw-r--r--contrib/restricted/libffi/src/java_raw_api.c702
-rw-r--r--contrib/restricted/libffi/src/prep_cif.c418
-rw-r--r--contrib/restricted/libffi/src/raw_api.c498
-rw-r--r--contrib/restricted/libffi/src/types.c124
-rw-r--r--contrib/restricted/libffi/src/x86/ffi.c334
-rw-r--r--contrib/restricted/libffi/src/x86/ffi64.c1102
-rw-r--r--contrib/restricted/libffi/src/x86/ffitarget.h200
-rw-r--r--contrib/restricted/libffi/src/x86/sysv.S186
-rw-r--r--contrib/restricted/libffi/src/x86/unix64.S286
-rw-r--r--contrib/restricted/libffi/src/x86/win64.S74
-rw-r--r--contrib/restricted/libffi/ya.make30
-rw-r--r--contrib/tools/bison/bison/src/files.c8
-rw-r--r--contrib/tools/bison/bison/src/parse-gram.y2
-rw-r--r--contrib/tools/bison/bison/src/symtab.c2
-rw-r--r--contrib/tools/bison/bison/ya.make14
-rw-r--r--contrib/tools/bison/gnulib/platform/posix/config.h242
-rw-r--r--contrib/tools/bison/gnulib/src/canonicalize-lgpl.c2
-rw-r--r--contrib/tools/bison/gnulib/src/execute.c2
-rw-r--r--contrib/tools/bison/gnulib/src/fpending.c8
-rw-r--r--contrib/tools/bison/gnulib/src/malloca.h2
-rw-r--r--contrib/tools/bison/gnulib/src/palloca.h14
-rw-r--r--contrib/tools/bison/gnulib/src/penviron.h8
-rw-r--r--contrib/tools/bison/gnulib/src/regex_internal.h2
-rw-r--r--contrib/tools/bison/gnulib/src/spawn-pipe.c2
-rw-r--r--contrib/tools/bison/gnulib/src/spawni.c2
-rw-r--r--contrib/tools/bison/gnulib/src/stpcpy.c2
-rw-r--r--contrib/tools/bison/gnulib/src/strsignal.c8
-rw-r--r--contrib/tools/bison/gnulib/src/timevar.c12
-rw-r--r--contrib/tools/bison/gnulib/src/vasnprintf.c2
-rw-r--r--contrib/tools/bison/gnulib/src/xstrndup.c34
-rw-r--r--contrib/tools/bison/gnulib/src/xvasprintf.c8
-rw-r--r--contrib/tools/bison/gnulib/ya.make126
-rw-r--r--contrib/tools/bison/m4/src/builtin.c2
-rw-r--r--contrib/tools/bison/m4/src/input.c2
-rw-r--r--contrib/tools/bison/m4/ya.make14
-rw-r--r--contrib/tools/bison/ya.make2
-rw-r--r--contrib/tools/cython/Cython/Build/BuildExecutable.py284
-rw-r--r--contrib/tools/cython/Cython/Build/Cythonize.py360
-rw-r--r--contrib/tools/cython/Cython/Build/Dependencies.py1596
-rw-r--r--contrib/tools/cython/Cython/Build/Inline.py524
-rw-r--r--contrib/tools/cython/Cython/Build/IpythonMagic.py562
-rw-r--r--contrib/tools/cython/Cython/Build/Tests/TestInline.py112
-rw-r--r--contrib/tools/cython/Cython/Build/Tests/TestIpythonMagic.py136
-rw-r--r--contrib/tools/cython/Cython/Build/Tests/TestStripLiterals.py110
-rw-r--r--contrib/tools/cython/Cython/Build/Tests/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Build/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/CodeWriter.py1022
-rw-r--r--contrib/tools/cython/Cython/Compiler/AnalysedTreeTransforms.py198
-rw-r--r--contrib/tools/cython/Cython/Compiler/Annotate.py392
-rw-r--r--contrib/tools/cython/Cython/Compiler/AutoDocTransforms.py334
-rw-r--r--contrib/tools/cython/Cython/Compiler/Buffer.py1364
-rw-r--r--contrib/tools/cython/Cython/Compiler/Builtin.py746
-rw-r--r--contrib/tools/cython/Cython/Compiler/CmdLine.py342
-rw-r--r--contrib/tools/cython/Cython/Compiler/Code.pxd156
-rw-r--r--contrib/tools/cython/Cython/Compiler/Code.py3960
-rw-r--r--contrib/tools/cython/Cython/Compiler/CodeGeneration.py68
-rw-r--r--contrib/tools/cython/Cython/Compiler/CythonScope.py314
-rw-r--r--contrib/tools/cython/Cython/Compiler/DebugFlags.py42
-rw-r--r--contrib/tools/cython/Cython/Compiler/Errors.py486
-rw-r--r--contrib/tools/cython/Cython/Compiler/ExprNodes.py19962
-rw-r--r--contrib/tools/cython/Cython/Compiler/FlowControl.pxd208
-rw-r--r--contrib/tools/cython/Cython/Compiler/FlowControl.py2542
-rw-r--r--contrib/tools/cython/Cython/Compiler/FusedNode.py1426
-rw-r--r--contrib/tools/cython/Cython/Compiler/Future.py26
-rw-r--r--contrib/tools/cython/Cython/Compiler/Interpreter.py128
-rw-r--r--contrib/tools/cython/Cython/Compiler/Lexicon.py248
-rw-r--r--contrib/tools/cython/Cython/Compiler/Main.py1138
-rw-r--r--contrib/tools/cython/Cython/Compiler/MemoryView.py1504
-rw-r--r--contrib/tools/cython/Cython/Compiler/ModuleNode.py4352
-rw-r--r--contrib/tools/cython/Cython/Compiler/Naming.py294
-rw-r--r--contrib/tools/cython/Cython/Compiler/Nodes.py14782
-rw-r--r--contrib/tools/cython/Cython/Compiler/Optimize.py6992
-rw-r--r--contrib/tools/cython/Cython/Compiler/Options.py558
-rw-r--r--contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.pxd134
-rw-r--r--contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.py5706
-rw-r--r--contrib/tools/cython/Cython/Compiler/Parsing.pxd362
-rw-r--r--contrib/tools/cython/Cython/Compiler/Parsing.py5888
-rw-r--r--contrib/tools/cython/Cython/Compiler/Pipeline.py594
-rw-r--r--contrib/tools/cython/Cython/Compiler/PyrexTypes.py6646
-rw-r--r--contrib/tools/cython/Cython/Compiler/Scanning.pxd84
-rw-r--r--contrib/tools/cython/Cython/Compiler/Scanning.py884
-rw-r--r--contrib/tools/cython/Cython/Compiler/StringEncoding.py608
-rw-r--r--contrib/tools/cython/Cython/Compiler/Symtab.py4148
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestBuffer.py188
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestMemView.py140
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestParseTreeTransforms.py562
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestSignatureMatching.py144
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestTreeFragment.py116
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestTreePath.py116
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestUtilityLoad.py182
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestVisitor.py122
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Compiler/TreeFragment.py466
-rw-r--r--contrib/tools/cython/Cython/Compiler/TreePath.py550
-rw-r--r--contrib/tools/cython/Cython/Compiler/TypeInference.py1052
-rw-r--r--contrib/tools/cython/Cython/Compiler/TypeSlots.py1626
-rw-r--r--contrib/tools/cython/Cython/Compiler/UtilNodes.py674
-rw-r--r--contrib/tools/cython/Cython/Compiler/UtilityCode.py318
-rw-r--r--contrib/tools/cython/Cython/Compiler/Version.py18
-rw-r--r--contrib/tools/cython/Cython/Compiler/Visitor.pxd100
-rw-r--r--contrib/tools/cython/Cython/Compiler/Visitor.py1390
-rw-r--r--contrib/tools/cython/Cython/Compiler/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Debugger/Cygdb.py284
-rw-r--r--contrib/tools/cython/Cython/Debugger/DebugWriter.py126
-rw-r--r--contrib/tools/cython/Cython/Debugger/Tests/TestLibCython.py508
-rw-r--r--contrib/tools/cython/Cython/Debugger/Tests/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Debugger/Tests/cfuncs.c16
-rw-r--r--contrib/tools/cython/Cython/Debugger/Tests/codefile94
-rw-r--r--contrib/tools/cython/Cython/Debugger/Tests/test_libcython_in_gdb.py970
-rw-r--r--contrib/tools/cython/Cython/Debugger/Tests/test_libpython_in_gdb.py208
-rw-r--r--contrib/tools/cython/Cython/Debugger/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Debugger/libcython.py2792
-rw-r--r--contrib/tools/cython/Cython/Debugger/libpython.py4508
-rw-r--r--contrib/tools/cython/Cython/Debugging.py40
-rw-r--r--contrib/tools/cython/Cython/Distutils/__init__.py4
-rw-r--r--contrib/tools/cython/Cython/Distutils/build_ext.py8
-rw-r--r--contrib/tools/cython/Cython/Distutils/extension.py254
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_bool.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_buffer.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_bytes.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_cobject.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_complex.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_dict.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_exc.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_float.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_function.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_getargs.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_instance.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_int.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_iterator.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_list.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_long.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_mapping.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_mem.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_method.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_module.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_number.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_object.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_oldbuffer.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_pycapsule.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_ref.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_sequence.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_set.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_string.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_tuple.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_type.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_unicode.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_version.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/python_weakref.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/stdio.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/stdlib.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/Deprecated/stl.pxd182
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/__init__.pxd364
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/array.pxd270
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/bool.pxd76
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/buffer.pxd216
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/bytes.pxd394
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/cobject.pxd72
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/complex.pxd100
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/datetime.pxd410
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/dict.pxd322
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/exc.pxd496
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/float.pxd78
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/function.pxd128
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/getargs.pxd24
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/instance.pxd50
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/int.pxd158
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/iterator.pxd72
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/list.pxd182
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/long.pxd226
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/mapping.pxd128
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/mem.pxd150
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/method.pxd94
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/module.pxd334
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/number.pxd500
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/object.pxd574
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/oldbuffer.pxd126
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/pycapsule.pxd276
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/pystate.pxd168
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/pythread.pxd74
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/ref.pxd98
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/sequence.pxd270
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/set.pxd226
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/string.pxd394
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/tuple.pxd138
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/type.pxd96
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/unicode.pxd706
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/version.pxd64
-rw-r--r--contrib/tools/cython/Cython/Includes/cpython/weakref.pxd80
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/__init__.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/errno.pxd252
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/float.pxd26
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/limits.pxd16
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/locale.pxd90
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/math.pxd158
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/setjmp.pxd8
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/signal.pxd28
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/stddef.pxd16
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/stdint.pxd202
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/stdio.pxd158
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/stdlib.pxd142
-rw-r--r--contrib/tools/cython/Cython/Includes/libc/string.pxd98
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/__init__.pxd4
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/algorithm.pxd24
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/cast.pxd22
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/complex.pxd146
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/deque.pxd102
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/list.pxd120
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/map.pxd86
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/pair.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/queue.pxd40
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/set.pxd82
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/stack.pxd20
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/string.pxd88
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/unordered_map.pxd90
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/unordered_set.pxd88
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/utility.pxd26
-rw-r--r--contrib/tools/cython/Cython/Includes/libcpp/vector.pxd86
-rw-r--r--contrib/tools/cython/Cython/Includes/numpy.pxd1812
-rw-r--r--contrib/tools/cython/Cython/Includes/numpy/__init__.pxd1818
-rw-r--r--contrib/tools/cython/Cython/Includes/numpy/math.pxd232
-rw-r--r--contrib/tools/cython/Cython/Includes/openmp.pxd100
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/__init__.pxd2
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/fcntl.pxd132
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/ioctl.pxd6
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/resource.pxd106
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/signal.pxd122
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/stat.pxd128
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/stdlib.pxd56
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/time.pxd92
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/types.pxd28
-rw-r--r--contrib/tools/cython/Cython/Includes/posix/unistd.pxd540
-rw-r--r--contrib/tools/cython/Cython/Plex/Actions.pxd50
-rw-r--r--contrib/tools/cython/Cython/Plex/Actions.py94
-rw-r--r--contrib/tools/cython/Cython/Plex/DFA.py64
-rw-r--r--contrib/tools/cython/Cython/Plex/Errors.py56
-rw-r--r--contrib/tools/cython/Cython/Plex/Lexicons.py124
-rw-r--r--contrib/tools/cython/Cython/Plex/Machines.py104
-rw-r--r--contrib/tools/cython/Cython/Plex/Regexps.py1084
-rw-r--r--contrib/tools/cython/Cython/Plex/Scanners.pxd78
-rw-r--r--contrib/tools/cython/Cython/Plex/Scanners.py108
-rw-r--r--contrib/tools/cython/Cython/Plex/Timing.py46
-rw-r--r--contrib/tools/cython/Cython/Plex/Traditional.py72
-rw-r--r--contrib/tools/cython/Cython/Plex/Transitions.py86
-rw-r--r--contrib/tools/cython/Cython/Plex/__init__.py78
-rw-r--r--contrib/tools/cython/Cython/Runtime/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Runtime/refnanny.pyx370
-rw-r--r--contrib/tools/cython/Cython/Shadow.py798
-rw-r--r--contrib/tools/cython/Cython/StringIOTree.py134
-rw-r--r--contrib/tools/cython/Cython/Tempita/__init__.py6
-rw-r--r--contrib/tools/cython/Cython/Tempita/_looper.py326
-rw-r--r--contrib/tools/cython/Cython/Tempita/_tempita.py2332
-rw-r--r--contrib/tools/cython/Cython/Tempita/compat3.py88
-rw-r--r--contrib/tools/cython/Cython/TestUtils.py372
-rw-r--r--contrib/tools/cython/Cython/Tests/TestCodeWriter.py162
-rw-r--r--contrib/tools/cython/Cython/Tests/TestJediTyper.py266
-rw-r--r--contrib/tools/cython/Cython/Tests/TestStringIOTree.py132
-rw-r--r--contrib/tools/cython/Cython/Tests/__init__.py2
-rw-r--r--contrib/tools/cython/Cython/Tests/xmlrunner.py710
-rw-r--r--contrib/tools/cython/Cython/Utility/Buffer.c1608
-rw-r--r--contrib/tools/cython/Cython/Utility/Builtins.c648
-rw-r--r--contrib/tools/cython/Cython/Utility/Capsule.c40
-rw-r--r--contrib/tools/cython/Cython/Utility/CommonTypes.c96
-rw-r--r--contrib/tools/cython/Cython/Utility/CppConvert.pyx378
-rw-r--r--contrib/tools/cython/Cython/Utility/CppSupport.cpp92
-rw-r--r--contrib/tools/cython/Cython/Utility/CythonFunction.c2042
-rw-r--r--contrib/tools/cython/Cython/Utility/Embed.c362
-rw-r--r--contrib/tools/cython/Cython/Utility/Exceptions.c908
-rw-r--r--contrib/tools/cython/Cython/Utility/ExtensionTypes.c106
-rw-r--r--contrib/tools/cython/Cython/Utility/FunctionArguments.c530
-rw-r--r--contrib/tools/cython/Cython/Utility/ImportExport.c952
-rw-r--r--contrib/tools/cython/Cython/Utility/MemoryView.pyx2668
-rw-r--r--contrib/tools/cython/Cython/Utility/MemoryView_C.c1694
-rw-r--r--contrib/tools/cython/Cython/Utility/ModuleSetupCode.c960
-rw-r--r--contrib/tools/cython/Cython/Utility/ObjectHandling.c2418
-rw-r--r--contrib/tools/cython/Cython/Utility/Optimize.c840
-rw-r--r--contrib/tools/cython/Cython/Utility/Overflow.c494
-rw-r--r--contrib/tools/cython/Cython/Utility/Printing.c352
-rw-r--r--contrib/tools/cython/Cython/Utility/Profile.c254
-rw-r--r--contrib/tools/cython/Cython/Utility/StringTools.c1372
-rw-r--r--contrib/tools/cython/Cython/Utility/TestCyUtilityLoader.pyx16
-rw-r--r--contrib/tools/cython/Cython/Utility/TestCythonScope.pyx128
-rw-r--r--contrib/tools/cython/Cython/Utility/TestUtilityLoader.c24
-rw-r--r--contrib/tools/cython/Cython/Utility/TypeConversion.c854
-rw-r--r--contrib/tools/cython/Cython/Utility/arrayarray.h262
-rw-r--r--contrib/tools/cython/Cython/Utils.py532
-rw-r--r--contrib/tools/cython/Cython/__init__.py14
-rwxr-xr-xcontrib/tools/cython/cygdb.py16
-rwxr-xr-xcontrib/tools/cython/cython.py50
-rw-r--r--contrib/tools/protoc/ya.make2
-rw-r--r--contrib/tools/python/pyconfig.inc30
-rw-r--r--contrib/tools/python/src/Include/pyport.h4
-rw-r--r--contrib/tools/python/src/config_init.c12
-rw-r--r--contrib/tools/python/src/config_map.c12
-rw-r--r--contrib/tools/python/ya.make8
-rw-r--r--contrib/tools/ragel6/cdcodegen.cpp10
-rw-r--r--contrib/tools/ragel6/javacodegen.cpp10
-rw-r--r--contrib/tools/ragel6/rbxgoto.cpp2
-rw-r--r--contrib/tools/ragel6/rlscan.cpp94
-rw-r--r--contrib/tools/ragel6/rlscan.h2
-rw-r--r--contrib/tools/ya.make6
-rw-r--r--contrib/tools/yasm/bin/ya.make20
-rw-r--r--contrib/tools/yasm/util.h4
-rw-r--r--contrib/ya.make6
1102 files changed, 265542 insertions, 265542 deletions
diff --git a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make
index 74e8bab708..d6b0823d45 100644
--- a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make
+++ b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core/ya.make
@@ -13,7 +13,7 @@ LICENSE(
MIT AND
Zlib
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make
index 7bc59e52a5..df8594d6cc 100644
--- a/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make
+++ b/contrib/libs/aws-sdk-cpp/aws-cpp-sdk-s3/ya.make
@@ -8,8 +8,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/base64/plain32/codec_plain.c b/contrib/libs/base64/plain32/codec_plain.c
index 25ce09e3a1..740d343468 100644
--- a/contrib/libs/base64/plain32/codec_plain.c
+++ b/contrib/libs/base64/plain32/codec_plain.c
@@ -1,7 +1,7 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
-#include <string.h>
+#include <string.h>
#include "libbase64.h"
#include "codecs.h"
diff --git a/contrib/libs/base64/plain32/dec_uint32.c b/contrib/libs/base64/plain32/dec_uint32.c
index e2829633a2..db701d73d4 100644
--- a/contrib/libs/base64/plain32/dec_uint32.c
+++ b/contrib/libs/base64/plain32/dec_uint32.c
@@ -8,8 +8,8 @@ while (srclen >= 8)
uint32_t str, res, dec;
// Load string:
- //str = *(uint32_t *)c;
- memcpy(&str, c, sizeof(str));
+ //str = *(uint32_t *)c;
+ memcpy(&str, c, sizeof(str));
// Shuffle bytes to 32-bit bigendian:
str = cpu_to_be32(str);
@@ -40,8 +40,8 @@ while (srclen >= 8)
res = be32_to_cpu(res);
// Store back:
- //*(uint32_t *)o = res;
- memcpy(o, &res, sizeof(res));
+ //*(uint32_t *)o = res;
+ memcpy(o, &res, sizeof(res));
c += 4;
o += 3;
diff --git a/contrib/libs/base64/plain32/enc_uint32.c b/contrib/libs/base64/plain32/enc_uint32.c
index 9e117f0602..1dbe5fbe53 100644
--- a/contrib/libs/base64/plain32/enc_uint32.c
+++ b/contrib/libs/base64/plain32/enc_uint32.c
@@ -3,11 +3,11 @@
while (srclen >= 4)
{
// Load string:
- //uint32_t str = *(uint32_t *)c;
- uint32_t str;
+ //uint32_t str = *(uint32_t *)c;
+ uint32_t str;
+
+ memcpy(&str, c, sizeof(str));
- memcpy(&str, c, sizeof(str));
-
// Reorder to 32-bit big-endian, if not already in that format. The
// workset must be in big-endian, otherwise the shifted bits do not
// carry over properly among adjacent bytes:
diff --git a/contrib/libs/base64/plain64/codec_plain.c b/contrib/libs/base64/plain64/codec_plain.c
index 67e5ed88f5..26a5af9097 100644
--- a/contrib/libs/base64/plain64/codec_plain.c
+++ b/contrib/libs/base64/plain64/codec_plain.c
@@ -1,7 +1,7 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
-#include <string.h>
+#include <string.h>
#include "libbase64.h"
#include "codecs.h"
diff --git a/contrib/libs/base64/plain64/dec_uint64.c b/contrib/libs/base64/plain64/dec_uint64.c
index 8dc787dd8d..fe26e9881b 100644
--- a/contrib/libs/base64/plain64/dec_uint64.c
+++ b/contrib/libs/base64/plain64/dec_uint64.c
@@ -8,8 +8,8 @@ while (srclen >= 13)
uint64_t str, res, dec;
// Load string:
- //str = *(uint64_t *)c;
- memcpy(&str, c, sizeof(str));
+ //str = *(uint64_t *)c;
+ memcpy(&str, c, sizeof(str));
// Shuffle bytes to 64-bit bigendian:
str = cpu_to_be64(str);
@@ -60,8 +60,8 @@ while (srclen >= 13)
res = be64_to_cpu(res);
// Store back:
- //*(uint64_t *)o = res;
- memcpy(o, &res, sizeof(res));
+ //*(uint64_t *)o = res;
+ memcpy(o, &res, sizeof(res));
c += 8;
o += 6;
diff --git a/contrib/libs/base64/plain64/enc_uint64.c b/contrib/libs/base64/plain64/enc_uint64.c
index 67636b12a0..3d5955af24 100644
--- a/contrib/libs/base64/plain64/enc_uint64.c
+++ b/contrib/libs/base64/plain64/enc_uint64.c
@@ -3,11 +3,11 @@
while (srclen >= 8)
{
// Load string:
- //uint64_t str = *(uint64_t *)c;
- uint64_t str;
+ //uint64_t str = *(uint64_t *)c;
+ uint64_t str;
+
+ memcpy(&str, c, sizeof(str));
- memcpy(&str, c, sizeof(str));
-
// Reorder to 64-bit big-endian, if not already in that format. The
// workset must be in big-endian, otherwise the shifted bits do not
// carry over properly among adjacent bytes:
diff --git a/contrib/libs/brotli/LICENSE b/contrib/libs/brotli/LICENSE
index 981ec3b4f3..33b7cdd2db 100644
--- a/contrib/libs/brotli/LICENSE
+++ b/contrib/libs/brotli/LICENSE
@@ -1,15 +1,15 @@
Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
-
+
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-
+
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
-
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
diff --git a/contrib/libs/brotli/README.md b/contrib/libs/brotli/README.md
index 3874ddfa0f..6b9b0cf0e1 100644
--- a/contrib/libs/brotli/README.md
+++ b/contrib/libs/brotli/README.md
@@ -1,19 +1,19 @@
<p align="center"><img src="https://brotli.org/brotli.svg" alt="Brotli" width="64"></p>
-
+
### Introduction
-Brotli is a generic-purpose lossless compression algorithm that compresses data
-using a combination of a modern variant of the LZ77 algorithm, Huffman coding
-and 2nd order context modeling, with a compression ratio comparable to the best
-currently available general-purpose compression methods. It is similar in speed
-with deflate but offers more dense compression.
-
+Brotli is a generic-purpose lossless compression algorithm that compresses data
+using a combination of a modern variant of the LZ77 algorithm, Huffman coding
+and 2nd order context modeling, with a compression ratio comparable to the best
+currently available general-purpose compression methods. It is similar in speed
+with deflate but offers more dense compression.
+
The specification of the Brotli Compressed Data Format is defined in [RFC 7932](https://tools.ietf.org/html/rfc7932).
-
+
Brotli is open-sourced under the MIT License, see the LICENSE file.
-
-Brotli mailing list:
-https://groups.google.com/forum/#!forum/brotli
+
+Brotli mailing list:
+https://groups.google.com/forum/#!forum/brotli
[![TravisCI Build Status](https://travis-ci.org/google/brotli.svg?branch=master)](https://travis-ci.org/google/brotli)
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/google/brotli?branch=master&svg=true)](https://ci.appveyor.com/project/szabadka/brotli)
diff --git a/contrib/libs/brotli/common/ya.make b/contrib/libs/brotli/common/ya.make
index 7419a29f5a..6c4157831c 100644
--- a/contrib/libs/brotli/common/ya.make
+++ b/contrib/libs/brotli/common/ya.make
@@ -1,6 +1,6 @@
LIBRARY()
-LICENSE(MIT)
+LICENSE(MIT)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
diff --git a/contrib/libs/brotli/dec/bit_reader.c b/contrib/libs/brotli/dec/bit_reader.c
index a685fca6c2..722fd906dd 100644
--- a/contrib/libs/brotli/dec/bit_reader.c
+++ b/contrib/libs/brotli/dec/bit_reader.c
@@ -1,25 +1,25 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Bit reading helpers */
-
+*/
+
+/* Bit reading helpers */
+
#include "./bit_reader.h"
-
+
#include "../common/platform.h"
#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
void BrotliInitBitReader(BrotliBitReader* const br) {
- br->val_ = 0;
- br->bit_pos_ = sizeof(br->val_) << 3;
-}
-
+ br->val_ = 0;
+ br->bit_pos_ = sizeof(br->val_) << 3;
+}
+
BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
size_t aligned_read_mask = (sizeof(br->val_) >> 1) - 1;
/* Fixing alignment after unaligned BrotliFillWindow would result accumulator
@@ -31,8 +31,8 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
if (BrotliGetAvailableBits(br) == 0) {
if (!BrotliPullByte(br)) {
return BROTLI_FALSE;
- }
- }
+ }
+ }
while ((((size_t)br->next_in) & aligned_read_mask) != 0) {
if (!BrotliPullByte(br)) {
@@ -41,8 +41,8 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
}
}
return BROTLI_TRUE;
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
+#endif
diff --git a/contrib/libs/brotli/dec/bit_reader.h b/contrib/libs/brotli/dec/bit_reader.h
index 732072bfdd..c06e91419f 100644
--- a/contrib/libs/brotli/dec/bit_reader.h
+++ b/contrib/libs/brotli/dec/bit_reader.h
@@ -1,25 +1,25 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Bit reading helpers */
-
-#ifndef BROTLI_DEC_BIT_READER_H_
-#define BROTLI_DEC_BIT_READER_H_
-
+*/
+
+/* Bit reading helpers */
+
+#ifndef BROTLI_DEC_BIT_READER_H_
+#define BROTLI_DEC_BIT_READER_H_
+
#include <string.h> /* memcpy */
-
+
#include "../common/platform.h"
#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)
-
+
static const uint32_t kBitMask[33] = { 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000F,
0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
@@ -30,7 +30,7 @@ static const uint32_t kBitMask[33] = { 0x00000000,
0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};
-
+
static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
/* Masking with this expression turns to a single
@@ -41,20 +41,20 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
}
}
-typedef struct {
+typedef struct {
brotli_reg_t val_; /* pre-fetched bits */
uint32_t bit_pos_; /* current bit-reading position in val_ */
const uint8_t* next_in; /* the byte we're reading from */
size_t avail_in;
-} BrotliBitReader;
-
+} BrotliBitReader;
+
typedef struct {
brotli_reg_t val_;
uint32_t bit_pos_;
const uint8_t* next_in;
size_t avail_in;
} BrotliBitReaderState;
-
+
/* Initializes the BrotliBitReader fields. */
BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
@@ -64,7 +64,7 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
reading. */
BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
-
+
static BROTLI_INLINE void BrotliBitReaderSaveState(
BrotliBitReader* const from, BrotliBitReaderState* to) {
to->val_ = from->val_;
@@ -72,7 +72,7 @@ static BROTLI_INLINE void BrotliBitReaderSaveState(
to->next_in = from->next_in;
to->avail_in = from->avail_in;
}
-
+
static BROTLI_INLINE void BrotliBitReaderRestoreState(
BrotliBitReader* const to, BrotliBitReaderState* from) {
to->val_ = from->val_;
@@ -80,15 +80,15 @@ static BROTLI_INLINE void BrotliBitReaderRestoreState(
to->next_in = from->next_in;
to->avail_in = from->avail_in;
}
-
+
static BROTLI_INLINE uint32_t BrotliGetAvailableBits(
const BrotliBitReader* br) {
return (BROTLI_64_BITS ? 64 : 32) - br->bit_pos_;
-}
-
-/* Returns amount of unread bytes the bit reader still has buffered from the
- BrotliInput, including whole bytes in br->val_. */
-static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
+}
+
+/* Returns amount of unread bytes the bit reader still has buffered from the
+ BrotliInput, including whole bytes in br->val_. */
+static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
return br->avail_in + (BrotliGetAvailableBits(br) >> 3);
}
@@ -100,59 +100,59 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
}
/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
- Precondition: accumulator contains at least 1 bit.
+ Precondition: accumulator contains at least 1 bit.
|n_bits| should be in the range [1..24] for regular build. For portable
non-64-bit little-endian build only 16 bits are safe to request. */
-static BROTLI_INLINE void BrotliFillBitWindow(
+static BROTLI_INLINE void BrotliFillBitWindow(
BrotliBitReader* const br, uint32_t n_bits) {
#if (BROTLI_64_BITS)
if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
- if (br->bit_pos_ >= 56) {
- br->val_ >>= 56;
- br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */
+ if (br->bit_pos_ >= 56) {
+ br->val_ >>= 56;
+ br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */
br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8;
- br->avail_in -= 7;
- br->next_in += 7;
- }
+ br->avail_in -= 7;
+ br->next_in += 7;
+ }
} else if (
!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) {
- if (br->bit_pos_ >= 48) {
- br->val_ >>= 48;
- br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */
+ if (br->bit_pos_ >= 48) {
+ br->val_ >>= 48;
+ br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */
br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16;
- br->avail_in -= 6;
- br->next_in += 6;
- }
- } else {
- if (br->bit_pos_ >= 32) {
- br->val_ >>= 32;
- br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */
+ br->avail_in -= 6;
+ br->next_in += 6;
+ }
+ } else {
+ if (br->bit_pos_ >= 32) {
+ br->val_ >>= 32;
+ br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */
br->val_ |= ((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32;
br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
- }
- }
+ }
+ }
#else
if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
- if (br->bit_pos_ >= 24) {
- br->val_ >>= 24;
- br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */
+ if (br->bit_pos_ >= 24) {
+ br->val_ >>= 24;
+ br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */
br->val_ |= BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8;
- br->avail_in -= 3;
- br->next_in += 3;
- }
- } else {
- if (br->bit_pos_ >= 16) {
- br->val_ >>= 16;
- br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */
+ br->avail_in -= 3;
+ br->next_in += 3;
+ }
+ } else {
+ if (br->bit_pos_ >= 16) {
+ br->val_ >>= 16;
+ br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */
br->val_ |= ((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16;
br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
- }
- }
-#endif
-}
-
+ }
+ }
+#endif
+}
+
/* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no
more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input. */
static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
@@ -165,25 +165,25 @@ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
if (br->avail_in == 0) {
return BROTLI_FALSE;
}
- br->val_ >>= 8;
+ br->val_ >>= 8;
#if (BROTLI_64_BITS)
br->val_ |= ((uint64_t)*br->next_in) << 56;
-#else
+#else
br->val_ |= ((uint32_t)*br->next_in) << 24;
-#endif
- br->bit_pos_ -= 8;
- --br->avail_in;
- ++br->next_in;
+#endif
+ br->bit_pos_ -= 8;
+ --br->avail_in;
+ ++br->next_in;
return BROTLI_TRUE;
-}
-
+}
+
/* Returns currently available bits.
The number of valid bits could be calculated by BrotliGetAvailableBits. */
static BROTLI_INLINE brotli_reg_t BrotliGetBitsUnmasked(
BrotliBitReader* const br) {
return br->val_ >> br->bit_pos_;
-}
-
+}
+
/* Like BrotliGetBits, but does not mask the result.
The result contains at least 16 valid bits. */
static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
@@ -194,12 +194,12 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
/* Returns the specified number of bits from |br| without advancing bit
position. */
-static BROTLI_INLINE uint32_t BrotliGetBits(
+static BROTLI_INLINE uint32_t BrotliGetBits(
BrotliBitReader* const br, uint32_t n_bits) {
- BrotliFillBitWindow(br, n_bits);
+ BrotliFillBitWindow(br, n_bits);
return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
-}
-
+}
+
/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there
is not enough input. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
@@ -214,11 +214,11 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
}
/* Advances the bit pos by |n_bits|. */
-static BROTLI_INLINE void BrotliDropBits(
+static BROTLI_INLINE void BrotliDropBits(
BrotliBitReader* const br, uint32_t n_bits) {
br->bit_pos_ += n_bits;
-}
-
+}
+
static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
uint32_t unused_bytes = BrotliGetAvailableBits(br) >> 3;
uint32_t unused_bits = unused_bytes << 3;
@@ -234,17 +234,17 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
/* Reads the specified number of bits from |br| and advances the bit pos.
Precondition: accumulator MUST contain at least |n_bits|. */
-static BROTLI_INLINE void BrotliTakeBits(
+static BROTLI_INLINE void BrotliTakeBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
*val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
BROTLI_LOG(("[BrotliReadBits] %d %d %d val: %6x\n",
(int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
BrotliDropBits(br, n_bits);
-}
-
+}
+
/* Reads the specified number of bits from |br| and advances the bit pos.
- Assumes that there is enough input to perform BrotliFillBitWindow. */
-static BROTLI_INLINE uint32_t BrotliReadBits(
+ Assumes that there is enough input to perform BrotliFillBitWindow. */
+static BROTLI_INLINE uint32_t BrotliReadBits(
BrotliBitReader* const br, uint32_t n_bits) {
if (BROTLI_64_BITS || (n_bits <= 16)) {
uint32_t val;
@@ -260,8 +260,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits(
BrotliTakeBits(br, n_bits - 16, &high_val);
return low_val | (high_val << 16);
}
-}
-
+}
+
/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
is not enough input. |n_bits| MUST be positive. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
@@ -269,41 +269,41 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
while (BrotliGetAvailableBits(br) < n_bits) {
if (!BrotliPullByte(br)) {
return BROTLI_FALSE;
- }
- }
- BrotliTakeBits(br, n_bits, val);
+ }
+ }
+ BrotliTakeBits(br, n_bits, val);
return BROTLI_TRUE;
-}
-
-/* Advances the bit reader position to the next byte boundary and verifies
- that any skipped bits are set to zero. */
+}
+
+/* Advances the bit reader position to the next byte boundary and verifies
+ that any skipped bits are set to zero. */
static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
uint32_t pad_bits_count = BrotliGetAvailableBits(br) & 0x7;
- uint32_t pad_bits = 0;
- if (pad_bits_count != 0) {
- BrotliTakeBits(br, pad_bits_count, &pad_bits);
- }
+ uint32_t pad_bits = 0;
+ if (pad_bits_count != 0) {
+ BrotliTakeBits(br, pad_bits_count, &pad_bits);
+ }
return TO_BROTLI_BOOL(pad_bits == 0);
-}
-
-/* Copies remaining input bytes stored in the bit reader to the output. Value
+}
+
+/* Copies remaining input bytes stored in the bit reader to the output. Value
|num| may not be larger than BrotliGetRemainingBytes. The bit reader must be
- warmed up again after this. */
-static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
- BrotliBitReader* br, size_t num) {
+ warmed up again after this. */
+static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
+ BrotliBitReader* br, size_t num) {
while (BrotliGetAvailableBits(br) >= 8 && num > 0) {
*dest = (uint8_t)BrotliGetBitsUnmasked(br);
BrotliDropBits(br, 8);
- ++dest;
- --num;
- }
- memcpy(dest, br->next_in, num);
+ ++dest;
+ --num;
+ }
+ memcpy(dest, br->next_in, num);
br->avail_in -= num;
- br->next_in += num;
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
+ br->next_in += num;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
-
-#endif /* BROTLI_DEC_BIT_READER_H_ */
+#endif
+
+#endif /* BROTLI_DEC_BIT_READER_H_ */
diff --git a/contrib/libs/brotli/dec/decode.c b/contrib/libs/brotli/dec/decode.c
index ee898d4372..08bd76ca16 100644
--- a/contrib/libs/brotli/dec/decode.c
+++ b/contrib/libs/brotli/dec/decode.c
@@ -1,11 +1,11 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
-
+
#include <brotli/decode.h>
-
+
#include <stdlib.h> /* free, malloc */
#include <string.h> /* memcpy, memset */
@@ -15,48 +15,48 @@
#include "../common/platform.h"
#include "../common/transform.h"
#include "../common/version.h"
-#include "./bit_reader.h"
+#include "./bit_reader.h"
#include "./huffman.h"
#include "./prefix.h"
#include "./state.h"
-
+
#if defined(BROTLI_TARGET_NEON)
#include <arm_neon.h>
#endif
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
#define BROTLI_FAILURE(CODE) (BROTLI_DUMP(), CODE)
-
+
#define BROTLI_LOG_UINT(name) \
BROTLI_LOG(("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name)))
#define BROTLI_LOG_ARRAY_INDEX(array_name, idx) \
BROTLI_LOG(("[%s] %s[%lu] = %lu\n", __func__, #array_name, \
(unsigned long)(idx), (unsigned long)array_name[idx]))
-
+
#define HUFFMAN_TABLE_BITS 8U
#define HUFFMAN_TABLE_MASK 0xFF
-
+
/* We need the slack region for the following reasons:
- doing up to two 16-byte copies for fast backward copying
- inserting transformed dictionary word (5 prefix + 24 base + 8 suffix) */
static const uint32_t kRingBufferWriteAheadSlack = 42;
static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = {
- 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-};
-
-/* Static prefix code for the complex code length code lengths. */
-static const uint8_t kCodeLengthPrefixLength[16] = {
- 2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4,
-};
-
-static const uint8_t kCodeLengthPrefixValue[16] = {
- 0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5,
-};
-
+ 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+
+/* Static prefix code for the complex code length code lengths. */
+static const uint8_t kCodeLengthPrefixLength[16] = {
+ 2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4,
+};
+
+static const uint8_t kCodeLengthPrefixValue[16] = {
+ 0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5,
+};
+
BROTLI_BOOL BrotliDecoderSetParameter(
BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) {
if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
@@ -64,7 +64,7 @@ BROTLI_BOOL BrotliDecoderSetParameter(
case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION:
state->canny_ringbuffer_allocation = !!value ? 0 : 1;
return BROTLI_TRUE;
-
+
case BROTLI_DECODER_PARAM_LARGE_WINDOW:
state->large_window = TO_BROTLI_BOOL(!!value);
return BROTLI_TRUE;
@@ -132,20 +132,20 @@ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
Precondition: bit-reader accumulator has at least 8 bits. */
static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
BrotliBitReader* br) {
- uint32_t n;
+ uint32_t n;
BROTLI_BOOL large_window = s->large_window;
s->large_window = BROTLI_FALSE;
- BrotliTakeBits(br, 1, &n);
- if (n == 0) {
+ BrotliTakeBits(br, 1, &n);
+ if (n == 0) {
s->window_bits = 16;
return BROTLI_DECODER_SUCCESS;
- }
- BrotliTakeBits(br, 3, &n);
- if (n != 0) {
+ }
+ BrotliTakeBits(br, 3, &n);
+ if (n != 0) {
s->window_bits = 17 + n;
return BROTLI_DECODER_SUCCESS;
- }
- BrotliTakeBits(br, 3, &n);
+ }
+ BrotliTakeBits(br, 3, &n);
if (n == 1) {
if (large_window) {
BrotliTakeBits(br, 1, &n);
@@ -158,188 +158,188 @@ static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
}
}
- if (n != 0) {
+ if (n != 0) {
s->window_bits = 8 + n;
return BROTLI_DECODER_SUCCESS;
- }
+ }
s->window_bits = 17;
return BROTLI_DECODER_SUCCESS;
-}
-
+}
+
static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) {
#if defined(BROTLI_TARGET_NEON)
- vst1q_u8(dst, vld1q_u8(src));
-#else
+ vst1q_u8(dst, vld1q_u8(src));
+#else
uint32_t buffer[4];
memcpy(buffer, src, 16);
memcpy(dst, buffer, 16);
-#endif
-}
-
-/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */
+#endif
+}
+
+/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */
static BROTLI_NOINLINE BrotliDecoderErrorCode DecodeVarLenUint8(
BrotliDecoderState* s, BrotliBitReader* br, uint32_t* value) {
- uint32_t bits;
- switch (s->substate_decode_uint8) {
- case BROTLI_STATE_DECODE_UINT8_NONE:
+ uint32_t bits;
+ switch (s->substate_decode_uint8) {
+ case BROTLI_STATE_DECODE_UINT8_NONE:
if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 1, &bits))) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (bits == 0) {
- *value = 0;
+ }
+ if (bits == 0) {
+ *value = 0;
return BROTLI_DECODER_SUCCESS;
- }
+ }
/* Fall through. */
-
- case BROTLI_STATE_DECODE_UINT8_SHORT:
+
+ case BROTLI_STATE_DECODE_UINT8_SHORT:
if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 3, &bits))) {
- s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT;
+ s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (bits == 0) {
- *value = 1;
- s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+ }
+ if (bits == 0) {
+ *value = 1;
+ s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
return BROTLI_DECODER_SUCCESS;
- }
- /* Use output value as a temporary storage. It MUST be persisted. */
+ }
+ /* Use output value as a temporary storage. It MUST be persisted. */
*value = bits;
/* Fall through. */
-
- case BROTLI_STATE_DECODE_UINT8_LONG:
+
+ case BROTLI_STATE_DECODE_UINT8_LONG:
if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, *value, &bits))) {
- s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG;
+ s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
*value = (1U << *value) + bits;
- s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+ s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
return BROTLI_DECODER_SUCCESS;
-
- default:
+
+ default:
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
- }
-}
-
-/* Decodes a metablock length and flags by reading 2 - 31 bits. */
+ }
+}
+
+/* Decodes a metablock length and flags by reading 2 - 31 bits. */
static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
BrotliDecoderState* s, BrotliBitReader* br) {
- uint32_t bits;
- int i;
- for (;;) {
- switch (s->substate_metablock_header) {
- case BROTLI_STATE_METABLOCK_HEADER_NONE:
- if (!BrotliSafeReadBits(br, 1, &bits)) {
+ uint32_t bits;
+ int i;
+ for (;;) {
+ switch (s->substate_metablock_header) {
+ case BROTLI_STATE_METABLOCK_HEADER_NONE:
+ if (!BrotliSafeReadBits(br, 1, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
s->is_last_metablock = bits ? 1 : 0;
- s->meta_block_remaining_len = 0;
- s->is_uncompressed = 0;
- s->is_metadata = 0;
- if (!s->is_last_metablock) {
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
- break;
- }
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY;
+ s->meta_block_remaining_len = 0;
+ s->is_uncompressed = 0;
+ s->is_metadata = 0;
+ if (!s->is_last_metablock) {
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+ break;
+ }
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY;
/* Fall through. */
-
- case BROTLI_STATE_METABLOCK_HEADER_EMPTY:
- if (!BrotliSafeReadBits(br, 1, &bits)) {
+
+ case BROTLI_STATE_METABLOCK_HEADER_EMPTY:
+ if (!BrotliSafeReadBits(br, 1, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (bits) {
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+ }
+ if (bits) {
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
return BROTLI_DECODER_SUCCESS;
- }
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+ }
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
/* Fall through. */
-
- case BROTLI_STATE_METABLOCK_HEADER_NIBBLES:
- if (!BrotliSafeReadBits(br, 2, &bits)) {
+
+ case BROTLI_STATE_METABLOCK_HEADER_NIBBLES:
+ if (!BrotliSafeReadBits(br, 2, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- s->size_nibbles = (uint8_t)(bits + 4);
- s->loop_counter = 0;
- if (bits == 3) {
- s->is_metadata = 1;
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED;
- break;
- }
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE;
+ }
+ s->size_nibbles = (uint8_t)(bits + 4);
+ s->loop_counter = 0;
+ if (bits == 3) {
+ s->is_metadata = 1;
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED;
+ break;
+ }
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE;
/* Fall through. */
-
- case BROTLI_STATE_METABLOCK_HEADER_SIZE:
- i = s->loop_counter;
+
+ case BROTLI_STATE_METABLOCK_HEADER_SIZE:
+ i = s->loop_counter;
for (; i < (int)s->size_nibbles; ++i) {
- if (!BrotliSafeReadBits(br, 4, &bits)) {
- s->loop_counter = i;
+ if (!BrotliSafeReadBits(br, 4, &bits)) {
+ s->loop_counter = i;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (i + 1 == s->size_nibbles && s->size_nibbles > 4 && bits == 0) {
+ }
+ if (i + 1 == s->size_nibbles && s->size_nibbles > 4 && bits == 0) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE);
- }
- s->meta_block_remaining_len |= (int)(bits << (i * 4));
- }
- s->substate_metablock_header =
- BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
+ }
+ s->meta_block_remaining_len |= (int)(bits << (i * 4));
+ }
+ s->substate_metablock_header =
+ BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
/* Fall through. */
-
- case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED:
+
+ case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED:
if (!s->is_last_metablock) {
- if (!BrotliSafeReadBits(br, 1, &bits)) {
+ if (!BrotliSafeReadBits(br, 1, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
s->is_uncompressed = bits ? 1 : 0;
- }
- ++s->meta_block_remaining_len;
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+ }
+ ++s->meta_block_remaining_len;
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
return BROTLI_DECODER_SUCCESS;
-
- case BROTLI_STATE_METABLOCK_HEADER_RESERVED:
- if (!BrotliSafeReadBits(br, 1, &bits)) {
+
+ case BROTLI_STATE_METABLOCK_HEADER_RESERVED:
+ if (!BrotliSafeReadBits(br, 1, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (bits != 0) {
+ }
+ if (bits != 0) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_RESERVED);
- }
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES;
+ }
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES;
/* Fall through. */
-
- case BROTLI_STATE_METABLOCK_HEADER_BYTES:
- if (!BrotliSafeReadBits(br, 2, &bits)) {
+
+ case BROTLI_STATE_METABLOCK_HEADER_BYTES:
+ if (!BrotliSafeReadBits(br, 2, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (bits == 0) {
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+ }
+ if (bits == 0) {
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
return BROTLI_DECODER_SUCCESS;
- }
- s->size_nibbles = (uint8_t)bits;
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA;
+ }
+ s->size_nibbles = (uint8_t)bits;
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA;
/* Fall through. */
-
- case BROTLI_STATE_METABLOCK_HEADER_METADATA:
- i = s->loop_counter;
+
+ case BROTLI_STATE_METABLOCK_HEADER_METADATA:
+ i = s->loop_counter;
for (; i < (int)s->size_nibbles; ++i) {
- if (!BrotliSafeReadBits(br, 8, &bits)) {
- s->loop_counter = i;
+ if (!BrotliSafeReadBits(br, 8, &bits)) {
+ s->loop_counter = i;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
- if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) {
+ }
+ if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) {
return BROTLI_FAILURE(
BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE);
- }
- s->meta_block_remaining_len |= (int)(bits << (i * 8));
- }
+ }
+ s->meta_block_remaining_len |= (int)(bits << (i * 8));
+ }
++s->meta_block_remaining_len;
s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
return BROTLI_DECODER_SUCCESS;
-
- default:
+
+ default:
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
- }
- }
-}
-
+ }
+ }
+}
+
/* Decodes the Huffman code.
This method doesn't read data from the bit reader, BUT drops the amount of
bits that correspond to the decoded symbol.
@@ -351,15 +351,15 @@ static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits,
BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK);
if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) {
uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS;
- BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+ BrotliDropBits(br, HUFFMAN_TABLE_BITS);
BROTLI_HC_ADJUST_TABLE_INDEX(table,
BROTLI_HC_FAST_LOAD_VALUE(table) +
((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits)));
- }
+ }
BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
return BROTLI_HC_FAST_LOAD_VALUE(table);
-}
-
+}
+
/* Reads and decodes the next Huffman code from bit-stream.
This method peeks 16 bits of input and drops 0 - 15 of them. */
static BROTLI_INLINE uint32_t ReadSymbol(const HuffmanCode* table,
@@ -419,10 +419,10 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadSymbol(
return SafeDecodeSymbol(table, br, result);
}
-/* Makes a look-up in first level Huffman table. Peeks 8 bits. */
+/* Makes a look-up in first level Huffman table. Peeks 8 bits. */
static BROTLI_INLINE void PreloadSymbol(int safe,
const HuffmanCode* table,
- BrotliBitReader* br,
+ BrotliBitReader* br,
uint32_t* bits,
uint32_t* value) {
if (safe) {
@@ -432,40 +432,40 @@ static BROTLI_INLINE void PreloadSymbol(int safe,
BROTLI_HC_ADJUST_TABLE_INDEX(table, BrotliGetBits(br, HUFFMAN_TABLE_BITS));
*bits = BROTLI_HC_FAST_LOAD_BITS(table);
*value = BROTLI_HC_FAST_LOAD_VALUE(table);
-}
-
-/* Decodes the next Huffman code using data prepared by PreloadSymbol.
- Reads 0 - 15 bits. Also peeks 8 following bits. */
+}
+
+/* Decodes the next Huffman code using data prepared by PreloadSymbol.
+ Reads 0 - 15 bits. Also peeks 8 following bits. */
static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table,
- BrotliBitReader* br,
+ BrotliBitReader* br,
uint32_t* bits,
uint32_t* value) {
uint32_t result = *value;
if (BROTLI_PREDICT_FALSE(*bits > HUFFMAN_TABLE_BITS)) {
uint32_t val = BrotliGet16BitsUnmasked(br);
- const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
+ const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS));
BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext);
- BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+ BrotliDropBits(br, HUFFMAN_TABLE_BITS);
BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask);
BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext));
result = BROTLI_HC_FAST_LOAD_VALUE(ext);
- } else {
+ } else {
BrotliDropBits(br, *bits);
- }
+ }
PreloadSymbol(0, table, br, bits, value);
- return result;
-}
-
+ return result;
+}
+
static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
uint32_t result = 0;
- while (x) {
- x >>= 1;
- ++result;
- }
- return result;
-}
-
+ while (x) {
+ x >>= 1;
+ ++result;
+ }
+ return result;
+}
+
/* Reads (s->symbol + 1) symbols.
Totally 1..4 symbols are read, 1..11 bits each.
The list of symbols MUST NOT contain duplicates. */
@@ -726,24 +726,24 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
return BROTLI_DECODER_SUCCESS;
}
-/* Decodes the Huffman tables.
- There are 2 scenarios:
- A) Huffman code contains only few symbols (1..4). Those symbols are read
- directly; their code lengths are defined by the number of symbols.
+/* Decodes the Huffman tables.
+ There are 2 scenarios:
+ A) Huffman code contains only few symbols (1..4). Those symbols are read
+ directly; their code lengths are defined by the number of symbols.
For this scenario 4 - 49 bits will be read.
-
- B) 2-phase decoding:
- B.1) Small Huffman table is decoded; it is specified with code lengths
- encoded with predefined entropy code. 32 - 74 bits are used.
- B.2) Decoded table is used to decode code lengths of symbols in resulting
+
+ B) 2-phase decoding:
+ B.1) Small Huffman table is decoded; it is specified with code lengths
+ encoded with predefined entropy code. 32 - 74 bits are used.
+ B.2) Decoded table is used to decode code lengths of symbols in resulting
Huffman table. In worst case 3520 bits are read. */
static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
uint32_t max_symbol,
HuffmanCode* table,
uint32_t* opt_table_size,
BrotliDecoderState* s) {
- BrotliBitReader* br = &s->br;
- /* Unnecessary masking, but might be good for safety. */
+ BrotliBitReader* br = &s->br;
+ /* Unnecessary masking, but might be good for safety. */
alphabet_size &= 0x7FF;
/* State machine. */
for (;;) {
@@ -769,11 +769,11 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
/* Fall through. */
case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
- /* Read symbols, codes & code lengths directly. */
+ /* Read symbols, codes & code lengths directly. */
if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
s->sub_loop_counter = 0;
/* Fall through. */
@@ -793,16 +793,16 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
if (!BrotliSafeReadBits(br, 1, &bits)) {
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
s->symbol += bits;
- }
+ }
BROTLI_LOG_UINT(s->symbol);
table_size = BrotliBuildSimpleHuffmanTable(
table, HUFFMAN_TABLE_BITS, s->symbols_lists_array, s->symbol);
- if (opt_table_size) {
+ if (opt_table_size) {
*opt_table_size = table_size;
- }
- s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+ }
+ s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
return BROTLI_DECODER_SUCCESS;
}
@@ -812,7 +812,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
BrotliDecoderErrorCode result = ReadCodeLengthCodeLengths(s);
if (result != BROTLI_DECODER_SUCCESS) {
return result;
- }
+ }
BrotliBuildCodeLengthsHuffmanTable(s->table,
s->code_length_code_lengths,
s->code_length_histo);
@@ -820,7 +820,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) {
s->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
s->symbol_lists[s->next_symbol[i]] = 0xFFFF;
- }
+ }
s->symbol = 0;
s->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
@@ -828,7 +828,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
s->repeat_code_len = 0;
s->space = 32768;
s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
- }
+ }
/* Fall through. */
case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
@@ -840,37 +840,37 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
-
+
if (s->space != 0) {
BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)s->space));
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE);
- }
+ }
table_size = BrotliBuildHuffmanTable(
table, HUFFMAN_TABLE_BITS, s->symbol_lists, s->code_length_histo);
- if (opt_table_size) {
- *opt_table_size = table_size;
- }
+ if (opt_table_size) {
+ *opt_table_size = table_size;
+ }
s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
return BROTLI_DECODER_SUCCESS;
- }
+ }
default:
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
- }
- }
-}
-
-/* Decodes a block length by reading 3..39 bits. */
+ }
+ }
+}
+
+/* Decodes a block length by reading 3..39 bits. */
static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
BrotliBitReader* br) {
uint32_t code;
uint32_t nbits;
- code = ReadSymbol(table, br);
+ code = ReadSymbol(table, br);
nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits);
-}
-
+}
+
/* WARNING: if state is not BROTLI_STATE_READ_BLOCK_LENGTH_NONE, then
reading can't be continued with ReadBlockLength. */
static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
@@ -898,114 +898,114 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
}
}
-/* Transform:
- 1) initialize list L with values 0, 1,... 255
- 2) For each input element X:
- 2.1) let Y = L[X]
- 2.2) remove X-th element from L
- 2.3) prepend Y to L
- 2.4) append Y to output
-
- In most cases max(Y) <= 7, so most of L remains intact.
- To reduce the cost of initialization, we reuse L, remember the upper bound
- of Y values, and reinitialize only first elements in L.
-
- Most of input values are 0 and 1. To reduce number of branches, we replace
+/* Transform:
+ 1) initialize list L with values 0, 1,... 255
+ 2) For each input element X:
+ 2.1) let Y = L[X]
+ 2.2) remove X-th element from L
+ 2.3) prepend Y to L
+ 2.4) append Y to output
+
+ In most cases max(Y) <= 7, so most of L remains intact.
+ To reduce the cost of initialization, we reuse L, remember the upper bound
+ of Y values, and reinitialize only first elements in L.
+
+ Most of input values are 0 and 1. To reduce number of branches, we replace
inner for loop with do-while. */
static BROTLI_NOINLINE void InverseMoveToFrontTransform(
uint8_t* v, uint32_t v_len, BrotliDecoderState* state) {
- /* Reinitialize elements that could have been changed. */
+ /* Reinitialize elements that could have been changed. */
uint32_t i = 1;
uint32_t upper_bound = state->mtf_upper_bound;
uint32_t* mtf = &state->mtf[1]; /* Make mtf[-1] addressable. */
uint8_t* mtf_u8 = (uint8_t*)mtf;
- /* Load endian-aware constant. */
- const uint8_t b0123[4] = {0, 1, 2, 3};
- uint32_t pattern;
- memcpy(&pattern, &b0123, 4);
-
- /* Initialize list using 4 consequent values pattern. */
+ /* Load endian-aware constant. */
+ const uint8_t b0123[4] = {0, 1, 2, 3};
+ uint32_t pattern;
+ memcpy(&pattern, &b0123, 4);
+
+ /* Initialize list using 4 consequent values pattern. */
mtf[0] = pattern;
- do {
+ do {
pattern += 0x04040404; /* Advance all 4 values by 4. */
mtf[i] = pattern;
i++;
- } while (i <= upper_bound);
-
- /* Transform the input. */
- upper_bound = 0;
- for (i = 0; i < v_len; ++i) {
- int index = v[i];
+ } while (i <= upper_bound);
+
+ /* Transform the input. */
+ upper_bound = 0;
+ for (i = 0; i < v_len; ++i) {
+ int index = v[i];
uint8_t value = mtf_u8[index];
upper_bound |= v[i];
- v[i] = value;
+ v[i] = value;
mtf_u8[-1] = value;
- do {
- index--;
+ do {
+ index--;
mtf_u8[index + 1] = mtf_u8[index];
} while (index >= 0);
- }
- /* Remember amount of elements to be reinitialized. */
+ }
+ /* Remember amount of elements to be reinitialized. */
state->mtf_upper_bound = upper_bound >> 2;
-}
-
-/* Decodes a series of Huffman table using ReadHuffmanCode function. */
+}
+
+/* Decodes a series of Huffman table using ReadHuffmanCode function. */
static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
HuffmanTreeGroup* group, BrotliDecoderState* s) {
- if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
- s->next = group->codes;
- s->htree_index = 0;
- s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
- }
- while (s->htree_index < group->num_htrees) {
+ if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
+ s->next = group->codes;
+ s->htree_index = 0;
+ s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
+ }
+ while (s->htree_index < group->num_htrees) {
uint32_t table_size;
BrotliDecoderErrorCode result =
ReadHuffmanCode(group->alphabet_size, group->max_symbol,
s->next, &table_size, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
- group->htrees[s->htree_index] = s->next;
- s->next += table_size;
- ++s->htree_index;
- }
- s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+ group->htrees[s->htree_index] = s->next;
+ s->next += table_size;
+ ++s->htree_index;
+ }
+ s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
return BROTLI_DECODER_SUCCESS;
-}
-
-/* Decodes a context map.
- Decoding is done in 4 phases:
- 1) Read auxiliary information (6..16 bits) and allocate memory.
- In case of trivial context map, decoding is finished at this phase.
- 2) Decode Huffman table using ReadHuffmanCode function.
- This table will be used for reading context map items.
- 3) Read context map items; "0" values could be run-length encoded.
+}
+
+/* Decodes a context map.
+ Decoding is done in 4 phases:
+ 1) Read auxiliary information (6..16 bits) and allocate memory.
+ In case of trivial context map, decoding is finished at this phase.
+ 2) Decode Huffman table using ReadHuffmanCode function.
+ This table will be used for reading context map items.
+ 3) Read context map items; "0" values could be run-length encoded.
4) Optionally, apply InverseMoveToFront transform to the resulting map. */
static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
uint32_t* num_htrees,
uint8_t** context_map_arg,
BrotliDecoderState* s) {
- BrotliBitReader* br = &s->br;
+ BrotliBitReader* br = &s->br;
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
-
+
switch ((int)s->substate_context_map) {
- case BROTLI_STATE_CONTEXT_MAP_NONE:
- result = DecodeVarLenUint8(s, br, num_htrees);
+ case BROTLI_STATE_CONTEXT_MAP_NONE:
+ result = DecodeVarLenUint8(s, br, num_htrees);
if (result != BROTLI_DECODER_SUCCESS) {
- return result;
- }
- (*num_htrees)++;
- s->context_index = 0;
- BROTLI_LOG_UINT(context_map_size);
- BROTLI_LOG_UINT(*num_htrees);
+ return result;
+ }
+ (*num_htrees)++;
+ s->context_index = 0;
+ BROTLI_LOG_UINT(context_map_size);
+ BROTLI_LOG_UINT(*num_htrees);
*context_map_arg =
(uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)context_map_size);
- if (*context_map_arg == 0) {
+ if (*context_map_arg == 0) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP);
- }
- if (*num_htrees <= 1) {
- memset(*context_map_arg, 0, (size_t)context_map_size);
+ }
+ if (*num_htrees <= 1) {
+ memset(*context_map_arg, 0, (size_t)context_map_size);
return BROTLI_DECODER_SUCCESS;
- }
- s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
+ }
+ s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
@@ -1014,33 +1014,33 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
to peek 4 bits ahead. */
if (!BrotliSafeGetBits(br, 5, &bits)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
if ((bits & 1) != 0) { /* Use RLE for zeros. */
s->max_run_length_prefix = (bits >> 1) + 1;
BrotliDropBits(br, 5);
- } else {
- s->max_run_length_prefix = 0;
+ } else {
+ s->max_run_length_prefix = 0;
BrotliDropBits(br, 1);
- }
- BROTLI_LOG_UINT(s->max_run_length_prefix);
- s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
+ }
+ BROTLI_LOG_UINT(s->max_run_length_prefix);
+ s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
}
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix;
result = ReadHuffmanCode(alphabet_size, alphabet_size,
- s->context_map_table, NULL, s);
+ s->context_map_table, NULL, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
s->code = 0xFFFF;
- s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
+ s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
}
/* Fall through. */
- case BROTLI_STATE_CONTEXT_MAP_DECODE: {
+ case BROTLI_STATE_CONTEXT_MAP_DECODE: {
uint32_t context_index = s->context_index;
uint32_t max_run_length_prefix = s->max_run_length_prefix;
- uint8_t* context_map = *context_map_arg;
+ uint8_t* context_map = *context_map_arg;
uint32_t code = s->code;
BROTLI_BOOL skip_preamble = (code != 0xFFFF);
while (context_index < context_map_size || skip_preamble) {
@@ -1063,7 +1063,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
}
} else {
skip_preamble = BROTLI_FALSE;
- }
+ }
/* RLE sub-stage. */
{
uint32_t reps;
@@ -1073,16 +1073,16 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
reps += 1U << code;
- BROTLI_LOG_UINT(reps);
- if (context_index + reps > context_map_size) {
+ BROTLI_LOG_UINT(reps);
+ if (context_index + reps > context_map_size) {
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT);
- }
- do {
- context_map[context_index++] = 0;
- } while (--reps);
- }
- }
+ }
+ do {
+ context_map[context_index++] = 0;
+ } while (--reps);
+ }
+ }
}
/* Fall through. */
@@ -1091,20 +1091,20 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
if (!BrotliSafeReadBits(br, 1, &bits)) {
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
if (bits != 0) {
InverseMoveToFrontTransform(*context_map_arg, context_map_size, s);
}
- s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+ s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
return BROTLI_DECODER_SUCCESS;
- }
+ }
default:
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
- }
-}
-
+ }
+}
+
/* Decodes a command or literal and updates block type ring-buffer.
Reads 3..54 bits. */
static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
@@ -1137,20 +1137,20 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
}
if (block_type == 1) {
- block_type = ringbuffer[1] + 1;
+ block_type = ringbuffer[1] + 1;
} else if (block_type == 0) {
- block_type = ringbuffer[0];
+ block_type = ringbuffer[0];
} else {
block_type -= 2;
- }
- if (block_type >= max_block_type) {
- block_type -= max_block_type;
- }
- ringbuffer[0] = ringbuffer[1];
- ringbuffer[1] = block_type;
+ }
+ if (block_type >= max_block_type) {
+ block_type -= max_block_type;
+ }
+ ringbuffer[0] = ringbuffer[1];
+ ringbuffer[1] = block_type;
return BROTLI_TRUE;
-}
-
+}
+
static BROTLI_INLINE void DetectTrivialLiteralBlockTypes(
BrotliDecoderState* s) {
size_t i;
@@ -1170,18 +1170,18 @@ static BROTLI_INLINE void DetectTrivialLiteralBlockTypes(
}
static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) {
- uint8_t context_mode;
+ uint8_t context_mode;
size_t trivial;
uint32_t block_type = s->block_type_rb[1];
uint32_t context_offset = block_type << BROTLI_LITERAL_CONTEXT_BITS;
- s->context_map_slice = s->context_map + context_offset;
+ s->context_map_slice = s->context_map + context_offset;
trivial = s->trivial_literal_contexts[block_type >> 5];
s->trivial_literal_context = (trivial >> (block_type & 31)) & 1;
s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]];
context_mode = s->context_modes[block_type] & 3;
s->context_lookup = BROTLI_CONTEXT_LUT(context_mode);
-}
-
+}
+
/* Decodes the block type and updates the state for literal context.
Reads 3..54 bits. */
static BROTLI_INLINE BROTLI_BOOL DecodeLiteralBlockSwitchInternal(
@@ -1264,9 +1264,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
if (num_written > to_write) {
num_written = to_write;
}
- if (s->meta_block_remaining_len < 0) {
+ if (s->meta_block_remaining_len < 0) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1);
- }
+ }
if (next_out && !*next_out) {
*next_out = start;
} else {
@@ -1277,18 +1277,18 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
}
*available_out -= num_written;
BROTLI_LOG_UINT(to_write);
- BROTLI_LOG_UINT(num_written);
+ BROTLI_LOG_UINT(num_written);
s->partial_pos_out += num_written;
if (total_out) {
*total_out = s->partial_pos_out;
- }
+ }
if (num_written < to_write) {
if (s->ringbuffer_size == (1 << s->window_bits) || force) {
return BROTLI_DECODER_NEEDS_MORE_OUTPUT;
} else {
return BROTLI_DECODER_SUCCESS;
}
- }
+ }
/* Wrap ring buffer only if it has reached its maximal size. */
if (s->ringbuffer_size == (1 << s->window_bits) &&
s->pos >= s->ringbuffer_size) {
@@ -1297,8 +1297,8 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
s->should_wrap_ringbuffer = (size_t)s->pos != 0 ? 1 : 0;
}
return BROTLI_DECODER_SUCCESS;
-}
-
+}
+
static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) {
if (s->should_wrap_ringbuffer) {
memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)s->pos);
@@ -1350,17 +1350,17 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1);
}
- /* State machine */
- for (;;) {
+ /* State machine */
+ for (;;) {
switch (s->substate_uncompressed) {
case BROTLI_STATE_UNCOMPRESSED_NONE: {
int nbytes = (int)BrotliGetRemainingBytes(&s->br);
if (nbytes > s->meta_block_remaining_len) {
nbytes = s->meta_block_remaining_len;
- }
+ }
if (s->pos + nbytes > s->ringbuffer_size) {
nbytes = s->ringbuffer_size - s->pos;
- }
+ }
/* Copy remaining bytes from s->br.buf_ to ring-buffer. */
BrotliCopyBytes(&s->ringbuffer[s->pos], &s->br, (size_t)nbytes);
s->pos += nbytes;
@@ -1368,9 +1368,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
if (s->pos < 1 << s->window_bits) {
if (s->meta_block_remaining_len == 0) {
return BROTLI_DECODER_SUCCESS;
- }
+ }
return BROTLI_DECODER_NEEDS_MORE_INPUT;
- }
+ }
s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
}
/* Fall through. */
@@ -1380,19 +1380,19 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
result = WriteRingBuffer(
s, available_out, next_out, total_out, BROTLI_FALSE);
if (result != BROTLI_DECODER_SUCCESS) {
- return result;
- }
+ return result;
+ }
if (s->ringbuffer_size == 1 << s->window_bits) {
s->max_distance = s->max_backward_distance;
- }
+ }
s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
- break;
+ break;
}
- }
- }
+ }
+ }
BROTLI_DCHECK(0); /* Unreachable */
-}
-
+}
+
/* Calculates the smallest feasible ring buffer.
If we know the data size is small, do not allocate more ring buffer
@@ -1411,18 +1411,18 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
    /* If maximum is already reached, no further extension is required. */
if (s->ringbuffer_size == window_size) {
return;
- }
+ }
    /* Metadata blocks do not touch the ring buffer. */
if (s->is_metadata) {
return;
- }
+ }
if (!s->ringbuffer) {
output_size = 0;
} else {
output_size = s->pos;
- }
+ }
output_size += s->meta_block_remaining_len;
min_size = min_size < output_size ? output_size : min_size;
@@ -1433,16 +1433,16 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
while ((new_ringbuffer_size >> 1) >= min_size) {
new_ringbuffer_size >>= 1;
}
- }
+ }
s->new_ringbuffer_size = new_ringbuffer_size;
-}
-
+}
+
/* Reads 1..256 2-bit context modes. */
static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
int i = s->loop_counter;
-
+
while (i < (int)s->num_block_types[0]) {
uint32_t bits;
if (!BrotliSafeReadBits(br, 2, &bits)) {
@@ -1455,7 +1455,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
}
return BROTLI_DECODER_SUCCESS;
}
-
+
static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
if (s->distance_code == 0) {
--s->dist_rb_idx;
@@ -1482,11 +1482,11 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
/* A huge distance will cause a BROTLI_FAILURE() soon.
This is a little faster than failing here. */
s->distance_code = 0x7FFFFFFF;
- }
- }
- }
+ }
+ }
+ }
}
-
+
static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
if (n_bits != 0) {
@@ -1494,9 +1494,9 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
} else {
*val = 0;
return BROTLI_TRUE;
- }
+ }
}
-
+
/* Precondition: s->distance_code < 0. */
static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
int safe, BrotliDecoderState* s, BrotliBitReader* br) {
@@ -1512,7 +1512,7 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
return BROTLI_FALSE;
}
s->distance_code = (int)code;
- }
+ }
/* Convert the distance code to the actual distance by possibly
looking up past distances from the s->ringbuffer. */
s->distance_context = 0;
@@ -1555,7 +1555,7 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
--s->block_length[2];
return BROTLI_TRUE;
}
-
+
static BROTLI_INLINE void ReadDistance(
BrotliDecoderState* s, BrotliBitReader* br) {
ReadDistanceInternal(0, s, br);
@@ -1580,7 +1580,7 @@ static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal(
if (!SafeReadSymbol(s->htree_command, br, &cmd_code)) {
return BROTLI_FALSE;
}
- }
+ }
v = kCmdLut[cmd_code];
s->distance_code = v.distance_code;
s->distance_context = v.context;
@@ -1597,28 +1597,28 @@ static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal(
BrotliBitReaderRestoreState(br, &memento);
return BROTLI_FALSE;
}
- }
+ }
s->copy_length = (int)copy_length + v.copy_len_offset;
--s->block_length[1];
*insert_length += (int)insert_len_extra;
return BROTLI_TRUE;
}
-
+
static BROTLI_INLINE void ReadCommand(
BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
ReadCommandInternal(0, s, br, insert_length);
-}
-
+}
+
static BROTLI_INLINE BROTLI_BOOL SafeReadCommand(
BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
return ReadCommandInternal(1, s, br, insert_length);
-}
-
+}
+
static BROTLI_INLINE BROTLI_BOOL CheckInputAmount(
int safe, BrotliBitReader* const br, size_t num) {
if (safe) {
return BROTLI_TRUE;
- }
+ }
return BrotliCheckInputAmount(br, num);
}
@@ -1920,9 +1920,9 @@ CommandPostWrapCopy:
saveStateAndReturn:
s->pos = pos;
s->loop_counter = i;
- return result;
-}
-
+ return result;
+}
+
#undef BROTLI_SAFE
static BROTLI_NOINLINE BrotliDecoderErrorCode ProcessCommands(
@@ -1970,9 +1970,9 @@ BrotliDecoderResult BrotliDecoderDecompress(
if (result != BROTLI_DECODER_RESULT_SUCCESS) {
result = BROTLI_DECODER_RESULT_ERROR;
}
- return result;
-}
-
+ return result;
+}
+
/* Invariant: input stream is never overconsumed:
- invalid input implies that the whole stream is invalid -> any amount of
input could be read and discarded
@@ -1988,7 +1988,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in,
size_t* available_out, uint8_t** next_out, size_t* total_out) {
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
- BrotliBitReader* br = &s->br;
+ BrotliBitReader* br = &s->br;
/* Ensure that |total_out| is set, even if no data will ever be pushed out. */
if (total_out) {
*total_out = s->partial_pos_out;
@@ -2012,8 +2012,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
br->next_in = &s->buffer.u8[0];
}
- /* State machine */
- for (;;) {
+ /* State machine */
+ for (;;) {
if (result != BROTLI_DECODER_SUCCESS) {
/* Error, needs more input/output. */
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
@@ -2025,7 +2025,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = intermediate_result;
break;
}
- }
+ }
if (s->buffer_length != 0) { /* Used with internal buffer. */
if (br->avail_in == 0) {
/* Successfully finished read transaction.
@@ -2062,9 +2062,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
(*available_in)--;
}
break;
- }
+ }
/* Unreachable. */
- }
+ }
/* Fail or needs more output. */
@@ -2081,15 +2081,15 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
*next_in = br->next_in;
}
break;
- }
- switch (s->state) {
- case BROTLI_STATE_UNINITED:
- /* Prepare to the first read. */
- if (!BrotliWarmupBitReader(br)) {
+ }
+ switch (s->state) {
+ case BROTLI_STATE_UNINITED:
+ /* Prepare to the first read. */
+ if (!BrotliWarmupBitReader(br)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
- break;
- }
- /* Decode window size. */
+ break;
+ }
+ /* Decode window size. */
result = DecodeWindowBits(s, br); /* Reads 1..8 bits. */
if (result != BROTLI_DECODER_SUCCESS) {
break;
@@ -2109,8 +2109,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
if (s->window_bits < BROTLI_LARGE_MIN_WBITS ||
s->window_bits > BROTLI_LARGE_MAX_WBITS) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
- break;
- }
+ break;
+ }
s->state = BROTLI_STATE_INITIALIZE;
/* Fall through. */
@@ -2118,99 +2118,99 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
BROTLI_LOG_UINT(s->window_bits);
/* Maximum distance, see section 9.1. of the spec. */
s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP;
-
- /* Allocate memory for both block_type_trees and block_len_trees. */
+
+ /* Allocate memory for both block_type_trees and block_len_trees. */
s->block_type_trees = (HuffmanCode*)BROTLI_DECODER_ALLOC(s,
sizeof(HuffmanCode) * 3 *
(BROTLI_HUFFMAN_MAX_SIZE_258 + BROTLI_HUFFMAN_MAX_SIZE_26));
if (s->block_type_trees == 0) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES);
- break;
- }
+ break;
+ }
s->block_len_trees =
s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258;
-
- s->state = BROTLI_STATE_METABLOCK_BEGIN;
+
+ s->state = BROTLI_STATE_METABLOCK_BEGIN;
/* Fall through. */
- case BROTLI_STATE_METABLOCK_BEGIN:
+ case BROTLI_STATE_METABLOCK_BEGIN:
BrotliDecoderStateMetablockBegin(s);
BROTLI_LOG_UINT(s->pos);
- s->state = BROTLI_STATE_METABLOCK_HEADER;
+ s->state = BROTLI_STATE_METABLOCK_HEADER;
/* Fall through. */
- case BROTLI_STATE_METABLOCK_HEADER:
+ case BROTLI_STATE_METABLOCK_HEADER:
result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */
if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
- BROTLI_LOG_UINT(s->is_last_metablock);
- BROTLI_LOG_UINT(s->meta_block_remaining_len);
- BROTLI_LOG_UINT(s->is_metadata);
- BROTLI_LOG_UINT(s->is_uncompressed);
- if (s->is_metadata || s->is_uncompressed) {
- if (!BrotliJumpToByteBoundary(br)) {
+ break;
+ }
+ BROTLI_LOG_UINT(s->is_last_metablock);
+ BROTLI_LOG_UINT(s->meta_block_remaining_len);
+ BROTLI_LOG_UINT(s->is_metadata);
+ BROTLI_LOG_UINT(s->is_uncompressed);
+ if (s->is_metadata || s->is_uncompressed) {
+ if (!BrotliJumpToByteBoundary(br)) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_1);
- break;
- }
- }
- if (s->is_metadata) {
- s->state = BROTLI_STATE_METADATA;
- break;
- }
- if (s->meta_block_remaining_len == 0) {
- s->state = BROTLI_STATE_METABLOCK_DONE;
- break;
- }
+ break;
+ }
+ }
+ if (s->is_metadata) {
+ s->state = BROTLI_STATE_METADATA;
+ break;
+ }
+ if (s->meta_block_remaining_len == 0) {
+ s->state = BROTLI_STATE_METABLOCK_DONE;
+ break;
+ }
BrotliCalculateRingBufferSize(s);
- if (s->is_uncompressed) {
- s->state = BROTLI_STATE_UNCOMPRESSED;
- break;
- }
+ if (s->is_uncompressed) {
+ s->state = BROTLI_STATE_UNCOMPRESSED;
+ break;
+ }
s->loop_counter = 0;
- s->state = BROTLI_STATE_HUFFMAN_CODE_0;
- break;
+ s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+ break;
case BROTLI_STATE_UNCOMPRESSED: {
result = CopyUncompressedBlockToOutput(
available_out, next_out, total_out, s);
if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
- s->state = BROTLI_STATE_METABLOCK_DONE;
- break;
+ break;
+ }
+ s->state = BROTLI_STATE_METABLOCK_DONE;
+ break;
}
- case BROTLI_STATE_METADATA:
- for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
- uint32_t bits;
- /* Read one byte and ignore it. */
- if (!BrotliSafeReadBits(br, 8, &bits)) {
+ case BROTLI_STATE_METADATA:
+ for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
+ uint32_t bits;
+ /* Read one byte and ignore it. */
+ if (!BrotliSafeReadBits(br, 8, &bits)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
- break;
- }
- }
+ break;
+ }
+ }
if (result == BROTLI_DECODER_SUCCESS) {
- s->state = BROTLI_STATE_METABLOCK_DONE;
- }
- break;
+ s->state = BROTLI_STATE_METABLOCK_DONE;
+ }
+ break;
- case BROTLI_STATE_HUFFMAN_CODE_0:
+ case BROTLI_STATE_HUFFMAN_CODE_0:
if (s->loop_counter >= 3) {
s->state = BROTLI_STATE_METABLOCK_HEADER_2;
- break;
- }
- /* Reads 1..11 bits. */
+ break;
+ }
+ /* Reads 1..11 bits. */
result = DecodeVarLenUint8(s, br, &s->num_block_types[s->loop_counter]);
if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
+ break;
+ }
s->num_block_types[s->loop_counter]++;
BROTLI_LOG_UINT(s->num_block_types[s->loop_counter]);
if (s->num_block_types[s->loop_counter] < 2) {
s->loop_counter++;
- break;
- }
+ break;
+ }
s->state = BROTLI_STATE_HUFFMAN_CODE_1;
/* Fall through. */
@@ -2230,7 +2230,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = ReadHuffmanCode(alphabet_size, alphabet_size,
&s->block_len_trees[tree_offset], NULL, s);
if (result != BROTLI_DECODER_SUCCESS) break;
- s->state = BROTLI_STATE_HUFFMAN_CODE_3;
+ s->state = BROTLI_STATE_HUFFMAN_CODE_3;
}
/* Fall through. */
@@ -2239,33 +2239,33 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
&s->block_len_trees[tree_offset], br)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
- break;
- }
+ break;
+ }
BROTLI_LOG_UINT(s->block_length[s->loop_counter]);
s->loop_counter++;
- s->state = BROTLI_STATE_HUFFMAN_CODE_0;
- break;
+ s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+ break;
}
case BROTLI_STATE_METABLOCK_HEADER_2: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 6, &bits)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
- break;
- }
+ break;
+ }
s->distance_postfix_bits = bits & BitMask(2);
bits >>= 2;
s->num_direct_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
(bits << s->distance_postfix_bits);
- BROTLI_LOG_UINT(s->num_direct_distance_codes);
- BROTLI_LOG_UINT(s->distance_postfix_bits);
- s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits);
+ BROTLI_LOG_UINT(s->num_direct_distance_codes);
+ BROTLI_LOG_UINT(s->distance_postfix_bits);
+ s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits);
s->context_modes =
(uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)s->num_block_types[0]);
- if (s->context_modes == 0) {
+ if (s->context_modes == 0) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES);
- break;
- }
+ break;
+ }
s->loop_counter = 0;
s->state = BROTLI_STATE_CONTEXT_MODES;
}
@@ -2275,19 +2275,19 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = ReadContextModes(s);
if (result != BROTLI_DECODER_SUCCESS) {
break;
- }
- s->state = BROTLI_STATE_CONTEXT_MAP_1;
+ }
+ s->state = BROTLI_STATE_CONTEXT_MAP_1;
/* Fall through. */
- case BROTLI_STATE_CONTEXT_MAP_1:
+ case BROTLI_STATE_CONTEXT_MAP_1:
result = DecodeContextMap(
s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS,
&s->num_literal_htrees, &s->context_map, s);
if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
+ break;
+ }
DetectTrivialLiteralBlockTypes(s);
- s->state = BROTLI_STATE_CONTEXT_MAP_2;
+ s->state = BROTLI_STATE_CONTEXT_MAP_2;
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_2: {
@@ -2307,7 +2307,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
&s->num_dist_htrees, &s->dist_context_map, s);
if (result != BROTLI_DECODER_SUCCESS) {
break;
- }
+ }
allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees);
@@ -2322,7 +2322,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
}
s->loop_counter = 0;
- s->state = BROTLI_STATE_TREE_GROUP;
+ s->state = BROTLI_STATE_TREE_GROUP;
}
/* Fall through. */
@@ -2334,83 +2334,83 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
case 2: hgroup = &s->distance_hgroup; break;
default: return SaveErrorCode(s, BROTLI_FAILURE(
BROTLI_DECODER_ERROR_UNREACHABLE));
- }
+ }
result = HuffmanTreeGroupDecode(hgroup, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->loop_counter++;
if (s->loop_counter >= 3) {
PrepareLiteralDecoding(s);
- s->dist_context_map_slice = s->dist_context_map;
- s->htree_command = s->insert_copy_hgroup.htrees[0];
+ s->dist_context_map_slice = s->dist_context_map;
+ s->htree_command = s->insert_copy_hgroup.htrees[0];
if (!BrotliEnsureRingBuffer(s)) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2);
break;
}
- s->state = BROTLI_STATE_COMMAND_BEGIN;
- }
- break;
+ s->state = BROTLI_STATE_COMMAND_BEGIN;
+ }
+ break;
}
- case BROTLI_STATE_COMMAND_BEGIN:
+ case BROTLI_STATE_COMMAND_BEGIN:
/* Fall through. */
- case BROTLI_STATE_COMMAND_INNER:
+ case BROTLI_STATE_COMMAND_INNER:
/* Fall through. */
case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
/* Fall through. */
- case BROTLI_STATE_COMMAND_POST_WRAP_COPY:
+ case BROTLI_STATE_COMMAND_POST_WRAP_COPY:
result = ProcessCommands(s);
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
result = SafeProcessCommands(s);
- }
- break;
+ }
+ break;
- case BROTLI_STATE_COMMAND_INNER_WRITE:
+ case BROTLI_STATE_COMMAND_INNER_WRITE:
/* Fall through. */
- case BROTLI_STATE_COMMAND_POST_WRITE_1:
+ case BROTLI_STATE_COMMAND_POST_WRITE_1:
/* Fall through. */
- case BROTLI_STATE_COMMAND_POST_WRITE_2:
+ case BROTLI_STATE_COMMAND_POST_WRITE_2:
result = WriteRingBuffer(
s, available_out, next_out, total_out, BROTLI_FALSE);
if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
+ break;
+ }
WrapRingBuffer(s);
if (s->ringbuffer_size == 1 << s->window_bits) {
s->max_distance = s->max_backward_distance;
}
- if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
+ if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
if (s->meta_block_remaining_len == 0) {
/* Next metablock, if any. */
- s->state = BROTLI_STATE_METABLOCK_DONE;
- } else {
+ s->state = BROTLI_STATE_METABLOCK_DONE;
+ } else {
s->state = BROTLI_STATE_COMMAND_BEGIN;
- }
+ }
break;
- } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) {
- s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
- } else { /* BROTLI_STATE_COMMAND_INNER_WRITE */
+ } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) {
+ s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
+ } else { /* BROTLI_STATE_COMMAND_INNER_WRITE */
if (s->loop_counter == 0) {
if (s->meta_block_remaining_len == 0) {
- s->state = BROTLI_STATE_METABLOCK_DONE;
+ s->state = BROTLI_STATE_METABLOCK_DONE;
} else {
s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
- }
+ }
break;
- }
- s->state = BROTLI_STATE_COMMAND_INNER;
- }
- break;
+ }
+ s->state = BROTLI_STATE_COMMAND_INNER;
+ }
+ break;
- case BROTLI_STATE_METABLOCK_DONE:
+ case BROTLI_STATE_METABLOCK_DONE:
if (s->meta_block_remaining_len < 0) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2);
break;
}
BrotliDecoderStateCleanupAfterMetablock(s);
- if (!s->is_last_metablock) {
- s->state = BROTLI_STATE_METABLOCK_BEGIN;
- break;
- }
+ if (!s->is_last_metablock) {
+ s->state = BROTLI_STATE_METABLOCK_BEGIN;
+ break;
+ }
if (!BrotliJumpToByteBoundary(br)) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_2);
break;
@@ -2420,20 +2420,20 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
*available_in = br->avail_in;
*next_in = br->next_in;
}
- s->state = BROTLI_STATE_DONE;
+ s->state = BROTLI_STATE_DONE;
/* Fall through. */
- case BROTLI_STATE_DONE:
- if (s->ringbuffer != 0) {
+ case BROTLI_STATE_DONE:
+ if (s->ringbuffer != 0) {
result = WriteRingBuffer(
s, available_out, next_out, total_out, BROTLI_TRUE);
if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
- }
+ break;
+ }
+ }
return SaveErrorCode(s, result);
- }
- }
+ }
+ }
return SaveErrorCode(s, result);
}
@@ -2468,19 +2468,19 @@ const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) {
*size = 0;
result = 0;
}
- return result;
-}
-
+ return result;
+}
+
BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* s) {
return TO_BROTLI_BOOL(s->state != BROTLI_STATE_UNINITED ||
BrotliGetAvailableBits(&s->br) != 0);
-}
-
+}
+
BROTLI_BOOL BrotliDecoderIsFinished(const BrotliDecoderState* s) {
return TO_BROTLI_BOOL(s->state == BROTLI_STATE_DONE) &&
!BrotliDecoderHasMoreOutput(s);
}
-
+
BrotliDecoderErrorCode BrotliDecoderGetErrorCode(const BrotliDecoderState* s) {
return (BrotliDecoderErrorCode)s->error_code;
}
@@ -2501,6 +2501,6 @@ uint32_t BrotliDecoderVersion() {
return BROTLI_VERSION;
}
-#if defined(__cplusplus) || defined(c_plusplus)
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
+#endif
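The DecodeContextMap hunks at the top of this diff show how the context map's run-length coding for zeros is expanded: a Huffman symbol in the run-length range (1..max_run_length_prefix) is followed by that many extra bits, the run length is those extra bits plus 1 << code, and the run is rejected if it would overflow the map. Below is a minimal standalone sketch of just that expansion step; ExpandZeroRun and its parameter names are illustrative only and not part of the brotli API.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Expands one run-length code into zeros, mirroring the RLE sub-stage
   in DecodeContextMap above (illustrative names, not the library API). */
static int ExpandZeroRun(uint8_t* context_map, size_t map_size,
                         size_t* index, uint32_t code, uint32_t extra_bits) {
  uint32_t reps = extra_bits + (1u << code);  /* total run length of zeros */
  if (*index + reps > map_size) return 0;     /* repeat would overrun the map */
  do {
    context_map[(*index)++] = 0;
  } while (--reps);
  return 1;
}

int main(void) {
  uint8_t map[64] = {0};
  size_t idx = 0;
  /* code = 3 with extra_bits = 5 -> run of (1 << 3) + 5 = 13 zeros */
  if (ExpandZeroRun(map, sizeof(map), &idx, 3, 5)) {
    printf("wrote %zu zeros\n", idx);  /* prints: wrote 13 zeros */
  }
  return 0;
}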
diff --git a/contrib/libs/brotli/dec/huffman.c b/contrib/libs/brotli/dec/huffman.c
index 02e5b15c22..30c40d33f2 100644
--- a/contrib/libs/brotli/dec/huffman.c
+++ b/contrib/libs/brotli/dec/huffman.c
@@ -1,29 +1,29 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Utilities for building Huffman decoding tables. */
-
-#include "./huffman.h"
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#include "./huffman.h"
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
#include "../common/platform.h"
#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
#define BROTLI_REVERSE_BITS_MAX 8
#if defined(BROTLI_RBIT)
#define BROTLI_REVERSE_BITS_BASE \
((sizeof(brotli_reg_t) << 3) - BROTLI_REVERSE_BITS_MAX)
-#else
+#else
#define BROTLI_REVERSE_BITS_BASE 0
static uint8_t kReverseBits[1 << BROTLI_REVERSE_BITS_MAX] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
@@ -72,37 +72,37 @@ static BROTLI_INLINE brotli_reg_t BrotliReverseBits(brotli_reg_t num) {
return BROTLI_RBIT(num);
#else
return kReverseBits[num];
-#endif
-}
-
-/* Stores code in table[0], table[step], table[2*step], ..., table[end] */
-/* Assumes that end is an integer multiple of step */
-static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
- int step, int end,
- HuffmanCode code) {
- do {
- end -= step;
- table[end] = code;
- } while (end > 0);
-}
-
+#endif
+}
+
+/* Stores code in table[0], table[step], table[2*step], ..., table[end] */
+/* Assumes that end is an integer multiple of step */
+static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
+ int step, int end,
+ HuffmanCode code) {
+ do {
+ end -= step;
+ table[end] = code;
+ } while (end > 0);
+}
+
/* Returns the table width of the next 2nd level table. |count| is the histogram
of bit lengths for the remaining symbols, |len| is the code length of the
next processed symbol. */
-static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
- int len, int root_bits) {
- int left = 1 << (len - root_bits);
- while (len < BROTLI_HUFFMAN_MAX_CODE_LENGTH) {
- left -= count[len];
- if (left <= 0) break;
- ++len;
- left <<= 1;
- }
- return len - root_bits;
-}
-
-void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
- const uint8_t* const code_lengths,
+static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
+ int len, int root_bits) {
+ int left = 1 << (len - root_bits);
+ while (len < BROTLI_HUFFMAN_MAX_CODE_LENGTH) {
+ left -= count[len];
+ if (left <= 0) break;
+ ++len;
+ left <<= 1;
+ }
+ return len - root_bits;
+}
+
+void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
+ const uint8_t* const code_lengths,
uint16_t* count) {
HuffmanCode code; /* current table entry */
int symbol; /* symbol index in original or sorted table */
@@ -111,61 +111,61 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
int step; /* step size to replicate values in current table */
int table_size; /* size of current table */
int sorted[BROTLI_CODE_LENGTH_CODES]; /* symbols sorted by code length */
- /* offsets in sorted table for each length */
- int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1];
- int bits;
- int bits_count;
+ /* offsets in sorted table for each length */
+ int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1];
+ int bits;
+ int bits_count;
BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
BROTLI_REVERSE_BITS_MAX);
-
+
/* Generate offsets into sorted symbol table by code length. */
- symbol = -1;
- bits = 1;
- BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
- symbol += count[bits];
- offset[bits] = symbol;
- bits++;
- });
+ symbol = -1;
+ bits = 1;
+ BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
+ symbol += count[bits];
+ offset[bits] = symbol;
+ bits++;
+ });
/* Symbols with code length 0 are placed after all other symbols. */
offset[0] = BROTLI_CODE_LENGTH_CODES - 1;
-
+
/* Sort symbols by length, by symbol order within each length. */
symbol = BROTLI_CODE_LENGTH_CODES;
- do {
- BROTLI_REPEAT(6, {
- symbol--;
- sorted[offset[code_lengths[symbol]]--] = symbol;
- });
- } while (symbol != 0);
-
- table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH;
-
+ do {
+ BROTLI_REPEAT(6, {
+ symbol--;
+ sorted[offset[code_lengths[symbol]]--] = symbol;
+ });
+ } while (symbol != 0);
+
+ table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH;
+
/* Special case: all symbols but one have 0 code length. */
if (offset[0] == 0) {
code = ConstructHuffmanCode(0, (uint16_t)sorted[0]);
for (key = 0; key < (brotli_reg_t)table_size; ++key) {
- table[key] = code;
- }
- return;
- }
-
+ table[key] = code;
+ }
+ return;
+ }
+
/* Fill in table. */
- key = 0;
+ key = 0;
key_step = BROTLI_REVERSE_BITS_LOWEST;
- symbol = 0;
- bits = 1;
- step = 2;
- do {
- for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+ symbol = 0;
+ bits = 1;
+ step = 2;
+ do {
+ for (bits_count = count[bits]; bits_count != 0; --bits_count) {
code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)sorted[symbol++]);
ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
key += key_step;
- }
- step <<= 1;
+ }
+ step <<= 1;
key_step >>= 1;
- } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
-}
-
+ } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
+}
+
uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
int root_bits,
const uint16_t* const symbol_lists,
@@ -182,114 +182,114 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
int table_bits; /* key length of current table */
int table_size; /* size of current table */
int total_size; /* sum of root table size and 2nd level table sizes */
- int max_length = -1;
- int bits;
- int bits_count;
-
+ int max_length = -1;
+ int bits;
+ int bits_count;
+
BROTLI_DCHECK(root_bits <= BROTLI_REVERSE_BITS_MAX);
BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH - root_bits <=
BROTLI_REVERSE_BITS_MAX);
- while (symbol_lists[max_length] == 0xFFFF) max_length--;
- max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1;
-
- table = root_table;
- table_bits = root_bits;
- table_size = 1 << table_bits;
- total_size = table_size;
-
+ while (symbol_lists[max_length] == 0xFFFF) max_length--;
+ max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1;
+
+ table = root_table;
+ table_bits = root_bits;
+ table_size = 1 << table_bits;
+ total_size = table_size;
+
   /* Fill in the root table. Reduce the table size if possible,
and create the repetitions by memcpy. */
- if (table_bits > max_length) {
- table_bits = max_length;
- table_size = 1 << table_bits;
- }
- key = 0;
+ if (table_bits > max_length) {
+ table_bits = max_length;
+ table_size = 1 << table_bits;
+ }
+ key = 0;
key_step = BROTLI_REVERSE_BITS_LOWEST;
- bits = 1;
- step = 2;
- do {
- symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
- for (bits_count = count[bits]; bits_count != 0; --bits_count) {
- symbol = symbol_lists[symbol];
+ bits = 1;
+ step = 2;
+ do {
+ symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+ for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+ symbol = symbol_lists[symbol];
code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol);
ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
key += key_step;
- }
- step <<= 1;
+ }
+ step <<= 1;
key_step >>= 1;
- } while (++bits <= table_bits);
-
+ } while (++bits <= table_bits);
+
/* If root_bits != table_bits then replicate to fill the remaining slots. */
- while (total_size != table_size) {
- memcpy(&table[table_size], &table[0],
- (size_t)table_size * sizeof(table[0]));
- table_size <<= 1;
- }
-
+ while (total_size != table_size) {
+ memcpy(&table[table_size], &table[0],
+ (size_t)table_size * sizeof(table[0]));
+ table_size <<= 1;
+ }
+
/* Fill in 2nd level tables and add pointers to root table. */
key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);
sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
for (len = root_bits + 1, step = 2; len <= max_length; ++len) {
- symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
- for (; count[len] != 0; --count[len]) {
+ symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+ for (; count[len] != 0; --count[len]) {
if (sub_key == (BROTLI_REVERSE_BITS_LOWEST << 1U)) {
- table += table_size;
- table_bits = NextTableBitSize(count, len, root_bits);
- table_size = 1 << table_bits;
- total_size += table_size;
+ table += table_size;
+ table_bits = NextTableBitSize(count, len, root_bits);
+ table_size = 1 << table_bits;
+ total_size += table_size;
sub_key = BrotliReverseBits(key);
key += key_step;
root_table[sub_key] = ConstructHuffmanCode(
(uint8_t)(table_bits + root_bits),
(uint16_t)(((size_t)(table - root_table)) - sub_key));
sub_key = 0;
- }
- symbol = symbol_lists[symbol];
+ }
+ symbol = symbol_lists[symbol];
code = ConstructHuffmanCode((uint8_t)(len - root_bits), (uint16_t)symbol);
ReplicateValue(
&table[BrotliReverseBits(sub_key)], step, table_size, code);
sub_key += sub_key_step;
- }
+ }
step <<= 1;
sub_key_step >>= 1;
- }
+ }
return (uint32_t)total_size;
-}
-
+}
+
uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
int root_bits,
uint16_t* val,
uint32_t num_symbols) {
uint32_t table_size = 1;
const uint32_t goal_size = 1U << root_bits;
- switch (num_symbols) {
- case 0:
+ switch (num_symbols) {
+ case 0:
table[0] = ConstructHuffmanCode(0, val[0]);
- break;
- case 1:
- if (val[1] > val[0]) {
+ break;
+ case 1:
+ if (val[1] > val[0]) {
table[0] = ConstructHuffmanCode(1, val[0]);
table[1] = ConstructHuffmanCode(1, val[1]);
- } else {
+ } else {
table[0] = ConstructHuffmanCode(1, val[1]);
table[1] = ConstructHuffmanCode(1, val[0]);
- }
- table_size = 2;
- break;
- case 2:
+ }
+ table_size = 2;
+ break;
+ case 2:
table[0] = ConstructHuffmanCode(1, val[0]);
table[2] = ConstructHuffmanCode(1, val[0]);
- if (val[2] > val[1]) {
+ if (val[2] > val[1]) {
table[1] = ConstructHuffmanCode(2, val[1]);
table[3] = ConstructHuffmanCode(2, val[2]);
- } else {
+ } else {
table[1] = ConstructHuffmanCode(2, val[2]);
table[3] = ConstructHuffmanCode(2, val[1]);
- }
- table_size = 4;
- break;
+ }
+ table_size = 4;
+ break;
case 3: {
int i, k;
for (i = 0; i < 3; ++i) {
@@ -298,22 +298,22 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
uint16_t t = val[k];
val[k] = val[i];
val[i] = t;
- }
- }
- }
+ }
+ }
+ }
table[0] = ConstructHuffmanCode(2, val[0]);
table[2] = ConstructHuffmanCode(2, val[1]);
table[1] = ConstructHuffmanCode(2, val[2]);
table[3] = ConstructHuffmanCode(2, val[3]);
table_size = 4;
- break;
+ break;
}
case 4: {
if (val[3] < val[2]) {
uint16_t t = val[3];
val[3] = val[2];
val[2] = t;
- }
+ }
table[0] = ConstructHuffmanCode(1, val[0]);
table[1] = ConstructHuffmanCode(2, val[1]);
table[2] = ConstructHuffmanCode(1, val[0]);
@@ -323,17 +323,17 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
table[6] = ConstructHuffmanCode(1, val[0]);
table[7] = ConstructHuffmanCode(3, val[3]);
table_size = 8;
- break;
+ break;
}
- }
- while (table_size != goal_size) {
- memcpy(&table[table_size], &table[0],
- (size_t)table_size * sizeof(table[0]));
- table_size <<= 1;
- }
- return goal_size;
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
+ }
+ while (table_size != goal_size) {
+ memcpy(&table[table_size], &table[0],
+ (size_t)table_size * sizeof(table[0]));
+ table_size <<= 1;
+ }
+ return goal_size;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
+#endif
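ReplicateValue in the huffman.c hunks above writes one HuffmanCode into table[end - step], table[end - 2*step], ..., table[0]; this is how a code whose length is shorter than the table's index width gets duplicated into every slot it covers (the decoder indexes tables with bit-reversed keys via BrotliReverseBits). A small self-contained sketch of that replication pattern follows, using illustrative names (Entry, Replicate) rather than the real HuffmanCode type.

#include <stdio.h>

typedef struct { unsigned char bits; unsigned short value; } Entry;

/* Same fill pattern as ReplicateValue(): stride |step|, stop at index 0. */
static void Replicate(Entry* table, int step, int end, Entry code) {
  do {
    end -= step;
    table[end] = code;
  } while (end > 0);
}

int main(void) {
  Entry table[8] = {{0, 0}};
  Entry code = {2, 42};          /* symbol 42 with a 2-bit code */
  /* step = 1 << bits = 4 in an 8-entry table: fills entries 4 and 0. */
  Replicate(&table[0], 4, 8, code);
  for (int i = 0; i < 8; ++i) {
    printf("table[%d] = {bits=%d, value=%d}\n",
           i, table[i].bits, table[i].value);
  }
  return 0;
}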
diff --git a/contrib/libs/brotli/dec/huffman.h b/contrib/libs/brotli/dec/huffman.h
index 9951f8e15d..b9f0716c16 100644
--- a/contrib/libs/brotli/dec/huffman.h
+++ b/contrib/libs/brotli/dec/huffman.h
@@ -1,23 +1,23 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Utilities for building Huffman decoding tables. */
-
-#ifndef BROTLI_DEC_HUFFMAN_H_
-#define BROTLI_DEC_HUFFMAN_H_
-
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#ifndef BROTLI_DEC_HUFFMAN_H_
+#define BROTLI_DEC_HUFFMAN_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
-
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
+
/* Maximum possible Huffman table size for an alphabet size of (index * 32),
max code length 15 and root table bits 8. */
static const uint16_t kMaxHuffmanTableSize[] = {
@@ -30,9 +30,9 @@ static const uint16_t kMaxHuffmanTableSize[] = {
#define BROTLI_HUFFMAN_MAX_SIZE_258 632
/* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
#define BROTLI_HUFFMAN_MAX_SIZE_272 646
-
-#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
-
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
+
#if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \
BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0))
#define BROTLI_HUFFMAN_CODE_FAST_LOAD
@@ -41,11 +41,11 @@ static const uint16_t kMaxHuffmanTableSize[] = {
#if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD)
/* Do not create this struct directly - use the ConstructHuffmanCode
* constructor below! */
-typedef struct {
+typedef struct {
uint8_t bits; /* number of bits used for this symbol */
uint16_t value; /* symbol value or table offset */
-} HuffmanCode;
-
+} HuffmanCode;
+
static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
const uint16_t value) {
HuffmanCode h;
@@ -93,35 +93,35 @@ static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
#define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16)
#endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
-/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
const uint8_t* const code_lengths, uint16_t* count);
-
+
/* Builds Huffman lookup table assuming code lengths are in symbol order.
Returns size of resulting table. */
BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg);
-
+
/* Builds a simple Huffman table. The |num_symbols| parameter is to be
interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
4 means 4 symbols with lengths [1, 2, 3, 3]. */
BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
int root_bits, uint16_t* symbols, uint32_t num_symbols);
-
-/* Contains a collection of Huffman trees with the same alphabet size. */
+
+/* Contains a collection of Huffman trees with the same alphabet size. */
/* max_symbol is needed due to simple codes since log2(alphabet_size) could be
greater than log2(max_symbol). */
-typedef struct {
- HuffmanCode** htrees;
- HuffmanCode* codes;
+typedef struct {
+ HuffmanCode** htrees;
+ HuffmanCode* codes;
uint16_t alphabet_size;
uint16_t max_symbol;
uint16_t num_htrees;
-} HuffmanTreeGroup;
-
-#if defined(__cplusplus) || defined(c_plusplus)
+} HuffmanTreeGroup;
+
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
-
-#endif /* BROTLI_DEC_HUFFMAN_H_ */
+#endif
+
+#endif /* BROTLI_DEC_HUFFMAN_H_ */
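The |num_symbols| convention documented above for BrotliBuildSimpleHuffmanTable corresponds to fixed code-length patterns, which the switch cases in huffman.c earlier in this diff implement (case 2 yields lengths [1, 2, 2], case 3 yields [2, 2, 2, 2], case 4 yields [1, 2, 3, 3]). A small sketch that just prints those patterns; the table name kSimpleCodeLengths is illustrative and not part of the library.

#include <stdio.h>

/* Code-length patterns implied by the |num_symbols| convention above. */
static const unsigned char kSimpleCodeLengths[5][4] = {
  {0, 0, 0, 0},  /* num_symbols == 0: single symbol, zero-length code */
  {1, 1, 0, 0},  /* num_symbols == 1: two symbols                     */
  {1, 2, 2, 0},  /* num_symbols == 2: three symbols                   */
  {2, 2, 2, 2},  /* num_symbols == 3: four symbols, flat code         */
  {1, 2, 3, 3},  /* num_symbols == 4: four symbols, skewed code       */
};

int main(void) {
  for (int n = 0; n <= 4; ++n) {
    printf("num_symbols=%d -> lengths", n);
    for (int i = 0; i < 4; ++i) printf(" %d", kSimpleCodeLengths[n][i]);
    printf("\n");
  }
  return 0;
}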
diff --git a/contrib/libs/brotli/dec/prefix.h b/contrib/libs/brotli/dec/prefix.h
index b4ceebfbd4..3ea062d84a 100644
--- a/contrib/libs/brotli/dec/prefix.h
+++ b/contrib/libs/brotli/dec/prefix.h
@@ -1,750 +1,750 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Lookup tables to map prefix codes to value ranges. This is used during
+*/
+
+/* Lookup tables to map prefix codes to value ranges. This is used during
decoding of the block lengths, literal insertion lengths and copy lengths. */
-
-#ifndef BROTLI_DEC_PREFIX_H_
-#define BROTLI_DEC_PREFIX_H_
-
+
+#ifndef BROTLI_DEC_PREFIX_H_
+#define BROTLI_DEC_PREFIX_H_
+
#include "../common/constants.h"
#include <brotli/types.h>
/* Represents the range of values belonging to a prefix code:
[offset, offset + 2^nbits) */
-struct PrefixCodeRange {
+struct PrefixCodeRange {
uint16_t offset;
uint8_t nbits;
-};
-
+};
+
static const struct PrefixCodeRange
kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
- { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
- { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
- { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
- { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
- { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
- { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
- {8433, 13}, {16625, 24}
-};
-
-typedef struct CmdLutElement {
- uint8_t insert_len_extra_bits;
- uint8_t copy_len_extra_bits;
- int8_t distance_code;
- uint8_t context;
- uint16_t insert_len_offset;
- uint16_t copy_len_offset;
-} CmdLutElement;
-
+ { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
+ { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
+ { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
+ { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
+ { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
+ { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
+ {8433, 13}, {16625, 24}
+};
+
+typedef struct CmdLutElement {
+ uint8_t insert_len_extra_bits;
+ uint8_t copy_len_extra_bits;
+ int8_t distance_code;
+ uint8_t context;
+ uint16_t insert_len_offset;
+ uint16_t copy_len_offset;
+} CmdLutElement;
+
static const CmdLutElement kCmdLut[BROTLI_NUM_COMMAND_SYMBOLS] = {
- { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 },
- { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 },
- { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 },
- { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 },
- { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 },
- { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 },
- { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 },
- { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 },
- { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 },
- { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 },
- { 0x00, 0x00, 0, 0x02, 0x0001, 0x0004 },
- { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 },
- { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 },
- { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 },
- { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 },
- { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 },
- { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 },
- { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 },
- { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 },
- { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 },
- { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 },
- { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 },
- { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 },
- { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 },
- { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 },
- { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 },
- { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 },
- { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 },
- { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 },
- { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 },
- { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 },
- { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 },
- { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 },
- { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 },
- { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 },
- { 0x00, 0x00, 0, 0x03, 0x0004, 0x0005 },
- { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 },
- { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 },
- { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 },
- { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 },
- { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 },
- { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 },
- { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 },
- { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 },
- { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 },
- { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 },
- { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 },
- { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 },
- { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 },
- { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 },
- { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 },
- { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 },
- { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 },
- { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 },
- { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 },
- { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 },
- { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 },
- { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 },
- { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 },
- { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 },
- { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 },
- { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 },
- { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 },
- { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 },
- { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a },
- { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c },
- { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e },
- { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 },
- { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 },
- { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e },
- { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 },
- { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 },
- { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a },
- { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c },
- { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e },
- { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 },
- { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 },
- { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e },
- { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 },
- { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 },
- { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a },
- { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c },
- { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e },
- { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 },
- { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 },
- { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e },
- { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 },
- { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 },
- { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a },
- { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c },
- { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e },
- { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 },
- { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 },
- { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e },
- { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 },
- { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 },
- { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a },
- { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c },
- { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e },
- { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 },
- { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 },
- { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e },
- { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 },
- { 0x00, 0x04, 0, 0x03, 0x0004, 0x0036 },
- { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a },
- { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c },
- { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e },
- { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 },
- { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 },
- { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e },
- { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 },
- { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 },
- { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a },
- { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c },
- { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e },
- { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 },
- { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 },
- { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e },
- { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 },
- { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 },
- { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a },
- { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c },
- { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e },
- { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 },
- { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 },
- { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e },
- { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 },
- { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 },
- { 0x00, 0x00, -1, 0x00, 0x0000, 0x0002 },
- { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 },
- { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 },
- { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 },
- { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 },
- { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 },
- { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 },
- { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 },
- { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 },
- { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 },
- { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 },
- { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 },
- { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 },
- { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 },
- { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 },
- { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 },
- { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 },
- { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 },
- { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 },
- { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 },
- { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 },
- { 0x00, 0x00, -1, 0x03, 0x0002, 0x0007 },
- { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 },
- { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 },
- { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 },
- { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 },
- { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 },
- { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 },
- { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 },
- { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 },
- { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 },
- { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 },
- { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 },
- { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 },
- { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 },
- { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 },
- { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 },
- { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 },
- { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 },
- { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 },
- { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 },
- { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 },
- { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 },
- { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 },
- { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 },
- { 0x00, 0x00, -1, 0x03, 0x0005, 0x0007 },
- { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 },
- { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 },
- { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 },
- { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 },
- { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 },
- { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 },
- { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 },
- { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 },
- { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 },
- { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 },
- { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 },
- { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 },
- { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 },
- { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 },
- { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 },
- { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 },
- { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 },
- { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 },
- { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a },
- { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c },
- { 0x00, 0x02, -1, 0x03, 0x0000, 0x000e },
- { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 },
- { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 },
- { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e },
- { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 },
- { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 },
- { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a },
- { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c },
- { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e },
- { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 },
- { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 },
- { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e },
- { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 },
- { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 },
- { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a },
- { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c },
- { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e },
- { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 },
- { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 },
- { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e },
- { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 },
- { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 },
- { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a },
- { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c },
- { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e },
- { 0x00, 0x02, -1, 0x03, 0x0003, 0x0012 },
- { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 },
- { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e },
- { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 },
- { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 },
- { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a },
- { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c },
- { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e },
- { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 },
- { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 },
- { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e },
- { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 },
- { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 },
- { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a },
- { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c },
- { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e },
- { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 },
- { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 },
- { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e },
- { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 },
- { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 },
- { 0x01, 0x01, -1, 0x03, 0x0006, 0x000a },
- { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c },
- { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e },
- { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 },
- { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 },
- { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e },
- { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 },
- { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 },
- { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a },
- { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c },
- { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e },
- { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 },
- { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 },
- { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e },
- { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 },
- { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 },
- { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 },
- { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 },
- { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 },
- { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 },
- { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 },
- { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 },
- { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 },
- { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 },
- { 0x02, 0x00, -1, 0x00, 0x000e, 0x0002 },
- { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 },
- { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 },
- { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 },
- { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 },
- { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 },
- { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 },
- { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 },
- { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 },
- { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 },
- { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 },
- { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 },
- { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 },
- { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 },
- { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 },
- { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 },
- { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 },
- { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 },
- { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 },
- { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 },
- { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 },
- { 0x03, 0x00, -1, 0x03, 0x001a, 0x0007 },
- { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 },
- { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 },
- { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 },
- { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 },
- { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 },
- { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 },
- { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 },
- { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 },
- { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 },
- { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 },
- { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 },
- { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 },
- { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 },
- { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 },
- { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 },
- { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 },
- { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 },
- { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 },
- { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 },
- { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 },
- { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 },
- { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 },
- { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 },
- { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 },
- { 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 },
- { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 },
- { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 },
- { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 },
- { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 },
- { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 },
- { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 },
- { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 },
- { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 },
- { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 },
- { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a },
- { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c },
- { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e },
- { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 },
- { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 },
- { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e },
- { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 },
- { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 },
- { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a },
- { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c },
- { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e },
- { 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 },
- { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 },
- { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e },
- { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 },
- { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 },
- { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a },
- { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c },
- { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e },
- { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 },
- { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 },
- { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e },
- { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 },
- { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 },
- { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a },
- { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c },
- { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e },
- { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 },
- { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 },
- { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e },
- { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 },
- { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 },
- { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a },
- { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c },
- { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e },
- { 0x04, 0x02, -1, 0x03, 0x0022, 0x0012 },
- { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 },
- { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e },
- { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 },
- { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 },
- { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a },
- { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c },
- { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e },
- { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 },
- { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 },
- { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e },
- { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 },
- { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 },
- { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a },
- { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c },
- { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e },
- { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 },
- { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 },
- { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e },
- { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 },
- { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 },
- { 0x05, 0x01, -1, 0x03, 0x0062, 0x000a },
- { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c },
- { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e },
- { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 },
- { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 },
- { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e },
- { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 },
- { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 },
- { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 },
- { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 },
- { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 },
- { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 },
- { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 },
- { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 },
- { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 },
- { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 },
- { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 },
- { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 },
- { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 },
- { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 },
- { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 },
- { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 },
- { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 },
- { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 },
- { 0x00, 0x05, -1, 0x03, 0x0002, 0x0046 },
- { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 },
- { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 },
- { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 },
- { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 },
- { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 },
- { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 },
- { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 },
- { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 },
- { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 },
- { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 },
- { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 },
- { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 },
- { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 },
- { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 },
- { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 },
- { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 },
- { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 },
- { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 },
- { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 },
- { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 },
- { 0x00, 0x09, -1, 0x03, 0x0004, 0x0246 },
- { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 },
- { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 },
- { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 },
- { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 },
- { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 },
- { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 },
- { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 },
- { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 },
- { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 },
- { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 },
- { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 },
- { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 },
- { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 },
- { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 },
- { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 },
- { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 },
- { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 },
- { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 },
- { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 },
- { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 },
- { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 },
- { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 },
- { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 },
- { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 },
- { 0x01, 0x0a, -1, 0x03, 0x0008, 0x0446 },
- { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 },
- { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 },
- { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 },
- { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 },
- { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 },
- { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 },
- { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 },
- { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 },
- { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 },
- { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 },
- { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 },
- { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 },
- { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 },
- { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 },
- { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 },
- { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 },
- { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 },
- { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 },
- { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 },
- { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 },
- { 0x08, 0x00, -1, 0x03, 0x0142, 0x0005 },
- { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 },
- { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 },
- { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 },
- { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 },
- { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 },
- { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 },
- { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 },
- { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 },
- { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 },
- { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 },
- { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 },
- { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 },
- { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 },
- { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 },
- { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 },
- { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 },
- { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 },
- { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 },
- { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 },
- { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 },
- { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 },
- { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 },
- { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 },
- { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0005 },
- { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 },
- { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 },
- { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 },
- { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 },
- { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 },
- { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 },
- { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 },
- { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 },
- { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 },
- { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 },
- { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 },
- { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 },
- { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 },
- { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 },
- { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 },
- { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 },
- { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 },
- { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 },
- { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 },
- { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 },
- { 0x02, 0x05, -1, 0x03, 0x000a, 0x0046 },
- { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 },
- { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 },
- { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 },
- { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 },
- { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 },
- { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 },
- { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 },
- { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 },
- { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 },
- { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 },
- { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 },
- { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 },
- { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 },
- { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 },
- { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 },
- { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 },
- { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 },
- { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 },
- { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 },
- { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 },
- { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 },
- { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 },
- { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 },
- { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 },
- { 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 },
- { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 },
- { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 },
- { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 },
- { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 },
- { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 },
- { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 },
- { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 },
- { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 },
- { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 },
- { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 },
- { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 },
- { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 },
- { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 },
- { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 },
- { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 },
- { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 },
- { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 },
- { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 },
- { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 },
- { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 },
- { 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 },
- { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 },
- { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 },
- { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 },
- { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 },
- { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 },
- { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 },
- { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 },
- { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 },
- { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 },
- { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 },
- { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 },
- { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 },
- { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 },
- { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 },
- { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 },
- { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 },
- { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 },
- { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a },
- { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c },
- { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e },
- { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 },
- { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 },
- { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e },
- { 0x06, 0x04, -1, 0x03, 0x0082, 0x0026 },
- { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 },
- { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a },
- { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c },
- { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e },
- { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 },
- { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 },
- { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e },
- { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 },
- { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 },
- { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a },
- { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c },
- { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e },
- { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 },
- { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 },
- { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e },
- { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 },
- { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 },
- { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a },
- { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c },
- { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e },
- { 0x09, 0x02, -1, 0x03, 0x0242, 0x0012 },
- { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 },
- { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e },
- { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 },
- { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 },
- { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a },
- { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c },
- { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e },
- { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 },
- { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 },
- { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e },
- { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 },
- { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 },
- { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a },
- { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c },
- { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e },
- { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 },
- { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 },
- { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e },
- { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 },
- { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 },
- { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a },
- { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c },
- { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e },
- { 0x0e, 0x02, -1, 0x03, 0x1842, 0x0012 },
- { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 },
- { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e },
- { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 },
- { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 },
- { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a },
- { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c },
- { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e },
- { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 },
- { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 },
- { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e },
- { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 },
- { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 },
- { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 },
- { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 },
- { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 },
- { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 },
- { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 },
- { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 },
- { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 },
- { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 },
- { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0046 },
- { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 },
- { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 },
- { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 },
- { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 },
- { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 },
- { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 },
- { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 },
- { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 },
- { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 },
- { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 },
- { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 },
- { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 },
- { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 },
- { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 },
- { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 },
- { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 },
- { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 },
- { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 },
- { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 },
- { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 },
- { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 },
- { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 },
- { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 },
- { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 },
- { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0066 },
- { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 },
- { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 },
- { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 },
- { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 },
- { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 },
- { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 },
- { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 },
- { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 },
- { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 },
- { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 },
- { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 },
- { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 },
- { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 },
- { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 },
- { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 },
- { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 },
- { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 },
- { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 },
- { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 },
- { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 },
- { 0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446 },
- { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 },
- { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 },
- { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 },
- { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 },
- { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 },
- { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 },
- { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 },
- { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 },
- { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 },
-};
-
-#endif /* BROTLI_DEC_PREFIX_H_ */
+ { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 },
+ { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 },
+ { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 },
+ { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 },
+ { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 },
+ { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 },
+ { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 },
+ { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 },
+ { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 },
+ { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 },
+ { 0x00, 0x00, 0, 0x02, 0x0001, 0x0004 },
+ { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 },
+ { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 },
+ { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 },
+ { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 },
+ { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 },
+ { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 },
+ { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 },
+ { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 },
+ { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 },
+ { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 },
+ { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 },
+ { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 },
+ { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 },
+ { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 },
+ { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 },
+ { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 },
+ { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 },
+ { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 },
+ { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 },
+ { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 },
+ { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 },
+ { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 },
+ { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 },
+ { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 },
+ { 0x00, 0x00, 0, 0x03, 0x0004, 0x0005 },
+ { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 },
+ { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 },
+ { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 },
+ { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 },
+ { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 },
+ { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 },
+ { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 },
+ { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 },
+ { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 },
+ { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 },
+ { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 },
+ { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 },
+ { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 },
+ { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 },
+ { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 },
+ { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 },
+ { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 },
+ { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 },
+ { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 },
+ { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 },
+ { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 },
+ { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 },
+ { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 },
+ { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 },
+ { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 },
+ { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 },
+ { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 },
+ { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 },
+ { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a },
+ { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c },
+ { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e },
+ { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 },
+ { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 },
+ { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e },
+ { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 },
+ { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 },
+ { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a },
+ { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c },
+ { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e },
+ { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 },
+ { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 },
+ { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e },
+ { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 },
+ { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 },
+ { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a },
+ { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c },
+ { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e },
+ { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 },
+ { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 },
+ { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e },
+ { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 },
+ { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 },
+ { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a },
+ { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c },
+ { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e },
+ { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 },
+ { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 },
+ { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e },
+ { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 },
+ { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 },
+ { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a },
+ { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c },
+ { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e },
+ { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 },
+ { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 },
+ { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e },
+ { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 },
+ { 0x00, 0x04, 0, 0x03, 0x0004, 0x0036 },
+ { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a },
+ { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c },
+ { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e },
+ { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 },
+ { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 },
+ { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e },
+ { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 },
+ { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 },
+ { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a },
+ { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c },
+ { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e },
+ { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 },
+ { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 },
+ { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e },
+ { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 },
+ { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 },
+ { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a },
+ { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c },
+ { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e },
+ { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 },
+ { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 },
+ { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e },
+ { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 },
+ { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 },
+ { 0x00, 0x00, -1, 0x00, 0x0000, 0x0002 },
+ { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 },
+ { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 },
+ { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 },
+ { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 },
+ { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 },
+ { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 },
+ { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 },
+ { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 },
+ { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 },
+ { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 },
+ { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 },
+ { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 },
+ { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 },
+ { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 },
+ { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 },
+ { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 },
+ { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 },
+ { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 },
+ { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 },
+ { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 },
+ { 0x00, 0x00, -1, 0x03, 0x0002, 0x0007 },
+ { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 },
+ { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 },
+ { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 },
+ { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 },
+ { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 },
+ { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 },
+ { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 },
+ { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 },
+ { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 },
+ { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 },
+ { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 },
+ { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 },
+ { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 },
+ { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 },
+ { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 },
+ { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 },
+ { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 },
+ { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 },
+ { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 },
+ { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 },
+ { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 },
+ { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 },
+ { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 },
+ { 0x00, 0x00, -1, 0x03, 0x0005, 0x0007 },
+ { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 },
+ { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 },
+ { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 },
+ { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 },
+ { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 },
+ { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 },
+ { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 },
+ { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 },
+ { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 },
+ { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 },
+ { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 },
+ { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 },
+ { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 },
+ { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 },
+ { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 },
+ { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 },
+ { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 },
+ { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 },
+ { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a },
+ { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c },
+ { 0x00, 0x02, -1, 0x03, 0x0000, 0x000e },
+ { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 },
+ { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 },
+ { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e },
+ { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 },
+ { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 },
+ { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a },
+ { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c },
+ { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e },
+ { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 },
+ { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 },
+ { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e },
+ { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 },
+ { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 },
+ { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a },
+ { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c },
+ { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e },
+ { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 },
+ { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 },
+ { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e },
+ { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 },
+ { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 },
+ { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a },
+ { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c },
+ { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e },
+ { 0x00, 0x02, -1, 0x03, 0x0003, 0x0012 },
+ { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 },
+ { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e },
+ { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 },
+ { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 },
+ { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a },
+ { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c },
+ { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e },
+ { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 },
+ { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 },
+ { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e },
+ { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 },
+ { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 },
+ { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a },
+ { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c },
+ { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e },
+ { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 },
+ { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 },
+ { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e },
+ { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 },
+ { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 },
+ { 0x01, 0x01, -1, 0x03, 0x0006, 0x000a },
+ { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c },
+ { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e },
+ { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 },
+ { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 },
+ { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e },
+ { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 },
+ { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 },
+ { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a },
+ { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c },
+ { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e },
+ { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 },
+ { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 },
+ { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e },
+ { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 },
+ { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 },
+ { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 },
+ { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 },
+ { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 },
+ { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 },
+ { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 },
+ { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 },
+ { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 },
+ { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 },
+ { 0x02, 0x00, -1, 0x00, 0x000e, 0x0002 },
+ { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 },
+ { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 },
+ { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 },
+ { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 },
+ { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 },
+ { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 },
+ { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 },
+ { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 },
+ { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 },
+ { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 },
+ { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 },
+ { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 },
+ { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 },
+ { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 },
+ { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 },
+ { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 },
+ { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 },
+ { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 },
+ { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 },
+ { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 },
+ { 0x03, 0x00, -1, 0x03, 0x001a, 0x0007 },
+ { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 },
+ { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 },
+ { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 },
+ { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 },
+ { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 },
+ { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 },
+ { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 },
+ { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 },
+ { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 },
+ { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 },
+ { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 },
+ { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 },
+ { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 },
+ { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 },
+ { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 },
+ { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 },
+ { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 },
+ { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 },
+ { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 },
+ { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 },
+ { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 },
+ { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 },
+ { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 },
+ { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 },
+ { 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 },
+ { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 },
+ { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 },
+ { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 },
+ { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 },
+ { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 },
+ { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 },
+ { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 },
+ { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 },
+ { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 },
+ { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a },
+ { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c },
+ { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e },
+ { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 },
+ { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 },
+ { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e },
+ { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 },
+ { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 },
+ { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a },
+ { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c },
+ { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e },
+ { 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 },
+ { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 },
+ { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e },
+ { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 },
+ { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 },
+ { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a },
+ { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c },
+ { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e },
+ { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 },
+ { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 },
+ { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e },
+ { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 },
+ { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 },
+ { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a },
+ { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c },
+ { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e },
+ { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 },
+ { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 },
+ { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e },
+ { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 },
+ { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 },
+ { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a },
+ { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c },
+ { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e },
+ { 0x04, 0x02, -1, 0x03, 0x0022, 0x0012 },
+ { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 },
+ { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e },
+ { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 },
+ { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 },
+ { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a },
+ { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c },
+ { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e },
+ { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 },
+ { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 },
+ { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e },
+ { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 },
+ { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 },
+ { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a },
+ { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c },
+ { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e },
+ { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 },
+ { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 },
+ { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e },
+ { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 },
+ { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 },
+ { 0x05, 0x01, -1, 0x03, 0x0062, 0x000a },
+ { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c },
+ { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e },
+ { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 },
+ { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 },
+ { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e },
+ { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 },
+ { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 },
+ { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 },
+ { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 },
+ { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 },
+ { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 },
+ { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 },
+ { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 },
+ { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 },
+ { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 },
+ { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 },
+ { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 },
+ { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 },
+ { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 },
+ { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 },
+ { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 },
+ { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 },
+ { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 },
+ { 0x00, 0x05, -1, 0x03, 0x0002, 0x0046 },
+ { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 },
+ { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 },
+ { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 },
+ { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 },
+ { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 },
+ { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 },
+ { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 },
+ { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 },
+ { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 },
+ { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 },
+ { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 },
+ { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 },
+ { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 },
+ { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 },
+ { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 },
+ { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 },
+ { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 },
+ { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 },
+ { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 },
+ { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 },
+ { 0x00, 0x09, -1, 0x03, 0x0004, 0x0246 },
+ { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 },
+ { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 },
+ { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 },
+ { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 },
+ { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 },
+ { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 },
+ { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 },
+ { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 },
+ { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 },
+ { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 },
+ { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 },
+ { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 },
+ { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 },
+ { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 },
+ { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 },
+ { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 },
+ { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 },
+ { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 },
+ { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 },
+ { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 },
+ { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 },
+ { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 },
+ { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 },
+ { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 },
+ { 0x01, 0x0a, -1, 0x03, 0x0008, 0x0446 },
+ { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 },
+ { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 },
+ { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 },
+ { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 },
+ { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 },
+ { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 },
+ { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 },
+ { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 },
+ { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 },
+ { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 },
+ { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 },
+ { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 },
+ { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 },
+ { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 },
+ { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 },
+ { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 },
+ { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 },
+ { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 },
+ { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 },
+ { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 },
+ { 0x08, 0x00, -1, 0x03, 0x0142, 0x0005 },
+ { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 },
+ { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 },
+ { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 },
+ { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 },
+ { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 },
+ { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 },
+ { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 },
+ { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 },
+ { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 },
+ { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 },
+ { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 },
+ { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 },
+ { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 },
+ { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 },
+ { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 },
+ { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 },
+ { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 },
+ { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 },
+ { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 },
+ { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 },
+ { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 },
+ { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 },
+ { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 },
+ { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0005 },
+ { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 },
+ { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 },
+ { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 },
+ { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 },
+ { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 },
+ { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 },
+ { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 },
+ { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 },
+ { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 },
+ { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 },
+ { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 },
+ { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 },
+ { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 },
+ { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 },
+ { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 },
+ { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 },
+ { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 },
+ { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 },
+ { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 },
+ { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 },
+ { 0x02, 0x05, -1, 0x03, 0x000a, 0x0046 },
+ { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 },
+ { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 },
+ { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 },
+ { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 },
+ { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 },
+ { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 },
+ { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 },
+ { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 },
+ { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 },
+ { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 },
+ { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 },
+ { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 },
+ { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 },
+ { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 },
+ { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 },
+ { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 },
+ { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 },
+ { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 },
+ { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 },
+ { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 },
+ { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 },
+ { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 },
+ { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 },
+ { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 },
+ { 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 },
+ { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 },
+ { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 },
+ { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 },
+ { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 },
+ { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 },
+ { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 },
+ { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 },
+ { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 },
+ { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 },
+ { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 },
+ { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 },
+ { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 },
+ { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 },
+ { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 },
+ { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 },
+ { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 },
+ { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 },
+ { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 },
+ { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 },
+ { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 },
+ { 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 },
+ { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 },
+ { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 },
+ { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 },
+ { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 },
+ { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 },
+ { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 },
+ { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 },
+ { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 },
+ { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 },
+ { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 },
+ { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 },
+ { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 },
+ { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 },
+ { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 },
+ { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 },
+ { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 },
+ { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 },
+ { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a },
+ { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c },
+ { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e },
+ { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 },
+ { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 },
+ { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e },
+ { 0x06, 0x04, -1, 0x03, 0x0082, 0x0026 },
+ { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 },
+ { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a },
+ { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c },
+ { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e },
+ { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 },
+ { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 },
+ { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e },
+ { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 },
+ { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 },
+ { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a },
+ { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c },
+ { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e },
+ { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 },
+ { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 },
+ { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e },
+ { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 },
+ { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 },
+ { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a },
+ { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c },
+ { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e },
+ { 0x09, 0x02, -1, 0x03, 0x0242, 0x0012 },
+ { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 },
+ { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e },
+ { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 },
+ { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 },
+ { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a },
+ { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c },
+ { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e },
+ { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 },
+ { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 },
+ { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e },
+ { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 },
+ { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 },
+ { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a },
+ { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c },
+ { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e },
+ { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 },
+ { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 },
+ { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e },
+ { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 },
+ { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 },
+ { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a },
+ { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c },
+ { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e },
+ { 0x0e, 0x02, -1, 0x03, 0x1842, 0x0012 },
+ { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 },
+ { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e },
+ { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 },
+ { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 },
+ { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a },
+ { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c },
+ { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e },
+ { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 },
+ { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 },
+ { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e },
+ { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 },
+ { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 },
+ { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 },
+ { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 },
+ { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 },
+ { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 },
+ { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 },
+ { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 },
+ { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 },
+ { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 },
+ { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0046 },
+ { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 },
+ { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 },
+ { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 },
+ { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 },
+ { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 },
+ { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 },
+ { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 },
+ { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 },
+ { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 },
+ { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 },
+ { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 },
+ { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 },
+ { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 },
+ { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 },
+ { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 },
+ { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 },
+ { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 },
+ { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 },
+ { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 },
+ { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 },
+ { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 },
+ { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 },
+ { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 },
+ { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 },
+ { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0066 },
+ { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 },
+ { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 },
+ { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 },
+ { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 },
+ { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 },
+ { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 },
+ { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 },
+ { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 },
+ { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 },
+ { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 },
+ { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 },
+ { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 },
+ { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 },
+ { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 },
+ { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 },
+ { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 },
+ { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 },
+ { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 },
+ { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 },
+ { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 },
+ { 0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446 },
+ { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 },
+ { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 },
+ { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 },
+ { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 },
+ { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 },
+ { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 },
+ { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 },
+ { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 },
+ { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 },
+};
+
+#endif /* BROTLI_DEC_PREFIX_H_ */
diff --git a/contrib/libs/brotli/dec/state.c b/contrib/libs/brotli/dec/state.c
index aebdee19c4..e0b37c2dcd 100644
--- a/contrib/libs/brotli/dec/state.c
+++ b/contrib/libs/brotli/dec/state.c
@@ -1,20 +1,20 @@
-/* Copyright 2015 Google Inc. All Rights Reserved.
-
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
-
+
#include "./state.h"
-
+
#include <stdlib.h> /* free, malloc */
-
+
#include <brotli/types.h>
-#include "./huffman.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
+#include "./huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
if (!alloc_func) {
@@ -30,101 +30,101 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */
BrotliInitBitReader(&s->br);
- s->state = BROTLI_STATE_UNINITED;
+ s->state = BROTLI_STATE_UNINITED;
s->large_window = 0;
- s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
- s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
- s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
- s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
- s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
- s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+ s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+ s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+ s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+ s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+ s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+ s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
-
+
s->buffer_length = 0;
s->loop_counter = 0;
s->pos = 0;
s->rb_roundtrips = 0;
s->partial_pos_out = 0;
- s->block_type_trees = NULL;
- s->block_len_trees = NULL;
- s->ringbuffer = NULL;
+ s->block_type_trees = NULL;
+ s->block_len_trees = NULL;
+ s->ringbuffer = NULL;
s->ringbuffer_size = 0;
s->new_ringbuffer_size = 0;
s->ringbuffer_mask = 0;
-
- s->context_map = NULL;
- s->context_modes = NULL;
- s->dist_context_map = NULL;
- s->context_map_slice = NULL;
- s->dist_context_map_slice = NULL;
-
+
+ s->context_map = NULL;
+ s->context_modes = NULL;
+ s->dist_context_map = NULL;
+ s->context_map_slice = NULL;
+ s->dist_context_map_slice = NULL;
+
s->sub_loop_counter = 0;
- s->literal_hgroup.codes = NULL;
- s->literal_hgroup.htrees = NULL;
- s->insert_copy_hgroup.codes = NULL;
- s->insert_copy_hgroup.htrees = NULL;
- s->distance_hgroup.codes = NULL;
- s->distance_hgroup.htrees = NULL;
-
+ s->literal_hgroup.codes = NULL;
+ s->literal_hgroup.htrees = NULL;
+ s->insert_copy_hgroup.codes = NULL;
+ s->insert_copy_hgroup.htrees = NULL;
+ s->distance_hgroup.codes = NULL;
+ s->distance_hgroup.htrees = NULL;
+
s->is_last_metablock = 0;
s->is_uncompressed = 0;
s->is_metadata = 0;
s->should_wrap_ringbuffer = 0;
s->canny_ringbuffer_allocation = 1;
-
- s->window_bits = 0;
- s->max_distance = 0;
- s->dist_rb[0] = 16;
- s->dist_rb[1] = 15;
- s->dist_rb[2] = 11;
- s->dist_rb[3] = 4;
- s->dist_rb_idx = 0;
- s->block_type_trees = NULL;
- s->block_len_trees = NULL;
-
- /* Make small negative indexes addressable. */
- s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
-
+
+ s->window_bits = 0;
+ s->max_distance = 0;
+ s->dist_rb[0] = 16;
+ s->dist_rb[1] = 15;
+ s->dist_rb[2] = 11;
+ s->dist_rb[3] = 4;
+ s->dist_rb_idx = 0;
+ s->block_type_trees = NULL;
+ s->block_len_trees = NULL;
+
+ /* Make small negative indexes addressable. */
+ s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
+
s->mtf_upper_bound = 63;
s->dictionary = BrotliGetDictionary();
s->transforms = BrotliGetTransforms();
return BROTLI_TRUE;
-}
-
+}
+
void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
- s->meta_block_remaining_len = 0;
+ s->meta_block_remaining_len = 0;
s->block_length[0] = 1U << 24;
s->block_length[1] = 1U << 24;
s->block_length[2] = 1U << 24;
- s->num_block_types[0] = 1;
- s->num_block_types[1] = 1;
- s->num_block_types[2] = 1;
- s->block_type_rb[0] = 1;
- s->block_type_rb[1] = 0;
- s->block_type_rb[2] = 1;
- s->block_type_rb[3] = 0;
- s->block_type_rb[4] = 1;
- s->block_type_rb[5] = 0;
- s->context_map = NULL;
- s->context_modes = NULL;
- s->dist_context_map = NULL;
- s->context_map_slice = NULL;
- s->literal_htree = NULL;
- s->dist_context_map_slice = NULL;
- s->dist_htree_index = 0;
+ s->num_block_types[0] = 1;
+ s->num_block_types[1] = 1;
+ s->num_block_types[2] = 1;
+ s->block_type_rb[0] = 1;
+ s->block_type_rb[1] = 0;
+ s->block_type_rb[2] = 1;
+ s->block_type_rb[3] = 0;
+ s->block_type_rb[4] = 1;
+ s->block_type_rb[5] = 0;
+ s->context_map = NULL;
+ s->context_modes = NULL;
+ s->dist_context_map = NULL;
+ s->context_map_slice = NULL;
+ s->literal_htree = NULL;
+ s->dist_context_map_slice = NULL;
+ s->dist_htree_index = 0;
s->context_lookup = NULL;
- s->literal_hgroup.codes = NULL;
- s->literal_hgroup.htrees = NULL;
- s->insert_copy_hgroup.codes = NULL;
- s->insert_copy_hgroup.htrees = NULL;
- s->distance_hgroup.codes = NULL;
- s->distance_hgroup.htrees = NULL;
-}
-
+ s->literal_hgroup.codes = NULL;
+ s->literal_hgroup.htrees = NULL;
+ s->insert_copy_hgroup.codes = NULL;
+ s->insert_copy_hgroup.htrees = NULL;
+ s->distance_hgroup.codes = NULL;
+ s->distance_hgroup.htrees = NULL;
+}
+
void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
BROTLI_DECODER_FREE(s, s->context_modes);
BROTLI_DECODER_FREE(s, s->context_map);
@@ -132,15 +132,15 @@ void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
BROTLI_DECODER_FREE(s, s->literal_hgroup.htrees);
BROTLI_DECODER_FREE(s, s->insert_copy_hgroup.htrees);
BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees);
-}
-
+}
+
void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
BrotliDecoderStateCleanupAfterMetablock(s);
-
+
BROTLI_DECODER_FREE(s, s->ringbuffer);
BROTLI_DECODER_FREE(s, s->block_type_trees);
-}
-
+}
+
BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol,
uint32_t ntrees) {
@@ -159,6 +159,6 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
return !!p;
}
-#if defined(__cplusplus) || defined(c_plusplus)
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
+#endif
diff --git a/contrib/libs/brotli/dec/state.h b/contrib/libs/brotli/dec/state.h
index b21553ee72..d28b63920e 100644
--- a/contrib/libs/brotli/dec/state.h
+++ b/contrib/libs/brotli/dec/state.h
@@ -1,110 +1,110 @@
-/* Copyright 2015 Google Inc. All Rights Reserved.
-
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Brotli state for partial streaming decoding. */
-
-#ifndef BROTLI_DEC_STATE_H_
-#define BROTLI_DEC_STATE_H_
-
+*/
+
+/* Brotli state for partial streaming decoding. */
+
+#ifndef BROTLI_DEC_STATE_H_
+#define BROTLI_DEC_STATE_H_
+
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include "../common/transform.h"
#include <brotli/types.h>
-#include "./bit_reader.h"
-#include "./huffman.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-typedef enum {
- BROTLI_STATE_UNINITED,
+#include "./bit_reader.h"
+#include "./huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef enum {
+ BROTLI_STATE_UNINITED,
BROTLI_STATE_LARGE_WINDOW_BITS,
BROTLI_STATE_INITIALIZE,
- BROTLI_STATE_METABLOCK_BEGIN,
- BROTLI_STATE_METABLOCK_HEADER,
+ BROTLI_STATE_METABLOCK_BEGIN,
+ BROTLI_STATE_METABLOCK_HEADER,
BROTLI_STATE_METABLOCK_HEADER_2,
- BROTLI_STATE_CONTEXT_MODES,
- BROTLI_STATE_COMMAND_BEGIN,
- BROTLI_STATE_COMMAND_INNER,
+ BROTLI_STATE_CONTEXT_MODES,
+ BROTLI_STATE_COMMAND_BEGIN,
+ BROTLI_STATE_COMMAND_INNER,
BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
BROTLI_STATE_COMMAND_POST_WRAP_COPY,
- BROTLI_STATE_UNCOMPRESSED,
- BROTLI_STATE_METADATA,
- BROTLI_STATE_COMMAND_INNER_WRITE,
- BROTLI_STATE_METABLOCK_DONE,
- BROTLI_STATE_COMMAND_POST_WRITE_1,
- BROTLI_STATE_COMMAND_POST_WRITE_2,
- BROTLI_STATE_HUFFMAN_CODE_0,
- BROTLI_STATE_HUFFMAN_CODE_1,
- BROTLI_STATE_HUFFMAN_CODE_2,
- BROTLI_STATE_HUFFMAN_CODE_3,
- BROTLI_STATE_CONTEXT_MAP_1,
- BROTLI_STATE_CONTEXT_MAP_2,
- BROTLI_STATE_TREE_GROUP,
- BROTLI_STATE_DONE
-} BrotliRunningState;
-
-typedef enum {
- BROTLI_STATE_METABLOCK_HEADER_NONE,
- BROTLI_STATE_METABLOCK_HEADER_EMPTY,
- BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
- BROTLI_STATE_METABLOCK_HEADER_SIZE,
- BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
- BROTLI_STATE_METABLOCK_HEADER_RESERVED,
- BROTLI_STATE_METABLOCK_HEADER_BYTES,
- BROTLI_STATE_METABLOCK_HEADER_METADATA
-} BrotliRunningMetablockHeaderState;
-
-typedef enum {
- BROTLI_STATE_UNCOMPRESSED_NONE,
+ BROTLI_STATE_UNCOMPRESSED,
+ BROTLI_STATE_METADATA,
+ BROTLI_STATE_COMMAND_INNER_WRITE,
+ BROTLI_STATE_METABLOCK_DONE,
+ BROTLI_STATE_COMMAND_POST_WRITE_1,
+ BROTLI_STATE_COMMAND_POST_WRITE_2,
+ BROTLI_STATE_HUFFMAN_CODE_0,
+ BROTLI_STATE_HUFFMAN_CODE_1,
+ BROTLI_STATE_HUFFMAN_CODE_2,
+ BROTLI_STATE_HUFFMAN_CODE_3,
+ BROTLI_STATE_CONTEXT_MAP_1,
+ BROTLI_STATE_CONTEXT_MAP_2,
+ BROTLI_STATE_TREE_GROUP,
+ BROTLI_STATE_DONE
+} BrotliRunningState;
+
+typedef enum {
+ BROTLI_STATE_METABLOCK_HEADER_NONE,
+ BROTLI_STATE_METABLOCK_HEADER_EMPTY,
+ BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
+ BROTLI_STATE_METABLOCK_HEADER_SIZE,
+ BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
+ BROTLI_STATE_METABLOCK_HEADER_RESERVED,
+ BROTLI_STATE_METABLOCK_HEADER_BYTES,
+ BROTLI_STATE_METABLOCK_HEADER_METADATA
+} BrotliRunningMetablockHeaderState;
+
+typedef enum {
+ BROTLI_STATE_UNCOMPRESSED_NONE,
BROTLI_STATE_UNCOMPRESSED_WRITE
-} BrotliRunningUncompressedState;
-
-typedef enum {
- BROTLI_STATE_TREE_GROUP_NONE,
- BROTLI_STATE_TREE_GROUP_LOOP
-} BrotliRunningTreeGroupState;
-
-typedef enum {
- BROTLI_STATE_CONTEXT_MAP_NONE,
- BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
- BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
+} BrotliRunningUncompressedState;
+
+typedef enum {
+ BROTLI_STATE_TREE_GROUP_NONE,
+ BROTLI_STATE_TREE_GROUP_LOOP
+} BrotliRunningTreeGroupState;
+
+typedef enum {
+ BROTLI_STATE_CONTEXT_MAP_NONE,
+ BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
+ BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
BROTLI_STATE_CONTEXT_MAP_DECODE,
BROTLI_STATE_CONTEXT_MAP_TRANSFORM
-} BrotliRunningContextMapState;
-
-typedef enum {
- BROTLI_STATE_HUFFMAN_NONE,
+} BrotliRunningContextMapState;
+
+typedef enum {
+ BROTLI_STATE_HUFFMAN_NONE,
BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
BROTLI_STATE_HUFFMAN_SIMPLE_READ,
BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
BROTLI_STATE_HUFFMAN_COMPLEX,
- BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
-} BrotliRunningHuffmanState;
-
-typedef enum {
- BROTLI_STATE_DECODE_UINT8_NONE,
- BROTLI_STATE_DECODE_UINT8_SHORT,
- BROTLI_STATE_DECODE_UINT8_LONG
-} BrotliRunningDecodeUint8State;
-
+ BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
+} BrotliRunningHuffmanState;
+
+typedef enum {
+ BROTLI_STATE_DECODE_UINT8_NONE,
+ BROTLI_STATE_DECODE_UINT8_SHORT,
+ BROTLI_STATE_DECODE_UINT8_LONG
+} BrotliRunningDecodeUint8State;
+
typedef enum {
BROTLI_STATE_READ_BLOCK_LENGTH_NONE,
BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
} BrotliRunningReadBlockLengthState;
struct BrotliDecoderStateStruct {
- BrotliRunningState state;
+ BrotliRunningState state;
- /* This counter is reused for several disjoint loops. */
+ /* This counter is reused for several disjoint loops. */
int loop_counter;
- BrotliBitReader br;
+ BrotliBitReader br;
brotli_alloc_func alloc_func;
brotli_free_func free_func;
@@ -117,99 +117,99 @@ struct BrotliDecoderStateStruct {
} buffer;
uint32_t buffer_length;
- int pos;
- int max_backward_distance;
- int max_distance;
- int ringbuffer_size;
- int ringbuffer_mask;
- int dist_rb_idx;
- int dist_rb[4];
+ int pos;
+ int max_backward_distance;
+ int max_distance;
+ int ringbuffer_size;
+ int ringbuffer_mask;
+ int dist_rb_idx;
+ int dist_rb[4];
int error_code;
uint32_t sub_loop_counter;
- uint8_t* ringbuffer;
- uint8_t* ringbuffer_end;
- HuffmanCode* htree_command;
+ uint8_t* ringbuffer;
+ uint8_t* ringbuffer_end;
+ HuffmanCode* htree_command;
const uint8_t* context_lookup;
- uint8_t* context_map_slice;
- uint8_t* dist_context_map_slice;
-
+ uint8_t* context_map_slice;
+ uint8_t* dist_context_map_slice;
+
/* This ring buffer holds a few past copy distances that will be used by
some special distance codes. */
- HuffmanTreeGroup literal_hgroup;
- HuffmanTreeGroup insert_copy_hgroup;
- HuffmanTreeGroup distance_hgroup;
- HuffmanCode* block_type_trees;
- HuffmanCode* block_len_trees;
- /* This is true if the literal context map histogram type always matches the
+ HuffmanTreeGroup literal_hgroup;
+ HuffmanTreeGroup insert_copy_hgroup;
+ HuffmanTreeGroup distance_hgroup;
+ HuffmanCode* block_type_trees;
+ HuffmanCode* block_len_trees;
+ /* This is true if the literal context map histogram type always matches the
block type. It is then not needed to keep the context (faster decoding). */
- int trivial_literal_context;
+ int trivial_literal_context;
  /* Distance context is valid after the command is decoded and before the distance
     is computed. After distance computation it is used as a temporary variable. */
- int distance_context;
- int meta_block_remaining_len;
+ int distance_context;
+ int meta_block_remaining_len;
uint32_t block_length_index;
uint32_t block_length[3];
uint32_t num_block_types[3];
uint32_t block_type_rb[6];
uint32_t distance_postfix_bits;
uint32_t num_direct_distance_codes;
- int distance_postfix_mask;
+ int distance_postfix_mask;
uint32_t num_dist_htrees;
- uint8_t* dist_context_map;
+ uint8_t* dist_context_map;
HuffmanCode* literal_htree;
- uint8_t dist_htree_index;
+ uint8_t dist_htree_index;
uint32_t repeat_code_len;
uint32_t prev_code_len;
-
- int copy_length;
- int distance_code;
-
+
+ int copy_length;
+ int distance_code;
+
/* For partial write operations. */
size_t rb_roundtrips; /* how many times we went around the ring-buffer */
size_t partial_pos_out; /* how much output to the user in total */
-
+
/* For ReadHuffmanCode. */
- uint32_t symbol;
- uint32_t repeat;
- uint32_t space;
-
- HuffmanCode table[32];
+ uint32_t symbol;
+ uint32_t repeat;
+ uint32_t space;
+
+ HuffmanCode table[32];
/* List of heads of symbol chains. */
- uint16_t* symbol_lists;
- /* Storage from symbol_lists. */
- uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
+ uint16_t* symbol_lists;
+ /* Storage from symbol_lists. */
+ uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
BROTLI_NUM_COMMAND_SYMBOLS];
- /* Tails of symbol chains. */
- int next_symbol[32];
+ /* Tails of symbol chains. */
+ int next_symbol[32];
uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
/* Population counts for the code lengths. */
- uint16_t code_length_histo[16];
-
+ uint16_t code_length_histo[16];
+
/* For HuffmanTreeGroupDecode. */
- int htree_index;
- HuffmanCode* next;
-
+ int htree_index;
+ HuffmanCode* next;
+
/* For DecodeContextMap. */
uint32_t context_index;
uint32_t max_run_length_prefix;
uint32_t code;
HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
-
+
/* For InverseMoveToFrontTransform. */
uint32_t mtf_upper_bound;
uint32_t mtf[64 + 1];
-
+
/* Less used attributes are at the end of this struct. */
/* States inside function calls. */
- BrotliRunningMetablockHeaderState substate_metablock_header;
- BrotliRunningTreeGroupState substate_tree_group;
- BrotliRunningContextMapState substate_context_map;
- BrotliRunningUncompressedState substate_uncompressed;
- BrotliRunningHuffmanState substate_huffman;
- BrotliRunningDecodeUint8State substate_decode_uint8;
+ BrotliRunningMetablockHeaderState substate_metablock_header;
+ BrotliRunningTreeGroupState substate_tree_group;
+ BrotliRunningContextMapState substate_context_map;
+ BrotliRunningUncompressedState substate_uncompressed;
+ BrotliRunningHuffmanState substate_huffman;
+ BrotliRunningDecodeUint8State substate_decode_uint8;
BrotliRunningReadBlockLengthState substate_read_block_length;
-
+
unsigned int is_last_metablock : 1;
unsigned int is_uncompressed : 1;
unsigned int is_metadata : 1;
@@ -217,20 +217,20 @@ struct BrotliDecoderStateStruct {
unsigned int canny_ringbuffer_allocation : 1;
unsigned int large_window : 1;
unsigned int size_nibbles : 8;
- uint32_t window_bits;
-
+ uint32_t window_bits;
+
int new_ringbuffer_size;
-
+
uint32_t num_literal_htrees;
- uint8_t* context_map;
- uint8_t* context_modes;
+ uint8_t* context_map;
+ uint8_t* context_modes;
const BrotliDictionary* dictionary;
const BrotliTransforms* transforms;
-
+
uint32_t trivial_literal_contexts[8]; /* 256 bits */
};
-
+
typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
#define BrotliDecoderState BrotliDecoderStateInternal
@@ -251,8 +251,8 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
X = NULL; \
}
-#if defined(__cplusplus) || defined(c_plusplus)
+#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
-#endif
-
-#endif /* BROTLI_DEC_STATE_H_ */
+#endif
+
+#endif /* BROTLI_DEC_STATE_H_ */
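The dist_rb[4] / dist_rb_idx fields above implement the small ring of recent copy distances that the special distance codes refer back to. A minimal standalone model of that bookkeeping (names here are hypothetical; the real decoder updates these fields inline in decode.c):

#include <stdint.h>

/* Hypothetical standalone model of the decoder's 4-entry distance ring. */
typedef struct {
  int dist_rb[4];
  uint32_t dist_rb_idx;
} DistanceRingSketch;

/* Remember a newly decoded copy distance. */
static void DistanceRingPush(DistanceRingSketch* r, int distance) {
  r->dist_rb[r->dist_rb_idx & 3] = distance;
  r->dist_rb_idx++;
}

/* Fetch the k-th most recent distance (k = 0 is the latest). */
static int DistanceRingGet(const DistanceRingSketch* r, uint32_t k) {
  return r->dist_rb[(r->dist_rb_idx - 1u - k) & 3];
}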
diff --git a/contrib/libs/brotli/dec/ya.make b/contrib/libs/brotli/dec/ya.make
index c510ee5a5d..0f482f36ed 100644
--- a/contrib/libs/brotli/dec/ya.make
+++ b/contrib/libs/brotli/dec/ya.make
@@ -1,6 +1,6 @@
-LIBRARY()
-
-LICENSE(MIT)
+LIBRARY()
+
+LICENSE(MIT)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
@@ -9,22 +9,22 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
-NO_UTIL()
-NO_COMPILER_WARNINGS()
-
+NO_UTIL()
+
+NO_COMPILER_WARNINGS()
+
ADDINCL(GLOBAL contrib/libs/brotli/include)
PEERDIR(
contrib/libs/brotli/common
)
-SRCS(
- bit_reader.c
- decode.c
- huffman.c
- state.c
-)
-
-END()
+SRCS(
+ bit_reader.c
+ decode.c
+ huffman.c
+ state.c
+)
+
+END()
diff --git a/contrib/libs/brotli/enc/backward_references.h b/contrib/libs/brotli/enc/backward_references.h
index 7b3c04a8ff..3a4146647c 100644
--- a/contrib/libs/brotli/enc/backward_references.h
+++ b/contrib/libs/brotli/enc/backward_references.h
@@ -1,26 +1,26 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
-#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
-#define BROTLI_ENC_BACKWARD_REFERENCES_H_
-
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
-#include "./hash.h"
+#include "./hash.h"
#include "./quality.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* "commands" points to the next output command to write to, "*num_commands" is
initially the total amount of commands output by previous
CreateBackwardReferences calls, and must be incremented by the amount written
@@ -30,9 +30,9 @@ BROTLI_INTERNAL void BrotliCreateBackwardReferences(
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
diff --git a/contrib/libs/brotli/enc/bit_cost.h b/contrib/libs/brotli/enc/bit_cost.h
index 30324d38c7..6586469e62 100644
--- a/contrib/libs/brotli/enc/bit_cost.h
+++ b/contrib/libs/brotli/enc/bit_cost.h
@@ -1,63 +1,63 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to estimate the bit cost of Huffman trees. */
-#ifndef BROTLI_ENC_BIT_COST_H_
-#define BROTLI_ENC_BIT_COST_H_
-
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./fast_log.h"
+#include "./fast_log.h"
#include "./histogram.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
static BROTLI_INLINE double ShannonEntropy(
const uint32_t* population, size_t size, size_t* total) {
size_t sum = 0;
- double retval = 0;
+ double retval = 0;
const uint32_t* population_end = population + size;
size_t p;
- if (size & 1) {
- goto odd_number_of_elements_left;
- }
- while (population < population_end) {
- p = *population++;
- sum += p;
+ if (size & 1) {
+ goto odd_number_of_elements_left;
+ }
+ while (population < population_end) {
+ p = *population++;
+ sum += p;
retval -= (double)p * FastLog2(p);
- odd_number_of_elements_left:
- p = *population++;
- sum += p;
+ odd_number_of_elements_left:
+ p = *population++;
+ sum += p;
retval -= (double)p * FastLog2(p);
- }
+ }
if (sum) retval += (double)sum * FastLog2(sum);
- *total = sum;
- return retval;
-}
-
+ *total = sum;
+ return retval;
+}
+
static BROTLI_INLINE double BitsEntropy(
const uint32_t* population, size_t size) {
size_t sum;
- double retval = ShannonEntropy(population, size, &sum);
- if (retval < sum) {
+ double retval = ShannonEntropy(population, size, &sum);
+ if (retval < sum) {
/* At least one bit per literal is needed. */
retval = (double)sum;
- }
- return retval;
-}
-
+ }
+ return retval;
+}
+
BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_BIT_COST_H_ */
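ShannonEntropy above accumulates -p*log2(p) over the histogram and then adds sum*log2(sum), i.e. the total entropy cost in bits of coding the population; BitsEntropy additionally enforces at least one bit per symbol. A plain, unoptimized reference version of the same computation, useful for sanity-checking the unrolled loop (a sketch, not part of the library):

#include <math.h>
#include <stddef.h>
#include <stdint.h>

/* Reference entropy cost in bits of a histogram: sum_i -p_i * log2(p_i / total). */
static double ReferenceShannonEntropy(const uint32_t* population, size_t size,
                                      size_t* total) {
  double retval = 0.0;
  size_t sum = 0;
  size_t i;
  for (i = 0; i < size; ++i) {
    sum += population[i];
    if (population[i] > 0) {
      retval -= (double)population[i] * log2((double)population[i]);
    }
  }
  if (sum) retval += (double)sum * log2((double)sum);
  *total = sum;
  return retval;
}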
diff --git a/contrib/libs/brotli/enc/block_splitter.h b/contrib/libs/brotli/enc/block_splitter.h
index 2fd1cb417a..a5e006c4b3 100644
--- a/contrib/libs/brotli/enc/block_splitter.h
+++ b/contrib/libs/brotli/enc/block_splitter.h
@@ -1,38 +1,38 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Block split point selection utilities. */
-#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
-#define BROTLI_ENC_BLOCK_SPLITTER_H_
-
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./command.h"
+#include "./command.h"
#include "./memory.h"
#include "./quality.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
typedef struct BlockSplit {
size_t num_types; /* Amount of distinct types */
size_t num_blocks; /* Amount of values in types and length */
uint8_t* types;
uint32_t* lengths;
-
+
size_t types_alloc_size;
size_t lengths_alloc_size;
} BlockSplit;
-
+
BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
BlockSplit* self);
-
+
BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
const Command* cmds,
const size_t num_commands,
@@ -43,9 +43,9 @@ BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
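A BlockSplit encodes a partition of a value stream: types[i] is the block type of the i-th block and lengths[i] is how many values that block covers, with num_types distinct types overall. An illustrative (hypothetical) instance:

#include <stdint.h>

/* A stream of 10 literals split into three blocks:
   5 literals of type 0, 3 of type 1, then 2 more of type 0. */
static const uint8_t  kExampleTypes[]   = { 0, 1, 0 };
static const uint32_t kExampleLengths[] = { 5, 3, 2 };
/* num_types = 2 (types 0 and 1), num_blocks = 3,
   and the lengths sum to the stream length (10). */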
diff --git a/contrib/libs/brotli/enc/brotli_bit_stream.h b/contrib/libs/brotli/enc/brotli_bit_stream.h
index 42663c6704..2ed703bf79 100644
--- a/contrib/libs/brotli/enc/brotli_bit_stream.h
+++ b/contrib/libs/brotli/enc/brotli_bit_stream.h
@@ -1,5 +1,5 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
@@ -13,32 +13,32 @@
is called "storage" and the index to the bit is called storage_ix
in function arguments. */
-#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
-#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
-
+#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+
#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
#include "./entropy_encode.h"
#include "./memory.h"
-#include "./metablock.h"
-
+#include "./metablock.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* All Store functions here will use a storage_ix, which is always the bit
position for the current storage. */
-
+
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
-
+
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
uint8_t* storage);
-
+
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
@@ -47,7 +47,7 @@ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
const BrotliEncoderParams* params, ContextType literal_context_mode,
const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
size_t* storage_ix, uint8_t* storage);
-
+
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
@@ -57,7 +57,7 @@ BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
const Command* commands, size_t n_commands,
size_t* storage_ix, uint8_t* storage);
-
+
 /* Same as above, but uses static prefix codes for histograms with only a few

symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
@@ -67,7 +67,7 @@ BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
const Command* commands, size_t n_commands,
size_t* storage_ix, uint8_t* storage);
-
+
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
REQUIRES: length > 0
@@ -76,9 +76,9 @@ BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
size_t position, size_t mask, size_t len,
size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
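All Store functions above append to the byte array storage at the bit offset *storage_ix, LSB-first within each byte. A deliberately simple bit-at-a-time illustration of that convention (a sketch only; the encoder's real, word-oriented helpers are in write_bits.h):

#include <stddef.h>
#include <stdint.h>

/* Append the low n_bits of value at bit position *storage_ix, LSB first.
   storage must be zero-initialized past the current position. */
static void AppendBitsSketch(size_t n_bits, uint64_t value,
                             size_t* storage_ix, uint8_t* storage) {
  size_t i;
  for (i = 0; i < n_bits; ++i) {
    size_t pos = *storage_ix + i;
    storage[pos >> 3] |= (uint8_t)(((value >> i) & 1u) << (pos & 7));
  }
  *storage_ix += n_bits;
}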
diff --git a/contrib/libs/brotli/enc/cluster.h b/contrib/libs/brotli/enc/cluster.h
index daf573dc65..bb26124d24 100644
--- a/contrib/libs/brotli/enc/cluster.h
+++ b/contrib/libs/brotli/enc/cluster.h
@@ -1,48 +1,48 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions for clustering similar histograms together. */
-#ifndef BROTLI_ENC_CLUSTER_H_
-#define BROTLI_ENC_CLUSTER_H_
-
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./histogram.h"
+#include "./histogram.h"
#include "./memory.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
typedef struct HistogramPair {
uint32_t idx1;
uint32_t idx2;
- double cost_combo;
- double cost_diff;
+ double cost_combo;
+ double cost_diff;
} HistogramPair;
-
+
#define CODE(X) /* Declaration */;
-
+
#define FN(X) X ## Literal
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
-
+
#define FN(X) X ## Command
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
-
+
#define FN(X) X ## Distance
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
-
+
#undef CODE
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_CLUSTER_H_ */
diff --git a/contrib/libs/brotli/enc/command.h b/contrib/libs/brotli/enc/command.h
index 181510cd67..1aac85689b 100644
--- a/contrib/libs/brotli/enc/command.h
+++ b/contrib/libs/brotli/enc/command.h
@@ -1,25 +1,25 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* This class models a sequence of literals and a backward reference copy. */
-#ifndef BROTLI_ENC_COMMAND_H_
-#define BROTLI_ENC_COMMAND_H_
-
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
#include "../common/constants.h"
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./fast_log.h"
+#include "./fast_log.h"
#include "./params.h"
-#include "./prefix.h"
-
+#include "./prefix.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
@@ -28,44 +28,44 @@ static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
-
+
static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
- if (insertlen < 6) {
+ if (insertlen < 6) {
return (uint16_t)insertlen;
- } else if (insertlen < 130) {
+ } else if (insertlen < 130) {
uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u;
return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2);
- } else if (insertlen < 2114) {
+ } else if (insertlen < 2114) {
return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
- } else if (insertlen < 6210) {
+ } else if (insertlen < 6210) {
return 21u;
- } else if (insertlen < 22594) {
+ } else if (insertlen < 22594) {
return 22u;
- } else {
+ } else {
return 23u;
- }
-}
-
+ }
+}
+
static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
- if (copylen < 10) {
+ if (copylen < 10) {
return (uint16_t)(copylen - 2);
- } else if (copylen < 134) {
+ } else if (copylen < 134) {
uint32_t nbits = Log2FloorNonZero(copylen - 6) - 1u;
return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4);
- } else if (copylen < 2118) {
+ } else if (copylen < 2118) {
return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
- } else {
+ } else {
return 23u;
- }
-}
-
+ }
+}
+
static BROTLI_INLINE uint16_t CombineLengthCodes(
uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) {
uint16_t bits64 =
(uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u));
if (use_last_distance && inscode < 8u && copycode < 16u) {
return (copycode < 8u) ? bits64 : (bits64 | 64u);
- } else {
+ } else {
/* Specification: 5 Encoding of ... (last table) */
/* offset = 2 * index, where index is in range [0..8] */
uint32_t offset = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u));
@@ -77,29 +77,29 @@ static BROTLI_INLINE uint16_t CombineLengthCodes(
Magic constant is shifted 6 bits left, to avoid final multiplication. */
offset = (offset << 5u) + 0x40u + ((0x520D40u >> offset) & 0xC0u);
return (uint16_t)(offset | bits64);
- }
-}
-
+ }
+}
+
static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
BROTLI_BOOL use_last_distance,
uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
-}
-
+}
+
static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
return kInsBase[inscode];
}
-
+
static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
return kInsExtra[inscode];
}
-
+
static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
-
+
static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
return kCopyExtra[copycode];
}
@@ -161,28 +161,28 @@ static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U;
return ((offset + extra) << dist->distance_postfix_bits) + lcode +
dist->num_direct_distance_codes + BROTLI_NUM_DISTANCE_SHORT_CODES;
- }
+ }
}
-
+
static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
uint32_t r = self->cmd_prefix_ >> 6;
uint32_t c = self->cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
- }
+ }
return 3;
}
-
+
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
return self->copy_len_ & 0x1FFFFFF;
}
-
+
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
uint32_t modifier = self->copy_len_ >> 25;
int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1)));
return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta);
}
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
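GetInsertLengthCode buckets an insert length into one of 24 prefix codes, with kInsBase[code] the smallest length in the bucket and kInsExtra[code] the number of extra bits that select a value inside it. Assuming this header is in scope, a small consistency check of that relationship (illustrative, not part of the library):

/* For any insertlen, code = GetInsertLengthCode(insertlen) should satisfy
   kInsBase[code] <= insertlen < kInsBase[code] + (1u << kInsExtra[code]). */
static int InsertCodeRoundTrips(size_t insertlen) {
  uint16_t code = GetInsertLengthCode(insertlen);
  uint32_t base = GetInsertBase(code);
  uint32_t span = 1u << GetInsertExtra(code);
  return insertlen >= base && insertlen < base + span;
}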
diff --git a/contrib/libs/brotli/enc/dictionary_hash.h b/contrib/libs/brotli/enc/dictionary_hash.h
index a7fafbe065..b3bb9599f4 100644
--- a/contrib/libs/brotli/enc/dictionary_hash.h
+++ b/contrib/libs/brotli/enc/dictionary_hash.h
@@ -1,24 +1,24 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Hash table on the 4-byte prefixes of static dictionary words. */
-#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
-#define BROTLI_ENC_DICTIONARY_HASH_H_
-
+#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
+#define BROTLI_ENC_DICTIONARY_HASH_H_
+
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
extern const uint16_t kStaticDictionaryHash[32768];
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */
diff --git a/contrib/libs/brotli/enc/entropy_encode.h b/contrib/libs/brotli/enc/entropy_encode.h
index b1f02d5a35..f23d9c379d 100644
--- a/contrib/libs/brotli/enc/entropy_encode.h
+++ b/contrib/libs/brotli/enc/entropy_encode.h
@@ -1,57 +1,57 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Entropy encoding (Huffman) utilities. */
-#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
-#define BROTLI_ENC_ENTROPY_ENCODE_H_
-
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* A node of a Huffman tree. */
typedef struct HuffmanTree {
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
} HuffmanTree;
-
+
static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
int16_t left, int16_t right) {
self->total_count_ = count;
self->index_left_ = left;
self->index_right_or_value_ = right;
}
-
+
 /* Returns 1 if assignment of depths succeeded, otherwise 0. */
BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
-
+
/* This function will create a Huffman tree.
-
+
The (data,length) contains the population counts.
The tree_limit is the maximum bit depth of the Huffman codes.
-
+
The depth contains the tree, i.e., how many bits are used for
the symbol.
-
+
The actual Huffman tree is constructed in the tree[] array, which has to
be at least 2 * length + 1 long.
-
+
See http://en.wikipedia.org/wiki/Huffman_coding */
BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t* depth);
-
+
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its RLE-part will be more
likely to compress this data more efficiently.
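BrotliCreateHuffmanTree takes population counts, a maximum code length, scratch space of at least 2 * length + 1 nodes, and writes one code length per symbol into depth. A hedged usage sketch (assumes the brotli encoder sources are available; BuildExampleDepths is a hypothetical caller):

#include "./entropy_encode.h"  /* brotli encoder internals assumed available */

static void BuildExampleDepths(void) {
  uint32_t counts[4] = { 10, 3, 3, 1 };  /* population counts per symbol */
  HuffmanTree tree[2 * 4 + 1];           /* scratch: >= 2 * length + 1 nodes */
  uint8_t depth[4];
  BrotliCreateHuffmanTree(counts, 4, 15, tree, depth);
  /* depth[i] now holds the code length (in bits, <= 15) assigned to symbol i. */
}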
diff --git a/contrib/libs/brotli/enc/fast_log.h b/contrib/libs/brotli/enc/fast_log.h
index 7b5d067de6..cade1235ad 100644
--- a/contrib/libs/brotli/enc/fast_log.h
+++ b/contrib/libs/brotli/enc/fast_log.h
@@ -1,147 +1,147 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Utilities for fast computation of logarithms. */
-#ifndef BROTLI_ENC_FAST_LOG_H_
-#define BROTLI_ENC_FAST_LOG_H_
-
-#include <math.h>
-
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <math.h>
+
#include "../common/platform.h"
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
-#endif
-
+#endif
+
static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
/* TODO: generalize and move to platform.h */
#if BROTLI_GNUC_HAS_BUILTIN(__builtin_clz, 3, 4, 0) || \
BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
return 31u ^ (uint32_t)__builtin_clz((uint32_t)n);
-#else
+#else
uint32_t result = 0;
- while (n >>= 1) result++;
- return result;
-#endif
-}
-
+ while (n >>= 1) result++;
+ return result;
+#endif
+}
+
/* A lookup table for small values of log2(int) to be used in entropy
computation.
-
+
", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
-static const float kLog2Table[] = {
- 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
- 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
- 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
- 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
- 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
- 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
- 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
- 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
- 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
- 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
- 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
- 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
- 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
- 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
- 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
- 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
- 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
- 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
- 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
- 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
- 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
- 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
- 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
- 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
- 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
- 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
- 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
- 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
- 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
- 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
- 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
- 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
- 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
- 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
- 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
- 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
- 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
- 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
- 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
- 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
- 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
- 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
- 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
- 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
- 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
- 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
- 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
- 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
- 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
- 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
- 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
- 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
- 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
- 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
- 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
- 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
- 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
- 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
- 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
- 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
- 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
- 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
- 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
- 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
- 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
- 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
- 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
- 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
- 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
- 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
- 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
- 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
- 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
- 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
- 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
- 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
- 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
- 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
- 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
- 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
- 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
- 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
- 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
- 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
- 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
- 7.9943534368588578f
-};
-
+static const float kLog2Table[] = {
+ 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+ 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+ 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+ 3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+ 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+ 3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+ 4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+ 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+ 4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+ 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+ 4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+ 5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+ 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+ 5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+ 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+ 5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+ 5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+ 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+ 5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+ 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+ 5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+ 5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+ 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+ 6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+ 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+ 6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+ 6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+ 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+ 6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+ 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+ 6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+ 6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+ 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+ 6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+ 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+ 6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+ 6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+ 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+ 6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+ 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+ 6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+ 6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+ 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+ 7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+ 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+ 7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+ 7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+ 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+ 7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+ 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+ 7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+ 7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+ 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+ 7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+ 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+ 7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+ 7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+ 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+ 7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+ 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+ 7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+ 7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+ 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+ 7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+ 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+ 7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+ 7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+ 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+ 7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+ 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+ 7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+ 7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+ 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+ 7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+ 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+ 7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+ 7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+ 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+ 7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+ 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+ 7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+ 7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+ 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+ 7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+ 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+ 7.9943534368588578f
+};
+
#define LOG_2_INV 1.4426950408889634
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
static BROTLI_INLINE double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
- return kLog2Table[v];
- }
+ return kLog2Table[v];
+ }
#if (defined(_MSC_VER) && _MSC_VER <= 1700) || \
(defined(__ANDROID_API__) && __ANDROID_API__ < 18)
/* Visual Studio 2012 and Android API levels < 18 do not have the log2()
* function defined, so we use log() and a multiplication instead. */
return log((double)v) * LOG_2_INV;
-#else
+#else
return log2((double)v);
-#endif
-}
-
+#endif
+}
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_FAST_LOG_H_ */
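FastLog2 returns log2(v) with the convention log2(0) == 0, using kLog2Table for v < 256 and the libm fallback otherwise. Assuming fast_log.h is included, a small check of that contract against libm (illustrative only; the 1e-6 tolerance reflects the single-precision table entries):

#include <assert.h>
#include <math.h>

static void CheckFastLog2Agrees(void) {
  size_t v;
  assert(FastLog2(0) == 0.0);            /* special-cased: log2(0) == 0 */
  for (v = 1; v < 1000; ++v) {
    double err = FastLog2(v) - log2((double)v);
    assert(err < 1e-6 && err > -1e-6);   /* table and fallback match libm */
  }
}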
diff --git a/contrib/libs/brotli/enc/find_match_length.h b/contrib/libs/brotli/enc/find_match_length.h
index 5dd2bbb52e..bc428cffda 100644
--- a/contrib/libs/brotli/enc/find_match_length.h
+++ b/contrib/libs/brotli/enc/find_match_length.h
@@ -1,24 +1,24 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find maximal matching prefixes of strings. */
-#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
-#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
-
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* Separate implementation for little-endian 64-bit targets, for speed. */
#if defined(__GNUC__) && defined(_LP64) && defined(BROTLI_LITTLE_ENDIAN)
-
+
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
@@ -27,54 +27,54 @@ static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
while (BROTLI_PREDICT_TRUE(--limit2)) {
if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) ==
BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) {
- s2 += 8;
- matched += 8;
- } else {
+ s2 += 8;
+ matched += 8;
+ } else {
uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
BROTLI_UNALIGNED_LOAD64LE(s1 + matched);
size_t matching_bits = (size_t)__builtin_ctzll(x);
- matched += matching_bits >> 3;
- return matched;
- }
- }
+ matched += matching_bits >> 3;
+ return matched;
+ }
+ }
limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */
- while (--limit) {
+ while (--limit) {
if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) {
- ++s2;
- ++matched;
- } else {
- return matched;
- }
- }
- return matched;
-}
-#else
+ ++s2;
+ ++matched;
+ } else {
+ return matched;
+ }
+ }
+ return matched;
+}
+#else
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
- const uint8_t* s2_limit = s2 + limit;
- const uint8_t* s2_ptr = s2;
+ const uint8_t* s2_limit = s2 + limit;
+ const uint8_t* s2_ptr = s2;
/* Find out how long the match is. We loop over the data 32 bits at a
time until we find a 32-bit block that doesn't match; then we find
the first non-matching bit and use that to calculate the total
length of the match. */
- while (s2_ptr <= s2_limit - 4 &&
+ while (s2_ptr <= s2_limit - 4 &&
BrotliUnalignedRead32(s2_ptr) ==
BrotliUnalignedRead32(s1 + matched)) {
- s2_ptr += 4;
- matched += 4;
- }
- while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
- ++s2_ptr;
- ++matched;
- }
- return matched;
-}
-#endif
-
+ s2_ptr += 4;
+ matched += 4;
+ }
+ while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
+ ++s2_ptr;
+ ++matched;
+ }
+ return matched;
+}
+#endif
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
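Both variants of FindMatchLengthWithLimit compute the length of the common prefix of s1 and s2, capped at limit; the word-at-a-time code is only a fast path. A naive byte-wise reference (a sketch) that either variant should agree with:

#include <stddef.h>
#include <stdint.h>

/* Length of the common prefix of s1 and s2, at most limit bytes. */
static size_t NaiveMatchLengthWithLimit(const uint8_t* s1, const uint8_t* s2,
                                        size_t limit) {
  size_t matched = 0;
  while (matched < limit && s1[matched] == s2[matched]) {
    ++matched;
  }
  return matched;
}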
diff --git a/contrib/libs/brotli/enc/hash.h b/contrib/libs/brotli/enc/hash.h
index c4945fb101..8c5a7bb5ad 100644
--- a/contrib/libs/brotli/enc/hash.h
+++ b/contrib/libs/brotli/enc/hash.h
@@ -1,5 +1,5 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
@@ -7,26 +7,26 @@
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data. */
-#ifndef BROTLI_ENC_HASH_H_
-#define BROTLI_ENC_HASH_H_
-
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
#include <string.h> /* memcmp, memset */
-
+
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./encoder_dict.h"
-#include "./fast_log.h"
-#include "./find_match_length.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
#include "./memory.h"
#include "./quality.h"
-#include "./static_dict.h"
-
+#include "./static_dict.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* Pointer to hasher data.
*
* Excluding initialization and destruction, hasher can be passed as
@@ -40,10 +40,10 @@ extern "C" {
* Using "define" instead of "typedef", because on MSVC __restrict does not work
* on typedef pointer types. */
#define HasherHandle uint8_t*
-
+
typedef struct {
BrotliHasherParams params;
-
+
/* False if hasher needs to be "prepared" before use. */
BROTLI_BOOL is_prepared_;
@@ -80,14 +80,14 @@ static const uint32_t kHashMul32 = 0x1E35A7BD;
static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
static const uint64_t kHashMul64Long =
BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
-
+
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - 14);
-}
-
+}
+
static BROTLI_INLINE void PrepareDistanceCache(
int* BROTLI_RESTRICT distance_cache, const int num_distances) {
if (num_distances > 4) {
@@ -108,8 +108,8 @@ static BROTLI_INLINE void PrepareDistanceCache(
distance_cache[15] = next_last_distance + 3;
}
}
-}
-
+}
+
#define BROTLI_LITERAL_BYTE_SCORE 135
#define BROTLI_DISTANCE_BIT_PENALTY 30
/* Score must be positive after applying maximal penalty. */
@@ -135,19 +135,19 @@ static BROTLI_INLINE score_t BackwardReferenceScore(
size_t copy_length, size_t backward_reference_offset) {
return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
-}
-
+}
+
static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
size_t copy_length) {
return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
BROTLI_SCORE_BASE + 15;
}
-
+
static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
size_t distance_short_code) {
return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
}
-
+
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
const BrotliEncoderDictionary* dictionary, size_t item,
const uint8_t* data, size_t max_length, size_t max_backward,
@@ -164,33 +164,33 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
if (len > max_length) {
return BROTLI_FALSE;
}
-
+
matchlen =
FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
return BROTLI_FALSE;
- }
+ }
{
size_t cut = len - matchlen;
size_t transform_id = (cut << 2) +
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
backward = max_backward + 1 + word_idx +
(transform_id << dictionary->words->size_bits_by_length[len]);
- }
+ }
if (backward > max_distance) {
return BROTLI_FALSE;
- }
+ }
score = BackwardReferenceScore(matchlen, backward);
if (score < out->score) {
return BROTLI_FALSE;
- }
+ }
out->len = matchlen;
out->len_code_delta = (int)len - (int)matchlen;
out->distance = backward;
out->score = score;
return BROTLI_TRUE;
}
-
+
static BROTLI_INLINE void SearchInStaticDictionary(
const BrotliEncoderDictionary* dictionary,
HasherHandle handle, const uint8_t* data, size_t max_length,
@@ -201,7 +201,7 @@ static BROTLI_INLINE void SearchInStaticDictionary(
HasherCommon* self = GetHasherCommon(handle);
if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
return;
- }
+ }
key = Hash14(data) << 1;
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
size_t item = dictionary->hash_table[key];
@@ -212,42 +212,42 @@ static BROTLI_INLINE void SearchInStaticDictionary(
max_length, max_backward, max_distance, out);
if (item_matches) {
self->dict_num_matches++;
- }
- }
- }
+ }
+ }
+ }
}
-
+
typedef struct BackwardMatch {
uint32_t distance;
uint32_t length_and_code;
} BackwardMatch;
-
+
static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
size_t dist, size_t len) {
self->distance = (uint32_t)dist;
self->length_and_code = (uint32_t)(len << 5);
}
-
+
static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
size_t dist, size_t len, size_t len_code) {
self->distance = (uint32_t)dist;
self->length_and_code =
(uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
}
-
+
static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
return self->length_and_code >> 5;
}
-
+
static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
size_t code = self->length_and_code & 31;
return code ? code : BackwardMatchLength(self);
}
-
+
#define EXPAND_CAT(a, b) CAT(a, b)
#define CAT(a, b) a ## b
#define FN(X) EXPAND_CAT(X, HASHER())
-
+
#define HASHER() H10
#define BUCKET_BITS 17
#define MAX_TREE_SEARCH_DEPTH 64
@@ -259,11 +259,11 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef HASHER
/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
#define MAX_NUM_MATCHES_H10 128
-
+
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
a little faster (0.5% - 1%) and it compresses 0.15% better on small text
and HTML inputs. */
-
+
#define HASHER() H2
#define BUCKET_BITS 16
#define BUCKET_SWEEP 1
@@ -273,7 +273,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef BUCKET_SWEEP
#undef USE_DICTIONARY
#undef HASHER
-
+
#define HASHER() H3
#define BUCKET_SWEEP 2
#define USE_DICTIONARY 0
@@ -282,7 +282,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef BUCKET_SWEEP
#undef BUCKET_BITS
#undef HASHER
-
+
#define HASHER() H4
#define BUCKET_BITS 17
#define BUCKET_SWEEP 4
@@ -293,17 +293,17 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef BUCKET_SWEEP
#undef BUCKET_BITS
#undef HASHER
-
+
#define HASHER() H5
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
#undef HASHER
-
+
#define HASHER() H6
#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
#undef HASHER
-
+
#define BUCKET_BITS 15
-
+
#define NUM_LAST_DISTANCES_TO_CHECK 4
#define NUM_BANKS 1
#define BANK_BITS 16
@@ -311,7 +311,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
#undef HASHER
#undef NUM_LAST_DISTANCES_TO_CHECK
-
+
#define NUM_LAST_DISTANCES_TO_CHECK 10
#define HASHER() H41
#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
@@ -319,7 +319,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef NUM_LAST_DISTANCES_TO_CHECK
#undef NUM_BANKS
#undef BANK_BITS
-
+
#define NUM_LAST_DISTANCES_TO_CHECK 16
#define NUM_BANKS 512
#define BANK_BITS 9
@@ -329,9 +329,9 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef NUM_LAST_DISTANCES_TO_CHECK
#undef NUM_BANKS
#undef BANK_BITS
-
+
#undef BUCKET_BITS
-
+
#define HASHER() H54
#define BUCKET_BITS 20
#define BUCKET_SWEEP 4
@@ -343,7 +343,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef BUCKET_SWEEP
#undef BUCKET_BITS
#undef HASHER
-
+
/* fast large window hashers */
#define HASHER() HROLLING_FAST
@@ -420,10 +420,10 @@ static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
#undef SIZE_
default:
break;
- }
+ }
return result;
}
-
+
static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
BrotliEncoderParams* params, const uint8_t* data, size_t position,
size_t input_size, BROTLI_BOOL is_last) {
@@ -448,10 +448,10 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
#undef INITIALIZE_
default:
break;
- }
+ }
HasherReset(*handle);
- }
-
+ }
+
self = *handle;
common = GetHasherCommon(self);
if (!common->is_prepared_) {
@@ -462,16 +462,16 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
break;
FOR_ALL_HASHERS(PREPARE_)
#undef PREPARE_
- default: break;
- }
+ default: break;
+ }
if (position == 0) {
common->dict_num_lookups = 0;
common->dict_num_matches = 0;
}
common->is_prepared_ = BROTLI_TRUE;
- }
+ }
}
-
+
static BROTLI_INLINE void InitOrStitchToPreviousBlock(
MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
BrotliEncoderParams* params, size_t position, size_t input_size,
@@ -490,9 +490,9 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
default: break;
}
}
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_HASH_H_ */
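Hash14 is a multiplicative hash: load four bytes little-endian, multiply by kHashMul32, and keep the top 14 bits, which are the best-mixed ones. A self-contained sketch of the same idea (the constant is copied from the header above; the load helper is written out by hand here):

#include <stdint.h>

/* Little-endian 32-bit load, spelled out for portability. */
static uint32_t Load32LE(const uint8_t* p) {
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
         ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* 14-bit bucket index for the 4 bytes at data, in the style of Hash14. */
static uint32_t Hash14Sketch(const uint8_t* data) {
  uint32_t h = Load32LE(data) * 0x1E35A7BDu;  /* kHashMul32 */
  return h >> (32 - 14);                      /* keep the well-mixed high bits */
}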
diff --git a/contrib/libs/brotli/enc/histogram.h b/contrib/libs/brotli/enc/histogram.h
index a522ca7aa7..42af3c3f9d 100644
--- a/contrib/libs/brotli/enc/histogram.h
+++ b/contrib/libs/brotli/enc/histogram.h
@@ -1,14 +1,14 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Models the histograms of literals, commands and distance codes. */
-#ifndef BROTLI_ENC_HISTOGRAM_H_
-#define BROTLI_ENC_HISTOGRAM_H_
-
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
#include <string.h> /* memset */
#include "../common/constants.h"
@@ -16,12 +16,12 @@
#include "../common/platform.h"
#include <brotli/types.h>
#include "./block_splitter.h"
-#include "./command.h"
-
+#include "./command.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
@@ -32,21 +32,21 @@ extern "C" {
#undef DataType
#undef DATA_SIZE
#undef FN
-
+
#define FN(X) X ## Command
#define DataType uint16_t
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DATA_SIZE
#undef FN
-
+
#define FN(X) X ## Distance
#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
#undef FN
-
+
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
const Command* cmds, const size_t num_commands,
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
@@ -55,9 +55,9 @@ BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
const ContextType* context_modes, HistogramLiteral* literal_histograms,
HistogramCommand* insert_and_copy_histograms,
HistogramDistance* copy_dist_histograms);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_HISTOGRAM_H_ */
diff --git a/contrib/libs/brotli/enc/literal_cost.h b/contrib/libs/brotli/enc/literal_cost.h
index 412c155890..8f53f39d3f 100644
--- a/contrib/libs/brotli/enc/literal_cost.h
+++ b/contrib/libs/brotli/enc/literal_cost.h
@@ -1,5 +1,5 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
@@ -7,24 +7,24 @@
/* Literal cost model to allow backward reference replacement to be efficient.
*/
-#ifndef BROTLI_ENC_LITERAL_COST_H_
-#define BROTLI_ENC_LITERAL_COST_H_
-
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
ring-buffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_LITERAL_COST_H_ */
diff --git a/contrib/libs/brotli/enc/metablock.h b/contrib/libs/brotli/enc/metablock.h
index add40a056c..334a79a443 100644
--- a/contrib/libs/brotli/enc/metablock.h
+++ b/contrib/libs/brotli/enc/metablock.h
@@ -1,5 +1,5 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
@@ -7,26 +7,26 @@
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
-#ifndef BROTLI_ENC_METABLOCK_H_
-#define BROTLI_ENC_METABLOCK_H_
-
+#ifndef BROTLI_ENC_METABLOCK_H_
+#define BROTLI_ENC_METABLOCK_H_
+
#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./block_splitter.h"
-#include "./command.h"
-#include "./histogram.h"
+#include "./command.h"
+#include "./histogram.h"
#include "./memory.h"
#include "./quality.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
typedef struct MetaBlockSplit {
- BlockSplit literal_split;
- BlockSplit command_split;
- BlockSplit distance_split;
+ BlockSplit literal_split;
+ BlockSplit command_split;
+ BlockSplit distance_split;
uint32_t* literal_context_map;
size_t literal_context_map_size;
uint32_t* distance_context_map;
@@ -38,7 +38,7 @@ typedef struct MetaBlockSplit {
HistogramDistance* distance_histograms;
size_t distance_histograms_size;
} MetaBlockSplit;
-
+
static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
BrotliInitBlockSplit(&mb->literal_split);
BrotliInitBlockSplit(&mb->command_split);
@@ -54,7 +54,7 @@ static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
mb->distance_histograms = 0;
mb->distance_histograms_size = 0;
}
-
+
static BROTLI_INLINE void DestroyMetaBlockSplit(
MemoryManager* m, MetaBlockSplit* mb) {
BrotliDestroyBlockSplit(m, &mb->literal_split);
@@ -66,7 +66,7 @@ static BROTLI_INLINE void DestroyMetaBlockSplit(
BROTLI_FREE(m, mb->command_histograms);
BROTLI_FREE(m, mb->distance_histograms);
}
-
+
/* Uses the slow shortest-path block splitter and does context clustering.
The distance parameters are dynamically selected based on the commands
which get recomputed under the new distance parameters. The new distance
@@ -82,7 +82,7 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb);
-
+
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and uses a static context clustering which
is the same for all block types. */
@@ -91,10 +91,10 @@ BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
size_t num_contexts, const uint32_t* static_context_map,
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
-
+
BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
MetaBlockSplit* mb);
-
+
BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
uint32_t npostfix, uint32_t ndirect);
diff --git a/contrib/libs/brotli/enc/prefix.h b/contrib/libs/brotli/enc/prefix.h
index 56b89fa466..fd359a478d 100644
--- a/contrib/libs/brotli/enc/prefix.h
+++ b/contrib/libs/brotli/enc/prefix.h
@@ -1,5 +1,5 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
@@ -7,18 +7,18 @@
/* Functions for encoding of integers into prefix codes the amount of extra
bits, and the actual values of the extra bits. */
-#ifndef BROTLI_ENC_PREFIX_H_
-#define BROTLI_ENC_PREFIX_H_
-
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
#include "../common/constants.h"
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./fast_log.h"
-
+#include "./fast_log.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* Here distance_code is an intermediate code, i.e. one of the special codes or
the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
@@ -28,8 +28,8 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
uint32_t* extra_bits) {
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
*code = (uint16_t)distance_code;
- *extra_bits = 0;
- return;
+ *extra_bits = 0;
+ return;
} else {
size_t dist = ((size_t)1 << (postfix_bits + 2u)) +
(distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
@@ -43,11 +43,11 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
- }
-}
-
+ }
+}
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_PREFIX_H_ */
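
The hunk above only shows fragments of PrefixEncodeCopyDistance, but the header comment describes the general scheme: an integer is replaced by a prefix-code symbol plus a count of extra bits and their value. The following is a minimal standalone sketch of that idea only; toy_prefix_encode and its bucket layout are invented here and are not brotli's actual distance tables.

#include <stdint.h>
#include <stdio.h>

/* Toy scheme: values 0..3 get their own code; larger values fall into
   power-of-two buckets [4<<b, 4<<(b+1)), one prefix symbol per bucket,
   with (2+b) extra bits giving the offset inside the bucket. */
static void toy_prefix_encode(uint32_t v, uint32_t* code, uint32_t* nbits,
                              uint32_t* extra) {
  if (v < 4) {
    *code = v;
    *nbits = 0;
    *extra = 0;
    return;
  }
  uint32_t b = 0;
  while ((4u << (b + 1)) <= v) ++b;     /* locate the bucket (demo only, no overflow guard) */
  *code = 4 + b;                        /* one symbol per bucket */
  *nbits = 2 + b;                       /* bucket spans 1 << (2+b) values */
  *extra = v - (4u << b);               /* offset inside the bucket */
}

int main(void) {
  uint32_t code, nbits, extra;
  toy_prefix_encode(100, &code, &nbits, &extra);
  /* 100 lies in bucket [64,128): code 8, 6 extra bits with value 36 */
  printf("code=%u nbits=%u extra=%u\n", code, nbits, extra);
  return 0;
}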
diff --git a/contrib/libs/brotli/enc/ringbuffer.h b/contrib/libs/brotli/enc/ringbuffer.h
index 1ee7688e54..86079a89d3 100644
--- a/contrib/libs/brotli/enc/ringbuffer.h
+++ b/contrib/libs/brotli/enc/ringbuffer.h
@@ -1,25 +1,25 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Sliding window over the input data. */
-#ifndef BROTLI_ENC_RINGBUFFER_H_
-#define BROTLI_ENC_RINGBUFFER_H_
-
+#ifndef BROTLI_ENC_RINGBUFFER_H_
+#define BROTLI_ENC_RINGBUFFER_H_
+
#include <string.h> /* memcpy */
-
+
#include "../common/platform.h"
#include <brotli/types.h>
#include "./memory.h"
#include "./quality.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
data in a circular manner: writing a byte writes it to:
`position() % (1 << window_bits)'.
@@ -80,16 +80,16 @@ static BROTLI_INLINE void RingBufferInitBuffer(
memcpy(new_data, rb->data_,
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
BROTLI_FREE(m, rb->data_);
- }
+ }
rb->data_ = new_data;
rb->cur_size_ = buflen;
rb->buffer_ = rb->data_ + 2;
rb->buffer_[-2] = rb->buffer_[-1] = 0;
for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
rb->buffer_[rb->cur_size_ + i] = 0;
- }
+ }
}
-
+
static BROTLI_INLINE void RingBufferWriteTail(
const uint8_t* bytes, size_t n, RingBuffer* rb) {
const size_t masked_pos = rb->pos_ & rb->mask_;
@@ -98,9 +98,9 @@ static BROTLI_INLINE void RingBufferWriteTail(
const size_t p = rb->size_ + masked_pos;
memcpy(&rb->buffer_[p], bytes,
BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos));
- }
+ }
}
-
+
/* Push bytes into the ring buffer. */
static BROTLI_INLINE void RingBufferWrite(
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
@@ -116,7 +116,7 @@ static BROTLI_INLINE void RingBufferWrite(
if (BROTLI_IS_OOM(m)) return;
memcpy(rb->buffer_, bytes, n);
return;
- }
+ }
if (rb->cur_size_ < rb->total_size_) {
/* Lazily allocate the full buffer. */
RingBufferInitBuffer(m, rb->total_size_, rb);
@@ -142,8 +142,8 @@ static BROTLI_INLINE void RingBufferWrite(
/* Copy into the beginning of the buffer */
memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
n - (rb->size_ - masked_pos));
- }
- }
+ }
+ }
{
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
uint32_t rb_pos_mask = (1u << 31) - 1;
@@ -156,9 +156,9 @@ static BROTLI_INLINE void RingBufferWrite(
}
}
}
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
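
The RingBuffer comment above says a write lands at `position() % (1 << window_bits)`. The sketch below is a self-contained illustration of just that masking behaviour, not the patched RingBuffer itself; ToyRingBuffer, WINDOW_BITS and toy_rb_write are names invented for the example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WINDOW_BITS 4
#define WINDOW_SIZE (1u << WINDOW_BITS)

typedef struct {
  uint8_t data[WINDOW_SIZE];
  uint32_t pos;                /* total bytes ever written */
} ToyRingBuffer;

static void toy_rb_write(ToyRingBuffer* rb, const uint8_t* bytes, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    /* pos % (1 << window_bits), done with a mask */
    rb->data[(rb->pos + i) & (WINDOW_SIZE - 1)] = bytes[i];
  }
  rb->pos += (uint32_t)n;
}

int main(void) {
  ToyRingBuffer rb;
  memset(&rb, 0, sizeof(rb));
  const uint8_t msg[] = "abcdefghijklmnopqrst";   /* 20 bytes > 16-byte window */
  toy_rb_write(&rb, msg, sizeof(msg) - 1);
  /* the first 4 bytes have been overwritten by 'q'..'t' */
  for (unsigned i = 0; i < WINDOW_SIZE; ++i) putchar(rb.data[i]);
  putchar('\n');
  return 0;
}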
diff --git a/contrib/libs/brotli/enc/static_dict.h b/contrib/libs/brotli/enc/static_dict.h
index a5e06f43c5..6b5d4eb0c9 100644
--- a/contrib/libs/brotli/enc/static_dict.h
+++ b/contrib/libs/brotli/enc/static_dict.h
@@ -1,26 +1,26 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Class to model the static dictionary. */
-#ifndef BROTLI_ENC_STATIC_DICT_H_
-#define BROTLI_ENC_STATIC_DICT_H_
-
+#ifndef BROTLI_ENC_STATIC_DICT_H_
+#define BROTLI_ENC_STATIC_DICT_H_
+
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./encoder_dict.h"
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
static const uint32_t kInvalidMatch = 0xFFFFFFF;
-
+
/* Matches data against static dictionary words, and for each length l,
for which a match is found, updates matches[l] to be the minimum possible
(distance << 5) + len_code.
@@ -32,9 +32,9 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BrotliEncoderDictionary* dictionary,
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches);
-
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
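
Per the comment above, each matches[l] entry packs a dictionary match as (distance << 5) + len_code. A small sketch of how such a packed value can be split back apart; only the 5-bit split and the kInvalidMatch sentinel come from the header, the sample value is made up.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint32_t kInvalidMatch = 0xFFFFFFF;      /* same sentinel as above */
  uint32_t match = (123u << 5) + 7u;             /* hypothetical packed match */
  if (match != kInvalidMatch) {
    uint32_t distance = match >> 5;              /* upper bits: dictionary distance */
    uint32_t len_code = match & 31u;             /* low 5 bits: length code */
    printf("distance=%u len_code=%u\n", distance, len_code);
  }
  return 0;
}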
diff --git a/contrib/libs/brotli/enc/static_dict_lut.h b/contrib/libs/brotli/enc/static_dict_lut.h
index aba9af4508..e299cda6d8 100644
--- a/contrib/libs/brotli/enc/static_dict_lut.h
+++ b/contrib/libs/brotli/enc/static_dict_lut.h
@@ -1,30 +1,30 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
-
+
/* Lookup table for static dictionary and transforms. */
-
+
#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
#define BROTLI_ENC_STATIC_DICT_LUT_H_
-
+
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
typedef struct DictWord {
/* Highest bit is used to indicate end of bucket. */
- uint8_t len;
- uint8_t transform;
- uint16_t idx;
+ uint8_t len;
+ uint8_t transform;
+ uint16_t idx;
} DictWord;
-
+
static const int kDictNumBits = 15;
static const uint32_t kDictHashMul32 = 0x1E35A7BD;
-
+
static const uint16_t kStaticDictionaryBuckets[32768] = {
1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
0,53,0,0,0,0,0,0,55,0,0,0,0,0,0,61,76,0,0,0,94,0,0,0,0,0,0,96,0,97,0,98,0,0,0,0,
@@ -5855,10 +5855,10 @@ static const DictWord kStaticDictionaryWords[31705] = {
458},{12,0,756},{132,10,420},{134,0,1504},{6,0,757},{133,11,383},{6,0,1266},{135
,0,1735},{5,0,598},{7,0,791},{8,0,108},{9,0,123},{7,10,1570},{140,10,542},{142,
11,410},{9,11,660},{138,11,347}
-};
-
+};
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
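
kDictNumBits and kDictHashMul32 above suggest the usual multiply-then-shift bucketing into the 32768-entry kStaticDictionaryBuckets table, and the DictWord comment says the high bit of len terminates a bucket. The actual lookup code is not part of this diff, so the following is an assumption-laden sketch only, with local copies of the constants.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  const int kNumBits = 15;                 /* mirrors kDictNumBits above */
  const uint32_t kHashMul32 = 0x1E35A7BD;  /* mirrors kDictHashMul32 above */

  /* Assumed bucketing: multiply the first four input bytes (loaded here
     in host byte order) and keep the top kNumBits bits. */
  const uint8_t data[4] = { 'h', 't', 't', 'p' };
  uint32_t word;
  memcpy(&word, data, 4);
  uint32_t bucket = (word * kHashMul32) >> (32 - kNumBits);
  printf("bucket=%u of %u\n", (unsigned)bucket, 1u << kNumBits);

  /* Unpacking a DictWord entry: top bit of len marks the end of a bucket. */
  struct { uint8_t len; uint8_t transform; uint16_t idx; } w = { 0x84, 0, 42 };
  printf("len=%u transform=%u idx=%u last=%d\n",
         (unsigned)(w.len & 0x7F), (unsigned)w.transform, (unsigned)w.idx,
         (w.len & 0x80) != 0);
  return 0;
}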
diff --git a/contrib/libs/brotli/enc/write_bits.h b/contrib/libs/brotli/enc/write_bits.h
index 6f6080b5c0..36515a6893 100644
--- a/contrib/libs/brotli/enc/write_bits.h
+++ b/contrib/libs/brotli/enc/write_bits.h
@@ -1,23 +1,23 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
-
+
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Write bits into a byte array. */
-#ifndef BROTLI_ENC_WRITE_BITS_H_
-#define BROTLI_ENC_WRITE_BITS_H_
-
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
#include "../common/platform.h"
#include <brotli/types.h>
-
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-
+
/*#define BIT_WRITER_DEBUG */
-
+
/* This function writes bits into bytes in increasing addresses, and within
a byte least-significant-bit first.
@@ -50,36 +50,36 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
(int)*pos));
BROTLI_DCHECK((bits >> n_bits) == 0);
BROTLI_DCHECK(n_bits <= 56);
- v |= bits << (*pos & 7);
+ v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
- *pos += n_bits;
-#else
+ *pos += n_bits;
+#else
/* implicit & 0xFF is assumed for uint8_t arithmetics */
uint8_t* array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
size_t bits_left_to_write;
- bits <<= bits_reserved_in_first_byte;
+ bits <<= bits_reserved_in_first_byte;
*array_pos++ |= (uint8_t)bits;
for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
- bits_left_to_write -= 8) {
- bits >>= 8;
+ bits_left_to_write -= 8) {
+ bits >>= 8;
*array_pos++ = (uint8_t)bits;
- }
- *array_pos = 0;
- *pos += n_bits;
-#endif
-}
-
+ }
+ *array_pos = 0;
+ *pos += n_bits;
+#endif
+}
+
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
size_t pos, uint8_t* array) {
BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos));
BROTLI_DCHECK((pos & 7) == 0);
- array[pos >> 3] = 0;
-}
-
+ array[pos >> 3] = 0;
+}
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
-
+
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
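
The header above documents an LSB-first convention: bits go into bytes at increasing addresses, least-significant bit first, starting at bit position *pos. The toy routine below is a self-contained illustration of that convention, not the BrotliWriteBits implementation itself.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void toy_write_bits(size_t n_bits, uint64_t bits,
                           size_t* pos, uint8_t* array) {
  for (size_t i = 0; i < n_bits; ++i) {
    size_t p = *pos + i;
    if (bits & ((uint64_t)1 << i)) {
      array[p >> 3] |= (uint8_t)(1u << (p & 7));   /* set bit p, LSB first */
    }
  }
  *pos += n_bits;
}

int main(void) {
  uint8_t buf[8];
  size_t pos = 0;
  memset(buf, 0, sizeof(buf));
  toy_write_bits(3, 0x5, &pos, buf);   /* writes 1,0,1 into bits 0..2 */
  toy_write_bits(5, 0x1F, &pos, buf);  /* writes five 1s into bits 3..7 */
  printf("first byte = 0x%02X, pos = %zu\n", buf[0], pos);  /* 0xFD, 8 */
  return 0;
}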
diff --git a/contrib/libs/brotli/enc/ya.make b/contrib/libs/brotli/enc/ya.make
index 6d415989e7..67da82ec4d 100644
--- a/contrib/libs/brotli/enc/ya.make
+++ b/contrib/libs/brotli/enc/ya.make
@@ -1,6 +1,6 @@
-LIBRARY()
-
-LICENSE(MIT)
+LIBRARY()
+
+LICENSE(MIT)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
@@ -9,19 +9,19 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
-NO_UTIL()
-NO_COMPILER_WARNINGS()
-
+NO_UTIL()
+
+NO_COMPILER_WARNINGS()
+
ADDINCL(GLOBAL contrib/libs/brotli/include)
-PEERDIR(
+PEERDIR(
contrib/libs/brotli/common
- contrib/libs/brotli/dec
-)
-
-SRCS(
+ contrib/libs/brotli/dec
+)
+
+SRCS(
backward_references.c
backward_references_hq.c
bit_cost.c
@@ -40,8 +40,8 @@ SRCS(
metablock.c
static_dict.c
utf8_util.c
-)
-
+)
+
CFLAGS(-DBROTLI_BUILD_PORTABLE)
-END()
+END()
diff --git a/contrib/libs/brotli/ya.make b/contrib/libs/brotli/ya.make
index f1b05ee869..f0941aa638 100644
--- a/contrib/libs/brotli/ya.make
+++ b/contrib/libs/brotli/ya.make
@@ -1,10 +1,10 @@
VERSION(1.0.1)
-RECURSE(
+RECURSE(
common
- dec
- enc
- tools
+ dec
+ enc
+ tools
python
java
-)
+)
diff --git a/contrib/libs/c-ares/ares_build.h b/contrib/libs/c-ares/ares_build.h
index 26ad125176..e0a2ed784b 100644
--- a/contrib/libs/c-ares/ares_build.h
+++ b/contrib/libs/c-ares/ares_build.h
@@ -202,9 +202,9 @@
# else
# define CARES_TYPEOF_ARES_SSIZE_T long
# endif
-#else
+#else
# define CARES_TYPEOF_ARES_SSIZE_T ssize_t
-#endif
+#endif
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
diff --git a/contrib/libs/c-ares/ares_config.h b/contrib/libs/c-ares/ares_config.h
index 1e8def9f5a..6695a6ad21 100644
--- a/contrib/libs/c-ares/ares_config.h
+++ b/contrib/libs/c-ares/ares_config.h
@@ -46,11 +46,11 @@
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H 1
-#if defined(__ANDROID__)
-#else
+#if defined(__ANDROID__)
+#else
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H 1
-#endif
+#endif
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H 1
@@ -118,10 +118,10 @@
/* Define to 1 if you have the getservbyport_r function. */
#define HAVE_GETSERVBYPORT_R 1
-#if defined(__APPLE__)
-#undef HAVE_GETSERVBYPORT_R
-#endif
-
+#if defined(__APPLE__)
+#undef HAVE_GETSERVBYPORT_R
+#endif
+
#if defined(__ANDROID__)
#undef HAVE_GETSERVBYPORT_R
#endif
@@ -385,7 +385,7 @@
#define RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
-#define RECVFROM_QUAL_ARG5
+#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
@@ -456,24 +456,24 @@
/* The size of `int', as computed by sizeof. */
#define SIZEOF_INT 4
-#if defined(_MSC_VER)
-#define SIZEOF_LONG 4
-#elif defined(__SIZEOF_LONG__)
-#define SIZEOF_LONG __SIZEOF_LONG__
-#else
+#if defined(_MSC_VER)
+#define SIZEOF_LONG 4
+#elif defined(__SIZEOF_LONG__)
+#define SIZEOF_LONG __SIZEOF_LONG__
+#else
/* The size of `long', as computed by sizeof. */
#define SIZEOF_LONG 8
-#endif
+#endif
/* The size of `short', as computed by sizeof. */
#define SIZEOF_SHORT 2
-#if defined(__SIZEOF_SIZE_T__)
-#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__
-#else
+#if defined(__SIZEOF_SIZE_T__)
+#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__
+#else
/* The size of `size_t', as computed by sizeof. */
#define SIZEOF_SIZE_T 8
-#endif
+#endif
/* The size of `struct in6_addr', as computed by sizeof. */
#define SIZEOF_STRUCT_IN6_ADDR 16
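
The patched block above hard-codes SIZEOF_LONG and SIZEOF_SIZE_T, preferring the compiler-provided __SIZEOF_* macros where available. One hedged way to keep such hand-maintained values honest is a build-time assertion; MY_SIZEOF_LONG below is a hypothetical local macro mirroring the same fallback chain, not part of c-ares, and it requires C11 for _Static_assert.

#include <stddef.h>

#if defined(_MSC_VER)
#define MY_SIZEOF_LONG 4
#elif defined(__SIZEOF_LONG__)
#define MY_SIZEOF_LONG __SIZEOF_LONG__
#else
#define MY_SIZEOF_LONG 8
#endif

_Static_assert(MY_SIZEOF_LONG == sizeof(long),
               "SIZEOF_LONG fallback does not match this target");

int main(void) { return 0; }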
diff --git a/contrib/libs/c-ares/ares_setup.h b/contrib/libs/c-ares/ares_setup.h
index 9a9badb9d9..b9a1fd5b52 100644
--- a/contrib/libs/c-ares/ares_setup.h
+++ b/contrib/libs/c-ares/ares_setup.h
@@ -28,7 +28,7 @@
* configuration file for platforms which lack config tool.
*/
-#if defined(HAVE_CONFIG_H) && !defined(_MSC_VER)
+#if defined(HAVE_CONFIG_H) && !defined(_MSC_VER)
#include "ares_config.h"
#else
diff --git a/contrib/libs/cctz/tzdata/ya.make b/contrib/libs/cctz/tzdata/ya.make
index 8b8be805f0..3b9ff5cc67 100644
--- a/contrib/libs/cctz/tzdata/ya.make
+++ b/contrib/libs/cctz/tzdata/ya.make
@@ -2,7 +2,7 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(Apache-2.0)
+LICENSE(Apache-2.0)
OWNER(
dfyz
diff --git a/contrib/libs/crcutil/ya.make b/contrib/libs/crcutil/ya.make
index 1d6fbdfeec..2da8ef940f 100644
--- a/contrib/libs/crcutil/ya.make
+++ b/contrib/libs/crcutil/ya.make
@@ -1,6 +1,6 @@
LIBRARY()
-LICENSE(Apache-2.0)
+LICENSE(Apache-2.0)
VERSION(1.0)
@@ -10,18 +10,18 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
NO_UTIL()
NO_COMPILER_WARNINGS()
NO_JOIN_SRC()
-IF (GCC AND USE_LTO)
- CFLAGS(-DCRCUTIL_FORCE_ASM_CRC32C=1)
+IF (GCC AND USE_LTO)
+ CFLAGS(-DCRCUTIL_FORCE_ASM_CRC32C=1)
ENDIF()
-
-IF (ARCH_I386 OR ARCH_X86_64)
+
+IF (ARCH_I386 OR ARCH_X86_64)
IF (OS_WINDOWS)
SRCS(
multiword_64_64_cl_i386_mmx.cc
@@ -29,7 +29,7 @@ IF (ARCH_I386 OR ARCH_X86_64)
ELSEIF (OS_ANDROID AND ARCH_I386)
# 32-bit Android has some problems with register allocation, so we fall back to default implementation
ELSE()
- IF (CLANG)
+ IF (CLANG)
CFLAGS(-DCRCUTIL_USE_MM_CRC32=1)
IF (ARCH_I386)
# clang doesn't support this as optimization attribute and has problems with register allocation
@@ -42,17 +42,17 @@ IF (ARCH_I386 OR ARCH_X86_64)
multiword_64_64_gcc_i386_mmx.cc
)
ENDIF()
- ELSE()
- CFLAGS(
+ ELSE()
+ CFLAGS(
-mcrc32
-DCRCUTIL_USE_MM_CRC32=1
- )
- ENDIF()
- SRCS(
- multiword_128_64_gcc_amd64_sse2.cc
- multiword_64_64_gcc_amd64_asm.cc
+ )
+ ENDIF()
+ SRCS(
+ multiword_128_64_gcc_amd64_sse2.cc
+ multiword_64_64_gcc_amd64_asm.cc
)
- ENDIF()
+ ENDIF()
IF (OS_WINDOWS)
SRCS(
crc32c_sse4.cc
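
The ya.make above toggles CRCUTIL_USE_MM_CRC32 and -mcrc32 depending on compiler and architecture. As an illustration of what such a flag typically gates (this is not crcutil's code), a CRC-32C routine can switch between the SSE4.2 intrinsic and a plain bitwise fallback; both variants produce the standard check value for "123456789".

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#if defined(CRCUTIL_USE_MM_CRC32) && defined(__SSE4_2__)
#include <nmmintrin.h>
static uint32_t crc32c(const uint8_t* p, size_t n) {
  uint32_t crc = ~0u;                       /* hardware CRC32C instruction */
  for (size_t i = 0; i < n; ++i) crc = _mm_crc32_u8(crc, p[i]);
  return ~crc;
}
#else
static uint32_t crc32c(const uint8_t* p, size_t n) {
  uint32_t crc = ~0u;                       /* plain bitwise fallback */
  for (size_t i = 0; i < n; ++i) {
    crc ^= p[i];
    for (int k = 0; k < 8; ++k)
      crc = (crc >> 1) ^ (0x82F63B78u & (0u - (crc & 1u)));
  }
  return ~crc;
}
#endif

int main(void) {
  const uint8_t msg[] = "123456789";
  printf("crc32c=%08X\n", (unsigned)crc32c(msg, 9));  /* expected E3069283 */
  return 0;
}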
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt
index 718c123c48..f2ee7fef0c 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4-x86_64.txt
@@ -1,35 +1,35 @@
-absvti2
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-subvti3
-ucmpti2
-udivmodti4
-udivti3
-umodti3
+absvti2
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+subvti3
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt
index 844be5088f..70d3644f27 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/10.4.txt
@@ -1,96 +1,96 @@
-apple_versioning
-absvdi2
-absvsi2
-adddf3
-addsf3
-addvdi3
-addvsi3
-ashldi3
-ashrdi3
-clear_cache
-clzdi2
-clzsi2
-cmpdi2
-ctzdi2
-ctzsi2
-divdc3
-divdf3
-divdi3
-divmoddi4
-divmodsi4
-divsc3
-divsf3
-divsi3
-divxc3
-enable_execute_stack
-comparedf2
-comparesf2
-extendhfsf2
-extendsfdf2
-ffsdi2
-fixdfdi
-fixdfsi
-fixsfdi
-fixsfsi
-fixunsdfdi
-fixunsdfsi
-fixunssfdi
-fixunssfsi
-fixunsxfdi
-fixunsxfsi
-fixxfdi
-floatdidf
-floatdisf
-floatdixf
-floatsidf
-floatsisf
-floatunsidf
-floatunsisf
-gcc_personality_v0
-gnu_f2h_ieee
-gnu_h2f_ieee
-lshrdi3
-moddi3
-modsi3
-muldc3
-muldf3
-muldi3
-mulodi4
-mulosi4
-mulsc3
-mulsf3
-mulvdi3
-mulvsi3
-mulxc3
-negdf2
-negdi2
-negsf2
-negvdi2
-negvsi2
-paritydi2
-paritysi2
-popcountdi2
-popcountsi2
-powidf2
-powisf2
-powixf2
-subdf3
-subsf3
-subvdi3
-subvsi3
-truncdfhf2
-truncdfsf2
-truncsfhf2
-ucmpdi2
-udivdi3
-udivmoddi4
-udivmodsi4
-udivsi3
-umoddi3
-umodsi3
-atomic_flag_clear
-atomic_flag_clear_explicit
-atomic_flag_test_and_set
-atomic_flag_test_and_set_explicit
-atomic_signal_fence
-atomic_thread_fence \ No newline at end of file
+apple_versioning
+absvdi2
+absvsi2
+adddf3
+addsf3
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+clear_cache
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdf3
+divdi3
+divmoddi4
+divmodsi4
+divsc3
+divsf3
+divsi3
+divxc3
+enable_execute_stack
+comparedf2
+comparesf2
+extendhfsf2
+extendsfdf2
+ffsdi2
+fixdfdi
+fixdfsi
+fixsfdi
+fixsfsi
+fixunsdfdi
+fixunsdfsi
+fixunssfdi
+fixunssfsi
+fixunsxfdi
+fixunsxfsi
+fixxfdi
+floatdidf
+floatdisf
+floatdixf
+floatsidf
+floatsisf
+floatunsidf
+floatunsisf
+gcc_personality_v0
+gnu_f2h_ieee
+gnu_h2f_ieee
+lshrdi3
+moddi3
+modsi3
+muldc3
+muldf3
+muldi3
+mulodi4
+mulosi4
+mulsc3
+mulsf3
+mulvdi3
+mulvsi3
+mulxc3
+negdf2
+negdi2
+negsf2
+negvdi2
+negvsi2
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+powixf2
+subdf3
+subsf3
+subvdi3
+subvsi3
+truncdfhf2
+truncdfsf2
+truncsfhf2
+ucmpdi2
+udivdi3
+udivmoddi4
+udivmodsi4
+udivsi3
+umoddi3
+umodsi3
+atomic_flag_clear
+atomic_flag_clear_explicit
+atomic_flag_test_and_set
+atomic_flag_test_and_set_explicit
+atomic_signal_fence
+atomic_thread_fence \ No newline at end of file
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt
index bc6fcefc20..266e422152 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/CMakeLists.txt
@@ -1,4 +1,4 @@
-file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt)
-foreach(filter_file ${filter_files})
- set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file})
-endforeach()
+file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt)
+foreach(filter_file ${filter_files})
+ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file})
+endforeach()
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT b/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT
index 50b1e2d513..173eccca6d 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/README.TXT
@@ -1,11 +1,11 @@
-This folder contains list of symbols that should be excluded from the builtin
-libraries for Darwin. There are two reasons symbols are excluded:
-
-(1) They aren't supported on Darwin
-(2) They are contained within the OS on the minimum supported target
-
-The builtin libraries must contain all symbols not provided by the lowest
-supported target OS. Meaning if minimum deployment target is iOS 6, all builtins
-not included in the ios6-<arch>.txt files need to be included. The one catch is
-that this is per-architecture. Since iOS 6 doesn't support arm64, when supporting
-iOS 6, the minimum deployment target for arm64 binaries is iOS 7.
+This folder contains list of symbols that should be excluded from the builtin
+libraries for Darwin. There are two reasons symbols are excluded:
+
+(1) They aren't supported on Darwin
+(2) They are contained within the OS on the minimum supported target
+
+The builtin libraries must contain all symbols not provided by the lowest
+supported target OS. Meaning if minimum deployment target is iOS 6, all builtins
+not included in the ios6-<arch>.txt files need to be included. The one catch is
+that this is per-architecture. Since iOS 6 doesn't support arm64, when supporting
+iOS 6, the minimum deployment target for arm64 binaries is iOS 7.
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt
index 4500f5f720..6aa542f7fe 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7.txt
@@ -1,57 +1,57 @@
-absvti2
-addtf3
-addvti3
-aeabi_cdcmp
-aeabi_cdcmpeq_check_nan
-aeabi_cfcmp
-aeabi_cfcmpeq_check_nan
-aeabi_dcmp
-aeabi_div0
-aeabi_drsub
-aeabi_fcmp
-aeabi_frsub
-aeabi_idivmod
-aeabi_ldivmod
-aeabi_memcmp
-aeabi_memcpy
-aeabi_memmove
-aeabi_memset
-aeabi_uidivmod
-aeabi_uldivmod
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divtf3
-divti3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-multf3
-multi3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subtf3
-subvti3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
+absvti2
+addtf3
+addvti3
+aeabi_cdcmp
+aeabi_cdcmpeq_check_nan
+aeabi_cfcmp
+aeabi_cfcmpeq_check_nan
+aeabi_dcmp
+aeabi_div0
+aeabi_drsub
+aeabi_fcmp
+aeabi_frsub
+aeabi_idivmod
+aeabi_ldivmod
+aeabi_memcmp
+aeabi_memcpy
+aeabi_memmove
+aeabi_memset
+aeabi_uidivmod
+aeabi_uldivmod
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divtf3
+divti3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+multf3
+multi3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subtf3
+subvti3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt
index 735a87b05a..28167aa4c5 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios-armv7s.txt
@@ -1,57 +1,57 @@
-absvti2
-addtf3
-addvti3
-aeabi_cdcmp
-aeabi_cdcmpeq_check_nan
-aeabi_cfcmp
-aeabi_cfcmpeq_check_nan
-aeabi_dcmp
-aeabi_div0
-aeabi_drsub
-aeabi_fcmp
-aeabi_frsub
-aeabi_idivmod
-aeabi_ldivmod
-aeabi_memcmp
-aeabi_memcpy
-aeabi_memmove
-aeabi_memset
-aeabi_uidivmod
-aeabi_uldivmod
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divtf3
-divti3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-multf
-multi3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subtf3
-subvti3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
+absvti2
+addtf3
+addvti3
+aeabi_cdcmp
+aeabi_cdcmpeq_check_nan
+aeabi_cfcmp
+aeabi_cfcmpeq_check_nan
+aeabi_dcmp
+aeabi_div0
+aeabi_drsub
+aeabi_fcmp
+aeabi_frsub
+aeabi_idivmod
+aeabi_ldivmod
+aeabi_memcmp
+aeabi_memcpy
+aeabi_memmove
+aeabi_memset
+aeabi_uidivmod
+aeabi_uldivmod
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divtf3
+divti3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+multf
+multi3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subtf3
+subvti3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt
index 8e02d08299..5db24000a1 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios.txt
@@ -1 +1 @@
-apple_versioning
+apple_versioning
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt
index bc83c6dd0a..b01fa711a3 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7.txt
@@ -1,120 +1,120 @@
-absvdi2
-absvsi2
-adddf3
-adddf3vfp
-addsf3
-addsf3vfp
-addvdi3
-addvsi3
-ashldi3
-ashrdi3
-bswapdi2
-bswapsi2
-clzdi2
-clzsi2
-cmpdi2
-ctzdi2
-ctzsi2
-divdc3
-divdf3
-divdf3vfp
-divdi3
-divmodsi4
-divsc3
-divsf3
-divsf3vfp
-divsi3
-eqdf2
-eqdf2vfp
-eqsf2
-eqsf2vfp
-extendsfdf2
-extendsfdf2vfp
-ffsdi2
-fixdfdi
-fixdfsi
-fixdfsivfp
-fixsfdi
-fixsfsi
-fixsfsivfp
-fixunsdfdi
-fixunsdfsi
-fixunsdfsivfp
-fixunssfdi
-fixunssfsi
-fixunssfsivfp
-floatdidf
-floatdisf
-floatsidf
-floatsidfvfp
-floatsisf
-floatsisfvfp
-floatundidf
-floatundisf
-floatunsidf
-floatunsisf
-floatunssidfvfp
-floatunssisfvfp
-gcc_personality_sj0
-gedf2
-gedf2vfp
-gesf2
-gesf2vfp
-gtdf2
-gtdf2vfp
-gtsf2
-gtsf2vfp
-ledf2
-ledf2vfp
-lesf2
-lesf2vfp
-lshrdi3
-ltdf2
-ltdf2vfp
-ltsf2
-ltsf2vfp
-moddi3
-modsi3
-muldc3
-muldf3
-muldf3vfp
-muldi3
-mulodi4
-mulosi4
-mulsc3
-mulsf3
-mulsf3vfp
-mulvdi3
-mulvsi3
-nedf2
-nedf2vfp
-negdi2
-negvdi2
-negvsi2
-nesf2
-nesf2vfp
-paritydi2
-paritysi2
-popcountdi2
-popcountsi2
-powidf2
-powisf2
-subdf3
-subdf3vfp
-subsf3
-subsf3vfp
-subvdi3
-subvsi3
-truncdfsf2
-truncdfsf2vfp
-ucmpdi2
-udivdi3
-udivmoddi4
-udivmodsi4
-udivsi3
-umoddi3
-umodsi3
-unorddf2
-unorddf2vfp
-unordsf2
-unordsf2vfp
+absvdi2
+absvsi2
+adddf3
+adddf3vfp
+addsf3
+addsf3vfp
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+bswapdi2
+bswapsi2
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdf3
+divdf3vfp
+divdi3
+divmodsi4
+divsc3
+divsf3
+divsf3vfp
+divsi3
+eqdf2
+eqdf2vfp
+eqsf2
+eqsf2vfp
+extendsfdf2
+extendsfdf2vfp
+ffsdi2
+fixdfdi
+fixdfsi
+fixdfsivfp
+fixsfdi
+fixsfsi
+fixsfsivfp
+fixunsdfdi
+fixunsdfsi
+fixunsdfsivfp
+fixunssfdi
+fixunssfsi
+fixunssfsivfp
+floatdidf
+floatdisf
+floatsidf
+floatsidfvfp
+floatsisf
+floatsisfvfp
+floatundidf
+floatundisf
+floatunsidf
+floatunsisf
+floatunssidfvfp
+floatunssisfvfp
+gcc_personality_sj0
+gedf2
+gedf2vfp
+gesf2
+gesf2vfp
+gtdf2
+gtdf2vfp
+gtsf2
+gtsf2vfp
+ledf2
+ledf2vfp
+lesf2
+lesf2vfp
+lshrdi3
+ltdf2
+ltdf2vfp
+ltsf2
+ltsf2vfp
+moddi3
+modsi3
+muldc3
+muldf3
+muldf3vfp
+muldi3
+mulodi4
+mulosi4
+mulsc3
+mulsf3
+mulsf3vfp
+mulvdi3
+mulvsi3
+nedf2
+nedf2vfp
+negdi2
+negvdi2
+negvsi2
+nesf2
+nesf2vfp
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+subdf3
+subdf3vfp
+subsf3
+subsf3vfp
+subvdi3
+subvsi3
+truncdfsf2
+truncdfsf2vfp
+ucmpdi2
+udivdi3
+udivmoddi4
+udivmodsi4
+udivsi3
+umoddi3
+umodsi3
+unorddf2
+unorddf2vfp
+unordsf2
+unordsf2vfp
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt
index bc83c6dd0a..b01fa711a3 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios6-armv7s.txt
@@ -1,120 +1,120 @@
-absvdi2
-absvsi2
-adddf3
-adddf3vfp
-addsf3
-addsf3vfp
-addvdi3
-addvsi3
-ashldi3
-ashrdi3
-bswapdi2
-bswapsi2
-clzdi2
-clzsi2
-cmpdi2
-ctzdi2
-ctzsi2
-divdc3
-divdf3
-divdf3vfp
-divdi3
-divmodsi4
-divsc3
-divsf3
-divsf3vfp
-divsi3
-eqdf2
-eqdf2vfp
-eqsf2
-eqsf2vfp
-extendsfdf2
-extendsfdf2vfp
-ffsdi2
-fixdfdi
-fixdfsi
-fixdfsivfp
-fixsfdi
-fixsfsi
-fixsfsivfp
-fixunsdfdi
-fixunsdfsi
-fixunsdfsivfp
-fixunssfdi
-fixunssfsi
-fixunssfsivfp
-floatdidf
-floatdisf
-floatsidf
-floatsidfvfp
-floatsisf
-floatsisfvfp
-floatundidf
-floatundisf
-floatunsidf
-floatunsisf
-floatunssidfvfp
-floatunssisfvfp
-gcc_personality_sj0
-gedf2
-gedf2vfp
-gesf2
-gesf2vfp
-gtdf2
-gtdf2vfp
-gtsf2
-gtsf2vfp
-ledf2
-ledf2vfp
-lesf2
-lesf2vfp
-lshrdi3
-ltdf2
-ltdf2vfp
-ltsf2
-ltsf2vfp
-moddi3
-modsi3
-muldc3
-muldf3
-muldf3vfp
-muldi3
-mulodi4
-mulosi4
-mulsc3
-mulsf3
-mulsf3vfp
-mulvdi3
-mulvsi3
-nedf2
-nedf2vfp
-negdi2
-negvdi2
-negvsi2
-nesf2
-nesf2vfp
-paritydi2
-paritysi2
-popcountdi2
-popcountsi2
-powidf2
-powisf2
-subdf3
-subdf3vfp
-subsf3
-subsf3vfp
-subvdi3
-subvsi3
-truncdfsf2
-truncdfsf2vfp
-ucmpdi2
-udivdi3
-udivmoddi4
-udivmodsi4
-udivsi3
-umoddi3
-umodsi3
-unorddf2
-unorddf2vfp
-unordsf2
-unordsf2vfp
+absvdi2
+absvsi2
+adddf3
+adddf3vfp
+addsf3
+addsf3vfp
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+bswapdi2
+bswapsi2
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdf3
+divdf3vfp
+divdi3
+divmodsi4
+divsc3
+divsf3
+divsf3vfp
+divsi3
+eqdf2
+eqdf2vfp
+eqsf2
+eqsf2vfp
+extendsfdf2
+extendsfdf2vfp
+ffsdi2
+fixdfdi
+fixdfsi
+fixdfsivfp
+fixsfdi
+fixsfsi
+fixsfsivfp
+fixunsdfdi
+fixunsdfsi
+fixunsdfsivfp
+fixunssfdi
+fixunssfsi
+fixunssfsivfp
+floatdidf
+floatdisf
+floatsidf
+floatsidfvfp
+floatsisf
+floatsisfvfp
+floatundidf
+floatundisf
+floatunsidf
+floatunsisf
+floatunssidfvfp
+floatunssisfvfp
+gcc_personality_sj0
+gedf2
+gedf2vfp
+gesf2
+gesf2vfp
+gtdf2
+gtdf2vfp
+gtsf2
+gtsf2vfp
+ledf2
+ledf2vfp
+lesf2
+lesf2vfp
+lshrdi3
+ltdf2
+ltdf2vfp
+ltsf2
+ltsf2vfp
+moddi3
+modsi3
+muldc3
+muldf3
+muldf3vfp
+muldi3
+mulodi4
+mulosi4
+mulsc3
+mulsf3
+mulsf3vfp
+mulvdi3
+mulvsi3
+nedf2
+nedf2vfp
+negdi2
+negvdi2
+negvsi2
+nesf2
+nesf2vfp
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+subdf3
+subdf3vfp
+subsf3
+subsf3vfp
+subvdi3
+subvsi3
+truncdfsf2
+truncdfsf2vfp
+ucmpdi2
+udivdi3
+udivmoddi4
+udivmodsi4
+udivsi3
+umoddi3
+umodsi3
+unorddf2
+unorddf2vfp
+unordsf2
+unordsf2vfp
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt
index 3e0c2b25a7..5e4caf9e9f 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/ios7-arm64.txt
@@ -1,16 +1,16 @@
-clzti2
-divti3
-fixdfti
-fixsfti
-fixunsdfti
-floattidf
-floattisf
-floatuntidf
-floatuntisf
-gcc_personality_v0
-modti3
-powidf2
-powisf2
-udivmodti4
-udivti3
-umodti3
+clzti2
+divti3
+fixdfti
+fixsfti
+fixunsdfti
+floattidf
+floattisf
+floatuntidf
+floatuntisf
+gcc_personality_v0
+modti3
+powidf2
+powisf2
+udivmodti4
+udivti3
+umodti3
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt
index fc9372cc7e..60c0e2d650 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-i386.txt
@@ -1,82 +1,82 @@
-absvti2
-addtf3
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-divtf3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-multf3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subvti3
-subtf3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
-absvti2
-addtf3
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-divtf3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-multf3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subvti3
-subtf3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt
index 5a25ce4041..de1574e6ce 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim-x86_64.txt
@@ -1,12 +1,12 @@
-addtf3
-divtf3
-multf3
-powitf2
-subtf3
-trampoline_setup
-addtf3
-divtf3
-multf3
-powitf2
-subtf3
-trampoline_setup
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt
index 8e02d08299..5db24000a1 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/iossim.txt
@@ -1 +1 @@
-apple_versioning
+apple_versioning
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt
index fc9372cc7e..60c0e2d650 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-i386.txt
@@ -1,82 +1,82 @@
-absvti2
-addtf3
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-divtf3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-multf3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subvti3
-subtf3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
-absvti2
-addtf3
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-divtf3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-multf3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subvti3
-subtf3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
+absvti2
+addtf3
+addvti3
+ashlti3
+ashrti3
+clzti2
+cmpti2
+ctzti2
+divti3
+divtf3
+ffsti2
+fixdfti
+fixsfti
+fixunsdfti
+fixunssfti
+fixunsxfti
+fixxfti
+floattidf
+floattisf
+floattixf
+floatuntidf
+floatuntisf
+floatuntixf
+lshrti3
+modti3
+muloti4
+multi3
+multf3
+mulvti3
+negti2
+negvti2
+parityti2
+popcountti2
+powitf2
+subvti3
+subtf3
+trampoline_setup
+ucmpti2
+udivmodti4
+udivti3
+umodti3
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt
index 5a25ce4041..de1574e6ce 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx-x86_64.txt
@@ -1,12 +1,12 @@
-addtf3
-divtf3
-multf3
-powitf2
-subtf3
-trampoline_setup
-addtf3
-divtf3
-multf3
-powitf2
-subtf3
-trampoline_setup
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
diff --git a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt
index 8e02d08299..5db24000a1 100644
--- a/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt
+++ b/contrib/libs/cxxsupp/builtins/Darwin-excludes/osx.txt
@@ -1 +1 @@
-apple_versioning
+apple_versioning
diff --git a/contrib/libs/cxxsupp/builtins/README.txt b/contrib/libs/cxxsupp/builtins/README.txt
index c5d95c2c96..ad36e4e527 100644
--- a/contrib/libs/cxxsupp/builtins/README.txt
+++ b/contrib/libs/cxxsupp/builtins/README.txt
@@ -1,345 +1,345 @@
-Compiler-RT
-================================
-
-This directory and its subdirectories contain source code for the compiler
-support routines.
-
-Compiler-RT is open source software. You may freely distribute it under the
-terms of the license agreement found in LICENSE.txt.
-
-================================
-
-This is a replacement library for libgcc. Each function is contained
-in its own file. Each function has a corresponding unit test under
-test/Unit.
-
-A rudimentary script to test each file is in the file called
-test/Unit/test.
-
-Here is the specification for this library:
-
-http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc
-
-Here is a synopsis of the contents of this library:
-
-typedef int si_int;
-typedef unsigned su_int;
-
-typedef long long di_int;
-typedef unsigned long long du_int;
-
-// Integral bit manipulation
-
-di_int __ashldi3(di_int a, si_int b); // a << b
-ti_int __ashlti3(ti_int a, si_int b); // a << b
-
-di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill)
-ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill)
-di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill)
-ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill)
-
-si_int __clzsi2(si_int a); // count leading zeros
-si_int __clzdi2(di_int a); // count leading zeros
-si_int __clzti2(ti_int a); // count leading zeros
-si_int __ctzsi2(si_int a); // count trailing zeros
-si_int __ctzdi2(di_int a); // count trailing zeros
-si_int __ctzti2(ti_int a); // count trailing zeros
-
-si_int __ffsdi2(di_int a); // find least significant 1 bit
-si_int __ffsti2(ti_int a); // find least significant 1 bit
-
-si_int __paritysi2(si_int a); // bit parity
-si_int __paritydi2(di_int a); // bit parity
-si_int __parityti2(ti_int a); // bit parity
-
-si_int __popcountsi2(si_int a); // bit population
-si_int __popcountdi2(di_int a); // bit population
-si_int __popcountti2(ti_int a); // bit population
-
-uint32_t __bswapsi2(uint32_t a); // a byteswapped, arm only
-uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm only
-
-// Integral arithmetic
-
-di_int __negdi2 (di_int a); // -a
-ti_int __negti2 (ti_int a); // -a
-di_int __muldi3 (di_int a, di_int b); // a * b
-ti_int __multi3 (ti_int a, ti_int b); // a * b
-si_int __divsi3 (si_int a, si_int b); // a / b signed
-di_int __divdi3 (di_int a, di_int b); // a / b signed
-ti_int __divti3 (ti_int a, ti_int b); // a / b signed
-su_int __udivsi3 (su_int n, su_int d); // a / b unsigned
-du_int __udivdi3 (du_int a, du_int b); // a / b unsigned
-tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned
-si_int __modsi3 (si_int a, si_int b); // a % b signed
-di_int __moddi3 (di_int a, di_int b); // a % b signed
-ti_int __modti3 (ti_int a, ti_int b); // a % b signed
-su_int __umodsi3 (su_int a, su_int b); // a % b unsigned
-du_int __umoddi3 (du_int a, du_int b); // a % b unsigned
-tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned
-du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned
-tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned
-su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned
-si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed
-
-
-
-// Integral arithmetic with trapping overflow
-
-si_int __absvsi2(si_int a); // abs(a)
-di_int __absvdi2(di_int a); // abs(a)
-ti_int __absvti2(ti_int a); // abs(a)
-
-si_int __negvsi2(si_int a); // -a
-di_int __negvdi2(di_int a); // -a
-ti_int __negvti2(ti_int a); // -a
-
-si_int __addvsi3(si_int a, si_int b); // a + b
-di_int __addvdi3(di_int a, di_int b); // a + b
-ti_int __addvti3(ti_int a, ti_int b); // a + b
-
-si_int __subvsi3(si_int a, si_int b); // a - b
-di_int __subvdi3(di_int a, di_int b); // a - b
-ti_int __subvti3(ti_int a, ti_int b); // a - b
-
-si_int __mulvsi3(si_int a, si_int b); // a * b
-di_int __mulvdi3(di_int a, di_int b); // a * b
-ti_int __mulvti3(ti_int a, ti_int b); // a * b
-
-
-// Integral arithmetic which returns if overflow
-
-si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range
-di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range
-ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to
- one if result not in signed range
-
-
-// Integral comparison: a < b -> 0
-// a == b -> 1
-// a > b -> 2
-
-si_int __cmpdi2 (di_int a, di_int b);
-si_int __cmpti2 (ti_int a, ti_int b);
-si_int __ucmpdi2(du_int a, du_int b);
-si_int __ucmpti2(tu_int a, tu_int b);
-
-// Integral / floating point conversion
-
-di_int __fixsfdi( float a);
-di_int __fixdfdi( double a);
-di_int __fixxfdi(long double a);
-
-ti_int __fixsfti( float a);
-ti_int __fixdfti( double a);
-ti_int __fixxfti(long double a);
-uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation
-
-su_int __fixunssfsi( float a);
-su_int __fixunsdfsi( double a);
-su_int __fixunsxfsi(long double a);
-
-du_int __fixunssfdi( float a);
-du_int __fixunsdfdi( double a);
-du_int __fixunsxfdi(long double a);
-
-tu_int __fixunssfti( float a);
-tu_int __fixunsdfti( double a);
-tu_int __fixunsxfti(long double a);
-uint64_t __fixunstfdi(long double input); // ppc only
-
-float __floatdisf(di_int a);
-double __floatdidf(di_int a);
-long double __floatdixf(di_int a);
-long double __floatditf(int64_t a); // ppc only
-
-float __floattisf(ti_int a);
-double __floattidf(ti_int a);
-long double __floattixf(ti_int a);
-
-float __floatundisf(du_int a);
-double __floatundidf(du_int a);
-long double __floatundixf(du_int a);
-long double __floatunditf(uint64_t a); // ppc only
-
-float __floatuntisf(tu_int a);
-double __floatuntidf(tu_int a);
-long double __floatuntixf(tu_int a);
-
-// Floating point raised to integer power
-
-float __powisf2( float a, si_int b); // a ^ b
-double __powidf2( double a, si_int b); // a ^ b
-long double __powixf2(long double a, si_int b); // a ^ b
-long double __powitf2(long double a, si_int b); // ppc only, a ^ b
-
-// Complex arithmetic
-
-// (a + ib) * (c + id)
-
- float _Complex __mulsc3( float a, float b, float c, float d);
- double _Complex __muldc3(double a, double b, double c, double d);
-long double _Complex __mulxc3(long double a, long double b,
- long double c, long double d);
-long double _Complex __multc3(long double a, long double b,
- long double c, long double d); // ppc only
-
-// (a + ib) / (c + id)
-
- float _Complex __divsc3( float a, float b, float c, float d);
- double _Complex __divdc3(double a, double b, double c, double d);
-long double _Complex __divxc3(long double a, long double b,
- long double c, long double d);
-long double _Complex __divtc3(long double a, long double b,
- long double c, long double d); // ppc only
-
-
-// Runtime support
-
-// __clear_cache() is used to tell the process that new instructions have been
-// written to an address range. Necessary on processors that do not have
-// a unified instruction and data cache.
-void __clear_cache(void* start, void* end);
-
-// __enable_execute_stack() is used with nested functions when a trampoline
-// function is written onto the stack and that page range needs to be made
-// executable.
-void __enable_execute_stack(void* addr);
-
-// __gcc_personality_v0() is normally only called by the system unwinder.
-// C code (as opposed to C++) normally does not need a personality function
-// because there are no catch clauses or destructors to be run. But there
-// is a C language extension __attribute__((cleanup(func))) which marks local
-// variables as needing the cleanup function "func" to be run when the
-// variable goes out of scope. That includes when an exception is thrown,
-// so a personality handler is needed.
-_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions,
- uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
- _Unwind_Context_t context);
-
-// for use with some implementations of assert() in <assert.h>
-void __eprintf(const char* format, const char* assertion_expression,
- const char* line, const char* file);
-
-// for systems with emulated thread local storage
-void* __emutls_get_address(struct __emutls_control*);
-
-
-// Power PC specific functions
-
-// There is no C interface to the saveFP/restFP functions. They are helper
-// functions called by the prolog and epilog of functions that need to save
-// a number of non-volatile floating point registers.
-saveFP
-restFP
-
-// PowerPC has a standard template for trampoline functions. This function
-// generates a custom trampoline function with the specific realFunc
-// and localsPtr values.
-void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
- const void* realFunc, void* localsPtr);
-
-// adds two 128-bit double-double precision values ( x + y )
-long double __gcc_qadd(long double x, long double y);
-
-// subtracts two 128-bit double-double precision values ( x - y )
-long double __gcc_qsub(long double x, long double y);
-
-// multiplies two 128-bit double-double precision values ( x * y )
-long double __gcc_qmul(long double x, long double y);
-
-// divides two 128-bit double-double precision values ( x / y )
-long double __gcc_qdiv(long double a, long double b);
-
-
-// ARM specific functions
-
-// There is no C interface to the switch* functions. These helper functions
-// are only needed by Thumb1 code for efficient switch table generation.
-switch16
-switch32
-switch8
-switchu8
-
-// There is no C interface to the *_vfp_d8_d15_regs functions. These are
-// called in the prolog and epilog of Thumb1 functions. When the C++ ABI uses
-// SJLJ for exceptions, each function with a catch clause or destructors needs
-// to save and restore all registers in its prolog and epilog. But there is
-// no way to access vector and high float registers from thumb1 code, so the
-// compiler must add call outs to these helper functions in the prolog and
-// epilog.
-restore_vfp_d8_d15_regs
-save_vfp_d8_d15_regs
-
-
-// Note: long ago ARM processors did not have floating point hardware support.
-// Floating point was done in software and floating point parameters were
-// passed in integer registers. When hardware support was added for floating
-// point, new *vfp functions were added to do the same operations but with
-// floating point parameters in floating point registers.
-
-// Undocumented functions
-
-float __addsf3vfp(float a, float b); // Appears to return a + b
-double __adddf3vfp(double a, double b); // Appears to return a + b
-float __divsf3vfp(float a, float b); // Appears to return a / b
-double __divdf3vfp(double a, double b); // Appears to return a / b
-int __eqsf2vfp(float a, float b); // Appears to return one
- // iff a == b and neither is NaN.
-int __eqdf2vfp(double a, double b); // Appears to return one
- // iff a == b and neither is NaN.
-double __extendsfdf2vfp(float a); // Appears to convert from
- // float to double.
-int __fixdfsivfp(double a); // Appears to convert from
- // double to int.
-int __fixsfsivfp(float a); // Appears to convert from
- // float to int.
-unsigned int __fixunssfsivfp(float a); // Appears to convert from
- // float to unsigned int.
-unsigned int __fixunsdfsivfp(double a); // Appears to convert from
- // double to unsigned int.
-double __floatsidfvfp(int a); // Appears to convert from
- // int to double.
-float __floatsisfvfp(int a); // Appears to convert from
- // int to float.
-double __floatunssidfvfp(unsigned int a); // Appears to convert from
-                                          //  unsigned int to double.
-float __floatunssisfvfp(unsigned int a); // Appears to convert from
-                                          //  unsigned int to float.
-int __gedf2vfp(double a, double b); // Appears to return __gedf2
- // (a >= b)
-int __gesf2vfp(float a, float b); // Appears to return __gesf2
- // (a >= b)
-int __gtdf2vfp(double a, double b); // Appears to return __gtdf2
- // (a > b)
-int __gtsf2vfp(float a, float b); // Appears to return __gtsf2
- // (a > b)
-int __ledf2vfp(double a, double b); // Appears to return __ledf2
- // (a <= b)
-int __lesf2vfp(float a, float b); // Appears to return __lesf2
- // (a <= b)
-int __ltdf2vfp(double a, double b); // Appears to return __ltdf2
- // (a < b)
-int __ltsf2vfp(float a, float b); // Appears to return __ltsf2
- // (a < b)
-double __muldf3vfp(double a, double b); // Appears to return a * b
-float __mulsf3vfp(float a, float b); // Appears to return a * b
-int __nedf2vfp(double a, double b); // Appears to return __nedf2
- // (a != b)
-double __negdf2vfp(double a); // Appears to return -a
-float __negsf2vfp(float a); // Appears to return -a
-float __negsf2vfp(float a); // Appears to return -a
-double __subdf3vfp(double a, double b); // Appears to return a - b
-float __subsf3vfp(float a, float b); // Appears to return a - b
-float __truncdfsf2vfp(double a); // Appears to convert from
- // double to float.
-int __unorddf2vfp(double a, double b); // Appears to return __unorddf2
-int __unordsf2vfp(float a, float b); // Appears to return __unordsf2
-
-
-Preconditions are listed for each function at the definition when there are any.
-Any preconditions reflect the specification at
-http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc.
-
-Assumptions are listed in "int_lib.h", and in individual files. Where possible
-assumptions are checked at compile time.
+Compiler-RT
+================================
+
+This directory and its subdirectories contain source code for the compiler
+support routines.
+
+Compiler-RT is open source software. You may freely distribute it under the
+terms of the license agreement found in LICENSE.txt.
+
+================================
+
+This is a replacement library for libgcc. Each function is contained
+in its own file. Each function has a corresponding unit test under
+test/Unit.
+
+A rudimentary script to test each file is in the file called
+test/Unit/test.
+
+Here is the specification for this library:
+
+http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc
+
+Here is a synopsis of the contents of this library:
+
+typedef int si_int;
+typedef unsigned su_int;
+
+typedef long long di_int;
+typedef unsigned long long du_int;
+
+// Integral bit manipulation
+
+di_int __ashldi3(di_int a, si_int b); // a << b
+ti_int __ashlti3(ti_int a, si_int b); // a << b
+
+di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill)
+ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill)
+di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill)
+ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill)
+
+si_int __clzsi2(si_int a); // count leading zeros
+si_int __clzdi2(di_int a); // count leading zeros
+si_int __clzti2(ti_int a); // count leading zeros
+si_int __ctzsi2(si_int a); // count trailing zeros
+si_int __ctzdi2(di_int a); // count trailing zeros
+si_int __ctzti2(ti_int a); // count trailing zeros
+
+si_int __ffsdi2(di_int a); // find least significant 1 bit
+si_int __ffsti2(ti_int a); // find least significant 1 bit
+
+si_int __paritysi2(si_int a); // bit parity
+si_int __paritydi2(di_int a); // bit parity
+si_int __parityti2(ti_int a); // bit parity
+
+si_int __popcountsi2(si_int a); // bit population
+si_int __popcountdi2(di_int a); // bit population
+si_int __popcountti2(ti_int a); // bit population
+
+uint32_t __bswapsi2(uint32_t a); // a byteswapped, arm only
+uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm only
+
+// Integral arithmetic
+
+di_int __negdi2 (di_int a); // -a
+ti_int __negti2 (ti_int a); // -a
+di_int __muldi3 (di_int a, di_int b); // a * b
+ti_int __multi3 (ti_int a, ti_int b); // a * b
+si_int __divsi3 (si_int a, si_int b); // a / b signed
+di_int __divdi3 (di_int a, di_int b); // a / b signed
+ti_int __divti3 (ti_int a, ti_int b); // a / b signed
+su_int __udivsi3 (su_int n, su_int d); // a / b unsigned
+du_int __udivdi3 (du_int a, du_int b); // a / b unsigned
+tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned
+si_int __modsi3 (si_int a, si_int b); // a % b signed
+di_int __moddi3 (di_int a, di_int b); // a % b signed
+ti_int __modti3 (ti_int a, ti_int b); // a % b signed
+su_int __umodsi3 (su_int a, su_int b); // a % b unsigned
+du_int __umoddi3 (du_int a, du_int b); // a % b unsigned
+tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned
+du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned
+tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned
+su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned
+si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed
+
+
+
+// Integral arithmetic with trapping overflow
+
+si_int __absvsi2(si_int a); // abs(a)
+di_int __absvdi2(di_int a); // abs(a)
+ti_int __absvti2(ti_int a); // abs(a)
+
+si_int __negvsi2(si_int a); // -a
+di_int __negvdi2(di_int a); // -a
+ti_int __negvti2(ti_int a); // -a
+
+si_int __addvsi3(si_int a, si_int b); // a + b
+di_int __addvdi3(di_int a, di_int b); // a + b
+ti_int __addvti3(ti_int a, ti_int b); // a + b
+
+si_int __subvsi3(si_int a, si_int b); // a - b
+di_int __subvdi3(di_int a, di_int b); // a - b
+ti_int __subvti3(ti_int a, ti_int b); // a - b
+
+si_int __mulvsi3(si_int a, si_int b); // a * b
+di_int __mulvdi3(di_int a, di_int b); // a * b
+ti_int __mulvti3(ti_int a, ti_int b); // a * b
+
+
+// Integral arithmetic which returns if overflow
+
+si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range
+di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range
+ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to
+ one if result not in signed range
+
+
+// Integral comparison: a < b -> 0
+// a == b -> 1
+// a > b -> 2
+
+si_int __cmpdi2 (di_int a, di_int b);
+si_int __cmpti2 (ti_int a, ti_int b);
+si_int __ucmpdi2(du_int a, du_int b);
+si_int __ucmpti2(tu_int a, tu_int b);
+
+// Integral / floating point conversion
+
+di_int __fixsfdi( float a);
+di_int __fixdfdi( double a);
+di_int __fixxfdi(long double a);
+
+ti_int __fixsfti( float a);
+ti_int __fixdfti( double a);
+ti_int __fixxfti(long double a);
+uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation
+
+su_int __fixunssfsi( float a);
+su_int __fixunsdfsi( double a);
+su_int __fixunsxfsi(long double a);
+
+du_int __fixunssfdi( float a);
+du_int __fixunsdfdi( double a);
+du_int __fixunsxfdi(long double a);
+
+tu_int __fixunssfti( float a);
+tu_int __fixunsdfti( double a);
+tu_int __fixunsxfti(long double a);
+uint64_t __fixunstfdi(long double input); // ppc only
+
+float __floatdisf(di_int a);
+double __floatdidf(di_int a);
+long double __floatdixf(di_int a);
+long double __floatditf(int64_t a); // ppc only
+
+float __floattisf(ti_int a);
+double __floattidf(ti_int a);
+long double __floattixf(ti_int a);
+
+float __floatundisf(du_int a);
+double __floatundidf(du_int a);
+long double __floatundixf(du_int a);
+long double __floatunditf(uint64_t a); // ppc only
+
+float __floatuntisf(tu_int a);
+double __floatuntidf(tu_int a);
+long double __floatuntixf(tu_int a);
+
+// Floating point raised to integer power
+
+float __powisf2( float a, si_int b); // a ^ b
+double __powidf2( double a, si_int b); // a ^ b
+long double __powixf2(long double a, si_int b); // a ^ b
+long double __powitf2(long double a, si_int b); // ppc only, a ^ b
+
+// Complex arithmetic
+
+// (a + ib) * (c + id)
+
+ float _Complex __mulsc3( float a, float b, float c, float d);
+ double _Complex __muldc3(double a, double b, double c, double d);
+long double _Complex __mulxc3(long double a, long double b,
+ long double c, long double d);
+long double _Complex __multc3(long double a, long double b,
+ long double c, long double d); // ppc only
+
+// (a + ib) / (c + id)
+
+ float _Complex __divsc3( float a, float b, float c, float d);
+ double _Complex __divdc3(double a, double b, double c, double d);
+long double _Complex __divxc3(long double a, long double b,
+ long double c, long double d);
+long double _Complex __divtc3(long double a, long double b,
+ long double c, long double d); // ppc only
+
+
+// Runtime support
+
+// __clear_cache() is used to tell the process that new instructions have been
+// written to an address range. Necessary on processors that do not have
+// a unified instruction and data cache.
+void __clear_cache(void* start, void* end);
+
+// __enable_execute_stack() is used with nested functions when a trampoline
+// function is written onto the stack and that page range needs to be made
+// executable.
+void __enable_execute_stack(void* addr);
+
+// __gcc_personality_v0() is normally only called by the system unwinder.
+// C code (as opposed to C++) normally does not need a personality function
+// because there are no catch clauses or destructors to be run. But there
+// is a C language extension __attribute__((cleanup(func))) which marks local
+// variables as needing the cleanup function "func" to be run when the
+// variable goes out of scope. That includes when an exception is thrown,
+// so a personality handler is needed.
+_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions,
+ uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+ _Unwind_Context_t context);
+
+// for use with some implementations of assert() in <assert.h>
+void __eprintf(const char* format, const char* assertion_expression,
+ const char* line, const char* file);
+
+// for systems with emulated thread local storage
+void* __emutls_get_address(struct __emutls_control*);
+
+
+// Power PC specific functions
+
+// There is no C interface to the saveFP/restFP functions. They are helper
+// functions called by the prolog and epilog of functions that need to save
+// a number of non-volatile floating point registers.
+saveFP
+restFP
+
+// PowerPC has a standard template for trampoline functions. This function
+// generates a custom trampoline function with the specific realFunc
+// and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
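+
+// Illustrative example (GCC nested-function extension; names made up):
+// taking the address of a nested function that captures a local forces a
+// trampoline onto the stack, which __trampoline_setup initializes on PowerPC.
+int example_outer(int base) {
+    int add_base(int x) { return x + base; }   // nested function
+    int (*fp)(int) = add_base;                  // materializes a stack trampoline
+    return fp(1);
+}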
+
+// adds two 128-bit double-double precision values ( x + y )
+long double __gcc_qadd(long double x, long double y);
+
+// subtracts two 128-bit double-double precision values ( x - y )
+long double __gcc_qsub(long double x, long double y);
+
+// multiplies two 128-bit double-double precision values ( x * y )
+long double __gcc_qmul(long double x, long double y);
+
+// divides two 128-bit double-double precision values ( x / y )
+long double __gcc_qdiv(long double a, long double b);
+
+
+// ARM specific functions
+
+// There is no C interface to the switch* functions. These helper functions
+// are only needed by Thumb1 code for efficient switch table generation.
+switch16
+switch32
+switch8
+switchu8
+
+// There is no C interface to the *_vfp_d8_d15_regs functions. They are
+// called in the prolog and epilog of Thumb1 functions. When the C++ ABI uses
+// SJLJ for exceptions, each function with a catch clause or destructors needs
+// to save and restore all registers in its prolog and epilog. But there is
+// no way to access vector and high float registers from Thumb1 code, so the
+// compiler must add call outs to these helper functions in the prolog and
+// epilog.
+restore_vfp_d8_d15_regs
+save_vfp_d8_d15_regs
+
+
+// Note: long ago ARM processors did not have floating point hardware support.
+// Floating point was done in software and floating point parameters were
+// passed in integer registers. When hardware support was added for floating
+// point, new *vfp functions were added to do the same operations but with
+// floating point parameters in floating point registers.
+
+// Undocumented functions
+
+float __addsf3vfp(float a, float b); // Appears to return a + b
+double __adddf3vfp(double a, double b); // Appears to return a + b
+float __divsf3vfp(float a, float b); // Appears to return a / b
+double __divdf3vfp(double a, double b); // Appears to return a / b
+int __eqsf2vfp(float a, float b); // Appears to return one
+ // iff a == b and neither is NaN.
+int __eqdf2vfp(double a, double b); // Appears to return one
+ // iff a == b and neither is NaN.
+double __extendsfdf2vfp(float a); // Appears to convert from
+ // float to double.
+int __fixdfsivfp(double a); // Appears to convert from
+ // double to int.
+int __fixsfsivfp(float a); // Appears to convert from
+ // float to int.
+unsigned int __fixunssfsivfp(float a); // Appears to convert from
+ // float to unsigned int.
+unsigned int __fixunsdfsivfp(double a); // Appears to convert from
+ // double to unsigned int.
+double __floatsidfvfp(int a); // Appears to convert from
+ // int to double.
+float __floatsisfvfp(int a); // Appears to convert from
+ // int to float.
+double __floatunssidfvfp(unsigned int a); // Appears to convert from
+                                          // unsigned int to double.
+float __floatunssisfvfp(unsigned int a); // Appears to convert from
+                                          // unsigned int to float.
+int __gedf2vfp(double a, double b); // Appears to return __gedf2
+ // (a >= b)
+int __gesf2vfp(float a, float b); // Appears to return __gesf2
+ // (a >= b)
+int __gtdf2vfp(double a, double b); // Appears to return __gtdf2
+ // (a > b)
+int __gtsf2vfp(float a, float b); // Appears to return __gtsf2
+ // (a > b)
+int __ledf2vfp(double a, double b); // Appears to return __ledf2
+ // (a <= b)
+int __lesf2vfp(float a, float b); // Appears to return __lesf2
+ // (a <= b)
+int __ltdf2vfp(double a, double b); // Appears to return __ltdf2
+ // (a < b)
+int __ltsf2vfp(float a, float b); // Appears to return __ltsf2
+ // (a < b)
+double __muldf3vfp(double a, double b); // Appears to return a * b
+float __mulsf3vfp(float a, float b); // Appears to return a * b
+int __nedf2vfp(double a, double b); // Appears to return __nedf2
+ // (a != b)
+double __negdf2vfp(double a); // Appears to return -a
+float __negsf2vfp(float a); // Appears to return -a
+int __nesf2vfp(float a, float b);         // Appears to return __nesf2
+                                          // (a != b)
+double __subdf3vfp(double a, double b); // Appears to return a - b
+float __subsf3vfp(float a, float b); // Appears to return a - b
+float __truncdfsf2vfp(double a); // Appears to convert from
+ // double to float.
+int __unorddf2vfp(double a, double b); // Appears to return __unorddf2
+int __unordsf2vfp(float a, float b); // Appears to return __unordsf2
+
+
+Preconditions are listed for each function at the definition when there are any.
+Any preconditions reflect the specification at
+http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc.
+
+Assumptions are listed in "int_lib.h", and in individual files. Where possible
+assumptions are checked at compile time.
diff --git a/contrib/libs/cxxsupp/builtins/absvdi2.c b/contrib/libs/cxxsupp/builtins/absvdi2.c
index f87098395a..682c2355d2 100644
--- a/contrib/libs/cxxsupp/builtins/absvdi2.c
+++ b/contrib/libs/cxxsupp/builtins/absvdi2.c
@@ -1,29 +1,29 @@
-/*===-- absvdi2.c - Implement __absvdi2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===
- *
- * This file implements __absvdi2 for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: absolute value */
-
-/* Effects: aborts if abs(x) < 0 */
-
-COMPILER_RT_ABI di_int
-__absvdi2(di_int a)
-{
- const int N = (int)(sizeof(di_int) * CHAR_BIT);
- if (a == ((di_int)1 << (N-1)))
- compilerrt_abort();
- const di_int t = a >> (N - 1);
- return (a ^ t) - t;
-}
+/*===-- absvdi2.c - Implement __absvdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __absvdi2 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: absolute value */
+
+/* Effects: aborts if abs(x) < 0 */
+
+COMPILER_RT_ABI di_int
+__absvdi2(di_int a)
+{
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ if (a == ((di_int)1 << (N-1)))
+ compilerrt_abort();
+ const di_int t = a >> (N - 1);
+ return (a ^ t) - t;
+}
diff --git a/contrib/libs/cxxsupp/builtins/absvsi2.c b/contrib/libs/cxxsupp/builtins/absvsi2.c
index e7d2a82c98..4812af8159 100644
--- a/contrib/libs/cxxsupp/builtins/absvsi2.c
+++ b/contrib/libs/cxxsupp/builtins/absvsi2.c
@@ -1,29 +1,29 @@
-/* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __absvsi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: absolute value */
-
-/* Effects: aborts if abs(x) < 0 */
-
-COMPILER_RT_ABI si_int
-__absvsi2(si_int a)
-{
- const int N = (int)(sizeof(si_int) * CHAR_BIT);
- if (a == (1 << (N-1)))
- compilerrt_abort();
- const si_int t = a >> (N - 1);
- return (a ^ t) - t;
-}
+/* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __absvsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: absolute value */
+
+/* Effects: aborts if abs(x) < 0 */
+
+COMPILER_RT_ABI si_int
+__absvsi2(si_int a)
+{
+ const int N = (int)(sizeof(si_int) * CHAR_BIT);
+ if (a == (1 << (N-1)))
+ compilerrt_abort();
+ const si_int t = a >> (N - 1);
+ return (a ^ t) - t;
+}
diff --git a/contrib/libs/cxxsupp/builtins/absvti2.c b/contrib/libs/cxxsupp/builtins/absvti2.c
index 945673abff..7927770c9a 100644
--- a/contrib/libs/cxxsupp/builtins/absvti2.c
+++ b/contrib/libs/cxxsupp/builtins/absvti2.c
@@ -1,34 +1,34 @@
-/* ===-- absvti2.c - Implement __absvdi2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __absvti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: absolute value */
-
-/* Effects: aborts if abs(x) < 0 */
-
-COMPILER_RT_ABI ti_int
-__absvti2(ti_int a)
-{
- const int N = (int)(sizeof(ti_int) * CHAR_BIT);
- if (a == ((ti_int)1 << (N-1)))
- compilerrt_abort();
- const ti_int s = a >> (N - 1);
- return (a ^ s) - s;
-}
-
-#endif /* CRT_HAS_128BIT */
-
+/* ===-- absvti2.c - Implement __absvdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __absvti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: absolute value */
+
+/* Effects: aborts if abs(x) < 0 */
+
+COMPILER_RT_ABI ti_int
+__absvti2(ti_int a)
+{
+ const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+ if (a == ((ti_int)1 << (N-1)))
+ compilerrt_abort();
+ const ti_int s = a >> (N - 1);
+ return (a ^ s) - s;
+}
+
+#endif /* CRT_HAS_128BIT */
+
diff --git a/contrib/libs/cxxsupp/builtins/adddf3.c b/contrib/libs/cxxsupp/builtins/adddf3.c
index 7d80a36394..8b7aae0a6f 100644
--- a/contrib/libs/cxxsupp/builtins/adddf3.c
+++ b/contrib/libs/cxxsupp/builtins/adddf3.c
@@ -1,22 +1,22 @@
-//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_add_impl.inc"
-
-ARM_EABI_FNALIAS(dadd, adddf3)
-
-COMPILER_RT_ABI double __adddf3(double a, double b){
- return __addXf3__(a, b);
-}
+//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_add_impl.inc"
+
+ARM_EABI_FNALIAS(dadd, adddf3)
+
+COMPILER_RT_ABI double __adddf3(double a, double b){
+ return __addXf3__(a, b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/addsf3.c b/contrib/libs/cxxsupp/builtins/addsf3.c
index eddb1c6360..0f5d6ea409 100644
--- a/contrib/libs/cxxsupp/builtins/addsf3.c
+++ b/contrib/libs/cxxsupp/builtins/addsf3.c
@@ -1,22 +1,22 @@
-//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_add_impl.inc"
-
-ARM_EABI_FNALIAS(fadd, addsf3)
-
-COMPILER_RT_ABI float __addsf3(float a, float b) {
- return __addXf3__(a, b);
-}
+//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_add_impl.inc"
+
+ARM_EABI_FNALIAS(fadd, addsf3)
+
+COMPILER_RT_ABI float __addsf3(float a, float b) {
+ return __addXf3__(a, b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/addtf3.c b/contrib/libs/cxxsupp/builtins/addtf3.c
index 7841492b1c..e4bbe0227a 100644
--- a/contrib/libs/cxxsupp/builtins/addtf3.c
+++ b/contrib/libs/cxxsupp/builtins/addtf3.c
@@ -1,25 +1,25 @@
-//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float addition with the IEEE-754
-// default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-#include "fp_add_impl.inc"
-
-COMPILER_RT_ABI long double __addtf3(long double a, long double b){
- return __addXf3__(a, b);
-}
-
-#endif
+//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI long double __addtf3(long double a, long double b){
+ return __addXf3__(a, b);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/addvdi3.c b/contrib/libs/cxxsupp/builtins/addvdi3.c
index 323d2eba79..0da3894567 100644
--- a/contrib/libs/cxxsupp/builtins/addvdi3.c
+++ b/contrib/libs/cxxsupp/builtins/addvdi3.c
@@ -1,36 +1,36 @@
-/* ===-- addvdi3.c - Implement __addvdi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __addvdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a + b */
-
-/* Effects: aborts if a + b overflows */
-
-COMPILER_RT_ABI di_int
-__addvdi3(di_int a, di_int b)
-{
- di_int s = (du_int) a + (du_int) b;
- if (b >= 0)
- {
- if (s < a)
- compilerrt_abort();
- }
- else
- {
- if (s >= a)
- compilerrt_abort();
- }
- return s;
-}
+/* ===-- addvdi3.c - Implement __addvdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __addvdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a + b */
+
+/* Effects: aborts if a + b overflows */
+
+COMPILER_RT_ABI di_int
+__addvdi3(di_int a, di_int b)
+{
+ di_int s = (du_int) a + (du_int) b;
+ if (b >= 0)
+ {
+ if (s < a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s >= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/libs/cxxsupp/builtins/addvsi3.c b/contrib/libs/cxxsupp/builtins/addvsi3.c
index 55d14ecbdb..94ca726f42 100644
--- a/contrib/libs/cxxsupp/builtins/addvsi3.c
+++ b/contrib/libs/cxxsupp/builtins/addvsi3.c
@@ -1,36 +1,36 @@
-/* ===-- addvsi3.c - Implement __addvsi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __addvsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a + b */
-
-/* Effects: aborts if a + b overflows */
-
-COMPILER_RT_ABI si_int
-__addvsi3(si_int a, si_int b)
-{
- si_int s = (su_int) a + (su_int) b;
- if (b >= 0)
- {
- if (s < a)
- compilerrt_abort();
- }
- else
- {
- if (s >= a)
- compilerrt_abort();
- }
- return s;
-}
+/* ===-- addvsi3.c - Implement __addvsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __addvsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a + b */
+
+/* Effects: aborts if a + b overflows */
+
+COMPILER_RT_ABI si_int
+__addvsi3(si_int a, si_int b)
+{
+ si_int s = (su_int) a + (su_int) b;
+ if (b >= 0)
+ {
+ if (s < a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s >= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/libs/cxxsupp/builtins/addvti3.c b/contrib/libs/cxxsupp/builtins/addvti3.c
index fb2acff91a..c224de60aa 100644
--- a/contrib/libs/cxxsupp/builtins/addvti3.c
+++ b/contrib/libs/cxxsupp/builtins/addvti3.c
@@ -1,40 +1,40 @@
-/* ===-- addvti3.c - Implement __addvti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __addvti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a + b */
-
-/* Effects: aborts if a + b overflows */
-
-COMPILER_RT_ABI ti_int
-__addvti3(ti_int a, ti_int b)
-{
- ti_int s = (tu_int) a + (tu_int) b;
- if (b >= 0)
- {
- if (s < a)
- compilerrt_abort();
- }
- else
- {
- if (s >= a)
- compilerrt_abort();
- }
- return s;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- addvti3.c - Implement __addvti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __addvti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a + b */
+
+/* Effects: aborts if a + b overflows */
+
+COMPILER_RT_ABI ti_int
+__addvti3(ti_int a, ti_int b)
+{
+ ti_int s = (tu_int) a + (tu_int) b;
+ if (b >= 0)
+ {
+ if (s < a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s >= a)
+ compilerrt_abort();
+ }
+ return s;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/apple_versioning.c b/contrib/libs/cxxsupp/builtins/apple_versioning.c
index 2b852f87ff..3797a1ab02 100644
--- a/contrib/libs/cxxsupp/builtins/apple_versioning.c
+++ b/contrib/libs/cxxsupp/builtins/apple_versioning.c
@@ -1,350 +1,350 @@
-/* ===-- apple_versioning.c - Adds versioning symbols for ld ---------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-
-#if __APPLE__
- #include <Availability.h>
-
- #if __IPHONE_OS_VERSION_MIN_REQUIRED
- #define NOT_HERE_BEFORE_10_6(sym)
- #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \
- extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \
- extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \
- extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \
- extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp50 = 0;
- #else
- #define NOT_HERE_BEFORE_10_6(sym) \
- extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
- extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
- #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \
- extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \
- extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \
- extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp6 = 0;
- #endif
-
-
-/* Symbols in libSystem.dylib in 10.6 and later,
- * but are in libgcc_s.dylib in earlier versions
- */
-
-NOT_HERE_BEFORE_10_6(__absvdi2)
-NOT_HERE_BEFORE_10_6(__absvsi2)
-NOT_HERE_BEFORE_10_6(__absvti2)
-NOT_HERE_BEFORE_10_6(__addvdi3)
-NOT_HERE_BEFORE_10_6(__addvsi3)
-NOT_HERE_BEFORE_10_6(__addvti3)
-NOT_HERE_BEFORE_10_6(__ashldi3)
-NOT_HERE_BEFORE_10_6(__ashlti3)
-NOT_HERE_BEFORE_10_6(__ashrdi3)
-NOT_HERE_BEFORE_10_6(__ashrti3)
-NOT_HERE_BEFORE_10_6(__clear_cache)
-NOT_HERE_BEFORE_10_6(__clzdi2)
-NOT_HERE_BEFORE_10_6(__clzsi2)
-NOT_HERE_BEFORE_10_6(__clzti2)
-NOT_HERE_BEFORE_10_6(__cmpdi2)
-NOT_HERE_BEFORE_10_6(__cmpti2)
-NOT_HERE_BEFORE_10_6(__ctzdi2)
-NOT_HERE_BEFORE_10_6(__ctzsi2)
-NOT_HERE_BEFORE_10_6(__ctzti2)
-NOT_HERE_BEFORE_10_6(__divdc3)
-NOT_HERE_BEFORE_10_6(__divdi3)
-NOT_HERE_BEFORE_10_6(__divsc3)
-NOT_HERE_BEFORE_10_6(__divtc3)
-NOT_HERE_BEFORE_10_6(__divti3)
-NOT_HERE_BEFORE_10_6(__divxc3)
-NOT_HERE_BEFORE_10_6(__enable_execute_stack)
-NOT_HERE_BEFORE_10_6(__ffsdi2)
-NOT_HERE_BEFORE_10_6(__ffsti2)
-NOT_HERE_BEFORE_10_6(__fixdfdi)
-NOT_HERE_BEFORE_10_6(__fixdfti)
-NOT_HERE_BEFORE_10_6(__fixsfdi)
-NOT_HERE_BEFORE_10_6(__fixsfti)
-NOT_HERE_BEFORE_10_6(__fixtfdi)
-NOT_HERE_BEFORE_10_6(__fixunsdfdi)
-NOT_HERE_BEFORE_10_6(__fixunsdfsi)
-NOT_HERE_BEFORE_10_6(__fixunsdfti)
-NOT_HERE_BEFORE_10_6(__fixunssfdi)
-NOT_HERE_BEFORE_10_6(__fixunssfsi)
-NOT_HERE_BEFORE_10_6(__fixunssfti)
-NOT_HERE_BEFORE_10_6(__fixunstfdi)
-NOT_HERE_BEFORE_10_6(__fixunsxfdi)
-NOT_HERE_BEFORE_10_6(__fixunsxfsi)
-NOT_HERE_BEFORE_10_6(__fixunsxfti)
-NOT_HERE_BEFORE_10_6(__fixxfdi)
-NOT_HERE_BEFORE_10_6(__fixxfti)
-NOT_HERE_BEFORE_10_6(__floatdidf)
-NOT_HERE_BEFORE_10_6(__floatdisf)
-NOT_HERE_BEFORE_10_6(__floatditf)
-NOT_HERE_BEFORE_10_6(__floatdixf)
-NOT_HERE_BEFORE_10_6(__floattidf)
-NOT_HERE_BEFORE_10_6(__floattisf)
-NOT_HERE_BEFORE_10_6(__floattixf)
-NOT_HERE_BEFORE_10_6(__floatundidf)
-NOT_HERE_BEFORE_10_6(__floatundisf)
-NOT_HERE_BEFORE_10_6(__floatunditf)
-NOT_HERE_BEFORE_10_6(__floatundixf)
-NOT_HERE_BEFORE_10_6(__floatuntidf)
-NOT_HERE_BEFORE_10_6(__floatuntisf)
-NOT_HERE_BEFORE_10_6(__floatuntixf)
-NOT_HERE_BEFORE_10_6(__gcc_personality_v0)
-NOT_HERE_BEFORE_10_6(__lshrdi3)
-NOT_HERE_BEFORE_10_6(__lshrti3)
-NOT_HERE_BEFORE_10_6(__moddi3)
-NOT_HERE_BEFORE_10_6(__modti3)
-NOT_HERE_BEFORE_10_6(__muldc3)
-NOT_HERE_BEFORE_10_6(__muldi3)
-NOT_HERE_BEFORE_10_6(__mulsc3)
-NOT_HERE_BEFORE_10_6(__multc3)
-NOT_HERE_BEFORE_10_6(__multi3)
-NOT_HERE_BEFORE_10_6(__mulvdi3)
-NOT_HERE_BEFORE_10_6(__mulvsi3)
-NOT_HERE_BEFORE_10_6(__mulvti3)
-NOT_HERE_BEFORE_10_6(__mulxc3)
-NOT_HERE_BEFORE_10_6(__negdi2)
-NOT_HERE_BEFORE_10_6(__negti2)
-NOT_HERE_BEFORE_10_6(__negvdi2)
-NOT_HERE_BEFORE_10_6(__negvsi2)
-NOT_HERE_BEFORE_10_6(__negvti2)
-NOT_HERE_BEFORE_10_6(__paritydi2)
-NOT_HERE_BEFORE_10_6(__paritysi2)
-NOT_HERE_BEFORE_10_6(__parityti2)
-NOT_HERE_BEFORE_10_6(__popcountdi2)
-NOT_HERE_BEFORE_10_6(__popcountsi2)
-NOT_HERE_BEFORE_10_6(__popcountti2)
-NOT_HERE_BEFORE_10_6(__powidf2)
-NOT_HERE_BEFORE_10_6(__powisf2)
-NOT_HERE_BEFORE_10_6(__powitf2)
-NOT_HERE_BEFORE_10_6(__powixf2)
-NOT_HERE_BEFORE_10_6(__subvdi3)
-NOT_HERE_BEFORE_10_6(__subvsi3)
-NOT_HERE_BEFORE_10_6(__subvti3)
-NOT_HERE_BEFORE_10_6(__ucmpdi2)
-NOT_HERE_BEFORE_10_6(__ucmpti2)
-NOT_HERE_BEFORE_10_6(__udivdi3)
-NOT_HERE_BEFORE_10_6(__udivmoddi4)
-NOT_HERE_BEFORE_10_6(__udivmodti4)
-NOT_HERE_BEFORE_10_6(__udivti3)
-NOT_HERE_BEFORE_10_6(__umoddi3)
-NOT_HERE_BEFORE_10_6(__umodti3)
-
-
-#if __ppc__
-NOT_HERE_BEFORE_10_6(__gcc_qadd)
-NOT_HERE_BEFORE_10_6(__gcc_qdiv)
-NOT_HERE_BEFORE_10_6(__gcc_qmul)
-NOT_HERE_BEFORE_10_6(__gcc_qsub)
-NOT_HERE_BEFORE_10_6(__trampoline_setup)
-#endif /* __ppc__ */
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8)
-
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4)
-NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8)
-
-
-#if __arm__ && __DYNAMIC__
- #define NOT_HERE_UNTIL_AFTER_4_3(sym) \
- extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \
- extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \
- extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \
- extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
- extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \
- extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \
- extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp7 = 0;
-
-NOT_HERE_UNTIL_AFTER_4_3(__absvdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__absvsi2)
-NOT_HERE_UNTIL_AFTER_4_3(__adddf3)
-NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__addsf3)
-NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__addvdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__addvsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__ashldi3)
-NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2)
-NOT_HERE_UNTIL_AFTER_4_3(__clzdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__clzsi2)
-NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2)
-NOT_HERE_UNTIL_AFTER_4_3(__divdc3)
-NOT_HERE_UNTIL_AFTER_4_3(__divdf3)
-NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__divdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__divsc3)
-NOT_HERE_UNTIL_AFTER_4_3(__divsf3)
-NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__divsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__eqdf2)
-NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__eqsf2)
-NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2)
-NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp)
-NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp)
-NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp)
-NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi)
-NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp)
-NOT_HERE_UNTIL_AFTER_4_3(__floatdidf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatdisf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatsidf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp)
-NOT_HERE_UNTIL_AFTER_4_3(__floatsisf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp)
-NOT_HERE_UNTIL_AFTER_4_3(__floatundidf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatundisf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf)
-NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp)
-NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp)
-NOT_HERE_UNTIL_AFTER_4_3(__gedf2)
-NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__gesf2)
-NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__gtdf2)
-NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__gtsf2)
-NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__ledf2)
-NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__lesf2)
-NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__ltdf2)
-NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__ltsf2)
-NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__moddi3)
-NOT_HERE_UNTIL_AFTER_4_3(__modsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__muldc3)
-NOT_HERE_UNTIL_AFTER_4_3(__muldf3)
-NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__muldi3)
-NOT_HERE_UNTIL_AFTER_4_3(__mulsc3)
-NOT_HERE_UNTIL_AFTER_4_3(__mulsf3)
-NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__nedf2)
-NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__negdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__negvdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__negvsi2)
-NOT_HERE_UNTIL_AFTER_4_3(__nesf2)
-NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__paritydi2)
-NOT_HERE_UNTIL_AFTER_4_3(__paritysi2)
-NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2)
-NOT_HERE_UNTIL_AFTER_4_3(__powidf2)
-NOT_HERE_UNTIL_AFTER_4_3(__powisf2)
-NOT_HERE_UNTIL_AFTER_4_3(__subdf3)
-NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__subsf3)
-NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__subvdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__subvsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2)
-NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2)
-NOT_HERE_UNTIL_AFTER_4_3(__udivdi3)
-NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4)
-NOT_HERE_UNTIL_AFTER_4_3(__udivsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__umoddi3)
-NOT_HERE_UNTIL_AFTER_4_3(__umodsi3)
-NOT_HERE_UNTIL_AFTER_4_3(__unorddf2)
-NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp)
-NOT_HERE_UNTIL_AFTER_4_3(__unordsf2)
-NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp)
-
-NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4)
-NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4)
-#endif // __arm__ && __DYNAMIC__
-
-
-
-
-
-#else /* !__APPLE__ */
-
-extern int avoid_empty_file;
-
-#endif /* !__APPLE__*/
+/* ===-- apple_versioning.c - Adds versioning symbols for ld ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+
+#if __APPLE__
+ #include <Availability.h>
+
+ #if __IPHONE_OS_VERSION_MIN_REQUIRED
+ #define NOT_HERE_BEFORE_10_6(sym)
+ #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \
+ extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \
+ extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \
+ extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \
+ extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp50 = 0;
+ #else
+ #define NOT_HERE_BEFORE_10_6(sym) \
+ extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+ extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
+ #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \
+ extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \
+ extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \
+ extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp6 = 0;
+ #endif
+
+
+/* Symbols in libSystem.dylib in 10.6 and later,
+ * but are in libgcc_s.dylib in earlier versions
+ */
+
+NOT_HERE_BEFORE_10_6(__absvdi2)
+NOT_HERE_BEFORE_10_6(__absvsi2)
+NOT_HERE_BEFORE_10_6(__absvti2)
+NOT_HERE_BEFORE_10_6(__addvdi3)
+NOT_HERE_BEFORE_10_6(__addvsi3)
+NOT_HERE_BEFORE_10_6(__addvti3)
+NOT_HERE_BEFORE_10_6(__ashldi3)
+NOT_HERE_BEFORE_10_6(__ashlti3)
+NOT_HERE_BEFORE_10_6(__ashrdi3)
+NOT_HERE_BEFORE_10_6(__ashrti3)
+NOT_HERE_BEFORE_10_6(__clear_cache)
+NOT_HERE_BEFORE_10_6(__clzdi2)
+NOT_HERE_BEFORE_10_6(__clzsi2)
+NOT_HERE_BEFORE_10_6(__clzti2)
+NOT_HERE_BEFORE_10_6(__cmpdi2)
+NOT_HERE_BEFORE_10_6(__cmpti2)
+NOT_HERE_BEFORE_10_6(__ctzdi2)
+NOT_HERE_BEFORE_10_6(__ctzsi2)
+NOT_HERE_BEFORE_10_6(__ctzti2)
+NOT_HERE_BEFORE_10_6(__divdc3)
+NOT_HERE_BEFORE_10_6(__divdi3)
+NOT_HERE_BEFORE_10_6(__divsc3)
+NOT_HERE_BEFORE_10_6(__divtc3)
+NOT_HERE_BEFORE_10_6(__divti3)
+NOT_HERE_BEFORE_10_6(__divxc3)
+NOT_HERE_BEFORE_10_6(__enable_execute_stack)
+NOT_HERE_BEFORE_10_6(__ffsdi2)
+NOT_HERE_BEFORE_10_6(__ffsti2)
+NOT_HERE_BEFORE_10_6(__fixdfdi)
+NOT_HERE_BEFORE_10_6(__fixdfti)
+NOT_HERE_BEFORE_10_6(__fixsfdi)
+NOT_HERE_BEFORE_10_6(__fixsfti)
+NOT_HERE_BEFORE_10_6(__fixtfdi)
+NOT_HERE_BEFORE_10_6(__fixunsdfdi)
+NOT_HERE_BEFORE_10_6(__fixunsdfsi)
+NOT_HERE_BEFORE_10_6(__fixunsdfti)
+NOT_HERE_BEFORE_10_6(__fixunssfdi)
+NOT_HERE_BEFORE_10_6(__fixunssfsi)
+NOT_HERE_BEFORE_10_6(__fixunssfti)
+NOT_HERE_BEFORE_10_6(__fixunstfdi)
+NOT_HERE_BEFORE_10_6(__fixunsxfdi)
+NOT_HERE_BEFORE_10_6(__fixunsxfsi)
+NOT_HERE_BEFORE_10_6(__fixunsxfti)
+NOT_HERE_BEFORE_10_6(__fixxfdi)
+NOT_HERE_BEFORE_10_6(__fixxfti)
+NOT_HERE_BEFORE_10_6(__floatdidf)
+NOT_HERE_BEFORE_10_6(__floatdisf)
+NOT_HERE_BEFORE_10_6(__floatditf)
+NOT_HERE_BEFORE_10_6(__floatdixf)
+NOT_HERE_BEFORE_10_6(__floattidf)
+NOT_HERE_BEFORE_10_6(__floattisf)
+NOT_HERE_BEFORE_10_6(__floattixf)
+NOT_HERE_BEFORE_10_6(__floatundidf)
+NOT_HERE_BEFORE_10_6(__floatundisf)
+NOT_HERE_BEFORE_10_6(__floatunditf)
+NOT_HERE_BEFORE_10_6(__floatundixf)
+NOT_HERE_BEFORE_10_6(__floatuntidf)
+NOT_HERE_BEFORE_10_6(__floatuntisf)
+NOT_HERE_BEFORE_10_6(__floatuntixf)
+NOT_HERE_BEFORE_10_6(__gcc_personality_v0)
+NOT_HERE_BEFORE_10_6(__lshrdi3)
+NOT_HERE_BEFORE_10_6(__lshrti3)
+NOT_HERE_BEFORE_10_6(__moddi3)
+NOT_HERE_BEFORE_10_6(__modti3)
+NOT_HERE_BEFORE_10_6(__muldc3)
+NOT_HERE_BEFORE_10_6(__muldi3)
+NOT_HERE_BEFORE_10_6(__mulsc3)
+NOT_HERE_BEFORE_10_6(__multc3)
+NOT_HERE_BEFORE_10_6(__multi3)
+NOT_HERE_BEFORE_10_6(__mulvdi3)
+NOT_HERE_BEFORE_10_6(__mulvsi3)
+NOT_HERE_BEFORE_10_6(__mulvti3)
+NOT_HERE_BEFORE_10_6(__mulxc3)
+NOT_HERE_BEFORE_10_6(__negdi2)
+NOT_HERE_BEFORE_10_6(__negti2)
+NOT_HERE_BEFORE_10_6(__negvdi2)
+NOT_HERE_BEFORE_10_6(__negvsi2)
+NOT_HERE_BEFORE_10_6(__negvti2)
+NOT_HERE_BEFORE_10_6(__paritydi2)
+NOT_HERE_BEFORE_10_6(__paritysi2)
+NOT_HERE_BEFORE_10_6(__parityti2)
+NOT_HERE_BEFORE_10_6(__popcountdi2)
+NOT_HERE_BEFORE_10_6(__popcountsi2)
+NOT_HERE_BEFORE_10_6(__popcountti2)
+NOT_HERE_BEFORE_10_6(__powidf2)
+NOT_HERE_BEFORE_10_6(__powisf2)
+NOT_HERE_BEFORE_10_6(__powitf2)
+NOT_HERE_BEFORE_10_6(__powixf2)
+NOT_HERE_BEFORE_10_6(__subvdi3)
+NOT_HERE_BEFORE_10_6(__subvsi3)
+NOT_HERE_BEFORE_10_6(__subvti3)
+NOT_HERE_BEFORE_10_6(__ucmpdi2)
+NOT_HERE_BEFORE_10_6(__ucmpti2)
+NOT_HERE_BEFORE_10_6(__udivdi3)
+NOT_HERE_BEFORE_10_6(__udivmoddi4)
+NOT_HERE_BEFORE_10_6(__udivmodti4)
+NOT_HERE_BEFORE_10_6(__udivti3)
+NOT_HERE_BEFORE_10_6(__umoddi3)
+NOT_HERE_BEFORE_10_6(__umodti3)
+
+
+#if __ppc__
+NOT_HERE_BEFORE_10_6(__gcc_qadd)
+NOT_HERE_BEFORE_10_6(__gcc_qdiv)
+NOT_HERE_BEFORE_10_6(__gcc_qmul)
+NOT_HERE_BEFORE_10_6(__gcc_qsub)
+NOT_HERE_BEFORE_10_6(__trampoline_setup)
+#endif /* __ppc__ */
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8)
+
+
+#if __arm__ && __DYNAMIC__
+ #define NOT_HERE_UNTIL_AFTER_4_3(sym) \
+ extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \
+ extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \
+ extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \
+ extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+ extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \
+ extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \
+ extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp7 = 0;
+
+NOT_HERE_UNTIL_AFTER_4_3(__absvdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__absvsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__adddf3)
+NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__addsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__addvdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__addvsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__ashldi3)
+NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__clzdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__clzsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__divdc3)
+NOT_HERE_UNTIL_AFTER_4_3(__divdf3)
+NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__divdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__divsc3)
+NOT_HERE_UNTIL_AFTER_4_3(__divsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__divsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__eqdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__eqsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatdidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatdisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatundidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatundisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gedf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gesf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gtdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gtsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ledf2)
+NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__lesf2)
+NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__ltdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ltsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__moddi3)
+NOT_HERE_UNTIL_AFTER_4_3(__modsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__muldc3)
+NOT_HERE_UNTIL_AFTER_4_3(__muldf3)
+NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__muldi3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulsc3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__nedf2)
+NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__negdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__negvdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__negvsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__nesf2)
+NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__paritydi2)
+NOT_HERE_UNTIL_AFTER_4_3(__paritysi2)
+NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__powidf2)
+NOT_HERE_UNTIL_AFTER_4_3(__powisf2)
+NOT_HERE_UNTIL_AFTER_4_3(__subdf3)
+NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__subsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__subvdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__subvsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__udivdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4)
+NOT_HERE_UNTIL_AFTER_4_3(__udivsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__umoddi3)
+NOT_HERE_UNTIL_AFTER_4_3(__umodsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__unorddf2)
+NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__unordsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp)
+
+NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4)
+NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4)
+#endif // __arm__ && __DYNAMIC__
+
+
+
+
+
+#else /* !__APPLE__ */
+
+extern int avoid_empty_file;
+
+#endif /* !__APPLE__*/
diff --git a/contrib/libs/cxxsupp/builtins/arm/Makefile.mk b/contrib/libs/cxxsupp/builtins/arm/Makefile.mk
index e41c9f2d0a..ed2e8323e3 100644
--- a/contrib/libs/cxxsupp/builtins/arm/Makefile.mk
+++ b/contrib/libs/cxxsupp/builtins/arm/Makefile.mk
@@ -1,20 +1,20 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs :=
+OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S
index 2e8608704a..2825ae92cd 100644
--- a/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/adddf3vfp.S
@@ -1,26 +1,26 @@
-//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// double __adddf3vfp(double a, double b) { return a + b; }
-//
-// Adds two double precision floating point numbers using the Darwin
-// calling convention where double arguments are passed in GPR pairs
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
- vmov d6, r0, r1 // move first param from r0/r1 pair into d6
- vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vadd.f64 d6, d6, d7
- vmov r0, r1, d6 // move result back to r0/r1 pair
- bx lr
-END_COMPILERRT_FUNCTION(__adddf3vfp)
+//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// double __adddf3vfp(double a, double b) { return a + b; }
+//
+// Adds two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vadd.f64 d6, d6, d7
+ vmov r0, r1, d6 // move result back to r0/r1 pair
+ bx lr
+END_COMPILERRT_FUNCTION(__adddf3vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S
index 80c8d1b2f6..bff5a7e0fb 100644
--- a/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/addsf3vfp.S
@@ -1,26 +1,26 @@
-//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __addsf3vfp(float a, float b);
-//
-// Adds two single precision floating point numbers using the Darwin
-// calling convention where single arguments are passed in GPRs
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
- vmov s14, r0 // move first param from r0 into float register
- vmov s15, r1 // move second param from r1 into float register
- vadd.f32 s14, s14, s15
- vmov r0, s14 // move result back to r0
- bx lr
-END_COMPILERRT_FUNCTION(__addsf3vfp)
+//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __addsf3vfp(float a, float b);
+//
+// Adds two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed in GPRs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vadd.f32 s14, s14, s15
+ vmov r0, s14 // move result back to r0
+ bx lr
+END_COMPILERRT_FUNCTION(__addsf3vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S
index ca2bd750bf..036a6f542f 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmp.S
@@ -1,96 +1,96 @@
-//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
-#error big endian support not implemented
-#endif
-
-#define APSR_Z (1 << 30)
-#define APSR_C (1 << 29)
-
-// void __aeabi_cdcmpeq(double a, double b) {
-// if (isnan(a) || isnan(b)) {
-// Z = 0; C = 1;
-// } else {
-// __aeabi_cdcmple(a, b);
-// }
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
- push {r0-r3, lr}
- bl __aeabi_cdcmpeq_check_nan
- cmp r0, #1
- pop {r0-r3, lr}
-
- // NaN has been ruled out, so __aeabi_cdcmple can't trap
- bne __aeabi_cdcmple
-
- msr CPSR_f, #APSR_C
- JMP(lr)
-END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
-
-
-// void __aeabi_cdcmple(double a, double b) {
-// if (__aeabi_dcmplt(a, b)) {
-// Z = 0; C = 0;
-// } else if (__aeabi_dcmpeq(a, b)) {
-// Z = 1; C = 1;
-// } else {
-// Z = 0; C = 1;
-// }
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
- // Per the RTABI, this function must preserve r0-r11.
- // Save lr in the same instruction for compactness
- push {r0-r3, lr}
-
- bl __aeabi_dcmplt
- cmp r0, #1
- moveq ip, #0
- beq 1f
-
- ldm sp, {r0-r3}
- bl __aeabi_dcmpeq
- cmp r0, #1
- moveq ip, #(APSR_C | APSR_Z)
- movne ip, #(APSR_C)
-
-1:
- msr CPSR_f, ip
- pop {r0-r3}
- POP_PC()
-END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
-
-// int __aeabi_cdrcmple(double a, double b) {
-// return __aeabi_cdcmple(b, a);
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
- // Swap r0 and r2
- mov ip, r0
- mov r0, r2
- mov r2, ip
-
- // Swap r1 and r3
- mov ip, r1
- mov r1, r3
- mov r3, ip
-
- b __aeabi_cdcmple
-END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
-
+//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cdcmpeq(double a, double b) {
+// if (isnan(a) || isnan(b)) {
+// Z = 0; C = 1;
+// } else {
+// __aeabi_cdcmple(a, b);
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+ push {r0-r3, lr}
+ bl __aeabi_cdcmpeq_check_nan
+ cmp r0, #1
+ pop {r0-r3, lr}
+
+ // NaN has been ruled out, so __aeabi_cdcmple can't trap
+ bne __aeabi_cdcmple
+
+ msr CPSR_f, #APSR_C
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+
+
+// void __aeabi_cdcmple(double a, double b) {
+// if (__aeabi_dcmplt(a, b)) {
+// Z = 0; C = 0;
+// } else if (__aeabi_dcmpeq(a, b)) {
+// Z = 1; C = 1;
+// } else {
+// Z = 0; C = 1;
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+ // Per the RTABI, this function must preserve r0-r11.
+ // Save lr in the same instruction for compactness
+ push {r0-r3, lr}
+
+ bl __aeabi_dcmplt
+ cmp r0, #1
+ moveq ip, #0
+ beq 1f
+
+ ldm sp, {r0-r3}
+ bl __aeabi_dcmpeq
+ cmp r0, #1
+ moveq ip, #(APSR_C | APSR_Z)
+ movne ip, #(APSR_C)
+
+1:
+ msr CPSR_f, ip
+ pop {r0-r3}
+ POP_PC()
+END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+
+// int __aeabi_cdrcmple(double a, double b) {
+// return __aeabi_cdcmple(b, a);
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+ // Swap r0 and r2
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+
+ // Swap r1 and r3
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+
+ b __aeabi_cdcmple
+END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+
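The pseudocode comments above describe results delivered in the APSR Z and C flags rather than in a register, which plain C cannot express directly. A behavioural sketch that returns the flag pair instead (the struct and function names are illustrative only):

/* Z/C pair standing in for the APSR flag state set by the real routines. */
struct cdcmp_flags { int Z, C; };

static struct cdcmp_flags aeabi_cdcmple_model(double a, double b) {
    struct cdcmp_flags f;
    if (a < b)       { f.Z = 0; f.C = 0; }
    else if (a == b) { f.Z = 1; f.C = 1; }
    else             { f.Z = 0; f.C = 1; }  /* greater or unordered */
    return f;
}

static struct cdcmp_flags aeabi_cdcmpeq_model(double a, double b) {
    if (a != a || b != b) {                  /* isnan(a) || isnan(b) */
        struct cdcmp_flags f = { 0, 1 };
        return f;
    }
    return aeabi_cdcmple_model(a, b);
}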
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c
index 85f484fea1..577f6b2c55 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cdcmpeq_check_nan.c
@@ -1,16 +1,16 @@
-//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include <stdint.h>
-
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
-int __aeabi_cdcmpeq_check_nan(double a, double b) {
- return __builtin_isnan(a) || __builtin_isnan(b);
-}
+//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+__attribute__((pcs("aapcs")))
+__attribute__((visibility("hidden")))
+int __aeabi_cdcmpeq_check_nan(double a, double b) {
+ return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S
index af8a19b18a..43594e5c39 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmp.S
@@ -1,91 +1,91 @@
-//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
-#error big endian support not implemented
-#endif
-
-#define APSR_Z (1 << 30)
-#define APSR_C (1 << 29)
-
-// void __aeabi_cfcmpeq(float a, float b) {
-// if (isnan(a) || isnan(b)) {
-// Z = 0; C = 1;
-// } else {
-// __aeabi_cfcmple(a, b);
-// }
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
- push {r0-r3, lr}
- bl __aeabi_cfcmpeq_check_nan
- cmp r0, #1
- pop {r0-r3, lr}
-
- // NaN has been ruled out, so __aeabi_cfcmple can't trap
- bne __aeabi_cfcmple
-
- msr CPSR_f, #APSR_C
- JMP(lr)
-END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
-
-
-// void __aeabi_cfcmple(float a, float b) {
-// if (__aeabi_fcmplt(a, b)) {
-// Z = 0; C = 0;
-// } else if (__aeabi_fcmpeq(a, b)) {
-// Z = 1; C = 1;
-// } else {
-// Z = 0; C = 1;
-// }
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
- // Per the RTABI, this function must preserve r0-r11.
- // Save lr in the same instruction for compactness
- push {r0-r3, lr}
-
- bl __aeabi_fcmplt
- cmp r0, #1
- moveq ip, #0
- beq 1f
-
- ldm sp, {r0-r3}
- bl __aeabi_fcmpeq
- cmp r0, #1
- moveq ip, #(APSR_C | APSR_Z)
- movne ip, #(APSR_C)
-
-1:
- msr CPSR_f, ip
- pop {r0-r3}
- POP_PC()
-END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
-
-// int __aeabi_cfrcmple(float a, float b) {
-// return __aeabi_cfcmple(b, a);
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
- // Swap r0 and r1
- mov ip, r0
- mov r0, r1
- mov r1, ip
-
- b __aeabi_cfcmple
-END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
-
+//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cfcmpeq(float a, float b) {
+// if (isnan(a) || isnan(b)) {
+// Z = 0; C = 1;
+// } else {
+// __aeabi_cfcmple(a, b);
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+ push {r0-r3, lr}
+ bl __aeabi_cfcmpeq_check_nan
+ cmp r0, #1
+ pop {r0-r3, lr}
+
+ // NaN has been ruled out, so __aeabi_cfcmple can't trap
+ bne __aeabi_cfcmple
+
+ msr CPSR_f, #APSR_C
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+
+
+// void __aeabi_cfcmple(float a, float b) {
+// if (__aeabi_fcmplt(a, b)) {
+// Z = 0; C = 0;
+// } else if (__aeabi_fcmpeq(a, b)) {
+// Z = 1; C = 1;
+// } else {
+// Z = 0; C = 1;
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+ // Per the RTABI, this function must preserve r0-r11.
+ // Save lr in the same instruction for compactness
+ push {r0-r3, lr}
+
+ bl __aeabi_fcmplt
+ cmp r0, #1
+ moveq ip, #0
+ beq 1f
+
+ ldm sp, {r0-r3}
+ bl __aeabi_fcmpeq
+ cmp r0, #1
+ moveq ip, #(APSR_C | APSR_Z)
+ movne ip, #(APSR_C)
+
+1:
+ msr CPSR_f, ip
+ pop {r0-r3}
+ POP_PC()
+END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+
+// int __aeabi_cfrcmple(float a, float b) {
+// return __aeabi_cfcmple(b, a);
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+ // Swap r0 and r1
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+
+ b __aeabi_cfcmple
+END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c
index 7727bff253..992e31fbd8 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_cfcmpeq_check_nan.c
@@ -1,16 +1,16 @@
-//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include <stdint.h>
-
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
-int __aeabi_cfcmpeq_check_nan(float a, float b) {
- return __builtin_isnan(a) || __builtin_isnan(b);
-}
+//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+__attribute__((pcs("aapcs")))
+__attribute__((visibility("hidden")))
+int __aeabi_cfcmpeq_check_nan(float a, float b) {
+ return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S
index eb413bd4dc..310c35b749 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_dcmp.S
@@ -1,40 +1,40 @@
-//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
-// int result = __{eq,lt,le,ge,gt}df2(a, b);
-// if (result {==,<,<=,>=,>} 0) {
-// return 1;
-// } else {
-// return 0;
-// }
-// }
-
-#define DEFINE_AEABI_DCMP(cond) \
- .syntax unified SEPARATOR \
- .p2align 2 SEPARATOR \
-DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
- push { r4, lr } SEPARATOR \
- bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
- cmp r0, #0 SEPARATOR \
- b ## cond 1f SEPARATOR \
- mov r0, #0 SEPARATOR \
- pop { r4, pc } SEPARATOR \
-1: SEPARATOR \
- mov r0, #1 SEPARATOR \
- pop { r4, pc } SEPARATOR \
-END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
-
-DEFINE_AEABI_DCMP(eq)
-DEFINE_AEABI_DCMP(lt)
-DEFINE_AEABI_DCMP(le)
-DEFINE_AEABI_DCMP(ge)
-DEFINE_AEABI_DCMP(gt)
+//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+// int result = __{eq,lt,le,ge,gt}df2(a, b);
+// if (result {==,<,<=,>=,>} 0) {
+// return 1;
+// } else {
+// return 0;
+// }
+// }
+
+#define DEFINE_AEABI_DCMP(cond) \
+ .syntax unified SEPARATOR \
+ .p2align 2 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
+ push { r4, lr } SEPARATOR \
+ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+ cmp r0, #0 SEPARATOR \
+ b ## cond 1f SEPARATOR \
+ mov r0, #0 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+1: SEPARATOR \
+ mov r0, #1 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
+
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
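Each DEFINE_AEABI_DCMP(cond) expansion calls the libgcc-style three-way helper for that condition and collapses its result to a 0/1 boolean. A C sketch of the "le" shape, with a stand-in for the helper (both functions are illustrative, not part of the file):

/* Stand-in for __ledf2: -1 if a < b, 0 if equal, 1 if greater or unordered. */
static int ledf2_model(double a, double b) {
    if (a < b)  return -1;
    if (a == b) return 0;
    return 1;
}

/* What the "le" expansion computes: the conditional branch is taken when the
 * helper result satisfies the condition, so the wrapper returns 1, else 0. */
static int aeabi_dcmple_model(double a, double b) {
    return ledf2_model(a, b) <= 0;
}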
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c
index 3f2785cb07..ccc95fa5c1 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_div0.c
@@ -1,43 +1,43 @@
-/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements the division by zero helper routines as specified by the
- * Run-time ABI for the ARM Architecture.
- *
- * ===----------------------------------------------------------------------===
- */
-
-/*
- * RTABI 4.3.2 - Division by zero
- *
- * The *div0 functions:
- * - Return the value passed to them as a parameter
- * - Or, return a fixed value defined by the execution environment (such as 0)
- * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return
- *
- * An application may provide its own implementations of the *div0 functions to
- * force a particular behaviour from the *div and *divmod functions called out of
- * line.
- */
-
-/* provide an unused declaration to pacify pedantic compilation */
-extern unsigned char declaration;
-
-#if defined(__ARM_EABI__)
-int __attribute__((weak)) __attribute__((visibility("hidden")))
-__aeabi_idiv0(int return_value) {
- return return_value;
-}
-
-long long __attribute__((weak)) __attribute__((visibility("hidden")))
-__aeabi_ldiv0(long long return_value) {
- return return_value;
-}
-#endif
-
+/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements the division by zero helper routines as specified by the
+ * Run-time ABI for the ARM Architecture.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+/*
+ * RTABI 4.3.2 - Division by zero
+ *
+ * The *div0 functions:
+ * - Return the value passed to them as a parameter
+ * - Or, return a fixed value defined by the execution environment (such as 0)
+ * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return
+ *
+ * An application may provide its own implementations of the *div0 functions to
+ * force a particular behaviour from the *div and *divmod functions called out of
+ * line.
+ */
+
+/* provide an unused declaration to pacify pedantic compilation */
+extern unsigned char declaration;
+
+#if defined(__ARM_EABI__)
+int __attribute__((weak)) __attribute__((visibility("hidden")))
+__aeabi_idiv0(int return_value) {
+ return return_value;
+}
+
+long long __attribute__((weak)) __attribute__((visibility("hidden")))
+__aeabi_ldiv0(long long return_value) {
+ return return_value;
+}
+#endif
+
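Because the defaults above are weak, an application can supply its own *div0 handlers to get one of the other behaviours RTABI 4.3.2 allows. A hedged example that raises SIGFPE instead of returning the parameter (not part of this file):

#include <signal.h>

int __aeabi_idiv0(int return_value) {
    raise(SIGFPE);          /* trap on integer division by zero */
    return return_value;    /* reached only if a handler returns */
}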
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c
index 8a39c6dac3..fc17d5a4cc 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_drsub.c
@@ -1,19 +1,19 @@
-//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "../fp_lib.h"
-
-COMPILER_RT_ABI fp_t
-__aeabi_dsub(fp_t, fp_t);
-
-COMPILER_RT_ABI fp_t
-__aeabi_drsub(fp_t a, fp_t b) {
- return __aeabi_dsub(b, a);
-}
+//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "../fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__aeabi_dsub(fp_t, fp_t);
+
+COMPILER_RT_ABI fp_t
+__aeabi_drsub(fp_t a, fp_t b) {
+ return __aeabi_dsub(b, a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S
index 2dab884a48..55f49a2b5a 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_fcmp.S
@@ -1,40 +1,40 @@
-//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
-// int result = __{eq,lt,le,ge,gt}sf2(a, b);
-// if (result {==,<,<=,>=,>} 0) {
-// return 1;
-// } else {
-// return 0;
-// }
-// }
-
-#define DEFINE_AEABI_FCMP(cond) \
- .syntax unified SEPARATOR \
- .p2align 2 SEPARATOR \
-DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
- push { r4, lr } SEPARATOR \
- bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
- cmp r0, #0 SEPARATOR \
- b ## cond 1f SEPARATOR \
- mov r0, #0 SEPARATOR \
- pop { r4, pc } SEPARATOR \
-1: SEPARATOR \
- mov r0, #1 SEPARATOR \
- pop { r4, pc } SEPARATOR \
-END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
-
-DEFINE_AEABI_FCMP(eq)
-DEFINE_AEABI_FCMP(lt)
-DEFINE_AEABI_FCMP(le)
-DEFINE_AEABI_FCMP(ge)
-DEFINE_AEABI_FCMP(gt)
+//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+// int result = __{eq,lt,le,ge,gt}sf2(a, b);
+// if (result {==,<,<=,>=,>} 0) {
+// return 1;
+// } else {
+// return 0;
+// }
+// }
+
+#define DEFINE_AEABI_FCMP(cond) \
+ .syntax unified SEPARATOR \
+ .p2align 2 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
+ push { r4, lr } SEPARATOR \
+ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+ cmp r0, #0 SEPARATOR \
+ b ## cond 1f SEPARATOR \
+ mov r0, #0 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+1: SEPARATOR \
+ mov r0, #1 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
+
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c b/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c
index 1d019df808..64258dc7e0 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_frsub.c
@@ -1,19 +1,19 @@
-//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "../fp_lib.h"
-
-COMPILER_RT_ABI fp_t
-__aeabi_fsub(fp_t, fp_t);
-
-COMPILER_RT_ABI fp_t
-__aeabi_frsub(fp_t a, fp_t b) {
- return __aeabi_fsub(b, a);
-}
+//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "../fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__aeabi_fsub(fp_t, fp_t);
+
+COMPILER_RT_ABI fp_t
+__aeabi_frsub(fp_t a, fp_t b) {
+ return __aeabi_fsub(b, a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S
index eb6d55529a..384add3827 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_idivmod.S
@@ -1,28 +1,28 @@
-//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// struct { int quot; int rem; } __aeabi_idivmod(int numerator, int denominator) {
-// int rem, quot;
-// quot = __divmodsi4(numerator, denominator, &rem);
-// return {quot, rem};
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
- push { lr }
- sub sp, sp, #4
- mov r2, sp
- bl SYMBOL_NAME(__divmodsi4)
- ldr r1, [sp]
- add sp, sp, #4
- pop { pc }
-END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot; int rem; } __aeabi_idivmod(int numerator, int denominator) {
+// int rem, quot;
+// quot = __divmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__divmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
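The wrapper above returns the quotient in r0 and the remainder in r1; the stack slot it carves out is the out-parameter handed to __divmodsi4. A caller-level sketch of that contract (the demo function is illustrative; the __divmodsi4 prototype matches the builtins library):

extern int __divmodsi4(int a, int b, int *rem);

static int divmod_demo(void) {
    int rem;
    int quot = __divmodsi4(7, 3, &rem);   /* quot == 2, rem == 1 */
    return quot + rem;
}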
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S
index e27f79a9c1..ad06f1de2a 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_ldivmod.S
@@ -1,31 +1,31 @@
-//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// struct { int64_t quot; int64_t rem; }
-// __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
-// int64_t rem, quot;
-// quot = __divmoddi4(numerator, denominator, &rem);
-// return {quot, rem};
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
- push {r11, lr}
- sub sp, sp, #16
- add r12, sp, #8
- str r12, [sp]
- bl SYMBOL_NAME(__divmoddi4)
- ldr r2, [sp, #8]
- ldr r3, [sp, #12]
- add sp, sp, #16
- pop {r11, pc}
-END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int64_t quot; int64_t rem; }
+// __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+// int64_t rem, quot;
+// quot = __divmoddi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+ push {r11, lr}
+ sub sp, sp, #16
+ add r12, sp, #8
+ str r12, [sp]
+ bl SYMBOL_NAME(__divmoddi4)
+ ldr r2, [sp, #8]
+ ldr r3, [sp, #12]
+ add sp, sp, #16
+ pop {r11, pc}
+END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S
index b4c7f31bd9..051ce435ba 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcmp.S
@@ -1,20 +1,20 @@
-//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
-
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
- b memcmp
-END_COMPILERRT_FUNCTION(__aeabi_memcmp)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+ b memcmp
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S
index 1d3bbc8be5..cf02332490 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memcpy.S
@@ -1,20 +1,20 @@
-//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
-
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
- b memcpy
-END_COMPILERRT_FUNCTION(__aeabi_memcpy)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+ b memcpy
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S
index 16f4da5409..4dda06f75d 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memmove.S
@@ -1,20 +1,20 @@
-//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===---------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
-
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
- b memmove
-END_COMPILERRT_FUNCTION(__aeabi_memmove)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+ b memmove
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S
index d0f8a587dc..c8b49c7809 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_memset.S
@@ -1,34 +1,34 @@
-//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
-// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
-
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
- mov r3, r1
- mov r1, r2
- mov r2, r3
- b memset
-END_COMPILERRT_FUNCTION(__aeabi_memset)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
-
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
- mov r2, r1
- mov r1, #0
- b memset
-END_COMPILERRT_FUNCTION(__aeabi_memclr)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
-
+//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+ mov r3, r1
+ mov r1, r2
+ mov r2, r3
+ b memset
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+ mov r2, r1
+ mov r1, #0
+ b memset
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
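The register shuffle above exists because the EABI entry points take (dest, n, c) and (dest, n) while memset takes (dest, c, n). The equivalent C, as a sketch (the model names are illustrative):

#include <stddef.h>
#include <string.h>

static void aeabi_memset_model(void *dest, size_t n, int c) {
    memset(dest, c, n);          /* swap r1/r2, then tail-call memset */
}

static void aeabi_memclr_model(void *dest, size_t n) {
    memset(dest, 0, n);          /* force c = 0 */
}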
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S
index 0452978c51..8ea474d91c 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_uidivmod.S
@@ -1,29 +1,29 @@
-//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// struct { unsigned quot; unsigned rem; }
-// __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
-// unsigned rem, quot;
-// quot = __udivmodsi4(numerator, denominator, &rem);
-// return {quot, rem};
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
- push { lr }
- sub sp, sp, #4
- mov r2, sp
- bl SYMBOL_NAME(__udivmodsi4)
- ldr r1, [sp]
- add sp, sp, #4
- pop { pc }
-END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot; unsigned rem; }
+// __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+// unsigned rem, quot;
+// quot = __udivmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__udivmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
diff --git a/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S b/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S
index a0e2a57c66..4e1f8e2a67 100644
--- a/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S
+++ b/contrib/libs/cxxsupp/builtins/arm/aeabi_uldivmod.S
@@ -1,31 +1,31 @@
-//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// struct { uint64_t quot; uint64_t rem; }
-// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
-// uint64_t rem, quot;
-// quot = __udivmoddi4(numerator, denominator, &rem);
-// return {quot, rem};
-// }
-
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
- push {r11, lr}
- sub sp, sp, #16
- add r12, sp, #8
- str r12, [sp]
- bl SYMBOL_NAME(__udivmoddi4)
- ldr r2, [sp, #8]
- ldr r3, [sp, #12]
- add sp, sp, #16
- pop {r11, pc}
-END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { uint64_t quot; uint64_t rem; }
+// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+// uint64_t rem, quot;
+// quot = __udivmoddi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+ push {r11, lr}
+ sub sp, sp, #16
+ add r12, sp, #8
+ str r12, [sp]
+ bl SYMBOL_NAME(__udivmoddi4)
+ ldr r2, [sp, #8]
+ ldr r3, [sp, #12]
+ add sp, sp, #16
+ pop {r11, pc}
+END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
diff --git a/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S b/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S
index 02975c7f15..86f3bba8c2 100644
--- a/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S
+++ b/contrib/libs/cxxsupp/builtins/arm/bswapdi2.S
@@ -1,47 +1,47 @@
-//===------- bswapdi2 - Implement bswapdi2 --------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-//
-// extern uint64_t __bswapdi2(uint64_t);
-//
-// Reverse all the bytes in a 64-bit integer.
-//
- .p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapdi2)
-#else
-DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
-#endif
-#if __ARM_ARCH < 6
-    // architectures before armv6 do not have the "rev" instruction
- // r2 = rev(r0)
- eor r2, r0, r0, ror #16
- bic r2, r2, #0xff0000
- mov r2, r2, lsr #8
- eor r2, r2, r0, ror #8
- // r0 = rev(r1)
- eor r0, r1, r1, ror #16
- bic r0, r0, #0xff0000
- mov r0, r0, lsr #8
- eor r0, r0, r1, ror #8
-#else
- rev r2, r0 // r2 = rev(r0)
- rev r0, r1 // r0 = rev(r1)
-#endif
- mov r1, r2 // r1 = r2 = rev(r0)
- JMP(lr)
-END_COMPILERRT_FUNCTION(__bswapdi2)
+//===------- bswapdi2 - Implement bswapdi2 --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+//
+// extern uint64_t __bswapdi2(uint64_t);
+//
+// Reverse all the bytes in a 64-bit integer.
+//
+ .p2align 2
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapdi2)
+#else
+DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
+#endif
+#if __ARM_ARCH < 6
+    // architectures before armv6 do not have the "rev" instruction
+ // r2 = rev(r0)
+ eor r2, r0, r0, ror #16
+ bic r2, r2, #0xff0000
+ mov r2, r2, lsr #8
+ eor r2, r2, r0, ror #8
+ // r0 = rev(r1)
+ eor r0, r1, r1, ror #16
+ bic r0, r0, #0xff0000
+ mov r0, r0, lsr #8
+ eor r0, r0, r1, ror #8
+#else
+ rev r2, r0 // r2 = rev(r0)
+ rev r0, r1 // r0 = rev(r1)
+#endif
+ mov r1, r2 // r1 = r2 = rev(r0)
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__bswapdi2)
diff --git a/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S b/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S
index cbb9216935..59ba8158fd 100644
--- a/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S
+++ b/contrib/libs/cxxsupp/builtins/arm/bswapsi2.S
@@ -1,39 +1,39 @@
-//===------- bswapsi2 - Implement bswapsi2 --------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-//
-// extern uint32_t __bswapsi2(uint32_t);
-//
-// Reverse all the bytes in a 32-bit integer.
-//
- .p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapsi2)
-#else
-DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
-#endif
-#if __ARM_ARCH < 6
-    // architectures before armv6 do not have the "rev" instruction
- eor r1, r0, r0, ror #16
- bic r1, r1, #0xff0000
- mov r1, r1, lsr #8
- eor r0, r1, r0, ror #8
-#else
- rev r0, r0
-#endif
- JMP(lr)
-END_COMPILERRT_FUNCTION(__bswapsi2)
+//===------- bswapsi2 - Implement bswapsi2 --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+//
+// extern uint32_t __bswapsi2(uint32_t);
+//
+// Reverse all the bytes in a 32-bit integer.
+//
+ .p2align 2
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapsi2)
+#else
+DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
+#endif
+#if __ARM_ARCH < 6
+    // architectures before armv6 do not have the "rev" instruction
+ eor r1, r0, r0, ror #16
+ bic r1, r1, #0xff0000
+ mov r1, r1, lsr #8
+ eor r0, r1, r0, ror #8
+#else
+ rev r0, r0
+#endif
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__bswapsi2)
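The pre-ARMv6 path in __bswapsi2 and __bswapdi2 emulates "rev" with an eor/bic/shift/eor sequence over rotated copies of the word. A C rendering of that trick, plus the half-swap used for the 64-bit case (the function names are illustrative):

#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned n) {
    return (x >> n) | (x << (32u - n));      /* n is 8 or 16 here */
}

static uint32_t bswapsi2_model(uint32_t x) {
    uint32_t t = x ^ ror32(x, 16);           /* eor r1, r0, r0, ror #16 */
    t &= ~UINT32_C(0x00ff0000);              /* bic r1, r1, #0xff0000   */
    return (t >> 8) ^ ror32(x, 8);           /* lsr #8; eor ..., ror #8 */
}

/* __bswapdi2 applies the same reversal to each half and swaps the halves. */
static uint64_t bswapdi2_model(uint64_t x) {
    return ((uint64_t)bswapsi2_model((uint32_t)x) << 32)
         | bswapsi2_model((uint32_t)(x >> 32));
}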
diff --git a/contrib/libs/cxxsupp/builtins/arm/clzdi2.S b/contrib/libs/cxxsupp/builtins/arm/clzdi2.S
index 28d4f8761e..a55abac046 100644
--- a/contrib/libs/cxxsupp/builtins/arm/clzdi2.S
+++ b/contrib/libs/cxxsupp/builtins/arm/clzdi2.S
@@ -1,97 +1,97 @@
-/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements count leading zeros for 64-bit arguments.
- *
- * ===----------------------------------------------------------------------===
- */
-#include "../assembly.h"
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-
- .p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__clzdi2)
-#else
-DEFINE_COMPILERRT_FUNCTION(__clzdi2)
-#endif
-#ifdef __ARM_FEATURE_CLZ
-#ifdef __ARMEB__
- cmp r0, 0
- itee ne
- clzne r0, r0
- clzeq r0, r1
- addeq r0, r0, 32
-#else
- cmp r1, 0
- itee ne
- clzne r0, r1
- clzeq r0, r0
- addeq r0, r0, 32
-#endif
- JMP(lr)
-#else
- /* Assumption: n != 0 */
-
- /*
- * r0: n
- * r1: upper half of n, overwritten after check
- * r1: count of leading zeros in n + 1
- * r2: scratch register for shifted r0
- */
-#ifdef __ARMEB__
- cmp r0, 0
- moveq r0, r1
-#else
- cmp r1, 0
- movne r0, r1
-#endif
- movne r1, 1
- moveq r1, 33
-
- /*
- * Basic block:
- * if ((r0 >> SHIFT) == 0)
- * r1 += SHIFT;
- * else
- * r0 >>= SHIFT;
- * for descending powers of two as SHIFT.
- */
-#define BLOCK(shift) \
- lsrs r2, r0, shift; \
- movne r0, r2; \
- addeq r1, shift \
-
- BLOCK(16)
- BLOCK(8)
- BLOCK(4)
- BLOCK(2)
-
- /*
- * The basic block invariants at this point are (r0 >> 2) == 0 and
- * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
- *
- * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
- * ---+----------------+----------------+------------+--------------
- * 1 | 1 | 0 | 0 | 1
- * 2 | 0 | 1 | -1 | 0
- * 3 | 0 | 1 | -1 | 0
- *
- * The initial value of 1 in r1 compensates for the 1 here.
- */
- sub r0, r1, r0, lsr #1
-
- JMP(lr)
-#endif // __ARM_FEATURE_CLZ
-END_COMPILERRT_FUNCTION(__clzdi2)
+/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 64-bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+
+ .p2align 2
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__clzdi2)
+#else
+DEFINE_COMPILERRT_FUNCTION(__clzdi2)
+#endif
+#ifdef __ARM_FEATURE_CLZ
+#ifdef __ARMEB__
+ cmp r0, 0
+ itee ne
+ clzne r0, r0
+ clzeq r0, r1
+ addeq r0, r0, 32
+#else
+ cmp r1, 0
+ itee ne
+ clzne r0, r1
+ clzeq r0, r0
+ addeq r0, r0, 32
+#endif
+ JMP(lr)
+#else
+ /* Assumption: n != 0 */
+
+ /*
+ * r0: n
+ * r1: upper half of n, overwritten after check
+ * r1: count of leading zeros in n + 1
+ * r2: scratch register for shifted r0
+ */
+#ifdef __ARMEB__
+ cmp r0, 0
+ moveq r0, r1
+#else
+ cmp r1, 0
+ movne r0, r1
+#endif
+ movne r1, 1
+ moveq r1, 33
+
+ /*
+ * Basic block:
+ * if ((r0 >> SHIFT) == 0)
+ * r1 += SHIFT;
+ * else
+ * r0 >>= SHIFT;
+ * for descending powers of two as SHIFT.
+ */
+#define BLOCK(shift) \
+ lsrs r2, r0, shift; \
+ movne r0, r2; \
+ addeq r1, shift \
+
+ BLOCK(16)
+ BLOCK(8)
+ BLOCK(4)
+ BLOCK(2)
+
+ /*
+ * The basic block invariants at this point are (r0 >> 2) == 0 and
+ * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+ *
+ * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+ * ---+----------------+----------------+------------+--------------
+ * 1 | 1 | 0 | 0 | 1
+ * 2 | 0 | 1 | -1 | 0
+ * 3 | 0 | 1 | -1 | 0
+ *
+ * The initial value of 1 in r1 compensates for the 1 here.
+ */
+ sub r0, r1, r0, lsr #1
+
+ JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzdi2)
diff --git a/contrib/libs/cxxsupp/builtins/arm/clzsi2.S b/contrib/libs/cxxsupp/builtins/arm/clzsi2.S
index d396ebe99c..1cd379bfb0 100644
--- a/contrib/libs/cxxsupp/builtins/arm/clzsi2.S
+++ b/contrib/libs/cxxsupp/builtins/arm/clzsi2.S
@@ -1,76 +1,76 @@
-/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements count leading zeros for 32-bit arguments.
- *
- * ===----------------------------------------------------------------------===
- */
-#include "../assembly.h"
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
- .p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__clzsi2)
-#else
-DEFINE_COMPILERRT_FUNCTION(__clzsi2)
-#endif
-#ifdef __ARM_FEATURE_CLZ
- clz r0, r0
- JMP(lr)
-#else
- /* Assumption: n != 0 */
-
- /*
- * r0: n
- * r1: count of leading zeros in n + 1
- * r2: scratch register for shifted r0
- */
- mov r1, 1
-
- /*
- * Basic block:
- * if ((r0 >> SHIFT) == 0)
- * r1 += SHIFT;
- * else
- * r0 >>= SHIFT;
- * for descending powers of two as SHIFT.
- */
-
-#define BLOCK(shift) \
- lsrs r2, r0, shift; \
- movne r0, r2; \
- addeq r1, shift \
-
- BLOCK(16)
- BLOCK(8)
- BLOCK(4)
- BLOCK(2)
-
- /*
- * The basic block invariants at this point are (r0 >> 2) == 0 and
- * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
- *
- * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
- * ---+----------------+----------------+------------+--------------
- * 1 | 1 | 0 | 0 | 1
- * 2 | 0 | 1 | -1 | 0
- * 3 | 0 | 1 | -1 | 0
- *
- * The initial value of 1 in r1 compensates for the 1 here.
- */
- sub r0, r1, r0, lsr #1
-
- JMP(lr)
-#endif // __ARM_FEATURE_CLZ
-END_COMPILERRT_FUNCTION(__clzsi2)
+/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 32-bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+ .p2align 2
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__clzsi2)
+#else
+DEFINE_COMPILERRT_FUNCTION(__clzsi2)
+#endif
+#ifdef __ARM_FEATURE_CLZ
+ clz r0, r0
+ JMP(lr)
+#else
+ /* Assumption: n != 0 */
+
+ /*
+ * r0: n
+ * r1: count of leading zeros in n + 1
+ * r2: scratch register for shifted r0
+ */
+ mov r1, 1
+
+ /*
+ * Basic block:
+ * if ((r0 >> SHIFT) == 0)
+ * r1 += SHIFT;
+ * else
+ * r0 >>= SHIFT;
+ * for descending powers of two as SHIFT.
+ */
+
+#define BLOCK(shift) \
+ lsrs r2, r0, shift; \
+ movne r0, r2; \
+ addeq r1, shift \
+
+ BLOCK(16)
+ BLOCK(8)
+ BLOCK(4)
+ BLOCK(2)
+
+ /*
+ * The basic block invariants at this point are (r0 >> 2) == 0 and
+ * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+ *
+ * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+ * ---+----------------+----------------+------------+--------------
+ * 1 | 1 | 0 | 0 | 1
+ * 2 | 0 | 1 | -1 | 0
+ * 3 | 0 | 1 | -1 | 0
+ *
+ * The r1's initial value of 1 compensates for the 1 here.
+ */
+ sub r0, r1, r0, lsr #1
+
+ JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzsi2)
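The fallback path above (and the 64-bit variant before it) counts leading zeros by binary search: each BLOCK either skips a power-of-two worth of zeros or shifts the value down, and the final subtraction resolves the last one or two bits. The same algorithm in C, under the file's own assumption that the argument is non-zero (the function name is illustrative):

#include <stdint.h>

static unsigned clzsi2_model(uint32_t x) {
    unsigned n = 1;                            /* biased count, as in r1 */
    if ((x >> 16) == 0) { n += 16; } else { x >>= 16; }
    if ((x >> 8)  == 0) { n += 8;  } else { x >>= 8;  }
    if ((x >> 4)  == 0) { n += 4;  } else { x >>= 4;  }
    if ((x >> 2)  == 0) { n += 2;  } else { x >>= 2;  }
    return n - (x >> 1);                       /* x is now 1..3 */
}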
diff --git a/contrib/libs/cxxsupp/builtins/arm/comparesf2.S b/contrib/libs/cxxsupp/builtins/arm/comparesf2.S
index 8effe8cd7e..cf71d36e05 100644
--- a/contrib/libs/cxxsupp/builtins/arm/comparesf2.S
+++ b/contrib/libs/cxxsupp/builtins/arm/comparesf2.S
@@ -1,148 +1,148 @@
-//===-- comparesf2.S - Implement single-precision soft-float comparisons --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the following soft-float comparison routines:
-//
-// __eqsf2 __gesf2 __unordsf2
-// __lesf2 __gtsf2
-// __ltsf2
-// __nesf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, with multiple names.
-//
-// The routines behave as follows:
-//
-// __lesf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// 1 if either a or b is NaN
-//
-// __gesf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// -1 if either a or b is NaN
-//
-// __unordsf2(a,b) returns 0 if both a and b are numbers
-// 1 if either a or b is NaN
-//
-// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-.syntax unified
-
-.p2align 2
-DEFINE_COMPILERRT_FUNCTION(__eqsf2)
- // Make copies of a and b with the sign bit shifted off the top. These will
- // be used to detect zeros and NaNs.
- mov r2, r0, lsl #1
- mov r3, r1, lsl #1
-
- // We do the comparison in three stages (ignoring NaN values for the time
- // being). First, we orr the absolute values of a and b; this sets the Z
- // flag if both a and b are zero (of either sign). The shift of r3 doesn't
- // effect this at all, but it *does* make sure that the C flag is clear for
- // the subsequent operations.
- orrs r12, r2, r3, lsr #1
-
- // Next, we check if a and b have the same or different signs. If they have
- // opposite signs, this eor will set the N flag.
- it ne
- eorsne r12, r0, r1
-
- // If a and b are equal (either both zeros or bit identical; again, we're
- // ignoring NaNs for now), this subtract will zero out r0. If they have the
- // same sign, the flags are updated as they would be for a comparison of the
- // absolute values of a and b.
- it pl
- subspl r0, r2, r3
-
- // If a is smaller in magnitude than b and both have the same sign, place
- // the negation of the sign of b in r0. Thus, if both are negative and
- // a > b, this sets r0 to 0; if both are positive and a < b, this sets
- // r0 to -1.
- //
- // This is also done if a and b have opposite signs and are not both zero,
- // because in that case the subtract was not performed and the C flag is
- // still clear from the shift argument in orrs; if a is positive and b
- // negative, this places 0 in r0; if a is negative and b positive, -1 is
- // placed in r0.
- it lo
- mvnlo r0, r1, asr #31
-
- // If a is greater in magnitude than b and both have the same sign, place
- // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed
- // in r0, which is the desired result. Conversely, if both are positive
- // and a > b, zero is placed in r0.
- it hi
- movhi r0, r1, asr #31
-
- // If you've been keeping track, at this point r0 contains -1 if a < b and
- // 0 if a >= b. All that remains to be done is to set it to 1 if a > b.
- // If a == b, then the Z flag is set, so we can get the correct final value
- // into r0 by simply or'ing with 1 if Z is clear.
- it ne
- orrne r0, r0, #1
-
- // Finally, we need to deal with NaNs. If either argument is NaN, replace
- // the value in r0 with 1.
- cmp r2, #0xff000000
- ite ls
- cmpls r3, #0xff000000
- movhi r0, #1
- JMP(lr)
-END_COMPILERRT_FUNCTION(__eqsf2)
-DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
-DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
-DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
-
-.p2align 2
-DEFINE_COMPILERRT_FUNCTION(__gtsf2)
-    // Identical to the preceding except that we return -1 for NaN values.
- // Given that the two paths share so much code, one might be tempted to
- // unify them; however, the extra code needed to do so makes the code size
- // to performance tradeoff very hard to justify for such small functions.
- mov r2, r0, lsl #1
- mov r3, r1, lsl #1
- orrs r12, r2, r3, lsr #1
- it ne
- eorsne r12, r0, r1
- it pl
- subspl r0, r2, r3
- it lo
- mvnlo r0, r1, asr #31
- it hi
- movhi r0, r1, asr #31
- it ne
- orrne r0, r0, #1
- cmp r2, #0xff000000
- ite ls
- cmpls r3, #0xff000000
- movhi r0, #-1
- JMP(lr)
-END_COMPILERRT_FUNCTION(__gtsf2)
-DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
-
-.p2align 2
-DEFINE_COMPILERRT_FUNCTION(__unordsf2)
- // Return 1 for NaN values, 0 otherwise.
- mov r2, r0, lsl #1
- mov r3, r1, lsl #1
- mov r0, #0
- cmp r2, #0xff000000
- ite ls
- cmpls r3, #0xff000000
- movhi r0, #1
- JMP(lr)
-END_COMPILERRT_FUNCTION(__unordsf2)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+//===-- comparesf2.S - Implement single-precision soft-float comparisons --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+// __eqsf2 __gesf2 __unordsf2
+// __lesf2 __gtsf2
+// __ltsf2
+// __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, with multiple names.
+//
+// The routines behave as follows:
+//
+// __lesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordsf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+.syntax unified
+
+.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+ // Make copies of a and b with the sign bit shifted off the top. These will
+ // be used to detect zeros and NaNs.
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+
+ // We do the comparison in three stages (ignoring NaN values for the time
+ // being). First, we orr the absolute values of a and b; this sets the Z
+ // flag if both a and b are zero (of either sign). The shift of r3 doesn't
+ // effect this at all, but it *does* make sure that the C flag is clear for
+ // the subsequent operations.
+ orrs r12, r2, r3, lsr #1
+
+ // Next, we check if a and b have the same or different signs. If they have
+ // opposite signs, this eor will set the N flag.
+ it ne
+ eorsne r12, r0, r1
+
+ // If a and b are equal (either both zeros or bit identical; again, we're
+ // ignoring NaNs for now), this subtract will zero out r0. If they have the
+ // same sign, the flags are updated as they would be for a comparison of the
+ // absolute values of a and b.
+ it pl
+ subspl r0, r2, r3
+
+ // If a is smaller in magnitude than b and both have the same sign, place
+ // the negation of the sign of b in r0. Thus, if both are negative and
+ // a > b, this sets r0 to 0; if both are positive and a < b, this sets
+ // r0 to -1.
+ //
+ // This is also done if a and b have opposite signs and are not both zero,
+ // because in that case the subtract was not performed and the C flag is
+ // still clear from the shift argument in orrs; if a is positive and b
+ // negative, this places 0 in r0; if a is negative and b positive, -1 is
+ // placed in r0.
+ it lo
+ mvnlo r0, r1, asr #31
+
+ // If a is greater in magnitude than b and both have the same sign, place
+ // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed
+ // in r0, which is the desired result. Conversely, if both are positive
+ // and a > b, zero is placed in r0.
+ it hi
+ movhi r0, r1, asr #31
+
+ // If you've been keeping track, at this point r0 contains -1 if a < b and
+ // 0 if a >= b. All that remains to be done is to set it to 1 if a > b.
+ // If a == b, then the Z flag is set, so we can get the correct final value
+ // into r0 by simply or'ing with 1 if Z is clear.
+ it ne
+ orrne r0, r0, #1
+
+ // Finally, we need to deal with NaNs. If either argument is NaN, replace
+ // the value in r0 with 1.
+ cmp r2, #0xff000000
+ ite ls
+ cmpls r3, #0xff000000
+ movhi r0, #1
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
+
+.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2)
+    // Identical to the preceding except that we return -1 for NaN values.
+ // Given that the two paths share so much code, one might be tempted to
+ // unify them; however, the extra code needed to do so makes the code size
+ // to performance tradeoff very hard to justify for such small functions.
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ orrs r12, r2, r3, lsr #1
+ it ne
+ eorsne r12, r0, r1
+ it pl
+ subspl r0, r2, r3
+ it lo
+ mvnlo r0, r1, asr #31
+ it hi
+ movhi r0, r1, asr #31
+ it ne
+ orrne r0, r0, #1
+ cmp r2, #0xff000000
+ ite ls
+ cmpls r3, #0xff000000
+ movhi r0, #-1
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__gtsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
+
+.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+ // Return 1 for NaN values, 0 otherwise.
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mov r0, #0
+ cmp r2, #0xff000000
+ ite ls
+ cmpls r3, #0xff000000
+ movhi r0, #1
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__unordsf2)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
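For readers not fluent in ARM assembly, the return-value contract described in the header comment of this file can be summarized by a small C model. This is an illustrative sketch only; the *_ref names are hypothetical and are not part of compiler-rt or of this diff.

    #include <math.h>

    /* __eqsf2/__lesf2/__ltsf2/__nesf2: NaN operands yield +1. */
    int lesf2_ref(float a, float b) {
        if (isnan(a) || isnan(b)) return 1;
        if (a < b)  return -1;
        if (a == b) return 0;
        return 1;
    }

    /* __gesf2/__gtsf2: identical, except that NaN operands yield -1. */
    int gesf2_ref(float a, float b) {
        if (isnan(a) || isnan(b)) return -1;
        if (a < b)  return -1;
        if (a == b) return 0;
        return 1;
    }

    /* __unordsf2: 1 iff either operand is NaN. */
    int unordsf2_ref(float a, float b) {
        return (isnan(a) || isnan(b)) ? 1 : 0;
    }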
diff --git a/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S
index 7266c235db..6eebef167a 100644
--- a/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/divdf3vfp.S
@@ -1,26 +1,26 @@
-//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __divdf3vfp(double a, double b);
-//
-// Divides two double precision floating point numbers using the Darwin
-// calling convention where double arguments are passsed in GPR pairs
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
- vmov d6, r0, r1 // move first param from r0/r1 pair into d6
- vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vdiv.f64 d5, d6, d7
- vmov r0, r1, d5 // move result back to r0/r1 pair
- bx lr
-END_COMPILERRT_FUNCTION(__divdf3vfp)
+//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __divdf3vfp(double a, double b);
+//
+// Divides two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vdiv.f64 d5, d6, d7
+ vmov r0, r1, d5 // move result back to r0/r1 pair
+ bx lr
+END_COMPILERRT_FUNCTION(__divdf3vfp)
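The divdf3vfp/divsf3vfp routines above (and the muldf3vfp/mulsf3vfp files further down) all follow one pattern: they only bridge the soft-float calling convention (arguments and results in GPRs) to the VFP register file, so their observable behaviour is plain IEEE arithmetic. A hedged behavioural model (hypothetical names, not part of the diff):

    /* Behavioural model only; the real routines exist to move values
       between GPRs and VFP registers under the Darwin soft-float ABI. */
    double divdf3vfp_ref(double a, double b) { return a / b; }
    float  divsf3vfp_ref(float a, float b)   { return a / b; }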
diff --git a/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S b/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S
index 7d62a3d3eb..646b9ab78f 100644
--- a/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/divmodsi4.S
@@ -1,74 +1,74 @@
-/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __divmodsi4 (32-bit signed integer divide and
- * modulus) function for the ARM architecture. A naive digit-by-digit
- * computation is employed for simplicity.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
-#define ESTABLISH_FRAME \
- push {r4-r7, lr} ;\
- add r7, sp, #12
-#define CLEAR_FRAME_AND_RETURN \
- pop {r4-r7, pc}
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-@ int __divmodsi4(int divident, int divisor, int *remainder)
-@ Calculate the quotient and remainder of the (signed) division. The return
-@ value is the quotient, the remainder is placed in the variable.
-
- .p2align 3
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__divmodsi4)
-#else
-DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
-#endif
-#if __ARM_ARCH_EXT_IDIV__
- tst r1, r1
- beq LOCAL_LABEL(divzero)
- mov r3, r0
- sdiv r0, r3, r1
- mls r1, r0, r1, r3
- str r1, [r2]
- bx lr
-LOCAL_LABEL(divzero):
- mov r0, #0
- bx lr
-#else
- ESTABLISH_FRAME
-// Set aside the sign of the quotient and modulus, and the address for the
-// modulus.
- eor r4, r0, r1
- mov r5, r0
- mov r6, r2
-// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
- eor ip, r0, r0, asr #31
- eor lr, r1, r1, asr #31
- sub r0, ip, r0, asr #31
- sub r1, lr, r1, asr #31
-// Unsigned divmod:
- bl SYMBOL_NAME(__udivmodsi4)
-// Apply the sign of quotient and modulus
- ldr r1, [r6]
- eor r0, r0, r4, asr #31
- eor r1, r1, r5, asr #31
- sub r0, r0, r4, asr #31
- sub r1, r1, r5, asr #31
- str r1, [r6]
- CLEAR_FRAME_AND_RETURN
-#endif
-END_COMPILERRT_FUNCTION(__divmodsi4)
+/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divmodsi4 (32-bit signed integer divide and
+ * modulus) function for the ARM architecture. A naive digit-by-digit
+ * computation is employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4-r7, lr} ;\
+ add r7, sp, #12
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4-r7, pc}
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+@ int __divmodsi4(int dividend, int divisor, int *remainder)
+@ Calculate the quotient and remainder of the (signed) division. The return
+@ value is the quotient; the remainder is written to *remainder.
+
+ .p2align 3
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__divmodsi4)
+#else
+DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+#endif
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divzero)
+ mov r3, r0
+ sdiv r0, r3, r1
+ mls r1, r0, r1, r3
+ str r1, [r2]
+ bx lr
+LOCAL_LABEL(divzero):
+ mov r0, #0
+ bx lr
+#else
+ ESTABLISH_FRAME
+// Set aside the sign of the quotient and modulus, and the address for the
+// modulus.
+ eor r4, r0, r1
+ mov r5, r0
+ mov r6, r2
+// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor ip, r0, r0, asr #31
+ eor lr, r1, r1, asr #31
+ sub r0, ip, r0, asr #31
+ sub r1, lr, r1, asr #31
+// Unsigned divmod:
+ bl SYMBOL_NAME(__udivmodsi4)
+// Apply the sign of quotient and modulus
+ ldr r1, [r6]
+ eor r0, r0, r4, asr #31
+ eor r1, r1, r5, asr #31
+ sub r0, r0, r4, asr #31
+ sub r1, r1, r5, asr #31
+ str r1, [r6]
+ CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divmodsi4)
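The branch-free sign handling used above (and reused by __divsi3 and __modsi3 below) relies on the identity abs(x) = (x ^ (x >> 31)) - (x >> 31), and re-applies a sign with the same xor/subtract pattern. A C sketch of the whole routine, with hypothetical names and assuming the two's-complement, arithmetic-shift behaviour the assembly itself relies on:

    #include <stdint.h>

    /* (x ^ s) - s negates x when s == 0xFFFFFFFF and is a no-op when s == 0. */
    static uint32_t abs32(int32_t x) {
        uint32_t s = (uint32_t)(x >> 31);       /* 0 or 0xFFFFFFFF */
        return ((uint32_t)x ^ s) - s;
    }

    static int32_t apply_sign(uint32_t v, int32_t sign_of) {
        uint32_t s = (uint32_t)(sign_of >> 31);
        return (int32_t)((v ^ s) - s);
    }

    int32_t divmodsi4_ref(int32_t a, int32_t b, int32_t *rem) {
        uint32_t q = abs32(a) / abs32(b);
        uint32_t r = abs32(a) % abs32(b);
        *rem = apply_sign(r, a);                /* remainder follows the dividend */
        return apply_sign(q, a ^ b);            /* quotient sign is sign(a) ^ sign(b) */
    }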
diff --git a/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S
index 55feaf46fc..fdbaebc883 100644
--- a/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/divsf3vfp.S
@@ -1,26 +1,26 @@
-//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __divsf3vfp(float a, float b);
-//
-// Divides two single precision floating point numbers using the Darwin
-// calling convention where single arguments are passsed like 32-bit ints.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
- vmov s14, r0 // move first param from r0 into float register
- vmov s15, r1 // move second param from r1 into float register
- vdiv.f32 s13, s14, s15
- vmov r0, s13 // move result back to r0
- bx lr
-END_COMPILERRT_FUNCTION(__divsf3vfp)
+//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __divsf3vfp(float a, float b);
+//
+// Divides two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vdiv.f32 s13, s14, s15
+ vmov r0, s13 // move result back to r0
+ bx lr
+END_COMPILERRT_FUNCTION(__divsf3vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/divsi3.S b/contrib/libs/cxxsupp/builtins/arm/divsi3.S
index 3ad8000dd6..adf8f94fc7 100644
--- a/contrib/libs/cxxsupp/builtins/arm/divsi3.S
+++ b/contrib/libs/cxxsupp/builtins/arm/divsi3.S
@@ -1,65 +1,65 @@
-/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __divsi3 (32-bit signed integer divide) function
- * for the ARM architecture as a wrapper around the unsigned routine.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
-#define ESTABLISH_FRAME \
- push {r4, r7, lr} ;\
- add r7, sp, #4
-#define CLEAR_FRAME_AND_RETURN \
- pop {r4, r7, pc}
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
- .p2align 3
-// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
-
-@ int __divsi3(int divident, int divisor)
-@ Calculate and return the quotient of the (signed) division.
-
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__divsi3)
-#else
-DEFINE_COMPILERRT_FUNCTION(__divsi3)
-#endif
-#if __ARM_ARCH_EXT_IDIV__
- tst r1,r1
- beq LOCAL_LABEL(divzero)
- sdiv r0, r0, r1
- bx lr
-LOCAL_LABEL(divzero):
- mov r0,#0
- bx lr
-#else
-ESTABLISH_FRAME
-// Set aside the sign of the quotient.
- eor r4, r0, r1
-// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
- eor r2, r0, r0, asr #31
- eor r3, r1, r1, asr #31
- sub r0, r2, r0, asr #31
- sub r1, r3, r1, asr #31
-// abs(a) / abs(b)
- bl SYMBOL_NAME(__udivsi3)
-// Apply sign of quotient to result and return.
- eor r0, r0, r4, asr #31
- sub r0, r0, r4, asr #31
- CLEAR_FRAME_AND_RETURN
-#endif
-END_COMPILERRT_FUNCTION(__divsi3)
+/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divsi3 (32-bit signed integer divide) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+ .p2align 3
+// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
+
+@ int __divsi3(int dividend, int divisor)
+@ Calculate and return the quotient of the (signed) division.
+
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__divsi3)
+#else
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+#endif
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1,r1
+ beq LOCAL_LABEL(divzero)
+ sdiv r0, r0, r1
+ bx lr
+LOCAL_LABEL(divzero):
+ mov r0,#0
+ bx lr
+#else
+ESTABLISH_FRAME
+// Set aside the sign of the quotient.
+ eor r4, r0, r1
+// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor r2, r0, r0, asr #31
+ eor r3, r1, r1, asr #31
+ sub r0, r2, r0, asr #31
+ sub r1, r3, r1, asr #31
+// abs(a) / abs(b)
+ bl SYMBOL_NAME(__udivsi3)
+// Apply sign of quotient to result and return.
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+ CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divsi3)
diff --git a/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S
index c3246f90eb..7f2fbc3072 100644
--- a/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/eqdf2vfp.S
@@ -1,29 +1,29 @@
-//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __eqdf2vfp(double a, double b);
-//
-// Returns one iff a == b and neither is NaN.
-// Uses Darwin calling convention where double precision arguments are passsed
-// like in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- moveq r0, #1 // set result register to 1 if equal
- movne r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__eqdf2vfp)
+//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqdf2vfp(double a, double b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ moveq r0, #1 // set result register to 1 if equal
+ movne r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__eqdf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S
index adcd7e8e81..a318b336ae 100644
--- a/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/eqsf2vfp.S
@@ -1,29 +1,29 @@
-//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __eqsf2vfp(float a, float b);
-//
-// Returns one iff a == b and neither is NaN.
-// Uses Darwin calling convention where single precision arguments are passsed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- moveq r0, #1 // set result register to 1 if equal
- movne r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__eqsf2vfp)
+//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqsf2vfp(float a, float b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ moveq r0, #1 // set result register to 1 if equal
+ movne r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__eqsf2vfp)
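The eqdf2vfp/eqsf2vfp routines above, and the ge/gt/le/lt variants that follow, all share one contract: return 1 if the predicate holds and neither operand is NaN, otherwise 0. A hedged C model (illustrative names only; the isnan checks are redundant in C, where comparisons are already false on NaN, but they spell out the contract the VFP compare plus conditional move implements):

    #include <math.h>

    int eqsf2vfp_ref(float a, float b)   { return !isnan(a) && !isnan(b) && a == b; }
    int gesf2vfp_ref(float a, float b)   { return !isnan(a) && !isnan(b) && a >= b; }
    int ltdf2vfp_ref(double a, double b) { return !isnan(a) && !isnan(b) && a <  b; }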
diff --git a/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S
index 63eb10284e..b998e58945 100644
--- a/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/extendsfdf2vfp.S
@@ -1,26 +1,26 @@
-//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __extendsfdf2vfp(float a);
-//
-// Converts single precision float to double precision result.
-// Uses Darwin calling convention where a single precision parameter is
-// passed in a GPR and a double precision result is returned in R0/R1 pair.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
- vmov s15, r0 // load float register from R0
- vcvt.f64.f32 d7, s15 // convert single to double
- vmov r0, r1, d7 // return result in r0/r1 pair
- bx lr
-END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __extendsfdf2vfp(float a);
+//
+// Converts single precision float to double precision result.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR and a double precision result is returned in R0/R1 pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+ vmov s15, r0 // load float register from R0
+ vcvt.f64.f32 d7, s15 // convert single to double
+ vmov r0, r1, d7 // return result in r0/r1 pair
+ bx lr
+END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S
index 628e7462b6..e3bd8e05e0 100644
--- a/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/fixdfsivfp.S
@@ -1,26 +1,26 @@
-//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __fixdfsivfp(double a);
-//
-// Converts double precision float to a 32-bit int rounding towards zero.
-// Uses Darwin calling convention where a double precision parameter is
-// passed in GPR register pair.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
- vmov d7, r0, r1 // load double register from R0/R1
- vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15
- vmov r0, s15 // move s15 to result register
- bx lr
-END_COMPILERRT_FUNCTION(__fixdfsivfp)
+//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+ vmov d7, r0, r1 // load double register from R0/R1
+ vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15
+ vmov r0, s15 // move s15 to result register
+ bx lr
+END_COMPILERRT_FUNCTION(__fixdfsivfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S
index 549d8b7ef7..3d0d0f56d2 100644
--- a/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/fixsfsivfp.S
@@ -1,26 +1,26 @@
-//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __fixsfsivfp(float a);
-//
-// Converts single precision float to a 32-bit int rounding towards zero.
-// Uses Darwin calling convention where a single precision parameter is
-// passed in a GPR..
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
- vmov s15, r0 // load float register from R0
- vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15
- vmov r0, s15 // move s15 to result register
- bx lr
-END_COMPILERRT_FUNCTION(__fixsfsivfp)
+//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixsfsivfp(float a);
+//
+// Converts single precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+ vmov s15, r0 // load float register from R0
+ vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15
+ vmov r0, s15 // move s15 to result register
+ bx lr
+END_COMPILERRT_FUNCTION(__fixsfsivfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S
index 54668fbe6e..35dda5b9b0 100644
--- a/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/fixunsdfsivfp.S
@@ -1,27 +1,27 @@
-//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern unsigned int __fixunsdfsivfp(double a);
-//
-// Converts double precision float to a 32-bit unsigned int rounding towards
-// zero. All negative values become zero.
-// Uses Darwin calling convention where a double precision parameter is
-// passed in GPR register pair.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
- vmov d7, r0, r1 // load double register from R0/R1
- vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15
- vmov r0, s15 // move s15 to result register
- bx lr
-END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunsdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+ vmov d7, r0, r1 // load double register from R0/R1
+ vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15
+ vmov r0, s15 // move s15 to result register
+ bx lr
+END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S b/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S
index a5c4f10933..5c3a7d926f 100644
--- a/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/fixunssfsivfp.S
@@ -1,27 +1,27 @@
-//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern unsigned int __fixunssfsivfp(float a);
-//
-// Converts single precision float to a 32-bit unsigned int rounding towards
-// zero. All negative values become zero.
-// Uses Darwin calling convention where a single precision parameter is
-// passed in a GPR..
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
- vmov s15, r0 // load float register from R0
- vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15
- vmov r0, s15 // move s15 to result register
- bx lr
-END_COMPILERRT_FUNCTION(__fixunssfsivfp)
+//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunssfsivfp(float a);
+//
+// Converts single precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+ vmov s15, r0 // load float register from R0
+ vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15
+ vmov r0, s15 // move s15 to result register
+ bx lr
+END_COMPILERRT_FUNCTION(__fixunssfsivfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S
index addd40640c..d69184914c 100644
--- a/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/floatsidfvfp.S
@@ -1,26 +1,26 @@
-//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __floatsidfvfp(int a);
-//
-// Converts a 32-bit int to a double precision float.
-// Uses Darwin calling convention where a double precision result is
-// return in GPR register pair.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
- vmov s15, r0 // move int to float register s15
- vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7
- vmov r0, r1, d7 // move d7 to result register pair r0/r1
- bx lr
-END_COMPILERRT_FUNCTION(__floatsidfvfp)
+//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatsidfvfp(int a);
+//
+// Converts a 32-bit int to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// returned in a GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+ vmov s15, r0 // move int to float register s15
+ vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7
+ vmov r0, r1, d7 // move d7 to result register pair r0/r1
+ bx lr
+END_COMPILERRT_FUNCTION(__floatsidfvfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S
index 21dc2df569..4a0cb39d0e 100644
--- a/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/floatsisfvfp.S
@@ -1,26 +1,26 @@
-//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __floatsisfvfp(int a);
-//
-// Converts single precision float to a 32-bit int rounding towards zero.
-// Uses Darwin calling convention where a single precision result is
-// return in a GPR..
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
- vmov s15, r0 // move int to float register s15
- vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15
- vmov r0, s15 // move s15 to result register
- bx lr
-END_COMPILERRT_FUNCTION(__floatsisfvfp)
+//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatsisfvfp(int a);
+//
+// Converts a 32-bit int to a single precision float.
+// Uses Darwin calling convention where a single precision result is
+// returned in a GPR.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+ vmov s15, r0 // move int to float register s15
+ vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15
+ vmov r0, s15 // move s15 to result register
+ bx lr
+END_COMPILERRT_FUNCTION(__floatsisfvfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S
index ea5bea97c8..d92969ea34 100644
--- a/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/floatunssidfvfp.S
@@ -1,26 +1,26 @@
-//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __floatunssidfvfp(unsigned int a);
-//
-// Converts a 32-bit int to a double precision float.
-// Uses Darwin calling convention where a double precision result is
-// return in GPR register pair.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
- vmov s15, r0 // move int to float register s15
- vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7
- vmov r0, r1, d7 // move d7 to result register pair r0/r1
- bx lr
-END_COMPILERRT_FUNCTION(__floatunssidfvfp)
+//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatunssidfvfp(unsigned int a);
+//
+// Converts a 32-bit unsigned int to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// returned in a GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+ vmov s15, r0 // move int to float register s15
+ vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7
+ vmov r0, r1, d7 // move d7 to result register pair r0/r1
+ bx lr
+END_COMPILERRT_FUNCTION(__floatunssidfvfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S b/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S
index a031b33c17..f6aeba56ae 100644
--- a/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/floatunssisfvfp.S
@@ -1,26 +1,26 @@
-//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __floatunssisfvfp(unsigned int a);
-//
-// Converts single precision float to a 32-bit int rounding towards zero.
-// Uses Darwin calling convention where a single precision result is
-// return in a GPR..
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
- vmov s15, r0 // move int to float register s15
- vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15
- vmov r0, s15 // move s15 to result register
- bx lr
-END_COMPILERRT_FUNCTION(__floatunssisfvfp)
+//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatunssisfvfp(unsigned int a);
+//
+// Converts a 32-bit unsigned int to a single precision float.
+// Uses Darwin calling convention where a single precision result is
+// returned in a GPR.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+ vmov s15, r0 // move int to float register s15
+ vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15
+ vmov r0, s15 // move s15 to result register
+ bx lr
+END_COMPILERRT_FUNCTION(__floatunssisfvfp)
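The fix*/fixuns* and float*/floatunssi* files above are the two conversion directions of this wrapper family. Semantically, and only as a hedged sketch with illustrative names: fix* truncates toward zero, the unsigned variants map negative inputs to 0 because vcvt saturates (plain C casts make no such promise for out-of-range values), and the float* routines are ordinary int-to-float conversions.

    #include <stdint.h>

    int32_t  fixsfsi_ref(float a)         { return (int32_t)a; }   /* truncate toward zero */
    uint32_t fixunssfsi_ref(float a)      { return a <= 0.0f ? 0u : (uint32_t)a; }
    double   floatsidf_ref(int32_t a)     { return (double)a; }
    float    floatunssisf_ref(uint32_t a) { return (float)a; }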
diff --git a/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S
index 77fbf64d47..9e23527017 100644
--- a/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/gedf2vfp.S
@@ -1,29 +1,29 @@
-//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __gedf2vfp(double a, double b);
-//
-// Returns one iff a >= b and neither is NaN.
-// Uses Darwin calling convention where double precision arguments are passsed
-// like in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- movge r0, #1 // set result register to 1 if greater than or equal
- movlt r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__gedf2vfp)
+//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gedf2vfp(double a, double b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ movge r0, #1 // set result register to 1 if greater than or equal
+ movlt r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gedf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S
index 834f4b17d4..0ff6084778 100644
--- a/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/gesf2vfp.S
@@ -1,29 +1,29 @@
-//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __gesf2vfp(float a, float b);
-//
-// Returns one iff a >= b and neither is NaN.
-// Uses Darwin calling convention where single precision arguments are passsed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- movge r0, #1 // set result register to 1 if greater than or equal
- movlt r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__gesf2vfp)
+//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gesf2vfp(float a, float b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ movge r0, #1 // set result register to 1 if greater than or equal
+ movlt r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gesf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S
index 329238d6e7..3dc5d5b592 100644
--- a/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/gtdf2vfp.S
@@ -1,29 +1,29 @@
-//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __gtdf2vfp(double a, double b);
-//
-// Returns one iff a > b and neither is NaN.
-// Uses Darwin calling convention where double precision arguments are passsed
-// like in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- movgt r0, #1 // set result register to 1 if equal
- movle r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__gtdf2vfp)
+//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtdf2vfp(double a, double b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ movgt r0, #1 // set result register to 1 if greater than
+ movle r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gtdf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S
index 74e0be62c2..ddd843acf5 100644
--- a/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/gtsf2vfp.S
@@ -1,29 +1,29 @@
-//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __gtsf2vfp(float a, float b);
-//
-// Returns one iff a > b and neither is NaN.
-// Uses Darwin calling convention where single precision arguments are passsed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- movgt r0, #1 // set result register to 1 if equal
- movle r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__gtsf2vfp)
+//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtsf2vfp(float a, float b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ movgt r0, #1 // set result register to 1 if greater than
+ movle r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gtsf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S
index 17b004e66a..b06ff6db5a 100644
--- a/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/ledf2vfp.S
@@ -1,29 +1,29 @@
-//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __ledf2vfp(double a, double b);
-//
-// Returns one iff a <= b and neither is NaN.
-// Uses Darwin calling convention where double precision arguments are passsed
-// like in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- movls r0, #1 // set result register to 1 if equal
- movhi r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__ledf2vfp)
+//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ledf2vfp(double a, double b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ movls r0, #1 // set result register to 1 if less than or equal
+ movhi r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__ledf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S
index 86482ff638..9b33c0c536 100644
--- a/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/lesf2vfp.S
@@ -1,29 +1,29 @@
-//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __lesf2vfp(float a, float b);
-//
-// Returns one iff a <= b and neither is NaN.
-// Uses Darwin calling convention where single precision arguments are passsed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- movls r0, #1 // set result register to 1 if equal
- movhi r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__lesf2vfp)
+//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __lesf2vfp(float a, float b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ movls r0, #1 // set result register to 1 if less than or equal
+ movhi r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__lesf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S
index b91b739560..9f794b026a 100644
--- a/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/ltdf2vfp.S
@@ -1,29 +1,29 @@
-//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __ltdf2vfp(double a, double b);
-//
-// Returns one iff a < b and neither is NaN.
-// Uses Darwin calling convention where double precision arguments are passsed
-// like in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- movmi r0, #1 // set result register to 1 if equal
- movpl r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__ltdf2vfp)
+//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltdf2vfp(double a, double b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ movmi r0, #1 // set result register to 1 if less than
+ movpl r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__ltdf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S
index 6aee77c23f..ba190d9d8d 100644
--- a/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/ltsf2vfp.S
@@ -1,29 +1,29 @@
-//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __ltsf2vfp(float a, float b);
-//
-// Returns one iff a < b and neither is NaN.
-// Uses Darwin calling convention where single precision arguments are passsed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- movmi r0, #1 // set result register to 1 if equal
- movpl r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__ltsf2vfp)
+//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltsf2vfp(float a, float b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ movmi r0, #1 // set result register to 1 if less than
+ movpl r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__ltsf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/modsi3.S b/contrib/libs/cxxsupp/builtins/arm/modsi3.S
index d997107c28..295a227d86 100644
--- a/contrib/libs/cxxsupp/builtins/arm/modsi3.S
+++ b/contrib/libs/cxxsupp/builtins/arm/modsi3.S
@@ -1,63 +1,63 @@
-/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __modsi3 (32-bit signed integer modulus) function
- * for the ARM architecture as a wrapper around the unsigned routine.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
-#define ESTABLISH_FRAME \
- push {r4, r7, lr} ;\
- add r7, sp, #4
-#define CLEAR_FRAME_AND_RETURN \
- pop {r4, r7, pc}
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-@ int __modsi3(int divident, int divisor)
-@ Calculate and return the remainder of the (signed) division.
-
- .p2align 3
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__modsi3)
-#else
-DEFINE_COMPILERRT_FUNCTION(__modsi3)
-#endif
-#if __ARM_ARCH_EXT_IDIV__
- tst r1, r1
- beq LOCAL_LABEL(divzero)
- sdiv r2, r0, r1
- mls r0, r2, r1, r0
- bx lr
-LOCAL_LABEL(divzero):
- mov r0, #0
- bx lr
-#else
- ESTABLISH_FRAME
- // Set aside the sign of the dividend.
- mov r4, r0
- // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
- eor r2, r0, r0, asr #31
- eor r3, r1, r1, asr #31
- sub r0, r2, r0, asr #31
- sub r1, r3, r1, asr #31
- // abs(a) % abs(b)
- bl SYMBOL_NAME(__umodsi3)
- // Apply sign of dividend to result and return.
- eor r0, r0, r4, asr #31
- sub r0, r0, r4, asr #31
- CLEAR_FRAME_AND_RETURN
-#endif
-END_COMPILERRT_FUNCTION(__modsi3)
+/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __modsi3 (32-bit signed integer modulus) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+@ int __modsi3(int dividend, int divisor)
+@ Calculate and return the remainder of the (signed) division.
+
+ .p2align 3
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__modsi3)
+#else
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+#endif
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divzero)
+ sdiv r2, r0, r1
+ mls r0, r2, r1, r0
+ bx lr
+LOCAL_LABEL(divzero):
+ mov r0, #0
+ bx lr
+#else
+ ESTABLISH_FRAME
+ // Set aside the sign of the dividend.
+ mov r4, r0
+ // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor r2, r0, r0, asr #31
+ eor r3, r1, r1, asr #31
+ sub r0, r2, r0, asr #31
+ sub r1, r3, r1, asr #31
+ // abs(a) % abs(b)
+ bl SYMBOL_NAME(__umodsi3)
+ // Apply sign of dividend to result and return.
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+ CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__modsi3)
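__modsi3 applies the sign of the dividend to the unsigned remainder, which matches C's truncating division. A one-line sketch of that contract (hypothetical name, not part of the diff):

    #include <stdint.h>

    /* Remainder carries the dividend's sign under truncating division. */
    int32_t modsi3_ref(int32_t a, int32_t b) { return a - (a / b) * b; }

    /* modsi3_ref(-7, 3) == -1, modsi3_ref(7, -3) == 1 */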
diff --git a/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S
index 358051c55a..636cc711ac 100644
--- a/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/muldf3vfp.S
@@ -1,26 +1,26 @@
-//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __muldf3vfp(double a, double b);
-//
-// Multiplies two double precision floating point numbers using the Darwin
-// calling convention where double arguments are passsed in GPR pairs
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
- vmov d6, r0, r1 // move first param from r0/r1 pair into d6
- vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vmul.f64 d6, d6, d7
- vmov r0, r1, d6 // move result back to r0/r1 pair
- bx lr
-END_COMPILERRT_FUNCTION(__muldf3vfp)
+//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __muldf3vfp(double a, double b);
+//
+// Multiplies two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vmul.f64 d6, d6, d7
+ vmov r0, r1, d6 // move result back to r0/r1 pair
+ bx lr
+END_COMPILERRT_FUNCTION(__muldf3vfp)
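The *vfp arithmetic wrappers in this directory all follow the pattern shown above: operands arrive in core registers per the soft-float calling convention, are moved into VFP registers, operated on, and moved back. Their reference semantics in C are simply the corresponding operation (an illustrative sketch, not the definitions that get built):

    double muldf3vfp_sketch(double a, double b) { return a * b; }   /* vmul.f64 d6, d6, d7    */
    float  mulsf3vfp_sketch(float a, float b)   { return a * b; }   /* vmul.f32 s13, s14, s15 */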
diff --git a/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S
index f43b3dc033..7f4008266b 100644
--- a/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/mulsf3vfp.S
@@ -1,26 +1,26 @@
-//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __mulsf3vfp(float a, float b);
-//
-// Multiplies two single precision floating point numbers using the Darwin
-// calling convention where single arguments are passsed like 32-bit ints.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
- vmov s14, r0 // move first param from r0 into float register
- vmov s15, r1 // move second param from r1 into float register
- vmul.f32 s13, s14, s15
- vmov r0, s13 // move result back to r0
- bx lr
-END_COMPILERRT_FUNCTION(__mulsf3vfp)
+//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __mulsf3vfp(float a, float b);
+//
+// Multiplies two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vmul.f32 s13, s14, s15
+ vmov r0, s13 // move result back to r0
+ bx lr
+END_COMPILERRT_FUNCTION(__mulsf3vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S
index 803cf49220..7ab2f5501c 100644
--- a/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/nedf2vfp.S
@@ -1,29 +1,29 @@
-//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __nedf2vfp(double a, double b);
-//
-// Returns zero if a and b are unequal and neither is NaN.
-// Uses Darwin calling convention where double precision arguments are passsed
-// like in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- movne r0, #1 // set result register to 0 if unequal
- moveq r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__nedf2vfp)
+//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nedf2vfp(double a, double b);
+//
+// Returns a nonzero value if a != b; unordered (NaN) operands also compare unequal.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ movne r0, #1 // set result register to 1 if unequal
+ moveq r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__nedf2vfp)
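In C terms the comparison above reduces to a one-liner (a sketch): the VFP compare leaves the Z flag clear for unordered (NaN) operands, so they produce a nonzero result just like C's != operator.

    int nedf2vfp_sketch(double a, double b) { return a != b; }   /* 1 if unequal or unordered, 0 if equal */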
diff --git a/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S
index 742672acbc..56d73c6761 100644
--- a/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/negdf2vfp.S
@@ -1,23 +1,23 @@
-//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __negdf2vfp(double a, double b);
-//
-// Returns the negation a double precision floating point numbers using the
-// Darwin calling convention where double arguments are passsed in GPR pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
- eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair
- bx lr
-END_COMPILERRT_FUNCTION(__negdf2vfp)
+//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __negdf2vfp(double a);
+//
+// Returns the negation of a double precision floating point number using the
+// Darwin calling convention where double arguments are passed in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+ eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair
+ bx lr
+END_COMPILERRT_FUNCTION(__negdf2vfp)
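Spelled out in C, the single eor above is a sign-bit flip (a sketch; in the little-endian GPR-pair layout r1 carries the high word, where the IEEE-754 sign bit lives):

    #include <stdint.h>
    #include <string.h>

    double negdf2vfp_sketch(double a)
    {
        uint64_t bits;
        memcpy(&bits, &a, sizeof bits);   /* view the double as raw bits      */
        bits ^= UINT64_C(1) << 63;        /* flip the sign bit (bit 31 of r1) */
        memcpy(&a, &bits, sizeof bits);
        return a;
    }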
diff --git a/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S
index 1951043ba3..a6e32e1ff8 100644
--- a/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/negsf2vfp.S
@@ -1,23 +1,23 @@
-//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __negsf2vfp(float a);
-//
-// Returns the negation of a single precision floating point numbers using the
-// Darwin calling convention where single arguments are passsed like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
- eor r0, r0, #-2147483648 // flip sign bit on float in r0
- bx lr
-END_COMPILERRT_FUNCTION(__negsf2vfp)
+//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __negsf2vfp(float a);
+//
+// Returns the negation of a single precision floating point number using the
+// Darwin calling convention where single arguments are passed like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+ eor r0, r0, #-2147483648 // flip sign bit on float in r0
+ bx lr
+END_COMPILERRT_FUNCTION(__negsf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S
index 02a726e602..9fe8ecdefb 100644
--- a/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/nesf2vfp.S
@@ -1,29 +1,29 @@
-//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __nesf2vfp(float a, float b);
-//
-// Returns one iff a != b and neither is NaN.
-// Uses Darwin calling convention where single precision arguments are passsed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- movne r0, #1 // set result register to 1 if unequal
- moveq r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__nesf2vfp)
+//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nesf2vfp(float a, float b);
+//
+// Returns a nonzero value if a != b; unordered (NaN) operands also compare unequal.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ movne r0, #1 // set result register to 1 if unequal
+ moveq r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__nesf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S b/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S
index a4866cc6f2..0f6ea51361 100644
--- a/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S
+++ b/contrib/libs/cxxsupp/builtins/arm/restore_vfp_d8_d15_regs.S
@@ -1,33 +1,33 @@
-//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling C++ functions that need to handle thrown exceptions the
-// compiler is required to save all registers and call __Unwind_SjLj_Register
-// in the function prolog. But when compiling for thumb1, there are
-// no instructions to access the floating point registers, so the
-// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs
-// written in ARM to save the float registers. In the epilog, the compiler
-// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
-//
-
- .text
- .syntax unified
-
-//
-// Restore registers d8-d15 from stack
-//
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs)
- vldmia sp!, {d8-d15} // pop registers d8-d15 off stack
- bx lr // return to prolog
-END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs)
-
+//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling C++ functions that need to handle thrown exceptions, the
+// compiler is required to save all registers and call __Unwind_SjLj_Register
+// in the function prolog. But when compiling for thumb1, there are
+// no instructions to access the floating point registers, so the
+// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs
+// written in ARM to save the float registers. In the epilog, the compiler
+// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
+//
+
+ .text
+ .syntax unified
+
+//
+// Restore registers d8-d15 from stack
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs)
+ vldmia sp!, {d8-d15} // pop registers d8-d15 off stack
+ bx lr // return to epilog
+END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S b/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S
index 9260d9c9a0..f1d90e7580 100644
--- a/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S
+++ b/contrib/libs/cxxsupp/builtins/arm/save_vfp_d8_d15_regs.S
@@ -1,33 +1,33 @@
-//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling C++ functions that need to handle thrown exceptions the
-// compiler is required to save all registers and call __Unwind_SjLj_Register
-// in the function prolog. But when compiling for thumb1, there are
-// no instructions to access the floating point registers, so the
-// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs
-// written in ARM to save the float registers. In the epilog, the compiler
-// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
-//
-
- .text
- .syntax unified
-
-//
-// Save registers d8-d15 onto stack
-//
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs)
- vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack
- bx lr // return to prolog
-END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs)
-
+//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling C++ functions that need to handle thrown exceptions, the
+// compiler is required to save all registers and call __Unwind_SjLj_Register
+// in the function prolog. But when compiling for thumb1, there are
+// no instructions to access the floating point registers, so the
+// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs
+// written in ARM to save the float registers. In the epilog, the compiler
+// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
+//
+
+ .text
+ .syntax unified
+
+//
+// Save registers d8-d15 onto stack
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs)
+ vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack
+ bx lr // return to prolog
+END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list b/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list
index 6144dd5769..cc6a4b3cdd 100644
--- a/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list
+++ b/contrib/libs/cxxsupp/builtins/arm/softfloat-alias.list
@@ -1,21 +1,21 @@
-#
-# These are soft float functions which can be
-# aliased to the *vfp functions on arm processors
-# that support floating point instructions.
-#
-___adddf3vfp ___adddf3
-___addsf3vfp ___addsf3
-___divdf3vfp ___divdf3
-___divsf3vfp ___divsf3
-___extendsfdf2vfp ___extendsfdf2
-___fixdfsivfp ___fixdfsi
-___fixsfsivfp ___fixsfsi
-___floatsidfvfp ___floatsidf
-___floatsisfvfp ___floatsisf
-___muldf3vfp ___muldf3
-___mulsf3vfp ___mulsf3
-___subdf3vfp ___subdf3
-___subsf3vfp ___subsf3
-___truncdfsf2vfp ___truncdfsf2
-___floatunssidfvfp ___floatunsidf
-___floatunssisfvfp ___floatunsisf
+#
+# These are soft float functions which can be
+# aliased to the *vfp functions on arm processors
+# that support floating point instructions.
+#
+___adddf3vfp ___adddf3
+___addsf3vfp ___addsf3
+___divdf3vfp ___divdf3
+___divsf3vfp ___divsf3
+___extendsfdf2vfp ___extendsfdf2
+___fixdfsivfp ___fixdfsi
+___fixsfsivfp ___fixsfsi
+___floatsidfvfp ___floatsidf
+___floatsisfvfp ___floatsisf
+___muldf3vfp ___muldf3
+___mulsf3vfp ___mulsf3
+___subdf3vfp ___subdf3
+___subsf3vfp ___subsf3
+___truncdfsf2vfp ___truncdfsf2
+___floatunssidfvfp ___floatunsidf
+___floatunssisfvfp ___floatunsisf
diff --git a/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S
index b9d628a006..5f3c0f70db 100644
--- a/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/subdf3vfp.S
@@ -1,26 +1,26 @@
-//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern double __subdf3vfp(double a, double b);
-//
-// Returns difference between two double precision floating point numbers using
-// the Darwin calling convention where double arguments are passsed in GPR pairs
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
- vmov d6, r0, r1 // move first param from r0/r1 pair into d6
- vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vsub.f64 d6, d6, d7
- vmov r0, r1, d6 // move result back to r0/r1 pair
- bx lr
-END_COMPILERRT_FUNCTION(__subdf3vfp)
+//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __subdf3vfp(double a, double b);
+//
+// Returns the difference between two double precision floating point numbers using
+// the Darwin calling convention where double arguments are passed in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vsub.f64 d6, d6, d7
+ vmov r0, r1, d6 // move result back to r0/r1 pair
+ bx lr
+END_COMPILERRT_FUNCTION(__subdf3vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S b/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S
index 8d8e3d6155..d6e06df519 100644
--- a/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/subsf3vfp.S
@@ -1,27 +1,27 @@
-//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __subsf3vfp(float a, float b);
-//
-// Returns the difference between two single precision floating point numbers
-// using the Darwin calling convention where single arguments are passsed
-// like 32-bit ints.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
- vmov s14, r0 // move first param from r0 into float register
- vmov s15, r1 // move second param from r1 into float register
- vsub.f32 s14, s14, s15
- vmov r0, s14 // move result back to r0
- bx lr
-END_COMPILERRT_FUNCTION(__subsf3vfp)
+//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __subsf3vfp(float a, float b);
+//
+// Returns the difference between two single precision floating point numbers
+// using the Darwin calling convention where single arguments are passed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vsub.f32 s14, s14, s15
+ vmov r0, s14 // move result back to r0
+ bx lr
+END_COMPILERRT_FUNCTION(__subsf3vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/switch16.S b/contrib/libs/cxxsupp/builtins/arm/switch16.S
index 4350d4fee3..3c3a6b1061 100644
--- a/contrib/libs/cxxsupp/builtins/arm/switch16.S
+++ b/contrib/libs/cxxsupp/builtins/arm/switch16.S
@@ -1,44 +1,44 @@
-//===-- switch.S - Implement switch* --------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling switch statements in thumb mode, the compiler
-// can use these __switch* helper functions The compiler emits a blx to
-// the __switch* function followed by a table of displacements for each
-// case statement. On entry, R0 is the index into the table. The __switch*
-// function uses the return address in lr to find the start of the table.
-// The first entry in the table is the count of the entries in the table.
-// It then uses R0 to index into the table and get the displacement of the
-// address to jump to. If R0 is greater than the size of the table, it jumps
-// to the last entry in the table. Each displacement in the table is actually
-// the distance from lr to the label, thus making the tables PIC.
-
-
- .text
- .syntax unified
-
-//
-// The table contains signed 2-byte sized elements which are 1/2 the distance
-// from lr to the target label.
-//
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
- ldrh ip, [lr, #-1] // get first 16-bit word in table
- cmp r0, ip // compare with index
- add r0, lr, r0, lsl #1 // compute address of element in table
- add ip, lr, ip, lsl #1 // compute address of last element in table
- ite lo
- ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range
- ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range
- add ip, lr, r0, lsl #1 // compute label = lr + element*2
- bx ip // jump to computed label
-END_COMPILERRT_FUNCTION(__switch16)
-
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions. The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains signed 2-byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
+ ldrh ip, [lr, #-1] // get first 16-bit word in table
+ cmp r0, ip // compare with index
+ add r0, lr, r0, lsl #1 // compute address of element in table
+ add ip, lr, ip, lsl #1 // compute address of last element in table
+ ite lo
+ ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range
+ ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range
+ add ip, lr, r0, lsl #1 // compute label = lr + element*2
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switch16)
+
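A best-effort C model of the table walk above (a sketch only: base stands for the return address in lr with the Thumb bit cleared, which is what the -1/+1 offsets in the assembly account for, and the slot just past the last in-range entry is taken to be the compiler-emitted default case; __switch32, __switch8 and __switchu8 below differ only in element size and scaling):

    #include <stdint.h>
    #include <string.h>

    void *switch16_target_sketch(unsigned char *base /* lr with Thumb bit cleared */,
                                 unsigned int idx)
    {
        unsigned short count;                            /* first halfword: number of in-range cases   */
        short disp;                                      /* each entry is (label - lr) / 2             */
        memcpy(&count, base, sizeof count);
        unsigned int pick = idx < count ? idx : count;   /* out of range -> default slot after table   */
        memcpy(&disp, base + 2 + 2 * pick, sizeof disp);
        return (void *)((uintptr_t)base + 1 + 2 * (intptr_t)disp);   /* label = lr + element*2 */
    }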
diff --git a/contrib/libs/cxxsupp/builtins/arm/switch32.S b/contrib/libs/cxxsupp/builtins/arm/switch32.S
index 42e5782eb2..b38cd2b764 100644
--- a/contrib/libs/cxxsupp/builtins/arm/switch32.S
+++ b/contrib/libs/cxxsupp/builtins/arm/switch32.S
@@ -1,44 +1,44 @@
-//===-- switch.S - Implement switch* --------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling switch statements in thumb mode, the compiler
-// can use these __switch* helper functions The compiler emits a blx to
-// the __switch* function followed by a table of displacements for each
-// case statement. On entry, R0 is the index into the table. The __switch*
-// function uses the return address in lr to find the start of the table.
-// The first entry in the table is the count of the entries in the table.
-// It then uses R0 to index into the table and get the displacement of the
-// address to jump to. If R0 is greater than the size of the table, it jumps
-// to the last entry in the table. Each displacement in the table is actually
-// the distance from lr to the label, thus making the tables PIC.
-
-
- .text
- .syntax unified
-
-//
-// The table contains signed 4-byte sized elements which are the distance
-// from lr to the target label.
-//
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
- ldr ip, [lr, #-1] // get first 32-bit word in table
- cmp r0, ip // compare with index
- add r0, lr, r0, lsl #2 // compute address of element in table
- add ip, lr, ip, lsl #2 // compute address of last element in table
- ite lo
- ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range
- ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range
- add ip, lr, r0 // compute label = lr + element
- bx ip // jump to computed label
-END_COMPILERRT_FUNCTION(__switch32)
-
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions. The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains signed 4-byte sized elements which are the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
+ ldr ip, [lr, #-1] // get first 32-bit word in table
+ cmp r0, ip // compare with index
+ add r0, lr, r0, lsl #2 // compute address of element in table
+ add ip, lr, ip, lsl #2 // compute address of last element in table
+ ite lo
+ ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range
+ ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range
+ add ip, lr, r0 // compute label = lr + element
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switch32)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/switch8.S b/contrib/libs/cxxsupp/builtins/arm/switch8.S
index d33e545c73..d7c20423de 100644
--- a/contrib/libs/cxxsupp/builtins/arm/switch8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/switch8.S
@@ -1,42 +1,42 @@
-//===-- switch.S - Implement switch* --------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling switch statements in thumb mode, the compiler
-// can use these __switch* helper functions The compiler emits a blx to
-// the __switch* function followed by a table of displacements for each
-// case statement. On entry, R0 is the index into the table. The __switch*
-// function uses the return address in lr to find the start of the table.
-// The first entry in the table is the count of the entries in the table.
-// It then uses R0 to index into the table and get the displacement of the
-// address to jump to. If R0 is greater than the size of the table, it jumps
-// to the last entry in the table. Each displacement in the table is actually
-// the distance from lr to the label, thus making the tables PIC.
-
-
- .text
- .syntax unified
-
-//
-// The table contains signed byte sized elements which are 1/2 the distance
-// from lr to the target label.
-//
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
- ldrb ip, [lr, #-1] // get first byte in table
- cmp r0, ip // signed compare with index
- ite lo
- ldrsblo r0, [lr, r0] // get indexed byte out of table
- ldrsbhs r0, [lr, ip] // if out of range, use last entry in table
- add ip, lr, r0, lsl #1 // compute label = lr + element*2
- bx ip // jump to computed label
-END_COMPILERRT_FUNCTION(__switch8)
-
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions. The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains signed byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
+ ldrb ip, [lr, #-1] // get first byte in table
+ cmp r0, ip // signed compare with index
+ ite lo
+ ldrsblo r0, [lr, r0] // get indexed byte out of table
+ ldrsbhs r0, [lr, ip] // if out of range, use last entry in table
+ add ip, lr, r0, lsl #1 // compute label = lr + element*2
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switch8)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/switchu8.S b/contrib/libs/cxxsupp/builtins/arm/switchu8.S
index af7ebedf83..1844f11c60 100644
--- a/contrib/libs/cxxsupp/builtins/arm/switchu8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/switchu8.S
@@ -1,42 +1,42 @@
-//===-- switch.S - Implement switch* --------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling switch statements in thumb mode, the compiler
-// can use these __switch* helper functions The compiler emits a blx to
-// the __switch* function followed by a table of displacements for each
-// case statement. On entry, R0 is the index into the table. The __switch*
-// function uses the return address in lr to find the start of the table.
-// The first entry in the table is the count of the entries in the table.
-// It then uses R0 to index into the table and get the displacement of the
-// address to jump to. If R0 is greater than the size of the table, it jumps
-// to the last entry in the table. Each displacement in the table is actually
-// the distance from lr to the label, thus making the tables PIC.
-
-
- .text
- .syntax unified
-
-//
-// The table contains unsigned byte sized elements which are 1/2 the distance
-// from lr to the target label.
-//
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
- ldrb ip, [lr, #-1] // get first byte in table
- cmp r0, ip // compare with index
- ite lo
- ldrblo r0, [lr, r0] // get indexed byte out of table
- ldrbhs r0, [lr, ip] // if out of range, use last entry in table
- add ip, lr, r0, lsl #1 // compute label = lr + element*2
- bx ip // jump to computed label
-END_COMPILERRT_FUNCTION(__switchu8)
-
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions. The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains unsigned byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
+ ldrb ip, [lr, #-1] // get first byte in table
+ cmp r0, ip // compare with index
+ ite lo
+ ldrblo r0, [lr, r0] // get indexed byte out of table
+ ldrbhs r0, [lr, ip] // if out of range, use last entry in table
+ add ip, lr, r0, lsl #1 // compute label = lr + element*2
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switchu8)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync-ops.h b/contrib/libs/cxxsupp/builtins/arm/sync-ops.h
index 17d617ae4c..ee02c30c6e 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync-ops.h
+++ b/contrib/libs/cxxsupp/builtins/arm/sync-ops.h
@@ -1,64 +1,64 @@
-/*===-- sync-ops.h - --===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements outline macros for the __sync_fetch_and_*
- * operations. Different instantiations will generate appropriate assembly for
- * ARM and Thumb-2 versions of the functions.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
-#define SYNC_OP_4(op) \
- .p2align 2 ; \
- .thumb ; \
- .syntax unified ; \
- DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
- dmb ; \
- mov r12, r0 ; \
- LOCAL_LABEL(tryatomic_ ## op): \
- ldrex r0, [r12] ; \
- op(r2, r0, r1) ; \
- strex r3, r2, [r12] ; \
- cmp r3, #0 ; \
- bne LOCAL_LABEL(tryatomic_ ## op) ; \
- dmb ; \
- bx lr
-
-#define SYNC_OP_8(op) \
- .p2align 2 ; \
- .thumb ; \
- .syntax unified ; \
- DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
- push {r4, r5, r6, lr} ; \
- dmb ; \
- mov r12, r0 ; \
- LOCAL_LABEL(tryatomic_ ## op): \
- ldrexd r0, r1, [r12] ; \
- op(r4, r5, r0, r1, r2, r3) ; \
- strexd r6, r4, r5, [r12] ; \
- cmp r6, #0 ; \
- bne LOCAL_LABEL(tryatomic_ ## op) ; \
- dmb ; \
- pop {r4, r5, r6, pc}
-
-#define MINMAX_4(rD, rN, rM, cmp_kind) \
- cmp rN, rM ; \
- mov rD, rM ; \
- it cmp_kind ; \
- mov##cmp_kind rD, rN
-
-#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \
- cmp rN_LO, rM_LO ; \
- sbcs rN_HI, rM_HI ; \
- mov rD_LO, rM_LO ; \
- mov rD_HI, rM_HI ; \
- itt cmp_kind ; \
- mov##cmp_kind rD_LO, rN_LO ; \
- mov##cmp_kind rD_HI, rN_HI
+/*===-- sync-ops.h - --===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements outline macros for the __sync_fetch_and_*
+ * operations. Different instantiations will generate appropriate assembly for
+ * ARM and Thumb-2 versions of the functions.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define SYNC_OP_4(op) \
+ .p2align 2 ; \
+ .thumb ; \
+ .syntax unified ; \
+ DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+ dmb ; \
+ mov r12, r0 ; \
+ LOCAL_LABEL(tryatomic_ ## op): \
+ ldrex r0, [r12] ; \
+ op(r2, r0, r1) ; \
+ strex r3, r2, [r12] ; \
+ cmp r3, #0 ; \
+ bne LOCAL_LABEL(tryatomic_ ## op) ; \
+ dmb ; \
+ bx lr
+
+#define SYNC_OP_8(op) \
+ .p2align 2 ; \
+ .thumb ; \
+ .syntax unified ; \
+ DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+ push {r4, r5, r6, lr} ; \
+ dmb ; \
+ mov r12, r0 ; \
+ LOCAL_LABEL(tryatomic_ ## op): \
+ ldrexd r0, r1, [r12] ; \
+ op(r4, r5, r0, r1, r2, r3) ; \
+ strexd r6, r4, r5, [r12] ; \
+ cmp r6, #0 ; \
+ bne LOCAL_LABEL(tryatomic_ ## op) ; \
+ dmb ; \
+ pop {r4, r5, r6, pc}
+
+#define MINMAX_4(rD, rN, rM, cmp_kind) \
+ cmp rN, rM ; \
+ mov rD, rM ; \
+ it cmp_kind ; \
+ mov##cmp_kind rD, rN
+
+#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \
+ cmp rN_LO, rM_LO ; \
+ sbcs rN_HI, rM_HI ; \
+ mov rD_LO, rM_LO ; \
+ mov rD_HI, rM_HI ; \
+ itt cmp_kind ; \
+ mov##cmp_kind rD_LO, rN_LO ; \
+ mov##cmp_kind rD_HI, rN_HI
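The macros above generate LDREX/STREX retry loops bracketed by dmb barriers. The closest portable rendering of the 4-byte case is a C11 compare-exchange loop (a sketch of the semantics, not the code that gets built); MINMAX_4 and MINMAX_8 simply make the applied operation a conditional select for the min/max variants.

    #include <stdatomic.h>

    /* The fetch-and-add flavour; swap the `old + v` expression for and/or/bic
       (nand) or a min/max select to model the other SYNC_OP_4 instantiations. */
    int sync_fetch_and_add_4_sketch(_Atomic int *p, int v)
    {
        int old = atomic_load_explicit(p, memory_order_relaxed);
        while (!atomic_compare_exchange_weak_explicit(
                   p, &old, old + v,
                   memory_order_seq_cst,     /* plays the role of the dmb pair */
                   memory_order_relaxed))
            ;                                /* strex failed: another core intervened, retry */
        return old;                          /* __sync_fetch_and_* returns the previous value */
    }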
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S
index a9b7ef04a7..54c33e2d26 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_4.S
@@ -1,21 +1,21 @@
-/*===-- sync_fetch_and_add_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_add_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-/* "adds" is 2 bytes shorter than "add". */
-#define add_4(rD, rN, rM) add rD, rN, rM
-
-SYNC_OP_4(add_4)
-
+/*===-- sync_fetch_and_add_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_add_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+/* "adds" is 2 bytes shorter than "add". */
+#define add_4(rD, rN, rM) add rD, rN, rM
+
+SYNC_OP_4(add_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S
index 274c2989b5..5724bb148b 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_add_8.S
@@ -1,24 +1,24 @@
-/*===-- sync_fetch_and_add_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_add_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
- adds rD_LO, rN_LO, rM_LO ; \
- adc rD_HI, rN_HI, rM_HI
-
-SYNC_OP_8(add_8)
-#endif
-
+/*===-- sync_fetch_and_add_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_add_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ adds rD_LO, rN_LO, rM_LO ; \
+ adc rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(add_8)
+#endif
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S
index b7620e6e10..e2b77a1a87 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_4.S
@@ -1,19 +1,19 @@
-/*===-- sync_fetch_and_and_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_and_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define and_4(rD, rN, rM) and rD, rN, rM
-
-SYNC_OP_4(and_4)
+/*===-- sync_fetch_and_and_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_and_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define and_4(rD, rN, rM) and rD, rN, rM
+
+SYNC_OP_4(and_4)
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S
index 29c4986a90..a74163a860 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_and_8.S
@@ -1,23 +1,23 @@
-/*===-- sync_fetch_and_and_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_and_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
- and rD_LO, rN_LO, rM_LO ; \
- and rD_HI, rN_HI, rM_HI
-
-SYNC_OP_8(and_8)
-#endif
+/*===-- sync_fetch_and_and_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_and_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ and rD_LO, rN_LO, rM_LO ; \
+ and rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(and_8)
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S
index 6d16be3ec1..01e4f444c2 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_max_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_max_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt)
-
-SYNC_OP_4(max_4)
-
+/*===-- sync_fetch_and_max_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_max_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt)
+
+SYNC_OP_4(max_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S
index 68a186e9bf..1eef2b2236 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_max_8.S
@@ -1,21 +1,21 @@
-/*===-- sync_fetch_and_max_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_max_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt)
-
-SYNC_OP_8(max_8)
-#endif
+/*===-- sync_fetch_and_max_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_max_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt)
+
+SYNC_OP_8(max_8)
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S
index 5333413c46..015626b63d 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_min_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_min_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt)
-
-SYNC_OP_4(min_4)
-
+/*===-- sync_fetch_and_min_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_min_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt)
+
+SYNC_OP_4(min_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S
index 18b7811f23..ad5cce0754 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_min_8.S
@@ -1,21 +1,21 @@
-/*===-- sync_fetch_and_min_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_min_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt)
-
-SYNC_OP_8(min_8)
-#endif
+/*===-- sync_fetch_and_min_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_min_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt)
+
+SYNC_OP_8(min_8)
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S
index f4f764ffce..b32a314b39 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_nand_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define nand_4(rD, rN, rM) bic rD, rN, rM
-
-SYNC_OP_4(nand_4)
-
+/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_nand_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define nand_4(rD, rN, rM) bic rD, rN, rM
+
+SYNC_OP_4(nand_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S
index 2f8f707d19..a2c17c09c0 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_nand_8.S
@@ -1,24 +1,24 @@
-/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_nand_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
- bic rD_LO, rN_LO, rM_LO ; \
- bic rD_HI, rN_HI, rM_HI
-
-SYNC_OP_8(nand_8)
-#endif
-
+/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_nand_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ bic rD_LO, rN_LO, rM_LO ; \
+ bic rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(nand_8)
+#endif
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S
index 30b2589013..f2e08576aa 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_or_4.S - -------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_or_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define or_4(rD, rN, rM) orr rD, rN, rM
-
-SYNC_OP_4(or_4)
-
+/*===-- sync_fetch_and_or_4.S - -------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_or_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define or_4(rD, rN, rM) orr rD, rN, rM
+
+SYNC_OP_4(or_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S
index 16af58e257..87b940bf62 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_or_8.S
@@ -1,24 +1,24 @@
-/*===-- sync_fetch_and_or_8.S - -------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_or_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
- orr rD_LO, rN_LO, rM_LO ; \
- orr rD_HI, rN_HI, rM_HI
-
-SYNC_OP_8(or_8)
-#endif
-
+/*===-- sync_fetch_and_or_8.S - -------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_or_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ orr rD_LO, rN_LO, rM_LO ; \
+ orr rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(or_8)
+#endif
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S
index 3e9d8a2e1a..460b2bc1ed 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_4.S
@@ -1,21 +1,21 @@
-/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_sub_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-/* "subs" is 2 bytes shorter than "sub". */
-#define sub_4(rD, rN, rM) sub rD, rN, rM
-
-SYNC_OP_4(sub_4)
-
+/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_sub_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+/* "subs" is 2 bytes shorter than "sub". */
+#define sub_4(rD, rN, rM) sub rD, rN, rM
+
+SYNC_OP_4(sub_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S
index 4dd26a5bfc..a8035a2768 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_sub_8.S
@@ -1,24 +1,24 @@
-/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_sub_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
- subs rD_LO, rN_LO, rM_LO ; \
- sbc rD_HI, rN_HI, rM_HI
-
-SYNC_OP_8(sub_8)
-#endif
-
+/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_sub_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ subs rD_LO, rN_LO, rM_LO ; \
+ sbc rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(sub_8)
+#endif
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S
index 627e7b72d7..c591530319 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_umax_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi)
-
-SYNC_OP_4(umax_4)
-
+/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umax_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi)
+
+SYNC_OP_4(umax_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S
index 633538e4de..d9b7965e52 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umax_8.S
@@ -1,21 +1,21 @@
-/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_umax_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi)
-
-SYNC_OP_8(umax_8)
-#endif
+/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umax_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi)
+
+SYNC_OP_8(umax_8)
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S
index 90c9c101d8..9f3896fca8 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_umin_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo)
-
-SYNC_OP_4(umin_4)
-
+/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umin_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo)
+
+SYNC_OP_4(umin_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S
index 6c6214ef84..7bf5e23565 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_umin_8.S
@@ -1,21 +1,21 @@
-/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_umin_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo)
-
-SYNC_OP_8(umin_8)
-#endif
+/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umin_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo)
+
+SYNC_OP_8(umin_8)
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S
index 5c42ff54c7..7e7c90c962 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_4.S
@@ -1,20 +1,20 @@
-/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_xor_4 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#define xor_4(rD, rN, rM) eor rD, rN, rM
-
-SYNC_OP_4(xor_4)
-
+/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_xor_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define xor_4(rD, rN, rM) eor rD, rN, rM
+
+SYNC_OP_4(xor_4)
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S
index 34ae23ba00..ea9aa6d4b0 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_fetch_and_xor_8.S
@@ -1,24 +1,24 @@
-/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __sync_fetch_and_xor_8 function for the ARM
- * architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "sync-ops.h"
-
-#if __ARM_ARCH_PROFILE != 'M'
-#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
- eor rD_LO, rN_LO, rM_LO ; \
- eor rD_HI, rN_HI, rM_HI
-
-SYNC_OP_8(xor_8)
-#endif
-
+/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_xor_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ eor rD_LO, rN_LO, rM_LO ; \
+ eor rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(xor_8)
+#endif
+
diff --git a/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S b/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S
index 8d400bcfcb..178f24534c 100644
--- a/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S
+++ b/contrib/libs/cxxsupp/builtins/arm/sync_synchronize.S
@@ -1,35 +1,35 @@
-//===-- sync_synchronize - Implement memory barrier * ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode,
-// the compiler may emit a call to __sync_synchronize.
-// On Darwin, the implementation jumps to an OS-supplied function named
-// OSMemoryBarrier.
-//
-
- .text
- .syntax unified
-
-#if __APPLE__
-
- .p2align 2
-DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize)
- stmfd sp!, {r7, lr}
- add r7, sp, #0
- bl _OSMemoryBarrier
- ldmfd sp!, {r7, pc}
-END_COMPILERRT_FUNCTION(__sync_synchronize)
-
- // tell linker it can break up file at label boundaries
- .subsections_via_symbols
-
-#endif
+//===-- sync_synchronize - Implement memory barrier * ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode,
+// the compiler may emit a call to __sync_synchronize.
+// On Darwin, the implementation jumps to an OS-supplied function named
+// OSMemoryBarrier.
+//
+
+ .text
+ .syntax unified
+
+#if __APPLE__
+
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize)
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ bl _OSMemoryBarrier
+ ldmfd sp!, {r7, pc}
+END_COMPILERRT_FUNCTION(__sync_synchronize)
+
+ // tell linker it can break up file at label boundaries
+ .subsections_via_symbols
+
+#endif
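A short usage sketch of the barrier this file provides: __sync_synchronize() is the GCC/Clang full-barrier builtin, and the routine above is only the Darwin fallback the compiler may call when it cannot inline one (the publish/ready names below are illustrative):

int data;
volatile int ready;

/* Illustrative use of the builtin: make the store to data visible before
 * the store to ready.  In Thumb-1 code the compiler may lower the builtin
 * to a call to the __sync_synchronize routine above; elsewhere it usually
 * inlines a barrier instruction such as dmb. */
void publish(int value) {
    data = value;
    __sync_synchronize();   /* full memory barrier */
    ready = 1;
}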
diff --git a/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S
index a138dfceb2..fa4362c45e 100644
--- a/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/truncdfsf2vfp.S
@@ -1,26 +1,26 @@
-//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern float __truncdfsf2vfp(double a);
-//
-// Converts double precision float to single precision result.
-// Uses Darwin calling convention where a double precision parameter is
-// passed in an R0/R1 pair and a single precision result is returned in R0.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
- vmov d7, r0, r1 // load double from r0/r1 pair
-	vcvt.f32.f64 s15, d7	// convert double to single (truncate precision)
- vmov r0, s15 // return result in r0
- bx lr
-END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __truncdfsf2vfp(double a);
+//
+// Converts double precision float to single precision result.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in an R0/R1 pair and a single precision result is returned in R0.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+ vmov d7, r0, r1 // load double from r0/r1 pair
+	vcvt.f32.f64 s15, d7	// convert double to single (truncate precision)
+ vmov r0, s15 // return result in r0
+ bx lr
+END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
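Semantically the helper above is just the double-to-single conversion; the assembly exists only to move the operand and result through the VFP registers under the Darwin calling convention. A one-line C sketch of what it computes:

/* What __truncdfsf2vfp computes: narrow a double to float, with the usual
 * rounding and possible loss of precision. */
float truncdfsf2vfp_sketch(double a) {
    return (float)a;
}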
diff --git a/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S b/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S
index ae23f19753..85b84936c4 100644
--- a/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S
+++ b/contrib/libs/cxxsupp/builtins/arm/udivmodsi4.S
@@ -1,184 +1,184 @@
-/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
- * modulus) function for the ARM 32-bit architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
- .syntax unified
- .text
-
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
-@  unsigned int *remainder)
-@  Calculate the quotient and remainder of the (unsigned) division.  The return
-@  value is the quotient; the remainder is stored through *remainder.
-
- .p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4)
-#else
-DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
-#endif
-#if __ARM_ARCH_EXT_IDIV__
- tst r1, r1
- beq LOCAL_LABEL(divby0)
- mov r3, r0
- udiv r0, r3, r1
- mls r1, r0, r1, r3
- str r1, [r2]
- bx lr
-#else
- cmp r1, #1
- bcc LOCAL_LABEL(divby0)
- beq LOCAL_LABEL(divby1)
- cmp r0, r1
- bcc LOCAL_LABEL(quotient0)
- /*
- * Implement division using binary long division algorithm.
- *
- * r0 is the numerator, r1 the denominator.
- *
- * The code before JMP computes the correct shift I, so that
- * r0 and (r1 << I) have the highest bit set in the same position.
- * At the time of JMP, ip := .Ldiv0block - 12 * I.
- * This depends on the fixed instruction size of block.
- * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
- *
- * block(shift) implements the test-and-update-quotient core.
- * It assumes (r0 << shift) can be computed without overflow and
- * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
- */
-
-# ifdef __ARM_FEATURE_CLZ
- clz ip, r0
- clz r3, r1
- /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
- sub r3, r3, ip
-# if __ARM_ARCH_ISA_THUMB == 2
- adr ip, LOCAL_LABEL(div0block) + 1
- sub ip, ip, r3, lsl #1
-# else
- adr ip, LOCAL_LABEL(div0block)
-# endif
- sub ip, ip, r3, lsl #2
- sub ip, ip, r3, lsl #3
- mov r3, #0
- bx ip
-# else
-# if __ARM_ARCH_ISA_THUMB == 2
-# error THUMB mode requires CLZ or UDIV
-# endif
- str r4, [sp, #-8]!
-
- mov r4, r0
- adr ip, LOCAL_LABEL(div0block)
-
- lsr r3, r4, #16
- cmp r3, r1
- movhs r4, r3
- subhs ip, ip, #(16 * 12)
-
- lsr r3, r4, #8
- cmp r3, r1
- movhs r4, r3
- subhs ip, ip, #(8 * 12)
-
- lsr r3, r4, #4
- cmp r3, r1
- movhs r4, r3
- subhs ip, #(4 * 12)
-
- lsr r3, r4, #2
- cmp r3, r1
- movhs r4, r3
- subhs ip, ip, #(2 * 12)
-
- /* Last block, no need to update r3 or r4. */
- cmp r1, r4, lsr #1
- subls ip, ip, #(1 * 12)
-
- ldr r4, [sp], #8 /* restore r4, we are done with it. */
- mov r3, #0
-
- JMP(ip)
-# endif
-
-#define IMM #
-
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- ITT(hs); \
- WIDE(addhs) r3, r3, IMM (1 << shift); \
- WIDE(subhs) r0, r0, r1, lsl IMM shift
-
- block(31)
- block(30)
- block(29)
- block(28)
- block(27)
- block(26)
- block(25)
- block(24)
- block(23)
- block(22)
- block(21)
- block(20)
- block(19)
- block(18)
- block(17)
- block(16)
- block(15)
- block(14)
- block(13)
- block(12)
- block(11)
- block(10)
- block(9)
- block(8)
- block(7)
- block(6)
- block(5)
- block(4)
- block(3)
- block(2)
- block(1)
-LOCAL_LABEL(div0block):
- block(0)
-
- str r0, [r2]
- mov r0, r3
- JMP(lr)
-
-LOCAL_LABEL(quotient0):
- str r0, [r2]
- mov r0, #0
- JMP(lr)
-
-LOCAL_LABEL(divby1):
- mov r3, #0
- str r3, [r2]
- JMP(lr)
-#endif /* __ARM_ARCH_EXT_IDIV__ */
-
-LOCAL_LABEL(divby0):
- mov r0, #0
-#ifdef __ARM_EABI__
- b __aeabi_idiv0
-#else
- JMP(lr)
-#endif
-
-END_COMPILERRT_FUNCTION(__udivmodsi4)
+/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
+ * modulus) function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
+@  unsigned int *remainder)
+@  Calculate the quotient and remainder of the (unsigned) division.  The return
+@  value is the quotient; the remainder is stored through *remainder.
+
+ .p2align 2
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4)
+#else
+DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
+#endif
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ mov r3, r0
+ udiv r0, r3, r1
+ mls r1, r0, r1, r3
+ str r1, [r2]
+ bx lr
+#else
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+ beq LOCAL_LABEL(divby1)
+ cmp r0, r1
+ bcc LOCAL_LABEL(quotient0)
+ /*
+ * Implement division using binary long division algorithm.
+ *
+ * r0 is the numerator, r1 the denominator.
+ *
+ * The code before JMP computes the correct shift I, so that
+ * r0 and (r1 << I) have the highest bit set in the same position.
+ * At the time of JMP, ip := .Ldiv0block - 12 * I.
+ * This depends on the fixed instruction size of block.
+ * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
+ *
+ * block(shift) implements the test-and-update-quotient core.
+ * It assumes (r0 << shift) can be computed without overflow and
+ * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+ */
+
+# ifdef __ARM_FEATURE_CLZ
+ clz ip, r0
+ clz r3, r1
+ /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+ sub r3, r3, ip
+# if __ARM_ARCH_ISA_THUMB == 2
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #2
+ sub ip, ip, r3, lsl #3
+ mov r3, #0
+ bx ip
+# else
+# if __ARM_ARCH_ISA_THUMB == 2
+# error THUMB mode requires CLZ or UDIV
+# endif
+ str r4, [sp, #-8]!
+
+ mov r4, r0
+ adr ip, LOCAL_LABEL(div0block)
+
+ lsr r3, r4, #16
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, ip, #(16 * 12)
+
+ lsr r3, r4, #8
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, ip, #(8 * 12)
+
+ lsr r3, r4, #4
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, #(4 * 12)
+
+ lsr r3, r4, #2
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, ip, #(2 * 12)
+
+ /* Last block, no need to update r3 or r4. */
+ cmp r1, r4, lsr #1
+ subls ip, ip, #(1 * 12)
+
+ ldr r4, [sp], #8 /* restore r4, we are done with it. */
+ mov r3, #0
+
+ JMP(ip)
+# endif
+
+#define IMM #
+
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ ITT(hs); \
+ WIDE(addhs) r3, r3, IMM (1 << shift); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+
+ str r0, [r2]
+ mov r0, r3
+ JMP(lr)
+
+LOCAL_LABEL(quotient0):
+ str r0, [r2]
+ mov r0, #0
+ JMP(lr)
+
+LOCAL_LABEL(divby1):
+ mov r3, #0
+ str r3, [r2]
+ JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+#ifdef __ARM_EABI__
+ b __aeabi_idiv0
+#else
+ JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__udivmodsi4)
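The unrolled block() chain above is shift-and-subtract long division: the divisor is first aligned with the dividend's leading bit, then each step tries to subtract (divisor << shift) and records a quotient bit, and the computed branch into the chain simply skips the steps that cannot apply. A plain C sketch of the same algorithm, without the computed-goto dispatch (names are illustrative; division by zero is left to the caller here, where the assembly instead reaches __aeabi_idiv0 on EABI targets):

#include <stdint.h>

/* Reference model of __udivmodsi4: returns the quotient and stores the
 * remainder through *rem. */
uint32_t udivmodsi4_sketch(uint32_t num, uint32_t den, uint32_t *rem) {
    uint32_t quot = 0;
    if (den != 0 && num >= den) {
        /* Align den's leading bit with num's leading bit, as the clz code
         * above does, then do restoring shift-and-subtract division. */
        int shift = __builtin_clz(den) - __builtin_clz(num);
        for (; shift >= 0; --shift) {
            if (num >= (den << shift)) {   /* the "block(shift)" test       */
                num -= den << shift;       /* subtract the shifted divisor  */
                quot |= 1u << shift;       /* record that quotient bit      */
            }
        }
    }
    *rem = num;
    return quot;
}

__udivsi3 and __umodsi3 below are the same loop with only the quotient or only the remainder kept, which is why the umodsi3 blocks are shorter: they drop the quotient update.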
diff --git a/contrib/libs/cxxsupp/builtins/arm/udivsi3.S b/contrib/libs/cxxsupp/builtins/arm/udivsi3.S
index dd6765bfbb..165b2b58ac 100644
--- a/contrib/libs/cxxsupp/builtins/arm/udivsi3.S
+++ b/contrib/libs/cxxsupp/builtins/arm/udivsi3.S
@@ -1,170 +1,170 @@
-/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __udivsi3 (32-bit unsigned integer divide)
- * function for the ARM 32-bit architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
- .syntax unified
- .text
-
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
- .p2align 2
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
-
-@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
-@ Calculate and return the quotient of the (unsigned) division.
-
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3)
-#else
-DEFINE_COMPILERRT_FUNCTION(__udivsi3)
-#endif
-#if __ARM_ARCH_EXT_IDIV__
- tst r1, r1
- beq LOCAL_LABEL(divby0)
- udiv r0, r0, r1
- bx lr
-#else
- cmp r1, #1
- bcc LOCAL_LABEL(divby0)
- IT(eq)
- JMPc(lr, eq)
- cmp r0, r1
- ITT(cc)
- movcc r0, #0
- JMPc(lr, cc)
- /*
- * Implement division using binary long division algorithm.
- *
- * r0 is the numerator, r1 the denominator.
- *
- * The code before JMP computes the correct shift I, so that
- * r0 and (r1 << I) have the highest bit set in the same position.
- * At the time of JMP, ip := .Ldiv0block - 12 * I.
- * This depends on the fixed instruction size of block.
- * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
- *
- * block(shift) implements the test-and-update-quotient core.
- * It assumes (r0 << shift) can be computed without overflow and
- * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
- */
-
-# ifdef __ARM_FEATURE_CLZ
- clz ip, r0
- clz r3, r1
- /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
- sub r3, r3, ip
-# if __ARM_ARCH_ISA_THUMB == 2
- adr ip, LOCAL_LABEL(div0block) + 1
- sub ip, ip, r3, lsl #1
-# else
- adr ip, LOCAL_LABEL(div0block)
-# endif
- sub ip, ip, r3, lsl #2
- sub ip, ip, r3, lsl #3
- mov r3, #0
- bx ip
-# else
-# if __ARM_ARCH_ISA_THUMB == 2
-# error THUMB mode requires CLZ or UDIV
-# endif
- mov r2, r0
- adr ip, LOCAL_LABEL(div0block)
-
- lsr r3, r2, #16
- cmp r3, r1
- movhs r2, r3
- subhs ip, ip, #(16 * 12)
-
- lsr r3, r2, #8
- cmp r3, r1
- movhs r2, r3
- subhs ip, ip, #(8 * 12)
-
- lsr r3, r2, #4
- cmp r3, r1
- movhs r2, r3
- subhs ip, #(4 * 12)
-
- lsr r3, r2, #2
- cmp r3, r1
- movhs r2, r3
- subhs ip, ip, #(2 * 12)
-
- /* Last block, no need to update r2 or r3. */
- cmp r1, r2, lsr #1
- subls ip, ip, #(1 * 12)
-
- mov r3, #0
-
- JMP(ip)
-# endif
-
-#define IMM #
-
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- ITT(hs); \
- WIDE(addhs) r3, r3, IMM (1 << shift); \
- WIDE(subhs) r0, r0, r1, lsl IMM shift
-
- block(31)
- block(30)
- block(29)
- block(28)
- block(27)
- block(26)
- block(25)
- block(24)
- block(23)
- block(22)
- block(21)
- block(20)
- block(19)
- block(18)
- block(17)
- block(16)
- block(15)
- block(14)
- block(13)
- block(12)
- block(11)
- block(10)
- block(9)
- block(8)
- block(7)
- block(6)
- block(5)
- block(4)
- block(3)
- block(2)
- block(1)
-LOCAL_LABEL(div0block):
- block(0)
-
- mov r0, r3
- JMP(lr)
-#endif /* __ARM_ARCH_EXT_IDIV__ */
-
-LOCAL_LABEL(divby0):
- mov r0, #0
-#ifdef __ARM_EABI__
- b __aeabi_idiv0
-#else
- JMP(lr)
-#endif
-
-END_COMPILERRT_FUNCTION(__udivsi3)
+/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivsi3 (32-bit unsigned integer divide)
+ * function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+ .p2align 2
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
+
+@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
+@ Calculate and return the quotient of the (unsigned) division.
+
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3)
+#else
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+#endif
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ udiv r0, r0, r1
+ bx lr
+#else
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+ IT(eq)
+ JMPc(lr, eq)
+ cmp r0, r1
+ ITT(cc)
+ movcc r0, #0
+ JMPc(lr, cc)
+ /*
+ * Implement division using binary long division algorithm.
+ *
+ * r0 is the numerator, r1 the denominator.
+ *
+ * The code before JMP computes the correct shift I, so that
+ * r0 and (r1 << I) have the highest bit set in the same position.
+ * At the time of JMP, ip := .Ldiv0block - 12 * I.
+ * This depends on the fixed instruction size of block.
+ * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
+ *
+ * block(shift) implements the test-and-update-quotient core.
+ * It assumes (r0 << shift) can be computed without overflow and
+ * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+ */
+
+# ifdef __ARM_FEATURE_CLZ
+ clz ip, r0
+ clz r3, r1
+ /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+ sub r3, r3, ip
+# if __ARM_ARCH_ISA_THUMB == 2
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #2
+ sub ip, ip, r3, lsl #3
+ mov r3, #0
+ bx ip
+# else
+# if __ARM_ARCH_ISA_THUMB == 2
+# error THUMB mode requires CLZ or UDIV
+# endif
+ mov r2, r0
+ adr ip, LOCAL_LABEL(div0block)
+
+ lsr r3, r2, #16
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(16 * 12)
+
+ lsr r3, r2, #8
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(8 * 12)
+
+ lsr r3, r2, #4
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, #(4 * 12)
+
+ lsr r3, r2, #2
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(2 * 12)
+
+ /* Last block, no need to update r2 or r3. */
+ cmp r1, r2, lsr #1
+ subls ip, ip, #(1 * 12)
+
+ mov r3, #0
+
+ JMP(ip)
+# endif
+
+#define IMM #
+
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ ITT(hs); \
+ WIDE(addhs) r3, r3, IMM (1 << shift); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+
+ mov r0, r3
+ JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+#ifdef __ARM_EABI__
+ b __aeabi_idiv0
+#else
+ JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__udivsi3)
diff --git a/contrib/libs/cxxsupp/builtins/arm/umodsi3.S b/contrib/libs/cxxsupp/builtins/arm/umodsi3.S
index 6380d455d7..9e7a148ce4 100644
--- a/contrib/libs/cxxsupp/builtins/arm/umodsi3.S
+++ b/contrib/libs/cxxsupp/builtins/arm/umodsi3.S
@@ -1,161 +1,161 @@
-/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===//
- *
- * This file implements the __umodsi3 (32-bit unsigned integer modulus)
- * function for the ARM 32-bit architecture.
- *
- *===----------------------------------------------------------------------===*/
-
-#include "../assembly.h"
-
- .syntax unified
- .text
-#if __ARM_ARCH_ISA_THUMB == 2
- .thumb
-#endif
-
-@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
-@ Calculate and return the remainder of the (unsigned) division.
-
- .p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3)
-#else
-DEFINE_COMPILERRT_FUNCTION(__umodsi3)
-#endif
-#if __ARM_ARCH_EXT_IDIV__
- tst r1, r1
- beq LOCAL_LABEL(divby0)
- udiv r2, r0, r1
- mls r0, r2, r1, r0
- bx lr
-#else
- cmp r1, #1
- bcc LOCAL_LABEL(divby0)
- ITT(eq)
- moveq r0, #0
- JMPc(lr, eq)
- cmp r0, r1
- IT(cc)
- JMPc(lr, cc)
- /*
- * Implement division using binary long division algorithm.
- *
- * r0 is the numerator, r1 the denominator.
- *
- * The code before JMP computes the correct shift I, so that
- * r0 and (r1 << I) have the highest bit set in the same position.
- * At the time of JMP, ip := .Ldiv0block - 8 * I.
- * This depends on the fixed instruction size of block.
- * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
- *
- * block(shift) implements the test-and-update-quotient core.
- * It assumes (r0 << shift) can be computed without overflow and
- * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
- */
-
-# ifdef __ARM_FEATURE_CLZ
- clz ip, r0
- clz r3, r1
- /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
- sub r3, r3, ip
-# if __ARM_ARCH_ISA_THUMB == 2
- adr ip, LOCAL_LABEL(div0block) + 1
- sub ip, ip, r3, lsl #1
-# else
- adr ip, LOCAL_LABEL(div0block)
-# endif
- sub ip, ip, r3, lsl #3
- bx ip
-# else
-# if __ARM_ARCH_ISA_THUMB == 2
-# error THUMB mode requires CLZ or UDIV
-# endif
- mov r2, r0
- adr ip, LOCAL_LABEL(div0block)
-
- lsr r3, r2, #16
- cmp r3, r1
- movhs r2, r3
- subhs ip, ip, #(16 * 8)
-
- lsr r3, r2, #8
- cmp r3, r1
- movhs r2, r3
- subhs ip, ip, #(8 * 8)
-
- lsr r3, r2, #4
- cmp r3, r1
- movhs r2, r3
- subhs ip, #(4 * 8)
-
- lsr r3, r2, #2
- cmp r3, r1
- movhs r2, r3
- subhs ip, ip, #(2 * 8)
-
- /* Last block, no need to update r2 or r3. */
- cmp r1, r2, lsr #1
- subls ip, ip, #(1 * 8)
-
- JMP(ip)
-# endif
-
-#define IMM #
-
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- IT(hs); \
- WIDE(subhs) r0, r0, r1, lsl IMM shift
-
- block(31)
- block(30)
- block(29)
- block(28)
- block(27)
- block(26)
- block(25)
- block(24)
- block(23)
- block(22)
- block(21)
- block(20)
- block(19)
- block(18)
- block(17)
- block(16)
- block(15)
- block(14)
- block(13)
- block(12)
- block(11)
- block(10)
- block(9)
- block(8)
- block(7)
- block(6)
- block(5)
- block(4)
- block(3)
- block(2)
- block(1)
-LOCAL_LABEL(div0block):
- block(0)
- JMP(lr)
-#endif /* __ARM_ARCH_EXT_IDIV__ */
-
-LOCAL_LABEL(divby0):
- mov r0, #0
-#ifdef __ARM_EABI__
- b __aeabi_idiv0
-#else
- JMP(lr)
-#endif
-
-END_COMPILERRT_FUNCTION(__umodsi3)
+/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __umodsi3 (32-bit unsigned integer modulus)
+ * function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
+@ Calculate and return the remainder of the (unsigned) division.
+
+ .p2align 2
+#if __ARM_ARCH_ISA_THUMB == 2
+DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3)
+#else
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+#endif
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ udiv r2, r0, r1
+ mls r0, r2, r1, r0
+ bx lr
+#else
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+ ITT(eq)
+ moveq r0, #0
+ JMPc(lr, eq)
+ cmp r0, r1
+ IT(cc)
+ JMPc(lr, cc)
+ /*
+ * Implement division using binary long division algorithm.
+ *
+ * r0 is the numerator, r1 the denominator.
+ *
+ * The code before JMP computes the correct shift I, so that
+ * r0 and (r1 << I) have the highest bit set in the same position.
+ * At the time of JMP, ip := .Ldiv0block - 8 * I.
+ * This depends on the fixed instruction size of block.
+ * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
+ *
+ * block(shift) implements the test-and-update-quotient core.
+ * It assumes (r0 << shift) can be computed without overflow and
+ * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+ */
+
+# ifdef __ARM_FEATURE_CLZ
+ clz ip, r0
+ clz r3, r1
+ /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+ sub r3, r3, ip
+# if __ARM_ARCH_ISA_THUMB == 2
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #3
+ bx ip
+# else
+# if __ARM_ARCH_ISA_THUMB == 2
+# error THUMB mode requires CLZ or UDIV
+# endif
+ mov r2, r0
+ adr ip, LOCAL_LABEL(div0block)
+
+ lsr r3, r2, #16
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(16 * 8)
+
+ lsr r3, r2, #8
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(8 * 8)
+
+ lsr r3, r2, #4
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, #(4 * 8)
+
+ lsr r3, r2, #2
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(2 * 8)
+
+ /* Last block, no need to update r2 or r3. */
+ cmp r1, r2, lsr #1
+ subls ip, ip, #(1 * 8)
+
+ JMP(ip)
+# endif
+
+#define IMM #
+
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ IT(hs); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+ JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+#ifdef __ARM_EABI__
+ b __aeabi_idiv0
+#else
+ JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__umodsi3)
diff --git a/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S
index 2efa2e6ebb..c4bea2d5ee 100644
--- a/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/unorddf2vfp.S
@@ -1,29 +1,29 @@
-//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __unorddf2vfp(double a, double b);
-//
-// Returns one iff a or b is NaN
-// Uses Darwin calling convention where double precision arguments are passed
-// in GPR register pairs.
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
- vmov d6, r0, r1 // load r0/r1 pair in double register
- vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
- vmrs apsr_nzcv, fpscr
- movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
- movvc r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__unorddf2vfp)
+//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __unorddf2vfp(double a, double b);
+//
+// Returns one iff a or b is NaN
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR register pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+ vmrs apsr_nzcv, fpscr
+ movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
+ movvc r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__unorddf2vfp)
diff --git a/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S b/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S
index d5e69f5382..886e965681 100644
--- a/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S
+++ b/contrib/libs/cxxsupp/builtins/arm/unordsf2vfp.S
@@ -1,29 +1,29 @@
-//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// extern int __unordsf2vfp(float a, float b);
-//
-// Returns one iff a or b is NaN
-// Uses Darwin calling convention where single precision arguments are passed
-// like 32-bit ints
-//
- .syntax unified
- .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
- vmov s14, r0 // move from GPR 0 to float register
- vmov s15, r1 // move from GPR 1 to float register
- vcmp.f32 s14, s15
- vmrs apsr_nzcv, fpscr
- movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
- movvc r0, #0
- bx lr
-END_COMPILERRT_FUNCTION(__unordsf2vfp)
+//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __unordsf2vfp(float a, float b);
+//
+// Returns one iff a or b is NaN
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+ vmrs apsr_nzcv, fpscr
+ movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
+ movvc r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__unordsf2vfp)
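Both unordered-compare helpers above reduce to a NaN test: the VFP compare raises the "overflow" condition when either operand is NaN, and the movvs/movvc pair turns that into 1 or 0. An equivalent C sketch using the fact that NaN is the only value that compares unequal to itself:

/* Semantic equivalent of __unordsf2vfp / __unorddf2vfp: nonzero iff either
 * argument is NaN. */
int unordsf2vfp_sketch(float a, float b)   { return (a != a) || (b != b); }
int unorddf2vfp_sketch(double a, double b) { return (a != a) || (b != b); }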
diff --git a/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk b/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk
index 8c8768d8fd..7f7e386613 100644
--- a/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk
+++ b/contrib/libs/cxxsupp/builtins/arm64/Makefile.mk
@@ -1,20 +1,20 @@
-#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := arm64
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs :=
+OnlyArchs := arm64
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk b/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk
index 6518514018..f3c1807f01 100644
--- a/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk
+++ b/contrib/libs/cxxsupp/builtins/armv6m/Makefile.mk
@@ -1,20 +1,20 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := armv6m
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs :=
+OnlyArchs := armv6m
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/contrib/libs/cxxsupp/builtins/ashldi3.c b/contrib/libs/cxxsupp/builtins/ashldi3.c
index f9f5bfe60e..eb4698ac51 100644
--- a/contrib/libs/cxxsupp/builtins/ashldi3.c
+++ b/contrib/libs/cxxsupp/builtins/ashldi3.c
@@ -1,43 +1,43 @@
-/* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ashldi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a << b */
-
-/* Precondition: 0 <= b < bits_in_dword */
-
-ARM_EABI_FNALIAS(llsl, ashldi3)
-
-COMPILER_RT_ABI di_int
-__ashldi3(di_int a, si_int b)
-{
- const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
- dwords input;
- dwords result;
- input.all = a;
- if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
- {
- result.s.low = 0;
- result.s.high = input.s.low << (b - bits_in_word);
- }
- else /* 0 <= b < bits_in_word */
- {
- if (b == 0)
- return a;
- result.s.low = input.s.low << b;
- result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b));
- }
- return result.all;
-}
+/* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashldi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a << b */
+
+/* Precondition: 0 <= b < bits_in_dword */
+
+ARM_EABI_FNALIAS(llsl, ashldi3)
+
+COMPILER_RT_ABI di_int
+__ashldi3(di_int a, si_int b)
+{
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ dwords input;
+ dwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
+ {
+ result.s.low = 0;
+ result.s.high = input.s.low << (b - bits_in_word);
+ }
+ else /* 0 <= b < bits_in_word */
+ {
+ if (b == 0)
+ return a;
+ result.s.low = input.s.low << b;
+ result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b));
+ }
+ return result.all;
+}
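The two branches in __ashldi3 above split a 64-bit shift into 32-bit word operations: shifts of 32 or more move part of the low word straight into the high word, while shorter shifts also carry the low word's top bits into the high word. A self-contained sketch of that split, using unsigned arithmetic to stay clear of signed-shift overflow and checked against the compiler's native 64-bit shift:

#include <assert.h>
#include <stdint.h>

/* Restatement of the __ashldi3 word split with 32-bit halves.  The bit
 * pattern it produces is the same one the union-based code computes. */
static uint64_t ashldi3_sketch(uint64_t a, int b) {
    uint32_t lo = (uint32_t)a;
    uint32_t hi = (uint32_t)(a >> 32);
    if (b == 0)
        return a;
    if (b & 32)                               /* 32 <= b < 64               */
        return (uint64_t)(uint32_t)(lo << (b - 32)) << 32;
    return (uint64_t)(uint32_t)(lo << b) |                  /* low word      */
           ((uint64_t)(uint32_t)((hi << b) | (lo >> (32 - b))) << 32);
}

int main(void) {
    uint64_t a = 0x123456789abcdef0ULL;
    for (int b = 0; b < 64; ++b)
        assert(ashldi3_sketch(a, b) == (a << b));
    return 0;
}

ashlti3.c below is the same construction one level up, operating on 64-bit halves of a 128-bit value.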
diff --git a/contrib/libs/cxxsupp/builtins/ashlti3.c b/contrib/libs/cxxsupp/builtins/ashlti3.c
index 9e33701d68..638ae845ff 100644
--- a/contrib/libs/cxxsupp/builtins/ashlti3.c
+++ b/contrib/libs/cxxsupp/builtins/ashlti3.c
@@ -1,45 +1,45 @@
-/* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ashlti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a << b */
-
-/* Precondition: 0 <= b < bits_in_tword */
-
-COMPILER_RT_ABI ti_int
-__ashlti3(ti_int a, si_int b)
-{
- const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
- twords input;
- twords result;
- input.all = a;
- if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
- {
- result.s.low = 0;
- result.s.high = input.s.low << (b - bits_in_dword);
- }
- else /* 0 <= b < bits_in_dword */
- {
- if (b == 0)
- return a;
- result.s.low = input.s.low << b;
- result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b));
- }
- return result.all;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashlti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a << b */
+
+/* Precondition: 0 <= b < bits_in_tword */
+
+COMPILER_RT_ABI ti_int
+__ashlti3(ti_int a, si_int b)
+{
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ twords input;
+ twords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
+ {
+ result.s.low = 0;
+ result.s.high = input.s.low << (b - bits_in_dword);
+ }
+ else /* 0 <= b < bits_in_dword */
+ {
+ if (b == 0)
+ return a;
+ result.s.low = input.s.low << b;
+ result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b));
+ }
+ return result.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/ashrdi3.c b/contrib/libs/cxxsupp/builtins/ashrdi3.c
index d750f193e3..14c878bb77 100644
--- a/contrib/libs/cxxsupp/builtins/ashrdi3.c
+++ b/contrib/libs/cxxsupp/builtins/ashrdi3.c
@@ -1,44 +1,44 @@
-/*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ashrdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: arithmetic a >> b */
-
-/* Precondition: 0 <= b < bits_in_dword */
-
-ARM_EABI_FNALIAS(lasr, ashrdi3)
-
-COMPILER_RT_ABI di_int
-__ashrdi3(di_int a, si_int b)
-{
- const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
- dwords input;
- dwords result;
- input.all = a;
- if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
- {
- /* result.s.high = input.s.high < 0 ? -1 : 0 */
- result.s.high = input.s.high >> (bits_in_word - 1);
- result.s.low = input.s.high >> (b - bits_in_word);
- }
- else /* 0 <= b < bits_in_word */
- {
- if (b == 0)
- return a;
- result.s.high = input.s.high >> b;
- result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
- }
- return result.all;
-}
+/*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashrdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: arithmetic a >> b */
+
+/* Precondition: 0 <= b < bits_in_dword */
+
+ARM_EABI_FNALIAS(lasr, ashrdi3)
+
+COMPILER_RT_ABI di_int
+__ashrdi3(di_int a, si_int b)
+{
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ dwords input;
+ dwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
+ {
+ /* result.s.high = input.s.high < 0 ? -1 : 0 */
+ result.s.high = input.s.high >> (bits_in_word - 1);
+ result.s.low = input.s.high >> (b - bits_in_word);
+ }
+ else /* 0 <= b < bits_in_word */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
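
The only difference from the logical-shift case is the sign fill: shifting the signed high word right by 31 yields 0 or all ones, which becomes the new high word once the shift amount reaches a full word. A hedged sketch with fixed-width types (again assuming 32-bit si_int / 64-bit di_int, and relying on arithmetic >> of negative values and two's-complement conversions exactly as the code above does):

#include <assert.h>
#include <stdint.h>

/* Illustrative restatement of the __ashrdi3 split; not part of the library. */
static int64_t arith_shift_right_split(int64_t a, int b) {
    int32_t  hi = (int32_t)((uint64_t)a >> 32);
    uint32_t lo = (uint32_t)a;
    if (b == 0)
        return a;
    if (b & 32)                                 /* 32 <= b < 64: high word is pure sign fill */
        return (int64_t)(((uint64_t)(uint32_t)(hi >> 31) << 32) |
                         (uint32_t)(hi >> (b - 32)));
    /* 0 < b < 32 */
    return (int64_t)(((uint64_t)(uint32_t)(hi >> b) << 32) |
                     (((uint32_t)hi << (32 - b)) | (lo >> b)));
}

int main(void) {
    assert(arith_shift_right_split(-1, 40) == -1);             /* sign bits propagate */
    assert(arith_shift_right_split(1LL << 40, 8) == 1LL << 32);
    assert(arith_shift_right_split(INT64_MIN, 63) == -1);
    return 0;
}
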
diff --git a/contrib/libs/cxxsupp/builtins/ashrti3.c b/contrib/libs/cxxsupp/builtins/ashrti3.c
index 987f286ca3..f78205d961 100644
--- a/contrib/libs/cxxsupp/builtins/ashrti3.c
+++ b/contrib/libs/cxxsupp/builtins/ashrti3.c
@@ -1,46 +1,46 @@
-/* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ashrti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: arithmetic a >> b */
-
-/* Precondition: 0 <= b < bits_in_tword */
-
-COMPILER_RT_ABI ti_int
-__ashrti3(ti_int a, si_int b)
-{
- const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
- twords input;
- twords result;
- input.all = a;
- if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
- {
- /* result.s.high = input.s.high < 0 ? -1 : 0 */
- result.s.high = input.s.high >> (bits_in_dword - 1);
- result.s.low = input.s.high >> (b - bits_in_dword);
- }
- else /* 0 <= b < bits_in_dword */
- {
- if (b == 0)
- return a;
- result.s.high = input.s.high >> b;
- result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
- }
- return result.all;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashrti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: arithmetic a >> b */
+
+/* Precondition: 0 <= b < bits_in_tword */
+
+COMPILER_RT_ABI ti_int
+__ashrti3(ti_int a, si_int b)
+{
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ twords input;
+ twords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
+ {
+ /* result.s.high = input.s.high < 0 ? -1 : 0 */
+ result.s.high = input.s.high >> (bits_in_dword - 1);
+ result.s.low = input.s.high >> (b - bits_in_dword);
+ }
+ else /* 0 <= b < bits_in_dword */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
+#endif /* CRT_HAS_128BIT */
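
On targets where CRT_HAS_128BIT is defined, ti_int corresponds to the compiler's 128-bit integer, and the same split applies one level up: the "word" is now a 64-bit di_int. Whether a given compiler inlines a 128-bit shift or emits a call to __ashlti3/__ashrti3 depends on the target and optimisation level, so the following is only a behavioural sketch using __int128 with a runtime shift count:

#include <assert.h>

int main(void) {
#ifdef __SIZEOF_INT128__
    volatile int b = 100;                     /* runtime count, 64 <= b < 128 */
    __int128 one = 1;
    assert((one << b) == (((__int128)1 << 64) << 36));
    assert((((__int128)-1) >> b) == (__int128)-1);   /* arithmetic shift keeps the sign fill */
#endif
    return 0;
}
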
diff --git a/contrib/libs/cxxsupp/builtins/assembly.h b/contrib/libs/cxxsupp/builtins/assembly.h
index e4a4d45b20..c28970534c 100644
--- a/contrib/libs/cxxsupp/builtins/assembly.h
+++ b/contrib/libs/cxxsupp/builtins/assembly.h
@@ -1,158 +1,158 @@
-/* ===-- assembly.h - compiler-rt assembler support macros -----------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file defines macros for use in compiler-rt assembler source.
- * This file is not part of the interface of this library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#ifndef COMPILERRT_ASSEMBLY_H
-#define COMPILERRT_ASSEMBLY_H
-
-#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
-#define SEPARATOR @
-#else
-#define SEPARATOR ;
-#endif
-
-#if defined(__APPLE__)
-#define HIDDEN(name) .private_extern name
-#define LOCAL_LABEL(name) L_##name
-// tell linker it can break up file at label boundaries
-#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols
-#define SYMBOL_IS_FUNC(name)
-#define CONST_SECTION .const
-
-#elif defined(__ELF__)
-
-#define HIDDEN(name) .hidden name
-#define LOCAL_LABEL(name) .L_##name
-#define FILE_LEVEL_DIRECTIVE
-#if defined(__arm__)
-#define SYMBOL_IS_FUNC(name) .type name,%function
-#else
-#define SYMBOL_IS_FUNC(name) .type name,@function
-#endif
-#define CONST_SECTION .section .rodata
-
-#else // !__APPLE__ && !__ELF__
-
-#define HIDDEN(name)
-#define LOCAL_LABEL(name) .L ## name
-#define FILE_LEVEL_DIRECTIVE
-#define SYMBOL_IS_FUNC(name) \
- .def name SEPARATOR \
- .scl 2 SEPARATOR \
- .type 32 SEPARATOR \
- .endef
-#define CONST_SECTION .section .rdata,"rd"
-
-#endif
-
-#if defined(__arm__)
-#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
-#define ARM_HAS_BX
-#endif
-#if !defined(__ARM_FEATURE_CLZ) && \
- (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
-#define __ARM_FEATURE_CLZ
-#endif
-
-#ifdef ARM_HAS_BX
-#define JMP(r) bx r
-#define JMPc(r, c) bx##c r
-#else
-#define JMP(r) mov pc, r
-#define JMPc(r, c) mov##c pc, r
-#endif
-
-// pop {pc} can't switch Thumb mode on ARMv4T
-#if __ARM_ARCH >= 5
-#define POP_PC() pop {pc}
-#else
-#define POP_PC() \
- pop {ip}; \
- JMP(ip)
-#endif
-
-#if __ARM_ARCH_ISA_THUMB == 2
-#define IT(cond) it cond
-#define ITT(cond) itt cond
-#else
-#define IT(cond)
-#define ITT(cond)
-#endif
-
-#if __ARM_ARCH_ISA_THUMB == 2
-#define WIDE(op) op.w
-#else
-#define WIDE(op) op
-#endif
-#endif
-
-#define GLUE2(a, b) a##b
-#define GLUE(a, b) GLUE2(a, b)
-#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
-
-#ifdef VISIBILITY_HIDDEN
-#define DECLARE_SYMBOL_VISIBILITY(name) \
- HIDDEN(SYMBOL_NAME(name)) SEPARATOR
-#else
-#define DECLARE_SYMBOL_VISIBILITY(name)
-#endif
-
-#define DEFINE_COMPILERRT_FUNCTION(name) \
- FILE_LEVEL_DIRECTIVE SEPARATOR \
- .globl SYMBOL_NAME(name) SEPARATOR \
- SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
- DECLARE_SYMBOL_VISIBILITY(name) \
- SYMBOL_NAME(name):
-
-#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \
- FILE_LEVEL_DIRECTIVE SEPARATOR \
- .globl SYMBOL_NAME(name) SEPARATOR \
- SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
- DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
- .thumb_func SEPARATOR \
- SYMBOL_NAME(name):
-
-#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
- FILE_LEVEL_DIRECTIVE SEPARATOR \
- .globl SYMBOL_NAME(name) SEPARATOR \
- SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
- HIDDEN(SYMBOL_NAME(name)) SEPARATOR \
- SYMBOL_NAME(name):
-
-#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
- .globl name SEPARATOR \
- SYMBOL_IS_FUNC(name) SEPARATOR \
- HIDDEN(name) SEPARATOR \
- name:
-
-#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
- .globl SYMBOL_NAME(name) SEPARATOR \
- SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
- .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
-
-#if defined(__ARM_EABI__)
-#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \
- DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
-#else
-#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
-#endif
-
-#ifdef __ELF__
-#define END_COMPILERRT_FUNCTION(name) \
- .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
-#else
-#define END_COMPILERRT_FUNCTION(name)
-#endif
-
-#endif /* COMPILERRT_ASSEMBLY_H */
+/* ===-- assembly.h - compiler-rt assembler support macros -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file defines macros for use in compiler-rt assembler source.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
+#define SEPARATOR @
+#else
+#define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+#define HIDDEN(name) .private_extern name
+#define LOCAL_LABEL(name) L_##name
+// tell linker it can break up file at label boundaries
+#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols
+#define SYMBOL_IS_FUNC(name)
+#define CONST_SECTION .const
+
+#elif defined(__ELF__)
+
+#define HIDDEN(name) .hidden name
+#define LOCAL_LABEL(name) .L_##name
+#define FILE_LEVEL_DIRECTIVE
+#if defined(__arm__)
+#define SYMBOL_IS_FUNC(name) .type name,%function
+#else
+#define SYMBOL_IS_FUNC(name) .type name,@function
+#endif
+#define CONST_SECTION .section .rodata
+
+#else // !__APPLE__ && !__ELF__
+
+#define HIDDEN(name)
+#define LOCAL_LABEL(name) .L ## name
+#define FILE_LEVEL_DIRECTIVE
+#define SYMBOL_IS_FUNC(name) \
+ .def name SEPARATOR \
+ .scl 2 SEPARATOR \
+ .type 32 SEPARATOR \
+ .endef
+#define CONST_SECTION .section .rdata,"rd"
+
+#endif
+
+#if defined(__arm__)
+#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
+#define ARM_HAS_BX
+#endif
+#if !defined(__ARM_FEATURE_CLZ) && \
+ (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
+#define __ARM_FEATURE_CLZ
+#endif
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#define JMPc(r, c) bx##c r
+#else
+#define JMP(r) mov pc, r
+#define JMPc(r, c) mov##c pc, r
+#endif
+
+// pop {pc} can't switch Thumb mode on ARMv4T
+#if __ARM_ARCH >= 5
+#define POP_PC() pop {pc}
+#else
+#define POP_PC() \
+ pop {ip}; \
+ JMP(ip)
+#endif
+
+#if __ARM_ARCH_ISA_THUMB == 2
+#define IT(cond) it cond
+#define ITT(cond) itt cond
+#else
+#define IT(cond)
+#define ITT(cond)
+#endif
+
+#if __ARM_ARCH_ISA_THUMB == 2
+#define WIDE(op) op.w
+#else
+#define WIDE(op) op
+#endif
+#endif
+
+#define GLUE2(a, b) a##b
+#define GLUE(a, b) GLUE2(a, b)
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+#define DECLARE_SYMBOL_VISIBILITY(name) \
+ HIDDEN(SYMBOL_NAME(name)) SEPARATOR
+#else
+#define DECLARE_SYMBOL_VISIBILITY(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name) \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
+ .thumb_func SEPARATOR \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ HIDDEN(SYMBOL_NAME(name)) SEPARATOR \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+ .globl name SEPARATOR \
+ SYMBOL_IS_FUNC(name) SEPARATOR \
+ HIDDEN(name) SEPARATOR \
+ name:
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
+
+#if defined(__ARM_EABI__)
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \
+ DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#ifdef __ELF__
+#define END_COMPILERRT_FUNCTION(name) \
+ .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#else
+#define END_COMPILERRT_FUNCTION(name)
+#endif
+
+#endif /* COMPILERRT_ASSEMBLY_H */
diff --git a/contrib/libs/cxxsupp/builtins/atomic.c b/contrib/libs/cxxsupp/builtins/atomic.c
index 42753ee809..f1ddc3e0c5 100644
--- a/contrib/libs/cxxsupp/builtins/atomic.c
+++ b/contrib/libs/cxxsupp/builtins/atomic.c
@@ -1,331 +1,331 @@
-/*===-- atomic.c - Implement support functions for atomic operations.------===
+/*===-- atomic.c - Implement support functions for atomic operations.------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * atomic.c defines a set of functions for performing atomic accesses on
+ * arbitrary-sized memory locations. This design uses locks that should
+ * be fast in the uncontended case, for two reasons:
*
- * The LLVM Compiler Infrastructure
+ * 1) This code must work with C programs that do not link to anything
+ * (including pthreads) and so it should not depend on any pthread
+ * functions.
+ * 2) Atomic operations, rather than explicit mutexes, are most commonly used
+ *    on code where contended operations are rare.
*
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===
- *
- * atomic.c defines a set of functions for performing atomic accesses on
- * arbitrary-sized memory locations. This design uses locks that should
- * be fast in the uncontended case, for two reasons:
- *
- * 1) This code must work with C programs that do not link to anything
- * (including pthreads) and so it should not depend on any pthread
- * functions.
- * 2) Atomic operations, rather than explicit mutexes, are most commonly used
- *    on code where contended operations are rare.
- *
- * To avoid needing a per-object lock, this code allocates an array of
- * locks and hashes the object pointers to find the one that it should use.
- * For operations that must be atomic on two locations, the lower lock is
- * always acquired first, to avoid deadlock.
- *
- *===----------------------------------------------------------------------===
- */
-
-#include <stdint.h>
-#include <string.h>
-
-#include "assembly.h"
-
-// Clang objects if you redefine a builtin. This little hack allows us to
-// define a function with the same name as an intrinsic.
-#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load)
-#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store)
-#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange)
-#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange)
-
-/// Number of locks. This allocates one page on 32-bit platforms, two on
-/// 64-bit. This can be specified externally if a different trade between
-/// memory usage and contention probability is required for a given platform.
-#ifndef SPINLOCK_COUNT
-#define SPINLOCK_COUNT (1<<10)
-#endif
-static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
-
-////////////////////////////////////////////////////////////////////////////////
-// Platform-specific lock implementation. Falls back to spinlocks if none is
-// defined. Each platform should define the Lock type, and corresponding
-// lock() and unlock() functions.
-////////////////////////////////////////////////////////////////////////////////
-#ifdef __FreeBSD__
-#include <errno.h>
-#include <sys/types.h>
-#include <machine/atomic.h>
-#include <sys/umtx.h>
-typedef struct _usem Lock;
-__inline static void unlock(Lock *l) {
- __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE);
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
- if (l->_has_waiters)
- _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
-}
-__inline static void lock(Lock *l) {
- uint32_t old = 1;
- while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old,
- 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
- _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0);
- old = 1;
- }
-}
-/// locks for atomic operations
-static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} };
-
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-typedef OSSpinLock Lock;
-__inline static void unlock(Lock *l) {
- OSSpinLockUnlock(l);
-}
-/// Locks a lock. In the current implementation, this is potentially
-/// unbounded in the contended case.
-__inline static void lock(Lock *l) {
- OSSpinLockLock(l);
-}
-static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
-
-#else
-typedef _Atomic(uintptr_t) Lock;
-/// Unlock a lock. This is a release operation.
-__inline static void unlock(Lock *l) {
- __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
-}
-/// Locks a lock. In the current implementation, this is potentially
-/// unbounded in the contended case.
-__inline static void lock(Lock *l) {
- uintptr_t old = 0;
- while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
- __ATOMIC_RELAXED))
- old = 0;
-}
-/// locks for atomic operations
-static Lock locks[SPINLOCK_COUNT];
-#endif
-
-
-/// Returns a lock to use for a given pointer.
-static __inline Lock *lock_for_pointer(void *ptr) {
- intptr_t hash = (intptr_t)ptr;
- // Disregard the lowest 4 bits. We want all values that may be part of the
- // same memory operation to hash to the same value and therefore use the same
- // lock.
- hash >>= 4;
- // Use the next bits as the basis for the hash
- intptr_t low = hash & SPINLOCK_MASK;
- // Now use the high(er) set of bits to perturb the hash, so that we don't
- // get collisions from atomic fields in a single object
- hash >>= 16;
- hash ^= low;
- // Return a pointer to the word to use
- return locks + (hash & SPINLOCK_MASK);
-}
-
-/// Macros for determining whether a size is lock free. Clang can not yet
-/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
-/// not lock free.
-#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1)
-#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2)
-#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4)
-#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8)
-#define IS_LOCK_FREE_16 0
-
-/// Macro that calls the compiler-generated lock-free versions of functions
-/// when they exist.
-#define LOCK_FREE_CASES() \
- do {\
- switch (size) {\
- case 2:\
- if (IS_LOCK_FREE_2) {\
- LOCK_FREE_ACTION(uint16_t);\
- }\
- case 4:\
- if (IS_LOCK_FREE_4) {\
- LOCK_FREE_ACTION(uint32_t);\
- }\
- case 8:\
- if (IS_LOCK_FREE_8) {\
- LOCK_FREE_ACTION(uint64_t);\
- }\
- case 16:\
- if (IS_LOCK_FREE_16) {\
- /* FIXME: __uint128_t isn't available on 32 bit platforms.
- LOCK_FREE_ACTION(__uint128_t);*/\
- }\
- }\
- } while (0)
-
-
-/// An atomic load operation. This is atomic with respect to the source
-/// pointer only.
-void __atomic_load_c(int size, void *src, void *dest, int model) {
-#define LOCK_FREE_ACTION(type) \
- *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\
- return;
- LOCK_FREE_CASES();
-#undef LOCK_FREE_ACTION
- Lock *l = lock_for_pointer(src);
- lock(l);
- memcpy(dest, src, size);
- unlock(l);
-}
-
-/// An atomic store operation. This is atomic with respect to the destination
-/// pointer only.
-void __atomic_store_c(int size, void *dest, void *src, int model) {
-#define LOCK_FREE_ACTION(type) \
-  __c11_atomic_store((_Atomic(type)*)dest, *(type*)src, model);\
- return;
- LOCK_FREE_CASES();
-#undef LOCK_FREE_ACTION
- Lock *l = lock_for_pointer(dest);
- lock(l);
- memcpy(dest, src, size);
- unlock(l);
-}
-
-/// Atomic compare and exchange operation. If the value at *ptr is identical
-/// to the value at *expected, then this copies value at *desired to *ptr. If
-/// they are not, then this stores the current value from *ptr in *expected.
-///
-/// This function returns 1 if the exchange takes place or 0 if it fails.
-int __atomic_compare_exchange_c(int size, void *ptr, void *expected,
- void *desired, int success, int failure) {
-#define LOCK_FREE_ACTION(type) \
- return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\
- *(type*)desired, success, failure)
- LOCK_FREE_CASES();
-#undef LOCK_FREE_ACTION
- Lock *l = lock_for_pointer(ptr);
- lock(l);
- if (memcmp(ptr, expected, size) == 0) {
- memcpy(ptr, desired, size);
- unlock(l);
- return 1;
- }
- memcpy(expected, ptr, size);
- unlock(l);
- return 0;
-}
-
-/// Performs an atomic exchange operation between two pointers. This is atomic
-/// with respect to the target address.
-void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
-#define LOCK_FREE_ACTION(type) \
- *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\
- model);\
- return;
- LOCK_FREE_CASES();
-#undef LOCK_FREE_ACTION
- Lock *l = lock_for_pointer(ptr);
- lock(l);
- memcpy(old, ptr, size);
- memcpy(ptr, val, size);
- unlock(l);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Where the size is known at compile time, the compiler may emit calls to
-// specialised versions of the above functions.
-////////////////////////////////////////////////////////////////////////////////
-#define OPTIMISED_CASES\
- OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
- OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
- OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
- OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
- /* FIXME: __uint128_t isn't available on 32 bit platforms.
- OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
-
-#define OPTIMISED_CASE(n, lockfree, type)\
-type __atomic_load_##n(type *src, int model) {\
- if (lockfree)\
- return __c11_atomic_load((_Atomic(type)*)src, model);\
- Lock *l = lock_for_pointer(src);\
- lock(l);\
- type val = *src;\
- unlock(l);\
- return val;\
-}
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-
-#define OPTIMISED_CASE(n, lockfree, type)\
-void __atomic_store_##n(type *dest, type val, int model) {\
- if (lockfree) {\
- __c11_atomic_store((_Atomic(type)*)dest, val, model);\
- return;\
- }\
- Lock *l = lock_for_pointer(dest);\
- lock(l);\
- *dest = val;\
- unlock(l);\
- return;\
-}
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-
-#define OPTIMISED_CASE(n, lockfree, type)\
-type __atomic_exchange_##n(type *dest, type val, int model) {\
- if (lockfree)\
- return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\
- Lock *l = lock_for_pointer(dest);\
- lock(l);\
- type tmp = *dest;\
- *dest = val;\
- unlock(l);\
- return tmp;\
-}
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-
-#define OPTIMISED_CASE(n, lockfree, type)\
-int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\
- int success, int failure) {\
- if (lockfree)\
- return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\
- success, failure);\
- Lock *l = lock_for_pointer(ptr);\
- lock(l);\
- if (*ptr == *expected) {\
- *ptr = desired;\
- unlock(l);\
- return 1;\
- }\
- *expected = *ptr;\
- unlock(l);\
- return 0;\
-}
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-
-////////////////////////////////////////////////////////////////////////////////
-// Atomic read-modify-write operations for integers of various sizes.
-////////////////////////////////////////////////////////////////////////////////
-#define ATOMIC_RMW(n, lockfree, type, opname, op) \
-type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\
- if (lockfree) \
- return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\
- Lock *l = lock_for_pointer(ptr);\
- lock(l);\
- type tmp = *ptr;\
- *ptr = tmp op val;\
- unlock(l);\
- return tmp;\
-}
-
-#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
-#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
-OPTIMISED_CASES
-#undef OPTIMISED_CASE
+ * To avoid needing a per-object lock, this code allocates an array of
+ * locks and hashes the object pointers to find the one that it should use.
+ * For operations that must be atomic on two locations, the lower lock is
+ * always acquired first, to avoid deadlock.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "assembly.h"
+
+// Clang objects if you redefine a builtin. This little hack allows us to
+// define a function with the same name as an intrinsic.
+#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load)
+#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store)
+#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange)
+#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange)
+
+/// Number of locks. This allocates one page on 32-bit platforms, two on
+/// 64-bit. This can be specified externally if a different trade between
+/// memory usage and contention probability is required for a given platform.
+#ifndef SPINLOCK_COUNT
+#define SPINLOCK_COUNT (1<<10)
+#endif
+static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
+
+////////////////////////////////////////////////////////////////////////////////
+// Platform-specific lock implementation. Falls back to spinlocks if none is
+// defined. Each platform should define the Lock type, and corresponding
+// lock() and unlock() functions.
+////////////////////////////////////////////////////////////////////////////////
+#ifdef __FreeBSD__
+#include <errno.h>
+#include <sys/types.h>
+#include <machine/atomic.h>
+#include <sys/umtx.h>
+typedef struct _usem Lock;
+__inline static void unlock(Lock *l) {
+ __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE);
+ __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+ if (l->_has_waiters)
+ _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
+}
+__inline static void lock(Lock *l) {
+ uint32_t old = 1;
+ while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old,
+ 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
+ _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0);
+ old = 1;
+ }
+}
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} };
+
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+typedef OSSpinLock Lock;
+__inline static void unlock(Lock *l) {
+ OSSpinLockUnlock(l);
+}
+/// Locks a lock. In the current implementation, this is potentially
+/// unbounded in the contended case.
+__inline static void lock(Lock *l) {
+ OSSpinLockLock(l);
+}
+static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
+
+#else
+typedef _Atomic(uintptr_t) Lock;
+/// Unlock a lock. This is a release operation.
+__inline static void unlock(Lock *l) {
+ __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
+}
+/// Locks a lock. In the current implementation, this is potentially
+/// unbounded in the contended case.
+__inline static void lock(Lock *l) {
+ uintptr_t old = 0;
+ while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED))
+ old = 0;
+}
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT];
+#endif
+
+
+/// Returns a lock to use for a given pointer.
+static __inline Lock *lock_for_pointer(void *ptr) {
+ intptr_t hash = (intptr_t)ptr;
+ // Disregard the lowest 4 bits. We want all values that may be part of the
+ // same memory operation to hash to the same value and therefore use the same
+ // lock.
+ hash >>= 4;
+ // Use the next bits as the basis for the hash
+ intptr_t low = hash & SPINLOCK_MASK;
+ // Now use the high(er) set of bits to perturb the hash, so that we don't
+ // get collisions from atomic fields in a single object
+ hash >>= 16;
+ hash ^= low;
+ // Return a pointer to the word to use
+ return locks + (hash & SPINLOCK_MASK);
+}
+
+/// Macros for determining whether a size is lock free. Clang can not yet
+/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
+/// not lock free.
+#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1)
+#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2)
+#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4)
+#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8)
+#define IS_LOCK_FREE_16 0
+
+/// Macro that calls the compiler-generated lock-free versions of functions
+/// when they exist.
+#define LOCK_FREE_CASES() \
+ do {\
+ switch (size) {\
+ case 2:\
+ if (IS_LOCK_FREE_2) {\
+ LOCK_FREE_ACTION(uint16_t);\
+ }\
+ case 4:\
+ if (IS_LOCK_FREE_4) {\
+ LOCK_FREE_ACTION(uint32_t);\
+ }\
+ case 8:\
+ if (IS_LOCK_FREE_8) {\
+ LOCK_FREE_ACTION(uint64_t);\
+ }\
+ case 16:\
+ if (IS_LOCK_FREE_16) {\
+ /* FIXME: __uint128_t isn't available on 32 bit platforms.
+ LOCK_FREE_ACTION(__uint128_t);*/\
+ }\
+ }\
+ } while (0)
+
+
+/// An atomic load operation. This is atomic with respect to the source
+/// pointer only.
+void __atomic_load_c(int size, void *src, void *dest, int model) {
+#define LOCK_FREE_ACTION(type) \
+ *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\
+ return;
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(src);
+ lock(l);
+ memcpy(dest, src, size);
+ unlock(l);
+}
+
+/// An atomic store operation. This is atomic with respect to the destination
+/// pointer only.
+void __atomic_store_c(int size, void *dest, void *src, int model) {
+#define LOCK_FREE_ACTION(type) \
+  __c11_atomic_store((_Atomic(type)*)dest, *(type*)src, model);\
+ return;
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(dest);
+ lock(l);
+ memcpy(dest, src, size);
+ unlock(l);
+}
+
+/// Atomic compare and exchange operation. If the value at *ptr is identical
+/// to the value at *expected, then this copies value at *desired to *ptr. If
+/// they are not, then this stores the current value from *ptr in *expected.
+///
+/// This function returns 1 if the exchange takes place or 0 if it fails.
+int __atomic_compare_exchange_c(int size, void *ptr, void *expected,
+ void *desired, int success, int failure) {
+#define LOCK_FREE_ACTION(type) \
+ return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\
+ *(type*)desired, success, failure)
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(ptr);
+ lock(l);
+ if (memcmp(ptr, expected, size) == 0) {
+ memcpy(ptr, desired, size);
+ unlock(l);
+ return 1;
+ }
+ memcpy(expected, ptr, size);
+ unlock(l);
+ return 0;
+}
+
+/// Performs an atomic exchange operation between two pointers. This is atomic
+/// with respect to the target address.
+void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
+#define LOCK_FREE_ACTION(type) \
+ *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\
+ model);\
+ return;
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(ptr);
+ lock(l);
+ memcpy(old, ptr, size);
+ memcpy(ptr, val, size);
+ unlock(l);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Where the size is known at compile time, the compiler may emit calls to
+// specialised versions of the above functions.
+////////////////////////////////////////////////////////////////////////////////
+#define OPTIMISED_CASES\
+ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
+ /* FIXME: __uint128_t isn't available on 32 bit platforms.
+ OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+type __atomic_load_##n(type *src, int model) {\
+ if (lockfree)\
+ return __c11_atomic_load((_Atomic(type)*)src, model);\
+ Lock *l = lock_for_pointer(src);\
+ lock(l);\
+ type val = *src;\
+ unlock(l);\
+ return val;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+void __atomic_store_##n(type *dest, type val, int model) {\
+ if (lockfree) {\
+ __c11_atomic_store((_Atomic(type)*)dest, val, model);\
+ return;\
+ }\
+ Lock *l = lock_for_pointer(dest);\
+ lock(l);\
+ *dest = val;\
+ unlock(l);\
+ return;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+type __atomic_exchange_##n(type *dest, type val, int model) {\
+ if (lockfree)\
+ return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\
+ Lock *l = lock_for_pointer(dest);\
+ lock(l);\
+ type tmp = *dest;\
+ *dest = val;\
+ unlock(l);\
+ return tmp;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\
+ int success, int failure) {\
+ if (lockfree)\
+ return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\
+ success, failure);\
+ Lock *l = lock_for_pointer(ptr);\
+ lock(l);\
+ if (*ptr == *expected) {\
+ *ptr = desired;\
+ unlock(l);\
+ return 1;\
+ }\
+ *expected = *ptr;\
+ unlock(l);\
+ return 0;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+////////////////////////////////////////////////////////////////////////////////
+// Atomic read-modify-write operations for integers of various sizes.
+////////////////////////////////////////////////////////////////////////////////
+#define ATOMIC_RMW(n, lockfree, type, opname, op) \
+type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\
+ if (lockfree) \
+ return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\
+ Lock *l = lock_for_pointer(ptr);\
+ lock(l);\
+ type tmp = *ptr;\
+ *ptr = tmp op val;\
+ unlock(l);\
+ return tmp;\
+}
+
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
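
For objects wider than the largest lock-free size, Clang and GCC lower the generic __atomic builtins to library calls that take an explicit size argument, which is what the *_c entry points above implement (falling back to the hashed spinlock table). A sketch of such a call site; linking against a runtime that actually provides these symbols (compiler-rt builtins or libatomic) is assumed:

#include <stdio.h>
#include <string.h>

/* A 32-byte payload: too wide for a single lock-free instruction, so the
 * compiler emits calls to the generic __atomic_load/__atomic_store entry
 * points, passing sizeof(wide_t) explicitly. */
typedef struct { char data[32]; } wide_t;

static wide_t shared;

int main(void) {
    wide_t in, out;
    memset(&in, 0xAB, sizeof in);

    __atomic_store(&shared, &in, __ATOMIC_SEQ_CST);    /* -> __atomic_store_c(32, ...) */
    __atomic_load(&shared, &out, __ATOMIC_SEQ_CST);    /* -> __atomic_load_c(32, ...)  */

    printf("%d\n", memcmp(&in, &out, sizeof in) == 0); /* prints 1 */
    return 0;
}
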
diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c b/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c
index 58e2508cd0..da912af643 100644
--- a/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c
+++ b/contrib/libs/cxxsupp/builtins/atomic_flag_clear.c
@@ -1,27 +1,27 @@
-/*===-- atomic_flag_clear.c -------------------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===------------------------------------------------------------------------===
- *
- * This file implements atomic_flag_clear from C11's stdatomic.h.
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __has_include
-#define __has_include(inc) 0
-#endif
-
-#if __has_include(<stdatomic.h>)
-
-#include <stdatomic.h>
-#undef atomic_flag_clear
-void atomic_flag_clear(volatile atomic_flag *object) {
- __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
-}
-
-#endif
+/*===-- atomic_flag_clear.c -------------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_clear from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_clear
+void atomic_flag_clear(volatile atomic_flag *object) {
+ __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c b/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c
index 3efab84f77..1059b787f1 100644
--- a/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c
+++ b/contrib/libs/cxxsupp/builtins/atomic_flag_clear_explicit.c
@@ -1,28 +1,28 @@
-/*===-- atomic_flag_clear_explicit.c ----------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===------------------------------------------------------------------------===
- *
- * This file implements atomic_flag_clear_explicit from C11's stdatomic.h.
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __has_include
-#define __has_include(inc) 0
-#endif
-
-#if __has_include(<stdatomic.h>)
-
-#include <stdatomic.h>
-#undef atomic_flag_clear_explicit
-void atomic_flag_clear_explicit(volatile atomic_flag *object,
- memory_order order) {
- __c11_atomic_store(&(object)->_Value, 0, order);
-}
-
-#endif
+/*===-- atomic_flag_clear_explicit.c ----------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_clear_explicit from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_clear_explicit
+void atomic_flag_clear_explicit(volatile atomic_flag *object,
+ memory_order order) {
+ __c11_atomic_store(&(object)->_Value, 0, order);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c
index d7fe7f8be2..e8811d39ef 100644
--- a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c
+++ b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set.c
@@ -1,27 +1,27 @@
-/*===-- atomic_flag_test_and_set.c ------------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===------------------------------------------------------------------------===
- *
- * This file implements atomic_flag_test_and_set from C11's stdatomic.h.
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __has_include
-#define __has_include(inc) 0
-#endif
-
-#if __has_include(<stdatomic.h>)
-
-#include <stdatomic.h>
-#undef atomic_flag_test_and_set
-_Bool atomic_flag_test_and_set(volatile atomic_flag *object) {
- return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST);
-}
-
-#endif
+/*===-- atomic_flag_test_and_set.c ------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_test_and_set from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_test_and_set
+_Bool atomic_flag_test_and_set(volatile atomic_flag *object) {
+ return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c
index 273e9b651c..5c8c2df905 100644
--- a/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c
+++ b/contrib/libs/cxxsupp/builtins/atomic_flag_test_and_set_explicit.c
@@ -1,28 +1,28 @@
-/*===-- atomic_flag_test_and_set_explicit.c ---------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===------------------------------------------------------------------------===
- *
- * This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __has_include
-#define __has_include(inc) 0
-#endif
-
-#if __has_include(<stdatomic.h>)
-
-#include <stdatomic.h>
-#undef atomic_flag_test_and_set_explicit
-_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
- memory_order order) {
- return __c11_atomic_exchange(&(object)->_Value, 1, order);
-}
-
-#endif
+/*===-- atomic_flag_test_and_set_explicit.c ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_test_and_set_explicit
+_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
+ memory_order order) {
+ return __c11_atomic_exchange(&(object)->_Value, 1, order);
+}
+
+#endif
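
Taken together, the four atomic_flag entry points in these files are enough to build the classic C11 spinlock. A minimal sketch:

#include <stdatomic.h>

static atomic_flag guard = ATOMIC_FLAG_INIT;
static int counter;

static void locked_increment(void) {
    while (atomic_flag_test_and_set_explicit(&guard, memory_order_acquire))
        ;                                     /* spin until the previous value was clear */
    ++counter;                                /* critical section */
    atomic_flag_clear_explicit(&guard, memory_order_release);
}

int main(void) {
    locked_increment();
    return counter == 1 ? 0 : 1;
}
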
diff --git a/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c b/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c
index 23077c0aa3..9ccc2ae60a 100644
--- a/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c
+++ b/contrib/libs/cxxsupp/builtins/atomic_signal_fence.c
@@ -1,27 +1,27 @@
-/*===-- atomic_signal_fence.c -----------------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===------------------------------------------------------------------------===
- *
- * This file implements atomic_signal_fence from C11's stdatomic.h.
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __has_include
-#define __has_include(inc) 0
-#endif
-
-#if __has_include(<stdatomic.h>)
-
-#include <stdatomic.h>
-#undef atomic_signal_fence
-void atomic_signal_fence(memory_order order) {
- __c11_atomic_signal_fence(order);
-}
-
-#endif
+/*===-- atomic_signal_fence.c -----------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_signal_fence from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_signal_fence
+void atomic_signal_fence(memory_order order) {
+ __c11_atomic_signal_fence(order);
+}
+
+#endif
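
A signal fence constrains only the compiler, not the CPU, which is sufficient between a thread and a signal handler running on that same thread. A hedged sketch (the handler is invoked directly here just to keep the example self-contained and runnable):

#include <stdatomic.h>
#include <signal.h>

static volatile sig_atomic_t data_ready;
static int data;

static void on_signal(int sig) {
    (void)sig;
    if (data_ready) {
        atomic_signal_fence(memory_order_acquire);   /* reads not hoisted above the flag check */
        (void)data;                                  /* safe to read the payload here */
    }
}

int main(void) {
    data = 42;
    atomic_signal_fence(memory_order_release);       /* writes not sunk below the flag store */
    data_ready = 1;
    on_signal(0);
    return 0;
}
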
diff --git a/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c b/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c
index c81ae4f9bd..d22560151b 100644
--- a/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c
+++ b/contrib/libs/cxxsupp/builtins/atomic_thread_fence.c
@@ -1,27 +1,27 @@
-/*===-- atomic_thread_fence.c -----------------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===------------------------------------------------------------------------===
- *
- * This file implements atomic_thread_fence from C11's stdatomic.h.
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __has_include
-#define __has_include(inc) 0
-#endif
-
-#if __has_include(<stdatomic.h>)
-
-#include <stdatomic.h>
-#undef atomic_thread_fence
-void atomic_thread_fence(memory_order order) {
- __c11_atomic_thread_fence(order);
-}
-
-#endif
+/*===-- atomic_thread_fence.c -----------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_thread_fence from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_thread_fence
+void atomic_thread_fence(memory_order order) {
+ __c11_atomic_thread_fence(order);
+}
+
+#endif
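
A thread fence pairs with relaxed atomic operations to order surrounding non-atomic accesses across threads; the standard message-passing shape looks like this (the main function drives it single-threaded only so the sketch runs as-is):

#include <stdatomic.h>

static int payload;
static atomic_int ready;

static void publish(int v) {
    payload = v;
    atomic_thread_fence(memory_order_release);          /* payload store ordered before the flag */
    atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

static int consume(void) {
    while (atomic_load_explicit(&ready, memory_order_relaxed) == 0)
        ;
    atomic_thread_fence(memory_order_acquire);          /* flag load ordered before the payload read */
    return payload;
}

int main(void) {
    publish(7);
    return consume() == 7 ? 0 : 1;
}
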
diff --git a/contrib/libs/cxxsupp/builtins/clear_cache.c b/contrib/libs/cxxsupp/builtins/clear_cache.c
index ad013eaded..8eec068939 100644
--- a/contrib/libs/cxxsupp/builtins/clear_cache.c
+++ b/contrib/libs/cxxsupp/builtins/clear_cache.c
@@ -1,159 +1,159 @@
-/* ===-- clear_cache.c - Implement __clear_cache ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include <stddef.h>
-
-#if __APPLE__
- #include <libkern/OSCacheControl.h>
-#endif
-#if (defined(__FreeBSD__) || defined(__Bitrig__)) && defined(__arm__)
- #include <sys/types.h>
- #include <machine/sysarch.h>
-#endif
-
-#if defined(__NetBSD__) && defined(__arm__)
- #include <machine/sysarch.h>
-#endif
-
-#if defined(__mips__)
- #include <sys/cachectl.h>
- #include <sys/syscall.h>
- #include <unistd.h>
- #if defined(__ANDROID__) && defined(__LP64__)
- /*
- * clear_mips_cache - Invalidates instruction cache for Mips.
- */
- static void clear_mips_cache(const void* Addr, size_t Size) {
- asm volatile (
- ".set push\n"
- ".set noreorder\n"
- ".set noat\n"
- "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */
- "nop\n"
- "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */
- "rdhwr $v0, $1\n" /* Get step size for SYNCI.
- $1 is $HW_SYNCI_Step */
- "beq $v0, $zero, 20f\n" /* If no caches require
- synchronization, branch
- around. */
- "nop\n"
- "10:\n"
- "synci 0(%[Addr])\n" /* Synchronize all caches around
- address. */
- "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */
- "sltu $at, %[Addr], %[Size]\n" /* Compare current with end
- address. */
- "bne $at, $zero, 10b\n" /* Branch if more to do. */
- "nop\n"
- "sync\n" /* Clear memory hazards. */
- "20:\n"
- "bal 30f\n"
- "nop\n"
- "30:\n"
- "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here.
- Add offset of 12 to point to the
- instruction after the last nop.
- */
- "jr.hb $ra\n" /* Return, clearing instruction
- hazards. */
- "nop\n"
- ".set pop\n"
- : [Addr] "+r"(Addr), [Size] "+r"(Size)
- :: "at", "ra", "v0", "memory"
- );
- }
- #endif
-#endif
-
-#if defined(__ANDROID__) && defined(__arm__)
- #include <asm/unistd.h>
-#endif
-
-/*
- * The compiler generates calls to __clear_cache() when creating
- * trampoline functions on the stack for use with nested functions.
- * It is expected to invalidate the instruction cache for the
- * specified range.
- */
-
-void __clear_cache(void *start, void *end) {
-#if __i386__ || __x86_64__
-/*
- * Intel processors have a unified instruction and data cache
- * so there is nothing to do
- */
-#elif defined(__arm__) && !defined(__APPLE__)
- #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__Bitrig__)
- struct arm_sync_icache_args arg;
-
- arg.addr = (uintptr_t)start;
- arg.len = (uintptr_t)end - (uintptr_t)start;
-
- sysarch(ARM_SYNC_ICACHE, &arg);
- #elif defined(__ANDROID__)
+/* ===-- clear_cache.c - Implement __clear_cache ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include <stddef.h>
+
+#if __APPLE__
+ #include <libkern/OSCacheControl.h>
+#endif
+#if (defined(__FreeBSD__) || defined(__Bitrig__)) && defined(__arm__)
+ #include <sys/types.h>
+ #include <machine/sysarch.h>
+#endif
+
+#if defined(__NetBSD__) && defined(__arm__)
+ #include <machine/sysarch.h>
+#endif
+
+#if defined(__mips__)
+ #include <sys/cachectl.h>
+ #include <sys/syscall.h>
+ #include <unistd.h>
+ #if defined(__ANDROID__) && defined(__LP64__)
+ /*
+ * clear_mips_cache - Invalidates instruction cache for Mips.
+ */
+ static void clear_mips_cache(const void* Addr, size_t Size) {
+ asm volatile (
+ ".set push\n"
+ ".set noreorder\n"
+ ".set noat\n"
+ "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */
+ "nop\n"
+ "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */
+ "rdhwr $v0, $1\n" /* Get step size for SYNCI.
+ $1 is $HW_SYNCI_Step */
+ "beq $v0, $zero, 20f\n" /* If no caches require
+ synchronization, branch
+ around. */
+ "nop\n"
+ "10:\n"
+ "synci 0(%[Addr])\n" /* Synchronize all caches around
+ address. */
+ "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */
+ "sltu $at, %[Addr], %[Size]\n" /* Compare current with end
+ address. */
+ "bne $at, $zero, 10b\n" /* Branch if more to do. */
+ "nop\n"
+ "sync\n" /* Clear memory hazards. */
+ "20:\n"
+ "bal 30f\n"
+ "nop\n"
+ "30:\n"
+ "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here.
+ Add offset of 12 to point to the
+ instruction after the last nop.
+ */
+ "jr.hb $ra\n" /* Return, clearing instruction
+ hazards. */
+ "nop\n"
+ ".set pop\n"
+ : [Addr] "+r"(Addr), [Size] "+r"(Size)
+ :: "at", "ra", "v0", "memory"
+ );
+ }
+ #endif
+#endif
+
+#if defined(__ANDROID__) && defined(__arm__)
+ #include <asm/unistd.h>
+#endif
+
+/*
+ * The compiler generates calls to __clear_cache() when creating
+ * trampoline functions on the stack for use with nested functions.
+ * It is expected to invalidate the instruction cache for the
+ * specified range.
+ */
+
+void __clear_cache(void *start, void *end) {
+#if __i386__ || __x86_64__
+/*
+ * Intel processors have a unified instruction and data cache
+ * so there is nothing to do
+ */
+#elif defined(__arm__) && !defined(__APPLE__)
+ #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__Bitrig__)
+ struct arm_sync_icache_args arg;
+
+ arg.addr = (uintptr_t)start;
+ arg.len = (uintptr_t)end - (uintptr_t)start;
+
+ sysarch(ARM_SYNC_ICACHE, &arg);
+ #elif defined(__ANDROID__)
int start_reg __asm("r0") = (int) (intptr_t) start;
- const register int end_reg __asm("r1") = (int) (intptr_t) end;
- const register int flags __asm("r2") = 0;
- const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
- __asm __volatile("svc 0x0" : "=r"(start_reg)
- : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags) : "r0");
- if (start_reg != 0) {
- compilerrt_abort();
- }
- #else
- compilerrt_abort();
- #endif
-#elif defined(__mips__)
- const uintptr_t start_int = (uintptr_t) start;
- const uintptr_t end_int = (uintptr_t) end;
- #if defined(__ANDROID__) && defined(__LP64__)
- // Call synci implementation for short address range.
- const uintptr_t address_range_limit = 256;
- if ((end_int - start_int) <= address_range_limit) {
- clear_mips_cache(start, (end_int - start_int));
- } else {
- syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
- }
- #else
- syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
- #endif
-#elif defined(__aarch64__) && !defined(__APPLE__)
- uint64_t xstart = (uint64_t)(uintptr_t) start;
- uint64_t xend = (uint64_t)(uintptr_t) end;
- uint64_t addr;
-
- // Get Cache Type Info
- uint64_t ctr_el0;
- __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
-
- /*
- * dc & ic instructions must use 64bit registers so we don't use
-   * uintptr_t in case this runs in an ILP32 environment.
- */
- const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
- for (addr = xstart; addr < xend; addr += dcache_line_size)
- __asm __volatile("dc cvau, %0" :: "r"(addr));
- __asm __volatile("dsb ish");
-
- const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
- for (addr = xstart; addr < xend; addr += icache_line_size)
- __asm __volatile("ic ivau, %0" :: "r"(addr));
- __asm __volatile("isb sy");
-#else
- #if __APPLE__
- /* On Darwin, sys_icache_invalidate() provides this functionality */
- sys_icache_invalidate(start, end-start);
- #else
- compilerrt_abort();
- #endif
-#endif
-}
-
+ const register int end_reg __asm("r1") = (int) (intptr_t) end;
+ const register int flags __asm("r2") = 0;
+ const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
+ __asm __volatile("svc 0x0" : "=r"(start_reg)
+ : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags) : "r0");
+ if (start_reg != 0) {
+ compilerrt_abort();
+ }
+ #else
+ compilerrt_abort();
+ #endif
+#elif defined(__mips__)
+ const uintptr_t start_int = (uintptr_t) start;
+ const uintptr_t end_int = (uintptr_t) end;
+ #if defined(__ANDROID__) && defined(__LP64__)
+ // Call synci implementation for short address range.
+ const uintptr_t address_range_limit = 256;
+ if ((end_int - start_int) <= address_range_limit) {
+ clear_mips_cache(start, (end_int - start_int));
+ } else {
+ syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
+ }
+ #else
+ syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
+ #endif
+#elif defined(__aarch64__) && !defined(__APPLE__)
+ uint64_t xstart = (uint64_t)(uintptr_t) start;
+ uint64_t xend = (uint64_t)(uintptr_t) end;
+ uint64_t addr;
+
+ // Get Cache Type Info
+ uint64_t ctr_el0;
+ __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+
+ /*
+ * dc & ic instructions must use 64bit registers so we don't use
+   * uintptr_t in case this runs in an ILP32 environment.
+ */
+ const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
+ for (addr = xstart; addr < xend; addr += dcache_line_size)
+ __asm __volatile("dc cvau, %0" :: "r"(addr));
+ __asm __volatile("dsb ish");
+
+ const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
+ for (addr = xstart; addr < xend; addr += icache_line_size)
+ __asm __volatile("ic ivau, %0" :: "r"(addr));
+ __asm __volatile("isb sy");
+#else
+ #if __APPLE__
+ /* On Darwin, sys_icache_invalidate() provides this functionality */
+ sys_icache_invalidate(start, end-start);
+ #else
+ compilerrt_abort();
+ #endif
+#endif
+}
+
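The AArch64 branch of __clear_cache above cleans each data-cache line with "dc cvau" and invalidates each instruction-cache line with "ic ivau", deriving both line sizes from CTR_EL0: IminLine (bits 3:0) and DminLine (bits 19:16) hold log2 of the line length measured in 4-byte words, which is why the loops use "4 <<". A minimal, hypothetical sketch of that arithmetic follows; the sample CTR_EL0 value is an assumption for illustration only, not read from hardware.

    /* Hypothetical illustration, not part of compiler-rt: how the
     * "4 << field" shifts turn CTR_EL0 fields into line sizes in bytes. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* Assumed sample: DminLine = 4, IminLine = 4, i.e. 64-byte lines
         * (common on Cortex-A cores); the real value would come from the
         * "mrs %0, ctr_el0" read used above. */
        uint64_t ctr_el0 = ((uint64_t)4 << 16) | 4;

        uint64_t dcache_line_size = 4u << ((ctr_el0 >> 16) & 15); /* 64 */
        uint64_t icache_line_size = 4u << ((ctr_el0 >> 0) & 15);  /* 64 */

        printf("dcache line: %llu bytes, icache line: %llu bytes\n",
               (unsigned long long)dcache_line_size,
               (unsigned long long)icache_line_size);
        return 0;
    }
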
diff --git a/contrib/libs/cxxsupp/builtins/clzdi2.c b/contrib/libs/cxxsupp/builtins/clzdi2.c
index 5d1539ae5e..b9e64da492 100644
--- a/contrib/libs/cxxsupp/builtins/clzdi2.c
+++ b/contrib/libs/cxxsupp/builtins/clzdi2.c
@@ -1,29 +1,29 @@
-/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __clzdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: the number of leading 0-bits */
-
-/* Precondition: a != 0 */
-
-COMPILER_RT_ABI si_int
-__clzdi2(di_int a)
-{
- dwords x;
- x.all = a;
- const si_int f = -(x.s.high == 0);
- return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) +
- (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
-}
+/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __clzdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of leading 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__clzdi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ const si_int f = -(x.s.high == 0);
+ return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) +
+ (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
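The branch-free form above builds the mask f, which is 0 when the high 32-bit half is non-zero and all ones otherwise, so the single __builtin_clz call counts in the high half or in the low half plus 32. A hypothetical reference version and self-check, assuming the GCC/Clang builtin that the original also relies on:

    #include <assert.h>
    #include <stdint.h>

    /* Straightforward equivalent of __clzdi2, for illustration only. */
    static int clzdi2_ref(int64_t a) {
        uint64_t x = (uint64_t)a;
        uint32_t high = (uint32_t)(x >> 32);
        uint32_t low  = (uint32_t)x;
        /* Count in the high half if it has any bits set, otherwise add 32
         * and count in the low half (a != 0 is a precondition, as above). */
        return high ? __builtin_clz(high) : 32 + __builtin_clz(low);
    }

    int main(void) {
        assert(clzdi2_ref(1) == 63);
        assert(clzdi2_ref((int64_t)1 << 40) == 23);
        assert(clzdi2_ref(-1) == 0);   /* the sign bit is the leading bit */
        return 0;
    }
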
diff --git a/contrib/libs/cxxsupp/builtins/clzsi2.c b/contrib/libs/cxxsupp/builtins/clzsi2.c
index 4dab1fbf5d..25b8ed2c4c 100644
--- a/contrib/libs/cxxsupp/builtins/clzsi2.c
+++ b/contrib/libs/cxxsupp/builtins/clzsi2.c
@@ -1,53 +1,53 @@
-/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __clzsi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: the number of leading 0-bits */
-
-/* Precondition: a != 0 */
-
-COMPILER_RT_ABI si_int
-__clzsi2(si_int a)
-{
- su_int x = (su_int)a;
- si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */
- x >>= 16 - t; /* x = [0 - 0xFFFF] */
- su_int r = t; /* r = [0, 16] */
- /* return r + clz(x) */
- t = ((x & 0xFF00) == 0) << 3;
- x >>= 8 - t; /* x = [0 - 0xFF] */
- r += t; /* r = [0, 8, 16, 24] */
- /* return r + clz(x) */
- t = ((x & 0xF0) == 0) << 2;
- x >>= 4 - t; /* x = [0 - 0xF] */
- r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
- /* return r + clz(x) */
- t = ((x & 0xC) == 0) << 1;
- x >>= 2 - t; /* x = [0 - 3] */
- r += t; /* r = [0 - 30] and is even */
- /* return r + clz(x) */
-/* switch (x)
- * {
- * case 0:
- * return r + 2;
- * case 1:
- * return r + 1;
- * case 2:
- * case 3:
- * return r;
- * }
- */
- return r + ((2 - x) & -((x & 2) == 0));
-}
+/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __clzsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of leading 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__clzsi2(si_int a)
+{
+ su_int x = (su_int)a;
+ si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */
+ x >>= 16 - t; /* x = [0 - 0xFFFF] */
+ su_int r = t; /* r = [0, 16] */
+ /* return r + clz(x) */
+ t = ((x & 0xFF00) == 0) << 3;
+ x >>= 8 - t; /* x = [0 - 0xFF] */
+ r += t; /* r = [0, 8, 16, 24] */
+ /* return r + clz(x) */
+ t = ((x & 0xF0) == 0) << 2;
+ x >>= 4 - t; /* x = [0 - 0xF] */
+ r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
+ /* return r + clz(x) */
+ t = ((x & 0xC) == 0) << 1;
+ x >>= 2 - t; /* x = [0 - 3] */
+ r += t; /* r = [0 - 30] and is even */
+ /* return r + clz(x) */
+/* switch (x)
+ * {
+ * case 0:
+ * return r + 2;
+ * case 1:
+ * return r + 1;
+ * case 2:
+ * case 3:
+ * return r;
+ * }
+ */
+ return r + ((2 - x) & -((x & 2) == 0));
+}
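The routine above is a branch-free binary search: each step asks whether the upper half of the bits still under consideration is zero, credits 16, 8, 4 or 2 leading zeros accordingly, and shifts the half containing the leading one down for the next step. A tiny, hypothetical shift-loop reference (not part of compiler-rt) that the branch-free version must agree with:

    #include <assert.h>

    /* Naive reference for __clzsi2: shift left until the MSB is set.
     * Assumes 32-bit unsigned int and a non-zero argument, as above. */
    static int clzsi2_ref(unsigned x) {
        int r = 0;
        while (!(x & 0x80000000u)) { x <<= 1; ++r; }
        return r;
    }

    int main(void) {
        assert(clzsi2_ref(1u) == 31);
        assert(clzsi2_ref(0x00012345u) == 15);  /* leading one is bit 16 */
        assert(clzsi2_ref(0x80000000u) == 0);
        return 0;
    }
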
diff --git a/contrib/libs/cxxsupp/builtins/clzti2.c b/contrib/libs/cxxsupp/builtins/clzti2.c
index 8b8b928c2a..15a7b3c900 100644
--- a/contrib/libs/cxxsupp/builtins/clzti2.c
+++ b/contrib/libs/cxxsupp/builtins/clzti2.c
@@ -1,33 +1,33 @@
-/* ===-- clzti2.c - Implement __clzti2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __clzti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: the number of leading 0-bits */
-
-/* Precondition: a != 0 */
-
-COMPILER_RT_ABI si_int
-__clzti2(ti_int a)
-{
- twords x;
- x.all = a;
- const di_int f = -(x.s.high == 0);
- return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
- ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- clzti2.c - Implement __clzti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __clzti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: the number of leading 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__clzti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ const di_int f = -(x.s.high == 0);
+ return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
+ ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/cmpdi2.c b/contrib/libs/cxxsupp/builtins/cmpdi2.c
index 7477eb8cc9..52634d9c33 100644
--- a/contrib/libs/cxxsupp/builtins/cmpdi2.c
+++ b/contrib/libs/cxxsupp/builtins/cmpdi2.c
@@ -1,51 +1,51 @@
-/* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __cmpdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: if (a < b) returns 0
-* if (a == b) returns 1
-* if (a > b) returns 2
-*/
-
-COMPILER_RT_ABI si_int
-__cmpdi2(di_int a, di_int b)
-{
- dwords x;
- x.all = a;
- dwords y;
- y.all = b;
- if (x.s.high < y.s.high)
- return 0;
- if (x.s.high > y.s.high)
- return 2;
- if (x.s.low < y.s.low)
- return 0;
- if (x.s.low > y.s.low)
- return 2;
- return 1;
-}
-
-#ifdef __ARM_EABI__
-/* Returns: if (a < b) returns -1
-* if (a == b) returns 0
-* if (a > b) returns 1
-*/
-COMPILER_RT_ABI si_int
-__aeabi_lcmp(di_int a, di_int b)
-{
- return __cmpdi2(a, b) - 1;
-}
-#endif
-
+/* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __cmpdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: if (a < b) returns 0
+* if (a == b) returns 1
+* if (a > b) returns 2
+*/
+
+COMPILER_RT_ABI si_int
+__cmpdi2(di_int a, di_int b)
+{
+ dwords x;
+ x.all = a;
+ dwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#ifdef __ARM_EABI__
+/* Returns: if (a < b) returns -1
+* if (a == b) returns 0
+* if (a > b) returns 1
+*/
+COMPILER_RT_ABI si_int
+__aeabi_lcmp(di_int a, di_int b)
+{
+ return __cmpdi2(a, b) - 1;
+}
+#endif
+
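As the comments above state, __cmpdi2 encodes less/equal/greater as 0/1/2, and the ARM EABI helper __aeabi_lcmp obtains the conventional -1/0/+1 ordering simply by subtracting 1. A hypothetical usage sketch; the reference function is illustrative, not compiler-rt API:

    #include <assert.h>
    #include <stdint.h>

    /* Reference with the same 0/1/2 encoding as __cmpdi2 above. */
    static int cmpdi2_ref(int64_t a, int64_t b) {
        if (a < b) return 0;
        if (a > b) return 2;
        return 1;
    }

    int main(void) {
        assert(cmpdi2_ref(-5, 7) == 0);        /* a <  b            */
        assert(cmpdi2_ref( 7, 7) == 1);        /* a == b            */
        assert(cmpdi2_ref( 7,-5) == 2);        /* a >  b            */
        assert(cmpdi2_ref(-5, 7) - 1 == -1);   /* __aeabi_lcmp view */
        return 0;
    }
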
diff --git a/contrib/libs/cxxsupp/builtins/cmpti2.c b/contrib/libs/cxxsupp/builtins/cmpti2.c
index 9280903fe1..2c8b56e29a 100644
--- a/contrib/libs/cxxsupp/builtins/cmpti2.c
+++ b/contrib/libs/cxxsupp/builtins/cmpti2.c
@@ -1,42 +1,42 @@
-/* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __cmpti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: if (a < b) returns 0
- * if (a == b) returns 1
- * if (a > b) returns 2
- */
-
-COMPILER_RT_ABI si_int
-__cmpti2(ti_int a, ti_int b)
-{
- twords x;
- x.all = a;
- twords y;
- y.all = b;
- if (x.s.high < y.s.high)
- return 0;
- if (x.s.high > y.s.high)
- return 2;
- if (x.s.low < y.s.low)
- return 0;
- if (x.s.low > y.s.low)
- return 2;
- return 1;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __cmpti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: if (a < b) returns 0
+ * if (a == b) returns 1
+ * if (a > b) returns 2
+ */
+
+COMPILER_RT_ABI si_int
+__cmpti2(ti_int a, ti_int b)
+{
+ twords x;
+ x.all = a;
+ twords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/comparedf2.c b/contrib/libs/cxxsupp/builtins/comparedf2.c
index 83503f2cab..9e29752231 100644
--- a/contrib/libs/cxxsupp/builtins/comparedf2.c
+++ b/contrib/libs/cxxsupp/builtins/comparedf2.c
@@ -1,146 +1,146 @@
-//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the following soft-float comparison routines:
-//
-// __eqdf2 __gedf2 __unorddf2
-// __ledf2 __gtdf2
-// __ltdf2
-// __nedf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, and wrappers to provide the other names.
-//
-// The main routines behave as follows:
-//
-// __ledf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// 1 if either a or b is NaN
-//
-// __gedf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// -1 if either a or b is NaN
-//
-// __unorddf2(a,b) returns 0 if both a and b are numbers
-// 1 if either a or b is NaN
-//
-// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-enum LE_RESULT {
- LE_LESS = -1,
- LE_EQUAL = 0,
- LE_GREATER = 1,
- LE_UNORDERED = 1
-};
-
-COMPILER_RT_ABI enum LE_RESULT
-__ledf2(fp_t a, fp_t b) {
-
- const srep_t aInt = toRep(a);
- const srep_t bInt = toRep(b);
- const rep_t aAbs = aInt & absMask;
- const rep_t bAbs = bInt & absMask;
-
- // If either a or b is NaN, they are unordered.
- if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
-
- // If a and b are both zeros, they are equal.
- if ((aAbs | bAbs) == 0) return LE_EQUAL;
-
- // If at least one of a and b is positive, we get the same result comparing
- // a and b as signed integers as we would with a floating-point compare.
- if ((aInt & bInt) >= 0) {
- if (aInt < bInt) return LE_LESS;
- else if (aInt == bInt) return LE_EQUAL;
- else return LE_GREATER;
- }
-
- // Otherwise, both are negative, so we need to flip the sense of the
- // comparison to get the correct result. (This assumes a twos- or ones-
- // complement integer representation; if integers are represented in a
- // sign-magnitude representation, then this flip is incorrect).
- else {
- if (aInt > bInt) return LE_LESS;
- else if (aInt == bInt) return LE_EQUAL;
- else return LE_GREATER;
- }
-}
-
-#if defined(__ELF__)
-// Alias for libgcc compatibility
-FNALIAS(__cmpdf2, __ledf2);
-#endif
-
-enum GE_RESULT {
- GE_LESS = -1,
- GE_EQUAL = 0,
- GE_GREATER = 1,
- GE_UNORDERED = -1 // Note: different from LE_UNORDERED
-};
-
-COMPILER_RT_ABI enum GE_RESULT
-__gedf2(fp_t a, fp_t b) {
-
- const srep_t aInt = toRep(a);
- const srep_t bInt = toRep(b);
- const rep_t aAbs = aInt & absMask;
- const rep_t bAbs = bInt & absMask;
-
- if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
- if ((aAbs | bAbs) == 0) return GE_EQUAL;
- if ((aInt & bInt) >= 0) {
- if (aInt < bInt) return GE_LESS;
- else if (aInt == bInt) return GE_EQUAL;
- else return GE_GREATER;
- } else {
- if (aInt > bInt) return GE_LESS;
- else if (aInt == bInt) return GE_EQUAL;
- else return GE_GREATER;
- }
-}
-
-ARM_EABI_FNALIAS(dcmpun, unorddf2)
-
-COMPILER_RT_ABI int
-__unorddf2(fp_t a, fp_t b) {
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
- return aAbs > infRep || bAbs > infRep;
-}
-
-// The following are alternative names for the preceding routines.
-
-COMPILER_RT_ABI enum LE_RESULT
-__eqdf2(fp_t a, fp_t b) {
- return __ledf2(a, b);
-}
-
-COMPILER_RT_ABI enum LE_RESULT
-__ltdf2(fp_t a, fp_t b) {
- return __ledf2(a, b);
-}
-
-COMPILER_RT_ABI enum LE_RESULT
-__nedf2(fp_t a, fp_t b) {
- return __ledf2(a, b);
-}
-
-COMPILER_RT_ABI enum GE_RESULT
-__gtdf2(fp_t a, fp_t b) {
- return __gedf2(a, b);
-}
-
+//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+// __eqdf2 __gedf2 __unorddf2
+// __ledf2 __gtdf2
+// __ltdf2
+// __nedf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __ledf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gedf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unorddf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+enum LE_RESULT {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1
+};
+
+COMPILER_RT_ABI enum LE_RESULT
+__ledf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0) return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+ // a and b as signed integers as we would with a floating-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ else {
+ if (aInt > bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+}
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpdf2, __ledf2);
+#endif
+
+enum GE_RESULT {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+COMPILER_RT_ABI enum GE_RESULT
+__gedf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0) return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ } else {
+ if (aInt > bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ }
+}
+
+ARM_EABI_FNALIAS(dcmpun, unorddf2)
+
+COMPILER_RT_ABI int
+__unorddf2(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
+
+// The following are alternative names for the preceding routines.
+
+COMPILER_RT_ABI enum LE_RESULT
+__eqdf2(fp_t a, fp_t b) {
+ return __ledf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__ltdf2(fp_t a, fp_t b) {
+ return __ledf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__nedf2(fp_t a, fp_t b) {
+ return __ledf2(a, b);
+}
+
+COMPILER_RT_ABI enum GE_RESULT
+__gtdf2(fp_t a, fp_t b) {
+ return __gedf2(a, b);
+}
+
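The comparisons above work by reinterpreting both doubles as signed integers: for finite values the bit patterns order the same way as the numbers when both operands are positive, and in reverse when both are negative, which is why the negative branch flips the sense of the test. A hypothetical stand-alone illustration, assuming IEEE-754 doubles:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Like toRep() above: reinterpret the bits of a double as int64_t. */
    static int64_t to_rep(double d) {
        int64_t r;
        memcpy(&r, &d, sizeof r);
        return r;
    }

    int main(void) {
        /* Positive operands: integer order matches floating-point order. */
        assert((to_rep(1.0) < to_rep(2.0)) && (1.0 < 2.0));
        /* Negative operands: integer order is reversed, hence the flip.  */
        assert((to_rep(-1.0) < to_rep(-2.0)) && (-1.0 > -2.0));
        return 0;
    }
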
diff --git a/contrib/libs/cxxsupp/builtins/comparesf2.c b/contrib/libs/cxxsupp/builtins/comparesf2.c
index 0ed6dd6a4b..1fd50636ab 100644
--- a/contrib/libs/cxxsupp/builtins/comparesf2.c
+++ b/contrib/libs/cxxsupp/builtins/comparesf2.c
@@ -1,145 +1,145 @@
-//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the following soft-float comparison routines:
-//
-// __eqsf2 __gesf2 __unordsf2
-// __lesf2 __gtsf2
-// __ltsf2
-// __nesf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, and wrappers to provide the other names.
-//
-// The main routines behave as follows:
-//
-// __lesf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// 1 if either a or b is NaN
-//
-// __gesf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// -1 if either a or b is NaN
-//
-// __unordsf2(a,b) returns 0 if both a and b are numbers
-// 1 if either a or b is NaN
-//
-// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-enum LE_RESULT {
- LE_LESS = -1,
- LE_EQUAL = 0,
- LE_GREATER = 1,
- LE_UNORDERED = 1
-};
-
-COMPILER_RT_ABI enum LE_RESULT
-__lesf2(fp_t a, fp_t b) {
-
- const srep_t aInt = toRep(a);
- const srep_t bInt = toRep(b);
- const rep_t aAbs = aInt & absMask;
- const rep_t bAbs = bInt & absMask;
-
- // If either a or b is NaN, they are unordered.
- if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
-
- // If a and b are both zeros, they are equal.
- if ((aAbs | bAbs) == 0) return LE_EQUAL;
-
- // If at least one of a and b is positive, we get the same result comparing
-    // a and b as signed integers as we would with a floating-point compare.
- if ((aInt & bInt) >= 0) {
- if (aInt < bInt) return LE_LESS;
- else if (aInt == bInt) return LE_EQUAL;
- else return LE_GREATER;
- }
-
- // Otherwise, both are negative, so we need to flip the sense of the
- // comparison to get the correct result. (This assumes a twos- or ones-
- // complement integer representation; if integers are represented in a
- // sign-magnitude representation, then this flip is incorrect).
- else {
- if (aInt > bInt) return LE_LESS;
- else if (aInt == bInt) return LE_EQUAL;
- else return LE_GREATER;
- }
-}
-
-#if defined(__ELF__)
-// Alias for libgcc compatibility
-FNALIAS(__cmpsf2, __lesf2);
-#endif
-
-enum GE_RESULT {
- GE_LESS = -1,
- GE_EQUAL = 0,
- GE_GREATER = 1,
- GE_UNORDERED = -1 // Note: different from LE_UNORDERED
-};
-
-COMPILER_RT_ABI enum GE_RESULT
-__gesf2(fp_t a, fp_t b) {
-
- const srep_t aInt = toRep(a);
- const srep_t bInt = toRep(b);
- const rep_t aAbs = aInt & absMask;
- const rep_t bAbs = bInt & absMask;
-
- if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
- if ((aAbs | bAbs) == 0) return GE_EQUAL;
- if ((aInt & bInt) >= 0) {
- if (aInt < bInt) return GE_LESS;
- else if (aInt == bInt) return GE_EQUAL;
- else return GE_GREATER;
- } else {
- if (aInt > bInt) return GE_LESS;
- else if (aInt == bInt) return GE_EQUAL;
- else return GE_GREATER;
- }
-}
-
-ARM_EABI_FNALIAS(fcmpun, unordsf2)
-
-COMPILER_RT_ABI int
-__unordsf2(fp_t a, fp_t b) {
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
- return aAbs > infRep || bAbs > infRep;
-}
-
-// The following are alternative names for the preceding routines.
-
-COMPILER_RT_ABI enum LE_RESULT
-__eqsf2(fp_t a, fp_t b) {
- return __lesf2(a, b);
-}
-
-COMPILER_RT_ABI enum LE_RESULT
-__ltsf2(fp_t a, fp_t b) {
- return __lesf2(a, b);
-}
-
-COMPILER_RT_ABI enum LE_RESULT
-__nesf2(fp_t a, fp_t b) {
- return __lesf2(a, b);
-}
-
-COMPILER_RT_ABI enum GE_RESULT
-__gtsf2(fp_t a, fp_t b) {
- return __gesf2(a, b);
-}
+//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+// __eqsf2 __gesf2 __unordsf2
+// __lesf2 __gtsf2
+// __ltsf2
+// __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __lesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordsf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+enum LE_RESULT {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1
+};
+
+COMPILER_RT_ABI enum LE_RESULT
+__lesf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0) return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+    // a and b as signed integers as we would with a floating-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ else {
+ if (aInt > bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+}
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpsf2, __lesf2);
+#endif
+
+enum GE_RESULT {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+COMPILER_RT_ABI enum GE_RESULT
+__gesf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0) return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ } else {
+ if (aInt > bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ }
+}
+
+ARM_EABI_FNALIAS(fcmpun, unordsf2)
+
+COMPILER_RT_ABI int
+__unordsf2(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
+
+// The following are alternative names for the preceding routines.
+
+COMPILER_RT_ABI enum LE_RESULT
+__eqsf2(fp_t a, fp_t b) {
+ return __lesf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__ltsf2(fp_t a, fp_t b) {
+ return __lesf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__nesf2(fp_t a, fp_t b) {
+ return __lesf2(a, b);
+}
+
+COMPILER_RT_ABI enum GE_RESULT
+__gtsf2(fp_t a, fp_t b) {
+ return __gesf2(a, b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/comparetf2.c b/contrib/libs/cxxsupp/builtins/comparetf2.c
index 0b4c16b1e3..c0ad8ed0ae 100644
--- a/contrib/libs/cxxsupp/builtins/comparetf2.c
+++ b/contrib/libs/cxxsupp/builtins/comparetf2.c
@@ -1,138 +1,138 @@
-//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the following soft-float comparison routines:
-//
-// __eqtf2 __getf2 __unordtf2
-// __letf2 __gttf2
-// __lttf2
-// __netf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, and wrappers to provide the other names.
-//
-// The main routines behave as follows:
-//
-// __letf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// 1 if either a or b is NaN
-//
-// __getf2(a,b) returns -1 if a < b
-// 0 if a == b
-// 1 if a > b
-// -1 if either a or b is NaN
-//
-// __unordtf2(a,b) returns 0 if both a and b are numbers
-// 1 if either a or b is NaN
-//
-// Note that __letf2( ) and __getf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-enum LE_RESULT {
- LE_LESS = -1,
- LE_EQUAL = 0,
- LE_GREATER = 1,
- LE_UNORDERED = 1
-};
-
-COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) {
-
- const srep_t aInt = toRep(a);
- const srep_t bInt = toRep(b);
- const rep_t aAbs = aInt & absMask;
- const rep_t bAbs = bInt & absMask;
-
- // If either a or b is NaN, they are unordered.
- if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
-
- // If a and b are both zeros, they are equal.
- if ((aAbs | bAbs) == 0) return LE_EQUAL;
-
- // If at least one of a and b is positive, we get the same result comparing
- // a and b as signed integers as we would with a floating-point compare.
- if ((aInt & bInt) >= 0) {
- if (aInt < bInt) return LE_LESS;
- else if (aInt == bInt) return LE_EQUAL;
- else return LE_GREATER;
- }
- else {
- // Otherwise, both are negative, so we need to flip the sense of the
- // comparison to get the correct result. (This assumes a twos- or ones-
- // complement integer representation; if integers are represented in a
- // sign-magnitude representation, then this flip is incorrect).
- if (aInt > bInt) return LE_LESS;
- else if (aInt == bInt) return LE_EQUAL;
- else return LE_GREATER;
- }
-}
-
-#if defined(__ELF__)
-// Alias for libgcc compatibility
-FNALIAS(__cmptf2, __letf2);
-#endif
-
-enum GE_RESULT {
- GE_LESS = -1,
- GE_EQUAL = 0,
- GE_GREATER = 1,
- GE_UNORDERED = -1 // Note: different from LE_UNORDERED
-};
-
-COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) {
-
- const srep_t aInt = toRep(a);
- const srep_t bInt = toRep(b);
- const rep_t aAbs = aInt & absMask;
- const rep_t bAbs = bInt & absMask;
-
- if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
- if ((aAbs | bAbs) == 0) return GE_EQUAL;
- if ((aInt & bInt) >= 0) {
- if (aInt < bInt) return GE_LESS;
- else if (aInt == bInt) return GE_EQUAL;
- else return GE_GREATER;
- } else {
- if (aInt > bInt) return GE_LESS;
- else if (aInt == bInt) return GE_EQUAL;
- else return GE_GREATER;
- }
-}
-
-COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) {
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
- return aAbs > infRep || bAbs > infRep;
-}
-
-// The following are alternative names for the preceding routines.
-
-COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) {
- return __letf2(a, b);
-}
-
-COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) {
- return __letf2(a, b);
-}
-
-COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) {
- return __letf2(a, b);
-}
-
-COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) {
- return __getf2(a, b);
-}
-
-#endif
+//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+// __eqtf2 __getf2 __unordtf2
+// __letf2 __gttf2
+// __lttf2
+// __netf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __letf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __getf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordtf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __letf2( ) and __getf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+enum LE_RESULT {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1
+};
+
+COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0) return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+ // a and b as signed integers as we would with a floating-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+ else {
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ if (aInt > bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+}
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmptf2, __letf2);
+#endif
+
+enum GE_RESULT {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0) return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ } else {
+ if (aInt > bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ }
+}
+
+COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
+
+// The following are alternative names for the preceding routines.
+
+COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) {
+ return __letf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) {
+ return __letf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) {
+ return __letf2(a, b);
+}
+
+COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) {
+ return __getf2(a, b);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/ctzdi2.c b/contrib/libs/cxxsupp/builtins/ctzdi2.c
index 6474996b5f..db3c6fdc08 100644
--- a/contrib/libs/cxxsupp/builtins/ctzdi2.c
+++ b/contrib/libs/cxxsupp/builtins/ctzdi2.c
@@ -1,29 +1,29 @@
-/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ctzdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: the number of trailing 0-bits */
-
-/* Precondition: a != 0 */
-
-COMPILER_RT_ABI si_int
-__ctzdi2(di_int a)
-{
- dwords x;
- x.all = a;
- const si_int f = -(x.s.low == 0);
- return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) +
- (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
-}
+/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ctzdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of trailing 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__ctzdi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ const si_int f = -(x.s.low == 0);
+ return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) +
+ (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
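This is the mirror image of __clzdi2: here the mask f selects the low 32-bit half when it is non-zero, and otherwise adds 32 and counts trailing zeros in the high half. A short hypothetical reference, again assuming the GCC/Clang builtin:

    #include <assert.h>
    #include <stdint.h>

    static int ctzdi2_ref(int64_t a) {      /* illustration only */
        uint64_t x = (uint64_t)a;
        uint32_t low  = (uint32_t)x;
        uint32_t high = (uint32_t)(x >> 32);
        return low ? __builtin_ctz(low) : 32 + __builtin_ctz(high);
    }

    int main(void) {
        assert(ctzdi2_ref(0x80) == 7);
        assert(ctzdi2_ref((int64_t)1 << 40) == 40);
        return 0;
    }
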
diff --git a/contrib/libs/cxxsupp/builtins/ctzsi2.c b/contrib/libs/cxxsupp/builtins/ctzsi2.c
index bc31f8d405..c69486ea44 100644
--- a/contrib/libs/cxxsupp/builtins/ctzsi2.c
+++ b/contrib/libs/cxxsupp/builtins/ctzsi2.c
@@ -1,57 +1,57 @@
-/* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ctzsi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: the number of trailing 0-bits */
-
-/* Precondition: a != 0 */
-
-COMPILER_RT_ABI si_int
-__ctzsi2(si_int a)
-{
- su_int x = (su_int)a;
- si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */
- x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */
- su_int r = t; /* r = [0, 16] */
- /* return r + ctz(x) */
- t = ((x & 0x00FF) == 0) << 3;
- x >>= t; /* x = [0 - 0xFF] + higher garbage bits */
- r += t; /* r = [0, 8, 16, 24] */
- /* return r + ctz(x) */
- t = ((x & 0x0F) == 0) << 2;
- x >>= t; /* x = [0 - 0xF] + higher garbage bits */
- r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
- /* return r + ctz(x) */
- t = ((x & 0x3) == 0) << 1;
- x >>= t;
- x &= 3; /* x = [0 - 3] */
- r += t; /* r = [0 - 30] and is even */
- /* return r + ctz(x) */
-
-/* The branch-less return statement below is equivalent
- * to the following switch statement:
- * switch (x)
- * {
- * case 0:
- * return r + 2;
- * case 2:
- * return r + 1;
- * case 1:
- * case 3:
- * return r;
- * }
- */
- return r + ((2 - (x >> 1)) & -((x & 1) == 0));
-}
+/* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ctzsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of trailing 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__ctzsi2(si_int a)
+{
+ su_int x = (su_int)a;
+ si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */
+ x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */
+ su_int r = t; /* r = [0, 16] */
+ /* return r + ctz(x) */
+ t = ((x & 0x00FF) == 0) << 3;
+ x >>= t; /* x = [0 - 0xFF] + higher garbage bits */
+ r += t; /* r = [0, 8, 16, 24] */
+ /* return r + ctz(x) */
+ t = ((x & 0x0F) == 0) << 2;
+ x >>= t; /* x = [0 - 0xF] + higher garbage bits */
+ r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
+ /* return r + ctz(x) */
+ t = ((x & 0x3) == 0) << 1;
+ x >>= t;
+ x &= 3; /* x = [0 - 3] */
+ r += t; /* r = [0 - 30] and is even */
+ /* return r + ctz(x) */
+
+/* The branch-less return statement below is equivalent
+ * to the following switch statement:
+ * switch (x)
+ * {
+ * case 0:
+ * return r + 2;
+ * case 2:
+ * return r + 1;
+ * case 1:
+ * case 3:
+ * return r;
+ * }
+ */
+ return r + ((2 - (x >> 1)) & -((x & 1) == 0));
+}
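The final branch-less return statement above handles the last two bits x in [0, 3] and reproduces the switch shown in the comment: x == 0 gives r + 2, x == 2 gives r + 1, and x == 1 or 3 gives r. A quick hypothetical check of just that step:

    #include <assert.h>

    int main(void) {
        for (unsigned x = 0; x < 4; ++x) {
            /* Same expression as in __ctzsi2 above, on the residual bits. */
            unsigned branchless = (2 - (x >> 1)) & -((x & 1) == 0);
            unsigned expected = (x == 0) ? 2u : (x == 2) ? 1u : 0u;
            assert(branchless == expected);
        }
        return 0;
    }
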
diff --git a/contrib/libs/cxxsupp/builtins/ctzti2.c b/contrib/libs/cxxsupp/builtins/ctzti2.c
index b9f4f16123..45de682700 100644
--- a/contrib/libs/cxxsupp/builtins/ctzti2.c
+++ b/contrib/libs/cxxsupp/builtins/ctzti2.c
@@ -1,33 +1,33 @@
-/* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ctzti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: the number of trailing 0-bits */
-
-/* Precondition: a != 0 */
-
-COMPILER_RT_ABI si_int
-__ctzti2(ti_int a)
-{
- twords x;
- x.all = a;
- const di_int f = -(x.s.low == 0);
- return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
- ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ctzti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: the number of trailing 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__ctzti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ const di_int f = -(x.s.low == 0);
+ return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
+ ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/divdc3.c b/contrib/libs/cxxsupp/builtins/divdc3.c
index 57f4ce4c3c..3c88390b5e 100644
--- a/contrib/libs/cxxsupp/builtins/divdc3.c
+++ b/contrib/libs/cxxsupp/builtins/divdc3.c
@@ -1,60 +1,60 @@
-/* ===-- divdc3.c - Implement __divdc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divdc3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the quotient of (a + ib) / (c + id) */
-
-COMPILER_RT_ABI Dcomplex
-__divdc3(double __a, double __b, double __c, double __d)
-{
- int __ilogbw = 0;
- double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d)));
- if (crt_isfinite(__logbw))
- {
- __ilogbw = (int)__logbw;
- __c = crt_scalbn(__c, -__ilogbw);
- __d = crt_scalbn(__d, -__ilogbw);
- }
- double __denom = __c * __c + __d * __d;
- Dcomplex z;
- COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
- COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
- {
- if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
- {
- COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
- COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
- }
- else if ((crt_isinf(__a) || crt_isinf(__b)) &&
- crt_isfinite(__c) && crt_isfinite(__d))
- {
- __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
- __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
- COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
- COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
- }
- else if (crt_isinf(__logbw) && __logbw > 0.0 &&
- crt_isfinite(__a) && crt_isfinite(__b))
- {
- __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
- __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
- COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
- COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
- }
- }
- return z;
-}
+/* ===-- divdc3.c - Implement __divdc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divdc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Dcomplex
+__divdc3(double __a, double __b, double __c, double __d)
+{
+ int __ilogbw = 0;
+ double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbn(__c, -__ilogbw);
+ __d = crt_scalbn(__d, -__ilogbw);
+ }
+ double __denom = __c * __c + __d * __d;
+ Dcomplex z;
+ COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
+ __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
+ __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
+ COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
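The logb/scalbn step above keeps the intermediate c*c + d*d from overflowing or underflowing even when the quotient itself is representable, and the if-chain afterwards repairs the NaN results the scaled formula still produces for infinite or zero operands, in the style of the C99 Annex G sample division. A hypothetical demonstration of the overflow problem the scaling avoids (standard math.h functions only; link with -lm):

    #include <assert.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double a = 1.0, b = 1.0;        /* numerator   1 + 1i         */
        double c = 1e200, d = 1e200;    /* denominator 1e200 + 1e200i */

        /* Naive textbook formula: c*c + d*d overflows to +inf, so the
         * real part collapses to 0 instead of the correct 1e-200.     */
        double naive_re = (a * c + b * d) / (c * c + d * d);

        /* Scaled formula in the spirit of __divdc3 above.             */
        int ilogbw = (int)logb(fmax(fabs(c), fabs(d)));
        double cs = scalbn(c, -ilogbw), ds = scalbn(d, -ilogbw);
        double scaled_re =
            scalbn((a * cs + b * ds) / (cs * cs + ds * ds), -ilogbw);

        printf("naive %g  scaled %g\n", naive_re, scaled_re);
        assert(naive_re == 0.0 && scaled_re > 0.0);
        return 0;
    }
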
diff --git a/contrib/libs/cxxsupp/builtins/divdf3.c b/contrib/libs/cxxsupp/builtins/divdf3.c
index 21fb6d366b..ab44c2b25f 100644
--- a/contrib/libs/cxxsupp/builtins/divdf3.c
+++ b/contrib/libs/cxxsupp/builtins/divdf3.c
@@ -1,185 +1,185 @@
-//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float division
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-// For simplicity, this implementation currently flushes denormals to zero.
-// It should be a fairly straightforward exercise to implement gradual
-// underflow with correct rounding.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(ddiv, divdf3)
-
-COMPILER_RT_ABI fp_t
-__divdf3(fp_t a, fp_t b) {
-
- const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
- const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
- const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
-
- rep_t aSignificand = toRep(a) & significandMask;
- rep_t bSignificand = toRep(b) & significandMask;
- int scale = 0;
-
- // Detect if a or b is zero, denormal, infinity, or NaN.
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
-
- // NaN / anything = qNaN
- if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
- // anything / NaN = qNaN
- if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-
- if (aAbs == infRep) {
- // infinity / infinity = NaN
- if (bAbs == infRep) return fromRep(qnanRep);
- // infinity / anything else = +/- infinity
- else return fromRep(aAbs | quotientSign);
- }
-
- // anything else / infinity = +/- 0
- if (bAbs == infRep) return fromRep(quotientSign);
-
- if (!aAbs) {
- // zero / zero = NaN
- if (!bAbs) return fromRep(qnanRep);
- // zero / anything else = +/- zero
- else return fromRep(quotientSign);
- }
- // anything else / zero = +/- infinity
- if (!bAbs) return fromRep(infRep | quotientSign);
-
- // one or both of a or b is denormal, the other (if applicable) is a
- // normal number. Renormalize one or both of a and b, and set scale to
- // include the necessary exponent adjustment.
- if (aAbs < implicitBit) scale += normalize(&aSignificand);
- if (bAbs < implicitBit) scale -= normalize(&bSignificand);
- }
-
- // Or in the implicit significand bit. (If we fell through from the
- // denormal path it was already set by normalize( ), but setting it twice
- // won't hurt anything.)
- aSignificand |= implicitBit;
- bSignificand |= implicitBit;
- int quotientExponent = aExponent - bExponent + scale;
-
- // Align the significand of b as a Q31 fixed-point number in the range
- // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
- // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
- // is accurate to about 3.5 binary digits.
- const uint32_t q31b = bSignificand >> 21;
- uint32_t recip32 = UINT32_C(0x7504f333) - q31b;
-
- // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(ddiv, divdf3)
+
+COMPILER_RT_ABI fp_t
+__divdf3(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep) return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep) return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs) return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ // one or both of a or b is denormal, the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+ int quotientExponent = aExponent - bExponent + scale;
+
+ // Align the significand of b as a Q31 fixed-point number in the range
+ // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
+ // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+ // is accurate to about 3.5 binary digits.
+ const uint32_t q31b = bSignificand >> 21;
+ uint32_t recip32 = UINT32_C(0x7504f333) - q31b;
+
+ // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+ //
+ // x1 = x0 * (2 - x0 * b)
+ //
+ // This doubles the number of correct binary digits in the approximation
+ // with each iteration, so after three iterations, we have about 28 binary
+ // digits of accuracy.
+ uint32_t correction32;
+ correction32 = -((uint64_t)recip32 * q31b >> 32);
+ recip32 = (uint64_t)recip32 * correction32 >> 31;
+ correction32 = -((uint64_t)recip32 * q31b >> 32);
+ recip32 = (uint64_t)recip32 * correction32 >> 31;
+ correction32 = -((uint64_t)recip32 * q31b >> 32);
+ recip32 = (uint64_t)recip32 * correction32 >> 31;
+
+ // recip32 might have overflowed to exactly zero in the preceding
+ // computation if the high word of b is exactly 1.0. This would sabotage
+ // the full-width final stage of the computation that follows, so we adjust
+ // recip32 downward by one bit.
+ recip32--;
+
+ // We need to perform one more iteration to get us to 56 binary digits;
+ // The last iteration needs to happen with extra precision.
+ const uint32_t q63blo = bSignificand << 11;
+ uint64_t correction, reciprocal;
+ correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32));
+ uint32_t cHi = correction >> 32;
+ uint32_t cLo = correction;
+ reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32);
+
+ // We already adjusted the 32-bit estimate, now we need to adjust the final
+ // 64-bit reciprocal estimate downward to ensure that it is strictly smaller
+ // than the infinitely precise exact reciprocal. Because the computation
+ // of the Newton-Raphson step is truncating at every step, this adjustment
+ // is small; most of the work is already done.
+ reciprocal -= 2;
+
+ // The numerical reciprocal is accurate to within 2^-56, lies in the
+ // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
+ // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
+ // in Q53 with the following properties:
+ //
+ // 1. q < a/b
+ // 2. q is in the interval [0.5, 2.0)
+ // 3. the error in q is bounded away from 2^-53 (actually, we have a
+ // couple of bits to spare, but this is all we need).
+
+ // We need a 64 x 64 multiply high to compute q, which isn't a basic
+ // operation in C, so we need to be a little bit fussy.
+ rep_t quotient, quotientLo;
+ wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
+
+ // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form
+ //
+ // r = a - q*b
+ //
+ // We know from the construction of q that r satisfies:
+ //
+ // 0 <= r < ulp(q)*b
//
- // x1 = x0 * (2 - x0 * b)
- //
- // This doubles the number of correct binary digits in the approximation
- // with each iteration, so after three iterations, we have about 28 binary
- // digits of accuracy.
- uint32_t correction32;
- correction32 = -((uint64_t)recip32 * q31b >> 32);
- recip32 = (uint64_t)recip32 * correction32 >> 31;
- correction32 = -((uint64_t)recip32 * q31b >> 32);
- recip32 = (uint64_t)recip32 * correction32 >> 31;
- correction32 = -((uint64_t)recip32 * q31b >> 32);
- recip32 = (uint64_t)recip32 * correction32 >> 31;
-
- // recip32 might have overflowed to exactly zero in the preceding
- // computation if the high word of b is exactly 1.0. This would sabotage
- // the full-width final stage of the computation that follows, so we adjust
- // recip32 downward by one bit.
- recip32--;
-
- // We need to perform one more iteration to get us to 56 binary digits;
- // The last iteration needs to happen with extra precision.
- const uint32_t q63blo = bSignificand << 11;
- uint64_t correction, reciprocal;
- correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32));
- uint32_t cHi = correction >> 32;
- uint32_t cLo = correction;
- reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32);
-
- // We already adjusted the 32-bit estimate, now we need to adjust the final
- // 64-bit reciprocal estimate downward to ensure that it is strictly smaller
- // than the infinitely precise exact reciprocal. Because the computation
- // of the Newton-Raphson step is truncating at every step, this adjustment
- // is small; most of the work is already done.
- reciprocal -= 2;
-
- // The numerical reciprocal is accurate to within 2^-56, lies in the
- // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
- // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
- // in Q53 with the following properties:
- //
- // 1. q < a/b
- // 2. q is in the interval [0.5, 2.0)
- // 3. the error in q is bounded away from 2^-53 (actually, we have a
- // couple of bits to spare, but this is all we need).
-
- // We need a 64 x 64 multiply high to compute q, which isn't a basic
- // operation in C, so we need to be a little bit fussy.
- rep_t quotient, quotientLo;
- wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
-
- // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
- // In either case, we are going to compute a residual of the form
- //
- // r = a - q*b
- //
- // We know from the construction of q that r satisfies:
- //
- // 0 <= r < ulp(q)*b
- //
- // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
- // already have the correct result. The exact halfway case cannot occur.
- // We also take this time to right shift quotient if it falls in the [1,2)
- // range and adjust the exponent accordingly.
- rep_t residual;
- if (quotient < (implicitBit << 1)) {
- residual = (aSignificand << 53) - quotient * bSignificand;
- quotientExponent--;
- } else {
- quotient >>= 1;
- residual = (aSignificand << 52) - quotient * bSignificand;
- }
-
- const int writtenExponent = quotientExponent + exponentBias;
-
- if (writtenExponent >= maxExponent) {
- // If we have overflowed the exponent, return infinity.
- return fromRep(infRep | quotientSign);
- }
-
- else if (writtenExponent < 1) {
- // Flush denormals to zero. In the future, it would be nice to add
- // code to round them correctly.
- return fromRep(quotientSign);
- }
-
- else {
- const bool round = (residual << 1) > bSignificand;
- // Clear the implicit bit
- rep_t absResult = quotient & significandMask;
- // Insert the exponent
- absResult |= (rep_t)writtenExponent << significandBits;
- // Round
- absResult += round;
- // Insert the sign and return
- const double result = fromRep(absResult | quotientSign);
- return result;
- }
-}
+ // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+ // already have the correct result. The exact halfway case cannot occur.
+ // We also take this time to right shift quotient if it falls in the [1,2)
+ // range and adjust the exponent accordingly.
+ rep_t residual;
+ if (quotient < (implicitBit << 1)) {
+ residual = (aSignificand << 53) - quotient * bSignificand;
+ quotientExponent--;
+ } else {
+ quotient >>= 1;
+ residual = (aSignificand << 52) - quotient * bSignificand;
+ }
+
+ const int writtenExponent = quotientExponent + exponentBias;
+
+ if (writtenExponent >= maxExponent) {
+ // If we have overflowed the exponent, return infinity.
+ return fromRep(infRep | quotientSign);
+ }
+
+ else if (writtenExponent < 1) {
+ // Flush denormals to zero. In the future, it would be nice to add
+ // code to round them correctly.
+ return fromRep(quotientSign);
+ }
+
+ else {
+ const bool round = (residual << 1) > bSignificand;
+ // Clear the implicit bit
+ rep_t absResult = quotient & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ // Round
+ absResult += round;
+ // Insert the sign and return
+ const double result = fromRep(absResult | quotientSign);
+ return result;
+ }
+}
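
The three Newton-Raphson steps above are easier to follow outside the soft-float plumbing. Below is a minimal standalone sketch, assuming a hand-picked Q31 significand rather than the bSignificand >> 21 used by __divdf3; approx_recip32 and the main() driver are illustrative names, not part of compiler-rt.

#include <stdint.h>
#include <stdio.h>

/* Sketch of the Q31 -> Q32 reciprocal refinement; not the compiler-rt code. */
static uint32_t approx_recip32(uint32_t q31b) {            /* q31b encodes b in [1.0, 2.0) */
    uint32_t r = UINT32_C(0x7504f333) - q31b;              /* 3/4 + 1/sqrt(2) - b/2, ~3.5 bits */
    for (int i = 0; i < 3; ++i) {                          /* each step doubles the correct bits */
        uint32_t c = -(uint32_t)((uint64_t)r * q31b >> 32);    /* c ~ (2 - r*b) in Q31 */
        r = (uint64_t)r * c >> 31;                          /* r <- r * (2 - r*b) in Q32 */
    }
    return r - 1;                                           /* keep the estimate strictly below 1/b */
}

int main(void) {
    uint32_t b = UINT32_C(0xC0000000);                      /* b = 1.5 as a Q31 value */
    uint64_t exact = (UINT64_C(1) << 63) / b;               /* exact 1/b in Q32: 0xAAAAAAAA */
    printf("estimate %#x, exact %#llx\n", approx_recip32(b), (unsigned long long)exact);
    return 0;
}

With b = 1.5 the printed estimate should agree with the exact 0xAAAAAAAA reciprocal to roughly 28 bits, which is why the routine above still needs one extended-precision step to reach its 53-bit target.
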
diff --git a/contrib/libs/cxxsupp/builtins/divdi3.c b/contrib/libs/cxxsupp/builtins/divdi3.c
index d757ce5f22..b8eebcb204 100644
--- a/contrib/libs/cxxsupp/builtins/divdi3.c
+++ b/contrib/libs/cxxsupp/builtins/divdi3.c
@@ -1,29 +1,29 @@
-/* ===-- divdi3.c - Implement __divdi3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI di_int
-__divdi3(di_int a, di_int b)
-{
- const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
- di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */
- di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */
- a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
- b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
- s_a ^= s_b; /*sign of quotient */
- return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
-}
+/* ===-- divdi3.c - Implement __divdi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI di_int
+__divdi3(di_int a, di_int b)
+{
+ const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+ di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */
+ di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */
+ a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
+ b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
+ s_a ^= s_b; /* sign of quotient */
+ return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
+}
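
The sign handling in this routine relies on the identity (x ^ s) - s == -x when s is an all-ones mask. A small sketch of the trick, assuming int64_t in place of di_int and, like the builtin, an arithmetic right shift for signed values; cond_negate and the driver are made-up names for illustration.

#include <stdint.h>
#include <stdio.h>

static int64_t cond_negate(int64_t x, int64_t s) {     /* s must be 0 or -1 */
    return (x ^ s) - s;                                 /* s == -1: flip all bits, add 1 */
}

int main(void) {
    int64_t a = -42, b = 7;
    int64_t s_a = a >> 63, s_b = b >> 63;               /* arithmetic shift: 0 or -1 */
    uint64_t ua = (uint64_t)cond_negate(a, s_a);        /* |a| */
    uint64_t ub = (uint64_t)cond_negate(b, s_b);        /* |b| */
    int64_t q = cond_negate((int64_t)(ua / ub), s_a ^ s_b);   /* reapply the quotient sign */
    printf("%lld / %lld = %lld\n", (long long)a, (long long)b, (long long)q);   /* prints -6 */
    return 0;
}
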
diff --git a/contrib/libs/cxxsupp/builtins/divmoddi4.c b/contrib/libs/cxxsupp/builtins/divmoddi4.c
index f5ae0f5d13..0d4df67a63 100644
--- a/contrib/libs/cxxsupp/builtins/divmoddi4.c
+++ b/contrib/libs/cxxsupp/builtins/divmoddi4.c
@@ -1,25 +1,25 @@
-/*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divmoddi4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b, *rem = a % b */
-
-COMPILER_RT_ABI di_int
-__divmoddi4(di_int a, di_int b, di_int* rem)
-{
- di_int d = __divdi3(a,b);
- *rem = a - (d*b);
- return d;
-}
+/*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divmoddi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b, *rem = a % b */
+
+COMPILER_RT_ABI di_int
+__divmoddi4(di_int a, di_int b, di_int* rem)
+{
+ di_int d = __divdi3(a,b);
+ *rem = a - (d*b);
+ return d;
+}
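
One usage note: because d comes from C's truncating division, the remainder a - (d*b) always carries the sign of the dividend. A tiny sketch with plain long long arithmetic standing in for the builtin call, for illustration only:

#include <stdio.h>

int main(void) {
    long long a = -7, b = 3;
    long long d = a / b;                  /* truncates toward zero: -2 */
    long long rem = a - d * b;            /* -7 - (-6) = -1, sign follows the dividend */
    printf("d=%lld rem=%lld check=%lld\n", d, rem, d * b + rem);   /* check equals a */
    return 0;
}
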
diff --git a/contrib/libs/cxxsupp/builtins/divmodsi4.c b/contrib/libs/cxxsupp/builtins/divmodsi4.c
index 8efe156707..dabe287439 100644
--- a/contrib/libs/cxxsupp/builtins/divmodsi4.c
+++ b/contrib/libs/cxxsupp/builtins/divmodsi4.c
@@ -1,27 +1,27 @@
-/*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divmodsi4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b, *rem = a % b */
-
-COMPILER_RT_ABI si_int
-__divmodsi4(si_int a, si_int b, si_int* rem)
-{
- si_int d = __divsi3(a,b);
- *rem = a - (d*b);
- return d;
-}
-
-
+/*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divmodsi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b, *rem = a % b */
+
+COMPILER_RT_ABI si_int
+__divmodsi4(si_int a, si_int b, si_int* rem)
+{
+ si_int d = __divsi3(a,b);
+ *rem = a - (d*b);
+ return d;
+}
+
+
diff --git a/contrib/libs/cxxsupp/builtins/divsc3.c b/contrib/libs/cxxsupp/builtins/divsc3.c
index 933a354744..42a48315e6 100644
--- a/contrib/libs/cxxsupp/builtins/divsc3.c
+++ b/contrib/libs/cxxsupp/builtins/divsc3.c
@@ -1,60 +1,60 @@
-/*===-- divsc3.c - Implement __divsc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divsc3 for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the quotient of (a + ib) / (c + id) */
-
-COMPILER_RT_ABI Fcomplex
-__divsc3(float __a, float __b, float __c, float __d)
-{
- int __ilogbw = 0;
- float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
- if (crt_isfinite(__logbw))
- {
- __ilogbw = (int)__logbw;
- __c = crt_scalbnf(__c, -__ilogbw);
- __d = crt_scalbnf(__d, -__ilogbw);
- }
- float __denom = __c * __c + __d * __d;
- Fcomplex z;
- COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
- COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
- {
- if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
- {
- COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
- COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
- }
- else if ((crt_isinf(__a) || crt_isinf(__b)) &&
- crt_isfinite(__c) && crt_isfinite(__d))
- {
- __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
- __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
- COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
- COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
- }
- else if (crt_isinf(__logbw) && __logbw > 0 &&
- crt_isfinite(__a) && crt_isfinite(__b))
- {
- __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
- __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
- COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
- COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
- }
- }
- return z;
-}
+/*===-- divsc3.c - Implement __divsc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divsc3 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Fcomplex
+__divsc3(float __a, float __b, float __c, float __d)
+{
+ int __ilogbw = 0;
+ float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbnf(__c, -__ilogbw);
+ __d = crt_scalbnf(__d, -__ilogbw);
+ }
+ float __denom = __c * __c + __d * __d;
+ Fcomplex z;
+ COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
+ COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
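
The scaling by 2^-ilogbw above exists so that c*c + d*d stays representable even when c and d are near the overflow threshold. A hedged sketch of the same idea using the <math.h> counterparts of the crt_* helpers (logbf, fmaxf, fabsf, scalbnf); the inputs are arbitrary and the program is an illustration, not the library routine (link with -lm).

#include <math.h>
#include <stdio.h>

int main(void) {
    float a = 1.0f, b = 0.0f;                 /* numerator 1 + 0i */
    float c = 3.0e20f, d = 4.0e20f;           /* denominator with huge magnitude */

    /* naive: c*c + d*d overflows to +inf once stored in a float, so the real part collapses to 0 */
    float denom_naive = c * c + d * d;
    printf("naive:  %g\n", (a * c + b * d) / denom_naive);

    /* scaled, mirroring the routine: factor out 2^ilogbw before squaring */
    int ilogbw = (int)logbf(fmaxf(fabsf(c), fabsf(d)));
    float cs = scalbnf(c, -ilogbw), ds = scalbnf(d, -ilogbw);
    float re = scalbnf((a * cs + b * ds) / (cs * cs + ds * ds), -ilogbw);
    printf("scaled: %g\n", re);               /* ~1.2e-21, the finite answer */
    return 0;
}
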
diff --git a/contrib/libs/cxxsupp/builtins/divsf3.c b/contrib/libs/cxxsupp/builtins/divsf3.c
index d88b3048c7..de2e376125 100644
--- a/contrib/libs/cxxsupp/builtins/divsf3.c
+++ b/contrib/libs/cxxsupp/builtins/divsf3.c
@@ -1,169 +1,169 @@
-//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float division
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-// For simplicity, this implementation currently flushes denormals to zero.
-// It should be a fairly straightforward exercise to implement gradual
-// underflow with correct rounding.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(fdiv, divsf3)
-
-COMPILER_RT_ABI fp_t
-__divsf3(fp_t a, fp_t b) {
-
- const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
- const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
- const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
-
- rep_t aSignificand = toRep(a) & significandMask;
- rep_t bSignificand = toRep(b) & significandMask;
- int scale = 0;
-
- // Detect if a or b is zero, denormal, infinity, or NaN.
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
-
- // NaN / anything = qNaN
- if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
- // anything / NaN = qNaN
- if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-
- if (aAbs == infRep) {
- // infinity / infinity = NaN
- if (bAbs == infRep) return fromRep(qnanRep);
- // infinity / anything else = +/- infinity
- else return fromRep(aAbs | quotientSign);
- }
-
- // anything else / infinity = +/- 0
- if (bAbs == infRep) return fromRep(quotientSign);
-
- if (!aAbs) {
- // zero / zero = NaN
- if (!bAbs) return fromRep(qnanRep);
- // zero / anything else = +/- zero
- else return fromRep(quotientSign);
- }
- // anything else / zero = +/- infinity
- if (!bAbs) return fromRep(infRep | quotientSign);
-
- // one or both of a or b is denormal, the other (if applicable) is a
- // normal number. Renormalize one or both of a and b, and set scale to
- // include the necessary exponent adjustment.
- if (aAbs < implicitBit) scale += normalize(&aSignificand);
- if (bAbs < implicitBit) scale -= normalize(&bSignificand);
- }
-
- // Or in the implicit significand bit. (If we fell through from the
- // denormal path it was already set by normalize( ), but setting it twice
- // won't hurt anything.)
- aSignificand |= implicitBit;
- bSignificand |= implicitBit;
- int quotientExponent = aExponent - bExponent + scale;
-
- // Align the significand of b as a Q31 fixed-point number in the range
- // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
- // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
- // is accurate to about 3.5 binary digits.
- uint32_t q31b = bSignificand << 8;
- uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;
-
- // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(fdiv, divsf3)
+
+COMPILER_RT_ABI fp_t
+__divsf3(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep) return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep) return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs) return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ // One or both of a and b is denormal; the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+ int quotientExponent = aExponent - bExponent + scale;
+
+ // Align the significand of b as a Q31 fixed-point number in the range
+ // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
+ // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+ // is accurate to about 3.5 binary digits.
+ uint32_t q31b = bSignificand << 8;
+ uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;
+
+ // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+ //
+ // x1 = x0 * (2 - x0 * b)
+ //
+ // This doubles the number of correct binary digits in the approximation
+ // with each iteration, so after three iterations, we have about 28 binary
+ // digits of accuracy.
+ uint32_t correction;
+ correction = -((uint64_t)reciprocal * q31b >> 32);
+ reciprocal = (uint64_t)reciprocal * correction >> 31;
+ correction = -((uint64_t)reciprocal * q31b >> 32);
+ reciprocal = (uint64_t)reciprocal * correction >> 31;
+ correction = -((uint64_t)reciprocal * q31b >> 32);
+ reciprocal = (uint64_t)reciprocal * correction >> 31;
+
+ // Exhaustive testing shows that the error in reciprocal after three steps
+ // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
+ // expectations. We bump the reciprocal by a tiny value to force the error
+ // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to
+ // be specific). This also causes 1/1 to give a sensible approximation
+ // instead of zero (due to overflow).
+ reciprocal -= 2;
+
+ // The numerical reciprocal is accurate to within 2^-28, lies in the
+ // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
+ // than the true reciprocal of b. Multiplying a by this reciprocal thus
+ // gives a numerical q = a/b in Q24 with the following properties:
+ //
+ // 1. q < a/b
+ // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
+ // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^-24 term comes
+ // from the fact that we truncate the product, and the 2^-27 term
+ // is the error in the reciprocal of b scaled by the maximum
+ // possible value of a. As a consequence of this error bound,
+ // either q or nextafter(q) is the correctly rounded result.
+ rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32;
+
+ // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form
+ //
+ // r = a - q*b
+ //
+ // We know from the construction of q that r satisfies:
+ //
+ // 0 <= r < ulp(q)*b
//
- // x1 = x0 * (2 - x0 * b)
- //
- // This doubles the number of correct binary digits in the approximation
- // with each iteration, so after three iterations, we have about 28 binary
- // digits of accuracy.
- uint32_t correction;
- correction = -((uint64_t)reciprocal * q31b >> 32);
- reciprocal = (uint64_t)reciprocal * correction >> 31;
- correction = -((uint64_t)reciprocal * q31b >> 32);
- reciprocal = (uint64_t)reciprocal * correction >> 31;
- correction = -((uint64_t)reciprocal * q31b >> 32);
- reciprocal = (uint64_t)reciprocal * correction >> 31;
-
- // Exhaustive testing shows that the error in reciprocal after three steps
- // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
- // expectations. We bump the reciprocal by a tiny value to force the error
- // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to
- // be specific). This also causes 1/1 to give a sensible approximation
- // instead of zero (due to overflow).
- reciprocal -= 2;
-
- // The numerical reciprocal is accurate to within 2^-28, lies in the
- // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
- // than the true reciprocal of b. Multiplying a by this reciprocal thus
- // gives a numerical q = a/b in Q24 with the following properties:
- //
- // 1. q < a/b
- // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
- // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes
- // from the fact that we truncate the product, and the 2^27 term
- // is the error in the reciprocal of b scaled by the maximum
- // possible value of a. As a consequence of this error bound,
- // either q or nextafter(q) is the correctly rounded
- rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32;
-
- // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
- // In either case, we are going to compute a residual of the form
- //
- // r = a - q*b
- //
- // We know from the construction of q that r satisfies:
- //
- // 0 <= r < ulp(q)*b
- //
- // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
- // already have the correct result. The exact halfway case cannot occur.
- // We also take this time to right shift quotient if it falls in the [1,2)
- // range and adjust the exponent accordingly.
- rep_t residual;
- if (quotient < (implicitBit << 1)) {
- residual = (aSignificand << 24) - quotient * bSignificand;
- quotientExponent--;
- } else {
- quotient >>= 1;
- residual = (aSignificand << 23) - quotient * bSignificand;
- }
-
- const int writtenExponent = quotientExponent + exponentBias;
-
- if (writtenExponent >= maxExponent) {
- // If we have overflowed the exponent, return infinity.
- return fromRep(infRep | quotientSign);
- }
-
- else if (writtenExponent < 1) {
- // Flush denormals to zero. In the future, it would be nice to add
- // code to round them correctly.
- return fromRep(quotientSign);
- }
-
- else {
- const bool round = (residual << 1) > bSignificand;
- // Clear the implicit bit
- rep_t absResult = quotient & significandMask;
- // Insert the exponent
- absResult |= (rep_t)writtenExponent << significandBits;
- // Round
- absResult += round;
- // Insert the sign and return
- return fromRep(absResult | quotientSign);
- }
-}
+ // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+ // already have the correct result. The exact halfway case cannot occur.
+ // We also take this time to right shift quotient if it falls in the [1,2)
+ // range and adjust the exponent accordingly.
+ rep_t residual;
+ if (quotient < (implicitBit << 1)) {
+ residual = (aSignificand << 24) - quotient * bSignificand;
+ quotientExponent--;
+ } else {
+ quotient >>= 1;
+ residual = (aSignificand << 23) - quotient * bSignificand;
+ }
+
+ const int writtenExponent = quotientExponent + exponentBias;
+
+ if (writtenExponent >= maxExponent) {
+ // If we have overflowed the exponent, return infinity.
+ return fromRep(infRep | quotientSign);
+ }
+
+ else if (writtenExponent < 1) {
+ // Flush denormals to zero. In the future, it would be nice to add
+ // code to round them correctly.
+ return fromRep(quotientSign);
+ }
+
+ else {
+ const bool round = (residual << 1) > bSignificand;
+ // Clear the implicit bit
+ rep_t absResult = quotient & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ // Round
+ absResult += round;
+ // Insert the sign and return
+ return fromRep(absResult | quotientSign);
+ }
+}
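
The final rounding decision, round up exactly when 2*r > b with r = a - q*b, can be exercised on its own. The sketch below substitutes a plain integer division for the reciprocal machinery; aSig, bSig and the Q23 framing are illustrative assumptions rather than the builtin's internals.

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t aSig = 0x00C00000;               /* 1.5 with the implicit bit set (24 bits) */
    uint32_t bSig = 0x00A00000;               /* 1.25 */
    uint64_t num = (uint64_t)aSig << 23;      /* align so the quotient lands in Q23 */
    uint32_t q = (uint32_t)(num / bSig);      /* truncated quotient of 1.5/1.25 */
    uint64_t r = num - (uint64_t)q * bSig;    /* residual in [0, bSig) */
    int round = (2 * r > bSig);               /* the same strictly-greater test as above */
    printf("q = %#x (round up: %d)\n", q + round, round);
    return 0;
}

Here the truncated quotient is 0x999999 and the residual exceeds half an ulp times b, so the result rounds up to 0x99999a, matching the correctly rounded single-precision value of 1.5/1.25.
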
diff --git a/contrib/libs/cxxsupp/builtins/divsi3.c b/contrib/libs/cxxsupp/builtins/divsi3.c
index dd8c6fb891..bab4aefda3 100644
--- a/contrib/libs/cxxsupp/builtins/divsi3.c
+++ b/contrib/libs/cxxsupp/builtins/divsi3.c
@@ -1,37 +1,37 @@
-/* ===-- divsi3.c - Implement __divsi3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b */
-
-ARM_EABI_FNALIAS(idiv, divsi3)
-
-COMPILER_RT_ABI si_int
-__divsi3(si_int a, si_int b)
-{
- const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1;
- si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */
- si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */
- a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
- b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
- s_a ^= s_b; /* sign of quotient */
- /*
- * On CPUs without unsigned hardware division support,
- * this calls __udivsi3 (notice the cast to su_int).
- * On CPUs with unsigned hardware division support,
- * this uses the unsigned division instruction.
- */
- return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */
-}
+/* ===-- divsi3.c - Implement __divsi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+ARM_EABI_FNALIAS(idiv, divsi3)
+
+COMPILER_RT_ABI si_int
+__divsi3(si_int a, si_int b)
+{
+ const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1;
+ si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */
+ si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */
+ a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
+ b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
+ s_a ^= s_b; /* sign of quotient */
+ /*
+ * On CPUs without unsigned hardware division support,
+ * this calls __udivsi3 (notice the cast to su_int).
+ * On CPUs with unsigned hardware division support,
+ * this uses the unsigned division instruction.
+ */
+ return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */
+}
diff --git a/contrib/libs/cxxsupp/builtins/divtc3.c b/contrib/libs/cxxsupp/builtins/divtc3.c
index 72581de5ea..04693df471 100644
--- a/contrib/libs/cxxsupp/builtins/divtc3.c
+++ b/contrib/libs/cxxsupp/builtins/divtc3.c
@@ -1,60 +1,60 @@
-/*===-- divtc3.c - Implement __divtc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divtc3 for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the quotient of (a + ib) / (c + id) */
-
-COMPILER_RT_ABI long double _Complex
-__divtc3(long double __a, long double __b, long double __c, long double __d)
-{
- int __ilogbw = 0;
- long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
- if (crt_isfinite(__logbw))
- {
- __ilogbw = (int)__logbw;
- __c = crt_scalbnl(__c, -__ilogbw);
- __d = crt_scalbnl(__d, -__ilogbw);
- }
- long double __denom = __c * __c + __d * __d;
- long double _Complex z;
- __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
- __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
- {
- if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
- {
- __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
- __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
- }
- else if ((crt_isinf(__a) || crt_isinf(__b)) &&
- crt_isfinite(__c) && crt_isfinite(__d))
- {
- __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
- __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
- __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
- __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
- }
- else if (crt_isinf(__logbw) && __logbw > 0.0 &&
- crt_isfinite(__a) && crt_isfinite(__b))
- {
- __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
- __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
- __real__ z = 0.0 * (__a * __c + __b * __d);
- __imag__ z = 0.0 * (__b * __c - __a * __d);
- }
- }
- return z;
-}
+/*===-- divtc3.c - Implement __divtc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divtc3 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI long double _Complex
+__divtc3(long double __a, long double __b, long double __c, long double __d)
+{
+ int __ilogbw = 0;
+ long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbnl(__c, -__ilogbw);
+ __d = crt_scalbnl(__d, -__ilogbw);
+ }
+ long double __denom = __c * __c + __d * __d;
+ long double _Complex z;
+ __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+ __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+ {
+ if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
+ __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
+ __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
+ __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
+ __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
+ __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
+ __real__ z = 0.0 * (__a * __c + __b * __d);
+ __imag__ z = 0.0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
diff --git a/contrib/libs/cxxsupp/builtins/divtf3.c b/contrib/libs/cxxsupp/builtins/divtf3.c
index 80471b381d..e81dab826b 100644
--- a/contrib/libs/cxxsupp/builtins/divtf3.c
+++ b/contrib/libs/cxxsupp/builtins/divtf3.c
@@ -1,203 +1,203 @@
-//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float division
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-// For simplicity, this implementation currently flushes denormals to zero.
-// It should be a fairly straightforward exercise to implement gradual
-// underflow with correct rounding.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
-
- const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
- const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
- const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
-
- rep_t aSignificand = toRep(a) & significandMask;
- rep_t bSignificand = toRep(b) & significandMask;
- int scale = 0;
-
- // Detect if a or b is zero, denormal, infinity, or NaN.
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
-
- // NaN / anything = qNaN
- if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
- // anything / NaN = qNaN
- if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-
- if (aAbs == infRep) {
- // infinity / infinity = NaN
- if (bAbs == infRep) return fromRep(qnanRep);
- // infinity / anything else = +/- infinity
- else return fromRep(aAbs | quotientSign);
- }
-
- // anything else / infinity = +/- 0
- if (bAbs == infRep) return fromRep(quotientSign);
-
- if (!aAbs) {
- // zero / zero = NaN
- if (!bAbs) return fromRep(qnanRep);
- // zero / anything else = +/- zero
- else return fromRep(quotientSign);
- }
- // anything else / zero = +/- infinity
- if (!bAbs) return fromRep(infRep | quotientSign);
-
- // one or both of a or b is denormal, the other (if applicable) is a
- // normal number. Renormalize one or both of a and b, and set scale to
- // include the necessary exponent adjustment.
- if (aAbs < implicitBit) scale += normalize(&aSignificand);
- if (bAbs < implicitBit) scale -= normalize(&bSignificand);
- }
-
- // Or in the implicit significand bit. (If we fell through from the
- // denormal path it was already set by normalize( ), but setting it twice
- // won't hurt anything.)
- aSignificand |= implicitBit;
- bSignificand |= implicitBit;
- int quotientExponent = aExponent - bExponent + scale;
-
- // Align the significand of b as a Q63 fixed-point number in the range
- // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
- // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
- // is accurate to about 3.5 binary digits.
- const uint64_t q63b = bSignificand >> 49;
- uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
- // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
-
- // Now refine the reciprocal estimate using a Newton-Raphson iteration:
- //
- // x1 = x0 * (2 - x0 * b)
- //
- // This doubles the number of correct binary digits in the approximation
- // with each iteration.
- uint64_t correction64;
- correction64 = -((rep_t)recip64 * q63b >> 64);
- recip64 = (rep_t)recip64 * correction64 >> 63;
- correction64 = -((rep_t)recip64 * q63b >> 64);
- recip64 = (rep_t)recip64 * correction64 >> 63;
- correction64 = -((rep_t)recip64 * q63b >> 64);
- recip64 = (rep_t)recip64 * correction64 >> 63;
- correction64 = -((rep_t)recip64 * q63b >> 64);
- recip64 = (rep_t)recip64 * correction64 >> 63;
- correction64 = -((rep_t)recip64 * q63b >> 64);
- recip64 = (rep_t)recip64 * correction64 >> 63;
-
- // recip64 might have overflowed to exactly zero in the preceeding
- // computation if the high word of b is exactly 1.0. This would sabotage
- // the full-width final stage of the computation that follows, so we adjust
- // recip64 downward by one bit.
- recip64--;
-
- // We need to perform one more iteration to get us to 112 binary digits;
- // The last iteration needs to happen with extra precision.
- const uint64_t q127blo = bSignificand << 15;
- rep_t correction, reciprocal;
-
- // NOTE: This operation is equivalent to __multi3, which is not implemented
- // in some architechure
- rep_t r64q63, r64q127, r64cH, r64cL, dummy;
- wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
- wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);
-
- correction = -(r64q63 + (r64q127 >> 64));
-
- uint64_t cHi = correction >> 64;
- uint64_t cLo = correction;
-
- wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
- wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);
-
- reciprocal = r64cH + (r64cL >> 64);
-
- // We already adjusted the 64-bit estimate, now we need to adjust the final
- // 128-bit reciprocal estimate downward to ensure that it is strictly smaller
- // than the infinitely precise exact reciprocal. Because the computation
- // of the Newton-Raphson step is truncating at every step, this adjustment
- // is small; most of the work is already done.
- reciprocal -= 2;
-
- // The numerical reciprocal is accurate to within 2^-112, lies in the
- // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
- // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
- // in Q127 with the following properties:
- //
- // 1. q < a/b
- // 2. q is in the interval [0.5, 2.0)
- // 3. the error in q is bounded away from 2^-113 (actually, we have a
- // couple of bits to spare, but this is all we need).
-
- // We need a 128 x 128 multiply high to compute q, which isn't a basic
- // operation in C, so we need to be a little bit fussy.
- rep_t quotient, quotientLo;
- wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
-
- // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
- // In either case, we are going to compute a residual of the form
- //
- // r = a - q*b
- //
- // We know from the construction of q that r satisfies:
- //
- // 0 <= r < ulp(q)*b
- //
- // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
- // already have the correct result. The exact halfway case cannot occur.
- // We also take this time to right shift quotient if it falls in the [1,2)
- // range and adjust the exponent accordingly.
- rep_t residual;
- rep_t qb;
-
- if (quotient < (implicitBit << 1)) {
- wideMultiply(quotient, bSignificand, &dummy, &qb);
- residual = (aSignificand << 113) - qb;
- quotientExponent--;
- } else {
- quotient >>= 1;
- wideMultiply(quotient, bSignificand, &dummy, &qb);
- residual = (aSignificand << 112) - qb;
- }
-
- const int writtenExponent = quotientExponent + exponentBias;
-
- if (writtenExponent >= maxExponent) {
- // If we have overflowed the exponent, return infinity.
- return fromRep(infRep | quotientSign);
- }
- else if (writtenExponent < 1) {
- // Flush denormals to zero. In the future, it would be nice to add
- // code to round them correctly.
- return fromRep(quotientSign);
- }
- else {
- const bool round = (residual << 1) >= bSignificand;
- // Clear the implicit bit
- rep_t absResult = quotient & significandMask;
- // Insert the exponent
- absResult |= (rep_t)writtenExponent << significandBits;
- // Round
- absResult += round;
- // Insert the sign and return
- const long double result = fromRep(absResult | quotientSign);
- return result;
- }
-}
-
-#endif
+//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep) return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep) return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs) return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ // One or both of a and b is denormal; the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+ int quotientExponent = aExponent - bExponent + scale;
+
+ // Align the significand of b as a Q63 fixed-point number in the range
+ // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
+ // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+ // is accurate to about 3.5 binary digits.
+ const uint64_t q63b = bSignificand >> 49;
+ uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
+ // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
+
+ // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+ //
+ // x1 = x0 * (2 - x0 * b)
+ //
+ // This doubles the number of correct binary digits in the approximation
+ // with each iteration.
+ uint64_t correction64;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+
+ // recip64 might have overflowed to exactly zero in the preceding
+ // computation if the high word of b is exactly 1.0. This would sabotage
+ // the full-width final stage of the computation that follows, so we adjust
+ // recip64 downward by one bit.
+ recip64--;
+
+ // We need to perform one more iteration to get us to 112 binary digits;
+ // the last iteration needs to happen with extra precision.
+ const uint64_t q127blo = bSignificand << 15;
+ rep_t correction, reciprocal;
+
+ // NOTE: This operation is equivalent to __multi3, which is not implemented
+ // on some architectures.
+ rep_t r64q63, r64q127, r64cH, r64cL, dummy;
+ wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
+ wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);
+
+ correction = -(r64q63 + (r64q127 >> 64));
+
+ uint64_t cHi = correction >> 64;
+ uint64_t cLo = correction;
+
+ wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
+ wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);
+
+ reciprocal = r64cH + (r64cL >> 64);
+
+ // We already adjusted the 64-bit estimate, now we need to adjust the final
+ // 128-bit reciprocal estimate downward to ensure that it is strictly smaller
+ // than the infinitely precise exact reciprocal. Because the computation
+ // of the Newton-Raphson step is truncating at every step, this adjustment
+ // is small; most of the work is already done.
+ reciprocal -= 2;
+
+ // The numerical reciprocal is accurate to within 2^-112, lies in the
+ // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
+ // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
+ // in Q127 with the following properties:
+ //
+ // 1. q < a/b
+ // 2. q is in the interval [0.5, 2.0)
+ // 3. the error in q is bounded away from 2^-113 (actually, we have a
+ // couple of bits to spare, but this is all we need).
+
+ // We need a 128 x 128 multiply high to compute q, which isn't a basic
+ // operation in C, so we need to be a little bit fussy.
+ rep_t quotient, quotientLo;
+ wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
+
+ // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form
+ //
+ // r = a - q*b
+ //
+ // We know from the construction of q that r satisfies:
+ //
+ // 0 <= r < ulp(q)*b
+ //
+ // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+ // already have the correct result. The exact halfway case cannot occur.
+ // We also take this time to right shift quotient if it falls in the [1,2)
+ // range and adjust the exponent accordingly.
+ rep_t residual;
+ rep_t qb;
+
+ if (quotient < (implicitBit << 1)) {
+ wideMultiply(quotient, bSignificand, &dummy, &qb);
+ residual = (aSignificand << 113) - qb;
+ quotientExponent--;
+ } else {
+ quotient >>= 1;
+ wideMultiply(quotient, bSignificand, &dummy, &qb);
+ residual = (aSignificand << 112) - qb;
+ }
+
+ const int writtenExponent = quotientExponent + exponentBias;
+
+ if (writtenExponent >= maxExponent) {
+ // If we have overflowed the exponent, return infinity.
+ return fromRep(infRep | quotientSign);
+ }
+ else if (writtenExponent < 1) {
+ // Flush denormals to zero. In the future, it would be nice to add
+ // code to round them correctly.
+ return fromRep(quotientSign);
+ }
+ else {
+ const bool round = (residual << 1) >= bSignificand;
+ // Clear the implicit bit
+ rep_t absResult = quotient & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ // Round
+ absResult += round;
+ // Insert the sign and return
+ const long double result = fromRep(absResult | quotientSign);
+ return result;
+ }
+}
+
+#endif
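
The quad-precision path leans on wideMultiply() because a 128 x 128 multiply high is not a C primitive, and on some targets not even __multi3 is available. The same schoolbook decomposition is shown one size down as a hedged sketch, a 64 x 64 -> 128 product built from 32-bit halves; mul64wide is an illustrative name, not the library helper.

#include <stdint.h>
#include <stdio.h>

static void mul64wide(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) {
    uint64_t aLo = (uint32_t)a, aHi = a >> 32;
    uint64_t bLo = (uint32_t)b, bHi = b >> 32;
    uint64_t ll = aLo * bLo, lh = aLo * bHi, hl = aHi * bLo, hh = aHi * bHi;
    uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;   /* collect the carries */
    *lo = (mid << 32) | (uint32_t)ll;
    *hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}

int main(void) {
    uint64_t hi, lo;
    mul64wide(UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), &hi, &lo);
    /* (2^64 - 1)^2 = 0xFFFFFFFFFFFFFFFE_0000000000000001 */
    printf("hi=%#llx lo=%#llx\n", (unsigned long long)hi, (unsigned long long)lo);
    return 0;
}
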
diff --git a/contrib/libs/cxxsupp/builtins/divti3.c b/contrib/libs/cxxsupp/builtins/divti3.c
index 1bcf0d53a2..c73eae28fe 100644
--- a/contrib/libs/cxxsupp/builtins/divti3.c
+++ b/contrib/libs/cxxsupp/builtins/divti3.c
@@ -1,33 +1,33 @@
-/* ===-- divti3.c - Implement __divti3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI ti_int
-__divti3(ti_int a, ti_int b)
-{
- const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
- ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */
- ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */
- a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
- b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
- s_a ^= s_b; /* sign of quotient */
- return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- divti3.c - Implement __divti3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI ti_int
+__divti3(ti_int a, ti_int b)
+{
+ const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+ ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */
+ ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */
+ a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
+ b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
+ s_a ^= s_b; /* sign of quotient */
+ return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/divxc3.c b/contrib/libs/cxxsupp/builtins/divxc3.c
index 1c0705704b..6f49280e5f 100644
--- a/contrib/libs/cxxsupp/builtins/divxc3.c
+++ b/contrib/libs/cxxsupp/builtins/divxc3.c
@@ -1,63 +1,63 @@
-/* ===-- divxc3.c - Implement __divxc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divxc3 for the compiler_rt library.
- *
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the quotient of (a + ib) / (c + id) */
-
-COMPILER_RT_ABI Lcomplex
-__divxc3(long double __a, long double __b, long double __c, long double __d)
-{
- int __ilogbw = 0;
- long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
- if (crt_isfinite(__logbw))
- {
- __ilogbw = (int)__logbw;
- __c = crt_scalbnl(__c, -__ilogbw);
- __d = crt_scalbnl(__d, -__ilogbw);
- }
- long double __denom = __c * __c + __d * __d;
- Lcomplex z;
- COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
- COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
- {
- if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
- {
- COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
- COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
- }
- else if ((crt_isinf(__a) || crt_isinf(__b)) &&
- crt_isfinite(__c) && crt_isfinite(__d))
- {
- __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
- __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
- COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
- COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
- }
- else if (crt_isinf(__logbw) && __logbw > 0 &&
- crt_isfinite(__a) && crt_isfinite(__b))
- {
- __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
- __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
- COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
- COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
- }
- }
- return z;
-}
-
-#endif
+/* ===-- divxc3.c - Implement __divxc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divxc3 for the compiler_rt library.
+ *
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Lcomplex
+__divxc3(long double __a, long double __b, long double __c, long double __d)
+{
+ int __ilogbw = 0;
+ long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbnl(__c, -__ilogbw);
+ __d = crt_scalbnl(__d, -__ilogbw);
+ }
+ long double __denom = __c * __c + __d * __d;
+ Lcomplex z;
+ COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
+ COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
+
+#endif
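
What __divxc3 adds to the textbook formula (ac+bd and bc-ad over c*c+d*d) is exponent pre-scaling: both denominator parts are scaled by 2^-ilogbw, where ilogbw is the exponent of max(|c|,|d|), so the squared terms neither overflow nor underflow, and the quotient is rescaled once at the end; the trailing NaN branch then patches up the special cases (zero denominator, infinite operands) along the lines of C99 Annex G. A hedged sketch of just the scaling idea on plain doubles, using standard <math.h> calls and deliberately omitting the recovery paths:

    #include <math.h>
    #include <stdio.h>

    /* Illustrative: scaled complex division (a+bi)/(c+id), as in __divxc3,
     * but for double and without the infinity/NaN recovery paths. */
    static void div_complex(double a, double b, double c, double d,
                            double *re, double *im) {
        int ilogbw = 0;
        double logbw = logb(fmax(fabs(c), fabs(d)));
        if (isfinite(logbw)) {
            ilogbw = (int)logbw;
            c = scalbn(c, -ilogbw);   /* bring max(|c|,|d|) near 1 */
            d = scalbn(d, -ilogbw);
        }
        double denom = c * c + d * d;
        *re = scalbn((a * c + b * d) / denom, -ilogbw);
        *im = scalbn((b * c - a * d) / denom, -ilogbw);
    }

    int main(void) {
        double re, im;
        div_complex(1.0, 2.0, 3.0, 4.0, &re, &im);
        printf("%g %g\n", re, im);   /* (1+2i)/(3+4i) = 0.44 + 0.08i */
        return 0;
    }

Building the sketch may need -lm on glibc-based systems.
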
diff --git a/contrib/libs/cxxsupp/builtins/emutls.c b/contrib/libs/cxxsupp/builtins/emutls.c
index c2c4318fd4..09e79568bd 100644
--- a/contrib/libs/cxxsupp/builtins/emutls.c
+++ b/contrib/libs/cxxsupp/builtins/emutls.c
@@ -1,183 +1,183 @@
-/* ===---------- emutls.c - Implements __emutls_get_address ---------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-#include <pthread.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "int_lib.h"
-#include "int_util.h"
-
-/* Default is not to use posix_memalign, so systems like Android
- * can use thread local data without heavier POSIX memory allocators.
- */
-#ifndef EMUTLS_USE_POSIX_MEMALIGN
-#define EMUTLS_USE_POSIX_MEMALIGN 0
-#endif
-
-/* For every TLS variable xyz,
- * there is one __emutls_control variable named __emutls_v.xyz.
- * If xyz has non-zero initial value, __emutls_v.xyz's "value"
- * will point to __emutls_t.xyz, which has the initial value.
- */
-typedef struct __emutls_control {
- size_t size; /* size of the object in bytes */
- size_t align; /* alignment of the object in bytes */
- union {
- uintptr_t index; /* data[index-1] is the object address */
- void* address; /* object address, when in single thread env */
- } object;
- void* value; /* null or non-zero initial value for the object */
-} __emutls_control;
-
-static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
- void *base;
-#if EMUTLS_USE_POSIX_MEMALIGN
- if (posix_memalign(&base, align, size) != 0)
- abort();
-#else
- #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
- char* object;
- if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
- abort();
- base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
- & ~(uintptr_t)(align - 1));
-
- ((void**)base)[-1] = object;
-#endif
- return base;
-}
-
-static __inline void emutls_memalign_free(void *base) {
-#if EMUTLS_USE_POSIX_MEMALIGN
- free(base);
-#else
- /* The mallocated address is in ((void**)base)[-1] */
- free(((void**)base)[-1]);
-#endif
-}
-
-/* Emulated TLS objects are always allocated at run-time. */
-static __inline void *emutls_allocate_object(__emutls_control *control) {
- /* Use standard C types, check with gcc's emutls.o. */
- typedef unsigned int gcc_word __attribute__((mode(word)));
- typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
- COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word));
- COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
- COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
-
- size_t size = control->size;
- size_t align = control->align;
- if (align < sizeof(void*))
- align = sizeof(void*);
- /* Make sure that align is power of 2. */
- if ((align & (align - 1)) != 0)
- abort();
-
- void* base = emutls_memalign_alloc(align, size);
- if (control->value)
- memcpy(base, control->value, size);
- else
- memset(base, 0, size);
- return base;
-}
-
-static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static size_t emutls_num_object = 0; /* number of allocated TLS objects */
-
-typedef struct emutls_address_array {
- uintptr_t size; /* number of elements in the 'data' array */
- void* data[];
-} emutls_address_array;
-
-static pthread_key_t emutls_pthread_key;
-
-static void emutls_key_destructor(void* ptr) {
- emutls_address_array* array = (emutls_address_array*)ptr;
- uintptr_t i;
- for (i = 0; i < array->size; ++i) {
- if (array->data[i])
- emutls_memalign_free(array->data[i]);
- }
- free(ptr);
-}
-
-static void emutls_init(void) {
- if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
- abort();
-}
-
-/* Returns control->object.index; set index if not allocated yet. */
-static __inline uintptr_t emutls_get_index(__emutls_control *control) {
- uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
- if (!index) {
- static pthread_once_t once = PTHREAD_ONCE_INIT;
- pthread_once(&once, emutls_init);
- pthread_mutex_lock(&emutls_mutex);
- index = control->object.index;
- if (!index) {
- index = ++emutls_num_object;
- __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
- }
- pthread_mutex_unlock(&emutls_mutex);
- }
- return index;
-}
-
-/* Updates newly allocated thread local emutls_address_array. */
-static __inline void emutls_check_array_set_size(emutls_address_array *array,
- uintptr_t size) {
- if (array == NULL)
- abort();
- array->size = size;
- pthread_setspecific(emutls_pthread_key, (void*)array);
-}
-
-/* Returns the new 'data' array size, number of elements,
- * which must be no smaller than the given index.
- */
-static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
- /* Need to allocate emutls_address_array with one extra slot
- * to store the data array size.
- * Round up the emutls_address_array size to multiple of 16.
- */
- return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
-}
-
-/* Returns the thread local emutls_address_array.
- * Extends its size if necessary to hold address at index.
- */
-static __inline emutls_address_array *
-emutls_get_address_array(uintptr_t index) {
- emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
- if (array == NULL) {
- uintptr_t new_size = emutls_new_data_array_size(index);
- array = calloc(new_size + 1, sizeof(void*));
- emutls_check_array_set_size(array, new_size);
- } else if (index > array->size) {
- uintptr_t orig_size = array->size;
- uintptr_t new_size = emutls_new_data_array_size(index);
- array = realloc(array, (new_size + 1) * sizeof(void*));
- if (array)
- memset(array->data + orig_size, 0,
- (new_size - orig_size) * sizeof(void*));
- emutls_check_array_set_size(array, new_size);
- }
- return array;
-}
-
-void* __emutls_get_address(__emutls_control* control) {
- uintptr_t index = emutls_get_index(control);
- emutls_address_array* array = emutls_get_address_array(index);
- if (array->data[index - 1] == NULL)
- array->data[index - 1] = emutls_allocate_object(control);
- return array->data[index - 1];
-}
+/* ===---------- emutls.c - Implements __emutls_get_address ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "int_lib.h"
+#include "int_util.h"
+
+/* Default is not to use posix_memalign, so systems like Android
+ * can use thread local data without heavier POSIX memory allocators.
+ */
+#ifndef EMUTLS_USE_POSIX_MEMALIGN
+#define EMUTLS_USE_POSIX_MEMALIGN 0
+#endif
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+ size_t size; /* size of the object in bytes */
+ size_t align; /* alignment of the object in bytes */
+ union {
+ uintptr_t index; /* data[index-1] is the object address */
+ void* address; /* object address, when in single thread env */
+ } object;
+ void* value; /* null or non-zero initial value for the object */
+} __emutls_control;
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+ void *base;
+#if EMUTLS_USE_POSIX_MEMALIGN
+ if (posix_memalign(&base, align, size) != 0)
+ abort();
+#else
+ #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
+ char* object;
+ if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+ abort();
+ base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
+ & ~(uintptr_t)(align - 1));
+
+ ((void**)base)[-1] = object;
+#endif
+ return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+#if EMUTLS_USE_POSIX_MEMALIGN
+ free(base);
+#else
+ /* The mallocated address is in ((void**)base)[-1] */
+ free(((void**)base)[-1]);
+#endif
+}
+
+/* Emulated TLS objects are always allocated at run-time. */
+static __inline void *emutls_allocate_object(__emutls_control *control) {
+ /* Use standard C types, check with gcc's emutls.o. */
+ typedef unsigned int gcc_word __attribute__((mode(word)));
+ typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+ COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word));
+ COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
+ COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
+
+ size_t size = control->size;
+ size_t align = control->align;
+ if (align < sizeof(void*))
+ align = sizeof(void*);
+ /* Make sure that align is power of 2. */
+ if ((align & (align - 1)) != 0)
+ abort();
+
+ void* base = emutls_memalign_alloc(align, size);
+ if (control->value)
+ memcpy(base, control->value, size);
+ else
+ memset(base, 0, size);
+ return base;
+}
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static size_t emutls_num_object = 0; /* number of allocated TLS objects */
+
+typedef struct emutls_address_array {
+ uintptr_t size; /* number of elements in the 'data' array */
+ void* data[];
+} emutls_address_array;
+
+static pthread_key_t emutls_pthread_key;
+
+static void emutls_key_destructor(void* ptr) {
+ emutls_address_array* array = (emutls_address_array*)ptr;
+ uintptr_t i;
+ for (i = 0; i < array->size; ++i) {
+ if (array->data[i])
+ emutls_memalign_free(array->data[i]);
+ }
+ free(ptr);
+}
+
+static void emutls_init(void) {
+ if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+ abort();
+}
+
+/* Returns control->object.index; set index if not allocated yet. */
+static __inline uintptr_t emutls_get_index(__emutls_control *control) {
+ uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
+ if (!index) {
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+ pthread_once(&once, emutls_init);
+ pthread_mutex_lock(&emutls_mutex);
+ index = control->object.index;
+ if (!index) {
+ index = ++emutls_num_object;
+ __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
+ }
+ pthread_mutex_unlock(&emutls_mutex);
+ }
+ return index;
+}
+
+/* Updates newly allocated thread local emutls_address_array. */
+static __inline void emutls_check_array_set_size(emutls_address_array *array,
+ uintptr_t size) {
+ if (array == NULL)
+ abort();
+ array->size = size;
+ pthread_setspecific(emutls_pthread_key, (void*)array);
+}
+
+/* Returns the new 'data' array size, number of elements,
+ * which must be no smaller than the given index.
+ */
+static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
+ /* Need to allocate emutls_address_array with one extra slot
+ * to store the data array size.
+ * Round up the emutls_address_array size to multiple of 16.
+ */
+ return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
+}
+
+/* Returns the thread local emutls_address_array.
+ * Extends its size if necessary to hold address at index.
+ */
+static __inline emutls_address_array *
+emutls_get_address_array(uintptr_t index) {
+ emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+ if (array == NULL) {
+ uintptr_t new_size = emutls_new_data_array_size(index);
+ array = calloc(new_size + 1, sizeof(void*));
+ emutls_check_array_set_size(array, new_size);
+ } else if (index > array->size) {
+ uintptr_t orig_size = array->size;
+ uintptr_t new_size = emutls_new_data_array_size(index);
+ array = realloc(array, (new_size + 1) * sizeof(void*));
+ if (array)
+ memset(array->data + orig_size, 0,
+ (new_size - orig_size) * sizeof(void*));
+ emutls_check_array_set_size(array, new_size);
+ }
+ return array;
+}
+
+void* __emutls_get_address(__emutls_control* control) {
+ uintptr_t index = emutls_get_index(control);
+ emutls_address_array* array = emutls_get_address_array(index);
+ if (array->data[index - 1] == NULL)
+ array->data[index - 1] = emutls_allocate_object(control);
+ return array->data[index - 1];
+}
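
A note on the bookkeeping above: every emulated TLS variable gets a 1-based slot index stored in its __emutls_control the first time any thread touches it, and each thread keeps one address array behind a single pthread key, sized so that the hidden length slot plus the data slots fill a multiple of 16 pointers. The sketch below only re-derives that rounding (emutls_new_data_array_size) and checks a few values; it is a standalone illustration, not part of the library:

    #include <assert.h>
    #include <stdint.h>

    /* Same rounding as emutls_new_data_array_size above: the data array plus
     * the one extra slot that stores its size round up to 16 pointers. */
    static uintptr_t new_data_array_size(uintptr_t index) {
        return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
    }

    int main(void) {
        assert(new_data_array_size(1)  == 15);   /* 15 data slots + size slot = 16 */
        assert(new_data_array_size(15) == 15);
        assert(new_data_array_size(16) == 31);   /* grows to the next multiple of 16 */
        assert(new_data_array_size(31) == 31);
        return 0;
    }
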
diff --git a/contrib/libs/cxxsupp/builtins/enable_execute_stack.c b/contrib/libs/cxxsupp/builtins/enable_execute_stack.c
index eb25a33330..0dc3482c44 100644
--- a/contrib/libs/cxxsupp/builtins/enable_execute_stack.c
+++ b/contrib/libs/cxxsupp/builtins/enable_execute_stack.c
@@ -1,72 +1,72 @@
-/* ===-- enable_execute_stack.c - Implement __enable_execute_stack ---------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifndef _WIN32
-#include <sys/mman.h>
-#endif
-
-/* #include "config.h"
- * FIXME: CMake - include when cmake system is ready.
- * Remove #define HAVE_SYSCONF 1 line.
- */
-#define HAVE_SYSCONF 1
-
-#ifdef _WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <Windows.h>
-#else
-#ifndef __APPLE__
-#include <unistd.h>
-#endif /* __APPLE__ */
-#endif /* _WIN32 */
-
-#if __LP64__
- #define TRAMPOLINE_SIZE 48
-#else
- #define TRAMPOLINE_SIZE 40
-#endif
-
-/*
- * The compiler generates calls to __enable_execute_stack() when creating
- * trampoline functions on the stack for use with nested functions.
- * It is expected to mark the page(s) containing the address
- * and the next 48 bytes as executable. Since the stack is normally rw-
- * that means changing the protection on those page(s) to rwx.
- */
-
-COMPILER_RT_ABI void
-__enable_execute_stack(void* addr)
-{
-
-#if _WIN32
- MEMORY_BASIC_INFORMATION mbi;
- if (!VirtualQuery (addr, &mbi, sizeof(mbi)))
- return; /* We should probably assert here because there is no return value */
- VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect);
-#else
-#if __APPLE__
- /* On Darwin, pagesize is always 4096 bytes */
- const uintptr_t pageSize = 4096;
-#elif !defined(HAVE_SYSCONF)
-#error "HAVE_SYSCONF not defined! See enable_execute_stack.c"
-#else
- const uintptr_t pageSize = sysconf(_SC_PAGESIZE);
-#endif /* __APPLE__ */
-
- const uintptr_t pageAlignMask = ~(pageSize-1);
- uintptr_t p = (uintptr_t)addr;
- unsigned char* startPage = (unsigned char*)(p & pageAlignMask);
- unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask);
- size_t length = endPage - startPage;
- (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC);
-#endif
-}
+/* ===-- enable_execute_stack.c - Implement __enable_execute_stack ---------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif
+
+/* #include "config.h"
+ * FIXME: CMake - include when cmake system is ready.
+ * Remove #define HAVE_SYSCONF 1 line.
+ */
+#define HAVE_SYSCONF 1
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#else
+#ifndef __APPLE__
+#include <unistd.h>
+#endif /* __APPLE__ */
+#endif /* _WIN32 */
+
+#if __LP64__
+ #define TRAMPOLINE_SIZE 48
+#else
+ #define TRAMPOLINE_SIZE 40
+#endif
+
+/*
+ * The compiler generates calls to __enable_execute_stack() when creating
+ * trampoline functions on the stack for use with nested functions.
+ * It is expected to mark the page(s) containing the address
+ * and the next 48 bytes as executable. Since the stack is normally rw-
+ * that means changing the protection on those page(s) to rwx.
+ */
+
+COMPILER_RT_ABI void
+__enable_execute_stack(void* addr)
+{
+
+#if _WIN32
+ MEMORY_BASIC_INFORMATION mbi;
+ if (!VirtualQuery (addr, &mbi, sizeof(mbi)))
+ return; /* We should probably assert here because there is no return value */
+ VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect);
+#else
+#if __APPLE__
+ /* On Darwin, pagesize is always 4096 bytes */
+ const uintptr_t pageSize = 4096;
+#elif !defined(HAVE_SYSCONF)
+#error "HAVE_SYSCONF not defined! See enable_execute_stack.c"
+#else
+ const uintptr_t pageSize = sysconf(_SC_PAGESIZE);
+#endif /* __APPLE__ */
+
+ const uintptr_t pageAlignMask = ~(pageSize-1);
+ uintptr_t p = (uintptr_t)addr;
+ unsigned char* startPage = (unsigned char*)(p & pageAlignMask);
+ unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask);
+ size_t length = endPage - startPage;
+ (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
+}
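
On the POSIX side, __enable_execute_stack rounds the trampoline address down to the start of its page and adds a full page to the end before rounding down again, so the mprotect span always reaches at least to the end of the page holding the trampoline's last byte, including the case where the 40/48-byte trampoline straddles a page boundary. A small standalone check of that arithmetic, assuming 4 KiB pages and the 48-byte LP64 trampoline size:

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE       ((uintptr_t)4096)   /* assumed for the example */
    #define TRAMPOLINE_SIZE ((uintptr_t)48)     /* the LP64 value used above */

    /* Same page-rounding arithmetic as __enable_execute_stack. */
    static void page_span(uintptr_t addr, uintptr_t *start, uintptr_t *length) {
        const uintptr_t mask = ~(PAGE_SIZE - 1);
        uintptr_t start_page = addr & mask;
        uintptr_t end_page   = (addr + TRAMPOLINE_SIZE + PAGE_SIZE) & mask;
        *start  = start_page;
        *length = end_page - start_page;
    }

    int main(void) {
        uintptr_t start, len;

        page_span(0x1000, &start, &len);   /* trampoline fits inside one page */
        assert(start == 0x1000 && len == PAGE_SIZE);

        page_span(0xFFF8, &start, &len);   /* trampoline straddles a boundary */
        assert(start == 0xF000 && len == 2 * PAGE_SIZE);
        return 0;
    }
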
diff --git a/contrib/libs/cxxsupp/builtins/eprintf.c b/contrib/libs/cxxsupp/builtins/eprintf.c
index 8ae0fdf815..89f34b1545 100644
--- a/contrib/libs/cxxsupp/builtins/eprintf.c
+++ b/contrib/libs/cxxsupp/builtins/eprintf.c
@@ -1,35 +1,35 @@
-/* ===---------- eprintf.c - Implements __eprintf --------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-
-
-#include "int_lib.h"
-#include <stdio.h>
-
-
-/*
- * __eprintf() was used in an old version of <assert.h>.
- * It can eventually go away, but it is needed when linking
- * .o files built with the old <assert.h>.
- *
- * It should never be exported from a dylib, so it is marked
- * visibility hidden.
- */
-#ifndef _WIN32
-__attribute__((visibility("hidden")))
-#endif
-COMPILER_RT_ABI void
-__eprintf(const char* format, const char* assertion_expression,
- const char* line, const char* file)
-{
- fprintf(stderr, format, assertion_expression, line, file);
- fflush(stderr);
- compilerrt_abort();
-}
+/* ===---------- eprintf.c - Implements __eprintf --------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+
+
+#include "int_lib.h"
+#include <stdio.h>
+
+
+/*
+ * __eprintf() was used in an old version of <assert.h>.
+ * It can eventually go away, but it is needed when linking
+ * .o files built with the old <assert.h>.
+ *
+ * It should never be exported from a dylib, so it is marked
+ * visibility hidden.
+ */
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+COMPILER_RT_ABI void
+__eprintf(const char* format, const char* assertion_expression,
+ const char* line, const char* file)
+{
+ fprintf(stderr, format, assertion_expression, line, file);
+ fflush(stderr);
+ compilerrt_abort();
+}
diff --git a/contrib/libs/cxxsupp/builtins/extenddftf2.c b/contrib/libs/cxxsupp/builtins/extenddftf2.c
index c55e2e8a25..86dab8f03a 100644
--- a/contrib/libs/cxxsupp/builtins/extenddftf2.c
+++ b/contrib/libs/cxxsupp/builtins/extenddftf2.c
@@ -1,23 +1,23 @@
-//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-#define SRC_DOUBLE
-#define DST_QUAD
-#include "fp_extend_impl.inc"
-
-COMPILER_RT_ABI long double __extenddftf2(double a) {
- return __extendXfYf2__(a);
-}
-
-#endif
+//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_DOUBLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI long double __extenddftf2(double a) {
+ return __extendXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/extendhfsf2.c b/contrib/libs/cxxsupp/builtins/extendhfsf2.c
index aee4fd8787..27115a48c1 100644
--- a/contrib/libs/cxxsupp/builtins/extendhfsf2.c
+++ b/contrib/libs/cxxsupp/builtins/extendhfsf2.c
@@ -1,25 +1,25 @@
-//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#define SRC_HALF
-#define DST_SINGLE
-#include "fp_extend_impl.inc"
-
-ARM_EABI_FNALIAS(h2f, extendhfsf2)
-
-// Use a forwarding definition and noinline to implement a poor man's alias,
-// as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
- return __extendXfYf2__(a);
-}
-
-COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
- return __extendhfsf2(a);
-}
+//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define SRC_HALF
+#define DST_SINGLE
+#include "fp_extend_impl.inc"
+
+ARM_EABI_FNALIAS(h2f, extendhfsf2)
+
+// Use a forwarding definition and noinline to implement a poor man's alias,
+// as there isn't a good cross-platform way of defining one.
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
+ return __extendXfYf2__(a);
+}
+
+COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
+ return __extendhfsf2(a);
+}
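
extendhfsf2.c itself holds no conversion logic: it configures fp_extend_impl.inc with SRC_HALF/DST_SINGLE and adds the __gnu_h2f_ieee name through a forwarding call. For orientation, a hedged standalone sketch of the binary16 to binary32 widening it performs, restricted to normal numbers and zeros (the real template also handles subnormals, infinities and NaNs, and works generically over formats):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Illustrative IEEE-754 half -> single widening: re-bias the exponent
     * (15 -> 127) and left-align the 10-bit fraction into 23 bits.
     * Normal numbers and zeros only. */
    static float half_to_float(uint16_t h) {
        uint32_t sign = (uint32_t)(h >> 15) << 31;
        uint32_t exp  = (h >> 10) & 0x1F;
        uint32_t frac = h & 0x3FF;
        uint32_t bits;
        if (exp == 0 && frac == 0)
            bits = sign;                                      /* +/- zero */
        else
            bits = sign | ((exp - 15 + 127) << 23) | (frac << 13);
        float f;
        memcpy(&f, &bits, sizeof f);                          /* bit pattern -> float */
        return f;
    }

    int main(void) {
        assert(half_to_float(0x3C00) == 1.0f);    /* 1.0 in binary16 */
        assert(half_to_float(0xC000) == -2.0f);   /* -2.0 in binary16 */
        return 0;
    }
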
diff --git a/contrib/libs/cxxsupp/builtins/extendsfdf2.c b/contrib/libs/cxxsupp/builtins/extendsfdf2.c
index 595264f3a2..7a267c2f47 100644
--- a/contrib/libs/cxxsupp/builtins/extendsfdf2.c
+++ b/contrib/libs/cxxsupp/builtins/extendsfdf2.c
@@ -1,19 +1,19 @@
-//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#define SRC_SINGLE
-#define DST_DOUBLE
-#include "fp_extend_impl.inc"
-
-ARM_EABI_FNALIAS(f2d, extendsfdf2)
-
-COMPILER_RT_ABI double __extendsfdf2(float a) {
- return __extendXfYf2__(a);
-}
+//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define SRC_SINGLE
+#define DST_DOUBLE
+#include "fp_extend_impl.inc"
+
+ARM_EABI_FNALIAS(f2d, extendsfdf2)
+
+COMPILER_RT_ABI double __extendsfdf2(float a) {
+ return __extendXfYf2__(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/extendsftf2.c b/contrib/libs/cxxsupp/builtins/extendsftf2.c
index 90120a2ba0..2eeeba2848 100644
--- a/contrib/libs/cxxsupp/builtins/extendsftf2.c
+++ b/contrib/libs/cxxsupp/builtins/extendsftf2.c
@@ -1,23 +1,23 @@
-//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-#define SRC_SINGLE
-#define DST_QUAD
-#include "fp_extend_impl.inc"
-
-COMPILER_RT_ABI long double __extendsftf2(float a) {
- return __extendXfYf2__(a);
-}
-
-#endif
+//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_SINGLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI long double __extendsftf2(float a) {
+ return __extendXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/ffsdi2.c b/contrib/libs/cxxsupp/builtins/ffsdi2.c
index a1473d4a3c..a5ac9900ff 100644
--- a/contrib/libs/cxxsupp/builtins/ffsdi2.c
+++ b/contrib/libs/cxxsupp/builtins/ffsdi2.c
@@ -1,33 +1,33 @@
-/* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ffsdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: the index of the least significant 1-bit in a, or
- * the value zero if a is zero. The least significant bit is index one.
- */
-
-COMPILER_RT_ABI si_int
-__ffsdi2(di_int a)
-{
- dwords x;
- x.all = a;
- if (x.s.low == 0)
- {
- if (x.s.high == 0)
- return 0;
- return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
- }
- return __builtin_ctz(x.s.low) + 1;
-}
+/* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffsdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffsdi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ if (x.s.low == 0)
+ {
+ if (x.s.high == 0)
+ return 0;
+ return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
+ }
+ return __builtin_ctz(x.s.low) + 1;
+}
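
__ffsdi2 computes the 1-based position of the lowest set bit from the two 32-bit halves: if the low word is non-zero, its count of trailing zeros plus one is the answer; otherwise the search continues in the high word with a 32-bit offset. The same split, written standalone for a plain uint64_t (uses the GCC/Clang __builtin_ctz, like the routine above):

    #include <assert.h>
    #include <stdint.h>

    /* 1-based index of the lowest set bit, 0 for zero, from 32-bit halves. */
    static int ffs64(uint64_t a) {
        uint32_t low  = (uint32_t)a;
        uint32_t high = (uint32_t)(a >> 32);
        if (low == 0) {
            if (high == 0)
                return 0;
            return __builtin_ctz(high) + 33;   /* the bit lives in the high word */
        }
        return __builtin_ctz(low) + 1;
    }

    int main(void) {
        assert(ffs64(0) == 0);
        assert(ffs64(1) == 1);
        assert(ffs64(0x0000000100000000ull) == 33);
        assert(ffs64(0x8000000000000000ull) == 64);
        return 0;
    }
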
diff --git a/contrib/libs/cxxsupp/builtins/ffsti2.c b/contrib/libs/cxxsupp/builtins/ffsti2.c
index 6689881101..dcdb3bd7f8 100644
--- a/contrib/libs/cxxsupp/builtins/ffsti2.c
+++ b/contrib/libs/cxxsupp/builtins/ffsti2.c
@@ -1,37 +1,37 @@
-/* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ffsti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: the index of the least significant 1-bit in a, or
- * the value zero if a is zero. The least significant bit is index one.
- */
-
-COMPILER_RT_ABI si_int
-__ffsti2(ti_int a)
-{
- twords x;
- x.all = a;
- if (x.s.low == 0)
- {
- if (x.s.high == 0)
- return 0;
- return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT);
- }
- return __builtin_ctzll(x.s.low) + 1;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffsti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffsti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ if (x.s.low == 0)
+ {
+ if (x.s.high == 0)
+ return 0;
+ return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT);
+ }
+ return __builtin_ctzll(x.s.low) + 1;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/fixdfdi.c b/contrib/libs/cxxsupp/builtins/fixdfdi.c
index e37029b34e..14283ef42e 100644
--- a/contrib/libs/cxxsupp/builtins/fixdfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixdfdi.c
@@ -1,46 +1,46 @@
-/* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-ARM_EABI_FNALIAS(d2lz, fixdfdi)
-
-#ifndef __SOFT_FP__
-/* Support for systems that have hardware floating-point; can set the invalid
- * flag as a side-effect of computation.
- */
-
-COMPILER_RT_ABI du_int __fixunsdfdi(double a);
-
-COMPILER_RT_ABI di_int
-__fixdfdi(double a)
-{
- if (a < 0.0) {
- return -__fixunsdfdi(-a);
- }
- return __fixunsdfdi(a);
-}
-
-#else
-/* Support for systems that don't have hardware floating-point; there are no
- * flags to set, and we don't want to code-gen to an unknown soft-float
- * implementation.
- */
-
-typedef di_int fixint_t;
-typedef du_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI di_int
-__fixdfdi(fp_t a) {
- return __fixint(a);
-}
-
-#endif
+/* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+ARM_EABI_FNALIAS(d2lz, fixdfdi)
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+
+COMPILER_RT_ABI di_int
+__fixdfdi(double a)
+{
+ if (a < 0.0) {
+ return -__fixunsdfdi(-a);
+ }
+ return __fixunsdfdi(a);
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int
+__fixdfdi(fp_t a) {
+ return __fixint(a);
+}
+
+#endif
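
In the hardware floating-point branch above, __fixdfdi does not convert directly: it strips the sign, reuses the unsigned converter __fixunsdfdi, and negates the result afterwards, which keeps all of the double-to-64-bit splitting in one routine. A toy sketch of that shape, where the unsigned step is just a cast standing in for the real __fixunsdfdi:

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in for __fixunsdfdi; the library splits into 32-bit halves instead. */
    static uint64_t to_u64(double a) {
        return (uint64_t)a;
    }

    /* Same shape as the hardware-FP __fixdfdi path: signed via unsigned. */
    static int64_t to_i64(double a) {
        if (a < 0.0)
            return -(int64_t)to_u64(-a);
        return (int64_t)to_u64(a);
    }

    int main(void) {
        assert(to_i64(-2.75) == -2);   /* truncates toward zero */
        assert(to_i64(2.75)  == 2);
        assert(to_i64(0.0)   == 0);
        return 0;
    }
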
diff --git a/contrib/libs/cxxsupp/builtins/fixdfsi.c b/contrib/libs/cxxsupp/builtins/fixdfsi.c
index 952c0b07b9..704e65bc43 100644
--- a/contrib/libs/cxxsupp/builtins/fixdfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixdfsi.c
@@ -1,22 +1,22 @@
-/* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-typedef si_int fixint_t;
-typedef su_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-ARM_EABI_FNALIAS(d2iz, fixdfsi)
-
-COMPILER_RT_ABI si_int
-__fixdfsi(fp_t a) {
- return __fixint(a);
-}
+/* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+ARM_EABI_FNALIAS(d2iz, fixdfsi)
+
+COMPILER_RT_ABI si_int
+__fixdfsi(fp_t a) {
+ return __fixint(a);
+}
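
fixdfsi.c, like the rest of the __fix*si/di/ti files here, only picks the integer types and includes fp_fixint_impl.inc; the shared helper truncates toward zero and, in upstream compiler-rt, clamps out-of-range inputs to the destination type's extremes. A hedged sketch of that saturating contract for double to int32, stated as an assumption about the .inc, which is not shown in this diff:

    #include <assert.h>
    #include <stdint.h>

    /* Sketch of a saturating double -> int32 conversion; the shared
     * fp_fixint_impl.inc helper works on the raw bit pattern instead,
     * and its NaN behaviour is not reproduced here. */
    static int32_t double_to_i32_saturating(double a) {
        if (a >= (double)INT32_MAX) return INT32_MAX;
        if (a <= (double)INT32_MIN) return INT32_MIN;
        return (int32_t)a;             /* in range: ordinary truncation */
    }

    int main(void) {
        assert(double_to_i32_saturating(1e99)  == INT32_MAX);
        assert(double_to_i32_saturating(-1e99) == INT32_MIN);
        assert(double_to_i32_saturating(-7.9)  == -7);   /* toward zero */
        return 0;
    }
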
diff --git a/contrib/libs/cxxsupp/builtins/fixdfti.c b/contrib/libs/cxxsupp/builtins/fixdfti.c
index ce207e4cce..aaf225e74f 100644
--- a/contrib/libs/cxxsupp/builtins/fixdfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixdfti.c
@@ -1,26 +1,26 @@
-/* ===-- fixdfti.c - Implement __fixdfti -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-typedef ti_int fixint_t;
-typedef tu_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI ti_int
-__fixdfti(fp_t a) {
- return __fixint(a);
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- fixdfti.c - Implement __fixdfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int
+__fixdfti(fp_t a) {
+ return __fixint(a);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/fixsfdi.c b/contrib/libs/cxxsupp/builtins/fixsfdi.c
index ca1ffcdfe8..fab47e272a 100644
--- a/contrib/libs/cxxsupp/builtins/fixsfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixsfdi.c
@@ -1,47 +1,47 @@
-/* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(f2lz, fixsfdi)
-
-#ifndef __SOFT_FP__
-/* Support for systems that have hardware floating-point; can set the invalid
- * flag as a side-effect of computation.
- */
-
-COMPILER_RT_ABI du_int __fixunssfdi(float a);
-
-COMPILER_RT_ABI di_int
-__fixsfdi(float a)
-{
- if (a < 0.0f) {
- return -__fixunssfdi(-a);
- }
- return __fixunssfdi(a);
-}
-
-#else
-/* Support for systems that don't have hardware floating-point; there are no
- * flags to set, and we don't want to code-gen to an unknown soft-float
- * implementation.
- */
-
-typedef di_int fixint_t;
-typedef du_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI di_int
-__fixsfdi(fp_t a) {
- return __fixint(a);
-}
-
-#endif
+/* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(f2lz, fixsfdi)
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+
+COMPILER_RT_ABI di_int
+__fixsfdi(float a)
+{
+ if (a < 0.0f) {
+ return -__fixunssfdi(-a);
+ }
+ return __fixunssfdi(a);
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int
+__fixsfdi(fp_t a) {
+ return __fixint(a);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixsfsi.c b/contrib/libs/cxxsupp/builtins/fixsfsi.c
index 2907970424..f045536d68 100644
--- a/contrib/libs/cxxsupp/builtins/fixsfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixsfsi.c
@@ -1,22 +1,22 @@
-/* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-typedef si_int fixint_t;
-typedef su_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-ARM_EABI_FNALIAS(f2iz, fixsfsi)
-
-COMPILER_RT_ABI si_int
-__fixsfsi(fp_t a) {
- return __fixint(a);
-}
+/* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+ARM_EABI_FNALIAS(f2iz, fixsfsi)
+
+COMPILER_RT_ABI si_int
+__fixsfsi(fp_t a) {
+ return __fixint(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/fixsfti.c b/contrib/libs/cxxsupp/builtins/fixsfti.c
index 060f3faf3d..3a159b3e18 100644
--- a/contrib/libs/cxxsupp/builtins/fixsfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixsfti.c
@@ -1,26 +1,26 @@
-/* ===-- fixsfti.c - Implement __fixsfti -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-typedef ti_int fixint_t;
-typedef tu_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI ti_int
-__fixsfti(fp_t a) {
- return __fixint(a);
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- fixsfti.c - Implement __fixsfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int
+__fixsfti(fp_t a) {
+ return __fixint(a);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/fixtfdi.c b/contrib/libs/cxxsupp/builtins/fixtfdi.c
index a036f06292..bc9dea1f4f 100644
--- a/contrib/libs/cxxsupp/builtins/fixtfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixtfdi.c
@@ -1,23 +1,23 @@
-/* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-typedef di_int fixint_t;
-typedef du_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI di_int
-__fixtfdi(fp_t a) {
- return __fixint(a);
-}
-#endif
+/* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int
+__fixtfdi(fp_t a) {
+ return __fixint(a);
+}
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixtfsi.c b/contrib/libs/cxxsupp/builtins/fixtfsi.c
index 326454dc52..feb3de8850 100644
--- a/contrib/libs/cxxsupp/builtins/fixtfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixtfsi.c
@@ -1,23 +1,23 @@
-/* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-typedef si_int fixint_t;
-typedef su_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI si_int
-__fixtfsi(fp_t a) {
- return __fixint(a);
-}
-#endif
+/* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int
+__fixtfsi(fp_t a) {
+ return __fixint(a);
+}
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixtfti.c b/contrib/libs/cxxsupp/builtins/fixtfti.c
index 2776c29fa2..ee4ada85cb 100644
--- a/contrib/libs/cxxsupp/builtins/fixtfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixtfti.c
@@ -1,23 +1,23 @@
-/* ===-- fixtfti.c - Implement __fixtfti -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-typedef ti_int fixint_t;
-typedef tu_int fixuint_t;
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI ti_int
-__fixtfti(fp_t a) {
- return __fixint(a);
-}
-#endif
+/* ===-- fixtfti.c - Implement __fixtfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int
+__fixtfti(fp_t a) {
+ return __fixint(a);
+}
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixunsdfdi.c b/contrib/libs/cxxsupp/builtins/fixunsdfdi.c
index d708e6c380..4b0bc9e1d0 100644
--- a/contrib/libs/cxxsupp/builtins/fixunsdfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunsdfdi.c
@@ -1,44 +1,44 @@
-/* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(d2ulz, fixunsdfdi)
-
-#ifndef __SOFT_FP__
-/* Support for systems that have hardware floating-point; can set the invalid
- * flag as a side-effect of computation.
- */
-
-COMPILER_RT_ABI du_int
-__fixunsdfdi(double a)
-{
- if (a <= 0.0) return 0;
- su_int high = a / 4294967296.f; /* a / 0x1p32f; */
- su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */
- return ((du_int)high << 32) | low;
-}
-
-#else
-/* Support for systems that don't have hardware floating-point; there are no
- * flags to set, and we don't want to code-gen to an unknown soft-float
- * implementation.
- */
-
-typedef du_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI du_int
-__fixunsdfdi(fp_t a) {
- return __fixuint(a);
-}
-
-#endif
+/* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(d2ulz, fixunsdfdi)
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int
+__fixunsdfdi(double a)
+{
+ if (a <= 0.0) return 0;
+ su_int high = a / 4294967296.f; /* a / 0x1p32f; */
+ su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */
+ return ((du_int)high << 32) | low;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int
+__fixunsdfdi(fp_t a) {
+ return __fixuint(a);
+}
+
+#endif
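
The hardware floating-point branch of __fixunsdfdi splits the value into two 32-bit halves: dividing by 2^32 yields the high word, and subtracting the high word scaled back up leaves the low word. The same split, written standalone with double literals (the routine above uses the equivalent float literal 4294967296.f):

    #include <assert.h>
    #include <stdint.h>

    /* Same high/low split as the hardware-FP __fixunsdfdi path above. */
    static uint64_t dtou64_by_halves(double a) {
        if (a <= 0.0)
            return 0;
        uint32_t high = (uint32_t)(a / 4294967296.0);              /* a / 2^32 */
        uint32_t low  = (uint32_t)(a - (double)high * 4294967296.0);
        return ((uint64_t)high << 32) | low;
    }

    int main(void) {
        assert(dtou64_by_halves(0.0) == 0);
        assert(dtou64_by_halves(4294967296.0) == (1ull << 32));
        assert(dtou64_by_halves(4294967297.5) == (1ull << 32) + 1);  /* truncates */
        return 0;
    }
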
diff --git a/contrib/libs/cxxsupp/builtins/fixunsdfsi.c b/contrib/libs/cxxsupp/builtins/fixunsdfsi.c
index 1e1216188a..232d342d77 100644
--- a/contrib/libs/cxxsupp/builtins/fixunsdfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunsdfsi.c
@@ -1,21 +1,21 @@
-/* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-typedef su_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-ARM_EABI_FNALIAS(d2uiz, fixunsdfsi)
-
-COMPILER_RT_ABI su_int
-__fixunsdfsi(fp_t a) {
- return __fixuint(a);
-}
+/* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+ARM_EABI_FNALIAS(d2uiz, fixunsdfsi)
+
+COMPILER_RT_ABI su_int
+__fixunsdfsi(fp_t a) {
+ return __fixuint(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/fixunsdfti.c b/contrib/libs/cxxsupp/builtins/fixunsdfti.c
index 699dd464ea..f8046a0263 100644
--- a/contrib/libs/cxxsupp/builtins/fixunsdfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixunsdfti.c
@@ -1,23 +1,23 @@
-/* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-typedef tu_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI tu_int
-__fixunsdfti(fp_t a) {
- return __fixuint(a);
-}
-#endif /* CRT_HAS_128BIT */
+/* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int
+__fixunsdfti(fp_t a) {
+ return __fixuint(a);
+}
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/fixunssfdi.c b/contrib/libs/cxxsupp/builtins/fixunssfdi.c
index ab0513bc07..f8ebab854f 100644
--- a/contrib/libs/cxxsupp/builtins/fixunssfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunssfdi.c
@@ -1,45 +1,45 @@
-/* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(f2ulz, fixunssfdi)
-
-#ifndef __SOFT_FP__
-/* Support for systems that have hardware floating-point; can set the invalid
- * flag as a side-effect of computation.
- */
-
-COMPILER_RT_ABI du_int
-__fixunssfdi(float a)
-{
- if (a <= 0.0f) return 0;
- double da = a;
- su_int high = da / 4294967296.f; /* da / 0x1p32f; */
- su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */
- return ((du_int)high << 32) | low;
-}
-
-#else
-/* Support for systems that don't have hardware floating-point; there are no
- * flags to set, and we don't want to code-gen to an unknown soft-float
- * implementation.
- */
-
-typedef du_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI du_int
-__fixunssfdi(fp_t a) {
- return __fixuint(a);
-}
-
-#endif
+/* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(f2ulz, fixunssfdi)
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int
+__fixunssfdi(float a)
+{
+ if (a <= 0.0f) return 0;
+ double da = a;
+ su_int high = da / 4294967296.f; /* da / 0x1p32f; */
+ su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */
+ return ((du_int)high << 32) | low;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int
+__fixunssfdi(fp_t a) {
+ return __fixuint(a);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixunssfsi.c b/contrib/libs/cxxsupp/builtins/fixunssfsi.c
index a7be73f802..cc2b05bd84 100644
--- a/contrib/libs/cxxsupp/builtins/fixunssfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunssfsi.c
@@ -1,25 +1,25 @@
-/* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixunssfsi for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-typedef su_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-ARM_EABI_FNALIAS(f2uiz, fixunssfsi)
-
-COMPILER_RT_ABI su_int
-__fixunssfsi(fp_t a) {
- return __fixuint(a);
-}
+/* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunssfsi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+ARM_EABI_FNALIAS(f2uiz, fixunssfsi)
+
+COMPILER_RT_ABI su_int
+__fixunssfsi(fp_t a) {
+ return __fixuint(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/fixunssfti.c b/contrib/libs/cxxsupp/builtins/fixunssfti.c
index e18617293e..862d7bd6c7 100644
--- a/contrib/libs/cxxsupp/builtins/fixunssfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixunssfti.c
@@ -1,26 +1,26 @@
-/* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixunssfti for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT)
-typedef tu_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI tu_int
-__fixunssfti(fp_t a) {
- return __fixuint(a);
-}
-#endif
+/* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunssfti for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT)
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int
+__fixunssfti(fp_t a) {
+ return __fixuint(a);
+}
+#endif
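
The 128-bit variants such as __fixunssfti are built only when the target has a native 128-bit integer type, which is what the CRT_HAS_128BIT guard above checks. In plain application code the analogous test is usually the __SIZEOF_INT128__ macro that GCC and Clang predefine on such targets; compiler-rt derives its own macro internally, so treat this as a comparable sketch rather than its definition:

#include <stdint.h>
#include <stdio.h>

#if defined(__SIZEOF_INT128__)
typedef unsigned __int128 u128;

/* A trivial 128-bit helper that only exists when the type does. */
static u128 u128_from_halves(uint64_t hi, uint64_t lo) {
    return ((u128)hi << 64) | lo;
}
#endif

int main(void) {
#if defined(__SIZEOF_INT128__)
    u128 x = u128_from_halves(1, 5);                 /* 2^64 + 5 */
    printf("%llu\n", (unsigned long long)(x >> 1));  /* 9223372036854775810 */
#else
    puts("no native 128-bit integer type on this target");
#endif
    return 0;
}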
diff --git a/contrib/libs/cxxsupp/builtins/fixunstfdi.c b/contrib/libs/cxxsupp/builtins/fixunstfdi.c
index 08d158f52e..b2995f6583 100644
--- a/contrib/libs/cxxsupp/builtins/fixunstfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunstfdi.c
@@ -1,22 +1,22 @@
-/* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-typedef du_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI du_int
-__fixunstfdi(fp_t a) {
- return __fixuint(a);
-}
-#endif
+/* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int
+__fixunstfdi(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixunstfsi.c b/contrib/libs/cxxsupp/builtins/fixunstfsi.c
index 4468bcdf6c..b5d3f6a7d3 100644
--- a/contrib/libs/cxxsupp/builtins/fixunstfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunstfsi.c
@@ -1,22 +1,22 @@
-/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-typedef su_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI su_int
-__fixunstfsi(fp_t a) {
- return __fixuint(a);
-}
-#endif
+/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int
+__fixunstfsi(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixunstfti.c b/contrib/libs/cxxsupp/builtins/fixunstfti.c
index 5828998cbf..22ff9dfc03 100644
--- a/contrib/libs/cxxsupp/builtins/fixunstfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixunstfti.c
@@ -1,22 +1,22 @@
-/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-typedef tu_int fixuint_t;
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI tu_int
-__fixunstfti(fp_t a) {
- return __fixuint(a);
-}
-#endif
+/* ===-- fixunstfti.c - Implement __fixunstfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int
+__fixunstfti(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fixunsxfdi.c b/contrib/libs/cxxsupp/builtins/fixunsxfdi.c
index 485f5b293a..075304e78d 100644
--- a/contrib/libs/cxxsupp/builtins/fixunsxfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunsxfdi.c
@@ -1,46 +1,46 @@
-/* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixunsxfdi for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-
-/* Returns: convert a to a unsigned long long, rounding toward zero.
- * Negative values all become zero.
- */
-
-/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
- * du_int is a 64 bit integral type
- * value in long double is representable in du_int or is negative
- * (no range checking performed)
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI du_int
-__fixunsxfdi(long double a)
-{
- long_double_bits fb;
- fb.f = a;
- int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
- if (e < 0 || (fb.u.high.s.low & 0x00008000))
- return 0;
- if ((unsigned)e > sizeof(du_int) * CHAR_BIT)
- return ~(du_int)0;
- return fb.u.low.all >> (63 - e);
-}
-
-#endif
+/* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunsxfdi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a unsigned long long, rounding toward zero.
+ * Negative values all become zero.
+ */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * du_int is a 64 bit integral type
+ * value in long double is representable in du_int or is negative
+ * (no range checking performed)
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI du_int
+__fixunsxfdi(long double a)
+{
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0 || (fb.u.high.s.low & 0x00008000))
+ return 0;
+ if ((unsigned)e > sizeof(du_int) * CHAR_BIT)
+ return ~(du_int)0;
+ return fb.u.low.all >> (63 - e);
+}
+
+#endif
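
__fixunsxfdi works directly on the x87 80-bit layout: a sign bit, a 15-bit biased exponent, and a 64-bit significand whose integer bit is stored explicitly, so the conversion reduces to one right shift of the significand. A self-contained sketch for targets where long double really is that format (little-endian x86; the names are mine, and compiler-rt goes through its long_double_bits union instead):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch: truncating long double (x87 80-bit) -> uint64_t, mirroring
 * __fixunsxfdi above. Assumes long double is the Intel 80-bit extended type
 * on a little-endian target; negative inputs and |a| < 1 give 0. */
static uint64_t fixunsxfdi_sketch(long double a) {
    struct { uint64_t mantissa; uint16_t sign_exp; } bits;
    memcpy(&bits, &a, 10);                        /* low 10 bytes hold the x87 value */

    int e = (bits.sign_exp & 0x7FFF) - 16383;     /* unbiased exponent */
    if (e < 0 || (bits.sign_exp & 0x8000))
        return 0;                                  /* |a| < 1 or a is negative */
    if (e >= 64)
        return ~(uint64_t)0;                       /* too big: saturate */
    return bits.mantissa >> (63 - e);              /* explicit integer bit is bit 63 */
}

int main(void) {
    printf("%llu\n", (unsigned long long)fixunsxfdi_sketch(123456.75L));  /* 123456 */
    return 0;
}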
diff --git a/contrib/libs/cxxsupp/builtins/fixunsxfsi.c b/contrib/libs/cxxsupp/builtins/fixunsxfsi.c
index 0e0faf5375..c3c70f743d 100644
--- a/contrib/libs/cxxsupp/builtins/fixunsxfsi.c
+++ b/contrib/libs/cxxsupp/builtins/fixunsxfsi.c
@@ -1,45 +1,45 @@
-/* ===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixunsxfsi for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-
-/* Returns: convert a to a unsigned int, rounding toward zero.
- * Negative values all become zero.
- */
-
-/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
- * su_int is a 32 bit integral type
- * value in long double is representable in su_int or is negative
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI su_int
-__fixunsxfsi(long double a)
-{
- long_double_bits fb;
- fb.f = a;
- int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
- if (e < 0 || (fb.u.high.s.low & 0x00008000))
- return 0;
- if ((unsigned)e > sizeof(su_int) * CHAR_BIT)
- return ~(su_int)0;
- return fb.u.low.s.high >> (31 - e);
-}
-
-#endif /* !_ARCH_PPC */
+/* ===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunsxfsi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a unsigned int, rounding toward zero.
+ * Negative values all become zero.
+ */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * su_int is a 32 bit integral type
+ * value in long double is representable in su_int or is negative
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI su_int
+__fixunsxfsi(long double a)
+{
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0 || (fb.u.high.s.low & 0x00008000))
+ return 0;
+ if ((unsigned)e > sizeof(su_int) * CHAR_BIT)
+ return ~(su_int)0;
+ return fb.u.low.s.high >> (31 - e);
+}
+
+#endif /* !_ARCH_PPC */
diff --git a/contrib/libs/cxxsupp/builtins/fixunsxfti.c b/contrib/libs/cxxsupp/builtins/fixunsxfti.c
index 9ed2988424..fb39d00ff5 100644
--- a/contrib/libs/cxxsupp/builtins/fixunsxfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixunsxfti.c
@@ -1,50 +1,50 @@
-/* ===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixunsxfti for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a unsigned long long, rounding toward zero.
- * Negative values all become zero.
- */
-
-/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
- * tu_int is a 128 bit integral type
- * value in long double is representable in tu_int or is negative
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI tu_int
-__fixunsxfti(long double a)
-{
- long_double_bits fb;
- fb.f = a;
- int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
- if (e < 0 || (fb.u.high.s.low & 0x00008000))
- return 0;
- if ((unsigned)e > sizeof(tu_int) * CHAR_BIT)
- return ~(tu_int)0;
- tu_int r = fb.u.low.all;
- if (e > 63)
- r <<= (e - 63);
- else
- r >>= (63 - e);
- return r;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunsxfti for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a unsigned long long, rounding toward zero.
+ * Negative values all become zero.
+ */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * tu_int is a 128 bit integral type
+ * value in long double is representable in tu_int or is negative
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI tu_int
+__fixunsxfti(long double a)
+{
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0 || (fb.u.high.s.low & 0x00008000))
+ return 0;
+ if ((unsigned)e > sizeof(tu_int) * CHAR_BIT)
+ return ~(tu_int)0;
+ tu_int r = fb.u.low.all;
+ if (e > 63)
+ r <<= (e - 63);
+ else
+ r >>= (63 - e);
+ return r;
+}
+
+#endif /* CRT_HAS_128BIT */
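
__fixunsxfti differs from the 64-bit variant only in the final step: with a 128-bit destination the exponent can exceed 63, and then the 64-bit significand must be shifted left instead of right. A small sketch of just that placement step (unsigned __int128 is the GCC/Clang extension behind tu_int):

#include <stdint.h>
#include <stdio.h>

/* Place a 64-bit x87 significand (explicit integer bit at position 63) so
 * that its integer bit lands at bit position e of a 128-bit result, as the
 * tail of __fixunsxfti above does. */
static unsigned __int128 place_significand(uint64_t mantissa, int e) {
    unsigned __int128 r = mantissa;
    if (e > 63)
        return r << (e - 63);    /* value needs more than 64 integer bits */
    return r >> (63 - e);        /* drop the fractional part */
}

int main(void) {
    /* 1.0 * 2^64: the integer bit moves one position past the 64-bit boundary. */
    unsigned __int128 r = place_significand(0x8000000000000000ull, 64);
    printf("%d\n", r == ((unsigned __int128)1 << 64));   /* 1 */
    return 0;
}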
diff --git a/contrib/libs/cxxsupp/builtins/fixxfdi.c b/contrib/libs/cxxsupp/builtins/fixxfdi.c
index 65ba7ce89d..011787f9e4 100644
--- a/contrib/libs/cxxsupp/builtins/fixxfdi.c
+++ b/contrib/libs/cxxsupp/builtins/fixxfdi.c
@@ -1,48 +1,48 @@
-/* ===-- fixxfdi.c - Implement __fixxfdi -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixxfdi for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-
-/* Returns: convert a to a signed long long, rounding toward zero. */
-
-/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
- * di_int is a 64 bit integral type
- * value in long double is representable in di_int (no range checking performed)
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI di_int
-__fixxfdi(long double a)
-{
- const di_int di_max = (di_int)((~(du_int)0) / 2);
- const di_int di_min = -di_max - 1;
- long_double_bits fb;
- fb.f = a;
- int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
- if (e < 0)
- return 0;
- if ((unsigned)e >= sizeof(di_int) * CHAR_BIT)
- return a > 0 ? di_max : di_min;
- di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15);
- di_int r = fb.u.low.all;
- r = (du_int)r >> (63 - e);
- return (r ^ s) - s;
-}
-
-#endif /* !_ARCH_PPC */
+/* ===-- fixxfdi.c - Implement __fixxfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixxfdi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a signed long long, rounding toward zero. */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * di_int is a 64 bit integral type
+ * value in long double is representable in di_int (no range checking performed)
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI di_int
+__fixxfdi(long double a)
+{
+ const di_int di_max = (di_int)((~(du_int)0) / 2);
+ const di_int di_min = -di_max - 1;
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0)
+ return 0;
+ if ((unsigned)e >= sizeof(di_int) * CHAR_BIT)
+ return a > 0 ? di_max : di_min;
+ di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15);
+ di_int r = fb.u.low.all;
+ r = (du_int)r >> (63 - e);
+ return (r ^ s) - s;
+}
+
+#endif /* !_ARCH_PPC */
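
The signed conversions __fixxfdi and __fixxfti reuse the unsigned extraction and then apply the two's-complement identity (r ^ s) - s, where s is 0 for non-negative inputs and all one-bits for negative ones, negating the result without a branch. A tiny demonstration of the identity:

#include <stdint.h>
#include <stdio.h>

/* (r ^ s) - s negates r when s == -1 (all one-bits) and leaves it unchanged
 * when s == 0 -- the branchless conditional negation used by __fixxfdi and
 * __fixxfti above. */
static int64_t conditional_negate(int64_t r, int64_t s) {
    return (r ^ s) - s;
}

int main(void) {
    printf("%lld\n", (long long)conditional_negate(42, 0));    /* 42 */
    printf("%lld\n", (long long)conditional_negate(42, -1));   /* -42 */
    return 0;
}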
diff --git a/contrib/libs/cxxsupp/builtins/fixxfti.c b/contrib/libs/cxxsupp/builtins/fixxfti.c
index 4dc36dd0b7..968a4f0d5e 100644
--- a/contrib/libs/cxxsupp/builtins/fixxfti.c
+++ b/contrib/libs/cxxsupp/builtins/fixxfti.c
@@ -1,51 +1,51 @@
-/* ===-- fixxfti.c - Implement __fixxfti -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __fixxfti for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a signed long long, rounding toward zero. */
-
-/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
- * ti_int is a 128 bit integral type
- * value in long double is representable in ti_int
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI ti_int
-__fixxfti(long double a)
-{
- const ti_int ti_max = (ti_int)((~(tu_int)0) / 2);
- const ti_int ti_min = -ti_max - 1;
- long_double_bits fb;
- fb.f = a;
- int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
- if (e < 0)
- return 0;
- ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15);
- ti_int r = fb.u.low.all;
- if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT)
- return a > 0 ? ti_max : ti_min;
- if (e > 63)
- r <<= (e - 63);
- else
- r >>= (63 - e);
- return (r ^ s) - s;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- fixxfti.c - Implement __fixxfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixxfti for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a signed long long, rounding toward zero. */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * ti_int is a 128 bit integral type
+ * value in long double is representable in ti_int
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI ti_int
+__fixxfti(long double a)
+{
+ const ti_int ti_max = (ti_int)((~(tu_int)0) / 2);
+ const ti_int ti_min = -ti_max - 1;
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0)
+ return 0;
+ ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15);
+ ti_int r = fb.u.low.all;
+ if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT)
+ return a > 0 ? ti_max : ti_min;
+ if (e > 63)
+ r <<= (e - 63);
+ else
+ r >>= (63 - e);
+ return (r ^ s) - s;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/floatdidf.c b/contrib/libs/cxxsupp/builtins/floatdidf.c
index 00aa9138ad..a300c9f312 100644
--- a/contrib/libs/cxxsupp/builtins/floatdidf.c
+++ b/contrib/libs/cxxsupp/builtins/floatdidf.c
@@ -1,107 +1,107 @@
-/*===-- floatdidf.c - Implement __floatdidf -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===
- *
- * This file implements __floatdidf for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: convert a to a double, rounding toward even. */
-
-/* Assumption: double is a IEEE 64 bit floating point type
- * di_int is a 64 bit integral type
- */
-
-/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
-
-ARM_EABI_FNALIAS(l2d, floatdidf)
-
-#ifndef __SOFT_FP__
-/* Support for systems that have hardware floating-point; we'll set the inexact flag
- * as a side-effect of this computation.
- */
-
-COMPILER_RT_ABI double
-__floatdidf(di_int a)
-{
- static const double twop52 = 4503599627370496.0; // 0x1.0p52
- static const double twop32 = 4294967296.0; // 0x1.0p32
-
- union { int64_t x; double d; } low = { .d = twop52 };
-
- const double high = (int32_t)(a >> 32) * twop32;
- low.x |= a & INT64_C(0x00000000ffffffff);
-
- const double result = (high - twop52) + low.d;
- return result;
-}
-
-#else
-/* Support for systems that don't have hardware floating-point; there are no flags to
- * set, and we don't want to code-gen to an unknown soft-float implementation.
- */
-
-COMPILER_RT_ABI double
-__floatdidf(di_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(di_int) * CHAR_BIT;
- const di_int s = a >> (N-1);
- a = (a ^ s) - s;
- int sd = N - __builtin_clzll(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > DBL_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit DBL_MANT_DIG-1 bits to the right of 1
- * Q = bit DBL_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case DBL_MANT_DIG + 1:
- a <<= 1;
- break;
- case DBL_MANT_DIG + 2:
- break;
- default:
- a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) |
- ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
- if (a & ((du_int)1 << DBL_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- else
- {
- a <<= (DBL_MANT_DIG - sd);
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- double_bits fb;
- fb.u.high = ((su_int)s & 0x80000000) | /* sign */
- ((e + 1023) << 20) | /* exponent */
- ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
- fb.u.low = (su_int)a; /* mantissa-low */
- return fb.f;
-}
-#endif
+/*===-- floatdidf.c - Implement __floatdidf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __floatdidf for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: convert a to a double, rounding toward even. */
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * di_int is a 64 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+ARM_EABI_FNALIAS(l2d, floatdidf)
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; we'll set the inexact flag
+ * as a side-effect of this computation.
+ */
+
+COMPILER_RT_ABI double
+__floatdidf(di_int a)
+{
+ static const double twop52 = 4503599627370496.0; // 0x1.0p52
+ static const double twop32 = 4294967296.0; // 0x1.0p32
+
+ union { int64_t x; double d; } low = { .d = twop52 };
+
+ const double high = (int32_t)(a >> 32) * twop32;
+ low.x |= a & INT64_C(0x00000000ffffffff);
+
+ const double result = (high - twop52) + low.d;
+ return result;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no flags to
+ * set, and we don't want to code-gen to an unknown soft-float implementation.
+ */
+
+COMPILER_RT_ABI double
+__floatdidf(di_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(di_int) * CHAR_BIT;
+ const di_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.high = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+#endif
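
The hard-float __floatdidf above relies on a classic trick: the low 32 bits of the integer are OR-ed straight into the significand of the constant 2^52, so one subtraction of 2^52 recovers them exactly as a double, while the high 32 bits are converted as an int32 and scaled by 2^32; the final addition performs the only rounding. A standalone sketch of the same trick (helper name is mine; the arithmetic right shift on a signed value is assumed, as in the routine above):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the 2^52 trick used by the hard-float __floatdidf above:
 * int64 -> double without a 64-bit integer-to-FP instruction. */
static double floatdidf_sketch(int64_t a) {
    const double twop52 = 4503599627370496.0;   /* 2^52 */
    const double twop32 = 4294967296.0;         /* 2^32 */

    /* Put the low 32 bits of a directly into the significand of 2^52. */
    uint64_t low_bits;
    memcpy(&low_bits, &twop52, sizeof low_bits);
    low_bits |= (uint64_t)a & 0xFFFFFFFFu;
    double low;
    memcpy(&low, &low_bits, sizeof low);        /* low == 2^52 + (a & 0xFFFFFFFF), exactly */

    /* The high 32 bits are converted as a signed 32-bit integer and rescaled. */
    double high = (double)(int32_t)(a >> 32) * twop32;

    /* (high - 2^52) + low == a, rounded once when a needs more than 53 bits. */
    return (high - twop52) + low;
}

int main(void) {
    printf("%.1f\n", floatdidf_sketch(-123456789012345LL));  /* -123456789012345.0 */
    return 0;
}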
diff --git a/contrib/libs/cxxsupp/builtins/floatdisf.c b/contrib/libs/cxxsupp/builtins/floatdisf.c
index 31fcb8ff17..3e47580ef5 100644
--- a/contrib/libs/cxxsupp/builtins/floatdisf.c
+++ b/contrib/libs/cxxsupp/builtins/floatdisf.c
@@ -1,80 +1,80 @@
-/*===-- floatdisf.c - Implement __floatdisf -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===
- *
- * This file implements __floatdisf for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
+/*===-- floatdisf.c - Implement __floatdisf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __floatdisf for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+/* Returns: convert a to a float, rounding toward even.*/
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * di_int is a 64 bit integral type
*/
-
-/* Returns: convert a to a float, rounding toward even.*/
-
-/* Assumption: float is a IEEE 32 bit floating point type
- * di_int is a 64 bit integral type
- */
-
-/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(l2f, floatdisf)
-
-COMPILER_RT_ABI float
-__floatdisf(di_int a)
-{
- if (a == 0)
- return 0.0F;
- const unsigned N = sizeof(di_int) * CHAR_BIT;
- const di_int s = a >> (N-1);
- a = (a ^ s) - s;
- int sd = N - __builtin_clzll(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > FLT_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit FLT_MANT_DIG-1 bits to the right of 1
- * Q = bit FLT_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case FLT_MANT_DIG + 1:
- a <<= 1;
- break;
- case FLT_MANT_DIG + 2:
- break;
- default:
- a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) |
- ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
- if (a & ((du_int)1 << FLT_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- else
- {
- a <<= (FLT_MANT_DIG - sd);
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- float_bits fb;
- fb.u = ((su_int)s & 0x80000000) | /* sign */
- ((e + 127) << 23) | /* exponent */
- ((su_int)a & 0x007FFFFF); /* mantissa */
- return fb.f;
-}
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(l2f, floatdisf)
+
+COMPILER_RT_ABI float
+__floatdisf(di_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(di_int) * CHAR_BIT;
+ const di_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
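
When the integer carries more significant bits than the target significand, the soft paths above keep two extra bits below the kept significand (the P/Q pair) and fold every lower bit into a sticky bit R; OR-ing, incrementing, and shifting then yields round-to-nearest, ties-to-even. The sketch below is the standard guard/sticky formulation of the same rounding on plain integers, not a line-by-line copy of the routine:

#include <stdint.h>
#include <stdio.h>

/* Round a non-zero x to `bits` significant bits, nearest-even, returning the
 * rounded significand left-aligned to `bits` bits plus an exponent adjustment.
 * This is the guard/sticky equivalent of the P/Q/R rounding in __floatdisf. */
static uint64_t round_to_bits(uint64_t x, int bits, int *exp_adjust) {
    int sd = 64 - __builtin_clzll(x);          /* significant digits in x */
    *exp_adjust = 0;
    if (sd <= bits)
        return x << (bits - sd);               /* exact: just left-align */

    int drop = sd - bits;                      /* bits that must be discarded */
    uint64_t kept   = x >> drop;
    uint64_t guard  = (x >> (drop - 1)) & 1;   /* first discarded bit */
    uint64_t sticky = (x & (((uint64_t)1 << (drop - 1)) - 1)) != 0;

    if (guard && (sticky || (kept & 1)))       /* above half, or tie rounded to even */
        ++kept;
    if (kept >> bits) {                        /* rounding carried out the top */
        kept >>= 1;
        *exp_adjust = 1;
    }
    return kept;
}

int main(void) {
    int adj;
    /* 25 one-bits rounded to 24 bits: rounds up and carries, giving 0x800000, exp +1. */
    printf("%#llx adj=%d\n",
           (unsigned long long)round_to_bits(0x1FFFFFFull, 24, &adj), adj);
    return 0;
}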
diff --git a/contrib/libs/cxxsupp/builtins/floatditf.c b/contrib/libs/cxxsupp/builtins/floatditf.c
index 6effa55f5e..cd51dd8aad 100644
--- a/contrib/libs/cxxsupp/builtins/floatditf.c
+++ b/contrib/libs/cxxsupp/builtins/floatditf.c
@@ -1,50 +1,50 @@
-//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements di_int to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-COMPILER_RT_ABI fp_t __floatditf(di_int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0)
- return fromRep(0);
-
- // All other cases begin by extracting the sign and absolute value of a
- rep_t sign = 0;
- du_int aAbs = (du_int)a;
- if (a < 0) {
- sign = signBit;
- aAbs = ~(du_int)a + 1U;
- }
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
- rep_t result;
-
- // Shift a into the significand field, rounding if it is a right-shift
- const int shift = significandBits - exponent;
- result = (rep_t)aAbs << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- // Insert the sign bit and return
- return fromRep(result | sign);
-}
-
-#endif
+//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements di_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatditf(di_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ du_int aAbs = (du_int)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = ~(du_int)a + 1U;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/floatdixf.c b/contrib/libs/cxxsupp/builtins/floatdixf.c
index 64a46e11b3..d39e81d7ca 100644
--- a/contrib/libs/cxxsupp/builtins/floatdixf.c
+++ b/contrib/libs/cxxsupp/builtins/floatdixf.c
@@ -1,46 +1,46 @@
-/* ===-- floatdixf.c - Implement __floatdixf -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatdixf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-
-/* Returns: convert a to a long double, rounding toward even. */
-
-/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
- * di_int is a 64 bit integral type
+/* ===-- floatdixf.c - Implement __floatdixf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatdixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
*/
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI long double
-__floatdixf(di_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(di_int) * CHAR_BIT;
- const di_int s = a >> (N-1);
- a = (a ^ s) - s;
- int clz = __builtin_clzll(a);
- int e = (N - 1) - clz ; /* exponent */
- long_double_bits fb;
- fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */
- (e + 16383); /* exponent */
- fb.u.low.all = a << clz; /* mantissa */
- return fb.f;
-}
-
-#endif /* !_ARCH_PPC */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * di_int is a 64 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI long double
+__floatdixf(di_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(di_int) * CHAR_BIT;
+ const di_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int clz = __builtin_clzll(a);
+ int e = (N - 1) - clz ; /* exponent */
+ long_double_bits fb;
+ fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */
+ (e + 16383); /* exponent */
+ fb.u.low.all = a << clz; /* mantissa */
+ return fb.f;
+}
+
+#endif /* !_ARCH_PPC */
diff --git a/contrib/libs/cxxsupp/builtins/floatsidf.c b/contrib/libs/cxxsupp/builtins/floatsidf.c
index 8495859803..1cf99b782a 100644
--- a/contrib/libs/cxxsupp/builtins/floatsidf.c
+++ b/contrib/libs/cxxsupp/builtins/floatsidf.c
@@ -1,53 +1,53 @@
-//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements integer to double-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(i2d, floatsidf)
-
-COMPILER_RT_ABI fp_t
-__floatsidf(int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0)
- return fromRep(0);
-
- // All other cases begin by extracting the sign and absolute value of a
- rep_t sign = 0;
- if (a < 0) {
- sign = signBit;
- a = -a;
- }
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit. Extra
- // cast to unsigned int is necessary to get the correct behavior for
- // the input INT_MIN.
- const int shift = significandBits - exponent;
- result = (rep_t)(unsigned int)a << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- // Insert the sign bit and return
- return fromRep(result | sign);
-}
+//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(i2d, floatsidf)
+
+COMPILER_RT_ABI fp_t
+__floatsidf(int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ if (a < 0) {
+ sign = signBit;
+ a = -a;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit. Extra
+ // cast to unsigned int is necessary to get the correct behavior for
+ // the input INT_MIN.
+ const int shift = significandBits - exponent;
+ result = (rep_t)(unsigned int)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
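
Because a 32-bit int always fits in the 52 fraction bits of a double, __floatsidf never rounds: it shifts the absolute value so its leading 1 lands on the implicit-bit position, clears that bit, and adds the biased exponent on top. The sketch below assembles the same bit pattern with explicit operations (names are mine):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of __floatsidf above: build an IEEE-754 double bit pattern from a
 * 32-bit signed integer. No rounding is needed since 32 < 53 significand bits. */
static double floatsidf_sketch(int32_t a) {
    if (a == 0)
        return 0.0;

    uint64_t sign = 0;
    uint32_t abs_a = (uint32_t)a;
    if (a < 0) {
        sign = (uint64_t)1 << 63;
        abs_a = 0u - (uint32_t)a;              /* safe even for INT32_MIN */
    }

    int exponent = 31 - __builtin_clz(abs_a);  /* position of the leading 1 */
    /* Shift the leading 1 onto bit 52, then clear it (it is implicit). */
    uint64_t significand = ((uint64_t)abs_a << (52 - exponent)) & ~((uint64_t)1 << 52);
    uint64_t rep = sign | ((uint64_t)(exponent + 1023) << 52) | significand;

    double d;
    memcpy(&d, &rep, sizeof d);
    return d;
}

int main(void) {
    printf("%g %g\n", floatsidf_sketch(-40), floatsidf_sketch(7));  /* -40 7 */
    return 0;
}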
diff --git a/contrib/libs/cxxsupp/builtins/floatsisf.c b/contrib/libs/cxxsupp/builtins/floatsisf.c
index 3047aed6db..467dd1d1ea 100644
--- a/contrib/libs/cxxsupp/builtins/floatsisf.c
+++ b/contrib/libs/cxxsupp/builtins/floatsisf.c
@@ -1,59 +1,59 @@
-//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements integer to single-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(i2f, floatsisf)
-
-COMPILER_RT_ABI fp_t
-__floatsisf(int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0)
- return fromRep(0);
-
- // All other cases begin by extracting the sign and absolute value of a
- rep_t sign = 0;
- if (a < 0) {
- sign = signBit;
- a = -a;
- }
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
- rep_t result;
-
- // Shift a into the significand field, rounding if it is a right-shift
- if (exponent <= significandBits) {
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
- } else {
- const int shift = exponent - significandBits;
- result = (rep_t)a >> shift ^ implicitBit;
- rep_t round = (rep_t)a << (typeWidth - shift);
- if (round > signBit) result++;
- if (round == signBit) result += result & 1;
- }
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- // Insert the sign bit and return
- return fromRep(result | sign);
-}
+//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(i2f, floatsisf)
+
+COMPILER_RT_ABI fp_t
+__floatsisf(int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ if (a < 0) {
+ sign = signBit;
+ a = -a;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ if (exponent <= significandBits) {
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+ } else {
+ const int shift = exponent - significandBits;
+ result = (rep_t)a >> shift ^ implicitBit;
+ rep_t round = (rep_t)a << (typeWidth - shift);
+ if (round > signBit) result++;
+ if (round == signBit) result += result & 1;
+ }
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
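
__floatsisf, by contrast, has to round, and it does so by left-aligning the discarded bits into a full word: once aligned, the halfway point is exactly the sign bit, so ">" means round up and "==" means a tie broken to even. The same comparison in isolation, as a small sketch:

#include <stdint.h>
#include <stdio.h>

/* Drop `shift` low bits of `value` (0 < shift < 32) with round-to-nearest,
 * ties-to-even, using the comparison from __floatsisf above: the discarded
 * bits are left-aligned, so 0x80000000 is exactly the halfway point. */
static uint32_t drop_bits_rne(uint32_t value, int shift) {
    uint32_t kept  = value >> shift;
    uint32_t round = value << (32 - shift);       /* discarded bits, left-aligned */
    if (round > 0x80000000u) kept++;              /* more than half: round up     */
    if (round == 0x80000000u) kept += kept & 1;   /* exactly half: tie to even    */
    return kept;
}

int main(void) {
    /* 37/8 = 4.625 -> 5, 39/8 = 4.875 -> 5, 36/8 = 4.5 -> 4 (ties to even). */
    printf("%u %u %u\n", drop_bits_rne(37, 3), drop_bits_rne(39, 3), drop_bits_rne(36, 3));
    return 0;
}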
diff --git a/contrib/libs/cxxsupp/builtins/floatsitf.c b/contrib/libs/cxxsupp/builtins/floatsitf.c
index 751a6ad8ca..f0abca363b 100644
--- a/contrib/libs/cxxsupp/builtins/floatsitf.c
+++ b/contrib/libs/cxxsupp/builtins/floatsitf.c
@@ -1,50 +1,50 @@
-//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements integer to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-COMPILER_RT_ABI fp_t __floatsitf(int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0)
- return fromRep(0);
-
- // All other cases begin by extracting the sign and absolute value of a
- rep_t sign = 0;
- unsigned aAbs = (unsigned)a;
- if (a < 0) {
- sign = signBit;
- aAbs = ~(unsigned)a + 1U;
- }
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(aAbs);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)aAbs << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- // Insert the sign bit and return
- return fromRep(result | sign);
-}
-
-#endif
+//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatsitf(int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ unsigned aAbs = (unsigned)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = ~(unsigned)a + 1U;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/floattidf.c b/contrib/libs/cxxsupp/builtins/floattidf.c
index bf3af1d51e..6331ba5737 100644
--- a/contrib/libs/cxxsupp/builtins/floattidf.c
+++ b/contrib/libs/cxxsupp/builtins/floattidf.c
@@ -1,83 +1,83 @@
-/* ===-- floattidf.c - Implement __floattidf -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floattidf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a double, rounding toward even.*/
-
-/* Assumption: double is a IEEE 64 bit floating point type
- * ti_int is a 128 bit integral type
+/* ===-- floattidf.c - Implement __floattidf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floattidf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
*/
-
-/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
-
-COMPILER_RT_ABI double
-__floattidf(ti_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(ti_int) * CHAR_BIT;
- const ti_int s = a >> (N-1);
- a = (a ^ s) - s;
- int sd = N - __clzti2(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > DBL_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit DBL_MANT_DIG-1 bits to the right of 1
- * Q = bit DBL_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case DBL_MANT_DIG + 1:
- a <<= 1;
- break;
- case DBL_MANT_DIG + 2:
- break;
- default:
- a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) |
- ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
- if (a & ((tu_int)1 << DBL_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- else
- {
- a <<= (DBL_MANT_DIG - sd);
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- double_bits fb;
- fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */
- ((e + 1023) << 20) | /* exponent */
- ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
- fb.u.s.low = (su_int)a; /* mantissa-low */
- return fb.f;
-}
-
-#endif /* CRT_HAS_128BIT */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a double, rounding toward even.*/
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI double
+__floattidf(ti_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.s.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/floattisf.c b/contrib/libs/cxxsupp/builtins/floattisf.c
index aa57609aa5..f1b585f2c3 100644
--- a/contrib/libs/cxxsupp/builtins/floattisf.c
+++ b/contrib/libs/cxxsupp/builtins/floattisf.c
@@ -1,82 +1,82 @@
-/* ===-- floattisf.c - Implement __floattisf -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floattisf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a float, rounding toward even. */
-
-/* Assumption: float is a IEEE 32 bit floating point type
- * ti_int is a 128 bit integral type
- */
-
-/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
-
-COMPILER_RT_ABI float
-__floattisf(ti_int a)
-{
- if (a == 0)
- return 0.0F;
- const unsigned N = sizeof(ti_int) * CHAR_BIT;
- const ti_int s = a >> (N-1);
- a = (a ^ s) - s;
- int sd = N - __clzti2(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > FLT_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit FLT_MANT_DIG-1 bits to the right of 1
- * Q = bit FLT_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case FLT_MANT_DIG + 1:
- a <<= 1;
- break;
- case FLT_MANT_DIG + 2:
- break;
- default:
- a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) |
- ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
- if (a & ((tu_int)1 << FLT_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- else
- {
- a <<= (FLT_MANT_DIG - sd);
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- float_bits fb;
- fb.u = ((su_int)s & 0x80000000) | /* sign */
- ((e + 127) << 23) | /* exponent */
- ((su_int)a & 0x007FFFFF); /* mantissa */
- return fb.f;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- floattisf.c - Implement __floattisf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floattisf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a float, rounding toward even. */
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * ti_int is a 128 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI float
+__floattisf(ti_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
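The sign handling at the top of __floattisf (s = a >> (N-1); a = (a ^ s) - s) is a branch-free absolute value: s is 0 for non-negative inputs and all-ones for negative ones, and the low bit of s later supplies the IEEE sign. A minimal standalone sketch of the same trick on a 64-bit value; the names and harness are illustrative, not part of compiler-rt, the arithmetic is done in unsigned types so the most negative input wraps instead of overflowing, and an arithmetic right shift on signed values is assumed, as the original also assumes.

#include <assert.h>
#include <stdint.h>

/* Branch-free |a| plus an IEEE-style sign bit, mirroring the first lines of
 * __floattisf but on int64_t for brevity. */
static uint64_t abs_and_sign(int64_t a, uint32_t *sign_bit)
{
    const int64_t s = a >> 63;              /* arithmetic shift: 0 or -1 */
    *sign_bit = (uint32_t)s & 0x80000000u;  /* ready to OR into a float's sign */
    /* (a ^ s) - s negates when s == -1 and is the identity when s == 0. */
    return ((uint64_t)a ^ (uint64_t)s) - (uint64_t)s;
}

int main(void)
{
    uint32_t sign;
    assert(abs_and_sign(42, &sign) == 42 && sign == 0);
    assert(abs_and_sign(-42, &sign) == 42 && sign == 0x80000000u);
    assert(abs_and_sign(INT64_MIN, &sign) == (uint64_t)1 << 63 && sign == 0x80000000u);
    return 0;
}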
diff --git a/contrib/libs/cxxsupp/builtins/floattixf.c b/contrib/libs/cxxsupp/builtins/floattixf.c
index a63e238940..1203b3a96e 100644
--- a/contrib/libs/cxxsupp/builtins/floattixf.c
+++ b/contrib/libs/cxxsupp/builtins/floattixf.c
@@ -1,84 +1,84 @@
-/* ===-- floattixf.c - Implement __floattixf -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floattixf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a long double, rounding toward even. */
-
-/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
- * ti_int is a 128 bit integral type
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI long double
-__floattixf(ti_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(ti_int) * CHAR_BIT;
- const ti_int s = a >> (N-1);
- a = (a ^ s) - s;
- int sd = N - __clzti2(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > LDBL_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit LDBL_MANT_DIG-1 bits to the right of 1
- * Q = bit LDBL_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case LDBL_MANT_DIG + 1:
- a <<= 1;
- break;
- case LDBL_MANT_DIG + 2:
- break;
- default:
- a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
- ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
- if (a & ((tu_int)1 << LDBL_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to LDBL_MANT_DIG bits */
- }
- else
- {
- a <<= (LDBL_MANT_DIG - sd);
- /* a is now rounded to LDBL_MANT_DIG bits */
- }
- long_double_bits fb;
- fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */
- (e + 16383); /* exponent */
- fb.u.low.all = (du_int)a; /* mantissa */
- return fb.f;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- floattixf.c - Implement __floattixf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floattixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * ti_int is a 128 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI long double
+__floattixf(ti_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ long_double_bits fb;
+ fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */
+ (e + 16383); /* exponent */
+ fb.u.low.all = (du_int)a; /* mantissa */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
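Unlike the binary32 and binary64 paths, the 80-bit format written here keeps its leading significand bit explicit, which is why fb.u.low.all receives the full normalized mantissa and only the sign and the 16383-biased exponent go into the 16-bit field. A small host-side check of that layout; it only runs where long double really is the x87 80-bit type (LDBL_MANT_DIG == 64) and it assumes the usual little-endian x86 byte order.

#include <assert.h>
#include <float.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
#if LDBL_MANT_DIG == 64          /* x87 80-bit extended precision */
    long double x = 1.0L;
    unsigned char bytes[sizeof x];
    memcpy(bytes, &x, sizeof x);

    uint64_t mantissa;
    uint16_t sign_and_exponent;
    memcpy(&mantissa, bytes, 8);            /* low 8 bytes: 64-bit significand */
    memcpy(&sign_and_exponent, bytes + 8, 2);

    assert(mantissa == UINT64_C(0x8000000000000000)); /* explicit integer bit */
    assert((sign_and_exponent & 0x7FFF) == 16383);    /* bias used above      */
#endif
    return 0;
}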
diff --git a/contrib/libs/cxxsupp/builtins/floatundidf.c b/contrib/libs/cxxsupp/builtins/floatundidf.c
index 4136d4719f..67aa86e5e5 100644
--- a/contrib/libs/cxxsupp/builtins/floatundidf.c
+++ b/contrib/libs/cxxsupp/builtins/floatundidf.c
@@ -1,106 +1,106 @@
-/* ===-- floatundidf.c - Implement __floatundidf ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatundidf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
+/* ===-- floatundidf.c - Implement __floatundidf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatundidf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+/* Returns: convert a to a double, rounding toward even. */
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * du_int is a 64 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(ul2d, floatundidf)
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; we'll set the inexact flag
+ * as a side-effect of this computation.
+ */
+
+COMPILER_RT_ABI double
+__floatundidf(du_int a)
+{
+ static const double twop52 = 4503599627370496.0; // 0x1.0p52
+ static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
+ static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
+
+ union { uint64_t x; double d; } high = { .d = twop84 };
+ union { uint64_t x; double d; } low = { .d = twop52 };
+
+ high.x |= a >> 32;
+ low.x |= a & UINT64_C(0x00000000ffffffff);
+
+ const double result = (high.d - twop84_plus_twop52) + low.d;
+ return result;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no flags to
+ * set, and we don't want to code-gen to an unknown soft-float implementation.
*/
-
-/* Returns: convert a to a double, rounding toward even. */
-
-/* Assumption: double is a IEEE 64 bit floating point type
- * du_int is a 64 bit integral type
- */
-
-/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(ul2d, floatundidf)
-
-#ifndef __SOFT_FP__
-/* Support for systems that have hardware floating-point; we'll set the inexact flag
- * as a side-effect of this computation.
- */
-
-COMPILER_RT_ABI double
-__floatundidf(du_int a)
-{
- static const double twop52 = 4503599627370496.0; // 0x1.0p52
- static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
- static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
-
- union { uint64_t x; double d; } high = { .d = twop84 };
- union { uint64_t x; double d; } low = { .d = twop52 };
-
- high.x |= a >> 32;
- low.x |= a & UINT64_C(0x00000000ffffffff);
-
- const double result = (high.d - twop84_plus_twop52) + low.d;
- return result;
-}
-
-#else
-/* Support for systems that don't have hardware floating-point; there are no flags to
- * set, and we don't want to code-gen to an unknown soft-float implementation.
- */
-
-COMPILER_RT_ABI double
-__floatundidf(du_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(du_int) * CHAR_BIT;
- int sd = N - __builtin_clzll(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > DBL_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit DBL_MANT_DIG-1 bits to the right of 1
- * Q = bit DBL_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case DBL_MANT_DIG + 1:
- a <<= 1;
- break;
- case DBL_MANT_DIG + 2:
- break;
- default:
- a = (a >> (sd - (DBL_MANT_DIG+2))) |
- ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
- if (a & ((du_int)1 << DBL_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- else
- {
- a <<= (DBL_MANT_DIG - sd);
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- double_bits fb;
- fb.u.high = ((e + 1023) << 20) | /* exponent */
- ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
- fb.u.low = (su_int)a; /* mantissa-low */
- return fb.f;
-}
-#endif
+
+COMPILER_RT_ABI double
+__floatundidf(du_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(du_int) * CHAR_BIT;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.high = ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+#endif
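The hardware-FP branch above avoids any explicit rounding: each 32-bit half of the input is OR'd into the low significand bits of a double pinned at 2^84 and 2^52, where both halves are exactly representable, so only the final addition can round, and it rounds correctly while raising inexact as a side effect. A standalone restatement of that path, assuming IEEE-754 binary64 doubles and the default round-to-nearest mode; the harness is illustrative.

#include <assert.h>
#include <stdint.h>

static double u64_to_double(uint64_t a)
{
    static const double twop52 = 4503599627370496.0;           /* 0x1.0p52 */
    static const double twop84 = 19342813113834066795298816.0; /* 0x1.0p84 */
    static const double twop84_plus_twop52 =
        19342813118337666422669312.0;                          /* 0x1.0p84 + 0x1.0p52 */

    union { uint64_t x; double d; } high = { .d = twop84 };
    union { uint64_t x; double d; } low  = { .d = twop52 };

    high.x |= a >> 32;                          /* 2^84 + (a >> 32) * 2^32, exact */
    low.x  |= a & UINT64_C(0x00000000ffffffff); /* 2^52 + (a & 0xffffffff), exact */

    /* The subtraction is exact; the addition does the one and only rounding. */
    return (high.d - twop84_plus_twop52) + low.d;
}

int main(void)
{
    assert(u64_to_double(0) == 0.0);
    assert(u64_to_double(1) == 1.0);
    assert(u64_to_double(UINT64_C(0x00000000FFFFFFFF)) == 4294967295.0);
    assert(u64_to_double(UINT64_C(0xFFFFFFFFFFFFFFFF)) == 18446744073709551616.0);
    return 0;
}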
diff --git a/contrib/libs/cxxsupp/builtins/floatundisf.c b/contrib/libs/cxxsupp/builtins/floatundisf.c
index fbf2a9ec56..713a44abc8 100644
--- a/contrib/libs/cxxsupp/builtins/floatundisf.c
+++ b/contrib/libs/cxxsupp/builtins/floatundisf.c
@@ -1,77 +1,77 @@
-/*===-- floatundisf.c - Implement __floatundisf ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatundisf for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
- */
-
-/* Returns: convert a to a float, rounding toward even. */
-
-/* Assumption: float is a IEEE 32 bit floating point type
- * du_int is a 64 bit integral type
- */
-
-/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(ul2f, floatundisf)
-
-COMPILER_RT_ABI float
-__floatundisf(du_int a)
-{
- if (a == 0)
- return 0.0F;
- const unsigned N = sizeof(du_int) * CHAR_BIT;
- int sd = N - __builtin_clzll(a); /* number of significant digits */
- int e = sd - 1; /* 8 exponent */
- if (sd > FLT_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit FLT_MANT_DIG-1 bits to the right of 1
- * Q = bit FLT_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case FLT_MANT_DIG + 1:
- a <<= 1;
- break;
- case FLT_MANT_DIG + 2:
- break;
- default:
- a = (a >> (sd - (FLT_MANT_DIG+2))) |
- ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
- if (a & ((du_int)1 << FLT_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- else
- {
- a <<= (FLT_MANT_DIG - sd);
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- float_bits fb;
- fb.u = ((e + 127) << 23) | /* exponent */
- ((su_int)a & 0x007FFFFF); /* mantissa */
- return fb.f;
-}
+/*===-- floatundisf.c - Implement __floatundisf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatundisf for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+/* Returns: convert a to a float, rounding toward even. */
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * du_int is a 64 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(ul2f, floatundisf)
+
+COMPILER_RT_ABI float
+__floatundisf(du_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(du_int) * CHAR_BIT;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* 8 exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
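The switch plus the three lines after it are the whole rounding story in the software path: reduce the value to FLT_MANT_DIG + 2 bits with a sticky low bit, fold P into R, add one, and drop the two extra bits. A self-contained copy of that sequence using only standard types, checked against the host's own conversion; the harness and names are illustrative, and it assumes the host converts uint64_t to float with IEEE round-to-nearest-even, which is exactly what this file implements.

#include <assert.h>
#include <float.h>
#include <limits.h>
#include <stdint.h>

/* Round a 64-bit value to FLT_MANT_DIG significant bits, nearest-even,
 * using the same P/Q/R reduction as __floatundisf above. */
static float u64_to_float_rne(uint64_t a)
{
    if (a == 0)
        return 0.0f;
    const unsigned N = sizeof(uint64_t) * CHAR_BIT;
    int sd = N - __builtin_clzll(a);   /* significant digits */
    int e = sd - 1;                    /* unbiased exponent  */
    if (sd > FLT_MANT_DIG) {
        switch (sd) {
        case FLT_MANT_DIG + 1:
            a <<= 1;
            break;
        case FLT_MANT_DIG + 2:
            break;
        default:
            a = (a >> (sd - (FLT_MANT_DIG + 2))) |
                ((a & (UINT64_MAX >> ((N + FLT_MANT_DIG + 2) - sd))) != 0);
        }
        a |= (a & 4) != 0;  /* fold P into R                       */
        ++a;                /* round; may carry into a new top bit */
        a >>= 2;            /* drop Q and R                        */
        if (a & ((uint64_t)1 << FLT_MANT_DIG)) {
            a >>= 1;
            ++e;
        }
    } else {
        a <<= (FLT_MANT_DIG - sd);
    }
    union { uint32_t u; float f; } fb;
    fb.u = ((uint32_t)(e + 127) << 23) | ((uint32_t)a & 0x007FFFFF);
    return fb.f;
}

int main(void)
{
    const uint64_t tests[] = {
        1, 0x00FFFFFF, 0x01000001, UINT64_C(0x0123456789ABCDEF), UINT64_MAX
    };
    for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; ++i)
        assert(u64_to_float_rne(tests[i]) == (float)tests[i]);
    return 0;
}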
diff --git a/contrib/libs/cxxsupp/builtins/floatunditf.c b/contrib/libs/cxxsupp/builtins/floatunditf.c
index f6706c860e..8098e95e82 100644
--- a/contrib/libs/cxxsupp/builtins/floatunditf.c
+++ b/contrib/libs/cxxsupp/builtins/floatunditf.c
@@ -1,40 +1,40 @@
-//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements du_int to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clzll(a);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- return fromRep(result);
-}
-
-#endif
+//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements du_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clzll(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/floatundixf.c b/contrib/libs/cxxsupp/builtins/floatundixf.c
index d8eaf5b80d..ca5e06d64d 100644
--- a/contrib/libs/cxxsupp/builtins/floatundixf.c
+++ b/contrib/libs/cxxsupp/builtins/floatundixf.c
@@ -1,42 +1,42 @@
-/* ===-- floatundixf.c - Implement __floatundixf ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatundixf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-
-/* Returns: convert a to a long double, rounding toward even. */
-
-/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
- * du_int is a 64 bit integral type
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-COMPILER_RT_ABI long double
-__floatundixf(du_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(du_int) * CHAR_BIT;
- int clz = __builtin_clzll(a);
- int e = (N - 1) - clz ; /* exponent */
- long_double_bits fb;
- fb.u.high.s.low = (e + 16383); /* exponent */
- fb.u.low.all = a << clz; /* mantissa */
- return fb.f;
-}
-
-#endif /* _ARCH_PPC */
+/* ===-- floatundixf.c - Implement __floatundixf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatundixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * du_int is a 64 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+COMPILER_RT_ABI long double
+__floatundixf(du_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(du_int) * CHAR_BIT;
+ int clz = __builtin_clzll(a);
+ int e = (N - 1) - clz ; /* exponent */
+ long_double_bits fb;
+ fb.u.high.s.low = (e + 16383); /* exponent */
+ fb.u.low.all = a << clz; /* mantissa */
+ return fb.f;
+}
+
+#endif /* _ARCH_PPC */
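There is no rounding branch here because the 80-bit format's 64-bit significand can hold any du_int exactly; the value is only normalized with clz and given a biased exponent. A quick host-side check of that exactness property, guarded so it only runs where long double really has at least 64 significand bits; the sample values are arbitrary.

#include <assert.h>
#include <float.h>
#include <stdint.h>

int main(void)
{
#if LDBL_MANT_DIG >= 64
    const uint64_t samples[] = {
        1, UINT64_C(0x0123456789ABCDEF),
        UINT64_C(0x8000000000000001), UINT64_C(0xFFFFFFFFFFFFFFFF)
    };
    for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; ++i) {
        /* Every 64-bit unsigned value converts exactly, so it round-trips. */
        long double x = (long double)samples[i];
        assert((uint64_t)x == samples[i]);
    }
#endif
    return 0;
}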
diff --git a/contrib/libs/cxxsupp/builtins/floatunsidf.c b/contrib/libs/cxxsupp/builtins/floatunsidf.c
index ed45a1658f..445e18041c 100644
--- a/contrib/libs/cxxsupp/builtins/floatunsidf.c
+++ b/contrib/libs/cxxsupp/builtins/floatunsidf.c
@@ -1,42 +1,42 @@
-//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unsigned integer to double-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(ui2d, floatunsidf)
-
-COMPILER_RT_ABI fp_t
-__floatunsidf(unsigned int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- return fromRep(result);
-}
+//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(ui2d, floatunsidf)
+
+COMPILER_RT_ABI fp_t
+__floatunsidf(unsigned int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
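Written out for the concrete binary64 constants, the construction above is: shift the value so its leading 1 lands on the implicit-bit position, XOR that bit away, then add the biased exponent into the packed word; no rounding is needed because 32 bits always fit in the 52-bit significand. A minimal sketch with the fp_lib.h names spelled out as literals, assuming IEEE-754 binary64; the harness is illustrative.

#include <assert.h>
#include <stdint.h>

static double uint_to_double(uint32_t a)
{
    if (a == 0)
        return 0.0;
    const int significandBits  = 52;
    const int exponentBias     = 1023;
    const uint64_t implicitBit = (uint64_t)1 << significandBits;

    const int exponent = 31 - __builtin_clz(a);      /* floor(log2(a)) */

    /* Shift into the significand field and clear the implicit bit. */
    uint64_t rep = ((uint64_t)a << (significandBits - exponent)) ^ implicitBit;

    /* Insert the exponent; adding works because those bits are still zero. */
    rep += (uint64_t)(exponent + exponentBias) << significandBits;

    union { uint64_t i; double f; } u = { .i = rep };
    return u.f;
}

int main(void)
{
    assert(uint_to_double(1) == 1.0);
    assert(uint_to_double(7) == 7.0);
    assert(uint_to_double(0x80000000u) == 2147483648.0);
    assert(uint_to_double(0xFFFFFFFFu) == 4294967295.0);
    return 0;
}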
diff --git a/contrib/libs/cxxsupp/builtins/floatunsisf.c b/contrib/libs/cxxsupp/builtins/floatunsisf.c
index 54073a29da..ea6f161adc 100644
--- a/contrib/libs/cxxsupp/builtins/floatunsisf.c
+++ b/contrib/libs/cxxsupp/builtins/floatunsisf.c
@@ -1,50 +1,50 @@
-//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unsigned integer to single-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-ARM_EABI_FNALIAS(ui2f, floatunsisf)
-
-COMPILER_RT_ABI fp_t
-__floatunsisf(unsigned int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
- rep_t result;
-
- // Shift a into the significand field, rounding if it is a right-shift
- if (exponent <= significandBits) {
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
- } else {
- const int shift = exponent - significandBits;
- result = (rep_t)a >> shift ^ implicitBit;
- rep_t round = (rep_t)a << (typeWidth - shift);
- if (round > signBit) result++;
- if (round == signBit) result += result & 1;
- }
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- return fromRep(result);
-}
+//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+ARM_EABI_FNALIAS(ui2f, floatunsisf)
+
+COMPILER_RT_ABI fp_t
+__floatunsisf(unsigned int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ if (exponent <= significandBits) {
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+ } else {
+ const int shift = exponent - significandBits;
+ result = (rep_t)a >> shift ^ implicitBit;
+ rep_t round = (rep_t)a << (typeWidth - shift);
+ if (round > signBit) result++;
+ if (round == signBit) result += result & 1;
+ }
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
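The else branch above handles inputs wider than the 24-bit significand: round collects the shifted-out bits, and the two comparisons against signBit give round-to-nearest with ties to even. The effect is easiest to see at concrete halfway cases, checked here against the host cast; this assumes the host also converts with IEEE round-to-nearest-even, which is what this function implements.

#include <assert.h>

int main(void)
{
    /* Near 2^25, adjacent floats are 4 apart (24 significand bits). */

    /* 2^25 + 2 is exactly halfway between 2^25 and 2^25 + 4; the lower
     * neighbour has the even significand, so the tie rounds down.     */
    assert((float)33554434u == 33554432.0f);

    /* 2^25 + 6 is halfway between 2^25 + 4 and 2^25 + 8; here the upper
     * neighbour is the even one, so the tie rounds up.                */
    assert((float)33554438u == 33554440.0f);

    /* A non-halfway value simply goes to the nearer neighbour.        */
    assert((float)33554435u == 33554436.0f);

    return 0;
}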
diff --git a/contrib/libs/cxxsupp/builtins/floatunsitf.c b/contrib/libs/cxxsupp/builtins/floatunsitf.c
index 502c3bf7ca..1cd1842e70 100644
--- a/contrib/libs/cxxsupp/builtins/floatunsitf.c
+++ b/contrib/libs/cxxsupp/builtins/floatunsitf.c
@@ -1,40 +1,40 @@
-//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unsigned integer to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) {
-
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clz(a);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- return fromRep(result);
-}
-
-#endif
+//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/floatuntidf.c b/contrib/libs/cxxsupp/builtins/floatuntidf.c
index 53f48a3598..06202d9679 100644
--- a/contrib/libs/cxxsupp/builtins/floatuntidf.c
+++ b/contrib/libs/cxxsupp/builtins/floatuntidf.c
@@ -1,80 +1,80 @@
-/* ===-- floatuntidf.c - Implement __floatuntidf ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatuntidf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a double, rounding toward even. */
-
-/* Assumption: double is a IEEE 64 bit floating point type
- * tu_int is a 128 bit integral type
- */
-
-/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
-
-COMPILER_RT_ABI double
-__floatuntidf(tu_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(tu_int) * CHAR_BIT;
- int sd = N - __clzti2(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > DBL_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit DBL_MANT_DIG-1 bits to the right of 1
- * Q = bit DBL_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case DBL_MANT_DIG + 1:
- a <<= 1;
- break;
- case DBL_MANT_DIG + 2:
- break;
- default:
- a = (a >> (sd - (DBL_MANT_DIG+2))) |
- ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
- if (a & ((tu_int)1 << DBL_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- else
- {
- a <<= (DBL_MANT_DIG - sd);
- /* a is now rounded to DBL_MANT_DIG bits */
- }
- double_bits fb;
- fb.u.s.high = ((e + 1023) << 20) | /* exponent */
- ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
- fb.u.s.low = (su_int)a; /* mantissa-low */
- return fb.f;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- floatuntidf.c - Implement __floatuntidf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatuntidf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a double, rounding toward even. */
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI double
+__floatuntidf(tu_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.s.high = ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.s.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/floatuntisf.c b/contrib/libs/cxxsupp/builtins/floatuntisf.c
index 9e0c8e1489..c0dd0275dd 100644
--- a/contrib/libs/cxxsupp/builtins/floatuntisf.c
+++ b/contrib/libs/cxxsupp/builtins/floatuntisf.c
@@ -1,79 +1,79 @@
-/* ===-- floatuntisf.c - Implement __floatuntisf ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatuntisf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a float, rounding toward even. */
-
-/* Assumption: float is a IEEE 32 bit floating point type
- * tu_int is a 128 bit integral type
- */
-
-/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
-
-COMPILER_RT_ABI float
-__floatuntisf(tu_int a)
-{
- if (a == 0)
- return 0.0F;
- const unsigned N = sizeof(tu_int) * CHAR_BIT;
- int sd = N - __clzti2(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > FLT_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit FLT_MANT_DIG-1 bits to the right of 1
- * Q = bit FLT_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case FLT_MANT_DIG + 1:
- a <<= 1;
- break;
- case FLT_MANT_DIG + 2:
- break;
- default:
- a = (a >> (sd - (FLT_MANT_DIG+2))) |
- ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
- if (a & ((tu_int)1 << FLT_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- else
- {
- a <<= (FLT_MANT_DIG - sd);
- /* a is now rounded to FLT_MANT_DIG bits */
- }
- float_bits fb;
- fb.u = ((e + 127) << 23) | /* exponent */
- ((su_int)a & 0x007FFFFF); /* mantissa */
- return fb.f;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- floatuntisf.c - Implement __floatuntisf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatuntisf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a float, rounding toward even. */
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * tu_int is a 128 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI float
+__floatuntisf(tu_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/floatuntixf.c b/contrib/libs/cxxsupp/builtins/floatuntixf.c
index c18a1127a2..ea81cb1bcd 100644
--- a/contrib/libs/cxxsupp/builtins/floatuntixf.c
+++ b/contrib/libs/cxxsupp/builtins/floatuntixf.c
@@ -1,81 +1,81 @@
-/* ===-- floatuntixf.c - Implement __floatuntixf ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __floatuntixf for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: convert a to a long double, rounding toward even. */
-
-/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
- * tu_int is a 128 bit integral type
- */
-
-/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
- * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
- */
-
-COMPILER_RT_ABI long double
-__floatuntixf(tu_int a)
-{
- if (a == 0)
- return 0.0;
- const unsigned N = sizeof(tu_int) * CHAR_BIT;
- int sd = N - __clzti2(a); /* number of significant digits */
- int e = sd - 1; /* exponent */
- if (sd > LDBL_MANT_DIG)
- {
- /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
- * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
- * 12345678901234567890123456
- * 1 = msb 1 bit
- * P = bit LDBL_MANT_DIG-1 bits to the right of 1
- * Q = bit LDBL_MANT_DIG bits to the right of 1
- * R = "or" of all bits to the right of Q
- */
- switch (sd)
- {
- case LDBL_MANT_DIG + 1:
- a <<= 1;
- break;
- case LDBL_MANT_DIG + 2:
- break;
- default:
- a = (a >> (sd - (LDBL_MANT_DIG+2))) |
- ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
- };
- /* finish: */
- a |= (a & 4) != 0; /* Or P into R */
- ++a; /* round - this step may add a significant bit */
- a >>= 2; /* dump Q and R */
- /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
- if (a & ((tu_int)1 << LDBL_MANT_DIG))
- {
- a >>= 1;
- ++e;
- }
- /* a is now rounded to LDBL_MANT_DIG bits */
- }
- else
- {
- a <<= (LDBL_MANT_DIG - sd);
- /* a is now rounded to LDBL_MANT_DIG bits */
- }
- long_double_bits fb;
- fb.u.high.s.low = (e + 16383); /* exponent */
- fb.u.low.all = (du_int)a; /* mantissa */
- return fb.f;
-}
-
-#endif
+/* ===-- floatuntixf.c - Implement __floatuntixf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatuntixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * tu_int is a 128 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI long double
+__floatuntixf(tu_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ long_double_bits fb;
+ fb.u.high.s.low = (e + 16383); /* exponent */
+ fb.u.low.all = (du_int)a; /* mantissa */
+ return fb.f;
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
index 57d079e9d3..b47be1b648 100644
--- a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
@@ -1,144 +1,144 @@
-//===----- lib/fp_add_impl.inc - floating point addition -----------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements soft-float addition with the IEEE-754 default rounding
-// (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_lib.h"
-
-static __inline fp_t __addXf3__(fp_t a, fp_t b) {
- rep_t aRep = toRep(a);
- rep_t bRep = toRep(b);
- const rep_t aAbs = aRep & absMask;
- const rep_t bAbs = bRep & absMask;
-
- // Detect if a or b is zero, infinity, or NaN.
- if (aAbs - REP_C(1) >= infRep - REP_C(1) ||
- bAbs - REP_C(1) >= infRep - REP_C(1)) {
- // NaN + anything = qNaN
- if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
- // anything + NaN = qNaN
- if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-
- if (aAbs == infRep) {
- // +/-infinity + -/+infinity = qNaN
- if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
- // +/-infinity + anything remaining = +/- infinity
- else return a;
- }
-
- // anything remaining + +/-infinity = +/-infinity
- if (bAbs == infRep) return b;
-
- // zero + anything = anything
- if (!aAbs) {
- // but we need to get the sign right for zero + zero
- if (!bAbs) return fromRep(toRep(a) & toRep(b));
- else return b;
- }
-
- // anything + zero = anything
- if (!bAbs) return a;
- }
-
- // Swap a and b if necessary so that a has the larger absolute value.
- if (bAbs > aAbs) {
- const rep_t temp = aRep;
- aRep = bRep;
- bRep = temp;
- }
-
- // Extract the exponent and significand from the (possibly swapped) a and b.
- int aExponent = aRep >> significandBits & maxExponent;
- int bExponent = bRep >> significandBits & maxExponent;
- rep_t aSignificand = aRep & significandMask;
- rep_t bSignificand = bRep & significandMask;
-
- // Normalize any denormals, and adjust the exponent accordingly.
- if (aExponent == 0) aExponent = normalize(&aSignificand);
- if (bExponent == 0) bExponent = normalize(&bSignificand);
-
- // The sign of the result is the sign of the larger operand, a. If they
- // have opposite signs, we are performing a subtraction; otherwise addition.
- const rep_t resultSign = aRep & signBit;
- const bool subtraction = (aRep ^ bRep) & signBit;
-
- // Shift the significands to give us round, guard and sticky, and or in the
- // implicit significand bit. (If we fell through from the denormal path it
- // was already set by normalize( ), but setting it twice won't hurt
- // anything.)
- aSignificand = (aSignificand | implicitBit) << 3;
- bSignificand = (bSignificand | implicitBit) << 3;
-
- // Shift the significand of b by the difference in exponents, with a sticky
- // bottom bit to get rounding correct.
- const unsigned int align = aExponent - bExponent;
- if (align) {
- if (align < typeWidth) {
- const bool sticky = bSignificand << (typeWidth - align);
- bSignificand = bSignificand >> align | sticky;
- } else {
- bSignificand = 1; // sticky; b is known to be non-zero.
- }
- }
- if (subtraction) {
- aSignificand -= bSignificand;
- // If a == -b, return +zero.
- if (aSignificand == 0) return fromRep(0);
-
- // If partial cancellation occurred, we need to left-shift the result
- // and adjust the exponent:
- if (aSignificand < implicitBit << 3) {
- const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
- aSignificand <<= shift;
- aExponent -= shift;
- }
- }
- else /* addition */ {
- aSignificand += bSignificand;
-
- // If the addition carried up, we need to right-shift the result and
- // adjust the exponent:
- if (aSignificand & implicitBit << 4) {
- const bool sticky = aSignificand & 1;
- aSignificand = aSignificand >> 1 | sticky;
- aExponent += 1;
- }
- }
-
- // If we have overflowed the type, return +/- infinity:
- if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
-
- if (aExponent <= 0) {
- // Result is denormal before rounding; the exponent is zero and we
- // need to shift the significand.
- const int shift = 1 - aExponent;
- const bool sticky = aSignificand << (typeWidth - shift);
- aSignificand = aSignificand >> shift | sticky;
- aExponent = 0;
- }
-
- // Low three bits are round, guard, and sticky.
- const int roundGuardSticky = aSignificand & 0x7;
-
- // Shift the significand into place, and mask off the implicit bit.
- rep_t result = aSignificand >> 3 & significandMask;
-
- // Insert the exponent and sign.
- result |= (rep_t)aExponent << significandBits;
- result |= resultSign;
-
- // Final rounding. The result may overflow to infinity, but that is the
- // correct result in that case.
- if (roundGuardSticky > 0x4) result++;
- if (roundGuardSticky == 0x4) result += result & 1;
- return fromRep(result);
-}
+//===----- lib/fp_add_impl.inc - floating point addition -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float addition with the IEEE-754 default rounding
+// (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fp_t __addXf3__(fp_t a, fp_t b) {
+ rep_t aRep = toRep(a);
+ rep_t bRep = toRep(b);
+ const rep_t aAbs = aRep & absMask;
+ const rep_t bAbs = bRep & absMask;
+
+ // Detect if a or b is zero, infinity, or NaN.
+ if (aAbs - REP_C(1) >= infRep - REP_C(1) ||
+ bAbs - REP_C(1) >= infRep - REP_C(1)) {
+ // NaN + anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything + NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // +/-infinity + -/+infinity = qNaN
+ if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
+ // +/-infinity + anything remaining = +/- infinity
+ else return a;
+ }
+
+ // anything remaining + +/-infinity = +/-infinity
+ if (bAbs == infRep) return b;
+
+ // zero + anything = anything
+ if (!aAbs) {
+ // but we need to get the sign right for zero + zero
+ if (!bAbs) return fromRep(toRep(a) & toRep(b));
+ else return b;
+ }
+
+ // anything + zero = anything
+ if (!bAbs) return a;
+ }
+
+ // Swap a and b if necessary so that a has the larger absolute value.
+ if (bAbs > aAbs) {
+ const rep_t temp = aRep;
+ aRep = bRep;
+ bRep = temp;
+ }
+
+ // Extract the exponent and significand from the (possibly swapped) a and b.
+ int aExponent = aRep >> significandBits & maxExponent;
+ int bExponent = bRep >> significandBits & maxExponent;
+ rep_t aSignificand = aRep & significandMask;
+ rep_t bSignificand = bRep & significandMask;
+
+ // Normalize any denormals, and adjust the exponent accordingly.
+ if (aExponent == 0) aExponent = normalize(&aSignificand);
+ if (bExponent == 0) bExponent = normalize(&bSignificand);
+
+ // The sign of the result is the sign of the larger operand, a. If they
+ // have opposite signs, we are performing a subtraction; otherwise addition.
+ const rep_t resultSign = aRep & signBit;
+ const bool subtraction = (aRep ^ bRep) & signBit;
+
+ // Shift the significands to give us round, guard and sticky, and or in the
+ // implicit significand bit. (If we fell through from the denormal path it
+ // was already set by normalize( ), but setting it twice won't hurt
+ // anything.)
+ aSignificand = (aSignificand | implicitBit) << 3;
+ bSignificand = (bSignificand | implicitBit) << 3;
+
+ // Shift the significand of b by the difference in exponents, with a sticky
+ // bottom bit to get rounding correct.
+ const unsigned int align = aExponent - bExponent;
+ if (align) {
+ if (align < typeWidth) {
+ const bool sticky = bSignificand << (typeWidth - align);
+ bSignificand = bSignificand >> align | sticky;
+ } else {
+ bSignificand = 1; // sticky; b is known to be non-zero.
+ }
+ }
+ if (subtraction) {
+ aSignificand -= bSignificand;
+ // If a == -b, return +zero.
+ if (aSignificand == 0) return fromRep(0);
+
+ // If partial cancellation occurred, we need to left-shift the result
+ // and adjust the exponent:
+ if (aSignificand < implicitBit << 3) {
+ const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+ aSignificand <<= shift;
+ aExponent -= shift;
+ }
+ }
+ else /* addition */ {
+ aSignificand += bSignificand;
+
+ // If the addition carried up, we need to right-shift the result and
+ // adjust the exponent:
+ if (aSignificand & implicitBit << 4) {
+ const bool sticky = aSignificand & 1;
+ aSignificand = aSignificand >> 1 | sticky;
+ aExponent += 1;
+ }
+ }
+
+ // If we have overflowed the type, return +/- infinity:
+ if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
+
+ if (aExponent <= 0) {
+ // Result is denormal before rounding; the exponent is zero and we
+ // need to shift the significand.
+ const int shift = 1 - aExponent;
+ const bool sticky = aSignificand << (typeWidth - shift);
+ aSignificand = aSignificand >> shift | sticky;
+ aExponent = 0;
+ }
+
+ // Low three bits are round, guard, and sticky.
+ const int roundGuardSticky = aSignificand & 0x7;
+
+ // Shift the significand into place, and mask off the implicit bit.
+ rep_t result = aSignificand >> 3 & significandMask;
+
+ // Insert the exponent and sign.
+ result |= (rep_t)aExponent << significandBits;
+ result |= resultSign;
+
+ // Final rounding. The result may overflow to infinity, but that is the
+ // correct result in that case.
+ if (roundGuardSticky > 0x4) result++;
+ if (roundGuardSticky == 0x4) result += result & 1;
+ return fromRep(result);
+}
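The last two statements are the entire rounding decision: with three extra low bits (called round, guard, and sticky above), any pattern above 0b100 means more than half an ulp was dropped, exactly 0b100 is a tie broken toward an even result, and anything below is truncated. The same step isolated on a plain integer significand; the names mirror the code above and the toy harness is not part of compiler-rt.

#include <assert.h>
#include <stdint.h>

/* Apply the final round-to-nearest-even step of __addXf3__ to a significand
 * carrying three extra low bits. */
static uint64_t round_rne(uint64_t sig_with_extra_bits)
{
    uint64_t result = sig_with_extra_bits >> 3;
    const int roundGuardSticky = sig_with_extra_bits & 0x7;
    if (roundGuardSticky > 0x4)  result++;             /* > half an ulp: up */
    if (roundGuardSticky == 0x4) result += result & 1; /* tie: toward even  */
    return result;
}

int main(void)
{
    assert(round_rne((20u << 3) | 0x3) == 20); /* .011: below half, truncate  */
    assert(round_rne((20u << 3) | 0x5) == 21); /* .101: above half, round up  */
    assert(round_rne((20u << 3) | 0x4) == 20); /* .100: tie, 20 is even, stay */
    assert(round_rne((21u << 3) | 0x4) == 22); /* .100: tie, 21 is odd, go up */
    return 0;
}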
diff --git a/contrib/libs/cxxsupp/builtins/fp_extend.h b/contrib/libs/cxxsupp/builtins/fp_extend.h
index 65692dc0e5..6d95a06807 100644
--- a/contrib/libs/cxxsupp/builtins/fp_extend.h
+++ b/contrib/libs/cxxsupp/builtins/fp_extend.h
@@ -1,89 +1,89 @@
-//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Set source and destination precision setting
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FP_EXTEND_HEADER
-#define FP_EXTEND_HEADER
-
-#include "int_lib.h"
-
-#if defined SRC_SINGLE
-typedef float src_t;
-typedef uint32_t src_rep_t;
-#define SRC_REP_C UINT32_C
-static const int srcSigBits = 23;
-#define src_rep_t_clz __builtin_clz
-
-#elif defined SRC_DOUBLE
-typedef double src_t;
-typedef uint64_t src_rep_t;
-#define SRC_REP_C UINT64_C
-static const int srcSigBits = 52;
-static __inline int src_rep_t_clz(src_rep_t a) {
-#if defined __LP64__
- return __builtin_clzl(a);
-#else
- if (a & REP_C(0xffffffff00000000))
- return __builtin_clz(a >> 32);
- else
- return 32 + __builtin_clz(a & REP_C(0xffffffff));
-#endif
-}
-
-#elif defined SRC_HALF
-typedef uint16_t src_t;
-typedef uint16_t src_rep_t;
-#define SRC_REP_C UINT16_C
-static const int srcSigBits = 10;
-#define src_rep_t_clz __builtin_clz
-
-#else
-#error Source should be half, single, or double precision!
-#endif //end source precision
-
-#if defined DST_SINGLE
-typedef float dst_t;
-typedef uint32_t dst_rep_t;
-#define DST_REP_C UINT32_C
-static const int dstSigBits = 23;
-
-#elif defined DST_DOUBLE
-typedef double dst_t;
-typedef uint64_t dst_rep_t;
-#define DST_REP_C UINT64_C
-static const int dstSigBits = 52;
-
-#elif defined DST_QUAD
-typedef long double dst_t;
-typedef __uint128_t dst_rep_t;
-#define DST_REP_C (__uint128_t)
-static const int dstSigBits = 112;
-
-#else
-#error Destination should be single, double, or quad precision!
-#endif //end destination precision
-
-// End of specialization parameters. Two helper routines for conversion to and
-// from the representation of floating-point data as integer values follow.
-
-static __inline src_rep_t srcToRep(src_t x) {
- const union { src_t f; src_rep_t i; } rep = {.f = x};
- return rep.i;
-}
-
-static __inline dst_t dstFromRep(dst_rep_t x) {
- const union { dst_t f; dst_rep_t i; } rep = {.i = x};
- return rep.f;
-}
-// End helper routines. Conversion implementation follows.
-
-#endif //FP_EXTEND_HEADER
+//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_EXTEND_HEADER
+#define FP_EXTEND_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcSigBits = 23;
+#define src_rep_t_clz __builtin_clz
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52;
+static __inline int src_rep_t_clz(src_rep_t a) {
+#if defined __LP64__
+ return __builtin_clzl(a);
+#else
+ if (a & REP_C(0xffffffff00000000))
+ return __builtin_clz(a >> 32);
+ else
+ return 32 + __builtin_clz(a & REP_C(0xffffffff));
+#endif
+}
+
+#elif defined SRC_HALF
+typedef uint16_t src_t;
+typedef uint16_t src_rep_t;
+#define SRC_REP_C UINT16_C
+static const int srcSigBits = 10;
+#define src_rep_t_clz __builtin_clz
+
+#else
+#error Source should be half, single, or double precision!
+#endif //end source precision
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstSigBits = 23;
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+#elif defined DST_QUAD
+typedef long double dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstSigBits = 112;
+
+#else
+#error Destination should be single, double, or quad precision!
+#endif //end destination precision
+
+// End of specialization parameters. Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static __inline src_rep_t srcToRep(src_t x) {
+ const union { src_t f; src_rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline dst_t dstFromRep(dst_rep_t x) {
+ const union { dst_t f; dst_rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+// End helper routines. Conversion implementation follows.
+
+#endif //FP_EXTEND_HEADER
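
Editor's note: this header is a template that a wrapper translation unit configures by defining one SRC_* and one DST_* macro before including fp_extend_impl.inc. A minimal sketch of that usage follows, assuming the usual compiler-rt pattern for the float -> double case; it is not part of the diff, and the wrapper file name is illustrative.

/* Hypothetical wrapper, e.g. extendsfdf2.c, sketched for illustration only. */
#define SRC_SINGLE
#define DST_DOUBLE
#include "fp_extend_impl.inc"

COMPILER_RT_ABI double __extendsfdf2(float a) {
    return __extendXfYf2__(a);
}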
diff --git a/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc b/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc
index 6ef3338385..b785cc7687 100644
--- a/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_extend_impl.inc
@@ -1,108 +1,108 @@
-//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a fairly generic conversion from a narrower to a wider
-// IEEE-754 floating-point type. The constants and types defined following the
-// includes below parameterize the conversion.
-//
-// It does not support types that don't use the usual IEEE-754 interchange
-// formats; specifically, some work would be needed to adapt it to
-// (for example) the Intel 80-bit format or PowerPC double-double format.
-//
-// Please note, however, that this implementation is only intended to support
-// *widening* operations; if you need to convert to a *narrower* floating-point
-// type (e.g. double -> float), then this routine will not do what you want it
-// to.
-//
-// It also requires that integer types at least as large as both formats
-// are available on the target platform; this may pose a problem when trying
-// to add support for quad on some 32-bit systems, for example. You also may
-// run into trouble finding an appropriate CLZ function for wide source types;
-// you will likely need to roll your own on some platforms.
-//
-// Finally, the following assumptions are made:
-//
-// 1. floating-point types and integer types have the same endianness on the
-// target platform
-//
-// 2. quiet NaNs, if supported, are indicated by the leading bit of the
-// significand field being set
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_extend.h"
-
-static __inline dst_t __extendXfYf2__(src_t a) {
- // Various constants whose values follow from the type parameters.
- // Any reasonable optimizer will fold and propagate all of these.
- const int srcBits = sizeof(src_t)*CHAR_BIT;
- const int srcExpBits = srcBits - srcSigBits - 1;
- const int srcInfExp = (1 << srcExpBits) - 1;
- const int srcExpBias = srcInfExp >> 1;
-
- const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
- const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
- const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
- const src_rep_t srcAbsMask = srcSignMask - 1;
- const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
- const src_rep_t srcNaNCode = srcQNaN - 1;
-
- const int dstBits = sizeof(dst_t)*CHAR_BIT;
- const int dstExpBits = dstBits - dstSigBits - 1;
- const int dstInfExp = (1 << dstExpBits) - 1;
- const int dstExpBias = dstInfExp >> 1;
-
- const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
-
- // Break a into a sign and representation of the absolute value
- const src_rep_t aRep = srcToRep(a);
- const src_rep_t aAbs = aRep & srcAbsMask;
- const src_rep_t sign = aRep & srcSignMask;
- dst_rep_t absResult;
-
- // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
- // to (signed) int. To avoid that, explicitly cast to src_rep_t.
- if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
- // a is a normal number.
- // Extend to the destination type by shifting the significand and
- // exponent into the proper position and rebiasing the exponent.
- absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
- absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
- }
-
- else if (aAbs >= srcInfinity) {
- // a is NaN or infinity.
- // Conjure the result by beginning with infinity, then setting the qNaN
- // bit (if needed) and right-aligning the rest of the trailing NaN
- // payload field.
- absResult = (dst_rep_t)dstInfExp << dstSigBits;
- absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
- absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
- }
-
- else if (aAbs) {
- // a is denormal.
- // renormalize the significand and clear the leading bit, then insert
- // the correct adjusted exponent in the destination type.
- const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
- absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
- absResult ^= dstMinNormal;
- const int resultExponent = dstExpBias - srcExpBias - scale + 1;
- absResult |= (dst_rep_t)resultExponent << dstSigBits;
- }
-
- else {
- // a is zero.
- absResult = 0;
- }
-
- // Apply the signbit to (dst_t)abs(a).
- const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
- return dstFromRep(result);
-}
+//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a narrower to a wider
+// IEEE-754 floating-point type. The constants and types defined following the
+// includes below parameterize the conversion.
+//
+// It does not support types that don't use the usual IEEE-754 interchange
+// formats; specifically, some work would be needed to adapt it to
+// (for example) the Intel 80-bit format or PowerPC double-double format.
+//
+// Please note, however, that this implementation is only intended to support
+// *widening* operations; if you need to convert to a *narrower* floating-point
+// type (e.g. double -> float), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example. You also may
+// run into trouble finding an appropriate CLZ function for wide source types;
+// you will likely need to roll your own on some platforms.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+// target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_extend.h"
+
+static __inline dst_t __extendXfYf2__(src_t a) {
+ // Various constants whose values follow from the type parameters.
+ // Any reasonable optimizer will fold and propagate all of these.
+ const int srcBits = sizeof(src_t)*CHAR_BIT;
+ const int srcExpBits = srcBits - srcSigBits - 1;
+ const int srcInfExp = (1 << srcExpBits) - 1;
+ const int srcExpBias = srcInfExp >> 1;
+
+ const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+ const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+ const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+ const src_rep_t srcAbsMask = srcSignMask - 1;
+ const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+ const src_rep_t srcNaNCode = srcQNaN - 1;
+
+ const int dstBits = sizeof(dst_t)*CHAR_BIT;
+ const int dstExpBits = dstBits - dstSigBits - 1;
+ const int dstInfExp = (1 << dstExpBits) - 1;
+ const int dstExpBias = dstInfExp >> 1;
+
+ const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
+
+ // Break a into a sign and representation of the absolute value
+ const src_rep_t aRep = srcToRep(a);
+ const src_rep_t aAbs = aRep & srcAbsMask;
+ const src_rep_t sign = aRep & srcSignMask;
+ dst_rep_t absResult;
+
+ // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
+ // to (signed) int. To avoid that, explicitly cast to src_rep_t.
+ if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
+ // a is a normal number.
+ // Extend to the destination type by shifting the significand and
+ // exponent into the proper position and rebiasing the exponent.
+ absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
+ absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
+ }
+
+ else if (aAbs >= srcInfinity) {
+ // a is NaN or infinity.
+ // Conjure the result by beginning with infinity, then setting the qNaN
+ // bit (if needed) and right-aligning the rest of the trailing NaN
+ // payload field.
+ absResult = (dst_rep_t)dstInfExp << dstSigBits;
+ absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
+ absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
+ }
+
+ else if (aAbs) {
+ // a is denormal.
+ // renormalize the significand and clear the leading bit, then insert
+ // the correct adjusted exponent in the destination type.
+ const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
+ absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
+ absResult ^= dstMinNormal;
+ const int resultExponent = dstExpBias - srcExpBias - scale + 1;
+ absResult |= (dst_rep_t)resultExponent << dstSigBits;
+ }
+
+ else {
+ // a is zero.
+ absResult = 0;
+ }
+
+ // Apply the signbit to (dst_t)abs(a).
+ const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
+ return dstFromRep(result);
+}
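
Editor's note: to make the normal-number branch of __extendXfYf2__ concrete, here is a small stand-alone sketch (not part of the diff) that widens one float to a double by hand with the same shift-and-rebias arithmetic, then checks it against the compiler's own conversion. The constants are the binary32/binary64 values of srcSigBits (23), dstSigBits (52) and the exponent biases; the input 3.5f is arbitrary.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void) {
    const float a = 3.5f;                          /* an arbitrary normal value */
    uint32_t aRep; memcpy(&aRep, &a, sizeof aRep);

    const uint64_t sign = (uint64_t)(aRep & 0x80000000u) << 32;
    uint64_t absResult  = (uint64_t)(aRep & 0x7fffffffu) << (52 - 23);
    absResult          += (uint64_t)(1023 - 127) << 52;      /* rebias exponent */

    const uint64_t rep = absResult | sign;
    double d; memcpy(&d, &rep, sizeof d);
    printf("%f %f\n", d, (double)a);               /* prints: 3.500000 3.500000 */
    return 0;
}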
diff --git a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
index 433887a503..da70d4d393 100644
--- a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
@@ -1,41 +1,41 @@
-//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements float to integer conversion for the
-// compiler-rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_lib.h"
-
-static __inline fixint_t __fixint(fp_t a) {
- const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
- const fixint_t fixint_min = -fixint_max - 1;
- // Break a into sign, exponent, significand
- const rep_t aRep = toRep(a);
- const rep_t aAbs = aRep & absMask;
- const fixint_t sign = aRep & signBit ? -1 : 1;
- const int exponent = (aAbs >> significandBits) - exponentBias;
- const rep_t significand = (aAbs & significandMask) | implicitBit;
-
- // If exponent is negative, the result is zero.
- if (exponent < 0)
- return 0;
-
- // If the value is too large for the integer type, saturate.
- if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT)
- return sign == 1 ? fixint_max : fixint_min;
-
- // If 0 <= exponent < significandBits, right shift to get the result.
- // Otherwise, shift left.
- if (exponent < significandBits)
- return sign * (significand >> (significandBits - exponent));
- else
- return sign * ((fixint_t)significand << (exponent - significandBits));
-}
+//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float to integer conversion for the
+// compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fixint_t __fixint(fp_t a) {
+ const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
+ const fixint_t fixint_min = -fixint_max - 1;
+ // Break a into sign, exponent, significand
+ const rep_t aRep = toRep(a);
+ const rep_t aAbs = aRep & absMask;
+ const fixint_t sign = aRep & signBit ? -1 : 1;
+ const int exponent = (aAbs >> significandBits) - exponentBias;
+ const rep_t significand = (aAbs & significandMask) | implicitBit;
+
+ // If exponent is negative, the result is zero.
+ if (exponent < 0)
+ return 0;
+
+ // If the value is too large for the integer type, saturate.
+ if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT)
+ return sign == 1 ? fixint_max : fixint_min;
+
+ // If 0 <= exponent < significandBits, right shift to get the result.
+ // Otherwise, shift left.
+ if (exponent < significandBits)
+ return sign * (significand >> (significandBits - exponent));
+ else
+ return sign * ((fixint_t)significand << (exponent - significandBits));
+}
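
Editor's note: a stand-alone sketch (not part of the diff) of __fixint specialized by hand for double -> int32_t, so the zero / saturate / shift cases above can be traced with concrete numbers. The helper name fix_double_to_i32 and the sample inputs are hypothetical; the unreachable left-shift branch is omitted because a 32-bit result always saturates before it applies.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int32_t fix_double_to_i32(double a) {
    uint64_t aRep; memcpy(&aRep, &a, sizeof aRep);
    const uint64_t aAbs = aRep & 0x7fffffffffffffffULL;
    const int32_t sign = (aRep >> 63) ? -1 : 1;
    const int exponent = (int)(aAbs >> 52) - 1023;
    const uint64_t significand = (aAbs & 0x000fffffffffffffULL) | (1ULL << 52);

    if (exponent < 0) return 0;                    /* |a| < 1 truncates to zero */
    if ((unsigned)exponent >= 32)                  /* too large: saturate */
        return sign == 1 ? INT32_MAX : INT32_MIN;
    return sign * (int32_t)(significand >> (52 - exponent));
}

int main(void) {
    /* prints: -2 0 2147483647 */
    printf("%d %d %d\n", fix_double_to_i32(-2.75),
           fix_double_to_i32(0.99), fix_double_to_i32(4.0e12));
    return 0;
}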
diff --git a/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc b/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc
index 1588ebf8f3..d68ccf27a7 100644
--- a/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_fixuint_impl.inc
@@ -1,39 +1,39 @@
-//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements float to unsigned integer conversion for the
-// compiler-rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_lib.h"
-
-static __inline fixuint_t __fixuint(fp_t a) {
- // Break a into sign, exponent, significand
- const rep_t aRep = toRep(a);
- const rep_t aAbs = aRep & absMask;
- const int sign = aRep & signBit ? -1 : 1;
- const int exponent = (aAbs >> significandBits) - exponentBias;
- const rep_t significand = (aAbs & significandMask) | implicitBit;
-
- // If either the value or the exponent is negative, the result is zero.
- if (sign == -1 || exponent < 0)
- return 0;
-
- // If the value is too large for the integer type, saturate.
- if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT)
- return ~(fixuint_t)0;
-
- // If 0 <= exponent < significandBits, right shift to get the result.
- // Otherwise, shift left.
- if (exponent < significandBits)
- return significand >> (significandBits - exponent);
- else
- return (fixuint_t)significand << (exponent - significandBits);
-}
+//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float to unsigned integer conversion for the
+// compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fixuint_t __fixuint(fp_t a) {
+ // Break a into sign, exponent, significand
+ const rep_t aRep = toRep(a);
+ const rep_t aAbs = aRep & absMask;
+ const int sign = aRep & signBit ? -1 : 1;
+ const int exponent = (aAbs >> significandBits) - exponentBias;
+ const rep_t significand = (aAbs & significandMask) | implicitBit;
+
+ // If either the value or the exponent is negative, the result is zero.
+ if (sign == -1 || exponent < 0)
+ return 0;
+
+ // If the value is too large for the integer type, saturate.
+ if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT)
+ return ~(fixuint_t)0;
+
+ // If 0 <= exponent < significandBits, right shift to get the result.
+ // Otherwise, shift left.
+ if (exponent < significandBits)
+ return significand >> (significandBits - exponent);
+ else
+ return (fixuint_t)significand << (exponent - significandBits);
+}
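
Editor's note: the unsigned variant differs from __fixint only in its special cases: any negative input collapses to zero, and anything too large saturates to ~(fixuint_t)0. A stand-alone sketch (not part of the diff) for float -> uint32_t with hypothetical inputs follows; the helper name is illustrative.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint32_t fix_float_to_u32(float a) {
    uint32_t aRep; memcpy(&aRep, &a, sizeof aRep);
    const uint32_t aAbs = aRep & 0x7fffffffu;
    const int sign = (aRep & 0x80000000u) ? -1 : 1;
    const int exponent = (int)(aAbs >> 23) - 127;
    const uint32_t significand = (aAbs & 0x007fffffu) | 0x00800000u;

    if (sign == -1 || exponent < 0) return 0;      /* negative or |a| < 1 -> 0 */
    if ((unsigned)exponent >= 32) return ~(uint32_t)0;       /* saturate */
    if (exponent < 23) return significand >> (23 - exponent);
    return (uint32_t)significand << (exponent - 23);
}

int main(void) {
    /* prints: 0 6 4294967295 */
    printf("%u %u %u\n", fix_float_to_u32(-7.0f),
           fix_float_to_u32(6.5f), fix_float_to_u32(1.0e20f));
    return 0;
}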
diff --git a/contrib/libs/cxxsupp/builtins/fp_lib.h b/contrib/libs/cxxsupp/builtins/fp_lib.h
index 0892845842..223fb980aa 100644
--- a/contrib/libs/cxxsupp/builtins/fp_lib.h
+++ b/contrib/libs/cxxsupp/builtins/fp_lib.h
@@ -1,270 +1,270 @@
-//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a configuration header for soft-float routines in compiler-rt.
-// This file does not provide any part of the compiler-rt interface, but defines
-// many useful constants and utility routines that are used in the
-// implementation of the soft-float routines in compiler-rt.
-//
-// Assumes that float, double and long double correspond to the IEEE-754
-// binary32, binary64 and binary128 types, respectively, and that integer
-// endianness matches floating point endianness on the target platform.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FP_LIB_HEADER
-#define FP_LIB_HEADER
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <limits.h>
-#include "int_lib.h"
-
-// x86_64 FreeBSD prior to v9.3 defines fixed-width types incorrectly in
-// 32-bit mode.
-#if defined(__FreeBSD__) && defined(__i386__)
-# include <sys/param.h>
-# if __FreeBSD_version < 903000 // v9.3
-# define uint64_t unsigned long long
-# define int64_t long long
-# undef UINT64_C
-# define UINT64_C(c) (c ## ULL)
-# endif
-#endif
-
-#if defined SINGLE_PRECISION
-
-typedef uint32_t rep_t;
-typedef int32_t srep_t;
-typedef float fp_t;
-#define REP_C UINT32_C
-#define significandBits 23
-
-static __inline int rep_clz(rep_t a) {
- return __builtin_clz(a);
-}
-
-// 32x32 --> 64 bit multiply
-static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
- const uint64_t product = (uint64_t)a*b;
- *hi = product >> 32;
- *lo = product;
-}
-COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
-
-#elif defined DOUBLE_PRECISION
-
-typedef uint64_t rep_t;
-typedef int64_t srep_t;
-typedef double fp_t;
-#define REP_C UINT64_C
-#define significandBits 52
-
-static __inline int rep_clz(rep_t a) {
-#if defined __LP64__
- return __builtin_clzl(a);
-#else
- if (a & REP_C(0xffffffff00000000))
- return __builtin_clz(a >> 32);
- else
- return 32 + __builtin_clz(a & REP_C(0xffffffff));
-#endif
-}
-
-#define loWord(a) (a & 0xffffffffU)
-#define hiWord(a) (a >> 32)
-
-// 64x64 -> 128 wide multiply for platforms that don't have such an operation;
-// many 64-bit platforms have this operation, but they tend to have hardware
-// floating-point, so we don't bother with a special case for them here.
-static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
- // Each of the component 32x32 -> 64 products
- const uint64_t plolo = loWord(a) * loWord(b);
- const uint64_t plohi = loWord(a) * hiWord(b);
- const uint64_t philo = hiWord(a) * loWord(b);
- const uint64_t phihi = hiWord(a) * hiWord(b);
- // Sum terms that contribute to lo in a way that allows us to get the carry
- const uint64_t r0 = loWord(plolo);
- const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo);
- *lo = r0 + (r1 << 32);
- // Sum terms contributing to hi with the carry from lo
- *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi;
-}
-#undef loWord
-#undef hiWord
-
-COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
-
-#elif defined QUAD_PRECISION
-#if __LDBL_MANT_DIG__ == 113
-#define CRT_LDBL_128BIT
-typedef __uint128_t rep_t;
-typedef __int128_t srep_t;
-typedef long double fp_t;
-#define REP_C (__uint128_t)
-// Note: Since there is no explicit way to tell the compiler that the constant
-// is a 128-bit integer, we let the constant be cast to a 128-bit integer
-#define significandBits 112
-
-static __inline int rep_clz(rep_t a) {
- const union
- {
- __uint128_t ll;
-#if _YUGA_BIG_ENDIAN
- struct { uint64_t high, low; } s;
-#else
- struct { uint64_t low, high; } s;
-#endif
- } uu = { .ll = a };
-
- uint64_t word;
- uint64_t add;
-
- if (uu.s.high){
- word = uu.s.high;
- add = 0;
- }
- else{
- word = uu.s.low;
- add = 64;
- }
- return __builtin_clzll(word) + add;
-}
-
-#define Word_LoMask UINT64_C(0x00000000ffffffff)
-#define Word_HiMask UINT64_C(0xffffffff00000000)
-#define Word_FullMask UINT64_C(0xffffffffffffffff)
-#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
-#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
-#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
-#define Word_4(a) (uint64_t)(a & Word_LoMask)
-
-// 128x128 -> 256 wide multiply for platforms that don't have such an operation;
-// many 64-bit platforms have this operation, but they tend to have hardware
-// floating-point, so we don't bother with a special case for them here.
-static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
-
- const uint64_t product11 = Word_1(a) * Word_1(b);
- const uint64_t product12 = Word_1(a) * Word_2(b);
- const uint64_t product13 = Word_1(a) * Word_3(b);
- const uint64_t product14 = Word_1(a) * Word_4(b);
- const uint64_t product21 = Word_2(a) * Word_1(b);
- const uint64_t product22 = Word_2(a) * Word_2(b);
- const uint64_t product23 = Word_2(a) * Word_3(b);
- const uint64_t product24 = Word_2(a) * Word_4(b);
- const uint64_t product31 = Word_3(a) * Word_1(b);
- const uint64_t product32 = Word_3(a) * Word_2(b);
- const uint64_t product33 = Word_3(a) * Word_3(b);
- const uint64_t product34 = Word_3(a) * Word_4(b);
- const uint64_t product41 = Word_4(a) * Word_1(b);
- const uint64_t product42 = Word_4(a) * Word_2(b);
- const uint64_t product43 = Word_4(a) * Word_3(b);
- const uint64_t product44 = Word_4(a) * Word_4(b);
-
- const __uint128_t sum0 = (__uint128_t)product44;
- const __uint128_t sum1 = (__uint128_t)product34 +
- (__uint128_t)product43;
- const __uint128_t sum2 = (__uint128_t)product24 +
- (__uint128_t)product33 +
- (__uint128_t)product42;
- const __uint128_t sum3 = (__uint128_t)product14 +
- (__uint128_t)product23 +
- (__uint128_t)product32 +
- (__uint128_t)product41;
- const __uint128_t sum4 = (__uint128_t)product13 +
- (__uint128_t)product22 +
- (__uint128_t)product31;
- const __uint128_t sum5 = (__uint128_t)product12 +
- (__uint128_t)product21;
- const __uint128_t sum6 = (__uint128_t)product11;
-
- const __uint128_t r0 = (sum0 & Word_FullMask) +
- ((sum1 & Word_LoMask) << 32);
- const __uint128_t r1 = (sum0 >> 64) +
- ((sum1 >> 32) & Word_FullMask) +
- (sum2 & Word_FullMask) +
- ((sum3 << 32) & Word_HiMask);
-
- *lo = r0 + (r1 << 64);
- *hi = (r1 >> 64) +
- (sum1 >> 96) +
- (sum2 >> 64) +
- (sum3 >> 32) +
- sum4 +
- (sum5 << 32) +
- (sum6 << 64);
-}
-#undef Word_1
-#undef Word_2
-#undef Word_3
-#undef Word_4
-#undef Word_HiMask
-#undef Word_LoMask
-#undef Word_FullMask
-#endif // __LDBL_MANT_DIG__ == 113
-#else
-#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
-#endif
-
-#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT)
-#define typeWidth (sizeof(rep_t)*CHAR_BIT)
-#define exponentBits (typeWidth - significandBits - 1)
-#define maxExponent ((1 << exponentBits) - 1)
-#define exponentBias (maxExponent >> 1)
-
-#define implicitBit (REP_C(1) << significandBits)
-#define significandMask (implicitBit - 1U)
-#define signBit (REP_C(1) << (significandBits + exponentBits))
-#define absMask (signBit - 1U)
-#define exponentMask (absMask ^ significandMask)
-#define oneRep ((rep_t)exponentBias << significandBits)
-#define infRep exponentMask
-#define quietBit (implicitBit >> 1)
-#define qnanRep (exponentMask | quietBit)
-
-static __inline rep_t toRep(fp_t x) {
- const union { fp_t f; rep_t i; } rep = {.f = x};
- return rep.i;
-}
-
-static __inline fp_t fromRep(rep_t x) {
- const union { fp_t f; rep_t i; } rep = {.i = x};
- return rep.f;
-}
-
-static __inline int normalize(rep_t *significand) {
- const int shift = rep_clz(*significand) - rep_clz(implicitBit);
- *significand <<= shift;
- return 1 - shift;
-}
-
-static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
- *hi = *hi << count | *lo >> (typeWidth - count);
- *lo = *lo << count;
-}
-
-static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
- if (count < typeWidth) {
- const bool sticky = *lo << (typeWidth - count);
- *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
- *hi = *hi >> count;
- }
- else if (count < 2*typeWidth) {
- const bool sticky = *hi << (2*typeWidth - count) | *lo;
- *lo = *hi >> (count - typeWidth) | sticky;
- *hi = 0;
- } else {
- const bool sticky = *hi | *lo;
- *lo = sticky;
- *hi = 0;
- }
-}
-#endif
-
-#endif // FP_LIB_HEADER
+//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a configuration header for soft-float routines in compiler-rt.
+// This file does not provide any part of the compiler-rt interface, but defines
+// many useful constants and utility routines that are used in the
+// implementation of the soft-float routines in compiler-rt.
+//
+// Assumes that float, double and long double correspond to the IEEE-754
+// binary32, binary64 and binary128 types, respectively, and that integer
+// endianness matches floating point endianness on the target platform.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_LIB_HEADER
+#define FP_LIB_HEADER
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+#include "int_lib.h"
+
+// x86_64 FreeBSD prior to v9.3 defines fixed-width types incorrectly in
+// 32-bit mode.
+#if defined(__FreeBSD__) && defined(__i386__)
+# include <sys/param.h>
+# if __FreeBSD_version < 903000 // v9.3
+# define uint64_t unsigned long long
+# define int64_t long long
+# undef UINT64_C
+# define UINT64_C(c) (c ## ULL)
+# endif
+#endif
+
+#if defined SINGLE_PRECISION
+
+typedef uint32_t rep_t;
+typedef int32_t srep_t;
+typedef float fp_t;
+#define REP_C UINT32_C
+#define significandBits 23
+
+static __inline int rep_clz(rep_t a) {
+ return __builtin_clz(a);
+}
+
+// 32x32 --> 64 bit multiply
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ const uint64_t product = (uint64_t)a*b;
+ *hi = product >> 32;
+ *lo = product;
+}
+COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
+
+#elif defined DOUBLE_PRECISION
+
+typedef uint64_t rep_t;
+typedef int64_t srep_t;
+typedef double fp_t;
+#define REP_C UINT64_C
+#define significandBits 52
+
+static __inline int rep_clz(rep_t a) {
+#if defined __LP64__
+ return __builtin_clzl(a);
+#else
+ if (a & REP_C(0xffffffff00000000))
+ return __builtin_clz(a >> 32);
+ else
+ return 32 + __builtin_clz(a & REP_C(0xffffffff));
+#endif
+}
+
+#define loWord(a) (a & 0xffffffffU)
+#define hiWord(a) (a >> 32)
+
+// 64x64 -> 128 wide multiply for platforms that don't have such an operation;
+// many 64-bit platforms have this operation, but they tend to have hardware
+// floating-point, so we don't bother with a special case for them here.
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ // Each of the component 32x32 -> 64 products
+ const uint64_t plolo = loWord(a) * loWord(b);
+ const uint64_t plohi = loWord(a) * hiWord(b);
+ const uint64_t philo = hiWord(a) * loWord(b);
+ const uint64_t phihi = hiWord(a) * hiWord(b);
+ // Sum terms that contribute to lo in a way that allows us to get the carry
+ const uint64_t r0 = loWord(plolo);
+ const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo);
+ *lo = r0 + (r1 << 32);
+ // Sum terms contributing to hi with the carry from lo
+ *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi;
+}
+#undef loWord
+#undef hiWord
+
+COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
+
+#elif defined QUAD_PRECISION
+#if __LDBL_MANT_DIG__ == 113
+#define CRT_LDBL_128BIT
+typedef __uint128_t rep_t;
+typedef __int128_t srep_t;
+typedef long double fp_t;
+#define REP_C (__uint128_t)
+// Note: Since there is no explicit way to tell the compiler that the constant
+// is a 128-bit integer, we let the constant be cast to a 128-bit integer
+#define significandBits 112
+
+static __inline int rep_clz(rep_t a) {
+ const union
+ {
+ __uint128_t ll;
+#if _YUGA_BIG_ENDIAN
+ struct { uint64_t high, low; } s;
+#else
+ struct { uint64_t low, high; } s;
+#endif
+ } uu = { .ll = a };
+
+ uint64_t word;
+ uint64_t add;
+
+ if (uu.s.high){
+ word = uu.s.high;
+ add = 0;
+ }
+ else{
+ word = uu.s.low;
+ add = 64;
+ }
+ return __builtin_clzll(word) + add;
+}
+
+#define Word_LoMask UINT64_C(0x00000000ffffffff)
+#define Word_HiMask UINT64_C(0xffffffff00000000)
+#define Word_FullMask UINT64_C(0xffffffffffffffff)
+#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
+#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
+#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
+#define Word_4(a) (uint64_t)(a & Word_LoMask)
+
+// 128x128 -> 256 wide multiply for platforms that don't have such an operation;
+// many 64-bit platforms have this operation, but they tend to have hardware
+// floating-point, so we don't bother with a special case for them here.
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+
+ const uint64_t product11 = Word_1(a) * Word_1(b);
+ const uint64_t product12 = Word_1(a) * Word_2(b);
+ const uint64_t product13 = Word_1(a) * Word_3(b);
+ const uint64_t product14 = Word_1(a) * Word_4(b);
+ const uint64_t product21 = Word_2(a) * Word_1(b);
+ const uint64_t product22 = Word_2(a) * Word_2(b);
+ const uint64_t product23 = Word_2(a) * Word_3(b);
+ const uint64_t product24 = Word_2(a) * Word_4(b);
+ const uint64_t product31 = Word_3(a) * Word_1(b);
+ const uint64_t product32 = Word_3(a) * Word_2(b);
+ const uint64_t product33 = Word_3(a) * Word_3(b);
+ const uint64_t product34 = Word_3(a) * Word_4(b);
+ const uint64_t product41 = Word_4(a) * Word_1(b);
+ const uint64_t product42 = Word_4(a) * Word_2(b);
+ const uint64_t product43 = Word_4(a) * Word_3(b);
+ const uint64_t product44 = Word_4(a) * Word_4(b);
+
+ const __uint128_t sum0 = (__uint128_t)product44;
+ const __uint128_t sum1 = (__uint128_t)product34 +
+ (__uint128_t)product43;
+ const __uint128_t sum2 = (__uint128_t)product24 +
+ (__uint128_t)product33 +
+ (__uint128_t)product42;
+ const __uint128_t sum3 = (__uint128_t)product14 +
+ (__uint128_t)product23 +
+ (__uint128_t)product32 +
+ (__uint128_t)product41;
+ const __uint128_t sum4 = (__uint128_t)product13 +
+ (__uint128_t)product22 +
+ (__uint128_t)product31;
+ const __uint128_t sum5 = (__uint128_t)product12 +
+ (__uint128_t)product21;
+ const __uint128_t sum6 = (__uint128_t)product11;
+
+ const __uint128_t r0 = (sum0 & Word_FullMask) +
+ ((sum1 & Word_LoMask) << 32);
+ const __uint128_t r1 = (sum0 >> 64) +
+ ((sum1 >> 32) & Word_FullMask) +
+ (sum2 & Word_FullMask) +
+ ((sum3 << 32) & Word_HiMask);
+
+ *lo = r0 + (r1 << 64);
+ *hi = (r1 >> 64) +
+ (sum1 >> 96) +
+ (sum2 >> 64) +
+ (sum3 >> 32) +
+ sum4 +
+ (sum5 << 32) +
+ (sum6 << 64);
+}
+#undef Word_1
+#undef Word_2
+#undef Word_3
+#undef Word_4
+#undef Word_HiMask
+#undef Word_LoMask
+#undef Word_FullMask
+#endif // __LDBL_MANT_DIG__ == 113
+#else
+#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
+#endif
+
+#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT)
+#define typeWidth (sizeof(rep_t)*CHAR_BIT)
+#define exponentBits (typeWidth - significandBits - 1)
+#define maxExponent ((1 << exponentBits) - 1)
+#define exponentBias (maxExponent >> 1)
+
+#define implicitBit (REP_C(1) << significandBits)
+#define significandMask (implicitBit - 1U)
+#define signBit (REP_C(1) << (significandBits + exponentBits))
+#define absMask (signBit - 1U)
+#define exponentMask (absMask ^ significandMask)
+#define oneRep ((rep_t)exponentBias << significandBits)
+#define infRep exponentMask
+#define quietBit (implicitBit >> 1)
+#define qnanRep (exponentMask | quietBit)
+
+static __inline rep_t toRep(fp_t x) {
+ const union { fp_t f; rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline fp_t fromRep(rep_t x) {
+ const union { fp_t f; rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+
+static __inline int normalize(rep_t *significand) {
+ const int shift = rep_clz(*significand) - rep_clz(implicitBit);
+ *significand <<= shift;
+ return 1 - shift;
+}
+
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+ *hi = *hi << count | *lo >> (typeWidth - count);
+ *lo = *lo << count;
+}
+
+static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
+ if (count < typeWidth) {
+ const bool sticky = *lo << (typeWidth - count);
+ *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
+ *hi = *hi >> count;
+ }
+ else if (count < 2*typeWidth) {
+ const bool sticky = *hi << (2*typeWidth - count) | *lo;
+ *lo = *hi >> (count - typeWidth) | sticky;
+ *hi = 0;
+ } else {
+ const bool sticky = *hi | *lo;
+ *lo = sticky;
+ *hi = 0;
+ }
+}
+#endif
+
+#endif // FP_LIB_HEADER
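
Editor's note: the derived constants above are easiest to read with concrete values. The following stand-alone sketch (not part of the diff) re-computes them for the SINGLE_PRECISION configuration so the binary32 field layout is visible; it simply mirrors the macro definitions with uint32_t arithmetic.

#include <stdint.h>
#include <stdio.h>

int main(void) {
    const int significandBits = 23;
    const int typeWidth       = 32;                              /* sizeof(rep_t) * CHAR_BIT */
    const int exponentBits    = typeWidth - significandBits - 1; /* 8 */

    const uint32_t implicitBit     = UINT32_C(1) << significandBits;                  /* 0x00800000 */
    const uint32_t significandMask = implicitBit - 1U;                                /* 0x007fffff */
    const uint32_t signBit         = UINT32_C(1) << (significandBits + exponentBits); /* 0x80000000 */
    const uint32_t absMask         = signBit - 1U;                                    /* 0x7fffffff */
    const uint32_t exponentMask    = absMask ^ significandMask;   /* 0x7f800000, also infRep */
    const uint32_t quietBit        = implicitBit >> 1;                                /* 0x00400000 */
    const uint32_t qnanRep         = exponentMask | quietBit;                         /* 0x7fc00000 */

    printf("%08x %08x %08x %08x %08x\n",
           implicitBit, significandMask, signBit, exponentMask, qnanRep);
    return 0;
}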
diff --git a/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc b/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc
index 72d1736d1d..b34aa1b8f5 100644
--- a/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_mul_impl.inc
@@ -1,116 +1,116 @@
-//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements soft-float multiplication with the IEEE-754 default
-// rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_lib.h"
-
-static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
- const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
- const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
- const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
-
- rep_t aSignificand = toRep(a) & significandMask;
- rep_t bSignificand = toRep(b) & significandMask;
- int scale = 0;
-
- // Detect if a or b is zero, denormal, infinity, or NaN.
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
-
- const rep_t aAbs = toRep(a) & absMask;
- const rep_t bAbs = toRep(b) & absMask;
-
- // NaN * anything = qNaN
- if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
- // anything * NaN = qNaN
- if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
-
- if (aAbs == infRep) {
- // infinity * non-zero = +/- infinity
- if (bAbs) return fromRep(aAbs | productSign);
- // infinity * zero = NaN
- else return fromRep(qnanRep);
- }
-
- if (bAbs == infRep) {
-      // non-zero * infinity = +/- infinity
- if (aAbs) return fromRep(bAbs | productSign);
- // zero * infinity = NaN
- else return fromRep(qnanRep);
- }
-
- // zero * anything = +/- zero
- if (!aAbs) return fromRep(productSign);
- // anything * zero = +/- zero
- if (!bAbs) return fromRep(productSign);
-
- // one or both of a or b is denormal, the other (if applicable) is a
- // normal number. Renormalize one or both of a and b, and set scale to
- // include the necessary exponent adjustment.
- if (aAbs < implicitBit) scale += normalize(&aSignificand);
- if (bAbs < implicitBit) scale += normalize(&bSignificand);
- }
-
- // Or in the implicit significand bit. (If we fell through from the
- // denormal path it was already set by normalize( ), but setting it twice
- // won't hurt anything.)
- aSignificand |= implicitBit;
- bSignificand |= implicitBit;
-
- // Get the significand of a*b. Before multiplying the significands, shift
- // one of them left to left-align it in the field. Thus, the product will
- // have (exponentBits + 2) integral digits, all but two of which must be
- // zero. Normalizing this result is just a conditional left-shift by one
- // and bumping the exponent accordingly.
- rep_t productHi, productLo;
- wideMultiply(aSignificand, bSignificand << exponentBits,
- &productHi, &productLo);
-
- int productExponent = aExponent + bExponent - exponentBias + scale;
-
- // Normalize the significand, adjust exponent if needed.
- if (productHi & implicitBit) productExponent++;
- else wideLeftShift(&productHi, &productLo, 1);
-
- // If we have overflowed the type, return +/- infinity.
- if (productExponent >= maxExponent) return fromRep(infRep | productSign);
-
- if (productExponent <= 0) {
- // Result is denormal before rounding
- //
- // If the result is so small that it just underflows to zero, return
- // a zero of the appropriate sign. Mathematically there is no need to
- // handle this case separately, but we make it a special case to
- // simplify the shift logic.
- const unsigned int shift = REP_C(1) - (unsigned int)productExponent;
- if (shift >= typeWidth) return fromRep(productSign);
-
- // Otherwise, shift the significand of the result so that the round
- // bit is the high bit of productLo.
- wideRightShiftWithSticky(&productHi, &productLo, shift);
- }
- else {
- // Result is normal before rounding; insert the exponent.
- productHi &= significandMask;
- productHi |= (rep_t)productExponent << significandBits;
- }
-
- // Insert the sign of the result:
- productHi |= productSign;
-
- // Final rounding. The final result may overflow to infinity, or underflow
- // to zero, but those are the correct results in those cases. We use the
- // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
- if (productLo > signBit) productHi++;
- if (productLo == signBit) productHi += productHi & 1;
- return fromRep(productHi);
-}
+//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float multiplication with the IEEE-754 default
+// rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN * anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything * NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity * non-zero = +/- infinity
+ if (bAbs) return fromRep(aAbs | productSign);
+ // infinity * zero = NaN
+ else return fromRep(qnanRep);
+ }
+
+ if (bAbs == infRep) {
+      // non-zero * infinity = +/- infinity
+ if (aAbs) return fromRep(bAbs | productSign);
+ // zero * infinity = NaN
+ else return fromRep(qnanRep);
+ }
+
+ // zero * anything = +/- zero
+ if (!aAbs) return fromRep(productSign);
+ // anything * zero = +/- zero
+ if (!bAbs) return fromRep(productSign);
+
+ // one or both of a or b is denormal, the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale += normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+
+ // Get the significand of a*b. Before multiplying the significands, shift
+ // one of them left to left-align it in the field. Thus, the product will
+ // have (exponentBits + 2) integral digits, all but two of which must be
+ // zero. Normalizing this result is just a conditional left-shift by one
+ // and bumping the exponent accordingly.
+ rep_t productHi, productLo;
+ wideMultiply(aSignificand, bSignificand << exponentBits,
+ &productHi, &productLo);
+
+ int productExponent = aExponent + bExponent - exponentBias + scale;
+
+ // Normalize the significand, adjust exponent if needed.
+ if (productHi & implicitBit) productExponent++;
+ else wideLeftShift(&productHi, &productLo, 1);
+
+ // If we have overflowed the type, return +/- infinity.
+ if (productExponent >= maxExponent) return fromRep(infRep | productSign);
+
+ if (productExponent <= 0) {
+ // Result is denormal before rounding
+ //
+ // If the result is so small that it just underflows to zero, return
+ // a zero of the appropriate sign. Mathematically there is no need to
+ // handle this case separately, but we make it a special case to
+ // simplify the shift logic.
+ const unsigned int shift = REP_C(1) - (unsigned int)productExponent;
+ if (shift >= typeWidth) return fromRep(productSign);
+
+ // Otherwise, shift the significand of the result so that the round
+ // bit is the high bit of productLo.
+ wideRightShiftWithSticky(&productHi, &productLo, shift);
+ }
+ else {
+ // Result is normal before rounding; insert the exponent.
+ productHi &= significandMask;
+ productHi |= (rep_t)productExponent << significandBits;
+ }
+
+ // Insert the sign of the result:
+ productHi |= productSign;
+
+ // Final rounding. The final result may overflow to infinity, or underflow
+ // to zero, but those are the correct results in those cases. We use the
+ // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
+ if (productLo > signBit) productHi++;
+ if (productLo == signBit) productHi += productHi & 1;
+ return fromRep(productHi);
+}
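
Editor's note: a stand-alone sketch (not part of the diff) of the normal-number path of __mulXf3__, specialized by hand for binary32 and checked against the hardware product. It keeps the same steps: or in the implicit bits, left-align one significand by exponentBits (8), take the 32x32 -> 64 product, normalize by at most one bit, insert exponent and sign, and round to nearest, ties to even. The inputs 1.5f and 2.5f are arbitrary; denormals, NaNs and overflow are not handled here.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void) {
    const float a = 1.5f, b = 2.5f;                 /* arbitrary normal inputs */
    uint32_t aRep, bRep;
    memcpy(&aRep, &a, sizeof aRep);
    memcpy(&bRep, &b, sizeof bRep);

    const uint32_t productSign = (aRep ^ bRep) & 0x80000000u;
    const int aExp = (aRep >> 23) & 0xff, bExp = (bRep >> 23) & 0xff;
    const uint32_t aSig = (aRep & 0x007fffffu) | 0x00800000u;   /* implicit bit */
    const uint32_t bSig = (bRep & 0x007fffffu) | 0x00800000u;

    /* 32x32 -> 64 product, with b's significand left-aligned by exponentBits. */
    const uint64_t product = (uint64_t)aSig * (bSig << 8);
    uint32_t productHi = (uint32_t)(product >> 32);
    uint32_t productLo = (uint32_t)product;
    int productExponent = aExp + bExp - 127;

    /* Normalize: either the implicit bit is already in place or shift by one. */
    if (productHi & 0x00800000u) productExponent++;
    else { productHi = productHi << 1 | productLo >> 31; productLo <<= 1; }

    uint32_t result = (productHi & 0x007fffffu)
                    | ((uint32_t)productExponent << 23) | productSign;
    if (productLo > 0x80000000u) result++;               /* round to nearest */
    if (productLo == 0x80000000u) result += result & 1;  /* ties to even */

    float r; memcpy(&r, &result, sizeof r);
    printf("%f %f\n", r, a * b);                    /* prints: 3.750000 3.750000 */
    return 0;
}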
diff --git a/contrib/libs/cxxsupp/builtins/fp_trunc.h b/contrib/libs/cxxsupp/builtins/fp_trunc.h
index 2d7c5b422a..d5e79bb5b8 100644
--- a/contrib/libs/cxxsupp/builtins/fp_trunc.h
+++ b/contrib/libs/cxxsupp/builtins/fp_trunc.h
@@ -1,76 +1,76 @@
-//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Set source and destination precision setting
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FP_TRUNC_HEADER
-#define FP_TRUNC_HEADER
-
-#include "int_lib.h"
-
-#if defined SRC_SINGLE
-typedef float src_t;
-typedef uint32_t src_rep_t;
-#define SRC_REP_C UINT32_C
-static const int srcSigBits = 23;
-
-#elif defined SRC_DOUBLE
-typedef double src_t;
-typedef uint64_t src_rep_t;
-#define SRC_REP_C UINT64_C
-static const int srcSigBits = 52;
-
-#elif defined SRC_QUAD
-typedef long double src_t;
-typedef __uint128_t src_rep_t;
-#define SRC_REP_C (__uint128_t)
-static const int srcSigBits = 112;
-
-#else
-#error Source should be double precision or quad precision!
-#endif //end source precision
-
-#if defined DST_DOUBLE
-typedef double dst_t;
-typedef uint64_t dst_rep_t;
-#define DST_REP_C UINT64_C
-static const int dstSigBits = 52;
-
-#elif defined DST_SINGLE
-typedef float dst_t;
-typedef uint32_t dst_rep_t;
-#define DST_REP_C UINT32_C
-static const int dstSigBits = 23;
-
-#elif defined DST_HALF
-typedef uint16_t dst_t;
-typedef uint16_t dst_rep_t;
-#define DST_REP_C UINT16_C
-static const int dstSigBits = 10;
-
-#else
-#error Destination should be single precision or double precision!
-#endif //end destination precision
-
-// End of specialization parameters. Two helper routines for conversion to and
-// from the representation of floating-point data as integer values follow.
-
-static __inline src_rep_t srcToRep(src_t x) {
- const union { src_t f; src_rep_t i; } rep = {.f = x};
- return rep.i;
-}
-
-static __inline dst_t dstFromRep(dst_rep_t x) {
- const union { dst_t f; dst_rep_t i; } rep = {.i = x};
- return rep.f;
-}
-
-#endif // FP_TRUNC_HEADER
+//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_TRUNC_HEADER
+#define FP_TRUNC_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcSigBits = 23;
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52;
+
+#elif defined SRC_QUAD
+typedef long double src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+static const int srcSigBits = 112;
+
+#else
+#error Source should be double precision or quad precision!
+#endif //end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstSigBits = 23;
+
+#elif defined DST_HALF
+typedef uint16_t dst_t;
+typedef uint16_t dst_rep_t;
+#define DST_REP_C UINT16_C
+static const int dstSigBits = 10;
+
+#else
+#error Destination should be single precision or double precision!
+#endif //end destination precision
+
+// End of specialization parameters. Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static __inline src_rep_t srcToRep(src_t x) {
+ const union { src_t f; src_rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline dst_t dstFromRep(dst_rep_t x) {
+ const union { dst_t f; dst_rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+
+#endif // FP_TRUNC_HEADER
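
Editor's note: like fp_extend.h, this header is configured by a wrapper translation unit that defines one SRC_* and one DST_* macro before including fp_trunc_impl.inc. A minimal sketch of that usage follows, assuming the usual compiler-rt pattern for the double -> float case; it is not part of the diff, and the wrapper file name is illustrative.

/* Hypothetical wrapper, e.g. truncdfsf2.c, sketched for illustration only. */
#define SRC_DOUBLE
#define DST_SINGLE
#include "fp_trunc_impl.inc"

COMPILER_RT_ABI float __truncdfsf2(double a) {
    return __truncXfYf2__(a);
}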
diff --git a/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc b/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc
index 2b5199dc97..d88ae06091 100644
--- a/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_trunc_impl.inc
@@ -1,135 +1,135 @@
-//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a fairly generic conversion from a wider to a narrower
-// IEEE-754 floating-point type in the default (round to nearest, ties to even)
-// rounding mode. The constants and types defined following the includes below
-// parameterize the conversion.
-//
-// This routine can be trivially adapted to support conversions to
-// half-precision or from quad-precision. It does not support types that don't
-// use the usual IEEE-754 interchange formats; specifically, some work would be
-// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
-// double-double format.
-//
-// Please note, however, that this implementation is only intended to support
-// *narrowing* operations; if you need to convert to a *wider* floating-point
-// type (e.g. float -> double), then this routine will not do what you want it
-// to.
-//
-// It also requires that integer types at least as large as both formats
-// are available on the target platform; this may pose a problem when trying
-// to add support for quad on some 32-bit systems, for example.
-//
-// Finally, the following assumptions are made:
-//
-// 1. floating-point types and integer types have the same endianness on the
-// target platform
-//
-// 2. quiet NaNs, if supported, are indicated by the leading bit of the
-// significand field being set
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_trunc.h"
-
-static __inline dst_t __truncXfYf2__(src_t a) {
- // Various constants whose values follow from the type parameters.
- // Any reasonable optimizer will fold and propagate all of these.
- const int srcBits = sizeof(src_t)*CHAR_BIT;
- const int srcExpBits = srcBits - srcSigBits - 1;
- const int srcInfExp = (1 << srcExpBits) - 1;
- const int srcExpBias = srcInfExp >> 1;
-
- const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
- const src_rep_t srcSignificandMask = srcMinNormal - 1;
- const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
- const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
- const src_rep_t srcAbsMask = srcSignMask - 1;
- const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
- const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
- const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
- const src_rep_t srcNaNCode = srcQNaN - 1;
-
- const int dstBits = sizeof(dst_t)*CHAR_BIT;
- const int dstExpBits = dstBits - dstSigBits - 1;
- const int dstInfExp = (1 << dstExpBits) - 1;
- const int dstExpBias = dstInfExp >> 1;
-
- const int underflowExponent = srcExpBias + 1 - dstExpBias;
- const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
- const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
- const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
-
- const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
- const dst_rep_t dstNaNCode = dstQNaN - 1;
-
- // Break a into a sign and representation of the absolute value
- const src_rep_t aRep = srcToRep(a);
- const src_rep_t aAbs = aRep & srcAbsMask;
- const src_rep_t sign = aRep & srcSignMask;
- dst_rep_t absResult;
-
- if (aAbs - underflow < aAbs - overflow) {
- // The exponent of a is within the range of normal numbers in the
- // destination format. We can convert by simply right-shifting with
- // rounding and adjusting the exponent.
- absResult = aAbs >> (srcSigBits - dstSigBits);
- absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
-
- const src_rep_t roundBits = aAbs & roundMask;
- // Round to nearest
- if (roundBits > halfway)
- absResult++;
- // Ties to even
- else if (roundBits == halfway)
- absResult += absResult & 1;
- }
- else if (aAbs > srcInfinity) {
- // a is NaN.
- // Conjure the result by beginning with infinity, setting the qNaN
- // bit and inserting the (truncated) trailing NaN field.
- absResult = (dst_rep_t)dstInfExp << dstSigBits;
- absResult |= dstQNaN;
- absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
- }
- else if (aAbs >= overflow) {
- // a overflows to infinity.
- absResult = (dst_rep_t)dstInfExp << dstSigBits;
- }
- else {
- // a underflows on conversion to the destination type or is an exact
- // zero. The result may be a denormal or zero. Extract the exponent
- // to get the shift amount for the denormalization.
- const int aExp = aAbs >> srcSigBits;
- const int shift = srcExpBias - dstExpBias - aExp + 1;
-
- const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;
-
- // Right shift by the denormalization amount with sticky.
- if (shift > srcSigBits) {
- absResult = 0;
- } else {
- const bool sticky = significand << (srcBits - shift);
- src_rep_t denormalizedSignificand = significand >> shift | sticky;
- absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
- const src_rep_t roundBits = denormalizedSignificand & roundMask;
- // Round to nearest
- if (roundBits > halfway)
- absResult++;
- // Ties to even
- else if (roundBits == halfway)
- absResult += absResult & 1;
- }
- }
-
- // Apply the signbit to (dst_t)abs(a).
- const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
- return dstFromRep(result);
-}
+//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a wider to a narrower
+// IEEE-754 floating-point type in the default (round to nearest, ties to even)
+// rounding mode. The constants and types defined following the includes below
+// parameterize the conversion.
+//
+// This routine can be trivially adapted to support conversions to
+// half-precision or from quad-precision. It does not support types that don't
+// use the usual IEEE-754 interchange formats; specifically, some work would be
+// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Please note, however, that this implementation is only intended to support
+// *narrowing* operations; if you need to convert to a *wider* floating-point
+// type (e.g. float -> double), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+// target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_trunc.h"
+
+static __inline dst_t __truncXfYf2__(src_t a) {
+ // Various constants whose values follow from the type parameters.
+ // Any reasonable optimizer will fold and propagate all of these.
+ const int srcBits = sizeof(src_t)*CHAR_BIT;
+ const int srcExpBits = srcBits - srcSigBits - 1;
+ const int srcInfExp = (1 << srcExpBits) - 1;
+ const int srcExpBias = srcInfExp >> 1;
+
+ const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+ const src_rep_t srcSignificandMask = srcMinNormal - 1;
+ const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+ const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+ const src_rep_t srcAbsMask = srcSignMask - 1;
+ const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
+ const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
+ const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+ const src_rep_t srcNaNCode = srcQNaN - 1;
+
+ const int dstBits = sizeof(dst_t)*CHAR_BIT;
+ const int dstExpBits = dstBits - dstSigBits - 1;
+ const int dstInfExp = (1 << dstExpBits) - 1;
+ const int dstExpBias = dstInfExp >> 1;
+
+ const int underflowExponent = srcExpBias + 1 - dstExpBias;
+ const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+ const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
+ const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
+
+ const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
+ const dst_rep_t dstNaNCode = dstQNaN - 1;
+
+ // Break a into a sign and representation of the absolute value
+ const src_rep_t aRep = srcToRep(a);
+ const src_rep_t aAbs = aRep & srcAbsMask;
+ const src_rep_t sign = aRep & srcSignMask;
+ dst_rep_t absResult;
+
+ if (aAbs - underflow < aAbs - overflow) {
+ // The exponent of a is within the range of normal numbers in the
+ // destination format. We can convert by simply right-shifting with
+ // rounding and adjusting the exponent.
+ absResult = aAbs >> (srcSigBits - dstSigBits);
+ absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
+
+ const src_rep_t roundBits = aAbs & roundMask;
+ // Round to nearest
+ if (roundBits > halfway)
+ absResult++;
+ // Ties to even
+ else if (roundBits == halfway)
+ absResult += absResult & 1;
+ }
+ else if (aAbs > srcInfinity) {
+ // a is NaN.
+ // Conjure the result by beginning with infinity, setting the qNaN
+ // bit and inserting the (truncated) trailing NaN field.
+ absResult = (dst_rep_t)dstInfExp << dstSigBits;
+ absResult |= dstQNaN;
+ absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
+ }
+ else if (aAbs >= overflow) {
+ // a overflows to infinity.
+ absResult = (dst_rep_t)dstInfExp << dstSigBits;
+ }
+ else {
+ // a underflows on conversion to the destination type or is an exact
+ // zero. The result may be a denormal or zero. Extract the exponent
+ // to get the shift amount for the denormalization.
+ const int aExp = aAbs >> srcSigBits;
+ const int shift = srcExpBias - dstExpBias - aExp + 1;
+
+ const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;
+
+ // Right shift by the denormalization amount with sticky.
+ if (shift > srcSigBits) {
+ absResult = 0;
+ } else {
+ const bool sticky = significand << (srcBits - shift);
+ src_rep_t denormalizedSignificand = significand >> shift | sticky;
+ absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
+ const src_rep_t roundBits = denormalizedSignificand & roundMask;
+ // Round to nearest
+ if (roundBits > halfway)
+ absResult++;
+ // Ties to even
+ else if (roundBits == halfway)
+ absResult += absResult & 1;
+ }
+ }
+
+ // Apply the signbit to (dst_t)abs(a).
+ const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
+ return dstFromRep(result);
+}
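For readers following the template above: a minimal sketch of how a concrete narrowing routine is typically produced from fp_trunc_impl.inc. The SRC_DOUBLE/DST_SINGLE macros and the COMPILER_RT_ABI wrapper mirror compiler-rt's truncdfsf2.c; treat the exact spellings as an assumption rather than part of this diff.

    /* Illustrative instantiation (assumed names, modeled on truncdfsf2.c). */
    #define SRC_DOUBLE
    #define DST_SINGLE
    #include "fp_trunc_impl.inc"

    COMPILER_RT_ABI float __truncdfsf2(double a) {
        return __truncXfYf2__(a);
    }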
diff --git a/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c b/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c
index db4a716f92..331dc2bea2 100644
--- a/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c
+++ b/contrib/libs/cxxsupp/builtins/gcc_personality_v0.c
@@ -1,209 +1,209 @@
-/* ===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- */
-
-#include "int_lib.h"
-
-#include <unwind.h>
-
-/*
- * Pointer encodings documented at:
- * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
- */
-
-#define DW_EH_PE_omit 0xff /* no data follows */
-
-#define DW_EH_PE_absptr 0x00
-#define DW_EH_PE_uleb128 0x01
-#define DW_EH_PE_udata2 0x02
-#define DW_EH_PE_udata4 0x03
-#define DW_EH_PE_udata8 0x04
-#define DW_EH_PE_sleb128 0x09
-#define DW_EH_PE_sdata2 0x0A
-#define DW_EH_PE_sdata4 0x0B
-#define DW_EH_PE_sdata8 0x0C
-
-#define DW_EH_PE_pcrel 0x10
-#define DW_EH_PE_textrel 0x20
-#define DW_EH_PE_datarel 0x30
-#define DW_EH_PE_funcrel 0x40
-#define DW_EH_PE_aligned 0x50
-#define DW_EH_PE_indirect 0x80 /* gcc extension */
-
-
-
-/* read a uleb128 encoded value and advance pointer */
-static uintptr_t readULEB128(const uint8_t** data)
-{
- uintptr_t result = 0;
- uintptr_t shift = 0;
- unsigned char byte;
- const uint8_t* p = *data;
- do {
- byte = *p++;
- result |= (byte & 0x7f) << shift;
- shift += 7;
- } while (byte & 0x80);
- *data = p;
- return result;
-}
-
-/* read a pointer encoded value and advance pointer */
-static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding)
-{
- const uint8_t* p = *data;
- uintptr_t result = 0;
-
- if ( encoding == DW_EH_PE_omit )
- return 0;
-
- /* first get value */
- switch (encoding & 0x0F) {
- case DW_EH_PE_absptr:
- result = *((const uintptr_t*)p);
- p += sizeof(uintptr_t);
- break;
- case DW_EH_PE_uleb128:
- result = readULEB128(&p);
- break;
- case DW_EH_PE_udata2:
- result = *((const uint16_t*)p);
- p += sizeof(uint16_t);
- break;
- case DW_EH_PE_udata4:
- result = *((const uint32_t*)p);
- p += sizeof(uint32_t);
- break;
- case DW_EH_PE_udata8:
- result = *((const uint64_t*)p);
- p += sizeof(uint64_t);
- break;
- case DW_EH_PE_sdata2:
- result = *((const int16_t*)p);
- p += sizeof(int16_t);
- break;
- case DW_EH_PE_sdata4:
- result = *((const int32_t*)p);
- p += sizeof(int32_t);
- break;
- case DW_EH_PE_sdata8:
- result = *((const int64_t*)p);
- p += sizeof(int64_t);
- break;
- case DW_EH_PE_sleb128:
- default:
- /* not supported */
- compilerrt_abort();
- break;
- }
-
- /* then add relative offset */
- switch ( encoding & 0x70 ) {
- case DW_EH_PE_absptr:
- /* do nothing */
- break;
- case DW_EH_PE_pcrel:
- result += (uintptr_t)(*data);
- break;
- case DW_EH_PE_textrel:
- case DW_EH_PE_datarel:
- case DW_EH_PE_funcrel:
- case DW_EH_PE_aligned:
- default:
- /* not supported */
- compilerrt_abort();
- break;
- }
-
- /* then apply indirection */
- if (encoding & DW_EH_PE_indirect) {
- result = *((const uintptr_t*)result);
- }
-
- *data = p;
- return result;
-}
-
-
-/*
- * The C compiler makes references to __gcc_personality_v0 in
- * the dwarf unwind information for translation units that use
- * __attribute__((cleanup(xx))) on local variables.
- * This personality routine is called by the system unwinder
- * on each frame as the stack is unwound during a C++ exception
- * throw through a C function compiled with -fexceptions.
- */
-#if __USING_SJLJ_EXCEPTIONS__
-// the setjmp/longjmp based exceptions personality routine has a different name
-COMPILER_RT_ABI _Unwind_Reason_Code
-__gcc_personality_sj0(int version, _Unwind_Action actions,
- uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
- struct _Unwind_Context *context)
-#else
-COMPILER_RT_ABI _Unwind_Reason_Code
-__gcc_personality_v0(int version, _Unwind_Action actions,
- uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
- struct _Unwind_Context *context)
-#endif
-{
- /* Since C does not have catch clauses, there is nothing to do during */
- /* phase 1 (the search phase). */
- if ( actions & _UA_SEARCH_PHASE )
- return _URC_CONTINUE_UNWIND;
-
- /* There is nothing to do if there is no LSDA for this frame. */
- const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context);
- if ( lsda == (uint8_t*) 0 )
- return _URC_CONTINUE_UNWIND;
-
- uintptr_t pc = _Unwind_GetIP(context)-1;
- uintptr_t funcStart = _Unwind_GetRegionStart(context);
- uintptr_t pcOffset = pc - funcStart;
-
- /* Parse LSDA header. */
- uint8_t lpStartEncoding = *lsda++;
- if (lpStartEncoding != DW_EH_PE_omit) {
- readEncodedPointer(&lsda, lpStartEncoding);
- }
- uint8_t ttypeEncoding = *lsda++;
- if (ttypeEncoding != DW_EH_PE_omit) {
- readULEB128(&lsda);
- }
- /* Walk call-site table looking for range that includes current PC. */
- uint8_t callSiteEncoding = *lsda++;
- uint32_t callSiteTableLength = readULEB128(&lsda);
- const uint8_t* callSiteTableStart = lsda;
- const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength;
- const uint8_t* p=callSiteTableStart;
- while (p < callSiteTableEnd) {
- uintptr_t start = readEncodedPointer(&p, callSiteEncoding);
- uintptr_t length = readEncodedPointer(&p, callSiteEncoding);
- uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding);
- readULEB128(&p); /* action value not used for C code */
- if ( landingPad == 0 )
- continue; /* no landing pad for this entry */
- if ( (start <= pcOffset) && (pcOffset < (start+length)) ) {
- /* Found landing pad for the PC.
- * Set Instruction Pointer so we re-enter the function
- * at the landing pad. The landing pad is created by the compiler
- * to take two parameters in registers.
- */
- _Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
- (uintptr_t)exceptionObject);
- _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
- _Unwind_SetIP(context, (funcStart + landingPad));
- return _URC_INSTALL_CONTEXT;
- }
- }
-
- /* No landing pad found, continue unwinding. */
- return _URC_CONTINUE_UNWIND;
-}
-
+/* ===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ */
+
+#include "int_lib.h"
+
+#include <unwind.h>
+
+/*
+ * Pointer encodings documented at:
+ * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
+ */
+
+#define DW_EH_PE_omit 0xff /* no data follows */
+
+#define DW_EH_PE_absptr 0x00
+#define DW_EH_PE_uleb128 0x01
+#define DW_EH_PE_udata2 0x02
+#define DW_EH_PE_udata4 0x03
+#define DW_EH_PE_udata8 0x04
+#define DW_EH_PE_sleb128 0x09
+#define DW_EH_PE_sdata2 0x0A
+#define DW_EH_PE_sdata4 0x0B
+#define DW_EH_PE_sdata8 0x0C
+
+#define DW_EH_PE_pcrel 0x10
+#define DW_EH_PE_textrel 0x20
+#define DW_EH_PE_datarel 0x30
+#define DW_EH_PE_funcrel 0x40
+#define DW_EH_PE_aligned 0x50
+#define DW_EH_PE_indirect 0x80 /* gcc extension */
+
+
+
+/* read a uleb128 encoded value and advance pointer */
+static uintptr_t readULEB128(const uint8_t** data)
+{
+ uintptr_t result = 0;
+ uintptr_t shift = 0;
+ unsigned char byte;
+ const uint8_t* p = *data;
+ do {
+ byte = *p++;
+ result |= (byte & 0x7f) << shift;
+ shift += 7;
+ } while (byte & 0x80);
+ *data = p;
+ return result;
+}
+
+/* read a pointer encoded value and advance pointer */
+static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding)
+{
+ const uint8_t* p = *data;
+ uintptr_t result = 0;
+
+ if ( encoding == DW_EH_PE_omit )
+ return 0;
+
+ /* first get value */
+ switch (encoding & 0x0F) {
+ case DW_EH_PE_absptr:
+ result = *((const uintptr_t*)p);
+ p += sizeof(uintptr_t);
+ break;
+ case DW_EH_PE_uleb128:
+ result = readULEB128(&p);
+ break;
+ case DW_EH_PE_udata2:
+ result = *((const uint16_t*)p);
+ p += sizeof(uint16_t);
+ break;
+ case DW_EH_PE_udata4:
+ result = *((const uint32_t*)p);
+ p += sizeof(uint32_t);
+ break;
+ case DW_EH_PE_udata8:
+ result = *((const uint64_t*)p);
+ p += sizeof(uint64_t);
+ break;
+ case DW_EH_PE_sdata2:
+ result = *((const int16_t*)p);
+ p += sizeof(int16_t);
+ break;
+ case DW_EH_PE_sdata4:
+ result = *((const int32_t*)p);
+ p += sizeof(int32_t);
+ break;
+ case DW_EH_PE_sdata8:
+ result = *((const int64_t*)p);
+ p += sizeof(int64_t);
+ break;
+ case DW_EH_PE_sleb128:
+ default:
+ /* not supported */
+ compilerrt_abort();
+ break;
+ }
+
+ /* then add relative offset */
+ switch ( encoding & 0x70 ) {
+ case DW_EH_PE_absptr:
+ /* do nothing */
+ break;
+ case DW_EH_PE_pcrel:
+ result += (uintptr_t)(*data);
+ break;
+ case DW_EH_PE_textrel:
+ case DW_EH_PE_datarel:
+ case DW_EH_PE_funcrel:
+ case DW_EH_PE_aligned:
+ default:
+ /* not supported */
+ compilerrt_abort();
+ break;
+ }
+
+ /* then apply indirection */
+ if (encoding & DW_EH_PE_indirect) {
+ result = *((const uintptr_t*)result);
+ }
+
+ *data = p;
+ return result;
+}
+
+
+/*
+ * The C compiler makes references to __gcc_personality_v0 in
+ * the dwarf unwind information for translation units that use
+ * __attribute__((cleanup(xx))) on local variables.
+ * This personality routine is called by the system unwinder
+ * on each frame as the stack is unwound during a C++ exception
+ * throw through a C function compiled with -fexceptions.
+ */
+#if __USING_SJLJ_EXCEPTIONS__
+// the setjmp/longjmp based exceptions personality routine has a different name
+COMPILER_RT_ABI _Unwind_Reason_Code
+__gcc_personality_sj0(int version, _Unwind_Action actions,
+ uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+ struct _Unwind_Context *context)
+#else
+COMPILER_RT_ABI _Unwind_Reason_Code
+__gcc_personality_v0(int version, _Unwind_Action actions,
+ uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+ struct _Unwind_Context *context)
+#endif
+{
+ /* Since C does not have catch clauses, there is nothing to do during */
+ /* phase 1 (the search phase). */
+ if ( actions & _UA_SEARCH_PHASE )
+ return _URC_CONTINUE_UNWIND;
+
+ /* There is nothing to do if there is no LSDA for this frame. */
+ const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context);
+ if ( lsda == (uint8_t*) 0 )
+ return _URC_CONTINUE_UNWIND;
+
+ uintptr_t pc = _Unwind_GetIP(context)-1;
+ uintptr_t funcStart = _Unwind_GetRegionStart(context);
+ uintptr_t pcOffset = pc - funcStart;
+
+ /* Parse LSDA header. */
+ uint8_t lpStartEncoding = *lsda++;
+ if (lpStartEncoding != DW_EH_PE_omit) {
+ readEncodedPointer(&lsda, lpStartEncoding);
+ }
+ uint8_t ttypeEncoding = *lsda++;
+ if (ttypeEncoding != DW_EH_PE_omit) {
+ readULEB128(&lsda);
+ }
+ /* Walk call-site table looking for range that includes current PC. */
+ uint8_t callSiteEncoding = *lsda++;
+ uint32_t callSiteTableLength = readULEB128(&lsda);
+ const uint8_t* callSiteTableStart = lsda;
+ const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength;
+ const uint8_t* p=callSiteTableStart;
+ while (p < callSiteTableEnd) {
+ uintptr_t start = readEncodedPointer(&p, callSiteEncoding);
+ uintptr_t length = readEncodedPointer(&p, callSiteEncoding);
+ uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding);
+ readULEB128(&p); /* action value not used for C code */
+ if ( landingPad == 0 )
+ continue; /* no landing pad for this entry */
+ if ( (start <= pcOffset) && (pcOffset < (start+length)) ) {
+ /* Found landing pad for the PC.
+ * Set Instruction Pointer so we re-enter the function
+ * at the landing pad. The landing pad is created by the compiler
+ * to take two parameters in registers.
+ */
+ _Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
+ (uintptr_t)exceptionObject);
+ _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
+ _Unwind_SetIP(context, (funcStart + landingPad));
+ return _URC_INSTALL_CONTEXT;
+ }
+ }
+
+ /* No landing pad found, continue unwinding. */
+ return _URC_CONTINUE_UNWIND;
+}
+
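As context for the comment above about __attribute__((cleanup(...))): a hypothetical translation unit like the following, compiled with -fexceptions, is the kind of code that makes the compiler emit references to __gcc_personality_v0. The function and file names here are illustrative only.

    #include <stdio.h>

    static void close_file(FILE **fp) {
        if (*fp)
            fclose(*fp);
    }

    void read_config(const char *path) {
        /* If a C++ exception unwinds through this frame, the personality
           routine above installs the cleanup landing pad so close_file runs. */
        __attribute__((cleanup(close_file))) FILE *f = fopen(path, "r");
        if (!f)
            return;
        /* ... parse the file ... */
    }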
diff --git a/contrib/libs/cxxsupp/builtins/i386/Makefile.mk b/contrib/libs/cxxsupp/builtins/i386/Makefile.mk
index 3d116afe95..f3776a02c0 100644
--- a/contrib/libs/cxxsupp/builtins/i386/Makefile.mk
+++ b/contrib/libs/cxxsupp/builtins/i386/Makefile.mk
@@ -1,20 +1,20 @@
-#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := i386
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs :=
+OnlyArchs := i386
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/contrib/libs/cxxsupp/builtins/i386/ashldi3.S b/contrib/libs/cxxsupp/builtins/i386/ashldi3.S
index bc1cf1e631..3fbd739038 100644
--- a/contrib/libs/cxxsupp/builtins/i386/ashldi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/ashldi3.S
@@ -1,58 +1,58 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// di_int __ashldi3(di_int input, int count);
-
-// This routine has some extra memory traffic, loading the 64-bit input via two
-// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
-// store. This is to avoid a write-small, read-large stall.
-// However, if callers of this routine can be safely assumed to store the argument
-// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
-// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
-
-#ifdef __i386__
-#ifdef __SSE2__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__ashldi3)
- movd 12(%esp), %xmm2 // Load count
-#ifndef TRUST_CALLERS_USE_64_BIT_STORES
- movd 4(%esp), %xmm0
- movd 8(%esp), %xmm1
- punpckldq %xmm1, %xmm0 // Load input
-#else
- movq 4(%esp), %xmm0 // Load input
-#endif
- psllq %xmm2, %xmm0 // shift input by count
- movd %xmm0, %eax
- psrlq $32, %xmm0
- movd %xmm0, %edx
- ret
-END_COMPILERRT_FUNCTION(__ashldi3)
-
-#else // Use GPRs instead of SSE2 instructions, if they aren't available.
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__ashldi3)
- movl 12(%esp), %ecx // Load count
- movl 8(%esp), %edx // Load high
- movl 4(%esp), %eax // Load low
-
- testl $0x20, %ecx // If count >= 32
- jnz 1f // goto 1
- shldl %cl, %eax, %edx // left shift high by count
- shll %cl, %eax // left shift low by count
- ret
-
-1: movl %eax, %edx // Move low to high
- xorl %eax, %eax // clear low
- shll %cl, %edx // shift high by count - 32
- ret
-END_COMPILERRT_FUNCTION(__ashldi3)
-
-#endif // __SSE2__
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __ashldi3(di_int input, int count);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashldi3)
+ movd 12(%esp), %xmm2 // Load count
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0 // Load input
+#else
+ movq 4(%esp), %xmm0 // Load input
+#endif
+ psllq %xmm2, %xmm0 // shift input by count
+ movd %xmm0, %eax
+ psrlq $32, %xmm0
+ movd %xmm0, %edx
+ ret
+END_COMPILERRT_FUNCTION(__ashldi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashldi3)
+ movl 12(%esp), %ecx // Load count
+ movl 8(%esp), %edx // Load high
+ movl 4(%esp), %eax // Load low
+
+ testl $0x20, %ecx // If count >= 32
+ jnz 1f // goto 1
+ shldl %cl, %eax, %edx // left shift high by count
+ shll %cl, %eax // left shift low by count
+ ret
+
+1: movl %eax, %edx // Move low to high
+ xorl %eax, %eax // clear low
+ shll %cl, %edx // shift high by count - 32
+ ret
+END_COMPILERRT_FUNCTION(__ashldi3)
+
+#endif // __SSE2__
+#endif // __i386__
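For reference, the semantics implemented above, sketched in portable C (shift counts 0-63 assumed, matching the GPR branch where a count of 32 or more moves the low word into the high word). This is an illustrative equivalent, not the generic compiler-rt source.

    #include <stdint.h>

    static int64_t ashldi3_ref(int64_t input, int count) {
        uint32_t lo = (uint32_t)input;
        uint32_t hi = (uint32_t)((uint64_t)input >> 32);
        if (count & 32) {                  /* count >= 32: low becomes the new high */
            hi = lo << (count & 31);
            lo = 0;
        } else if (count) {                /* 0 < count < 32: funnel shift */
            hi = (hi << count) | (lo >> (32 - count));
            lo <<= count;
        }
        return (int64_t)(((uint64_t)hi << 32) | lo);
    }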
diff --git a/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S b/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S
index b4549a198d..8f4742481b 100644
--- a/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/ashrdi3.S
@@ -1,69 +1,69 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// di_int __ashrdi3(di_int input, int count);
-
-#ifdef __i386__
-#ifdef __SSE2__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
- movd 12(%esp), %xmm2 // Load count
- movl 8(%esp), %eax
-#ifndef TRUST_CALLERS_USE_64_BIT_STORES
- movd 4(%esp), %xmm0
- movd 8(%esp), %xmm1
- punpckldq %xmm1, %xmm0 // Load input
-#else
- movq 4(%esp), %xmm0 // Load input
-#endif
-
- psrlq %xmm2, %xmm0 // unsigned shift input by count
-
- testl %eax, %eax // check the sign-bit of the input
- jns 1f // early out for positive inputs
-
- // If the input is negative, we need to construct the shifted sign bit
- // to or into the result, as xmm does not have a signed right shift.
- pcmpeqb %xmm1, %xmm1 // -1ULL
- psrlq $58, %xmm1 // 0x3f
- pandn %xmm1, %xmm2 // 63 - count
- pcmpeqb %xmm1, %xmm1 // -1ULL
- psubq %xmm1, %xmm2 // 64 - count
- psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits
- por %xmm1, %xmm0
-
- // Move the result back to the general purpose registers and return
-1: movd %xmm0, %eax
- psrlq $32, %xmm0
- movd %xmm0, %edx
- ret
-END_COMPILERRT_FUNCTION(__ashrdi3)
-
-#else // Use GPRs instead of SSE2 instructions, if they aren't available.
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
- movl 12(%esp), %ecx // Load count
- movl 8(%esp), %edx // Load high
- movl 4(%esp), %eax // Load low
-
- testl $0x20, %ecx // If count >= 32
- jnz 1f // goto 1
-
- shrdl %cl, %edx, %eax // right shift low by count
- sarl %cl, %edx // right shift high by count
- ret
-
-1: movl %edx, %eax // Move high to low
- sarl $31, %edx // fill high with the sign bit
- sarl %cl, %eax // shift low by count - 32
- ret
-END_COMPILERRT_FUNCTION(__ashrdi3)
-
-#endif // __SSE2__
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __ashrdi3(di_int input, int count);
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
+ movd 12(%esp), %xmm2 // Load count
+ movl 8(%esp), %eax
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0 // Load input
+#else
+ movq 4(%esp), %xmm0 // Load input
+#endif
+
+ psrlq %xmm2, %xmm0 // unsigned shift input by count
+
+ testl %eax, %eax // check the sign-bit of the input
+ jns 1f // early out for positive inputs
+
+ // If the input is negative, we need to construct the shifted sign bit
+ // to or into the result, as xmm does not have a signed right shift.
+ pcmpeqb %xmm1, %xmm1 // -1ULL
+ psrlq $58, %xmm1 // 0x3f
+ pandn %xmm1, %xmm2 // 63 - count
+ pcmpeqb %xmm1, %xmm1 // -1ULL
+ psubq %xmm1, %xmm2 // 64 - count
+ psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits
+ por %xmm1, %xmm0
+
+ // Move the result back to the general purpose registers and return
+1: movd %xmm0, %eax
+ psrlq $32, %xmm0
+ movd %xmm0, %edx
+ ret
+END_COMPILERRT_FUNCTION(__ashrdi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
+ movl 12(%esp), %ecx // Load count
+ movl 8(%esp), %edx // Load high
+ movl 4(%esp), %eax // Load low
+
+ testl $0x20, %ecx // If count >= 32
+ jnz 1f // goto 1
+
+ shrdl %cl, %edx, %eax // right shift low by count
+ sarl %cl, %edx // right shift high by count
+ ret
+
+1: movl %edx, %eax // Move high to low
+ sarl $31, %edx // fill high with the sign bit
+ sarl %cl, %eax // shift low by count - 32
+ ret
+END_COMPILERRT_FUNCTION(__ashrdi3)
+
+#endif // __SSE2__
+#endif // __i386__
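The arithmetic-shift counterpart, again as an illustrative C sketch (shift counts 0-63 assumed; arithmetic right shift on signed values is assumed, as on the targets this file supports). It mirrors the GPR branch above, where the high word is replicated from the sign bit once the count reaches 32.

    #include <stdint.h>

    static int64_t ashrdi3_ref(int64_t input, int count) {
        int32_t hi = (int32_t)((uint64_t)input >> 32);
        uint32_t lo = (uint32_t)input;
        if (count & 32) {                  /* count >= 32: old high becomes the new low */
            lo = (uint32_t)(hi >> (count & 31));
            hi >>= 31;                     /* fill the high word with the sign bit */
        } else if (count) {                /* 0 < count < 32: funnel shift */
            lo = (lo >> count) | ((uint32_t)hi << (32 - count));
            hi >>= count;
        }
        return (int64_t)(((uint64_t)(uint32_t)hi << 32) | lo);
    }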
diff --git a/contrib/libs/cxxsupp/builtins/i386/chkstk.S b/contrib/libs/cxxsupp/builtins/i386/chkstk.S
index ee5daaef1e..b59974868f 100644
--- a/contrib/libs/cxxsupp/builtins/i386/chkstk.S
+++ b/contrib/libs/cxxsupp/builtins/i386/chkstk.S
@@ -1,34 +1,34 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// _chkstk routine
-// This routine is windows specific
-// http://msdn.microsoft.com/en-us/library/ms648426.aspx
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__chkstk_ms)
- push %ecx
- push %eax
- cmp $0x1000,%eax
- lea 12(%esp),%ecx
- jb 1f
-2:
- sub $0x1000,%ecx
- test %ecx,(%ecx)
- sub $0x1000,%eax
- cmp $0x1000,%eax
- ja 2b
-1:
- sub %eax,%ecx
- test %ecx,(%ecx)
- pop %eax
- pop %ecx
- ret
-END_COMPILERRT_FUNCTION(__chkstk_ms)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// _chkstk routine
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__chkstk_ms)
+ push %ecx
+ push %eax
+ cmp $0x1000,%eax
+ lea 12(%esp),%ecx
+ jb 1f
+2:
+ sub $0x1000,%ecx
+ test %ecx,(%ecx)
+ sub $0x1000,%eax
+ cmp $0x1000,%eax
+ ja 2b
+1:
+ sub %eax,%ecx
+ test %ecx,(%ecx)
+ pop %eax
+ pop %ecx
+ ret
+END_COMPILERRT_FUNCTION(__chkstk_ms)
+
+#endif // __i386__
diff --git a/contrib/libs/cxxsupp/builtins/i386/chkstk2.S b/contrib/libs/cxxsupp/builtins/i386/chkstk2.S
index e4dbf58622..7d65bb0889 100644
--- a/contrib/libs/cxxsupp/builtins/i386/chkstk2.S
+++ b/contrib/libs/cxxsupp/builtins/i386/chkstk2.S
@@ -1,40 +1,40 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-#ifdef __i386__
-
-// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
-// then decrement %esp by %eax. Preserves all registers except %esp and flags.
-// This routine is windows specific
-// http://msdn.microsoft.com/en-us/library/ms648426.aspx
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function
-DEFINE_COMPILERRT_FUNCTION(__chkstk)
- push %ecx
- cmp $0x1000,%eax
- lea 8(%esp),%ecx // esp before calling this routine -> ecx
- jb 1f
-2:
- sub $0x1000,%ecx
- test %ecx,(%ecx)
- sub $0x1000,%eax
- cmp $0x1000,%eax
- ja 2b
-1:
- sub %eax,%ecx
- test %ecx,(%ecx)
-
- lea 4(%esp),%eax // load pointer to the return address into eax
- mov %ecx,%esp // install the new top of stack pointer into esp
- mov -4(%eax),%ecx // restore ecx
- push (%eax) // push return address onto the stack
- sub %esp,%eax // restore the original value in eax
- ret
-END_COMPILERRT_FUNCTION(__chkstk)
-END_COMPILERRT_FUNCTION(_alloca)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __i386__
+
+// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
+// then decrement %esp by %eax. Preserves all registers except %esp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+ push %ecx
+ cmp $0x1000,%eax
+ lea 8(%esp),%ecx // esp before calling this routine -> ecx
+ jb 1f
+2:
+ sub $0x1000,%ecx
+ test %ecx,(%ecx)
+ sub $0x1000,%eax
+ cmp $0x1000,%eax
+ ja 2b
+1:
+ sub %eax,%ecx
+ test %ecx,(%ecx)
+
+ lea 4(%esp),%eax // load pointer to the return address into eax
+ mov %ecx,%esp // install the new top of stack pointer into esp
+ mov -4(%eax),%ecx // restore ecx
+ push (%eax) // push return address onto the stack
+ sub %esp,%eax // restore the original value in eax
+ ret
+END_COMPILERRT_FUNCTION(__chkstk)
+END_COMPILERRT_FUNCTION(_alloca)
+
+#endif // __i386__
diff --git a/contrib/libs/cxxsupp/builtins/i386/divdi3.S b/contrib/libs/cxxsupp/builtins/i386/divdi3.S
index ceb0e80270..2cb0ddd4c2 100644
--- a/contrib/libs/cxxsupp/builtins/i386/divdi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/divdi3.S
@@ -1,162 +1,162 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// di_int __divdi3(di_int a, di_int b);
-
-// result = a / b.
-// both inputs and the output are 64-bit signed integers.
-// This will do whatever the underlying hardware is set to do on division by zero.
-// No other exceptions are generated, as the divide cannot overflow.
-//
-// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
-// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
-// currently possible via simulation of integer divides on the x87 unit.
-//
-// Stephen Canon, December 2008
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__divdi3)
-
-/* This is currently implemented by wrapping the unsigned divide up in an absolute
- value, then restoring the correct sign at the end of the computation. This could
- certainly be improved upon. */
-
- pushl %esi
- movl 20(%esp), %edx // high word of b
- movl 16(%esp), %eax // low word of b
- movl %edx, %ecx
- sarl $31, %ecx // (b < 0) ? -1 : 0
- xorl %ecx, %eax
- xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
- subl %ecx, %eax
- sbbl %ecx, %edx // EDX:EAX = abs(b)
- movl %edx, 20(%esp)
- movl %eax, 16(%esp) // store abs(b) back to stack
- movl %ecx, %esi // set aside sign of b
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __divdi3(di_int a, di_int b);
+
+// result = a / b.
+// both inputs and the output are 64-bit signed integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__divdi3)
+
+/* This is currently implemented by wrapping the unsigned divide up in an absolute
+ value, then restoring the correct sign at the end of the computation. This could
+ certainly be improved upon. */
+
+ pushl %esi
+ movl 20(%esp), %edx // high word of b
+ movl 16(%esp), %eax // low word of b
+ movl %edx, %ecx
+ sarl $31, %ecx // (b < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(b)
+ movl %edx, 20(%esp)
+ movl %eax, 16(%esp) // store abs(b) back to stack
+ movl %ecx, %esi // set aside sign of b
+
+ movl 12(%esp), %edx // high word of a
+ movl 8(%esp), %eax // low word of a
+ movl %edx, %ecx
+ sarl $31, %ecx // (a < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(a)
+ movl %edx, 12(%esp)
+ movl %eax, 8(%esp) // store abs(a) back to stack
+ xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
+
+ pushl %ebx
+ movl 24(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 16(%esp), %edx // Load the high and low words of a, and jump
+ movl 12(%esp), %eax // to [1] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 1f
+
+ /* High word of a is less than (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+
+1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
- movl 12(%esp), %edx // high word of a
- movl 8(%esp), %eax // low word of a
- movl %edx, %ecx
- sarl $31, %ecx // (a < 0) ? -1 : 0
- xorl %ecx, %eax
- xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
- subl %ecx, %eax
- sbbl %ecx, %edx // EDX:EAX = abs(a)
- movl %edx, 12(%esp)
- movl %eax, 8(%esp) // store abs(a) back to stack
- xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
-
- pushl %ebx
- movl 24(%esp), %ebx // Find the index i of the leading bit in b.
- bsrl %ebx, %ecx // If the high word of b is zero, jump to
- jz 9f // the code to handle that special case [9].
-
- /* High word of b is known to be non-zero on this branch */
-
- movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
-
- shrl %cl, %eax // Practically, this means that bhi is given by:
- shrl %eax //
- notl %ecx // bhi = (high word of b) << (31 - i) |
- shll %cl, %ebx // (low word of b) >> (1 + i)
- orl %eax, %ebx //
- movl 16(%esp), %edx // Load the high and low words of a, and jump
- movl 12(%esp), %eax // to [1] if the high word is larger than bhi
- cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
- jae 1f
-
- /* High word of a is less than (b >> (1 + i)) on this branch */
-
- divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
-
- pushl %edi
- notl %ecx
- shrl %eax
- shrl %cl, %eax // q = qs >> (1 + i)
- movl %eax, %edi
- mull 24(%esp) // q*blo
- movl 16(%esp), %ebx
- movl 20(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 28(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
- sbbl $0, %edi // decrement q if remainder is negative
- xorl %edx, %edx
- movl %edi, %eax
-
- addl %esi, %eax // Restore correct sign to result
- adcl %esi, %edx
- xorl %esi, %eax
- xorl %esi, %edx
- popl %edi // Restore callee-save registers
- popl %ebx
- popl %esi
- retl // Return
-
-
-1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
-
- subl %ebx, %edx // subtract bhi from ahi so that divide will not
- divl %ebx // overflow, and find q and r such that
- //
- // ahi:alo = (1:q)*bhi + r
- //
- // Note that q is a number in (31-i).(1+i)
- // fix point.
-
- pushl %edi
- notl %ecx
- shrl %eax
- orl $0x80000000, %eax
- shrl %cl, %eax // q = (1:qs) >> (1 + i)
- movl %eax, %edi
- mull 24(%esp) // q*blo
- movl 16(%esp), %ebx
- movl 20(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 28(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
- sbbl $0, %edi // decrement q if remainder is negative
- xorl %edx, %edx
- movl %edi, %eax
-
- addl %esi, %eax // Restore correct sign to result
- adcl %esi, %edx
- xorl %esi, %eax
- xorl %esi, %edx
- popl %edi // Restore callee-save registers
- popl %ebx
- popl %esi
- retl // Return
-
-
-9: /* High word of b is zero on this branch */
-
- movl 16(%esp), %eax // Find qhi and rhi such that
- movl 20(%esp), %ecx //
- xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
- divl %ecx //
- movl %eax, %ebx //
- movl 12(%esp), %eax // Find qlo such that
- divl %ecx //
- movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
-
- addl %esi, %eax // Restore correct sign to result
- adcl %esi, %edx
- xorl %esi, %eax
- xorl %esi, %edx
- popl %ebx // Restore callee-save registers
- popl %esi
- retl // Return
-END_COMPILERRT_FUNCTION(__divdi3)
-
-#endif // __i386__
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+
+9: /* High word of b is zero on this branch */
+
+ movl 16(%esp), %eax // Find qhi and rhi such that
+ movl 20(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 12(%esp), %eax // Find qlo such that
+ divl %ecx //
+ movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %ebx // Restore callee-save registers
+ popl %esi
+ retl // Return
+END_COMPILERRT_FUNCTION(__divdi3)
+
+#endif // __i386__
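The sign handling that the comment above describes ("wrapping the unsigned divide up in an absolute value"), as an illustrative C sketch. Arithmetic right shift on signed values is assumed; the plain unsigned 64-bit divide stands in for the shift-and-multiply sequence in the assembly.

    #include <stdint.h>

    static int64_t divdi3_ref(int64_t a, int64_t b) {
        int64_t sa = a >> 63;                       /* (a < 0) ? -1 : 0 */
        int64_t sb = b >> 63;                       /* (b < 0) ? -1 : 0 */
        uint64_t ua = (uint64_t)((a ^ sa) - sa);    /* abs(a) */
        uint64_t ub = (uint64_t)((b ^ sb) - sb);    /* abs(b) */
        int64_t sign = sa ^ sb;                     /* sign of result = sign(a) ^ sign(b) */
        uint64_t q = ua / ub;                       /* unsigned divide */
        return ((int64_t)q ^ sign) - sign;          /* restore the correct sign */
    }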
diff --git a/contrib/libs/cxxsupp/builtins/i386/floatdidf.S b/contrib/libs/cxxsupp/builtins/i386/floatdidf.S
index 21d6154a76..dcc32f8ed8 100644
--- a/contrib/libs/cxxsupp/builtins/i386/floatdidf.S
+++ b/contrib/libs/cxxsupp/builtins/i386/floatdidf.S
@@ -1,39 +1,39 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// double __floatdidf(di_int a);
-
-#ifdef __i386__
-
-CONST_SECTION
-
- .balign 16
-twop52:
- .quad 0x4330000000000000
-
- .balign 16
-twop32:
- .quad 0x41f0000000000000
-
-#define REL_ADDR(_a) (_a)-0b(%eax)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatdidf)
- cvtsi2sd 8(%esp), %xmm1
- movss 4(%esp), %xmm0 // low 32 bits of a
- calll 0f
-0: popl %eax
- mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding)
- movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52
- subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs)
- orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs)
- addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
- movsd %xmm0, 4(%esp)
- fldl 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatdidf)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// double __floatdidf(di_int a);
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop32:
+ .quad 0x41f0000000000000
+
+#define REL_ADDR(_a) (_a)-0b(%eax)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatdidf)
+ cvtsi2sd 8(%esp), %xmm1
+ movss 4(%esp), %xmm0 // low 32 bits of a
+ calll 0f
+0: popl %eax
+ mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding)
+ movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52
+ subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs)
+ orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs)
+ addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
+ movsd %xmm0, 4(%esp)
+ fldl 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatdidf)
+
+#endif // __i386__
diff --git a/contrib/libs/cxxsupp/builtins/i386/floatdisf.S b/contrib/libs/cxxsupp/builtins/i386/floatdisf.S
index ee6f07c860..f642767036 100644
--- a/contrib/libs/cxxsupp/builtins/i386/floatdisf.S
+++ b/contrib/libs/cxxsupp/builtins/i386/floatdisf.S
@@ -1,32 +1,32 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// float __floatdisf(di_int a);
-
-// This routine has some extra memory traffic, loading the 64-bit input via two
-// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
-// store. This is to avoid a write-small, read-large stall.
-// However, if callers of this routine can be safely assumed to store the argument
-// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
-// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatdisf)
-#ifndef TRUST_CALLERS_USE_64_BIT_STORES
- movd 4(%esp), %xmm0
- movd 8(%esp), %xmm1
- punpckldq %xmm1, %xmm0
- movq %xmm0, 4(%esp)
-#endif
- fildll 4(%esp)
- fstps 4(%esp)
- flds 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatdisf)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatdisf(di_int a);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatdisf)
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0
+ movq %xmm0, 4(%esp)
+#endif
+ fildll 4(%esp)
+ fstps 4(%esp)
+ flds 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatdisf)
+
+#endif // __i386__
diff --git a/contrib/libs/cxxsupp/builtins/i386/floatdixf.S b/contrib/libs/cxxsupp/builtins/i386/floatdixf.S
index 2e9ee2ce61..839b0434c0 100644
--- a/contrib/libs/cxxsupp/builtins/i386/floatdixf.S
+++ b/contrib/libs/cxxsupp/builtins/i386/floatdixf.S
@@ -1,30 +1,30 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// long double __floatdixf(di_int a);
-
-#ifdef __i386__
-
-// This routine has some extra memory traffic, loading the 64-bit input via two
-// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
-// store. This is to avoid a write-small, read-large stall.
-// However, if callers of this routine can be safely assumed to store the argument
-// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
-// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatdixf)
-#ifndef TRUST_CALLERS_USE_64_BIT_STORES
- movd 4(%esp), %xmm0
- movd 8(%esp), %xmm1
- punpckldq %xmm1, %xmm0
- movq %xmm0, 4(%esp)
-#endif
- fildll 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatdixf)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// long double __floatdixf(di_int a);
+
+#ifdef __i386__
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatdixf)
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0
+ movq %xmm0, 4(%esp)
+#endif
+ fildll 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatdixf)
+
+#endif // __i386__
diff --git a/contrib/libs/cxxsupp/builtins/i386/floatundidf.S b/contrib/libs/cxxsupp/builtins/i386/floatundidf.S
index 104ee8dc81..8058c2ac0a 100644
--- a/contrib/libs/cxxsupp/builtins/i386/floatundidf.S
+++ b/contrib/libs/cxxsupp/builtins/i386/floatundidf.S
@@ -1,52 +1,52 @@
-//===-- floatundidf.S - Implement __floatundidf for i386 ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatundidf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// double __floatundidf(du_int a);
-
-#ifdef __i386__
-
-CONST_SECTION
-
- .balign 16
-twop52:
- .quad 0x4330000000000000
-
- .balign 16
-twop84_plus_twop52:
- .quad 0x4530000000100000
-
- .balign 16
-twop84:
- .quad 0x4530000000000000
-
-#define REL_ADDR(_a) (_a)-0b(%eax)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundidf)
- movss 8(%esp), %xmm1 // high 32 bits of a
- movss 4(%esp), %xmm0 // low 32 bits of a
- calll 0f
-0: popl %eax
- orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs)
- subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs)
- orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs)
- addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
- movsd %xmm0, 4(%esp)
- fldl 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatundidf)
-
-#endif // __i386__
+//===-- floatundidf.S - Implement __floatundidf for i386 ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// double __floatundidf(du_int a);
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop84_plus_twop52:
+ .quad 0x4530000000100000
+
+ .balign 16
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)-0b(%eax)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundidf)
+ movss 8(%esp), %xmm1 // high 32 bits of a
+ movss 4(%esp), %xmm0 // low 32 bits of a
+ calll 0f
+0: popl %eax
+ orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs)
+ subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs)
+ orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs)
+ addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
+ movsd %xmm0, 4(%esp)
+ fldl 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundidf)
+
+#endif // __i386__
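The exponent-bias trick used above (the twop52/twop84 constants), as an illustrative C sketch: ORing the halves of the integer into the significands of 2^52 and 2^84 produces exact doubles, and a single subtraction plus a final addition reconstruct the value with one rounding. memcpy serves as the usual strict-aliasing-safe bit cast; the constants are the same bit patterns as in the assembly.

    #include <stdint.h>
    #include <string.h>

    static double floatundidf_ref(uint64_t a) {
        uint64_t hi_bits = UINT64_C(0x4530000000000000) | (a >> 32);          /* 2^84 + hi */
        uint64_t lo_bits = UINT64_C(0x4330000000000000) | (a & 0xffffffffu);  /* 2^52 + lo */
        uint64_t k_bits  = UINT64_C(0x4530000000100000);                      /* 2^84 + 2^52 */
        double hi, lo, k;
        memcpy(&hi, &hi_bits, sizeof hi);
        memcpy(&lo, &lo_bits, sizeof lo);
        memcpy(&k,  &k_bits,  sizeof k);
        return (hi - k) + lo;   /* exact until the final add, which rounds once */
    }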
diff --git a/contrib/libs/cxxsupp/builtins/i386/floatundisf.S b/contrib/libs/cxxsupp/builtins/i386/floatundisf.S
index b7db958283..94c97e25aa 100644
--- a/contrib/libs/cxxsupp/builtins/i386/floatundisf.S
+++ b/contrib/libs/cxxsupp/builtins/i386/floatundisf.S
@@ -1,105 +1,105 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// float __floatundisf(du_int a);
-
-// Note that there is a hardware instruction, fildll, that does most of what
-// this function needs to do. However, because of our ia32 ABI, it will take
-// a write-small read-large stall, so the software implementation here is
-// actually several cycles faster.
-
-// This is a branch-free implementation. A branchy implementation might be
-// faster for the common case if you know something a priori about the input
-// distribution.
-
-/* branch-free x87 implementation - one cycle slower than without x87.
-
-#ifdef __i386__
-
-CONST_SECTION
-.balign 3
-
- .quad 0x43f0000000000000
-twop64: .quad 0x0000000000000000
-
-#define TWOp64 twop64-0b(%ecx,%eax,8)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundisf)
- movl 8(%esp), %eax
- movd 8(%esp), %xmm1
- movd 4(%esp), %xmm0
- punpckldq %xmm1, %xmm0
- calll 0f
-0: popl %ecx
- sarl $31, %eax
- movq %xmm0, 4(%esp)
- fildll 4(%esp)
- faddl TWOp64
- fstps 4(%esp)
- flds 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatundisf)
-
-#endif // __i386__
-
-*/
-
-/* branch-free, x87-free implementation - faster at the expense of code size */
-
-#ifdef __i386__
-
-CONST_SECTION
-
- .balign 16
-twop52:
- .quad 0x4330000000000000
- .quad 0x0000000000000fff
-
- .balign 16
-sticky:
- .quad 0x0000000000000000
- .long 0x00000012
-
- .balign 16
-twelve:
- .long 0x00000000
-
-#define TWOp52 twop52-0b(%ecx)
-#define STICKY sticky-0b(%ecx,%eax,8)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundisf)
- movl 8(%esp), %eax
- movd 8(%esp), %xmm1
- movd 4(%esp), %xmm0
- punpckldq %xmm1, %xmm0
-
- calll 0f
-0: popl %ecx
- shrl %eax // high 31 bits of input as sint32
- addl $0x7ff80000, %eax
- sarl $31, %eax // (big input) ? -1 : 0
- movsd STICKY, %xmm1 // (big input) ? 0xfff : 0
- movl $12, %edx
- andl %eax, %edx // (big input) ? 12 : 0
- movd %edx, %xmm3
- andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0
- movsd TWOp52, %xmm2 // 0x1.0p52
- psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input
- orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input)
- orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input)
- subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 | input & 0xfff) : input)
- cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 | input & 0xfff) : input)
- pslld $23, %xmm3
- paddd %xmm3, %xmm0 // (float)input
- movd %xmm0, 4(%esp)
- flds 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatundisf)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatundisf(du_int a);
+
+// Note that there is a hardware instruction, fildll, that does most of what
+// this function needs to do. However, because of our ia32 ABI, it will take
+// a write-small read-large stall, so the software implementation here is
+// actually several cycles faster.
+
+// This is a branch-free implementation. A branchy implementation might be
+// faster for the common case if you know something a priori about the input
+// distribution.
+
+/* branch-free x87 implementation - one cycle slower than without x87.
+
+#ifdef __i386__
+
+CONST_SECTION
+.balign 3
+
+ .quad 0x43f0000000000000
+twop64: .quad 0x0000000000000000
+
+#define TWOp64 twop64-0b(%ecx,%eax,8)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundisf)
+ movl 8(%esp), %eax
+ movd 8(%esp), %xmm1
+ movd 4(%esp), %xmm0
+ punpckldq %xmm1, %xmm0
+ calll 0f
+0: popl %ecx
+ sarl $31, %eax
+ movq %xmm0, 4(%esp)
+ fildll 4(%esp)
+ faddl TWOp64
+ fstps 4(%esp)
+ flds 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundisf)
+
+#endif // __i386__
+
+*/
+
+/* branch-free, x87-free implementation - faster at the expense of code size */
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+ .quad 0x0000000000000fff
+
+ .balign 16
+sticky:
+ .quad 0x0000000000000000
+ .long 0x00000012
+
+ .balign 16
+twelve:
+ .long 0x00000000
+
+#define TWOp52 twop52-0b(%ecx)
+#define STICKY sticky-0b(%ecx,%eax,8)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundisf)
+ movl 8(%esp), %eax
+ movd 8(%esp), %xmm1
+ movd 4(%esp), %xmm0
+ punpckldq %xmm1, %xmm0
+
+ calll 0f
+0: popl %ecx
+ shrl %eax // high 31 bits of input as sint32
+ addl $0x7ff80000, %eax
+ sarl $31, %eax // (big input) ? -1 : 0
+ movsd STICKY, %xmm1 // (big input) ? 0xfff : 0
+ movl $12, %edx
+ andl %eax, %edx // (big input) ? 12 : 0
+ movd %edx, %xmm3
+ andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0
+ movsd TWOp52, %xmm2 // 0x1.0p52
+ psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input
+ orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input)
+ orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input)
+ subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 | input & 0xfff) : input)
+ cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 | input & 0xfff) : input)
+ pslld $23, %xmm3
+ paddd %xmm3, %xmm0 // (float)input
+ movd %xmm0, 4(%esp)
+ flds 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundisf)
+
+#endif // __i386__
diff --git a/contrib/libs/cxxsupp/builtins/i386/floatundixf.S b/contrib/libs/cxxsupp/builtins/i386/floatundixf.S
index c6c29e67f1..814b52f941 100644
--- a/contrib/libs/cxxsupp/builtins/i386/floatundixf.S
+++ b/contrib/libs/cxxsupp/builtins/i386/floatundixf.S
@@ -1,43 +1,43 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// long double __floatundixf(du_int a);
-
-#ifdef __i386__
-
-CONST_SECTION
-
- .balign 16
-twop52:
- .quad 0x4330000000000000
-
- .balign 16
-twop84_plus_twop52_neg:
- .quad 0xc530000000100000
-
- .balign 16
-twop84:
- .quad 0x4530000000000000
-
-#define REL_ADDR(_a) (_a)-0b(%eax)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundixf)
- calll 0f
-0: popl %eax
- movss 8(%esp), %xmm0 // hi 32 bits of input
- movss 4(%esp), %xmm1 // lo 32 bits of input
- orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double)
- orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double)
- addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs)
- movsd %xmm1, 4(%esp)
- fldl 4(%esp)
- movsd %xmm0, 4(%esp)
- faddl 4(%esp)
- ret
-END_COMPILERRT_FUNCTION(__floatundixf)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// long double __floatundixf(du_int a);
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop84_plus_twop52_neg:
+ .quad 0xc530000000100000
+
+ .balign 16
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)-0b(%eax)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundixf)
+ calll 0f
+0: popl %eax
+ movss 8(%esp), %xmm0 // hi 32 bits of input
+ movss 4(%esp), %xmm1 // lo 32 bits of input
+ orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double)
+ orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double)
+ addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs)
+ movsd %xmm1, 4(%esp)
+ fldl 4(%esp)
+ movsd %xmm0, 4(%esp)
+ faddl 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundixf)
+
+#endif // __i386__
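
__floatundixf splits the 64-bit input into 32-bit halves, builds the exact doubles 2^84 + hi*2^32 and 2^52 + lo, then cancels the biases so an extended-precision addition reconstructs the full value without rounding. Below is a hedged C sketch of the same arithmetic; the name u64_to_long_double_sketch is made up, and it assumes long double carries at least 64 mantissa bits (the x87 format this file targets):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch, mirroring the twop84 / twop52 /
     * twop84_plus_twop52_neg constants in the table above. */
    static long double u64_to_long_double_sketch(uint64_t a) {
        uint64_t hi_bits = 0x4530000000000000ull | (a >> 32);         /* 2^84 + hi*2^32 */
        uint64_t lo_bits = 0x4330000000000000ull | (a & 0xffffffffu); /* 2^52 + lo      */
        double hi_d, lo_d;
        memcpy(&hi_d, &hi_bits, sizeof hi_d);
        memcpy(&lo_d, &lo_bits, sizeof lo_d);
        hi_d += -0x1.00000001p+84;      /* -(2^84 + 2^52): leaves hi*2^32 - 2^52, exactly */
        return (long double)lo_d + (long double)hi_d;  /* (2^52 + lo) + (hi*2^32 - 2^52) */
    }

The intermediate hi*2^32 - 2^52 is a multiple of 2^32 below 2^64, so it fits a double exactly; the final sum needs up to 64 significant bits, which is why the assembly performs it on the x87 stack.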
diff --git a/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S b/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S
index 74673c03de..b80f11a380 100644
--- a/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/lshrdi3.S
@@ -1,59 +1,59 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// di_int __lshrdi3(di_int input, int count);
-
-// This routine has some extra memory traffic, loading the 64-bit input via two
-// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
-// store. This is to avoid a write-small, read-large stall.
-// However, if callers of this routine can be safely assumed to store the argument
-// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
-// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
-
-#ifdef __i386__
-#ifdef __SSE2__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
- movd 12(%esp), %xmm2 // Load count
-#ifndef TRUST_CALLERS_USE_64_BIT_STORES
- movd 4(%esp), %xmm0
- movd 8(%esp), %xmm1
- punpckldq %xmm1, %xmm0 // Load input
-#else
- movq 4(%esp), %xmm0 // Load input
-#endif
- psrlq %xmm2, %xmm0 // shift input by count
- movd %xmm0, %eax
- psrlq $32, %xmm0
- movd %xmm0, %edx
- ret
-END_COMPILERRT_FUNCTION(__lshrdi3)
-
-#else // Use GPRs instead of SSE2 instructions, if they aren't available.
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
- movl 12(%esp), %ecx // Load count
- movl 8(%esp), %edx // Load high
- movl 4(%esp), %eax // Load low
-
- testl $0x20, %ecx // If count >= 32
- jnz 1f // goto 1
-
- shrdl %cl, %edx, %eax // right shift low by count
- shrl %cl, %edx // right shift high by count
- ret
-
-1: movl %edx, %eax // Move high to low
- xorl %edx, %edx // clear high
- shrl %cl, %eax // shift low by count - 32
- ret
-END_COMPILERRT_FUNCTION(__lshrdi3)
-
-#endif // __SSE2__
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __lshrdi3(di_int input, int count);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
+ movd 12(%esp), %xmm2 // Load count
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0 // Load input
+#else
+ movq 4(%esp), %xmm0 // Load input
+#endif
+ psrlq %xmm2, %xmm0 // shift input by count
+ movd %xmm0, %eax
+ psrlq $32, %xmm0
+ movd %xmm0, %edx
+ ret
+END_COMPILERRT_FUNCTION(__lshrdi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
+ movl 12(%esp), %ecx // Load count
+ movl 8(%esp), %edx // Load high
+ movl 4(%esp), %eax // Load low
+
+ testl $0x20, %ecx // If count >= 32
+ jnz 1f // goto 1
+
+ shrdl %cl, %edx, %eax // right shift low by count
+ shrl %cl, %edx // right shift high by count
+ ret
+
+1: movl %edx, %eax // Move high to low
+ xorl %edx, %edx // clear high
+ shrl %cl, %eax // shift low by count - 32
+ ret
+END_COMPILERRT_FUNCTION(__lshrdi3)
+
+#endif // __SSE2__
+#endif // __i386__
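
The non-SSE2 path above is the standard two-word shift: shrdl funnels high-word bits into the low word, and counts of 32 or more are handled by moving the high word down. A rough C equivalent follows (lshr64_sketch is a hypothetical name; the count-0 guard exists only because C, unlike shrdl, makes a 32-bit shift by 32 - count undefined when count is 0):

    #include <stdint.h>

    /* Illustrative sketch of the GPR fallback above. */
    static uint64_t lshr64_sketch(uint64_t input, unsigned count) {
        uint32_t lo = (uint32_t)input;
        uint32_t hi = (uint32_t)(input >> 32);
        if (count & 0x20)                       /* count >= 32: high word becomes low word */
            return hi >> (count & 31);
        count &= 31;
        if (count == 0)
            return input;                       /* avoid the undefined shift by 32 below */
        lo = (lo >> count) | (hi << (32 - count));   /* shrdl */
        hi = hi >> count;                            /* shrl  */
        return ((uint64_t)hi << 32) | lo;
    }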
diff --git a/contrib/libs/cxxsupp/builtins/i386/moddi3.S b/contrib/libs/cxxsupp/builtins/i386/moddi3.S
index 94f922d447..b9cee9d7aa 100644
--- a/contrib/libs/cxxsupp/builtins/i386/moddi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/moddi3.S
@@ -1,166 +1,166 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// di_int __moddi3(di_int a, di_int b);
-
-// result = remainder of a / b.
-// both inputs and the output are 64-bit signed integers.
-// This will do whatever the underlying hardware is set to do on division by zero.
-// No other exceptions are generated, as the divide cannot overflow.
-//
-// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
-// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
-// currently possible via simulation of integer divides on the x87 unit.
-//
-
-// Stephen Canon, December 2008
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__moddi3)
-
-/* This is currently implemented by wrapping the unsigned modulus up in an absolute
- value. This could certainly be improved upon. */
-
- pushl %esi
- movl 20(%esp), %edx // high word of b
- movl 16(%esp), %eax // low word of b
- movl %edx, %ecx
- sarl $31, %ecx // (b < 0) ? -1 : 0
- xorl %ecx, %eax
- xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
- subl %ecx, %eax
- sbbl %ecx, %edx // EDX:EAX = abs(b)
- movl %edx, 20(%esp)
- movl %eax, 16(%esp) // store abs(b) back to stack
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __moddi3(di_int a, di_int b);
+
+// result = remainder of a / b.
+// both inputs and the output are 64-bit signed integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__moddi3)
+
+/* This is currently implemented by wrapping the unsigned modulus up in an absolute
+ value. This could certainly be improved upon. */
+
+ pushl %esi
+ movl 20(%esp), %edx // high word of b
+ movl 16(%esp), %eax // low word of b
+ movl %edx, %ecx
+ sarl $31, %ecx // (b < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(b)
+ movl %edx, 20(%esp)
+ movl %eax, 16(%esp) // store abs(b) back to stack
+
+ movl 12(%esp), %edx // high word of a
+ movl 8(%esp), %eax // low word of a
+ movl %edx, %ecx
+ sarl $31, %ecx // (a < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(a)
+ movl %edx, 12(%esp)
+ movl %eax, 8(%esp) // store abs(a) back to stack
+ movl %ecx, %esi // set aside sign of a
+
+ pushl %ebx
+ movl 24(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 16(%esp), %edx // Load the high and low words of a, and jump
+ movl 12(%esp), %eax // to [2] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 2f
+
+ /* High word of a is less than (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 1f // if positive, this is the result.
+ addl 24(%esp), %ebx // otherwise
+ adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+1: movl %ebx, %eax
+ movl %ecx, %edx
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
- movl 12(%esp), %edx // high word of a
- movl 8(%esp), %eax // low word of a
- movl %edx, %ecx
- sarl $31, %ecx // (a < 0) ? -1 : 0
- xorl %ecx, %eax
- xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
- subl %ecx, %eax
- sbbl %ecx, %edx // EDX:EAX = abs(a)
- movl %edx, 12(%esp)
- movl %eax, 8(%esp) // store abs(a) back to stack
- movl %ecx, %esi // set aside sign of a
-
- pushl %ebx
- movl 24(%esp), %ebx // Find the index i of the leading bit in b.
- bsrl %ebx, %ecx // If the high word of b is zero, jump to
- jz 9f // the code to handle that special case [9].
-
- /* High word of b is known to be non-zero on this branch */
-
- movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
-
- shrl %cl, %eax // Practically, this means that bhi is given by:
- shrl %eax //
- notl %ecx // bhi = (high word of b) << (31 - i) |
- shll %cl, %ebx // (low word of b) >> (1 + i)
- orl %eax, %ebx //
- movl 16(%esp), %edx // Load the high and low words of a, and jump
- movl 12(%esp), %eax // to [2] if the high word is larger than bhi
- cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
- jae 2f
-
- /* High word of a is less than (b >> (1 + i)) on this branch */
-
- divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
-
- pushl %edi
- notl %ecx
- shrl %eax
- shrl %cl, %eax // q = qs >> (1 + i)
- movl %eax, %edi
- mull 24(%esp) // q*blo
- movl 16(%esp), %ebx
- movl 20(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 28(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
-
- jnc 1f // if positive, this is the result.
- addl 24(%esp), %ebx // otherwise
- adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
-1: movl %ebx, %eax
- movl %ecx, %edx
-
- addl %esi, %eax // Restore correct sign to result
- adcl %esi, %edx
- xorl %esi, %eax
- xorl %esi, %edx
- popl %edi // Restore callee-save registers
- popl %ebx
- popl %esi
- retl // Return
-
-2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
-
- subl %ebx, %edx // subtract bhi from ahi so that divide will not
- divl %ebx // overflow, and find q and r such that
- //
- // ahi:alo = (1:q)*bhi + r
- //
- // Note that q is a number in (31-i).(1+i)
- // fix point.
-
- pushl %edi
- notl %ecx
- shrl %eax
- orl $0x80000000, %eax
- shrl %cl, %eax // q = (1:qs) >> (1 + i)
- movl %eax, %edi
- mull 24(%esp) // q*blo
- movl 16(%esp), %ebx
- movl 20(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 28(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
-
- jnc 3f // if positive, this is the result.
- addl 24(%esp), %ebx // otherwise
- adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
-3: movl %ebx, %eax
- movl %ecx, %edx
-
- addl %esi, %eax // Restore correct sign to result
- adcl %esi, %edx
- xorl %esi, %eax
- xorl %esi, %edx
- popl %edi // Restore callee-save registers
- popl %ebx
- popl %esi
- retl // Return
-
-9: /* High word of b is zero on this branch */
-
- movl 16(%esp), %eax // Find qhi and rhi such that
- movl 20(%esp), %ecx //
- xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
- divl %ecx //
- movl %eax, %ebx //
- movl 12(%esp), %eax // Find rlo such that
- divl %ecx //
- movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
- popl %ebx //
- xorl %edx, %edx // and return 0:rlo
-
- addl %esi, %eax // Restore correct sign to result
- adcl %esi, %edx
- xorl %esi, %eax
- xorl %esi, %edx
- popl %esi
- retl // Return
-END_COMPILERRT_FUNCTION(__moddi3)
-
-#endif // __i386__
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 3f // if positive, this is the result.
+ addl 24(%esp), %ebx // otherwise
+ adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+3: movl %ebx, %eax
+ movl %ecx, %edx
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+9: /* High word of b is zero on this branch */
+
+ movl 16(%esp), %eax // Find qhi and rhi such that
+ movl 20(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 12(%esp), %eax // Find rlo such that
+ divl %ecx //
+ movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+ popl %ebx //
+ xorl %edx, %edx // and return 0:rlo
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %esi
+ retl // Return
+END_COMPILERRT_FUNCTION(__moddi3)
+
+#endif // __i386__
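
As the comment near the top of __moddi3 says, the signed modulus wraps an unsigned one in absolute values: the prologue computes abs() via (x ^ s) - s and the epilogue restores the sign of a via (r + s) ^ s, where s is 0 or -1. A compact C sketch of that wrapping (mod64_via_unsigned is a made-up name; it uses C's % in place of the hand-written divide, and relies on the arithmetic right shift that int_lib.h, later in this diff, assumes):

    #include <stdint.h>

    /* Illustrative sketch of the sign handling around the unsigned modulus. */
    static int64_t mod64_via_unsigned(int64_t a, int64_t b) {
        int64_t sa = a >> 63;                       /* (a < 0) ? -1 : 0 */
        int64_t sb = b >> 63;                       /* (b < 0) ? -1 : 0 */
        uint64_t ua = (uint64_t)((a ^ sa) - sa);    /* abs(a), as in the prologue */
        uint64_t ub = (uint64_t)((b ^ sb) - sb);    /* abs(b)                      */
        uint64_t ur = ua % ub;                      /* stands in for the divide above */
        return ((int64_t)ur + sa) ^ sa;             /* restore the sign of a (epilogue) */
    }

For sa == -1 the last line evaluates to ~(ur - 1), i.e. -ur, which is exactly what the addl/adcl followed by xorl pair computes.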
diff --git a/contrib/libs/cxxsupp/builtins/i386/muldi3.S b/contrib/libs/cxxsupp/builtins/i386/muldi3.S
index d19eb13d96..15b6b49984 100644
--- a/contrib/libs/cxxsupp/builtins/i386/muldi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/muldi3.S
@@ -1,30 +1,30 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// di_int __muldi3(di_int a, di_int b);
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__muldi3)
- pushl %ebx
- movl 16(%esp), %eax // b.lo
- movl 12(%esp), %ecx // a.hi
- imull %eax, %ecx // b.lo * a.hi
-
- movl 8(%esp), %edx // a.lo
- movl 20(%esp), %ebx // b.hi
- imull %edx, %ebx // a.lo * b.hi
-
- mull %edx // EDX:EAX = a.lo * b.lo
- addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo)
- addl %ebx, %edx
-
- popl %ebx
- retl
-END_COMPILERRT_FUNCTION(__muldi3)
-
-#endif // __i386__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __muldi3(di_int a, di_int b);
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__muldi3)
+ pushl %ebx
+ movl 16(%esp), %eax // b.lo
+ movl 12(%esp), %ecx // a.hi
+ imull %eax, %ecx // b.lo * a.hi
+
+ movl 8(%esp), %edx // a.lo
+ movl 20(%esp), %ebx // b.hi
+ imull %edx, %ebx // a.lo * b.hi
+
+ mull %edx // EDX:EAX = a.lo * b.lo
+ addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo)
+ addl %ebx, %edx
+
+ popl %ebx
+ retl
+END_COMPILERRT_FUNCTION(__muldi3)
+
+#endif // __i386__
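
__muldi3 keeps only the low 64 bits of the product, so a.hi*b.hi can be ignored and the two cross products only feed the high word. The identity it computes, written as a hedged C sketch (mul64_sketch is a hypothetical name):

    #include <stdint.h>

    /* Illustrative sketch of the 32x32 decomposition used above. */
    static uint64_t mul64_sketch(uint64_t a, uint64_t b) {
        uint32_t alo = (uint32_t)a, ahi = (uint32_t)(a >> 32);
        uint32_t blo = (uint32_t)b, bhi = (uint32_t)(b >> 32);
        uint64_t low   = (uint64_t)alo * blo;                        /* mull: full 64-bit product   */
        uint64_t cross = (uint64_t)alo * bhi + (uint64_t)ahi * blo;  /* the two imull contributions */
        return low + (cross << 32);   /* ahi*bhi would only affect bits >= 64, so it is dropped */
    }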
diff --git a/contrib/libs/cxxsupp/builtins/i386/udivdi3.S b/contrib/libs/cxxsupp/builtins/i386/udivdi3.S
index 6c369193c0..41b2edf03e 100644
--- a/contrib/libs/cxxsupp/builtins/i386/udivdi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/udivdi3.S
@@ -1,115 +1,115 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// du_int __udivdi3(du_int a, du_int b);
-
-// result = a / b.
-// both inputs and the output are 64-bit unsigned integers.
-// This will do whatever the underlying hardware is set to do on division by zero.
-// No other exceptions are generated, as the divide cannot overflow.
-//
-// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
-// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
-// currently possible via simulation of integer divides on the x87 unit.
-//
-// Stephen Canon, December 2008
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__udivdi3)
-
- pushl %ebx
- movl 20(%esp), %ebx // Find the index i of the leading bit in b.
- bsrl %ebx, %ecx // If the high word of b is zero, jump to
- jz 9f // the code to handle that special case [9].
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// du_int __udivdi3(du_int a, du_int b);
+
+// result = a / b.
+// both inputs and the output are 64-bit unsigned integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__udivdi3)
+
+ pushl %ebx
+ movl 20(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 12(%esp), %edx // Load the high and low words of a, and jump
+ movl 8(%esp), %eax // to [1] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 1f
+
+ /* High word of a is less than (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+ popl %edi
+ popl %ebx
+ retl
+
+
+1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
- /* High word of b is known to be non-zero on this branch */
-
- movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
-
- shrl %cl, %eax // Practically, this means that bhi is given by:
- shrl %eax //
- notl %ecx // bhi = (high word of b) << (31 - i) |
- shll %cl, %ebx // (low word of b) >> (1 + i)
- orl %eax, %ebx //
- movl 12(%esp), %edx // Load the high and low words of a, and jump
- movl 8(%esp), %eax // to [1] if the high word is larger than bhi
- cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
- jae 1f
-
- /* High word of a is less than (b >> (1 + i)) on this branch */
-
- divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
-
- pushl %edi
- notl %ecx
- shrl %eax
- shrl %cl, %eax // q = qs >> (1 + i)
- movl %eax, %edi
- mull 20(%esp) // q*blo
- movl 12(%esp), %ebx
- movl 16(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 24(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
- sbbl $0, %edi // decrement q if remainder is negative
- xorl %edx, %edx
- movl %edi, %eax
- popl %edi
- popl %ebx
- retl
-
-
-1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
-
- subl %ebx, %edx // subtract bhi from ahi so that divide will not
- divl %ebx // overflow, and find q and r such that
- //
- // ahi:alo = (1:q)*bhi + r
- //
- // Note that q is a number in (31-i).(1+i)
- // fix point.
-
- pushl %edi
- notl %ecx
- shrl %eax
- orl $0x80000000, %eax
- shrl %cl, %eax // q = (1:qs) >> (1 + i)
- movl %eax, %edi
- mull 20(%esp) // q*blo
- movl 12(%esp), %ebx
- movl 16(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 24(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
- sbbl $0, %edi // decrement q if remainder is negative
- xorl %edx, %edx
- movl %edi, %eax
- popl %edi
- popl %ebx
- retl
-
-
-9: /* High word of b is zero on this branch */
-
- movl 12(%esp), %eax // Find qhi and rhi such that
- movl 16(%esp), %ecx //
- xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
- divl %ecx //
- movl %eax, %ebx //
- movl 8(%esp), %eax // Find qlo such that
- divl %ecx //
- movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
- popl %ebx //
- retl // and return qhi:qlo
-END_COMPILERRT_FUNCTION(__udivdi3)
-
-#endif // __i386__
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+ popl %edi
+ popl %ebx
+ retl
+
+
+9: /* High word of b is zero on this branch */
+
+ movl 12(%esp), %eax // Find qhi and rhi such that
+ movl 16(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 8(%esp), %eax // Find qlo such that
+ divl %ecx //
+ movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+ popl %ebx //
+ retl // and return qhi:qlo
+END_COMPILERRT_FUNCTION(__udivdi3)
+
+#endif // __i386__
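
The branch at label 9 above handles a divisor whose high word is zero with two chained 32-bit divides, i.e. one step of schoolbook long division in base 2^32. A C rendering of just that case (udiv64_by_u32 is a hypothetical name; the general path, where b must first be normalized on its leading bit, is what the bsrl/shrl/shll sequence implements):

    #include <stdint.h>

    /* Illustrative sketch of the "high word of b is zero" special case. */
    static uint64_t udiv64_by_u32(uint64_t a, uint32_t b) {
        uint32_t ahi = (uint32_t)(a >> 32), alo = (uint32_t)a;
        uint32_t qhi = ahi / b;
        uint32_t rhi = ahi % b;                      /* ahi = qhi*b + rhi with 0 <= rhi < b */
        uint64_t mid = ((uint64_t)rhi << 32) | alo;  /* rhi:alo                            */
        uint32_t qlo = (uint32_t)(mid / b);          /* quotient fits 32 bits since mid < b*2^32 */
        return ((uint64_t)qhi << 32) | qlo;          /* return qhi:qlo                     */
    }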
diff --git a/contrib/libs/cxxsupp/builtins/i386/umoddi3.S b/contrib/libs/cxxsupp/builtins/i386/umoddi3.S
index c88ce8c0dc..a190a7d397 100644
--- a/contrib/libs/cxxsupp/builtins/i386/umoddi3.S
+++ b/contrib/libs/cxxsupp/builtins/i386/umoddi3.S
@@ -1,126 +1,126 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// du_int __umoddi3(du_int a, du_int b);
-
-// result = remainder of a / b.
-// both inputs and the output are 64-bit unsigned integers.
-// This will do whatever the underlying hardware is set to do on division by zero.
-// No other exceptions are generated, as the divide cannot overflow.
-//
-// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
-// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
-// currently possible via simulation of integer divides on the x87 unit.
-//
-
-// Stephen Canon, December 2008
-
-#ifdef __i386__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__umoddi3)
-
- pushl %ebx
- movl 20(%esp), %ebx // Find the index i of the leading bit in b.
- bsrl %ebx, %ecx // If the high word of b is zero, jump to
- jz 9f // the code to handle that special case [9].
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// du_int __umoddi3(du_int a, du_int b);
+
+// result = remainder of a / b.
+// both inputs and the output are 64-bit unsigned integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__umoddi3)
+
+ pushl %ebx
+ movl 20(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 12(%esp), %edx // Load the high and low words of a, and jump
+ movl 8(%esp), %eax // to [2] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 2f
+
+ /* High word of a is less than (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 1f // if positive, this is the result.
+ addl 20(%esp), %ebx // otherwise
+ adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+1: movl %ebx, %eax
+ movl %ecx, %edx
+
+ popl %edi
+ popl %ebx
+ retl
+
+
+2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
- /* High word of b is known to be non-zero on this branch */
-
- movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
-
- shrl %cl, %eax // Practically, this means that bhi is given by:
- shrl %eax //
- notl %ecx // bhi = (high word of b) << (31 - i) |
- shll %cl, %ebx // (low word of b) >> (1 + i)
- orl %eax, %ebx //
- movl 12(%esp), %edx // Load the high and low words of a, and jump
- movl 8(%esp), %eax // to [2] if the high word is larger than bhi
- cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
- jae 2f
-
- /* High word of a is less than (b >> (1 + i)) on this branch */
-
- divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
-
- pushl %edi
- notl %ecx
- shrl %eax
- shrl %cl, %eax // q = qs >> (1 + i)
- movl %eax, %edi
- mull 20(%esp) // q*blo
- movl 12(%esp), %ebx
- movl 16(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 24(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
-
- jnc 1f // if positive, this is the result.
- addl 20(%esp), %ebx // otherwise
- adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
-1: movl %ebx, %eax
- movl %ecx, %edx
-
- popl %edi
- popl %ebx
- retl
-
-
-2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
-
- subl %ebx, %edx // subtract bhi from ahi so that divide will not
- divl %ebx // overflow, and find q and r such that
- //
- // ahi:alo = (1:q)*bhi + r
- //
- // Note that q is a number in (31-i).(1+i)
- // fix point.
-
- pushl %edi
- notl %ecx
- shrl %eax
- orl $0x80000000, %eax
- shrl %cl, %eax // q = (1:qs) >> (1 + i)
- movl %eax, %edi
- mull 20(%esp) // q*blo
- movl 12(%esp), %ebx
- movl 16(%esp), %ecx // ECX:EBX = a
- subl %eax, %ebx
- sbbl %edx, %ecx // ECX:EBX = a - q*blo
- movl 24(%esp), %eax
- imull %edi, %eax // q*bhi
- subl %eax, %ecx // ECX:EBX = a - q*b
-
- jnc 3f // if positive, this is the result.
- addl 20(%esp), %ebx // otherwise
- adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
-3: movl %ebx, %eax
- movl %ecx, %edx
-
- popl %edi
- popl %ebx
- retl
-
-
-
-9: /* High word of b is zero on this branch */
-
- movl 12(%esp), %eax // Find qhi and rhi such that
- movl 16(%esp), %ecx //
- xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
- divl %ecx //
- movl %eax, %ebx //
- movl 8(%esp), %eax // Find rlo such that
- divl %ecx //
- movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
- popl %ebx //
- xorl %edx, %edx // and return 0:rlo
- retl //
-END_COMPILERRT_FUNCTION(__umoddi3)
-
-#endif // __i386__
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 3f // if positive, this is the result.
+ addl 20(%esp), %ebx // otherwise
+ adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+3: movl %ebx, %eax
+ movl %ecx, %edx
+
+ popl %edi
+ popl %ebx
+ retl
+
+
+
+9: /* High word of b is zero on this branch */
+
+ movl 12(%esp), %eax // Find qhi and rhi such that
+ movl 16(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 8(%esp), %eax // Find rlo such that
+ divl %ecx //
+ movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+ popl %ebx //
+ xorl %edx, %edx // and return 0:rlo
+ retl //
+END_COMPILERRT_FUNCTION(__umoddi3)
+
+#endif // __i386__
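
All three 64-bit divide/modulus routines above start by constructing "bhi", the divisor's top 32 bits after normalizing on its leading bit, so that a single divl yields a quotient estimate that is at most one too large. A C sketch of that construction (make_bhi is a made-up name; i stands for the bsrl result, the index of the leading set bit of b's high word, so 0 <= i <= 31):

    #include <stdint.h>

    /* Illustrative only: pack bits [1+i : 32+i] of b into one 32-bit value,
     * as the shrl/notl/shll/orl sequence above does.  The low-word shift is
     * split as (>> 1) >> i because a single C shift by (1 + i) would be
     * undefined when i == 31; the assembly likewise shifts in two steps. */
    static uint32_t make_bhi(uint64_t b, unsigned i) {
        uint32_t b_hi = (uint32_t)(b >> 32);
        uint32_t b_lo = (uint32_t)b;
        return (b_hi << (31 - i)) | ((b_lo >> 1) >> i);
    }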
diff --git a/contrib/libs/cxxsupp/builtins/int_endianness.h b/contrib/libs/cxxsupp/builtins/int_endianness.h
index 2a813489dc..7995ddbb95 100644
--- a/contrib/libs/cxxsupp/builtins/int_endianness.h
+++ b/contrib/libs/cxxsupp/builtins/int_endianness.h
@@ -1,116 +1,116 @@
-/* ===-- int_endianness.h - configuration header for compiler-rt ------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file is a configuration header for compiler-rt.
- * This file is not part of the interface of this library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#ifndef INT_ENDIANNESS_H
-#define INT_ENDIANNESS_H
-
-#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
- defined(__ORDER_LITTLE_ENDIAN__)
-
-/* Clang and GCC provide built-in endianness definitions. */
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#define _YUGA_LITTLE_ENDIAN 0
-#define _YUGA_BIG_ENDIAN 1
-#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN 0
-#endif /* __BYTE_ORDER__ */
-
-#else /* Compilers other than Clang or GCC. */
-
-#if defined(__SVR4) && defined(__sun)
-#include <sys/byteorder.h>
-
-#if defined(_BIG_ENDIAN)
-#define _YUGA_LITTLE_ENDIAN 0
-#define _YUGA_BIG_ENDIAN 1
-#elif defined(_LITTLE_ENDIAN)
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN 0
-#else /* !_LITTLE_ENDIAN */
-#error "unknown endianness"
-#endif /* !_LITTLE_ENDIAN */
-
-#endif /* Solaris and AuroraUX. */
-
-/* .. */
-
-#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
- defined(__minix)
-#include <sys/endian.h>
-
-#if _BYTE_ORDER == _BIG_ENDIAN
-#define _YUGA_LITTLE_ENDIAN 0
-#define _YUGA_BIG_ENDIAN 1
-#elif _BYTE_ORDER == _LITTLE_ENDIAN
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN 0
-#endif /* _BYTE_ORDER */
-
-#endif /* *BSD */
-
-#if defined(__OpenBSD__) || defined(__Bitrig__)
-#include <machine/endian.h>
-
-#if _BYTE_ORDER == _BIG_ENDIAN
-#define _YUGA_LITTLE_ENDIAN 0
-#define _YUGA_BIG_ENDIAN 1
-#elif _BYTE_ORDER == _LITTLE_ENDIAN
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN 0
-#endif /* _BYTE_ORDER */
-
-#endif /* OpenBSD and Bitrig. */
-
-/* .. */
-
-/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the
- * compiler (at least with GCC) */
-#if defined(__APPLE__) || defined(__ellcc__ )
-
-#ifdef __BIG_ENDIAN__
-#if __BIG_ENDIAN__
-#define _YUGA_LITTLE_ENDIAN 0
-#define _YUGA_BIG_ENDIAN 1
-#endif
-#endif /* __BIG_ENDIAN__ */
-
-#ifdef __LITTLE_ENDIAN__
-#if __LITTLE_ENDIAN__
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN 0
-#endif
-#endif /* __LITTLE_ENDIAN__ */
-
-#endif /* Mac OSX */
-
-/* .. */
-
-#if defined(_WIN32)
-
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN 0
-
-#endif /* Windows */
-
-#endif /* Clang or GCC. */
-
-/* . */
-
-#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
-#error Unable to determine endian
-#endif /* Check we found an endianness correctly. */
-
-#endif /* INT_ENDIANNESS_H */
+/* ===-- int_endianness.h - configuration header for compiler-rt ------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is a configuration header for compiler-rt.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ defined(__ORDER_LITTLE_ENDIAN__)
+
+/* Clang and GCC provide built-in endianness definitions. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif /* __BYTE_ORDER__ */
+
+#else /* Compilers other than Clang or GCC. */
+
+#if defined(__SVR4) && defined(__sun)
+#include <sys/byteorder.h>
+
+#if defined(_BIG_ENDIAN)
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif defined(_LITTLE_ENDIAN)
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#else /* !_LITTLE_ENDIAN */
+#error "unknown endianness"
+#endif /* !_LITTLE_ENDIAN */
+
+#endif /* Solaris and AuroraUX. */
+
+/* .. */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
+ defined(__minix)
+#include <sys/endian.h>
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif _BYTE_ORDER == _LITTLE_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif /* _BYTE_ORDER */
+
+#endif /* *BSD */
+
+#if defined(__OpenBSD__) || defined(__Bitrig__)
+#include <machine/endian.h>
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif _BYTE_ORDER == _LITTLE_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif /* _BYTE_ORDER */
+
+#endif /* OpenBSD and Bitrig. */
+
+/* .. */
+
+/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the
+ * compiler (at least with GCC) */
+#if defined(__APPLE__) || defined(__ellcc__ )
+
+#ifdef __BIG_ENDIAN__
+#if __BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#endif
+#endif /* __BIG_ENDIAN__ */
+
+#ifdef __LITTLE_ENDIAN__
+#if __LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif
+#endif /* __LITTLE_ENDIAN__ */
+
+#endif /* Mac OSX */
+
+/* .. */
+
+#if defined(_WIN32)
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+
+#endif /* Windows */
+
+#endif /* Clang or GCC. */
+
+/* . */
+
+#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
+#error Unable to determine endian
+#endif /* Check we found an endianness correctly. */
+
+#endif /* INT_ENDIANNESS_H */
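
The point of this header is to export exactly one of _YUGA_LITTLE_ENDIAN / _YUGA_BIG_ENDIAN as 1 so that the unions in int_types.h can order their low/high members to match the target's memory layout. A tiny self-check sketch, illustrative only and assuming int_endianness.h is on the include path:

    #include <stdint.h>
    #include <stdio.h>
    #include "int_endianness.h"

    int main(void) {
        union { uint32_t word; uint8_t bytes[4]; } probe = { 0x01020304u };
        int runtime_little = (probe.bytes[0] == 0x04);   /* lowest-addressed byte */
    #if _YUGA_LITTLE_ENDIAN
        printf("compile-time little-endian, runtime agrees: %s\n",
               runtime_little ? "yes" : "no");
    #else
        printf("compile-time big-endian, runtime agrees: %s\n",
               runtime_little ? "no" : "yes");
    #endif
        return 0;
    }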
diff --git a/contrib/libs/cxxsupp/builtins/int_lib.h b/contrib/libs/cxxsupp/builtins/int_lib.h
index c231bcbcdf..272f9d9dad 100644
--- a/contrib/libs/cxxsupp/builtins/int_lib.h
+++ b/contrib/libs/cxxsupp/builtins/int_lib.h
@@ -1,133 +1,133 @@
-/* ===-- int_lib.h - configuration header for compiler-rt -----------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file is a configuration header for compiler-rt.
- * This file is not part of the interface of this library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#ifndef INT_LIB_H
-#define INT_LIB_H
-
-/* Assumption: Signed integral is 2's complement. */
-/* Assumption: Right shift of signed negative is arithmetic shift. */
-/* Assumption: Endianness is little or big (not mixed). */
-
-#if defined(__ELF__)
-#define FNALIAS(alias_name, original_name) \
- void alias_name() __attribute__((alias(#original_name)))
-#else
-#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
-#endif
-
-/* ABI macro definitions */
-
-#if __ARM_EABI__
-# define ARM_EABI_FNALIAS(aeabi_name, name) \
- void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
-#else
-# define ARM_EABI_FNALIAS(aeabi_name, name)
-# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__))
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
-# else
-# define COMPILER_RT_ABI
-# endif
-#endif
-
-#ifdef _MSC_VER
-#define ALWAYS_INLINE __forceinline
-#define NOINLINE __declspec(noinline)
-#define NORETURN __declspec(noreturn)
-#define UNUSED
-#else
-#define ALWAYS_INLINE __attribute__((always_inline))
-#define NOINLINE __attribute__((noinline))
-#define NORETURN __attribute__((noreturn))
-#define UNUSED __attribute__((unused))
-#endif
-
-#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE))
-/*
- * Kernel and boot environment can't use normal headers,
- * so use the equivalent system headers.
- */
-# include <machine/limits.h>
-# include <sys/stdint.h>
-# include <sys/types.h>
-#else
-/* Include the standard compiler builtin headers we use functionality from. */
-# include <limits.h>
-# include <stdint.h>
-# include <stdbool.h>
-# include <float.h>
-#endif
-
-/* Include the commonly used internal type definitions. */
-#include "int_types.h"
-
-/* Include internal utility function declarations. */
-#include "int_util.h"
-
-COMPILER_RT_ABI si_int __paritysi2(si_int a);
-COMPILER_RT_ABI si_int __paritydi2(di_int a);
-
-COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
-COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
-COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
-
-COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem);
-COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
-#ifdef CRT_HAS_128BIT
-COMPILER_RT_ABI si_int __clzti2(ti_int a);
-COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
-#endif
-
-/* Definitions for builtins unavailable on MSVC */
-#if defined(_MSC_VER) && !defined(__clang__)
-#include <intrin.h>
-
-uint32_t __inline __builtin_ctz(uint32_t value) {
- uint32_t trailing_zero = 0;
- if (_BitScanForward(&trailing_zero, value))
- return trailing_zero;
- return 32;
-}
-
-uint32_t __inline __builtin_clz(uint32_t value) {
- uint32_t leading_zero = 0;
- if (_BitScanReverse(&leading_zero, value))
- return 31 - leading_zero;
- return 32;
-}
-
-#if defined(_M_ARM) || defined(_M_X64)
-uint32_t __inline __builtin_clzll(uint64_t value) {
- uint32_t leading_zero = 0;
- if (_BitScanReverse64(&leading_zero, value))
- return 63 - leading_zero;
- return 64;
-}
-#else
-uint32_t __inline __builtin_clzll(uint64_t value) {
- if (value == 0)
- return 64;
- uint32_t msh = (uint32_t)(value >> 32);
- uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
- if (msh != 0)
- return __builtin_clz(msh);
- return 32 + __builtin_clz(lsh);
-}
-#endif
-
-#define __builtin_clzl __builtin_clzll
-#endif // defined(_MSC_VER) && !defined(__clang__)
-
-#endif /* INT_LIB_H */
+/* ===-- int_lib.h - configuration header for compiler-rt -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is a configuration header for compiler-rt.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+/* Assumption: Signed integral is 2's complement. */
+/* Assumption: Right shift of signed negative is arithmetic shift. */
+/* Assumption: Endianness is little or big (not mixed). */
+
+#if defined(__ELF__)
+#define FNALIAS(alias_name, original_name) \
+ void alias_name() __attribute__((alias(#original_name)))
+#else
+#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
+#endif
+
+/* ABI macro definitions */
+
+#if __ARM_EABI__
+# define ARM_EABI_FNALIAS(aeabi_name, name) \
+ void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
+# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
+#else
+# define ARM_EABI_FNALIAS(aeabi_name, name)
+# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__))
+# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
+# else
+# define COMPILER_RT_ABI
+# endif
+#endif
+
+#ifdef _MSC_VER
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#define NORETURN __declspec(noreturn)
+#define UNUSED
+#else
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#endif
+
+#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE))
+/*
+ * Kernel and boot environment can't use normal headers,
+ * so use the equivalent system headers.
+ */
+# include <machine/limits.h>
+# include <sys/stdint.h>
+# include <sys/types.h>
+#else
+/* Include the standard compiler builtin headers we use functionality from. */
+# include <limits.h>
+# include <stdint.h>
+# include <stdbool.h>
+# include <float.h>
+#endif
+
+/* Include the commonly used internal type definitions. */
+#include "int_types.h"
+
+/* Include internal utility function declarations. */
+#include "int_util.h"
+
+COMPILER_RT_ABI si_int __paritysi2(si_int a);
+COMPILER_RT_ABI si_int __paritydi2(di_int a);
+
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
+COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
+
+COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem);
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
+#ifdef CRT_HAS_128BIT
+COMPILER_RT_ABI si_int __clzti2(ti_int a);
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
+#endif
+
+/* Definitions for builtins unavailable on MSVC */
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+
+uint32_t __inline __builtin_ctz(uint32_t value) {
+ uint32_t trailing_zero = 0;
+ if (_BitScanForward(&trailing_zero, value))
+ return trailing_zero;
+ return 32;
+}
+
+uint32_t __inline __builtin_clz(uint32_t value) {
+ uint32_t leading_zero = 0;
+ if (_BitScanReverse(&leading_zero, value))
+ return 31 - leading_zero;
+ return 32;
+}
+
+#if defined(_M_ARM) || defined(_M_X64)
+uint32_t __inline __builtin_clzll(uint64_t value) {
+ uint32_t leading_zero = 0;
+ if (_BitScanReverse64(&leading_zero, value))
+ return 63 - leading_zero;
+ return 64;
+}
+#else
+uint32_t __inline __builtin_clzll(uint64_t value) {
+ if (value == 0)
+ return 64;
+ uint32_t msh = (uint32_t)(value >> 32);
+ uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
+ if (msh != 0)
+ return __builtin_clz(msh);
+ return 32 + __builtin_clz(lsh);
+}
+#endif
+
+#define __builtin_clzl __builtin_clzll
+#endif // defined(_MSC_VER) && !defined(__clang__)
+
+#endif /* INT_LIB_H */
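
The _BitScanForward/_BitScanReverse shims above exist so the portable C builtins can keep calling __builtin_ctz/__builtin_clz when built with MSVC. One place such a count matters in this diff is the "index of the leading bit" that the i386 divide routines obtain with bsrl; in C that is simply 31 - clz. A small sketch (leading_bit_index is a hypothetical helper, and it assumes either GCC/Clang or the MSVC shim above is in effect):

    #include <stdint.h>

    /* Illustrative only: what bsrl computes in the i386 assembly earlier in
     * this diff.  Precondition: x != 0 (clz of zero is undefined). */
    static unsigned leading_bit_index(uint32_t x) {
        return 31u - (unsigned)__builtin_clz(x);
    }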
diff --git a/contrib/libs/cxxsupp/builtins/int_math.h b/contrib/libs/cxxsupp/builtins/int_math.h
index 9c718df92c..fc81fb7f02 100644
--- a/contrib/libs/cxxsupp/builtins/int_math.h
+++ b/contrib/libs/cxxsupp/builtins/int_math.h
@@ -1,114 +1,114 @@
-/* ===-- int_math.h - internal math inlines ---------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===-----------------------------------------------------------------------===
- *
- * This file is not part of the interface of this library.
- *
- * This file defines substitutes for the libm functions used in some of the
- * compiler-rt implementations, defined in such a way that there is not a direct
- * dependency on libm or math.h. Instead, we use the compiler builtin versions
- * where available. This reduces our dependencies on the system SDK by foisting
- * the responsibility onto the compiler.
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef INT_MATH_H
-#define INT_MATH_H
-
-#ifndef __has_builtin
-# define __has_builtin(x) 0
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#include <math.h>
-#include <stdlib.h>
-#include <ymath.h>
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define CRT_INFINITY INFINITY
-#else
-#define CRT_INFINITY __builtin_huge_valf()
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define crt_isfinite(x) _finite((x))
-#define crt_isinf(x) !_finite((x))
-#define crt_isnan(x) _isnan((x))
-#else
-/* Define crt_isfinite in terms of the builtin if available, otherwise provide
- * an alternate version in terms of our other functions. This supports some
- * versions of GCC which didn't have __builtin_isfinite.
- */
-#if __has_builtin(__builtin_isfinite)
-# define crt_isfinite(x) __builtin_isfinite((x))
-#elif defined(__GNUC__)
-# define crt_isfinite(x) \
- __extension__(({ \
- __typeof((x)) x_ = (x); \
- !crt_isinf(x_) && !crt_isnan(x_); \
- }))
-#else
-# error "Do not know how to check for infinity"
-#endif /* __has_builtin(__builtin_isfinite) */
-#define crt_isinf(x) __builtin_isinf((x))
-#define crt_isnan(x) __builtin_isnan((x))
-#endif /* _MSC_VER */
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define crt_copysign(x, y) copysign((x), (y))
-#define crt_copysignf(x, y) copysignf((x), (y))
-#define crt_copysignl(x, y) copysignl((x), (y))
-#else
-#define crt_copysign(x, y) __builtin_copysign((x), (y))
-#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
-#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define crt_fabs(x) fabs((x))
-#define crt_fabsf(x) fabsf((x))
-#define crt_fabsl(x) fabs((x))
-#else
-#define crt_fabs(x) __builtin_fabs((x))
-#define crt_fabsf(x) __builtin_fabsf((x))
-#define crt_fabsl(x) __builtin_fabsl((x))
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define crt_fmax(x, y) __max((x), (y))
-#define crt_fmaxf(x, y) __max((x), (y))
-#define crt_fmaxl(x, y) __max((x), (y))
-#else
-#define crt_fmax(x, y) __builtin_fmax((x), (y))
-#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
-#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define crt_logb(x) logb((x))
-#define crt_logbf(x) logbf((x))
-#define crt_logbl(x) logbl((x))
-#else
-#define crt_logb(x) __builtin_logb((x))
-#define crt_logbf(x) __builtin_logbf((x))
-#define crt_logbl(x) __builtin_logbl((x))
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#define crt_scalbn(x, y) scalbn((x), (y))
-#define crt_scalbnf(x, y) scalbnf((x), (y))
-#define crt_scalbnl(x, y) scalbnl((x), (y))
-#else
-#define crt_scalbn(x, y) __builtin_scalbn((x), (y))
-#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
-#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
-#endif
-
-#endif /* INT_MATH_H */
+/* ===-- int_math.h - internal math inlines ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines substitutes for the libm functions used in some of the
+ * compiler-rt implementations, defined in such a way that there is not a direct
+ * dependency on libm or math.h. Instead, we use the compiler builtin versions
+ * where available. This reduces our dependencies on the system SDK by foisting
+ * the responsibility onto the compiler.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef INT_MATH_H
+#define INT_MATH_H
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <math.h>
+#include <stdlib.h>
+#include <ymath.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CRT_INFINITY INFINITY
+#else
+#define CRT_INFINITY __builtin_huge_valf()
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_isfinite(x) _finite((x))
+#define crt_isinf(x) !_finite((x))
+#define crt_isnan(x) _isnan((x))
+#else
+/* Define crt_isfinite in terms of the builtin if available, otherwise provide
+ * an alternate version in terms of our other functions. This supports some
+ * versions of GCC which didn't have __builtin_isfinite.
+ */
+#if __has_builtin(__builtin_isfinite)
+# define crt_isfinite(x) __builtin_isfinite((x))
+#elif defined(__GNUC__)
+# define crt_isfinite(x) \
+ __extension__(({ \
+ __typeof((x)) x_ = (x); \
+ !crt_isinf(x_) && !crt_isnan(x_); \
+ }))
+#else
+# error "Do not know how to check for infinity"
+#endif /* __has_builtin(__builtin_isfinite) */
+#define crt_isinf(x) __builtin_isinf((x))
+#define crt_isnan(x) __builtin_isnan((x))
+#endif /* _MSC_VER */
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_copysign(x, y) copysign((x), (y))
+#define crt_copysignf(x, y) copysignf((x), (y))
+#define crt_copysignl(x, y) copysignl((x), (y))
+#else
+#define crt_copysign(x, y) __builtin_copysign((x), (y))
+#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
+#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fabs(x) fabs((x))
+#define crt_fabsf(x) fabsf((x))
+#define crt_fabsl(x) fabs((x))
+#else
+#define crt_fabs(x) __builtin_fabs((x))
+#define crt_fabsf(x) __builtin_fabsf((x))
+#define crt_fabsl(x) __builtin_fabsl((x))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fmax(x, y) __max((x), (y))
+#define crt_fmaxf(x, y) __max((x), (y))
+#define crt_fmaxl(x, y) __max((x), (y))
+#else
+#define crt_fmax(x, y) __builtin_fmax((x), (y))
+#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
+#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_logb(x) logb((x))
+#define crt_logbf(x) logbf((x))
+#define crt_logbl(x) logbl((x))
+#else
+#define crt_logb(x) __builtin_logb((x))
+#define crt_logbf(x) __builtin_logbf((x))
+#define crt_logbl(x) __builtin_logbl((x))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_scalbn(x, y) scalbn((x), (y))
+#define crt_scalbnf(x, y) scalbnf((x), (y))
+#define crt_scalbnl(x, y) scalbnl((x), (y))
+#else
+#define crt_scalbn(x, y) __builtin_scalbn((x), (y))
+#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
+#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
+#endif
+
+#endif /* INT_MATH_H */
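
These crt_* macros let the portable builtins classify and scale floating-point values without a libm dependency: on MSVC they expand to runtime-library calls, elsewhere to compiler builtins. A hedged usage sketch (halve_if_finite is made up; it only demonstrates that the wrappers compose like their libm counterparts):

    #include "int_math.h"

    /* Illustrative only: scale a finite double by a power of two using the
     * libm-free wrappers defined above. */
    static double halve_if_finite(double x) {
        if (!crt_isfinite(x))
            return x;                  /* leave NaN and infinities untouched */
        return crt_scalbn(x, -1);      /* multiply by 2^-1 without calling libm */
    }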
diff --git a/contrib/libs/cxxsupp/builtins/int_types.h b/contrib/libs/cxxsupp/builtins/int_types.h
index 2784a9c54d..23065b89bd 100644
--- a/contrib/libs/cxxsupp/builtins/int_types.h
+++ b/contrib/libs/cxxsupp/builtins/int_types.h
@@ -1,165 +1,165 @@
-/* ===-- int_lib.h - configuration header for compiler-rt -----------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file is not part of the interface of this library.
- *
- * This file defines various standard types, most importantly a number of unions
- * used to access parts of larger types.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#ifndef INT_TYPES_H
-#define INT_TYPES_H
-
-#include "int_endianness.h"
-
-/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */
-#ifdef si_int
-#undef si_int
-#endif
-typedef int si_int;
-typedef unsigned su_int;
-
-typedef long long di_int;
-typedef unsigned long long du_int;
-
-typedef union
-{
- di_int all;
- struct
- {
-#if _YUGA_LITTLE_ENDIAN
- su_int low;
- si_int high;
-#else
- si_int high;
- su_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
- }s;
-} dwords;
-
-typedef union
-{
- du_int all;
- struct
- {
-#if _YUGA_LITTLE_ENDIAN
- su_int low;
- su_int high;
-#else
- su_int high;
- su_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
- }s;
-} udwords;
-
-/* MIPS64 issue: PR 20098 */
+/* ===-- int_lib.h - configuration header for compiler-rt -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines various standard types, most importantly a number of unions
+ * used to access parts of larger types.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */
+#ifdef si_int
+#undef si_int
+#endif
+typedef int si_int;
+typedef unsigned su_int;
+
+typedef long long di_int;
+typedef unsigned long long du_int;
+
+typedef union
+{
+ di_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ su_int low;
+ si_int high;
+#else
+ si_int high;
+ su_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} dwords;
+
+typedef union
+{
+ du_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ su_int low;
+ su_int high;
+#else
+ su_int high;
+ su_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} udwords;
+
+/* MIPS64 issue: PR 20098 */
#if defined(__LP64__) && !(defined(__mips__) && defined(__clang__)) || (defined(_MSC_VER) && defined(__clang__))
-#define CRT_HAS_128BIT
-#endif
-
-#ifdef CRT_HAS_128BIT
-typedef int ti_int __attribute__ ((mode (TI)));
-typedef unsigned tu_int __attribute__ ((mode (TI)));
-
-typedef union
-{
- ti_int all;
- struct
- {
-#if _YUGA_LITTLE_ENDIAN
- du_int low;
- di_int high;
-#else
- di_int high;
- du_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
- }s;
-} twords;
-
-typedef union
-{
- tu_int all;
- struct
- {
-#if _YUGA_LITTLE_ENDIAN
- du_int low;
- du_int high;
-#else
- du_int high;
- du_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
- }s;
-} utwords;
-
-static __inline ti_int make_ti(di_int h, di_int l) {
- twords r;
- r.s.high = h;
- r.s.low = l;
- return r.all;
-}
-
-static __inline tu_int make_tu(du_int h, du_int l) {
- utwords r;
- r.s.high = h;
- r.s.low = l;
- return r.all;
-}
-
-#endif /* CRT_HAS_128BIT */
-
-typedef union
-{
- su_int u;
- float f;
-} float_bits;
-
-typedef union
-{
- udwords u;
- double f;
-} double_bits;
-
-typedef struct
-{
-#if _YUGA_LITTLE_ENDIAN
- udwords low;
- udwords high;
-#else
- udwords high;
- udwords low;
-#endif /* _YUGA_LITTLE_ENDIAN */
-} uqwords;
-
-typedef union
-{
- uqwords u;
- long double f;
-} long_double_bits;
-
-#if __STDC_VERSION__ >= 199901L
-typedef float _Complex Fcomplex;
-typedef double _Complex Dcomplex;
-typedef long double _Complex Lcomplex;
-
-#define COMPLEX_REAL(x) __real__(x)
-#define COMPLEX_IMAGINARY(x) __imag__(x)
-#else
-typedef struct { float real, imaginary; } Fcomplex;
-
-typedef struct { double real, imaginary; } Dcomplex;
-
-typedef struct { long double real, imaginary; } Lcomplex;
-
-#define COMPLEX_REAL(x) (x).real
-#define COMPLEX_IMAGINARY(x) (x).imaginary
-#endif
-#endif /* INT_TYPES_H */
-
+#define CRT_HAS_128BIT
+#endif
+
+#ifdef CRT_HAS_128BIT
+typedef int ti_int __attribute__ ((mode (TI)));
+typedef unsigned tu_int __attribute__ ((mode (TI)));
+
+typedef union
+{
+ ti_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ du_int low;
+ di_int high;
+#else
+ di_int high;
+ du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} twords;
+
+typedef union
+{
+ tu_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ du_int low;
+ du_int high;
+#else
+ du_int high;
+ du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+ twords r;
+ r.s.high = h;
+ r.s.low = l;
+ return r.all;
+}
+
+static __inline tu_int make_tu(du_int h, du_int l) {
+ utwords r;
+ r.s.high = h;
+ r.s.low = l;
+ return r.all;
+}
+
+#endif /* CRT_HAS_128BIT */
+
+typedef union
+{
+ su_int u;
+ float f;
+} float_bits;
+
+typedef union
+{
+ udwords u;
+ double f;
+} double_bits;
+
+typedef struct
+{
+#if _YUGA_LITTLE_ENDIAN
+ udwords low;
+ udwords high;
+#else
+ udwords high;
+ udwords low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+} uqwords;
+
+typedef union
+{
+ uqwords u;
+ long double f;
+} long_double_bits;
+
+#if __STDC_VERSION__ >= 199901L
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+#else
+typedef struct { float real, imaginary; } Fcomplex;
+
+typedef struct { double real, imaginary; } Dcomplex;
+
+typedef struct { long double real, imaginary; } Lcomplex;
+
+#define COMPLEX_REAL(x) (x).real
+#define COMPLEX_IMAGINARY(x) (x).imaginary
+#endif
+#endif /* INT_TYPES_H */
+
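
The dwords/udwords/twords unions above let the library address the halves of a wide integer without shifting. A minimal standalone sketch of the same pattern, assuming a little-endian target and using <stdint.h> names instead of si_int/di_int (the union and its name, udwords_demo, are invented for this example):

    #include <assert.h>
    #include <stdint.h>

    /* Simplified little-endian analogue of the library's udwords union. */
    typedef union {
        uint64_t all;
        struct {
            uint32_t low;   /* least significant half */
            uint32_t high;  /* most significant half  */
        } s;
    } udwords_demo;

    int main(void) {
        udwords_demo x;
        x.all = 0x0123456789ABCDEFULL;

        /* The union view and explicit shifts/masks agree. */
        assert(x.s.low  == (uint32_t)(x.all & 0xFFFFFFFFu));
        assert(x.s.high == (uint32_t)(x.all >> 32));

        /* Rebuilding the value from its halves round-trips. */
        uint64_t rebuilt = ((uint64_t)x.s.high << 32) | x.s.low;
        assert(rebuilt == x.all);
        return 0;
    }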
diff --git a/contrib/libs/cxxsupp/builtins/int_util.c b/contrib/libs/cxxsupp/builtins/int_util.c
index 550c723487..420d1e237a 100644
--- a/contrib/libs/cxxsupp/builtins/int_util.c
+++ b/contrib/libs/cxxsupp/builtins/int_util.c
@@ -1,61 +1,61 @@
-/* ===-- int_util.c - Implement internal utilities --------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_util.h"
-
-/* NOTE: The definitions in this file are declared weak because we want clients to be
- * able to arbitrarily package individual functions into separate .a files. If
- * we did not declare these weak, some link situations might end up seeing
- * duplicate strong definitions of the same symbol.
- *
- * We can't use this solution for kernel use (which may not support weak), but
- * currently expect that when built for kernel use all the functionality is
- * packaged into a single library.
- */
-
-#ifdef KERNEL_USE
-
-NORETURN extern void panic(const char *, ...);
-#ifndef _WIN32
-__attribute__((visibility("hidden")))
-#endif
-void compilerrt_abort_impl(const char *file, int line, const char *function) {
- panic("%s:%d: abort in %s", file, line, function);
-}
-
-#elif __APPLE__
-
-/* from libSystem.dylib */
-NORETURN extern void __assert_rtn(const char *func, const char *file, int line,
- const char *message);
-
-#ifndef _WIN32
-__attribute__((weak))
-__attribute__((visibility("hidden")))
-#endif
-void compilerrt_abort_impl(const char *file, int line, const char *function) {
- __assert_rtn(function, file, line, "libcompiler_rt abort");
-}
-
-#else
-
-/* Get the system definition of abort() */
-#include <stdlib.h>
-
-#ifndef _WIN32
-__attribute__((weak))
-__attribute__((visibility("hidden")))
-#endif
-void compilerrt_abort_impl(const char *file, int line, const char *function) {
- abort();
-}
-
-#endif
+/* ===-- int_util.c - Implement internal utilities --------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_util.h"
+
+/* NOTE: The definitions in this file are declared weak because we want clients to be
+ * able to arbitrarily package individual functions into separate .a files. If
+ * we did not declare these weak, some link situations might end up seeing
+ * duplicate strong definitions of the same symbol.
+ *
+ * We can't use this solution for kernel use (which may not support weak), but
+ * currently expect that when built for kernel use all the functionality is
+ * packaged into a single library.
+ */
+
+#ifdef KERNEL_USE
+
+NORETURN extern void panic(const char *, ...);
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+ panic("%s:%d: abort in %s", file, line, function);
+}
+
+#elif __APPLE__
+
+/* from libSystem.dylib */
+NORETURN extern void __assert_rtn(const char *func, const char *file, int line,
+ const char *message);
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+ __assert_rtn(function, file, line, "libcompiler_rt abort");
+}
+
+#else
+
+/* Get the system definition of abort() */
+#include <stdlib.h>
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+ abort();
+}
+
+#endif
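
compilerrt_abort_impl is defined weak so that a client linking several compiler-rt objects can supply its own strong handler and have it win at link time. A hedged sketch of that mechanism, assuming GCC/Clang weak-symbol support; demo_abort_impl and demo_abort are invented names, not part of the library:

    #include <stdio.h>
    #include <stdlib.h>

    /* Library side: weak default handler (hypothetical names for the sketch). */
    __attribute__((weak))
    void demo_abort_impl(const char *file, int line, const char *function) {
        fprintf(stderr, "%s:%d: abort in %s\n", file, line, function);
        abort();
    }

    #define demo_abort() demo_abort_impl(__FILE__, __LINE__, __func__)

    /* A client that links its own strong demo_abort_impl (e.g. a kernel panic
     * wrapper) silently replaces the weak definition above at link time; with
     * no override, the default runs. */
    int main(void) {
        demo_abort();
        return 0;   /* not reached */
    }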
diff --git a/contrib/libs/cxxsupp/builtins/int_util.h b/contrib/libs/cxxsupp/builtins/int_util.h
index 649a4a0d91..a7b20ed662 100644
--- a/contrib/libs/cxxsupp/builtins/int_util.h
+++ b/contrib/libs/cxxsupp/builtins/int_util.h
@@ -1,33 +1,33 @@
-/* ===-- int_util.h - internal utility functions ----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===-----------------------------------------------------------------------===
- *
- * This file is not part of the interface of this library.
- *
- * This file defines non-inline utilities which are available for use in the
- * library. The function definitions themselves are all contained in int_util.c
- * which will always be compiled into any compiler-rt library.
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef INT_UTIL_H
-#define INT_UTIL_H
-
-/** \brief Trigger a program abort (or panic for kernel code). */
-#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__)
-
-NORETURN void compilerrt_abort_impl(const char *file, int line,
- const char *function);
-
-#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
-#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
-#define COMPILE_TIME_ASSERT2(expr, cnt) \
- typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
-
-#endif /* INT_UTIL_H */
+/* ===-- int_util.h - internal utility functions ----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines non-inline utilities which are available for use in the
+ * library. The function definitions themselves are all contained in int_util.c
+ * which will always be compiled into any compiler-rt library.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef INT_UTIL_H
+#define INT_UTIL_H
+
+/** \brief Trigger a program abort (or panic for kernel code). */
+#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__)
+
+NORETURN void compilerrt_abort_impl(const char *file, int line,
+ const char *function);
+
+#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
+#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
+#define COMPILE_TIME_ASSERT2(expr, cnt) \
+ typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
+
+#endif /* INT_UTIL_H */
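
COMPILE_TIME_ASSERT is the classic pre-C11 idiom: a typedef of a char array whose size is 1 when the predicate holds and -1 (ill-formed) when it does not, with __COUNTER__ keeping the typedef names distinct. A standalone sketch of the same trick, with the library's UNUSED macro replaced by a plain GCC/Clang attribute so it compiles on its own (STATIC_ASSERT is an invented name):

    #include <stdint.h>

    /* Same negative-array-size idiom as COMPILE_TIME_ASSERT above. */
    #define STATIC_ASSERT(expr, name) \
        typedef char static_assert_##name[(expr) ? 1 : -1] __attribute__((unused))

    /* Passes: the typedef is well-formed and then ignored. */
    STATIC_ASSERT(sizeof(uint64_t) == 2 * sizeof(uint32_t), u64_is_two_u32);

    /* Fails at compile time: a char[-1] type is diagnosed by the compiler.
     * STATIC_ASSERT(sizeof(uint32_t) == 1, would_not_compile);
     */

    int main(void) { return 0; }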
diff --git a/contrib/libs/cxxsupp/builtins/lshrdi3.c b/contrib/libs/cxxsupp/builtins/lshrdi3.c
index 981b316005..6b1ea923b7 100644
--- a/contrib/libs/cxxsupp/builtins/lshrdi3.c
+++ b/contrib/libs/cxxsupp/builtins/lshrdi3.c
@@ -1,43 +1,43 @@
-/* ===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __lshrdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: logical a >> b */
-
-/* Precondition: 0 <= b < bits_in_dword */
-
-ARM_EABI_FNALIAS(llsr, lshrdi3)
-
-COMPILER_RT_ABI di_int
-__lshrdi3(di_int a, si_int b)
-{
- const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
- udwords input;
- udwords result;
- input.all = a;
- if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
- {
- result.s.high = 0;
- result.s.low = input.s.high >> (b - bits_in_word);
- }
- else /* 0 <= b < bits_in_word */
- {
- if (b == 0)
- return a;
- result.s.high = input.s.high >> b;
- result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
- }
- return result.all;
-}
+/* ===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __lshrdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: logical a >> b */
+
+/* Precondition: 0 <= b < bits_in_dword */
+
+ARM_EABI_FNALIAS(llsr, lshrdi3)
+
+COMPILER_RT_ABI di_int
+__lshrdi3(di_int a, si_int b)
+{
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ udwords input;
+ udwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
+ {
+ result.s.high = 0;
+ result.s.low = input.s.high >> (b - bits_in_word);
+ }
+ else /* 0 <= b < bits_in_word */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
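
__lshrdi3 splits the 64-bit operand into 32-bit halves and treats b >= 32 and 0 < b < 32 separately. A hedged standalone check that this split formula matches the native unsigned 64-bit shift, written against <stdint.h> types rather than the library's si_int/di_int (lshr64 is an invented name):

    #include <assert.h>
    #include <stdint.h>

    /* Mirror of the __lshrdi3 case split, written against uint64_t/uint32_t. */
    static uint64_t lshr64(uint64_t a, int b) {
        uint32_t lo = (uint32_t)a, hi = (uint32_t)(a >> 32);
        uint32_t rlo, rhi;
        if (b == 0)
            return a;
        if (b & 32) {                 /* 32 <= b < 64: high half supplies everything */
            rhi = 0;
            rlo = hi >> (b - 32);
        } else {                      /* 0 < b < 32: low half gets bits from both */
            rhi = hi >> b;
            rlo = (hi << (32 - b)) | (lo >> b);
        }
        return ((uint64_t)rhi << 32) | rlo;
    }

    int main(void) {
        uint64_t x = 0x8123456789ABCDEFULL;
        for (int b = 0; b < 64; ++b)
            assert(lshr64(x, b) == x >> b);   /* native logical shift as reference */
        return 0;
    }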
diff --git a/contrib/libs/cxxsupp/builtins/lshrti3.c b/contrib/libs/cxxsupp/builtins/lshrti3.c
index 9060165bb5..e4170ff84a 100644
--- a/contrib/libs/cxxsupp/builtins/lshrti3.c
+++ b/contrib/libs/cxxsupp/builtins/lshrti3.c
@@ -1,45 +1,45 @@
-/* ===-- lshrti3.c - Implement __lshrti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __lshrti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: logical a >> b */
-
-/* Precondition: 0 <= b < bits_in_tword */
-
-COMPILER_RT_ABI ti_int
-__lshrti3(ti_int a, si_int b)
-{
- const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
- utwords input;
- utwords result;
- input.all = a;
- if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
- {
- result.s.high = 0;
- result.s.low = input.s.high >> (b - bits_in_dword);
- }
- else /* 0 <= b < bits_in_dword */
- {
- if (b == 0)
- return a;
- result.s.high = input.s.high >> b;
- result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
- }
- return result.all;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- lshrti3.c - Implement __lshrti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __lshrti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: logical a >> b */
+
+/* Precondition: 0 <= b < bits_in_tword */
+
+COMPILER_RT_ABI ti_int
+__lshrti3(ti_int a, si_int b)
+{
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ utwords input;
+ utwords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
+ {
+ result.s.high = 0;
+ result.s.low = input.s.high >> (b - bits_in_dword);
+ }
+ else /* 0 <= b < bits_in_dword */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt
index bc6fcefc20..266e422152 100644
--- a/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt
+++ b/contrib/libs/cxxsupp/builtins/macho_embedded/CMakeLists.txt
@@ -1,4 +1,4 @@
-file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt)
-foreach(filter_file ${filter_files})
- set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file})
-endforeach()
+file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt)
+foreach(filter_file ${filter_files})
+ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file})
+endforeach()
diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt
index 55d61934a1..4b1683a6ba 100644
--- a/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt
+++ b/contrib/libs/cxxsupp/builtins/macho_embedded/arm.txt
@@ -1,16 +1,16 @@
-aeabi_cdcmpeq
-aeabi_cdrcmple
-aeabi_cfcmpeq
-aeabi_cfrcmple
-aeabi_dcmpeq
-aeabi_dcmpge
-aeabi_dcmpgt
-aeabi_dcmple
-aeabi_dcmplt
-aeabi_drsub
-aeabi_fcmpeq
-aeabi_fcmpge
-aeabi_fcmpgt
-aeabi_fcmple
-aeabi_fcmplt
-aeabi_frsub
+aeabi_cdcmpeq
+aeabi_cdrcmple
+aeabi_cfcmpeq
+aeabi_cfrcmple
+aeabi_dcmpeq
+aeabi_dcmpge
+aeabi_dcmpgt
+aeabi_dcmple
+aeabi_dcmplt
+aeabi_drsub
+aeabi_fcmpeq
+aeabi_fcmpge
+aeabi_fcmpgt
+aeabi_fcmple
+aeabi_fcmplt
+aeabi_frsub
diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt
index aa20837612..6ac85a771f 100644
--- a/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt
+++ b/contrib/libs/cxxsupp/builtins/macho_embedded/common.txt
@@ -1,92 +1,92 @@
-absvdi2
-absvsi2
-addvdi3
-addvsi3
-ashldi3
-ashrdi3
-clzdi2
-clzsi2
-cmpdi2
-ctzdi2
-ctzsi2
-divdc3
-divdi3
-divsc3
-divmodsi4
-udivmodsi4
-do_global_dtors
-ffsdi2
-fixdfdi
-fixsfdi
-fixunsdfdi
-fixunsdfsi
-fixunssfdi
-fixunssfsi
-floatdidf
-floatdisf
-floatundidf
-floatundisf
-gcc_bcmp
-lshrdi3
-moddi3
-muldc3
-muldi3
-mulsc3
-mulvdi3
-mulvsi3
-negdi2
-negvdi2
-negvsi2
-paritydi2
-paritysi2
-popcountdi2
-popcountsi2
-powidf2
-powisf2
-subvdi3
-subvsi3
-ucmpdi2
-udiv_w_sdiv
-udivdi3
-udivmoddi4
-umoddi3
-adddf3
-addsf3
-cmpdf2
-cmpsf2
-div0
-divdf3
-divsf3
-divsi3
-extendsfdf2
-extendhfsf2
-ffssi2
-fixdfsi
-fixsfsi
-floatsidf
-floatsisf
-floatunsidf
-floatunsisf
-comparedf2
-comparesf2
-modsi3
-muldf3
-mulsf3
-negdf2
-negsf2
-subdf3
-subsf3
-truncdfhf2
-truncdfsf2
-truncsfhf2
-udivsi3
-umodsi3
-unorddf2
-unordsf2
-atomic_flag_clear
-atomic_flag_clear_explicit
-atomic_flag_test_and_set
-atomic_flag_test_and_set_explicit
-atomic_signal_fence
-atomic_thread_fence
-int_util
+absvdi2
+absvsi2
+addvdi3
+addvsi3
+ashldi3
+ashrdi3
+clzdi2
+clzsi2
+cmpdi2
+ctzdi2
+ctzsi2
+divdc3
+divdi3
+divsc3
+divmodsi4
+udivmodsi4
+do_global_dtors
+ffsdi2
+fixdfdi
+fixsfdi
+fixunsdfdi
+fixunsdfsi
+fixunssfdi
+fixunssfsi
+floatdidf
+floatdisf
+floatundidf
+floatundisf
+gcc_bcmp
+lshrdi3
+moddi3
+muldc3
+muldi3
+mulsc3
+mulvdi3
+mulvsi3
+negdi2
+negvdi2
+negvsi2
+paritydi2
+paritysi2
+popcountdi2
+popcountsi2
+powidf2
+powisf2
+subvdi3
+subvsi3
+ucmpdi2
+udiv_w_sdiv
+udivdi3
+udivmoddi4
+umoddi3
+adddf3
+addsf3
+cmpdf2
+cmpsf2
+div0
+divdf3
+divsf3
+divsi3
+extendsfdf2
+extendhfsf2
+ffssi2
+fixdfsi
+fixsfsi
+floatsidf
+floatsisf
+floatunsidf
+floatunsisf
+comparedf2
+comparesf2
+modsi3
+muldf3
+mulsf3
+negdf2
+negsf2
+subdf3
+subsf3
+truncdfhf2
+truncdfsf2
+truncsfhf2
+udivsi3
+umodsi3
+unorddf2
+unordsf2
+atomic_flag_clear
+atomic_flag_clear_explicit
+atomic_flag_test_and_set
+atomic_flag_test_and_set_explicit
+atomic_signal_fence
+atomic_thread_fence
+int_util
diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt
index 71f0f6993c..b92e44bb35 100644
--- a/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt
+++ b/contrib/libs/cxxsupp/builtins/macho_embedded/i386.txt
@@ -1,7 +1,7 @@
-i686.get_pc_thunk.eax
-i686.get_pc_thunk.ebp
-i686.get_pc_thunk.ebx
-i686.get_pc_thunk.ecx
-i686.get_pc_thunk.edi
-i686.get_pc_thunk.edx
-i686.get_pc_thunk.esi
+i686.get_pc_thunk.eax
+i686.get_pc_thunk.ebp
+i686.get_pc_thunk.ebx
+i686.get_pc_thunk.ecx
+i686.get_pc_thunk.edi
+i686.get_pc_thunk.edx
+i686.get_pc_thunk.esi
diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt
index 6f4b75888b..1c72fb1c3c 100644
--- a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt
+++ b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2-64.txt
@@ -1,10 +1,10 @@
-sync_fetch_and_add_8
-sync_fetch_and_sub_8
-sync_fetch_and_and_8
-sync_fetch_and_or_8
-sync_fetch_and_xor_8
-sync_fetch_and_nand_8
-sync_fetch_and_max_8
-sync_fetch_and_umax_8
-sync_fetch_and_min_8
-sync_fetch_and_umin_8
+sync_fetch_and_add_8
+sync_fetch_and_sub_8
+sync_fetch_and_and_8
+sync_fetch_and_or_8
+sync_fetch_and_xor_8
+sync_fetch_and_nand_8
+sync_fetch_and_max_8
+sync_fetch_and_umax_8
+sync_fetch_and_min_8
+sync_fetch_and_umin_8
diff --git a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt
index 2783f516b9..6add5ecd2d 100644
--- a/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt
+++ b/contrib/libs/cxxsupp/builtins/macho_embedded/thumb2.txt
@@ -1,14 +1,14 @@
-switch16
-switch32
-switch8
-switchu8
-sync_fetch_and_add_4
-sync_fetch_and_sub_4
-sync_fetch_and_and_4
-sync_fetch_and_or_4
-sync_fetch_and_xor_4
-sync_fetch_and_nand_4
-sync_fetch_and_max_4
-sync_fetch_and_umax_4
-sync_fetch_and_min_4
-sync_fetch_and_umin_4
+switch16
+switch32
+switch8
+switchu8
+sync_fetch_and_add_4
+sync_fetch_and_sub_4
+sync_fetch_and_and_4
+sync_fetch_and_or_4
+sync_fetch_and_xor_4
+sync_fetch_and_nand_4
+sync_fetch_and_max_4
+sync_fetch_and_umax_4
+sync_fetch_and_min_4
+sync_fetch_and_umin_4
diff --git a/contrib/libs/cxxsupp/builtins/moddi3.c b/contrib/libs/cxxsupp/builtins/moddi3.c
index 003966075a..a04279e387 100644
--- a/contrib/libs/cxxsupp/builtins/moddi3.c
+++ b/contrib/libs/cxxsupp/builtins/moddi3.c
@@ -1,30 +1,30 @@
-/*===-- moddi3.c - Implement __moddi3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __moddi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI di_int
-__moddi3(di_int a, di_int b)
-{
- const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
- di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */
- b = (b ^ s) - s; /* negate if s == -1 */
- s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */
- a = (a ^ s) - s; /* negate if s == -1 */
- du_int r;
- __udivmoddi4(a, b, &r);
- return ((di_int)r ^ s) - s; /* negate if s == -1 */
-}
+/*===-- moddi3.c - Implement __moddi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __moddi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI di_int
+__moddi3(di_int a, di_int b)
+{
+ const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+ di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */
+ b = (b ^ s) - s; /* negate if s == -1 */
+ s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */
+ a = (a ^ s) - s; /* negate if s == -1 */
+ du_int r;
+ __udivmoddi4(a, b, &r);
+ return ((di_int)r ^ s) - s; /* negate if s == -1 */
+}
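
__moddi3 strips both signs with the branchless (x ^ s) - s trick, where s is the arithmetic-shift sign mask, calls the unsigned routine, and then re-applies the dividend's sign to the remainder. A hedged illustration of just that trick on int64_t; it assumes GCC/Clang's arithmetic right shift of negative values (conditional_negate is an invented name):

    #include <assert.h>
    #include <stdint.h>

    /* (x ^ s) - s negates x when s == -1 and leaves it unchanged when s == 0. */
    static int64_t conditional_negate(int64_t x) {
        int64_t s = x >> 63;      /* all-ones if x < 0, zero otherwise */
        return (x ^ s) - s;       /* |x| for every value except INT64_MIN */
    }

    int main(void) {
        assert(conditional_negate(42)  == 42);
        assert(conditional_negate(-42) == 42);
        assert(conditional_negate(0)   == 0);

        /* The same mask restores the sign afterwards, as __moddi3 does with
         * the remainder: the C remainder takes the sign of the dividend. */
        int64_t a = -7, b = 3;
        int64_t s = a >> 63;
        int64_t r = conditional_negate(a) % conditional_negate(b);
        assert(((r ^ s) - s) == a % b);   /* both are -1 here */
        return 0;
    }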
diff --git a/contrib/libs/cxxsupp/builtins/modsi3.c b/contrib/libs/cxxsupp/builtins/modsi3.c
index e876f0487c..86c73ce137 100644
--- a/contrib/libs/cxxsupp/builtins/modsi3.c
+++ b/contrib/libs/cxxsupp/builtins/modsi3.c
@@ -1,23 +1,23 @@
-/* ===-- modsi3.c - Implement __modsi3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __modsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI si_int
-__modsi3(si_int a, si_int b)
-{
- return a - __divsi3(a, b) * b;
-}
+/* ===-- modsi3.c - Implement __modsi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __modsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI si_int
+__modsi3(si_int a, si_int b)
+{
+ return a - __divsi3(a, b) * b;
+}
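
__modsi3 leans on C's truncation-toward-zero division, under which a % b is exactly a - (a/b)*b and the remainder takes the sign of the dividend. A quick hedged check of that identity:

    #include <assert.h>

    int main(void) {
        int cases[][2] = { {7, 3}, {-7, 3}, {7, -3}, {-7, -3}, {0, 5} };
        for (unsigned i = 0; i < sizeof cases / sizeof cases[0]; ++i) {
            int a = cases[i][0], b = cases[i][1];
            /* Truncating division makes this identity exact for all valid a, b. */
            assert(a - (a / b) * b == a % b);
        }
        return 0;
    }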
diff --git a/contrib/libs/cxxsupp/builtins/modti3.c b/contrib/libs/cxxsupp/builtins/modti3.c
index f6edbdfb34..d505c07ac1 100644
--- a/contrib/libs/cxxsupp/builtins/modti3.c
+++ b/contrib/libs/cxxsupp/builtins/modti3.c
@@ -1,34 +1,34 @@
-/* ===-- modti3.c - Implement __modti3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __modti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI ti_int
-__modti3(ti_int a, ti_int b)
-{
- const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
- ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */
- b = (b ^ s) - s; /* negate if s == -1 */
- s = a >> bits_in_tword_m1; /* s = a < 0 ? -1 : 0 */
- a = (a ^ s) - s; /* negate if s == -1 */
- tu_int r;
- __udivmodti4(a, b, &r);
- return ((ti_int)r ^ s) - s; /* negate if s == -1 */
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- modti3.c - Implement __modti3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __modti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI ti_int
+__modti3(ti_int a, ti_int b)
+{
+ const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+ ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */
+ b = (b ^ s) - s; /* negate if s == -1 */
+ s = a >> bits_in_tword_m1; /* s = a < 0 ? -1 : 0 */
+ a = (a ^ s) - s; /* negate if s == -1 */
+ tu_int r;
+ __udivmodti4(a, b, &r);
+ return ((ti_int)r ^ s) - s; /* negate if s == -1 */
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/muldc3.c b/contrib/libs/cxxsupp/builtins/muldc3.c
index e40debca03..16d8e98390 100644
--- a/contrib/libs/cxxsupp/builtins/muldc3.c
+++ b/contrib/libs/cxxsupp/builtins/muldc3.c
@@ -1,73 +1,73 @@
-/* ===-- muldc3.c - Implement __muldc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __muldc3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the product of a + ib and c + id */
-
-COMPILER_RT_ABI Dcomplex
-__muldc3(double __a, double __b, double __c, double __d)
-{
- double __ac = __a * __c;
- double __bd = __b * __d;
- double __ad = __a * __d;
- double __bc = __b * __c;
- Dcomplex z;
- COMPLEX_REAL(z) = __ac - __bd;
- COMPLEX_IMAGINARY(z) = __ad + __bc;
- if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
- {
- int __recalc = 0;
- if (crt_isinf(__a) || crt_isinf(__b))
- {
- __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a);
- __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b);
- if (crt_isnan(__c))
- __c = crt_copysign(0, __c);
- if (crt_isnan(__d))
- __d = crt_copysign(0, __d);
- __recalc = 1;
- }
- if (crt_isinf(__c) || crt_isinf(__d))
- {
- __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c);
- __d = crt_copysign(crt_isinf(__d) ? 1 : 0, __d);
- if (crt_isnan(__a))
- __a = crt_copysign(0, __a);
- if (crt_isnan(__b))
- __b = crt_copysign(0, __b);
- __recalc = 1;
- }
- if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
- crt_isinf(__ad) || crt_isinf(__bc)))
- {
- if (crt_isnan(__a))
- __a = crt_copysign(0, __a);
- if (crt_isnan(__b))
- __b = crt_copysign(0, __b);
- if (crt_isnan(__c))
- __c = crt_copysign(0, __c);
- if (crt_isnan(__d))
- __d = crt_copysign(0, __d);
- __recalc = 1;
- }
- if (__recalc)
- {
- COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
- COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
- }
- }
- return z;
-}
+/* ===-- muldc3.c - Implement __muldc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muldc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI Dcomplex
+__muldc3(double __a, double __b, double __c, double __d)
+{
+ double __ac = __a * __c;
+ double __bd = __b * __d;
+ double __ad = __a * __d;
+ double __bc = __b * __c;
+ Dcomplex z;
+ COMPLEX_REAL(z) = __ac - __bd;
+ COMPLEX_IMAGINARY(z) = __ad + __bc;
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ int __recalc = 0;
+ if (crt_isinf(__a) || crt_isinf(__b))
+ {
+ __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysign(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysign(0, __d);
+ __recalc = 1;
+ }
+ if (crt_isinf(__c) || crt_isinf(__d))
+ {
+ __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysign(crt_isinf(__d) ? 1 : 0, __d);
+ if (crt_isnan(__a))
+ __a = crt_copysign(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysign(0, __b);
+ __recalc = 1;
+ }
+ if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+ crt_isinf(__ad) || crt_isinf(__bc)))
+ {
+ if (crt_isnan(__a))
+ __a = crt_copysign(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysign(0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysign(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysign(0, __d);
+ __recalc = 1;
+ }
+ if (__recalc)
+ {
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
+ }
+ }
+ return z;
+}
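
The recovery branch above implements the C99 Annex G rule: an operand with an infinite part acts as an infinity even when its other part is NaN, so the product must stay infinite rather than collapse to NaN. A hedged standalone illustration of why the textbook formula needs this rescue; it reproduces the projection by hand and does not assume the compiler actually routes _Complex multiplication through __muldc3:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        /* (inf + i*NaN) * (2 + i*0): textbook formula, no recovery. */
        double a = INFINITY, b = NAN, c = 2.0, d = 0.0;

        double re = a * c - b * d;   /* inf - NaN -> NaN */
        double im = a * d + b * c;   /* NaN + NaN -> NaN */
        printf("naive:     (%g, %g)\n", re, im);

        /* Annex G recovery, as in __muldc3: the infinite operand is projected
         * onto (+/-1, +/-0), NaNs are flushed to zero, and the products are
         * rescaled by infinity. */
        double ra = copysign(isinf(a) ? 1.0 : 0.0, a);   /* 1 */
        double rb = copysign(isinf(b) ? 1.0 : 0.0, b);   /* 0 */
        double rre = INFINITY * (ra * c - rb * d);       /* +inf */
        double rim = INFINITY * (ra * d + rb * c);       /* inf * 0 -> NaN, but the
                                                            result is still a complex
                                                            infinity: its real part
                                                            is infinite */
        printf("recovered: (%g, %g)\n", rre, rim);
        return 0;
    }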
diff --git a/contrib/libs/cxxsupp/builtins/muldf3.c b/contrib/libs/cxxsupp/builtins/muldf3.c
index 59827d8ce4..1eb733849e 100644
--- a/contrib/libs/cxxsupp/builtins/muldf3.c
+++ b/contrib/libs/cxxsupp/builtins/muldf3.c
@@ -1,22 +1,22 @@
-//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float multiplication
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_mul_impl.inc"
-
-ARM_EABI_FNALIAS(dmul, muldf3)
-
-COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
- return __mulXf3__(a, b);
-}
+//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_mul_impl.inc"
+
+ARM_EABI_FNALIAS(dmul, muldf3)
+
+COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
+ return __mulXf3__(a, b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/muldi3.c b/contrib/libs/cxxsupp/builtins/muldi3.c
index 9beacb33b2..2dae44c11b 100644
--- a/contrib/libs/cxxsupp/builtins/muldi3.c
+++ b/contrib/libs/cxxsupp/builtins/muldi3.c
@@ -1,56 +1,56 @@
-/* ===-- muldi3.c - Implement __muldi3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __muldi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a * b */
-
-static
-di_int
-__muldsi3(su_int a, su_int b)
-{
- dwords r;
- const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2;
- const su_int lower_mask = (su_int)~0 >> bits_in_word_2;
- r.s.low = (a & lower_mask) * (b & lower_mask);
- su_int t = r.s.low >> bits_in_word_2;
- r.s.low &= lower_mask;
- t += (a >> bits_in_word_2) * (b & lower_mask);
- r.s.low += (t & lower_mask) << bits_in_word_2;
- r.s.high = t >> bits_in_word_2;
- t = r.s.low >> bits_in_word_2;
- r.s.low &= lower_mask;
- t += (b >> bits_in_word_2) * (a & lower_mask);
- r.s.low += (t & lower_mask) << bits_in_word_2;
- r.s.high += t >> bits_in_word_2;
- r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2);
- return r.all;
-}
-
-/* Returns: a * b */
-
-ARM_EABI_FNALIAS(lmul, muldi3)
-
-COMPILER_RT_ABI di_int
-__muldi3(di_int a, di_int b)
-{
- dwords x;
- x.all = a;
- dwords y;
- y.all = b;
- dwords r;
- r.all = __muldsi3(x.s.low, y.s.low);
- r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
- return r.all;
-}
+/* ===-- muldi3.c - Implement __muldi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muldi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+static
+di_int
+__muldsi3(su_int a, su_int b)
+{
+ dwords r;
+ const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2;
+ const su_int lower_mask = (su_int)~0 >> bits_in_word_2;
+ r.s.low = (a & lower_mask) * (b & lower_mask);
+ su_int t = r.s.low >> bits_in_word_2;
+ r.s.low &= lower_mask;
+ t += (a >> bits_in_word_2) * (b & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_word_2;
+ r.s.high = t >> bits_in_word_2;
+ t = r.s.low >> bits_in_word_2;
+ r.s.low &= lower_mask;
+ t += (b >> bits_in_word_2) * (a & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_word_2;
+ r.s.high += t >> bits_in_word_2;
+ r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2);
+ return r.all;
+}
+
+/* Returns: a * b */
+
+ARM_EABI_FNALIAS(lmul, muldi3)
+
+COMPILER_RT_ABI di_int
+__muldi3(di_int a, di_int b)
+{
+ dwords x;
+ x.all = a;
+ dwords y;
+ y.all = b;
+ dwords r;
+ r.all = __muldsi3(x.s.low, y.s.low);
+ r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+ return r.all;
+}
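
__muldsi3 produces a double-width product by splitting each operand into half-words and summing the four partial products, i.e. schoolbook long multiplication in base 2^(word/2). A hedged standalone version of the same scheme at 16x16 -> 32 bits, checked against the native widening multiply (widening_mul16 is an invented name):

    #include <assert.h>
    #include <stdint.h>

    /* Schoolbook 16x16 -> 32-bit multiply, mirroring __muldsi3's half-word scheme. */
    static uint32_t widening_mul16(uint16_t a, uint16_t b) {
        const uint16_t lo_a = a & 0xFF, hi_a = a >> 8;
        const uint16_t lo_b = b & 0xFF, hi_b = b >> 8;

        uint32_t low  = (uint32_t)lo_a * lo_b;      /* partial product 1 */
        uint32_t mid1 = (uint32_t)hi_a * lo_b;      /* partial product 2 */
        uint32_t mid2 = (uint32_t)lo_a * hi_b;      /* partial product 3 */
        uint32_t high = (uint32_t)hi_a * hi_b;      /* partial product 4 */

        /* Shift the cross terms and the high partial product into place;
         * the full result fits in 32 bits, so no carries are lost. */
        return low + (mid1 << 8) + (mid2 << 8) + (high << 16);
    }

    int main(void) {
        uint16_t samples[] = { 0, 1, 0x00FF, 0x0100, 0xABCD, 0xFFFF };
        for (unsigned i = 0; i < 6; ++i)
            for (unsigned j = 0; j < 6; ++j)
                assert(widening_mul16(samples[i], samples[j]) ==
                       (uint32_t)samples[i] * samples[j]);
        return 0;
    }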
diff --git a/contrib/libs/cxxsupp/builtins/mulodi4.c b/contrib/libs/cxxsupp/builtins/mulodi4.c
index b97d99ce0f..d2fd7db2bc 100644
--- a/contrib/libs/cxxsupp/builtins/mulodi4.c
+++ b/contrib/libs/cxxsupp/builtins/mulodi4.c
@@ -1,58 +1,58 @@
-/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulodi4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a * b */
-
-/* Effects: sets *overflow to 1 if a * b overflows */
-
-COMPILER_RT_ABI di_int
-__mulodi4(di_int a, di_int b, int* overflow)
-{
- const int N = (int)(sizeof(di_int) * CHAR_BIT);
- const di_int MIN = (di_int)1 << (N-1);
- const di_int MAX = ~MIN;
- *overflow = 0;
- di_int result = a * b;
- if (a == MIN)
- {
- if (b != 0 && b != 1)
- *overflow = 1;
- return result;
- }
- if (b == MIN)
- {
- if (a != 0 && a != 1)
- *overflow = 1;
- return result;
- }
- di_int sa = a >> (N - 1);
- di_int abs_a = (a ^ sa) - sa;
- di_int sb = b >> (N - 1);
- di_int abs_b = (b ^ sb) - sb;
- if (abs_a < 2 || abs_b < 2)
- return result;
- if (sa == sb)
- {
- if (abs_a > MAX / abs_b)
- *overflow = 1;
- }
- else
- {
- if (abs_a > MIN / -abs_b)
- *overflow = 1;
- }
- return result;
-}
+/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulodi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: sets *overflow to 1 if a * b overflows */
+
+COMPILER_RT_ABI di_int
+__mulodi4(di_int a, di_int b, int* overflow)
+{
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ const di_int MIN = (di_int)1 << (N-1);
+ const di_int MAX = ~MIN;
+ *overflow = 0;
+ di_int result = a * b;
+ if (a == MIN)
+ {
+ if (b != 0 && b != 1)
+ *overflow = 1;
+ return result;
+ }
+ if (b == MIN)
+ {
+ if (a != 0 && a != 1)
+ *overflow = 1;
+ return result;
+ }
+ di_int sa = a >> (N - 1);
+ di_int abs_a = (a ^ sa) - sa;
+ di_int sb = b >> (N - 1);
+ di_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return result;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ *overflow = 1;
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ *overflow = 1;
+ }
+ return result;
+}
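
The overflow test in __mulodi4 never inspects the possibly-wrapped product: after the MIN special cases it compares |a| against MAX/|b| (same signs) or MIN/-|b| (opposite signs). A hedged standalone check of that predicate at 32 bits against GCC/Clang's __builtin_mul_overflow (mul_overflows is an invented name):

    #include <assert.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Division-based overflow predicate for a * b, same shape as __mulodi4. */
    static int mul_overflows(int32_t a, int32_t b) {
        if (a == INT32_MIN) return b != 0 && b != 1;
        if (b == INT32_MIN) return a != 0 && a != 1;
        int32_t abs_a = (int32_t)labs(a), abs_b = (int32_t)labs(b);
        if (abs_a < 2 || abs_b < 2) return 0;
        if ((a < 0) == (b < 0))
            return abs_a > INT32_MAX / abs_b;      /* result would exceed MAX */
        return abs_a > INT32_MIN / -abs_b;         /* result would go below MIN */
    }

    int main(void) {
        int32_t samples[] = { 0, 1, -1, 2, -2, 46341, -46341, 65536,
                              INT32_MAX, INT32_MIN };
        for (unsigned i = 0; i < 10; ++i)
            for (unsigned j = 0; j < 10; ++j) {
                int32_t ignored;
                int ref = __builtin_mul_overflow(samples[i], samples[j], &ignored);
                assert(mul_overflows(samples[i], samples[j]) == ref);
            }
        return 0;
    }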
diff --git a/contrib/libs/cxxsupp/builtins/mulosi4.c b/contrib/libs/cxxsupp/builtins/mulosi4.c
index 69ab084f9d..422528085c 100644
--- a/contrib/libs/cxxsupp/builtins/mulosi4.c
+++ b/contrib/libs/cxxsupp/builtins/mulosi4.c
@@ -1,58 +1,58 @@
-/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulosi4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a * b */
-
-/* Effects: sets *overflow to 1 if a * b overflows */
-
-COMPILER_RT_ABI si_int
-__mulosi4(si_int a, si_int b, int* overflow)
-{
- const int N = (int)(sizeof(si_int) * CHAR_BIT);
- const si_int MIN = (si_int)1 << (N-1);
- const si_int MAX = ~MIN;
- *overflow = 0;
- si_int result = a * b;
- if (a == MIN)
- {
- if (b != 0 && b != 1)
- *overflow = 1;
- return result;
- }
- if (b == MIN)
- {
- if (a != 0 && a != 1)
- *overflow = 1;
- return result;
- }
- si_int sa = a >> (N - 1);
- si_int abs_a = (a ^ sa) - sa;
- si_int sb = b >> (N - 1);
- si_int abs_b = (b ^ sb) - sb;
- if (abs_a < 2 || abs_b < 2)
- return result;
- if (sa == sb)
- {
- if (abs_a > MAX / abs_b)
- *overflow = 1;
- }
- else
- {
- if (abs_a > MIN / -abs_b)
- *overflow = 1;
- }
- return result;
-}
+/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulosi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: sets *overflow to 1 if a * b overflows */
+
+COMPILER_RT_ABI si_int
+__mulosi4(si_int a, si_int b, int* overflow)
+{
+ const int N = (int)(sizeof(si_int) * CHAR_BIT);
+ const si_int MIN = (si_int)1 << (N-1);
+ const si_int MAX = ~MIN;
+ *overflow = 0;
+ si_int result = a * b;
+ if (a == MIN)
+ {
+ if (b != 0 && b != 1)
+ *overflow = 1;
+ return result;
+ }
+ if (b == MIN)
+ {
+ if (a != 0 && a != 1)
+ *overflow = 1;
+ return result;
+ }
+ si_int sa = a >> (N - 1);
+ si_int abs_a = (a ^ sa) - sa;
+ si_int sb = b >> (N - 1);
+ si_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return result;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ *overflow = 1;
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ *overflow = 1;
+ }
+ return result;
+}
diff --git a/contrib/libs/cxxsupp/builtins/muloti4.c b/contrib/libs/cxxsupp/builtins/muloti4.c
index 67fcbcc6f7..aef8207aaa 100644
--- a/contrib/libs/cxxsupp/builtins/muloti4.c
+++ b/contrib/libs/cxxsupp/builtins/muloti4.c
@@ -1,63 +1,63 @@
-/*===-- muloti4.c - Implement __muloti4 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __muloti4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a * b */
-
-/* Effects: sets *overflow to 1 if a * b overflows */
-
+/*===-- muloti4.c - Implement __muloti4 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muloti4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a * b */
+
+/* Effects: sets *overflow to 1 if a * b overflows */
+
__attribute__((no_sanitize("undefined")))
-COMPILER_RT_ABI ti_int
-__muloti4(ti_int a, ti_int b, int* overflow)
-{
- const int N = (int)(sizeof(ti_int) * CHAR_BIT);
- const ti_int MIN = (ti_int)1 << (N-1);
- const ti_int MAX = ~MIN;
- *overflow = 0;
- ti_int result = a * b;
- if (a == MIN)
- {
- if (b != 0 && b != 1)
- *overflow = 1;
- return result;
- }
- if (b == MIN)
- {
- if (a != 0 && a != 1)
- *overflow = 1;
- return result;
- }
- ti_int sa = a >> (N - 1);
- ti_int abs_a = (a ^ sa) - sa;
- ti_int sb = b >> (N - 1);
- ti_int abs_b = (b ^ sb) - sb;
- if (abs_a < 2 || abs_b < 2)
- return result;
- if (sa == sb)
- {
- if (abs_a > MAX / abs_b)
- *overflow = 1;
- }
- else
- {
- if (abs_a > MIN / -abs_b)
- *overflow = 1;
- }
- return result;
-}
-
-#endif /* CRT_HAS_128BIT */
+COMPILER_RT_ABI ti_int
+__muloti4(ti_int a, ti_int b, int* overflow)
+{
+ const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+ const ti_int MIN = (ti_int)1 << (N-1);
+ const ti_int MAX = ~MIN;
+ *overflow = 0;
+ ti_int result = a * b;
+ if (a == MIN)
+ {
+ if (b != 0 && b != 1)
+ *overflow = 1;
+ return result;
+ }
+ if (b == MIN)
+ {
+ if (a != 0 && a != 1)
+ *overflow = 1;
+ return result;
+ }
+ ti_int sa = a >> (N - 1);
+ ti_int abs_a = (a ^ sa) - sa;
+ ti_int sb = b >> (N - 1);
+ ti_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return result;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ *overflow = 1;
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ *overflow = 1;
+ }
+ return result;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/mulsc3.c b/contrib/libs/cxxsupp/builtins/mulsc3.c
index 270c6de820..c89cfd247a 100644
--- a/contrib/libs/cxxsupp/builtins/mulsc3.c
+++ b/contrib/libs/cxxsupp/builtins/mulsc3.c
@@ -1,73 +1,73 @@
-/* ===-- mulsc3.c - Implement __mulsc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulsc3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the product of a + ib and c + id */
-
-COMPILER_RT_ABI Fcomplex
-__mulsc3(float __a, float __b, float __c, float __d)
-{
- float __ac = __a * __c;
- float __bd = __b * __d;
- float __ad = __a * __d;
- float __bc = __b * __c;
- Fcomplex z;
- COMPLEX_REAL(z) = __ac - __bd;
- COMPLEX_IMAGINARY(z) = __ad + __bc;
- if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
- {
- int __recalc = 0;
- if (crt_isinf(__a) || crt_isinf(__b))
- {
- __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
- __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
- if (crt_isnan(__c))
- __c = crt_copysignf(0, __c);
- if (crt_isnan(__d))
- __d = crt_copysignf(0, __d);
- __recalc = 1;
- }
- if (crt_isinf(__c) || crt_isinf(__d))
- {
- __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
- __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
- if (crt_isnan(__a))
- __a = crt_copysignf(0, __a);
- if (crt_isnan(__b))
- __b = crt_copysignf(0, __b);
- __recalc = 1;
- }
- if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
- crt_isinf(__ad) || crt_isinf(__bc)))
- {
- if (crt_isnan(__a))
- __a = crt_copysignf(0, __a);
- if (crt_isnan(__b))
- __b = crt_copysignf(0, __b);
- if (crt_isnan(__c))
- __c = crt_copysignf(0, __c);
- if (crt_isnan(__d))
- __d = crt_copysignf(0, __d);
- __recalc = 1;
- }
- if (__recalc)
- {
- COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
- COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
- }
- }
- return z;
-}
+/* ===-- mulsc3.c - Implement __mulsc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulsc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI Fcomplex
+__mulsc3(float __a, float __b, float __c, float __d)
+{
+ float __ac = __a * __c;
+ float __bd = __b * __d;
+ float __ad = __a * __d;
+ float __bc = __b * __c;
+ Fcomplex z;
+ COMPLEX_REAL(z) = __ac - __bd;
+ COMPLEX_IMAGINARY(z) = __ad + __bc;
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ int __recalc = 0;
+ if (crt_isinf(__a) || crt_isinf(__b))
+ {
+ __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignf(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignf(0, __d);
+ __recalc = 1;
+ }
+ if (crt_isinf(__c) || crt_isinf(__d))
+ {
+ __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
+ if (crt_isnan(__a))
+ __a = crt_copysignf(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignf(0, __b);
+ __recalc = 1;
+ }
+ if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+ crt_isinf(__ad) || crt_isinf(__bc)))
+ {
+ if (crt_isnan(__a))
+ __a = crt_copysignf(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignf(0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignf(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignf(0, __d);
+ __recalc = 1;
+ }
+ if (__recalc)
+ {
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
+ }
+ }
+ return z;
+}
diff --git a/contrib/libs/cxxsupp/builtins/mulsf3.c b/contrib/libs/cxxsupp/builtins/mulsf3.c
index 7c7f94531e..478b3bc0e0 100644
--- a/contrib/libs/cxxsupp/builtins/mulsf3.c
+++ b/contrib/libs/cxxsupp/builtins/mulsf3.c
@@ -1,22 +1,22 @@
-//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float multiplication
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_mul_impl.inc"
-
-ARM_EABI_FNALIAS(fmul, mulsf3)
-
-COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
- return __mulXf3__(a, b);
-}
+//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_mul_impl.inc"
+
+ARM_EABI_FNALIAS(fmul, mulsf3)
+
+COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
+ return __mulXf3__(a, b);
+}
diff --git a/contrib/libs/cxxsupp/builtins/multc3.c b/contrib/libs/cxxsupp/builtins/multc3.c
index aabe0634b3..0518bc2569 100644
--- a/contrib/libs/cxxsupp/builtins/multc3.c
+++ b/contrib/libs/cxxsupp/builtins/multc3.c
@@ -1,68 +1,68 @@
-/* ===-- multc3.c - Implement __multc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __multc3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the product of a + ib and c + id */
-
-COMPILER_RT_ABI long double _Complex
-__multc3(long double a, long double b, long double c, long double d)
-{
- long double ac = a * c;
- long double bd = b * d;
- long double ad = a * d;
- long double bc = b * c;
- long double _Complex z;
- __real__ z = ac - bd;
- __imag__ z = ad + bc;
- if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) {
- int recalc = 0;
- if (crt_isinf(a) || crt_isinf(b)) {
- a = crt_copysignl(crt_isinf(a) ? 1 : 0, a);
- b = crt_copysignl(crt_isinf(b) ? 1 : 0, b);
- if (crt_isnan(c))
- c = crt_copysignl(0, c);
- if (crt_isnan(d))
- d = crt_copysignl(0, d);
- recalc = 1;
- }
- if (crt_isinf(c) || crt_isinf(d)) {
- c = crt_copysignl(crt_isinf(c) ? 1 : 0, c);
- d = crt_copysignl(crt_isinf(d) ? 1 : 0, d);
- if (crt_isnan(a))
- a = crt_copysignl(0, a);
- if (crt_isnan(b))
- b = crt_copysignl(0, b);
- recalc = 1;
- }
- if (!recalc && (crt_isinf(ac) || crt_isinf(bd) ||
- crt_isinf(ad) || crt_isinf(bc))) {
- if (crt_isnan(a))
- a = crt_copysignl(0, a);
- if (crt_isnan(b))
- b = crt_copysignl(0, b);
- if (crt_isnan(c))
- c = crt_copysignl(0, c);
- if (crt_isnan(d))
- d = crt_copysignl(0, d);
- recalc = 1;
- }
- if (recalc) {
- __real__ z = CRT_INFINITY * (a * c - b * d);
- __imag__ z = CRT_INFINITY * (a * d + b * c);
- }
- }
- return z;
-}
+/* ===-- multc3.c - Implement __multc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __multc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI long double _Complex
+__multc3(long double a, long double b, long double c, long double d)
+{
+ long double ac = a * c;
+ long double bd = b * d;
+ long double ad = a * d;
+ long double bc = b * c;
+ long double _Complex z;
+ __real__ z = ac - bd;
+ __imag__ z = ad + bc;
+ if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) {
+ int recalc = 0;
+ if (crt_isinf(a) || crt_isinf(b)) {
+ a = crt_copysignl(crt_isinf(a) ? 1 : 0, a);
+ b = crt_copysignl(crt_isinf(b) ? 1 : 0, b);
+ if (crt_isnan(c))
+ c = crt_copysignl(0, c);
+ if (crt_isnan(d))
+ d = crt_copysignl(0, d);
+ recalc = 1;
+ }
+ if (crt_isinf(c) || crt_isinf(d)) {
+ c = crt_copysignl(crt_isinf(c) ? 1 : 0, c);
+ d = crt_copysignl(crt_isinf(d) ? 1 : 0, d);
+ if (crt_isnan(a))
+ a = crt_copysignl(0, a);
+ if (crt_isnan(b))
+ b = crt_copysignl(0, b);
+ recalc = 1;
+ }
+ if (!recalc && (crt_isinf(ac) || crt_isinf(bd) ||
+ crt_isinf(ad) || crt_isinf(bc))) {
+ if (crt_isnan(a))
+ a = crt_copysignl(0, a);
+ if (crt_isnan(b))
+ b = crt_copysignl(0, b);
+ if (crt_isnan(c))
+ c = crt_copysignl(0, c);
+ if (crt_isnan(d))
+ d = crt_copysignl(0, d);
+ recalc = 1;
+ }
+ if (recalc) {
+ __real__ z = CRT_INFINITY * (a * c - b * d);
+ __imag__ z = CRT_INFINITY * (a * d + b * c);
+ }
+ }
+ return z;
+}
diff --git a/contrib/libs/cxxsupp/builtins/multf3.c b/contrib/libs/cxxsupp/builtins/multf3.c
index 3df42e3c2a..0b915923ea 100644
--- a/contrib/libs/cxxsupp/builtins/multf3.c
+++ b/contrib/libs/cxxsupp/builtins/multf3.c
@@ -1,25 +1,25 @@
-//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float multiplication
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-#include "fp_mul_impl.inc"
-
-COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) {
- return __mulXf3__(a, b);
-}
-
-#endif
+//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) {
+ return __mulXf3__(a, b);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/multi3.c b/contrib/libs/cxxsupp/builtins/multi3.c
index 5b3f558713..e0d52d430b 100644
--- a/contrib/libs/cxxsupp/builtins/multi3.c
+++ b/contrib/libs/cxxsupp/builtins/multi3.c
@@ -1,58 +1,58 @@
-/* ===-- multi3.c - Implement __multi3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
-
- * This file implements __multi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a * b */
-
-static
-ti_int
-__mulddi3(du_int a, du_int b)
-{
- twords r;
- const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
- const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
- r.s.low = (a & lower_mask) * (b & lower_mask);
- du_int t = r.s.low >> bits_in_dword_2;
- r.s.low &= lower_mask;
- t += (a >> bits_in_dword_2) * (b & lower_mask);
- r.s.low += (t & lower_mask) << bits_in_dword_2;
- r.s.high = t >> bits_in_dword_2;
- t = r.s.low >> bits_in_dword_2;
- r.s.low &= lower_mask;
- t += (b >> bits_in_dword_2) * (a & lower_mask);
- r.s.low += (t & lower_mask) << bits_in_dword_2;
- r.s.high += t >> bits_in_dword_2;
- r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2);
- return r.all;
-}
-
-/* Returns: a * b */
-
-COMPILER_RT_ABI ti_int
-__multi3(ti_int a, ti_int b)
-{
- twords x;
- x.all = a;
- twords y;
- y.all = b;
- twords r;
- r.all = __mulddi3(x.s.low, y.s.low);
- r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
- return r.all;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- multi3.c - Implement __multi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __multi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a * b */
+
+static
+ti_int
+__mulddi3(du_int a, du_int b)
+{
+ twords r;
+ const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
+ const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
+ r.s.low = (a & lower_mask) * (b & lower_mask);
+ du_int t = r.s.low >> bits_in_dword_2;
+ r.s.low &= lower_mask;
+ t += (a >> bits_in_dword_2) * (b & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_dword_2;
+ r.s.high = t >> bits_in_dword_2;
+ t = r.s.low >> bits_in_dword_2;
+ r.s.low &= lower_mask;
+ t += (b >> bits_in_dword_2) * (a & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_dword_2;
+ r.s.high += t >> bits_in_dword_2;
+ r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2);
+ return r.all;
+}
+
+/* Returns: a * b */
+
+COMPILER_RT_ABI ti_int
+__multi3(ti_int a, ti_int b)
+{
+ twords x;
+ x.all = a;
+ twords y;
+ y.all = b;
+ twords r;
+ r.all = __mulddi3(x.s.low, y.s.low);
+ r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+ return r.all;
+}
+
+#endif /* CRT_HAS_128BIT */
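
For reference, __mulddi3 above builds the full 64x64 -> 128-bit product by schoolbook multiplication of 32-bit halves, and __multi3 then adds the two cross terms of the 128-bit operands into the high word. A standalone sketch of the same splitting with <stdint.h> types (hypothetical names, not part of the library):

/* Hypothetical sketch of the 64x64 -> 128 split used by __mulddi3: the low
 * word is assembled from four 32x32 -> 64 partial products, with carries
 * propagated through 't'. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t lo; uint64_t hi; } u128_sketch;

static u128_sketch mul64x64_sketch(uint64_t a, uint64_t b) {
    const uint64_t mask = 0xFFFFFFFFu;
    uint64_t a_lo = a & mask, a_hi = a >> 32;
    uint64_t b_lo = b & mask, b_hi = b >> 32;

    u128_sketch r;
    r.lo = a_lo * b_lo;
    uint64_t t = r.lo >> 32;
    r.lo &= mask;
    t += a_hi * b_lo;              /* first cross term  */
    r.lo += (t & mask) << 32;
    r.hi = t >> 32;
    t = r.lo >> 32;
    r.lo &= mask;
    t += b_hi * a_lo;              /* second cross term */
    r.lo += (t & mask) << 32;
    r.hi += t >> 32;
    r.hi += a_hi * b_hi;           /* high product      */
    return r;
}

int main(void) {
    u128_sketch p = mul64x64_sketch(0xFFFFFFFFFFFFFFFFull, 2);  /* (2^64 - 1) * 2 */
    printf("hi=%llx lo=%llx\n", (unsigned long long)p.hi,
           (unsigned long long)p.lo);                            /* hi=1 lo=fffffffffffffffe */
    return 0;
}
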
diff --git a/contrib/libs/cxxsupp/builtins/mulvdi3.c b/contrib/libs/cxxsupp/builtins/mulvdi3.c
index 69095e0241..e63249e0a0 100644
--- a/contrib/libs/cxxsupp/builtins/mulvdi3.c
+++ b/contrib/libs/cxxsupp/builtins/mulvdi3.c
@@ -1,56 +1,56 @@
-/*===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulvdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a * b */
-
-/* Effects: aborts if a * b overflows */
-
-COMPILER_RT_ABI di_int
-__mulvdi3(di_int a, di_int b)
-{
- const int N = (int)(sizeof(di_int) * CHAR_BIT);
- const di_int MIN = (di_int)1 << (N-1);
- const di_int MAX = ~MIN;
- if (a == MIN)
- {
- if (b == 0 || b == 1)
- return a * b;
- compilerrt_abort();
- }
- if (b == MIN)
- {
- if (a == 0 || a == 1)
- return a * b;
- compilerrt_abort();
- }
- di_int sa = a >> (N - 1);
- di_int abs_a = (a ^ sa) - sa;
- di_int sb = b >> (N - 1);
- di_int abs_b = (b ^ sb) - sb;
- if (abs_a < 2 || abs_b < 2)
- return a * b;
- if (sa == sb)
- {
- if (abs_a > MAX / abs_b)
- compilerrt_abort();
- }
- else
- {
- if (abs_a > MIN / -abs_b)
- compilerrt_abort();
- }
- return a * b;
-}
+/*===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulvdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: aborts if a * b overflows */
+
+COMPILER_RT_ABI di_int
+__mulvdi3(di_int a, di_int b)
+{
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ const di_int MIN = (di_int)1 << (N-1);
+ const di_int MAX = ~MIN;
+ if (a == MIN)
+ {
+ if (b == 0 || b == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ if (b == MIN)
+ {
+ if (a == 0 || a == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ di_int sa = a >> (N - 1);
+ di_int abs_a = (a ^ sa) - sa;
+ di_int sb = b >> (N - 1);
+ di_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return a * b;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ compilerrt_abort();
+ }
+ return a * b;
+}
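
__mulvdi3 detects overflow before multiplying: the di_int minimum is special-cased, small magnitudes pass through, and otherwise |a| is compared against MAX/|b| (or MIN/-|b| when the signs differ). Where the GCC/Clang checked-arithmetic builtin is available, the same predicate can be written as below (a hedged equivalent for illustration, not how the library itself is built):

/* Hypothetical cross-check: the compiler's checked-multiply builtin reports
 * overflow for exactly the inputs on which __mulvdi3 would call
 * compilerrt_abort(). */
#include <stdint.h>
#include <stdio.h>

static int mulvdi3_would_abort(int64_t a, int64_t b) {
    int64_t product;
    return __builtin_mul_overflow(a, b, &product);  /* non-zero on overflow */
}

int main(void) {
    printf("%d %d\n",
           mulvdi3_would_abort(INT64_MAX, 2),    /* 1: overflows       */
           mulvdi3_would_abort(INT64_MAX, 1));   /* 0: representable   */
    return 0;
}
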
diff --git a/contrib/libs/cxxsupp/builtins/mulvsi3.c b/contrib/libs/cxxsupp/builtins/mulvsi3.c
index 210a20138e..74ea4f2da2 100644
--- a/contrib/libs/cxxsupp/builtins/mulvsi3.c
+++ b/contrib/libs/cxxsupp/builtins/mulvsi3.c
@@ -1,56 +1,56 @@
-/* ===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulvsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a * b */
-
-/* Effects: aborts if a * b overflows */
-
-COMPILER_RT_ABI si_int
-__mulvsi3(si_int a, si_int b)
-{
- const int N = (int)(sizeof(si_int) * CHAR_BIT);
- const si_int MIN = (si_int)1 << (N-1);
- const si_int MAX = ~MIN;
- if (a == MIN)
- {
- if (b == 0 || b == 1)
- return a * b;
- compilerrt_abort();
- }
- if (b == MIN)
- {
- if (a == 0 || a == 1)
- return a * b;
- compilerrt_abort();
- }
- si_int sa = a >> (N - 1);
- si_int abs_a = (a ^ sa) - sa;
- si_int sb = b >> (N - 1);
- si_int abs_b = (b ^ sb) - sb;
- if (abs_a < 2 || abs_b < 2)
- return a * b;
- if (sa == sb)
- {
- if (abs_a > MAX / abs_b)
- compilerrt_abort();
- }
- else
- {
- if (abs_a > MIN / -abs_b)
- compilerrt_abort();
- }
- return a * b;
-}
+/* ===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulvsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: aborts if a * b overflows */
+
+COMPILER_RT_ABI si_int
+__mulvsi3(si_int a, si_int b)
+{
+ const int N = (int)(sizeof(si_int) * CHAR_BIT);
+ const si_int MIN = (si_int)1 << (N-1);
+ const si_int MAX = ~MIN;
+ if (a == MIN)
+ {
+ if (b == 0 || b == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ if (b == MIN)
+ {
+ if (a == 0 || a == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ si_int sa = a >> (N - 1);
+ si_int abs_a = (a ^ sa) - sa;
+ si_int sb = b >> (N - 1);
+ si_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return a * b;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ compilerrt_abort();
+ }
+ return a * b;
+}
diff --git a/contrib/libs/cxxsupp/builtins/mulvti3.c b/contrib/libs/cxxsupp/builtins/mulvti3.c
index e8e817a2df..f4c7d1612b 100644
--- a/contrib/libs/cxxsupp/builtins/mulvti3.c
+++ b/contrib/libs/cxxsupp/builtins/mulvti3.c
@@ -1,60 +1,60 @@
-/* ===-- mulvti3.c - Implement __mulvti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulvti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a * b */
-
-/* Effects: aborts if a * b overflows */
-
-COMPILER_RT_ABI ti_int
-__mulvti3(ti_int a, ti_int b)
-{
- const int N = (int)(sizeof(ti_int) * CHAR_BIT);
- const ti_int MIN = (ti_int)1 << (N-1);
- const ti_int MAX = ~MIN;
- if (a == MIN)
- {
- if (b == 0 || b == 1)
- return a * b;
- compilerrt_abort();
- }
- if (b == MIN)
- {
- if (a == 0 || a == 1)
- return a * b;
- compilerrt_abort();
- }
- ti_int sa = a >> (N - 1);
- ti_int abs_a = (a ^ sa) - sa;
- ti_int sb = b >> (N - 1);
- ti_int abs_b = (b ^ sb) - sb;
- if (abs_a < 2 || abs_b < 2)
- return a * b;
- if (sa == sb)
- {
- if (abs_a > MAX / abs_b)
- compilerrt_abort();
- }
- else
- {
- if (abs_a > MIN / -abs_b)
- compilerrt_abort();
- }
- return a * b;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- mulvti3.c - Implement __mulvti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulvti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a * b */
+
+/* Effects: aborts if a * b overflows */
+
+COMPILER_RT_ABI ti_int
+__mulvti3(ti_int a, ti_int b)
+{
+ const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+ const ti_int MIN = (ti_int)1 << (N-1);
+ const ti_int MAX = ~MIN;
+ if (a == MIN)
+ {
+ if (b == 0 || b == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ if (b == MIN)
+ {
+ if (a == 0 || a == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ ti_int sa = a >> (N - 1);
+ ti_int abs_a = (a ^ sa) - sa;
+ ti_int sb = b >> (N - 1);
+ ti_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return a * b;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ compilerrt_abort();
+ }
+ return a * b;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/mulxc3.c b/contrib/libs/cxxsupp/builtins/mulxc3.c
index ba615ddf5f..ba32216918 100644
--- a/contrib/libs/cxxsupp/builtins/mulxc3.c
+++ b/contrib/libs/cxxsupp/builtins/mulxc3.c
@@ -1,77 +1,77 @@
-/* ===-- mulxc3.c - Implement __mulxc3 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __mulxc3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-#include "int_math.h"
-
-/* Returns: the product of a + ib and c + id */
-
-COMPILER_RT_ABI Lcomplex
-__mulxc3(long double __a, long double __b, long double __c, long double __d)
-{
- long double __ac = __a * __c;
- long double __bd = __b * __d;
- long double __ad = __a * __d;
- long double __bc = __b * __c;
- Lcomplex z;
- COMPLEX_REAL(z) = __ac - __bd;
- COMPLEX_IMAGINARY(z) = __ad + __bc;
- if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
- {
- int __recalc = 0;
- if (crt_isinf(__a) || crt_isinf(__b))
- {
- __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
- __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
- if (crt_isnan(__c))
- __c = crt_copysignl(0, __c);
- if (crt_isnan(__d))
- __d = crt_copysignl(0, __d);
- __recalc = 1;
- }
- if (crt_isinf(__c) || crt_isinf(__d))
- {
- __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
- __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
- if (crt_isnan(__a))
- __a = crt_copysignl(0, __a);
- if (crt_isnan(__b))
- __b = crt_copysignl(0, __b);
- __recalc = 1;
- }
- if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
- crt_isinf(__ad) || crt_isinf(__bc)))
- {
- if (crt_isnan(__a))
- __a = crt_copysignl(0, __a);
- if (crt_isnan(__b))
- __b = crt_copysignl(0, __b);
- if (crt_isnan(__c))
- __c = crt_copysignl(0, __c);
- if (crt_isnan(__d))
- __d = crt_copysignl(0, __d);
- __recalc = 1;
- }
- if (__recalc)
- {
- COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
- COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
- }
- }
- return z;
-}
-
-#endif
+/* ===-- mulxc3.c - Implement __mulxc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulxc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI Lcomplex
+__mulxc3(long double __a, long double __b, long double __c, long double __d)
+{
+ long double __ac = __a * __c;
+ long double __bd = __b * __d;
+ long double __ad = __a * __d;
+ long double __bc = __b * __c;
+ Lcomplex z;
+ COMPLEX_REAL(z) = __ac - __bd;
+ COMPLEX_IMAGINARY(z) = __ad + __bc;
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ int __recalc = 0;
+ if (crt_isinf(__a) || crt_isinf(__b))
+ {
+ __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignl(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignl(0, __d);
+ __recalc = 1;
+ }
+ if (crt_isinf(__c) || crt_isinf(__d))
+ {
+ __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
+ if (crt_isnan(__a))
+ __a = crt_copysignl(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignl(0, __b);
+ __recalc = 1;
+ }
+ if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+ crt_isinf(__ad) || crt_isinf(__bc)))
+ {
+ if (crt_isnan(__a))
+ __a = crt_copysignl(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignl(0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignl(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignl(0, __d);
+ __recalc = 1;
+ }
+ if (__recalc)
+ {
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
+ }
+ }
+ return z;
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/negdf2.c b/contrib/libs/cxxsupp/builtins/negdf2.c
index 39a6de087e..d634b421cb 100644
--- a/contrib/libs/cxxsupp/builtins/negdf2.c
+++ b/contrib/libs/cxxsupp/builtins/negdf2.c
@@ -1,22 +1,22 @@
-//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float negation.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(dneg, negdf2)
-
-COMPILER_RT_ABI fp_t
-__negdf2(fp_t a) {
- return fromRep(toRep(a) ^ signBit);
-}
+//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float negation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(dneg, negdf2)
+
+COMPILER_RT_ABI fp_t
+__negdf2(fp_t a) {
+ return fromRep(toRep(a) ^ signBit);
+}
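
__negdf2 (and __negsf2 below) negates purely by flipping the IEEE-754 sign bit of the representation, so zeros and NaNs change sign too and no floating-point exception can be raised. A small sketch of the same operation on plain types (hypothetical helper, assumes the usual 64-bit IEEE-754 double):

/* Hypothetical sketch: flip bit 63 of the double's bit pattern, the same
 * thing __negdf2 does via toRep/fromRep. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double negate_by_signbit(double a) {
    uint64_t bits;
    memcpy(&bits, &a, sizeof bits);      /* toRep   */
    bits ^= UINT64_C(1) << 63;           /* flip the sign bit only */
    memcpy(&a, &bits, sizeof bits);      /* fromRep */
    return a;
}

int main(void) {
    printf("%g %g\n", negate_by_signbit(1.5), negate_by_signbit(-0.0));  /* -1.5 0 */
    return 0;
}
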
diff --git a/contrib/libs/cxxsupp/builtins/negdi2.c b/contrib/libs/cxxsupp/builtins/negdi2.c
index d8e579e049..3d49ba2899 100644
--- a/contrib/libs/cxxsupp/builtins/negdi2.c
+++ b/contrib/libs/cxxsupp/builtins/negdi2.c
@@ -1,26 +1,26 @@
-/* ===-- negdi2.c - Implement __negdi2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __negdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: -a */
-
-COMPILER_RT_ABI di_int
-__negdi2(di_int a)
-{
- /* Note: this routine is here for API compatibility; any sane compiler
- * should expand it inline.
- */
- return -a;
-}
+/* ===-- negdi2.c - Implement __negdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: -a */
+
+COMPILER_RT_ABI di_int
+__negdi2(di_int a)
+{
+ /* Note: this routine is here for API compatibility; any sane compiler
+ * should expand it inline.
+ */
+ return -a;
+}
diff --git a/contrib/libs/cxxsupp/builtins/negsf2.c b/contrib/libs/cxxsupp/builtins/negsf2.c
index 2d67569329..29c17be414 100644
--- a/contrib/libs/cxxsupp/builtins/negsf2.c
+++ b/contrib/libs/cxxsupp/builtins/negsf2.c
@@ -1,22 +1,22 @@
-//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float negation.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(fneg, negsf2)
-
-COMPILER_RT_ABI fp_t
-__negsf2(fp_t a) {
- return fromRep(toRep(a) ^ signBit);
-}
+//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float negation.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(fneg, negsf2)
+
+COMPILER_RT_ABI fp_t
+__negsf2(fp_t a) {
+ return fromRep(toRep(a) ^ signBit);
+}
diff --git a/contrib/libs/cxxsupp/builtins/negti2.c b/contrib/libs/cxxsupp/builtins/negti2.c
index 084abca497..9b00b303f8 100644
--- a/contrib/libs/cxxsupp/builtins/negti2.c
+++ b/contrib/libs/cxxsupp/builtins/negti2.c
@@ -1,30 +1,30 @@
-/* ===-- negti2.c - Implement __negti2 -------------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __negti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: -a */
-
-COMPILER_RT_ABI ti_int
-__negti2(ti_int a)
-{
- /* Note: this routine is here for API compatibility; any sane compiler
- * should expand it inline.
- */
- return -a;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- negti2.c - Implement __negti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: -a */
+
+COMPILER_RT_ABI ti_int
+__negti2(ti_int a)
+{
+ /* Note: this routine is here for API compatibility; any sane compiler
+ * should expand it inline.
+ */
+ return -a;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/negvdi2.c b/contrib/libs/cxxsupp/builtins/negvdi2.c
index 9ea5988d2d..e336ecf28f 100644
--- a/contrib/libs/cxxsupp/builtins/negvdi2.c
+++ b/contrib/libs/cxxsupp/builtins/negvdi2.c
@@ -1,28 +1,28 @@
-/* ===-- negvdi2.c - Implement __negvdi2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __negvdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: -a */
-
-/* Effects: aborts if -a overflows */
-
-COMPILER_RT_ABI di_int
-__negvdi2(di_int a)
-{
- const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1);
- if (a == MIN)
- compilerrt_abort();
- return -a;
-}
+/* ===-- negvdi2.c - Implement __negvdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negvdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: -a */
+
+/* Effects: aborts if -a overflows */
+
+COMPILER_RT_ABI di_int
+__negvdi2(di_int a)
+{
+ const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1);
+ if (a == MIN)
+ compilerrt_abort();
+ return -a;
+}
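
The __negv* helpers abort only for the single input whose negation is unrepresentable in two's complement, the type's minimum. A trivial sketch of that predicate for the 64-bit case (hypothetical helper, assuming int64_t matches di_int):

/* Hypothetical predicate mirroring the single comparison __negvdi2 performs. */
#include <stdint.h>
#include <stdio.h>

static int negvdi2_would_abort(int64_t a) {
    return a == INT64_MIN;   /* -INT64_MIN does not fit in int64_t */
}

int main(void) {
    printf("%d %d\n", negvdi2_would_abort(INT64_MIN), negvdi2_would_abort(-1));  /* 1 0 */
    return 0;
}
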
diff --git a/contrib/libs/cxxsupp/builtins/negvsi2.c b/contrib/libs/cxxsupp/builtins/negvsi2.c
index 065c487ad6..b9e93fef06 100644
--- a/contrib/libs/cxxsupp/builtins/negvsi2.c
+++ b/contrib/libs/cxxsupp/builtins/negvsi2.c
@@ -1,28 +1,28 @@
-/* ===-- negvsi2.c - Implement __negvsi2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __negvsi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: -a */
-
-/* Effects: aborts if -a overflows */
-
-COMPILER_RT_ABI si_int
-__negvsi2(si_int a)
-{
- const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1);
- if (a == MIN)
- compilerrt_abort();
- return -a;
-}
+/* ===-- negvsi2.c - Implement __negvsi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negvsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: -a */
+
+/* Effects: aborts if -a overflows */
+
+COMPILER_RT_ABI si_int
+__negvsi2(si_int a)
+{
+ const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1);
+ if (a == MIN)
+ compilerrt_abort();
+ return -a;
+}
diff --git a/contrib/libs/cxxsupp/builtins/negvti2.c b/contrib/libs/cxxsupp/builtins/negvti2.c
index c5bf2aea2b..85f9f7d19d 100644
--- a/contrib/libs/cxxsupp/builtins/negvti2.c
+++ b/contrib/libs/cxxsupp/builtins/negvti2.c
@@ -1,32 +1,32 @@
-/*===-- negvti2.c - Implement __negvti2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===
- *
- *This file implements __negvti2 for the compiler_rt library.
- *
- *===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: -a */
-
-/* Effects: aborts if -a overflows */
-
-COMPILER_RT_ABI ti_int
-__negvti2(ti_int a)
-{
- const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1);
- if (a == MIN)
- compilerrt_abort();
- return -a;
-}
-
-#endif /* CRT_HAS_128BIT */
+/*===-- negvti2.c - Implement __negvti2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __negvti2 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: -a */
+
+/* Effects: aborts if -a overflows */
+
+COMPILER_RT_ABI ti_int
+__negvti2(ti_int a)
+{
+ const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1);
+ if (a == MIN)
+ compilerrt_abort();
+ return -a;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/paritydi2.c b/contrib/libs/cxxsupp/builtins/paritydi2.c
index f8a248290f..8ea5ab4214 100644
--- a/contrib/libs/cxxsupp/builtins/paritydi2.c
+++ b/contrib/libs/cxxsupp/builtins/paritydi2.c
@@ -1,25 +1,25 @@
-/* ===-- paritydi2.c - Implement __paritydi2 -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __paritydi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: 1 if number of bits is odd else returns 0 */
-
-COMPILER_RT_ABI si_int
-__paritydi2(di_int a)
-{
- dwords x;
- x.all = a;
- return __paritysi2(x.s.high ^ x.s.low);
-}
+/* ===-- paritydi2.c - Implement __paritydi2 -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __paritydi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: 1 if number of bits is odd else returns 0 */
+
+COMPILER_RT_ABI si_int
+__paritydi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ return __paritysi2(x.s.high ^ x.s.low);
+}
diff --git a/contrib/libs/cxxsupp/builtins/paritysi2.c b/contrib/libs/cxxsupp/builtins/paritysi2.c
index 5ea59fd2b6..5999846638 100644
--- a/contrib/libs/cxxsupp/builtins/paritysi2.c
+++ b/contrib/libs/cxxsupp/builtins/paritysi2.c
@@ -1,27 +1,27 @@
-/* ===-- paritysi2.c - Implement __paritysi2 -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __paritysi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: 1 if number of bits is odd else returns 0 */
-
-COMPILER_RT_ABI si_int
-__paritysi2(si_int a)
-{
- su_int x = (su_int)a;
- x ^= x >> 16;
- x ^= x >> 8;
- x ^= x >> 4;
- return (0x6996 >> (x & 0xF)) & 1;
-}
+/* ===-- paritysi2.c - Implement __paritysi2 -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __paritysi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: 1 if number of bits is odd else returns 0 */
+
+COMPILER_RT_ABI si_int
+__paritysi2(si_int a)
+{
+ su_int x = (su_int)a;
+ x ^= x >> 16;
+ x ^= x >> 8;
+ x ^= x >> 4;
+ return (0x6996 >> (x & 0xF)) & 1;
+}
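
__paritysi2 folds the word onto itself until the parity of all 32 bits lands in the low nibble, then indexes the constant 0x6996, whose bit i is the parity of i (a 16-entry lookup table packed into one immediate). A hedged cross-check against the GCC/Clang builtin (hypothetical test, not part of the library):

/* Hypothetical cross-check: the 0x6996 nibble table and the compiler
 * builtin should report the same parity for every 16-bit input. */
#include <stdint.h>
#include <assert.h>

static int parity_via_table(uint32_t x) {
    x ^= x >> 16;
    x ^= x >> 8;
    x ^= x >> 4;
    return (0x6996 >> (x & 0xF)) & 1;   /* bit i of 0x6996 = parity of i */
}

int main(void) {
    for (uint32_t i = 0; i < (1u << 16); ++i)
        assert(parity_via_table(i) == __builtin_parity(i));
    return 0;
}
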
diff --git a/contrib/libs/cxxsupp/builtins/parityti2.c b/contrib/libs/cxxsupp/builtins/parityti2.c
index 385eab0e69..5a4fe49248 100644
--- a/contrib/libs/cxxsupp/builtins/parityti2.c
+++ b/contrib/libs/cxxsupp/builtins/parityti2.c
@@ -1,29 +1,29 @@
-/* ===-- parityti2.c - Implement __parityti2 -------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __parityti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: 1 if number of bits is odd else returns 0 */
-
-COMPILER_RT_ABI si_int
-__parityti2(ti_int a)
-{
- twords x;
- x.all = a;
- return __paritydi2(x.s.high ^ x.s.low);
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- parityti2.c - Implement __parityti2 -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __parityti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: 1 if number of bits is odd else returns 0 */
+
+COMPILER_RT_ABI si_int
+__parityti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ return __paritydi2(x.s.high ^ x.s.low);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/popcountdi2.c b/contrib/libs/cxxsupp/builtins/popcountdi2.c
index ee88c79f10..5e8a62f075 100644
--- a/contrib/libs/cxxsupp/builtins/popcountdi2.c
+++ b/contrib/libs/cxxsupp/builtins/popcountdi2.c
@@ -1,36 +1,36 @@
-/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __popcountdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: count of 1 bits */
-
-COMPILER_RT_ABI si_int
-__popcountdi2(di_int a)
-{
- du_int x2 = (du_int)a;
- x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
- /* Every 2 bits holds the sum of every pair of bits (32) */
- x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
- /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */
- x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
- /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */
- su_int x = (su_int)(x2 + (x2 >> 32));
- /* The lower 32 bits hold four 16 bit sums (5 significant bits). */
- /* Upper 32 bits are garbage */
- x = x + (x >> 16);
- /* The lower 16 bits hold two 32 bit sums (6 significant bits). */
- /* Upper 16 bits are garbage */
- return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
-}
+/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __popcountdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: count of 1 bits */
+
+COMPILER_RT_ABI si_int
+__popcountdi2(di_int a)
+{
+ du_int x2 = (du_int)a;
+ x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
+ /* Every 2 bits holds the sum of every pair of bits (32) */
+ x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
+ /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */
+ x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
+ /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */
+ su_int x = (su_int)(x2 + (x2 >> 32));
+ /* The lower 32 bits hold four 16 bit sums (5 significant bits). */
+ /* Upper 32 bits are garbage */
+ x = x + (x >> 16);
+ /* The lower 16 bits hold two 32 bit sums (6 significant bits). */
+ /* Upper 16 bits are garbage */
+ return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
+}
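
__popcountdi2 is the classic SWAR population count: 2-bit sums via the subtract trick, then 4-bit and 8-bit sums, then the byte sums are folded together; the final mask with 0x7F keeps the at-most-7 significant bits of a count that cannot exceed 64. A hedged cross-check against the compiler builtin (hypothetical test harness, not part of the library):

/* Hypothetical cross-check of the same SWAR sequence against
 * __builtin_popcountll on a few representative values. */
#include <stdint.h>
#include <assert.h>

static int popcount64_swar(uint64_t x) {
    x = x - ((x >> 1) & 0x5555555555555555ull);                           /* 2-bit sums */
    x = ((x >> 2) & 0x3333333333333333ull) + (x & 0x3333333333333333ull); /* 4-bit sums */
    x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Full;                           /* 8-bit sums */
    uint32_t y = (uint32_t)(x + (x >> 32));                               /* fold the halves */
    y = y + (y >> 16);
    return (y + (y >> 8)) & 0x7F;
}

int main(void) {
    uint64_t samples[] = { 0, 1, 0xFFFFFFFFFFFFFFFFull,
                           0x8000000000000001ull, 0x0123456789ABCDEFull };
    for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; ++i)
        assert(popcount64_swar(samples[i]) == __builtin_popcountll(samples[i]));
    return 0;
}
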
diff --git a/contrib/libs/cxxsupp/builtins/popcountsi2.c b/contrib/libs/cxxsupp/builtins/popcountsi2.c
index 7ef6d899ee..44544ff498 100644
--- a/contrib/libs/cxxsupp/builtins/popcountsi2.c
+++ b/contrib/libs/cxxsupp/builtins/popcountsi2.c
@@ -1,33 +1,33 @@
-/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __popcountsi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: count of 1 bits */
-
-COMPILER_RT_ABI si_int
-__popcountsi2(si_int a)
-{
- su_int x = (su_int)a;
- x = x - ((x >> 1) & 0x55555555);
- /* Every 2 bits holds the sum of every pair of bits */
- x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
- /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */
- x = (x + (x >> 4)) & 0x0F0F0F0F;
- /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */
- x = (x + (x >> 16));
- /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/
- /* Upper 16 bits are garbage */
- return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */
-}
+/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __popcountsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: count of 1 bits */
+
+COMPILER_RT_ABI si_int
+__popcountsi2(si_int a)
+{
+ su_int x = (su_int)a;
+ x = x - ((x >> 1) & 0x55555555);
+ /* Every 2 bits holds the sum of every pair of bits */
+ x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
+ /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */
+ x = (x + (x >> 16));
+ /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/
+ /* Upper 16 bits are garbage */
+ return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */
+}
diff --git a/contrib/libs/cxxsupp/builtins/popcountti2.c b/contrib/libs/cxxsupp/builtins/popcountti2.c
index 0f3ba706da..7451bbb286 100644
--- a/contrib/libs/cxxsupp/builtins/popcountti2.c
+++ b/contrib/libs/cxxsupp/builtins/popcountti2.c
@@ -1,44 +1,44 @@
-/* ===-- popcountti2.c - Implement __popcountti2 ----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __popcountti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: count of 1 bits */
-
-COMPILER_RT_ABI si_int
-__popcountti2(ti_int a)
-{
- tu_int x3 = (tu_int)a;
- x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) |
- 0x5555555555555555uLL));
- /* Every 2 bits holds the sum of every pair of bits (64) */
- x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL))
- + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL));
- /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */
- x3 = (x3 + (x3 >> 4))
- & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL);
- /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */
- du_int x2 = (du_int)(x3 + (x3 >> 64));
- /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */
- su_int x = (su_int)(x2 + (x2 >> 32));
- /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */
- x = x + (x >> 16);
- /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */
- /* Upper 16 bits are garbage */
- return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- popcountti2.c - Implement __popcountti2 ----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __popcountti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: count of 1 bits */
+
+COMPILER_RT_ABI si_int
+__popcountti2(ti_int a)
+{
+ tu_int x3 = (tu_int)a;
+ x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) |
+ 0x5555555555555555uLL));
+ /* Every 2 bits holds the sum of every pair of bits (64) */
+ x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL))
+ + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL));
+ /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */
+ x3 = (x3 + (x3 >> 4))
+ & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL);
+ /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */
+ du_int x2 = (du_int)(x3 + (x3 >> 64));
+ /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */
+ su_int x = (su_int)(x2 + (x2 >> 32));
+ /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */
+ x = x + (x >> 16);
+ /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */
+ /* Upper 16 bits are garbage */
+ return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/powidf2.c b/contrib/libs/cxxsupp/builtins/powidf2.c
index ec0791358c..ac13b172b0 100644
--- a/contrib/libs/cxxsupp/builtins/powidf2.c
+++ b/contrib/libs/cxxsupp/builtins/powidf2.c
@@ -1,34 +1,34 @@
-/* ===-- powidf2.cpp - Implement __powidf2 ---------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __powidf2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a ^ b */
-
-COMPILER_RT_ABI double
-__powidf2(double a, si_int b)
-{
- const int recip = b < 0;
- double r = 1;
- while (1)
- {
- if (b & 1)
- r *= a;
- b /= 2;
- if (b == 0)
- break;
- a *= a;
- }
- return recip ? 1/r : r;
-}
+/* ===-- powidf2.cpp - Implement __powidf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powidf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI double
+__powidf2(double a, si_int b)
+{
+ const int recip = b < 0;
+ double r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
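
In the __powi* files the comment "a ^ b" means a raised to the integer power b, not XOR. The loop is square-and-multiply; a negative exponent works because b & 1 and b /= 2 behave correctly on two's-complement values, and the reciprocal is taken once at the end. A standalone sketch with expected results (hypothetical helper name, mirroring the loop above):

/* Hypothetical sketch of the same square-and-multiply loop. */
#include <stdio.h>

static double powi_sketch(double a, int b) {
    const int recip = b < 0;
    double r = 1;
    while (1) {
        if (b & 1)
            r *= a;     /* multiply in the current exponent bit */
        b /= 2;          /* truncates toward zero, so it terminates for negative b too */
        if (b == 0)
            break;
        a *= a;          /* square for the next bit */
    }
    return recip ? 1 / r : r;
}

int main(void) {
    printf("%g %g\n", powi_sketch(2.0, 10), powi_sketch(2.0, -3));  /* 1024 0.125 */
    return 0;
}
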
diff --git a/contrib/libs/cxxsupp/builtins/powisf2.c b/contrib/libs/cxxsupp/builtins/powisf2.c
index 945618e653..0c400ec6dd 100644
--- a/contrib/libs/cxxsupp/builtins/powisf2.c
+++ b/contrib/libs/cxxsupp/builtins/powisf2.c
@@ -1,34 +1,34 @@
-/*===-- powisf2.cpp - Implement __powisf2 ---------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __powisf2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a ^ b */
-
-COMPILER_RT_ABI float
-__powisf2(float a, si_int b)
-{
- const int recip = b < 0;
- float r = 1;
- while (1)
- {
- if (b & 1)
- r *= a;
- b /= 2;
- if (b == 0)
- break;
- a *= a;
- }
- return recip ? 1/r : r;
-}
+/*===-- powisf2.cpp - Implement __powisf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powisf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI float
+__powisf2(float a, si_int b)
+{
+ const int recip = b < 0;
+ float r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
diff --git a/contrib/libs/cxxsupp/builtins/powitf2.c b/contrib/libs/cxxsupp/builtins/powitf2.c
index f17988f868..172f29f58f 100644
--- a/contrib/libs/cxxsupp/builtins/powitf2.c
+++ b/contrib/libs/cxxsupp/builtins/powitf2.c
@@ -1,38 +1,38 @@
-/* ===-- powitf2.cpp - Implement __powitf2 ---------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __powitf2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#if _ARCH_PPC
-
-/* Returns: a ^ b */
-
-COMPILER_RT_ABI long double
-__powitf2(long double a, si_int b)
-{
- const int recip = b < 0;
- long double r = 1;
- while (1)
- {
- if (b & 1)
- r *= a;
- b /= 2;
- if (b == 0)
- break;
- a *= a;
- }
- return recip ? 1/r : r;
-}
-
-#endif
+/* ===-- powitf2.cpp - Implement __powitf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powitf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#if _ARCH_PPC
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI long double
+__powitf2(long double a, si_int b)
+{
+ const int recip = b < 0;
+ long double r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/powixf2.c b/contrib/libs/cxxsupp/builtins/powixf2.c
index c2d54db9df..0fd96e503e 100644
--- a/contrib/libs/cxxsupp/builtins/powixf2.c
+++ b/contrib/libs/cxxsupp/builtins/powixf2.c
@@ -1,38 +1,38 @@
-/* ===-- powixf2.cpp - Implement __powixf2 ---------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __powixf2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if !_ARCH_PPC
-
-#include "int_lib.h"
-
-/* Returns: a ^ b */
-
-COMPILER_RT_ABI long double
-__powixf2(long double a, si_int b)
-{
- const int recip = b < 0;
- long double r = 1;
- while (1)
- {
- if (b & 1)
- r *= a;
- b /= 2;
- if (b == 0)
- break;
- a *= a;
- }
- return recip ? 1/r : r;
-}
-
-#endif
+/* ===-- powixf2.cpp - Implement __powixf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powixf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI long double
+__powixf2(long double a, si_int b)
+{
+ const int recip = b < 0;
+ long double r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/ppc/DD.h b/contrib/libs/cxxsupp/builtins/ppc/DD.h
index 45bbc846ea..3e5f9e58c1 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/DD.h
+++ b/contrib/libs/cxxsupp/builtins/ppc/DD.h
@@ -1,45 +1,45 @@
-#ifndef COMPILERRT_DD_HEADER
-#define COMPILERRT_DD_HEADER
-
-#include "../int_lib.h"
-
+#ifndef COMPILERRT_DD_HEADER
+#define COMPILERRT_DD_HEADER
+
+#include "../int_lib.h"
+
+typedef union {
+ long double ld;
+ struct {
+ double hi;
+ double lo;
+ }s;
+} DD;
+
typedef union {
- long double ld;
- struct {
- double hi;
- double lo;
- }s;
-} DD;
-
-typedef union {
- double d;
- uint64_t x;
-} doublebits;
-
-#define LOWORDER(xy,xHi,xLo,yHi,yLo) \
- (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo))
-
-static __inline ALWAYS_INLINE double local_fabs(double x) {
- doublebits result = {.d = x};
- result.x &= UINT64_C(0x7fffffffffffffff);
- return result.d;
-}
-
-static __inline ALWAYS_INLINE double high26bits(double x) {
- doublebits result = {.d = x};
- result.x &= UINT64_C(0xfffffffff8000000);
- return result.d;
-}
-
-static __inline ALWAYS_INLINE int different_sign(double x, double y) {
- doublebits xsignbit = {.d = x}, ysignbit = {.d = y};
- int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
- return result;
-}
-
-long double __gcc_qadd(long double, long double);
-long double __gcc_qsub(long double, long double);
-long double __gcc_qmul(long double, long double);
-long double __gcc_qdiv(long double, long double);
-
-#endif /* COMPILERRT_DD_HEADER */
+ double d;
+ uint64_t x;
+} doublebits;
+
+#define LOWORDER(xy,xHi,xLo,yHi,yLo) \
+ (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo))
+
+static __inline ALWAYS_INLINE double local_fabs(double x) {
+ doublebits result = {.d = x};
+ result.x &= UINT64_C(0x7fffffffffffffff);
+ return result.d;
+}
+
+static __inline ALWAYS_INLINE double high26bits(double x) {
+ doublebits result = {.d = x};
+ result.x &= UINT64_C(0xfffffffff8000000);
+ return result.d;
+}
+
+static __inline ALWAYS_INLINE int different_sign(double x, double y) {
+ doublebits xsignbit = {.d = x}, ysignbit = {.d = y};
+ int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
+ return result;
+}
+
+long double __gcc_qadd(long double, long double);
+long double __gcc_qsub(long double, long double);
+long double __gcc_qmul(long double, long double);
+long double __gcc_qdiv(long double, long double);
+
+#endif /* COMPILERRT_DD_HEADER */
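
DD models the PowerPC double-double long double: the value is stored as the exact sum of two IEEE doubles, with hi carrying the leading digits and lo the rounding residual. A hypothetical illustration, meaningful only on targets where long double really is the IBM double-double format (e.g. classic ppc64):

/* Hypothetical illustration of the DD layout; on other long double
 * formats the union members have no such relationship. */
#include <stdio.h>

typedef union {
    long double ld;
    struct { double hi; double lo; } s;
} DD_sketch;

int main(void) {
    DD_sketch x = { .ld = 1.0L / 3.0L };
    /* hi is the double nearest the value; lo should be the residual hi misses */
    printf("hi = %a\nlo = %a\n", x.s.hi, x.s.lo);
    return 0;
}
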
diff --git a/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk b/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk
index 6d5af81faf..0adc623aa0 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk
+++ b/contrib/libs/cxxsupp/builtins/ppc/Makefile.mk
@@ -1,20 +1,20 @@
-#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := ppc
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs :=
+OnlyArchs := ppc
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/contrib/libs/cxxsupp/builtins/ppc/divtc3.c b/contrib/libs/cxxsupp/builtins/ppc/divtc3.c
index 5d55ab4b1a..8ec41c528a 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/divtc3.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/divtc3.c
@@ -1,91 +1,91 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-#include "DD.h"
-#include "../int_math.h"
-
-#if !defined(CRT_INFINITY) && defined(HUGE_VAL)
-#define CRT_INFINITY HUGE_VAL
-#endif /* CRT_INFINITY */
-
-#define makeFinite(x) { \
- (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
- (x).s.lo = 0.0; \
- }
-
-long double _Complex
-__divtc3(long double a, long double b, long double c, long double d)
-{
- DD cDD = { .ld = c };
- DD dDD = { .ld = d };
-
- int ilogbw = 0;
- const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) ));
-
- if (crt_isfinite(logbw))
- {
- ilogbw = (int)logbw;
-
- cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw);
- cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw);
- dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw);
- dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw);
- }
-
- const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld));
- const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld));
- const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld));
-
- DD real = { .ld = __gcc_qdiv(realNumerator, denom) };
- DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) };
-
- real.s.hi = crt_scalbn(real.s.hi, -ilogbw);
- real.s.lo = crt_scalbn(real.s.lo, -ilogbw);
- imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw);
- imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw);
-
- if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
- {
- DD aDD = { .ld = a };
- DD bDD = { .ld = b };
- DD rDD = { .ld = denom };
-
- if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) ||
- !crt_isnan(bDD.s.hi)))
- {
- real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi;
- real.s.lo = 0.0;
- imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi;
- imag.s.lo = 0.0;
- }
-
- else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) &&
- crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi))
- {
- makeFinite(aDD);
- makeFinite(bDD);
- real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi);
- real.s.lo = 0.0;
- imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi);
- imag.s.lo = 0.0;
- }
-
- else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) &&
- crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi))
- {
- makeFinite(cDD);
- makeFinite(dDD);
- real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi));
- real.s.lo = 0.0;
- imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi));
- imag.s.lo = 0.0;
- }
- }
-
- long double _Complex z;
- __real__ z = real.ld;
- __imag__ z = imag.ld;
-
- return z;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#include "DD.h"
+#include "../int_math.h"
+
+#if !defined(CRT_INFINITY) && defined(HUGE_VAL)
+#define CRT_INFINITY HUGE_VAL
+#endif /* CRT_INFINITY */
+
+#define makeFinite(x) { \
+ (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
+ (x).s.lo = 0.0; \
+ }
+
+long double _Complex
+__divtc3(long double a, long double b, long double c, long double d)
+{
+ DD cDD = { .ld = c };
+ DD dDD = { .ld = d };
+
+ int ilogbw = 0;
+ const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) ));
+
+ if (crt_isfinite(logbw))
+ {
+ ilogbw = (int)logbw;
+
+ cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw);
+ cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw);
+ dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw);
+ dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw);
+ }
+
+ const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld));
+ const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld));
+ const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld));
+
+ DD real = { .ld = __gcc_qdiv(realNumerator, denom) };
+ DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) };
+
+ real.s.hi = crt_scalbn(real.s.hi, -ilogbw);
+ real.s.lo = crt_scalbn(real.s.lo, -ilogbw);
+ imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw);
+ imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw);
+
+ if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
+ {
+ DD aDD = { .ld = a };
+ DD bDD = { .ld = b };
+ DD rDD = { .ld = denom };
+
+ if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) ||
+ !crt_isnan(bDD.s.hi)))
+ {
+ real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi;
+ real.s.lo = 0.0;
+ imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi;
+ imag.s.lo = 0.0;
+ }
+
+ else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) &&
+ crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi))
+ {
+ makeFinite(aDD);
+ makeFinite(bDD);
+ real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi);
+ real.s.lo = 0.0;
+ imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi);
+ imag.s.lo = 0.0;
+ }
+
+ else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) &&
+ crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi))
+ {
+ makeFinite(cDD);
+ makeFinite(dDD);
+ real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi));
+ real.s.lo = 0.0;
+ imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi));
+ imag.s.lo = 0.0;
+ }
+ }
+
+ long double _Complex z;
+ __real__ z = real.ld;
+ __imag__ z = imag.ld;
+
+ return z;
+}
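
The conditional scaling above (scalbn by -ilogbw, then rescaling both quotient parts) is what keeps the denominator c*c + d*d from overflowing or underflowing. A minimal sketch of the same idea with plain doubles rather than the double-double format; div_complex is a hypothetical helper name, and the real routine additionally recovers infinities from NaN results as shown in the diff:

#include <math.h>
#include <stdio.h>

/* Scale the divisor by 2^-ilogbw before forming c*c + d*d, then undo the
 * scale on both quotient parts, so the denominator stays in range. */
static void div_complex(double a, double b, double c, double d,
                        double *re, double *im) {
    int ilogbw = 0;
    const double logbw = logb(fmax(fabs(c), fabs(d)));
    if (isfinite(logbw)) {
        ilogbw = (int)logbw;
        c = scalbn(c, -ilogbw);
        d = scalbn(d, -ilogbw);
    }
    const double denom = c * c + d * d;
    *re = scalbn((a * c + b * d) / denom, -ilogbw);
    *im = scalbn((b * c - a * d) / denom, -ilogbw);
}

int main(void) {
    double re, im;
    div_complex(1.0, 1.0, 1e300, 1e300, &re, &im); /* (1+i) / (1e300 + 1e300i) */
    printf("%g %g\n", re, im);                     /* about 1e-300 and 0 */
    return 0;
}
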
diff --git a/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c b/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c
index 77112905de..2c7c0f8e27 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/fixtfdi.c
@@ -1,104 +1,104 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* int64_t __fixtfdi(long double x);
- * This file implements the PowerPC 128-bit double-double -> int64_t conversion
- */
-
-#include "DD.h"
-#include "../int_math.h"
-
-uint64_t __fixtfdi(long double input)
-{
- const DD x = { .ld = input };
- const doublebits hibits = { .d = x.s.hi };
-
- const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff);
- const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000);
-
- /* If (1.0 - tiny) <= input < 0x1.0p63: */
- if (UINT32_C(0x03f00000) > absHighWordMinusOne)
- {
- /* Do an unsigned conversion of the absolute value, then restore the sign. */
- const int unbiasedHeadExponent = absHighWordMinusOne >> 20;
-
- int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */
- result |= INT64_C(0x0010000000000000); /* mantissa(hi) with implicit bit */
- result <<= 10; /* mantissa(hi) with one zero preceding bit. */
-
- const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63;
-
- /* If the tail is non-zero, we need to patch in the tail bits. */
- if (0.0 != x.s.lo)
- {
- const doublebits lobits = { .d = x.s.lo };
- int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
- tailMantissa |= INT64_C(0x0010000000000000);
-
- /* At this point we have the mantissa of |tail| */
- /* We need to negate it if head and tail have different signs. */
- const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63;
- const int64_t negationMask = loNegationMask ^ hiNegationMask;
- tailMantissa = (tailMantissa ^ negationMask) - negationMask;
-
- /* Now we have the mantissa of tail as a signed 2s-complement integer */
-
- const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
-
- /* Shift the tail mantissa into the right position, accounting for the
- * bias of 10 that we shifted the head mantissa by.
- */
- tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10)));
-
- result += tailMantissa;
- }
-
- result >>= (62 - unbiasedHeadExponent);
-
- /* Restore the sign of the result and return */
- result = (result ^ hiNegationMask) - hiNegationMask;
- return result;
-
- }
-
- /* Edge cases handled here: */
-
- /* |x| < 1, result is zero. */
- if (1.0 > crt_fabs(x.s.hi))
- return INT64_C(0);
-
- /* x very close to INT64_MIN, care must be taken to see which side we are on. */
- if (x.s.hi == -0x1.0p63) {
-
- int64_t result = INT64_MIN;
-
- if (0.0 < x.s.lo)
- {
- /* If the tail is positive, the correct result is something other than INT64_MIN.
- * we'll need to figure out what it is.
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* int64_t __fixtfdi(long double x);
+ * This file implements the PowerPC 128-bit double-double -> int64_t conversion
+ */
+
+#include "DD.h"
+#include "../int_math.h"
+
+uint64_t __fixtfdi(long double input)
+{
+ const DD x = { .ld = input };
+ const doublebits hibits = { .d = x.s.hi };
+
+ const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff);
+ const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000);
+
+ /* If (1.0 - tiny) <= input < 0x1.0p63: */
+ if (UINT32_C(0x03f00000) > absHighWordMinusOne)
+ {
+ /* Do an unsigned conversion of the absolute value, then restore the sign. */
+ const int unbiasedHeadExponent = absHighWordMinusOne >> 20;
+
+ int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */
+ result |= INT64_C(0x0010000000000000); /* mantissa(hi) with implicit bit */
+ result <<= 10; /* mantissa(hi) with one zero preceding bit. */
+
+ const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63;
+
+ /* If the tail is non-zero, we need to patch in the tail bits. */
+ if (0.0 != x.s.lo)
+ {
+ const doublebits lobits = { .d = x.s.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ /* At this point we have the mantissa of |tail| */
+ /* We need to negate it if head and tail have different signs. */
+ const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63;
+ const int64_t negationMask = loNegationMask ^ hiNegationMask;
+ tailMantissa = (tailMantissa ^ negationMask) - negationMask;
+
+ /* Now we have the mantissa of tail as a signed 2s-complement integer */
+
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+
+ /* Shift the tail mantissa into the right position, accounting for the
+ * bias of 10 that we shifted the head mantissa by.
*/
-
- const doublebits lobits = { .d = x.s.lo };
- int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
- tailMantissa |= INT64_C(0x0010000000000000);
-
- /* Now we negate the tailMantissa */
- tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1);
-
- /* And shift it by the appropriate amount */
- const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
- tailMantissa >>= 1075 - biasedTailExponent;
-
- result -= tailMantissa;
- }
-
- return result;
- }
-
- /* Signed overflows, infinities, and NaNs */
- if (x.s.hi > 0.0)
- return INT64_MAX;
- else
- return INT64_MIN;
-}
+ tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10)));
+
+ result += tailMantissa;
+ }
+
+ result >>= (62 - unbiasedHeadExponent);
+
+ /* Restore the sign of the result and return */
+ result = (result ^ hiNegationMask) - hiNegationMask;
+ return result;
+
+ }
+
+ /* Edge cases handled here: */
+
+ /* |x| < 1, result is zero. */
+ if (1.0 > crt_fabs(x.s.hi))
+ return INT64_C(0);
+
+ /* x very close to INT64_MIN, care must be taken to see which side we are on. */
+ if (x.s.hi == -0x1.0p63) {
+
+ int64_t result = INT64_MIN;
+
+ if (0.0 < x.s.lo)
+ {
+ /* If the tail is positive, the correct result is something other than INT64_MIN.
+ * we'll need to figure out what it is.
+ */
+
+ const doublebits lobits = { .d = x.s.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ /* Now we negate the tailMantissa */
+ tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1);
+
+ /* And shift it by the appropriate amount */
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+ tailMantissa >>= 1075 - biasedTailExponent;
+
+ result -= tailMantissa;
+ }
+
+ return result;
+ }
+
+ /* Signed overflows, infinities, and NaNs */
+ if (x.s.hi > 0.0)
+ return INT64_MAX;
+ else
+ return INT64_MIN;
+}
diff --git a/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c b/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c
index 277f30ecca..5e6e2cedf6 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/fixunstfdi.c
@@ -1,59 +1,59 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* uint64_t __fixunstfdi(long double x); */
-/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */
-
-#include "DD.h"
-
-uint64_t __fixunstfdi(long double input)
-{
- const DD x = { .ld = input };
- const doublebits hibits = { .d = x.s.hi };
-
- const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000);
-
- /* If (1.0 - tiny) <= input < 0x1.0p64: */
- if (UINT32_C(0x04000000) > highWordMinusOne)
- {
- const int unbiasedHeadExponent = highWordMinusOne >> 20;
-
- uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */
- result |= UINT64_C(0x0010000000000000); /* mantissa(hi) with implicit bit */
- result <<= 11; /* mantissa(hi) left aligned in the int64 field. */
-
- /* If the tail is non-zero, we need to patch in the tail bits. */
- if (0.0 != x.s.lo)
- {
- const doublebits lobits = { .d = x.s.lo };
- int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
- tailMantissa |= INT64_C(0x0010000000000000);
-
- /* At this point we have the mantissa of |tail| */
-
- const int64_t negationMask = ((int64_t)(lobits.x)) >> 63;
- tailMantissa = (tailMantissa ^ negationMask) - negationMask;
-
- /* Now we have the mantissa of tail as a signed 2s-complement integer */
-
- const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
-
- /* Shift the tail mantissa into the right position, accounting for the
- * bias of 11 that we shifted the head mantissa by.
- */
- tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11)));
-
- result += tailMantissa;
- }
-
- result >>= (63 - unbiasedHeadExponent);
- return result;
- }
-
- /* Edge cases are handled here, with saturation. */
- if (1.0 > x.s.hi)
- return UINT64_C(0);
- else
- return UINT64_MAX;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* uint64_t __fixunstfdi(long double x); */
+/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */
+
+#include "DD.h"
+
+uint64_t __fixunstfdi(long double input)
+{
+ const DD x = { .ld = input };
+ const doublebits hibits = { .d = x.s.hi };
+
+ const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000);
+
+ /* If (1.0 - tiny) <= input < 0x1.0p64: */
+ if (UINT32_C(0x04000000) > highWordMinusOne)
+ {
+ const int unbiasedHeadExponent = highWordMinusOne >> 20;
+
+ uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */
+ result |= UINT64_C(0x0010000000000000); /* mantissa(hi) with implicit bit */
+ result <<= 11; /* mantissa(hi) left aligned in the int64 field. */
+
+ /* If the tail is non-zero, we need to patch in the tail bits. */
+ if (0.0 != x.s.lo)
+ {
+ const doublebits lobits = { .d = x.s.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ /* At this point we have the mantissa of |tail| */
+
+ const int64_t negationMask = ((int64_t)(lobits.x)) >> 63;
+ tailMantissa = (tailMantissa ^ negationMask) - negationMask;
+
+ /* Now we have the mantissa of tail as a signed 2s-complement integer */
+
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+
+ /* Shift the tail mantissa into the right position, accounting for the
+ * bias of 11 that we shifted the head mantissa by.
+ */
+ tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11)));
+
+ result += tailMantissa;
+ }
+
+ result >>= (63 - unbiasedHeadExponent);
+ return result;
+ }
+
+ /* Edge cases are handled here, with saturation. */
+ if (1.0 > x.s.hi)
+ return UINT64_C(0);
+ else
+ return UINT64_MAX;
+}
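
The range test at the top of __fixunstfdi works purely on the high 32 bits of the head double's bit pattern: subtracting 0x3ff00000 (the high word of 1.0) and comparing against 0x04000000 accepts exactly the positive heads with 1.0 <= head < 2^64. A self-contained sketch of that test, assuming IEEE-754 doubles (head_in_range is a hypothetical name):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Accepts exactly the doubles d with 1.0 <= d < 0x1.0p64, using only the
 * high 32 bits of the IEEE-754 bit pattern, as __fixunstfdi does. */
static int head_in_range(double d) {
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);
    const uint32_t highWordMinusOne = (uint32_t)(bits >> 32) - UINT32_C(0x3ff00000);
    return highWordMinusOne < UINT32_C(0x04000000);
}

int main(void) {
    printf("%d %d %d %d\n", head_in_range(0.5), head_in_range(1.0),
           head_in_range(0x1.0p63), head_in_range(0x1.0p64)); /* 0 1 1 0 */
    return 0;
}
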
diff --git a/contrib/libs/cxxsupp/builtins/ppc/floatditf.c b/contrib/libs/cxxsupp/builtins/ppc/floatditf.c
index 0b86d8b4d8..beabdd0174 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/floatditf.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/floatditf.c
@@ -1,36 +1,36 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __floatditf(long long x); */
-/* This file implements the PowerPC long long -> long double conversion */
-
-#include "DD.h"
-
-long double __floatditf(int64_t a) {
-
- static const double twop32 = 0x1.0p32;
- static const double twop52 = 0x1.0p52;
-
- doublebits low = { .d = twop52 };
- low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. */
-
- const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52;
-
- /* At this point, we have two double precision numbers
- * high_addend and low.d, and we wish to return their sum
- * as a canonicalized long double:
- */
-
- /* This implementation sets the inexact flag spuriously.
- * This could be avoided, but at some substantial cost.
- */
-
- DD result;
-
- result.s.hi = high_addend + low.d;
- result.s.lo = (high_addend - result.s.hi) + low.d;
-
- return result.ld;
-
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __floatditf(long long x); */
+/* This file implements the PowerPC long long -> long double conversion */
+
+#include "DD.h"
+
+long double __floatditf(int64_t a) {
+
+ static const double twop32 = 0x1.0p32;
+ static const double twop52 = 0x1.0p52;
+
+ doublebits low = { .d = twop52 };
+ low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. */
+
+ const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52;
+
+ /* At this point, we have two double precision numbers
+ * high_addend and low.d, and we wish to return their sum
+ * as a canonicalized long double:
+ */
+
+ /* This implementation sets the inexact flag spuriously.
+ * This could be avoided, but at some substantial cost.
+ */
+
+ DD result;
+
+ result.s.hi = high_addend + low.d;
+ result.s.lo = (high_addend - result.s.hi) + low.d;
+
+ return result.ld;
+
+}
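
The low half above relies on a bit trick: ORing a 32-bit value into the all-zero mantissa of 2^52 yields exactly 2^52 + value as a double. A sketch of that trick in isolation (low32_to_double is a hypothetical name; __floatditf itself folds the -2^52 correction into high_addend instead of subtracting it here):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* OR a 32-bit value into the mantissa of 2^52, giving exactly 2^52 + v,
 * then subtract 2^52 to recover v as a double with no rounding. */
static double low32_to_double(uint32_t v) {
    const double twop52 = 0x1.0p52;
    uint64_t bits;
    memcpy(&bits, &twop52, sizeof bits);
    bits |= v;                       /* mantissa of 2^52 is all zeros, so this is exact */
    double d;
    memcpy(&d, &bits, sizeof d);     /* d == 2^52 + v */
    return d - twop52;
}

int main(void) {
    printf("%.1f\n", low32_to_double(12345u));       /* 12345.0 */
    printf("%.1f\n", low32_to_double(0xffffffffu));  /* 4294967295.0 */
    return 0;
}
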
diff --git a/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c b/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c
index e76a4e52db..b12e1e738f 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/floatunditf.c
@@ -1,41 +1,41 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __floatunditf(unsigned long long x); */
-/* This file implements the PowerPC unsigned long long -> long double conversion */
-
-#include "DD.h"
-
-long double __floatunditf(uint64_t a) {
-
- /* Begins with an exact copy of the code from __floatundidf */
-
- static const double twop52 = 0x1.0p52;
- static const double twop84 = 0x1.0p84;
- static const double twop84_plus_twop52 = 0x1.00000001p84;
-
- doublebits high = { .d = twop84 };
- doublebits low = { .d = twop52 };
-
- high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */
- low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */
-
- const double high_addend = high.d - twop84_plus_twop52;
-
- /* At this point, we have two double precision numbers
- * high_addend and low.d, and we wish to return their sum
- * as a canonicalized long double:
- */
-
- /* This implementation sets the inexact flag spuriously. */
- /* This could be avoided, but at some substantial cost. */
-
- DD result;
-
- result.s.hi = high_addend + low.d;
- result.s.lo = (high_addend - result.s.hi) + low.d;
-
- return result.ld;
-
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __floatunditf(unsigned long long x); */
+/* This file implements the PowerPC unsigned long long -> long double conversion */
+
+#include "DD.h"
+
+long double __floatunditf(uint64_t a) {
+
+ /* Begins with an exact copy of the code from __floatundidf */
+
+ static const double twop52 = 0x1.0p52;
+ static const double twop84 = 0x1.0p84;
+ static const double twop84_plus_twop52 = 0x1.00000001p84;
+
+ doublebits high = { .d = twop84 };
+ doublebits low = { .d = twop52 };
+
+ high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */
+ low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */
+
+ const double high_addend = high.d - twop84_plus_twop52;
+
+ /* At this point, we have two double precision numbers
+ * high_addend and low.d, and we wish to return their sum
+ * as a canonicalized long double:
+ */
+
+ /* This implementation sets the inexact flag spuriously. */
+ /* This could be avoided, but at some substantial cost. */
+
+ DD result;
+
+ result.s.hi = high_addend + low.d;
+ result.s.lo = (high_addend - result.s.hi) + low.d;
+
+ return result.ld;
+
+}
diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c
index 0284e28f9c..32e16e9d1d 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qadd.c
@@ -1,76 +1,76 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __gcc_qadd(long double x, long double y);
- * This file implements the PowerPC 128-bit double-double add operation.
- * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
- */
-
-#include "DD.h"
-
-long double __gcc_qadd(long double x, long double y)
-{
- static const uint32_t infinityHi = UINT32_C(0x7ff00000);
-
- DD dst = { .ld = x }, src = { .ld = y };
-
- register double A = dst.s.hi, a = dst.s.lo,
- B = src.s.hi, b = src.s.lo;
-
- /* If both operands are zero: */
- if ((A == 0.0) && (B == 0.0)) {
- dst.s.hi = A + B;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- /* If either operand is NaN or infinity: */
- const doublebits abits = { .d = A };
- const doublebits bbits = { .d = B };
- if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
- (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
- dst.s.hi = A + B;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- /* If the computation overflows: */
- /* This may be playing things a little bit fast and loose, but it will do for a start. */
- const double testForOverflow = A + (B + (a + b));
- const doublebits testbits = { .d = testForOverflow };
- if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
- dst.s.hi = testForOverflow;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- double H, h;
- double T, t;
- double W, w;
- double Y;
-
- H = B + (A - (A + B));
- T = b + (a - (a + b));
- h = A + (B - (A + B));
- t = a + (b - (a + b));
-
- if (local_fabs(A) <= local_fabs(B))
- w = (a + b) + h;
- else
- w = (a + b) + H;
-
- W = (A + B) + w;
- Y = (A + B) - W;
- Y += w;
-
- if (local_fabs(a) <= local_fabs(b))
- w = t + Y;
- else
- w = T + Y;
-
- dst.s.hi = Y = W + w;
- dst.s.lo = (W - Y) + w;
-
- return dst.ld;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qadd(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double add operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qadd(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.s.hi, a = dst.s.lo,
+ B = src.s.hi, b = src.s.lo;
+
+ /* If both operands are zero: */
+ if ((A == 0.0) && (B == 0.0)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If either operand is NaN or infinity: */
+ const doublebits abits = { .d = A };
+ const doublebits bbits = { .d = B };
+ if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
+ (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If the computation overflows: */
+ /* This may be playing things a little bit fast and loose, but it will do for a start. */
+ const double testForOverflow = A + (B + (a + b));
+ const doublebits testbits = { .d = testForOverflow };
+ if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = testForOverflow;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ double H, h;
+ double T, t;
+ double W, w;
+ double Y;
+
+ H = B + (A - (A + B));
+ T = b + (a - (a + b));
+ h = A + (B - (A + B));
+ t = a + (b - (a + b));
+
+ if (local_fabs(A) <= local_fabs(B))
+ w = (a + b) + h;
+ else
+ w = (a + b) + H;
+
+ W = (A + B) + w;
+ Y = (A + B) - W;
+ Y += w;
+
+ if (local_fabs(a) <= local_fabs(b))
+ w = t + Y;
+ else
+ w = T + Y;
+
+ dst.s.hi = Y = W + w;
+ dst.s.lo = (W - Y) + w;
+
+ return dst.ld;
+}
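
The H/h/T/t expressions are error-free transformations: each recovers, exactly, the rounding error of a nearby floating-point sum, and the final W/Y pair renormalizes the result into head + tail form. A minimal sketch of the same kind of transformation (Knuth's two-sum, a hypothetical helper name here), assuming round-to-nearest IEEE-754 doubles:

#include <stdio.h>

/* two_sum: *hi gets the rounded sum and *lo the exact rounding error,
 * so that *hi + *lo == a + b exactly (barring overflow). */
static void two_sum(double a, double b, double *hi, double *lo) {
    double s = a + b;
    double bb = s - a;
    *hi = s;
    *lo = (a - (s - bb)) + (b - bb);
}

int main(void) {
    double hi, lo;
    two_sum(1.0, 1e-20, &hi, &lo);
    printf("hi=%g lo=%g\n", hi, lo);  /* hi=1 lo=1e-20: the error is captured exactly */
    return 0;
}
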
diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c
index 0e01067914..70aa00b644 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qdiv.c
@@ -1,55 +1,55 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __gcc_qdiv(long double x, long double y);
- * This file implements the PowerPC 128-bit double-double division operation.
- * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
- */
-
-#include "DD.h"
-
-long double __gcc_qdiv(long double a, long double b)
-{
- static const uint32_t infinityHi = UINT32_C(0x7ff00000);
- DD dst = { .ld = a }, src = { .ld = b };
-
- register double x = dst.s.hi, x1 = dst.s.lo,
- y = src.s.hi, y1 = src.s.lo;
-
- double yHi, yLo, qHi, qLo;
- double yq, tmp, q;
-
- q = x / y;
-
- /* Detect special cases */
- if (q == 0.0) {
- dst.s.hi = q;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- const doublebits qBits = { .d = q };
- if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) {
- dst.s.hi = q;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- yHi = high26bits(y);
- qHi = high26bits(q);
-
- yq = y * q;
- yLo = y - yHi;
- qLo = q - qHi;
-
- tmp = LOWORDER(yq, yHi, yLo, qHi, qLo);
- tmp = (x - yq) - tmp;
- tmp = ((tmp + x1) - y1 * q) / y;
- x = q + tmp;
-
- dst.s.lo = (q - x) + tmp;
- dst.s.hi = x;
-
- return dst.ld;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qdiv(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double division operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qdiv(long double a, long double b)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+ DD dst = { .ld = a }, src = { .ld = b };
+
+ register double x = dst.s.hi, x1 = dst.s.lo,
+ y = src.s.hi, y1 = src.s.lo;
+
+ double yHi, yLo, qHi, qLo;
+ double yq, tmp, q;
+
+ q = x / y;
+
+ /* Detect special cases */
+ if (q == 0.0) {
+ dst.s.hi = q;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ const doublebits qBits = { .d = q };
+ if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = q;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ yHi = high26bits(y);
+ qHi = high26bits(q);
+
+ yq = y * q;
+ yLo = y - yHi;
+ qLo = q - qHi;
+
+ tmp = LOWORDER(yq, yHi, yLo, qHi, qLo);
+ tmp = (x - yq) - tmp;
+ tmp = ((tmp + x1) - y1 * q) / y;
+ x = q + tmp;
+
+ dst.s.lo = (q - x) + tmp;
+ dst.s.hi = x;
+
+ return dst.ld;
+}
diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c
index 1bdac9337a..fb4c5164cc 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qmul.c
@@ -1,53 +1,53 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __gcc_qmul(long double x, long double y);
- * This file implements the PowerPC 128-bit double-double multiply operation.
- * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
- */
-
-#include "DD.h"
-
-long double __gcc_qmul(long double x, long double y)
-{
- static const uint32_t infinityHi = UINT32_C(0x7ff00000);
- DD dst = { .ld = x }, src = { .ld = y };
-
- register double A = dst.s.hi, a = dst.s.lo,
- B = src.s.hi, b = src.s.lo;
-
- double aHi, aLo, bHi, bLo;
- double ab, tmp, tau;
-
- ab = A * B;
-
- /* Detect special cases */
- if (ab == 0.0) {
- dst.s.hi = ab;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- const doublebits abBits = { .d = ab };
- if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) {
- dst.s.hi = ab;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- /* Generic cases handled here. */
- aHi = high26bits(A);
- bHi = high26bits(B);
- aLo = A - aHi;
- bLo = B - bHi;
-
- tmp = LOWORDER(ab, aHi, aLo, bHi, bLo);
- tmp += (A * b + a * B);
- tau = ab + tmp;
-
- dst.s.lo = (ab - tau) + tmp;
- dst.s.hi = tau;
-
- return dst.ld;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qmul(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double multiply operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qmul(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.s.hi, a = dst.s.lo,
+ B = src.s.hi, b = src.s.lo;
+
+ double aHi, aLo, bHi, bLo;
+ double ab, tmp, tau;
+
+ ab = A * B;
+
+ /* Detect special cases */
+ if (ab == 0.0) {
+ dst.s.hi = ab;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ const doublebits abBits = { .d = ab };
+ if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = ab;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* Generic cases handled here. */
+ aHi = high26bits(A);
+ bHi = high26bits(B);
+ aLo = A - aHi;
+ bLo = B - bHi;
+
+ tmp = LOWORDER(ab, aHi, aLo, bHi, bLo);
+ tmp += (A * b + a * B);
+ tau = ab + tmp;
+
+ dst.s.lo = (ab - tau) + tmp;
+ dst.s.hi = tau;
+
+ return dst.ld;
+}
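
high26bits() and LOWORDER() come from DD.h and are not shown in this diff; together they recover the rounding error of A*B exactly by splitting each factor into halves whose partial products are exact. One common way to obtain such a split is Veltkamp splitting with the constant 2^27 + 1; a sketch under the assumption of round-to-nearest doubles (split_hi is a hypothetical name):

#include <stdio.h>

/* Veltkamp split: returns a head with at most 26 significant bits such
 * that head + (a - head) == a exactly. */
static double split_hi(double a) {
    const double splitter = 134217729.0;  /* 2^27 + 1 */
    double t = splitter * a;
    return t - (t - a);
}

int main(void) {
    double a = 1.0 / 3.0;
    double hi = split_hi(a), lo = a - hi;
    printf("a  = %.17g\nhi = %.17g\nlo = %.17g\n", a, hi, lo);
    printf("hi + lo == a: %d\n", hi + lo == a);  /* prints 1: the split is exact */
    return 0;
}
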
diff --git a/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c b/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c
index d45fc4d14a..c092e24dbd 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/gcc_qsub.c
@@ -1,76 +1,76 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __gcc_qsub(long double x, long double y);
- * This file implements the PowerPC 128-bit double-double subtract operation.
- * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
- */
-
-#include "DD.h"
-
-long double __gcc_qsub(long double x, long double y)
-{
- static const uint32_t infinityHi = UINT32_C(0x7ff00000);
-
- DD dst = { .ld = x }, src = { .ld = y };
-
- register double A = dst.s.hi, a = dst.s.lo,
- B = -src.s.hi, b = -src.s.lo;
-
- /* If both operands are zero: */
- if ((A == 0.0) && (B == 0.0)) {
- dst.s.hi = A + B;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- /* If either operand is NaN or infinity: */
- const doublebits abits = { .d = A };
- const doublebits bbits = { .d = B };
- if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
- (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
- dst.s.hi = A + B;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- /* If the computation overflows: */
- /* This may be playing things a little bit fast and loose, but it will do for a start. */
- const double testForOverflow = A + (B + (a + b));
- const doublebits testbits = { .d = testForOverflow };
- if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
- dst.s.hi = testForOverflow;
- dst.s.lo = 0.0;
- return dst.ld;
- }
-
- double H, h;
- double T, t;
- double W, w;
- double Y;
-
- H = B + (A - (A + B));
- T = b + (a - (a + b));
- h = A + (B - (A + B));
- t = a + (b - (a + b));
-
- if (local_fabs(A) <= local_fabs(B))
- w = (a + b) + h;
- else
- w = (a + b) + H;
-
- W = (A + B) + w;
- Y = (A + B) - W;
- Y += w;
-
- if (local_fabs(a) <= local_fabs(b))
- w = t + Y;
- else
- w = T + Y;
-
- dst.s.hi = Y = W + w;
- dst.s.lo = (W - Y) + w;
-
- return dst.ld;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qsub(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double subtract operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qsub(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.s.hi, a = dst.s.lo,
+ B = -src.s.hi, b = -src.s.lo;
+
+ /* If both operands are zero: */
+ if ((A == 0.0) && (B == 0.0)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If either operand is NaN or infinity: */
+ const doublebits abits = { .d = A };
+ const doublebits bbits = { .d = B };
+ if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
+ (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If the computation overflows: */
+ /* This may be playing things a little bit fast and loose, but it will do for a start. */
+ const double testForOverflow = A + (B + (a + b));
+ const doublebits testbits = { .d = testForOverflow };
+ if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = testForOverflow;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ double H, h;
+ double T, t;
+ double W, w;
+ double Y;
+
+ H = B + (A - (A + B));
+ T = b + (a - (a + b));
+ h = A + (B - (A + B));
+ t = a + (b - (a + b));
+
+ if (local_fabs(A) <= local_fabs(B))
+ w = (a + b) + h;
+ else
+ w = (a + b) + H;
+
+ W = (A + B) + w;
+ Y = (A + B) - W;
+ Y += w;
+
+ if (local_fabs(a) <= local_fabs(b))
+ w = t + Y;
+ else
+ w = T + Y;
+
+ dst.s.hi = Y = W + w;
+ dst.s.lo = (W - Y) + w;
+
+ return dst.ld;
+}
diff --git a/contrib/libs/cxxsupp/builtins/ppc/multc3.c b/contrib/libs/cxxsupp/builtins/ppc/multc3.c
index 327d625dc7..9dd79c975d 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/multc3.c
+++ b/contrib/libs/cxxsupp/builtins/ppc/multc3.c
@@ -1,90 +1,90 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-#include "DD.h"
-#include "../int_math.h"
-
-#define makeFinite(x) { \
- (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
- (x).s.lo = 0.0; \
- }
-
-#define zeroNaN(x) { \
- if (crt_isnan((x).s.hi)) { \
- (x).s.hi = crt_copysign(0.0, (x).s.hi); \
- (x).s.lo = 0.0; \
- } \
- }
-
-long double _Complex
-__multc3(long double a, long double b, long double c, long double d)
-{
- long double ac = __gcc_qmul(a,c);
- long double bd = __gcc_qmul(b,d);
- long double ad = __gcc_qmul(a,d);
- long double bc = __gcc_qmul(b,c);
-
- DD real = { .ld = __gcc_qsub(ac,bd) };
- DD imag = { .ld = __gcc_qadd(ad,bc) };
-
- if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
- {
- int recalc = 0;
-
- DD aDD = { .ld = a };
- DD bDD = { .ld = b };
- DD cDD = { .ld = c };
- DD dDD = { .ld = d };
-
- if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi))
- {
- makeFinite(aDD);
- makeFinite(bDD);
- zeroNaN(cDD);
- zeroNaN(dDD);
- recalc = 1;
- }
-
- if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi))
- {
- makeFinite(cDD);
- makeFinite(dDD);
- zeroNaN(aDD);
- zeroNaN(bDD);
- recalc = 1;
- }
-
- if (!recalc)
- {
- DD acDD = { .ld = ac };
- DD bdDD = { .ld = bd };
- DD adDD = { .ld = ad };
- DD bcDD = { .ld = bc };
-
- if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) ||
- crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi))
- {
- zeroNaN(aDD);
- zeroNaN(bDD);
- zeroNaN(cDD);
- zeroNaN(dDD);
- recalc = 1;
- }
- }
-
- if (recalc)
- {
- real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi);
- real.s.lo = 0.0;
- imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi);
- imag.s.lo = 0.0;
- }
- }
-
- long double _Complex z;
- __real__ z = real.ld;
- __imag__ z = imag.ld;
-
- return z;
-}
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#include "DD.h"
+#include "../int_math.h"
+
+#define makeFinite(x) { \
+ (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
+ (x).s.lo = 0.0; \
+ }
+
+#define zeroNaN(x) { \
+ if (crt_isnan((x).s.hi)) { \
+ (x).s.hi = crt_copysign(0.0, (x).s.hi); \
+ (x).s.lo = 0.0; \
+ } \
+ }
+
+long double _Complex
+__multc3(long double a, long double b, long double c, long double d)
+{
+ long double ac = __gcc_qmul(a,c);
+ long double bd = __gcc_qmul(b,d);
+ long double ad = __gcc_qmul(a,d);
+ long double bc = __gcc_qmul(b,c);
+
+ DD real = { .ld = __gcc_qsub(ac,bd) };
+ DD imag = { .ld = __gcc_qadd(ad,bc) };
+
+ if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
+ {
+ int recalc = 0;
+
+ DD aDD = { .ld = a };
+ DD bDD = { .ld = b };
+ DD cDD = { .ld = c };
+ DD dDD = { .ld = d };
+
+ if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi))
+ {
+ makeFinite(aDD);
+ makeFinite(bDD);
+ zeroNaN(cDD);
+ zeroNaN(dDD);
+ recalc = 1;
+ }
+
+ if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi))
+ {
+ makeFinite(cDD);
+ makeFinite(dDD);
+ zeroNaN(aDD);
+ zeroNaN(bDD);
+ recalc = 1;
+ }
+
+ if (!recalc)
+ {
+ DD acDD = { .ld = ac };
+ DD bdDD = { .ld = bd };
+ DD adDD = { .ld = ad };
+ DD bcDD = { .ld = bc };
+
+ if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) ||
+ crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi))
+ {
+ zeroNaN(aDD);
+ zeroNaN(bDD);
+ zeroNaN(cDD);
+ zeroNaN(dDD);
+ recalc = 1;
+ }
+ }
+
+ if (recalc)
+ {
+ real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi);
+ real.s.lo = 0.0;
+ imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi);
+ imag.s.lo = 0.0;
+ }
+ }
+
+ long double _Complex z;
+ __real__ z = real.ld;
+ __imag__ z = imag.ld;
+
+ return z;
+}
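
Apart from the NaN-recovery block, __multc3 is the textbook complex product applied to double-double parts. For reference, the same identity with plain doubles and C99 _Complex:

#include <complex.h>
#include <stdio.h>

int main(void) {
    double a = 1.0, b = 2.0, c = 3.0, d = 4.0;
    double re = a * c - b * d;                     /* ac - bd = -5 */
    double im = a * d + b * c;                     /* ad + bc = 10 */
    double complex z = (a + b * I) * (c + d * I);  /* let the compiler do it */
    printf("%g%+gi vs %g%+gi\n", re, im, creal(z), cimag(z));
    return 0;
}
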
diff --git a/contrib/libs/cxxsupp/builtins/ppc/restFP.S b/contrib/libs/cxxsupp/builtins/ppc/restFP.S
index 23d5e142b1..95032897c0 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/restFP.S
+++ b/contrib/libs/cxxsupp/builtins/ppc/restFP.S
@@ -1,43 +1,43 @@
-//===-- restFP.S - Implement restFP ---------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// Helper function used by compiler to restore ppc floating point registers at
-// the end of the function epilog. This function returns to the address
-// in the LR slot. So a function epilog must branch (b) not branch and link
-// (bl) to this function.
-// If the compiler wants to restore f27..f31, it does a "b restFP+52"
-//
-// This function should never be exported by a shared library. Each linkage
-// unit carries its own copy of this function.
-//
-DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(restFP)
- lfd f14,-144(r1)
- lfd f15,-136(r1)
- lfd f16,-128(r1)
- lfd f17,-120(r1)
- lfd f18,-112(r1)
- lfd f19,-104(r1)
- lfd f20,-96(r1)
- lfd f21,-88(r1)
- lfd f22,-80(r1)
- lfd f23,-72(r1)
- lfd f24,-64(r1)
- lfd f25,-56(r1)
- lfd f26,-48(r1)
- lfd f27,-40(r1)
- lfd f28,-32(r1)
- lfd f29,-24(r1)
- lfd f30,-16(r1)
- lfd f31,-8(r1)
- lwz r0,8(r1)
- mtlr r0
- blr
+//===-- restFP.S - Implement restFP ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// Helper function used by compiler to restore ppc floating point registers at
+// the end of the function epilog. This function returns to the address
+// in the LR slot. So a function epilog must branch (b) not branch and link
+// (bl) to this function.
+// If the compiler wants to restore f27..f31, it does a "b restFP+52"
+//
+// This function should never be exported by a shared library. Each linkage
+// unit carries its own copy of this function.
+//
+DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(restFP)
+ lfd f14,-144(r1)
+ lfd f15,-136(r1)
+ lfd f16,-128(r1)
+ lfd f17,-120(r1)
+ lfd f18,-112(r1)
+ lfd f19,-104(r1)
+ lfd f20,-96(r1)
+ lfd f21,-88(r1)
+ lfd f22,-80(r1)
+ lfd f23,-72(r1)
+ lfd f24,-64(r1)
+ lfd f25,-56(r1)
+ lfd f26,-48(r1)
+ lfd f27,-40(r1)
+ lfd f28,-32(r1)
+ lfd f29,-24(r1)
+ lfd f30,-16(r1)
+ lfd f31,-8(r1)
+ lwz r0,8(r1)
+ mtlr r0
+ blr
diff --git a/contrib/libs/cxxsupp/builtins/ppc/saveFP.S b/contrib/libs/cxxsupp/builtins/ppc/saveFP.S
index c8dcfed616..72bd459f4c 100644
--- a/contrib/libs/cxxsupp/builtins/ppc/saveFP.S
+++ b/contrib/libs/cxxsupp/builtins/ppc/saveFP.S
@@ -1,40 +1,40 @@
-//===-- saveFP.S - Implement saveFP ---------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-//
-// Helper function used by compiler to save ppc floating point registers in
-// function prologs. This routine also saves r0 in the LR slot.
-// If the compiler wants to save f27..f31, it does a "bl saveFP+52"
-//
-// This function should never be exported by a shared library. Each linkage
-// unit carries its own copy of this function.
-//
-DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(saveFP)
- stfd f14,-144(r1)
- stfd f15,-136(r1)
- stfd f16,-128(r1)
- stfd f17,-120(r1)
- stfd f18,-112(r1)
- stfd f19,-104(r1)
- stfd f20,-96(r1)
- stfd f21,-88(r1)
- stfd f22,-80(r1)
- stfd f23,-72(r1)
- stfd f24,-64(r1)
- stfd f25,-56(r1)
- stfd f26,-48(r1)
- stfd f27,-40(r1)
- stfd f28,-32(r1)
- stfd f29,-24(r1)
- stfd f30,-16(r1)
- stfd f31,-8(r1)
- stw r0,8(r1)
- blr
+//===-- saveFP.S - Implement saveFP ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// Helper function used by compiler to save ppc floating point registers in
+// function prologs. This routine also saves r0 in the LR slot.
+// If the compiler wants to save f27..f31, it does a "bl saveFP+52"
+//
+// This function should never be exported by a shared library. Each linkage
+// unit carries its own copy of this function.
+//
+DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(saveFP)
+ stfd f14,-144(r1)
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ stfd f31,-8(r1)
+ stw r0,8(r1)
+ blr
diff --git a/contrib/libs/cxxsupp/builtins/subdf3.c b/contrib/libs/cxxsupp/builtins/subdf3.c
index 33264bf561..7a79e5e776 100644
--- a/contrib/libs/cxxsupp/builtins/subdf3.c
+++ b/contrib/libs/cxxsupp/builtins/subdf3.c
@@ -1,25 +1,25 @@
-//===-- lib/subdf3.c - Double-precision subtraction ---------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(dsub, subdf3)
-
-// Subtraction; flip the sign bit of b and add.
-COMPILER_RT_ABI fp_t
-__subdf3(fp_t a, fp_t b) {
- return __adddf3(a, fromRep(toRep(b) ^ signBit));
-}
-
+//===-- lib/subdf3.c - Double-precision subtraction ---------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(dsub, subdf3)
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subdf3(fp_t a, fp_t b) {
+ return __adddf3(a, fromRep(toRep(b) ^ signBit));
+}
+
diff --git a/contrib/libs/cxxsupp/builtins/subsf3.c b/contrib/libs/cxxsupp/builtins/subsf3.c
index 99677d259d..c3b85144af 100644
--- a/contrib/libs/cxxsupp/builtins/subsf3.c
+++ b/contrib/libs/cxxsupp/builtins/subsf3.c
@@ -1,25 +1,25 @@
-//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-ARM_EABI_FNALIAS(fsub, subsf3)
-
-// Subtraction; flip the sign bit of b and add.
-COMPILER_RT_ABI fp_t
-__subsf3(fp_t a, fp_t b) {
- return __addsf3(a, fromRep(toRep(b) ^ signBit));
-}
-
+//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+ARM_EABI_FNALIAS(fsub, subsf3)
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subsf3(fp_t a, fp_t b) {
+ return __addsf3(a, fromRep(toRep(b) ^ signBit));
+}
+
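toRep, fromRep and signBit are defined in fp_lib.h (not part of this diff); the whole routine is just "negate b by flipping its sign bit, then add". A self-contained sketch of that flip for binary32 (negate_via_bits is a hypothetical name):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Flip the sign bit of an IEEE-754 single by toggling bit 31 of its bit
 * pattern, the same identity __subsf3 relies on. */
static float negate_via_bits(float b) {
    uint32_t rep;
    memcpy(&rep, &b, sizeof rep);      /* toRep(b) */
    rep ^= UINT32_C(0x80000000);       /* flip the sign bit */
    memcpy(&b, &rep, sizeof rep);      /* fromRep(...) */
    return b;
}

int main(void) {
    float a = 1.5f, b = 0.25f;
    printf("%g %g\n", a - b, a + negate_via_bits(b));  /* both print 1.25 */
    return 0;
}
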
diff --git a/contrib/libs/cxxsupp/builtins/subtf3.c b/contrib/libs/cxxsupp/builtins/subtf3.c
index 02e90608a8..609b816f41 100644
--- a/contrib/libs/cxxsupp/builtins/subtf3.c
+++ b/contrib/libs/cxxsupp/builtins/subtf3.c
@@ -1,27 +1,27 @@
-//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float subtraction with the
-// IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
-
-// Subtraction; flip the sign bit of b and add.
-COMPILER_RT_ABI fp_t
-__subtf3(fp_t a, fp_t b) {
- return __addtf3(a, fromRep(toRep(b) ^ signBit));
-}
-
-#endif
+//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subtf3(fp_t a, fp_t b) {
+ return __addtf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/subvdi3.c b/contrib/libs/cxxsupp/builtins/subvdi3.c
index b4a680a50f..71fc70ffa9 100644
--- a/contrib/libs/cxxsupp/builtins/subvdi3.c
+++ b/contrib/libs/cxxsupp/builtins/subvdi3.c
@@ -1,36 +1,36 @@
-/* ===-- subvdi3.c - Implement __subvdi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __subvdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a - b */
-
-/* Effects: aborts if a - b overflows */
-
-COMPILER_RT_ABI di_int
-__subvdi3(di_int a, di_int b)
-{
- di_int s = (du_int) a - (du_int) b;
- if (b >= 0)
- {
- if (s > a)
- compilerrt_abort();
- }
- else
- {
- if (s <= a)
- compilerrt_abort();
- }
- return s;
-}
+/* ===-- subvdi3.c - Implement __subvdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __subvdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a - b */
+
+/* Effects: aborts if a - b overflows */
+
+COMPILER_RT_ABI di_int
+__subvdi3(di_int a, di_int b)
+{
+ di_int s = (du_int) a - (du_int) b;
+ if (b >= 0)
+ {
+ if (s > a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s <= a)
+ compilerrt_abort();
+ }
+ return s;
+}
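
The test above is the classic wraparound check for signed subtraction: with s computed modulo 2^64, overflow happened iff (b >= 0 and s > a) or (b < 0 and s <= a). A sketch of the same predicate that reports instead of calling compilerrt_abort() (sub_overflows is a hypothetical name):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Returns 1 iff a - b overflows int64_t, using the same test as __subvdi3. */
static int sub_overflows(int64_t a, int64_t b) {
    int64_t s = (int64_t)((uint64_t)a - (uint64_t)b); /* wraparound difference, as in the library code */
    return b >= 0 ? s > a : s <= a;
}

int main(void) {
    printf("%d\n", sub_overflows(INT64_MIN, 1));   /* 1: overflows */
    printf("%d\n", sub_overflows(INT64_MAX, -1));  /* 1: overflows */
    printf("%d\n", sub_overflows(0, 1));           /* 0: fine */
    return 0;
}
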
diff --git a/contrib/libs/cxxsupp/builtins/subvsi3.c b/contrib/libs/cxxsupp/builtins/subvsi3.c
index 2e4b732866..e6c0fb688c 100644
--- a/contrib/libs/cxxsupp/builtins/subvsi3.c
+++ b/contrib/libs/cxxsupp/builtins/subvsi3.c
@@ -1,36 +1,36 @@
-/* ===-- subvsi3.c - Implement __subvsi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __subvsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a - b */
-
-/* Effects: aborts if a - b overflows */
-
-COMPILER_RT_ABI si_int
-__subvsi3(si_int a, si_int b)
-{
- si_int s = (su_int) a - (su_int) b;
- if (b >= 0)
- {
- if (s > a)
- compilerrt_abort();
- }
- else
- {
- if (s <= a)
- compilerrt_abort();
- }
- return s;
-}
+/* ===-- subvsi3.c - Implement __subvsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __subvsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a - b */
+
+/* Effects: aborts if a - b overflows */
+
+COMPILER_RT_ABI si_int
+__subvsi3(si_int a, si_int b)
+{
+ si_int s = (su_int) a - (su_int) b;
+ if (b >= 0)
+ {
+ if (s > a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s <= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/libs/cxxsupp/builtins/subvti3.c b/contrib/libs/cxxsupp/builtins/subvti3.c
index 23b504e1fa..a6804d2d7b 100644
--- a/contrib/libs/cxxsupp/builtins/subvti3.c
+++ b/contrib/libs/cxxsupp/builtins/subvti3.c
@@ -1,40 +1,40 @@
-/* ===-- subvti3.c - Implement __subvti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __subvti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a - b */
-
-/* Effects: aborts if a - b overflows */
-
-COMPILER_RT_ABI ti_int
-__subvti3(ti_int a, ti_int b)
-{
- ti_int s = (tu_int) a - (tu_int) b;
- if (b >= 0)
- {
- if (s > a)
- compilerrt_abort();
- }
- else
- {
- if (s <= a)
- compilerrt_abort();
- }
- return s;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- subvti3.c - Implement __subvti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __subvti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a - b */
+
+/* Effects: aborts if a - b overflows */
+
+COMPILER_RT_ABI ti_int
+__subvti3(ti_int a, ti_int b)
+{
+ ti_int s = (tu_int) a - (tu_int) b;
+ if (b >= 0)
+ {
+ if (s > a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s <= a)
+ compilerrt_abort();
+ }
+ return s;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/trampoline_setup.c b/contrib/libs/cxxsupp/builtins/trampoline_setup.c
index d24d365735..25b627ab76 100644
--- a/contrib/libs/cxxsupp/builtins/trampoline_setup.c
+++ b/contrib/libs/cxxsupp/builtins/trampoline_setup.c
@@ -1,48 +1,48 @@
-/* ===----- trampoline_setup.c - Implement __trampoline_setup -------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-extern void __clear_cache(void* start, void* end);
-
-/*
- * The ppc compiler generates calls to __trampoline_setup() when creating
- * trampoline functions on the stack for use with nested functions.
- * This function creates a custom 40-byte trampoline function on the stack
- * which loads r11 with a pointer to the outer function's locals
- * and then jumps to the target nested function.
- */
-
-#if __ppc__ && !defined(__powerpc64__)
-COMPILER_RT_ABI void
-__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
- const void* realFunc, void* localsPtr)
-{
- /* should never happen, but if compiler did not allocate */
- /* enough space on stack for the trampoline, abort */
- if ( trampSizeAllocated < 40 )
- compilerrt_abort();
-
- /* create trampoline */
- trampOnStack[0] = 0x7c0802a6; /* mflr r0 */
- trampOnStack[1] = 0x4800000d; /* bl Lbase */
- trampOnStack[2] = (uint32_t)realFunc;
- trampOnStack[3] = (uint32_t)localsPtr;
- trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */
- trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */
- trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */
- trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */
- trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */
- trampOnStack[9] = 0x4e800420; /* bctr */
-
- /* clear instruction cache */
- __clear_cache(trampOnStack, &trampOnStack[10]);
-}
-#endif /* __ppc__ && !defined(__powerpc64__) */
+/* ===----- trampoline_setup.c - Implement __trampoline_setup -------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+extern void __clear_cache(void* start, void* end);
+
+/*
+ * The ppc compiler generates calls to __trampoline_setup() when creating
+ * trampoline functions on the stack for use with nested functions.
+ * This function creates a custom 40-byte trampoline function on the stack
+ * which loads r11 with a pointer to the outer function's locals
+ * and then jumps to the target nested function.
+ */
+
+#if __ppc__ && !defined(__powerpc64__)
+COMPILER_RT_ABI void
+__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr)
+{
+ /* should never happen, but if compiler did not allocate */
+ /* enough space on stack for the trampoline, abort */
+ if ( trampSizeAllocated < 40 )
+ compilerrt_abort();
+
+ /* create trampoline */
+ trampOnStack[0] = 0x7c0802a6; /* mflr r0 */
+ trampOnStack[1] = 0x4800000d; /* bl Lbase */
+ trampOnStack[2] = (uint32_t)realFunc;
+ trampOnStack[3] = (uint32_t)localsPtr;
+ trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */
+ trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */
+ trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */
+ trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */
+ trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */
+ trampOnStack[9] = 0x4e800420; /* bctr */
+
+ /* clear instruction cache */
+ __clear_cache(trampOnStack, &trampOnStack[10]);
+}
+#endif /* __ppc__ && !defined(__powerpc64__) */
diff --git a/contrib/libs/cxxsupp/builtins/truncdfhf2.c b/contrib/libs/cxxsupp/builtins/truncdfhf2.c
index c81e272a8a..17195cd9e7 100644
--- a/contrib/libs/cxxsupp/builtins/truncdfhf2.c
+++ b/contrib/libs/cxxsupp/builtins/truncdfhf2.c
@@ -1,18 +1,18 @@
-//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define SRC_DOUBLE
-#define DST_HALF
-#include "fp_trunc_impl.inc"
-
-ARM_EABI_FNALIAS(d2h, truncdfhf2)
-
-COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
- return __truncXfYf2__(a);
-}
+//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_DOUBLE
+#define DST_HALF
+#include "fp_trunc_impl.inc"
+
+ARM_EABI_FNALIAS(d2h, truncdfhf2)
+
+COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
+ return __truncXfYf2__(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/truncdfsf2.c b/contrib/libs/cxxsupp/builtins/truncdfsf2.c
index 6b07b62111..46ec11dccd 100644
--- a/contrib/libs/cxxsupp/builtins/truncdfsf2.c
+++ b/contrib/libs/cxxsupp/builtins/truncdfsf2.c
@@ -1,18 +1,18 @@
-//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define SRC_DOUBLE
-#define DST_SINGLE
-#include "fp_trunc_impl.inc"
-
-ARM_EABI_FNALIAS(d2f, truncdfsf2)
-
-COMPILER_RT_ABI float __truncdfsf2(double a) {
- return __truncXfYf2__(a);
-}
+//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_DOUBLE
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+ARM_EABI_FNALIAS(d2f, truncdfsf2)
+
+COMPILER_RT_ABI float __truncdfsf2(double a) {
+ return __truncXfYf2__(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/truncsfhf2.c b/contrib/libs/cxxsupp/builtins/truncsfhf2.c
index edc71420b4..9d61895bfd 100644
--- a/contrib/libs/cxxsupp/builtins/truncsfhf2.c
+++ b/contrib/libs/cxxsupp/builtins/truncsfhf2.c
@@ -1,24 +1,24 @@
-//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define SRC_SINGLE
-#define DST_HALF
-#include "fp_trunc_impl.inc"
-
-ARM_EABI_FNALIAS(f2h, truncsfhf2)
-
-// Use a forwarding definition and noinline to implement a poor man's alias,
-// as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
- return __truncXfYf2__(a);
-}
-
-COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
- return __truncsfhf2(a);
-}
+//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_SINGLE
+#define DST_HALF
+#include "fp_trunc_impl.inc"
+
+ARM_EABI_FNALIAS(f2h, truncsfhf2)
+
+// Use a forwarding definition and noinline to implement a poor man's alias,
+// as there isn't a good cross-platform way of defining one.
+COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
+ return __truncXfYf2__(a);
+}
+
+COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
+ return __truncsfhf2(a);
+}
diff --git a/contrib/libs/cxxsupp/builtins/trunctfdf2.c b/contrib/libs/cxxsupp/builtins/trunctfdf2.c
index 3547234eff..741a71b33c 100644
--- a/contrib/libs/cxxsupp/builtins/trunctfdf2.c
+++ b/contrib/libs/cxxsupp/builtins/trunctfdf2.c
@@ -1,22 +1,22 @@
-//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-#define SRC_QUAD
-#define DST_DOUBLE
-#include "fp_trunc_impl.inc"
-
-COMPILER_RT_ABI double __trunctfdf2(long double a) {
- return __truncXfYf2__(a);
-}
-
-#endif
+//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_QUAD
+#define DST_DOUBLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI double __trunctfdf2(long double a) {
+ return __truncXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/trunctfsf2.c b/contrib/libs/cxxsupp/builtins/trunctfsf2.c
index 3f6636f19a..de96c1decf 100644
--- a/contrib/libs/cxxsupp/builtins/trunctfsf2.c
+++ b/contrib/libs/cxxsupp/builtins/trunctfsf2.c
@@ -1,22 +1,22 @@
-//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
-#define SRC_QUAD
-#define DST_SINGLE
-#include "fp_trunc_impl.inc"
-
-COMPILER_RT_ABI float __trunctfsf2(long double a) {
- return __truncXfYf2__(a);
-}
-
-#endif
+//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_QUAD
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __trunctfsf2(long double a) {
+ return __truncXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/ucmpdi2.c b/contrib/libs/cxxsupp/builtins/ucmpdi2.c
index 5a57adb98c..40af23613b 100644
--- a/contrib/libs/cxxsupp/builtins/ucmpdi2.c
+++ b/contrib/libs/cxxsupp/builtins/ucmpdi2.c
@@ -1,51 +1,51 @@
-/* ===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ucmpdi2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: if (a < b) returns 0
- * if (a == b) returns 1
- * if (a > b) returns 2
- */
-
-COMPILER_RT_ABI si_int
-__ucmpdi2(du_int a, du_int b)
-{
- udwords x;
- x.all = a;
- udwords y;
- y.all = b;
- if (x.s.high < y.s.high)
- return 0;
- if (x.s.high > y.s.high)
- return 2;
- if (x.s.low < y.s.low)
- return 0;
- if (x.s.low > y.s.low)
- return 2;
- return 1;
-}
-
-#ifdef __ARM_EABI__
-/* Returns: if (a < b) returns -1
-* if (a == b) returns 0
-* if (a > b) returns 1
-*/
-COMPILER_RT_ABI si_int
-__aeabi_ulcmp(di_int a, di_int b)
-{
- return __ucmpdi2(a, b) - 1;
-}
-#endif
-
+/* ===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ucmpdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: if (a < b) returns 0
+ * if (a == b) returns 1
+ * if (a > b) returns 2
+ */
+
+COMPILER_RT_ABI si_int
+__ucmpdi2(du_int a, du_int b)
+{
+ udwords x;
+ x.all = a;
+ udwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#ifdef __ARM_EABI__
+/* Returns: if (a < b) returns -1
+* if (a == b) returns 0
+* if (a > b) returns 1
+*/
+COMPILER_RT_ABI si_int
+__aeabi_ulcmp(di_int a, di_int b)
+{
+ return __ucmpdi2(a, b) - 1;
+}
+#endif
+
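The 0/1/2 return convention above composes directly with the AEABI wrapper: subtracting one yields the -1/0/1 convention that __aeabi_ulcmp returns. A small host-side check of that mapping, using plain uint64_t in place of du_int and a hypothetical reference function, might look like:

#include <assert.h>
#include <stdint.h>

/* Reference semantics of __ucmpdi2: 0 if a < b, 1 if a == b, 2 if a > b. */
static int ucmp64_ref(uint64_t a, uint64_t b) {
    return (a > b) - (a < b) + 1;
}

int main(void) {
    assert(ucmp64_ref(1, 2) == 0);        /* a <  b */
    assert(ucmp64_ref(2, 2) == 1);        /* a == b */
    assert(ucmp64_ref(3, 2) == 2);        /* a >  b */
    /* __aeabi_ulcmp semantics are the same result shifted down by one: -1/0/1 */
    assert(ucmp64_ref(1, 2) - 1 == -1);
    return 0;
}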
diff --git a/contrib/libs/cxxsupp/builtins/ucmpti2.c b/contrib/libs/cxxsupp/builtins/ucmpti2.c
index 797c62a8a7..bda8083bb2 100644
--- a/contrib/libs/cxxsupp/builtins/ucmpti2.c
+++ b/contrib/libs/cxxsupp/builtins/ucmpti2.c
@@ -1,42 +1,42 @@
-/* ===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __ucmpti2 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: if (a < b) returns 0
- * if (a == b) returns 1
- * if (a > b) returns 2
- */
-
-COMPILER_RT_ABI si_int
-__ucmpti2(tu_int a, tu_int b)
-{
- utwords x;
- x.all = a;
- utwords y;
- y.all = b;
- if (x.s.high < y.s.high)
- return 0;
- if (x.s.high > y.s.high)
- return 2;
- if (x.s.low < y.s.low)
- return 0;
- if (x.s.low > y.s.low)
- return 2;
- return 1;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ucmpti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: if (a < b) returns 0
+ * if (a == b) returns 1
+ * if (a > b) returns 2
+ */
+
+COMPILER_RT_ABI si_int
+__ucmpti2(tu_int a, tu_int b)
+{
+ utwords x;
+ x.all = a;
+ utwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/udivdi3.c b/contrib/libs/cxxsupp/builtins/udivdi3.c
index 1a1524479e..dc68e154b1 100644
--- a/contrib/libs/cxxsupp/builtins/udivdi3.c
+++ b/contrib/libs/cxxsupp/builtins/udivdi3.c
@@ -1,23 +1,23 @@
-/* ===-- udivdi3.c - Implement __udivdi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivdi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI du_int
-__udivdi3(du_int a, du_int b)
-{
- return __udivmoddi4(a, b, 0);
-}
+/* ===-- udivdi3.c - Implement __udivdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI du_int
+__udivdi3(du_int a, du_int b)
+{
+ return __udivmoddi4(a, b, 0);
+}
diff --git a/contrib/libs/cxxsupp/builtins/udivmoddi4.c b/contrib/libs/cxxsupp/builtins/udivmoddi4.c
index 606c43e509..0c8b4ff464 100644
--- a/contrib/libs/cxxsupp/builtins/udivmoddi4.c
+++ b/contrib/libs/cxxsupp/builtins/udivmoddi4.c
@@ -1,231 +1,231 @@
-/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivmoddi4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Effects: if rem != 0, *rem = a % b
- * Returns: a / b
- */
-
-/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
-
-COMPILER_RT_ABI du_int
-__udivmoddi4(du_int a, du_int b, du_int* rem)
-{
- const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
- const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
- udwords n;
- n.all = a;
- udwords d;
- d.all = b;
- udwords q;
- udwords r;
- unsigned sr;
- /* special cases, X is unknown, K != 0 */
- if (n.s.high == 0)
- {
- if (d.s.high == 0)
- {
- /* 0 X
- * ---
- * 0 X
+/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmoddi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Effects: if rem != 0, *rem = a % b
+ * Returns: a / b
+ */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+COMPILER_RT_ABI du_int
+__udivmoddi4(du_int a, du_int b, du_int* rem)
+{
+ const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ udwords n;
+ n.all = a;
+ udwords d;
+ d.all = b;
+ udwords q;
+ udwords r;
+ unsigned sr;
+ /* special cases, X is unknown, K != 0 */
+ if (n.s.high == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* 0 X
+ * ---
+ * 0 X
+ */
+ if (rem)
+ *rem = n.s.low % d.s.low;
+ return n.s.low / d.s.low;
+ }
+ /* 0 X
+ * ---
+ * K X
+ */
+ if (rem)
+ *rem = n.s.low;
+ return 0;
+ }
+ /* n.s.high != 0 */
+ if (d.s.low == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 0
*/
- if (rem)
- *rem = n.s.low % d.s.low;
- return n.s.low / d.s.low;
- }
- /* 0 X
- * ---
- * K X
- */
- if (rem)
- *rem = n.s.low;
- return 0;
- }
- /* n.s.high != 0 */
- if (d.s.low == 0)
- {
- if (d.s.high == 0)
- {
- /* K X
- * ---
- * 0 0
- */
- if (rem)
- *rem = n.s.high % d.s.low;
- return n.s.high / d.s.low;
- }
- /* d.s.high != 0 */
- if (n.s.low == 0)
- {
- /* K 0
- * ---
- * K 0
- */
- if (rem)
- {
- r.s.high = n.s.high % d.s.high;
- r.s.low = 0;
- *rem = r.all;
- }
- return n.s.high / d.s.high;
- }
- /* K K
- * ---
- * K 0
- */
- if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
- {
- if (rem)
- {
- r.s.low = n.s.low;
- r.s.high = n.s.high & (d.s.high - 1);
- *rem = r.all;
- }
- return n.s.high >> __builtin_ctz(d.s.high);
- }
- /* K K
- * ---
- * K 0
- */
- sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
- /* 0 <= sr <= n_uword_bits - 2 or sr large */
- if (sr > n_uword_bits - 2)
- {
- if (rem)
- *rem = n.all;
- return 0;
- }
- ++sr;
- /* 1 <= sr <= n_uword_bits - 1 */
- /* q.all = n.all << (n_udword_bits - sr); */
- q.s.low = 0;
- q.s.high = n.s.low << (n_uword_bits - sr);
- /* r.all = n.all >> sr; */
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
- }
- else /* d.s.low != 0 */
- {
- if (d.s.high == 0)
- {
- /* K X
- * ---
- * 0 K
- */
- if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
- {
- if (rem)
- *rem = n.s.low & (d.s.low - 1);
- if (d.s.low == 1)
- return n.all;
- sr = __builtin_ctz(d.s.low);
- q.s.high = n.s.high >> sr;
- q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
- return q.all;
- }
- /* K X
- * ---
- * 0 K
- */
- sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
- /* 2 <= sr <= n_udword_bits - 1
- * q.all = n.all << (n_udword_bits - sr);
- * r.all = n.all >> sr;
- */
- if (sr == n_uword_bits)
- {
- q.s.low = 0;
- q.s.high = n.s.low;
- r.s.high = 0;
- r.s.low = n.s.high;
- }
- else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
- {
- q.s.low = 0;
- q.s.high = n.s.low << (n_uword_bits - sr);
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
- }
- else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
- {
- q.s.low = n.s.low << (n_udword_bits - sr);
- q.s.high = (n.s.high << (n_udword_bits - sr)) |
- (n.s.low >> (sr - n_uword_bits));
- r.s.high = 0;
- r.s.low = n.s.high >> (sr - n_uword_bits);
- }
- }
- else
- {
- /* K X
- * ---
- * K K
- */
- sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
- /* 0 <= sr <= n_uword_bits - 1 or sr large */
- if (sr > n_uword_bits - 1)
- {
- if (rem)
- *rem = n.all;
- return 0;
- }
- ++sr;
- /* 1 <= sr <= n_uword_bits */
- /* q.all = n.all << (n_udword_bits - sr); */
- q.s.low = 0;
- if (sr == n_uword_bits)
- {
- q.s.high = n.s.low;
- r.s.high = 0;
- r.s.low = n.s.high;
- }
- else
- {
- q.s.high = n.s.low << (n_uword_bits - sr);
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
- }
- }
- }
- /* Not a special case
- * q and r are initialized with:
- * q.all = n.all << (n_udword_bits - sr);
- * r.all = n.all >> sr;
- * 1 <= sr <= n_udword_bits - 1
- */
- su_int carry = 0;
- for (; sr > 0; --sr)
- {
- /* r:q = ((r:q) << 1) | carry */
- r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
- r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
- q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
- q.s.low = (q.s.low << 1) | carry;
- /* carry = 0;
- * if (r.all >= d.all)
- * {
- * r.all -= d.all;
- * carry = 1;
- * }
- */
- const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
- carry = s & 1;
- r.all -= d.all & s;
- }
- q.all = (q.all << 1) | carry;
- if (rem)
- *rem = r.all;
- return q.all;
-}
+ if (rem)
+ *rem = n.s.high % d.s.low;
+ return n.s.high / d.s.low;
+ }
+ /* d.s.high != 0 */
+ if (n.s.low == 0)
+ {
+ /* K 0
+ * ---
+ * K 0
+ */
+ if (rem)
+ {
+ r.s.high = n.s.high % d.s.high;
+ r.s.low = 0;
+ *rem = r.all;
+ }
+ return n.s.high / d.s.high;
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ {
+ r.s.low = n.s.low;
+ r.s.high = n.s.high & (d.s.high - 1);
+ *rem = r.all;
+ }
+ return n.s.high >> __builtin_ctz(d.s.high);
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+ /* 0 <= sr <= n_uword_bits - 2 or sr large */
+ if (sr > n_uword_bits - 2)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_uword_bits - 1 */
+ /* q.all = n.all << (n_udword_bits - sr); */
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ /* r.all = n.all >> sr; */
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ else /* d.s.low != 0 */
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 K
+ */
+ if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ *rem = n.s.low & (d.s.low - 1);
+ if (d.s.low == 1)
+ return n.all;
+ sr = __builtin_ctz(d.s.low);
+ q.s.high = n.s.high >> sr;
+ q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ return q.all;
+ }
+ /* K X
+ * ---
+ * 0 K
+ */
+ sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
+ /* 2 <= sr <= n_udword_bits - 1
+ * q.all = n.all << (n_udword_bits - sr);
+ * r.all = n.all >> sr;
+ */
+ if (sr == n_uword_bits)
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
+ {
+ q.s.low = n.s.low << (n_udword_bits - sr);
+ q.s.high = (n.s.high << (n_udword_bits - sr)) |
+ (n.s.low >> (sr - n_uword_bits));
+ r.s.high = 0;
+ r.s.low = n.s.high >> (sr - n_uword_bits);
+ }
+ }
+ else
+ {
+ /* K X
+ * ---
+ * K K
+ */
+ sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+ /* 0 <= sr <= n_uword_bits - 1 or sr large */
+ if (sr > n_uword_bits - 1)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_uword_bits */
+ /* q.all = n.all << (n_udword_bits - sr); */
+ q.s.low = 0;
+ if (sr == n_uword_bits)
+ {
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else
+ {
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ }
+ }
+ /* Not a special case
+ * q and r are initialized with:
+ * q.all = n.all << (n_udword_bits - sr);
+ * r.all = n.all >> sr;
+ * 1 <= sr <= n_udword_bits - 1
+ */
+ su_int carry = 0;
+ for (; sr > 0; --sr)
+ {
+ /* r:q = ((r:q) << 1) | carry */
+ r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
+ r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
+ q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
+ q.s.low = (q.s.low << 1) | carry;
+ /* carry = 0;
+ * if (r.all >= d.all)
+ * {
+ * r.all -= d.all;
+ * carry = 1;
+ * }
+ */
+ const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
+ carry = s & 1;
+ r.all -= d.all & s;
+ }
+ q.all = (q.all << 1) | carry;
+ if (rem)
+ *rem = r.all;
+ return q.all;
+}
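The shift-subtract loop at the end of __udivmoddi4 is the part translated from the PowerPC Compiler Writer's Guide: each iteration shifts one dividend bit into the remainder and performs a branch-free conditional subtract, where the sign of d - r - 1 tells whether r >= d. A reduced standalone sketch of that loop for a plain 64-by-64-bit division is shown below; the function name is hypothetical, it assumes a nonzero divisor below 2^63 so the remainder shift cannot overflow, and it relies on arithmetic right shift of negative values just as the original does.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified shift-subtract division in the style of __udivmoddi4.
 * Assumes d != 0 and d < 2^63 so that (r << 1) | bit cannot overflow. */
static uint64_t udivmod64_sketch(uint64_t n, uint64_t d, uint64_t *rem) {
    uint64_t q = 0, r = 0;
    for (int i = 63; i >= 0; --i) {
        r = (r << 1) | ((n >> i) & 1);    /* bring down the next dividend bit */
        /* branch-free "if (r >= d) { r -= d; quotient bit = 1; }" */
        uint64_t s = (uint64_t)((int64_t)(d - r - 1) >> 63);  /* all ones iff r >= d */
        q |= (s & 1) << i;
        r -= d & s;
    }
    if (rem)
        *rem = r;
    return q;
}

int main(void) {
    uint64_t r;
    uint64_t q = udivmod64_sketch(1000003, 97, &r);
    assert(q == 1000003u / 97 && r == 1000003u % 97);
    printf("1000003 / 97 = %llu rem %llu\n",
           (unsigned long long)q, (unsigned long long)r);
    return 0;
}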
diff --git a/contrib/libs/cxxsupp/builtins/udivmodsi4.c b/contrib/libs/cxxsupp/builtins/udivmodsi4.c
index 67fab7f36f..789c4b5061 100644
--- a/contrib/libs/cxxsupp/builtins/udivmodsi4.c
+++ b/contrib/libs/cxxsupp/builtins/udivmodsi4.c
@@ -1,27 +1,27 @@
-/*===-- udivmodsi4.c - Implement __udivmodsi4 ------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivmodsi4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b, *rem = a % b */
-
-COMPILER_RT_ABI su_int
-__udivmodsi4(su_int a, su_int b, su_int* rem)
-{
- si_int d = __udivsi3(a,b);
- *rem = a - (d*b);
- return d;
-}
-
-
+/*===-- udivmodsi4.c - Implement __udivmodsi4 ------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmodsi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b, *rem = a % b */
+
+COMPILER_RT_ABI su_int
+__udivmodsi4(su_int a, su_int b, su_int* rem)
+{
+ si_int d = __udivsi3(a,b);
+ *rem = a - (d*b);
+ return d;
+}
+
+
diff --git a/contrib/libs/cxxsupp/builtins/udivmodti4.c b/contrib/libs/cxxsupp/builtins/udivmodti4.c
index 2d221f8581..803168849c 100644
--- a/contrib/libs/cxxsupp/builtins/udivmodti4.c
+++ b/contrib/libs/cxxsupp/builtins/udivmodti4.c
@@ -1,238 +1,238 @@
-/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivmodti4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Effects: if rem != 0, *rem = a % b
- * Returns: a / b
+/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmodti4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
*/
-
-/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
-
-COMPILER_RT_ABI tu_int
-__udivmodti4(tu_int a, tu_int b, tu_int* rem)
-{
- const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
- const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
- utwords n;
- n.all = a;
- utwords d;
- d.all = b;
- utwords q;
- utwords r;
- unsigned sr;
- /* special cases, X is unknown, K != 0 */
- if (n.s.high == 0)
- {
- if (d.s.high == 0)
- {
- /* 0 X
- * ---
- * 0 X
- */
- if (rem)
- *rem = n.s.low % d.s.low;
- return n.s.low / d.s.low;
- }
- /* 0 X
- * ---
- * K X
- */
- if (rem)
- *rem = n.s.low;
- return 0;
- }
- /* n.s.high != 0 */
- if (d.s.low == 0)
- {
- if (d.s.high == 0)
- {
- /* K X
- * ---
- * 0 0
- */
- if (rem)
- *rem = n.s.high % d.s.low;
- return n.s.high / d.s.low;
- }
- /* d.s.high != 0 */
- if (n.s.low == 0)
- {
- /* K 0
- * ---
- * K 0
- */
- if (rem)
- {
- r.s.high = n.s.high % d.s.high;
- r.s.low = 0;
- *rem = r.all;
- }
- return n.s.high / d.s.high;
- }
- /* K K
- * ---
- * K 0
- */
- if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
- {
- if (rem)
- {
- r.s.low = n.s.low;
- r.s.high = n.s.high & (d.s.high - 1);
- *rem = r.all;
- }
- return n.s.high >> __builtin_ctzll(d.s.high);
- }
- /* K K
- * ---
- * K 0
- */
- sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
- /* 0 <= sr <= n_udword_bits - 2 or sr large */
- if (sr > n_udword_bits - 2)
- {
- if (rem)
- *rem = n.all;
- return 0;
- }
- ++sr;
- /* 1 <= sr <= n_udword_bits - 1 */
- /* q.all = n.all << (n_utword_bits - sr); */
- q.s.low = 0;
- q.s.high = n.s.low << (n_udword_bits - sr);
- /* r.all = n.all >> sr; */
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- }
- else /* d.s.low != 0 */
- {
- if (d.s.high == 0)
- {
- /* K X
- * ---
- * 0 K
- */
- if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
- {
- if (rem)
- *rem = n.s.low & (d.s.low - 1);
- if (d.s.low == 1)
- return n.all;
- sr = __builtin_ctzll(d.s.low);
- q.s.high = n.s.high >> sr;
- q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- return q.all;
- }
- /* K X
- * ---
- * 0 K
- */
- sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
- - __builtin_clzll(n.s.high);
- /* 2 <= sr <= n_utword_bits - 1
- * q.all = n.all << (n_utword_bits - sr);
- * r.all = n.all >> sr;
- */
- if (sr == n_udword_bits)
- {
- q.s.low = 0;
- q.s.high = n.s.low;
- r.s.high = 0;
- r.s.low = n.s.high;
- }
- else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1
- {
- q.s.low = 0;
- q.s.high = n.s.low << (n_udword_bits - sr);
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- }
- else // n_udword_bits + 1 <= sr <= n_utword_bits - 1
- {
- q.s.low = n.s.low << (n_utword_bits - sr);
- q.s.high = (n.s.high << (n_utword_bits - sr)) |
- (n.s.low >> (sr - n_udword_bits));
- r.s.high = 0;
- r.s.low = n.s.high >> (sr - n_udword_bits);
- }
- }
- else
- {
- /* K X
- * ---
- * K K
- */
- sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
- /*0 <= sr <= n_udword_bits - 1 or sr large */
- if (sr > n_udword_bits - 1)
- {
- if (rem)
- *rem = n.all;
- return 0;
- }
- ++sr;
- /* 1 <= sr <= n_udword_bits
- * q.all = n.all << (n_utword_bits - sr);
- * r.all = n.all >> sr;
- */
- q.s.low = 0;
- if (sr == n_udword_bits)
- {
- q.s.high = n.s.low;
- r.s.high = 0;
- r.s.low = n.s.high;
- }
- else
- {
- r.s.high = n.s.high >> sr;
- r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
- q.s.high = n.s.low << (n_udword_bits - sr);
- }
- }
- }
- /* Not a special case
- * q and r are initialized with:
- * q.all = n.all << (n_utword_bits - sr);
- * r.all = n.all >> sr;
- * 1 <= sr <= n_utword_bits - 1
- */
- su_int carry = 0;
- for (; sr > 0; --sr)
- {
- /* r:q = ((r:q) << 1) | carry */
- r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
- r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
- q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
- q.s.low = (q.s.low << 1) | carry;
- /* carry = 0;
- * if (r.all >= d.all)
- * {
- * r.all -= d.all;
- * carry = 1;
- * }
- */
- const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
- carry = s & 1;
- r.all -= d.all & s;
- }
- q.all = (q.all << 1) | carry;
- if (rem)
- *rem = r.all;
- return q.all;
-}
-
-#endif /* CRT_HAS_128BIT */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Effects: if rem != 0, *rem = a % b
+ * Returns: a / b
+ */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+COMPILER_RT_ABI tu_int
+__udivmodti4(tu_int a, tu_int b, tu_int* rem)
+{
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+ utwords n;
+ n.all = a;
+ utwords d;
+ d.all = b;
+ utwords q;
+ utwords r;
+ unsigned sr;
+ /* special cases, X is unknown, K != 0 */
+ if (n.s.high == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* 0 X
+ * ---
+ * 0 X
+ */
+ if (rem)
+ *rem = n.s.low % d.s.low;
+ return n.s.low / d.s.low;
+ }
+ /* 0 X
+ * ---
+ * K X
+ */
+ if (rem)
+ *rem = n.s.low;
+ return 0;
+ }
+ /* n.s.high != 0 */
+ if (d.s.low == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 0
+ */
+ if (rem)
+ *rem = n.s.high % d.s.low;
+ return n.s.high / d.s.low;
+ }
+ /* d.s.high != 0 */
+ if (n.s.low == 0)
+ {
+ /* K 0
+ * ---
+ * K 0
+ */
+ if (rem)
+ {
+ r.s.high = n.s.high % d.s.high;
+ r.s.low = 0;
+ *rem = r.all;
+ }
+ return n.s.high / d.s.high;
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ {
+ r.s.low = n.s.low;
+ r.s.high = n.s.high & (d.s.high - 1);
+ *rem = r.all;
+ }
+ return n.s.high >> __builtin_ctzll(d.s.high);
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
+ /* 0 <= sr <= n_udword_bits - 2 or sr large */
+ if (sr > n_udword_bits - 2)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_udword_bits - 1 */
+ /* q.all = n.all << (n_utword_bits - sr); */
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_udword_bits - sr);
+ /* r.all = n.all >> sr; */
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ }
+ else /* d.s.low != 0 */
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 K
+ */
+ if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ *rem = n.s.low & (d.s.low - 1);
+ if (d.s.low == 1)
+ return n.all;
+ sr = __builtin_ctzll(d.s.low);
+ q.s.high = n.s.high >> sr;
+ q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ return q.all;
+ }
+ /* K X
+ * ---
+ * 0 K
+ */
+ sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
+ - __builtin_clzll(n.s.high);
+ /* 2 <= sr <= n_utword_bits - 1
+ * q.all = n.all << (n_utword_bits - sr);
+ * r.all = n.all >> sr;
+ */
+ if (sr == n_udword_bits)
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_udword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ }
+ else // n_udword_bits + 1 <= sr <= n_utword_bits - 1
+ {
+ q.s.low = n.s.low << (n_utword_bits - sr);
+ q.s.high = (n.s.high << (n_utword_bits - sr)) |
+ (n.s.low >> (sr - n_udword_bits));
+ r.s.high = 0;
+ r.s.low = n.s.high >> (sr - n_udword_bits);
+ }
+ }
+ else
+ {
+ /* K X
+ * ---
+ * K K
+ */
+ sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
+ /*0 <= sr <= n_udword_bits - 1 or sr large */
+ if (sr > n_udword_bits - 1)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_udword_bits
+ * q.all = n.all << (n_utword_bits - sr);
+ * r.all = n.all >> sr;
+ */
+ q.s.low = 0;
+ if (sr == n_udword_bits)
+ {
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else
+ {
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ q.s.high = n.s.low << (n_udword_bits - sr);
+ }
+ }
+ }
+ /* Not a special case
+ * q and r are initialized with:
+ * q.all = n.all << (n_utword_bits - sr);
+ * r.all = n.all >> sr;
+ * 1 <= sr <= n_utword_bits - 1
+ */
+ su_int carry = 0;
+ for (; sr > 0; --sr)
+ {
+ /* r:q = ((r:q) << 1) | carry */
+ r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
+ r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
+ q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
+ q.s.low = (q.s.low << 1) | carry;
+ /* carry = 0;
+ * if (r.all >= d.all)
+ * {
+ * r.all -= d.all;
+ * carry = 1;
+ * }
+ */
+ const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
+ carry = s & 1;
+ r.all -= d.all & s;
+ }
+ q.all = (q.all << 1) | carry;
+ if (rem)
+ *rem = r.all;
+ return q.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/udivsi3.c b/contrib/libs/cxxsupp/builtins/udivsi3.c
index 24c806c806..5d0140cc3e 100644
--- a/contrib/libs/cxxsupp/builtins/udivsi3.c
+++ b/contrib/libs/cxxsupp/builtins/udivsi3.c
@@ -1,66 +1,66 @@
-/* ===-- udivsi3.c - Implement __udivsi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a / b */
-
-/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
-
-ARM_EABI_FNALIAS(uidiv, udivsi3)
-
-/* This function should not call __divsi3! */
-COMPILER_RT_ABI su_int
-__udivsi3(su_int n, su_int d)
-{
- const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
- su_int q;
- su_int r;
- unsigned sr;
- /* special cases */
- if (d == 0)
- return 0; /* ?! */
- if (n == 0)
- return 0;
- sr = __builtin_clz(d) - __builtin_clz(n);
- /* 0 <= sr <= n_uword_bits - 1 or sr large */
- if (sr > n_uword_bits - 1) /* d > r */
- return 0;
- if (sr == n_uword_bits - 1) /* d == 1 */
- return n;
- ++sr;
- /* 1 <= sr <= n_uword_bits - 1 */
- /* Not a special case */
- q = n << (n_uword_bits - sr);
- r = n >> sr;
- su_int carry = 0;
- for (; sr > 0; --sr)
- {
- /* r:q = ((r:q) << 1) | carry */
- r = (r << 1) | (q >> (n_uword_bits - 1));
- q = (q << 1) | carry;
- /* carry = 0;
- * if (r.all >= d.all)
- * {
- * r.all -= d.all;
- * carry = 1;
- * }
- */
- const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1);
- carry = s & 1;
- r -= d & s;
- }
- q = (q << 1) | carry;
- return q;
-}
+/* ===-- udivsi3.c - Implement __udivsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+ARM_EABI_FNALIAS(uidiv, udivsi3)
+
+/* This function should not call __divsi3! */
+COMPILER_RT_ABI su_int
+__udivsi3(su_int n, su_int d)
+{
+ const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+ su_int q;
+ su_int r;
+ unsigned sr;
+ /* special cases */
+ if (d == 0)
+ return 0; /* ?! */
+ if (n == 0)
+ return 0;
+ sr = __builtin_clz(d) - __builtin_clz(n);
+ /* 0 <= sr <= n_uword_bits - 1 or sr large */
+ if (sr > n_uword_bits - 1) /* d > r */
+ return 0;
+ if (sr == n_uword_bits - 1) /* d == 1 */
+ return n;
+ ++sr;
+ /* 1 <= sr <= n_uword_bits - 1 */
+ /* Not a special case */
+ q = n << (n_uword_bits - sr);
+ r = n >> sr;
+ su_int carry = 0;
+ for (; sr > 0; --sr)
+ {
+ /* r:q = ((r:q) << 1) | carry */
+ r = (r << 1) | (q >> (n_uword_bits - 1));
+ q = (q << 1) | carry;
+ /* carry = 0;
+ * if (r.all >= d.all)
+ * {
+ * r.all -= d.all;
+ * carry = 1;
+ * }
+ */
+ const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1);
+ carry = s & 1;
+ r -= d & s;
+ }
+ q = (q << 1) | carry;
+ return q;
+}
diff --git a/contrib/libs/cxxsupp/builtins/udivti3.c b/contrib/libs/cxxsupp/builtins/udivti3.c
index fcc96af049..ec94673e25 100644
--- a/contrib/libs/cxxsupp/builtins/udivti3.c
+++ b/contrib/libs/cxxsupp/builtins/udivti3.c
@@ -1,27 +1,27 @@
-/* ===-- udivti3.c - Implement __udivti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI tu_int
-__udivti3(tu_int a, tu_int b)
-{
- return __udivmodti4(a, b, 0);
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- udivti3.c - Implement __udivti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI tu_int
+__udivti3(tu_int a, tu_int b)
+{
+ return __udivmodti4(a, b, 0);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/umoddi3.c b/contrib/libs/cxxsupp/builtins/umoddi3.c
index 20b1c4ef7b..d513f080a1 100644
--- a/contrib/libs/cxxsupp/builtins/umoddi3.c
+++ b/contrib/libs/cxxsupp/builtins/umoddi3.c
@@ -1,25 +1,25 @@
-/* ===-- umoddi3.c - Implement __umoddi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __umoddi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI du_int
-__umoddi3(du_int a, du_int b)
-{
- du_int r;
- __udivmoddi4(a, b, &r);
- return r;
-}
+/* ===-- umoddi3.c - Implement __umoddi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __umoddi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI du_int
+__umoddi3(du_int a, du_int b)
+{
+ du_int r;
+ __udivmoddi4(a, b, &r);
+ return r;
+}
diff --git a/contrib/libs/cxxsupp/builtins/umodsi3.c b/contrib/libs/cxxsupp/builtins/umodsi3.c
index 5218509828..d5fda4a6af 100644
--- a/contrib/libs/cxxsupp/builtins/umodsi3.c
+++ b/contrib/libs/cxxsupp/builtins/umodsi3.c
@@ -1,23 +1,23 @@
-/* ===-- umodsi3.c - Implement __umodsi3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __umodsi3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI su_int
-__umodsi3(su_int a, su_int b)
-{
- return a - __udivsi3(a, b) * b;
-}
+/* ===-- umodsi3.c - Implement __umodsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __umodsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI su_int
+__umodsi3(su_int a, su_int b)
+{
+ return a - __udivsi3(a, b) * b;
+}
diff --git a/contrib/libs/cxxsupp/builtins/umodti3.c b/contrib/libs/cxxsupp/builtins/umodti3.c
index 166fdf5394..6d1ca7a8cf 100644
--- a/contrib/libs/cxxsupp/builtins/umodti3.c
+++ b/contrib/libs/cxxsupp/builtins/umodti3.c
@@ -1,29 +1,29 @@
-/* ===-- umodti3.c - Implement __umodti3 -----------------------------------===
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __umodti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#include "int_lib.h"
-
-#ifdef CRT_HAS_128BIT
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI tu_int
-__umodti3(tu_int a, tu_int b)
-{
- tu_int r;
- __udivmodti4(a, b, &r);
- return r;
-}
-
-#endif /* CRT_HAS_128BIT */
+/* ===-- umodti3.c - Implement __umodti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __umodti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI tu_int
+__umodti3(tu_int a, tu_int b)
+{
+ tu_int r;
+ __udivmodti4(a, b, &r);
+ return r;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk b/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk
index 60219c0bc1..83848dddd9 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk
+++ b/contrib/libs/cxxsupp/builtins/x86_64/Makefile.mk
@@ -1,20 +1,20 @@
-#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := x86_64 x86_64h
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs :=
+OnlyArchs := x86_64 x86_64h
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S b/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S
index de315176c1..4149ac63d9 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S
+++ b/contrib/libs/cxxsupp/builtins/x86_64/chkstk.S
@@ -1,39 +1,39 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// _chkstk routine
-// This routine is windows specific
-// http://msdn.microsoft.com/en-us/library/ms648426.aspx
-
-// Notes from r227519
-// MSVC x64's __chkstk and cygming's ___chkstk_ms do not adjust %rsp
-// themselves. They also do not clobber %rax, so we can reuse it when
-// adjusting %rsp.
-
-#ifdef __x86_64__
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(___chkstk_ms)
- push %rcx
- push %rax
- cmp $0x1000,%rax
- lea 24(%rsp),%rcx
- jb 1f
-2:
- sub $0x1000,%rcx
- test %rcx,(%rcx)
- sub $0x1000,%rax
- cmp $0x1000,%rax
- ja 2b
-1:
- sub %rax,%rcx
- test %rcx,(%rcx)
- pop %rax
- pop %rcx
- ret
-END_COMPILERRT_FUNCTION(___chkstk_ms)
-
-#endif // __x86_64__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// _chkstk routine
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// Notes from r227519
+// MSVC x64's __chkstk and cygming's ___chkstk_ms do not adjust %rsp
+// themselves. They also do not clobber %rax, so we can reuse it when
+// adjusting %rsp.
+
+#ifdef __x86_64__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(___chkstk_ms)
+ push %rcx
+ push %rax
+ cmp $0x1000,%rax
+ lea 24(%rsp),%rcx
+ jb 1f
+2:
+ sub $0x1000,%rcx
+ test %rcx,(%rcx)
+ sub $0x1000,%rax
+ cmp $0x1000,%rax
+ ja 2b
+1:
+ sub %rax,%rcx
+ test %rcx,(%rcx)
+ pop %rax
+ pop %rcx
+ ret
+END_COMPILERRT_FUNCTION(___chkstk_ms)
+
+#endif // __x86_64__
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S b/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S
index 24f4ab1727..ac1eb920e0 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S
+++ b/contrib/libs/cxxsupp/builtins/x86_64/chkstk2.S
@@ -1,42 +1,42 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-#ifdef __x86_64__
-
-// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments,
-// then decrement %rsp by %rax. Preserves all registers except %rsp and flags.
-// This routine is windows specific
-// http://msdn.microsoft.com/en-us/library/ms648426.aspx
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__alloca)
- mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx
- // fallthrough
-DEFINE_COMPILERRT_FUNCTION(___chkstk)
- push %rcx
- cmp $0x1000,%rax
- lea 16(%rsp),%rcx // rsp before calling this routine -> rcx
- jb 1f
-2:
- sub $0x1000,%rcx
- test %rcx,(%rcx)
- sub $0x1000,%rax
- cmp $0x1000,%rax
- ja 2b
-1:
- sub %rax,%rcx
- test %rcx,(%rcx)
-
- lea 8(%rsp),%rax // load pointer to the return address into rax
- mov %rcx,%rsp // install the new top of stack pointer into rsp
- mov -8(%rax),%rcx // restore rcx
- push (%rax) // push return address onto the stack
- sub %rsp,%rax // restore the original value in rax
- ret
-END_COMPILERRT_FUNCTION(___chkstk)
-END_COMPILERRT_FUNCTION(__alloca)
-
-#endif // __x86_64__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __x86_64__
+
+// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments,
+// then decrement %rsp by %rax. Preserves all registers except %rsp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__alloca)
+ mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx
+ // fallthrough
+DEFINE_COMPILERRT_FUNCTION(___chkstk)
+ push %rcx
+ cmp $0x1000,%rax
+ lea 16(%rsp),%rcx // rsp before calling this routine -> rcx
+ jb 1f
+2:
+ sub $0x1000,%rcx
+ test %rcx,(%rcx)
+ sub $0x1000,%rax
+ cmp $0x1000,%rax
+ ja 2b
+1:
+ sub %rax,%rcx
+ test %rcx,(%rcx)
+
+ lea 8(%rsp),%rax // load pointer to the return address into rax
+ mov %rcx,%rsp // install the new top of stack pointer into rsp
+ mov -8(%rax),%rcx // restore rcx
+ push (%rax) // push return address onto the stack
+ sub %rsp,%rax // restore the original value in rax
+ ret
+END_COMPILERRT_FUNCTION(___chkstk)
+END_COMPILERRT_FUNCTION(__alloca)
+
+#endif // __x86_64__
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c b/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c
index 6bf8e90b1c..388404e5e0 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c
+++ b/contrib/libs/cxxsupp/builtins/x86_64/floatdidf.c
@@ -1,16 +1,16 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* double __floatdidf(di_int a); */
-
-#ifdef __x86_64__
-
-#include "../int_lib.h"
-
-double __floatdidf(int64_t a)
-{
- return (double)a;
-}
-
-#endif /* __x86_64__ */
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* double __floatdidf(di_int a); */
+
+#ifdef __x86_64__
+
+#include "../int_lib.h"
+
+double __floatdidf(int64_t a)
+{
+ return (double)a;
+}
+
+#endif /* __x86_64__ */
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c b/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c
index 92fc82d2cb..96c3728e92 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c
+++ b/contrib/libs/cxxsupp/builtins/x86_64/floatdisf.c
@@ -1,14 +1,14 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-#ifdef __x86_64__
-
-#include "../int_lib.h"
-
-float __floatdisf(int64_t a)
-{
- return (float)a;
-}
-
-#endif /* __x86_64__ */
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#ifdef __x86_64__
+
+#include "../int_lib.h"
+
+float __floatdisf(int64_t a)
+{
+ return (float)a;
+}
+
+#endif /* __x86_64__ */
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c b/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c
index 8d308e6fb0..c01193a82b 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c
+++ b/contrib/libs/cxxsupp/builtins/x86_64/floatdixf.c
@@ -1,16 +1,16 @@
-/* This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- */
-
-/* long double __floatdixf(di_int a); */
-
-#ifdef __x86_64__
-
-#include "../int_lib.h"
-
-long double __floatdixf(int64_t a)
-{
- return (long double)a;
-}
-
-#endif /* __x86_64__ */
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __floatdixf(di_int a); */
+
+#ifdef __x86_64__
+
+#include "../int_lib.h"
+
+long double __floatdixf(int64_t a)
+{
+ return (long double)a;
+}
+
+#endif /* __x86_64__ */
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S b/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S
index 6b2f061391..3cd5d02a74 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S
+++ b/contrib/libs/cxxsupp/builtins/x86_64/floatundidf.S
@@ -1,49 +1,49 @@
-//===-- floatundidf.S - Implement __floatundidf for x86_64 ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatundidf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "../assembly.h"
-
-// double __floatundidf(du_int a);
-
-#ifdef __x86_64__
-
-CONST_SECTION
-
- .balign 16
-twop52:
- .quad 0x4330000000000000
-
- .balign 16
-twop84_plus_twop52:
- .quad 0x4530000000100000
-
- .balign 16
-twop84:
- .quad 0x4530000000000000
-
-#define REL_ADDR(_a) (_a)(%rip)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundidf)
- movd %edi, %xmm0 // low 32 bits of a
- shrq $32, %rdi // high 32 bits of a
- orq REL_ADDR(twop84), %rdi // 0x1p84 + a_hi (no rounding occurs)
- orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs)
- movd %rdi, %xmm1
- subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs)
- addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
- ret
-END_COMPILERRT_FUNCTION(__floatundidf)
-
-#endif // __x86_64__
+//===-- floatundidf.S - Implement __floatundidf for x86_64 ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// double __floatundidf(du_int a);
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop84_plus_twop52:
+ .quad 0x4530000000100000
+
+ .balign 16
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundidf)
+ movd %edi, %xmm0 // low 32 bits of a
+ shrq $32, %rdi // high 32 bits of a
+ orq REL_ADDR(twop84), %rdi // 0x1p84 + a_hi (no rounding occurs)
+ orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs)
+ movd %rdi, %xmm1
+ subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs)
+ addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
+ ret
+END_COMPILERRT_FUNCTION(__floatundidf)
+
+#endif // __x86_64__
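The constants above implement an exact split-and-add conversion: the low 32 bits of the input are planted in the mantissa of 2^52 and the high 32 bits in the mantissa of 2^84, so the only rounding happens in the final addition, exactly as the inline comments note. A C rendering of the same trick, with a hypothetical function name and memcpy used to build the bit patterns, can be checked against the compiler's own conversion:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Same constant trick as __floatundidf above, written in C for illustration. */
static double u64_to_double_sketch(uint64_t a) {
    uint64_t lo_bits = UINT64_C(0x4330000000000000) | (a & 0xffffffffu); /* 2^52 + a_lo, exact */
    uint64_t hi_bits = UINT64_C(0x4530000000000000) | (a >> 32);         /* 2^84 + a_hi*2^32, exact */
    uint64_t k_bits  = UINT64_C(0x4530000000100000);                     /* 2^84 + 2^52 */
    double lo, hi, k;
    memcpy(&lo, &lo_bits, sizeof lo);
    memcpy(&hi, &hi_bits, sizeof hi);
    memcpy(&k,  &k_bits,  sizeof k);
    /* hi - k is exact; the final addition is the only rounding step. */
    return (hi - k) + lo;
}

int main(void) {
    uint64_t samples[] = { 0, 1, 0x7fffffffu, 0x100000000ull,
                           0x123456789abcdef0ull, UINT64_MAX };
    for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; ++i)
        assert(u64_to_double_sketch(samples[i]) == (double)samples[i]);
    return 0;
}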
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S b/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S
index ad45327114..61952f4047 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S
+++ b/contrib/libs/cxxsupp/builtins/x86_64/floatundisf.S
@@ -1,35 +1,35 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// float __floatundisf(du_int a);
-
-#ifdef __x86_64__
-
-CONST_SECTION
-
- .balign 16
-two:
- .single 2.0
-
-#define REL_ADDR(_a) (_a)(%rip)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundisf)
- movq $1, %rsi
- testq %rdi, %rdi
- js 1f
- cvtsi2ssq %rdi, %xmm0
- ret
-
-1: andq %rdi, %rsi
- shrq %rdi
- orq %rsi, %rdi
- cvtsi2ssq %rdi, %xmm0
- mulss REL_ADDR(two), %xmm0
- ret
-END_COMPILERRT_FUNCTION(__floatundisf)
-
-#endif // __x86_64__
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatundisf(du_int a);
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 16
+two:
+ .single 2.0
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundisf)
+ movq $1, %rsi
+ testq %rdi, %rdi
+ js 1f
+ cvtsi2ssq %rdi, %xmm0
+ ret
+
+1: andq %rdi, %rsi
+ shrq %rdi
+ orq %rsi, %rdi
+ cvtsi2ssq %rdi, %xmm0
+ mulss REL_ADDR(two), %xmm0
+ ret
+END_COMPILERRT_FUNCTION(__floatundisf)
+
+#endif // __x86_64__
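When the top bit of the input is set, the routine above halves the value, folds the shifted-out bit back into the new least-significant bit, converts as a signed integer, and doubles the result; the sticky bit makes the single rounding in the conversion agree with what the full-width value would have required. A hedged C++ sketch of that technique, assuming only a signed 64-bit-to-float conversion is available (the function name is a placeholder, not part of compiler-rt):

    #include <cstdint>

    float floatundisf_sketch(std::uint64_t a) {
        if (a < (1ULL << 63))
            return static_cast<float>(static_cast<std::int64_t>(a));  // fits a signed convert
        // Halve, but keep the shifted-out bit as a sticky LSB ("round to odd") so the
        // signed conversion below rounds the same way the full value would have.
        std::uint64_t halved = (a >> 1) | (a & 1);
        return 2.0f * static_cast<float>(static_cast<std::int64_t>(halved));  // doubling is exact
    }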
diff --git a/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S b/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S
index b3bac15b9e..92961c8911 100644
--- a/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S
+++ b/contrib/libs/cxxsupp/builtins/x86_64/floatundixf.S
@@ -1,68 +1,68 @@
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-
-#include "../assembly.h"
-
-// long double __floatundixf(du_int a);
-
-#ifdef __x86_64__
-
-CONST_SECTION
-
- .balign 16
-twop64:
- .quad 0x43f0000000000000
-
-#define REL_ADDR(_a) (_a)(%rip)
-
- .text
-
- .balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundixf)
- movq %rdi, -8(%rsp)
- fildq -8(%rsp)
- test %rdi, %rdi
- js 1f
- ret
-1: faddl REL_ADDR(twop64)
- ret
-END_COMPILERRT_FUNCTION(__floatundixf)
-
-#endif // __x86_64__
-
-
-/* Branch-free implementation is ever so slightly slower, but more beautiful.
- It is likely superior for inlining, so I kept it around for future reference.
-
-#ifdef __x86_64__
-
-CONST_SECTION
-
- .balign 4
-twop52:
- .quad 0x4330000000000000
-twop84_plus_twop52_neg:
- .quad 0xc530000000100000
-twop84:
- .quad 0x4530000000000000
-
-#define REL_ADDR(_a) (_a)(%rip)
-
-.text
-.balign 4
-DEFINE_COMPILERRT_FUNCTION(__floatundixf)
- movl %edi, %esi // low 32 bits of input
- shrq $32, %rdi // hi 32 bits of input
- orq REL_ADDR(twop84), %rdi // 2^84 + hi (as a double)
- orq REL_ADDR(twop52), %rsi // 2^52 + lo (as a double)
- movq %rdi, -8(%rsp)
- movq %rsi, -16(%rsp)
- fldl REL_ADDR(twop84_plus_twop52_neg)
- faddl -8(%rsp) // hi - 2^52 (as double extended, no rounding occurs)
- faddl -16(%rsp) // hi + lo (as double extended)
- ret
-END_COMPILERRT_FUNCTION(__floatundixf)
-
-#endif // __x86_64__
-
-*/
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// long double __floatundixf(du_int a);
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 16
+twop64:
+ .quad 0x43f0000000000000
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+ .text
+
+ .balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundixf)
+ movq %rdi, -8(%rsp)
+ fildq -8(%rsp)
+ test %rdi, %rdi
+ js 1f
+ ret
+1: faddl REL_ADDR(twop64)
+ ret
+END_COMPILERRT_FUNCTION(__floatundixf)
+
+#endif // __x86_64__
+
+
+/* Branch-free implementation is ever so slightly slower, but more beautiful.
+ It is likely superior for inlining, so I kept it around for future reference.
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 4
+twop52:
+ .quad 0x4330000000000000
+twop84_plus_twop52_neg:
+ .quad 0xc530000000100000
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundixf)
+ movl %edi, %esi // low 32 bits of input
+ shrq $32, %rdi // hi 32 bits of input
+ orq REL_ADDR(twop84), %rdi // 2^84 + hi (as a double)
+ orq REL_ADDR(twop52), %rsi // 2^52 + lo (as a double)
+ movq %rdi, -8(%rsp)
+ movq %rsi, -16(%rsp)
+ fldl REL_ADDR(twop84_plus_twop52_neg)
+ faddl -8(%rsp) // hi - 2^52 (as double extended, no rounding occurs)
+ faddl -16(%rsp) // hi + lo (as double extended)
+ ret
+END_COMPILERRT_FUNCTION(__floatundixf)
+
+#endif // __x86_64__
+
+*/
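Both variants above lean on x87 extended precision: the live one loads the 64-bit pattern as a signed integer (fildq) and, if the sign bit was set, corrects the result by adding 2^64, while the commented-out branch-free version reuses the 0x1p52/0x1p84 split from __floatundidf. A small C++ sketch of the load-and-correct approach, assuming an 80-bit long double as on x86_64 (names are mine, not compiler-rt's):

    #include <cstdint>

    long double floatundixf_sketch(std::uint64_t a) {
        // Interpreted as signed, values with the top bit set come out negative,
        // off by exactly 2^64 from the unsigned value they encode.
        long double x = static_cast<long double>(static_cast<std::int64_t>(a));
        if (static_cast<std::int64_t>(a) < 0)
            x += 18446744073709551616.0L;  // add 2^64 back; exact with a 64-bit mantissa
        return x;
    }

On ABIs where long double is only a 64-bit double the final addition may round, which is one reason the assembly is guarded by __x86_64__.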
diff --git a/contrib/libs/cxxsupp/builtins/ya.make b/contrib/libs/cxxsupp/builtins/ya.make
index bc4a815c9b..d2c319c927 100644
--- a/contrib/libs/cxxsupp/builtins/ya.make
+++ b/contrib/libs/cxxsupp/builtins/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
+LIBRARY()
+
# Part of compiler-rt LLVM subproject
# git repository: https://github.com/llvm/llvm-project.git
@@ -28,7 +28,7 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
# Check MUSL before NO_PLATFORM() disables it.
IF (MUSL)
# We use C headers despite NO_PLATFORM, but we do not propagate
@@ -43,14 +43,14 @@ IF (MUSL)
)
ENDIF()
-NO_UTIL()
+NO_UTIL()
+
+NO_RUNTIME()
-NO_RUNTIME()
+NO_PLATFORM()
-NO_PLATFORM()
+NO_COMPILER_WARNINGS()
-NO_COMPILER_WARNINGS()
-
IF (GCC OR CLANG)
# Clang (maybe GCC too) LTO code generator leaves the builtin calls unresolved
# even if they are available. After the code generation pass is done
@@ -61,7 +61,7 @@ IF (GCC OR CLANG)
# Just generate native code from the beginning.
DISABLE(USE_LTO)
ENDIF()
-
+
SRCS(
addtf3.c
ashlti3.c
@@ -109,7 +109,7 @@ SRCS(
udivti3.c
umodti3.c
)
-
+
IF (OS_DARWIN OR OS_IOS)
SRCS(
os_version_check.c
@@ -123,4 +123,4 @@ IF (ARCH_ARM)
)
ENDIF()
-END()
+END()
diff --git a/contrib/libs/cxxsupp/libcxx/include/deque b/contrib/libs/cxxsupp/libcxx/include/deque
index da489aa457..67cd6654f1 100644
--- a/contrib/libs/cxxsupp/libcxx/include/deque
+++ b/contrib/libs/cxxsupp/libcxx/include/deque
@@ -280,9 +280,9 @@ move_backward(__deque_iterator<_V1, _P1, _R1, _M1, _D1, _B1> __f,
template <class _ValueType, class _DiffType>
struct __deque_block_size {
- static const _DiffType __buf_size = 64 * sizeof(void*);
- static const _DiffType value = (__buf_size / sizeof(_ValueType)) > 2 ? (__buf_size / sizeof(_ValueType)) : 2;
- //static const _DiffType value = sizeof(_ValueType) < 256 ? 4096 / sizeof(_ValueType) : 16;
+ static const _DiffType __buf_size = 64 * sizeof(void*);
+ static const _DiffType value = (__buf_size / sizeof(_ValueType)) > 2 ? (__buf_size / sizeof(_ValueType)) : 2;
+ //static const _DiffType value = sizeof(_ValueType) < 256 ? 4096 / sizeof(_ValueType) : 16;
};
template <class _ValueType, class _Pointer, class _Reference, class _MapPointer,
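For orientation, the block-size formula kept above sizes each deque buffer at 64 * sizeof(void*) bytes and never allows fewer than two elements per block. A small worked example in C++, assuming a 64-bit target where sizeof(void*) == 8 (the helper is illustrative, not part of libc++):

    #include <cstddef>

    template <class T>
    constexpr std::ptrdiff_t deque_block_size() {
        constexpr std::ptrdiff_t buf_size = 64 * sizeof(void*);       // 512 bytes on LP64
        return buf_size / sizeof(T) > 2 ? buf_size / sizeof(T) : 2;   // at least 2 per block
    }

    static_assert(deque_block_size<int>() == 128, "512 / 4 elements per block");
    static_assert(deque_block_size<char[300]>() == 2, "oversized elements fall back to 2");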
diff --git a/contrib/libs/cxxsupp/libcxx/ya.make b/contrib/libs/cxxsupp/libcxx/ya.make
index 3940d90421..15403fe6d5 100644
--- a/contrib/libs/cxxsupp/libcxx/ya.make
+++ b/contrib/libs/cxxsupp/libcxx/ya.make
@@ -45,9 +45,9 @@ IF (OS_ANDROID)
CFLAGS(
-DLIBCXX_BUILDING_LIBCXXABI
)
-ELSEIF (OS_IOS)
+ELSEIF (OS_IOS)
# Take cxxabi implementation from system.
- LDFLAGS(-lc++abi)
+ LDFLAGS(-lc++abi)
CFLAGS(
-DLIBCXX_BUILDING_LIBCXXABI
)
diff --git a/contrib/libs/cxxsupp/libcxxrt/exception.cc b/contrib/libs/cxxsupp/libcxxrt/exception.cc
index 4d5ee1b4f7..6baf428ead 100644
--- a/contrib/libs/cxxsupp/libcxxrt/exception.cc
+++ b/contrib/libs/cxxsupp/libcxxrt/exception.cc
@@ -208,7 +208,7 @@ namespace std
virtual ~exception();
virtual const char* what() const noexcept;
};
-
+
}
/**
@@ -273,69 +273,69 @@ namespace std
using namespace ABI_NAMESPACE;
-/**
- * Callback function used with _Unwind_Backtrace().
- *
- * Prints a stack trace. Used only for debugging help.
- *
- * Note: As of FreeBSD 8.1, dladdr() still doesn't work properly, so this only
- * correctly prints function names from public, relocatable, symbols.
- */
-static _Unwind_Reason_Code trace(struct _Unwind_Context *context, void *c)
-{
- Dl_info myinfo;
- int mylookup =
- dladdr(reinterpret_cast<void *>(__cxa_current_exception_type), &myinfo);
- void *ip = reinterpret_cast<void*>(_Unwind_GetIP(context));
- Dl_info info;
- if (dladdr(ip, &info) != 0)
- {
- if (mylookup == 0 || strcmp(info.dli_fname, myinfo.dli_fname) != 0)
- {
- printf("%p:%s() in %s\n", ip, info.dli_sname, info.dli_fname);
- }
- }
- return _URC_CONTINUE_UNWIND;
-}
-
-static void bt_terminate_handler() {
- __cxa_eh_globals* globals = __cxa_get_globals();
- __cxa_exception* thrown_exception = globals->caughtExceptions;
-
- if (!thrown_exception) {
- abort();
- }
-
- fprintf(stderr, "uncaught exception:\n address -> %p\n", (void*)thrown_exception);
- thrown_exception = realExceptionFromException(thrown_exception);
-
- const __class_type_info *e_ti = static_cast<const __class_type_info*>(&typeid(std::exception));
- const __class_type_info *throw_ti = dynamic_cast<const __class_type_info*>(thrown_exception->exceptionType);
-
- if (throw_ti) {
- void* ptr = thrown_exception + 1;
-
- if (throw_ti->__do_upcast(e_ti, &ptr)) {
- std::exception* e = static_cast<std::exception*>(ptr);
-
- if (e) {
- fprintf(stderr, " what() -> \"%s\"\n", e->what());
- }
- }
- }
-
- size_t bufferSize = 128;
- char *demangled = static_cast<char*>(malloc(bufferSize));
- const char *mangled = thrown_exception->exceptionType->name();
- int status;
- demangled = __cxa_demangle(mangled, demangled, &bufferSize, &status);
- fprintf(stderr, " type -> %s\n", status == 0 ? demangled : mangled);
- if (status == 0) { free(demangled); }
- abort();
-}
-
+/**
+ * Callback function used with _Unwind_Backtrace().
+ *
+ * Prints a stack trace. Used only for debugging help.
+ *
+ * Note: As of FreeBSD 8.1, dladdr() still doesn't work properly, so this only
+ * correctly prints function names from public, relocatable, symbols.
+ */
+static _Unwind_Reason_Code trace(struct _Unwind_Context *context, void *c)
+{
+ Dl_info myinfo;
+ int mylookup =
+ dladdr(reinterpret_cast<void *>(__cxa_current_exception_type), &myinfo);
+ void *ip = reinterpret_cast<void*>(_Unwind_GetIP(context));
+ Dl_info info;
+ if (dladdr(ip, &info) != 0)
+ {
+ if (mylookup == 0 || strcmp(info.dli_fname, myinfo.dli_fname) != 0)
+ {
+ printf("%p:%s() in %s\n", ip, info.dli_sname, info.dli_fname);
+ }
+ }
+ return _URC_CONTINUE_UNWIND;
+}
+
+static void bt_terminate_handler() {
+ __cxa_eh_globals* globals = __cxa_get_globals();
+ __cxa_exception* thrown_exception = globals->caughtExceptions;
+
+ if (!thrown_exception) {
+ abort();
+ }
+
+ fprintf(stderr, "uncaught exception:\n address -> %p\n", (void*)thrown_exception);
+ thrown_exception = realExceptionFromException(thrown_exception);
+
+ const __class_type_info *e_ti = static_cast<const __class_type_info*>(&typeid(std::exception));
+ const __class_type_info *throw_ti = dynamic_cast<const __class_type_info*>(thrown_exception->exceptionType);
+
+ if (throw_ti) {
+ void* ptr = thrown_exception + 1;
+
+ if (throw_ti->__do_upcast(e_ti, &ptr)) {
+ std::exception* e = static_cast<std::exception*>(ptr);
+
+ if (e) {
+ fprintf(stderr, " what() -> \"%s\"\n", e->what());
+ }
+ }
+ }
+
+ size_t bufferSize = 128;
+ char *demangled = static_cast<char*>(malloc(bufferSize));
+ const char *mangled = thrown_exception->exceptionType->name();
+ int status;
+ demangled = __cxa_demangle(mangled, demangled, &bufferSize, &status);
+ fprintf(stderr, " type -> %s\n", status == 0 ? demangled : mangled);
+ if (status == 0) { free(demangled); }
+ abort();
+}
+
/** The global termination handler. */
-static terminate_handler terminateHandler = bt_terminate_handler;
+static terminate_handler terminateHandler = bt_terminate_handler;
/** The global unexpected exception handler. */
static unexpected_handler unexpectedHandler = std::terminate;
@@ -377,44 +377,44 @@ static void free_exception_list(__cxa_exception *ex)
__cxa_free_exception(ex+1);
}
-#define fast_ti_size 100
-
-static long fast_ti_index;
-static __cxa_thread_info fast_ti[fast_ti_size];
-
-static inline __cxa_thread_info* alloc_thread_info() {
- {
- long cur_index;
-
- __atomic_load(&fast_ti_index, &cur_index, __ATOMIC_SEQ_CST);
-
- // exhausted a long time ago
- if (cur_index >= fast_ti_size) {
- return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info)));
- }
- }
-
- auto my_index = __sync_fetch_and_add(&fast_ti_index, 1);
-
- // exhausted
- if (my_index >= fast_ti_size) {
- return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info)));
- }
-
- // fast path
- auto& ret = fast_ti[my_index];
-
- memset(&ret, 0, sizeof(ret));
-
- return &ret;
-}
-
-static inline void free_thread_info(__cxa_thread_info* ti) {
- if ((ti < fast_ti) || (ti >= (fast_ti + fast_ti_size))) {
- free(ti);
- }
-}
-
+#define fast_ti_size 100
+
+static long fast_ti_index;
+static __cxa_thread_info fast_ti[fast_ti_size];
+
+static inline __cxa_thread_info* alloc_thread_info() {
+ {
+ long cur_index;
+
+ __atomic_load(&fast_ti_index, &cur_index, __ATOMIC_SEQ_CST);
+
+ // exhausted a long time ago
+ if (cur_index >= fast_ti_size) {
+ return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info)));
+ }
+ }
+
+ auto my_index = __sync_fetch_and_add(&fast_ti_index, 1);
+
+ // exhausted
+ if (my_index >= fast_ti_size) {
+ return static_cast<__cxa_thread_info*>(calloc(1, sizeof(__cxa_thread_info)));
+ }
+
+ // fast path
+ auto& ret = fast_ti[my_index];
+
+ memset(&ret, 0, sizeof(ret));
+
+ return &ret;
+}
+
+static inline void free_thread_info(__cxa_thread_info* ti) {
+ if ((ti < fast_ti) || (ti >= (fast_ti + fast_ti_size))) {
+ free(ti);
+ }
+}
+
/**
 * Cleanup function called when a thread exits to make certain that all of the
* per-thread data is deleted.
@@ -436,7 +436,7 @@ static void thread_cleanup(void* thread_info)
free_exception_list(info->globals.caughtExceptions);
}
}
- free_thread_info(info);
+ free_thread_info(info);
}
/**
@@ -457,8 +457,8 @@ static void init_key(void)
pthread_setspecific(eh_key, 0);
}
-static __thread __cxa_thread_info* THR_INFO = nullptr;
-
+static __thread __cxa_thread_info* THR_INFO = nullptr;
+
/**
* Returns the thread info structure, creating it if it is not already created.
*/
@@ -477,14 +477,14 @@ static __cxa_thread_info *thread_info()
THR_INFO = info;
return info;
}
-
-// ensure main thread will allocate preallocated tls
-static struct InitMainTls {
- inline InitMainTls() {
- thread_info();
- }
-} init_main_tls;
-
+
+// ensure main thread will allocate preallocated tls
+static struct InitMainTls {
+ inline InitMainTls() {
+ thread_info();
+ }
+} init_main_tls;
+
/**
* Fast version of thread_info(). May fail if thread_info() is not called on
* this thread at least once already.
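The alloc_thread_info/free_thread_info pair restored above is a small lock-free pool: a bump index into a static array of 100 slots, a heap fallback once the pool is exhausted, and free() only for pointers that fall outside the array (so the main thread's slot, reserved at startup by init_main_tls, costs no allocation). A portable sketch of the same pattern using std::atomic instead of the GCC builtins; the type and function names are placeholders, and the original's pre-check of the index before incrementing is omitted for brevity:

    #include <atomic>
    #include <cstdlib>
    #include <cstring>

    struct ThreadInfo { /* per-thread exception-handling state */ };

    constexpr int kFastSlots = 100;
    static ThreadInfo fast_slots[kFastSlots];
    static std::atomic<int> next_slot{0};

    ThreadInfo* alloc_thread_info_sketch() {
        int idx = next_slot.fetch_add(1);              // claim a slot index atomically
        if (idx >= kFastSlots)                         // pool exhausted: fall back to the heap
            return static_cast<ThreadInfo*>(std::calloc(1, sizeof(ThreadInfo)));
        std::memset(&fast_slots[idx], 0, sizeof(ThreadInfo));
        return &fast_slots[idx];
    }

    void free_thread_info_sketch(ThreadInfo* ti) {
        if (ti < fast_slots || ti >= fast_slots + kFastSlots)
            std::free(ti);                             // only heap-allocated blocks are freed
    }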
diff --git a/contrib/libs/cxxsupp/libcxxrt/ya.make b/contrib/libs/cxxsupp/libcxxrt/ya.make
index feab5bda04..12dccbd505 100644
--- a/contrib/libs/cxxsupp/libcxxrt/ya.make
+++ b/contrib/libs/cxxsupp/libcxxrt/ya.make
@@ -1,7 +1,7 @@
# Generated by devtools/yamaker from nixpkgs 9ee8bd188933750be0584f285daf9a295d0c8930.
LIBRARY()
-
+
LICENSE(
BSD-2-Clause AND
BSD-2-Clause-Views AND
@@ -42,11 +42,11 @@ ELSE()
ENDIF()
IF (SANITIZER_TYPE == undefined OR FUZZING)
- NO_SANITIZE()
+ NO_SANITIZE()
NO_SANITIZE_COVERAGE()
ENDIF()
-
-SRCS(
+
+SRCS(
auxhelper.cc
dynamic_cast.cc
exception.cc
diff --git a/contrib/libs/cxxsupp/libsan/ya.make b/contrib/libs/cxxsupp/libsan/ya.make
index 8c54354b93..2fb16630be 100644
--- a/contrib/libs/cxxsupp/libsan/ya.make
+++ b/contrib/libs/cxxsupp/libsan/ya.make
@@ -3,7 +3,7 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
LICENSE(YandexOpen)
-
+
NO_PLATFORM()
NO_SANITIZE()
diff --git a/contrib/libs/cxxsupp/openmp/asm.S b/contrib/libs/cxxsupp/openmp/asm.S
index 426373bee3..1c869244ef 100644
--- a/contrib/libs/cxxsupp/openmp/asm.S
+++ b/contrib/libs/cxxsupp/openmp/asm.S
@@ -1 +1 @@
-#include "z_Linux_asm.s"
+#include "z_Linux_asm.s"
diff --git a/contrib/libs/cxxsupp/openmp/extractExternal.cpp b/contrib/libs/cxxsupp/openmp/extractExternal.cpp
index 83c61869e8..7a6fdb7e29 100644
--- a/contrib/libs/cxxsupp/openmp/extractExternal.cpp
+++ b/contrib/libs/cxxsupp/openmp/extractExternal.cpp
@@ -1,497 +1,497 @@
-/*
- * extractExternal.cpp
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include <stdlib.h>
-#include <iostream>
-#include <strstream>
-#include <fstream>
-#include <string>
-#include <set>
-#include <map>
-
-/* Given a set of n object files h ('external' object files) and a set of m
- object files o ('internal' object files),
- 1. Determines r, the subset of h that o depends on, directly or indirectly
- 2. Removes the files in h - r from the file system
- 3. For each external symbol defined in some file in r, rename it in r U o
- by prefixing it with "__kmp_external_"
- Usage:
- hide.exe <n> <filenames for h> <filenames for o>
-
- Thus, the prefixed symbols become hidden in the sense that they now have a special
- prefix.
-*/
-
-using namespace std;
-
-void stop(char* errorMsg) {
- printf("%s\n", errorMsg);
- exit(1);
-}
-
-// an entry in the symbol table of a .OBJ file
-class Symbol {
-public:
- __int64 name;
- unsigned value;
- unsigned short sectionNum, type;
- char storageClass, nAux;
-};
-
-class _rstream : public istrstream {
-private:
- const char *buf;
-protected:
- _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
- ~_rstream() {
- delete[]buf;
- }
-};
-
-/* A stream encapsulating the content of a file or the content of a string, overriding the
- >> operator to read various integer types in binary form, as well as a symbol table
- entry.
-*/
-class rstream : public _rstream {
-private:
- template<class T>
- inline rstream& doRead(T &x) {
- read((char*)&x, sizeof(T));
- return *this;
- }
- static pair<const char*, streamsize> getBuf(const char *fileName) {
- ifstream raw(fileName,ios::binary | ios::in);
- if(!raw.is_open())
- stop("rstream.getBuf: Error opening file");
- raw.seekg(0,ios::end);
- streampos fileSize = raw.tellg();
- if(fileSize < 0)
- stop("rstream.getBuf: Error reading file");
- char *buf = new char[fileSize];
- raw.seekg(0,ios::beg);
- raw.read(buf, fileSize);
- return pair<const char*, streamsize>(buf,fileSize);
- }
-public:
- // construct from a string
- rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
- /* construct from a file whose content is fully read once to initialize the content of
- this stream
- */
- rstream(const char *fileName):_rstream(getBuf(fileName)){}
- rstream& operator>>(int &x) {
- return doRead(x);
- }
- rstream& operator>>(unsigned &x) {
- return doRead(x);
- }
- rstream& operator>>(short &x) {
- return doRead(x);
- }
- rstream& operator>>(unsigned short &x) {
- return doRead(x);
- }
- rstream& operator>>(Symbol &e) {
- read((char*)&e, 18);
- return *this;
- }
-};
-
-// string table in a .OBJ file
-class StringTable {
-private:
- map<string, unsigned> directory;
- size_t length;
- char *data;
-
- // make <directory> from <length> bytes in <data>
- void makeDirectory(void) {
- unsigned i = 4;
- while(i < length) {
- string s = string(data + i);
- directory.insert(make_pair(s, i));
- i += s.size() + 1;
- }
- }
- // initialize <length> and <data> with contents specified by the arguments
- void init(const char *_data) {
- unsigned _length = *(unsigned*)_data;
-
- if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
- stop("StringTable.init: Invalid symbol table");
- if(_data[_length - 1]) {
- // to prevent runaway strings, make sure the data ends with a zero
- data = new char[length = _length + 1];
- data[_length] = 0;
- } else {
- data = new char[length = _length];
- }
- *(unsigned*)data = length;
- KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
- length - sizeof(unsigned));
- makeDirectory();
- }
-public:
- StringTable(rstream &f) {
- /* Construct string table by reading from f.
- */
- streampos s;
- unsigned strSize;
- char *strData;
-
- s = f.tellg();
- f>>strSize;
- if(strSize < sizeof(unsigned))
- stop("StringTable: Invalid string table");
- strData = new char[strSize];
- *(unsigned*)strData = strSize;
- // read the raw data into <strData>
- f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
- s = f.tellg() - s;
- if(s < strSize)
- stop("StringTable: Unexpected EOF");
- init(strData);
- delete[]strData;
- }
- StringTable(const set<string> &strings) {
- /* Construct string table from given strings.
- */
- char *p;
- set<string>::const_iterator it;
- size_t s;
-
- // count required size for data
- for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
- size_t l = (*it).size();
-
- if(l > (unsigned) 0xFFFFFFFF)
- stop("StringTable: String too long");
- if(l > 8) {
- length += l + 1;
- if(length > (unsigned) 0xFFFFFFFF)
- stop("StringTable: Symbol table too long");
- }
- }
- data = new char[length];
- *(unsigned*)data = length;
- // populate data and directory
- for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
- const string &str = *it;
- size_t l = str.size();
- if(l > 8) {
- directory.insert(make_pair(str, p - data));
- KMP_MEMCPY(p, str.c_str(), l);
- p[l] = 0;
- p += l + 1;
- }
- }
- }
- ~StringTable() {
- delete[] data;
- }
- /* Returns encoding for given string based on this string table.
- Error if string length is greater than 8 but string is not in
- the string table--returns 0.
- */
- __int64 encode(const string &str) {
- __int64 r;
-
- if(str.size() <= 8) {
- // encoded directly
- ((char*)&r)[7] = 0;
- KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
- return r;
- } else {
- // represented as index into table
- map<string,unsigned>::const_iterator it = directory.find(str);
- if(it == directory.end())
- stop("StringTable::encode: String now found in string table");
- ((unsigned*)&r)[0] = 0;
- ((unsigned*)&r)[1] = (*it).second;
- return r;
- }
- }
- /* Returns string represented by x based on this string table.
- Error if x references an invalid position in the table--returns
- the empty string.
- */
- string decode(__int64 x) const {
- if(*(unsigned*)&x == 0) {
- // represented as index into table
- unsigned &p = ((unsigned*)&x)[1];
- if(p >= length)
- stop("StringTable::decode: Invalid string table lookup");
- return string(data + p);
- } else {
- // encoded directly
- char *p = (char*)&x;
- int i;
-
- for(i = 0; i < 8 && p[i]; ++i);
- return string(p, i);
- }
- }
- void write(ostream &os) {
- os.write(data, length);
- }
-};
-
-/* for the named object file, determines the set of defined symbols and the set of undefined external symbols
- and writes them to <defined> and <undefined> respectively
-*/
-void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
- streampos fileSize;
- size_t strTabStart;
- unsigned symTabStart, symNEntries;
- rstream f(fileName);
-
- f.seekg(0,ios::end);
- fileSize = f.tellg();
-
- f.seekg(8);
- f >> symTabStart >> symNEntries;
- // seek to the string table
- f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
- if(f.eof()) {
- printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
- fileName, (unsigned long) fileSize, symTabStart, symNEntries);
- stop("computeExternalSymbols: Unexpected EOF 1");
- }
- StringTable stringTable(f); // read the string table
- if(f.tellg() != fileSize)
- stop("computeExternalSymbols: Unexpected data after string table");
-
- f.clear();
- f.seekg(symTabStart); // seek to the symbol table
-
- defined->clear(); undefined->clear();
- for(int i = 0; i < symNEntries; ++i) {
- // process each entry
- Symbol e;
-
- if(f.eof())
- stop("computeExternalSymbols: Unexpected EOF 2");
- f>>e;
- if(f.fail())
- stop("computeExternalSymbols: File read error");
- if(e.nAux) { // auxiliary entry: skip
- f.seekg(e.nAux * 18, ios::cur);
- i += e.nAux;
- }
- // if symbol is extern and defined in the current file, insert it
- if(e.storageClass == 2)
- if(e.sectionNum)
- defined->insert(stringTable.decode(e.name));
- else
- undefined->insert(stringTable.decode(e.name));
- }
-}
-
-/* For each occurrence of an external symbol in the object file named by
- by <fileName> that is a member of <hide>, renames it by prefixing
- with "__kmp_external_", writing back the file in-place
-*/
-void hideSymbols(char *fileName, const set<string> &hide) {
- static const string prefix("__kmp_external_");
- set<string> strings; // set of all occurring symbols, appropriately prefixed
- streampos fileSize;
- size_t strTabStart;
- unsigned symTabStart, symNEntries;
- int i;
- rstream in(fileName);
-
- in.seekg(0,ios::end);
- fileSize = in.tellg();
-
- in.seekg(8);
- in >> symTabStart >> symNEntries;
- in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- StringTable stringTableOld(in); // read original string table
-
- if(in.tellg() != fileSize)
- stop("hideSymbols: Unexpected data after string table");
-
- // compute set of occurring strings with prefix added
- for(i = 0; i < symNEntries; ++i) {
- Symbol e;
-
- in.seekg(symTabStart + i * 18);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- in >> e;
- if(in.fail())
- stop("hideSymbols: File read error");
- if(e.nAux)
- i += e.nAux;
- const string &s = stringTableOld.decode(e.name);
- // if symbol is extern and found in <hide>, prefix and insert into strings,
- // otherwise, just insert into strings without prefix
- strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
- prefix + s : s);
- }
-
- ofstream out(fileName, ios::trunc | ios::out | ios::binary);
- if(!out.is_open())
- stop("hideSymbols: Error opening output file");
-
- // make new string table from string set
- StringTable stringTableNew = StringTable(strings);
-
- // copy input file to output file up to just before the symbol table
- in.seekg(0);
- char *buf = new char[symTabStart];
- in.read(buf, symTabStart);
- out.write(buf, symTabStart);
- delete []buf;
-
- // copy input symbol table to output symbol table with name translation
- for(i = 0; i < symNEntries; ++i) {
- Symbol e;
-
- in.seekg(symTabStart + i*18);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- in >> e;
- if(in.fail())
- stop("hideSymbols: File read error");
- const string &s = stringTableOld.decode(e.name);
- out.seekp(symTabStart + i*18);
- e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
- prefix + s : s);
- out.write((char*)&e, 18);
- if(out.fail())
- stop("hideSymbols: File write error");
- if(e.nAux) {
- // copy auxiliary symbol table entries
- int nAux = e.nAux;
- for(int j = 1; j <= nAux; ++j) {
- in >> e;
- out.seekp(symTabStart + (i + j) * 18);
- out.write((char*)&e, 18);
- }
- i += nAux;
- }
- }
- // output string table
- stringTableNew.write(out);
-}
-
-// returns true iff <a> and <b> have no common element
-template <class T>
-bool isDisjoint(const set<T> &a, const set<T> &b) {
- set<T>::const_iterator ita, itb;
-
- for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
- const T &ta = *ita, &tb = *itb;
- if(ta < tb)
- ++ita;
- else if (tb < ta)
- ++itb;
- else
- return false;
- }
- return true;
-}
-
-/* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
- <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
- files and the rest correspond to the internal object files.
- postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
- disjoint. Returns the transitive closure of the set of internal object files, as a set of
- file indexes, under the 'depends on' relation, minus the set of internal object files.
-*/
-set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
- set<int> *required = new set<int>;
- set<int> fresh[2];
- int i, cur = 0;
- bool changed;
-
- for(i = nTotal - 1; i >= nExternal; --i)
- fresh[cur].insert(i);
- do {
- changed = false;
- for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
- set<string> &s = undefined[*it];
-
- for(i = 0; i < nExternal; ++i) {
- if(required->find(i) == required->end()) {
- if(!isDisjoint(defined[i], s)) {
- // found a new qualifying element
- required->insert(i);
- fresh[1 - cur].insert(i);
- changed = true;
- }
- }
- }
- }
- fresh[cur].clear();
- cur = 1 - cur;
- } while(changed);
- return required;
-}
-
-int main(int argc, char **argv) {
- int nExternal, nInternal, i;
- set<string> *defined, *undefined;
- set<int>::iterator it;
-
- if(argc < 3)
- stop("Please specify a positive integer followed by a list of object filenames");
- nExternal = atoi(argv[1]);
- if(nExternal <= 0)
- stop("Please specify a positive integer followed by a list of object filenames");
- if(nExternal + 2 > argc)
- stop("Too few external objects");
- nInternal = argc - nExternal - 2;
- defined = new set<string>[argc - 2];
- undefined = new set<string>[argc - 2];
-
- // determine the set of defined and undefined external symbols
- for(i = 2; i < argc; ++i)
- computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
-
- // determine the set of required external files
- set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
- set<string> hide;
-
- /* determine the set of symbols to hide--namely defined external symbols of the
- required external files
- */
- for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
- int idx = *it;
- set<string>::iterator it2;
- /* We have to insert one element at a time instead of inserting a range because
- the insert member function taking a range doesn't exist on Windows* OS, at least
- at the time of this writing.
- */
- for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
- hide.insert(*it2);
- }
-
- /* process the external files--removing those that are not required and hiding
- the appropriate symbols in the others
- */
- for(i = 0; i < nExternal; ++i)
- if(requiredExternal->find(i) != requiredExternal->end())
- hideSymbols(argv[2 + i], hide);
- else
- remove(argv[2 + i]);
- // hide the appropriate symbols in the internal files
- for(i = nExternal + 2; i < argc; ++i)
- hideSymbols(argv[i], hide);
- return 0;
-}
+/*
+ * extractExternal.cpp
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stdlib.h>
+#include <iostream>
+#include <strstream>
+#include <fstream>
+#include <string>
+#include <set>
+#include <map>
+
+/* Given a set of n object files h ('external' object files) and a set of m
+ object files o ('internal' object files),
+ 1. Determines r, the subset of h that o depends on, directly or indirectly
+ 2. Removes the files in h - r from the file system
+ 3. For each external symbol defined in some file in r, rename it in r U o
+ by prefixing it with "__kmp_external_"
+ Usage:
+ hide.exe <n> <filenames for h> <filenames for o>
+
+ Thus, the prefixed symbols become hidden in the sense that they now have a special
+ prefix.
+*/
+
+using namespace std;
+
+void stop(char* errorMsg) {
+ printf("%s\n", errorMsg);
+ exit(1);
+}
+
+// an entry in the symbol table of a .OBJ file
+class Symbol {
+public:
+ __int64 name;
+ unsigned value;
+ unsigned short sectionNum, type;
+ char storageClass, nAux;
+};
+
+class _rstream : public istrstream {
+private:
+ const char *buf;
+protected:
+ _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
+ ~_rstream() {
+ delete[]buf;
+ }
+};
+
+/* A stream encapsulating the content of a file or the content of a string, overriding the
+ >> operator to read various integer types in binary form, as well as a symbol table
+ entry.
+*/
+class rstream : public _rstream {
+private:
+ template<class T>
+ inline rstream& doRead(T &x) {
+ read((char*)&x, sizeof(T));
+ return *this;
+ }
+ static pair<const char*, streamsize> getBuf(const char *fileName) {
+ ifstream raw(fileName,ios::binary | ios::in);
+ if(!raw.is_open())
+ stop("rstream.getBuf: Error opening file");
+ raw.seekg(0,ios::end);
+ streampos fileSize = raw.tellg();
+ if(fileSize < 0)
+ stop("rstream.getBuf: Error reading file");
+ char *buf = new char[fileSize];
+ raw.seekg(0,ios::beg);
+ raw.read(buf, fileSize);
+ return pair<const char*, streamsize>(buf,fileSize);
+ }
+public:
+ // construct from a string
+ rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
+ /* construct from a file whose content is fully read once to initialize the content of
+ this stream
+ */
+ rstream(const char *fileName):_rstream(getBuf(fileName)){}
+ rstream& operator>>(int &x) {
+ return doRead(x);
+ }
+ rstream& operator>>(unsigned &x) {
+ return doRead(x);
+ }
+ rstream& operator>>(short &x) {
+ return doRead(x);
+ }
+ rstream& operator>>(unsigned short &x) {
+ return doRead(x);
+ }
+ rstream& operator>>(Symbol &e) {
+ read((char*)&e, 18);
+ return *this;
+ }
+};
+
+// string table in a .OBJ file
+class StringTable {
+private:
+ map<string, unsigned> directory;
+ size_t length;
+ char *data;
+
+ // make <directory> from <length> bytes in <data>
+ void makeDirectory(void) {
+ unsigned i = 4;
+ while(i < length) {
+ string s = string(data + i);
+ directory.insert(make_pair(s, i));
+ i += s.size() + 1;
+ }
+ }
+ // initialize <length> and <data> with contents specified by the arguments
+ void init(const char *_data) {
+ unsigned _length = *(unsigned*)_data;
+
+ if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
+ stop("StringTable.init: Invalid symbol table");
+ if(_data[_length - 1]) {
+ // to prevent runaway strings, make sure the data ends with a zero
+ data = new char[length = _length + 1];
+ data[_length] = 0;
+ } else {
+ data = new char[length = _length];
+ }
+ *(unsigned*)data = length;
+ KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
+ length - sizeof(unsigned));
+ makeDirectory();
+ }
+public:
+ StringTable(rstream &f) {
+ /* Construct string table by reading from f.
+ */
+ streampos s;
+ unsigned strSize;
+ char *strData;
+
+ s = f.tellg();
+ f>>strSize;
+ if(strSize < sizeof(unsigned))
+ stop("StringTable: Invalid string table");
+ strData = new char[strSize];
+ *(unsigned*)strData = strSize;
+ // read the raw data into <strData>
+ f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
+ s = f.tellg() - s;
+ if(s < strSize)
+ stop("StringTable: Unexpected EOF");
+ init(strData);
+ delete[]strData;
+ }
+ StringTable(const set<string> &strings) {
+ /* Construct string table from given strings.
+ */
+ char *p;
+ set<string>::const_iterator it;
+ size_t s;
+
+ // count required size for data
+ for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
+ size_t l = (*it).size();
+
+ if(l > (unsigned) 0xFFFFFFFF)
+ stop("StringTable: String too long");
+ if(l > 8) {
+ length += l + 1;
+ if(length > (unsigned) 0xFFFFFFFF)
+ stop("StringTable: Symbol table too long");
+ }
+ }
+ data = new char[length];
+ *(unsigned*)data = length;
+ // populate data and directory
+ for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
+ const string &str = *it;
+ size_t l = str.size();
+ if(l > 8) {
+ directory.insert(make_pair(str, p - data));
+ KMP_MEMCPY(p, str.c_str(), l);
+ p[l] = 0;
+ p += l + 1;
+ }
+ }
+ }
+ ~StringTable() {
+ delete[] data;
+ }
+ /* Returns encoding for given string based on this string table.
+ Error if string length is greater than 8 but string is not in
+ the string table--returns 0.
+ */
+ __int64 encode(const string &str) {
+ __int64 r;
+
+ if(str.size() <= 8) {
+ // encoded directly
+ ((char*)&r)[7] = 0;
+ KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
+ return r;
+ } else {
+ // represented as index into table
+ map<string,unsigned>::const_iterator it = directory.find(str);
+ if(it == directory.end())
+ stop("StringTable::encode: String now found in string table");
+ ((unsigned*)&r)[0] = 0;
+ ((unsigned*)&r)[1] = (*it).second;
+ return r;
+ }
+ }
+ /* Returns string represented by x based on this string table.
+ Error if x references an invalid position in the table--returns
+ the empty string.
+ */
+ string decode(__int64 x) const {
+ if(*(unsigned*)&x == 0) {
+ // represented as index into table
+ unsigned &p = ((unsigned*)&x)[1];
+ if(p >= length)
+ stop("StringTable::decode: Invalid string table lookup");
+ return string(data + p);
+ } else {
+ // encoded directly
+ char *p = (char*)&x;
+ int i;
+
+ for(i = 0; i < 8 && p[i]; ++i);
+ return string(p, i);
+ }
+ }
+ void write(ostream &os) {
+ os.write(data, length);
+ }
+};
+
+/* for the named object file, determines the set of defined symbols and the set of undefined external symbols
+ and writes them to <defined> and <undefined> respectively
+*/
+void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
+ streampos fileSize;
+ size_t strTabStart;
+ unsigned symTabStart, symNEntries;
+ rstream f(fileName);
+
+ f.seekg(0,ios::end);
+ fileSize = f.tellg();
+
+ f.seekg(8);
+ f >> symTabStart >> symNEntries;
+ // seek to the string table
+ f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
+ if(f.eof()) {
+ printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
+ fileName, (unsigned long) fileSize, symTabStart, symNEntries);
+ stop("computeExternalSymbols: Unexpected EOF 1");
+ }
+ StringTable stringTable(f); // read the string table
+ if(f.tellg() != fileSize)
+ stop("computeExternalSymbols: Unexpected data after string table");
+
+ f.clear();
+ f.seekg(symTabStart); // seek to the symbol table
+
+ defined->clear(); undefined->clear();
+ for(int i = 0; i < symNEntries; ++i) {
+ // process each entry
+ Symbol e;
+
+ if(f.eof())
+ stop("computeExternalSymbols: Unexpected EOF 2");
+ f>>e;
+ if(f.fail())
+ stop("computeExternalSymbols: File read error");
+ if(e.nAux) { // auxiliary entry: skip
+ f.seekg(e.nAux * 18, ios::cur);
+ i += e.nAux;
+ }
+ // if symbol is extern and defined in the current file, insert it
+ if(e.storageClass == 2)
+ if(e.sectionNum)
+ defined->insert(stringTable.decode(e.name));
+ else
+ undefined->insert(stringTable.decode(e.name));
+ }
+}
+
+/* For each occurrence of an external symbol in the object file named by
+ by <fileName> that is a member of <hide>, renames it by prefixing
+ with "__kmp_external_", writing back the file in-place
+*/
+void hideSymbols(char *fileName, const set<string> &hide) {
+ static const string prefix("__kmp_external_");
+ set<string> strings; // set of all occurring symbols, appropriately prefixed
+ streampos fileSize;
+ size_t strTabStart;
+ unsigned symTabStart, symNEntries;
+ int i;
+ rstream in(fileName);
+
+ in.seekg(0,ios::end);
+ fileSize = in.tellg();
+
+ in.seekg(8);
+ in >> symTabStart >> symNEntries;
+ in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
+ if(in.eof())
+ stop("hideSymbols: Unexpected EOF");
+ StringTable stringTableOld(in); // read original string table
+
+ if(in.tellg() != fileSize)
+ stop("hideSymbols: Unexpected data after string table");
+
+ // compute set of occurring strings with prefix added
+ for(i = 0; i < symNEntries; ++i) {
+ Symbol e;
+
+ in.seekg(symTabStart + i * 18);
+ if(in.eof())
+ stop("hideSymbols: Unexpected EOF");
+ in >> e;
+ if(in.fail())
+ stop("hideSymbols: File read error");
+ if(e.nAux)
+ i += e.nAux;
+ const string &s = stringTableOld.decode(e.name);
+ // if symbol is extern and found in <hide>, prefix and insert into strings,
+ // otherwise, just insert into strings without prefix
+ strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
+ prefix + s : s);
+ }
+
+ ofstream out(fileName, ios::trunc | ios::out | ios::binary);
+ if(!out.is_open())
+ stop("hideSymbols: Error opening output file");
+
+ // make new string table from string set
+ StringTable stringTableNew = StringTable(strings);
+
+ // copy input file to output file up to just before the symbol table
+ in.seekg(0);
+ char *buf = new char[symTabStart];
+ in.read(buf, symTabStart);
+ out.write(buf, symTabStart);
+ delete []buf;
+
+ // copy input symbol table to output symbol table with name translation
+ for(i = 0; i < symNEntries; ++i) {
+ Symbol e;
+
+ in.seekg(symTabStart + i*18);
+ if(in.eof())
+ stop("hideSymbols: Unexpected EOF");
+ in >> e;
+ if(in.fail())
+ stop("hideSymbols: File read error");
+ const string &s = stringTableOld.decode(e.name);
+ out.seekp(symTabStart + i*18);
+ e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
+ prefix + s : s);
+ out.write((char*)&e, 18);
+ if(out.fail())
+ stop("hideSymbols: File write error");
+ if(e.nAux) {
+ // copy auxiliary symbol table entries
+ int nAux = e.nAux;
+ for(int j = 1; j <= nAux; ++j) {
+ in >> e;
+ out.seekp(symTabStart + (i + j) * 18);
+ out.write((char*)&e, 18);
+ }
+ i += nAux;
+ }
+ }
+ // output string table
+ stringTableNew.write(out);
+}
+
+// returns true iff <a> and <b> have no common element
+template <class T>
+bool isDisjoint(const set<T> &a, const set<T> &b) {
+ set<T>::const_iterator ita, itb;
+
+ for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
+ const T &ta = *ita, &tb = *itb;
+ if(ta < tb)
+ ++ita;
+ else if (tb < ta)
+ ++itb;
+ else
+ return false;
+ }
+ return true;
+}
+
+/* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
+ <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
+ files and the rest correspond to the internal object files.
+ postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
+ disjoint. Returns the transitive closure of the set of internal object files, as a set of
+ file indexes, under the 'depends on' relation, minus the set of internal object files.
+*/
+set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
+ set<int> *required = new set<int>;
+ set<int> fresh[2];
+ int i, cur = 0;
+ bool changed;
+
+ for(i = nTotal - 1; i >= nExternal; --i)
+ fresh[cur].insert(i);
+ do {
+ changed = false;
+ for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
+ set<string> &s = undefined[*it];
+
+ for(i = 0; i < nExternal; ++i) {
+ if(required->find(i) == required->end()) {
+ if(!isDisjoint(defined[i], s)) {
+ // found a new qualifying element
+ required->insert(i);
+ fresh[1 - cur].insert(i);
+ changed = true;
+ }
+ }
+ }
+ }
+ fresh[cur].clear();
+ cur = 1 - cur;
+ } while(changed);
+ return required;
+}
+
+int main(int argc, char **argv) {
+ int nExternal, nInternal, i;
+ set<string> *defined, *undefined;
+ set<int>::iterator it;
+
+ if(argc < 3)
+ stop("Please specify a positive integer followed by a list of object filenames");
+ nExternal = atoi(argv[1]);
+ if(nExternal <= 0)
+ stop("Please specify a positive integer followed by a list of object filenames");
+ if(nExternal + 2 > argc)
+ stop("Too few external objects");
+ nInternal = argc - nExternal - 2;
+ defined = new set<string>[argc - 2];
+ undefined = new set<string>[argc - 2];
+
+ // determine the set of defined and undefined external symbols
+ for(i = 2; i < argc; ++i)
+ computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
+
+ // determine the set of required external files
+ set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
+ set<string> hide;
+
+ /* determine the set of symbols to hide--namely defined external symbols of the
+ required external files
+ */
+ for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
+ int idx = *it;
+ set<string>::iterator it2;
+ /* We have to insert one element at a time instead of inserting a range because
+ the insert member function taking a range doesn't exist on Windows* OS, at least
+ at the time of this writing.
+ */
+ for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
+ hide.insert(*it2);
+ }
+
+ /* process the external files--removing those that are not required and hiding
+ the appropriate symbols in the others
+ */
+ for(i = 0; i < nExternal; ++i)
+ if(requiredExternal->find(i) != requiredExternal->end())
+ hideSymbols(argv[2 + i], hide);
+ else
+ remove(argv[2 + i]);
+ // hide the appropriate symbols in the internal files
+ for(i = nExternal + 2; i < argc; ++i)
+ hideSymbols(argv[i], hide);
+ return 0;
+}
diff --git a/contrib/libs/cxxsupp/openmp/i18n/en_US.txt b/contrib/libs/cxxsupp/openmp/i18n/en_US.txt
index 840d29ea48..11d57eb798 100644
--- a/contrib/libs/cxxsupp/openmp/i18n/en_US.txt
+++ b/contrib/libs/cxxsupp/openmp/i18n/en_US.txt
@@ -1,475 +1,475 @@
-# en_US.txt #
-
-#
-#//===----------------------------------------------------------------------===//
-#//
-#// The LLVM Compiler Infrastructure
-#//
-#// This file is dual licensed under the MIT and the University of Illinois Open
-#// Source Licenses. See LICENSE.txt for details.
-#//
-#//===----------------------------------------------------------------------===//
-#
-
-# Default messages, embedded into the OpenMP RTL, and source for English catalog.
-
-
-# Compatible changes (which does not require version bumping):
-# * Editing message (number and type of placeholders must remain, relative order of
-# placeholders may be changed, e.g. "File %1$s line %2$d" may be safely edited to
-# "Line %2$d file %1$s").
-# * Adding new message to the end of section.
-# Incompatible changes (version must be bumbed by 1):
-# * Introducing new placeholders to existing messages.
-# * Changing type of placeholders (e.g. "line %1$d" -> "line %1$s").
-# * Rearranging order of messages.
-# * Deleting messages.
-# Use special "OBSOLETE" pseudoidentifier for obsolete entries, which is kept only for backward
-# compatibility. When version is bumped, do not forget to delete all obsolete entries.
-
-
-# --------------------------------------------------------------------------------------------------
--*- META -*-
-# --------------------------------------------------------------------------------------------------
-
-# Meta information about message catalog.
-
-Language "English"
-Country "USA"
-LangId "1033"
-Version "2"
-Revision "20140827"
-
-
-
-# --------------------------------------------------------------------------------------------------
--*- STRINGS -*-
-# --------------------------------------------------------------------------------------------------
-
-# Strings are not complete messages, just fragments. We need to work on it and reduce number of
-# strings (to zero?).
-
-Error "Error"
-UnknownFile "(unknown file)"
-NotANumber "not a number"
-BadUnit "bad unit"
-IllegalCharacters "illegal characters"
-ValueTooLarge "value too large"
-ValueTooSmall "value too small"
-NotMultiple4K "value is not a multiple of 4k"
-UnknownTopology "Unknown processor topology"
-CantOpenCpuinfo "Cannot open /proc/cpuinfo"
-ProcCpuinfo "/proc/cpuinfo"
-NoProcRecords "cpuinfo file invalid (No processor records)"
-TooManyProcRecords "cpuinfo file invalid (Too many processor records)"
-CantRewindCpuinfo "Cannot rewind cpuinfo file"
-LongLineCpuinfo "cpuinfo file invalid (long line)"
-TooManyEntries "cpuinfo file contains too many entries"
-MissingProcField "cpuinfo file missing processor field"
-MissingPhysicalIDField "cpuinfo file missing physical id field"
-MissingValCpuinfo "cpuinfo file invalid (missing val)"
-DuplicateFieldCpuinfo "cpuinfo file invalid (duplicate field)"
-PhysicalIDsNotUnique "Physical node/pkg/core/thread ids not unique"
-ApicNotPresent "APIC not present"
-InvalidCpuidInfo "Invalid cpuid info"
-OBSOLETE "APIC ids not unique"
-InconsistentCpuidInfo "Inconsistent cpuid info"
-OutOfHeapMemory "Out of heap memory"
-MemoryAllocFailed "Memory allocation failed"
-Core "core"
-Thread "thread"
-Package "package"
-Node "node"
-OBSOLETE "<undef>"
-DecodingLegacyAPIC "decoding legacy APIC ids"
-OBSOLETE "parsing /proc/cpuinfo"
-NotDefined "value is not defined"
-EffectiveSettings "Effective settings:"
-UserSettings "User settings:"
-StorageMapWarning "warning: pointers or size don't make sense"
-OBSOLETE "CPU"
-OBSOLETE "TPU"
-OBSOLETE "TPUs per package"
-OBSOLETE "HT enabled"
-OBSOLETE "HT disabled"
-Decodingx2APIC "decoding x2APIC ids"
-NoLeaf11Support "cpuid leaf 11 not supported"
-NoLeaf4Support "cpuid leaf 4 not supported"
-ThreadIDsNotUnique "thread ids not unique"
-UsingPthread "using pthread info"
-LegacyApicIDsNotUnique "legacy APIC ids not unique"
-x2ApicIDsNotUnique "x2APIC ids not unique"
-DisplayEnvBegin "OPENMP DISPLAY ENVIRONMENT BEGIN"
-DisplayEnvEnd "OPENMP DISPLAY ENVIRONMENT END"
-Device "[device]"
-Host "[host]"
-
-
-
-# --------------------------------------------------------------------------------------------------
--*- FORMATS -*-
-# --------------------------------------------------------------------------------------------------
-
-Info "OMP: Info #%1$d: %2$s\n"
-Warning "OMP: Warning #%1$d: %2$s\n"
-Fatal "OMP: Error #%1$d: %2$s\n"
-SysErr "OMP: System error #%1$d: %2$s\n"
-Hint "OMP: Hint: %2$s\n"
-
-Pragma "%1$s pragma (at %2$s:%3$s():%4$s)"
- # %1 is pragma name (like "parallel" or "master",
- # %2 is file name,
- # %3 is function (routine) name,
- # %4 is the line number (as string, so "s" type specifier should be used).
-
-
-
-# --------------------------------------------------------------------------------------------------
--*- MESSAGES -*-
-# --------------------------------------------------------------------------------------------------
-
-# Messages of any severity: informational, warning, or fatal.
-# To maintain message numbers (they are visible to customers), add new messages to the end.
-
-# Use following prefixes for messages and hints when appropriate:
-# Aff -- Affinity messages.
-# Cns -- Consistency check failures (KMP_CONSISTENCY_CHECK).
-# Itt -- ITT Notify-related messages.
-
-LibraryIsSerial "Library is \"serial\"."
-CantOpenMessageCatalog "Cannot open message catalog \"%1$s\":"
-WillUseDefaultMessages "Default messages will be used."
-LockIsUninitialized "%1$s: Lock is uninitialized"
-LockSimpleUsedAsNestable "%1$s: Lock was initialized as simple, but used as nestable"
-LockNestableUsedAsSimple "%1$s: Lock was initialized as nestable, but used as simple"
-LockIsAlreadyOwned "%1$s: Lock is already owned by requesting thread"
-LockStillOwned "%1$s: Lock is still owned by a thread"
-LockUnsettingFree "%1$s: Attempt to release a lock not owned by any thread"
-LockUnsettingSetByAnother "%1$s: Attempt to release a lock owned by another thread"
-StackOverflow "Stack overflow detected for OpenMP thread #%1$d"
-StackOverlap "Stack overlap detected. "
-AssertionFailure "Assertion failure at %1$s(%2$d)."
-CantRegisterNewThread "Unable to register a new user thread."
-DuplicateLibrary "Initializing %1$s, but found %2$s already initialized."
-CantOpenFileForReading "Cannot open file \"%1$s\" for reading:"
-CantGetEnvVar "Getting environment variable \"%1$s\" failed:"
-CantSetEnvVar "Setting environment variable \"%1$s\" failed:"
-CantGetEnvironment "Getting environment failed:"
-BadBoolValue "%1$s=\"%2$s\": Wrong value, boolean expected."
-SSPNotBuiltIn "No Helper Thread support built in this OMP library."
-SPPSotfTerminateFailed "Helper thread failed to soft terminate."
-BufferOverflow "Buffer overflow detected."
-RealTimeSchedNotSupported "Real-time scheduling policy is not supported."
-RunningAtMaxPriority "OMP application is running at maximum priority with real-time scheduling policy. "
-CantChangeMonitorPriority "Changing priority of the monitor thread failed:"
-MonitorWillStarve "Deadlocks are highly possible due to monitor thread starvation."
-CantSetMonitorStackSize "Unable to set monitor thread stack size to %1$lu bytes:"
-CantSetWorkerStackSize "Unable to set OMP thread stack size to %1$lu bytes:"
-CantInitThreadAttrs "Thread attribute initialization failed:"
-CantDestroyThreadAttrs "Thread attribute destroying failed:"
-CantSetWorkerState "OMP thread joinable state setting failed:"
-CantSetMonitorState "Monitor thread joinable state setting failed:"
-NoResourcesForWorkerThread "System unable to allocate necessary resources for OMP thread:"
-NoResourcesForMonitorThread "System unable to allocate necessary resources for the monitor thread:"
-CantTerminateWorkerThread "Unable to terminate OMP thread:"
-ScheduleKindOutOfRange "Wrong schedule type %1$d, see <omp.h> or <omp_lib.h> file for the list of values supported."
-UnknownSchedulingType "Unknown scheduling type \"%1$d\"."
-InvalidValue "%1$s value \"%2$s\" is invalid."
-SmallValue "%1$s value \"%2$s\" is too small."
-LargeValue "%1$s value \"%2$s\" is too large."
-StgInvalidValue "%1$s: \"%2$s\" is an invalid value; ignored."
-BarrReleaseValueInvalid "%1$s release value \"%2$s\" is invalid."
-BarrGatherValueInvalid "%1$s gather value \"%2$s\" is invalid."
-OBSOLETE "%1$s supported only on debug builds; ignored."
-ParRangeSyntax "Syntax error: Usage: %1$s=[ routine=<func> | filename=<file> | range=<lb>:<ub> "
- "| excl_range=<lb>:<ub> ],..."
-UnbalancedQuotes "Unbalanced quotes in %1$s."
-EmptyString "Empty string specified for %1$s; ignored."
-LongValue "%1$s value is too long; ignored."
-InvalidClause "%1$s: Invalid clause in \"%2$s\"."
-EmptyClause "Empty clause in %1$s."
-InvalidChunk "%1$s value \"%2$s\" is invalid chunk size."
-LargeChunk "%1$s value \"%2$s\" is to large chunk size."
-IgnoreChunk "%1$s value \"%2$s\" is ignored."
-CantGetProcFreq "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY."
-EnvParallelWarn "%1$s must be set prior to first parallel region; ignored."
-AffParamDefined "%1$s: parameter has been specified already, ignoring \"%2$s\"."
-AffInvalidParam "%1$s: parameter invalid, ignoring \"%2$s\"."
-AffManyParams "%1$s: too many integer parameters specified, ignoring \"%2$s\"."
-AffManyParamsForLogic "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\"."
-AffNoParam "%1$s: '%2$s' type does not take any integer parameters, ignoring them."
-AffNoProcList "%1$s: proclist not specified with explicit affinity type, using \"none\"."
-AffProcListNoType "%1$s: proclist specified, setting affinity type to \"explicit\"."
-AffProcListNotExplicit "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored."
-AffSyntaxError "%1$s: syntax error, not using affinity."
-AffZeroStride "%1$s: range error (zero stride), not using affinity."
-AffStartGreaterEnd "%1$s: range error (%2$d > %3$d), not using affinity."
-AffStrideLessZero "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity."
-AffRangeTooBig "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity."
-OBSOLETE "%1$s: %2$s is defined. %3$s will be ignored."
-AffNotSupported "%1$s: affinity not supported, using \"disabled\"."
-OBSOLETE "%1$s: affinity only supported for Intel(R) processors."
-GetAffSysCallNotSupported "%1$s: getaffinity system call not supported."
-SetAffSysCallNotSupported "%1$s: setaffinity system call not supported."
-OBSOLETE "%1$s: pthread_aff_set_np call not found."
-OBSOLETE "%1$s: pthread_get_num_resources_np call not found."
-OBSOLETE "%1$s: the OS kernel does not support affinity."
-OBSOLETE "%1$s: pthread_get_num_resources_np returned %2$d."
-AffCantGetMaskSize "%1$s: cannot determine proper affinity mask size."
-ParseSizeIntWarn "%1$s=\"%2$s\": %3$s."
-ParseExtraCharsWarn "%1$s: extra trailing characters ignored: \"%2$s\"."
-UnknownForceReduction "%1$s: unknown method \"%2$s\"."
-TimerUseGettimeofday "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday."
-TimerNeedMoreParam "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. Using gettimeofday."
-TimerInvalidParam "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday."
-TimerGettimeFailed "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday."
-TimerUnknownFunction "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\")."
-UnknownSchedTypeDetected "Unknown scheduling type detected."
-DispatchManyThreads "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling."
-IttLookupFailed "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed."
-IttLoadLibFailed "ittnotify: Loading \"%1$s\" library failed."
-IttAllNotifDisabled "ittnotify: All itt notifications disabled."
-IttObjNotifDisabled "ittnotify: Object state itt notifications disabled."
-IttMarkNotifDisabled "ittnotify: Mark itt notifications disabled."
-IttUnloadLibFailed "ittnotify: Unloading \"%1$s\" library failed."
-CantFormThrTeam "Cannot form a team with %1$d threads, using %2$d instead."
-ActiveLevelsNegative "Requested number of active parallel levels \"%1$d\" is negative; ignored."
-ActiveLevelsExceedLimit "Requested number of active parallel levels \"%1$d\" exceeds supported limit; "
- "the following limit value will be used: \"%1$d\"."
-SetLibraryIncorrectCall "kmp_set_library must only be called from the top level serial thread; ignored."
-FatalSysError "Fatal system error detected."
-OutOfHeapMemory "Out of heap memory."
-OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed."
-OBSOLETE "Registering library with env var failed."
-Using_int_Value "%1$s value \"%2$d\" will be used."
-Using_uint_Value "%1$s value \"%2$u\" will be used."
-Using_uint64_Value "%1$s value \"%2$s\" will be used."
-Using_str_Value "%1$s value \"%2$s\" will be used."
-MaxValueUsing "%1$s maximum value \"%2$d\" will be used."
-MinValueUsing "%1$s minimum value \"%2$d\" will be used."
-MemoryAllocFailed "Memory allocation failed."
-FileNameTooLong "File name too long."
-OBSOLETE "Lock table overflow."
-ManyThreadsForTPDirective "Too many threads to use threadprivate directive."
-AffinityInvalidMask "%1$s: invalid mask."
-WrongDefinition "Wrong definition."
-TLSSetValueFailed "Windows* OS: TLS Set Value failed."
-TLSOutOfIndexes "Windows* OS: TLS out of indexes."
-OBSOLETE "PDONE directive must be nested within a DO directive."
-CantGetNumAvailCPU "Cannot get number of available CPUs."
-AssumedNumCPU "Assumed number of CPUs is 2."
-ErrorInitializeAffinity "Error initializing affinity - not using affinity."
-AffThreadsMayMigrate "Threads may migrate across all available OS procs (granularity setting too coarse)."
-AffIgnoreInvalidProcID "Ignoring invalid OS proc ID %1$d."
-AffNoValidProcID "No valid OS proc IDs specified - not using affinity."
-UsingFlatOS "%1$s - using \"flat\" OS <-> physical proc mapping."
-UsingFlatOSFile "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping."
-UsingFlatOSFileLine "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping."
-FileMsgExiting "%1$s: %2$s - exiting."
-FileLineMsgExiting "%1$s, line %2$d: %3$s - exiting."
-ConstructIdentInvalid "Construct identifier invalid."
-ThreadIdentInvalid "Thread identifier invalid."
-RTLNotInitialized "runtime library not initialized."
-TPCommonBlocksInconsist "Inconsistent THREADPRIVATE common block declarations are non-conforming "
- "and are unsupported. Either all threadprivate common blocks must be declared "
- "identically, or the largest instance of each threadprivate common block "
- "must be referenced first during the run."
-CantSetThreadAffMask "Cannot set thread affinity mask."
-CantSetThreadPriority "Cannot set thread priority."
-CantCreateThread "Cannot create thread."
-CantCreateEvent "Cannot create event."
-CantSetEvent "Cannot set event."
-CantCloseHandle "Cannot close handle."
-UnknownLibraryType "Unknown library type: %1$d."
-ReapMonitorError "Monitor did not reap properly."
-ReapWorkerError "Worker thread failed to join."
-ChangeThreadAffMaskError "Cannot change thread affinity mask."
-ThreadsMigrate "%1$s: Threads may migrate across %2$d innermost levels of machine"
-DecreaseToThreads "%1$s: decrease to %2$d threads"
-IncreaseToThreads "%1$s: increase to %2$d threads"
-OBSOLETE "%1$s: Internal thread %2$d bound to OS proc set %3$s"
-AffCapableUseCpuinfo "%1$s: Affinity capable, using cpuinfo file"
-AffUseGlobCpuid "%1$s: Affinity capable, using global cpuid info"
-AffCapableUseFlat "%1$s: Affinity capable, using default \"flat\" topology"
-AffNotCapableUseLocCpuid "%1$s: Affinity not capable, using local cpuid info"
-AffNotCapableUseCpuinfo "%1$s: Affinity not capable, using cpuinfo file"
-AffFlatTopology "%1$s: Affinity not capable, assuming \"flat\" topology"
-InitOSProcSetRespect "%1$s: Initial OS proc set respected: %2$s"
-InitOSProcSetNotRespect "%1$s: Initial OS proc set not respected: %2$s"
-AvailableOSProc "%1$s: %2$d available OS procs"
-Uniform "%1$s: Uniform topology"
-NonUniform "%1$s: Nonuniform topology"
-Topology "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)"
-OBSOLETE "%1$s: OS proc to physical thread map ([] => level not in map):"
-OSProcToPackage "%1$s: OS proc <n> maps to <n>th package core 0"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]"
-OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]"
-OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d"
-OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]"
-OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d"
-OSProcMapToPack "%1$s: OS proc %2$d maps to %3$s"
-OBSOLETE "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d"
-OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d"
-OBSOLETE "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package"
-OBSOLETE "%1$s: HT disabled; %2$d packages"
-BarriersInDifferentOrder "Threads encountered barriers in different order. "
-FunctionError "Function %1$s failed:"
-TopologyExtra "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)"
-WrongMessageCatalog "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected."
-StgIgnored "%1$s: ignored because %2$s has been defined"
- # %1, -- name of ignored variable, %2 -- name of variable with higher priority.
-OBSOLETE "%1$s: overrides %3$s specified before"
-    # %1, %2 -- name and value of the overriding variable, %3 -- name of overridden variable.
-
-# --- OpenMP errors detected at runtime ---
-#
-# %1 is the name of OpenMP construct (formatted with "Pragma" format).
-#
-CnsBoundToWorksharing "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause"
-CnsDetectedEnd "Detected end of %1$s without first executing a corresponding beginning."
-CnsIterationRangeTooLarge "Iteration range too large in %1$s."
-CnsLoopIncrZeroProhibited "%1$s must not have a loop increment that evaluates to zero."
-#
-# %1 is the name of the first OpenMP construct, %2 -- the name of the second one (both formatted with "Pragma" format).
-#
-CnsExpectedEnd "Expected end of %1$s; %2$s, however, has most recently begun execution."
-CnsInvalidNesting "%1$s is incorrectly nested within %2$s"
-CnsMultipleNesting "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s"
-CnsNestingSameName "%1$s is incorrectly nested within %2$s of the same name"
-CnsNoOrderedClause "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause"
-CnsNotInTaskConstruct "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs"
-CnsThreadsAtBarrier "One thread at %1$s while another thread is at %2$s."
-
-# New errors
-CantConnect "Cannot connect to %1$s"
-CantConnectUsing "Cannot connect to %1$s - Using %2$s"
-LibNotSupport "%1$s does not support %2$s. Continuing without using %2$s."
-LibNotSupportFor "%1$s does not support %2$s for %3$s. Continuing without using %2$s."
-StaticLibNotSupport "Static %1$s does not support %2$s. Continuing without using %2$s."
-OBSOLETE "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0"
-IttUnknownGroup "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\"."
-IttEnvVarTooLong "ittnotify: Environment variable \"%1$s\" too long: Actual length is %2$lu, max allowed length is %3$lu."
-AffUseGlobCpuidL11 "%1$s: Affinity capable, using global cpuid leaf 11 info"
-AffNotCapableUseLocCpuidL11 "%1$s: Affinity not capable, using local cpuid leaf 11 info"
-AffInfoStr "%1$s: %2$s."
-AffInfoStrStr "%1$s: %2$s - %3$s."
-OSProcToPhysicalThreadMap "%1$s: OS proc to physical thread map:"
-AffUsingFlatOS "%1$s: using \"flat\" OS <-> physical proc mapping."
-AffParseFilename "%1$s: parsing %2$s."
-MsgExiting "%1$s - exiting."
-IncompatibleLibrary "Incompatible %1$s library with version %2$s found."
-IttFunctionError "ittnotify: Function %1$s failed:"
-IttUnknownError "ittnotify: Error #%1$d."
-EnvMiddleWarn "%1$s must be set prior to first parallel region or certain API calls; ignored."
-CnsLockNotDestroyed "Lock initialized at %1$s(%2$d) was not destroyed"
- # %1, %2, %3, %4 -- file, line, func, col
-CantLoadBalUsing "Cannot determine machine load balance - Using %1$s"
-AffNotCapableUsePthread "%1$s: Affinity not capable, using pthread info"
-AffUsePthread "%1$s: Affinity capable, using pthread info"
-OBSOLETE "Loading \"%1$s\" library failed:"
-OBSOLETE "Lookup of \"%1$s\" function failed:"
-OBSOLETE "Buffer too small."
-OBSOLETE "Error #%1$d."
-NthSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"."
-NthSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"."
-AffStrParseFilename "%1$s: %2$s - parsing %3$s."
-OBSOLETE "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group."
-AffTypeCantUseMultGroups "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"."
-AffGranCantUseMultGroups "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"."
-AffWindowsProcGroupMap "%1$s: Mapping Windows* OS processor group <i> proc <j> to OS proc 64*<i>+<j>."
-AffOSProcToGroup "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d"
-AffBalancedNotAvail "%1$s: Affinity balanced is not available."
-OBSOLETE "%1$s: granularity=core will be used."
-EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
-FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
-AffGranUsing "%1$s: granularity=%2$s will be used."
-AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt "
-                "(nSockets@offset, nCores@offset, nThreads per core)\"."
-AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture."
-AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested."
-SyntaxErrorUsing "%1$s: syntax error, using %2$s."
-AdaptiveNotSupported "%1$s: Adaptive locks are not supported; using queuing."
-EnvSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"."
-EnvSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"."
-BoundToOSProcSet "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s"
-CnsLoopIncrIllegal "%1$s error: parallel loop increment and condition are inconsistent."
-NoGompCancellation "libgomp cancellation is not currently supported."
-AffThrPlaceNonUniform "KMP_PLACE_THREADS ignored: non-uniform topology."
-AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported."
-AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"."
-AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
-AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested."
-AffThrPlaceDeprecated "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value."
-AffUsingHwloc "%1$s: Affinity capable, using hwloc."
-AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism."
-AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."
-
-
-# --------------------------------------------------------------------------------------------------
--*- HINTS -*-
-# --------------------------------------------------------------------------------------------------
-
-# Hints. A hint may be printed after a message; usually it is a longer explanation or a suggestion.
-# To maintain hint numbers (they are visible to customers), add new hints to the end.
-
-SubmitBugReport "Please submit a bug report with this message, compile and run "
- "commands used, and machine configuration info including native "
- "compiler and operating system versions. Faster response will be "
- "obtained by including all program sources. For information on "
- "submitting this issue, please see "
- "http://www.intel.com/software/products/support/."
-OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"."
-ChangeStackLimit "Please try changing the shell stack limit or adjusting the "
- "OMP_STACKSIZE environment variable."
-Unset_ALL_THREADS "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)."
-Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d."
-PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads."
-DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been "
- "linked into the program. That is dangerous, since it can degrade "
- "performance or cause incorrect results. "
- "The best thing to do is to ensure that only a single OpenMP runtime is "
- "linked into the process, e.g. by avoiding static linking of the OpenMP "
- "runtime in any library. As an unsafe, unsupported, undocumented workaround "
- "you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow "
- "the program to continue to execute, but that may cause crashes or "
- "silently produce incorrect results. "
- "For more information, please see http://www.intel.com/software/products/support/."
-NameComesFrom_CPUINFO_FILE "This name is specified in environment variable KMP_CPUINFO_FILE."
-NotEnoughMemory "It seems the application required too much memory."
-ValidBoolValues "Use \"0\", \"FALSE\", \".F.\", \"off\", \"no\" as false values, "
- "\"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values."
-BufferOverflow "Perhaps too many threads."
-RunningAtMaxPriority "Decrease priority of application. "
-                "This will allow the monitor thread to run at a higher priority than other threads."
-ChangeMonitorStackSize "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit."
-ChangeWorkerStackSize "Try changing OMP_STACKSIZE and/or the shell stack limit."
-IncreaseWorkerStackSize "Try increasing OMP_STACKSIZE or the shell stack limit."
-DecreaseWorkerStackSize "Try decreasing OMP_STACKSIZE."
-Decrease_NUM_THREADS "Try decreasing the value of OMP_NUM_THREADS."
-IncreaseMonitorStackSize "Try increasing KMP_MONITOR_STACKSIZE."
-DecreaseMonitorStackSize "Try decreasing KMP_MONITOR_STACKSIZE."
-DecreaseNumberOfThreadsInUse "Try decreasing the number of threads in use simultaneously."
-DefaultScheduleKindUsed "Will use default schedule type (%1$s)."
-GetNewerLibrary "It could be a result of using an older OMP library with a newer "
-                "compiler, or of memory corruption. Check that the proper OMP library "
- "is linked to the application."
-CheckEnvVar "Check %1$s environment variable, its value is \"%2$s\"."
-OBSOLETE "You may want to use an %1$s library that supports %2$s interface with version %3$s."
-OBSOLETE "You may want to use an %1$s library with version %2$s."
-BadExeFormat "System error #193 is \"Bad format of EXE or DLL file\". "
- "Usually it means the file is found, but it is corrupted or "
- "a file for another architecture. "
- "Check whether \"%1$s\" is a file for %2$s architecture."
-SystemLimitOnThreads "System-related limit on the number of threads."
-
-
-
-# --------------------------------------------------------------------------------------------------
-# end of file #
-# --------------------------------------------------------------------------------------------------
-
+# en_US.txt #
+
+#
+#//===----------------------------------------------------------------------===//
+#//
+#// The LLVM Compiler Infrastructure
+#//
+#// This file is dual licensed under the MIT and the University of Illinois Open
+#// Source Licenses. See LICENSE.txt for details.
+#//
+#//===----------------------------------------------------------------------===//
+#
+
+# Default messages, embedded into the OpenMP RTL, and source for English catalog.
+
+
+# Compatible changes (which do not require version bumping):
+#     * Editing a message (the number and type of placeholders must remain the same; the relative
+#       order of placeholders may be changed, e.g. "File %1$s line %2$d" may be safely edited to
+#       "Line %2$d file %1$s").
+#     * Adding a new message to the end of a section.
+# Incompatible changes (version must be bumped by 1):
+#     * Introducing new placeholders into existing messages.
+#     * Changing the type of placeholders (e.g. "line %1$d" -> "line %1$s").
+#     * Rearranging the order of messages.
+#     * Deleting messages.
+# Use the special "OBSOLETE" pseudo-identifier for obsolete entries, which are kept only for backward
+# compatibility. When the version is bumped, do not forget to delete all obsolete entries.
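The %n$ positional placeholders used throughout this catalog behave like POSIX positional printf arguments: the runtime always passes the same argument list, and the format string decides where each argument appears. Below is a minimal C sketch of why such reordering is a compatible change, assuming a libc with positional-argument support (e.g. glibc); the file name and line number are invented for illustration.

    #include <stdio.h>

    int main(void) {
        /* Original wording: file first, then line. */
        printf("File %1$s line %2$d\n", "foo.c", 42);
        /* Reworded message: identical arguments at the call site, only the
           format string changed -- no catalog version bump needed. */
        printf("Line %2$d file %1$s\n", "foo.c", 42);
        return 0;
    }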
+
+
+# --------------------------------------------------------------------------------------------------
+-*- META -*-
+# --------------------------------------------------------------------------------------------------
+
+# Meta information about message catalog.
+
+Language "English"
+Country "USA"
+LangId "1033"
+Version "2"
+Revision "20140827"
+
+
+
+# --------------------------------------------------------------------------------------------------
+-*- STRINGS -*-
+# --------------------------------------------------------------------------------------------------
+
+# Strings are not complete messages, just fragments. We need to work on this and reduce the number
+# of strings (to zero?).
+
+Error "Error"
+UnknownFile "(unknown file)"
+NotANumber "not a number"
+BadUnit "bad unit"
+IllegalCharacters "illegal characters"
+ValueTooLarge "value too large"
+ValueTooSmall "value too small"
+NotMultiple4K "value is not a multiple of 4k"
+UnknownTopology "Unknown processor topology"
+CantOpenCpuinfo "Cannot open /proc/cpuinfo"
+ProcCpuinfo "/proc/cpuinfo"
+NoProcRecords "cpuinfo file invalid (No processor records)"
+TooManyProcRecords "cpuinfo file invalid (Too many processor records)"
+CantRewindCpuinfo "Cannot rewind cpuinfo file"
+LongLineCpuinfo "cpuinfo file invalid (long line)"
+TooManyEntries "cpuinfo file contains too many entries"
+MissingProcField "cpuinfo file missing processor field"
+MissingPhysicalIDField "cpuinfo file missing physical id field"
+MissingValCpuinfo "cpuinfo file invalid (missing val)"
+DuplicateFieldCpuinfo "cpuinfo file invalid (duplicate field)"
+PhysicalIDsNotUnique "Physical node/pkg/core/thread ids not unique"
+ApicNotPresent "APIC not present"
+InvalidCpuidInfo "Invalid cpuid info"
+OBSOLETE "APIC ids not unique"
+InconsistentCpuidInfo "Inconsistent cpuid info"
+OutOfHeapMemory "Out of heap memory"
+MemoryAllocFailed "Memory allocation failed"
+Core "core"
+Thread "thread"
+Package "package"
+Node "node"
+OBSOLETE "<undef>"
+DecodingLegacyAPIC "decoding legacy APIC ids"
+OBSOLETE "parsing /proc/cpuinfo"
+NotDefined "value is not defined"
+EffectiveSettings "Effective settings:"
+UserSettings "User settings:"
+StorageMapWarning "warning: pointers or size don't make sense"
+OBSOLETE "CPU"
+OBSOLETE "TPU"
+OBSOLETE "TPUs per package"
+OBSOLETE "HT enabled"
+OBSOLETE "HT disabled"
+Decodingx2APIC "decoding x2APIC ids"
+NoLeaf11Support "cpuid leaf 11 not supported"
+NoLeaf4Support "cpuid leaf 4 not supported"
+ThreadIDsNotUnique "thread ids not unique"
+UsingPthread "using pthread info"
+LegacyApicIDsNotUnique "legacy APIC ids not unique"
+x2ApicIDsNotUnique "x2APIC ids not unique"
+DisplayEnvBegin "OPENMP DISPLAY ENVIRONMENT BEGIN"
+DisplayEnvEnd "OPENMP DISPLAY ENVIRONMENT END"
+Device "[device]"
+Host "[host]"
+
+
+
+# --------------------------------------------------------------------------------------------------
+-*- FORMATS -*-
+# --------------------------------------------------------------------------------------------------
+
+Info "OMP: Info #%1$d: %2$s\n"
+Warning "OMP: Warning #%1$d: %2$s\n"
+Fatal "OMP: Error #%1$d: %2$s\n"
+SysErr "OMP: System error #%1$d: %2$s\n"
+Hint "OMP: Hint: %2$s\n"
+
+Pragma "%1$s pragma (at %2$s:%3$s():%4$s)"
+ # %1 is pragma name (like "parallel" or "master",
+ # %2 is file name,
+ # %3 is function (routine) name,
+ # %4 is the line number (as string, so "s" type specifier should be used).
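For concreteness, with %1 = "parallel", %2 = "foo.c", %3 = "main", and %4 = "42" (all values invented for illustration), the "Pragma" format above would expand to:

    parallel pragma (at foo.c:main():42)

and that result can then be substituted into a %s placeholder of one of the messages below.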
+
+
+
+# --------------------------------------------------------------------------------------------------
+-*- MESSAGES -*-
+# --------------------------------------------------------------------------------------------------
+
+# Messages of any severity: informational, warning, or fatal.
+# To maintain message numbers (they are visible to customers), add new messages to the end.
+
+# Use following prefixes for messages and hints when appropriate:
+# Aff -- Affinity messages.
+# Cns -- Consistency check failures (KMP_CONSISTENCY_CHECK).
+# Itt -- ITT Notify-related messages.
+
+LibraryIsSerial "Library is \"serial\"."
+CantOpenMessageCatalog "Cannot open message catalog \"%1$s\":"
+WillUseDefaultMessages "Default messages will be used."
+LockIsUninitialized "%1$s: Lock is uninitialized"
+LockSimpleUsedAsNestable "%1$s: Lock was initialized as simple, but used as nestable"
+LockNestableUsedAsSimple "%1$s: Lock was initialized as nestable, but used as simple"
+LockIsAlreadyOwned "%1$s: Lock is already owned by requesting thread"
+LockStillOwned "%1$s: Lock is still owned by a thread"
+LockUnsettingFree "%1$s: Attempt to release a lock not owned by any thread"
+LockUnsettingSetByAnother "%1$s: Attempt to release a lock owned by another thread"
+StackOverflow "Stack overflow detected for OpenMP thread #%1$d"
+StackOverlap "Stack overlap detected. "
+AssertionFailure "Assertion failure at %1$s(%2$d)."
+CantRegisterNewThread "Unable to register a new user thread."
+DuplicateLibrary "Initializing %1$s, but found %2$s already initialized."
+CantOpenFileForReading "Cannot open file \"%1$s\" for reading:"
+CantGetEnvVar "Getting environment variable \"%1$s\" failed:"
+CantSetEnvVar "Setting environment variable \"%1$s\" failed:"
+CantGetEnvironment "Getting environment failed:"
+BadBoolValue "%1$s=\"%2$s\": Wrong value, boolean expected."
+SSPNotBuiltIn "No Helper Thread support built in this OMP library."
+SPPSotfTerminateFailed "Helper thread failed to soft terminate."
+BufferOverflow "Buffer overflow detected."
+RealTimeSchedNotSupported "Real-time scheduling policy is not supported."
+RunningAtMaxPriority "OMP application is running at maximum priority with real-time scheduling policy. "
+CantChangeMonitorPriority "Changing priority of the monitor thread failed:"
+MonitorWillStarve "Deadlocks are highly possible due to monitor thread starvation."
+CantSetMonitorStackSize "Unable to set monitor thread stack size to %1$lu bytes:"
+CantSetWorkerStackSize "Unable to set OMP thread stack size to %1$lu bytes:"
+CantInitThreadAttrs "Thread attribute initialization failed:"
+CantDestroyThreadAttrs "Thread attribute destroying failed:"
+CantSetWorkerState "OMP thread joinable state setting failed:"
+CantSetMonitorState "Monitor thread joinable state setting failed:"
+NoResourcesForWorkerThread "System unable to allocate necessary resources for OMP thread:"
+NoResourcesForMonitorThread "System unable to allocate necessary resources for the monitor thread:"
+CantTerminateWorkerThread "Unable to terminate OMP thread:"
+ScheduleKindOutOfRange "Wrong schedule type %1$d, see <omp.h> or <omp_lib.h> file for the list of values supported."
+UnknownSchedulingType "Unknown scheduling type \"%1$d\"."
+InvalidValue "%1$s value \"%2$s\" is invalid."
+SmallValue "%1$s value \"%2$s\" is too small."
+LargeValue "%1$s value \"%2$s\" is too large."
+StgInvalidValue "%1$s: \"%2$s\" is an invalid value; ignored."
+BarrReleaseValueInvalid "%1$s release value \"%2$s\" is invalid."
+BarrGatherValueInvalid "%1$s gather value \"%2$s\" is invalid."
+OBSOLETE "%1$s supported only on debug builds; ignored."
+ParRangeSyntax "Syntax error: Usage: %1$s=[ routine=<func> | filename=<file> | range=<lb>:<ub> "
+ "| excl_range=<lb>:<ub> ],..."
+UnbalancedQuotes "Unbalanced quotes in %1$s."
+EmptyString "Empty string specified for %1$s; ignored."
+LongValue "%1$s value is too long; ignored."
+InvalidClause "%1$s: Invalid clause in \"%2$s\"."
+EmptyClause "Empty clause in %1$s."
+InvalidChunk "%1$s value \"%2$s\" is an invalid chunk size."
+LargeChunk "%1$s value \"%2$s\" is too large a chunk size."
+IgnoreChunk "%1$s value \"%2$s\" is ignored."
+CantGetProcFreq "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY."
+EnvParallelWarn "%1$s must be set prior to first parallel region; ignored."
+AffParamDefined "%1$s: parameter has been specified already, ignoring \"%2$s\"."
+AffInvalidParam "%1$s: parameter invalid, ignoring \"%2$s\"."
+AffManyParams "%1$s: too many integer parameters specified, ignoring \"%2$s\"."
+AffManyParamsForLogic "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\"."
+AffNoParam "%1$s: '%2$s' type does not take any integer parameters, ignoring them."
+AffNoProcList "%1$s: proclist not specified with explicit affinity type, using \"none\"."
+AffProcListNoType "%1$s: proclist specified, setting affinity type to \"explicit\"."
+AffProcListNotExplicit "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored."
+AffSyntaxError "%1$s: syntax error, not using affinity."
+AffZeroStride "%1$s: range error (zero stride), not using affinity."
+AffStartGreaterEnd "%1$s: range error (%2$d > %3$d), not using affinity."
+AffStrideLessZero "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity."
+AffRangeTooBig "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity."
+OBSOLETE "%1$s: %2$s is defined. %3$s will be ignored."
+AffNotSupported "%1$s: affinity not supported, using \"disabled\"."
+OBSOLETE "%1$s: affinity only supported for Intel(R) processors."
+GetAffSysCallNotSupported "%1$s: getaffinity system call not supported."
+SetAffSysCallNotSupported "%1$s: setaffinity system call not supported."
+OBSOLETE "%1$s: pthread_aff_set_np call not found."
+OBSOLETE "%1$s: pthread_get_num_resources_np call not found."
+OBSOLETE "%1$s: the OS kernel does not support affinity."
+OBSOLETE "%1$s: pthread_get_num_resources_np returned %2$d."
+AffCantGetMaskSize "%1$s: cannot determine proper affinity mask size."
+ParseSizeIntWarn "%1$s=\"%2$s\": %3$s."
+ParseExtraCharsWarn "%1$s: extra trailing characters ignored: \"%2$s\"."
+UnknownForceReduction "%1$s: unknown method \"%2$s\"."
+TimerUseGettimeofday "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday."
+TimerNeedMoreParam "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. Using gettimeofday."
+TimerInvalidParam "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday."
+TimerGettimeFailed "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday."
+TimerUnknownFunction "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\")."
+UnknownSchedTypeDetected "Unknown scheduling type detected."
+DispatchManyThreads "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling."
+IttLookupFailed "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed."
+IttLoadLibFailed "ittnotify: Loading \"%1$s\" library failed."
+IttAllNotifDisabled "ittnotify: All itt notifications disabled."
+IttObjNotifDisabled "ittnotify: Object state itt notifications disabled."
+IttMarkNotifDisabled "ittnotify: Mark itt notifications disabled."
+IttUnloadLibFailed "ittnotify: Unloading \"%1$s\" library failed."
+CantFormThrTeam "Cannot form a team with %1$d threads, using %2$d instead."
+ActiveLevelsNegative "Requested number of active parallel levels \"%1$d\" is negative; ignored."
+ActiveLevelsExceedLimit "Requested number of active parallel levels \"%1$d\" exceeds supported limit; "
+ "the following limit value will be used: \"%1$d\"."
+SetLibraryIncorrectCall "kmp_set_library must only be called from the top level serial thread; ignored."
+FatalSysError "Fatal system error detected."
+OutOfHeapMemory "Out of heap memory."
+OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed."
+OBSOLETE "Registering library with env var failed."
+Using_int_Value "%1$s value \"%2$d\" will be used."
+Using_uint_Value "%1$s value \"%2$u\" will be used."
+Using_uint64_Value "%1$s value \"%2$s\" will be used."
+Using_str_Value "%1$s value \"%2$s\" will be used."
+MaxValueUsing "%1$s maximum value \"%2$d\" will be used."
+MinValueUsing "%1$s minimum value \"%2$d\" will be used."
+MemoryAllocFailed "Memory allocation failed."
+FileNameTooLong "File name too long."
+OBSOLETE "Lock table overflow."
+ManyThreadsForTPDirective "Too many threads to use threadprivate directive."
+AffinityInvalidMask "%1$s: invalid mask."
+WrongDefinition "Wrong definition."
+TLSSetValueFailed "Windows* OS: TLS Set Value failed."
+TLSOutOfIndexes "Windows* OS: TLS out of indexes."
+OBSOLETE "PDONE directive must be nested within a DO directive."
+CantGetNumAvailCPU "Cannot get number of available CPUs."
+AssumedNumCPU "Assumed number of CPUs is 2."
+ErrorInitializeAffinity "Error initializing affinity - not using affinity."
+AffThreadsMayMigrate "Threads may migrate across all available OS procs (granularity setting too coarse)."
+AffIgnoreInvalidProcID "Ignoring invalid OS proc ID %1$d."
+AffNoValidProcID "No valid OS proc IDs specified - not using affinity."
+UsingFlatOS "%1$s - using \"flat\" OS <-> physical proc mapping."
+UsingFlatOSFile "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping."
+UsingFlatOSFileLine "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping."
+FileMsgExiting "%1$s: %2$s - exiting."
+FileLineMsgExiting "%1$s, line %2$d: %3$s - exiting."
+ConstructIdentInvalid "Construct identifier invalid."
+ThreadIdentInvalid "Thread identifier invalid."
+RTLNotInitialized "runtime library not initialized."
+TPCommonBlocksInconsist "Inconsistent THREADPRIVATE common block declarations are non-conforming "
+ "and are unsupported. Either all threadprivate common blocks must be declared "
+ "identically, or the largest instance of each threadprivate common block "
+ "must be referenced first during the run."
+CantSetThreadAffMask "Cannot set thread affinity mask."
+CantSetThreadPriority "Cannot set thread priority."
+CantCreateThread "Cannot create thread."
+CantCreateEvent "Cannot create event."
+CantSetEvent "Cannot set event."
+CantCloseHandle "Cannot close handle."
+UnknownLibraryType "Unknown library type: %1$d."
+ReapMonitorError "Monitor did not reap properly."
+ReapWorkerError "Worker thread failed to join."
+ChangeThreadAffMaskError "Cannot change thread affinity mask."
+ThreadsMigrate "%1$s: Threads may migrate across %2$d innermost levels of machine"
+DecreaseToThreads "%1$s: decrease to %2$d threads"
+IncreaseToThreads "%1$s: increase to %2$d threads"
+OBSOLETE "%1$s: Internal thread %2$d bound to OS proc set %3$s"
+AffCapableUseCpuinfo "%1$s: Affinity capable, using cpuinfo file"
+AffUseGlobCpuid "%1$s: Affinity capable, using global cpuid info"
+AffCapableUseFlat "%1$s: Affinity capable, using default \"flat\" topology"
+AffNotCapableUseLocCpuid "%1$s: Affinity not capable, using local cpuid info"
+AffNotCapableUseCpuinfo "%1$s: Affinity not capable, using cpuinfo file"
+AffFlatTopology "%1$s: Affinity not capable, assuming \"flat\" topology"
+InitOSProcSetRespect "%1$s: Initial OS proc set respected: %2$s"
+InitOSProcSetNotRespect "%1$s: Initial OS proc set not respected: %2$s"
+AvailableOSProc "%1$s: %2$d available OS procs"
+Uniform "%1$s: Uniform topology"
+NonUniform "%1$s: Nonuniform topology"
+Topology "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)"
+OBSOLETE "%1$s: OS proc to physical thread map ([] => level not in map):"
+OSProcToPackage "%1$s: OS proc <n> maps to <n>th package core 0"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]"
+OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]"
+OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d"
+OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]"
+OBSOLETE "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d"
+OSProcMapToPack "%1$s: OS proc %2$d maps to %3$s"
+OBSOLETE "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d"
+OBSOLETE "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d"
+OBSOLETE "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package"
+OBSOLETE "%1$s: HT disabled; %2$d packages"
+BarriersInDifferentOrder "Threads encountered barriers in different order. "
+FunctionError "Function %1$s failed:"
+TopologyExtra "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)"
+WrongMessageCatalog "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected."
+StgIgnored "%1$s: ignored because %2$s has been defined"
+ # %1, -- name of ignored variable, %2 -- name of variable with higher priority.
+OBSOLETE "%1$s: overrides %3$s specified before"
+    # %1, %2 -- name and value of the overriding variable, %3 -- name of overridden variable.
+
+# --- OpenMP errors detected at runtime ---
+#
+# %1 is the name of OpenMP construct (formatted with "Pragma" format).
+#
+CnsBoundToWorksharing "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause"
+CnsDetectedEnd "Detected end of %1$s without first executing a corresponding beginning."
+CnsIterationRangeTooLarge "Iteration range too large in %1$s."
+CnsLoopIncrZeroProhibited "%1$s must not have a loop increment that evaluates to zero."
+#
+# %1 is the name of the first OpenMP construct, %2 -- the name of the second one (both formatted with "Pragma" format).
+#
+CnsExpectedEnd "Expected end of %1$s; %2$s, however, has most recently begun execution."
+CnsInvalidNesting "%1$s is incorrectly nested within %2$s"
+CnsMultipleNesting "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s"
+CnsNestingSameName "%1$s is incorrectly nested within %2$s of the same name"
+CnsNoOrderedClause "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause"
+CnsNotInTaskConstruct "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs"
+CnsThreadsAtBarrier "One thread at %1$s while another thread is at %2$s."
+
+# New errors
+CantConnect "Cannot connect to %1$s"
+CantConnectUsing "Cannot connect to %1$s - Using %2$s"
+LibNotSupport "%1$s does not support %2$s. Continuing without using %2$s."
+LibNotSupportFor "%1$s does not support %2$s for %3$s. Continuing without using %2$s."
+StaticLibNotSupport "Static %1$s does not support %2$s. Continuing without using %2$s."
+OBSOLETE "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0"
+IttUnknownGroup "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\"."
+IttEnvVarTooLong "ittnotify: Environment variable \"%1$s\" too long: Actual length is %2$lu, max allowed length is %3$lu."
+AffUseGlobCpuidL11 "%1$s: Affinity capable, using global cpuid leaf 11 info"
+AffNotCapableUseLocCpuidL11 "%1$s: Affinity not capable, using local cpuid leaf 11 info"
+AffInfoStr "%1$s: %2$s."
+AffInfoStrStr "%1$s: %2$s - %3$s."
+OSProcToPhysicalThreadMap "%1$s: OS proc to physical thread map:"
+AffUsingFlatOS "%1$s: using \"flat\" OS <-> physical proc mapping."
+AffParseFilename "%1$s: parsing %2$s."
+MsgExiting "%1$s - exiting."
+IncompatibleLibrary "Incompatible %1$s library with version %2$s found."
+IttFunctionError "ittnotify: Function %1$s failed:"
+IttUnknownError "ittnotify: Error #%1$d."
+EnvMiddleWarn "%1$s must be set prior to first parallel region or certain API calls; ignored."
+CnsLockNotDestroyed "Lock initialized at %1$s(%2$d) was not destroyed"
+ # %1, %2, %3, %4 -- file, line, func, col
+CantLoadBalUsing "Cannot determine machine load balance - Using %1$s"
+AffNotCapableUsePthread "%1$s: Affinity not capable, using pthread info"
+AffUsePthread "%1$s: Affinity capable, using pthread info"
+OBSOLETE "Loading \"%1$s\" library failed:"
+OBSOLETE "Lookup of \"%1$s\" function failed:"
+OBSOLETE "Buffer too small."
+OBSOLETE "Error #%1$d."
+NthSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"."
+NthSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"."
+AffStrParseFilename "%1$s: %2$s - parsing %3$s."
+OBSOLETE "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group."
+AffTypeCantUseMultGroups "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"."
+AffGranCantUseMultGroups "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\"."
+AffWindowsProcGroupMap "%1$s: Mapping Windows* OS processor group <i> proc <j> to OS proc 64*<i>+<j>."
+AffOSProcToGroup "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d"
+AffBalancedNotAvail "%1$s: Affinity balanced is not available."
+OBSOLETE "%1$s: granularity=core will be used."
+EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
+FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
+AffGranUsing "%1$s: granularity=%2$s will be used."
+AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt "
+                "(nSockets@offset, nCores@offset, nThreads per core)\"."
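As a hypothetical example of this format (the numbers are invented), KMP_PLACE_THREADS=2s@1,4c,2t would request 2 sockets starting at socket offset 1, 4 cores per socket, and 2 threads per core.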
+AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture."
+AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested."
+SyntaxErrorUsing "%1$s: syntax error, using %2$s."
+AdaptiveNotSupported "%1$s: Adaptive locks are not supported; using queuing."
+EnvSyntaxError "%1$s: Invalid symbols found. Check the value \"%2$s\"."
+EnvSpacesNotAllowed "%1$s: Spaces between digits are not allowed \"%2$s\"."
+BoundToOSProcSet "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s"
+CnsLoopIncrIllegal "%1$s error: parallel loop increment and condition are inconsistent."
+NoGompCancellation "libgomp cancellation is not currently supported."
+AffThrPlaceNonUniform "KMP_PLACE_THREADS ignored: non-uniform topology."
+AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported."
+AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"."
+AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
+AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested."
+AffThrPlaceDeprecated "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value."
+AffUsingHwloc "%1$s: Affinity capable, using hwloc."
+AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism."
+AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."
+
+
+# --------------------------------------------------------------------------------------------------
+-*- HINTS -*-
+# --------------------------------------------------------------------------------------------------
+
+# Hints. A hint may be printed after a message; usually it is a longer explanation or a suggestion.
+# To maintain hint numbers (they are visible to customers), add new hints to the end.
+
+SubmitBugReport "Please submit a bug report with this message, compile and run "
+ "commands used, and machine configuration info including native "
+ "compiler and operating system versions. Faster response will be "
+ "obtained by including all program sources. For information on "
+ "submitting this issue, please see "
+ "http://www.intel.com/software/products/support/."
+OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"."
+ChangeStackLimit "Please try changing the shell stack limit or adjusting the "
+ "OMP_STACKSIZE environment variable."
+Unset_ALL_THREADS "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)."
+Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d."
+PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads."
+DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been "
+ "linked into the program. That is dangerous, since it can degrade "
+ "performance or cause incorrect results. "
+ "The best thing to do is to ensure that only a single OpenMP runtime is "
+ "linked into the process, e.g. by avoiding static linking of the OpenMP "
+ "runtime in any library. As an unsafe, unsupported, undocumented workaround "
+ "you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow "
+ "the program to continue to execute, but that may cause crashes or "
+ "silently produce incorrect results. "
+ "For more information, please see http://www.intel.com/software/products/support/."
+NameComesFrom_CPUINFO_FILE "This name is specified in environment variable KMP_CPUINFO_FILE."
+NotEnoughMemory "It seems the application required too much memory."
+ValidBoolValues "Use \"0\", \"FALSE\", \".F.\", \"off\", \"no\" as false values, "
+ "\"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values."
+BufferOverflow "Perhaps too many threads."
+RunningAtMaxPriority "Decrease priority of application. "
+                "This will allow the monitor thread to run at a higher priority than other threads."
+ChangeMonitorStackSize "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit."
+ChangeWorkerStackSize "Try changing OMP_STACKSIZE and/or the shell stack limit."
+IncreaseWorkerStackSize "Try increasing OMP_STACKSIZE or the shell stack limit."
+DecreaseWorkerStackSize "Try decreasing OMP_STACKSIZE."
+Decrease_NUM_THREADS "Try decreasing the value of OMP_NUM_THREADS."
+IncreaseMonitorStackSize "Try increasing KMP_MONITOR_STACKSIZE."
+DecreaseMonitorStackSize "Try decreasing KMP_MONITOR_STACKSIZE."
+DecreaseNumberOfThreadsInUse "Try decreasing the number of threads in use simultaneously."
+DefaultScheduleKindUsed "Will use default schedule type (%1$s)."
+GetNewerLibrary "It could be a result of using an older OMP library with a newer "
+                "compiler, or of memory corruption. Check that the proper OMP library "
+ "is linked to the application."
+CheckEnvVar "Check %1$s environment variable, its value is \"%2$s\"."
+OBSOLETE "You may want to use an %1$s library that supports %2$s interface with version %3$s."
+OBSOLETE "You may want to use an %1$s library with version %2$s."
+BadExeFormat "System error #193 is \"Bad format of EXE or DLL file\". "
+ "Usually it means the file is found, but it is corrupted or "
+ "a file for another architecture. "
+ "Check whether \"%1$s\" is a file for %2$s architecture."
+SystemLimitOnThreads "System-related limit on the number of threads."
+
+
+
+# --------------------------------------------------------------------------------------------------
+# end of file #
+# --------------------------------------------------------------------------------------------------
+
diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp.h.var b/contrib/libs/cxxsupp/openmp/include/30/omp.h.var
index 212c8c180d..9ffcfb297b 100644
--- a/contrib/libs/cxxsupp/openmp/include/30/omp.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/30/omp.h.var
@@ -1,164 +1,164 @@
-/*
- * include/30/omp.h.var
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef __OMP_H
-# define __OMP_H
-
-# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
-# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@
-# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@
-# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@"
-
-# ifdef __cplusplus
- extern "C" {
-# endif
-
-# define omp_set_num_threads ompc_set_num_threads
-# define omp_set_dynamic ompc_set_dynamic
-# define omp_set_nested ompc_set_nested
-# define omp_set_max_active_levels ompc_set_max_active_levels
-# define omp_set_schedule ompc_set_schedule
-# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
-# define omp_get_team_size ompc_get_team_size
-
-
-# define kmp_set_stacksize kmpc_set_stacksize
-# define kmp_set_stacksize_s kmpc_set_stacksize_s
-# define kmp_set_blocktime kmpc_set_blocktime
-# define kmp_set_library kmpc_set_library
-# define kmp_set_defaults kmpc_set_defaults
-# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc
-# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc
-# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc
-
-# define kmp_malloc kmpc_malloc
-# define kmp_calloc kmpc_calloc
-# define kmp_realloc kmpc_realloc
-# define kmp_free kmpc_free
-
-
-# if defined(_WIN32)
-# define __KAI_KMPC_CONVENTION __cdecl
-# else
-# define __KAI_KMPC_CONVENTION
-# endif
-
- /* schedule kind constants */
- typedef enum omp_sched_t {
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
- } omp_sched_t;
-
- /* set API functions */
- extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
- extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
- extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
- extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
- extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
-
- /* query API functions */
- extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
- extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
- extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
- extern int __KAI_KMPC_CONVENTION omp_in_final (void);
- extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
- extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
- extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
-
- /* lock API functions */
- typedef struct omp_lock_t {
- void * _lk;
- } omp_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
-
- /* nested lock API functions */
- typedef struct omp_nest_lock_t {
- void * _lk;
- } omp_nest_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
-
- /* time API functions */
- extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
- extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
-
-# include <stdlib.h>
- /* kmp API functions */
- extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
- extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
- extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
- extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
-
- /* affinity API functions */
- typedef void * kmp_affinity_mask_t;
-
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
- extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
- extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
-
- extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
- extern void __KAI_KMPC_CONVENTION kmp_free (void *);
-
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
-
-# undef __KAI_KMPC_CONVENTION
-
- /* Warning:
- The following typedefs are not standard, deprecated and will be removed in a future release.
- */
- typedef int omp_int_t;
- typedef double omp_wtime_t;
-
-# ifdef __cplusplus
- }
-# endif
-
-#endif /* __OMP_H */
-
+/*
+ * include/30/omp.h.var
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef __OMP_H
+# define __OMP_H
+
+# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
+# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@
+# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@
+# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@"
+
+# ifdef __cplusplus
+ extern "C" {
+# endif
+
+# define omp_set_num_threads ompc_set_num_threads
+# define omp_set_dynamic ompc_set_dynamic
+# define omp_set_nested ompc_set_nested
+# define omp_set_max_active_levels ompc_set_max_active_levels
+# define omp_set_schedule ompc_set_schedule
+# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
+# define omp_get_team_size ompc_get_team_size
+
+
+# define kmp_set_stacksize kmpc_set_stacksize
+# define kmp_set_stacksize_s kmpc_set_stacksize_s
+# define kmp_set_blocktime kmpc_set_blocktime
+# define kmp_set_library kmpc_set_library
+# define kmp_set_defaults kmpc_set_defaults
+# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc
+# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc
+# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc
+
+# define kmp_malloc kmpc_malloc
+# define kmp_calloc kmpc_calloc
+# define kmp_realloc kmpc_realloc
+# define kmp_free kmpc_free
+
+
+# if defined(_WIN32)
+# define __KAI_KMPC_CONVENTION __cdecl
+# else
+# define __KAI_KMPC_CONVENTION
+# endif
+
+ /* schedule kind constants */
+ typedef enum omp_sched_t {
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+ } omp_sched_t;
+
+ /* set API functions */
+ extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
+
+ /* query API functions */
+ extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_final (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
+ extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
+
+ /* lock API functions */
+ typedef struct omp_lock_t {
+ void * _lk;
+ } omp_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
+
+ /* nested lock API functions */
+ typedef struct omp_nest_lock_t {
+ void * _lk;
+ } omp_nest_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
+
+ /* time API functions */
+ extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
+ extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
+
+# include <stdlib.h>
+ /* kmp API functions */
+ extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
+ extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
+ extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
+ extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
+
+ /* affinity API functions */
+ typedef void * kmp_affinity_mask_t;
+
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
+ extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
+ extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
+
+ extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
+ extern void __KAI_KMPC_CONVENTION kmp_free (void *);
+
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
+
+# undef __KAI_KMPC_CONVENTION
+
+ /* Warning:
+ The following typedefs are not standard, deprecated and will be removed in a future release.
+ */
+ typedef int omp_int_t;
+ typedef double omp_wtime_t;
+
+# ifdef __cplusplus
+ }
+# endif
+
+#endif /* __OMP_H */
+
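Note on the header restored above: omp.h.var declares, among other things, the user-level lock API (omp_lock_t, omp_init_lock, omp_set_lock, omp_unset_lock, omp_test_lock, omp_destroy_lock). The following minimal C sketch shows how those declarations are typically used; the thread count, the counter variable, and the printf are illustrative additions, not part of the header.

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
        omp_lock_t lock;           /* plain (non-nested) lock from omp.h above      */
        int counter = 0;

        omp_init_lock(&lock);      /* a lock must be initialized before first use   */

        #pragma omp parallel num_threads(4)
        {
            omp_set_lock(&lock);   /* blocks until the calling thread owns the lock */
            counter += omp_get_thread_num();
            omp_unset_lock(&lock); /* release so other threads may enter            */
        }

        omp_destroy_lock(&lock);   /* free runtime resources once no thread uses it */
        printf("counter = %d\n", counter);
        return 0;
    }
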
diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var
index f46b5224ac..99122067af 100644
--- a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var
+++ b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f.var
@@ -1,633 +1,633 @@
-! include/30/omp_lib.f.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
-!***
-!*** Some of the directives for the following routine extend past column 72,
-!*** so process this file in 132-column mode.
-!***
-
-!dec$ fixedformlinesize:132
-
- module omp_lib_kinds
-
- integer, parameter :: omp_integer_kind = 4
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = 4
- integer, parameter :: omp_lock_kind = int_ptr_kind()
- integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = int_ptr_kind()
- integer, parameter :: kmp_size_t_kind = int_ptr_kind()
- integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
-
- end module omp_lib_kinds
-
- module omp_lib
-
- use omp_lib_kinds
-
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@'
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_get_dynamic()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_team_size
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_wtime()
- double precision omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick ()
- double precision omp_get_wtick
- end function omp_get_wtick
-
- subroutine omp_init_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial()
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround()
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput()
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string)
- character*(*) string
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s()
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind) size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind) nelem
- integer (kind=kmp_size_t_kind) elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind) ptr
- integer (kind=kmp_size_t_kind) size
- end function kmp_realloc
-
- subroutine kmp_free(ptr)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on()
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off()
- end subroutine kmp_set_warnings_off
-
- end interface
-
-!dec$ if defined(_WIN32)
-!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
-
-!***
-!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
-!*** option is specified. The alias attribute ensures that the specified
-!*** string is used as the entry point.
-!***
-!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
-!*** underscore prepended. On the Windows* OS Intel(R) 64
-!*** architecture, no underscore is prepended.
-!***
-
-!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads
-!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic
-!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested
-!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads
-!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads
-!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num
-!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs
-!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel
-!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic
-!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested
-!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
-!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
-!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
-!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level
-!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
-!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
-!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size
-!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule
-!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule
-!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime
-!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick
-
-!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ else
-
-!***
-!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
-!***
-
-!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads
-!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic
-!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested
-!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads
-!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads
-!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num
-!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs
-!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel
-!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic
-!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested
-!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
-!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
-!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
-!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level
-!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
-!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
-!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size
-!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule
-!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule
-!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime
-!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick
-
-!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'_KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ endif
-!dec$ endif
-
-!dec$ if defined(__linux)
-
-!***
-!*** The Linux* OS entry points are in lowercase, with an underscore appended.
-!***
-
-!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'omp_get_level_'::omp_get_level
-!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
-
-!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'kmp_free_'::kmp_free
-
-!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ endif
-
-!dec$ if defined(__APPLE__)
-
-!***
-!*** The Mac entry points are in lowercase, with both an underscore
-!*** appended and an underscore prepended.
-!***
-
-!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'_omp_get_level_'::omp_get_level
-!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
-
-!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'_kmp_free_'::kmp_free
-
-!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ endif
-
- end module omp_lib
-
+! include/30/omp_lib.f.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+!***
+!*** Some of the directives for the following routine extend past column 72,
+!*** so process this file in 132-column mode.
+!***
+
+!dec$ fixedformlinesize:132
+
+ module omp_lib_kinds
+
+ integer, parameter :: omp_integer_kind = 4
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = 4
+ integer, parameter :: omp_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = int_ptr_kind()
+ integer, parameter :: kmp_size_t_kind = int_ptr_kind()
+ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+
+ end module omp_lib_kinds
+
+ module omp_lib
+
+ use omp_lib_kinds
+
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@'
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_get_dynamic()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_team_size
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_wtime()
+ double precision omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick ()
+ double precision omp_get_wtick
+ end function omp_get_wtick
+
+ subroutine omp_init_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial()
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround()
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput()
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string)
+ character*(*) string
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s()
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind) nelem
+ integer (kind=kmp_size_t_kind) elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind) ptr
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on()
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off()
+ end subroutine kmp_set_warnings_off
+
+ end interface
+
+!dec$ if defined(_WIN32)
+!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
+
+!***
+!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
+!*** option is specified. The alias attribute ensures that the specified
+!*** string is used as the entry point.
+!***
+!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
+!*** underscore prepended. On the Windows* OS Intel(R) 64
+!*** architecture, no underscore is prepended.
+!***
+
+!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads
+!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic
+!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested
+!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads
+!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads
+!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num
+!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs
+!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel
+!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic
+!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested
+!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
+!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
+!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
+!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level
+!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
+!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
+!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size
+!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule
+!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule
+!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime
+!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick
+
+!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
+!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock' :: omp_set_lock
+!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock
+!dec$ attributes alias:'omp_test_lock' :: omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock
+!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock
+
+!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ else
+
+!***
+!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
+!***
+
+!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads
+!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic
+!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested
+!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads
+!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads
+!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num
+!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs
+!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel
+!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic
+!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested
+!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
+!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
+!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
+!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level
+!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
+!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
+!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size
+!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule
+!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule
+!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime
+!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick
+
+!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
+!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
+!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
+
+!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'_KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ endif
+!dec$ endif
+
+!dec$ if defined(__linux)
+
+!***
+!*** The Linux* OS entry points are in lowercase, with an underscore appended.
+!***
+
+!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'omp_get_level_'::omp_get_level
+!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
+
+!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'kmp_free_'::kmp_free
+
+!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
+
+!dec$ endif
+
+!dec$ if defined(__APPLE__)
+
+!***
+!*** The Mac entry points are in lowercase, with both an underscore
+!*** appended and an underscore prepended.
+!***
+
+!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'_omp_get_level_'::omp_get_level
+!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
+
+!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'_kmp_free_'::kmp_free
+
+!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
+
+!dec$ endif
+
+ end module omp_lib
+
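Note: omp_lib.f.var above provides fixed-form Fortran interfaces for the same omp_* and kmp_* entry points declared in omp.h.var earlier in this diff. For the kmp_* memory extensions (kmp_malloc, kmp_calloc, kmp_realloc, kmp_free), a short C sketch follows; the buffer size and the work done on it are placeholders, and describing kmp_malloc as a runtime-managed, per-thread allocation is based on libomp's documented behavior rather than anything stated in these files.

    #include <omp.h>

    /* Sketch: per-thread scratch buffers obtained through the libomp
     * kmp_malloc/kmp_free extensions declared in omp.h.var above.    */
    void scale_in_parallel(double *data, int n) {
        #pragma omp parallel
        {
            double *scratch = (double *)kmp_malloc((size_t)n * sizeof(double));

            #pragma omp for
            for (int i = 0; i < n; ++i) {
                scratch[i] = 2.0 * data[i];   /* placeholder work */
                data[i]    = scratch[i];
            }

            kmp_free(scratch);                /* return memory to the runtime */
        }
    }
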
diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var
index 328e2cfa8d..3325486d26 100644
--- a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var
+++ b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.f90.var
@@ -1,358 +1,358 @@
-! include/30/omp_lib.f90.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
- module omp_lib_kinds
-
- use, intrinsic :: iso_c_binding
-
- integer, parameter :: omp_integer_kind = c_int
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = c_float
- integer, parameter :: kmp_double_kind = c_double
- integer, parameter :: omp_lock_kind = c_intptr_t
- integer, parameter :: omp_nest_lock_kind = c_intptr_t
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = c_intptr_t
- integer, parameter :: kmp_size_t_kind = c_size_t
- integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
-
- end module omp_lib_kinds
-
- module omp_lib
-
- use omp_lib_kinds
-
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*) kmp_build_date
- parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_in_final() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_final
- end function omp_in_final
-
- function omp_get_dynamic() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) :: omp_get_level
- end function omp_get_level
-
- function omp_get_active_level() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) :: omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_size
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier) bind(c)
- use omp_lib_kinds
- integer (kind=omp_sched_kind), value :: kind
- integer (kind=omp_integer_kind), value :: modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier) bind(c)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) :: kind
- integer (kind=omp_integer_kind) :: modifier
- end subroutine omp_get_schedule
-
- function omp_get_wtime() bind(c)
- use omp_lib_kinds
- real (kind=kmp_double_kind) omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick() bind(c)
- use omp_lib_kinds
- real (kind=kmp_double_kind) omp_get_wtick
- end function omp_get_wtick
-
- subroutine omp_init_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind), value :: size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial() bind(c)
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround() bind(c)
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput() bind(c)
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string) bind(c)
- use, intrinsic :: iso_c_binding
- character (kind=c_char) :: string(*)
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s() bind(c)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind), value :: nelem
- integer (kind=kmp_size_t_kind), value :: elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind), value :: ptr
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_realloc
-
- subroutine kmp_free(ptr) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind), value :: ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on() bind(c)
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off() bind(c)
- end subroutine kmp_set_warnings_off
-
- end interface
-
- end module omp_lib
+! include/30/omp_lib.f90.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+ module omp_lib_kinds
+
+ use, intrinsic :: iso_c_binding
+
+ integer, parameter :: omp_integer_kind = c_int
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = c_float
+ integer, parameter :: kmp_double_kind = c_double
+ integer, parameter :: omp_lock_kind = c_intptr_t
+ integer, parameter :: omp_nest_lock_kind = c_intptr_t
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = c_intptr_t
+ integer, parameter :: kmp_size_t_kind = c_size_t
+ integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
+
+ end module omp_lib_kinds
+
+ module omp_lib
+
+ use omp_lib_kinds
+
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*) kmp_build_date
+ parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_in_final() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_final
+ end function omp_in_final
+
+ function omp_get_dynamic() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) :: omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) :: omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_size
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind), value :: kind
+ integer (kind=omp_integer_kind), value :: modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) :: kind
+ integer (kind=omp_integer_kind) :: modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_wtime() bind(c)
+ use omp_lib_kinds
+ real (kind=kmp_double_kind) omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick() bind(c)
+ use omp_lib_kinds
+ real (kind=kmp_double_kind) omp_get_wtick
+ end function omp_get_wtick
+
+ subroutine omp_init_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind), value :: size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial() bind(c)
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround() bind(c)
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput() bind(c)
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string) bind(c)
+ use, intrinsic :: iso_c_binding
+ character (kind=c_char) :: string(*)
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s() bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind), value :: nelem
+ integer (kind=kmp_size_t_kind), value :: elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind), value :: ptr
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind), value :: ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on() bind(c)
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off() bind(c)
+ end subroutine kmp_set_warnings_off
+
+ end interface
+
+ end module omp_lib
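
! Illustrative note (not part of the diff): the omp_lib module declared in
! omp_lib.f90.var above is what a Fortran program sees once the build expands
! the @LIBOMP_*@ placeholders. A minimal usage sketch against that interface,
! with all names taken from the declarations above, might look like:
!
!       program omp_lib_example
!         ! Sketch only; assumes the generated omp_lib module is available.
!         use omp_lib
!         call omp_set_num_threads(4)
!         !$omp parallel
!         print *, 'thread', omp_get_thread_num(), 'of', omp_get_num_threads()
!         !$omp end parallel
!       end program omp_lib_example
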
diff --git a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var
index c442f073b2..84ed39b321 100644
--- a/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/30/omp_lib.h.var
@@ -1,638 +1,638 @@
-! include/30/omp_lib.h.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
-!***
-!*** Some of the directives for the following routine extend past column 72,
-!*** so process this file in 132-column mode.
-!***
-
-!dec$ fixedformlinesize:132
-
- integer, parameter :: omp_integer_kind = 4
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = 4
- integer, parameter :: omp_lock_kind = int_ptr_kind()
- integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = int_ptr_kind()
- integer, parameter :: kmp_size_t_kind = int_ptr_kind()
- integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*) kmp_build_date
- parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads)
- import
- integer (kind=omp_integer_kind) nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable)
- import
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable)
- import
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads()
- import
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads()
- import
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num()
- import
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs()
- import
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel()
- import
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_in_final()
- import
- logical (kind=omp_logical_kind) omp_in_final
- end function omp_in_final
-
- function omp_get_dynamic()
- import
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested()
- import
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit()
- import
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels)
- import
- integer (kind=omp_integer_kind) max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels()
- import
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level()
- import
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level()
- import
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level)
- import
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level)
- import
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_team_size
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier)
- import
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier)
- import
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_wtime()
- double precision omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick ()
- double precision omp_get_wtick
- end function omp_get_wtick
-
- subroutine omp_init_lock(lockvar)
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar)
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar)
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar)
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar)
- import
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar)
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar)
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar)
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar)
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar)
- import
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size)
- import
- integer (kind=omp_integer_kind) size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size)
- import
- integer (kind=kmp_size_t_kind) size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec)
- import
- integer (kind=omp_integer_kind) msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial()
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround()
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput()
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum)
- import
- integer (kind=omp_integer_kind) libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string)
- character*(*) string
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize()
- import
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s()
- import
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime()
- import
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library()
- import
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask)
- import
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc()
- import
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask)
- import
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask)
- import
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask)
- import
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask)
- import
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size)
- import
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind) size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize)
- import
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind) nelem
- integer (kind=kmp_size_t_kind) elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size)
- import
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind) ptr
- integer (kind=kmp_size_t_kind) size
- end function kmp_realloc
-
- subroutine kmp_free(ptr)
- import
- integer (kind=kmp_pointer_kind) ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on()
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off()
- end subroutine kmp_set_warnings_off
-
- end interface
-
-!dec$ if defined(_WIN32)
-!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
-
-!***
-!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
-!*** option is specified. The alias attribute ensures that the specified
-!*** string is used as the entry point.
-!***
-!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
-!*** underscore prepended. On the Windows* OS Intel(R) 64
-!*** architecture, no underscore is prepended.
-!***
-
-!dec$ attributes alias:'OMP_SET_NUM_THREADS'::omp_set_num_threads
-!dec$ attributes alias:'OMP_SET_DYNAMIC'::omp_set_dynamic
-!dec$ attributes alias:'OMP_SET_NESTED'::omp_set_nested
-!dec$ attributes alias:'OMP_GET_NUM_THREADS'::omp_get_num_threads
-!dec$ attributes alias:'OMP_GET_MAX_THREADS'::omp_get_max_threads
-!dec$ attributes alias:'OMP_GET_THREAD_NUM'::omp_get_thread_num
-!dec$ attributes alias:'OMP_GET_NUM_PROCS'::omp_get_num_procs
-!dec$ attributes alias:'OMP_IN_PARALLEL'::omp_in_parallel
-!dec$ attributes alias:'OMP_IN_FINAL'::omp_in_final
-!dec$ attributes alias:'OMP_GET_DYNAMIC'::omp_get_dynamic
-!dec$ attributes alias:'OMP_GET_NESTED'::omp_get_nested
-!dec$ attributes alias:'OMP_GET_THREAD_LIMIT'::omp_get_thread_limit
-!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels
-!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels
-!dec$ attributes alias:'OMP_GET_LEVEL'::omp_get_level
-!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL'::omp_get_active_level
-!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'OMP_GET_TEAM_SIZE'::omp_get_team_size
-!dec$ attributes alias:'OMP_SET_SCHEDULE'::omp_set_schedule
-!dec$ attributes alias:'OMP_GET_SCHEDULE'::omp_get_schedule
-!dec$ attributes alias:'OMP_GET_WTIME'::omp_get_wtime
-!dec$ attributes alias:'OMP_GET_WTICK'::omp_get_wtick
-
-!dec$ attributes alias:'omp_init_lock'::omp_init_lock
-!dec$ attributes alias:'omp_destroy_lock'::omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock'::omp_set_lock
-!dec$ attributes alias:'omp_unset_lock'::omp_unset_lock
-!dec$ attributes alias:'omp_test_lock'::omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock'::omp_init_nest_lock
-!dec$ attributes alias:'omp_destroy_nest_lock'::omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock'::omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock'::omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock'::omp_test_nest_lock
-
-!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'KMP_SET_DEFAULTS'::kmp_set_defaults
-!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ else
-
-!***
-!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
-!***
-
-!dec$ attributes alias:'_OMP_SET_NUM_THREADS'::omp_set_num_threads
-!dec$ attributes alias:'_OMP_SET_DYNAMIC'::omp_set_dynamic
-!dec$ attributes alias:'_OMP_SET_NESTED'::omp_set_nested
-!dec$ attributes alias:'_OMP_GET_NUM_THREADS'::omp_get_num_threads
-!dec$ attributes alias:'_OMP_GET_MAX_THREADS'::omp_get_max_threads
-!dec$ attributes alias:'_OMP_GET_THREAD_NUM'::omp_get_thread_num
-!dec$ attributes alias:'_OMP_GET_NUM_PROCS'::omp_get_num_procs
-!dec$ attributes alias:'_OMP_IN_PARALLEL'::omp_in_parallel
-!dec$ attributes alias:'_OMP_IN_FINAL'::omp_in_final
-!dec$ attributes alias:'_OMP_GET_DYNAMIC'::omp_get_dynamic
-!dec$ attributes alias:'_OMP_GET_NESTED'::omp_get_nested
-!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT'::omp_get_thread_limit
-!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels
-!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels
-!dec$ attributes alias:'_OMP_GET_LEVEL'::omp_get_level
-!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL'::omp_get_active_level
-!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_OMP_GET_TEAM_SIZE'::omp_get_team_size
-!dec$ attributes alias:'_OMP_SET_SCHEDULE'::omp_set_schedule
-!dec$ attributes alias:'_OMP_GET_SCHEDULE'::omp_get_schedule
-!dec$ attributes alias:'_OMP_GET_WTIME'::omp_get_wtime
-!dec$ attributes alias:'_OMP_GET_WTICK'::omp_get_wtick
-
-!dec$ attributes alias:'_omp_init_lock'::omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock'::omp_test_nest_lock
-
-!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'_KMP_SET_DEFAULTS'::kmp_set_defaults
-!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'_KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ endif
-!dec$ endif
-
-!dec$ if defined(__linux)
-
-!***
-!*** The Linux* OS entry points are in lowercase, with an underscore appended.
-!***
-
-!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'omp_in_final_'::omp_in_final
-!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'omp_get_level_'::omp_get_level
-!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
-
-!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'kmp_set_defaults_'::kmp_set_defaults
-!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'kmp_free_'::kmp_free
-
-!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ endif
-
-!dec$ if defined(__APPLE__)
-
-!***
-!*** The Mac entry points are in lowercase, with both an underscore
-!*** appended and an underscore prepended.
-!***
-
-!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'_omp_in_final_'::omp_in_final
-!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'_omp_get_level_'::omp_get_level
-!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
-
-!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'_kmp_set_defaults_'::kmp_set_defaults
-!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'_kmp_free_'::kmp_free
-
-!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ endif
-
-
+! include/30/omp_lib.h.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+!***
+!*** Some of the directives for the following routine extend past column 72,
+!*** so process this file in 132-column mode.
+!***
+
+!dec$ fixedformlinesize:132
+
+ integer, parameter :: omp_integer_kind = 4
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = 4
+ integer, parameter :: omp_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = int_ptr_kind()
+ integer, parameter :: kmp_size_t_kind = int_ptr_kind()
+ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*) kmp_build_date
+ parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads)
+ import
+ integer (kind=omp_integer_kind) nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable)
+ import
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable)
+ import
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads()
+ import
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads()
+ import
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num()
+ import
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs()
+ import
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel()
+ import
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_in_final()
+ import
+ logical (kind=omp_logical_kind) omp_in_final
+ end function omp_in_final
+
+ function omp_get_dynamic()
+ import
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested()
+ import
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit()
+ import
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels)
+ import
+ integer (kind=omp_integer_kind) max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels()
+ import
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level()
+ import
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level()
+ import
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level)
+ import
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level)
+ import
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_team_size
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier)
+ import
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier)
+ import
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_wtime()
+ double precision omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick ()
+ double precision omp_get_wtick
+ end function omp_get_wtick
+
+ subroutine omp_init_lock(lockvar)
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar)
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar)
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar)
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar)
+ import
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar)
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar)
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar)
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar)
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar)
+ import
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size)
+ import
+ integer (kind=omp_integer_kind) size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size)
+ import
+ integer (kind=kmp_size_t_kind) size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec)
+ import
+ integer (kind=omp_integer_kind) msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial()
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround()
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput()
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum)
+ import
+ integer (kind=omp_integer_kind) libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string)
+ character*(*) string
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize()
+ import
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s()
+ import
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime()
+ import
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library()
+ import
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask)
+ import
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc()
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask)
+ import
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask)
+ import
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask)
+ import
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask)
+ import
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size)
+ import
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize)
+ import
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind) nelem
+ integer (kind=kmp_size_t_kind) elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size)
+ import
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind) ptr
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr)
+ import
+ integer (kind=kmp_pointer_kind) ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on()
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off()
+ end subroutine kmp_set_warnings_off
+
+ end interface
+
+!dec$ if defined(_WIN32)
+!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
+
+!***
+!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
+!*** option is specified. The alias attribute ensures that the specified
+!*** string is used as the entry point.
+!***
+!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
+!*** underscore prepended. On the Windows* OS Intel(R) 64
+!*** architecture, no underscore is prepended.
+!***
+
+!dec$ attributes alias:'OMP_SET_NUM_THREADS'::omp_set_num_threads
+!dec$ attributes alias:'OMP_SET_DYNAMIC'::omp_set_dynamic
+!dec$ attributes alias:'OMP_SET_NESTED'::omp_set_nested
+!dec$ attributes alias:'OMP_GET_NUM_THREADS'::omp_get_num_threads
+!dec$ attributes alias:'OMP_GET_MAX_THREADS'::omp_get_max_threads
+!dec$ attributes alias:'OMP_GET_THREAD_NUM'::omp_get_thread_num
+!dec$ attributes alias:'OMP_GET_NUM_PROCS'::omp_get_num_procs
+!dec$ attributes alias:'OMP_IN_PARALLEL'::omp_in_parallel
+!dec$ attributes alias:'OMP_IN_FINAL'::omp_in_final
+!dec$ attributes alias:'OMP_GET_DYNAMIC'::omp_get_dynamic
+!dec$ attributes alias:'OMP_GET_NESTED'::omp_get_nested
+!dec$ attributes alias:'OMP_GET_THREAD_LIMIT'::omp_get_thread_limit
+!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels
+!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels
+!dec$ attributes alias:'OMP_GET_LEVEL'::omp_get_level
+!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL'::omp_get_active_level
+!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'OMP_GET_TEAM_SIZE'::omp_get_team_size
+!dec$ attributes alias:'OMP_SET_SCHEDULE'::omp_set_schedule
+!dec$ attributes alias:'OMP_GET_SCHEDULE'::omp_get_schedule
+!dec$ attributes alias:'OMP_GET_WTIME'::omp_get_wtime
+!dec$ attributes alias:'OMP_GET_WTICK'::omp_get_wtick
+
+!dec$ attributes alias:'omp_init_lock'::omp_init_lock
+!dec$ attributes alias:'omp_destroy_lock'::omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock'::omp_set_lock
+!dec$ attributes alias:'omp_unset_lock'::omp_unset_lock
+!dec$ attributes alias:'omp_test_lock'::omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock'::omp_init_nest_lock
+!dec$ attributes alias:'omp_destroy_nest_lock'::omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock'::omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock'::omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock'::omp_test_nest_lock
+
+!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'KMP_SET_DEFAULTS'::kmp_set_defaults
+!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ else
+
+!***
+!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
+!***
+
+!dec$ attributes alias:'_OMP_SET_NUM_THREADS'::omp_set_num_threads
+!dec$ attributes alias:'_OMP_SET_DYNAMIC'::omp_set_dynamic
+!dec$ attributes alias:'_OMP_SET_NESTED'::omp_set_nested
+!dec$ attributes alias:'_OMP_GET_NUM_THREADS'::omp_get_num_threads
+!dec$ attributes alias:'_OMP_GET_MAX_THREADS'::omp_get_max_threads
+!dec$ attributes alias:'_OMP_GET_THREAD_NUM'::omp_get_thread_num
+!dec$ attributes alias:'_OMP_GET_NUM_PROCS'::omp_get_num_procs
+!dec$ attributes alias:'_OMP_IN_PARALLEL'::omp_in_parallel
+!dec$ attributes alias:'_OMP_IN_FINAL'::omp_in_final
+!dec$ attributes alias:'_OMP_GET_DYNAMIC'::omp_get_dynamic
+!dec$ attributes alias:'_OMP_GET_NESTED'::omp_get_nested
+!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT'::omp_get_thread_limit
+!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS'::omp_set_max_active_levels
+!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS'::omp_get_max_active_levels
+!dec$ attributes alias:'_OMP_GET_LEVEL'::omp_get_level
+!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL'::omp_get_active_level
+!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'_OMP_GET_TEAM_SIZE'::omp_get_team_size
+!dec$ attributes alias:'_OMP_SET_SCHEDULE'::omp_set_schedule
+!dec$ attributes alias:'_OMP_GET_SCHEDULE'::omp_get_schedule
+!dec$ attributes alias:'_OMP_GET_WTIME'::omp_get_wtime
+!dec$ attributes alias:'_OMP_GET_WTICK'::omp_get_wtick
+
+!dec$ attributes alias:'_omp_init_lock'::omp_init_lock
+!dec$ attributes alias:'_omp_destroy_lock'::omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock'::omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock'::omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock'::omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock'::omp_init_nest_lock
+!dec$ attributes alias:'_omp_destroy_nest_lock'::omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock'::omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock'::omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock'::omp_test_nest_lock
+
+!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'_KMP_SET_DEFAULTS'::kmp_set_defaults
+!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'_KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ endif
+!dec$ endif
+
+!dec$ if defined(__linux)
+
+!***
+!*** The Linux* OS entry points are in lowercase, with an underscore appended.
+!***
+
+!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'omp_in_final_'::omp_in_final
+!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'omp_get_level_'::omp_get_level
+!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
+
+!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'kmp_set_defaults_'::kmp_set_defaults
+!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'kmp_free_'::kmp_free
+
+!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
+
+!dec$ endif
+
+!dec$ if defined(__APPLE__)
+
+!***
+!*** The Mac entry points are in lowercase, with both an underscore
+!*** appended and an underscore prepended.
+!***
+
+!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'_omp_in_final_'::omp_in_final
+!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'_omp_get_level_'::omp_get_level
+!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
+
+!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'_kmp_set_defaults_'::kmp_set_defaults
+!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'_kmp_free_'::kmp_free
+
+!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
+
+!dec$ endif
+
+
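The alias directives above only rename symbols; the actual entry points live in the C/C++ runtime. As a rough illustration of the Linux* OS rule (lowercase name, trailing underscore, Fortran arguments passed by reference), a hypothetical C-side shim (not part of this patch) could look like this:

    /* Hypothetical shim illustrating the Linux* OS Fortran naming rule:
     * lowercase entry point with a trailing underscore, argument by reference. */
    #include <omp.h>

    void omp_set_num_threads_(int *num_threads)
    {
        omp_set_num_threads(*num_threads);  /* forward to the C entry point */
    }

In libomp itself these Fortran entry points are provided by the runtime; the shim is only meant to show what the alias strings in the directives above resolve to.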
diff --git a/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var b/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var
index 773174a1f5..83b4f1e3df 100644
--- a/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/30/ompt.h.var
@@ -1,487 +1,487 @@
-/*
- * include/30/ompt.h.var
- */
-
-#ifndef __OMPT__
-#define __OMPT__
-
-/*****************************************************************************
- * system include files
- *****************************************************************************/
-
-#include <stdint.h>
-
-
-
-/*****************************************************************************
- * iteration macros
- *****************************************************************************/
-
-#define FOREACH_OMPT_INQUIRY_FN(macro) \
- macro (ompt_enumerate_state) \
- \
- macro (ompt_set_callback) \
- macro (ompt_get_callback) \
- \
- macro (ompt_get_idle_frame) \
- macro (ompt_get_task_frame) \
- \
- macro (ompt_get_state) \
- \
- macro (ompt_get_parallel_id) \
- macro (ompt_get_parallel_team_size) \
- macro (ompt_get_task_id) \
- macro (ompt_get_thread_id)
-
-#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
- macro (ompt_idle) \
- macro (ompt_overhead) \
- macro (ompt_barrier_wait) \
- macro (ompt_task_wait) \
- macro (ompt_mutex_wait)
-
-#define FOREACH_OMPT_STATE(macro) \
- \
- /* first */ \
- macro (ompt_state_first, 0x71) /* initial enumeration state */ \
- \
- /* work states (0..15) */ \
- macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
- macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
- macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
- \
- /* idle (16..31) */ \
- macro (ompt_state_idle, 0x10) /* waiting for work */ \
- \
- /* overhead states (32..63) */ \
- macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
- \
- /* barrier wait states (64..79) */ \
- macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
- macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
- macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
- \
- /* task wait states (80..95) */ \
- macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
- macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
- \
- /* mutex wait states (96..111) */ \
- macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
- macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
- macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
- macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
- macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
- macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
- \
- /* misc (112..127) */ \
- macro (ompt_state_undefined, 0x70) /* undefined thread state */
-
-
-#define FOREACH_OMPT_EVENT(macro) \
- \
- /*--- Mandatory Events ---*/ \
- macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
- macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
- \
- macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
- macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
- \
- macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
- macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
- \
- macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
- \
- macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
- \
- /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
- macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
- macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
- \
- macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
- macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
- \
- macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
- macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
- \
- macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
- macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
- \
- macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
- macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
- macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
- \
- macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
- \
- macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
- \
- /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
- macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
- macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
- \
- macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
- macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
- \
- macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
- \
- macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
- macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
- \
- macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
- macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
- \
- macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
- macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
- \
- macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
- macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
- \
- macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
- macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
- \
- macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
- macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
- \
- macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
- macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
- \
- macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
- macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at taskwait end */ \
- \
- macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
- macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
- \
- macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
- \
- macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
- macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
- macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
- macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
- macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
- \
- macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
- macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
- macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
- macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
- macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
- macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
- \
- macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
- macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
- \
- macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
- macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
- \
- macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */
-
-
-
-/*****************************************************************************
- * data types
- *****************************************************************************/
-
-/*---------------------
- * identifiers
- *---------------------*/
-
-typedef uint64_t ompt_thread_id_t;
-#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_task_id_t;
-#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_parallel_id_t;
-#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_wait_id_t;
-#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
-
-
-/*---------------------
- * ompt_frame_t
- *---------------------*/
-
-typedef struct ompt_frame_s {
- void *exit_runtime_frame; /* next frame is user code */
- void *reenter_runtime_frame; /* previous frame is user code */
-} ompt_frame_t;
-
-
-/*****************************************************************************
- * enumerations for thread states and runtime events
- *****************************************************************************/
-
-/*---------------------
- * runtime states
- *---------------------*/
-
-typedef enum {
-#define ompt_state_macro(state, code) state = code,
- FOREACH_OMPT_STATE(ompt_state_macro)
-#undef ompt_state_macro
-} ompt_state_t;
-
-
-/*---------------------
- * runtime events
- *---------------------*/
-
-typedef enum {
-#define ompt_event_macro(event, callback, eventid) event = eventid,
- FOREACH_OMPT_EVENT(ompt_event_macro)
-#undef ompt_event_macro
-} ompt_event_t;
-
-
-/*---------------------
- * set callback results
- *---------------------*/
-typedef enum {
- ompt_set_result_registration_error = 0,
- ompt_set_result_event_may_occur_no_callback = 1,
- ompt_set_result_event_never_occurs = 2,
- ompt_set_result_event_may_occur_callback_some = 3,
- ompt_set_result_event_may_occur_callback_always = 4,
-} ompt_set_result_t;
-
-
-
-/*****************************************************************************
- * callback signatures
- *****************************************************************************/
-
-/* initialization */
-typedef void (*ompt_interface_fn_t)(void);
-
-typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
- const char * /* entry point to look up */
-);
-
-/* threads */
-typedef void (*ompt_thread_callback_t) (
- ompt_thread_id_t thread_id /* ID of thread */
-);
-
-typedef enum {
- ompt_thread_initial = 1, // start the enumeration at 1
- ompt_thread_worker = 2,
- ompt_thread_other = 3
-} ompt_thread_type_t;
-
-typedef enum {
- ompt_invoker_program = 0, /* program invokes master task */
- ompt_invoker_runtime = 1 /* runtime invokes master task */
-} ompt_invoker_t;
-
-typedef void (*ompt_thread_type_callback_t) (
- ompt_thread_type_t thread_type, /* type of thread */
- ompt_thread_id_t thread_id /* ID of thread */
-);
-
-typedef void (*ompt_wait_callback_t) (
- ompt_wait_id_t wait_id /* wait id */
-);
-
-/* parallel and workshares */
-typedef void (*ompt_parallel_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t task_id /* id of task */
-);
-
-typedef void (*ompt_new_workshare_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t parent_task_id, /* id of parent task */
- void *workshare_function /* pointer to outlined function */
-);
-
-typedef void (*ompt_new_parallel_callback_t) (
- ompt_task_id_t parent_task_id, /* id of parent task */
- ompt_frame_t *parent_task_frame, /* frame data of parent task */
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- uint32_t requested_team_size, /* number of threads in team */
- void *parallel_function, /* pointer to outlined function */
- ompt_invoker_t invoker /* who invokes master task? */
-);
-
-typedef void (*ompt_end_parallel_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t task_id, /* id of task */
- ompt_invoker_t invoker /* who invokes master task? */
-);
-
-/* tasks */
-typedef void (*ompt_task_callback_t) (
- ompt_task_id_t task_id /* id of task */
-);
-
-typedef void (*ompt_task_pair_callback_t) (
- ompt_task_id_t first_task_id,
- ompt_task_id_t second_task_id
-);
-
-typedef void (*ompt_new_task_callback_t) (
- ompt_task_id_t parent_task_id, /* id of parent task */
- ompt_frame_t *parent_task_frame, /* frame data for parent task */
- ompt_task_id_t new_task_id, /* id of created task */
- void *task_function /* pointer to outlined function */
-);
-
-/* program */
-typedef void (*ompt_control_callback_t) (
- uint64_t command, /* command of control call */
- uint64_t modifier /* modifier of control call */
-);
-
-typedef void (*ompt_callback_t)(void);
-
-
-/****************************************************************************
- * ompt API
- ***************************************************************************/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define OMPT_API_FNTYPE(fn) fn##_t
-
-#define OMPT_API_FUNCTION(return_type, fn, args) \
- typedef return_type (*OMPT_API_FNTYPE(fn)) args
-
-
-
-/****************************************************************************
- * INQUIRY FUNCTIONS
- ***************************************************************************/
-
-/* state */
-OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
- ompt_wait_id_t *ompt_wait_id
-));
-
-/* thread */
-OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
-
-OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
-
-/* parallel region */
-OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
- int ancestor_level
-));
-
-OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
- int ancestor_level
-));
-
-/* task */
-OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
- int depth
-));
-
-OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
- int depth
-));
-
-
-
-/****************************************************************************
- * PLACEHOLDERS FOR PERFORMANCE REPORTING
- ***************************************************************************/
-
-/* idle */
-OMPT_API_FUNCTION(void, ompt_idle, (
- void
-));
-
-/* overhead */
-OMPT_API_FUNCTION(void, ompt_overhead, (
- void
-));
-
-/* barrier wait */
-OMPT_API_FUNCTION(void, ompt_barrier_wait, (
- void
-));
-
-/* task wait */
-OMPT_API_FUNCTION(void, ompt_task_wait, (
- void
-));
-
-/* mutex wait */
-OMPT_API_FUNCTION(void, ompt_mutex_wait, (
- void
-));
-
-
-
-/****************************************************************************
- * INITIALIZATION FUNCTIONS
- ***************************************************************************/
-
-OMPT_API_FUNCTION(void, ompt_initialize, (
- ompt_function_lookup_t ompt_fn_lookup,
- const char *runtime_version,
- unsigned int ompt_version
-));
-
-
-/* initialization interface to be defined by tool */
-ompt_initialize_t ompt_tool(void);
-
-typedef enum opt_init_mode_e {
- ompt_init_mode_never = 0,
- ompt_init_mode_false = 1,
- ompt_init_mode_true = 2,
- ompt_init_mode_always = 3
-} ompt_init_mode_t;
-
-OMPT_API_FUNCTION(int, ompt_set_callback, (
- ompt_event_t event,
- ompt_callback_t callback
-));
-
-typedef enum ompt_set_callback_rc_e { /* non-standard */
- ompt_set_callback_error = 0,
- ompt_has_event_no_callback = 1,
- ompt_no_event_no_callback = 2,
- ompt_has_event_may_callback = 3,
- ompt_has_event_must_callback = 4,
-} ompt_set_callback_rc_t;
-
-
-OMPT_API_FUNCTION(int, ompt_get_callback, (
- ompt_event_t event,
- ompt_callback_t *callback
-));
-
-
-
-/****************************************************************************
- * MISCELLANEOUS FUNCTIONS
- ***************************************************************************/
-
-/* control */
-#if defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp declare target
-#endif
-void ompt_control(
- uint64_t command,
- uint64_t modifier
-);
-#if defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp end declare target
-#endif
-
-/* state enumeration */
-OMPT_API_FUNCTION(int, ompt_enumerate_state, (
- int current_state,
- int *next_state,
- const char **next_state_name
-));
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif
-
+/*
+ * include/30/ompt.h.var
+ */
+
+#ifndef __OMPT__
+#define __OMPT__
+
+/*****************************************************************************
+ * system include files
+ *****************************************************************************/
+
+#include <stdint.h>
+
+
+
+/*****************************************************************************
+ * iteration macros
+ *****************************************************************************/
+
+#define FOREACH_OMPT_INQUIRY_FN(macro) \
+ macro (ompt_enumerate_state) \
+ \
+ macro (ompt_set_callback) \
+ macro (ompt_get_callback) \
+ \
+ macro (ompt_get_idle_frame) \
+ macro (ompt_get_task_frame) \
+ \
+ macro (ompt_get_state) \
+ \
+ macro (ompt_get_parallel_id) \
+ macro (ompt_get_parallel_team_size) \
+ macro (ompt_get_task_id) \
+ macro (ompt_get_thread_id)
+
+#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
+ macro (ompt_idle) \
+ macro (ompt_overhead) \
+ macro (ompt_barrier_wait) \
+ macro (ompt_task_wait) \
+ macro (ompt_mutex_wait)
+
+#define FOREACH_OMPT_STATE(macro) \
+ \
+ /* first */ \
+ macro (ompt_state_first, 0x71) /* initial enumeration state */ \
+ \
+ /* work states (0..15) */ \
+ macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
+ macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
+ macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
+ \
+ /* idle (16..31) */ \
+ macro (ompt_state_idle, 0x10) /* waiting for work */ \
+ \
+ /* overhead states (32..63) */ \
+ macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
+ \
+ /* barrier wait states (64..79) */ \
+ macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
+ macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
+ macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
+ \
+ /* task wait states (80..95) */ \
+ macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
+ macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
+ \
+ /* mutex wait states (96..111) */ \
+ macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
+ macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
+ macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
+ macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
+ macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
+ macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
+ \
+ /* misc (112..127) */ \
+ macro (ompt_state_undefined, 0x70) /* undefined thread state */
+
+
+#define FOREACH_OMPT_EVENT(macro) \
+ \
+ /*--- Mandatory Events ---*/ \
+ macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
+ macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
+ \
+ macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
+ macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
+ \
+ macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
+ macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
+ \
+ macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
+ \
+ macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
+ \
+ /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
+ macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
+ macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
+ \
+ macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
+ macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
+ \
+ macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
+ macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
+ \
+ macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
+ macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
+ \
+ macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
+ macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
+ macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
+ \
+ macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
+ \
+ macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
+ \
+ /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
+ macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
+ macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
+ \
+ macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
+ macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
+ \
+ macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
+ \
+ macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
+ macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
+ \
+ macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
+ macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
+ \
+ macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
+ macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
+ \
+ macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
+ macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
+ \
+ macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
+ macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
+ \
+ macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
+ macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
+ \
+ macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
+ macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
+ \
+ macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
+ macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at taskwait end */ \
+ \
+ macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
+ macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
+ \
+ macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
+ \
+ macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
+ macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
+ macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
+ macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
+ macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
+ \
+ macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
+ macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
+ macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
+ macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
+ macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
+ macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
+ \
+ macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
+ macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
+ \
+ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
+ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
+ \
+ macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */
+
+
+
+/*****************************************************************************
+ * data types
+ *****************************************************************************/
+
+/*---------------------
+ * identifiers
+ *---------------------*/
+
+typedef uint64_t ompt_thread_id_t;
+#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_task_id_t;
+#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_parallel_id_t;
+#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_wait_id_t;
+#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
+
+
+/*---------------------
+ * ompt_frame_t
+ *---------------------*/
+
+typedef struct ompt_frame_s {
+ void *exit_runtime_frame; /* next frame is user code */
+ void *reenter_runtime_frame; /* previous frame is user code */
+} ompt_frame_t;
+
+
+/*****************************************************************************
+ * enumerations for thread states and runtime events
+ *****************************************************************************/
+
+/*---------------------
+ * runtime states
+ *---------------------*/
+
+typedef enum {
+#define ompt_state_macro(state, code) state = code,
+ FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
+} ompt_state_t;
+
+
+/*---------------------
+ * runtime events
+ *---------------------*/
+
+typedef enum {
+#define ompt_event_macro(event, callback, eventid) event = eventid,
+ FOREACH_OMPT_EVENT(ompt_event_macro)
+#undef ompt_event_macro
+} ompt_event_t;
+
+
+/*---------------------
+ * set callback results
+ *---------------------*/
+typedef enum {
+ ompt_set_result_registration_error = 0,
+ ompt_set_result_event_may_occur_no_callback = 1,
+ ompt_set_result_event_never_occurs = 2,
+ ompt_set_result_event_may_occur_callback_some = 3,
+ ompt_set_result_event_may_occur_callback_always = 4,
+} ompt_set_result_t;
+
+
+
+/*****************************************************************************
+ * callback signatures
+ *****************************************************************************/
+
+/* initialization */
+typedef void (*ompt_interface_fn_t)(void);
+
+typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
+ const char * /* entry point to look up */
+);
+
+/* threads */
+typedef void (*ompt_thread_callback_t) (
+ ompt_thread_id_t thread_id /* ID of thread */
+);
+
+typedef enum {
+ ompt_thread_initial = 1, // start the enumeration at 1
+ ompt_thread_worker = 2,
+ ompt_thread_other = 3
+} ompt_thread_type_t;
+
+typedef enum {
+ ompt_invoker_program = 0, /* program invokes master task */
+ ompt_invoker_runtime = 1 /* runtime invokes master task */
+} ompt_invoker_t;
+
+typedef void (*ompt_thread_type_callback_t) (
+ ompt_thread_type_t thread_type, /* type of thread */
+ ompt_thread_id_t thread_id /* ID of thread */
+);
+
+typedef void (*ompt_wait_callback_t) (
+ ompt_wait_id_t wait_id /* wait id */
+);
+
+/* parallel and workshares */
+typedef void (*ompt_parallel_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t task_id /* id of task */
+);
+
+typedef void (*ompt_new_workshare_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ void *workshare_function /* pointer to outlined function */
+);
+
+typedef void (*ompt_new_parallel_callback_t) (
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ ompt_frame_t *parent_task_frame, /* frame data of parent task */
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ uint32_t requested_team_size, /* number of threads in team */
+ void *parallel_function, /* pointer to outlined function */
+ ompt_invoker_t invoker /* who invokes master task? */
+);
+
+typedef void (*ompt_end_parallel_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t task_id, /* id of task */
+ ompt_invoker_t invoker /* who invokes master task? */
+);
+
+/* tasks */
+typedef void (*ompt_task_callback_t) (
+ ompt_task_id_t task_id /* id of task */
+);
+
+typedef void (*ompt_task_pair_callback_t) (
+ ompt_task_id_t first_task_id,
+ ompt_task_id_t second_task_id
+);
+
+typedef void (*ompt_new_task_callback_t) (
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ ompt_frame_t *parent_task_frame, /* frame data for parent task */
+ ompt_task_id_t new_task_id, /* id of created task */
+ void *task_function /* pointer to outlined function */
+);
+
+/* program */
+typedef void (*ompt_control_callback_t) (
+ uint64_t command, /* command of control call */
+ uint64_t modifier /* modifier of control call */
+);
+
+typedef void (*ompt_callback_t)(void);
+
+
+/****************************************************************************
+ * ompt API
+ ***************************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OMPT_API_FNTYPE(fn) fn##_t
+
+#define OMPT_API_FUNCTION(return_type, fn, args) \
+ typedef return_type (*OMPT_API_FNTYPE(fn)) args
+
+
+
+/****************************************************************************
+ * INQUIRY FUNCTIONS
+ ***************************************************************************/
+
+/* state */
+OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
+ ompt_wait_id_t *ompt_wait_id
+));
+
+/* thread */
+OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
+
+OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
+
+/* parallel region */
+OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
+ int ancestor_level
+));
+
+OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
+ int ancestor_level
+));
+
+/* task */
+OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
+ int depth
+));
+
+OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
+ int depth
+));
+
+
+
+/****************************************************************************
+ * PLACEHOLDERS FOR PERFORMANCE REPORTING
+ ***************************************************************************/
+
+/* idle */
+OMPT_API_FUNCTION(void, ompt_idle, (
+ void
+));
+
+/* overhead */
+OMPT_API_FUNCTION(void, ompt_overhead, (
+ void
+));
+
+/* barrier wait */
+OMPT_API_FUNCTION(void, ompt_barrier_wait, (
+ void
+));
+
+/* task wait */
+OMPT_API_FUNCTION(void, ompt_task_wait, (
+ void
+));
+
+/* mutex wait */
+OMPT_API_FUNCTION(void, ompt_mutex_wait, (
+ void
+));
+
+
+
+/****************************************************************************
+ * INITIALIZATION FUNCTIONS
+ ***************************************************************************/
+
+OMPT_API_FUNCTION(void, ompt_initialize, (
+ ompt_function_lookup_t ompt_fn_lookup,
+ const char *runtime_version,
+ unsigned int ompt_version
+));
+
+
+/* initialization interface to be defined by tool */
+ompt_initialize_t ompt_tool(void);
+
+typedef enum opt_init_mode_e {
+ ompt_init_mode_never = 0,
+ ompt_init_mode_false = 1,
+ ompt_init_mode_true = 2,
+ ompt_init_mode_always = 3
+} ompt_init_mode_t;
+
+OMPT_API_FUNCTION(int, ompt_set_callback, (
+ ompt_event_t event,
+ ompt_callback_t callback
+));
+
+typedef enum ompt_set_callback_rc_e { /* non-standard */
+ ompt_set_callback_error = 0,
+ ompt_has_event_no_callback = 1,
+ ompt_no_event_no_callback = 2,
+ ompt_has_event_may_callback = 3,
+ ompt_has_event_must_callback = 4,
+} ompt_set_callback_rc_t;
+
+
+OMPT_API_FUNCTION(int, ompt_get_callback, (
+ ompt_event_t event,
+ ompt_callback_t *callback
+));
+
+
+
+/****************************************************************************
+ * MISCELLANEOUS FUNCTIONS
+ ***************************************************************************/
+
+/* control */
+#if defined(_OPENMP) && (_OPENMP >= 201307)
+#pragma omp declare target
+#endif
+void ompt_control(
+ uint64_t command,
+ uint64_t modifier
+);
+#if defined(_OPENMP) && (_OPENMP >= 201307)
+#pragma omp end declare target
+#endif
+
+/* state enumeration */
+OMPT_API_FUNCTION(int, ompt_enumerate_state, (
+ int current_state,
+ int *next_state,
+ const char **next_state_name
+));
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
+
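For context, a tool consumes the header above by defining ompt_tool(), which hands the runtime an initializer; that initializer then uses the supplied lookup function to obtain ompt_set_callback and registers its callbacks. A minimal sketch, assuming the generated header is installed as ompt.h (on_thread_begin and my_init are illustrative names, not part of this patch):

    #include <stdio.h>
    #include "ompt.h"

    /* Illustrative handler for the mandatory thread-begin event. */
    static void on_thread_begin(ompt_thread_type_t type, ompt_thread_id_t tid)
    {
        printf("thread %llu began (type %d)\n", (unsigned long long) tid, (int) type);
    }

    /* Initializer handed back to the runtime via ompt_tool(). */
    static void my_init(ompt_function_lookup_t lookup,
                        const char *runtime_version,
                        unsigned int ompt_version)
    {
        ompt_set_callback_t set_callback =
            (ompt_set_callback_t) lookup("ompt_set_callback");
        if (set_callback)
            set_callback(ompt_event_thread_begin, (ompt_callback_t) on_thread_begin);
    }

    /* The runtime looks for this entry point to discover whether a tool is present. */
    ompt_initialize_t ompt_tool(void)
    {
        return my_init;
    }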
diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp.h.var b/contrib/libs/cxxsupp/openmp/include/40/omp.h.var
index 99083072bf..4c518e77bc 100644
--- a/contrib/libs/cxxsupp/openmp/include/40/omp.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/40/omp.h.var
@@ -1,160 +1,160 @@
-/*
- * include/40/omp.h.var
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef __OMP_H
-# define __OMP_H
-
-# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
-# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@
-# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@
-# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@"
-
-# ifdef __cplusplus
- extern "C" {
-# endif
-
-# if defined(_WIN32)
-# define __KAI_KMPC_CONVENTION __cdecl
-# else
-# define __KAI_KMPC_CONVENTION
-# endif
-
- /* schedule kind constants */
- typedef enum omp_sched_t {
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
- } omp_sched_t;
-
- /* set API functions */
- extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
- extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
- extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
- extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
- extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
-
- /* query API functions */
- extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
- extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
- extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
- extern int __KAI_KMPC_CONVENTION omp_in_final (void);
- extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
- extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
- extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
-
- /* lock API functions */
- typedef struct omp_lock_t {
- void * _lk;
- } omp_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
-
- /* nested lock API functions */
- typedef struct omp_nest_lock_t {
- void * _lk;
- } omp_nest_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
-
- /* time API functions */
- extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
- extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
-
- /* OpenMP 4.0 */
- extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);
- extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);
- extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
- extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
-
-# include <stdlib.h>
- /* kmp API functions */
- extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
- extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
- extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
- extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
-
- /* Intel affinity API */
- typedef void * kmp_affinity_mask_t;
-
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
- extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
- extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
-
- /* OpenMP 4.0 affinity API */
- typedef enum omp_proc_bind_t {
- omp_proc_bind_false = 0,
- omp_proc_bind_true = 1,
- omp_proc_bind_master = 2,
- omp_proc_bind_close = 3,
- omp_proc_bind_spread = 4
- } omp_proc_bind_t;
-
- extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);
-
- extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
- extern void __KAI_KMPC_CONVENTION kmp_free (void *);
-
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
-
-# undef __KAI_KMPC_CONVENTION
-
- /* Warning:
- The following typedefs are not standard, deprecated and will be removed in a future release.
- */
- typedef int omp_int_t;
- typedef double omp_wtime_t;
-
-# ifdef __cplusplus
- }
-# endif
-
-#endif /* __OMP_H */
-
+/*
+ * include/40/omp.h.var
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef __OMP_H
+# define __OMP_H
+
+# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
+# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@
+# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@
+# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@"
+
+# ifdef __cplusplus
+ extern "C" {
+# endif
+
+# if defined(_WIN32)
+# define __KAI_KMPC_CONVENTION __cdecl
+# else
+# define __KAI_KMPC_CONVENTION
+# endif
+
+ /* schedule kind constants */
+ typedef enum omp_sched_t {
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+ } omp_sched_t;
+
+ /* set API functions */
+ extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
+
+ /* query API functions */
+ extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_final (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
+ extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
+
+ /* lock API functions */
+ typedef struct omp_lock_t {
+ void * _lk;
+ } omp_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
+
+ /* nested lock API functions */
+ typedef struct omp_nest_lock_t {
+ void * _lk;
+ } omp_nest_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
+
+ /* time API functions */
+ extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
+ extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
+
+ /* OpenMP 4.0 */
+ extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);
+ extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);
+ extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
+
+# include <stdlib.h>
+ /* kmp API functions */
+ extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
+ extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
+ extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
+ extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
+
+ /* Intel affinity API */
+ typedef void * kmp_affinity_mask_t;
+
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
+ extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
+ extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
+
+ /* OpenMP 4.0 affinity API */
+ typedef enum omp_proc_bind_t {
+ omp_proc_bind_false = 0,
+ omp_proc_bind_true = 1,
+ omp_proc_bind_master = 2,
+ omp_proc_bind_close = 3,
+ omp_proc_bind_spread = 4
+ } omp_proc_bind_t;
+
+ extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);
+
+ extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
+ extern void __KAI_KMPC_CONVENTION kmp_free (void *);
+
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
+
+# undef __KAI_KMPC_CONVENTION
+
+ /* Warning:
+ The following typedefs are not standard, are deprecated, and will be removed in a future release.
+ */
+ typedef int omp_int_t;
+ typedef double omp_wtime_t;
+
+# ifdef __cplusplus
+ }
+# endif
+
+#endif /* __OMP_H */
+
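A minimal usage sketch (not part of the patch) may help orient readers of the header restored above; it assumes an OpenMP-capable compiler (e.g. clang -fopenmp) and uses only entry points declared in omp.h.var:

    #include <stdio.h>
    #include <omp.h>

    int main(void) {
        omp_lock_t lock;            /* plain (non-nested) lock from the header */
        omp_init_lock(&lock);

        omp_set_num_threads(4);     /* request a team of four threads */

    #pragma omp parallel
        {
            omp_set_lock(&lock);    /* serialize the printout */
            printf("thread %d of %d\n",
                   omp_get_thread_num(), omp_get_num_threads());
            omp_unset_lock(&lock);
        }

        omp_destroy_lock(&lock);
        return 0;
    }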
diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var
index 7f0276de9b..3a59162b4b 100644
--- a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var
+++ b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f.var
@@ -1,758 +1,758 @@
-! include/40/omp_lib.f.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
-!***
-!*** Some of the directives for the following routine extend past column 72,
-!*** so process this file in 132-column mode.
-!***
-
-!dec$ fixedformlinesize:132
-
- module omp_lib_kinds
-
- integer, parameter :: omp_integer_kind = 4
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = 4
- integer, parameter :: omp_lock_kind = int_ptr_kind()
- integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: omp_proc_bind_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = int_ptr_kind()
- integer, parameter :: kmp_size_t_kind = int_ptr_kind()
- integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
- integer, parameter :: kmp_cancel_kind = omp_integer_kind
-
- end module omp_lib_kinds
-
- module omp_lib
-
- use omp_lib_kinds
-
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@'
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
-
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_get_dynamic()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_team_size
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_proc_bind()
- use omp_lib_kinds
- integer (kind=omp_proc_bind_kind) omp_get_proc_bind
- end function omp_get_proc_bind
-
- function omp_get_wtime()
- double precision omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick ()
- double precision omp_get_wtick
- end function omp_get_wtick
-
- function omp_get_default_device()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_default_device
- end function omp_get_default_device
-
- subroutine omp_set_default_device(dflt_device)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) dflt_device
- end subroutine omp_set_default_device
-
- function omp_get_num_devices()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_devices
- end function omp_get_num_devices
-
- function omp_get_num_teams()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_teams
- end function omp_get_num_teams
-
- function omp_get_team_num()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_num
- end function omp_get_team_num
-
- function omp_get_cancellation()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_cancellation
- end function omp_get_cancellation
-
- function omp_is_initial_device()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_is_initial_device
- end function omp_is_initial_device
-
- subroutine omp_init_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial()
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround()
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput()
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string)
- character*(*) string
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s()
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind) size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind) nelem
- integer (kind=kmp_size_t_kind) elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind) ptr
- integer (kind=kmp_size_t_kind) size
- end function kmp_realloc
-
- subroutine kmp_free(ptr)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on()
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off()
- end subroutine kmp_set_warnings_off
-
- function kmp_get_cancellation_status(cancelkind)
- use omp_lib_kinds
- integer (kind=kmp_cancel_kind) cancelkind
- logical (kind=omp_logical_kind) kmp_get_cancellation_status
- end function kmp_get_cancellation_status
-
- end interface
-
-!dec$ if defined(_WIN32)
-!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
-
-!***
-!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
-!*** option is specified. The alias attribute ensures that the specified
-!*** string is used as the entry point.
-!***
-!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
-!*** underscore prepended. On the Windows* OS Intel(R) 64
-!*** architecture, no underscore is prepended.
-!***
-
-!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads
-!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic
-!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested
-!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads
-!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads
-!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num
-!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs
-!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel
-!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic
-!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested
-!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
-!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
-!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
-!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level
-!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
-!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
-!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size
-!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule
-!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule
-!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind
-!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime
-!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick
-!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
-!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
-!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices
-!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams
-!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num
-!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
-!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
-
-!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
-
-!dec$ else
-
-!***
-!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
-!***
-
-!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads
-!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic
-!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested
-!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads
-!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads
-!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num
-!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs
-!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel
-!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic
-!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested
-!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
-!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
-!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
-!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level
-!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
-!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
-!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size
-!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule
-!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule
-!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind
-!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime
-!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick
-!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
-!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
-!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices
-!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams
-!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num
-!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
-!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
-
-!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'_KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
-
-!dec$ endif
-!dec$ endif
-
-!dec$ if defined(__linux)
-
-!***
-!*** The Linux* OS entry points are in lowercase, with an underscore appended.
-!***
-
-!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'omp_get_level_'::omp_get_level
-!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind
-!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
-!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device
-!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device
-!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices
-!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams
-!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num
-!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
-!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device
-
-!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'kmp_free_'::kmp_free
-
-!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
-!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
-
-!dec$ endif
-
-!dec$ if defined(__APPLE__)
-
-!***
-!*** The Mac entry points are in lowercase, with both an underscore
-!*** appended and an underscore prepended.
-!***
-
-!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'_omp_get_level_'::omp_get_level
-!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind
-!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
-!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
-!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
-!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
-!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device
-
-!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'_kmp_free_'::kmp_free
-
-!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
-
-!dec$ endif
-
- end module omp_lib
-
+! include/40/omp_lib.f.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+!***
+!*** Some of the directives for the following routine extend past column 72,
+!*** so process this file in 132-column mode.
+!***
+
+!dec$ fixedformlinesize:132
+
+ module omp_lib_kinds
+
+ integer, parameter :: omp_integer_kind = 4
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = 4
+ integer, parameter :: omp_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: omp_proc_bind_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = int_ptr_kind()
+ integer, parameter :: kmp_size_t_kind = int_ptr_kind()
+ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+ integer, parameter :: kmp_cancel_kind = omp_integer_kind
+
+ end module omp_lib_kinds
+
+ module omp_lib
+
+ use omp_lib_kinds
+
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@'
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_get_dynamic()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_team_size
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_proc_bind()
+ use omp_lib_kinds
+ integer (kind=omp_proc_bind_kind) omp_get_proc_bind
+ end function omp_get_proc_bind
+
+ function omp_get_wtime()
+ double precision omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick ()
+ double precision omp_get_wtick
+ end function omp_get_wtick
+
+ function omp_get_default_device()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_default_device
+ end function omp_get_default_device
+
+ subroutine omp_set_default_device(dflt_device)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) dflt_device
+ end subroutine omp_set_default_device
+
+ function omp_get_num_devices()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_devices
+ end function omp_get_num_devices
+
+ function omp_get_num_teams()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_teams
+ end function omp_get_num_teams
+
+ function omp_get_team_num()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_num
+ end function omp_get_team_num
+
+ function omp_get_cancellation()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_cancellation
+ end function omp_get_cancellation
+
+ function omp_is_initial_device()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_is_initial_device
+ end function omp_is_initial_device
+
+ subroutine omp_init_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial()
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround()
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput()
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string)
+ character*(*) string
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s()
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind) nelem
+ integer (kind=kmp_size_t_kind) elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind) ptr
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on()
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off()
+ end subroutine kmp_set_warnings_off
+
+ function kmp_get_cancellation_status(cancelkind)
+ use omp_lib_kinds
+ integer (kind=kmp_cancel_kind) cancelkind
+ logical (kind=omp_logical_kind) kmp_get_cancellation_status
+ end function kmp_get_cancellation_status
+
+ end interface
+
+!dec$ if defined(_WIN32)
+!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
+
+!***
+!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
+!*** option is specified. The alias attribute ensures that the specified
+!*** string is used as the entry point.
+!***
+!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
+!*** underscore prepended. On the Windows* OS Intel(R) 64
+!*** architecture, no underscore is prepended.
+!***
+
+!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads
+!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic
+!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested
+!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads
+!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads
+!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num
+!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs
+!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel
+!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic
+!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested
+!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
+!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
+!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
+!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level
+!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
+!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
+!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size
+!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule
+!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule
+!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind
+!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime
+!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick
+!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
+!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
+!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices
+!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams
+!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
+!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
+
+!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
+!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock' :: omp_set_lock
+!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock
+!dec$ attributes alias:'omp_test_lock' :: omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock
+!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock
+
+!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
+!dec$ else
+
+!***
+!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
+!***
+
+!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads
+!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic
+!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested
+!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads
+!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads
+!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num
+!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs
+!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel
+!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic
+!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested
+!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
+!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
+!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
+!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level
+!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
+!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
+!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size
+!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule
+!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule
+!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind
+!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime
+!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick
+!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
+!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
+!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices
+!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams
+!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
+!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
+
+!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
+!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
+!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
+
+!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'_KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
+!dec$ endif
+!dec$ endif
+
+!dec$ if defined(__linux)
+
+!***
+!*** The Linux* OS entry points are in lowercase, with an underscore appended.
+!***
+
+!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'omp_get_level_'::omp_get_level
+!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind
+!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
+!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device
+!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device
+!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices
+!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams
+!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
+!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device
+
+!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'kmp_free_'::kmp_free
+
+!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
+!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
+
+!dec$ endif
+
+!dec$ if defined(__APPLE__)
+
+!***
+!*** The Mac entry points are in lowercase, with both an underscore
+!*** appended and an underscore prepended.
+!***
+
+!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'_omp_get_level_'::omp_get_level
+!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind
+!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
+!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
+!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
+!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device
+
+!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'_kmp_free_'::kmp_free
+
+!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
+
+!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
+
+!dec$ endif
+
+ end module omp_lib
+
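For reference, the hunk above declares the Fortran entry points and, through the !dec$ attributes alias directives, binds each one to the platform-specific external symbol (uppercase with a leading underscore on Windows IA-32, lowercase with a trailing underscore on Linux, both on macOS). A minimal sketch of a consumer of the resulting omp_lib module follows; it is not part of this diff, the program name hello_omp is hypothetical, and an OpenMP-enabled Fortran compiler is assumed (e.g. "gfortran -fopenmp hello_omp.f90"):

    program hello_omp
      use omp_lib                    ! entry points declared by the interface above
      implicit none

      call omp_set_num_threads(4)    ! request four threads for the next parallel region

    !$omp parallel
      print '(a,i0,a,i0)', 'thread ', omp_get_thread_num(), &
            ' of ', omp_get_num_threads()
    !$omp end parallel
    end program hello_omp
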
diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var
index be4bcaf257..5be8026603 100644
--- a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var
+++ b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.f90.var
@@ -1,448 +1,448 @@
-! include/40/omp_lib.f90.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
- module omp_lib_kinds
-
- use, intrinsic :: iso_c_binding
-
- integer, parameter :: omp_integer_kind = c_int
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = c_float
- integer, parameter :: kmp_double_kind = c_double
- integer, parameter :: omp_lock_kind = c_intptr_t
- integer, parameter :: omp_nest_lock_kind = c_intptr_t
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: omp_proc_bind_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = c_intptr_t
- integer, parameter :: kmp_size_t_kind = c_size_t
- integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
- integer, parameter :: kmp_cancel_kind = omp_integer_kind
-
- end module omp_lib_kinds
-
- module omp_lib
-
- use omp_lib_kinds
-
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*) kmp_build_date
- parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
-
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
-
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_in_final() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_final
- end function omp_in_final
-
- function omp_get_dynamic() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_size
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier) bind(c)
- use omp_lib_kinds
- integer (kind=omp_sched_kind), value :: kind
- integer (kind=omp_integer_kind), value :: modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier) bind(c)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_proc_bind() bind(c)
- use omp_lib_kinds
- integer (kind=omp_proc_bind_kind) omp_get_proc_bind
- end function omp_get_proc_bind
-
- function omp_get_wtime() bind(c)
- use omp_lib_kinds
- real (kind=kmp_double_kind) omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick() bind(c)
- use omp_lib_kinds
- real (kind=kmp_double_kind) omp_get_wtick
- end function omp_get_wtick
-
- function omp_get_default_device() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_default_device
- end function omp_get_default_device
-
- subroutine omp_set_default_device(dflt_device) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: dflt_device
- end subroutine omp_set_default_device
-
- function omp_get_num_devices() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_devices
- end function omp_get_num_devices
-
- function omp_get_num_teams() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_teams
- end function omp_get_num_teams
-
- function omp_get_team_num() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_num
- end function omp_get_team_num
-
- function omp_get_cancellation() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_cancellation
- end function omp_get_cancellation
-
- function omp_is_initial_device() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_is_initial_device
- end function omp_is_initial_device
-
- subroutine omp_init_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind), value :: size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial() bind(c)
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround() bind(c)
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput() bind(c)
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string) bind(c)
- use, intrinsic :: iso_c_binding
- character (kind=c_char) :: string(*)
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s() bind(c)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind), value :: nelem
- integer (kind=kmp_size_t_kind), value :: elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind), value :: ptr
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_realloc
-
- subroutine kmp_free(ptr) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind), value :: ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on() bind(c)
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off() bind(c)
- end subroutine kmp_set_warnings_off
-
- function kmp_get_cancellation_status(cancelkind) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_cancel_kind), value :: cancelkind
- logical (kind=omp_logical_kind) kmp_get_cancellation_status
- end function kmp_get_cancellation_status
-
- end interface
-
- end module omp_lib
+! include/40/omp_lib.f90.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+ module omp_lib_kinds
+
+ use, intrinsic :: iso_c_binding
+
+ integer, parameter :: omp_integer_kind = c_int
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = c_float
+ integer, parameter :: kmp_double_kind = c_double
+ integer, parameter :: omp_lock_kind = c_intptr_t
+ integer, parameter :: omp_nest_lock_kind = c_intptr_t
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: omp_proc_bind_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = c_intptr_t
+ integer, parameter :: kmp_size_t_kind = c_size_t
+ integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
+ integer, parameter :: kmp_cancel_kind = omp_integer_kind
+
+ end module omp_lib_kinds
+
+ module omp_lib
+
+ use omp_lib_kinds
+
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*) kmp_build_date
+ parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_in_final() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_final
+ end function omp_in_final
+
+ function omp_get_dynamic() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_size
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind), value :: kind
+ integer (kind=omp_integer_kind), value :: modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_proc_bind() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_proc_bind_kind) omp_get_proc_bind
+ end function omp_get_proc_bind
+
+ function omp_get_wtime() bind(c)
+ use omp_lib_kinds
+ real (kind=kmp_double_kind) omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick() bind(c)
+ use omp_lib_kinds
+ real (kind=kmp_double_kind) omp_get_wtick
+ end function omp_get_wtick
+
+ function omp_get_default_device() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_default_device
+ end function omp_get_default_device
+
+ subroutine omp_set_default_device(dflt_device) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: dflt_device
+ end subroutine omp_set_default_device
+
+ function omp_get_num_devices() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_devices
+ end function omp_get_num_devices
+
+ function omp_get_num_teams() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_teams
+ end function omp_get_num_teams
+
+ function omp_get_team_num() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_num
+ end function omp_get_team_num
+
+ function omp_get_cancellation() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_cancellation
+ end function omp_get_cancellation
+
+ function omp_is_initial_device() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_is_initial_device
+ end function omp_is_initial_device
+
+ subroutine omp_init_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind), value :: size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial() bind(c)
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround() bind(c)
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput() bind(c)
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string) bind(c)
+ use, intrinsic :: iso_c_binding
+ character (kind=c_char) :: string(*)
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s() bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind), value :: nelem
+ integer (kind=kmp_size_t_kind), value :: elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind), value :: ptr
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind), value :: ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on() bind(c)
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off() bind(c)
+ end subroutine kmp_set_warnings_off
+
+ function kmp_get_cancellation_status(cancelkind) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_cancel_kind), value :: cancelkind
+ logical (kind=omp_logical_kind) kmp_get_cancellation_status
+ end function kmp_get_cancellation_status
+
+ end interface
+
+ end module omp_lib
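The interface block re-added above declares the lock routines against integer(kind=omp_lock_kind) from omp_lib_kinds. A minimal sketch of the intended init/set/unset/destroy sequence follows; it is illustrative only, not part of this diff, the program name lock_demo is hypothetical, and an OpenMP-enabled Fortran compiler is assumed:

    program lock_demo
      use omp_lib
      implicit none
      integer (kind=omp_lock_kind) :: lck   ! lock handle, kind taken from omp_lib_kinds
      integer :: total

      total = 0
      call omp_init_lock(lck)

    !$omp parallel
      call omp_set_lock(lck)                ! serialise the shared update
      total = total + omp_get_thread_num()
      call omp_unset_lock(lck)
    !$omp end parallel

      call omp_destroy_lock(lck)
      print '(a,i0)', 'total = ', total
    end program lock_demo
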
diff --git a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var
index 4c933cda00..cc134fd352 100644
--- a/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/40/omp_lib.h.var
@@ -1,558 +1,558 @@
-! include/40/omp_lib.h.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
-!***
-!*** Some of the directives for the following routine extend past column 72,
-!*** so process this file in 132-column mode.
-!***
-
-!DIR$ fixedformlinesize:132
-
- integer, parameter :: omp_integer_kind = 4
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = 4
- integer, parameter :: omp_lock_kind = int_ptr_kind()
- integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: omp_proc_bind_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = int_ptr_kind()
- integer, parameter :: kmp_size_t_kind = int_ptr_kind()
- integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
-
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*) kmp_build_date
- parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads) bind(c)
- import
- integer (kind=omp_integer_kind), value :: nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable) bind(c)
- import
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable) bind(c)
- import
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel() bind(c)
- import
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_in_final() bind(c)
- import
- logical (kind=omp_logical_kind) omp_in_final
- end function omp_in_final
-
- function omp_get_dynamic() bind(c)
- import
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested() bind(c)
- import
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels) bind(c)
- import
- integer (kind=omp_integer_kind), value :: max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level) bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level) bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_team_size
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier) bind(c)
- import
- integer (kind=omp_sched_kind), value :: kind
- integer (kind=omp_integer_kind), value :: modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier) bind(c)
- import
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_proc_bind() bind(c)
- import
- integer (kind=omp_proc_bind_kind) omp_get_proc_bind
- end function omp_get_proc_bind
-
- function omp_get_wtime() bind(c)
- double precision omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick() bind(c)
- double precision omp_get_wtick
- end function omp_get_wtick
-
- function omp_get_default_device() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_default_device
- end function omp_get_default_device
-
- subroutine omp_set_default_device(dflt_device) bind(c)
- import
- integer (kind=omp_integer_kind), value :: dflt_device
- end subroutine omp_set_default_device
-
- function omp_get_num_devices() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_devices
- end function omp_get_num_devices
-
- function omp_get_num_teams() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_teams
- end function omp_get_num_teams
-
- function omp_get_team_num() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_team_num
- end function omp_get_team_num
-
- function omp_is_initial_device() bind(c)
- import
- logical (kind=omp_logical_kind) omp_is_initial_device
- end function omp_is_initial_device
-
- subroutine omp_init_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_lock
-!DIR$ ENDIF
- import
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size) bind(c)
- import
- integer (kind=omp_integer_kind), value :: size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size) bind(c)
- import
- integer (kind=kmp_size_t_kind), value :: size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec) bind(c)
- import
- integer (kind=omp_integer_kind), value :: msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial() bind(c)
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround() bind(c)
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput() bind(c)
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum) bind(c)
- import
- integer (kind=omp_integer_kind), value :: libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string) bind(c)
- character string(*)
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s() bind(c)
- import
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask) bind(c)
- import
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask) bind(c)
- import
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size) bind(c)
- import
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize) bind(c)
- import
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind), value :: nelem
- integer (kind=kmp_size_t_kind), value :: elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size) bind(c)
- import
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind), value :: ptr
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_realloc
-
- subroutine kmp_free(ptr) bind(c)
- import
- integer (kind=kmp_pointer_kind), value :: ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on() bind(c)
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off() bind(c)
- end subroutine kmp_set_warnings_off
-
- end interface
-
-!DIR$ IF DEFINED (__INTEL_OFFLOAD)
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off
-
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!$omp declare target(omp_set_num_threads )
-!$omp declare target(omp_set_dynamic )
-!$omp declare target(omp_set_nested )
-!$omp declare target(omp_get_num_threads )
-!$omp declare target(omp_get_max_threads )
-!$omp declare target(omp_get_thread_num )
-!$omp declare target(omp_get_num_procs )
-!$omp declare target(omp_in_parallel )
-!$omp declare target(omp_in_final )
-!$omp declare target(omp_get_dynamic )
-!$omp declare target(omp_get_nested )
-!$omp declare target(omp_get_thread_limit )
-!$omp declare target(omp_set_max_active_levels )
-!$omp declare target(omp_get_max_active_levels )
-!$omp declare target(omp_get_level )
-!$omp declare target(omp_get_active_level )
-!$omp declare target(omp_get_ancestor_thread_num )
-!$omp declare target(omp_get_team_size )
-!$omp declare target(omp_set_schedule )
-!$omp declare target(omp_get_schedule )
-!$omp declare target(omp_get_proc_bind )
-!$omp declare target(omp_get_wtime )
-!$omp declare target(omp_get_wtick )
-!$omp declare target(omp_get_default_device )
-!$omp declare target(omp_set_default_device )
-!$omp declare target(omp_is_initial_device )
-!$omp declare target(omp_get_num_devices )
-!$omp declare target(omp_get_num_teams )
-!$omp declare target(omp_get_team_num )
-!$omp declare target(omp_init_lock )
-!$omp declare target(omp_destroy_lock )
-!$omp declare target(omp_set_lock )
-!$omp declare target(omp_unset_lock )
-!$omp declare target(omp_test_lock )
-!$omp declare target(omp_init_nest_lock )
-!$omp declare target(omp_destroy_nest_lock )
-!$omp declare target(omp_set_nest_lock )
-!$omp declare target(omp_unset_nest_lock )
-!$omp declare target(omp_test_nest_lock )
-!$omp declare target(kmp_set_stacksize )
-!$omp declare target(kmp_set_stacksize_s )
-!$omp declare target(kmp_set_blocktime )
-!$omp declare target(kmp_set_library_serial )
-!$omp declare target(kmp_set_library_turnaround )
-!$omp declare target(kmp_set_library_throughput )
-!$omp declare target(kmp_set_library )
-!$omp declare target(kmp_set_defaults )
-!$omp declare target(kmp_get_stacksize )
-!$omp declare target(kmp_get_stacksize_s )
-!$omp declare target(kmp_get_blocktime )
-!$omp declare target(kmp_get_library )
-!$omp declare target(kmp_set_affinity )
-!$omp declare target(kmp_get_affinity )
-!$omp declare target(kmp_get_affinity_max_proc )
-!$omp declare target(kmp_create_affinity_mask )
-!$omp declare target(kmp_destroy_affinity_mask )
-!$omp declare target(kmp_set_affinity_mask_proc )
-!$omp declare target(kmp_unset_affinity_mask_proc )
-!$omp declare target(kmp_get_affinity_mask_proc )
-!$omp declare target(kmp_malloc )
-!$omp declare target(kmp_calloc )
-!$omp declare target(kmp_realloc )
-!$omp declare target(kmp_free )
-!$omp declare target(kmp_set_warnings_on )
-!$omp declare target(kmp_set_warnings_off )
-!DIR$ ENDIF
-!DIR$ ENDIF
-
+! include/40/omp_lib.h.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+!***
+!*** Some of the directives for the following routine extend past column 72,
+!*** so process this file in 132-column mode.
+!***
+
+!DIR$ fixedformlinesize:132
+
+ integer, parameter :: omp_integer_kind = 4
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = 4
+ integer, parameter :: omp_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: omp_proc_bind_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = int_ptr_kind()
+ integer, parameter :: kmp_size_t_kind = int_ptr_kind()
+ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*) kmp_build_date
+ parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable) bind(c)
+ import
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable) bind(c)
+ import
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_in_final() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_in_final
+ end function omp_in_final
+
+ function omp_get_dynamic() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level) bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level) bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_team_size
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier) bind(c)
+ import
+ integer (kind=omp_sched_kind), value :: kind
+ integer (kind=omp_integer_kind), value :: modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier) bind(c)
+ import
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_proc_bind() bind(c)
+ import
+ integer (kind=omp_proc_bind_kind) omp_get_proc_bind
+ end function omp_get_proc_bind
+
+ function omp_get_wtime() bind(c)
+ double precision omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick() bind(c)
+ double precision omp_get_wtick
+ end function omp_get_wtick
+
+ function omp_get_default_device() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_default_device
+ end function omp_get_default_device
+
+ subroutine omp_set_default_device(dflt_device) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: dflt_device
+ end subroutine omp_set_default_device
+
+ function omp_get_num_devices() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_devices
+ end function omp_get_num_devices
+
+ function omp_get_num_teams() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_teams
+ end function omp_get_num_teams
+
+ function omp_get_team_num() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_team_num
+ end function omp_get_team_num
+
+ function omp_is_initial_device() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_is_initial_device
+ end function omp_is_initial_device
+
+ subroutine omp_init_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_lock
+!DIR$ ENDIF
+ import
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size) bind(c)
+ import
+ integer (kind=kmp_size_t_kind), value :: size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial() bind(c)
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround() bind(c)
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput() bind(c)
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string) bind(c)
+ character string(*)
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s() bind(c)
+ import
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask) bind(c)
+ import
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask) bind(c)
+ import
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size) bind(c)
+ import
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize) bind(c)
+ import
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind), value :: nelem
+ integer (kind=kmp_size_t_kind), value :: elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size) bind(c)
+ import
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind), value :: ptr
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr) bind(c)
+ import
+ integer (kind=kmp_pointer_kind), value :: ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on() bind(c)
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off() bind(c)
+ end subroutine kmp_set_warnings_off
+
+ end interface
+
+!DIR$ IF DEFINED (__INTEL_OFFLOAD)
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off
+
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!$omp declare target(omp_set_num_threads )
+!$omp declare target(omp_set_dynamic )
+!$omp declare target(omp_set_nested )
+!$omp declare target(omp_get_num_threads )
+!$omp declare target(omp_get_max_threads )
+!$omp declare target(omp_get_thread_num )
+!$omp declare target(omp_get_num_procs )
+!$omp declare target(omp_in_parallel )
+!$omp declare target(omp_in_final )
+!$omp declare target(omp_get_dynamic )
+!$omp declare target(omp_get_nested )
+!$omp declare target(omp_get_thread_limit )
+!$omp declare target(omp_set_max_active_levels )
+!$omp declare target(omp_get_max_active_levels )
+!$omp declare target(omp_get_level )
+!$omp declare target(omp_get_active_level )
+!$omp declare target(omp_get_ancestor_thread_num )
+!$omp declare target(omp_get_team_size )
+!$omp declare target(omp_set_schedule )
+!$omp declare target(omp_get_schedule )
+!$omp declare target(omp_get_proc_bind )
+!$omp declare target(omp_get_wtime )
+!$omp declare target(omp_get_wtick )
+!$omp declare target(omp_get_default_device )
+!$omp declare target(omp_set_default_device )
+!$omp declare target(omp_is_initial_device )
+!$omp declare target(omp_get_num_devices )
+!$omp declare target(omp_get_num_teams )
+!$omp declare target(omp_get_team_num )
+!$omp declare target(omp_init_lock )
+!$omp declare target(omp_destroy_lock )
+!$omp declare target(omp_set_lock )
+!$omp declare target(omp_unset_lock )
+!$omp declare target(omp_test_lock )
+!$omp declare target(omp_init_nest_lock )
+!$omp declare target(omp_destroy_nest_lock )
+!$omp declare target(omp_set_nest_lock )
+!$omp declare target(omp_unset_nest_lock )
+!$omp declare target(omp_test_nest_lock )
+!$omp declare target(kmp_set_stacksize )
+!$omp declare target(kmp_set_stacksize_s )
+!$omp declare target(kmp_set_blocktime )
+!$omp declare target(kmp_set_library_serial )
+!$omp declare target(kmp_set_library_turnaround )
+!$omp declare target(kmp_set_library_throughput )
+!$omp declare target(kmp_set_library )
+!$omp declare target(kmp_set_defaults )
+!$omp declare target(kmp_get_stacksize )
+!$omp declare target(kmp_get_stacksize_s )
+!$omp declare target(kmp_get_blocktime )
+!$omp declare target(kmp_get_library )
+!$omp declare target(kmp_set_affinity )
+!$omp declare target(kmp_get_affinity )
+!$omp declare target(kmp_get_affinity_max_proc )
+!$omp declare target(kmp_create_affinity_mask )
+!$omp declare target(kmp_destroy_affinity_mask )
+!$omp declare target(kmp_set_affinity_mask_proc )
+!$omp declare target(kmp_unset_affinity_mask_proc )
+!$omp declare target(kmp_get_affinity_mask_proc )
+!$omp declare target(kmp_malloc )
+!$omp declare target(kmp_calloc )
+!$omp declare target(kmp_realloc )
+!$omp declare target(kmp_free )
+!$omp declare target(kmp_set_warnings_on )
+!$omp declare target(kmp_set_warnings_off )
+!DIR$ ENDIF
+!DIR$ ENDIF
+
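Editorial note (not part of the patch): the interfaces restored above are Fortran bind(c) declarations for the runtime's C entry points, the same omp_* routines declared in omp.h.var elsewhere in this patch. As a rough illustration of how those entry points are used, the C sketch below calls only routines that appear in this patch's omp.h declarations and assumes any OpenMP-enabled C compiler (e.g. clang -fopenmp example.c).

/* Illustrative sketch only; thread count of 4 is arbitrary. */
#include <stdio.h>
#include <omp.h>

int main(void) {
    omp_lock_t lock;
    omp_init_lock(&lock);            /* same entry point as the omp_init_lock interface above */
    omp_set_num_threads(4);          /* mirrors omp_set_num_threads(nthreads) above */

    #pragma omp parallel
    {
        omp_set_lock(&lock);         /* serialize the prints */
        printf("thread %d of %d\n",
               omp_get_thread_num(), omp_get_num_threads());
        omp_unset_lock(&lock);
    }

    omp_destroy_lock(&lock);
    return 0;
}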
diff --git a/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var b/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var
index c99ec8e677..3a8c30c165 100644
--- a/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/40/ompt.h.var
@@ -1,487 +1,487 @@
-/*
- * include/40/ompt.h.var
- */
-
-#ifndef __OMPT__
-#define __OMPT__
-
-/*****************************************************************************
- * system include files
- *****************************************************************************/
-
-#include <stdint.h>
-
-
-
-/*****************************************************************************
- * iteration macros
- *****************************************************************************/
-
-#define FOREACH_OMPT_INQUIRY_FN(macro) \
- macro (ompt_enumerate_state) \
- \
- macro (ompt_set_callback) \
- macro (ompt_get_callback) \
- \
- macro (ompt_get_idle_frame) \
- macro (ompt_get_task_frame) \
- \
- macro (ompt_get_state) \
- \
- macro (ompt_get_parallel_id) \
- macro (ompt_get_parallel_team_size) \
- macro (ompt_get_task_id) \
- macro (ompt_get_thread_id)
-
-#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
- macro (ompt_idle) \
- macro (ompt_overhead) \
- macro (ompt_barrier_wait) \
- macro (ompt_task_wait) \
- macro (ompt_mutex_wait)
-
-#define FOREACH_OMPT_STATE(macro) \
- \
- /* first */ \
- macro (ompt_state_first, 0x71) /* initial enumeration state */ \
- \
- /* work states (0..15) */ \
- macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
- macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
- macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
- \
- /* idle (16..31) */ \
- macro (ompt_state_idle, 0x10) /* waiting for work */ \
- \
- /* overhead states (32..63) */ \
- macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
- \
- /* barrier wait states (64..79) */ \
- macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
- macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
- macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
- \
- /* task wait states (80..95) */ \
- macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
- macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
- \
- /* mutex wait states (96..111) */ \
- macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
- macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
- macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
- macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
- macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
- macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
- \
- /* misc (112..127) */ \
- macro (ompt_state_undefined, 0x70) /* undefined thread state */
-
-
-#define FOREACH_OMPT_EVENT(macro) \
- \
- /*--- Mandatory Events ---*/ \
- macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
- macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
- \
- macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
- macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
- \
- macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
- macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
- \
- macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
- \
- macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
- \
- /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
- macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
- macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
- \
- macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
- macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
- \
- macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
- macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
- \
- macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
- macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
- \
- macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
- macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
- macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
- \
- macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
- \
- macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
- \
- /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
- macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
- macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
- \
- macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
- macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
- \
- macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
- \
- macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
- macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
- \
- macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
- macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
- \
- macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
- macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
- \
- macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
- macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
- \
- macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
- macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
- \
- macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
- macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
- \
- macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
- macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
- \
- macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
- macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \
- \
- macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
- macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
- \
- macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
- \
- macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
- macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
- macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
- macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
- macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
- \
- macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
- macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
- macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
- macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
- macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
- macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
- \
- macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
- macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
- \
- macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
- macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
- \
- macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */
-
-
-
-/*****************************************************************************
- * data types
- *****************************************************************************/
-
-/*---------------------
- * identifiers
- *---------------------*/
-
-typedef uint64_t ompt_thread_id_t;
-#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_task_id_t;
-#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_parallel_id_t;
-#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_wait_id_t;
-#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
-
-
-/*---------------------
- * ompt_frame_t
- *---------------------*/
-
-typedef struct ompt_frame_s {
- void *exit_runtime_frame; /* next frame is user code */
- void *reenter_runtime_frame; /* previous frame is user code */
-} ompt_frame_t;
-
-
-/*****************************************************************************
- * enumerations for thread states and runtime events
- *****************************************************************************/
-
-/*---------------------
- * runtime states
- *---------------------*/
-
-typedef enum {
-#define ompt_state_macro(state, code) state = code,
- FOREACH_OMPT_STATE(ompt_state_macro)
-#undef ompt_state_macro
-} ompt_state_t;
-
-
-/*---------------------
- * runtime events
- *---------------------*/
-
-typedef enum {
-#define ompt_event_macro(event, callback, eventid) event = eventid,
- FOREACH_OMPT_EVENT(ompt_event_macro)
-#undef ompt_event_macro
-} ompt_event_t;
-
-
-/*---------------------
- * set callback results
- *---------------------*/
-typedef enum {
- ompt_set_result_registration_error = 0,
- ompt_set_result_event_may_occur_no_callback = 1,
- ompt_set_result_event_never_occurs = 2,
- ompt_set_result_event_may_occur_callback_some = 3,
- ompt_set_result_event_may_occur_callback_always = 4,
-} ompt_set_result_t;
-
-
-
-/*****************************************************************************
- * callback signatures
- *****************************************************************************/
-
-/* initialization */
-typedef void (*ompt_interface_fn_t)(void);
-
-typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
- const char * /* entry point to look up */
-);
-
-/* threads */
-typedef void (*ompt_thread_callback_t) (
- ompt_thread_id_t thread_id /* ID of thread */
-);
-
-typedef enum {
- ompt_thread_initial = 1, // start the enumeration at 1
- ompt_thread_worker = 2,
- ompt_thread_other = 3
-} ompt_thread_type_t;
-
-typedef enum {
- ompt_invoker_program = 0, /* program invokes master task */
- ompt_invoker_runtime = 1 /* runtime invokes master task */
-} ompt_invoker_t;
-
-typedef void (*ompt_thread_type_callback_t) (
- ompt_thread_type_t thread_type, /* type of thread */
- ompt_thread_id_t thread_id /* ID of thread */
-);
-
-typedef void (*ompt_wait_callback_t) (
- ompt_wait_id_t wait_id /* wait id */
-);
-
-/* parallel and workshares */
-typedef void (*ompt_parallel_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t task_id /* id of task */
-);
-
-typedef void (*ompt_new_workshare_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t parent_task_id, /* id of parent task */
- void *workshare_function /* pointer to outlined function */
-);
-
-typedef void (*ompt_new_parallel_callback_t) (
- ompt_task_id_t parent_task_id, /* id of parent task */
- ompt_frame_t *parent_task_frame, /* frame data of parent task */
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- uint32_t requested_team_size, /* number of threads in team */
- void *parallel_function, /* pointer to outlined function */
- ompt_invoker_t invoker /* who invokes master task? */
-);
-
-typedef void (*ompt_end_parallel_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t task_id, /* id of task */
- ompt_invoker_t invoker /* who invokes master task? */
-);
-
-/* tasks */
-typedef void (*ompt_task_callback_t) (
- ompt_task_id_t task_id /* id of task */
-);
-
-typedef void (*ompt_task_pair_callback_t) (
- ompt_task_id_t first_task_id,
- ompt_task_id_t second_task_id
-);
-
-typedef void (*ompt_new_task_callback_t) (
- ompt_task_id_t parent_task_id, /* id of parent task */
- ompt_frame_t *parent_task_frame, /* frame data for parent task */
- ompt_task_id_t new_task_id, /* id of created task */
- void *task_function /* pointer to outlined function */
-);
-
-/* program */
-typedef void (*ompt_control_callback_t) (
- uint64_t command, /* command of control call */
- uint64_t modifier /* modifier of control call */
-);
-
-typedef void (*ompt_callback_t)(void);
-
-
-/****************************************************************************
- * ompt API
- ***************************************************************************/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define OMPT_API_FNTYPE(fn) fn##_t
-
-#define OMPT_API_FUNCTION(return_type, fn, args) \
- typedef return_type (*OMPT_API_FNTYPE(fn)) args
-
-
-
-/****************************************************************************
- * INQUIRY FUNCTIONS
- ***************************************************************************/
-
-/* state */
-OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
- ompt_wait_id_t *ompt_wait_id
-));
-
-/* thread */
-OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
-
-OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
-
-/* parallel region */
-OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
- int ancestor_level
-));
-
-OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
- int ancestor_level
-));
-
-/* task */
-OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
- int depth
-));
-
-OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
- int depth
-));
-
-
-
-/****************************************************************************
- * PLACEHOLDERS FOR PERFORMANCE REPORTING
- ***************************************************************************/
-
-/* idle */
-OMPT_API_FUNCTION(void, ompt_idle, (
- void
-));
-
-/* overhead */
-OMPT_API_FUNCTION(void, ompt_overhead, (
- void
-));
-
-/* barrier wait */
-OMPT_API_FUNCTION(void, ompt_barrier_wait, (
- void
-));
-
-/* task wait */
-OMPT_API_FUNCTION(void, ompt_task_wait, (
- void
-));
-
-/* mutex wait */
-OMPT_API_FUNCTION(void, ompt_mutex_wait, (
- void
-));
-
-
-
-/****************************************************************************
- * INITIALIZATION FUNCTIONS
- ***************************************************************************/
-
-OMPT_API_FUNCTION(void, ompt_initialize, (
- ompt_function_lookup_t ompt_fn_lookup,
- const char *runtime_version,
- unsigned int ompt_version
-));
-
-
-/* initialization interface to be defined by tool */
-ompt_initialize_t ompt_tool(void);
-
-typedef enum opt_init_mode_e {
- ompt_init_mode_never = 0,
- ompt_init_mode_false = 1,
- ompt_init_mode_true = 2,
- ompt_init_mode_always = 3
-} ompt_init_mode_t;
-
-OMPT_API_FUNCTION(int, ompt_set_callback, (
- ompt_event_t event,
- ompt_callback_t callback
-));
-
-typedef enum ompt_set_callback_rc_e { /* non-standard */
- ompt_set_callback_error = 0,
- ompt_has_event_no_callback = 1,
- ompt_no_event_no_callback = 2,
- ompt_has_event_may_callback = 3,
- ompt_has_event_must_callback = 4,
-} ompt_set_callback_rc_t;
-
-
-OMPT_API_FUNCTION(int, ompt_get_callback, (
- ompt_event_t event,
- ompt_callback_t *callback
-));
-
-
-
-/****************************************************************************
- * MISCELLANEOUS FUNCTIONS
- ***************************************************************************/
-
-/* control */
-#if defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp declare target
-#endif
-void ompt_control(
- uint64_t command,
- uint64_t modifier
-);
-#if defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp end declare target
-#endif
-
-/* state enumeration */
-OMPT_API_FUNCTION(int, ompt_enumerate_state, (
- int current_state,
- int *next_state,
- const char **next_state_name
-));
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif
-
+/*
+ * include/40/ompt.h.var
+ */
+
+#ifndef __OMPT__
+#define __OMPT__
+
+/*****************************************************************************
+ * system include files
+ *****************************************************************************/
+
+#include <stdint.h>
+
+
+
+/*****************************************************************************
+ * iteration macros
+ *****************************************************************************/
+
+#define FOREACH_OMPT_INQUIRY_FN(macro) \
+ macro (ompt_enumerate_state) \
+ \
+ macro (ompt_set_callback) \
+ macro (ompt_get_callback) \
+ \
+ macro (ompt_get_idle_frame) \
+ macro (ompt_get_task_frame) \
+ \
+ macro (ompt_get_state) \
+ \
+ macro (ompt_get_parallel_id) \
+ macro (ompt_get_parallel_team_size) \
+ macro (ompt_get_task_id) \
+ macro (ompt_get_thread_id)
+
+#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
+ macro (ompt_idle) \
+ macro (ompt_overhead) \
+ macro (ompt_barrier_wait) \
+ macro (ompt_task_wait) \
+ macro (ompt_mutex_wait)
+
+#define FOREACH_OMPT_STATE(macro) \
+ \
+ /* first */ \
+ macro (ompt_state_first, 0x71) /* initial enumeration state */ \
+ \
+ /* work states (0..15) */ \
+ macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
+ macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
+ macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
+ \
+ /* idle (16..31) */ \
+ macro (ompt_state_idle, 0x10) /* waiting for work */ \
+ \
+ /* overhead states (32..63) */ \
+ macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
+ \
+ /* barrier wait states (64..79) */ \
+ macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
+ macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
+ macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
+ \
+ /* task wait states (80..95) */ \
+ macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
+ macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
+ \
+ /* mutex wait states (96..111) */ \
+ macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
+ macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
+ macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
+ macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
+ macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
+ macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
+ \
+ /* misc (112..127) */ \
+ macro (ompt_state_undefined, 0x70) /* undefined thread state */
+
+
+#define FOREACH_OMPT_EVENT(macro) \
+ \
+ /*--- Mandatory Events ---*/ \
+ macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
+ macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
+ \
+ macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
+ macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
+ \
+ macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
+ macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
+ \
+ macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
+ \
+ macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
+ \
+ /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
+ macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
+ macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
+ \
+ macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
+ macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
+ \
+ macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
+ macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
+ \
+ macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
+ macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
+ \
+ macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
+ macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
+ macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
+ \
+ macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
+ \
+ macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
+ \
+ /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
+ macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
+ macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
+ \
+ macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
+ macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
+ \
+ macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
+ \
+ macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
+ macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
+ \
+ macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
+ macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
+ \
+ macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
+ macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
+ \
+ macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
+ macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
+ \
+ macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
+ macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
+ \
+ macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
+ macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
+ \
+ macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
+ macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
+ \
+ macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
+ macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \
+ \
+ macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
+ macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
+ \
+ macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
+ \
+ macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
+ macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
+ macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
+ macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
+ macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
+ \
+ macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
+ macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
+ macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
+ macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
+ macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
+ macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
+ \
+ macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
+ macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
+ \
+ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
+ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
+ \
+ macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */
+
+
+
+/*****************************************************************************
+ * data types
+ *****************************************************************************/
+
+/*---------------------
+ * identifiers
+ *---------------------*/
+
+typedef uint64_t ompt_thread_id_t;
+#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_task_id_t;
+#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_parallel_id_t;
+#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_wait_id_t;
+#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
+
+
+/*---------------------
+ * ompt_frame_t
+ *---------------------*/
+
+typedef struct ompt_frame_s {
+ void *exit_runtime_frame; /* next frame is user code */
+ void *reenter_runtime_frame; /* previous frame is user code */
+} ompt_frame_t;
+
+
+/*****************************************************************************
+ * enumerations for thread states and runtime events
+ *****************************************************************************/
+
+/*---------------------
+ * runtime states
+ *---------------------*/
+
+typedef enum {
+#define ompt_state_macro(state, code) state = code,
+ FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
+} ompt_state_t;
+
+
+/*---------------------
+ * runtime events
+ *---------------------*/
+
+typedef enum {
+#define ompt_event_macro(event, callback, eventid) event = eventid,
+ FOREACH_OMPT_EVENT(ompt_event_macro)
+#undef ompt_event_macro
+} ompt_event_t;
+
+
+/*---------------------
+ * set callback results
+ *---------------------*/
+typedef enum {
+ ompt_set_result_registration_error = 0,
+ ompt_set_result_event_may_occur_no_callback = 1,
+ ompt_set_result_event_never_occurs = 2,
+ ompt_set_result_event_may_occur_callback_some = 3,
+ ompt_set_result_event_may_occur_callback_always = 4,
+} ompt_set_result_t;
+
+
+
+/*****************************************************************************
+ * callback signatures
+ *****************************************************************************/
+
+/* initialization */
+typedef void (*ompt_interface_fn_t)(void);
+
+typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
+ const char * /* entry point to look up */
+);
+
+/* threads */
+typedef void (*ompt_thread_callback_t) (
+ ompt_thread_id_t thread_id /* ID of thread */
+);
+
+typedef enum {
+ ompt_thread_initial = 1, // start the enumeration at 1
+ ompt_thread_worker = 2,
+ ompt_thread_other = 3
+} ompt_thread_type_t;
+
+typedef enum {
+ ompt_invoker_program = 0, /* program invokes master task */
+ ompt_invoker_runtime = 1 /* runtime invokes master task */
+} ompt_invoker_t;
+
+typedef void (*ompt_thread_type_callback_t) (
+ ompt_thread_type_t thread_type, /* type of thread */
+ ompt_thread_id_t thread_id /* ID of thread */
+);
+
+typedef void (*ompt_wait_callback_t) (
+ ompt_wait_id_t wait_id /* wait id */
+);
+
+/* parallel and workshares */
+typedef void (*ompt_parallel_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t task_id /* id of task */
+);
+
+typedef void (*ompt_new_workshare_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ void *workshare_function /* pointer to outlined function */
+);
+
+typedef void (*ompt_new_parallel_callback_t) (
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ ompt_frame_t *parent_task_frame, /* frame data of parent task */
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ uint32_t requested_team_size, /* number of threads in team */
+ void *parallel_function, /* pointer to outlined function */
+ ompt_invoker_t invoker /* who invokes master task? */
+);
+
+typedef void (*ompt_end_parallel_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t task_id, /* id of task */
+ ompt_invoker_t invoker /* who invokes master task? */
+);
+
+/* tasks */
+typedef void (*ompt_task_callback_t) (
+ ompt_task_id_t task_id /* id of task */
+);
+
+typedef void (*ompt_task_pair_callback_t) (
+ ompt_task_id_t first_task_id,
+ ompt_task_id_t second_task_id
+);
+
+typedef void (*ompt_new_task_callback_t) (
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ ompt_frame_t *parent_task_frame, /* frame data for parent task */
+ ompt_task_id_t new_task_id, /* id of created task */
+ void *task_function /* pointer to outlined function */
+);
+
+/* program */
+typedef void (*ompt_control_callback_t) (
+ uint64_t command, /* command of control call */
+ uint64_t modifier /* modifier of control call */
+);
+
+typedef void (*ompt_callback_t)(void);
+
+
+/****************************************************************************
+ * ompt API
+ ***************************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OMPT_API_FNTYPE(fn) fn##_t
+
+#define OMPT_API_FUNCTION(return_type, fn, args) \
+ typedef return_type (*OMPT_API_FNTYPE(fn)) args
+
+
+
+/****************************************************************************
+ * INQUIRY FUNCTIONS
+ ***************************************************************************/
+
+/* state */
+OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
+ ompt_wait_id_t *ompt_wait_id
+));
+
+/* thread */
+OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
+
+OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
+
+/* parallel region */
+OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
+ int ancestor_level
+));
+
+OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
+ int ancestor_level
+));
+
+/* task */
+OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
+ int depth
+));
+
+OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
+ int depth
+));
+
+
+
+/****************************************************************************
+ * PLACEHOLDERS FOR PERFORMANCE REPORTING
+ ***************************************************************************/
+
+/* idle */
+OMPT_API_FUNCTION(void, ompt_idle, (
+ void
+));
+
+/* overhead */
+OMPT_API_FUNCTION(void, ompt_overhead, (
+ void
+));
+
+/* barrier wait */
+OMPT_API_FUNCTION(void, ompt_barrier_wait, (
+ void
+));
+
+/* task wait */
+OMPT_API_FUNCTION(void, ompt_task_wait, (
+ void
+));
+
+/* mutex wait */
+OMPT_API_FUNCTION(void, ompt_mutex_wait, (
+ void
+));
+
+
+
+/****************************************************************************
+ * INITIALIZATION FUNCTIONS
+ ***************************************************************************/
+
+OMPT_API_FUNCTION(void, ompt_initialize, (
+ ompt_function_lookup_t ompt_fn_lookup,
+ const char *runtime_version,
+ unsigned int ompt_version
+));
+
+
+/* initialization interface to be defined by tool */
+ompt_initialize_t ompt_tool(void);
+
+typedef enum opt_init_mode_e {
+ ompt_init_mode_never = 0,
+ ompt_init_mode_false = 1,
+ ompt_init_mode_true = 2,
+ ompt_init_mode_always = 3
+} ompt_init_mode_t;
+
+OMPT_API_FUNCTION(int, ompt_set_callback, (
+ ompt_event_t event,
+ ompt_callback_t callback
+));
+
+typedef enum ompt_set_callback_rc_e { /* non-standard */
+ ompt_set_callback_error = 0,
+ ompt_has_event_no_callback = 1,
+ ompt_no_event_no_callback = 2,
+ ompt_has_event_may_callback = 3,
+ ompt_has_event_must_callback = 4,
+} ompt_set_callback_rc_t;
+
+
+OMPT_API_FUNCTION(int, ompt_get_callback, (
+ ompt_event_t event,
+ ompt_callback_t *callback
+));
+
+
+
+/****************************************************************************
+ * MISCELLANEOUS FUNCTIONS
+ ***************************************************************************/
+
+/* control */
+#if defined(_OPENMP) && (_OPENMP >= 201307)
+#pragma omp declare target
+#endif
+void ompt_control(
+ uint64_t command,
+ uint64_t modifier
+);
+#if defined(_OPENMP) && (_OPENMP >= 201307)
+#pragma omp end declare target
+#endif
+
+/* state enumeration */
+OMPT_API_FUNCTION(int, ompt_enumerate_state, (
+ int current_state,
+ int *next_state,
+ const char **next_state_name
+));
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
+
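Editorial note (not part of the patch): the ompt.h.var restored above declares the 4.0-era draft OMPT tool interface, in which a tool defines ompt_tool() and returns an initializer that receives a lookup function for the inquiry and callback-registration entry points. The sketch below is a hypothetical minimal tool against that draft; the names my_ompt_initialize and on_thread_begin are illustrative, and it assumes the generated ompt.h is on the include path and that the runtime invokes ompt_tool() at startup as the technical-report draft describes.

#include <stdio.h>
#include <ompt.h>

/* Callback matching ompt_thread_type_callback_t, registered for ompt_event_thread_begin. */
static void on_thread_begin(ompt_thread_type_t type, ompt_thread_id_t tid) {
    printf("thread %llu began (type %d)\n", (unsigned long long)tid, (int)type);
}

/* Matches ompt_initialize_t: called by the runtime with its lookup function. */
static void my_ompt_initialize(ompt_function_lookup_t lookup,
                               const char *runtime_version,
                               unsigned int ompt_version) {
    (void)runtime_version;
    (void)ompt_version;
    /* Look up ompt_set_callback through the runtime-provided lookup function. */
    ompt_set_callback_t set_callback =
        (ompt_set_callback_t)lookup("ompt_set_callback");
    if (set_callback)
        set_callback(ompt_event_thread_begin, (ompt_callback_t)on_thread_begin);
}

/* Defined by the tool; returning a non-NULL initializer enables OMPT. */
ompt_initialize_t ompt_tool(void) {
    return my_ompt_initialize;
}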
diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp.h.var b/contrib/libs/cxxsupp/openmp/include/41/omp.h.var
index 9e7c871a78..6d9fa43810 100644
--- a/contrib/libs/cxxsupp/openmp/include/41/omp.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/41/omp.h.var
@@ -1,176 +1,176 @@
-/*
- * include/41/omp.h.var
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef __OMP_H
-# define __OMP_H
-
-# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
-# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@
-# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@
-# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@"
-
-# ifdef __cplusplus
- extern "C" {
-# endif
-
-# if defined(_WIN32)
-# define __KAI_KMPC_CONVENTION __cdecl
-# else
-# define __KAI_KMPC_CONVENTION
-# endif
-
- /* schedule kind constants */
- typedef enum omp_sched_t {
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
- } omp_sched_t;
-
- /* set API functions */
- extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
- extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
- extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
- extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
- extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
-
- /* query API functions */
- extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
- extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
- extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
- extern int __KAI_KMPC_CONVENTION omp_in_final (void);
- extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
- extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
- extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
-
- /* lock API functions */
- typedef struct omp_lock_t {
- void * _lk;
- } omp_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
-
- /* nested lock API functions */
- typedef struct omp_nest_lock_t {
- void * _lk;
- } omp_nest_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
-
- /* lock hint type for dynamic user lock */
- typedef enum omp_lock_hint_t {
- omp_lock_hint_none = 0,
- omp_lock_hint_uncontended = 1,
- omp_lock_hint_contended = (1<<1 ),
- omp_lock_hint_nonspeculative = (1<<2 ),
- omp_lock_hint_speculative = (1<<3 ),
- kmp_lock_hint_hle = (1<<16),
- kmp_lock_hint_rtm = (1<<17),
- kmp_lock_hint_adaptive = (1<<18)
- } omp_lock_hint_t;
-
- /* hinted lock initializers */
- extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);
- extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);
-
- /* time API functions */
- extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
- extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
-
- /* OpenMP 4.0 */
- extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);
- extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);
- extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
- extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
-
-# include <stdlib.h>
- /* kmp API functions */
- extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
- extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
- extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
- extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
-
- /* Intel affinity API */
- typedef void * kmp_affinity_mask_t;
-
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
- extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
- extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
-
- /* OpenMP 4.0 affinity API */
- typedef enum omp_proc_bind_t {
- omp_proc_bind_false = 0,
- omp_proc_bind_true = 1,
- omp_proc_bind_master = 2,
- omp_proc_bind_close = 3,
- omp_proc_bind_spread = 4
- } omp_proc_bind_t;
-
- extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);
-
- extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
- extern void __KAI_KMPC_CONVENTION kmp_free (void *);
-
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
-
-# undef __KAI_KMPC_CONVENTION
-
- /* Warning:
- The following typedefs are not standard, deprecated and will be removed in a future release.
- */
- typedef int omp_int_t;
- typedef double omp_wtime_t;
-
-# ifdef __cplusplus
- }
-# endif
-
-#endif /* __OMP_H */
-
+/*
+ * include/41/omp.h.var
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef __OMP_H
+# define __OMP_H
+
+# define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
+# define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@
+# define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@
+# define KMP_BUILD_DATE "@LIBOMP_BUILD_DATE@"
+
+# ifdef __cplusplus
+ extern "C" {
+# endif
+
+# if defined(_WIN32)
+# define __KAI_KMPC_CONVENTION __cdecl
+# else
+# define __KAI_KMPC_CONVENTION
+# endif
+
+ /* schedule kind constants */
+ typedef enum omp_sched_t {
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+ } omp_sched_t;
+
+ /* set API functions */
+ extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
+
+ /* query API functions */
+ extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_final (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
+ extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
+
+ /* lock API functions */
+ typedef struct omp_lock_t {
+ void * _lk;
+ } omp_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
+
+ /* nested lock API functions */
+ typedef struct omp_nest_lock_t {
+ void * _lk;
+ } omp_nest_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
+
+ /* lock hint type for dynamic user lock */
+ typedef enum omp_lock_hint_t {
+ omp_lock_hint_none = 0,
+ omp_lock_hint_uncontended = 1,
+ omp_lock_hint_contended = (1<<1 ),
+ omp_lock_hint_nonspeculative = (1<<2 ),
+ omp_lock_hint_speculative = (1<<3 ),
+ kmp_lock_hint_hle = (1<<16),
+ kmp_lock_hint_rtm = (1<<17),
+ kmp_lock_hint_adaptive = (1<<18)
+ } omp_lock_hint_t;
+
+ /* hinted lock initializers */
+ extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);
+ extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);
+
+ /* time API functions */
+ extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
+ extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
+
+ /* OpenMP 4.0 */
+ extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);
+ extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);
+ extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
+
+# include <stdlib.h>
+ /* kmp API functions */
+ extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
+ extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
+ extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
+ extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
+
+ /* Intel affinity API */
+ typedef void * kmp_affinity_mask_t;
+
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
+ extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
+ extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
+
+ /* OpenMP 4.0 affinity API */
+ typedef enum omp_proc_bind_t {
+ omp_proc_bind_false = 0,
+ omp_proc_bind_true = 1,
+ omp_proc_bind_master = 2,
+ omp_proc_bind_close = 3,
+ omp_proc_bind_spread = 4
+ } omp_proc_bind_t;
+
+ extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);
+
+ extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
+ extern void __KAI_KMPC_CONVENTION kmp_free (void *);
+
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
+
+# undef __KAI_KMPC_CONVENTION
+
+ /* Warning:
+ The following typedefs are not standard, deprecated and will be removed in a future release.
+ */
+ typedef int omp_int_t;
+ typedef double omp_wtime_t;
+
+# ifdef __cplusplus
+ }
+# endif
+
+#endif /* __OMP_H */
+
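A short usage sketch (again, not part of the patch) for the C API declared in omp.h.var above: it requests a thread count, queries the team from inside a parallel region, and protects a shared counter with the omp_lock_t type from the same header. Everything used here appears in the declarations shown; the program itself is illustrative.

/* Sketch: exercising the omp.h entry points declared above. */
#include <stdio.h>
#include <omp.h>

int main(void) {
    omp_lock_t lock;
    int counter = 0;

    omp_set_num_threads(4);          /* request a team of 4 threads */
    omp_init_lock(&lock);

    #pragma omp parallel
    {
        /* omp_get_thread_num()/omp_get_num_threads() come from omp.h */
        printf("thread %d of %d\n",
               omp_get_thread_num(), omp_get_num_threads());

        omp_set_lock(&lock);         /* serialize the shared update */
        counter++;
        omp_unset_lock(&lock);
    }

    omp_destroy_lock(&lock);
    printf("counter = %d\n", counter);
    return 0;
}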
diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var
index 49eb401df4..c801908cd4 100644
--- a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var
+++ b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f.var
@@ -1,788 +1,788 @@
-! include/41/omp_lib.f.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
-!***
-!*** Some of the directives for the following routine extend past column 72,
-!*** so process this file in 132-column mode.
-!***
-
-!dec$ fixedformlinesize:132
-
- module omp_lib_kinds
-
- integer, parameter :: omp_integer_kind = 4
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = 4
- integer, parameter :: omp_lock_kind = int_ptr_kind()
- integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: omp_proc_bind_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = int_ptr_kind()
- integer, parameter :: kmp_size_t_kind = int_ptr_kind()
- integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
- integer, parameter :: kmp_cancel_kind = omp_integer_kind
- integer, parameter :: omp_lock_hint_kind = omp_integer_kind
-
- end module omp_lib_kinds
-
- module omp_lib
-
- use omp_lib_kinds
-
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@'
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
-
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
-
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_get_dynamic()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) level
- integer (kind=omp_integer_kind) omp_get_team_size
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_proc_bind()
- use omp_lib_kinds
- integer (kind=omp_proc_bind_kind) omp_get_proc_bind
- end function omp_get_proc_bind
-
- function omp_get_wtime()
- double precision omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick ()
- double precision omp_get_wtick
- end function omp_get_wtick
-
- function omp_get_default_device()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_default_device
- end function omp_get_default_device
-
- subroutine omp_set_default_device(dflt_device)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) dflt_device
- end subroutine omp_set_default_device
-
- function omp_get_num_devices()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_devices
- end function omp_get_num_devices
-
- function omp_get_num_teams()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_teams
- end function omp_get_num_teams
-
- function omp_get_team_num()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_num
- end function omp_get_team_num
-
- function omp_get_cancellation()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_cancellation
- end function omp_get_cancellation
-
- function omp_is_initial_device()
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_is_initial_device
- end function omp_is_initial_device
-
- subroutine omp_init_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial()
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround()
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput()
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string)
- character*(*) string
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s()
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc()
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind) proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind) size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind) nelem
- integer (kind=kmp_size_t_kind) elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind) ptr
- integer (kind=kmp_size_t_kind) size
- end function kmp_realloc
-
- subroutine kmp_free(ptr)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on()
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off()
- end subroutine kmp_set_warnings_off
-
- function kmp_get_cancellation_status(cancelkind)
- use omp_lib_kinds
- integer (kind=kmp_cancel_kind) cancelkind
- logical (kind=omp_logical_kind) kmp_get_cancellation_status
- end function kmp_get_cancellation_status
-
- subroutine kmp_init_lock_with_hint(lockvar, lockhint)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- integer (kind=omp_lock_hint_kind) lockhint
- end subroutine kmp_init_lock_with_hint
-
- subroutine kmp_init_nest_lock_with_hint(lockvar, lockhint)
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- integer (kind=omp_lock_hint_kind) lockhint
- end subroutine kmp_init_nest_lock_with_hint
-
- end interface
-
-!dec$ if defined(_WIN32)
-!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
-
-!***
-!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
-!*** option is specified. The alias attribute ensures that the specified
-!*** string is used as the entry point.
-!***
-!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
-!*** underscore prepended. On the Windows* OS Intel(R) 64
-!*** architecture, no underscore is prepended.
-!***
-
-!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads
-!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic
-!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested
-!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads
-!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads
-!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num
-!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs
-!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel
-!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic
-!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested
-!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
-!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
-!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
-!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level
-!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
-!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
-!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size
-!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule
-!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule
-!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind
-!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime
-!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick
-!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
-!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
-!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices
-!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams
-!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num
-!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
-!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
-
-!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint
-!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint
-!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
-
-!dec$ else
-
-!***
-!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
-!***
-
-!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads
-!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic
-!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested
-!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads
-!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads
-!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num
-!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs
-!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel
-!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic
-!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested
-!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
-!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
-!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
-!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level
-!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
-!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
-!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size
-!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule
-!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule
-!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind
-!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime
-!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick
-!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
-!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
-!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices
-!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams
-!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num
-!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
-!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
-
-!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
-!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
-!dec$ attributes alias:'_omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
-
-!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
-!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
-!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
-!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
-!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
-!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
-!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
-!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
-!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
-!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
-!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
-!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
-!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
-!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
-!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
-!dec$ attributes alias:'_KMP_FREE'::kmp_free
-
-!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
-!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
-
-!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
-
-!dec$ endif
-!dec$ endif
-
-!dec$ if defined(__linux)
-
-!***
-!*** The Linux* OS entry points are in lowercase, with an underscore appended.
-!***
-
-!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'omp_get_level_'::omp_get_level
-!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind
-!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
-!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device
-!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device
-!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices
-!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams
-!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num
-!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
-!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device
-
-!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint
-!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint
-!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'kmp_free_'::kmp_free
-
-!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
-!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
-
-!dec$ endif
-
-!dec$ if defined(__APPLE__)
-
-!***
-!*** The Mac entry points are in lowercase, with an both an underscore
-!*** appended and an underscore prepended.
-!***
-
-!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
-!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
-!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
-!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
-!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
-!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
-!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
-!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
-!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
-!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
-!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
-!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
-!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
-!dec$ attributes alias:'_omp_get_level_'::omp_get_level
-!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
-!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
-!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
-!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
-!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
-!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind
-!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
-!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
-!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
-!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
-!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
-!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device
-
-!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
-!dec$ attributes alias:'_omp_init_lock_with_hint_'::omp_init_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
-!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
-!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
-!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
-!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
-!dec$ attributes alias:'_omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint
-!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
-!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
-!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
-!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
-
-!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
-!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
-!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
-!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
-!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
-!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
-!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
-!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
-!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
-!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
-!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
-!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
-!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
-!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
-!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
-!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
-!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
-!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
-!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
-!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
-!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
-!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
-!dec$ attributes alias:'_kmp_free_'::kmp_free
-
-!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
-!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
-
-!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
-
-!dec$ endif
-
- end module omp_lib
-
+! include/41/omp_lib.f.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+!***
+!*** Some of the directives for the following routine extend past column 72,
+!*** so process this file in 132-column mode.
+!***
+
+!dec$ fixedformlinesize:132
+
+ module omp_lib_kinds
+
+ integer, parameter :: omp_integer_kind = 4
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = 4
+ integer, parameter :: omp_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: omp_proc_bind_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = int_ptr_kind()
+ integer, parameter :: kmp_size_t_kind = int_ptr_kind()
+ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+ integer, parameter :: kmp_cancel_kind = omp_integer_kind
+ integer, parameter :: omp_lock_hint_kind = omp_integer_kind
+
+ end module omp_lib_kinds
+
+ module omp_lib
+
+ use omp_lib_kinds
+
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*), parameter :: kmp_build_date = '@LIBOMP_BUILD_DATE@'
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_get_dynamic()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) level
+ integer (kind=omp_integer_kind) omp_get_team_size
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_proc_bind()
+ use omp_lib_kinds
+ integer (kind=omp_proc_bind_kind) omp_get_proc_bind
+ end function omp_get_proc_bind
+
+ function omp_get_wtime()
+ double precision omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick ()
+ double precision omp_get_wtick
+ end function omp_get_wtick
+
+ function omp_get_default_device()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_default_device
+ end function omp_get_default_device
+
+ subroutine omp_set_default_device(dflt_device)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) dflt_device
+ end subroutine omp_set_default_device
+
+ function omp_get_num_devices()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_devices
+ end function omp_get_num_devices
+
+ function omp_get_num_teams()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_teams
+ end function omp_get_num_teams
+
+ function omp_get_team_num()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_num
+ end function omp_get_team_num
+
+ function omp_get_cancellation()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_cancellation
+ end function omp_get_cancellation
+
+ function omp_is_initial_device()
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_is_initial_device
+ end function omp_is_initial_device
+
+ subroutine omp_init_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial()
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround()
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput()
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string)
+ character*(*) string
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s()
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind) proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind) nelem
+ integer (kind=kmp_size_t_kind) elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind) ptr
+ integer (kind=kmp_size_t_kind) size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on()
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off()
+ end subroutine kmp_set_warnings_off
+
+ function kmp_get_cancellation_status(cancelkind)
+ use omp_lib_kinds
+ integer (kind=kmp_cancel_kind) cancelkind
+ logical (kind=omp_logical_kind) kmp_get_cancellation_status
+ end function kmp_get_cancellation_status
+
+ subroutine kmp_init_lock_with_hint(lockvar, lockhint)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ integer (kind=omp_lock_hint_kind) lockhint
+ end subroutine kmp_init_lock_with_hint
+
+ subroutine kmp_init_nest_lock_with_hint(lockvar, lockhint)
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ integer (kind=omp_lock_hint_kind) lockhint
+ end subroutine kmp_init_nest_lock_with_hint
+
+ end interface
+
+!dec$ if defined(_WIN32)
+!dec$ if defined(_WIN64) .or. defined(_M_AMD64)
+
+!***
+!*** The Fortran entry points must be in uppercase, even if the /Qlowercase
+!*** option is specified. The alias attribute ensures that the specified
+!*** string is used as the entry point.
+!***
+!*** On the Windows* OS IA-32 architecture, the Fortran entry points have an
+!*** underscore prepended. On the Windows* OS Intel(R) 64
+!*** architecture, no underscore is prepended.
+!***
+
+!dec$ attributes alias:'OMP_SET_NUM_THREADS' :: omp_set_num_threads
+!dec$ attributes alias:'OMP_SET_DYNAMIC' :: omp_set_dynamic
+!dec$ attributes alias:'OMP_SET_NESTED' :: omp_set_nested
+!dec$ attributes alias:'OMP_GET_NUM_THREADS' :: omp_get_num_threads
+!dec$ attributes alias:'OMP_GET_MAX_THREADS' :: omp_get_max_threads
+!dec$ attributes alias:'OMP_GET_THREAD_NUM' :: omp_get_thread_num
+!dec$ attributes alias:'OMP_GET_NUM_PROCS' :: omp_get_num_procs
+!dec$ attributes alias:'OMP_IN_PARALLEL' :: omp_in_parallel
+!dec$ attributes alias:'OMP_GET_DYNAMIC' :: omp_get_dynamic
+!dec$ attributes alias:'OMP_GET_NESTED' :: omp_get_nested
+!dec$ attributes alias:'OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
+!dec$ attributes alias:'OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
+!dec$ attributes alias:'OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
+!dec$ attributes alias:'OMP_GET_LEVEL' :: omp_get_level
+!dec$ attributes alias:'OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
+!dec$ attributes alias:'OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
+!dec$ attributes alias:'OMP_GET_TEAM_SIZE' :: omp_get_team_size
+!dec$ attributes alias:'OMP_SET_SCHEDULE' :: omp_set_schedule
+!dec$ attributes alias:'OMP_GET_SCHEDULE' :: omp_get_schedule
+!dec$ attributes alias:'OMP_GET_PROC_BIND' :: omp_get_proc_bind
+!dec$ attributes alias:'OMP_GET_WTIME' :: omp_get_wtime
+!dec$ attributes alias:'OMP_GET_WTICK' :: omp_get_wtick
+!dec$ attributes alias:'OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
+!dec$ attributes alias:'OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
+!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices
+!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams
+!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
+!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
+
+!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
+!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint
+!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock' :: omp_set_lock
+!dec$ attributes alias:'omp_unset_lock' :: omp_unset_lock
+!dec$ attributes alias:'omp_test_lock' :: omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock' :: omp_init_nest_lock
+!dec$ attributes alias:'omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint
+!dec$ attributes alias:'omp_destroy_nest_lock' :: omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock' :: omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock' :: omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock' :: omp_test_nest_lock
+
+!dec$ attributes alias:'KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
+!dec$ else
+
+!***
+!*** On Windows* OS IA-32 architecture, the Fortran entry points have an underscore prepended.
+!***
+
+!dec$ attributes alias:'_OMP_SET_NUM_THREADS' :: omp_set_num_threads
+!dec$ attributes alias:'_OMP_SET_DYNAMIC' :: omp_set_dynamic
+!dec$ attributes alias:'_OMP_SET_NESTED' :: omp_set_nested
+!dec$ attributes alias:'_OMP_GET_NUM_THREADS' :: omp_get_num_threads
+!dec$ attributes alias:'_OMP_GET_MAX_THREADS' :: omp_get_max_threads
+!dec$ attributes alias:'_OMP_GET_THREAD_NUM' :: omp_get_thread_num
+!dec$ attributes alias:'_OMP_GET_NUM_PROCS' :: omp_get_num_procs
+!dec$ attributes alias:'_OMP_IN_PARALLEL' :: omp_in_parallel
+!dec$ attributes alias:'_OMP_GET_DYNAMIC' :: omp_get_dynamic
+!dec$ attributes alias:'_OMP_GET_NESTED' :: omp_get_nested
+!dec$ attributes alias:'_OMP_GET_THREAD_LIMIT' :: omp_get_thread_limit
+!dec$ attributes alias:'_OMP_SET_MAX_ACTIVE_LEVELS' :: omp_set_max_active_levels
+!dec$ attributes alias:'_OMP_GET_MAX_ACTIVE_LEVELS' :: omp_get_max_active_levels
+!dec$ attributes alias:'_OMP_GET_LEVEL' :: omp_get_level
+!dec$ attributes alias:'_OMP_GET_ACTIVE_LEVEL' :: omp_get_active_level
+!dec$ attributes alias:'_OMP_GET_ANCESTOR_THREAD_NUM' :: omp_get_ancestor_thread_num
+!dec$ attributes alias:'_OMP_GET_TEAM_SIZE' :: omp_get_team_size
+!dec$ attributes alias:'_OMP_SET_SCHEDULE' :: omp_set_schedule
+!dec$ attributes alias:'_OMP_GET_SCHEDULE' :: omp_get_schedule
+!dec$ attributes alias:'_OMP_GET_PROC_BIND' :: omp_get_proc_bind
+!dec$ attributes alias:'_OMP_GET_WTIME' :: omp_get_wtime
+!dec$ attributes alias:'_OMP_GET_WTICK' :: omp_get_wtick
+!dec$ attributes alias:'_OMP_GET_DEFAULT_DEVICE' :: omp_get_default_device
+!dec$ attributes alias:'_OMP_SET_DEFAULT_DEVICE' :: omp_set_default_device
+!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices
+!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams
+!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
+!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
+
+!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
+!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
+!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock' :: omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock' :: omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock' :: omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock' :: omp_init_nest_lock
+!dec$ attributes alias:'_omp_init_nest_lock_with_hint' :: omp_init_nest_lock_with_hint
+!dec$ attributes alias:'_omp_destroy_nest_lock' :: omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock' :: omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock' :: omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock' :: omp_test_nest_lock
+
+!dec$ attributes alias:'_KMP_SET_STACKSIZE'::kmp_set_stacksize
+!dec$ attributes alias:'_KMP_SET_STACKSIZE_S'::kmp_set_stacksize_s
+!dec$ attributes alias:'_KMP_SET_BLOCKTIME'::kmp_set_blocktime
+!dec$ attributes alias:'_KMP_SET_LIBRARY_SERIAL'::kmp_set_library_serial
+!dec$ attributes alias:'_KMP_SET_LIBRARY_TURNAROUND'::kmp_set_library_turnaround
+!dec$ attributes alias:'_KMP_SET_LIBRARY_THROUGHPUT'::kmp_set_library_throughput
+!dec$ attributes alias:'_KMP_SET_LIBRARY'::kmp_set_library
+!dec$ attributes alias:'_KMP_GET_STACKSIZE'::kmp_get_stacksize
+!dec$ attributes alias:'_KMP_GET_STACKSIZE_S'::kmp_get_stacksize_s
+!dec$ attributes alias:'_KMP_GET_BLOCKTIME'::kmp_get_blocktime
+!dec$ attributes alias:'_KMP_GET_LIBRARY'::kmp_get_library
+!dec$ attributes alias:'_KMP_SET_AFFINITY'::kmp_set_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY'::kmp_get_affinity
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MAX_PROC'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_KMP_CREATE_AFFINITY_MASK'::kmp_create_affinity_mask
+!dec$ attributes alias:'_KMP_DESTROY_AFFINITY_MASK'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_KMP_SET_AFFINITY_MASK_PROC'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_KMP_UNSET_AFFINITY_MASK_PROC'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_KMP_GET_AFFINITY_MASK_PROC'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_KMP_MALLOC'::kmp_malloc
+!dec$ attributes alias:'_KMP_CALLOC'::kmp_calloc
+!dec$ attributes alias:'_KMP_REALLOC'::kmp_realloc
+!dec$ attributes alias:'_KMP_FREE'::kmp_free
+
+!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
+!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+
+!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
+!dec$ endif
+!dec$ endif
+
+!dec$ if defined(__linux)
+
+!***
+!*** The Linux* OS entry points are in lowercase, with an underscore appended.
+!***
+
+!dec$ attributes alias:'omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'omp_get_level_'::omp_get_level
+!dec$ attributes alias:'omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'omp_get_proc_bind_' :: omp_get_proc_bind
+!dec$ attributes alias:'omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'omp_get_wtick_'::omp_get_wtick
+!dec$ attributes alias:'omp_get_default_device_'::omp_get_default_device
+!dec$ attributes alias:'omp_set_default_device_'::omp_set_default_device
+!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices
+!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams
+!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
+!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device
+
+!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint
+!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint
+!dec$ attributes alias:'omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'kmp_free_'::kmp_free
+
+!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
+!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
+
+!dec$ endif
+
+!dec$ if defined(__APPLE__)
+
+!***
+!*** The Mac entry points are in lowercase, with both an underscore
+!*** appended and an underscore prepended.
+!***
+
+!dec$ attributes alias:'_omp_set_num_threads_'::omp_set_num_threads
+!dec$ attributes alias:'_omp_set_dynamic_'::omp_set_dynamic
+!dec$ attributes alias:'_omp_set_nested_'::omp_set_nested
+!dec$ attributes alias:'_omp_get_num_threads_'::omp_get_num_threads
+!dec$ attributes alias:'_omp_get_max_threads_'::omp_get_max_threads
+!dec$ attributes alias:'_omp_get_thread_num_'::omp_get_thread_num
+!dec$ attributes alias:'_omp_get_num_procs_'::omp_get_num_procs
+!dec$ attributes alias:'_omp_in_parallel_'::omp_in_parallel
+!dec$ attributes alias:'_omp_get_dynamic_'::omp_get_dynamic
+!dec$ attributes alias:'_omp_get_nested_'::omp_get_nested
+!dec$ attributes alias:'_omp_get_thread_limit_'::omp_get_thread_limit
+!dec$ attributes alias:'_omp_set_max_active_levels_'::omp_set_max_active_levels
+!dec$ attributes alias:'_omp_get_max_active_levels_'::omp_get_max_active_levels
+!dec$ attributes alias:'_omp_get_level_'::omp_get_level
+!dec$ attributes alias:'_omp_get_active_level_'::omp_get_active_level
+!dec$ attributes alias:'_omp_get_ancestor_thread_num_'::omp_get_ancestor_thread_num
+!dec$ attributes alias:'_omp_get_team_size_'::omp_get_team_size
+!dec$ attributes alias:'_omp_set_schedule_'::omp_set_schedule
+!dec$ attributes alias:'_omp_get_schedule_'::omp_get_schedule
+!dec$ attributes alias:'_omp_get_proc_bind_' :: omp_get_proc_bind
+!dec$ attributes alias:'_omp_get_wtime_'::omp_get_wtime
+!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
+!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
+!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
+!dec$ attributes alias:'_omp_is_initial_device_'::omp_is_initial_device
+
+!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
+!dec$ attributes alias:'_omp_init_lock_with_hint_'::omp_init_lock_with_hint
+!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
+!dec$ attributes alias:'_omp_set_lock_'::omp_set_lock
+!dec$ attributes alias:'_omp_unset_lock_'::omp_unset_lock
+!dec$ attributes alias:'_omp_test_lock_'::omp_test_lock
+!dec$ attributes alias:'_omp_init_nest_lock_'::omp_init_nest_lock
+!dec$ attributes alias:'_omp_init_nest_lock_with_hint_'::omp_init_nest_lock_with_hint
+!dec$ attributes alias:'_omp_destroy_nest_lock_'::omp_destroy_nest_lock
+!dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock
+!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
+!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
+
+!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
+!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
+!dec$ attributes alias:'_kmp_set_blocktime_'::kmp_set_blocktime
+!dec$ attributes alias:'_kmp_set_library_serial_'::kmp_set_library_serial
+!dec$ attributes alias:'_kmp_set_library_turnaround_'::kmp_set_library_turnaround
+!dec$ attributes alias:'_kmp_set_library_throughput_'::kmp_set_library_throughput
+!dec$ attributes alias:'_kmp_set_library_'::kmp_set_library
+!dec$ attributes alias:'_kmp_get_stacksize_'::kmp_get_stacksize
+!dec$ attributes alias:'_kmp_get_stacksize_s_'::kmp_get_stacksize_s
+!dec$ attributes alias:'_kmp_get_blocktime_'::kmp_get_blocktime
+!dec$ attributes alias:'_kmp_get_library_'::kmp_get_library
+!dec$ attributes alias:'_kmp_set_affinity_'::kmp_set_affinity
+!dec$ attributes alias:'_kmp_get_affinity_'::kmp_get_affinity
+!dec$ attributes alias:'_kmp_get_affinity_max_proc_'::kmp_get_affinity_max_proc
+!dec$ attributes alias:'_kmp_create_affinity_mask_'::kmp_create_affinity_mask
+!dec$ attributes alias:'_kmp_destroy_affinity_mask_'::kmp_destroy_affinity_mask
+!dec$ attributes alias:'_kmp_set_affinity_mask_proc_'::kmp_set_affinity_mask_proc
+!dec$ attributes alias:'_kmp_unset_affinity_mask_proc_'::kmp_unset_affinity_mask_proc
+!dec$ attributes alias:'_kmp_get_affinity_mask_proc_'::kmp_get_affinity_mask_proc
+!dec$ attributes alias:'_kmp_malloc_'::kmp_malloc
+!dec$ attributes alias:'_kmp_calloc_'::kmp_calloc
+!dec$ attributes alias:'_kmp_realloc_'::kmp_realloc
+!dec$ attributes alias:'_kmp_free_'::kmp_free
+
+!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
+!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
+
+!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
+
+!dec$ endif
+
+ end module omp_lib
+
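The module above only declares interfaces into the runtime; a minimal, free-form usage sketch follows. The program name, thread count, and build flags are illustrative assumptions rather than part of the source, and any OpenMP-enabled Fortran compiler (for example gfortran -fopenmp or ifort -qopenmp) should accept it.

program omp_lib_example
  use omp_lib
  implicit none
  integer (kind=omp_integer_kind) :: tid
  integer (kind=omp_lock_kind)    :: lck

  call omp_set_num_threads(4)        ! request four threads for the next region
  call omp_init_lock(lck)            ! plain (non-nestable) lock

!$omp parallel private(tid)
  tid = omp_get_thread_num()
  call omp_set_lock(lck)             ! serialize the print across threads
  print *, 'thread', tid, 'of', omp_get_num_threads()
  call omp_unset_lock(lck)
!$omp end parallel

  call omp_destroy_lock(lck)
end program omp_lib_example

The same entry points are reached regardless of platform; the dec$ alias blocks above only adjust the linker-level symbol names (uppercase on Windows, trailing underscore on Linux, leading and trailing underscore on macOS).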
diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var
index 7066ee41c5..2d23667b10 100644
--- a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var
+++ b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.f90.var
@@ -1,470 +1,470 @@
-! include/41/omp_lib.f90.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
- module omp_lib_kinds
-
- use, intrinsic :: iso_c_binding
-
- integer, parameter :: omp_integer_kind = c_int
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = c_float
- integer, parameter :: kmp_double_kind = c_double
- integer, parameter :: omp_lock_kind = c_intptr_t
- integer, parameter :: omp_nest_lock_kind = c_intptr_t
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: omp_proc_bind_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = c_intptr_t
- integer, parameter :: kmp_size_t_kind = c_size_t
- integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
- integer, parameter :: kmp_cancel_kind = omp_integer_kind
- integer, parameter :: omp_lock_hint_kind = omp_integer_kind
-
- end module omp_lib_kinds
-
- module omp_lib
-
- use omp_lib_kinds
-
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*) kmp_build_date
- parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
-
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
-
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
- integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
-
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable) bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_in_final() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_in_final
- end function omp_in_final
-
- function omp_get_dynamic() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_size
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier) bind(c)
- use omp_lib_kinds
- integer (kind=omp_sched_kind), value :: kind
- integer (kind=omp_integer_kind), value :: modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier) bind(c)
- use omp_lib_kinds
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_proc_bind() bind(c)
- use omp_lib_kinds
- integer (kind=omp_proc_bind_kind) omp_get_proc_bind
- end function omp_get_proc_bind
-
- function omp_get_wtime() bind(c)
- use omp_lib_kinds
- real (kind=kmp_double_kind) omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick() bind(c)
- use omp_lib_kinds
- real (kind=kmp_double_kind) omp_get_wtick
- end function omp_get_wtick
-
- function omp_get_default_device() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_default_device
- end function omp_get_default_device
-
- subroutine omp_set_default_device(dflt_device) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: dflt_device
- end subroutine omp_set_default_device
-
- function omp_get_num_devices() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_devices
- end function omp_get_num_devices
-
- function omp_get_num_teams() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_num_teams
- end function omp_get_num_teams
-
- function omp_get_team_num() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_team_num
- end function omp_get_team_num
-
- function omp_get_cancellation() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_get_cancellation
- end function omp_get_cancellation
-
- function omp_is_initial_device() bind(c)
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_is_initial_device
- end function omp_is_initial_device
-
- subroutine omp_init_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_nest_lock
-!DIR$ ENDIF
- use omp_lib_kinds
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind), value :: size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial() bind(c)
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround() bind(c)
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput() bind(c)
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind), value :: libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string) bind(c)
- use, intrinsic :: iso_c_binding
- character (kind=c_char) :: string(*)
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s() bind(c)
- use omp_lib_kinds
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc() bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask) bind(c)
- use omp_lib_kinds
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind), value :: nelem
- integer (kind=kmp_size_t_kind), value :: elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind), value :: ptr
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_realloc
-
- subroutine kmp_free(ptr) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_pointer_kind), value :: ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on() bind(c)
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off() bind(c)
- end subroutine kmp_set_warnings_off
-
- function kmp_get_cancellation_status(cancelkind) bind(c)
- use omp_lib_kinds
- integer (kind=kmp_cancel_kind), value :: cancelkind
- logical (kind=omp_logical_kind) kmp_get_cancellation_status
- end function kmp_get_cancellation_status
-
- subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- integer (kind=omp_lock_hint_kind), value :: lockhint
- end subroutine omp_init_lock_with_hint
-
- subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c)
- use omp_lib_kinds
- integer (kind=omp_lock_kind) lockvar
- integer (kind=omp_lock_hint_kind), value :: lockhint
- end subroutine omp_init_nest_lock_with_hint
-
- end interface
-
- end module omp_lib
+! include/41/omp_lib.f90.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+ module omp_lib_kinds
+
+ use, intrinsic :: iso_c_binding
+
+ integer, parameter :: omp_integer_kind = c_int
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = c_float
+ integer, parameter :: kmp_double_kind = c_double
+ integer, parameter :: omp_lock_kind = c_intptr_t
+ integer, parameter :: omp_nest_lock_kind = c_intptr_t
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: omp_proc_bind_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = c_intptr_t
+ integer, parameter :: kmp_size_t_kind = c_size_t
+ integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
+ integer, parameter :: kmp_cancel_kind = omp_integer_kind
+ integer, parameter :: omp_lock_hint_kind = omp_integer_kind
+
+ end module omp_lib_kinds
+
+ module omp_lib
+
+ use omp_lib_kinds
+
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*) kmp_build_date
+ parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_in_final() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_in_final
+ end function omp_in_final
+
+ function omp_get_dynamic() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_size
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind), value :: kind
+ integer (kind=omp_integer_kind), value :: modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_proc_bind() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_proc_bind_kind) omp_get_proc_bind
+ end function omp_get_proc_bind
+
+ function omp_get_wtime() bind(c)
+ use omp_lib_kinds
+ real (kind=kmp_double_kind) omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick() bind(c)
+ use omp_lib_kinds
+ real (kind=kmp_double_kind) omp_get_wtick
+ end function omp_get_wtick
+
+ function omp_get_default_device() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_default_device
+ end function omp_get_default_device
+
+ subroutine omp_set_default_device(dflt_device) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: dflt_device
+ end subroutine omp_set_default_device
+
+ function omp_get_num_devices() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_devices
+ end function omp_get_num_devices
+
+ function omp_get_num_teams() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_num_teams
+ end function omp_get_num_teams
+
+ function omp_get_team_num() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_team_num
+ end function omp_get_team_num
+
+ function omp_get_cancellation() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_cancellation
+ end function omp_get_cancellation
+
+ function omp_is_initial_device() bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_is_initial_device
+ end function omp_is_initial_device
+
+ subroutine omp_init_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_nest_lock
+!DIR$ ENDIF
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind), value :: size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial() bind(c)
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround() bind(c)
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput() bind(c)
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind), value :: libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string) bind(c)
+ use, intrinsic :: iso_c_binding
+ character (kind=c_char) :: string(*)
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s() bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind), value :: nelem
+ integer (kind=kmp_size_t_kind), value :: elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind), value :: ptr
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_pointer_kind), value :: ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on() bind(c)
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off() bind(c)
+ end subroutine kmp_set_warnings_off
+
+ function kmp_get_cancellation_status(cancelkind) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_cancel_kind), value :: cancelkind
+ logical (kind=omp_logical_kind) kmp_get_cancellation_status
+ end function kmp_get_cancellation_status
+
+ subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ integer (kind=omp_lock_hint_kind), value :: lockhint
+ end subroutine omp_init_lock_with_hint
+
+ subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_lock_kind) lockvar
+ integer (kind=omp_lock_hint_kind), value :: lockhint
+ end subroutine omp_init_nest_lock_with_hint
+
+ end interface
+
+ end module omp_lib
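This 4.1 variant of the interface adds bind(c)/value dummies plus the lock-hint and kmp_* allocator entry points declared above. The sketch below shows how they might be combined; the program name and the 1024-byte allocation size are illustrative assumptions, not taken from the source.

program lock_hint_example
  use omp_lib
  implicit none
  integer (kind=omp_lock_kind)    :: lck
  integer (kind=kmp_pointer_kind) :: buf

  ! Advisory hint: prefer a speculative (e.g. HTM-backed) lock if the
  ! runtime and hardware support it, otherwise fall back to a plain lock.
  call omp_init_lock_with_hint(lck, omp_lock_hint_speculative)

  ! kmp_malloc/kmp_free work with C-pointer-sized integers rather than
  ! Fortran pointers; 1024 bytes is an arbitrary illustrative size.
  buf = kmp_malloc(1024_kmp_size_t_kind)

!$omp parallel
  call omp_set_lock(lck)
  ! ... touch shared state guarded by lck ...
  call omp_unset_lock(lck)
!$omp end parallel

  call kmp_free(buf)
  call omp_destroy_lock(lck)
end program lock_hint_example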
diff --git a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var
index 7d9a32db4c..867bcd97b0 100644
--- a/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/41/omp_lib.h.var
@@ -1,584 +1,584 @@
-! include/41/omp_lib.h.var
-
-!
-!//===----------------------------------------------------------------------===//
-!//
-!// The LLVM Compiler Infrastructure
-!//
-!// This file is dual licensed under the MIT and the University of Illinois Open
-!// Source Licenses. See LICENSE.txt for details.
-!//
-!//===----------------------------------------------------------------------===//
-!
-
-!***
-!*** Some of the directives for the following routine extend past column 72,
-!*** so process this file in 132-column mode.
-!***
-
-!DIR$ fixedformlinesize:132
-
- integer, parameter :: omp_integer_kind = 4
- integer, parameter :: omp_logical_kind = 4
- integer, parameter :: omp_real_kind = 4
- integer, parameter :: omp_lock_kind = int_ptr_kind()
- integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
- integer, parameter :: omp_sched_kind = omp_integer_kind
- integer, parameter :: omp_proc_bind_kind = omp_integer_kind
- integer, parameter :: kmp_pointer_kind = int_ptr_kind()
- integer, parameter :: kmp_size_t_kind = int_ptr_kind()
- integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
- integer, parameter :: omp_lock_hint_kind = omp_integer_kind
-
- integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
- integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
- integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
- character(*) kmp_build_date
- parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
-
- integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
- integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
- integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
-
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
- integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
-
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4
- integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
- integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
-
- interface
-
-! ***
-! *** omp_* entry points
-! ***
-
- subroutine omp_set_num_threads(nthreads) bind(c)
- import
- integer (kind=omp_integer_kind), value :: nthreads
- end subroutine omp_set_num_threads
-
- subroutine omp_set_dynamic(enable) bind(c)
- import
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_dynamic
-
- subroutine omp_set_nested(enable) bind(c)
- import
- logical (kind=omp_logical_kind), value :: enable
- end subroutine omp_set_nested
-
- function omp_get_num_threads() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_threads
- end function omp_get_num_threads
-
- function omp_get_max_threads() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_max_threads
- end function omp_get_max_threads
-
- function omp_get_thread_num() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_thread_num
- end function omp_get_thread_num
-
- function omp_get_num_procs() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_procs
- end function omp_get_num_procs
-
- function omp_in_parallel() bind(c)
- import
- logical (kind=omp_logical_kind) omp_in_parallel
- end function omp_in_parallel
-
- function omp_in_final() bind(c)
- import
- logical (kind=omp_logical_kind) omp_in_final
- end function omp_in_final
-
- function omp_get_dynamic() bind(c)
- import
- logical (kind=omp_logical_kind) omp_get_dynamic
- end function omp_get_dynamic
-
- function omp_get_nested() bind(c)
- import
- logical (kind=omp_logical_kind) omp_get_nested
- end function omp_get_nested
-
- function omp_get_thread_limit() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_thread_limit
- end function omp_get_thread_limit
-
- subroutine omp_set_max_active_levels(max_levels) bind(c)
- import
- integer (kind=omp_integer_kind), value :: max_levels
- end subroutine omp_set_max_active_levels
-
- function omp_get_max_active_levels() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_max_active_levels
- end function omp_get_max_active_levels
-
- function omp_get_level() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_level
- end function omp_get_level
-
- function omp_get_active_level() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_active_level
- end function omp_get_active_level
-
- function omp_get_ancestor_thread_num(level) bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_ancestor_thread_num
-
- function omp_get_team_size(level) bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_team_size
- integer (kind=omp_integer_kind), value :: level
- end function omp_get_team_size
-
- subroutine omp_set_schedule(kind, modifier) bind(c)
- import
- integer (kind=omp_sched_kind), value :: kind
- integer (kind=omp_integer_kind), value :: modifier
- end subroutine omp_set_schedule
-
- subroutine omp_get_schedule(kind, modifier) bind(c)
- import
- integer (kind=omp_sched_kind) kind
- integer (kind=omp_integer_kind) modifier
- end subroutine omp_get_schedule
-
- function omp_get_proc_bind() bind(c)
- import
- integer (kind=omp_proc_bind_kind) omp_get_proc_bind
- end function omp_get_proc_bind
-
- function omp_get_wtime() bind(c)
- double precision omp_get_wtime
- end function omp_get_wtime
-
- function omp_get_wtick() bind(c)
- double precision omp_get_wtick
- end function omp_get_wtick
-
- function omp_get_default_device() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_default_device
- end function omp_get_default_device
-
- subroutine omp_set_default_device(dflt_device) bind(c)
- import
- integer (kind=omp_integer_kind), value :: dflt_device
- end subroutine omp_set_default_device
-
- function omp_get_num_devices() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_devices
- end function omp_get_num_devices
-
- function omp_get_num_teams() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_num_teams
- end function omp_get_num_teams
-
- function omp_get_team_num() bind(c)
- import
- integer (kind=omp_integer_kind) omp_get_team_num
- end function omp_get_team_num
-
- function omp_is_initial_device() bind(c)
- import
- logical (kind=omp_logical_kind) omp_is_initial_device
- end function omp_is_initial_device
-
- subroutine omp_init_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_init_lock
-
- subroutine omp_destroy_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_destroy_lock
-
- subroutine omp_set_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_set_lock
-
- subroutine omp_unset_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_lock_kind) lockvar
- end subroutine omp_unset_lock
-
- function omp_test_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_lock
-!DIR$ ENDIF
- import
- logical (kind=omp_logical_kind) omp_test_lock
- integer (kind=omp_lock_kind) lockvar
- end function omp_test_lock
-
- subroutine omp_init_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_init_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_init_nest_lock
-
- subroutine omp_destroy_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_destroy_nest_lock
-
- subroutine omp_set_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_set_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_set_nest_lock
-
- subroutine omp_unset_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_nest_lock_kind) lockvar
- end subroutine omp_unset_nest_lock
-
- function omp_test_nest_lock(lockvar) bind(c)
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!DIR$ attributes known_intrinsic :: omp_test_nest_lock
-!DIR$ ENDIF
- import
- integer (kind=omp_integer_kind) omp_test_nest_lock
- integer (kind=omp_nest_lock_kind) lockvar
- end function omp_test_nest_lock
-
-! ***
-! *** kmp_* entry points
-! ***
-
- subroutine kmp_set_stacksize(size) bind(c)
- import
- integer (kind=omp_integer_kind), value :: size
- end subroutine kmp_set_stacksize
-
- subroutine kmp_set_stacksize_s(size) bind(c)
- import
- integer (kind=kmp_size_t_kind), value :: size
- end subroutine kmp_set_stacksize_s
-
- subroutine kmp_set_blocktime(msec) bind(c)
- import
- integer (kind=omp_integer_kind), value :: msec
- end subroutine kmp_set_blocktime
-
- subroutine kmp_set_library_serial() bind(c)
- end subroutine kmp_set_library_serial
-
- subroutine kmp_set_library_turnaround() bind(c)
- end subroutine kmp_set_library_turnaround
-
- subroutine kmp_set_library_throughput() bind(c)
- end subroutine kmp_set_library_throughput
-
- subroutine kmp_set_library(libnum) bind(c)
- import
- integer (kind=omp_integer_kind), value :: libnum
- end subroutine kmp_set_library
-
- subroutine kmp_set_defaults(string) bind(c)
- character string(*)
- end subroutine kmp_set_defaults
-
- function kmp_get_stacksize() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_stacksize
- end function kmp_get_stacksize
-
- function kmp_get_stacksize_s() bind(c)
- import
- integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
- end function kmp_get_stacksize_s
-
- function kmp_get_blocktime() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_blocktime
- end function kmp_get_blocktime
-
- function kmp_get_library() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_library
- end function kmp_get_library
-
- function kmp_set_affinity(mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_set_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity
-
- function kmp_get_affinity(mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity
-
- function kmp_get_affinity_max_proc() bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
- end function kmp_get_affinity_max_proc
-
- subroutine kmp_create_affinity_mask(mask) bind(c)
- import
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_create_affinity_mask
-
- subroutine kmp_destroy_affinity_mask(mask) bind(c)
- import
- integer (kind=kmp_affinity_mask_kind) mask
- end subroutine kmp_destroy_affinity_mask
-
- function kmp_set_affinity_mask_proc(proc, mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_set_affinity_mask_proc
-
- function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_unset_affinity_mask_proc
-
- function kmp_get_affinity_mask_proc(proc, mask) bind(c)
- import
- integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
- integer (kind=omp_integer_kind), value :: proc
- integer (kind=kmp_affinity_mask_kind) mask
- end function kmp_get_affinity_mask_proc
-
- function kmp_malloc(size) bind(c)
- import
- integer (kind=kmp_pointer_kind) kmp_malloc
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_malloc
-
- function kmp_calloc(nelem, elsize) bind(c)
- import
- integer (kind=kmp_pointer_kind) kmp_calloc
- integer (kind=kmp_size_t_kind), value :: nelem
- integer (kind=kmp_size_t_kind), value :: elsize
- end function kmp_calloc
-
- function kmp_realloc(ptr, size) bind(c)
- import
- integer (kind=kmp_pointer_kind) kmp_realloc
- integer (kind=kmp_pointer_kind), value :: ptr
- integer (kind=kmp_size_t_kind), value :: size
- end function kmp_realloc
-
- subroutine kmp_free(ptr) bind(c)
- import
- integer (kind=kmp_pointer_kind), value :: ptr
- end subroutine kmp_free
-
- subroutine kmp_set_warnings_on() bind(c)
- end subroutine kmp_set_warnings_on
-
- subroutine kmp_set_warnings_off() bind(c)
- end subroutine kmp_set_warnings_off
-
- subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c)
- import
- integer (kind=omp_lock_kind) lockvar
- integer (kind=omp_lock_hint_kind), value :: lockhint
- end subroutine omp_init_lock_with_hint
-
- subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c)
- import
- integer (kind=omp_lock_kind) lockvar
- integer (kind=omp_lock_hint_kind), value :: lockhint
- end subroutine omp_init_nest_lock_with_hint
-
- end interface
-
-!DIR$ IF DEFINED (__INTEL_OFFLOAD)
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock_with_hint
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock_with_hint
-
-!DIR$ IF(__INTEL_COMPILER.GE.1400)
-!$omp declare target(omp_set_num_threads )
-!$omp declare target(omp_set_dynamic )
-!$omp declare target(omp_set_nested )
-!$omp declare target(omp_get_num_threads )
-!$omp declare target(omp_get_max_threads )
-!$omp declare target(omp_get_thread_num )
-!$omp declare target(omp_get_num_procs )
-!$omp declare target(omp_in_parallel )
-!$omp declare target(omp_in_final )
-!$omp declare target(omp_get_dynamic )
-!$omp declare target(omp_get_nested )
-!$omp declare target(omp_get_thread_limit )
-!$omp declare target(omp_set_max_active_levels )
-!$omp declare target(omp_get_max_active_levels )
-!$omp declare target(omp_get_level )
-!$omp declare target(omp_get_active_level )
-!$omp declare target(omp_get_ancestor_thread_num )
-!$omp declare target(omp_get_team_size )
-!$omp declare target(omp_set_schedule )
-!$omp declare target(omp_get_schedule )
-!$omp declare target(omp_get_proc_bind )
-!$omp declare target(omp_get_wtime )
-!$omp declare target(omp_get_wtick )
-!$omp declare target(omp_get_default_device )
-!$omp declare target(omp_set_default_device )
-!$omp declare target(omp_is_initial_device )
-!$omp declare target(omp_get_num_devices )
-!$omp declare target(omp_get_num_teams )
-!$omp declare target(omp_get_team_num )
-!$omp declare target(omp_init_lock )
-!$omp declare target(omp_destroy_lock )
-!$omp declare target(omp_set_lock )
-!$omp declare target(omp_unset_lock )
-!$omp declare target(omp_test_lock )
-!$omp declare target(omp_init_nest_lock )
-!$omp declare target(omp_destroy_nest_lock )
-!$omp declare target(omp_set_nest_lock )
-!$omp declare target(omp_unset_nest_lock )
-!$omp declare target(omp_test_nest_lock )
-!$omp declare target(kmp_set_stacksize )
-!$omp declare target(kmp_set_stacksize_s )
-!$omp declare target(kmp_set_blocktime )
-!$omp declare target(kmp_set_library_serial )
-!$omp declare target(kmp_set_library_turnaround )
-!$omp declare target(kmp_set_library_throughput )
-!$omp declare target(kmp_set_library )
-!$omp declare target(kmp_set_defaults )
-!$omp declare target(kmp_get_stacksize )
-!$omp declare target(kmp_get_stacksize_s )
-!$omp declare target(kmp_get_blocktime )
-!$omp declare target(kmp_get_library )
-!$omp declare target(kmp_set_affinity )
-!$omp declare target(kmp_get_affinity )
-!$omp declare target(kmp_get_affinity_max_proc )
-!$omp declare target(kmp_create_affinity_mask )
-!$omp declare target(kmp_destroy_affinity_mask )
-!$omp declare target(kmp_set_affinity_mask_proc )
-!$omp declare target(kmp_unset_affinity_mask_proc )
-!$omp declare target(kmp_get_affinity_mask_proc )
-!$omp declare target(kmp_malloc )
-!$omp declare target(kmp_calloc )
-!$omp declare target(kmp_realloc )
-!$omp declare target(kmp_free )
-!$omp declare target(kmp_set_warnings_on )
-!$omp declare target(kmp_set_warnings_off )
-!$omp declare target(omp_init_lock_with_hint )
-!$omp declare target(omp_init_nest_lock_with_hint )
-!DIR$ ENDIF
-!DIR$ ENDIF
-
+! include/41/omp_lib.h.var
+
+!
+!//===----------------------------------------------------------------------===//
+!//
+!// The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+!***
+!*** Some of the directives for the following routine extend past column 72,
+!*** so process this file in 132-column mode.
+!***
+
+!DIR$ fixedformlinesize:132
+
+ integer, parameter :: omp_integer_kind = 4
+ integer, parameter :: omp_logical_kind = 4
+ integer, parameter :: omp_real_kind = 4
+ integer, parameter :: omp_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_nest_lock_kind = int_ptr_kind()
+ integer, parameter :: omp_sched_kind = omp_integer_kind
+ integer, parameter :: omp_proc_bind_kind = omp_integer_kind
+ integer, parameter :: kmp_pointer_kind = int_ptr_kind()
+ integer, parameter :: kmp_size_t_kind = int_ptr_kind()
+ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+ integer, parameter :: omp_lock_hint_kind = omp_integer_kind
+
+ integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_minor = @LIBOMP_VERSION_MINOR@
+ integer (kind=omp_integer_kind), parameter :: kmp_version_build = @LIBOMP_VERSION_BUILD@
+ character(*) kmp_build_date
+ parameter( kmp_build_date = '@LIBOMP_BUILD_DATE@' )
+
+ integer(kind=omp_sched_kind), parameter :: omp_sched_static = 1
+ integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_none = 0
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_uncontended = 1
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_contended = 2
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_nonspeculative = 4
+ integer (kind=omp_lock_hint_kind), parameter :: omp_lock_hint_speculative = 8
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_hle = 65536
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072
+ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144
+
+ interface
+
+! ***
+! *** omp_* entry points
+! ***
+
+ subroutine omp_set_num_threads(nthreads) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: nthreads
+ end subroutine omp_set_num_threads
+
+ subroutine omp_set_dynamic(enable) bind(c)
+ import
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_dynamic
+
+ subroutine omp_set_nested(enable) bind(c)
+ import
+ logical (kind=omp_logical_kind), value :: enable
+ end subroutine omp_set_nested
+
+ function omp_get_num_threads() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_threads
+ end function omp_get_num_threads
+
+ function omp_get_max_threads() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_max_threads
+ end function omp_get_max_threads
+
+ function omp_get_thread_num() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_thread_num
+ end function omp_get_thread_num
+
+ function omp_get_num_procs() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_procs
+ end function omp_get_num_procs
+
+ function omp_in_parallel() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_in_parallel
+ end function omp_in_parallel
+
+ function omp_in_final() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_in_final
+ end function omp_in_final
+
+ function omp_get_dynamic() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_get_dynamic
+ end function omp_get_dynamic
+
+ function omp_get_nested() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_get_nested
+ end function omp_get_nested
+
+ function omp_get_thread_limit() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_thread_limit
+ end function omp_get_thread_limit
+
+ subroutine omp_set_max_active_levels(max_levels) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: max_levels
+ end subroutine omp_set_max_active_levels
+
+ function omp_get_max_active_levels() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_max_active_levels
+ end function omp_get_max_active_levels
+
+ function omp_get_level() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_level
+ end function omp_get_level
+
+ function omp_get_active_level() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_active_level
+ end function omp_get_active_level
+
+ function omp_get_ancestor_thread_num(level) bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_ancestor_thread_num
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_ancestor_thread_num
+
+ function omp_get_team_size(level) bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_team_size
+ integer (kind=omp_integer_kind), value :: level
+ end function omp_get_team_size
+
+ subroutine omp_set_schedule(kind, modifier) bind(c)
+ import
+ integer (kind=omp_sched_kind), value :: kind
+ integer (kind=omp_integer_kind), value :: modifier
+ end subroutine omp_set_schedule
+
+ subroutine omp_get_schedule(kind, modifier) bind(c)
+ import
+ integer (kind=omp_sched_kind) kind
+ integer (kind=omp_integer_kind) modifier
+ end subroutine omp_get_schedule
+
+ function omp_get_proc_bind() bind(c)
+ import
+ integer (kind=omp_proc_bind_kind) omp_get_proc_bind
+ end function omp_get_proc_bind
+
+ function omp_get_wtime() bind(c)
+ double precision omp_get_wtime
+ end function omp_get_wtime
+
+ function omp_get_wtick() bind(c)
+ double precision omp_get_wtick
+ end function omp_get_wtick
+
+ function omp_get_default_device() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_default_device
+ end function omp_get_default_device
+
+ subroutine omp_set_default_device(dflt_device) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: dflt_device
+ end subroutine omp_set_default_device
+
+ function omp_get_num_devices() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_devices
+ end function omp_get_num_devices
+
+ function omp_get_num_teams() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_num_teams
+ end function omp_get_num_teams
+
+ function omp_get_team_num() bind(c)
+ import
+ integer (kind=omp_integer_kind) omp_get_team_num
+ end function omp_get_team_num
+
+ function omp_is_initial_device() bind(c)
+ import
+ logical (kind=omp_logical_kind) omp_is_initial_device
+ end function omp_is_initial_device
+
+ subroutine omp_init_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_init_lock
+
+ subroutine omp_destroy_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_destroy_lock
+
+ subroutine omp_set_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_set_lock
+
+ subroutine omp_unset_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_lock_kind) lockvar
+ end subroutine omp_unset_lock
+
+ function omp_test_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_lock
+!DIR$ ENDIF
+ import
+ logical (kind=omp_logical_kind) omp_test_lock
+ integer (kind=omp_lock_kind) lockvar
+ end function omp_test_lock
+
+ subroutine omp_init_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_init_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_init_nest_lock
+
+ subroutine omp_destroy_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_destroy_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_destroy_nest_lock
+
+ subroutine omp_set_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_set_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_set_nest_lock
+
+ subroutine omp_unset_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_unset_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_nest_lock_kind) lockvar
+ end subroutine omp_unset_nest_lock
+
+ function omp_test_nest_lock(lockvar) bind(c)
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!DIR$ attributes known_intrinsic :: omp_test_nest_lock
+!DIR$ ENDIF
+ import
+ integer (kind=omp_integer_kind) omp_test_nest_lock
+ integer (kind=omp_nest_lock_kind) lockvar
+ end function omp_test_nest_lock
+
+! ***
+! *** kmp_* entry points
+! ***
+
+ subroutine kmp_set_stacksize(size) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: size
+ end subroutine kmp_set_stacksize
+
+ subroutine kmp_set_stacksize_s(size) bind(c)
+ import
+ integer (kind=kmp_size_t_kind), value :: size
+ end subroutine kmp_set_stacksize_s
+
+ subroutine kmp_set_blocktime(msec) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: msec
+ end subroutine kmp_set_blocktime
+
+ subroutine kmp_set_library_serial() bind(c)
+ end subroutine kmp_set_library_serial
+
+ subroutine kmp_set_library_turnaround() bind(c)
+ end subroutine kmp_set_library_turnaround
+
+ subroutine kmp_set_library_throughput() bind(c)
+ end subroutine kmp_set_library_throughput
+
+ subroutine kmp_set_library(libnum) bind(c)
+ import
+ integer (kind=omp_integer_kind), value :: libnum
+ end subroutine kmp_set_library
+
+ subroutine kmp_set_defaults(string) bind(c)
+ character string(*)
+ end subroutine kmp_set_defaults
+
+ function kmp_get_stacksize() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_stacksize
+ end function kmp_get_stacksize
+
+ function kmp_get_stacksize_s() bind(c)
+ import
+ integer (kind=kmp_size_t_kind) kmp_get_stacksize_s
+ end function kmp_get_stacksize_s
+
+ function kmp_get_blocktime() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_blocktime
+ end function kmp_get_blocktime
+
+ function kmp_get_library() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_library
+ end function kmp_get_library
+
+ function kmp_set_affinity(mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_set_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity
+
+ function kmp_get_affinity(mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity
+
+ function kmp_get_affinity_max_proc() bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity_max_proc
+ end function kmp_get_affinity_max_proc
+
+ subroutine kmp_create_affinity_mask(mask) bind(c)
+ import
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_create_affinity_mask
+
+ subroutine kmp_destroy_affinity_mask(mask) bind(c)
+ import
+ integer (kind=kmp_affinity_mask_kind) mask
+ end subroutine kmp_destroy_affinity_mask
+
+ function kmp_set_affinity_mask_proc(proc, mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_set_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_set_affinity_mask_proc
+
+ function kmp_unset_affinity_mask_proc(proc, mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_unset_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_unset_affinity_mask_proc
+
+ function kmp_get_affinity_mask_proc(proc, mask) bind(c)
+ import
+ integer (kind=omp_integer_kind) kmp_get_affinity_mask_proc
+ integer (kind=omp_integer_kind), value :: proc
+ integer (kind=kmp_affinity_mask_kind) mask
+ end function kmp_get_affinity_mask_proc
+
+ function kmp_malloc(size) bind(c)
+ import
+ integer (kind=kmp_pointer_kind) kmp_malloc
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_malloc
+
+ function kmp_calloc(nelem, elsize) bind(c)
+ import
+ integer (kind=kmp_pointer_kind) kmp_calloc
+ integer (kind=kmp_size_t_kind), value :: nelem
+ integer (kind=kmp_size_t_kind), value :: elsize
+ end function kmp_calloc
+
+ function kmp_realloc(ptr, size) bind(c)
+ import
+ integer (kind=kmp_pointer_kind) kmp_realloc
+ integer (kind=kmp_pointer_kind), value :: ptr
+ integer (kind=kmp_size_t_kind), value :: size
+ end function kmp_realloc
+
+ subroutine kmp_free(ptr) bind(c)
+ import
+ integer (kind=kmp_pointer_kind), value :: ptr
+ end subroutine kmp_free
+
+ subroutine kmp_set_warnings_on() bind(c)
+ end subroutine kmp_set_warnings_on
+
+ subroutine kmp_set_warnings_off() bind(c)
+ end subroutine kmp_set_warnings_off
+
+ subroutine omp_init_lock_with_hint(lockvar, lockhint) bind(c)
+ import
+ integer (kind=omp_lock_kind) lockvar
+ integer (kind=omp_lock_hint_kind), value :: lockhint
+ end subroutine omp_init_lock_with_hint
+
+ subroutine omp_init_nest_lock_with_hint(lockvar, lockhint) bind(c)
+ import
+ integer (kind=omp_lock_kind) lockvar
+ integer (kind=omp_lock_hint_kind), value :: lockhint
+ end subroutine omp_init_nest_lock_with_hint
+
+ end interface
+
+!DIR$ IF DEFINED (__INTEL_OFFLOAD)
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_num_threads
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_dynamic
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nested
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_threads
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_threads
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_procs
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_parallel
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_in_final
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_dynamic
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_nested
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_thread_limit
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_max_active_levels
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_active_levels
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_level
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_active_level
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_ancestor_thread_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_size
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_schedule
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_schedule
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_proc_bind
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtime
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_wtick
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_default_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_default_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_is_initial_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_teams
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_team_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_destroy_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_serial
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_turnaround
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library_throughput
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_library
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_defaults
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_stacksize_s
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_blocktime
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_library
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_max_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_create_affinity_mask
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_destroy_affinity_mask
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_affinity_mask_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_unset_affinity_mask_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_get_affinity_mask_proc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_malloc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_calloc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_realloc
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_lock_with_hint
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_init_nest_lock_with_hint
+
+!DIR$ IF(__INTEL_COMPILER.GE.1400)
+!$omp declare target(omp_set_num_threads )
+!$omp declare target(omp_set_dynamic )
+!$omp declare target(omp_set_nested )
+!$omp declare target(omp_get_num_threads )
+!$omp declare target(omp_get_max_threads )
+!$omp declare target(omp_get_thread_num )
+!$omp declare target(omp_get_num_procs )
+!$omp declare target(omp_in_parallel )
+!$omp declare target(omp_in_final )
+!$omp declare target(omp_get_dynamic )
+!$omp declare target(omp_get_nested )
+!$omp declare target(omp_get_thread_limit )
+!$omp declare target(omp_set_max_active_levels )
+!$omp declare target(omp_get_max_active_levels )
+!$omp declare target(omp_get_level )
+!$omp declare target(omp_get_active_level )
+!$omp declare target(omp_get_ancestor_thread_num )
+!$omp declare target(omp_get_team_size )
+!$omp declare target(omp_set_schedule )
+!$omp declare target(omp_get_schedule )
+!$omp declare target(omp_get_proc_bind )
+!$omp declare target(omp_get_wtime )
+!$omp declare target(omp_get_wtick )
+!$omp declare target(omp_get_default_device )
+!$omp declare target(omp_set_default_device )
+!$omp declare target(omp_is_initial_device )
+!$omp declare target(omp_get_num_devices )
+!$omp declare target(omp_get_num_teams )
+!$omp declare target(omp_get_team_num )
+!$omp declare target(omp_init_lock )
+!$omp declare target(omp_destroy_lock )
+!$omp declare target(omp_set_lock )
+!$omp declare target(omp_unset_lock )
+!$omp declare target(omp_test_lock )
+!$omp declare target(omp_init_nest_lock )
+!$omp declare target(omp_destroy_nest_lock )
+!$omp declare target(omp_set_nest_lock )
+!$omp declare target(omp_unset_nest_lock )
+!$omp declare target(omp_test_nest_lock )
+!$omp declare target(kmp_set_stacksize )
+!$omp declare target(kmp_set_stacksize_s )
+!$omp declare target(kmp_set_blocktime )
+!$omp declare target(kmp_set_library_serial )
+!$omp declare target(kmp_set_library_turnaround )
+!$omp declare target(kmp_set_library_throughput )
+!$omp declare target(kmp_set_library )
+!$omp declare target(kmp_set_defaults )
+!$omp declare target(kmp_get_stacksize )
+!$omp declare target(kmp_get_stacksize_s )
+!$omp declare target(kmp_get_blocktime )
+!$omp declare target(kmp_get_library )
+!$omp declare target(kmp_set_affinity )
+!$omp declare target(kmp_get_affinity )
+!$omp declare target(kmp_get_affinity_max_proc )
+!$omp declare target(kmp_create_affinity_mask )
+!$omp declare target(kmp_destroy_affinity_mask )
+!$omp declare target(kmp_set_affinity_mask_proc )
+!$omp declare target(kmp_unset_affinity_mask_proc )
+!$omp declare target(kmp_get_affinity_mask_proc )
+!$omp declare target(kmp_malloc )
+!$omp declare target(kmp_calloc )
+!$omp declare target(kmp_realloc )
+!$omp declare target(kmp_free )
+!$omp declare target(kmp_set_warnings_on )
+!$omp declare target(kmp_set_warnings_off )
+!$omp declare target(omp_init_lock_with_hint )
+!$omp declare target(omp_init_nest_lock_with_hint )
+!DIR$ ENDIF
+!DIR$ ENDIF
+
diff --git a/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var b/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var
index 96818519c9..fbd95e858b 100644
--- a/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var
+++ b/contrib/libs/cxxsupp/openmp/include/41/ompt.h.var
@@ -1,487 +1,487 @@
-/*
- * include/41/ompt.h.var
- */
-
-#ifndef __OMPT__
-#define __OMPT__
-
-/*****************************************************************************
- * system include files
- *****************************************************************************/
-
-#include <stdint.h>
-
-
-
-/*****************************************************************************
- * iteration macros
- *****************************************************************************/
-
-#define FOREACH_OMPT_INQUIRY_FN(macro) \
- macro (ompt_enumerate_state) \
- \
- macro (ompt_set_callback) \
- macro (ompt_get_callback) \
- \
- macro (ompt_get_idle_frame) \
- macro (ompt_get_task_frame) \
- \
- macro (ompt_get_state) \
- \
- macro (ompt_get_parallel_id) \
- macro (ompt_get_parallel_team_size) \
- macro (ompt_get_task_id) \
- macro (ompt_get_thread_id)
-
-#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
- macro (ompt_idle) \
- macro (ompt_overhead) \
- macro (ompt_barrier_wait) \
- macro (ompt_task_wait) \
- macro (ompt_mutex_wait)
-
-#define FOREACH_OMPT_STATE(macro) \
- \
- /* first */ \
- macro (ompt_state_first, 0x71) /* initial enumeration state */ \
- \
- /* work states (0..15) */ \
- macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
- macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
- macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
- \
- /* idle (16..31) */ \
- macro (ompt_state_idle, 0x10) /* waiting for work */ \
- \
- /* overhead states (32..63) */ \
- macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
- \
- /* barrier wait states (64..79) */ \
- macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
- macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
- macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
- \
- /* task wait states (80..95) */ \
- macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
- macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
- \
- /* mutex wait states (96..111) */ \
- macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
- macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
- macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
- macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
- macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
- macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
- \
- /* misc (112..127) */ \
- macro (ompt_state_undefined, 0x70) /* undefined thread state */
-
-
-#define FOREACH_OMPT_EVENT(macro) \
- \
- /*--- Mandatory Events ---*/ \
- macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
- macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
- \
- macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
- macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
- \
- macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
- macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
- \
- macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
- \
- macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
- \
- /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
- macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
- macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
- \
- macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
- macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
- \
- macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
- macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
- \
- macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
- macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
- \
- macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
- macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
- macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
- \
- macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
- \
- macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
- \
- /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
- macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
- macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
- \
- macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
- macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
- \
- macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
- \
- macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
- macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
- \
- macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
- macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
- \
- macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
- macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
- \
- macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
- macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
- \
- macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
- macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
- \
- macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
- macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
- \
- macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
- macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
- \
- macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
- macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \
- \
- macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
- macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
- \
- macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
- \
- macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
- macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
- macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
- macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
- macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
- \
- macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
- macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
- macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
- macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
- macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
- macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
- \
- macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
- macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
- \
- macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
- macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
- \
- macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */
-
-
-
-/*****************************************************************************
- * data types
- *****************************************************************************/
-
-/*---------------------
- * identifiers
- *---------------------*/
-
-typedef uint64_t ompt_thread_id_t;
-#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_task_id_t;
-#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_parallel_id_t;
-#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
-
-typedef uint64_t ompt_wait_id_t;
-#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
-
-
-/*---------------------
- * ompt_frame_t
- *---------------------*/
-
-typedef struct ompt_frame_s {
- void *exit_runtime_frame; /* next frame is user code */
- void *reenter_runtime_frame; /* previous frame is user code */
-} ompt_frame_t;
-
-
-/*****************************************************************************
- * enumerations for thread states and runtime events
- *****************************************************************************/
-
-/*---------------------
- * runtime states
- *---------------------*/
-
-typedef enum {
-#define ompt_state_macro(state, code) state = code,
- FOREACH_OMPT_STATE(ompt_state_macro)
-#undef ompt_state_macro
-} ompt_state_t;
-
-
-/*---------------------
- * runtime events
- *---------------------*/
-
-typedef enum {
-#define ompt_event_macro(event, callback, eventid) event = eventid,
- FOREACH_OMPT_EVENT(ompt_event_macro)
-#undef ompt_event_macro
-} ompt_event_t;
-
-
-/*---------------------
- * set callback results
- *---------------------*/
-typedef enum {
- ompt_set_result_registration_error = 0,
- ompt_set_result_event_may_occur_no_callback = 1,
- ompt_set_result_event_never_occurs = 2,
- ompt_set_result_event_may_occur_callback_some = 3,
- ompt_set_result_event_may_occur_callback_always = 4,
-} ompt_set_result_t;
-
-
-
-/*****************************************************************************
- * callback signatures
- *****************************************************************************/
-
-/* initialization */
-typedef void (*ompt_interface_fn_t)(void);
-
-typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
- const char * /* entry point to look up */
-);
-
-/* threads */
-typedef void (*ompt_thread_callback_t) (
- ompt_thread_id_t thread_id /* ID of thread */
-);
-
-typedef enum {
- ompt_thread_initial = 1, // start the enumeration at 1
- ompt_thread_worker = 2,
- ompt_thread_other = 3
-} ompt_thread_type_t;
-
-typedef enum {
- ompt_invoker_program = 0, /* program invokes master task */
- ompt_invoker_runtime = 1 /* runtime invokes master task */
-} ompt_invoker_t;
-
-typedef void (*ompt_thread_type_callback_t) (
- ompt_thread_type_t thread_type, /* type of thread */
- ompt_thread_id_t thread_id /* ID of thread */
-);
-
-typedef void (*ompt_wait_callback_t) (
- ompt_wait_id_t wait_id /* wait id */
-);
-
-/* parallel and workshares */
-typedef void (*ompt_parallel_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t task_id /* id of task */
-);
-
-typedef void (*ompt_new_workshare_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t parent_task_id, /* id of parent task */
- void *workshare_function /* pointer to outlined function */
-);
-
-typedef void (*ompt_new_parallel_callback_t) (
- ompt_task_id_t parent_task_id, /* id of parent task */
- ompt_frame_t *parent_task_frame, /* frame data of parent task */
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- uint32_t requested_team_size, /* number of threads in team */
- void *parallel_function, /* pointer to outlined function */
- ompt_invoker_t invoker /* who invokes master task? */
-);
-
-typedef void (*ompt_end_parallel_callback_t) (
- ompt_parallel_id_t parallel_id, /* id of parallel region */
- ompt_task_id_t task_id, /* id of task */
- ompt_invoker_t invoker /* who invokes master task? */
-);
-
-/* tasks */
-typedef void (*ompt_task_callback_t) (
- ompt_task_id_t task_id /* id of task */
-);
-
-typedef void (*ompt_task_pair_callback_t) (
- ompt_task_id_t first_task_id,
- ompt_task_id_t second_task_id
-);
-
-typedef void (*ompt_new_task_callback_t) (
- ompt_task_id_t parent_task_id, /* id of parent task */
- ompt_frame_t *parent_task_frame, /* frame data for parent task */
- ompt_task_id_t new_task_id, /* id of created task */
- void *task_function /* pointer to outlined function */
-);
-
-/* program */
-typedef void (*ompt_control_callback_t) (
- uint64_t command, /* command of control call */
- uint64_t modifier /* modifier of control call */
-);
-
-typedef void (*ompt_callback_t)(void);
-
-
-/****************************************************************************
- * ompt API
- ***************************************************************************/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define OMPT_API_FNTYPE(fn) fn##_t
-
-#define OMPT_API_FUNCTION(return_type, fn, args) \
- typedef return_type (*OMPT_API_FNTYPE(fn)) args
-
-
-
-/****************************************************************************
- * INQUIRY FUNCTIONS
- ***************************************************************************/
-
-/* state */
-OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
- ompt_wait_id_t *ompt_wait_id
-));
-
-/* thread */
-OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
-
-OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
-
-/* parallel region */
-OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
- int ancestor_level
-));
-
-OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
- int ancestor_level
-));
-
-/* task */
-OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
- int depth
-));
-
-OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
- int depth
-));
-
-
-
-/****************************************************************************
- * PLACEHOLDERS FOR PERFORMANCE REPORTING
- ***************************************************************************/
-
-/* idle */
-OMPT_API_FUNCTION(void, ompt_idle, (
- void
-));
-
-/* overhead */
-OMPT_API_FUNCTION(void, ompt_overhead, (
- void
-));
-
-/* barrier wait */
-OMPT_API_FUNCTION(void, ompt_barrier_wait, (
- void
-));
-
-/* task wait */
-OMPT_API_FUNCTION(void, ompt_task_wait, (
- void
-));
-
-/* mutex wait */
-OMPT_API_FUNCTION(void, ompt_mutex_wait, (
- void
-));
-
-
-
-/****************************************************************************
- * INITIALIZATION FUNCTIONS
- ***************************************************************************/
-
-OMPT_API_FUNCTION(void, ompt_initialize, (
- ompt_function_lookup_t ompt_fn_lookup,
- const char *runtime_version,
- unsigned int ompt_version
-));
-
-
-/* initialization interface to be defined by tool */
-ompt_initialize_t ompt_tool(void);
-
-typedef enum opt_init_mode_e {
- ompt_init_mode_never = 0,
- ompt_init_mode_false = 1,
- ompt_init_mode_true = 2,
- ompt_init_mode_always = 3
-} ompt_init_mode_t;
-
-OMPT_API_FUNCTION(int, ompt_set_callback, (
- ompt_event_t event,
- ompt_callback_t callback
-));
-
-typedef enum ompt_set_callback_rc_e { /* non-standard */
- ompt_set_callback_error = 0,
- ompt_has_event_no_callback = 1,
- ompt_no_event_no_callback = 2,
- ompt_has_event_may_callback = 3,
- ompt_has_event_must_callback = 4,
-} ompt_set_callback_rc_t;
-
-
-OMPT_API_FUNCTION(int, ompt_get_callback, (
- ompt_event_t event,
- ompt_callback_t *callback
-));
-
-
-
-/****************************************************************************
- * MISCELLANEOUS FUNCTIONS
- ***************************************************************************/
-
-/* control */
-#if defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp declare target
-#endif
-void ompt_control(
- uint64_t command,
- uint64_t modifier
-);
-#if defined(_OPENMP) && (_OPENMP >= 201307)
-#pragma omp end declare target
-#endif
-
-/* state enumeration */
-OMPT_API_FUNCTION(int, ompt_enumerate_state, (
- int current_state,
- int *next_state,
- const char **next_state_name
-));
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif
-
+/*
+ * include/41/ompt.h.var
+ */
+
+#ifndef __OMPT__
+#define __OMPT__
+
+/*****************************************************************************
+ * system include files
+ *****************************************************************************/
+
+#include <stdint.h>
+
+
+
+/*****************************************************************************
+ * iteration macros
+ *****************************************************************************/
+
+#define FOREACH_OMPT_INQUIRY_FN(macro) \
+ macro (ompt_enumerate_state) \
+ \
+ macro (ompt_set_callback) \
+ macro (ompt_get_callback) \
+ \
+ macro (ompt_get_idle_frame) \
+ macro (ompt_get_task_frame) \
+ \
+ macro (ompt_get_state) \
+ \
+ macro (ompt_get_parallel_id) \
+ macro (ompt_get_parallel_team_size) \
+ macro (ompt_get_task_id) \
+ macro (ompt_get_thread_id)
+
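+#if 0
+/* Illustrative sketch (not part of this header): a tool can expand the
+ * iteration macro above to generate one function-pointer field per inquiry
+ * entry point. The struct and macro names used here are hypothetical, and
+ * the *_t pointer types are generated further below by OMPT_API_FUNCTION. */
+#define MY_OMPT_DECLARE_FIELD(fn) fn##_t fn;
+typedef struct my_ompt_entry_points_s {
+    FOREACH_OMPT_INQUIRY_FN(MY_OMPT_DECLARE_FIELD)
+} my_ompt_entry_points_t;
+#undef MY_OMPT_DECLARE_FIELD
+#endif
+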
+#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \
+ macro (ompt_idle) \
+ macro (ompt_overhead) \
+ macro (ompt_barrier_wait) \
+ macro (ompt_task_wait) \
+ macro (ompt_mutex_wait)
+
+#define FOREACH_OMPT_STATE(macro) \
+ \
+ /* first */ \
+ macro (ompt_state_first, 0x71) /* initial enumeration state */ \
+ \
+ /* work states (0..15) */ \
+ macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \
+ macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \
+ macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \
+ \
+ /* idle (16..31) */ \
+ macro (ompt_state_idle, 0x10) /* waiting for work */ \
+ \
+ /* overhead states (32..63) */ \
+ macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \
+ \
+ /* barrier wait states (64..79) */ \
+ macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \
+ macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \
+ macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \
+ \
+ /* task wait states (80..95) */ \
+ macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \
+ macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \
+ \
+ /* mutex wait states (96..111) */ \
+ macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \
+ macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \
+ macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \
+ macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \
+ macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \
+ macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \
+ \
+ /* misc (112..127) */ \
+ macro (ompt_state_undefined, 0x70) /* undefined thread state */
+
+
+#define FOREACH_OMPT_EVENT(macro) \
+ \
+ /*--- Mandatory Events ---*/ \
+ macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \
+ macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \
+ \
+ macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \
+ macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \
+ \
+ macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \
+ macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \
+ \
+ macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \
+ \
+ macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \
+ \
+ /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \
+ macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \
+ macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \
+ \
+ macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \
+ macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \
+ \
+ macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \
+ macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \
+ \
+ macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\
+ macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \
+ \
+ macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \
+ macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \
+ macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \
+ \
+ macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \
+ \
+ macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \
+ \
+ /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \
+ macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \
+ macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \
+ \
+ macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \
+ macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \
+ \
+ macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \
+ \
+ macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \
+ macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \
+ \
+ macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\
+ macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \
+ \
+ macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \
+ macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \
+ \
+ macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \
+ macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \
+ \
+ macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\
+ macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \
+ \
+ macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \
+ macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \
+ \
+ macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \
+ macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \
+ \
+ macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \
+  macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at taskwait end */ \
+ \
+ macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\
+ macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \
+ \
+ macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \
+ \
+ macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \
+ macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \
+ macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \
+ macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \
+ macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \
+ \
+ macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \
+ macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \
+ macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \
+ macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \
+ macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \
+ macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \
+ \
+ macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \
+ macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \
+ \
+ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \
+ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \
+ \
+ macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */
+
+
+
+/*****************************************************************************
+ * data types
+ *****************************************************************************/
+
+/*---------------------
+ * identifiers
+ *---------------------*/
+
+typedef uint64_t ompt_thread_id_t;
+#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_task_id_t;
+#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_parallel_id_t;
+#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */
+
+typedef uint64_t ompt_wait_id_t;
+#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */
+
+
+/*---------------------
+ * ompt_frame_t
+ *---------------------*/
+
+typedef struct ompt_frame_s {
+ void *exit_runtime_frame; /* next frame is user code */
+ void *reenter_runtime_frame; /* previous frame is user code */
+} ompt_frame_t;
+
+
+/*****************************************************************************
+ * enumerations for thread states and runtime events
+ *****************************************************************************/
+
+/*---------------------
+ * runtime states
+ *---------------------*/
+
+typedef enum {
+#define ompt_state_macro(state, code) state = code,
+ FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
+} ompt_state_t;
+
+
+/*---------------------
+ * runtime events
+ *---------------------*/
+
+typedef enum {
+#define ompt_event_macro(event, callback, eventid) event = eventid,
+ FOREACH_OMPT_EVENT(ompt_event_macro)
+#undef ompt_event_macro
+} ompt_event_t;
+
+
+/*---------------------
+ * set callback results
+ *---------------------*/
+typedef enum {
+ ompt_set_result_registration_error = 0,
+ ompt_set_result_event_may_occur_no_callback = 1,
+ ompt_set_result_event_never_occurs = 2,
+ ompt_set_result_event_may_occur_callback_some = 3,
+ ompt_set_result_event_may_occur_callback_always = 4,
+} ompt_set_result_t;
+
+
+
+/*****************************************************************************
+ * callback signatures
+ *****************************************************************************/
+
+/* initialization */
+typedef void (*ompt_interface_fn_t)(void);
+
+typedef ompt_interface_fn_t (*ompt_function_lookup_t)(
+ const char * /* entry point to look up */
+);
+
+/* threads */
+typedef void (*ompt_thread_callback_t) (
+ ompt_thread_id_t thread_id /* ID of thread */
+);
+
+typedef enum {
+ ompt_thread_initial = 1, // start the enumeration at 1
+ ompt_thread_worker = 2,
+ ompt_thread_other = 3
+} ompt_thread_type_t;
+
+typedef enum {
+ ompt_invoker_program = 0, /* program invokes master task */
+ ompt_invoker_runtime = 1 /* runtime invokes master task */
+} ompt_invoker_t;
+
+typedef void (*ompt_thread_type_callback_t) (
+ ompt_thread_type_t thread_type, /* type of thread */
+ ompt_thread_id_t thread_id /* ID of thread */
+);
+
+typedef void (*ompt_wait_callback_t) (
+ ompt_wait_id_t wait_id /* wait id */
+);
+
+/* parallel and workshares */
+typedef void (*ompt_parallel_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t task_id /* id of task */
+);
+
+typedef void (*ompt_new_workshare_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ void *workshare_function /* pointer to outlined function */
+);
+
+typedef void (*ompt_new_parallel_callback_t) (
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ ompt_frame_t *parent_task_frame, /* frame data of parent task */
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ uint32_t requested_team_size, /* number of threads in team */
+ void *parallel_function, /* pointer to outlined function */
+ ompt_invoker_t invoker /* who invokes master task? */
+);
+
+typedef void (*ompt_end_parallel_callback_t) (
+ ompt_parallel_id_t parallel_id, /* id of parallel region */
+ ompt_task_id_t task_id, /* id of task */
+ ompt_invoker_t invoker /* who invokes master task? */
+);
+
+/* tasks */
+typedef void (*ompt_task_callback_t) (
+ ompt_task_id_t task_id /* id of task */
+);
+
+typedef void (*ompt_task_pair_callback_t) (
+ ompt_task_id_t first_task_id,
+ ompt_task_id_t second_task_id
+);
+
+typedef void (*ompt_new_task_callback_t) (
+ ompt_task_id_t parent_task_id, /* id of parent task */
+ ompt_frame_t *parent_task_frame, /* frame data for parent task */
+ ompt_task_id_t new_task_id, /* id of created task */
+ void *task_function /* pointer to outlined function */
+);
+
+/* program */
+typedef void (*ompt_control_callback_t) (
+ uint64_t command, /* command of control call */
+ uint64_t modifier /* modifier of control call */
+);
+
+typedef void (*ompt_callback_t)(void);
+
+
+/****************************************************************************
+ * ompt API
+ ***************************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OMPT_API_FNTYPE(fn) fn##_t
+
+#define OMPT_API_FUNCTION(return_type, fn, args) \
+ typedef return_type (*OMPT_API_FNTYPE(fn)) args
+
+
+
+/****************************************************************************
+ * INQUIRY FUNCTIONS
+ ***************************************************************************/
+
+/* state */
+OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
+ ompt_wait_id_t *ompt_wait_id
+));
+
+/* thread */
+OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void));
+
+OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void));
+
+/* parallel region */
+OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, (
+ int ancestor_level
+));
+
+OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, (
+ int ancestor_level
+));
+
+/* task */
+OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, (
+ int depth
+));
+
+OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, (
+ int depth
+));
+
+
+
+/****************************************************************************
+ * PLACEHOLDERS FOR PERFORMANCE REPORTING
+ ***************************************************************************/
+
+/* idle */
+OMPT_API_FUNCTION(void, ompt_idle, (
+ void
+));
+
+/* overhead */
+OMPT_API_FUNCTION(void, ompt_overhead, (
+ void
+));
+
+/* barrier wait */
+OMPT_API_FUNCTION(void, ompt_barrier_wait, (
+ void
+));
+
+/* task wait */
+OMPT_API_FUNCTION(void, ompt_task_wait, (
+ void
+));
+
+/* mutex wait */
+OMPT_API_FUNCTION(void, ompt_mutex_wait, (
+ void
+));
+
+
+
+/****************************************************************************
+ * INITIALIZATION FUNCTIONS
+ ***************************************************************************/
+
+OMPT_API_FUNCTION(void, ompt_initialize, (
+ ompt_function_lookup_t ompt_fn_lookup,
+ const char *runtime_version,
+ unsigned int ompt_version
+));
+
+
+/* initialization interface to be defined by tool */
+ompt_initialize_t ompt_tool(void);
+
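+#if 0
+/* Illustrative sketch (not part of this header): a minimal tool registers
+ * itself by defining ompt_tool() to return its initializer; the runtime then
+ * invokes that initializer with the lookup function used to resolve the
+ * inquiry entry points declared above. Names prefixed with my_ are
+ * hypothetical. */
+static ompt_get_thread_id_t my_get_thread_id;
+
+static void my_ompt_initialize(ompt_function_lookup_t lookup,
+                               const char *runtime_version,
+                               unsigned int ompt_version)
+{
+    my_get_thread_id = (ompt_get_thread_id_t) lookup("ompt_get_thread_id");
+}
+
+ompt_initialize_t ompt_tool(void)
+{
+    return my_ompt_initialize;
+}
+#endif
+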
+typedef enum opt_init_mode_e {
+ ompt_init_mode_never = 0,
+ ompt_init_mode_false = 1,
+ ompt_init_mode_true = 2,
+ ompt_init_mode_always = 3
+} ompt_init_mode_t;
+
+OMPT_API_FUNCTION(int, ompt_set_callback, (
+ ompt_event_t event,
+ ompt_callback_t callback
+));
+
+typedef enum ompt_set_callback_rc_e { /* non-standard */
+ ompt_set_callback_error = 0,
+ ompt_has_event_no_callback = 1,
+ ompt_no_event_no_callback = 2,
+ ompt_has_event_may_callback = 3,
+ ompt_has_event_must_callback = 4,
+} ompt_set_callback_rc_t;
+
+
+OMPT_API_FUNCTION(int, ompt_get_callback, (
+ ompt_event_t event,
+ ompt_callback_t *callback
+));
+
+
+
+/****************************************************************************
+ * MISCELLANEOUS FUNCTIONS
+ ***************************************************************************/
+
+/* control */
+#if defined(_OPENMP) && (_OPENMP >= 201307)
+#pragma omp declare target
+#endif
+void ompt_control(
+ uint64_t command,
+ uint64_t modifier
+);
+#if defined(_OPENMP) && (_OPENMP >= 201307)
+#pragma omp end declare target
+#endif
+
+/* state enumeration */
+OMPT_API_FUNCTION(int, ompt_enumerate_state, (
+ int current_state,
+ int *next_state,
+ const char **next_state_name
+));
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp.h b/contrib/libs/cxxsupp/openmp/kmp.h
index 4bc2611c71..66ebf6cbdb 100644
--- a/contrib/libs/cxxsupp/openmp/kmp.h
+++ b/contrib/libs/cxxsupp/openmp/kmp.h
@@ -1,3558 +1,3558 @@
-/*! \file */
-/*
- * kmp.h -- KPTS runtime header file.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_H
-#define KMP_H
-
-#include "kmp_config.h"
-
-/* #define BUILD_PARALLEL_ORDERED 1 */
-
-/* This fix replaces gettimeofday with clock_gettime for better scalability on
- the Altix. Requires user code to be linked with -lrt.
-*/
-//#define FIX_SGI_CLOCK
-
-/* Defines for OpenMP 3.0 tasking and auto scheduling */
-
-# ifndef KMP_STATIC_STEAL_ENABLED
-# define KMP_STATIC_STEAL_ENABLED 1
-# endif
-
-#define TASK_CURRENT_NOT_QUEUED 0
-#define TASK_CURRENT_QUEUED 1
-
-#define TASK_DEQUE_BITS 8 // Used solely to define TASK_DEQUE_SIZE and TASK_DEQUE_MASK.
-#define TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS )
-#define TASK_DEQUE_MASK ( TASK_DEQUE_SIZE - 1 )
-
-#ifdef BUILD_TIED_TASK_STACK
-#define TASK_STACK_EMPTY 0 // entries when the stack is empty
-
-#define TASK_STACK_BLOCK_BITS 5 // Used to define TASK_STACK_SIZE and TASK_STACK_MASK
-#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array
-#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block
-#endif // BUILD_TIED_TASK_STACK
-
-#define TASK_NOT_PUSHED 1
-#define TASK_SUCCESSFULLY_PUSHED 0
-#define TASK_TIED 1
-#define TASK_UNTIED 0
-#define TASK_EXPLICIT 1
-#define TASK_IMPLICIT 0
-#define TASK_PROXY 1
-#define TASK_FULL 0
-
-#define KMP_CANCEL_THREADS
-#define KMP_THREAD_ATTR
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdarg.h>
-#include <string.h>
-#include <signal.h>
-/* Do not include <ctype.h>; it causes problems with /MD on Windows* OS NT due to a bad Microsoft library */
-/* some macros are provided below to replace some of its functions */
-#ifndef __ABSOFT_WIN
-#include <sys/types.h>
-#endif
-#include <limits.h>
-#include <time.h>
-
-#include <errno.h>
-
-#include "kmp_os.h"
-
-#include "kmp_safe_c_api.h"
-
-#if KMP_STATS_ENABLED
-class kmp_stats_list;
-#endif
-
-#if KMP_USE_HWLOC
-#include "hwloc.h"
-extern hwloc_topology_t __kmp_hwloc_topology;
-extern int __kmp_hwloc_error;
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#include <xmmintrin.h>
-#endif
-
-#include "kmp_version.h"
-#include "kmp_debug.h"
-#include "kmp_lock.h"
-#if USE_DEBUGGER
-#include "kmp_debugger.h"
-#endif
-#include "kmp_i18n.h"
-
-#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)
-
-#include "kmp_wrapper_malloc.h"
-#if KMP_OS_UNIX
-# include <unistd.h>
-# if !defined NSIG && defined _NSIG
-# define NSIG _NSIG
-# endif
-#endif
-
-#if KMP_OS_LINUX
-# pragma weak clock_gettime
-#endif
-
-#if OMPT_SUPPORT
-#include "ompt-internal.h"
-#endif
-
-/* Select data placement in NUMA memory */
-#define NO_FIRST_TOUCH 0
-#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */
-
-/* If not specified on compile command line, assume no first touch */
-#ifndef BUILD_MEMORY
-#define BUILD_MEMORY NO_FIRST_TOUCH
-#endif
-
-// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
-// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size.
-#ifndef USE_FAST_MEMORY
-#define USE_FAST_MEMORY 3
-#endif
-
-#ifndef KMP_NESTED_HOT_TEAMS
-# define KMP_NESTED_HOT_TEAMS 0
-# define USE_NESTED_HOT_ARG(x)
-#else
-# if KMP_NESTED_HOT_TEAMS
-# if OMP_40_ENABLED
-# define USE_NESTED_HOT_ARG(x) ,x
-# else
-// Nested hot teams feature depends on omp 4.0, disable it for earlier versions
-# undef KMP_NESTED_HOT_TEAMS
-# define KMP_NESTED_HOT_TEAMS 0
-# define USE_NESTED_HOT_ARG(x)
-# endif
-# else
-# define USE_NESTED_HOT_ARG(x)
-# endif
-#endif
-
-// Assume using BGET compare_exchange instruction instead of lock by default.
-#ifndef USE_CMP_XCHG_FOR_BGET
-#define USE_CMP_XCHG_FOR_BGET 1
-#endif
-
-// Test to see if queuing lock is better than bootstrap lock for bget
-// #ifndef USE_QUEUING_LOCK_FOR_BGET
-// #define USE_QUEUING_LOCK_FOR_BGET
-// #endif
-
-#define KMP_NSEC_PER_SEC 1000000000L
-#define KMP_USEC_PER_SEC 1000000L
-
-/*!
-@ingroup BASIC_TYPES
-@{
-*/
-
-// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...)
-/*!
-Values for bit flags used in the ident_t to describe the fields.
-*/
-/*! Use trampoline for internal microtasks */
-#define KMP_IDENT_IMB 0x01
-/*! Use c-style ident structure */
-#define KMP_IDENT_KMPC 0x02
-/* 0x04 is no longer used */
-/*! Entry point generated by auto-parallelization */
-#define KMP_IDENT_AUTOPAR 0x08
-/*! Compiler generates atomic reduction option for kmpc_reduce* */
-#define KMP_IDENT_ATOMIC_REDUCE 0x10
-/*! To mark a 'barrier' directive in user code */
-#define KMP_IDENT_BARRIER_EXPL 0x20
-/*! To mark implicit barriers. */
-#define KMP_IDENT_BARRIER_IMPL 0x0040
-#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0
-#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040
-#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0
-
-#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
-#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0
-
-/*!
- * The ident structure that describes a source location.
- */
-typedef struct ident {
- kmp_int32 reserved_1; /**< might be used in Fortran; see above */
- kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
- kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
-#if USE_ITT_BUILD
- /* but currently used for storing region-specific ITT */
- /* contextual information. */
-#endif /* USE_ITT_BUILD */
- kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
- char const *psource; /**< String describing the source location.
- The string is composed of semi-colon separated fields which describe the source file,
- the function and a pair of line numbers that delimit the construct.
- */
-} ident_t;
-/*!
-@}
-*/
-
-// Some forward declarations.
-
-typedef union kmp_team kmp_team_t;
-typedef struct kmp_taskdata kmp_taskdata_t;
-typedef union kmp_task_team kmp_task_team_t;
-typedef union kmp_team kmp_team_p;
-typedef union kmp_info kmp_info_p;
-typedef union kmp_root kmp_root_p;
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* Pack two 32-bit signed integers into a 64-bit signed integer */
-/* ToDo: Fix word ordering for big-endian machines. */
-#define KMP_PACK_64(HIGH_32,LOW_32) \
- ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) )
-
-
-/*
- * Generic string manipulation macros.
- * Assume that _x is of type char *
- */
-#define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; }
-#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; }
-#define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; }
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
-#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
-/* Enumeration types */
-
-enum kmp_state_timer {
- ts_stop,
- ts_start,
- ts_pause,
-
- ts_last_state
-};
-
-enum dynamic_mode {
- dynamic_default,
-#ifdef USE_LOAD_BALANCE
- dynamic_load_balance,
-#endif /* USE_LOAD_BALANCE */
- dynamic_random,
- dynamic_thread_limit,
- dynamic_max
-};
-
-/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */
-#ifndef KMP_SCHED_TYPE_DEFINED
-#define KMP_SCHED_TYPE_DEFINED
-typedef enum kmp_sched {
- kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
- // Note: need to adjust __kmp_sch_map global array in case this enum is changed
- kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
- kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
- kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
- kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
- kmp_sched_upper_std = 5, // upper bound for standard schedules
- kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
- kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
-// kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
- kmp_sched_upper = 102,
- kmp_sched_default = kmp_sched_static // default scheduling
-} kmp_sched_t;
-#endif
-
-/*!
- @ingroup WORK_SHARING
- * Describes the loop schedule to be used for a parallel for loop.
- */
-enum sched_type {
- kmp_sch_lower = 32, /**< lower bound for unordered values */
- kmp_sch_static_chunked = 33,
- kmp_sch_static = 34, /**< static unspecialized */
- kmp_sch_dynamic_chunked = 35,
- kmp_sch_guided_chunked = 36, /**< guided unspecialized */
- kmp_sch_runtime = 37,
- kmp_sch_auto = 38, /**< auto */
- kmp_sch_trapezoidal = 39,
-
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_static_greedy = 40,
- kmp_sch_static_balanced = 41,
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_guided_iterative_chunked = 42,
- kmp_sch_guided_analytical_chunked = 43,
-
- kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */
-
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_upper = 45, /**< upper bound for unordered values */
-
- kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
- kmp_ord_static_chunked = 65,
- kmp_ord_static = 66, /**< ordered static unspecialized */
- kmp_ord_dynamic_chunked = 67,
- kmp_ord_guided_chunked = 68,
- kmp_ord_runtime = 69,
- kmp_ord_auto = 70, /**< ordered auto */
- kmp_ord_trapezoidal = 71,
- kmp_ord_upper = 72, /**< upper bound for ordered values */
-
-#if OMP_40_ENABLED
- /* Schedules for Distribute construct */
- kmp_distribute_static_chunked = 91, /**< distribute static chunked */
- kmp_distribute_static = 92, /**< distribute static unspecialized */
-#endif
-
- /*
- * For the "nomerge" versions, kmp_dispatch_next*() will always return
- * a single iteration/chunk, even if the loop is serialized. For the
- * schedule types listed above, the entire iteration vector is returned
- * if the loop is serialized. This doesn't work for gcc/gcomp sections.
- */
- kmp_nm_lower = 160, /**< lower bound for nomerge values */
-
- kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
- kmp_nm_static = 162, /**< static unspecialized */
- kmp_nm_dynamic_chunked = 163,
- kmp_nm_guided_chunked = 164, /**< guided unspecialized */
- kmp_nm_runtime = 165,
- kmp_nm_auto = 166, /**< auto */
- kmp_nm_trapezoidal = 167,
-
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_nm_static_greedy = 168,
- kmp_nm_static_balanced = 169,
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_nm_guided_iterative_chunked = 170,
- kmp_nm_guided_analytical_chunked = 171,
- kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */
-
- kmp_nm_ord_static_chunked = 193,
- kmp_nm_ord_static = 194, /**< ordered static unspecialized */
- kmp_nm_ord_dynamic_chunked = 195,
- kmp_nm_ord_guided_chunked = 196,
- kmp_nm_ord_runtime = 197,
- kmp_nm_ord_auto = 198, /**< auto */
- kmp_nm_ord_trapezoidal = 199,
- kmp_nm_upper = 200, /**< upper bound for nomerge values */
-
- kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
-};
-
-/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
-typedef struct kmp_r_sched {
- enum sched_type r_sched_type;
- int chunk;
-} kmp_r_sched_t;
-
-extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types
-
-enum library_type {
- library_none,
- library_serial,
- library_turnaround,
- library_throughput
-};
-
-#if KMP_OS_LINUX
-enum clock_function_type {
- clock_function_gettimeofday,
- clock_function_clock_gettime
-};
-#endif /* KMP_OS_LINUX */
-
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-enum mic_type {
- non_mic,
- mic1,
- mic2,
- mic3,
- dummy
-};
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* -- fast reduction stuff ------------------------------------------------ */
-
-#undef KMP_FAST_REDUCTION_BARRIER
-#define KMP_FAST_REDUCTION_BARRIER 1
-
-#undef KMP_FAST_REDUCTION_CORE_DUO
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- #define KMP_FAST_REDUCTION_CORE_DUO 1
-#endif
-
-enum _reduction_method {
- reduction_method_not_defined = 0,
- critical_reduce_block = ( 1 << 8 ),
- atomic_reduce_block = ( 2 << 8 ),
- tree_reduce_block = ( 3 << 8 ),
- empty_reduce_block = ( 4 << 8 )
-};
-
-// Description of the packed_reduction_method variable:
-// it packs two enum values into byte 0 and byte 1 of a single integer:
-// byte 0: ( packed_reduction_method & 0x000000FF ) is the 'enum barrier_type' value of the barrier to be used in fast reduction: bs_plain_barrier or bs_reduction_barrier;
-// byte 1: ( packed_reduction_method & 0x0000FF00 ) is the reduction method to be used in fast reduction;
-// the 'enum _reduction_method' values are defined so that their bits in byte 0 are empty,
-// so no shift instruction is needed while packing/unpacking
-
-#if KMP_FAST_REDUCTION_BARRIER
- #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
- ( ( reduction_method ) | ( barrier_type ) )
-
- #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
- ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) )
-
- #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
- ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) )
-#else
- #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
- ( reduction_method )
-
- #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
- ( packed_reduction_method )
-
- #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
- ( bs_plain_barrier )
-#endif
-
-#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \
- ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) )
-
-#if KMP_FAST_REDUCTION_BARRIER
- #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
- ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) )
-
- #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
- ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) )
-#endif
-
-typedef int PACKED_REDUCTION_METHOD_T;
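-
-#if 0
-/* Illustrative sketch (not part of the original header): with the byte layout
-   described above, tree_reduce_block is ( 3 << 8 ) = 0x300, so the packed
-   value keeps the barrier type in byte 0 and the method in byte 1, and
-   unpacking needs only a mask, no shifts. The function name is hypothetical;
-   bs_reduction_barrier comes from 'enum barrier_type' declared later in this
-   header. */
-static void __kmp_packed_reduction_example( void )
-{
-    PACKED_REDUCTION_METHOD_T packed =
-        PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier );
-    KMP_DEBUG_ASSERT( TEST_REDUCTION_METHOD( packed, tree_reduce_block ) );
-    KMP_DEBUG_ASSERT( UNPACK_REDUCTION_BARRIER( packed ) == bs_reduction_barrier );
-}
-#endif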
-
-/* -- end of fast reduction stuff ----------------------------------------- */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if KMP_OS_WINDOWS
-# define USE_CBLKDATA
-# pragma warning( push )
-# pragma warning( disable: 271 310 )
-# include <windows.h>
-# pragma warning( pop )
-#endif
-
-#if KMP_OS_UNIX
-# include <pthread.h>
-# include <dlfcn.h>
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Only Linux* OS and Windows* OS support thread affinity.
- */
-#if KMP_AFFINITY_SUPPORTED
-
-extern size_t __kmp_affin_mask_size;
-# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
-# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
-# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
-# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
-
-#if KMP_USE_HWLOC
-
-typedef hwloc_cpuset_t kmp_affin_mask_t;
-# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask)
-# define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
- { \
- unsigned i; \
- for(i=0;i<(unsigned)max_bit_number+1;i++) { \
- if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \
- hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \
- } else { \
- hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \
- } \
- } \
- } \
-
-# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
-# define KMP_CPU_SET_ITERATE(i,mask) \
- for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i))
-
-# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc()
-# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr);
-# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
-# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
-# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
-# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
-
-//
-// The following macro should be used to index an array of masks.
-// The array should be declared as "kmp_affinity_t *" and allocated with
-// size "__kmp_affinity_mask_size * len". The macro takes care of the fact
-// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
-// on Linux* OS, sizeof(kmp_affin_t) is 1.
-//
-# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i]))
-# define KMP_CPU_ALLOC_ARRAY(arr, n) { \
- arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- arr[i] = hwloc_bitmap_alloc(); \
- } \
- }
-# define KMP_CPU_FREE_ARRAY(arr, n) { \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- hwloc_bitmap_free(arr[i]); \
- } \
- __kmp_free(arr); \
- }
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \
- arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- arr[i] = hwloc_bitmap_alloc(); \
- } \
- }
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- hwloc_bitmap_free(arr[i]); \
- } \
- KMP_INTERNAL_FREE(arr); \
- }
-
-#else /* KMP_USE_HWLOC */
-# define KMP_CPU_SET_ITERATE(i,mask) \
- for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
-
-# if KMP_OS_LINUX
-//
-// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
-// (in bytes). It should be allocated on a word boundary.
-//
-// WARNING!!! We have made the base type of the affinity mask unsigned char,
-// in order to eliminate a lot of checks that the true system mask size is
-// really a multiple of 4 bytes (on Linux* OS).
-//
-// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
-//
-
-typedef unsigned char kmp_affin_mask_t;
-
-# define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
-# define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
-# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
-# define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
-# define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
-# define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
-
-# define KMP_CPU_ZERO(mask) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] = 0; \
- } \
- }
-
-# define KMP_CPU_COPY(dest, src) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- = ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] \
- = ~((kmp_affin_mask_t *)(mask))[__i]; \
- } \
- }
-
-# define KMP_CPU_UNION(dest, src) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- |= ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# endif /* KMP_OS_LINUX */
-
-# if KMP_OS_WINDOWS
-//
-// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
-// Intel(R) 64 it is 8 bytes times the number of processor groups.
-//
-
-# if KMP_GROUP_AFFINITY
-
-// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
-# if _MSC_VER < 1600
-typedef struct GROUP_AFFINITY {
- KAFFINITY Mask;
- WORD Group;
- WORD Reserved[3];
-} GROUP_AFFINITY;
-# endif
-
-typedef DWORD_PTR kmp_affin_mask_t;
-
-extern int __kmp_num_proc_groups;
-
-# define _KMP_CPU_SET(i,mask) \
- (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
- (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
-
-# define KMP_CPU_SET(i,mask) \
- _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
-
-# define _KMP_CPU_ISSET(i,mask) \
- (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \
- (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))
-
-# define KMP_CPU_ISSET(i,mask) \
- _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
-
-# define _KMP_CPU_CLR(i,mask) \
- (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \
- ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
-
-# define KMP_CPU_CLR(i,mask) \
- _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
-
-# define KMP_CPU_ZERO(mask) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] = 0; \
- } \
- }
-
-# define KMP_CPU_COPY(dest, src) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- = ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] \
- = ~((kmp_affin_mask_t *)(mask))[__i]; \
- } \
- }
-
-# define KMP_CPU_UNION(dest, src) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- |= ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
-extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
-
-typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
-extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
-
-typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
-extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
-
-typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
-extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
-
-extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
-
-# else /* KMP_GROUP_AFFINITY */
-
-typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
-
-# define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
-# define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
-# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
-# define KMP_CPU_ZERO(mask) (*(mask) = 0)
-# define KMP_CPU_COPY(dest, src) (*(dest) = *(src))
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask))
-# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
-
-# endif /* KMP_GROUP_AFFINITY */
-
-# endif /* KMP_OS_WINDOWS */
-
-//
-// __kmp_allocate() returns memory allocated on a 4-byte boundary, after
-// zeroing it; this takes care of the assumptions stated above.
-//
-# define KMP_CPU_ALLOC(ptr) \
- (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
-# define KMP_CPU_FREE(ptr) __kmp_free(ptr)
-# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
-# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
-# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
-# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr)
-
-//
-// The following macro should be used to index an array of masks.
-// The array should be declared as "kmp_affinity_t *" and allocated with
-// size "__kmp_affinity_mask_size * len". The macro takes care of the fact
-// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
-// on Linux* OS, sizeof(kmp_affin_t) is 1.
-//
-# define KMP_CPU_INDEX(array,i) \
- ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
-# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
-# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
-
-#endif /* KMP_USE_HWLOC */
-
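-#if 0
-/* Illustrative sketch (not part of the original header): allocating an array
-   of n affinity masks and addressing entry i through KMP_CPU_INDEX, which
-   hides the per-entry size difference between the hwloc and non-hwloc cases.
-   The function name is hypothetical. */
-static void __kmp_affin_mask_array_example( void )
-{
-    int n = 4, i;
-    kmp_affin_mask_t *mask_array;
-    KMP_CPU_ALLOC_ARRAY( mask_array, n );
-    for ( i = 0; i < n; i++ ) {
-        KMP_CPU_ZERO( KMP_CPU_INDEX( mask_array, i ) );
-    }
-    KMP_CPU_FREE_ARRAY( mask_array, n );
-}
-#endif
-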
-//
-// Declare local char buffers with this size for printing debug and info
-// messages, using __kmp_affinity_print_mask().
-//
-#define KMP_AFFIN_MASK_PRINT_LEN 1024
-
-enum affinity_type {
- affinity_none = 0,
- affinity_physical,
- affinity_logical,
- affinity_compact,
- affinity_scatter,
- affinity_explicit,
- affinity_balanced,
-    affinity_disabled, // not used outside the env var parser
- affinity_default
-};
-
-enum affinity_gran {
- affinity_gran_fine = 0,
- affinity_gran_thread,
- affinity_gran_core,
- affinity_gran_package,
- affinity_gran_node,
-#if KMP_GROUP_AFFINITY
- //
- // The "group" granularity isn't necesssarily coarser than all of the
- // other levels, but we put it last in the enum.
- //
- affinity_gran_group,
-#endif /* KMP_GROUP_AFFINITY */
- affinity_gran_default
-};
-
-enum affinity_top_method {
- affinity_top_method_all = 0, // try all (supported) methods, in order
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- affinity_top_method_apicid,
- affinity_top_method_x2apicid,
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
-#if KMP_GROUP_AFFINITY
- affinity_top_method_group,
-#endif /* KMP_GROUP_AFFINITY */
- affinity_top_method_flat,
-#if KMP_USE_HWLOC
- affinity_top_method_hwloc,
-#endif
- affinity_top_method_default
-};
-
-#define affinity_respect_mask_default (-1)
-
-extern enum affinity_type __kmp_affinity_type; /* Affinity type */
-extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
-extern int __kmp_affinity_gran_levels; /* corresponding int value */
-extern int __kmp_affinity_dups; /* Affinity duplicate masks */
-extern enum affinity_top_method __kmp_affinity_top_method;
-extern int __kmp_affinity_compact; /* Affinity 'compact' value */
-extern int __kmp_affinity_offset; /* Affinity offset value */
-extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
-extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
-extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */
-extern char * __kmp_affinity_proclist; /* proc ID list */
-extern kmp_affin_mask_t *__kmp_affinity_masks;
-extern unsigned __kmp_affinity_num_masks;
-extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
-extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
-extern void __kmp_affinity_bind_thread(int which);
-
-# if KMP_OS_LINUX
-extern kmp_affin_mask_t *__kmp_affinity_get_fullMask();
-# endif /* KMP_OS_LINUX */
-extern char const * __kmp_cpuinfo_file;
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-#if OMP_40_ENABLED
-
-//
-// This needs to be kept in sync with the values in omp.h !!!
-//
-typedef enum kmp_proc_bind_t {
- proc_bind_false = 0,
- proc_bind_true,
- proc_bind_master,
- proc_bind_close,
- proc_bind_spread,
- proc_bind_intel, // use KMP_AFFINITY interface
- proc_bind_default
-} kmp_proc_bind_t;
-
-typedef struct kmp_nested_proc_bind_t {
- kmp_proc_bind_t *bind_types;
- int size;
- int used;
-} kmp_nested_proc_bind_t;
-
-extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
-
-#endif /* OMP_40_ENABLED */
-
-# if KMP_AFFINITY_SUPPORTED
-# define KMP_PLACE_ALL (-1)
-# define KMP_PLACE_UNDEFINED (-2)
-# endif /* KMP_AFFINITY_SUPPORTED */
-
-extern int __kmp_affinity_num_places;
-
-
-#if OMP_40_ENABLED
-typedef enum kmp_cancel_kind_t {
- cancel_noreq = 0,
- cancel_parallel = 1,
- cancel_loop = 2,
- cancel_sections = 3,
- cancel_taskgroup = 4
-} kmp_cancel_kind_t;
-#endif // OMP_40_ENABLED
-
-extern int __kmp_place_num_sockets;
-extern int __kmp_place_socket_offset;
-extern int __kmp_place_num_cores;
-extern int __kmp_place_core_offset;
-extern int __kmp_place_num_threads_per_core;
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
-
-//
-// We need to avoid using -1 as a GTID as +1 is added to the gtid
-// when storing it in a lock, and the value 0 is reserved.
-//
-#define KMP_GTID_DNE (-2) /* Does not exist */
-#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
-#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
-#define KMP_GTID_UNKNOWN (-5) /* Is not known */
-#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
-
-#define __kmp_get_gtid() __kmp_get_global_thread_id()
-#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
-
-#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
- __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
-
-#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
-#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \
- team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid )
-
-#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team )
-#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
- __kmp_threads[ (gtid) ]-> th.th_team )
-
-#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] )
-#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) )
-
-    // Returns the gtid of the given thread (pointer to kmp_info_t). In contrast to __kmp_get_thread(), it works
- // with registered and not-yet-registered threads.
-#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \
- (thr)->th.th_info.ds.ds_gtid )
-
-// AT: Which way is correct?
-// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
-// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
-#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc )
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1)))
-
-#define KMP_MIN_NTH 1
-
-#ifndef KMP_MAX_NTH
-# if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
-# define KMP_MAX_NTH PTHREAD_THREADS_MAX
-# else
-# define KMP_MAX_NTH INT_MAX
-# endif
-#endif /* KMP_MAX_NTH */
-
-#ifdef PTHREAD_STACK_MIN
-# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
-#else
-# define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
-#endif
-
-#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
-
-#if KMP_ARCH_X86
-# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
-#elif KMP_ARCH_X86_64
-# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
-# define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
-#else
-# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
-#endif
-
-#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
-
-#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024))
-#define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024))
-#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
-
-#define KMP_MIN_STKOFFSET (0)
-#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
-#if KMP_OS_DARWIN
-# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
-#else
-# define KMP_DEFAULT_STKOFFSET CACHE_LINE
-#endif
-
-#define KMP_MIN_STKPADDING (0)
-#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
-
-#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */
-#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */
-#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */
-#define KMP_MIN_BLOCKTIME (0)
-#define KMP_MAX_BLOCKTIME (INT_MAX) /* Must be this for the "infinite" setting to work */
-#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
-/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */
-/* Only allow increasing number of wakeups */
-#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
- ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \
- ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \
- ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \
- (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) )
-
-/* Calculate number of intervals for a specific block time based on monitor_wakeups */
-#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
- ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \
- (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
-
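-/* Worked example (illustrative, not part of the original header), assuming the
-   defaults above: with KMP_DEFAULT_BLOCKTIME = 200 ms and
-   KMP_BLOCKTIME_MULTIPLIER = 1000, KMP_WAKEUPS_FROM_BLOCKTIME( 200, 1 )
-   yields 1000 / 200 = 5 wakeups per second, and
-   KMP_INTERVALS_FROM_BLOCKTIME( 200, 5 ) yields ( 200 + 199 ) / 200 = 1,
-   i.e. one full monitor interval of spinning before a waiting thread sleeps. */
-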
-#define KMP_MIN_STATSCOLS 40
-#define KMP_MAX_STATSCOLS 4096
-#define KMP_DEFAULT_STATSCOLS 80
-
-#define KMP_MIN_INTERVAL 0
-#define KMP_MAX_INTERVAL (INT_MAX-1)
-#define KMP_DEFAULT_INTERVAL 0
-
-#define KMP_MIN_CHUNK 1
-#define KMP_MAX_CHUNK (INT_MAX-1)
-#define KMP_DEFAULT_CHUNK 1
-
-#define KMP_MIN_INIT_WAIT 1
-#define KMP_MAX_INIT_WAIT (INT_MAX/2)
-#define KMP_DEFAULT_INIT_WAIT 2048U
-
-#define KMP_MIN_NEXT_WAIT 1
-#define KMP_MAX_NEXT_WAIT (INT_MAX/2)
-#define KMP_DEFAULT_NEXT_WAIT 1024U
-
-// max possible dynamic loops in concurrent execution per team
-#define KMP_MAX_DISP_BUF 7
-#define KMP_MAX_ORDERED 8
-
-#define KMP_MAX_FIELDS 32
-
-#define KMP_MAX_BRANCH_BITS 31
-
-#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
-
-/* Minimum number of threads before switch to TLS gtid (experimentally determined) */
-/* josh TODO: what about OS X* tuning? */
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-# define KMP_TLS_GTID_MIN 5
-#else
-# define KMP_TLS_GTID_MIN INT_MAX
-#endif
-
-#define KMP_MASTER_TID(tid) ( (tid) == 0 )
-#define KMP_WORKER_TID(tid) ( (tid) != 0 )
-
-#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 )
-#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 )
-#define KMP_UBER_GTID(gtid) \
- ( \
- KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \
- KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \
- (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
- (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\
- )
-#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 )
-
-#ifndef TRUE
-#define FALSE 0
-#define TRUE (! FALSE)
-#endif
-
-/* NOTE: all of the following constants must be even */
-
-#if KMP_OS_WINDOWS
-# define KMP_INIT_WAIT 64U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
-#elif KMP_OS_CNK
-# define KMP_INIT_WAIT 16U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
-#elif KMP_OS_LINUX
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_DARWIN
-/* TODO: tune for KMP_OS_DARWIN */
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_FREEBSD
-/* TODO: tune for KMP_OS_FREEBSD */
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_NETBSD
-/* TODO: tune for KMP_OS_NETBSD */
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-typedef struct kmp_cpuid {
- kmp_uint32 eax;
- kmp_uint32 ebx;
- kmp_uint32 ecx;
- kmp_uint32 edx;
-} kmp_cpuid_t;
-extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
-# if KMP_ARCH_X86
- extern void __kmp_x86_pause( void );
-# elif KMP_MIC
- static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); };
-# else
- static void __kmp_x86_pause( void ) { _mm_pause(); };
-# endif
-# define KMP_CPU_PAUSE() __kmp_x86_pause()
-#elif KMP_ARCH_PPC64
-# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
-# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
-# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
-# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
-#else
-# define KMP_CPU_PAUSE() /* nothing to do */
-#endif
-
-#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; }
-
-#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }
-
-// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
-// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.
-
-#define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \
- if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
-#define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \
- if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }
-
-/* ------------------------------------------------------------------------ */
-/* Support datatypes for the orphaned construct nesting checks. */
-/* ------------------------------------------------------------------------ */
-
-enum cons_type {
- ct_none,
- ct_parallel,
- ct_pdo,
- ct_pdo_ordered,
- ct_psections,
- ct_psingle,
-
- /* the following must be left in order and not split up */
- ct_taskq,
- ct_task, /* really task inside non-ordered taskq, considered a worksharing type */
- ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */
- /* the preceding must be left in order and not split up */
-
- ct_critical,
- ct_ordered_in_parallel,
- ct_ordered_in_pdo,
- ct_ordered_in_taskq,
- ct_master,
- ct_reduce,
- ct_barrier
-};
-
-/* test to see if we are in a taskq construct */
-# define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) )
-# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)
-
-struct cons_data {
- ident_t const *ident;
- enum cons_type type;
- int prev;
- kmp_user_lock_p name; /* address exclusively for critical section name comparison */
-};
-
-struct cons_header {
- int p_top, w_top, s_top;
- int stack_size, stack_top;
- struct cons_data *stack_data;
-};
-
-struct kmp_region_info {
- char *text;
- int offset[KMP_MAX_FIELDS];
- int length[KMP_MAX_FIELDS];
-};
-
-
-/* ---------------------------------------------------------------------- */
-/* ---------------------------------------------------------------------- */
-
-#if KMP_OS_WINDOWS
- typedef HANDLE kmp_thread_t;
- typedef DWORD kmp_key_t;
-#endif /* KMP_OS_WINDOWS */
-
-#if KMP_OS_UNIX
- typedef pthread_t kmp_thread_t;
- typedef pthread_key_t kmp_key_t;
-#endif
-
-extern kmp_key_t __kmp_gtid_threadprivate_key;
-
-typedef struct kmp_sys_info {
- long maxrss; /* the maximum resident set size utilized (in kilobytes) */
- long minflt; /* the number of page faults serviced without any I/O */
- long majflt; /* the number of page faults serviced that required I/O */
- long nswap; /* the number of times a process was "swapped" out of memory */
- long inblock; /* the number of times the file system had to perform input */
- long oublock; /* the number of times the file system had to perform output */
- long nvcsw; /* the number of times a context switch was performed voluntarily */
- long nivcsw; /* the number of times a context switch was forced (involuntary) */
-} kmp_sys_info_t;
-
-typedef struct kmp_cpuinfo {
- int initialized; // If 0, other fields are not initialized.
- int signature; // CPUID(1).EAX
- int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family )
- int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model)
- int stepping; // CPUID(1).EAX[3:0] ( Stepping )
- int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
- int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
- int cpu_stackoffset;
- int apic_id;
- int physical_id;
- int logical_id;
- kmp_uint64 frequency; // Nominal CPU frequency in Hz.
-} kmp_cpuinfo_t;
-
-
-#ifdef BUILD_TV
-
-struct tv_threadprivate {
- /* Record type #1 */
- void *global_addr;
- void *thread_addr;
-};
-
-struct tv_data {
- struct tv_data *next;
- void *type;
- union tv_union {
- struct tv_threadprivate tp;
- } u;
-};
-
-extern kmp_key_t __kmp_tv_key;
-
-#endif /* BUILD_TV */
-
-/* ------------------------------------------------------------------------ */
-
-#if USE_ITT_BUILD
-// We cannot include "kmp_itt.h" due to a circular dependency. Declare only the required type here.
-// Later we will check that the type meets the requirements.
-typedef int kmp_itt_mark_t;
-#define KMP_ITT_DEBUG 0
-#endif /* USE_ITT_BUILD */
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * Taskq data structures
- */
-
-#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4)
-#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */
-
-/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */
-
-#define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */
-#define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */
-#define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */
-#define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */
-#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */
-#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */
-#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */
-#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */
-
-#define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */
-
-#define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */
-#define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */
-#define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */
-#define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */
-#define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not serialized */
-#define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */
-
-#define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use only flags */
-
-typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t {
- kmp_int32 ai_data;
-} kmpc_aligned_int32_t;
-
-typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t {
- struct kmpc_thunk_t *qs_thunk;
-} kmpc_aligned_queue_slot_t;
-
-typedef struct kmpc_task_queue_t {
- /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */
- kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */
- union {
- struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */
- struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */
- } tq;
- volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */
- struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */
- struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */
- volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */
- /* (other than the thread executing the kmpc_end_taskq call) */
- /* locked by parent tq's tq_link_lck */
-
- /* shared data for task queue */
- struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */
- /* only one array element exists for all but outermost taskq */
-
- /* bookkeeping for ordered task queue */
- kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */
- volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */
-
- /* thunk storage management for task queue */
- kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */
- struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */
- struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */
-
- /* data fields for queue itself */
- kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */
- kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */
- volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */
- kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */
- kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */
- kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */
- volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */
- kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */
- volatile kmp_int32 tq_flags; /* TQF_xxx */
-
- /* bookkeeping for outstanding thunks */
- struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */
- kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */
-
- /* statistics library bookkeeping */
- ident_t *tq_loc; /* source location information for taskq directive */
-} kmpc_task_queue_t;
-
-typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk);
-
-/* sizeof_shareds passed as arg to __kmpc_taskq call */
-typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */
- kmpc_task_queue_t *sv_queue;
- /* (pointers to) shared vars */
-} kmpc_shared_vars_t;
-
-typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t {
- volatile struct kmpc_shared_vars_t *ai_data;
-} kmpc_aligned_shared_vars_t;
-
-/* sizeof_thunk passed as arg to kmpc_taskq call */
-typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */
- union { /* field used for internal freelists too */
- kmpc_shared_vars_t *th_shareds;
- struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */
- } th;
- kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */
- struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */
- kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */
- kmp_int32 th_status;
- kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */
- /* private vars */
-} kmpc_thunk_t;
-
-typedef struct KMP_ALIGN_CACHE kmp_taskq {
- int tq_curr_thunk_capacity;
-
- kmpc_task_queue_t *tq_root;
- kmp_int32 tq_global_flags;
-
- kmp_lock_t tq_freelist_lck;
- kmpc_task_queue_t *tq_freelist;
-
- kmpc_thunk_t **tq_curr_thunk;
-} kmp_taskq_t;
-
-/* END Taskq data structures */
-/* --------------------------------------------------------------------------- */
-
-typedef kmp_int32 kmp_critical_name[8];
-
-/*!
-@ingroup PARALLEL
-The type for a microtask which gets passed to @ref __kmpc_fork_call().
-The arguments to the outlined function are
-@param global_tid the global thread identity of the thread executing the function.
-@param bound_tid the local identity of the thread executing the function.
-@param ... pointers to shared variables accessed by the function.
-*/
-typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... );
-typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... );
-
-/*!
-@ingroup THREADPRIVATE
-@{
-*/
-/* --------------------------------------------------------------------------- */
-/* Threadprivate initialization/finalization function declarations */
-
-/* for non-array objects: __kmpc_threadprivate_register() */
-
-/*!
- Pointer to the constructor function.
- The first argument is the <tt>this</tt> pointer
-*/
-typedef void *(*kmpc_ctor) (void *);
-
-/*!
- Pointer to the destructor function.
- The first argument is the <tt>this</tt> pointer
-*/
-typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */
-/*!
- Pointer to an alternate constructor.
- The first argument is the <tt>this</tt> pointer.
-*/
-typedef void *(*kmpc_cctor) (void *, void *);
-
-/* for array objects: __kmpc_threadprivate_register_vec() */
- /* First arg: "this" pointer */
- /* Last arg: number of array elements */
-/*!
- Array constructor.
- The first argument is the <tt>this</tt> pointer;
- the second argument is the number of array elements.
-*/
-typedef void *(*kmpc_ctor_vec) (void *, size_t);
-/*!
- Pointer to the array destructor function.
- The first argument is the <tt>this</tt> pointer;
- the second argument is the number of array elements.
-*/
-typedef void (*kmpc_dtor_vec) (void *, size_t);
-/*!
- Array constructor.
- The first argument is the <tt>this</tt> pointer;
- the third argument is the number of array elements.
-*/
-typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */
-
-/*!
-@}
-*/
-
-
-/* ------------------------------------------------------------------------ */
-
-/* keeps track of threadprivate cache allocations for cleanup later */
-typedef struct kmp_cached_addr {
- void **addr; /* address of allocated cache */
- struct kmp_cached_addr *next; /* pointer to next cached address */
-} kmp_cached_addr_t;
-
-struct private_data {
- struct private_data *next; /* The next descriptor in the list */
- void *data; /* The data buffer for this descriptor */
- int more; /* The repeat count for this descriptor */
- size_t size; /* The data size for this descriptor */
-};
-
-struct private_common {
- struct private_common *next;
- struct private_common *link;
- void *gbl_addr;
- void *par_addr; /* par_addr == gbl_addr for MASTER thread */
- size_t cmn_size;
-};
-
-struct shared_common
-{
- struct shared_common *next;
- struct private_data *pod_init;
- void *obj_init;
- void *gbl_addr;
- union {
- kmpc_ctor ctor;
- kmpc_ctor_vec ctorv;
- } ct;
- union {
- kmpc_cctor cctor;
- kmpc_cctor_vec cctorv;
- } cct;
- union {
- kmpc_dtor dtor;
- kmpc_dtor_vec dtorv;
- } dt;
- size_t vec_len;
- int is_vec;
- size_t cmn_size;
-};
-
-#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
-#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
-#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
-#define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
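-
-// Editorial sketch (hypothetical helper, not part of the original header): the hash discards the
-// low KMP_HASH_SHIFT address bits (usually zero due to alignment) and keeps KMP_HASH_TABLE_LOG2
-// bits as the bucket index. For example, an address 0x7f0012345678 gives
-//   (0x7f0012345678 >> 3) & 511 == 0xcf == bucket 207.
-static inline int __kmp_hash_demo( void *gbl_addr ) {
-    return (int) KMP_HASH( gbl_addr );   // same bucket computation used by the tables below
-}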
-
-struct common_table {
- struct private_common *data[ KMP_HASH_TABLE_SIZE ];
-};
-
-struct shared_table {
- struct shared_common *data[ KMP_HASH_TABLE_SIZE ];
-};
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef KMP_STATIC_STEAL_ENABLED
-typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
- kmp_int32 count;
- kmp_int32 ub;
- /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
- kmp_int32 lb;
- kmp_int32 st;
- kmp_int32 tc;
- kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */
-
- // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
- // a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
- // Because parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
-
- struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
- kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
- kmp_int32 parm2; // make no real change at least while padding is off.
- kmp_int32 parm3;
- kmp_int32 parm4;
- };
-
- kmp_uint32 ordered_lower;
- kmp_uint32 ordered_upper;
-#if KMP_OS_WINDOWS
- // This var could be placed in the hole between 'tc' and 'parm1' instead of 'static_steal_counter'.
- // It would be nice to measure execution times.
- // The conditional if/endif could then be removed entirely.
- kmp_int32 last_upper;
-#endif /* KMP_OS_WINDOWS */
-} dispatch_private_info32_t;
-
-typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
- kmp_int64 count; /* current chunk number for static and static-steal scheduling*/
- kmp_int64 ub; /* upper-bound */
- /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
- kmp_int64 lb; /* lower-bound */
- kmp_int64 st; /* stride */
- kmp_int64 tc; /* trip count (number of iterations) */
- kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */
-
- /* parm[1-4] are used in different ways by different scheduling algorithms */
-
- // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
- // a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
- // Because parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
-
- struct KMP_ALIGN( 32 ) {
- kmp_int64 parm1;
- kmp_int64 parm2;
- kmp_int64 parm3;
- kmp_int64 parm4;
- };
-
- kmp_uint64 ordered_lower;
- kmp_uint64 ordered_upper;
-#if KMP_OS_WINDOWS
- // This var could be placed in the hole between 'tc' and 'parm1' instead of 'static_steal_counter'.
- // It would be nice to measure execution times.
- // The conditional if/endif could then be removed entirely.
- kmp_int64 last_upper;
-#endif /* KMP_OS_WINDOWS */
-} dispatch_private_info64_t;
-#else /* KMP_STATIC_STEAL_ENABLED */
-typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
- kmp_int32 lb;
- kmp_int32 ub;
- kmp_int32 st;
- kmp_int32 tc;
-
- kmp_int32 parm1;
- kmp_int32 parm2;
- kmp_int32 parm3;
- kmp_int32 parm4;
-
- kmp_int32 count;
-
- kmp_uint32 ordered_lower;
- kmp_uint32 ordered_upper;
-#if KMP_OS_WINDOWS
- kmp_int32 last_upper;
-#endif /* KMP_OS_WINDOWS */
-} dispatch_private_info32_t;
-
-typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
- kmp_int64 lb; /* lower-bound */
- kmp_int64 ub; /* upper-bound */
- kmp_int64 st; /* stride */
- kmp_int64 tc; /* trip count (number of iterations) */
-
- /* parm[1-4] are used in different ways by different scheduling algorithms */
- kmp_int64 parm1;
- kmp_int64 parm2;
- kmp_int64 parm3;
- kmp_int64 parm4;
-
- kmp_int64 count; /* current chunk number for static scheduling */
-
- kmp_uint64 ordered_lower;
- kmp_uint64 ordered_upper;
-#if KMP_OS_WINDOWS
- kmp_int64 last_upper;
-#endif /* KMP_OS_WINDOWS */
-} dispatch_private_info64_t;
-#endif /* KMP_STATIC_STEAL_ENABLED */
-
-typedef struct KMP_ALIGN_CACHE dispatch_private_info {
- union private_info {
- dispatch_private_info32_t p32;
- dispatch_private_info64_t p64;
- } u;
- enum sched_type schedule; /* scheduling algorithm */
- kmp_int32 ordered; /* ordered clause specified */
- kmp_int32 ordered_bumped;
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
- struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */
- kmp_int32 nomerge; /* don't merge iters if serialized */
- kmp_int32 type_size; /* the size of types in private_info */
- enum cons_type pushed_ws;
-} dispatch_private_info_t;
-
-typedef struct dispatch_shared_info32 {
- /* chunk index under dynamic, number of idle threads under static-steal;
- iteration index otherwise */
- volatile kmp_uint32 iteration;
- volatile kmp_uint32 num_done;
- volatile kmp_uint32 ordered_iteration;
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
-} dispatch_shared_info32_t;
-
-typedef struct dispatch_shared_info64 {
- /* chunk index under dynamic, number of idle threads under static-steal;
- iteration index otherwise */
- volatile kmp_uint64 iteration;
- volatile kmp_uint64 num_done;
- volatile kmp_uint64 ordered_iteration;
- kmp_int64 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
-} dispatch_shared_info64_t;
-
-typedef struct dispatch_shared_info {
- union shared_info {
- dispatch_shared_info32_t s32;
- dispatch_shared_info64_t s64;
- } u;
-/* volatile kmp_int32 dispatch_abort; deprecated */
- volatile kmp_uint32 buffer_index;
-} dispatch_shared_info_t;
-
-typedef struct kmp_disp {
- /* Vector for ORDERED SECTION */
- void (*th_deo_fcn)( int * gtid, int * cid, ident_t *);
- /* Vector for END ORDERED SECTION */
- void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *);
-
- dispatch_shared_info_t *th_dispatch_sh_current;
- dispatch_private_info_t *th_dispatch_pr_current;
-
- dispatch_private_info_t *th_disp_buffer;
- kmp_int32 th_disp_index;
- void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
-#if KMP_USE_INTERNODE_ALIGNMENT
- char more_padding[INTERNODE_CACHE_LINE];
-#endif
-} kmp_disp_t;
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* Barrier stuff */
-
-/* constants for barrier state update */
-#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
-#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
-#define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */
-#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
-
-#define KMP_BARRIER_SLEEP_STATE ((kmp_uint) (1 << KMP_BARRIER_SLEEP_BIT))
-#define KMP_BARRIER_UNUSED_STATE ((kmp_uint) (1 << KMP_BARRIER_UNUSED_BIT))
-#define KMP_BARRIER_STATE_BUMP ((kmp_uint) (1 << KMP_BARRIER_BUMP_BIT))
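-
-// Editorial note (values derived from the constants above, for illustration only): with
-// SLEEP_BIT=0, UNUSED_BIT=1 and BUMP_BIT=2 the masks come out as SLEEP_STATE=0x1,
-// UNUSED_STATE=0x2 and STATE_BUMP=0x4, so go/arrived counters advance in steps of 4 while
-// the two low bits stay reserved for the sleep flag and the never-set sanity bit.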
-
-#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
-# error "Barrier sleep bit must be smaller than barrier bump bit"
-#endif
-#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
-# error "Barrier unused bit must be smaller than barrier bump bit"
-#endif
-
-// Constants for release barrier wait state: currently, hierarchical only
-#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
-#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release
-#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release
-#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go
-#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up
-
-enum barrier_type {
- bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
- bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
- #if KMP_FAST_REDUCTION_BARRIER
- bs_reduction_barrier, /* 2, All barriers that are used in reduction */
- #endif // KMP_FAST_REDUCTION_BARRIER
- bs_last_barrier /* Just a placeholder to mark the end */
-};
-
-// to work with reduction barriers just like with plain barriers
-#if !KMP_FAST_REDUCTION_BARRIER
- #define bs_reduction_barrier bs_plain_barrier
-#endif // KMP_FAST_REDUCTION_BARRIER
-
-typedef enum kmp_bar_pat { /* Barrier communication patterns */
- bp_linear_bar = 0, /* Single level (degenerate) tree */
- bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
- bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
- bp_hierarchical_bar = 3, /* Machine hierarchy tree */
- bp_last_bar = 4 /* Placeholder to mark the end */
-} kmp_bar_pat_e;
-
-# define KMP_BARRIER_ICV_PUSH 1
-
-/* Record for holding the values of the internal controls stack records */
-typedef struct kmp_internal_control {
- int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
- kmp_int8 nested; /* internal control for nested parallelism (per thread) */
- kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */
- kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */
- int blocktime; /* internal control for blocktime */
- int bt_intervals; /* internal control for blocktime intervals */
- int nproc; /* internal control for #threads for next parallel region (per thread) */
- int max_active_levels; /* internal control for max_active_levels */
- kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
-#if OMP_40_ENABLED
- kmp_proc_bind_t proc_bind; /* internal control for affinity */
-#endif // OMP_40_ENABLED
- struct kmp_internal_control *next;
-} kmp_internal_control_t;
-
-static inline void
-copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
- *dst = *src;
-}
-
-/* Thread barrier needs volatile barrier fields */
-typedef struct KMP_ALIGN_CACHE kmp_bstate {
- // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it).
- // It is not explicitly aligned below, because we *don't* want it to be padded -- instead,
- // we fit b_go into the same cache line as th_fixed_icvs, enabling NGO cache-line
- // stores in the hierarchical barrier.
- kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
- // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
- volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
- KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point.
- kmp_uint32 *skip_per_level;
- kmp_uint32 my_level;
- kmp_int32 parent_tid;
- kmp_int32 old_tid;
- kmp_uint32 depth;
- struct kmp_bstate *parent_bar;
- kmp_team_t *team;
- kmp_uint64 leaf_state;
- kmp_uint32 nproc;
- kmp_uint8 base_leaf_kids;
- kmp_uint8 leaf_kids;
- kmp_uint8 offset;
- kmp_uint8 wait_flag;
- kmp_uint8 use_oncore_barrier;
-#if USE_DEBUGGER
- // The following field is intended solely for the debugger. Only the worker thread itself accesses this
- // field: the worker increases it by 1 when it arrives at a barrier.
- KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
-#endif /* USE_DEBUGGER */
-} kmp_bstate_t;
-
-union KMP_ALIGN_CACHE kmp_barrier_union {
- double b_align; /* use worst case alignment */
- char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
- kmp_bstate_t bb;
-};
-
-typedef union kmp_barrier_union kmp_balign_t;
-
-/* Team barrier needs only non-volatile arrived counter */
-union KMP_ALIGN_CACHE kmp_barrier_team_union {
- double b_align; /* use worst case alignment */
- char b_pad[ CACHE_LINE ];
- struct {
- kmp_uint64 b_arrived; /* STATE => task reached synch point. */
-#if USE_DEBUGGER
- // The following two fields are intended solely for the debugger. Only the master of the team accesses
- // these fields: the first one is increased by 1 when the master arrives at a barrier, and the
- // second one is increased by 1 when all the threads have arrived.
- kmp_uint b_master_arrived;
- kmp_uint b_team_arrived;
-#endif
- };
-};
-
-typedef union kmp_barrier_team_union kmp_balign_team_t;
-
-/*
- * Padding for Linux* OS pthreads condition variables and mutexes used to signal
- * threads when a condition changes. This is to work around an NPTL bug
- * where padding was added to pthread_cond_t, which caused the initialization
- * routine to write outside of the structure if compiled on pre-NPTL threads.
- */
-
-#if KMP_OS_WINDOWS
-typedef struct kmp_win32_mutex
-{
- /* The Lock */
- CRITICAL_SECTION cs;
-} kmp_win32_mutex_t;
-
-typedef struct kmp_win32_cond
-{
- /* Count of the number of waiters. */
- int waiters_count_;
-
- /* Serialize access to <waiters_count_> */
- kmp_win32_mutex_t waiters_count_lock_;
-
- /* Number of threads to release via a <cond_broadcast> or a */
- /* <cond_signal> */
- int release_count_;
-
- /* Keeps track of the current "generation" so that we don't allow */
- /* one thread to steal all the "releases" from the broadcast. */
- int wait_generation_count_;
-
- /* A manual-reset event that's used to block and release waiting */
- /* threads. */
- HANDLE event_;
-} kmp_win32_cond_t;
-#endif
-
-#if KMP_OS_UNIX
-
-union KMP_ALIGN_CACHE kmp_cond_union {
- double c_align;
- char c_pad[ CACHE_LINE ];
- pthread_cond_t c_cond;
-};
-
-typedef union kmp_cond_union kmp_cond_align_t;
-
-union KMP_ALIGN_CACHE kmp_mutex_union {
- double m_align;
- char m_pad[ CACHE_LINE ];
- pthread_mutex_t m_mutex;
-};
-
-typedef union kmp_mutex_union kmp_mutex_align_t;
-
-#endif /* KMP_OS_UNIX */
-
-typedef struct kmp_desc_base {
- void *ds_stackbase;
- size_t ds_stacksize;
- int ds_stackgrow;
- kmp_thread_t ds_thread;
- volatile int ds_tid;
- int ds_gtid;
-#if KMP_OS_WINDOWS
- volatile int ds_alive;
- DWORD ds_thread_id;
- /*
- ds_thread keeps the thread handle on Windows* OS. It is enough for RTL purposes. However,
- debugger support (libomp_db) cannot work with handles, because they are not comparable. For
- example, the debugger requests info about the thread with handle h. h is valid within the
- debugger process, but meaningless within the debuggee process. Even if h is duplicated by a
- call to DuplicateHandle(), so that the result h' is valid within the debuggee process, it is
- a *new* handle which does *not* equal any other handle in the debuggee... The only way to
- compare handles is to convert them to system-wide ids. The GetThreadId() function is
- available only in Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
- available on all Windows* OS flavours (including Windows* 95). Thus, we have to get the thread
- id by a call to GetCurrentThreadId() from within the thread and save it to let libomp_db
- identify threads.
- */
-#endif /* KMP_OS_WINDOWS */
-} kmp_desc_base_t;
-
-typedef union KMP_ALIGN_CACHE kmp_desc {
- double ds_align; /* use worst case alignment */
- char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ];
- kmp_desc_base_t ds;
-} kmp_desc_t;
-
-
-typedef struct kmp_local {
- volatile int this_construct; /* count of single constructs encountered by this thread */
- void *reduce_data;
-#if KMP_USE_BGET
- void *bget_data;
- void *bget_list;
-#if ! USE_CMP_XCHG_FOR_BGET
-#ifdef USE_QUEUING_LOCK_FOR_BGET
- kmp_lock_t bget_lock; /* Lock for accessing bget free list */
-#else
- kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */
- /* Must be bootstrap lock so we can use it at library shutdown */
-#endif /* USE_QUEUING_LOCK_FOR_BGET */
-#endif /* ! USE_CMP_XCHG_FOR_BGET */
-#endif /* KMP_USE_BGET */
-
-#ifdef BUILD_TV
- struct tv_data *tv_data;
-#endif
-
- PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */
-
-} kmp_local_t;
-
-#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
-#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
-#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
-
-#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
-#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
-#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
-#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
-
-#define set__blocktime_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
-
-#define set__bt_intervals_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
-
-#define set__bt_set_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
-
-
-#define set__nested( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) )
-#define get__nested( xthread ) \
- ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) )
-
-#define set__dynamic( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) )
-#define get__dynamic( xthread ) \
- ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) )
-
-#define set__nproc( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) )
-
-#define set__max_active_levels( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) )
-
-#define set__sched( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) )
-
-#if OMP_40_ENABLED
-
-#define set__proc_bind( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) )
-#define get__proc_bind( xthread ) \
- ( (xthread)->th.th_current_task->td_icvs.proc_bind )
-
-#endif /* OMP_40_ENABLED */
-
-
-/* ------------------------------------------------------------------------ */
-// OpenMP tasking data structures
-//
-
-typedef enum kmp_tasking_mode {
- tskm_immediate_exec = 0,
- tskm_extra_barrier = 1,
- tskm_task_teams = 2,
- tskm_max = 2
-} kmp_tasking_mode_t;
-
-extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */
-extern kmp_int32 __kmp_task_stealing_constraint;
-
-/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */
-#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1)
-#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1)
-
-// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
-// queued since the previous barrier release.
-#define KMP_TASKING_ENABLED(task_team) \
- (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
-/*!
-@ingroup BASIC_TYPES
-@{
-*/
-
-/*!
- */
-typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );
-
-/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
-/*!
- */
-typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
- void * shareds; /**< pointer to block of pointers to shared vars */
- kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
- kmp_int32 part_id; /**< part id for the task */
-#if OMP_40_ENABLED
- kmp_routine_entry_t destructors; /* pointer to function to invoke destructors of firstprivate C++ objects */
-#endif // OMP_40_ENABLED
- /* private vars */
-} kmp_task_t;
-
-/*!
-@}
-*/
-
-#if OMP_40_ENABLED
-typedef struct kmp_taskgroup {
- kmp_uint32 count; // number of allocated and not yet complete tasks
- kmp_int32 cancel_request; // request for cancellation of this taskgroup
- struct kmp_taskgroup *parent; // parent taskgroup
-} kmp_taskgroup_t;
-
-
-// forward declarations
-typedef union kmp_depnode kmp_depnode_t;
-typedef struct kmp_depnode_list kmp_depnode_list_t;
-typedef struct kmp_dephash_entry kmp_dephash_entry_t;
-
-typedef struct kmp_depend_info {
- kmp_intptr_t base_addr;
- size_t len;
- struct {
- bool in:1;
- bool out:1;
- } flags;
-} kmp_depend_info_t;
-
-struct kmp_depnode_list {
- kmp_depnode_t * node;
- kmp_depnode_list_t * next;
-};
-
-typedef struct kmp_base_depnode {
- kmp_depnode_list_t * successors;
- kmp_task_t * task;
-
- kmp_lock_t lock;
-
-#if KMP_SUPPORT_GRAPH_OUTPUT
- kmp_uint32 id;
-#endif
-
- volatile kmp_int32 npredecessors;
- volatile kmp_int32 nrefs;
-} kmp_base_depnode_t;
-
-union KMP_ALIGN_CACHE kmp_depnode {
- double dn_align; /* use worst case alignment */
- char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
- kmp_base_depnode_t dn;
-};
-
-struct kmp_dephash_entry {
- kmp_intptr_t addr;
- kmp_depnode_t * last_out;
- kmp_depnode_list_t * last_ins;
- kmp_dephash_entry_t * next_in_bucket;
-};
-
-typedef struct kmp_dephash {
- kmp_dephash_entry_t ** buckets;
-#ifdef KMP_DEBUG
- kmp_uint32 nelements;
- kmp_uint32 nconflicts;
-#endif
-} kmp_dephash_t;
-
-#endif
-
-#ifdef BUILD_TIED_TASK_STACK
-
-/* Tied Task stack definitions */
-typedef struct kmp_stack_block {
- kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ];
- struct kmp_stack_block * sb_next;
- struct kmp_stack_block * sb_prev;
-} kmp_stack_block_t;
-
-typedef struct kmp_task_stack {
- kmp_stack_block_t ts_first_block; // first block of stack entries
- kmp_taskdata_t ** ts_top; // pointer to the top of stack
- kmp_int32 ts_entries; // number of entries on the stack
-} kmp_task_stack_t;
-
-#endif // BUILD_TIED_TASK_STACK
-
-typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
- /* Compiler flags */ /* Total compiler flags must be 16 bits */
- unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
- unsigned final : 1; /* task is final(1) so execute immediately */
- unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
-#if OMP_40_ENABLED
- unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
-#if OMP_41_ENABLED
- unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
- unsigned reserved : 11; /* reserved for compiler use */
-#else
- unsigned reserved : 12; /* reserved for compiler use */
-#endif
-#else // OMP_40_ENABLED
- unsigned reserved : 13; /* reserved for compiler use */
-#endif // OMP_40_ENABLED
-
- /* Library flags */ /* Total library flags must be 16 bits */
- unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
- unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */
- unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */
- unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */
- /* If either team_serial or tasking_ser is set, task team may be NULL */
- /* Task State Flags: */
- unsigned started : 1; /* 1==started, 0==not started */
- unsigned executing : 1; /* 1==executing, 0==not executing */
- unsigned complete : 1; /* 1==complete, 0==not complete */
- unsigned freed : 1; /* 1==freed, 0==allocated */
- unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
- unsigned reserved31 : 7; /* reserved for library use */
-
-} kmp_tasking_flags_t;
-
-
-struct kmp_taskdata { /* aligned during dynamic allocation */
- kmp_int32 td_task_id; /* id, assigned by debugger */
- kmp_tasking_flags_t td_flags; /* task flags */
- kmp_team_t * td_team; /* team for this task */
- kmp_info_p * td_alloc_thread; /* thread that allocated data structures */
- /* Currently not used except for perhaps IDB */
- kmp_taskdata_t * td_parent; /* parent task */
- kmp_int32 td_level; /* task nesting level */
- ident_t * td_ident; /* task identifier */
- // Taskwait data.
- ident_t * td_taskwait_ident;
- kmp_uint32 td_taskwait_counter;
- kmp_int32 td_taskwait_thread; /* gtid + 1 of the thread that encountered the taskwait */
- KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */
- volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */
- volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */
-#if OMP_40_ENABLED
- kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup
- kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here
- kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies
-#endif
-#if OMPT_SUPPORT
- ompt_task_info_t ompt_task_info;
-#endif
-#if KMP_HAVE_QUAD
- _Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t
-#else
- kmp_uint32 td_dummy[2];
-#endif
-}; // struct kmp_taskdata
-
-// Make sure padding above worked
-KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
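-
-// Editorial sketch (hypothetical helper, not part of the original header): illustrates the
-// single-block allocation noted for KMP_TASK_TO_TASKDATA / KMP_TASKDATA_TO_TASK above --
-// kmp_taskdata_t sits immediately before kmp_task_t, so the two macros are just +/- one
-// kmp_taskdata_t of pointer arithmetic and round-trip exactly.
-static inline int __kmp_task_layout_demo( kmp_taskdata_t *taskdata ) {
-    kmp_task_t *task = KMP_TASKDATA_TO_TASK( taskdata );   // taskdata + 1
-    return KMP_TASK_TO_TASKDATA( task ) == taskdata;       // ((kmp_taskdata_t *)task) - 1
-}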
-
-// Data for task team but per thread
-typedef struct kmp_base_thread_data {
- kmp_info_p * td_thr; // Pointer back to thread info
- // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued?
- kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
- kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
- kmp_uint32 td_deque_head; // Head of deque (will wrap)
- kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
- kmp_int32 td_deque_ntasks; // Number of tasks in deque
- // GEH: shouldn't this be volatile since used in while-spin?
- kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
-#ifdef BUILD_TIED_TASK_STACK
- kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint
-#endif // BUILD_TIED_TASK_STACK
-} kmp_base_thread_data_t;
-
-typedef union KMP_ALIGN_CACHE kmp_thread_data {
- kmp_base_thread_data_t td;
- double td_align; /* use worst case alignment */
- char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ];
-} kmp_thread_data_t;
-
-
-// Data for task teams which are used when tasking is enabled for the team
-typedef struct kmp_base_task_team {
- kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */
- /* must be bootstrap lock since used at library shutdown*/
- kmp_task_team_t * tt_next; /* For linking the task team free list */
- kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */
- /* Data survives task team deallocation */
- kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */
- /* TRUE means tt_threads_data is set up and initialized */
- kmp_int32 tt_nproc; /* #threads in team */
- kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */
-#if OMP_41_ENABLED
- kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
-#endif
-
- KMP_ALIGN_CACHE
- volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */
-
- KMP_ALIGN_CACHE
- volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
-} kmp_base_task_team_t;
-
-union KMP_ALIGN_CACHE kmp_task_team {
- kmp_base_task_team_t tt;
- double tt_align; /* use worst case alignment */
- char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
-};
-
-#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
-// Free lists keep same-size free memory slots for fast memory allocation routines
-typedef struct kmp_free_list {
- void *th_free_list_self; // Self-allocated tasks free list
- void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads
- void *th_free_list_other; // Non-self free list (to be returned to owner's sync list)
-} kmp_free_list_t;
-#endif
-#if KMP_NESTED_HOT_TEAMS
-// Hot teams array keeps hot teams and their sizes for given thread.
-// Hot teams are not put in the teams pool, and they don't put threads in the threads pool.
-typedef struct kmp_hot_team_ptr {
- kmp_team_p *hot_team; // pointer to hot_team of given nesting level
- kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
-} kmp_hot_team_ptr_t;
-#endif
-#if OMP_40_ENABLED
-typedef struct kmp_teams_size {
- kmp_int32 nteams; // number of teams in a league
- kmp_int32 nth; // number of threads in each team of the league
-} kmp_teams_size_t;
-#endif
-
-/* ------------------------------------------------------------------------ */
-// OpenMP thread data structures
-//
-
-typedef struct KMP_ALIGN_CACHE kmp_base_info {
-/*
- * Start with the read-only data, which is cache aligned and padded.
- * This is written by the master before the thread starts working.
- * (uber masters may update themselves later)
- * (usage does not consider serialized regions)
- */
- kmp_desc_t th_info;
- kmp_team_p *th_team; /* team we belong to */
- kmp_root_p *th_root; /* pointer to root of task hierarchy */
- kmp_info_p *th_next_pool; /* next available thread in the pool */
- kmp_disp_t *th_dispatch; /* thread's dispatch data */
- int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
-
- /* The following are cached from the team info structure */
- /* TODO use these in more places as determined to be needed via profiling */
- int th_team_nproc; /* number of threads in a team */
- kmp_info_p *th_team_master; /* the team's master thread */
- int th_team_serialized; /* team is serialized */
-#if OMP_40_ENABLED
- microtask_t th_teams_microtask; /* save entry address for teams construct */
- int th_teams_level; /* save initial level of teams construct */
- /* it is 0 on device but may be any on host */
-#endif
-
- /* The blocktime info is copied from the team struct to the thread struct */
- /* at the start of a barrier, and the values stored in the team are used */
- /* at points in the code where the team struct is no longer guaranteed */
- /* to exist (from the POV of worker threads). */
- int th_team_bt_intervals;
- int th_team_bt_set;
-
-
-#if KMP_AFFINITY_SUPPORTED
- kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
-#endif
-
-/*
- * The data set by the master at reinit, then R/W by the worker
- */
- KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */
-#if KMP_NESTED_HOT_TEAMS
- kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
-#endif
-#if OMP_40_ENABLED
- kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
- kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */
-# if KMP_AFFINITY_SUPPORTED
- int th_current_place; /* place currently bound to */
- int th_new_place; /* place to bind to in par reg */
- int th_first_place; /* first place in partition */
- int th_last_place; /* last place in partition */
-# endif
-#endif
-#if USE_ITT_BUILD
- kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
- kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
- kmp_uint64 th_frame_time; /* frame timestamp */
- kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */
-#endif /* USE_ITT_BUILD */
- kmp_local_t th_local;
- struct private_common *th_pri_head;
-
-/*
- * Now the data only used by the worker (after initial allocation)
- */
- /* TODO the first serial team should actually be stored in the info_t
- * structure. this will help reduce initial allocation overhead */
- KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/
-
-#if OMPT_SUPPORT
- ompt_thread_info_t ompt_thread_info;
-#endif
-
-/* The following are also read by the master during reinit */
- struct common_table *th_pri_common;
-
- volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
- /* while awaiting queuing lock acquire */
-
- volatile void *th_sleep_loc; // this points at a kmp_flag<T>
-
- ident_t *th_ident;
- unsigned th_x; // Random number generator data
- unsigned th_a; // Random number generator data
-
-/*
- * Tasking-related data for the thread
- */
- kmp_task_team_t * th_task_team; // Task team struct
- kmp_taskdata_t * th_current_task; // Innermost Task being executed
- kmp_uint8 th_task_state; // alternating 0/1 for task team identification
- kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels
- kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
- kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
-
- /*
- * More stuff for keeping track of active/sleeping threads
- * (this part is written by the worker thread)
- */
- kmp_uint8 th_active_in_pool; // included in count of
- // #active threads in pool
- int th_active; // ! sleeping
- // 32 bits for TCR/TCW
-
-
- struct cons_header * th_cons; // used for consistency check
-
-/*
- * Add the synchronizing data, which is cache aligned and padded.
- */
- KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ];
-
- KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
-
-#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
- #define NUM_LISTS 4
- kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines
-#endif
-
-#if KMP_OS_WINDOWS
- kmp_win32_cond_t th_suspend_cv;
- kmp_win32_mutex_t th_suspend_mx;
- int th_suspend_init;
-#endif
-#if KMP_OS_UNIX
- kmp_cond_align_t th_suspend_cv;
- kmp_mutex_align_t th_suspend_mx;
- int th_suspend_init_count;
-#endif
-
-#if USE_ITT_BUILD
- kmp_itt_mark_t th_itt_mark_single;
- // alignment ???
-#endif /* USE_ITT_BUILD */
-#if KMP_STATS_ENABLED
- kmp_stats_list* th_stats;
-#endif
-} kmp_base_info_t;
-
-typedef union KMP_ALIGN_CACHE kmp_info {
- double th_align; /* use worst case alignment */
- char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ];
- kmp_base_info_t th;
-} kmp_info_t;
-
-/* ------------------------------------------------------------------------ */
-// OpenMP thread team data structures
-//
-typedef struct kmp_base_data {
- volatile kmp_uint32 t_value;
-} kmp_base_data_t;
-
-typedef union KMP_ALIGN_CACHE kmp_sleep_team {
- double dt_align; /* use worst case alignment */
- char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
- kmp_base_data_t dt;
-} kmp_sleep_team_t;
-
-typedef union KMP_ALIGN_CACHE kmp_ordered_team {
- double dt_align; /* use worst case alignment */
- char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
- kmp_base_data_t dt;
-} kmp_ordered_team_t;
-
-typedef int (*launch_t)( int gtid );
-
-/* Minimum number of ARGV entries to malloc if necessary */
-#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
-
-// Set up how many argv pointers will fit in the cache lines containing t_inline_argv. Historically, we
-// have supported at least 96 bytes. Using a larger value to leave more space between the master-write/
-// worker-read section and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
-#else
-# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
-#endif
-#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
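-
-// Editorial note (worked example under assumed values, not from the original source): on a
-// 64-bit x86 build with CACHE_LINE == 64 and KMP_PTR_SKIP == 8, the expressions above give
-//   KMP_INLINE_ARGV_BYTES   = 4*64 - ((3*8 + 2*4 + 2*1 + 2 + 4) % 64) = 256 - 40 = 216
-//   KMP_INLINE_ARGV_ENTRIES = 216 / 8 = 27 argv pointers stored inline in kmp_base_team_t.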
-
-typedef struct KMP_ALIGN_CACHE kmp_base_team {
- // Synchronization Data ---------------------------------------------------------------------------------
- KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
- kmp_balign_team_t t_bar[ bs_last_barrier ];
- volatile int t_construct; // count of single directive encountered by team
- kmp_lock_t t_single_lock; // team specific lock
-
- // Master only -----------------------------------------------------------------------------------------
- KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
- int t_master_this_cons; // "this_construct" single counter of master in parent team
- ident_t *t_ident; // if volatile, have to change too much other crud to volatile too
- kmp_team_p *t_parent; // parent team
- kmp_team_p *t_next_pool; // next free team in the team pool
- kmp_disp_t *t_dispatch; // thread's dispatch data
- kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
-#if OMP_40_ENABLED
- kmp_proc_bind_t t_proc_bind; // bind type for par region
-#endif // OMP_40_ENABLED
-#if USE_ITT_BUILD
- kmp_uint64 t_region_time; // region begin timestamp
-#endif /* USE_ITT_BUILD */
-
- // Master write, workers read --------------------------------------------------------------------------
- KMP_ALIGN_CACHE void **t_argv;
- int t_argc;
- int t_nproc; // number of threads in team
- microtask_t t_pkfn;
- launch_t t_invoke; // procedure to launch the microtask
-
-#if OMPT_SUPPORT
- ompt_team_info_t ompt_team_info;
- ompt_lw_taskteam_t *ompt_serialized_team_info;
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- kmp_int8 t_fp_control_saved;
- kmp_int8 t_pad2b;
- kmp_int16 t_x87_fpu_control_word; // FP control regs
- kmp_uint32 t_mxcsr;
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
-
- KMP_ALIGN_CACHE kmp_info_t **t_threads;
- int t_max_argc;
- int t_max_nproc; // maximum threads this team can handle (dynamically expandable)
- int t_serialized; // levels deep of serialized teams
- dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
- int t_id; // team's id, assigned by debugger.
- int t_level; // nested parallel level
- int t_active_level; // nested active parallel level
- kmp_r_sched_t t_sched; // run-time schedule for the team
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- int t_first_place; // first & last place in parent thread's partition.
- int t_last_place; // Restore these values to master after par region.
-#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call
-
- // Read/write by workers as well -----------------------------------------------------------------------
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
- // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
- // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding.
- char dummy_padding[1024];
-#endif
- KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
- kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams.
- // for SERIALIZED teams nested 2 or more levels deep
-#if OMP_40_ENABLED
- kmp_int32 t_cancel_request; // typed flag to store request state of cancellation
-#endif
- int t_master_active; // save on fork, restore on join
- kmp_taskq_t t_taskq; // this team's task queue
- void *t_copypriv_data; // team specific pointer to copyprivate data array
- kmp_uint32 t_copyin_counter;
-#if USE_ITT_BUILD
- void *t_stack_id; // team specific stack stitching id (for ittnotify)
-#endif /* USE_ITT_BUILD */
-} kmp_base_team_t;
-
-union KMP_ALIGN_CACHE kmp_team {
- kmp_base_team_t t;
- double t_align; /* use worst case alignment */
- char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ];
-};
-
-
-typedef union KMP_ALIGN_CACHE kmp_time_global {
- double dt_align; /* use worst case alignment */
- char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
- kmp_base_data_t dt;
-} kmp_time_global_t;
-
-typedef struct kmp_base_global {
- /* cache-aligned */
- kmp_time_global_t g_time;
-
- /* non cache-aligned */
- volatile int g_abort;
- volatile int g_done;
-
- int g_dynamic;
- enum dynamic_mode g_dynamic_mode;
-} kmp_base_global_t;
-
-typedef union KMP_ALIGN_CACHE kmp_global {
- kmp_base_global_t g;
- double g_align; /* use worst case alignment */
- char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ];
-} kmp_global_t;
-
-
-typedef struct kmp_base_root {
- // TODO: GEH - combine r_active with r_in_parallel, then r_active == (r_in_parallel >= 0)
- // TODO: GEH - then replace r_active with t_active_levels if we can, to reduce the synch
- // overhead of keeping r_active
-
- volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
- // GEH: This is misnamed, should be r_in_parallel
- volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
- int r_in_parallel; /* keeps a count of active parallel regions per root */
- // GEH: This is misnamed, should be r_active_levels
- kmp_team_t *r_root_team;
- kmp_team_t *r_hot_team;
- kmp_info_t *r_uber_thread;
- kmp_lock_t r_begin_lock;
- volatile int r_begin;
- int r_blocktime; /* blocktime for this root and descendants */
-} kmp_base_root_t;
-
-typedef union KMP_ALIGN_CACHE kmp_root {
- kmp_base_root_t r;
- double r_align; /* use worst case alignment */
- char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ];
-} kmp_root_t;
-
-struct fortran_inx_info {
- kmp_int32 data;
-};
-
-/* ------------------------------------------------------------------------ */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-extern int __kmp_settings;
-extern int __kmp_duplicate_library_ok;
-#if USE_ITT_BUILD
-extern int __kmp_forkjoin_frames;
-extern int __kmp_forkjoin_frames_mode;
-#endif
-extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
-extern int __kmp_determ_red;
-
-#ifdef KMP_DEBUG
-extern int kmp_a_debug;
-extern int kmp_b_debug;
-extern int kmp_c_debug;
-extern int kmp_d_debug;
-extern int kmp_e_debug;
-extern int kmp_f_debug;
-#endif /* KMP_DEBUG */
-
-/* For debug information logging using rotating buffer */
-#define KMP_DEBUG_BUF_LINES_INIT 512
-#define KMP_DEBUG_BUF_LINES_MIN 1
-
-#define KMP_DEBUG_BUF_CHARS_INIT 128
-#define KMP_DEBUG_BUF_CHARS_MIN 2
-
-extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
-extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
-extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
-extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */
-
-extern char *__kmp_debug_buffer; /* Debug buffer itself */
-extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */
-extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
-/* end rotating debug buffer */
-
-#ifdef KMP_DEBUG
-extern int __kmp_par_range; /* +1 => only go par for constructs in range */
-
-#define KMP_PAR_RANGE_ROUTINE_LEN 1024
-extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
-#define KMP_PAR_RANGE_FILENAME_LEN 1024
-extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
-extern int __kmp_par_range_lb;
-extern int __kmp_par_range_ub;
-#endif
-
-/* For printing out dynamic storage map for threads and teams */
-extern int __kmp_storage_map; /* True means print storage map for threads and teams */
-extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */
-extern int __kmp_storage_map_verbose_specified;
-
-extern kmp_cpuinfo_t __kmp_cpuinfo;
-
-extern volatile int __kmp_init_serial;
-extern volatile int __kmp_init_gtid;
-extern volatile int __kmp_init_common;
-extern volatile int __kmp_init_middle;
-extern volatile int __kmp_init_parallel;
-extern volatile int __kmp_init_monitor;
-extern volatile int __kmp_init_user_locks;
-extern int __kmp_init_counter;
-extern int __kmp_root_counter;
-extern int __kmp_version;
-
-/* list of address of allocated caches for commons */
-extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
-
-/* Barrier algorithm types and options */
-extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
-extern kmp_uint32 __kmp_barrier_release_bb_dflt;
-extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
-extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
-extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ];
-extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ];
-extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ];
-extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ];
-extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ];
-extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ];
-extern char const *__kmp_barrier_type_name [ bs_last_barrier ];
-extern char const *__kmp_barrier_pattern_name [ bp_last_bar ];
-
-/* Global Locks */
-extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
-extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
-extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
-extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
-extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
-
-extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
-extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
-extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
-
-/* used for yielding spin-waits */
-extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
-extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
-
-extern enum library_type __kmp_library;
-
-extern enum sched_type __kmp_sched; /* default runtime scheduling */
-extern enum sched_type __kmp_static; /* default static scheduling method */
-extern enum sched_type __kmp_guided; /* default guided scheduling method */
-extern enum sched_type __kmp_auto; /* default auto scheduling method */
-extern int __kmp_chunk; /* default runtime chunk size */
-
-extern size_t __kmp_stksize; /* stack size per thread */
-extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */
-extern size_t __kmp_stkoffset; /* stack offset per thread */
-extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
-
-extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
-extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
-extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
-extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */
-extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
-extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */
-extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
-extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
-extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */
-extern int __kmp_generate_warnings; /* should we issue warnings? */
-extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
-
-#ifdef DEBUG_SUSPEND
-extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
-#endif
-
-extern kmp_uint32 __kmp_yield_init;
-extern kmp_uint32 __kmp_yield_next;
-extern kmp_uint32 __kmp_yielding_on;
-extern kmp_uint32 __kmp_yield_cycle;
-extern kmp_int32 __kmp_yield_on_count;
-extern kmp_int32 __kmp_yield_off_count;
-
-/* ------------------------------------------------------------------------- */
-extern int __kmp_allThreadsSpecified;
-
-extern size_t __kmp_align_alloc;
-/* following data protected by initialization routines */
-extern int __kmp_xproc; /* number of processors in the system */
-extern int __kmp_avail_proc; /* number of processors available to the process */
-extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
-extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
-extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */
-extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */
-extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */
-extern int __kmp_dflt_team_nth_ub; /* upper bound on __kmp_dflt_team_nth, determined at serial initialization */
-extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */
-extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */
-extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */
-extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */
-extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */
-extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */
-#ifdef KMP_ADJUST_BLOCKTIME
-extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
-#endif /* KMP_ADJUST_BLOCKTIME */
-#ifdef KMP_DFLT_NTH_CORES
-extern int __kmp_ncores; /* Total number of cores for threads placement */
-#endif
-extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */
-
-extern int __kmp_need_register_atfork_specified;
-extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */
-extern int __kmp_gtid_mode; /* Method of getting gtid, values:
- 0 - not set, will be set at runtime
- 1 - using stack search
- 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS))
- 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only.
- */
-extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
-#ifdef KMP_TDATA_GTID
-#if KMP_OS_WINDOWS
-extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
-#else
-extern __thread int __kmp_gtid;
-#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
-#endif
-extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
-extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */
-extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */
-extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
-#if KMP_NESTED_HOT_TEAMS
-extern int __kmp_hot_teams_mode;
-extern int __kmp_hot_teams_max_level;
-#endif
-
-# if KMP_OS_LINUX
-extern enum clock_function_type __kmp_clock_function;
-extern int __kmp_clock_function_param;
-# endif /* KMP_OS_LINUX */
-
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-extern enum mic_type __kmp_mic_type;
-#endif
-
-# ifdef USE_LOAD_BALANCE
-extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */
-# endif /* USE_LOAD_BALANCE */
-
-// OpenMP 3.1 - Nested num threads array
-typedef struct kmp_nested_nthreads_t {
- int * nth;
- int size;
- int used;
-} kmp_nested_nthreads_t;
-
-extern kmp_nested_nthreads_t __kmp_nested_nth;
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-// Parameters for the speculative lock backoff system.
-struct kmp_adaptive_backoff_params_t {
- // Number of soft retries before it counts as a hard retry.
- kmp_uint32 max_soft_retries;
-  // Badness is a bit mask: 0,1,3,7,15,...; on each hard failure we shift one bit to the right.
- kmp_uint32 max_badness;
-};
-
-extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-extern char * __kmp_speculative_statsfile;
-#endif
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-#if OMP_40_ENABLED
-extern int __kmp_display_env; /* TRUE or FALSE */
-extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
-extern int __kmp_omp_cancellation; /* TRUE or FALSE */
-#endif
-
-/* ------------------------------------------------------------------------- */
-
-/* --------------------------------------------------------------------------- */
-/* the following are protected by the fork/join lock */
-/* write: lock read: anytime */
-extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
-/* read/write: lock */
-extern volatile kmp_team_t * __kmp_team_pool;
-extern volatile kmp_info_t * __kmp_thread_pool;
-
-/* total number of threads reachable from some root thread including all root threads*/
-extern volatile int __kmp_nth;
-/* total number of threads reachable from some root thread including all root threads,
- and those in the thread pool */
-extern volatile int __kmp_all_nth;
-extern int __kmp_thread_pool_nth;
-extern volatile int __kmp_thread_pool_active_nth;
-
-extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
-/* end data protected by fork/join lock */
-/* --------------------------------------------------------------------------- */
-
-extern kmp_global_t __kmp_global; /* global status */
-
-extern kmp_info_t __kmp_monitor;
-extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library.
-extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library.
-
-#if USE_DEBUGGER
-
-#define _KMP_GEN_ID( counter ) \
- ( \
- __kmp_debugging \
- ? \
- KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \
- : \
- ~ 0 \
- )
-#else
-#define _KMP_GEN_ID( counter ) \
- ( \
- ~ 0 \
- )
-#endif /* USE_DEBUGGER */
-
-#define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter )
-#define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter )
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... );
-
-extern void __kmp_serial_initialize( void );
-extern void __kmp_middle_initialize( void );
-extern void __kmp_parallel_initialize( void );
-
-extern void __kmp_internal_begin( void );
-extern void __kmp_internal_end_library( int gtid );
-extern void __kmp_internal_end_thread( int gtid );
-extern void __kmp_internal_end_atexit( void );
-extern void __kmp_internal_end_fini( void );
-extern void __kmp_internal_end_dtor( void );
-extern void __kmp_internal_end_dest( void* );
-
-extern int __kmp_register_root( int initial_thread );
-extern void __kmp_unregister_root( int gtid );
-
-extern int __kmp_ignore_mppbeg( void );
-extern int __kmp_ignore_mppend( void );
-
-extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws );
-extern void __kmp_exit_single( int gtid );
-
-extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
-extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
-
-#ifdef USE_LOAD_BALANCE
-extern int __kmp_get_load_balance( int );
-#endif
-
-#ifdef BUILD_TV
-extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr );
-#endif
-
-extern int __kmp_get_global_thread_id( void );
-extern int __kmp_get_global_thread_id_reg( void );
-extern void __kmp_exit_thread( int exit_status );
-extern void __kmp_abort( char const * format, ... );
-extern void __kmp_abort_thread( void );
-extern void __kmp_abort_process( void );
-extern void __kmp_warn( char const * format, ... );
-
-extern void __kmp_set_num_threads( int new_nth, int gtid );
-
-// Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
-static inline kmp_info_t * __kmp_entry_thread()
-{
- int gtid = __kmp_entry_gtid();
-
- return __kmp_threads[gtid];
-}
-
-extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
-extern int __kmp_get_max_active_levels( int gtid );
-extern int __kmp_get_ancestor_thread_num( int gtid, int level );
-extern int __kmp_get_team_size( int gtid, int level );
-extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
-extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
-
-extern unsigned short __kmp_get_random( kmp_info_t * thread );
-extern void __kmp_init_random( kmp_info_t * thread );
-
-extern kmp_r_sched_t __kmp_get_schedule_global( void );
-extern void __kmp_adjust_num_threads( int new_nproc );
-
-extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL );
-extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL );
-extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL );
-#define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR )
-#define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR )
-#define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR )
-
-#if USE_FAST_MEMORY
-extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL );
-extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL );
-extern void __kmp_free_fast_memory( kmp_info_t *this_thr );
-extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr );
-#define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
-#define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )
-#endif
-
-extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL );
-extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL );
-extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL );
-extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL );
-#define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR )
-#define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR )
-#define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR )
-#define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR )
-
-#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
-#define KMP_INTERNAL_FREE(p) free(p)
-#define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz))
-#define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz))
-
-extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads );
-
-#if OMP_40_ENABLED
-extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind );
-extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads );
-#endif
-
-extern void __kmp_yield( int cond );
-
-extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
- kmp_int32 chunk );
-extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
- kmp_int32 chunk );
-extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
- kmp_int64 chunk );
-extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
- kmp_int64 chunk );
-
-extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st );
-extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st );
-extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st );
-extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st );
-
-extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid );
-extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid );
-extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid );
-extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid );
-
-
-#ifdef KMP_GOMP_COMPAT
-
-extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
- kmp_int32 chunk, int push_ws );
-extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
- kmp_int32 chunk, int push_ws );
-extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
- kmp_int64 chunk, int push_ws );
-extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
- kmp_int64 chunk, int push_ws );
-extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid );
-extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid );
-extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid );
-extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid );
-
-#endif /* KMP_GOMP_COMPAT */
-
-
-extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker );
-
-extern kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker );
-extern kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker );
-extern kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker );
-extern kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker );
-extern kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker );
-
-extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
-extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj );
-
-class kmp_flag_32;
-class kmp_flag_64;
-class kmp_flag_oncore;
-extern void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin
-#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif
- );
-extern void __kmp_release_32(kmp_flag_32 *flag);
-extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
-#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif
- );
-extern void __kmp_release_64(kmp_flag_64 *flag);
-extern void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin
-#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif
- );
-extern void __kmp_release_oncore(kmp_flag_oncore *flag);
-
-extern void __kmp_infinite_loop( void );
-
-extern void __kmp_cleanup( void );
-
-#if KMP_HANDLE_SIGNALS
- extern int __kmp_handle_signals;
- extern void __kmp_install_signals( int parallel_init );
- extern void __kmp_remove_signals( void );
-#endif
-
-extern void __kmp_clear_system_time( void );
-extern void __kmp_read_system_time( double *delta );
-
-extern void __kmp_check_stack_overlap( kmp_info_t *thr );
-
-extern void __kmp_expand_host_name( char *buffer, size_t size );
-extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern );
-
-#if KMP_OS_WINDOWS
-extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */
-#endif
-
-extern void __kmp_runtime_initialize( void ); /* machine specific initialization */
-extern void __kmp_runtime_destroy( void );
-
-#if KMP_AFFINITY_SUPPORTED
-extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask);
-extern void __kmp_affinity_initialize(void);
-extern void __kmp_affinity_uninitialize(void);
-extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
-#if OMP_40_ENABLED
-extern void __kmp_affinity_set_place(int gtid);
-#endif
-extern void __kmp_affinity_determine_capable( const char *env_var );
-extern int __kmp_aux_set_affinity(void **mask);
-extern int __kmp_aux_get_affinity(void **mask);
-extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
-extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
-extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
-extern void __kmp_balanced_affinity( int tid, int team_size );
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-extern void __kmp_cleanup_hierarchy();
-extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-extern int __kmp_futex_determine_capable( void );
-
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-extern void __kmp_gtid_set_specific( int gtid );
-extern int __kmp_gtid_get_specific( void );
-
-extern double __kmp_read_cpu_time( void );
-
-extern int __kmp_read_system_info( struct kmp_sys_info *info );
-
-extern void __kmp_create_monitor( kmp_info_t *th );
-
-extern void *__kmp_launch_thread( kmp_info_t *thr );
-
-extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size );
-
-#if KMP_OS_WINDOWS
-extern int __kmp_still_running(kmp_info_t *th);
-extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val );
-extern void __kmp_free_handle( kmp_thread_t tHandle );
-#endif
-
-extern void __kmp_reap_monitor( kmp_info_t *th );
-extern void __kmp_reap_worker( kmp_info_t *th );
-extern void __kmp_terminate_thread( int gtid );
-
-extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag );
-extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag );
-extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag );
-extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag );
-extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag );
-extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag );
-
-extern void __kmp_elapsed( double * );
-extern void __kmp_elapsed_tick( double * );
-
-extern void __kmp_enable( int old_state );
-extern void __kmp_disable( int *old_state );
-
-extern void __kmp_thread_sleep( int millis );
-
-extern void __kmp_common_initialize( void );
-extern void __kmp_common_destroy( void );
-extern void __kmp_common_destroy_gtid( int gtid );
-
-#if KMP_OS_UNIX
-extern void __kmp_register_atfork( void );
-#endif
-extern void __kmp_suspend_initialize( void );
-extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th );
-
-extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
- kmp_team_t *team, int tid);
-#if OMP_40_ENABLED
-extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
-#endif
- kmp_proc_bind_t proc_bind,
- kmp_internal_control_t *new_icvs,
- int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
-#else
-extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
-#endif
- kmp_internal_control_t *new_icvs,
- int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
-#endif // OMP_40_ENABLED
-extern void __kmp_free_thread( kmp_info_t * );
-extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) );
-extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
-
-/* ------------------------------------------------------------------------ */
-
-extern void __kmp_initialize_bget( kmp_info_t *th );
-extern void __kmp_finalize_bget( kmp_info_t *th );
-
-KMP_EXPORT void *kmpc_malloc( size_t size );
-KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize );
-KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size );
-KMP_EXPORT void kmpc_free( void *ptr );
-
-/* ------------------------------------------------------------------------ */
-/* declarations for internal use */
-
-extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
- size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
-extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
-
-/*!
- * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call.
- */
-enum fork_context_e
-{
- fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */
- fork_context_intel, /**< Called from Intel generated code. */
- fork_context_last
-};
-extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
- kmp_int32 argc,
-#if OMPT_SUPPORT
- void *unwrapped_task,
-#endif
- microtask_t microtask, launch_t invoker,
-/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- va_list *ap
-#else
- va_list ap
-#endif
- );
-
-extern void __kmp_join_call( ident_t *loc, int gtid
-#if OMPT_SUPPORT
- , enum fork_context_e fork_context
-#endif
-#if OMP_40_ENABLED
- , int exit_teams = 0
-#endif
- );
-
-extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
-extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
-extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
-extern int __kmp_invoke_task_func( int gtid );
-extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
-extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
-
-// should never have been exported
-KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
-#if OMP_40_ENABLED
-extern int __kmp_invoke_teams_master( int gtid );
-extern void __kmp_teams_master( int gtid );
-#endif
-extern void __kmp_save_internal_controls( kmp_info_t * thread );
-extern void __kmp_user_set_library (enum library_type arg);
-extern void __kmp_aux_set_library (enum library_type arg);
-extern void __kmp_aux_set_stacksize( size_t arg);
-extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid);
-extern void __kmp_aux_set_defaults( char const * str, int len );
-
-/* Functions below put here to call them from __kmp_aux_env_initialize() in kmp_settings.c */
-void kmpc_set_blocktime (int arg);
-void ompc_set_nested( int flag );
-void ompc_set_dynamic( int flag );
-void ompc_set_num_threads( int arg );
-
-extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
- kmp_team_t *team, int tid );
-extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
-extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
- kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry );
-extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
- kmp_team_t *team, int tid, int set_curr_task );
-
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
- int *thread_finished,
-#if USE_ITT_BUILD
- void * itt_sync_obj,
-#endif /* USE_ITT_BUILD */
- kmp_int32 is_constrained);
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
- int *thread_finished,
-#if USE_ITT_BUILD
- void * itt_sync_obj,
-#endif /* USE_ITT_BUILD */
- kmp_int32 is_constrained);
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
- int *thread_finished,
-#if USE_ITT_BUILD
- void * itt_sync_obj,
-#endif /* USE_ITT_BUILD */
- kmp_int32 is_constrained);
-
-extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team );
-extern void __kmp_reap_task_teams( void );
-extern void __kmp_wait_to_unref_task_teams( void );
-extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always );
-extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
-extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
-#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif /* USE_ITT_BUILD */
- , int wait=1
-);
-extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
-
-extern int __kmp_is_address_mapped( void *addr );
-extern kmp_uint64 __kmp_hardware_timestamp(void);
-
-#if KMP_OS_UNIX
-extern int __kmp_read_from_file( char const *path, char const *format, ... );
-#endif
-
-/* ------------------------------------------------------------------------ */
-//
-// Assembly routines that have no compiler intrinsic replacement
-//
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-extern void __kmp_query_cpuid( kmp_cpuinfo_t *p );
-
-#define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
-static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
-
-extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-extern void __kmp_clear_x87_fpu_status_word();
-# define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[]
-#if OMPT_SUPPORT
- , void **exit_frame_ptr
-#endif
-);
-
-
-/* ------------------------------------------------------------------------ */
-
-KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags );
-KMP_EXPORT void __kmpc_end ( ident_t * );
-
-KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor,
- kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length );
-KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor );
-KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size );
-
-KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * );
-KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * );
-KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * );
-KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * );
-
-KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * );
-KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... );
-
-KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT void __kmpc_flush ( ident_t *);
-KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
-KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
-
-#if OMP_41_ENABLED
-KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint );
-#endif
-
-KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter,
- kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk );
-
-KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid );
-
-KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit );
-
-extern void KMPC_SET_NUM_THREADS ( int arg );
-extern void KMPC_SET_DYNAMIC ( int flag );
-extern void KMPC_SET_NESTED ( int flag );
-
-/* --------------------------------------------------------------------------- */
-
-/*
- * Taskq interface routines
- */
-
-KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
- size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds);
-KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
-KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
-KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status);
-KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
-KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task);
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * OMP 3.0 tasking interface routines
- */
-
-KMP_EXPORT kmp_int32
-__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
-KMP_EXPORT kmp_task_t*
-__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry );
-KMP_EXPORT void
-__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
-KMP_EXPORT void
-__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
-KMP_EXPORT kmp_int32
-__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
-KMP_EXPORT kmp_int32
-__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid );
-
-KMP_EXPORT kmp_int32
-__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part );
-
-#if TASK_UNUSED
-void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
-void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
-#endif // TASK_UNUSED
-
-/* ------------------------------------------------------------------------ */
-
-#if OMP_40_ENABLED
-
-KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
-KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
-
-KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
- kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
-KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
-extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task );
-
-extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate );
-
-KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
-KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
-KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
-KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
-
-#if OMP_41_ENABLED
-
-KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask );
-KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
-
-#endif
-
-#endif
-
-
-/*
- * Lock interface routines (fast versions with gtid passed in)
- */
-KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-
-#if OMP_41_ENABLED
-KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
-KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
-#endif
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * Interface to fast scalable reduce methods routines
- */
-
-KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size,
- void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck );
-KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
-KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size,
- void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck );
-KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
-
-/*
- * internal fast reduction routines
- */
-
-extern PACKED_REDUCTION_METHOD_T
-__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size,
- void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck );
-
-// this function is for testing set/get/determine reduce method
-KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
-
-KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
-KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
-
-// this function exported for testing of KMP_PLACE_THREADS functionality
-KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-// C++ port
-// missing 'extern "C"' declarations
-
-KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc );
-KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
-
-#if OMP_40_ENABLED
-KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
-KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
-KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
-
-#endif
-
-KMP_EXPORT void*
-__kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid,
- void * data, size_t size, void *** cache );
-
-// Symbols for MS mutual detection.
-extern int _You_must_link_with_exactly_one_OpenMP_library;
-extern int _You_must_link_with_Intel_OpenMP_library;
-#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
- extern int _You_must_link_with_Microsoft_OpenMP_library;
-#endif
-
-
-// The routines below are not exported.
-// Consider making them 'static' in corresponding source files.
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-
-//
-// ompc_, kmpc_ entries moved from omp.h.
-//
-#if KMP_OS_WINDOWS
-# define KMPC_CONVENTION __cdecl
-#else
-# define KMPC_CONVENTION
-#endif
-
-#ifndef __OMP_H
-typedef enum omp_sched_t {
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
-} omp_sched_t;
-typedef void * kmp_affinity_mask_t;
-#endif
-
-KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
-KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
-KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
-KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
-KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
-KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
-KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
-
-KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
-KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
-KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
-KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* KMP_H */
-
+/*! \file */
+/*
+ * kmp.h -- KPTS runtime header file.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_H
+#define KMP_H
+
+#include "kmp_config.h"
+
+/* #define BUILD_PARALLEL_ORDERED 1 */
+
+/* This fix replaces gettimeofday with clock_gettime for better scalability on
+ the Altix. Requires user code to be linked with -lrt.
+*/
+//#define FIX_SGI_CLOCK
+
+/* Defines for OpenMP 3.0 tasking and auto scheduling */
+
+# ifndef KMP_STATIC_STEAL_ENABLED
+# define KMP_STATIC_STEAL_ENABLED 1
+# endif
+
+#define TASK_CURRENT_NOT_QUEUED 0
+#define TASK_CURRENT_QUEUED 1
+
+#define TASK_DEQUE_BITS 8 // Used solely to define TASK_DEQUE_SIZE and TASK_DEQUE_MASK.
+#define TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS )
+#define TASK_DEQUE_MASK ( TASK_DEQUE_SIZE - 1 )
+
+#ifdef BUILD_TIED_TASK_STACK
+#define TASK_STACK_EMPTY 0 // entries when the stack is empty
+
+#define TASK_STACK_BLOCK_BITS 5 // Used to define TASK_STACK_SIZE and TASK_STACK_MASK
+#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array
+#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block
+#endif // BUILD_TIED_TASK_STACK
+
+#define TASK_NOT_PUSHED 1
+#define TASK_SUCCESSFULLY_PUSHED 0
+#define TASK_TIED 1
+#define TASK_UNTIED 0
+#define TASK_EXPLICIT 1
+#define TASK_IMPLICIT 0
+#define TASK_PROXY 1
+#define TASK_FULL 0
+
+#define KMP_CANCEL_THREADS
+#define KMP_THREAD_ATTR
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <signal.h>
+/* do not include <ctype.h>; it causes problems with /MD on Windows* OS NT due to a bad Microsoft library */
+/* some macros provided below to replace some of these functions */
+#ifndef __ABSOFT_WIN
+#include <sys/types.h>
+#endif
+#include <limits.h>
+#include <time.h>
+
+#include <errno.h>
+
+#include "kmp_os.h"
+
+#include "kmp_safe_c_api.h"
+
+#if KMP_STATS_ENABLED
+class kmp_stats_list;
+#endif
+
+#if KMP_USE_HWLOC
+#include "hwloc.h"
+extern hwloc_topology_t __kmp_hwloc_topology;
+extern int __kmp_hwloc_error;
+#endif
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#include <xmmintrin.h>
+#endif
+
+#include "kmp_version.h"
+#include "kmp_debug.h"
+#include "kmp_lock.h"
+#if USE_DEBUGGER
+#include "kmp_debugger.h"
+#endif
+#include "kmp_i18n.h"
+
+#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)
+
+#include "kmp_wrapper_malloc.h"
+#if KMP_OS_UNIX
+# include <unistd.h>
+# if !defined NSIG && defined _NSIG
+# define NSIG _NSIG
+# endif
+#endif
+
+#if KMP_OS_LINUX
+# pragma weak clock_gettime
+#endif
+
+#if OMPT_SUPPORT
+#include "ompt-internal.h"
+#endif
+
+/* Select data placement in NUMA memory */
+#define NO_FIRST_TOUCH 0
+#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */
+
+/* If not specified on compile command line, assume no first touch */
+#ifndef BUILD_MEMORY
+#define BUILD_MEMORY NO_FIRST_TOUCH
+#endif
+
+// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
+// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size.
+#ifndef USE_FAST_MEMORY
+#define USE_FAST_MEMORY 3
+#endif
+
+#ifndef KMP_NESTED_HOT_TEAMS
+# define KMP_NESTED_HOT_TEAMS 0
+# define USE_NESTED_HOT_ARG(x)
+#else
+# if KMP_NESTED_HOT_TEAMS
+# if OMP_40_ENABLED
+# define USE_NESTED_HOT_ARG(x) ,x
+# else
+// Nested hot teams feature depends on omp 4.0, disable it for earlier versions
+# undef KMP_NESTED_HOT_TEAMS
+# define KMP_NESTED_HOT_TEAMS 0
+# define USE_NESTED_HOT_ARG(x)
+# endif
+# else
+# define USE_NESTED_HOT_ARG(x)
+# endif
+#endif
+
+// Assume using BGET compare_exchange instruction instead of lock by default.
+#ifndef USE_CMP_XCHG_FOR_BGET
+#define USE_CMP_XCHG_FOR_BGET 1
+#endif
+
+// Test to see if queuing lock is better than bootstrap lock for bget
+// #ifndef USE_QUEUING_LOCK_FOR_BGET
+// #define USE_QUEUING_LOCK_FOR_BGET
+// #endif
+
+#define KMP_NSEC_PER_SEC 1000000000L
+#define KMP_USEC_PER_SEC 1000000L
+
+/*!
+@ingroup BASIC_TYPES
+@{
+*/
+
+// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...)
+/*!
+Values for bit flags used in the ident_t to describe the fields.
+*/
+/*! Use trampoline for internal microtasks */
+#define KMP_IDENT_IMB 0x01
+/*! Use c-style ident structure */
+#define KMP_IDENT_KMPC 0x02
+/* 0x04 is no longer used */
+/*! Entry point generated by auto-parallelization */
+#define KMP_IDENT_AUTOPAR 0x08
+/*! Compiler generates atomic reduction option for kmpc_reduce* */
+#define KMP_IDENT_ATOMIC_REDUCE 0x10
+/*! To mark a 'barrier' directive in user code */
+#define KMP_IDENT_BARRIER_EXPL 0x20
+/*! To mark implicit barriers. */
+#define KMP_IDENT_BARRIER_IMPL 0x0040
+#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0
+#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040
+#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0
+
+#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
+#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0
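/* A minimal, hypothetical helper (illustrative only, not part of the runtime
   API) showing how an implicit-barrier kind can be decoded from an ident
   flags word with the KMP_IDENT_BARRIER_IMPL_* masks above: */
static inline int __kmp_example_is_workshare_impl_barrier( kmp_int32 flags )
{
    return ( flags & KMP_IDENT_BARRIER_IMPL_MASK ) == KMP_IDENT_BARRIER_IMPL_WORKSHARE;
}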
+
+/*!
+ * The ident structure that describes a source location.
+ */
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+#if USE_ITT_BUILD
+ /* but currently used for storing region-specific ITT */
+ /* contextual information. */
+#endif /* USE_ITT_BUILD */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields which describe the source file,
+ the function and a pair of line numbers that delimit the construct.
+ */
+} ident_t;
+/*!
+@}
+*/
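/* A purely illustrative psource value with the fields documented above
   (source file, function, and a begin/end line pair); the file name,
   function name, and line numbers here are hypothetical, e.g.:
       ";my_file.c;my_function;12;18;;"                                   */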
+
+// Some forward declarations.
+
+typedef union kmp_team kmp_team_t;
+typedef struct kmp_taskdata kmp_taskdata_t;
+typedef union kmp_task_team kmp_task_team_t;
+typedef union kmp_team kmp_team_p;
+typedef union kmp_info kmp_info_p;
+typedef union kmp_root kmp_root_p;
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* Pack two 32-bit signed integers into a 64-bit signed integer */
+/* ToDo: Fix word ordering for big-endian machines. */
+#define KMP_PACK_64(HIGH_32,LOW_32) \
+ ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) )
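/* Minimal, hypothetical helpers (illustrative only; the runtime does not
   define these) showing the inverse of KMP_PACK_64: */
static inline kmp_int32 __kmp_example_unpack_hi32( kmp_int64 packed )
{
    return (kmp_int32)( ( (kmp_uint64) packed ) >> 32 );
}
static inline kmp_int32 __kmp_example_unpack_lo32( kmp_int64 packed )
{
    return (kmp_int32)( ( (kmp_uint64) packed ) & 0xFFFFFFFFULL );
}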
+
+
+/*
+ * Generic string manipulation macros.
+ * Assume that _x is of type char *
+ */
+#define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; }
+#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; }
+#define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; }
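/* A small, hypothetical usage sketch of the macros above (the helper name is
   illustrative, not part of the runtime): advance past blanks and a leading
   integer, then stop at a ',' separator, e.g. for input "  1024,guided". */
static inline char * __kmp_example_skip_chunk_value( char *s )
{
    SKIP_WS( s );        /* skip spaces and tabs                         */
    SKIP_DIGITS( s );    /* skip the numeric chunk value                 */
    SKIP_TO( s, ',' );   /* stop at ',' or '\0' (no-op if already there) */
    return s;
}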
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
+#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+
+/* Enumeration types */
+
+enum kmp_state_timer {
+ ts_stop,
+ ts_start,
+ ts_pause,
+
+ ts_last_state
+};
+
+enum dynamic_mode {
+ dynamic_default,
+#ifdef USE_LOAD_BALANCE
+ dynamic_load_balance,
+#endif /* USE_LOAD_BALANCE */
+ dynamic_random,
+ dynamic_thread_limit,
+ dynamic_max
+};
+
+/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */
+#ifndef KMP_SCHED_TYPE_DEFINED
+#define KMP_SCHED_TYPE_DEFINED
+typedef enum kmp_sched {
+ kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
+ // Note: need to adjust __kmp_sch_map global array in case this enum is changed
+ kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
+ kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
+ kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
+ kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
+ kmp_sched_upper_std = 5, // upper bound for standard schedules
+ kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
+ kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
+// kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
+ kmp_sched_upper = 102,
+ kmp_sched_default = kmp_sched_static // default scheduling
+} kmp_sched_t;
+#endif
+
+/*!
+ * @ingroup WORK_SHARING
+ * Describes the loop schedule to be used for a parallel for loop.
+ */
+enum sched_type {
+ kmp_sch_lower = 32, /**< lower bound for unordered values */
+ kmp_sch_static_chunked = 33,
+ kmp_sch_static = 34, /**< static unspecialized */
+ kmp_sch_dynamic_chunked = 35,
+ kmp_sch_guided_chunked = 36, /**< guided unspecialized */
+ kmp_sch_runtime = 37,
+ kmp_sch_auto = 38, /**< auto */
+ kmp_sch_trapezoidal = 39,
+
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_static_greedy = 40,
+ kmp_sch_static_balanced = 41,
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_guided_iterative_chunked = 42,
+ kmp_sch_guided_analytical_chunked = 43,
+
+ kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */
+
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_upper = 45, /**< upper bound for unordered values */
+
+ kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
+ kmp_ord_static_chunked = 65,
+ kmp_ord_static = 66, /**< ordered static unspecialized */
+ kmp_ord_dynamic_chunked = 67,
+ kmp_ord_guided_chunked = 68,
+ kmp_ord_runtime = 69,
+ kmp_ord_auto = 70, /**< ordered auto */
+ kmp_ord_trapezoidal = 71,
+ kmp_ord_upper = 72, /**< upper bound for ordered values */
+
+#if OMP_40_ENABLED
+ /* Schedules for Distribute construct */
+ kmp_distribute_static_chunked = 91, /**< distribute static chunked */
+ kmp_distribute_static = 92, /**< distribute static unspecialized */
+#endif
+
+ /*
+ * For the "nomerge" versions, kmp_dispatch_next*() will always return
+ * a single iteration/chunk, even if the loop is serialized. For the
+ * schedule types listed above, the entire iteration vector is returned
+ * if the loop is serialized. This doesn't work for gcc/gcomp sections.
+ */
+ kmp_nm_lower = 160, /**< lower bound for nomerge values */
+
+ kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
+ kmp_nm_static = 162, /**< static unspecialized */
+ kmp_nm_dynamic_chunked = 163,
+ kmp_nm_guided_chunked = 164, /**< guided unspecialized */
+ kmp_nm_runtime = 165,
+ kmp_nm_auto = 166, /**< auto */
+ kmp_nm_trapezoidal = 167,
+
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_nm_static_greedy = 168,
+ kmp_nm_static_balanced = 169,
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_nm_guided_iterative_chunked = 170,
+ kmp_nm_guided_analytical_chunked = 171,
+ kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */
+
+ kmp_nm_ord_static_chunked = 193,
+ kmp_nm_ord_static = 194, /**< ordered static unspecialized */
+ kmp_nm_ord_dynamic_chunked = 195,
+ kmp_nm_ord_guided_chunked = 196,
+ kmp_nm_ord_runtime = 197,
+ kmp_nm_ord_auto = 198, /**< auto */
+ kmp_nm_ord_trapezoidal = 199,
+ kmp_nm_upper = 200, /**< upper bound for nomerge values */
+
+ kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
+};
+
+/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
+typedef struct kmp_r_sched {
+ enum sched_type r_sched_type;
+ int chunk;
+} kmp_r_sched_t;
+
+extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types
+
+enum library_type {
+ library_none,
+ library_serial,
+ library_turnaround,
+ library_throughput
+};
+
+#if KMP_OS_LINUX
+enum clock_function_type {
+ clock_function_gettimeofday,
+ clock_function_clock_gettime
+};
+#endif /* KMP_OS_LINUX */
+
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+enum mic_type {
+ non_mic,
+ mic1,
+ mic2,
+ mic3,
+ dummy
+};
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* -- fast reduction stuff ------------------------------------------------ */
+
+#undef KMP_FAST_REDUCTION_BARRIER
+#define KMP_FAST_REDUCTION_BARRIER 1
+
+#undef KMP_FAST_REDUCTION_CORE_DUO
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ #define KMP_FAST_REDUCTION_CORE_DUO 1
+#endif
+
+enum _reduction_method {
+ reduction_method_not_defined = 0,
+ critical_reduce_block = ( 1 << 8 ),
+ atomic_reduce_block = ( 2 << 8 ),
+ tree_reduce_block = ( 3 << 8 ),
+ empty_reduce_block = ( 4 << 8 )
+};
+
+// description of the packed_reduction_method variable
+// the packed_reduction_method variable consists of two enum values packed into its 0-th and 1-st bytes:
+// 0: ( packed_reduction_method & 0x000000FF ) is an 'enum barrier_type' value: the barrier to be used in fast reduction, bs_plain_barrier or bs_reduction_barrier
+// 1: ( packed_reduction_method & 0x0000FF00 ) is the reduction method to be used in fast reduction;
+//    the reduction method is of 'enum _reduction_method' type and is defined so that the bits of the 0-th byte stay empty,
+//    so no shift instruction is needed while packing/unpacking
+
+#if KMP_FAST_REDUCTION_BARRIER
+ #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
+ ( ( reduction_method ) | ( barrier_type ) )
+
+ #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
+ ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) )
+
+ #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
+ ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) )
+#else
+ #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
+ ( reduction_method )
+
+ #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
+ ( packed_reduction_method )
+
+ #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
+ ( bs_plain_barrier )
+#endif
+
+#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \
+ ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) )
+
+#if KMP_FAST_REDUCTION_BARRIER
+ #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
+ ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) )
+
+ #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
+ ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) )
+#endif
+
+typedef int PACKED_REDUCTION_METHOD_T;
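/* A minimal, hypothetical sketch (illustrative only, not part of the runtime
   API) of testing a packed value with the macros above: */
static inline int __kmp_example_is_tree_reduction( PACKED_REDUCTION_METHOD_T packed_reduction_method )
{
    return TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block );
}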
+
+/* -- end of fast reduction stuff ----------------------------------------- */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if KMP_OS_WINDOWS
+# define USE_CBLKDATA
+# pragma warning( push )
+# pragma warning( disable: 271 310 )
+# include <windows.h>
+# pragma warning( pop )
+#endif
+
+#if KMP_OS_UNIX
+# include <pthread.h>
+# include <dlfcn.h>
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Only Linux* OS and Windows* OS support thread affinity.
+ */
+#if KMP_AFFINITY_SUPPORTED
+
+extern size_t __kmp_affin_mask_size;
+# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
+# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
+# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
+# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
+
+#if KMP_USE_HWLOC
+
+typedef hwloc_cpuset_t kmp_affin_mask_t;
+# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
+# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
+# define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i)
+# define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask)
+# define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
+ { \
+ unsigned i; \
+ for(i=0;i<(unsigned)max_bit_number+1;i++) { \
+ if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \
+ hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \
+ } else { \
+ hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \
+ } \
+ } \
+ } \
+
+# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
+# define KMP_CPU_SET_ITERATE(i,mask) \
+ for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i))
+
+# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc()
+# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr);
+# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
+# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
+# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
+# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
+
+//
+// The following macro should be used to index an array of masks.
+// The array should be declared as "kmp_affin_mask_t *" and allocated with
+// size "__kmp_affin_mask_size * len".  The macro takes care of the fact
+// that on Windows* OS, sizeof(kmp_affin_mask_t) is really the size of the mask, but
+// on Linux* OS, sizeof(kmp_affin_mask_t) is 1.
+//
+# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i]))
+# define KMP_CPU_ALLOC_ARRAY(arr, n) { \
+ arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \
+ unsigned i; \
+ for(i=0;i<(unsigned)n;i++) { \
+ arr[i] = hwloc_bitmap_alloc(); \
+ } \
+ }
+# define KMP_CPU_FREE_ARRAY(arr, n) { \
+ unsigned i; \
+ for(i=0;i<(unsigned)n;i++) { \
+ hwloc_bitmap_free(arr[i]); \
+ } \
+ __kmp_free(arr); \
+ }
+# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \
+ arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \
+ unsigned i; \
+ for(i=0;i<(unsigned)n;i++) { \
+ arr[i] = hwloc_bitmap_alloc(); \
+ } \
+ }
+# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
+ unsigned i; \
+ for(i=0;i<(unsigned)n;i++) { \
+ hwloc_bitmap_free(arr[i]); \
+ } \
+ KMP_INTERNAL_FREE(arr); \
+ }
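+
+// Usage sketch for the hwloc-backed helpers above (illustrative only; "nthreads"
+// is a hypothetical count, not a runtime variable):
+//   kmp_affin_mask_t *masks;
+//   KMP_CPU_ALLOC_ARRAY( masks, nthreads );           // one hwloc bitmap per slot
+//   kmp_affin_mask_t *m = KMP_CPU_INDEX( masks, 2 );  // mask stored in slot 2
+//   KMP_CPU_ZERO( m );
+//   KMP_CPU_SET( 5, m );                              // mark logical CPU 5
+//   KMP_CPU_FREE_ARRAY( masks, nthreads );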
+
+#else /* KMP_USE_HWLOC */
+# define KMP_CPU_SET_ITERATE(i,mask) \
+ for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
+
+# if KMP_OS_LINUX
+//
+// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
+// (in bytes). It should be allocated on a word boundary.
+//
+// WARNING!!! We have made the base type of the affinity mask unsigned char,
+// in order to eliminate a lot of checks that the true system mask size is
+// really a multiple of 4 bytes (on Linux* OS).
+//
+// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
+//
+
+typedef unsigned char kmp_affin_mask_t;
+
+# define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
+# define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
+# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
+# define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
+# define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
+# define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
+
+# define KMP_CPU_ZERO(mask) \
+ { \
+ size_t __i; \
+ for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
+ ((kmp_affin_mask_t *)(mask))[__i] = 0; \
+ } \
+ }
+
+# define KMP_CPU_COPY(dest, src) \
+ { \
+ size_t __i; \
+ for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
+ ((kmp_affin_mask_t *)(dest))[__i] \
+ = ((kmp_affin_mask_t *)(src))[__i]; \
+ } \
+ }
+
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
+ { \
+ size_t __i; \
+ for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
+ ((kmp_affin_mask_t *)(mask))[__i] \
+ = ~((kmp_affin_mask_t *)(mask))[__i]; \
+ } \
+ }
+
+# define KMP_CPU_UNION(dest, src) \
+ { \
+ size_t __i; \
+ for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
+ ((kmp_affin_mask_t *)(dest))[__i] \
+ |= ((kmp_affin_mask_t *)(src))[__i]; \
+ } \
+ }
+
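+// Usage sketch (illustrative only, assuming CHAR_BIT == 8): the mask is a plain
+// byte vector, so CPU i maps to bit (i % CHAR_BIT) of byte (i / CHAR_BIT).
+//   kmp_affin_mask_t *mask;
+//   KMP_CPU_ALLOC( mask );       // __kmp_affin_mask_size bytes, zero-filled
+//   KMP_CPU_SET( 9, mask );      // sets bit 1 of byte 1
+//   if ( KMP_CPU_ISSET( 9, mask ) ) { /* ... */ }
+//   KMP_CPU_CLR( 9, mask );
+//   KMP_CPU_FREE( mask );
+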
+# endif /* KMP_OS_LINUX */
+
+# if KMP_OS_WINDOWS
+//
+// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
+// Intel(R) 64 it is 8 bytes times the number of processor groups.
+//
+
+# if KMP_GROUP_AFFINITY
+
+// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
+# if _MSC_VER < 1600
+typedef struct GROUP_AFFINITY {
+ KAFFINITY Mask;
+ WORD Group;
+ WORD Reserved[3];
+} GROUP_AFFINITY;
+# endif
+
+typedef DWORD_PTR kmp_affin_mask_t;
+
+extern int __kmp_num_proc_groups;
+
+# define _KMP_CPU_SET(i,mask) \
+ (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
+ (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
+
+# define KMP_CPU_SET(i,mask) \
+ _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
+
+# define _KMP_CPU_ISSET(i,mask) \
+ (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \
+ (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))
+
+# define KMP_CPU_ISSET(i,mask) \
+ _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
+
+# define _KMP_CPU_CLR(i,mask) \
+ (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \
+ ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
+
+# define KMP_CPU_CLR(i,mask) \
+ _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
+
+# define KMP_CPU_ZERO(mask) \
+ { \
+ int __i; \
+ for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
+ ((kmp_affin_mask_t *)(mask))[__i] = 0; \
+ } \
+ }
+
+# define KMP_CPU_COPY(dest, src) \
+ { \
+ int __i; \
+ for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
+ ((kmp_affin_mask_t *)(dest))[__i] \
+ = ((kmp_affin_mask_t *)(src))[__i]; \
+ } \
+ }
+
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
+ { \
+ int __i; \
+ for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
+ ((kmp_affin_mask_t *)(mask))[__i] \
+ = ~((kmp_affin_mask_t *)(mask))[__i]; \
+ } \
+ }
+
+# define KMP_CPU_UNION(dest, src) \
+ { \
+ int __i; \
+ for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
+ ((kmp_affin_mask_t *)(dest))[__i] \
+ |= ((kmp_affin_mask_t *)(src))[__i]; \
+ } \
+ }
+
+typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
+extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
+
+typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
+extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
+
+typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
+extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
+
+typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
+extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
+
+extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
+
+# else /* KMP_GROUP_AFFINITY */
+
+typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
+
+# define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
+# define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
+# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
+# define KMP_CPU_ZERO(mask) (*(mask) = 0)
+# define KMP_CPU_COPY(dest, src) (*(dest) = *(src))
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask))
+# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
+
+# endif /* KMP_GROUP_AFFINITY */
+
+# endif /* KMP_OS_WINDOWS */
+
+//
+// __kmp_allocate() will return memory allocated on a 4-byte boundary,
+// zeroed; this takes care of the assumptions stated above.
+//
+# define KMP_CPU_ALLOC(ptr) \
+ (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
+# define KMP_CPU_FREE(ptr) __kmp_free(ptr)
+# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
+# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
+# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
+# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr)
+
+//
+// The following macro should be used to index an array of masks.
+// The array should be declared as "kmp_affin_mask_t *" and allocated with
+// size "__kmp_affin_mask_size * len".  The macro takes care of the fact
+// that on Windows* OS, sizeof(kmp_affin_mask_t) is really the size of the mask, but
+// on Linux* OS, sizeof(kmp_affin_mask_t) is 1.
+//
+# define KMP_CPU_INDEX(array,i) \
+ ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
+# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
+# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
+# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
+# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
+
+#endif /* KMP_USE_HWLOC */
+
+//
+// Declare local char buffers with this size for printing debug and info
+// messages, using __kmp_affinity_print_mask().
+//
+#define KMP_AFFIN_MASK_PRINT_LEN 1024
+
+enum affinity_type {
+ affinity_none = 0,
+ affinity_physical,
+ affinity_logical,
+ affinity_compact,
+ affinity_scatter,
+ affinity_explicit,
+ affinity_balanced,
+    affinity_disabled,  // not used outside the env var parser
+ affinity_default
+};
+
+enum affinity_gran {
+ affinity_gran_fine = 0,
+ affinity_gran_thread,
+ affinity_gran_core,
+ affinity_gran_package,
+ affinity_gran_node,
+#if KMP_GROUP_AFFINITY
+ //
+    // The "group" granularity isn't necessarily coarser than all of the
+ // other levels, but we put it last in the enum.
+ //
+ affinity_gran_group,
+#endif /* KMP_GROUP_AFFINITY */
+ affinity_gran_default
+};
+
+enum affinity_top_method {
+ affinity_top_method_all = 0, // try all (supported) methods, in order
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ affinity_top_method_apicid,
+ affinity_top_method_x2apicid,
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+ affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
+#if KMP_GROUP_AFFINITY
+ affinity_top_method_group,
+#endif /* KMP_GROUP_AFFINITY */
+ affinity_top_method_flat,
+#if KMP_USE_HWLOC
+ affinity_top_method_hwloc,
+#endif
+ affinity_top_method_default
+};
+
+#define affinity_respect_mask_default (-1)
+
+extern enum affinity_type __kmp_affinity_type; /* Affinity type */
+extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
+extern int __kmp_affinity_gran_levels; /* corresponding int value */
+extern int __kmp_affinity_dups; /* Affinity duplicate masks */
+extern enum affinity_top_method __kmp_affinity_top_method;
+extern int __kmp_affinity_compact; /* Affinity 'compact' value */
+extern int __kmp_affinity_offset; /* Affinity offset value */
+extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
+extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
+extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */
+extern char * __kmp_affinity_proclist; /* proc ID list */
+extern kmp_affin_mask_t *__kmp_affinity_masks;
+extern unsigned __kmp_affinity_num_masks;
+extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
+extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
+extern void __kmp_affinity_bind_thread(int which);
+
+# if KMP_OS_LINUX
+extern kmp_affin_mask_t *__kmp_affinity_get_fullMask();
+# endif /* KMP_OS_LINUX */
+extern char const * __kmp_cpuinfo_file;
+
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+#if OMP_40_ENABLED
+
+//
+// This needs to be kept in sync with the values in omp.h !!!
+//
+typedef enum kmp_proc_bind_t {
+ proc_bind_false = 0,
+ proc_bind_true,
+ proc_bind_master,
+ proc_bind_close,
+ proc_bind_spread,
+ proc_bind_intel, // use KMP_AFFINITY interface
+ proc_bind_default
+} kmp_proc_bind_t;
+
+typedef struct kmp_nested_proc_bind_t {
+ kmp_proc_bind_t *bind_types;
+ int size;
+ int used;
+} kmp_nested_proc_bind_t;
+
+extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
+
+#endif /* OMP_40_ENABLED */
+
+# if KMP_AFFINITY_SUPPORTED
+# define KMP_PLACE_ALL (-1)
+# define KMP_PLACE_UNDEFINED (-2)
+# endif /* KMP_AFFINITY_SUPPORTED */
+
+extern int __kmp_affinity_num_places;
+
+
+#if OMP_40_ENABLED
+typedef enum kmp_cancel_kind_t {
+ cancel_noreq = 0,
+ cancel_parallel = 1,
+ cancel_loop = 2,
+ cancel_sections = 3,
+ cancel_taskgroup = 4
+} kmp_cancel_kind_t;
+#endif // OMP_40_ENABLED
+
+extern int __kmp_place_num_sockets;
+extern int __kmp_place_socket_offset;
+extern int __kmp_place_num_cores;
+extern int __kmp_place_core_offset;
+extern int __kmp_place_num_threads_per_core;
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
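+
+// KMP_PAD rounds sizeof(type) up to the next multiple of sz. Worked example
+// (hypothetical 100-byte type, sz == 64):
+//   (100 - 1) % 64 == 35,  64 - 35 - 1 == 28,  100 + 28 == 128.
+// A size that is already a multiple of sz is left unchanged (e.g. 128 -> 128).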
+
+//
+// We need to avoid using -1 as a GTID because +1 is added to the gtid
+// when storing it in a lock, and the value 0 is reserved.
+//
+#define KMP_GTID_DNE (-2) /* Does not exist */
+#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
+#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
+#define KMP_GTID_UNKNOWN (-5) /* Is not known */
+#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
+
+#define __kmp_get_gtid() __kmp_get_global_thread_id()
+#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
+
+#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
+ __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
+
+#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
+#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \
+ team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid )
+
+#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team )
+#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
+ __kmp_threads[ (gtid) ]-> th.th_team )
+
+#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] )
+#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) )
+
+    // Returns the gtid of the given thread (pointer to kmp_info_t). In contrast to __kmp_get_gtid(),
+    // it works with registered and not-yet-registered threads.
+#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \
+ (thr)->th.th_info.ds.ds_gtid )
+
+// AT: Which way is correct?
+// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
+// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
+#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc )
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1)))
+
+#define KMP_MIN_NTH 1
+
+#ifndef KMP_MAX_NTH
+# if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
+# define KMP_MAX_NTH PTHREAD_THREADS_MAX
+# else
+# define KMP_MAX_NTH INT_MAX
+# endif
+#endif /* KMP_MAX_NTH */
+
+#ifdef PTHREAD_STACK_MIN
+# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
+#else
+# define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
+#endif
+
+#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
+
+#if KMP_ARCH_X86
+# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
+#elif KMP_ARCH_X86_64
+# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
+# define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
+#else
+# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
+#endif
+
+#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
+
+#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024))
+#define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024))
+#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
+
+#define KMP_MIN_STKOFFSET (0)
+#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
+#if KMP_OS_DARWIN
+# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
+#else
+# define KMP_DEFAULT_STKOFFSET CACHE_LINE
+#endif
+
+#define KMP_MIN_STKPADDING (0)
+#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
+
+#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */
+#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */
+#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */
+#define KMP_MIN_BLOCKTIME (0)
+#define KMP_MAX_BLOCKTIME       (INT_MAX)       /* Must be this value for the "infinite" setting to work */
+#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
+/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */
+/* Only allow increasing number of wakeups */
+#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
+ ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \
+ ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \
+ ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \
+ (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) )
+
+/* Calculate number of intervals for a specific block time based on monitor_wakeups */
+#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
+ ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \
+ (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
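+
+/* Worked example with the defaults above (KMP_BLOCKTIME_MULTIPLIER == 1000,
+   blocktime == KMP_DEFAULT_BLOCKTIME == 200 ms, monitor_wakeups == 1):
+     KMP_WAKEUPS_FROM_BLOCKTIME( 200, 1 )   -> 1000 / 200 == 5 wakeups per second
+     KMP_INTERVALS_FROM_BLOCKTIME( 200, 5 ) -> ( 200 + 199 ) / 200 == 1 interval
+   i.e. the monitor wakes roughly every 200 ms and a blocked thread waits one
+   200 ms interval before going to sleep. */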
+
+#define KMP_MIN_STATSCOLS 40
+#define KMP_MAX_STATSCOLS 4096
+#define KMP_DEFAULT_STATSCOLS 80
+
+#define KMP_MIN_INTERVAL 0
+#define KMP_MAX_INTERVAL (INT_MAX-1)
+#define KMP_DEFAULT_INTERVAL 0
+
+#define KMP_MIN_CHUNK 1
+#define KMP_MAX_CHUNK (INT_MAX-1)
+#define KMP_DEFAULT_CHUNK 1
+
+#define KMP_MIN_INIT_WAIT 1
+#define KMP_MAX_INIT_WAIT (INT_MAX/2)
+#define KMP_DEFAULT_INIT_WAIT 2048U
+
+#define KMP_MIN_NEXT_WAIT 1
+#define KMP_MAX_NEXT_WAIT (INT_MAX/2)
+#define KMP_DEFAULT_NEXT_WAIT 1024U
+
+// max possible dynamic loops in concurrent execution per team
+#define KMP_MAX_DISP_BUF 7
+#define KMP_MAX_ORDERED 8
+
+#define KMP_MAX_FIELDS 32
+
+#define KMP_MAX_BRANCH_BITS 31
+
+#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
+
+/* Minimum number of threads before switch to TLS gtid (experimentally determined) */
+/* josh TODO: what about OS X* tuning? */
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# define KMP_TLS_GTID_MIN 5
+#else
+# define KMP_TLS_GTID_MIN INT_MAX
+#endif
+
+#define KMP_MASTER_TID(tid) ( (tid) == 0 )
+#define KMP_WORKER_TID(tid) ( (tid) != 0 )
+
+#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 )
+#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 )
+#define KMP_UBER_GTID(gtid) \
+ ( \
+ KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \
+ KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \
+ (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
+ (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\
+ )
+#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 )
+
+#ifndef TRUE
+#define FALSE 0
+#define TRUE (! FALSE)
+#endif
+
+/* NOTE: all of the following constants must be even */
+
+#if KMP_OS_WINDOWS
+# define KMP_INIT_WAIT 64U /* initial number of spin-tests */
+#  define KMP_NEXT_WAIT    32U          /* subsequent number of spin-tests */
+#elif KMP_OS_CNK
+# define KMP_INIT_WAIT 16U /* initial number of spin-tests */
+#  define KMP_NEXT_WAIT     8U          /* subsequent number of spin-tests */
+#elif KMP_OS_LINUX
+# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#  define KMP_NEXT_WAIT   512U          /* subsequent number of spin-tests */
+#elif KMP_OS_DARWIN
+/* TODO: tune for KMP_OS_DARWIN */
+# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#  define KMP_NEXT_WAIT   512U          /* subsequent number of spin-tests */
+#elif KMP_OS_FREEBSD
+/* TODO: tune for KMP_OS_FREEBSD */
+# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#  define KMP_NEXT_WAIT   512U          /* subsequent number of spin-tests */
+#elif KMP_OS_NETBSD
+/* TODO: tune for KMP_OS_NETBSD */
+# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#  define KMP_NEXT_WAIT   512U          /* subsequent number of spin-tests */
+#endif
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+typedef struct kmp_cpuid {
+ kmp_uint32 eax;
+ kmp_uint32 ebx;
+ kmp_uint32 ecx;
+ kmp_uint32 edx;
+} kmp_cpuid_t;
+extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
+# if KMP_ARCH_X86
+ extern void __kmp_x86_pause( void );
+# elif KMP_MIC
+ static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); };
+# else
+ static void __kmp_x86_pause( void ) { _mm_pause(); };
+# endif
+# define KMP_CPU_PAUSE() __kmp_x86_pause()
+#elif KMP_ARCH_PPC64
+# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
+# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
+# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
+# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
+#else
+# define KMP_CPU_PAUSE() /* nothing to do */
+#endif
+
+#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; }
+
+#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }
+
+// Note the decrement by 2 in the following macros.  With KMP_LIBRARY=turnaround,
+// there should be no yielding, since the starting value from KMP_INIT_YIELD() is odd.
+
+#define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \
+ if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
+#define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \
+ if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }
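+
+// Illustrative trace of the counting trick above (assuming, as the comment
+// implies, that __kmp_yield_init is made odd for turnaround mode and even for
+// throughput mode):
+//   odd start:  9 -> 7 -> 5 -> 3 -> 1 -> -1 ...  count never hits 0, so no yields
+//   even start: 8 -> 6 -> 4 -> 2 -> 0            yield, then reset to __kmp_yield_next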
+
+/* ------------------------------------------------------------------------ */
+/* Support datatypes for the orphaned construct nesting checks. */
+/* ------------------------------------------------------------------------ */
+
+enum cons_type {
+ ct_none,
+ ct_parallel,
+ ct_pdo,
+ ct_pdo_ordered,
+ ct_psections,
+ ct_psingle,
+
+ /* the following must be left in order and not split up */
+ ct_taskq,
+ ct_task, /* really task inside non-ordered taskq, considered a worksharing type */
+ ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */
+ /* the preceding must be left in order and not split up */
+
+ ct_critical,
+ ct_ordered_in_parallel,
+ ct_ordered_in_pdo,
+ ct_ordered_in_taskq,
+ ct_master,
+ ct_reduce,
+ ct_barrier
+};
+
+/* test to see if we are in a taskq construct */
+# define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) )
+# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)
+
+struct cons_data {
+ ident_t const *ident;
+ enum cons_type type;
+ int prev;
+ kmp_user_lock_p name; /* address exclusively for critical section name comparison */
+};
+
+struct cons_header {
+ int p_top, w_top, s_top;
+ int stack_size, stack_top;
+ struct cons_data *stack_data;
+};
+
+struct kmp_region_info {
+ char *text;
+ int offset[KMP_MAX_FIELDS];
+ int length[KMP_MAX_FIELDS];
+};
+
+
+/* ---------------------------------------------------------------------- */
+/* ---------------------------------------------------------------------- */
+
+#if KMP_OS_WINDOWS
+ typedef HANDLE kmp_thread_t;
+ typedef DWORD kmp_key_t;
+#endif /* KMP_OS_WINDOWS */
+
+#if KMP_OS_UNIX
+ typedef pthread_t kmp_thread_t;
+ typedef pthread_key_t kmp_key_t;
+#endif
+
+extern kmp_key_t __kmp_gtid_threadprivate_key;
+
+typedef struct kmp_sys_info {
+ long maxrss; /* the maximum resident set size utilized (in kilobytes) */
+ long minflt; /* the number of page faults serviced without any I/O */
+ long majflt; /* the number of page faults serviced that required I/O */
+ long nswap; /* the number of times a process was "swapped" out of memory */
+ long inblock; /* the number of times the file system had to perform input */
+ long oublock; /* the number of times the file system had to perform output */
+    long nvcsw;              /* the number of times a context switch was performed voluntarily */
+ long nivcsw; /* the number of times a context switch was forced */
+} kmp_sys_info_t;
+
+typedef struct kmp_cpuinfo {
+ int initialized; // If 0, other fields are not initialized.
+ int signature; // CPUID(1).EAX
+ int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family )
+ int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model)
+ int stepping; // CPUID(1).EAX[3:0] ( Stepping )
+ int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
+ int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
+ int cpu_stackoffset;
+ int apic_id;
+ int physical_id;
+ int logical_id;
+ kmp_uint64 frequency; // Nominal CPU frequency in Hz.
+} kmp_cpuinfo_t;
+
+
+#ifdef BUILD_TV
+
+struct tv_threadprivate {
+ /* Record type #1 */
+ void *global_addr;
+ void *thread_addr;
+};
+
+struct tv_data {
+ struct tv_data *next;
+ void *type;
+ union tv_union {
+ struct tv_threadprivate tp;
+ } u;
+};
+
+extern kmp_key_t __kmp_tv_key;
+
+#endif /* BUILD_TV */
+
+/* ------------------------------------------------------------------------ */
+
+#if USE_ITT_BUILD
+// We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here.
+// Later we will check that the type meets the requirements.
+typedef int kmp_itt_mark_t;
+#define KMP_ITT_DEBUG 0
+#endif /* USE_ITT_BUILD */
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Taskq data structures
+ */
+
+#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4)
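+// Worked example of the three-quarters high-water mark above (integer division):
+//   HIGH_WATER_MARK( 8 ) == 6,  HIGH_WATER_MARK( 7 ) == 5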
+#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */
+
+/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */
+
+#define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */
+#define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */
+#define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */
+#define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */
+#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */
+#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */
+#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */
+#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */
+
+#define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */
+
+#define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */
+#define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */
+#define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */
+#define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */
+#define TQF_PARALLEL_CONTEXT    0x1000  /* internal use only; this queue was encountered in a parallel context and is not serialized */
+#define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */
+
+#define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use only flags */
+
+typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t {
+ kmp_int32 ai_data;
+} kmpc_aligned_int32_t;
+
+typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t {
+ struct kmpc_thunk_t *qs_thunk;
+} kmpc_aligned_queue_slot_t;
+
+typedef struct kmpc_task_queue_t {
+ /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */
+ kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */
+ union {
+ struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */
+ struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */
+ } tq;
+ volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */
+ struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */
+ struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */
+ volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */
+ /* (other than the thread executing the kmpc_end_taskq call) */
+ /* locked by parent tq's tq_link_lck */
+
+ /* shared data for task queue */
+ struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */
+ /* only one array element exists for all but outermost taskq */
+
+ /* bookkeeping for ordered task queue */
+ kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */
+ volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */
+
+ /* thunk storage management for task queue */
+ kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */
+ struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */
+ struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */
+
+ /* data fields for queue itself */
+ kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */
+ kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */
+ volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */
+ kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */
+ kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */
+ kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */
+ volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */
+ kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */
+ volatile kmp_int32 tq_flags; /* TQF_xxx */
+
+ /* bookkeeping for outstanding thunks */
+ struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */
+ kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */
+
+ /* statistics library bookkeeping */
+ ident_t *tq_loc; /* source location information for taskq directive */
+} kmpc_task_queue_t;
+
+typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk);
+
+/* sizeof_shareds passed as arg to __kmpc_taskq call */
+typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */
+ kmpc_task_queue_t *sv_queue;
+ /* (pointers to) shared vars */
+} kmpc_shared_vars_t;
+
+typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t {
+ volatile struct kmpc_shared_vars_t *ai_data;
+} kmpc_aligned_shared_vars_t;
+
+/* sizeof_thunk passed as arg to kmpc_taskq call */
+typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */
+ union { /* field used for internal freelists too */
+ kmpc_shared_vars_t *th_shareds;
+ struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */
+ } th;
+ kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */
+ struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */
+ kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */
+ kmp_int32 th_status;
+ kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */
+ /* private vars */
+} kmpc_thunk_t;
+
+typedef struct KMP_ALIGN_CACHE kmp_taskq {
+ int tq_curr_thunk_capacity;
+
+ kmpc_task_queue_t *tq_root;
+ kmp_int32 tq_global_flags;
+
+ kmp_lock_t tq_freelist_lck;
+ kmpc_task_queue_t *tq_freelist;
+
+ kmpc_thunk_t **tq_curr_thunk;
+} kmp_taskq_t;
+
+/* END Taskq data structures */
+/* --------------------------------------------------------------------------- */
+
+typedef kmp_int32 kmp_critical_name[8];
+
+/*!
+@ingroup PARALLEL
+The type for a microtask which gets passed to @ref __kmpc_fork_call().
+The arguments to the outlined function are
+@param global_tid the global thread identity of the thread executing the function.
+@param bound_tid  the local identity of the thread executing the function.
+@param ... pointers to shared variables accessed by the function.
+*/
+typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... );
+typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... );
+
+/*!
+@ingroup THREADPRIVATE
+@{
+*/
+/* --------------------------------------------------------------------------- */
+/* Threadprivate initialization/finalization function declarations */
+
+/* for non-array objects: __kmpc_threadprivate_register() */
+
+/*!
+ Pointer to the constructor function.
+ The first argument is the <tt>this</tt> pointer
+*/
+typedef void *(*kmpc_ctor) (void *);
+
+/*!
+ Pointer to the destructor function.
+ The first argument is the <tt>this</tt> pointer
+*/
+typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */
+/*!
+ Pointer to an alternate constructor.
+ The first argument is the <tt>this</tt> pointer.
+*/
+typedef void *(*kmpc_cctor) (void *, void *);
+
+/* for array objects: __kmpc_threadprivate_register_vec() */
+ /* First arg: "this" pointer */
+ /* Last arg: number of array elements */
+/*!
+ Array constructor.
+ First argument is the <tt>this</tt> pointer
+ The second argument is the number of array elements.
+*/
+typedef void *(*kmpc_ctor_vec) (void *, size_t);
+/*!
+ Pointer to the array destructor function.
+ The first argument is the <tt>this</tt> pointer
+ The second argument is the number of array elements.
+*/
+typedef void (*kmpc_dtor_vec) (void *, size_t);
+/*!
+ Array constructor.
+ First argument is the <tt>this</tt> pointer
+ The third argument is the number of array elements.
+*/
+typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */
+
+/*!
+@}
+*/
+
+
+/* ------------------------------------------------------------------------ */
+
+/* keeps tracked of threadprivate cache allocations for cleanup later */
+typedef struct kmp_cached_addr {
+ void **addr; /* address of allocated cache */
+ struct kmp_cached_addr *next; /* pointer to next cached address */
+} kmp_cached_addr_t;
+
+struct private_data {
+ struct private_data *next; /* The next descriptor in the list */
+ void *data; /* The data buffer for this descriptor */
+ int more; /* The repeat count for this descriptor */
+ size_t size; /* The data size for this descriptor */
+};
+
+struct private_common {
+ struct private_common *next;
+ struct private_common *link;
+ void *gbl_addr;
+ void *par_addr; /* par_addr == gbl_addr for MASTER thread */
+ size_t cmn_size;
+};
+
+struct shared_common
+{
+ struct shared_common *next;
+ struct private_data *pod_init;
+ void *obj_init;
+ void *gbl_addr;
+ union {
+ kmpc_ctor ctor;
+ kmpc_ctor_vec ctorv;
+ } ct;
+ union {
+ kmpc_cctor cctor;
+ kmpc_cctor_vec cctorv;
+ } cct;
+ union {
+ kmpc_dtor dtor;
+ kmpc_dtor_vec dtorv;
+ } dt;
+ size_t vec_len;
+ int is_vec;
+ size_t cmn_size;
+};
+
+#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
+#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
+#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
+#define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
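+
+/* Hashing sketch: with the values above the table has 512 buckets; the low 3
+   address bits are discarded (addresses are presumed to be at least 8-byte
+   aligned) and the next 9 bits select the bucket. For a hypothetical address:
+     KMP_HASH( 0x1010 ) == ( 0x1010 >> 3 ) & 511 == 514 & 511 == 2 */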
+
+struct common_table {
+ struct private_common *data[ KMP_HASH_TABLE_SIZE ];
+};
+
+struct shared_table {
+ struct shared_common *data[ KMP_HASH_TABLE_SIZE ];
+};
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef KMP_STATIC_STEAL_ENABLED
+typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
+ kmp_int32 count;
+ kmp_int32 ub;
+ /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
+ kmp_int32 lb;
+ kmp_int32 st;
+ kmp_int32 tc;
+ kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */
+
+ // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
+ // a) parm3 is properly aligned and
+ // b) all parm1-4 are in the same cache line.
+    // Because parm1-4 are used together, performance seems to be better
+    // if they are in the same cache line (not measured, though).
+
+ struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
+ kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
+ kmp_int32 parm2; // make no real change at least while padding is off.
+ kmp_int32 parm3;
+ kmp_int32 parm4;
+ };
+
+ kmp_uint32 ordered_lower;
+ kmp_uint32 ordered_upper;
+#if KMP_OS_WINDOWS
+ // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
+ // It would be nice to measure execution times.
+    // The conditional if/endif could be removed entirely.
+ kmp_int32 last_upper;
+#endif /* KMP_OS_WINDOWS */
+} dispatch_private_info32_t;
+
+typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
+ kmp_int64 count; /* current chunk number for static and static-steal scheduling*/
+ kmp_int64 ub; /* upper-bound */
+ /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
+ kmp_int64 lb; /* lower-bound */
+ kmp_int64 st; /* stride */
+ kmp_int64 tc; /* trip count (number of iterations) */
+ kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */
+
+ /* parm[1-4] are used in different ways by different scheduling algorithms */
+
+ // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+ // a) parm3 is properly aligned and
+ // b) all parm1-4 are in the same cache line.
+    // Because parm1-4 are used together, performance seems to be better
+    // if they are in the same cache line (not measured, though).
+
+ struct KMP_ALIGN( 32 ) {
+ kmp_int64 parm1;
+ kmp_int64 parm2;
+ kmp_int64 parm3;
+ kmp_int64 parm4;
+ };
+
+ kmp_uint64 ordered_lower;
+ kmp_uint64 ordered_upper;
+#if KMP_OS_WINDOWS
+ // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
+ // It would be nice to measure execution times.
+    // The conditional if/endif could be removed entirely.
+ kmp_int64 last_upper;
+#endif /* KMP_OS_WINDOWS */
+} dispatch_private_info64_t;
+#else /* KMP_STATIC_STEAL_ENABLED */
+typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
+ kmp_int32 lb;
+ kmp_int32 ub;
+ kmp_int32 st;
+ kmp_int32 tc;
+
+ kmp_int32 parm1;
+ kmp_int32 parm2;
+ kmp_int32 parm3;
+ kmp_int32 parm4;
+
+ kmp_int32 count;
+
+ kmp_uint32 ordered_lower;
+ kmp_uint32 ordered_upper;
+#if KMP_OS_WINDOWS
+ kmp_int32 last_upper;
+#endif /* KMP_OS_WINDOWS */
+} dispatch_private_info32_t;
+
+typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
+ kmp_int64 lb; /* lower-bound */
+ kmp_int64 ub; /* upper-bound */
+ kmp_int64 st; /* stride */
+ kmp_int64 tc; /* trip count (number of iterations) */
+
+ /* parm[1-4] are used in different ways by different scheduling algorithms */
+ kmp_int64 parm1;
+ kmp_int64 parm2;
+ kmp_int64 parm3;
+ kmp_int64 parm4;
+
+ kmp_int64 count; /* current chunk number for static scheduling */
+
+ kmp_uint64 ordered_lower;
+ kmp_uint64 ordered_upper;
+#if KMP_OS_WINDOWS
+ kmp_int64 last_upper;
+#endif /* KMP_OS_WINDOWS */
+} dispatch_private_info64_t;
+#endif /* KMP_STATIC_STEAL_ENABLED */
+
+typedef struct KMP_ALIGN_CACHE dispatch_private_info {
+ union private_info {
+ dispatch_private_info32_t p32;
+ dispatch_private_info64_t p64;
+ } u;
+ enum sched_type schedule; /* scheduling algorithm */
+ kmp_int32 ordered; /* ordered clause specified */
+ kmp_int32 ordered_bumped;
+ kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
+ struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */
+ kmp_int32 nomerge; /* don't merge iters if serialized */
+ kmp_int32 type_size; /* the size of types in private_info */
+ enum cons_type pushed_ws;
+} dispatch_private_info_t;
+
+typedef struct dispatch_shared_info32 {
+ /* chunk index under dynamic, number of idle threads under static-steal;
+ iteration index otherwise */
+ volatile kmp_uint32 iteration;
+ volatile kmp_uint32 num_done;
+ volatile kmp_uint32 ordered_iteration;
+ kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
+} dispatch_shared_info32_t;
+
+typedef struct dispatch_shared_info64 {
+ /* chunk index under dynamic, number of idle threads under static-steal;
+ iteration index otherwise */
+ volatile kmp_uint64 iteration;
+ volatile kmp_uint64 num_done;
+ volatile kmp_uint64 ordered_iteration;
+ kmp_int64 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
+} dispatch_shared_info64_t;
+
+typedef struct dispatch_shared_info {
+ union shared_info {
+ dispatch_shared_info32_t s32;
+ dispatch_shared_info64_t s64;
+ } u;
+/* volatile kmp_int32      dispatch_abort;  deprecated */
+ volatile kmp_uint32 buffer_index;
+} dispatch_shared_info_t;
+
+typedef struct kmp_disp {
+ /* Vector for ORDERED SECTION */
+ void (*th_deo_fcn)( int * gtid, int * cid, ident_t *);
+ /* Vector for END ORDERED SECTION */
+ void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *);
+
+ dispatch_shared_info_t *th_dispatch_sh_current;
+ dispatch_private_info_t *th_dispatch_pr_current;
+
+ dispatch_private_info_t *th_disp_buffer;
+ kmp_int32 th_disp_index;
+ void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
+#if KMP_USE_INTERNODE_ALIGNMENT
+ char more_padding[INTERNODE_CACHE_LINE];
+#endif
+} kmp_disp_t;
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* Barrier stuff */
+
+/* constants for barrier state update */
+#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
+#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
+#define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */
+#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
+
+#define KMP_BARRIER_SLEEP_STATE ((kmp_uint) (1 << KMP_BARRIER_SLEEP_BIT))
+#define KMP_BARRIER_UNUSED_STATE ((kmp_uint) (1 << KMP_BARRIER_UNUSED_BIT))
+#define KMP_BARRIER_STATE_BUMP ((kmp_uint) (1 << KMP_BARRIER_BUMP_BIT))
+
+#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
+# error "Barrier sleep bit must be smaller than barrier bump bit"
+#endif
+#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
+# error "Barrier unused bit must be smaller than barrier bump bit"
+#endif
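+
+// Illustrative reading of a barrier state word under the bit layout above
+// (sketch, not normative): the go/arrived counters advance in units of
+// KMP_BARRIER_STATE_BUMP (4), bit 0 carries the sleep flag, and bit 1 must stay
+// clear. E.g. a state value of 9 == (2 * KMP_BARRIER_STATE_BUMP) | KMP_BARRIER_SLEEP_STATE
+// means the counter has been bumped twice and a sleeping waiter is flagged.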
+
+// Constants for release barrier wait state: currently, hierarchical only
+#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
+#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release
+#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release
+#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go
+#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up
+
+enum barrier_type {
+ bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
+ bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
+ #if KMP_FAST_REDUCTION_BARRIER
+ bs_reduction_barrier, /* 2, All barriers that are used in reduction */
+ #endif // KMP_FAST_REDUCTION_BARRIER
+ bs_last_barrier /* Just a placeholder to mark the end */
+};
+
+// to work with reduction barriers just like with plain barriers
+#if !KMP_FAST_REDUCTION_BARRIER
+ #define bs_reduction_barrier bs_plain_barrier
+#endif // KMP_FAST_REDUCTION_BARRIER
+
+typedef enum kmp_bar_pat { /* Barrier communication patterns */
+ bp_linear_bar = 0, /* Single level (degenerate) tree */
+ bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
+ bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
+ bp_hierarchical_bar = 3, /* Machine hierarchy tree */
+ bp_last_bar = 4 /* Placeholder to mark the end */
+} kmp_bar_pat_e;
+
+# define KMP_BARRIER_ICV_PUSH 1
+
+/* Record for holding the values of the internal controls stack records */
+typedef struct kmp_internal_control {
+ int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
+ kmp_int8 nested; /* internal control for nested parallelism (per thread) */
+ kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */
+ kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */
+ int blocktime; /* internal control for blocktime */
+ int bt_intervals; /* internal control for blocktime intervals */
+ int nproc; /* internal control for #threads for next parallel region (per thread) */
+ int max_active_levels; /* internal control for max_active_levels */
+ kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
+#if OMP_40_ENABLED
+ kmp_proc_bind_t proc_bind; /* internal control for affinity */
+#endif // OMP_40_ENABLED
+ struct kmp_internal_control *next;
+} kmp_internal_control_t;
+
+static inline void
+copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
+ *dst = *src;
+}
+
+/* Thread barrier needs volatile barrier fields */
+typedef struct KMP_ALIGN_CACHE kmp_bstate {
+ // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it).
+ // It is not explicitly aligned below, because we *don't* want it to be padded -- instead,
+    // we fit b_go into the same cache line as th_fixed_icvs, enabling NGO cache-line
+    // stores in the hierarchical barrier.
+ kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
+ // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
+ volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
+ KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point.
+ kmp_uint32 *skip_per_level;
+ kmp_uint32 my_level;
+ kmp_int32 parent_tid;
+ kmp_int32 old_tid;
+ kmp_uint32 depth;
+ struct kmp_bstate *parent_bar;
+ kmp_team_t *team;
+ kmp_uint64 leaf_state;
+ kmp_uint32 nproc;
+ kmp_uint8 base_leaf_kids;
+ kmp_uint8 leaf_kids;
+ kmp_uint8 offset;
+ kmp_uint8 wait_flag;
+ kmp_uint8 use_oncore_barrier;
+#if USE_DEBUGGER
+    // The following field is intended solely for the debugger. Only the worker thread itself accesses this
+    // field: the worker increments it by 1 when it arrives at a barrier.
+ KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
+#endif /* USE_DEBUGGER */
+} kmp_bstate_t;
+
+union KMP_ALIGN_CACHE kmp_barrier_union {
+ double b_align; /* use worst case alignment */
+ char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
+ kmp_bstate_t bb;
+};
+
+typedef union kmp_barrier_union kmp_balign_t;
+
+/* Team barrier needs only non-volatile arrived counter */
+union KMP_ALIGN_CACHE kmp_barrier_team_union {
+ double b_align; /* use worst case alignment */
+ char b_pad[ CACHE_LINE ];
+ struct {
+ kmp_uint64 b_arrived; /* STATE => task reached synch point. */
+#if USE_DEBUGGER
+        // The following two fields are intended solely for the debugger. Only the master of the team accesses
+        // these fields: the first one is incremented by 1 when the master arrives at a barrier, and the
+        // second one is incremented by 1 when all the threads have arrived.
+ kmp_uint b_master_arrived;
+ kmp_uint b_team_arrived;
+#endif
+ };
+};
+
+typedef union kmp_barrier_team_union kmp_balign_team_t;
+
+/*
+ * Padding for Linux* OS pthreads condition variables and mutexes used to signal
+ * threads when a condition changes.  This is to work around an NPTL bug
+ * where padding was added to pthread_cond_t, which caused the initialization
+ * routine to write outside of the structure if compiled on pre-NPTL threads.
+ */
+
+#if KMP_OS_WINDOWS
+typedef struct kmp_win32_mutex
+{
+ /* The Lock */
+ CRITICAL_SECTION cs;
+} kmp_win32_mutex_t;
+
+typedef struct kmp_win32_cond
+{
+ /* Count of the number of waiters. */
+ int waiters_count_;
+
+ /* Serialize access to <waiters_count_> */
+ kmp_win32_mutex_t waiters_count_lock_;
+
+ /* Number of threads to release via a <cond_broadcast> or a */
+ /* <cond_signal> */
+ int release_count_;
+
+ /* Keeps track of the current "generation" so that we don't allow */
+ /* one thread to steal all the "releases" from the broadcast. */
+ int wait_generation_count_;
+
+ /* A manual-reset event that's used to block and release waiting */
+ /* threads. */
+ HANDLE event_;
+} kmp_win32_cond_t;
+#endif
+
+#if KMP_OS_UNIX
+
+union KMP_ALIGN_CACHE kmp_cond_union {
+ double c_align;
+ char c_pad[ CACHE_LINE ];
+ pthread_cond_t c_cond;
+};
+
+typedef union kmp_cond_union kmp_cond_align_t;
+
+union KMP_ALIGN_CACHE kmp_mutex_union {
+ double m_align;
+ char m_pad[ CACHE_LINE ];
+ pthread_mutex_t m_mutex;
+};
+
+typedef union kmp_mutex_union kmp_mutex_align_t;
+
+#endif /* KMP_OS_UNIX */
+
+typedef struct kmp_desc_base {
+ void *ds_stackbase;
+ size_t ds_stacksize;
+ int ds_stackgrow;
+ kmp_thread_t ds_thread;
+ volatile int ds_tid;
+ int ds_gtid;
+#if KMP_OS_WINDOWS
+ volatile int ds_alive;
+ DWORD ds_thread_id;
+ /*
+        ds_thread keeps the thread handle on Windows* OS. It is enough for RTL purposes. However,
+        debugger support (libomp_db) cannot work with handles, because they are not comparable. For
+        example, the debugger requests info about a thread with handle h. h is valid within the debugger
+        process, but meaningless within the debuggee process. Even if h is duplicated by a call to
+        DuplicateHandle(), so that the resulting h' is valid within the debuggee process, it is a *new*
+        handle which does *not* equal any other handle in the debuggee... The only way to
+        compare handles is to convert them to system-wide ids. The GetThreadId() function is
+        available only in Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
+        available on all Windows* OS flavours (including Windows* 95). Thus, we have to get the thread id
+        by a call to GetCurrentThreadId() from within the thread and save it to let libomp_db
+        identify threads.
+ */
+#endif /* KMP_OS_WINDOWS */
+} kmp_desc_base_t;
+
+typedef union KMP_ALIGN_CACHE kmp_desc {
+ double ds_align; /* use worst case alignment */
+ char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ];
+ kmp_desc_base_t ds;
+} kmp_desc_t;
+
+
+typedef struct kmp_local {
+ volatile int this_construct; /* count of single's encountered by thread */
+ void *reduce_data;
+#if KMP_USE_BGET
+ void *bget_data;
+ void *bget_list;
+#if ! USE_CMP_XCHG_FOR_BGET
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ kmp_lock_t bget_lock; /* Lock for accessing bget free list */
+#else
+ kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */
+ /* Must be bootstrap lock so we can use it at library shutdown */
+#endif /* USE_QUEUING_LOCK_FOR_BGET */
+#endif /* ! USE_CMP_XCHG_FOR_BGET */
+#endif /* KMP_USE_BGET */
+
+#ifdef BUILD_TV
+ struct tv_data *tv_data;
+#endif
+
+ PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */
+
+} kmp_local_t;
+
+#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
+#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
+#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
+
+#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
+#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
+#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
+#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
+
+#define set__blocktime_team( xteam, xtid, xval ) \
+ ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
+
+#define set__bt_intervals_team( xteam, xtid, xval ) \
+ ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
+
+#define set__bt_set_team( xteam, xtid, xval ) \
+ ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
+
+
+#define set__nested( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) )
+#define get__nested( xthread ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) )
+
+#define set__dynamic( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) )
+#define get__dynamic( xthread ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) )
+
+#define set__nproc( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) )
+
+#define set__max_active_levels( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) )
+
+#define set__sched( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) )
+
+#if OMP_40_ENABLED
+
+#define set__proc_bind( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) )
+#define get__proc_bind( xthread ) \
+ ( (xthread)->th.th_current_task->td_icvs.proc_bind )
+
+#endif /* OMP_40_ENABLED */
+
+
+/* ------------------------------------------------------------------------ */
+// OpenMP tasking data structures
+//
+
+typedef enum kmp_tasking_mode {
+ tskm_immediate_exec = 0,
+ tskm_extra_barrier = 1,
+ tskm_task_teams = 2,
+ tskm_max = 2
+} kmp_tasking_mode_t;
+
+extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */
+extern kmp_int32 __kmp_task_stealing_constraint;
+
+/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */
+#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1)
+#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1)
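+
+// Illustrative layout of the single allocation described above (taskdata first);
+// the conversion macros are plain +/- 1 steps in kmp_taskdata_t units:
+//
+//     +----------------+------------+------------------+
+//     | kmp_taskdata_t | kmp_task_t | private vars ... |
+//     +----------------+------------+------------------+
+//     ^                ^
+//     taskdata         task == KMP_TASKDATA_TO_TASK( taskdata )
+//                      taskdata == KMP_TASK_TO_TASKDATA( task )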
+
+// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
+// queued since the previous barrier release.
+#define KMP_TASKING_ENABLED(task_team) \
+ (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
+/*!
+@ingroup BASIC_TYPES
+@{
+*/
+
+/*!
+ */
+typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );
+
+/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
+/*!
+ */
+typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
+ void * shareds; /**< pointer to block of pointers to shared vars */
+ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
+ kmp_int32 part_id; /**< part id for the task */
+#if OMP_40_ENABLED
+    kmp_routine_entry_t destructors;        /* pointer to function to invoke destructors of firstprivate C++ objects */
+#endif // OMP_40_ENABLED
+ /* private vars */
+} kmp_task_t;
+
+/*!
+@}
+*/
+
+#if OMP_40_ENABLED
+typedef struct kmp_taskgroup {
+ kmp_uint32 count; // number of allocated and not yet complete tasks
+ kmp_int32 cancel_request; // request for cancellation of this taskgroup
+ struct kmp_taskgroup *parent; // parent taskgroup
+} kmp_taskgroup_t;
+
+
+// forward declarations
+typedef union kmp_depnode kmp_depnode_t;
+typedef struct kmp_depnode_list kmp_depnode_list_t;
+typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+
+typedef struct kmp_depend_info {
+ kmp_intptr_t base_addr;
+ size_t len;
+ struct {
+ bool in:1;
+ bool out:1;
+ } flags;
+} kmp_depend_info_t;
+
+struct kmp_depnode_list {
+ kmp_depnode_t * node;
+ kmp_depnode_list_t * next;
+};
+
+typedef struct kmp_base_depnode {
+ kmp_depnode_list_t * successors;
+ kmp_task_t * task;
+
+ kmp_lock_t lock;
+
+#if KMP_SUPPORT_GRAPH_OUTPUT
+ kmp_uint32 id;
+#endif
+
+ volatile kmp_int32 npredecessors;
+ volatile kmp_int32 nrefs;
+} kmp_base_depnode_t;
+
+union KMP_ALIGN_CACHE kmp_depnode {
+ double dn_align; /* use worst case alignment */
+ char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
+ kmp_base_depnode_t dn;
+};
+
+struct kmp_dephash_entry {
+ kmp_intptr_t addr;
+ kmp_depnode_t * last_out;
+ kmp_depnode_list_t * last_ins;
+ kmp_dephash_entry_t * next_in_bucket;
+};
+
+typedef struct kmp_dephash {
+ kmp_dephash_entry_t ** buckets;
+#ifdef KMP_DEBUG
+ kmp_uint32 nelements;
+ kmp_uint32 nconflicts;
+#endif
+} kmp_dephash_t;
+
+#endif
+
+#ifdef BUILD_TIED_TASK_STACK
+
+/* Tied Task stack definitions */
+typedef struct kmp_stack_block {
+ kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ];
+ struct kmp_stack_block * sb_next;
+ struct kmp_stack_block * sb_prev;
+} kmp_stack_block_t;
+
+typedef struct kmp_task_stack {
+ kmp_stack_block_t ts_first_block; // first block of stack entries
+ kmp_taskdata_t ** ts_top; // pointer to the top of stack
+ kmp_int32 ts_entries; // number of entries on the stack
+} kmp_task_stack_t;
+
+#endif // BUILD_TIED_TASK_STACK
+
+typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
+ /* Compiler flags */ /* Total compiler flags must be 16 bits */
+ unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
+ unsigned final : 1; /* task is final(1) so execute immediately */
+ unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
+#if OMP_40_ENABLED
+ unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
+#if OMP_41_ENABLED
+ unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
+ unsigned reserved : 11; /* reserved for compiler use */
+#else
+ unsigned reserved : 12; /* reserved for compiler use */
+#endif
+#else // OMP_40_ENABLED
+ unsigned reserved : 13; /* reserved for compiler use */
+#endif // OMP_40_ENABLED
+
+ /* Library flags */ /* Total library flags must be 16 bits */
+ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
+ unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */
+ unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */
+ unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */
+ /* If either team_serial or tasking_ser is set, task team may be NULL */
+ /* Task State Flags: */
+ unsigned started : 1; /* 1==started, 0==not started */
+ unsigned executing : 1; /* 1==executing, 0==not executing */
+ unsigned complete : 1; /* 1==complete, 0==not complete */
+ unsigned freed : 1; /* 1==freed, 0==allocated */
+ unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
+ unsigned reserved31 : 7; /* reserved for library use */
+
+} kmp_tasking_flags_t;
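+// Bit accounting for the 32-bit invariant stated above, with OMP_40_ENABLED and OMP_41_ENABLED:
+// compiler half = 3 (tiedness, final, merged_if0) + 1 (destructors_thunk) + 1 (proxy)
+// + 11 (reserved) = 16 bits; library half = 4 + 5 + 7 (reserved31) = 16 bits; total 32.
+// A sketch of how this could be checked at compile time, in the same spirit as the
+// KMP_BUILD_ASSERT below for kmp_taskdata_t (an illustration, not present in the source):
+//   KMP_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 );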
+
+
+struct kmp_taskdata { /* aligned during dynamic allocation */
+ kmp_int32 td_task_id; /* id, assigned by debugger */
+ kmp_tasking_flags_t td_flags; /* task flags */
+ kmp_team_t * td_team; /* team for this task */
+ kmp_info_p * td_alloc_thread; /* thread that allocated data structures */
+ /* Currently not used except for perhaps IDB */
+ kmp_taskdata_t * td_parent; /* parent task */
+ kmp_int32 td_level; /* task nesting level */
+ ident_t * td_ident; /* task identifier */
+ // Taskwait data.
+ ident_t * td_taskwait_ident;
+ kmp_uint32 td_taskwait_counter;
+ kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
+ KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */
+ volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */
+ volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */
+#if OMP_40_ENABLED
+ kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup
+ kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here
+ kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies
+#endif
+#if OMPT_SUPPORT
+ ompt_task_info_t ompt_task_info;
+#endif
+#if KMP_HAVE_QUAD
+ _Quad td_dummy; // Align structure to 16-byte size, since it is allocated just before kmp_task_t
+#else
+ kmp_uint32 td_dummy[2];
+#endif
+}; // struct kmp_taskdata
+
+// Make sure padding above worked
+KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
+
+// Data for task team but per thread
+typedef struct kmp_base_thread_data {
+ kmp_info_p * td_thr; // Pointer back to thread info
+ // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued?
+ kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
+ kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
+ kmp_uint32 td_deque_head; // Head of deque (will wrap)
+ kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
+ kmp_int32 td_deque_ntasks; // Number of tasks in deque
+ // GEH: shouldn't this be volatile since used in while-spin?
+ kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
+#ifdef BUILD_TIED_TASK_STACK
+ kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint
+#endif // BUILD_TIED_TASK_STACK
+} kmp_base_thread_data_t;
+
+typedef union KMP_ALIGN_CACHE kmp_thread_data {
+ kmp_base_thread_data_t td;
+ double td_align; /* use worst case alignment */
+ char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ];
+} kmp_thread_data_t;
+
+
+// Data for task teams which are used when tasking is enabled for the team
+typedef struct kmp_base_task_team {
+ kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */
+ /* must be bootstrap lock since used at library shutdown */
+ kmp_task_team_t * tt_next; /* For linking the task team free list */
+ kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */
+ /* Data survives task team deallocation */
+ kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */
+ /* TRUE means tt_threads_data is set up and initialized */
+ kmp_int32 tt_nproc; /* #threads in team */
+ kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */
+#if OMP_41_ENABLED
+ kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
+#endif
+
+ KMP_ALIGN_CACHE
+ volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */
+
+ KMP_ALIGN_CACHE
+ volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
+} kmp_base_task_team_t;
+
+union KMP_ALIGN_CACHE kmp_task_team {
+ kmp_base_task_team_t tt;
+ double tt_align; /* use worst case alignment */
+ char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
+};
+
+#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
+// Free lists keep same-size free memory slots for fast memory allocation routines
+typedef struct kmp_free_list {
+ void *th_free_list_self; // Self-allocated tasks free list
+ void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads
+ void *th_free_list_other; // Non-self free list (to be returned to owner's sync list)
+} kmp_free_list_t;
+#endif
+#if KMP_NESTED_HOT_TEAMS
+// Hot teams array keeps hot teams and their sizes for given thread.
+// Hot teams are not put in the team pool, and their threads are not returned to the thread pool.
+typedef struct kmp_hot_team_ptr {
+ kmp_team_p *hot_team; // pointer to hot_team of given nesting level
+ kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
+} kmp_hot_team_ptr_t;
+#endif
+#if OMP_40_ENABLED
+typedef struct kmp_teams_size {
+ kmp_int32 nteams; // number of teams in a league
+ kmp_int32 nth; // number of threads in each team of the league
+} kmp_teams_size_t;
+#endif
+
+/* ------------------------------------------------------------------------ */
+// OpenMP thread data structures
+//
+
+typedef struct KMP_ALIGN_CACHE kmp_base_info {
+/*
+ * Start with the readonly data which is cache aligned and padded.
+ * This is written by the master before the thread starts working.
+ * (uber masters may update themselves later)
+ * (usage does not consider serialized regions)
+ */
+ kmp_desc_t th_info;
+ kmp_team_p *th_team; /* team we belong to */
+ kmp_root_p *th_root; /* pointer to root of task hierarchy */
+ kmp_info_p *th_next_pool; /* next available thread in the pool */
+ kmp_disp_t *th_dispatch; /* thread's dispatch data */
+ int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
+
+ /* The following are cached from the team info structure */
+ /* TODO use these in more places as determined to be needed via profiling */
+ int th_team_nproc; /* number of threads in a team */
+ kmp_info_p *th_team_master; /* the team's master thread */
+ int th_team_serialized; /* team is serialized */
+#if OMP_40_ENABLED
+ microtask_t th_teams_microtask; /* save entry address for teams construct */
+ int th_teams_level; /* save initial level of teams construct */
+ /* it is 0 on the device but may be any value on the host */
+#endif
+
+ /* The blocktime info is copied from the team struct to the thread struct */
+ /* at the start of a barrier, and the values stored in the team are used */
+ /* at points in the code where the team struct is no longer guaranteed */
+ /* to exist (from the POV of worker threads). */
+ int th_team_bt_intervals;
+ int th_team_bt_set;
+
+
+#if KMP_AFFINITY_SUPPORTED
+ kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
+#endif
+
+/*
+ * The data set by the master at reinit, then R/W by the worker
+ */
+ KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */
+#if KMP_NESTED_HOT_TEAMS
+ kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
+#endif
+#if OMP_40_ENABLED
+ kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
+ kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */
+# if KMP_AFFINITY_SUPPORTED
+ int th_current_place; /* place currently bound to */
+ int th_new_place; /* place to bind to in par reg */
+ int th_first_place; /* first place in partition */
+ int th_last_place; /* last place in partition */
+# endif
+#endif
+#if USE_ITT_BUILD
+ kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
+ kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
+ kmp_uint64 th_frame_time; /* frame timestamp */
+ kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */
+#endif /* USE_ITT_BUILD */
+ kmp_local_t th_local;
+ struct private_common *th_pri_head;
+
+/*
+ * Now the data only used by the worker (after initial allocation)
+ */
+ /* TODO the first serial team should actually be stored in the info_t
+ * structure. this will help reduce initial allocation overhead */
+ KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/
+
+#if OMPT_SUPPORT
+ ompt_thread_info_t ompt_thread_info;
+#endif
+
+/* The following are also read by the master during reinit */
+ struct common_table *th_pri_common;
+
+ volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
+ /* while awaiting queuing lock acquire */
+
+ volatile void *th_sleep_loc; // this points at a kmp_flag<T>
+
+ ident_t *th_ident;
+ unsigned th_x; // Random number generator data
+ unsigned th_a; // Random number generator data
+
+/*
+ * Tasking-related data for the thread
+ */
+ kmp_task_team_t * th_task_team; // Task team struct
+ kmp_taskdata_t * th_current_task; // Innermost Task being executed
+ kmp_uint8 th_task_state; // alternating 0/1 for task team identification
+ kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels
+ kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
+ kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
+
+ /*
+ * More stuff for keeping track of active/sleeping threads
+ * (this part is written by the worker thread)
+ */
+ kmp_uint8 th_active_in_pool; // included in count of
+ // #active threads in pool
+ int th_active; // ! sleeping
+ // 32 bits for TCR/TCW
+
+
+ struct cons_header * th_cons; // used for consistency check
+
+/*
+ * Add the synchronizing data which is cache aligned and padded.
+ */
+ KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ];
+
+ KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
+
+#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
+ #define NUM_LISTS 4
+ kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines
+#endif
+
+#if KMP_OS_WINDOWS
+ kmp_win32_cond_t th_suspend_cv;
+ kmp_win32_mutex_t th_suspend_mx;
+ int th_suspend_init;
+#endif
+#if KMP_OS_UNIX
+ kmp_cond_align_t th_suspend_cv;
+ kmp_mutex_align_t th_suspend_mx;
+ int th_suspend_init_count;
+#endif
+
+#if USE_ITT_BUILD
+ kmp_itt_mark_t th_itt_mark_single;
+ // alignment ???
+#endif /* USE_ITT_BUILD */
+#if KMP_STATS_ENABLED
+ kmp_stats_list* th_stats;
+#endif
+} kmp_base_info_t;
+
+typedef union KMP_ALIGN_CACHE kmp_info {
+ double th_align; /* use worst case alignment */
+ char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ];
+ kmp_base_info_t th;
+} kmp_info_t;
+
+/* ------------------------------------------------------------------------ */
+// OpenMP thread team data structures
+//
+typedef struct kmp_base_data {
+ volatile kmp_uint32 t_value;
+} kmp_base_data_t;
+
+typedef union KMP_ALIGN_CACHE kmp_sleep_team {
+ double dt_align; /* use worst case alignment */
+ char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
+ kmp_base_data_t dt;
+} kmp_sleep_team_t;
+
+typedef union KMP_ALIGN_CACHE kmp_ordered_team {
+ double dt_align; /* use worst case alignment */
+ char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
+ kmp_base_data_t dt;
+} kmp_ordered_team_t;
+
+typedef int (*launch_t)( int gtid );
+
+/* Minimum number of ARGV entries to malloc if necessary */
+#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
+
+// Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we
+// have supported at least 96 bytes. Using a larger value for more space between the master write/worker
+// read section and read/write by all section seems to buy more performance on EPCC PARALLEL.
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
+#else
+# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
+#endif
+#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
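+// Worked example of the arithmetic above (assumes CACHE_LINE == 64, 8-byte pointers so
+// KMP_PTR_SKIP == 8, and 4-byte int/kmp_uint32 -- stated as assumptions, not definitions):
+// on X86_64 the other members counted in the formula total 3*8 + 2*4 + 2*1 + 2 + 4 = 40
+// bytes, giving KMP_INLINE_ARGV_BYTES = 4*64 - (40 % 64) = 216 and
+// KMP_INLINE_ARGV_ENTRIES = 216/8 = 27, comfortably above the historical 96-byte minimum.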
+
+typedef struct KMP_ALIGN_CACHE kmp_base_team {
+ // Synchronization Data ---------------------------------------------------------------------------------
+ KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
+ kmp_balign_team_t t_bar[ bs_last_barrier ];
+ volatile int t_construct; // count of single directive encountered by team
+ kmp_lock_t t_single_lock; // team specific lock
+
+ // Master only -----------------------------------------------------------------------------------------
+ KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
+ int t_master_this_cons; // "this_construct" single counter of master in parent team
+ ident_t *t_ident; // if volatile, have to change too much other crud to volatile too
+ kmp_team_p *t_parent; // parent team
+ kmp_team_p *t_next_pool; // next free team in the team pool
+ kmp_disp_t *t_dispatch; // thread's dispatch data
+ kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
+#if OMP_40_ENABLED
+ kmp_proc_bind_t t_proc_bind; // bind type for par region
+#endif // OMP_40_ENABLED
+#if USE_ITT_BUILD
+ kmp_uint64 t_region_time; // region begin timestamp
+#endif /* USE_ITT_BUILD */
+
+ // Master write, workers read --------------------------------------------------------------------------
+ KMP_ALIGN_CACHE void **t_argv;
+ int t_argc;
+ int t_nproc; // number of threads in team
+ microtask_t t_pkfn;
+ launch_t t_invoke; // procedure to launch the microtask
+
+#if OMPT_SUPPORT
+ ompt_team_info_t ompt_team_info;
+ ompt_lw_taskteam_t *ompt_serialized_team_info;
+#endif
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ kmp_int8 t_fp_control_saved;
+ kmp_int8 t_pad2b;
+ kmp_int16 t_x87_fpu_control_word; // FP control regs
+ kmp_uint32 t_mxcsr;
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
+
+ KMP_ALIGN_CACHE kmp_info_t **t_threads;
+ int t_max_argc;
+ int t_max_nproc; // maximum threads this team can handle (dynamically expandable)
+ int t_serialized; // levels deep of serialized teams
+ dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
+ int t_id; // team's id, assigned by debugger.
+ int t_level; // nested parallel level
+ int t_active_level; // nested active parallel level
+ kmp_r_sched_t t_sched; // run-time schedule for the team
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ int t_first_place; // first & last place in parent thread's partition.
+ int t_last_place; // Restore these values to master after par region.
+#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ int t_size_changed; // team size was changed: 0 = no, 1 = yes, -1 = changed via omp_set_num_threads() call
+
+ // Read/write by workers as well -----------------------------------------------------------------------
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
+ // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
+ // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding.
+ char dummy_padding[1024];
+#endif
+ KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
+ kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams.
+ // for SERIALIZED teams nested 2 or more levels deep
+#if OMP_40_ENABLED
+ kmp_int32 t_cancel_request; // typed flag to store request state of cancellation
+#endif
+ int t_master_active; // save on fork, restore on join
+ kmp_taskq_t t_taskq; // this team's task queue
+ void *t_copypriv_data; // team specific pointer to copyprivate data array
+ kmp_uint32 t_copyin_counter;
+#if USE_ITT_BUILD
+ void *t_stack_id; // team specific stack stitching id (for ittnotify)
+#endif /* USE_ITT_BUILD */
+} kmp_base_team_t;
+
+union KMP_ALIGN_CACHE kmp_team {
+ kmp_base_team_t t;
+ double t_align; /* use worst case alignment */
+ char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ];
+};
+
+
+typedef union KMP_ALIGN_CACHE kmp_time_global {
+ double dt_align; /* use worst case alignment */
+ char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
+ kmp_base_data_t dt;
+} kmp_time_global_t;
+
+typedef struct kmp_base_global {
+ /* cache-aligned */
+ kmp_time_global_t g_time;
+
+ /* non cache-aligned */
+ volatile int g_abort;
+ volatile int g_done;
+
+ int g_dynamic;
+ enum dynamic_mode g_dynamic_mode;
+} kmp_base_global_t;
+
+typedef union KMP_ALIGN_CACHE kmp_global {
+ kmp_base_global_t g;
+ double g_align; /* use worst case alignment */
+ char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ];
+} kmp_global_t;
+
+
+typedef struct kmp_base_root {
+ // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel >= 0)
+ // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch
+ // overhead of keeping r_active
+
+ volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
+ // GEH: This is misnamed, should be r_in_parallel
+ volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
+ int r_in_parallel; /* keeps a count of active parallel regions per root */
+ // GEH: This is misnamed, should be r_active_levels
+ kmp_team_t *r_root_team;
+ kmp_team_t *r_hot_team;
+ kmp_info_t *r_uber_thread;
+ kmp_lock_t r_begin_lock;
+ volatile int r_begin;
+ int r_blocktime; /* blocktime for this root and descendants */
+} kmp_base_root_t;
+
+typedef union KMP_ALIGN_CACHE kmp_root {
+ kmp_base_root_t r;
+ double r_align; /* use worst case alignment */
+ char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ];
+} kmp_root_t;
+
+struct fortran_inx_info {
+ kmp_int32 data;
+};
+
+/* ------------------------------------------------------------------------ */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+extern int __kmp_settings;
+extern int __kmp_duplicate_library_ok;
+#if USE_ITT_BUILD
+extern int __kmp_forkjoin_frames;
+extern int __kmp_forkjoin_frames_mode;
+#endif
+extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
+extern int __kmp_determ_red;
+
+#ifdef KMP_DEBUG
+extern int kmp_a_debug;
+extern int kmp_b_debug;
+extern int kmp_c_debug;
+extern int kmp_d_debug;
+extern int kmp_e_debug;
+extern int kmp_f_debug;
+#endif /* KMP_DEBUG */
+
+/* For debug information logging using rotating buffer */
+#define KMP_DEBUG_BUF_LINES_INIT 512
+#define KMP_DEBUG_BUF_LINES_MIN 1
+
+#define KMP_DEBUG_BUF_CHARS_INIT 128
+#define KMP_DEBUG_BUF_CHARS_MIN 2
+
+extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
+extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
+extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
+extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */
+
+extern char *__kmp_debug_buffer; /* Debug buffer itself */
+extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */
+extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
+/* end rotating debug buffer */
+
+#ifdef KMP_DEBUG
+extern int __kmp_par_range; /* +1 => only go par for constructs in range */
+
+#define KMP_PAR_RANGE_ROUTINE_LEN 1024
+extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
+#define KMP_PAR_RANGE_FILENAME_LEN 1024
+extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
+extern int __kmp_par_range_lb;
+extern int __kmp_par_range_ub;
+#endif
+
+/* For printing out dynamic storage map for threads and teams */
+extern int __kmp_storage_map; /* True means print storage map for threads and teams */
+extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */
+extern int __kmp_storage_map_verbose_specified;
+
+extern kmp_cpuinfo_t __kmp_cpuinfo;
+
+extern volatile int __kmp_init_serial;
+extern volatile int __kmp_init_gtid;
+extern volatile int __kmp_init_common;
+extern volatile int __kmp_init_middle;
+extern volatile int __kmp_init_parallel;
+extern volatile int __kmp_init_monitor;
+extern volatile int __kmp_init_user_locks;
+extern int __kmp_init_counter;
+extern int __kmp_root_counter;
+extern int __kmp_version;
+
+/* list of addresses of allocated caches for commons */
+extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
+
+/* Barrier algorithm types and options */
+extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
+extern kmp_uint32 __kmp_barrier_release_bb_dflt;
+extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
+extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
+extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ];
+extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ];
+extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ];
+extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ];
+extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ];
+extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ];
+extern char const *__kmp_barrier_type_name [ bs_last_barrier ];
+extern char const *__kmp_barrier_pattern_name [ bp_last_bar ];
+
+/* Global Locks */
+extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
+extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
+extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
+extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
+extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
+
+extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
+extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
+extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
+
+/* used for yielding spin-waits */
+extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
+extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
+
+extern enum library_type __kmp_library;
+
+extern enum sched_type __kmp_sched; /* default runtime scheduling */
+extern enum sched_type __kmp_static; /* default static scheduling method */
+extern enum sched_type __kmp_guided; /* default guided scheduling method */
+extern enum sched_type __kmp_auto; /* default auto scheduling method */
+extern int __kmp_chunk; /* default runtime chunk size */
+
+extern size_t __kmp_stksize; /* stack size per thread */
+extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */
+extern size_t __kmp_stkoffset; /* stack offset per thread */
+extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
+
+extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
+extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
+extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
+extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */
+extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
+extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */
+extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
+extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
+extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */
+extern int __kmp_generate_warnings; /* should we issue warnings? */
+extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
+
+#ifdef DEBUG_SUSPEND
+extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
+#endif
+
+extern kmp_uint32 __kmp_yield_init;
+extern kmp_uint32 __kmp_yield_next;
+extern kmp_uint32 __kmp_yielding_on;
+extern kmp_uint32 __kmp_yield_cycle;
+extern kmp_int32 __kmp_yield_on_count;
+extern kmp_int32 __kmp_yield_off_count;
+
+/* ------------------------------------------------------------------------- */
+extern int __kmp_allThreadsSpecified;
+
+extern size_t __kmp_align_alloc;
+/* following data protected by initialization routines */
+extern int __kmp_xproc; /* number of processors in the system */
+extern int __kmp_avail_proc; /* number of processors available to the process */
+extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
+extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
+extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */
+extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */
+extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */
+extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */
+extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */
+extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */
+extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */
+extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */
+extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */
+extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */
+#ifdef KMP_ADJUST_BLOCKTIME
+extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
+#endif /* KMP_ADJUST_BLOCKTIME */
+#ifdef KMP_DFLT_NTH_CORES
+extern int __kmp_ncores; /* Total number of cores for threads placement */
+#endif
+extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */
+
+extern int __kmp_need_register_atfork_specified;
+extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */
+extern int __kmp_gtid_mode; /* Method of getting gtid, values:
+ 0 - not set, will be set at runtime
+ 1 - using stack search
+ 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS))
+ 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only.
+ */
+extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
+#ifdef KMP_TDATA_GTID
+#if KMP_OS_WINDOWS
+extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
+#else
+extern __thread int __kmp_gtid;
+#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
+#endif
+extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
+extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */
+extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */
+extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
+#if KMP_NESTED_HOT_TEAMS
+extern int __kmp_hot_teams_mode;
+extern int __kmp_hot_teams_max_level;
+#endif
+
+# if KMP_OS_LINUX
+extern enum clock_function_type __kmp_clock_function;
+extern int __kmp_clock_function_param;
+# endif /* KMP_OS_LINUX */
+
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+extern enum mic_type __kmp_mic_type;
+#endif
+
+# ifdef USE_LOAD_BALANCE
+extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */
+# endif /* USE_LOAD_BALANCE */
+
+// OpenMP 3.1 - Nested num threads array
+typedef struct kmp_nested_nthreads_t {
+ int * nth;
+ int size;
+ int used;
+} kmp_nested_nthreads_t;
+
+extern kmp_nested_nthreads_t __kmp_nested_nth;
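+// Illustrative sketch (an assumed example, not taken from the source): a nested
+// specification such as OMP_NUM_THREADS="4,3,2" would leave the global roughly as
+//   __kmp_nested_nth.nth[0..2] == {4, 3, 2}   /* heap-allocated array of per-level values */
+//   __kmp_nested_nth.used      == 3           /* entries actually specified */
+//   __kmp_nested_nth.size      == 3           /* allocated capacity (>= used) */
+// with nth[level] supplying the default team size at each nesting level.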
+
+#if KMP_USE_ADAPTIVE_LOCKS
+
+// Parameters for the speculative lock backoff system.
+struct kmp_adaptive_backoff_params_t {
+ // Number of soft retries before it counts as a hard retry.
+ kmp_uint32 max_soft_retries;
+ // Badness is a bit mask: 0,1,3,7,15,...; on each hard failure one more bit is set
+ kmp_uint32 max_badness;
+};
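+// A sketch of the intended badness update (an assumption about the lock code, not a quote
+// of it), where badness is a hypothetical local standing for the lock's current mask:
+//   kmp_uint32 new_badness = ( badness << 1 ) | 1;      /* 0 -> 1 -> 3 -> 7 -> ... */
+//   if ( new_badness <= __kmp_adaptive_backoff_params.max_badness )
+//       badness = new_badness;                          /* otherwise stay capped */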
+
+extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
+
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+extern char * __kmp_speculative_statsfile;
+#endif
+
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+#if OMP_40_ENABLED
+extern int __kmp_display_env; /* TRUE or FALSE */
+extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
+extern int __kmp_omp_cancellation; /* TRUE or FALSE */
+#endif
+
+/* ------------------------------------------------------------------------- */
+
+/* --------------------------------------------------------------------------- */
+/* the following are protected by the fork/join lock */
+/* write: lock read: anytime */
+extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
+/* read/write: lock */
+extern volatile kmp_team_t * __kmp_team_pool;
+extern volatile kmp_info_t * __kmp_thread_pool;
+
+/* total number of threads reachable from some root thread including all root threads */
+extern volatile int __kmp_nth;
+/* total number of threads reachable from some root thread including all root threads,
+ and those in the thread pool */
+extern volatile int __kmp_all_nth;
+extern int __kmp_thread_pool_nth;
+extern volatile int __kmp_thread_pool_active_nth;
+
+extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
+/* end data protected by fork/join lock */
+/* --------------------------------------------------------------------------- */
+
+extern kmp_global_t __kmp_global; /* global status */
+
+extern kmp_info_t __kmp_monitor;
+extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library.
+extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library.
+
+#if USE_DEBUGGER
+
+#define _KMP_GEN_ID( counter ) \
+ ( \
+ __kmp_debugging \
+ ? \
+ KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \
+ : \
+ ~ 0 \
+ )
+#else
+#define _KMP_GEN_ID( counter ) \
+ ( \
+ ~ 0 \
+ )
+#endif /* USE_DEBUGGER */
+
+#define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter )
+#define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter )
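+// Illustrative use (a sketch, not a quote from the runtime): a newly allocated task or
+// team would record its debugger-visible id through these macros, e.g.
+//   taskdata->td_task_id = KMP_GEN_TASK_ID();
+//   team->t.t_id         = KMP_GEN_TEAM_ID();
+// KMP_TEST_THEN_INC32 returns the previous counter value, so the +1 yields the new id;
+// when __kmp_debugging is off the macros collapse to ~0 and no atomic update is done.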
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... );
+
+extern void __kmp_serial_initialize( void );
+extern void __kmp_middle_initialize( void );
+extern void __kmp_parallel_initialize( void );
+
+extern void __kmp_internal_begin( void );
+extern void __kmp_internal_end_library( int gtid );
+extern void __kmp_internal_end_thread( int gtid );
+extern void __kmp_internal_end_atexit( void );
+extern void __kmp_internal_end_fini( void );
+extern void __kmp_internal_end_dtor( void );
+extern void __kmp_internal_end_dest( void* );
+
+extern int __kmp_register_root( int initial_thread );
+extern void __kmp_unregister_root( int gtid );
+
+extern int __kmp_ignore_mppbeg( void );
+extern int __kmp_ignore_mppend( void );
+
+extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws );
+extern void __kmp_exit_single( int gtid );
+
+extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
+extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
+
+#ifdef USE_LOAD_BALANCE
+extern int __kmp_get_load_balance( int );
+#endif
+
+#ifdef BUILD_TV
+extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr );
+#endif
+
+extern int __kmp_get_global_thread_id( void );
+extern int __kmp_get_global_thread_id_reg( void );
+extern void __kmp_exit_thread( int exit_status );
+extern void __kmp_abort( char const * format, ... );
+extern void __kmp_abort_thread( void );
+extern void __kmp_abort_process( void );
+extern void __kmp_warn( char const * format, ... );
+
+extern void __kmp_set_num_threads( int new_nth, int gtid );
+
+// Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
+static inline kmp_info_t * __kmp_entry_thread()
+{
+ int gtid = __kmp_entry_gtid();
+
+ return __kmp_threads[gtid];
+}
+
+extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
+extern int __kmp_get_max_active_levels( int gtid );
+extern int __kmp_get_ancestor_thread_num( int gtid, int level );
+extern int __kmp_get_team_size( int gtid, int level );
+extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
+extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
+
+extern unsigned short __kmp_get_random( kmp_info_t * thread );
+extern void __kmp_init_random( kmp_info_t * thread );
+
+extern kmp_r_sched_t __kmp_get_schedule_global( void );
+extern void __kmp_adjust_num_threads( int new_nproc );
+
+extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL );
+extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL );
+extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL );
+#define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR )
+#define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR )
+#define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR )
+
+#if USE_FAST_MEMORY
+extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL );
+extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL );
+extern void __kmp_free_fast_memory( kmp_info_t *this_thr );
+extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr );
+#define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
+#define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )
+#endif
+
+extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL );
+extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL );
+extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL );
+extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL );
+#define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR )
+#define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR )
+#define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR )
+#define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR )
+
+#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
+#define KMP_INTERNAL_FREE(p) free(p)
+#define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz))
+#define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz))
+
+extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads );
+
+#if OMP_40_ENABLED
+extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind );
+extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads );
+#endif
+
+extern void __kmp_yield( int cond );
+
+extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
+ kmp_int32 chunk );
+extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
+ kmp_int32 chunk );
+extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
+ kmp_int64 chunk );
+extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
+ kmp_int64 chunk );
+
+extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st );
+extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st );
+extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st );
+extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st );
+
+extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid );
+extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid );
+extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid );
+extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid );
+
+
+#ifdef KMP_GOMP_COMPAT
+
+extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
+ kmp_int32 chunk, int push_ws );
+extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
+ kmp_int32 chunk, int push_ws );
+extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
+ kmp_int64 chunk, int push_ws );
+extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
+ kmp_int64 chunk, int push_ws );
+extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid );
+extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid );
+extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid );
+extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid );
+
+#endif /* KMP_GOMP_COMPAT */
+
+
+extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker );
+extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker );
+extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker );
+extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker );
+extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker );
+
+extern kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker );
+extern kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker );
+extern kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker );
+extern kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker );
+extern kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker );
+
+extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
+extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj );
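+// Illustrative sketch (an assumption about typical use, not part of the declared contract):
+// spin, yielding as configured, until a 32-bit location satisfies a predicate, e.g.
+//   volatile kmp_uint32 flag = 0;                       /* set to 1 by another thread */
+//   __kmp_wait_yield_4( &flag, 1, __kmp_eq_4, NULL );   /* returns once flag == 1 */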
+
+class kmp_flag_32;
+class kmp_flag_64;
+class kmp_flag_oncore;
+extern void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin
+#if USE_ITT_BUILD
+ , void * itt_sync_obj
+#endif
+ );
+extern void __kmp_release_32(kmp_flag_32 *flag);
+extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
+#if USE_ITT_BUILD
+ , void * itt_sync_obj
+#endif
+ );
+extern void __kmp_release_64(kmp_flag_64 *flag);
+extern void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin
+#if USE_ITT_BUILD
+ , void * itt_sync_obj
+#endif
+ );
+extern void __kmp_release_oncore(kmp_flag_oncore *flag);
+
+extern void __kmp_infinite_loop( void );
+
+extern void __kmp_cleanup( void );
+
+#if KMP_HANDLE_SIGNALS
+ extern int __kmp_handle_signals;
+ extern void __kmp_install_signals( int parallel_init );
+ extern void __kmp_remove_signals( void );
+#endif
+
+extern void __kmp_clear_system_time( void );
+extern void __kmp_read_system_time( double *delta );
+
+extern void __kmp_check_stack_overlap( kmp_info_t *thr );
+
+extern void __kmp_expand_host_name( char *buffer, size_t size );
+extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern );
+
+#if KMP_OS_WINDOWS
+extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */
+#endif
+
+extern void __kmp_runtime_initialize( void ); /* machine specific initialization */
+extern void __kmp_runtime_destroy( void );
+
+#if KMP_AFFINITY_SUPPORTED
+extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask);
+extern void __kmp_affinity_initialize(void);
+extern void __kmp_affinity_uninitialize(void);
+extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
+#if OMP_40_ENABLED
+extern void __kmp_affinity_set_place(int gtid);
+#endif
+extern void __kmp_affinity_determine_capable( const char *env_var );
+extern int __kmp_aux_set_affinity(void **mask);
+extern int __kmp_aux_get_affinity(void **mask);
+extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
+extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
+extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
+extern void __kmp_balanced_affinity( int tid, int team_size );
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+extern void __kmp_cleanup_hierarchy();
+extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+extern int __kmp_futex_determine_capable( void );
+
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+extern void __kmp_gtid_set_specific( int gtid );
+extern int __kmp_gtid_get_specific( void );
+
+extern double __kmp_read_cpu_time( void );
+
+extern int __kmp_read_system_info( struct kmp_sys_info *info );
+
+extern void __kmp_create_monitor( kmp_info_t *th );
+
+extern void *__kmp_launch_thread( kmp_info_t *thr );
+
+extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size );
+
+#if KMP_OS_WINDOWS
+extern int __kmp_still_running(kmp_info_t *th);
+extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val );
+extern void __kmp_free_handle( kmp_thread_t tHandle );
+#endif
+
+extern void __kmp_reap_monitor( kmp_info_t *th );
+extern void __kmp_reap_worker( kmp_info_t *th );
+extern void __kmp_terminate_thread( int gtid );
+
+extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag );
+extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag );
+extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag );
+extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag );
+extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag );
+extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag );
+
+extern void __kmp_elapsed( double * );
+extern void __kmp_elapsed_tick( double * );
+
+extern void __kmp_enable( int old_state );
+extern void __kmp_disable( int *old_state );
+
+extern void __kmp_thread_sleep( int millis );
+
+extern void __kmp_common_initialize( void );
+extern void __kmp_common_destroy( void );
+extern void __kmp_common_destroy_gtid( int gtid );
+
+#if KMP_OS_UNIX
+extern void __kmp_register_atfork( void );
+#endif
+extern void __kmp_suspend_initialize( void );
+extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th );
+
+extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
+ kmp_team_t *team, int tid);
+#if OMP_40_ENABLED
+extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
+#if OMPT_SUPPORT
+ ompt_parallel_id_t ompt_parallel_id,
+#endif
+ kmp_proc_bind_t proc_bind,
+ kmp_internal_control_t *new_icvs,
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
+#else
+extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
+#if OMPT_SUPPORT
+ ompt_parallel_id_t ompt_parallel_id,
+#endif
+ kmp_internal_control_t *new_icvs,
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
+#endif // OMP_40_ENABLED
+extern void __kmp_free_thread( kmp_info_t * );
+extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) );
+extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
+
+/* ------------------------------------------------------------------------ */
+
+extern void __kmp_initialize_bget( kmp_info_t *th );
+extern void __kmp_finalize_bget( kmp_info_t *th );
+
+KMP_EXPORT void *kmpc_malloc( size_t size );
+KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize );
+KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size );
+KMP_EXPORT void kmpc_free( void *ptr );
+
+/* ------------------------------------------------------------------------ */
+/* declarations for internal use */
+
+extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
+ size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
+extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
+
+/*!
+ * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call.
+ */
+enum fork_context_e
+{
+ fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */
+ fork_context_intel, /**< Called from Intel generated code. */
+ fork_context_last
+};
+extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
+ kmp_int32 argc,
+#if OMPT_SUPPORT
+ void *unwrapped_task,
+#endif
+ microtask_t microtask, launch_t invoker,
+/* TODO: revert workaround for Intel(R) 64 tracker #96 */
+#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ va_list *ap
+#else
+ va_list ap
+#endif
+ );
+
+extern void __kmp_join_call( ident_t *loc, int gtid
+#if OMPT_SUPPORT
+ , enum fork_context_e fork_context
+#endif
+#if OMP_40_ENABLED
+ , int exit_teams = 0
+#endif
+ );
+
+extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
+extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
+extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
+extern int __kmp_invoke_task_func( int gtid );
+extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
+extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
+
+// should never have been exported
+KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
+#if OMP_40_ENABLED
+extern int __kmp_invoke_teams_master( int gtid );
+extern void __kmp_teams_master( int gtid );
+#endif
+extern void __kmp_save_internal_controls( kmp_info_t * thread );
+extern void __kmp_user_set_library (enum library_type arg);
+extern void __kmp_aux_set_library (enum library_type arg);
+extern void __kmp_aux_set_stacksize( size_t arg);
+extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid);
+extern void __kmp_aux_set_defaults( char const * str, int len );
+
+/* The functions below are placed here so they can be called from __kmp_aux_env_initialize() in kmp_settings.c */
+void kmpc_set_blocktime (int arg);
+void ompc_set_nested( int flag );
+void ompc_set_dynamic( int flag );
+void ompc_set_num_threads( int arg );
+
+extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
+ kmp_team_t *team, int tid );
+extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
+extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
+ kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry );
+extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
+ kmp_team_t *team, int tid, int set_curr_task );
+
+int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
+ int *thread_finished,
+#if USE_ITT_BUILD
+ void * itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+ kmp_int32 is_constrained);
+int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
+ int *thread_finished,
+#if USE_ITT_BUILD
+ void * itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+ kmp_int32 is_constrained);
+int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
+ int *thread_finished,
+#if USE_ITT_BUILD
+ void * itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+ kmp_int32 is_constrained);
+
+extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team );
+extern void __kmp_reap_task_teams( void );
+extern void __kmp_wait_to_unref_task_teams( void );
+extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always );
+extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
+extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
+#if USE_ITT_BUILD
+ , void * itt_sync_obj
+#endif /* USE_ITT_BUILD */
+ , int wait=1
+);
+extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
+
+extern int __kmp_is_address_mapped( void *addr );
+extern kmp_uint64 __kmp_hardware_timestamp(void);
+
+#if KMP_OS_UNIX
+extern int __kmp_read_from_file( char const *path, char const *format, ... );
+#endif
+
+/* ------------------------------------------------------------------------ */
+//
+// Assembly routines that have no compiler intrinsic replacement
+//
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+extern void __kmp_query_cpuid( kmp_cpuinfo_t *p );
+
+#define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
+static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
+
+extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
+extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p );
+extern void __kmp_clear_x87_fpu_status_word();
+# define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
+
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[]
+#if OMPT_SUPPORT
+ , void **exit_frame_ptr
+#endif
+);
+
+
+/* ------------------------------------------------------------------------ */
+
+KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags );
+KMP_EXPORT void __kmpc_end ( ident_t * );
+
+KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor,
+ kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length );
+KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor );
+KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size );
+
+KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * );
+KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * );
+KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * );
+KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * );
+
+KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * );
+KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... );
+
+KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid );
+
+KMP_EXPORT void __kmpc_flush ( ident_t *);
+KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
+KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
+
+#if OMP_41_ENABLED
+KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint );
+#endif
+
+KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid );
+
+KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid );
+
+KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid );
+
+KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter,
+ kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk );
+
+KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid );
+
+KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit );
+
+extern void KMPC_SET_NUM_THREADS ( int arg );
+extern void KMPC_SET_DYNAMIC ( int flag );
+extern void KMPC_SET_NESTED ( int flag );
+
+/* --------------------------------------------------------------------------- */
+
+/*
+ * Taskq interface routines
+ */
+
+KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
+ size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds);
+KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
+KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
+KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status);
+KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
+KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task);
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * OMP 3.0 tasking interface routines
+ */
+
+KMP_EXPORT kmp_int32
+__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
+KMP_EXPORT kmp_task_t*
+__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry );
+KMP_EXPORT void
+__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
+KMP_EXPORT void
+__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
+KMP_EXPORT kmp_int32
+__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
+KMP_EXPORT kmp_int32
+__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid );
+
+KMP_EXPORT kmp_int32
+__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part );
+
+#if TASK_UNUSED
+void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
+void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
+#endif // TASK_UNUSED
+
+/* ------------------------------------------------------------------------ */
+
+#if OMP_40_ENABLED
+
+KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
+KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
+
+KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
+KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
+extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task );
+
+extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate );
+
+KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
+KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
+
+#if OMP_41_ENABLED
+
+KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask );
+KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
+
+#endif
+
+#endif
+
+
+/*
+ * Lock interface routines (fast versions with gtid passed in)
+ */
+KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+
+#if OMP_41_ENABLED
+KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
+KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Interface to fast scalable reduce methods routines
+ */
+
+KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_vars, size_t reduce_size,
+ void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck );
+KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
+KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_vars, size_t reduce_size,
+ void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck );
+KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
+
+/*
+ * internal fast reduction routines
+ */
+
+extern PACKED_REDUCTION_METHOD_T
+__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_vars, size_t reduce_size,
+ void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck );
+
+// this function is for testing set/get/determine reduce method
+KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
+
+KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
+KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
+
+// this function is exported for testing of KMP_PLACE_THREADS functionality
+KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+// C++ port
+// missing 'extern "C"' declarations
+
+KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc );
+KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid );
+KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
+
+#if OMP_40_ENABLED
+KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
+KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
+KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
+
+#endif
+
+KMP_EXPORT void*
+__kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid,
+ void * data, size_t size, void *** cache );
+
+// Symbols for MS mutual detection.
+extern int _You_must_link_with_exactly_one_OpenMP_library;
+extern int _You_must_link_with_Intel_OpenMP_library;
+#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
+ extern int _You_must_link_with_Microsoft_OpenMP_library;
+#endif
+
+
+// The routines below are not exported.
+// Consider making them 'static' in corresponding source files.
+void
+kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+struct private_common *
+kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+
+//
+// ompc_, kmpc_ entries moved from omp.h.
+//
+#if KMP_OS_WINDOWS
+# define KMPC_CONVENTION __cdecl
+#else
+# define KMPC_CONVENTION
+#endif
+
+#ifndef __OMP_H
+typedef enum omp_sched_t {
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+} omp_sched_t;
+typedef void * kmp_affinity_mask_t;
+#endif
+
+KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
+KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
+KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
+KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
+KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
+
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* KMP_H */
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp
index 4c16b2f7f3..4e6699ff21 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_affinity.cpp
@@ -1,4735 +1,4735 @@
-/*
- * kmp_affinity.cpp -- affinity management
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_str.h"
-#include "kmp_wrapper_getpid.h"
-#include "kmp_affinity.h"
-
-// Store the real or imagined machine hierarchy here
-static hierarchy_info machine_hierarchy;
-
-void __kmp_cleanup_hierarchy() {
- machine_hierarchy.fini();
-}
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
- kmp_uint32 depth;
- // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
- if (TCR_1(machine_hierarchy.uninitialized))
- machine_hierarchy.init(NULL, nproc);
-
- // Adjust the hierarchy in case num threads exceeds original
- if (nproc > machine_hierarchy.base_num_threads)
- machine_hierarchy.resize(nproc);
-
- depth = machine_hierarchy.depth;
- KMP_DEBUG_ASSERT(depth > 0);
-
- thr_bar->depth = depth;
- thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
- thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
-}
-
-#if KMP_AFFINITY_SUPPORTED
-
-//
-// Print the affinity mask to the character array in a pretty format.
-//
-#if KMP_USE_HWLOC
-char *
-__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
-{
- int num_chars_to_write, num_chars_written;
- char* scan;
- KMP_ASSERT(buf_len >= 40);
-
- // bufsize of 0 just retrieves the needed buffer size.
- num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
-
- // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
- // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
- // take into account the '\0' character.
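-    // For illustration (hypothetical mask): hwloc renders the list form as e.g. "0-3,8",
-    // so a mask covering procs 0-3 and 8 becomes "{0-3,8}" here, and an over-long list
-    // is truncated by the ellipsis logic below into something like "{0,2,...}".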
- if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
- KMP_SNPRINTF(buf, buf_len, "{<empty>}");
- } else if(num_chars_to_write < buf_len - 3) {
- // no problem fitting the mask into buf_len number of characters
- buf[0] = '{';
- // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
- num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
- buf[num_chars_written+1] = '}';
- buf[num_chars_written+2] = '\0';
- } else {
- // Need to truncate the affinity mask string and add ellipsis.
- // To do this, we first write out the '{' + str(mask)
- buf[0] = '{';
- hwloc_bitmap_list_snprintf(buf+1, buf_len-7, (hwloc_bitmap_t)mask);
-        // then, go to the 7th-to-last character and walk backwards until we are NOT
-        // on a digit, then write "...}\0". This way the ellipsis is added cleanly and we don't
-        // overwrite part of an affinity number, i.e. we avoid something like { 45, 67, 8...} and get
-        // { 45, 67,...} instead.
- scan = buf + buf_len - 7;
- while(*scan >= '0' && *scan <= '9' && scan >= buf)
- scan--;
- *(scan+1) = '.';
- *(scan+2) = '.';
- *(scan+3) = '.';
- *(scan+4) = '}';
- *(scan+5) = '\0';
- }
- return buf;
-}
-#else
-char *
-__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
-{
- KMP_ASSERT(buf_len >= 40);
- char *scan = buf;
- char *end = buf + buf_len - 1;
-
- //
- // Find first element / check for empty set.
- //
- size_t i;
- for (i = 0; i < KMP_CPU_SETSIZE; i++) {
- if (KMP_CPU_ISSET(i, mask)) {
- break;
- }
- }
- if (i == KMP_CPU_SETSIZE) {
- KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
- while (*scan != '\0') scan++;
- KMP_ASSERT(scan <= end);
- return buf;
- }
-
- KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
- while (*scan != '\0') scan++;
- i++;
- for (; i < KMP_CPU_SETSIZE; i++) {
- if (! KMP_CPU_ISSET(i, mask)) {
- continue;
- }
-
- //
- // Check for buffer overflow. A string of the form ",<n>" will have
- // at most 10 characters, plus we want to leave room to print ",...}"
- // if the set is too large to print for a total of 15 characters.
- // We already left room for '\0' in setting end.
- //
- if (end - scan < 15) {
- break;
- }
- KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
- while (*scan != '\0') scan++;
- }
- if (i < KMP_CPU_SETSIZE) {
- KMP_SNPRINTF(scan, end-scan+1, ",...");
- while (*scan != '\0') scan++;
- }
- KMP_SNPRINTF(scan, end-scan+1, "}");
- while (*scan != '\0') scan++;
- KMP_ASSERT(scan <= end);
- return buf;
-}
-#endif // KMP_USE_HWLOC
-
-
-void
-__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
-{
- KMP_CPU_ZERO(mask);
-
-# if KMP_GROUP_AFFINITY
-
- if (__kmp_num_proc_groups > 1) {
- int group;
- KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
- for (group = 0; group < __kmp_num_proc_groups; group++) {
- int i;
- int num = __kmp_GetActiveProcessorCount(group);
- for (i = 0; i < num; i++) {
- KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
- }
- }
- }
- else
-
-# endif /* KMP_GROUP_AFFINITY */
-
- {
- int proc;
- for (proc = 0; proc < __kmp_xproc; proc++) {
- KMP_CPU_SET(proc, mask);
- }
- }
-}
-
-//
-// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
-// called to renumber the labels from [0..n] and place them into the child_num
-// vector of the address object. This is done in case the labels used for
-// the children at one node of the hierarchy differ from those used for
-// another node at the same level. Example: suppose the machine has 2 nodes
-// with 2 packages each. The first node contains packages 601 and 602, and
-// second node contains packages 603 and 604. If we try to sort the table
-// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
-// because we are paying attention to the labels themselves, not the ordinal
-// child numbers. By using the child numbers in the sort, the result is
-// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
-//
-static void
-__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
- int numAddrs)
-{
- KMP_DEBUG_ASSERT(numAddrs > 0);
- int depth = address2os->first.depth;
- unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
- * sizeof(unsigned));
- int labCt;
- for (labCt = 0; labCt < depth; labCt++) {
- address2os[0].first.childNums[labCt] = counts[labCt] = 0;
- lastLabel[labCt] = address2os[0].first.labels[labCt];
- }
- int i;
- for (i = 1; i < numAddrs; i++) {
- for (labCt = 0; labCt < depth; labCt++) {
- if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
- int labCt2;
- for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
- counts[labCt2] = 0;
- lastLabel[labCt2] = address2os[i].first.labels[labCt2];
- }
- counts[labCt]++;
- lastLabel[labCt] = address2os[i].first.labels[labCt];
- break;
- }
- }
- for (labCt = 0; labCt < depth; labCt++) {
- address2os[i].first.childNums[labCt] = counts[labCt];
- }
- for (; labCt < (int)Address::maxDepth; labCt++) {
- address2os[i].first.childNums[labCt] = 0;
- }
- }
-}
-
-
-//
-// All of the __kmp_affinity_create_*_map() routines should set
-// __kmp_affinity_masks to a vector of affinity mask objects of length
-// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
-// return the number of levels in the machine topology tree (zero if
-// __kmp_affinity_type == affinity_none).
-//
-// All of the __kmp_affinity_create_*_map() routines should set *fullMask
-// to the affinity mask for the initialization thread. They need to save and
-// restore the mask, and it could be needed later, so saving it is just an
-// optimization to avoid calling kmp_get_system_affinity() again.
-//
-static kmp_affin_mask_t *fullMask = NULL;
-
-kmp_affin_mask_t *
-__kmp_affinity_get_fullMask() { return fullMask; }
-
-
-static int nCoresPerPkg, nPackages;
-static int __kmp_nThreadsPerCore;
-#ifndef KMP_DFLT_NTH_CORES
-static int __kmp_ncores;
-#endif
-
-//
-// __kmp_affinity_uniform_topology() doesn't work when called from
-// places which support arbitrarily many levels in the machine topology
-// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
-// and __kmp_affinity_create_x2apicid_map().
-//
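-// For example (hypothetical numbers): a machine reporting 2 packages x 4 cores x
-// 2 HW threads per core is considered uniform only when all 16 logical processors
-// are available to the library; otherwise it is reported as non-uniform.
-//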
-inline static bool
-__kmp_affinity_uniform_topology()
-{
- return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
-}
-
-
-//
-// Print out the detailed machine topology map, i.e. the physical locations
-// of each OS proc.
-//
-static void
-__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
- int pkgLevel, int coreLevel, int threadLevel)
-{
- int proc;
-
- KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
- for (proc = 0; proc < len; proc++) {
- int level;
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
- for (level = 0; level < depth; level++) {
- if (level == threadLevel) {
- __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
- }
- else if (level == coreLevel) {
- __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
- }
- else if (level == pkgLevel) {
- __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
- }
- else if (level > pkgLevel) {
- __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
- level - pkgLevel - 1);
- }
- else {
- __kmp_str_buf_print(&buf, "L%d ", level);
- }
- __kmp_str_buf_print(&buf, "%d ",
- address2os[proc].first.labels[level]);
- }
- KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
- buf.str);
- __kmp_str_buf_free(&buf);
- }
-}
-
-#if KMP_USE_HWLOC
-static int
-__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // Save the affinity mask for the current thread.
- //
- kmp_affin_mask_t *oldMask;
- KMP_CPU_ALLOC(oldMask);
- __kmp_get_system_affinity(oldMask, TRUE);
-
- unsigned depth = hwloc_topology_get_depth(__kmp_hwloc_topology);
- int threadLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_PU);
- int coreLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
- int pkgLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
- __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 0;
-
- //
- // This makes an assumption about the topology being four levels:
- // machines -> packages -> cores -> hardware threads
- //
- hwloc_obj_t current_level_iterator = hwloc_get_root_obj(__kmp_hwloc_topology);
- hwloc_obj_t child_iterator;
- for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
- child_iterator != NULL;
- child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
- {
- nPackages++;
- }
- current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, pkgLevel, 0);
- for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
- child_iterator != NULL;
- child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
- {
- nCoresPerPkg++;
- }
- current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, coreLevel, 0);
- for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
- child_iterator != NULL;
- child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
- {
- __kmp_nThreadsPerCore++;
- }
-
- if (! KMP_AFFINITY_CAPABLE())
- {
- //
- // Hack to try and infer the machine topology using only the data
- // available from cpuid on the current thread, and __kmp_xproc.
- //
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
- __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
- nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
- }
-
- //
- // Allocate the data structure to be returned.
- //
- AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
-
- unsigned num_hardware_threads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
- unsigned i;
- hwloc_obj_t hardware_thread_iterator;
- int nActiveThreads = 0;
- for(i=0;i<num_hardware_threads;i++) {
- hardware_thread_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, threadLevel, i);
- Address addr(3);
- if(! KMP_CPU_ISSET(i, fullMask)) continue;
- addr.labels[0] = hardware_thread_iterator->parent->parent->logical_index;
- addr.labels[1] = hardware_thread_iterator->parent->logical_index % nCoresPerPkg;
- addr.labels[2] = hardware_thread_iterator->logical_index % __kmp_nThreadsPerCore;
- retval[nActiveThreads] = AddrUnsPair(addr, hardware_thread_iterator->os_index);
- nActiveThreads++;
- }
-
- //
- // If there's only one thread context to bind to, return now.
- //
- KMP_ASSERT(nActiveThreads > 0);
- if (nActiveThreads == 1) {
- __kmp_ncores = nPackages = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
- // Form an Address object which only includes the package level.
- //
- Address addr(1);
- addr.labels[0] = retval[0].first.labels[pkgLevel-1];
- retval[0].first = addr;
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
- }
-
- *address2os = retval;
- KMP_CPU_FREE(oldMask);
- return 1;
- }
-
- //
- // Sort the table by physical Id.
- //
- qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return if affinity is not enabled.
- //
- __kmp_ncores = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, coreLevel);
-
- //
- // Check to see if the machine topology is uniform
- //
- unsigned npackages = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, pkgLevel);
- unsigned ncores = __kmp_ncores;
- unsigned nthreads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
- unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
-
- //
- // Print the machine topology summary.
- //
- if (__kmp_affinity_verbose) {
- char mask[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
-
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
-
- __kmp_str_buf_print(&buf, "%d", npackages);
- //for (level = 1; level <= pkgLevel; level++) {
- // __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
- // }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
-
- __kmp_str_buf_free(&buf);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
-    // Find any levels with radix 1, and remove them from the map
- // (except for the package level).
- //
- int new_depth = 0;
- int level;
- unsigned proc;
- for (level = 1; level < (int)depth; level++) {
- if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
- continue;
- }
- new_depth++;
- }
-
- //
- // If we are removing any levels, allocate a new vector to return,
- // and copy the relevant information to it.
- //
- if (new_depth != depth-1) {
- AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
- sizeof(AddrUnsPair) * nActiveThreads);
- for (proc = 0; (int)proc < nActiveThreads; proc++) {
- Address addr(new_depth);
- new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
- }
- int new_level = 0;
- for (level = 1; level < (int)depth; level++) {
- if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
- if (level == threadLevel) {
- threadLevel = -1;
- }
- else if ((threadLevel >= 0) && (level < threadLevel)) {
- threadLevel--;
- }
- if (level == coreLevel) {
- coreLevel = -1;
- }
- else if ((coreLevel >= 0) && (level < coreLevel)) {
- coreLevel--;
- }
- if (level < pkgLevel) {
- pkgLevel--;
- }
- continue;
- }
- for (proc = 0; (int)proc < nActiveThreads; proc++) {
- new_retval[proc].first.labels[new_level]
- = retval[proc].first.labels[level];
- }
- new_level++;
- }
-
- __kmp_free(retval);
- retval = new_retval;
- depth = new_depth;
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- __kmp_affinity_gran_levels = 0;
- if ((threadLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
- __kmp_affinity_gran_levels++;
- }
- if ((coreLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
- __kmp_affinity_gran_levels++;
- }
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels++;
- }
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, nActiveThreads, depth-1, pkgLevel-1,
- coreLevel-1, threadLevel-1);
- }
-
- KMP_CPU_FREE(oldMask);
- *address2os = retval;
- if(depth == 0) return 0;
- else return depth-1;
-}
-#endif // KMP_USE_HWLOC
-
-//
-// If we don't know how to retrieve the machine's processor topology, or
-// encounter an error in doing so, this routine is called to form a "flat"
-// mapping of os thread id's <-> processor id's.
-//
-static int
-__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
-    // Even if __kmp_affinity_type == affinity_none, this routine might still
-    // be called to set __kmp_ncores, as well as
- // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
- //
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- __kmp_ncores = nPackages = __kmp_xproc;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
- }
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return now if affinity is not enabled.
- //
- __kmp_ncores = nPackages = __kmp_avail_proc;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
-
- KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- if (__kmp_affinity_type == affinity_none) {
- return 0;
- }
-
- //
-    // Construct the data structure to be returned.
- //
- *address2os = (AddrUnsPair*)
- __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
- int avail_ct = 0;
- unsigned int i;
- KMP_CPU_SET_ITERATE(i, fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(i, fullMask)) {
- continue;
- }
-
- Address addr(1);
- addr.labels[0] = i;
- (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
- }
- if (__kmp_affinity_verbose) {
- KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Only the package level is modeled in the machine topology map,
- // so the #levels of granularity is either 0 or 1.
- //
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels = 1;
- }
- else {
- __kmp_affinity_gran_levels = 0;
- }
- }
- return 1;
-}
-
-
-# if KMP_GROUP_AFFINITY
-
-//
-// If multiple Windows* OS processor groups exist, we can create a 2-level
-// topology map with the groups at level 0 and the individual procs at
-// level 1.
-//
-// This facilitates letting the threads float among all procs in a group,
-// if granularity=group (the default when there are multiple groups).
-//
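-// For illustration (hypothetical 64-bit system, so CHAR_BIT * sizeof(DWORD_PTR) == 64):
-// OS proc 70 would be assigned the address {1, 6}, i.e. processor group 1 and
-// position 6 within that group, matching the labels[0] = i / 64, labels[1] = i % 64
-// computation in the routine below.
-//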
-static int
-__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // If we don't have multiple processor groups, return now.
- // The flat mapping will be used.
- //
- if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
- // FIXME set *msg_id
- return -1;
- }
-
- //
-    // Construct the data structure to be returned.
- //
- *address2os = (AddrUnsPair*)
- __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
- int avail_ct = 0;
- int i;
- KMP_CPU_SET_ITERATE(i, fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(i, fullMask)) {
- continue;
- }
-
- Address addr(2);
- addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
- addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
- (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
-
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
- addr.labels[1]);
- }
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- if (__kmp_affinity_gran == affinity_gran_group) {
- __kmp_affinity_gran_levels = 1;
- }
- else if ((__kmp_affinity_gran == affinity_gran_fine)
- || (__kmp_affinity_gran == affinity_gran_thread)) {
- __kmp_affinity_gran_levels = 0;
- }
- else {
- const char *gran_str = NULL;
- if (__kmp_affinity_gran == affinity_gran_core) {
- gran_str = "core";
- }
- else if (__kmp_affinity_gran == affinity_gran_package) {
- gran_str = "package";
- }
- else if (__kmp_affinity_gran == affinity_gran_node) {
- gran_str = "node";
- }
- else {
- KMP_ASSERT(0);
- }
-
- // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
- __kmp_affinity_gran_levels = 0;
- }
- }
- return 2;
-}
-
-# endif /* KMP_GROUP_AFFINITY */
-
-
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
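-// Returns the number of bits needed to encode `count` distinct ids, i.e. the
-// smallest r with (1 << r) >= count.  For example, a count of 8 yields a width of 3.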
-static int
-__kmp_cpuid_mask_width(int count) {
- int r = 0;
-
- while((1<<r) < count)
- ++r;
- return r;
-}
-
-
-class apicThreadInfo {
-public:
- unsigned osId; // param to __kmp_affinity_bind_thread
- unsigned apicId; // from cpuid after binding
- unsigned maxCoresPerPkg; // ""
- unsigned maxThreadsPerPkg; // ""
- unsigned pkgId; // inferred from above values
- unsigned coreId; // ""
- unsigned threadId; // ""
-};
-
-
-static int
-__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
-{
- const apicThreadInfo *aa = (const apicThreadInfo *)a;
- const apicThreadInfo *bb = (const apicThreadInfo *)b;
- if (aa->osId < bb->osId) return -1;
- if (aa->osId > bb->osId) return 1;
- return 0;
-}
-
-
-static int
-__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
-{
- const apicThreadInfo *aa = (const apicThreadInfo *)a;
- const apicThreadInfo *bb = (const apicThreadInfo *)b;
- if (aa->pkgId < bb->pkgId) return -1;
- if (aa->pkgId > bb->pkgId) return 1;
- if (aa->coreId < bb->coreId) return -1;
- if (aa->coreId > bb->coreId) return 1;
- if (aa->threadId < bb->threadId) return -1;
- if (aa->threadId > bb->threadId) return 1;
- return 0;
-}
-
-
-//
-// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
-// an algorithm which cycles through the available os threads, setting
-// the current thread's affinity mask to that thread, and then retrieves
-// the Apic Id for each thread context using the cpuid instruction.
-//
-static int
-__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- kmp_cpuid buf;
- int rc;
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // Check if cpuid leaf 4 is supported.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax < 4) {
- *msg_id = kmp_i18n_str_NoLeaf4Support;
- return -1;
- }
-
- //
- // The algorithm used starts by setting the affinity to each available
- // thread and retrieving info from the cpuid instruction, so if we are
- // not capable of calling __kmp_get_system_affinity() and
-    // __kmp_set_system_affinity(), then we need to do something else - use
- // the defaults that we calculated from issuing cpuid without binding
- // to each proc.
- //
- if (! KMP_AFFINITY_CAPABLE()) {
- //
- // Hack to try and infer the machine topology using only the data
- // available from cpuid on the current thread, and __kmp_xproc.
- //
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
- //
- // Get an upper bound on the number of threads per package using
- // cpuid(1).
- //
-        // On some OS/chip combinations where HT is supported by the chip
- // but is disabled, this value will be 2 on a single core chip.
- // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
- //
- __kmp_x86_cpuid(1, 0, &buf);
- int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
- if (maxThreadsPerPkg == 0) {
- maxThreadsPerPkg = 1;
- }
-
- //
- // The num cores per pkg comes from cpuid(4).
- // 1 must be added to the encoded value.
- //
-        // The author of cpu_count.cpp treated this as only an upper bound
- // on the number of cores, but I haven't seen any cases where it
- // was greater than the actual number of cores, so we will treat
- // it as exact in this block of code.
- //
- // First, we need to check if cpuid(4) is supported on this chip.
- // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
- // has the value n or greater.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax >= 4) {
- __kmp_x86_cpuid(4, 0, &buf);
- nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
- }
- else {
- nCoresPerPkg = 1;
- }
-
- //
- // There is no way to reliably tell if HT is enabled without issuing
-        // the cpuid instruction from every thread and correlating the cpuid
- // info, so if the machine is not affinity capable, we assume that HT
- // is off. We have seen quite a few machines where maxThreadsPerPkg
- // is 2, yet the machine does not support HT.
- //
- // - Older OSes are usually found on machines with older chips, which
- // do not support HT.
- //
- // - The performance penalty for mistakenly identifying a machine as
-        //   HT when it isn't (which results in blocktime being incorrectly set
-        //   to 0) is greater than the penalty for mistakenly identifying
- // a machine as being 1 thread/core when it is really HT enabled
- // (which results in blocktime being incorrectly set to a positive
- // value).
- //
- __kmp_ncores = __kmp_xproc;
- nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
- __kmp_nThreadsPerCore = 1;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
- }
-
- //
- //
- // From here on, we can assume that it is safe to call
- // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
- // even if __kmp_affinity_type = affinity_none.
- //
-
- //
- // Save the affinity mask for the current thread.
- //
- kmp_affin_mask_t *oldMask;
- KMP_CPU_ALLOC(oldMask);
- KMP_ASSERT(oldMask != NULL);
- __kmp_get_system_affinity(oldMask, TRUE);
-
- //
- // Run through each of the available contexts, binding the current thread
- // to it, and obtaining the pertinent information using the cpuid instr.
- //
- // The relevant information is:
- //
- // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
-    // has a unique Apic Id, which is of the form pkg# : core# : thread#.
- //
- // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
- // value of this field determines the width of the core# + thread#
- // fields in the Apic Id. It is also an upper bound on the number
- // of threads per package, but it has been verified that situations
-    //    happen where it is not exact. In particular, on certain OS/chip
- // combinations where Intel(R) Hyper-Threading Technology is supported
- // by the chip but has
- // been disabled, the value of this field will be 2 (for a single core
- // chip). On other OS/chip combinations supporting
- // Intel(R) Hyper-Threading Technology, the value of
- // this field will be 1 when Intel(R) Hyper-Threading Technology is
- // disabled and 2 when it is enabled.
- //
- // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
- // value of this field (+1) determines the width of the core# field in
- // the Apic Id. The comments in "cpucount.cpp" say that this value is
- // an upper bound, but the IA-32 architecture manual says that it is
- // exactly the number of cores per package, and I haven't seen any
- // case where it wasn't.
- //
- // From this information, deduce the package Id, core Id, and thread Id,
- // and set the corresponding fields in the apicThreadInfo struct.
- //
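-    // Worked example (hypothetical values): apicId = 0x1D, maxThreadsPerPkg = 8,
-    // maxCoresPerPkg = 4.  Then widthCT = 3, widthC = 2, widthT = 1, and the
-    // decomposition below yields pkgId = 0x1D >> 3 = 3,
-    // coreId = (0x1D >> 1) & 0x3 = 2, threadId = 0x1D & 0x1 = 1.
-    //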
- unsigned i;
- apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
- __kmp_avail_proc * sizeof(apicThreadInfo));
- unsigned nApics = 0;
- KMP_CPU_SET_ITERATE(i, fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(i, fullMask)) {
- continue;
- }
- KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
-
- __kmp_affinity_bind_thread(i);
- threadInfo[nApics].osId = i;
-
- //
- // The apic id and max threads per pkg come from cpuid(1).
- //
- __kmp_x86_cpuid(1, 0, &buf);
-        if (! ((buf.edx >> 9) & 1)) {
- __kmp_set_system_affinity(oldMask, TRUE);
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_ApicNotPresent;
- return -1;
- }
- threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
- threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
- if (threadInfo[nApics].maxThreadsPerPkg == 0) {
- threadInfo[nApics].maxThreadsPerPkg = 1;
- }
-
- //
- // Max cores per pkg comes from cpuid(4).
- // 1 must be added to the encoded value.
- //
- // First, we need to check if cpuid(4) is supported on this chip.
- // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
- // has the value n or greater.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax >= 4) {
- __kmp_x86_cpuid(4, 0, &buf);
- threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
- }
- else {
- threadInfo[nApics].maxCoresPerPkg = 1;
- }
-
- //
- // Infer the pkgId / coreId / threadId using only the info
- // obtained locally.
- //
- int widthCT = __kmp_cpuid_mask_width(
- threadInfo[nApics].maxThreadsPerPkg);
- threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
-
- int widthC = __kmp_cpuid_mask_width(
- threadInfo[nApics].maxCoresPerPkg);
- int widthT = widthCT - widthC;
- if (widthT < 0) {
- //
- // I've never seen this one happen, but I suppose it could, if
- // the cpuid instruction on a chip was really screwed up.
- // Make sure to restore the affinity mask before the tail call.
- //
- __kmp_set_system_affinity(oldMask, TRUE);
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
-
- int maskC = (1 << widthC) - 1;
- threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
- &maskC;
-
- int maskT = (1 << widthT) - 1;
- threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
-
- nApics++;
- }
-
- //
- // We've collected all the info we need.
- // Restore the old affinity mask for this thread.
- //
- __kmp_set_system_affinity(oldMask, TRUE);
-
- //
- // If there's only one thread context to bind to, form an Address object
- // with depth 1 and return immediately (or, if affinity is off, set
- // address2os to NULL and return).
- //
- // If it is configured to omit the package level when there is only a
- // single package, the logic at the end of this routine won't work if
- // there is only a single thread - it would try to form an Address
- // object with depth 0.
- //
- KMP_ASSERT(nApics > 0);
- if (nApics == 1) {
- __kmp_ncores = nPackages = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
- Address addr(1);
- addr.labels[0] = threadInfo[0].pkgId;
- (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
- }
-
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return 1;
- }
-
- //
- // Sort the threadInfo table by physical Id.
- //
- qsort(threadInfo, nApics, sizeof(*threadInfo),
- __kmp_affinity_cmp_apicThreadInfo_phys_id);
-
- //
- // The table is now sorted by pkgId / coreId / threadId, but we really
- // don't know the radix of any of the fields. pkgId's may be sparsely
- // assigned among the chips on a system. Although coreId's are usually
- // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
- // [0..threadsPerCore-1], we don't want to make any such assumptions.
- //
- // For that matter, we don't know what coresPerPkg and threadsPerCore
- // (or the total # packages) are at this point - we want to determine
- // that now. We only have an upper bound on the first two figures.
- //
- // We also perform a consistency check at this point: the values returned
- // by the cpuid instruction for any thread bound to a given package had
- // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
- //
- nPackages = 1;
- nCoresPerPkg = 1;
- __kmp_nThreadsPerCore = 1;
- unsigned nCores = 1;
-
- unsigned pkgCt = 1; // to determine radii
- unsigned lastPkgId = threadInfo[0].pkgId;
- unsigned coreCt = 1;
- unsigned lastCoreId = threadInfo[0].coreId;
- unsigned threadCt = 1;
- unsigned lastThreadId = threadInfo[0].threadId;
-
-    // intra-pkg consistency checks
- unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
- unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
-
- for (i = 1; i < nApics; i++) {
- if (threadInfo[i].pkgId != lastPkgId) {
- nCores++;
- pkgCt++;
- lastPkgId = threadInfo[i].pkgId;
- if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
- coreCt = 1;
- lastCoreId = threadInfo[i].coreId;
- if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
- threadCt = 1;
- lastThreadId = threadInfo[i].threadId;
-
- //
- // This is a different package, so go on to the next iteration
- // without doing any consistency checks. Reset the consistency
- // check vars, though.
- //
- prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
- prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
- continue;
- }
-
- if (threadInfo[i].coreId != lastCoreId) {
- nCores++;
- coreCt++;
- lastCoreId = threadInfo[i].coreId;
- if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
- threadCt = 1;
- lastThreadId = threadInfo[i].threadId;
- }
- else if (threadInfo[i].threadId != lastThreadId) {
- threadCt++;
- lastThreadId = threadInfo[i].threadId;
- }
- else {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
- return -1;
- }
-
- //
- // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
-        // fields agree between all the threads bound to a given package.
- //
- if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
- || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
- return -1;
- }
- }
- nPackages = pkgCt;
- if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
- if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return now if affinity is not enabled.
- //
- __kmp_ncores = nCores;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
-
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
- // Now that we've determined the number of packages, the number of cores
- // per package, and the number of threads per core, we can construct the
- // data structure that is to be returned.
- //
- int pkgLevel = 0;
- int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
- int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
- unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
-
- KMP_ASSERT(depth > 0);
- *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
-
- for (i = 0; i < nApics; ++i) {
- Address addr(depth);
- unsigned os = threadInfo[i].osId;
- int d = 0;
-
- if (pkgLevel >= 0) {
- addr.labels[d++] = threadInfo[i].pkgId;
- }
- if (coreLevel >= 0) {
- addr.labels[d++] = threadInfo[i].coreId;
- }
- if (threadLevel >= 0) {
- addr.labels[d++] = threadInfo[i].threadId;
- }
- (*address2os)[i] = AddrUnsPair(addr, os);
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- __kmp_affinity_gran_levels = 0;
- if ((threadLevel >= 0)
- && (__kmp_affinity_gran > affinity_gran_thread)) {
- __kmp_affinity_gran_levels++;
- }
- if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
- __kmp_affinity_gran_levels++;
- }
- if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
- __kmp_affinity_gran_levels++;
- }
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
- coreLevel, threadLevel);
- }
-
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return depth;
-}
-
-
-//
-// Intel(R) microarchitecture code name Nehalem, Dunnington and later
-// architectures support a newer interface for specifying the x2APIC Ids,
-// based on cpuid leaf 11.
-//
-static int
-__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- kmp_cpuid buf;
-
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // Check to see if cpuid leaf 11 is supported.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax < 11) {
- *msg_id = kmp_i18n_str_NoLeaf11Support;
- return -1;
- }
- __kmp_x86_cpuid(11, 0, &buf);
- if (buf.ebx == 0) {
- *msg_id = kmp_i18n_str_NoLeaf11Support;
- return -1;
- }
-
- //
- // Find the number of levels in the machine topology. While we're at it,
- // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
- // try to get more accurate values later by explicitly counting them,
- // but get reasonable defaults now, in case we return early.
- //
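-    // For reference (hypothetical HT machine): cpuid(11, 0) typically reports the
-    // SMT level (kind 1 in ecx bits 15:8), cpuid(11, 1) the core level (kind 2),
-    // and the first sub-leaf returning ebx == 0 terminates the enumeration; the
-    // loop below records the level indices and the per-level counts from ebx.
-    //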
- int level;
- int threadLevel = -1;
- int coreLevel = -1;
- int pkgLevel = -1;
- __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
-
- for (level = 0;; level++) {
- if (level > 31) {
- //
- // FIXME: Hack for DPD200163180
- //
- // If level is big then something went wrong -> exiting
- //
- // There could actually be 32 valid levels in the machine topology,
- // but so far, the only machine we have seen which does not exit
- // this loop before iteration 32 has fubar x2APIC settings.
- //
- // For now, just reject this case based upon loop trip count.
- //
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- __kmp_x86_cpuid(11, level, &buf);
- if (buf.ebx == 0) {
- if (pkgLevel < 0) {
- //
- // Will infer nPackages from __kmp_xproc
- //
- pkgLevel = level;
- level++;
- }
- break;
- }
- int kind = (buf.ecx >> 8) & 0xff;
- if (kind == 1) {
- //
- // SMT level
- //
- threadLevel = level;
- coreLevel = -1;
- pkgLevel = -1;
- __kmp_nThreadsPerCore = buf.ebx & 0xff;
- if (__kmp_nThreadsPerCore == 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- }
- else if (kind == 2) {
- //
- // core level
- //
- coreLevel = level;
- pkgLevel = -1;
- nCoresPerPkg = buf.ebx & 0xff;
- if (nCoresPerPkg == 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- }
- else {
- if (level <= 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- if (pkgLevel >= 0) {
- continue;
- }
- pkgLevel = level;
- nPackages = buf.ebx & 0xff;
- if (nPackages == 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- }
- }
- int depth = level;
-
- //
- // In the above loop, "level" was counted from the finest level (usually
- // thread) to the coarsest. The caller expects that we will place the
- // labels in (*address2os)[].first.labels[] in the inverse order, so
- // we need to invert the vars saying which level means what.
- //
- if (threadLevel >= 0) {
- threadLevel = depth - threadLevel - 1;
- }
- if (coreLevel >= 0) {
- coreLevel = depth - coreLevel - 1;
- }
- KMP_DEBUG_ASSERT(pkgLevel >= 0);
- pkgLevel = depth - pkgLevel - 1;
-
- //
- // The algorithm used starts by setting the affinity to each available
- // thread and retrieving info from the cpuid instruction, so if we are
- // not capable of calling __kmp_get_system_affinity() and
-    // __kmp_set_system_affinity(), then we need to do something else - use
- // the defaults that we calculated from issuing cpuid without binding
- // to each proc.
- //
- if (! KMP_AFFINITY_CAPABLE())
- {
- //
- // Hack to try and infer the machine topology using only the data
- // available from cpuid on the current thread, and __kmp_xproc.
- //
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
- __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
- nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
- }
-
- //
- //
- // From here on, we can assume that it is safe to call
- // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
- // even if __kmp_affinity_type = affinity_none.
- //
-
- //
- // Save the affinity mask for the current thread.
- //
- kmp_affin_mask_t *oldMask;
- KMP_CPU_ALLOC(oldMask);
- __kmp_get_system_affinity(oldMask, TRUE);
-
- //
- // Allocate the data structure to be returned.
- //
- AddrUnsPair *retval = (AddrUnsPair *)
- __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
-
- //
- // Run through each of the available contexts, binding the current thread
- // to it, and obtaining the pertinent information using the cpuid instr.
- //
- unsigned int proc;
- int nApics = 0;
- KMP_CPU_SET_ITERATE(proc, fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(proc, fullMask)) {
- continue;
- }
- KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
-
- __kmp_affinity_bind_thread(proc);
-
- //
-        // Extract the labels for each level in the machine topology map
- // from the Apic ID.
- //
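-        // Example (hypothetical): with cumulative shift widths of 1 (SMT) and 4
-        // (core) reported by cpuid(11), an x2APIC id of 0x35 decodes below as
-        // thread = 0x35 & 0x1 = 1, core = (0x35 & 0xF) >> 1 = 2, package = 0x35 >> 4 = 3.
-        //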
- Address addr(depth);
- int prev_shift = 0;
-
- for (level = 0; level < depth; level++) {
- __kmp_x86_cpuid(11, level, &buf);
- unsigned apicId = buf.edx;
- if (buf.ebx == 0) {
- if (level != depth - 1) {
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
- return -1;
- }
- addr.labels[depth - level - 1] = apicId >> prev_shift;
- level++;
- break;
- }
- int shift = buf.eax & 0x1f;
- int mask = (1 << shift) - 1;
- addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
- prev_shift = shift;
- }
- if (level != depth) {
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
- return -1;
- }
-
- retval[nApics] = AddrUnsPair(addr, proc);
- nApics++;
- }
-
- //
- // We've collected all the info we need.
- // Restore the old affinity mask for this thread.
- //
- __kmp_set_system_affinity(oldMask, TRUE);
-
- //
- // If there's only one thread context to bind to, return now.
- //
- KMP_ASSERT(nApics > 0);
- if (nApics == 1) {
- __kmp_ncores = nPackages = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
- // Form an Address object which only includes the package level.
- //
- Address addr(1);
- addr.labels[0] = retval[0].first.labels[pkgLevel];
- retval[0].first = addr;
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
- }
-
- *address2os = retval;
- KMP_CPU_FREE(oldMask);
- return 1;
- }
-
- //
- // Sort the table by physical Id.
- //
- qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
-
- //
- // Find the radix at each of the levels.
- //
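-    // Here totals[level] counts the distinct objects seen at that level across the
-    // whole machine, counts[level] counts them under the current parent object, and
-    // maxCt[level] keeps the largest such per-parent count (e.g., hypothetically,
-    // maxCt[coreLevel] == 4 on a package with four cores).
-    //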
- unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- for (level = 0; level < depth; level++) {
- totals[level] = 1;
- maxCt[level] = 1;
- counts[level] = 1;
- last[level] = retval[0].first.labels[level];
- }
-
- //
- // From here on, the iteration variable "level" runs from the finest
- // level to the coarsest, i.e. we iterate forward through
- // (*address2os)[].first.labels[] - in the previous loops, we iterated
- // backwards.
- //
- for (proc = 1; (int)proc < nApics; proc++) {
- int level;
- for (level = 0; level < depth; level++) {
- if (retval[proc].first.labels[level] != last[level]) {
- int j;
- for (j = level + 1; j < depth; j++) {
- totals[j]++;
- counts[j] = 1;
- // The line below causes printing incorrect topology information
- // in case the max value for some level (maxCt[level]) is encountered earlier than
-                    // a smaller value while going through the array.
-                    // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] would be 2
- // whereas it must be 4.
- // TODO!!! Check if it can be commented safely
- //maxCt[j] = 1;
- last[j] = retval[proc].first.labels[j];
- }
- totals[level]++;
- counts[level]++;
- if (counts[level] > maxCt[level]) {
- maxCt[level] = counts[level];
- }
- last[level] = retval[proc].first.labels[level];
- break;
- }
- else if (level == depth - 1) {
- __kmp_free(last);
- __kmp_free(maxCt);
- __kmp_free(counts);
- __kmp_free(totals);
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
- return -1;
- }
- }
- }
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return if affinity is not enabled.
- //
- if (threadLevel >= 0) {
- __kmp_nThreadsPerCore = maxCt[threadLevel];
- }
- else {
- __kmp_nThreadsPerCore = 1;
- }
- nPackages = totals[pkgLevel];
-
- if (coreLevel >= 0) {
- __kmp_ncores = totals[coreLevel];
- nCoresPerPkg = maxCt[coreLevel];
- }
- else {
- __kmp_ncores = nPackages;
- nCoresPerPkg = 1;
- }
-
- //
- // Check to see if the machine topology is uniform
- //
- unsigned prod = maxCt[0];
- for (level = 1; level < depth; level++) {
- prod *= maxCt[level];
- }
- bool uniform = (prod == totals[level - 1]);
-
- //
- // Print the machine topology summary.
- //
- if (__kmp_affinity_verbose) {
- char mask[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
-
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
-
- __kmp_str_buf_print(&buf, "%d", totals[0]);
- for (level = 1; level <= pkgLevel; level++) {
- __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
- }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
-
- __kmp_str_buf_free(&buf);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(last);
- __kmp_free(maxCt);
- __kmp_free(counts);
- __kmp_free(totals);
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
-    // Find any levels with radix 1, and remove them from the map
- // (except for the package level).
- //
- int new_depth = 0;
- for (level = 0; level < depth; level++) {
- if ((maxCt[level] == 1) && (level != pkgLevel)) {
- continue;
- }
- new_depth++;
- }
-
- //
- // If we are removing any levels, allocate a new vector to return,
- // and copy the relevant information to it.
- //
- if (new_depth != depth) {
- AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
- sizeof(AddrUnsPair) * nApics);
- for (proc = 0; (int)proc < nApics; proc++) {
- Address addr(new_depth);
- new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
- }
- int new_level = 0;
- int newPkgLevel = -1;
- int newCoreLevel = -1;
- int newThreadLevel = -1;
- int i;
- for (level = 0; level < depth; level++) {
- if ((maxCt[level] == 1)
- && (level != pkgLevel)) {
- //
- // Remove this level. Never remove the package level
- //
- continue;
- }
- if (level == pkgLevel) {
- newPkgLevel = level;
- }
- if (level == coreLevel) {
- newCoreLevel = level;
- }
- if (level == threadLevel) {
- newThreadLevel = level;
- }
- for (proc = 0; (int)proc < nApics; proc++) {
- new_retval[proc].first.labels[new_level]
- = retval[proc].first.labels[level];
- }
- new_level++;
- }
-
- __kmp_free(retval);
- retval = new_retval;
- depth = new_depth;
- pkgLevel = newPkgLevel;
- coreLevel = newCoreLevel;
- threadLevel = newThreadLevel;
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- __kmp_affinity_gran_levels = 0;
- if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
- __kmp_affinity_gran_levels++;
- }
- if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
- __kmp_affinity_gran_levels++;
- }
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels++;
- }
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
- coreLevel, threadLevel);
- }
-
- __kmp_free(last);
- __kmp_free(maxCt);
- __kmp_free(counts);
- __kmp_free(totals);
- KMP_CPU_FREE(oldMask);
- *address2os = retval;
- return depth;
-}
-
-
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-
-#define osIdIndex 0
-#define threadIdIndex 1
-#define coreIdIndex 2
-#define pkgIdIndex 3
-#define nodeIdIndex 4
-
-typedef unsigned *ProcCpuInfo;
-static unsigned maxIndex = pkgIdIndex;
-
-
-static int
-__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
-{
- const unsigned *aa = (const unsigned *)a;
- const unsigned *bb = (const unsigned *)b;
- if (aa[osIdIndex] < bb[osIdIndex]) return -1;
- if (aa[osIdIndex] > bb[osIdIndex]) return 1;
- return 0;
-}
-
-
-static int
-__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
-{
- unsigned i;
- const unsigned *aa = *((const unsigned **)a);
- const unsigned *bb = *((const unsigned **)b);
- for (i = maxIndex; ; i--) {
- if (aa[i] < bb[i]) return -1;
- if (aa[i] > bb[i]) return 1;
- if (i == osIdIndex) break;
- }
- return 0;
-}
-
-
-//
-// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
-// affinity map.
-//
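For orientation, a sketch of the record format this parser expects; the field names are the ones matched below, while the values and layout are purely illustrative:

    // processor   : 0
    // physical id : 0
    // core id     : 0
    // thread id   : 0      <- optional; auto-assigned later if absent
    //                         (a blank line ends each processor record)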
-static int
-__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
- kmp_i18n_id_t *const msg_id, FILE *f)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
-    // Scan the file, counting the number of "processor" (osId) fields and
-    // finding the highest value of <n> in any node_<n> field.
- //
- char buf[256];
- unsigned num_records = 0;
- while (! feof(f)) {
- buf[sizeof(buf) - 1] = 1;
- if (! fgets(buf, sizeof(buf), f)) {
- //
- // Read errors presumably because of EOF
- //
- break;
- }
-
- char s1[] = "processor";
- if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
- num_records++;
- continue;
- }
-
- //
- // FIXME - this will match "node_<n> <garbage>"
- //
- unsigned level;
- if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
- if (nodeIdIndex + level >= maxIndex) {
- maxIndex = nodeIdIndex + level;
- }
- continue;
- }
- }
-
- //
- // Check for empty file / no valid processor records, or too many.
- // The number of records can't exceed the number of valid bits in the
- // affinity mask.
- //
- if (num_records == 0) {
- *line = 0;
- *msg_id = kmp_i18n_str_NoProcRecords;
- return -1;
- }
- if (num_records > (unsigned)__kmp_xproc) {
- *line = 0;
- *msg_id = kmp_i18n_str_TooManyProcRecords;
- return -1;
- }
-
- //
-    // Set the file pointer back to the beginning, so that we can scan the
-    // file again, this time performing a full parse of the data.
-    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
- // Adding an extra element at the end allows us to remove a lot of extra
- // checks for termination conditions.
- //
- if (fseek(f, 0, SEEK_SET) != 0) {
- *line = 0;
- *msg_id = kmp_i18n_str_CantRewindCpuinfo;
- return -1;
- }
-
- //
- // Allocate the array of records to store the proc info in. The dummy
- // element at the end makes the logic in filling them out easier to code.
- //
- unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
- * sizeof(unsigned *));
- unsigned i;
- for (i = 0; i <= num_records; i++) {
- threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- }
-
-#define CLEANUP_THREAD_INFO \
- for (i = 0; i <= num_records; i++) { \
- __kmp_free(threadInfo[i]); \
- } \
- __kmp_free(threadInfo);
-
- //
- // A value of UINT_MAX means that we didn't find the field
- //
- unsigned __index;
-
-#define INIT_PROC_INFO(p) \
- for (__index = 0; __index <= maxIndex; __index++) { \
- (p)[__index] = UINT_MAX; \
- }
-
- for (i = 0; i <= num_records; i++) {
- INIT_PROC_INFO(threadInfo[i]);
- }
-
- unsigned num_avail = 0;
- *line = 0;
- while (! feof(f)) {
- //
- // Create an inner scoping level, so that all the goto targets at the
- // end of the loop appear in an outer scoping level. This avoids
- // warnings about jumping past an initialization to a target in the
- // same block.
- //
- {
- buf[sizeof(buf) - 1] = 1;
- bool long_line = false;
- if (! fgets(buf, sizeof(buf), f)) {
- //
- // Read errors presumably because of EOF
- //
- // If there is valid data in threadInfo[num_avail], then fake
-                // a blank line to ensure that the last address gets parsed.
- //
- bool valid = false;
- for (i = 0; i <= maxIndex; i++) {
- if (threadInfo[num_avail][i] != UINT_MAX) {
- valid = true;
- }
- }
- if (! valid) {
- break;
- }
- buf[0] = 0;
- } else if (!buf[sizeof(buf) - 1]) {
- //
-                // The line is longer than the buffer. Set a flag, and only
-                // report an error (via CHECK_LINE) if the line turns out to
-                // be one we actually needed to parse.
- //
- long_line = true;
-
-#define CHECK_LINE \
- if (long_line) { \
- CLEANUP_THREAD_INFO; \
- *msg_id = kmp_i18n_str_LongLineCpuinfo; \
- return -1; \
- }
- }
- (*line)++;
-
- char s1[] = "processor";
- if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s1) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][osIdIndex] = val;
-#if KMP_OS_LINUX && USE_SYSFS_INFO
- char path[256];
- KMP_SNPRINTF(path, sizeof(path),
- "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
- threadInfo[num_avail][osIdIndex]);
- __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
-
- KMP_SNPRINTF(path, sizeof(path),
- "/sys/devices/system/cpu/cpu%u/topology/core_id",
- threadInfo[num_avail][osIdIndex]);
- __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
- continue;
-#else
- }
- char s2[] = "physical id";
- if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s2) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][pkgIdIndex] = val;
- continue;
- }
- char s3[] = "core id";
- if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s3) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][coreIdIndex] = val;
- continue;
-#endif // KMP_OS_LINUX && USE_SYSFS_INFO
- }
- char s4[] = "thread id";
- if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s4) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][threadIdIndex] = val;
- continue;
- }
- unsigned level;
- if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s4) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- KMP_ASSERT(nodeIdIndex + level <= maxIndex);
- if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][nodeIdIndex + level] = val;
- continue;
- }
-
- //
- // We didn't recognize the leading token on the line.
- // There are lots of leading tokens that we don't recognize -
- // if the line isn't empty, go on to the next line.
- //
- if ((*buf != 0) && (*buf != '\n')) {
- //
- // If the line is longer than the buffer, read characters
- // until we find a newline.
- //
- if (long_line) {
- int ch;
- while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
- }
- continue;
- }
-
- //
- // A newline has signalled the end of the processor record.
- // Check that there aren't too many procs specified.
- //
- if ((int)num_avail == __kmp_xproc) {
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_TooManyEntries;
- return -1;
- }
-
- //
- // Check for missing fields. The osId field must be there, and we
- // currently require that the physical id field is specified, also.
- //
- if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_MissingProcField;
- return -1;
- }
- if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_MissingPhysicalIDField;
- return -1;
- }
-
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
- INIT_PROC_INFO(threadInfo[num_avail]);
- continue;
- }
-
- //
- // We have a successful parse of this proc's info.
- // Increment the counter, and prepare for the next proc.
- //
- num_avail++;
- KMP_ASSERT(num_avail <= num_records);
- INIT_PROC_INFO(threadInfo[num_avail]);
- }
- continue;
-
- no_val:
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_MissingValCpuinfo;
- return -1;
-
- dup_field:
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
- return -1;
- }
- *line = 0;
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- unsigned teamSize = 0;
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- // check for num_records == __kmp_xproc ???
-
- //
- // If there's only one thread context to bind to, form an Address object
- // with depth 1 and return immediately (or, if affinity is off, set
- // address2os to NULL and return).
- //
- // If it is configured to omit the package level when there is only a
- // single package, the logic at the end of this routine won't work if
- // there is only a single thread - it would try to form an Address
- // object with depth 0.
- //
- KMP_ASSERT(num_avail > 0);
- KMP_ASSERT(num_avail <= num_records);
- if (num_avail == 1) {
- __kmp_ncores = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
- if (__kmp_affinity_verbose) {
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- }
- else {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- fullMask);
- KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- }
- int index;
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
- __kmp_str_buf_print(&buf, "1");
- for (index = maxIndex - 1; index > pkgIdIndex; index--) {
- __kmp_str_buf_print(&buf, " x 1");
- }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
- __kmp_str_buf_free(&buf);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- CLEANUP_THREAD_INFO;
- return 0;
- }
-
- *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
- Address addr(1);
- addr.labels[0] = threadInfo[0][pkgIdIndex];
- (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
- }
-
- CLEANUP_THREAD_INFO;
- return 1;
- }
-
- //
- // Sort the threadInfo table by physical Id.
- //
- qsort(threadInfo, num_avail, sizeof(*threadInfo),
- __kmp_affinity_cmp_ProcCpuInfo_phys_id);
-
- //
- // The table is now sorted by pkgId / coreId / threadId, but we really
- // don't know the radix of any of the fields. pkgId's may be sparsely
- // assigned among the chips on a system. Although coreId's are usually
- // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
- // [0..threadsPerCore-1], we don't want to make any such assumptions.
- //
- // For that matter, we don't know what coresPerPkg and threadsPerCore
- // (or the total # packages) are at this point - we want to determine
- // that now. We only have an upper bound on the first two figures.
- //
- unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
-
- bool assign_thread_ids = false;
- unsigned threadIdCt;
- unsigned index;
-
- restart_radix_check:
- threadIdCt = 0;
-
- //
- // Initialize the counter arrays with data from threadInfo[0].
- //
- if (assign_thread_ids) {
- if (threadInfo[0][threadIdIndex] == UINT_MAX) {
- threadInfo[0][threadIdIndex] = threadIdCt++;
- }
- else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
- threadIdCt = threadInfo[0][threadIdIndex] + 1;
- }
- }
- for (index = 0; index <= maxIndex; index++) {
- counts[index] = 1;
- maxCt[index] = 1;
- totals[index] = 1;
-        lastId[index] = threadInfo[0][index];
- }
-
- //
- // Run through the rest of the OS procs.
- //
- for (i = 1; i < num_avail; i++) {
- //
- // Find the most significant index whose id differs
- // from the id for the previous OS proc.
- //
- for (index = maxIndex; index >= threadIdIndex; index--) {
- if (assign_thread_ids && (index == threadIdIndex)) {
- //
- // Auto-assign the thread id field if it wasn't specified.
- //
- if (threadInfo[i][threadIdIndex] == UINT_MAX) {
- threadInfo[i][threadIdIndex] = threadIdCt++;
- }
-
- //
-                // Apparently the thread id field was specified for some
- // entries and not others. Start the thread id counter
- // off at the next higher thread id.
- //
- else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
- threadIdCt = threadInfo[i][threadIdIndex] + 1;
- }
- }
- if (threadInfo[i][index] != lastId[index]) {
- //
- // Run through all indices which are less significant,
- // and reset the counts to 1.
- //
- // At all levels up to and including index, we need to
- // increment the totals and record the last id.
- //
- unsigned index2;
- for (index2 = threadIdIndex; index2 < index; index2++) {
- totals[index2]++;
- if (counts[index2] > maxCt[index2]) {
- maxCt[index2] = counts[index2];
- }
- counts[index2] = 1;
- lastId[index2] = threadInfo[i][index2];
- }
- counts[index]++;
- totals[index]++;
- lastId[index] = threadInfo[i][index];
-
- if (assign_thread_ids && (index > threadIdIndex)) {
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- //
- // The default team size is the total #threads in the machine
- // minus 1 thread for every core that has 3 or more threads.
- //
- teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- //
- // Restart the thread counter, as we are on a new core.
- //
- threadIdCt = 0;
-
- //
- // Auto-assign the thread id field if it wasn't specified.
- //
- if (threadInfo[i][threadIdIndex] == UINT_MAX) {
- threadInfo[i][threadIdIndex] = threadIdCt++;
- }
-
- //
-                    // Apparently the thread id field was specified for some
- // entries and not others. Start the thread id counter
- // off at the next higher thread id.
- //
- else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
- threadIdCt = threadInfo[i][threadIdIndex] + 1;
- }
- }
- break;
- }
- }
- if (index < threadIdIndex) {
- //
- // If thread ids were specified, it is an error if they are not
-            // unique. Also, check that we haven't already restarted the
- // loop (to be safe - shouldn't need to).
- //
- if ((threadInfo[i][threadIdIndex] != UINT_MAX)
- || assign_thread_ids) {
- __kmp_free(lastId);
- __kmp_free(totals);
- __kmp_free(maxCt);
- __kmp_free(counts);
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
- return -1;
- }
-
- //
-            // If the thread ids were not specified and we see entries
-            // that are duplicates, start the loop over and assign the
-            // thread ids manually.
- //
- assign_thread_ids = true;
- goto restart_radix_check;
- }
- }
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- //
- // The default team size is the total #threads in the machine
- // minus 1 thread for every core that has 3 or more threads.
- //
- teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- for (index = threadIdIndex; index <= maxIndex; index++) {
- if (counts[index] > maxCt[index]) {
- maxCt[index] = counts[index];
- }
- }
-
- __kmp_nThreadsPerCore = maxCt[threadIdIndex];
- nCoresPerPkg = maxCt[coreIdIndex];
- nPackages = totals[pkgIdIndex];
-
- //
- // Check to see if the machine topology is uniform
- //
- unsigned prod = totals[maxIndex];
- for (index = threadIdIndex; index < maxIndex; index++) {
- prod *= maxCt[index];
- }
- bool uniform = (prod == totals[threadIdIndex]);
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return now if affinity is not enabled.
- //
- __kmp_ncores = totals[coreIdIndex];
-
- if (__kmp_affinity_verbose) {
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- }
- else {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
- KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- }
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
-
- __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
- for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
- __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
- }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
- maxCt[threadIdIndex], __kmp_ncores);
-
- __kmp_str_buf_free(&buf);
- }
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- //
- // Set the default team size.
- //
- if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
- __kmp_dflt_team_nth = teamSize;
- KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
- __kmp_dflt_team_nth));
- }
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(lastId);
- __kmp_free(totals);
- __kmp_free(maxCt);
- __kmp_free(counts);
- CLEANUP_THREAD_INFO;
- return 0;
- }
-
- //
-    // Count the number of levels which have more nodes at that level than
-    // at the parent's level (treating the whole machine as an implicit root
-    // node above the top level). This is equivalent to saying that there is
-    // at least one node at this level which has a sibling. These levels are
-    // in the map, and the package level is always in the map.
- //
- bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
- int level = 0;
- for (index = threadIdIndex; index < maxIndex; index++) {
- KMP_ASSERT(totals[index] >= totals[index + 1]);
- inMap[index] = (totals[index] > totals[index + 1]);
- }
- inMap[maxIndex] = (totals[maxIndex] > 1);
- inMap[pkgIdIndex] = true;
-
- int depth = 0;
- for (index = threadIdIndex; index <= maxIndex; index++) {
- if (inMap[index]) {
- depth++;
- }
- }
- KMP_ASSERT(depth > 0);
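An illustration of the level-pruning logic above, for a hypothetical single-package machine with 4 cores and 1 thread per core:

    // totals[threadIdIndex] = 4, totals[coreIdIndex] = 4, totals[pkgIdIndex] = 1
    // inMap[threadIdIndex] = false  (4 > 4 fails: no core has a sibling thread)
    // inMap[coreIdIndex]   = true   (4 > 1: the package has sibling cores)
    // inMap[pkgIdIndex]    = true   (the package level is always kept)
    // => depth == 2 (package and core levels only)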
-
- //
- // Construct the data structure that is to be returned.
- //
- *address2os = (AddrUnsPair*)
- __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
- int pkgLevel = -1;
- int coreLevel = -1;
- int threadLevel = -1;
-
- for (i = 0; i < num_avail; ++i) {
- Address addr(depth);
- unsigned os = threadInfo[i][osIdIndex];
- int src_index;
- int dst_index = 0;
-
- for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
- if (! inMap[src_index]) {
- continue;
- }
- addr.labels[dst_index] = threadInfo[i][src_index];
- if (src_index == pkgIdIndex) {
- pkgLevel = dst_index;
- }
- else if (src_index == coreIdIndex) {
- coreLevel = dst_index;
- }
- else if (src_index == threadIdIndex) {
- threadLevel = dst_index;
- }
- dst_index++;
- }
- (*address2os)[i] = AddrUnsPair(addr, os);
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- unsigned src_index;
- __kmp_affinity_gran_levels = 0;
- for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
- if (! inMap[src_index]) {
- continue;
- }
- switch (src_index) {
- case threadIdIndex:
- if (__kmp_affinity_gran > affinity_gran_thread) {
- __kmp_affinity_gran_levels++;
- }
-
- break;
- case coreIdIndex:
- if (__kmp_affinity_gran > affinity_gran_core) {
- __kmp_affinity_gran_levels++;
- }
- break;
-
- case pkgIdIndex:
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels++;
- }
- break;
- }
- }
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
- coreLevel, threadLevel);
- }
-
- __kmp_free(inMap);
- __kmp_free(lastId);
- __kmp_free(totals);
- __kmp_free(maxCt);
- __kmp_free(counts);
- CLEANUP_THREAD_INFO;
- return depth;
-}
-
-
-//
-// Create and return a table of affinity masks, indexed by OS thread ID.
-// This routine handles OR'ing together all the affinity masks of threads
-// that are sufficiently close, if granularity > fine.
-//
-static kmp_affin_mask_t *
-__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
- AddrUnsPair *address2os, unsigned numAddrs)
-{
- //
- // First form a table of affinity masks in order of OS thread id.
- //
- unsigned depth;
- unsigned maxOsId;
- unsigned i;
-
- KMP_ASSERT(numAddrs > 0);
- depth = address2os[0].first.depth;
-
- maxOsId = 0;
- for (i = 0; i < numAddrs; i++) {
- unsigned osId = address2os[i].second;
- if (osId > maxOsId) {
- maxOsId = osId;
- }
- }
- kmp_affin_mask_t *osId2Mask;
- KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
-
- //
- // Sort the address2os table according to physical order. Doing so
- // will put all threads on the same core/package/node in consecutive
- // locations.
- //
- qsort(address2os, numAddrs, sizeof(*address2os),
- __kmp_affinity_cmp_Address_labels);
-
- KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
- if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
- KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
- }
- if (__kmp_affinity_gran_levels >= (int)depth) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffThreadsMayMigrate);
- }
- }
-
- //
- // Run through the table, forming the masks for all threads on each
- // core. Threads on the same core will have identical "Address"
- // objects, not considering the last level, which must be the thread
- // id. All threads on a core will appear consecutively.
- //
- unsigned unique = 0;
- unsigned j = 0; // index of 1st thread on core
- unsigned leader = 0;
- Address *leaderAddr = &(address2os[0].first);
- kmp_affin_mask_t *sum;
- KMP_CPU_ALLOC_ON_STACK(sum);
- KMP_CPU_ZERO(sum);
- KMP_CPU_SET(address2os[0].second, sum);
- for (i = 1; i < numAddrs; i++) {
- //
- // If this thread is sufficiently close to the leader (within the
- // granularity setting), then set the bit for this os thread in the
- // affinity mask for this group, and go on to the next thread.
- //
- if (leaderAddr->isClose(address2os[i].first,
- __kmp_affinity_gran_levels)) {
- KMP_CPU_SET(address2os[i].second, sum);
- continue;
- }
-
- //
- // For every thread in this group, copy the mask to the thread's
- // entry in the osId2Mask table. Mark the first address as a
- // leader.
- //
- for (; j < i; j++) {
- unsigned osId = address2os[j].second;
- KMP_DEBUG_ASSERT(osId <= maxOsId);
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
- KMP_CPU_COPY(mask, sum);
- address2os[j].first.leader = (j == leader);
- }
- unique++;
-
- //
- // Start a new mask.
- //
- leader = i;
- leaderAddr = &(address2os[i].first);
- KMP_CPU_ZERO(sum);
- KMP_CPU_SET(address2os[i].second, sum);
- }
-
- //
-    // For every thread in the last group, copy the mask to the thread's
- // entry in the osId2Mask table.
- //
- for (; j < i; j++) {
- unsigned osId = address2os[j].second;
- KMP_DEBUG_ASSERT(osId <= maxOsId);
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
- KMP_CPU_COPY(mask, sum);
- address2os[j].first.leader = (j == leader);
- }
- unique++;
- KMP_CPU_FREE_FROM_STACK(sum);
-
- *maxIndex = maxOsId;
- *numUnique = unique;
- return osId2Mask;
-}
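A small sketch of the result, assuming a hypothetical machine with 1 package, 2 cores, 2 threads per core (OS procs 0..3) and __kmp_affinity_gran_levels == 1, i.e. granularity=core:

    // address2os (sorted): (pkg,core,thread) 0,0,0 -> 0   0,0,1 -> 1
    //                                        0,1,0 -> 2   0,1,1 -> 3
    // osId2Mask: 0 -> {0,1}   1 -> {0,1}   2 -> {2,3}   3 -> {2,3}
    // numUnique == 2 (one leader per core)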
-
-
-//
-// Stuff for the affinity proclist parsers. It's easier to declare these vars
-// as file-static than to try and pass them through the calling sequence of
-// the recursive-descent OMP_PLACES parser.
-//
-static kmp_affin_mask_t *newMasks;
-static int numNewMasks;
-static int nextNewMask;
-
-#define ADD_MASK(_mask) \
- { \
- if (nextNewMask >= numNewMasks) { \
- int i; \
- numNewMasks *= 2; \
- kmp_affin_mask_t* temp; \
- KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
- for(i=0;i<numNewMasks/2;i++) { \
- kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
- kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
- KMP_CPU_COPY(dest, src); \
- } \
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
- newMasks = temp; \
- } \
- KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
- nextNewMask++; \
- }
-
-#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
- { \
- if (((_osId) > _maxOsId) || \
- (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
- && (__kmp_affinity_type != affinity_none))) { \
- KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
- } \
- } \
- else { \
- ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
- } \
- }
-
-
-//
-// Re-parse the proclist (for the explicit affinity type), and form the list
-// of affinity newMasks indexed by gtid.
-//
-static void
-__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
- unsigned int *out_numMasks, const char *proclist,
- kmp_affin_mask_t *osId2Mask, int maxOsId)
-{
- int i;
- const char *scan = proclist;
- const char *next = proclist;
-
- //
-    // Allocate a temporary vector of masks to build the result in; the
-    // ADD_MASK macro doubles its size whenever it fills up.
- //
- numNewMasks = 2;
- KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
- nextNewMask = 0;
- kmp_affin_mask_t *sumMask;
- KMP_CPU_ALLOC(sumMask);
- int setSize = 0;
-
- for (;;) {
- int start, end, stride;
-
- SKIP_WS(scan);
- next = scan;
- if (*next == '\0') {
- break;
- }
-
- if (*next == '{') {
- int num;
- setSize = 0;
- next++; // skip '{'
- SKIP_WS(next);
- scan = next;
-
- //
- // Read the first integer in the set.
- //
- KMP_ASSERT2((*next >= '0') && (*next <= '9'),
- "bad proclist");
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(num >= 0, "bad explicit proc list");
-
- //
- // Copy the mask for that osId to the sum (union) mask.
- //
- if ((num > maxOsId) ||
- (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
- KMP_CPU_ZERO(sumMask);
- }
- else {
- KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
- setSize = 1;
- }
-
- for (;;) {
- //
- // Check for end of set.
- //
- SKIP_WS(next);
- if (*next == '}') {
- next++; // skip '}'
- break;
- }
-
- //
- // Skip optional comma.
- //
- if (*next == ',') {
- next++;
- }
- SKIP_WS(next);
-
- //
- // Read the next integer in the set.
- //
- scan = next;
- KMP_ASSERT2((*next >= '0') && (*next <= '9'),
- "bad explicit proc list");
-
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(num >= 0, "bad explicit proc list");
-
- //
- // Add the mask for that osId to the sum mask.
- //
- if ((num > maxOsId) ||
- (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
- }
- else {
- KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
- setSize++;
- }
- }
- if (setSize > 0) {
- ADD_MASK(sumMask);
- }
-
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
- }
-
- //
- // Read the first integer.
- //
- KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(start >= 0, "bad explicit proc list");
- SKIP_WS(next);
-
- //
- // If this isn't a range, then add a mask to the list and go on.
- //
- if (*next != '-') {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
-
- //
- // Skip optional comma.
- //
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
- }
-
- //
- // This is a range. Skip over the '-' and read in the 2nd int.
- //
- next++; // skip '-'
- SKIP_WS(next);
- scan = next;
- KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
- SKIP_DIGITS(next);
- end = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(end >= 0, "bad explicit proc list");
-
- //
- // Check for a stride parameter
- //
- stride = 1;
- SKIP_WS(next);
- if (*next == ':') {
- //
-            // A stride is specified. Skip over the ':' and read the 3rd int.
- //
- int sign = +1;
- next++; // skip ':'
- SKIP_WS(next);
- scan = next;
- if (*next == '-') {
- sign = -1;
- next++;
- SKIP_WS(next);
- scan = next;
- }
- KMP_ASSERT2((*next >= '0') && (*next <= '9'),
- "bad explicit proc list");
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(stride >= 0, "bad explicit proc list");
- stride *= sign;
- }
-
- //
- // Do some range checks.
- //
- KMP_ASSERT2(stride != 0, "bad explicit proc list");
- if (stride > 0) {
- KMP_ASSERT2(start <= end, "bad explicit proc list");
- }
- else {
- KMP_ASSERT2(start >= end, "bad explicit proc list");
- }
- KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
-
- //
- // Add the mask for each OS proc # to the list.
- //
- if (stride > 0) {
- do {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
- start += stride;
- } while (start <= end);
- }
- else {
- do {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
- start += stride;
- } while (start >= end);
- }
-
- //
- // Skip optional comma.
- //
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
- scan = next;
- }
-
- *out_numMasks = nextNewMask;
- if (nextNewMask == 0) {
- *out_masks = NULL;
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- return;
- }
- KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
- for(i = 0; i < nextNewMask; i++) {
- kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
- kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
- KMP_CPU_COPY(dest, src);
- }
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- KMP_CPU_FREE(sumMask);
-}
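A few hypothetical KMP_AFFINITY proclist strings accepted by this parser:

    // "3"              a single OS proc id
    // "0-7"            a range
    // "0-30:2"         a range with a stride (0, 2, 4, ..., 30)
    // "{0,1,2,3}"      a set; the members' masks are OR'ed into one mask
    // "3,0-7:2,{8,9}"  a comma-separated mix of the forms above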
-
-
-# if OMP_40_ENABLED
-
-/*-----------------------------------------------------------------------------
-
-Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
-places. Again, here is the grammar:
-
-place_list := place
-place_list := place , place_list
-place := num
-place := place : num
-place := place : num : signed
-place := { subplacelist }
-place := ! place // (lowest priority)
-subplace_list := subplace
-subplace_list := subplace , subplace_list
-subplace := num
-subplace := num : num
-subplace := num : num : signed
-signed := num
-signed := + signed
-signed := - signed
-
------------------------------------------------------------------------------*/
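A few strings that match this grammar (the proc numbers are illustrative):

    // "{0,1,2,3},{4,5,6,7}"   two explicit places
    // "{0:4},{4:4}"           the same two places written as subplace ranges
    // "{0:4}:4:4"             one 4-proc place replicated 4 times with stride 4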
-
-static void
-__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
- int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
-{
- const char *next;
-
- for (;;) {
- int start, count, stride, i;
-
- //
- // Read in the starting proc id
- //
- SKIP_WS(*scan);
- KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
- "bad explicit places list");
- next = *scan;
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(start >= 0);
- *scan = next;
-
- //
- // valid follow sets are ',' ':' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}' || **scan == ',') {
- if ((start > maxOsId) ||
- (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
- (*setSize)++;
- }
- if (**scan == '}') {
- break;
- }
- (*scan)++; // skip ','
- continue;
- }
- KMP_ASSERT2(**scan == ':', "bad explicit places list");
- (*scan)++; // skip ':'
-
- //
- // Read count parameter
- //
- SKIP_WS(*scan);
- KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
- "bad explicit places list");
- next = *scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(count >= 0);
- *scan = next;
-
- //
- // valid follow sets are ',' ':' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}' || **scan == ',') {
- for (i = 0; i < count; i++) {
- if ((start > maxOsId) ||
- (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
- break; // don't proliferate warnings for large count
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
- start++;
- (*setSize)++;
- }
- }
- if (**scan == '}') {
- break;
- }
- (*scan)++; // skip ','
- continue;
- }
- KMP_ASSERT2(**scan == ':', "bad explicit places list");
- (*scan)++; // skip ':'
-
- //
- // Read stride parameter
- //
- int sign = +1;
- for (;;) {
- SKIP_WS(*scan);
- if (**scan == '+') {
- (*scan)++; // skip '+'
- continue;
- }
- if (**scan == '-') {
- sign *= -1;
- (*scan)++; // skip '-'
- continue;
- }
- break;
- }
- SKIP_WS(*scan);
- KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
- "bad explicit places list");
- next = *scan;
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(stride >= 0);
- *scan = next;
- stride *= sign;
-
- //
- // valid follow sets are ',' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}' || **scan == ',') {
- for (i = 0; i < count; i++) {
- if ((start > maxOsId) ||
- (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
- break; // don't proliferate warnings for large count
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
- start += stride;
- (*setSize)++;
- }
- }
- if (**scan == '}') {
- break;
- }
- (*scan)++; // skip ','
- continue;
- }
-
- KMP_ASSERT2(0, "bad explicit places list");
- }
-}
-
-
-static void
-__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
- int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
-{
- const char *next;
-
- //
- // valid follow sets are '{' '!' and num
- //
- SKIP_WS(*scan);
- if (**scan == '{') {
- (*scan)++; // skip '{'
- __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
- setSize);
- KMP_ASSERT2(**scan == '}', "bad explicit places list");
- (*scan)++; // skip '}'
- }
- else if (**scan == '!') {
- (*scan)++; // skip '!'
- __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
- KMP_CPU_COMPLEMENT(maxOsId, tempMask);
- }
- else if ((**scan >= '0') && (**scan <= '9')) {
- next = *scan;
- SKIP_DIGITS(next);
- int num = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(num >= 0);
- if ((num > maxOsId) ||
- (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
- (*setSize)++;
- }
- *scan = next; // skip num
- }
- else {
- KMP_ASSERT2(0, "bad explicit places list");
- }
-}
-
-
-//static void
-void
-__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
- unsigned int *out_numMasks, const char *placelist,
- kmp_affin_mask_t *osId2Mask, int maxOsId)
-{
- int i,j,count,stride,sign;
- const char *scan = placelist;
- const char *next = placelist;
-
- numNewMasks = 2;
- KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
- nextNewMask = 0;
-
- // tempMask is modified based on the previous or initial
- // place to form the current place
- // previousMask contains the previous place
- kmp_affin_mask_t *tempMask;
- kmp_affin_mask_t *previousMask;
- KMP_CPU_ALLOC(tempMask);
- KMP_CPU_ZERO(tempMask);
- KMP_CPU_ALLOC(previousMask);
- KMP_CPU_ZERO(previousMask);
- int setSize = 0;
-
- for (;;) {
- __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
-
- //
- // valid follow sets are ',' ':' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0' || *scan == ',') {
- if (setSize > 0) {
- ADD_MASK(tempMask);
- }
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
- if (*scan == '\0') {
- break;
- }
- scan++; // skip ','
- continue;
- }
-
- KMP_ASSERT2(*scan == ':', "bad explicit places list");
- scan++; // skip ':'
-
- //
- // Read count parameter
- //
- SKIP_WS(scan);
- KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
- "bad explicit places list");
- next = scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(count >= 0);
- scan = next;
-
- //
- // valid follow sets are ',' ':' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0' || *scan == ',') {
- stride = +1;
- }
- else {
- KMP_ASSERT2(*scan == ':', "bad explicit places list");
- scan++; // skip ':'
-
- //
- // Read stride parameter
- //
- sign = +1;
- for (;;) {
- SKIP_WS(scan);
- if (*scan == '+') {
- scan++; // skip '+'
- continue;
- }
- if (*scan == '-') {
- sign *= -1;
- scan++; // skip '-'
- continue;
- }
- break;
- }
- SKIP_WS(scan);
- KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
- "bad explicit places list");
- next = scan;
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_DEBUG_ASSERT(stride >= 0);
- scan = next;
- stride *= sign;
- }
-
- // Add places determined by initial_place : count : stride
- for (i = 0; i < count; i++) {
- if (setSize == 0) {
- break;
- }
- // Add the current place, then build the next place (tempMask) from that
- KMP_CPU_COPY(previousMask, tempMask);
- ADD_MASK(previousMask);
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
- KMP_CPU_SET_ITERATE(j, previousMask) {
- if (! KMP_CPU_ISSET(j, previousMask)) {
- continue;
- }
- else if ((j+stride > maxOsId) || (j+stride < 0) ||
- (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
- if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
- KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
- }
- }
- else {
- KMP_CPU_SET(j+stride, tempMask);
- setSize++;
- }
- }
- }
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
-
- //
- // valid follow sets are ',' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0') {
- break;
- }
- if (*scan == ',') {
- scan++; // skip ','
- continue;
- }
-
- KMP_ASSERT2(0, "bad explicit places list");
- }
-
- *out_numMasks = nextNewMask;
- if (nextNewMask == 0) {
- *out_masks = NULL;
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- return;
- }
- KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
- KMP_CPU_FREE(tempMask);
- KMP_CPU_FREE(previousMask);
- for(i = 0; i < nextNewMask; i++) {
- kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
- kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
- KMP_CPU_COPY(dest, src);
- }
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
-}
-
-# endif /* OMP_40_ENABLED */
-
-#undef ADD_MASK
-#undef ADD_MASK_OSID
-
-static void
-__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
-{
- if (__kmp_place_num_sockets == 0 &&
- __kmp_place_num_cores == 0 &&
- __kmp_place_num_threads_per_core == 0 )
- return; // no topology limiting actions requested, exit
- if (__kmp_place_num_sockets == 0)
- __kmp_place_num_sockets = nPackages; // use all available sockets
- if (__kmp_place_num_cores == 0)
- __kmp_place_num_cores = nCoresPerPkg; // use all available cores
- if (__kmp_place_num_threads_per_core == 0 ||
- __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
- __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
-
- if ( !__kmp_affinity_uniform_topology() ) {
- KMP_WARNING( AffThrPlaceNonUniform );
- return; // don't support non-uniform topology
- }
- if ( depth != 3 ) {
- KMP_WARNING( AffThrPlaceNonThreeLevel );
- return; // don't support not-3-level topology
- }
- if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
- KMP_WARNING(AffThrPlaceManySockets);
- return;
- }
- if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
- KMP_WARNING( AffThrPlaceManyCores );
- return;
- }
-
- AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
- __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
-
- int i, j, k, n_old = 0, n_new = 0;
- for (i = 0; i < nPackages; ++i)
- if (i < __kmp_place_socket_offset ||
- i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
- n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
- else
- for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
- if (j < __kmp_place_core_offset ||
- j >= __kmp_place_core_offset + __kmp_place_num_cores)
- n_old += __kmp_nThreadsPerCore; // skip not-requested core
- else
- for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
- if (k < __kmp_place_num_threads_per_core) {
- newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
- n_new++;
- }
- n_old++;
- }
- KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
- KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
- __kmp_place_num_threads_per_core);
-
- nPackages = __kmp_place_num_sockets; // correct nPackages
- nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
- __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
- __kmp_avail_proc = n_new; // correct avail_proc
- __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
-
- __kmp_free( *pAddr );
- *pAddr = newAddr; // replace old topology with new one
-}
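A hypothetical example of the trimming above: on a uniform 2-socket x 8-core x 2-thread machine, a request for 1 socket, 4 cores and 1 thread per core gives

    // n_new = 1 * 4 * 1 = 4 entries kept out of the original 32, and then
    // nPackages = 1, nCoresPerPkg = 4, __kmp_nThreadsPerCore = 1,
    // __kmp_avail_proc = 4, __kmp_ncores = 1 * 4 = 4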
-
-
-static AddrUnsPair *address2os = NULL;
-static int * procarr = NULL;
-static int __kmp_aff_depth = 0;
-
-static void
-__kmp_aux_affinity_initialize(void)
-{
- if (__kmp_affinity_masks != NULL) {
- KMP_ASSERT(fullMask != NULL);
- return;
- }
-
- //
- // Create the "full" mask - this defines all of the processors that we
- // consider to be in the machine model. If respect is set, then it is
- // the initialization thread's affinity mask. Otherwise, it is all
- // processors that we know about on the machine.
- //
- if (fullMask == NULL) {
- KMP_CPU_ALLOC(fullMask);
- }
- if (KMP_AFFINITY_CAPABLE()) {
- if (__kmp_affinity_respect_mask) {
- __kmp_get_system_affinity(fullMask, TRUE);
-
- //
- // Count the number of available processors.
- //
- unsigned i;
- __kmp_avail_proc = 0;
- KMP_CPU_SET_ITERATE(i, fullMask) {
- if (! KMP_CPU_ISSET(i, fullMask)) {
- continue;
- }
- __kmp_avail_proc++;
- }
- if (__kmp_avail_proc > __kmp_xproc) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(ErrorInitializeAffinity);
- }
- __kmp_affinity_type = affinity_none;
- KMP_AFFINITY_DISABLE();
- return;
- }
- }
- else {
- __kmp_affinity_entire_machine_mask(fullMask);
- __kmp_avail_proc = __kmp_xproc;
- }
- }
-
- int depth = -1;
- kmp_i18n_id_t msg_id = kmp_i18n_null;
-
- //
- // For backward compatibility, setting KMP_CPUINFO_FILE =>
- // KMP_TOPOLOGY_METHOD=cpuinfo
- //
- if ((__kmp_cpuinfo_file != NULL) &&
- (__kmp_affinity_top_method == affinity_top_method_all)) {
- __kmp_affinity_top_method = affinity_top_method_cpuinfo;
- }
-
- if (__kmp_affinity_top_method == affinity_top_method_all) {
- //
- // In the default code path, errors are not fatal - we just try using
- // another method. We only emit a warning message if affinity is on,
-        // or the verbose flag is set, and the nowarnings flag was not set.
- //
- const char *file_name = NULL;
- int line = 0;
-# if KMP_USE_HWLOC
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- }
- if(!__kmp_hwloc_error) {
- depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- } else if(depth < 0 && __kmp_affinity_verbose) {
- KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
- }
- } else if(__kmp_affinity_verbose) {
- KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
- }
- }
-# endif
-
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
- }
-
- file_name = NULL;
- depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
-
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- if (msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
- KMP_I18N_STR(DecodingLegacyAPIC));
- }
- else {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
- }
- }
-
- file_name = NULL;
- depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- }
- }
-
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-# if KMP_OS_LINUX
-
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- if (msg_id != kmp_i18n_null) {
- KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
- }
- else {
- KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
- }
- }
-
- FILE *f = fopen("/proc/cpuinfo", "r");
- if (f == NULL) {
- msg_id = kmp_i18n_str_CantOpenCpuinfo;
- }
- else {
- file_name = "/proc/cpuinfo";
- depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
- fclose(f);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- }
- }
-
-# endif /* KMP_OS_LINUX */
-
-# if KMP_GROUP_AFFINITY
-
- if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
- }
-
- depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
- KMP_ASSERT(depth != 0);
- }
-
-# endif /* KMP_GROUP_AFFINITY */
-
- if (depth < 0) {
- if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
- if (file_name == NULL) {
- KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
- }
- else if (line == 0) {
- KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
- }
- else {
- KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
- }
- }
- // FIXME - print msg if msg_id = kmp_i18n_null ???
-
- file_name = "";
- depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- KMP_ASSERT(depth > 0);
- KMP_ASSERT(address2os != NULL);
- }
- }
-
- //
-    // If the user has specified that a particular topology discovery method
- // is to be used, then we abort if that method fails. The exception is
- // group affinity, which might have been implicitly set.
- //
-
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
- else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
- KMP_I18N_STR(Decodingx2APIC));
- }
-
- depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
- }
- }
- else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
- KMP_I18N_STR(DecodingLegacyAPIC));
- }
-
- depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
- }
- }
-
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
- const char *filename;
- if (__kmp_cpuinfo_file != NULL) {
- filename = __kmp_cpuinfo_file;
- }
- else {
- filename = "/proc/cpuinfo";
- }
-
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
- }
-
- FILE *f = fopen(filename, "r");
- if (f == NULL) {
- int code = errno;
- if (__kmp_cpuinfo_file != NULL) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(CantOpenFileForReading, filename),
- KMP_ERR(code),
- KMP_HNT(NameComesFrom_CPUINFO_FILE),
- __kmp_msg_null
- );
- }
- else {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(CantOpenFileForReading, filename),
- KMP_ERR(code),
- __kmp_msg_null
- );
- }
- }
- int line = 0;
- depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
- fclose(f);
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- if (line > 0) {
- KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
- }
- else {
- KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
- }
- }
- if (__kmp_affinity_type == affinity_none) {
- KMP_ASSERT(depth == 0);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- }
-
-# if KMP_GROUP_AFFINITY
-
- else if (__kmp_affinity_top_method == affinity_top_method_group) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
- }
-
- depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
- KMP_ASSERT(depth != 0);
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
- }
- }
-
-# endif /* KMP_GROUP_AFFINITY */
-
- else if (__kmp_affinity_top_method == affinity_top_method_flat) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
- }
-
- depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- // should not fail
- KMP_ASSERT(depth > 0);
- KMP_ASSERT(address2os != NULL);
- }
-
-# if KMP_USE_HWLOC
- else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- }
- depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
-# if KMP_DEBUG
- AddrUnsPair *otheraddress2os = NULL;
- int otherdepth = -1;
-# if KMP_MIC
- otherdepth = __kmp_affinity_create_apicid_map(&otheraddress2os, &msg_id);
-# else
- otherdepth = __kmp_affinity_create_x2apicid_map(&otheraddress2os, &msg_id);
-# endif
- if(otheraddress2os != NULL && address2os != NULL) {
- int i;
- unsigned arent_equal_flag = 0;
- for(i=0;i<__kmp_avail_proc;i++) {
- if(otheraddress2os[i] != address2os[i]) arent_equal_flag = 1;
- }
- if(arent_equal_flag) {
- KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are different from APICID\n"));
- KA_TRACE(10, ("__kmp_aux_affinity_initialize: APICID Table:\n"));
- for(i=0;i<__kmp_avail_proc;i++) {
- otheraddress2os[i].print(); __kmp_printf("\n");
- }
- KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc Table:\n"));
- for(i=0;i<__kmp_avail_proc;i++) {
- address2os[i].print(); __kmp_printf("\n");
- }
- }
- else {
- KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are same as APICID\n"));
- }
- }
-# endif // KMP_DEBUG
- }
-# endif // KMP_USE_HWLOC
-
- if (address2os == NULL) {
- if (KMP_AFFINITY_CAPABLE()
- && (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)))) {
- KMP_WARNING(ErrorInitializeAffinity);
- }
- __kmp_affinity_type = affinity_none;
- KMP_AFFINITY_DISABLE();
- return;
- }
-
- __kmp_apply_thread_places(&address2os, depth);
-
- //
- // Create the table of masks, indexed by thread Id.
- //
- unsigned maxIndex;
- unsigned numUnique;
- kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
- address2os, __kmp_avail_proc);
- if (__kmp_affinity_gran_levels == 0) {
- KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
- }
-
- //
- // Set the childNums vector in all Address objects. This must be done
- // before we can sort using __kmp_affinity_cmp_Address_child_num(),
- // which takes into account the setting of __kmp_affinity_compact.
- //
- __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
-
- switch (__kmp_affinity_type) {
-
- case affinity_explicit:
- KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
-# if OMP_40_ENABLED
- if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
-# endif
- {
- __kmp_affinity_process_proclist(&__kmp_affinity_masks,
- &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
- maxIndex);
- }
-# if OMP_40_ENABLED
- else {
- __kmp_affinity_process_placelist(&__kmp_affinity_masks,
- &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
- maxIndex);
- }
-# endif
- if (__kmp_affinity_num_masks == 0) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffNoValidProcID);
- }
- __kmp_affinity_type = affinity_none;
- return;
- }
- break;
-
- //
- // The other affinity types rely on sorting the Addresses according
- // to some permutation of the machine topology tree. Set
- // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
- // then jump to a common code fragment to do the sort and create
- // the array of affinity masks.
- //
-
- case affinity_logical:
- __kmp_affinity_compact = 0;
- if (__kmp_affinity_offset) {
- __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
- % __kmp_avail_proc;
- }
- goto sortAddresses;
-
- case affinity_physical:
- if (__kmp_nThreadsPerCore > 1) {
- __kmp_affinity_compact = 1;
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = 0;
- }
- } else {
- __kmp_affinity_compact = 0;
- }
- if (__kmp_affinity_offset) {
- __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
- % __kmp_avail_proc;
- }
- goto sortAddresses;
-
- case affinity_scatter:
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = 0;
- }
- else {
- __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
- }
- goto sortAddresses;
-
- case affinity_compact:
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = depth - 1;
- }
- goto sortAddresses;
-
- case affinity_balanced:
- // Balanced works only for the case of a single package
- if( nPackages > 1 ) {
- if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
- KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
- }
- __kmp_affinity_type = affinity_none;
- return;
- } else if( __kmp_affinity_uniform_topology() ) {
- break;
- } else { // Non-uniform topology
-
- // Save the depth for further usage
- __kmp_aff_depth = depth;
-
- // Number of hyper threads per core in HT machine
- int nth_per_core = __kmp_nThreadsPerCore;
-
- int core_level;
- if( nth_per_core > 1 ) {
- core_level = depth - 2;
- } else {
- core_level = depth - 1;
- }
- int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
- int nproc = nth_per_core * ncores;
-
- procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
- for( int i = 0; i < nproc; i++ ) {
- procarr[ i ] = -1;
- }
-
- for( int i = 0; i < __kmp_avail_proc; i++ ) {
- int proc = address2os[ i ].second;
- // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
- // If there is only one thread per core then depth == 2: level 0 - package,
- // level 1 - core.
- int level = depth - 1;
-
- // __kmp_nth_per_core == 1
- int thread = 0;
- int core = address2os[ i ].first.labels[ level ];
- // If the thread level exists, that is we have more than one thread context per core
- if( nth_per_core > 1 ) {
- thread = address2os[ i ].first.labels[ level ] % nth_per_core;
- core = address2os[ i ].first.labels[ level - 1 ];
- }
- procarr[ core * nth_per_core + thread ] = proc;
- }
-
- break;
- }
-
- sortAddresses:
- //
- // Allocate the gtid->affinity mask table.
- //
- if (__kmp_affinity_dups) {
- __kmp_affinity_num_masks = __kmp_avail_proc;
- }
- else {
- __kmp_affinity_num_masks = numUnique;
- }
-
-# if OMP_40_ENABLED
- if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
- && ( __kmp_affinity_num_places > 0 )
- && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
- __kmp_affinity_num_masks = __kmp_affinity_num_places;
- }
-# endif
-
- KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
-
- //
- // Sort the address2os table according to the current setting of
- // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
- //
- qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
- __kmp_affinity_cmp_Address_child_num);
- {
- int i;
- unsigned j;
- for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
- if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
- continue;
- }
- unsigned osId = address2os[i].second;
- kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
- kmp_affin_mask_t *dest
- = KMP_CPU_INDEX(__kmp_affinity_masks, j);
- KMP_ASSERT(KMP_CPU_ISSET(osId, src));
- KMP_CPU_COPY(dest, src);
- if (++j >= __kmp_affinity_num_masks) {
- break;
- }
- }
- KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
- }
- break;
-
- default:
- KMP_ASSERT2(0, "Unexpected affinity setting");
- }
-
- __kmp_free(osId2Mask);
- machine_hierarchy.init(address2os, __kmp_avail_proc);
-}
-
-
-void
-__kmp_affinity_initialize(void)
-{
- //
- // Much of the code above was written assuming that if a machine was not
- // affinity capable, then __kmp_affinity_type == affinity_none. We now
- // explicitly represent this as __kmp_affinity_type == affinity_disabled.
- //
- // There are too many checks for __kmp_affinity_type == affinity_none
- // in this code. Instead of trying to change them all, check if
- // __kmp_affinity_type == affinity_disabled, and if so, slam it with
- // affinity_none, call the real initialization routine, then restore
- // __kmp_affinity_type to affinity_disabled.
- //
- int disabled = (__kmp_affinity_type == affinity_disabled);
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(disabled);
- }
- if (disabled) {
- __kmp_affinity_type = affinity_none;
- }
- __kmp_aux_affinity_initialize();
- if (disabled) {
- __kmp_affinity_type = affinity_disabled;
- }
-}
-
-
-void
-__kmp_affinity_uninitialize(void)
-{
- if (__kmp_affinity_masks != NULL) {
- KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
- __kmp_affinity_masks = NULL;
- }
- if (fullMask != NULL) {
- KMP_CPU_FREE(fullMask);
- fullMask = NULL;
- }
- __kmp_affinity_num_masks = 0;
-# if OMP_40_ENABLED
- __kmp_affinity_num_places = 0;
-# endif
- if (__kmp_affinity_proclist != NULL) {
- __kmp_free(__kmp_affinity_proclist);
- __kmp_affinity_proclist = NULL;
- }
- if( address2os != NULL ) {
- __kmp_free( address2os );
- address2os = NULL;
- }
- if( procarr != NULL ) {
- __kmp_free( procarr );
- procarr = NULL;
- }
-}
-
-
-void
-__kmp_affinity_set_init_mask(int gtid, int isa_root)
-{
- if (! KMP_AFFINITY_CAPABLE()) {
- return;
- }
-
- kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
- if (th->th.th_affin_mask == NULL) {
- KMP_CPU_ALLOC(th->th.th_affin_mask);
- }
- else {
- KMP_CPU_ZERO(th->th.th_affin_mask);
- }
-
- //
- // Copy the thread mask to the kmp_info_t structure.
- // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
- // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
- // is set, then the full mask is the same as the mask of the initialization
- // thread.
- //
- kmp_affin_mask_t *mask;
- int i;
-
-# if OMP_40_ENABLED
- if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
-# endif
- {
- if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
- ) {
-# if KMP_GROUP_AFFINITY
- if (__kmp_num_proc_groups > 1) {
- return;
- }
-# endif
- KMP_ASSERT(fullMask != NULL);
- i = KMP_PLACE_ALL;
- mask = fullMask;
- }
- else {
- KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
- i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
- mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
- }
- }
-# if OMP_40_ENABLED
- else {
- if ((! isa_root)
- || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
-# if KMP_GROUP_AFFINITY
- if (__kmp_num_proc_groups > 1) {
- return;
- }
-# endif
- KMP_ASSERT(fullMask != NULL);
- i = KMP_PLACE_ALL;
- mask = fullMask;
- }
- else {
- //
- // int i = some hash function or just a counter that doesn't
- // always start at 0. Use gtid for now.
- //
- KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
- i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
- mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
- }
- }
-# endif
-
-# if OMP_40_ENABLED
- th->th.th_current_place = i;
- if (isa_root) {
- th->th.th_new_place = i;
- th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
- }
-
- if (i == KMP_PLACE_ALL) {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
- gtid));
- }
- else {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
- gtid, i));
- }
-# else
- if (i == -1) {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
- gtid));
- }
- else {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
- gtid, i));
- }
-# endif /* OMP_40_ENABLED */
-
- KMP_CPU_COPY(th->th.th_affin_mask, mask);
-
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
- buf);
- }
-
-# if KMP_OS_WINDOWS
- //
- // On Windows* OS, the process affinity mask might have changed.
- // If the user didn't request affinity and this call fails,
- // just continue silently. See CQ171393.
- //
- if ( __kmp_affinity_type == affinity_none ) {
- __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
- }
- else
-# endif
- __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
-}
-
-
-# if OMP_40_ENABLED
-
-void
-__kmp_affinity_set_place(int gtid)
-{
- int retval;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return;
- }
-
- kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
-
- KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
- gtid, th->th.th_new_place, th->th.th_current_place));
-
- //
- // Check that the new place is within this thread's partition.
- //
- KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
- KMP_ASSERT(th->th.th_new_place >= 0);
- KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
- if (th->th.th_first_place <= th->th.th_last_place) {
- KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
- && (th->th.th_new_place <= th->th.th_last_place));
- }
- else {
- KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
- || (th->th.th_new_place >= th->th.th_last_place));
- }
-
- //
- // Copy the thread mask to the kmp_info_t structure,
- // and set this thread's affinity.
- //
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
- th->th.th_new_place);
- KMP_CPU_COPY(th->th.th_affin_mask, mask);
- th->th.th_current_place = th->th.th_new_place;
-
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
- gtid, buf);
- }
- __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
-}
-
-# endif /* OMP_40_ENABLED */
-
-
-int
-__kmp_aux_set_affinity(void **mask)
-{
- int gtid;
- kmp_info_t *th;
- int retval;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
-
- gtid = __kmp_entry_gtid();
- KA_TRACE(1000, ;{
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
- gtid, buf);
- });
-
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- else {
- unsigned proc;
- int num_procs = 0;
-
- KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
- if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
- continue;
- }
- num_procs++;
- if (! KMP_CPU_ISSET(proc, fullMask)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- break;
- }
- }
- if (num_procs == 0) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
-
-# if KMP_GROUP_AFFINITY
- if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
-# endif /* KMP_GROUP_AFFINITY */
-
- }
- }
-
- th = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
- retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
- if (retval == 0) {
- KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
- }
-
-# if OMP_40_ENABLED
- th->th.th_current_place = KMP_PLACE_UNDEFINED;
- th->th.th_new_place = KMP_PLACE_UNDEFINED;
- th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
-
- //
- // Turn off 4.0 affinity for the current thread at this parallel level.
- //
- th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
-# endif
-
- return retval;
-}
-
-
-int
-__kmp_aux_get_affinity(void **mask)
-{
- int gtid;
- int retval;
- kmp_info_t *th;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
-
- gtid = __kmp_entry_gtid();
- th = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
-
- KA_TRACE(1000, ;{
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- th->th.th_affin_mask);
- __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
- });
-
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
- }
- }
-
-# if !KMP_OS_WINDOWS
-
- retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
- KA_TRACE(1000, ;{
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
- });
- return retval;
-
-# else
-
- KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
- return 0;
-
-# endif /* KMP_OS_WINDOWS */
-
-}
-
-int
-__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
-{
- int retval;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
-
- KA_TRACE(1000, ;{
- int gtid = __kmp_entry_gtid();
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
- proc, gtid, buf);
- });
-
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
- }
- }
-
- if ((proc < 0)
-# if !KMP_USE_HWLOC
- || ((unsigned)proc >= KMP_CPU_SETSIZE)
-# endif
- ) {
- return -1;
- }
- if (! KMP_CPU_ISSET(proc, fullMask)) {
- return -2;
- }
-
- KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
- return 0;
-}
-
-
-int
-__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
-{
- int retval;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
-
- KA_TRACE(1000, ;{
- int gtid = __kmp_entry_gtid();
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
- proc, gtid, buf);
- });
-
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
- }
- }
-
- if ((proc < 0)
-# if !KMP_USE_HWLOC
- || ((unsigned)proc >= KMP_CPU_SETSIZE)
-# endif
- ) {
- return -1;
- }
- if (! KMP_CPU_ISSET(proc, fullMask)) {
- return -2;
- }
-
- KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
- return 0;
-}
-
-
-int
-__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
-{
- int retval;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
-
- KA_TRACE(1000, ;{
- int gtid = __kmp_entry_gtid();
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
- proc, gtid, buf);
- });
-
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
- }
- }
-
- if ((proc < 0)
-# if !KMP_USE_HWLOC
- || ((unsigned)proc >= KMP_CPU_SETSIZE)
-# endif
- ) {
- return -1;
- }
- if (! KMP_CPU_ISSET(proc, fullMask)) {
- return 0;
- }
-
- return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
-}
-
-
-// Dynamic affinity settings - Affinity balanced
-void __kmp_balanced_affinity( int tid, int nthreads )
-{
- if( __kmp_affinity_uniform_topology() ) {
- int coreID;
- int threadID;
- // Number of hyper threads per core in HT machine
- int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
- // Number of cores
- int ncores = __kmp_ncores;
- // How many threads will be bound to each core
- int chunk = nthreads / ncores;
- // How many cores will have an additional thread bound to it - "big cores"
- int big_cores = nthreads % ncores;
- // Number of threads on the big cores
- int big_nth = ( chunk + 1 ) * big_cores;
- if( tid < big_nth ) {
- coreID = tid / (chunk + 1 );
- threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
- } else { //tid >= big_nth
- coreID = ( tid - big_cores ) / chunk;
- threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
- }
-
- KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
-
- kmp_affin_mask_t *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
-
- // Granularity == thread
- if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
- int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
- KMP_CPU_SET( osID, mask);
- } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
- for( int i = 0; i < __kmp_nth_per_core; i++ ) {
- int osID;
- osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
- KMP_CPU_SET( osID, mask);
- }
- }
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- tid, buf);
- }
- __kmp_set_system_affinity( mask, TRUE );
- KMP_CPU_FREE_FROM_STACK(mask);
- } else { // Non-uniform topology
-
- kmp_affin_mask_t *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
-
- // Number of hyper threads per core in HT machine
- int nth_per_core = __kmp_nThreadsPerCore;
- int core_level;
- if( nth_per_core > 1 ) {
- core_level = __kmp_aff_depth - 2;
- } else {
- core_level = __kmp_aff_depth - 1;
- }
-
- // Number of cores - maximum value; it does not count trailing cores with 0 processors
- int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
-
- // For performance gain consider the special case nthreads == __kmp_avail_proc
- if( nthreads == __kmp_avail_proc ) {
- if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
- int osID = address2os[ tid ].second;
- KMP_CPU_SET( osID, mask);
- } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
- int coreID = address2os[ tid ].first.labels[ core_level ];
- // Count the osIDs found for the current core; there can be no more than nth_per_core of them.
- // Since address2os is sorted, we can break when cnt==nth_per_core.
- int cnt = 0;
- for( int i = 0; i < __kmp_avail_proc; i++ ) {
- int osID = address2os[ i ].second;
- int core = address2os[ i ].first.labels[ core_level ];
- if( core == coreID ) {
- KMP_CPU_SET( osID, mask);
- cnt++;
- if( cnt == nth_per_core ) {
- break;
- }
- }
- }
- }
- } else if( nthreads <= __kmp_ncores ) {
-
- int core = 0;
- for( int i = 0; i < ncores; i++ ) {
- // Check if this core from procarr[] is in the mask
- int in_mask = 0;
- for( int j = 0; j < nth_per_core; j++ ) {
- if( procarr[ i * nth_per_core + j ] != - 1 ) {
- in_mask = 1;
- break;
- }
- }
- if( in_mask ) {
- if( tid == core ) {
- for( int j = 0; j < nth_per_core; j++ ) {
- int osID = procarr[ i * nth_per_core + j ];
- if( osID != -1 ) {
- KMP_CPU_SET( osID, mask );
- // For granularity=thread it is enough to set the first available osID for this core
- if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
- break;
- }
- }
- }
- break;
- } else {
- core++;
- }
- }
- }
-
- } else { // nthreads > __kmp_ncores
-
- // Array to save the number of processors at each core
- int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
- // Array to save the number of cores with "x" available processors;
- int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
- // Array to save the number of cores with # procs from x to nth_per_core
- int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
-
- for( int i = 0; i <= nth_per_core; i++ ) {
- ncores_with_x_procs[ i ] = 0;
- ncores_with_x_to_max_procs[ i ] = 0;
- }
-
- for( int i = 0; i < ncores; i++ ) {
- int cnt = 0;
- for( int j = 0; j < nth_per_core; j++ ) {
- if( procarr[ i * nth_per_core + j ] != -1 ) {
- cnt++;
- }
- }
- nproc_at_core[ i ] = cnt;
- ncores_with_x_procs[ cnt ]++;
- }
-
- for( int i = 0; i <= nth_per_core; i++ ) {
- for( int j = i; j <= nth_per_core; j++ ) {
- ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
- }
- }
-
- // Max number of processors
- int nproc = nth_per_core * ncores;
- // An array to keep number of threads per each context
- int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
- for( int i = 0; i < nproc; i++ ) {
- newarr[ i ] = 0;
- }
-
- int nth = nthreads;
- int flag = 0;
- while( nth > 0 ) {
- for( int j = 1; j <= nth_per_core; j++ ) {
- int cnt = ncores_with_x_to_max_procs[ j ];
- for( int i = 0; i < ncores; i++ ) {
- // Skip the core with 0 processors
- if( nproc_at_core[ i ] == 0 ) {
- continue;
- }
- for( int k = 0; k < nth_per_core; k++ ) {
- if( procarr[ i * nth_per_core + k ] != -1 ) {
- if( newarr[ i * nth_per_core + k ] == 0 ) {
- newarr[ i * nth_per_core + k ] = 1;
- cnt--;
- nth--;
- break;
- } else {
- if( flag != 0 ) {
- newarr[ i * nth_per_core + k ] ++;
- cnt--;
- nth--;
- break;
- }
- }
- }
- }
- if( cnt == 0 || nth == 0 ) {
- break;
- }
- }
- if( nth == 0 ) {
- break;
- }
- }
- flag = 1;
- }
- int sum = 0;
- for( int i = 0; i < nproc; i++ ) {
- sum += newarr[ i ];
- if( sum > tid ) {
- // Granularity == thread
- if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
- int osID = procarr[ i ];
- KMP_CPU_SET( osID, mask);
- } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
- int coreID = i / nth_per_core;
- for( int ii = 0; ii < nth_per_core; ii++ ) {
- int osID = procarr[ coreID * nth_per_core + ii ];
- if( osID != -1 ) {
- KMP_CPU_SET( osID, mask);
- }
- }
- }
- break;
- }
- }
- __kmp_free( newarr );
- }
-
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- tid, buf);
- }
- __kmp_set_system_affinity( mask, TRUE );
- KMP_CPU_FREE_FROM_STACK(mask);
- }
-}
-
-#endif // KMP_AFFINITY_SUPPORTED
+/*
+ * kmp_affinity.cpp -- affinity management
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_str.h"
+#include "kmp_wrapper_getpid.h"
+#include "kmp_affinity.h"
+
+// Store the real or imagined machine hierarchy here
+static hierarchy_info machine_hierarchy;
+
+void __kmp_cleanup_hierarchy() {
+ machine_hierarchy.fini();
+}
+
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
+ kmp_uint32 depth;
+ // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
+ if (TCR_1(machine_hierarchy.uninitialized))
+ machine_hierarchy.init(NULL, nproc);
+
+ // Adjust the hierarchy in case num threads exceeds original
+ if (nproc > machine_hierarchy.base_num_threads)
+ machine_hierarchy.resize(nproc);
+
+ depth = machine_hierarchy.depth;
+ KMP_DEBUG_ASSERT(depth > 0);
+
+ thr_bar->depth = depth;
+ thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
+ thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
+}
+
+#if KMP_AFFINITY_SUPPORTED
+
+//
+// Print the affinity mask to the character array in a pretty format.
+//
+#if KMP_USE_HWLOC
+char *
+__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
+{
+ int num_chars_to_write, num_chars_written;
+ char* scan;
+ KMP_ASSERT(buf_len >= 40);
+
+ // bufsize of 0 just retrieves the needed buffer size.
+ num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
+
+ // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
+ // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
+ // take into account the '\0' character.
+ if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
+ KMP_SNPRINTF(buf, buf_len, "{<empty>}");
+ } else if(num_chars_to_write < buf_len - 3) {
+ // no problem fitting the mask into buf_len number of characters
+ buf[0] = '{';
+ // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
+ num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
+ buf[num_chars_written+1] = '}';
+ buf[num_chars_written+2] = '\0';
+ } else {
+ // Need to truncate the affinity mask string and add ellipsis.
+ // To do this, we first write out the '{' + str(mask)
+ buf[0] = '{';
+ hwloc_bitmap_list_snprintf(buf+1, buf_len-7, (hwloc_bitmap_t)mask);
+ // Then, starting from the 7th-to-last character, scan backwards until we are NOT on a digit,
+ // and write "...}\0" there.  This keeps the ellipsis clean and avoids overwriting part of an
+ // affinity number, i.e., we avoid something like { 45, 67, 8...} and get { 45, 67,...} instead.
+ scan = buf + buf_len - 7;
+ while(*scan >= '0' && *scan <= '9' && scan >= buf)
+ scan--;
+ *(scan+1) = '.';
+ *(scan+2) = '.';
+ *(scan+3) = '.';
+ *(scan+4) = '}';
+ *(scan+5) = '\0';
+ }
+ return buf;
+}
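+
+// Illustrative example (not from the original source): with a hypothetical
+// mask containing OS procs 0-3 and buf_len = 40, hwloc_bitmap_list_snprintf()
+// emits "0-3", so the buffer ends up as "{0-3}".  For a list too long to fit,
+// the code above backs up to the last complete number and closes with "...}",
+// e.g. "{0,4,8,...}" rather than cutting a digit in half.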
+#else
+char *
+__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
+{
+ KMP_ASSERT(buf_len >= 40);
+ char *scan = buf;
+ char *end = buf + buf_len - 1;
+
+ //
+ // Find first element / check for empty set.
+ //
+ size_t i;
+ for (i = 0; i < KMP_CPU_SETSIZE; i++) {
+ if (KMP_CPU_ISSET(i, mask)) {
+ break;
+ }
+ }
+ if (i == KMP_CPU_SETSIZE) {
+ KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
+ while (*scan != '\0') scan++;
+ KMP_ASSERT(scan <= end);
+ return buf;
+ }
+
+ KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
+ while (*scan != '\0') scan++;
+ i++;
+ for (; i < KMP_CPU_SETSIZE; i++) {
+ if (! KMP_CPU_ISSET(i, mask)) {
+ continue;
+ }
+
+ //
+ // Check for buffer overflow. A string of the form ",<n>" will have
+ // at most 10 characters, plus we want to leave room to print ",...}"
+ // if the set is too large to print for a total of 15 characters.
+ // We already left room for '\0' in setting end.
+ //
+ if (end - scan < 15) {
+ break;
+ }
+ KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
+ while (*scan != '\0') scan++;
+ }
+ if (i < KMP_CPU_SETSIZE) {
+ KMP_SNPRINTF(scan, end-scan+1, ",...");
+ while (*scan != '\0') scan++;
+ }
+ KMP_SNPRINTF(scan, end-scan+1, "}");
+ while (*scan != '\0') scan++;
+ KMP_ASSERT(scan <= end);
+ return buf;
+}
+#endif // KMP_USE_HWLOC
+
+
+void
+__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
+{
+ KMP_CPU_ZERO(mask);
+
+# if KMP_GROUP_AFFINITY
+
+ if (__kmp_num_proc_groups > 1) {
+ int group;
+ KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
+ for (group = 0; group < __kmp_num_proc_groups; group++) {
+ int i;
+ int num = __kmp_GetActiveProcessorCount(group);
+ for (i = 0; i < num; i++) {
+ KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
+ }
+ }
+ }
+ else
+
+# endif /* KMP_GROUP_AFFINITY */
+
+ {
+ int proc;
+ for (proc = 0; proc < __kmp_xproc; proc++) {
+ KMP_CPU_SET(proc, mask);
+ }
+ }
+}
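+
+// Illustrative example (not from the original source): on a hypothetical
+// Windows* OS machine with two 64-bit processor groups of 36 procs each, the
+// group branch above sets bits 0..35 for group 0 and bits 64..99 for group 1,
+// since each group is given a fixed stride of CHAR_BIT * sizeof(DWORD_PTR)
+// (= 64) bit positions in the mask.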
+
+//
+// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
+// called to renumber the labels from [0..n] and place them into the child_num
+// vector of the address object. This is done in case the labels used for
+// the children at one node of the hierarchy differ from those used for
+// another node at the same level. Example: suppose the machine has 2 nodes
+// with 2 packages each. The first node contains packages 601 and 602, and
+// the second node contains packages 603 and 604. If we try to sort the table
+// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
+// because we are paying attention to the labels themselves, not the ordinal
+// child numbers. By using the child numbers in the sort, the result is
+// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
+//
+static void
+__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
+ int numAddrs)
+{
+ KMP_DEBUG_ASSERT(numAddrs > 0);
+ int depth = address2os->first.depth;
+ unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
+ * sizeof(unsigned));
+ int labCt;
+ for (labCt = 0; labCt < depth; labCt++) {
+ address2os[0].first.childNums[labCt] = counts[labCt] = 0;
+ lastLabel[labCt] = address2os[0].first.labels[labCt];
+ }
+ int i;
+ for (i = 1; i < numAddrs; i++) {
+ for (labCt = 0; labCt < depth; labCt++) {
+ if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
+ int labCt2;
+ for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
+ counts[labCt2] = 0;
+ lastLabel[labCt2] = address2os[i].first.labels[labCt2];
+ }
+ counts[labCt]++;
+ lastLabel[labCt] = address2os[i].first.labels[labCt];
+ break;
+ }
+ }
+ for (labCt = 0; labCt < depth; labCt++) {
+ address2os[i].first.childNums[labCt] = counts[labCt];
+ }
+ for (; labCt < (int)Address::maxDepth; labCt++) {
+ address2os[i].first.childNums[labCt] = 0;
+ }
+ }
+}
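+
+// Illustrative trace (not from the original source), continuing the 601..604
+// example above: the loop assigns ordinal child numbers 601 -> {0,0},
+// 602 -> {0,1}, 603 -> {1,0}, 604 -> {1,1}, so later sorts can compare
+// positions within each node instead of the raw package labels.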
+
+
+//
+// All of the __kmp_affinity_create_*_map() routines should set
+// __kmp_affinity_masks to a vector of affinity mask objects of length
+// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
+// return the number of levels in the machine topology tree (zero if
+// __kmp_affinity_type == affinity_none).
+//
+// All of the __kmp_affinity_create_*_map() routines should set *fullMask
+// to the affinity mask for the initialization thread. They need to save and
+// restore the mask, and it could be needed later, so saving it is just an
+// optimization to avoid calling kmp_get_system_affinity() again.
+//
+static kmp_affin_mask_t *fullMask = NULL;
+
+kmp_affin_mask_t *
+__kmp_affinity_get_fullMask() { return fullMask; }
+
+
+static int nCoresPerPkg, nPackages;
+static int __kmp_nThreadsPerCore;
+#ifndef KMP_DFLT_NTH_CORES
+static int __kmp_ncores;
+#endif
+
+//
+// __kmp_affinity_uniform_topology() doesn't work when called from
+// places which support arbitrarily many levels in the machine topology
+// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
+// and __kmp_affinity_create_x2apicid_map().
+//
+inline static bool
+__kmp_affinity_uniform_topology()
+{
+ return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
+}
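+
+// Illustrative example (not from the original source): on a hypothetical box
+// with nPackages = 2, nCoresPerPkg = 4 and __kmp_nThreadsPerCore = 2, the
+// product is 16, so the topology is reported as uniform only if
+// __kmp_avail_proc is also 16 (e.g. it is non-uniform if some procs were
+// excluded from the startup affinity mask).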
+
+
+//
+// Print out the detailed machine topology map, i.e. the physical locations
+// of each OS proc.
+//
+static void
+__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
+ int pkgLevel, int coreLevel, int threadLevel)
+{
+ int proc;
+
+ KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
+ for (proc = 0; proc < len; proc++) {
+ int level;
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ for (level = 0; level < depth; level++) {
+ if (level == threadLevel) {
+ __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
+ }
+ else if (level == coreLevel) {
+ __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
+ }
+ else if (level == pkgLevel) {
+ __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
+ }
+ else if (level > pkgLevel) {
+ __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
+ level - pkgLevel - 1);
+ }
+ else {
+ __kmp_str_buf_print(&buf, "L%d ", level);
+ }
+ __kmp_str_buf_print(&buf, "%d ",
+ address2os[proc].first.labels[level]);
+ }
+ KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
+ buf.str);
+ __kmp_str_buf_free(&buf);
+ }
+}
+
+#if KMP_USE_HWLOC
+static int
+__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id)
+{
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ //
+ // Save the affinity mask for the current thread.
+ //
+ kmp_affin_mask_t *oldMask;
+ KMP_CPU_ALLOC(oldMask);
+ __kmp_get_system_affinity(oldMask, TRUE);
+
+ unsigned depth = hwloc_topology_get_depth(__kmp_hwloc_topology);
+ int threadLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_PU);
+ int coreLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
+ int pkgLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
+ __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 0;
+
+ //
+ // This makes an assumption about the topology being four levels:
+ // machines -> packages -> cores -> hardware threads
+ //
+ hwloc_obj_t current_level_iterator = hwloc_get_root_obj(__kmp_hwloc_topology);
+ hwloc_obj_t child_iterator;
+ for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+ child_iterator != NULL;
+ child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+ {
+ nPackages++;
+ }
+ current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, pkgLevel, 0);
+ for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+ child_iterator != NULL;
+ child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+ {
+ nCoresPerPkg++;
+ }
+ current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, coreLevel, 0);
+ for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+ child_iterator != NULL;
+ child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+ {
+ __kmp_nThreadsPerCore++;
+ }
+
+ if (! KMP_AFFINITY_CAPABLE())
+ {
+ //
+ // Hack to try and infer the machine topology using only the data
+ // available from cpuid on the current thread, and __kmp_xproc.
+ //
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+ __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
+ nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ return 0;
+ }
+
+ //
+ // Allocate the data structure to be returned.
+ //
+ AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+
+ unsigned num_hardware_threads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
+ unsigned i;
+ hwloc_obj_t hardware_thread_iterator;
+ int nActiveThreads = 0;
+ for(i=0;i<num_hardware_threads;i++) {
+ hardware_thread_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, threadLevel, i);
+ Address addr(3);
+ if(! KMP_CPU_ISSET(i, fullMask)) continue;
+ addr.labels[0] = hardware_thread_iterator->parent->parent->logical_index;
+ addr.labels[1] = hardware_thread_iterator->parent->logical_index % nCoresPerPkg;
+ addr.labels[2] = hardware_thread_iterator->logical_index % __kmp_nThreadsPerCore;
+ retval[nActiveThreads] = AddrUnsPair(addr, hardware_thread_iterator->os_index);
+ nActiveThreads++;
+ }
+
+ //
+ // If there's only one thread context to bind to, return now.
+ //
+ KMP_ASSERT(nActiveThreads > 0);
+ if (nActiveThreads == 1) {
+ __kmp_ncores = nPackages = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(retval);
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
+
+ //
+ // Form an Address object which only includes the package level.
+ //
+ Address addr(1);
+ addr.labels[0] = retval[0].first.labels[pkgLevel-1];
+ retval[0].first = addr;
+
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
+ }
+
+ *address2os = retval;
+ KMP_CPU_FREE(oldMask);
+ return 1;
+ }
+
+ //
+ // Sort the table by physical Id.
+ //
+ qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
+
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return if affinity is not enabled.
+ //
+ __kmp_ncores = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, coreLevel);
+
+ //
+ // Check to see if the machine topology is uniform
+ //
+ unsigned npackages = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, pkgLevel);
+ unsigned ncores = __kmp_ncores;
+ unsigned nthreads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
+ unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
+
+ //
+ // Print the machine topology summary.
+ //
+ if (__kmp_affinity_verbose) {
+ char mask[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+
+ __kmp_str_buf_print(&buf, "%d", npackages);
+ //for (level = 1; level <= pkgLevel; level++) {
+ // __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
+ // }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+
+ __kmp_str_buf_free(&buf);
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
+
+ //
+ // Find any levels with radix 1, and remove them from the map
+ // (except for the package level).
+ //
+ int new_depth = 0;
+ int level;
+ unsigned proc;
+ for (level = 1; level < (int)depth; level++) {
+ if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
+ continue;
+ }
+ new_depth++;
+ }
+
+ //
+ // If we are removing any levels, allocate a new vector to return,
+ // and copy the relevant information to it.
+ //
+ if (new_depth != depth-1) {
+ AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
+ sizeof(AddrUnsPair) * nActiveThreads);
+ for (proc = 0; (int)proc < nActiveThreads; proc++) {
+ Address addr(new_depth);
+ new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
+ }
+ int new_level = 0;
+ for (level = 1; level < (int)depth; level++) {
+ if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
+ if (level == threadLevel) {
+ threadLevel = -1;
+ }
+ else if ((threadLevel >= 0) && (level < threadLevel)) {
+ threadLevel--;
+ }
+ if (level == coreLevel) {
+ coreLevel = -1;
+ }
+ else if ((coreLevel >= 0) && (level < coreLevel)) {
+ coreLevel--;
+ }
+ if (level < pkgLevel) {
+ pkgLevel--;
+ }
+ continue;
+ }
+ for (proc = 0; (int)proc < nActiveThreads; proc++) {
+ new_retval[proc].first.labels[new_level]
+ = retval[proc].first.labels[level];
+ }
+ new_level++;
+ }
+
+ __kmp_free(retval);
+ retval = new_retval;
+ depth = new_depth;
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ //
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ //
+ __kmp_affinity_gran_levels = 0;
+ if ((threadLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((coreLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels++;
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(retval, nActiveThreads, depth-1, pkgLevel-1,
+ coreLevel-1, threadLevel-1);
+ }
+
+ KMP_CPU_FREE(oldMask);
+ *address2os = retval;
+ if(depth == 0) return 0;
+ else return depth-1;
+}
+#endif // KMP_USE_HWLOC
+
+//
+// If we don't know how to retrieve the machine's processor topology, or
+// encounter an error in doing so, this routine is called to form a "flat"
+// mapping of os thread id's <-> processor id's.
+//
+static int
+__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id)
+{
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ //
+ // Even if __kmp_affinity_type == affinity_none, this routine might still
+ // be called to set __kmp_ncores, as well as
+ // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+ //
+ if (! KMP_AFFINITY_CAPABLE()) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ __kmp_ncores = nPackages = __kmp_xproc;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ return 0;
+ }
+
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return now if affinity is not enabled.
+ //
+ __kmp_ncores = nPackages = __kmp_avail_proc;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
+
+ KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ if (__kmp_affinity_type == affinity_none) {
+ return 0;
+ }
+
+ //
+ // Construct the data structure to be returned.
+ //
+ *address2os = (AddrUnsPair*)
+ __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ int avail_ct = 0;
+ unsigned int i;
+ KMP_CPU_SET_ITERATE(i, fullMask) {
+ //
+ // Skip this proc if it is not included in the machine model.
+ //
+ if (! KMP_CPU_ISSET(i, fullMask)) {
+ continue;
+ }
+
+ Address addr(1);
+ addr.labels[0] = i;
+ (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
+ }
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ //
+ // Only the package level is modeled in the machine topology map,
+ // so the #levels of granularity is either 0 or 1.
+ //
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels = 1;
+ }
+ else {
+ __kmp_affinity_gran_levels = 0;
+ }
+ }
+ return 1;
+}
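+
+// Illustrative example (not from the original source): on a hypothetical
+// 8-proc machine with every proc in fullMask, the flat map built above is
+// simply ({0},0), ({1},1), ..., ({7},7) - one depth-1 Address per OS proc,
+// with each proc treated as its own package.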
+
+
+# if KMP_GROUP_AFFINITY
+
+//
+// If multiple Windows* OS processor groups exist, we can create a 2-level
+// topology map with the groups at level 0 and the individual procs at
+// level 1.
+//
+// This facilitates letting the threads float among all procs in a group,
+// if granularity=group (the default when there are multiple groups).
+//
+static int
+__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id)
+{
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ //
+ // If we don't have multiple processor groups, return now.
+ // The flat mapping will be used.
+ //
+ if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
+ // FIXME set *msg_id
+ return -1;
+ }
+
+ //
+ // Construct the data structure to be returned.
+ //
+ *address2os = (AddrUnsPair*)
+ __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ int avail_ct = 0;
+ int i;
+ KMP_CPU_SET_ITERATE(i, fullMask) {
+ //
+ // Skip this proc if it is not included in the machine model.
+ //
+ if (! KMP_CPU_ISSET(i, fullMask)) {
+ continue;
+ }
+
+ Address addr(2);
+ addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
+ addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
+ (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
+
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
+ addr.labels[1]);
+ }
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ if (__kmp_affinity_gran == affinity_gran_group) {
+ __kmp_affinity_gran_levels = 1;
+ }
+ else if ((__kmp_affinity_gran == affinity_gran_fine)
+ || (__kmp_affinity_gran == affinity_gran_thread)) {
+ __kmp_affinity_gran_levels = 0;
+ }
+ else {
+ const char *gran_str = NULL;
+ if (__kmp_affinity_gran == affinity_gran_core) {
+ gran_str = "core";
+ }
+ else if (__kmp_affinity_gran == affinity_gran_package) {
+ gran_str = "package";
+ }
+ else if (__kmp_affinity_gran == affinity_gran_node) {
+ gran_str = "node";
+ }
+ else {
+ KMP_ASSERT(0);
+ }
+
+ // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
+ __kmp_affinity_gran_levels = 0;
+ }
+ }
+ return 2;
+}
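+
+// Illustrative example (not from the original source): with a 64-bit
+// DWORD_PTR, OS proc 70 gets the Address labels {1, 6} above - group 1,
+// proc 6 within that group - which is exactly the 2-level (group, proc)
+// topology described in the comment before this routine.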
+
+# endif /* KMP_GROUP_AFFINITY */
+
+
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+static int
+__kmp_cpuid_mask_width(int count) {
+ int r = 0;
+
+ while((1<<r) < count)
+ ++r;
+ return r;
+}
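+
+// Illustrative examples (not from the original source):
+// __kmp_cpuid_mask_width(1) == 0, __kmp_cpuid_mask_width(6) == 3 (since
+// 1<<3 = 8 >= 6), and __kmp_cpuid_mask_width(16) == 4; i.e. the routine
+// returns ceil(log2(count)), the number of APIC id bits needed to encode
+// 'count' distinct values.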
+
+
+class apicThreadInfo {
+public:
+ unsigned osId; // param to __kmp_affinity_bind_thread
+ unsigned apicId; // from cpuid after binding
+ unsigned maxCoresPerPkg; // ""
+ unsigned maxThreadsPerPkg; // ""
+ unsigned pkgId; // inferred from above values
+ unsigned coreId; // ""
+ unsigned threadId; // ""
+};
+
+
+static int
+__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
+{
+ const apicThreadInfo *aa = (const apicThreadInfo *)a;
+ const apicThreadInfo *bb = (const apicThreadInfo *)b;
+ if (aa->osId < bb->osId) return -1;
+ if (aa->osId > bb->osId) return 1;
+ return 0;
+}
+
+
+static int
+__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
+{
+ const apicThreadInfo *aa = (const apicThreadInfo *)a;
+ const apicThreadInfo *bb = (const apicThreadInfo *)b;
+ if (aa->pkgId < bb->pkgId) return -1;
+ if (aa->pkgId > bb->pkgId) return 1;
+ if (aa->coreId < bb->coreId) return -1;
+ if (aa->coreId > bb->coreId) return 1;
+ if (aa->threadId < bb->threadId) return -1;
+ if (aa->threadId > bb->threadId) return 1;
+ return 0;
+}
+
+
+//
+// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
+// an algorithm which cycles through the available os threads, setting
+// the current thread's affinity mask to that thread, and then retrieves
+// the Apic Id for each thread context using the cpuid instruction.
+//
+static int
+__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id)
+{
+ kmp_cpuid buf;
+ int rc;
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ //
+ // Check if cpuid leaf 4 is supported.
+ //
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax < 4) {
+ *msg_id = kmp_i18n_str_NoLeaf4Support;
+ return -1;
+ }
+
+ //
+ // The algorithm used starts by setting the affinity to each available
+ // thread and retrieving info from the cpuid instruction, so if we are
+ // not capable of calling __kmp_get_system_affinity() and
+ // _kmp_get_system_affinity(), then we need to do something else - use
+ // __kmp_set_system_affinity(), then we need to do something else - use
+ // to each proc.
+ //
+ if (! KMP_AFFINITY_CAPABLE()) {
+ //
+ // Hack to try and infer the machine topology using only the data
+ // available from cpuid on the current thread, and __kmp_xproc.
+ //
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+ //
+ // Get an upper bound on the number of threads per package using
+ // cpuid(1).
+ //
+ // On some OS/chip combinations where HT is supported by the chip
+ // but is disabled, this value will be 2 on a single core chip.
+ // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
+ //
+ __kmp_x86_cpuid(1, 0, &buf);
+ int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
+ if (maxThreadsPerPkg == 0) {
+ maxThreadsPerPkg = 1;
+ }
+
+ //
+ // The num cores per pkg comes from cpuid(4).
+ // 1 must be added to the encoded value.
+ //
+ // The author of cpu_count.cpp treated this as only an upper bound
+ // on the number of cores, but I haven't seen any cases where it
+ // was greater than the actual number of cores, so we will treat
+ // it as exact in this block of code.
+ //
+ // First, we need to check if cpuid(4) is supported on this chip.
+ // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
+ // has the value n or greater.
+ //
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax >= 4) {
+ __kmp_x86_cpuid(4, 0, &buf);
+ nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
+ }
+ else {
+ nCoresPerPkg = 1;
+ }
+
+ //
+ // There is no way to reliably tell if HT is enabled without issuing
+ // the cpuid instruction from every thread, and correlating the cpuid
+ // info, so if the machine is not affinity capable, we assume that HT
+ // is off. We have seen quite a few machines where maxThreadsPerPkg
+ // is 2, yet the machine does not support HT.
+ //
+ // - Older OSes are usually found on machines with older chips, which
+ // do not support HT.
+ //
+ // - The performance penalty for mistakenly identifying a machine as
+ //   HT when it isn't (which results in blocktime being incorrectly set
+ //   to 0) is greater than the penalty for mistakenly identifying
+ // a machine as being 1 thread/core when it is really HT enabled
+ // (which results in blocktime being incorrectly set to a positive
+ // value).
+ //
+ __kmp_ncores = __kmp_xproc;
+ nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+ __kmp_nThreadsPerCore = 1;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ return 0;
+ }
+
+ //
+ //
+ // From here on, we can assume that it is safe to call
+ // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
+ // even if __kmp_affinity_type = affinity_none.
+ //
+
+ //
+ // Save the affinity mask for the current thread.
+ //
+ kmp_affin_mask_t *oldMask;
+ KMP_CPU_ALLOC(oldMask);
+ KMP_ASSERT(oldMask != NULL);
+ __kmp_get_system_affinity(oldMask, TRUE);
+
+ //
+ // Run through each of the available contexts, binding the current thread
+ // to it, and obtaining the pertinent information using the cpuid instr.
+ //
+ // The relevant information is:
+ //
+ // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
+ //      has a unique Apic Id, which is of the form pkg# : core# : thread#.
+ //
+ // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
+ // value of this field determines the width of the core# + thread#
+ // fields in the Apic Id. It is also an upper bound on the number
+ // of threads per package, but it has been verified that situations
+ //      happen where it is not exact.  In particular, on certain OS/chip
+ // combinations where Intel(R) Hyper-Threading Technology is supported
+ // by the chip but has
+ // been disabled, the value of this field will be 2 (for a single core
+ // chip). On other OS/chip combinations supporting
+ // Intel(R) Hyper-Threading Technology, the value of
+ // this field will be 1 when Intel(R) Hyper-Threading Technology is
+ // disabled and 2 when it is enabled.
+ //
+ // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
+ // value of this field (+1) determines the width of the core# field in
+ // the Apic Id. The comments in "cpucount.cpp" say that this value is
+ // an upper bound, but the IA-32 architecture manual says that it is
+ // exactly the number of cores per package, and I haven't seen any
+ // case where it wasn't.
+ //
+ // From this information, deduce the package Id, core Id, and thread Id,
+ // and set the corresponding fields in the apicThreadInfo struct.
+ //
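+ // Illustrative decode (not from the original source): suppose a hypothetical
+ // thread reports apicId = 0x53, maxThreadsPerPkg = 16 and maxCoresPerPkg = 8.
+ // Then widthCT = 4, widthC = 3 and widthT = 1, so pkgId = 0x53 >> 4 = 5,
+ // coreId = (0x53 >> 1) & 0x7 = 1, and threadId = 0x53 & 0x1 = 1.
+ //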
+ unsigned i;
+ apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
+ __kmp_avail_proc * sizeof(apicThreadInfo));
+ unsigned nApics = 0;
+ KMP_CPU_SET_ITERATE(i, fullMask) {
+ //
+ // Skip this proc if it is not included in the machine model.
+ //
+ if (! KMP_CPU_ISSET(i, fullMask)) {
+ continue;
+ }
+ KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
+
+ __kmp_affinity_bind_thread(i);
+ threadInfo[nApics].osId = i;
+
+ //
+ // The apic id and max threads per pkg come from cpuid(1).
+ //
+ __kmp_x86_cpuid(1, 0, &buf);
+ if (((buf.edx >> 9) & 1) == 0) { // the APIC on-chip feature bit (edx bit 9) must be set
+ __kmp_set_system_affinity(oldMask, TRUE);
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_ApicNotPresent;
+ return -1;
+ }
+ threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
+ threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
+ if (threadInfo[nApics].maxThreadsPerPkg == 0) {
+ threadInfo[nApics].maxThreadsPerPkg = 1;
+ }
+
+ //
+ // Max cores per pkg comes from cpuid(4).
+ // 1 must be added to the encoded value.
+ //
+ // First, we need to check if cpuid(4) is supported on this chip.
+ // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
+ // has the value n or greater.
+ //
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax >= 4) {
+ __kmp_x86_cpuid(4, 0, &buf);
+ threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
+ }
+ else {
+ threadInfo[nApics].maxCoresPerPkg = 1;
+ }
+
+ //
+ // Infer the pkgId / coreId / threadId using only the info
+ // obtained locally.
+ //
+ int widthCT = __kmp_cpuid_mask_width(
+ threadInfo[nApics].maxThreadsPerPkg);
+ threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
+
+ int widthC = __kmp_cpuid_mask_width(
+ threadInfo[nApics].maxCoresPerPkg);
+ int widthT = widthCT - widthC;
+ if (widthT < 0) {
+ //
+ // I've never seen this one happen, but I suppose it could, if
+ // the cpuid instruction on a chip was really screwed up.
+ // Make sure to restore the affinity mask before the tail call.
+ //
+ __kmp_set_system_affinity(oldMask, TRUE);
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+
+ int maskC = (1 << widthC) - 1;
+ threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
+ &maskC;
+
+ int maskT = (1 << widthT) - 1;
+ threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
+
+ nApics++;
+ }
+
+ //
+ // We've collected all the info we need.
+ // Restore the old affinity mask for this thread.
+ //
+ __kmp_set_system_affinity(oldMask, TRUE);
+
+ //
+ // If there's only one thread context to bind to, form an Address object
+ // with depth 1 and return immediately (or, if affinity is off, set
+ // address2os to NULL and return).
+ //
+ // If it is configured to omit the package level when there is only a
+ // single package, the logic at the end of this routine won't work if
+ // there is only a single thread - it would try to form an Address
+ // object with depth 0.
+ //
+ KMP_ASSERT(nApics > 0);
+ if (nApics == 1) {
+ __kmp_ncores = nPackages = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
+
+ *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
+ Address addr(1);
+ addr.labels[0] = threadInfo[0].pkgId;
+ (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
+
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
+ }
+
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return 1;
+ }
+
+ //
+ // Sort the threadInfo table by physical Id.
+ //
+ qsort(threadInfo, nApics, sizeof(*threadInfo),
+ __kmp_affinity_cmp_apicThreadInfo_phys_id);
+
+ //
+ // The table is now sorted by pkgId / coreId / threadId, but we really
+ // don't know the radix of any of the fields. pkgId's may be sparsely
+ // assigned among the chips on a system. Although coreId's are usually
+ // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
+ // [0..threadsPerCore-1], we don't want to make any such assumptions.
+ //
+ // For that matter, we don't know what coresPerPkg and threadsPerCore
+ // (or the total # packages) are at this point - we want to determine
+ // that now. We only have an upper bound on the first two figures.
+ //
+ // We also perform a consistency check at this point: the values returned
+ // by the cpuid instruction for any thread bound to a given package had
+ // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
+ //
+ nPackages = 1;
+ nCoresPerPkg = 1;
+ __kmp_nThreadsPerCore = 1;
+ unsigned nCores = 1;
+
+ unsigned pkgCt = 1; // to determine radii
+ unsigned lastPkgId = threadInfo[0].pkgId;
+ unsigned coreCt = 1;
+ unsigned lastCoreId = threadInfo[0].coreId;
+ unsigned threadCt = 1;
+ unsigned lastThreadId = threadInfo[0].threadId;
+
+ // intra-pkg consistency checks
+ unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
+ unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
+
+ for (i = 1; i < nApics; i++) {
+ if (threadInfo[i].pkgId != lastPkgId) {
+ nCores++;
+ pkgCt++;
+ lastPkgId = threadInfo[i].pkgId;
+ if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
+ coreCt = 1;
+ lastCoreId = threadInfo[i].coreId;
+ if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
+ threadCt = 1;
+ lastThreadId = threadInfo[i].threadId;
+
+ //
+ // This is a different package, so go on to the next iteration
+ // without doing any consistency checks. Reset the consistency
+ // check vars, though.
+ //
+ prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
+ prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
+ continue;
+ }
+
+ if (threadInfo[i].coreId != lastCoreId) {
+ nCores++;
+ coreCt++;
+ lastCoreId = threadInfo[i].coreId;
+ if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
+ threadCt = 1;
+ lastThreadId = threadInfo[i].threadId;
+ }
+ else if (threadInfo[i].threadId != lastThreadId) {
+ threadCt++;
+ lastThreadId = threadInfo[i].threadId;
+ }
+ else {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
+ return -1;
+ }
+
+ //
+ // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
+ // fields agree for all the threads bound to a given package.
+ //
+ if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
+ || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
+ return -1;
+ }
+ }
+ nPackages = pkgCt;
+ if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
+ if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
+
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return now if affinity is not enabled.
+ //
+ __kmp_ncores = nCores;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
+
+ //
+ // Now that we've determined the number of packages, the number of cores
+ // per package, and the number of threads per core, we can construct the
+ // data structure that is to be returned.
+ //
+ int pkgLevel = 0;
+ int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
+ int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
+ unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
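+ // Only levels that are actually modeled contribute to depth; a level with
+ // a single entity (one core per package, or one thread per core) was
+ // dropped above by leaving its level index at -1.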
+
+ KMP_ASSERT(depth > 0);
+ *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
+
+ for (i = 0; i < nApics; ++i) {
+ Address addr(depth);
+ unsigned os = threadInfo[i].osId;
+ int d = 0;
+
+ if (pkgLevel >= 0) {
+ addr.labels[d++] = threadInfo[i].pkgId;
+ }
+ if (coreLevel >= 0) {
+ addr.labels[d++] = threadInfo[i].coreId;
+ }
+ if (threadLevel >= 0) {
+ addr.labels[d++] = threadInfo[i].threadId;
+ }
+ (*address2os)[i] = AddrUnsPair(addr, os);
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ //
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ //
+ __kmp_affinity_gran_levels = 0;
+ if ((threadLevel >= 0)
+ && (__kmp_affinity_gran > affinity_gran_thread)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
+ __kmp_affinity_gran_levels++;
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
+ coreLevel, threadLevel);
+ }
+
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return depth;
+}
+
+
+//
+// Intel(R) microarchitecture code name Nehalem, Dunnington and later
+// architectures support a newer interface for specifying the x2APIC Ids,
+// based on cpuid leaf 11.
+//
+static int
+__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id)
+{
+ kmp_cpuid buf;
+
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ //
+ // Check to see if cpuid leaf 11 is supported.
+ //
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax < 11) {
+ *msg_id = kmp_i18n_str_NoLeaf11Support;
+ return -1;
+ }
+ __kmp_x86_cpuid(11, 0, &buf);
+ if (buf.ebx == 0) {
+ *msg_id = kmp_i18n_str_NoLeaf11Support;
+ return -1;
+ }
+
+ //
+ // Find the number of levels in the machine topology. While we're at it,
+ // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
+ // try to get more accurate values later by explicitly counting them,
+ // but get reasonable defaults now, in case we return early.
+ //
+ int level;
+ int threadLevel = -1;
+ int coreLevel = -1;
+ int pkgLevel = -1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
+
+ for (level = 0;; level++) {
+ if (level > 31) {
+ //
+ // FIXME: Hack for DPD200163180
+ //
+ // If level is big then something went wrong -> exiting
+ //
+ // There could actually be 32 valid levels in the machine topology,
+ // but so far, the only machine we have seen which does not exit
+ // this loop before iteration 32 has fubar x2APIC settings.
+ //
+ // For now, just reject this case based upon loop trip count.
+ //
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ __kmp_x86_cpuid(11, level, &buf);
+ if (buf.ebx == 0) {
+ if (pkgLevel < 0) {
+ //
+ // Will infer nPackages from __kmp_xproc
+ //
+ pkgLevel = level;
+ level++;
+ }
+ break;
+ }
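+ //
+ // ECX bits 15:8 of cpuid leaf 11 give the level type for this sub-leaf:
+ // 1 = SMT (thread) level, 2 = core level.
+ //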
+ int kind = (buf.ecx >> 8) & 0xff;
+ if (kind == 1) {
+ //
+ // SMT level
+ //
+ threadLevel = level;
+ coreLevel = -1;
+ pkgLevel = -1;
+ __kmp_nThreadsPerCore = buf.ebx & 0xff;
+ if (__kmp_nThreadsPerCore == 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ }
+ else if (kind == 2) {
+ //
+ // core level
+ //
+ coreLevel = level;
+ pkgLevel = -1;
+ nCoresPerPkg = buf.ebx & 0xff;
+ if (nCoresPerPkg == 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ }
+ else {
+ if (level <= 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ if (pkgLevel >= 0) {
+ continue;
+ }
+ pkgLevel = level;
+ nPackages = buf.ebx & 0xff;
+ if (nPackages == 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ }
+ }
+ int depth = level;
+
+ //
+ // In the above loop, "level" was counted from the finest level (usually
+ // thread) to the coarsest. The caller expects that we will place the
+ // labels in (*address2os)[].first.labels[] in the inverse order, so
+ // we need to invert the vars saying which level means what.
+ //
+ if (threadLevel >= 0) {
+ threadLevel = depth - threadLevel - 1;
+ }
+ if (coreLevel >= 0) {
+ coreLevel = depth - coreLevel - 1;
+ }
+ KMP_DEBUG_ASSERT(pkgLevel >= 0);
+ pkgLevel = depth - pkgLevel - 1;
+
+ //
+ // The algorithm used starts by setting the affinity to each available
+ // thread and retrieving info from the cpuid instruction, so if we are
+ // not capable of calling __kmp_get_system_affinity() and
+ // __kmp_set_system_affinity(), then we need to do something else - use
+ // the defaults that we calculated from issuing cpuid without binding
+ // to each proc.
+ //
+ if (! KMP_AFFINITY_CAPABLE())
+ {
+ //
+ // Hack to try and infer the machine topology using only the data
+ // available from cpuid on the current thread, and __kmp_xproc.
+ //
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+ __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
+ nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ return 0;
+ }
+
+ //
+ // From here on, we can assume that it is safe to call
+ // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
+ // even if __kmp_affinity_type = affinity_none.
+ //
+
+ //
+ // Save the affinity mask for the current thread.
+ //
+ kmp_affin_mask_t *oldMask;
+ KMP_CPU_ALLOC(oldMask);
+ __kmp_get_system_affinity(oldMask, TRUE);
+
+ //
+ // Allocate the data structure to be returned.
+ //
+ AddrUnsPair *retval = (AddrUnsPair *)
+ __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+
+ //
+ // Run through each of the available contexts, binding the current thread
+ // to it, and obtaining the pertinent information using the cpuid instr.
+ //
+ unsigned int proc;
+ int nApics = 0;
+ KMP_CPU_SET_ITERATE(proc, fullMask) {
+ //
+ // Skip this proc if it is not included in the machine model.
+ //
+ if (! KMP_CPU_ISSET(proc, fullMask)) {
+ continue;
+ }
+ KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
+
+ __kmp_affinity_bind_thread(proc);
+
+ //
+ // Extract the labels for each level in the machine topology map
+ // from the Apic ID.
+ //
+ Address addr(depth);
+ int prev_shift = 0;
+
+ for (level = 0; level < depth; level++) {
+ __kmp_x86_cpuid(11, level, &buf);
+ unsigned apicId = buf.edx;
+ if (buf.ebx == 0) {
+ if (level != depth - 1) {
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
+ return -1;
+ }
+ addr.labels[depth - level - 1] = apicId >> prev_shift;
+ level++;
+ break;
+ }
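+ //
+ // EAX bits 4:0 of leaf 11 give the number of low-order apicId bits that
+ // span this level and all finer levels; mask them off, then shift away
+ // the bits already attributed to finer levels (prev_shift).
+ //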
+ int shift = buf.eax & 0x1f;
+ int mask = (1 << shift) - 1;
+ addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
+ prev_shift = shift;
+ }
+ if (level != depth) {
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
+ return -1;
+ }
+
+ retval[nApics] = AddrUnsPair(addr, proc);
+ nApics++;
+ }
+
+ //
+ // We've collected all the info we need.
+ // Restore the old affinity mask for this thread.
+ //
+ __kmp_set_system_affinity(oldMask, TRUE);
+
+ //
+ // If there's only one thread context to bind to, return now.
+ //
+ KMP_ASSERT(nApics > 0);
+ if (nApics == 1) {
+ __kmp_ncores = nPackages = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(retval);
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
+
+ //
+ // Form an Address object which only includes the package level.
+ //
+ Address addr(1);
+ addr.labels[0] = retval[0].first.labels[pkgLevel];
+ retval[0].first = addr;
+
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
+ }
+
+ *address2os = retval;
+ KMP_CPU_FREE(oldMask);
+ return 1;
+ }
+
+ //
+ // Sort the table by physical Id.
+ //
+ qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
+
+ //
+ // Find the radix at each of the levels.
+ //
+ unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ for (level = 0; level < depth; level++) {
+ totals[level] = 1;
+ maxCt[level] = 1;
+ counts[level] = 1;
+ last[level] = retval[0].first.labels[level];
+ }
+
+ //
+ // From here on, the iteration variable "level" runs from the finest
+ // level to the coarsest, i.e. we iterate forward through
+ // (*address2os)[].first.labels[] - in the previous loops, we iterated
+ // backwards.
+ //
+ for (proc = 1; (int)proc < nApics; proc++) {
+ int level;
+ for (level = 0; level < depth; level++) {
+ if (retval[proc].first.labels[level] != last[level]) {
+ int j;
+ for (j = level + 1; j < depth; j++) {
+ totals[j]++;
+ counts[j] = 1;
+ // The line below caused incorrect topology information to be
+ // printed when the maximum value for some level (maxCt[level])
+ // was encountered in the array before a smaller value.
+ // For example, if pkg0 has 4 cores and pkg1 has 2 cores, then
+ // maxCt[1] == 2 whereas it must be 4.
+ // TODO!!! Check if it can be commented safely
+ //maxCt[j] = 1;
+ last[j] = retval[proc].first.labels[j];
+ }
+ totals[level]++;
+ counts[level]++;
+ if (counts[level] > maxCt[level]) {
+ maxCt[level] = counts[level];
+ }
+ last[level] = retval[proc].first.labels[level];
+ break;
+ }
+ else if (level == depth - 1) {
+ __kmp_free(last);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ __kmp_free(totals);
+ __kmp_free(retval);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
+ return -1;
+ }
+ }
+ }
+
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return if affinity is not enabled.
+ //
+ if (threadLevel >= 0) {
+ __kmp_nThreadsPerCore = maxCt[threadLevel];
+ }
+ else {
+ __kmp_nThreadsPerCore = 1;
+ }
+ nPackages = totals[pkgLevel];
+
+ if (coreLevel >= 0) {
+ __kmp_ncores = totals[coreLevel];
+ nCoresPerPkg = maxCt[coreLevel];
+ }
+ else {
+ __kmp_ncores = nPackages;
+ nCoresPerPkg = 1;
+ }
+
+ //
+ // Check to see if the machine topology is uniform
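+ // (i.e. every package has the same number of cores, and every core has
+ // the same number of threads, so the product of the per-level maxima
+ // equals the total number of entities at the finest level).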
+ //
+ unsigned prod = maxCt[0];
+ for (level = 1; level < depth; level++) {
+ prod *= maxCt[level];
+ }
+ bool uniform = (prod == totals[level - 1]);
+
+ //
+ // Print the machine topology summary.
+ //
+ if (__kmp_affinity_verbose) {
+ char mask[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+
+ __kmp_str_buf_print(&buf, "%d", totals[0]);
+ for (level = 1; level <= pkgLevel; level++) {
+ __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
+ }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+
+ __kmp_str_buf_free(&buf);
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(last);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ __kmp_free(totals);
+ __kmp_free(retval);
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
+
+ //
+ // Find any levels with radix 1, and remove them from the map
+ // (except for the package level).
+ //
+ int new_depth = 0;
+ for (level = 0; level < depth; level++) {
+ if ((maxCt[level] == 1) && (level != pkgLevel)) {
+ continue;
+ }
+ new_depth++;
+ }
+
+ //
+ // If we are removing any levels, allocate a new vector to return,
+ // and copy the relevant information to it.
+ //
+ if (new_depth != depth) {
+ AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
+ sizeof(AddrUnsPair) * nApics);
+ for (proc = 0; (int)proc < nApics; proc++) {
+ Address addr(new_depth);
+ new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
+ }
+ int new_level = 0;
+ int newPkgLevel = -1;
+ int newCoreLevel = -1;
+ int newThreadLevel = -1;
+ int i;
+ for (level = 0; level < depth; level++) {
+ if ((maxCt[level] == 1)
+ && (level != pkgLevel)) {
+ //
+ // Remove this level. Never remove the package level
+ //
+ continue;
+ }
+ if (level == pkgLevel) {
+ newPkgLevel = level;
+ }
+ if (level == coreLevel) {
+ newCoreLevel = level;
+ }
+ if (level == threadLevel) {
+ newThreadLevel = level;
+ }
+ for (proc = 0; (int)proc < nApics; proc++) {
+ new_retval[proc].first.labels[new_level]
+ = retval[proc].first.labels[level];
+ }
+ new_level++;
+ }
+
+ __kmp_free(retval);
+ retval = new_retval;
+ depth = new_depth;
+ pkgLevel = newPkgLevel;
+ coreLevel = newCoreLevel;
+ threadLevel = newThreadLevel;
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ //
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ //
+ __kmp_affinity_gran_levels = 0;
+ if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels++;
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
+ coreLevel, threadLevel);
+ }
+
+ __kmp_free(last);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ __kmp_free(totals);
+ KMP_CPU_FREE(oldMask);
+ *address2os = retval;
+ return depth;
+}
+
+
+# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+
+#define osIdIndex 0
+#define threadIdIndex 1
+#define coreIdIndex 2
+#define pkgIdIndex 3
+#define nodeIdIndex 4
+
+typedef unsigned *ProcCpuInfo;
+static unsigned maxIndex = pkgIdIndex;
+
+
+static int
+__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
+{
+ const unsigned *aa = (const unsigned *)a;
+ const unsigned *bb = (const unsigned *)b;
+ if (aa[osIdIndex] < bb[osIdIndex]) return -1;
+ if (aa[osIdIndex] > bb[osIdIndex]) return 1;
+ return 0;
+};
+
+
+static int
+__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
+{
+ unsigned i;
+ const unsigned *aa = *((const unsigned **)a);
+ const unsigned *bb = *((const unsigned **)b);
+ for (i = maxIndex; ; i--) {
+ if (aa[i] < bb[i]) return -1;
+ if (aa[i] > bb[i]) return 1;
+ if (i == osIdIndex) break;
+ }
+ return 0;
+}
+
+
+//
+// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
+// affinity map.
+//
+static int
+__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
+ kmp_i18n_id_t *const msg_id, FILE *f)
+{
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ //
+ // Scan the file, counting the number of "processor" (osId) fields, and
+ // find the highest value of <n> for any node_<n> field.
+ //
+ char buf[256];
+ unsigned num_records = 0;
+ while (! feof(f)) {
+ buf[sizeof(buf) - 1] = 1;
+ if (! fgets(buf, sizeof(buf), f)) {
+ //
+ // Read error, presumably due to EOF.
+ //
+ break;
+ }
+
+ char s1[] = "processor";
+ if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
+ num_records++;
+ continue;
+ }
+
+ //
+ // FIXME - this will match "node_<n> <garbage>"
+ //
+ unsigned level;
+ if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
+ if (nodeIdIndex + level >= maxIndex) {
+ maxIndex = nodeIdIndex + level;
+ }
+ continue;
+ }
+ }
+
+ //
+ // Check for empty file / no valid processor records, or too many.
+ // The number of records can't exceed the number of valid bits in the
+ // affinity mask.
+ //
+ if (num_records == 0) {
+ *line = 0;
+ *msg_id = kmp_i18n_str_NoProcRecords;
+ return -1;
+ }
+ if (num_records > (unsigned)__kmp_xproc) {
+ *line = 0;
+ *msg_id = kmp_i18n_str_TooManyProcRecords;
+ return -1;
+ }
+
+ //
+ // Set the file pointer back to the beginning, so that we can scan the
+ // file again, this time performing a full parse of the data.
+ // Allocate a vector of ProcCpuInfo objects, where we will place the data.
+ // Adding an extra element at the end allows us to remove a lot of extra
+ // checks for termination conditions.
+ //
+ if (fseek(f, 0, SEEK_SET) != 0) {
+ *line = 0;
+ *msg_id = kmp_i18n_str_CantRewindCpuinfo;
+ return -1;
+ }
+
+ //
+ // Allocate the array of records to store the proc info in. The dummy
+ // element at the end makes the logic in filling them out easier to code.
+ //
+ unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
+ * sizeof(unsigned *));
+ unsigned i;
+ for (i = 0; i <= num_records; i++) {
+ threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
+ * sizeof(unsigned));
+ }
+
+#define CLEANUP_THREAD_INFO \
+ for (i = 0; i <= num_records; i++) { \
+ __kmp_free(threadInfo[i]); \
+ } \
+ __kmp_free(threadInfo);
+
+ //
+ // A value of UINT_MAX means that we didn't find the field
+ //
+ unsigned __index;
+
+#define INIT_PROC_INFO(p) \
+ for (__index = 0; __index <= maxIndex; __index++) { \
+ (p)[__index] = UINT_MAX; \
+ }
+
+ for (i = 0; i <= num_records; i++) {
+ INIT_PROC_INFO(threadInfo[i]);
+ }
+
+ unsigned num_avail = 0;
+ *line = 0;
+ while (! feof(f)) {
+ //
+ // Create an inner scoping level, so that all the goto targets at the
+ // end of the loop appear in an outer scoping level. This avoids
+ // warnings about jumping past an initialization to a target in the
+ // same block.
+ //
+ {
+ buf[sizeof(buf) - 1] = 1;
+ bool long_line = false;
+ if (! fgets(buf, sizeof(buf), f)) {
+ //
+ // Read error, presumably due to EOF.
+ //
+ // If there is valid data in threadInfo[num_avail], then fake
+ // a blank line to ensure that the last address gets parsed.
+ //
+ bool valid = false;
+ for (i = 0; i <= maxIndex; i++) {
+ if (threadInfo[num_avail][i] != UINT_MAX) {
+ valid = true;
+ }
+ }
+ if (! valid) {
+ break;
+ }
+ buf[0] = 0;
+ } else if (!buf[sizeof(buf) - 1]) {
+ //
+ // The line is longer than the buffer. Set a flag, so that the
+ // error is only reported if the line is one we would have parsed.
+ //
+ long_line = true;
+
+#define CHECK_LINE \
+ if (long_line) { \
+ CLEANUP_THREAD_INFO; \
+ *msg_id = kmp_i18n_str_LongLineCpuinfo; \
+ return -1; \
+ }
+ }
+ (*line)++;
+
+ char s1[] = "processor";
+ if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s1) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
+ if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
+ threadInfo[num_avail][osIdIndex] = val;
+#if KMP_OS_LINUX && USE_SYSFS_INFO
+ char path[256];
+ KMP_SNPRINTF(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
+ threadInfo[num_avail][osIdIndex]);
+ __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
+
+ KMP_SNPRINTF(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%u/topology/core_id",
+ threadInfo[num_avail][osIdIndex]);
+ __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
+ continue;
+#else
+ }
+ char s2[] = "physical id";
+ if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s2) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
+ if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
+ threadInfo[num_avail][pkgIdIndex] = val;
+ continue;
+ }
+ char s3[] = "core id";
+ if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s3) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
+ if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
+ threadInfo[num_avail][coreIdIndex] = val;
+ continue;
+#endif // KMP_OS_LINUX && USE_SYSFS_INFO
+ }
+ char s4[] = "thread id";
+ if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s4) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
+ if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
+ threadInfo[num_avail][threadIdIndex] = val;
+ continue;
+ }
+ unsigned level;
+ if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s4) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
+ KMP_ASSERT(nodeIdIndex + level <= maxIndex);
+ if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
+ threadInfo[num_avail][nodeIdIndex + level] = val;
+ continue;
+ }
+
+ //
+ // We didn't recognize the leading token on the line.
+ // There are lots of leading tokens that we don't recognize -
+ // if the line isn't empty, go on to the next line.
+ //
+ if ((*buf != 0) && (*buf != '\n')) {
+ //
+ // If the line is longer than the buffer, read characters
+ // until we find a newline.
+ //
+ if (long_line) {
+ int ch;
+ while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
+ }
+ continue;
+ }
+
+ //
+ // A newline has signalled the end of the processor record.
+ // Check that there aren't too many procs specified.
+ //
+ if ((int)num_avail == __kmp_xproc) {
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_TooManyEntries;
+ return -1;
+ }
+
+ //
+ // Check for missing fields. The osId field must be there, and we
+ // currently require that the physical id field is specified as well.
+ //
+ if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_MissingProcField;
+ return -1;
+ }
+ if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_MissingPhysicalIDField;
+ return -1;
+ }
+
+ //
+ // Skip this proc if it is not included in the machine model.
+ //
+ if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
+ INIT_PROC_INFO(threadInfo[num_avail]);
+ continue;
+ }
+
+ //
+ // We have a successful parse of this proc's info.
+ // Increment the counter, and prepare for the next proc.
+ //
+ num_avail++;
+ KMP_ASSERT(num_avail <= num_records);
+ INIT_PROC_INFO(threadInfo[num_avail]);
+ }
+ continue;
+
+ no_val:
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_MissingValCpuinfo;
+ return -1;
+
+ dup_field:
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
+ return -1;
+ }
+ *line = 0;
+
+# if KMP_MIC && REDUCE_TEAM_SIZE
+ unsigned teamSize = 0;
+# endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ // check for num_records == __kmp_xproc ???
+
+ //
+ // If there's only one thread context to bind to, form an Address object
+ // with depth 1 and return immediately (or, if affinity is off, set
+ // address2os to NULL and return).
+ //
+ // If it is configured to omit the package level when there is only a
+ // single package, the logic at the end of this routine won't work if
+ // there is only a single thread - it would try to form an Address
+ // object with depth 0.
+ //
+ KMP_ASSERT(num_avail > 0);
+ KMP_ASSERT(num_avail <= num_records);
+ if (num_avail == 1) {
+ __kmp_ncores = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
+ if (__kmp_affinity_verbose) {
+ if (! KMP_AFFINITY_CAPABLE()) {
+ KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ }
+ else {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ fullMask);
+ KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ }
+ int index;
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_str_buf_print(&buf, "1");
+ for (index = maxIndex - 1; index > pkgIdIndex; index--) {
+ __kmp_str_buf_print(&buf, " x 1");
+ }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
+ __kmp_str_buf_free(&buf);
+ }
+
+ if (__kmp_affinity_type == affinity_none) {
+ CLEANUP_THREAD_INFO;
+ return 0;
+ }
+
+ *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
+ Address addr(1);
+ addr.labels[0] = threadInfo[0][pkgIdIndex];
+ (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
+
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
+ }
+
+ CLEANUP_THREAD_INFO;
+ return 1;
+ }
+
+ //
+ // Sort the threadInfo table by physical Id.
+ //
+ qsort(threadInfo, num_avail, sizeof(*threadInfo),
+ __kmp_affinity_cmp_ProcCpuInfo_phys_id);
+
+ //
+ // The table is now sorted by pkgId / coreId / threadId, but we really
+ // don't know the radix of any of the fields. pkgId's may be sparsely
+ // assigned among the chips on a system. Although coreId's are usually
+ // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
+ // [0..threadsPerCore-1], we don't want to make any such assumptions.
+ //
+ // For that matter, we don't know what coresPerPkg and threadsPerCore
+ // (or the total # packages) are at this point - we want to determine
+ // that now. We only have an upper bound on the first two figures.
+ //
+ unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
+ * sizeof(unsigned));
+ unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
+ * sizeof(unsigned));
+ unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
+ * sizeof(unsigned));
+ unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
+ * sizeof(unsigned));
+
+ bool assign_thread_ids = false;
+ unsigned threadIdCt;
+ unsigned index;
+
+ restart_radix_check:
+ threadIdCt = 0;
+
+ //
+ // Initialize the counter arrays with data from threadInfo[0].
+ //
+ if (assign_thread_ids) {
+ if (threadInfo[0][threadIdIndex] == UINT_MAX) {
+ threadInfo[0][threadIdIndex] = threadIdCt++;
+ }
+ else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
+ threadIdCt = threadInfo[0][threadIdIndex] + 1;
+ }
+ }
+ for (index = 0; index <= maxIndex; index++) {
+ counts[index] = 1;
+ maxCt[index] = 1;
+ totals[index] = 1;
+ lastId[index] = threadInfo[0][index];
+ }
+
+ //
+ // Run through the rest of the OS procs.
+ //
+ for (i = 1; i < num_avail; i++) {
+ //
+ // Find the most significant index whose id differs
+ // from the id for the previous OS proc.
+ //
+ for (index = maxIndex; index >= threadIdIndex; index--) {
+ if (assign_thread_ids && (index == threadIdIndex)) {
+ //
+ // Auto-assign the thread id field if it wasn't specified.
+ //
+ if (threadInfo[i][threadIdIndex] == UINT_MAX) {
+ threadInfo[i][threadIdIndex] = threadIdCt++;
+ }
+
+ //
+ // Apparently the thread id field was specified for some
+ // entries and not others. Start the thread id counter
+ // off at the next higher thread id.
+ //
+ else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
+ threadIdCt = threadInfo[i][threadIdIndex] + 1;
+ }
+ }
+ if (threadInfo[i][index] != lastId[index]) {
+ //
+ // Run through all indices which are less significant,
+ // and reset the counts to 1.
+ //
+ // At all levels up to and including index, we need to
+ // increment the totals and record the last id.
+ //
+ unsigned index2;
+ for (index2 = threadIdIndex; index2 < index; index2++) {
+ totals[index2]++;
+ if (counts[index2] > maxCt[index2]) {
+ maxCt[index2] = counts[index2];
+ }
+ counts[index2] = 1;
+ lastId[index2] = threadInfo[i][index2];
+ }
+ counts[index]++;
+ totals[index]++;
+ lastId[index] = threadInfo[i][index];
+
+ if (assign_thread_ids && (index > threadIdIndex)) {
+
+# if KMP_MIC && REDUCE_TEAM_SIZE
+ //
+ // The default team size is the total #threads in the machine
+ // minus 1 thread for every core that has 3 or more threads.
+ //
+ teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
+# endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ //
+ // Restart the thread counter, as we are on a new core.
+ //
+ threadIdCt = 0;
+
+ //
+ // Auto-assign the thread id field if it wasn't specified.
+ //
+ if (threadInfo[i][threadIdIndex] == UINT_MAX) {
+ threadInfo[i][threadIdIndex] = threadIdCt++;
+ }
+
+ //
+ // Apparently the thread id field was specified for some
+ // entries and not others. Start the thread id counter
+ // off at the next higher thread id.
+ //
+ else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
+ threadIdCt = threadInfo[i][threadIdIndex] + 1;
+ }
+ }
+ break;
+ }
+ }
+ if (index < threadIdIndex) {
+ //
+ // If thread ids were specified, it is an error if they are not
+ // unique. Also, check that we haven't already restarted the
+ // loop (to be safe - shouldn't need to).
+ //
+ if ((threadInfo[i][threadIdIndex] != UINT_MAX)
+ || assign_thread_ids) {
+ __kmp_free(lastId);
+ __kmp_free(totals);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
+ return -1;
+ }
+
+ //
+ // If the thread ids were not specified and we see entries that
+ // are duplicates, start the loop over and
+ // assign the thread ids manually.
+ //
+ assign_thread_ids = true;
+ goto restart_radix_check;
+ }
+ }
+
+# if KMP_MIC && REDUCE_TEAM_SIZE
+ //
+ // The default team size is the total #threads in the machine
+ // minus 1 thread for every core that has 3 or more threads.
+ //
+ teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
+# endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ for (index = threadIdIndex; index <= maxIndex; index++) {
+ if (counts[index] > maxCt[index]) {
+ maxCt[index] = counts[index];
+ }
+ }
+
+ __kmp_nThreadsPerCore = maxCt[threadIdIndex];
+ nCoresPerPkg = maxCt[coreIdIndex];
+ nPackages = totals[pkgIdIndex];
+
+ //
+ // Check to see if the machine topology is uniform
+ //
+ unsigned prod = totals[maxIndex];
+ for (index = threadIdIndex; index < maxIndex; index++) {
+ prod *= maxCt[index];
+ }
+ bool uniform = (prod == totals[threadIdIndex]);
+
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return now if affinity is not enabled.
+ //
+ __kmp_ncores = totals[coreIdIndex];
+
+ if (__kmp_affinity_verbose) {
+ if (! KMP_AFFINITY_CAPABLE()) {
+ KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ }
+ else {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
+ KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ }
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+
+ __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
+ for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
+ __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
+ }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
+ maxCt[threadIdIndex], __kmp_ncores);
+
+ __kmp_str_buf_free(&buf);
+ }
+
+# if KMP_MIC && REDUCE_TEAM_SIZE
+ //
+ // Set the default team size.
+ //
+ if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
+ __kmp_dflt_team_nth = teamSize;
+ KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
+ __kmp_dflt_team_nth));
+ }
+# endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(lastId);
+ __kmp_free(totals);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ CLEANUP_THREAD_INFO;
+ return 0;
+ }
+
+ //
+ // Count the number of levels which have more nodes at that level than
+ // at the parent's level (with there being an implicit root node of
+ // the top level). This is equivalent to saying that there is at least
+ // one node at this level which has a sibling. These levels are in the
+ // map, and the package level is always in the map.
+ //
+ bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
+ int level = 0;
+ for (index = threadIdIndex; index < maxIndex; index++) {
+ KMP_ASSERT(totals[index] >= totals[index + 1]);
+ inMap[index] = (totals[index] > totals[index + 1]);
+ }
+ inMap[maxIndex] = (totals[maxIndex] > 1);
+ inMap[pkgIdIndex] = true;
+
+ int depth = 0;
+ for (index = threadIdIndex; index <= maxIndex; index++) {
+ if (inMap[index]) {
+ depth++;
+ }
+ }
+ KMP_ASSERT(depth > 0);
+
+ //
+ // Construct the data structure that is to be returned.
+ //
+ *address2os = (AddrUnsPair*)
+ __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
+ int pkgLevel = -1;
+ int coreLevel = -1;
+ int threadLevel = -1;
+
+ for (i = 0; i < num_avail; ++i) {
+ Address addr(depth);
+ unsigned os = threadInfo[i][osIdIndex];
+ int src_index;
+ int dst_index = 0;
+
+ for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
+ if (! inMap[src_index]) {
+ continue;
+ }
+ addr.labels[dst_index] = threadInfo[i][src_index];
+ if (src_index == pkgIdIndex) {
+ pkgLevel = dst_index;
+ }
+ else if (src_index == coreIdIndex) {
+ coreLevel = dst_index;
+ }
+ else if (src_index == threadIdIndex) {
+ threadLevel = dst_index;
+ }
+ dst_index++;
+ }
+ (*address2os)[i] = AddrUnsPair(addr, os);
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ //
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ //
+ unsigned src_index;
+ __kmp_affinity_gran_levels = 0;
+ for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
+ if (! inMap[src_index]) {
+ continue;
+ }
+ switch (src_index) {
+ case threadIdIndex:
+ if (__kmp_affinity_gran > affinity_gran_thread) {
+ __kmp_affinity_gran_levels++;
+ }
+
+ break;
+ case coreIdIndex:
+ if (__kmp_affinity_gran > affinity_gran_core) {
+ __kmp_affinity_gran_levels++;
+ }
+ break;
+
+ case pkgIdIndex:
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels++;
+ }
+ break;
+ }
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
+ coreLevel, threadLevel);
+ }
+
+ __kmp_free(inMap);
+ __kmp_free(lastId);
+ __kmp_free(totals);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ CLEANUP_THREAD_INFO;
+ return depth;
+}
+
+
+//
+// Create and return a table of affinity masks, indexed by OS thread ID.
+// This routine handles OR'ing together all the affinity masks of threads
+// that are sufficiently close, if granularity > fine.
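+ // For example, with granularity=core, every OS proc on a given core ends
+ // up with the same mask: the union of all of that core's procs.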
+//
+static kmp_affin_mask_t *
+__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
+ AddrUnsPair *address2os, unsigned numAddrs)
+{
+ //
+ // First form a table of affinity masks in order of OS thread id.
+ //
+ unsigned depth;
+ unsigned maxOsId;
+ unsigned i;
+
+ KMP_ASSERT(numAddrs > 0);
+ depth = address2os[0].first.depth;
+
+ maxOsId = 0;
+ for (i = 0; i < numAddrs; i++) {
+ unsigned osId = address2os[i].second;
+ if (osId > maxOsId) {
+ maxOsId = osId;
+ }
+ }
+ kmp_affin_mask_t *osId2Mask;
+ KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
+
+ //
+ // Sort the address2os table according to physical order. Doing so
+ // will put all threads on the same core/package/node in consecutive
+ // locations.
+ //
+ qsort(address2os, numAddrs, sizeof(*address2os),
+ __kmp_affinity_cmp_Address_labels);
+
+ KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
+ if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
+ KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
+ }
+ if (__kmp_affinity_gran_levels >= (int)depth) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffThreadsMayMigrate);
+ }
+ }
+
+ //
+ // Run through the table, forming the masks for all threads on each
+ // core. Threads on the same core will have identical "Address"
+ // objects, not considering the last level, which must be the thread
+ // id. All threads on a core will appear consecutively.
+ //
+ unsigned unique = 0;
+ unsigned j = 0; // index of 1st thread on core
+ unsigned leader = 0;
+ Address *leaderAddr = &(address2os[0].first);
+ kmp_affin_mask_t *sum;
+ KMP_CPU_ALLOC_ON_STACK(sum);
+ KMP_CPU_ZERO(sum);
+ KMP_CPU_SET(address2os[0].second, sum);
+ for (i = 1; i < numAddrs; i++) {
+ //
+ // If this thread is sufficiently close to the leader (within the
+ // granularity setting), then set the bit for this os thread in the
+ // affinity mask for this group, and go on to the next thread.
+ //
+ if (leaderAddr->isClose(address2os[i].first,
+ __kmp_affinity_gran_levels)) {
+ KMP_CPU_SET(address2os[i].second, sum);
+ continue;
+ }
+
+ //
+ // For every thread in this group, copy the mask to the thread's
+ // entry in the osId2Mask table. Mark the first address as a
+ // leader.
+ //
+ for (; j < i; j++) {
+ unsigned osId = address2os[j].second;
+ KMP_DEBUG_ASSERT(osId <= maxOsId);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
+ KMP_CPU_COPY(mask, sum);
+ address2os[j].first.leader = (j == leader);
+ }
+ unique++;
+
+ //
+ // Start a new mask.
+ //
+ leader = i;
+ leaderAddr = &(address2os[i].first);
+ KMP_CPU_ZERO(sum);
+ KMP_CPU_SET(address2os[i].second, sum);
+ }
+
+ //
+ // For every thread in last group, copy the mask to the thread's
+ // entry in the osId2Mask table.
+ //
+ for (; j < i; j++) {
+ unsigned osId = address2os[j].second;
+ KMP_DEBUG_ASSERT(osId <= maxOsId);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
+ KMP_CPU_COPY(mask, sum);
+ address2os[j].first.leader = (j == leader);
+ }
+ unique++;
+ KMP_CPU_FREE_FROM_STACK(sum);
+
+ *maxIndex = maxOsId;
+ *numUnique = unique;
+ return osId2Mask;
+}
+
+
+//
+// Stuff for the affinity proclist parsers. It's easier to declare these vars
+// as file-static than to try and pass them through the calling sequence of
+// the recursive-descent OMP_PLACES parser.
+//
+static kmp_affin_mask_t *newMasks;
+static int numNewMasks;
+static int nextNewMask;
+
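+//
+// ADD_MASK appends a copy of _mask to the newMasks vector, doubling the
+// vector's capacity whenever it is full.
+//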
+#define ADD_MASK(_mask) \
+ { \
+ if (nextNewMask >= numNewMasks) { \
+ int i; \
+ numNewMasks *= 2; \
+ kmp_affin_mask_t* temp; \
+ KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
+ for(i=0;i<numNewMasks/2;i++) { \
+ kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
+ kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
+ KMP_CPU_COPY(dest, src); \
+ } \
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
+ newMasks = temp; \
+ } \
+ KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
+ nextNewMask++; \
+ }
+
+#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
+ { \
+ if (((_osId) > _maxOsId) || \
+ (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
+ && (__kmp_affinity_type != affinity_none))) { \
+ KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
+ } \
+ } \
+ else { \
+ ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
+ } \
+ }
+
+
+//
+// Re-parse the proclist (for the explicit affinity type), and form the list
+// of affinity newMasks indexed by gtid.
+//
+static void
+__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
+ unsigned int *out_numMasks, const char *proclist,
+ kmp_affin_mask_t *osId2Mask, int maxOsId)
+{
+ int i;
+ const char *scan = proclist;
+ const char *next = proclist;
+
+ //
+ // Start with a small temporary mask vector; the ADD_MASK macro grows
+ // it on demand by doubling its capacity.
+ //
+ numNewMasks = 2;
+ KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
+ nextNewMask = 0;
+ kmp_affin_mask_t *sumMask;
+ KMP_CPU_ALLOC(sumMask);
+ int setSize = 0;
+
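+ // An explicit proclist is a comma-separated sequence of OS proc ids,
+ // ranges ("lo-hi", optionally "lo-hi:stride"), and braced sets such as
+ // "{0,2,4}" - for example "0-3,{8,9},16-31:2" (illustrative only).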
+ for (;;) {
+ int start, end, stride;
+
+ SKIP_WS(scan);
+ next = scan;
+ if (*next == '\0') {
+ break;
+ }
+
+ if (*next == '{') {
+ int num;
+ setSize = 0;
+ next++; // skip '{'
+ SKIP_WS(next);
+ scan = next;
+
+ //
+ // Read the first integer in the set.
+ //
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'),
+ "bad proclist");
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(num >= 0, "bad explicit proc list");
+
+ //
+ // Copy the mask for that osId to the sum (union) mask.
+ //
+ if ((num > maxOsId) ||
+ (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, num);
+ }
+ KMP_CPU_ZERO(sumMask);
+ }
+ else {
+ KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
+ setSize = 1;
+ }
+
+ for (;;) {
+ //
+ // Check for end of set.
+ //
+ SKIP_WS(next);
+ if (*next == '}') {
+ next++; // skip '}'
+ break;
+ }
+
+ //
+ // Skip optional comma.
+ //
+ if (*next == ',') {
+ next++;
+ }
+ SKIP_WS(next);
+
+ //
+ // Read the next integer in the set.
+ //
+ scan = next;
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'),
+ "bad explicit proc list");
+
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(num >= 0, "bad explicit proc list");
+
+ //
+ // Add the mask for that osId to the sum mask.
+ //
+ if ((num > maxOsId) ||
+ (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, num);
+ }
+ }
+ else {
+ KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
+ setSize++;
+ }
+ }
+ if (setSize > 0) {
+ ADD_MASK(sumMask);
+ }
+
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ continue;
+ }
+
+ //
+ // Read the first integer.
+ //
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
+ SKIP_DIGITS(next);
+ start = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(start >= 0, "bad explicit proc list");
+ SKIP_WS(next);
+
+ //
+ // If this isn't a range, then add a mask to the list and go on.
+ //
+ if (*next != '-') {
+ ADD_MASK_OSID(start, osId2Mask, maxOsId);
+
+ //
+ // Skip optional comma.
+ //
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ continue;
+ }
+
+ //
+ // This is a range. Skip over the '-' and read in the 2nd int.
+ //
+ next++; // skip '-'
+ SKIP_WS(next);
+ scan = next;
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
+ SKIP_DIGITS(next);
+ end = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(end >= 0, "bad explicit proc list");
+
+ //
+ // Check for a stride parameter
+ //
+ stride = 1;
+ SKIP_WS(next);
+ if (*next == ':') {
+ //
+ // A stride is specified. Skip over the ':' and read the 3rd int.
+ //
+ int sign = +1;
+ next++; // skip ':'
+ SKIP_WS(next);
+ scan = next;
+ if (*next == '-') {
+ sign = -1;
+ next++;
+ SKIP_WS(next);
+ scan = next;
+ }
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'),
+ "bad explicit proc list");
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(stride >= 0, "bad explicit proc list");
+ stride *= sign;
+ }
+
+ //
+ // Do some range checks.
+ //
+ KMP_ASSERT2(stride != 0, "bad explicit proc list");
+ if (stride > 0) {
+ KMP_ASSERT2(start <= end, "bad explicit proc list");
+ }
+ else {
+ KMP_ASSERT2(start >= end, "bad explicit proc list");
+ }
+ KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
+
+ //
+ // Add the mask for each OS proc # to the list.
+ //
+ if (stride > 0) {
+ do {
+ ADD_MASK_OSID(start, osId2Mask, maxOsId);
+ start += stride;
+ } while (start <= end);
+ }
+ else {
+ do {
+ ADD_MASK_OSID(start, osId2Mask, maxOsId);
+ start += stride;
+ } while (start >= end);
+ }
+
+ //
+ // Skip optional comma.
+ //
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ }
+
+ *out_numMasks = nextNewMask;
+ if (nextNewMask == 0) {
+ *out_masks = NULL;
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+ return;
+ }
+ KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
+ for(i = 0; i < nextNewMask; i++) {
+ kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
+ kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
+ KMP_CPU_COPY(dest, src);
+ }
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+ KMP_CPU_FREE(sumMask);
+}
+
+
+# if OMP_40_ENABLED
+
+/*-----------------------------------------------------------------------------
+
+Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
+places. Again, here is the grammar:
+
+place_list := place
+place_list := place , place_list
+place := num
+place := place : num
+place := place : num : signed
+place := { subplace_list }
+place := ! place // (lowest priority)
+subplace_list := subplace
+subplace_list := subplace , subplace_list
+subplace := num
+subplace := num : num
+subplace := num : num : signed
+signed := num
+signed := + signed
+signed := - signed
+
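+For example (illustrative only), the place list "{0,1},{2,3}:2:4" names the
+place {0,1}, the place {2,3}, and one further place derived from {2,3} by
+adding the stride 4 to each OS proc id, i.e. {6,7} - provided those OS proc
+ids exist in the machine model (invalid ids are warned about and skipped).
+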
+-----------------------------------------------------------------------------*/
+
+static void
+__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
+ int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
+{
+ const char *next;
+
+ for (;;) {
+ int start, count, stride, i;
+
+ //
+ // Read in the starting proc id
+ //
+ SKIP_WS(*scan);
+ KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
+ "bad explicit places list");
+ next = *scan;
+ SKIP_DIGITS(next);
+ start = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(start >= 0);
+ *scan = next;
+
+ //
+ // valid follow sets are ',' ':' and '}'
+ //
+ SKIP_WS(*scan);
+ if (**scan == '}' || **scan == ',') {
+ if ((start > maxOsId) ||
+ (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, start);
+ }
+ }
+ else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
+ (*setSize)++;
+ }
+ if (**scan == '}') {
+ break;
+ }
+ (*scan)++; // skip ','
+ continue;
+ }
+ KMP_ASSERT2(**scan == ':', "bad explicit places list");
+ (*scan)++; // skip ':'
+
+ //
+ // Read count parameter
+ //
+ SKIP_WS(*scan);
+ KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
+ "bad explicit places list");
+ next = *scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(count >= 0);
+ *scan = next;
+
+ //
+ // valid follow sets are ',' ':' and '}'
+ //
+ SKIP_WS(*scan);
+ if (**scan == '}' || **scan == ',') {
+ for (i = 0; i < count; i++) {
+ if ((start > maxOsId) ||
+ (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, start);
+ }
+ break; // don't proliferate warnings for large count
+ }
+ else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
+ start++;
+ (*setSize)++;
+ }
+ }
+ if (**scan == '}') {
+ break;
+ }
+ (*scan)++; // skip ','
+ continue;
+ }
+ KMP_ASSERT2(**scan == ':', "bad explicit places list");
+ (*scan)++; // skip ':'
+
+ //
+ // Read stride parameter
+ //
+ int sign = +1;
+ for (;;) {
+ SKIP_WS(*scan);
+ if (**scan == '+') {
+ (*scan)++; // skip '+'
+ continue;
+ }
+ if (**scan == '-') {
+ sign *= -1;
+ (*scan)++; // skip '-'
+ continue;
+ }
+ break;
+ }
+ SKIP_WS(*scan);
+ KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
+ "bad explicit places list");
+ next = *scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(stride >= 0);
+ *scan = next;
+ stride *= sign;
+
+ //
+ // valid follow sets are ',' and '}'
+ //
+ SKIP_WS(*scan);
+ if (**scan == '}' || **scan == ',') {
+ for (i = 0; i < count; i++) {
+ if ((start > maxOsId) ||
+ (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, start);
+ }
+ break; // don't proliferate warnings for large count
+ }
+ else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
+ start += stride;
+ (*setSize)++;
+ }
+ }
+ if (**scan == '}') {
+ break;
+ }
+ (*scan)++; // skip ','
+ continue;
+ }
+
+ KMP_ASSERT2(0, "bad explicit places list");
+ }
+}
+
+
+static void
+__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
+ int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
+{
+ const char *next;
+
+ //
+ // valid follow sets are '{' '!' and num
+ //
+ SKIP_WS(*scan);
+ if (**scan == '{') {
+ (*scan)++; // skip '{'
+ __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
+ setSize);
+ KMP_ASSERT2(**scan == '}', "bad explicit places list");
+ (*scan)++; // skip '}'
+ }
+ else if (**scan == '!') {
+ (*scan)++; // skip '!'
+ __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
+ KMP_CPU_COMPLEMENT(maxOsId, tempMask);
+ }
+ else if ((**scan >= '0') && (**scan <= '9')) {
+ next = *scan;
+ SKIP_DIGITS(next);
+ int num = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(num >= 0);
+ if ((num > maxOsId) ||
+ (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, num);
+ }
+ }
+ else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
+ (*setSize)++;
+ }
+ *scan = next; // skip num
+ }
+ else {
+ KMP_ASSERT2(0, "bad explicit places list");
+ }
+}
+
+
+//static void
+void
+__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
+ unsigned int *out_numMasks, const char *placelist,
+ kmp_affin_mask_t *osId2Mask, int maxOsId)
+{
+ int i,j,count,stride,sign;
+ const char *scan = placelist;
+ const char *next = placelist;
+
+ numNewMasks = 2;
+ KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
+ nextNewMask = 0;
+
+ // tempMask is modified based on the previous or initial
+ // place to form the current place
+ // previousMask contains the previous place
+ kmp_affin_mask_t *tempMask;
+ kmp_affin_mask_t *previousMask;
+ KMP_CPU_ALLOC(tempMask);
+ KMP_CPU_ZERO(tempMask);
+ KMP_CPU_ALLOC(previousMask);
+ KMP_CPU_ZERO(previousMask);
+ int setSize = 0;
+
+ for (;;) {
+ __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
+
+ //
+ // valid follow sets are ',' ':' and EOL
+ //
+ SKIP_WS(scan);
+ if (*scan == '\0' || *scan == ',') {
+ if (setSize > 0) {
+ ADD_MASK(tempMask);
+ }
+ KMP_CPU_ZERO(tempMask);
+ setSize = 0;
+ if (*scan == '\0') {
+ break;
+ }
+ scan++; // skip ','
+ continue;
+ }
+
+ KMP_ASSERT2(*scan == ':', "bad explicit places list");
+ scan++; // skip ':'
+
+ //
+ // Read count parameter
+ //
+ SKIP_WS(scan);
+ KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
+ "bad explicit places list");
+ next = scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(count >= 0);
+ scan = next;
+
+ //
+ // valid follow sets are ',' ':' and EOL
+ //
+ SKIP_WS(scan);
+ if (*scan == '\0' || *scan == ',') {
+ stride = +1;
+ }
+ else {
+ KMP_ASSERT2(*scan == ':', "bad explicit places list");
+ scan++; // skip ':'
+
+ //
+ // Read stride parameter
+ //
+ sign = +1;
+ for (;;) {
+ SKIP_WS(scan);
+ if (*scan == '+') {
+ scan++; // skip '+'
+ continue;
+ }
+ if (*scan == '-') {
+ sign *= -1;
+ scan++; // skip '-'
+ continue;
+ }
+ break;
+ }
+ SKIP_WS(scan);
+ KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
+ "bad explicit places list");
+ next = scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_DEBUG_ASSERT(stride >= 0);
+ scan = next;
+ stride *= sign;
+ }
+
+ // Add places determined by initial_place : count : stride
+ for (i = 0; i < count; i++) {
+ if (setSize == 0) {
+ break;
+ }
+ // Add the current place, then build the next place (tempMask) from that
+ KMP_CPU_COPY(previousMask, tempMask);
+ ADD_MASK(previousMask);
+ KMP_CPU_ZERO(tempMask);
+ setSize = 0;
+ KMP_CPU_SET_ITERATE(j, previousMask) {
+ if (! KMP_CPU_ISSET(j, previousMask)) {
+ continue;
+ }
+ else if ((j+stride > maxOsId) || (j+stride < 0) ||
+ (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
+ if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
+ KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
+ }
+ }
+ else {
+ KMP_CPU_SET(j+stride, tempMask);
+ setSize++;
+ }
+ }
+ }
+ KMP_CPU_ZERO(tempMask);
+ setSize = 0;
+
+ //
+ // valid follow sets are ',' and EOL
+ //
+ SKIP_WS(scan);
+ if (*scan == '\0') {
+ break;
+ }
+ if (*scan == ',') {
+ scan++; // skip ','
+ continue;
+ }
+
+ KMP_ASSERT2(0, "bad explicit places list");
+ }
+
+ *out_numMasks = nextNewMask;
+    if (nextNewMask == 0) {
+        *out_masks = NULL;
+        // Free the scratch masks on this early-return path as well, so they do
+        // not leak when no valid place was found.
+        KMP_CPU_FREE(tempMask);
+        KMP_CPU_FREE(previousMask);
+        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+        return;
+    }
+ KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
+ KMP_CPU_FREE(tempMask);
+ KMP_CPU_FREE(previousMask);
+ for(i = 0; i < nextNewMask; i++) {
+ kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
+ kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
+ KMP_CPU_COPY(dest, src);
+ }
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+}
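+
+// Example (illustrative, assuming the listed OS proc ids exist): the place list
+// "{0,1}:4:2" names an initial place {0,1}, a count of 4 and a stride of 2, so the
+// expansion loop above produces four masks: {0,1}, {2,3}, {4,5} and {6,7}.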
+
+# endif /* OMP_40_ENABLED */
+
+#undef ADD_MASK
+#undef ADD_MASK_OSID
+
+static void
+__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
+{
+ if (__kmp_place_num_sockets == 0 &&
+ __kmp_place_num_cores == 0 &&
+ __kmp_place_num_threads_per_core == 0 )
+ return; // no topology limiting actions requested, exit
+ if (__kmp_place_num_sockets == 0)
+ __kmp_place_num_sockets = nPackages; // use all available sockets
+ if (__kmp_place_num_cores == 0)
+ __kmp_place_num_cores = nCoresPerPkg; // use all available cores
+ if (__kmp_place_num_threads_per_core == 0 ||
+ __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
+ __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
+
+ if ( !__kmp_affinity_uniform_topology() ) {
+ KMP_WARNING( AffThrPlaceNonUniform );
+ return; // don't support non-uniform topology
+ }
+ if ( depth != 3 ) {
+ KMP_WARNING( AffThrPlaceNonThreeLevel );
+ return; // don't support not-3-level topology
+ }
+ if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
+ KMP_WARNING(AffThrPlaceManySockets);
+ return;
+ }
+ if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
+ KMP_WARNING( AffThrPlaceManyCores );
+ return;
+ }
+
+ AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
+ __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
+
+ int i, j, k, n_old = 0, n_new = 0;
+ for (i = 0; i < nPackages; ++i)
+ if (i < __kmp_place_socket_offset ||
+ i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
+ n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
+ else
+ for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
+ if (j < __kmp_place_core_offset ||
+ j >= __kmp_place_core_offset + __kmp_place_num_cores)
+ n_old += __kmp_nThreadsPerCore; // skip not-requested core
+ else
+ for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
+ if (k < __kmp_place_num_threads_per_core) {
+ newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
+ n_new++;
+ }
+ n_old++;
+ }
+ KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
+ KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
+ __kmp_place_num_threads_per_core);
+
+ nPackages = __kmp_place_num_sockets; // correct nPackages
+ nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
+ __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
+ __kmp_avail_proc = n_new; // correct avail_proc
+ __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
+
+ __kmp_free( *pAddr );
+ *pAddr = newAddr; // replace old topology with new one
+}
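+
+// Example (illustrative): on a uniform machine with 2 sockets, 8 cores per socket
+// and 2 HW threads per core (32 HW contexts), requesting 1 socket, 4 cores and
+// 2 threads per core with zero offsets keeps 1*4*2 = 8 of the 32 entries in *pAddr
+// and reduces nPackages/nCoresPerPkg/__kmp_nThreadsPerCore to 1/4/2.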
+
+
+static AddrUnsPair *address2os = NULL;
+static int * procarr = NULL;
+static int __kmp_aff_depth = 0;
+
+static void
+__kmp_aux_affinity_initialize(void)
+{
+ if (__kmp_affinity_masks != NULL) {
+ KMP_ASSERT(fullMask != NULL);
+ return;
+ }
+
+ //
+ // Create the "full" mask - this defines all of the processors that we
+ // consider to be in the machine model. If respect is set, then it is
+ // the initialization thread's affinity mask. Otherwise, it is all
+ // processors that we know about on the machine.
+ //
+ if (fullMask == NULL) {
+ KMP_CPU_ALLOC(fullMask);
+ }
+ if (KMP_AFFINITY_CAPABLE()) {
+ if (__kmp_affinity_respect_mask) {
+ __kmp_get_system_affinity(fullMask, TRUE);
+
+ //
+ // Count the number of available processors.
+ //
+ unsigned i;
+ __kmp_avail_proc = 0;
+ KMP_CPU_SET_ITERATE(i, fullMask) {
+ if (! KMP_CPU_ISSET(i, fullMask)) {
+ continue;
+ }
+ __kmp_avail_proc++;
+ }
+ if (__kmp_avail_proc > __kmp_xproc) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(ErrorInitializeAffinity);
+ }
+ __kmp_affinity_type = affinity_none;
+ KMP_AFFINITY_DISABLE();
+ return;
+ }
+ }
+ else {
+ __kmp_affinity_entire_machine_mask(fullMask);
+ __kmp_avail_proc = __kmp_xproc;
+ }
+ }
+
+ int depth = -1;
+ kmp_i18n_id_t msg_id = kmp_i18n_null;
+
+ //
+ // For backward compatibility, setting KMP_CPUINFO_FILE =>
+ // KMP_TOPOLOGY_METHOD=cpuinfo
+ //
+ if ((__kmp_cpuinfo_file != NULL) &&
+ (__kmp_affinity_top_method == affinity_top_method_all)) {
+ __kmp_affinity_top_method = affinity_top_method_cpuinfo;
+ }
+
+ if (__kmp_affinity_top_method == affinity_top_method_all) {
+ //
+ // In the default code path, errors are not fatal - we just try using
+ // another method. We only emit a warning message if affinity is on,
+        // or the verbose flag is set, and the nowarnings flag was not set.
+ //
+ const char *file_name = NULL;
+ int line = 0;
+# if KMP_USE_HWLOC
+ if (depth < 0) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ }
+ if(!__kmp_hwloc_error) {
+ depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ } else if(depth < 0 && __kmp_affinity_verbose) {
+ KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+ }
+ } else if(__kmp_affinity_verbose) {
+ KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+ }
+ }
+# endif
+
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+ if (depth < 0) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
+ }
+
+ file_name = NULL;
+ depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+
+ if (depth < 0) {
+ if (__kmp_affinity_verbose) {
+ if (msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
+ KMP_I18N_STR(DecodingLegacyAPIC));
+ }
+ else {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
+ }
+ }
+
+ file_name = NULL;
+ depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ }
+ }
+
+# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+# if KMP_OS_LINUX
+
+ if (depth < 0) {
+ if (__kmp_affinity_verbose) {
+ if (msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
+ }
+ else {
+ KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
+ }
+ }
+
+ FILE *f = fopen("/proc/cpuinfo", "r");
+ if (f == NULL) {
+ msg_id = kmp_i18n_str_CantOpenCpuinfo;
+ }
+ else {
+ file_name = "/proc/cpuinfo";
+ depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
+ fclose(f);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ }
+ }
+
+# endif /* KMP_OS_LINUX */
+
+# if KMP_GROUP_AFFINITY
+
+ if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
+ }
+
+ depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
+ KMP_ASSERT(depth != 0);
+ }
+
+# endif /* KMP_GROUP_AFFINITY */
+
+ if (depth < 0) {
+ if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
+ if (file_name == NULL) {
+ KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
+ }
+ else if (line == 0) {
+ KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
+ }
+ else {
+ KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
+ }
+ }
+ // FIXME - print msg if msg_id = kmp_i18n_null ???
+
+ file_name = "";
+ depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ KMP_ASSERT(depth > 0);
+ KMP_ASSERT(address2os != NULL);
+ }
+ }
+
+ //
+    // If the user has specified that a particular topology discovery method
+ // is to be used, then we abort if that method fails. The exception is
+ // group affinity, which might have been implicitly set.
+ //
+
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+ else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
+ KMP_I18N_STR(Decodingx2APIC));
+ }
+
+ depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
+ }
+ }
+ else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
+ KMP_I18N_STR(DecodingLegacyAPIC));
+ }
+
+ depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
+ }
+ }
+
+# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
+ const char *filename;
+ if (__kmp_cpuinfo_file != NULL) {
+ filename = __kmp_cpuinfo_file;
+ }
+ else {
+ filename = "/proc/cpuinfo";
+ }
+
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
+ }
+
+ FILE *f = fopen(filename, "r");
+ if (f == NULL) {
+ int code = errno;
+ if (__kmp_cpuinfo_file != NULL) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG(CantOpenFileForReading, filename),
+ KMP_ERR(code),
+ KMP_HNT(NameComesFrom_CPUINFO_FILE),
+ __kmp_msg_null
+ );
+ }
+ else {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG(CantOpenFileForReading, filename),
+ KMP_ERR(code),
+ __kmp_msg_null
+ );
+ }
+ }
+ int line = 0;
+ depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
+ fclose(f);
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ if (line > 0) {
+ KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
+ }
+ else {
+ KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
+ }
+ }
+ if (__kmp_affinity_type == affinity_none) {
+ KMP_ASSERT(depth == 0);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ }
+
+# if KMP_GROUP_AFFINITY
+
+ else if (__kmp_affinity_top_method == affinity_top_method_group) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
+ }
+
+ depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
+ KMP_ASSERT(depth != 0);
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
+ }
+ }
+
+# endif /* KMP_GROUP_AFFINITY */
+
+ else if (__kmp_affinity_top_method == affinity_top_method_flat) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
+ }
+
+ depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+ // should not fail
+ KMP_ASSERT(depth > 0);
+ KMP_ASSERT(address2os != NULL);
+ }
+
+# if KMP_USE_HWLOC
+ else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ }
+ depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(address2os == NULL);
+ return;
+ }
+# if KMP_DEBUG
+ AddrUnsPair *otheraddress2os = NULL;
+ int otherdepth = -1;
+# if KMP_MIC
+ otherdepth = __kmp_affinity_create_apicid_map(&otheraddress2os, &msg_id);
+# else
+ otherdepth = __kmp_affinity_create_x2apicid_map(&otheraddress2os, &msg_id);
+# endif
+ if(otheraddress2os != NULL && address2os != NULL) {
+ int i;
+ unsigned arent_equal_flag = 0;
+ for(i=0;i<__kmp_avail_proc;i++) {
+ if(otheraddress2os[i] != address2os[i]) arent_equal_flag = 1;
+ }
+ if(arent_equal_flag) {
+ KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are different from APICID\n"));
+ KA_TRACE(10, ("__kmp_aux_affinity_initialize: APICID Table:\n"));
+ for(i=0;i<__kmp_avail_proc;i++) {
+ otheraddress2os[i].print(); __kmp_printf("\n");
+ }
+ KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc Table:\n"));
+ for(i=0;i<__kmp_avail_proc;i++) {
+ address2os[i].print(); __kmp_printf("\n");
+ }
+ }
+ else {
+ KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are same as APICID\n"));
+ }
+ }
+# endif // KMP_DEBUG
+ }
+# endif // KMP_USE_HWLOC
+
+ if (address2os == NULL) {
+ if (KMP_AFFINITY_CAPABLE()
+ && (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none)))) {
+ KMP_WARNING(ErrorInitializeAffinity);
+ }
+ __kmp_affinity_type = affinity_none;
+ KMP_AFFINITY_DISABLE();
+ return;
+ }
+
+ __kmp_apply_thread_places(&address2os, depth);
+
+ //
+ // Create the table of masks, indexed by thread Id.
+ //
+ unsigned maxIndex;
+ unsigned numUnique;
+ kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
+ address2os, __kmp_avail_proc);
+ if (__kmp_affinity_gran_levels == 0) {
+ KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
+ }
+
+ //
+ // Set the childNums vector in all Address objects. This must be done
+ // before we can sort using __kmp_affinity_cmp_Address_child_num(),
+ // which takes into account the setting of __kmp_affinity_compact.
+ //
+ __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
+
+ switch (__kmp_affinity_type) {
+
+ case affinity_explicit:
+ KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
+# if OMP_40_ENABLED
+ if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
+# endif
+ {
+ __kmp_affinity_process_proclist(&__kmp_affinity_masks,
+ &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
+ maxIndex);
+ }
+# if OMP_40_ENABLED
+ else {
+ __kmp_affinity_process_placelist(&__kmp_affinity_masks,
+ &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
+ maxIndex);
+ }
+# endif
+ if (__kmp_affinity_num_masks == 0) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffNoValidProcID);
+ }
+ __kmp_affinity_type = affinity_none;
+ return;
+ }
+ break;
+
+ //
+ // The other affinity types rely on sorting the Addresses according
+ // to some permutation of the machine topology tree. Set
+ // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
+ // then jump to a common code fragment to do the sort and create
+ // the array of affinity masks.
+ //
+
+ case affinity_logical:
+ __kmp_affinity_compact = 0;
+ if (__kmp_affinity_offset) {
+ __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
+ % __kmp_avail_proc;
+ }
+ goto sortAddresses;
+
+ case affinity_physical:
+ if (__kmp_nThreadsPerCore > 1) {
+ __kmp_affinity_compact = 1;
+ if (__kmp_affinity_compact >= depth) {
+ __kmp_affinity_compact = 0;
+ }
+ } else {
+ __kmp_affinity_compact = 0;
+ }
+ if (__kmp_affinity_offset) {
+ __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
+ % __kmp_avail_proc;
+ }
+ goto sortAddresses;
+
+ case affinity_scatter:
+ if (__kmp_affinity_compact >= depth) {
+ __kmp_affinity_compact = 0;
+ }
+ else {
+ __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
+ }
+ goto sortAddresses;
+
+ case affinity_compact:
+ if (__kmp_affinity_compact >= depth) {
+ __kmp_affinity_compact = depth - 1;
+ }
+ goto sortAddresses;
+
+ case affinity_balanced:
+ // Balanced works only for the case of a single package
+ if( nPackages > 1 ) {
+ if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+ KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
+ }
+ __kmp_affinity_type = affinity_none;
+ return;
+ } else if( __kmp_affinity_uniform_topology() ) {
+ break;
+ } else { // Non-uniform topology
+
+ // Save the depth for further usage
+ __kmp_aff_depth = depth;
+
+ // Number of hyper threads per core in HT machine
+ int nth_per_core = __kmp_nThreadsPerCore;
+
+ int core_level;
+ if( nth_per_core > 1 ) {
+ core_level = depth - 2;
+ } else {
+ core_level = depth - 1;
+ }
+ int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
+ int nproc = nth_per_core * ncores;
+
+ procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
+ for( int i = 0; i < nproc; i++ ) {
+ procarr[ i ] = -1;
+ }
+
+ for( int i = 0; i < __kmp_avail_proc; i++ ) {
+ int proc = address2os[ i ].second;
+ // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
+ // If there is only one thread per core then depth == 2: level 0 - package,
+ // level 1 - core.
+ int level = depth - 1;
+
+ // __kmp_nth_per_core == 1
+ int thread = 0;
+ int core = address2os[ i ].first.labels[ level ];
+ // If the thread level exists, that is we have more than one thread context per core
+ if( nth_per_core > 1 ) {
+ thread = address2os[ i ].first.labels[ level ] % nth_per_core;
+ core = address2os[ i ].first.labels[ level - 1 ];
+ }
+ procarr[ core * nth_per_core + thread ] = proc;
+ }
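+            // Example (illustrative): with 2 HW threads per core, procarr is laid
+            // out as { core0/ctx0, core0/ctx1, core1/ctx0, core1/ctx1, ... }, each
+            // entry holding an OS proc id, or -1 for a context with no processor.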
+
+ break;
+ }
+
+ sortAddresses:
+ //
+ // Allocate the gtid->affinity mask table.
+ //
+ if (__kmp_affinity_dups) {
+ __kmp_affinity_num_masks = __kmp_avail_proc;
+ }
+ else {
+ __kmp_affinity_num_masks = numUnique;
+ }
+
+# if OMP_40_ENABLED
+ if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
+ && ( __kmp_affinity_num_places > 0 )
+ && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
+ __kmp_affinity_num_masks = __kmp_affinity_num_places;
+ }
+# endif
+
+ KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
+
+ //
+ // Sort the address2os table according to the current setting of
+ // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
+ //
+ qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
+ __kmp_affinity_cmp_Address_child_num);
+ {
+ int i;
+ unsigned j;
+ for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
+ if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
+ continue;
+ }
+ unsigned osId = address2os[i].second;
+ kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
+ kmp_affin_mask_t *dest
+ = KMP_CPU_INDEX(__kmp_affinity_masks, j);
+ KMP_ASSERT(KMP_CPU_ISSET(osId, src));
+ KMP_CPU_COPY(dest, src);
+ if (++j >= __kmp_affinity_num_masks) {
+ break;
+ }
+ }
+ KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
+ }
+ break;
+
+ default:
+ KMP_ASSERT2(0, "Unexpected affinity setting");
+ }
+
+ __kmp_free(osId2Mask);
+ machine_hierarchy.init(address2os, __kmp_avail_proc);
+}
+
+
+void
+__kmp_affinity_initialize(void)
+{
+ //
+    // Much of the code above was written assuming that if a machine was not
+ // affinity capable, then __kmp_affinity_type == affinity_none. We now
+ // explicitly represent this as __kmp_affinity_type == affinity_disabled.
+ //
+ // There are too many checks for __kmp_affinity_type == affinity_none
+ // in this code. Instead of trying to change them all, check if
+ // __kmp_affinity_type == affinity_disabled, and if so, slam it with
+ // affinity_none, call the real initialization routine, then restore
+ // __kmp_affinity_type to affinity_disabled.
+ //
+ int disabled = (__kmp_affinity_type == affinity_disabled);
+ if (! KMP_AFFINITY_CAPABLE()) {
+ KMP_ASSERT(disabled);
+ }
+ if (disabled) {
+ __kmp_affinity_type = affinity_none;
+ }
+ __kmp_aux_affinity_initialize();
+ if (disabled) {
+ __kmp_affinity_type = affinity_disabled;
+ }
+}
+
+
+void
+__kmp_affinity_uninitialize(void)
+{
+ if (__kmp_affinity_masks != NULL) {
+ KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
+ __kmp_affinity_masks = NULL;
+ }
+ if (fullMask != NULL) {
+ KMP_CPU_FREE(fullMask);
+ fullMask = NULL;
+ }
+ __kmp_affinity_num_masks = 0;
+# if OMP_40_ENABLED
+ __kmp_affinity_num_places = 0;
+# endif
+ if (__kmp_affinity_proclist != NULL) {
+ __kmp_free(__kmp_affinity_proclist);
+ __kmp_affinity_proclist = NULL;
+ }
+ if( address2os != NULL ) {
+ __kmp_free( address2os );
+ address2os = NULL;
+ }
+ if( procarr != NULL ) {
+ __kmp_free( procarr );
+ procarr = NULL;
+ }
+}
+
+
+void
+__kmp_affinity_set_init_mask(int gtid, int isa_root)
+{
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return;
+ }
+
+ kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+ if (th->th.th_affin_mask == NULL) {
+ KMP_CPU_ALLOC(th->th.th_affin_mask);
+ }
+ else {
+ KMP_CPU_ZERO(th->th.th_affin_mask);
+ }
+
+ //
+    // Copy the thread mask to the kmp_info_t structure.
+ // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
+ // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
+ // is set, then the full mask is the same as the mask of the initialization
+ // thread.
+ //
+ kmp_affin_mask_t *mask;
+ int i;
+
+# if OMP_40_ENABLED
+ if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
+# endif
+ {
+ if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
+ ) {
+# if KMP_GROUP_AFFINITY
+ if (__kmp_num_proc_groups > 1) {
+ return;
+ }
+# endif
+ KMP_ASSERT(fullMask != NULL);
+ i = KMP_PLACE_ALL;
+ mask = fullMask;
+ }
+ else {
+ KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
+ i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
+ mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
+ }
+ }
+# if OMP_40_ENABLED
+ else {
+ if ((! isa_root)
+ || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
+# if KMP_GROUP_AFFINITY
+ if (__kmp_num_proc_groups > 1) {
+ return;
+ }
+# endif
+ KMP_ASSERT(fullMask != NULL);
+ i = KMP_PLACE_ALL;
+ mask = fullMask;
+ }
+ else {
+ //
+ // int i = some hash function or just a counter that doesn't
+ // always start at 0. Use gtid for now.
+ //
+ KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
+ i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
+ mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
+ }
+ }
+# endif
+
+# if OMP_40_ENABLED
+ th->th.th_current_place = i;
+ if (isa_root) {
+ th->th.th_new_place = i;
+ th->th.th_first_place = 0;
+ th->th.th_last_place = __kmp_affinity_num_masks - 1;
+ }
+
+ if (i == KMP_PLACE_ALL) {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
+ gtid));
+ }
+ else {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
+ gtid, i));
+ }
+# else
+ if (i == -1) {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
+ gtid));
+ }
+ else {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
+ gtid, i));
+ }
+# endif /* OMP_40_ENABLED */
+
+ KMP_CPU_COPY(th->th.th_affin_mask, mask);
+
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ th->th.th_affin_mask);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
+ buf);
+ }
+
+# if KMP_OS_WINDOWS
+ //
+ // On Windows* OS, the process affinity mask might have changed.
+ // If the user didn't request affinity and this call fails,
+ // just continue silently. See CQ171393.
+ //
+ if ( __kmp_affinity_type == affinity_none ) {
+ __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
+ }
+ else
+# endif
+ __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
+}
+
+
+# if OMP_40_ENABLED
+
+void
+__kmp_affinity_set_place(int gtid)
+{
+ int retval;
+
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return;
+ }
+
+ kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+
+ KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
+ gtid, th->th.th_new_place, th->th.th_current_place));
+
+ //
+ // Check that the new place is within this thread's partition.
+ //
+ KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+ KMP_ASSERT(th->th.th_new_place >= 0);
+ KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
+ if (th->th.th_first_place <= th->th.th_last_place) {
+ KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
+ && (th->th.th_new_place <= th->th.th_last_place));
+ }
+ else {
+ KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
+ || (th->th.th_new_place >= th->th.th_last_place));
+ }
+
+ //
+    // Copy the thread mask to the kmp_info_t structure,
+ // and set this thread's affinity.
+ //
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
+ th->th.th_new_place);
+ KMP_CPU_COPY(th->th.th_affin_mask, mask);
+ th->th.th_current_place = th->th.th_new_place;
+
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ th->th.th_affin_mask);
+ KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
+ gtid, buf);
+ }
+ __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
+}
+
+# endif /* OMP_40_ENABLED */
+
+
+int
+__kmp_aux_set_affinity(void **mask)
+{
+ int gtid;
+ kmp_info_t *th;
+ int retval;
+
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ gtid = __kmp_entry_gtid();
+ KA_TRACE(1000, ;{
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
+ gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+ else {
+ unsigned proc;
+ int num_procs = 0;
+
+ KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
+ if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
+ continue;
+ }
+ num_procs++;
+ if (! KMP_CPU_ISSET(proc, fullMask)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ break;
+ }
+ }
+ if (num_procs == 0) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+
+# if KMP_GROUP_AFFINITY
+ if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+# endif /* KMP_GROUP_AFFINITY */
+
+ }
+ }
+
+ th = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+ retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
+ if (retval == 0) {
+ KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
+ }
+
+# if OMP_40_ENABLED
+ th->th.th_current_place = KMP_PLACE_UNDEFINED;
+ th->th.th_new_place = KMP_PLACE_UNDEFINED;
+ th->th.th_first_place = 0;
+ th->th.th_last_place = __kmp_affinity_num_masks - 1;
+
+ //
+    // Turn off 4.0 affinity for the current thread at this parallel level.
+ //
+ th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
+# endif
+
+ return retval;
+}
+
+
+int
+__kmp_aux_get_affinity(void **mask)
+{
+ int gtid;
+ int retval;
+ kmp_info_t *th;
+
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ gtid = __kmp_entry_gtid();
+ th = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+
+ KA_TRACE(1000, ;{
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ th->th.th_affin_mask);
+ __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
+ }
+ }
+
+# if !KMP_OS_WINDOWS
+
+ retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
+ KA_TRACE(1000, ;{
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
+ });
+ return retval;
+
+# else
+
+ KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
+ return 0;
+
+# endif /* KMP_OS_WINDOWS */
+
+}
+
+int
+__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
+{
+ int retval;
+
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ KA_TRACE(1000, ;{
+ int gtid = __kmp_entry_gtid();
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
+ proc, gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
+ }
+ }
+
+ if ((proc < 0)
+# if !KMP_USE_HWLOC
+ || ((unsigned)proc >= KMP_CPU_SETSIZE)
+# endif
+ ) {
+ return -1;
+ }
+ if (! KMP_CPU_ISSET(proc, fullMask)) {
+ return -2;
+ }
+
+ KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
+ return 0;
+}
+
+
+int
+__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
+{
+ int retval;
+
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ KA_TRACE(1000, ;{
+ int gtid = __kmp_entry_gtid();
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
+ proc, gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
+ }
+ }
+
+ if ((proc < 0)
+# if !KMP_USE_HWLOC
+ || ((unsigned)proc >= KMP_CPU_SETSIZE)
+# endif
+ ) {
+ return -1;
+ }
+ if (! KMP_CPU_ISSET(proc, fullMask)) {
+ return -2;
+ }
+
+ KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
+ return 0;
+}
+
+
+int
+__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
+{
+ int retval;
+
+ if (! KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ KA_TRACE(1000, ;{
+ int gtid = __kmp_entry_gtid();
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
+ proc, gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
+ }
+ }
+
+ if ((proc < 0)
+# if !KMP_USE_HWLOC
+ || ((unsigned)proc >= KMP_CPU_SETSIZE)
+# endif
+ ) {
+ return -1;
+ }
+ if (! KMP_CPU_ISSET(proc, fullMask)) {
+ return 0;
+ }
+
+ return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
+}
+
+
+// Dynamic affinity settings - Affinity balanced
+void __kmp_balanced_affinity( int tid, int nthreads )
+{
+ if( __kmp_affinity_uniform_topology() ) {
+ int coreID;
+ int threadID;
+ // Number of hyper threads per core in HT machine
+ int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
+ // Number of cores
+ int ncores = __kmp_ncores;
+ // How many threads will be bound to each core
+ int chunk = nthreads / ncores;
+        // How many cores will have an additional thread bound to them - "big cores"
+ int big_cores = nthreads % ncores;
+ // Number of threads on the big cores
+ int big_nth = ( chunk + 1 ) * big_cores;
+ if( tid < big_nth ) {
+ coreID = tid / (chunk + 1 );
+ threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
+ } else { //tid >= big_nth
+ coreID = ( tid - big_cores ) / chunk;
+ threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
+ }
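+        // Example (illustrative): nthreads = 10 on 4 cores gives chunk = 2,
+        // big_cores = 2 and big_nth = 6, so tids 0-2 bind to core 0, 3-5 to
+        // core 1, 6-7 to core 2 and 8-9 to core 3.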
+
+ KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal set affinity operation when not capable");
+
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+
+ // Granularity == thread
+ if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
+ int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
+ KMP_CPU_SET( osID, mask);
+ } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
+ for( int i = 0; i < __kmp_nth_per_core; i++ ) {
+ int osID;
+ osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
+ KMP_CPU_SET( osID, mask);
+ }
+ }
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ tid, buf);
+ }
+ __kmp_set_system_affinity( mask, TRUE );
+ KMP_CPU_FREE_FROM_STACK(mask);
+ } else { // Non-uniform topology
+
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+
+ // Number of hyper threads per core in HT machine
+ int nth_per_core = __kmp_nThreadsPerCore;
+ int core_level;
+ if( nth_per_core > 1 ) {
+ core_level = __kmp_aff_depth - 2;
+ } else {
+ core_level = __kmp_aff_depth - 1;
+ }
+
+            // Number of cores - maximum value; it does not count trailing cores with 0 processors
+ int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
+
+ // For performance gain consider the special case nthreads == __kmp_avail_proc
+ if( nthreads == __kmp_avail_proc ) {
+ if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
+ int osID = address2os[ tid ].second;
+ KMP_CPU_SET( osID, mask);
+ } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
+ int coreID = address2os[ tid ].first.labels[ core_level ];
+                // We'll count the osIDs found for the current core; there can be no
+                // more than nth_per_core of them, and since address2os is sorted we
+                // can break once cnt == nth_per_core.
+ int cnt = 0;
+ for( int i = 0; i < __kmp_avail_proc; i++ ) {
+ int osID = address2os[ i ].second;
+ int core = address2os[ i ].first.labels[ core_level ];
+ if( core == coreID ) {
+ KMP_CPU_SET( osID, mask);
+ cnt++;
+ if( cnt == nth_per_core ) {
+ break;
+ }
+ }
+ }
+ }
+ } else if( nthreads <= __kmp_ncores ) {
+
+ int core = 0;
+ for( int i = 0; i < ncores; i++ ) {
+ // Check if this core from procarr[] is in the mask
+ int in_mask = 0;
+ for( int j = 0; j < nth_per_core; j++ ) {
+ if( procarr[ i * nth_per_core + j ] != - 1 ) {
+ in_mask = 1;
+ break;
+ }
+ }
+ if( in_mask ) {
+ if( tid == core ) {
+ for( int j = 0; j < nth_per_core; j++ ) {
+ int osID = procarr[ i * nth_per_core + j ];
+ if( osID != -1 ) {
+ KMP_CPU_SET( osID, mask );
+ // For granularity=thread it is enough to set the first available osID for this core
+ if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
+ break;
+ }
+ }
+ }
+ break;
+ } else {
+ core++;
+ }
+ }
+ }
+
+ } else { // nthreads > __kmp_ncores
+
+ // Array to save the number of processors at each core
+ int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
+ // Array to save the number of cores with "x" available processors;
+ int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
+ // Array to save the number of cores with # procs from x to nth_per_core
+ int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
+
+ for( int i = 0; i <= nth_per_core; i++ ) {
+ ncores_with_x_procs[ i ] = 0;
+ ncores_with_x_to_max_procs[ i ] = 0;
+ }
+
+ for( int i = 0; i < ncores; i++ ) {
+ int cnt = 0;
+ for( int j = 0; j < nth_per_core; j++ ) {
+ if( procarr[ i * nth_per_core + j ] != -1 ) {
+ cnt++;
+ }
+ }
+ nproc_at_core[ i ] = cnt;
+ ncores_with_x_procs[ cnt ]++;
+ }
+
+ for( int i = 0; i <= nth_per_core; i++ ) {
+ for( int j = i; j <= nth_per_core; j++ ) {
+ ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
+ }
+ }
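+            // Example (illustrative): if three cores expose 2, 1 and 2 usable
+            // contexts (nth_per_core = 2), then nproc_at_core = {2,1,2},
+            // ncores_with_x_procs = {0,1,2} and ncores_with_x_to_max_procs = {3,3,2}.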
+
+ // Max number of processors
+ int nproc = nth_per_core * ncores;
+ // An array to keep number of threads per each context
+ int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
+ for( int i = 0; i < nproc; i++ ) {
+ newarr[ i ] = 0;
+ }
+
+ int nth = nthreads;
+ int flag = 0;
+ while( nth > 0 ) {
+ for( int j = 1; j <= nth_per_core; j++ ) {
+ int cnt = ncores_with_x_to_max_procs[ j ];
+ for( int i = 0; i < ncores; i++ ) {
+ // Skip the core with 0 processors
+ if( nproc_at_core[ i ] == 0 ) {
+ continue;
+ }
+ for( int k = 0; k < nth_per_core; k++ ) {
+ if( procarr[ i * nth_per_core + k ] != -1 ) {
+ if( newarr[ i * nth_per_core + k ] == 0 ) {
+ newarr[ i * nth_per_core + k ] = 1;
+ cnt--;
+ nth--;
+ break;
+ } else {
+ if( flag != 0 ) {
+ newarr[ i * nth_per_core + k ] ++;
+ cnt--;
+ nth--;
+ break;
+ }
+ }
+ }
+ }
+ if( cnt == 0 || nth == 0 ) {
+ break;
+ }
+ }
+ if( nth == 0 ) {
+ break;
+ }
+ }
+ flag = 1;
+ }
+ int sum = 0;
+ for( int i = 0; i < nproc; i++ ) {
+ sum += newarr[ i ];
+ if( sum > tid ) {
+ // Granularity == thread
+ if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
+ int osID = procarr[ i ];
+ KMP_CPU_SET( osID, mask);
+ } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
+ int coreID = i / nth_per_core;
+ for( int ii = 0; ii < nth_per_core; ii++ ) {
+ int osID = procarr[ coreID * nth_per_core + ii ];
+ if( osID != -1 ) {
+ KMP_CPU_SET( osID, mask);
+ }
+ }
+ }
+ break;
+ }
+ }
+ __kmp_free( newarr );
+ }
+
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ tid, buf);
+ }
+ __kmp_set_system_affinity( mask, TRUE );
+ KMP_CPU_FREE_FROM_STACK(mask);
+ }
+}
+
+#endif // KMP_AFFINITY_SUPPORTED
diff --git a/contrib/libs/cxxsupp/openmp/kmp_affinity.h b/contrib/libs/cxxsupp/openmp/kmp_affinity.h
index c4d08e3a35..4ff6dbaac6 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_affinity.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_affinity.h
@@ -1,319 +1,319 @@
-/*
- * kmp_affinity.h -- header for affinity management
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_AFFINITY_H
-#define KMP_AFFINITY_H
-
-extern int __kmp_affinity_compact; /* Affinity 'compact' value */
-
-class Address {
-public:
- static const unsigned maxDepth = 32;
- unsigned labels[maxDepth];
- unsigned childNums[maxDepth];
- unsigned depth;
- unsigned leader;
- Address(unsigned _depth)
- : depth(_depth), leader(FALSE) {
- }
- Address &operator=(const Address &b) {
- depth = b.depth;
- for (unsigned i = 0; i < depth; i++) {
- labels[i] = b.labels[i];
- childNums[i] = b.childNums[i];
- }
- leader = FALSE;
- return *this;
- }
- bool operator==(const Address &b) const {
- if (depth != b.depth)
- return false;
- for (unsigned i = 0; i < depth; i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
- }
- bool isClose(const Address &b, int level) const {
- if (depth != b.depth)
- return false;
- if ((unsigned)level >= depth)
- return true;
- for (unsigned i = 0; i < (depth - level); i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
- }
- bool operator!=(const Address &b) const {
- return !operator==(b);
- }
- void print() const {
- unsigned i;
- printf("Depth: %u --- ", depth);
- for(i=0;i<depth;i++) {
- printf("%u ", labels[i]);
- }
- }
-};
-
-class AddrUnsPair {
-public:
- Address first;
- unsigned second;
- AddrUnsPair(Address _first, unsigned _second)
- : first(_first), second(_second) {
- }
- AddrUnsPair &operator=(const AddrUnsPair &b)
- {
- first = b.first;
- second = b.second;
- return *this;
- }
- void print() const {
- printf("first = "); first.print();
- printf(" --- second = %u", second);
- }
- bool operator==(const AddrUnsPair &b) const {
- if(first != b.first) return false;
- if(second != b.second) return false;
- return true;
- }
- bool operator!=(const AddrUnsPair &b) const {
- return !operator==(b);
- }
-};
-
-
-static int
-__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
-{
- const Address *aa = (const Address *)&(((AddrUnsPair *)a)
- ->first);
- const Address *bb = (const Address *)&(((AddrUnsPair *)b)
- ->first);
- unsigned depth = aa->depth;
- unsigned i;
- KMP_DEBUG_ASSERT(depth == bb->depth);
- for (i = 0; i < depth; i++) {
- if (aa->labels[i] < bb->labels[i]) return -1;
- if (aa->labels[i] > bb->labels[i]) return 1;
- }
- return 0;
-}
-
-#if KMP_AFFINITY_SUPPORTED
-static int
-__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
-{
- const Address *aa = (const Address *)&(((AddrUnsPair *)a)
- ->first);
- const Address *bb = (const Address *)&(((AddrUnsPair *)b)
- ->first);
- unsigned depth = aa->depth;
- unsigned i;
- KMP_DEBUG_ASSERT(depth == bb->depth);
- KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
- KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
- for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
- int j = depth - i - 1;
- if (aa->childNums[j] < bb->childNums[j]) return -1;
- if (aa->childNums[j] > bb->childNums[j]) return 1;
- }
- for (; i < depth; i++) {
- int j = i - __kmp_affinity_compact;
- if (aa->childNums[j] < bb->childNums[j]) return -1;
- if (aa->childNums[j] > bb->childNums[j]) return 1;
- }
- return 0;
-}
-#endif
-
-/** A structure for holding machine-specific hierarchy info to be computed once at init.
- This structure represents a mapping of threads to the actual machine hierarchy, or to
- our best guess at what the hierarchy might be, for the purpose of performing an
- efficient barrier. In the worst case, when there is no machine hierarchy information,
- it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
-class hierarchy_info {
-public:
- /** Good default values for number of leaves and branching factor, given no affinity information.
- Behaves a bit like hyper barrier. */
- static const kmp_uint32 maxLeaves=4;
- static const kmp_uint32 minBranch=4;
- /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
- or socket, packages/node, nodes/machine, etc. We don't want to get specific with
- nomenclature. When the machine is oversubscribed we add levels to duplicate the
- hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
- kmp_uint32 maxLevels;
-
- /** This is specifically the depth of the machine configuration hierarchy, in terms of the
- number of levels along the longest path from root to any leaf. It corresponds to the
- number of entries in numPerLevel if we exclude all but one trailing 1. */
- kmp_uint32 depth;
- kmp_uint32 base_num_threads;
- enum init_status { initialized=0, not_initialized=1, initializing=2 };
- volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
- volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
-
- /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
- node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
- and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
- kmp_uint32 *numPerLevel;
- kmp_uint32 *skipPerLevel;
-
- void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
- int hier_depth = adr2os[0].first.depth;
- int level = 0;
- for (int i=hier_depth-1; i>=0; --i) {
- int max = -1;
- for (int j=0; j<num_addrs; ++j) {
- int next = adr2os[j].first.childNums[i];
- if (next > max) max = next;
- }
- numPerLevel[level] = max+1;
- ++level;
- }
- }
-
- hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
-
- void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
-
- void init(AddrUnsPair *adr2os, int num_addrs)
- {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
- if (bool_result == 0) { // Wait for initialization
- while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
- return;
- }
- KMP_DEBUG_ASSERT(bool_result==1);
-
- /* Added explicit initialization of the data fields here to prevent usage of dirty value
- observed when static library is re-initialized multiple times (e.g. when
- non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
- depth = 1;
- resizing = 0;
- maxLevels = 7;
- numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
- for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
-
- // Sort table by physical ID
- if (adr2os) {
- qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
- deriveLevels(adr2os, num_addrs);
- }
- else {
- numPerLevel[0] = maxLeaves;
- numPerLevel[1] = num_addrs/maxLeaves;
- if (num_addrs%maxLeaves) numPerLevel[1]++;
- }
-
- base_num_threads = num_addrs;
- for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
- if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
- depth++;
-
- kmp_uint32 branch = minBranch;
- if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
- if (branch<minBranch) branch=minBranch;
- for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
- while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
- if (numPerLevel[d] & 1) numPerLevel[d]++;
- numPerLevel[d] = numPerLevel[d] >> 1;
- if (numPerLevel[d+1] == 1) depth++;
- numPerLevel[d+1] = numPerLevel[d+1] << 1;
- }
- if(numPerLevel[0] == 1) {
- branch = branch >> 1;
- if (branch<4) branch = minBranch;
- }
- }
-
- for (kmp_uint32 i=1; i<depth; ++i)
- skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
- // Fill in hierarchy in the case of oversubscription
- for (kmp_uint32 i=depth; i<maxLevels; ++i)
- skipPerLevel[i] = 2*skipPerLevel[i-1];
-
- uninitialized = initialized; // One writer
-
- }
-
- // Resize the hierarchy if nproc changes to something larger than before
- void resize(kmp_uint32 nproc)
- {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- while (bool_result == 0) { // someone else is trying to resize
- KMP_CPU_PAUSE();
- if (nproc <= base_num_threads) // happy with other thread's resize
- return;
- else // try to resize
- bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- }
- KMP_DEBUG_ASSERT(bool_result!=0);
- if (nproc <= base_num_threads) return; // happy with other thread's resize
-
- // Calculate new maxLevels
- kmp_uint32 old_sz = skipPerLevel[depth-1];
- kmp_uint32 incs = 0, old_maxLevels = maxLevels;
- // First see if old maxLevels is enough to contain new size
- for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
- skipPerLevel[i] = 2*skipPerLevel[i-1];
- numPerLevel[i-1] *= 2;
- old_sz *= 2;
- depth++;
- }
- if (nproc > old_sz) { // Not enough space, need to expand hierarchy
- while (nproc > old_sz) {
- old_sz *=2;
- incs++;
- depth++;
- }
- maxLevels += incs;
-
- // Resize arrays
- kmp_uint32 *old_numPerLevel = numPerLevel;
- kmp_uint32 *old_skipPerLevel = skipPerLevel;
- numPerLevel = skipPerLevel = NULL;
- numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
-
- // Copy old elements from old arrays
- for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = old_numPerLevel[i];
- skipPerLevel[i] = old_skipPerLevel[i];
- }
-
- // Init new elements in arrays to 1
- for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
-
- // Free old arrays
- __kmp_free(old_numPerLevel);
- }
-
- // Fill in oversubscription levels of hierarchy
- for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
- skipPerLevel[i] = 2*skipPerLevel[i-1];
-
- base_num_threads = nproc;
- resizing = 0; // One writer
-
- }
-};
-#endif // KMP_AFFINITY_H
+/*
+ * kmp_affinity.h -- header for affinity management
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_AFFINITY_H
+#define KMP_AFFINITY_H
+
+extern int __kmp_affinity_compact; /* Affinity 'compact' value */
+
+class Address {
+public:
+ static const unsigned maxDepth = 32;
+ unsigned labels[maxDepth];
+ unsigned childNums[maxDepth];
+ unsigned depth;
+ unsigned leader;
+ Address(unsigned _depth)
+ : depth(_depth), leader(FALSE) {
+ }
+ Address &operator=(const Address &b) {
+ depth = b.depth;
+ for (unsigned i = 0; i < depth; i++) {
+ labels[i] = b.labels[i];
+ childNums[i] = b.childNums[i];
+ }
+ leader = FALSE;
+ return *this;
+ }
+ bool operator==(const Address &b) const {
+ if (depth != b.depth)
+ return false;
+ for (unsigned i = 0; i < depth; i++)
+ if(labels[i] != b.labels[i])
+ return false;
+ return true;
+ }
+ bool isClose(const Address &b, int level) const {
+ if (depth != b.depth)
+ return false;
+ if ((unsigned)level >= depth)
+ return true;
+ for (unsigned i = 0; i < (depth - level); i++)
+ if(labels[i] != b.labels[i])
+ return false;
+ return true;
+ }
+ bool operator!=(const Address &b) const {
+ return !operator==(b);
+ }
+ void print() const {
+ unsigned i;
+ printf("Depth: %u --- ", depth);
+ for(i=0;i<depth;i++) {
+ printf("%u ", labels[i]);
+ }
+ }
+};
+
+class AddrUnsPair {
+public:
+ Address first;
+ unsigned second;
+ AddrUnsPair(Address _first, unsigned _second)
+ : first(_first), second(_second) {
+ }
+ AddrUnsPair &operator=(const AddrUnsPair &b)
+ {
+ first = b.first;
+ second = b.second;
+ return *this;
+ }
+ void print() const {
+ printf("first = "); first.print();
+ printf(" --- second = %u", second);
+ }
+ bool operator==(const AddrUnsPair &b) const {
+ if(first != b.first) return false;
+ if(second != b.second) return false;
+ return true;
+ }
+ bool operator!=(const AddrUnsPair &b) const {
+ return !operator==(b);
+ }
+};
+
+
+static int
+__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
+{
+ const Address *aa = (const Address *)&(((AddrUnsPair *)a)
+ ->first);
+ const Address *bb = (const Address *)&(((AddrUnsPair *)b)
+ ->first);
+ unsigned depth = aa->depth;
+ unsigned i;
+ KMP_DEBUG_ASSERT(depth == bb->depth);
+ for (i = 0; i < depth; i++) {
+ if (aa->labels[i] < bb->labels[i]) return -1;
+ if (aa->labels[i] > bb->labels[i]) return 1;
+ }
+ return 0;
+}
+
+#if KMP_AFFINITY_SUPPORTED
+static int
+__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
+{
+ const Address *aa = (const Address *)&(((AddrUnsPair *)a)
+ ->first);
+ const Address *bb = (const Address *)&(((AddrUnsPair *)b)
+ ->first);
+ unsigned depth = aa->depth;
+ unsigned i;
+ KMP_DEBUG_ASSERT(depth == bb->depth);
+ KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
+ KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
+ for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
+ int j = depth - i - 1;
+ if (aa->childNums[j] < bb->childNums[j]) return -1;
+ if (aa->childNums[j] > bb->childNums[j]) return 1;
+ }
+ for (; i < depth; i++) {
+ int j = i - __kmp_affinity_compact;
+ if (aa->childNums[j] < bb->childNums[j]) return -1;
+ if (aa->childNums[j] > bb->childNums[j]) return 1;
+ }
+ return 0;
+}
+#endif
+
+/** A structure for holding machine-specific hierarchy info to be computed once at init.
+ This structure represents a mapping of threads to the actual machine hierarchy, or to
+ our best guess at what the hierarchy might be, for the purpose of performing an
+ efficient barrier. In the worst case, when there is no machine hierarchy information,
+ it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
+class hierarchy_info {
+public:
+ /** Good default values for number of leaves and branching factor, given no affinity information.
+ Behaves a bit like hyper barrier. */
+ static const kmp_uint32 maxLeaves=4;
+ static const kmp_uint32 minBranch=4;
+ /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
+ or socket, packages/node, nodes/machine, etc. We don't want to get specific with
+ nomenclature. When the machine is oversubscribed we add levels to duplicate the
+ hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
+ kmp_uint32 maxLevels;
+
+ /** This is specifically the depth of the machine configuration hierarchy, in terms of the
+ number of levels along the longest path from root to any leaf. It corresponds to the
+ number of entries in numPerLevel if we exclude all but one trailing 1. */
+ kmp_uint32 depth;
+ kmp_uint32 base_num_threads;
+ enum init_status { initialized=0, not_initialized=1, initializing=2 };
+ volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
+ volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
+
+ /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
+ node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
+ and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
+ kmp_uint32 *numPerLevel;
+ kmp_uint32 *skipPerLevel;
+
+ void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
+ int hier_depth = adr2os[0].first.depth;
+ int level = 0;
+ for (int i=hier_depth-1; i>=0; --i) {
+ int max = -1;
+ for (int j=0; j<num_addrs; ++j) {
+ int next = adr2os[j].first.childNums[i];
+ if (next > max) max = next;
+ }
+ numPerLevel[level] = max+1;
+ ++level;
+ }
+ }
+
+ hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
+
+ void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
+
+ void init(AddrUnsPair *adr2os, int num_addrs)
+ {
+ kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
+ if (bool_result == 0) { // Wait for initialization
+ while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
+ return;
+ }
+ KMP_DEBUG_ASSERT(bool_result==1);
+
+ /* Added explicit initialization of the data fields here to prevent usage of dirty value
+ observed when static library is re-initialized multiple times (e.g. when
+ non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
+ depth = 1;
+ resizing = 0;
+ maxLevels = 7;
+ numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+ skipPerLevel = &(numPerLevel[maxLevels]);
+ for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ skipPerLevel[i] = 1;
+ }
+
+ // Sort table by physical ID
+ if (adr2os) {
+ qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
+ deriveLevels(adr2os, num_addrs);
+ }
+ else {
+ numPerLevel[0] = maxLeaves;
+ numPerLevel[1] = num_addrs/maxLeaves;
+ if (num_addrs%maxLeaves) numPerLevel[1]++;
+ }
+
+ base_num_threads = num_addrs;
+ for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
+ if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
+ depth++;
+
+ kmp_uint32 branch = minBranch;
+ if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
+ if (branch<minBranch) branch=minBranch;
+ for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
+ while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
+ if (numPerLevel[d] & 1) numPerLevel[d]++;
+ numPerLevel[d] = numPerLevel[d] >> 1;
+ if (numPerLevel[d+1] == 1) depth++;
+ numPerLevel[d+1] = numPerLevel[d+1] << 1;
+ }
+ if(numPerLevel[0] == 1) {
+ branch = branch >> 1;
+ if (branch<4) branch = minBranch;
+ }
+ }
+
+ for (kmp_uint32 i=1; i<depth; ++i)
+ skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
+ // Fill in hierarchy in the case of oversubscription
+ for (kmp_uint32 i=depth; i<maxLevels; ++i)
+ skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+ uninitialized = initialized; // One writer
+
+ }
+
+ // Resize the hierarchy if nproc changes to something larger than before
+ void resize(kmp_uint32 nproc)
+ {
+ kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+ while (bool_result == 0) { // someone else is trying to resize
+ KMP_CPU_PAUSE();
+ if (nproc <= base_num_threads) // happy with other thread's resize
+ return;
+ else // try to resize
+ bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+ }
+ KMP_DEBUG_ASSERT(bool_result!=0);
+ if (nproc <= base_num_threads) return; // happy with other thread's resize
+
+ // Calculate new maxLevels
+ kmp_uint32 old_sz = skipPerLevel[depth-1];
+ kmp_uint32 incs = 0, old_maxLevels = maxLevels;
+ // First see if old maxLevels is enough to contain new size
+ for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
+ skipPerLevel[i] = 2*skipPerLevel[i-1];
+ numPerLevel[i-1] *= 2;
+ old_sz *= 2;
+ depth++;
+ }
+ if (nproc > old_sz) { // Not enough space, need to expand hierarchy
+ while (nproc > old_sz) {
+ old_sz *=2;
+ incs++;
+ depth++;
+ }
+ maxLevels += incs;
+
+ // Resize arrays
+ kmp_uint32 *old_numPerLevel = numPerLevel;
+ kmp_uint32 *old_skipPerLevel = skipPerLevel;
+ numPerLevel = skipPerLevel = NULL;
+ numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+ skipPerLevel = &(numPerLevel[maxLevels]);
+
+ // Copy old elements from old arrays
+ for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = old_numPerLevel[i];
+ skipPerLevel[i] = old_skipPerLevel[i];
+ }
+
+ // Init new elements in arrays to 1
+ for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ skipPerLevel[i] = 1;
+ }
+
+ // Free old arrays
+ __kmp_free(old_numPerLevel);
+ }
+
+ // Fill in oversubscription levels of hierarchy
+ for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
+ skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+ base_num_threads = nproc;
+ resizing = 0; // One writer
+
+ }
+};
+#endif // KMP_AFFINITY_H
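A note for readers of the kmp_affinity.h hunk above: the width-balancing loop in hierarchy_info::init() (the `while (numPerLevel[d] > branch ...)` part) can be read in isolation. An over-wide level is repeatedly halved while the level above it is doubled, until no level branches wider than `branch` (level 0 is additionally capped at maxLeaves). The standalone C++ sketch below is illustrative only; the function and variable names are hypothetical and it is not part of the patched sources.

    // Minimal sketch of the level-width balancing step (hypothetical names).
    #include <cstdio>
    #include <vector>

    static void balance_levels(std::vector<unsigned> &numPerLevel, unsigned &depth,
                               unsigned branch, unsigned maxLeaves) {
        for (unsigned d = 0; d + 1 < depth; ++d) {
            while (numPerLevel[d] > branch ||
                   (d == 0 && numPerLevel[d] > maxLeaves)) {
                if (numPerLevel[d] & 1)      // round odd widths up before halving
                    numPerLevel[d]++;
                numPerLevel[d] >>= 1;        // halve this level ...
                if (numPerLevel[d + 1] == 1) // ... opening a new level if needed
                    depth++;
                numPerLevel[d + 1] <<= 1;    // ... and double the level above
            }
        }
    }

    int main() {
        // 32 leaves under a single parent are reshaped into a bounded-width tree.
        std::vector<unsigned> numPerLevel = {32, 1, 1, 1, 1, 1, 1};
        unsigned depth = 2;
        balance_levels(numPerLevel, depth, /*branch=*/4, /*maxLeaves=*/4);
        for (unsigned d = 0; d < depth; ++d)
            std::printf("level %u: %u children per parent\n", d, numPerLevel[d]);
        return 0; // prints 4, 4, 2, 1 for levels 0..3
    }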
diff --git a/contrib/libs/cxxsupp/openmp/kmp_alloc.c b/contrib/libs/cxxsupp/openmp/kmp_alloc.c
index db6b6399ed..4e4656c6e8 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_alloc.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_alloc.c
@@ -1,22 +1,22 @@
-/*
- * kmp_alloc.c -- private/shared dynamic memory allocation and management
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_wrapper_malloc.h"
-#include "kmp_io.h"
-
+/*
+ * kmp_alloc.c -- private/shared dynamic memory allocation and management
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_wrapper_malloc.h"
+#include "kmp_io.h"
+
#ifdef __clang__
#if __has_feature(address_sanitizer)
extern "C" { // sanitizers API
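The hunk below restores the BGET thread-private allocator. Its cross-thread free path (__kmp_bget_enqueue / __kmp_bget_dequeue further down) follows a common lock-free deferred-free pattern: a thread freeing a buffer it does not own pushes it onto the owner's singly-linked list with a compare-and-swap, and the owner detaches the whole list before allocating. The minimal standalone C++ sketch here uses std::atomic instead of the KMP_* macros and hypothetical names; it is for orientation only and is not part of the patch.

    // Sketch of the deferred-free list: foreign threads push, the owner drains.
    #include <atomic>
    #include <cstdio>

    struct buf_node {
        buf_node *flink;   // next pointer lives inside the freed buffer itself
        int payload;
    };

    static std::atomic<buf_node *> deferred_head{nullptr}; // per-owner thread in reality

    // Called by a foreign thread: push `b` onto the owner's list with CAS.
    static void enqueue_foreign_free(buf_node *b) {
        buf_node *old_head = deferred_head.load(std::memory_order_relaxed);
        do {
            b->flink = old_head; // link before publishing; never expose a broken list
        } while (!deferred_head.compare_exchange_weak(
                     old_head, b, std::memory_order_release,
                     std::memory_order_relaxed));
    }

    // Called by the owner: atomically detach the whole list, then release locally.
    static void drain_deferred_frees() {
        buf_node *p = deferred_head.exchange(nullptr, std::memory_order_acquire);
        while (p) {
            buf_node *next = p->flink;
            std::printf("owner releases buffer with payload %d\n", p->payload);
            p = next;
        }
    }

    int main() {
        static buf_node a{nullptr, 1}, b{nullptr, 2};
        enqueue_foreign_free(&a); // normally called from other threads
        enqueue_foreign_free(&b);
        drain_deferred_frees();   // the owner drains before allocating (cf. bget() below)
        return 0;
    }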
@@ -29,2035 +29,2035 @@ void __lsan_ignore_object(const void* p);
#define __lsan_ignore_object(p)
#endif
-// Disable bget when it is not used
-#if KMP_USE_BGET
-
-/* Thread private buffer management code */
-
-typedef int (*bget_compact_t)(size_t, int);
-typedef void *(*bget_acquire_t)(size_t);
-typedef void (*bget_release_t)(void *);
-
-/* NOTE: bufsize must be a signed datatype */
-
-#if KMP_OS_WINDOWS
-# if KMP_ARCH_X86 || KMP_ARCH_ARM
- typedef kmp_int32 bufsize;
-# else
- typedef kmp_int64 bufsize;
-# endif
-#else
- typedef ssize_t bufsize;
-#endif
-
-/* The three modes of operation are fifo search, lifo search, and best-fit */
-
-typedef enum bget_mode {
- bget_mode_fifo = 0,
- bget_mode_lifo = 1,
- bget_mode_best = 2
-} bget_mode_t;
-
-
-static void bpool( kmp_info_t *th, void *buffer, bufsize len);
-static void *bget( kmp_info_t *th, bufsize size);
-static void *bgetz( kmp_info_t *th, bufsize size);
-static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
-static void brel( kmp_info_t *th, void *buf);
-static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
-
-#ifdef KMP_DEBUG
-static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
-static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
-static void bufdump( kmp_info_t *th, void *buf);
-static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
-static int bpoolv( kmp_info_t *th, void *pool);
-#endif
-
-/* BGET CONFIGURATION */
- /* Buffer allocation size quantum:
- all buffers allocated are a
- multiple of this size. This
- MUST be a power of two. */
-
- /* On IA-32 architecture with Linux* OS,
- malloc() does not
-       ensure 16 byte alignment */
-
-#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
-
-#define SizeQuant 8
-#define AlignType double
-
-#else
-
-#define SizeQuant 16
-#define AlignType _Quad
-
-#endif
-
-#define BufStats 1 /* Define this symbol to enable the
- bstats() function which calculates
- the total free space in the buffer
- pool, the largest available
- buffer, and the total space
- currently allocated. */
-
-#ifdef KMP_DEBUG
-
-#define BufDump 1 /* Define this symbol to enable the
- bpoold() function which dumps the
- buffers in a buffer pool. */
-
-#define BufValid 1 /* Define this symbol to enable the
- bpoolv() function for validating
- a buffer pool. */
-
-#define DumpData 1 /* Define this symbol to enable the
- bufdump() function which allows
- dumping the contents of an allocated
- or free buffer. */
-#ifdef NOT_USED_NOW
-
-#define FreeWipe 1 /* Wipe free buffers to a guaranteed
- pattern of garbage to trip up
- miscreants who attempt to use
- pointers into released buffers. */
-
-#define BestFit 1 /* Use a best fit algorithm when
- searching for space for an
- allocation request. This uses
- memory more efficiently, but
- allocation will be much slower. */
-#endif /* NOT_USED_NOW */
-#endif /* KMP_DEBUG */
-
-
-static bufsize bget_bin_size[ ] = {
- 0,
-// 1 << 6, /* .5 Cache line */
- 1 << 7, /* 1 Cache line, new */
- 1 << 8, /* 2 Cache lines */
- 1 << 9, /* 4 Cache lines, new */
- 1 << 10, /* 8 Cache lines */
- 1 << 11, /* 16 Cache lines, new */
- 1 << 12,
- 1 << 13, /* new */
- 1 << 14,
- 1 << 15, /* new */
- 1 << 16,
- 1 << 17,
- 1 << 18,
- 1 << 19,
- 1 << 20, /* 1MB */
- 1 << 21, /* 2MB */
- 1 << 22, /* 4MB */
- 1 << 23, /* 8MB */
- 1 << 24, /* 16MB */
- 1 << 25, /* 32MB */
-};
-
-#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
-
-struct bfhead;
-
-/* Declare the interface, including the requested buffer size type,
- bufsize. */
-
-/* Queue links */
-
-typedef struct qlinks {
- struct bfhead *flink; /* Forward link */
- struct bfhead *blink; /* Backward link */
-} qlinks_t;
-
-/* Header in allocated and free buffers */
-
-typedef struct bhead2 {
- kmp_info_t *bthr; /* The thread which owns the buffer pool */
- bufsize prevfree; /* Relative link back to previous
- free buffer in memory or 0 if
- previous buffer is allocated. */
- bufsize bsize; /* Buffer size: positive if free,
- negative if allocated. */
-} bhead2_t;
-
-/* Make sure the bhead structure is a multiple of SizeQuant in size. */
-
-typedef union bhead {
- KMP_ALIGN( SizeQuant )
- AlignType b_align;
- char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
- bhead2_t bb;
-} bhead_t;
-#define BH(p) ((bhead_t *) (p))
-
-/* Header in directly allocated buffers (by acqfcn) */
-
-typedef struct bdhead
-{
- bufsize tsize; /* Total size, including overhead */
- bhead_t bh; /* Common header */
-} bdhead_t;
-#define BDH(p) ((bdhead_t *) (p))
-
-/* Header in free buffers */
-
-typedef struct bfhead {
- bhead_t bh; /* Common allocated/free header */
- qlinks_t ql; /* Links on free list */
-} bfhead_t;
-#define BFH(p) ((bfhead_t *) (p))
-
-typedef struct thr_data {
- bfhead_t freelist[ MAX_BGET_BINS ];
-#if BufStats
- size_t totalloc; /* Total space currently allocated */
- long numget, numrel; /* Number of bget() and brel() calls */
- long numpblk; /* Number of pool blocks */
- long numpget, numprel; /* Number of block gets and rels */
- long numdget, numdrel; /* Number of direct gets and rels */
-#endif /* BufStats */
-
- /* Automatic expansion block management functions */
- bget_compact_t compfcn;
- bget_acquire_t acqfcn;
- bget_release_t relfcn;
-
- bget_mode_t mode; /* what allocation mode to use? */
-
- bufsize exp_incr; /* Expansion block size */
- bufsize pool_len; /* 0: no bpool calls have been made
- -1: not all pool blocks are
- the same size
- >0: (common) block size for all
- bpool calls made so far
- */
-    bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
-} thr_data_t;
-
-/* Minimum allocation quantum: */
-
-#define QLSize (sizeof(qlinks_t))
-#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
-#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
-                        // Maximum for the requested size.
-
-/* End sentinel: value placed in bsize field of dummy block delimiting
- end of pool block. The most negative number which will fit in a
- bufsize, defined in a way that the compiler will accept. */
-
-#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
-
-/* ------------------------------------------------------------------------ */
-
-/* Thread Data management routines */
-
-static int
-bget_get_bin( bufsize size )
-{
- // binary chop bins
- int lo = 0, hi = MAX_BGET_BINS - 1;
-
- KMP_DEBUG_ASSERT( size > 0 );
-
- while ( (hi - lo) > 1 ) {
- int mid = (lo + hi) >> 1;
- if (size < bget_bin_size[ mid ])
- hi = mid - 1;
- else
- lo = mid;
- }
-
- KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
-
- return lo;
-}
-
-static void
-set_thr_data( kmp_info_t *th )
-{
- int i;
- thr_data_t *data;
-
- data =
- (thr_data_t *)(
- ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
- );
-
- memset( data, '\0', sizeof( *data ) );
-
- for (i = 0; i < MAX_BGET_BINS; ++i) {
- data->freelist[ i ].ql.flink = & data->freelist[ i ];
- data->freelist[ i ].ql.blink = & data->freelist[ i ];
- }
-
- th->th.th_local.bget_data = data;
- th->th.th_local.bget_list = 0;
-#if ! USE_CMP_XCHG_FOR_BGET
-#ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_init_lock( & th->th.th_local.bget_lock );
-#else
- __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
-#endif /* USE_LOCK_FOR_BGET */
-#endif /* ! USE_CMP_XCHG_FOR_BGET */
-}
-
-static thr_data_t *
-get_thr_data( kmp_info_t *th )
-{
- thr_data_t *data;
-
- data = (thr_data_t *) th->th.th_local.bget_data;
-
- KMP_DEBUG_ASSERT( data != 0 );
-
- return data;
-}
-
-
-#ifdef KMP_DEBUG
-
-static void
-__kmp_bget_validate_queue( kmp_info_t *th )
-{
- /* NOTE: assume that the global_lock is held */
-
- void *p = (void *) th->th.th_local.bget_list;
-
- while (p != 0) {
- bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
-
- KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
- p = (void *) b->ql.flink;
- }
-}
-
-#endif
-
-/* Walk the free list and release the enqueued buffers */
-
-static void
-__kmp_bget_dequeue( kmp_info_t *th )
-{
- void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
-
- if (p != 0) {
- #if USE_CMP_XCHG_FOR_BGET
- {
- volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- & th->th.th_local.bget_list, old_value, NULL ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
- }
- p = (void *) old_value;
- }
- #else /* ! USE_CMP_XCHG_FOR_BGET */
- #ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_acquire_lock( & th->th.th_local.bget_lock,
- __kmp_gtid_from_thread(th) );
- #else
- __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
- #endif /* USE_QUEUING_LOCK_FOR_BGET */
-
- p = (void *) th->th.th_local.bget_list;
- th->th.th_local.bget_list = 0;
-
- #ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_release_lock( & th->th.th_local.bget_lock,
- __kmp_gtid_from_thread(th) );
- #else
- __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
- #endif
- #endif /* USE_CMP_XCHG_FOR_BGET */
-
- /* Check again to make sure the list is not empty */
-
- while (p != 0) {
- void *buf = p;
- bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
-
- KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
- KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
- (kmp_uintptr_t)th ); // clear possible mark
- KMP_DEBUG_ASSERT( b->ql.blink == 0 );
-
- p = (void *) b->ql.flink;
-
- brel( th, buf );
- }
- }
-}
-
-/* Chain together the free buffers by using the thread owner field */
-
-static void
-__kmp_bget_enqueue( kmp_info_t *th, void *buf
-#ifdef USE_QUEUING_LOCK_FOR_BGET
- , kmp_int32 rel_gtid
-#endif
- )
-{
- bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
-
- KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
- KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
- (kmp_uintptr_t)th ); // clear possible mark
-
- b->ql.blink = 0;
-
- KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
- __kmp_gtid_from_thread( th ) ) );
-
-#if USE_CMP_XCHG_FOR_BGET
- {
- volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
- /* the next pointer must be set before setting bget_list to buf to avoid
- exposing a broken list to other threads, even for an instant. */
- b->ql.flink = BFH( old_value );
-
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- & th->th.th_local.bget_list, old_value, buf ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_PTR(th->th.th_local.bget_list);
- /* the next pointer must be set before setting bget_list to buf to avoid
- exposing a broken list to other threads, even for an instant. */
- b->ql.flink = BFH( old_value );
- }
- }
-#else /* ! USE_CMP_XCHG_FOR_BGET */
-# ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
-# else
- __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
- # endif
-
- b->ql.flink = BFH( th->th.th_local.bget_list );
- th->th.th_local.bget_list = (void *) buf;
-
-# ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
-# else
- __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
-# endif
-#endif /* USE_CMP_XCHG_FOR_BGET */
-}
-
-/* insert buffer back onto a new freelist */
-
-static void
-__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
-{
- int bin;
-
- KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
- KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
-
- bin = bget_get_bin( b->bh.bb.bsize );
-
- KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
- KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
-
- b->ql.flink = &thr->freelist[ bin ];
- b->ql.blink = thr->freelist[ bin ].ql.blink;
-
- thr->freelist[ bin ].ql.blink = b;
- b->ql.blink->ql.flink = b;
-}
-
-/* unlink the buffer from the old freelist */
-
-static void
-__kmp_bget_remove_from_freelist( bfhead_t *b )
-{
- KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
- KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
-
- b->ql.blink->ql.flink = b->ql.flink;
- b->ql.flink->ql.blink = b->ql.blink;
-}
-
-/* ------------------------------------------------------------------------ */
-
-/* GET STATS -- check info on free list */
-
-static void
-bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
-{
- thr_data_t *thr = get_thr_data( th );
- int bin;
-
- *total_free = *max_free = 0;
-
- for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
- bfhead_t *b, *best;
-
- best = &thr->freelist[ bin ];
- b = best->ql.flink;
-
- while (b != &thr->freelist[ bin ]) {
- *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
- if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
- best = b;
-
- /* Link to next buffer */
- b = b->ql.flink;
- }
-
- if (*max_free < best->bh.bb.bsize)
- *max_free = best->bh.bb.bsize;
- }
-
- if (*max_free > (bufsize)sizeof( bhead_t ))
- *max_free -= sizeof( bhead_t );
-}
-
-/* ------------------------------------------------------------------------ */
-
-/* BGET -- Allocate a buffer. */
-
-static void *
-bget( kmp_info_t *th, bufsize requested_size )
-{
- thr_data_t *thr = get_thr_data( th );
- bufsize size = requested_size;
- bfhead_t *b;
- void *buf;
- int compactseq = 0;
- int use_blink = 0;
-/* For BestFit */
- bfhead_t *best;
-
- if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
- return NULL;
- }; // if
-
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
-
- if (size < (bufsize)SizeQ) { /* Need at least room for the */
- size = SizeQ; /* queue links. */
- }
- #if defined( SizeQuant ) && ( SizeQuant > 1 )
- size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
- #endif
-
- size += sizeof(bhead_t); /* Add overhead in allocated buffer
- to size required. */
- KMP_DEBUG_ASSERT( size >= 0 );
- KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
-
- use_blink = ( thr->mode == bget_mode_lifo );
-
- /* If a compact function was provided in the call to bectl(), wrap
- a loop around the allocation process to allow compaction to
- intervene in case we don't find a suitable buffer in the chain. */
-
- for (;;) {
- int bin;
-
- for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
- /* Link to next buffer */
- b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
-
- if (thr->mode == bget_mode_best) {
- best = &thr->freelist[ bin ];
-
- /* Scan the free list searching for the first buffer big enough
- to hold the requested size buffer. */
-
- while (b != &thr->freelist[ bin ]) {
- if (b->bh.bb.bsize >= (bufsize) size) {
- if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
- best = b;
- }
- }
-
- /* Link to next buffer */
- b = ( use_blink ? b->ql.blink : b->ql.flink );
- }
- b = best;
- }
-
- while (b != &thr->freelist[ bin ]) {
- if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
-
- /* Buffer is big enough to satisfy the request. Allocate it
- to the caller. We must decide whether the buffer is large
- enough to split into the part given to the caller and a
- free buffer that remains on the free list, or whether the
- entire buffer should be removed from the free list and
- given to the caller in its entirety. We only split the
- buffer if enough room remains for a header plus the minimum
- quantum of allocation. */
-
- if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
- bhead_t *ba, *bn;
-
- ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
- bn = BH(((char *) ba) + size);
-
- KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
-
- /* Subtract size from length of free block. */
- b->bh.bb.bsize -= (bufsize) size;
-
- /* Link allocated buffer to the previous free buffer. */
- ba->bb.prevfree = b->bh.bb.bsize;
-
- /* Plug negative size into user buffer. */
- ba->bb.bsize = -size;
-
- /* Mark this buffer as owned by this thread. */
- TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
- /* Mark buffer after this one not preceded by free block. */
- bn->bb.prevfree = 0;
-
- /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
- __kmp_bget_remove_from_freelist( b );
- __kmp_bget_insert_into_freelist( thr, b );
-#if BufStats
- thr->totalloc += (size_t) size;
- thr->numget++; /* Increment number of bget() calls */
-#endif
- buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
- return buf;
- } else {
- bhead_t *ba;
-
- ba = BH(((char *) b) + b->bh.bb.bsize);
-
- KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
-
- /* The buffer isn't big enough to split. Give the whole
- shebang to the caller and remove it from the free list. */
-
- __kmp_bget_remove_from_freelist( b );
-#if BufStats
- thr->totalloc += (size_t) b->bh.bb.bsize;
- thr->numget++; /* Increment number of bget() calls */
-#endif
- /* Negate size to mark buffer allocated. */
- b->bh.bb.bsize = -(b->bh.bb.bsize);
-
- /* Mark this buffer as owned by this thread. */
- TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
- /* Zero the back pointer in the next buffer in memory
- to indicate that this buffer is allocated. */
- ba->bb.prevfree = 0;
-
- /* Give user buffer starting at queue links. */
- buf = (void *) &(b->ql);
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
- return buf;
- }
- }
-
- /* Link to next buffer */
- b = ( use_blink ? b->ql.blink : b->ql.flink );
- }
- }
-
- /* We failed to find a buffer. If there's a compact function
- defined, notify it of the size requested. If it returns
- TRUE, try the allocation again. */
-
- if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
- break;
- }
- }
-
- /* No buffer available with requested size free. */
-
- /* Don't give up yet -- look in the reserve supply. */
-
- if (thr->acqfcn != 0) {
- if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
-
- /* Request is too large to fit in a single expansion
-               block. Try to satisfy it by a direct buffer acquisition. */
-
- bdhead_t *bdh;
-
- size += sizeof(bdhead_t) - sizeof(bhead_t);
-
- KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
-
- /* richryan */
- bdh = BDH((*thr->acqfcn)((bufsize) size));
- if (bdh != NULL) {
-
- /* Mark the buffer special by setting the size field
- of its header to zero. */
- bdh->bh.bb.bsize = 0;
-
- /* Mark this buffer as owned by this thread. */
- TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
- // because direct buffer never goes to free list
- bdh->bh.bb.prevfree = 0;
- bdh->tsize = size;
-#if BufStats
- thr->totalloc += (size_t) size;
- thr->numget++; /* Increment number of bget() calls */
- thr->numdget++; /* Direct bget() call count */
-#endif
- buf = (void *) (bdh + 1);
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
- return buf;
- }
-
- } else {
-
- /* Try to obtain a new expansion block */
-
- void *newpool;
-
- KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
-
- /* richryan */
- newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
- KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
- if (newpool != NULL) {
- bpool( th, newpool, thr->exp_incr);
- buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
- return buf;
- }
- }
- }
-
- /* Still no buffer available */
-
- return NULL;
-}
-
-/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
- the entire contents of the buffer to zero, not just the
- region requested by the caller. */
-
-static void *
-bgetz( kmp_info_t *th, bufsize size )
-{
- char *buf = (char *) bget( th, size);
-
- if (buf != NULL) {
- bhead_t *b;
- bufsize rsize;
-
- b = BH(buf - sizeof(bhead_t));
- rsize = -(b->bb.bsize);
- if (rsize == 0) {
- bdhead_t *bd;
-
- bd = BDH(buf - sizeof(bdhead_t));
- rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
- } else {
- rsize -= sizeof(bhead_t);
- }
-
- KMP_DEBUG_ASSERT(rsize >= size);
-
- (void) memset(buf, 0, (bufsize) rsize);
- }
- return ((void *) buf);
-}
-
-/* BGETR -- Reallocate a buffer. This is a minimal implementation,
- simply in terms of brel() and bget(). It could be
- enhanced to allow the buffer to grow into adjacent free
- blocks and to avoid moving data unnecessarily. */
-
-static void *
-bgetr( kmp_info_t *th, void *buf, bufsize size)
-{
- void *nbuf;
- bufsize osize; /* Old size of buffer */
- bhead_t *b;
-
- nbuf = bget( th, size );
- if ( nbuf == NULL ) { /* Acquire new buffer */
- return NULL;
- }
- if ( buf == NULL ) {
- return nbuf;
- }
- b = BH(((char *) buf) - sizeof(bhead_t));
- osize = -b->bb.bsize;
- if (osize == 0) {
- /* Buffer acquired directly through acqfcn. */
- bdhead_t *bd;
-
- bd = BDH(((char *) buf) - sizeof(bdhead_t));
- osize = bd->tsize - (bufsize) sizeof(bdhead_t);
- } else {
- osize -= sizeof(bhead_t);
- };
-
- KMP_DEBUG_ASSERT(osize > 0);
-
- (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
- (size_t) ((size < osize) ? size : osize));
- brel( th, buf );
-
- return nbuf;
-}
-
-/* BREL -- Release a buffer. */
-
-static void
-brel( kmp_info_t *th, void *buf )
-{
- thr_data_t *thr = get_thr_data( th );
- bfhead_t *b, *bn;
- kmp_info_t *bth;
-
- KMP_DEBUG_ASSERT(buf != NULL);
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
-
- b = BFH(((char *) buf) - sizeof(bhead_t));
-
- if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
- bdhead_t *bdh;
-
- bdh = BDH(((char *) buf) - sizeof(bdhead_t));
- KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
-#if BufStats
- thr->totalloc -= (size_t) bdh->tsize;
- thr->numdrel++; /* Number of direct releases */
- thr->numrel++; /* Increment number of brel() calls */
-#endif /* BufStats */
-#ifdef FreeWipe
- (void) memset((char *) buf, 0x55,
- (size_t) (bdh->tsize - sizeof(bdhead_t)));
-#endif /* FreeWipe */
-
- KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
-
- KMP_DEBUG_ASSERT( thr->relfcn != 0 );
- (*thr->relfcn)((void *) bdh); /* Release it directly. */
- return;
- }
-
- bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
- if ( bth != th ) {
- /* Add this buffer to be released by the owning thread later */
- __kmp_bget_enqueue( bth, buf
-#ifdef USE_QUEUING_LOCK_FOR_BGET
- , __kmp_gtid_from_thread( th )
-#endif
- );
- return;
- }
-
- /* Buffer size must be negative, indicating that the buffer is
- allocated. */
-
- if (b->bh.bb.bsize >= 0) {
- bn = NULL;
- }
- KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
-
- /* Back pointer in next buffer must be zero, indicating the
- same thing: */
-
- KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
-
-#if BufStats
- thr->numrel++; /* Increment number of brel() calls */
- thr->totalloc += (size_t) b->bh.bb.bsize;
-#endif
-
- /* If the back link is nonzero, the previous buffer is free. */
-
- if (b->bh.bb.prevfree != 0) {
- /* The previous buffer is free. Consolidate this buffer with it
- by adding the length of this buffer to the previous free
- buffer. Note that we subtract the size in the buffer being
- released, since it's negative to indicate that the buffer is
- allocated. */
-
+// Disable bget when it is not used
+#if KMP_USE_BGET
+
+/* Thread private buffer management code */
+
+typedef int (*bget_compact_t)(size_t, int);
+typedef void *(*bget_acquire_t)(size_t);
+typedef void (*bget_release_t)(void *);
+
+/* NOTE: bufsize must be a signed datatype */
+
+#if KMP_OS_WINDOWS
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
+ typedef kmp_int32 bufsize;
+# else
+ typedef kmp_int64 bufsize;
+# endif
+#else
+ typedef ssize_t bufsize;
+#endif
+
+/* The three modes of operation are fifo search, lifo search, and best-fit */
+
+typedef enum bget_mode {
+ bget_mode_fifo = 0,
+ bget_mode_lifo = 1,
+ bget_mode_best = 2
+} bget_mode_t;
+
+
+static void bpool( kmp_info_t *th, void *buffer, bufsize len);
+static void *bget( kmp_info_t *th, bufsize size);
+static void *bgetz( kmp_info_t *th, bufsize size);
+static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
+static void brel( kmp_info_t *th, void *buf);
+static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
+
+#ifdef KMP_DEBUG
+static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
+static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
+static void bufdump( kmp_info_t *th, void *buf);
+static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
+static int bpoolv( kmp_info_t *th, void *pool);
+#endif
+
+/* BGET CONFIGURATION */
+ /* Buffer allocation size quantum:
+ all buffers allocated are a
+ multiple of this size. This
+ MUST be a power of two. */
+
+ /* On IA-32 architecture with Linux* OS,
+ malloc() does not
+       ensure 16 byte alignment */
+
+#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
+
+#define SizeQuant 8
+#define AlignType double
+
+#else
+
+#define SizeQuant 16
+#define AlignType _Quad
+
+#endif
+
+#define BufStats 1 /* Define this symbol to enable the
+ bstats() function which calculates
+ the total free space in the buffer
+ pool, the largest available
+ buffer, and the total space
+ currently allocated. */
+
+#ifdef KMP_DEBUG
+
+#define BufDump 1 /* Define this symbol to enable the
+ bpoold() function which dumps the
+ buffers in a buffer pool. */
+
+#define BufValid 1 /* Define this symbol to enable the
+ bpoolv() function for validating
+ a buffer pool. */
+
+#define DumpData 1 /* Define this symbol to enable the
+ bufdump() function which allows
+ dumping the contents of an allocated
+ or free buffer. */
+#ifdef NOT_USED_NOW
+
+#define FreeWipe 1 /* Wipe free buffers to a guaranteed
+ pattern of garbage to trip up
+ miscreants who attempt to use
+ pointers into released buffers. */
+
+#define BestFit 1 /* Use a best fit algorithm when
+ searching for space for an
+ allocation request. This uses
+ memory more efficiently, but
+ allocation will be much slower. */
+#endif /* NOT_USED_NOW */
+#endif /* KMP_DEBUG */
+
+
+static bufsize bget_bin_size[ ] = {
+ 0,
+// 1 << 6, /* .5 Cache line */
+ 1 << 7, /* 1 Cache line, new */
+ 1 << 8, /* 2 Cache lines */
+ 1 << 9, /* 4 Cache lines, new */
+ 1 << 10, /* 8 Cache lines */
+ 1 << 11, /* 16 Cache lines, new */
+ 1 << 12,
+ 1 << 13, /* new */
+ 1 << 14,
+ 1 << 15, /* new */
+ 1 << 16,
+ 1 << 17,
+ 1 << 18,
+ 1 << 19,
+ 1 << 20, /* 1MB */
+ 1 << 21, /* 2MB */
+ 1 << 22, /* 4MB */
+ 1 << 23, /* 8MB */
+ 1 << 24, /* 16MB */
+ 1 << 25, /* 32MB */
+};
+
+#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
+
+struct bfhead;
+
+/* Declare the interface, including the requested buffer size type,
+ bufsize. */
+
+/* Queue links */
+
+typedef struct qlinks {
+ struct bfhead *flink; /* Forward link */
+ struct bfhead *blink; /* Backward link */
+} qlinks_t;
+
+/* Header in allocated and free buffers */
+
+typedef struct bhead2 {
+ kmp_info_t *bthr; /* The thread which owns the buffer pool */
+ bufsize prevfree; /* Relative link back to previous
+ free buffer in memory or 0 if
+ previous buffer is allocated. */
+ bufsize bsize; /* Buffer size: positive if free,
+ negative if allocated. */
+} bhead2_t;
+
+/* Make sure the bhead structure is a multiple of SizeQuant in size. */
+
+typedef union bhead {
+ KMP_ALIGN( SizeQuant )
+ AlignType b_align;
+ char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
+ bhead2_t bb;
+} bhead_t;
+#define BH(p) ((bhead_t *) (p))
+
+/* Header in directly allocated buffers (by acqfcn) */
+
+typedef struct bdhead
+{
+ bufsize tsize; /* Total size, including overhead */
+ bhead_t bh; /* Common header */
+} bdhead_t;
+#define BDH(p) ((bdhead_t *) (p))
+
+/* Header in free buffers */
+
+typedef struct bfhead {
+ bhead_t bh; /* Common allocated/free header */
+ qlinks_t ql; /* Links on free list */
+} bfhead_t;
+#define BFH(p) ((bfhead_t *) (p))
+
+typedef struct thr_data {
+ bfhead_t freelist[ MAX_BGET_BINS ];
+#if BufStats
+ size_t totalloc; /* Total space currently allocated */
+ long numget, numrel; /* Number of bget() and brel() calls */
+ long numpblk; /* Number of pool blocks */
+ long numpget, numprel; /* Number of block gets and rels */
+ long numdget, numdrel; /* Number of direct gets and rels */
+#endif /* BufStats */
+
+ /* Automatic expansion block management functions */
+ bget_compact_t compfcn;
+ bget_acquire_t acqfcn;
+ bget_release_t relfcn;
+
+ bget_mode_t mode; /* what allocation mode to use? */
+
+ bufsize exp_incr; /* Expansion block size */
+ bufsize pool_len; /* 0: no bpool calls have been made
+ -1: not all pool blocks are
+ the same size
+ >0: (common) block size for all
+ bpool calls made so far
+ */
+    bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
+} thr_data_t;
+
+/* Minimum allocation quantum: */
+
+#define QLSize (sizeof(qlinks_t))
+#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
+#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
+                        // Maximum for the requested size.
+
+/* End sentinel: value placed in bsize field of dummy block delimiting
+ end of pool block. The most negative number which will fit in a
+ bufsize, defined in a way that the compiler will accept. */
+
+#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
+
+/* ------------------------------------------------------------------------ */
+
+/* Thread Data management routines */
+
+static int
+bget_get_bin( bufsize size )
+{
+ // binary chop bins
+ int lo = 0, hi = MAX_BGET_BINS - 1;
+
+ KMP_DEBUG_ASSERT( size > 0 );
+
+ while ( (hi - lo) > 1 ) {
+ int mid = (lo + hi) >> 1;
+ if (size < bget_bin_size[ mid ])
+ hi = mid - 1;
+ else
+ lo = mid;
+ }
+
+ KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
+
+ return lo;
+}
+
+static void
+set_thr_data( kmp_info_t *th )
+{
+ int i;
+ thr_data_t *data;
+
+ data =
+ (thr_data_t *)(
+ ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
+ );
+
+ memset( data, '\0', sizeof( *data ) );
+
+ for (i = 0; i < MAX_BGET_BINS; ++i) {
+ data->freelist[ i ].ql.flink = & data->freelist[ i ];
+ data->freelist[ i ].ql.blink = & data->freelist[ i ];
+ }
+
+ th->th.th_local.bget_data = data;
+ th->th.th_local.bget_list = 0;
+#if ! USE_CMP_XCHG_FOR_BGET
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_init_lock( & th->th.th_local.bget_lock );
+#else
+ __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
+#endif /* USE_LOCK_FOR_BGET */
+#endif /* ! USE_CMP_XCHG_FOR_BGET */
+}
+
+static thr_data_t *
+get_thr_data( kmp_info_t *th )
+{
+ thr_data_t *data;
+
+ data = (thr_data_t *) th->th.th_local.bget_data;
+
+ KMP_DEBUG_ASSERT( data != 0 );
+
+ return data;
+}
+
+
+#ifdef KMP_DEBUG
+
+static void
+__kmp_bget_validate_queue( kmp_info_t *th )
+{
+ /* NOTE: assume that the global_lock is held */
+
+ void *p = (void *) th->th.th_local.bget_list;
+
+ while (p != 0) {
+ bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
+
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
+ p = (void *) b->ql.flink;
+ }
+}
+
+#endif
+
+/* Walk the free list and release the enqueued buffers */
+
+static void
+__kmp_bget_dequeue( kmp_info_t *th )
+{
+ void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
+
+ if (p != 0) {
+ #if USE_CMP_XCHG_FOR_BGET
+ {
+ volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
+ while ( ! KMP_COMPARE_AND_STORE_PTR(
+ & th->th.th_local.bget_list, old_value, NULL ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
+ }
+ p = (void *) old_value;
+ }
+ #else /* ! USE_CMP_XCHG_FOR_BGET */
+ #ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_acquire_lock( & th->th.th_local.bget_lock,
+ __kmp_gtid_from_thread(th) );
+ #else
+ __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
+ #endif /* USE_QUEUING_LOCK_FOR_BGET */
+
+ p = (void *) th->th.th_local.bget_list;
+ th->th.th_local.bget_list = 0;
+
+ #ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_release_lock( & th->th.th_local.bget_lock,
+ __kmp_gtid_from_thread(th) );
+ #else
+ __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
+ #endif
+ #endif /* USE_CMP_XCHG_FOR_BGET */
+
+ /* Check again to make sure the list is not empty */
+
+ while (p != 0) {
+ void *buf = p;
+ bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
+
+ KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
+ KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
+ (kmp_uintptr_t)th ); // clear possible mark
+ KMP_DEBUG_ASSERT( b->ql.blink == 0 );
+
+ p = (void *) b->ql.flink;
+
+ brel( th, buf );
+ }
+ }
+}
+
+/* Chain together the free buffers by using the thread owner field */
+
+static void
+__kmp_bget_enqueue( kmp_info_t *th, void *buf
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ , kmp_int32 rel_gtid
+#endif
+ )
+{
+ bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
+
+ KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
+ KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
+ (kmp_uintptr_t)th ); // clear possible mark
+
+ b->ql.blink = 0;
+
+ KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
+ __kmp_gtid_from_thread( th ) ) );
+
+#if USE_CMP_XCHG_FOR_BGET
+ {
+ volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
+ /* the next pointer must be set before setting bget_list to buf to avoid
+ exposing a broken list to other threads, even for an instant. */
+ b->ql.flink = BFH( old_value );
+
+ while ( ! KMP_COMPARE_AND_STORE_PTR(
+ & th->th.th_local.bget_list, old_value, buf ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_PTR(th->th.th_local.bget_list);
+ /* the next pointer must be set before setting bget_list to buf to avoid
+ exposing a broken list to other threads, even for an instant. */
+ b->ql.flink = BFH( old_value );
+ }
+ }
+#else /* ! USE_CMP_XCHG_FOR_BGET */
+# ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
+# else
+ __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
+ # endif
+
+ b->ql.flink = BFH( th->th.th_local.bget_list );
+ th->th.th_local.bget_list = (void *) buf;
+
+# ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
+# else
+ __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
+# endif
+#endif /* USE_CMP_XCHG_FOR_BGET */
+}
+
+/* insert buffer back onto a new freelist */
+
+static void
+__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
+{
+ int bin;
+
+ KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
+ KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
+
+ bin = bget_get_bin( b->bh.bb.bsize );
+
+ KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
+ KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
+
+ b->ql.flink = &thr->freelist[ bin ];
+ b->ql.blink = thr->freelist[ bin ].ql.blink;
+
+ thr->freelist[ bin ].ql.blink = b;
+ b->ql.blink->ql.flink = b;
+}
+
+/* unlink the buffer from the old freelist */
+
+static void
+__kmp_bget_remove_from_freelist( bfhead_t *b )
+{
+ KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
+ KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
+
+ b->ql.blink->ql.flink = b->ql.flink;
+ b->ql.flink->ql.blink = b->ql.blink;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* GET STATS -- check info on free list */
+
+static void
+bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
+{
+ thr_data_t *thr = get_thr_data( th );
+ int bin;
+
+ *total_free = *max_free = 0;
+
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b, *best;
+
+ best = &thr->freelist[ bin ];
+ b = best->ql.flink;
+
+ while (b != &thr->freelist[ bin ]) {
+ *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
+ if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
+ best = b;
+
+ /* Link to next buffer */
+ b = b->ql.flink;
+ }
+
+ if (*max_free < best->bh.bb.bsize)
+ *max_free = best->bh.bb.bsize;
+ }
+
+ if (*max_free > (bufsize)sizeof( bhead_t ))
+ *max_free -= sizeof( bhead_t );
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* BGET -- Allocate a buffer. */
+
+static void *
+bget( kmp_info_t *th, bufsize requested_size )
+{
+ thr_data_t *thr = get_thr_data( th );
+ bufsize size = requested_size;
+ bfhead_t *b;
+ void *buf;
+ int compactseq = 0;
+ int use_blink = 0;
+/* For BestFit */
+ bfhead_t *best;
+
+ if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
+ return NULL;
+ }; // if
+
+ __kmp_bget_dequeue( th ); /* Release any queued buffers */
+
+ if (size < (bufsize)SizeQ) { /* Need at least room for the */
+ size = SizeQ; /* queue links. */
+ }
+ #if defined( SizeQuant ) && ( SizeQuant > 1 )
+ size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
+ #endif
+
+ size += sizeof(bhead_t); /* Add overhead in allocated buffer
+ to size required. */
+ KMP_DEBUG_ASSERT( size >= 0 );
+ KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
+
+ use_blink = ( thr->mode == bget_mode_lifo );
+
+ /* If a compact function was provided in the call to bectl(), wrap
+ a loop around the allocation process to allow compaction to
+ intervene in case we don't find a suitable buffer in the chain. */
+
+ for (;;) {
+ int bin;
+
+ for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
+ /* Link to next buffer */
+ b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
+
+ if (thr->mode == bget_mode_best) {
+ best = &thr->freelist[ bin ];
+
+ /* Scan the free list searching for the first buffer big enough
+ to hold the requested size buffer. */
+
+ while (b != &thr->freelist[ bin ]) {
+ if (b->bh.bb.bsize >= (bufsize) size) {
+ if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
+ best = b;
+ }
+ }
+
+ /* Link to next buffer */
+ b = ( use_blink ? b->ql.blink : b->ql.flink );
+ }
+ b = best;
+ }
+
+ while (b != &thr->freelist[ bin ]) {
+ if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
+
+ /* Buffer is big enough to satisfy the request. Allocate it
+ to the caller. We must decide whether the buffer is large
+ enough to split into the part given to the caller and a
+ free buffer that remains on the free list, or whether the
+ entire buffer should be removed from the free list and
+ given to the caller in its entirety. We only split the
+ buffer if enough room remains for a header plus the minimum
+ quantum of allocation. */
+
+ if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
+ bhead_t *ba, *bn;
+
+ ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
+ bn = BH(((char *) ba) + size);
+
+ KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
+
+ /* Subtract size from length of free block. */
+ b->bh.bb.bsize -= (bufsize) size;
+
+ /* Link allocated buffer to the previous free buffer. */
+ ba->bb.prevfree = b->bh.bb.bsize;
+
+ /* Plug negative size into user buffer. */
+ ba->bb.bsize = -size;
+
+ /* Mark this buffer as owned by this thread. */
+ TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
+ /* Mark buffer after this one not preceded by free block. */
+ bn->bb.prevfree = 0;
+
+ /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
+ __kmp_bget_remove_from_freelist( b );
+ __kmp_bget_insert_into_freelist( thr, b );
+#if BufStats
+ thr->totalloc += (size_t) size;
+ thr->numget++; /* Increment number of bget() calls */
+#endif
+ buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
+ KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
+ return buf;
+ } else {
+ bhead_t *ba;
+
+ ba = BH(((char *) b) + b->bh.bb.bsize);
+
+ KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
+
+ /* The buffer isn't big enough to split. Give the whole
+ shebang to the caller and remove it from the free list. */
+
+ __kmp_bget_remove_from_freelist( b );
+#if BufStats
+ thr->totalloc += (size_t) b->bh.bb.bsize;
+ thr->numget++; /* Increment number of bget() calls */
+#endif
+ /* Negate size to mark buffer allocated. */
+ b->bh.bb.bsize = -(b->bh.bb.bsize);
+
+ /* Mark this buffer as owned by this thread. */
+ TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
+ /* Zero the back pointer in the next buffer in memory
+ to indicate that this buffer is allocated. */
+ ba->bb.prevfree = 0;
+
+ /* Give user buffer starting at queue links. */
+ buf = (void *) &(b->ql);
+ KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
+ return buf;
+ }
+ }
+
+ /* Link to next buffer */
+ b = ( use_blink ? b->ql.blink : b->ql.flink );
+ }
+ }
+
+ /* We failed to find a buffer. If there's a compact function
+ defined, notify it of the size requested. If it returns
+ TRUE, try the allocation again. */
+
+ if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
+ break;
+ }
+ }
+
+ /* No buffer available with requested size free. */
+
+ /* Don't give up yet -- look in the reserve supply. */
+
+ if (thr->acqfcn != 0) {
+ if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
+
+ /* Request is too large to fit in a single expansion
+               block. Try to satisfy it by a direct buffer acquisition. */
+
+ bdhead_t *bdh;
+
+ size += sizeof(bdhead_t) - sizeof(bhead_t);
+
+ KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
+
+ /* richryan */
+ bdh = BDH((*thr->acqfcn)((bufsize) size));
+ if (bdh != NULL) {
+
+ /* Mark the buffer special by setting the size field
+ of its header to zero. */
+ bdh->bh.bb.bsize = 0;
+
+ /* Mark this buffer as owned by this thread. */
+ TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
+ // because direct buffer never goes to free list
+ bdh->bh.bb.prevfree = 0;
+ bdh->tsize = size;
+#if BufStats
+ thr->totalloc += (size_t) size;
+ thr->numget++; /* Increment number of bget() calls */
+ thr->numdget++; /* Direct bget() call count */
+#endif
+ buf = (void *) (bdh + 1);
+ KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
+ return buf;
+ }
+
+ } else {
+
+ /* Try to obtain a new expansion block */
+
+ void *newpool;
+
+ KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
+
+ /* richryan */
+ newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
+ KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
+ if (newpool != NULL) {
+ bpool( th, newpool, thr->exp_incr);
+ buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
+ return buf;
+ }
+ }
+ }
+
+ /* Still no buffer available */
+
+ return NULL;
+}
+
+/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
+ the entire contents of the buffer to zero, not just the
+ region requested by the caller. */
+
+static void *
+bgetz( kmp_info_t *th, bufsize size )
+{
+ char *buf = (char *) bget( th, size);
+
+ if (buf != NULL) {
+ bhead_t *b;
+ bufsize rsize;
+
+ b = BH(buf - sizeof(bhead_t));
+ rsize = -(b->bb.bsize);
+ if (rsize == 0) {
+ bdhead_t *bd;
+
+ bd = BDH(buf - sizeof(bdhead_t));
+ rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
+ } else {
+ rsize -= sizeof(bhead_t);
+ }
+
+ KMP_DEBUG_ASSERT(rsize >= size);
+
+ (void) memset(buf, 0, (bufsize) rsize);
+ }
+ return ((void *) buf);
+}
+
+/* BGETR -- Reallocate a buffer. This is a minimal implementation,
+ simply in terms of brel() and bget(). It could be
+ enhanced to allow the buffer to grow into adjacent free
+ blocks and to avoid moving data unnecessarily. */
+
+static void *
+bgetr( kmp_info_t *th, void *buf, bufsize size)
+{
+ void *nbuf;
+ bufsize osize; /* Old size of buffer */
+ bhead_t *b;
+
+ nbuf = bget( th, size );
+ if ( nbuf == NULL ) { /* Acquire new buffer */
+ return NULL;
+ }
+ if ( buf == NULL ) {
+ return nbuf;
+ }
+ b = BH(((char *) buf) - sizeof(bhead_t));
+ osize = -b->bb.bsize;
+ if (osize == 0) {
+ /* Buffer acquired directly through acqfcn. */
+ bdhead_t *bd;
+
+ bd = BDH(((char *) buf) - sizeof(bdhead_t));
+ osize = bd->tsize - (bufsize) sizeof(bdhead_t);
+ } else {
+ osize -= sizeof(bhead_t);
+ };
+
+ KMP_DEBUG_ASSERT(osize > 0);
+
+ (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
+ (size_t) ((size < osize) ? size : osize));
+ brel( th, buf );
+
+ return nbuf;
+}
+
+/* BREL -- Release a buffer. */
+
+static void
+brel( kmp_info_t *th, void *buf )
+{
+ thr_data_t *thr = get_thr_data( th );
+ bfhead_t *b, *bn;
+ kmp_info_t *bth;
+
+ KMP_DEBUG_ASSERT(buf != NULL);
+ KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
+
+ b = BFH(((char *) buf) - sizeof(bhead_t));
+
+ if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
+ bdhead_t *bdh;
+
+ bdh = BDH(((char *) buf) - sizeof(bdhead_t));
+ KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
+#if BufStats
+ thr->totalloc -= (size_t) bdh->tsize;
+ thr->numdrel++; /* Number of direct releases */
+ thr->numrel++; /* Increment number of brel() calls */
+#endif /* BufStats */
+#ifdef FreeWipe
+ (void) memset((char *) buf, 0x55,
+ (size_t) (bdh->tsize - sizeof(bdhead_t)));
+#endif /* FreeWipe */
+
+ KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
+
+ KMP_DEBUG_ASSERT( thr->relfcn != 0 );
+ (*thr->relfcn)((void *) bdh); /* Release it directly. */
+ return;
+ }
+
+ bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
+ if ( bth != th ) {
+ /* Add this buffer to be released by the owning thread later */
+ __kmp_bget_enqueue( bth, buf
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ , __kmp_gtid_from_thread( th )
+#endif
+ );
+ return;
+ }
+
+ /* Buffer size must be negative, indicating that the buffer is
+ allocated. */
+
+ if (b->bh.bb.bsize >= 0) {
+ bn = NULL;
+ }
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
+
+ /* Back pointer in next buffer must be zero, indicating the
+ same thing: */
+
+ KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
+
+#if BufStats
+ thr->numrel++; /* Increment number of brel() calls */
+ thr->totalloc += (size_t) b->bh.bb.bsize;
+#endif
+
+ /* If the back link is nonzero, the previous buffer is free. */
+
+ if (b->bh.bb.prevfree != 0) {
+ /* The previous buffer is free. Consolidate this buffer with it
+ by adding the length of this buffer to the previous free
+ buffer. Note that we subtract the size in the buffer being
+ released, since it's negative to indicate that the buffer is
+ allocated. */
+
bufsize size = b->bh.bb.bsize;
-
- /* Make the previous buffer the one we're working on. */
- KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
- b = BFH(((char *) b) - b->bh.bb.prevfree);
- b->bh.bb.bsize -= size;
-
- /* unlink the buffer from the old freelist */
- __kmp_bget_remove_from_freelist( b );
- }
- else {
- /* The previous buffer isn't allocated. Mark this buffer
- size as positive (i.e. free) and fall through to place
- the buffer on the free list as an isolated free block. */
-
- b->bh.bb.bsize = -b->bh.bb.bsize;
- }
-
- /* insert buffer back onto a new freelist */
- __kmp_bget_insert_into_freelist( thr, b );
-
-
- /* Now we look at the next buffer in memory, located by advancing from
- the start of this buffer by its size, to see if that buffer is
- free. If it is, we combine this buffer with the next one in
- memory, dechaining the second buffer from the free list. */
-
- bn = BFH(((char *) b) + b->bh.bb.bsize);
- if (bn->bh.bb.bsize > 0) {
-
- /* The buffer is free. Remove it from the free list and add
- its size to that of our buffer. */
-
- KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
-
- __kmp_bget_remove_from_freelist( bn );
-
- b->bh.bb.bsize += bn->bh.bb.bsize;
-
- /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
-
- __kmp_bget_remove_from_freelist( b );
- __kmp_bget_insert_into_freelist( thr, b );
-
- /* Finally, advance to the buffer that follows the newly
- consolidated free block. We must set its backpointer to the
- head of the consolidated free block. We know the next block
- must be an allocated block because the process of recombination
- guarantees that two free blocks will never be contiguous in
- memory. */
-
- bn = BFH(((char *) b) + b->bh.bb.bsize);
- }
-#ifdef FreeWipe
- (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
- (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
-#endif
- KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
-
- /* The next buffer is allocated. Set the backpointer in it to point
- to this buffer; the previous free buffer in memory. */
-
- bn->bh.bb.prevfree = b->bh.bb.bsize;
-
- /* If a block-release function is defined, and this free buffer
- constitutes the entire block, release it. Note that pool_len
- is defined in such a way that the test will fail unless all
- pool blocks are the same size. */
-
- if (thr->relfcn != 0 &&
- b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
- {
-#if BufStats
- if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
-#endif
-
- KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
-
- /* Unlink the buffer from the free list */
- __kmp_bget_remove_from_freelist( b );
-
- KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
-
- (*thr->relfcn)(b);
-#if BufStats
- thr->numprel++; /* Nr of expansion block releases */
- thr->numpblk--; /* Total number of blocks */
- KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
-
- /* avoid leaving stale last_pool pointer around if it is being dealloced */
- if (thr->last_pool == b) thr->last_pool = 0;
- }
- else {
- thr->last_pool = b;
- }
-#endif /* BufStats */
- }
-}
-
-/* BECTL -- Establish automatic pool expansion control */
-
-static void
-bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
-{
- thr_data_t *thr = get_thr_data( th );
-
- thr->compfcn = compact;
- thr->acqfcn = acquire;
- thr->relfcn = release;
- thr->exp_incr = pool_incr;
-}
-
-/* BPOOL -- Add a region of memory to the buffer pool. */
-
-static void
-bpool( kmp_info_t *th, void *buf, bufsize len)
-{
-/* int bin = 0; */
- thr_data_t *thr = get_thr_data( th );
- bfhead_t *b = BFH(buf);
- bhead_t *bn;
-
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
-
-#ifdef SizeQuant
- len &= ~(SizeQuant - 1);
-#endif
- if (thr->pool_len == 0) {
- thr->pool_len = len;
- } else if (len != thr->pool_len) {
- thr->pool_len = -1;
- }
-#if BufStats
- thr->numpget++; /* Number of block acquisitions */
- thr->numpblk++; /* Number of blocks total */
- KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
-#endif /* BufStats */
-
- /* Since the block is initially occupied by a single free buffer,
- it had better not be (much) larger than the largest buffer
- whose size we can store in bhead.bb.bsize. */
-
- KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
-
- /* Clear the backpointer at the start of the block to indicate that
- there is no free block prior to this one. That blocks
- recombination when the first block in memory is released. */
-
- b->bh.bb.prevfree = 0;
-
- /* Create a dummy allocated buffer at the end of the pool. This dummy
- buffer is seen when a buffer at the end of the pool is released and
- blocks recombination of the last buffer with the dummy buffer at
- the end. The length in the dummy buffer is set to the largest
- negative number to denote the end of the pool for diagnostic
- routines (this specific value is not counted on by the actual
- allocation and release functions). */
-
- len -= sizeof(bhead_t);
- b->bh.bb.bsize = (bufsize) len;
- /* Set the owner of this buffer */
- TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
-
- /* Chain the new block to the free list. */
- __kmp_bget_insert_into_freelist( thr, b );
-
-#ifdef FreeWipe
- (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
- (size_t) (len - sizeof(bfhead_t)));
-#endif
- bn = BH(((char *) b) + len);
- bn->bb.prevfree = (bufsize) len;
- /* Definition of ESent assumes two's complement! */
- KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
-
- bn->bb.bsize = ESent;
-}
-
-/* ------------------------------------------------------------------------ */
-
-/* BFREED -- Dump the free lists for this thread. */
-
-static void
-bfreed( kmp_info_t *th )
-{
- int bin = 0, count = 0;
- int gtid = __kmp_gtid_from_thread( th );
- thr_data_t *thr = get_thr_data( th );
-
-#if BufStats
- __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
- KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
- " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
- gtid, (kmp_uint64) thr->totalloc,
- (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
- (kmp_int64) thr->numpblk,
- (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
- (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
-#endif
-
- for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
- bfhead_t *b;
-
- for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
- bufsize bs = b->bh.bb.bsize;
-
- KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
- KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
- KMP_DEBUG_ASSERT( bs > 0 );
-
- count += 1;
-
- __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
-#ifdef FreeWipe
- {
- char *lerr = ((char *) b) + sizeof(bfhead_t);
- if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
- __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
- }
- }
-#endif
- }
- }
-
- if (count == 0)
- __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
-}
-
-/* ------------------------------------------------------------------------ */
-
-#ifdef KMP_DEBUG
-
-#if BufStats
-
-/* BSTATS -- Return buffer allocation free space statistics. */
-
-static void
-bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
-{
- int bin = 0;
- thr_data_t *thr = get_thr_data( th );
-
- *nget = thr->numget;
- *nrel = thr->numrel;
- *curalloc = (bufsize) thr->totalloc;
- *totfree = 0;
- *maxfree = -1;
-
- for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
- bfhead_t *b = thr->freelist[ bin ].ql.flink;
-
- while (b != &thr->freelist[ bin ]) {
- KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
- *totfree += b->bh.bb.bsize;
- if (b->bh.bb.bsize > *maxfree) {
- *maxfree = b->bh.bb.bsize;
- }
- b = b->ql.flink; /* Link to next buffer */
- }
- }
-}
-
-/* BSTATSE -- Return extended statistics */
-
-static void
-bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
-{
- thr_data_t *thr = get_thr_data( th );
-
- *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
- *npool = thr->numpblk;
- *npget = thr->numpget;
- *nprel = thr->numprel;
- *ndget = thr->numdget;
- *ndrel = thr->numdrel;
-}
-
-#endif /* BufStats */
-
-/* BUFDUMP -- Dump the data in a buffer. This is called with the user
- data pointer, and backs up to the buffer header. It will
- dump either a free block or an allocated one. */
-
-static void
-bufdump( kmp_info_t *th, void *buf )
-{
- bfhead_t *b;
- unsigned char *bdump;
- bufsize bdlen;
-
- b = BFH(((char *) buf) - sizeof(bhead_t));
- KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
- if (b->bh.bb.bsize < 0) {
- bdump = (unsigned char *) buf;
- bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
- } else {
- bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
- bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
- }
-
- while (bdlen > 0) {
- int i, dupes = 0;
- bufsize l = bdlen;
- char bhex[50], bascii[20];
-
- if (l > 16) {
- l = 16;
- }
-
- for (i = 0; i < l; i++) {
- (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
- if (bdump[i] > 0x20 && bdump[i] < 0x7F)
- bascii[ i ] = bdump[ i ];
- else
- bascii[ i ] = ' ';
- }
- bascii[i] = 0;
- (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
- bdump += l;
- bdlen -= l;
- while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
- (char *) bdump, 16) == 0)) {
- dupes++;
- bdump += 16;
- bdlen -= 16;
- }
- if (dupes > 1) {
- (void) __kmp_printf_no_lock(
- " (%d lines [%d bytes] identical to above line skipped)\n",
- dupes, dupes * 16);
- } else if (dupes == 1) {
- bdump -= 16;
- bdlen += 16;
- }
- }
-}
-
-/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
- If DUMPALLOC is nonzero, the contents of allocated buffers
- are dumped. If DUMPFREE is nonzero, free blocks are
- dumped as well. If FreeWipe checking is enabled, free
- blocks which have been clobbered will always be dumped. */
-
-static void
-bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
-{
- bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
-
- while (b->bh.bb.bsize != ESent) {
- bufsize bs = b->bh.bb.bsize;
-
- if (bs < 0) {
- bs = -bs;
- (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
- if (dumpalloc) {
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- }
- } else {
- const char *lerr = "";
-
- KMP_DEBUG_ASSERT(bs > 0);
- if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
- lerr = " (Bad free list links)";
- }
- (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
- (long) bs, lerr);
-#ifdef FreeWipe
- lerr = ((char *) b) + sizeof(bfhead_t);
- if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
- (memcmp(lerr, lerr + 1,
- (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
- (void) __kmp_printf_no_lock(
- "(Contents of above free block have been overstored.)\n");
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- } else
-#endif
- if (dumpfree) {
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- }
- }
- b = BFH(((char *) b) + bs);
- }
-}
-
-/* BPOOLV -- Validate a buffer pool. */
-
-static int
-bpoolv( kmp_info_t *th, void *buf )
-{
- bfhead_t *b = BFH(buf);
-
- while (b->bh.bb.bsize != ESent) {
- bufsize bs = b->bh.bb.bsize;
-
- if (bs < 0) {
- bs = -bs;
- } else {
-#ifdef FreeWipe
- char *lerr = "";
-#endif
-
- KMP_DEBUG_ASSERT(bs > 0);
- if (bs <= 0) {
- return 0;
- }
- if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
- (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
- (long) bs);
- KMP_DEBUG_ASSERT(0);
- return 0;
- }
-#ifdef FreeWipe
- lerr = ((char *) b) + sizeof(bfhead_t);
- if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
- (memcmp(lerr, lerr + 1,
- (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
- (void) __kmp_printf_no_lock(
- "(Contents of above free block have been overstored.)\n");
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- KMP_DEBUG_ASSERT(0);
- return 0;
- }
-#endif /* FreeWipe */
- }
- b = BFH(((char *) b) + bs);
- }
- return 1;
-}
-
-#endif /* KMP_DEBUG */
-
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_initialize_bget( kmp_info_t *th )
-{
- KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
-
- set_thr_data( th );
-
- bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
- (bufsize) __kmp_malloc_pool_incr );
-}
-
-void
-__kmp_finalize_bget( kmp_info_t *th )
-{
- thr_data_t *thr;
- bfhead_t *b;
-
- KMP_DEBUG_ASSERT( th != 0 );
-
-#if BufStats
- thr = (thr_data_t *) th->th.th_local.bget_data;
- KMP_DEBUG_ASSERT( thr != NULL );
- b = thr->last_pool;
-
- /* If a block-release function is defined, and this free buffer
- constitutes the entire block, release it. Note that pool_len
- is defined in such a way that the test will fail unless all
- pool blocks are the same size. */
-
- /* Deallocate the last pool if one exists because we no longer do it in brel() */
- if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
- b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
- {
- KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
-
- /* Unlink the buffer from the free list */
- __kmp_bget_remove_from_freelist( b );
-
- KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
-
- (*thr->relfcn)(b);
- thr->numprel++; /* Nr of expansion block releases */
- thr->numpblk--; /* Total number of blocks */
- KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
- }
-#endif /* BufStats */
-
- /* Deallocate bget_data */
- if ( th->th.th_local.bget_data != NULL ) {
- __kmp_free( th->th.th_local.bget_data );
- th->th.th_local.bget_data = NULL;
- }; // if
-}
-
-void
-kmpc_set_poolsize( size_t size )
-{
- bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
- (bget_release_t) free, (bufsize) size );
-}
-
-size_t
-kmpc_get_poolsize( void )
-{
- thr_data_t *p;
-
- p = get_thr_data( __kmp_get_thread() );
-
- return p->exp_incr;
-}
-
-void
-kmpc_set_poolmode( int mode )
-{
- thr_data_t *p;
-
- if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
- p = get_thr_data( __kmp_get_thread() );
- p->mode = (bget_mode_t) mode;
- }
-}
-
-int
-kmpc_get_poolmode( void )
-{
- thr_data_t *p;
-
- p = get_thr_data( __kmp_get_thread() );
-
- return p->mode;
-}
-
-void
-kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
-{
- kmp_info_t *th = __kmp_get_thread();
- bufsize a, b;
-
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
-
- bcheck( th, &a, &b );
-
- *maxmem = a;
- *allmem = b;
-}
-
-void
-kmpc_poolprint( void )
-{
- kmp_info_t *th = __kmp_get_thread();
-
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
-
- bfreed( th );
-}
-
-#endif // #if KMP_USE_BGET
-
-/* ------------------------------------------------------------------------ */
-
-void *
-kmpc_malloc( size_t size )
-{
- void * ptr;
- ptr = bget( __kmp_entry_thread(), (bufsize) size );
-
- return ptr;
-}
-
-void *
-kmpc_calloc( size_t nelem, size_t elsize )
-{
- void * ptr;
- ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
-
- return ptr;
-}
-
-void *
-kmpc_realloc( void * ptr, size_t size )
-{
- void * result = NULL;
-
- if ( ptr == NULL ) {
- // If pointer is NULL, realloc behaves like malloc.
- result = bget( __kmp_entry_thread(), (bufsize) size );
- } else if ( size == 0 ) {
- // If size is 0, realloc behaves like free.
- // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
- // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
- brel( __kmp_get_thread(), ptr );
- } else {
- result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
- }; // if
-
- return result;
-}
-
-/* NOTE: the library must have already been initialized by a previous allocate */
-
-void
-kmpc_free( void * ptr )
-{
- if ( ! __kmp_init_serial ) {
- return;
- }; // if
- if ( ptr != NULL ) {
- kmp_info_t *th = __kmp_get_thread();
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
- brel( th, ptr );
- };
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-void *
-___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
-{
- void * ptr;
- KE_TRACE( 30, (
- "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
- th,
- (int) size
- KMP_SRC_LOC_PARM
- ) );
- ptr = bget( th, (bufsize) size );
- KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
- return ptr;
-}
-
-void *
-___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
-{
- void * ptr;
- KE_TRACE( 30, (
- "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
- th,
- (int) nelem,
- (int) elsize
- KMP_SRC_LOC_PARM
- ) );
- ptr = bgetz( th, (bufsize) (nelem * elsize) );
- KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
- return ptr;
-}
-
-void *
-___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
-{
- KE_TRACE( 30, (
- "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
- th,
- ptr,
- (int) size
- KMP_SRC_LOC_PARM
- ) );
- ptr = bgetr( th, ptr, (bufsize) size );
- KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
- return ptr;
-}
-
-void
-___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
-{
- KE_TRACE( 30, (
- "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
- th,
- ptr
- KMP_SRC_LOC_PARM
- ) );
- if ( ptr != NULL ) {
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
- brel( th, ptr );
- }
- KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-/*
- If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
- may be useful for debugging memory corruption, use of freed pointers, etc.
-*/
-/* #define LEAK_MEMORY */
-
-struct kmp_mem_descr { // Memory block descriptor.
- void * ptr_allocated; // Pointer returned by malloc(), subject for free().
- size_t size_allocated; // Size of allocated memory block.
- void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
- size_t size_aligned; // Size of aligned memory block.
-};
-typedef struct kmp_mem_descr kmp_mem_descr_t;
-
-/*
- Allocate memory on requested boundary, fill allocated memory with 0x00.
- NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
- Must use __kmp_free when freeing memory allocated by this routine!
- */
-static
-void *
-___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
-{
- /*
- __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
- return properly aligned pointer. Original pointer returned by malloc() and size of allocated
- block are saved in a descriptor just before the aligned pointer. This information is used by
- __kmp_free() -- it has to pass the original pointer, not the aligned one, to free().
-
- +---------+------------+-----------------------------------+---------+
- | padding | descriptor | aligned block | padding |
- +---------+------------+-----------------------------------+---------+
- ^ ^
- | |
- | +- Aligned pointer returned to caller
- +- Pointer returned by malloc()
-
- Aligned block is filled with zeros, paddings are filled with 0xEF.
- */
-
- kmp_mem_descr_t descr;
- kmp_uintptr_t addr_allocated; // Address returned by malloc().
- kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
- kmp_uintptr_t addr_descr; // Address of memory block descriptor.
-
- KE_TRACE( 25, (
- "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
- (int) size,
- (int) alignment
- KMP_SRC_LOC_PARM
- ) );
-
- KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big.
- KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
- // Make sure kmp_uintptr_t is enough to store addresses.
-
- descr.size_aligned = size;
- descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
-
- #if KMP_DEBUG
- descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
- #else
- descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
- #endif
+
+ /* Make the previous buffer the one we're working on. */
+ KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
+ b = BFH(((char *) b) - b->bh.bb.prevfree);
+ b->bh.bb.bsize -= size;
+
+ /* unlink the buffer from the old freelist */
+ __kmp_bget_remove_from_freelist( b );
+ }
+ else {
+ /* The previous buffer is allocated (not free). Mark this buffer's
+ size as positive (i.e. free) and fall through to place
+ the buffer on the free list as an isolated free block. */
+
+ b->bh.bb.bsize = -b->bh.bb.bsize;
+ }
+
+ /* insert buffer back onto a new freelist */
+ __kmp_bget_insert_into_freelist( thr, b );
+
+
+ /* Now we look at the next buffer in memory, located by advancing from
+ the start of this buffer by its size, to see if that buffer is
+ free. If it is, we combine this buffer with the next one in
+ memory, dechaining the second buffer from the free list. */
+
+ bn = BFH(((char *) b) + b->bh.bb.bsize);
+ if (bn->bh.bb.bsize > 0) {
+
+ /* The buffer is free. Remove it from the free list and add
+ its size to that of our buffer. */
+
+ KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
+
+ __kmp_bget_remove_from_freelist( bn );
+
+ b->bh.bb.bsize += bn->bh.bb.bsize;
+
+ /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
+
+ __kmp_bget_remove_from_freelist( b );
+ __kmp_bget_insert_into_freelist( thr, b );
+
+ /* Finally, advance to the buffer that follows the newly
+ consolidated free block. We must set its backpointer to the
+ head of the consolidated free block. We know the next block
+ must be an allocated block because the process of recombination
+ guarantees that two free blocks will never be contiguous in
+ memory. */
+
+ bn = BFH(((char *) b) + b->bh.bb.bsize);
+ }
+#ifdef FreeWipe
+ (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
+ (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
+#endif
+ KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
+
+ /* The next buffer is allocated. Set the backpointer in it to point
+ to this buffer; the previous free buffer in memory. */
+
+ bn->bh.bb.prevfree = b->bh.bb.bsize;
+
+ /* If a block-release function is defined, and this free buffer
+ constitutes the entire block, release it. Note that pool_len
+ is defined in such a way that the test will fail unless all
+ pool blocks are the same size. */
+
+ if (thr->relfcn != 0 &&
+ b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
+ {
+#if BufStats
+ if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
+#endif
+
+ KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
+ KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
+ KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
+
+ /* Unlink the buffer from the free list */
+ __kmp_bget_remove_from_freelist( b );
+
+ KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
+
+ (*thr->relfcn)(b);
+#if BufStats
+ thr->numprel++; /* Nr of expansion block releases */
+ thr->numpblk--; /* Total number of blocks */
+ KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
+
+ /* avoid leaving stale last_pool pointer around if it is being dealloced */
+ if (thr->last_pool == b) thr->last_pool = 0;
+ }
+ else {
+ thr->last_pool = b;
+ }
+#endif /* BufStats */
+ }
+}
+
+/* BECTL -- Establish automatic pool expansion control */
+
+static void
+bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
+{
+ thr_data_t *thr = get_thr_data( th );
+
+ thr->compfcn = compact;
+ thr->acqfcn = acquire;
+ thr->relfcn = release;
+ thr->exp_incr = pool_incr;
+}
+
+/* BPOOL -- Add a region of memory to the buffer pool. */
+
+static void
+bpool( kmp_info_t *th, void *buf, bufsize len)
+{
+/* int bin = 0; */
+ thr_data_t *thr = get_thr_data( th );
+ bfhead_t *b = BFH(buf);
+ bhead_t *bn;
+
+ __kmp_bget_dequeue( th ); /* Release any queued buffers */
+
+#ifdef SizeQuant
+ len &= ~(SizeQuant - 1);
+#endif
+ if (thr->pool_len == 0) {
+ thr->pool_len = len;
+ } else if (len != thr->pool_len) {
+ thr->pool_len = -1;
+ }
+#if BufStats
+ thr->numpget++; /* Number of block acquisitions */
+ thr->numpblk++; /* Number of blocks total */
+ KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
+#endif /* BufStats */
+
+ /* Since the block is initially occupied by a single free buffer,
+ it had better not be (much) larger than the largest buffer
+ whose size we can store in bhead.bb.bsize. */
+
+ KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
+
+ /* Clear the backpointer at the start of the block to indicate that
+ there is no free block prior to this one. That blocks
+ recombination when the first block in memory is released. */
+
+ b->bh.bb.prevfree = 0;
+
+ /* Create a dummy allocated buffer at the end of the pool. This dummy
+ buffer is seen when a buffer at the end of the pool is released and
+ blocks recombination of the last buffer with the dummy buffer at
+ the end. The length in the dummy buffer is set to the largest
+ negative number to denote the end of the pool for diagnostic
+ routines (this specific value is not counted on by the actual
+ allocation and release functions). */
+
+ len -= sizeof(bhead_t);
+ b->bh.bb.bsize = (bufsize) len;
+ /* Set the owner of this buffer */
+ TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
+
+ /* Chain the new block to the free list. */
+ __kmp_bget_insert_into_freelist( thr, b );
+
+#ifdef FreeWipe
+ (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
+ (size_t) (len - sizeof(bfhead_t)));
+#endif
+ bn = BH(((char *) b) + len);
+ bn->bb.prevfree = (bufsize) len;
+ /* Definition of ESent assumes two's complement! */
+ KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
+
+ bn->bb.bsize = ESent;
+}
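/* Editorial sketch (not part of the original source): after bpool() the
   added region (len already rounded down to a SizeQuant multiple) is laid
   out as one free block followed by a sentinel header:

     |<-------------------------- len -------------------------->|
     +---------------------------------------------+--------------+
     | free block, bsize = len - sizeof(bhead_t)   | bhead_t      |
     | prevfree = 0 (nothing free before it);      | prevfree =   |
     | starts with its own header, rest is wiped   |  free bsize  |
     | with 0x55 when FreeWipe is defined          | bsize = ESent|
     +---------------------------------------------+--------------+

   The trailing ESent header is the "dummy allocated buffer" described in
   the comment above: it keeps the last real buffer from being coalesced
   past the end of the pool when it is released. */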
+
+/* ------------------------------------------------------------------------ */
+
+/* BFREED -- Dump the free lists for this thread. */
+
+static void
+bfreed( kmp_info_t *th )
+{
+ int bin = 0, count = 0;
+ int gtid = __kmp_gtid_from_thread( th );
+ thr_data_t *thr = get_thr_data( th );
+
+#if BufStats
+ __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
+ KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
+ " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
+ gtid, (kmp_uint64) thr->totalloc,
+ (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
+ (kmp_int64) thr->numpblk,
+ (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
+ (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
+#endif
+
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b;
+
+ for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
+ bufsize bs = b->bh.bb.bsize;
+
+ KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
+ KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
+ KMP_DEBUG_ASSERT( bs > 0 );
+
+ count += 1;
+
+ __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
+#ifdef FreeWipe
+ {
+ char *lerr = ((char *) b) + sizeof(bfhead_t);
+ if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
+ __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
+ }
+ }
+#endif
+ }
+ }
+
+ if (count == 0)
+ __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
+}
+
+/* ------------------------------------------------------------------------ */
+
+#ifdef KMP_DEBUG
+
+#if BufStats
+
+/* BSTATS -- Return buffer allocation free space statistics. */
+
+static void
+bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
+{
+ int bin = 0;
+ thr_data_t *thr = get_thr_data( th );
+
+ *nget = thr->numget;
+ *nrel = thr->numrel;
+ *curalloc = (bufsize) thr->totalloc;
+ *totfree = 0;
+ *maxfree = -1;
+
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b = thr->freelist[ bin ].ql.flink;
+
+ while (b != &thr->freelist[ bin ]) {
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
+ *totfree += b->bh.bb.bsize;
+ if (b->bh.bb.bsize > *maxfree) {
+ *maxfree = b->bh.bb.bsize;
+ }
+ b = b->ql.flink; /* Link to next buffer */
+ }
+ }
+}
+
+/* BSTATSE -- Return extended statistics */
+
+static void
+bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
+{
+ thr_data_t *thr = get_thr_data( th );
+
+ *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
+ *npool = thr->numpblk;
+ *npget = thr->numpget;
+ *nprel = thr->numprel;
+ *ndget = thr->numdget;
+ *ndrel = thr->numdrel;
+}
+
+#endif /* BufStats */
+
+/* BUFDUMP -- Dump the data in a buffer. This is called with the user
+ data pointer, and backs up to the buffer header. It will
+ dump either a free block or an allocated one. */
+
+static void
+bufdump( kmp_info_t *th, void *buf )
+{
+ bfhead_t *b;
+ unsigned char *bdump;
+ bufsize bdlen;
+
+ b = BFH(((char *) buf) - sizeof(bhead_t));
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
+ if (b->bh.bb.bsize < 0) {
+ bdump = (unsigned char *) buf;
+ bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
+ } else {
+ bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
+ bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
+ }
+
+ while (bdlen > 0) {
+ int i, dupes = 0;
+ bufsize l = bdlen;
+ char bhex[50], bascii[20];
+
+ if (l > 16) {
+ l = 16;
+ }
+
+ for (i = 0; i < l; i++) {
+ (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
+ if (bdump[i] > 0x20 && bdump[i] < 0x7F)
+ bascii[ i ] = bdump[ i ];
+ else
+ bascii[ i ] = ' ';
+ }
+ bascii[i] = 0;
+ (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
+ bdump += l;
+ bdlen -= l;
+ while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
+ (char *) bdump, 16) == 0)) {
+ dupes++;
+ bdump += 16;
+ bdlen -= 16;
+ }
+ if (dupes > 1) {
+ (void) __kmp_printf_no_lock(
+ " (%d lines [%d bytes] identical to above line skipped)\n",
+ dupes, dupes * 16);
+ } else if (dupes == 1) {
+ bdump -= 16;
+ bdlen += 16;
+ }
+ }
+}
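/* Editorial sketch (not part of the original source): a minimal standalone
   version of the per-line formatting used by bufdump() above -- 16 bytes as
   hex plus a printable-ASCII column. The helper name dump_line is made up. */
#include <stdio.h>

static void dump_line( const unsigned char *p, size_t n )  /* n <= 16 */
{
    char hex[50], ascii[20];
    size_t i;

    for ( i = 0; i < n; i++ ) {
        snprintf( hex + i * 3, sizeof( hex ) - i * 3, "%02X ", p[i] );
        ascii[i] = ( p[i] > 0x20 && p[i] < 0x7F ) ? (char) p[i] : ' ';
    }
    ascii[i] = '\0';
    printf( "%-48s %s\n", hex, ascii );
}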
+
+/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
+ If DUMPALLOC is nonzero, the contents of allocated buffers
+ are dumped. If DUMPFREE is nonzero, free blocks are
+ dumped as well. If FreeWipe checking is enabled, free
+ blocks which have been clobbered will always be dumped. */
+
+static void
+bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
+{
+ bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
+
+ while (b->bh.bb.bsize != ESent) {
+ bufsize bs = b->bh.bb.bsize;
+
+ if (bs < 0) {
+ bs = -bs;
+ (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
+ if (dumpalloc) {
+ bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
+ }
+ } else {
+ const char *lerr = "";
+
+ KMP_DEBUG_ASSERT(bs > 0);
+ if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
+ lerr = " (Bad free list links)";
+ }
+ (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
+ (long) bs, lerr);
+#ifdef FreeWipe
+ lerr = ((char *) b) + sizeof(bfhead_t);
+ if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
+ (memcmp(lerr, lerr + 1,
+ (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
+ (void) __kmp_printf_no_lock(
+ "(Contents of above free block have been overstored.)\n");
+ bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
+ } else
+#endif
+ if (dumpfree) {
+ bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
+ }
+ }
+ b = BFH(((char *) b) + bs);
+ }
+}
+
+/* BPOOLV -- Validate a buffer pool. */
+
+static int
+bpoolv( kmp_info_t *th, void *buf )
+{
+ bfhead_t *b = BFH(buf);
+
+ while (b->bh.bb.bsize != ESent) {
+ bufsize bs = b->bh.bb.bsize;
+
+ if (bs < 0) {
+ bs = -bs;
+ } else {
+#ifdef FreeWipe
+ char *lerr = "";
+#endif
+
+ KMP_DEBUG_ASSERT(bs > 0);
+ if (bs <= 0) {
+ return 0;
+ }
+ if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
+ (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
+ (long) bs);
+ KMP_DEBUG_ASSERT(0);
+ return 0;
+ }
+#ifdef FreeWipe
+ lerr = ((char *) b) + sizeof(bfhead_t);
+ if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
+ (memcmp(lerr, lerr + 1,
+ (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
+ (void) __kmp_printf_no_lock(
+ "(Contents of above free block have been overstored.)\n");
+ bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
+ KMP_DEBUG_ASSERT(0);
+ return 0;
+ }
+#endif /* FreeWipe */
+ }
+ b = BFH(((char *) b) + bs);
+ }
+ return 1;
+}
+
+#endif /* KMP_DEBUG */
+
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_initialize_bget( kmp_info_t *th )
+{
+ KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
+
+ set_thr_data( th );
+
+ bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
+ (bufsize) __kmp_malloc_pool_incr );
+}
+
+void
+__kmp_finalize_bget( kmp_info_t *th )
+{
+ thr_data_t *thr;
+ bfhead_t *b;
+
+ KMP_DEBUG_ASSERT( th != 0 );
+
+#if BufStats
+ thr = (thr_data_t *) th->th.th_local.bget_data;
+ KMP_DEBUG_ASSERT( thr != NULL );
+ b = thr->last_pool;
+
+ /* If a block-release function is defined, and this free buffer
+ constitutes the entire block, release it. Note that pool_len
+ is defined in such a way that the test will fail unless all
+ pool blocks are the same size. */
+
+ /* Deallocate the last pool if one exists because we no longer do it in brel() */
+ if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
+ b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
+ {
+ KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
+ KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
+ KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
+
+ /* Unlink the buffer from the free list */
+ __kmp_bget_remove_from_freelist( b );
+
+ KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
+
+ (*thr->relfcn)(b);
+ thr->numprel++; /* Nr of expansion block releases */
+ thr->numpblk--; /* Total number of blocks */
+ KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
+ }
+#endif /* BufStats */
+
+ /* Deallocate bget_data */
+ if ( th->th.th_local.bget_data != NULL ) {
+ __kmp_free( th->th.th_local.bget_data );
+ th->th.th_local.bget_data = NULL;
+ }; // if
+}
+
+void
+kmpc_set_poolsize( size_t size )
+{
+ bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
+ (bget_release_t) free, (bufsize) size );
+}
+
+size_t
+kmpc_get_poolsize( void )
+{
+ thr_data_t *p;
+
+ p = get_thr_data( __kmp_get_thread() );
+
+ return p->exp_incr;
+}
+
+void
+kmpc_set_poolmode( int mode )
+{
+ thr_data_t *p;
+
+ if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
+ p = get_thr_data( __kmp_get_thread() );
+ p->mode = (bget_mode_t) mode;
+ }
+}
+
+int
+kmpc_get_poolmode( void )
+{
+ thr_data_t *p;
+
+ p = get_thr_data( __kmp_get_thread() );
+
+ return p->mode;
+}
+
+void
+kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
+{
+ kmp_info_t *th = __kmp_get_thread();
+ bufsize a, b;
+
+ __kmp_bget_dequeue( th ); /* Release any queued buffers */
+
+ bcheck( th, &a, &b );
+
+ *maxmem = a;
+ *allmem = b;
+}
+
+void
+kmpc_poolprint( void )
+{
+ kmp_info_t *th = __kmp_get_thread();
+
+ __kmp_bget_dequeue( th ); /* Release any queued buffers */
+
+ bfreed( th );
+}
+
+#endif // #if KMP_USE_BGET
+
+/* ------------------------------------------------------------------------ */
+
+void *
+kmpc_malloc( size_t size )
+{
+ void * ptr;
+ ptr = bget( __kmp_entry_thread(), (bufsize) size );
+
+ return ptr;
+}
+
+void *
+kmpc_calloc( size_t nelem, size_t elsize )
+{
+ void * ptr;
+ ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
+
+ return ptr;
+}
+
+void *
+kmpc_realloc( void * ptr, size_t size )
+{
+ void * result = NULL;
+
+ if ( ptr == NULL ) {
+ // If pointer is NULL, realloc behaves like malloc.
+ result = bget( __kmp_entry_thread(), (bufsize) size );
+ } else if ( size == 0 ) {
+ // If size is 0, realloc behaves like free.
+ // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
+ // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
+ brel( __kmp_get_thread(), ptr );
+ } else {
+ result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
+ }; // if
+
+ return result;
+}
+
+/* NOTE: the library must have already been initialized by a previous allocate */
+
+void
+kmpc_free( void * ptr )
+{
+ if ( ! __kmp_init_serial ) {
+ return;
+ }; // if
+ if ( ptr != NULL ) {
+ kmp_info_t *th = __kmp_get_thread();
+ __kmp_bget_dequeue( th ); /* Release any queued buffers */
+ brel( th, ptr );
+ };
+}
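/* Editorial usage sketch (not part of the original source): the kmpc_*
   entry points above follow the familiar libc semantics -- realloc(NULL, n)
   acts like malloc(n) and realloc(p, 0) acts like free(p) -- but allocate
   from the calling thread's BGET pool. Declarations assumed from kmp.h. */
#include <stddef.h>

extern void *kmpc_malloc( size_t size );
extern void *kmpc_realloc( void *ptr, size_t size );
extern void  kmpc_free( void *ptr );

static void kmpc_usage_example( void )
{
    void *p = kmpc_malloc( 64 );   /* thread-local allocation        */
    p = kmpc_realloc( p, 128 );    /* grow (may move the block)      */
    kmpc_free( p );                /* returned to this thread's pool */
}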
+
+
+/* ------------------------------------------------------------------------ */
+
+void *
+___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
+{
+ void * ptr;
+ KE_TRACE( 30, (
+ "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
+ th,
+ (int) size
+ KMP_SRC_LOC_PARM
+ ) );
+ ptr = bget( th, (bufsize) size );
+ KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
+ return ptr;
+}
+
+void *
+___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
+{
+ void * ptr;
+ KE_TRACE( 30, (
+ "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
+ th,
+ (int) nelem,
+ (int) elsize
+ KMP_SRC_LOC_PARM
+ ) );
+ ptr = bgetz( th, (bufsize) (nelem * elsize) );
+ KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
+ return ptr;
+}
+
+void *
+___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
+{
+ KE_TRACE( 30, (
+ "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
+ th,
+ ptr,
+ (int) size
+ KMP_SRC_LOC_PARM
+ ) );
+ ptr = bgetr( th, ptr, (bufsize) size );
+ KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
+ return ptr;
+}
+
+void
+___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
+{
+ KE_TRACE( 30, (
+ "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
+ th,
+ ptr
+ KMP_SRC_LOC_PARM
+ ) );
+ if ( ptr != NULL ) {
+ __kmp_bget_dequeue( th ); /* Release any queued buffers */
+ brel( th, ptr );
+ }
+ KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+/*
+ If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
+ may be useful for debugging memory corruption, use of freed pointers, etc.
+*/
+/* #define LEAK_MEMORY */
+
+struct kmp_mem_descr { // Memory block descriptor.
+ void * ptr_allocated; // Pointer returned by malloc(), subject for free().
+ size_t size_allocated; // Size of allocated memory block.
+ void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
+ size_t size_aligned; // Size of aligned memory block.
+};
+typedef struct kmp_mem_descr kmp_mem_descr_t;
+
+/*
+ Allocate memory on requested boundary, fill allocated memory with 0x00.
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
+ Must use __kmp_free when freeing memory allocated by this routine!
+ */
+static
+void *
+___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
+{
+ /*
+ __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
+ return properly aligned pointer. Original pointer returned by malloc() and size of allocated
+ block are saved in a descriptor just before the aligned pointer. This information is used by
+ __kmp_free() -- it has to pass the original pointer, not the aligned one, to free().
+
+ +---------+------------+-----------------------------------+---------+
+ | padding | descriptor | aligned block | padding |
+ +---------+------------+-----------------------------------+---------+
+ ^ ^
+ | |
+ | +- Aligned pointer returned to caller
+ +- Pointer returned by malloc()
+
+ Aligned block is filled with zeros, paddings are filled with 0xEF.
+ */
+
+ kmp_mem_descr_t descr;
+ kmp_uintptr_t addr_allocated; // Address returned by malloc().
+ kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
+ kmp_uintptr_t addr_descr; // Address of memory block descriptor.
+
+ KE_TRACE( 25, (
+ "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
+ (int) size,
+ (int) alignment
+ KMP_SRC_LOC_PARM
+ ) );
+
+ KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big.
+ KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
+ // Make sure kmp_uintptr_t is enough to store addresses.
+
+ descr.size_aligned = size;
+ descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
+
+ #if KMP_DEBUG
+ descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
+ #else
+ descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
+ #endif
__lsan_ignore_object(descr.ptr_allocated); // espetrov@yandex-team.ru: asan considers descr.ptr_allocated leaked because of address alignment arithmetics
- KE_TRACE( 10, (
- " malloc( %d ) returned %p\n",
- (int) descr.size_allocated,
- descr.ptr_allocated
- ) );
- if ( descr.ptr_allocated == NULL ) {
- KMP_FATAL( OutOfHeapMemory );
- };
-
- addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
- addr_aligned =
- ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
- & ~ ( alignment - 1 );
- addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
-
- descr.ptr_aligned = (void *) addr_aligned;
-
- KE_TRACE( 26, (
- " ___kmp_allocate_align: "
- "ptr_allocated=%p, size_allocated=%d, "
- "ptr_aligned=%p, size_aligned=%d\n",
- descr.ptr_allocated,
- (int) descr.size_allocated,
- descr.ptr_aligned,
- (int) descr.size_aligned
- ) );
-
- KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
- KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
- KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
- KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
-
- #ifdef KMP_DEBUG
- memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
- // Fill allocated memory block with 0xEF.
- #endif
- memset( descr.ptr_aligned, 0x00, descr.size_aligned );
- // Fill the aligned memory block (which is intended for use by the caller) with 0x00. Do not
- // put this filling under the KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
- // bytes remain filled with 0xEF in the debugging library.)
- * ( (kmp_mem_descr_t *) addr_descr ) = descr;
-
- KMP_MB();
-
- KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
- return descr.ptr_aligned;
-
-} // func ___kmp_allocate_align
-
-
-/*
- Allocate memory on cache line boundary, fill allocated memory with 0x00.
- Do not call this func directly! Use __kmp_allocate macro instead.
- NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
- Must use __kmp_free when freeing memory allocated by this routine!
- */
-void *
-___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
-{
-
- void * ptr;
- KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
- ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
- KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
- return ptr;
-
-} // func ___kmp_allocate
-
-#if (BUILD_MEMORY==FIRST_TOUCH)
-void *
-__kmp_ft_page_allocate(size_t size)
-{
- void *adr, *aadr;
-#if KMP_OS_LINUX
- /* TODO: Use this function to get page size everywhere */
- int page_size = getpagesize();
-#else
- /* TODO: Find windows function to get page size and use it everywhere */
- int page_size = PAGE_SIZE;
-#endif /* KMP_OS_LINUX */
-
- adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
- size + page_size + KMP_PTR_SKIP);
- if ( adr == 0 )
- KMP_FATAL( OutOfHeapMemory );
-
- /* check to see if adr is on a page boundary. */
- if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
- /* nothing to do if adr is already on a page boundary. */
- aadr = adr;
- else
- /* else set aadr to the first page boundary in the allocated memory. */
- aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
-
- /* the first touch by the owner thread. */
- *((void**)aadr) = adr;
-
- /* skip the memory space used for storing adr above. */
- return (void*)((char*)aadr + KMP_PTR_SKIP);
-}
-#endif
-
-/*
- Allocate memory on page boundary, fill allocated memory with 0x00.
- Do not call this func directly! Use the __kmp_page_allocate macro instead.
- NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
- Must use __kmp_free when freeing memory allocated by this routine!
- */
-void *
-___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
-{
- int page_size = 8 * 1024;
- void * ptr;
-
- KE_TRACE( 25, (
- "-> __kmp_page_allocate( %d ) called from %s:%d\n",
- (int) size
- KMP_SRC_LOC_PARM
- ) );
- ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
- KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
- return ptr;
-} // ___kmp_page_allocate
-
-/*
- Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
- In debug mode, fill the memory block with 0xEF before call to free().
-*/
-void
-___kmp_free( void * ptr KMP_SRC_LOC_DECL )
-{
-
- kmp_mem_descr_t descr;
- kmp_uintptr_t addr_allocated; // Address returned by malloc().
- kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
-
- KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
- KMP_ASSERT( ptr != NULL );
-
- descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
-
- KE_TRACE( 26, ( " __kmp_free: "
- "ptr_allocated=%p, size_allocated=%d, "
- "ptr_aligned=%p, size_aligned=%d\n",
- descr.ptr_allocated, (int) descr.size_allocated,
- descr.ptr_aligned, (int) descr.size_aligned ));
-
- addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
- addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
-
- KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
- KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
- KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
- KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
- KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
-
- #ifdef KMP_DEBUG
- memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
- // Fill memory block with 0xEF, it helps catch using freed memory.
- #endif
-
- #ifndef LEAK_MEMORY
- KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
- # ifdef KMP_DEBUG
- _free_src_loc( descr.ptr_allocated, _file_, _line_ );
- # else
- free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
- # endif
- #endif
-
- KMP_MB();
-
- KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
-
-} // func ___kmp_free
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if USE_FAST_MEMORY == 3
-// Allocate fast memory by first scanning the thread's free lists
-// If a chunk the right size exists, grab it off the free list.
-// Otherwise allocate normally using kmp_thread_malloc.
-
-// AC: How to choose the limit? Just get 16 for now...
-#define KMP_FREE_LIST_LIMIT 16
-
-// Always use 128 bytes for determining buckets for caching memory blocks
-#define DCACHE_LINE 128
-
-void *
-___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
-{
- void * ptr;
- int num_lines;
- int idx;
- int index;
- void * alloc_ptr;
- size_t alloc_size;
- kmp_mem_descr_t * descr;
-
- KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
- __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
-
- num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
- idx = num_lines - 1;
- KMP_DEBUG_ASSERT( idx >= 0 );
- if ( idx < 2 ) {
- index = 0; // idx is [ 0, 1 ], use first free list
- num_lines = 2; // 1, 2 cache lines or less than cache line
- } else if ( ( idx >>= 2 ) == 0 ) {
- index = 1; // idx is [ 2, 3 ], use second free list
- num_lines = 4; // 3, 4 cache lines
- } else if ( ( idx >>= 2 ) == 0 ) {
- index = 2; // idx is [ 4, 15 ], use third free list
- num_lines = 16; // 5, 6, ..., 16 cache lines
- } else if ( ( idx >>= 2 ) == 0 ) {
- index = 3; // idx is [ 16, 63 ], use fourth free list
- num_lines = 64; // 17, 18, ..., 64 cache lines
- } else {
- goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
- }
-
- ptr = this_thr->th.th_free_lists[index].th_free_list_self;
- if ( ptr != NULL ) {
- // pop the head of no-sync free list
- this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
- KMP_DEBUG_ASSERT( this_thr ==
- ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
- goto end;
- };
- ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
- if ( ptr != NULL ) {
- // no-sync free list is empty, use sync free list (filled in by other threads only)
- // pop the head of the sync free list, push NULL instead
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
- {
- KMP_CPU_PAUSE();
- ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
- }
- // push the rest of the chain onto the no-sync free list (can be NULL if there was only one block)
- this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
- KMP_DEBUG_ASSERT( this_thr ==
- ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
- goto end;
- }
-
- alloc_call:
- // haven't found block in the free lists, thus allocate it
- size = num_lines * DCACHE_LINE;
-
- alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
- KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
- __kmp_gtid_from_thread( this_thr ), alloc_size ) );
- alloc_ptr = bget( this_thr, (bufsize) alloc_size );
-
- // align ptr to DCACHE_LINE
- ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
- descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
-
- descr->ptr_allocated = alloc_ptr; // remember allocated pointer
- // we don't need size_allocated
- descr->ptr_aligned = (void *)this_thr; // remember allocating thread
- // (it is already saved in bget buffer,
- // but we may want to use another allocator in future)
- descr->size_aligned = size;
-
- end:
- KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
- __kmp_gtid_from_thread( this_thr ), ptr ) );
- return ptr;
-} // func __kmp_fast_allocate
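/* Editorial sketch (not part of the original source): the index selection
   above rounds a request up to one of four fixed block sizes -- 2, 4, 16 or
   64 cache lines of DCACHE_LINE (128) bytes -- or bypasses the free lists
   for anything larger. A standalone helper with a made-up name: */
#include <stdio.h>

#define DCACHE_LINE_EX 128

static int fast_pool_lines( size_t size )
{
    int num_lines = (int)( ( size + DCACHE_LINE_EX - 1 ) / DCACHE_LINE_EX );
    int idx = num_lines - 1;

    if ( idx < 2 )            return 2;   /* <= 2 cache lines   -> list 0 */
    if ( ( idx >>= 2 ) == 0 ) return 4;   /* 3..4 cache lines   -> list 1 */
    if ( ( idx >>= 2 ) == 0 ) return 16;  /* 5..16 cache lines  -> list 2 */
    if ( ( idx >>= 2 ) == 0 ) return 64;  /* 17..64 cache lines -> list 3 */
    return -1;                            /* > 64 lines (> 8KB): plain bget() */
}

int main( void )
{
    /* prints "2 4 -1": 300 bytes need 3 cache lines and are rounded up to 4 */
    printf( "%d %d %d\n", fast_pool_lines( 100 ), fast_pool_lines( 300 ),
            fast_pool_lines( 9000 ) );
    return 0;
}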
-
-// Free fast memory and place it on the thread's free list if it is of
-// the correct size.
-void
-___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
-{
- kmp_mem_descr_t * descr;
- kmp_info_t * alloc_thr;
- size_t size;
- size_t idx;
- int index;
-
- KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
- __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
- KMP_ASSERT( ptr != NULL );
-
- descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
-
- KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
- (int) descr->size_aligned ) );
-
- size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
-
- idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
- if ( idx == size ) {
- index = 0; // 2 cache lines
- } else if ( ( idx <<= 1 ) == size ) {
- index = 1; // 4 cache lines
- } else if ( ( idx <<= 2 ) == size ) {
- index = 2; // 16 cache lines
- } else if ( ( idx <<= 2 ) == size ) {
- index = 3; // 64 cache lines
- } else {
- KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
- goto free_call; // 65 or more cache lines ( > 8KB )
- }
-
- alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
- if ( alloc_thr == this_thr ) {
- // push block to self no-sync free list, linking previous head (LIFO)
- *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
- this_thr->th.th_free_lists[index].th_free_list_self = ptr;
- } else {
- void * head = this_thr->th.th_free_lists[index].th_free_list_other;
- if ( head == NULL ) {
- // Create new free list
- this_thr->th.th_free_lists[index].th_free_list_other = ptr;
- *((void **)ptr) = NULL; // mark the tail of the list
- descr->size_allocated = (size_t)1; // head of the list keeps its length
- } else {
- // need to check the existing "other" list's owner thread and queue size
- kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
- kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
- size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task
- if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
- // we can add current task to "other" list, no sync needed
- *((void **)ptr) = head;
- descr->size_allocated = q_sz;
- this_thr->th.th_free_lists[index].th_free_list_other = ptr;
- } else {
- // either the queue's owning thread is changing or the size limit was exceeded:
- // return the old queue to the allocating thread (q_th) synchronously,
- // and start a new list for alloc_thr's blocks
- void * old_ptr;
- void * tail = head;
- void * next = *((void **)head);
- while ( next != NULL ) {
- KMP_DEBUG_ASSERT(
- // queue size should decrease by 1 each step through the list
- ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
- ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
- tail = next; // remember tail node
- next = *((void **)next);
- }
- KMP_DEBUG_ASSERT( q_th != NULL );
- // push block to owner's sync free list
- old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
- /* the next pointer must be set before setting free_list to ptr to avoid
- exposing a broken list to other threads, even for an instant. */
- *((void **)tail) = old_ptr;
-
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- &q_th->th.th_free_lists[index].th_free_list_sync,
- old_ptr,
- head ) )
- {
- KMP_CPU_PAUSE();
- old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
- *((void **)tail) = old_ptr;
- }
-
- // start a new list of non-self blocks (owned by alloc_thr)
- this_thr->th.th_free_lists[index].th_free_list_other = ptr;
- *((void **)ptr) = NULL;
- descr->size_allocated = (size_t)1; // head of queue keeps its length
- }
- }
- }
- goto end;
-
- free_call:
- KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
- __kmp_gtid_from_thread( this_thr), size ) );
- __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
- brel( this_thr, descr->ptr_allocated );
-
- end:
- KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
-
-} // func __kmp_fast_free
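/* Editorial sketch (not part of the original source): when a block belongs
   to another thread and the local "other" queue must be flushed, the code
   above publishes the whole chain onto the owner's th_free_list_sync with a
   compare-and-swap retry loop. The same pattern, standalone, using C11
   atomics (type and function names are made up): */
#include <stdatomic.h>

typedef struct ex_node { struct ex_node *next; } ex_node_t;

static void sync_list_push( _Atomic( ex_node_t * ) *head,
                            ex_node_t *first, ex_node_t *last )
{
    ex_node_t *old = atomic_load( head );
    do {
        /* link the chain to the current head before publishing it, so the
           list is never observed in a broken state */
        last->next = old;
    } while ( !atomic_compare_exchange_weak( head, &old, first ) );
}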
-
-
-// Initialize the thread free lists related to fast memory
-// Only do this when a thread is initially created.
-void
-__kmp_initialize_fast_memory( kmp_info_t *this_thr )
-{
- KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
-
- memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
-}
-
-// Free the memory in the thread free lists related to fast memory
-// Only do this when a thread is being reaped (destroyed).
-void
-__kmp_free_fast_memory( kmp_info_t *th )
-{
- // Suppose we use BGET underlying allocator, walk through its structures...
- int bin;
- thr_data_t * thr = get_thr_data( th );
- void ** lst = NULL;
-
- KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
- __kmp_gtid_from_thread( th ) ) );
-
- __kmp_bget_dequeue( th ); // Release any queued buffers
-
- // Dig through free lists and extract all allocated blocks
- for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
- bfhead_t * b = thr->freelist[ bin ].ql.flink;
- while ( b != &thr->freelist[ bin ] ) {
- if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // is the buffer tagged as an allocated address?
- *((void**)b) = lst; // link the list (override bthr, but keep flink yet)
- lst = (void**)b; // push b into lst
- }
- b = b->ql.flink; // get next buffer
- }
- }
- while ( lst != NULL ) {
- void * next = *lst;
- KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
- lst, next, th, __kmp_gtid_from_thread( th ) ) );
- (*thr->relfcn)(lst);
- #if BufStats
- // count blocks to prevent problems in __kmp_finalize_bget()
- thr->numprel++; /* Nr of expansion block releases */
- thr->numpblk--; /* Total number of blocks */
- #endif
- lst = (void**)next;
- }
-
- KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
- __kmp_gtid_from_thread( th ) ) );
-}
-
-#endif // USE_FAST_MEMORY
+ KE_TRACE( 10, (
+ " malloc( %d ) returned %p\n",
+ (int) descr.size_allocated,
+ descr.ptr_allocated
+ ) );
+ if ( descr.ptr_allocated == NULL ) {
+ KMP_FATAL( OutOfHeapMemory );
+ };
+
+ addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
+ addr_aligned =
+ ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
+ & ~ ( alignment - 1 );
+ addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
+
+ descr.ptr_aligned = (void *) addr_aligned;
+
+ KE_TRACE( 26, (
+ " ___kmp_allocate_align: "
+ "ptr_allocated=%p, size_allocated=%d, "
+ "ptr_aligned=%p, size_aligned=%d\n",
+ descr.ptr_allocated,
+ (int) descr.size_allocated,
+ descr.ptr_aligned,
+ (int) descr.size_aligned
+ ) );
+
+ KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
+ KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
+ KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
+ KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
+
+ #ifdef KMP_DEBUG
+ memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
+ // Fill allocated memory block with 0xEF.
+ #endif
+ memset( descr.ptr_aligned, 0x00, descr.size_aligned );
+ // Fill the aligned memory block (which is intended for use by the caller) with 0x00. Do not
+ // put this filling under the KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
+ // bytes remain filled with 0xEF in the debugging library.)
+ * ( (kmp_mem_descr_t *) addr_descr ) = descr;
+
+ KMP_MB();
+
+ KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
+ return descr.ptr_aligned;
+
+} // func ___kmp_allocate_align
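/* Editorial sketch (not part of the original source): a minimal standalone
   illustration of the over-allocate / align / stash-a-descriptor technique
   used by ___kmp_allocate_align() above. Names are made up; the real code
   stores a full kmp_mem_descr_t and aborts instead of returning NULL.
   alignment must be a power of two. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct { void *ptr_allocated; size_t size_allocated; } ex_descr_t;

static void *ex_allocate_align( size_t size, size_t alignment )
{
    size_t total = size + sizeof( ex_descr_t ) + alignment;
    void *raw = malloc( total );
    uintptr_t aligned;
    ex_descr_t d;

    if ( raw == NULL ) return NULL;
    aligned = ( (uintptr_t) raw + sizeof( ex_descr_t ) + alignment )
              & ~ (uintptr_t)( alignment - 1 );
    d.ptr_allocated = raw;
    d.size_allocated = total;
    memcpy( (void *)( aligned - sizeof( ex_descr_t ) ), &d, sizeof( d ) ); /* stash descriptor */
    memset( (void *) aligned, 0, size );   /* callers expect zeroed memory */
    return (void *) aligned;
}

static void ex_free_align( void *ptr )
{
    ex_descr_t d;
    memcpy( &d, (char *) ptr - sizeof( ex_descr_t ), sizeof( d ) );
    free( d.ptr_allocated );               /* pass the original pointer back to free() */
}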
+
+
+/*
+ Allocate memory on cache line boundary, fill allocated memory with 0x00.
+ Do not call this func directly! Use __kmp_allocate macro instead.
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
+ Must use __kmp_free when freeing memory allocated by this routine!
+ */
+void *
+___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
+{
+
+ void * ptr;
+ KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
+ ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
+ KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
+ return ptr;
+
+} // func ___kmp_allocate
+
+#if (BUILD_MEMORY==FIRST_TOUCH)
+void *
+__kmp_ft_page_allocate(size_t size)
+{
+ void *adr, *aadr;
+#if KMP_OS_LINUX
+ /* TODO: Use this function to get page size everywhere */
+ int page_size = getpagesize();
+#else
+ /* TODO: Find windows function to get page size and use it everywhere */
+ int page_size = PAGE_SIZE;
+#endif /* KMP_OS_LINUX */
+
+ adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
+ size + page_size + KMP_PTR_SKIP);
+ if ( adr == 0 )
+ KMP_FATAL( OutOfHeapMemory );
+
+ /* check to see if adr is on a page boundary. */
+ if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
+ /* nothing to do if adr is already on a page boundary. */
+ aadr = adr;
+ else
+ /* else set aadr to the first page boundary in the allocated memory. */
+ aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
+
+ /* the first touch by the owner thread. */
+ *((void**)aadr) = adr;
+
+ /* skip the memory space used for storing adr above. */
+ return (void*)((char*)aadr + KMP_PTR_SKIP);
+}
+#endif
+
+/*
+ Allocate memory on page boundary, fill allocated memory with 0x00.
+ Do not call this func directly! Use the __kmp_page_allocate macro instead.
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
+ Must use __kmp_free when freeing memory allocated by this routine!
+ */
+void *
+___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
+{
+ int page_size = 8 * 1024;
+ void * ptr;
+
+ KE_TRACE( 25, (
+ "-> __kmp_page_allocate( %d ) called from %s:%d\n",
+ (int) size
+ KMP_SRC_LOC_PARM
+ ) );
+ ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
+ KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
+ return ptr;
+} // ___kmp_page_allocate
+
+/*
+ Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
+ In debug mode, fill the memory block with 0xEF before call to free().
+*/
+void
+___kmp_free( void * ptr KMP_SRC_LOC_DECL )
+{
+
+ kmp_mem_descr_t descr;
+ kmp_uintptr_t addr_allocated; // Address returned by malloc().
+ kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
+
+ KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
+ KMP_ASSERT( ptr != NULL );
+
+ descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
+
+ KE_TRACE( 26, ( " __kmp_free: "
+ "ptr_allocated=%p, size_allocated=%d, "
+ "ptr_aligned=%p, size_aligned=%d\n",
+ descr.ptr_allocated, (int) descr.size_allocated,
+ descr.ptr_aligned, (int) descr.size_aligned ));
+
+ addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
+ addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
+
+ KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
+ KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
+ KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
+ KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
+ KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
+
+ #ifdef KMP_DEBUG
+ memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
+ // Fill memory block with 0xEF, it helps catch using freed memory.
+ #endif
+
+ #ifndef LEAK_MEMORY
+ KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
+ # ifdef KMP_DEBUG
+ _free_src_loc( descr.ptr_allocated, _file_, _line_ );
+ # else
+ free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
+ # endif
+ #endif
+
+ KMP_MB();
+
+ KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
+
+} // func ___kmp_free
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if USE_FAST_MEMORY == 3
+// Allocate fast memory by first scanning the thread's free lists.
+// If a chunk of the right size exists, grab it off the free list.
+// Otherwise allocate normally using kmp_thread_malloc.
+
+// AC: How to choose the limit? Just get 16 for now...
+#define KMP_FREE_LIST_LIMIT 16
+
+// Always use 128 bytes for determining buckets for caching memory blocks
+#define DCACHE_LINE 128
+
+void *
+___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
+{
+ void * ptr;
+ int num_lines;
+ int idx;
+ int index;
+ void * alloc_ptr;
+ size_t alloc_size;
+ kmp_mem_descr_t * descr;
+
+ KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
+ __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
+
+ num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
+ idx = num_lines - 1;
+ KMP_DEBUG_ASSERT( idx >= 0 );
+ if ( idx < 2 ) {
+ index = 0; // idx is [ 0, 1 ], use first free list
+ num_lines = 2; // 1, 2 cache lines or less than cache line
+ } else if ( ( idx >>= 2 ) == 0 ) {
+ index = 1; // idx is [ 2, 3 ], use second free list
+ num_lines = 4; // 3, 4 cache lines
+ } else if ( ( idx >>= 2 ) == 0 ) {
+ index = 2; // idx is [ 4, 15 ], use third free list
+ num_lines = 16; // 5, 6, ..., 16 cache lines
+ } else if ( ( idx >>= 2 ) == 0 ) {
+ index = 3; // idx is [ 16, 63 ], use fourth free list
+ num_lines = 64; // 17, 18, ..., 64 cache lines
+ } else {
+ goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
+ }
+
+ ptr = this_thr->th.th_free_lists[index].th_free_list_self;
+ if ( ptr != NULL ) {
+ // pop the head of no-sync free list
+ this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
+ KMP_DEBUG_ASSERT( this_thr ==
+ ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
+ goto end;
+ };
+ ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
+ if ( ptr != NULL ) {
+ // no-sync free list is empty, use sync free list (filled in by other threads only)
+ // pop the head of the sync free list, push NULL instead
+ while ( ! KMP_COMPARE_AND_STORE_PTR(
+ &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
+ {
+ KMP_CPU_PAUSE();
+ ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
+ }
+ // push the rest of the chain onto the no-sync free list (can be NULL if there was only one block)
+ this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
+ KMP_DEBUG_ASSERT( this_thr ==
+ ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
+ goto end;
+ }
+
+ alloc_call:
+ // no suitable block found in the free lists, so allocate one
+ size = num_lines * DCACHE_LINE;
+
+ alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
+ KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
+ __kmp_gtid_from_thread( this_thr ), alloc_size ) );
+ alloc_ptr = bget( this_thr, (bufsize) alloc_size );
+
+ // align ptr to DCACHE_LINE
+ ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
+ descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
+
+ descr->ptr_allocated = alloc_ptr; // remember allocated pointer
+ // we don't need size_allocated
+ descr->ptr_aligned = (void *)this_thr; // remember allocating thread
+ // (it is already saved in bget buffer,
+ // but we may want to use another allocator in future)
+ descr->size_aligned = size;
+
+ end:
+ KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
+ __kmp_gtid_from_thread( this_thr ), ptr ) );
+ return ptr;
+} // func __kmp_fast_allocate
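+
+/* Editor's illustration (hypothetical helper, not part of this file): the
+   bucket selection above, written out in isolation. Requests are rounded up
+   to 2, 4, 16 or 64 cache lines of DCACHE_LINE bytes each and served from a
+   per-thread free list; anything larger goes straight to bget(). */
+#if 0
+static int __kmp_pick_bucket_example( size_t size, int * rounded_lines )
+{
+    int lines = (int)( ( size + DCACHE_LINE - 1 ) / DCACHE_LINE );
+    if ( lines <= 2 )  { *rounded_lines = 2;  return 0; } /* <= 256 bytes */
+    if ( lines <= 4 )  { *rounded_lines = 4;  return 1; } /* <= 512 bytes */
+    if ( lines <= 16 ) { *rounded_lines = 16; return 2; } /* <= 2 KB      */
+    if ( lines <= 64 ) { *rounded_lines = 64; return 3; } /* <= 8 KB      */
+    return -1; /* > 8 KB: no free-list caching, allocate with bget() */
+}
+#endif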
+
+// Free fast memory and place it on the thread's free list if it is of
+// the correct size.
+void
+___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
+{
+ kmp_mem_descr_t * descr;
+ kmp_info_t * alloc_thr;
+ size_t size;
+ size_t idx;
+ int index;
+
+ KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
+ __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
+ KMP_ASSERT( ptr != NULL );
+
+ descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
+
+ KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
+ (int) descr->size_aligned ) );
+
+ size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
+
+ idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
+ if ( idx == size ) {
+ index = 0; // 2 cache lines
+ } else if ( ( idx <<= 1 ) == size ) {
+ index = 1; // 4 cache lines
+ } else if ( ( idx <<= 2 ) == size ) {
+ index = 2; // 16 cache lines
+ } else if ( ( idx <<= 2 ) == size ) {
+ index = 3; // 64 cache lines
+ } else {
+ KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
+ goto free_call; // 65 or more cache lines ( > 8KB )
+ }
+
+ alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
+ if ( alloc_thr == this_thr ) {
+ // push block to self no-sync free list, linking previous head (LIFO)
+ *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
+ this_thr->th.th_free_lists[index].th_free_list_self = ptr;
+ } else {
+ void * head = this_thr->th.th_free_lists[index].th_free_list_other;
+ if ( head == NULL ) {
+ // Create new free list
+ this_thr->th.th_free_lists[index].th_free_list_other = ptr;
+ *((void **)ptr) = NULL; // mark the tail of the list
+ descr->size_allocated = (size_t)1; // head of the list keeps its length
+ } else {
+ // need to check the existing "other" list's owning thread and the queue size
+ kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
+ kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
+ size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task
+ if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
+ // we can add current task to "other" list, no sync needed
+ *((void **)ptr) = head;
+ descr->size_allocated = q_sz;
+ this_thr->th.th_free_lists[index].th_free_list_other = ptr;
+ } else {
+ // either the queue's owning thread has changed or the size limit was exceeded;
+ // return the old queue to its allocating thread (q_th) synchronously,
+ // and start a new list for alloc_thr's tasks
+ void * old_ptr;
+ void * tail = head;
+ void * next = *((void **)head);
+ while ( next != NULL ) {
+ KMP_DEBUG_ASSERT(
+ // queue size should decrease by 1 each step through the list
+ ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
+ ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
+ tail = next; // remember tail node
+ next = *((void **)next);
+ }
+ KMP_DEBUG_ASSERT( q_th != NULL );
+ // push block to owner's sync free list
+ old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
+ /* the next pointer must be set before setting free_list to ptr to avoid
+ exposing a broken list to other threads, even for an instant. */
+ *((void **)tail) = old_ptr;
+
+ while ( ! KMP_COMPARE_AND_STORE_PTR(
+ &q_th->th.th_free_lists[index].th_free_list_sync,
+ old_ptr,
+ head ) )
+ {
+ KMP_CPU_PAUSE();
+ old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
+ *((void **)tail) = old_ptr;
+ }
+
+ // start a new list of not-self tasks
+ this_thr->th.th_free_lists[index].th_free_list_other = ptr;
+ *((void **)ptr) = NULL;
+ descr->size_allocated = (size_t)1; // head of queue keeps its length
+ }
+ }
+ }
+ goto end;
+
+ free_call:
+ KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
+ __kmp_gtid_from_thread( this_thr), size ) );
+ __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
+ brel( this_thr, descr->ptr_allocated );
+
+ end:
+ KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
+
+} // func __kmp_fast_free
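+
+/* Editor's illustration (hypothetical helper, not part of this file): the
+   lock-free push used above when a whole chain is returned to its owning
+   thread's sync free list. The chain's tail is re-linked to the observed
+   head before every CAS attempt, so other threads never see a broken list. */
+#if 0
+static void __kmp_cas_push_chain_example( void * volatile * list, void * head, void * tail )
+{
+    void * old_head;
+    do {
+        old_head = TCR_PTR( *list );    /* snapshot the current head */
+        *((void **)tail) = old_head;    /* link our tail to it first */
+    } while ( ! KMP_COMPARE_AND_STORE_PTR( list, old_head, head ) );
+}
+#endif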
+
+
+// Initialize the thread free lists related to fast memory
+// Only do this when a thread is initially created.
+void
+__kmp_initialize_fast_memory( kmp_info_t *this_thr )
+{
+ KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
+
+ memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
+}
+
+// Free the memory in the thread free lists related to fast memory
+// Only do this when a thread is being reaped (destroyed).
+void
+__kmp_free_fast_memory( kmp_info_t *th )
+{
+ // Assuming the underlying allocator is BGET, walk through its structures...
+ int bin;
+ thr_data_t * thr = get_thr_data( th );
+ void ** lst = NULL;
+
+ KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
+ __kmp_gtid_from_thread( th ) ) );
+
+ __kmp_bget_dequeue( th ); // Release any queued buffers
+
+ // Dig through free lists and extract all allocated blocks
+ for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
+ bfhead_t * b = thr->freelist[ bin ].ql.flink;
+ while ( b != &thr->freelist[ bin ] ) {
+ if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // is this buffer an allocated block?
+ *((void**)b) = lst; // link into the chain (overwrite bthr, but keep flink for now)
+ lst = (void**)b; // push b into lst
+ }
+ b = b->ql.flink; // get next buffer
+ }
+ }
+ while ( lst != NULL ) {
+ void * next = *lst;
+ KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
+ lst, next, th, __kmp_gtid_from_thread( th ) ) );
+ (*thr->relfcn)(lst);
+ #if BufStats
+ // count blocks to prevent problems in __kmp_finalize_bget()
+ thr->numprel++; /* Nr of expansion block releases */
+ thr->numpblk--; /* Total number of blocks */
+ #endif
+ lst = (void**)next;
+ }
+
+ KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
+ __kmp_gtid_from_thread( th ) ) );
+}
+
+#endif // USE_FAST_MEMORY
diff --git a/contrib/libs/cxxsupp/openmp/kmp_atomic.c b/contrib/libs/cxxsupp/openmp/kmp_atomic.c
index a0ec4a1f5f..5d5d3448f2 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_atomic.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_atomic.c
@@ -1,2907 +1,2907 @@
-/*
- * kmp_atomic.c -- ATOMIC implementation routines
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp_atomic.h"
-#include "kmp.h" // TRUE, asm routines prototypes
-
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-
-/*!
-@defgroup ATOMIC_OPS Atomic Operations
-These functions are used for implementing the many different varieties of atomic operations.
-
-The compiler is at liberty to inline atomic operations that are naturally supported
-by the target architecture. For instance on IA-32 architecture an atomic like this can be inlined
-@code
-static int s = 0;
-#pragma omp atomic
- s++;
-@endcode
-using the single instruction: `lock; incl s`
-
-However the runtime does provide entrypoints for these operations to support compilers that choose
-not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the
-increment above.)
-
-The names of the functions are encoded by using the data type name and the operation name, as in these tables.
-
-Data Type | Data type encoding
------------|---------------
-int8_t | `fixed1`
-uint8_t | `fixed1u`
-int16_t | `fixed2`
-uint16_t | `fixed2u`
-int32_t | `fixed4`
-uint32_t | `fixed4u`
-int64_t | `fixed8`
-uint64_t | `fixed8u`
-float | `float4`
-double | `float8`
-float 10 (8087 eighty bit float) | `float10`
-complex<float> | `cmplx4`
-complex<double> | `cmplx8`
-complex<float10> | `cmplx10`
-<br>
-
-Operation | Operation encoding
-----------|-------------------
-+ | add
-- | sub
-\* | mul
-/ | div
-& | andb
-<< | shl
-\>\> | shr
-\| | orb
-^ | xor
-&& | andl
-\|\| | orl
-maximum | max
-minimum | min
-.eqv. | eqv
-.neqv. | neqv
-
-<br>
-For non-commutative operations, `_rev` can also be added for the reversed operation.
-For the functions that capture the result, the suffix `_cpt` is added.
-
-Update Functions
-================
-The general form of an atomic function that just performs an update (without a `capture`) is
-@code
-void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
-@endcode
-@param id_ref a pointer to the source location
-@param gtid the global thread id
-@param lhs a pointer to the left operand
-@param rhs the right operand
-
-`capture` functions
-===================
-The capture functions perform an atomic update and return a result, which is either the value
-before the capture, or that after. They take an additional argument to determine which result is returned.
-Their general form is therefore
-@code
-TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag );
-@endcode
-@param id_ref a pointer to the source location
-@param gtid the global thread id
-@param lhs a pointer to the left operand
-@param rhs the right operand
-@param flag one if the result is to be captured *after* the operation, zero if captured *before*.
-
-The one exception to this is the `complex<float>` type, where the value is not returned;
-instead, an extra output-pointer argument is passed.
-
-They look like
-@code
-void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
-@endcode
-
-Read and Write Operations
-=========================
-The OpenMP<sup>*</sup> standard now supports atomic operations that simply ensure that the
-value is read or written atomically, with no modification
-performed. In many cases on IA-32 architecture these operations can be inlined since
-the architecture guarantees that no tearing occurs on aligned objects
-accessed with a single memory operation of up to 64 bits in size.
-
-The general form of the read operations is
-@code
-TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
-@endcode
-
-For the write operations the form is
-@code
-void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
-@endcode
-
-Full list of functions
-======================
-This leads to the generation of 376 atomic functions, as follows.
-
-Functions for integers
----------------------
-There are versions here for integers of sizes 1, 2, 4 and 8 bytes, both signed and unsigned (where that matters).
-@code
- __kmpc_atomic_fixed1_add
- __kmpc_atomic_fixed1_add_cpt
- __kmpc_atomic_fixed1_add_fp
- __kmpc_atomic_fixed1_andb
- __kmpc_atomic_fixed1_andb_cpt
- __kmpc_atomic_fixed1_andl
- __kmpc_atomic_fixed1_andl_cpt
- __kmpc_atomic_fixed1_div
- __kmpc_atomic_fixed1_div_cpt
- __kmpc_atomic_fixed1_div_cpt_rev
- __kmpc_atomic_fixed1_div_float8
- __kmpc_atomic_fixed1_div_fp
- __kmpc_atomic_fixed1_div_rev
- __kmpc_atomic_fixed1_eqv
- __kmpc_atomic_fixed1_eqv_cpt
- __kmpc_atomic_fixed1_max
- __kmpc_atomic_fixed1_max_cpt
- __kmpc_atomic_fixed1_min
- __kmpc_atomic_fixed1_min_cpt
- __kmpc_atomic_fixed1_mul
- __kmpc_atomic_fixed1_mul_cpt
- __kmpc_atomic_fixed1_mul_float8
- __kmpc_atomic_fixed1_mul_fp
- __kmpc_atomic_fixed1_neqv
- __kmpc_atomic_fixed1_neqv_cpt
- __kmpc_atomic_fixed1_orb
- __kmpc_atomic_fixed1_orb_cpt
- __kmpc_atomic_fixed1_orl
- __kmpc_atomic_fixed1_orl_cpt
- __kmpc_atomic_fixed1_rd
- __kmpc_atomic_fixed1_shl
- __kmpc_atomic_fixed1_shl_cpt
- __kmpc_atomic_fixed1_shl_cpt_rev
- __kmpc_atomic_fixed1_shl_rev
- __kmpc_atomic_fixed1_shr
- __kmpc_atomic_fixed1_shr_cpt
- __kmpc_atomic_fixed1_shr_cpt_rev
- __kmpc_atomic_fixed1_shr_rev
- __kmpc_atomic_fixed1_sub
- __kmpc_atomic_fixed1_sub_cpt
- __kmpc_atomic_fixed1_sub_cpt_rev
- __kmpc_atomic_fixed1_sub_fp
- __kmpc_atomic_fixed1_sub_rev
- __kmpc_atomic_fixed1_swp
- __kmpc_atomic_fixed1_wr
- __kmpc_atomic_fixed1_xor
- __kmpc_atomic_fixed1_xor_cpt
- __kmpc_atomic_fixed1u_div
- __kmpc_atomic_fixed1u_div_cpt
- __kmpc_atomic_fixed1u_div_cpt_rev
- __kmpc_atomic_fixed1u_div_fp
- __kmpc_atomic_fixed1u_div_rev
- __kmpc_atomic_fixed1u_shr
- __kmpc_atomic_fixed1u_shr_cpt
- __kmpc_atomic_fixed1u_shr_cpt_rev
- __kmpc_atomic_fixed1u_shr_rev
- __kmpc_atomic_fixed2_add
- __kmpc_atomic_fixed2_add_cpt
- __kmpc_atomic_fixed2_add_fp
- __kmpc_atomic_fixed2_andb
- __kmpc_atomic_fixed2_andb_cpt
- __kmpc_atomic_fixed2_andl
- __kmpc_atomic_fixed2_andl_cpt
- __kmpc_atomic_fixed2_div
- __kmpc_atomic_fixed2_div_cpt
- __kmpc_atomic_fixed2_div_cpt_rev
- __kmpc_atomic_fixed2_div_float8
- __kmpc_atomic_fixed2_div_fp
- __kmpc_atomic_fixed2_div_rev
- __kmpc_atomic_fixed2_eqv
- __kmpc_atomic_fixed2_eqv_cpt
- __kmpc_atomic_fixed2_max
- __kmpc_atomic_fixed2_max_cpt
- __kmpc_atomic_fixed2_min
- __kmpc_atomic_fixed2_min_cpt
- __kmpc_atomic_fixed2_mul
- __kmpc_atomic_fixed2_mul_cpt
- __kmpc_atomic_fixed2_mul_float8
- __kmpc_atomic_fixed2_mul_fp
- __kmpc_atomic_fixed2_neqv
- __kmpc_atomic_fixed2_neqv_cpt
- __kmpc_atomic_fixed2_orb
- __kmpc_atomic_fixed2_orb_cpt
- __kmpc_atomic_fixed2_orl
- __kmpc_atomic_fixed2_orl_cpt
- __kmpc_atomic_fixed2_rd
- __kmpc_atomic_fixed2_shl
- __kmpc_atomic_fixed2_shl_cpt
- __kmpc_atomic_fixed2_shl_cpt_rev
- __kmpc_atomic_fixed2_shl_rev
- __kmpc_atomic_fixed2_shr
- __kmpc_atomic_fixed2_shr_cpt
- __kmpc_atomic_fixed2_shr_cpt_rev
- __kmpc_atomic_fixed2_shr_rev
- __kmpc_atomic_fixed2_sub
- __kmpc_atomic_fixed2_sub_cpt
- __kmpc_atomic_fixed2_sub_cpt_rev
- __kmpc_atomic_fixed2_sub_fp
- __kmpc_atomic_fixed2_sub_rev
- __kmpc_atomic_fixed2_swp
- __kmpc_atomic_fixed2_wr
- __kmpc_atomic_fixed2_xor
- __kmpc_atomic_fixed2_xor_cpt
- __kmpc_atomic_fixed2u_div
- __kmpc_atomic_fixed2u_div_cpt
- __kmpc_atomic_fixed2u_div_cpt_rev
- __kmpc_atomic_fixed2u_div_fp
- __kmpc_atomic_fixed2u_div_rev
- __kmpc_atomic_fixed2u_shr
- __kmpc_atomic_fixed2u_shr_cpt
- __kmpc_atomic_fixed2u_shr_cpt_rev
- __kmpc_atomic_fixed2u_shr_rev
- __kmpc_atomic_fixed4_add
- __kmpc_atomic_fixed4_add_cpt
- __kmpc_atomic_fixed4_add_fp
- __kmpc_atomic_fixed4_andb
- __kmpc_atomic_fixed4_andb_cpt
- __kmpc_atomic_fixed4_andl
- __kmpc_atomic_fixed4_andl_cpt
- __kmpc_atomic_fixed4_div
- __kmpc_atomic_fixed4_div_cpt
- __kmpc_atomic_fixed4_div_cpt_rev
- __kmpc_atomic_fixed4_div_float8
- __kmpc_atomic_fixed4_div_fp
- __kmpc_atomic_fixed4_div_rev
- __kmpc_atomic_fixed4_eqv
- __kmpc_atomic_fixed4_eqv_cpt
- __kmpc_atomic_fixed4_max
- __kmpc_atomic_fixed4_max_cpt
- __kmpc_atomic_fixed4_min
- __kmpc_atomic_fixed4_min_cpt
- __kmpc_atomic_fixed4_mul
- __kmpc_atomic_fixed4_mul_cpt
- __kmpc_atomic_fixed4_mul_float8
- __kmpc_atomic_fixed4_mul_fp
- __kmpc_atomic_fixed4_neqv
- __kmpc_atomic_fixed4_neqv_cpt
- __kmpc_atomic_fixed4_orb
- __kmpc_atomic_fixed4_orb_cpt
- __kmpc_atomic_fixed4_orl
- __kmpc_atomic_fixed4_orl_cpt
- __kmpc_atomic_fixed4_rd
- __kmpc_atomic_fixed4_shl
- __kmpc_atomic_fixed4_shl_cpt
- __kmpc_atomic_fixed4_shl_cpt_rev
- __kmpc_atomic_fixed4_shl_rev
- __kmpc_atomic_fixed4_shr
- __kmpc_atomic_fixed4_shr_cpt
- __kmpc_atomic_fixed4_shr_cpt_rev
- __kmpc_atomic_fixed4_shr_rev
- __kmpc_atomic_fixed4_sub
- __kmpc_atomic_fixed4_sub_cpt
- __kmpc_atomic_fixed4_sub_cpt_rev
- __kmpc_atomic_fixed4_sub_fp
- __kmpc_atomic_fixed4_sub_rev
- __kmpc_atomic_fixed4_swp
- __kmpc_atomic_fixed4_wr
- __kmpc_atomic_fixed4_xor
- __kmpc_atomic_fixed4_xor_cpt
- __kmpc_atomic_fixed4u_div
- __kmpc_atomic_fixed4u_div_cpt
- __kmpc_atomic_fixed4u_div_cpt_rev
- __kmpc_atomic_fixed4u_div_fp
- __kmpc_atomic_fixed4u_div_rev
- __kmpc_atomic_fixed4u_shr
- __kmpc_atomic_fixed4u_shr_cpt
- __kmpc_atomic_fixed4u_shr_cpt_rev
- __kmpc_atomic_fixed4u_shr_rev
- __kmpc_atomic_fixed8_add
- __kmpc_atomic_fixed8_add_cpt
- __kmpc_atomic_fixed8_add_fp
- __kmpc_atomic_fixed8_andb
- __kmpc_atomic_fixed8_andb_cpt
- __kmpc_atomic_fixed8_andl
- __kmpc_atomic_fixed8_andl_cpt
- __kmpc_atomic_fixed8_div
- __kmpc_atomic_fixed8_div_cpt
- __kmpc_atomic_fixed8_div_cpt_rev
- __kmpc_atomic_fixed8_div_float8
- __kmpc_atomic_fixed8_div_fp
- __kmpc_atomic_fixed8_div_rev
- __kmpc_atomic_fixed8_eqv
- __kmpc_atomic_fixed8_eqv_cpt
- __kmpc_atomic_fixed8_max
- __kmpc_atomic_fixed8_max_cpt
- __kmpc_atomic_fixed8_min
- __kmpc_atomic_fixed8_min_cpt
- __kmpc_atomic_fixed8_mul
- __kmpc_atomic_fixed8_mul_cpt
- __kmpc_atomic_fixed8_mul_float8
- __kmpc_atomic_fixed8_mul_fp
- __kmpc_atomic_fixed8_neqv
- __kmpc_atomic_fixed8_neqv_cpt
- __kmpc_atomic_fixed8_orb
- __kmpc_atomic_fixed8_orb_cpt
- __kmpc_atomic_fixed8_orl
- __kmpc_atomic_fixed8_orl_cpt
- __kmpc_atomic_fixed8_rd
- __kmpc_atomic_fixed8_shl
- __kmpc_atomic_fixed8_shl_cpt
- __kmpc_atomic_fixed8_shl_cpt_rev
- __kmpc_atomic_fixed8_shl_rev
- __kmpc_atomic_fixed8_shr
- __kmpc_atomic_fixed8_shr_cpt
- __kmpc_atomic_fixed8_shr_cpt_rev
- __kmpc_atomic_fixed8_shr_rev
- __kmpc_atomic_fixed8_sub
- __kmpc_atomic_fixed8_sub_cpt
- __kmpc_atomic_fixed8_sub_cpt_rev
- __kmpc_atomic_fixed8_sub_fp
- __kmpc_atomic_fixed8_sub_rev
- __kmpc_atomic_fixed8_swp
- __kmpc_atomic_fixed8_wr
- __kmpc_atomic_fixed8_xor
- __kmpc_atomic_fixed8_xor_cpt
- __kmpc_atomic_fixed8u_div
- __kmpc_atomic_fixed8u_div_cpt
- __kmpc_atomic_fixed8u_div_cpt_rev
- __kmpc_atomic_fixed8u_div_fp
- __kmpc_atomic_fixed8u_div_rev
- __kmpc_atomic_fixed8u_shr
- __kmpc_atomic_fixed8u_shr_cpt
- __kmpc_atomic_fixed8u_shr_cpt_rev
- __kmpc_atomic_fixed8u_shr_rev
-@endcode
-
-Functions for floating point
-----------------------------
-There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes.
-(Ten byte floats are used by X87, but are now rare).
-@code
- __kmpc_atomic_float4_add
- __kmpc_atomic_float4_add_cpt
- __kmpc_atomic_float4_add_float8
- __kmpc_atomic_float4_add_fp
- __kmpc_atomic_float4_div
- __kmpc_atomic_float4_div_cpt
- __kmpc_atomic_float4_div_cpt_rev
- __kmpc_atomic_float4_div_float8
- __kmpc_atomic_float4_div_fp
- __kmpc_atomic_float4_div_rev
- __kmpc_atomic_float4_max
- __kmpc_atomic_float4_max_cpt
- __kmpc_atomic_float4_min
- __kmpc_atomic_float4_min_cpt
- __kmpc_atomic_float4_mul
- __kmpc_atomic_float4_mul_cpt
- __kmpc_atomic_float4_mul_float8
- __kmpc_atomic_float4_mul_fp
- __kmpc_atomic_float4_rd
- __kmpc_atomic_float4_sub
- __kmpc_atomic_float4_sub_cpt
- __kmpc_atomic_float4_sub_cpt_rev
- __kmpc_atomic_float4_sub_float8
- __kmpc_atomic_float4_sub_fp
- __kmpc_atomic_float4_sub_rev
- __kmpc_atomic_float4_swp
- __kmpc_atomic_float4_wr
- __kmpc_atomic_float8_add
- __kmpc_atomic_float8_add_cpt
- __kmpc_atomic_float8_add_fp
- __kmpc_atomic_float8_div
- __kmpc_atomic_float8_div_cpt
- __kmpc_atomic_float8_div_cpt_rev
- __kmpc_atomic_float8_div_fp
- __kmpc_atomic_float8_div_rev
- __kmpc_atomic_float8_max
- __kmpc_atomic_float8_max_cpt
- __kmpc_atomic_float8_min
- __kmpc_atomic_float8_min_cpt
- __kmpc_atomic_float8_mul
- __kmpc_atomic_float8_mul_cpt
- __kmpc_atomic_float8_mul_fp
- __kmpc_atomic_float8_rd
- __kmpc_atomic_float8_sub
- __kmpc_atomic_float8_sub_cpt
- __kmpc_atomic_float8_sub_cpt_rev
- __kmpc_atomic_float8_sub_fp
- __kmpc_atomic_float8_sub_rev
- __kmpc_atomic_float8_swp
- __kmpc_atomic_float8_wr
- __kmpc_atomic_float10_add
- __kmpc_atomic_float10_add_cpt
- __kmpc_atomic_float10_add_fp
- __kmpc_atomic_float10_div
- __kmpc_atomic_float10_div_cpt
- __kmpc_atomic_float10_div_cpt_rev
- __kmpc_atomic_float10_div_fp
- __kmpc_atomic_float10_div_rev
- __kmpc_atomic_float10_mul
- __kmpc_atomic_float10_mul_cpt
- __kmpc_atomic_float10_mul_fp
- __kmpc_atomic_float10_rd
- __kmpc_atomic_float10_sub
- __kmpc_atomic_float10_sub_cpt
- __kmpc_atomic_float10_sub_cpt_rev
- __kmpc_atomic_float10_sub_fp
- __kmpc_atomic_float10_sub_rev
- __kmpc_atomic_float10_swp
- __kmpc_atomic_float10_wr
- __kmpc_atomic_float16_add
- __kmpc_atomic_float16_add_cpt
- __kmpc_atomic_float16_div
- __kmpc_atomic_float16_div_cpt
- __kmpc_atomic_float16_div_cpt_rev
- __kmpc_atomic_float16_div_rev
- __kmpc_atomic_float16_max
- __kmpc_atomic_float16_max_cpt
- __kmpc_atomic_float16_min
- __kmpc_atomic_float16_min_cpt
- __kmpc_atomic_float16_mul
- __kmpc_atomic_float16_mul_cpt
- __kmpc_atomic_float16_rd
- __kmpc_atomic_float16_sub
- __kmpc_atomic_float16_sub_cpt
- __kmpc_atomic_float16_sub_cpt_rev
- __kmpc_atomic_float16_sub_rev
- __kmpc_atomic_float16_swp
- __kmpc_atomic_float16_wr
-@endcode
-
-Functions for Complex types
----------------------------
-Functions for complex types whose component floating point variables are of size 4, 8, 10 or 16 bytes.
-The names here are based on the size of the component float, *not* the size of the complex type. So
-`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
-
-@code
- __kmpc_atomic_cmplx4_add
- __kmpc_atomic_cmplx4_add_cmplx8
- __kmpc_atomic_cmplx4_add_cpt
- __kmpc_atomic_cmplx4_div
- __kmpc_atomic_cmplx4_div_cmplx8
- __kmpc_atomic_cmplx4_div_cpt
- __kmpc_atomic_cmplx4_div_cpt_rev
- __kmpc_atomic_cmplx4_div_rev
- __kmpc_atomic_cmplx4_mul
- __kmpc_atomic_cmplx4_mul_cmplx8
- __kmpc_atomic_cmplx4_mul_cpt
- __kmpc_atomic_cmplx4_rd
- __kmpc_atomic_cmplx4_sub
- __kmpc_atomic_cmplx4_sub_cmplx8
- __kmpc_atomic_cmplx4_sub_cpt
- __kmpc_atomic_cmplx4_sub_cpt_rev
- __kmpc_atomic_cmplx4_sub_rev
- __kmpc_atomic_cmplx4_swp
- __kmpc_atomic_cmplx4_wr
- __kmpc_atomic_cmplx8_add
- __kmpc_atomic_cmplx8_add_cpt
- __kmpc_atomic_cmplx8_div
- __kmpc_atomic_cmplx8_div_cpt
- __kmpc_atomic_cmplx8_div_cpt_rev
- __kmpc_atomic_cmplx8_div_rev
- __kmpc_atomic_cmplx8_mul
- __kmpc_atomic_cmplx8_mul_cpt
- __kmpc_atomic_cmplx8_rd
- __kmpc_atomic_cmplx8_sub
- __kmpc_atomic_cmplx8_sub_cpt
- __kmpc_atomic_cmplx8_sub_cpt_rev
- __kmpc_atomic_cmplx8_sub_rev
- __kmpc_atomic_cmplx8_swp
- __kmpc_atomic_cmplx8_wr
- __kmpc_atomic_cmplx10_add
- __kmpc_atomic_cmplx10_add_cpt
- __kmpc_atomic_cmplx10_div
- __kmpc_atomic_cmplx10_div_cpt
- __kmpc_atomic_cmplx10_div_cpt_rev
- __kmpc_atomic_cmplx10_div_rev
- __kmpc_atomic_cmplx10_mul
- __kmpc_atomic_cmplx10_mul_cpt
- __kmpc_atomic_cmplx10_rd
- __kmpc_atomic_cmplx10_sub
- __kmpc_atomic_cmplx10_sub_cpt
- __kmpc_atomic_cmplx10_sub_cpt_rev
- __kmpc_atomic_cmplx10_sub_rev
- __kmpc_atomic_cmplx10_swp
- __kmpc_atomic_cmplx10_wr
- __kmpc_atomic_cmplx16_add
- __kmpc_atomic_cmplx16_add_cpt
- __kmpc_atomic_cmplx16_div
- __kmpc_atomic_cmplx16_div_cpt
- __kmpc_atomic_cmplx16_div_cpt_rev
- __kmpc_atomic_cmplx16_div_rev
- __kmpc_atomic_cmplx16_mul
- __kmpc_atomic_cmplx16_mul_cpt
- __kmpc_atomic_cmplx16_rd
- __kmpc_atomic_cmplx16_sub
- __kmpc_atomic_cmplx16_sub_cpt
- __kmpc_atomic_cmplx16_sub_cpt_rev
- __kmpc_atomic_cmplx16_swp
- __kmpc_atomic_cmplx16_wr
-@endcode
-*/
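-
-/* Editor's illustration (hypothetical functions, not part of this file): how a
-   compiler that does not inline the atomic might call the entry points
-   documented above for "#pragma omp atomic" on a 32-bit int. The loc and gtid
-   arguments are whatever the compiler already has at hand. */
-#if 0
-extern kmp_int32 s;
-void atomic_update_example( ident_t * loc, int gtid )
-{
-    __kmpc_atomic_fixed4_add( loc, gtid, &s, 5 );  /* s += 5, atomically */
-}
-void atomic_capture_example( ident_t * loc, int gtid, kmp_int32 * out )
-{
-    /* v = (s += 5); flag == 1 requests the value *after* the update */
-    *out = __kmpc_atomic_fixed4_add_cpt( loc, gtid, &s, 5, 1 );
-}
-#endif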
-
-/*!
-@ingroup ATOMIC_OPS
-@{
-*/
-
-/*
- * Global vars
- */
-
-#ifndef KMP_GOMP_COMPAT
-int __kmp_atomic_mode = 1; // Intel perf
-#else
-int __kmp_atomic_mode = 2; // GOMP compatibility
-#endif /* KMP_GOMP_COMPAT */
-
-KMP_ALIGN(128)
-
-kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
-kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
-kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
-kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
-kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
-kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
-kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
-kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex data type */
-kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
-kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
-kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
-kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
-kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
-
-
-/*
- 2007-03-02:
- Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
- bug on *_32 and *_32e. This is just a temporary workaround for the problem.
- It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
- routines in assembler language.
-*/
-#define KMP_ATOMIC_VOLATILE volatile
-
-#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
-
- static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
- static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
- static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
- static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
- static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
- static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
-
- static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
- static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
- static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
- static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
- static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
- static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
-
- static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
- static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
- static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
- static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
-
- static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
- static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
- static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
- static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
-
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ATOMIC implementation routines */
-/* one routine for each operation and operand type */
-/* ------------------------------------------------------------------------ */
-
-// All routine declarations look like
-// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
-// ------------------------------------------------------------------------
-
-#define KMP_CHECK_GTID \
- if ( gtid == KMP_GTID_UNKNOWN ) { \
- gtid = __kmp_entry_gtid(); \
- } // check and get gtid when needed
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
-RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
-
-// ------------------------------------------------------------------------
-// Lock variables used for critical sections for various size operands
-#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
-#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
-#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
-#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
-#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
-#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
-#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
-#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
-#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
-#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
-#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
-#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
-#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1- and 2-byte operands expect a valid gtid; other sizes must check before this macro
-#define OP_CRITICAL(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- (*lhs) OP (rhs); \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
-
-// ------------------------------------------------------------------------
-// For GNU compatibility, we may need to use a critical section,
-// even though it is not required by the ISA.
-//
-// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
-// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
-// critical section. On Intel(R) 64, all atomic operations are done with fetch
-// and add or compare and exchange. Therefore, the FLAG parameter to this
-// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
-// require a critical section, where we predict that they will be implemented
-// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
-//
-// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
-// the FLAG parameter should always be 1. If we know that we will be using
-// a critical section, then we want to make certain that we use the generic
-// lock __kmp_atomic_lock to protect the atomic update, and not one of the
-// locks that are specialized based upon the size or type of the data.
-//
-// If FLAG is 0, then we are relying on dead code elimination by the build
-// compiler to get rid of the useless block of code, and save a needless
-// branch at runtime.
-//
-
-#ifdef KMP_GOMP_COMPAT
-# define OP_GOMP_CRITICAL(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL( OP, 0 ); \
- return; \
- }
-# else
-# define OP_GOMP_CRITICAL(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-#if KMP_MIC
-# define KMP_DO_PAUSE _mm_delay_32( 1 )
-#else
-# define KMP_DO_PAUSE KMP_CPU_PAUSE()
-#endif /* KMP_MIC */
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-#define OP_CMPXCHG(TYPE,BITS,OP) \
- { \
- TYPE old_value, new_value; \
- old_value = *(TYPE volatile *)lhs; \
- new_value = old_value OP rhs; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
- { \
- KMP_DO_PAUSE; \
- \
- old_value = *(TYPE volatile *)lhs; \
- new_value = old_value OP rhs; \
- } \
- }
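-
-/* Editor's illustration (hypothetical function, not part of this file): roughly
-   what OP_CMPXCHG(kmp_real32, 32, +) expands to inside __kmpc_atomic_float4_add.
-   The float payload is reinterpreted as a 32-bit integer only so the hardware
-   compare-and-swap can detect a concurrent modification and force a retry. */
-#if 0
-static void __kmp_cmpxchg_float_add_example( kmp_real32 * lhs, kmp_real32 rhs )
-{
-    kmp_real32 old_value, new_value;
-    old_value = *(kmp_real32 volatile *)lhs;
-    new_value = old_value + rhs;
-    while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
-                *VOLATILE_CAST(kmp_int32 *) &old_value,
-                *VOLATILE_CAST(kmp_int32 *) &new_value ) )
-    {
-        KMP_DO_PAUSE;
-        old_value = *(kmp_real32 volatile *)lhs;  /* re-read and retry */
-        new_value = old_value + rhs;
-    }
-}
-#endif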
-
-#if USE_CMPXCHG_FIX
-// 2007-06-25:
-// workaround for C78287 (complex(kind=4) data type)
-// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
-// Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
-// This is a problem of the compiler.
-// Related tracker is C76005, targeted to 11.0.
-// I verified the asm of the workaround.
-#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
- { \
- struct _sss { \
- TYPE cmp; \
- kmp_int##BITS *vvv; \
- }; \
- struct _sss old_value, new_value; \
- old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
- new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
- *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
- new_value.cmp = old_value.cmp OP rhs; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
- *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
- { \
- KMP_DO_PAUSE; \
- \
- *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
- new_value.cmp = old_value.cmp OP rhs; \
- } \
- }
-// end of the first part of the workaround for C78287
-#endif // USE_CMPXCHG_FIX
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-// ------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ====================================
-#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
- KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
-}
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- OP_CMPXCHG(TYPE,BITS,OP) \
-}
-#if USE_CMPXCHG_FIX
-// -------------------------------------------------------------------------
-// workaround for C78287 (complex(kind=4) data type)
-#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
-}
-// end of the second part of the workaround for C78287
-#endif
-
-#else
-// -------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
- KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
- } \
-}
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
- } \
-}
-#if USE_CMPXCHG_FIX
-// -------------------------------------------------------------------------
-// workaround for C78287 (complex(kind=4) data type)
-#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
- } \
-}
-// end of the second part of the workaround for C78287
-#endif // USE_CMPXCHG_FIX
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-// Routines for ATOMIC 4-byte operands addition and subtraction
-ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
-ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
-
-ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
-ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
-
-// Routines for ATOMIC 8-byte operands addition and subtraction
-ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
-ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
-
-ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
-ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
-
-// ------------------------------------------------------------------------
-// Entries definition for integer operands
-// TYPE_ID - operands type and size (fixed4, float4)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operand type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator (used in critical section)
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-// MASK - used for alignment check
-
-// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
-// ------------------------------------------------------------------------
-// Routines for ATOMIC integer operands, other operators
-// ------------------------------------------------------------------------
-// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
-ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
-ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
-ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
-ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
-ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
-ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
-ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
-ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
-ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
-ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
-ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
-ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
-ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
-ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
-ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
-ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
-ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
-ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
-ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
-ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
-ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
-ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
-ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
-ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
-ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
-ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
-ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
-ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
-ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
-ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
-ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
-ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
-ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
-ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
-ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
-ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
-ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
-ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
-ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
-ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
-ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
-ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
-ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
-ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
-// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
-
-
-/* ------------------------------------------------------------------------ */
-/* Routines for C/C++ Reduction operators && and || */
-/* ------------------------------------------------------------------------ */
-
-// ------------------------------------------------------------------------
-// Need separate macros for &&, || because there is no combined assignment
-// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
-#define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
- OP_CRITICAL( = *lhs OP, LCK_ID ) \
-}
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-// ------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ===================================
-#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
- OP_CMPXCHG(TYPE,BITS,OP) \
-}
-
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
- } \
-}
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
-ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
-ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
-ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
-ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
-ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
-ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
-ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
-
-
-/* ------------------------------------------------------------------------- */
-/* Routines for Fortran operators that have no exact C counterpart: */
-/* MAX, MIN, .EQV., .NEQV. */
-/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
-/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
-/* ------------------------------------------------------------------------- */
-
-// -------------------------------------------------------------------------
-// MIN and MAX need separate macros
-// OP - operator used to check whether any action is still needed
-#define MIN_MAX_CRITSECT(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if ( *lhs OP rhs ) { /* still need actions? */ \
- *lhs = rhs; \
- } \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
-
-// -------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
- if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
- KMP_CHECK_GTID; \
- MIN_MAX_CRITSECT( OP, 0 ); \
- return; \
- }
-#else
-#define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// -------------------------------------------------------------------------
-#define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- while ( old_value OP rhs && /* still need actions? */ \
- ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
- { \
- KMP_CPU_PAUSE(); \
- temp_val = *lhs; \
- old_value = temp_val; \
- } \
- }
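-
-/* Editor's illustration (hypothetical function, not part of this file): the
-   loop above instantiated as MIN_MAX_CMPXCHG(kmp_real32, 32, <), i.e. "max".
-   It stops as soon as the stored value is already >= rhs (nothing to do) or
-   the compare-and-swap installs rhs successfully. */
-#if 0
-static void __kmp_max_cmpxchg_example( kmp_real32 * lhs, kmp_real32 rhs )
-{
-    kmp_real32 old_value = *lhs;
-    while ( old_value < rhs &&   /* still need an update? */
-            ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
-                  *VOLATILE_CAST(kmp_int32 *) &old_value,
-                  *VOLATILE_CAST(kmp_int32 *) &rhs ) )
-    {
-        KMP_CPU_PAUSE();
-        old_value = *lhs;        /* re-read and retry */
-    }
-}
-#endif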
-
-// -------------------------------------------------------------------------
-// 1-byte, 2-byte operands - use critical section
-#define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- if ( *lhs OP rhs ) { /* need actions? */ \
- GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
- MIN_MAX_CRITSECT(OP,LCK_ID) \
- } \
-}
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-// -------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ====================================
-#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- if ( *lhs OP rhs ) { \
- GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
- MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
- } \
-}
-
-#else
-// -------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- if ( *lhs OP rhs ) { \
- GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
- } \
- } \
-}
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
-MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
-MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
-MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
-MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
-MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
-MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
-MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
-MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
-MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
-MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
-MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
-#if KMP_HAVE_QUAD
-MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
-MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
-#if ( KMP_ARCH_X86 )
- MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
- MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
-#endif
-#endif
-// ------------------------------------------------------------------------
-// Need separate macros for .EQV. because of the need for the complement (~)
-// OP ignored for critical sections, ^=~ used instead
-#define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
-}
-
-// ------------------------------------------------------------------------
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-// ------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ===================================
-#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
- OP_CMPXCHG(TYPE,BITS,OP) \
-}
-// ------------------------------------------------------------------------
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
- } \
-}
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
-ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
-ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
-ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
-ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
-ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
-ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
-ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
-
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
-}
-
-/* ------------------------------------------------------------------------- */
-// routines for long double type
-ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
-ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
-ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
-ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
-#if KMP_HAVE_QUAD
-// routines for _Quad type
-ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
-ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
-ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
-ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
- ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
- ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
- ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
-#endif
-#endif
-// routines for complex types
-
-#if USE_CMPXCHG_FIX
-// workaround for C78287 (complex(kind=4) data type)
-ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
-ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
-ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
-ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
-// end of the workaround for C78287
-#else
-ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
-ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
-ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
-ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
-#endif // USE_CMPXCHG_FIX
-
-ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
-ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
-ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
-ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
-ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
-ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
-ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
-ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
-ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
-ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
-ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
- ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
- ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
- ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
-#endif
-#endif
-
-#if OMP_40_ENABLED
-
-// OpenMP 4.0: x = expr binop x for non-commutative operations.
-// Supported only on IA-32 architecture and Intel(R) 64
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_REV(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- (*lhs) = (rhs) OP (*lhs); \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
-
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_REV(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_REV( OP, 0 ); \
- return; \
- }
-#else
-#define OP_GOMP_CRITICAL_REV(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
-RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-#define OP_CMPXCHG_REV(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs OP old_value; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
- { \
- KMP_DO_PAUSE; \
- \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs OP old_value; \
- } \
- }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
- OP_CMPXCHG_REV(TYPE,BITS,OP) \
-}
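Since the reversed form computes rhs OP *lhs, the compare-and-swap loop has to recompute the candidate value from the refreshed old value on every retry. A self-contained sketch of the same loop using C11 atomics (the kmp_int*/KMP_COMPARE_AND_STORE_ACQ* primitives are the runtime's own; the function name below is illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Shape of __kmpc_atomic_fixed4_sub_rev: atomically perform *lhs = rhs - *lhs. */
    static void fixed4_sub_rev(_Atomic int32_t *lhs, int32_t rhs) {
        int32_t old_value = atomic_load(lhs);
        int32_t new_value = rhs - old_value;
        while (!atomic_compare_exchange_weak(lhs, &old_value, new_value)) {
            /* the failed CAS refreshed old_value; recompute rhs OP old_value and retry */
            new_value = rhs - old_value;
        }
    }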
-
-// ------------------------------------------------------------------------
-// Entries definition for integer operands
-// TYPE_ID - operands type and size (fixed4, float4)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operand type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator (used in critical section)
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-
-// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
-// ------------------------------------------------------------------------
-// Routines for ATOMIC integer operands, other operators
-// ------------------------------------------------------------------------
-// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
-ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
-ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
-ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
-ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
-ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
-ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
-
-ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
-ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
-ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
-ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
-ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
-ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
-
-ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
-ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
-ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
-ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
-ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
-ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
-
-ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
-ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
-ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
-ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
-ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
-ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
-
-ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
-ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
-
-ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
-ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
-// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
- OP_CRITICAL_REV(OP,LCK_ID) \
-}
-
-/* ------------------------------------------------------------------------- */
-// routines for long double type
-ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
-ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
-#if KMP_HAVE_QUAD
-// routines for _Quad type
-ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
-ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
- ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
-#endif
-#endif
-
-// routines for complex types
-ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
-ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
-ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
-ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
-ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
-ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
-ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
- ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
-#endif
-#endif
-
-
-#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
-// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
-
-#endif //OMP_40_ENABLED
-
-
-/* ------------------------------------------------------------------------ */
-/* Routines for mixed types of LHS and RHS, when RHS is "larger" */
-/* Note: in order to reduce the total number of type combinations,          */
-/* it is assumed that the compiler converts RHS to the longest floating     */
-/* type, that is _Quad, before calling any of these routines.               */
-/* The conversion to _Quad is done by the compiler during the calculation,  */
-/* and the conversion back to TYPE - right before the assignment, like:     */
-/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                */
-/* A performance penalty is expected because of software emulation.         */
-/* ------------------------------------------------------------------------ */
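So, for example, an atomic `i /= q` with `int i` and a quad-precision `q` is expected to arrive at `__kmpc_atomic_fixed4_div_fp`, whose update amounts to the widen/narrow pattern below (a sketch only, with long double standing in for _Quad and the atomicity machinery omitted):

    /* The computation the comment describes: widen *lhs, apply OP, narrow on assignment. */
    static void fixed4_div_fp_update(int *lhs, long double rhs) {
        *lhs = (int)((long double)(*lhs) / rhs);
    }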
-
-#define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
-void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
-}
-
-// -------------------------------------------------------------------------
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-// -------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ====================================
-#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- OP_CMPXCHG(TYPE,BITS,OP) \
-}
-// -------------------------------------------------------------------------
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
- } \
-}
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-// RHS=float8
-ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
-ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
-ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
-ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
-ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
-ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
-ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
-ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
-
-// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
-#if KMP_HAVE_QUAD
-ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
-ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
-ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
-ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
-ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
-
-ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
-ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
-ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
-ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
-ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
-
-ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
-ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
-ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
-ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
-ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
-
-ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
-ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
-ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
-ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
-ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
-
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
-ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
-
-ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
-ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
-ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
-ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
-
-ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
-ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
-ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
-ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-// ------------------------------------------------------------------------
-// X86 or X86_64: no alignment problems ====================================
-#if USE_CMPXCHG_FIX
-// workaround for C78287 (complex(kind=4) data type)
-#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
-}
-// end of the second part of the workaround for C78287
-#else
-#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- OP_CMPXCHG(TYPE,BITS,OP) \
-}
-#endif // USE_CMPXCHG_FIX
-#else
-// ------------------------------------------------------------------------
-// Code for other architectures that don't handle unaligned accesses.
-#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
-ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
- OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
- if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
- OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
- } \
-}
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
-ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
-ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
-ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
-
-// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-// ------------------------------------------------------------------------
-// Atomic READ routines
-// ------------------------------------------------------------------------
-
-// ------------------------------------------------------------------------
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
-RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store_ret" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-// TODO: check if it is still necessary
-// Return old value regardless of the result of "compare & swap" operation
-
-#define OP_CMPXCHG_READ(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- union f_i_union { \
- TYPE f_val; \
- kmp_int##BITS i_val; \
- }; \
- union f_i_union old_value; \
- temp_val = *loc; \
- old_value.f_val = temp_val; \
- old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
- new_value = old_value.f_val; \
- return new_value; \
- }
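The trick here is that a compare-and-store whose expected and desired values are identical never changes the location but still reports the value it actually observed, so it doubles as an atomic read for types that are only guaranteed a CAS primitive of the right width. A hedged C11 sketch (with C11 atomics such a read is just atomic_load; this only illustrates the CAS-based pattern the macro relies on):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Read *loc atomically using only a compare-and-swap primitive. */
    static uint64_t cas_read_u64(_Atomic uint64_t *loc) {
        /* cheap first guess; the macro's temp_val plays the same role */
        uint64_t expected = atomic_load_explicit(loc, memory_order_relaxed);
        /* if the guess was right the CAS stores the same bits back (a no-op);
           if not, the CAS fails and refreshes `expected` with the value it saw */
        atomic_compare_exchange_strong(loc, &expected, expected);
        return expected;
    }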
-
-// -------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_READ(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- new_value = (*loc); \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
-
-// -------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_READ(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_READ( OP, 0 ); \
- return new_value; \
- }
-#else
-#define OP_GOMP_CRITICAL_READ(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// -------------------------------------------------------------------------
-#define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
- new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
- return new_value; \
-}
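For the native integer widths the read is even simpler: a locked fetch-and-add of zero returns the current contents without changing them. A C11 equivalent of what KMP_TEST_THEN_ADD##BITS( loc, + 0 ) provides (illustrative name):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Shape of __kmpc_atomic_fixed4_rd: fetch-and-add of 0 yields the value atomically. */
    static int32_t fixed4_rd(_Atomic int32_t *loc) {
        return atomic_fetch_add(loc, 0);
    }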
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
- OP_CMPXCHG_READ(TYPE,BITS,OP) \
-}
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
- return new_value; \
-}
-
-// ------------------------------------------------------------------------
-// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
-// Let's return the read value through the additional parameter.
-
-#if ( KMP_OS_WINDOWS )
-
-#define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- (*out) = (*loc); \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_READ_WRK( OP, 0 ); \
- }
-#else
-#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-#define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
-void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
-
-// ------------------------------------------------------------------------
-#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
- OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
-}
-
-#endif // KMP_OS_WINDOWS
-
-// ------------------------------------------------------------------------
-// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
-ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
-ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
-ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
-ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
-
-// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
-ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
-ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
-
-ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
-#endif // KMP_HAVE_QUAD
-
-// Fix for CQ220361 on Windows* OS
-#if ( KMP_OS_WINDOWS )
- ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
-#else
- ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
-#endif
-ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
-ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
- ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
-#endif
-#endif
-
-
-// ------------------------------------------------------------------------
-// Atomic WRITE routines
-// ------------------------------------------------------------------------
-
-#define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
- KMP_XCHG_FIXED##BITS( lhs, rhs ); \
-}
-// ------------------------------------------------------------------------
-#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
- KMP_XCHG_REAL##BITS( lhs, rhs ); \
-}
-
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-#define OP_CMPXCHG_WR(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
- { \
- KMP_CPU_PAUSE(); \
- \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- } \
- }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
- OP_CMPXCHG_WR(TYPE,BITS,OP) \
-}
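An atomic write therefore comes in two flavors: an exchange that simply discards the previous value, and a compare-and-swap loop for widths where no single exchange instruction is available (as the 64-bit instantiations under #if ( KMP_ARCH_X86 ) below show). A C11 sketch of both (names illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    /* ATOMIC_XCHG_WR: store by exchanging and ignoring the old contents. */
    static void xchg_wr_32(_Atomic int32_t *lhs, int32_t rhs) {
        (void)atomic_exchange(lhs, rhs);
    }

    /* ATOMIC_CMPXCHG_WR: store via a CAS loop when a wide enough exchange is not available. */
    static void cmpxchg_wr_64(_Atomic int64_t *lhs, int64_t rhs) {
        int64_t old_value = atomic_load(lhs);
        while (!atomic_compare_exchange_weak(lhs, &old_value, rhs)) {
            /* old_value refreshed by the failed CAS; just try to store rhs again */
        }
    }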
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
- OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
-}
-// -------------------------------------------------------------------------
-
-ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
-ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
-ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
-#if ( KMP_ARCH_X86 )
- ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
-#else
- ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
-#endif
-
-ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
-#if ( KMP_ARCH_X86 )
- ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
-#else
- ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
-#endif
-
-ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
-#endif
-ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
-ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
-ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
- ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
-#endif
-#endif
-
-
-// ------------------------------------------------------------------------
-// Atomic CAPTURE routines
-// ------------------------------------------------------------------------
-
-// Beginning of a definition (provides name, parameters, debug trace)
-// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operands' type
-#define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
-RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
-
-// -------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_CPT(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if( flag ) { \
- (*lhs) OP rhs; \
- new_value = (*lhs); \
- } else { \
- new_value = (*lhs); \
- (*lhs) OP rhs; \
- } \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return new_value;
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT( OP##=, 0 ); \
- }
-#else
-#define OP_GOMP_CRITICAL_CPT(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-#define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = old_value OP rhs; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
- { \
- KMP_CPU_PAUSE(); \
- \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = old_value OP rhs; \
- } \
- if( flag ) { \
- return new_value; \
- } else \
- return old_value; \
- }
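The _cpt (capture) routines return a value as well as updating *lhs, and `flag` selects whether the caller gets the value after the update (v = x op= e) or the value before it ({ v = x; x op= e; }). A self-contained C11 sketch of the CAS-based capture (function name illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Shape of __kmpc_atomic_fixed4_mul_cpt: *lhs *= rhs atomically, returning the
       new value when flag is non-zero and the old value otherwise. */
    static int32_t fixed4_mul_cpt(_Atomic int32_t *lhs, int32_t rhs, int flag) {
        int32_t old_value = atomic_load(lhs);
        int32_t new_value = old_value * rhs;
        while (!atomic_compare_exchange_weak(lhs, &old_value, new_value)) {
            new_value = old_value * rhs;   /* old_value refreshed; recompute and retry */
        }
        return flag ? new_value : old_value;
    }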
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
- OP_CMPXCHG_CPT(TYPE,BITS,OP) \
-}
-
-// -------------------------------------------------------------------------
-#define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE old_value, new_value; \
- OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
- /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
- old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
- if( flag ) { \
- return old_value OP rhs; \
- } else \
- return old_value; \
-}
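For add and sub the captured value can be reconstructed from the fetch-and-add return value instead of re-reading memory; the OP sign is folded into rhs, so subtraction becomes a fetch-and-add of -rhs. A C11 equivalent (illustrative name):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Shape of __kmpc_atomic_fixed8_sub_cpt: fetch-and-add of -rhs, then derive the
       captured value from the previous contents it returns. */
    static int64_t fixed8_sub_cpt(_Atomic int64_t *lhs, int64_t rhs, int flag) {
        int64_t old_value = atomic_fetch_add(lhs, -rhs);
        return flag ? old_value - rhs : old_value;
    }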
-// -------------------------------------------------------------------------
-
-ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
-ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
-ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
-ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
-
-ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
-ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
-ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
-ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
-
-// ------------------------------------------------------------------------
-// Entries definition for integer operands
-// TYPE_ID - operands type and size (fixed4, float4)
-// OP_ID - operation identifier (add, sub, mul, ...)
-// TYPE - operand type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator (used in critical section)
-// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
-// ------------------------------------------------------------------------
-// Routines for ATOMIC integer operands, other operators
-// ------------------------------------------------------------------------
-// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
-ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
-ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
-ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
-ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
-ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
-ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
-// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
-
-// ------------------------------------------------------------------------
-// Routines for C/C++ Reduction operators && and ||
-// ------------------------------------------------------------------------
-
-// -------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_L_CPT(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if( flag ) { \
- new_value OP rhs; \
- } else \
- new_value = (*lhs); \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_L_CPT( OP, 0 ); \
- return new_value; \
- }
-#else
-#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// ------------------------------------------------------------------------
-// Need separate macros for &&, || because there is no combined assignment
-#define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
- OP_CMPXCHG_CPT(TYPE,BITS,OP) \
-}
-
-ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
-ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
-ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
-ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
-ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
-ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
-ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
-ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
-
-
-// -------------------------------------------------------------------------
-// Routines for Fortran operators that have no direct C counterpart:
-// MAX, MIN, .EQV., .NEQV.
-// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
-// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
-// -------------------------------------------------------------------------
-
-// -------------------------------------------------------------------------
-// MIN and MAX need separate macros
-// OP - operator used to check whether any action is needed at all
-#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if ( *lhs OP rhs ) { /* still need actions? */ \
- old_value = *lhs; \
- *lhs = rhs; \
- if ( flag ) \
- new_value = rhs; \
- else \
- new_value = old_value; \
- } \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return new_value; \
-
-// -------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
- if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
- KMP_CHECK_GTID; \
- MIN_MAX_CRITSECT_CPT( OP, 0 ); \
- }
-#else
-#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// -------------------------------------------------------------------------
-#define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- /*TYPE old_value; */ \
- temp_val = *lhs; \
- old_value = temp_val; \
- while ( old_value OP rhs && /* still need actions? */ \
- ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
- { \
- KMP_CPU_PAUSE(); \
- temp_val = *lhs; \
- old_value = temp_val; \
- } \
- if( flag ) \
- return rhs; \
- else \
- return old_value; \
- }
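The min/max capture loop only stores when rhs actually wins the comparison and gives up as soon as `old_value OP rhs` turns false, so a losing operand costs at most one read. A sketch of the 32-bit max capture built from MIN_MAX_COMPXCHG_CPT plus MIN_MAX_CMPXCHG_CPT (C11 atomics; name illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Shape of __kmpc_atomic_fixed4_max_cpt: install rhs only while it beats *lhs. */
    static int32_t fixed4_max_cpt(_Atomic int32_t *lhs, int32_t rhs, int flag) {
        int32_t old_value = atomic_load(lhs);
        if (old_value < rhs) {                               /* need any action at all? */
            while (old_value < rhs &&                        /* still need action?      */
                   !atomic_compare_exchange_weak(lhs, &old_value, rhs)) {
                /* old_value refreshed by the failed CAS; re-check and retry */
            }
            return flag ? rhs : old_value;                   /* as in MIN_MAX_CMPXCHG_CPT */
        }
        return old_value;                                    /* rhs did not win; no store */
    }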
-
-// -------------------------------------------------------------------------
-// 1-byte, 2-byte operands - use critical section
-#define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value, old_value; \
- if ( *lhs OP rhs ) { /* need actions? */ \
- GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
- MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
- } \
- return *lhs; \
-}
-
-#define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value, old_value; \
- if ( *lhs OP rhs ) { \
- GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
- MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
- } \
- return *lhs; \
-}
-
-
-MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
-MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
-MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
-MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
-MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
-MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
-MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
-MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
-MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
-MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
-MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
-MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
-#if KMP_HAVE_QUAD
-MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
-MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
-#if ( KMP_ARCH_X86 )
- MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
- MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
-#endif
-#endif
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT( OP, 0 ); \
- }
-#else
-#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-#define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
- OP_CMPXCHG_CPT(TYPE,BITS,OP) \
-}
-
-// ------------------------------------------------------------------------
-
-ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
-ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
-ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
-ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
-ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
-ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
-ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
-ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
-}
-
-// ------------------------------------------------------------------------
-
-// Workaround for cmplx4. Regular routines with return value don't work
-// on Win_32e. Let's return captured values through the additional parameter.
-#define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if( flag ) { \
- (*lhs) OP rhs; \
- (*out) = (*lhs); \
- } else { \
- (*out) = (*lhs); \
- (*lhs) OP rhs; \
- } \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return;
-// ------------------------------------------------------------------------
-
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
- }
-#else
-#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-
-#define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
-void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
-// ------------------------------------------------------------------------
-
-#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
- OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
- OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
-}
-// The end of workaround for cmplx4
-
-/* ------------------------------------------------------------------------- */
-// routines for long double type
-ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
-ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
-ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
-ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
-#if KMP_HAVE_QUAD
-// routines for _Quad type
-ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
-ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
-ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
-ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
- ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
- ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
- ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
-#endif
-#endif
-
-// routines for complex types
-
-// cmplx4 routines to return void
-ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
-ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
-ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
-ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
-
-ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
-ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
-ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
-ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
-ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
-ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
-ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
-ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
-ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
-ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
-ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
- ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
- ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
- ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
-#endif
-#endif
-
-#if OMP_40_ENABLED
-
-// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
-// Supported only on IA-32 architecture and Intel(R) 64
-
-// -------------------------------------------------------------------------
-// Operation on *lhs, rhs bound by critical section
-// OP - operator (it's supposed to contain an assignment)
-// LCK_ID - lock identifier
-// Note: don't check gtid as it should always be valid
-// 1, 2-byte - expect valid parameter, other - check before this macro
-#define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if( flag ) { \
- /*temp_val = (*lhs);*/\
- (*lhs) = (rhs) OP (*lhs); \
- new_value = (*lhs); \
- } else { \
- new_value = (*lhs);\
- (*lhs) = (rhs) OP (*lhs); \
- } \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return new_value;
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT_REV( OP, 0 ); \
- }
-#else
-#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-// ------------------------------------------------------------------------
-// Operation on *lhs, rhs using "compare_and_store" routine
-// TYPE - operands' type
-// BITS - size in bits, used to distinguish low level calls
-// OP - operator
-// Note: temp_val introduced in order to force the compiler to read
-// *lhs only once (w/o it the compiler reads *lhs twice)
-#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs OP old_value; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
- { \
- KMP_CPU_PAUSE(); \
- \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs OP old_value; \
- } \
- if( flag ) { \
- return new_value; \
- } else \
- return old_value; \
- }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
- OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
-}
-
-
-ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
-ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
-// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
-
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-// TYPE_ID, OP_ID, TYPE - detailed above
-// OP - operator
-// LCK_ID - lock identifier, used to possibly distinguish lock variable
-#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
- TYPE new_value; \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
- OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
- OP_CRITICAL_CPT_REV(OP,LCK_ID) \
-}
-
-
-/* ------------------------------------------------------------------------- */
-// routines for long double type
-ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
-#if KMP_HAVE_QUAD
-// routines for _Quad type
-ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
- ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
-#endif
-#endif
-
-// routines for complex types
-
-// ------------------------------------------------------------------------
-
-// Workaround for cmplx4. Regular routines with return value don't work
-// on Win_32e. Let's return captured values through the additional parameter.
-#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- if( flag ) { \
- (*lhs) = (rhs) OP (*lhs); \
- (*out) = (*lhs); \
- } else { \
- (*out) = (*lhs); \
- (*lhs) = (rhs) OP (*lhs); \
- } \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return;
-// ------------------------------------------------------------------------
-
-#ifdef KMP_GOMP_COMPAT
-#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
- }
-#else
-#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-
-#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
- OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
- OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
-}
-// The end of workaround for cmplx4
-
-
-// !!! TODO: check if we need to return void for cmplx4 routines
-// cmplx4 routines to return void
-ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
-
-ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
-ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
-ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
- ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
-#endif
-#endif
-
-// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
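-//
-// Illustrative sketch only (hypothetical name, atomicity omitted): a swap routine
-// stores the new value and returns the previous contents of the location:
-//
-//   kmp_int32 swp_sketch( kmp_int32 *lhs, kmp_int32 rhs ) {
-//       kmp_int32 old_value = *lhs;   // v = x
-//       *lhs = rhs;                   // x = expr
-//       return old_value;
-//   }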
-
-#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
-TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
-
-#define CRITICAL_SWP(LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- old_value = (*lhs); \
- (*lhs) = rhs; \
- \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return old_value;
-
-// ------------------------------------------------------------------------
-#ifdef KMP_GOMP_COMPAT
-#define GOMP_CRITICAL_SWP(FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- CRITICAL_SWP( 0 ); \
- }
-#else
-#define GOMP_CRITICAL_SWP(FLAG)
-#endif /* KMP_GOMP_COMPAT */
-
-
-#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
-ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
- return old_value; \
-}
-// ------------------------------------------------------------------------
-#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
-ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
- return old_value; \
-}
-
-// ------------------------------------------------------------------------
-#define CMPXCHG_SWP(TYPE,BITS) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
- { \
- KMP_CPU_PAUSE(); \
- \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- } \
- return old_value; \
- }
-
-// -------------------------------------------------------------------------
-#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
-ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- CMPXCHG_SWP(TYPE,BITS) \
-}
-
-ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
-ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
-ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
-
-ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
-
-#if ( KMP_ARCH_X86 )
- ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
- ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
-#else
- ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
- ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
-#endif
-
-// ------------------------------------------------------------------------
-// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
-#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- CRITICAL_SWP(LCK_ID) \
-}
-
-// ------------------------------------------------------------------------
-
-// !!! TODO: check if we need to return void for cmplx4 routines
-// Workaround for cmplx4. Regular routines with return value don't work
-// on Win_32e. Let's return captured values through the additional parameter.
-
-#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
-void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
-{ \
- KMP_DEBUG_ASSERT( __kmp_init_serial ); \
- KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
-
-
-#define CRITICAL_SWP_WRK(LCK_ID) \
- __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- \
- tmp = (*lhs); \
- (*lhs) = (rhs); \
- (*out) = tmp; \
- __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
- return;
-
-// ------------------------------------------------------------------------
-
-#ifdef KMP_GOMP_COMPAT
-#define GOMP_CRITICAL_SWP_WRK(FLAG) \
- if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
- KMP_CHECK_GTID; \
- CRITICAL_SWP_WRK( 0 ); \
- }
-#else
-#define GOMP_CRITICAL_SWP_WRK(FLAG)
-#endif /* KMP_GOMP_COMPAT */
-// ------------------------------------------------------------------------
-
-#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
-ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
- TYPE tmp; \
- GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
- CRITICAL_SWP_WRK(LCK_ID) \
-}
-// The end of workaround for cmplx4
-
-
-ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
-#endif
-// cmplx4 routine to return void
-ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
-
-//ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
-
-
-ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
-ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
-#if KMP_HAVE_QUAD
-ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
-#if ( KMP_ARCH_X86 )
- ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
- ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
-#endif
-#endif
-
-
-// End of OpenMP 4.0 Capture
-
-#endif //OMP_40_ENABLED
-
-#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-
-#undef OP_CRITICAL
-
-/* ------------------------------------------------------------------------ */
-/* Generic atomic routines */
-/* ------------------------------------------------------------------------ */
-
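-// The generic entry points below take the operation itself as a callback: the
-// compiler passes a combiner f( result, op1, op2 ) and the runtime applies it
-// either inside a compare-and-store retry loop or under a size-specific lock.
-// A minimal usage sketch (hypothetical caller and callback, not part of the
-// runtime; gtid is assumed to be a valid global thread id):
-//
-//   static void add_int32( void *out, void *a, void *b ) {
-//       *(kmp_int32 *)out = *(kmp_int32 *)a + *(kmp_int32 *)b;
-//   }
-//   ...
-//   kmp_int32 x = 0, delta = 5;
-//   __kmpc_atomic_4( NULL, gtid, &x, &delta, add_int32 );   // atomically x += delta
-//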
-void
-__kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- if (
-#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- FALSE /* must use lock */
-#else
- TRUE
-#endif
- )
- {
- kmp_int8 old_value, new_value;
-
- old_value = *(kmp_int8 *) lhs;
- (*f)( &new_value, &old_value, rhs );
-
- /* TODO: Should this be acquire or release? */
- while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
- *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
- {
- KMP_CPU_PAUSE();
-
- old_value = *(kmp_int8 *) lhs;
- (*f)( &new_value, &old_value, rhs );
- }
-
- return;
- }
- else {
- //
- // All 1-byte data is of integer data type.
- //
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
- }
-}
-
-void
-__kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- if (
-#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- FALSE /* must use lock */
-#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
- TRUE /* no alignment problems */
-#else
- ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
-#endif
- )
- {
- kmp_int16 old_value, new_value;
-
- old_value = *(kmp_int16 *) lhs;
- (*f)( &new_value, &old_value, rhs );
-
- /* TODO: Should this be acquire or release? */
- while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
- *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
- {
- KMP_CPU_PAUSE();
-
- old_value = *(kmp_int16 *) lhs;
- (*f)( &new_value, &old_value, rhs );
- }
-
- return;
- }
- else {
- //
- // All 2-byte data is of integer data type.
- //
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
- }
-}
-
-void
-__kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- if (
- //
- // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
- // Gomp compatibility is broken if this routine is called for floats.
- //
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- TRUE /* no alignment problems */
-#else
- ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
-#endif
- )
- {
- kmp_int32 old_value, new_value;
-
- old_value = *(kmp_int32 *) lhs;
- (*f)( &new_value, &old_value, rhs );
-
- /* TODO: Should this be acquire or release? */
- while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
- *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
- {
- KMP_CPU_PAUSE();
-
- old_value = *(kmp_int32 *) lhs;
- (*f)( &new_value, &old_value, rhs );
- }
-
- return;
- }
- else {
- //
- // Use __kmp_atomic_lock_4i for all 4-byte data,
- // even if it isn't of integer data type.
- //
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
- }
-}
-
-void
-__kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- if (
-
-#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- FALSE /* must use lock */
-#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
- TRUE /* no alignment problems */
-#else
- ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
-#endif
- )
- {
- kmp_int64 old_value, new_value;
-
- old_value = *(kmp_int64 *) lhs;
- (*f)( &new_value, &old_value, rhs );
- /* TODO: Should this be acquire or release? */
- while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
- *(kmp_int64 *) &old_value,
- *(kmp_int64 *) &new_value ) )
- {
- KMP_CPU_PAUSE();
-
- old_value = *(kmp_int64 *) lhs;
- (*f)( &new_value, &old_value, rhs );
- }
-
- return;
- } else {
- //
- // Use __kmp_atomic_lock_8i for all 8-byte data,
- // even if it isn't of integer data type.
- //
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
- }
-}
-
-void
-__kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
-}
-
-void
-__kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
-}
-
-void
-__kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
-}
-
-void
-__kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
-
- (*f)( lhs, lhs, rhs );
-
-#ifdef KMP_GOMP_COMPAT
- if ( __kmp_atomic_mode == 2 ) {
- __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
-}
-
-// AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler
-// duplicated in order not to use third-party names in pure Intel code
-// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
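-//
-// Illustrative usage sketch (hypothetical compiler output for an atomic update
-// that has no specialized entry point): the pair brackets the update with the
-// global atomic lock, just like GOMP_atomic_start()/GOMP_atomic_end():
-//
-//   __kmpc_atomic_start();
-//   x = f( x );            // arbitrary update, protected by __kmp_atomic_lock
-//   __kmpc_atomic_end();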
-void
-__kmpc_atomic_start(void)
-{
- int gtid = __kmp_entry_gtid();
- KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
-}
-
-
-void
-__kmpc_atomic_end(void)
-{
- int gtid = __kmp_get_gtid();
- KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-/*!
-@}
-*/
-
-// end of file
+/*
+ * kmp_atomic.c -- ATOMIC implementation routines
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp_atomic.h"
+#include "kmp.h" // TRUE, asm routines prototypes
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+
+/*!
+@defgroup ATOMIC_OPS Atomic Operations
+These functions are used for implementing the many different varieties of atomic operations.
+
+The compiler is at liberty to inline atomic operations that are naturally supported
+by the target architecture. For instance on IA-32 architecture an atomic like this can be inlined
+@code
+static int s = 0;
+#pragma omp atomic
+ s++;
+@endcode
+using the single instruction: `lock; incl s`
+
+However the runtime does provide entrypoints for these operations to support compilers that choose
+not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the
+increment above.)
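+
+As a rough sketch (not necessarily the exact code a given compiler emits), the increment above
+could instead be lowered to a call such as
+@code
+__kmpc_atomic_fixed4_add( &loc, gtid, &s, 1 );   // atomically performs s += 1
+@endcode
+where `loc` is a source location descriptor and `gtid` is the calling thread's global id.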
+
+The names of the functions are encoded by using the data type name and the operation name, as in these tables.
+
+Data Type | Data type encoding
+-----------|---------------
+int8_t | `fixed1`
+uint8_t | `fixed1u`
+int16_t | `fixed2`
+uint16_t | `fixed2u`
+int32_t | `fixed4`
+uint32_t | `fixed4u`
+int64_t | `fixed8`
+uint64_t | `fixed8u`
+float | `float4`
+double | `float8`
+long double (80-bit x87 extended float) | `float10`
+complex<float> | `cmplx4`
+complex<double> | `cmplx8`
+complex<float10> | `cmplx10`
+<br>
+
+Operation | Operation encoding
+----------|-------------------
++ | add
+- | sub
+\* | mul
+/ | div
+& | andb
+<< | shl
+\>\> | shr
+\| | orb
+^ | xor
+&& | andl
+\|\| | orl
+maximum | max
+minimum | min
+.eqv. | eqv
+.neqv. | neqv
+
+<br>
+For non-commutative operations, `_rev` can also be added for the reversed operation.
+For the functions that capture the result, the suffix `_cpt` is added.
+
+Update Functions
+================
+The general form of an atomic function that just performs an update (without a `capture`)
+@code
+void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
+@endcode
+@param id_ref a pointer to source location
+@param gtid the global thread id
+@param lhs a pointer to the left operand
+@param rhs the right operand
+
+`capture` functions
+===================
+The capture functions perform an atomic update and return a result, which is either the value
+before the update or the value after it. They take an additional argument to determine which result is returned.
+Their general form is therefore
+@code
+TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag );
+@endcode
+@param id_ref a pointer to source location
+@param gtid the global thread id
+@param lhs a pointer to the left operand
+@param rhs the right operand
+@param flag one if the result is to be captured *after* the operation, zero if captured *before*.
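+
+For example (illustrative only), a captured atomic increment of a 32 bit integer `s` could be
+@code
+int v_before = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &s, 1, 0 );  // v = s; s += 1
+int v_after  = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &s, 1, 1 );  // s += 1; v = s
+@endcode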
+
+The one exception to this is the `complex<float>` type, where the value is not returned;
+rather, an extra argument pointer is passed.
+
+They look like
+@code
+void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
+@endcode
+
+Read and Write Operations
+=========================
+The OpenMP<sup>*</sup> standard now supports atomic operations that simply ensure that the
+value is read or written atomically, with no modification
+performed. In many cases on IA-32 architecture these operations can be inlined since
+the architecture guarantees that no tearing occurs on aligned objects
+accessed with a single memory operation of up to 64 bits in size.
+
+The general form of the read operations is
+@code
+TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
+@endcode
+
+For the write operations the form is
+@code
+void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
+@endcode
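+
+For instance (illustrative only), an atomic read of a `double` and an atomic write of a 32 bit
+integer could be performed as
+@code
+double v = __kmpc_atomic_float8_rd( &loc, gtid, &x );
+__kmpc_atomic_fixed4_wr( &loc, gtid, &i, 42 );
+@endcode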
+
+Full list of functions
+======================
+This leads to the generation of 376 atomic functions, as follows.
+
+Functions for integers
+----------------------
+There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed and unsigned (where that matters).
+@code
+ __kmpc_atomic_fixed1_add
+ __kmpc_atomic_fixed1_add_cpt
+ __kmpc_atomic_fixed1_add_fp
+ __kmpc_atomic_fixed1_andb
+ __kmpc_atomic_fixed1_andb_cpt
+ __kmpc_atomic_fixed1_andl
+ __kmpc_atomic_fixed1_andl_cpt
+ __kmpc_atomic_fixed1_div
+ __kmpc_atomic_fixed1_div_cpt
+ __kmpc_atomic_fixed1_div_cpt_rev
+ __kmpc_atomic_fixed1_div_float8
+ __kmpc_atomic_fixed1_div_fp
+ __kmpc_atomic_fixed1_div_rev
+ __kmpc_atomic_fixed1_eqv
+ __kmpc_atomic_fixed1_eqv_cpt
+ __kmpc_atomic_fixed1_max
+ __kmpc_atomic_fixed1_max_cpt
+ __kmpc_atomic_fixed1_min
+ __kmpc_atomic_fixed1_min_cpt
+ __kmpc_atomic_fixed1_mul
+ __kmpc_atomic_fixed1_mul_cpt
+ __kmpc_atomic_fixed1_mul_float8
+ __kmpc_atomic_fixed1_mul_fp
+ __kmpc_atomic_fixed1_neqv
+ __kmpc_atomic_fixed1_neqv_cpt
+ __kmpc_atomic_fixed1_orb
+ __kmpc_atomic_fixed1_orb_cpt
+ __kmpc_atomic_fixed1_orl
+ __kmpc_atomic_fixed1_orl_cpt
+ __kmpc_atomic_fixed1_rd
+ __kmpc_atomic_fixed1_shl
+ __kmpc_atomic_fixed1_shl_cpt
+ __kmpc_atomic_fixed1_shl_cpt_rev
+ __kmpc_atomic_fixed1_shl_rev
+ __kmpc_atomic_fixed1_shr
+ __kmpc_atomic_fixed1_shr_cpt
+ __kmpc_atomic_fixed1_shr_cpt_rev
+ __kmpc_atomic_fixed1_shr_rev
+ __kmpc_atomic_fixed1_sub
+ __kmpc_atomic_fixed1_sub_cpt
+ __kmpc_atomic_fixed1_sub_cpt_rev
+ __kmpc_atomic_fixed1_sub_fp
+ __kmpc_atomic_fixed1_sub_rev
+ __kmpc_atomic_fixed1_swp
+ __kmpc_atomic_fixed1_wr
+ __kmpc_atomic_fixed1_xor
+ __kmpc_atomic_fixed1_xor_cpt
+ __kmpc_atomic_fixed1u_div
+ __kmpc_atomic_fixed1u_div_cpt
+ __kmpc_atomic_fixed1u_div_cpt_rev
+ __kmpc_atomic_fixed1u_div_fp
+ __kmpc_atomic_fixed1u_div_rev
+ __kmpc_atomic_fixed1u_shr
+ __kmpc_atomic_fixed1u_shr_cpt
+ __kmpc_atomic_fixed1u_shr_cpt_rev
+ __kmpc_atomic_fixed1u_shr_rev
+ __kmpc_atomic_fixed2_add
+ __kmpc_atomic_fixed2_add_cpt
+ __kmpc_atomic_fixed2_add_fp
+ __kmpc_atomic_fixed2_andb
+ __kmpc_atomic_fixed2_andb_cpt
+ __kmpc_atomic_fixed2_andl
+ __kmpc_atomic_fixed2_andl_cpt
+ __kmpc_atomic_fixed2_div
+ __kmpc_atomic_fixed2_div_cpt
+ __kmpc_atomic_fixed2_div_cpt_rev
+ __kmpc_atomic_fixed2_div_float8
+ __kmpc_atomic_fixed2_div_fp
+ __kmpc_atomic_fixed2_div_rev
+ __kmpc_atomic_fixed2_eqv
+ __kmpc_atomic_fixed2_eqv_cpt
+ __kmpc_atomic_fixed2_max
+ __kmpc_atomic_fixed2_max_cpt
+ __kmpc_atomic_fixed2_min
+ __kmpc_atomic_fixed2_min_cpt
+ __kmpc_atomic_fixed2_mul
+ __kmpc_atomic_fixed2_mul_cpt
+ __kmpc_atomic_fixed2_mul_float8
+ __kmpc_atomic_fixed2_mul_fp
+ __kmpc_atomic_fixed2_neqv
+ __kmpc_atomic_fixed2_neqv_cpt
+ __kmpc_atomic_fixed2_orb
+ __kmpc_atomic_fixed2_orb_cpt
+ __kmpc_atomic_fixed2_orl
+ __kmpc_atomic_fixed2_orl_cpt
+ __kmpc_atomic_fixed2_rd
+ __kmpc_atomic_fixed2_shl
+ __kmpc_atomic_fixed2_shl_cpt
+ __kmpc_atomic_fixed2_shl_cpt_rev
+ __kmpc_atomic_fixed2_shl_rev
+ __kmpc_atomic_fixed2_shr
+ __kmpc_atomic_fixed2_shr_cpt
+ __kmpc_atomic_fixed2_shr_cpt_rev
+ __kmpc_atomic_fixed2_shr_rev
+ __kmpc_atomic_fixed2_sub
+ __kmpc_atomic_fixed2_sub_cpt
+ __kmpc_atomic_fixed2_sub_cpt_rev
+ __kmpc_atomic_fixed2_sub_fp
+ __kmpc_atomic_fixed2_sub_rev
+ __kmpc_atomic_fixed2_swp
+ __kmpc_atomic_fixed2_wr
+ __kmpc_atomic_fixed2_xor
+ __kmpc_atomic_fixed2_xor_cpt
+ __kmpc_atomic_fixed2u_div
+ __kmpc_atomic_fixed2u_div_cpt
+ __kmpc_atomic_fixed2u_div_cpt_rev
+ __kmpc_atomic_fixed2u_div_fp
+ __kmpc_atomic_fixed2u_div_rev
+ __kmpc_atomic_fixed2u_shr
+ __kmpc_atomic_fixed2u_shr_cpt
+ __kmpc_atomic_fixed2u_shr_cpt_rev
+ __kmpc_atomic_fixed2u_shr_rev
+ __kmpc_atomic_fixed4_add
+ __kmpc_atomic_fixed4_add_cpt
+ __kmpc_atomic_fixed4_add_fp
+ __kmpc_atomic_fixed4_andb
+ __kmpc_atomic_fixed4_andb_cpt
+ __kmpc_atomic_fixed4_andl
+ __kmpc_atomic_fixed4_andl_cpt
+ __kmpc_atomic_fixed4_div
+ __kmpc_atomic_fixed4_div_cpt
+ __kmpc_atomic_fixed4_div_cpt_rev
+ __kmpc_atomic_fixed4_div_float8
+ __kmpc_atomic_fixed4_div_fp
+ __kmpc_atomic_fixed4_div_rev
+ __kmpc_atomic_fixed4_eqv
+ __kmpc_atomic_fixed4_eqv_cpt
+ __kmpc_atomic_fixed4_max
+ __kmpc_atomic_fixed4_max_cpt
+ __kmpc_atomic_fixed4_min
+ __kmpc_atomic_fixed4_min_cpt
+ __kmpc_atomic_fixed4_mul
+ __kmpc_atomic_fixed4_mul_cpt
+ __kmpc_atomic_fixed4_mul_float8
+ __kmpc_atomic_fixed4_mul_fp
+ __kmpc_atomic_fixed4_neqv
+ __kmpc_atomic_fixed4_neqv_cpt
+ __kmpc_atomic_fixed4_orb
+ __kmpc_atomic_fixed4_orb_cpt
+ __kmpc_atomic_fixed4_orl
+ __kmpc_atomic_fixed4_orl_cpt
+ __kmpc_atomic_fixed4_rd
+ __kmpc_atomic_fixed4_shl
+ __kmpc_atomic_fixed4_shl_cpt
+ __kmpc_atomic_fixed4_shl_cpt_rev
+ __kmpc_atomic_fixed4_shl_rev
+ __kmpc_atomic_fixed4_shr
+ __kmpc_atomic_fixed4_shr_cpt
+ __kmpc_atomic_fixed4_shr_cpt_rev
+ __kmpc_atomic_fixed4_shr_rev
+ __kmpc_atomic_fixed4_sub
+ __kmpc_atomic_fixed4_sub_cpt
+ __kmpc_atomic_fixed4_sub_cpt_rev
+ __kmpc_atomic_fixed4_sub_fp
+ __kmpc_atomic_fixed4_sub_rev
+ __kmpc_atomic_fixed4_swp
+ __kmpc_atomic_fixed4_wr
+ __kmpc_atomic_fixed4_xor
+ __kmpc_atomic_fixed4_xor_cpt
+ __kmpc_atomic_fixed4u_div
+ __kmpc_atomic_fixed4u_div_cpt
+ __kmpc_atomic_fixed4u_div_cpt_rev
+ __kmpc_atomic_fixed4u_div_fp
+ __kmpc_atomic_fixed4u_div_rev
+ __kmpc_atomic_fixed4u_shr
+ __kmpc_atomic_fixed4u_shr_cpt
+ __kmpc_atomic_fixed4u_shr_cpt_rev
+ __kmpc_atomic_fixed4u_shr_rev
+ __kmpc_atomic_fixed8_add
+ __kmpc_atomic_fixed8_add_cpt
+ __kmpc_atomic_fixed8_add_fp
+ __kmpc_atomic_fixed8_andb
+ __kmpc_atomic_fixed8_andb_cpt
+ __kmpc_atomic_fixed8_andl
+ __kmpc_atomic_fixed8_andl_cpt
+ __kmpc_atomic_fixed8_div
+ __kmpc_atomic_fixed8_div_cpt
+ __kmpc_atomic_fixed8_div_cpt_rev
+ __kmpc_atomic_fixed8_div_float8
+ __kmpc_atomic_fixed8_div_fp
+ __kmpc_atomic_fixed8_div_rev
+ __kmpc_atomic_fixed8_eqv
+ __kmpc_atomic_fixed8_eqv_cpt
+ __kmpc_atomic_fixed8_max
+ __kmpc_atomic_fixed8_max_cpt
+ __kmpc_atomic_fixed8_min
+ __kmpc_atomic_fixed8_min_cpt
+ __kmpc_atomic_fixed8_mul
+ __kmpc_atomic_fixed8_mul_cpt
+ __kmpc_atomic_fixed8_mul_float8
+ __kmpc_atomic_fixed8_mul_fp
+ __kmpc_atomic_fixed8_neqv
+ __kmpc_atomic_fixed8_neqv_cpt
+ __kmpc_atomic_fixed8_orb
+ __kmpc_atomic_fixed8_orb_cpt
+ __kmpc_atomic_fixed8_orl
+ __kmpc_atomic_fixed8_orl_cpt
+ __kmpc_atomic_fixed8_rd
+ __kmpc_atomic_fixed8_shl
+ __kmpc_atomic_fixed8_shl_cpt
+ __kmpc_atomic_fixed8_shl_cpt_rev
+ __kmpc_atomic_fixed8_shl_rev
+ __kmpc_atomic_fixed8_shr
+ __kmpc_atomic_fixed8_shr_cpt
+ __kmpc_atomic_fixed8_shr_cpt_rev
+ __kmpc_atomic_fixed8_shr_rev
+ __kmpc_atomic_fixed8_sub
+ __kmpc_atomic_fixed8_sub_cpt
+ __kmpc_atomic_fixed8_sub_cpt_rev
+ __kmpc_atomic_fixed8_sub_fp
+ __kmpc_atomic_fixed8_sub_rev
+ __kmpc_atomic_fixed8_swp
+ __kmpc_atomic_fixed8_wr
+ __kmpc_atomic_fixed8_xor
+ __kmpc_atomic_fixed8_xor_cpt
+ __kmpc_atomic_fixed8u_div
+ __kmpc_atomic_fixed8u_div_cpt
+ __kmpc_atomic_fixed8u_div_cpt_rev
+ __kmpc_atomic_fixed8u_div_fp
+ __kmpc_atomic_fixed8u_div_rev
+ __kmpc_atomic_fixed8u_shr
+ __kmpc_atomic_fixed8u_shr_cpt
+ __kmpc_atomic_fixed8u_shr_cpt_rev
+ __kmpc_atomic_fixed8u_shr_rev
+@endcode
+
+Functions for floating point
+----------------------------
+There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes.
+(Ten byte floats are used by the x87 floating point unit, but are now rare.)
+@code
+ __kmpc_atomic_float4_add
+ __kmpc_atomic_float4_add_cpt
+ __kmpc_atomic_float4_add_float8
+ __kmpc_atomic_float4_add_fp
+ __kmpc_atomic_float4_div
+ __kmpc_atomic_float4_div_cpt
+ __kmpc_atomic_float4_div_cpt_rev
+ __kmpc_atomic_float4_div_float8
+ __kmpc_atomic_float4_div_fp
+ __kmpc_atomic_float4_div_rev
+ __kmpc_atomic_float4_max
+ __kmpc_atomic_float4_max_cpt
+ __kmpc_atomic_float4_min
+ __kmpc_atomic_float4_min_cpt
+ __kmpc_atomic_float4_mul
+ __kmpc_atomic_float4_mul_cpt
+ __kmpc_atomic_float4_mul_float8
+ __kmpc_atomic_float4_mul_fp
+ __kmpc_atomic_float4_rd
+ __kmpc_atomic_float4_sub
+ __kmpc_atomic_float4_sub_cpt
+ __kmpc_atomic_float4_sub_cpt_rev
+ __kmpc_atomic_float4_sub_float8
+ __kmpc_atomic_float4_sub_fp
+ __kmpc_atomic_float4_sub_rev
+ __kmpc_atomic_float4_swp
+ __kmpc_atomic_float4_wr
+ __kmpc_atomic_float8_add
+ __kmpc_atomic_float8_add_cpt
+ __kmpc_atomic_float8_add_fp
+ __kmpc_atomic_float8_div
+ __kmpc_atomic_float8_div_cpt
+ __kmpc_atomic_float8_div_cpt_rev
+ __kmpc_atomic_float8_div_fp
+ __kmpc_atomic_float8_div_rev
+ __kmpc_atomic_float8_max
+ __kmpc_atomic_float8_max_cpt
+ __kmpc_atomic_float8_min
+ __kmpc_atomic_float8_min_cpt
+ __kmpc_atomic_float8_mul
+ __kmpc_atomic_float8_mul_cpt
+ __kmpc_atomic_float8_mul_fp
+ __kmpc_atomic_float8_rd
+ __kmpc_atomic_float8_sub
+ __kmpc_atomic_float8_sub_cpt
+ __kmpc_atomic_float8_sub_cpt_rev
+ __kmpc_atomic_float8_sub_fp
+ __kmpc_atomic_float8_sub_rev
+ __kmpc_atomic_float8_swp
+ __kmpc_atomic_float8_wr
+ __kmpc_atomic_float10_add
+ __kmpc_atomic_float10_add_cpt
+ __kmpc_atomic_float10_add_fp
+ __kmpc_atomic_float10_div
+ __kmpc_atomic_float10_div_cpt
+ __kmpc_atomic_float10_div_cpt_rev
+ __kmpc_atomic_float10_div_fp
+ __kmpc_atomic_float10_div_rev
+ __kmpc_atomic_float10_mul
+ __kmpc_atomic_float10_mul_cpt
+ __kmpc_atomic_float10_mul_fp
+ __kmpc_atomic_float10_rd
+ __kmpc_atomic_float10_sub
+ __kmpc_atomic_float10_sub_cpt
+ __kmpc_atomic_float10_sub_cpt_rev
+ __kmpc_atomic_float10_sub_fp
+ __kmpc_atomic_float10_sub_rev
+ __kmpc_atomic_float10_swp
+ __kmpc_atomic_float10_wr
+ __kmpc_atomic_float16_add
+ __kmpc_atomic_float16_add_cpt
+ __kmpc_atomic_float16_div
+ __kmpc_atomic_float16_div_cpt
+ __kmpc_atomic_float16_div_cpt_rev
+ __kmpc_atomic_float16_div_rev
+ __kmpc_atomic_float16_max
+ __kmpc_atomic_float16_max_cpt
+ __kmpc_atomic_float16_min
+ __kmpc_atomic_float16_min_cpt
+ __kmpc_atomic_float16_mul
+ __kmpc_atomic_float16_mul_cpt
+ __kmpc_atomic_float16_rd
+ __kmpc_atomic_float16_sub
+ __kmpc_atomic_float16_sub_cpt
+ __kmpc_atomic_float16_sub_cpt_rev
+ __kmpc_atomic_float16_sub_rev
+ __kmpc_atomic_float16_swp
+ __kmpc_atomic_float16_wr
+@endcode
+
+Functions for Complex types
+---------------------------
+Functions for complex types whose component floating point variables are of size 4, 8, 10 or 16 bytes.
+The names here are based on the size of the component float, *not* the size of the complex type. So
+`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* a `complex<float>`.
+
+@code
+ __kmpc_atomic_cmplx4_add
+ __kmpc_atomic_cmplx4_add_cmplx8
+ __kmpc_atomic_cmplx4_add_cpt
+ __kmpc_atomic_cmplx4_div
+ __kmpc_atomic_cmplx4_div_cmplx8
+ __kmpc_atomic_cmplx4_div_cpt
+ __kmpc_atomic_cmplx4_div_cpt_rev
+ __kmpc_atomic_cmplx4_div_rev
+ __kmpc_atomic_cmplx4_mul
+ __kmpc_atomic_cmplx4_mul_cmplx8
+ __kmpc_atomic_cmplx4_mul_cpt
+ __kmpc_atomic_cmplx4_rd
+ __kmpc_atomic_cmplx4_sub
+ __kmpc_atomic_cmplx4_sub_cmplx8
+ __kmpc_atomic_cmplx4_sub_cpt
+ __kmpc_atomic_cmplx4_sub_cpt_rev
+ __kmpc_atomic_cmplx4_sub_rev
+ __kmpc_atomic_cmplx4_swp
+ __kmpc_atomic_cmplx4_wr
+ __kmpc_atomic_cmplx8_add
+ __kmpc_atomic_cmplx8_add_cpt
+ __kmpc_atomic_cmplx8_div
+ __kmpc_atomic_cmplx8_div_cpt
+ __kmpc_atomic_cmplx8_div_cpt_rev
+ __kmpc_atomic_cmplx8_div_rev
+ __kmpc_atomic_cmplx8_mul
+ __kmpc_atomic_cmplx8_mul_cpt
+ __kmpc_atomic_cmplx8_rd
+ __kmpc_atomic_cmplx8_sub
+ __kmpc_atomic_cmplx8_sub_cpt
+ __kmpc_atomic_cmplx8_sub_cpt_rev
+ __kmpc_atomic_cmplx8_sub_rev
+ __kmpc_atomic_cmplx8_swp
+ __kmpc_atomic_cmplx8_wr
+ __kmpc_atomic_cmplx10_add
+ __kmpc_atomic_cmplx10_add_cpt
+ __kmpc_atomic_cmplx10_div
+ __kmpc_atomic_cmplx10_div_cpt
+ __kmpc_atomic_cmplx10_div_cpt_rev
+ __kmpc_atomic_cmplx10_div_rev
+ __kmpc_atomic_cmplx10_mul
+ __kmpc_atomic_cmplx10_mul_cpt
+ __kmpc_atomic_cmplx10_rd
+ __kmpc_atomic_cmplx10_sub
+ __kmpc_atomic_cmplx10_sub_cpt
+ __kmpc_atomic_cmplx10_sub_cpt_rev
+ __kmpc_atomic_cmplx10_sub_rev
+ __kmpc_atomic_cmplx10_swp
+ __kmpc_atomic_cmplx10_wr
+ __kmpc_atomic_cmplx16_add
+ __kmpc_atomic_cmplx16_add_cpt
+ __kmpc_atomic_cmplx16_div
+ __kmpc_atomic_cmplx16_div_cpt
+ __kmpc_atomic_cmplx16_div_cpt_rev
+ __kmpc_atomic_cmplx16_div_rev
+ __kmpc_atomic_cmplx16_mul
+ __kmpc_atomic_cmplx16_mul_cpt
+ __kmpc_atomic_cmplx16_rd
+ __kmpc_atomic_cmplx16_sub
+ __kmpc_atomic_cmplx16_sub_cpt
+ __kmpc_atomic_cmplx16_sub_cpt_rev
+ __kmpc_atomic_cmplx16_swp
+ __kmpc_atomic_cmplx16_wr
+@endcode
+*/
+
+/*!
+@ingroup ATOMIC_OPS
+@{
+*/
+
+/*
+ * Global vars
+ */
+
+#ifndef KMP_GOMP_COMPAT
+int __kmp_atomic_mode = 1; // Intel perf
+#else
+int __kmp_atomic_mode = 2; // GOMP compatibility
+#endif /* KMP_GOMP_COMPAT */
+
+KMP_ALIGN(128)
+
+kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
+kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
+kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
+kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
+kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
+kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
+kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
+kmp_atomic_lock_t __kmp_atomic_lock_8c;   /* Control access to all user coded atomics for float complex data type  */
+kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
+kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
+kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
+kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
+kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
+
+
+/*
+ 2007-03-02:
+ Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
+ bug on *_32 and *_32e. This is just a temporary workaround for the problem.
+ It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
+ routines in assembler language.
+*/
+#define KMP_ATOMIC_VOLATILE volatile
+
+#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
+
+ static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
+ static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
+ static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
+ static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
+ static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
+ static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
+
+ static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
+ static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
+ static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
+ static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
+ static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
+ static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
+
+ static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
+ static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
+ static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
+ static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
+
+ static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
+ static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
+ static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
+ static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
+
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ATOMIC implementation routines */
+/* one routine for each operation and operand type */
+/* ------------------------------------------------------------------------ */
+
+// All routine declarations look like
+// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
+// ------------------------------------------------------------------------
+
+#define KMP_CHECK_GTID \
+ if ( gtid == KMP_GTID_UNKNOWN ) { \
+ gtid = __kmp_entry_gtid(); \
+ } // check and get gtid when needed
+
+// Beginning of a definition (provides name, parameters, debug trace)
+// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned fixed)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operands' type
+#define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
+RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
+
+// ------------------------------------------------------------------------
+// Lock variables used for critical sections for various size operands
+#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
+#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
+#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
+#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
+#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
+#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
+#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
+#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
+#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
+#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
+#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
+#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
+#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs bound by critical section
+// OP - operator (it's supposed to contain an assignment)
+// LCK_ID - lock identifier
+// Note: don't check gtid as it should always be valid
+// 1, 2-byte - expect valid parameter, other - check before this macro
+#define OP_CRITICAL(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ (*lhs) OP (rhs); \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
+
+// ------------------------------------------------------------------------
+// For GNU compatibility, we may need to use a critical section,
+// even though it is not required by the ISA.
+//
+// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
+// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
+// critical section. On Intel(R) 64, all atomic operations are done with fetch
+// and add or compare and exchange. Therefore, the FLAG parameter to this
+// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
+// require a critical section, where we predict that they will be implemented
+// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
+//
+// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
+// the FLAG parameter should always be 1. If we know that we will be using
+// a critical section, then we want to make certain that we use the generic
+// lock __kmp_atomic_lock to protect the atomic update, and not one of the
+// locks that are specialized based upon the size or type of the data.
+//
+// If FLAG is 0, then we are relying on dead code elimination by the build
+// compiler to get rid of the useless block of code, and save a needless
+// branch at runtime.
+//
+
+#ifdef KMP_GOMP_COMPAT
+# define OP_GOMP_CRITICAL(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL( OP, 0 ); \
+ return; \
+ }
+# else
+# define OP_GOMP_CRITICAL(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+#if KMP_MIC
+# define KMP_DO_PAUSE _mm_delay_32( 1 )
+#else
+# define KMP_DO_PAUSE KMP_CPU_PAUSE()
+#endif /* KMP_MIC */
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs using "compare_and_store" routine
+// TYPE - operands' type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator
+#define OP_CMPXCHG(TYPE,BITS,OP) \
+ { \
+ TYPE old_value, new_value; \
+ old_value = *(TYPE volatile *)lhs; \
+ new_value = old_value OP rhs; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
+ { \
+ KMP_DO_PAUSE; \
+ \
+ old_value = *(TYPE volatile *)lhs; \
+ new_value = old_value OP rhs; \
+ } \
+ }
+
+#if USE_CMPXCHG_FIX
+// 2007-06-25:
+// workaround for C78287 (complex(kind=4) data type)
+// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
+// Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
+// This is a problem of the compiler.
+// Related tracker is C76005, targeted to 11.0.
+// I verified the asm of the workaround.
+#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
+ { \
+ struct _sss { \
+ TYPE cmp; \
+ kmp_int##BITS *vvv; \
+ }; \
+ struct _sss old_value, new_value; \
+ old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
+ new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
+ *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
+ new_value.cmp = old_value.cmp OP rhs; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
+ *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
+ { \
+ KMP_DO_PAUSE; \
+ \
+ *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
+ new_value.cmp = old_value.cmp OP rhs; \
+ } \
+ }
+// end of the first part of the workaround for C78287
+#endif // USE_CMPXCHG_FIX
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+// ------------------------------------------------------------------------
+// X86 or X86_64: no alignment problems ====================================
+#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
+ KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
+}
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG(TYPE,BITS,OP) \
+}
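+// As an illustrative sketch only (GOMP critical path and the pause between retries
+// omitted), an instantiation such as
+//     ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 )
+// produces a routine roughly equivalent to:
+//
+//   void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 *lhs, kmp_real32 rhs ) {
+//       kmp_real32 old_value, new_value;
+//       do {
+//           old_value = *(kmp_real32 volatile *)lhs;
+//           new_value = old_value + rhs;
+//       } while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
+//                      *VOLATILE_CAST(kmp_int32 *) &old_value,
+//                      *VOLATILE_CAST(kmp_int32 *) &new_value ) );
+//   }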
+#if USE_CMPXCHG_FIX
+// -------------------------------------------------------------------------
+// workaround for C78287 (complex(kind=4) data type)
+#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
+}
+// end of the second part of the workaround for C78287
+#endif
+
+#else
+// -------------------------------------------------------------------------
+// Code for other architectures that don't handle unaligned accesses.
+#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
+ KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
+ } \
+}
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
+ } \
+}
+#if USE_CMPXCHG_FIX
+// -------------------------------------------------------------------------
+// workaround for C78287 (complex(kind=4) data type)
+#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
+ } \
+}
+// end of the second part of the workaround for C78287
+#endif // USE_CMPXCHG_FIX
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+// Routines for ATOMIC 4-byte operands addition and subtraction
+ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
+ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
+
+ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
+ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
+
+// Routines for ATOMIC 8-byte operands addition and subtraction
+ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
+ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
+
+ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
+ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
+
+// ------------------------------------------------------------------------
+// Entries definition for integer operands
+// TYPE_ID - operands type and size (fixed4, float4)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operand type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator (used in critical section)
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+// MASK - used for alignment check
+
+// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
+// ------------------------------------------------------------------------
+// Routines for ATOMIC integer operands, other operators
+// ------------------------------------------------------------------------
+// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
+ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
+ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
+ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
+ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
+ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
+ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
+ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
+ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
+ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
+ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
+ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
+ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
+ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
+ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
+ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
+ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
+ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
+ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
+ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
+ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
+ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
+ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
+ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
+ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
+ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
+ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
+ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
+ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
+ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
+ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
+ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
+ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
+ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
+ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
+ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
+ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
+ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
+ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
+ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
+ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
+ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
+ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
+ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
+ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
+// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
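+// As a rough sketch (not generated code), an entry such as
+//   ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 )
+// is expected to expand to approximately the following routine, with the GOMP
+// compatibility branch and the non-x86 alignment check omitted:
+//
+//   void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs )
+//   {
+//       kmp_int32 old_value, new_value;
+//       do {
+//           old_value = *lhs;
+//           new_value = old_value * rhs;
+//       } while ( ! KMP_COMPARE_AND_STORE_ACQ32( lhs, old_value, new_value ) );
+//   }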
+
+
+/* ------------------------------------------------------------------------ */
+/* Routines for C/C++ Reduction operators && and || */
+/* ------------------------------------------------------------------------ */
+
+// ------------------------------------------------------------------------
+// Need separate macros for &&, || because there is no combined assignment
+// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
+#define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
+ OP_CRITICAL( = *lhs OP, LCK_ID ) \
+}
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+// ------------------------------------------------------------------------
+// X86 or X86_64: no alignment problems ===================================
+#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
+ OP_CMPXCHG(TYPE,BITS,OP) \
+}
+
+#else
+// ------------------------------------------------------------------------
+// Code for other architectures that don't handle unaligned accesses.
+#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
+ } \
+}
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
+ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
+ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
+ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
+ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
+ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
+ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
+ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
+
+
+/* ------------------------------------------------------------------------- */
+/* Routines for Fortran operators that have no C counterpart:                 */
+/* MAX, MIN, .EQV., .NEQV. */
+/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
+/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
+/* ------------------------------------------------------------------------- */
+
+// -------------------------------------------------------------------------
+// MIN and MAX need separate macros
+// OP - comparison operator used to check whether any action is still needed
+#define MIN_MAX_CRITSECT(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if ( *lhs OP rhs ) { /* still need actions? */ \
+ *lhs = rhs; \
+ } \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
+
+// -------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
+ if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
+ KMP_CHECK_GTID; \
+ MIN_MAX_CRITSECT( OP, 0 ); \
+ return; \
+ }
+#else
+#define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+// -------------------------------------------------------------------------
+#define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ TYPE old_value; \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ while ( old_value OP rhs && /* still need actions? */ \
+ ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
+ { \
+ KMP_CPU_PAUSE(); \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ } \
+ }
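+// The loop above retries the compare-and-store only while the condition still holds;
+// if a concurrent update makes the condition false (e.g. another thread has already
+// stored something >= rhs for "max"), the loop exits without writing. The net effect
+// is roughly an atomic form of
+//   if ( *lhs OP rhs ) *lhs = rhs;   // OP is "<" for max, ">" for min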
+
+// -------------------------------------------------------------------------
+// 1-byte, 2-byte operands - use critical section
+#define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ if ( *lhs OP rhs ) { /* need actions? */ \
+ GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
+ MIN_MAX_CRITSECT(OP,LCK_ID) \
+ } \
+}
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+// -------------------------------------------------------------------------
+// X86 or X86_64: no alignment problems ====================================
+#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ if ( *lhs OP rhs ) { \
+ GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
+ MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
+ } \
+}
+
+#else
+// -------------------------------------------------------------------------
+// Code for other architectures that don't handle unaligned accesses.
+#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ if ( *lhs OP rhs ) { \
+ GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
+ } \
+ } \
+}
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
+MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
+MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
+MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
+MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
+MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
+MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
+MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
+MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
+MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
+MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
+MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
+#if KMP_HAVE_QUAD
+MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
+MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
+#if ( KMP_ARCH_X86 )
+ MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
+ MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
+#endif
+#endif
+// ------------------------------------------------------------------------
+// Need separate macros for .EQV. because of the need for a complement (~)
+// OP ignored for critical sections, ^=~ used instead
+#define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
+}
+
+// ------------------------------------------------------------------------
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+// ------------------------------------------------------------------------
+// X86 or X86_64: no alignment problems ===================================
+#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
+ OP_CMPXCHG(TYPE,BITS,OP) \
+}
+// ------------------------------------------------------------------------
+#else
+// ------------------------------------------------------------------------
+// Code for other architectures that don't handle unaligned accesses.
+#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
+ } \
+}
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
+ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
+ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
+ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
+ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
+ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
+ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
+ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
+
+
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+// TYPE_ID, OP_ID, TYPE - detailed above
+// OP - operator
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+#define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
+}
+
+/* ------------------------------------------------------------------------- */
+// routines for long double type
+ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
+ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
+ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
+ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
+#if KMP_HAVE_QUAD
+// routines for _Quad type
+ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
+ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
+ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
+ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
+ ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
+ ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
+ ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
+#endif
+#endif
+// routines for complex types
+
+#if USE_CMPXCHG_FIX
+// workaround for C78287 (complex(kind=4) data type)
+ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
+ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
+ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
+ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
+// end of the workaround for C78287
+#else
+ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
+ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
+ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
+ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
+#endif // USE_CMPXCHG_FIX
+
+ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
+ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
+ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
+ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
+ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
+ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
+ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
+ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
+ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
+ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
+ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
+ ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
+ ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
+ ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
+#endif
+#endif
+
+#if OMP_40_ENABLED
+
+// OpenMP 4.0: x = expr binop x for non-commutative operations.
+// Supported only on IA-32 architecture and Intel(R) 64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs bound by critical section
+// OP - operator (it's supposed to contain an assignment)
+// LCK_ID - lock identifier
+// Note: don't check gtid as it should always be valid
+// 1, 2-byte - expect valid parameter, other - check before this macro
+#define OP_CRITICAL_REV(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ (*lhs) = (rhs) OP (*lhs); \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
+
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_REV(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_REV( OP, 0 ); \
+ return; \
+ }
+#else
+#define OP_GOMP_CRITICAL_REV(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+
+// Beginning of a definition (provides name, parameters, debug trace)
+// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operands' type
+#define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
+RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs using "compare_and_store" routine
+// TYPE - operands' type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator
+// Note: temp_val introduced in order to force the compiler to read
+// *lhs only once (w/o it the compiler reads *lhs twice)
+#define OP_CMPXCHG_REV(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ TYPE old_value, new_value; \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs OP old_value; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
+ { \
+ KMP_DO_PAUSE; \
+ \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs OP old_value; \
+ } \
+ }
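+// The reversed forms swap the operand order: the loop above implements, roughly,
+// *lhs = rhs OP *lhs, which is what "#pragma omp atomic" needs for statements of
+// the shape x = expr binop x. For example (sketch only):
+//
+//   double x = 8.0;
+//   // __kmpc_atomic_float8_div_rev( id_ref, gtid, &x, 2.0 ) leaves x == 2.0 / 8.0 == 0.25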
+
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
+ OP_CMPXCHG_REV(TYPE,BITS,OP) \
+}
+
+// ------------------------------------------------------------------------
+// Entries definition for integer operands
+// TYPE_ID - operands type and size (fixed4, float4)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operand type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator (used in critical section)
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+
+// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
+// ------------------------------------------------------------------------
+// Routines for ATOMIC integer operands, other operators
+// ------------------------------------------------------------------------
+// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
+ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
+ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
+ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
+ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
+ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
+ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
+
+ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
+ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
+ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
+ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
+ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
+ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
+
+ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
+ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
+ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
+ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
+ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
+ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
+
+ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
+ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
+ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
+ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
+ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
+ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
+
+ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
+ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
+
+ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
+ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
+// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
+
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+// TYPE_ID, OP_ID, TYPE - detailed above
+// OP - operator
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+#define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
+ OP_CRITICAL_REV(OP,LCK_ID) \
+}
+
+/* ------------------------------------------------------------------------- */
+// routines for long double type
+ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
+ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
+#if KMP_HAVE_QUAD
+// routines for _Quad type
+ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
+ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
+ ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
+#endif
+#endif
+
+// routines for complex types
+ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
+ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
+ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
+ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
+ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
+ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
+ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
+ ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
+#endif
+#endif
+
+
+#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
+// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
+
+#endif //OMP_40_ENABLED
+
+
+/* ------------------------------------------------------------------------ */
+/* Routines for mixed types of LHS and RHS, when RHS is "larger" */
+/* Note: to reduce the total number of type combinations, it is assumed that  */
+/*       the compiler converts RHS to the longest floating-point type, i.e.   */
+/*       _Quad, before calling any of these routines.                         */
+/* The conversion to _Quad is done by the compiler during the calculation,    */
+/*    and the conversion back to TYPE before the assignment, like:            */
+/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
+/* A performance penalty is expected because of the software emulation.       */
+/* ------------------------------------------------------------------------ */
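+// For example, for float f and _Quad q, an update of the form f = f + q is expected
+// to be lowered to a call like __kmpc_atomic_float4_add_fp( id_ref, gtid, &f, q ),
+// which internally performs f = (kmp_real32)( (_Quad)f + q ); the exact point of the
+// conversions is up to the compiler (sketch of the intent only).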
+
+#define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
+
+// -------------------------------------------------------------------------
+#define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
+}
+
+// -------------------------------------------------------------------------
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+// -------------------------------------------------------------------------
+// X86 or X86_64: no alignment problems ====================================
+#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG(TYPE,BITS,OP) \
+}
+// -------------------------------------------------------------------------
+#else
+// ------------------------------------------------------------------------
+// Code for other architectures that don't handle unaligned accesses.
+#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
+ } \
+}
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+// RHS=float8
+ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
+ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
+ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
+ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
+ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
+ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
+ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
+ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
+
+// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
+#if KMP_HAVE_QUAD
+ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
+ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
+ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
+ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
+ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
+
+ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
+ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
+ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
+ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
+ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
+
+ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
+ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
+ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
+ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
+ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
+
+ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
+ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
+ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
+ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
+ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
+
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
+ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
+
+ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
+ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
+ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
+ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
+
+ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
+ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
+ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
+ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
+#endif
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+// ------------------------------------------------------------------------
+// X86 or X86_64: no alignment problems ====================================
+#if USE_CMPXCHG_FIX
+// workaround for C78287 (complex(kind=4) data type)
+#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
+}
+// end of the second part of the workaround for C78287
+#else
+#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG(TYPE,BITS,OP) \
+}
+#endif // USE_CMPXCHG_FIX
+#else
+// ------------------------------------------------------------------------
+// Code for other architectures that don't handle unaligned accesses.
+#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
+ OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
+ } else { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
+ } \
+}
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
+ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
+ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
+ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
+
+// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// ------------------------------------------------------------------------
+// Atomic READ routines
+// ------------------------------------------------------------------------
+
+// ------------------------------------------------------------------------
+// Beginning of a definition (provides name, parameters, debug trace)
+// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operands' type
+#define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
+RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs using "compare_and_store_ret" routine
+// TYPE - operands' type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator
+// Note: temp_val introduced in order to force the compiler to read
+// *lhs only once (w/o it the compiler reads *lhs twice)
+// TODO: check if it is still necessary
+// Return old value regardless of the result of the "compare & swap" operation
+
+#define OP_CMPXCHG_READ(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ union f_i_union { \
+ TYPE f_val; \
+ kmp_int##BITS i_val; \
+ }; \
+ union f_i_union old_value; \
+ temp_val = *loc; \
+ old_value.f_val = temp_val; \
+ old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
+ new_value = old_value.f_val; \
+ return new_value; \
+ }
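+// Note: the compare-and-store above passes the same value as both the expected and
+// the new operand, so *loc is never modified; by the "_RET" convention the call
+// returns the value found at *loc, which yields an atomic full-width read even for
+// types that cannot be loaded atomically with a plain load.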
+
+// -------------------------------------------------------------------------
+// Operation on *lhs, rhs bound by critical section
+// OP - operator (it's supposed to contain an assignment)
+// LCK_ID - lock identifier
+// Note: don't check gtid as it should always be valid
+// 1, 2-byte - expect valid parameter, other - check before this macro
+#define OP_CRITICAL_READ(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ new_value = (*loc); \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
+
+// -------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_READ(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_READ( OP, 0 ); \
+ return new_value; \
+ }
+#else
+#define OP_GOMP_CRITICAL_READ(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+// -------------------------------------------------------------------------
+#define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
+ new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
+ return new_value; \
+}
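+// Reading via KMP_TEST_THEN_ADD##BITS( loc, +0 ) relies on an atomic fetch-and-add of
+// zero: the location is left unchanged and the returned old value is the atomically
+// observed contents of *loc.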
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG_READ(TYPE,BITS,OP) \
+}
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+// TYPE_ID, OP_ID, TYPE - detailed above
+// OP - operator
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+#define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
+ return new_value; \
+}
+
+// ------------------------------------------------------------------------
+// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
+// Let's return the read value through the additional parameter.
+
+#if ( KMP_OS_WINDOWS )
+
+#define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ (*out) = (*loc); \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
+// ------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_READ_WRK( OP, 0 ); \
+ }
+#else
+#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+// ------------------------------------------------------------------------
+#define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
+void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
+
+// ------------------------------------------------------------------------
+#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
+ OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
+}
+
+#endif // KMP_OS_WINDOWS
+
+// ------------------------------------------------------------------------
+// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
+ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
+ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
+ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
+ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
+
+// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
+ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
+ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
+
+ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
+#endif // KMP_HAVE_QUAD
+
+// Fix for CQ220361 on Windows* OS
+#if ( KMP_OS_WINDOWS )
+ ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
+#else
+ ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
+#endif
+ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
+ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
+ ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
+#endif
+#endif
+
+
+// ------------------------------------------------------------------------
+// Atomic WRITE routines
+// ------------------------------------------------------------------------
+
+#define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
+ KMP_XCHG_FIXED##BITS( lhs, rhs ); \
+}
+// ------------------------------------------------------------------------
+#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
+ KMP_XCHG_REAL##BITS( lhs, rhs ); \
+}
+
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs using "compare_and_store" routine
+// TYPE - operands' type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator
+// Note: temp_val introduced in order to force the compiler to read
+// *lhs only once (w/o it the compiler reads *lhs twice)
+#define OP_CMPXCHG_WR(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ TYPE old_value, new_value; \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
+ { \
+ KMP_CPU_PAUSE(); \
+ \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs; \
+ } \
+ }
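+// The compare-and-store based write (ATOMIC_CMPXCHG_WR) is used below only for the
+// 8-byte types when building for 32-bit x86 (KMP_ARCH_X86), presumably because a
+// plain 8-byte atomic exchange is not available there; the other cases use the
+// simpler KMP_XCHG_FIXED* / KMP_XCHG_REAL* forms, as selected by the #if blocks below.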
+
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
+ OP_CMPXCHG_WR(TYPE,BITS,OP) \
+}
+
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+// TYPE_ID, OP_ID, TYPE - detailed above
+// OP - operator
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+#define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
+ OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
+}
+// -------------------------------------------------------------------------
+
+ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
+ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
+ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
+#else
+ ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
+#endif
+
+ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
+#else
+ ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
+#endif
+
+ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
+#endif
+ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
+ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
+ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
+ ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
+#endif
+#endif
+
+
+// ------------------------------------------------------------------------
+// Atomic CAPTURE routines
+// ------------------------------------------------------------------------
+
+// Beginning of a definition (provides name, parameters, debug trace)
+// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operands' type
+#define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
+RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
+
+// -------------------------------------------------------------------------
+// Operation on *lhs, rhs bound by critical section
+// OP - operator (it's supposed to contain an assignment)
+// LCK_ID - lock identifier
+// Note: don't check gtid as it should always be valid
+// 1, 2-byte - expect valid parameter, other - check before this macro
+#define OP_CRITICAL_CPT(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if( flag ) { \
+ (*lhs) OP rhs; \
+ new_value = (*lhs); \
+ } else { \
+ new_value = (*lhs); \
+ (*lhs) OP rhs; \
+ } \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return new_value;
+
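+// The "flag" argument selects which value is captured; the two OpenMP capture forms
+// map onto it roughly as follows (sketch):
+//   { v = x; x = x OP expr; }   ->   v = __kmpc_atomic_..._cpt( id_ref, gtid, &x, expr, 0 );  // old value
+//   { x = x OP expr; v = x; }   ->   v = __kmpc_atomic_..._cpt( id_ref, gtid, &x, expr, 1 );  // new value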
+// ------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_CPT( OP##=, 0 ); \
+ }
+#else
+#define OP_GOMP_CRITICAL_CPT(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs using "compare_and_store" routine
+// TYPE - operands' type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator
+// Note: temp_val introduced in order to force the compiler to read
+// *lhs only once (w/o it the compiler reads *lhs twice)
+#define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ TYPE old_value, new_value; \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = old_value OP rhs; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
+ { \
+ KMP_CPU_PAUSE(); \
+ \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = old_value OP rhs; \
+ } \
+ if( flag ) { \
+ return new_value; \
+ } else \
+ return old_value; \
+ }
+
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
+ OP_CMPXCHG_CPT(TYPE,BITS,OP) \
+}
+
+// -------------------------------------------------------------------------
+#define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE old_value, new_value; \
+ OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
+ /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
+ old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
+ if( flag ) { \
+ return old_value OP rhs; \
+ } else \
+ return old_value; \
+}
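+// Here the atomic fetch-and-add already returns the value *lhs held before the update,
+// so no retry loop is needed: the routine hands back either that old value (flag == 0)
+// or old_value OP rhs, which equals the value just stored (flag != 0).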
+// -------------------------------------------------------------------------
+
+ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
+ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
+ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
+ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
+
+ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
+ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
+ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
+ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
+
+// ------------------------------------------------------------------------
+// Entries definition for integer operands
+// TYPE_ID - operands type and size (fixed4, float4)
+// OP_ID - operation identifier (add, sub, mul, ...)
+// TYPE - operand type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator (used in critical section)
+// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
+// ------------------------------------------------------------------------
+// Routines for ATOMIC integer operands, other operators
+// ------------------------------------------------------------------------
+// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
+ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
+ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
+ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
+ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
+ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
+ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
+// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
+
+// ------------------------------------------------------------------------
+// Routines for C/C++ Reduction operators && and ||
+// ------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+// Operation on *lhs, rhs bound by critical section
+// OP - operator (it's supposed to contain an assignment)
+// LCK_ID - lock identifier
+// Note: don't check gtid as it should always be valid
+// 1, 2-byte - expect valid parameter, other - check before this macro
+#define OP_CRITICAL_L_CPT(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if( flag ) { \
+ new_value OP rhs; \
+ } else \
+ new_value = (*lhs); \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
+
+// ------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_L_CPT( OP, 0 ); \
+ return new_value; \
+ }
+#else
+#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+// ------------------------------------------------------------------------
+// Need separate macros for &&, || because there is no combined assignment
+#define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
+ OP_CMPXCHG_CPT(TYPE,BITS,OP) \
+}
+
+ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
+ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
+ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
+ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
+ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
+ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
+ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
+ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
+
+
+// -------------------------------------------------------------------------
+// Routines for Fortran operators that have no C counterpart:
+// MAX, MIN, .EQV., .NEQV.
+// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
+// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
+// -------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+// MIN and MAX need separate macros
+// OP - comparison operator used to check whether any action is still needed
+#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if ( *lhs OP rhs ) { /* still need actions? */ \
+ old_value = *lhs; \
+ *lhs = rhs; \
+ if ( flag ) \
+ new_value = rhs; \
+ else \
+ new_value = old_value; \
+ } \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return new_value; \
+
+// -------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
+ if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
+ KMP_CHECK_GTID; \
+ MIN_MAX_CRITSECT_CPT( OP, 0 ); \
+ }
+#else
+#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+// -------------------------------------------------------------------------
+#define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ /*TYPE old_value; */ \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ while ( old_value OP rhs && /* still need actions? */ \
+ ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
+ { \
+ KMP_CPU_PAUSE(); \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ } \
+ if( flag ) \
+ return rhs; \
+ else \
+ return old_value; \
+ }
+
+// -------------------------------------------------------------------------
+// 1-byte, 2-byte operands - use critical section
+#define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value, old_value; \
+ if ( *lhs OP rhs ) { /* need actions? */ \
+ GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
+ MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
+ } \
+ return *lhs; \
+}
+
+#define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value, old_value; \
+ if ( *lhs OP rhs ) { \
+ GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
+ MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
+ } \
+ return *lhs; \
+}
+
+
+MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
+MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
+MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
+MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
+MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
+MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
+MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
+MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
+MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
+MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
+MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
+MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
+#if KMP_HAVE_QUAD
+MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
+MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
+#if ( KMP_ARCH_X86 )
+ MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
+ MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
+#endif
+#endif
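+
+// Illustrative sketch (assumption, not library code): the max/min captured-value
+// routines above behave roughly like
+//     old = *lhs; if ( *lhs < rhs ) *lhs = rhs;   /* max */
+//     return flag ? *lhs : old;                   /* captured value */
+// so __kmpc_atomic_fixed4_max_cpt( &loc, gtid, &x, rhs, 1 ) returns the new
+// maximum stored in x (loc is a hypothetical ident_t at the call site).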
+
+// ------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_CPT( OP, 0 ); \
+ }
+#else
+#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+// ------------------------------------------------------------------------
+#define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
+ OP_CMPXCHG_CPT(TYPE,BITS,OP) \
+}
+
+// ------------------------------------------------------------------------
+
+ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
+ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
+ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
+ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
+ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
+ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
+ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
+ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
+
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+// TYPE_ID, OP_ID, TYPE - detailed above
+// OP - operator
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+#define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
+ OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
+}
+
+// ------------------------------------------------------------------------
+
+// Workaround for cmplx4. Regular routines with a return value do not work
+// on Win_32e, so captured values are returned through an additional parameter.
+#define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if( flag ) { \
+ (*lhs) OP rhs; \
+ (*out) = (*lhs); \
+ } else { \
+ (*out) = (*lhs); \
+ (*lhs) OP rhs; \
+ } \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return;
+// ------------------------------------------------------------------------
+
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
+ }
+#else
+#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+// ------------------------------------------------------------------------
+
+#define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
+void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
+// ------------------------------------------------------------------------
+
+#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
+ OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
+ OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
+}
+// The end of workaround for cmplx4
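+
+// Illustrative sketch (assumption): the cmplx4 workaround routines return the
+// captured value through the extra "out" parameter instead of by value, e.g.
+//     kmp_cmplx32 v;
+//     __kmpc_atomic_cmplx4_add_cpt( &loc, gtid, &x, rhs, &v, 1 );
+// (loc, x and rhs are hypothetical caller-side names).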
+
+/* ------------------------------------------------------------------------- */
+// routines for long double type
+ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
+ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
+ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
+ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
+#if KMP_HAVE_QUAD
+// routines for _Quad type
+ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
+ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
+ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
+ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
+ ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
+ ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
+ ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
+#endif
+#endif
+
+// routines for complex types
+
+// cmplx4 routines to return void
+ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
+ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
+ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
+ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
+
+ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
+ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
+ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
+ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
+ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
+ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
+ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
+ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
+ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
+ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
+ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
+ ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
+ ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
+ ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
+#endif
+#endif
+
+#if OMP_40_ENABLED
+
+// OpenMP 4.0 capture forms for non-commutative operations:
+//     v = x = expr binop x;
+//     { v = x; x = expr binop x; }
+//     { x = expr binop x; v = x; }
+// Supported only on IA-32 architecture and Intel(R) 64
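+
+// Illustrative sketch (assumption): for a reverse subtraction with capture on a
+// 32-bit int, a call such as
+//     kmp_int32 v = __kmpc_atomic_fixed4_sub_cpt_rev( &loc, gtid, &x, expr, flag );
+// atomically performs x = expr - x and returns the new x when flag != 0, or the
+// previous x when flag == 0 (loc, x and expr are hypothetical caller-side names).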
+
+// -------------------------------------------------------------------------
+// Operation on *lhs, rhs bound by critical section
+// OP - operator (it's supposed to contain an assignment)
+// LCK_ID - lock identifier
+// Note: don't check gtid as it should always be valid
+// For 1- and 2-byte operands the parameter is expected to be valid; for other sizes, check before invoking this macro
+#define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if( flag ) { \
+ /*temp_val = (*lhs);*/\
+ (*lhs) = (rhs) OP (*lhs); \
+ new_value = (*lhs); \
+ } else { \
+ new_value = (*lhs);\
+ (*lhs) = (rhs) OP (*lhs); \
+ } \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return new_value;
+
+// ------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_CPT_REV( OP, 0 ); \
+ }
+#else
+#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+// ------------------------------------------------------------------------
+// Operation on *lhs, rhs using "compare_and_store" routine
+// TYPE - operands' type
+// BITS - size in bits, used to distinguish low level calls
+// OP - operator
+// Note: temp_val introduced in order to force the compiler to read
+// *lhs only once (w/o it the compiler reads *lhs twice)
+#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ TYPE old_value, new_value; \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs OP old_value; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
+ { \
+ KMP_CPU_PAUSE(); \
+ \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs OP old_value; \
+ } \
+ if( flag ) { \
+ return new_value; \
+ } else \
+ return old_value; \
+ }
+
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
+ OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
+}
+
+
+ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
+ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
+// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
+
+
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+// TYPE_ID, OP_ID, TYPE - detailed above
+// OP - operator
+// LCK_ID - lock identifier, used to possibly distinguish lock variable
+#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
+ TYPE new_value; \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
+ OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
+ OP_CRITICAL_CPT_REV(OP,LCK_ID) \
+}
+
+
+/* ------------------------------------------------------------------------- */
+// routines for long double type
+ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
+ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
+#if KMP_HAVE_QUAD
+// routines for _Quad type
+ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
+ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
+ ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
+#endif
+#endif
+
+// routines for complex types
+
+// ------------------------------------------------------------------------
+
+// Workaround for cmplx4. Regular routines with a return value do not work
+// on Win_32e, so captured values are returned through an additional parameter.
+#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ if( flag ) { \
+ (*lhs) = (rhs) OP (*lhs); \
+ (*out) = (*lhs); \
+ } else { \
+ (*out) = (*lhs); \
+ (*lhs) = (rhs) OP (*lhs); \
+ } \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return;
+// ------------------------------------------------------------------------
+
+#ifdef KMP_GOMP_COMPAT
+#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
+ }
+#else
+#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
+#endif /* KMP_GOMP_COMPAT */
+// ------------------------------------------------------------------------
+
+#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
+ OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
+ OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
+}
+// The end of workaround for cmplx4
+
+
+// !!! TODO: check if we need to return void for cmplx4 routines
+// cmplx4 routines to return void
+ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
+ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
+
+ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
+ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
+ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
+ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
+ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
+ ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
+#endif
+#endif
+
+// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
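+//
+// Illustrative sketch (assumption): for { v = x; x = expr; } on a 32-bit int the
+// compiler may emit something like
+//     v = __kmpc_atomic_fixed4_swp( &loc, gtid, &x, expr );
+// which atomically stores expr into x and returns the previous value of x
+// (loc is a hypothetical ident_t at the call site).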
+
+#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
+TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
+
+#define CRITICAL_SWP(LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ old_value = (*lhs); \
+ (*lhs) = rhs; \
+ \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return old_value;
+
+// ------------------------------------------------------------------------
+#ifdef KMP_GOMP_COMPAT
+#define GOMP_CRITICAL_SWP(FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ CRITICAL_SWP( 0 ); \
+ }
+#else
+#define GOMP_CRITICAL_SWP(FLAG)
+#endif /* KMP_GOMP_COMPAT */
+
+
+#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
+ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
+ TYPE old_value; \
+ GOMP_CRITICAL_SWP(GOMP_FLAG) \
+ old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
+ return old_value; \
+}
+// ------------------------------------------------------------------------
+#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
+ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
+ TYPE old_value; \
+ GOMP_CRITICAL_SWP(GOMP_FLAG) \
+ old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
+ return old_value; \
+}
+
+// ------------------------------------------------------------------------
+#define CMPXCHG_SWP(TYPE,BITS) \
+ { \
+ TYPE KMP_ATOMIC_VOLATILE temp_val; \
+ TYPE old_value, new_value; \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs; \
+ while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
+ *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
+ *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
+ { \
+ KMP_CPU_PAUSE(); \
+ \
+ temp_val = *lhs; \
+ old_value = temp_val; \
+ new_value = rhs; \
+ } \
+ return old_value; \
+ }
+
+// -------------------------------------------------------------------------
+#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
+ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
+ TYPE old_value; \
+ GOMP_CRITICAL_SWP(GOMP_FLAG) \
+ CMPXCHG_SWP(TYPE,BITS) \
+}
+
+ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
+ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
+ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
+
+ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
+
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
+ ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
+#else
+ ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
+ ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
+#endif
+
+// ------------------------------------------------------------------------
+// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
+#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
+ TYPE old_value; \
+ GOMP_CRITICAL_SWP(GOMP_FLAG) \
+ CRITICAL_SWP(LCK_ID) \
+}
+
+// ------------------------------------------------------------------------
+
+// !!! TODO: check if we need to return void for cmplx4 routines
+// Workaround for cmplx4. Regular routines with a return value do not work
+// on Win_32e, so captured values are returned through an additional parameter.
+
+#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
+void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
+{ \
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); \
+ KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
+
+
+#define CRITICAL_SWP_WRK(LCK_ID) \
+ __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ \
+ tmp = (*lhs); \
+ (*lhs) = (rhs); \
+ (*out) = tmp; \
+ __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
+ return;
+
+// ------------------------------------------------------------------------
+
+#ifdef KMP_GOMP_COMPAT
+#define GOMP_CRITICAL_SWP_WRK(FLAG) \
+ if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
+ KMP_CHECK_GTID; \
+ CRITICAL_SWP_WRK( 0 ); \
+ }
+#else
+#define GOMP_CRITICAL_SWP_WRK(FLAG)
+#endif /* KMP_GOMP_COMPAT */
+// ------------------------------------------------------------------------
+
+#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
+ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
+ TYPE tmp; \
+ GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
+ CRITICAL_SWP_WRK(LCK_ID) \
+}
+// The end of workaround for cmplx4
+
+
+ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
+#endif
+// cmplx4 routine to return void
+ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
+
+//ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
+
+
+ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
+ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
+#if KMP_HAVE_QUAD
+ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
+#if ( KMP_ARCH_X86 )
+ ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
+ ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
+#endif
+#endif
+
+
+// End of OpenMP 4.0 Capture
+
+#endif //OMP_40_ENABLED
+
+#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+
+#undef OP_CRITICAL
+
+/* ------------------------------------------------------------------------ */
+/* Generic atomic routines */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ if (
+#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
+ FALSE /* must use lock */
+#else
+ TRUE
+#endif
+ )
+ {
+ kmp_int8 old_value, new_value;
+
+ old_value = *(kmp_int8 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+
+ /* TODO: Should this be acquire or release? */
+ while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
+ *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
+ {
+ KMP_CPU_PAUSE();
+
+ old_value = *(kmp_int8 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+ }
+
+ return;
+ }
+ else {
+ //
+ // All 1-byte data is of integer data type.
+ //
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
+ }
+}
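+
+// Illustrative usage sketch (hypothetical helper, not library code): the compiler
+// passes a helper with the shape f( dest, lhs, rhs ), each argument a void*, e.g.
+//     static void op_xor1( void *d, void *a, void *b )
+//         { *(char *)d = *(char *)a ^ *(char *)b; }
+//     __kmpc_atomic_1( &loc, gtid, &x, &y, op_xor1 );
+// so the CAS loop above recomputes new_value from the freshest old_value on each retry.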
+
+void
+__kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ if (
+#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
+ FALSE /* must use lock */
+#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
+ TRUE /* no alignment problems */
+#else
+ ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
+#endif
+ )
+ {
+ kmp_int16 old_value, new_value;
+
+ old_value = *(kmp_int16 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+
+ /* TODO: Should this be acquire or release? */
+ while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
+ *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
+ {
+ KMP_CPU_PAUSE();
+
+ old_value = *(kmp_int16 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+ }
+
+ return;
+ }
+ else {
+ //
+ // All 2-byte data is of integer data type.
+ //
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
+ }
+}
+
+void
+__kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ if (
+ //
+ // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
+ // Gomp compatibility is broken if this routine is called for floats.
+ //
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ TRUE /* no alignment problems */
+#else
+ ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
+#endif
+ )
+ {
+ kmp_int32 old_value, new_value;
+
+ old_value = *(kmp_int32 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+
+ /* TODO: Should this be acquire or release? */
+ while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
+ *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
+ {
+ KMP_CPU_PAUSE();
+
+ old_value = *(kmp_int32 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+ }
+
+ return;
+ }
+ else {
+ //
+ // Use __kmp_atomic_lock_4i for all 4-byte data,
+ // even if it isn't of integer data type.
+ //
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
+ }
+}
+
+void
+__kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ if (
+
+#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
+ FALSE /* must use lock */
+#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
+ TRUE /* no alignment problems */
+#else
+ ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
+#endif
+ )
+ {
+ kmp_int64 old_value, new_value;
+
+ old_value = *(kmp_int64 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+ /* TODO: Should this be acquire or release? */
+ while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
+ *(kmp_int64 *) &old_value,
+ *(kmp_int64 *) &new_value ) )
+ {
+ KMP_CPU_PAUSE();
+
+ old_value = *(kmp_int64 *) lhs;
+ (*f)( &new_value, &old_value, rhs );
+ }
+
+ return;
+ } else {
+ //
+ // Use __kmp_atomic_lock_8i for all 8-byte data,
+ // even if it isn't of integer data type.
+ //
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
+ }
+}
+
+void
+__kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
+}
+
+void
+__kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
+}
+
+void
+__kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
+}
+
+void
+__kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
+
+ (*f)( lhs, lhs, rhs );
+
+#ifdef KMP_GOMP_COMPAT
+ if ( __kmp_atomic_mode == 2 ) {
+ __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
+}
+
+// AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler
+// duplicated so that pure Intel code does not rely on third-party names
+// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
+void
+__kmpc_atomic_start(void)
+{
+ int gtid = __kmp_entry_gtid();
+ KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
+ __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
+}
+
+
+void
+__kmpc_atomic_end(void)
+{
+ int gtid = __kmp_get_gtid();
+ KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
+ __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
+}
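+
+// Usage sketch: the compiler brackets an atomic construct it cannot lower to one
+// of the specialized routines above with this global-lock pair, e.g.
+//     __kmpc_atomic_start();
+//     x = foo( x, y );    /* arbitrary update of x; foo is a hypothetical helper */
+//     __kmpc_atomic_end();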
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+/*!
+@}
+*/
+
+// end of file
diff --git a/contrib/libs/cxxsupp/openmp/kmp_atomic.h b/contrib/libs/cxxsupp/openmp/kmp_atomic.h
index 586848e921..33feae2189 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_atomic.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_atomic.h
@@ -1,1038 +1,1038 @@
-/*
- * kmp_atomic.h - ATOMIC header file
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_ATOMIC_H
-#define KMP_ATOMIC_H
-
-#include "kmp_os.h"
-#include "kmp_lock.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-// C++ build port.
-// Intel compiler does not support _Complex datatype on win.
-// Intel compiler supports _Complex datatype on lin and mac.
-// On the other hand, there is a stack alignment problem on lin_32 and mac_32
-// if the rhs is a cmplx80 or cmplx128 typedef'ed datatype.
-// The decision: use the compiler-supported _Complex type on lin and mac,
-// and the typedef'ed types on win.
-// Condition for WIN64 was modified in anticipation of 10.1 build compiler.
-
-#if defined( __cplusplus ) && ( KMP_OS_WINDOWS )
- // create shortcuts for c99 complex types
-
- #if (_MSC_VER < 1600) && defined(_DEBUG)
- // Workaround for the problem of _DebugHeapTag unresolved external.
- // This problem prevented us from using our static debug library for C tests
- // compiled with the /MDd option (the library itself is built with /MTd).
- #undef _DEBUG
- #define _DEBUG_TEMPORARILY_UNSET_
- #endif
-
- #include <complex>
-
- template< typename type_lhs, typename type_rhs >
- std::complex< type_lhs > __kmp_lhs_div_rhs(
- const std::complex< type_lhs >& lhs,
- const std::complex< type_rhs >& rhs ) {
- type_lhs a = lhs.real();
- type_lhs b = lhs.imag();
- type_rhs c = rhs.real();
- type_rhs d = rhs.imag();
- type_rhs den = c*c + d*d;
- type_rhs r = ( a*c + b*d );
- type_rhs i = ( b*c - a*d );
- std::complex< type_lhs > ret( r/den, i/den );
- return ret;
- }
-
- // complex8
- struct __kmp_cmplx64_t : std::complex< double > {
-
- __kmp_cmplx64_t() : std::complex< double > () {}
-
- __kmp_cmplx64_t( const std::complex< double >& cd )
- : std::complex< double > ( cd ) {}
-
- void operator /= ( const __kmp_cmplx64_t& rhs ) {
- std::complex< double > lhs = *this;
- *this = __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) {
- std::complex< double > lhs = *this;
- return __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- };
- typedef struct __kmp_cmplx64_t kmp_cmplx64;
-
- // complex4
- struct __kmp_cmplx32_t : std::complex< float > {
-
- __kmp_cmplx32_t() : std::complex< float > () {}
-
- __kmp_cmplx32_t( const std::complex<float>& cf )
- : std::complex< float > ( cf ) {}
-
- __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) {
- std::complex< float > lhs = *this;
- std::complex< float > rhs = b;
- return ( lhs + rhs );
- }
- __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) {
- std::complex< float > lhs = *this;
- std::complex< float > rhs = b;
- return ( lhs - rhs );
- }
- __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) {
- std::complex< float > lhs = *this;
- std::complex< float > rhs = b;
- return ( lhs * rhs );
- }
-
- __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) {
- kmp_cmplx64 t = kmp_cmplx64( *this ) + b;
- std::complex< double > d( t );
- std::complex< float > f( d );
- __kmp_cmplx32_t r( f );
- return r;
- }
- __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) {
- kmp_cmplx64 t = kmp_cmplx64( *this ) - b;
- std::complex< double > d( t );
- std::complex< float > f( d );
- __kmp_cmplx32_t r( f );
- return r;
- }
- __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) {
- kmp_cmplx64 t = kmp_cmplx64( *this ) * b;
- std::complex< double > d( t );
- std::complex< float > f( d );
- __kmp_cmplx32_t r( f );
- return r;
- }
-
- void operator /= ( const __kmp_cmplx32_t& rhs ) {
- std::complex< float > lhs = *this;
- *this = __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) {
- std::complex< float > lhs = *this;
- return __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- void operator /= ( const kmp_cmplx64& rhs ) {
- std::complex< float > lhs = *this;
- *this = __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) {
- std::complex< float > lhs = *this;
- return __kmp_lhs_div_rhs( lhs, rhs );
- }
- };
- typedef struct __kmp_cmplx32_t kmp_cmplx32;
-
- // complex10
- struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > {
-
- __kmp_cmplx80_t() : std::complex< long double > () {}
-
- __kmp_cmplx80_t( const std::complex< long double >& cld )
- : std::complex< long double > ( cld ) {}
-
- void operator /= ( const __kmp_cmplx80_t& rhs ) {
- std::complex< long double > lhs = *this;
- *this = __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) {
- std::complex< long double > lhs = *this;
- return __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- };
- typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80;
-
- // complex16
- #if KMP_HAVE_QUAD
- struct __kmp_cmplx128_t : std::complex< _Quad > {
-
- __kmp_cmplx128_t() : std::complex< _Quad > () {}
-
- __kmp_cmplx128_t( const std::complex< _Quad >& cq )
- : std::complex< _Quad > ( cq ) {}
-
- void operator /= ( const __kmp_cmplx128_t& rhs ) {
- std::complex< _Quad > lhs = *this;
- *this = __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) {
- std::complex< _Quad > lhs = *this;
- return __kmp_lhs_div_rhs( lhs, rhs );
- }
-
- };
- typedef struct __kmp_cmplx128_t kmp_cmplx128;
- #endif /* KMP_HAVE_QUAD */
-
- #ifdef _DEBUG_TEMPORARILY_UNSET_
- #undef _DEBUG_TEMPORARILY_UNSET_
- // Set it back now
- #define _DEBUG 1
- #endif
-
-#else
- // create shortcuts for c99 complex types
- typedef float _Complex kmp_cmplx32;
- typedef double _Complex kmp_cmplx64;
- typedef long double _Complex kmp_cmplx80;
- #if KMP_HAVE_QUAD
- typedef _Quad _Complex kmp_cmplx128;
- #endif
-#endif
-
-// Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad
-// and kmp_cmplx128) on IA-32 architecture. The following aligned structures
-// are implemented to support the old alignment in 10.1, 11.0, 11.1 and
-// introduce the new alignment in 12.0. See CQ88405.
-#if KMP_ARCH_X86 && KMP_HAVE_QUAD
-
- // 4-byte aligned structures for backward compatibility.
-
- #pragma pack( push, 4 )
-
-
- struct KMP_DO_ALIGN( 4 ) Quad_a4_t {
- _Quad q;
-
- Quad_a4_t( ) : q( ) {}
- Quad_a4_t( const _Quad & cq ) : q ( cq ) {}
-
- Quad_a4_t operator + ( const Quad_a4_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a4_t)( lhs + rhs );
- }
-
- Quad_a4_t operator - ( const Quad_a4_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a4_t)( lhs - rhs );
- }
- Quad_a4_t operator * ( const Quad_a4_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a4_t)( lhs * rhs );
- }
-
- Quad_a4_t operator / ( const Quad_a4_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a4_t)( lhs / rhs );
- }
-
- };
-
- struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t {
- kmp_cmplx128 q;
-
- kmp_cmplx128_a4_t() : q () {}
-
- kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
-
- kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a4_t)( lhs + rhs );
- }
- kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a4_t)( lhs - rhs );
- }
- kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a4_t)( lhs * rhs );
- }
-
- kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a4_t)( lhs / rhs );
- }
-
- };
-
- #pragma pack( pop )
-
- // New 16-byte aligned structures for 12.0 compiler.
- struct KMP_DO_ALIGN( 16 ) Quad_a16_t {
- _Quad q;
-
- Quad_a16_t( ) : q( ) {}
- Quad_a16_t( const _Quad & cq ) : q ( cq ) {}
-
- Quad_a16_t operator + ( const Quad_a16_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a16_t)( lhs + rhs );
- }
-
- Quad_a16_t operator - ( const Quad_a16_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a16_t)( lhs - rhs );
- }
- Quad_a16_t operator * ( const Quad_a16_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a16_t)( lhs * rhs );
- }
-
- Quad_a16_t operator / ( const Quad_a16_t& b ) {
- _Quad lhs = (*this).q;
- _Quad rhs = b.q;
- return (Quad_a16_t)( lhs / rhs );
- }
- };
-
- struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t {
- kmp_cmplx128 q;
-
- kmp_cmplx128_a16_t() : q () {}
-
- kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
-
- kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a16_t)( lhs + rhs );
- }
- kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a16_t)( lhs - rhs );
- }
- kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a16_t)( lhs * rhs );
- }
-
- kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) {
- kmp_cmplx128 lhs = (*this).q;
- kmp_cmplx128 rhs = b.q;
- return (kmp_cmplx128_a16_t)( lhs / rhs );
- }
- };
-
-#endif
-
-#if ( KMP_ARCH_X86 )
- #define QUAD_LEGACY Quad_a4_t
- #define CPLX128_LEG kmp_cmplx128_a4_t
-#else
- #define QUAD_LEGACY _Quad
- #define CPLX128_LEG kmp_cmplx128
-#endif
-
-#ifdef __cplusplus
- extern "C" {
-#endif
-
-extern int __kmp_atomic_mode;
-
-//
-// Atomic locks can easily become contended, so we use queuing locks for them.
-//
-
-typedef kmp_queuing_lock_t kmp_atomic_lock_t;
-
-static inline void
-__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
-{
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
- ompt_callbacks.ompt_callback(ompt_event_wait_atomic)(
- (ompt_wait_id_t) lck);
- }
-#endif
-
- __kmp_acquire_queuing_lock( lck, gtid );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
- ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
- (ompt_wait_id_t) lck);
- }
-#endif
-}
-
-static inline int
-__kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_test_queuing_lock( lck, gtid );
-}
-
-static inline void
-__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
-{
- __kmp_release_queuing_lock( lck, gtid );
-#if OMPT_SUPPORT && OMPT_BLAME
+/*
+ * kmp_atomic.h - ATOMIC header file
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_ATOMIC_H
+#define KMP_ATOMIC_H
+
+#include "kmp_os.h"
+#include "kmp_lock.h"
+
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
+
+// C++ build port.
+// Intel compiler does not support _Complex datatype on win.
+// Intel compiler supports _Complex datatype on lin and mac.
+// On the other hand, there is a stack alignment problem on lin_32 and mac_32
+// if the rhs is a cmplx80 or cmplx128 typedef'ed datatype.
+// The decision: use the compiler-supported _Complex type on lin and mac,
+// and the typedef'ed types on win.
+// Condition for WIN64 was modified in anticipation of 10.1 build compiler.
+
+#if defined( __cplusplus ) && ( KMP_OS_WINDOWS )
+ // create shortcuts for c99 complex types
+
+ #if (_MSC_VER < 1600) && defined(_DEBUG)
+ // Workaround for the problem of _DebugHeapTag unresolved external.
+ // This problem prevented us from using our static debug library for C tests
+ // compiled with the /MDd option (the library itself is built with /MTd).
+ #undef _DEBUG
+ #define _DEBUG_TEMPORARILY_UNSET_
+ #endif
+
+ #include <complex>
+
+ template< typename type_lhs, typename type_rhs >
+ std::complex< type_lhs > __kmp_lhs_div_rhs(
+ const std::complex< type_lhs >& lhs,
+ const std::complex< type_rhs >& rhs ) {
+ type_lhs a = lhs.real();
+ type_lhs b = lhs.imag();
+ type_rhs c = rhs.real();
+ type_rhs d = rhs.imag();
+ type_rhs den = c*c + d*d;
+ type_rhs r = ( a*c + b*d );
+ type_rhs i = ( b*c - a*d );
+ std::complex< type_lhs > ret( r/den, i/den );
+ return ret;
+ }
+
+ // complex8
+ struct __kmp_cmplx64_t : std::complex< double > {
+
+ __kmp_cmplx64_t() : std::complex< double > () {}
+
+ __kmp_cmplx64_t( const std::complex< double >& cd )
+ : std::complex< double > ( cd ) {}
+
+ void operator /= ( const __kmp_cmplx64_t& rhs ) {
+ std::complex< double > lhs = *this;
+ *this = __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) {
+ std::complex< double > lhs = *this;
+ return __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ };
+ typedef struct __kmp_cmplx64_t kmp_cmplx64;
+
+ // complex4
+ struct __kmp_cmplx32_t : std::complex< float > {
+
+ __kmp_cmplx32_t() : std::complex< float > () {}
+
+ __kmp_cmplx32_t( const std::complex<float>& cf )
+ : std::complex< float > ( cf ) {}
+
+ __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) {
+ std::complex< float > lhs = *this;
+ std::complex< float > rhs = b;
+ return ( lhs + rhs );
+ }
+ __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) {
+ std::complex< float > lhs = *this;
+ std::complex< float > rhs = b;
+ return ( lhs - rhs );
+ }
+ __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) {
+ std::complex< float > lhs = *this;
+ std::complex< float > rhs = b;
+ return ( lhs * rhs );
+ }
+
+ __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) {
+ kmp_cmplx64 t = kmp_cmplx64( *this ) + b;
+ std::complex< double > d( t );
+ std::complex< float > f( d );
+ __kmp_cmplx32_t r( f );
+ return r;
+ }
+ __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) {
+ kmp_cmplx64 t = kmp_cmplx64( *this ) - b;
+ std::complex< double > d( t );
+ std::complex< float > f( d );
+ __kmp_cmplx32_t r( f );
+ return r;
+ }
+ __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) {
+ kmp_cmplx64 t = kmp_cmplx64( *this ) * b;
+ std::complex< double > d( t );
+ std::complex< float > f( d );
+ __kmp_cmplx32_t r( f );
+ return r;
+ }
+
+ void operator /= ( const __kmp_cmplx32_t& rhs ) {
+ std::complex< float > lhs = *this;
+ *this = __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) {
+ std::complex< float > lhs = *this;
+ return __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ void operator /= ( const kmp_cmplx64& rhs ) {
+ std::complex< float > lhs = *this;
+ *this = __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) {
+ std::complex< float > lhs = *this;
+ return __kmp_lhs_div_rhs( lhs, rhs );
+ }
+ };
+ typedef struct __kmp_cmplx32_t kmp_cmplx32;
+
+ // complex10
+ struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > {
+
+ __kmp_cmplx80_t() : std::complex< long double > () {}
+
+ __kmp_cmplx80_t( const std::complex< long double >& cld )
+ : std::complex< long double > ( cld ) {}
+
+ void operator /= ( const __kmp_cmplx80_t& rhs ) {
+ std::complex< long double > lhs = *this;
+ *this = __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) {
+ std::complex< long double > lhs = *this;
+ return __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ };
+ typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80;
+
+ // complex16
+ #if KMP_HAVE_QUAD
+ struct __kmp_cmplx128_t : std::complex< _Quad > {
+
+ __kmp_cmplx128_t() : std::complex< _Quad > () {}
+
+ __kmp_cmplx128_t( const std::complex< _Quad >& cq )
+ : std::complex< _Quad > ( cq ) {}
+
+ void operator /= ( const __kmp_cmplx128_t& rhs ) {
+ std::complex< _Quad > lhs = *this;
+ *this = __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) {
+ std::complex< _Quad > lhs = *this;
+ return __kmp_lhs_div_rhs( lhs, rhs );
+ }
+
+ };
+ typedef struct __kmp_cmplx128_t kmp_cmplx128;
+ #endif /* KMP_HAVE_QUAD */
+
+ #ifdef _DEBUG_TEMPORARILY_UNSET_
+ #undef _DEBUG_TEMPORARILY_UNSET_
+ // Set it back now
+ #define _DEBUG 1
+ #endif
+
+#else
+ // create shortcuts for c99 complex types
+ typedef float _Complex kmp_cmplx32;
+ typedef double _Complex kmp_cmplx64;
+ typedef long double _Complex kmp_cmplx80;
+ #if KMP_HAVE_QUAD
+ typedef _Quad _Complex kmp_cmplx128;
+ #endif
+#endif
+
+// Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad
+// and kmp_cmplx128) on IA-32 architecture. The following aligned structures
+// are implemented to support the old alignment in 10.1, 11.0, 11.1 and
+// introduce the new alignment in 12.0. See CQ88405.
+#if KMP_ARCH_X86 && KMP_HAVE_QUAD
+
+ // 4-byte aligned structures for backward compatibility.
+
+ #pragma pack( push, 4 )
+
+
+ struct KMP_DO_ALIGN( 4 ) Quad_a4_t {
+ _Quad q;
+
+ Quad_a4_t( ) : q( ) {}
+ Quad_a4_t( const _Quad & cq ) : q ( cq ) {}
+
+ Quad_a4_t operator + ( const Quad_a4_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a4_t)( lhs + rhs );
+ }
+
+ Quad_a4_t operator - ( const Quad_a4_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a4_t)( lhs - rhs );
+ }
+ Quad_a4_t operator * ( const Quad_a4_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a4_t)( lhs * rhs );
+ }
+
+ Quad_a4_t operator / ( const Quad_a4_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a4_t)( lhs / rhs );
+ }
+
+ };
+
+ struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t {
+ kmp_cmplx128 q;
+
+ kmp_cmplx128_a4_t() : q () {}
+
+ kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
+
+ kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a4_t)( lhs + rhs );
+ }
+ kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a4_t)( lhs - rhs );
+ }
+ kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a4_t)( lhs * rhs );
+ }
+
+ kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a4_t)( lhs / rhs );
+ }
+
+ };
+
+ #pragma pack( pop )
+
+ // New 16-byte aligned structures for 12.0 compiler.
+ struct KMP_DO_ALIGN( 16 ) Quad_a16_t {
+ _Quad q;
+
+ Quad_a16_t( ) : q( ) {}
+ Quad_a16_t( const _Quad & cq ) : q ( cq ) {}
+
+ Quad_a16_t operator + ( const Quad_a16_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a16_t)( lhs + rhs );
+ }
+
+ Quad_a16_t operator - ( const Quad_a16_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a16_t)( lhs - rhs );
+ }
+ Quad_a16_t operator * ( const Quad_a16_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a16_t)( lhs * rhs );
+ }
+
+ Quad_a16_t operator / ( const Quad_a16_t& b ) {
+ _Quad lhs = (*this).q;
+ _Quad rhs = b.q;
+ return (Quad_a16_t)( lhs / rhs );
+ }
+ };
+
+ struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t {
+ kmp_cmplx128 q;
+
+ kmp_cmplx128_a16_t() : q () {}
+
+ kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
+
+ kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a16_t)( lhs + rhs );
+ }
+ kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a16_t)( lhs - rhs );
+ }
+ kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a16_t)( lhs * rhs );
+ }
+
+ kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) {
+ kmp_cmplx128 lhs = (*this).q;
+ kmp_cmplx128 rhs = b.q;
+ return (kmp_cmplx128_a16_t)( lhs / rhs );
+ }
+ };
+
+#endif
+
+#if ( KMP_ARCH_X86 )
+ #define QUAD_LEGACY Quad_a4_t
+ #define CPLX128_LEG kmp_cmplx128_a4_t
+#else
+ #define QUAD_LEGACY _Quad
+ #define CPLX128_LEG kmp_cmplx128
+#endif
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+extern int __kmp_atomic_mode;
+
+//
+// Atomic locks can easily become contended, so we use queuing locks for them.
+//
+
+typedef kmp_queuing_lock_t kmp_atomic_lock_t;
+
+static inline void
+__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
+{
+#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
- ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
- (ompt_wait_id_t) lck);
- }
-#endif
-}
-
-static inline void
-__kmp_init_atomic_lock( kmp_atomic_lock_t *lck )
-{
- __kmp_init_queuing_lock( lck );
-}
-
-static inline void
-__kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck )
-{
- __kmp_destroy_queuing_lock( lck );
-}
-
-// Global Locks
-
-extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
-extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
-extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
-extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
-extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
-extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
-extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
-extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */
-extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
-extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
-extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
-extern kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
-extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
-
-//
-// Below routines for atomic UPDATE are listed
-//
-
-// 1-byte
-void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
-void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
-void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs );
-// 2-byte
-void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
-void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
-void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs );
-// 4-byte add / sub fixed
-void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-// 4-byte add / sub float
-void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-// 8-byte add / sub fixed
-void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-// 8-byte add / sub float
-void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-// 4-byte fixed
-void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
-void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
-void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-// 8-byte fixed
-void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
-void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
-void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-// 4-byte float
-void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-// 8-byte float
-void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-// 1-, 2-, 4-, 8-byte logical (&&, ||)
-void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-// MIN / MAX
-void __kmpc_atomic_fixed1_max( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only
- void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
-#endif
-#endif
-// .NEQV. (same as xor)
-void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-// .EQV. (same as ~xor)
-void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-// long double type
-void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-// _Quad type
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary
- void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
-#endif
-#endif
-// routines for complex types
-void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx4_mul( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary
- void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
- void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
- void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
- void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
-#endif
-#endif
-
-#if OMP_40_ENABLED
-
-// OpenMP 4.0: x = expr binop x for non-commutative operations.
-// Supported only on IA-32 architecture and Intel(R) 64
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
-void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
-void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
-void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
-void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
-void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
-void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
-void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
-void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
-void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
-void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
-void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
-void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-#endif
-void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary
- void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
- void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
-#endif
-#endif // KMP_HAVE_QUAD
-
-#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-#endif //OMP_40_ENABLED
-
-// routines for mixed types
-
-// RHS=float8
-void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
-
-// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
-void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
-void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
-void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
-void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs );
-
-void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
-void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
-void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
-void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
-void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs );
-
-void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs );
-
-void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
-void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs );
-
-void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
-void __kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
-void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
-void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
-
-void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
-void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
-void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
-void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
-
-void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
-void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
-void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
-void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
-#endif // KMP_HAVE_QUAD
-
-// RHS=cmplx8
-void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
-
-// generic atomic routines
-void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
-
-// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-//
-// Below routines for atomic READ are listed
-//
-
-char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc );
-short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc );
-kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc );
-kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc );
-kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc );
-kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc );
-long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc );
-#if KMP_HAVE_QUAD
-QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc );
-#endif
-// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be
-// returned through an additional parameter
-#if ( KMP_OS_WINDOWS )
- void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
-#else
- kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
-#endif
-kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc );
-kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc );
-#if KMP_HAVE_QUAD
-CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc );
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary
- Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc );
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc );
-#endif
-#endif
-
-
-//
-// Below routines for atomic WRITE are listed
-//
-
-void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs );
-void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs );
-void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
-void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
-void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-#endif
-void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-#if KMP_HAVE_QUAD
-void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary
- void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
-#endif
-#endif
-
-//
-// Below routines for atomic CAPTURE are listed
-//
-
-// 1-byte
-char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
-char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
-char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-// 2-byte
-short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
-short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
-short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-// 4-byte add / sub fixed
-kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-// 4-byte add / sub float
-kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
-kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
-// 8-byte add / sub fixed
-kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-// 8-byte add / sub float
-kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
-kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
-// 4-byte fixed
-kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-// 8-byte fixed
-kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_mul_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-// 4-byte float
-kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
-kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
-// 8-byte float
-kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
-kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
-// 1-, 2-, 4-, 8-byte logical (&&, ||)
-char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-// MIN / MAX
-char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
-kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
-kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
-kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
-#if KMP_HAVE_QUAD
-QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
-QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
-#endif
-// .NEQV. (same as xor)
-char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-// .EQV. (same as ~xor)
-char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
-short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
-kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
-kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
-// long double type
-long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
-long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
-long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
-long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
-#if KMP_HAVE_QUAD
-// _Quad type
-QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
-QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
-QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
-QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
-#endif
-// routines for complex types
-// Workaround for cmplx4 routines - return void; captured value is returned via the argument
-void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
-void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
-void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
-void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
-
-kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
-kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
-kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
-kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
-kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
-kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
-kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
-kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
-#if KMP_HAVE_QUAD
-CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
-CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
-CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
-CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
-#if ( KMP_ARCH_X86 )
- // Routines with 16-byte arguments aligned to 16-byte boundary
- Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
- Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
- Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
- Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
- Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
- Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
-#endif
-#endif
-
-void __kmpc_atomic_start(void);
-void __kmpc_atomic_end(void);
-
-#if OMP_40_ENABLED
-
-// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
-
-char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
-char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
-unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
-char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag);
-char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
-unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
-short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
-short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
-unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
-short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
-short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
-unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
-kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
-kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
-kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
-kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
-kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
-kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
-kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
-kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
-kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
-kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
-kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
-kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
-float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
-float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
-double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
-double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
-long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
-long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
-#if KMP_HAVE_QUAD
-QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
-QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
-#endif
-// Workaround for cmplx4 routines - return void; captured value is returned via the argument
-void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
-void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
-kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
-kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
-kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
-kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
-#if KMP_HAVE_QUAD
-CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
-CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
-#if ( KMP_ARCH_X86 )
- Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
- Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
-#endif
-#endif
-
-// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
-char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs );
-short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs );
-kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
-kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
-float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs );
-double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs );
-long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
-#if KMP_HAVE_QUAD
-QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
-#endif
-// !!! TODO: check if we need a workaround here
-void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );
-//kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
-
-kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
-kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
-#if KMP_HAVE_QUAD
-CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
-#if ( KMP_ARCH_X86 )
- Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
- kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
-#endif
-#endif
-
-// End of OpenMP 4.0 capture
-
-#endif //OMP_40_ENABLED
-
-#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef __cplusplus
- } // extern "C"
-#endif
-
-#endif /* KMP_ATOMIC_H */
-
-// end of file
+ ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
+ ompt_callbacks.ompt_callback(ompt_event_wait_atomic)(
+ (ompt_wait_id_t) lck);
+ }
+#endif
+
+ __kmp_acquire_queuing_lock( lck, gtid );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
+ ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
+ (ompt_wait_id_t) lck);
+ }
+#endif
+}
+
+static inline int
+__kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_test_queuing_lock( lck, gtid );
+}
+
+static inline void
+__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
+{
+ __kmp_release_queuing_lock( lck, gtid );
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
+ ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
+ (ompt_wait_id_t) lck);
+ }
+#endif
+}
+
+static inline void
+__kmp_init_atomic_lock( kmp_atomic_lock_t *lck )
+{
+ __kmp_init_queuing_lock( lck );
+}
+
+static inline void
+__kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck )
+{
+ __kmp_destroy_queuing_lock( lck );
+}
+
+// Global Locks
+
+extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
+extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
+extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
+extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
+extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
+extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
+extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
+extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */
+extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
+extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
+extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
+extern kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
+extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
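A minimal sketch (not part of the patch) of how these per-type locks and the wrapper functions above fit together: for a type with no lock-free hardware path, an update entry point reduces to acquire / operate / release on the matching lock. The function name example_float10_add is hypothetical; the actual declared entry points follow later in this header:

    static void example_float10_add( ident_t *id_ref, int gtid,
                                     long double *lhs, long double rhs )
    {
        __kmp_acquire_atomic_lock( &__kmp_atomic_lock_10r, gtid );
        *lhs = *lhs + rhs;                               // the guarded update
        __kmp_release_atomic_lock( &__kmp_atomic_lock_10r, gtid );
    }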
+
+//
+// Below routines for atomic UPDATE are listed
+//
+
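As an illustration of how the entry points listed below are reached (the exact lowering and gtid plumbing are compiler-specific), a plain 32-bit integer update under #pragma omp atomic is expected to turn into a call such as the one sketched here; loc, counter and n are hypothetical user-side names:

    // Source-level construct:
    //   #pragma omp atomic
    //   counter += n;                     // kmp_int32 counter, n
    // Roughly what the front end emits:
    kmp_int32 gtid = __kmpc_global_thread_num( &loc );
    __kmpc_atomic_fixed4_add( &loc, gtid, &counter, n );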
+// 1-byte
+void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
+void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
+void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs );
+// 2-byte
+void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
+void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
+void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs );
+// 4-byte add / sub fixed
+void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+// 4-byte add / sub float
+void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+// 8-byte add / sub fixed
+void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+// 8-byte add / sub float
+void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+// 4-byte fixed
+void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
+void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
+void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+// 8-byte fixed
+void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
+void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
+void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+// 4-byte float
+void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+// 8-byte float
+void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+// 1-, 2-, 4-, 8-byte logical (&&, ||)
+void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+// MIN / MAX
+void __kmpc_atomic_fixed1_max( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only
+ void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+#endif
+#endif
+// .NEQV. (same as xor)
+void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+// .EQV. (same as ~xor)
+void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+// long double type
+void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+// _Quad type
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary
+ void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+#endif
+#endif
+// routines for complex types
+void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx4_mul( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary
+ void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+ void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+ void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+ void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+#endif
+#endif
+
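+// Illustrative usage sketch (not part of the runtime interface): the entry
+// points above are what an OpenMP compiler is expected to call for a plain
+// atomic update. The names `loc` and `gtid` below are assumptions standing in
+// for the source-location record and the caller's global thread id.
+#if 0
+static void example_atomic_update( ident_t *loc, int gtid, kmp_real64 *x, kmp_real64 y )
+{
+    // Equivalent of:
+    //     #pragma omp atomic
+    //     *x += y;
+    __kmpc_atomic_float8_add( loc, gtid, x, y );
+}
+#endif
+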
+#if OMP_40_ENABLED
+
+// OpenMP 4.0: x = expr binop x for non-commutative operations.
+// Supported only on IA-32 architecture and Intel(R) 64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
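+// Usage sketch (illustrative only): a non-commutative update with the updated
+// variable on the right-hand side maps to a `_rev` entry point. `loc` and
+// `gtid` are assumed to be available at the call site.
+#if 0
+static void example_atomic_sub_rev( ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e )
+{
+    // Equivalent of:
+    //     #pragma omp atomic
+    //     *x = e - *x;
+    __kmpc_atomic_fixed4_sub_rev( loc, gtid, x, e );
+}
+#endif
+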
+void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
+void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
+void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
+void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
+void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
+void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
+void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
+void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
+void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
+void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
+void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
+void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
+void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
+void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary
+ void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+ void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+#endif
+#endif // KMP_HAVE_QUAD
+
+#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+#endif //OMP_40_ENABLED
+
+// routines for mixed types
+
+// RHS=float8
+void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
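+
+// Usage sketch (illustrative only): when the right-hand side has a wider
+// floating-point type than the updated variable, the mixed-type entry points
+// above let the update be performed with the wider operand. `loc` and `gtid`
+// are assumptions.
+#if 0
+static void example_atomic_mixed( ident_t *loc, int gtid, kmp_real32 *x, kmp_real64 d )
+{
+    // Equivalent of:
+    //     #pragma omp atomic
+    //     *x /= d;            // float variable updated with a double divisor
+    __kmpc_atomic_float4_div_float8( loc, gtid, x, d );
+}
+#endif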
+
+// RHS=float16 (deprecated; to be removed once we are sure the compiler does not use these routines)
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
+void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
+void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
+void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
+void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs );
+
+void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
+void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
+void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
+void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
+void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs );
+
+void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs );
+
+void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
+void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs );
+
+void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
+void __kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
+void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
+void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
+
+void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
+void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
+void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
+void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
+
+void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
+void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
+void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
+void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
+#endif // KMP_HAVE_QUAD
+
+// RHS=cmplx8
+void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
+
+// generic atomic routines
+void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
+void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
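+
+// Usage sketch (illustrative only): the generic routines take the operand size
+// from the routine name and apply a caller-supplied callback. The sketch below
+// assumes the callback writes `*lhs op *rhs` into its first argument, which is
+// how the reference implementation invokes it; `loc` and `gtid` are likewise
+// assumptions.
+#if 0
+static void example_op_8( void *out, void *lhs, void *rhs )
+{
+    // new value = current value + rhs, all 8-byte integers
+    *(kmp_int64 *)out = *(kmp_int64 *)lhs + *(kmp_int64 *)rhs;
+}
+
+static void example_atomic_generic( ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 y )
+{
+    __kmpc_atomic_8( loc, gtid, x, &y, example_op_8 );
+}
+#endif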
+
+// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+//
+// Routines for atomic READ are listed below
+//
+
+char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc );
+short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc );
+kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc );
+kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc );
+kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc );
+kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc );
+long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc );
+#if KMP_HAVE_QUAD
+QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc );
+#endif
+// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be
+// returned through an additional parameter
+#if ( KMP_OS_WINDOWS )
+ void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
+#else
+ kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
+#endif
+kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc );
+kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc );
+#if KMP_HAVE_QUAD
+CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc );
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary
+ Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc );
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc );
+#endif
+#endif
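+
+// Usage sketch (illustrative only): an atomic read returns the value loaded
+// from the given address; on Windows* OS the cmplx4 variant instead stores the
+// result through its extra first parameter (see the CQ220361 note above).
+// `loc_id` and `gtid` are assumptions.
+#if 0
+static kmp_int32 example_atomic_read( ident_t *loc_id, int gtid, kmp_int32 *x )
+{
+    // Equivalent of:
+    //     #pragma omp atomic read
+    //     v = *x;
+    return __kmpc_atomic_fixed4_rd( loc_id, gtid, x );
+}
+#endif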
+
+
+//
+// Routines for atomic WRITE are listed below
+//
+
+void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs );
+void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs );
+void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
+void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
+void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
+void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary
+ void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+#endif
+#endif
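+
+// Usage sketch (illustrative only): an atomic write stores `rhs` into `lhs` as
+// a single atomic store. `loc` and `gtid` are assumptions.
+#if 0
+static void example_atomic_write( ident_t *loc, int gtid, kmp_real64 *x, kmp_real64 v )
+{
+    // Equivalent of:
+    //     #pragma omp atomic write
+    //     *x = v;
+    __kmpc_atomic_float8_wr( loc, gtid, x, v );
+}
+#endif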
+
+//
+// Routines for atomic CAPTURE are listed below
+//
+
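+// Usage sketch (illustrative only): the `_cpt` routines both update the
+// variable and return a captured value. The `flag` argument is assumed to
+// select which value is returned (non-zero: the updated value, zero: the value
+// before the update), matching the reference implementation; `loc` and `gtid`
+// are likewise assumptions.
+#if 0
+static kmp_int32 example_atomic_capture( ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 y )
+{
+    // Equivalent of:
+    //     #pragma omp atomic capture
+    //     { *x += y; v = *x; }
+    return __kmpc_atomic_fixed4_add_cpt( loc, gtid, x, y, 1 );
+}
+#endif
+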
+// 1-byte
+char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
+char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
+char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+// 2-byte
+short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
+short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
+short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+// 4-byte add / sub fixed
+kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+// 4-byte add / sub float
+kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
+kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
+// 8-byte add / sub fixed
+kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+// 8-byte add / sub float
+kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+// 4-byte fixed
+kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+// 8-byte fixed
+kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_mul_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+// 4-byte float
+kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
+kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
+// 8-byte float
+kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+// 1-, 2-, 4-, 8-byte logical (&&, ||)
+char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+// MIN / MAX
+char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
+kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
+kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+#if KMP_HAVE_QUAD
+QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+#endif
+// .NEQV. (same as xor)
+char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+// .EQV. (same as ~xor)
+char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
+short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
+kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
+kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
+// long double type
+long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
+long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
+long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
+long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
+#if KMP_HAVE_QUAD
+// _Quad type
+QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+#endif
+// routines for complex types
+// Workaround for cmplx4 routines - return void; captured value is returned via the argument
+void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
+void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
+void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
+void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
+
+kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
+kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
+kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
+kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
+kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
+kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
+kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
+kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
+#if KMP_HAVE_QUAD
+CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
+CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
+CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
+CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
+#if ( KMP_ARCH_X86 )
+ // Routines with 16-byte arguments aligned to 16-byte boundary
+ Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
+ Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
+ Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
+ Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
+ Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
+ Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
+#endif
+#endif
+
+void __kmpc_atomic_start(void);
+void __kmpc_atomic_end(void);
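+
+// Usage sketch (illustrative only): for a type/operation combination with no
+// specialized entry point, the compiler is assumed to fall back to bracketing
+// an ordinary update with __kmpc_atomic_start / __kmpc_atomic_end, which
+// serialize all such updates.
+#if 0
+static void example_atomic_fallback( long double *x, long double y )
+{
+    __kmpc_atomic_start();
+    *x = *x * y + 1.0L;      // arbitrary update performed under the global atomic lock
+    __kmpc_atomic_end();
+}
+#endif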
+
+#if OMP_40_ENABLED
+
+// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
+
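+// Usage sketch (illustrative only): the `_cpt_rev` routines combine a reversed
+// (non-commutative) update with a capture. As above, `flag` is assumed to pick
+// the captured value (non-zero: new value, zero: old value); `loc` and `gtid`
+// are assumptions.
+#if 0
+static kmp_int32 example_atomic_capture_rev( ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e )
+{
+    // Equivalent of:
+    //     #pragma omp atomic capture
+    //     { *x = e / *x; v = *x; }
+    return __kmpc_atomic_fixed4_div_cpt_rev( loc, gtid, x, e, 1 );
+}
+#endif
+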
+char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
+char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
+unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
+char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag);
+char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
+unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
+short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
+short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
+unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
+short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
+short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
+unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
+kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
+kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
+kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
+kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
+kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
+kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
+kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
+kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
+kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
+kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
+kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
+kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
+float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
+float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
+double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
+double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
+long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
+long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
+#if KMP_HAVE_QUAD
+QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
+QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
+#endif
+// Workaround for cmplx4 routines - return void; captured value is returned via the argument
+void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
+void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
+kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
+kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
+kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
+kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
+#if KMP_HAVE_QUAD
+CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
+CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
+#if ( KMP_ARCH_X86 )
+ Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
+ Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
+#endif
+#endif
+
+// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
+char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs );
+short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs );
+kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
+kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
+float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs );
+double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs );
+long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
+QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
+// !!! TODO: check if we need a workaround here
+void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );
+//kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
+
+kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
+kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
+CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
+#if ( KMP_ARCH_X86 )
+ Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
+ kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
+#endif
+#endif
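+
+// Usage sketch (illustrative only): the `_swp` routines implement the
+// capture-write form by storing `rhs` and returning the previous value.
+// `loc` and `gtid` are assumptions.
+#if 0
+static kmp_int32 example_atomic_swap( ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e )
+{
+    // Equivalent of:
+    //     #pragma omp atomic capture
+    //     { v = *x; *x = e; }
+    return __kmpc_atomic_fixed4_swp( loc, gtid, x, e );
+}
+#endif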
+
+// End of OpenMP 4.0 capture
+
+#endif //OMP_40_ENABLED
+
+#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif
+
+#endif /* KMP_ATOMIC_H */
+
+// end of file
diff --git a/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp b/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp
index 23986c73ba..6b66dabba2 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_barrier.cpp
@@ -1,226 +1,226 @@
-/*
- * kmp_barrier.cpp
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_wait_release.h"
-#include "kmp_stats.h"
-#include "kmp_itt.h"
-#include "kmp_os.h"
-
-
-#if KMP_MIC
-#include <immintrin.h>
-#define USE_NGO_STORES 1
-#endif // KMP_MIC
-
-#if KMP_MIC && USE_NGO_STORES
-// ICV copying
-#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
-#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
-#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
-#define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
-#else
-#define ngo_load(src) ((void)0)
-#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
-#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
-#define ngo_sync() ((void)0)
-#endif /* KMP_MIC && USE_NGO_STORES */
-
-void __kmp_print_structure(void); // Forward declaration
-
-// ---------------------------- Barrier Algorithms ----------------------------
-
-// Linear Barrier
-static void
-__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- void (*reduce)(void *, void *)
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
+/*
+ * kmp_barrier.cpp
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_wait_release.h"
+#include "kmp_stats.h"
+#include "kmp_itt.h"
+#include "kmp_os.h"
+
+
+#if KMP_MIC
+#include <immintrin.h>
+#define USE_NGO_STORES 1
+#endif // KMP_MIC
+
+#if KMP_MIC && USE_NGO_STORES
+// ICV copying
+#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
+#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
+#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
+#define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
+#else
+#define ngo_load(src) ((void)0)
+#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
+#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
+#define ngo_sync() ((void)0)
+#endif /* KMP_MIC && USE_NGO_STORES */
+
+void __kmp_print_structure(void); // Forward declaration
+
+// ---------------------------- Barrier Algorithms ----------------------------
+
+// Linear Barrier
+static void
+__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ void (*reduce)(void *, void *)
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
kmp_team_t *team = this_thr->th.th_team;
kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
kmp_info_t **other_threads = team->t.t_threads;
-
- KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
- KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - save arrive time to the thread
- if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
- }
-#endif
- // We now perform a linear reduction to signal that all of the threads have arrived.
- if (!KMP_MASTER_TID(tid)) {
- KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
- "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
- thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
- // Mark arrival to master thread
- /* After performing this write, a worker thread may not assume that the team is valid
- any more - it could be deallocated by the master thread at any time. */
- kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
- flag.release();
- } else {
+
+ KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+ KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - save arrive time to the thread
+ if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
+ }
+#endif
+ // We now perform a linear reduction to signal that all of the threads have arrived.
+ if (!KMP_MASTER_TID(tid)) {
+ KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
+ "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
+ thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
+ // Mark arrival to master thread
+ /* After performing this write, a worker thread may not assume that the team is valid
+ any more - it could be deallocated by the master thread at any time. */
+ kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
+ flag.release();
+ } else {
kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
int nproc = this_thr->th.th_team_nproc;
int i;
- // Don't have to worry about sleep bit here or atomic since team setting
+ // Don't have to worry about sleep bit here or atomic since team setting
kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
-
- // Collect all the worker team member threads.
- for (i=1; i<nproc; ++i) {
-#if KMP_CACHE_MANAGE
- // Prefetch next thread's arrived count
- if (i+1 < nproc)
- KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
-#endif /* KMP_CACHE_MANAGE */
- KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
- "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(i, team), team->t.t_id, i,
- &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
-
- // Wait for worker thread to arrive
- kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
- flag.wait(this_thr, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - write min of the thread time and the other thread time to the thread.
- if (__kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
- other_threads[i]->th.th_bar_min_time);
- }
-#endif
- if (reduce) {
- KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
- team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
- (*reduce)(this_thr->th.th_local.reduce_data,
- other_threads[i]->th.th_local.reduce_data);
- }
- }
- // Don't have to worry about sleep bit here or atomic since team setting
- team_bar->b_arrived = new_state;
- KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
- gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
- }
- KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-static void
-__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- int propagate_icvs
- USE_ITT_BUILD_ARG(void *itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
+
+ // Collect all the worker team member threads.
+ for (i=1; i<nproc; ++i) {
+#if KMP_CACHE_MANAGE
+ // Prefetch next thread's arrived count
+ if (i+1 < nproc)
+ KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
+#endif /* KMP_CACHE_MANAGE */
+ KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
+ "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(i, team), team->t.t_id, i,
+ &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
+
+ // Wait for worker thread to arrive
+ kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
+ flag.wait(this_thr, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - write min of the thread time and the other thread time to the thread.
+ if (__kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
+ other_threads[i]->th.th_bar_min_time);
+ }
+#endif
+ if (reduce) {
+ KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
+ team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
+ (*reduce)(this_thr->th.th_local.reduce_data,
+ other_threads[i]->th.th_local.reduce_data);
+ }
+ }
+ // Don't have to worry about sleep bit here or atomic since team setting
+ team_bar->b_arrived = new_state;
+ KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
+ gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
+ }
+ KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+static void
+__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ int propagate_icvs
+ USE_ITT_BUILD_ARG(void *itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
kmp_team_t *team;
-
- if (KMP_MASTER_TID(tid)) {
+
+ if (KMP_MASTER_TID(tid)) {
unsigned int i;
kmp_uint32 nproc = this_thr->th.th_team_nproc;
kmp_info_t **other_threads;
-
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- other_threads = team->t.t_threads;
-
- KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-
- if (nproc > 1) {
-#if KMP_BARRIER_ICV_PUSH
- {
- KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
- if (propagate_icvs) {
- ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
- for (i=1; i<nproc; ++i) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
- ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
- }
- ngo_sync();
- }
- }
-#endif // KMP_BARRIER_ICV_PUSH
-
- // Now, release all of the worker threads
- for (i=1; i<nproc; ++i) {
-#if KMP_CACHE_MANAGE
- // Prefetch next thread's go flag
- if (i+1 < nproc)
- KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
-#endif /* KMP_CACHE_MANAGE */
- KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
- "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
- other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
- &other_threads[i]->th.th_bar[bt].bb.b_go,
- other_threads[i]->th.th_bar[bt].bb.b_go,
- other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
- kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
- flag.release();
- }
- }
- } else { // Wait for the MASTER thread to release us
- KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
- gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
- kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
- flag.wait(this_thr, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
- // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
- // Cancel wait on previous parallel region...
- __kmp_itt_task_starting(itt_sync_obj);
-
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
- if (itt_sync_obj != NULL)
- // Call prepare as early as possible for "new" barrier
- __kmp_itt_task_finished(itt_sync_obj);
- } else
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- // Early exit for reaping threads releasing forkjoin barrier
- if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
- return;
- // The worker thread may now assume that the team is valid.
-#ifdef KMP_DEBUG
- tid = __kmp_tid_from_gtid(gtid);
- team = __kmp_threads[gtid]->th.th_team;
-#endif
- KMP_DEBUG_ASSERT(team != NULL);
- TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
- KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
- gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
- KMP_MB(); // Flush all pending memory write invalidates.
- }
- KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-// Tree barrier
-static void
-__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- void (*reduce)(void *, void *)
- USE_ITT_BUILD_ARG(void *itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
+
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ other_threads = team->t.t_threads;
+
+ KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+
+ if (nproc > 1) {
+#if KMP_BARRIER_ICV_PUSH
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
+ for (i=1; i<nproc; ++i) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
+ ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
+ ngo_sync();
+ }
+ }
+#endif // KMP_BARRIER_ICV_PUSH
+
+ // Now, release all of the worker threads
+ for (i=1; i<nproc; ++i) {
+#if KMP_CACHE_MANAGE
+ // Prefetch next thread's go flag
+ if (i+1 < nproc)
+ KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
+#endif /* KMP_CACHE_MANAGE */
+ KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
+ "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
+ other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
+ &other_threads[i]->th.th_bar[bt].bb.b_go,
+ other_threads[i]->th.th_bar[bt].bb.b_go,
+ other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
+ kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
+ flag.release();
+ }
+ }
+ } else { // Wait for the MASTER thread to release us
+ KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
+ gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
+ kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+ flag.wait(this_thr, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
+ // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
+ // Cancel wait on previous parallel region...
+ __kmp_itt_task_starting(itt_sync_obj);
+
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ if (itt_sync_obj != NULL)
+ // Call prepare as early as possible for "new" barrier
+ __kmp_itt_task_finished(itt_sync_obj);
+ } else
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ // Early exit for reaping threads releasing forkjoin barrier
+ if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
+ return;
+ // The worker thread may now assume that the team is valid.
+#ifdef KMP_DEBUG
+ tid = __kmp_tid_from_gtid(gtid);
+ team = __kmp_threads[gtid]->th.th_team;
+#endif
+ KMP_DEBUG_ASSERT(team != NULL);
+ TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
+ KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
+ gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
+ KMP_MB(); // Flush all pending memory write invalidates.
+ }
+ KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+// Tree barrier
+static void
+__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ void (*reduce)(void *, void *)
+ USE_ITT_BUILD_ARG(void *itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
kmp_team_t *team = this_thr->th.th_team;
kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
kmp_info_t **other_threads = team->t.t_threads;
@@ -230,92 +230,92 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
kmp_uint32 child;
kmp_uint32 child_tid;
kmp_uint64 new_state;
-
- KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
- KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - save arrive time to the thread
- if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
- }
-#endif
- // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
- child_tid = (tid << branch_bits) + 1;
- if (child_tid < nproc) {
- // Parent threads wait for all their children to arrive
- new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
- child = 1;
- do {
+
+ KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+ KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - save arrive time to the thread
+ if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
+ }
+#endif
+ // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
+ child_tid = (tid << branch_bits) + 1;
+ if (child_tid < nproc) {
+ // Parent threads wait for all their children to arrive
+ new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
+ child = 1;
+ do {
kmp_info_t *child_thr = other_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
-#if KMP_CACHE_MANAGE
- // Prefetch next thread's arrived count
- if (child+1 <= branch_factor && child_tid+1 < nproc)
- KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
-#endif /* KMP_CACHE_MANAGE */
- KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
- "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
- &child_bar->b_arrived, new_state));
- // Wait for child to arrive
- kmp_flag_64 flag(&child_bar->b_arrived, new_state);
- flag.wait(this_thr, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - write min of the thread time and a child time to the thread.
- if (__kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
- child_thr->th.th_bar_min_time);
- }
-#endif
- if (reduce) {
- KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid));
- (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
- }
- child++;
- child_tid++;
- }
- while (child <= branch_factor && child_tid < nproc);
- }
-
- if (!KMP_MASTER_TID(tid)) { // Worker threads
+#if KMP_CACHE_MANAGE
+ // Prefetch next thread's arrived count
+ if (child+1 <= branch_factor && child_tid+1 < nproc)
+ KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
+#endif /* KMP_CACHE_MANAGE */
+ KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
+ "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
+ &child_bar->b_arrived, new_state));
+ // Wait for child to arrive
+ kmp_flag_64 flag(&child_bar->b_arrived, new_state);
+ flag.wait(this_thr, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - write min of the thread time and a child time to the thread.
+ if (__kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
+ child_thr->th.th_bar_min_time);
+ }
+#endif
+ if (reduce) {
+ KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid));
+ (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
+ }
+ child++;
+ child_tid++;
+ }
+ while (child <= branch_factor && child_tid < nproc);
+ }
+
+ if (!KMP_MASTER_TID(tid)) { // Worker threads
kmp_int32 parent_tid = (tid - 1) >> branch_bits;
-
- KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
- "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
- &thr_bar->b_arrived, thr_bar->b_arrived,
- thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
-
- // Mark arrival to parent thread
- /* After performing this write, a worker thread may not assume that the team is valid
- any more - it could be deallocated by the master thread at any time. */
- kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
- flag.release();
- } else {
- // Need to update the team arrived pointer if we are the master thread
- if (nproc > 1) // New value was already computed above
- team->t.t_bar[bt].b_arrived = new_state;
- else
- team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
- KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
- gtid, team->t.t_id, tid, team->t.t_id,
- &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
- }
- KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-static void
-__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- int propagate_icvs
- USE_ITT_BUILD_ARG(void *itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
+
+ KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
+ "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
+ &thr_bar->b_arrived, thr_bar->b_arrived,
+ thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
+
+ // Mark arrival to parent thread
+ /* After performing this write, a worker thread may not assume that the team is valid
+ any more - it could be deallocated by the master thread at any time. */
+ kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
+ flag.release();
+ } else {
+ // Need to update the team arrived pointer if we are the master thread
+ if (nproc > 1) // New value was already computed above
+ team->t.t_bar[bt].b_arrived = new_state;
+ else
+ team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
+ KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
+ gtid, team->t.t_id, tid, team->t.t_id,
+ &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
+ }
+ KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+static void
+__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ int propagate_icvs
+ USE_ITT_BUILD_ARG(void *itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
kmp_team_t *team;
kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
kmp_uint32 nproc;
@@ -323,102 +323,102 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
kmp_uint32 branch_factor = 1 << branch_bits;
kmp_uint32 child;
kmp_uint32 child_tid;
-
- // Perform a tree release for all of the threads that have been gathered
- if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet
- KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
- gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
- // Wait for parent thread to release us
- kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
- flag.wait(this_thr, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
- // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
- // Cancel wait on previous parallel region...
- __kmp_itt_task_starting(itt_sync_obj);
-
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
- if (itt_sync_obj != NULL)
- // Call prepare as early as possible for "new" barrier
- __kmp_itt_task_finished(itt_sync_obj);
- } else
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- // Early exit for reaping threads releasing forkjoin barrier
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- // The worker thread may now assume that the team is valid.
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- tid = __kmp_tid_from_gtid(gtid);
-
- TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
- KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
- gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
- KMP_MB(); // Flush all pending memory write invalidates.
- } else {
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
- }
- nproc = this_thr->th.th_team_nproc;
- child_tid = (tid << branch_bits) + 1;
-
- if (child_tid < nproc) {
+
+ // Perform a tree release for all of the threads that have been gathered
+ if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet
+ KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
+ gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
+ // Wait for parent thread to release us
+ kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+ flag.wait(this_thr, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
+ // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
+ // Cancel wait on previous parallel region...
+ __kmp_itt_task_starting(itt_sync_obj);
+
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ if (itt_sync_obj != NULL)
+ // Call prepare as early as possible for "new" barrier
+ __kmp_itt_task_finished(itt_sync_obj);
+ } else
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ // Early exit for reaping threads releasing forkjoin barrier
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ // The worker thread may now assume that the team is valid.
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ tid = __kmp_tid_from_gtid(gtid);
+
+ TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
+ KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
+ gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
+ KMP_MB(); // Flush all pending memory write invalidates.
+ } else {
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+ }
+ nproc = this_thr->th.th_team_nproc;
+ child_tid = (tid << branch_bits) + 1;
+
+ if (child_tid < nproc) {
kmp_info_t **other_threads = team->t.t_threads;
- child = 1;
- // Parent threads release all their children
- do {
+ child = 1;
+ // Parent threads release all their children
+ do {
kmp_info_t *child_thr = other_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
-#if KMP_CACHE_MANAGE
- // Prefetch next thread's go count
- if (child+1 <= branch_factor && child_tid+1 < nproc)
- KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
-#endif /* KMP_CACHE_MANAGE */
-
-#if KMP_BARRIER_ICV_PUSH
- {
- KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
- if (propagate_icvs) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
- team, child_tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
- }
- }
-#endif // KMP_BARRIER_ICV_PUSH
-            KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
- "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
- child_tid, &child_bar->b_go, child_bar->b_go,
- child_bar->b_go + KMP_BARRIER_STATE_BUMP));
- // Release child from barrier
- kmp_flag_64 flag(&child_bar->b_go, child_thr);
- flag.release();
- child++;
- child_tid++;
- }
- while (child <= branch_factor && child_tid < nproc);
- }
- KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-
-// Hyper Barrier
-static void
-__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- void (*reduce)(void *, void *)
- USE_ITT_BUILD_ARG(void *itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
+#if KMP_CACHE_MANAGE
+ // Prefetch next thread's go count
+ if (child+1 <= branch_factor && child_tid+1 < nproc)
+ KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
+#endif /* KMP_CACHE_MANAGE */
+
+#if KMP_BARRIER_ICV_PUSH
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
+ team, child_tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
+ }
+#endif // KMP_BARRIER_ICV_PUSH
+            KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
+ "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
+ child_tid, &child_bar->b_go, child_bar->b_go,
+ child_bar->b_go + KMP_BARRIER_STATE_BUMP));
+ // Release child from barrier
+ kmp_flag_64 flag(&child_bar->b_go, child_thr);
+ flag.release();
+ child++;
+ child_tid++;
+ }
+ while (child <= branch_factor && child_tid < nproc);
+ }
+ KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+
+// Hyper Barrier
+static void
+__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ void (*reduce)(void *, void *)
+ USE_ITT_BUILD_ARG(void *itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
kmp_team_t *team = this_thr->th.th_team;
kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
kmp_info_t **other_threads = team->t.t_threads;
@@ -428,103 +428,103 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
kmp_uint32 branch_factor = 1 << branch_bits;
kmp_uint32 offset;
kmp_uint32 level;
-
- KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-
- KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - save arrive time to the thread
- if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
- }
-#endif
- /* Perform a hypercube-embedded tree gather to wait until all of the threads have
- arrived, and reduce any required data as we go. */
- kmp_flag_64 p_flag(&thr_bar->b_arrived);
- for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
- {
+
+ KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+
+ KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - save arrive time to the thread
+ if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
+ }
+#endif
+ /* Perform a hypercube-embedded tree gather to wait until all of the threads have
+ arrived, and reduce any required data as we go. */
+ kmp_flag_64 p_flag(&thr_bar->b_arrived);
+ for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
+ {
kmp_uint32 child;
kmp_uint32 child_tid;
-
- if (((tid >> level) & (branch_factor - 1)) != 0) {
+
+ if (((tid >> level) & (branch_factor - 1)) != 0) {
kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) -1);
-
- KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
- "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
- &thr_bar->b_arrived, thr_bar->b_arrived,
- thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
- // Mark arrival to parent thread
- /* After performing this write (in the last iteration of the enclosing for loop),
- a worker thread may not assume that the team is valid any more - it could be
- deallocated by the master thread at any time. */
- p_flag.set_waiter(other_threads[parent_tid]);
- p_flag.release();
- break;
- }
-
- // Parent threads wait for children to arrive
- if (new_state == KMP_BARRIER_UNUSED_STATE)
- new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
- for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
- child++, child_tid+=(1 << level))
- {
+
+ KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
+ "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
+ &thr_bar->b_arrived, thr_bar->b_arrived,
+ thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
+ // Mark arrival to parent thread
+ /* After performing this write (in the last iteration of the enclosing for loop),
+ a worker thread may not assume that the team is valid any more - it could be
+ deallocated by the master thread at any time. */
+ p_flag.set_waiter(other_threads[parent_tid]);
+ p_flag.release();
+ break;
+ }
+
+ // Parent threads wait for children to arrive
+ if (new_state == KMP_BARRIER_UNUSED_STATE)
+ new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
+ for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
+ child++, child_tid+=(1 << level))
+ {
kmp_info_t *child_thr = other_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
-#if KMP_CACHE_MANAGE
+#if KMP_CACHE_MANAGE
kmp_uint32 next_child_tid = child_tid + (1 << level);
- // Prefetch next thread's arrived count
- if (child+1 < branch_factor && next_child_tid < num_threads)
- KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
-#endif /* KMP_CACHE_MANAGE */
- KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
- "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
- &child_bar->b_arrived, new_state));
- // Wait for child to arrive
- kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
- c_flag.wait(this_thr, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - write min of the thread time and a child time to the thread.
- if (__kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
- child_thr->th.th_bar_min_time);
- }
-#endif
- if (reduce) {
- KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid));
- (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
- }
- }
- }
-
- if (KMP_MASTER_TID(tid)) {
- // Need to update the team arrived pointer if we are the master thread
- if (new_state == KMP_BARRIER_UNUSED_STATE)
- team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
- else
- team->t.t_bar[bt].b_arrived = new_state;
- KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
- gtid, team->t.t_id, tid, team->t.t_id,
- &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
- }
- KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-// The reverse versions seem to beat the forward versions overall
-#define KMP_REVERSE_HYPER_BAR
-static void
-__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- int propagate_icvs
- USE_ITT_BUILD_ARG(void *itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
+ // Prefetch next thread's arrived count
+ if (child+1 < branch_factor && next_child_tid < num_threads)
+ KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
+#endif /* KMP_CACHE_MANAGE */
+ KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
+ "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
+ &child_bar->b_arrived, new_state));
+ // Wait for child to arrive
+ kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
+ c_flag.wait(this_thr, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - write min of the thread time and a child time to the thread.
+ if (__kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
+ child_thr->th.th_bar_min_time);
+ }
+#endif
+ if (reduce) {
+ KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid));
+ (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
+ }
+ }
+ }
+
+ if (KMP_MASTER_TID(tid)) {
+ // Need to update the team arrived pointer if we are the master thread
+ if (new_state == KMP_BARRIER_UNUSED_STATE)
+ team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
+ else
+ team->t.t_bar[bt].b_arrived = new_state;
+ KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
+ gtid, team->t.t_id, tid, team->t.t_id,
+ &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
+ }
+ KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+// The reverse versions seem to beat the forward versions overall
+#define KMP_REVERSE_HYPER_BAR
+static void
+__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ int propagate_icvs
+ USE_ITT_BUILD_ARG(void *itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
kmp_team_t *team;
kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
kmp_info_t **other_threads;
@@ -535,1208 +535,1208 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid
kmp_uint32 child_tid;
kmp_uint32 offset;
kmp_uint32 level;
-
- /* Perform a hypercube-embedded tree release for all of the threads that have been gathered.
- If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse
- order of the corresponding gather, otherwise threads are released in the same order. */
- if (KMP_MASTER_TID(tid)) { // master
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs) { // master already has ICVs in final destination; copy
- copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
- }
-#endif
- }
- else { // Handle fork barrier workers who aren't part of a team yet
- KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
- gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
- // Wait for parent thread to release us
- kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
- flag.wait(this_thr, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
- // In fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
- // Cancel wait on previous parallel region...
- __kmp_itt_task_starting(itt_sync_obj);
-
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
- if (itt_sync_obj != NULL)
- // Call prepare as early as possible for "new" barrier
- __kmp_itt_task_finished(itt_sync_obj);
- } else
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- // Early exit for reaping threads releasing forkjoin barrier
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- // The worker thread may now assume that the team is valid.
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- tid = __kmp_tid_from_gtid(gtid);
-
- TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
- KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
- gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
- KMP_MB(); // Flush all pending memory write invalidates.
- }
- num_threads = this_thr->th.th_team_nproc;
- other_threads = team->t.t_threads;
-
-#ifdef KMP_REVERSE_HYPER_BAR
- // Count up to correct level for parent
- for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
- level+=branch_bits, offset<<=branch_bits);
-
- // Now go down from there
- for (level-=branch_bits, offset>>=branch_bits; offset != 0;
- level-=branch_bits, offset>>=branch_bits)
-#else
- // Go down the tree, level by level
- for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
-#endif // KMP_REVERSE_HYPER_BAR
- {
-#ifdef KMP_REVERSE_HYPER_BAR
- /* Now go in reverse order through the children, highest to lowest.
- Initial setting of child is conservative here. */
- child = num_threads >> ((level==0)?level:level-1);
- for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
- child>=1; child--, child_tid-=(1<<level))
-#else
- if (((tid >> level) & (branch_factor - 1)) != 0)
-            // No need to go lower than this, since this is the level at which the parent would be notified
- break;
- // Iterate through children on this level of the tree
- for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
- child++, child_tid+=(1<<level))
-#endif // KMP_REVERSE_HYPER_BAR
- {
- if (child_tid >= num_threads) continue; // Child doesn't exist so keep going
- else {
+
+ /* Perform a hypercube-embedded tree release for all of the threads that have been gathered.
+ If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse
+ order of the corresponding gather, otherwise threads are released in the same order. */
+ if (KMP_MASTER_TID(tid)) { // master
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs) { // master already has ICVs in final destination; copy
+ copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
+ }
+#endif
+ }
+ else { // Handle fork barrier workers who aren't part of a team yet
+ KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
+ gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
+ // Wait for parent thread to release us
+ kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+ flag.wait(this_thr, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
+ // In fork barrier where we could not get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
+ // Cancel wait on previous parallel region...
+ __kmp_itt_task_starting(itt_sync_obj);
+
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ if (itt_sync_obj != NULL)
+ // Call prepare as early as possible for "new" barrier
+ __kmp_itt_task_finished(itt_sync_obj);
+ } else
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ // Early exit for reaping threads releasing forkjoin barrier
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ // The worker thread may now assume that the team is valid.
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ tid = __kmp_tid_from_gtid(gtid);
+
+ TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
+ KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
+ gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
+ KMP_MB(); // Flush all pending memory write invalidates.
+ }
+ num_threads = this_thr->th.th_team_nproc;
+ other_threads = team->t.t_threads;
+
+#ifdef KMP_REVERSE_HYPER_BAR
+ // Count up to correct level for parent
+ for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
+ level+=branch_bits, offset<<=branch_bits);
+
+ // Now go down from there
+ for (level-=branch_bits, offset>>=branch_bits; offset != 0;
+ level-=branch_bits, offset>>=branch_bits)
+#else
+ // Go down the tree, level by level
+ for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
+#endif // KMP_REVERSE_HYPER_BAR
+ {
+#ifdef KMP_REVERSE_HYPER_BAR
+ /* Now go in reverse order through the children, highest to lowest.
+ Initial setting of child is conservative here. */
+ child = num_threads >> ((level==0)?level:level-1);
+ for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
+ child>=1; child--, child_tid-=(1<<level))
+#else
+ if (((tid >> level) & (branch_factor - 1)) != 0)
+            // No need to go lower than this, since this is the level at which the parent would be notified
+ break;
+ // Iterate through children on this level of the tree
+ for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
+ child++, child_tid+=(1<<level))
+#endif // KMP_REVERSE_HYPER_BAR
+ {
+ if (child_tid >= num_threads) continue; // Child doesn't exist so keep going
+ else {
kmp_info_t *child_thr = other_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
-#if KMP_CACHE_MANAGE
+#if KMP_CACHE_MANAGE
kmp_uint32 next_child_tid = child_tid - (1 << level);
- // Prefetch next thread's go count
-# ifdef KMP_REVERSE_HYPER_BAR
- if (child-1 >= 1 && next_child_tid < num_threads)
-# else
- if (child+1 < branch_factor && next_child_tid < num_threads)
-# endif // KMP_REVERSE_HYPER_BAR
- KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
-#endif /* KMP_CACHE_MANAGE */
-
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs) // push my fixed ICVs to my child
- copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
-#endif // KMP_BARRIER_ICV_PUSH
-
-                KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
- "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
- child_tid, &child_bar->b_go, child_bar->b_go,
- child_bar->b_go + KMP_BARRIER_STATE_BUMP));
- // Release child from barrier
- kmp_flag_64 flag(&child_bar->b_go, child_thr);
- flag.release();
- }
- }
- }
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
- }
-#endif
- KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-// Hierarchical Barrier
-
-// Initialize thread barrier data
-/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the
- minimum amount of initialization required based on how the team has changed. Returns true if
- leaf children will require both on-core and traditional wake-up mechanisms. For example, if the
- team size increases, threads already in the team will respond to on-core wakeup on their parent
-   thread, but threads newly added to the team will only be listening on their local b_go. */
-static bool
-__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc,
- int gtid, int tid, kmp_team_t *team)
-{
- // Checks to determine if (re-)initialization is needed
- bool uninitialized = thr_bar->team == NULL;
- bool team_changed = team != thr_bar->team;
- bool team_sz_changed = nproc != thr_bar->nproc;
- bool tid_changed = tid != thr_bar->old_tid;
- bool retval = false;
-
- if (uninitialized || team_sz_changed) {
- __kmp_get_hierarchy(nproc, thr_bar);
- }
-
- if (uninitialized || team_sz_changed || tid_changed) {
- thr_bar->my_level = thr_bar->depth-1; // default for master
- thr_bar->parent_tid = -1; // default for master
- if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy
- kmp_uint32 d=0;
- while (d<thr_bar->depth) { // find parent based on level of thread in hierarchy, and note level
- kmp_uint32 rem;
- if (d == thr_bar->depth-2) { // reached level right below the master
- thr_bar->parent_tid = 0;
- thr_bar->my_level = d;
- break;
- }
- else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster?
- // thread is not a subtree root at next level, so this is max
- thr_bar->parent_tid = tid - rem;
- thr_bar->my_level = d;
- break;
- }
- ++d;
- }
- }
- thr_bar->offset = 7-(tid-thr_bar->parent_tid-1);
- thr_bar->old_tid = tid;
- thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
- thr_bar->team = team;
- thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
- }
- if (uninitialized || team_changed || tid_changed) {
- thr_bar->team = team;
- thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
- retval = true;
- }
- if (uninitialized || team_sz_changed || tid_changed) {
- thr_bar->nproc = nproc;
- thr_bar->leaf_kids = thr_bar->base_leaf_kids;
- if (thr_bar->my_level == 0) thr_bar->leaf_kids=0;
- if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc)
- thr_bar->leaf_kids = nproc - tid - 1;
- thr_bar->leaf_state = 0;
- for (int i=0; i<thr_bar->leaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1;
- }
- return retval;
-}
-
-static void
-__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
- int gtid, int tid, void (*reduce) (void *, void *)
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
+ // Prefetch next thread's go count
+# ifdef KMP_REVERSE_HYPER_BAR
+ if (child-1 >= 1 && next_child_tid < num_threads)
+# else
+ if (child+1 < branch_factor && next_child_tid < num_threads)
+# endif // KMP_REVERSE_HYPER_BAR
+ KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
+#endif /* KMP_CACHE_MANAGE */
+
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs) // push my fixed ICVs to my child
+ copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
+#endif // KMP_BARRIER_ICV_PUSH
+
+                KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
+ "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
+ child_tid, &child_bar->b_go, child_bar->b_go,
+ child_bar->b_go + KMP_BARRIER_STATE_BUMP));
+ // Release child from barrier
+ kmp_flag_64 flag(&child_bar->b_go, child_thr);
+ flag.release();
+ }
+ }
+ }
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
+ }
+#endif
+ KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+// Hierarchical Barrier
+
+// Initialize thread barrier data
+/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the
+ minimum amount of initialization required based on how the team has changed. Returns true if
+ leaf children will require both on-core and traditional wake-up mechanisms. For example, if the
+ team size increases, threads already in the team will respond to on-core wakeup on their parent
+   thread, but threads newly added to the team will only be listening on their local b_go. */
+static bool
+__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc,
+ int gtid, int tid, kmp_team_t *team)
+{
+ // Checks to determine if (re-)initialization is needed
+ bool uninitialized = thr_bar->team == NULL;
+ bool team_changed = team != thr_bar->team;
+ bool team_sz_changed = nproc != thr_bar->nproc;
+ bool tid_changed = tid != thr_bar->old_tid;
+ bool retval = false;
+
+ if (uninitialized || team_sz_changed) {
+ __kmp_get_hierarchy(nproc, thr_bar);
+ }
+
+ if (uninitialized || team_sz_changed || tid_changed) {
+ thr_bar->my_level = thr_bar->depth-1; // default for master
+ thr_bar->parent_tid = -1; // default for master
+ if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy
+ kmp_uint32 d=0;
+ while (d<thr_bar->depth) { // find parent based on level of thread in hierarchy, and note level
+ kmp_uint32 rem;
+ if (d == thr_bar->depth-2) { // reached level right below the master
+ thr_bar->parent_tid = 0;
+ thr_bar->my_level = d;
+ break;
+ }
+ else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster?
+ // thread is not a subtree root at next level, so this is max
+ thr_bar->parent_tid = tid - rem;
+ thr_bar->my_level = d;
+ break;
+ }
+ ++d;
+ }
+ }
+ thr_bar->offset = 7-(tid-thr_bar->parent_tid-1);
+ thr_bar->old_tid = tid;
+ thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
+ thr_bar->team = team;
+ thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
+ }
+ if (uninitialized || team_changed || tid_changed) {
+ thr_bar->team = team;
+ thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
+ retval = true;
+ }
+ if (uninitialized || team_sz_changed || tid_changed) {
+ thr_bar->nproc = nproc;
+ thr_bar->leaf_kids = thr_bar->base_leaf_kids;
+ if (thr_bar->my_level == 0) thr_bar->leaf_kids=0;
+ if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc)
+ thr_bar->leaf_kids = nproc - tid - 1;
+ thr_bar->leaf_state = 0;
+ for (int i=0; i<thr_bar->leaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1;
+ }
+ return retval;
+}
+
+static void
+__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
+ int gtid, int tid, void (*reduce) (void *, void *)
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
kmp_team_t *team = this_thr->th.th_team;
kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
kmp_uint32 nproc = this_thr->th.th_team_nproc;
kmp_info_t **other_threads = team->t.t_threads;
kmp_uint64 new_state;
-
- int level = team->t.t_level;
-#if OMP_40_ENABLED
- if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct?
- if (this_thr->th.th_teams_size.nteams > 1)
- ++level; // level was not increased in teams construct for team_of_masters
-#endif
- if (level == 1) thr_bar->use_oncore_barrier = 1;
- else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
-
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
- KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - save arrive time to the thread
- if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
- }
-#endif
-
- (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
-
- if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
+
+ int level = team->t.t_level;
+#if OMP_40_ENABLED
+ if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct?
+ if (this_thr->th.th_teams_size.nteams > 1)
+ ++level; // level was not increased in teams construct for team_of_masters
+#endif
+ if (level == 1) thr_bar->use_oncore_barrier = 1;
+ else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
+
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+ KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - save arrive time to the thread
+ if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
+ }
+#endif
+
+ (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
+
+ if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
kmp_int32 child_tid;
- new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
- if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag
- kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
- gtid, team->t.t_id, tid));
- kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
- flag.wait(this_thr, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- if (reduce) {
- for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
- KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid));
- (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
- }
- }
- (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits
- }
- // Next, wait for higher level children on each child's b_arrived flag
- for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) { // gather lowest level threads first, but skip 0
- kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
- if (last > nproc) last = nproc;
- for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
+ new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
+ if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag
+ kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
+ gtid, team->t.t_id, tid));
+ kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
+ flag.wait(this_thr, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ if (reduce) {
+ for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
+ KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid));
+ (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
+ }
+ }
+ (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits
+ }
+ // Next, wait for higher level children on each child's b_arrived flag
+ for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) { // gather lowest level threads first, but skip 0
+ kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
+ if (last > nproc) last = nproc;
+ for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
kmp_info_t *child_thr = other_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
- "arrived(%p) == %llu\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
- kmp_flag_64 flag(&child_bar->b_arrived, new_state);
- flag.wait(this_thr, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- if (reduce) {
- KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid));
- (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
- }
- }
- }
- }
- else { // Blocktime is not infinite
- for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) { // Gather lowest level threads first
- kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
- if (last > nproc) last = nproc;
- for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
+ "arrived(%p) == %llu\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
+ kmp_flag_64 flag(&child_bar->b_arrived, new_state);
+ flag.wait(this_thr, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ if (reduce) {
+ KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid));
+ (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
+ }
+ }
+ }
+ }
+ else { // Blocktime is not infinite
+ for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) { // Gather lowest level threads first
+ kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
+ if (last > nproc) last = nproc;
+ for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
kmp_info_t *child_thr = other_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
- "arrived(%p) == %llu\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
- kmp_flag_64 flag(&child_bar->b_arrived, new_state);
- flag.wait(this_thr, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- if (reduce) {
- KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid));
- (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
- }
- }
- }
- }
- }
- // All subordinates are gathered; now release parent if not master thread
-
- if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
- "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
- &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP));
- /* Mark arrival to parent: After performing this write, a worker thread may not assume that
- the team is valid any more - it could be deallocated by the master thread at any time. */
- if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
- || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it
- kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
- flag.release();
- }
- else { // Leaf does special release on the "offset" bits of parent's b_arrived flag
- thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
- kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
- flag.set_waiter(other_threads[thr_bar->parent_tid]);
- flag.release();
- }
- } else { // Master thread needs to update the team's b_arrived value
- team->t.t_bar[bt].b_arrived = new_state;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
- gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
- }
- // Is the team access below unsafe or just technically invalid?
- KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-static void
-__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- int propagate_icvs
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
+ "arrived(%p) == %llu\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
+ kmp_flag_64 flag(&child_bar->b_arrived, new_state);
+ flag.wait(this_thr, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ if (reduce) {
+ KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid));
+ (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
+ }
+ }
+ }
+ }
+ }
+ // All subordinates are gathered; now release parent if not master thread
+
+ if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
+ "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
+ __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
+ &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP));
+ /* Mark arrival to parent: After performing this write, a worker thread may not assume that
+ the team is valid any more - it could be deallocated by the master thread at any time. */
+ if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
+ || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it
+ kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
+ flag.release();
+ }
+ else { // Leaf does special release on the "offset" bits of parent's b_arrived flag
+ thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
+ kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
+ flag.set_waiter(other_threads[thr_bar->parent_tid]);
+ flag.release();
+ }
+ } else { // Master thread needs to update the team's b_arrived value
+ team->t.t_bar[bt].b_arrived = new_state;
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
+ gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
+ }
+ // Is the team access below unsafe or just technically invalid?
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+static void
+__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ int propagate_icvs
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
kmp_team_t *team;
kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
kmp_uint32 nproc;
- bool team_change = false; // indicates on-core barrier shouldn't be used
-
- if (KMP_MASTER_TID(tid)) {
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
- }
- else { // Worker threads
- // Wait for parent thread to release me
- if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
- || thr_bar->my_level != 0 || thr_bar->team == NULL) {
- // Use traditional method of waiting on my own b_go flag
- thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
- kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
- flag.wait(this_thr, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
- }
- else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested
- // Wait on my "offset" bits on parent's b_go flag
- thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
- kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
- bt, this_thr
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- flag.wait(this_thr, TRUE);
- if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
- TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
- }
- else { // Reset my bits on parent's b_go flag
- ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
- }
- }
- thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
- // Early exit for reaping threads releasing forkjoin barrier
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
- // The worker thread may now assume that the team is valid.
- team = __kmp_threads[gtid]->th.th_team;
- KMP_DEBUG_ASSERT(team != NULL);
- tid = __kmp_tid_from_gtid(gtid);
-
- KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
- gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
- KMP_MB(); // Flush all pending memory write invalidates.
- }
-
- nproc = this_thr->th.th_team_nproc;
- int level = team->t.t_level;
-#if OMP_40_ENABLED
- if (team->t.t_threads[0]->th.th_teams_microtask ) { // are we inside the teams construct?
- if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
- ++level; // level was not increased in teams construct for team_of_workers
- if( this_thr->th.th_teams_size.nteams > 1 )
- ++level; // level was not increased in teams construct for team_of_masters
- }
-#endif
- if (level == 1) thr_bar->use_oncore_barrier = 1;
- else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
-
- // If the team size has increased, we still communicate with old leaves via oncore barrier.
- unsigned short int old_leaf_kids = thr_bar->leaf_kids;
- kmp_uint64 old_leaf_state = thr_bar->leaf_state;
- team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
- // But if the entire team changes, we won't use oncore barrier at all
- if (team_change) old_leaf_kids = 0;
-
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
- if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy
- copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
- }
- else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime
- if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
- // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- &thr_bar->parent_bar->th_fixed_icvs);
- // non-leaves will get ICVs piggybacked with b_go via NGO store
- }
- else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
- if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access
- copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
- else // leaves copy parent's fixed ICVs directly to local ICV store
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- &thr_bar->parent_bar->th_fixed_icvs);
- }
- }
-#endif // KMP_BARRIER_ICV_PUSH
-
- // Now, release my children
- if (thr_bar->my_level) { // not a leaf
+ bool team_change = false; // indicates on-core barrier shouldn't be used
+
+ if (KMP_MASTER_TID(tid)) {
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+ }
+ else { // Worker threads
+ // Wait for parent thread to release me
+ if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
+ || thr_bar->my_level != 0 || thr_bar->team == NULL) {
+ // Use traditional method of waiting on my own b_go flag
+ thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
+ kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+ flag.wait(this_thr, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
+ }
+ else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested
+ // Wait on my "offset" bits on parent's b_go flag
+ thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
+ kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
+ bt, this_thr
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ flag.wait(this_thr, TRUE);
+ if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
+ TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
+ }
+ else { // Reset my bits on parent's b_go flag
+ ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
+ }
+ }
+ thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
+ // Early exit for reaping threads releasing forkjoin barrier
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+ // The worker thread may now assume that the team is valid.
+ team = __kmp_threads[gtid]->th.th_team;
+ KMP_DEBUG_ASSERT(team != NULL);
+ tid = __kmp_tid_from_gtid(gtid);
+
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
+ gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
+ KMP_MB(); // Flush all pending memory write invalidates.
+ }
+
+ nproc = this_thr->th.th_team_nproc;
+ int level = team->t.t_level;
+#if OMP_40_ENABLED
+ if (team->t.t_threads[0]->th.th_teams_microtask ) { // are we inside the teams construct?
+ if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
+ ++level; // level was not increased in teams construct for team_of_workers
+ if( this_thr->th.th_teams_size.nteams > 1 )
+ ++level; // level was not increased in teams construct for team_of_masters
+ }
+#endif
+ if (level == 1) thr_bar->use_oncore_barrier = 1;
+ else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
+
+ // If the team size has increased, we still communicate with old leaves via oncore barrier.
+ unsigned short int old_leaf_kids = thr_bar->leaf_kids;
+ kmp_uint64 old_leaf_state = thr_bar->leaf_state;
+ team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
+ // But if the entire team changes, we won't use oncore barrier at all
+ if (team_change) old_leaf_kids = 0;
+
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+ if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy
+ copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
+ }
+ else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime
+ if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
+ // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ &thr_bar->parent_bar->th_fixed_icvs);
+ // non-leaves will get ICVs piggybacked with b_go via NGO store
+ }
+ else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
+ if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access
+ copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
+ else // leaves copy parent's fixed ICVs directly to local ICV store
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ &thr_bar->parent_bar->th_fixed_icvs);
+ }
+ }
+#endif // KMP_BARRIER_ICV_PUSH
+
+ // Now, release my children
+ if (thr_bar->my_level) { // not a leaf
kmp_int32 child_tid;
- kmp_uint32 last;
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
- if (KMP_MASTER_TID(tid)) { // do a flat release
-                // Set local b_go to bump children via NGO store of the cache line containing ICVs and b_go.
- thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
- // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line
- ngo_load(&thr_bar->th_fixed_icvs);
- // This loops over all the threads skipping only the leaf nodes in the hierarchy
- for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
+ kmp_uint32 last;
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
+ if (KMP_MASTER_TID(tid)) { // do a flat release
+                // Set local b_go to bump children via NGO store of the cache line containing ICVs and b_go.
+ thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
+ // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line
+ ngo_load(&thr_bar->th_fixed_icvs);
+ // This loops over all the threads skipping only the leaf nodes in the hierarchy
+ for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
- " go(%p): %u => %u\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
- child_bar->b_go + KMP_BARRIER_STATE_BUMP));
- // Use ngo store (if available) to both store ICVs and release child via child's b_go
- ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
- }
- ngo_sync();
- }
- TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
- // Now, release leaf children
- if (thr_bar->leaf_kids) { // if there are any
- // We test team_change on the off-chance that the level 1 team changed.
- if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new
- if (old_leaf_kids) { // release old leaf kids
- thr_bar->b_go |= old_leaf_state;
- }
- // Release new leaf kids
- last = tid+thr_bar->skip_per_level[1];
- if (last > nproc) last = nproc;
- for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
+ " go(%p): %u => %u\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
+ child_bar->b_go + KMP_BARRIER_STATE_BUMP));
+ // Use ngo store (if available) to both store ICVs and release child via child's b_go
+ ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
+ }
+ ngo_sync();
+ }
+ TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
+ // Now, release leaf children
+ if (thr_bar->leaf_kids) { // if there are any
+ // We test team_change on the off-chance that the level 1 team changed.
+ if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new
+ if (old_leaf_kids) { // release old leaf kids
+ thr_bar->b_go |= old_leaf_state;
+ }
+ // Release new leaf kids
+ last = tid+thr_bar->skip_per_level[1];
+ if (last > nproc) last = nproc;
+ for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1
kmp_info_t *child_thr = team->t.t_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
- " T#%d(%d:%d) go(%p): %u => %u\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
- child_bar->b_go + KMP_BARRIER_STATE_BUMP));
- // Release child using child's b_go flag
- kmp_flag_64 flag(&child_bar->b_go, child_thr);
- flag.release();
- }
- }
- else { // Release all children at once with leaf_state bits on my own b_go flag
- thr_bar->b_go |= thr_bar->leaf_state;
- }
- }
- }
- else { // Blocktime is not infinite; do a simple hierarchical release
- for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first
- last = tid+thr_bar->skip_per_level[d+1];
- kmp_uint32 skip = thr_bar->skip_per_level[d];
- if (last > nproc) last = nproc;
- for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
+ " T#%d(%d:%d) go(%p): %u => %u\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
+ child_bar->b_go + KMP_BARRIER_STATE_BUMP));
+ // Release child using child's b_go flag
+ kmp_flag_64 flag(&child_bar->b_go, child_thr);
+ flag.release();
+ }
+ }
+ else { // Release all children at once with leaf_state bits on my own b_go flag
+ thr_bar->b_go |= thr_bar->leaf_state;
+ }
+ }
+ }
+ else { // Blocktime is not infinite; do a simple hierarchical release
+ for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first
+ last = tid+thr_bar->skip_per_level[d+1];
+ kmp_uint32 skip = thr_bar->skip_per_level[d];
+ if (last > nproc) last = nproc;
+ for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
kmp_info_t *child_thr = team->t.t_threads[child_tid];
kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
- KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
- " go(%p): %u => %u\n",
- gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
- team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
- child_bar->b_go + KMP_BARRIER_STATE_BUMP));
- // Release child using child's b_go flag
- kmp_flag_64 flag(&child_bar->b_go, child_thr);
- flag.release();
- }
- }
- }
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
-#endif // KMP_BARRIER_ICV_PUSH
- }
- KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-// ---------------------------- End of Barrier Algorithms ----------------------------
-
-// Internal function to do a barrier.
-/* If is_split is true, do a split barrier; otherwise, do a plain barrier.
-   If reduce is non-NULL, do a split reduction barrier; otherwise, do a split barrier.
- Returns 0 if master thread, 1 if worker thread. */
-int
-__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
- void *reduce_data, void (*reduce)(void *, void *))
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
+ " go(%p): %u => %u\n",
+ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
+ team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
+ child_bar->b_go + KMP_BARRIER_STATE_BUMP));
+ // Release child using child's b_go flag
+ kmp_flag_64 flag(&child_bar->b_go, child_thr);
+ flag.release();
+ }
+ }
+ }
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
+#endif // KMP_BARRIER_ICV_PUSH
+ }
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+// ---------------------------- End of Barrier Algorithms ----------------------------
+
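+// The algorithm actually used for each phase and barrier type is selected at run time via the
+// __kmp_barrier_gather_pattern[] / __kmp_barrier_release_pattern[] tables (hyper, hierarchical,
+// or tree, with linear as the fallback); the routines below dispatch on those tables.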
+// Internal function to do a barrier.
+/* If is_split is true, do a split barrier; otherwise, do a plain barrier.
+   If reduce is non-NULL, do a split reduction barrier; otherwise, do a split barrier.
+ Returns 0 if master thread, 1 if worker thread. */
+int
+__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
+ void *reduce_data, void (*reduce)(void *, void *))
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
int status = 0;
- ident_t *loc = __kmp_threads[gtid]->th.th_ident;
-#if OMPT_SUPPORT
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
-#endif
-
- KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n",
- gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
-#if OMPT_BLAME
- my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
- my_parallel_id = team->t.ompt_team_info.parallel_id;
-
-#if OMPT_TRACE
- if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
- if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
- ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
- my_parallel_id, my_task_id);
- }
- }
-#endif
- if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
- my_parallel_id, my_task_id);
- }
-#endif
- // It is OK to report the barrier state after the barrier begin callback.
- // According to the OMPT specification, a compliant implementation may
- // even delay reporting this state until the barrier begins to wait.
- this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
- }
-#endif
-
- if (! team->t.t_serialized) {
-#if USE_ITT_BUILD
- // This value will be used in itt notify events below.
- void *itt_sync_obj = NULL;
-# if USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
-# endif
-#endif /* USE_ITT_BUILD */
- if (__kmp_tasking_mode == tskm_extra_barrier) {
- __kmp_tasking_barrier(team, this_thr, gtid);
- KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
- gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
- }
-
- /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when
- the team struct is not guaranteed to exist. */
- // See note about the corresponding code in __kmp_join_barrier() being performance-critical.
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
- this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
- }
-
-#if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_starting(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-#if USE_DEBUGGER
-        // Let the debugger know: the thread arrived at the barrier and is waiting.
- if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
- team->t.t_bar[bt].b_master_arrived += 1;
- } else {
- this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
- } // if
-#endif /* USE_DEBUGGER */
- if (reduce != NULL) {
- //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
- this_thr->th.th_local.reduce_data = reduce_data;
- }
-
- if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
- __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1
-
- switch (__kmp_barrier_gather_pattern[bt]) {
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
- __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
- USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
- __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- default: {
- __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- }
- }
-
- KMP_MB();
-
- if (KMP_MASTER_TID(tid)) {
- status = 0;
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_wait(this_thr, team
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- }
-#if USE_DEBUGGER
-            // Let the debugger know: All threads have arrived and are starting to leave the barrier.
- team->t.t_bar[bt].b_team_arrived += 1;
-#endif
-
-#if USE_ITT_BUILD
- /* TODO: In case of split reduction barrier, master thread may send acquired event early,
- before the final summation into the shared variable is done (final summation can be a
- long operation for array reductions). */
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_middle(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier - report frame end (only if active_level == 1)
- if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
-#if OMP_40_ENABLED
- this_thr->th.th_teams_microtask == NULL &&
-#endif
- team->t.t_active_level == 1)
- {
- kmp_uint64 cur_time = __itt_get_timestamp();
- kmp_info_t **other_threads = team->t.t_threads;
- int nproc = this_thr->th.th_team_nproc;
- int i;
- switch(__kmp_forkjoin_frames_mode) {
- case 1:
- __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
- this_thr->th.th_frame_time = cur_time;
- break;
- case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed)
- __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
- break;
- case 3:
- if( __itt_metadata_add_ptr ) {
- // Initialize with master's wait time
- kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
- for (i=1; i<nproc; ++i) {
- delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
- }
- __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL));
- }
- __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
- this_thr->th.th_frame_time = cur_time;
- break;
- }
- }
-#endif /* USE_ITT_BUILD */
- } else {
- status = 1;
-#if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_middle(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
- }
- if (status == 1 || ! is_split) {
- switch (__kmp_barrier_release_pattern[bt]) {
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- default: {
- __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- }
- }
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_sync(this_thr, team);
- }
- }
-
-#if USE_ITT_BUILD
- /* GEH: TODO: Move this under if-condition above and also include in
- __kmp_end_split_barrier(). This will more accurately represent the actual release time
- of the threads for split barriers. */
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_finished(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
- } else { // Team is serialized.
- status = 0;
- if (__kmp_tasking_mode != tskm_immediate_exec) {
-#if OMP_41_ENABLED
- if ( this_thr->th.th_task_team != NULL ) {
- void *itt_sync_obj = NULL;
-#if USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
- __kmp_itt_barrier_starting(gtid, itt_sync_obj);
- }
-#endif
-
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
- __kmp_task_team_wait(this_thr, team
- USE_ITT_BUILD_ARG(itt_sync_obj));
- __kmp_task_team_setup(this_thr, team, 0);
-
-#if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_finished(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
- }
-#else
- // The task team should be NULL for serialized code (tasks will be executed immediately)
- KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
-#endif
- }
- }
- KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
- gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
-#if OMPT_BLAME
- if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
- ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
- my_parallel_id, my_task_id);
- }
-#endif
- this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- }
-#endif
-
- return status;
-}
-
-
-void
-__kmp_end_split_barrier(enum barrier_type bt, int gtid)
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
- int tid = __kmp_tid_from_gtid(gtid);
- kmp_info_t *this_thr = __kmp_threads[gtid];
- kmp_team_t *team = this_thr->th.th_team;
-
- if (!team->t.t_serialized) {
- if (KMP_MASTER_GTID(gtid)) {
- switch (__kmp_barrier_release_pattern[bt]) {
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(NULL) );
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(NULL));
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(NULL) );
- break;
- }
- default: {
- __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
- USE_ITT_BUILD_ARG(NULL) );
- }
- }
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_sync(this_thr, team);
- } // if
- }
- }
-}
-
-
-void
-__kmp_join_barrier(int gtid)
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
+ ident_t *loc = __kmp_threads[gtid]->th.th_ident;
+#if OMPT_SUPPORT
+ ompt_task_id_t my_task_id;
+ ompt_parallel_id_t my_parallel_id;
+#endif
+
+ KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n",
+ gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+#if OMPT_BLAME
+ my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
+ my_parallel_id = team->t.ompt_team_info.parallel_id;
+
+#if OMPT_TRACE
+ if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
+ if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
+ my_parallel_id, my_task_id);
+ }
+ }
+#endif
+ if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+ // It is OK to report the barrier state after the barrier begin callback.
+ // According to the OMPT specification, a compliant implementation may
+ // even delay reporting this state until the barrier begins to wait.
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
+ }
+#endif
+
+ if (! team->t.t_serialized) {
+#if USE_ITT_BUILD
+ // This value will be used in itt notify events below.
+ void *itt_sync_obj = NULL;
+# if USE_ITT_NOTIFY
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
+# endif
+#endif /* USE_ITT_BUILD */
+ if (__kmp_tasking_mode == tskm_extra_barrier) {
+ __kmp_tasking_barrier(team, this_thr, gtid);
+ KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
+ gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
+ }
+
+ /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when
+ the team struct is not guaranteed to exist. */
+ // See note about the corresponding code in __kmp_join_barrier() being performance-critical.
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
+ this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
+ }
+
+#if USE_ITT_BUILD
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_starting(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+#if USE_DEBUGGER
+        // Let the debugger know: the thread arrived at the barrier and is waiting.
+ if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
+ team->t.t_bar[bt].b_master_arrived += 1;
+ } else {
+ this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
+ } // if
+#endif /* USE_DEBUGGER */
+ if (reduce != NULL) {
+ //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
+ this_thr->th.th_local.reduce_data = reduce_data;
+ }
+
+ if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
+ __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1
+
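+    // Gather phase: dispatch on the pattern configured for this barrier type. The hyper and tree
+    // gathers require non-zero branch bits (asserted below); any other setting falls back to the
+    // linear gather.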
+ switch (__kmp_barrier_gather_pattern[bt]) {
+ case bp_hyper_bar: {
+ KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
+ __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_hierarchical_bar: {
+ __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
+ USE_ITT_BUILD_ARG(itt_sync_obj));
+ break;
+ }
+ case bp_tree_bar: {
+ KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
+ __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ default: {
+ __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ }
+ }
+
+ KMP_MB();
+
+ if (KMP_MASTER_TID(tid)) {
+ status = 0;
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ __kmp_task_team_wait(this_thr, team
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ }
+#if USE_DEBUGGER
+            // Let the debugger know: All threads have arrived and are starting to leave the barrier.
+ team->t.t_bar[bt].b_team_arrived += 1;
+#endif
+
+#if USE_ITT_BUILD
+ /* TODO: In case of split reduction barrier, master thread may send acquired event early,
+ before the final summation into the shared variable is done (final summation can be a
+ long operation for array reductions). */
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_middle(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier - report frame end (only if active_level == 1)
+ if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
+#if OMP_40_ENABLED
+ this_thr->th.th_teams_microtask == NULL &&
+#endif
+ team->t.t_active_level == 1)
+ {
+ kmp_uint64 cur_time = __itt_get_timestamp();
+ kmp_info_t **other_threads = team->t.t_threads;
+ int nproc = this_thr->th.th_team_nproc;
+ int i;
+ switch(__kmp_forkjoin_frames_mode) {
+ case 1:
+ __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
+ this_thr->th.th_frame_time = cur_time;
+ break;
+ case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed)
+ __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
+ break;
+ case 3:
+ if( __itt_metadata_add_ptr ) {
+ // Initialize with master's wait time
+ kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
+ for (i=1; i<nproc; ++i) {
+ delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
+ }
+ __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL));
+ }
+ __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
+ this_thr->th.th_frame_time = cur_time;
+ break;
+ }
+ }
+#endif /* USE_ITT_BUILD */
+ } else {
+ status = 1;
+#if USE_ITT_BUILD
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_middle(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+ }
+ if (status == 1 || ! is_split) {
+ switch (__kmp_barrier_release_pattern[bt]) {
+ case bp_hyper_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
+ __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_hierarchical_bar: {
+ __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_tree_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
+ __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ default: {
+ __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ }
+ }
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ __kmp_task_team_sync(this_thr, team);
+ }
+ }
+
+#if USE_ITT_BUILD
+ /* GEH: TODO: Move this under if-condition above and also include in
+ __kmp_end_split_barrier(). This will more accurately represent the actual release time
+ of the threads for split barriers. */
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_finished(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+ } else { // Team is serialized.
+ status = 0;
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+#if OMP_41_ENABLED
+ if ( this_thr->th.th_task_team != NULL ) {
+ void *itt_sync_obj = NULL;
+#if USE_ITT_NOTIFY
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
+ __kmp_itt_barrier_starting(gtid, itt_sync_obj);
+ }
+#endif
+
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
+ __kmp_task_team_wait(this_thr, team
+ USE_ITT_BUILD_ARG(itt_sync_obj));
+ __kmp_task_team_setup(this_thr, team, 0);
+
+#if USE_ITT_BUILD
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_finished(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+ }
+#else
+ // The task team should be NULL for serialized code (tasks will be executed immediately)
+ KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
+#endif
+ }
+ }
+ KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
+ gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+#if OMPT_BLAME
+ if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+ }
+#endif
+
+ return status;
+}
+
+
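+// Usage note: with is_split=TRUE the master returns from __kmp_barrier() right after the gather
+// phase without releasing the team; once the intervening work (e.g. the final reduction
+// summation) is done, it calls __kmp_end_split_barrier() to run the release phase and let the
+// waiting workers go.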
+void
+__kmp_end_split_barrier(enum barrier_type bt, int gtid)
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
+ int tid = __kmp_tid_from_gtid(gtid);
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+ kmp_team_t *team = this_thr->th.th_team;
+
+ if (!team->t.t_serialized) {
+ if (KMP_MASTER_GTID(gtid)) {
+ switch (__kmp_barrier_release_pattern[bt]) {
+ case bp_hyper_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
+ __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(NULL) );
+ break;
+ }
+ case bp_hierarchical_bar: {
+ __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(NULL));
+ break;
+ }
+ case bp_tree_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
+ __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(NULL) );
+ break;
+ }
+ default: {
+ __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
+ USE_ITT_BUILD_ARG(NULL) );
+ }
+ }
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ __kmp_task_team_sync(this_thr, team);
+ } // if
+ }
+ }
+}
+
+
+void
+__kmp_join_barrier(int gtid)
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team;
kmp_uint nproc;
- kmp_info_t *master_thread;
- int tid;
-#ifdef KMP_DEBUG
- int team_id;
-#endif /* KMP_DEBUG */
-#if USE_ITT_BUILD
- void *itt_sync_obj = NULL;
-# if USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
- // Get object created at fork_barrier
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
-# endif
-#endif /* USE_ITT_BUILD */
- KMP_MB();
-
- // Get current info
- team = this_thr->th.th_team;
- nproc = this_thr->th.th_team_nproc;
- KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
- tid = __kmp_tid_from_gtid(gtid);
-#ifdef KMP_DEBUG
- team_id = team->t.t_id;
-#endif /* KMP_DEBUG */
- master_thread = this_thr->th.th_team_master;
-#ifdef KMP_DEBUG
- if (master_thread != team->t.t_threads[0]) {
- __kmp_print_structure();
- }
-#endif /* KMP_DEBUG */
- KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
- KMP_MB();
-
- // Verify state
- KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
- KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
- KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
- KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
- KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
-
-#if OMPT_SUPPORT
-#if OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
-#endif
- this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
-#endif
-
- if (__kmp_tasking_mode == tskm_extra_barrier) {
- __kmp_tasking_barrier(team, this_thr, gtid);
- KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, team_id, tid));
- }
-# ifdef KMP_DEBUG
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
- __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
- this_thr->th.th_task_team));
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
- }
-# endif /* KMP_DEBUG */
-
- /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the
-       team struct is not guaranteed to exist. Doing these loads causes a cache miss that slows
- down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite,
- since the values are not used by __kmp_wait_template() in that case. */
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
- this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
- }
-
-#if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_starting(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-
- switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
- __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
- __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- default: {
- __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- }
- }
-
- /* From this point on, the team data structure may be deallocated at any time by the
- master thread - it is unsafe to reference it in any of the worker threads. Any per-team
- data items that need to be referenced before the end of the barrier should be moved to
- the kmp_task_team_t structs. */
- if (KMP_MASTER_TID(tid)) {
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- // Master shouldn't call decrease_load(). // TODO: enable master threads.
- // Master should have th_may_decrease_load == 0. // TODO: enable master threads.
- __kmp_task_team_wait(this_thr, team
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- }
-#if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_middle(gtid, itt_sync_obj);
-#endif /* USE_ITT_BUILD */
-
-# if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Join barrier - report frame end
- if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
-#if OMP_40_ENABLED
- this_thr->th.th_teams_microtask == NULL &&
-#endif
- team->t.t_active_level == 1)
- {
- kmp_uint64 cur_time = __itt_get_timestamp();
- ident_t * loc = team->t.t_ident;
- kmp_info_t **other_threads = team->t.t_threads;
- int nproc = this_thr->th.th_team_nproc;
- int i;
- switch(__kmp_forkjoin_frames_mode) {
- case 1:
- __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
- break;
- case 2:
- __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
- break;
- case 3:
- if( __itt_metadata_add_ptr ) {
- // Initialize with master's wait time
- kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
- for (i=1; i<nproc; ++i) {
- delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
- }
- __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
- }
- __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
- this_thr->th.th_frame_time = cur_time;
- break;
- }
- }
-# endif /* USE_ITT_BUILD */
- }
-#if USE_ITT_BUILD
- else {
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_middle(gtid, itt_sync_obj);
- }
-#endif /* USE_ITT_BUILD */
-
-#if KMP_DEBUG
- if (KMP_MASTER_TID(tid)) {
- KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
- gtid, team_id, tid, nproc));
- }
-#endif /* KMP_DEBUG */
-
- // TODO now, mark worker threads as done so they may be disbanded
- KMP_MB(); // Flush all pending memory write invalidates.
- KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
-
+ kmp_info_t *master_thread;
+ int tid;
+#ifdef KMP_DEBUG
+ int team_id;
+#endif /* KMP_DEBUG */
+#if USE_ITT_BUILD
+ void *itt_sync_obj = NULL;
+# if USE_ITT_NOTIFY
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
+ // Get object created at fork_barrier
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+# endif
+#endif /* USE_ITT_BUILD */
+ KMP_MB();
+
+ // Get current info
+ team = this_thr->th.th_team;
+ nproc = this_thr->th.th_team_nproc;
+ KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
+ tid = __kmp_tid_from_gtid(gtid);
+#ifdef KMP_DEBUG
+ team_id = team->t.t_id;
+#endif /* KMP_DEBUG */
+ master_thread = this_thr->th.th_team_master;
+#ifdef KMP_DEBUG
+ if (master_thread != team->t.t_threads[0]) {
+ __kmp_print_structure();
+ }
+#endif /* KMP_DEBUG */
+ KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
+ KMP_MB();
+
+ // Verify state
+ KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
+ KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
+ KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
+ KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
+ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
+
#if OMPT_SUPPORT
- if (ompt_enabled) {
-#if OMPT_BLAME
- if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
- ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
-#endif
-
- // return to default state
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
-}
-
-
-// TODO release worker threads' fork barriers as we are ready instead of all at once
-void
-__kmp_fork_barrier(int gtid, int tid)
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
- kmp_info_t *this_thr = __kmp_threads[gtid];
- kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
-#if USE_ITT_BUILD
- void * itt_sync_obj = NULL;
-#endif /* USE_ITT_BUILD */
-
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
- gtid, (team != NULL) ? team->t.t_id : -1, tid));
-
- // th_team pointer only valid for master thread here
- if (KMP_MASTER_TID(tid)) {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
- // Create itt barrier object
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
- __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
-
-#ifdef KMP_DEBUG
+#if OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+#endif
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
+#endif
+
+ if (__kmp_tasking_mode == tskm_extra_barrier) {
+ __kmp_tasking_barrier(team, this_thr, gtid);
+ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, team_id, tid));
+ }
+# ifdef KMP_DEBUG
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
+ __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
+ this_thr->th.th_task_team));
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
+ }
+# endif /* KMP_DEBUG */
+
+ /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the
+       team struct is not guaranteed to exist. Doing these loads causes a cache miss that slows
+ down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite,
+ since the values are not used by __kmp_wait_template() in that case. */
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
+ this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
+ }
+
+#if USE_ITT_BUILD
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_starting(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+
+ switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
+ case bp_hyper_bar: {
+ KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
+ __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_hierarchical_bar: {
+ __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_tree_bar: {
+ KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
+ __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ default: {
+ __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ }
+ }
+
+ /* From this point on, the team data structure may be deallocated at any time by the
+ master thread - it is unsafe to reference it in any of the worker threads. Any per-team
+ data items that need to be referenced before the end of the barrier should be moved to
+ the kmp_task_team_t structs. */
+ if (KMP_MASTER_TID(tid)) {
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ // Master shouldn't call decrease_load(). // TODO: enable master threads.
+ // Master should have th_may_decrease_load == 0. // TODO: enable master threads.
+ __kmp_task_team_wait(this_thr, team
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ }
+#if USE_ITT_BUILD
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_middle(gtid, itt_sync_obj);
+#endif /* USE_ITT_BUILD */
+
+# if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Join barrier - report frame end
+ if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
+#if OMP_40_ENABLED
+ this_thr->th.th_teams_microtask == NULL &&
+#endif
+ team->t.t_active_level == 1)
+ {
+ kmp_uint64 cur_time = __itt_get_timestamp();
+ ident_t * loc = team->t.t_ident;
+ kmp_info_t **other_threads = team->t.t_threads;
+ int nproc = this_thr->th.th_team_nproc;
+ int i;
+ switch(__kmp_forkjoin_frames_mode) {
+ case 1:
+ __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
+ break;
+ case 2:
+ __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
+ break;
+ case 3:
+ if( __itt_metadata_add_ptr ) {
+ // Initialize with master's wait time
+ kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
+ for (i=1; i<nproc; ++i) {
+ delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
+ }
+ __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
+ }
+ __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
+ this_thr->th.th_frame_time = cur_time;
+ break;
+ }
+ }
+# endif /* USE_ITT_BUILD */
+ }
+#if USE_ITT_BUILD
+ else {
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
+ __kmp_itt_barrier_middle(gtid, itt_sync_obj);
+ }
+#endif /* USE_ITT_BUILD */
+
+#if KMP_DEBUG
+ if (KMP_MASTER_TID(tid)) {
+ KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
+ gtid, team_id, tid, nproc));
+ }
+#endif /* KMP_DEBUG */
+
+ // TODO now, mark worker threads as done so they may be disbanded
+ KMP_MB(); // Flush all pending memory write invalidates.
+ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+#if OMPT_BLAME
+ if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+#endif
+
+ // return to default state
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+}
+
+
+// TODO release worker threads' fork barriers as we are ready instead of all at once
+void
+__kmp_fork_barrier(int gtid, int tid)
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+ kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
+#if USE_ITT_BUILD
+ void * itt_sync_obj = NULL;
+#endif /* USE_ITT_BUILD */
+
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
+ gtid, (team != NULL) ? team->t.t_id : -1, tid));
+
+ // th_team pointer only valid for master thread here
+ if (KMP_MASTER_TID(tid)) {
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
+ // Create itt barrier object
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
+ __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+
+#ifdef KMP_DEBUG
kmp_info_t **other_threads = team->t.t_threads;
int i;
-
- // Verify state
- KMP_MB();
-
- for(i=1; i<team->t.t_nproc; ++i) {
- KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
- gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
- team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
- other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
- KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
- & ~(KMP_BARRIER_SLEEP_STATE))
- == KMP_INIT_BARRIER_STATE);
- KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
- }
-#endif
-
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1
- }
-
- /* The master thread may have changed its blocktime between the join barrier and the
- fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can
- access it when the team struct is not guaranteed to exist. */
- // See note about the corresponding code in __kmp_join_barrier() being performance-critical
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
- this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
- }
- } // master
-
- switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
- __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
- __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- break;
- }
- default: {
- __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj) );
- }
- }
-
- // Early exit for reaping threads releasing forkjoin barrier
- if (TCR_4(__kmp_global.g.g_done)) {
- this_thr->th.th_task_team = NULL;
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
- if (!KMP_MASTER_TID(tid)) {
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
- if (itt_sync_obj)
- __kmp_itt_barrier_finished(gtid, itt_sync_obj);
- }
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
- return;
- }
-
- /* We can now assume that a valid team structure has been allocated by the master and
- propagated to all worker threads. The current thread, however, may not be part of the
- team, so we can't blindly assume that the team pointer is non-null. */
- team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
- KMP_DEBUG_ASSERT(team != NULL);
- tid = __kmp_tid_from_gtid(gtid);
-
-
-#if KMP_BARRIER_ICV_PULL
- /* Master thread's copy of the ICVs was set up on the implicit taskdata in
- __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
- this data before this function is called. We cannot modify __kmp_fork_call() to look at
- the fixed ICVs in the master's thread struct, because it is not always the case that the
- threads arrays have been allocated when __kmp_fork_call() is executed. */
- {
- KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
- if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
- // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
- }
- }
-#endif // KMP_BARRIER_ICV_PULL
-
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_sync(this_thr, team);
- }
-
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
- if (proc_bind == proc_bind_intel) {
-#endif
-#if KMP_AFFINITY_SUPPORTED
- // Call dynamic affinity settings
- if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
- __kmp_balanced_affinity(tid, team->t.t_nproc);
- }
-#endif // KMP_AFFINITY_SUPPORTED
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- }
- else if (proc_bind != proc_bind_false) {
- if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
- KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
- __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
- }
- else {
- __kmp_affinity_set_place(gtid);
- }
- }
-#endif
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
- if (!KMP_MASTER_TID(tid)) {
- // Get correct barrier object
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
- __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
- } // (prepare called inside barrier_release)
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
-}
-
-
-void
-__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
-
- KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
- KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
-
- /* Master thread's copy of the ICVs was set up on the implicit taskdata in
- __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
- this data before this function is called. */
-#if KMP_BARRIER_ICV_PULL
- /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where
- all of the worker threads can access them and make their own copies after the barrier. */
- KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
- copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
- KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team));
-#elif KMP_BARRIER_ICV_PUSH
- // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
- KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team));
-#else
- // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
- ngo_load(new_icvs);
- KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
- for (int f=1; f<new_nproc; ++f) { // Skip the master thread
- // TODO: GEH - pass in better source location info since usually NULL here
- KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
- f, team->t.t_threads[f], team));
- __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
- ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
- KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
- f, team->t.t_threads[f], team));
- }
- ngo_sync();
-#endif // KMP_BARRIER_ICV_PULL
-}
+
+ // Verify state
+ KMP_MB();
+
+ for(i=1; i<team->t.t_nproc; ++i) {
+ KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
+ gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
+ team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
+ other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
+ KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
+ & ~(KMP_BARRIER_SLEEP_STATE))
+ == KMP_INIT_BARRIER_STATE);
+ KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
+ }
+#endif
+
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1
+ }
+
+ /* The master thread may have changed its blocktime between the join barrier and the
+ fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can
+ access it when the team struct is not guaranteed to exist. */
+ // See note about the corresponding code in __kmp_join_barrier() being performance-critical
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
+ this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
+ }
+ } // master
+
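+    // Every thread now runs the fork/join release pattern: the master releases its workers
+    // (pushing ICVs where configured), while workers block here until released with the new
+    // team in place.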
+ switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
+ case bp_hyper_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
+ __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_hierarchical_bar: {
+ __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ case bp_tree_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
+ __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ break;
+ }
+ default: {
+ __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+ }
+ }
+
+ // Early exit for reaping threads releasing forkjoin barrier
+ if (TCR_4(__kmp_global.g.g_done)) {
+ this_thr->th.th_task_team = NULL;
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
+ if (!KMP_MASTER_TID(tid)) {
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ if (itt_sync_obj)
+ __kmp_itt_barrier_finished(gtid, itt_sync_obj);
+ }
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
+ return;
+ }
+
+ /* We can now assume that a valid team structure has been allocated by the master and
+ propagated to all worker threads. The current thread, however, may not be part of the
+ team, so we can't blindly assume that the team pointer is non-null. */
+ team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
+ KMP_DEBUG_ASSERT(team != NULL);
+ tid = __kmp_tid_from_gtid(gtid);
+
+
+#if KMP_BARRIER_ICV_PULL
+ /* Master thread's copy of the ICVs was set up on the implicit taskdata in
+ __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
+ this data before this function is called. We cannot modify __kmp_fork_call() to look at
+ the fixed ICVs in the master's thread struct, because it is not always the case that the
+ threads arrays have been allocated when __kmp_fork_call() is executed. */
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
+ // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ }
+ }
+#endif // KMP_BARRIER_ICV_PULL
+
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ __kmp_task_team_sync(this_thr, team);
+ }
+
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
+ if (proc_bind == proc_bind_intel) {
+#endif
+#if KMP_AFFINITY_SUPPORTED
+ // Call dynamic affinity settings
+ if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
+ __kmp_balanced_affinity(tid, team->t.t_nproc);
+ }
+#endif // KMP_AFFINITY_SUPPORTED
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ }
+ else if (proc_bind != proc_bind_false) {
+ if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
+ KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
+ __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
+ }
+ else {
+ __kmp_affinity_set_place(gtid);
+ }
+ }
+#endif
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
+ if (!KMP_MASTER_TID(tid)) {
+ // Get correct barrier object
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
+ } // (prepare called inside barrier_release)
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
+}
+
+
+void
+__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
+
+ KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
+ KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
+
+ /* Master thread's copy of the ICVs was set up on the implicit taskdata in
+ __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
+ this data before this function is called. */
+#if KMP_BARRIER_ICV_PULL
+ /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where
+ all of the worker threads can access them and make their own copies after the barrier. */
+ KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
+ copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
+ KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
+ 0, team->t.t_threads[0], team));
+#elif KMP_BARRIER_ICV_PUSH
+ // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
+ KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
+ 0, team->t.t_threads[0], team));
+#else
+ // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
+ ngo_load(new_icvs);
+ KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
+ for (int f=1; f<new_nproc; ++f) { // Skip the master thread
+ // TODO: GEH - pass in better source location info since usually NULL here
+ KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
+ f, team->t.t_threads[f], team));
+ __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
+ ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
+ KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
+ f, team->t.t_threads[f], team));
+ }
+ ngo_sync();
+#endif // KMP_BARRIER_ICV_PULL
+}
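For orientation, the linear fallback in __kmp_setup_icv_copy amounts to replicating one control block per worker thread. A stand-alone sketch of that pattern, using hypothetical simplified types rather than the real kmp_internal_control_t / kmp_taskdata_t structures:

#include <stdio.h>
#include <string.h>

/* Hypothetical, simplified stand-ins for the runtime's internal control
 * values and per-thread implicit task data. */
typedef struct { int nproc; int max_active_levels; int blocktime; } icvs_t;
typedef struct { icvs_t td_icvs; } taskdata_t;

/* Thread 0 already holds the values; workers 1..nproc-1 each get a copy,
 * which is the O(nthreads) cost noted in the comment above. */
static void setup_icv_copy(taskdata_t *tasks, int nproc, const icvs_t *new_icvs) {
    for (int f = 1; f < nproc; ++f)
        memcpy(&tasks[f].td_icvs, new_icvs, sizeof *new_icvs);
}

int main(void) {
    icvs_t icvs = { 4, 1, 200 };
    taskdata_t team[4] = {0};
    team[0].td_icvs = icvs;              /* master's copy was set up earlier */
    setup_icv_copy(team, 4, &icvs);
    printf("worker 3 blocktime=%d\n", team[3].td_icvs.blocktime);
    return 0;
}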
diff --git a/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp b/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp
index 17ea375cd0..d1eb00c664 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_cancel.cpp
@@ -1,282 +1,282 @@
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_str.h"
+
+#if OMP_40_ENABLED
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
+
+@return returns true if the cancellation request has been activated and the execution thread
+needs to proceed to the end of the canceled region.
+
+Request cancellation of the binding OpenMP region.
+*/
+kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+
+ KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
+
+ KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
+ KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop ||
+ cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup);
+ KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+ if (__kmp_omp_cancellation) {
+ switch (cncl_kind) {
+ case cancel_parallel:
+ case cancel_loop:
+ case cancel_sections:
+ // cancellation requests for parallel and worksharing constructs
+ // are handled through the team structure
+ {
+ kmp_team_t *this_team = this_thr->th.th_team;
+ KMP_DEBUG_ASSERT(this_team);
+ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind);
+ if (old == cancel_noreq || old == cncl_kind) {
+ //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n",
+ // this_team->t.t_cancel_request, &(this_team->t.t_cancel_request));
+ // we do not have a cancellation request in this team or we do have one
+ // that matches the current request -> cancel
+ return 1 /* true */;
+ }
+ break;
+ }
+ case cancel_taskgroup:
+ // cancellation requests for a task group
+ // are handled through the taskgroup structure
+ {
+ kmp_taskdata_t* task;
+ kmp_taskgroup_t* taskgroup;
+
+ task = this_thr->th.th_current_task;
+ KMP_DEBUG_ASSERT( task );
+
+ taskgroup = task->td_taskgroup;
+ if (taskgroup) {
+ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind);
+ if (old == cancel_noreq || old == cncl_kind) {
+ // we do not have a cancellation request in this taskgroup or we do have one
+ // that matches the current request -> cancel
+ return 1 /* true */;
+ }
+ }
+ else {
+ // TODO: what needs to happen here?
+ // the specification disallows cancellation w/o taskgroups
+ // so we might do anything here, let's abort for now
+ KMP_ASSERT( 0 /* false */);
+ }
+ }
+ break;
+ default:
+ KMP_ASSERT (0 /* false */);
+ }
+ }
+
+ // ICV OMP_CANCELLATION=false, so we ignored this cancel request
+ KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
+ return 0 /* false */;
+}
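At the user level, __kmpc_cancel is what a compiler typically emits for the cancel directive. A minimal, self-contained example of that construct (standard OpenMP 4.0; the request only takes effect when the program runs with OMP_CANCELLATION=true, mirroring the __kmp_omp_cancellation check above):

#include <omp.h>
#include <stdio.h>

int main(void) {
    #pragma omp parallel
    {
        if (omp_get_thread_num() == 0) {
            /* Request cancellation of the binding parallel region. */
            #pragma omp cancel parallel
        }
        /* Other threads only observe the request at cancellation points. */
        #pragma omp cancellation point parallel
        printf("T%d was not cancelled\n", omp_get_thread_num());
    }
    return 0;
}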
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the
+encountering thread has to cancel.
+
+Cancellation point for the encountering thread.
+*/
+kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+
+ KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
+
+ KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
+ KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop ||
+ cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup);
+ KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+ if (__kmp_omp_cancellation) {
+ switch (cncl_kind) {
+ case cancel_parallel:
+ case cancel_loop:
+ case cancel_sections:
+ // cancellation requests for parallel and worksharing constructs
+ // are handled through the team structure
+ {
+ kmp_team_t *this_team = this_thr->th.th_team;
+ KMP_DEBUG_ASSERT(this_team);
+ if (this_team->t.t_cancel_request) {
+ if (cncl_kind == this_team->t.t_cancel_request) {
+ // the request in the team structure matches the type of
+ // cancellation point so we can cancel
+ return 1 /* true */;
+ }
+ KMP_ASSERT( 0 /* false */);
+ }
+ else {
+ // we do not have a cancellation request pending, so we just
+ // ignore this cancellation point
+ return 0;
+ }
+ break;
+ }
+ case cancel_taskgroup:
+ // cancellation requests for a task group
+ // are handled through the taskgroup structure
+ {
+ kmp_taskdata_t* task;
+ kmp_taskgroup_t* taskgroup;
+
+ task = this_thr->th.th_current_task;
+ KMP_DEBUG_ASSERT( task );
+
+ taskgroup = task->td_taskgroup;
+ if (taskgroup) {
+ // return the current status of cancellation for the
+ // taskgroup
+ return !!taskgroup->cancel_request;
+ }
+ else {
+ // if a cancellation point is encountered by a task
+ // that does not belong to a taskgroup, it is OK
+ // to ignore it
+ return 0 /* false */;
+ }
+ }
+ default:
+ KMP_ASSERT (0 /* false */);
+ }
+ }
+
+ // ICV OMP_CANCELLATION=false, so we ignore the cancellation point
+ KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
+ return 0 /* false */;
+}
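A typical use of this entry point is an early exit from a parallel loop, where one thread requests cancellation and the others notice it at an explicit cancellation point. Illustrative, runnable example (standard OpenMP 4.0, effective only with OMP_CANCELLATION=true):

#include <omp.h>
#include <stdio.h>

int main(void) {
    int found = -1;
    #pragma omp parallel for
    for (int i = 0; i < 1000000; ++i) {
        if (i == 12345) {
            found = i;                       /* only one iteration matches */
            #pragma omp cancel for
        }
        #pragma omp cancellation point for   /* maps to __kmpc_cancellationpoint */
    }
    printf("found=%d\n", found);
    return 0;
}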
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the
+encountering thread has to cancel.
+
+Barrier with cancellation point to send threads from the barrier to the
+end of the parallel region. Needs a special code pattern as documented
+in the design document for the cancellation feature.
+*/
+kmp_int32
+__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) {
+ int ret = 0 /* false */;
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+ kmp_team_t *this_team = this_thr->th.th_team;
+
+ KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+ // call into the standard barrier
+ __kmpc_barrier(loc, gtid);
+
+ // if cancellation is active, check cancellation flag
+ if (__kmp_omp_cancellation) {
+ // depending on which construct to cancel, check the flag and
+ // reset the flag
+ switch (this_team->t.t_cancel_request) {
+ case cancel_parallel:
+ ret = 1;
+ // ensure that threads have checked the flag, when
+ // leaving the above barrier
+ __kmpc_barrier(loc, gtid);
+ this_team->t.t_cancel_request = cancel_noreq;
+ // the next barrier is the fork/join barrier, which
+ // synchronizes the threads leaving here
+ break;
+ case cancel_loop:
+ case cancel_sections:
+ ret = 1;
+ // ensure that threads have checked the flag, when
+ // leaving the above barrier
+ __kmpc_barrier(loc, gtid);
+ this_team->t.t_cancel_request = cancel_noreq;
+ // synchronize the threads again to make sure we
+ // do not have any run-away threads that cause a race
+ // on the cancellation flag
+ __kmpc_barrier(loc, gtid);
+ break;
+ case cancel_taskgroup:
+ // this case should not occur
+ KMP_ASSERT (0 /* false */ );
+ break;
+ case cancel_noreq:
+ // do nothing
+ break;
+ default:
+ KMP_ASSERT ( 0 /* false */);
+ }
+ }
+
+ return ret;
+}
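Barriers inside a cancellable parallel region are themselves cancellation points, which is why the function above wraps __kmpc_barrier. A small illustration, assuming OMP_CANCELLATION=true (with it unset the barrier behaves normally and the final printf runs on every thread):

#include <omp.h>
#include <stdio.h>

int main(void) {
    #pragma omp parallel
    {
        if (omp_get_thread_num() == 0) {
            #pragma omp cancel parallel
        }
        /* With cancellation enabled this explicit barrier is typically
           compiled to a cancellation barrier such as __kmpc_cancel_barrier,
           so all threads observe the pending request here and leave. */
        #pragma omp barrier
        printf("not reached when cancellation is enabled\n");
    }
    return 0;
}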
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the
+encountering thread has to cancel.
+
+Query function to query the current status of cancellation requests.
+Can be used to implement the following pattern:
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_str.h"
-
-#if OMP_40_ENABLED
-
-/*!
-@ingroup CANCELLATION
-@param loc_ref location of the original task directive
-@param gtid Global thread ID of encountering thread
-@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
-
-@return returns true if the cancellation request has been activated and the execution thread
-needs to proceed to the end of the canceled region.
-
-Request cancellation of the binding OpenMP region.
-*/
-kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
- kmp_info_t *this_thr = __kmp_threads [ gtid ];
-
- KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
-
- KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
- KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop ||
- cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup);
- KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
-
- if (__kmp_omp_cancellation) {
- switch (cncl_kind) {
- case cancel_parallel:
- case cancel_loop:
- case cancel_sections:
- // cancellation requests for parallel and worksharing constructs
- // are handled through the team structure
- {
- kmp_team_t *this_team = this_thr->th.th_team;
- KMP_DEBUG_ASSERT(this_team);
- kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind);
- if (old == cancel_noreq || old == cncl_kind) {
- //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n",
- // this_team->t.t_cancel_request, &(this_team->t.t_cancel_request));
- // we do not have a cancellation request in this team or we do have one
- // that matches the current request -> cancel
- return 1 /* true */;
- }
- break;
- }
- case cancel_taskgroup:
- // cancellation requests for a task group
- // are handled through the taskgroup structure
- {
- kmp_taskdata_t* task;
- kmp_taskgroup_t* taskgroup;
-
- task = this_thr->th.th_current_task;
- KMP_DEBUG_ASSERT( task );
-
- taskgroup = task->td_taskgroup;
- if (taskgroup) {
- kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind);
- if (old == cancel_noreq || old == cncl_kind) {
- // we do not have a cancellation request in this taskgroup or we do have one
- // that matches the current request -> cancel
- return 1 /* true */;
- }
- }
- else {
- // TODO: what needs to happen here?
- // the specification disallows cancellation w/o taskgroups
- // so we might do anything here, let's abort for now
- KMP_ASSERT( 0 /* false */);
- }
- }
- break;
- default:
- KMP_ASSERT (0 /* false */);
- }
- }
-
- // ICV OMP_CANCELLATION=false, so we ignored this cancel request
- KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
- return 0 /* false */;
-}
-
-/*!
-@ingroup CANCELLATION
-@param loc_ref location of the original task directive
-@param gtid Global thread ID of encountering thread
-@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
-
-@return returns true if a matching cancellation request has been flagged in the RTL and the
-encountering thread has to cancel..
-
-Cancellation point for the encountering thread.
-*/
-kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
- kmp_info_t *this_thr = __kmp_threads [ gtid ];
-
- KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
-
- KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
- KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop ||
- cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup);
- KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
-
- if (__kmp_omp_cancellation) {
- switch (cncl_kind) {
- case cancel_parallel:
- case cancel_loop:
- case cancel_sections:
- // cancellation requests for parallel and worksharing constructs
- // are handled through the team structure
- {
- kmp_team_t *this_team = this_thr->th.th_team;
- KMP_DEBUG_ASSERT(this_team);
- if (this_team->t.t_cancel_request) {
- if (cncl_kind == this_team->t.t_cancel_request) {
- // the request in the team structure matches the type of
- // cancellation point so we can cancel
- return 1 /* true */;
- }
- KMP_ASSERT( 0 /* false */);
- }
- else {
- // we do not have a cancellation request pending, so we just
- // ignore this cancellation point
- return 0;
- }
- break;
- }
- case cancel_taskgroup:
- // cancellation requests for a task group
- // are handled through the taskgroup structure
- {
- kmp_taskdata_t* task;
- kmp_taskgroup_t* taskgroup;
-
- task = this_thr->th.th_current_task;
- KMP_DEBUG_ASSERT( task );
-
- taskgroup = task->td_taskgroup;
- if (taskgroup) {
- // return the current status of cancellation for the
- // taskgroup
- return !!taskgroup->cancel_request;
- }
- else {
- // if a cancellation point is encountered by a task
- // that does not belong to a taskgroup, it is OK
- // to ignore it
- return 0 /* false */;
- }
- }
- default:
- KMP_ASSERT (0 /* false */);
- }
- }
-
- // ICV OMP_CANCELLATION=false, so we ignore the cancellation point
- KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
- return 0 /* false */;
-}
-
-/*!
-@ingroup CANCELLATION
-@param loc_ref location of the original task directive
-@param gtid Global thread ID of encountering thread
-
-@return returns true if a matching cancellation request has been flagged in the RTL and the
-encountering thread has to cancel..
-
-Barrier with cancellation point to send threads from the barrier to the
-end of the parallel region. Needs a special code pattern as documented
-in the design document for the cancellation feature.
-*/
-kmp_int32
-__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) {
- int ret = 0 /* false */;
- kmp_info_t *this_thr = __kmp_threads [ gtid ];
- kmp_team_t *this_team = this_thr->th.th_team;
-
- KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
-
- // call into the standard barrier
- __kmpc_barrier(loc, gtid);
-
- // if cancellation is active, check cancellation flag
- if (__kmp_omp_cancellation) {
- // depending on which construct to cancel, check the flag and
- // reset the flag
- switch (this_team->t.t_cancel_request) {
- case cancel_parallel:
- ret = 1;
- // ensure that threads have checked the flag, when
- // leaving the above barrier
- __kmpc_barrier(loc, gtid);
- this_team->t.t_cancel_request = cancel_noreq;
- // the next barrier is the fork/join barrier, which
- // synchronizes the threads leaving here
- break;
- case cancel_loop:
- case cancel_sections:
- ret = 1;
- // ensure that threads have checked the flag, when
- // leaving the above barrier
- __kmpc_barrier(loc, gtid);
- this_team->t.t_cancel_request = cancel_noreq;
- // synchronize the threads again to make sure we
- // do not have any run-away threads that cause a race
- // on the cancellation flag
- __kmpc_barrier(loc, gtid);
- break;
- case cancel_taskgroup:
- // this case should not occur
- KMP_ASSERT (0 /* false */ );
- break;
- case cancel_noreq:
- // do nothing
- break;
- default:
- KMP_ASSERT ( 0 /* false */);
- }
- }
-
- return ret;
-}
-
-/*!
-@ingroup CANCELLATION
-@param loc_ref location of the original task directive
-@param gtid Global thread ID of encountering thread
-
-@return returns true if a matching cancellation request has been flagged in the RTL and the
-encountering thread has to cancel..
-
-Query function to query the current status of cancellation requests.
-Can be used to implement the following pattern:
-
-if (kmp_get_cancellation_status(kmp_cancel_parallel)) {
- perform_cleanup();
- #pragma omp cancellation point parallel
-}
-*/
-int __kmp_get_cancellation_status(int cancel_kind) {
- if (__kmp_omp_cancellation) {
- kmp_info_t *this_thr = __kmp_entry_thread();
-
- switch (cancel_kind) {
- case cancel_parallel:
- case cancel_loop:
- case cancel_sections:
- {
- kmp_team_t *this_team = this_thr->th.th_team;
- return this_team->t.t_cancel_request == cancel_kind;
- }
- case cancel_taskgroup:
- {
- kmp_taskdata_t* task;
- kmp_taskgroup_t* taskgroup;
- task = this_thr->th.th_current_task;
- taskgroup = task->td_taskgroup;
- return taskgroup && taskgroup->cancel_request;
- }
- }
- }
-
- return 0 /* false */;
-}
-
-#endif
+if (kmp_get_cancellation_status(kmp_cancel_parallel)) {
+ perform_cleanup();
+ #pragma omp cancellation point parallel
+}
+*/
+int __kmp_get_cancellation_status(int cancel_kind) {
+ if (__kmp_omp_cancellation) {
+ kmp_info_t *this_thr = __kmp_entry_thread();
+
+ switch (cancel_kind) {
+ case cancel_parallel:
+ case cancel_loop:
+ case cancel_sections:
+ {
+ kmp_team_t *this_team = this_thr->th.th_team;
+ return this_team->t.t_cancel_request == cancel_kind;
+ }
+ case cancel_taskgroup:
+ {
+ kmp_taskdata_t* task;
+ kmp_taskgroup_t* taskgroup;
+ task = this_thr->th.th_current_task;
+ taskgroup = task->td_taskgroup;
+ return taskgroup && taskgroup->cancel_request;
+ }
+ }
+ }
+
+ return 0 /* false */;
+}
+
+#endif
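Every entry point in this file first consults __kmp_omp_cancellation, i.e. the cancel-var ICV set from the OMP_CANCELLATION environment variable. User code can query the same switch through the standard API (omp_get_cancellation is part of OpenMP 4.0 and is distinct from the internal kmp_get_cancellation_status helper documented above):

#include <omp.h>
#include <stdio.h>

int main(void) {
    /* Nonzero only when the program was started with OMP_CANCELLATION=true;
       otherwise every cancel request is silently ignored, as in the code above. */
    printf("cancellation %s\n", omp_get_cancellation() ? "enabled" : "disabled");
    return 0;
}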
diff --git a/contrib/libs/cxxsupp/openmp/kmp_config.h b/contrib/libs/cxxsupp/openmp/kmp_config.h
index f49adebdb0..35d10e2bfd 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_config.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_config.h
@@ -1,100 +1,100 @@
-/*
- * kmp_config.h -- Feature macros
- */
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-#ifndef KMP_CONFIG_H
-#define KMP_CONFIG_H
-
-#include "kmp_platform.h"
-
-// cmakedefine01 MACRO will define MACRO as either 0 or 1
-// cmakedefine MACRO 1 will define MACRO as 1 or leave undefined
-#define DEBUG_BUILD 0
-#define RELWITHDEBINFO_BUILD 0
+/*
+ * kmp_config.h -- Feature macros
+ */
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef KMP_CONFIG_H
+#define KMP_CONFIG_H
+
+#include "kmp_platform.h"
+
+// cmakedefine01 MACRO will define MACRO as either 0 or 1
+// cmakedefine MACRO 1 will define MACRO as 1 or leave undefined
+#define DEBUG_BUILD 0
+#define RELWITHDEBINFO_BUILD 0
#define LIBOMP_USE_ITT_NOTIFY 0
-#define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY
-#if ! LIBOMP_USE_ITT_NOTIFY
-# define INTEL_NO_ITTNOTIFY_API
-#endif
-#define LIBOMP_USE_VERSION_SYMBOLS 0
-#if LIBOMP_USE_VERSION_SYMBOLS
-# define KMP_USE_VERSION_SYMBOLS
-#endif
-#define LIBOMP_HAVE_WEAK_ATTRIBUTE 1
-#define KMP_HAVE_WEAK_ATTRIBUTE LIBOMP_HAVE_WEAK_ATTRIBUTE
-#define LIBOMP_HAVE_PSAPI 0
-#define KMP_HAVE_PSAPI LIBOMP_HAVE_PSAPI
-#define LIBOMP_STATS 0
-#define KMP_STATS_ENABLED LIBOMP_STATS
-#define LIBOMP_USE_DEBUGGER 0
-#define USE_DEBUGGER LIBOMP_USE_DEBUGGER
-#define LIBOMP_OMPT_DEBUG 0
-#define OMPT_DEBUG LIBOMP_OMPT_DEBUG
-#define LIBOMP_OMPT_SUPPORT 0
-#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#define LIBOMP_OMPT_BLAME 1
-#define OMPT_BLAME LIBOMP_OMPT_BLAME
-#define LIBOMP_OMPT_TRACE 1
-#define OMPT_TRACE LIBOMP_OMPT_TRACE
-#define LIBOMP_USE_ADAPTIVE_LOCKS 1
-#define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS
-#define KMP_DEBUG_ADAPTIVE_LOCKS 0
-#define LIBOMP_USE_INTERNODE_ALIGNMENT 0
-#define KMP_USE_INTERNODE_ALIGNMENT LIBOMP_USE_INTERNODE_ALIGNMENT
-#define LIBOMP_ENABLE_ASSERTIONS 1
-#define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS
-#define STUBS_LIBRARY 0
-#define LIBOMP_USE_HWLOC 0
-#define KMP_USE_HWLOC LIBOMP_USE_HWLOC
-#define KMP_ARCH_STR "Intel(R) 64"
-#define KMP_LIBRARY_FILE "libomp.so"
-#define KMP_VERSION_MAJOR 5
-#define KMP_VERSION_MINOR 0
-#define LIBOMP_OMP_VERSION 41
-#define OMP_50_ENABLED (LIBOMP_OMP_VERSION >= 50)
-#define OMP_41_ENABLED (LIBOMP_OMP_VERSION >= 41)
-#define OMP_40_ENABLED (LIBOMP_OMP_VERSION >= 40)
-#define OMP_30_ENABLED (LIBOMP_OMP_VERSION >= 30)
-
-// Configured cache line based on architecture
-#if KMP_ARCH_PPC64
-# define CACHE_LINE 128
-#else
-# define CACHE_LINE 64
-#endif
-
-#define KMP_DYNAMIC_LIB 1
-#define KMP_NESTED_HOT_TEAMS 1
-#define KMP_ADJUST_BLOCKTIME 1
-#define BUILD_PARALLEL_ORDERED 1
-#define KMP_ASM_INTRINS 1
-#define USE_ITT_BUILD 1
-#define INTEL_ITTNOTIFY_PREFIX __kmp_itt_
-#if ! KMP_MIC
-# define USE_LOAD_BALANCE 1
-#endif
-#if ! (KMP_OS_WINDOWS || KMP_OS_DARWIN)
-# define KMP_TDATA_GTID 1
-#endif
-#if STUBS_LIBRARY
-# define KMP_STUB 1
-#endif
-#if DEBUG_BUILD || RELWITHDEBINFO_BUILD
-# define KMP_DEBUG 1
-#endif
-
-#if KMP_OS_WINDOWS
-# define KMP_WIN_CDECL
-#else
-# define BUILD_TV
-# define KMP_GOMP_COMPAT
-#endif
-
-#endif // KMP_CONFIG_H
+#define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY
+#if ! LIBOMP_USE_ITT_NOTIFY
+# define INTEL_NO_ITTNOTIFY_API
+#endif
+#define LIBOMP_USE_VERSION_SYMBOLS 0
+#if LIBOMP_USE_VERSION_SYMBOLS
+# define KMP_USE_VERSION_SYMBOLS
+#endif
+#define LIBOMP_HAVE_WEAK_ATTRIBUTE 1
+#define KMP_HAVE_WEAK_ATTRIBUTE LIBOMP_HAVE_WEAK_ATTRIBUTE
+#define LIBOMP_HAVE_PSAPI 0
+#define KMP_HAVE_PSAPI LIBOMP_HAVE_PSAPI
+#define LIBOMP_STATS 0
+#define KMP_STATS_ENABLED LIBOMP_STATS
+#define LIBOMP_USE_DEBUGGER 0
+#define USE_DEBUGGER LIBOMP_USE_DEBUGGER
+#define LIBOMP_OMPT_DEBUG 0
+#define OMPT_DEBUG LIBOMP_OMPT_DEBUG
+#define LIBOMP_OMPT_SUPPORT 0
+#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
+#define LIBOMP_OMPT_BLAME 1
+#define OMPT_BLAME LIBOMP_OMPT_BLAME
+#define LIBOMP_OMPT_TRACE 1
+#define OMPT_TRACE LIBOMP_OMPT_TRACE
+#define LIBOMP_USE_ADAPTIVE_LOCKS 1
+#define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS
+#define KMP_DEBUG_ADAPTIVE_LOCKS 0
+#define LIBOMP_USE_INTERNODE_ALIGNMENT 0
+#define KMP_USE_INTERNODE_ALIGNMENT LIBOMP_USE_INTERNODE_ALIGNMENT
+#define LIBOMP_ENABLE_ASSERTIONS 1
+#define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS
+#define STUBS_LIBRARY 0
+#define LIBOMP_USE_HWLOC 0
+#define KMP_USE_HWLOC LIBOMP_USE_HWLOC
+#define KMP_ARCH_STR "Intel(R) 64"
+#define KMP_LIBRARY_FILE "libomp.so"
+#define KMP_VERSION_MAJOR 5
+#define KMP_VERSION_MINOR 0
+#define LIBOMP_OMP_VERSION 41
+#define OMP_50_ENABLED (LIBOMP_OMP_VERSION >= 50)
+#define OMP_41_ENABLED (LIBOMP_OMP_VERSION >= 41)
+#define OMP_40_ENABLED (LIBOMP_OMP_VERSION >= 40)
+#define OMP_30_ENABLED (LIBOMP_OMP_VERSION >= 30)
+
+// Configured cache line based on architecture
+#if KMP_ARCH_PPC64
+# define CACHE_LINE 128
+#else
+# define CACHE_LINE 64
+#endif
+
+#define KMP_DYNAMIC_LIB 1
+#define KMP_NESTED_HOT_TEAMS 1
+#define KMP_ADJUST_BLOCKTIME 1
+#define BUILD_PARALLEL_ORDERED 1
+#define KMP_ASM_INTRINS 1
+#define USE_ITT_BUILD 1
+#define INTEL_ITTNOTIFY_PREFIX __kmp_itt_
+#if ! KMP_MIC
+# define USE_LOAD_BALANCE 1
+#endif
+#if ! (KMP_OS_WINDOWS || KMP_OS_DARWIN)
+# define KMP_TDATA_GTID 1
+#endif
+#if STUBS_LIBRARY
+# define KMP_STUB 1
+#endif
+#if DEBUG_BUILD || RELWITHDEBINFO_BUILD
+# define KMP_DEBUG 1
+#endif
+
+#if KMP_OS_WINDOWS
+# define KMP_WIN_CDECL
+#else
+# define BUILD_TV
+# define KMP_GOMP_COMPAT
+#endif
+
+#endif // KMP_CONFIG_H
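As the comment near the top of the header notes, the two CMake idioms differ: #cmakedefine01 always produces a 0/1 macro meant for #if, while plain #cmakedefine may leave the macro undefined and is meant for #ifdef. A stand-alone sketch of how the version gates defined above are intended to be used (the macro values are copied from this header; the feature bodies are placeholders):

#include <stdio.h>

#define LIBOMP_OMP_VERSION 41
#define OMP_50_ENABLED (LIBOMP_OMP_VERSION >= 50)
#define OMP_41_ENABLED (LIBOMP_OMP_VERSION >= 41)
#define OMP_40_ENABLED (LIBOMP_OMP_VERSION >= 40)

int main(void) {
#if OMP_40_ENABLED
    puts("OpenMP 4.0 features (cancellation, proc_bind, teams) compiled in");
#endif
#if !OMP_50_ENABLED
    puts("OpenMP 5.0 features compiled out");
#endif
    return 0;
}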
diff --git a/contrib/libs/cxxsupp/openmp/kmp_csupport.c b/contrib/libs/cxxsupp/openmp/kmp_csupport.c
index 6d1d328543..e44886facd 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_csupport.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_csupport.c
@@ -1,3046 +1,3046 @@
-/*
- * kmp_csupport.c -- kfront linkage support for OpenMP.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "omp.h" /* extern "C" declarations of user-visible routines */
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_error.h"
-#include "kmp_stats.h"
-
-#if OMPT_SUPPORT
-#include "ompt-internal.h"
-#include "ompt-specific.h"
-#endif
-
-#define MAX_MESSAGE 512
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* flags will be used in future, e.g., to implement */
-/* openmp_strict library restrictions */
-
-/*!
- * @ingroup STARTUP_SHUTDOWN
- * @param loc in source location information
- * @param flags in for future use (currently ignored)
- *
- * Initialize the runtime library. This call is optional; if it is not made then
- * it will be implicitly called by attempts to use other library functions.
- *
- */
-void
-__kmpc_begin(ident_t *loc, kmp_int32 flags)
-{
- // By default __kmp_ignore_mppbeg() returns TRUE.
- if (__kmp_ignore_mppbeg() == FALSE) {
- __kmp_internal_begin();
-
- KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
- }
-}
-
-/*!
- * @ingroup STARTUP_SHUTDOWN
- * @param loc source location information
- *
- * Shut down the runtime library. This is also optional, and even if called it will not
- * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
- */
-void
-__kmpc_end(ident_t *loc)
-{
- // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op.
- // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
- // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
- // will unregister this root (it can cause library shut down).
- if (__kmp_ignore_mppend() == FALSE) {
- KC_TRACE( 10, ("__kmpc_end: called\n" ) );
- KA_TRACE( 30, ("__kmpc_end\n" ));
-
- __kmp_internal_end_thread( -1 );
- }
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return The global thread index of the active thread.
-
-This function can be called in any context.
-
-If the runtime has only been entered at the outermost level from a
-single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
-which would be returned by omp_get_thread_num() in the outermost
-active parallel construct. (Or zero if there is no active parallel
-construct, since the master thread is necessarily thread zero).
-
-If multiple non-OpenMP threads all enter an OpenMP construct then this
-will be a unique thread identifier among all the threads created by
-the OpenMP runtime (but the value cannot be defined in terms of
-OpenMP thread ids returned by omp_get_thread_num()).
-
-*/
-kmp_int32
-__kmpc_global_thread_num(ident_t *loc)
-{
- kmp_int32 gtid = __kmp_entry_gtid();
-
- KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
-
- return gtid;
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return The number of threads under control of the OpenMP<sup>*</sup> runtime
-
-This function can be called in any context.
-It returns the total number of threads under the control of the OpenMP runtime. That is
-not a number that can be determined by any OpenMP standard calls, since the library may be
-called from more than one non-OpenMP thread, and this reflects the total over all such calls.
-Similarly the runtime maintains underlying threads even when they are not active (since the cost
-of creating and destroying OS threads is high), this call counts all such threads even if they are not
-waiting for work.
-*/
-kmp_int32
-__kmpc_global_num_threads(ident_t *loc)
-{
- KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
-
- return TCR_4(__kmp_nth);
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return The thread number of the calling thread in the innermost active parallel construct.
-
-*/
-kmp_int32
-__kmpc_bound_thread_num(ident_t *loc)
-{
- KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
- return __kmp_tid_from_gtid( __kmp_entry_gtid() );
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return The number of threads in the innermost active parallel construct.
-*/
-kmp_int32
-__kmpc_bound_num_threads(ident_t *loc)
-{
- KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
-
- return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
-}
-
-/*!
- * @ingroup DEPRECATED
- * @param loc location description
- *
- * This function need not be called. It always returns TRUE.
- */
-kmp_int32
-__kmpc_ok_to_fork(ident_t *loc)
-{
-#ifndef KMP_DEBUG
-
- return TRUE;
-
-#else
-
- const char *semi2;
- const char *semi3;
- int line_no;
-
- if (__kmp_par_range == 0) {
- return TRUE;
- }
- semi2 = loc->psource;
- if (semi2 == NULL) {
- return TRUE;
- }
- semi2 = strchr(semi2, ';');
- if (semi2 == NULL) {
- return TRUE;
- }
- semi2 = strchr(semi2 + 1, ';');
- if (semi2 == NULL) {
- return TRUE;
- }
- if (__kmp_par_range_filename[0]) {
- const char *name = semi2 - 1;
- while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
- name--;
- }
- if ((*name == '/') || (*name == ';')) {
- name++;
- }
- if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
- return __kmp_par_range < 0;
- }
- }
- semi3 = strchr(semi2 + 1, ';');
- if (__kmp_par_range_routine[0]) {
- if ((semi3 != NULL) && (semi3 > semi2)
- && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
- return __kmp_par_range < 0;
- }
- }
- if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
- if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
- return __kmp_par_range > 0;
- }
- return __kmp_par_range < 0;
- }
- return TRUE;
-
-#endif /* KMP_DEBUG */
-
-}
-
-/*!
-@ingroup THREAD_STATES
-@param loc Source location information.
-@return 1 if this thread is executing inside an active parallel region, zero if not.
-*/
-kmp_int32
-__kmpc_in_parallel( ident_t *loc )
-{
- return __kmp_entry_thread() -> th.th_root -> r.r_active;
-}
-
-/*!
-@ingroup PARALLEL
-@param loc source location information
-@param global_tid global thread number
-@param num_threads number of threads requested for this parallel construct
-
-Set the number of threads to be used by the next fork spawned by this thread.
-This call is only required if the parallel construct has a `num_threads` clause.
-*/
-void
-__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
-{
- KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
- global_tid, num_threads ) );
-
- __kmp_push_num_threads( loc, global_tid, num_threads );
-}
-
-void
-__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
-{
- KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
-
- /* the num_threads are automatically popped */
-}
-
-
-#if OMP_40_ENABLED
-
-void
-__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
-{
- KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
- global_tid, proc_bind ) );
-
- __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
-}
-
-#endif /* OMP_40_ENABLED */
-
-
-/*!
-@ingroup PARALLEL
-@param loc source location information
-@param argc total number of arguments in the ellipsis
-@param microtask pointer to callback routine consisting of outlined parallel construct
-@param ... pointers to shared variables that aren't global
-
-Do the actual fork and call the microtask in the relevant number of threads.
-*/
-void
-__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
-{
- int gtid = __kmp_entry_gtid();
-
-#if (KMP_STATS_ENABLED)
- int inParallel = __kmpc_in_parallel(loc);
- if (inParallel)
- {
- KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
- }
- else
- {
- KMP_STOP_EXPLICIT_TIMER(OMP_serial);
- KMP_COUNT_BLOCK(OMP_PARALLEL);
- }
-#endif
-
- // maybe to save thr_state is enough here
- {
- va_list ap;
- va_start( ap, microtask );
-
-#if OMPT_SUPPORT
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_info_t *master_th = __kmp_threads[ gtid ];
- kmp_team_t *parent_team = master_th->th.th_team;
- if (ompt_enabled) {
- parent_team->t.t_implicit_task_taskdata[tid].
- ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
-#if INCLUDE_SSC_MARKS
- SSC_MARK_FORKING();
-#endif
- __kmp_fork_call( loc, gtid, fork_context_intel,
- argc,
-#if OMPT_SUPPORT
- VOLATILE_CAST(void *) microtask, // "unwrapped" task
-#endif
- VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
- VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
-/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- &ap
-#else
- ap
-#endif
- );
-#if INCLUDE_SSC_MARKS
- SSC_MARK_JOINING();
-#endif
- __kmp_join_call( loc, gtid
-#if OMPT_SUPPORT
- , fork_context_intel
-#endif
- );
-
- va_end( ap );
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- parent_team->t.t_implicit_task_taskdata[tid].
- ompt_task_info.frame.reenter_runtime_frame = 0;
- }
-#endif
- }
-#if (KMP_STATS_ENABLED)
- if (!inParallel)
- KMP_START_EXPLICIT_TIMER(OMP_serial);
-#endif
-}
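For context, a parallel construct like the one below is outlined by the compiler into a microtask and started through __kmpc_fork_call; the variadic arguments after the microtask are the addresses of the shared variables handed to it. A runnable user-level counterpart:

#include <omp.h>
#include <stdio.h>

int main(void) {
    int hits = 0;                        /* becomes one of the "..." pointers */
    #pragma omp parallel shared(hits)
    {                                    /* this block is the outlined microtask */
        #pragma omp atomic
        hits++;
    }
    printf("the microtask ran on %d threads\n", hits);
    return 0;
}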
-
-#if OMP_40_ENABLED
-/*!
-@ingroup PARALLEL
-@param loc source location information
-@param global_tid global thread number
-@param num_teams number of teams requested for the teams construct
-@param num_threads number of threads per team requested for the teams construct
-
-Set the number of teams to be used by the teams construct.
-This call is only required if the teams construct has a `num_teams` clause
-or a `thread_limit` clause (or both).
-*/
-void
-__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
-{
- KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
- global_tid, num_teams, num_threads ) );
-
- __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
-}
-
-/*!
-@ingroup PARALLEL
-@param loc source location information
-@param argc total number of arguments in the ellipsis
-@param microtask pointer to callback routine consisting of outlined teams construct
-@param ... pointers to shared variables that aren't global
-
-Do the actual fork and call the microtask in the relevant number of threads.
-*/
-void
-__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
-{
- int gtid = __kmp_entry_gtid();
- kmp_info_t *this_thr = __kmp_threads[ gtid ];
- va_list ap;
- va_start( ap, microtask );
-
- KMP_COUNT_BLOCK(OMP_TEAMS);
-
- // remember teams entry point and nesting level
- this_thr->th.th_teams_microtask = microtask;
- this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
-
-#if OMPT_SUPPORT
- kmp_team_t *parent_team = this_thr->th.th_team;
- int tid = __kmp_tid_from_gtid( gtid );
- if (ompt_enabled) {
- parent_team->t.t_implicit_task_taskdata[tid].
- ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
- // check if __kmpc_push_num_teams called, set default number of teams otherwise
- if ( this_thr->th.th_teams_size.nteams == 0 ) {
- __kmp_push_num_teams( loc, gtid, 0, 0 );
- }
- KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
- KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
- KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
-
- __kmp_fork_call( loc, gtid, fork_context_intel,
- argc,
-#if OMPT_SUPPORT
- VOLATILE_CAST(void *) microtask, // "unwrapped" task
-#endif
- VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
- VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- &ap
-#else
- ap
-#endif
- );
- __kmp_join_call( loc, gtid
-#if OMPT_SUPPORT
- , fork_context_intel
-#endif
- );
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- parent_team->t.t_implicit_task_taskdata[tid].
- ompt_task_info.frame.reenter_runtime_frame = NULL;
- }
-#endif
-
- this_thr->th.th_teams_microtask = NULL;
- this_thr->th.th_teams_level = 0;
- *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
- va_end( ap );
-}
-#endif /* OMP_40_ENABLED */
-
-
-//
-// I don't think this function should ever have been exported.
-// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
-// openmp code ever called it, but it's been exported from the RTL for so
-// long that I'm afraid to remove the definition.
-//
-int
-__kmpc_invoke_task_func( int gtid )
-{
- return __kmp_invoke_task_func( gtid );
-}
-
-/*!
-@ingroup PARALLEL
-@param loc source location information
-@param global_tid global thread number
-
-Enter a serialized parallel construct. This interface is used to handle a
-conditional parallel region, like this,
-@code
-#pragma omp parallel if (condition)
-@endcode
-when the condition is false.
-*/
-void
-__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
-{
- __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
- * kmp_fork_call since the tasks to be done are similar in each case.
- */
-}
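The conditional region mentioned in the comment above looks like this in user code; when the condition is false the compiler typically routes the region through the serialized entry point instead of forking a team:

#include <omp.h>
#include <stdio.h>

int main(void) {
    int n = 100;                         /* too small to be worth forking */
    #pragma omp parallel if (n > 100000)
    {
        /* Prints 1 when the region is serialized. */
        printf("team size: %d\n", omp_get_num_threads());
    }
    return 0;
}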
-
-/*!
-@ingroup PARALLEL
-@param loc source location information
-@param global_tid global thread number
-
-Leave a serialized parallel construct.
-*/
-void
-__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
-{
- kmp_internal_control_t *top;
- kmp_info_t *this_thr;
- kmp_team_t *serial_team;
-
- KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
-
- /* skip all this code for autopar serialized loops since it results in
- unacceptable overhead */
- if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
- return;
-
- // Not autopar code
- if( ! TCR_4( __kmp_init_parallel ) )
- __kmp_parallel_initialize();
-
- this_thr = __kmp_threads[ global_tid ];
- serial_team = this_thr->th.th_serial_team;
-
- #if OMP_41_ENABLED
- kmp_task_team_t * task_team = this_thr->th.th_task_team;
-
- // we need to wait for the proxy tasks before finishing the thread
- if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
- __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
- #endif
-
- KMP_MB();
- KMP_DEBUG_ASSERT( serial_team );
- KMP_ASSERT( serial_team -> t.t_serialized );
- KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
- KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
- KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
- KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
-
- /* If necessary, pop the internal control stack values and replace the team values */
- top = serial_team -> t.t_control_stack_top;
- if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
- copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
- serial_team -> t.t_control_stack_top = top -> next;
- __kmp_free(top);
- }
-
- //if( serial_team -> t.t_serialized > 1 )
- serial_team -> t.t_level--;
-
- /* pop dispatch buffers stack */
- KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
- {
- dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
- serial_team->t.t_dispatch->th_disp_buffer =
- serial_team->t.t_dispatch->th_disp_buffer->next;
- __kmp_free( disp_buffer );
- }
-
- -- serial_team -> t.t_serialized;
- if ( serial_team -> t.t_serialized == 0 ) {
-
- /* return to the parallel section */
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
- __kmp_clear_x87_fpu_status_word();
- __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
- __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
- }
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- this_thr -> th.th_team = serial_team -> t.t_parent;
- this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
-
- /* restore values cached in the thread */
- this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
- this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
- this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
-
- /* TODO the below shouldn't need to be adjusted for serialized teams */
- this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
- t.t_dispatch[ serial_team -> t.t_master_tid ];
-
- __kmp_pop_current_task_from_thread( this_thr );
-
- KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
- this_thr -> th.th_current_task -> td_flags.executing = 1;
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Copy the task team from the new child / old parent team to the thread.
- this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
- KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
- global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
- }
- } else {
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
- global_tid, serial_team, serial_team -> t.t_serialized ) );
- }
- }
-
-#if USE_ITT_BUILD
- kmp_uint64 cur_time = 0;
-#if USE_ITT_NOTIFY
- if ( __itt_get_timestamp_ptr ) {
- cur_time = __itt_get_timestamp();
- }
-#endif /* USE_ITT_NOTIFY */
- if ( this_thr->th.th_team->t.t_level == 0
-#if OMP_40_ENABLED
- && this_thr->th.th_teams_microtask == NULL
-#endif
- ) {
- // Report the barrier
- this_thr->th.th_ident = loc;
- if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
- ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
- {
- __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
- cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
- if ( __kmp_forkjoin_frames_mode == 3 )
- // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
- __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
- cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
- } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
- ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
- // Mark the end of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
- __kmp_itt_region_joined( global_tid, 1 );
- }
-#endif /* USE_ITT_BUILD */
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_parallel( global_tid, NULL );
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information.
-
-Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
-depending on the memory ordering convention obeyed by the compiler
-even that may not be necessary).
-*/
-void
-__kmpc_flush(ident_t *loc)
-{
- KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
-
- /* need explicit __mf() here since use volatile instead in library */
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
- #if KMP_MIC
- // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
- // We shouldn't need it, though, since the ABI rules require that
- // * If the compiler generates NGO stores it also generates the fence
- // * If users hand-code NGO stores they should insert the fence
- // therefore no incomplete unordered stores should be visible.
- #else
- // C74404
- // This is to address non-temporal store instructions (sfence needed).
- // The clflush instruction is also addressed (mfence needed).
- // Probably the non-temporal load movntdqa instruction should also be addressed.
- // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
- if ( ! __kmp_cpuinfo.initialized ) {
- __kmp_query_cpuid( & __kmp_cpuinfo );
- }; // if
- if ( ! __kmp_cpuinfo.sse2 ) {
- // CPU cannot execute SSE2 instructions.
- } else {
- #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
- _mm_mfence();
- #else
- __sync_synchronize();
- #endif // KMP_COMPILER_ICC
- }; // if
- #endif // KMP_MIC
- #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- // Nothing to see here move along
- #elif KMP_ARCH_PPC64
- // Nothing needed here (we have a real MB above).
- #if KMP_OS_CNK
- // The flushing thread needs to yield here; this prevents a
- // busy-waiting thread from saturating the pipeline. flush is
- // often used in loops like this:
- // while (!flag) {
- // #pragma omp flush(flag)
- // }
- // and adding the yield here is good for at least a 10x speedup
- // when running >2 threads per core (on the NAS LU benchmark).
- __kmp_yield(TRUE);
- #endif
- #else
- #error Unknown or unsupported architecture
- #endif
-
-}
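The busy-wait idiom quoted in the PPC64 comment above is the classic use of flush. A complete producer/consumer version (illustration only: it assumes at least two threads and uses a plain int flag the way the quoted loop does; new code should prefer atomics):

#include <omp.h>
#include <stdio.h>

int main(void) {
    int flag = 0, data = 0;
    #pragma omp parallel num_threads(2) shared(flag, data)
    {
        if (omp_get_thread_num() == 0) {        /* producer */
            data = 42;
            #pragma omp flush(data)
            flag = 1;
            #pragma omp flush(flag)
        } else {                                /* consumer */
            while (!flag) {
                #pragma omp flush(flag)         /* each flush reaches __kmpc_flush */
            }
            #pragma omp flush(data)
            printf("consumer saw data=%d\n", data);
        }
    }
    return 0;
}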
-
-/* -------------------------------------------------------------------------- */
-
-/* -------------------------------------------------------------------------- */
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid thread id.
-
-Execute a barrier.
-*/
-void
-__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
-{
- KMP_COUNT_BLOCK(OMP_BARRIER);
- KMP_TIME_BLOCK(OMP_barrier);
- KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
-
- if (! TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
-
- if ( __kmp_env_consistency_check ) {
- if ( loc == 0 ) {
- KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
- }; // if
-
- __kmp_check_barrier( global_tid, ct_barrier, loc );
- }
-
- __kmp_threads[ global_tid ]->th.th_ident = loc;
- // TODO: explicit barrier_wait_id:
- // this function is called when 'barrier' directive is present or
- // implicit barrier at the end of a worksharing construct.
- // 1) better to add a per-thread barrier counter to a thread data structure
- // 2) set to 0 when a new team is created
- // 3) no sync is required
-
- __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
-}
-
-/* The BARRIER for a MASTER section is always explicit */
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param global_tid global thread number .
-@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
-*/
-kmp_int32
-__kmpc_master(ident_t *loc, kmp_int32 global_tid)
-{
- KMP_COUNT_BLOCK(OMP_MASTER);
- int status = 0;
-
- KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
-
- if( ! TCR_4( __kmp_init_parallel ) )
- __kmp_parallel_initialize();
-
- if( KMP_MASTER_GTID( global_tid )) {
- KMP_START_EXPLICIT_TIMER(OMP_master);
- status = 1;
- }
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (status) {
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
- kmp_info_t *this_thr = __kmp_threads[ global_tid ];
- kmp_team_t *team = this_thr -> th.th_team;
-
- int tid = __kmp_tid_from_gtid( global_tid );
- ompt_callbacks.ompt_callback(ompt_event_master_begin)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
- }
-#endif
-
- if ( __kmp_env_consistency_check ) {
-#if KMP_USE_DYNAMIC_LOCK
- if (status)
- __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
- else
- __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
-#else
- if (status)
- __kmp_push_sync( global_tid, ct_master, loc, NULL );
- else
- __kmp_check_sync( global_tid, ct_master, loc, NULL );
-#endif
- }
-
- return status;
-}
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param global_tid global thread number .
-
-Mark the end of a <tt>master</tt> region. This should only be called by the thread
-that executes the <tt>master</tt> region.
-*/
-void
-__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
-{
- KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
-
- KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
- KMP_STOP_EXPLICIT_TIMER(OMP_master);
-
-#if OMPT_SUPPORT && OMPT_TRACE
- kmp_info_t *this_thr = __kmp_threads[ global_tid ];
- kmp_team_t *team = this_thr -> th.th_team;
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_master_end)) {
- int tid = __kmp_tid_from_gtid( global_tid );
- ompt_callbacks.ompt_callback(ompt_event_master_end)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
-#endif
-
- if ( __kmp_env_consistency_check ) {
- if( global_tid < 0 )
- KMP_WARNING( ThreadIdentInvalid );
-
- if( KMP_MASTER_GTID( global_tid ))
- __kmp_pop_sync( global_tid, ct_master, loc );
- }
-}
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param gtid global thread number.
-
-Start execution of an <tt>ordered</tt> construct.
-*/
-void
-__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
-{
- int cid = 0;
- kmp_info_t *th;
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
-
- if (! TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
-
-#if USE_ITT_BUILD
- __kmp_itt_ordered_prep( gtid );
- // TODO: ordered_wait_id
-#endif /* USE_ITT_BUILD */
-
- th = __kmp_threads[ gtid ];
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- /* OMPT state update */
- th->th.ompt_thread_info.wait_id = (uint64_t) loc;
- th->th.ompt_thread_info.state = ompt_state_wait_ordered;
-
- /* OMPT event callback */
- if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
- ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
- th->th.ompt_thread_info.wait_id);
- }
- }
-#endif
-
- if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
- (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
- else
- __kmp_parallel_deo( & gtid, & cid, loc );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- /* OMPT state update */
- th->th.ompt_thread_info.state = ompt_state_work_parallel;
- th->th.ompt_thread_info.wait_id = 0;
-
- /* OMPT event callback */
- if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
- ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
- th->th.ompt_thread_info.wait_id);
- }
- }
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_ordered_start( gtid );
-#endif /* USE_ITT_BUILD */
-}
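__kmpc_ordered and the __kmpc_end_ordered that follows bracket the body of an ordered construct. The user-level construct they implement, as a runnable example:

#include <stdio.h>

int main(void) {
    #pragma omp parallel for ordered
    for (int i = 0; i < 8; ++i) {
        /* Iterations may be executed by different threads out of order... */
        #pragma omp ordered
        {
            /* ...but this block runs in ascending order of i, framed by
               __kmpc_ordered / __kmpc_end_ordered. */
            printf("%d\n", i);
        }
    }
    return 0;
}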
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param gtid global thread number.
-
-End execution of an <tt>ordered</tt> construct.
-*/
-void
-__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
-{
- int cid = 0;
- kmp_info_t *th;
-
- KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
-
-#if USE_ITT_BUILD
- __kmp_itt_ordered_end( gtid );
- // TODO: ordered_wait_id
-#endif /* USE_ITT_BUILD */
-
- th = __kmp_threads[ gtid ];
-
- if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
- (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
- else
- __kmp_parallel_dxo( & gtid, & cid, loc );
-
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
- th->th.ompt_thread_info.wait_id);
- }
-#endif
-}
-
-#if KMP_USE_DYNAMIC_LOCK
-
-static __forceinline void
-__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
-{
- // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
- void *idx;
- kmp_indirect_lock_t **lck;
- lck = (kmp_indirect_lock_t **)crit;
- kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
- KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
- KMP_SET_I_LOCK_LOCATION(ilk, loc);
- KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
- KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
-#if USE_ITT_BUILD
- __kmp_itt_critical_creating(ilk->lock, loc);
-#endif
- int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
- if (status == 0) {
-#if USE_ITT_BUILD
- __kmp_itt_critical_destroyed(ilk->lock);
-#endif
- // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
- //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
- }
- KMP_DEBUG_ASSERT(*lck != NULL);
-}
-
-// Fast-path acquire tas lock
-#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
- kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
- if (l->lk.poll != KMP_LOCK_FREE(tas) || \
- ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
- kmp_uint32 spins; \
- KMP_FSYNC_PREPARE(l); \
- KMP_INIT_YIELD(spins); \
- if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- while (l->lk.poll != KMP_LOCK_FREE(tas) || \
- ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
- if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- } \
- } \
- KMP_FSYNC_ACQUIRED(l); \
-}
-
-// Fast-path test tas lock
-#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
- kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
- rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
- KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
-}
-
-// Fast-path release tas lock
-#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
- TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
- KMP_MB(); \
-}
-
-#if KMP_USE_FUTEX
-
-# include <unistd.h>
-# include <sys/syscall.h>
-# ifndef FUTEX_WAIT
-# define FUTEX_WAIT 0
-# endif
-# ifndef FUTEX_WAKE
-# define FUTEX_WAKE 1
-# endif
-
-// Fast-path acquire futex lock
-#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
- kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
- kmp_int32 gtid_code = (gtid+1) << 1; \
- KMP_MB(); \
- KMP_FSYNC_PREPARE(ftx); \
- kmp_int32 poll_val; \
- while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
- KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
- kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
- if (!cond) { \
- if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
- continue; \
- } \
- poll_val |= KMP_LOCK_BUSY(1, futex); \
- } \
- kmp_int32 rc; \
- if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
- continue; \
- } \
- gtid_code |= 1; \
- } \
- KMP_FSYNC_ACQUIRED(ftx); \
-}
-
-// Fast-path test futex lock
-#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
- kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
- if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \
- KMP_FSYNC_ACQUIRED(ftx); \
- rc = TRUE; \
- } else { \
- rc = FALSE; \
- } \
-}
-
-// Fast-path release futex lock
-#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
- kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
- KMP_MB(); \
- KMP_FSYNC_RELEASING(ftx); \
- kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
- if (KMP_LOCK_STRIP(poll_val) & 1) { \
- syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
- } \
- KMP_MB(); \
- KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
-}
-
-#endif // KMP_USE_FUTEX
-
-#else // KMP_USE_DYNAMIC_LOCK
-
-static kmp_user_lock_p
-__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
-{
- kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
-
- //
- // Because of the double-check, the following load
- // doesn't need to be volatile.
- //
- kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
-
- if ( lck == NULL ) {
- void * idx;
-
- // Allocate & initialize the lock.
-        // Remember allocated locks in a table so they can be freed in __kmp_cleanup()
- lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
- __kmp_init_user_lock_with_checks( lck );
- __kmp_set_user_lock_location( lck, loc );
-#if USE_ITT_BUILD
- __kmp_itt_critical_creating( lck );
-        // __kmp_itt_critical_creating() should be called *before* the first use of the underlying
-        //   lock. It is the only place where we can guarantee it. There is a chance the lock will be
-        //   destroyed without ever being used, but that is not a problem: this is not a real event
-        //   seen by the user, it merely sets a name for the object (lock). See kmp_itt.h for details.
-#endif /* USE_ITT_BUILD */
-
- //
- // Use a cmpxchg instruction to slam the start of the critical
- // section with the lock pointer. If another thread beat us
- // to it, deallocate the lock, and use the lock that the other
- // thread allocated.
- //
- int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
-
- if ( status == 0 ) {
- // Deallocate the lock and reload the value.
-#if USE_ITT_BUILD
- __kmp_itt_critical_destroyed( lck );
- // Let ITT know the lock is destroyed and the same memory location may be reused for
- // another purpose.
-#endif /* USE_ITT_BUILD */
- __kmp_destroy_user_lock_with_checks( lck );
- __kmp_user_lock_free( &idx, gtid, lck );
- lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
- KMP_DEBUG_ASSERT( lck != NULL );
- }
- }
- return lck;
-}
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param global_tid global thread number.
-@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
-some other suitably unique value.
-
-Enter code protected by a `critical` construct.
-This function blocks until the executing thread can enter the critical section.
-*/
-void
-__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
-{
-#if KMP_USE_DYNAMIC_LOCK
- __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
-#else
- KMP_COUNT_BLOCK(OMP_CRITICAL);
- kmp_user_lock_p lck;
-
- KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
-
- //TODO: add THR_OVHD_STATE
-
- KMP_CHECK_USER_LOCK_INIT();
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
- lck = (kmp_user_lock_p)crit;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
- lck = (kmp_user_lock_p)crit;
- }
-#endif
- else { // ticket, queuing or drdpa
- lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
- }
-
- if ( __kmp_env_consistency_check )
- __kmp_push_sync( global_tid, ct_critical, loc, lck );
-
-    /* since the critical directive binds to all threads, not just
-     * the current team, we have to check this even if we are in a
-     * serialized team */
-    /* also, even if we are the uber thread, we still have to acquire the lock,
-     * as we have to contend with sibling threads */
-
-#if USE_ITT_BUILD
- __kmp_itt_critical_acquiring( lck );
-#endif /* USE_ITT_BUILD */
-    // The value of 'crit' is suitable for use as the critical_id of the critical section directive.
- __kmp_acquire_user_lock_with_checks( lck, global_tid );
-
-#if USE_ITT_BUILD
- __kmp_itt_critical_acquired( lck );
-#endif /* USE_ITT_BUILD */
-
- KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
-#endif // KMP_USE_DYNAMIC_LOCK
-}
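-
-/*
-  Sketch of assumed compiler output: a named critical region such as
-  `#pragma omp critical (name)` is lowered to a call pair around the protected
-  code, with one statically allocated, zero-initialized kmp_critical_name per
-  critical name, e.g.
-
-      static kmp_critical_name crit_name;   // hypothetical; zero on first use
-
-      __kmpc_critical(&loc, gtid, &crit_name);
-      ... protected code ...
-      __kmpc_end_critical(&loc, gtid, &crit_name);
-
-  `loc` and `gtid` stand for the compiler-supplied source location and global
-  thread number; __kmpc_end_critical() is defined later in this file.
-*/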
-
-#if KMP_USE_DYNAMIC_LOCK
-
-// Converts the given hint to an internal lock implementation
-static __forceinline kmp_dyna_lockseq_t
-__kmp_map_hint_to_lock(uintptr_t hint)
-{
-#if KMP_USE_TSX
-# define KMP_TSX_LOCK(seq) lockseq_##seq
-#else
-# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
-#endif
- // Hints that do not require further logic
- if (hint & kmp_lock_hint_hle)
- return KMP_TSX_LOCK(hle);
- if (hint & kmp_lock_hint_rtm)
- return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
- if (hint & kmp_lock_hint_adaptive)
- return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;
-
- // Rule out conflicting hints first by returning the default lock
- if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
- return __kmp_user_lock_seq;
- if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
- return __kmp_user_lock_seq;
-
- // Do not even consider speculation when it appears to be contended
- if (hint & omp_lock_hint_contended)
- return lockseq_queuing;
-
- // Uncontended lock without speculation
- if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
- return lockseq_tas;
-
- // HLE lock for speculation
- if (hint & omp_lock_hint_speculative)
- return KMP_TSX_LOCK(hle);
-
- return __kmp_user_lock_seq;
-}
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param global_tid global thread number.
-@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
-or some other suitably unique value.
-@param hint the lock hint.
-
-Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
-This function blocks until the executing thread can enter the critical section unless the hint suggests use of
-speculative execution and the hardware supports it.
-*/
-void
-__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
-{
- KMP_COUNT_BLOCK(OMP_CRITICAL);
- kmp_user_lock_p lck;
-
- KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
-
- kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
- // Check if it is initialized.
- if (*lk == 0) {
- kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
- if (KMP_IS_D_LOCK(lckseq)) {
- KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
- } else {
- __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
- }
- }
-    // Branch to access the actual lock object and perform the set operation. This branching is
-    // unavoidable because this lock initialization does not follow the normal dispatch path
-    // (the lock table is not used).
- if (KMP_EXTRACT_D_TAG(lk) != 0) {
- lck = (kmp_user_lock_p)lk;
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
- }
-# if USE_ITT_BUILD
- __kmp_itt_critical_acquiring(lck);
-# endif
-# if KMP_USE_INLINED_TAS
- if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
- } else
-# elif KMP_USE_INLINED_FUTEX
- if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
- } else
-# endif
- {
- KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
- }
- } else {
- kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
- lck = ilk->lock;
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
- }
-# if USE_ITT_BUILD
- __kmp_itt_critical_acquiring(lck);
-# endif
- KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_critical_acquired( lck );
-#endif /* USE_ITT_BUILD */
-
- KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
-} // __kmpc_critical_with_hint
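-
-/*
-  Assumed usage (illustrative only): when a lock hint is present, e.g.
-  `#pragma omp critical (name) hint(omp_lock_hint_speculative)`, the compiler can
-  emit the hinted entry point instead:
-
-      static kmp_critical_name crit_name;   // hypothetical; zero on first use
-
-      __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_speculative);
-      ... protected code ...
-      __kmpc_end_critical(&loc, gtid, &crit_name);
-
-  Per __kmp_map_hint_to_lock() above, a contended hint maps to a queuing lock, an
-  uncontended non-speculative hint maps to a TAS lock, a speculative hint maps to
-  an HLE lock when TSX is available, and conflicting hints fall back to the default.
-*/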
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information.
-@param global_tid global thread number.
-@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
-some other suitably unique value.
-
-Leave a critical section, releasing any lock that was held during its execution.
-*/
-void
-__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
-{
- kmp_user_lock_p lck;
-
- KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
-
-#if KMP_USE_DYNAMIC_LOCK
- if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
- lck = (kmp_user_lock_p)crit;
- KMP_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check) {
- __kmp_pop_sync(global_tid, ct_critical, loc);
- }
-# if USE_ITT_BUILD
- __kmp_itt_critical_releasing( lck );
-# endif
-# if KMP_USE_INLINED_TAS
- if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
- KMP_RELEASE_TAS_LOCK(lck, global_tid);
- } else
-# elif KMP_USE_INLINED_FUTEX
- if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
- KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
- } else
-# endif
- {
- KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
- }
- } else {
- kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
- KMP_ASSERT(ilk != NULL);
- lck = ilk->lock;
- if (__kmp_env_consistency_check) {
- __kmp_pop_sync(global_tid, ct_critical, loc);
- }
-# if USE_ITT_BUILD
- __kmp_itt_critical_releasing( lck );
-# endif
- KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
- }
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
- lck = (kmp_user_lock_p)crit;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
- lck = (kmp_user_lock_p)crit;
- }
-#endif
- else { // ticket, queuing or drdpa
- lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
- }
-
- KMP_ASSERT(lck != NULL);
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( global_tid, ct_critical, loc );
-
-#if USE_ITT_BUILD
- __kmp_itt_critical_releasing( lck );
-#endif /* USE_ITT_BUILD */
-    // The value of 'crit' is suitable for use as the critical_id of the critical section directive.
- __kmp_release_user_lock_with_checks( lck, global_tid );
-
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
- ompt_callbacks.ompt_callback(ompt_event_release_critical)(
- (uint64_t) lck);
- }
-#endif
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
- KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid thread id.
-@return one if the thread should execute the master block, zero otherwise
-
-Start execution of a combined barrier and master. The barrier is executed inside this function.
-*/
-kmp_int32
-__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
-{
- int status;
-
- KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
-
- if (! TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
-
- if ( __kmp_env_consistency_check )
- __kmp_check_barrier( global_tid, ct_barrier, loc );
-
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
- status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
-
- return (status != 0) ? 0 : 1;
-}
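-
-/*
-  A minimal sketch of assumed compiler output: a barrier immediately followed by a
-  master-only section can be lowered as
-
-      if (__kmpc_barrier_master(&loc, gtid)) {   // the barrier runs inside the call
-          ... master-only code ...
-          __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
-      }
-*/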
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid thread id.
-
-Complete the execution of a combined barrier and master. This function should
-only be called at the completion of the <tt>master</tt> code. Other threads will
-still be waiting at the barrier and this call releases them.
-*/
-void
-__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
-{
- KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
-
- __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid thread id.
-@return one if the thread should execute the master block, zero otherwise
-
-Start execution of a combined barrier and master(nowait) construct.
-The barrier is executed inside this function.
-There is no equivalent "end" function, since no __kmpc_end_master call is generated;
-the bookkeeping that __kmpc_end_master would otherwise perform is done inside this function.
-*/
-kmp_int32
-__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
-{
- kmp_int32 ret;
-
- KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
-
- if (! TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
-
- if ( __kmp_env_consistency_check ) {
- if ( loc == 0 ) {
- KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
- }
- __kmp_check_barrier( global_tid, ct_barrier, loc );
- }
-
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
- __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
-
- ret = __kmpc_master (loc, global_tid);
-
- if ( __kmp_env_consistency_check ) {
- /* there's no __kmpc_end_master called; so the (stats) */
- /* actions of __kmpc_end_master are done here */
-
- if ( global_tid < 0 ) {
- KMP_WARNING( ThreadIdentInvalid );
- }
- if (ret) {
- /* only one thread should do the pop since only */
- /* one did the push (see __kmpc_master()) */
-
- __kmp_pop_sync( global_tid, ct_master, loc );
- }
- }
-
- return (ret);
-}
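-
-/*
-  Assumed compiler output for the nowait variant (illustrative only): there is no
-  matching "end" call, so the thread that gets a non-zero return simply executes
-  the master-only code:
-
-      if (__kmpc_barrier_master_nowait(&loc, gtid)) {
-          ... master-only code ...                // no end call is generated
-      }
-*/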
-
-/* The BARRIER for a SINGLE process section is always explicit */
-/*!
-@ingroup WORK_SHARING
-@param loc source location information
-@param global_tid global thread number
-@return One if this thread should execute the single construct, zero otherwise.
-
-Test whether to execute a <tt>single</tt> construct.
-There are no implicit barriers in the two "single" calls; rather, the compiler should
-introduce an explicit barrier if it is required.
-*/
-
-kmp_int32
-__kmpc_single(ident_t *loc, kmp_int32 global_tid)
-{
- KMP_COUNT_BLOCK(OMP_SINGLE);
- kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
- if(rc == TRUE) {
- KMP_START_EXPLICIT_TIMER(OMP_single);
- }
-
-#if OMPT_SUPPORT && OMPT_TRACE
- kmp_info_t *this_thr = __kmp_threads[ global_tid ];
- kmp_team_t *team = this_thr -> th.th_team;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- if (ompt_enabled) {
- if (rc) {
- if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
- team->t.ompt_team_info.microtask);
- }
- } else {
- if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
- this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
- }
- }
-#endif
-
- return rc;
-}
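-
-/*
-  Sketch of assumed compiler output: a `#pragma omp single` block is lowered to a
-  guarded call pair, with the barrier (when the construct has no nowait clause)
-  emitted separately and explicitly, e.g.
-
-      if (__kmpc_single(&loc, gtid)) {
-          ... single-thread code ...
-          __kmpc_end_single(&loc, gtid);
-      }
-      // an explicit barrier is emitted here by the compiler unless nowait was specified
-*/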
-
-/*!
-@ingroup WORK_SHARING
-@param loc source location information
-@param global_tid global thread number
-
-Mark the end of a <tt>single</tt> construct. This function should
-only be called by the thread that executed the block of code protected
-by the `single` construct.
-*/
-void
-__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
-{
- __kmp_exit_single( global_tid );
- KMP_STOP_EXPLICIT_TIMER(OMP_single);
-
-#if OMPT_SUPPORT && OMPT_TRACE
- kmp_info_t *this_thr = __kmp_threads[ global_tid ];
- kmp_team_t *team = this_thr -> th.th_team;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
- ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
-#endif
-}
-
-/*!
-@ingroup WORK_SHARING
-@param loc Source location
-@param global_tid Global thread id
-
-Mark the end of a statically scheduled loop.
-*/
-void
-__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
-{
- KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
- kmp_info_t *this_thr = __kmp_threads[ global_tid ];
- kmp_team_t *team = this_thr -> th.th_team;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- ompt_callbacks.ompt_callback(ompt_event_loop_end)(
- team->t.ompt_team_info.parallel_id,
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
- }
-#endif
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, ct_pdo, loc );
-}
-
-/*
- * User routines that take C-style arguments (call by value),
- * as distinct from the equivalent Fortran routines
- */
-
-void
-ompc_set_num_threads( int arg )
-{
-// !!!!! TODO: check the per-task binding
- __kmp_set_num_threads( arg, __kmp_entry_gtid() );
-}
-
-void
-ompc_set_dynamic( int flag )
-{
- kmp_info_t *thread;
-
- /* For the thread-private implementation of the internal controls */
- thread = __kmp_entry_thread();
-
- __kmp_save_internal_controls( thread );
-
- set__dynamic( thread, flag ? TRUE : FALSE );
-}
-
-void
-ompc_set_nested( int flag )
-{
- kmp_info_t *thread;
-
- /* For the thread-private internal controls implementation */
- thread = __kmp_entry_thread();
-
- __kmp_save_internal_controls( thread );
-
- set__nested( thread, flag ? TRUE : FALSE );
-}
-
-void
-ompc_set_max_active_levels( int max_active_levels )
-{
- /* TO DO */
- /* we want per-task implementation of this internal control */
-
- /* For the per-thread internal controls implementation */
- __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
-}
-
-void
-ompc_set_schedule( omp_sched_t kind, int modifier )
-{
-// !!!!! TODO: check the per-task binding
- __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
-}
-
-int
-ompc_get_ancestor_thread_num( int level )
-{
- return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
-}
-
-int
-ompc_get_team_size( int level )
-{
- return __kmp_get_team_size( __kmp_entry_gtid(), level );
-}
-
-void
-kmpc_set_stacksize( int arg )
-{
- // __kmp_aux_set_stacksize initializes the library if needed
- __kmp_aux_set_stacksize( arg );
-}
-
-void
-kmpc_set_stacksize_s( size_t arg )
-{
- // __kmp_aux_set_stacksize initializes the library if needed
- __kmp_aux_set_stacksize( arg );
-}
-
-void
-kmpc_set_blocktime( int arg )
-{
- int gtid, tid;
- kmp_info_t *thread;
-
- gtid = __kmp_entry_gtid();
- tid = __kmp_tid_from_gtid(gtid);
- thread = __kmp_thread_from_gtid(gtid);
-
- __kmp_aux_set_blocktime( arg, thread, tid );
-}
-
-void
-kmpc_set_library( int arg )
-{
- // __kmp_user_set_library initializes the library if needed
- __kmp_user_set_library( (enum library_type)arg );
-}
-
-void
-kmpc_set_defaults( char const * str )
-{
- // __kmp_aux_set_defaults initializes the library if needed
- __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
-}
-
-int
-kmpc_set_affinity_mask_proc( int proc, void **mask )
-{
-#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
-#else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_set_affinity_mask_proc( proc, mask );
-#endif
-}
-
-int
-kmpc_unset_affinity_mask_proc( int proc, void **mask )
-{
-#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
-#else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_unset_affinity_mask_proc( proc, mask );
-#endif
-}
-
-int
-kmpc_get_affinity_mask_proc( int proc, void **mask )
-{
-#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
-#else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_get_affinity_mask_proc( proc, mask );
-#endif
-}
-
-
-/* -------------------------------------------------------------------------- */
-/*!
-@ingroup THREADPRIVATE
-@param loc source location information
-@param gtid global thread number
-@param cpy_size size of the cpy_data buffer
-@param cpy_data pointer to data to be copied
-@param cpy_func helper function to call for copying data
-@param didit flag variable: 1=single thread; 0=not single thread
-
-__kmpc_copyprivate implements the interface for the private data broadcast needed for
-the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
-All threads participating in the parallel region call this routine.
-One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
-and all other threads should have that variable set to 0.
-All threads pass a pointer to a data buffer (cpy_data) that they have built.
-
-The OpenMP specification forbids the use of nowait on the single region when a copyprivate
-clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
-race conditions, so the code generation for the single region should avoid generating a barrier
-after the call to @ref __kmpc_copyprivate.
-
-The <tt>gtid</tt> parameter is the global thread id for the current thread.
-The <tt>loc</tt> parameter is a pointer to source location information.
-
-Internal implementation: The single thread will first copy its descriptor address (cpy_data)
-to a team-private location, then each of the other threads will call the function pointed to by
-the parameter cpy_func, which carries out the copy using the cpy_data buffer.
-
-The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
-and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
-the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
-to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
-
-The interface to cpy_func is as follows:
-@code
-void cpy_func( void *destination, void *source )
-@endcode
-where void *destination is the cpy_data pointer for the thread being copied to
-and void *source is the cpy_data pointer for the thread being copied from.
-*/
-void
-__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
-{
- void **data_ptr;
-
- KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
-
- KMP_MB();
-
- data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
-
- if ( __kmp_env_consistency_check ) {
- if ( loc == 0 ) {
- KMP_WARNING( ConstructIdentInvalid );
- }
- }
-
- /* ToDo: Optimize the following two barriers into some kind of split barrier */
-
- if (didit) *data_ptr = cpy_data;
-
- /* This barrier is not a barrier region boundary */
-#if USE_ITT_NOTIFY
- __kmp_threads[gtid]->th.th_ident = loc;
-#endif
- __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
-
- if (! didit) (*cpy_func)( cpy_data, *data_ptr );
-
- /* Consider next barrier the user-visible barrier for barrier region boundaries */
- /* Nesting checks are already handled by the single construct checks */
-
-#if USE_ITT_NOTIFY
- __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
-#endif
- __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
-}
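-
-/*
-  Illustrative sketch of the copyprivate broadcast described above; the struct, the
-  helper and the variable names are made up for the example:
-
-      struct payload { int n; double x; };              // data named in copyprivate(...)
-
-      static void copy_payload(void *dst, void *src) {  // cpy_func: dst <- src
-          *(struct payload *)dst = *(struct payload *)src;
-      }
-
-      struct payload buf;                               // per-thread cpy_data buffer
-      kmp_int32 didit = __kmpc_single(&loc, gtid);      // 1 only on the single thread
-      if (didit) {
-          ... single block fills buf ...
-          __kmpc_end_single(&loc, gtid);
-      }
-      __kmpc_copyprivate(&loc, gtid, sizeof(buf), &buf, copy_payload, didit);
-*/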
-
-/* -------------------------------------------------------------------------- */
-
-#define INIT_LOCK __kmp_init_user_lock_with_checks
-#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
-#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
-#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
-#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
-#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
-#define RELEASE_LOCK __kmp_release_user_lock_with_checks
-#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
-#define TEST_LOCK __kmp_test_user_lock_with_checks
-#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
-#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
-#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
-
-
-/*
- * TODO: Make check abort messages use location info & pass it
- * into with_checks routines
- */
-
-#if KMP_USE_DYNAMIC_LOCK
-
-// internal lock initializer
-static __forceinline void
-__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
-{
- if (KMP_IS_D_LOCK(seq)) {
- KMP_INIT_D_LOCK(lock, seq);
-#if USE_ITT_BUILD
- __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
-#endif
- } else {
- KMP_INIT_I_LOCK(lock, seq);
-#if USE_ITT_BUILD
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __kmp_itt_lock_creating(ilk->lock, loc);
-#endif
- }
-}
-
-// internal nest lock initializer
-static __forceinline void
-__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
-{
-#if KMP_USE_TSX
- // Don't have nested lock implementation for speculative locks
- if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
- seq = __kmp_user_lock_seq;
-#endif
- switch (seq) {
- case lockseq_tas:
- seq = lockseq_nested_tas;
- break;
-#if KMP_USE_FUTEX
- case lockseq_futex:
- seq = lockseq_nested_futex;
- break;
-#endif
- case lockseq_ticket:
- seq = lockseq_nested_ticket;
- break;
- case lockseq_queuing:
- seq = lockseq_nested_queuing;
- break;
- case lockseq_drdpa:
- seq = lockseq_nested_drdpa;
- break;
- default:
- seq = lockseq_nested_queuing;
- }
- KMP_INIT_I_LOCK(lock, seq);
-#if USE_ITT_BUILD
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __kmp_itt_lock_creating(ilk->lock, loc);
-#endif
-}
-
-/* initialize the lock with a hint */
-void
-__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
-{
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
- }
-
- __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
-}
-
-/* initialize the lock with a hint */
-void
-__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
-{
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
- }
-
- __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
-}
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-/* initialize the lock */
-void
-__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
-#if KMP_USE_DYNAMIC_LOCK
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_lock");
- }
- __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- static char const * const func = "omp_init_lock";
- kmp_user_lock_p lck;
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- if ( __kmp_env_consistency_check ) {
- if ( user_lock == NULL ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- }
-
- KMP_CHECK_USER_LOCK_INIT();
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
- }
- INIT_LOCK( lck );
- __kmp_set_user_lock_location( lck, loc );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
- ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
- }
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_creating( lck );
-#endif /* USE_ITT_BUILD */
-
-#endif // KMP_USE_DYNAMIC_LOCK
-} // __kmpc_init_lock
-
-/* initialize the lock */
-void
-__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
-#if KMP_USE_DYNAMIC_LOCK
-
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
- }
- __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- static char const * const func = "omp_init_nest_lock";
- kmp_user_lock_p lck;
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- if ( __kmp_env_consistency_check ) {
- if ( user_lock == NULL ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- }
-
- KMP_CHECK_USER_LOCK_INIT();
-
- if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
- + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
- <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
- }
-
- INIT_NESTED_LOCK( lck );
- __kmp_set_user_lock_location( lck, loc );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
- ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
- }
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_creating( lck );
-#endif /* USE_ITT_BUILD */
-
-#endif // KMP_USE_DYNAMIC_LOCK
-} // __kmpc_init_nest_lock
-
-void
-__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
-#if KMP_USE_DYNAMIC_LOCK
-
-# if USE_ITT_BUILD
- kmp_user_lock_p lck;
- if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
- lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
- } else {
- lck = (kmp_user_lock_p)user_lock;
- }
- __kmp_itt_lock_destroyed(lck);
-# endif
- KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
-#else
- kmp_user_lock_p lck;
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
- }
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
- ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
- }
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_destroyed( lck );
-#endif /* USE_ITT_BUILD */
- DESTROY_LOCK( lck );
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- ;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- ;
- }
-#endif
- else {
- __kmp_user_lock_free( user_lock, gtid, lck );
- }
-#endif // KMP_USE_DYNAMIC_LOCK
-} // __kmpc_destroy_lock
-
-/* destroy the lock */
-void
-__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
-#if KMP_USE_DYNAMIC_LOCK
-
-# if USE_ITT_BUILD
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
- __kmp_itt_lock_destroyed(ilk->lock);
-# endif
- KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- kmp_user_lock_p lck;
-
- if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
- + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
- <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
- }
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
- ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
- }
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_destroyed( lck );
-#endif /* USE_ITT_BUILD */
-
- DESTROY_NESTED_LOCK( lck );
-
- if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
- + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
- ;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
- <= OMP_NEST_LOCK_T_SIZE ) ) {
- ;
- }
-#endif
- else {
- __kmp_user_lock_free( user_lock, gtid, lck );
- }
-#endif // KMP_USE_DYNAMIC_LOCK
-} // __kmpc_destroy_nest_lock
-
-void
-__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
- KMP_COUNT_BLOCK(OMP_set_lock);
-#if KMP_USE_DYNAMIC_LOCK
- int tag = KMP_EXTRACT_D_TAG(user_lock);
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
-# endif
-# if KMP_USE_INLINED_TAS
- if (tag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
- } else
-# elif KMP_USE_INLINED_FUTEX
- if (tag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
- } else
-# endif
- {
- __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
- }
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
-# endif
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- kmp_user_lock_p lck;
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_acquiring( lck );
-#endif /* USE_ITT_BUILD */
-
- ACQUIRE_LOCK( lck, gtid );
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_acquired( lck );
-#endif /* USE_ITT_BUILD */
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
- ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
- }
-#endif
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-void
-__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
-#if KMP_USE_DYNAMIC_LOCK
-
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
-# endif
- KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
-#endif
-
-#else // KMP_USE_DYNAMIC_LOCK
- int acquire_status;
- kmp_user_lock_p lck;
-
- if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
- + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
- <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_acquiring( lck );
-#endif /* USE_ITT_BUILD */
-
- ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_acquired( lck );
-#endif /* USE_ITT_BUILD */
-#endif // KMP_USE_DYNAMIC_LOCK
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
- if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
- ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
- } else {
- if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
- ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
- }
- }
-#endif
-}
-
-void
-__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
-{
-#if KMP_USE_DYNAMIC_LOCK
-
- int tag = KMP_EXTRACT_D_TAG(user_lock);
-# if USE_ITT_BUILD
- __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
-# endif
-# if KMP_USE_INLINED_TAS
- if (tag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_RELEASE_TAS_LOCK(user_lock, gtid);
- } else
-# elif KMP_USE_INLINED_FUTEX
- if (tag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
- } else
-# endif
- {
- __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
- }
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- kmp_user_lock_p lck;
-
- /* Can't use serial interval since not block structured */
- /* release the lock */
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- // "fast" path implemented to fix customer performance issue
-#if USE_ITT_BUILD
- __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
-#endif /* USE_ITT_BUILD */
- TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
- KMP_MB();
- return;
-#else
- lck = (kmp_user_lock_p)user_lock;
-#endif
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_releasing( lck );
-#endif /* USE_ITT_BUILD */
-
- RELEASE_LOCK( lck, gtid );
-
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
- ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
- }
-#endif
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-/* release the lock */
-void
-__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
-{
-#if KMP_USE_DYNAMIC_LOCK
-
-# if USE_ITT_BUILD
- __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
-# endif
- KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- kmp_user_lock_p lck;
-
- /* Can't use serial interval since not block structured */
-
- if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
- + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- // "fast" path implemented to fix customer performance issue
- kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
-#if USE_ITT_BUILD
- __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
-#endif /* USE_ITT_BUILD */
- if ( --(tl->lk.depth_locked) == 0 ) {
- TCW_4(tl->lk.poll, 0);
- }
- KMP_MB();
- return;
-#else
- lck = (kmp_user_lock_p)user_lock;
-#endif
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
- <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_releasing( lck );
-#endif /* USE_ITT_BUILD */
-
- int release_status;
- release_status = RELEASE_NESTED_LOCK( lck, gtid );
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled) {
- if (release_status == KMP_LOCK_RELEASED) {
- if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
- ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
- (uint64_t) lck);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
- ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
- (uint64_t) lck);
- }
- }
-#endif
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-/* try to acquire the lock */
-int
-__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
-{
- KMP_COUNT_BLOCK(OMP_test_lock);
-
-#if KMP_USE_DYNAMIC_LOCK
- int rc;
- int tag = KMP_EXTRACT_D_TAG(user_lock);
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
-# endif
-# if KMP_USE_INLINED_TAS
- if (tag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
- } else
-# elif KMP_USE_INLINED_FUTEX
- if (tag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
- } else
-# endif
- {
- rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
- }
- if (rc) {
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
-# endif
- return FTN_TRUE;
- } else {
-# if USE_ITT_BUILD
- __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
-# endif
- return FTN_FALSE;
- }
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- kmp_user_lock_p lck;
- int rc;
-
- if ( ( __kmp_user_lock_kind == lk_tas )
- && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_acquiring( lck );
-#endif /* USE_ITT_BUILD */
-
- rc = TEST_LOCK( lck, gtid );
-#if USE_ITT_BUILD
- if ( rc ) {
- __kmp_itt_lock_acquired( lck );
- } else {
- __kmp_itt_lock_cancelled( lck );
- }
-#endif /* USE_ITT_BUILD */
- return ( rc ? FTN_TRUE : FTN_FALSE );
-
- /* Can't use serial interval since not block structured */
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-/* try to acquire the lock */
-int
-__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
-{
-#if KMP_USE_DYNAMIC_LOCK
- int rc;
-# if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
-# endif
- rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
-# if USE_ITT_BUILD
- if (rc) {
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
- } else {
- __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
- }
-# endif
- return rc;
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- kmp_user_lock_p lck;
- int rc;
-
- if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
- + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- else if ( ( __kmp_user_lock_kind == lk_futex )
- && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
- <= OMP_NEST_LOCK_T_SIZE ) ) {
- lck = (kmp_user_lock_p)user_lock;
- }
-#endif
- else {
- lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_lock_acquiring( lck );
-#endif /* USE_ITT_BUILD */
-
- rc = TEST_NESTED_LOCK( lck, gtid );
-#if USE_ITT_BUILD
- if ( rc ) {
- __kmp_itt_lock_acquired( lck );
- } else {
- __kmp_itt_lock_cancelled( lck );
- }
-#endif /* USE_ITT_BUILD */
- return rc;
-
- /* Can't use serial interval since not block structured */
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-
-/*--------------------------------------------------------------------------------------------------------------------*/
-
-/*
- * Interface to fast scalable reduce methods routines
- */
-
-// keep the selected method in a thread-local structure for cross-function use: it will be read in the __kmpc_end_reduce* functions;
-// another solution: re-determine the method one more time in the __kmpc_end_reduce* functions (a new prototype would be required then)
-// AT: which solution is better?
-#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
- ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
-
-#define __KMP_GET_REDUCTION_METHOD(gtid) \
- ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
-
-// description of the packed_reduction_method variable: look at the macros in kmp.h
-
-
-// used in a critical section reduce block
-static __forceinline void
-__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
-
-    // this lock was visible to the customer and to the threading profile tool as a serial overhead span
-    //            (although it is used for an internal purpose only)
-    //            why was it visible in the previous implementation?
-    //            should we keep it visible in the new reduce block?
- kmp_user_lock_p lck;
-
-#if KMP_USE_DYNAMIC_LOCK
-
- kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
- // Check if it is initialized.
- if (*lk == 0) {
- if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
- KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
- } else {
- __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
- }
- }
-    // Branch to access the actual lock object and perform the set operation. This branching is
-    // unavoidable because this lock initialization does not follow the normal dispatch path
-    // (the lock table is not used).
- if (KMP_EXTRACT_D_TAG(lk) != 0) {
- lck = (kmp_user_lock_p)lk;
- KMP_DEBUG_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
- }
- KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
- } else {
- kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
- lck = ilk->lock;
- KMP_DEBUG_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
- }
- KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
- }
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- // We know that the fast reduction code is only emitted by Intel compilers
- // with 32 byte critical sections. If there isn't enough space, then we
- // have to use a pointer.
- if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
- lck = (kmp_user_lock_p)crit;
- }
- else {
- lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
- }
- KMP_DEBUG_ASSERT( lck != NULL );
-
- if ( __kmp_env_consistency_check )
- __kmp_push_sync( global_tid, ct_critical, loc, lck );
-
- __kmp_acquire_user_lock_with_checks( lck, global_tid );
-
-#endif // KMP_USE_DYNAMIC_LOCK
-}
-
-// used in a critical section reduce block
-static __forceinline void
-__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
-
- kmp_user_lock_p lck;
-
-#if KMP_USE_DYNAMIC_LOCK
-
- if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
- lck = (kmp_user_lock_p)crit;
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_critical, loc);
- KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
- } else {
- kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_critical, loc);
- KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
- }
-
-#else // KMP_USE_DYNAMIC_LOCK
-
- // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
- // sections. If there isn't enough space, then we have to use a pointer.
- if ( __kmp_base_user_lock_size > 32 ) {
- lck = *( (kmp_user_lock_p *) crit );
- KMP_ASSERT( lck != NULL );
- } else {
- lck = (kmp_user_lock_p) crit;
- }
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( global_tid, ct_critical, loc );
-
- __kmp_release_user_lock_with_checks( lck, global_tid );
-
-#endif // KMP_USE_DYNAMIC_LOCK
-} // __kmp_end_critical_section_reduce_block
-
-
-/* 2.a.i. Reduce Block without a terminating barrier */
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid global thread number
-@param num_vars number of items (variables) to be reduced
-@param reduce_size size of data in bytes to be reduced
-@param reduce_data pointer to data to be reduced
-@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
-@param lck pointer to the unique lock data structure
-@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
-
-The nowait version is used for a reduce clause with the nowait argument.
-*/
-kmp_int32
-__kmpc_reduce_nowait(
- ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck ) {
-
- KMP_COUNT_BLOCK(REDUCE_nowait);
- int retval = 0;
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
-#if OMP_40_ENABLED
- kmp_team_t *team;
- kmp_info_t *th;
- int teams_swapped = 0, task_state;
-#endif
- KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
-
-    // why do we need this initialization here at all?
-    // A reduction clause cannot be used as a stand-alone directive.
-
-    // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
-    // possible detection of a false-positive race by the threadchecker ???
- if( ! TCR_4( __kmp_init_parallel ) )
- __kmp_parallel_initialize();
-
- // check correctness of reduce block nesting
-#if KMP_USE_DYNAMIC_LOCK
- if ( __kmp_env_consistency_check )
- __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
-#else
- if ( __kmp_env_consistency_check )
- __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
-#endif
-
-#if OMP_40_ENABLED
- th = __kmp_thread_from_gtid(global_tid);
- if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
- team = th->th.th_team;
- if( team->t.t_level == th->th.th_teams_level ) {
- // this is reduction at teams construct
- KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
- // Let's swap teams temporarily for the reduction barrier
- teams_swapped = 1;
- th->th.th_info.ds.ds_tid = team->t.t_master_tid;
- th->th.th_team = team->t.t_parent;
- th->th.th_team_nproc = th->th.th_team->t.t_nproc;
- th->th.th_task_team = th->th.th_team->t.t_task_team[0];
- task_state = th->th.th_task_state;
- th->th.th_task_state = 0;
- }
- }
-#endif // OMP_40_ENABLED
-
-    // the packed_reduction_method value will be reused by the __kmp_end_reduce* function, so it should be kept in a variable
-    // the variable should be either a construct-specific or a thread-specific property, not a team-specific property
-    //     (a thread can reach the next reduce block on the next construct, and the reduce method may differ on the next construct)
-    // an ident_t "loc" parameter could be used as a construct-specific property (but what if loc == 0?)
-    //     (if both construct-specific and team-specific variables were shared, unnecessary extra syncs would be needed)
-    // a thread-specific variable is better with regard to the two issues above (next construct and extra syncs)
-    // a thread-specific "th_local.reduction_method" variable is used currently
-    // each thread executes the 'determine' and 'set' lines (there is no need to have only one thread do it, which avoids unnecessary extra syncs)
-
- packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
- __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
-
- if( packed_reduction_method == critical_reduce_block ) {
-
- __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
- retval = 1;
-
- } else if( packed_reduction_method == empty_reduce_block ) {
-
- // usage: if team size == 1, no synchronization is required ( Intel platforms only )
- retval = 1;
-
- } else if( packed_reduction_method == atomic_reduce_block ) {
-
- retval = 2;
-
-        // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
-        // (this is not quite right, because the checking block has been closed by this 'pop',
-        //  but the atomic operation has not been executed yet; it will be executed slightly later, literally on the next instruction)
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( global_tid, ct_reduce, loc );
-
- } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
-
-        //AT: performance issue: a real barrier here
-        //AT:     (if the master goes slowly, other threads are blocked here waiting for the master to come and release them)
-        //AT:     (it's not what a customer might expect when specifying the NOWAIT clause)
-        //AT:     (specifying NOWAIT won't improve performance; it'll be confusing to a customer)
-        //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
-        //AT:   and be more in line with the sense of NOWAIT
-        //AT: TO DO: run the EPCC test and compare times
-
- // this barrier should be invisible to a customer and to the threading profile tool
- // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
- retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
- retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
-
- // all other workers except master should do this pop here
- // ( none of the other workers will get to __kmpc_end_reduce_nowait() )
- if ( __kmp_env_consistency_check ) {
- if( retval == 0 ) {
- __kmp_pop_sync( global_tid, ct_reduce, loc );
- }
- }
-
- } else {
-
- // should never reach this block
- KMP_ASSERT( 0 ); // "unexpected method"
-
- }
-#if OMP_40_ENABLED
- if( teams_swapped ) {
- // Restore thread structure
- th->th.th_info.ds.ds_tid = 0;
- th->th.th_team = team;
- th->th.th_team_nproc = team->t.t_nproc;
- th->th.th_task_team = team->t.t_task_team[task_state];
- th->th.th_task_state = task_state;
- }
-#endif
- KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
-
- return retval;
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid global thread id.
-@param lck pointer to the unique lock data structure
-
-Finish the execution of a reduce nowait.
-*/
-void
-__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
-
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
-
- KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
-
- packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
-
- if( packed_reduction_method == critical_reduce_block ) {
-
- __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
-
- } else if( packed_reduction_method == empty_reduce_block ) {
-
- // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
-
- } else if( packed_reduction_method == atomic_reduce_block ) {
-
- // neither master nor other workers should get here
- // (code gen does not generate this call in case 2: atomic reduce block)
- // actually it would be better to remove this elseif entirely;
- // after removal this value would be checked by the 'else' branch and would trigger the assert
-
- } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
-
- // only master gets here
-
- } else {
-
- // should never reach this block
- KMP_ASSERT( 0 ); // "unexpected method"
-
- }
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( global_tid, ct_reduce, loc );
-
- KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
-
- return;
-}
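-
-/*
-   A rough sketch of the code a compiler might emit around __kmpc_reduce_nowait() for a
-   hypothetical "reduction(+:sum) nowait" clause, following the comments above; the names
-   sum, sum_priv, add_func and crit are made up for the example:
-
-       int ret = __kmpc_reduce_nowait( &loc, gtid, 1, sizeof( sum_priv ), &sum_priv, add_func, &crit );
-       switch( ret ) {
-       case 1:   // this thread combines its partial result and finishes the reduce block
-           sum += sum_priv;
-           __kmpc_end_reduce_nowait( &loc, gtid, &crit );
-           break;
-       case 2:   // atomic method: every thread combines atomically; no "end" call is generated
-           __atomic_fetch_add( &sum, sum_priv, __ATOMIC_RELAXED );   // e.g. an atomic add
-           break;
-       default:  // 0: other workers of a tree reduction have nothing more to do
-           break;
-       }
-*/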
-
-/* 2.a.ii. Reduce Block with a terminating barrier */
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid global thread number
-@param num_vars number of items (variables) to be reduced
-@param reduce_size size of data in bytes to be reduced
-@param reduce_data pointer to data to be reduced
-@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
-@param lck pointer to the unique lock data structure
-@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
-
-A blocking reduce that includes an implicit barrier.
-*/
-kmp_int32
-__kmpc_reduce(
- ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
- void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck )
-{
- KMP_COUNT_BLOCK(REDUCE_wait);
- int retval = 0;
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
-
- KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
-
- // why do we need this initialization here at all?
- // Reduction clause cannot be a stand-alone directive.
-
- // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
- // possible detection of false-positive race by the threadchecker ???
- if( ! TCR_4( __kmp_init_parallel ) )
- __kmp_parallel_initialize();
-
- // check correctness of reduce block nesting
-#if KMP_USE_DYNAMIC_LOCK
- if ( __kmp_env_consistency_check )
- __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
-#else
- if ( __kmp_env_consistency_check )
- __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
-#endif
-
- packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
- __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
-
- if( packed_reduction_method == critical_reduce_block ) {
-
- __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
- retval = 1;
-
- } else if( packed_reduction_method == empty_reduce_block ) {
-
- // usage: if team size == 1, no synchronization is required ( Intel platforms only )
- retval = 1;
-
- } else if( packed_reduction_method == atomic_reduce_block ) {
-
- retval = 2;
-
- } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
-
- //case tree_reduce_block:
- // this barrier should be visible to a customer and to the threading profile tool
- // (it's a terminating barrier on constructs if NOWAIT not specified)
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
-#endif
- retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
- retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
-
- // all other workers except master should do this pop here
- // ( none of the other workers will enter __kmpc_end_reduce(); only the master does )
- if ( __kmp_env_consistency_check ) {
- if( retval == 0 ) { // 0: all other workers; 1: master
- __kmp_pop_sync( global_tid, ct_reduce, loc );
- }
- }
-
- } else {
-
- // should never reach this block
- KMP_ASSERT( 0 ); // "unexpected method"
-
- }
-
- KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
-
- return retval;
-}
-
-/*!
-@ingroup SYNCHRONIZATION
-@param loc source location information
-@param global_tid global thread id.
-@param lck pointer to the unique lock data structure
-
-Finish the execution of a blocking reduce.
-The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
-*/
-void
-__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
-
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
-
- KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
-
- packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
-
- // this barrier should be visible to a customer and to the threading profile tool
- // (it's a terminating barrier on constructs if NOWAIT not specified)
-
- if( packed_reduction_method == critical_reduce_block ) {
-
- __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
-
- // TODO: implicit barrier: should be exposed
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
- __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
-
- } else if( packed_reduction_method == empty_reduce_block ) {
-
- // usage: if team size == 1, no synchronization is required ( Intel platforms only )
-
- // TODO: implicit barrier: should be exposed
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
- __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
-
- } else if( packed_reduction_method == atomic_reduce_block ) {
-
- // TODO: implicit barrier: should be exposed
-#if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
-#endif
- __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
-
- } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
-
- // only master executes here (master releases all other workers)
- __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
-
- } else {
-
- // should never reach this block
- KMP_ASSERT( 0 ); // "unexpected method"
-
- }
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( global_tid, ct_reduce, loc );
-
- KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
-
- return;
-}
-
-#undef __KMP_GET_REDUCTION_METHOD
-#undef __KMP_SET_REDUCTION_METHOD
-
-/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
-
-kmp_uint64
-__kmpc_get_taskid() {
-
- kmp_int32 gtid;
- kmp_info_t * thread;
-
- gtid = __kmp_get_gtid();
- if ( gtid < 0 ) {
- return 0;
- }; // if
- thread = __kmp_thread_from_gtid( gtid );
- return thread->th.th_current_task->td_task_id;
-
-} // __kmpc_get_taskid
-
-
-kmp_uint64
-__kmpc_get_parent_taskid() {
-
- kmp_int32 gtid;
- kmp_info_t * thread;
- kmp_taskdata_t * parent_task;
-
- gtid = __kmp_get_gtid();
- if ( gtid < 0 ) {
- return 0;
- }; // if
- thread = __kmp_thread_from_gtid( gtid );
- parent_task = thread->th.th_current_task->td_parent;
- return ( parent_task == NULL ? 0 : parent_task->td_task_id );
-
-} // __kmpc_get_parent_taskid
-
-void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
-{
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- }
- __kmp_place_num_sockets = nS;
- __kmp_place_socket_offset = sO;
- __kmp_place_num_cores = nC;
- __kmp_place_core_offset = cO;
- __kmp_place_num_threads_per_core = nT;
-}
-
-// end of file //
-
+/*
+ * kmp_csupport.c -- kfront linkage support for OpenMP.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "omp.h" /* extern "C" declarations of user-visible routines */
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_itt.h"
+#include "kmp_error.h"
+#include "kmp_stats.h"
+
+#if OMPT_SUPPORT
+#include "ompt-internal.h"
+#include "ompt-specific.h"
+#endif
+
+#define MAX_MESSAGE 512
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* flags will be used in future, e.g., to implement */
+/* openmp_strict library restrictions */
+
+/*!
+ * @ingroup STARTUP_SHUTDOWN
+ * @param loc in source location information
+ * @param flags in for future use (currently ignored)
+ *
+ * Initialize the runtime library. This call is optional; if it is not made then
+ * it will be implicitly called by attempts to use other library functions.
+ *
+ */
+void
+__kmpc_begin(ident_t *loc, kmp_int32 flags)
+{
+ // By default __kmp_ignore_mppbeg() returns TRUE.
+ if (__kmp_ignore_mppbeg() == FALSE) {
+ __kmp_internal_begin();
+
+ KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
+ }
+}
+
+/*!
+ * @ingroup STARTUP_SHUTDOWN
+ * @param loc source location information
+ *
+ * Shut down the runtime library. This is also optional, and even if called will not
+ * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
+ */
+void
+__kmpc_end(ident_t *loc)
+{
+ // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op.
+ // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
+ // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
+ // will unregister this root (it can cause library shut down).
+ if (__kmp_ignore_mppend() == FALSE) {
+ KC_TRACE( 10, ("__kmpc_end: called\n" ) );
+ KA_TRACE( 30, ("__kmpc_end\n" ));
+
+ __kmp_internal_end_thread( -1 );
+ }
+}
+
+/*!
+@ingroup THREAD_STATES
+@param loc Source location information.
+@return The global thread index of the active thread.
+
+This function can be called in any context.
+
+If the runtime has only been entered at the outermost level from a
+single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
+which would be returned by omp_get_thread_num() in the outermost
+active parallel construct. (Or zero if there is no active parallel
+construct, since the master thread is necessarily thread zero).
+
+If multiple non-OpenMP threads all enter an OpenMP construct then this
+will be a unique thread identifier among all the threads created by
+the OpenMP runtime (but the value cannot be defined in terms of
+OpenMP thread ids returned by omp_get_thread_num()).
+
+*/
+kmp_int32
+__kmpc_global_thread_num(ident_t *loc)
+{
+ kmp_int32 gtid = __kmp_entry_gtid();
+
+ KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
+
+ return gtid;
+}
+
+/*!
+@ingroup THREAD_STATES
+@param loc Source location information.
+@return The number of threads under control of the OpenMP<sup>*</sup> runtime
+
+This function can be called in any context.
+It returns the total number of threads under the control of the OpenMP runtime. That is
+not a number that can be determined by any OpenMP standard calls, since the library may be
+called from more than one non-OpenMP thread, and this reflects the total over all such calls.
+Similarly, the runtime maintains underlying threads even when they are not active (since the cost
+of creating and destroying OS threads is high); this call counts all such threads even if they are not
+waiting for work.
+*/
+kmp_int32
+__kmpc_global_num_threads(ident_t *loc)
+{
+ KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
+
+ return TCR_4(__kmp_nth);
+}
+
+/*!
+@ingroup THREAD_STATES
+@param loc Source location information.
+@return The thread number of the calling thread in the innermost active parallel construct.
+
+*/
+kmp_int32
+__kmpc_bound_thread_num(ident_t *loc)
+{
+ KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
+ return __kmp_tid_from_gtid( __kmp_entry_gtid() );
+}
+
+/*!
+@ingroup THREAD_STATES
+@param loc Source location information.
+@return The number of threads in the innermost active parallel construct.
+*/
+kmp_int32
+__kmpc_bound_num_threads(ident_t *loc)
+{
+ KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
+
+ return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
+}
+
+/*!
+ * @ingroup DEPRECATED
+ * @param loc location description
+ *
+ * This function need not be called. It always returns TRUE.
+ */
+kmp_int32
+__kmpc_ok_to_fork(ident_t *loc)
+{
+#ifndef KMP_DEBUG
+
+ return TRUE;
+
+#else
+
+ const char *semi2;
+ const char *semi3;
+ int line_no;
+
+ if (__kmp_par_range == 0) {
+ return TRUE;
+ }
+ semi2 = loc->psource;
+ if (semi2 == NULL) {
+ return TRUE;
+ }
+ semi2 = strchr(semi2, ';');
+ if (semi2 == NULL) {
+ return TRUE;
+ }
+ semi2 = strchr(semi2 + 1, ';');
+ if (semi2 == NULL) {
+ return TRUE;
+ }
+ if (__kmp_par_range_filename[0]) {
+ const char *name = semi2 - 1;
+ while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
+ name--;
+ }
+ if ((*name == '/') || (*name == ';')) {
+ name++;
+ }
+ if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
+ return __kmp_par_range < 0;
+ }
+ }
+ semi3 = strchr(semi2 + 1, ';');
+ if (__kmp_par_range_routine[0]) {
+ if ((semi3 != NULL) && (semi3 > semi2)
+ && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
+ return __kmp_par_range < 0;
+ }
+ }
+ if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
+ if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
+ return __kmp_par_range > 0;
+ }
+ return __kmp_par_range < 0;
+ }
+ return TRUE;
+
+#endif /* KMP_DEBUG */
+
+}
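+
+/*
+   For reference, the parsing above expects loc->psource to be a semicolon-separated location
+   string of the form ";file;routine;line;..." -- for example, the default constant a compiler
+   emits when no information is available is ";unknown;unknown;0;0;;". The KMP_PAR_RANGE
+   filtering picks the file name, routine name and line number out of those fields.
+*/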
+
+/*!
+@ingroup THREAD_STATES
+@param loc Source location information.
+@return 1 if this thread is executing inside an active parallel region, zero if not.
+*/
+kmp_int32
+__kmpc_in_parallel( ident_t *loc )
+{
+ return __kmp_entry_thread() -> th.th_root -> r.r_active;
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+@param num_threads number of threads requested for this parallel construct
+
+Set the number of threads to be used by the next fork spawned by this thread.
+This call is only required if the parallel construct has a `num_threads` clause.
+*/
+void
+__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
+{
+ KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
+ global_tid, num_threads ) );
+
+ __kmp_push_num_threads( loc, global_tid, num_threads );
+}
+
+void
+__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
+{
+ KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
+
+ /* the num_threads are automatically popped */
+}
+
+
+#if OMP_40_ENABLED
+
+void
+__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
+{
+ KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
+ global_tid, proc_bind ) );
+
+ __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
+}
+
+#endif /* OMP_40_ENABLED */
+
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param argc total number of arguments in the ellipsis
+@param microtask pointer to callback routine consisting of outlined parallel construct
+@param ... pointers to shared variables that aren't global
+
+Do the actual fork and call the microtask in the relevant number of threads.
+*/
+void
+__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
+{
+ int gtid = __kmp_entry_gtid();
+
+#if (KMP_STATS_ENABLED)
+ int inParallel = __kmpc_in_parallel(loc);
+ if (inParallel)
+ {
+ KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
+ }
+ else
+ {
+ KMP_STOP_EXPLICIT_TIMER(OMP_serial);
+ KMP_COUNT_BLOCK(OMP_PARALLEL);
+ }
+#endif
+
+ // maybe saving thr_state is enough here
+ {
+ va_list ap;
+ va_start( ap, microtask );
+
+#if OMPT_SUPPORT
+ int tid = __kmp_tid_from_gtid( gtid );
+ kmp_info_t *master_th = __kmp_threads[ gtid ];
+ kmp_team_t *parent_team = master_th->th.th_team;
+ if (ompt_enabled) {
+ parent_team->t.t_implicit_task_taskdata[tid].
+ ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_FORKING();
+#endif
+ __kmp_fork_call( loc, gtid, fork_context_intel,
+ argc,
+#if OMPT_SUPPORT
+ VOLATILE_CAST(void *) microtask, // "unwrapped" task
+#endif
+ VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
+ VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
+/* TODO: revert workaround for Intel(R) 64 tracker #96 */
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ &ap
+#else
+ ap
+#endif
+ );
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_JOINING();
+#endif
+ __kmp_join_call( loc, gtid
+#if OMPT_SUPPORT
+ , fork_context_intel
+#endif
+ );
+
+ va_end( ap );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ parent_team->t.t_implicit_task_taskdata[tid].
+ ompt_task_info.frame.reenter_runtime_frame = 0;
+ }
+#endif
+ }
+#if (KMP_STATS_ENABLED)
+ if (!inParallel)
+ KMP_START_EXPLICIT_TIMER(OMP_serial);
+#endif
+}
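+
+/*
+   A rough sketch (with assumed names loc, x, body and outlined) of how a compiler might lower
+
+       #pragma omp parallel num_threads(4) shared(x)
+       { body( &x ); }
+
+   onto the entry points above. The outlined routine follows the kmpc_micro signature
+   (global and bound thread ids first, then the shared arguments):
+
+       static void outlined( kmp_int32 *gtid, kmp_int32 *btid, int *x ) {
+           body( x );                                    // the parallel region body
+       }
+       ...
+       kmp_int32 gtid = __kmpc_global_thread_num( &loc );
+       __kmpc_push_num_threads( &loc, gtid, 4 );         // only emitted for a num_threads() clause
+       __kmpc_fork_call( &loc, 1, (kmpc_micro) outlined, &x );   // fork, run the region, join
+*/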
+
+#if OMP_40_ENABLED
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+@param num_teams number of teams requested for the teams construct
+@param num_threads number of threads per team requested for the teams construct
+
+Set the number of teams to be used by the teams construct.
+This call is only required if the teams construct has a `num_teams` clause
+or a `thread_limit` clause (or both).
+*/
+void
+__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
+{
+ KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
+ global_tid, num_teams, num_threads ) );
+
+ __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param argc total number of arguments in the ellipsis
+@param microtask pointer to callback routine consisting of outlined teams construct
+@param ... pointers to shared variables that aren't global
+
+Do the actual fork and call the microtask in the relevant number of threads.
+*/
+void
+__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
+{
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *this_thr = __kmp_threads[ gtid ];
+ va_list ap;
+ va_start( ap, microtask );
+
+ KMP_COUNT_BLOCK(OMP_TEAMS);
+
+ // remember teams entry point and nesting level
+ this_thr->th.th_teams_microtask = microtask;
+ this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
+
+#if OMPT_SUPPORT
+ kmp_team_t *parent_team = this_thr->th.th_team;
+ int tid = __kmp_tid_from_gtid( gtid );
+ if (ompt_enabled) {
+ parent_team->t.t_implicit_task_taskdata[tid].
+ ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+ // check if __kmpc_push_num_teams was called; otherwise set the default number of teams
+ if ( this_thr->th.th_teams_size.nteams == 0 ) {
+ __kmp_push_num_teams( loc, gtid, 0, 0 );
+ }
+ KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
+ KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
+ KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
+
+ __kmp_fork_call( loc, gtid, fork_context_intel,
+ argc,
+#if OMPT_SUPPORT
+ VOLATILE_CAST(void *) microtask, // "unwrapped" task
+#endif
+ VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
+ VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ &ap
+#else
+ ap
+#endif
+ );
+ __kmp_join_call( loc, gtid
+#if OMPT_SUPPORT
+ , fork_context_intel
+#endif
+ );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ parent_team->t.t_implicit_task_taskdata[tid].
+ ompt_task_info.frame.reenter_runtime_frame = NULL;
+ }
+#endif
+
+ this_thr->th.th_teams_microtask = NULL;
+ this_thr->th.th_teams_level = 0;
+ *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
+ va_end( ap );
+}
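+
+/*
+   A rough sketch (assumed names) of how a compiler might lower
+
+       #pragma omp teams num_teams(2) thread_limit(8)
+
+   onto the entry points above: the clause values are pushed first, then the teams microtask is forked:
+
+       __kmpc_push_num_teams( &loc, gtid, 2, 8 );
+       __kmpc_fork_teams( &loc, 0, (kmpc_micro) teams_outlined );
+*/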
+#endif /* OMP_40_ENABLED */
+
+
+//
+// I don't think this function should ever have been exported.
+// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
+// openmp code ever called it, but it's been exported from the RTL for so
+// long that I'm afraid to remove the definition.
+//
+int
+__kmpc_invoke_task_func( int gtid )
+{
+ return __kmp_invoke_task_func( gtid );
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+
+Enter a serialized parallel construct. This interface is used to handle a
+conditional parallel region, like this,
+@code
+#pragma omp parallel if (condition)
+@endcode
+when the condition is false.
+*/
+void
+__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
+{
+ __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
+ * kmp_fork_call since the tasks to be done are similar in each case.
+ */
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+
+Leave a serialized parallel construct.
+*/
+void
+__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
+{
+ kmp_internal_control_t *top;
+ kmp_info_t *this_thr;
+ kmp_team_t *serial_team;
+
+ KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
+
+ /* skip all this code for autopar serialized loops since it results in
+ unacceptable overhead */
+ if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
+ return;
+
+ // Not autopar code
+ if( ! TCR_4( __kmp_init_parallel ) )
+ __kmp_parallel_initialize();
+
+ this_thr = __kmp_threads[ global_tid ];
+ serial_team = this_thr->th.th_serial_team;
+
+ #if OMP_41_ENABLED
+ kmp_task_team_t * task_team = this_thr->th.th_task_team;
+
+ // we need to wait for the proxy tasks before finishing the thread
+ if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
+ __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
+ #endif
+
+ KMP_MB();
+ KMP_DEBUG_ASSERT( serial_team );
+ KMP_ASSERT( serial_team -> t.t_serialized );
+ KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
+ KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
+ KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
+ KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
+
+ /* If necessary, pop the internal control stack values and replace the team values */
+ top = serial_team -> t.t_control_stack_top;
+ if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
+ copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
+ serial_team -> t.t_control_stack_top = top -> next;
+ __kmp_free(top);
+ }
+
+ //if( serial_team -> t.t_serialized > 1 )
+ serial_team -> t.t_level--;
+
+ /* pop dispatch buffers stack */
+ KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
+ {
+ dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
+ serial_team->t.t_dispatch->th_disp_buffer =
+ serial_team->t.t_dispatch->th_disp_buffer->next;
+ __kmp_free( disp_buffer );
+ }
+
+ -- serial_team -> t.t_serialized;
+ if ( serial_team -> t.t_serialized == 0 ) {
+
+ /* return to the parallel section */
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
+ __kmp_clear_x87_fpu_status_word();
+ __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
+ __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
+ }
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ this_thr -> th.th_team = serial_team -> t.t_parent;
+ this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
+
+ /* restore values cached in the thread */
+ this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
+ this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
+ this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
+
+ /* TODO the below shouldn't need to be adjusted for serialized teams */
+ this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
+ t.t_dispatch[ serial_team -> t.t_master_tid ];
+
+ __kmp_pop_current_task_from_thread( this_thr );
+
+ KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
+ this_thr -> th.th_current_task -> td_flags.executing = 1;
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // Copy the task team from the new child / old parent team to the thread.
+ this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
+ KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
+ global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
+ }
+ } else {
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
+ global_tid, serial_team, serial_team -> t.t_serialized ) );
+ }
+ }
+
+#if USE_ITT_BUILD
+ kmp_uint64 cur_time = 0;
+#if USE_ITT_NOTIFY
+ if ( __itt_get_timestamp_ptr ) {
+ cur_time = __itt_get_timestamp();
+ }
+#endif /* USE_ITT_NOTIFY */
+ if ( this_thr->th.th_team->t.t_level == 0
+#if OMP_40_ENABLED
+ && this_thr->th.th_teams_microtask == NULL
+#endif
+ ) {
+ // Report the barrier
+ this_thr->th.th_ident = loc;
+ if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
+ ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
+ {
+ __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
+ cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
+ if ( __kmp_forkjoin_frames_mode == 3 )
+ // Since the barrier frame for a serialized region coincides with the region itself, we use the same begin timestamp as for the barrier.
+ __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
+ cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
+ } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
+ ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
+ // Mark the end of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
+ __kmp_itt_region_joined( global_tid, 1 );
+ }
+#endif /* USE_ITT_BUILD */
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_parallel( global_tid, NULL );
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information.
+
+Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
+depending on the memory ordering convention obeyed by the compiler
+even that may not be necessary).
+*/
+void
+__kmpc_flush(ident_t *loc)
+{
+ KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
+
+ /* need explicit __mf() here since use volatile instead in library */
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
+ #if KMP_MIC
+ // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
+ // We shouldn't need it, though, since the ABI rules require that
+ // * If the compiler generates NGO stores it also generates the fence
+ // * If users hand-code NGO stores they should insert the fence
+ // therefore no incomplete unordered stores should be visible.
+ #else
+ // C74404
+ // This is to address non-temporal store instructions (sfence needed).
+ // The clflush instruction is also addressed (mfence needed).
+ // Probably the non-temporal load movntdqa instruction should also be addressed.
+ // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
+ if ( ! __kmp_cpuinfo.initialized ) {
+ __kmp_query_cpuid( & __kmp_cpuinfo );
+ }; // if
+ if ( ! __kmp_cpuinfo.sse2 ) {
+ // CPU cannot execute SSE2 instructions.
+ } else {
+ #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
+ _mm_mfence();
+ #else
+ __sync_synchronize();
+ #endif // KMP_COMPILER_ICC
+ }; // if
+ #endif // KMP_MIC
+ #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ // Nothing to see here; move along.
+ #elif KMP_ARCH_PPC64
+ // Nothing needed here (we have a real MB above).
+ #if KMP_OS_CNK
+ // The flushing thread needs to yield here; this prevents a
+ // busy-waiting thread from saturating the pipeline. flush is
+ // often used in loops like this:
+ // while (!flag) {
+ // #pragma omp flush(flag)
+ // }
+ // and adding the yield here is good for at least a 10x speedup
+ // when running >2 threads per core (on the NAS LU benchmark).
+ __kmp_yield(TRUE);
+ #endif
+ #else
+ #error Unknown or unsupported architecture
+ #endif
+
+}
+
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+
+Execute a barrier.
+*/
+void
+__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
+{
+ KMP_COUNT_BLOCK(OMP_BARRIER);
+ KMP_TIME_BLOCK(OMP_barrier);
+ KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
+
+ if (! TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+
+ if ( __kmp_env_consistency_check ) {
+ if ( loc == 0 ) {
+ KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
+ }; // if
+
+ __kmp_check_barrier( global_tid, ct_barrier, loc );
+ }
+
+ __kmp_threads[ global_tid ]->th.th_ident = loc;
+ // TODO: explicit barrier_wait_id:
+ // this function is called when a 'barrier' directive is present or
+ // at the implicit barrier at the end of a worksharing construct.
+ // 1) better to add a per-thread barrier counter to a thread data structure
+ // 2) set it to 0 when a new team is created
+ // 3) no sync is required
+
+ __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
+}
+
+/* The BARRIER for a MASTER section is always explicit */
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param global_tid global thread number.
+@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
+*/
+kmp_int32
+__kmpc_master(ident_t *loc, kmp_int32 global_tid)
+{
+ KMP_COUNT_BLOCK(OMP_MASTER);
+ int status = 0;
+
+ KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
+
+ if( ! TCR_4( __kmp_init_parallel ) )
+ __kmp_parallel_initialize();
+
+ if( KMP_MASTER_GTID( global_tid )) {
+ KMP_START_EXPLICIT_TIMER(OMP_master);
+ status = 1;
+ }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (status) {
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
+ kmp_info_t *this_thr = __kmp_threads[ global_tid ];
+ kmp_team_t *team = this_thr -> th.th_team;
+
+ int tid = __kmp_tid_from_gtid( global_tid );
+ ompt_callbacks.ompt_callback(ompt_event_master_begin)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+ }
+#endif
+
+ if ( __kmp_env_consistency_check ) {
+#if KMP_USE_DYNAMIC_LOCK
+ if (status)
+ __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
+ else
+ __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
+#else
+ if (status)
+ __kmp_push_sync( global_tid, ct_master, loc, NULL );
+ else
+ __kmp_check_sync( global_tid, ct_master, loc, NULL );
+#endif
+ }
+
+ return status;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param global_tid global thread number.
+
+Mark the end of a <tt>master</tt> region. This should only be called by the thread
+that executes the <tt>master</tt> region.
+*/
+void
+__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
+{
+ KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
+
+ KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
+ KMP_STOP_EXPLICIT_TIMER(OMP_master);
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ kmp_info_t *this_thr = __kmp_threads[ global_tid ];
+ kmp_team_t *team = this_thr -> th.th_team;
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_master_end)) {
+ int tid = __kmp_tid_from_gtid( global_tid );
+ ompt_callbacks.ompt_callback(ompt_event_master_end)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+#endif
+
+ if ( __kmp_env_consistency_check ) {
+ if( global_tid < 0 )
+ KMP_WARNING( ThreadIdentInvalid );
+
+ if( KMP_MASTER_GTID( global_tid ))
+ __kmp_pop_sync( global_tid, ct_master, loc );
+ }
+}
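+
+/*
+   A rough sketch of the code a compiler might emit for
+
+       #pragma omp master
+       { body(); }
+
+   Only the thread for which __kmpc_master() returns 1 executes the block and the matching end call
+   (body and the other names are assumed for the example):
+
+       if ( __kmpc_master( &loc, gtid ) ) {
+           body();                          // master-only code
+           __kmpc_end_master( &loc, gtid );
+       }
+*/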
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param gtid global thread number.
+
+Start execution of an <tt>ordered</tt> construct.
+*/
+void
+__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
+{
+ int cid = 0;
+ kmp_info_t *th;
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
+
+ if (! TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+
+#if USE_ITT_BUILD
+ __kmp_itt_ordered_prep( gtid );
+ // TODO: ordered_wait_id
+#endif /* USE_ITT_BUILD */
+
+ th = __kmp_threads[ gtid ];
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled) {
+ /* OMPT state update */
+ th->th.ompt_thread_info.wait_id = (uint64_t) loc;
+ th->th.ompt_thread_info.state = ompt_state_wait_ordered;
+
+ /* OMPT event callback */
+ if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
+ ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
+ th->th.ompt_thread_info.wait_id);
+ }
+ }
+#endif
+
+ if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
+ (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
+ else
+ __kmp_parallel_deo( & gtid, & cid, loc );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled) {
+ /* OMPT state update */
+ th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ th->th.ompt_thread_info.wait_id = 0;
+
+ /* OMPT event callback */
+ if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
+ ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
+ th->th.ompt_thread_info.wait_id);
+ }
+ }
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_ordered_start( gtid );
+#endif /* USE_ITT_BUILD */
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param gtid global thread number.
+
+End execution of an <tt>ordered</tt> construct.
+*/
+void
+__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
+{
+ int cid = 0;
+ kmp_info_t *th;
+
+ KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
+
+#if USE_ITT_BUILD
+ __kmp_itt_ordered_end( gtid );
+ // TODO: ordered_wait_id
+#endif /* USE_ITT_BUILD */
+
+ th = __kmp_threads[ gtid ];
+
+ if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
+ (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
+ else
+ __kmp_parallel_dxo( & gtid, & cid, loc );
+
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
+ ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
+ th->th.ompt_thread_info.wait_id);
+ }
+#endif
+}
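+
+/*
+   A rough sketch of how the body of a loop with the 'ordered' clause might use the pair above
+   (assumed names; the surrounding dispatch calls are omitted):
+
+       // inside the dispatched iteration
+       __kmpc_ordered( &loc, gtid );        // wait until this iteration may enter
+       ordered_body();
+       __kmpc_end_ordered( &loc, gtid );    // let the next iteration in
+*/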
+
+#if KMP_USE_DYNAMIC_LOCK
+
+static __forceinline void
+__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
+{
+ // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
+ void *idx;
+ kmp_indirect_lock_t **lck;
+ lck = (kmp_indirect_lock_t **)crit;
+ kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
+ KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
+ KMP_SET_I_LOCK_LOCATION(ilk, loc);
+ KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
+ KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
+#if USE_ITT_BUILD
+ __kmp_itt_critical_creating(ilk->lock, loc);
+#endif
+ int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
+ if (status == 0) {
+#if USE_ITT_BUILD
+ __kmp_itt_critical_destroyed(ilk->lock);
+#endif
+ // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
+ //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
+ }
+ KMP_DEBUG_ASSERT(*lck != NULL);
+}
+
+// Fast-path acquire tas lock
+#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
+ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
+ if (l->lk.poll != KMP_LOCK_FREE(tas) || \
+ ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
+ kmp_uint32 spins; \
+ KMP_FSYNC_PREPARE(l); \
+ KMP_INIT_YIELD(spins); \
+ if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
+ KMP_YIELD(TRUE); \
+ } else { \
+ KMP_YIELD_SPIN(spins); \
+ } \
+ while (l->lk.poll != KMP_LOCK_FREE(tas) || \
+ ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
+ if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
+ KMP_YIELD(TRUE); \
+ } else { \
+ KMP_YIELD_SPIN(spins); \
+ } \
+ } \
+ } \
+ KMP_FSYNC_ACQUIRED(l); \
+}
+
+// Fast-path test tas lock
+#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
+ kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
+ rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
+ KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
+}
+
+// Fast-path release tas lock
+#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
+ TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
+ KMP_MB(); \
+}
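+
+// In plain terms, the three fast-path macros above implement a test-and-set spin lock on the poll
+// word: acquire spins (yielding when the machine is oversubscribed) until a compare-and-swap moves
+// poll from the "free" value to a value encoding gtid+1; test attempts a single such compare-and-swap
+// and reports the result; release stores the "free" value back and issues a memory barrier.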
+
+#if KMP_USE_FUTEX
+
+# include <unistd.h>
+# include <sys/syscall.h>
+# ifndef FUTEX_WAIT
+# define FUTEX_WAIT 0
+# endif
+# ifndef FUTEX_WAKE
+# define FUTEX_WAKE 1
+# endif
+
+// Fast-path acquire futex lock
+#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
+ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
+ kmp_int32 gtid_code = (gtid+1) << 1; \
+ KMP_MB(); \
+ KMP_FSYNC_PREPARE(ftx); \
+ kmp_int32 poll_val; \
+ while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
+ KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
+ kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
+ if (!cond) { \
+ if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
+ continue; \
+ } \
+ poll_val |= KMP_LOCK_BUSY(1, futex); \
+ } \
+ kmp_int32 rc; \
+ if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
+ continue; \
+ } \
+ gtid_code |= 1; \
+ } \
+ KMP_FSYNC_ACQUIRED(ftx); \
+}
+
+// Fast-path test futex lock
+#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
+ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
+ if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \
+ KMP_FSYNC_ACQUIRED(ftx); \
+ rc = TRUE; \
+ } else { \
+ rc = FALSE; \
+ } \
+}
+
+// Fast-path release futex lock
+#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
+ kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
+ KMP_MB(); \
+ KMP_FSYNC_RELEASING(ftx); \
+ kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
+ if (KMP_LOCK_STRIP(poll_val) & 1) { \
+ syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
+ } \
+ KMP_MB(); \
+ KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
+}
+
+#endif // KMP_USE_FUTEX
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+static kmp_user_lock_p
+__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
+{
+ kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
+
+ //
+ // Because of the double-check, the following load
+ // doesn't need to be volatile.
+ //
+ kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
+
+ if ( lck == NULL ) {
+ void * idx;
+
+ // Allocate & initialize the lock.
+ // Remember allocated locks in table in order to free them in __kmp_cleanup()
+ lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
+ __kmp_init_user_lock_with_checks( lck );
+ __kmp_set_user_lock_location( lck, loc );
+#if USE_ITT_BUILD
+ __kmp_itt_critical_creating( lck );
+ // __kmp_itt_critical_creating() should be called *before* the first usage of the underlying
+ // lock. It is the only place where we can guarantee it. There is a chance the lock will be
+ // destroyed without ever being used, but that is not a problem, because this is not a real event
+ // seen by the user but rather sets a name for the object (lock). See more details in kmp_itt.h.
+#endif /* USE_ITT_BUILD */
+
+ //
+ // Use a cmpxchg instruction to slam the start of the critical
+ // section with the lock pointer. If another thread beat us
+ // to it, deallocate the lock, and use the lock that the other
+ // thread allocated.
+ //
+ int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
+
+ if ( status == 0 ) {
+ // Deallocate the lock and reload the value.
+#if USE_ITT_BUILD
+ __kmp_itt_critical_destroyed( lck );
+ // Let ITT know the lock is destroyed and the same memory location may be reused for
+ // another purpose.
+#endif /* USE_ITT_BUILD */
+ __kmp_destroy_user_lock_with_checks( lck );
+ __kmp_user_lock_free( &idx, gtid, lck );
+ lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
+ KMP_DEBUG_ASSERT( lck != NULL );
+ }
+ }
+ return lck;
+}
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param global_tid global thread number.
+@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
+some other suitably unique value.
+
+Enter code protected by a `critical` construct.
+This function blocks until the executing thread can enter the critical section.
+*/
+void
+__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
+{
+#if KMP_USE_DYNAMIC_LOCK
+ __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
+#else
+ KMP_COUNT_BLOCK(OMP_CRITICAL);
+ kmp_user_lock_p lck;
+
+ KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
+
+ //TODO: add THR_OVHD_STATE
+
+ KMP_CHECK_USER_LOCK_INIT();
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+ lck = (kmp_user_lock_p)crit;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+ lck = (kmp_user_lock_p)crit;
+ }
+#endif
+ else { // ticket, queuing or drdpa
+ lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
+ }
+
+ if ( __kmp_env_consistency_check )
+ __kmp_push_sync( global_tid, ct_critical, loc, lck );
+
+ /* since the critical directive binds to all threads, not just
+ * the current team, we have to check this even if we are in a
+ * serialized team */
+ /* also, even if we are the uber thread, we still have to acquire the lock,
+ * as we have to contend with sibling threads */
+
+#if USE_ITT_BUILD
+ __kmp_itt_critical_acquiring( lck );
+#endif /* USE_ITT_BUILD */
+ // The value of 'crit' is suitable for use as the critical_id of the critical section directive.
+ __kmp_acquire_user_lock_with_checks( lck, global_tid );
+
+#if USE_ITT_BUILD
+ __kmp_itt_critical_acquired( lck );
+#endif /* USE_ITT_BUILD */
+
+ KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
+#endif // KMP_USE_DYNAMIC_LOCK
+}
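+
+/*
+   A rough sketch of the code a compiler might emit for
+
+       #pragma omp critical (name)
+       { body(); }
+
+   Each critical name is backed by a zero-initialized, file-scope kmp_critical_name object shared by
+   every use of that name (crit_name and body are assumed names):
+
+       static kmp_critical_name crit_name;          // all zeros until first use
+       ...
+       __kmpc_critical( &loc, gtid, &crit_name );
+       body();                                      // the protected region
+       __kmpc_end_critical( &loc, gtid, &crit_name );
+*/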
+
+#if KMP_USE_DYNAMIC_LOCK
+
+// Converts the given hint to an internal lock implementation
+static __forceinline kmp_dyna_lockseq_t
+__kmp_map_hint_to_lock(uintptr_t hint)
+{
+#if KMP_USE_TSX
+# define KMP_TSX_LOCK(seq) lockseq_##seq
+#else
+# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
+#endif
+ // Hints that do not require further logic
+ if (hint & kmp_lock_hint_hle)
+ return KMP_TSX_LOCK(hle);
+ if (hint & kmp_lock_hint_rtm)
+ return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
+ if (hint & kmp_lock_hint_adaptive)
+ return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;
+
+ // Rule out conflicting hints first by returning the default lock
+ if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
+ return __kmp_user_lock_seq;
+ if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
+ return __kmp_user_lock_seq;
+
+ // Do not even consider speculation when it appears to be contended
+ if (hint & omp_lock_hint_contended)
+ return lockseq_queuing;
+
+ // Uncontended lock without speculation
+ if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
+ return lockseq_tas;
+
+ // HLE lock for speculation
+ if (hint & omp_lock_hint_speculative)
+ return KMP_TSX_LOCK(hle);
+
+ return __kmp_user_lock_seq;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param global_tid global thread number.
+@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section,
+or some other suitably unique value.
+@param hint the lock hint.
+
+Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation.
+This function blocks until the executing thread can enter the critical section unless the hint suggests use of
+speculative execution and the hardware supports it.
+*/
+void
+__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
+{
+ KMP_COUNT_BLOCK(OMP_CRITICAL);
+ kmp_user_lock_p lck;
+
+ KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
+
+ kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
+ // Check if it is initialized.
+ if (*lk == 0) {
+ kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
+ if (KMP_IS_D_LOCK(lckseq)) {
+ KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
+ } else {
+ __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
+ }
+ }
+ // Branch for accessing the actual lock object and set operation. This branching is inevitable since
+ // this lock initialization does not follow the normal dispatch path (lock table is not used).
+ if (KMP_EXTRACT_D_TAG(lk) != 0) {
+ lck = (kmp_user_lock_p)lk;
+ if (__kmp_env_consistency_check) {
+ __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
+ }
+# if USE_ITT_BUILD
+ __kmp_itt_critical_acquiring(lck);
+# endif
+# if KMP_USE_INLINED_TAS
+ if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
+ KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
+ } else
+# elif KMP_USE_INLINED_FUTEX
+ if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
+ KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
+ } else
+# endif
+ {
+ KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
+ }
+ } else {
+ kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
+ lck = ilk->lock;
+ if (__kmp_env_consistency_check) {
+ __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
+ }
+# if USE_ITT_BUILD
+ __kmp_itt_critical_acquiring(lck);
+# endif
+ KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_critical_acquired( lck );
+#endif /* USE_ITT_BUILD */
+
+ KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
+} // __kmpc_critical_with_hint
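+
+/*
+   With a hint clause, for example
+
+       #pragma omp critical (name) hint(omp_lock_hint_speculative)
+
+   the only difference from the plain critical sketch is the enter call, which carries the hint so the
+   runtime can pick a lock implementation (see __kmp_map_hint_to_lock() above):
+
+       __kmpc_critical_with_hint( &loc, gtid, &crit_name, omp_lock_hint_speculative );
+       body();
+       __kmpc_end_critical( &loc, gtid, &crit_name );
+*/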
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param global_tid global thread number.
+@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
+some other suitably unique value.
+
+Leave a critical section, releasing any lock that was held during its execution.
+*/
+void
+__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
+{
+ kmp_user_lock_p lck;
+
+ KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
+
+#if KMP_USE_DYNAMIC_LOCK
+ if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
+ lck = (kmp_user_lock_p)crit;
+ KMP_ASSERT(lck != NULL);
+ if (__kmp_env_consistency_check) {
+ __kmp_pop_sync(global_tid, ct_critical, loc);
+ }
+# if USE_ITT_BUILD
+ __kmp_itt_critical_releasing( lck );
+# endif
+# if KMP_USE_INLINED_TAS
+ if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
+ KMP_RELEASE_TAS_LOCK(lck, global_tid);
+ } else
+# elif KMP_USE_INLINED_FUTEX
+ if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
+ KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
+ } else
+# endif
+ {
+ KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
+ }
+ } else {
+ kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
+ KMP_ASSERT(ilk != NULL);
+ lck = ilk->lock;
+ if (__kmp_env_consistency_check) {
+ __kmp_pop_sync(global_tid, ct_critical, loc);
+ }
+# if USE_ITT_BUILD
+ __kmp_itt_critical_releasing( lck );
+# endif
+ KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
+ }
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+ lck = (kmp_user_lock_p)crit;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
+ lck = (kmp_user_lock_p)crit;
+ }
+#endif
+ else { // ticket, queuing or drdpa
+ lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
+ }
+
+ KMP_ASSERT(lck != NULL);
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_sync( global_tid, ct_critical, loc );
+
+#if USE_ITT_BUILD
+ __kmp_itt_critical_releasing( lck );
+#endif /* USE_ITT_BUILD */
+ // The value of 'crit' is suitable for use as the critical_id of the critical section directive.
+ __kmp_release_user_lock_with_checks( lck, global_tid );
+
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
+ ompt_callbacks.ompt_callback(ompt_event_release_critical)(
+ (uint64_t) lck);
+ }
+#endif
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+ KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+@return one if the thread should execute the master block, zero otherwise
+
+Start execution of a combined barrier and master. The barrier is executed inside this function.
+*/
+kmp_int32
+__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
+{
+ int status;
+
+ KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
+
+ if (! TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+
+ if ( __kmp_env_consistency_check )
+ __kmp_check_barrier( global_tid, ct_barrier, loc );
+
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+ status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
+
+ return (status != 0) ? 0 : 1;
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+
+Complete the execution of a combined barrier and master. This function should
+only be called at the completion of the <tt>master</tt> code. Other threads will
+still be waiting at the barrier and this call releases them.
+*/
+void
+__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
+{
+ KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
+
+ __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid thread id.
+@return one if the thread should execute the master block, zero otherwise
+
+Start execution of a combined barrier and master(nowait) construct.
+The barrier is executed inside this function.
+There is no equivalent "end" function, since the actions that __kmpc_end_master()
+would otherwise perform are carried out inside this call.
+*/
+kmp_int32
+__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
+{
+ kmp_int32 ret;
+
+ KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
+
+ if (! TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+
+ if ( __kmp_env_consistency_check ) {
+ if ( loc == 0 ) {
+ KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
+ }
+ __kmp_check_barrier( global_tid, ct_barrier, loc );
+ }
+
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+ __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
+
+ ret = __kmpc_master (loc, global_tid);
+
+ if ( __kmp_env_consistency_check ) {
+ /* there's no __kmpc_end_master called; so the (stats) */
+ /* actions of __kmpc_end_master are done here */
+
+ if ( global_tid < 0 ) {
+ KMP_WARNING( ThreadIdentInvalid );
+ }
+ if (ret) {
+ /* only one thread should do the pop since only */
+ /* one did the push (see __kmpc_master()) */
+
+ __kmp_pop_sync( global_tid, ct_master, loc );
+ }
+ }
+
+ return (ret);
+}
+
+/* The BARRIER for a SINGLE process section is always explicit */
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@return One if this thread should execute the single construct, zero otherwise.
+
+Test whether to execute a <tt>single</tt> construct.
+There are no implicit barriers in the two "single" calls; rather, the compiler should
+introduce an explicit barrier if it is required.
+*/
+
+kmp_int32
+__kmpc_single(ident_t *loc, kmp_int32 global_tid)
+{
+ KMP_COUNT_BLOCK(OMP_SINGLE);
+ kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
+ if(rc == TRUE) {
+ KMP_START_EXPLICIT_TIMER(OMP_single);
+ }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ kmp_info_t *this_thr = __kmp_threads[ global_tid ];
+ kmp_team_t *team = this_thr -> th.th_team;
+ int tid = __kmp_tid_from_gtid( global_tid );
+
+ if (ompt_enabled) {
+ if (rc) {
+ if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
+ team->t.ompt_team_info.microtask);
+ }
+ } else {
+ if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
+ }
+ }
+#endif
+
+ return rc;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+
+Mark the end of a <tt>single</tt> construct. This function should
+only be called by the thread that executed the block of code protected
+by the `single` construct.
+*/
+void
+__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
+{
+ __kmp_exit_single( global_tid );
+ KMP_STOP_EXPLICIT_TIMER(OMP_single);
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ kmp_info_t *this_thr = __kmp_threads[ global_tid ];
+ kmp_team_t *team = this_thr -> th.th_team;
+ int tid = __kmp_tid_from_gtid( global_tid );
+
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+#endif
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc Source location
+@param global_tid Global thread id
+
+Mark the end of a statically scheduled loop.
+*/
+void
+__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
+{
+ KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
+ kmp_info_t *this_thr = __kmp_threads[ global_tid ];
+ kmp_team_t *team = this_thr -> th.th_team;
+ int tid = __kmp_tid_from_gtid( global_tid );
+
+ ompt_callbacks.ompt_callback(ompt_event_loop_end)(
+ team->t.ompt_team_info.parallel_id,
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
+ }
+#endif
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_workshare( global_tid, ct_pdo, loc );
+}
+
+/*
+ * User routines which take C-style (call-by-value) arguments,
+ * unlike the equivalent Fortran routines
+ */
+
+void
+ompc_set_num_threads( int arg )
+{
+// !!!!! TODO: check the per-task binding
+ __kmp_set_num_threads( arg, __kmp_entry_gtid() );
+}
+
+void
+ompc_set_dynamic( int flag )
+{
+ kmp_info_t *thread;
+
+ /* For the thread-private implementation of the internal controls */
+ thread = __kmp_entry_thread();
+
+ __kmp_save_internal_controls( thread );
+
+ set__dynamic( thread, flag ? TRUE : FALSE );
+}
+
+void
+ompc_set_nested( int flag )
+{
+ kmp_info_t *thread;
+
+ /* For the thread-private internal controls implementation */
+ thread = __kmp_entry_thread();
+
+ __kmp_save_internal_controls( thread );
+
+ set__nested( thread, flag ? TRUE : FALSE );
+}
+
+void
+ompc_set_max_active_levels( int max_active_levels )
+{
+ /* TO DO */
+ /* we want per-task implementation of this internal control */
+
+ /* For the per-thread internal controls implementation */
+ __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
+}
+
+void
+ompc_set_schedule( omp_sched_t kind, int modifier )
+{
+// !!!!! TODO: check the per-task binding
+ __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
+}
+
+int
+ompc_get_ancestor_thread_num( int level )
+{
+ return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
+}
+
+int
+ompc_get_team_size( int level )
+{
+ return __kmp_get_team_size( __kmp_entry_gtid(), level );
+}
+
+void
+kmpc_set_stacksize( int arg )
+{
+ // __kmp_aux_set_stacksize initializes the library if needed
+ __kmp_aux_set_stacksize( arg );
+}
+
+void
+kmpc_set_stacksize_s( size_t arg )
+{
+ // __kmp_aux_set_stacksize initializes the library if needed
+ __kmp_aux_set_stacksize( arg );
+}
+
+void
+kmpc_set_blocktime( int arg )
+{
+ int gtid, tid;
+ kmp_info_t *thread;
+
+ gtid = __kmp_entry_gtid();
+ tid = __kmp_tid_from_gtid(gtid);
+ thread = __kmp_thread_from_gtid(gtid);
+
+ __kmp_aux_set_blocktime( arg, thread, tid );
+}
+
+void
+kmpc_set_library( int arg )
+{
+ // __kmp_user_set_library initializes the library if needed
+ __kmp_user_set_library( (enum library_type)arg );
+}
+
+void
+kmpc_set_defaults( char const * str )
+{
+ // __kmp_aux_set_defaults initializes the library if needed
+ __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
+}
+
+int
+kmpc_set_affinity_mask_proc( int proc, void **mask )
+{
+#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+#else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_set_affinity_mask_proc( proc, mask );
+#endif
+}
+
+int
+kmpc_unset_affinity_mask_proc( int proc, void **mask )
+{
+#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+#else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_unset_affinity_mask_proc( proc, mask );
+#endif
+}
+
+int
+kmpc_get_affinity_mask_proc( int proc, void **mask )
+{
+#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+#else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_get_affinity_mask_proc( proc, mask );
+#endif
+}
+
+
+/* -------------------------------------------------------------------------- */
+/*!
+@ingroup THREADPRIVATE
+@param loc source location information
+@param gtid global thread number
+@param cpy_size size of the cpy_data buffer
+@param cpy_data pointer to data to be copied
+@param cpy_func helper function to call for copying data
+@param didit flag variable: 1=single thread; 0=not single thread
+
+__kmpc_copyprivate implements the interface for the private data broadcast needed for
+the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
+All threads participating in the parallel region call this routine.
+One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
+and all other threads should have that variable set to 0.
+All threads pass a pointer to a data buffer (cpy_data) that they have built.
+
+The OpenMP specification forbids the use of nowait on the single region when a copyprivate
+clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
+race conditions, so the code generation for the single region should avoid generating a barrier
+after the call to @ref __kmpc_copyprivate.
+
+The <tt>gtid</tt> parameter is the global thread id for the current thread.
+The <tt>loc</tt> parameter is a pointer to source location information.
+
+Internal implementation: the single thread first copies its descriptor address (cpy_data)
+to a team-private location; the other threads then each call the function pointed to by
+the parameter cpy_func, which performs the copy using the two cpy_data buffers.
+
+The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
+and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
+the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
+to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
+
+The interface to cpy_func is as follows:
+@code
+void cpy_func( void *destination, void *source )
+@endcode
+where void *destination is the cpy_data pointer for the thread being copied to
+and void *source is the cpy_data pointer for the thread being copied from.
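+
+As a purely illustrative sketch (the function name and the constant N are assumptions,
+not part of the interface), a cpy_func that treats cpy_data as a flat buffer of N bytes
+known to the code generator could be written as:
+@code
+void example_cpy_func( void *destination, void *source )
+{
+    /* copy the broadcasting thread's buffer into this thread's buffer */
+    memcpy( destination, source, N );
+}
+@endcode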
+*/
+void
+__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
+{
+ void **data_ptr;
+
+ KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
+
+ KMP_MB();
+
+ data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
+
+ if ( __kmp_env_consistency_check ) {
+ if ( loc == 0 ) {
+ KMP_WARNING( ConstructIdentInvalid );
+ }
+ }
+
+ /* ToDo: Optimize the following two barriers into some kind of split barrier */
+
+ if (didit) *data_ptr = cpy_data;
+
+ /* This barrier is not a barrier region boundary */
+#if USE_ITT_NOTIFY
+ __kmp_threads[gtid]->th.th_ident = loc;
+#endif
+ __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
+
+ if (! didit) (*cpy_func)( cpy_data, *data_ptr );
+
+ /* Consider next barrier the user-visible barrier for barrier region boundaries */
+ /* Nesting checks are already handled by the single construct checks */
+
+#if USE_ITT_NOTIFY
+ __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
+#endif
+ __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
+}
+
+/* -------------------------------------------------------------------------- */
+
+#define INIT_LOCK __kmp_init_user_lock_with_checks
+#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
+#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
+#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
+#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
+#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
+#define RELEASE_LOCK __kmp_release_user_lock_with_checks
+#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
+#define TEST_LOCK __kmp_test_user_lock_with_checks
+#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
+#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
+#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
+
+
+/*
+ * TODO: Make check abort messages use location info & pass it
+ * into with_checks routines
+ */
+
+#if KMP_USE_DYNAMIC_LOCK
+
+// internal lock initializer
+static __forceinline void
+__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
+{
+ if (KMP_IS_D_LOCK(seq)) {
+ KMP_INIT_D_LOCK(lock, seq);
+#if USE_ITT_BUILD
+ __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
+#endif
+ } else {
+ KMP_INIT_I_LOCK(lock, seq);
+#if USE_ITT_BUILD
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __kmp_itt_lock_creating(ilk->lock, loc);
+#endif
+ }
+}
+
+// internal nest lock initializer
+static __forceinline void
+__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
+{
+#if KMP_USE_TSX
+ // Don't have nested lock implementation for speculative locks
+ if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
+ seq = __kmp_user_lock_seq;
+#endif
+ switch (seq) {
+ case lockseq_tas:
+ seq = lockseq_nested_tas;
+ break;
+#if KMP_USE_FUTEX
+ case lockseq_futex:
+ seq = lockseq_nested_futex;
+ break;
+#endif
+ case lockseq_ticket:
+ seq = lockseq_nested_ticket;
+ break;
+ case lockseq_queuing:
+ seq = lockseq_nested_queuing;
+ break;
+ case lockseq_drdpa:
+ seq = lockseq_nested_drdpa;
+ break;
+ default:
+ seq = lockseq_nested_queuing;
+ }
+ KMP_INIT_I_LOCK(lock, seq);
+#if USE_ITT_BUILD
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __kmp_itt_lock_creating(ilk->lock, loc);
+#endif
+}
+
+/* initialize the lock with a hint */
+void
+__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
+{
+ KMP_DEBUG_ASSERT(__kmp_init_serial);
+ if (__kmp_env_consistency_check && user_lock == NULL) {
+ KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
+ }
+
+ __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
+}
+
+/* initialize the lock with a hint */
+void
+__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
+{
+ KMP_DEBUG_ASSERT(__kmp_init_serial);
+ if (__kmp_env_consistency_check && user_lock == NULL) {
+ KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
+ }
+
+ __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
+}
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+/* initialize the lock */
+void
+__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
+#if KMP_USE_DYNAMIC_LOCK
+ KMP_DEBUG_ASSERT(__kmp_init_serial);
+ if (__kmp_env_consistency_check && user_lock == NULL) {
+ KMP_FATAL(LockIsUninitialized, "omp_init_lock");
+ }
+ __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ static char const * const func = "omp_init_lock";
+ kmp_user_lock_p lck;
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ if ( __kmp_env_consistency_check ) {
+ if ( user_lock == NULL ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ }
+
+ KMP_CHECK_USER_LOCK_INIT();
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
+ }
+ INIT_LOCK( lck );
+ __kmp_set_user_lock_location( lck, loc );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
+ ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
+ }
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_creating( lck );
+#endif /* USE_ITT_BUILD */
+
+#endif // KMP_USE_DYNAMIC_LOCK
+} // __kmpc_init_lock
+
+/* initialize the lock */
+void
+__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
+#if KMP_USE_DYNAMIC_LOCK
+
+ KMP_DEBUG_ASSERT(__kmp_init_serial);
+ if (__kmp_env_consistency_check && user_lock == NULL) {
+ KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
+ }
+ __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ static char const * const func = "omp_init_nest_lock";
+ kmp_user_lock_p lck;
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ if ( __kmp_env_consistency_check ) {
+ if ( user_lock == NULL ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ }
+
+ KMP_CHECK_USER_LOCK_INIT();
+
+ if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
+ <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
+ }
+
+ INIT_NESTED_LOCK( lck );
+ __kmp_set_user_lock_location( lck, loc );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
+ ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
+ }
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_creating( lck );
+#endif /* USE_ITT_BUILD */
+
+#endif // KMP_USE_DYNAMIC_LOCK
+} // __kmpc_init_nest_lock
+
+void
+__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
+#if KMP_USE_DYNAMIC_LOCK
+
+# if USE_ITT_BUILD
+ kmp_user_lock_p lck;
+ if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
+ lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
+ } else {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+ __kmp_itt_lock_destroyed(lck);
+# endif
+ KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
+#else
+ kmp_user_lock_p lck;
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
+ }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
+ ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
+ }
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_destroyed( lck );
+#endif /* USE_ITT_BUILD */
+ DESTROY_LOCK( lck );
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ ;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ ;
+ }
+#endif
+ else {
+ __kmp_user_lock_free( user_lock, gtid, lck );
+ }
+#endif // KMP_USE_DYNAMIC_LOCK
+} // __kmpc_destroy_lock
+
+/* destroy the lock */
+void
+__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
+#if KMP_USE_DYNAMIC_LOCK
+
+# if USE_ITT_BUILD
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
+ __kmp_itt_lock_destroyed(ilk->lock);
+# endif
+ KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ kmp_user_lock_p lck;
+
+ if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
+ <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
+ }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
+ ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
+ }
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_destroyed( lck );
+#endif /* USE_ITT_BUILD */
+
+ DESTROY_NESTED_LOCK( lck );
+
+ if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
+ ;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
+ <= OMP_NEST_LOCK_T_SIZE ) ) {
+ ;
+ }
+#endif
+ else {
+ __kmp_user_lock_free( user_lock, gtid, lck );
+ }
+#endif // KMP_USE_DYNAMIC_LOCK
+} // __kmpc_destroy_nest_lock
+
+void
+__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
+ KMP_COUNT_BLOCK(OMP_set_lock);
+#if KMP_USE_DYNAMIC_LOCK
+ int tag = KMP_EXTRACT_D_TAG(user_lock);
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
+# endif
+# if KMP_USE_INLINED_TAS
+ if (tag == locktag_tas && !__kmp_env_consistency_check) {
+ KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
+ } else
+# elif KMP_USE_INLINED_FUTEX
+ if (tag == locktag_futex && !__kmp_env_consistency_check) {
+ KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
+ } else
+# endif
+ {
+ __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
+ }
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
+# endif
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ kmp_user_lock_p lck;
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring( lck );
+#endif /* USE_ITT_BUILD */
+
+ ACQUIRE_LOCK( lck, gtid );
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_acquired( lck );
+#endif /* USE_ITT_BUILD */
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
+ ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
+ }
+#endif
+
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+void
+__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
+#if KMP_USE_DYNAMIC_LOCK
+
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
+# endif
+ KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
+#endif
+
+#else // KMP_USE_DYNAMIC_LOCK
+ int acquire_status;
+ kmp_user_lock_p lck;
+
+ if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
+ <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring( lck );
+#endif /* USE_ITT_BUILD */
+
+ ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_acquired( lck );
+#endif /* USE_ITT_BUILD */
+#endif // KMP_USE_DYNAMIC_LOCK
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled) {
+ if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
+ if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
+ ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
+ } else {
+ if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
+ ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
+ }
+ }
+#endif
+}
+
+void
+__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
+{
+#if KMP_USE_DYNAMIC_LOCK
+
+ int tag = KMP_EXTRACT_D_TAG(user_lock);
+# if USE_ITT_BUILD
+ __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
+# endif
+# if KMP_USE_INLINED_TAS
+ if (tag == locktag_tas && !__kmp_env_consistency_check) {
+ KMP_RELEASE_TAS_LOCK(user_lock, gtid);
+ } else
+# elif KMP_USE_INLINED_FUTEX
+ if (tag == locktag_futex && !__kmp_env_consistency_check) {
+ KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
+ } else
+# endif
+ {
+ __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
+ }
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ kmp_user_lock_p lck;
+
+ /* Can't use serial interval since not block structured */
+ /* release the lock */
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ // "fast" path implemented to fix customer performance issue
+#if USE_ITT_BUILD
+ __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
+#endif /* USE_ITT_BUILD */
+ TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
+ KMP_MB();
+ return;
+#else
+ lck = (kmp_user_lock_p)user_lock;
+#endif
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_releasing( lck );
+#endif /* USE_ITT_BUILD */
+
+ RELEASE_LOCK( lck, gtid );
+
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
+ ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
+ }
+#endif
+
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+/* release the lock */
+void
+__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
+{
+#if KMP_USE_DYNAMIC_LOCK
+
+# if USE_ITT_BUILD
+ __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
+# endif
+ KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ kmp_user_lock_p lck;
+
+ /* Can't use serial interval since not block structured */
+
+ if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ // "fast" path implemented to fix customer performance issue
+ kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
+#if USE_ITT_BUILD
+ __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
+#endif /* USE_ITT_BUILD */
+ if ( --(tl->lk.depth_locked) == 0 ) {
+ TCW_4(tl->lk.poll, 0);
+ }
+ KMP_MB();
+ return;
+#else
+ lck = (kmp_user_lock_p)user_lock;
+#endif
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
+ <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_releasing( lck );
+#endif /* USE_ITT_BUILD */
+
+ int release_status;
+ release_status = RELEASE_NESTED_LOCK( lck, gtid );
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled) {
+ if (release_status == KMP_LOCK_RELEASED) {
+ if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
+ ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
+ (uint64_t) lck);
+ }
+ } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
+ ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
+ (uint64_t) lck);
+ }
+ }
+#endif
+
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+/* try to acquire the lock */
+int
+__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
+{
+ KMP_COUNT_BLOCK(OMP_test_lock);
+
+#if KMP_USE_DYNAMIC_LOCK
+ int rc;
+ int tag = KMP_EXTRACT_D_TAG(user_lock);
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
+# endif
+# if KMP_USE_INLINED_TAS
+ if (tag == locktag_tas && !__kmp_env_consistency_check) {
+ KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
+ } else
+# elif KMP_USE_INLINED_FUTEX
+ if (tag == locktag_futex && !__kmp_env_consistency_check) {
+ KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
+ } else
+# endif
+ {
+ rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
+ }
+ if (rc) {
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
+# endif
+ return FTN_TRUE;
+ } else {
+# if USE_ITT_BUILD
+ __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
+# endif
+ return FTN_FALSE;
+ }
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ kmp_user_lock_p lck;
+ int rc;
+
+ if ( ( __kmp_user_lock_kind == lk_tas )
+ && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring( lck );
+#endif /* USE_ITT_BUILD */
+
+ rc = TEST_LOCK( lck, gtid );
+#if USE_ITT_BUILD
+ if ( rc ) {
+ __kmp_itt_lock_acquired( lck );
+ } else {
+ __kmp_itt_lock_cancelled( lck );
+ }
+#endif /* USE_ITT_BUILD */
+ return ( rc ? FTN_TRUE : FTN_FALSE );
+
+ /* Can't use serial interval since not block structured */
+
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+/* try to acquire the lock */
+int
+__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
+{
+#if KMP_USE_DYNAMIC_LOCK
+ int rc;
+# if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
+# endif
+ rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
+# if USE_ITT_BUILD
+ if (rc) {
+ __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
+ } else {
+ __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
+ }
+# endif
+ return rc;
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ kmp_user_lock_p lck;
+ int rc;
+
+ if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ else if ( ( __kmp_user_lock_kind == lk_futex )
+ && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
+ <= OMP_NEST_LOCK_T_SIZE ) ) {
+ lck = (kmp_user_lock_p)user_lock;
+ }
+#endif
+ else {
+ lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_lock_acquiring( lck );
+#endif /* USE_ITT_BUILD */
+
+ rc = TEST_NESTED_LOCK( lck, gtid );
+#if USE_ITT_BUILD
+ if ( rc ) {
+ __kmp_itt_lock_acquired( lck );
+ } else {
+ __kmp_itt_lock_cancelled( lck );
+ }
+#endif /* USE_ITT_BUILD */
+ return rc;
+
+ /* Can't use serial interval since not block structured */
+
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+
+/*--------------------------------------------------------------------------------------------------------------------*/
+
+/*
+ * Interface to fast scalable reduce methods routines
+ */
+
+// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
+// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
+// AT: which solution is better?
+#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
+ ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
+
+#define __KMP_GET_REDUCTION_METHOD(gtid) \
+ ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
+
+// description of the packed_reduction_method variable: look at the macros in kmp.h
+
+
+// used in a critical section reduce block
+static __forceinline void
+__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
+
+ // this lock was visible to a customer and to the threading profile tool as a serial overhead span
+ // (although it's used for an internal purpose only)
+ // why was it visible in previous implementation?
+ // should we keep it visible in new reduce block?
+ kmp_user_lock_p lck;
+
+#if KMP_USE_DYNAMIC_LOCK
+
+ kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
+ // Check if it is initialized.
+ if (*lk == 0) {
+ if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
+ KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
+ } else {
+ __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
+ }
+ }
+ // Branch for accessing the actual lock object and set operation. This branching is inevitable since
+ // this lock initialization does not follow the normal dispatch path (lock table is not used).
+ if (KMP_EXTRACT_D_TAG(lk) != 0) {
+ lck = (kmp_user_lock_p)lk;
+ KMP_DEBUG_ASSERT(lck != NULL);
+ if (__kmp_env_consistency_check) {
+ __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
+ }
+ KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
+ } else {
+ kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
+ lck = ilk->lock;
+ KMP_DEBUG_ASSERT(lck != NULL);
+ if (__kmp_env_consistency_check) {
+ __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
+ }
+ KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
+ }
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ // We know that the fast reduction code is only emitted by Intel compilers
+ // with 32 byte critical sections. If there isn't enough space, then we
+ // have to use a pointer.
+ if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
+ lck = (kmp_user_lock_p)crit;
+ }
+ else {
+ lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
+ }
+ KMP_DEBUG_ASSERT( lck != NULL );
+
+ if ( __kmp_env_consistency_check )
+ __kmp_push_sync( global_tid, ct_critical, loc, lck );
+
+ __kmp_acquire_user_lock_with_checks( lck, global_tid );
+
+#endif // KMP_USE_DYNAMIC_LOCK
+}
+
+// used in a critical section reduce block
+static __forceinline void
+__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
+
+ kmp_user_lock_p lck;
+
+#if KMP_USE_DYNAMIC_LOCK
+
+ if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
+ lck = (kmp_user_lock_p)crit;
+ if (__kmp_env_consistency_check)
+ __kmp_pop_sync(global_tid, ct_critical, loc);
+ KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
+ } else {
+ kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
+ if (__kmp_env_consistency_check)
+ __kmp_pop_sync(global_tid, ct_critical, loc);
+ KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
+ }
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+ // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
+ // sections. If there isn't enough space, then we have to use a pointer.
+ if ( __kmp_base_user_lock_size > 32 ) {
+ lck = *( (kmp_user_lock_p *) crit );
+ KMP_ASSERT( lck != NULL );
+ } else {
+ lck = (kmp_user_lock_p) crit;
+ }
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_sync( global_tid, ct_critical, loc );
+
+ __kmp_release_user_lock_with_checks( lck, global_tid );
+
+#endif // KMP_USE_DYNAMIC_LOCK
+} // __kmp_end_critical_section_reduce_block
+
+
+/* 2.a.i. Reduce Block without a terminating barrier */
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid global thread number
+@param num_vars number of items (variables) to be reduced
+@param reduce_size size of data in bytes to be reduced
+@param reduce_data pointer to data to be reduced
+@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
+@param lck pointer to the unique lock data structure
+@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
+
+The nowait version is used for a reduce clause with the nowait argument.
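+
+As an illustrative sketch of the code-generation pattern (variable names are assumptions;
+a return of 1 means this thread combines the partial result, 2 means atomics should be
+used, 0 means there is nothing left to do):
+@code
+switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof(local), &local, add_func, &crit ) ) {
+    case 1:
+        global += local;                              /* combine on this thread */
+        __kmpc_end_reduce_nowait( loc, gtid, &crit );
+        break;
+    case 2:
+        /* update the shared result with an atomic operation instead */
+        break;
+    default:
+        break;                                        /* another thread did the work */
+}
+@endcode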
+*/
+kmp_int32
+__kmpc_reduce_nowait(
+ ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck ) {
+
+ KMP_COUNT_BLOCK(REDUCE_nowait);
+ int retval = 0;
+ PACKED_REDUCTION_METHOD_T packed_reduction_method;
+#if OMP_40_ENABLED
+ kmp_team_t *team;
+ kmp_info_t *th;
+ int teams_swapped = 0, task_state;
+#endif
+ KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
+
+ // why do we need this initialization here at all?
+ // Reduction clause can not be used as a stand-alone directive.
+
+ // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
+ // possible detection of false-positive race by the threadchecker ???
+ if( ! TCR_4( __kmp_init_parallel ) )
+ __kmp_parallel_initialize();
+
+ // check correctness of reduce block nesting
+#if KMP_USE_DYNAMIC_LOCK
+ if ( __kmp_env_consistency_check )
+ __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
+#else
+ if ( __kmp_env_consistency_check )
+ __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
+#endif
+
+#if OMP_40_ENABLED
+ th = __kmp_thread_from_gtid(global_tid);
+ if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
+ team = th->th.th_team;
+ if( team->t.t_level == th->th.th_teams_level ) {
+ // this is reduction at teams construct
+ KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
+ // Let's swap teams temporarily for the reduction barrier
+ teams_swapped = 1;
+ th->th.th_info.ds.ds_tid = team->t.t_master_tid;
+ th->th.th_team = team->t.t_parent;
+ th->th.th_team_nproc = th->th.th_team->t.t_nproc;
+ th->th.th_task_team = th->th.th_team->t.t_task_team[0];
+ task_state = th->th.th_task_state;
+ th->th.th_task_state = 0;
+ }
+ }
+#endif // OMP_40_ENABLED
+
+ // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
+ // the variable should be either a construct-specific or thread-specific property, not a team specific property
+ // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
+ // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
+ // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
+ // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
+ // a thread-specific "th_local.reduction_method" variable is used currently
+ // each thread executes the 'determine' and 'set' lines (doing this on every thread, rather than on one thread only, avoids unnecessary extra syncs)
+
+ packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
+ __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
+
+ if( packed_reduction_method == critical_reduce_block ) {
+
+ __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
+ retval = 1;
+
+ } else if( packed_reduction_method == empty_reduce_block ) {
+
+ // usage: if team size == 1, no synchronization is required ( Intel platforms only )
+ retval = 1;
+
+ } else if( packed_reduction_method == atomic_reduce_block ) {
+
+ retval = 2;
+
+ // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
+ // (this is not ideal: the checking block has been closed by this 'pop', but the atomic
+ // operation has not been executed yet; it will happen slightly later, literally on the next instruction)
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_sync( global_tid, ct_reduce, loc );
+
+ } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
+
+ //AT: performance issue: a real barrier here
+ //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them)
+ //AT: (it's not what a customer might expect specifying NOWAIT clause)
+ //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer)
+ //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
+ // and be more in line with sense of NOWAIT
+ //AT: TO DO: do epcc test and compare times
+
+ // this barrier should be invisible to a customer and to the threading profile tool
+ // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+ retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
+ retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
+
+ // all other workers except master should do this pop here
+ // ( none of the other workers will get to __kmpc_end_reduce_nowait() )
+ if ( __kmp_env_consistency_check ) {
+ if( retval == 0 ) {
+ __kmp_pop_sync( global_tid, ct_reduce, loc );
+ }
+ }
+
+ } else {
+
+ // should never reach this block
+ KMP_ASSERT( 0 ); // "unexpected method"
+
+ }
+#if OMP_40_ENABLED
+ if( teams_swapped ) {
+ // Restore thread structure
+ th->th.th_info.ds.ds_tid = 0;
+ th->th.th_team = team;
+ th->th.th_team_nproc = team->t.t_nproc;
+ th->th.th_task_team = team->t.t_task_team[task_state];
+ th->th.th_task_state = task_state;
+ }
+#endif
+ KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
+
+ return retval;
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid global thread id.
+@param lck pointer to the unique lock data structure
+
+Finish the execution of a reduce nowait.
+*/
+void
+__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
+
+ PACKED_REDUCTION_METHOD_T packed_reduction_method;
+
+ KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
+
+ packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
+
+ if( packed_reduction_method == critical_reduce_block ) {
+
+ __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
+
+ } else if( packed_reduction_method == empty_reduce_block ) {
+
+ // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
+
+ } else if( packed_reduction_method == atomic_reduce_block ) {
+
+ // neither master nor other workers should get here
+ // (code gen does not generate this call in case 2: atomic reduce block)
+ // actually it would be better to remove this elseif altogether;
+ // after its removal this value will be checked by the 'else' branch and will assert
+
+ } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
+
+ // only master gets here
+
+ } else {
+
+ // should never reach this block
+ KMP_ASSERT( 0 ); // "unexpected method"
+
+ }
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_sync( global_tid, ct_reduce, loc );
+
+ KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
+
+ return;
+}
+
+/* 2.a.ii. Reduce Block with a terminating barrier */
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid global thread number
+@param num_vars number of items (variables) to be reduced
+@param reduce_size size of data in bytes to be reduced
+@param reduce_data pointer to data to be reduced
+@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
+@param lck pointer to the unique lock data structure
+@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
+
+A blocking reduce that includes an implicit barrier.
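+
+An illustrative sketch of the corresponding pattern (variable names are assumptions);
+unlike the nowait form, both value-returning cases are followed by a call to
+@ref __kmpc_end_reduce, which performs the terminating barrier:
+@code
+switch ( __kmpc_reduce( loc, gtid, 1, sizeof(local), &local, add_func, &crit ) ) {
+    case 1:
+        global += local;                      /* combine on this thread */
+        __kmpc_end_reduce( loc, gtid, &crit );
+        break;
+    case 2:
+        /* atomic update of the shared result */
+        __kmpc_end_reduce( loc, gtid, &crit );
+        break;
+    default:
+        break;
+}
+@endcode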
+*/
+kmp_int32
+__kmpc_reduce(
+ ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
+ void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck )
+{
+ KMP_COUNT_BLOCK(REDUCE_wait);
+ int retval = 0;
+ PACKED_REDUCTION_METHOD_T packed_reduction_method;
+
+ KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
+
+ // why do we need this initialization here at all?
+ // Reduction clause can not be a stand-alone directive.
+
+ // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
+ // possible detection of false-positive race by the threadchecker ???
+ if( ! TCR_4( __kmp_init_parallel ) )
+ __kmp_parallel_initialize();
+
+ // check correctness of reduce block nesting
+#if KMP_USE_DYNAMIC_LOCK
+ if ( __kmp_env_consistency_check )
+ __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
+#else
+ if ( __kmp_env_consistency_check )
+ __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
+#endif
+
+ packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
+ __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
+
+ if( packed_reduction_method == critical_reduce_block ) {
+
+ __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
+ retval = 1;
+
+ } else if( packed_reduction_method == empty_reduce_block ) {
+
+ // usage: if team size == 1, no synchronization is required ( Intel platforms only )
+ retval = 1;
+
+ } else if( packed_reduction_method == atomic_reduce_block ) {
+
+ retval = 2;
+
+ } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
+
+ //case tree_reduce_block:
+ // this barrier should be visible to a customer and to the threading profile tool
+ // (it's a terminating barrier on constructs if NOWAIT not specified)
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
+#endif
+ retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
+ retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
+
+ // all other workers except master should do this pop here
+ // ( no worker other than the master will enter __kmpc_end_reduce() )
+ if ( __kmp_env_consistency_check ) {
+ if( retval == 0 ) { // 0: all other workers; 1: master
+ __kmp_pop_sync( global_tid, ct_reduce, loc );
+ }
+ }
+
+ } else {
+
+ // should never reach this block
+ KMP_ASSERT( 0 ); // "unexpected method"
+
+ }
+
+ KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
+
+ return retval;
+}
+
+/*!
+@ingroup SYNCHRONIZATION
+@param loc source location information
+@param global_tid global thread id.
+@param lck pointer to the unique lock data structure
+
+Finish the execution of a blocking reduce.
+The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
+*/
+void
+__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
+
+ PACKED_REDUCTION_METHOD_T packed_reduction_method;
+
+ KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
+
+ packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
+
+ // this barrier should be visible to a customer and to the threading profile tool
+ // (it's a terminating barrier on constructs if NOWAIT not specified)
+
+ if( packed_reduction_method == critical_reduce_block ) {
+
+ __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
+
+ // TODO: implicit barrier: should be exposed
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+ __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
+
+ } else if( packed_reduction_method == empty_reduce_block ) {
+
+ // usage: if team size == 1, no synchronization is required ( Intel platforms only )
+
+ // TODO: implicit barrier: should be exposed
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+ __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
+
+ } else if( packed_reduction_method == atomic_reduce_block ) {
+
+ // TODO: implicit barrier: should be exposed
+#if USE_ITT_NOTIFY
+ __kmp_threads[global_tid]->th.th_ident = loc;
+#endif
+ __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
+
+ } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
+
+ // only master executes here (master releases all other workers)
+ __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
+
+ } else {
+
+ // should never reach this block
+ KMP_ASSERT( 0 ); // "unexpected method"
+
+ }
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_sync( global_tid, ct_reduce, loc );
+
+ KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
+
+ return;
+}
+
+#undef __KMP_GET_REDUCTION_METHOD
+#undef __KMP_SET_REDUCTION_METHOD
+
+/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
+
+kmp_uint64
+__kmpc_get_taskid() {
+
+ kmp_int32 gtid;
+ kmp_info_t * thread;
+
+ gtid = __kmp_get_gtid();
+ if ( gtid < 0 ) {
+ return 0;
+ }; // if
+ thread = __kmp_thread_from_gtid( gtid );
+ return thread->th.th_current_task->td_task_id;
+
+} // __kmpc_get_taskid
+
+
+kmp_uint64
+__kmpc_get_parent_taskid() {
+
+ kmp_int32 gtid;
+ kmp_info_t * thread;
+ kmp_taskdata_t * parent_task;
+
+ gtid = __kmp_get_gtid();
+ if ( gtid < 0 ) {
+ return 0;
+ }; // if
+ thread = __kmp_thread_from_gtid( gtid );
+ parent_task = thread->th.th_current_task->td_parent;
+ return ( parent_task == NULL ? 0 : parent_task->td_task_id );
+
+} // __kmpc_get_parent_taskid
+
+void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
+{
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ }
+ __kmp_place_num_sockets = nS;
+ __kmp_place_socket_offset = sO;
+ __kmp_place_num_cores = nC;
+ __kmp_place_core_offset = cO;
+ __kmp_place_num_threads_per_core = nT;
+}
+
+// end of file //
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_debug.c b/contrib/libs/cxxsupp/openmp/kmp_debug.c
index c3d96cf013..3bbffa53f2 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_debug.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_debug.c
@@ -1,142 +1,142 @@
-/*
- * kmp_debug.c -- debug utilities for the Guide library
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_debug.h" /* really necessary? */
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-
-#ifdef KMP_DEBUG
-void
-__kmp_debug_printf_stdout( char const * format, ... )
-{
- va_list ap;
- va_start( ap, format );
-
- __kmp_vprintf( kmp_out, format, ap );
-
- va_end(ap);
-}
-#endif
-
-void
-__kmp_debug_printf( char const * format, ... )
-{
- va_list ap;
- va_start( ap, format );
-
- __kmp_vprintf( kmp_err, format, ap );
-
- va_end( ap );
-}
-
-#ifdef KMP_USE_ASSERT
- int
- __kmp_debug_assert(
- char const * msg,
- char const * file,
- int line
- ) {
-
- if ( file == NULL ) {
- file = KMP_I18N_STR( UnknownFile );
- } else {
- // Remove directories from path, leave only file name. File name is enough, there is no need
- // in bothering developers and customers with full paths.
- char const * slash = strrchr( file, '/' );
- if ( slash != NULL ) {
- file = slash + 1;
- }; // if
- }; // if
-
- #ifdef KMP_DEBUG
- __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
- __kmp_debug_printf( "Assertion failure at %s(%d): %s.\n", file, line, msg );
- __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
- #ifdef USE_ASSERT_BREAK
- #if KMP_OS_WINDOWS
- DebugBreak();
- #endif
- #endif // USE_ASSERT_BREAK
- #ifdef USE_ASSERT_STALL
- /* __kmp_infinite_loop(); */
- for(;;);
- #endif // USE_ASSERT_STALL
- #ifdef USE_ASSERT_SEG
- {
- int volatile * ZERO = (int*) 0;
- ++ (*ZERO);
- }
- #endif // USE_ASSERT_SEG
- #endif
-
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( AssertionFailure, file, line ),
- KMP_HNT( SubmitBugReport ),
- __kmp_msg_null
- );
-
- return 0;
-
- } // __kmp_debug_assert
-
-#endif // KMP_USE_ASSERT
-
-/* Dump debugging buffer to stderr */
-void
-__kmp_dump_debug_buffer( void )
-{
- if ( __kmp_debug_buffer != NULL ) {
- int i;
- int dc = __kmp_debug_count;
- char *db = & __kmp_debug_buffer[ (dc % __kmp_debug_buf_lines) * __kmp_debug_buf_chars ];
- char *db_end = & __kmp_debug_buffer[ __kmp_debug_buf_lines * __kmp_debug_buf_chars ];
- char *db2;
-
- __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
- __kmp_printf_no_lock( "\nStart dump of debugging buffer (entry=%d):\n",
- dc % __kmp_debug_buf_lines );
-
- for ( i = 0; i < __kmp_debug_buf_lines; i++ ) {
-
- if ( *db != '\0' ) {
- /* Fix up where no carriage return before string termination char */
- for ( db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2 ++) {
- if ( *db2 == '\0' ) {
- if ( *(db2-1) != '\n' ) { *db2 = '\n'; *(db2+1) = '\0'; }
- break;
- }
- }
- /* Handle case at end by shortening the printed message by one char if necessary */
- if ( db2 == db + __kmp_debug_buf_chars - 1 &&
- *db2 == '\0' && *(db2-1) != '\n' ) {
- *(db2-1) = '\n';
- }
-
- __kmp_printf_no_lock( "%4d: %.*s", i, __kmp_debug_buf_chars, db );
- *db = '\0'; /* only let it print once! */
- }
-
- db += __kmp_debug_buf_chars;
- if ( db >= db_end )
- db = __kmp_debug_buffer;
- }
-
- __kmp_printf_no_lock( "End dump of debugging buffer (entry=%d).\n\n",
- ( dc+i-1 ) % __kmp_debug_buf_lines );
- __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
- }
-}
+/*
+ * kmp_debug.c -- debug utilities for the Guide library
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_debug.h" /* really necessary? */
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+
+#ifdef KMP_DEBUG
+void
+__kmp_debug_printf_stdout( char const * format, ... )
+{
+ va_list ap;
+ va_start( ap, format );
+
+ __kmp_vprintf( kmp_out, format, ap );
+
+ va_end(ap);
+}
+#endif
+
+void
+__kmp_debug_printf( char const * format, ... )
+{
+ va_list ap;
+ va_start( ap, format );
+
+ __kmp_vprintf( kmp_err, format, ap );
+
+ va_end( ap );
+}
+
+#ifdef KMP_USE_ASSERT
+ int
+ __kmp_debug_assert(
+ char const * msg,
+ char const * file,
+ int line
+ ) {
+
+ if ( file == NULL ) {
+ file = KMP_I18N_STR( UnknownFile );
+ } else {
+ // Remove directories from path, leave only file name. File name is enough, there is no need
+ // in bothering developers and customers with full paths.
+ char const * slash = strrchr( file, '/' );
+ if ( slash != NULL ) {
+ file = slash + 1;
+ }; // if
+ }; // if
+
+ #ifdef KMP_DEBUG
+ __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
+ __kmp_debug_printf( "Assertion failure at %s(%d): %s.\n", file, line, msg );
+ __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
+ #ifdef USE_ASSERT_BREAK
+ #if KMP_OS_WINDOWS
+ DebugBreak();
+ #endif
+ #endif // USE_ASSERT_BREAK
+ #ifdef USE_ASSERT_STALL
+ /* __kmp_infinite_loop(); */
+ for(;;);
+ #endif // USE_ASSERT_STALL
+ #ifdef USE_ASSERT_SEG
+ {
+ int volatile * ZERO = (int*) 0;
+ ++ (*ZERO);
+ }
+ #endif // USE_ASSERT_SEG
+ #endif
+
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( AssertionFailure, file, line ),
+ KMP_HNT( SubmitBugReport ),
+ __kmp_msg_null
+ );
+
+ return 0;
+
+ } // __kmp_debug_assert
+
+#endif // KMP_USE_ASSERT
+
+/* Dump debugging buffer to stderr */
+void
+__kmp_dump_debug_buffer( void )
+{
+ if ( __kmp_debug_buffer != NULL ) {
+ int i;
+ int dc = __kmp_debug_count;
+ char *db = & __kmp_debug_buffer[ (dc % __kmp_debug_buf_lines) * __kmp_debug_buf_chars ];
+ char *db_end = & __kmp_debug_buffer[ __kmp_debug_buf_lines * __kmp_debug_buf_chars ];
+ char *db2;
+
+ __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
+ __kmp_printf_no_lock( "\nStart dump of debugging buffer (entry=%d):\n",
+ dc % __kmp_debug_buf_lines );
+
+ for ( i = 0; i < __kmp_debug_buf_lines; i++ ) {
+
+ if ( *db != '\0' ) {
+ /* Fix up where no carriage return before string termination char */
+ for ( db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2 ++) {
+ if ( *db2 == '\0' ) {
+ if ( *(db2-1) != '\n' ) { *db2 = '\n'; *(db2+1) = '\0'; }
+ break;
+ }
+ }
+ /* Handle case at end by shortening the printed message by one char if necessary */
+ if ( db2 == db + __kmp_debug_buf_chars - 1 &&
+ *db2 == '\0' && *(db2-1) != '\n' ) {
+ *(db2-1) = '\n';
+ }
+
+ __kmp_printf_no_lock( "%4d: %.*s", i, __kmp_debug_buf_chars, db );
+ *db = '\0'; /* only let it print once! */
+ }
+
+ db += __kmp_debug_buf_chars;
+ if ( db >= db_end )
+ db = __kmp_debug_buffer;
+ }
+
+ __kmp_printf_no_lock( "End dump of debugging buffer (entry=%d).\n\n",
+ ( dc+i-1 ) % __kmp_debug_buf_lines );
+ __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
+ }
+}
diff --git a/contrib/libs/cxxsupp/openmp/kmp_debug.h b/contrib/libs/cxxsupp/openmp/kmp_debug.h
index 912d252b15..abc923edc6 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_debug.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_debug.h
@@ -1,131 +1,131 @@
-/*
- * kmp_debug.h -- debug / assertion code for Assure library
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_DEBUG_H
-#define KMP_DEBUG_H
-
-#include <stdarg.h>
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-// -------------------------------------------------------------------------------------------------
-// Build-time assertion.
-// -------------------------------------------------------------------------------------------------
-
-/*
- Build-time assertion can do compile-time checking of data structure sizes, etc. This works by
- declaring a negative-length array if the conditional expression evaluates to false. In that
- case, the compiler issues a syntax error and stops the compilation. If the expression is
- true, we get an extraneous static single character array in the scope of the macro.
-
- Usage:
-
- KMP_BUILD_ASSERT( sizeof( some_t ) <= 32 );
- KMP_BUILD_ASSERT( offsetof( some_t, field ) % 8 == 0 );
-
- Do not use _KMP_BUILD_ASSERT and __KMP_BUILD_ASSERT directly, it is working guts.
-*/
-
-#define __KMP_BUILD_ASSERT( expr, suffix ) typedef char __kmp_build_check_##suffix[ (expr) ? 1 : -1 ]
-#define _KMP_BUILD_ASSERT( expr, suffix ) __KMP_BUILD_ASSERT( (expr), suffix )
-#ifdef KMP_USE_ASSERT
- #define KMP_BUILD_ASSERT( expr ) _KMP_BUILD_ASSERT( (expr), __LINE__ )
-#else
- #define KMP_BUILD_ASSERT( expr ) /* nothing to do */
-#endif
-
-// -------------------------------------------------------------------------------------------------
-// Run-time assertions.
-// -------------------------------------------------------------------------------------------------
-
-extern void __kmp_dump_debug_buffer( void );
-
+/*
+ * kmp_debug.h -- debug / assertion code for Assure library
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_DEBUG_H
+#define KMP_DEBUG_H
+
+#include <stdarg.h>
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+// -------------------------------------------------------------------------------------------------
+// Build-time assertion.
+// -------------------------------------------------------------------------------------------------
+
+/*
+ Build-time assertion can do compile-time checking of data structure sizes, etc. This works by
+ declaring a negative-length array if the conditional expression evaluates to false. In that
+ case, the compiler issues a syntax error and stops the compilation. If the expression is
+ true, we get an extraneous static single character array in the scope of the macro.
+
+ Usage:
+
+ KMP_BUILD_ASSERT( sizeof( some_t ) <= 32 );
+ KMP_BUILD_ASSERT( offsetof( some_t, field ) % 8 == 0 );
+
+    Do not use _KMP_BUILD_ASSERT and __KMP_BUILD_ASSERT directly; they are internal implementation details.
+*/
+
+#define __KMP_BUILD_ASSERT( expr, suffix ) typedef char __kmp_build_check_##suffix[ (expr) ? 1 : -1 ]
+#define _KMP_BUILD_ASSERT( expr, suffix ) __KMP_BUILD_ASSERT( (expr), suffix )
#ifdef KMP_USE_ASSERT
- extern int __kmp_debug_assert( char const * expr, char const * file, int line );
- #ifdef KMP_DEBUG
- #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( #cond, __FILE__, __LINE__ ) )
- #define KMP_ASSERT2( cond, msg ) ( (cond) ? 0 : __kmp_debug_assert( (msg), __FILE__, __LINE__ ) )
- #define KMP_DEBUG_ASSERT( cond ) KMP_ASSERT( cond )
- #define KMP_DEBUG_ASSERT2( cond, msg ) KMP_ASSERT2( cond, msg )
- #else
- // Do not expose condition in release build. Use "assertion failure".
- #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( "assertion failure", __FILE__, __LINE__ ) )
- #define KMP_ASSERT2( cond, msg ) KMP_ASSERT( cond )
- #define KMP_DEBUG_ASSERT( cond ) 0
- #define KMP_DEBUG_ASSERT2( cond, msg ) 0
- #endif // KMP_DEBUG
-#else
- #define KMP_ASSERT( cond ) 0
- #define KMP_ASSERT2( cond, msg ) 0
- #define KMP_DEBUG_ASSERT( cond ) 0
- #define KMP_DEBUG_ASSERT2( cond, msg ) 0
-#endif // KMP_USE_ASSERT
-
-#ifdef KMP_DEBUG
- extern void __kmp_debug_printf_stdout( char const * format, ... );
-#endif
-extern void __kmp_debug_printf( char const * format, ... );
-
-#ifdef KMP_DEBUG
-
- extern int kmp_a_debug;
- extern int kmp_b_debug;
- extern int kmp_c_debug;
- extern int kmp_d_debug;
- extern int kmp_e_debug;
- extern int kmp_f_debug;
- extern int kmp_diag;
-
- #define KA_TRACE(d,x) if (kmp_a_debug >= d) { __kmp_debug_printf x ; }
- #define KB_TRACE(d,x) if (kmp_b_debug >= d) { __kmp_debug_printf x ; }
- #define KC_TRACE(d,x) if (kmp_c_debug >= d) { __kmp_debug_printf x ; }
- #define KD_TRACE(d,x) if (kmp_d_debug >= d) { __kmp_debug_printf x ; }
- #define KE_TRACE(d,x) if (kmp_e_debug >= d) { __kmp_debug_printf x ; }
- #define KF_TRACE(d,x) if (kmp_f_debug >= d) { __kmp_debug_printf x ; }
- #define K_DIAG(d,x) {if (kmp_diag == d) { __kmp_debug_printf_stdout x ; } }
-
- #define KA_DUMP(d,x) if (kmp_a_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
- #define KB_DUMP(d,x) if (kmp_b_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
- #define KC_DUMP(d,x) if (kmp_c_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
- #define KD_DUMP(d,x) if (kmp_d_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
- #define KE_DUMP(d,x) if (kmp_e_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
- #define KF_DUMP(d,x) if (kmp_f_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
-
-#else
-
- #define KA_TRACE(d,x) /* nothing to do */
- #define KB_TRACE(d,x) /* nothing to do */
- #define KC_TRACE(d,x) /* nothing to do */
- #define KD_TRACE(d,x) /* nothing to do */
- #define KE_TRACE(d,x) /* nothing to do */
- #define KF_TRACE(d,x) /* nothing to do */
- #define K_DIAG(d,x) {}/* nothing to do */
-
- #define KA_DUMP(d,x) /* nothing to do */
- #define KB_DUMP(d,x) /* nothing to do */
- #define KC_DUMP(d,x) /* nothing to do */
- #define KD_DUMP(d,x) /* nothing to do */
- #define KE_DUMP(d,x) /* nothing to do */
- #define KF_DUMP(d,x) /* nothing to do */
-
-#endif // KMP_DEBUG
-
-#ifdef __cplusplus
- } // extern "C"
-#endif // __cplusplus
-
-#endif /* KMP_DEBUG_H */
+ #define KMP_BUILD_ASSERT( expr ) _KMP_BUILD_ASSERT( (expr), __LINE__ )
+#else
+ #define KMP_BUILD_ASSERT( expr ) /* nothing to do */
+#endif
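To make the negative-length-array trick described above concrete, here is a minimal standalone illustration; the type and typedef names are hypothetical and not part of the runtime:

typedef struct { char tag; int payload; } my_packet_t;

/* Expands the same way KMP_BUILD_ASSERT does: a one-element char array if the
   condition holds, a compile error (negative array size) if it does not. */
typedef char check_packet_fits[ ( sizeof( my_packet_t ) <= 32 ) ? 1 : -1 ];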
+
+// -------------------------------------------------------------------------------------------------
+// Run-time assertions.
+// -------------------------------------------------------------------------------------------------
+
+extern void __kmp_dump_debug_buffer( void );
+
+#ifdef KMP_USE_ASSERT
+ extern int __kmp_debug_assert( char const * expr, char const * file, int line );
+ #ifdef KMP_DEBUG
+ #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( #cond, __FILE__, __LINE__ ) )
+ #define KMP_ASSERT2( cond, msg ) ( (cond) ? 0 : __kmp_debug_assert( (msg), __FILE__, __LINE__ ) )
+ #define KMP_DEBUG_ASSERT( cond ) KMP_ASSERT( cond )
+ #define KMP_DEBUG_ASSERT2( cond, msg ) KMP_ASSERT2( cond, msg )
+ #else
+ // Do not expose condition in release build. Use "assertion failure".
+ #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( "assertion failure", __FILE__, __LINE__ ) )
+ #define KMP_ASSERT2( cond, msg ) KMP_ASSERT( cond )
+ #define KMP_DEBUG_ASSERT( cond ) 0
+ #define KMP_DEBUG_ASSERT2( cond, msg ) 0
+ #endif // KMP_DEBUG
+#else
+ #define KMP_ASSERT( cond ) 0
+ #define KMP_ASSERT2( cond, msg ) 0
+ #define KMP_DEBUG_ASSERT( cond ) 0
+ #define KMP_DEBUG_ASSERT2( cond, msg ) 0
+#endif // KMP_USE_ASSERT
+
+#ifdef KMP_DEBUG
+ extern void __kmp_debug_printf_stdout( char const * format, ... );
+#endif
+extern void __kmp_debug_printf( char const * format, ... );
+
+#ifdef KMP_DEBUG
+
+ extern int kmp_a_debug;
+ extern int kmp_b_debug;
+ extern int kmp_c_debug;
+ extern int kmp_d_debug;
+ extern int kmp_e_debug;
+ extern int kmp_f_debug;
+ extern int kmp_diag;
+
+ #define KA_TRACE(d,x) if (kmp_a_debug >= d) { __kmp_debug_printf x ; }
+ #define KB_TRACE(d,x) if (kmp_b_debug >= d) { __kmp_debug_printf x ; }
+ #define KC_TRACE(d,x) if (kmp_c_debug >= d) { __kmp_debug_printf x ; }
+ #define KD_TRACE(d,x) if (kmp_d_debug >= d) { __kmp_debug_printf x ; }
+ #define KE_TRACE(d,x) if (kmp_e_debug >= d) { __kmp_debug_printf x ; }
+ #define KF_TRACE(d,x) if (kmp_f_debug >= d) { __kmp_debug_printf x ; }
+ #define K_DIAG(d,x) {if (kmp_diag == d) { __kmp_debug_printf_stdout x ; } }
+
+ #define KA_DUMP(d,x) if (kmp_a_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
+ #define KB_DUMP(d,x) if (kmp_b_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
+ #define KC_DUMP(d,x) if (kmp_c_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
+ #define KD_DUMP(d,x) if (kmp_d_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
+ #define KE_DUMP(d,x) if (kmp_e_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
+ #define KF_DUMP(d,x) if (kmp_f_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); }
+
+#else
+
+ #define KA_TRACE(d,x) /* nothing to do */
+ #define KB_TRACE(d,x) /* nothing to do */
+ #define KC_TRACE(d,x) /* nothing to do */
+ #define KD_TRACE(d,x) /* nothing to do */
+ #define KE_TRACE(d,x) /* nothing to do */
+ #define KF_TRACE(d,x) /* nothing to do */
+ #define K_DIAG(d,x) {}/* nothing to do */
+
+ #define KA_DUMP(d,x) /* nothing to do */
+ #define KB_DUMP(d,x) /* nothing to do */
+ #define KC_DUMP(d,x) /* nothing to do */
+ #define KD_DUMP(d,x) /* nothing to do */
+ #define KE_DUMP(d,x) /* nothing to do */
+ #define KF_DUMP(d,x) /* nothing to do */
+
+#endif // KMP_DEBUG
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif // __cplusplus
+
+#endif /* KMP_DEBUG_H */
diff --git a/contrib/libs/cxxsupp/openmp/kmp_debugger.c b/contrib/libs/cxxsupp/openmp/kmp_debugger.c
index c4180fd554..b3c1acb49b 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_debugger.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_debugger.c
@@ -1,314 +1,314 @@
-#if USE_DEBUGGER
-/*
- * kmp_debugger.c -- debugger support.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_lock.h"
-#include "kmp_omp.h"
-#include "kmp_str.h"
-
-/*
- NOTE: All variable names are known to the debugger, do not change!
-*/
-
-#ifdef __cplusplus
- extern "C" {
- extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info;
- } // extern "C"
-#endif // __cplusplus
-
-int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL.
-
-#define offset_and_size_of( structure, field ) \
- { \
- offsetof( structure, field ), \
- sizeof( ( (structure *) NULL)->field ) \
- }
-
-#define offset_and_size_not_available \
- { -1, -1 }
-
-#define addr_and_size_of( var ) \
- { \
- (kmp_uint64)( & var ), \
- sizeof( var ) \
- }
-
-#define nthr_buffer_size 1024
-static kmp_int32
-kmp_omp_nthr_info_buffer[ nthr_buffer_size ] =
- { nthr_buffer_size * sizeof( kmp_int32 ) };
-
-/* TODO: Check punctuation for various platforms here */
-static char func_microtask[] = "__kmp_invoke_microtask";
-static char func_fork[] = "__kmpc_fork_call";
-static char func_fork_teams[] = "__kmpc_fork_teams";
-
-
-// Various info about runtime structures: addresses, field offsets, sizes, etc.
-kmp_omp_struct_info_t
-__kmp_omp_debug_struct_info = {
-
- /* Change this only if you make a fundamental data structure change here */
- KMP_OMP_VERSION,
-
-    /* sanity check. Should only be checked if versions are identical.
-     * This is also used for backward compatibility to get the runtime
-     * structure size if the runtime is older than the interface */
- sizeof( kmp_omp_struct_info_t ),
-
- /* OpenMP RTL version info. */
- addr_and_size_of( __kmp_version_major ),
- addr_and_size_of( __kmp_version_minor ),
- addr_and_size_of( __kmp_version_build ),
- addr_and_size_of( __kmp_openmp_version ),
- { (kmp_uint64)( __kmp_copyright ) + KMP_VERSION_MAGIC_LEN, 0 }, // Skip magic prefix.
-
- /* Various globals. */
- addr_and_size_of( __kmp_threads ),
- addr_and_size_of( __kmp_root ),
- addr_and_size_of( __kmp_threads_capacity ),
- addr_and_size_of( __kmp_monitor ),
-#if ! KMP_USE_DYNAMIC_LOCK
- addr_and_size_of( __kmp_user_lock_table ),
-#endif
- addr_and_size_of( func_microtask ),
- addr_and_size_of( func_fork ),
- addr_and_size_of( func_fork_teams ),
- addr_and_size_of( __kmp_team_counter ),
- addr_and_size_of( __kmp_task_counter ),
- addr_and_size_of( kmp_omp_nthr_info_buffer ),
- sizeof( void * ),
- OMP_LOCK_T_SIZE < sizeof(void *),
- bs_last_barrier,
- TASK_DEQUE_SIZE,
-
- // thread structure information
- sizeof( kmp_base_info_t ),
- offset_and_size_of( kmp_base_info_t, th_info ),
- offset_and_size_of( kmp_base_info_t, th_team ),
- offset_and_size_of( kmp_base_info_t, th_root ),
- offset_and_size_of( kmp_base_info_t, th_serial_team ),
- offset_and_size_of( kmp_base_info_t, th_ident ),
- offset_and_size_of( kmp_base_info_t, th_spin_here ),
- offset_and_size_of( kmp_base_info_t, th_next_waiting ),
- offset_and_size_of( kmp_base_info_t, th_task_team ),
- offset_and_size_of( kmp_base_info_t, th_current_task ),
- offset_and_size_of( kmp_base_info_t, th_task_state ),
- offset_and_size_of( kmp_base_info_t, th_bar ),
- offset_and_size_of( kmp_bstate_t, b_worker_arrived ),
-
-#if OMP_40_ENABLED
- // teams information
- offset_and_size_of( kmp_base_info_t, th_teams_microtask),
- offset_and_size_of( kmp_base_info_t, th_teams_level),
- offset_and_size_of( kmp_teams_size_t, nteams ),
- offset_and_size_of( kmp_teams_size_t, nth ),
-#endif
-
- // kmp_desc structure (for info field above)
- sizeof( kmp_desc_base_t ),
- offset_and_size_of( kmp_desc_base_t, ds_tid ),
- offset_and_size_of( kmp_desc_base_t, ds_gtid ),
- // On Windows* OS, ds_thread contains a thread /handle/, which is not usable, while thread /id/
- // is in ds_thread_id.
- #if KMP_OS_WINDOWS
- offset_and_size_of( kmp_desc_base_t, ds_thread_id),
- #else
- offset_and_size_of( kmp_desc_base_t, ds_thread),
- #endif
-
- // team structure information
- sizeof( kmp_base_team_t ),
- offset_and_size_of( kmp_base_team_t, t_master_tid ),
- offset_and_size_of( kmp_base_team_t, t_ident ),
- offset_and_size_of( kmp_base_team_t, t_parent ),
- offset_and_size_of( kmp_base_team_t, t_nproc ),
- offset_and_size_of( kmp_base_team_t, t_threads ),
- offset_and_size_of( kmp_base_team_t, t_serialized ),
- offset_and_size_of( kmp_base_team_t, t_id ),
- offset_and_size_of( kmp_base_team_t, t_pkfn ),
- offset_and_size_of( kmp_base_team_t, t_task_team ),
- offset_and_size_of( kmp_base_team_t, t_implicit_task_taskdata ),
-#if OMP_40_ENABLED
- offset_and_size_of( kmp_base_team_t, t_cancel_request ),
-#endif
- offset_and_size_of( kmp_base_team_t, t_bar ),
- offset_and_size_of( kmp_balign_team_t, b_master_arrived ),
- offset_and_size_of( kmp_balign_team_t, b_team_arrived ),
-
- // root structure information
- sizeof( kmp_base_root_t ),
- offset_and_size_of( kmp_base_root_t, r_root_team ),
- offset_and_size_of( kmp_base_root_t, r_hot_team ),
- offset_and_size_of( kmp_base_root_t, r_uber_thread ),
- offset_and_size_not_available,
-
- // ident structure information
- sizeof( ident_t ),
- offset_and_size_of( ident_t, psource ),
- offset_and_size_of( ident_t, flags ),
-
- // lock structure information
- sizeof( kmp_base_queuing_lock_t ),
- offset_and_size_of( kmp_base_queuing_lock_t, initialized ),
- offset_and_size_of( kmp_base_queuing_lock_t, location ),
- offset_and_size_of( kmp_base_queuing_lock_t, tail_id ),
- offset_and_size_of( kmp_base_queuing_lock_t, head_id ),
- offset_and_size_of( kmp_base_queuing_lock_t, next_ticket ),
- offset_and_size_of( kmp_base_queuing_lock_t, now_serving ),
- offset_and_size_of( kmp_base_queuing_lock_t, owner_id ),
- offset_and_size_of( kmp_base_queuing_lock_t, depth_locked ),
- offset_and_size_of( kmp_base_queuing_lock_t, flags ),
-
-#if ! KMP_USE_DYNAMIC_LOCK
- /* Lock table. */
- sizeof( kmp_lock_table_t ),
- offset_and_size_of( kmp_lock_table_t, used ),
- offset_and_size_of( kmp_lock_table_t, allocated ),
- offset_and_size_of( kmp_lock_table_t, table ),
-#endif
-
- // Task team structure information.
- sizeof( kmp_base_task_team_t ),
- offset_and_size_of( kmp_base_task_team_t, tt_threads_data ),
- offset_and_size_of( kmp_base_task_team_t, tt_found_tasks ),
- offset_and_size_of( kmp_base_task_team_t, tt_nproc ),
- offset_and_size_of( kmp_base_task_team_t, tt_unfinished_threads ),
- offset_and_size_of( kmp_base_task_team_t, tt_active ),
-
- // task_data_t.
- sizeof( kmp_taskdata_t ),
- offset_and_size_of( kmp_taskdata_t, td_task_id ),
- offset_and_size_of( kmp_taskdata_t, td_flags ),
- offset_and_size_of( kmp_taskdata_t, td_team ),
- offset_and_size_of( kmp_taskdata_t, td_parent ),
- offset_and_size_of( kmp_taskdata_t, td_level ),
- offset_and_size_of( kmp_taskdata_t, td_ident ),
- offset_and_size_of( kmp_taskdata_t, td_allocated_child_tasks ),
- offset_and_size_of( kmp_taskdata_t, td_incomplete_child_tasks ),
-
- offset_and_size_of( kmp_taskdata_t, td_taskwait_ident ),
- offset_and_size_of( kmp_taskdata_t, td_taskwait_counter ),
- offset_and_size_of( kmp_taskdata_t, td_taskwait_thread ),
-
-#if OMP_40_ENABLED
- offset_and_size_of( kmp_taskdata_t, td_taskgroup ),
- offset_and_size_of( kmp_taskgroup_t, count ),
- offset_and_size_of( kmp_taskgroup_t, cancel_request ),
-
- offset_and_size_of( kmp_taskdata_t, td_depnode ),
- offset_and_size_of( kmp_depnode_list_t, node ),
- offset_and_size_of( kmp_depnode_list_t, next ),
- offset_and_size_of( kmp_base_depnode_t, successors ),
- offset_and_size_of( kmp_base_depnode_t, task ),
- offset_and_size_of( kmp_base_depnode_t, npredecessors ),
- offset_and_size_of( kmp_base_depnode_t, nrefs ),
-#endif
- offset_and_size_of( kmp_task_t, routine ),
-
- // thread_data_t.
- sizeof( kmp_thread_data_t ),
- offset_and_size_of( kmp_base_thread_data_t, td_deque ),
- offset_and_size_of( kmp_base_thread_data_t, td_deque_head ),
- offset_and_size_of( kmp_base_thread_data_t, td_deque_tail ),
- offset_and_size_of( kmp_base_thread_data_t, td_deque_ntasks ),
- offset_and_size_of( kmp_base_thread_data_t, td_deque_last_stolen ),
-
- // The last field.
- KMP_OMP_VERSION,
-
-}; // __kmp_omp_debug_struct_info
-
-#undef offset_and_size_of
-#undef addr_and_size_of
-
-/*
- Intel compiler on IA-32 architecture issues a warning "conversion
- from "unsigned long long" to "char *" may lose significant bits"
- when 64-bit value is assigned to 32-bit pointer. Use this function
- to suppress the warning.
-*/
-static inline
-void *
-__kmp_convert_to_ptr(
- kmp_uint64 addr
-) {
- #if KMP_COMPILER_ICC
- #pragma warning( push )
- #pragma warning( disable: 810 ) // conversion from "unsigned long long" to "char *" may lose significant bits
- #pragma warning( disable: 1195 ) // conversion from integer to smaller pointer
- #endif // KMP_COMPILER_ICC
- return (void *) addr;
- #if KMP_COMPILER_ICC
- #pragma warning( pop )
- #endif // KMP_COMPILER_ICC
-} // __kmp_convert_to_ptr
-
-
-static int
-kmp_location_match(
- kmp_str_loc_t * loc,
- kmp_omp_nthr_item_t * item
-) {
-
- int file_match = 0;
- int func_match = 0;
- int line_match = 0;
-
- char * file = (char *) __kmp_convert_to_ptr( item->file );
- char * func = (char *) __kmp_convert_to_ptr( item->func );
- file_match = __kmp_str_fname_match( & loc->fname, file );
- func_match =
- item->func == 0 // If item->func is NULL, it allows any func name.
- ||
- strcmp( func, "*" ) == 0
- ||
- ( loc->func != NULL && strcmp( loc->func, func ) == 0 );
- line_match =
- item->begin <= loc->line
- &&
- ( item->end <= 0 || loc->line <= item->end ); // if item->end <= 0, it means "end of file".
-
- return ( file_match && func_match && line_match );
-
-} // kmp_location_match
-
-
-int
-__kmp_omp_num_threads(
- ident_t const * ident
-) {
-
- int num_threads = 0;
-
- kmp_omp_nthr_info_t * info =
- (kmp_omp_nthr_info_t *) __kmp_convert_to_ptr( __kmp_omp_debug_struct_info.nthr_info.addr );
- if ( info->num > 0 && info->array != 0 ) {
- kmp_omp_nthr_item_t * items = (kmp_omp_nthr_item_t *) __kmp_convert_to_ptr( info->array );
- kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 1 );
- int i;
- for ( i = 0; i < info->num; ++ i ) {
- if ( kmp_location_match( & loc, & items[ i ] ) ) {
- num_threads = items[ i ].num_threads;
- }; // if
- }; // for
- __kmp_str_loc_free( & loc );
- }; // if
-
-    return num_threads;
-
-} // __kmp_omp_num_threads
-#endif /* USE_DEBUGGER */
+#if USE_DEBUGGER
+/*
+ * kmp_debugger.c -- debugger support.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_lock.h"
+#include "kmp_omp.h"
+#include "kmp_str.h"
+
+/*
+ NOTE: All variable names are known to the debugger, do not change!
+*/
+
+#ifdef __cplusplus
+ extern "C" {
+ extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info;
+ } // extern "C"
+#endif // __cplusplus
+
+int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL.
+
+#define offset_and_size_of( structure, field ) \
+ { \
+ offsetof( structure, field ), \
+ sizeof( ( (structure *) NULL)->field ) \
+ }
+
+#define offset_and_size_not_available \
+ { -1, -1 }
+
+#define addr_and_size_of( var ) \
+ { \
+ (kmp_uint64)( & var ), \
+ sizeof( var ) \
+ }
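The macros above record field offsets, field sizes, and global addresses for the debugger to read. A minimal sketch of the same offsetof/sizeof pattern on a hypothetical struct, independent of the runtime:

#include <stddef.h>   /* offsetof */
#include <stdint.h>

typedef struct { uint64_t addr_or_offset; int64_t size; } field_info_t;
typedef struct { int id; void * data; } sample_t;

static field_info_t sample_fields[] = {
    { offsetof( sample_t, id   ), sizeof( ((sample_t *) 0)->id   ) },
    { offsetof( sample_t, data ), sizeof( ((sample_t *) 0)->data ) },
};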
+
+#define nthr_buffer_size 1024
+static kmp_int32
+kmp_omp_nthr_info_buffer[ nthr_buffer_size ] =
+ { nthr_buffer_size * sizeof( kmp_int32 ) };
+
+/* TODO: Check punctuation for various platforms here */
+static char func_microtask[] = "__kmp_invoke_microtask";
+static char func_fork[] = "__kmpc_fork_call";
+static char func_fork_teams[] = "__kmpc_fork_teams";
+
+
+// Various info about runtime structures: addresses, field offsets, sizes, etc.
+kmp_omp_struct_info_t
+__kmp_omp_debug_struct_info = {
+
+ /* Change this only if you make a fundamental data structure change here */
+ KMP_OMP_VERSION,
+
+    /* sanity check. Should only be checked if versions are identical.
+     * This is also used for backward compatibility to get the runtime
+     * structure size if the runtime is older than the interface */
+ sizeof( kmp_omp_struct_info_t ),
+
+ /* OpenMP RTL version info. */
+ addr_and_size_of( __kmp_version_major ),
+ addr_and_size_of( __kmp_version_minor ),
+ addr_and_size_of( __kmp_version_build ),
+ addr_and_size_of( __kmp_openmp_version ),
+ { (kmp_uint64)( __kmp_copyright ) + KMP_VERSION_MAGIC_LEN, 0 }, // Skip magic prefix.
+
+ /* Various globals. */
+ addr_and_size_of( __kmp_threads ),
+ addr_and_size_of( __kmp_root ),
+ addr_and_size_of( __kmp_threads_capacity ),
+ addr_and_size_of( __kmp_monitor ),
+#if ! KMP_USE_DYNAMIC_LOCK
+ addr_and_size_of( __kmp_user_lock_table ),
+#endif
+ addr_and_size_of( func_microtask ),
+ addr_and_size_of( func_fork ),
+ addr_and_size_of( func_fork_teams ),
+ addr_and_size_of( __kmp_team_counter ),
+ addr_and_size_of( __kmp_task_counter ),
+ addr_and_size_of( kmp_omp_nthr_info_buffer ),
+ sizeof( void * ),
+ OMP_LOCK_T_SIZE < sizeof(void *),
+ bs_last_barrier,
+ TASK_DEQUE_SIZE,
+
+ // thread structure information
+ sizeof( kmp_base_info_t ),
+ offset_and_size_of( kmp_base_info_t, th_info ),
+ offset_and_size_of( kmp_base_info_t, th_team ),
+ offset_and_size_of( kmp_base_info_t, th_root ),
+ offset_and_size_of( kmp_base_info_t, th_serial_team ),
+ offset_and_size_of( kmp_base_info_t, th_ident ),
+ offset_and_size_of( kmp_base_info_t, th_spin_here ),
+ offset_and_size_of( kmp_base_info_t, th_next_waiting ),
+ offset_and_size_of( kmp_base_info_t, th_task_team ),
+ offset_and_size_of( kmp_base_info_t, th_current_task ),
+ offset_and_size_of( kmp_base_info_t, th_task_state ),
+ offset_and_size_of( kmp_base_info_t, th_bar ),
+ offset_and_size_of( kmp_bstate_t, b_worker_arrived ),
+
+#if OMP_40_ENABLED
+ // teams information
+ offset_and_size_of( kmp_base_info_t, th_teams_microtask),
+ offset_and_size_of( kmp_base_info_t, th_teams_level),
+ offset_and_size_of( kmp_teams_size_t, nteams ),
+ offset_and_size_of( kmp_teams_size_t, nth ),
+#endif
+
+ // kmp_desc structure (for info field above)
+ sizeof( kmp_desc_base_t ),
+ offset_and_size_of( kmp_desc_base_t, ds_tid ),
+ offset_and_size_of( kmp_desc_base_t, ds_gtid ),
+ // On Windows* OS, ds_thread contains a thread /handle/, which is not usable, while thread /id/
+ // is in ds_thread_id.
+ #if KMP_OS_WINDOWS
+ offset_and_size_of( kmp_desc_base_t, ds_thread_id),
+ #else
+ offset_and_size_of( kmp_desc_base_t, ds_thread),
+ #endif
+
+ // team structure information
+ sizeof( kmp_base_team_t ),
+ offset_and_size_of( kmp_base_team_t, t_master_tid ),
+ offset_and_size_of( kmp_base_team_t, t_ident ),
+ offset_and_size_of( kmp_base_team_t, t_parent ),
+ offset_and_size_of( kmp_base_team_t, t_nproc ),
+ offset_and_size_of( kmp_base_team_t, t_threads ),
+ offset_and_size_of( kmp_base_team_t, t_serialized ),
+ offset_and_size_of( kmp_base_team_t, t_id ),
+ offset_and_size_of( kmp_base_team_t, t_pkfn ),
+ offset_and_size_of( kmp_base_team_t, t_task_team ),
+ offset_and_size_of( kmp_base_team_t, t_implicit_task_taskdata ),
+#if OMP_40_ENABLED
+ offset_and_size_of( kmp_base_team_t, t_cancel_request ),
+#endif
+ offset_and_size_of( kmp_base_team_t, t_bar ),
+ offset_and_size_of( kmp_balign_team_t, b_master_arrived ),
+ offset_and_size_of( kmp_balign_team_t, b_team_arrived ),
+
+ // root structure information
+ sizeof( kmp_base_root_t ),
+ offset_and_size_of( kmp_base_root_t, r_root_team ),
+ offset_and_size_of( kmp_base_root_t, r_hot_team ),
+ offset_and_size_of( kmp_base_root_t, r_uber_thread ),
+ offset_and_size_not_available,
+
+ // ident structure information
+ sizeof( ident_t ),
+ offset_and_size_of( ident_t, psource ),
+ offset_and_size_of( ident_t, flags ),
+
+ // lock structure information
+ sizeof( kmp_base_queuing_lock_t ),
+ offset_and_size_of( kmp_base_queuing_lock_t, initialized ),
+ offset_and_size_of( kmp_base_queuing_lock_t, location ),
+ offset_and_size_of( kmp_base_queuing_lock_t, tail_id ),
+ offset_and_size_of( kmp_base_queuing_lock_t, head_id ),
+ offset_and_size_of( kmp_base_queuing_lock_t, next_ticket ),
+ offset_and_size_of( kmp_base_queuing_lock_t, now_serving ),
+ offset_and_size_of( kmp_base_queuing_lock_t, owner_id ),
+ offset_and_size_of( kmp_base_queuing_lock_t, depth_locked ),
+ offset_and_size_of( kmp_base_queuing_lock_t, flags ),
+
+#if ! KMP_USE_DYNAMIC_LOCK
+ /* Lock table. */
+ sizeof( kmp_lock_table_t ),
+ offset_and_size_of( kmp_lock_table_t, used ),
+ offset_and_size_of( kmp_lock_table_t, allocated ),
+ offset_and_size_of( kmp_lock_table_t, table ),
+#endif
+
+ // Task team structure information.
+ sizeof( kmp_base_task_team_t ),
+ offset_and_size_of( kmp_base_task_team_t, tt_threads_data ),
+ offset_and_size_of( kmp_base_task_team_t, tt_found_tasks ),
+ offset_and_size_of( kmp_base_task_team_t, tt_nproc ),
+ offset_and_size_of( kmp_base_task_team_t, tt_unfinished_threads ),
+ offset_and_size_of( kmp_base_task_team_t, tt_active ),
+
+ // task_data_t.
+ sizeof( kmp_taskdata_t ),
+ offset_and_size_of( kmp_taskdata_t, td_task_id ),
+ offset_and_size_of( kmp_taskdata_t, td_flags ),
+ offset_and_size_of( kmp_taskdata_t, td_team ),
+ offset_and_size_of( kmp_taskdata_t, td_parent ),
+ offset_and_size_of( kmp_taskdata_t, td_level ),
+ offset_and_size_of( kmp_taskdata_t, td_ident ),
+ offset_and_size_of( kmp_taskdata_t, td_allocated_child_tasks ),
+ offset_and_size_of( kmp_taskdata_t, td_incomplete_child_tasks ),
+
+ offset_and_size_of( kmp_taskdata_t, td_taskwait_ident ),
+ offset_and_size_of( kmp_taskdata_t, td_taskwait_counter ),
+ offset_and_size_of( kmp_taskdata_t, td_taskwait_thread ),
+
+#if OMP_40_ENABLED
+ offset_and_size_of( kmp_taskdata_t, td_taskgroup ),
+ offset_and_size_of( kmp_taskgroup_t, count ),
+ offset_and_size_of( kmp_taskgroup_t, cancel_request ),
+
+ offset_and_size_of( kmp_taskdata_t, td_depnode ),
+ offset_and_size_of( kmp_depnode_list_t, node ),
+ offset_and_size_of( kmp_depnode_list_t, next ),
+ offset_and_size_of( kmp_base_depnode_t, successors ),
+ offset_and_size_of( kmp_base_depnode_t, task ),
+ offset_and_size_of( kmp_base_depnode_t, npredecessors ),
+ offset_and_size_of( kmp_base_depnode_t, nrefs ),
+#endif
+ offset_and_size_of( kmp_task_t, routine ),
+
+ // thread_data_t.
+ sizeof( kmp_thread_data_t ),
+ offset_and_size_of( kmp_base_thread_data_t, td_deque ),
+ offset_and_size_of( kmp_base_thread_data_t, td_deque_head ),
+ offset_and_size_of( kmp_base_thread_data_t, td_deque_tail ),
+ offset_and_size_of( kmp_base_thread_data_t, td_deque_ntasks ),
+ offset_and_size_of( kmp_base_thread_data_t, td_deque_last_stolen ),
+
+ // The last field.
+ KMP_OMP_VERSION,
+
+}; // __kmp_omp_debug_struct_info
+
+#undef offset_and_size_of
+#undef addr_and_size_of
+
+/*
+ Intel compiler on IA-32 architecture issues a warning "conversion
+ from "unsigned long long" to "char *" may lose significant bits"
+ when 64-bit value is assigned to 32-bit pointer. Use this function
+ to suppress the warning.
+*/
+static inline
+void *
+__kmp_convert_to_ptr(
+ kmp_uint64 addr
+) {
+ #if KMP_COMPILER_ICC
+ #pragma warning( push )
+ #pragma warning( disable: 810 ) // conversion from "unsigned long long" to "char *" may lose significant bits
+ #pragma warning( disable: 1195 ) // conversion from integer to smaller pointer
+ #endif // KMP_COMPILER_ICC
+ return (void *) addr;
+ #if KMP_COMPILER_ICC
+ #pragma warning( pop )
+ #endif // KMP_COMPILER_ICC
+} // __kmp_convert_to_ptr
+
+
+static int
+kmp_location_match(
+ kmp_str_loc_t * loc,
+ kmp_omp_nthr_item_t * item
+) {
+
+ int file_match = 0;
+ int func_match = 0;
+ int line_match = 0;
+
+ char * file = (char *) __kmp_convert_to_ptr( item->file );
+ char * func = (char *) __kmp_convert_to_ptr( item->func );
+ file_match = __kmp_str_fname_match( & loc->fname, file );
+ func_match =
+ item->func == 0 // If item->func is NULL, it allows any func name.
+ ||
+ strcmp( func, "*" ) == 0
+ ||
+ ( loc->func != NULL && strcmp( loc->func, func ) == 0 );
+ line_match =
+ item->begin <= loc->line
+ &&
+ ( item->end <= 0 || loc->line <= item->end ); // if item->end <= 0, it means "end of file".
+
+ return ( file_match && func_match && line_match );
+
+} // kmp_location_match
+
+
+int
+__kmp_omp_num_threads(
+ ident_t const * ident
+) {
+
+ int num_threads = 0;
+
+ kmp_omp_nthr_info_t * info =
+ (kmp_omp_nthr_info_t *) __kmp_convert_to_ptr( __kmp_omp_debug_struct_info.nthr_info.addr );
+ if ( info->num > 0 && info->array != 0 ) {
+ kmp_omp_nthr_item_t * items = (kmp_omp_nthr_item_t *) __kmp_convert_to_ptr( info->array );
+ kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 1 );
+ int i;
+ for ( i = 0; i < info->num; ++ i ) {
+ if ( kmp_location_match( & loc, & items[ i ] ) ) {
+ num_threads = items[ i ].num_threads;
+ }; // if
+ }; // for
+ __kmp_str_loc_free( & loc );
+ }; // if
+
+    return num_threads;
+
+} // __kmp_omp_num_threads
+#endif /* USE_DEBUGGER */
diff --git a/contrib/libs/cxxsupp/openmp/kmp_debugger.h b/contrib/libs/cxxsupp/openmp/kmp_debugger.h
index ae75706a3e..29f41340dd 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_debugger.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_debugger.h
@@ -1,51 +1,51 @@
-#if USE_DEBUGGER
-/*
- * kmp_debugger.h -- debugger support.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_DEBUGGER_H
-#define KMP_DEBUGGER_H
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-/* * This external variable can be set by any debugger to flag to the runtime that we
- are currently executing inside a debugger. This will allow the debugger to override
- the number of threads spawned in a parallel region by using __kmp_omp_num_threads() (below).
- * When __kmp_debugging is TRUE, each team and each task gets a unique integer identifier
- that can be used by debugger to conveniently identify teams and tasks.
- * The debugger has access to __kmp_omp_debug_struct_info which contains information
- about the OpenMP library's important internal structures. This access will allow the debugger
- to read detailed information from the typical OpenMP constructs (teams, threads, tasking, etc. )
- during a debugging session and offer detailed and useful information which the user can probe
- about the OpenMP portion of their code.
- */
-extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */
-// Return number of threads specified by the debugger for given parallel region.
-/* The ident field, which represents a source file location, is used to check if the
- debugger has changed the number of threads for the parallel region at source file
- location ident. This way, specific parallel regions' number of threads can be changed
- at the debugger's request.
- */
-int __kmp_omp_num_threads( ident_t const * ident );
-
-#ifdef __cplusplus
- } // extern "C"
-#endif // __cplusplus
-
-
-#endif // KMP_DEBUGGER_H
-
-#endif // USE_DEBUGGER
+#if USE_DEBUGGER
+/*
+ * kmp_debugger.h -- debugger support.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_DEBUGGER_H
+#define KMP_DEBUGGER_H
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+/* * This external variable can be set by any debugger to flag to the runtime that we
+ are currently executing inside a debugger. This will allow the debugger to override
+ the number of threads spawned in a parallel region by using __kmp_omp_num_threads() (below).
+ * When __kmp_debugging is TRUE, each team and each task gets a unique integer identifier
+ that can be used by debugger to conveniently identify teams and tasks.
+ * The debugger has access to __kmp_omp_debug_struct_info which contains information
+ about the OpenMP library's important internal structures. This access will allow the debugger
+ to read detailed information from the typical OpenMP constructs (teams, threads, tasking, etc. )
+ during a debugging session and offer detailed and useful information which the user can probe
+ about the OpenMP portion of their code.
+ */
+extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */
+// Return number of threads specified by the debugger for given parallel region.
+/* The ident field, which represents a source file location, is used to check if the
+ debugger has changed the number of threads for the parallel region at source file
+ location ident. This way, specific parallel regions' number of threads can be changed
+ at the debugger's request.
+ */
+int __kmp_omp_num_threads( ident_t const * ident );
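As a hedged illustration of how this hook might be consumed, a hypothetical helper (not the runtime's actual fork path) that prefers a debugger override when one is active:

static int
example_choose_num_threads( ident_t const * loc, int nthreads_requested )
{
    if ( __kmp_debugging ) {
        int dbg_nth = __kmp_omp_num_threads( loc );  /* debugger-specified count for this region */
        if ( dbg_nth > 0 ) {
            return dbg_nth;
        }
    }
    return nthreads_requested;  /* no override: keep the requested count */
}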
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif // __cplusplus
+
+
+#endif // KMP_DEBUGGER_H
+
+#endif // USE_DEBUGGER
diff --git a/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp b/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp
index db8c20a64c..c91bb8da3c 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_dispatch.cpp
@@ -1,2674 +1,2674 @@
-/*
- * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-/*
- * Dynamic scheduling initialization and dispatch.
- *
- * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
- * it may change values between parallel regions. __kmp_max_nth
- * is the largest value __kmp_nth may take, 1 is the smallest.
- *
- */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_str.h"
-#include "kmp_error.h"
-#include "kmp_stats.h"
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
- #include <float.h>
-#endif
-
-#if OMPT_SUPPORT
-#include "ompt-internal.h"
-#include "ompt-specific.h"
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-// template for type limits
-template< typename T >
-struct i_maxmin {
- static const T mx;
- static const T mn;
-};
-template<>
-struct i_maxmin< int > {
- static const int mx = 0x7fffffff;
- static const int mn = 0x80000000;
-};
-template<>
-struct i_maxmin< unsigned int > {
- static const unsigned int mx = 0xffffffff;
- static const unsigned int mn = 0x00000000;
-};
-template<>
-struct i_maxmin< long long > {
- static const long long mx = 0x7fffffffffffffffLL;
- static const long long mn = 0x8000000000000000LL;
-};
-template<>
-struct i_maxmin< unsigned long long > {
- static const unsigned long long mx = 0xffffffffffffffffLL;
- static const unsigned long long mn = 0x0000000000000000LL;
-};
-//-------------------------------------------------------------------------
-
-#ifdef KMP_STATIC_STEAL_ENABLED
-
- // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
- template< typename T >
- struct dispatch_private_infoXX_template {
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
- UT count; // unsigned
- T ub;
- /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
- T lb;
- ST st; // signed
- UT tc; // unsigned
- T static_steal_counter; // for static_steal only; maybe better to put after ub
-
- /* parm[1-4] are used in different ways by different scheduling algorithms */
-
- // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
- // a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
-        // Because parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
-
- struct KMP_ALIGN( 32 ) { // compiler does not accept sizeof(T)*4
- T parm1;
- T parm2;
- T parm3;
- T parm4;
- };
-
- UT ordered_lower; // unsigned
- UT ordered_upper; // unsigned
- #if KMP_OS_WINDOWS
- T last_upper;
- #endif /* KMP_OS_WINDOWS */
- };
-
-#else /* KMP_STATIC_STEAL_ENABLED */
-
- // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
- template< typename T >
- struct dispatch_private_infoXX_template {
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
- T lb;
- T ub;
- ST st; // signed
- UT tc; // unsigned
-
- T parm1;
- T parm2;
- T parm3;
- T parm4;
-
- UT count; // unsigned
-
- UT ordered_lower; // unsigned
- UT ordered_upper; // unsigned
- #if KMP_OS_WINDOWS
- T last_upper;
- #endif /* KMP_OS_WINDOWS */
- };
-
-#endif /* KMP_STATIC_STEAL_ENABLED */
-
-// replaces dispatch_private_info structure and dispatch_private_info_t type
-template< typename T >
-struct KMP_ALIGN_CACHE dispatch_private_info_template {
- // duplicate alignment here, otherwise size of structure is not correct in our compiler
- union KMP_ALIGN_CACHE private_info_tmpl {
- dispatch_private_infoXX_template< T > p;
- dispatch_private_info64_t p64;
- } u;
- enum sched_type schedule; /* scheduling algorithm */
- kmp_uint32 ordered; /* ordered clause specified */
- kmp_uint32 ordered_bumped;
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making order
- dispatch_private_info * next; /* stack of buffers for nest of serial regions */
- kmp_uint32 nomerge; /* don't merge iters if serialized */
- kmp_uint32 type_size;
- enum cons_type pushed_ws;
-};
-
-
-// replaces dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types
-template< typename UT >
-struct dispatch_shared_infoXX_template {
- /* chunk index under dynamic, number of idle threads under static-steal;
- iteration index otherwise */
- volatile UT iteration;
- volatile UT num_done;
- volatile UT ordered_iteration;
- UT ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size making ordered_iteration scalar
-};
-
-// replaces dispatch_shared_info structure and dispatch_shared_info_t type
-template< typename UT >
-struct dispatch_shared_info_template {
- // we need union here to keep the structure size
- union shared_info_tmpl {
- dispatch_shared_infoXX_template< UT > s;
- dispatch_shared_info64_t s64;
- } u;
- volatile kmp_uint32 buffer_index;
-};
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#undef USE_TEST_LOCKS
-
-// test_then_add template (general template should NOT be used)
-template< typename T >
-static __forceinline T
-test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };
-
-template<>
-__forceinline kmp_int32
-test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
-{
- kmp_int32 r;
- r = KMP_TEST_THEN_ADD32( p, d );
- return r;
-}
-
-template<>
-__forceinline kmp_int64
-test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 r;
- r = KMP_TEST_THEN_ADD64( p, d );
- return r;
-}
-
-// test_then_inc_acq template (general template should NOT be used)
-template< typename T >
-static __forceinline T
-test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };
-
-template<>
-__forceinline kmp_int32
-test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
-{
- kmp_int32 r;
- r = KMP_TEST_THEN_INC_ACQ32( p );
- return r;
-}
-
-template<>
-__forceinline kmp_int64
-test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
-{
- kmp_int64 r;
- r = KMP_TEST_THEN_INC_ACQ64( p );
- return r;
-}
-
-// test_then_inc template (general template should NOT be used)
-template< typename T >
-static __forceinline T
-test_then_inc( volatile T *p ) { KMP_ASSERT(0); };
-
-template<>
-__forceinline kmp_int32
-test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
-{
- kmp_int32 r;
- r = KMP_TEST_THEN_INC32( p );
- return r;
-}
-
-template<>
-__forceinline kmp_int64
-test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
-{
- kmp_int64 r;
- r = KMP_TEST_THEN_INC64( p );
- return r;
-}
-
-// compare_and_swap template (general template should NOT be used)
-template< typename T >
-static __forceinline kmp_int32
-compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };
-
-template<>
-__forceinline kmp_int32
-compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
-{
- return KMP_COMPARE_AND_STORE_REL32( p, c, s );
-}
-
-template<>
-__forceinline kmp_int32
-compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
-{
- return KMP_COMPARE_AND_STORE_REL64( p, c, s );
-}
-
-/*
- Spin wait loop that first does pause, then yield.
- Waits until function returns non-zero when called with *spinner and check.
- Does NOT put threads to sleep.
-#if USE_ITT_BUILD
-    Arguments:
-        obj -- is the higher-level synchronization object to report to ittnotify. It is used to report
-                locks consistently. For example, if the lock is acquired immediately, its address is
-                reported to ittnotify via KMP_FSYNC_ACQUIRED(). However, if the lock cannot be acquired
-                immediately and the lock routine calls KMP_WAIT_YIELD(), the latter should report the same
-                address, not the address of the low-level spinner.
-#endif // USE_ITT_BUILD
-*/
-template< typename UT >
-// ToDo: make inline function (move to header file for icl)
-static UT // unsigned 4- or 8-byte type
-__kmp_wait_yield( volatile UT * spinner,
- UT checker,
- kmp_uint32 (* pred)( UT, UT )
- USE_ITT_BUILD_ARG(void * obj) // Higher-level synchronization object, or NULL.
- )
-{
- // note: we may not belong to a team at this point
+/*
+ * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ * Dynamic scheduling initialization and dispatch.
+ *
+ * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
+ * it may change values between parallel regions. __kmp_max_nth
+ * is the largest value __kmp_nth may take, 1 is the smallest.
+ *
+ */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_itt.h"
+#include "kmp_str.h"
+#include "kmp_error.h"
+#include "kmp_stats.h"
+#if KMP_OS_WINDOWS && KMP_ARCH_X86
+ #include <float.h>
+#endif
+
+#if OMPT_SUPPORT
+#include "ompt-internal.h"
+#include "ompt-specific.h"
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+// template for type limits
+template< typename T >
+struct i_maxmin {
+ static const T mx;
+ static const T mn;
+};
+template<>
+struct i_maxmin< int > {
+ static const int mx = 0x7fffffff;
+ static const int mn = 0x80000000;
+};
+template<>
+struct i_maxmin< unsigned int > {
+ static const unsigned int mx = 0xffffffff;
+ static const unsigned int mn = 0x00000000;
+};
+template<>
+struct i_maxmin< long long > {
+ static const long long mx = 0x7fffffffffffffffLL;
+ static const long long mn = 0x8000000000000000LL;
+};
+template<>
+struct i_maxmin< unsigned long long > {
+ static const unsigned long long mx = 0xffffffffffffffffLL;
+ static const unsigned long long mn = 0x0000000000000000LL;
+};
+//-------------------------------------------------------------------------
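As a standalone sanity check (C++11, illustration only, not part of the runtime), the hand-written maxima above agree with std::numeric_limits:

#include <limits>

static_assert( i_maxmin< int >::mx == std::numeric_limits< int >::max(),
               "i_maxmin<int>::mx matches numeric_limits" );
static_assert( i_maxmin< unsigned long long >::mx ==
                   std::numeric_limits< unsigned long long >::max(),
               "i_maxmin<unsigned long long>::mx matches numeric_limits" );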
+
+#ifdef KMP_STATIC_STEAL_ENABLED
+
+ // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
+ template< typename T >
+ struct dispatch_private_infoXX_template {
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
+ UT count; // unsigned
+ T ub;
+ /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
+ T lb;
+ ST st; // signed
+ UT tc; // unsigned
+ T static_steal_counter; // for static_steal only; maybe better to put after ub
+
+ /* parm[1-4] are used in different ways by different scheduling algorithms */
+
+ // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+ // a) parm3 is properly aligned and
+ // b) all parm1-4 are in the same cache line.
+        // Because parm1-4 are used together, performance seems to be better
+ // if they are in the same line (not measured though).
+
+ struct KMP_ALIGN( 32 ) { // compiler does not accept sizeof(T)*4
+ T parm1;
+ T parm2;
+ T parm3;
+ T parm4;
+ };
+
+ UT ordered_lower; // unsigned
+ UT ordered_upper; // unsigned
+ #if KMP_OS_WINDOWS
+ T last_upper;
+ #endif /* KMP_OS_WINDOWS */
+ };
+
+#else /* KMP_STATIC_STEAL_ENABLED */
+
+ // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
+ template< typename T >
+ struct dispatch_private_infoXX_template {
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
+ T lb;
+ T ub;
+ ST st; // signed
+ UT tc; // unsigned
+
+ T parm1;
+ T parm2;
+ T parm3;
+ T parm4;
+
+ UT count; // unsigned
+
+ UT ordered_lower; // unsigned
+ UT ordered_upper; // unsigned
+ #if KMP_OS_WINDOWS
+ T last_upper;
+ #endif /* KMP_OS_WINDOWS */
+ };
+
+#endif /* KMP_STATIC_STEAL_ENABLED */
+
+// replaces dispatch_private_info structure and dispatch_private_info_t type
+template< typename T >
+struct KMP_ALIGN_CACHE dispatch_private_info_template {
+ // duplicate alignment here, otherwise size of structure is not correct in our compiler
+ union KMP_ALIGN_CACHE private_info_tmpl {
+ dispatch_private_infoXX_template< T > p;
+ dispatch_private_info64_t p64;
+ } u;
+ enum sched_type schedule; /* scheduling algorithm */
+ kmp_uint32 ordered; /* ordered clause specified */
+ kmp_uint32 ordered_bumped;
+ kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making order
+ dispatch_private_info * next; /* stack of buffers for nest of serial regions */
+ kmp_uint32 nomerge; /* don't merge iters if serialized */
+ kmp_uint32 type_size;
+ enum cons_type pushed_ws;
+};
+
+
+// replaces dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types
+template< typename UT >
+struct dispatch_shared_infoXX_template {
+ /* chunk index under dynamic, number of idle threads under static-steal;
+ iteration index otherwise */
+ volatile UT iteration;
+ volatile UT num_done;
+ volatile UT ordered_iteration;
+ UT ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size making ordered_iteration scalar
+};
+
+// replaces dispatch_shared_info structure and dispatch_shared_info_t type
+template< typename UT >
+struct dispatch_shared_info_template {
+ // we need union here to keep the structure size
+ union shared_info_tmpl {
+ dispatch_shared_infoXX_template< UT > s;
+ dispatch_shared_info64_t s64;
+ } u;
+ volatile kmp_uint32 buffer_index;
+};
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#undef USE_TEST_LOCKS
+
+// test_then_add template (general template should NOT be used)
+template< typename T >
+static __forceinline T
+test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };
+
+template<>
+__forceinline kmp_int32
+test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
+{
+ kmp_int32 r;
+ r = KMP_TEST_THEN_ADD32( p, d );
+ return r;
+}
+
+template<>
+__forceinline kmp_int64
+test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 r;
+ r = KMP_TEST_THEN_ADD64( p, d );
+ return r;
+}
+
+// test_then_inc_acq template (general template should NOT be used)
+template< typename T >
+static __forceinline T
+test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };
+
+template<>
+__forceinline kmp_int32
+test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
+{
+ kmp_int32 r;
+ r = KMP_TEST_THEN_INC_ACQ32( p );
+ return r;
+}
+
+template<>
+__forceinline kmp_int64
+test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
+{
+ kmp_int64 r;
+ r = KMP_TEST_THEN_INC_ACQ64( p );
+ return r;
+}
+
+// test_then_inc template (general template should NOT be used)
+template< typename T >
+static __forceinline T
+test_then_inc( volatile T *p ) { KMP_ASSERT(0); };
+
+template<>
+__forceinline kmp_int32
+test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
+{
+ kmp_int32 r;
+ r = KMP_TEST_THEN_INC32( p );
+ return r;
+}
+
+template<>
+__forceinline kmp_int64
+test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
+{
+ kmp_int64 r;
+ r = KMP_TEST_THEN_INC64( p );
+ return r;
+}
+
+// compare_and_swap template (general template should NOT be used)
+template< typename T >
+static __forceinline kmp_int32
+compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };
+
+template<>
+__forceinline kmp_int32
+compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
+{
+ return KMP_COMPARE_AND_STORE_REL32( p, c, s );
+}
+
+template<>
+__forceinline kmp_int32
+compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
+{
+ return KMP_COMPARE_AND_STORE_REL64( p, c, s );
+}
+
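The templates above all follow one pattern: a primary template that only asserts, so an unsupported element type is caught, plus explicit specializations that forward to the 32-bit and 64-bit atomic primitives. A standalone sketch of the same pattern using std::atomic instead of the KMP_TEST_THEN_* macros (illustration only, hypothetical names):

#include <atomic>
#include <cassert>
#include <cstdint>

// Primary template: reaching it means the element type is not supported.
template< typename T >
T example_test_then_add( std::atomic< T > * p, T d ) { assert( 0 ); return T(); }

template<>
int32_t example_test_then_add< int32_t >( std::atomic< int32_t > * p, int32_t d )
{
    return p->fetch_add( d );   // returns the previous value (fetch-and-add)
}

template<>
int64_t example_test_then_add< int64_t >( std::atomic< int64_t > * p, int64_t d )
{
    return p->fetch_add( d );
}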
+/*
+ Spin wait loop that first does pause, then yield.
+ Waits until function returns non-zero when called with *spinner and check.
+ Does NOT put threads to sleep.
+#if USE_ITT_BUILD
+    Arguments:
+        obj -- is the higher-level synchronization object to report to ittnotify. It is used to report
+                locks consistently. For example, if the lock is acquired immediately, its address is
+                reported to ittnotify via KMP_FSYNC_ACQUIRED(). However, if the lock cannot be acquired
+                immediately and the lock routine calls KMP_WAIT_YIELD(), the latter should report the same
+                address, not the address of the low-level spinner.
+#endif // USE_ITT_BUILD
+*/
+template< typename UT >
+// ToDo: make inline function (move to header file for icl)
+static UT // unsigned 4- or 8-byte type
+__kmp_wait_yield( volatile UT * spinner,
+ UT checker,
+ kmp_uint32 (* pred)( UT, UT )
+ USE_ITT_BUILD_ARG(void * obj) // Higher-level synchronization object, or NULL.
+ )
+{
+ // note: we may not belong to a team at this point
volatile UT * spin = spinner;
UT check = checker;
kmp_uint32 spins;
kmp_uint32 (*f) ( UT, UT ) = pred;
UT r;
-
- KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
- KMP_INIT_YIELD( spins );
- // main wait spin loop
- while(!f(r = *spin, check))
- {
- KMP_FSYNC_SPIN_PREPARE( obj );
- /* GEH - remove this since it was accidentally introduced when kmp_wait was split.
- It causes problems with infinite recursion because of exit lock */
- /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
- __kmp_abort_thread(); */
-
- // if we are oversubscribed,
-        // or have waited a bit (and KMP_LIBRARY=throughput), then yield
- // pause is in the following code
- KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
- KMP_YIELD_SPIN( spins );
- }
- KMP_FSYNC_SPIN_ACQUIRED( obj );
- return r;
-}
-
-template< typename UT >
-static kmp_uint32 __kmp_eq( UT value, UT checker) {
- return value == checker;
-}
-
-template< typename UT >
-static kmp_uint32 __kmp_neq( UT value, UT checker) {
- return value != checker;
-}
-
-template< typename UT >
-static kmp_uint32 __kmp_lt( UT value, UT checker) {
- return value < checker;
-}
-
-template< typename UT >
-static kmp_uint32 __kmp_ge( UT value, UT checker) {
- return value >= checker;
-}
-
-template< typename UT >
-static kmp_uint32 __kmp_le( UT value, UT checker) {
- return value <= checker;
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-static void
-__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- kmp_info_t *th;
-
- KMP_DEBUG_ASSERT( gtid_ref );
-
- if ( __kmp_env_consistency_check ) {
- th = __kmp_threads[*gtid_ref];
- if ( th -> th.th_root -> r.r_active
- && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
-#else
- __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
-#endif
- }
- }
-}
-
-template< typename UT >
-static void
-__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- typedef typename traits_t< UT >::signed_t ST;
- dispatch_private_info_template< UT > * pr;
-
- int gtid = *gtid_ref;
-// int cid = *cid_ref;
- kmp_info_t *th = __kmp_threads[ gtid ];
- KMP_DEBUG_ASSERT( th -> th.th_dispatch );
-
- KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
- if ( __kmp_env_consistency_check ) {
- pr = reinterpret_cast< dispatch_private_info_template< UT >* >
- ( th -> th.th_dispatch -> th_dispatch_pr_current );
- if ( pr -> pushed_ws != ct_none ) {
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
-#else
- __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
-#endif
- }
- }
-
- if ( ! th -> th.th_team -> t.t_serialized ) {
- dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
- ( th -> th.th_dispatch -> th_dispatch_sh_current );
- UT lower;
-
- if ( ! __kmp_env_consistency_check ) {
- pr = reinterpret_cast< dispatch_private_info_template< UT >* >
- ( th -> th.th_dispatch -> th_dispatch_pr_current );
- }
- lower = pr->u.p.ordered_lower;
-
- #if ! defined( KMP_GOMP_COMPAT )
- if ( __kmp_env_consistency_check ) {
- if ( pr->ordered_bumped ) {
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
- __kmp_error_construct2(
- kmp_i18n_msg_CnsMultipleNesting,
- ct_ordered_in_pdo, loc_ref,
- & p->stack_data[ p->w_top ]
- );
- }
- }
- #endif /* !defined(KMP_GOMP_COMPAT) */
-
- KMP_MB();
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
- USE_ITT_BUILD_ARG( NULL )
- );
- KMP_MB(); /* is this necessary? */
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
- __kmp_str_free( &buff );
- }
- #endif
- }
- KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
-}
-
-static void
-__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- kmp_info_t *th;
-
- if ( __kmp_env_consistency_check ) {
- th = __kmp_threads[*gtid_ref];
- if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
- __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
- }
- }
-}
-
-template< typename UT >
-static void
-__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- typedef typename traits_t< UT >::signed_t ST;
- dispatch_private_info_template< UT > * pr;
-
- int gtid = *gtid_ref;
-// int cid = *cid_ref;
- kmp_info_t *th = __kmp_threads[ gtid ];
- KMP_DEBUG_ASSERT( th -> th.th_dispatch );
-
- KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
- if ( __kmp_env_consistency_check ) {
- pr = reinterpret_cast< dispatch_private_info_template< UT >* >
- ( th -> th.th_dispatch -> th_dispatch_pr_current );
- if ( pr -> pushed_ws != ct_none ) {
- __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
- }
- }
-
- if ( ! th -> th.th_team -> t.t_serialized ) {
- dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
- ( th -> th.th_dispatch -> th_dispatch_sh_current );
-
- if ( ! __kmp_env_consistency_check ) {
- pr = reinterpret_cast< dispatch_private_info_template< UT >* >
- ( th -> th.th_dispatch -> th_dispatch_pr_current );
- }
-
- KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
- #if ! defined( KMP_GOMP_COMPAT )
- if ( __kmp_env_consistency_check ) {
- if ( pr->ordered_bumped != 0 ) {
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
- /* How to test it? - OM */
- __kmp_error_construct2(
- kmp_i18n_msg_CnsMultipleNesting,
- ct_ordered_in_pdo, loc_ref,
- & p->stack_data[ p->w_top ]
- );
- }
- }
- #endif /* !defined(KMP_GOMP_COMPAT) */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- pr->ordered_bumped += 1;
-
- KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
- gtid, pr->ordered_bumped ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* TODO use general release procedure? */
- test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
- KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
-}
-
-/* Computes and returns x to the power of y, where y must be a non-negative integer */
-template< typename UT >
-static __forceinline long double
-__kmp_pow(long double x, UT y) {
- long double s=1.0L;
-
- KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
- //KMP_DEBUG_ASSERT(y >= 0); // y is unsigned
- while(y) {
- if ( y & 1 )
- s *= x;
- x *= x;
- y >>= 1;
- }
- return s;
-}
-
-/* Computes and returns the number of unassigned iterations after idx chunks have been assigned
- (the total number of unassigned iterations in chunks with index greater than or equal to idx).
-   __forceinline appears to be broken here: if this function is __forceinline'd, the behavior is wrong
-   (one of the unit tests, sch_guided_analytical_basic.cpp, fails)
-*/
-template< typename T >
-static __inline typename traits_t< T >::unsigned_t
-__kmp_dispatch_guided_remaining(
- T tc,
- typename traits_t< T >::floating_t base,
- typename traits_t< T >::unsigned_t idx
-) {
- /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at
- least for ICL 8.1, long double arithmetic may not really have
- long double precision, even with /Qlong_double. Currently, we
- workaround that in the caller code, by manipulating the FPCW for
- Windows* OS on IA-32 architecture. The lack of precision is not
- expected to be a correctness issue, though.
- */
- typedef typename traits_t< T >::unsigned_t UT;
-
- long double x = tc * __kmp_pow< UT >(base, idx);
- UT r = (UT) x;
- if ( x == r )
- return r;
- return r + 1;
-}
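As a rough illustration of the formula above (a sketch, not part of this diff; the helper name and numbers are hypothetical): the estimate of unassigned iterations after idx chunks is ceil(tc * base^idx), where the guided base is 1 - 0.5/nproc by default.

    #include <cmath>
    // remaining_sketch: hypothetical standalone version of the estimate above (not the library's API).
    static unsigned long remaining_sketch(unsigned long tc, long double base, unsigned idx) {
        long double x = tc * std::pow(base, (long double)idx);  // same quantity as tc * __kmp_pow< UT >(base, idx)
        unsigned long r = (unsigned long)x;
        return (x == (long double)r) ? r : r + 1;               // round up to a whole iteration
    }
    // e.g. tc = 1000 and nproc = 4 give base = 0.875; after 8 chunks roughly
    // remaining_sketch(1000, 0.875L, 8) == 344 iterations are still unassigned.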
-
-// Parameters of the guided-iterative algorithm:
-// p2 = n * nproc * ( chunk + 1 ) // point of switching to dynamic
-// p3 = 1 / ( n * nproc ) // remaining iterations multiplier
-//   by default n = 2. For example, with n = 3 the chunk distribution is flatter.
-//   With n = 1 the first chunk is the same as for a static schedule, i.e. trip / nproc.
-static int guided_int_param = 2;
-static double guided_flt_param = 0.5;// = 1.0 / guided_int_param;
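For concreteness, a small sketch of how these defaults turn into the parm2/parm3 values used by kmp_sch_guided_iterative_chunked below (illustrative values only; the helper is hypothetical):

    // guided_params_sketch: mirrors the p2/p3 formulas above with n = 2 and guided_flt_param = 0.5.
    static void guided_params_sketch() {
        const int    nproc = 8, chunk = 3;          // hypothetical team size and chunk size
        const int    p2 = 2 * nproc * (chunk + 1);  // 64: switch to dynamic when fewer iterations remain
        const double p3 = 0.5 / nproc;              // 0.0625: fraction of the remainder taken per grab
        (void)p2; (void)p3;
    }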
-
-// UT - unsigned flavor of T, ST - signed flavor of T,
-// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
-template< typename T >
-static void
-__kmp_dispatch_init(
- ident_t * loc,
- int gtid,
- enum sched_type schedule,
- T lb,
- T ub,
- typename traits_t< T >::signed_t st,
- typename traits_t< T >::signed_t chunk,
- int push_ws
-) {
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
- typedef typename traits_t< T >::floating_t DBL;
- static const int ___kmp_size_type = sizeof( UT );
-
- int active;
- T tc;
- kmp_info_t * th;
- kmp_team_t * team;
- kmp_uint32 my_buffer_index;
- dispatch_private_info_template< T > * pr;
- dispatch_shared_info_template< UT > volatile * sh;
-
- KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
- KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );
-
- if ( ! TCR_4( __kmp_init_parallel ) )
- __kmp_parallel_initialize();
-
-#if INCLUDE_SSC_MARKS
- SSC_MARK_DISPATCH_INIT();
-#endif
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
- traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
- KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
- __kmp_str_free( &buff );
- }
- #endif
- /* setup data */
- th = __kmp_threads[ gtid ];
- team = th -> th.th_team;
- active = ! team -> t.t_serialized;
- th->th.th_ident = loc;
-
-#if USE_ITT_BUILD
- kmp_uint64 cur_chunk = chunk;
- int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
- KMP_MASTER_GTID(gtid) &&
-#if OMP_40_ENABLED
- th->th.th_teams_microtask == NULL &&
-#endif
- team->t.t_active_level == 1;
-#endif
- if ( ! active ) {
- pr = reinterpret_cast< dispatch_private_info_template< T >* >
- ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
- } else {
- KMP_DEBUG_ASSERT( th->th.th_dispatch ==
- &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
-
- my_buffer_index = th->th.th_dispatch->th_disp_index ++;
-
-        /* What happens when the number of threads changes? Do we need to resize the buffer? */
- pr = reinterpret_cast< dispatch_private_info_template< T > * >
- ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
- sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
- ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
- }
-
- /* Pick up the nomerge/ordered bits from the scheduling type */
- if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) {
- pr->nomerge = TRUE;
- schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
- } else {
- pr->nomerge = FALSE;
- }
- pr->type_size = ___kmp_size_type; // remember the size of variables
- if ( kmp_ord_lower & schedule ) {
- pr->ordered = TRUE;
- schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
- } else {
- pr->ordered = FALSE;
- }
-
- if ( schedule == kmp_sch_static ) {
- schedule = __kmp_static;
- } else {
- if ( schedule == kmp_sch_runtime ) {
- // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if not specified)
- schedule = team -> t.t_sched.r_sched_type;
- // Detail the schedule if needed (global controls are differentiated appropriately)
- if ( schedule == kmp_sch_guided_chunked ) {
- schedule = __kmp_guided;
- } else if ( schedule == kmp_sch_static ) {
- schedule = __kmp_static;
- }
- // Use the chunk size specified by OMP_SCHEDULE (or default if not specified)
- chunk = team -> t.t_sched.chunk;
-#if USE_ITT_BUILD
- cur_chunk = chunk;
-#endif
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
- traits_t< ST >::spec );
- KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
- __kmp_str_free( &buff );
- }
- #endif
- } else {
- if ( schedule == kmp_sch_guided_chunked ) {
- schedule = __kmp_guided;
- }
- if ( chunk <= 0 ) {
- chunk = KMP_DEFAULT_CHUNK;
- }
- }
-
- if ( schedule == kmp_sch_auto ) {
- // mapping and differentiation: in the __kmp_do_serial_initialize()
- schedule = __kmp_auto;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
- traits_t< ST >::spec );
- KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
- __kmp_str_free( &buff );
- }
- #endif
- }
-
- /* guided analytical not safe for too many threads */
- if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
- schedule = kmp_sch_guided_iterative_chunked;
- KMP_WARNING( DispatchManyThreads );
- }
- pr->u.p.parm1 = chunk;
- }
- KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
- "unknown scheduling type" );
-
- pr->u.p.count = 0;
-
- if ( __kmp_env_consistency_check ) {
- if ( st == 0 ) {
- __kmp_error_construct(
- kmp_i18n_msg_CnsLoopIncrZeroProhibited,
- ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
- );
- }
- }
-
- tc = ( ub - lb + st );
- if ( st != 1 ) {
- if ( st < 0 ) {
- if ( lb < ub ) {
- tc = 0; // zero-trip
- } else { // lb >= ub
- tc = (ST)tc / st; // convert to signed division
- }
- } else { // st > 0
- if ( ub < lb ) {
- tc = 0; // zero-trip
- } else { // lb >= ub
- tc /= st;
- }
- }
- } else if ( ub < lb ) { // st == 1
- tc = 0; // zero-trip
- }
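    /* Worked example of the trip-count rule above (illustrative values only, not from the source):
       lb = 0,  ub = 9, st = 2   ->  tc = (9 - 0 + 2) / 2   = 5  (iterations 0,2,4,6,8)
       lb = 10, ub = 1, st = -3  ->  tc = (1 - 10 - 3) / -3 = 4  (iterations 10,7,4,1)
       lb = 5,  ub = 4, st = 1   ->  tc = 0                      (zero-trip loop)        */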
-
- // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
- // when statistics are disabled.
- if (schedule == __kmp_static)
- {
- KMP_COUNT_BLOCK(OMP_FOR_static);
- KMP_COUNT_VALUE(FOR_static_iterations, tc);
- }
- else
- {
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
- KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
- }
-
- pr->u.p.lb = lb;
- pr->u.p.ub = ub;
- pr->u.p.st = st;
- pr->u.p.tc = tc;
-
- #if KMP_OS_WINDOWS
- pr->u.p.last_upper = ub + st;
- #endif /* KMP_OS_WINDOWS */
-
-    /* NOTE: only the active parallel region(s) have active ordered sections */
-
- if ( active ) {
- if ( pr->ordered == 0 ) {
- th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
- th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
- } else {
- pr->ordered_bumped = 0;
-
- pr->u.p.ordered_lower = 1;
- pr->u.p.ordered_upper = 0;
-
- th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
- th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
- }
- }
-
- if ( __kmp_env_consistency_check ) {
- enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
- if ( push_ws ) {
- __kmp_push_workshare( gtid, ws, loc );
- pr->pushed_ws = ws;
- } else {
- __kmp_check_workshare( gtid, ws, loc );
- pr->pushed_ws = ct_none;
- }
- }
-
- switch ( schedule ) {
- #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
- case kmp_sch_static_steal:
- {
- T nproc = team->t.t_nproc;
- T ntc, init;
-
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );
-
- ntc = (tc % chunk ? 1 : 0) + tc / chunk;
- if ( nproc > 1 && ntc >= nproc ) {
- T id = __kmp_tid_from_gtid(gtid);
- T small_chunk, extras;
-
- small_chunk = ntc / nproc;
- extras = ntc % nproc;
-
- init = id * small_chunk + ( id < extras ? id : extras );
- pr->u.p.count = init;
- pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );
-
- pr->u.p.parm2 = lb;
- //pr->pfields.parm3 = 0; // it's not used in static_steal
- pr->u.p.parm4 = id;
- pr->u.p.st = st;
- break;
- } else {
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
- gtid ) );
- schedule = kmp_sch_static_balanced;
- /* too few iterations: fall-through to kmp_sch_static_balanced */
- } // if
- /* FALL-THROUGH to static balanced */
- } // case
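    /* Illustrative split for the static_steal initialization above (made-up numbers):
       tc = 103, chunk = 4  ->  ntc = 26 chunks; with nproc = 4 threads:
       small_chunk = 26 / 4 = 6, extras = 26 % 4 = 2, so the threads own 7, 7, 6 and 6 chunks:
       id 0 -> chunks 0..6, id 1 -> chunks 7..13, id 2 -> chunks 14..19, id 3 -> chunks 20..25
       (u.p.count holds the first owned chunk index, u.p.ub one past the last owned chunk). */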
- #endif
- case kmp_sch_static_balanced:
- {
- T nproc = team->t.t_nproc;
- T init, limit;
-
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
- gtid ) );
-
- if ( nproc > 1 ) {
- T id = __kmp_tid_from_gtid(gtid);
-
- if ( tc < nproc ) {
- if ( id < tc ) {
- init = id;
- limit = id;
- pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
- } else {
- pr->u.p.count = 1; /* means no more chunks to execute */
- pr->u.p.parm1 = FALSE;
- break;
- }
- } else {
- T small_chunk = tc / nproc;
- T extras = tc % nproc;
- init = id * small_chunk + (id < extras ? id : extras);
- limit = init + small_chunk - (id < extras ? 0 : 1);
- pr->u.p.parm1 = (id == nproc - 1);
- }
- } else {
- if ( tc > 0 ) {
- init = 0;
- limit = tc - 1;
- pr->u.p.parm1 = TRUE;
- } else {
- // zero trip count
- pr->u.p.count = 1; /* means no more chunks to execute */
- pr->u.p.parm1 = FALSE;
- break;
- }
- }
-#if USE_ITT_BUILD
- // Calculate chunk for metadata report
- if ( itt_need_metadata_reporting )
- cur_chunk = limit - init + 1;
-#endif
- if ( st == 1 ) {
- pr->u.p.lb = lb + init;
- pr->u.p.ub = lb + limit;
- } else {
- T ub_tmp = lb + limit * st; // calculated upper bound, "ub" is user-defined upper bound
- pr->u.p.lb = lb + init * st;
- // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly
- if ( st > 0 ) {
- pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
- } else {
- pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
- }
- }
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- }
- break;
- } // case
- case kmp_sch_guided_iterative_chunked :
- {
- T nproc = team->t.t_nproc;
- KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));
-
- if ( nproc > 1 ) {
- if ( (2L * chunk + 1 ) * nproc >= tc ) {
- /* chunk size too large, switch to dynamic */
- schedule = kmp_sch_dynamic_chunked;
- } else {
- // when remaining iters become less than parm2 - switch to dynamic
- pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
- *(double*)&pr->u.p.parm3 = guided_flt_param / nproc; // may occupy parm3 and parm4
- }
- } else {
- KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
- schedule = kmp_sch_static_greedy;
- /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
- KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
- pr->u.p.parm1 = tc;
- } // if
- } // case
- break;
- case kmp_sch_guided_analytical_chunked:
- {
- T nproc = team->t.t_nproc;
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));
-
- if ( nproc > 1 ) {
- if ( (2L * chunk + 1 ) * nproc >= tc ) {
- /* chunk size too large, switch to dynamic */
- schedule = kmp_sch_dynamic_chunked;
- } else {
- /* commonly used term: (2 nproc - 1)/(2 nproc) */
- DBL x;
-
- #if KMP_OS_WINDOWS && KMP_ARCH_X86
- /* Linux* OS already has 64-bit computation by default for
- long double, and on Windows* OS on Intel(R) 64,
- /Qlong_double doesn't work. On Windows* OS
- on IA-32 architecture, we need to set precision to
- 64-bit instead of the default 53-bit. Even though long
- double doesn't work on Windows* OS on Intel(R) 64, the
- resulting lack of precision is not expected to impact
- the correctness of the algorithm, but this has not been
- mathematically proven.
- */
- // save original FPCW and set precision to 64-bit, as
- // Windows* OS on IA-32 architecture defaults to 53-bit
- unsigned int oldFpcw = _control87(0,0);
- _control87(_PC_64,_MCW_PC); // 0,0x30000
- #endif
- /* value used for comparison in solver for cross-over point */
- long double target = ((long double)chunk * 2 + 1) * nproc / tc;
-
- /* crossover point--chunk indexes equal to or greater than
- this point switch to dynamic-style scheduling */
- UT cross;
-
- /* commonly used term: (2 nproc - 1)/(2 nproc) */
- x = (long double)1.0 - (long double)0.5 / nproc;
-
- #ifdef KMP_DEBUG
- { // test natural alignment
- struct _test_a {
- char a;
- union {
- char b;
- DBL d;
- };
- } t;
- ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
- //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long long)natural_alignment );
- KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
- }
- #endif // KMP_DEBUG
-
- /* save the term in thread private dispatch structure */
- *(DBL*)&pr->u.p.parm3 = x;
-
- /* solve for the crossover point to the nearest integer i for which C_i <= chunk */
- {
- UT left, right, mid;
- long double p;
-
- /* estimate initial upper and lower bound */
-
- /* doesn't matter what value right is as long as it is positive, but
- it affects performance of the solver
- */
- right = 229;
- p = __kmp_pow< UT >(x,right);
- if ( p > target ) {
- do{
- p *= p;
- right <<= 1;
- } while(p>target && right < (1<<27));
- left = right >> 1; /* lower bound is previous (failed) estimate of upper bound */
- } else {
- left = 0;
- }
-
- /* bisection root-finding method */
- while ( left + 1 < right ) {
- mid = (left + right) / 2;
- if ( __kmp_pow< UT >(x,mid) > target ) {
- left = mid;
- } else {
- right = mid;
- }
- } // while
- cross = right;
- }
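    /* Sketch of the solver above with illustrative numbers: nproc = 4, chunk = 7, tc = 1000
       give target = (2*7 + 1) * 4 / 1000 = 0.06 and x = 1 - 0.5/4 = 0.875. Bisection finds the
       smallest cross with x^cross <= target: 0.875^21 ~ 0.0606 > 0.06 while 0.875^22 ~ 0.0530,
       so cross = 22 and chunk indexes >= 22 switch to dynamic-style scheduling. */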
- /* assert sanity of computed crossover point */
- KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);
-
- /* save the crossover point in thread private dispatch structure */
- pr->u.p.parm2 = cross;
-
- // C75803
- #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
- #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
- #else
- #define GUIDED_ANALYTICAL_WORKAROUND (x)
- #endif
- /* dynamic-style scheduling offset */
- pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
- #if KMP_OS_WINDOWS && KMP_ARCH_X86
- // restore FPCW
- _control87(oldFpcw,_MCW_PC);
- #endif
- } // if
- } else {
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
- gtid ) );
- schedule = kmp_sch_static_greedy;
- /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
- pr->u.p.parm1 = tc;
- } // if
- } // case
- break;
- case kmp_sch_static_greedy:
- KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
- pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
- ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
- tc;
- break;
- case kmp_sch_static_chunked :
- case kmp_sch_dynamic_chunked :
- if ( pr->u.p.parm1 <= 0 ) {
- pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
- }
- KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
- break;
- case kmp_sch_trapezoidal :
- {
- /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
-
- T parm1, parm2, parm3, parm4;
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );
-
- parm1 = chunk;
-
- /* F : size of the first cycle */
- parm2 = ( tc / (2 * team->t.t_nproc) );
-
- if ( parm2 < 1 ) {
- parm2 = 1;
- }
-
- /* L : size of the last cycle. Make sure the last cycle
- * is not larger than the first cycle.
- */
- if ( parm1 < 1 ) {
- parm1 = 1;
- } else if ( parm1 > parm2 ) {
- parm1 = parm2;
- }
-
- /* N : number of cycles */
- parm3 = ( parm2 + parm1 );
- parm3 = ( 2 * tc + parm3 - 1) / parm3;
-
- if ( parm3 < 2 ) {
- parm3 = 2;
- }
-
- /* sigma : decreasing incr of the trapezoid */
- parm4 = ( parm3 - 1 );
- parm4 = ( parm2 - parm1 ) / parm4;
-
- // pointless check, because parm4 >= 0 always
- //if ( parm4 < 0 ) {
- // parm4 = 0;
- //}
-
- pr->u.p.parm1 = parm1;
- pr->u.p.parm2 = parm2;
- pr->u.p.parm3 = parm3;
- pr->u.p.parm4 = parm4;
- } // case
- break;
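    /* Worked example for the trapezoid parameters above (illustrative values only):
       tc = 1000, nproc = 4, chunk = 10  ->  F = parm2 = 1000 / (2*4) = 125 (first chunk),
       L = parm1 = 10 (minimum/last chunk), N = parm3 = ceil(2*1000 / (125+10)) = 15 cycles,
       sigma = parm4 = (125 - 10) / (15 - 1) = 8, so each cycle shrinks by about 8 iterations. */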
-
- default:
- {
- __kmp_msg(
- kmp_ms_fatal, // Severity
- KMP_MSG( UnknownSchedTypeDetected ), // Primary message
- KMP_HNT( GetNewerLibrary ), // Hint
- __kmp_msg_null // Variadic argument list terminator
- );
- }
- break;
- } // switch
- pr->schedule = schedule;
- if ( active ) {
-        /* Wait for this buffer's turn: it is free to use once sh->buffer_index equals my_buffer_index */
-
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
- gtid, my_buffer_index, sh->buffer_index) );
- __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
- USE_ITT_BUILD_ARG( NULL )
- );
- // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and my_buffer_index are
- // *always* 32-bit integers.
- KMP_MB(); /* is this necessary? */
- KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
- gtid, my_buffer_index, sh->buffer_index) );
-
- th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
- th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
-#if USE_ITT_BUILD
- if ( pr->ordered ) {
- __kmp_itt_ordered_init( gtid );
- }; // if
- // Report loop metadata
- if ( itt_need_metadata_reporting ) {
- // Only report metadata by master of active team at level 1
- kmp_uint64 schedtype = 0;
- switch ( schedule ) {
- case kmp_sch_static_chunked:
- case kmp_sch_static_balanced:// Chunk is calculated in the switch above
- break;
- case kmp_sch_static_greedy:
- cur_chunk = pr->u.p.parm1;
- break;
- case kmp_sch_dynamic_chunked:
- schedtype = 1;
- break;
- case kmp_sch_guided_iterative_chunked:
- case kmp_sch_guided_analytical_chunked:
- schedtype = 2;
- break;
- default:
-// Should we put this case under "static"?
-// case kmp_sch_static_steal:
- schedtype = 3;
- break;
- }
- __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
- }
-#endif /* USE_ITT_BUILD */
- }; // if
-
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
- " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
- " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
- traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
- traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
- traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
- traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
- KD_TRACE(10, ( buff,
- gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
- pr->u.p.st, pr->u.p.tc, pr->u.p.count,
- pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
- pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
- __kmp_str_free( &buff );
- }
- #endif
- #if ( KMP_STATIC_STEAL_ENABLED )
- if ( ___kmp_size_type < 8 ) {
- // It cannot be guaranteed that after execution of a loop with some other schedule kind
- // all the parm3 variables will contain the same value.
-        // Even if all parm3 values were the same, a bad case would still exist, such as
-        // toggling between 0 and 1 rather than using a program-lifetime increment.
-        // So a dedicated variable is required; 'static_steal_counter' is used for this.
- if( schedule == kmp_sch_static_steal ) {
- // Other threads will inspect this variable when searching for a victim.
-            // This flag signals that other threads may steal from this thread from now on.
- volatile T * p = &pr->u.p.static_steal_counter;
- *p = *p + 1;
- }
- }
- #endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
- ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
- team_info->parallel_id, task_info->task_id, team_info->microtask);
- }
-#endif
-}
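The even split used by the kmp_sch_static_balanced case above can be summarized by a small standalone helper (a sketch under assumed names, not the library's API): each of the nproc threads gets either floor(tc/nproc) or floor(tc/nproc)+1 consecutive iterations.

    // balanced_range_sketch: hypothetical illustration of the static_balanced split
    // (the real code additionally special-cases tc < nproc and non-unit strides).
    static void balanced_range_sketch(unsigned tc, unsigned nproc, unsigned id,
                                      unsigned *init, unsigned *limit) {
        unsigned small_chunk = tc / nproc;   // floor share
        unsigned extras      = tc % nproc;   // the first 'extras' threads get one extra iteration
        *init  = id * small_chunk + (id < extras ? id : extras);
        *limit = *init + small_chunk - (id < extras ? 0 : 1);
    }
    // e.g. tc = 10, nproc = 4: thread 0 gets [0,2], thread 1 [3,5], thread 2 [6,7], thread 3 [8,9].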
-
-/*
- * For ordered loops, either __kmp_dispatch_finish() should be called after
- * every iteration, or __kmp_dispatch_finish_chunk() should be called after
- * every chunk of iterations. If the ordered section(s) were not executed
- * for this iteration (or every iteration in this chunk), we need to set the
- * ordered iteration counters so that the next thread can proceed.
- */
-template< typename UT >
-static void
-__kmp_dispatch_finish( int gtid, ident_t *loc )
-{
- typedef typename traits_t< UT >::signed_t ST;
- kmp_info_t *th = __kmp_threads[ gtid ];
-
- KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
- if ( ! th -> th.th_team -> t.t_serialized ) {
-
- dispatch_private_info_template< UT > * pr =
- reinterpret_cast< dispatch_private_info_template< UT >* >
- ( th->th.th_dispatch->th_dispatch_pr_current );
- dispatch_shared_info_template< UT > volatile * sh =
- reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
- ( th->th.th_dispatch->th_dispatch_sh_current );
- KMP_DEBUG_ASSERT( pr );
- KMP_DEBUG_ASSERT( sh );
- KMP_DEBUG_ASSERT( th->th.th_dispatch ==
- &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
-
- if ( pr->ordered_bumped ) {
- KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
- gtid ) );
- pr->ordered_bumped = 0;
- } else {
- UT lower = pr->u.p.ordered_lower;
-
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
- USE_ITT_BUILD_ARG(NULL)
- );
- KMP_MB(); /* is this necessary? */
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
- } // if
- } // if
- KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
-}
-
-#ifdef KMP_GOMP_COMPAT
-
-template< typename UT >
-static void
-__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
-{
- typedef typename traits_t< UT >::signed_t ST;
- kmp_info_t *th = __kmp_threads[ gtid ];
-
- KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
- if ( ! th -> th.th_team -> t.t_serialized ) {
-// int cid;
- dispatch_private_info_template< UT > * pr =
- reinterpret_cast< dispatch_private_info_template< UT >* >
- ( th->th.th_dispatch->th_dispatch_pr_current );
- dispatch_shared_info_template< UT > volatile * sh =
- reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
- ( th->th.th_dispatch->th_dispatch_sh_current );
- KMP_DEBUG_ASSERT( pr );
- KMP_DEBUG_ASSERT( sh );
- KMP_DEBUG_ASSERT( th->th.th_dispatch ==
- &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
-
-// for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) {
- UT lower = pr->u.p.ordered_lower;
- UT upper = pr->u.p.ordered_upper;
- UT inc = upper - lower + 1;
-
- if ( pr->ordered_bumped == inc ) {
- KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
- gtid ) );
- pr->ordered_bumped = 0;
- } else {
- inc -= pr->ordered_bumped;
-
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
- "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
- USE_ITT_BUILD_ARG(NULL)
- );
-
- KMP_MB(); /* is this necessary? */
- KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
- gtid ) );
- pr->ordered_bumped = 0;
-//!!!!! TODO check if the inc should be unsigned, or signed???
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
- "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
- }
-// }
- }
- KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
-}
-
-#endif /* KMP_GOMP_COMPAT */
-
-/* Define a macro for exiting __kmp_dispatch_next(). If status is 0
- * (no more work), then tell OMPT the loop is over. In some cases
- * kmp_dispatch_fini() is not called. */
-#if OMPT_SUPPORT && OMPT_TRACE
-#define OMPT_LOOP_END \
- if (status == 0) { \
- if (ompt_enabled && \
- ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \
- ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \
- ompt_callbacks.ompt_callback(ompt_event_loop_end)( \
- team_info->parallel_id, task_info->task_id); \
- } \
- }
-#else
-#define OMPT_LOOP_END // no-op
-#endif
-
-template< typename T >
-static int
-__kmp_dispatch_next(
- ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st
-) {
-
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
- typedef typename traits_t< T >::floating_t DBL;
-#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
- static const int ___kmp_size_type = sizeof( UT );
-#endif
-
-    // This is potentially slightly misleading: schedule(runtime) will appear here even if the actual runtime schedule
-    // is static. (Which points out a disadvantage of schedule(runtime): even when static scheduling is used, it costs
-    // more than a compile-time choice of static scheduling would.)
- KMP_TIME_BLOCK(FOR_dynamic_scheduling);
-
- int status;
- dispatch_private_info_template< T > * pr;
- kmp_info_t * th = __kmp_threads[ gtid ];
- kmp_team_t * team = th -> th.th_team;
-
- KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
- KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- if ( team -> t.t_serialized ) {
-        /* NOTE: serialize this dispatch because we are not at the active level */
- pr = reinterpret_cast< dispatch_private_info_template< T >* >
- ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
- KMP_DEBUG_ASSERT( pr );
-
- if ( (status = (pr->u.p.tc != 0)) == 0 ) {
- *p_lb = 0;
- *p_ub = 0;
-// if ( p_last != NULL )
-// *p_last = 0;
- if ( p_st != NULL )
- *p_st = 0;
- if ( __kmp_env_consistency_check ) {
- if ( pr->pushed_ws != ct_none ) {
- pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
- }
- }
- } else if ( pr->nomerge ) {
- kmp_int32 last;
- T start;
- UT limit, trip, init;
- ST incr;
- T chunk = pr->u.p.parm1;
-
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );
-
- init = chunk * pr->u.p.count++;
- trip = pr->u.p.tc - 1;
-
- if ( (status = (init <= trip)) == 0 ) {
- *p_lb = 0;
- *p_ub = 0;
-// if ( p_last != NULL )
-// *p_last = 0;
- if ( p_st != NULL )
- *p_st = 0;
- if ( __kmp_env_consistency_check ) {
- if ( pr->pushed_ws != ct_none ) {
- pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
- }
- }
- } else {
- start = pr->u.p.lb;
- limit = chunk + init - 1;
- incr = pr->u.p.st;
-
- if ( (last = (limit >= trip)) != 0 ) {
- limit = trip;
- #if KMP_OS_WINDOWS
- pr->u.p.last_upper = pr->u.p.ub;
- #endif /* KMP_OS_WINDOWS */
- }
- if ( p_last != NULL )
- *p_last = last;
- if ( p_st != NULL )
- *p_st = incr;
- if ( incr == 1 ) {
- *p_lb = start + init;
- *p_ub = start + limit;
- } else {
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- }
-
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } // if
- } else {
- pr->u.p.tc = 0;
- *p_lb = pr->u.p.lb;
- *p_ub = pr->u.p.ub;
- #if KMP_OS_WINDOWS
- pr->u.p.last_upper = *p_ub;
- #endif /* KMP_OS_WINDOWS */
- if ( p_last != NULL )
- *p_last = TRUE;
- if ( p_st != NULL )
- *p_st = pr->u.p.st;
- } // if
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
- "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
- KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
- __kmp_str_free( &buff );
- }
- #endif
-#if INCLUDE_SSC_MARKS
- SSC_MARK_DISPATCH_NEXT();
-#endif
- OMPT_LOOP_END;
- return status;
- } else {
- kmp_int32 last = 0;
- dispatch_shared_info_template< UT > *sh;
- T start;
- ST incr;
- UT limit, trip, init;
-
- KMP_DEBUG_ASSERT( th->th.th_dispatch ==
- &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
-
- pr = reinterpret_cast< dispatch_private_info_template< T >* >
- ( th->th.th_dispatch->th_dispatch_pr_current );
- KMP_DEBUG_ASSERT( pr );
- sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
- ( th->th.th_dispatch->th_dispatch_sh_current );
- KMP_DEBUG_ASSERT( sh );
-
- if ( pr->u.p.tc == 0 ) {
- // zero trip count
- status = 0;
- } else {
- switch (pr->schedule) {
- #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
- case kmp_sch_static_steal:
- {
- T chunk = pr->u.p.parm1;
-
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );
-
- trip = pr->u.p.tc - 1;
-
- if ( ___kmp_size_type > 4 ) {
- // Other threads do not look into the data of this thread,
-                        // so a volatile cast is not necessary.
- init = ( pr->u.p.count )++;
- status = ( init < (UT)pr->u.p.ub );
- } else {
- typedef union {
- struct {
- UT count;
- T ub;
- } p;
- kmp_int64 b;
- } union_i4;
- // All operations on 'count' or 'ub' must be combined atomically together.
- // stealing implemented only for 4-byte indexes
- {
- union_i4 vold, vnew;
- vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
- vnew = vold;
- vnew.p.count++;
- while( ! KMP_COMPARE_AND_STORE_ACQ64(
- ( volatile kmp_int64* )&pr->u.p.count,
- *VOLATILE_CAST(kmp_int64 *)&vold.b,
- *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
- KMP_CPU_PAUSE();
- vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
- vnew = vold;
- vnew.p.count++;
- }
- vnew = vold;
- init = vnew.p.count;
- status = ( init < (UT)vnew.p.ub ) ;
- }
-
- if( !status ) {
- kmp_info_t **other_threads = team->t.t_threads;
- int while_limit = 10;
- int while_index = 0;
-
- // TODO: algorithm of searching for a victim
- // should be cleaned up and measured
- while ( ( !status ) && ( while_limit != ++while_index ) ) {
- union_i4 vold, vnew;
- kmp_int32 remaining; // kmp_int32 because KMP_I4 only
- T victimIdx = pr->u.p.parm4;
- T oldVictimIdx = victimIdx;
- dispatch_private_info_template< T > * victim;
-
- do {
- if( !victimIdx ) {
- victimIdx = team->t.t_nproc - 1;
- } else {
- --victimIdx;
- }
- victim = reinterpret_cast< dispatch_private_info_template< T >* >
- ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
- } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
- // TODO: think about a proper place of this test
- if ( ( !victim ) ||
- ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
- (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {
- // TODO: delay would be nice
- continue;
- // the victim is not ready yet to participate in stealing
- // because the victim is still in kmp_init_dispatch
- }
- if ( oldVictimIdx == victimIdx ) {
- break;
- }
- pr->u.p.parm4 = victimIdx;
-
- while( 1 ) {
- vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
- vnew = vold;
-
- KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
- if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
- break;
- }
- vnew.p.ub -= (remaining >> 2);
- KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
- #pragma warning( push )
- // disable warning on pointless comparison of unsigned with 0
- #pragma warning( disable: 186 )
- KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
- #pragma warning( pop )
- // TODO: Should this be acquire or release?
- if ( KMP_COMPARE_AND_STORE_ACQ64(
- ( volatile kmp_int64 * )&victim->u.p.count,
- *VOLATILE_CAST(kmp_int64 *)&vold.b,
- *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
- status = 1;
- while_index = 0;
- // now update own count and ub
- #if KMP_ARCH_X86
- // stealing executed on non-KMP_ARCH_X86 only
- // Atomic 64-bit write on ia32 is
- // unavailable, so we do this in steps.
- // This code is not tested.
- init = vold.p.count;
- pr->u.p.ub = 0;
- pr->u.p.count = init + 1;
- pr->u.p.ub = vnew.p.count;
- #else
- init = vnew.p.ub;
- vold.p.count = init + 1;
-                                    // TODO: is this safe and sufficient?
- *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
- #endif // KMP_ARCH_X86
- break;
- } // if
- KMP_CPU_PAUSE();
- } // while (1)
- } // while
- } // if
- } // if
- if ( !status ) {
- *p_lb = 0;
- *p_ub = 0;
- if ( p_st != NULL ) *p_st = 0;
- } else {
- start = pr->u.p.parm2;
- init *= chunk;
- limit = chunk + init - 1;
- incr = pr->u.p.st;
-
- KMP_DEBUG_ASSERT(init <= trip);
- if ( (last = (limit >= trip)) != 0 )
- limit = trip;
- if ( p_st != NULL ) *p_st = incr;
-
- if ( incr == 1 ) {
- *p_lb = start + init;
- *p_ub = start + limit;
- } else {
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- }
-
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } // if
- break;
- } // case
- #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
- case kmp_sch_static_balanced:
- {
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
- if ( (status = !pr->u.p.count) != 0 ) { /* check if thread has any iteration to do */
- pr->u.p.count = 1;
- *p_lb = pr->u.p.lb;
- *p_ub = pr->u.p.ub;
- last = pr->u.p.parm1;
- if ( p_st != NULL )
- *p_st = pr->u.p.st;
- } else { /* no iterations to do */
- pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
- }
- if ( pr->ordered ) {
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } // case
- break;
- case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was merged here */
- case kmp_sch_static_chunked:
- {
- T parm1;
-
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
- gtid ) );
- parm1 = pr->u.p.parm1;
-
- trip = pr->u.p.tc - 1;
- init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));
-
- if ( (status = (init <= trip)) != 0 ) {
- start = pr->u.p.lb;
- incr = pr->u.p.st;
- limit = parm1 + init - 1;
-
- if ( (last = (limit >= trip)) != 0 )
- limit = trip;
-
- if ( p_st != NULL ) *p_st = incr;
-
- pr->u.p.count += team->t.t_nproc;
-
- if ( incr == 1 ) {
- *p_lb = start + init;
- *p_ub = start + limit;
- }
- else {
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- }
-
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } // if
- } // case
- break;
-
- case kmp_sch_dynamic_chunked:
- {
- T chunk = pr->u.p.parm1;
-
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
- gtid ) );
-
- init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
- trip = pr->u.p.tc - 1;
-
- if ( (status = (init <= trip)) == 0 ) {
- *p_lb = 0;
- *p_ub = 0;
- if ( p_st != NULL ) *p_st = 0;
- } else {
- start = pr->u.p.lb;
- limit = chunk + init - 1;
- incr = pr->u.p.st;
-
- if ( (last = (limit >= trip)) != 0 )
- limit = trip;
-
- if ( p_st != NULL ) *p_st = incr;
-
- if ( incr == 1 ) {
- *p_lb = start + init;
- *p_ub = start + limit;
- } else {
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- }
-
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } // if
- } // case
- break;
-
- case kmp_sch_guided_iterative_chunked:
- {
- T chunkspec = pr->u.p.parm1;
- KD_TRACE(100,
- ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
- trip = pr->u.p.tc;
- // Start atomic part of calculations
- while(1) {
- ST remaining; // signed, because can be < 0
- init = sh->u.s.iteration; // shared value
- remaining = trip - init;
- if ( remaining <= 0 ) { // AC: need to compare with 0 first
- // nothing to do, don't try atomic op
- status = 0;
- break;
- }
- if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default
-                        // use dynamic-style schedule
-                        // atomically increment iterations, get old value
- init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
- remaining = trip - init;
- if (remaining <= 0) {
- status = 0; // all iterations got by other threads
- } else {
- // got some iterations to work on
- status = 1;
- if ( (T)remaining > chunkspec ) {
- limit = init + chunkspec - 1;
- } else {
- last = 1; // the last chunk
- limit = init + remaining - 1;
- } // if
- } // if
- break;
- } // if
- limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc
- if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
- // CAS was successful, chunk obtained
- status = 1;
- --limit;
- break;
- } // if
- } // while
- if ( status != 0 ) {
- start = pr->u.p.lb;
- incr = pr->u.p.st;
- if ( p_st != NULL )
- *p_st = incr;
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } else {
- *p_lb = 0;
- *p_ub = 0;
- if ( p_st != NULL )
- *p_st = 0;
- } // if
- } // case
- break;
-
- case kmp_sch_guided_analytical_chunked:
- {
- T chunkspec = pr->u.p.parm1;
- UT chunkIdx;
- #if KMP_OS_WINDOWS && KMP_ARCH_X86
- /* for storing original FPCW value for Windows* OS on
- IA-32 architecture 8-byte version */
- unsigned int oldFpcw;
- unsigned int fpcwSet = 0;
- #endif
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
- gtid ) );
-
- trip = pr->u.p.tc;
-
- KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
- KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);
-
- while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */
- chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
- if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
- --trip;
- /* use dynamic-style scheduling */
- init = chunkIdx * chunkspec + pr->u.p.count;
- /* need to verify init > 0 in case of overflow in the above calculation */
- if ( (status = (init > 0 && init <= trip)) != 0 ) {
- limit = init + chunkspec -1;
-
- if ( (last = (limit >= trip)) != 0 )
- limit = trip;
- }
- break;
- } else {
- /* use exponential-style scheduling */
-                        /* The following check works around the lack of long double precision on Windows* OS,
-                           which can cause init != 0 for chunkIdx == 0.
- */
- #if KMP_OS_WINDOWS && KMP_ARCH_X86
- /* If we haven't already done so, save original
- FPCW and set precision to 64-bit, as Windows* OS
- on IA-32 architecture defaults to 53-bit */
- if ( !fpcwSet ) {
- oldFpcw = _control87(0,0);
- _control87(_PC_64,_MCW_PC);
- fpcwSet = 0x30000;
- }
- #endif
- if ( chunkIdx ) {
- init = __kmp_dispatch_guided_remaining< T >(
- trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
- KMP_DEBUG_ASSERT(init);
- init = trip - init;
- } else
- init = 0;
- limit = trip - __kmp_dispatch_guided_remaining< T >(
- trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
- KMP_ASSERT(init <= limit);
- if ( init < limit ) {
- KMP_DEBUG_ASSERT(limit <= trip);
- --limit;
- status = 1;
- break;
- } // if
- } // if
- } // while (1)
- #if KMP_OS_WINDOWS && KMP_ARCH_X86
- /* restore FPCW if necessary
- AC: check fpcwSet flag first because oldFpcw can be uninitialized here
- */
- if ( fpcwSet && ( oldFpcw & fpcwSet ) )
- _control87(oldFpcw,_MCW_PC);
- #endif
- if ( status != 0 ) {
- start = pr->u.p.lb;
- incr = pr->u.p.st;
- if ( p_st != NULL )
- *p_st = incr;
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- }
- } else {
- *p_lb = 0;
- *p_ub = 0;
- if ( p_st != NULL )
- *p_st = 0;
- }
- } // case
- break;
-
- case kmp_sch_trapezoidal:
- {
- UT index;
- T parm2 = pr->u.p.parm2;
- T parm3 = pr->u.p.parm3;
- T parm4 = pr->u.p.parm4;
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
- gtid ) );
-
- index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );
-
- init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
- trip = pr->u.p.tc - 1;
-
- if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
- *p_lb = 0;
- *p_ub = 0;
- if ( p_st != NULL ) *p_st = 0;
- } else {
- start = pr->u.p.lb;
- limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
- incr = pr->u.p.st;
-
- if ( (last = (limit >= trip)) != 0 )
- limit = trip;
-
- if ( p_st != NULL ) *p_st = incr;
-
- if ( incr == 1 ) {
- *p_lb = start + init;
- *p_ub = start + limit;
- } else {
- *p_lb = start + init * incr;
- *p_ub = start + limit * incr;
- }
-
- if ( pr->ordered ) {
- pr->u.p.ordered_lower = init;
- pr->u.p.ordered_upper = limit;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
- traits_t< UT >::spec, traits_t< UT >::spec );
- KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
- __kmp_str_free( &buff );
- }
- #endif
- } // if
- } // if
- } // case
- break;
- default:
- {
- status = 0; // to avoid complaints on uninitialized variable use
- __kmp_msg(
- kmp_ms_fatal, // Severity
- KMP_MSG( UnknownSchedTypeDetected ), // Primary message
- KMP_HNT( GetNewerLibrary ), // Hint
- __kmp_msg_null // Variadic argument list terminator
- );
- }
- break;
- } // switch
- } // if tc == 0;
-
- if ( status == 0 ) {
- UT num_done;
-
- num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
- traits_t< UT >::spec );
- KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- if ( (ST)num_done == team->t.t_nproc-1 ) {
- /* NOTE: release this buffer to be reused */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- sh->u.s.num_done = 0;
- sh->u.s.iteration = 0;
-
- /* TODO replace with general release procedure? */
- if ( pr->ordered ) {
- sh->u.s.ordered_iteration = 0;
- }
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- sh -> buffer_index += KMP_MAX_DISP_BUF;
- KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
- gtid, sh->buffer_index) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- } // if
- if ( __kmp_env_consistency_check ) {
- if ( pr->pushed_ws != ct_none ) {
- pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
- }
- }
-
- th -> th.th_dispatch -> th_deo_fcn = NULL;
- th -> th.th_dispatch -> th_dxo_fcn = NULL;
- th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
- th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
- } // if (status == 0)
-#if KMP_OS_WINDOWS
- else if ( last ) {
- pr->u.p.last_upper = pr->u.p.ub;
- }
-#endif /* KMP_OS_WINDOWS */
- if ( p_last != NULL && status != 0 )
- *p_last = last;
- } // if
-
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmp_dispatch_next: T#%%d normal case: " \
- "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
- KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
- __kmp_str_free( &buff );
- }
- #endif
-#if INCLUDE_SSC_MARKS
- SSC_MARK_DISPATCH_NEXT();
-#endif
- OMPT_LOOP_END;
- return status;
-}
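As a rough check of the trapezoidal indexing used in __kmp_dispatch_next above (a sketch with hypothetical names; the arithmetic mirrors the kmp_sch_trapezoidal case): chunk 'index' starts at the sum of the sizes of the previous chunks, which shrink linearly by sigma = parm4.

    // trapezoid_bounds_sketch: first and last iteration of chunk 'index' for first-chunk size F
    // and decrement sigma. For index == 0 the (index - 1) term wraps but is multiplied by 0,
    // exactly as in the library code above.
    static void trapezoid_bounds_sketch(unsigned long index, unsigned long F, unsigned long sigma,
                                        unsigned long *init, unsigned long *limit) {
        *init  = ( index      * (2*F - (index - 1)*sigma)) / 2;
        *limit = ((index + 1) * (2*F -  index     *sigma)) / 2 - 1;
    }
    // e.g. F = 125, sigma = 8: chunk 0 covers [0,124] (125 iterations), chunk 1 covers [125,241] (117), ...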
-
-template< typename T >
-static void
-__kmp_dist_get_bounds(
- ident_t *loc,
- kmp_int32 gtid,
- kmp_int32 *plastiter,
- T *plower,
- T *pupper,
- typename traits_t< T >::signed_t incr
-) {
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
+
+ KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
+ KMP_INIT_YIELD( spins );
+ // main wait spin loop
+ while(!f(r = *spin, check))
+ {
+ KMP_FSYNC_SPIN_PREPARE( obj );
+ /* GEH - remove this since it was accidentally introduced when kmp_wait was split.
+ It causes problems with infinite recursion because of exit lock */
+ /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
+ __kmp_abort_thread(); */
+
+        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield;
+        // the pause is in the following code
+ KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
+ KMP_YIELD_SPIN( spins );
+ }
+ KMP_FSYNC_SPIN_ACQUIRED( obj );
+ return r;
+}
+
+template< typename UT >
+static kmp_uint32 __kmp_eq( UT value, UT checker) {
+ return value == checker;
+}
+
+template< typename UT >
+static kmp_uint32 __kmp_neq( UT value, UT checker) {
+ return value != checker;
+}
+
+template< typename UT >
+static kmp_uint32 __kmp_lt( UT value, UT checker) {
+ return value < checker;
+}
+
+template< typename UT >
+static kmp_uint32 __kmp_ge( UT value, UT checker) {
+ return value >= checker;
+}
+
+template< typename UT >
+static kmp_uint32 __kmp_le( UT value, UT checker) {
+ return value <= checker;
+}
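These predicates are the comparison arguments passed to the spin-wait above; a minimal usage sketch (the variables are placeholders, not from the source) would look like:

    // kmp_uint32 my_turn = ...;            // e.g. this thread's ordered iteration
    // volatile kmp_uint32 *counter = ...;  // e.g. &sh->u.s.ordered_iteration
    // __kmp_wait_yield< kmp_uint32 >( counter, my_turn, __kmp_ge< kmp_uint32 >
    //                                 USE_ITT_BUILD_ARG( NULL ) );
    // // returns once *counter >= my_turn, yielding while the machine is oversubscribed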
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+static void
+__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ kmp_info_t *th;
+
+ KMP_DEBUG_ASSERT( gtid_ref );
+
+ if ( __kmp_env_consistency_check ) {
+ th = __kmp_threads[*gtid_ref];
+ if ( th -> th.th_root -> r.r_active
+ && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
+#else
+ __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
+#endif
+ }
+ }
+}
+
+template< typename UT >
+static void
+__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ typedef typename traits_t< UT >::signed_t ST;
+ dispatch_private_info_template< UT > * pr;
+
+ int gtid = *gtid_ref;
+// int cid = *cid_ref;
+ kmp_info_t *th = __kmp_threads[ gtid ];
+ KMP_DEBUG_ASSERT( th -> th.th_dispatch );
+
+ KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
+ if ( __kmp_env_consistency_check ) {
+ pr = reinterpret_cast< dispatch_private_info_template< UT >* >
+ ( th -> th.th_dispatch -> th_dispatch_pr_current );
+ if ( pr -> pushed_ws != ct_none ) {
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
+#else
+ __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
+#endif
+ }
+ }
+
+ if ( ! th -> th.th_team -> t.t_serialized ) {
+ dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
+ ( th -> th.th_dispatch -> th_dispatch_sh_current );
+ UT lower;
+
+ if ( ! __kmp_env_consistency_check ) {
+ pr = reinterpret_cast< dispatch_private_info_template< UT >* >
+ ( th -> th.th_dispatch -> th_dispatch_pr_current );
+ }
+ lower = pr->u.p.ordered_lower;
+
+ #if ! defined( KMP_GOMP_COMPAT )
+ if ( __kmp_env_consistency_check ) {
+ if ( pr->ordered_bumped ) {
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsMultipleNesting,
+ ct_ordered_in_pdo, loc_ref,
+ & p->stack_data[ p->w_top ]
+ );
+ }
+ }
+ #endif /* !defined(KMP_GOMP_COMPAT) */
+
+ KMP_MB();
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
+ USE_ITT_BUILD_ARG( NULL )
+ );
+ KMP_MB(); /* is this necessary? */
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ }
+ KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
+}
+
+static void
+__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ kmp_info_t *th;
+
+ if ( __kmp_env_consistency_check ) {
+ th = __kmp_threads[*gtid_ref];
+ if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
+ __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
+ }
+ }
+}
+
+template< typename UT >
+static void
+__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ typedef typename traits_t< UT >::signed_t ST;
+ dispatch_private_info_template< UT > * pr;
+
+ int gtid = *gtid_ref;
+// int cid = *cid_ref;
+ kmp_info_t *th = __kmp_threads[ gtid ];
+ KMP_DEBUG_ASSERT( th -> th.th_dispatch );
+
+ KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
+ if ( __kmp_env_consistency_check ) {
+ pr = reinterpret_cast< dispatch_private_info_template< UT >* >
+ ( th -> th.th_dispatch -> th_dispatch_pr_current );
+ if ( pr -> pushed_ws != ct_none ) {
+ __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
+ }
+ }
+
+ if ( ! th -> th.th_team -> t.t_serialized ) {
+ dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
+ ( th -> th.th_dispatch -> th_dispatch_sh_current );
+
+ if ( ! __kmp_env_consistency_check ) {
+ pr = reinterpret_cast< dispatch_private_info_template< UT >* >
+ ( th -> th.th_dispatch -> th_dispatch_pr_current );
+ }
+
+ KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
+ #if ! defined( KMP_GOMP_COMPAT )
+ if ( __kmp_env_consistency_check ) {
+ if ( pr->ordered_bumped != 0 ) {
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+ /* How to test it? - OM */
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsMultipleNesting,
+ ct_ordered_in_pdo, loc_ref,
+ & p->stack_data[ p->w_top ]
+ );
+ }
+ }
+ #endif /* !defined(KMP_GOMP_COMPAT) */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ pr->ordered_bumped += 1;
+
+ KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
+ gtid, pr->ordered_bumped ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* TODO use general release procedure? */
+ test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
+ KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
+}
+
+/* Computes and returns x to the power of y, where y must be a non-negative integer */
+template< typename UT >
+static __forceinline long double
+__kmp_pow(long double x, UT y) {
+ long double s=1.0L;
+
+ KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
+ //KMP_DEBUG_ASSERT(y >= 0); // y is unsigned
+ while(y) {
+ if ( y & 1 )
+ s *= x;
+ x *= x;
+ y >>= 1;
+ }
+ return s;
+}
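+// Illustrative example (values chosen only for clarity): __kmp_pow(0.5, 5) evaluates
+// 0.5^5 = 0.03125 by squaring, i.e. O(log y) multiplications instead of y - 1.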
+
+/* Computes and returns the number of unassigned iterations after idx chunks have been assigned
+ (the total number of unassigned iterations in chunks with index greater than or equal to idx).
+   __forceinline appears to be broken here: if this function is force-inlined, the behavior is wrong
+   (one of the unit tests, sch_guided_analytical_basic.cpp, fails).
+*/
+template< typename T >
+static __inline typename traits_t< T >::unsigned_t
+__kmp_dispatch_guided_remaining(
+ T tc,
+ typename traits_t< T >::floating_t base,
+ typename traits_t< T >::unsigned_t idx
+) {
+ /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at
+ least for ICL 8.1, long double arithmetic may not really have
+ long double precision, even with /Qlong_double. Currently, we
+ workaround that in the caller code, by manipulating the FPCW for
+ Windows* OS on IA-32 architecture. The lack of precision is not
+ expected to be a correctness issue, though.
+ */
+ typedef typename traits_t< T >::unsigned_t UT;
+
+ long double x = tc * __kmp_pow< UT >(base, idx);
+ UT r = (UT) x;
+ if ( x == r )
+ return r;
+ return r + 1;
+}
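+// Illustrative example (assumed values): with tc = 1000 and base = 0.875 (i.e. nproc = 4),
+// after idx = 8 chunks the estimated remaining work is ceil( 1000 * 0.875^8 ) = ceil( 343.6 ) = 344.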
+
+// Parameters of the guided-iterative algorithm:
+// p2 = n * nproc * ( chunk + 1 ) // point of switching to dynamic
+// p3 = 1 / ( n * nproc ) // remaining iterations multiplier
+// By default n = 2. For example, with n = 3 the chunk distribution will be flatter.
+// With n = 1 the first chunk is the same as for a static schedule, i.e. trip / nproc.
+static int guided_int_param = 2;
+static double guided_flt_param = 0.5;// = 1.0 / guided_int_param;
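+// Illustrative example (assumed values): with nproc = 8 and chunk = 7, __kmp_dispatch_init below
+// sets parm2 = 2 * 8 * (7 + 1) = 128 (switch-to-dynamic point) and parm3 = 0.5 / 8 = 0.0625.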
+
+// UT - unsigned flavor of T, ST - signed flavor of T,
+// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
+template< typename T >
+static void
+__kmp_dispatch_init(
+ ident_t * loc,
+ int gtid,
+ enum sched_type schedule,
+ T lb,
+ T ub,
+ typename traits_t< T >::signed_t st,
+ typename traits_t< T >::signed_t chunk,
+ int push_ws
+) {
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
+ typedef typename traits_t< T >::floating_t DBL;
+ static const int ___kmp_size_type = sizeof( UT );
+
+ int active;
+ T tc;
+ kmp_info_t * th;
+ kmp_team_t * team;
+ kmp_uint32 my_buffer_index;
+ dispatch_private_info_template< T > * pr;
+ dispatch_shared_info_template< UT > volatile * sh;
+
+ KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
+ KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );
+
+ if ( ! TCR_4( __kmp_init_parallel ) )
+ __kmp_parallel_initialize();
+
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_DISPATCH_INIT();
+#endif
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
+ traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
+ KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ /* setup data */
+ th = __kmp_threads[ gtid ];
+ team = th -> th.th_team;
+ active = ! team -> t.t_serialized;
+ th->th.th_ident = loc;
+
+#if USE_ITT_BUILD
+ kmp_uint64 cur_chunk = chunk;
+ int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
+ KMP_MASTER_GTID(gtid) &&
+#if OMP_40_ENABLED
+ th->th.th_teams_microtask == NULL &&
+#endif
+ team->t.t_active_level == 1;
+#endif
+ if ( ! active ) {
+ pr = reinterpret_cast< dispatch_private_info_template< T >* >
+ ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
+ } else {
+ KMP_DEBUG_ASSERT( th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
+
+ my_buffer_index = th->th.th_dispatch->th_disp_index ++;
+
+        /* What happens when the number of threads changes? Do we need to resize the buffer? */
+ pr = reinterpret_cast< dispatch_private_info_template< T > * >
+ ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
+ sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
+ ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
+ }
+
+ /* Pick up the nomerge/ordered bits from the scheduling type */
+ if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) {
+ pr->nomerge = TRUE;
+ schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
+ } else {
+ pr->nomerge = FALSE;
+ }
+ pr->type_size = ___kmp_size_type; // remember the size of variables
+ if ( kmp_ord_lower & schedule ) {
+ pr->ordered = TRUE;
+ schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
+ } else {
+ pr->ordered = FALSE;
+ }
+
+ if ( schedule == kmp_sch_static ) {
+ schedule = __kmp_static;
+ } else {
+ if ( schedule == kmp_sch_runtime ) {
+ // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if not specified)
+ schedule = team -> t.t_sched.r_sched_type;
+ // Detail the schedule if needed (global controls are differentiated appropriately)
+ if ( schedule == kmp_sch_guided_chunked ) {
+ schedule = __kmp_guided;
+ } else if ( schedule == kmp_sch_static ) {
+ schedule = __kmp_static;
+ }
+ // Use the chunk size specified by OMP_SCHEDULE (or default if not specified)
+ chunk = team -> t.t_sched.chunk;
+#if USE_ITT_BUILD
+ cur_chunk = chunk;
+#endif
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
+ traits_t< ST >::spec );
+ KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } else {
+ if ( schedule == kmp_sch_guided_chunked ) {
+ schedule = __kmp_guided;
+ }
+ if ( chunk <= 0 ) {
+ chunk = KMP_DEFAULT_CHUNK;
+ }
+ }
+
+ if ( schedule == kmp_sch_auto ) {
+ // mapping and differentiation: in the __kmp_do_serial_initialize()
+ schedule = __kmp_auto;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
+ traits_t< ST >::spec );
+ KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ }
+
+ /* guided analytical not safe for too many threads */
+ if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
+ schedule = kmp_sch_guided_iterative_chunked;
+ KMP_WARNING( DispatchManyThreads );
+ }
+ pr->u.p.parm1 = chunk;
+ }
+ KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
+ "unknown scheduling type" );
+
+ pr->u.p.count = 0;
+
+ if ( __kmp_env_consistency_check ) {
+ if ( st == 0 ) {
+ __kmp_error_construct(
+ kmp_i18n_msg_CnsLoopIncrZeroProhibited,
+ ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
+ );
+ }
+ }
+
+ tc = ( ub - lb + st );
+ if ( st != 1 ) {
+ if ( st < 0 ) {
+ if ( lb < ub ) {
+ tc = 0; // zero-trip
+ } else { // lb >= ub
+ tc = (ST)tc / st; // convert to signed division
+ }
+ } else { // st > 0
+ if ( ub < lb ) {
+ tc = 0; // zero-trip
+ } else { // lb >= ub
+ tc /= st;
+ }
+ }
+ } else if ( ub < lb ) { // st == 1
+ tc = 0; // zero-trip
+ }
+
+ // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
+ // when statistics are disabled.
+ if (schedule == __kmp_static)
+ {
+ KMP_COUNT_BLOCK(OMP_FOR_static);
+ KMP_COUNT_VALUE(FOR_static_iterations, tc);
+ }
+ else
+ {
+ KMP_COUNT_BLOCK(OMP_FOR_dynamic);
+ KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
+ }
+
+ pr->u.p.lb = lb;
+ pr->u.p.ub = ub;
+ pr->u.p.st = st;
+ pr->u.p.tc = tc;
+
+ #if KMP_OS_WINDOWS
+ pr->u.p.last_upper = ub + st;
+ #endif /* KMP_OS_WINDOWS */
+
+    /* NOTE: only the active parallel region(s) have active ordered sections */
+
+ if ( active ) {
+ if ( pr->ordered == 0 ) {
+ th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
+ th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
+ } else {
+ pr->ordered_bumped = 0;
+
+ pr->u.p.ordered_lower = 1;
+ pr->u.p.ordered_upper = 0;
+
+ th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
+ th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
+ }
+ }
+
+ if ( __kmp_env_consistency_check ) {
+ enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
+ if ( push_ws ) {
+ __kmp_push_workshare( gtid, ws, loc );
+ pr->pushed_ws = ws;
+ } else {
+ __kmp_check_workshare( gtid, ws, loc );
+ pr->pushed_ws = ct_none;
+ }
+ }
+
+ switch ( schedule ) {
+ #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
+ case kmp_sch_static_steal:
+ {
+ T nproc = team->t.t_nproc;
+ T ntc, init;
+
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );
+
+ ntc = (tc % chunk ? 1 : 0) + tc / chunk;
+ if ( nproc > 1 && ntc >= nproc ) {
+ T id = __kmp_tid_from_gtid(gtid);
+ T small_chunk, extras;
+
+ small_chunk = ntc / nproc;
+ extras = ntc % nproc;
+
+ init = id * small_chunk + ( id < extras ? id : extras );
+ pr->u.p.count = init;
+ pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );
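+                // Example (illustrative values): ntc = 10 chunks, nproc = 4 -> small_chunk = 2, extras = 2;
+                // threads 0..3 initially own chunk index ranges [0,3), [3,6), [6,8), [8,10).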
+
+ pr->u.p.parm2 = lb;
+ //pr->pfields.parm3 = 0; // it's not used in static_steal
+ pr->u.p.parm4 = id;
+ pr->u.p.st = st;
+ break;
+ } else {
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
+ gtid ) );
+ schedule = kmp_sch_static_balanced;
+ /* too few iterations: fall-through to kmp_sch_static_balanced */
+ } // if
+ /* FALL-THROUGH to static balanced */
+ } // case
+ #endif
+ case kmp_sch_static_balanced:
+ {
+ T nproc = team->t.t_nproc;
+ T init, limit;
+
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
+ gtid ) );
+
+ if ( nproc > 1 ) {
+ T id = __kmp_tid_from_gtid(gtid);
+
+ if ( tc < nproc ) {
+ if ( id < tc ) {
+ init = id;
+ limit = id;
+ pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
+ } else {
+ pr->u.p.count = 1; /* means no more chunks to execute */
+ pr->u.p.parm1 = FALSE;
+ break;
+ }
+ } else {
+ T small_chunk = tc / nproc;
+ T extras = tc % nproc;
+ init = id * small_chunk + (id < extras ? id : extras);
+ limit = init + small_chunk - (id < extras ? 0 : 1);
+ pr->u.p.parm1 = (id == nproc - 1);
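+                    // Example (illustrative values): tc = 10, nproc = 4 -> small_chunk = 2, extras = 2;
+                    // threads 0..3 get iteration indexes 0-2, 3-5, 6-7, 8-9 (scaled by lb/st below).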
+ }
+ } else {
+ if ( tc > 0 ) {
+ init = 0;
+ limit = tc - 1;
+ pr->u.p.parm1 = TRUE;
+ } else {
+ // zero trip count
+ pr->u.p.count = 1; /* means no more chunks to execute */
+ pr->u.p.parm1 = FALSE;
+ break;
+ }
+ }
+#if USE_ITT_BUILD
+ // Calculate chunk for metadata report
+ if ( itt_need_metadata_reporting )
+ cur_chunk = limit - init + 1;
+#endif
+ if ( st == 1 ) {
+ pr->u.p.lb = lb + init;
+ pr->u.p.ub = lb + limit;
+ } else {
+ T ub_tmp = lb + limit * st; // calculated upper bound, "ub" is user-defined upper bound
+ pr->u.p.lb = lb + init * st;
+ // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly
+ if ( st > 0 ) {
+ pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
+ } else {
+ pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
+ }
+ }
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ }
+ break;
+ } // case
+ case kmp_sch_guided_iterative_chunked :
+ {
+ T nproc = team->t.t_nproc;
+ KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));
+
+ if ( nproc > 1 ) {
+ if ( (2L * chunk + 1 ) * nproc >= tc ) {
+ /* chunk size too large, switch to dynamic */
+ schedule = kmp_sch_dynamic_chunked;
+ } else {
+ // when remaining iters become less than parm2 - switch to dynamic
+ pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
+ *(double*)&pr->u.p.parm3 = guided_flt_param / nproc; // may occupy parm3 and parm4
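+                // Example (illustrative values): nproc = 4, chunk = 3 -> parm2 = 2 * 4 * 4 = 32 and
+                // parm3 = 0.5 / 4 = 0.125 (fraction of the remaining iterations taken per grab).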
+ }
+ } else {
+ KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
+ schedule = kmp_sch_static_greedy;
+ /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
+ KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
+ pr->u.p.parm1 = tc;
+ } // if
+ } // case
+ break;
+ case kmp_sch_guided_analytical_chunked:
+ {
+ T nproc = team->t.t_nproc;
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));
+
+ if ( nproc > 1 ) {
+ if ( (2L * chunk + 1 ) * nproc >= tc ) {
+ /* chunk size too large, switch to dynamic */
+ schedule = kmp_sch_dynamic_chunked;
+ } else {
+ /* commonly used term: (2 nproc - 1)/(2 nproc) */
+ DBL x;
+
+ #if KMP_OS_WINDOWS && KMP_ARCH_X86
+ /* Linux* OS already has 64-bit computation by default for
+ long double, and on Windows* OS on Intel(R) 64,
+ /Qlong_double doesn't work. On Windows* OS
+ on IA-32 architecture, we need to set precision to
+ 64-bit instead of the default 53-bit. Even though long
+ double doesn't work on Windows* OS on Intel(R) 64, the
+ resulting lack of precision is not expected to impact
+ the correctness of the algorithm, but this has not been
+ mathematically proven.
+ */
+ // save original FPCW and set precision to 64-bit, as
+ // Windows* OS on IA-32 architecture defaults to 53-bit
+ unsigned int oldFpcw = _control87(0,0);
+ _control87(_PC_64,_MCW_PC); // 0,0x30000
+ #endif
+ /* value used for comparison in solver for cross-over point */
+ long double target = ((long double)chunk * 2 + 1) * nproc / tc;
+
+ /* crossover point--chunk indexes equal to or greater than
+ this point switch to dynamic-style scheduling */
+ UT cross;
+
+ /* commonly used term: (2 nproc - 1)/(2 nproc) */
+ x = (long double)1.0 - (long double)0.5 / nproc;
+
+ #ifdef KMP_DEBUG
+ { // test natural alignment
+ struct _test_a {
+ char a;
+ union {
+ char b;
+ DBL d;
+ };
+ } t;
+ ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
+ //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long long)natural_alignment );
+ KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
+ }
+ #endif // KMP_DEBUG
+
+ /* save the term in thread private dispatch structure */
+ *(DBL*)&pr->u.p.parm3 = x;
+
+ /* solve for the crossover point to the nearest integer i for which C_i <= chunk */
+ {
+ UT left, right, mid;
+ long double p;
+
+ /* estimate initial upper and lower bound */
+
+                        /* it doesn't matter what value 'right' starts with, as long as it is positive;
+                           it only affects the performance of the solver
+ */
+ right = 229;
+ p = __kmp_pow< UT >(x,right);
+ if ( p > target ) {
+ do{
+ p *= p;
+ right <<= 1;
+ } while(p>target && right < (1<<27));
+ left = right >> 1; /* lower bound is previous (failed) estimate of upper bound */
+ } else {
+ left = 0;
+ }
+
+ /* bisection root-finding method */
+ while ( left + 1 < right ) {
+ mid = (left + right) / 2;
+ if ( __kmp_pow< UT >(x,mid) > target ) {
+ left = mid;
+ } else {
+ right = mid;
+ }
+ } // while
+ cross = right;
+ }
+ /* assert sanity of computed crossover point */
+ KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);
+
+ /* save the crossover point in thread private dispatch structure */
+ pr->u.p.parm2 = cross;
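+                    // Example (illustrative values): nproc = 4, chunk = 7, tc = 1000 -> x = 0.875,
+                    // target = 15 * 4 / 1000 = 0.06; the bisection yields cross = 22, since
+                    // 0.875^21 ~= 0.0606 > 0.06 and 0.875^22 ~= 0.0530 <= 0.06 (see the assert above).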
+
+ // C75803
+ #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
+ #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
+ #else
+ #define GUIDED_ANALYTICAL_WORKAROUND (x)
+ #endif
+ /* dynamic-style scheduling offset */
+ pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
+ #if KMP_OS_WINDOWS && KMP_ARCH_X86
+ // restore FPCW
+ _control87(oldFpcw,_MCW_PC);
+ #endif
+ } // if
+ } else {
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
+ gtid ) );
+ schedule = kmp_sch_static_greedy;
+ /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
+ pr->u.p.parm1 = tc;
+ } // if
+ } // case
+ break;
+ case kmp_sch_static_greedy:
+ KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
+ pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
+ ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
+ tc;
+ break;
+ case kmp_sch_static_chunked :
+ case kmp_sch_dynamic_chunked :
+ if ( pr->u.p.parm1 <= 0 ) {
+ pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
+ }
+ KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
+ break;
+ case kmp_sch_trapezoidal :
+ {
+ /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
+
+ T parm1, parm2, parm3, parm4;
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );
+
+ parm1 = chunk;
+
+ /* F : size of the first cycle */
+ parm2 = ( tc / (2 * team->t.t_nproc) );
+
+ if ( parm2 < 1 ) {
+ parm2 = 1;
+ }
+
+ /* L : size of the last cycle. Make sure the last cycle
+ * is not larger than the first cycle.
+ */
+ if ( parm1 < 1 ) {
+ parm1 = 1;
+ } else if ( parm1 > parm2 ) {
+ parm1 = parm2;
+ }
+
+ /* N : number of cycles */
+ parm3 = ( parm2 + parm1 );
+ parm3 = ( 2 * tc + parm3 - 1) / parm3;
+
+ if ( parm3 < 2 ) {
+ parm3 = 2;
+ }
+
+ /* sigma : decreasing incr of the trapezoid */
+ parm4 = ( parm3 - 1 );
+ parm4 = ( parm2 - parm1 ) / parm4;
+
+ // pointless check, because parm4 >= 0 always
+ //if ( parm4 < 0 ) {
+ // parm4 = 0;
+ //}
+
+ pr->u.p.parm1 = parm1;
+ pr->u.p.parm2 = parm2;
+ pr->u.p.parm3 = parm3;
+ pr->u.p.parm4 = parm4;
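+            // Example (illustrative values): tc = 100, nproc = 2, chunk = 4 -> F = parm2 = 25,
+            // L = parm1 = 4, N = parm3 = 7 cycles, sigma = parm4 = 3; cycle sizes are
+            // 25, 22, 19, 16, 13, 10, 7 (sum 112 >= 100).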
+ } // case
+ break;
+
+ default:
+ {
+ __kmp_msg(
+ kmp_ms_fatal, // Severity
+ KMP_MSG( UnknownSchedTypeDetected ), // Primary message
+ KMP_HNT( GetNewerLibrary ), // Hint
+ __kmp_msg_null // Variadic argument list terminator
+ );
+ }
+ break;
+ } // switch
+ pr->schedule = schedule;
+ if ( active ) {
+        /* This buffer is free to use once the shared buffer_index catches up to my_buffer_index */
+
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
+ gtid, my_buffer_index, sh->buffer_index) );
+ __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
+ USE_ITT_BUILD_ARG( NULL )
+ );
+ // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and my_buffer_index are
+ // *always* 32-bit integers.
+ KMP_MB(); /* is this necessary? */
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
+ gtid, my_buffer_index, sh->buffer_index) );
+
+ th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
+ th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
+#if USE_ITT_BUILD
+ if ( pr->ordered ) {
+ __kmp_itt_ordered_init( gtid );
+ }; // if
+ // Report loop metadata
+ if ( itt_need_metadata_reporting ) {
+ // Only report metadata by master of active team at level 1
+ kmp_uint64 schedtype = 0;
+ switch ( schedule ) {
+ case kmp_sch_static_chunked:
+ case kmp_sch_static_balanced:// Chunk is calculated in the switch above
+ break;
+ case kmp_sch_static_greedy:
+ cur_chunk = pr->u.p.parm1;
+ break;
+ case kmp_sch_dynamic_chunked:
+ schedtype = 1;
+ break;
+ case kmp_sch_guided_iterative_chunked:
+ case kmp_sch_guided_analytical_chunked:
+ schedtype = 2;
+ break;
+ default:
+// Should we put this case under "static"?
+// case kmp_sch_static_steal:
+ schedtype = 3;
+ break;
+ }
+ __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
+ }
+#endif /* USE_ITT_BUILD */
+ }; // if
+
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
+ " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
+ " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
+ traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
+ traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
+ traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
+ KD_TRACE(10, ( buff,
+ gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
+ pr->u.p.st, pr->u.p.tc, pr->u.p.count,
+ pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
+ pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ #if ( KMP_STATIC_STEAL_ENABLED )
+ if ( ___kmp_size_type < 8 ) {
+ // It cannot be guaranteed that after execution of a loop with some other schedule kind
+ // all the parm3 variables will contain the same value.
+        // Even if all parm3 values were the same, a bad case would still exist, e.g. using 0 and 1
+        // rather than a program-lifetime increment.
+        // So a dedicated variable is required; 'static_steal_counter' is used.
+ if( schedule == kmp_sch_static_steal ) {
+ // Other threads will inspect this variable when searching for a victim.
+            // This is a flag showing that, from now on, other threads may steal from this thread.
+ volatile T * p = &pr->u.p.static_steal_counter;
+ *p = *p + 1;
+ }
+ }
+    #endif // ( KMP_STATIC_STEAL_ENABLED )
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
+ team_info->parallel_id, task_info->task_id, team_info->microtask);
+ }
+#endif
+}
+
+/*
+ * For ordered loops, either __kmp_dispatch_finish() should be called after
+ * every iteration, or __kmp_dispatch_finish_chunk() should be called after
+ * every chunk of iterations. If the ordered section(s) were not executed
+ * for this iteration (or every iteration in this chunk), we need to set the
+ * ordered iteration counters so that the next thread can proceed.
+ */
+template< typename UT >
+static void
+__kmp_dispatch_finish( int gtid, ident_t *loc )
+{
+ typedef typename traits_t< UT >::signed_t ST;
+ kmp_info_t *th = __kmp_threads[ gtid ];
+
+ KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
+ if ( ! th -> th.th_team -> t.t_serialized ) {
+
+ dispatch_private_info_template< UT > * pr =
+ reinterpret_cast< dispatch_private_info_template< UT >* >
+ ( th->th.th_dispatch->th_dispatch_pr_current );
+ dispatch_shared_info_template< UT > volatile * sh =
+ reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
+ ( th->th.th_dispatch->th_dispatch_sh_current );
+ KMP_DEBUG_ASSERT( pr );
+ KMP_DEBUG_ASSERT( sh );
+ KMP_DEBUG_ASSERT( th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
+
+ if ( pr->ordered_bumped ) {
+ KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
+ gtid ) );
+ pr->ordered_bumped = 0;
+ } else {
+ UT lower = pr->u.p.ordered_lower;
+
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
+ USE_ITT_BUILD_ARG(NULL)
+ );
+ KMP_MB(); /* is this necessary? */
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
+ } // if
+ } // if
+ KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
+}
+
+#ifdef KMP_GOMP_COMPAT
+
+template< typename UT >
+static void
+__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
+{
+ typedef typename traits_t< UT >::signed_t ST;
+ kmp_info_t *th = __kmp_threads[ gtid ];
+
+ KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
+ if ( ! th -> th.th_team -> t.t_serialized ) {
+// int cid;
+ dispatch_private_info_template< UT > * pr =
+ reinterpret_cast< dispatch_private_info_template< UT >* >
+ ( th->th.th_dispatch->th_dispatch_pr_current );
+ dispatch_shared_info_template< UT > volatile * sh =
+ reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
+ ( th->th.th_dispatch->th_dispatch_sh_current );
+ KMP_DEBUG_ASSERT( pr );
+ KMP_DEBUG_ASSERT( sh );
+ KMP_DEBUG_ASSERT( th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
+
+// for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) {
+ UT lower = pr->u.p.ordered_lower;
+ UT upper = pr->u.p.ordered_upper;
+ UT inc = upper - lower + 1;
+
+ if ( pr->ordered_bumped == inc ) {
+ KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
+ gtid ) );
+ pr->ordered_bumped = 0;
+ } else {
+ inc -= pr->ordered_bumped;
+
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
+ "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
+ USE_ITT_BUILD_ARG(NULL)
+ );
+
+ KMP_MB(); /* is this necessary? */
+ KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
+ gtid ) );
+ pr->ordered_bumped = 0;
+//!!!!! TODO check if the inc should be unsigned, or signed???
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
+ "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
+ }
+// }
+ }
+ KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
+}
+
+#endif /* KMP_GOMP_COMPAT */
+
+/* Define a macro for exiting __kmp_dispatch_next(). If status is 0
+ * (no more work), then tell OMPT the loop is over. In some cases
+ * kmp_dispatch_fini() is not called. */
+#if OMPT_SUPPORT && OMPT_TRACE
+#define OMPT_LOOP_END \
+ if (status == 0) { \
+ if (ompt_enabled && \
+ ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
+ ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \
+ ompt_callbacks.ompt_callback(ompt_event_loop_end)( \
+ team_info->parallel_id, task_info->task_id); \
+ } \
+ }
+#else
+#define OMPT_LOOP_END // no-op
+#endif
+
+template< typename T >
+static int
+__kmp_dispatch_next(
+ ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st
+) {
+
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
+ typedef typename traits_t< T >::floating_t DBL;
+#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
+ static const int ___kmp_size_type = sizeof( UT );
+#endif
+
+    // This is potentially slightly misleading: schedule(runtime) will appear here even if the actual runtime schedule
+    // is static. (Which points out a disadvantage of schedule(runtime): even when static scheduling is used it costs
+    // more than a compile-time choice to use static scheduling would.)
+ KMP_TIME_BLOCK(FOR_dynamic_scheduling);
+
+ int status;
+ dispatch_private_info_template< T > * pr;
+ kmp_info_t * th = __kmp_threads[ gtid ];
+ kmp_team_t * team = th -> th.th_team;
+
+ KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
+ KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ if ( team -> t.t_serialized ) {
+        /* NOTE: serialize this dispatch because we are not at the active level */
+ pr = reinterpret_cast< dispatch_private_info_template< T >* >
+ ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
+ KMP_DEBUG_ASSERT( pr );
+
+ if ( (status = (pr->u.p.tc != 0)) == 0 ) {
+ *p_lb = 0;
+ *p_ub = 0;
+// if ( p_last != NULL )
+// *p_last = 0;
+ if ( p_st != NULL )
+ *p_st = 0;
+ if ( __kmp_env_consistency_check ) {
+ if ( pr->pushed_ws != ct_none ) {
+ pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
+ }
+ }
+ } else if ( pr->nomerge ) {
+ kmp_int32 last;
+ T start;
+ UT limit, trip, init;
+ ST incr;
+ T chunk = pr->u.p.parm1;
+
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );
+
+ init = chunk * pr->u.p.count++;
+ trip = pr->u.p.tc - 1;
+
+ if ( (status = (init <= trip)) == 0 ) {
+ *p_lb = 0;
+ *p_ub = 0;
+// if ( p_last != NULL )
+// *p_last = 0;
+ if ( p_st != NULL )
+ *p_st = 0;
+ if ( __kmp_env_consistency_check ) {
+ if ( pr->pushed_ws != ct_none ) {
+ pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
+ }
+ }
+ } else {
+ start = pr->u.p.lb;
+ limit = chunk + init - 1;
+ incr = pr->u.p.st;
+
+ if ( (last = (limit >= trip)) != 0 ) {
+ limit = trip;
+ #if KMP_OS_WINDOWS
+ pr->u.p.last_upper = pr->u.p.ub;
+ #endif /* KMP_OS_WINDOWS */
+ }
+ if ( p_last != NULL )
+ *p_last = last;
+ if ( p_st != NULL )
+ *p_st = incr;
+ if ( incr == 1 ) {
+ *p_lb = start + init;
+ *p_ub = start + limit;
+ } else {
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ }
+
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } // if
+ } else {
+ pr->u.p.tc = 0;
+ *p_lb = pr->u.p.lb;
+ *p_ub = pr->u.p.ub;
+ #if KMP_OS_WINDOWS
+ pr->u.p.last_upper = *p_ub;
+ #endif /* KMP_OS_WINDOWS */
+ if ( p_last != NULL )
+ *p_last = TRUE;
+ if ( p_st != NULL )
+ *p_st = pr->u.p.st;
+ } // if
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
+ "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
+ KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_DISPATCH_NEXT();
+#endif
+ OMPT_LOOP_END;
+ return status;
+ } else {
+ kmp_int32 last = 0;
+ dispatch_shared_info_template< UT > *sh;
+ T start;
+ ST incr;
+ UT limit, trip, init;
+
+ KMP_DEBUG_ASSERT( th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
+
+ pr = reinterpret_cast< dispatch_private_info_template< T >* >
+ ( th->th.th_dispatch->th_dispatch_pr_current );
+ KMP_DEBUG_ASSERT( pr );
+ sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
+ ( th->th.th_dispatch->th_dispatch_sh_current );
+ KMP_DEBUG_ASSERT( sh );
+
+ if ( pr->u.p.tc == 0 ) {
+ // zero trip count
+ status = 0;
+ } else {
+ switch (pr->schedule) {
+ #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
+ case kmp_sch_static_steal:
+ {
+ T chunk = pr->u.p.parm1;
+
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );
+
+ trip = pr->u.p.tc - 1;
+
+ if ( ___kmp_size_type > 4 ) {
+ // Other threads do not look into the data of this thread,
+                    // so no volatile cast is necessary.
+ init = ( pr->u.p.count )++;
+ status = ( init < (UT)pr->u.p.ub );
+ } else {
+ typedef union {
+ struct {
+ UT count;
+ T ub;
+ } p;
+ kmp_int64 b;
+ } union_i4;
+ // All operations on 'count' or 'ub' must be combined atomically together.
+ // stealing implemented only for 4-byte indexes
+ {
+ union_i4 vold, vnew;
+ vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
+ vnew = vold;
+ vnew.p.count++;
+ while( ! KMP_COMPARE_AND_STORE_ACQ64(
+ ( volatile kmp_int64* )&pr->u.p.count,
+ *VOLATILE_CAST(kmp_int64 *)&vold.b,
+ *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
+ KMP_CPU_PAUSE();
+ vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
+ vnew = vold;
+ vnew.p.count++;
+ }
+ vnew = vold;
+ init = vnew.p.count;
+ status = ( init < (UT)vnew.p.ub ) ;
+ }
+
+ if( !status ) {
+ kmp_info_t **other_threads = team->t.t_threads;
+ int while_limit = 10;
+ int while_index = 0;
+
+ // TODO: algorithm of searching for a victim
+ // should be cleaned up and measured
+ while ( ( !status ) && ( while_limit != ++while_index ) ) {
+ union_i4 vold, vnew;
+ kmp_int32 remaining; // kmp_int32 because KMP_I4 only
+ T victimIdx = pr->u.p.parm4;
+ T oldVictimIdx = victimIdx;
+ dispatch_private_info_template< T > * victim;
+
+ do {
+ if( !victimIdx ) {
+ victimIdx = team->t.t_nproc - 1;
+ } else {
+ --victimIdx;
+ }
+ victim = reinterpret_cast< dispatch_private_info_template< T >* >
+ ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
+ } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
+ // TODO: think about a proper place of this test
+ if ( ( !victim ) ||
+ ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
+ (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {
+ // TODO: delay would be nice
+ continue;
+ // the victim is not ready yet to participate in stealing
+ // because the victim is still in kmp_init_dispatch
+ }
+ if ( oldVictimIdx == victimIdx ) {
+ break;
+ }
+ pr->u.p.parm4 = victimIdx;
+
+ while( 1 ) {
+ vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
+ vnew = vold;
+
+ KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
+ if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
+ break;
+ }
+ vnew.p.ub -= (remaining >> 2);
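+                            // Example (illustrative values): the victim has count = 10, ub = 30 ->
+                            // remaining = 20; steal a quarter (5 chunks) by lowering the victim's ub
+                            // to 25, so the thief takes chunk indexes [25,30) if the CAS below succeeds.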
+ KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
+ #pragma warning( push )
+ // disable warning on pointless comparison of unsigned with 0
+ #pragma warning( disable: 186 )
+ KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
+ #pragma warning( pop )
+ // TODO: Should this be acquire or release?
+ if ( KMP_COMPARE_AND_STORE_ACQ64(
+ ( volatile kmp_int64 * )&victim->u.p.count,
+ *VOLATILE_CAST(kmp_int64 *)&vold.b,
+ *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
+ status = 1;
+ while_index = 0;
+ // now update own count and ub
+ #if KMP_ARCH_X86
+ // stealing executed on non-KMP_ARCH_X86 only
+ // Atomic 64-bit write on ia32 is
+ // unavailable, so we do this in steps.
+ // This code is not tested.
+ init = vold.p.count;
+ pr->u.p.ub = 0;
+ pr->u.p.count = init + 1;
+ pr->u.p.ub = vnew.p.count;
+ #else
+ init = vnew.p.ub;
+ vold.p.count = init + 1;
+ // TODO: is it safe and enough?
+ *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
+ #endif // KMP_ARCH_X86
+ break;
+ } // if
+ KMP_CPU_PAUSE();
+ } // while (1)
+ } // while
+ } // if
+ } // if
+ if ( !status ) {
+ *p_lb = 0;
+ *p_ub = 0;
+ if ( p_st != NULL ) *p_st = 0;
+ } else {
+ start = pr->u.p.parm2;
+ init *= chunk;
+ limit = chunk + init - 1;
+ incr = pr->u.p.st;
+
+ KMP_DEBUG_ASSERT(init <= trip);
+ if ( (last = (limit >= trip)) != 0 )
+ limit = trip;
+ if ( p_st != NULL ) *p_st = incr;
+
+ if ( incr == 1 ) {
+ *p_lb = start + init;
+ *p_ub = start + limit;
+ } else {
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ }
+
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } // if
+ break;
+ } // case
+ #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
+ case kmp_sch_static_balanced:
+ {
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
+ if ( (status = !pr->u.p.count) != 0 ) { /* check if thread has any iteration to do */
+ pr->u.p.count = 1;
+ *p_lb = pr->u.p.lb;
+ *p_ub = pr->u.p.ub;
+ last = pr->u.p.parm1;
+ if ( p_st != NULL )
+ *p_st = pr->u.p.st;
+ } else { /* no iterations to do */
+ pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
+ }
+ if ( pr->ordered ) {
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } // case
+ break;
+ case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was merged here */
+ case kmp_sch_static_chunked:
+ {
+ T parm1;
+
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
+ gtid ) );
+ parm1 = pr->u.p.parm1;
+
+ trip = pr->u.p.tc - 1;
+ init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));
+
+ if ( (status = (init <= trip)) != 0 ) {
+ start = pr->u.p.lb;
+ incr = pr->u.p.st;
+ limit = parm1 + init - 1;
+
+ if ( (last = (limit >= trip)) != 0 )
+ limit = trip;
+
+ if ( p_st != NULL ) *p_st = incr;
+
+ pr->u.p.count += team->t.t_nproc;
+
+ if ( incr == 1 ) {
+ *p_lb = start + init;
+ *p_ub = start + limit;
+ }
+ else {
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ }
+
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } // if
+ } // case
+ break;
+
+ case kmp_sch_dynamic_chunked:
+ {
+ T chunk = pr->u.p.parm1;
+
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
+ gtid ) );
+
+ init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
+ trip = pr->u.p.tc - 1;
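+                // Example (illustrative values): chunk = 5 and the shared counter returns 2 ->
+                // init = 10 and limit = 14 below, i.e. this thread gets logical iterations 10..14
+                // (scaled by lb and st).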
+
+ if ( (status = (init <= trip)) == 0 ) {
+ *p_lb = 0;
+ *p_ub = 0;
+ if ( p_st != NULL ) *p_st = 0;
+ } else {
+ start = pr->u.p.lb;
+ limit = chunk + init - 1;
+ incr = pr->u.p.st;
+
+ if ( (last = (limit >= trip)) != 0 )
+ limit = trip;
+
+ if ( p_st != NULL ) *p_st = incr;
+
+ if ( incr == 1 ) {
+ *p_lb = start + init;
+ *p_ub = start + limit;
+ } else {
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ }
+
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } // if
+ } // case
+ break;
+
+ case kmp_sch_guided_iterative_chunked:
+ {
+ T chunkspec = pr->u.p.parm1;
+ KD_TRACE(100,
+ ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
+ trip = pr->u.p.tc;
+ // Start atomic part of calculations
+ while(1) {
+ ST remaining; // signed, because can be < 0
+ init = sh->u.s.iteration; // shared value
+ remaining = trip - init;
+ if ( remaining <= 0 ) { // AC: need to compare with 0 first
+ // nothing to do, don't try atomic op
+ status = 0;
+ break;
+ }
+ if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default
+                    // use dynamic-style schedule
+                    // atomically increment iterations, get old value
+ init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
+ remaining = trip - init;
+ if (remaining <= 0) {
+ status = 0; // all iterations got by other threads
+ } else {
+ // got some iterations to work on
+ status = 1;
+ if ( (T)remaining > chunkspec ) {
+ limit = init + chunkspec - 1;
+ } else {
+ last = 1; // the last chunk
+ limit = init + remaining - 1;
+ } // if
+ } // if
+ break;
+ } // if
+ limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc
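+                    // Example (illustrative values): remaining = 800 and parm3 = 0.0625 -> grab 50
+                    // iterations; after a successful CAS and --limit the chunk is [init, init + 49].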
+ if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
+ // CAS was successful, chunk obtained
+ status = 1;
+ --limit;
+ break;
+ } // if
+ } // while
+ if ( status != 0 ) {
+ start = pr->u.p.lb;
+ incr = pr->u.p.st;
+ if ( p_st != NULL )
+ *p_st = incr;
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } else {
+ *p_lb = 0;
+ *p_ub = 0;
+ if ( p_st != NULL )
+ *p_st = 0;
+ } // if
+ } // case
+ break;
+
+ case kmp_sch_guided_analytical_chunked:
+ {
+ T chunkspec = pr->u.p.parm1;
+ UT chunkIdx;
+ #if KMP_OS_WINDOWS && KMP_ARCH_X86
+ /* for storing original FPCW value for Windows* OS on
+ IA-32 architecture 8-byte version */
+ unsigned int oldFpcw;
+ unsigned int fpcwSet = 0;
+ #endif
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
+ gtid ) );
+
+ trip = pr->u.p.tc;
+
+ KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
+ KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);
+
+ while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */
+ chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
+ if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
+ --trip;
+ /* use dynamic-style scheduling */
+ init = chunkIdx * chunkspec + pr->u.p.count;
+ /* need to verify init > 0 in case of overflow in the above calculation */
+ if ( (status = (init > 0 && init <= trip)) != 0 ) {
+ limit = init + chunkspec -1;
+
+ if ( (last = (limit >= trip)) != 0 )
+ limit = trip;
+ }
+ break;
+ } else {
+ /* use exponential-style scheduling */
+                    /* The following check works around the lack of long double precision on Windows* OS,
+                       which could otherwise cause init != 0 for chunkIdx == 0.
+ */
+ #if KMP_OS_WINDOWS && KMP_ARCH_X86
+ /* If we haven't already done so, save original
+ FPCW and set precision to 64-bit, as Windows* OS
+ on IA-32 architecture defaults to 53-bit */
+ if ( !fpcwSet ) {
+ oldFpcw = _control87(0,0);
+ _control87(_PC_64,_MCW_PC);
+ fpcwSet = 0x30000;
+ }
+ #endif
+ if ( chunkIdx ) {
+ init = __kmp_dispatch_guided_remaining< T >(
+ trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
+ KMP_DEBUG_ASSERT(init);
+ init = trip - init;
+ } else
+ init = 0;
+ limit = trip - __kmp_dispatch_guided_remaining< T >(
+ trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
+ KMP_ASSERT(init <= limit);
+ if ( init < limit ) {
+ KMP_DEBUG_ASSERT(limit <= trip);
+ --limit;
+ status = 1;
+ break;
+ } // if
+ } // if
+ } // while (1)
+ #if KMP_OS_WINDOWS && KMP_ARCH_X86
+ /* restore FPCW if necessary
+ AC: check fpcwSet flag first because oldFpcw can be uninitialized here
+ */
+ if ( fpcwSet && ( oldFpcw & fpcwSet ) )
+ _control87(oldFpcw,_MCW_PC);
+ #endif
+ if ( status != 0 ) {
+ start = pr->u.p.lb;
+ incr = pr->u.p.st;
+ if ( p_st != NULL )
+ *p_st = incr;
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ }
+ } else {
+ *p_lb = 0;
+ *p_ub = 0;
+ if ( p_st != NULL )
+ *p_st = 0;
+ }
+ } // case
+ break;
+
+ case kmp_sch_trapezoidal:
+ {
+ UT index;
+ T parm2 = pr->u.p.parm2;
+ T parm3 = pr->u.p.parm3;
+ T parm4 = pr->u.p.parm4;
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
+ gtid ) );
+
+ index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );
+
+ init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
+ trip = pr->u.p.tc - 1;
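+                // Example (illustrative values): with parm2 = 25 and parm4 = 3 (see __kmp_dispatch_init
+                // above), index = 0 gives init = 0, limit = 24 (25 iterations) and
+                // index = 1 gives init = 25, limit = 46 (22 iterations).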
+
+ if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
+ *p_lb = 0;
+ *p_ub = 0;
+ if ( p_st != NULL ) *p_st = 0;
+ } else {
+ start = pr->u.p.lb;
+ limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
+ incr = pr->u.p.st;
+
+ if ( (last = (limit >= trip)) != 0 )
+ limit = trip;
+
+ if ( p_st != NULL ) *p_st = incr;
+
+ if ( incr == 1 ) {
+ *p_lb = start + init;
+ *p_ub = start + limit;
+ } else {
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ }
+
+ if ( pr->ordered ) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t< UT >::spec, traits_t< UT >::spec );
+ KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ } // if
+ } // if
+ } // case
+ break;
+ default:
+ {
+ status = 0; // to avoid complaints on uninitialized variable use
+ __kmp_msg(
+ kmp_ms_fatal, // Severity
+ KMP_MSG( UnknownSchedTypeDetected ), // Primary message
+ KMP_HNT( GetNewerLibrary ), // Hint
+ __kmp_msg_null // Variadic argument list terminator
+ );
+ }
+ break;
+ } // switch
+ } // if tc == 0;
+
+ if ( status == 0 ) {
+ UT num_done;
+
+ num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
+ traits_t< UT >::spec );
+ KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ if ( (ST)num_done == team->t.t_nproc-1 ) {
+ /* NOTE: release this buffer to be reused */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ sh->u.s.num_done = 0;
+ sh->u.s.iteration = 0;
+
+ /* TODO replace with general release procedure? */
+ if ( pr->ordered ) {
+ sh->u.s.ordered_iteration = 0;
+ }
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ sh -> buffer_index += KMP_MAX_DISP_BUF;
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
+ gtid, sh->buffer_index) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ } // if
+ if ( __kmp_env_consistency_check ) {
+ if ( pr->pushed_ws != ct_none ) {
+ pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
+ }
+ }
+
+ th -> th.th_dispatch -> th_deo_fcn = NULL;
+ th -> th.th_dispatch -> th_dxo_fcn = NULL;
+ th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
+ th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
+ } // if (status == 0)
+#if KMP_OS_WINDOWS
+ else if ( last ) {
+ pr->u.p.last_upper = pr->u.p.ub;
+ }
+#endif /* KMP_OS_WINDOWS */
+ if ( p_last != NULL && status != 0 )
+ *p_last = last;
+ } // if
+
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmp_dispatch_next: T#%%d normal case: " \
+ "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
+ KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_DISPATCH_NEXT();
+#endif
+ OMPT_LOOP_END;
+ return status;
+}
+
+template< typename T >
+static void
+__kmp_dist_get_bounds(
+ ident_t *loc,
+ kmp_int32 gtid,
+ kmp_int32 *plastiter,
+ T *plower,
+ T *pupper,
+ typename traits_t< T >::signed_t incr
+) {
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
kmp_uint32 team_id;
kmp_uint32 nteams;
UT trip_count;
kmp_team_t *team;
- kmp_info_t * th;
-
- KMP_DEBUG_ASSERT( plastiter && plower && pupper );
- KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
- "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
- traits_t< T >::spec );
- KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- if( __kmp_env_consistency_check ) {
- if( incr == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
- }
- if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
- // The loop is illegal.
- // Some zero-trip loops maintained by compiler, e.g.:
- // for(i=10;i<0;++i) // lower >= upper - run-time check
- // for(i=0;i>10;--i) // lower <= upper - run-time check
- // for(i=0;i>10;++i) // incr > 0 - compile-time check
- // for(i=10;i<0;--i) // incr < 0 - compile-time check
- // Compiler does not check the following illegal loops:
- // for(i=0;i<10;i+=incr) // where incr<0
- // for(i=10;i>0;i-=incr) // where incr<0
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
- }
- }
- th = __kmp_threads[gtid];
- team = th->th.th_team;
- #if OMP_40_ENABLED
- KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
- nteams = th->th.th_teams_size.nteams;
- #endif
- team_id = team->t.t_master_tid;
- KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
-
- // compute global trip count
- if( incr == 1 ) {
- trip_count = *pupper - *plower + 1;
- } else if(incr == -1) {
- trip_count = *plower - *pupper + 1;
- } else {
- trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
- }
-
- if( trip_count <= nteams ) {
- KMP_DEBUG_ASSERT(
- __kmp_static == kmp_sch_static_greedy || \
- __kmp_static == kmp_sch_static_balanced
- ); // Unknown static scheduling type.
- // only some teams get single iteration, others get nothing
- if( team_id < trip_count ) {
- *pupper = *plower = *plower + team_id * incr;
- } else {
- *plower = *pupper + incr; // zero-trip loop
- }
- if( plastiter != NULL )
- *plastiter = ( team_id == trip_count - 1 );
- } else {
- if( __kmp_static == kmp_sch_static_balanced ) {
+ kmp_info_t * th;
+
+ KMP_DEBUG_ASSERT( plastiter && plower && pupper );
+ KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
+ "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
+ traits_t< T >::spec );
+ KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ if( __kmp_env_consistency_check ) {
+ if( incr == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
+ }
+ if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
+ // The loop is illegal.
+ // Some zero-trip loops maintained by compiler, e.g.:
+ // for(i=10;i<0;++i) // lower >= upper - run-time check
+ // for(i=0;i>10;--i) // lower <= upper - run-time check
+ // for(i=0;i>10;++i) // incr > 0 - compile-time check
+ // for(i=10;i<0;--i) // incr < 0 - compile-time check
+ // Compiler does not check the following illegal loops:
+ // for(i=0;i<10;i+=incr) // where incr<0
+ // for(i=10;i>0;i-=incr) // where incr<0
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
+ }
+ }
+ th = __kmp_threads[gtid];
+ team = th->th.th_team;
+ #if OMP_40_ENABLED
+ KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
+ nteams = th->th.th_teams_size.nteams;
+ #endif
+ team_id = team->t.t_master_tid;
+ KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
+
+ // compute global trip count
+ if( incr == 1 ) {
+ trip_count = *pupper - *plower + 1;
+ } else if(incr == -1) {
+ trip_count = *plower - *pupper + 1;
+ } else {
+ trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
+ }
+
+ if( trip_count <= nteams ) {
+ KMP_DEBUG_ASSERT(
+ __kmp_static == kmp_sch_static_greedy || \
+ __kmp_static == kmp_sch_static_balanced
+ ); // Unknown static scheduling type.
+ // only some teams get single iteration, others get nothing
+ if( team_id < trip_count ) {
+ *pupper = *plower = *plower + team_id * incr;
+ } else {
+ *plower = *pupper + incr; // zero-trip loop
+ }
+ if( plastiter != NULL )
+ *plastiter = ( team_id == trip_count - 1 );
+ } else {
+ if( __kmp_static == kmp_sch_static_balanced ) {
UT chunk = trip_count / nteams;
UT extras = trip_count % nteams;
- *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
- *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
- if( plastiter != NULL )
- *plastiter = ( team_id == nteams - 1 );
- } else {
+ *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
+ *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
+ if( plastiter != NULL )
+ *plastiter = ( team_id == nteams - 1 );
+ } else {
T chunk_inc_count =
- ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
+ ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
T upper = *pupper;
- KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
- // Unknown static scheduling type.
- *plower += team_id * chunk_inc_count;
- *pupper = *plower + chunk_inc_count - incr;
- // Check/correct bounds if needed
- if( incr > 0 ) {
- if( *pupper < *plower )
- *pupper = i_maxmin< T >::mx;
- if( plastiter != NULL )
- *plastiter = *plower <= upper && *pupper > upper - incr;
- if( *pupper > upper )
- *pupper = upper; // tracker C73258
- } else {
- if( *pupper > *plower )
- *pupper = i_maxmin< T >::mn;
- if( plastiter != NULL )
- *plastiter = *plower >= upper && *pupper < upper - incr;
- if( *pupper < upper )
- *pupper = upper; // tracker C73258
- }
- }
- }
-}
-
-//-----------------------------------------------------------------------------------------
-// Dispatch routines
-// Transfer call to template< type T >
-// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
-// T lb, T ub, ST st, ST chunk )
-extern "C" {
-
-/*!
-@ingroup WORK_SHARING
-@{
-@param loc Source location
-@param gtid Global thread id
-@param schedule Schedule type
-@param lb Lower bound
-@param ub Upper bound
-@param st Step (or increment if you prefer)
-@param chunk The chunk size to block with
-
-This function prepares the runtime to start a dynamically scheduled for loop, saving the loop arguments.
-These functions are all identical apart from the types of the arguments.
-*/
-
-void
-__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-/*!
-See @ref __kmpc_dispatch_init_4
-*/
-void
-__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-/*!
-See @ref __kmpc_dispatch_init_4
-*/
-void
-__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int64 lb, kmp_int64 ub,
- kmp_int64 st, kmp_int64 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-/*!
-See @ref __kmpc_dispatch_init_4
-*/
-void
-__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_uint64 lb, kmp_uint64 ub,
- kmp_int64 st, kmp_int64 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-/*!
-See @ref __kmpc_dispatch_init_4
-
-Difference from __kmpc_dispatch_init set of functions is these functions
-are called for composite distribute parallel for construct. Thus before
-regular iterations dispatching we need to calc per-team iteration space.
-
-These functions are all identical apart from the types of the arguments.
-*/
-void
-__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
- __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-void
-__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
- __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-void
-__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
- __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-void
-__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
- __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
-}
-
-/*!
-@param loc Source code location
-@param gtid Global thread id
-@param p_last Pointer to a flag set to one if this is the last chunk or zero otherwise
-@param p_lb Pointer to the lower bound for the next chunk of work
-@param p_ub Pointer to the upper bound for the next chunk of work
-@param p_st Pointer to the stride for the next chunk of work
-@return one if there is work to be done, zero otherwise
-
-Get the next dynamically allocated chunk of work for this thread.
-If there is no more work, then the lb,ub and stride need not be modified.
-*/
-int
-__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
-{
- return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
-}
-
-/*!
-See @ref __kmpc_dispatch_next_4
-*/
-int
-__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
-{
- return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
-}
-
-/*!
-See @ref __kmpc_dispatch_next_4
-*/
-int
-__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
-{
- return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
-}
-
-/*!
-See @ref __kmpc_dispatch_next_4
-*/
-int
-__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
-{
- return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
-}
-
-/*!
-@param loc Source code location
-@param gtid Global thread id
-
-Mark the end of a dynamic loop.
-*/
-void
-__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
-}
-
-/*!
-See @ref __kmpc_dispatch_fini_4
-*/
-void
-__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
-}
-
-/*!
-See @ref __kmpc_dispatch_fini_4
-*/
-void
-__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
-}
-
-/*!
-See @ref __kmpc_dispatch_fini_4
-*/
-void
-__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
-}
-/*! @} */
-
-//-----------------------------------------------------------------------------------------
-//Non-template routines from kmp_dispatch.c used in other sources
-
-kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
- return value == checker;
-}
-
-kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
- return value != checker;
-}
-
-kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
- return value < checker;
-}
-
-kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
- return value >= checker;
-}
-
-kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
- return value <= checker;
-}
-kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
- return value == checker;
-}
-
-kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
- return value != checker;
-}
-
-kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
- return value < checker;
-}
-
-kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
- return value >= checker;
-}
-
-kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
- return value <= checker;
-}
-
-kmp_uint32
-__kmp_wait_yield_4(volatile kmp_uint32 * spinner,
- kmp_uint32 checker,
- kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
- , void * obj // Higher-level synchronization object, or NULL.
- )
-{
- // note: we may not belong to a team at this point
+ KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
+ // Unknown static scheduling type.
+ *plower += team_id * chunk_inc_count;
+ *pupper = *plower + chunk_inc_count - incr;
+ // Check/correct bounds if needed
+ if( incr > 0 ) {
+ if( *pupper < *plower )
+ *pupper = i_maxmin< T >::mx;
+ if( plastiter != NULL )
+ *plastiter = *plower <= upper && *pupper > upper - incr;
+ if( *pupper > upper )
+ *pupper = upper; // tracker C73258
+ } else {
+ if( *pupper > *plower )
+ *pupper = i_maxmin< T >::mn;
+ if( plastiter != NULL )
+ *plastiter = *plower >= upper && *pupper < upper - incr;
+ if( *pupper < upper )
+ *pupper = upper; // tracker C73258
+ }
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------------------
+// Dispatch routines
+// Transfer call to template< type T >
+// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
+// T lb, T ub, ST st, ST chunk )
+extern "C" {
+
+/*!
+@ingroup WORK_SHARING
+@{
+@param loc Source location
+@param gtid Global thread id
+@param schedule Schedule type
+@param lb Lower bound
+@param ub Upper bound
+@param st Step (or increment if you prefer)
+@param chunk The chunk size to block with
+
+This function prepares the runtime to start a dynamically scheduled for loop, saving the loop arguments.
+These functions are all identical apart from the types of the arguments.
+*/
+
+void
+__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+/*!
+See @ref __kmpc_dispatch_init_4
+*/
+void
+__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+/*!
+See @ref __kmpc_dispatch_init_4
+*/
+void
+__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int64 lb, kmp_int64 ub,
+ kmp_int64 st, kmp_int64 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+/*!
+See @ref __kmpc_dispatch_init_4
+*/
+void
+__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_uint64 lb, kmp_uint64 ub,
+ kmp_int64 st, kmp_int64 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+/*!
+See @ref __kmpc_dispatch_init_4
+
+These functions differ from the __kmpc_dispatch_init set in that they are
+called for the composite distribute parallel for construct. Thus, before
+dispatching the regular iterations, the per-team iteration space must be computed.
+
+These functions are all identical apart from the types of the arguments.
+*/
+void
+__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
+ __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+void
+__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
+ __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+void
+__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
+ __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+void
+__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
+ __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
+}
+
+/*!
+@param loc Source code location
+@param gtid Global thread id
+@param p_last Pointer to a flag set to one if this is the last chunk or zero otherwise
+@param p_lb Pointer to the lower bound for the next chunk of work
+@param p_ub Pointer to the upper bound for the next chunk of work
+@param p_st Pointer to the stride for the next chunk of work
+@return one if there is work to be done, zero otherwise
+
+Get the next dynamically allocated chunk of work for this thread.
+If there is no more work, then lb, ub and stride need not be modified.
+*/
+int
+__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
+{
+ return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
+}
+
+/*!
+See @ref __kmpc_dispatch_next_4
+*/
+int
+__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
+{
+ return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
+}
+
+/*!
+See @ref __kmpc_dispatch_next_4
+*/
+int
+__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
+{
+ return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
+}
+
+/*!
+See @ref __kmpc_dispatch_next_4
+*/
+int
+__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
+{
+ return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
+}
+
+/*!
+@param loc Source code location
+@param gtid Global thread id
+
+Mark the end of a dynamic loop.
+*/
+void
+__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
+}
+
+/*!
+See @ref __kmpc_dispatch_fini_4
+*/
+void
+__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
+}
+
+/*!
+See @ref __kmpc_dispatch_fini_4
+*/
+void
+__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
+}
+
+/*!
+See @ref __kmpc_dispatch_fini_4
+*/
+void
+__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
+}
+/*! @} */
+
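As a rough sketch (not part of this patch), a compiler-generated call sequence for a dynamically scheduled loop would drive these entry points roughly as follows; loc, gtid, the bounds, the chunk size and loop_body are assumed placeholders:

    // Conceptual lowering of: #pragma omp for schedule(dynamic, 4)
    kmp_int32 lb = 0, ub = 99, st = 1, last = 0;
    __kmpc_dispatch_init_4( loc, gtid, kmp_sch_dynamic_chunked, lb, ub, st, 4 );
    while ( __kmpc_dispatch_next_4( loc, gtid, &last, &lb, &ub, &st ) ) {
        for ( kmp_int32 i = lb; i <= ub; i += st ) {
            loop_body( i );  // placeholder for the user's loop body
        }
    }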
+//-----------------------------------------------------------------------------------------
+//Non-template routines from kmp_dispatch.c used in other sources
+
+kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
+ return value == checker;
+}
+
+kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
+ return value != checker;
+}
+
+kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
+ return value < checker;
+}
+
+kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
+ return value >= checker;
+}
+
+kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
+ return value <= checker;
+}
+kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
+ return value == checker;
+}
+
+kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
+ return value != checker;
+}
+
+kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
+ return value < checker;
+}
+
+kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
+ return value >= checker;
+}
+
+kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
+ return value <= checker;
+}
+
+kmp_uint32
+__kmp_wait_yield_4(volatile kmp_uint32 * spinner,
+ kmp_uint32 checker,
+ kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
+ , void * obj // Higher-level synchronization object, or NULL.
+ )
+{
+ // note: we may not belong to a team at this point
volatile kmp_uint32 * spin = spinner;
kmp_uint32 check = checker;
kmp_uint32 spins;
kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
kmp_uint32 r;
-
- KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
- KMP_INIT_YIELD( spins );
- // main wait spin loop
- while(!f(r = TCR_4(*spin), check)) {
- KMP_FSYNC_SPIN_PREPARE( obj );
- /* GEH - remove this since it was accidentally introduced when kmp_wait was split.
- It causes problems with infinite recursion because of exit lock */
- /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
- __kmp_abort_thread(); */
-
- /* if we have waited a bit, or are oversubscribed, yield */
- /* pause is in the following code */
- KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
- KMP_YIELD_SPIN( spins );
- }
- KMP_FSYNC_SPIN_ACQUIRED( obj );
- return r;
-}
-
-kmp_uint64
-__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
- kmp_uint64 checker,
- kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
- , void * obj // Higher-level synchronization object, or NULL.
- )
-{
- // note: we may not belong to a team at this point
+
+ KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
+ KMP_INIT_YIELD( spins );
+ // main wait spin loop
+ while(!f(r = TCR_4(*spin), check)) {
+ KMP_FSYNC_SPIN_PREPARE( obj );
+ /* GEH - remove this since it was accidentally introduced when kmp_wait was split.
+ It causes problems with infinite recursion because of exit lock */
+ /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
+ __kmp_abort_thread(); */
+
+ /* if we have waited a bit, or are oversubscribed, yield */
+ /* pause is in the following code */
+ KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
+ KMP_YIELD_SPIN( spins );
+ }
+ KMP_FSYNC_SPIN_ACQUIRED( obj );
+ return r;
+}
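A minimal, assumed usage sketch of this spin-wait helper together with the predicate routines defined above (e.g. __kmp_eq_4); the flag variable and the release value are placeholders:

    volatile kmp_uint32 flag = 0;
    // ... another thread eventually does: TCW_4( flag, 1 );
    __kmp_wait_yield_4( &flag, 1, __kmp_eq_4, NULL );  // spin, yielding, until flag == 1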
+
+kmp_uint64
+__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
+ kmp_uint64 checker,
+ kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
+ , void * obj // Higher-level synchronization object, or NULL.
+ )
+{
+ // note: we may not belong to a team at this point
volatile kmp_uint64 * spin = spinner;
kmp_uint64 check = checker;
kmp_uint32 spins;
kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
kmp_uint64 r;
-
- KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
- KMP_INIT_YIELD( spins );
- // main wait spin loop
- while(!f(r = *spin, check))
- {
- KMP_FSYNC_SPIN_PREPARE( obj );
- /* GEH - remove this since it was accidentally introduced when kmp_wait was split.
- It causes problems with infinite recursion because of exit lock */
- /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
- __kmp_abort_thread(); */
-
- // if we are oversubscribed,
- // or have waited a bit (and KMP_LIBARRY=throughput, then yield
- // pause is in the following code
- KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
- KMP_YIELD_SPIN( spins );
- }
- KMP_FSYNC_SPIN_ACQUIRED( obj );
- return r;
-}
-
-} // extern "C"
-
-#ifdef KMP_GOMP_COMPAT
-
-void
-__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
- kmp_int32 chunk, int push_ws )
-{
- __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
- push_ws );
-}
-
-void
-__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
- kmp_int32 chunk, int push_ws )
-{
- __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
- push_ws );
-}
-
-void
-__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
- kmp_int64 chunk, int push_ws )
-{
- __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
- push_ws );
-}
-
-void
-__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
- kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
- kmp_int64 chunk, int push_ws )
-{
- __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
- push_ws );
-}
-
-void
-__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
-}
-
-void
-__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
-}
-
-void
-__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
-}
-
-void
-__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
-{
- __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
-}
-
-#endif /* KMP_GOMP_COMPAT */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
+
+ KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
+ KMP_INIT_YIELD( spins );
+ // main wait spin loop
+ while(!f(r = *spin, check))
+ {
+ KMP_FSYNC_SPIN_PREPARE( obj );
+ /* GEH - remove this since it was accidentally introduced when kmp_wait was split.
+ It causes problems with infinite recursion because of exit lock */
+ /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
+ __kmp_abort_thread(); */
+
+ // if we are oversubscribed,
+ // or have waited a bit (and KMP_LIBRARY=throughput), then yield
+ // pause is in the following code
+ KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
+ KMP_YIELD_SPIN( spins );
+ }
+ KMP_FSYNC_SPIN_ACQUIRED( obj );
+ return r;
+}
+
+} // extern "C"
+
+#ifdef KMP_GOMP_COMPAT
+
+void
+__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
+ kmp_int32 chunk, int push_ws )
+{
+ __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
+ push_ws );
+}
+
+void
+__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
+ kmp_int32 chunk, int push_ws )
+{
+ __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
+ push_ws );
+}
+
+void
+__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
+ kmp_int64 chunk, int push_ws )
+{
+ __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
+ push_ws );
+}
+
+void
+__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
+ kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
+ kmp_int64 chunk, int push_ws )
+{
+ __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
+ push_ws );
+}
+
+void
+__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
+}
+
+void
+__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
+}
+
+void
+__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
+}
+
+void
+__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
+{
+ __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
+}
+
+#endif /* KMP_GOMP_COMPAT */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_environment.c b/contrib/libs/cxxsupp/openmp/kmp_environment.c
index f1da960491..75090d6c32 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_environment.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_environment.c
@@ -1,596 +1,596 @@
-/*
- * kmp_environment.c -- Handle environment variables OS-independently.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-/*
- ------------------------------------------------------------------------------------------------
- We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of
- loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv())
- unavailable. getenv() apparently gets a clean copy of the env variables as they existed
- at the start of the run.
- JH 12/23/2002
- ------------------------------------------------------------------------------------------------
- On Windows* OS, there are two environments (at least, see below):
-
- 1. Environment maintained by Windows* OS on IA-32 architecture.
- Accessible through GetEnvironmentVariable(),
- SetEnvironmentVariable(), and GetEnvironmentStrings().
-
- 2. Environment maintained by C RTL. Accessible through getenv(), putenv().
-
- putenv() function updates both C and Windows* OS on IA-32 architecture. getenv() function
- search for variables in C RTL environment only. Windows* OS on IA-32 architecture functions work *only*
- with Windows* OS on IA-32 architecture.
-
- Windows* OS on IA-32 architecture maintained by OS, so there is always only one Windows* OS on
- IA-32 architecture per process. Changes in Windows* OS on IA-32 architecture are process-visible.
-
- C environment maintained by C RTL. Multiple copies of C RTL may be present in the process, and
- each C RTL maintains its own environment. :-(
-
- Thus, proper way to work with environment on Windows* OS is:
-
- 1. Set variables with putenv() function -- both C and Windows* OS on
- IA-32 architecture are being updated. Windows* OS on
- IA-32 architecture may be considered as primary target,
- while updating C RTL environment is a free bonus.
-
- 2. Get variables with GetEnvironmentVariable() -- getenv() does not
- search Windows* OS on IA-32 architecture, and can not see variables
- set with SetEnvironmentVariable().
-
- 2007-04-05 -- lev
- ------------------------------------------------------------------------------------------------
-*/
-
-#include "kmp_environment.h"
-
-#include "kmp_os.h" // KMP_OS_*.
-#include "kmp.h" //
-#include "kmp_str.h" // __kmp_str_*().
-#include "kmp_i18n.h"
-
-#if KMP_OS_UNIX
- #include <stdlib.h> // getenv, setenv, unsetenv.
- #include <string.h> // strlen, strcpy.
- #if KMP_OS_DARWIN
- #include <crt_externs.h>
- #define environ (*_NSGetEnviron())
- #else
- extern char * * environ;
- #endif
-#elif KMP_OS_WINDOWS
- #include <windows.h> // GetEnvironmentVariable, SetEnvironmentVariable, GetLastError.
-#else
- #error Unknown or unsupported OS.
-#endif
-
-
-// TODO: Eliminate direct memory allocations, use string operations instead.
-
-static inline
-void *
-allocate(
- size_t size
-) {
- void * ptr = KMP_INTERNAL_MALLOC( size );
- if ( ptr == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- return ptr;
-} // allocate
-
-
-char *
-__kmp_env_get( char const * name ) {
-
- char * result = NULL;
-
- #if KMP_OS_UNIX
- char const * value = getenv( name );
- if ( value != NULL ) {
- size_t len = KMP_STRLEN( value ) + 1;
- result = (char *) KMP_INTERNAL_MALLOC( len );
- if ( result == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- KMP_STRNCPY_S( result, len, value, len );
- }; // if
- #elif KMP_OS_WINDOWS
- /*
- We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of
- loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv())
- unavailable. getenv() apparently gets a clean copy of the env variables as they existed
- at the start of the run.
- JH 12/23/2002
- */
- DWORD rc;
- rc = GetEnvironmentVariable( name, NULL, 0 );
- if ( ! rc ) {
- DWORD error = GetLastError();
- if ( error != ERROR_ENVVAR_NOT_FOUND ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantGetEnvVar, name ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- // Variable is not found, it's ok, just continue.
- } else {
- DWORD len = rc;
- result = (char *) KMP_INTERNAL_MALLOC( len );
- if ( result == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- rc = GetEnvironmentVariable( name, result, len );
- if ( ! rc ) {
- // GetEnvironmentVariable() may return 0 if variable is empty.
- // In such a case GetLastError() returns ERROR_SUCCESS.
- DWORD error = GetLastError();
- if ( error != ERROR_SUCCESS ) {
- // Unexpected error. The variable should be in the environment,
- // and buffer should be large enough.
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantGetEnvVar, name ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- KMP_INTERNAL_FREE( (void *) result );
- result = NULL;
- }; // if
- }; // if
- }; // if
- #else
- #error Unknown or unsupported OS.
- #endif
-
- return result;
-
-} // func __kmp_env_get
-
-
-// TODO: Find and replace all regular free() with __kmp_env_free().
-
-void
-__kmp_env_free( char const * * value ) {
-
- KMP_DEBUG_ASSERT( value != NULL );
- KMP_INTERNAL_FREE( (void *) * value );
- * value = NULL;
-
-} // func __kmp_env_free
-
-
-
-int
-__kmp_env_exists( char const * name ) {
-
- #if KMP_OS_UNIX
- char const * value = getenv( name );
- return ( ( value == NULL ) ? ( 0 ) : ( 1 ) );
- #elif KMP_OS_WINDOWS
- DWORD rc;
- rc = GetEnvironmentVariable( name, NULL, 0 );
- if ( rc == 0 ) {
- DWORD error = GetLastError();
- if ( error != ERROR_ENVVAR_NOT_FOUND ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantGetEnvVar, name ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- return 0;
- }; // if
- return 1;
- #else
- #error Unknown or unsupported OS.
- #endif
-
-} // func __kmp_env_exists
-
-
-
-void
-__kmp_env_set( char const * name, char const * value, int overwrite ) {
-
- #if KMP_OS_UNIX
- int rc = setenv( name, value, overwrite );
- if ( rc != 0 ) {
- // Dead code. I tried to put too many variables into Linux* OS
- // environment on IA-32 architecture. When application consumes
- // more than ~2.5 GB of memory, entire system feels bad. Sometimes
- // application is killed (by OS?), sometimes system stops
- // responding... But this error message never appears. --ln
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetEnvVar, name ),
- KMP_HNT( NotEnoughMemory ),
- __kmp_msg_null
- );
- }; // if
- #elif KMP_OS_WINDOWS
- BOOL rc;
- if ( ! overwrite ) {
- rc = GetEnvironmentVariable( name, NULL, 0 );
- if ( rc ) {
- // Variable exists, do not overwrite.
- return;
- }; // if
- DWORD error = GetLastError();
- if ( error != ERROR_ENVVAR_NOT_FOUND ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantGetEnvVar, name ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- }; // if
- rc = SetEnvironmentVariable( name, value );
- if ( ! rc ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetEnvVar, name ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- #else
- #error Unknown or unsupported OS.
- #endif
-
-} // func __kmp_env_set
-
-
-
-void
-__kmp_env_unset( char const * name ) {
-
- #if KMP_OS_UNIX
- unsetenv( name );
- #elif KMP_OS_WINDOWS
- BOOL rc = SetEnvironmentVariable( name, NULL );
- if ( ! rc ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetEnvVar, name ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- #else
- #error Unknown or unsupported OS.
- #endif
-
-} // func __kmp_env_unset
-
-// -------------------------------------------------------------------------------------------------
-
-/*
- Intel OpenMP RTL string representation of environment: just a string of characters, variables
- are separated with vertical bars, e. g.:
-
- "KMP_WARNINGS=0|KMP_AFFINITY=compact|"
-
- Empty variables are allowed and ignored:
-
- "||KMP_WARNINGS=1||"
-
-*/
-
-static
-void
-___kmp_env_blk_parse_string(
- kmp_env_blk_t * block, // M: Env block to fill.
- char const * env // I: String to parse.
-) {
-
- char const chr_delimiter = '|';
- char const str_delimiter[] = { chr_delimiter, 0 };
-
- char * bulk = NULL;
- kmp_env_var_t * vars = NULL;
- int count = 0; // Number of used elements in vars array.
- int delimiters = 0; // Number of delimiters in input string.
-
- // Copy original string, we will modify the copy.
- bulk = __kmp_str_format( "%s", env );
-
- // Loop thru all the vars in environment block. Count delimiters (maximum number of variables
- // is number of delimiters plus one).
- {
- char const * ptr = bulk;
- for ( ; ; ) {
- ptr = strchr( ptr, chr_delimiter );
- if ( ptr == NULL ) {
- break;
- }; // if
- ++ delimiters;
- ptr += 1;
- }; // forever
- }
-
- // Allocate vars array.
- vars = (kmp_env_var_t *) allocate( ( delimiters + 1 ) * sizeof( kmp_env_var_t ) );
-
- // Loop thru all the variables.
- {
- char * var; // Pointer to variable (both name and value).
- char * name; // Pointer to name of variable.
- char * value; // Pointer to value.
- char * buf; // Buffer for __kmp_str_token() function.
- var = __kmp_str_token( bulk, str_delimiter, & buf ); // Get the first var.
- while ( var != NULL ) {
- // Save found variable in vars array.
- __kmp_str_split( var, '=', & name, & value );
- KMP_DEBUG_ASSERT( count < delimiters + 1 );
- vars[ count ].name = name;
- vars[ count ].value = value;
- ++ count;
- // Get the next var.
- var = __kmp_str_token( NULL, str_delimiter, & buf );
- }; // while
- }
-
- // Fill out result.
- block->bulk = bulk;
- block->vars = vars;
- block->count = count;
-
-}; // ___kmp_env_blk_parse_string
-
-
-
-/*
- Windows* OS (actually, DOS) environment block is a piece of memory with environment variables. Each
- variable is terminated with zero byte, entire block is terminated with one extra zero byte, so
- we have two zero bytes at the end of environment block, e. g.:
-
- "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00"
-
- It is not clear how empty environment is represented. "\x00\x00"?
-*/
-
-#if KMP_OS_WINDOWS
-static
-void
-___kmp_env_blk_parse_windows(
- kmp_env_blk_t * block, // M: Env block to fill.
- char const * env // I: Pointer to Windows* OS (DOS) environment block.
-) {
-
- char * bulk = NULL;
- kmp_env_var_t * vars = NULL;
- int count = 0; // Number of used elements in vars array.
- int size = 0; // Size of bulk.
-
- char * name; // Pointer to name of variable.
- char * value; // Pointer to value.
-
- if ( env != NULL ) {
-
- // Loop thru all the vars in environment block. Count variables, find size of block.
- {
- char const * var; // Pointer to beginning of var.
- int len; // Length of variable.
- count = 0;
- var = env; // The first variable starts and beginning of environment block.
- len = KMP_STRLEN( var );
- while ( len != 0 ) {
- ++ count;
- size = size + len + 1;
- var = var + len + 1; // Move pointer to the beginning of the next variable.
- len = KMP_STRLEN( var );
- }; // while
- size = size + 1; // Total size of env block, including terminating zero byte.
- }
-
- // Copy original block to bulk, we will modify bulk, not original block.
- bulk = (char *) allocate( size );
- KMP_MEMCPY_S( bulk, size, env, size );
- // Allocate vars array.
- vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) );
-
- // Loop thru all the vars, now in bulk.
- {
- char * var; // Pointer to beginning of var.
- int len; // Length of variable.
- count = 0;
- var = bulk;
- len = KMP_STRLEN( var );
- while ( len != 0 ) {
- // Save variable in vars array.
- __kmp_str_split( var, '=', & name, & value );
- vars[ count ].name = name;
- vars[ count ].value = value;
- ++ count;
- // Get the next var.
- var = var + len + 1;
- len = KMP_STRLEN( var );
- }; // while
- }
-
- }; // if
-
- // Fill out result.
- block->bulk = bulk;
- block->vars = vars;
- block->count = count;
-
-}; // ___kmp_env_blk_parse_windows
-#endif
-
-
-/*
- Unix environment block is a array of pointers to variables, last pointer in array is NULL:
-
- { "HOME=/home/lev", "TERM=xterm", NULL }
-*/
-
-static
-void
-___kmp_env_blk_parse_unix(
- kmp_env_blk_t * block, // M: Env block to fill.
- char * * env // I: Unix environment to parse.
-) {
-
- char * bulk = NULL;
- kmp_env_var_t * vars = NULL;
- int count = 0;
- int size = 0; // Size of bulk.
-
- // Count number of variables and length of required bulk.
- {
- count = 0;
- size = 0;
- while ( env[ count ] != NULL ) {
- size += KMP_STRLEN( env[ count ] ) + 1;
- ++ count;
- }; // while
- }
-
- // Allocate memory.
- bulk = (char *) allocate( size );
- vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) );
-
- // Loop thru all the vars.
- {
- char * var; // Pointer to beginning of var.
- char * name; // Pointer to name of variable.
- char * value; // Pointer to value.
- int len; // Length of variable.
- int i;
- var = bulk;
- for ( i = 0; i < count; ++ i ) {
- // Copy variable to bulk.
- len = KMP_STRLEN( env[ i ] );
- KMP_MEMCPY_S( var, size, env[ i ], len + 1 );
- // Save found variable in vars array.
- __kmp_str_split( var, '=', & name, & value );
- vars[ i ].name = name;
- vars[ i ].value = value;
- // Move pointer.
- var += len + 1;
- }; // for
- }
-
- // Fill out result.
- block->bulk = bulk;
- block->vars = vars;
- block->count = count;
-
-}; // ___kmp_env_blk_parse_unix
-
-
-
-void
-__kmp_env_blk_init(
- kmp_env_blk_t * block, // M: Block to initialize.
- char const * bulk // I: Initialization string, or NULL.
-) {
-
- if ( bulk != NULL ) {
- ___kmp_env_blk_parse_string( block, bulk );
- } else {
- #if KMP_OS_UNIX
- ___kmp_env_blk_parse_unix( block, environ );
- #elif KMP_OS_WINDOWS
- {
- char * mem = GetEnvironmentStrings();
- if ( mem == NULL ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantGetEnvironment ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- ___kmp_env_blk_parse_windows( block, mem );
- FreeEnvironmentStrings( mem );
- }
- #else
- #error Unknown or unsupported OS.
- #endif
- }; // if
-
-} // __kmp_env_blk_init
-
-
-
-static
-int
-___kmp_env_var_cmp( // Comparison function for qsort().
- kmp_env_var_t const * lhs,
- kmp_env_var_t const * rhs
-) {
- return strcmp( lhs->name, rhs->name );
-}
-
-void
-__kmp_env_blk_sort(
- kmp_env_blk_t * block // M: Block of environment variables to sort.
-) {
-
- qsort(
- (void *) block->vars,
- block->count,
- sizeof( kmp_env_var_t ),
- ( int ( * )( void const *, void const * ) ) & ___kmp_env_var_cmp
- );
-
-} // __kmp_env_block_sort
-
-
-
-void
-__kmp_env_blk_free(
- kmp_env_blk_t * block // M: Block of environment variables to free.
-) {
-
- KMP_INTERNAL_FREE( (void *) block->vars );
- KMP_INTERNAL_FREE( (void *) block->bulk );
-
- block->count = 0;
- block->vars = NULL;
- block->bulk = NULL;
-
-} // __kmp_env_blk_free
-
-
-
-char const * // R: Value of variable or NULL if variable does not exist.
-__kmp_env_blk_var(
- kmp_env_blk_t * block, // I: Block of environment variables.
- char const * name // I: Name of variable to find.
-) {
-
- int i;
- for ( i = 0; i < block->count; ++ i ) {
- if ( strcmp( block->vars[ i ].name, name ) == 0 ) {
- return block->vars[ i ].value;
- }; // if
- }; // for
- return NULL;
-
-} // __kmp_env_block_var
-
-
-// end of file //
+/*
+ * kmp_environment.c -- Handle environment variables OS-independently.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of
+ loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv())
+ unavailable. getenv() apparently gets a clean copy of the env variables as they existed
+ at the start of the run.
+ JH 12/23/2002
+ ------------------------------------------------------------------------------------------------
+ On Windows* OS, there are two environments (at least, see below):
+
+ 1. Environment maintained by Windows* OS on IA-32 architecture.
+ Accessible through GetEnvironmentVariable(),
+ SetEnvironmentVariable(), and GetEnvironmentStrings().
+
+ 2. Environment maintained by C RTL. Accessible through getenv(), putenv().
+
+ The putenv() function updates both the C and the Windows* OS on IA-32 architecture environments. The
+ getenv() function searches for variables in the C RTL environment only. Windows* OS on IA-32
+ architecture functions work *only* with the Windows* OS on IA-32 architecture environment.
+
+ The Windows* OS on IA-32 architecture environment is maintained by the OS, so there is always only
+ one Windows* OS on IA-32 architecture environment per process. Changes in the Windows* OS on IA-32
+ architecture environment are process-visible.
+
+ The C environment is maintained by the C RTL. Multiple copies of the C RTL may be present in the
+ process, and each C RTL maintains its own environment. :-(
+
+ Thus, the proper way to work with the environment on Windows* OS is:
+
+ 1. Set variables with putenv() function -- both C and Windows* OS on
+ IA-32 architecture are being updated. Windows* OS on
+ IA-32 architecture may be considered as primary target,
+ while updating C RTL environment is a free bonus.
+
+ 2. Get variables with GetEnvironmentVariable() -- getenv() does not
+ search Windows* OS on IA-32 architecture, and can not see variables
+ set with SetEnvironmentVariable().
+
+ 2007-04-05 -- lev
+ ------------------------------------------------------------------------------------------------
+*/
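An illustrative sketch of the two-step pattern described above; the variable name and buffer size are assumptions, and the calls shown are the standard CRT/Win32 ones (putenv, GetEnvironmentVariable):

    char buf[ 256 ];                                   // assumed buffer size
    putenv( "KMP_EXAMPLE_VAR=1" );                     // updates both the C RTL and the OS environment
    DWORD n = GetEnvironmentVariable( "KMP_EXAMPLE_VAR", buf, sizeof( buf ) );
    if ( n == 0 && GetLastError() == ERROR_ENVVAR_NOT_FOUND ) {
        // variable is absent
    }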
+
+#include "kmp_environment.h"
+
+#include "kmp_os.h" // KMP_OS_*.
+#include "kmp.h" //
+#include "kmp_str.h" // __kmp_str_*().
+#include "kmp_i18n.h"
+
+#if KMP_OS_UNIX
+ #include <stdlib.h> // getenv, setenv, unsetenv.
+ #include <string.h> // strlen, strcpy.
+ #if KMP_OS_DARWIN
+ #include <crt_externs.h>
+ #define environ (*_NSGetEnviron())
+ #else
+ extern char * * environ;
+ #endif
+#elif KMP_OS_WINDOWS
+ #include <windows.h> // GetEnvironmentVariable, SetEnvironmentVariable, GetLastError.
+#else
+ #error Unknown or unsupported OS.
+#endif
+
+
+// TODO: Eliminate direct memory allocations, use string operations instead.
+
+static inline
+void *
+allocate(
+ size_t size
+) {
+ void * ptr = KMP_INTERNAL_MALLOC( size );
+ if ( ptr == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+ return ptr;
+} // allocate
+
+
+char *
+__kmp_env_get( char const * name ) {
+
+ char * result = NULL;
+
+ #if KMP_OS_UNIX
+ char const * value = getenv( name );
+ if ( value != NULL ) {
+ size_t len = KMP_STRLEN( value ) + 1;
+ result = (char *) KMP_INTERNAL_MALLOC( len );
+ if ( result == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+ KMP_STRNCPY_S( result, len, value, len );
+ }; // if
+ #elif KMP_OS_WINDOWS
+ /*
+ We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of
+ loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv())
+ unavailable. getenv() apparently gets a clean copy of the env variables as they existed
+ at the start of the run.
+ JH 12/23/2002
+ */
+ DWORD rc;
+ rc = GetEnvironmentVariable( name, NULL, 0 );
+ if ( ! rc ) {
+ DWORD error = GetLastError();
+ if ( error != ERROR_ENVVAR_NOT_FOUND ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantGetEnvVar, name ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ // Variable is not found, it's ok, just continue.
+ } else {
+ DWORD len = rc;
+ result = (char *) KMP_INTERNAL_MALLOC( len );
+ if ( result == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+ rc = GetEnvironmentVariable( name, result, len );
+ if ( ! rc ) {
+ // GetEnvironmentVariable() may return 0 if variable is empty.
+ // In such a case GetLastError() returns ERROR_SUCCESS.
+ DWORD error = GetLastError();
+ if ( error != ERROR_SUCCESS ) {
+ // Unexpected error. The variable should be in the environment,
+ // and buffer should be large enough.
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantGetEnvVar, name ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ KMP_INTERNAL_FREE( (void *) result );
+ result = NULL;
+ }; // if
+ }; // if
+ }; // if
+ #else
+ #error Unknown or unsupported OS.
+ #endif
+
+ return result;
+
+} // func __kmp_env_get
+
+
+// TODO: Find and replace all regular free() with __kmp_env_free().
+
+void
+__kmp_env_free( char const * * value ) {
+
+ KMP_DEBUG_ASSERT( value != NULL );
+ KMP_INTERNAL_FREE( (void *) * value );
+ * value = NULL;
+
+} // func __kmp_env_free
+
+
+
+int
+__kmp_env_exists( char const * name ) {
+
+ #if KMP_OS_UNIX
+ char const * value = getenv( name );
+ return ( ( value == NULL ) ? ( 0 ) : ( 1 ) );
+ #elif KMP_OS_WINDOWS
+ DWORD rc;
+ rc = GetEnvironmentVariable( name, NULL, 0 );
+ if ( rc == 0 ) {
+ DWORD error = GetLastError();
+ if ( error != ERROR_ENVVAR_NOT_FOUND ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantGetEnvVar, name ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ return 0;
+ }; // if
+ return 1;
+ #else
+ #error Unknown or unsupported OS.
+ #endif
+
+} // func __kmp_env_exists
+
+
+
+void
+__kmp_env_set( char const * name, char const * value, int overwrite ) {
+
+ #if KMP_OS_UNIX
+ int rc = setenv( name, value, overwrite );
+ if ( rc != 0 ) {
+ // Dead code. I tried to put too many variables into Linux* OS
+ // environment on IA-32 architecture. When application consumes
+ // more than ~2.5 GB of memory, entire system feels bad. Sometimes
+ // application is killed (by OS?), sometimes system stops
+ // responding... But this error message never appears. --ln
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetEnvVar, name ),
+ KMP_HNT( NotEnoughMemory ),
+ __kmp_msg_null
+ );
+ }; // if
+ #elif KMP_OS_WINDOWS
+ BOOL rc;
+ if ( ! overwrite ) {
+ rc = GetEnvironmentVariable( name, NULL, 0 );
+ if ( rc ) {
+ // Variable exists, do not overwrite.
+ return;
+ }; // if
+ DWORD error = GetLastError();
+ if ( error != ERROR_ENVVAR_NOT_FOUND ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantGetEnvVar, name ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ }; // if
+ rc = SetEnvironmentVariable( name, value );
+ if ( ! rc ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetEnvVar, name ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ #else
+ #error Unknown or unsupported OS.
+ #endif
+
+} // func __kmp_env_set
+
+
+
+void
+__kmp_env_unset( char const * name ) {
+
+ #if KMP_OS_UNIX
+ unsetenv( name );
+ #elif KMP_OS_WINDOWS
+ BOOL rc = SetEnvironmentVariable( name, NULL );
+ if ( ! rc ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetEnvVar, name ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ #else
+ #error Unknown or unsupported OS.
+ #endif
+
+} // func __kmp_env_unset
+
+// -------------------------------------------------------------------------------------------------
+
+/*
+ Intel OpenMP RTL string representation of environment: just a string of characters, variables
+ are separated with vertical bars, e. g.:
+
+ "KMP_WARNINGS=0|KMP_AFFINITY=compact|"
+
+ Empty variables are allowed and ignored:
+
+ "||KMP_WARNINGS=1||"
+
+*/
+
+static
+void
+___kmp_env_blk_parse_string(
+ kmp_env_blk_t * block, // M: Env block to fill.
+ char const * env // I: String to parse.
+) {
+
+ char const chr_delimiter = '|';
+ char const str_delimiter[] = { chr_delimiter, 0 };
+
+ char * bulk = NULL;
+ kmp_env_var_t * vars = NULL;
+ int count = 0; // Number of used elements in vars array.
+ int delimiters = 0; // Number of delimiters in input string.
+
+ // Copy original string, we will modify the copy.
+ bulk = __kmp_str_format( "%s", env );
+
+ // Loop thru all the vars in environment block. Count delimiters (maximum number of variables
+ // is number of delimiters plus one).
+ {
+ char const * ptr = bulk;
+ for ( ; ; ) {
+ ptr = strchr( ptr, chr_delimiter );
+ if ( ptr == NULL ) {
+ break;
+ }; // if
+ ++ delimiters;
+ ptr += 1;
+ }; // forever
+ }
+
+ // Allocate vars array.
+ vars = (kmp_env_var_t *) allocate( ( delimiters + 1 ) * sizeof( kmp_env_var_t ) );
+
+ // Loop thru all the variables.
+ {
+ char * var; // Pointer to variable (both name and value).
+ char * name; // Pointer to name of variable.
+ char * value; // Pointer to value.
+ char * buf; // Buffer for __kmp_str_token() function.
+ var = __kmp_str_token( bulk, str_delimiter, & buf ); // Get the first var.
+ while ( var != NULL ) {
+ // Save found variable in vars array.
+ __kmp_str_split( var, '=', & name, & value );
+ KMP_DEBUG_ASSERT( count < delimiters + 1 );
+ vars[ count ].name = name;
+ vars[ count ].value = value;
+ ++ count;
+ // Get the next var.
+ var = __kmp_str_token( NULL, str_delimiter, & buf );
+ }; // while
+ }
+
+ // Fill out result.
+ block->bulk = bulk;
+ block->vars = vars;
+ block->count = count;
+
+}; // ___kmp_env_blk_parse_string
+
+
+
+/*
+ Windows* OS (actually, DOS) environment block is a piece of memory with environment variables. Each
+ variable is terminated with a zero byte, and the entire block is terminated with one extra zero byte, so
+ there are two zero bytes at the end of the environment block, e.g.:
+
+ "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00"
+
+ It is not clear how empty environment is represented. "\x00\x00"?
+*/
+
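A short sketch (assumed, not part of this patch) of walking such a double-NUL-terminated block, where env is the raw block pointer:

    char const * p;
    for ( p = env; *p != '\0'; p += KMP_STRLEN( p ) + 1 ) {
        // p points at one "NAME=value" entry; the loop stops at the extra terminating NUL
    }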
+#if KMP_OS_WINDOWS
+static
+void
+___kmp_env_blk_parse_windows(
+ kmp_env_blk_t * block, // M: Env block to fill.
+ char const * env // I: Pointer to Windows* OS (DOS) environment block.
+) {
+
+ char * bulk = NULL;
+ kmp_env_var_t * vars = NULL;
+ int count = 0; // Number of used elements in vars array.
+ int size = 0; // Size of bulk.
+
+ char * name; // Pointer to name of variable.
+ char * value; // Pointer to value.
+
+ if ( env != NULL ) {
+
+ // Loop thru all the vars in environment block. Count variables, find size of block.
+ {
+ char const * var; // Pointer to beginning of var.
+ int len; // Length of variable.
+ count = 0;
+ var = env; // The first variable starts at the beginning of the environment block.
+ len = KMP_STRLEN( var );
+ while ( len != 0 ) {
+ ++ count;
+ size = size + len + 1;
+ var = var + len + 1; // Move pointer to the beginning of the next variable.
+ len = KMP_STRLEN( var );
+ }; // while
+ size = size + 1; // Total size of env block, including terminating zero byte.
+ }
+
+ // Copy original block to bulk, we will modify bulk, not original block.
+ bulk = (char *) allocate( size );
+ KMP_MEMCPY_S( bulk, size, env, size );
+ // Allocate vars array.
+ vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) );
+
+ // Loop thru all the vars, now in bulk.
+ {
+ char * var; // Pointer to beginning of var.
+ int len; // Length of variable.
+ count = 0;
+ var = bulk;
+ len = KMP_STRLEN( var );
+ while ( len != 0 ) {
+ // Save variable in vars array.
+ __kmp_str_split( var, '=', & name, & value );
+ vars[ count ].name = name;
+ vars[ count ].value = value;
+ ++ count;
+ // Get the next var.
+ var = var + len + 1;
+ len = KMP_STRLEN( var );
+ }; // while
+ }
+
+ }; // if
+
+ // Fill out result.
+ block->bulk = bulk;
+ block->vars = vars;
+ block->count = count;
+
+}; // ___kmp_env_blk_parse_windows
+#endif
+
+
+/*
+ Unix environment block is an array of pointers to variables; the last pointer in the array is NULL:
+
+ { "HOME=/home/lev", "TERM=xterm", NULL }
+*/
+
+static
+void
+___kmp_env_blk_parse_unix(
+ kmp_env_blk_t * block, // M: Env block to fill.
+ char * * env // I: Unix environment to parse.
+) {
+
+ char * bulk = NULL;
+ kmp_env_var_t * vars = NULL;
+ int count = 0;
+ int size = 0; // Size of bulk.
+
+ // Count number of variables and length of required bulk.
+ {
+ count = 0;
+ size = 0;
+ while ( env[ count ] != NULL ) {
+ size += KMP_STRLEN( env[ count ] ) + 1;
+ ++ count;
+ }; // while
+ }
+
+ // Allocate memory.
+ bulk = (char *) allocate( size );
+ vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) );
+
+ // Loop thru all the vars.
+ {
+ char * var; // Pointer to beginning of var.
+ char * name; // Pointer to name of variable.
+ char * value; // Pointer to value.
+ int len; // Length of variable.
+ int i;
+ var = bulk;
+ for ( i = 0; i < count; ++ i ) {
+ // Copy variable to bulk.
+ len = KMP_STRLEN( env[ i ] );
+ KMP_MEMCPY_S( var, size, env[ i ], len + 1 );
+ // Save found variable in vars array.
+ __kmp_str_split( var, '=', & name, & value );
+ vars[ i ].name = name;
+ vars[ i ].value = value;
+ // Move pointer.
+ var += len + 1;
+ }; // for
+ }
+
+ // Fill out result.
+ block->bulk = bulk;
+ block->vars = vars;
+ block->count = count;
+
+}; // ___kmp_env_blk_parse_unix
+
+
+
+void
+__kmp_env_blk_init(
+ kmp_env_blk_t * block, // M: Block to initialize.
+ char const * bulk // I: Initialization string, or NULL.
+) {
+
+ if ( bulk != NULL ) {
+ ___kmp_env_blk_parse_string( block, bulk );
+ } else {
+ #if KMP_OS_UNIX
+ ___kmp_env_blk_parse_unix( block, environ );
+ #elif KMP_OS_WINDOWS
+ {
+ char * mem = GetEnvironmentStrings();
+ if ( mem == NULL ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantGetEnvironment ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ ___kmp_env_blk_parse_windows( block, mem );
+ FreeEnvironmentStrings( mem );
+ }
+ #else
+ #error Unknown or unsupported OS.
+ #endif
+ }; // if
+
+} // __kmp_env_blk_init
+
+
+
+static
+int
+___kmp_env_var_cmp( // Comparison function for qsort().
+ kmp_env_var_t const * lhs,
+ kmp_env_var_t const * rhs
+) {
+ return strcmp( lhs->name, rhs->name );
+}
+
+void
+__kmp_env_blk_sort(
+ kmp_env_blk_t * block // M: Block of environment variables to sort.
+) {
+
+ qsort(
+ (void *) block->vars,
+ block->count,
+ sizeof( kmp_env_var_t ),
+ ( int ( * )( void const *, void const * ) ) & ___kmp_env_var_cmp
+ );
+
+} // __kmp_env_blk_sort
+
+
+
+void
+__kmp_env_blk_free(
+ kmp_env_blk_t * block // M: Block of environment variables to free.
+) {
+
+ KMP_INTERNAL_FREE( (void *) block->vars );
+ KMP_INTERNAL_FREE( (void *) block->bulk );
+
+ block->count = 0;
+ block->vars = NULL;
+ block->bulk = NULL;
+
+} // __kmp_env_blk_free
+
+
+
+char const * // R: Value of variable or NULL if variable does not exist.
+__kmp_env_blk_var(
+ kmp_env_blk_t * block, // I: Block of environment variables.
+ char const * name // I: Name of variable to find.
+) {
+
+ int i;
+ for ( i = 0; i < block->count; ++ i ) {
+ if ( strcmp( block->vars[ i ].name, name ) == 0 ) {
+ return block->vars[ i ].value;
+ }; // if
+ }; // for
+ return NULL;
+
+} // __kmp_env_blk_var
+
+
+// end of file //
diff --git a/contrib/libs/cxxsupp/openmp/kmp_environment.h b/contrib/libs/cxxsupp/openmp/kmp_environment.h
index 52b462478f..243b547451 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_environment.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_environment.h
@@ -1,81 +1,81 @@
-/*
- * kmp_environment.h -- Handle environment varoiables OS-independently.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_ENVIRONMENT_H
-#define KMP_ENVIRONMENT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Return a copy of the value of environment variable or NULL if the variable does not exist.
-// *Note*: Returned pointed *must* be freed after use with __kmp_env_free().
-char * __kmp_env_get( char const * name );
-void __kmp_env_free( char const * * value );
-
-// Return 1 if the environment variable exists or 0 if does not exist.
-int __kmp_env_exists( char const * name );
-
-// Set the environment variable.
-void __kmp_env_set( char const * name, char const * value, int overwrite );
-
-// Unset (remove) environment variable.
-void __kmp_env_unset( char const * name );
-
-
-// -------------------------------------------------------------------------------------------------
-// Working with environment blocks.
-// -------------------------------------------------------------------------------------------------
-
-/*
- kmp_env_blk_t is read-only collection of environment variables (or environment-like). Usage:
-
- kmp_env_blk_t block;
- __kmp_env_blk_init( & block, NULL ); // Initialize block from process environment.
- // or
- __kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string.
- __kmp_env_blk_sort( & block ); // Optionally, sort list.
- for ( i = 0; i < block.count; ++ i ) {
- // Process block.vars[ i ].name and block.vars[ i ].value...
- }; // for i
- __kmp_env_block_free( & block );
-*/
-
-struct __kmp_env_var {
- char const * name;
- char const * value;
-};
-typedef struct __kmp_env_var kmp_env_var_t;
-
-struct __kmp_env_blk {
- char const * bulk;
- kmp_env_var_t const * vars;
- int count;
-};
-typedef struct __kmp_env_blk kmp_env_blk_t;
-
-void __kmp_env_blk_init( kmp_env_blk_t * block, char const * bulk );
-void __kmp_env_blk_free( kmp_env_blk_t * block );
-void __kmp_env_blk_sort( kmp_env_blk_t * block );
-char const * __kmp_env_blk_var( kmp_env_blk_t * block, char const * name );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // KMP_ENVIRONMENT_H
-
-// end of file //
-
+/*
+ * kmp_environment.h -- Handle environment variables OS-independently.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_ENVIRONMENT_H
+#define KMP_ENVIRONMENT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Return a copy of the value of the environment variable, or NULL if the variable does not exist.
+// *Note*: The returned pointer *must* be freed after use with __kmp_env_free().
+char * __kmp_env_get( char const * name );
+void __kmp_env_free( char const * * value );
+
+// Return 1 if the environment variable exists, or 0 if it does not exist.
+int __kmp_env_exists( char const * name );
+
+// Set the environment variable.
+void __kmp_env_set( char const * name, char const * value, int overwrite );
+
+// Unset (remove) environment variable.
+void __kmp_env_unset( char const * name );
+
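A minimal usage sketch of these helpers; the variable name here is only an illustrative assumption:

    char * value = __kmp_env_get( "KMP_SETTINGS" );       // copy of the value, or NULL if unset
    if ( value != NULL ) {
        // ... use value ...
        __kmp_env_free( (char const **) & value );         // release the copy; value becomes NULL
    }
    if ( ! __kmp_env_exists( "KMP_SETTINGS" ) ) {
        __kmp_env_set( "KMP_SETTINGS", "1", 0 );           // set only if not already present
    }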
+
+// -------------------------------------------------------------------------------------------------
+// Working with environment blocks.
+// -------------------------------------------------------------------------------------------------
+
+/*
+ kmp_env_blk_t is a read-only collection of environment variables (or environment-like variables). Usage:
+
+ kmp_env_blk_t block;
+ __kmp_env_blk_init( & block, NULL ); // Initialize block from process environment.
+ // or
+ __kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string.
+ __kmp_env_blk_sort( & block ); // Optionally, sort list.
+ for ( i = 0; i < block.count; ++ i ) {
+ // Process block.vars[ i ].name and block.vars[ i ].value...
+ }; // for i
+        __kmp_env_blk_free( & block );
+*/
+
+struct __kmp_env_var {
+ char const * name;
+ char const * value;
+};
+typedef struct __kmp_env_var kmp_env_var_t;
+
+struct __kmp_env_blk {
+ char const * bulk;
+ kmp_env_var_t const * vars;
+ int count;
+};
+typedef struct __kmp_env_blk kmp_env_blk_t;
+
+void __kmp_env_blk_init( kmp_env_blk_t * block, char const * bulk );
+void __kmp_env_blk_free( kmp_env_blk_t * block );
+void __kmp_env_blk_sort( kmp_env_blk_t * block );
+char const * __kmp_env_blk_var( kmp_env_blk_t * block, char const * name );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // KMP_ENVIRONMENT_H
+
+// end of file //
+
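A minimal caller-side sketch of how the helpers declared in this header fit together; this is an illustration only (the demo function and the variable names KMP_BLOCKTIME / KMP_AFFINITY are assumptions, and the snippet presumes it is built inside the runtime so that kmp_environment.h and its definitions are available):

    #include <stdio.h>
    #include "kmp_environment.h"

    static void demo_env_api( void )
    {
        char * value;
        char const * affinity;
        kmp_env_blk_t block;

        /* Single-variable helpers: the returned copy must be released with
           __kmp_env_free(), as the header comment requires. */
        value = __kmp_env_get( "KMP_BLOCKTIME" );
        if ( value != NULL ) {
            printf( "KMP_BLOCKTIME=%s\n", value );
        }
        __kmp_env_free( (char const **) & value );

        if ( ! __kmp_env_exists( "KMP_AFFINITY" ) ) {
            __kmp_env_set( "KMP_AFFINITY", "none", 0 /* do not overwrite */ );
        }

        /* Block helpers: parse an environment-like string into name/value
           pairs, optionally sort them, then look one up by name. */
        __kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" );
        __kmp_env_blk_sort( & block );
        affinity = __kmp_env_blk_var( & block, "KMP_AFFINITY" );
        if ( affinity != NULL ) {
            printf( "KMP_AFFINITY=%s\n", affinity );
        }
        __kmp_env_blk_free( & block );
    }
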
diff --git a/contrib/libs/cxxsupp/openmp/kmp_error.c b/contrib/libs/cxxsupp/openmp/kmp_error.c
index 5bb4bcf22f..6866df5d49 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_error.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_error.c
@@ -1,523 +1,523 @@
-/*
- * kmp_error.c -- KPTS functions for error checking at runtime
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_str.h"
-#include "kmp_error.h"
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#define MIN_STACK 100
-
-
-static char const * cons_text_c[] = {
- "(none)",
- "\"parallel\"",
- "work-sharing", /* this is not called "for" because of lowering of "sections" pragmas */
- "\"ordered\" work-sharing", /* this is not called "for ordered" because of lowering of "sections" pragmas */
- "\"sections\"",
- "work-sharing", /* this is not called "single" because of lowering of "sections" pragmas */
- "\"taskq\"",
- "\"taskq\"",
- "\"taskq ordered\"",
- "\"critical\"",
- "\"ordered\"", /* in PARALLEL */
- "\"ordered\"", /* in PDO */
- "\"ordered\"", /* in TASKQ */
- "\"master\"",
- "\"reduce\"",
- "\"barrier\""
-};
-
-#define get_src( ident ) ( (ident) == NULL ? NULL : (ident)->psource )
-
-#define PUSH_MSG( ct, ident ) \
- "\tpushing on stack: %s (%s)\n", cons_text_c[ (ct) ], get_src( (ident) )
-#define POP_MSG( p ) \
- "\tpopping off stack: %s (%s)\n", \
- cons_text_c[ (p)->stack_data[ tos ].type ], \
- get_src( (p)->stack_data[ tos ].ident )
-
-static int const cons_text_c_num = sizeof( cons_text_c ) / sizeof( char const * );
-
-/* ------------------------------------------------------------------------ */
-/* --------------- START OF STATIC LOCAL ROUTINES ------------------------- */
-/* ------------------------------------------------------------------------ */
-
-static void
-__kmp_check_null_func( void )
-{
- /* nothing to do */
-}
-
-static void
-__kmp_expand_cons_stack( int gtid, struct cons_header *p )
-{
- int i;
- struct cons_data *d;
-
- /* TODO for monitor perhaps? */
- if (gtid < 0)
- __kmp_check_null_func();
-
- KE_TRACE( 10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid() ) );
-
- d = p->stack_data;
-
- p->stack_size = (p->stack_size * 2) + 100;
-
- /* TODO free the old data */
- p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (p->stack_size+1) );
-
- for (i = p->stack_top; i >= 0; --i)
- p->stack_data[i] = d[i];
-
- /* NOTE: we do not free the old stack_data */
-}
-
-// NOTE: Function returns allocated memory, caller must free it!
-static char const *
-__kmp_pragma(
- int ct,
- ident_t const * ident
-) {
- char const * cons = NULL; // Construct name.
- char * file = NULL; // File name.
- char * func = NULL; // Function (routine) name.
- char * line = NULL; // Line number.
- kmp_str_buf_t buffer;
- kmp_msg_t prgm;
- __kmp_str_buf_init( & buffer );
- if ( 0 < ct && ct < cons_text_c_num ) {
- cons = cons_text_c[ ct ];
- } else {
- KMP_DEBUG_ASSERT( 0 );
- };
- if ( ident != NULL && ident->psource != NULL ) {
- char * tail = NULL;
- __kmp_str_buf_print( & buffer, "%s", ident->psource ); // Copy source to buffer.
- // Split string in buffer to file, func, and line.
- tail = buffer.str;
- __kmp_str_split( tail, ';', NULL, & tail );
- __kmp_str_split( tail, ';', & file, & tail );
- __kmp_str_split( tail, ';', & func, & tail );
- __kmp_str_split( tail, ';', & line, & tail );
- }; // if
- prgm = __kmp_msg_format( kmp_i18n_fmt_Pragma, cons, file, func, line );
- __kmp_str_buf_free( & buffer );
- return prgm.str;
-} // __kmp_pragma
-
-/* ------------------------------------------------------------------------ */
-/* ----------------- END OF STATIC LOCAL ROUTINES ------------------------- */
-/* ------------------------------------------------------------------------ */
-
-
-void
-__kmp_error_construct(
- kmp_i18n_id_t id, // Message identifier.
- enum cons_type ct, // Construct type.
- ident_t const * ident // Construct ident.
-) {
- char const * construct = __kmp_pragma( ct, ident );
- __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct ), __kmp_msg_null );
- KMP_INTERNAL_FREE( (void *) construct );
-}
-
-void
-__kmp_error_construct2(
- kmp_i18n_id_t id, // Message identifier.
- enum cons_type ct, // First construct type.
- ident_t const * ident, // First construct ident.
- struct cons_data const * cons // Second construct.
-) {
- char const * construct1 = __kmp_pragma( ct, ident );
- char const * construct2 = __kmp_pragma( cons->type, cons->ident );
- __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct1, construct2 ), __kmp_msg_null );
- KMP_INTERNAL_FREE( (void *) construct1 );
- KMP_INTERNAL_FREE( (void *) construct2 );
-}
-
-
-struct cons_header *
-__kmp_allocate_cons_stack( int gtid )
-{
- struct cons_header *p;
-
- /* TODO for monitor perhaps? */
- if ( gtid < 0 ) {
- __kmp_check_null_func();
- }; // if
- KE_TRACE( 10, ("allocate cons_stack (%d)\n", gtid ) );
- p = (struct cons_header *) __kmp_allocate( sizeof( struct cons_header ) );
- p->p_top = p->w_top = p->s_top = 0;
- p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (MIN_STACK+1) );
- p->stack_size = MIN_STACK;
- p->stack_top = 0;
- p->stack_data[ 0 ].type = ct_none;
- p->stack_data[ 0 ].prev = 0;
- p->stack_data[ 0 ].ident = NULL;
- return p;
-}
-
-void
-__kmp_free_cons_stack( void * ptr ) {
- struct cons_header * p = (struct cons_header *) ptr;
- if ( p != NULL ) {
- if ( p->stack_data != NULL ) {
- __kmp_free( p->stack_data );
- p->stack_data = NULL;
- }; // if
- __kmp_free( p );
- }; // if
-}
-
-
-#if KMP_DEBUG
-static void
-dump_cons_stack( int gtid, struct cons_header * p ) {
- int i;
- int tos = p->stack_top;
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
- __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" );
- __kmp_str_buf_print( & buffer, "Begin construct stack with %d items for thread %d\n", tos, gtid );
- __kmp_str_buf_print( & buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, p->p_top, p->w_top, p->s_top );
- for ( i = tos; i > 0; i-- ) {
- struct cons_data * c = & ( p->stack_data[ i ] );
- __kmp_str_buf_print( & buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, cons_text_c[ c->type ], get_src( c->ident ), c->prev, c->name );
- }; // for i
- __kmp_str_buf_print( & buffer, "End construct stack for thread %d\n", gtid );
- __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" );
- __kmp_debug_printf( "%s", buffer.str );
- __kmp_str_buf_free( & buffer );
-}
-#endif
-
-void
-__kmp_push_parallel( int gtid, ident_t const * ident )
-{
- int tos;
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
-
- KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons );
- KE_TRACE( 10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) );
- KE_TRACE( 100, ( PUSH_MSG( ct_parallel, ident ) ) );
- if ( p->stack_top >= p->stack_size ) {
- __kmp_expand_cons_stack( gtid, p );
- }; // if
- tos = ++p->stack_top;
- p->stack_data[ tos ].type = ct_parallel;
- p->stack_data[ tos ].prev = p->p_top;
- p->stack_data[ tos ].ident = ident;
- p->stack_data[ tos ].name = NULL;
- p->p_top = tos;
- KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
-}
-
-void
-__kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident )
-{
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
-
- KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons );
- KE_TRACE( 10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) );
-
-
- if ( p->stack_top >= p->stack_size ) {
- __kmp_expand_cons_stack( gtid, p );
- }; // if
- if ( p->w_top > p->p_top &&
- !(IS_CONS_TYPE_TASKQ(p->stack_data[ p->w_top ].type) && IS_CONS_TYPE_TASKQ(ct))) {
- // We are already in a WORKSHARE construct for this PARALLEL region.
- __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->w_top ] );
- }; // if
- if ( p->s_top > p->p_top ) {
- // We are already in a SYNC construct for this PARALLEL region.
- __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->s_top ] );
- }; // if
-}
-
-void
-__kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident )
-{
- int tos;
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
- KE_TRACE( 10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) );
- __kmp_check_workshare( gtid, ct, ident );
- KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) );
- tos = ++p->stack_top;
- p->stack_data[ tos ].type = ct;
- p->stack_data[ tos ].prev = p->w_top;
- p->stack_data[ tos ].ident = ident;
- p->stack_data[ tos ].name = NULL;
- p->w_top = tos;
- KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
-}
-
-void
-#if KMP_USE_DYNAMIC_LOCK
-__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
-#else
-__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
-#endif
-{
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
-
- KE_TRACE( 10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid() ) );
-
- if (p->stack_top >= p->stack_size)
- __kmp_expand_cons_stack( gtid, p );
-
- if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || ct == ct_ordered_in_taskq ) {
- if (p->w_top <= p->p_top) {
- /* we are not in a worksharing construct */
- #ifdef BUILD_PARALLEL_ORDERED
- /* do not report error messages for PARALLEL ORDERED */
- KMP_ASSERT( ct == ct_ordered_in_parallel );
- #else
- __kmp_error_construct( kmp_i18n_msg_CnsBoundToWorksharing, ct, ident );
- #endif /* BUILD_PARALLEL_ORDERED */
- } else {
- /* inside a WORKSHARING construct for this PARALLEL region */
- if (!IS_CONS_TYPE_ORDERED(p->stack_data[ p->w_top ].type)) {
- if (p->stack_data[ p->w_top ].type == ct_taskq) {
- __kmp_error_construct2(
- kmp_i18n_msg_CnsNotInTaskConstruct,
- ct, ident,
- & p->stack_data[ p->w_top ]
- );
- } else {
- __kmp_error_construct2(
- kmp_i18n_msg_CnsNoOrderedClause,
- ct, ident,
- & p->stack_data[ p->w_top ]
- );
- }
- }
- }
- if (p->s_top > p->p_top && p->s_top > p->w_top) {
- /* inside a sync construct which is inside a worksharing construct */
- int index = p->s_top;
- enum cons_type stack_type;
-
- stack_type = p->stack_data[ index ].type;
-
- if (stack_type == ct_critical ||
- ( ( stack_type == ct_ordered_in_parallel ||
- stack_type == ct_ordered_in_pdo ||
- stack_type == ct_ordered_in_taskq ) && /* C doesn't allow named ordered; ordered in ordered gets error */
- p->stack_data[ index ].ident != NULL &&
- (p->stack_data[ index ].ident->flags & KMP_IDENT_KMPC ))) {
- /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */
- __kmp_error_construct2(
- kmp_i18n_msg_CnsInvalidNesting,
- ct, ident,
- & p->stack_data[ index ]
- );
- }
- }
- } else if ( ct == ct_critical ) {
-#if KMP_USE_DYNAMIC_LOCK
- if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */
-#else
- if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */
-#endif
- int index = p->s_top;
- struct cons_data cons = { NULL, ct_critical, 0, NULL };
- /* walk up construct stack and try to find critical with matching name */
- while ( index != 0 && p->stack_data[ index ].name != lck ) {
- index = p->stack_data[ index ].prev;
- }
- if ( index != 0 ) {
-                /* found a match on the stack (this may not always happen because of interleaved critical sections in Fortran) */
- cons = p->stack_data[ index ];
- }
- /* we are in CRITICAL which is inside a CRITICAL construct of the same name */
- __kmp_error_construct2( kmp_i18n_msg_CnsNestingSameName, ct, ident, & cons );
- }
- } else if ( ct == ct_master || ct == ct_reduce ) {
- if (p->w_top > p->p_top) {
- /* inside a WORKSHARING construct for this PARALLEL region */
- __kmp_error_construct2(
- kmp_i18n_msg_CnsInvalidNesting,
- ct, ident,
- & p->stack_data[ p->w_top ]
- );
- }
- if (ct == ct_reduce && p->s_top > p->p_top) {
-            /* inside another SYNC construct for this PARALLEL region */
- __kmp_error_construct2(
- kmp_i18n_msg_CnsInvalidNesting,
- ct, ident,
- & p->stack_data[ p->s_top ]
- );
- }; // if
- }; // if
-}
-
-void
-#if KMP_USE_DYNAMIC_LOCK
-__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
-#else
-__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
-#endif
-{
- int tos;
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
-
- KMP_ASSERT( gtid == __kmp_get_gtid() );
- KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) );
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_check_sync( gtid, ct, ident, lck, seq );
-#else
- __kmp_check_sync( gtid, ct, ident, lck );
-#endif
- KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) );
- tos = ++ p->stack_top;
- p->stack_data[ tos ].type = ct;
- p->stack_data[ tos ].prev = p->s_top;
- p->stack_data[ tos ].ident = ident;
- p->stack_data[ tos ].name = lck;
- p->s_top = tos;
- KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
-}
-
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_pop_parallel( int gtid, ident_t const * ident )
-{
- int tos;
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
- tos = p->stack_top;
- KE_TRACE( 10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) );
- if ( tos == 0 || p->p_top == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident );
- }
- if ( tos != p->p_top || p->stack_data[ tos ].type != ct_parallel ) {
- __kmp_error_construct2(
- kmp_i18n_msg_CnsExpectedEnd,
- ct_parallel, ident,
- & p->stack_data[ tos ]
- );
- }
- KE_TRACE( 100, ( POP_MSG( p ) ) );
- p->p_top = p->stack_data[ tos ].prev;
- p->stack_data[ tos ].type = ct_none;
- p->stack_data[ tos ].ident = NULL;
- p->stack_top = tos - 1;
- KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
-}
-
-enum cons_type
-__kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident )
-{
- int tos;
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
-
- tos = p->stack_top;
- KE_TRACE( 10, ("__kmp_pop_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) );
- if ( tos == 0 || p->w_top == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident );
- }
-
- if ( tos != p->w_top ||
- ( p->stack_data[ tos ].type != ct &&
- /* below are two exceptions to the rule that construct types must match */
- ! ( p->stack_data[ tos ].type == ct_pdo_ordered && ct == ct_pdo ) &&
- ! ( p->stack_data[ tos ].type == ct_task_ordered && ct == ct_task )
- )
- ) {
- __kmp_check_null_func();
- __kmp_error_construct2(
- kmp_i18n_msg_CnsExpectedEnd,
- ct, ident,
- & p->stack_data[ tos ]
- );
- }
- KE_TRACE( 100, ( POP_MSG( p ) ) );
- p->w_top = p->stack_data[ tos ].prev;
- p->stack_data[ tos ].type = ct_none;
- p->stack_data[ tos ].ident = NULL;
- p->stack_top = tos - 1;
- KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
- return p->stack_data[ p->w_top ].type;
-}
-
-void
-__kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident )
-{
- int tos;
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
- tos = p->stack_top;
- KE_TRACE( 10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid() ) );
- if ( tos == 0 || p->s_top == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident );
- };
- if ( tos != p->s_top || p->stack_data[ tos ].type != ct ) {
- __kmp_check_null_func();
- __kmp_error_construct2(
- kmp_i18n_msg_CnsExpectedEnd,
- ct, ident,
- & p->stack_data[ tos ]
- );
- };
- if ( gtid < 0 ) {
- __kmp_check_null_func();
- };
- KE_TRACE( 100, ( POP_MSG( p ) ) );
- p->s_top = p->stack_data[ tos ].prev;
- p->stack_data[ tos ].type = ct_none;
- p->stack_data[ tos ].ident = NULL;
- p->stack_top = tos - 1;
- KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
-}
-
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident )
-{
- struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
- KE_TRACE( 10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, __kmp_get_gtid() ) );
- if ( ident != 0 ) {
- __kmp_check_null_func();
- }
- if ( p->w_top > p->p_top ) {
- /* we are already in a WORKSHARING construct for this PARALLEL region */
- __kmp_error_construct2(
- kmp_i18n_msg_CnsInvalidNesting,
- ct, ident,
- & p->stack_data[ p->w_top ]
- );
- }
- if (p->s_top > p->p_top) {
- /* we are already in a SYNC construct for this PARALLEL region */
- __kmp_error_construct2(
- kmp_i18n_msg_CnsInvalidNesting,
- ct, ident,
- & p->stack_data[ p->s_top ]
- );
- }
-}
-
-/* ------------------------------------------------------------------------ */
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+/*
+ * kmp_error.c -- KPTS functions for error checking at runtime
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_str.h"
+#include "kmp_error.h"
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#define MIN_STACK 100
+
+
+static char const * cons_text_c[] = {
+ "(none)",
+ "\"parallel\"",
+ "work-sharing", /* this is not called "for" because of lowering of "sections" pragmas */
+ "\"ordered\" work-sharing", /* this is not called "for ordered" because of lowering of "sections" pragmas */
+ "\"sections\"",
+ "work-sharing", /* this is not called "single" because of lowering of "sections" pragmas */
+ "\"taskq\"",
+ "\"taskq\"",
+ "\"taskq ordered\"",
+ "\"critical\"",
+ "\"ordered\"", /* in PARALLEL */
+ "\"ordered\"", /* in PDO */
+ "\"ordered\"", /* in TASKQ */
+ "\"master\"",
+ "\"reduce\"",
+ "\"barrier\""
+};
+
+#define get_src( ident ) ( (ident) == NULL ? NULL : (ident)->psource )
+
+#define PUSH_MSG( ct, ident ) \
+ "\tpushing on stack: %s (%s)\n", cons_text_c[ (ct) ], get_src( (ident) )
+#define POP_MSG( p ) \
+ "\tpopping off stack: %s (%s)\n", \
+ cons_text_c[ (p)->stack_data[ tos ].type ], \
+ get_src( (p)->stack_data[ tos ].ident )
+
+static int const cons_text_c_num = sizeof( cons_text_c ) / sizeof( char const * );
+
+/* ------------------------------------------------------------------------ */
+/* --------------- START OF STATIC LOCAL ROUTINES ------------------------- */
+/* ------------------------------------------------------------------------ */
+
+static void
+__kmp_check_null_func( void )
+{
+ /* nothing to do */
+}
+
+static void
+__kmp_expand_cons_stack( int gtid, struct cons_header *p )
+{
+ int i;
+ struct cons_data *d;
+
+ /* TODO for monitor perhaps? */
+ if (gtid < 0)
+ __kmp_check_null_func();
+
+ KE_TRACE( 10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid() ) );
+
+ d = p->stack_data;
+
+ p->stack_size = (p->stack_size * 2) + 100;
+
+ /* TODO free the old data */
+ p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (p->stack_size+1) );
+
+ for (i = p->stack_top; i >= 0; --i)
+ p->stack_data[i] = d[i];
+
+ /* NOTE: we do not free the old stack_data */
+}
+
+// NOTE: Function returns allocated memory, caller must free it!
+static char const *
+__kmp_pragma(
+ int ct,
+ ident_t const * ident
+) {
+ char const * cons = NULL; // Construct name.
+ char * file = NULL; // File name.
+ char * func = NULL; // Function (routine) name.
+ char * line = NULL; // Line number.
+ kmp_str_buf_t buffer;
+ kmp_msg_t prgm;
+ __kmp_str_buf_init( & buffer );
+ if ( 0 < ct && ct < cons_text_c_num ) {
+ cons = cons_text_c[ ct ];
+ } else {
+ KMP_DEBUG_ASSERT( 0 );
+ };
+ if ( ident != NULL && ident->psource != NULL ) {
+ char * tail = NULL;
+ __kmp_str_buf_print( & buffer, "%s", ident->psource ); // Copy source to buffer.
+ // Split string in buffer to file, func, and line.
+ tail = buffer.str;
+ __kmp_str_split( tail, ';', NULL, & tail );
+ __kmp_str_split( tail, ';', & file, & tail );
+ __kmp_str_split( tail, ';', & func, & tail );
+ __kmp_str_split( tail, ';', & line, & tail );
+ }; // if
+ prgm = __kmp_msg_format( kmp_i18n_fmt_Pragma, cons, file, func, line );
+ __kmp_str_buf_free( & buffer );
+ return prgm.str;
+} // __kmp_pragma
+
+/* ------------------------------------------------------------------------ */
+/* ----------------- END OF STATIC LOCAL ROUTINES ------------------------- */
+/* ------------------------------------------------------------------------ */
+
+
+void
+__kmp_error_construct(
+ kmp_i18n_id_t id, // Message identifier.
+ enum cons_type ct, // Construct type.
+ ident_t const * ident // Construct ident.
+) {
+ char const * construct = __kmp_pragma( ct, ident );
+ __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct ), __kmp_msg_null );
+ KMP_INTERNAL_FREE( (void *) construct );
+}
+
+void
+__kmp_error_construct2(
+ kmp_i18n_id_t id, // Message identifier.
+ enum cons_type ct, // First construct type.
+ ident_t const * ident, // First construct ident.
+ struct cons_data const * cons // Second construct.
+) {
+ char const * construct1 = __kmp_pragma( ct, ident );
+ char const * construct2 = __kmp_pragma( cons->type, cons->ident );
+ __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct1, construct2 ), __kmp_msg_null );
+ KMP_INTERNAL_FREE( (void *) construct1 );
+ KMP_INTERNAL_FREE( (void *) construct2 );
+}
+
+
+struct cons_header *
+__kmp_allocate_cons_stack( int gtid )
+{
+ struct cons_header *p;
+
+ /* TODO for monitor perhaps? */
+ if ( gtid < 0 ) {
+ __kmp_check_null_func();
+ }; // if
+ KE_TRACE( 10, ("allocate cons_stack (%d)\n", gtid ) );
+ p = (struct cons_header *) __kmp_allocate( sizeof( struct cons_header ) );
+ p->p_top = p->w_top = p->s_top = 0;
+ p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (MIN_STACK+1) );
+ p->stack_size = MIN_STACK;
+ p->stack_top = 0;
+ p->stack_data[ 0 ].type = ct_none;
+ p->stack_data[ 0 ].prev = 0;
+ p->stack_data[ 0 ].ident = NULL;
+ return p;
+}
+
+void
+__kmp_free_cons_stack( void * ptr ) {
+ struct cons_header * p = (struct cons_header *) ptr;
+ if ( p != NULL ) {
+ if ( p->stack_data != NULL ) {
+ __kmp_free( p->stack_data );
+ p->stack_data = NULL;
+ }; // if
+ __kmp_free( p );
+ }; // if
+}
+
+
+#if KMP_DEBUG
+static void
+dump_cons_stack( int gtid, struct cons_header * p ) {
+ int i;
+ int tos = p->stack_top;
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init( & buffer );
+ __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" );
+ __kmp_str_buf_print( & buffer, "Begin construct stack with %d items for thread %d\n", tos, gtid );
+ __kmp_str_buf_print( & buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, p->p_top, p->w_top, p->s_top );
+ for ( i = tos; i > 0; i-- ) {
+ struct cons_data * c = & ( p->stack_data[ i ] );
+ __kmp_str_buf_print( & buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, cons_text_c[ c->type ], get_src( c->ident ), c->prev, c->name );
+ }; // for i
+ __kmp_str_buf_print( & buffer, "End construct stack for thread %d\n", gtid );
+ __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" );
+ __kmp_debug_printf( "%s", buffer.str );
+ __kmp_str_buf_free( & buffer );
+}
+#endif
+
+void
+__kmp_push_parallel( int gtid, ident_t const * ident )
+{
+ int tos;
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+
+ KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons );
+ KE_TRACE( 10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) );
+ KE_TRACE( 100, ( PUSH_MSG( ct_parallel, ident ) ) );
+ if ( p->stack_top >= p->stack_size ) {
+ __kmp_expand_cons_stack( gtid, p );
+ }; // if
+ tos = ++p->stack_top;
+ p->stack_data[ tos ].type = ct_parallel;
+ p->stack_data[ tos ].prev = p->p_top;
+ p->stack_data[ tos ].ident = ident;
+ p->stack_data[ tos ].name = NULL;
+ p->p_top = tos;
+ KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
+}
+
+void
+__kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident )
+{
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+
+ KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons );
+ KE_TRACE( 10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) );
+
+
+ if ( p->stack_top >= p->stack_size ) {
+ __kmp_expand_cons_stack( gtid, p );
+ }; // if
+ if ( p->w_top > p->p_top &&
+ !(IS_CONS_TYPE_TASKQ(p->stack_data[ p->w_top ].type) && IS_CONS_TYPE_TASKQ(ct))) {
+ // We are already in a WORKSHARE construct for this PARALLEL region.
+ __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->w_top ] );
+ }; // if
+ if ( p->s_top > p->p_top ) {
+ // We are already in a SYNC construct for this PARALLEL region.
+ __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->s_top ] );
+ }; // if
+}
+
+void
+__kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident )
+{
+ int tos;
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+ KE_TRACE( 10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) );
+ __kmp_check_workshare( gtid, ct, ident );
+ KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) );
+ tos = ++p->stack_top;
+ p->stack_data[ tos ].type = ct;
+ p->stack_data[ tos ].prev = p->w_top;
+ p->stack_data[ tos ].ident = ident;
+ p->stack_data[ tos ].name = NULL;
+ p->w_top = tos;
+ KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
+}
+
+void
+#if KMP_USE_DYNAMIC_LOCK
+__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
+#else
+__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
+#endif
+{
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+
+ KE_TRACE( 10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid() ) );
+
+ if (p->stack_top >= p->stack_size)
+ __kmp_expand_cons_stack( gtid, p );
+
+ if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || ct == ct_ordered_in_taskq ) {
+ if (p->w_top <= p->p_top) {
+ /* we are not in a worksharing construct */
+ #ifdef BUILD_PARALLEL_ORDERED
+ /* do not report error messages for PARALLEL ORDERED */
+ KMP_ASSERT( ct == ct_ordered_in_parallel );
+ #else
+ __kmp_error_construct( kmp_i18n_msg_CnsBoundToWorksharing, ct, ident );
+ #endif /* BUILD_PARALLEL_ORDERED */
+ } else {
+ /* inside a WORKSHARING construct for this PARALLEL region */
+ if (!IS_CONS_TYPE_ORDERED(p->stack_data[ p->w_top ].type)) {
+ if (p->stack_data[ p->w_top ].type == ct_taskq) {
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsNotInTaskConstruct,
+ ct, ident,
+ & p->stack_data[ p->w_top ]
+ );
+ } else {
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsNoOrderedClause,
+ ct, ident,
+ & p->stack_data[ p->w_top ]
+ );
+ }
+ }
+ }
+ if (p->s_top > p->p_top && p->s_top > p->w_top) {
+ /* inside a sync construct which is inside a worksharing construct */
+ int index = p->s_top;
+ enum cons_type stack_type;
+
+ stack_type = p->stack_data[ index ].type;
+
+ if (stack_type == ct_critical ||
+ ( ( stack_type == ct_ordered_in_parallel ||
+ stack_type == ct_ordered_in_pdo ||
+ stack_type == ct_ordered_in_taskq ) && /* C doesn't allow named ordered; ordered in ordered gets error */
+ p->stack_data[ index ].ident != NULL &&
+ (p->stack_data[ index ].ident->flags & KMP_IDENT_KMPC ))) {
+ /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsInvalidNesting,
+ ct, ident,
+ & p->stack_data[ index ]
+ );
+ }
+ }
+ } else if ( ct == ct_critical ) {
+#if KMP_USE_DYNAMIC_LOCK
+ if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */
+#else
+ if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */
+#endif
+ int index = p->s_top;
+ struct cons_data cons = { NULL, ct_critical, 0, NULL };
+ /* walk up construct stack and try to find critical with matching name */
+ while ( index != 0 && p->stack_data[ index ].name != lck ) {
+ index = p->stack_data[ index ].prev;
+ }
+ if ( index != 0 ) {
+                /* found a match on the stack (this may not always happen because of interleaved critical sections in Fortran) */
+ cons = p->stack_data[ index ];
+ }
+ /* we are in CRITICAL which is inside a CRITICAL construct of the same name */
+ __kmp_error_construct2( kmp_i18n_msg_CnsNestingSameName, ct, ident, & cons );
+ }
+ } else if ( ct == ct_master || ct == ct_reduce ) {
+ if (p->w_top > p->p_top) {
+ /* inside a WORKSHARING construct for this PARALLEL region */
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsInvalidNesting,
+ ct, ident,
+ & p->stack_data[ p->w_top ]
+ );
+ }
+ if (ct == ct_reduce && p->s_top > p->p_top) {
+            /* inside another SYNC construct for this PARALLEL region */
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsInvalidNesting,
+ ct, ident,
+ & p->stack_data[ p->s_top ]
+ );
+ }; // if
+ }; // if
+}
+
+void
+#if KMP_USE_DYNAMIC_LOCK
+__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
+#else
+__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
+#endif
+{
+ int tos;
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+
+ KMP_ASSERT( gtid == __kmp_get_gtid() );
+ KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) );
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_check_sync( gtid, ct, ident, lck, seq );
+#else
+ __kmp_check_sync( gtid, ct, ident, lck );
+#endif
+ KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) );
+ tos = ++ p->stack_top;
+ p->stack_data[ tos ].type = ct;
+ p->stack_data[ tos ].prev = p->s_top;
+ p->stack_data[ tos ].ident = ident;
+ p->stack_data[ tos ].name = lck;
+ p->s_top = tos;
+ KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
+}
+
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_pop_parallel( int gtid, ident_t const * ident )
+{
+ int tos;
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+ tos = p->stack_top;
+ KE_TRACE( 10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) );
+ if ( tos == 0 || p->p_top == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident );
+ }
+ if ( tos != p->p_top || p->stack_data[ tos ].type != ct_parallel ) {
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsExpectedEnd,
+ ct_parallel, ident,
+ & p->stack_data[ tos ]
+ );
+ }
+ KE_TRACE( 100, ( POP_MSG( p ) ) );
+ p->p_top = p->stack_data[ tos ].prev;
+ p->stack_data[ tos ].type = ct_none;
+ p->stack_data[ tos ].ident = NULL;
+ p->stack_top = tos - 1;
+ KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
+}
+
+enum cons_type
+__kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident )
+{
+ int tos;
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+
+ tos = p->stack_top;
+ KE_TRACE( 10, ("__kmp_pop_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) );
+ if ( tos == 0 || p->w_top == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident );
+ }
+
+ if ( tos != p->w_top ||
+ ( p->stack_data[ tos ].type != ct &&
+ /* below are two exceptions to the rule that construct types must match */
+ ! ( p->stack_data[ tos ].type == ct_pdo_ordered && ct == ct_pdo ) &&
+ ! ( p->stack_data[ tos ].type == ct_task_ordered && ct == ct_task )
+ )
+ ) {
+ __kmp_check_null_func();
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsExpectedEnd,
+ ct, ident,
+ & p->stack_data[ tos ]
+ );
+ }
+ KE_TRACE( 100, ( POP_MSG( p ) ) );
+ p->w_top = p->stack_data[ tos ].prev;
+ p->stack_data[ tos ].type = ct_none;
+ p->stack_data[ tos ].ident = NULL;
+ p->stack_top = tos - 1;
+ KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
+ return p->stack_data[ p->w_top ].type;
+}
+
+void
+__kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident )
+{
+ int tos;
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+ tos = p->stack_top;
+ KE_TRACE( 10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid() ) );
+ if ( tos == 0 || p->s_top == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident );
+ };
+ if ( tos != p->s_top || p->stack_data[ tos ].type != ct ) {
+ __kmp_check_null_func();
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsExpectedEnd,
+ ct, ident,
+ & p->stack_data[ tos ]
+ );
+ };
+ if ( gtid < 0 ) {
+ __kmp_check_null_func();
+ };
+ KE_TRACE( 100, ( POP_MSG( p ) ) );
+ p->s_top = p->stack_data[ tos ].prev;
+ p->stack_data[ tos ].type = ct_none;
+ p->stack_data[ tos ].ident = NULL;
+ p->stack_top = tos - 1;
+ KE_DUMP( 1000, dump_cons_stack( gtid, p ) );
+}
+
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident )
+{
+ struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
+ KE_TRACE( 10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, __kmp_get_gtid() ) );
+ if ( ident != 0 ) {
+ __kmp_check_null_func();
+ }
+ if ( p->w_top > p->p_top ) {
+ /* we are already in a WORKSHARING construct for this PARALLEL region */
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsInvalidNesting,
+ ct, ident,
+ & p->stack_data[ p->w_top ]
+ );
+ }
+ if (p->s_top > p->p_top) {
+ /* we are already in a SYNC construct for this PARALLEL region */
+ __kmp_error_construct2(
+ kmp_i18n_msg_CnsInvalidNesting,
+ ct, ident,
+ & p->stack_data[ p->s_top ]
+ );
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
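kmp_error.c maintains a per-thread stack of enclosing constructs (parallel, worksharing, sync) and reports i18n-formatted fatal errors when nesting or end-of-construct order is inconsistent. A small hypothetical driver (illustration only: demo_cons_stack is not part of the runtime, gtid, loc and lck stand in for values the runtime normally supplies, and the 0 passed as the dynamic-lock sequence number is a placeholder) shows how the push/pop entry points are expected to pair up:

    #include "kmp.h"
    #include "kmp_error.h"

    static void demo_cons_stack( int gtid, ident_t const * loc, kmp_user_lock_p lck )
    {
        __kmp_push_parallel( gtid, loc );                  /* enter "parallel"         */
        __kmp_push_workshare( gtid, ct_pdo, loc );         /* enter a worksharing loop */
    #if KMP_USE_DYNAMIC_LOCK
        __kmp_push_sync( gtid, ct_critical, loc, lck, 0 ); /* enter "critical"         */
    #else
        __kmp_push_sync( gtid, ct_critical, loc, lck );    /* enter "critical"         */
    #endif

        /* Popping out of order here -- e.g. calling __kmp_pop_workshare() before
           __kmp_pop_sync() -- would raise kmp_i18n_msg_CnsExpectedEnd. */
        __kmp_pop_sync( gtid, ct_critical, loc );          /* leave "critical"         */
        __kmp_pop_workshare( gtid, ct_pdo, loc );          /* leave the loop           */
        __kmp_pop_parallel( gtid, loc );                   /* leave "parallel"         */
    }
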
diff --git a/contrib/libs/cxxsupp/openmp/kmp_error.h b/contrib/libs/cxxsupp/openmp/kmp_error.h
index 0df196f64d..9dfe111dea 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_error.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_error.h
@@ -1,57 +1,57 @@
-/*
- * kmp_error.h -- PTS functions for error checking at runtime.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_ERROR_H
-#define KMP_ERROR_H
-
-#include "kmp_i18n.h"
-
-/* ------------------------------------------------------------------------ */
-#ifdef __cplusplus
- extern "C" {
-#endif
-
-void __kmp_error_construct( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident );
-void __kmp_error_construct2( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident, struct cons_data const * cons );
-
-struct cons_header * __kmp_allocate_cons_stack( int gtid );
-void __kmp_free_cons_stack( void * ptr );
-
-void __kmp_push_parallel( int gtid, ident_t const * ident );
-void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident );
-#if KMP_USE_DYNAMIC_LOCK
-void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 );
-#else
-void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name );
-#endif
-
-void __kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident );
-#if KMP_USE_DYNAMIC_LOCK
-void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 );
-#else
-void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name );
-#endif
-
-void __kmp_pop_parallel( int gtid, ident_t const * ident );
-enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident );
-void __kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident );
-void __kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident );
-
-#ifdef __cplusplus
- } // extern "C"
-#endif
-
-#endif // KMP_ERROR_H
-
+/*
+ * kmp_error.h -- PTS functions for error checking at runtime.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_ERROR_H
+#define KMP_ERROR_H
+
+#include "kmp_i18n.h"
+
+/* ------------------------------------------------------------------------ */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+void __kmp_error_construct( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident );
+void __kmp_error_construct2( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident, struct cons_data const * cons );
+
+struct cons_header * __kmp_allocate_cons_stack( int gtid );
+void __kmp_free_cons_stack( void * ptr );
+
+void __kmp_push_parallel( int gtid, ident_t const * ident );
+void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident );
+#if KMP_USE_DYNAMIC_LOCK
+void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 );
+#else
+void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name );
+#endif
+
+void __kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident );
+#if KMP_USE_DYNAMIC_LOCK
+void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 );
+#else
+void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name );
+#endif
+
+void __kmp_pop_parallel( int gtid, ident_t const * ident );
+enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident );
+void __kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident );
+void __kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident );
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif
+
+#endif // KMP_ERROR_H
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c b/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c
index 472ec894df..51fa1bf895 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_cdecl.c
@@ -1,35 +1,35 @@
-/*
- * kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-
-#if KMP_OS_WINDOWS
-# if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB
-# define KMP_FTN_ENTRIES KMP_FTN_UPPER
-# endif
-#elif KMP_OS_UNIX
-# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
-#endif
-
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: "
-#ifdef KMP_FTN_ENTRIES
- "yes";
-# define FTN_STDCALL /* no stdcall */
-# include "kmp_ftn_os.h"
-# include "kmp_ftn_entry.h"
-#else
- "no";
-#endif /* KMP_FTN_ENTRIES */
+/*
+ * kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+
+#if KMP_OS_WINDOWS
+# if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB
+# define KMP_FTN_ENTRIES KMP_FTN_UPPER
+# endif
+#elif KMP_OS_UNIX
+# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
+#endif
+
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: "
+#ifdef KMP_FTN_ENTRIES
+ "yes";
+# define FTN_STDCALL /* no stdcall */
+# include "kmp_ftn_os.h"
+# include "kmp_ftn_entry.h"
+#else
+ "no";
+#endif /* KMP_FTN_ENTRIES */
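One subtlety in kmp_ftn_cdecl.c is the version string: the literal that begins before the #ifdef is completed by whichever branch the preprocessor keeps (string-literal concatenation), so __kmp_version_ftncdecl records "yes" or "no", and the "yes" branch is also what pulls in the Fortran entry points. The same idiom in a stand-alone form (hypothetical names, not runtime code):

    #include <stdio.h>

    #define DEMO_FTN_ENTRIES 1  /* pretend the platform selected an entry style */

    /* The literal started here is finished inside the conditional below, so the
       compiled string records whether the feature was built in. */
    static char const demo_version_ftn[] = "demo Fortran support: "
    #ifdef DEMO_FTN_ENTRIES
        "yes";
    #else
        "no";
    #endif

    int main( void )
    {
        printf( "%s\n", demo_version_ftn ); /* prints: demo Fortran support: yes */
        return 0;
    }
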
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h b/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h
index b54e8765ed..fcbaacbffa 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_entry.h
@@ -1,1253 +1,1253 @@
-/*
- * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef FTN_STDCALL
-# error The support file kmp_ftn_entry.h should not be compiled by itself.
-#endif
-
-#ifdef KMP_STUB
- #include "kmp_stub.h"
-#endif
-
-#include "kmp_i18n.h"
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-/*
- * For compatibility with the GNU/MS OpenMP codegen, omp_set_num_threads(),
- * omp_set_nested(), and omp_set_dynamic() [in lowercase on MS, and w/o
- * a trailing underscore on Linux* OS] take call by value integer arguments.
- * + omp_set_max_active_levels()
- * + omp_set_schedule()
- *
- * For backward compatibility with 9.1 and earlier Intel compilers, these
- * entry points take call by reference integer arguments.
- */
-#ifdef KMP_GOMP_COMPAT
-# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER)
-# define PASS_ARGS_BY_VALUE 1
-# endif
-#endif
-#if KMP_OS_WINDOWS
-# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND)
-# define PASS_ARGS_BY_VALUE 1
-# endif
-#endif
-
-// This macro helps to reduce code duplication.
-#ifdef PASS_ARGS_BY_VALUE
- #define KMP_DEREF
-#else
- #define KMP_DEREF *
-#endif
-
-void FTN_STDCALL
-FTN_SET_STACKSIZE( int KMP_DEREF arg )
-{
- #ifdef KMP_STUB
- __kmps_set_stacksize( KMP_DEREF arg );
- #else
- // __kmp_aux_set_stacksize initializes the library if needed
- __kmp_aux_set_stacksize( (size_t) KMP_DEREF arg );
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_STACKSIZE_S( size_t KMP_DEREF arg )
-{
- #ifdef KMP_STUB
- __kmps_set_stacksize( KMP_DEREF arg );
- #else
- // __kmp_aux_set_stacksize initializes the library if needed
- __kmp_aux_set_stacksize( KMP_DEREF arg );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_STACKSIZE( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_stacksize();
- #else
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- };
- return (int)__kmp_stksize;
- #endif
-}
-
-size_t FTN_STDCALL
-FTN_GET_STACKSIZE_S( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_stacksize();
- #else
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- };
- return __kmp_stksize;
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_BLOCKTIME( int KMP_DEREF arg )
-{
- #ifdef KMP_STUB
- __kmps_set_blocktime( KMP_DEREF arg );
- #else
- int gtid, tid;
- kmp_info_t *thread;
-
- gtid = __kmp_entry_gtid();
- tid = __kmp_tid_from_gtid(gtid);
- thread = __kmp_thread_from_gtid(gtid);
-
- __kmp_aux_set_blocktime( KMP_DEREF arg, thread, tid );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_BLOCKTIME( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_blocktime();
- #else
- int gtid, tid;
- kmp_info_t *thread;
- kmp_team_p *team;
-
- gtid = __kmp_entry_gtid();
- tid = __kmp_tid_from_gtid(gtid);
- thread = __kmp_thread_from_gtid(gtid);
- team = __kmp_threads[ gtid ] -> th.th_team;
-
- /* These must match the settings used in __kmp_wait_sleep() */
- if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
- KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n",
- gtid, team->t.t_id, tid, KMP_MAX_BLOCKTIME) );
- return KMP_MAX_BLOCKTIME;
- }
-#ifdef KMP_ADJUST_BLOCKTIME
- else if ( __kmp_zero_bt && !get__bt_set( team, tid ) ) {
- KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n",
- gtid, team->t.t_id, tid, 0) );
- return 0;
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
- else {
- KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n",
- gtid, team->t.t_id, tid, get__blocktime( team, tid ) ) );
- return get__blocktime( team, tid );
- };
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_LIBRARY_SERIAL( void )
-{
- #ifdef KMP_STUB
- __kmps_set_library( library_serial );
- #else
- // __kmp_user_set_library initializes the library if needed
- __kmp_user_set_library( library_serial );
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_LIBRARY_TURNAROUND( void )
-{
- #ifdef KMP_STUB
- __kmps_set_library( library_turnaround );
- #else
- // __kmp_user_set_library initializes the library if needed
- __kmp_user_set_library( library_turnaround );
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_LIBRARY_THROUGHPUT( void )
-{
- #ifdef KMP_STUB
- __kmps_set_library( library_throughput );
- #else
- // __kmp_user_set_library initializes the library if needed
- __kmp_user_set_library( library_throughput );
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_LIBRARY( int KMP_DEREF arg )
-{
- #ifdef KMP_STUB
- __kmps_set_library( KMP_DEREF arg );
- #else
- enum library_type lib;
- lib = (enum library_type) KMP_DEREF arg;
- // __kmp_user_set_library initializes the library if needed
- __kmp_user_set_library( lib );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_LIBRARY (void)
-{
- #ifdef KMP_STUB
- return __kmps_get_library();
- #else
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- }
- return ((int) __kmp_library);
- #endif
-}
-
-int FTN_STDCALL
-FTN_SET_AFFINITY( void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_set_affinity( mask );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_AFFINITY( void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_get_affinity( mask );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_AFFINITY_MAX_PROC( void )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return 0;
- #else
- //
- // We really only NEED serial initialization here.
- //
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- if ( ! ( KMP_AFFINITY_CAPABLE() ) ) {
- return 0;
- }
-
- #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC
- if ( __kmp_num_proc_groups > 1 ) {
- return (int)KMP_CPU_SETSIZE;
- }
- #endif /* KMP_GROUP_AFFINITY */
- return __kmp_xproc;
- #endif
-}
-
-void FTN_STDCALL
-FTN_CREATE_AFFINITY_MASK( void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- *mask = NULL;
- #else
- //
- // We really only NEED serial initialization here.
- //
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- # if KMP_USE_HWLOC
- *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc();
- # else
- *mask = kmpc_malloc( __kmp_affin_mask_size );
- # endif
- KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) );
- #endif
-}
-
-void FTN_STDCALL
-FTN_DESTROY_AFFINITY_MASK( void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- // Nothing
- #else
- //
- // We really only NEED serial initialization here.
- //
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- if ( __kmp_env_consistency_check ) {
- if ( *mask == NULL ) {
- KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" );
- }
- }
- # if KMP_USE_HWLOC
- hwloc_bitmap_free((hwloc_cpuset_t)(*mask));
- # else
- kmpc_free( *mask );
- # endif
- *mask = NULL;
- #endif
-}
-
-int FTN_STDCALL
-FTN_SET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_set_affinity_mask_proc( KMP_DEREF proc, mask );
- #endif
-}
-
-int FTN_STDCALL
-FTN_UNSET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_unset_affinity_mask_proc( KMP_DEREF proc, mask );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask )
-{
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_aux_get_affinity_mask_proc( KMP_DEREF proc, mask );
- #endif
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-/* sets the requested number of threads for the next parallel region */
-
-void FTN_STDCALL
-xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg )
-{
- #ifdef KMP_STUB
- // Nothing.
- #else
- __kmp_set_num_threads( KMP_DEREF arg, __kmp_entry_gtid() );
- #endif
-}
-
-
-/* returns the number of threads in current team */
-int FTN_STDCALL
-xexpand(FTN_GET_NUM_THREADS)( void )
-{
- #ifdef KMP_STUB
- return 1;
- #else
- // __kmpc_bound_num_threads initializes the library if needed
- return __kmpc_bound_num_threads(NULL);
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_MAX_THREADS)( void )
-{
- #ifdef KMP_STUB
- return 1;
- #else
- int gtid;
- kmp_info_t *thread;
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- gtid = __kmp_entry_gtid();
- thread = __kmp_threads[ gtid ];
- //return thread -> th.th_team -> t.t_current_task[ thread->th.th_info.ds.ds_tid ] -> icvs.nproc;
- return thread -> th.th_current_task -> td_icvs.nproc;
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_THREAD_NUM)( void )
-{
- #ifdef KMP_STUB
- return 0;
- #else
- int gtid;
-
- #if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD
- gtid = __kmp_entry_gtid();
- #elif KMP_OS_WINDOWS
- if (!__kmp_init_parallel ||
- (gtid = (int)((kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ))) == 0) {
- // Either library isn't initialized or thread is not registered
- // 0 is the correct TID in this case
- return 0;
- }
- --gtid; // We keep (gtid+1) in TLS
- #elif KMP_OS_LINUX
- #ifdef KMP_TDATA_GTID
- if ( __kmp_gtid_mode >= 3 ) {
- if ((gtid = __kmp_gtid) == KMP_GTID_DNE) {
- return 0;
- }
- } else {
- #endif
- if (!__kmp_init_parallel ||
- (gtid = (kmp_intptr_t)(pthread_getspecific( __kmp_gtid_threadprivate_key ))) == 0) {
- return 0;
- }
- --gtid;
- #ifdef KMP_TDATA_GTID
- }
- #endif
- #else
- #error Unknown or unsupported OS
- #endif
-
- return __kmp_tid_from_gtid( gtid );
- #endif
-}
-
-int FTN_STDCALL
-FTN_GET_NUM_KNOWN_THREADS( void )
-{
- #ifdef KMP_STUB
- return 1;
- #else
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- }
-        /* NOTE: this is not synchronized, so it can change at any moment */
- /* NOTE: this number also includes threads preallocated in hot-teams */
- return TCR_4(__kmp_nth);
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_NUM_PROCS)( void )
-{
- #ifdef KMP_STUB
- return 1;
- #else
- if ( ! TCR_4(__kmp_init_middle) ) {
- __kmp_middle_initialize();
- }
- return __kmp_avail_proc;
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_SET_NESTED)( int KMP_DEREF flag )
-{
- #ifdef KMP_STUB
- __kmps_set_nested( KMP_DEREF flag );
- #else
- kmp_info_t *thread;
- /* For the thread-private internal controls implementation */
- thread = __kmp_entry_thread();
- __kmp_save_internal_controls( thread );
- set__nested( thread, ( (KMP_DEREF flag) ? TRUE : FALSE ) );
- #endif
-}
-
-
-int FTN_STDCALL
-xexpand(FTN_GET_NESTED)( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_nested();
- #else
- kmp_info_t *thread;
- thread = __kmp_entry_thread();
- return get__nested( thread );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag )
-{
- #ifdef KMP_STUB
- __kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE );
- #else
- kmp_info_t *thread;
- /* For the thread-private implementation of the internal controls */
- thread = __kmp_entry_thread();
- // !!! What if foreign thread calls it?
- __kmp_save_internal_controls( thread );
- set__dynamic( thread, KMP_DEREF flag ? TRUE : FALSE );
- #endif
-}
-
-
-int FTN_STDCALL
-xexpand(FTN_GET_DYNAMIC)( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_dynamic();
- #else
- kmp_info_t *thread;
- thread = __kmp_entry_thread();
- return get__dynamic( thread );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_IN_PARALLEL)( void )
-{
- #ifdef KMP_STUB
- return 0;
- #else
- kmp_info_t *th = __kmp_entry_thread();
-#if OMP_40_ENABLED
- if ( th->th.th_teams_microtask ) {
-            // AC: r_in_parallel does not work inside a teams construct
-            // where the real parallel region is inactive, but all threads have the same root,
-            // so setting it in one team affects other teams.
-            // The solution is to use the per-team nesting level.
- return ( th->th.th_team->t.t_active_level ? 1 : 0 );
- }
- else
-#endif /* OMP_40_ENABLED */
- return ( th->th.th_root->r.r_in_parallel ? FTN_TRUE : FTN_FALSE );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier )
-{
- #ifdef KMP_STUB
- __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier );
- #else
- /* TO DO */
- /* For the per-task implementation of the internal controls */
- __kmp_set_schedule( __kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier )
-{
- #ifdef KMP_STUB
- __kmps_get_schedule( kind, modifier );
- #else
- /* TO DO */
- /* For the per-task implementation of the internal controls */
- __kmp_get_schedule( __kmp_entry_gtid(), kind, modifier );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg )
-{
- #ifdef KMP_STUB
- // Nothing.
- #else
- /* TO DO */
- /* We want per-task implementation of this internal control */
- __kmp_set_max_active_levels( __kmp_entry_gtid(), KMP_DEREF arg );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void )
-{
- #ifdef KMP_STUB
- return 0;
- #else
- /* TO DO */
- /* We want per-task implementation of this internal control */
- return __kmp_get_max_active_levels( __kmp_entry_gtid() );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_ACTIVE_LEVEL)( void )
-{
- #ifdef KMP_STUB
- return 0; // returns 0 if it is called from the sequential part of the program
- #else
- /* TO DO */
- /* For the per-task implementation of the internal controls */
- return __kmp_entry_thread() -> th.th_team -> t.t_active_level;
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_LEVEL)( void )
-{
- #ifdef KMP_STUB
- return 0; // returns 0 if it is called from the sequential part of the program
- #else
- /* TO DO */
- /* For the per-task implementation of the internal controls */
- return __kmp_entry_thread() -> th.th_team -> t.t_level;
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level )
-{
- #ifdef KMP_STUB
- return ( KMP_DEREF level ) ? ( -1 ) : ( 0 );
- #else
- return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), KMP_DEREF level );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level )
-{
- #ifdef KMP_STUB
- return ( KMP_DEREF level ) ? ( -1 ) : ( 1 );
- #else
- return __kmp_get_team_size( __kmp_entry_gtid(), KMP_DEREF level );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_THREAD_LIMIT)( void )
-{
- #ifdef KMP_STUB
- return 1; // TO DO: clarify whether it returns 1 or 0?
- #else
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- };
- /* global ICV */
- return __kmp_max_nth;
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_IN_FINAL)( void )
-{
- #ifdef KMP_STUB
- return 0; // TO DO: clarify whether it returns 1 or 0?
- #else
- if ( ! TCR_4(__kmp_init_parallel) ) {
- return 0;
- }
- return __kmp_entry_thread() -> th.th_current_task -> td_flags.final;
- #endif
-}
-
-#if OMP_40_ENABLED
-
-
-kmp_proc_bind_t FTN_STDCALL
-xexpand(FTN_GET_PROC_BIND)( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_proc_bind();
- #else
- return get__proc_bind( __kmp_entry_thread() );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_NUM_TEAMS)( void )
-{
- #ifdef KMP_STUB
- return 1;
- #else
- kmp_info_t *thr = __kmp_entry_thread();
- if ( thr->th.th_teams_microtask ) {
- kmp_team_t *team = thr->th.th_team;
- int tlevel = thr->th.th_teams_level;
- int ii = team->t.t_level; // the level of the teams construct
- int dd = team -> t.t_serialized;
- int level = tlevel + 1;
- KMP_DEBUG_ASSERT( ii >= tlevel );
- while( ii > level )
- {
- for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
- {
- }
- if( team -> t.t_serialized && ( !dd ) ) {
- team = team->t.t_parent;
- continue;
- }
- if( ii > level ) {
- team = team->t.t_parent;
- ii--;
- }
- }
- if ( dd > 1 ) {
- return 1; // teams region is serialized ( 1 team of 1 thread ).
- } else {
- return team->t.t_parent->t.t_nproc;
- }
- } else {
- return 1;
- }
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_GET_TEAM_NUM)( void )
-{
- #ifdef KMP_STUB
- return 0;
- #else
- kmp_info_t *thr = __kmp_entry_thread();
- if ( thr->th.th_teams_microtask ) {
- kmp_team_t *team = thr->th.th_team;
- int tlevel = thr->th.th_teams_level; // the level of the teams construct
- int ii = team->t.t_level;
- int dd = team -> t.t_serialized;
- int level = tlevel + 1;
- KMP_DEBUG_ASSERT( ii >= tlevel );
- while( ii > level )
- {
- for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
- {
- }
- if( team -> t.t_serialized && ( !dd ) ) {
- team = team->t.t_parent;
- continue;
- }
- if( ii > level ) {
- team = team->t.t_parent;
- ii--;
- }
- }
- if ( dd > 1 ) {
- return 0; // teams region is serialized ( 1 team of 1 thread ).
- } else {
- return team->t.t_master_tid;
- }
- } else {
- return 0;
- }
- #endif
-}
-
-#if KMP_MIC || KMP_OS_DARWIN
-
-static int __kmp_default_device = 0;
-
-int FTN_STDCALL
-FTN_GET_DEFAULT_DEVICE( void )
-{
- return __kmp_default_device;
-}
-
-void FTN_STDCALL
-FTN_SET_DEFAULT_DEVICE( int KMP_DEREF arg )
-{
- __kmp_default_device = KMP_DEREF arg;
-}
-
-int FTN_STDCALL
-FTN_GET_NUM_DEVICES( void )
-{
- return 0;
-}
-
-#endif // KMP_MIC || KMP_OS_DARWIN
-
-#if ! KMP_OS_LINUX
-
-int FTN_STDCALL
-xexpand(FTN_IS_INITIAL_DEVICE)( void )
-{
- return 1;
-}
-
-#else
-
-// This internal function is used when the entry from the offload library
-// is not found.
-int _Offload_get_device_number( void ) __attribute__((weak));
-
-int FTN_STDCALL
-xexpand(FTN_IS_INITIAL_DEVICE)( void )
-{
- if( _Offload_get_device_number ) {
- return _Offload_get_device_number() == -1;
- } else {
- return 1;
- }
-}
-
-#endif // ! KMP_OS_LINUX
-
-#endif // OMP_40_ENABLED
-
-#ifdef KMP_STUB
-typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t;
-#endif /* KMP_STUB */
-
-#if KMP_USE_DYNAMIC_LOCK
-void FTN_STDCALL
-FTN_INIT_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint )
-{
- #ifdef KMP_STUB
- *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
- #else
- __kmpc_init_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint );
- #endif
-}
-
-void FTN_STDCALL
-FTN_INIT_NEST_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint )
-{
- #ifdef KMP_STUB
- *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
- #else
- __kmpc_init_nest_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint );
- #endif
-}
-#endif
-
-/* initialize the lock */
-void FTN_STDCALL
-xexpand(FTN_INIT_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
- #else
- __kmpc_init_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-/* initialize the lock */
-void FTN_STDCALL
-xexpand(FTN_INIT_NEST_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
- #else
- __kmpc_init_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_DESTROY_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- *((kmp_stub_lock_t *)user_lock) = UNINIT;
- #else
- __kmpc_destroy_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- *((kmp_stub_lock_t *)user_lock) = UNINIT;
- #else
- __kmpc_destroy_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_SET_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
- // TODO: Issue an error.
- }; // if
- if ( *((kmp_stub_lock_t *)user_lock) != UNLOCKED ) {
- // TODO: Issue an error.
- }; // if
- *((kmp_stub_lock_t *)user_lock) = LOCKED;
- #else
- __kmpc_set_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_SET_NEST_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
- // TODO: Issue an error.
- }; // if
- (*((int *)user_lock))++;
- #else
- __kmpc_set_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_UNSET_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
- // TODO: Issue an error.
- }; // if
- if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) {
- // TODO: Issue an error.
- }; // if
- *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
- #else
- __kmpc_unset_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-void FTN_STDCALL
-xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
- // TODO: Issue an error.
- }; // if
- if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) {
- // TODO: Issue an error.
- }; // if
- (*((int *)user_lock))--;
- #else
- __kmpc_unset_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_TEST_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
- // TODO: Issue an error.
- }; // if
- if ( *((kmp_stub_lock_t *)user_lock) == LOCKED ) {
- return 0;
- }; // if
- *((kmp_stub_lock_t *)user_lock) = LOCKED;
- return 1;
- #else
- return __kmpc_test_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-int FTN_STDCALL
-xexpand(FTN_TEST_NEST_LOCK)( void **user_lock )
-{
- #ifdef KMP_STUB
- if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
- // TODO: Issue an error.
- }; // if
- return ++(*((int *)user_lock));
- #else
- return __kmpc_test_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
- #endif
-}
-
-double FTN_STDCALL
-xexpand(FTN_GET_WTIME)( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_wtime();
- #else
- double data;
- #if ! KMP_OS_LINUX
- // We don't need library initialization to get the time on Linux* OS.
- // The routine can be used to measure library initialization time on Linux* OS now.
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- };
- #endif
- __kmp_elapsed( & data );
- return data;
- #endif
-}
-
-double FTN_STDCALL
-xexpand(FTN_GET_WTICK)( void )
-{
- #ifdef KMP_STUB
- return __kmps_get_wtick();
- #else
- double data;
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- };
- __kmp_elapsed_tick( & data );
- return data;
- #endif
-}
-
-/* ------------------------------------------------------------------------ */
-
-void * FTN_STDCALL
-FTN_MALLOC( size_t KMP_DEREF size )
-{
- // kmpc_malloc initializes the library if needed
- return kmpc_malloc( KMP_DEREF size );
-}
-
-void * FTN_STDCALL
-FTN_CALLOC( size_t KMP_DEREF nelem, size_t KMP_DEREF elsize )
-{
- // kmpc_calloc initializes the library if needed
- return kmpc_calloc( KMP_DEREF nelem, KMP_DEREF elsize );
-}
-
-void * FTN_STDCALL
-FTN_REALLOC( void * KMP_DEREF ptr, size_t KMP_DEREF size )
-{
- // kmpc_realloc initializes the library if needed
- return kmpc_realloc( KMP_DEREF ptr, KMP_DEREF size );
-}
-
-void FTN_STDCALL
-FTN_FREE( void * KMP_DEREF ptr )
-{
- // does nothing if the library is not initialized
- kmpc_free( KMP_DEREF ptr );
-}
-
-void FTN_STDCALL
-FTN_SET_WARNINGS_ON( void )
-{
- #ifndef KMP_STUB
- __kmp_generate_warnings = kmp_warnings_explicit;
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_WARNINGS_OFF( void )
-{
- #ifndef KMP_STUB
- __kmp_generate_warnings = FALSE;
- #endif
-}
-
-void FTN_STDCALL
-FTN_SET_DEFAULTS( char const * str
- #ifndef PASS_ARGS_BY_VALUE
- , int len
- #endif
-)
-{
- #ifndef KMP_STUB
- #ifdef PASS_ARGS_BY_VALUE
- int len = (int)KMP_STRLEN( str );
- #endif
- __kmp_aux_set_defaults( str, len );
- #endif
-}
-
-/* ------------------------------------------------------------------------ */
-
-
-#if OMP_40_ENABLED
-/* returns the status of cancellation */
-int FTN_STDCALL
-xexpand(FTN_GET_CANCELLATION)(void) {
-#ifdef KMP_STUB
- return 0 /* false */;
-#else
- // initialize the library if needed
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- }
- return __kmp_omp_cancellation;
-#endif
-}
-
-int FTN_STDCALL
-FTN_GET_CANCELLATION_STATUS(int cancel_kind) {
-#ifdef KMP_STUB
- return 0 /* false */;
-#else
- return __kmp_get_cancellation_status(cancel_kind);
-#endif
-}
-
-#endif // OMP_40_ENABLED
-
-// GCC compatibility (versioned symbols)
-#ifdef KMP_USE_VERSION_SYMBOLS
-
-/*
-    The following sections create function aliases (dummy symbols) for the omp_* routines.
- These aliases will then be versioned according to how libgomp ``versions'' its
- symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the
- default version which libomp uses: VERSION (defined in exports_so.txt)
- If you want to see the versioned symbols for libgomp.so.1 then just type:
-
- objdump -T /path/to/libgomp.so.1 | grep omp_
-
- Example:
- Step 1) Create __kmp_api_omp_set_num_threads_10_alias
- which is alias of __kmp_api_omp_set_num_threads
- Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0
- Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION
-*/
-
-// OMP_1.0 aliases
-xaliasify(FTN_SET_NUM_THREADS, 10);
-xaliasify(FTN_GET_NUM_THREADS, 10);
-xaliasify(FTN_GET_MAX_THREADS, 10);
-xaliasify(FTN_GET_THREAD_NUM, 10);
-xaliasify(FTN_GET_NUM_PROCS, 10);
-xaliasify(FTN_IN_PARALLEL, 10);
-xaliasify(FTN_SET_DYNAMIC, 10);
-xaliasify(FTN_GET_DYNAMIC, 10);
-xaliasify(FTN_SET_NESTED, 10);
-xaliasify(FTN_GET_NESTED, 10);
-xaliasify(FTN_INIT_LOCK, 10);
-xaliasify(FTN_INIT_NEST_LOCK, 10);
-xaliasify(FTN_DESTROY_LOCK, 10);
-xaliasify(FTN_DESTROY_NEST_LOCK, 10);
-xaliasify(FTN_SET_LOCK, 10);
-xaliasify(FTN_SET_NEST_LOCK, 10);
-xaliasify(FTN_UNSET_LOCK, 10);
-xaliasify(FTN_UNSET_NEST_LOCK, 10);
-xaliasify(FTN_TEST_LOCK, 10);
-xaliasify(FTN_TEST_NEST_LOCK, 10);
-
-// OMP_2.0 aliases
-xaliasify(FTN_GET_WTICK, 20);
-xaliasify(FTN_GET_WTIME, 20);
-
-// OMP_3.0 aliases
-xaliasify(FTN_SET_SCHEDULE, 30);
-xaliasify(FTN_GET_SCHEDULE, 30);
-xaliasify(FTN_GET_THREAD_LIMIT, 30);
-xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30);
-xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30);
-xaliasify(FTN_GET_LEVEL, 30);
-xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30);
-xaliasify(FTN_GET_TEAM_SIZE, 30);
-xaliasify(FTN_GET_ACTIVE_LEVEL, 30);
-xaliasify(FTN_INIT_LOCK, 30);
-xaliasify(FTN_INIT_NEST_LOCK, 30);
-xaliasify(FTN_DESTROY_LOCK, 30);
-xaliasify(FTN_DESTROY_NEST_LOCK, 30);
-xaliasify(FTN_SET_LOCK, 30);
-xaliasify(FTN_SET_NEST_LOCK, 30);
-xaliasify(FTN_UNSET_LOCK, 30);
-xaliasify(FTN_UNSET_NEST_LOCK, 30);
-xaliasify(FTN_TEST_LOCK, 30);
-xaliasify(FTN_TEST_NEST_LOCK, 30);
-
-// OMP_3.1 aliases
-xaliasify(FTN_IN_FINAL, 31);
-
-#if OMP_40_ENABLED
-// OMP_4.0 aliases
-xaliasify(FTN_GET_PROC_BIND, 40);
-xaliasify(FTN_GET_NUM_TEAMS, 40);
-xaliasify(FTN_GET_TEAM_NUM, 40);
-xaliasify(FTN_GET_CANCELLATION, 40);
-xaliasify(FTN_IS_INITIAL_DEVICE, 40);
-#endif /* OMP_40_ENABLED */
-
-#if OMP_41_ENABLED
-// OMP_4.1 aliases
-#endif
-
-#if OMP_50_ENABLED
-// OMP_5.0 aliases
-#endif
-
-// OMP_1.0 versioned symbols
-xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0");
-xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0");
-xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0");
-xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0");
-xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0");
-xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0");
-xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0");
-xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0");
-xversionify(FTN_SET_NESTED, 10, "OMP_1.0");
-xversionify(FTN_GET_NESTED, 10, "OMP_1.0");
-xversionify(FTN_INIT_LOCK, 10, "OMP_1.0");
-xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0");
-xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0");
-xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0");
-xversionify(FTN_SET_LOCK, 10, "OMP_1.0");
-xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0");
-xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0");
-xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0");
-xversionify(FTN_TEST_LOCK, 10, "OMP_1.0");
-xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0");
-
-// OMP_2.0 versioned symbols
-xversionify(FTN_GET_WTICK, 20, "OMP_2.0");
-xversionify(FTN_GET_WTIME, 20, "OMP_2.0");
-
-// OMP_3.0 versioned symbols
-xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0");
-xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0");
-xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0");
-xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0");
-xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0");
-xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0");
-xversionify(FTN_GET_LEVEL, 30, "OMP_3.0");
-xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0");
-xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0");
-
-// the lock routines have a 1.0 and 3.0 version
-xversionify(FTN_INIT_LOCK, 30, "OMP_3.0");
-xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0");
-xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0");
-xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0");
-xversionify(FTN_SET_LOCK, 30, "OMP_3.0");
-xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0");
-xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0");
-xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0");
-xversionify(FTN_TEST_LOCK, 30, "OMP_3.0");
-xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0");
-
-// OMP_3.1 versioned symbol
-xversionify(FTN_IN_FINAL, 31, "OMP_3.1");
-
-#if OMP_40_ENABLED
-// OMP_4.0 versioned symbols
-xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0");
-xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0");
-xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0");
-xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0");
-xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0");
-#endif /* OMP_40_ENABLED */
-
-#if OMP_41_ENABLED
-// OMP_4.1 versioned symbols
-#endif
-
-#if OMP_50_ENABLED
-// OMP_5.0 versioned symbols
-#endif
-
-#endif // KMP_USE_VERSION_SYMBOLS
-
-#ifdef __cplusplus
- } //extern "C"
-#endif // __cplusplus
-
-// end of file //
+/*
+ * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef FTN_STDCALL
+# error The support file kmp_ftn_entry.h should not be compiled by itself.
+#endif
+
+#ifdef KMP_STUB
+ #include "kmp_stub.h"
+#endif
+
+#include "kmp_i18n.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+/*
+ * For compatibility with the Gnu/MS Open MP codegen, omp_set_num_threads(),
+ * omp_set_nested(), and omp_set_dynamic() [in lowercase on MS, and w/o
+ * a trailing underscore on Linux* OS] take call by value integer arguments.
+ * + omp_set_max_active_levels()
+ * + omp_set_schedule()
+ *
+ * For backward compatibility with 9.1 and previous Intel compiler, these
+ * entry points take call by reference integer arguments.
+ */
+#ifdef KMP_GOMP_COMPAT
+# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER)
+# define PASS_ARGS_BY_VALUE 1
+# endif
+#endif
+#if KMP_OS_WINDOWS
+# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND)
+# define PASS_ARGS_BY_VALUE 1
+# endif
+#endif
+
+// This macro helps to reduce code duplication.
+#ifdef PASS_ARGS_BY_VALUE
+ #define KMP_DEREF
+#else
+ #define KMP_DEREF *
+#endif
+
+void FTN_STDCALL
+FTN_SET_STACKSIZE( int KMP_DEREF arg )
+{
+ #ifdef KMP_STUB
+ __kmps_set_stacksize( KMP_DEREF arg );
+ #else
+ // __kmp_aux_set_stacksize initializes the library if needed
+ __kmp_aux_set_stacksize( (size_t) KMP_DEREF arg );
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_STACKSIZE_S( size_t KMP_DEREF arg )
+{
+ #ifdef KMP_STUB
+ __kmps_set_stacksize( KMP_DEREF arg );
+ #else
+ // __kmp_aux_set_stacksize initializes the library if needed
+ __kmp_aux_set_stacksize( KMP_DEREF arg );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_STACKSIZE( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_stacksize();
+ #else
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ };
+ return (int)__kmp_stksize;
+ #endif
+}
+
+size_t FTN_STDCALL
+FTN_GET_STACKSIZE_S( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_stacksize();
+ #else
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ };
+ return __kmp_stksize;
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_BLOCKTIME( int KMP_DEREF arg )
+{
+ #ifdef KMP_STUB
+ __kmps_set_blocktime( KMP_DEREF arg );
+ #else
+ int gtid, tid;
+ kmp_info_t *thread;
+
+ gtid = __kmp_entry_gtid();
+ tid = __kmp_tid_from_gtid(gtid);
+ thread = __kmp_thread_from_gtid(gtid);
+
+ __kmp_aux_set_blocktime( KMP_DEREF arg, thread, tid );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_BLOCKTIME( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_blocktime();
+ #else
+ int gtid, tid;
+ kmp_info_t *thread;
+ kmp_team_p *team;
+
+ gtid = __kmp_entry_gtid();
+ tid = __kmp_tid_from_gtid(gtid);
+ thread = __kmp_thread_from_gtid(gtid);
+ team = __kmp_threads[ gtid ] -> th.th_team;
+
+ /* These must match the settings used in __kmp_wait_sleep() */
+ if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
+ KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n",
+ gtid, team->t.t_id, tid, KMP_MAX_BLOCKTIME) );
+ return KMP_MAX_BLOCKTIME;
+ }
+#ifdef KMP_ADJUST_BLOCKTIME
+ else if ( __kmp_zero_bt && !get__bt_set( team, tid ) ) {
+ KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n",
+ gtid, team->t.t_id, tid, 0) );
+ return 0;
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+ else {
+ KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n",
+ gtid, team->t.t_id, tid, get__blocktime( team, tid ) ) );
+ return get__blocktime( team, tid );
+ };
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_LIBRARY_SERIAL( void )
+{
+ #ifdef KMP_STUB
+ __kmps_set_library( library_serial );
+ #else
+ // __kmp_user_set_library initializes the library if needed
+ __kmp_user_set_library( library_serial );
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_LIBRARY_TURNAROUND( void )
+{
+ #ifdef KMP_STUB
+ __kmps_set_library( library_turnaround );
+ #else
+ // __kmp_user_set_library initializes the library if needed
+ __kmp_user_set_library( library_turnaround );
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_LIBRARY_THROUGHPUT( void )
+{
+ #ifdef KMP_STUB
+ __kmps_set_library( library_throughput );
+ #else
+ // __kmp_user_set_library initializes the library if needed
+ __kmp_user_set_library( library_throughput );
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_LIBRARY( int KMP_DEREF arg )
+{
+ #ifdef KMP_STUB
+ __kmps_set_library( KMP_DEREF arg );
+ #else
+ enum library_type lib;
+ lib = (enum library_type) KMP_DEREF arg;
+ // __kmp_user_set_library initializes the library if needed
+ __kmp_user_set_library( lib );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_LIBRARY (void)
+{
+ #ifdef KMP_STUB
+ return __kmps_get_library();
+ #else
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ }
+ return ((int) __kmp_library);
+ #endif
+}
+
+int FTN_STDCALL
+FTN_SET_AFFINITY( void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+ #else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_set_affinity( mask );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_AFFINITY( void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+ #else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_get_affinity( mask );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_AFFINITY_MAX_PROC( void )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return 0;
+ #else
+ //
+ // We really only NEED serial initialization here.
+ //
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ if ( ! ( KMP_AFFINITY_CAPABLE() ) ) {
+ return 0;
+ }
+
+ #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC
+ if ( __kmp_num_proc_groups > 1 ) {
+ return (int)KMP_CPU_SETSIZE;
+ }
+ #endif /* KMP_GROUP_AFFINITY */
+ return __kmp_xproc;
+ #endif
+}
+
+void FTN_STDCALL
+FTN_CREATE_AFFINITY_MASK( void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ *mask = NULL;
+ #else
+ //
+ // We really only NEED serial initialization here.
+ //
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ # if KMP_USE_HWLOC
+ *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc();
+ # else
+ *mask = kmpc_malloc( __kmp_affin_mask_size );
+ # endif
+ KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) );
+ #endif
+}
+
+void FTN_STDCALL
+FTN_DESTROY_AFFINITY_MASK( void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ // Nothing
+ #else
+ //
+ // We really only NEED serial initialization here.
+ //
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ if ( __kmp_env_consistency_check ) {
+ if ( *mask == NULL ) {
+ KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" );
+ }
+ }
+ # if KMP_USE_HWLOC
+ hwloc_bitmap_free((hwloc_cpuset_t)(*mask));
+ # else
+ kmpc_free( *mask );
+ # endif
+ *mask = NULL;
+ #endif
+}
+
+int FTN_STDCALL
+FTN_SET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+ #else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_set_affinity_mask_proc( KMP_DEREF proc, mask );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_UNSET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+ #else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_unset_affinity_mask_proc( KMP_DEREF proc, mask );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask )
+{
+ #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
+ return -1;
+ #else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_aux_get_affinity_mask_proc( KMP_DEREF proc, mask );
+ #endif
+}
+
+
+/* ------------------------------------------------------------------------ */
+
+/* sets the requested number of threads for the next parallel region */
+
+void FTN_STDCALL
+xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg )
+{
+ #ifdef KMP_STUB
+ // Nothing.
+ #else
+ __kmp_set_num_threads( KMP_DEREF arg, __kmp_entry_gtid() );
+ #endif
+}
+
+
+/* returns the number of threads in current team */
+int FTN_STDCALL
+xexpand(FTN_GET_NUM_THREADS)( void )
+{
+ #ifdef KMP_STUB
+ return 1;
+ #else
+ // __kmpc_bound_num_threads initializes the library if needed
+ return __kmpc_bound_num_threads(NULL);
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_MAX_THREADS)( void )
+{
+ #ifdef KMP_STUB
+ return 1;
+ #else
+ int gtid;
+ kmp_info_t *thread;
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ gtid = __kmp_entry_gtid();
+ thread = __kmp_threads[ gtid ];
+ //return thread -> th.th_team -> t.t_current_task[ thread->th.th_info.ds.ds_tid ] -> icvs.nproc;
+ return thread -> th.th_current_task -> td_icvs.nproc;
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_THREAD_NUM)( void )
+{
+ #ifdef KMP_STUB
+ return 0;
+ #else
+ int gtid;
+
+ #if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD
+ gtid = __kmp_entry_gtid();
+ #elif KMP_OS_WINDOWS
+ if (!__kmp_init_parallel ||
+ (gtid = (int)((kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ))) == 0) {
+ // Either library isn't initialized or thread is not registered
+ // 0 is the correct TID in this case
+ return 0;
+ }
+ --gtid; // We keep (gtid+1) in TLS
+ #elif KMP_OS_LINUX
+ #ifdef KMP_TDATA_GTID
+ if ( __kmp_gtid_mode >= 3 ) {
+ if ((gtid = __kmp_gtid) == KMP_GTID_DNE) {
+ return 0;
+ }
+ } else {
+ #endif
+ if (!__kmp_init_parallel ||
+ (gtid = (kmp_intptr_t)(pthread_getspecific( __kmp_gtid_threadprivate_key ))) == 0) {
+ return 0;
+ }
+ --gtid;
+ #ifdef KMP_TDATA_GTID
+ }
+ #endif
+ #else
+ #error Unknown or unsupported OS
+ #endif
+
+ return __kmp_tid_from_gtid( gtid );
+ #endif
+}
+
+int FTN_STDCALL
+FTN_GET_NUM_KNOWN_THREADS( void )
+{
+ #ifdef KMP_STUB
+ return 1;
+ #else
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ }
+    /* NOTE: this is not synchronized, so it can change at any moment */
+ /* NOTE: this number also includes threads preallocated in hot-teams */
+ return TCR_4(__kmp_nth);
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_NUM_PROCS)( void )
+{
+ #ifdef KMP_STUB
+ return 1;
+ #else
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ __kmp_middle_initialize();
+ }
+ return __kmp_avail_proc;
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_SET_NESTED)( int KMP_DEREF flag )
+{
+ #ifdef KMP_STUB
+ __kmps_set_nested( KMP_DEREF flag );
+ #else
+ kmp_info_t *thread;
+ /* For the thread-private internal controls implementation */
+ thread = __kmp_entry_thread();
+ __kmp_save_internal_controls( thread );
+ set__nested( thread, ( (KMP_DEREF flag) ? TRUE : FALSE ) );
+ #endif
+}
+
+
+int FTN_STDCALL
+xexpand(FTN_GET_NESTED)( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_nested();
+ #else
+ kmp_info_t *thread;
+ thread = __kmp_entry_thread();
+ return get__nested( thread );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag )
+{
+ #ifdef KMP_STUB
+ __kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE );
+ #else
+ kmp_info_t *thread;
+ /* For the thread-private implementation of the internal controls */
+ thread = __kmp_entry_thread();
+ // !!! What if foreign thread calls it?
+ __kmp_save_internal_controls( thread );
+ set__dynamic( thread, KMP_DEREF flag ? TRUE : FALSE );
+ #endif
+}
+
+
+int FTN_STDCALL
+xexpand(FTN_GET_DYNAMIC)( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_dynamic();
+ #else
+ kmp_info_t *thread;
+ thread = __kmp_entry_thread();
+ return get__dynamic( thread );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_IN_PARALLEL)( void )
+{
+ #ifdef KMP_STUB
+ return 0;
+ #else
+ kmp_info_t *th = __kmp_entry_thread();
+#if OMP_40_ENABLED
+ if ( th->th.th_teams_microtask ) {
+ // AC: r_in_parallel does not work inside teams construct
+ // where real parallel is inactive, but all threads have same root,
+ // so setting it in one team affects other teams.
+ // The solution is to use per-team nesting level
+ return ( th->th.th_team->t.t_active_level ? 1 : 0 );
+ }
+ else
+#endif /* OMP_40_ENABLED */
+ return ( th->th.th_root->r.r_in_parallel ? FTN_TRUE : FTN_FALSE );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier )
+{
+ #ifdef KMP_STUB
+ __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier );
+ #else
+ /* TO DO */
+ /* For the per-task implementation of the internal controls */
+ __kmp_set_schedule( __kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier )
+{
+ #ifdef KMP_STUB
+ __kmps_get_schedule( kind, modifier );
+ #else
+ /* TO DO */
+ /* For the per-task implementation of the internal controls */
+ __kmp_get_schedule( __kmp_entry_gtid(), kind, modifier );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg )
+{
+ #ifdef KMP_STUB
+ // Nothing.
+ #else
+ /* TO DO */
+ /* We want per-task implementation of this internal control */
+ __kmp_set_max_active_levels( __kmp_entry_gtid(), KMP_DEREF arg );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void )
+{
+ #ifdef KMP_STUB
+ return 0;
+ #else
+ /* TO DO */
+ /* We want per-task implementation of this internal control */
+ return __kmp_get_max_active_levels( __kmp_entry_gtid() );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_ACTIVE_LEVEL)( void )
+{
+ #ifdef KMP_STUB
+ return 0; // returns 0 if it is called from the sequential part of the program
+ #else
+ /* TO DO */
+ /* For the per-task implementation of the internal controls */
+ return __kmp_entry_thread() -> th.th_team -> t.t_active_level;
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_LEVEL)( void )
+{
+ #ifdef KMP_STUB
+ return 0; // returns 0 if it is called from the sequential part of the program
+ #else
+ /* TO DO */
+ /* For the per-task implementation of the internal controls */
+ return __kmp_entry_thread() -> th.th_team -> t.t_level;
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level )
+{
+ #ifdef KMP_STUB
+ return ( KMP_DEREF level ) ? ( -1 ) : ( 0 );
+ #else
+ return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), KMP_DEREF level );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level )
+{
+ #ifdef KMP_STUB
+ return ( KMP_DEREF level ) ? ( -1 ) : ( 1 );
+ #else
+ return __kmp_get_team_size( __kmp_entry_gtid(), KMP_DEREF level );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_THREAD_LIMIT)( void )
+{
+ #ifdef KMP_STUB
+ return 1; // TO DO: clarify whether it returns 1 or 0?
+ #else
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ };
+ /* global ICV */
+ return __kmp_max_nth;
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_IN_FINAL)( void )
+{
+ #ifdef KMP_STUB
+ return 0; // TO DO: clarify whether it returns 1 or 0?
+ #else
+ if ( ! TCR_4(__kmp_init_parallel) ) {
+ return 0;
+ }
+ return __kmp_entry_thread() -> th.th_current_task -> td_flags.final;
+ #endif
+}
+
+#if OMP_40_ENABLED
+
+
+kmp_proc_bind_t FTN_STDCALL
+xexpand(FTN_GET_PROC_BIND)( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_proc_bind();
+ #else
+ return get__proc_bind( __kmp_entry_thread() );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_NUM_TEAMS)( void )
+{
+ #ifdef KMP_STUB
+ return 1;
+ #else
+ kmp_info_t *thr = __kmp_entry_thread();
+ if ( thr->th.th_teams_microtask ) {
+ kmp_team_t *team = thr->th.th_team;
+ int tlevel = thr->th.th_teams_level;
+ int ii = team->t.t_level; // the level of the teams construct
+ int dd = team -> t.t_serialized;
+ int level = tlevel + 1;
+ KMP_DEBUG_ASSERT( ii >= tlevel );
+ while( ii > level )
+ {
+ for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
+ {
+ }
+ if( team -> t.t_serialized && ( !dd ) ) {
+ team = team->t.t_parent;
+ continue;
+ }
+ if( ii > level ) {
+ team = team->t.t_parent;
+ ii--;
+ }
+ }
+ if ( dd > 1 ) {
+ return 1; // teams region is serialized ( 1 team of 1 thread ).
+ } else {
+ return team->t.t_parent->t.t_nproc;
+ }
+ } else {
+ return 1;
+ }
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_GET_TEAM_NUM)( void )
+{
+ #ifdef KMP_STUB
+ return 0;
+ #else
+ kmp_info_t *thr = __kmp_entry_thread();
+ if ( thr->th.th_teams_microtask ) {
+ kmp_team_t *team = thr->th.th_team;
+ int tlevel = thr->th.th_teams_level; // the level of the teams construct
+ int ii = team->t.t_level;
+ int dd = team -> t.t_serialized;
+ int level = tlevel + 1;
+ KMP_DEBUG_ASSERT( ii >= tlevel );
+ while( ii > level )
+ {
+ for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
+ {
+ }
+ if( team -> t.t_serialized && ( !dd ) ) {
+ team = team->t.t_parent;
+ continue;
+ }
+ if( ii > level ) {
+ team = team->t.t_parent;
+ ii--;
+ }
+ }
+ if ( dd > 1 ) {
+ return 0; // teams region is serialized ( 1 team of 1 thread ).
+ } else {
+ return team->t.t_master_tid;
+ }
+ } else {
+ return 0;
+ }
+ #endif
+}
+
+#if KMP_MIC || KMP_OS_DARWIN
+
+static int __kmp_default_device = 0;
+
+int FTN_STDCALL
+FTN_GET_DEFAULT_DEVICE( void )
+{
+ return __kmp_default_device;
+}
+
+void FTN_STDCALL
+FTN_SET_DEFAULT_DEVICE( int KMP_DEREF arg )
+{
+ __kmp_default_device = KMP_DEREF arg;
+}
+
+int FTN_STDCALL
+FTN_GET_NUM_DEVICES( void )
+{
+ return 0;
+}
+
+#endif // KMP_MIC || KMP_OS_DARWIN
+
+#if ! KMP_OS_LINUX
+
+int FTN_STDCALL
+xexpand(FTN_IS_INITIAL_DEVICE)( void )
+{
+ return 1;
+}
+
+#else
+
+// This internal function is used when the entry from the offload library
+// is not found.
+int _Offload_get_device_number( void ) __attribute__((weak));
+
+int FTN_STDCALL
+xexpand(FTN_IS_INITIAL_DEVICE)( void )
+{
+ if( _Offload_get_device_number ) {
+ return _Offload_get_device_number() == -1;
+ } else {
+ return 1;
+ }
+}
+
+#endif // ! KMP_OS_LINUX
+
+#endif // OMP_40_ENABLED
+
+#ifdef KMP_STUB
+typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t;
+#endif /* KMP_STUB */
+
+#if KMP_USE_DYNAMIC_LOCK
+void FTN_STDCALL
+FTN_INIT_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint )
+{
+ #ifdef KMP_STUB
+ *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
+ #else
+ __kmpc_init_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint );
+ #endif
+}
+
+void FTN_STDCALL
+FTN_INIT_NEST_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint )
+{
+ #ifdef KMP_STUB
+ *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
+ #else
+ __kmpc_init_nest_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint );
+ #endif
+}
+#endif
+
+/* initialize the lock */
+void FTN_STDCALL
+xexpand(FTN_INIT_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
+ #else
+ __kmpc_init_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+/* initialize the lock */
+void FTN_STDCALL
+xexpand(FTN_INIT_NEST_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
+ #else
+ __kmpc_init_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_DESTROY_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ *((kmp_stub_lock_t *)user_lock) = UNINIT;
+ #else
+ __kmpc_destroy_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ *((kmp_stub_lock_t *)user_lock) = UNINIT;
+ #else
+ __kmpc_destroy_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_SET_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
+ // TODO: Issue an error.
+ }; // if
+ if ( *((kmp_stub_lock_t *)user_lock) != UNLOCKED ) {
+ // TODO: Issue an error.
+ }; // if
+ *((kmp_stub_lock_t *)user_lock) = LOCKED;
+ #else
+ __kmpc_set_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_SET_NEST_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
+ // TODO: Issue an error.
+ }; // if
+ (*((int *)user_lock))++;
+ #else
+ __kmpc_set_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_UNSET_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
+ // TODO: Issue an error.
+ }; // if
+ if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) {
+ // TODO: Issue an error.
+ }; // if
+ *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
+ #else
+ __kmpc_unset_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+void FTN_STDCALL
+xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
+ // TODO: Issue an error.
+ }; // if
+ if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) {
+ // TODO: Issue an error.
+ }; // if
+ (*((int *)user_lock))--;
+ #else
+ __kmpc_unset_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_TEST_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
+ // TODO: Issue an error.
+ }; // if
+ if ( *((kmp_stub_lock_t *)user_lock) == LOCKED ) {
+ return 0;
+ }; // if
+ *((kmp_stub_lock_t *)user_lock) = LOCKED;
+ return 1;
+ #else
+ return __kmpc_test_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+int FTN_STDCALL
+xexpand(FTN_TEST_NEST_LOCK)( void **user_lock )
+{
+ #ifdef KMP_STUB
+ if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
+ // TODO: Issue an error.
+ }; // if
+ return ++(*((int *)user_lock));
+ #else
+ return __kmpc_test_nest_lock( NULL, __kmp_entry_gtid(), user_lock );
+ #endif
+}
+
+double FTN_STDCALL
+xexpand(FTN_GET_WTIME)( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_wtime();
+ #else
+ double data;
+ #if ! KMP_OS_LINUX
+ // We don't need library initialization to get the time on Linux* OS.
+ // The routine can be used to measure library initialization time on Linux* OS now.
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ };
+ #endif
+ __kmp_elapsed( & data );
+ return data;
+ #endif
+}
+
+double FTN_STDCALL
+xexpand(FTN_GET_WTICK)( void )
+{
+ #ifdef KMP_STUB
+ return __kmps_get_wtick();
+ #else
+ double data;
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ };
+ __kmp_elapsed_tick( & data );
+ return data;
+ #endif
+}
+
+/* ------------------------------------------------------------------------ */
+
+void * FTN_STDCALL
+FTN_MALLOC( size_t KMP_DEREF size )
+{
+ // kmpc_malloc initializes the library if needed
+ return kmpc_malloc( KMP_DEREF size );
+}
+
+void * FTN_STDCALL
+FTN_CALLOC( size_t KMP_DEREF nelem, size_t KMP_DEREF elsize )
+{
+ // kmpc_calloc initializes the library if needed
+ return kmpc_calloc( KMP_DEREF nelem, KMP_DEREF elsize );
+}
+
+void * FTN_STDCALL
+FTN_REALLOC( void * KMP_DEREF ptr, size_t KMP_DEREF size )
+{
+ // kmpc_realloc initializes the library if needed
+ return kmpc_realloc( KMP_DEREF ptr, KMP_DEREF size );
+}
+
+void FTN_STDCALL
+FTN_FREE( void * KMP_DEREF ptr )
+{
+ // does nothing if the library is not initialized
+ kmpc_free( KMP_DEREF ptr );
+}
+
+void FTN_STDCALL
+FTN_SET_WARNINGS_ON( void )
+{
+ #ifndef KMP_STUB
+ __kmp_generate_warnings = kmp_warnings_explicit;
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_WARNINGS_OFF( void )
+{
+ #ifndef KMP_STUB
+ __kmp_generate_warnings = FALSE;
+ #endif
+}
+
+void FTN_STDCALL
+FTN_SET_DEFAULTS( char const * str
+ #ifndef PASS_ARGS_BY_VALUE
+ , int len
+ #endif
+)
+{
+ #ifndef KMP_STUB
+ #ifdef PASS_ARGS_BY_VALUE
+ int len = (int)KMP_STRLEN( str );
+ #endif
+ __kmp_aux_set_defaults( str, len );
+ #endif
+}
+
+/* ------------------------------------------------------------------------ */
+
+
+#if OMP_40_ENABLED
+/* returns the status of cancellation */
+int FTN_STDCALL
+xexpand(FTN_GET_CANCELLATION)(void) {
+#ifdef KMP_STUB
+ return 0 /* false */;
+#else
+ // initialize the library if needed
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ }
+ return __kmp_omp_cancellation;
+#endif
+}
+
+int FTN_STDCALL
+FTN_GET_CANCELLATION_STATUS(int cancel_kind) {
+#ifdef KMP_STUB
+ return 0 /* false */;
+#else
+ return __kmp_get_cancellation_status(cancel_kind);
+#endif
+}
+
+#endif // OMP_40_ENABLED
+
+// GCC compatibility (versioned symbols)
+#ifdef KMP_USE_VERSION_SYMBOLS
+
+/*
+    The following sections create function aliases (dummy symbols) for the omp_* routines.
+ These aliases will then be versioned according to how libgomp ``versions'' its
+ symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the
+ default version which libomp uses: VERSION (defined in exports_so.txt)
+ If you want to see the versioned symbols for libgomp.so.1 then just type:
+
+ objdump -T /path/to/libgomp.so.1 | grep omp_
+
+ Example:
+ Step 1) Create __kmp_api_omp_set_num_threads_10_alias
+ which is alias of __kmp_api_omp_set_num_threads
+ Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0
+ Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION
+*/
+
+// OMP_1.0 aliases
+xaliasify(FTN_SET_NUM_THREADS, 10);
+xaliasify(FTN_GET_NUM_THREADS, 10);
+xaliasify(FTN_GET_MAX_THREADS, 10);
+xaliasify(FTN_GET_THREAD_NUM, 10);
+xaliasify(FTN_GET_NUM_PROCS, 10);
+xaliasify(FTN_IN_PARALLEL, 10);
+xaliasify(FTN_SET_DYNAMIC, 10);
+xaliasify(FTN_GET_DYNAMIC, 10);
+xaliasify(FTN_SET_NESTED, 10);
+xaliasify(FTN_GET_NESTED, 10);
+xaliasify(FTN_INIT_LOCK, 10);
+xaliasify(FTN_INIT_NEST_LOCK, 10);
+xaliasify(FTN_DESTROY_LOCK, 10);
+xaliasify(FTN_DESTROY_NEST_LOCK, 10);
+xaliasify(FTN_SET_LOCK, 10);
+xaliasify(FTN_SET_NEST_LOCK, 10);
+xaliasify(FTN_UNSET_LOCK, 10);
+xaliasify(FTN_UNSET_NEST_LOCK, 10);
+xaliasify(FTN_TEST_LOCK, 10);
+xaliasify(FTN_TEST_NEST_LOCK, 10);
+
+// OMP_2.0 aliases
+xaliasify(FTN_GET_WTICK, 20);
+xaliasify(FTN_GET_WTIME, 20);
+
+// OMP_3.0 aliases
+xaliasify(FTN_SET_SCHEDULE, 30);
+xaliasify(FTN_GET_SCHEDULE, 30);
+xaliasify(FTN_GET_THREAD_LIMIT, 30);
+xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30);
+xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30);
+xaliasify(FTN_GET_LEVEL, 30);
+xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30);
+xaliasify(FTN_GET_TEAM_SIZE, 30);
+xaliasify(FTN_GET_ACTIVE_LEVEL, 30);
+xaliasify(FTN_INIT_LOCK, 30);
+xaliasify(FTN_INIT_NEST_LOCK, 30);
+xaliasify(FTN_DESTROY_LOCK, 30);
+xaliasify(FTN_DESTROY_NEST_LOCK, 30);
+xaliasify(FTN_SET_LOCK, 30);
+xaliasify(FTN_SET_NEST_LOCK, 30);
+xaliasify(FTN_UNSET_LOCK, 30);
+xaliasify(FTN_UNSET_NEST_LOCK, 30);
+xaliasify(FTN_TEST_LOCK, 30);
+xaliasify(FTN_TEST_NEST_LOCK, 30);
+
+// OMP_3.1 aliases
+xaliasify(FTN_IN_FINAL, 31);
+
+#if OMP_40_ENABLED
+// OMP_4.0 aliases
+xaliasify(FTN_GET_PROC_BIND, 40);
+xaliasify(FTN_GET_NUM_TEAMS, 40);
+xaliasify(FTN_GET_TEAM_NUM, 40);
+xaliasify(FTN_GET_CANCELLATION, 40);
+xaliasify(FTN_IS_INITIAL_DEVICE, 40);
+#endif /* OMP_40_ENABLED */
+
+#if OMP_41_ENABLED
+// OMP_4.1 aliases
+#endif
+
+#if OMP_50_ENABLED
+// OMP_5.0 aliases
+#endif
+
+// OMP_1.0 versioned symbols
+xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0");
+xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0");
+xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0");
+xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0");
+xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0");
+xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0");
+xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0");
+xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0");
+xversionify(FTN_SET_NESTED, 10, "OMP_1.0");
+xversionify(FTN_GET_NESTED, 10, "OMP_1.0");
+xversionify(FTN_INIT_LOCK, 10, "OMP_1.0");
+xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0");
+xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_SET_LOCK, 10, "OMP_1.0");
+xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0");
+xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_TEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0");
+
+// OMP_2.0 versioned symbols
+xversionify(FTN_GET_WTICK, 20, "OMP_2.0");
+xversionify(FTN_GET_WTIME, 20, "OMP_2.0");
+
+// OMP_3.0 versioned symbols
+xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0");
+xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0");
+xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0");
+xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0");
+xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0");
+xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0");
+xversionify(FTN_GET_LEVEL, 30, "OMP_3.0");
+xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0");
+xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0");
+
+// the lock routines have a 1.0 and 3.0 version
+xversionify(FTN_INIT_LOCK, 30, "OMP_3.0");
+xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0");
+xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_SET_LOCK, 30, "OMP_3.0");
+xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0");
+xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_TEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0");
+
+// OMP_3.1 versioned symbol
+xversionify(FTN_IN_FINAL, 31, "OMP_3.1");
+
+#if OMP_40_ENABLED
+// OMP_4.0 versioned symbols
+xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0");
+xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0");
+xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0");
+xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0");
+xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0");
+#endif /* OMP_40_ENABLED */
+
+#if OMP_41_ENABLED
+// OMP_4.1 versioned symbols
+#endif
+
+#if OMP_50_ENABLED
+// OMP_5.0 versioned symbols
+#endif
+
+#endif // KMP_USE_VERSION_SYMBOLS
+
+#ifdef __cplusplus
+ } //extern "C"
+#endif // __cplusplus
+
+// end of file //
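
The xaliasify / xversionify block near the end of kmp_ftn_entry.h above relies on the standard GCC/ELF symbol-versioning machinery. A minimal standalone sketch of that mechanism follows, using hypothetical demo_* names rather than the real __kmp_api_* symbols, and assuming a GNU toolchain plus a linker version script that declares the OMP_1.0 and VERSION nodes:

/* demo_versioned.c -- illustrative sketch only, not libomp code.
 * Build, for example:
 *   gcc -shared -fPIC demo_versioned.c -Wl,--version-script=demo.map -o libdemo.so
 * where demo.map defines the OMP_1.0 and VERSION version nodes. */

/* The real entry point (the analogue of __kmp_api_omp_set_num_threads above). */
void __demo_api_omp_set_num_threads( int num )
{
    (void)num;  /* real work omitted */
}

/* Step 1: create a dummy alias symbol, roughly what xaliasify does. */
void __demo_api_omp_set_num_threads_10_alias( int num )
    __attribute__(( alias( "__demo_api_omp_set_num_threads" ) ));

/* Step 2: bind the alias to the libgomp-compatible version and the real symbol
 * to the default version, roughly what xversionify does. */
__asm__( ".symver __demo_api_omp_set_num_threads_10_alias, omp_set_num_threads@OMP_1.0" );
__asm__( ".symver __demo_api_omp_set_num_threads, omp_set_num_threads@@VERSION" );

Running objdump -T on the resulting libdemo.so should then list both omp_set_num_threads@OMP_1.0 and omp_set_num_threads@@VERSION, mirroring the libgomp.so.1 output described in the comment above.
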
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c b/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c
index c954d2072a..1d0fb4ca13 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_extra.c
@@ -1,33 +1,33 @@
-/*
- * kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-
-#if KMP_OS_WINDOWS
-# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
-#elif KMP_OS_UNIX
-# define KMP_FTN_ENTRIES KMP_FTN_APPEND
-#endif
-
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: "
-#ifdef KMP_FTN_ENTRIES
- "yes";
-# define FTN_STDCALL /* nothing to do */
-# include "kmp_ftn_os.h"
-# include "kmp_ftn_entry.h"
-#else
- "no";
-#endif /* KMP_FTN_ENTRIES */
+/*
+ * kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+
+#if KMP_OS_WINDOWS
+# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
+#elif KMP_OS_UNIX
+# define KMP_FTN_ENTRIES KMP_FTN_APPEND
+#endif
+
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: "
+#ifdef KMP_FTN_ENTRIES
+ "yes";
+# define FTN_STDCALL /* nothing to do */
+# include "kmp_ftn_os.h"
+# include "kmp_ftn_entry.h"
+#else
+ "no";
+#endif /* KMP_FTN_ENTRIES */
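
kmp_ftn_extra.c above picks a Fortran name-mangling scheme per OS (KMP_FTN_PLAIN on Windows, KMP_FTN_APPEND on Unix) and then re-includes kmp_ftn_entry.h, whose PASS_ARGS_BY_VALUE / KMP_DEREF logic decides whether each entry point receives its integer arguments by value or through a pointer. A minimal standalone sketch of that one-body-for-both-conventions trick, with hypothetical DEMO_* names rather than the real KMP_* macros:

#include <stdio.h>

/* Define DEMO_PASS_BY_VALUE to mimic the PASS_ARGS_BY_VALUE case; leave it
 * undefined to mimic the by-reference Fortran convention. */
#ifdef DEMO_PASS_BY_VALUE
  #define DEMO_DEREF          /* value arrives directly: nothing to dereference */
#else
  #define DEMO_DEREF *        /* a pointer arrives: dereference it */
#endif

/* One function body serves both conventions, like FTN_SET_STACKSIZE above. */
void demo_set_num_threads( int DEMO_DEREF arg )
{
    printf( "requested threads: %d\n", DEMO_DEREF arg );
}

int main( void )
{
#ifdef DEMO_PASS_BY_VALUE
    demo_set_num_threads( 4 );        /* by-value call */
#else
    int n = 4;
    demo_set_num_threads( &n );       /* by-reference call */
#endif
    return 0;
}

Compiling the sketch once with -DDEMO_PASS_BY_VALUE and once without exercises both code paths.
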
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h b/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h
index d7d30343de..4b41260a9d 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_os.h
@@ -1,532 +1,532 @@
-/*
- * kmp_ftn_os.h -- KPTS Fortran defines header file.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_FTN_OS_H
-#define KMP_FTN_OS_H
-
-// KMP_FTN_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, KMP_FTN_UAPPEND.
-
-
-/* -------------------------- External definitions ------------------------ */
-
-#if KMP_FTN_ENTRIES == KMP_FTN_PLAIN
-
- #define FTN_SET_STACKSIZE kmp_set_stacksize
- #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s
- #define FTN_GET_STACKSIZE kmp_get_stacksize
- #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s
- #define FTN_SET_BLOCKTIME kmp_set_blocktime
- #define FTN_GET_BLOCKTIME kmp_get_blocktime
- #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial
- #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround
- #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput
- #define FTN_SET_LIBRARY kmp_set_library
- #define FTN_GET_LIBRARY kmp_get_library
- #define FTN_SET_DEFAULTS kmp_set_defaults
- #define FTN_SET_AFFINITY kmp_set_affinity
- #define FTN_GET_AFFINITY kmp_get_affinity
- #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc
- #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask
- #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask
- #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc
- #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc
- #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc
-
- #define FTN_MALLOC kmp_malloc
- #define FTN_CALLOC kmp_calloc
- #define FTN_REALLOC kmp_realloc
- #define FTN_FREE kmp_free
-
- #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads
-
- #define FTN_SET_NUM_THREADS omp_set_num_threads
- #define FTN_GET_NUM_THREADS omp_get_num_threads
- #define FTN_GET_MAX_THREADS omp_get_max_threads
- #define FTN_GET_THREAD_NUM omp_get_thread_num
- #define FTN_GET_NUM_PROCS omp_get_num_procs
- #define FTN_SET_DYNAMIC omp_set_dynamic
- #define FTN_GET_DYNAMIC omp_get_dynamic
- #define FTN_SET_NESTED omp_set_nested
- #define FTN_GET_NESTED omp_get_nested
- #define FTN_IN_PARALLEL omp_in_parallel
- #define FTN_GET_THREAD_LIMIT omp_get_thread_limit
- #define FTN_SET_SCHEDULE omp_set_schedule
- #define FTN_GET_SCHEDULE omp_get_schedule
- #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels
- #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels
- #define FTN_GET_ACTIVE_LEVEL omp_get_active_level
- #define FTN_GET_LEVEL omp_get_level
- #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num
- #define FTN_GET_TEAM_SIZE omp_get_team_size
- #define FTN_IN_FINAL omp_in_final
-// #define FTN_SET_PROC_BIND omp_set_proc_bind
- #define FTN_GET_PROC_BIND omp_get_proc_bind
-// #define FTN_CURR_PROC_BIND omp_curr_proc_bind
-#if OMP_40_ENABLED
- #define FTN_GET_NUM_TEAMS omp_get_num_teams
- #define FTN_GET_TEAM_NUM omp_get_team_num
-#endif
- #define FTN_INIT_LOCK omp_init_lock
-#if KMP_USE_DYNAMIC_LOCK
- #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint
- #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint
-#endif
- #define FTN_DESTROY_LOCK omp_destroy_lock
- #define FTN_SET_LOCK omp_set_lock
- #define FTN_UNSET_LOCK omp_unset_lock
- #define FTN_TEST_LOCK omp_test_lock
- #define FTN_INIT_NEST_LOCK omp_init_nest_lock
- #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock
- #define FTN_SET_NEST_LOCK omp_set_nest_lock
- #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock
- #define FTN_TEST_NEST_LOCK omp_test_nest_lock
-
- #define FTN_SET_WARNINGS_ON kmp_set_warnings_on
- #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off
-
- #define FTN_GET_WTIME omp_get_wtime
- #define FTN_GET_WTICK omp_get_wtick
-
-#if OMP_40_ENABLED
-#if KMP_MIC || KMP_OS_DARWIN
- #define FTN_GET_DEFAULT_DEVICE omp_get_default_device
- #define FTN_SET_DEFAULT_DEVICE omp_set_default_device
- #define FTN_GET_NUM_DEVICES omp_get_num_devices
-#endif
- #define FTN_IS_INITIAL_DEVICE omp_is_initial_device
-#endif
-
-#if OMP_40_ENABLED
- #define FTN_GET_CANCELLATION omp_get_cancellation
- #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status
-#endif
-
-#endif /* KMP_FTN_PLAIN */
-
-/* ------------------------------------------------------------------------ */
-
-#if KMP_FTN_ENTRIES == KMP_FTN_APPEND
-
- #define FTN_SET_STACKSIZE kmp_set_stacksize_
- #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_
- #define FTN_GET_STACKSIZE kmp_get_stacksize_
- #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_
- #define FTN_SET_BLOCKTIME kmp_set_blocktime_
- #define FTN_GET_BLOCKTIME kmp_get_blocktime_
- #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_
- #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_
- #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_
- #define FTN_SET_LIBRARY kmp_set_library_
- #define FTN_GET_LIBRARY kmp_get_library_
- #define FTN_SET_DEFAULTS kmp_set_defaults_
- #define FTN_SET_AFFINITY kmp_set_affinity_
- #define FTN_GET_AFFINITY kmp_get_affinity_
- #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_
- #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_
- #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_
- #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_
- #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_
- #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_
-
- #define FTN_MALLOC kmp_malloc_
- #define FTN_CALLOC kmp_calloc_
- #define FTN_REALLOC kmp_realloc_
- #define FTN_FREE kmp_free_
-
- #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_
-
- #define FTN_SET_NUM_THREADS omp_set_num_threads_
- #define FTN_GET_NUM_THREADS omp_get_num_threads_
- #define FTN_GET_MAX_THREADS omp_get_max_threads_
- #define FTN_GET_THREAD_NUM omp_get_thread_num_
- #define FTN_GET_NUM_PROCS omp_get_num_procs_
- #define FTN_SET_DYNAMIC omp_set_dynamic_
- #define FTN_GET_DYNAMIC omp_get_dynamic_
- #define FTN_SET_NESTED omp_set_nested_
- #define FTN_GET_NESTED omp_get_nested_
- #define FTN_IN_PARALLEL omp_in_parallel_
- #define FTN_GET_THREAD_LIMIT omp_get_thread_limit_
- #define FTN_SET_SCHEDULE omp_set_schedule_
- #define FTN_GET_SCHEDULE omp_get_schedule_
- #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_
- #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels_
- #define FTN_GET_ACTIVE_LEVEL omp_get_active_level_
- #define FTN_GET_LEVEL omp_get_level_
- #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_
- #define FTN_GET_TEAM_SIZE omp_get_team_size_
- #define FTN_IN_FINAL omp_in_final_
-// #define FTN_SET_PROC_BIND omp_set_proc_bind_
- #define FTN_GET_PROC_BIND omp_get_proc_bind_
-// #define FTN_CURR_PROC_BIND omp_curr_proc_bind_
-#if OMP_40_ENABLED
- #define FTN_GET_NUM_TEAMS omp_get_num_teams_
- #define FTN_GET_TEAM_NUM omp_get_team_num_
-#endif
- #define FTN_INIT_LOCK omp_init_lock_
-#if KMP_USE_DYNAMIC_LOCK
- #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_
- #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_
-#endif
- #define FTN_DESTROY_LOCK omp_destroy_lock_
- #define FTN_SET_LOCK omp_set_lock_
- #define FTN_UNSET_LOCK omp_unset_lock_
- #define FTN_TEST_LOCK omp_test_lock_
- #define FTN_INIT_NEST_LOCK omp_init_nest_lock_
- #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_
- #define FTN_SET_NEST_LOCK omp_set_nest_lock_
- #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_
- #define FTN_TEST_NEST_LOCK omp_test_nest_lock_
-
- #define FTN_SET_WARNINGS_ON kmp_set_warnings_on_
- #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_
-
- #define FTN_GET_WTIME omp_get_wtime_
- #define FTN_GET_WTICK omp_get_wtick_
-
-#if OMP_40_ENABLED
-#if KMP_MIC || KMP_OS_DARWIN
- #define FTN_GET_DEFAULT_DEVICE omp_get_default_device_
- #define FTN_SET_DEFAULT_DEVICE omp_set_default_device_
- #define FTN_GET_NUM_DEVICES omp_get_num_devices_
-#endif
- #define FTN_IS_INITIAL_DEVICE omp_is_initial_device_
-#endif
-
-
-#if OMP_40_ENABLED
- #define FTN_GET_CANCELLATION omp_get_cancellation_
- #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_
-#endif
-
-#endif /* KMP_FTN_APPEND */
-
-/* ------------------------------------------------------------------------ */
-
-#if KMP_FTN_ENTRIES == KMP_FTN_UPPER
-
- #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE
- #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S
- #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE
- #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S
- #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME
- #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME
- #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL
- #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND
- #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT
- #define FTN_SET_LIBRARY KMP_SET_LIBRARY
- #define FTN_GET_LIBRARY KMP_GET_LIBRARY
- #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS
- #define FTN_SET_AFFINITY KMP_SET_AFFINITY
- #define FTN_GET_AFFINITY KMP_GET_AFFINITY
- #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC
- #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK
- #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK
- #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC
- #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC
- #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC
-
- #define FTN_MALLOC KMP_MALLOC
- #define FTN_CALLOC KMP_CALLOC
- #define FTN_REALLOC KMP_REALLOC
- #define FTN_FREE KMP_FREE
-
- #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS
-
- #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS
- #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS
- #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS
- #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM
- #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS
- #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC
- #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC
- #define FTN_SET_NESTED OMP_SET_NESTED
- #define FTN_GET_NESTED OMP_GET_NESTED
- #define FTN_IN_PARALLEL OMP_IN_PARALLEL
- #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT
- #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE
- #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE
- #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS
- #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS
- #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL
- #define FTN_GET_LEVEL OMP_GET_LEVEL
- #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM
- #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE
- #define FTN_IN_FINAL OMP_IN_FINAL
-// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND
- #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND
-// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND
-#if OMP_40_ENABLED
- #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS
- #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM
-#endif
- #define FTN_INIT_LOCK OMP_INIT_LOCK
-#if KMP_USE_DYNAMIC_LOCK
- #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT
- #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT
-#endif
- #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK
- #define FTN_SET_LOCK OMP_SET_LOCK
- #define FTN_UNSET_LOCK OMP_UNSET_LOCK
- #define FTN_TEST_LOCK OMP_TEST_LOCK
- #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK
- #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK
- #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK
- #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK
- #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK
-
- #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON
- #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF
-
- #define FTN_GET_WTIME OMP_GET_WTIME
- #define FTN_GET_WTICK OMP_GET_WTICK
-
-#if OMP_40_ENABLED
-#if KMP_MIC || KMP_OS_DARWIN
- #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE
- #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE
- #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES
-#endif
- #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE
-#endif
-
-
-#if OMP_40_ENABLED
- #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION
- #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS
-#endif
-
-#endif /* KMP_FTN_UPPER */
-
-/* ------------------------------------------------------------------------ */
-
-#if KMP_FTN_ENTRIES == KMP_FTN_UAPPEND
-
- #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_
- #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_
- #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_
- #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_
- #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_
- #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_
- #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_
- #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_
- #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_
- #define FTN_SET_LIBRARY KMP_SET_LIBRARY_
- #define FTN_GET_LIBRARY KMP_GET_LIBRARY_
- #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_
- #define FTN_SET_AFFINITY KMP_SET_AFFINITY_
- #define FTN_GET_AFFINITY KMP_GET_AFFINITY_
- #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_
- #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_
- #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_
- #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_
- #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_
- #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_
-
- #define FTN_MALLOC KMP_MALLOC_
- #define FTN_CALLOC KMP_CALLOC_
- #define FTN_REALLOC KMP_REALLOC_
- #define FTN_FREE KMP_FREE_
-
- #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_
-
- #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_
- #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_
- #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_
- #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_
- #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_
- #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_
- #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_
- #define FTN_SET_NESTED OMP_SET_NESTED_
- #define FTN_GET_NESTED OMP_GET_NESTED_
- #define FTN_IN_PARALLEL OMP_IN_PARALLEL_
- #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_
- #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_
- #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_
- #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_
- #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_
- #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_
- #define FTN_GET_LEVEL OMP_GET_LEVEL_
- #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_
- #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_
- #define FTN_IN_FINAL OMP_IN_FINAL_
-// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND_
- #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_
-// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND_
-#if OMP_40_ENABLED
- #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_
- #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_
-#endif
- #define FTN_INIT_LOCK OMP_INIT_LOCK_
-#if KMP_USE_DYNAMIC_LOCK
- #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_
- #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_
-#endif
- #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_
- #define FTN_SET_LOCK OMP_SET_LOCK_
- #define FTN_UNSET_LOCK OMP_UNSET_LOCK_
- #define FTN_TEST_LOCK OMP_TEST_LOCK_
- #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_
- #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_
- #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_
- #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_
- #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_
-
- #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_
- #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_
-
- #define FTN_GET_WTIME OMP_GET_WTIME_
- #define FTN_GET_WTICK OMP_GET_WTICK_
-
-#if OMP_40_ENABLED
-#if KMP_MIC || KMP_OS_DARWIN
- #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_
- #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_
- #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_
-#endif
- #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_
-#endif
-
-
-#if OMP_40_ENABLED
- #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_
- #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_
-#endif
-
-#endif /* KMP_FTN_UAPPEND */
-
-/* ------------------------------------------------------------------ */
-/* -------------------------- GOMP API NAMES ------------------------ */
-// All GOMP_1.0 symbols
-#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end
-#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start
-#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier
-#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end
-#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end
-#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start
-#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start
-#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next
-#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start
-#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end
-#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait
-#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next
-#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next
-#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start
-#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next
-#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start
-#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next
-#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start
-#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end
-#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start
-#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start
-#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start
-#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start
-#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end
-#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait
-#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next
-#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start
-#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end
-#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start
-#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start
-
-// All GOMP_2.0 symbols
-#define KMP_API_NAME_GOMP_TASK GOMP_task
-#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait
-#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start
-#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next
-#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start
-
-// All GOMP_3.0 symbols
-#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield
-
-// All GOMP_4.0 symbols
-// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libomp
-#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel
-#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel
-#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point
-#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime
-#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static
-#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections
-#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel
-#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel
-#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start
-#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end
-/* Target functions should be taken care of by liboffload */
-#define KMP_API_NAME_GOMP_TARGET GOMP_target
-#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data
-#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data
-#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update
-#define KMP_API_NAME_GOMP_TEAMS GOMP_teams
-
-#ifdef KMP_USE_VERSION_SYMBOLS
- #define xstr(x) str(x)
- #define str(x) #x
-
- // If Linux, xexpand prepends __kmp_api_ to the real API name
- #define xexpand(api_name) expand(api_name)
- #define expand(api_name) __kmp_api_##api_name
-
- #define xaliasify(api_name,ver) aliasify(api_name,ver)
- #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name))))
-
- #define xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION")
- #define versionify(api_name, version_num, version_str, default_ver) \
- __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \
- __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t")
-
-#else // KMP_USE_VERSION_SYMBOLS
- #define xstr(x) /* Nothing */
- #define str(x) /* Nothing */
-
- // if Windows or Mac, xexpand does no name transformation
- #define xexpand(api_name) expand(api_name)
- #define expand(api_name) api_name
-
- #define xaliasify(api_name,ver) /* Nothing */
- #define aliasify(api_name,ver) /* Nothing */
-
- #define xversionify(api_name, version_num, version_str) /* Nothing */
- #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */
-
-#endif // KMP_USE_VERSION_SYMBOLS
-
-#endif /* KMP_FTN_OS_H */
-
+/*
+ * kmp_ftn_os.h -- KPTS Fortran defines header file.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_FTN_OS_H
+#define KMP_FTN_OS_H
+
+// KMP_FTN_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, KMP_FTN_UAPPEND.
+
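As a standalone sketch of how this selection plays out for one entry point (the numeric values assigned to the KMP_FTN_* constants below are placeholders chosen for illustration, not the values from kmp.h):

/* Standalone sketch of the KMP_FTN_ENTRIES selection used by this header;
 * the constant values here are illustrative placeholders. */
#include <stdio.h>

#define KMP_FTN_PLAIN   1
#define KMP_FTN_APPEND  2
#define KMP_FTN_UPPER   3
#define KMP_FTN_UAPPEND 4

#ifndef KMP_FTN_ENTRIES
#define KMP_FTN_ENTRIES KMP_FTN_APPEND      /* pretend the build chose APPEND */
#endif

#if   KMP_FTN_ENTRIES == KMP_FTN_PLAIN
  #define FTN_GET_THREAD_NUM omp_get_thread_num
#elif KMP_FTN_ENTRIES == KMP_FTN_APPEND
  #define FTN_GET_THREAD_NUM omp_get_thread_num_
#elif KMP_FTN_ENTRIES == KMP_FTN_UPPER
  #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM
#else
  #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_
#endif

#define STR2(x) #x
#define STR(x)  STR2(x)

int main(void) {
    /* Prints the Fortran-visible spelling selected for this build. */
    printf("FTN_GET_THREAD_NUM -> %s\n", STR(FTN_GET_THREAD_NUM));
    return 0;
}

Compiling the sketch with -DKMP_FTN_ENTRIES=KMP_FTN_UPPER would print OMP_GET_THREAD_NUM instead of omp_get_thread_num_.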
+
+/* -------------------------- External definitions ------------------------ */
+
+#if KMP_FTN_ENTRIES == KMP_FTN_PLAIN
+
+ #define FTN_SET_STACKSIZE kmp_set_stacksize
+ #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s
+ #define FTN_GET_STACKSIZE kmp_get_stacksize
+ #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s
+ #define FTN_SET_BLOCKTIME kmp_set_blocktime
+ #define FTN_GET_BLOCKTIME kmp_get_blocktime
+ #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial
+ #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround
+ #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput
+ #define FTN_SET_LIBRARY kmp_set_library
+ #define FTN_GET_LIBRARY kmp_get_library
+ #define FTN_SET_DEFAULTS kmp_set_defaults
+ #define FTN_SET_AFFINITY kmp_set_affinity
+ #define FTN_GET_AFFINITY kmp_get_affinity
+ #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc
+ #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask
+ #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask
+ #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc
+ #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc
+ #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc
+
+ #define FTN_MALLOC kmp_malloc
+ #define FTN_CALLOC kmp_calloc
+ #define FTN_REALLOC kmp_realloc
+ #define FTN_FREE kmp_free
+
+ #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads
+
+ #define FTN_SET_NUM_THREADS omp_set_num_threads
+ #define FTN_GET_NUM_THREADS omp_get_num_threads
+ #define FTN_GET_MAX_THREADS omp_get_max_threads
+ #define FTN_GET_THREAD_NUM omp_get_thread_num
+ #define FTN_GET_NUM_PROCS omp_get_num_procs
+ #define FTN_SET_DYNAMIC omp_set_dynamic
+ #define FTN_GET_DYNAMIC omp_get_dynamic
+ #define FTN_SET_NESTED omp_set_nested
+ #define FTN_GET_NESTED omp_get_nested
+ #define FTN_IN_PARALLEL omp_in_parallel
+ #define FTN_GET_THREAD_LIMIT omp_get_thread_limit
+ #define FTN_SET_SCHEDULE omp_set_schedule
+ #define FTN_GET_SCHEDULE omp_get_schedule
+ #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels
+ #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels
+ #define FTN_GET_ACTIVE_LEVEL omp_get_active_level
+ #define FTN_GET_LEVEL omp_get_level
+ #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num
+ #define FTN_GET_TEAM_SIZE omp_get_team_size
+ #define FTN_IN_FINAL omp_in_final
+// #define FTN_SET_PROC_BIND omp_set_proc_bind
+ #define FTN_GET_PROC_BIND omp_get_proc_bind
+// #define FTN_CURR_PROC_BIND omp_curr_proc_bind
+#if OMP_40_ENABLED
+ #define FTN_GET_NUM_TEAMS omp_get_num_teams
+ #define FTN_GET_TEAM_NUM omp_get_team_num
+#endif
+ #define FTN_INIT_LOCK omp_init_lock
+#if KMP_USE_DYNAMIC_LOCK
+ #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint
+ #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint
+#endif
+ #define FTN_DESTROY_LOCK omp_destroy_lock
+ #define FTN_SET_LOCK omp_set_lock
+ #define FTN_UNSET_LOCK omp_unset_lock
+ #define FTN_TEST_LOCK omp_test_lock
+ #define FTN_INIT_NEST_LOCK omp_init_nest_lock
+ #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock
+ #define FTN_SET_NEST_LOCK omp_set_nest_lock
+ #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock
+ #define FTN_TEST_NEST_LOCK omp_test_nest_lock
+
+ #define FTN_SET_WARNINGS_ON kmp_set_warnings_on
+ #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off
+
+ #define FTN_GET_WTIME omp_get_wtime
+ #define FTN_GET_WTICK omp_get_wtick
+
+#if OMP_40_ENABLED
+#if KMP_MIC || KMP_OS_DARWIN
+ #define FTN_GET_DEFAULT_DEVICE omp_get_default_device
+ #define FTN_SET_DEFAULT_DEVICE omp_set_default_device
+ #define FTN_GET_NUM_DEVICES omp_get_num_devices
+#endif
+ #define FTN_IS_INITIAL_DEVICE omp_is_initial_device
+#endif
+
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION omp_get_cancellation
+ #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status
+#endif
+
+#endif /* KMP_FTN_PLAIN */
+
+/* ------------------------------------------------------------------------ */
+
+#if KMP_FTN_ENTRIES == KMP_FTN_APPEND
+
+ #define FTN_SET_STACKSIZE kmp_set_stacksize_
+ #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_
+ #define FTN_GET_STACKSIZE kmp_get_stacksize_
+ #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_
+ #define FTN_SET_BLOCKTIME kmp_set_blocktime_
+ #define FTN_GET_BLOCKTIME kmp_get_blocktime_
+ #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_
+ #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_
+ #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_
+ #define FTN_SET_LIBRARY kmp_set_library_
+ #define FTN_GET_LIBRARY kmp_get_library_
+ #define FTN_SET_DEFAULTS kmp_set_defaults_
+ #define FTN_SET_AFFINITY kmp_set_affinity_
+ #define FTN_GET_AFFINITY kmp_get_affinity_
+ #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_
+ #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_
+ #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_
+ #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_
+ #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_
+ #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_
+
+ #define FTN_MALLOC kmp_malloc_
+ #define FTN_CALLOC kmp_calloc_
+ #define FTN_REALLOC kmp_realloc_
+ #define FTN_FREE kmp_free_
+
+ #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_
+
+ #define FTN_SET_NUM_THREADS omp_set_num_threads_
+ #define FTN_GET_NUM_THREADS omp_get_num_threads_
+ #define FTN_GET_MAX_THREADS omp_get_max_threads_
+ #define FTN_GET_THREAD_NUM omp_get_thread_num_
+ #define FTN_GET_NUM_PROCS omp_get_num_procs_
+ #define FTN_SET_DYNAMIC omp_set_dynamic_
+ #define FTN_GET_DYNAMIC omp_get_dynamic_
+ #define FTN_SET_NESTED omp_set_nested_
+ #define FTN_GET_NESTED omp_get_nested_
+ #define FTN_IN_PARALLEL omp_in_parallel_
+ #define FTN_GET_THREAD_LIMIT omp_get_thread_limit_
+ #define FTN_SET_SCHEDULE omp_set_schedule_
+ #define FTN_GET_SCHEDULE omp_get_schedule_
+ #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_
+ #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels_
+ #define FTN_GET_ACTIVE_LEVEL omp_get_active_level_
+ #define FTN_GET_LEVEL omp_get_level_
+ #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_
+ #define FTN_GET_TEAM_SIZE omp_get_team_size_
+ #define FTN_IN_FINAL omp_in_final_
+// #define FTN_SET_PROC_BIND omp_set_proc_bind_
+ #define FTN_GET_PROC_BIND omp_get_proc_bind_
+// #define FTN_CURR_PROC_BIND omp_curr_proc_bind_
+#if OMP_40_ENABLED
+ #define FTN_GET_NUM_TEAMS omp_get_num_teams_
+ #define FTN_GET_TEAM_NUM omp_get_team_num_
+#endif
+ #define FTN_INIT_LOCK omp_init_lock_
+#if KMP_USE_DYNAMIC_LOCK
+ #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_
+ #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_
+#endif
+ #define FTN_DESTROY_LOCK omp_destroy_lock_
+ #define FTN_SET_LOCK omp_set_lock_
+ #define FTN_UNSET_LOCK omp_unset_lock_
+ #define FTN_TEST_LOCK omp_test_lock_
+ #define FTN_INIT_NEST_LOCK omp_init_nest_lock_
+ #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_
+ #define FTN_SET_NEST_LOCK omp_set_nest_lock_
+ #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_
+ #define FTN_TEST_NEST_LOCK omp_test_nest_lock_
+
+ #define FTN_SET_WARNINGS_ON kmp_set_warnings_on_
+ #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_
+
+ #define FTN_GET_WTIME omp_get_wtime_
+ #define FTN_GET_WTICK omp_get_wtick_
+
+#if OMP_40_ENABLED
+#if KMP_MIC || KMP_OS_DARWIN
+ #define FTN_GET_DEFAULT_DEVICE omp_get_default_device_
+ #define FTN_SET_DEFAULT_DEVICE omp_set_default_device_
+ #define FTN_GET_NUM_DEVICES omp_get_num_devices_
+#endif
+ #define FTN_IS_INITIAL_DEVICE omp_is_initial_device_
+#endif
+
+
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION omp_get_cancellation_
+ #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_
+#endif
+
+#endif /* KMP_FTN_APPEND */
+
+/* ------------------------------------------------------------------------ */
+
+#if KMP_FTN_ENTRIES == KMP_FTN_UPPER
+
+ #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE
+ #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S
+ #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE
+ #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S
+ #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME
+ #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME
+ #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL
+ #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND
+ #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT
+ #define FTN_SET_LIBRARY KMP_SET_LIBRARY
+ #define FTN_GET_LIBRARY KMP_GET_LIBRARY
+ #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS
+ #define FTN_SET_AFFINITY KMP_SET_AFFINITY
+ #define FTN_GET_AFFINITY KMP_GET_AFFINITY
+ #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC
+ #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK
+ #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK
+ #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC
+ #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC
+ #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC
+
+ #define FTN_MALLOC KMP_MALLOC
+ #define FTN_CALLOC KMP_CALLOC
+ #define FTN_REALLOC KMP_REALLOC
+ #define FTN_FREE KMP_FREE
+
+ #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS
+
+ #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS
+ #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS
+ #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS
+ #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM
+ #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS
+ #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC
+ #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC
+ #define FTN_SET_NESTED OMP_SET_NESTED
+ #define FTN_GET_NESTED OMP_GET_NESTED
+ #define FTN_IN_PARALLEL OMP_IN_PARALLEL
+ #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT
+ #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE
+ #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE
+ #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS
+ #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS
+ #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL
+ #define FTN_GET_LEVEL OMP_GET_LEVEL
+ #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM
+ #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE
+ #define FTN_IN_FINAL OMP_IN_FINAL
+// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND
+ #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND
+// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND
+#if OMP_40_ENABLED
+ #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS
+ #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM
+#endif
+ #define FTN_INIT_LOCK OMP_INIT_LOCK
+#if KMP_USE_DYNAMIC_LOCK
+ #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT
+ #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT
+#endif
+ #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK
+ #define FTN_SET_LOCK OMP_SET_LOCK
+ #define FTN_UNSET_LOCK OMP_UNSET_LOCK
+ #define FTN_TEST_LOCK OMP_TEST_LOCK
+ #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK
+ #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK
+ #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK
+ #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK
+ #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK
+
+ #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON
+ #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF
+
+ #define FTN_GET_WTIME OMP_GET_WTIME
+ #define FTN_GET_WTICK OMP_GET_WTICK
+
+#if OMP_40_ENABLED
+#if KMP_MIC || KMP_OS_DARWIN
+ #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE
+ #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE
+ #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES
+#endif
+ #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE
+#endif
+
+
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION
+ #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS
+#endif
+
+#endif /* KMP_FTN_UPPER */
+
+/* ------------------------------------------------------------------------ */
+
+#if KMP_FTN_ENTRIES == KMP_FTN_UAPPEND
+
+ #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_
+ #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_
+ #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_
+ #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_
+ #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_
+ #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_
+ #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_
+ #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_
+ #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_
+ #define FTN_SET_LIBRARY KMP_SET_LIBRARY_
+ #define FTN_GET_LIBRARY KMP_GET_LIBRARY_
+ #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_
+ #define FTN_SET_AFFINITY KMP_SET_AFFINITY_
+ #define FTN_GET_AFFINITY KMP_GET_AFFINITY_
+ #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_
+ #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_
+ #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_
+ #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_
+ #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_
+ #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_
+
+ #define FTN_MALLOC KMP_MALLOC_
+ #define FTN_CALLOC KMP_CALLOC_
+ #define FTN_REALLOC KMP_REALLOC_
+ #define FTN_FREE KMP_FREE_
+
+ #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_
+
+ #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_
+ #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_
+ #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_
+ #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_
+ #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_
+ #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_
+ #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_
+ #define FTN_SET_NESTED OMP_SET_NESTED_
+ #define FTN_GET_NESTED OMP_GET_NESTED_
+ #define FTN_IN_PARALLEL OMP_IN_PARALLEL_
+ #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_
+ #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_
+ #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_
+ #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_
+ #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_
+ #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_
+ #define FTN_GET_LEVEL OMP_GET_LEVEL_
+ #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_
+ #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_
+ #define FTN_IN_FINAL OMP_IN_FINAL_
+// #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND_
+ #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_
+// #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND_
+#if OMP_40_ENABLED
+ #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_
+ #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_
+#endif
+ #define FTN_INIT_LOCK OMP_INIT_LOCK_
+#if KMP_USE_DYNAMIC_LOCK
+ #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_
+ #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_
+#endif
+ #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_
+ #define FTN_SET_LOCK OMP_SET_LOCK_
+ #define FTN_UNSET_LOCK OMP_UNSET_LOCK_
+ #define FTN_TEST_LOCK OMP_TEST_LOCK_
+ #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_
+ #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_
+ #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_
+ #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_
+ #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_
+
+ #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_
+ #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_
+
+ #define FTN_GET_WTIME OMP_GET_WTIME_
+ #define FTN_GET_WTICK OMP_GET_WTICK_
+
+#if OMP_40_ENABLED
+#if KMP_MIC || KMP_OS_DARWIN
+ #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_
+ #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_
+ #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_
+#endif
+ #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_
+#endif
+
+
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_
+ #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_
+#endif
+
+#endif /* KMP_FTN_UAPPEND */
+
+/* ------------------------------------------------------------------ */
+/* -------------------------- GOMP API NAMES ------------------------ */
+// All GOMP_1.0 symbols
+#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end
+#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start
+#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier
+#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end
+#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end
+#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start
+#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start
+#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end
+#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait
+#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next
+#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start
+#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next
+#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start
+#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end
+#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start
+#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start
+#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start
+#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start
+#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end
+#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait
+#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next
+#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start
+#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end
+#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start
+#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start
+
+// All GOMP_2.0 symbols
+#define KMP_API_NAME_GOMP_TASK GOMP_task
+#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait
+#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start
+
+// All GOMP_3.0 symbols
+#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield
+
+// All GOMP_4.0 symbols
+// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libomp
+#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel
+#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel
+#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point
+#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static
+#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections
+#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel
+#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel
+#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start
+#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end
+/* Target functions should be taken care of by liboffload */
+#define KMP_API_NAME_GOMP_TARGET GOMP_target
+#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data
+#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data
+#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update
+#define KMP_API_NAME_GOMP_TEAMS GOMP_teams
+
+#ifdef KMP_USE_VERSION_SYMBOLS
+ #define xstr(x) str(x)
+ #define str(x) #x
+
+ // If Linux, xexpand prepends __kmp_api_ to the real API name
+ #define xexpand(api_name) expand(api_name)
+ #define expand(api_name) __kmp_api_##api_name
+
+ #define xaliasify(api_name,ver) aliasify(api_name,ver)
+ #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name))))
+
+ #define xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION")
+ #define versionify(api_name, version_num, version_str, default_ver) \
+ __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \
+ __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t")
+
+#else // KMP_USE_VERSION_SYMBOLS
+ #define xstr(x) /* Nothing */
+ #define str(x) /* Nothing */
+
+ // if Windows or Mac, xexpand does no name transformation
+ #define xexpand(api_name) expand(api_name)
+ #define expand(api_name) api_name
+
+ #define xaliasify(api_name,ver) /* Nothing */
+ #define aliasify(api_name,ver) /* Nothing */
+
+ #define xversionify(api_name, version_num, version_str) /* Nothing */
+ #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */
+
+#endif // KMP_USE_VERSION_SYMBOLS
+
+#endif /* KMP_FTN_OS_H */
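For orientation, a hedged sketch (GCC/ELF only, my reconstruction of the expansion) of what the xexpand/xaliasify/xversionify machinery above would emit for a single GOMP entry point. The function body and the "VERSION"/"GOMP_1.0" node names are illustrative assumptions, and a real shared-library build would additionally need a linker version script defining those version nodes:

/* Sketch of the expansion for KMP_API_NAME_GOMP_BARRIER (GCC/ELF only). */

/* xexpand(KMP_API_NAME_GOMP_BARRIER) names the real implementation: */
void __kmp_api_GOMP_barrier(void) {
    /* placeholder body standing in for the runtime's barrier code */
}

/* xaliasify(KMP_API_NAME_GOMP_BARRIER, 10) declares a same-typed alias: */
__typeof__(__kmp_api_GOMP_barrier) __kmp_api_GOMP_barrier_10_alias
    __attribute__((alias("__kmp_api_GOMP_barrier")));

/* xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0") binds the alias to
 * the legacy version node and the implementation to the default node: */
__asm__(".symver __kmp_api_GOMP_barrier_10_alias,GOMP_barrier@GOMP_1.0");
__asm__(".symver __kmp_api_GOMP_barrier,GOMP_barrier@@VERSION");

The intent of this pattern is that binaries linked against the old GOMP_barrier@GOMP_1.0 symbol keep resolving, while new links bind to GOMP_barrier@@VERSION.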
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c b/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c
index 2083eced2e..cf70d74af5 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_ftn_stdcall.c
@@ -1,35 +1,35 @@
-/*
- * kmp_ftn_stdcall.c -- Fortran __stdcall linkage support for OpenMP.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftnstdcall[] = KMP_VERSION_PREFIX "Fortran __stdcall OMP support: "
-#ifdef USE_FTN_STDCALL
- "yes";
-#else
- "no";
-#endif
-
-#ifdef USE_FTN_STDCALL
-
-#define FTN_STDCALL KMP_STDCALL
-#define KMP_FTN_ENTRIES USE_FTN_STDCALL
-
-#include "kmp_ftn_os.h"
-#include "kmp_ftn_entry.h"
-
-#endif /* USE_FTN_STDCALL */
-
+/*
+ * kmp_ftn_stdcall.c -- Fortran __stdcall linkage support for OpenMP.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftnstdcall[] = KMP_VERSION_PREFIX "Fortran __stdcall OMP support: "
+#ifdef USE_FTN_STDCALL
+ "yes";
+#else
+ "no";
+#endif
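The declaration above relies on C's compile-time concatenation of adjacent string literals, so the #ifdef merely selects the trailing "yes"/"no" piece. A small standalone sketch of the same idiom (KMP_VERSION_PREFIX is given a placeholder value here; the real prefix comes from kmp.h):

/* Sketch of the version-string idiom; prefix value is a placeholder. */
#include <stdio.h>

#define KMP_VERSION_PREFIX "OMP runtime: "   /* placeholder */

char const version_ftnstdcall[] = KMP_VERSION_PREFIX "Fortran __stdcall OMP support: "
#ifdef USE_FTN_STDCALL
    "yes";
#else
    "no";
#endif

int main(void) {
    puts(version_ftnstdcall);   /* e.g. "OMP runtime: Fortran __stdcall OMP support: no" */
    return 0;
}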
+
+#ifdef USE_FTN_STDCALL
+
+#define FTN_STDCALL KMP_STDCALL
+#define KMP_FTN_ENTRIES USE_FTN_STDCALL
+
+#include "kmp_ftn_os.h"
+#include "kmp_ftn_entry.h"
+
+#endif /* USE_FTN_STDCALL */
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_global.c b/contrib/libs/cxxsupp/openmp/kmp_global.c
index 7c30744357..bdac75b005 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_global.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_global.c
@@ -1,471 +1,471 @@
-/*
- * kmp_global.c -- KPTS global variables for runtime support library
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-
-kmp_key_t __kmp_gtid_threadprivate_key;
-
-kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized
-
-#if KMP_STATS_ENABLED
-#include "kmp_stats.h"
-// lock for modifying the global __kmp_stats_list
-kmp_tas_lock_t __kmp_stats_lock;
-
-// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called.
-kmp_stats_list __kmp_stats_list;
-
-// thread local pointer to stats node within list
-__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
-
-// gives reference tick for all events (considered the 0 tick)
-tsc_tick_count __kmp_stats_start_time;
-#endif
-#if KMP_USE_HWLOC
-int __kmp_hwloc_error = FALSE;
-hwloc_topology_t __kmp_hwloc_topology = NULL;
-#endif
-
-/* ----------------------------------------------------- */
-/* INITIALIZATION VARIABLES */
-/* they are synchronized to write during init, but read anytime */
-volatile int __kmp_init_serial = FALSE;
-volatile int __kmp_init_gtid = FALSE;
-volatile int __kmp_init_common = FALSE;
-volatile int __kmp_init_middle = FALSE;
-volatile int __kmp_init_parallel = FALSE;
-volatile int __kmp_init_monitor = 0; /* 1 - launched, 2 - actually started (Windows* OS only) */
-volatile int __kmp_init_user_locks = FALSE;
-
-/* list of address of allocated caches for commons */
-kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL;
-
-int __kmp_init_counter = 0;
-int __kmp_root_counter = 0;
-int __kmp_version = 0;
-
-volatile kmp_uint32 __kmp_team_counter = 0;
-volatile kmp_uint32 __kmp_task_counter = 0;
-
-unsigned int __kmp_init_wait = KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */
-unsigned int __kmp_next_wait = KMP_DEFAULT_NEXT_WAIT; /* subsequent number of spin-tests */
-
-size_t __kmp_stksize = KMP_DEFAULT_STKSIZE;
-size_t __kmp_monitor_stksize = 0; // auto adjust
-size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET;
-int __kmp_stkpadding = KMP_MIN_STKPADDING;
-
-size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR;
-
-/* Barrier method defaults, settings, and strings */
-/* branch factor = 2^branch_bits (only relevant for tree and hyper barrier types) */
-#if KMP_ARCH_X86_64
-kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */
-kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */
-#else
-kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */
-kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */
-#endif // KMP_ARCH_X86_64
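A trivial check of the relation stated above (branch factor = 2^branch_bits): the default branch_bits of 2 gives a branching factor of 4.

/* Sketch only: computes the branching factor from branch_bits. */
#include <stdio.h>

int main(void) {
    unsigned branch_bits   = 2;                  /* the default set above */
    unsigned branch_factor = 1u << branch_bits;  /* 2^branch_bits */
    printf("branch_bits=%u -> branch_factor=%u\n", branch_bits, branch_factor);
    return 0;
}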
-#if KMP_ARCH_X86_64
-kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */
-kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */
-#else
-kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_linear_bar;
-kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_linear_bar;
-#endif
-kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ] = { 0 };
-kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ] = { 0 };
-kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ] = { bp_linear_bar };
-kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ] = { bp_linear_bar };
-char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ] =
- { "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER"
- #if KMP_FAST_REDUCTION_BARRIER
- , "KMP_REDUCTION_BARRIER"
- #endif // KMP_FAST_REDUCTION_BARRIER
- };
-char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ] =
- { "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN"
- #if KMP_FAST_REDUCTION_BARRIER
- , "KMP_REDUCTION_BARRIER_PATTERN"
- #endif // KMP_FAST_REDUCTION_BARRIER
- };
-char const *__kmp_barrier_type_name [ bs_last_barrier ] =
- { "plain", "forkjoin"
- #if KMP_FAST_REDUCTION_BARRIER
- , "reduction"
- #endif // KMP_FAST_REDUCTION_BARRIER
- };
-char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear","tree","hyper","hierarchical"};
-
-int __kmp_allThreadsSpecified = 0;
-size_t __kmp_align_alloc = CACHE_LINE;
-
-
-int __kmp_generate_warnings = kmp_warnings_low;
-int __kmp_reserve_warn = 0;
-int __kmp_xproc = 0;
-int __kmp_avail_proc = 0;
-size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
-int __kmp_sys_max_nth = KMP_MAX_NTH;
-int __kmp_max_nth = 0;
-int __kmp_threads_capacity = 0;
-int __kmp_dflt_team_nth = 0;
-int __kmp_dflt_team_nth_ub = 0;
-int __kmp_tp_capacity = 0;
-int __kmp_tp_cached = 0;
-int __kmp_dflt_nested = FALSE;
-int __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */
-#if KMP_NESTED_HOT_TEAMS
-int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */
- /* 1 - keep extra threads when reduced */
-int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */
-#endif
-enum library_type __kmp_library = library_none;
-enum sched_type __kmp_sched = kmp_sch_default; /* scheduling method for runtime scheduling */
-enum sched_type __kmp_static = kmp_sch_static_greedy; /* default static scheduling method */
-enum sched_type __kmp_guided = kmp_sch_guided_iterative_chunked; /* default guided scheduling method */
-enum sched_type __kmp_auto = kmp_sch_guided_analytical_chunked; /* default auto scheduling method */
-int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
-int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS;
-int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( KMP_DEFAULT_BLOCKTIME, KMP_MIN_MONITOR_WAKEUPS );
-#ifdef KMP_ADJUST_BLOCKTIME
-int __kmp_zero_bt = FALSE;
-#endif /* KMP_ADJUST_BLOCKTIME */
-#ifdef KMP_DFLT_NTH_CORES
-int __kmp_ncores = 0;
-#endif
-int __kmp_chunk = 0;
-int __kmp_abort_delay = 0;
-#if KMP_OS_LINUX && defined(KMP_TDATA_GTID)
-int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */
-int __kmp_adjust_gtid_mode = FALSE;
-#elif KMP_OS_WINDOWS
-int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */
-int __kmp_adjust_gtid_mode = FALSE;
-#else
-int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */
-int __kmp_adjust_gtid_mode = TRUE;
-#endif /* KMP_OS_LINUX && defined(KMP_TDATA_GTID) */
-#ifdef KMP_TDATA_GTID
-#if KMP_OS_WINDOWS
-__declspec(thread) int __kmp_gtid = KMP_GTID_DNE;
-#else
-__thread int __kmp_gtid = KMP_GTID_DNE;
-#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
-#endif /* KMP_TDATA_GTID */
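A standalone portability sketch of the thread-local gtid declaration above: MSVC spells thread-local storage as __declspec(thread), while GCC/Clang use __thread. The KMP_GTID_DNE sentinel is given a placeholder value here.

/* Sketch of the TLS gtid declaration; sentinel value is a placeholder. */
#include <stdio.h>

#define KMP_GTID_DNE (-2)   /* placeholder "does not exist" value */

#if defined(_MSC_VER)
__declspec(thread) int my_gtid = KMP_GTID_DNE;
#else
__thread int my_gtid = KMP_GTID_DNE;
#endif

int main(void) {
    printf("gtid for this thread: %d\n", my_gtid);   /* sentinel until assigned */
    return 0;
}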
-int __kmp_tls_gtid_min = INT_MAX;
-int __kmp_foreign_tp = TRUE;
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-int __kmp_inherit_fp_control = TRUE;
-kmp_int16 __kmp_init_x87_fpu_control_word = 0;
-kmp_uint32 __kmp_init_mxcsr = 0;
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#ifdef USE_LOAD_BALANCE
-double __kmp_load_balance_interval = 1.0;
-#endif /* USE_LOAD_BALANCE */
-
-kmp_nested_nthreads_t __kmp_nested_nth = { NULL, 0, 0 };
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { 1, 1024 }; // TODO: tune it!
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-char * __kmp_speculative_statsfile = "-";
-#endif
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-#if OMP_40_ENABLED
-int __kmp_display_env = FALSE;
-int __kmp_display_env_verbose = FALSE;
-int __kmp_omp_cancellation = FALSE;
-#endif
-
-/* map OMP 3.0 schedule types with our internal schedule types */
-enum sched_type __kmp_sch_map[ kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ] = {
- kmp_sch_static_chunked, // ==> kmp_sched_static = 1
- kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2
- kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3
- kmp_sch_auto, // ==> kmp_sched_auto = 4
- kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101
-    // will likely not be used; introduced here just to debug the code
-    // of public Intel extension schedules
-};
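A hedged sketch of how such a map is typically consumed; the enum values and the lookup() helper below are illustrative assumptions that mirror the "==>" comments above, not the runtime's actual code. The index arithmetic folds the extension kind (101) into the slot right after the four standard kinds, which matches the array's declared size.

/* Illustrative sketch only: assumed enum values mirroring the comments above. */
#include <stdio.h>

enum { kmp_sched_lower = 0, kmp_sched_static = 1, kmp_sched_dynamic = 2,
       kmp_sched_guided = 3, kmp_sched_auto = 4, kmp_sched_upper_std = 5,
       kmp_sched_lower_ext = 100, kmp_sched_trapezoidal = 101, kmp_sched_upper = 102 };

static const char *sch_map[] = {   /* parallels __kmp_sch_map */
    "kmp_sch_static_chunked",      /* kmp_sched_static      = 1   */
    "kmp_sch_dynamic_chunked",     /* kmp_sched_dynamic     = 2   */
    "kmp_sch_guided_chunked",      /* kmp_sched_guided      = 3   */
    "kmp_sch_auto",                /* kmp_sched_auto        = 4   */
    "kmp_sch_trapezoidal",         /* kmp_sched_trapezoidal = 101 */
};

/* Hypothetical lookup: standard kinds use a dense index; the extension kind
 * is folded in directly after the standard block. */
static const char *lookup(int sched) {
    if (sched < kmp_sched_upper_std)
        return sch_map[sched - kmp_sched_lower - 1];
    return sch_map[sched - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2];
}

int main(void) {
    printf("%s\n", lookup(kmp_sched_guided));       /* kmp_sch_guided_chunked */
    printf("%s\n", lookup(kmp_sched_trapezoidal));  /* kmp_sch_trapezoidal    */
    return 0;
}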
-
-#if KMP_OS_LINUX
-enum clock_function_type __kmp_clock_function;
-int __kmp_clock_function_param;
-#endif /* KMP_OS_LINUX */
-
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-enum mic_type __kmp_mic_type = non_mic;
-#endif
-
-#if KMP_AFFINITY_SUPPORTED
-
-# if KMP_GROUP_AFFINITY
-
-int __kmp_num_proc_groups = 1;
-
-kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL;
-kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL;
-kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL;
-kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL;
-
-# endif /* KMP_GROUP_AFFINITY */
-
-size_t __kmp_affin_mask_size = 0;
-enum affinity_type __kmp_affinity_type = affinity_default;
-enum affinity_gran __kmp_affinity_gran = affinity_gran_default;
-int __kmp_affinity_gran_levels = -1;
-int __kmp_affinity_dups = TRUE;
-enum affinity_top_method __kmp_affinity_top_method = affinity_top_method_default;
-int __kmp_affinity_compact = 0;
-int __kmp_affinity_offset = 0;
-int __kmp_affinity_verbose = FALSE;
-int __kmp_affinity_warnings = TRUE;
-int __kmp_affinity_respect_mask = affinity_respect_mask_default;
-char * __kmp_affinity_proclist = NULL;
-kmp_affin_mask_t *__kmp_affinity_masks = NULL;
-unsigned __kmp_affinity_num_masks = 0;
-
-char const * __kmp_cpuinfo_file = NULL;
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-#if OMP_40_ENABLED
-kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 };
-int __kmp_affinity_num_places = 0;
-#endif
-
-int __kmp_place_num_sockets = 0;
-int __kmp_place_socket_offset = 0;
-int __kmp_place_num_cores = 0;
-int __kmp_place_core_offset = 0;
-int __kmp_place_num_threads_per_core = 0;
-
-kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;
-
-/* This check ensures that the compiler is passing the correct data type
- * for the flags formal parameter of the function kmpc_omp_task_alloc().
- * If the type is not a 4-byte type, then give an error message about
- * a non-positive length array pointing here. If that happens, the
- * kmp_tasking_flags_t structure must be redefined to have exactly 32 bits.
- */
-KMP_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 );
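A minimal sketch of the negative-length-array trick the comment describes (BUILD_ASSERT_SKETCH and struct flags32 are hypothetical names introduced for illustration; libomp's actual KMP_BUILD_ASSERT lives in its own headers):

/* A false condition yields a negative array length and therefore a compile
 * error that points at the assertion line. */
#define BUILD_ASSERT_SKETCH(cond) \
    typedef char build_assert_failed[(cond) ? 1 : -1]

struct flags32 { unsigned a : 16, b : 16; };       /* stand-in for kmp_tasking_flags_t */

BUILD_ASSERT_SKETCH(sizeof(struct flags32) == 4);  /* compiles */
/* BUILD_ASSERT_SKETCH(sizeof(struct flags32) == 8);   would fail to compile */

int main(void) { return 0; }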
-
-kmp_int32 __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
-
-#ifdef DEBUG_SUSPEND
-int __kmp_suspend_count = 0;
-#endif
-
-int __kmp_settings = FALSE;
-int __kmp_duplicate_library_ok = 0;
-#if USE_ITT_BUILD
-int __kmp_forkjoin_frames = 1;
-int __kmp_forkjoin_frames_mode = 3;
-#endif
-PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined;
-int __kmp_determ_red = FALSE;
-
-#ifdef KMP_DEBUG
-int kmp_a_debug = 0;
-int kmp_b_debug = 0;
-int kmp_c_debug = 0;
-int kmp_d_debug = 0;
-int kmp_e_debug = 0;
-int kmp_f_debug = 0;
-int kmp_diag = 0;
-#endif
-
-/* For debug information logging using rotating buffer */
-int __kmp_debug_buf = FALSE; /* TRUE means use buffer, FALSE means print to stderr */
-int __kmp_debug_buf_lines = KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */
-int __kmp_debug_buf_chars = KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */
-int __kmp_debug_buf_atomic = FALSE; /* TRUE means use atomic update of buffer entry pointer */
-
-char *__kmp_debug_buffer = NULL; /* Debug buffer itself */
-int __kmp_debug_count = 0; /* Counter for number of lines printed in buffer so far */
-int __kmp_debug_buf_warn_chars = 0; /* Keep track of char increase recommended in warnings */
-/* end rotating debug buffer */
-
-#ifdef KMP_DEBUG
-int __kmp_par_range; /* +1 => only go par for constructs in range */
- /* -1 => only go par for constructs outside range */
-char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = { '\0' };
-char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = { '\0' };
-int __kmp_par_range_lb = 0;
-int __kmp_par_range_ub = INT_MAX;
-#endif /* KMP_DEBUG */
-
-/* For printing out dynamic storage map for threads and teams */
-int __kmp_storage_map = FALSE; /* True means print storage map for threads and teams */
-int __kmp_storage_map_verbose = FALSE; /* True means storage map includes placement info */
-int __kmp_storage_map_verbose_specified = FALSE;
-/* Initialize the library data structures when we fork a child process, defaults to TRUE */
-int __kmp_need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */
-int __kmp_need_register_atfork_specified = TRUE;
-
-int __kmp_env_chunk = FALSE; /* KMP_CHUNK specified? */
-int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */
-int __kmp_env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */
-int __kmp_env_all_threads = FALSE;/* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
-int __kmp_env_omp_all_threads = FALSE;/* OMP_THREAD_LIMIT specified? */
-int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */
-int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
-int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */
-
-kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
-kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
-kmp_uint32 __kmp_yielding_on = 1;
-#if KMP_OS_CNK
-kmp_uint32 __kmp_yield_cycle = 0;
-#else
-kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
-#endif
-kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */
-kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor period. */
-/* ----------------------------------------------------- */
-
-
-/* ------------------------------------------------------ */
-/* STATE mostly synchronized with global lock */
-/* data written to rarely by masters, read often by workers */
-/*
- * SHALL WE EDIT THE COMMENT BELOW IN SOME WAY?
- * TODO: None of this global padding stuff works consistently because
- * the order of declaration is not necessarily correlated to storage order.
- * To fix this, all the important globals must be put in a big structure
- * instead.
- */
-KMP_ALIGN_CACHE
- kmp_info_t **__kmp_threads = NULL;
- kmp_root_t **__kmp_root = NULL;
-
-/* data read/written to often by masters */
-KMP_ALIGN_CACHE
-volatile int __kmp_nth = 0;
-volatile int __kmp_all_nth = 0;
-int __kmp_thread_pool_nth = 0;
-volatile kmp_info_t *__kmp_thread_pool = NULL;
-volatile kmp_team_t *__kmp_team_pool = NULL;
-
-KMP_ALIGN_CACHE
-volatile int __kmp_thread_pool_active_nth = 0;
-
-/* -------------------------------------------------
- * GLOBAL/ROOT STATE */
-KMP_ALIGN_CACHE
-kmp_global_t __kmp_global = {{ 0 }};
-
-/* ----------------------------------------------- */
-/* GLOBAL SYNCHRONIZATION LOCKS */
-/* TODO verify the need for these locks and if they need to be global */
-
-#if KMP_USE_INTERNODE_ALIGNMENT
-/* Multinode systems have larger cache line granularity which can cause
- * false sharing if the alignment is not large enough for these locks */
-KMP_ALIGN_CACHE_INTERNODE
-
-kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
-
-KMP_ALIGN_CACHE_INTERNODE
-kmp_lock_t __kmp_global_lock; /* Control OS/global access */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */
-#else
-KMP_ALIGN_CACHE
-
-kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */
-kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
-kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
-kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
-kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
-
-KMP_ALIGN(128)
-kmp_lock_t __kmp_global_lock; /* Control OS/global access */
-KMP_ALIGN(128)
-kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */
-KMP_ALIGN(128)
-kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */
-#endif
-
-/* ----------------------------------------------- */
-
-#if KMP_HANDLE_SIGNALS
- /*
-        Signal handling is disabled by default, because it confuses users: in case of a sigsegv
-        (or other trouble) in user code, the signal handler catches the signal, which then "appears" in
-        the monitor thread (when the monitor executes the raise() function). Users see the signal in the
-        monitor thread and blame the OpenMP RTL.
-
-        Grant said signal handling was required on some older OSes (Irix?) supported by KAI, because
-        bad applications hung but did not abort. Currently it is not a problem for Linux* OS, OS X* and
-        Windows* OS.
-
-        Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm putting
-        the default back for now to see if that fixes hangs on those machines.
-
-        2010-04-13 Lev: It was a bug in the Fortran RTL. The Fortran RTL prints a kind of stack backtrace
-        when a program is aborting, but the code is not signal-safe. When multiple signals are raised at
-        the same time (which occurs in dynamic negative tests because all the worker threads detect
-        the same error), the Fortran RTL may hang. The bug was finally fixed in the Fortran RTL library provided
-        by Steve R., and will be available soon.
- */
- int __kmp_handle_signals = FALSE;
-#endif
-
-/* ----------------------------------------------- */
-#ifdef BUILD_TV
-kmp_key_t __kmp_tv_key = 0;
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef DEBUG_SUSPEND
-int
-get_suspend_count_( void ) {
- int count = __kmp_suspend_count;
- __kmp_suspend_count = 0;
- return count;
-}
-void
-set_suspend_count_( int * value ) {
- __kmp_suspend_count = *value;
-}
-#endif
-
-// Symbols for MS mutual detection.
-int _You_must_link_with_exactly_one_OpenMP_library = 1;
-int _You_must_link_with_Intel_OpenMP_library = 1;
-#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
- int _You_must_link_with_Microsoft_OpenMP_library = 1;
-#endif
-
-// end of file //
+/*
+ * kmp_global.c -- KPTS global variables for runtime support library
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+
+kmp_key_t __kmp_gtid_threadprivate_key;
+
+kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized
+
+#if KMP_STATS_ENABLED
+#include "kmp_stats.h"
+// lock for modifying the global __kmp_stats_list
+kmp_tas_lock_t __kmp_stats_lock;
+
+// global list of per-thread stats; the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called.
+kmp_stats_list __kmp_stats_list;
+
+// thread local pointer to stats node within list
+__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
+
+// gives reference tick for all events (considered the 0 tick)
+tsc_tick_count __kmp_stats_start_time;
+#endif
+#if KMP_USE_HWLOC
+int __kmp_hwloc_error = FALSE;
+hwloc_topology_t __kmp_hwloc_topology = NULL;
+#endif
+
+/* ----------------------------------------------------- */
+/* INITIALIZATION VARIABLES */
+/* they are synchronized for writing during init, but may be read anytime */
+volatile int __kmp_init_serial = FALSE;
+volatile int __kmp_init_gtid = FALSE;
+volatile int __kmp_init_common = FALSE;
+volatile int __kmp_init_middle = FALSE;
+volatile int __kmp_init_parallel = FALSE;
+volatile int __kmp_init_monitor = 0; /* 1 - launched, 2 - actually started (Windows* OS only) */
+volatile int __kmp_init_user_locks = FALSE;
+
+/* list of address of allocated caches for commons */
+kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL;
+
+int __kmp_init_counter = 0;
+int __kmp_root_counter = 0;
+int __kmp_version = 0;
+
+volatile kmp_uint32 __kmp_team_counter = 0;
+volatile kmp_uint32 __kmp_task_counter = 0;
+
+unsigned int __kmp_init_wait = KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */
+unsigned int __kmp_next_wait = KMP_DEFAULT_NEXT_WAIT; /* subsequent number of spin-tests */
+
+size_t __kmp_stksize = KMP_DEFAULT_STKSIZE;
+size_t __kmp_monitor_stksize = 0; // auto adjust
+size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET;
+int __kmp_stkpadding = KMP_MIN_STKPADDING;
+
+size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR;
+
+/* Barrier method defaults, settings, and strings */
+/* branch factor = 2^branch_bits (only relevant for tree and hyper barrier types) */
+#if KMP_ARCH_X86_64
+kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */
+kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */
+#else
+kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */
+kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */
+#endif // KMP_ARCH_X86_64
+#if KMP_ARCH_X86_64
+kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */
+kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */
+#else
+kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_linear_bar;
+kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_linear_bar;
+#endif
+kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ] = { 0 };
+kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ] = { 0 };
+kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ] = { bp_linear_bar };
+kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ] = { bp_linear_bar };
+char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ] =
+ { "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER"
+ #if KMP_FAST_REDUCTION_BARRIER
+ , "KMP_REDUCTION_BARRIER"
+ #endif // KMP_FAST_REDUCTION_BARRIER
+ };
+char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ] =
+ { "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN"
+ #if KMP_FAST_REDUCTION_BARRIER
+ , "KMP_REDUCTION_BARRIER_PATTERN"
+ #endif // KMP_FAST_REDUCTION_BARRIER
+ };
+char const *__kmp_barrier_type_name [ bs_last_barrier ] =
+ { "plain", "forkjoin"
+ #if KMP_FAST_REDUCTION_BARRIER
+ , "reduction"
+ #endif // KMP_FAST_REDUCTION_BARRIER
+ };
+char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear","tree","hyper","hierarchical"};
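+
+/* Illustrative sketch, not part of the runtime sources: the tree/hyper barrier
+ * branch factor is derived from the branch bits above as 1 << bits, so the
+ * default of 2 bits corresponds to a branch factor of 4.  The helper name is
+ * invented for this example. */
+static inline unsigned example_barrier_branch_factor(unsigned branch_bits) {
+    return 1u << branch_bits;   /* e.g. 1u << 2 == 4 */
+}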
+
+int __kmp_allThreadsSpecified = 0;
+size_t __kmp_align_alloc = CACHE_LINE;
+
+
+int __kmp_generate_warnings = kmp_warnings_low;
+int __kmp_reserve_warn = 0;
+int __kmp_xproc = 0;
+int __kmp_avail_proc = 0;
+size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
+int __kmp_sys_max_nth = KMP_MAX_NTH;
+int __kmp_max_nth = 0;
+int __kmp_threads_capacity = 0;
+int __kmp_dflt_team_nth = 0;
+int __kmp_dflt_team_nth_ub = 0;
+int __kmp_tp_capacity = 0;
+int __kmp_tp_cached = 0;
+int __kmp_dflt_nested = FALSE;
+int __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */
+#if KMP_NESTED_HOT_TEAMS
+int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */
+ /* 1 - keep extra threads when reduced */
+int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */
+#endif
+enum library_type __kmp_library = library_none;
+enum sched_type __kmp_sched = kmp_sch_default; /* scheduling method for runtime scheduling */
+enum sched_type __kmp_static = kmp_sch_static_greedy; /* default static scheduling method */
+enum sched_type __kmp_guided = kmp_sch_guided_iterative_chunked; /* default guided scheduling method */
+enum sched_type __kmp_auto = kmp_sch_guided_analytical_chunked; /* default auto scheduling method */
+int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
+int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS;
+int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( KMP_DEFAULT_BLOCKTIME, KMP_MIN_MONITOR_WAKEUPS );
+#ifdef KMP_ADJUST_BLOCKTIME
+int __kmp_zero_bt = FALSE;
+#endif /* KMP_ADJUST_BLOCKTIME */
+#ifdef KMP_DFLT_NTH_CORES
+int __kmp_ncores = 0;
+#endif
+int __kmp_chunk = 0;
+int __kmp_abort_delay = 0;
+#if KMP_OS_LINUX && defined(KMP_TDATA_GTID)
+int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */
+int __kmp_adjust_gtid_mode = FALSE;
+#elif KMP_OS_WINDOWS
+int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */
+int __kmp_adjust_gtid_mode = FALSE;
+#else
+int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */
+int __kmp_adjust_gtid_mode = TRUE;
+#endif /* KMP_OS_LINUX && defined(KMP_TDATA_GTID) */
+#ifdef KMP_TDATA_GTID
+#if KMP_OS_WINDOWS
+__declspec(thread) int __kmp_gtid = KMP_GTID_DNE;
+#else
+__thread int __kmp_gtid = KMP_GTID_DNE;
+#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
+#endif /* KMP_TDATA_GTID */
+int __kmp_tls_gtid_min = INT_MAX;
+int __kmp_foreign_tp = TRUE;
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+int __kmp_inherit_fp_control = TRUE;
+kmp_int16 __kmp_init_x87_fpu_control_word = 0;
+kmp_uint32 __kmp_init_mxcsr = 0;
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+#ifdef USE_LOAD_BALANCE
+double __kmp_load_balance_interval = 1.0;
+#endif /* USE_LOAD_BALANCE */
+
+kmp_nested_nthreads_t __kmp_nested_nth = { NULL, 0, 0 };
+
+#if KMP_USE_ADAPTIVE_LOCKS
+
+kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { 1, 1024 }; // TODO: tune it!
+
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+char * __kmp_speculative_statsfile = "-";
+#endif
+
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+#if OMP_40_ENABLED
+int __kmp_display_env = FALSE;
+int __kmp_display_env_verbose = FALSE;
+int __kmp_omp_cancellation = FALSE;
+#endif
+
+/* map OMP 3.0 schedule types with our internal schedule types */
+enum sched_type __kmp_sch_map[ kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ] = {
+ kmp_sch_static_chunked, // ==> kmp_sched_static = 1
+ kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2
+ kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3
+ kmp_sch_auto, // ==> kmp_sched_auto = 4
+ kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101
+                                 // will likely not be used; introduced here just to debug the code
+ // of public intel extension schedules
+};
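+
+/* Hedged usage sketch (the helper name is invented): one way the table above
+ * can be consulted for the standard OpenMP 3.0 kinds, assuming they occupy the
+ * first entries exactly as laid out in the initializer. */
+static inline enum sched_type example_map_std_schedule(int omp_sched_kind) {
+    /* expected range: kmp_sched_static (1) .. kmp_sched_auto (4) */
+    return __kmp_sch_map[omp_sched_kind - kmp_sched_lower - 1];
+}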
+
+#if KMP_OS_LINUX
+enum clock_function_type __kmp_clock_function;
+int __kmp_clock_function_param;
+#endif /* KMP_OS_LINUX */
+
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+enum mic_type __kmp_mic_type = non_mic;
+#endif
+
+#if KMP_AFFINITY_SUPPORTED
+
+# if KMP_GROUP_AFFINITY
+
+int __kmp_num_proc_groups = 1;
+
+kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL;
+kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL;
+kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL;
+kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL;
+
+# endif /* KMP_GROUP_AFFINITY */
+
+size_t __kmp_affin_mask_size = 0;
+enum affinity_type __kmp_affinity_type = affinity_default;
+enum affinity_gran __kmp_affinity_gran = affinity_gran_default;
+int __kmp_affinity_gran_levels = -1;
+int __kmp_affinity_dups = TRUE;
+enum affinity_top_method __kmp_affinity_top_method = affinity_top_method_default;
+int __kmp_affinity_compact = 0;
+int __kmp_affinity_offset = 0;
+int __kmp_affinity_verbose = FALSE;
+int __kmp_affinity_warnings = TRUE;
+int __kmp_affinity_respect_mask = affinity_respect_mask_default;
+char * __kmp_affinity_proclist = NULL;
+kmp_affin_mask_t *__kmp_affinity_masks = NULL;
+unsigned __kmp_affinity_num_masks = 0;
+
+char const * __kmp_cpuinfo_file = NULL;
+
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+#if OMP_40_ENABLED
+kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 };
+int __kmp_affinity_num_places = 0;
+#endif
+
+int __kmp_place_num_sockets = 0;
+int __kmp_place_socket_offset = 0;
+int __kmp_place_num_cores = 0;
+int __kmp_place_core_offset = 0;
+int __kmp_place_num_threads_per_core = 0;
+
+kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;
+
+/* This check ensures that the compiler is passing the correct data type
+ * for the flags formal parameter of the function kmpc_omp_task_alloc().
+ * If the type is not a 4-byte type, then give an error message about
+ * a non-positive length array pointing here. If that happens, the
+ * kmp_tasking_flags_t structure must be redefined to have exactly 32 bits.
+ */
+KMP_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 );
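+
+/* Minimal sketch of the negative-length-array trick the comment above relies
+ * on; the macro and typedef names below are invented for illustration and are
+ * not the runtime's own KMP_BUILD_ASSERT implementation. */
+#define EXAMPLE_BUILD_ASSERT(expr) \
+    typedef char example_build_assert_failed__[(expr) ? 1 : -1]
+EXAMPLE_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 );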
+
+kmp_int32 __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
+
+#ifdef DEBUG_SUSPEND
+int __kmp_suspend_count = 0;
+#endif
+
+int __kmp_settings = FALSE;
+int __kmp_duplicate_library_ok = 0;
+#if USE_ITT_BUILD
+int __kmp_forkjoin_frames = 1;
+int __kmp_forkjoin_frames_mode = 3;
+#endif
+PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined;
+int __kmp_determ_red = FALSE;
+
+#ifdef KMP_DEBUG
+int kmp_a_debug = 0;
+int kmp_b_debug = 0;
+int kmp_c_debug = 0;
+int kmp_d_debug = 0;
+int kmp_e_debug = 0;
+int kmp_f_debug = 0;
+int kmp_diag = 0;
+#endif
+
+/* For debug information logging using rotating buffer */
+int __kmp_debug_buf = FALSE; /* TRUE means use buffer, FALSE means print to stderr */
+int __kmp_debug_buf_lines = KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */
+int __kmp_debug_buf_chars = KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */
+int __kmp_debug_buf_atomic = FALSE; /* TRUE means use atomic update of buffer entry pointer */
+
+char *__kmp_debug_buffer = NULL; /* Debug buffer itself */
+int __kmp_debug_count = 0; /* Counter for number of lines printed in buffer so far */
+int __kmp_debug_buf_warn_chars = 0; /* Keep track of char increase recommended in warnings */
+/* end rotating debug buffer */
+
+#ifdef KMP_DEBUG
+int __kmp_par_range; /* +1 => only go par for constructs in range */
+ /* -1 => only go par for constructs outside range */
+char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = { '\0' };
+char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = { '\0' };
+int __kmp_par_range_lb = 0;
+int __kmp_par_range_ub = INT_MAX;
+#endif /* KMP_DEBUG */
+
+/* For printing out dynamic storage map for threads and teams */
+int __kmp_storage_map = FALSE; /* True means print storage map for threads and teams */
+int __kmp_storage_map_verbose = FALSE; /* True means storage map includes placement info */
+int __kmp_storage_map_verbose_specified = FALSE;
+/* Initialize the library data structures when we fork a child process, defaults to TRUE */
+int __kmp_need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */
+int __kmp_need_register_atfork_specified = TRUE;
+
+int __kmp_env_chunk = FALSE; /* KMP_CHUNK specified? */
+int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */
+int __kmp_env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */
+int __kmp_env_all_threads = FALSE;/* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
+int __kmp_env_omp_all_threads = FALSE;/* OMP_THREAD_LIMIT specified? */
+int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */
+int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
+int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */
+
+kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
+kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
+kmp_uint32 __kmp_yielding_on = 1;
+#if KMP_OS_CNK
+kmp_uint32 __kmp_yield_cycle = 0;
+#else
+kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
+#endif
+kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */
+kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor period. */
+/* ----------------------------------------------------- */
+
+
+/* ------------------------------------------------------ */
+/* STATE mostly synchronized with global lock */
+/* data rarely written by masters, read often by workers */
+/*
+ * SHALL WE EDIT THE COMMENT BELOW IN SOME WAY?
+ * TODO: None of this global padding stuff works consistently because
+ * the order of declaration is not necessarily correlated to storage order.
+ * To fix this, all the important globals must be put in a big structure
+ * instead.
+ */
+KMP_ALIGN_CACHE
+ kmp_info_t **__kmp_threads = NULL;
+ kmp_root_t **__kmp_root = NULL;
+
+/* data read/written to often by masters */
+KMP_ALIGN_CACHE
+volatile int __kmp_nth = 0;
+volatile int __kmp_all_nth = 0;
+int __kmp_thread_pool_nth = 0;
+volatile kmp_info_t *__kmp_thread_pool = NULL;
+volatile kmp_team_t *__kmp_team_pool = NULL;
+
+KMP_ALIGN_CACHE
+volatile int __kmp_thread_pool_active_nth = 0;
+
+/* -------------------------------------------------
+ * GLOBAL/ROOT STATE */
+KMP_ALIGN_CACHE
+kmp_global_t __kmp_global = {{ 0 }};
+
+/* ----------------------------------------------- */
+/* GLOBAL SYNCHRONIZATION LOCKS */
+/* TODO verify the need for these locks and if they need to be global */
+
+#if KMP_USE_INTERNODE_ALIGNMENT
+/* Multinode systems have larger cache line granularity which can cause
+ * false sharing if the alignment is not large enough for these locks */
+KMP_ALIGN_CACHE_INTERNODE
+
+kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */
+KMP_ALIGN_CACHE_INTERNODE
+kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
+KMP_ALIGN_CACHE_INTERNODE
+kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
+KMP_ALIGN_CACHE_INTERNODE
+kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
+KMP_ALIGN_CACHE_INTERNODE
+kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
+
+KMP_ALIGN_CACHE_INTERNODE
+kmp_lock_t __kmp_global_lock; /* Control OS/global access */
+KMP_ALIGN_CACHE_INTERNODE
+kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */
+KMP_ALIGN_CACHE_INTERNODE
+kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */
+#else
+KMP_ALIGN_CACHE
+
+kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */
+kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
+kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
+kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
+kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
+
+KMP_ALIGN(128)
+kmp_lock_t __kmp_global_lock; /* Control OS/global access */
+KMP_ALIGN(128)
+kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */
+KMP_ALIGN(128)
+kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */
+#endif
+
+/* ----------------------------------------------- */
+
+#if KMP_HANDLE_SIGNALS
+    /*
+        Signal handling is disabled by default, because it confuses users: in case of SIGSEGV
+        (or other trouble) in user code, the signal handler catches the signal, which then "appears"
+        in the monitor thread (when the monitor executes the raise() function). Users see the signal
+        in the monitor thread and blame the OpenMP RTL.
+
+        Grant said signal handling was required on some older OSes (Irix?) supported by KAI, because
+        bad applications hung but did not abort. Currently it is not a problem for Linux* OS, OS X* and
+        Windows* OS.
+
+        Grant: Found new hangs for EL4, EL5, and a Fedora Core machine.  So I'm putting
+        the default back for now to see if that fixes hangs on those machines.
+
+        2010-04013 Lev: It was a bug in the Fortran RTL. The Fortran RTL prints a kind of stack
+        backtrace when a program is aborting, but that code is not signal-safe. When multiple signals
+        are raised at the same time (which occurs in dynamic negative tests because all the worker
+        threads detect the same error), the Fortran RTL may hang. The bug was finally fixed in the
+        Fortran RTL library provided by Steve R., and will be available soon.
+    */
+ int __kmp_handle_signals = FALSE;
+#endif
+
+/* ----------------------------------------------- */
+#ifdef BUILD_TV
+kmp_key_t __kmp_tv_key = 0;
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef DEBUG_SUSPEND
+int
+get_suspend_count_( void ) {
+ int count = __kmp_suspend_count;
+ __kmp_suspend_count = 0;
+ return count;
+}
+void
+set_suspend_count_( int * value ) {
+ __kmp_suspend_count = *value;
+}
+#endif
+
+// Symbols for MS mutual detection.
+int _You_must_link_with_exactly_one_OpenMP_library = 1;
+int _You_must_link_with_Intel_OpenMP_library = 1;
+#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
+ int _You_must_link_with_Microsoft_OpenMP_library = 1;
+#endif
+
+// end of file //
diff --git a/contrib/libs/cxxsupp/openmp/kmp_gsupport.c b/contrib/libs/cxxsupp/openmp/kmp_gsupport.c
index e0fb6ff047..2a89aa2f94 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_gsupport.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_gsupport.c
@@ -1,1605 +1,1605 @@
-/*
- * kmp_gsupport.c
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#if defined(__x86_64) || defined (__powerpc64__) || defined(__aarch64__)
-# define KMP_I8
-#endif
-#include "kmp.h"
-#include "kmp_atomic.h"
-
+/*
+ * kmp_gsupport.c
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if defined(__x86_64) || defined (__powerpc64__) || defined(__aarch64__)
+# define KMP_I8
+#endif
+#include "kmp.h"
+#include "kmp_atomic.h"
+
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+#define MKLOC(loc,routine) \
+ static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
+
+#include "kmp_ftn_os.h"
+
+void
+xexpand(KMP_API_NAME_GOMP_BARRIER)(void)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_barrier");
+ KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
+ __kmpc_barrier(&loc, gtid);
+}
+
+
+//
+// Mutual exclusion
+//
+
+//
+// The symbol that icc/ifort generates for unnamed critical
+// sections - .gomp_critical_user_ - is defined using .comm in any object that
+// references it.  We can't reference it directly here in C code, as the
+// symbol contains a ".".
+//
+// The RTL contains an assembly language definition of .gomp_critical_user_
+// together with another symbol, __kmp_unnamed_critical_addr, initialized with its
+// address.
+//
+extern kmp_critical_name *__kmp_unnamed_critical_addr;
+
+
+void
+xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_critical_start");
+ KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
+ __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_critical_end");
+ KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
+ __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_critical_name_start");
+ KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
+ __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr)
+{
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_critical_name_end");
+ KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
+ __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
+}
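+
+/* Hedged illustration of how generated code typically pairs the critical
+ * entry points above (the pointer variable name is invented):
+ *
+ *     GOMP_critical_start();                  // unnamed: #pragma omp critical
+ *     ... critical section ...
+ *     GOMP_critical_end();
+ *
+ *     static void *name_lock_ptr;             // one pointer per named critical
+ *     GOMP_critical_name_start(&name_lock_ptr);
+ *     ... critical section ...
+ *     GOMP_critical_name_end(&name_lock_ptr);
+ */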
+
+
+//
+// The Gnu codegen tries to use locked operations to perform atomic updates
+// inline. If it can't, then it calls GOMP_atomic_start() before performing
+// the update and GOMP_atomic_end() afterward, regardless of the data type.
+//
+
+void
+xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void)
+{
+ int gtid = __kmp_entry_gtid();
+ KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
+
+#if OMPT_SUPPORT
+ __ompt_thread_assign_wait_id(0);
+#endif
+
+ __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+    KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
+ __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
+}
+
+
+int
+xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_single_start");
+ KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));
+
+ if (! TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+
+ //
+ // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
+ // workshare when USE_CHECKS is defined. We need to avoid the push,
+ // as there is no corresponding GOMP_single_end() call.
+ //
+ return __kmp_enter_single(gtid, &loc, FALSE);
+}
+
+
+void *
+xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void)
+{
+ void *retval;
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_single_copy_start");
+ KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));
+
+ if (! TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+
+ //
+ // If this is the first thread to enter, return NULL. The generated
+ // code will then call GOMP_single_copy_end() for this thread only,
+ // with the copyprivate data pointer as an argument.
+ //
+ if (__kmp_enter_single(gtid, &loc, FALSE))
+ return NULL;
+
+ //
+ // Wait for the first thread to set the copyprivate data pointer,
+ // and for all other threads to reach this point.
+ //
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+ //
+    // Retrieve the value of the copyprivate data pointer, and wait for all
+ // threads to do likewise, then return.
+ //
+ retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+ return retval;
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data)
+{
+ int gtid = __kmp_get_gtid();
+ KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));
+
+ //
+    // Set the copyprivate data pointer for the team, then hit the barrier
+    // so that the other threads will continue on and read it.  Hit another
+    // barrier before continuing, so that they know that the copyprivate
+ // data pointer has been propagated to all threads before trying to
+ // reuse the t_copypriv_data field.
+ //
+ __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+}
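+
+/* Hedged sketch of the call sequence generated code is expected to follow for
+ * copyprivate on a single construct (identifiers are invented):
+ *
+ *     void *buf = GOMP_single_copy_start();
+ *     if (buf == NULL) {                   // this thread executed the region
+ *         buf = &my_copyprivate_data;
+ *         GOMP_single_copy_end(buf);       // publish the pointer to the team
+ *     }
+ *     // every thread can now copy from *buf
+ */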
+
+
+void
+xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_ordered_start");
+ KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
+ __kmpc_ordered(&loc, gtid);
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_ordered_end");
+    KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid));
+ __kmpc_end_ordered(&loc, gtid);
+}
+
+
+//
+// Dispatch macro defs
+//
+// They come in two flavors: 64-bit unsigned, and either 32-bit signed
+// (IA-32 architecture) or 64-bit signed (Intel(R) 64).
+//
+
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
+# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
+# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
+# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
+#else
+# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
+# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
+# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
+#endif /* KMP_ARCH_X86 */
+
+# define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
+# define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
+# define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
+
+
+//
+// The parallel construct
+//
+
+#ifndef KMP_DEBUG
+static
+#endif /* KMP_DEBUG */
+void
+__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
+ void *data)
+{
+#if OMPT_SUPPORT
+ kmp_info_t *thr;
+ ompt_frame_t *ompt_frame;
+ ompt_state_t enclosing_state;
+
+ if (ompt_enabled) {
+ // get pointer to thread data structure
+ thr = __kmp_threads[*gtid];
+
+ // save enclosing task state; set current state for task
+ enclosing_state = thr->th.ompt_thread_info.state;
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+
+ // set task frame
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+ task(data);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ // clear task frame
+ ompt_frame->exit_runtime_frame = NULL;
+
+ // restore enclosing state
+ thr->th.ompt_thread_info.state = enclosing_state;
+ }
+#endif
+}
+
+
+#ifndef KMP_DEBUG
+static
+#endif /* KMP_DEBUG */
+void
+__kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
+ void (*task)(void *), void *data, unsigned num_threads, ident_t *loc,
+ enum sched_type schedule, long start, long end, long incr, long chunk_size)
+{
+ //
+    // Initialize the loop worksharing construct.
+ //
+ KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
+ schedule != kmp_sch_static);
+
+#if OMPT_SUPPORT
+ kmp_info_t *thr;
+ ompt_frame_t *ompt_frame;
+ ompt_state_t enclosing_state;
+
+ if (ompt_enabled) {
+ thr = __kmp_threads[*gtid];
+ // save enclosing task state; set current state for task
+ enclosing_state = thr->th.ompt_thread_info.state;
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+
+ // set task frame
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+ //
+ // Now invoke the microtask.
+ //
+ task(data);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ // clear task frame
+ ompt_frame->exit_runtime_frame = NULL;
+
+ // reset enclosing state
+ thr->th.ompt_thread_info.state = enclosing_state;
+ }
+#endif
+}
+
+
+#ifndef KMP_DEBUG
+static
+#endif /* KMP_DEBUG */
+void
+__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), microtask_t wrapper, int argc,...)
+{
+ int rc;
+ kmp_info_t *thr = __kmp_threads[gtid];
+ kmp_team_t *team = thr->th.th_team;
+ int tid = __kmp_tid_from_gtid(gtid);
+
+ va_list ap;
+ va_start(ap, argc);
+
+ rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc,
+#if OMPT_SUPPORT
+ VOLATILE_CAST(void *) unwrapped_task,
+#endif
+ wrapper, __kmp_invoke_task_func,
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ &ap
+#else
+ ap
+#endif
+ );
+
+ va_end(ap);
+
+ if (rc) {
+ __kmp_run_before_invoked_task(gtid, tid, thr, team);
+ }
+
#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-#define MKLOC(loc,routine) \
- static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
-
-#include "kmp_ftn_os.h"
-
-void
-xexpand(KMP_API_NAME_GOMP_BARRIER)(void)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_barrier");
- KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
- __kmpc_barrier(&loc, gtid);
-}
-
-
-//
-// Mutual exclusion
-//
-
-//
-// The symbol that icc/ifort generates for unnamed critical
-// sections - .gomp_critical_user_ - is defined using .comm in any object that
-// references it.  We can't reference it directly here in C code, as the
-// symbol contains a ".".
-//
-// The RTL contains an assembly language definition of .gomp_critical_user_
-// together with another symbol, __kmp_unnamed_critical_addr, initialized with its
-// address.
-//
-extern kmp_critical_name *__kmp_unnamed_critical_addr;
-
-
-void
-xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_critical_start");
- KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
- __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void)
-{
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_critical_end");
- KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
- __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_critical_name_start");
- KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
- __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr)
-{
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_critical_name_end");
- KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
- __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
-}
-
-
-//
-// The Gnu codegen tries to use locked operations to perform atomic updates
-// inline. If it can't, then it calls GOMP_atomic_start() before performing
-// the update and GOMP_atomic_end() afterward, regardless of the data type.
-//
-
-void
-xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void)
-{
- int gtid = __kmp_entry_gtid();
- KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
-
-#if OMPT_SUPPORT
- __ompt_thread_assign_wait_id(0);
-#endif
-
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void)
-{
- int gtid = __kmp_get_gtid();
-    KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
-}
-
-
-int
-xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_single_start");
- KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));
-
- if (! TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
-
- //
- // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
- // workshare when USE_CHECKS is defined. We need to avoid the push,
- // as there is no corresponding GOMP_single_end() call.
- //
- return __kmp_enter_single(gtid, &loc, FALSE);
-}
-
-
-void *
-xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void)
-{
- void *retval;
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_single_copy_start");
- KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));
-
- if (! TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
-
- //
- // If this is the first thread to enter, return NULL. The generated
- // code will then call GOMP_single_copy_end() for this thread only,
- // with the copyprivate data pointer as an argument.
- //
- if (__kmp_enter_single(gtid, &loc, FALSE))
- return NULL;
-
- //
- // Wait for the first thread to set the copyprivate data pointer,
- // and for all other threads to reach this point.
- //
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-
- //
-    // Retrieve the value of the copyprivate data pointer, and wait for all
- // threads to do likewise, then return.
- //
- retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
- return retval;
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data)
-{
- int gtid = __kmp_get_gtid();
- KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));
-
- //
-    // Set the copyprivate data pointer for the team, then hit the barrier
-    // so that the other threads will continue on and read it.  Hit another
-    // barrier before continuing, so that they know that the copyprivate
- // data pointer has been propagated to all threads before trying to
- // reuse the t_copypriv_data field.
- //
- __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_ordered_start");
- KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
- __kmpc_ordered(&loc, gtid);
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
-{
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_ordered_end");
-    KA_TRACE(20, ("GOMP_ordered_end: T#%d\n", gtid));
- __kmpc_end_ordered(&loc, gtid);
-}
-
-
-//
-// Dispatch macro defs
-//
-// They come in two flavors: 64-bit unsigned, and either 32-bit signed
-// (IA-32 architecture) or 64-bit signed (Intel(R) 64).
-//
-
-#if KMP_ARCH_X86 || KMP_ARCH_ARM
-# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
-# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
-# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
-#else
-# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
-# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
-# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
-#endif /* KMP_ARCH_X86 */
-
-# define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
-# define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
-# define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
-
-
-//
-// The parallel construct
-//
-
-#ifndef KMP_DEBUG
-static
-#endif /* KMP_DEBUG */
-void
-__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
- void *data)
-{
-#if OMPT_SUPPORT
- kmp_info_t *thr;
- ompt_frame_t *ompt_frame;
- ompt_state_t enclosing_state;
-
- if (ompt_enabled) {
- // get pointer to thread data structure
- thr = __kmp_threads[*gtid];
-
- // save enclosing task state; set current state for task
- enclosing_state = thr->th.ompt_thread_info.state;
- thr->th.ompt_thread_info.state = ompt_state_work_parallel;
-
- // set task frame
- ompt_frame = __ompt_get_task_frame_internal(0);
- ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
- task(data);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- // clear task frame
- ompt_frame->exit_runtime_frame = NULL;
-
- // restore enclosing state
- thr->th.ompt_thread_info.state = enclosing_state;
- }
-#endif
-}
-
-
-#ifndef KMP_DEBUG
-static
-#endif /* KMP_DEBUG */
-void
-__kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
- void (*task)(void *), void *data, unsigned num_threads, ident_t *loc,
- enum sched_type schedule, long start, long end, long incr, long chunk_size)
-{
- //
-    // Initialize the loop worksharing construct.
- //
- KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
- schedule != kmp_sch_static);
-
-#if OMPT_SUPPORT
- kmp_info_t *thr;
- ompt_frame_t *ompt_frame;
- ompt_state_t enclosing_state;
-
- if (ompt_enabled) {
- thr = __kmp_threads[*gtid];
- // save enclosing task state; set current state for task
- enclosing_state = thr->th.ompt_thread_info.state;
- thr->th.ompt_thread_info.state = ompt_state_work_parallel;
-
- // set task frame
- ompt_frame = __ompt_get_task_frame_internal(0);
- ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
- //
- // Now invoke the microtask.
- //
- task(data);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- // clear task frame
- ompt_frame->exit_runtime_frame = NULL;
-
- // reset enclosing state
- thr->th.ompt_thread_info.state = enclosing_state;
- }
-#endif
-}
-
-
-#ifndef KMP_DEBUG
-static
-#endif /* KMP_DEBUG */
-void
-__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), microtask_t wrapper, int argc,...)
-{
- int rc;
- kmp_info_t *thr = __kmp_threads[gtid];
- kmp_team_t *team = thr->th.th_team;
- int tid = __kmp_tid_from_gtid(gtid);
-
- va_list ap;
- va_start(ap, argc);
-
- rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc,
-#if OMPT_SUPPORT
- VOLATILE_CAST(void *) unwrapped_task,
-#endif
- wrapper, __kmp_invoke_task_func,
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- &ap
-#else
- ap
-#endif
- );
-
- va_end(ap);
-
- if (rc) {
- __kmp_run_before_invoked_task(gtid, tid, thr, team);
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
-#if OMPT_TRACE
- ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
-
- // implicit task callback
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- team_info->parallel_id, task_info->task_id);
- }
-#endif
- thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- }
-#endif
-}
-
-static void
-__kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *))
-{
- __kmp_serialized_parallel(loc, gtid);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- ompt_task_id_t ompt_task_id = __ompt_get_task_id_internal(0);
- ompt_frame_t *ompt_frame = __ompt_get_task_frame_internal(0);
- kmp_info_t *thr = __kmp_threads[gtid];
-
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(gtid);
- ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid);
-
- ompt_frame->exit_runtime_frame = NULL;
-
- // parallel region callback
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
- int team_size = 1;
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
- ompt_task_id, ompt_frame, ompt_parallel_id,
- team_size, (void *) task,
- OMPT_INVOKER(fork_context_gnu));
- }
-
- // set up lightweight task
- ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
- __kmp_allocate(sizeof(ompt_lw_taskteam_t));
- __ompt_lw_taskteam_init(lwt, thr, gtid, (void *) task, ompt_parallel_id);
- lwt->ompt_task_info.task_id = my_ompt_task_id;
- lwt->ompt_task_info.frame.exit_runtime_frame = 0;
- __ompt_lw_taskteam_link(lwt, thr);
-
-#if OMPT_TRACE
- // implicit task callback
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- ompt_parallel_id, my_ompt_task_id);
- }
- thr->th.ompt_thread_info.state = ompt_state_work_parallel;
-#endif
- }
-#endif
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads)
-{
- int gtid = __kmp_entry_gtid();
-
-#if OMPT_SUPPORT
- ompt_frame_t *parent_frame;
-
- if (ompt_enabled) {
- parent_frame = __ompt_get_task_frame_internal(0);
- parent_frame->reenter_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
- MKLOC(loc, "GOMP_parallel_start");
- KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
-
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
- if (num_threads != 0) {
- __kmp_push_num_threads(&loc, gtid, num_threads);
- }
- __kmp_GOMP_fork_call(&loc, gtid, task,
- (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
- }
- else {
- __kmp_GOMP_serialized_parallel(&loc, gtid, task);
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- parent_frame->reenter_runtime_frame = NULL;
- }
-#endif
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
-{
- int gtid = __kmp_get_gtid();
- kmp_info_t *thr;
-
- thr = __kmp_threads[gtid];
-
- MKLOC(loc, "GOMP_parallel_end");
- KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
-
-
-#if OMPT_SUPPORT
- ompt_parallel_id_t parallel_id;
- ompt_frame_t *ompt_frame = NULL;
-
- if (ompt_enabled) {
- ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
- parallel_id = team_info->parallel_id;
-
- // Record that we re-entered the runtime system in the implicit
- // task frame representing the parallel region.
- ompt_frame = __ompt_get_task_frame_internal(0);
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
-
-#if OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- parallel_id, task_info->task_id);
- }
-#endif
-
- // unlink if necessary. no-op if there is not a lightweight task.
- ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
- // GOMP allocates/frees lwt since it can't be kept on the stack
- if (lwt) {
- __kmp_free(lwt);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- // Since a lightweight task was destroyed, make sure that the
- // remaining deepest task knows the stack frame where the runtime
- // was reentered.
- ompt_frame = __ompt_get_task_frame_internal(0);
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
- }
-#endif
- }
- }
-#endif
-
- if (! thr->th.th_team->t.t_serialized) {
- __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
- thr->th.th_team);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- // Set reenter frame in parent task, which will become current task
- // in the midst of join. This is needed before the end_parallel callback.
- ompt_frame = __ompt_get_task_frame_internal(1);
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
- __kmp_join_call(&loc, gtid
-#if OMPT_SUPPORT
- , fork_context_gnu
-#endif
- );
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- ompt_frame->reenter_runtime_frame = NULL;
- }
-#endif
- }
- else {
- __kmpc_end_serialized_parallel(&loc, gtid);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- // Record that we re-entered the runtime system in the frame that
- // created the parallel region.
- ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- parallel_id, task_info->task_id,
- OMPT_INVOKER(fork_context_gnu));
- }
-
- ompt_frame->reenter_runtime_frame = NULL;
-
- thr->th.ompt_thread_info.state =
- (((thr->th.th_team)->t.t_serialized) ?
- ompt_state_work_serial : ompt_state_work_parallel);
- }
-#endif
- }
-}
-
-
-//
-// Loop worksharing constructs
-//
-
-//
-// The Gnu codegen passes in an exclusive upper bound for the overall range,
-// but the libguide dispatch code expects an inclusive upper bound, hence the
-// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th
-// argument to __kmp_GOMP_fork_call).
-//
-// Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
-// but the Gnu codegen expects an exclusive upper bound, so the adjustment
-// "*p_ub += stride" compensates for the discrepancy.
-//
-// Correction: the gnu codegen always adjusts the upper bound by +-1, not the
-// stride value. We adjust the dispatch parameters accordingly (by +-1), but
-// we still adjust p_ub by the actual stride value.
-//
-// The "runtime" versions do not take a chunk_sz parameter.
-//
-// The profile lib cannot support construct checking of unordered loops that
-// are predetermined by the compiler to be statically scheduled, as the gcc
-// codegen will not always emit calls to GOMP_loop_static_next() to get the
-// next iteration.  Instead, it emits inline code to call omp_get_thread_num()
-// and calculate the iteration space using the result.  It doesn't do this
-// with ordered static loops, so they can be checked.
-//
-
-#define LOOP_START(func,schedule) \
- int func (long lb, long ub, long str, long chunk_sz, long *p_lb, \
- long *p_ub) \
- { \
- int status; \
- long stride; \
- int gtid = __kmp_entry_gtid(); \
- MKLOC(loc, #func); \
- KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
- gtid, lb, ub, str, chunk_sz )); \
- \
- if ((str > 0) ? (lb < ub) : (lb > ub)) { \
- KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
- (schedule) != kmp_sch_static); \
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
- (kmp_int *)p_ub, (kmp_int *)&stride); \
- if (status) { \
- KMP_DEBUG_ASSERT(stride == str); \
- *p_ub += (str > 0) ? 1 : -1; \
- } \
- } \
- else { \
- status = 0; \
- } \
- \
- KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
- gtid, *p_lb, *p_ub, status)); \
- return status; \
- }
-
-
-#define LOOP_RUNTIME_START(func,schedule) \
- int func (long lb, long ub, long str, long *p_lb, long *p_ub) \
- { \
- int status; \
- long stride; \
- long chunk_sz = 0; \
- int gtid = __kmp_entry_gtid(); \
- MKLOC(loc, #func); \
- KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
- gtid, lb, ub, str, chunk_sz )); \
- \
- if ((str > 0) ? (lb < ub) : (lb > ub)) { \
- KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
- (kmp_int *)p_ub, (kmp_int *)&stride); \
- if (status) { \
- KMP_DEBUG_ASSERT(stride == str); \
- *p_ub += (str > 0) ? 1 : -1; \
- } \
- } \
- else { \
- status = 0; \
- } \
- \
- KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
- gtid, *p_lb, *p_ub, status)); \
- return status; \
- }
-
-
-#define LOOP_NEXT(func,fini_code) \
- int func(long *p_lb, long *p_ub) \
- { \
- int status; \
- long stride; \
- int gtid = __kmp_get_gtid(); \
- MKLOC(loc, #func); \
- KA_TRACE(20, ( #func ": T#%d\n", gtid)); \
- \
- fini_code \
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
- (kmp_int *)p_ub, (kmp_int *)&stride); \
- if (status) { \
- *p_ub += (stride > 0) ? 1 : -1; \
- } \
- \
- KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
- "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \
- return status; \
- }
-
-
-LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
-LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
-LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
-LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
-
-LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime)
-LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-
-
-void
-xexpand(KMP_API_NAME_GOMP_LOOP_END)(void)
-{
- int gtid = __kmp_get_gtid();
- KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
-
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-
- KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void)
-{
- KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
-}
-
-
-//
-// Unsigned long long loop worksharing constructs
-//
-// These are new with gcc 4.4
-//
-
-#define LOOP_START_ULL(func,schedule) \
- int func (int up, unsigned long long lb, unsigned long long ub, \
- unsigned long long str, unsigned long long chunk_sz, \
- unsigned long long *p_lb, unsigned long long *p_ub) \
- { \
- int status; \
- long long str2 = up ? ((long long)str) : -((long long)str); \
- long long stride; \
- int gtid = __kmp_entry_gtid(); \
- MKLOC(loc, #func); \
- \
- KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \
- gtid, up, lb, ub, str, chunk_sz )); \
- \
- if ((str > 0) ? (lb < ub) : (lb > ub)) { \
- KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
- (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \
- (schedule) != kmp_sch_static); \
- status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \
- (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
- if (status) { \
- KMP_DEBUG_ASSERT(stride == str2); \
- *p_ub += (str > 0) ? 1 : -1; \
- } \
- } \
- else { \
- status = 0; \
- } \
- \
- KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
- gtid, *p_lb, *p_ub, status)); \
- return status; \
- }
-
-
-#define LOOP_RUNTIME_START_ULL(func,schedule) \
- int func (int up, unsigned long long lb, unsigned long long ub, \
- unsigned long long str, unsigned long long *p_lb, \
- unsigned long long *p_ub) \
- { \
- int status; \
- long long str2 = up ? ((long long)str) : -((long long)str); \
- unsigned long long stride; \
- unsigned long long chunk_sz = 0; \
- int gtid = __kmp_entry_gtid(); \
- MKLOC(loc, #func); \
- \
- KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \
- gtid, up, lb, ub, str, chunk_sz )); \
- \
- if ((str > 0) ? (lb < ub) : (lb > ub)) { \
- KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
- (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, TRUE); \
- status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \
- (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
- if (status) { \
- KMP_DEBUG_ASSERT((long long)stride == str2); \
- *p_ub += (str > 0) ? 1 : -1; \
- } \
- } \
- else { \
- status = 0; \
- } \
- \
- KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
- gtid, *p_lb, *p_ub, status)); \
- return status; \
- }
-
-
-#define LOOP_NEXT_ULL(func,fini_code) \
- int func(unsigned long long *p_lb, unsigned long long *p_ub) \
- { \
- int status; \
- long long stride; \
- int gtid = __kmp_get_gtid(); \
- MKLOC(loc, #func); \
- KA_TRACE(20, ( #func ": T#%d\n", gtid)); \
- \
- fini_code \
- status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
- (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
- if (status) { \
- *p_ub += (stride > 0) ? 1 : -1; \
- } \
- \
- KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \
- "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \
- return status; \
- }
-
-
-LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
-LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
-LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
-LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
-
-LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime)
-LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
- { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-
-
-//
-// Combined parallel / loop worksharing constructs
-//
-// There are no ull versions (yet).
-//
-
-#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \
- void func (void (*task) (void *), void *data, unsigned num_threads, \
- long lb, long ub, long str, long chunk_sz) \
- { \
- int gtid = __kmp_entry_gtid(); \
- MKLOC(loc, #func); \
- KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
- gtid, lb, ub, str, chunk_sz )); \
- \
- ompt_pre(); \
- \
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
- if (num_threads != 0) { \
- __kmp_push_num_threads(&loc, gtid, num_threads); \
- } \
- __kmp_GOMP_fork_call(&loc, gtid, task, \
- (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
- task, data, num_threads, &loc, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
- } \
- else { \
- __kmp_GOMP_serialized_parallel(&loc, gtid, task); \
- } \
- \
- KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
- (schedule) != kmp_sch_static); \
- \
- ompt_post(); \
- \
- KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
- }
-
-
-
-#if OMPT_SUPPORT
-
-#define OMPT_LOOP_PRE() \
- ompt_frame_t *parent_frame; \
- if (ompt_enabled) { \
- parent_frame = __ompt_get_task_frame_internal(0); \
- parent_frame->reenter_runtime_frame = __builtin_frame_address(0); \
- }
-
-
-#define OMPT_LOOP_POST() \
- if (ompt_enabled) { \
- parent_frame->reenter_runtime_frame = NULL; \
- }
-
-#else
-
-#define OMPT_LOOP_PRE()
-
-#define OMPT_LOOP_POST()
-
-#endif
-
-
-PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
- kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
-PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
- kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
-PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
- kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
-PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
- kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
-
-
-//
-// Tasking constructs
-//
-
-void
-xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
- long arg_size, long arg_align, int if_cond, unsigned gomp_flags)
-{
- MKLOC(loc, "GOMP_task");
- int gtid = __kmp_entry_gtid();
- kmp_int32 flags = 0;
- kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
-
- KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));
-
- // The low-order bit is the "tied" flag
- if (gomp_flags & 1) {
- input_flags->tiedness = 1;
- }
- // The second low-order bit is the "final" flag
- if (gomp_flags & 2) {
- input_flags->final = 1;
- }
- input_flags->native = 1;
- // __kmp_task_alloc() sets up all other flags
-
- if (! if_cond) {
- arg_size = 0;
- }
-
- kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags,
- sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0,
- (kmp_routine_entry_t)func);
-
- if (arg_size > 0) {
- if (arg_align > 0) {
- task->shareds = (void *)((((size_t)task->shareds)
- + arg_align - 1) / arg_align * arg_align);
- }
- //else error??
-
- if (copy_func) {
- (*copy_func)(task->shareds, data);
- }
- else {
- KMP_MEMCPY(task->shareds, data, arg_size);
- }
- }
-
- if (if_cond) {
- __kmpc_omp_task(&loc, gtid, task);
- }
- else {
-#if OMPT_SUPPORT
- ompt_thread_info_t oldInfo;
- kmp_info_t *thread;
- kmp_taskdata_t *taskdata;
- if (ompt_enabled) {
-            // Store the thread's state and restore it after the task
- thread = __kmp_threads[ gtid ];
- taskdata = KMP_TASK_TO_TASKDATA(task);
- oldInfo = thread->th.ompt_thread_info;
- thread->th.ompt_thread_info.wait_id = 0;
- thread->th.ompt_thread_info.state = ompt_state_work_parallel;
- taskdata->ompt_task_info.frame.exit_runtime_frame =
- __builtin_frame_address(0);
- }
-#endif
-
- __kmpc_omp_task_begin_if0(&loc, gtid, task);
- func(data);
- __kmpc_omp_task_complete_if0(&loc, gtid, task);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- thread->th.ompt_thread_info = oldInfo;
- taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
- }
-#endif
- }
-
- KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void)
-{
- MKLOC(loc, "GOMP_taskwait");
- int gtid = __kmp_entry_gtid();
-
- KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
-
- __kmpc_omp_taskwait(&loc, gtid);
-
- KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
-}
-
-
-//
-// Sections worksharing constructs
-//
-
-//
-// For the sections construct, we initialize a dynamically scheduled loop
-// worksharing construct with lb 1 and stride 1, and use the iteration #'s
-// that it returns as section ids.
-//
-// There are no special entry points for ordered sections, so we always use
-// the dynamically scheduled workshare, even if the sections aren't ordered.
-//
-
-unsigned
-xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count)
-{
- int status;
- kmp_int lb, ub, stride;
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_sections_start");
- KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
-
- KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
-
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
- if (status) {
- KMP_DEBUG_ASSERT(stride == 1);
- KMP_DEBUG_ASSERT(lb > 0);
- KMP_ASSERT(lb == ub);
- }
- else {
- lb = 0;
- }
-
- KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
- (unsigned)lb));
- return (unsigned)lb;
-}
-
-
-unsigned
-xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void)
-{
- int status;
- kmp_int lb, ub, stride;
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_sections_next");
- KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
-
- status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
- if (status) {
- KMP_DEBUG_ASSERT(stride == 1);
- KMP_DEBUG_ASSERT(lb > 0);
- KMP_ASSERT(lb == ub);
- }
- else {
- lb = 0;
- }
-
- KA_TRACE(20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid,
- (unsigned)lb));
- return (unsigned)lb;
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data,
- unsigned num_threads, unsigned count)
-{
- int gtid = __kmp_entry_gtid();
-
-#if OMPT_SUPPORT
- ompt_frame_t *parent_frame;
-
- if (ompt_enabled) {
- parent_frame = __ompt_get_task_frame_internal(0);
- parent_frame->reenter_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
- MKLOC(loc, "GOMP_parallel_sections_start");
- KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
-
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
- if (num_threads != 0) {
- __kmp_push_num_threads(&loc, gtid, num_threads);
- }
- __kmp_GOMP_fork_call(&loc, gtid, task,
- (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
- num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
- (kmp_int)count, (kmp_int)1, (kmp_int)1);
- }
- else {
- __kmp_GOMP_serialized_parallel(&loc, gtid, task);
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- parent_frame->reenter_runtime_frame = NULL;
- }
-#endif
-
- KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
-
- KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void)
-{
- int gtid = __kmp_get_gtid();
- KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
-
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-
- KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
-}
-
-
-void
-xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void)
-{
- KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
-}
-
-// libgomp has an empty function for GOMP_taskyield as of 2013-10-10
-void
-xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void)
-{
- KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))
- return;
-}
-
-#if OMP_40_ENABLED // these are new GOMP_4.0 entry points
-
-void
-xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, unsigned num_threads, unsigned int flags)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_parallel");
- KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));
-
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
- if (num_threads != 0) {
- __kmp_push_num_threads(&loc, gtid, num_threads);
- }
- if(flags != 0) {
- __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
- }
- __kmp_GOMP_fork_call(&loc, gtid, task,
- (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
- }
- else {
- __kmp_GOMP_serialized_parallel(&loc, gtid, task);
- }
- task(data);
- xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
-}
-
-void
-xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data,
- unsigned num_threads, unsigned count, unsigned flags)
-{
- int gtid = __kmp_entry_gtid();
- MKLOC(loc, "GOMP_parallel_sections");
- KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
-
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
- if (num_threads != 0) {
- __kmp_push_num_threads(&loc, gtid, num_threads);
- }
- if(flags != 0) {
- __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
- }
- __kmp_GOMP_fork_call(&loc, gtid, task,
- (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
- num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
- (kmp_int)count, (kmp_int)1, (kmp_int)1);
- }
- else {
- __kmp_GOMP_serialized_parallel(&loc, gtid, task);
- }
-
- KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
-
- task(data);
- xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
- KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
-}
-
-#define PARALLEL_LOOP(func, schedule) \
- void func (void (*task) (void *), void *data, unsigned num_threads, \
- long lb, long ub, long str, long chunk_sz, unsigned flags) \
- { \
- int gtid = __kmp_entry_gtid(); \
- MKLOC(loc, #func); \
- KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
- gtid, lb, ub, str, chunk_sz )); \
- \
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
- if (num_threads != 0) { \
- __kmp_push_num_threads(&loc, gtid, num_threads); \
- } \
- if (flags != 0) { \
- __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \
- } \
- __kmp_GOMP_fork_call(&loc, gtid, task, \
- (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
- task, data, num_threads, &loc, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
- } \
- else { \
- __kmp_GOMP_serialized_parallel(&loc, gtid, task); \
- } \
- \
- KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
- (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
- (schedule) != kmp_sch_static); \
- task(data); \
- xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \
- \
- KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
- }
-
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static)
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked)
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked)
-PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime)
-
-
-void
-xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void)
-{
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_taskgroup_start");
- KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
-
- __kmpc_taskgroup(&loc, gtid);
-
- return;
-}
-
-void
-xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void)
-{
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_taskgroup_end");
- KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
-
- __kmpc_end_taskgroup(&loc, gtid);
-
- return;
-}
-
-#ifndef KMP_DEBUG
-static
-#endif /* KMP_DEBUG */
-kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {
- kmp_int32 cncl_kind = 0;
- switch(gomp_kind) {
- case 1:
- cncl_kind = cancel_parallel;
- break;
- case 2:
- cncl_kind = cancel_loop;
- break;
- case 4:
- cncl_kind = cancel_sections;
- break;
- case 8:
- cncl_kind = cancel_taskgroup;
- break;
- }
- return cncl_kind;
-}
-
-bool
-xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which)
-{
- if(__kmp_omp_cancellation) {
- KMP_FATAL(NoGompCancellation);
- }
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_cancellation_point");
- KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid));
-
- kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
-
- return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
-}
-
-bool
-xexpand(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void)
-{
- if(__kmp_omp_cancellation) {
- KMP_FATAL(NoGompCancellation);
- }
- KMP_FATAL(NoGompCancellation);
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_barrier_cancel");
- KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));
-
- return __kmpc_cancel_barrier(&loc, gtid);
-}
-
-bool
-xexpand(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel)
-{
- if(__kmp_omp_cancellation) {
- KMP_FATAL(NoGompCancellation);
- } else {
- return FALSE;
- }
-
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_cancel");
- KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid));
-
- kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
-
- if(do_cancel == FALSE) {
- return xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which);
- } else {
- return __kmpc_cancel(&loc, gtid, cncl_kind);
- }
-}
-
-bool
-xexpand(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void)
-{
- if(__kmp_omp_cancellation) {
- KMP_FATAL(NoGompCancellation);
- }
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_sections_end_cancel");
- KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));
-
- return __kmpc_cancel_barrier(&loc, gtid);
-}
-
-bool
-xexpand(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void)
-{
- if(__kmp_omp_cancellation) {
- KMP_FATAL(NoGompCancellation);
- }
- int gtid = __kmp_get_gtid();
- MKLOC(loc, "GOMP_loop_end_cancel");
- KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));
-
- return __kmpc_cancel_barrier(&loc, gtid);
-}
-
-// All target functions are empty as of 2014-05-29
-void
-xexpand(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn) (void *), const void *openmp_target,
- size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds)
-{
- return;
-}
-
-void
-xexpand(KMP_API_NAME_GOMP_TARGET_DATA)(int device, const void *openmp_target, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned char *kinds)
-{
- return;
-}
-
-void
-xexpand(KMP_API_NAME_GOMP_TARGET_END_DATA)(void)
-{
- return;
-}
-
-void
-xexpand(KMP_API_NAME_GOMP_TARGET_UPDATE)(int device, const void *openmp_target, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned char *kinds)
-{
- return;
-}
-
-void
-xexpand(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, unsigned int thread_limit)
-{
- return;
-}
-#endif // OMP_40_ENABLED
-
-
-/*
-  The following sections of code create aliases for the GOMP_* functions,
-  then create versioned symbols using the assembler directive .symver.
-  This is only pertinent for an ELF .so library.
-  xaliasify and xversionify are defined in kmp_ftn_os.h.
-*/
-
-#ifdef KMP_USE_VERSION_SYMBOLS
-
-// GOMP_1.0 aliases
-xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10);
-xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_BARRIER, 10);
-xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10);
-xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10);
-xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10);
-xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10);
-xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10);
-xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10);
-xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10);
-xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10);
-xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10);
-xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10);
-xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10);
-xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10);
-
-// GOMP_2.0 aliases
-xaliasify(KMP_API_NAME_GOMP_TASK, 20);
-xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20);
-xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20);
-
-// GOMP_3.0 aliases
-xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30);
-
-// GOMP_4.0 aliases
-// The GOMP_parallel* entry points below aren't OpenMP 4.0 related; they are
-// versioned as GOMP_4.0 because that is the libgomp symbol version in which
-// they first appeared.
-#if OMP_40_ENABLED
-xaliasify(KMP_API_NAME_GOMP_PARALLEL, 40);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40);
-xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40);
-xaliasify(KMP_API_NAME_GOMP_TASKGROUP_START, 40);
-xaliasify(KMP_API_NAME_GOMP_TASKGROUP_END, 40);
-xaliasify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40);
-xaliasify(KMP_API_NAME_GOMP_CANCEL, 40);
-xaliasify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40);
-xaliasify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40);
-xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40);
-xaliasify(KMP_API_NAME_GOMP_TARGET, 40);
-xaliasify(KMP_API_NAME_GOMP_TARGET_DATA, 40);
-xaliasify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40);
-xaliasify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40);
-xaliasify(KMP_API_NAME_GOMP_TEAMS, 40);
-#endif
-
-// GOMP_1.0 versioned symbols
-xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
-xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
-
-// GOMP_2.0 versioned symbols
-xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
-
-// GOMP_3.0 versioned symbols
-xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
-
-// GOMP_4.0 versioned symbols
-#if OMP_40_ENABLED
-xversionify(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");
-xversionify(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");
-#endif
-
-#endif // KMP_USE_VERSION_SYMBOLS
-
-#ifdef __cplusplus
- } //extern "C"
-#endif // __cplusplus
-
-
+ if (ompt_enabled) {
+#if OMPT_TRACE
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
+
+ // implicit task callback
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
+ team_info->parallel_id, task_info->task_id);
+ }
+#endif
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+ }
+#endif
+}
+
+static void
+__kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *))
+{
+ __kmp_serialized_parallel(loc, gtid);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ ompt_task_id_t ompt_task_id = __ompt_get_task_id_internal(0);
+ ompt_frame_t *ompt_frame = __ompt_get_task_frame_internal(0);
+ kmp_info_t *thr = __kmp_threads[gtid];
+
+ ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(gtid);
+ ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid);
+
+ ompt_frame->exit_runtime_frame = NULL;
+
+ // parallel region callback
+ if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
+ int team_size = 1;
+ ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
+ ompt_task_id, ompt_frame, ompt_parallel_id,
+ team_size, (void *) task,
+ OMPT_INVOKER(fork_context_gnu));
+ }
+
+ // set up lightweight task
+ ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
+ __kmp_allocate(sizeof(ompt_lw_taskteam_t));
+ __ompt_lw_taskteam_init(lwt, thr, gtid, (void *) task, ompt_parallel_id);
+ lwt->ompt_task_info.task_id = my_ompt_task_id;
+ lwt->ompt_task_info.frame.exit_runtime_frame = 0;
+ __ompt_lw_taskteam_link(lwt, thr);
+
+#if OMPT_TRACE
+ // implicit task callback
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
+ ompt_parallel_id, my_ompt_task_id);
+ }
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+#endif
+ }
+#endif
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads)
+{
+ int gtid = __kmp_entry_gtid();
+
+#if OMPT_SUPPORT
+ ompt_frame_t *parent_frame;
+
+ if (ompt_enabled) {
+ parent_frame = __ompt_get_task_frame_internal(0);
+ parent_frame->reenter_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+ MKLOC(loc, "GOMP_parallel_start");
+ KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
+
+ if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
+ if (num_threads != 0) {
+ __kmp_push_num_threads(&loc, gtid, num_threads);
+ }
+ __kmp_GOMP_fork_call(&loc, gtid, task,
+ (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
+ }
+ else {
+ __kmp_GOMP_serialized_parallel(&loc, gtid, task);
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ parent_frame->reenter_runtime_frame = NULL;
+ }
+#endif
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+ kmp_info_t *thr;
+
+ thr = __kmp_threads[gtid];
+
+ MKLOC(loc, "GOMP_parallel_end");
+ KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
+
+
+#if OMPT_SUPPORT
+ ompt_parallel_id_t parallel_id;
+ ompt_frame_t *ompt_frame = NULL;
+
+ if (ompt_enabled) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ parallel_id = team_info->parallel_id;
+
+ // Record that we re-entered the runtime system in the implicit
+ // task frame representing the parallel region.
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
+
+#if OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
+ ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
+ parallel_id, task_info->task_id);
+ }
+#endif
+
+        // unlink if necessary. no-op if there is no lightweight task.
+ ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
+ // GOMP allocates/frees lwt since it can't be kept on the stack
+ if (lwt) {
+ __kmp_free(lwt);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ // Since a lightweight task was destroyed, make sure that the
+ // remaining deepest task knows the stack frame where the runtime
+ // was reentered.
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+ }
+ }
+#endif
+
+ if (! thr->th.th_team->t.t_serialized) {
+ __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
+ thr->th.th_team);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ // Set reenter frame in parent task, which will become current task
+ // in the midst of join. This is needed before the end_parallel callback.
+ ompt_frame = __ompt_get_task_frame_internal(1);
+ ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+ __kmp_join_call(&loc, gtid
+#if OMPT_SUPPORT
+ , fork_context_gnu
+#endif
+ );
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ ompt_frame->reenter_runtime_frame = NULL;
+ }
+#endif
+ }
+ else {
+ __kmpc_end_serialized_parallel(&loc, gtid);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ // Record that we re-entered the runtime system in the frame that
+ // created the parallel region.
+ ompt_frame->reenter_runtime_frame = __builtin_frame_address(0);
+
+ if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
+ ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
+ ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
+ parallel_id, task_info->task_id,
+ OMPT_INVOKER(fork_context_gnu));
+ }
+
+ ompt_frame->reenter_runtime_frame = NULL;
+
+ thr->th.ompt_thread_info.state =
+ (((thr->th.th_team)->t.t_serialized) ?
+ ompt_state_work_serial : ompt_state_work_parallel);
+ }
+#endif
+ }
+}
+
+
+//
+// Loop worksharing constructs
+//
+
+//
+// The Gnu codegen passes in an exclusive upper bound for the overall range,
+// but the libguide dispatch code expects an inclusive upper bound, hence the
+// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th
+// argument to __kmp_GOMP_fork_call).
+//
+// Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
+// but the Gnu codegen expects an exclusive upper bound, so the adjustment
+// "*p_ub += stride" compensates for the discrepancy.
+//
+// Correction: the gnu codegen always adjusts the upper bound by +-1, not the
+// stride value. We adjust the dispatch parameters accordingly (by +-1), and
+// the macros below likewise adjust *p_ub on exit by +-1 rather than by the
+// stride.
+//
+// The "runtime" versions do not take a chunk_sz parameter.
+//
+// The profile lib cannot support construct checking of unordered loops that
+// are predetermined by the compiler to be statically scheduled, as the gcc
+// codegen will not always emit calls to GOMP_loop_static_next() to get the
+// next iteration. Instead, it emits inline code to call omp_get_thread_num()
+// and calculate the iteration space using the result. It doesn't do this
+// with ordered static loops, so they can be checked.
+//
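+//
+// Illustrative example (editor's sketch, not from the original source): for
+// a gcc loop "for (i = 0; i < 10; i += 3)" the codegen calls the *_start
+// entry with lb=0, ub=10 (exclusive) and str=3. The macros below pass
+// ub-1 = 9 to KMP_DISPATCH_INIT as the inclusive bound; if KMP_DISPATCH_NEXT
+// then hands back the inclusive chunk [0,9], the +1 exit adjustment returns
+// *p_ub = 10 (exclusive again) to the codegen.
+//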
+
+#define LOOP_START(func,schedule) \
+ int func (long lb, long ub, long str, long chunk_sz, long *p_lb, \
+ long *p_ub) \
+ { \
+ int status; \
+ long stride; \
+ int gtid = __kmp_entry_gtid(); \
+ MKLOC(loc, #func); \
+ KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
+ gtid, lb, ub, str, chunk_sz )); \
+ \
+ if ((str > 0) ? (lb < ub) : (lb > ub)) { \
+ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
+ (schedule) != kmp_sch_static); \
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
+ (kmp_int *)p_ub, (kmp_int *)&stride); \
+ if (status) { \
+ KMP_DEBUG_ASSERT(stride == str); \
+ *p_ub += (str > 0) ? 1 : -1; \
+ } \
+ } \
+ else { \
+ status = 0; \
+ } \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
+ gtid, *p_lb, *p_ub, status)); \
+ return status; \
+ }
+
+
+#define LOOP_RUNTIME_START(func,schedule) \
+ int func (long lb, long ub, long str, long *p_lb, long *p_ub) \
+ { \
+ int status; \
+ long stride; \
+ long chunk_sz = 0; \
+ int gtid = __kmp_entry_gtid(); \
+ MKLOC(loc, #func); \
+ KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
+ gtid, lb, ub, str, chunk_sz )); \
+ \
+ if ((str > 0) ? (lb < ub) : (lb > ub)) { \
+ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
+ (kmp_int *)p_ub, (kmp_int *)&stride); \
+ if (status) { \
+ KMP_DEBUG_ASSERT(stride == str); \
+ *p_ub += (str > 0) ? 1 : -1; \
+ } \
+ } \
+ else { \
+ status = 0; \
+ } \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \
+ gtid, *p_lb, *p_ub, status)); \
+ return status; \
+ }
+
+
+#define LOOP_NEXT(func,fini_code) \
+ int func(long *p_lb, long *p_ub) \
+ { \
+ int status; \
+ long stride; \
+ int gtid = __kmp_get_gtid(); \
+ MKLOC(loc, #func); \
+ KA_TRACE(20, ( #func ": T#%d\n", gtid)); \
+ \
+ fini_code \
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
+ (kmp_int *)p_ub, (kmp_int *)&stride); \
+ if (status) { \
+ *p_ub += (stride > 0) ? 1 : -1; \
+ } \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
+ "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \
+ return status; \
+ }
+
+
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
+
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
+
+
+void
+xexpand(KMP_API_NAME_GOMP_LOOP_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+ KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
+
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+ KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void)
+{
+ KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
+}
+
+
+//
+// Unsigned long long loop worksharing constructs
+//
+// These are new with gcc 4.4
+//
+
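+// Note: the ULL entry points pass the iteration direction as a separate
+// "up" flag rather than as the sign of the stride; the macros below fold it
+// back into a signed value (str2) before calling the ULL dispatch routines.
+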
+#define LOOP_START_ULL(func,schedule) \
+ int func (int up, unsigned long long lb, unsigned long long ub, \
+ unsigned long long str, unsigned long long chunk_sz, \
+ unsigned long long *p_lb, unsigned long long *p_ub) \
+ { \
+ int status; \
+ long long str2 = up ? ((long long)str) : -((long long)str); \
+ long long stride; \
+ int gtid = __kmp_entry_gtid(); \
+ MKLOC(loc, #func); \
+ \
+ KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \
+ gtid, up, lb, ub, str, chunk_sz )); \
+ \
+ if ((str > 0) ? (lb < ub) : (lb > ub)) { \
+ KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
+ (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \
+ (schedule) != kmp_sch_static); \
+ status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \
+ (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
+ if (status) { \
+ KMP_DEBUG_ASSERT(stride == str2); \
+ *p_ub += (str > 0) ? 1 : -1; \
+ } \
+ } \
+ else { \
+ status = 0; \
+ } \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
+ gtid, *p_lb, *p_ub, status)); \
+ return status; \
+ }
+
+
+#define LOOP_RUNTIME_START_ULL(func,schedule) \
+ int func (int up, unsigned long long lb, unsigned long long ub, \
+ unsigned long long str, unsigned long long *p_lb, \
+ unsigned long long *p_ub) \
+ { \
+ int status; \
+ long long str2 = up ? ((long long)str) : -((long long)str); \
+ unsigned long long stride; \
+ unsigned long long chunk_sz = 0; \
+ int gtid = __kmp_entry_gtid(); \
+ MKLOC(loc, #func); \
+ \
+ KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \
+ gtid, up, lb, ub, str, chunk_sz )); \
+ \
+ if ((str > 0) ? (lb < ub) : (lb > ub)) { \
+ KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \
+ (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, TRUE); \
+ status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \
+ (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
+ if (status) { \
+ KMP_DEBUG_ASSERT((long long)stride == str2); \
+ *p_ub += (str > 0) ? 1 : -1; \
+ } \
+ } \
+ else { \
+ status = 0; \
+ } \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \
+ gtid, *p_lb, *p_ub, status)); \
+ return status; \
+ }
+
+
+#define LOOP_NEXT_ULL(func,fini_code) \
+ int func(unsigned long long *p_lb, unsigned long long *p_ub) \
+ { \
+ int status; \
+ long long stride; \
+ int gtid = __kmp_get_gtid(); \
+ MKLOC(loc, #func); \
+ KA_TRACE(20, ( #func ": T#%d\n", gtid)); \
+ \
+ fini_code \
+ status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \
+ (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \
+ if (status) { \
+ *p_ub += (stride > 0) ? 1 : -1; \
+ } \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \
+ "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \
+ return status; \
+ }
+
+
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
+LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
+
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
+LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
+ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
+
+
+//
+// Combined parallel / loop worksharing constructs
+//
+// There are no ull versions (yet).
+//
+
+#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \
+ void func (void (*task) (void *), void *data, unsigned num_threads, \
+ long lb, long ub, long str, long chunk_sz) \
+ { \
+ int gtid = __kmp_entry_gtid(); \
+ MKLOC(loc, #func); \
+ KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
+ gtid, lb, ub, str, chunk_sz )); \
+ \
+ ompt_pre(); \
+ \
+ if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
+ if (num_threads != 0) { \
+ __kmp_push_num_threads(&loc, gtid, num_threads); \
+ } \
+ __kmp_GOMP_fork_call(&loc, gtid, task, \
+ (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
+ task, data, num_threads, &loc, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
+ } \
+ else { \
+ __kmp_GOMP_serialized_parallel(&loc, gtid, task); \
+ } \
+ \
+ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
+ (schedule) != kmp_sch_static); \
+ \
+ ompt_post(); \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
+ }
+
+
+
+#if OMPT_SUPPORT
+
+#define OMPT_LOOP_PRE() \
+ ompt_frame_t *parent_frame; \
+ if (ompt_enabled) { \
+ parent_frame = __ompt_get_task_frame_internal(0); \
+ parent_frame->reenter_runtime_frame = __builtin_frame_address(0); \
+ }
+
+
+#define OMPT_LOOP_POST() \
+ if (ompt_enabled) { \
+ parent_frame->reenter_runtime_frame = NULL; \
+ }
+
+#else
+
+#define OMPT_LOOP_PRE()
+
+#define OMPT_LOOP_POST()
+
+#endif
+
+
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
+ kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
+ kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
+ kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
+ kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+
+
+//
+// Tasking constructs
+//
+
+void
+xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
+ long arg_size, long arg_align, int if_cond, unsigned gomp_flags)
+{
+ MKLOC(loc, "GOMP_task");
+ int gtid = __kmp_entry_gtid();
+ kmp_int32 flags = 0;
+ kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
+
+ KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));
+
+ // The low-order bit is the "tied" flag
+ if (gomp_flags & 1) {
+ input_flags->tiedness = 1;
+ }
+ // The second low-order bit is the "final" flag
+ if (gomp_flags & 2) {
+ input_flags->final = 1;
+ }
+ input_flags->native = 1;
+ // __kmp_task_alloc() sets up all other flags
+
+ if (! if_cond) {
+ arg_size = 0;
+ }
+
+ kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags,
+ sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0,
+ (kmp_routine_entry_t)func);
+
+ if (arg_size > 0) {
+ if (arg_align > 0) {
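+            // Round task->shareds up to the next arg_align boundary so the
+            // copied-in arguments are suitably aligned (integer round-up:
+            // (p + align - 1) / align * align).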
+ task->shareds = (void *)((((size_t)task->shareds)
+ + arg_align - 1) / arg_align * arg_align);
+ }
+ //else error??
+
+ if (copy_func) {
+ (*copy_func)(task->shareds, data);
+ }
+ else {
+ KMP_MEMCPY(task->shareds, data, arg_size);
+ }
+ }
+
+ if (if_cond) {
+ __kmpc_omp_task(&loc, gtid, task);
+ }
+ else {
+#if OMPT_SUPPORT
+ ompt_thread_info_t oldInfo;
+ kmp_info_t *thread;
+ kmp_taskdata_t *taskdata;
+ if (ompt_enabled) {
+            // Store the thread's state and restore it after the task
+ thread = __kmp_threads[ gtid ];
+ taskdata = KMP_TASK_TO_TASKDATA(task);
+ oldInfo = thread->th.ompt_thread_info;
+ thread->th.ompt_thread_info.wait_id = 0;
+ thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+ taskdata->ompt_task_info.frame.exit_runtime_frame =
+ __builtin_frame_address(0);
+ }
+#endif
+
+ __kmpc_omp_task_begin_if0(&loc, gtid, task);
+ func(data);
+ __kmpc_omp_task_complete_if0(&loc, gtid, task);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ thread->th.ompt_thread_info = oldInfo;
+ taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
+ }
+#endif
+ }
+
+ KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void)
+{
+ MKLOC(loc, "GOMP_taskwait");
+ int gtid = __kmp_entry_gtid();
+
+ KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
+
+ __kmpc_omp_taskwait(&loc, gtid);
+
+ KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
+}
+
+
+//
+// Sections worksharing constructs
+//
+
+//
+// For the sections construct, we initialize a dynamically scheduled loop
+// worksharing construct with lb 1 and stride 1, and use the iteration #'s
+// that it returns as section ids.
+//
+// There are no special entry points for ordered sections, so we always use
+// the dynamically scheduled workshare, even if the sections aren't ordered.
+//
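+//
+// Illustrative example (editor's sketch, not from the original source): with
+// count = 3 and a single thread, successive start/next calls obtain the
+// section ids 1, 2 and 3 from the dispatcher, and then 0 (the "no more
+// sections" value assigned below) once the iterations are exhausted.
+//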
+
+unsigned
+xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count)
+{
+ int status;
+ kmp_int lb, ub, stride;
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_sections_start");
+ KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
+
+ KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
+
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
+ if (status) {
+ KMP_DEBUG_ASSERT(stride == 1);
+ KMP_DEBUG_ASSERT(lb > 0);
+ KMP_ASSERT(lb == ub);
+ }
+ else {
+ lb = 0;
+ }
+
+ KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
+ (unsigned)lb));
+ return (unsigned)lb;
+}
+
+
+unsigned
+xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void)
+{
+ int status;
+ kmp_int lb, ub, stride;
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_sections_next");
+ KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
+
+ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
+ if (status) {
+ KMP_DEBUG_ASSERT(stride == 1);
+ KMP_DEBUG_ASSERT(lb > 0);
+ KMP_ASSERT(lb == ub);
+ }
+ else {
+ lb = 0;
+ }
+
+ KA_TRACE(20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid,
+ (unsigned)lb));
+ return (unsigned)lb;
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data,
+ unsigned num_threads, unsigned count)
+{
+ int gtid = __kmp_entry_gtid();
+
+#if OMPT_SUPPORT
+ ompt_frame_t *parent_frame;
+
+ if (ompt_enabled) {
+ parent_frame = __ompt_get_task_frame_internal(0);
+ parent_frame->reenter_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+ MKLOC(loc, "GOMP_parallel_sections_start");
+ KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
+
+ if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
+ if (num_threads != 0) {
+ __kmp_push_num_threads(&loc, gtid, num_threads);
+ }
+ __kmp_GOMP_fork_call(&loc, gtid, task,
+ (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
+ num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
+ (kmp_int)count, (kmp_int)1, (kmp_int)1);
+ }
+ else {
+ __kmp_GOMP_serialized_parallel(&loc, gtid, task);
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ parent_frame->reenter_runtime_frame = NULL;
+ }
+#endif
+
+ KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
+
+ KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+ KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
+
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+ KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
+}
+
+
+void
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void)
+{
+ KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
+}
+
+// libgomp has an empty function for GOMP_taskyield as of 2013-10-10
+void
+xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void)
+{
+ KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))
+ return;
+}
+
+#if OMP_40_ENABLED // these are new GOMP_4.0 entry points
+
+void
+xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, unsigned num_threads, unsigned int flags)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_parallel");
+ KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));
+
+ if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
+ if (num_threads != 0) {
+ __kmp_push_num_threads(&loc, gtid, num_threads);
+ }
+ if(flags != 0) {
+ __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
+ }
+ __kmp_GOMP_fork_call(&loc, gtid, task,
+ (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
+ }
+ else {
+ __kmp_GOMP_serialized_parallel(&loc, gtid, task);
+ }
+ task(data);
+ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
+}
+
+void
+xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data,
+ unsigned num_threads, unsigned count, unsigned flags)
+{
+ int gtid = __kmp_entry_gtid();
+ MKLOC(loc, "GOMP_parallel_sections");
+ KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
+
+ if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
+ if (num_threads != 0) {
+ __kmp_push_num_threads(&loc, gtid, num_threads);
+ }
+ if(flags != 0) {
+ __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
+ }
+ __kmp_GOMP_fork_call(&loc, gtid, task,
+ (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
+ num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
+ (kmp_int)count, (kmp_int)1, (kmp_int)1);
+ }
+ else {
+ __kmp_GOMP_serialized_parallel(&loc, gtid, task);
+ }
+
+ KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
+
+ task(data);
+ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
+ KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
+}
+
+#define PARALLEL_LOOP(func, schedule) \
+ void func (void (*task) (void *), void *data, unsigned num_threads, \
+ long lb, long ub, long str, long chunk_sz, unsigned flags) \
+ { \
+ int gtid = __kmp_entry_gtid(); \
+ MKLOC(loc, #func); \
+ KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \
+ gtid, lb, ub, str, chunk_sz )); \
+ \
+ if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
+ if (num_threads != 0) { \
+ __kmp_push_num_threads(&loc, gtid, num_threads); \
+ } \
+ if (flags != 0) { \
+ __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \
+ } \
+ __kmp_GOMP_fork_call(&loc, gtid, task, \
+ (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
+ task, data, num_threads, &loc, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
+ } \
+ else { \
+ __kmp_GOMP_serialized_parallel(&loc, gtid, task); \
+ } \
+ \
+ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
+ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
+ (schedule) != kmp_sch_static); \
+ task(data); \
+ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \
+ \
+ KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \
+ }
+
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked)
+PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime)
+
+
+void
+xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void)
+{
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_taskgroup_start");
+ KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
+
+ __kmpc_taskgroup(&loc, gtid);
+
+ return;
+}
+
+void
+xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void)
+{
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_taskgroup_end");
+ KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
+
+ __kmpc_end_taskgroup(&loc, gtid);
+
+ return;
+}
+
+#ifndef KMP_DEBUG
+static
+#endif /* KMP_DEBUG */
+kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {
+ kmp_int32 cncl_kind = 0;
+ switch(gomp_kind) {
+ case 1:
+ cncl_kind = cancel_parallel;
+ break;
+ case 2:
+ cncl_kind = cancel_loop;
+ break;
+ case 4:
+ cncl_kind = cancel_sections;
+ break;
+ case 8:
+ cncl_kind = cancel_taskgroup;
+ break;
+ }
+ return cncl_kind;
+}
+
+bool
+xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which)
+{
+ if(__kmp_omp_cancellation) {
+ KMP_FATAL(NoGompCancellation);
+ }
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_cancellation_point");
+ KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid));
+
+ kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
+
+ return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
+}
+
+bool
+xexpand(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void)
+{
+ if(__kmp_omp_cancellation) {
+ KMP_FATAL(NoGompCancellation);
+ }
+ KMP_FATAL(NoGompCancellation);
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_barrier_cancel");
+ KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));
+
+ return __kmpc_cancel_barrier(&loc, gtid);
+}
+
+bool
+xexpand(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel)
+{
+ if(__kmp_omp_cancellation) {
+ KMP_FATAL(NoGompCancellation);
+ } else {
+ return FALSE;
+ }
+
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_cancel");
+ KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid));
+
+ kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
+
+ if(do_cancel == FALSE) {
+ return xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which);
+ } else {
+ return __kmpc_cancel(&loc, gtid, cncl_kind);
+ }
+}
+
+bool
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void)
+{
+ if(__kmp_omp_cancellation) {
+ KMP_FATAL(NoGompCancellation);
+ }
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_sections_end_cancel");
+ KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));
+
+ return __kmpc_cancel_barrier(&loc, gtid);
+}
+
+bool
+xexpand(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void)
+{
+ if(__kmp_omp_cancellation) {
+ KMP_FATAL(NoGompCancellation);
+ }
+ int gtid = __kmp_get_gtid();
+ MKLOC(loc, "GOMP_loop_end_cancel");
+ KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));
+
+ return __kmpc_cancel_barrier(&loc, gtid);
+}
+
+// All target functions are empty as of 2014-05-29
+void
+xexpand(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn) (void *), const void *openmp_target,
+ size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+ return;
+}
+
+void
+xexpand(KMP_API_NAME_GOMP_TARGET_DATA)(int device, const void *openmp_target, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+ return;
+}
+
+void
+xexpand(KMP_API_NAME_GOMP_TARGET_END_DATA)(void)
+{
+ return;
+}
+
+void
+xexpand(KMP_API_NAME_GOMP_TARGET_UPDATE)(int device, const void *openmp_target, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+ return;
+}
+
+void
+xexpand(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, unsigned int thread_limit)
+{
+ return;
+}
+#endif // OMP_40_ENABLED
+
+
+/*
+  The following sections of code create aliases for the GOMP_* functions,
+  then create versioned symbols using the assembler directive .symver.
+  This is only pertinent for an ELF .so library.
+  xaliasify and xversionify are defined in kmp_ftn_os.h.
+*/
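+
+/*
+  Editor's sketch (assumed expansion; the real macro definitions are in
+  kmp_ftn_os.h): a pair such as
+      xaliasify(KMP_API_NAME_GOMP_BARRIER, 10);
+      xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
+  roughly amounts to declaring an alias of the implementation and emitting an
+  assembler directive along the lines of
+      __asm__(".symver <alias>, GOMP_barrier@GOMP_1.0");
+  so that the exported ELF symbol carries the GOMP_1.0 version that binaries
+  built against libgomp expect.
+*/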
+
+#ifdef KMP_USE_VERSION_SYMBOLS
+
+// GOMP_1.0 aliases
+xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10);
+xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_BARRIER, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10);
+xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10);
+
+// GOMP_2.0 aliases
+xaliasify(KMP_API_NAME_GOMP_TASK, 20);
+xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20);
+
+// GOMP_3.0 aliases
+xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30);
+
+// GOMP_4.0 aliases
+// The GOMP_parallel* entry points below aren't OpenMP 4.0 related; they are
+// versioned as GOMP_4.0 because that is the libgomp symbol version in which
+// they first appeared.
+#if OMP_40_ENABLED
+xaliasify(KMP_API_NAME_GOMP_PARALLEL, 40);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40);
+xaliasify(KMP_API_NAME_GOMP_TASKGROUP_START, 40);
+xaliasify(KMP_API_NAME_GOMP_TASKGROUP_END, 40);
+xaliasify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40);
+xaliasify(KMP_API_NAME_GOMP_CANCEL, 40);
+xaliasify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40);
+xaliasify(KMP_API_NAME_GOMP_TARGET, 40);
+xaliasify(KMP_API_NAME_GOMP_TARGET_DATA, 40);
+xaliasify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40);
+xaliasify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40);
+xaliasify(KMP_API_NAME_GOMP_TEAMS, 40);
+#endif
+
+// GOMP_1.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
+
+// GOMP_2.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
+
+// GOMP_3.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
+
+// GOMP_4.0 versioned symbols
+#if OMP_40_ENABLED
+xversionify(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");
+xversionify(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");
+#endif
+
+#endif // KMP_USE_VERSION_SYMBOLS
+
+#ifdef __cplusplus
+ } //extern "C"
+#endif // __cplusplus
+
+
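+/* The xaliasify()/xversionify() invocations above expand to helper macros whose
+   bodies live in the runtime headers, outside this hunk. Illustration only: on
+   ELF targets with a GCC-style toolchain, macros of this kind are commonly built
+   from an alias attribute plus a .symver directive, roughly as in the sketch
+   below (hypothetical names, not the runtime's actual definitions).
+
+       void my_impl_GOMP_barrier(void) { ... }   // the runtime's implementation
+
+       // alias step: publish the implementation under a version-suffixed name
+       void my_GOMP_barrier_10(void)
+           __attribute__((alias("my_impl_GOMP_barrier")));
+
+       // versioning step: bind the alias to the public name at the GOMP_1.0
+       // version node (the node itself comes from the linker version script)
+       __asm__(".symver my_GOMP_barrier_10, GOMP_barrier@@GOMP_1.0");
+*/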
diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n.c b/contrib/libs/cxxsupp/openmp/kmp_i18n.c
index 3296624e17..8dad2553b0 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_i18n.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_i18n.c
@@ -1,974 +1,974 @@
-/*
- * kmp_i18n.c
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-
-#include "kmp_i18n.h"
-
-#include "kmp_os.h"
-#include "kmp_debug.h"
-#include "kmp.h"
-#include "kmp_lock.h"
-#include "kmp_io.h" // __kmp_printf.
-
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <locale.h>
-#include <stdarg.h>
-
-#include "kmp_i18n_default.inc"
-#include "kmp_str.h"
-#include "kmp_environment.h"
-
-#undef KMP_I18N_OK
-
-#define get_section( id ) ( (id) >> 16 )
-#define get_number( id ) ( (id) & 0xFFFF )
-
-kmp_msg_t __kmp_msg_empty = { kmp_mt_dummy, 0, "", 0 };
-kmp_msg_t __kmp_msg_null = { kmp_mt_dummy, 0, NULL, 0 };
-static char const * no_message_available = "(No message available)";
-
-enum kmp_i18n_cat_status {
- KMP_I18N_CLOSED, // Not yet opened or closed.
- KMP_I18N_OPENED, // Opened successfully, ready to use.
- KMP_I18N_ABSENT // Opening failed, message catalog should not be used.
-}; // enum kmp_i18n_cat_status
-typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t;
-static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED;
-
-/*
- Message catalog is opened at first usage, so we have to synchronize opening to avoid race and
- multiple openings.
-
- Closing does not require synchronization, because catalog is closed very late at library
- shutting down, when no other threads are alive.
-*/
-
-static void __kmp_i18n_do_catopen();
-static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( lock );
- // `lock' variable may be placed into __kmp_i18n_catopen function because it is used only by
- // that function. But we afraid a (buggy) compiler may treat it wrongly. So we put it outside of
- // function just in case.
-
-void
-__kmp_i18n_catopen(
-) {
- if ( status == KMP_I18N_CLOSED ) {
- __kmp_acquire_bootstrap_lock( & lock );
- if ( status == KMP_I18N_CLOSED ) {
- __kmp_i18n_do_catopen();
- }; // if
- __kmp_release_bootstrap_lock( & lock );
- }; // if
-} // func __kmp_i18n_catopen
-
-
-/*
- ================================================================================================
- Linux* OS and OS X* part.
- ================================================================================================
-*/
-
-#if KMP_OS_UNIX
-#define KMP_I18N_OK
-
-#include <nl_types.h>
-
-#define KMP_I18N_NULLCAT ((nl_catd)( -1 ))
-static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile?
-static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat" );
-
-/*
- Useful links:
- http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02
- http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html
- http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html
-*/
-
-void
-__kmp_i18n_do_catopen(
-) {
- int english = 0;
- char * lang = __kmp_env_get( "LANG" );
- // TODO: What about LC_ALL or LC_MESSAGES?
-
- KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED );
- KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT );
-
- english =
- lang == NULL || // In all these cases English language is used.
- strcmp( lang, "" ) == 0 ||
- strcmp( lang, " " ) == 0 ||
- // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime resets LANG env var
- // to space if it is not set".
- strcmp( lang, "C" ) == 0 ||
- strcmp( lang, "POSIX" ) == 0;
-
- if ( ! english ) { // English language is not yet detected, let us continue.
- // Format of LANG is: [language[_territory][.codeset][@modifier]]
- // Strip all parts except language.
- char * tail = NULL;
- __kmp_str_split( lang, '@', & lang, & tail );
- __kmp_str_split( lang, '.', & lang, & tail );
- __kmp_str_split( lang, '_', & lang, & tail );
- english = ( strcmp( lang, "en" ) == 0 );
- }; // if
-
- KMP_INTERNAL_FREE( lang );
-
- // Do not try to open English catalog because internal messages are
- // exact copy of messages in English catalog.
- if ( english ) {
- status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened.
- return;
- }
-
- cat = catopen( name, 0 );
- // TODO: Why do we pass 0 in flags?
- status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED );
-
- if ( status == KMP_I18N_ABSENT ) {
- if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to
- int error = errno; // Save errno immediately.
- char * nlspath = __kmp_env_get( "NLSPATH" );
- char * lang = __kmp_env_get( "LANG" );
-
- // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so
- // __kmp_i18n_catgets() will not try to open catalog, but will return default message.
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantOpenMessageCatalog, name ),
- KMP_ERR( error ),
- KMP_HNT( CheckEnvVar, "NLSPATH", nlspath ),
- KMP_HNT( CheckEnvVar, "LANG", lang ),
- __kmp_msg_null
- );
- KMP_INFORM( WillUseDefaultMessages );
- KMP_INTERNAL_FREE( nlspath );
- KMP_INTERNAL_FREE( lang );
- }
- } else { // status == KMP_I18N_OPENED
-
- int section = get_section( kmp_i18n_prp_Version );
- int number = get_number( kmp_i18n_prp_Version );
- char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ];
- // Expected version of the catalog.
- kmp_str_buf_t version; // Actual version of the catalog.
- __kmp_str_buf_init( & version );
- __kmp_str_buf_print( & version, "%s", catgets( cat, section, number, NULL ) );
-
- // String returned by catgets is invalid after closing the catalog, so copy it.
- if ( strcmp( version.str, expected ) != 0 ) {
- __kmp_i18n_catclose(); // Close bad catalog.
- status = KMP_I18N_ABSENT; // And mark it as absent.
- if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to
- // And now print a warning using default messages.
- char const * name = "NLSPATH";
- char const * nlspath = __kmp_env_get( name );
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( WrongMessageCatalog, name, version.str, expected ),
- KMP_HNT( CheckEnvVar, name, nlspath ),
- __kmp_msg_null
- );
- KMP_INFORM( WillUseDefaultMessages );
- KMP_INTERNAL_FREE( (void *) nlspath );
- } // __kmp_generate_warnings
- }; // if
- __kmp_str_buf_free( & version );
-
- }; // if
-
-} // func __kmp_i18n_do_catopen
-
-
-void
-__kmp_i18n_catclose(
-) {
- if ( status == KMP_I18N_OPENED ) {
- KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT );
- catclose( cat );
- cat = KMP_I18N_NULLCAT;
- }; // if
- status = KMP_I18N_CLOSED;
-} // func __kmp_i18n_catclose
-
-
-char const *
-__kmp_i18n_catgets(
- kmp_i18n_id_t id
-) {
-
- int section = get_section( id );
- int number = get_number( id );
- char const * message = NULL;
-
- if ( 1 <= section && section <= __kmp_i18n_default_table.size ) {
- if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) {
- if ( status == KMP_I18N_CLOSED ) {
- __kmp_i18n_catopen();
- }; // if
- if ( status == KMP_I18N_OPENED ) {
- message =
- catgets(
- cat,
- section, number,
- __kmp_i18n_default_table.sect[ section ].str[ number ]
- );
- }; // if
- if ( message == NULL ) {
- message = __kmp_i18n_default_table.sect[ section ].str[ number ];
- }; // if
- }; // if
- }; // if
- if ( message == NULL ) {
- message = no_message_available;
- }; // if
- return message;
-
-} // func __kmp_i18n_catgets
-
-
-#endif // KMP_OS_UNIX
-
-/*
- ================================================================================================
- Windows* OS part.
- ================================================================================================
-*/
-
-#if KMP_OS_WINDOWS
-#define KMP_I18N_OK
-
-#include "kmp_environment.h"
-#include <windows.h>
-
-#define KMP_I18N_NULLCAT NULL
-static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile?
-static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide40ui.dll" : "libompui.dll" );
-
-static kmp_i18n_table_t table = { 0, NULL };
- // Messages formatted by FormatMessage() should be freed, but catgets() interface assumes
- // user will not free messages. So we cache all the retrieved messages in the table, which
- // are freed at catclose().
-static UINT const default_code_page = CP_OEMCP;
-static UINT code_page = default_code_page;
-
-static char const * ___catgets( kmp_i18n_id_t id );
-static UINT get_code_page();
-static void kmp_i18n_table_free( kmp_i18n_table_t * table );
-
-
-static UINT
-get_code_page(
-) {
-
- UINT cp = default_code_page;
- char const * value = __kmp_env_get( "KMP_CODEPAGE" );
- if ( value != NULL ) {
- if ( _stricmp( value, "ANSI" ) == 0 ) {
- cp = CP_ACP;
- } else if ( _stricmp( value, "OEM" ) == 0 ) {
- cp = CP_OEMCP;
- } else if ( _stricmp( value, "UTF-8" ) == 0 || _stricmp( value, "UTF8" ) == 0 ) {
- cp = CP_UTF8;
- } else if ( _stricmp( value, "UTF-7" ) == 0 || _stricmp( value, "UTF7" ) == 0 ) {
- cp = CP_UTF7;
- } else {
- // !!! TODO: Issue a warning?
- }; // if
- }; // if
- KMP_INTERNAL_FREE( (void *) value );
- return cp;
-
-} // func get_code_page
-
-
-static void
-kmp_i18n_table_free(
- kmp_i18n_table_t * table
-) {
- int s;
- int m;
- for ( s = 0; s < table->size; ++ s ) {
- for ( m = 0; m < table->sect[ s ].size; ++ m ) {
- // Free message.
- KMP_INTERNAL_FREE( (void *) table->sect[ s ].str[ m ] );
- table->sect[ s ].str[ m ] = NULL;
- }; // for m
- table->sect[ s ].size = 0;
- // Free section itself.
- KMP_INTERNAL_FREE ( (void *) table->sect[ s ].str );
- table->sect[ s ].str = NULL;
- }; // for s
- table->size = 0;
- KMP_INTERNAL_FREE( (void *) table->sect );
- table->sect = NULL;
-} // kmp_i8n_table_free
-
-
-void
-__kmp_i18n_do_catopen(
-) {
-
- LCID locale_id = GetThreadLocale();
- WORD lang_id = LANGIDFROMLCID( locale_id );
- WORD primary_lang_id = PRIMARYLANGID( lang_id );
- kmp_str_buf_t path;
-
- KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED );
- KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT );
-
- __kmp_str_buf_init( & path );
-
- // Do not try to open English catalog because internal messages are
- // exact copy of messages in English catalog.
- if ( primary_lang_id == LANG_ENGLISH ) {
- status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened.
- goto end;
- }; // if
-
- // Construct resource DLL name.
- /*
- Simple
- LoadLibrary( name )
- is not suitable due to security issue (see
- http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full
- path to the message catalog.
- */
- {
-
- // Get handle of our DLL first.
- HMODULE handle;
- BOOL brc =
- GetModuleHandleEx(
- GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
- reinterpret_cast< LPCSTR >( & __kmp_i18n_do_catopen ),
- & handle
- );
- if ( ! brc ) { // Error occurred.
- status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened.
- goto end;
- // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and print
- // a proper warning.
- }; // if
-
- // Now get path to the our DLL.
- for ( ; ; ) {
- DWORD drc = GetModuleFileName( handle, path.str, path.size );
- if ( drc == 0 ) { // Error occurred.
- status = KMP_I18N_ABSENT;
- goto end;
- }; // if
- if ( drc < path.size ) {
- path.used = drc;
- break;
- }; // if
- __kmp_str_buf_reserve( & path, path.size * 2 );
- }; // forever
-
- // Now construct the name of message catalog.
- kmp_str_fname fname;
- __kmp_str_fname_init( & fname, path.str );
- __kmp_str_buf_clear( & path );
- __kmp_str_buf_print( & path, "%s%lu/%s", fname.dir, (unsigned long)( locale_id ), name );
- __kmp_str_fname_free( & fname );
-
- }
-
- // For security reasons, use LoadLibraryEx() and load message catalog as a data file.
- cat = LoadLibraryEx( path.str, NULL, LOAD_LIBRARY_AS_DATAFILE );
- status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED );
-
- if ( status == KMP_I18N_ABSENT ) {
- if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to
- DWORD error = GetLastError();
- // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so
- // __kmp_i18n_catgets() will not try to open catalog but will return default message.
- /*
- If message catalog for another architecture found (e.g. OpenMP RTL
- for IA-32 architecture opens libompui.dll for Intel(R) 64)
- Windows* OS returns error 193 (ERROR_BAD_EXE_FORMAT). However,
- FormatMessage fails to return a message for this error, so user
- will see:
-
- OMP: Warning #2: Cannot open message catalog "1041\libompui.dll":
- OMP: System error #193: (No system error message available)
- OMP: Info #3: Default messages will be used.
-
- Issue a hint in this case to let cause of trouble more understandable.
- */
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantOpenMessageCatalog, path.str ),
- KMP_SYSERRCODE( error ),
- ( error == ERROR_BAD_EXE_FORMAT ? KMP_HNT( BadExeFormat, path.str, KMP_ARCH_STR ) : __kmp_msg_null ),
- __kmp_msg_null
- );
- KMP_INFORM( WillUseDefaultMessages );
- }
- } else { // status == KMP_I18N_OPENED
-
- int section = get_section( kmp_i18n_prp_Version );
- int number = get_number( kmp_i18n_prp_Version );
- char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ];
- kmp_str_buf_t version; // Actual version of the catalog.
- __kmp_str_buf_init( & version );
- __kmp_str_buf_print( & version, "%s", ___catgets( kmp_i18n_prp_Version ) );
- // String returned by catgets is invalid after closing the catalog, so copy it.
- if ( strcmp( version.str, expected ) != 0 ) {
- // Close bad catalog.
- __kmp_i18n_catclose();
- status = KMP_I18N_ABSENT; // And mark it as absent.
- if (__kmp_generate_warnings > kmp_warnings_low) {
- // And now print a warning using default messages.
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( WrongMessageCatalog, path.str, version.str, expected ),
- __kmp_msg_null
- );
- KMP_INFORM( WillUseDefaultMessages );
- } // __kmp_generate_warnings
- }; // if
- __kmp_str_buf_free( & version );
-
- }; // if
- code_page = get_code_page();
-
- end:
- __kmp_str_buf_free( & path );
- return;
-
-} // func __kmp_i18n_do_catopen
-
-
-void
-__kmp_i18n_catclose(
-) {
- if ( status == KMP_I18N_OPENED ) {
- KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT );
- kmp_i18n_table_free( & table );
- FreeLibrary( cat );
- cat = KMP_I18N_NULLCAT;
- }; // if
- code_page = default_code_page;
- status = KMP_I18N_CLOSED;
-} // func __kmp_i18n_catclose
-
-/*
- We use FormatMessage() to get strings from catalog, get system error messages, etc.
- FormatMessage() tends to return Windows* OS-style end-of-lines, "\r\n". When string is printed,
- printf() also replaces all the occurrences of "\n" with "\r\n" (again!), so sequences like
- "\r\r\r\n" appear in output. It is not too good.
-
- Additional mess comes from message catalog: Our catalog source en_US.mc file (generated by
- message-converter.pl) contains only "\n" characters, but en_US_msg_1033.bin file (produced by
- mc.exe) may contain "\r\n" or just "\n". This mess goes from en_US_msg_1033.bin file to
- message catalog, libompui.dll. For example, message
-
- Error
-
- (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while
-
- OMP: Error %1!d!: %2!s!\n
-
- (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: %2!s!\r\n\n".
-
- Thus, stripping all "\r" normalizes string and returns it to canonical form, so printf() will
- produce correct end-of-line sequences.
-
- ___strip_crs() serves for this purpose: it removes all the occurrences of "\r" in-place and
- returns new length of string.
-*/
-static
-int
-___strip_crs(
- char * str
-) {
- int in = 0; // Input character index.
- int out = 0; // Output character index.
- for ( ; ; ) {
- if ( str[ in ] != '\r' ) {
- str[ out ] = str[ in ];
- ++ out;
- }; // if
- if ( str[ in ] == 0 ) {
- break;
- }; // if
- ++ in;
- }; // forever
- return out - 1;
-} // func __strip_crs
-
-
-static
-char const *
-___catgets(
- kmp_i18n_id_t id
-) {
-
- char * result = NULL;
- PVOID addr = NULL;
- wchar_t * wmsg = NULL;
- DWORD wlen = 0;
- char * msg = NULL;
- int len = 0;
- int rc;
-
- KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT );
- wlen = // wlen does *not* include terminating null.
- FormatMessageW(
- FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- cat,
- id,
- 0, // LangId
- (LPWSTR) & addr,
- 0, // Size in elements, not in bytes.
- NULL
- );
- if ( wlen <= 0 ) {
- goto end;
- }; // if
- wmsg = (wchar_t *) addr; // Warning: wmsg may be not nul-terminated!
-
- // Calculate length of multibyte message.
- len = // Since wlen does not include terminating null, len does not include it also.
- WideCharToMultiByte(
- code_page,
- 0, // Flags.
- wmsg, wlen, // Wide buffer and size.
- NULL, 0, // Buffer and size.
- NULL, NULL // Default char and used default char.
- );
- if ( len <= 0 ) {
- goto end;
- }; // if
-
- // Allocate memory.
- msg = (char *) KMP_INTERNAL_MALLOC( len + 1 );
-
- // Convert wide message to multibyte one.
- rc =
- WideCharToMultiByte(
- code_page,
- 0, // Flags.
- wmsg, wlen, // Wide buffer and size.
- msg, len, // Buffer and size.
- NULL, NULL // Default char and used default char.
- );
- if ( rc <= 0 || rc > len ) {
- goto end;
- }; // if
- KMP_DEBUG_ASSERT( rc == len );
- len = rc;
- msg[ len ] = 0; // Put terminating null to the end.
-
- // Stripping all "\r" before stripping last end-of-line simplifies the task.
- len = ___strip_crs( msg );
-
- // Every message in catalog is terminated with "\n". Strip it.
- if ( len >= 1 && msg[ len - 1 ] == '\n' ) {
- -- len;
- msg[ len ] = 0;
- }; // if
-
- // Everything looks ok.
- result = msg;
- msg = NULL;
-
- end:
-
- if ( msg != NULL ) {
- KMP_INTERNAL_FREE( msg );
- }; // if
- if ( wmsg != NULL ) {
- LocalFree( wmsg );
- }; // if
-
- return result;
-
-} // ___catgets
-
-
-char const *
-__kmp_i18n_catgets(
- kmp_i18n_id_t id
-) {
-
- int section = get_section( id );
- int number = get_number( id );
- char const * message = NULL;
-
- if ( 1 <= section && section <= __kmp_i18n_default_table.size ) {
- if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) {
- if ( status == KMP_I18N_CLOSED ) {
- __kmp_i18n_catopen();
- }; // if
- if ( cat != KMP_I18N_NULLCAT ) {
- if ( table.size == 0 ) {
- table.sect = (kmp_i18n_section_t *)
- KMP_INTERNAL_CALLOC(
- ( __kmp_i18n_default_table.size + 2 ),
- sizeof( kmp_i18n_section_t )
- );
- table.size = __kmp_i18n_default_table.size;
- }; // if
- if ( table.sect[ section ].size == 0 ) {
- table.sect[ section ].str = (const char **)
- KMP_INTERNAL_CALLOC(
- __kmp_i18n_default_table.sect[ section ].size + 2,
- sizeof( char const * )
- );
- table.sect[ section ].size = __kmp_i18n_default_table.sect[ section ].size;
- }; // if
- if ( table.sect[ section ].str[ number ] == NULL ) {
- table.sect[ section ].str[ number ] = ___catgets( id );
- }; // if
- message = table.sect[ section ].str[ number ];
- }; // if
- if ( message == NULL ) {
- // Catalog is not opened or message is not found, return default message.
- message = __kmp_i18n_default_table.sect[ section ].str[ number ];
- }; // if
- }; // if
- }; // if
- if ( message == NULL ) {
- message = no_message_available;
- }; // if
- return message;
-
-} // func __kmp_i18n_catgets
-
-
-#endif // KMP_OS_WINDOWS
-
-// -------------------------------------------------------------------------------------------------
-
-#ifndef KMP_I18N_OK
- #error I18n support is not implemented for this OS.
-#endif // KMP_I18N_OK
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_i18n_dump_catalog(
- kmp_str_buf_t * buffer
-) {
-
- struct kmp_i18n_id_range_t {
- kmp_i18n_id_t first;
- kmp_i18n_id_t last;
- }; // struct kmp_i18n_id_range_t
-
- static struct kmp_i18n_id_range_t ranges[] = {
- { kmp_i18n_prp_first, kmp_i18n_prp_last },
- { kmp_i18n_str_first, kmp_i18n_str_last },
- { kmp_i18n_fmt_first, kmp_i18n_fmt_last },
- { kmp_i18n_msg_first, kmp_i18n_msg_last },
- { kmp_i18n_hnt_first, kmp_i18n_hnt_last }
- }; // ranges
-
- int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t );
- int range;
- kmp_i18n_id_t id;
-
- for ( range = 0; range < num_of_ranges; ++ range ) {
- __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 );
- for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 );
- id < ranges[ range ].last;
- id = (kmp_i18n_id_t)( id + 1 ) ) {
- __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) );
- }; // for id
- }; // for range
-
- __kmp_printf( "%s", buffer->str );
-
-} // __kmp_i18n_dump_catalog
-
-// -------------------------------------------------------------------------------------------------
-
-kmp_msg_t
-__kmp_msg_format(
- kmp_i18n_id_t id,
- ...
-) {
-
- kmp_msg_t msg;
- va_list args;
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
-
- va_start( args, id );
- #if KMP_OS_UNIX
- // On Linux* OS and OS X*, printf() family functions process parameter numbers, for example:
- // "%2$s %1$s".
- __kmp_str_buf_vprint( & buffer, __kmp_i18n_catgets( id ), args );
- #elif KMP_OS_WINDOWS
- // On Winodws, printf() family functions does not recognize GNU style parameter numbers,
- // so we have to use FormatMessage() instead. It recognizes parameter numbers, e. g.:
- // "%2!s! "%1!s!".
- {
- LPTSTR str = NULL;
- int len;
- FormatMessage(
- FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER,
- __kmp_i18n_catgets( id ),
- 0, 0,
- (LPTSTR)( & str ),
- 0,
- & args
- );
- len = ___strip_crs( str );
- __kmp_str_buf_cat( & buffer, str, len );
- LocalFree( str );
- }
- #else
- #error
- #endif
- va_end( args );
- __kmp_str_buf_detach( & buffer );
-
- msg.type = (kmp_msg_type_t)( id >> 16 );
- msg.num = id & 0xFFFF;
- msg.str = buffer.str;
- msg.len = buffer.used;
-
- return msg;
-
-} // __kmp_msg_format
-
-// -------------------------------------------------------------------------------------------------
-
-static
-char *
-sys_error(
- int err
-) {
-
- char * message = NULL;
-
- #if KMP_OS_WINDOWS
-
- LPVOID buffer = NULL;
- int len;
- DWORD rc;
- rc =
- FormatMessage(
- FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
- NULL,
- err,
- MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ), // Default language.
- (LPTSTR) & buffer,
- 0,
- NULL
- );
- if ( rc > 0 ) {
- // Message formatted. Copy it (so we can free it later with normal free().
- message = __kmp_str_format( "%s", (char *) buffer );
- len = ___strip_crs( message ); // Delete carriage returns if any.
- // Strip trailing newlines.
- while ( len > 0 && message[ len - 1 ] == '\n' ) {
- -- len;
- }; // while
- message[ len ] = 0;
- } else {
- // FormatMessage() failed to format system error message. GetLastError() would give us
- // error code, which we would convert to message... this it dangerous recursion, which
- // cannot clarify original error, so we will not even start it.
- }; // if
- if ( buffer != NULL ) {
- LocalFree( buffer );
- }; // if
-
- #else // Non-Windows* OS: Linux* OS or OS X*
-
- /*
- There are 2 incompatible versions of strerror_r:
-
- char * strerror_r( int, char *, size_t ); // GNU version
- int strerror_r( int, char *, size_t ); // XSI version
- */
-
- #if KMP_OS_LINUX
-
- // GNU version of strerror_r.
-
- char buffer[ 2048 ];
- char * const err_msg = strerror_r( err, buffer, sizeof( buffer ) );
- // Do not eliminate this assignment to temporary variable, otherwise compiler would
- // not issue warning if strerror_r() returns `int' instead of expected `char *'.
- message = __kmp_str_format( "%s", err_msg );
-
- #else // OS X*, FreeBSD* etc.
-
- // XSI version of strerror_r.
-
- int size = 2048;
- // TODO: Add checking result of malloc().
- char * buffer = (char *) KMP_INTERNAL_MALLOC( size );
- int rc;
- rc = strerror_r( err, buffer, size );
- if ( rc == -1 ) {
- rc = errno; // XSI version sets errno.
- }; // if
- while ( rc == ERANGE ) { // ERANGE means the buffer is too small.
- KMP_INTERNAL_FREE( buffer );
- size *= 2;
- buffer = (char *) KMP_INTERNAL_MALLOC( size );
- rc = strerror_r( err, buffer, size );
- if ( rc == -1 ) {
- rc = errno; // XSI version sets errno.
- }; // if
- }; // while
- if ( rc == 0 ) {
- message = buffer;
- } else {
- // Buffer is unused. Free it.
- KMP_INTERNAL_FREE( buffer );
- }; // if
-
- #endif
-
- #endif /* KMP_OS_WINDOWS */
-
- if ( message == NULL ) {
- // TODO: I18n this message.
- message = __kmp_str_format( "%s", "(No system error message available)" );
- }; // if
- return message;
-
-} // sys_error
-
-// -------------------------------------------------------------------------------------------------
-
-kmp_msg_t
-__kmp_msg_error_code(
- int code
-) {
-
- kmp_msg_t msg;
- msg.type = kmp_mt_syserr;
- msg.num = code;
- msg.str = sys_error( code );
- msg.len = KMP_STRLEN( msg.str );
- return msg;
-
-} // __kmp_msg_error_code
-
-// -------------------------------------------------------------------------------------------------
-
-kmp_msg_t
-__kmp_msg_error_mesg(
- char const * mesg
-) {
-
- kmp_msg_t msg;
- msg.type = kmp_mt_syserr;
- msg.num = 0;
- msg.str = __kmp_str_format( "%s", mesg );
- msg.len = KMP_STRLEN( msg.str );
- return msg;
-
-} // __kmp_msg_error_mesg
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_msg(
- kmp_msg_severity_t severity,
- kmp_msg_t message,
- ...
-) {
-
- va_list args;
- kmp_i18n_id_t format; // format identifier
- kmp_msg_t fmsg; // formatted message
- kmp_str_buf_t buffer;
-
- if ( severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off )
- return; // no reason to form a string in order to not print it
-
- __kmp_str_buf_init( & buffer );
-
- // Format the primary message.
- switch ( severity ) {
- case kmp_ms_inform : {
- format = kmp_i18n_fmt_Info;
- } break;
- case kmp_ms_warning : {
- format = kmp_i18n_fmt_Warning;
- } break;
- case kmp_ms_fatal : {
- format = kmp_i18n_fmt_Fatal;
- } break;
- default : {
- KMP_DEBUG_ASSERT( 0 );
- };
- }; // switch
- fmsg = __kmp_msg_format( format, message.num, message.str );
- KMP_INTERNAL_FREE( (void *) message.str );
- __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len );
- KMP_INTERNAL_FREE( (void *) fmsg.str );
-
- // Format other messages.
- va_start( args, message );
- for ( ; ; ) {
- message = va_arg( args, kmp_msg_t );
- if ( message.type == kmp_mt_dummy && message.str == NULL ) {
- break;
- }; // if
- if ( message.type == kmp_mt_dummy && message.str == __kmp_msg_empty.str ) {
- continue;
- }; // if
- switch ( message.type ) {
- case kmp_mt_hint : {
- format = kmp_i18n_fmt_Hint;
- } break;
- case kmp_mt_syserr : {
- format = kmp_i18n_fmt_SysErr;
- } break;
- default : {
- KMP_DEBUG_ASSERT( 0 );
- };
- }; // switch
- fmsg = __kmp_msg_format( format, message.num, message.str );
- KMP_INTERNAL_FREE( (void *) message.str );
- __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len );
- KMP_INTERNAL_FREE( (void *) fmsg.str );
- }; // forever
- va_end( args );
-
- // Print formatted messages.
- // This lock prevents multiple fatal errors on the same problem.
- // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests to hang on OS X*.
- __kmp_printf( "%s", buffer.str );
- __kmp_str_buf_free( & buffer );
-
- if ( severity == kmp_ms_fatal ) {
- #if KMP_OS_WINDOWS
- __kmp_thread_sleep( 500 ); /* Delay to give message a chance to appear before reaping */
- #endif
- __kmp_abort_process();
- }; // if
-
- // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests to hang on OS X*.
-
-} // __kmp_msg
-
-// -------------------------------------------------------------------------------------------------
-
-// end of file //
+/*
+ * kmp_i18n.c
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "kmp_i18n.h"
+
+#include "kmp_os.h"
+#include "kmp_debug.h"
+#include "kmp.h"
+#include "kmp_lock.h"
+#include "kmp_io.h" // __kmp_printf.
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <locale.h>
+#include <stdarg.h>
+
+#include "kmp_i18n_default.inc"
+#include "kmp_str.h"
+#include "kmp_environment.h"
+
+#undef KMP_I18N_OK
+
+#define get_section( id ) ( (id) >> 16 )
+#define get_number( id ) ( (id) & 0xFFFF )
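+
+/* Illustration only (hypothetical helper, not part of the runtime): message ids
+   pack the catalog section in the upper 16 bits and the message number in the
+   lower 16 bits, which is exactly what the two macros above extract. */
+static int __kmp_i18n_id_packing_demo( void ) {
+    int id = ( 3 << 16 ) | 7;                                /* hypothetical id: section 3, message 7 */
+    return get_section( id ) == 3 && get_number( id ) == 7;  /* evaluates to 1 */
+}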
+
+kmp_msg_t __kmp_msg_empty = { kmp_mt_dummy, 0, "", 0 };
+kmp_msg_t __kmp_msg_null = { kmp_mt_dummy, 0, NULL, 0 };
+static char const * no_message_available = "(No message available)";
+
+enum kmp_i18n_cat_status {
+ KMP_I18N_CLOSED, // Not yet opened or closed.
+ KMP_I18N_OPENED, // Opened successfully, ready to use.
+ KMP_I18N_ABSENT // Opening failed, message catalog should not be used.
+}; // enum kmp_i18n_cat_status
+typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t;
+static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED;
+
+/*
+ Message catalog is opened at first usage, so we have to synchronize opening to avoid race and
+ multiple openings.
+
+ Closing does not require synchronization, because catalog is closed very late at library
+ shutting down, when no other threads are alive.
+*/
+
+static void __kmp_i18n_do_catopen();
+static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( lock );
+    // The `lock' variable could be placed inside the __kmp_i18n_catopen function because it is used
+    // only by that function. But we are afraid a (buggy) compiler may treat it wrongly, so we put it
+    // outside of the function just in case.
+
+void
+__kmp_i18n_catopen(
+) {
+ if ( status == KMP_I18N_CLOSED ) {
+ __kmp_acquire_bootstrap_lock( & lock );
+ if ( status == KMP_I18N_CLOSED ) {
+ __kmp_i18n_do_catopen();
+ }; // if
+ __kmp_release_bootstrap_lock( & lock );
+ }; // if
+} // func __kmp_i18n_catopen
+
+
+/*
+ ================================================================================================
+ Linux* OS and OS X* part.
+ ================================================================================================
+*/
+
+#if KMP_OS_UNIX
+#define KMP_I18N_OK
+
+#include <nl_types.h>
+
+#define KMP_I18N_NULLCAT ((nl_catd)( -1 ))
+static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile?
+static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat" );
+
+/*
+ Useful links:
+ http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02
+ http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html
+ http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html
+*/
+
+void
+__kmp_i18n_do_catopen(
+) {
+ int english = 0;
+ char * lang = __kmp_env_get( "LANG" );
+ // TODO: What about LC_ALL or LC_MESSAGES?
+
+ KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED );
+ KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT );
+
+ english =
+ lang == NULL || // In all these cases English language is used.
+ strcmp( lang, "" ) == 0 ||
+ strcmp( lang, " " ) == 0 ||
+ // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime resets LANG env var
+ // to space if it is not set".
+ strcmp( lang, "C" ) == 0 ||
+ strcmp( lang, "POSIX" ) == 0;
+
+ if ( ! english ) { // English language is not yet detected, let us continue.
+ // Format of LANG is: [language[_territory][.codeset][@modifier]]
+ // Strip all parts except language.
+ char * tail = NULL;
+ __kmp_str_split( lang, '@', & lang, & tail );
+ __kmp_str_split( lang, '.', & lang, & tail );
+ __kmp_str_split( lang, '_', & lang, & tail );
+ english = ( strcmp( lang, "en" ) == 0 );
+ }; // if
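+    /* Illustration (hypothetical value): LANG="ru_RU.KOI8-R@cyrillic" is split
+       at '@' -> "ru_RU.KOI8-R", then at '.' -> "ru_RU", then at '_' -> "ru",
+       and "ru" is the token compared against "en" above. */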
+
+ KMP_INTERNAL_FREE( lang );
+
+ // Do not try to open English catalog because internal messages are
+ // exact copy of messages in English catalog.
+ if ( english ) {
+ status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened.
+ return;
+ }
+
+ cat = catopen( name, 0 );
+ // TODO: Why do we pass 0 in flags?
+ status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED );
+
+ if ( status == KMP_I18N_ABSENT ) {
+ if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to
+ int error = errno; // Save errno immediately.
+ char * nlspath = __kmp_env_get( "NLSPATH" );
+ char * lang = __kmp_env_get( "LANG" );
+
+ // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so
+ // __kmp_i18n_catgets() will not try to open catalog, but will return default message.
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantOpenMessageCatalog, name ),
+ KMP_ERR( error ),
+ KMP_HNT( CheckEnvVar, "NLSPATH", nlspath ),
+ KMP_HNT( CheckEnvVar, "LANG", lang ),
+ __kmp_msg_null
+ );
+ KMP_INFORM( WillUseDefaultMessages );
+ KMP_INTERNAL_FREE( nlspath );
+ KMP_INTERNAL_FREE( lang );
+ }
+ } else { // status == KMP_I18N_OPENED
+
+ int section = get_section( kmp_i18n_prp_Version );
+ int number = get_number( kmp_i18n_prp_Version );
+ char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ];
+ // Expected version of the catalog.
+ kmp_str_buf_t version; // Actual version of the catalog.
+ __kmp_str_buf_init( & version );
+ __kmp_str_buf_print( & version, "%s", catgets( cat, section, number, NULL ) );
+
+ // String returned by catgets is invalid after closing the catalog, so copy it.
+ if ( strcmp( version.str, expected ) != 0 ) {
+ __kmp_i18n_catclose(); // Close bad catalog.
+ status = KMP_I18N_ABSENT; // And mark it as absent.
+ if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to
+ // And now print a warning using default messages.
+ char const * name = "NLSPATH";
+ char const * nlspath = __kmp_env_get( name );
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( WrongMessageCatalog, name, version.str, expected ),
+ KMP_HNT( CheckEnvVar, name, nlspath ),
+ __kmp_msg_null
+ );
+ KMP_INFORM( WillUseDefaultMessages );
+ KMP_INTERNAL_FREE( (void *) nlspath );
+ } // __kmp_generate_warnings
+ }; // if
+ __kmp_str_buf_free( & version );
+
+ }; // if
+
+} // func __kmp_i18n_do_catopen
+
+
+void
+__kmp_i18n_catclose(
+) {
+ if ( status == KMP_I18N_OPENED ) {
+ KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT );
+ catclose( cat );
+ cat = KMP_I18N_NULLCAT;
+ }; // if
+ status = KMP_I18N_CLOSED;
+} // func __kmp_i18n_catclose
+
+
+char const *
+__kmp_i18n_catgets(
+ kmp_i18n_id_t id
+) {
+
+ int section = get_section( id );
+ int number = get_number( id );
+ char const * message = NULL;
+
+ if ( 1 <= section && section <= __kmp_i18n_default_table.size ) {
+ if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) {
+ if ( status == KMP_I18N_CLOSED ) {
+ __kmp_i18n_catopen();
+ }; // if
+ if ( status == KMP_I18N_OPENED ) {
+ message =
+ catgets(
+ cat,
+ section, number,
+ __kmp_i18n_default_table.sect[ section ].str[ number ]
+ );
+ }; // if
+ if ( message == NULL ) {
+ message = __kmp_i18n_default_table.sect[ section ].str[ number ];
+ }; // if
+ }; // if
+ }; // if
+ if ( message == NULL ) {
+ message = no_message_available;
+ }; // if
+ return message;
+
+} // func __kmp_i18n_catgets
+
+
+#endif // KMP_OS_UNIX
+
+/*
+ ================================================================================================
+ Windows* OS part.
+ ================================================================================================
+*/
+
+#if KMP_OS_WINDOWS
+#define KMP_I18N_OK
+
+#include "kmp_environment.h"
+#include <windows.h>
+
+#define KMP_I18N_NULLCAT NULL
+static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile?
+static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide40ui.dll" : "libompui.dll" );
+
+static kmp_i18n_table_t table = { 0, NULL };
+    // Messages formatted by FormatMessage() should be freed, but the catgets() interface assumes
+    // the user will not free messages. So we cache all the retrieved messages in the table; they
+    // are freed at catclose().
+static UINT const default_code_page = CP_OEMCP;
+static UINT code_page = default_code_page;
+
+static char const * ___catgets( kmp_i18n_id_t id );
+static UINT get_code_page();
+static void kmp_i18n_table_free( kmp_i18n_table_t * table );
+
+
+static UINT
+get_code_page(
+) {
+
+ UINT cp = default_code_page;
+ char const * value = __kmp_env_get( "KMP_CODEPAGE" );
+ if ( value != NULL ) {
+ if ( _stricmp( value, "ANSI" ) == 0 ) {
+ cp = CP_ACP;
+ } else if ( _stricmp( value, "OEM" ) == 0 ) {
+ cp = CP_OEMCP;
+ } else if ( _stricmp( value, "UTF-8" ) == 0 || _stricmp( value, "UTF8" ) == 0 ) {
+ cp = CP_UTF8;
+ } else if ( _stricmp( value, "UTF-7" ) == 0 || _stricmp( value, "UTF7" ) == 0 ) {
+ cp = CP_UTF7;
+ } else {
+ // !!! TODO: Issue a warning?
+ }; // if
+ }; // if
+ KMP_INTERNAL_FREE( (void *) value );
+ return cp;
+
+} // func get_code_page
+
+
+static void
+kmp_i18n_table_free(
+ kmp_i18n_table_t * table
+) {
+ int s;
+ int m;
+ for ( s = 0; s < table->size; ++ s ) {
+ for ( m = 0; m < table->sect[ s ].size; ++ m ) {
+ // Free message.
+ KMP_INTERNAL_FREE( (void *) table->sect[ s ].str[ m ] );
+ table->sect[ s ].str[ m ] = NULL;
+ }; // for m
+ table->sect[ s ].size = 0;
+ // Free section itself.
+ KMP_INTERNAL_FREE ( (void *) table->sect[ s ].str );
+ table->sect[ s ].str = NULL;
+ }; // for s
+ table->size = 0;
+ KMP_INTERNAL_FREE( (void *) table->sect );
+ table->sect = NULL;
+} // kmp_i18n_table_free
+
+
+void
+__kmp_i18n_do_catopen(
+) {
+
+ LCID locale_id = GetThreadLocale();
+ WORD lang_id = LANGIDFROMLCID( locale_id );
+ WORD primary_lang_id = PRIMARYLANGID( lang_id );
+ kmp_str_buf_t path;
+
+ KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED );
+ KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT );
+
+ __kmp_str_buf_init( & path );
+
+ // Do not try to open English catalog because internal messages are
+ // exact copy of messages in English catalog.
+ if ( primary_lang_id == LANG_ENGLISH ) {
+ status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened.
+ goto end;
+ }; // if
+
+ // Construct resource DLL name.
+ /*
+ Simple
+ LoadLibrary( name )
+ is not suitable due to security issue (see
+ http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full
+ path to the message catalog.
+ */
+ {
+
+ // Get handle of our DLL first.
+ HMODULE handle;
+ BOOL brc =
+ GetModuleHandleEx(
+ GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ reinterpret_cast< LPCSTR >( & __kmp_i18n_do_catopen ),
+ & handle
+ );
+ if ( ! brc ) { // Error occurred.
+ status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened.
+ goto end;
+ // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and print
+ // a proper warning.
+ }; // if
+
+        // Now get the path to our DLL.
+ for ( ; ; ) {
+ DWORD drc = GetModuleFileName( handle, path.str, path.size );
+ if ( drc == 0 ) { // Error occurred.
+ status = KMP_I18N_ABSENT;
+ goto end;
+ }; // if
+ if ( drc < path.size ) {
+ path.used = drc;
+ break;
+ }; // if
+ __kmp_str_buf_reserve( & path, path.size * 2 );
+ }; // forever
+
+ // Now construct the name of message catalog.
+ kmp_str_fname fname;
+ __kmp_str_fname_init( & fname, path.str );
+ __kmp_str_buf_clear( & path );
+ __kmp_str_buf_print( & path, "%s%lu/%s", fname.dir, (unsigned long)( locale_id ), name );
+ __kmp_str_fname_free( & fname );
+
+ }
+
+ // For security reasons, use LoadLibraryEx() and load message catalog as a data file.
+ cat = LoadLibraryEx( path.str, NULL, LOAD_LIBRARY_AS_DATAFILE );
+ status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED );
+
+ if ( status == KMP_I18N_ABSENT ) {
+ if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to
+ DWORD error = GetLastError();
+ // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so
+ // __kmp_i18n_catgets() will not try to open catalog but will return default message.
+ /*
+ If message catalog for another architecture found (e.g. OpenMP RTL
+ for IA-32 architecture opens libompui.dll for Intel(R) 64)
+ Windows* OS returns error 193 (ERROR_BAD_EXE_FORMAT). However,
+ FormatMessage fails to return a message for this error, so user
+ will see:
+
+ OMP: Warning #2: Cannot open message catalog "1041\libompui.dll":
+ OMP: System error #193: (No system error message available)
+ OMP: Info #3: Default messages will be used.
+
+                    Issue a hint in this case to make the cause of the trouble more understandable.
+ */
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantOpenMessageCatalog, path.str ),
+ KMP_SYSERRCODE( error ),
+ ( error == ERROR_BAD_EXE_FORMAT ? KMP_HNT( BadExeFormat, path.str, KMP_ARCH_STR ) : __kmp_msg_null ),
+ __kmp_msg_null
+ );
+ KMP_INFORM( WillUseDefaultMessages );
+ }
+ } else { // status == KMP_I18N_OPENED
+
+ int section = get_section( kmp_i18n_prp_Version );
+ int number = get_number( kmp_i18n_prp_Version );
+ char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ];
+ kmp_str_buf_t version; // Actual version of the catalog.
+ __kmp_str_buf_init( & version );
+ __kmp_str_buf_print( & version, "%s", ___catgets( kmp_i18n_prp_Version ) );
+ // String returned by catgets is invalid after closing the catalog, so copy it.
+ if ( strcmp( version.str, expected ) != 0 ) {
+ // Close bad catalog.
+ __kmp_i18n_catclose();
+ status = KMP_I18N_ABSENT; // And mark it as absent.
+ if (__kmp_generate_warnings > kmp_warnings_low) {
+ // And now print a warning using default messages.
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( WrongMessageCatalog, path.str, version.str, expected ),
+ __kmp_msg_null
+ );
+ KMP_INFORM( WillUseDefaultMessages );
+ } // __kmp_generate_warnings
+ }; // if
+ __kmp_str_buf_free( & version );
+
+ }; // if
+ code_page = get_code_page();
+
+ end:
+ __kmp_str_buf_free( & path );
+ return;
+
+} // func __kmp_i18n_do_catopen
+
+
+void
+__kmp_i18n_catclose(
+) {
+ if ( status == KMP_I18N_OPENED ) {
+ KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT );
+ kmp_i18n_table_free( & table );
+ FreeLibrary( cat );
+ cat = KMP_I18N_NULLCAT;
+ }; // if
+ code_page = default_code_page;
+ status = KMP_I18N_CLOSED;
+} // func __kmp_i18n_catclose
+
+/*
+ We use FormatMessage() to get strings from catalog, get system error messages, etc.
+ FormatMessage() tends to return Windows* OS-style end-of-lines, "\r\n". When string is printed,
+ printf() also replaces all the occurrences of "\n" with "\r\n" (again!), so sequences like
+    "\r\r\r\n" appear in the output, which is undesirable.
+
+ Additional mess comes from message catalog: Our catalog source en_US.mc file (generated by
+ message-converter.pl) contains only "\n" characters, but en_US_msg_1033.bin file (produced by
+ mc.exe) may contain "\r\n" or just "\n". This mess goes from en_US_msg_1033.bin file to
+ message catalog, libompui.dll. For example, message
+
+ Error
+
+ (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while
+
+ OMP: Error %1!d!: %2!s!\n
+
+ (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: %2!s!\r\n\n".
+
+ Thus, stripping all "\r" normalizes string and returns it to canonical form, so printf() will
+ produce correct end-of-line sequences.
+
+ ___strip_crs() serves for this purpose: it removes all the occurrences of "\r" in-place and
+ returns new length of string.
+*/
+static
+int
+___strip_crs(
+ char * str
+) {
+ int in = 0; // Input character index.
+ int out = 0; // Output character index.
+ for ( ; ; ) {
+ if ( str[ in ] != '\r' ) {
+ str[ out ] = str[ in ];
+ ++ out;
+ }; // if
+ if ( str[ in ] == 0 ) {
+ break;
+ }; // if
+ ++ in;
+ }; // forever
+ return out - 1;
+} // func ___strip_crs
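+
+/* Illustration only (hypothetical helper, not used by the runtime): shows the
+   effect of ___strip_crs() on the catalog string discussed above. */
+static int ___strip_crs_demo( void ) {
+    char msg[] = "OMP: Error %1!d!: %2!s!\r\n\n";
+    int len = ___strip_crs( msg );   /* msg becomes "OMP: Error %1!d!: %2!s!\n\n" */
+    return ( len == 25 );            /* len equals strlen( msg ) after stripping */
+}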
+
+
+static
+char const *
+___catgets(
+ kmp_i18n_id_t id
+) {
+
+ char * result = NULL;
+ PVOID addr = NULL;
+ wchar_t * wmsg = NULL;
+ DWORD wlen = 0;
+ char * msg = NULL;
+ int len = 0;
+ int rc;
+
+ KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT );
+ wlen = // wlen does *not* include terminating null.
+ FormatMessageW(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ cat,
+ id,
+ 0, // LangId
+ (LPWSTR) & addr,
+ 0, // Size in elements, not in bytes.
+ NULL
+ );
+ if ( wlen <= 0 ) {
+ goto end;
+ }; // if
+    wmsg = (wchar_t *) addr;        // Warning: wmsg may not be nul-terminated!
+
+ // Calculate length of multibyte message.
+ len = // Since wlen does not include terminating null, len does not include it also.
+ WideCharToMultiByte(
+ code_page,
+ 0, // Flags.
+ wmsg, wlen, // Wide buffer and size.
+ NULL, 0, // Buffer and size.
+ NULL, NULL // Default char and used default char.
+ );
+ if ( len <= 0 ) {
+ goto end;
+ }; // if
+
+ // Allocate memory.
+ msg = (char *) KMP_INTERNAL_MALLOC( len + 1 );
+
+ // Convert wide message to multibyte one.
+ rc =
+ WideCharToMultiByte(
+ code_page,
+ 0, // Flags.
+ wmsg, wlen, // Wide buffer and size.
+ msg, len, // Buffer and size.
+ NULL, NULL // Default char and used default char.
+ );
+ if ( rc <= 0 || rc > len ) {
+ goto end;
+ }; // if
+ KMP_DEBUG_ASSERT( rc == len );
+ len = rc;
+ msg[ len ] = 0; // Put terminating null to the end.
+
+ // Stripping all "\r" before stripping last end-of-line simplifies the task.
+ len = ___strip_crs( msg );
+
+ // Every message in catalog is terminated with "\n". Strip it.
+ if ( len >= 1 && msg[ len - 1 ] == '\n' ) {
+ -- len;
+ msg[ len ] = 0;
+ }; // if
+
+ // Everything looks ok.
+ result = msg;
+ msg = NULL;
+
+ end:
+
+ if ( msg != NULL ) {
+ KMP_INTERNAL_FREE( msg );
+ }; // if
+ if ( wmsg != NULL ) {
+ LocalFree( wmsg );
+ }; // if
+
+ return result;
+
+} // ___catgets
+
+
+char const *
+__kmp_i18n_catgets(
+ kmp_i18n_id_t id
+) {
+
+ int section = get_section( id );
+ int number = get_number( id );
+ char const * message = NULL;
+
+ if ( 1 <= section && section <= __kmp_i18n_default_table.size ) {
+ if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) {
+ if ( status == KMP_I18N_CLOSED ) {
+ __kmp_i18n_catopen();
+ }; // if
+ if ( cat != KMP_I18N_NULLCAT ) {
+ if ( table.size == 0 ) {
+ table.sect = (kmp_i18n_section_t *)
+ KMP_INTERNAL_CALLOC(
+ ( __kmp_i18n_default_table.size + 2 ),
+ sizeof( kmp_i18n_section_t )
+ );
+ table.size = __kmp_i18n_default_table.size;
+ }; // if
+ if ( table.sect[ section ].size == 0 ) {
+ table.sect[ section ].str = (const char **)
+ KMP_INTERNAL_CALLOC(
+ __kmp_i18n_default_table.sect[ section ].size + 2,
+ sizeof( char const * )
+ );
+ table.sect[ section ].size = __kmp_i18n_default_table.sect[ section ].size;
+ }; // if
+ if ( table.sect[ section ].str[ number ] == NULL ) {
+ table.sect[ section ].str[ number ] = ___catgets( id );
+ }; // if
+ message = table.sect[ section ].str[ number ];
+ }; // if
+ if ( message == NULL ) {
+ // Catalog is not opened or message is not found, return default message.
+ message = __kmp_i18n_default_table.sect[ section ].str[ number ];
+ }; // if
+ }; // if
+ }; // if
+ if ( message == NULL ) {
+ message = no_message_available;
+ }; // if
+ return message;
+
+} // func __kmp_i18n_catgets
+
+
+#endif // KMP_OS_WINDOWS
+
+// -------------------------------------------------------------------------------------------------
+
+#ifndef KMP_I18N_OK
+ #error I18n support is not implemented for this OS.
+#endif // KMP_I18N_OK
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_i18n_dump_catalog(
+ kmp_str_buf_t * buffer
+) {
+
+ struct kmp_i18n_id_range_t {
+ kmp_i18n_id_t first;
+ kmp_i18n_id_t last;
+ }; // struct kmp_i18n_id_range_t
+
+ static struct kmp_i18n_id_range_t ranges[] = {
+ { kmp_i18n_prp_first, kmp_i18n_prp_last },
+ { kmp_i18n_str_first, kmp_i18n_str_last },
+ { kmp_i18n_fmt_first, kmp_i18n_fmt_last },
+ { kmp_i18n_msg_first, kmp_i18n_msg_last },
+ { kmp_i18n_hnt_first, kmp_i18n_hnt_last }
+ }; // ranges
+
+ int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t );
+ int range;
+ kmp_i18n_id_t id;
+
+ for ( range = 0; range < num_of_ranges; ++ range ) {
+ __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 );
+ for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 );
+ id < ranges[ range ].last;
+ id = (kmp_i18n_id_t)( id + 1 ) ) {
+ __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) );
+ }; // for id
+ }; // for range
+
+ __kmp_printf( "%s", buffer->str );
+
+} // __kmp_i18n_dump_catalog
+
+// -------------------------------------------------------------------------------------------------
+
+kmp_msg_t
+__kmp_msg_format(
+ kmp_i18n_id_t id,
+ ...
+) {
+
+ kmp_msg_t msg;
+ va_list args;
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init( & buffer );
+
+ va_start( args, id );
+ #if KMP_OS_UNIX
+ // On Linux* OS and OS X*, printf() family functions process parameter numbers, for example:
+ // "%2$s %1$s".
+ __kmp_str_buf_vprint( & buffer, __kmp_i18n_catgets( id ), args );
+ #elif KMP_OS_WINDOWS
+        // On Windows, the printf() family of functions does not recognize GNU-style parameter
+        // numbers, so we have to use FormatMessage() instead. It recognizes parameter numbers,
+        // e.g.: "%2!s! %1!s!".
+ {
+ LPTSTR str = NULL;
+ int len;
+ FormatMessage(
+ FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+ __kmp_i18n_catgets( id ),
+ 0, 0,
+ (LPTSTR)( & str ),
+ 0,
+ & args
+ );
+ len = ___strip_crs( str );
+ __kmp_str_buf_cat( & buffer, str, len );
+ LocalFree( str );
+ }
+ #else
+ #error
+ #endif
+ va_end( args );
+ __kmp_str_buf_detach( & buffer );
+
+ msg.type = (kmp_msg_type_t)( id >> 16 );
+ msg.num = id & 0xFFFF;
+ msg.str = buffer.str;
+ msg.len = buffer.used;
+
+ return msg;
+
+} // __kmp_msg_format
+
+// -------------------------------------------------------------------------------------------------
+
+static
+char *
+sys_error(
+ int err
+) {
+
+ char * message = NULL;
+
+ #if KMP_OS_WINDOWS
+
+ LPVOID buffer = NULL;
+ int len;
+ DWORD rc;
+ rc =
+ FormatMessage(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL,
+ err,
+ MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ), // Default language.
+ (LPTSTR) & buffer,
+ 0,
+ NULL
+ );
+ if ( rc > 0 ) {
+        // Message formatted. Copy it (so we can free it later with normal free()).
+ message = __kmp_str_format( "%s", (char *) buffer );
+ len = ___strip_crs( message ); // Delete carriage returns if any.
+ // Strip trailing newlines.
+ while ( len > 0 && message[ len - 1 ] == '\n' ) {
+ -- len;
+ }; // while
+ message[ len ] = 0;
+ } else {
+        // FormatMessage() failed to format the system error message. GetLastError() would give us
+        // an error code, which we would convert to a message... but that is a dangerous recursion
+        // which cannot clarify the original error, so we will not even start it.
+ }; // if
+ if ( buffer != NULL ) {
+ LocalFree( buffer );
+ }; // if
+
+ #else // Non-Windows* OS: Linux* OS or OS X*
+
+ /*
+ There are 2 incompatible versions of strerror_r:
+
+ char * strerror_r( int, char *, size_t ); // GNU version
+ int strerror_r( int, char *, size_t ); // XSI version
+ */
+
+ #if KMP_OS_LINUX
+
+ // GNU version of strerror_r.
+
+ char buffer[ 2048 ];
+ char * const err_msg = strerror_r( err, buffer, sizeof( buffer ) );
+ // Do not eliminate this assignment to a temporary variable, otherwise the compiler would
+ // not issue a warning if strerror_r() returns `int' instead of the expected `char *'.
+ message = __kmp_str_format( "%s", err_msg );
+
+ #else // OS X*, FreeBSD* etc.
+
+ // XSI version of strerror_r.
+
+ int size = 2048;
+ // TODO: Add checking result of malloc().
+ char * buffer = (char *) KMP_INTERNAL_MALLOC( size );
+ int rc;
+ rc = strerror_r( err, buffer, size );
+ if ( rc == -1 ) {
+ rc = errno; // XSI version sets errno.
+ }; // if
+ while ( rc == ERANGE ) { // ERANGE means the buffer is too small.
+ KMP_INTERNAL_FREE( buffer );
+ size *= 2;
+ buffer = (char *) KMP_INTERNAL_MALLOC( size );
+ rc = strerror_r( err, buffer, size );
+ if ( rc == -1 ) {
+ rc = errno; // XSI version sets errno.
+ }; // if
+ }; // while
+ if ( rc == 0 ) {
+ message = buffer;
+ } else {
+ // Buffer is unused. Free it.
+ KMP_INTERNAL_FREE( buffer );
+ }; // if
+
+ #endif
+
+ #endif /* KMP_OS_WINDOWS */
+
+ if ( message == NULL ) {
+ // TODO: I18n this message.
+ message = __kmp_str_format( "%s", "(No system error message available)" );
+ }; // if
+ return message;
+
+} // sys_error
+
+// -------------------------------------------------------------------------------------------------
+
+kmp_msg_t
+__kmp_msg_error_code(
+ int code
+) {
+
+ kmp_msg_t msg;
+ msg.type = kmp_mt_syserr;
+ msg.num = code;
+ msg.str = sys_error( code );
+ msg.len = KMP_STRLEN( msg.str );
+ return msg;
+
+} // __kmp_msg_error_code
+
+// -------------------------------------------------------------------------------------------------
+
+kmp_msg_t
+__kmp_msg_error_mesg(
+ char const * mesg
+) {
+
+ kmp_msg_t msg;
+ msg.type = kmp_mt_syserr;
+ msg.num = 0;
+ msg.str = __kmp_str_format( "%s", mesg );
+ msg.len = KMP_STRLEN( msg.str );
+ return msg;
+
+} // __kmp_msg_error_mesg
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_msg(
+ kmp_msg_severity_t severity,
+ kmp_msg_t message,
+ ...
+) {
+
+ va_list args;
+ kmp_i18n_id_t format; // format identifier
+ kmp_msg_t fmsg; // formatted message
+ kmp_str_buf_t buffer;
+
+ if ( severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off )
+ return; // No reason to form a string we are not going to print.
+
+ __kmp_str_buf_init( & buffer );
+
+ // Format the primary message.
+ switch ( severity ) {
+ case kmp_ms_inform : {
+ format = kmp_i18n_fmt_Info;
+ } break;
+ case kmp_ms_warning : {
+ format = kmp_i18n_fmt_Warning;
+ } break;
+ case kmp_ms_fatal : {
+ format = kmp_i18n_fmt_Fatal;
+ } break;
+ default : {
+ KMP_DEBUG_ASSERT( 0 );
+ };
+ }; // switch
+ fmsg = __kmp_msg_format( format, message.num, message.str );
+ KMP_INTERNAL_FREE( (void *) message.str );
+ __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len );
+ KMP_INTERNAL_FREE( (void *) fmsg.str );
+
+ // Format other messages.
+ va_start( args, message );
+ for ( ; ; ) {
+ message = va_arg( args, kmp_msg_t );
+ if ( message.type == kmp_mt_dummy && message.str == NULL ) {
+ break;
+ }; // if
+ if ( message.type == kmp_mt_dummy && message.str == __kmp_msg_empty.str ) {
+ continue;
+ }; // if
+ switch ( message.type ) {
+ case kmp_mt_hint : {
+ format = kmp_i18n_fmt_Hint;
+ } break;
+ case kmp_mt_syserr : {
+ format = kmp_i18n_fmt_SysErr;
+ } break;
+ default : {
+ KMP_DEBUG_ASSERT( 0 );
+ };
+ }; // switch
+ fmsg = __kmp_msg_format( format, message.num, message.str );
+ KMP_INTERNAL_FREE( (void *) message.str );
+ __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len );
+ KMP_INTERNAL_FREE( (void *) fmsg.str );
+ }; // forever
+ va_end( args );
+
+ // Print formatted messages.
+ // This lock prevents multiple fatal errors on the same problem.
+ // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests to hang on OS X*.
+ __kmp_printf( "%s", buffer.str );
+ __kmp_str_buf_free( & buffer );
+
+ if ( severity == kmp_ms_fatal ) {
+ #if KMP_OS_WINDOWS
+ __kmp_thread_sleep( 500 ); /* Delay to give message a chance to appear before reaping */
+ #endif
+ __kmp_abort_process();
+ }; // if
+
+ // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests to hang on OS X*.
+
+} // __kmp_msg
+
+// -------------------------------------------------------------------------------------------------
+
+// end of file //
diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n.h b/contrib/libs/cxxsupp/openmp/kmp_i18n.h
index 37e59300d1..82ec51b244 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_i18n.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_i18n.h
@@ -1,193 +1,193 @@
-/*
- * kmp_i18n.h
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_I18N_H
-#define KMP_I18N_H
-
-#include "kmp_str.h"
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-/*
- kmp_i18n_id.inc defines kmp_i18n_id_t type. It is an enumeration with identifiers of all the
- messages in the catalog. There is one special identifier: kmp_i18n_null, which denotes absence
- of message.
-*/
-#include "kmp_i18n_id.inc" // Generated file. Do not edit it manually.
-
-/*
- Low-level functions handling message catalog. __kmp_i18n_open() opens message catalog,
- __kmp_i18n_closes() it. Explicit opening is not required: if message catalog is not yet open,
- __kmp_i18n_catgets() will open it implicitly. However, catalog should be explicitly closed,
- otherwise resources (mamory, handles) may leak.
-
- __kmp_i18n_catgets() returns read-only string. It should not be freed.
-
- KMP_I18N_STR macro simplifies acces to strings in message catalog a bit. Following two lines are
- equivalent:
-
- __kmp_i18n_catgets( kmp_i18n_str_Warning )
- KMP_I18N_STR( Warning )
-*/
-
-void __kmp_i18n_catopen();
-void __kmp_i18n_catclose();
-char const * __kmp_i18n_catgets( kmp_i18n_id_t id );
-
-#define KMP_I18N_STR( id ) __kmp_i18n_catgets( kmp_i18n_str_ ## id )
-
-
-/*
- ------------------------------------------------------------------------------------------------
-
- High-level interface for printing strings targeted to the user.
-
- All the strings are divided into 3 types:
-
- * messages,
- * hints,
- * system errors.
-
- There are 3 kind of message severities:
-
- * informational messages,
- * warnings (non-fatal errors),
- * fatal errors.
-
- For example:
-
- OMP: Warning #2: Cannot open message catalog "libguide.cat": (1)
- OMP: System error #2: No such file or directory (2)
- OMP: Hint: Please check NLSPATH environment variable. (3)
- OMP: Info #3: Default messages will be used. (4)
-
- where
-
- (1) is a message of warning severity,
- (2) is a system error caused the previous warning,
- (3) is a hint for the user how to fix the problem,
- (4) is a message of informational severity.
-
- Usage in complex cases (message is accompanied with hints and system errors):
-
- int error = errno; // We need save errno immediately, because it may be changed.
- __kmp_msg(
- kmp_ms_warning, // Severity
- KMP_MSG( CantOpenMessageCatalog, name ), // Primary message
- KMP_ERR( error ), // System error
- KMP_HNT( CheckNLSPATH ), // Hint
- __kmp_msg_null // Variadic argument list finisher
- );
-
- Usage in simple cases (just a message, no system errors or hints):
-
- KMP_INFORM( WillUseDefaultMessages );
- KMP_WARNING( CantOpenMessageCatalog, name );
- KMP_FATAL( StackOverlap );
- KMP_SYSFAIL( "pthread_create", status );
- KMP_CHECK_SYSFAIL( "pthread_create", status );
- KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
-
- ------------------------------------------------------------------------------------------------
-*/
-
-enum kmp_msg_type {
- kmp_mt_dummy = 0, // Special type for internal purposes.
- kmp_mt_mesg = 4, // Primary OpenMP message, could be information, warning, or fatal.
- kmp_mt_hint = 5, // Hint to the user.
- kmp_mt_syserr = -1 // System error message.
-}; // enum kmp_msg_type
-typedef enum kmp_msg_type kmp_msg_type_t;
-
-struct kmp_msg {
- kmp_msg_type_t type;
- int num;
- char const * str;
- int len;
-}; // struct kmp_message
-typedef struct kmp_msg kmp_msg_t;
-
-// Two special messages.
-extern kmp_msg_t __kmp_msg_empty; // Can be used in place where message is required syntactically.
-extern kmp_msg_t __kmp_msg_null; // Denotes the end of variadic list of arguments.
-
-// Helper functions. Creates messages either from message catalog or from system. Note: these
-// functions allocate memory. You should pass created messages to __kmp_msg() function, it will
-// print messages and destroy them.
-kmp_msg_t __kmp_msg_format( kmp_i18n_id_t id, ... );
-kmp_msg_t __kmp_msg_error_code( int code );
-kmp_msg_t __kmp_msg_error_mesg( char const * mesg );
-
-// Helper macros to make calls shorter.
-#define KMP_MSG( ... ) __kmp_msg_format( kmp_i18n_msg_ ## __VA_ARGS__ )
-#define KMP_HNT( ... ) __kmp_msg_format( kmp_i18n_hnt_ ## __VA_ARGS__ )
-#define KMP_SYSERRCODE( code ) __kmp_msg_error_code( code )
-#define KMP_SYSERRMESG( mesg ) __kmp_msg_error_mesg( mesg )
-#define KMP_ERR KMP_SYSERRCODE
-
-// Message severity.
-enum kmp_msg_severity {
- kmp_ms_inform, // Just information for the user.
- kmp_ms_warning, // Non-fatal error, execution continues.
- kmp_ms_fatal // Fatal error, program aborts.
-}; // enum kmp_msg_severity
-typedef enum kmp_msg_severity kmp_msg_severity_t;
-
-// Primary function for printing messages for the user. The first message is mandatory. Any number
-// of system errors and hints may be specified. Argument list must be finished with __kmp_msg_null.
-void __kmp_msg( kmp_msg_severity_t severity, kmp_msg_t message, ... );
-
-// Helper macros to make calls shorter in simple cases.
-#define KMP_INFORM( ... ) __kmp_msg( kmp_ms_inform, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null )
-#define KMP_WARNING( ... ) __kmp_msg( kmp_ms_warning, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null )
-#define KMP_FATAL( ... ) __kmp_msg( kmp_ms_fatal, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null )
-#define KMP_SYSFAIL( func, error ) \
- __kmp_msg( \
- kmp_ms_fatal, \
- KMP_MSG( FunctionError, func ), \
- KMP_SYSERRCODE( error ), \
- __kmp_msg_null \
- )
-
-// Check error, if not zero, generate fatal error message.
-#define KMP_CHECK_SYSFAIL( func, error ) \
- { \
- if ( error ) { \
- KMP_SYSFAIL( func, error ); \
- }; \
- }
-
-// Check status, if not zero, generate fatal error message using errno.
-#define KMP_CHECK_SYSFAIL_ERRNO( func, status ) \
- { \
- if ( status != 0 ) { \
- int error = errno; \
- KMP_SYSFAIL( func, error ); \
- }; \
- }
-
-#ifdef KMP_DEBUG
- void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer );
-#endif // KMP_DEBUG
-
-#ifdef __cplusplus
- }; // extern "C"
-#endif // __cplusplus
-
-#endif // KMP_I18N_H
-
-// end of file //
+/*
+ * kmp_i18n.h
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_I18N_H
+#define KMP_I18N_H
+
+#include "kmp_str.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+/*
+ kmp_i18n_id.inc defines the kmp_i18n_id_t type. It is an enumeration with identifiers of all the
+ messages in the catalog. There is one special identifier, kmp_i18n_null, which denotes the
+ absence of a message.
+*/
+#include "kmp_i18n_id.inc" // Generated file. Do not edit it manually.
+
+/*
+ Low-level functions handling the message catalog. __kmp_i18n_catopen() opens the message
+ catalog, __kmp_i18n_catclose() closes it. Explicit opening is not required: if the message
+ catalog is not yet open, __kmp_i18n_catgets() will open it implicitly. However, the catalog
+ should be explicitly closed, otherwise resources (memory, handles) may leak.
+
+ __kmp_i18n_catgets() returns a read-only string. It should not be freed.
+
+ The KMP_I18N_STR macro simplifies access to strings in the message catalog a bit. The following
+ two lines are equivalent:
+
+ __kmp_i18n_catgets( kmp_i18n_str_Warning )
+ KMP_I18N_STR( Warning )
+*/
+
+void __kmp_i18n_catopen();
+void __kmp_i18n_catclose();
+char const * __kmp_i18n_catgets( kmp_i18n_id_t id );
+
+#define KMP_I18N_STR( id ) __kmp_i18n_catgets( kmp_i18n_str_ ## id )
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+
+ High-level interface for printing strings targeted to the user.
+
+ All the strings are divided into 3 types:
+
+ * messages,
+ * hints,
+ * system errors.
+
+ There are 3 kinds of message severities:
+
+ * informational messages,
+ * warnings (non-fatal errors),
+ * fatal errors.
+
+ For example:
+
+ OMP: Warning #2: Cannot open message catalog "libguide.cat": (1)
+ OMP: System error #2: No such file or directory (2)
+ OMP: Hint: Please check NLSPATH environment variable. (3)
+ OMP: Info #3: Default messages will be used. (4)
+
+ where
+
+ (1) is a message of warning severity,
+ (2) is a system error that caused the previous warning,
+ (3) is a hint for the user how to fix the problem,
+ (4) is a message of informational severity.
+
+ Usage in complex cases (the message is accompanied by hints and system errors):
+
+ int error = errno; // We need to save errno immediately, because it may be changed.
+ __kmp_msg(
+ kmp_ms_warning, // Severity
+ KMP_MSG( CantOpenMessageCatalog, name ), // Primary message
+ KMP_ERR( error ), // System error
+ KMP_HNT( CheckNLSPATH ), // Hint
+ __kmp_msg_null // Variadic argument list finisher
+ );
+
+ Usage in simple cases (just a message, no system errors or hints):
+
+ KMP_INFORM( WillUseDefaultMessages );
+ KMP_WARNING( CantOpenMessageCatalog, name );
+ KMP_FATAL( StackOverlap );
+ KMP_SYSFAIL( "pthread_create", status );
+ KMP_CHECK_SYSFAIL( "pthread_create", status );
+ KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
+
+ ------------------------------------------------------------------------------------------------
+*/
+
+enum kmp_msg_type {
+ kmp_mt_dummy = 0, // Special type for internal purposes.
+ kmp_mt_mesg = 4, // Primary OpenMP message, could be information, warning, or fatal.
+ kmp_mt_hint = 5, // Hint to the user.
+ kmp_mt_syserr = -1 // System error message.
+}; // enum kmp_msg_type
+typedef enum kmp_msg_type kmp_msg_type_t;
+
+struct kmp_msg {
+ kmp_msg_type_t type;
+ int num;
+ char const * str;
+ int len;
+}; // struct kmp_message
+typedef struct kmp_msg kmp_msg_t;
+
+// Two special messages.
+extern kmp_msg_t __kmp_msg_empty; // Can be used in a place where a message is required syntactically.
+extern kmp_msg_t __kmp_msg_null; // Denotes the end of the variadic list of arguments.
+
+// Helper functions that create messages either from the message catalog or from the system. Note:
+// these functions allocate memory. Pass the created messages to the __kmp_msg() function; it will
+// print the messages and destroy them.
+kmp_msg_t __kmp_msg_format( kmp_i18n_id_t id, ... );
+kmp_msg_t __kmp_msg_error_code( int code );
+kmp_msg_t __kmp_msg_error_mesg( char const * mesg );
+
+// Helper macros to make calls shorter.
+#define KMP_MSG( ... ) __kmp_msg_format( kmp_i18n_msg_ ## __VA_ARGS__ )
+#define KMP_HNT( ... ) __kmp_msg_format( kmp_i18n_hnt_ ## __VA_ARGS__ )
+#define KMP_SYSERRCODE( code ) __kmp_msg_error_code( code )
+#define KMP_SYSERRMESG( mesg ) __kmp_msg_error_mesg( mesg )
+#define KMP_ERR KMP_SYSERRCODE
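+// Expansion example (illustrative): KMP_MSG( CantOpenMessageCatalog, name ) expands to
+// __kmp_msg_format( kmp_i18n_msg_CantOpenMessageCatalog, name ), and KMP_HNT( CheckNLSPATH )
+// expands to __kmp_msg_format( kmp_i18n_hnt_CheckNLSPATH ).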
+
+// Message severity.
+enum kmp_msg_severity {
+ kmp_ms_inform, // Just information for the user.
+ kmp_ms_warning, // Non-fatal error, execution continues.
+ kmp_ms_fatal // Fatal error, program aborts.
+}; // enum kmp_msg_severity
+typedef enum kmp_msg_severity kmp_msg_severity_t;
+
+// Primary function for printing messages for the user. The first message is mandatory. Any number
+// of system errors and hints may be specified. The argument list must be terminated with __kmp_msg_null.
+void __kmp_msg( kmp_msg_severity_t severity, kmp_msg_t message, ... );
+
+// Helper macros to make calls shorter in simple cases.
+#define KMP_INFORM( ... ) __kmp_msg( kmp_ms_inform, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null )
+#define KMP_WARNING( ... ) __kmp_msg( kmp_ms_warning, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null )
+#define KMP_FATAL( ... ) __kmp_msg( kmp_ms_fatal, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null )
+#define KMP_SYSFAIL( func, error ) \
+ __kmp_msg( \
+ kmp_ms_fatal, \
+ KMP_MSG( FunctionError, func ), \
+ KMP_SYSERRCODE( error ), \
+ __kmp_msg_null \
+ )
+
+// Check error, if not zero, generate fatal error message.
+#define KMP_CHECK_SYSFAIL( func, error ) \
+ { \
+ if ( error ) { \
+ KMP_SYSFAIL( func, error ); \
+ }; \
+ }
+
+// Check status, if not zero, generate fatal error message using errno.
+#define KMP_CHECK_SYSFAIL_ERRNO( func, status ) \
+ { \
+ if ( status != 0 ) { \
+ int error = errno; \
+ KMP_SYSFAIL( func, error ); \
+ }; \
+ }
+
+#ifdef KMP_DEBUG
+ void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer );
+#endif // KMP_DEBUG
+
+#ifdef __cplusplus
+ }; // extern "C"
+#endif // __cplusplus
+
+#endif // KMP_I18N_H
+
+// end of file //
diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc b/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc
index f975b83794..0a1aa74adf 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc
+++ b/contrib/libs/cxxsupp/openmp/kmp_i18n_default.inc
@@ -1,414 +1,414 @@
-// Do not edit this file! //
-// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. //
-
-static char const *
-__kmp_i18n_default_meta[] =
- {
- NULL,
- "English",
- "USA",
- "1033",
- "2",
- "20140827",
- NULL
- };
-
-static char const *
-__kmp_i18n_default_strings[] =
- {
- NULL,
- "Error",
- "(unknown file)",
- "not a number",
- "bad unit",
- "illegal characters",
- "value too large",
- "value too small",
- "value is not a multiple of 4k",
- "Unknown processor topology",
- "Cannot open /proc/cpuinfo",
- "/proc/cpuinfo",
- "cpuinfo file invalid (No processor records)",
- "cpuinfo file invalid (Too many processor records)",
- "Cannot rewind cpuinfo file",
- "cpuinfo file invalid (long line)",
- "cpuinfo file contains too many entries",
- "cpuinfo file missing processor field",
- "cpuinfo file missing physical id field",
- "cpuinfo file invalid (missing val)",
- "cpuinfo file invalid (duplicate field)",
- "Physical node/pkg/core/thread ids not unique",
- "APIC not present",
- "Invalid cpuid info",
- "APIC ids not unique",
- "Inconsistent cpuid info",
- "Out of heap memory",
- "Memory allocation failed",
- "core",
- "thread",
- "package",
- "node",
- "<undef>",
- "decoding legacy APIC ids",
- "parsing /proc/cpuinfo",
- "value is not defined",
- "Effective settings:",
- "User settings:",
- "warning: pointers or size don't make sense",
- "CPU",
- "TPU",
- "TPUs per package",
- "HT enabled",
- "HT disabled",
- "decoding x2APIC ids",
- "cpuid leaf 11 not supported",
- "cpuid leaf 4 not supported",
- "thread ids not unique",
- "using pthread info",
- "legacy APIC ids not unique",
- "x2APIC ids not unique",
- "OPENMP DISPLAY ENVIRONMENT BEGIN",
- "OPENMP DISPLAY ENVIRONMENT END",
- "[device]",
- "[host]",
- NULL
- };
-
-static char const *
-__kmp_i18n_default_formats[] =
- {
- NULL,
- "OMP: Info #%1$d: %2$s\n",
- "OMP: Warning #%1$d: %2$s\n",
- "OMP: Error #%1$d: %2$s\n",
- "OMP: System error #%1$d: %2$s\n",
- "OMP: Hint: %2$s\n",
- "%1$s pragma (at %2$s:%3$s():%4$s)",
- NULL
- };
-
-static char const *
-__kmp_i18n_default_messages[] =
- {
- NULL,
- "Library is \"serial\".",
- "Cannot open message catalog \"%1$s\":",
- "Default messages will be used.",
- "%1$s: Lock is uninitialized",
- "%1$s: Lock was initialized as simple, but used as nestable",
- "%1$s: Lock was initialized as nestable, but used as simple",
- "%1$s: Lock is already owned by requesting thread",
- "%1$s: Lock is still owned by a thread",
- "%1$s: Attempt to release a lock not owned by any thread",
- "%1$s: Attempt to release a lock owned by another thread",
- "Stack overflow detected for OpenMP thread #%1$d",
- "Stack overlap detected. ",
- "Assertion failure at %1$s(%2$d).",
- "Unable to register a new user thread.",
- "Initializing %1$s, but found %2$s already initialized.",
- "Cannot open file \"%1$s\" for reading:",
- "Getting environment variable \"%1$s\" failed:",
- "Setting environment variable \"%1$s\" failed:",
- "Getting environment failed:",
- "%1$s=\"%2$s\": Wrong value, boolean expected.",
- "No Helper Thread support built in this OMP library.",
- "Helper thread failed to soft terminate.",
- "Buffer overflow detected.",
- "Real-time scheduling policy is not supported.",
- "OMP application is running at maximum priority with real-time scheduling policy. ",
- "Changing priority of the monitor thread failed:",
- "Deadlocks are highly possible due to monitor thread starvation.",
- "Unable to set monitor thread stack size to %1$lu bytes:",
- "Unable to set OMP thread stack size to %1$lu bytes:",
- "Thread attribute initialization failed:",
- "Thread attribute destroying failed:",
- "OMP thread joinable state setting failed:",
- "Monitor thread joinable state setting failed:",
- "System unable to allocate necessary resources for OMP thread:",
- "System unable to allocate necessary resources for the monitor thread:",
- "Unable to terminate OMP thread:",
- "Wrong schedule type %1$d, see <omp.h> or <omp_lib.h> file for the list of values supported.",
- "Unknown scheduling type \"%1$d\".",
- "%1$s value \"%2$s\" is invalid.",
- "%1$s value \"%2$s\" is too small.",
- "%1$s value \"%2$s\" is too large.",
- "%1$s: \"%2$s\" is an invalid value; ignored.",
- "%1$s release value \"%2$s\" is invalid.",
- "%1$s gather value \"%2$s\" is invalid.",
- "%1$s supported only on debug builds; ignored.",
- "Syntax error: Usage: %1$s=[ routine=<func> | filename=<file> | range=<lb>:<ub> | excl_range=<lb>:<ub> ],...",
- "Unbalanced quotes in %1$s.",
- "Empty string specified for %1$s; ignored.",
- "%1$s value is too long; ignored.",
- "%1$s: Invalid clause in \"%2$s\".",
- "Empty clause in %1$s.",
- "%1$s value \"%2$s\" is invalid chunk size.",
- "%1$s value \"%2$s\" is to large chunk size.",
- "%1$s value \"%2$s\" is ignored.",
- "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY.",
- "%1$s must be set prior to first parallel region; ignored.",
- "%1$s: parameter has been specified already, ignoring \"%2$s\".",
- "%1$s: parameter invalid, ignoring \"%2$s\".",
- "%1$s: too many integer parameters specified, ignoring \"%2$s\".",
- "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\".",
- "%1$s: '%2$s' type does not take any integer parameters, ignoring them.",
- "%1$s: proclist not specified with explicit affinity type, using \"none\".",
- "%1$s: proclist specified, setting affinity type to \"explicit\".",
- "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored.",
- "%1$s: syntax error, not using affinity.",
- "%1$s: range error (zero stride), not using affinity.",
- "%1$s: range error (%2$d > %3$d), not using affinity.",
- "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity.",
- "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity.",
- "%1$s: %2$s is defined. %3$s will be ignored.",
- "%1$s: affinity not supported, using \"disabled\".",
- "%1$s: affinity only supported for Intel(R) processors.",
- "%1$s: getaffinity system call not supported.",
- "%1$s: setaffinity system call not supported.",
- "%1$s: pthread_aff_set_np call not found.",
- "%1$s: pthread_get_num_resources_np call not found.",
- "%1$s: the OS kernel does not support affinity.",
- "%1$s: pthread_get_num_resources_np returned %2$d.",
- "%1$s: cannot determine proper affinity mask size.",
- "%1$s=\"%2$s\": %3$s.",
- "%1$s: extra trailing characters ignored: \"%2$s\".",
- "%1$s: unknown method \"%2$s\".",
- "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday.",
- "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. Using gettimeofday.",
- "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday.",
- "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday.",
- "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\").",
- "Unknown scheduling type detected.",
- "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling.",
- "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed.",
- "ittnotify: Loading \"%1$s\" library failed.",
- "ittnotify: All itt notifications disabled.",
- "ittnotify: Object state itt notifications disabled.",
- "ittnotify: Mark itt notifications disabled.",
- "ittnotify: Unloading \"%1$s\" library failed.",
- "Cannot form a team with %1$d threads, using %2$d instead.",
- "Requested number of active parallel levels \"%1$d\" is negative; ignored.",
- "Requested number of active parallel levels \"%1$d\" exceeds supported limit; the following limit value will be used: \"%1$d\".",
- "kmp_set_library must only be called from the top level serial thread; ignored.",
- "Fatal system error detected.",
- "Out of heap memory.",
- "Clearing __KMP_REGISTERED_LIB env var failed.",
- "Registering library with env var failed.",
- "%1$s value \"%2$d\" will be used.",
- "%1$s value \"%2$u\" will be used.",
- "%1$s value \"%2$s\" will be used.",
- "%1$s value \"%2$s\" will be used.",
- "%1$s maximum value \"%2$d\" will be used.",
- "%1$s minimum value \"%2$d\" will be used.",
- "Memory allocation failed.",
- "File name too long.",
- "Lock table overflow.",
- "Too many threads to use threadprivate directive.",
- "%1$s: invalid mask.",
- "Wrong definition.",
- "Windows* OS: TLS Set Value failed.",
- "Windows* OS: TLS out of indexes.",
- "PDONE directive must be nested within a DO directive.",
- "Cannot get number of available CPUs.",
- "Assumed number of CPUs is 2.",
- "Error initializing affinity - not using affinity.",
- "Threads may migrate across all available OS procs (granularity setting too coarse).",
- "Ignoring invalid OS proc ID %1$d.",
- "No valid OS proc IDs specified - not using affinity.",
- "%1$s - using \"flat\" OS <-> physical proc mapping.",
- "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping.",
- "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping.",
- "%1$s: %2$s - exiting.",
- "%1$s, line %2$d: %3$s - exiting.",
- "Construct identifier invalid.",
- "Thread identifier invalid.",
- "runtime library not initialized.",
- "Inconsistent THREADPRIVATE common block declarations are non-conforming and are unsupported. Either all threadprivate common blocks must be declared identically, or the largest instance of each threadprivate common block must be referenced first during the run.",
- "Cannot set thread affinity mask.",
- "Cannot set thread priority.",
- "Cannot create thread.",
- "Cannot create event.",
- "Cannot set event.",
- "Cannot close handle.",
- "Unknown library type: %1$d.",
- "Monitor did not reap properly.",
- "Worker thread failed to join.",
- "Cannot change thread affinity mask.",
- "%1$s: Threads may migrate across %2$d innermost levels of machine",
- "%1$s: decrease to %2$d threads",
- "%1$s: increase to %2$d threads",
- "%1$s: Internal thread %2$d bound to OS proc set %3$s",
- "%1$s: Affinity capable, using cpuinfo file",
- "%1$s: Affinity capable, using global cpuid info",
- "%1$s: Affinity capable, using default \"flat\" topology",
- "%1$s: Affinity not capable, using local cpuid info",
- "%1$s: Affinity not capable, using cpuinfo file",
- "%1$s: Affinity not capable, assumming \"flat\" topology",
- "%1$s: Initial OS proc set respected: %2$s",
- "%1$s: Initial OS proc set not respected: %2$s",
- "%1$s: %2$d available OS procs",
- "%1$s: Uniform topology",
- "%1$s: Nonuniform topology",
- "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)",
- "%1$s: OS proc to physical thread map ([] => level not in map):",
- "%1$s: OS proc <n> maps to <n>th package core 0",
- "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]",
- "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]",
- "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d",
- "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]",
- "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]",
- "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d",
- "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]",
- "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d",
- "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d",
- "%1$s: OS proc %2$d maps to %3$s",
- "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s",
- "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d",
- "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d",
- "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package",
- "%1$s: HT disabled; %2$d packages",
- "Threads encountered barriers in different order. ",
- "Function %1$s failed:",
- "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)",
- "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected.",
- "%1$s: ignored because %2$s has been defined",
- "%1$s: overrides %3$s specified before",
- "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause",
- "Detected end of %1$s without first executing a corresponding beginning.",
- "Iteration range too large in %1$s.",
- "%1$s must not have a loop increment that evaluates to zero.",
- "Expected end of %1$s; %2$s, however, has most recently begun execution.",
- "%1$s is incorrectly nested within %2$s",
- "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s",
- "%1$s is incorrectly nested within %2$s of the same name",
- "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause",
- "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs",
- "One thread at %1$s while another thread is at %2$s.",
- "Cannot connect to %1$s",
- "Cannot connect to %1$s - Using %2$s",
- "%1$s does not support %2$s. Continuing without using %2$s.",
- "%1$s does not support %2$s for %3$s. Continuing without using %2$s.",
- "Static %1$s does not support %2$s. Continuing without using %2$s.",
- "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0",
- "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\".",
- "ittnotify: Environment variable \"%1$s\" too long: Actual lengths is %2$lu, max allowed length is %3$lu.",
- "%1$s: Affinity capable, using global cpuid leaf 11 info",
- "%1$s: Affinity not capable, using local cpuid leaf 11 info",
- "%1$s: %2$s.",
- "%1$s: %2$s - %3$s.",
- "%1$s: OS proc to physical thread map:",
- "%1$s: using \"flat\" OS <-> physical proc mapping.",
- "%1$s: parsing %2$s.",
- "%1$s - exiting.",
- "Incompatible %1$s library with version %2$s found.",
- "ittnotify: Function %1$s failed:",
- "ittnofify: Error #%1$d.",
- "%1$s must be set prior to first parallel region or certain API calls; ignored.",
- "Lock initialized at %1$s(%2$d) was not destroyed",
- "Cannot determine machine load balance - Using %1$s",
- "%1$s: Affinity not capable, using pthread info",
- "%1$s: Affinity capable, using pthread info",
- "Loading \"%1$s\" library failed:",
- "Lookup of \"%1$s\" function failed:",
- "Buffer too small.",
- "Error #%1$d.",
- "%1$s: Invalid symbols found. Check the value \"%2$s\".",
- "%1$s: Spaces between digits are not allowed \"%2$s\".",
- "%1$s: %2$s - parsing %3$s.",
- "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group.",
- "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".",
- "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".",
- "%1$s: Mapping Windows* OS processor group <i> proc <j> to OS proc 64*<i>+<j>.",
- "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d",
- "%1$s: Affinity balanced is not available.",
- "%1$s: granularity=core will be used.",
- "%1$s must be set prior to first OMP lock call or critical section; ignored.",
- "futex system call not supported; %1$s=%2$s ignored.",
- "%1$s: granularity=%2$s will be used.",
- "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt (nSockets@offset, nCores@offset, nTthreads per core)\".",
- "KMP_PLACE_THREADS ignored: unsupported architecture.",
- "KMP_PLACE_THREADS ignored: too many cores requested.",
- "%1$s: syntax error, using %2$s.",
- "%1$s: Adaptive locks are not supported; using queuing.",
- "%1$s: Invalid symbols found. Check the value \"%2$s\".",
- "%1$s: Spaces between digits are not allowed \"%2$s\".",
- "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s",
- "%1$s error: parallel loop increment and condition are inconsistent.",
- "libgomp cancellation is not currently supported.",
- "KMP_PLACE_THREADS ignored: non-uniform topology.",
- "KMP_PLACE_THREADS ignored: only three-level topology is supported.",
- "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\".",
- "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\".",
- "KMP_PLACE_THREADS ignored: too many sockets requested.",
- "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value.",
- "%1$s: Affinity capable, using hwloc.",
- "%1$s: Ignoring hwloc mechanism.",
- "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms.",
- NULL
- };
-
-static char const *
-__kmp_i18n_default_hints[] =
- {
- NULL,
- "Please submit a bug report with this message, compile and run commands used, and machine configuration info including native compiler and operating system versions. Faster response will be obtained by including all program sources. For information on submitting this issue, please see http://www.intel.com/software/products/support/.",
- "Check NLSPATH environment variable, its value is \"%1$s\".",
- "Please try changing the shell stack limit or adjusting the OMP_STACKSIZE environment variable.",
- "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set).",
- "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d.",
- "This could also be due to a system-related limit on the number of threads.",
- "This means that multiple copies of the OpenMP runtime have been linked into the program. That is dangerous, since it can degrade performance or cause incorrect results. The best thing to do is to ensure that only a single OpenMP runtime is linked into the process, e.g. by avoiding static linking of the OpenMP runtime in any library. As an unsafe, unsupported, undocumented workaround you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow the program to continue to execute, but that may cause crashes or silently produce incorrect results. For more information, please see http://www.intel.com/software/products/support/.",
- "This name is specified in environment variable KMP_CPUINFO_FILE.",
- "Seems application required too much memory.",
- "Use \"0\", \"FALSE\". \".F.\", \"off\", \"no\" as false values, \"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values.",
- "Perhaps too many threads.",
- "Decrease priority of application. This will allow the monitor thread run at higher priority than other threads.",
- "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit.",
- "Try changing OMP_STACKSIZE and/or the shell stack limit.",
- "Try increasing OMP_STACKSIZE or the shell stack limit.",
- "Try decreasing OMP_STACKSIZE.",
- "Try decreasing the value of OMP_NUM_THREADS.",
- "Try increasing KMP_MONITOR_STACKSIZE.",
- "Try decreasing KMP_MONITOR_STACKSIZE.",
- "Try decreasing the number of threads in use simultaneously.",
- "Will use default schedule type (%1$s).",
- "It could be a result of using an older OMP library with a newer compiler or memory corruption. You may check the proper OMP library is linked to the application.",
- "Check %1$s environment variable, its value is \"%2$s\".",
- "You may want to use an %1$s library that supports %2$s interface with version %3$s.",
- "You may want to use an %1$s library with version %2$s.",
- "System error #193 is \"Bad format of EXE or DLL file\". Usually it means the file is found, but it is corrupted or a file for another architecture. Check whether \"%1$s\" is a file for %2$s architecture.",
- "System-related limit on the number of threads.",
- NULL
- };
-
-struct kmp_i18n_section {
- int size;
- char const ** str;
-}; // struct kmp_i18n_section
-typedef struct kmp_i18n_section kmp_i18n_section_t;
-
-static kmp_i18n_section_t
-__kmp_i18n_sections[] =
- {
- { 0, NULL },
- { 5, __kmp_i18n_default_meta },
- { 54, __kmp_i18n_default_strings },
- { 6, __kmp_i18n_default_formats },
- { 253, __kmp_i18n_default_messages },
- { 27, __kmp_i18n_default_hints },
- { 0, NULL }
- };
-
-struct kmp_i18n_table {
- int size;
- kmp_i18n_section_t * sect;
-}; // struct kmp_i18n_table
-typedef struct kmp_i18n_table kmp_i18n_table_t;
-
-static kmp_i18n_table_t __kmp_i18n_default_table =
- {
- 5,
- __kmp_i18n_sections
- };
-
-// end of file //
+// Do not edit this file! //
+// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. //
+
+static char const *
+__kmp_i18n_default_meta[] =
+ {
+ NULL,
+ "English",
+ "USA",
+ "1033",
+ "2",
+ "20140827",
+ NULL
+ };
+
+static char const *
+__kmp_i18n_default_strings[] =
+ {
+ NULL,
+ "Error",
+ "(unknown file)",
+ "not a number",
+ "bad unit",
+ "illegal characters",
+ "value too large",
+ "value too small",
+ "value is not a multiple of 4k",
+ "Unknown processor topology",
+ "Cannot open /proc/cpuinfo",
+ "/proc/cpuinfo",
+ "cpuinfo file invalid (No processor records)",
+ "cpuinfo file invalid (Too many processor records)",
+ "Cannot rewind cpuinfo file",
+ "cpuinfo file invalid (long line)",
+ "cpuinfo file contains too many entries",
+ "cpuinfo file missing processor field",
+ "cpuinfo file missing physical id field",
+ "cpuinfo file invalid (missing val)",
+ "cpuinfo file invalid (duplicate field)",
+ "Physical node/pkg/core/thread ids not unique",
+ "APIC not present",
+ "Invalid cpuid info",
+ "APIC ids not unique",
+ "Inconsistent cpuid info",
+ "Out of heap memory",
+ "Memory allocation failed",
+ "core",
+ "thread",
+ "package",
+ "node",
+ "<undef>",
+ "decoding legacy APIC ids",
+ "parsing /proc/cpuinfo",
+ "value is not defined",
+ "Effective settings:",
+ "User settings:",
+ "warning: pointers or size don't make sense",
+ "CPU",
+ "TPU",
+ "TPUs per package",
+ "HT enabled",
+ "HT disabled",
+ "decoding x2APIC ids",
+ "cpuid leaf 11 not supported",
+ "cpuid leaf 4 not supported",
+ "thread ids not unique",
+ "using pthread info",
+ "legacy APIC ids not unique",
+ "x2APIC ids not unique",
+ "OPENMP DISPLAY ENVIRONMENT BEGIN",
+ "OPENMP DISPLAY ENVIRONMENT END",
+ "[device]",
+ "[host]",
+ NULL
+ };
+
+static char const *
+__kmp_i18n_default_formats[] =
+ {
+ NULL,
+ "OMP: Info #%1$d: %2$s\n",
+ "OMP: Warning #%1$d: %2$s\n",
+ "OMP: Error #%1$d: %2$s\n",
+ "OMP: System error #%1$d: %2$s\n",
+ "OMP: Hint: %2$s\n",
+ "%1$s pragma (at %2$s:%3$s():%4$s)",
+ NULL
+ };
+
+static char const *
+__kmp_i18n_default_messages[] =
+ {
+ NULL,
+ "Library is \"serial\".",
+ "Cannot open message catalog \"%1$s\":",
+ "Default messages will be used.",
+ "%1$s: Lock is uninitialized",
+ "%1$s: Lock was initialized as simple, but used as nestable",
+ "%1$s: Lock was initialized as nestable, but used as simple",
+ "%1$s: Lock is already owned by requesting thread",
+ "%1$s: Lock is still owned by a thread",
+ "%1$s: Attempt to release a lock not owned by any thread",
+ "%1$s: Attempt to release a lock owned by another thread",
+ "Stack overflow detected for OpenMP thread #%1$d",
+ "Stack overlap detected. ",
+ "Assertion failure at %1$s(%2$d).",
+ "Unable to register a new user thread.",
+ "Initializing %1$s, but found %2$s already initialized.",
+ "Cannot open file \"%1$s\" for reading:",
+ "Getting environment variable \"%1$s\" failed:",
+ "Setting environment variable \"%1$s\" failed:",
+ "Getting environment failed:",
+ "%1$s=\"%2$s\": Wrong value, boolean expected.",
+ "No Helper Thread support built in this OMP library.",
+ "Helper thread failed to soft terminate.",
+ "Buffer overflow detected.",
+ "Real-time scheduling policy is not supported.",
+ "OMP application is running at maximum priority with real-time scheduling policy. ",
+ "Changing priority of the monitor thread failed:",
+ "Deadlocks are highly possible due to monitor thread starvation.",
+ "Unable to set monitor thread stack size to %1$lu bytes:",
+ "Unable to set OMP thread stack size to %1$lu bytes:",
+ "Thread attribute initialization failed:",
+ "Thread attribute destroying failed:",
+ "OMP thread joinable state setting failed:",
+ "Monitor thread joinable state setting failed:",
+ "System unable to allocate necessary resources for OMP thread:",
+ "System unable to allocate necessary resources for the monitor thread:",
+ "Unable to terminate OMP thread:",
+ "Wrong schedule type %1$d, see <omp.h> or <omp_lib.h> file for the list of values supported.",
+ "Unknown scheduling type \"%1$d\".",
+ "%1$s value \"%2$s\" is invalid.",
+ "%1$s value \"%2$s\" is too small.",
+ "%1$s value \"%2$s\" is too large.",
+ "%1$s: \"%2$s\" is an invalid value; ignored.",
+ "%1$s release value \"%2$s\" is invalid.",
+ "%1$s gather value \"%2$s\" is invalid.",
+ "%1$s supported only on debug builds; ignored.",
+ "Syntax error: Usage: %1$s=[ routine=<func> | filename=<file> | range=<lb>:<ub> | excl_range=<lb>:<ub> ],...",
+ "Unbalanced quotes in %1$s.",
+ "Empty string specified for %1$s; ignored.",
+ "%1$s value is too long; ignored.",
+ "%1$s: Invalid clause in \"%2$s\".",
+ "Empty clause in %1$s.",
+ "%1$s value \"%2$s\" is invalid chunk size.",
+ "%1$s value \"%2$s\" is to large chunk size.",
+ "%1$s value \"%2$s\" is ignored.",
+ "Cannot get processor frequency, using zero KMP_ITT_PREPARE_DELAY.",
+ "%1$s must be set prior to first parallel region; ignored.",
+ "%1$s: parameter has been specified already, ignoring \"%2$s\".",
+ "%1$s: parameter invalid, ignoring \"%2$s\".",
+ "%1$s: too many integer parameters specified, ignoring \"%2$s\".",
+ "%1$s: too many integer parameters specified for logical or physical type, ignoring \"%2$d\".",
+ "%1$s: '%2$s' type does not take any integer parameters, ignoring them.",
+ "%1$s: proclist not specified with explicit affinity type, using \"none\".",
+ "%1$s: proclist specified, setting affinity type to \"explicit\".",
+ "%1$s: proclist specified without \"explicit\" affinity type, proclist ignored.",
+ "%1$s: syntax error, not using affinity.",
+ "%1$s: range error (zero stride), not using affinity.",
+ "%1$s: range error (%2$d > %3$d), not using affinity.",
+ "%1$s: range error (%2$d < %3$d & stride < 0), not using affinity.",
+ "%1$s: range error ((%2$d-%3$d)/%4$d too big), not using affinity.",
+ "%1$s: %2$s is defined. %3$s will be ignored.",
+ "%1$s: affinity not supported, using \"disabled\".",
+ "%1$s: affinity only supported for Intel(R) processors.",
+ "%1$s: getaffinity system call not supported.",
+ "%1$s: setaffinity system call not supported.",
+ "%1$s: pthread_aff_set_np call not found.",
+ "%1$s: pthread_get_num_resources_np call not found.",
+ "%1$s: the OS kernel does not support affinity.",
+ "%1$s: pthread_get_num_resources_np returned %2$d.",
+ "%1$s: cannot determine proper affinity mask size.",
+ "%1$s=\"%2$s\": %3$s.",
+ "%1$s: extra trailing characters ignored: \"%2$s\".",
+ "%1$s: unknown method \"%2$s\".",
+ "KMP_STATS_TIMER: clock_gettime is undefined, using gettimeofday.",
+ "KMP_STATS_TIMER: \"%1$s\" needs additional parameter, e.g. 'clock_gettime,2'. Using gettimeofday.",
+ "KMP_STATS_TIMER: clock_gettime parameter \"%1$s\" is invalid, using gettimeofday.",
+ "KMP_STATS_TIMER: clock_gettime failed, using gettimeofday.",
+ "KMP_STATS_TIMER: clock function unknown (ignoring value \"%1$s\").",
+ "Unknown scheduling type detected.",
+ "Too many threads to use analytical guided scheduling - switching to iterative guided scheduling.",
+ "ittnotify: Lookup of \"%1$s\" function in \"%2$s\" library failed.",
+ "ittnotify: Loading \"%1$s\" library failed.",
+ "ittnotify: All itt notifications disabled.",
+ "ittnotify: Object state itt notifications disabled.",
+ "ittnotify: Mark itt notifications disabled.",
+ "ittnotify: Unloading \"%1$s\" library failed.",
+ "Cannot form a team with %1$d threads, using %2$d instead.",
+ "Requested number of active parallel levels \"%1$d\" is negative; ignored.",
+ "Requested number of active parallel levels \"%1$d\" exceeds supported limit; the following limit value will be used: \"%1$d\".",
+ "kmp_set_library must only be called from the top level serial thread; ignored.",
+ "Fatal system error detected.",
+ "Out of heap memory.",
+ "Clearing __KMP_REGISTERED_LIB env var failed.",
+ "Registering library with env var failed.",
+ "%1$s value \"%2$d\" will be used.",
+ "%1$s value \"%2$u\" will be used.",
+ "%1$s value \"%2$s\" will be used.",
+ "%1$s value \"%2$s\" will be used.",
+ "%1$s maximum value \"%2$d\" will be used.",
+ "%1$s minimum value \"%2$d\" will be used.",
+ "Memory allocation failed.",
+ "File name too long.",
+ "Lock table overflow.",
+ "Too many threads to use threadprivate directive.",
+ "%1$s: invalid mask.",
+ "Wrong definition.",
+ "Windows* OS: TLS Set Value failed.",
+ "Windows* OS: TLS out of indexes.",
+ "PDONE directive must be nested within a DO directive.",
+ "Cannot get number of available CPUs.",
+ "Assumed number of CPUs is 2.",
+ "Error initializing affinity - not using affinity.",
+ "Threads may migrate across all available OS procs (granularity setting too coarse).",
+ "Ignoring invalid OS proc ID %1$d.",
+ "No valid OS proc IDs specified - not using affinity.",
+ "%1$s - using \"flat\" OS <-> physical proc mapping.",
+ "%1$s: %2$s - using \"flat\" OS <-> physical proc mapping.",
+ "%1$s, line %2$d: %3$s - using \"flat\" OS <-> physical proc mapping.",
+ "%1$s: %2$s - exiting.",
+ "%1$s, line %2$d: %3$s - exiting.",
+ "Construct identifier invalid.",
+ "Thread identifier invalid.",
+ "runtime library not initialized.",
+ "Inconsistent THREADPRIVATE common block declarations are non-conforming and are unsupported. Either all threadprivate common blocks must be declared identically, or the largest instance of each threadprivate common block must be referenced first during the run.",
+ "Cannot set thread affinity mask.",
+ "Cannot set thread priority.",
+ "Cannot create thread.",
+ "Cannot create event.",
+ "Cannot set event.",
+ "Cannot close handle.",
+ "Unknown library type: %1$d.",
+ "Monitor did not reap properly.",
+ "Worker thread failed to join.",
+ "Cannot change thread affinity mask.",
+ "%1$s: Threads may migrate across %2$d innermost levels of machine",
+ "%1$s: decrease to %2$d threads",
+ "%1$s: increase to %2$d threads",
+ "%1$s: Internal thread %2$d bound to OS proc set %3$s",
+ "%1$s: Affinity capable, using cpuinfo file",
+ "%1$s: Affinity capable, using global cpuid info",
+ "%1$s: Affinity capable, using default \"flat\" topology",
+ "%1$s: Affinity not capable, using local cpuid info",
+ "%1$s: Affinity not capable, using cpuinfo file",
+ "%1$s: Affinity not capable, assumming \"flat\" topology",
+ "%1$s: Initial OS proc set respected: %2$s",
+ "%1$s: Initial OS proc set not respected: %2$s",
+ "%1$s: %2$d available OS procs",
+ "%1$s: Uniform topology",
+ "%1$s: Nonuniform topology",
+ "%1$s: %2$d packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)",
+ "%1$s: OS proc to physical thread map ([] => level not in map):",
+ "%1$s: OS proc <n> maps to <n>th package core 0",
+ "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]",
+ "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] [thread %5$d]",
+ "%1$s: OS proc %2$d maps to [package %3$d] [core %4$d] thread %5$d",
+ "%1$s: OS proc %2$d maps to [package %3$d] core %4$d [thread %5$d]",
+ "%1$s: OS proc %2$d maps to package %3$d [core %4$d] [thread %5$d]",
+ "%1$s: OS proc %2$d maps to [package %3$d] core %4$d thread %5$d",
+ "%1$s: OS proc %2$d maps to package %3$d core %4$d [thread %5$d]",
+ "%1$s: OS proc %2$d maps to package %3$d [core %4$d] thread %5$d",
+ "%1$s: OS proc %2$d maps to package %3$d core %4$d thread %5$d",
+ "%1$s: OS proc %2$d maps to %3$s",
+ "%1$s: Internal thread %2$d changed affinity mask from %3$s to %4$s",
+ "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d, TPU %5$d",
+ "%1$s: OS proc %2$d maps to package %3$d, CPU %4$d",
+ "%1$s: HT enabled; %2$d packages; %3$d TPU; %4$d TPUs per package",
+ "%1$s: HT disabled; %2$d packages",
+ "Threads encountered barriers in different order. ",
+ "Function %1$s failed:",
+ "%1$s: %2$s packages x %3$d cores/pkg x %4$d threads/core (%5$d total cores)",
+ "Incompatible message catalog \"%1$s\": Version \"%2$s\" found, version \"%3$s\" expected.",
+ "%1$s: ignored because %2$s has been defined",
+ "%1$s: overrides %3$s specified before",
+ "%1$s must be bound to a work-sharing or work-queuing construct with an \"ordered\" clause",
+ "Detected end of %1$s without first executing a corresponding beginning.",
+ "Iteration range too large in %1$s.",
+ "%1$s must not have a loop increment that evaluates to zero.",
+ "Expected end of %1$s; %2$s, however, has most recently begun execution.",
+ "%1$s is incorrectly nested within %2$s",
+ "%1$s cannot be executed multiple times during execution of one parallel iteration/section of %2$s",
+ "%1$s is incorrectly nested within %2$s of the same name",
+ "%1$s is incorrectly nested within %2$s that does not have an \"ordered\" clause",
+ "%1$s is incorrectly nested within %2$s but not within any of its \"task\" constructs",
+ "One thread at %1$s while another thread is at %2$s.",
+ "Cannot connect to %1$s",
+ "Cannot connect to %1$s - Using %2$s",
+ "%1$s does not support %2$s. Continuing without using %2$s.",
+ "%1$s does not support %2$s for %3$s. Continuing without using %2$s.",
+ "Static %1$s does not support %2$s. Continuing without using %2$s.",
+ "KMP_DYNAMIC_MODE=irml cannot be used with KMP_USE_IRML=0",
+ "ittnotify: Unknown group \"%2$s\" specified in environment variable \"%1$s\".",
+ "ittnotify: Environment variable \"%1$s\" too long: Actual lengths is %2$lu, max allowed length is %3$lu.",
+ "%1$s: Affinity capable, using global cpuid leaf 11 info",
+ "%1$s: Affinity not capable, using local cpuid leaf 11 info",
+ "%1$s: %2$s.",
+ "%1$s: %2$s - %3$s.",
+ "%1$s: OS proc to physical thread map:",
+ "%1$s: using \"flat\" OS <-> physical proc mapping.",
+ "%1$s: parsing %2$s.",
+ "%1$s - exiting.",
+ "Incompatible %1$s library with version %2$s found.",
+ "ittnotify: Function %1$s failed:",
+ "ittnofify: Error #%1$d.",
+ "%1$s must be set prior to first parallel region or certain API calls; ignored.",
+ "Lock initialized at %1$s(%2$d) was not destroyed",
+ "Cannot determine machine load balance - Using %1$s",
+ "%1$s: Affinity not capable, using pthread info",
+ "%1$s: Affinity capable, using pthread info",
+ "Loading \"%1$s\" library failed:",
+ "Lookup of \"%1$s\" function failed:",
+ "Buffer too small.",
+ "Error #%1$d.",
+ "%1$s: Invalid symbols found. Check the value \"%2$s\".",
+ "%1$s: Spaces between digits are not allowed \"%2$s\".",
+ "%1$s: %2$s - parsing %3$s.",
+ "%1$s cannot be specified via kmp_set_defaults() on this machine because it has more than one processor group.",
+ "Cannot use affinity type \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".",
+ "Cannot use affinity granularity \"%1$s\" with multiple Windows* OS processor groups, using \"%2$s\".",
+ "%1$s: Mapping Windows* OS processor group <i> proc <j> to OS proc 64*<i>+<j>.",
+ "%1$s: OS proc %2$d maps to Windows* OS processor group %3$d proc %4$d",
+ "%1$s: Affinity balanced is not available.",
+ "%1$s: granularity=core will be used.",
+ "%1$s must be set prior to first OMP lock call or critical section; ignored.",
+ "futex system call not supported; %1$s=%2$s ignored.",
+ "%1$s: granularity=%2$s will be used.",
+ "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt (nSockets@offset, nCores@offset, nTthreads per core)\".",
+ "KMP_PLACE_THREADS ignored: unsupported architecture.",
+ "KMP_PLACE_THREADS ignored: too many cores requested.",
+ "%1$s: syntax error, using %2$s.",
+ "%1$s: Adaptive locks are not supported; using queuing.",
+ "%1$s: Invalid symbols found. Check the value \"%2$s\".",
+ "%1$s: Spaces between digits are not allowed \"%2$s\".",
+ "%1$s: pid %2$d thread %3$d bound to OS proc set %4$s",
+ "%1$s error: parallel loop increment and condition are inconsistent.",
+ "libgomp cancellation is not currently supported.",
+ "KMP_PLACE_THREADS ignored: non-uniform topology.",
+ "KMP_PLACE_THREADS ignored: only three-level topology is supported.",
+ "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\".",
+ "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\".",
+ "KMP_PLACE_THREADS ignored: too many sockets requested.",
+ "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value.",
+ "%1$s: Affinity capable, using hwloc.",
+ "%1$s: Ignoring hwloc mechanism.",
+ "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms.",
+ NULL
+ };
+
+static char const *
+__kmp_i18n_default_hints[] =
+ {
+ NULL,
+ "Please submit a bug report with this message, compile and run commands used, and machine configuration info including native compiler and operating system versions. Faster response will be obtained by including all program sources. For information on submitting this issue, please see http://www.intel.com/software/products/support/.",
+ "Check NLSPATH environment variable, its value is \"%1$s\".",
+ "Please try changing the shell stack limit or adjusting the OMP_STACKSIZE environment variable.",
+ "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set).",
+ "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d.",
+ "This could also be due to a system-related limit on the number of threads.",
+ "This means that multiple copies of the OpenMP runtime have been linked into the program. That is dangerous, since it can degrade performance or cause incorrect results. The best thing to do is to ensure that only a single OpenMP runtime is linked into the process, e.g. by avoiding static linking of the OpenMP runtime in any library. As an unsafe, unsupported, undocumented workaround you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow the program to continue to execute, but that may cause crashes or silently produce incorrect results. For more information, please see http://www.intel.com/software/products/support/.",
+ "This name is specified in environment variable KMP_CPUINFO_FILE.",
+ "Seems application required too much memory.",
+ "Use \"0\", \"FALSE\". \".F.\", \"off\", \"no\" as false values, \"1\", \"TRUE\", \".T.\", \"on\", \"yes\" as true values.",
+ "Perhaps too many threads.",
+ "Decrease priority of application. This will allow the monitor thread run at higher priority than other threads.",
+ "Try changing KMP_MONITOR_STACKSIZE or the shell stack limit.",
+ "Try changing OMP_STACKSIZE and/or the shell stack limit.",
+ "Try increasing OMP_STACKSIZE or the shell stack limit.",
+ "Try decreasing OMP_STACKSIZE.",
+ "Try decreasing the value of OMP_NUM_THREADS.",
+ "Try increasing KMP_MONITOR_STACKSIZE.",
+ "Try decreasing KMP_MONITOR_STACKSIZE.",
+ "Try decreasing the number of threads in use simultaneously.",
+ "Will use default schedule type (%1$s).",
+ "It could be a result of using an older OMP library with a newer compiler or memory corruption. You may check the proper OMP library is linked to the application.",
+ "Check %1$s environment variable, its value is \"%2$s\".",
+ "You may want to use an %1$s library that supports %2$s interface with version %3$s.",
+ "You may want to use an %1$s library with version %2$s.",
+ "System error #193 is \"Bad format of EXE or DLL file\". Usually it means the file is found, but it is corrupted or a file for another architecture. Check whether \"%1$s\" is a file for %2$s architecture.",
+ "System-related limit on the number of threads.",
+ NULL
+ };
+
+struct kmp_i18n_section {
+ int size;
+ char const ** str;
+}; // struct kmp_i18n_section
+typedef struct kmp_i18n_section kmp_i18n_section_t;
+
+static kmp_i18n_section_t
+__kmp_i18n_sections[] =
+ {
+ { 0, NULL },
+ { 5, __kmp_i18n_default_meta },
+ { 54, __kmp_i18n_default_strings },
+ { 6, __kmp_i18n_default_formats },
+ { 253, __kmp_i18n_default_messages },
+ { 27, __kmp_i18n_default_hints },
+ { 0, NULL }
+ };
+
+struct kmp_i18n_table {
+ int size;
+ kmp_i18n_section_t * sect;
+}; // struct kmp_i18n_table
+typedef struct kmp_i18n_table kmp_i18n_table_t;
+
+static kmp_i18n_table_t __kmp_i18n_default_table =
+ {
+ 5,
+ __kmp_i18n_sections
+ };
+
+// end of file //
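
The table above binds the five message sets to their string arrays; the generated ids (see kmp_i18n_id.inc below) encode the set number in the high 16 bits and the 1-based position in the low 16 bits, and entry 0 of every section is a NULL sentinel. A rough lookup sketch against the default catalog, under those assumptions (hypothetical helper, not part of the runtime):

static char const *
example_lookup_default( kmp_i18n_table_t const * table, int id )
{
    int set   = id >> 16;       /* 1 = meta, 2 = strings, 3 = formats, 4 = messages, 5 = hints */
    int index = id & 0xFFFF;    /* 1-based position inside the set                             */
    if ( set <= 0 || set > table->size ) {
        return NULL;
    }
    if ( index <= 0 || index > table->sect[ set ].size ) {
        return NULL;
    }
    return table->sect[ set ].str[ index ];
}

For example, an id equal to 65537 (one past the set-1 base of 65536) would resolve to set 1, index 1, i.e. the first entry of the meta section.
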
diff --git a/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc b/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc
index c57f3efe6c..5de276377c 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc
+++ b/contrib/libs/cxxsupp/openmp/kmp_i18n_id.inc
@@ -1,381 +1,381 @@
-// Do not edit this file! //
-// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. //
-
-enum kmp_i18n_id {
-
- // A special id for absence of message.
- kmp_i18n_null = 0,
-
- // Set #1, meta.
- kmp_i18n_prp_first = 65536,
- kmp_i18n_prp_Language,
- kmp_i18n_prp_Country,
- kmp_i18n_prp_LangId,
- kmp_i18n_prp_Version,
- kmp_i18n_prp_Revision,
- kmp_i18n_prp_last,
-
- // Set #2, strings.
- kmp_i18n_str_first = 131072,
- kmp_i18n_str_Error,
- kmp_i18n_str_UnknownFile,
- kmp_i18n_str_NotANumber,
- kmp_i18n_str_BadUnit,
- kmp_i18n_str_IllegalCharacters,
- kmp_i18n_str_ValueTooLarge,
- kmp_i18n_str_ValueTooSmall,
- kmp_i18n_str_NotMultiple4K,
- kmp_i18n_str_UnknownTopology,
- kmp_i18n_str_CantOpenCpuinfo,
- kmp_i18n_str_ProcCpuinfo,
- kmp_i18n_str_NoProcRecords,
- kmp_i18n_str_TooManyProcRecords,
- kmp_i18n_str_CantRewindCpuinfo,
- kmp_i18n_str_LongLineCpuinfo,
- kmp_i18n_str_TooManyEntries,
- kmp_i18n_str_MissingProcField,
- kmp_i18n_str_MissingPhysicalIDField,
- kmp_i18n_str_MissingValCpuinfo,
- kmp_i18n_str_DuplicateFieldCpuinfo,
- kmp_i18n_str_PhysicalIDsNotUnique,
- kmp_i18n_str_ApicNotPresent,
- kmp_i18n_str_InvalidCpuidInfo,
- kmp_i18n_str_OBSOLETE1,
- kmp_i18n_str_InconsistentCpuidInfo,
- kmp_i18n_str_OutOfHeapMemory,
- kmp_i18n_str_MemoryAllocFailed,
- kmp_i18n_str_Core,
- kmp_i18n_str_Thread,
- kmp_i18n_str_Package,
- kmp_i18n_str_Node,
- kmp_i18n_str_OBSOLETE2,
- kmp_i18n_str_DecodingLegacyAPIC,
- kmp_i18n_str_OBSOLETE3,
- kmp_i18n_str_NotDefined,
- kmp_i18n_str_EffectiveSettings,
- kmp_i18n_str_UserSettings,
- kmp_i18n_str_StorageMapWarning,
- kmp_i18n_str_OBSOLETE4,
- kmp_i18n_str_OBSOLETE5,
- kmp_i18n_str_OBSOLETE6,
- kmp_i18n_str_OBSOLETE7,
- kmp_i18n_str_OBSOLETE8,
- kmp_i18n_str_Decodingx2APIC,
- kmp_i18n_str_NoLeaf11Support,
- kmp_i18n_str_NoLeaf4Support,
- kmp_i18n_str_ThreadIDsNotUnique,
- kmp_i18n_str_UsingPthread,
- kmp_i18n_str_LegacyApicIDsNotUnique,
- kmp_i18n_str_x2ApicIDsNotUnique,
- kmp_i18n_str_DisplayEnvBegin,
- kmp_i18n_str_DisplayEnvEnd,
- kmp_i18n_str_Device,
- kmp_i18n_str_Host,
- kmp_i18n_str_last,
-
- // Set #3, formats.
- kmp_i18n_fmt_first = 196608,
- kmp_i18n_fmt_Info,
- kmp_i18n_fmt_Warning,
- kmp_i18n_fmt_Fatal,
- kmp_i18n_fmt_SysErr,
- kmp_i18n_fmt_Hint,
- kmp_i18n_fmt_Pragma,
- kmp_i18n_fmt_last,
-
- // Set #4, messages.
- kmp_i18n_msg_first = 262144,
- kmp_i18n_msg_LibraryIsSerial,
- kmp_i18n_msg_CantOpenMessageCatalog,
- kmp_i18n_msg_WillUseDefaultMessages,
- kmp_i18n_msg_LockIsUninitialized,
- kmp_i18n_msg_LockSimpleUsedAsNestable,
- kmp_i18n_msg_LockNestableUsedAsSimple,
- kmp_i18n_msg_LockIsAlreadyOwned,
- kmp_i18n_msg_LockStillOwned,
- kmp_i18n_msg_LockUnsettingFree,
- kmp_i18n_msg_LockUnsettingSetByAnother,
- kmp_i18n_msg_StackOverflow,
- kmp_i18n_msg_StackOverlap,
- kmp_i18n_msg_AssertionFailure,
- kmp_i18n_msg_CantRegisterNewThread,
- kmp_i18n_msg_DuplicateLibrary,
- kmp_i18n_msg_CantOpenFileForReading,
- kmp_i18n_msg_CantGetEnvVar,
- kmp_i18n_msg_CantSetEnvVar,
- kmp_i18n_msg_CantGetEnvironment,
- kmp_i18n_msg_BadBoolValue,
- kmp_i18n_msg_SSPNotBuiltIn,
- kmp_i18n_msg_SPPSotfTerminateFailed,
- kmp_i18n_msg_BufferOverflow,
- kmp_i18n_msg_RealTimeSchedNotSupported,
- kmp_i18n_msg_RunningAtMaxPriority,
- kmp_i18n_msg_CantChangeMonitorPriority,
- kmp_i18n_msg_MonitorWillStarve,
- kmp_i18n_msg_CantSetMonitorStackSize,
- kmp_i18n_msg_CantSetWorkerStackSize,
- kmp_i18n_msg_CantInitThreadAttrs,
- kmp_i18n_msg_CantDestroyThreadAttrs,
- kmp_i18n_msg_CantSetWorkerState,
- kmp_i18n_msg_CantSetMonitorState,
- kmp_i18n_msg_NoResourcesForWorkerThread,
- kmp_i18n_msg_NoResourcesForMonitorThread,
- kmp_i18n_msg_CantTerminateWorkerThread,
- kmp_i18n_msg_ScheduleKindOutOfRange,
- kmp_i18n_msg_UnknownSchedulingType,
- kmp_i18n_msg_InvalidValue,
- kmp_i18n_msg_SmallValue,
- kmp_i18n_msg_LargeValue,
- kmp_i18n_msg_StgInvalidValue,
- kmp_i18n_msg_BarrReleaseValueInvalid,
- kmp_i18n_msg_BarrGatherValueInvalid,
- kmp_i18n_msg_OBSOLETE9,
- kmp_i18n_msg_ParRangeSyntax,
- kmp_i18n_msg_UnbalancedQuotes,
- kmp_i18n_msg_EmptyString,
- kmp_i18n_msg_LongValue,
- kmp_i18n_msg_InvalidClause,
- kmp_i18n_msg_EmptyClause,
- kmp_i18n_msg_InvalidChunk,
- kmp_i18n_msg_LargeChunk,
- kmp_i18n_msg_IgnoreChunk,
- kmp_i18n_msg_CantGetProcFreq,
- kmp_i18n_msg_EnvParallelWarn,
- kmp_i18n_msg_AffParamDefined,
- kmp_i18n_msg_AffInvalidParam,
- kmp_i18n_msg_AffManyParams,
- kmp_i18n_msg_AffManyParamsForLogic,
- kmp_i18n_msg_AffNoParam,
- kmp_i18n_msg_AffNoProcList,
- kmp_i18n_msg_AffProcListNoType,
- kmp_i18n_msg_AffProcListNotExplicit,
- kmp_i18n_msg_AffSyntaxError,
- kmp_i18n_msg_AffZeroStride,
- kmp_i18n_msg_AffStartGreaterEnd,
- kmp_i18n_msg_AffStrideLessZero,
- kmp_i18n_msg_AffRangeTooBig,
- kmp_i18n_msg_OBSOLETE10,
- kmp_i18n_msg_AffNotSupported,
- kmp_i18n_msg_OBSOLETE11,
- kmp_i18n_msg_GetAffSysCallNotSupported,
- kmp_i18n_msg_SetAffSysCallNotSupported,
- kmp_i18n_msg_OBSOLETE12,
- kmp_i18n_msg_OBSOLETE13,
- kmp_i18n_msg_OBSOLETE14,
- kmp_i18n_msg_OBSOLETE15,
- kmp_i18n_msg_AffCantGetMaskSize,
- kmp_i18n_msg_ParseSizeIntWarn,
- kmp_i18n_msg_ParseExtraCharsWarn,
- kmp_i18n_msg_UnknownForceReduction,
- kmp_i18n_msg_TimerUseGettimeofday,
- kmp_i18n_msg_TimerNeedMoreParam,
- kmp_i18n_msg_TimerInvalidParam,
- kmp_i18n_msg_TimerGettimeFailed,
- kmp_i18n_msg_TimerUnknownFunction,
- kmp_i18n_msg_UnknownSchedTypeDetected,
- kmp_i18n_msg_DispatchManyThreads,
- kmp_i18n_msg_IttLookupFailed,
- kmp_i18n_msg_IttLoadLibFailed,
- kmp_i18n_msg_IttAllNotifDisabled,
- kmp_i18n_msg_IttObjNotifDisabled,
- kmp_i18n_msg_IttMarkNotifDisabled,
- kmp_i18n_msg_IttUnloadLibFailed,
- kmp_i18n_msg_CantFormThrTeam,
- kmp_i18n_msg_ActiveLevelsNegative,
- kmp_i18n_msg_ActiveLevelsExceedLimit,
- kmp_i18n_msg_SetLibraryIncorrectCall,
- kmp_i18n_msg_FatalSysError,
- kmp_i18n_msg_OutOfHeapMemory,
- kmp_i18n_msg_OBSOLETE16,
- kmp_i18n_msg_OBSOLETE17,
- kmp_i18n_msg_Using_int_Value,
- kmp_i18n_msg_Using_uint_Value,
- kmp_i18n_msg_Using_uint64_Value,
- kmp_i18n_msg_Using_str_Value,
- kmp_i18n_msg_MaxValueUsing,
- kmp_i18n_msg_MinValueUsing,
- kmp_i18n_msg_MemoryAllocFailed,
- kmp_i18n_msg_FileNameTooLong,
- kmp_i18n_msg_OBSOLETE18,
- kmp_i18n_msg_ManyThreadsForTPDirective,
- kmp_i18n_msg_AffinityInvalidMask,
- kmp_i18n_msg_WrongDefinition,
- kmp_i18n_msg_TLSSetValueFailed,
- kmp_i18n_msg_TLSOutOfIndexes,
- kmp_i18n_msg_OBSOLETE19,
- kmp_i18n_msg_CantGetNumAvailCPU,
- kmp_i18n_msg_AssumedNumCPU,
- kmp_i18n_msg_ErrorInitializeAffinity,
- kmp_i18n_msg_AffThreadsMayMigrate,
- kmp_i18n_msg_AffIgnoreInvalidProcID,
- kmp_i18n_msg_AffNoValidProcID,
- kmp_i18n_msg_UsingFlatOS,
- kmp_i18n_msg_UsingFlatOSFile,
- kmp_i18n_msg_UsingFlatOSFileLine,
- kmp_i18n_msg_FileMsgExiting,
- kmp_i18n_msg_FileLineMsgExiting,
- kmp_i18n_msg_ConstructIdentInvalid,
- kmp_i18n_msg_ThreadIdentInvalid,
- kmp_i18n_msg_RTLNotInitialized,
- kmp_i18n_msg_TPCommonBlocksInconsist,
- kmp_i18n_msg_CantSetThreadAffMask,
- kmp_i18n_msg_CantSetThreadPriority,
- kmp_i18n_msg_CantCreateThread,
- kmp_i18n_msg_CantCreateEvent,
- kmp_i18n_msg_CantSetEvent,
- kmp_i18n_msg_CantCloseHandle,
- kmp_i18n_msg_UnknownLibraryType,
- kmp_i18n_msg_ReapMonitorError,
- kmp_i18n_msg_ReapWorkerError,
- kmp_i18n_msg_ChangeThreadAffMaskError,
- kmp_i18n_msg_ThreadsMigrate,
- kmp_i18n_msg_DecreaseToThreads,
- kmp_i18n_msg_IncreaseToThreads,
- kmp_i18n_msg_OBSOLETE20,
- kmp_i18n_msg_AffCapableUseCpuinfo,
- kmp_i18n_msg_AffUseGlobCpuid,
- kmp_i18n_msg_AffCapableUseFlat,
- kmp_i18n_msg_AffNotCapableUseLocCpuid,
- kmp_i18n_msg_AffNotCapableUseCpuinfo,
- kmp_i18n_msg_AffFlatTopology,
- kmp_i18n_msg_InitOSProcSetRespect,
- kmp_i18n_msg_InitOSProcSetNotRespect,
- kmp_i18n_msg_AvailableOSProc,
- kmp_i18n_msg_Uniform,
- kmp_i18n_msg_NonUniform,
- kmp_i18n_msg_Topology,
- kmp_i18n_msg_OBSOLETE21,
- kmp_i18n_msg_OSProcToPackage,
- kmp_i18n_msg_OBSOLETE22,
- kmp_i18n_msg_OBSOLETE23,
- kmp_i18n_msg_OBSOLETE24,
- kmp_i18n_msg_OBSOLETE25,
- kmp_i18n_msg_OBSOLETE26,
- kmp_i18n_msg_OBSOLETE27,
- kmp_i18n_msg_OBSOLETE28,
- kmp_i18n_msg_OBSOLETE29,
- kmp_i18n_msg_OBSOLETE30,
- kmp_i18n_msg_OSProcMapToPack,
- kmp_i18n_msg_OBSOLETE31,
- kmp_i18n_msg_OBSOLETE32,
- kmp_i18n_msg_OBSOLETE33,
- kmp_i18n_msg_OBSOLETE34,
- kmp_i18n_msg_OBSOLETE35,
- kmp_i18n_msg_BarriersInDifferentOrder,
- kmp_i18n_msg_FunctionError,
- kmp_i18n_msg_TopologyExtra,
- kmp_i18n_msg_WrongMessageCatalog,
- kmp_i18n_msg_StgIgnored,
- kmp_i18n_msg_OBSOLETE36,
- kmp_i18n_msg_CnsBoundToWorksharing,
- kmp_i18n_msg_CnsDetectedEnd,
- kmp_i18n_msg_CnsIterationRangeTooLarge,
- kmp_i18n_msg_CnsLoopIncrZeroProhibited,
- kmp_i18n_msg_CnsExpectedEnd,
- kmp_i18n_msg_CnsInvalidNesting,
- kmp_i18n_msg_CnsMultipleNesting,
- kmp_i18n_msg_CnsNestingSameName,
- kmp_i18n_msg_CnsNoOrderedClause,
- kmp_i18n_msg_CnsNotInTaskConstruct,
- kmp_i18n_msg_CnsThreadsAtBarrier,
- kmp_i18n_msg_CantConnect,
- kmp_i18n_msg_CantConnectUsing,
- kmp_i18n_msg_LibNotSupport,
- kmp_i18n_msg_LibNotSupportFor,
- kmp_i18n_msg_StaticLibNotSupport,
- kmp_i18n_msg_OBSOLETE37,
- kmp_i18n_msg_IttUnknownGroup,
- kmp_i18n_msg_IttEnvVarTooLong,
- kmp_i18n_msg_AffUseGlobCpuidL11,
- kmp_i18n_msg_AffNotCapableUseLocCpuidL11,
- kmp_i18n_msg_AffInfoStr,
- kmp_i18n_msg_AffInfoStrStr,
- kmp_i18n_msg_OSProcToPhysicalThreadMap,
- kmp_i18n_msg_AffUsingFlatOS,
- kmp_i18n_msg_AffParseFilename,
- kmp_i18n_msg_MsgExiting,
- kmp_i18n_msg_IncompatibleLibrary,
- kmp_i18n_msg_IttFunctionError,
- kmp_i18n_msg_IttUnknownError,
- kmp_i18n_msg_EnvMiddleWarn,
- kmp_i18n_msg_CnsLockNotDestroyed,
- kmp_i18n_msg_CantLoadBalUsing,
- kmp_i18n_msg_AffNotCapableUsePthread,
- kmp_i18n_msg_AffUsePthread,
- kmp_i18n_msg_OBSOLETE38,
- kmp_i18n_msg_OBSOLETE39,
- kmp_i18n_msg_OBSOLETE40,
- kmp_i18n_msg_OBSOLETE41,
- kmp_i18n_msg_NthSyntaxError,
- kmp_i18n_msg_NthSpacesNotAllowed,
- kmp_i18n_msg_AffStrParseFilename,
- kmp_i18n_msg_OBSOLETE42,
- kmp_i18n_msg_AffTypeCantUseMultGroups,
- kmp_i18n_msg_AffGranCantUseMultGroups,
- kmp_i18n_msg_AffWindowsProcGroupMap,
- kmp_i18n_msg_AffOSProcToGroup,
- kmp_i18n_msg_AffBalancedNotAvail,
- kmp_i18n_msg_OBSOLETE43,
- kmp_i18n_msg_EnvLockWarn,
- kmp_i18n_msg_FutexNotSupported,
- kmp_i18n_msg_AffGranUsing,
- kmp_i18n_msg_AffThrPlaceInvalid,
- kmp_i18n_msg_AffThrPlaceUnsupported,
- kmp_i18n_msg_AffThrPlaceManyCores,
- kmp_i18n_msg_SyntaxErrorUsing,
- kmp_i18n_msg_AdaptiveNotSupported,
- kmp_i18n_msg_EnvSyntaxError,
- kmp_i18n_msg_EnvSpacesNotAllowed,
- kmp_i18n_msg_BoundToOSProcSet,
- kmp_i18n_msg_CnsLoopIncrIllegal,
- kmp_i18n_msg_NoGompCancellation,
- kmp_i18n_msg_AffThrPlaceNonUniform,
- kmp_i18n_msg_AffThrPlaceNonThreeLevel,
- kmp_i18n_msg_AffGranTopGroup,
- kmp_i18n_msg_AffGranGroupType,
- kmp_i18n_msg_AffThrPlaceManySockets,
- kmp_i18n_msg_AffThrPlaceDeprecated,
- kmp_i18n_msg_AffUsingHwloc,
- kmp_i18n_msg_AffIgnoringHwloc,
- kmp_i18n_msg_AffHwlocErrorOccurred,
- kmp_i18n_msg_last,
-
- // Set #5, hints.
- kmp_i18n_hnt_first = 327680,
- kmp_i18n_hnt_SubmitBugReport,
- kmp_i18n_hnt_OBSOLETE44,
- kmp_i18n_hnt_ChangeStackLimit,
- kmp_i18n_hnt_Unset_ALL_THREADS,
- kmp_i18n_hnt_Set_ALL_THREADPRIVATE,
- kmp_i18n_hnt_PossibleSystemLimitOnThreads,
- kmp_i18n_hnt_DuplicateLibrary,
- kmp_i18n_hnt_NameComesFrom_CPUINFO_FILE,
- kmp_i18n_hnt_NotEnoughMemory,
- kmp_i18n_hnt_ValidBoolValues,
- kmp_i18n_hnt_BufferOverflow,
- kmp_i18n_hnt_RunningAtMaxPriority,
- kmp_i18n_hnt_ChangeMonitorStackSize,
- kmp_i18n_hnt_ChangeWorkerStackSize,
- kmp_i18n_hnt_IncreaseWorkerStackSize,
- kmp_i18n_hnt_DecreaseWorkerStackSize,
- kmp_i18n_hnt_Decrease_NUM_THREADS,
- kmp_i18n_hnt_IncreaseMonitorStackSize,
- kmp_i18n_hnt_DecreaseMonitorStackSize,
- kmp_i18n_hnt_DecreaseNumberOfThreadsInUse,
- kmp_i18n_hnt_DefaultScheduleKindUsed,
- kmp_i18n_hnt_GetNewerLibrary,
- kmp_i18n_hnt_CheckEnvVar,
- kmp_i18n_hnt_OBSOLETE45,
- kmp_i18n_hnt_OBSOLETE46,
- kmp_i18n_hnt_BadExeFormat,
- kmp_i18n_hnt_SystemLimitOnThreads,
- kmp_i18n_hnt_last,
-
- kmp_i18n_xxx_lastest
-
-}; // enum kmp_i18n_id
-
-typedef enum kmp_i18n_id kmp_i18n_id_t;
-
-
-// end of file //
+// Do not edit this file! //
+// The file was generated from en_US.txt by message-converter.pl on Fri Dec 18 12:25:36 2015. //
+
+enum kmp_i18n_id {
+
+ // A special id for absence of message.
+ kmp_i18n_null = 0,
+
+ // Set #1, meta.
+ kmp_i18n_prp_first = 65536,
+ kmp_i18n_prp_Language,
+ kmp_i18n_prp_Country,
+ kmp_i18n_prp_LangId,
+ kmp_i18n_prp_Version,
+ kmp_i18n_prp_Revision,
+ kmp_i18n_prp_last,
+
+ // Set #2, strings.
+ kmp_i18n_str_first = 131072,
+ kmp_i18n_str_Error,
+ kmp_i18n_str_UnknownFile,
+ kmp_i18n_str_NotANumber,
+ kmp_i18n_str_BadUnit,
+ kmp_i18n_str_IllegalCharacters,
+ kmp_i18n_str_ValueTooLarge,
+ kmp_i18n_str_ValueTooSmall,
+ kmp_i18n_str_NotMultiple4K,
+ kmp_i18n_str_UnknownTopology,
+ kmp_i18n_str_CantOpenCpuinfo,
+ kmp_i18n_str_ProcCpuinfo,
+ kmp_i18n_str_NoProcRecords,
+ kmp_i18n_str_TooManyProcRecords,
+ kmp_i18n_str_CantRewindCpuinfo,
+ kmp_i18n_str_LongLineCpuinfo,
+ kmp_i18n_str_TooManyEntries,
+ kmp_i18n_str_MissingProcField,
+ kmp_i18n_str_MissingPhysicalIDField,
+ kmp_i18n_str_MissingValCpuinfo,
+ kmp_i18n_str_DuplicateFieldCpuinfo,
+ kmp_i18n_str_PhysicalIDsNotUnique,
+ kmp_i18n_str_ApicNotPresent,
+ kmp_i18n_str_InvalidCpuidInfo,
+ kmp_i18n_str_OBSOLETE1,
+ kmp_i18n_str_InconsistentCpuidInfo,
+ kmp_i18n_str_OutOfHeapMemory,
+ kmp_i18n_str_MemoryAllocFailed,
+ kmp_i18n_str_Core,
+ kmp_i18n_str_Thread,
+ kmp_i18n_str_Package,
+ kmp_i18n_str_Node,
+ kmp_i18n_str_OBSOLETE2,
+ kmp_i18n_str_DecodingLegacyAPIC,
+ kmp_i18n_str_OBSOLETE3,
+ kmp_i18n_str_NotDefined,
+ kmp_i18n_str_EffectiveSettings,
+ kmp_i18n_str_UserSettings,
+ kmp_i18n_str_StorageMapWarning,
+ kmp_i18n_str_OBSOLETE4,
+ kmp_i18n_str_OBSOLETE5,
+ kmp_i18n_str_OBSOLETE6,
+ kmp_i18n_str_OBSOLETE7,
+ kmp_i18n_str_OBSOLETE8,
+ kmp_i18n_str_Decodingx2APIC,
+ kmp_i18n_str_NoLeaf11Support,
+ kmp_i18n_str_NoLeaf4Support,
+ kmp_i18n_str_ThreadIDsNotUnique,
+ kmp_i18n_str_UsingPthread,
+ kmp_i18n_str_LegacyApicIDsNotUnique,
+ kmp_i18n_str_x2ApicIDsNotUnique,
+ kmp_i18n_str_DisplayEnvBegin,
+ kmp_i18n_str_DisplayEnvEnd,
+ kmp_i18n_str_Device,
+ kmp_i18n_str_Host,
+ kmp_i18n_str_last,
+
+ // Set #3, formats.
+ kmp_i18n_fmt_first = 196608,
+ kmp_i18n_fmt_Info,
+ kmp_i18n_fmt_Warning,
+ kmp_i18n_fmt_Fatal,
+ kmp_i18n_fmt_SysErr,
+ kmp_i18n_fmt_Hint,
+ kmp_i18n_fmt_Pragma,
+ kmp_i18n_fmt_last,
+
+ // Set #4, messages.
+ kmp_i18n_msg_first = 262144,
+ kmp_i18n_msg_LibraryIsSerial,
+ kmp_i18n_msg_CantOpenMessageCatalog,
+ kmp_i18n_msg_WillUseDefaultMessages,
+ kmp_i18n_msg_LockIsUninitialized,
+ kmp_i18n_msg_LockSimpleUsedAsNestable,
+ kmp_i18n_msg_LockNestableUsedAsSimple,
+ kmp_i18n_msg_LockIsAlreadyOwned,
+ kmp_i18n_msg_LockStillOwned,
+ kmp_i18n_msg_LockUnsettingFree,
+ kmp_i18n_msg_LockUnsettingSetByAnother,
+ kmp_i18n_msg_StackOverflow,
+ kmp_i18n_msg_StackOverlap,
+ kmp_i18n_msg_AssertionFailure,
+ kmp_i18n_msg_CantRegisterNewThread,
+ kmp_i18n_msg_DuplicateLibrary,
+ kmp_i18n_msg_CantOpenFileForReading,
+ kmp_i18n_msg_CantGetEnvVar,
+ kmp_i18n_msg_CantSetEnvVar,
+ kmp_i18n_msg_CantGetEnvironment,
+ kmp_i18n_msg_BadBoolValue,
+ kmp_i18n_msg_SSPNotBuiltIn,
+ kmp_i18n_msg_SPPSotfTerminateFailed,
+ kmp_i18n_msg_BufferOverflow,
+ kmp_i18n_msg_RealTimeSchedNotSupported,
+ kmp_i18n_msg_RunningAtMaxPriority,
+ kmp_i18n_msg_CantChangeMonitorPriority,
+ kmp_i18n_msg_MonitorWillStarve,
+ kmp_i18n_msg_CantSetMonitorStackSize,
+ kmp_i18n_msg_CantSetWorkerStackSize,
+ kmp_i18n_msg_CantInitThreadAttrs,
+ kmp_i18n_msg_CantDestroyThreadAttrs,
+ kmp_i18n_msg_CantSetWorkerState,
+ kmp_i18n_msg_CantSetMonitorState,
+ kmp_i18n_msg_NoResourcesForWorkerThread,
+ kmp_i18n_msg_NoResourcesForMonitorThread,
+ kmp_i18n_msg_CantTerminateWorkerThread,
+ kmp_i18n_msg_ScheduleKindOutOfRange,
+ kmp_i18n_msg_UnknownSchedulingType,
+ kmp_i18n_msg_InvalidValue,
+ kmp_i18n_msg_SmallValue,
+ kmp_i18n_msg_LargeValue,
+ kmp_i18n_msg_StgInvalidValue,
+ kmp_i18n_msg_BarrReleaseValueInvalid,
+ kmp_i18n_msg_BarrGatherValueInvalid,
+ kmp_i18n_msg_OBSOLETE9,
+ kmp_i18n_msg_ParRangeSyntax,
+ kmp_i18n_msg_UnbalancedQuotes,
+ kmp_i18n_msg_EmptyString,
+ kmp_i18n_msg_LongValue,
+ kmp_i18n_msg_InvalidClause,
+ kmp_i18n_msg_EmptyClause,
+ kmp_i18n_msg_InvalidChunk,
+ kmp_i18n_msg_LargeChunk,
+ kmp_i18n_msg_IgnoreChunk,
+ kmp_i18n_msg_CantGetProcFreq,
+ kmp_i18n_msg_EnvParallelWarn,
+ kmp_i18n_msg_AffParamDefined,
+ kmp_i18n_msg_AffInvalidParam,
+ kmp_i18n_msg_AffManyParams,
+ kmp_i18n_msg_AffManyParamsForLogic,
+ kmp_i18n_msg_AffNoParam,
+ kmp_i18n_msg_AffNoProcList,
+ kmp_i18n_msg_AffProcListNoType,
+ kmp_i18n_msg_AffProcListNotExplicit,
+ kmp_i18n_msg_AffSyntaxError,
+ kmp_i18n_msg_AffZeroStride,
+ kmp_i18n_msg_AffStartGreaterEnd,
+ kmp_i18n_msg_AffStrideLessZero,
+ kmp_i18n_msg_AffRangeTooBig,
+ kmp_i18n_msg_OBSOLETE10,
+ kmp_i18n_msg_AffNotSupported,
+ kmp_i18n_msg_OBSOLETE11,
+ kmp_i18n_msg_GetAffSysCallNotSupported,
+ kmp_i18n_msg_SetAffSysCallNotSupported,
+ kmp_i18n_msg_OBSOLETE12,
+ kmp_i18n_msg_OBSOLETE13,
+ kmp_i18n_msg_OBSOLETE14,
+ kmp_i18n_msg_OBSOLETE15,
+ kmp_i18n_msg_AffCantGetMaskSize,
+ kmp_i18n_msg_ParseSizeIntWarn,
+ kmp_i18n_msg_ParseExtraCharsWarn,
+ kmp_i18n_msg_UnknownForceReduction,
+ kmp_i18n_msg_TimerUseGettimeofday,
+ kmp_i18n_msg_TimerNeedMoreParam,
+ kmp_i18n_msg_TimerInvalidParam,
+ kmp_i18n_msg_TimerGettimeFailed,
+ kmp_i18n_msg_TimerUnknownFunction,
+ kmp_i18n_msg_UnknownSchedTypeDetected,
+ kmp_i18n_msg_DispatchManyThreads,
+ kmp_i18n_msg_IttLookupFailed,
+ kmp_i18n_msg_IttLoadLibFailed,
+ kmp_i18n_msg_IttAllNotifDisabled,
+ kmp_i18n_msg_IttObjNotifDisabled,
+ kmp_i18n_msg_IttMarkNotifDisabled,
+ kmp_i18n_msg_IttUnloadLibFailed,
+ kmp_i18n_msg_CantFormThrTeam,
+ kmp_i18n_msg_ActiveLevelsNegative,
+ kmp_i18n_msg_ActiveLevelsExceedLimit,
+ kmp_i18n_msg_SetLibraryIncorrectCall,
+ kmp_i18n_msg_FatalSysError,
+ kmp_i18n_msg_OutOfHeapMemory,
+ kmp_i18n_msg_OBSOLETE16,
+ kmp_i18n_msg_OBSOLETE17,
+ kmp_i18n_msg_Using_int_Value,
+ kmp_i18n_msg_Using_uint_Value,
+ kmp_i18n_msg_Using_uint64_Value,
+ kmp_i18n_msg_Using_str_Value,
+ kmp_i18n_msg_MaxValueUsing,
+ kmp_i18n_msg_MinValueUsing,
+ kmp_i18n_msg_MemoryAllocFailed,
+ kmp_i18n_msg_FileNameTooLong,
+ kmp_i18n_msg_OBSOLETE18,
+ kmp_i18n_msg_ManyThreadsForTPDirective,
+ kmp_i18n_msg_AffinityInvalidMask,
+ kmp_i18n_msg_WrongDefinition,
+ kmp_i18n_msg_TLSSetValueFailed,
+ kmp_i18n_msg_TLSOutOfIndexes,
+ kmp_i18n_msg_OBSOLETE19,
+ kmp_i18n_msg_CantGetNumAvailCPU,
+ kmp_i18n_msg_AssumedNumCPU,
+ kmp_i18n_msg_ErrorInitializeAffinity,
+ kmp_i18n_msg_AffThreadsMayMigrate,
+ kmp_i18n_msg_AffIgnoreInvalidProcID,
+ kmp_i18n_msg_AffNoValidProcID,
+ kmp_i18n_msg_UsingFlatOS,
+ kmp_i18n_msg_UsingFlatOSFile,
+ kmp_i18n_msg_UsingFlatOSFileLine,
+ kmp_i18n_msg_FileMsgExiting,
+ kmp_i18n_msg_FileLineMsgExiting,
+ kmp_i18n_msg_ConstructIdentInvalid,
+ kmp_i18n_msg_ThreadIdentInvalid,
+ kmp_i18n_msg_RTLNotInitialized,
+ kmp_i18n_msg_TPCommonBlocksInconsist,
+ kmp_i18n_msg_CantSetThreadAffMask,
+ kmp_i18n_msg_CantSetThreadPriority,
+ kmp_i18n_msg_CantCreateThread,
+ kmp_i18n_msg_CantCreateEvent,
+ kmp_i18n_msg_CantSetEvent,
+ kmp_i18n_msg_CantCloseHandle,
+ kmp_i18n_msg_UnknownLibraryType,
+ kmp_i18n_msg_ReapMonitorError,
+ kmp_i18n_msg_ReapWorkerError,
+ kmp_i18n_msg_ChangeThreadAffMaskError,
+ kmp_i18n_msg_ThreadsMigrate,
+ kmp_i18n_msg_DecreaseToThreads,
+ kmp_i18n_msg_IncreaseToThreads,
+ kmp_i18n_msg_OBSOLETE20,
+ kmp_i18n_msg_AffCapableUseCpuinfo,
+ kmp_i18n_msg_AffUseGlobCpuid,
+ kmp_i18n_msg_AffCapableUseFlat,
+ kmp_i18n_msg_AffNotCapableUseLocCpuid,
+ kmp_i18n_msg_AffNotCapableUseCpuinfo,
+ kmp_i18n_msg_AffFlatTopology,
+ kmp_i18n_msg_InitOSProcSetRespect,
+ kmp_i18n_msg_InitOSProcSetNotRespect,
+ kmp_i18n_msg_AvailableOSProc,
+ kmp_i18n_msg_Uniform,
+ kmp_i18n_msg_NonUniform,
+ kmp_i18n_msg_Topology,
+ kmp_i18n_msg_OBSOLETE21,
+ kmp_i18n_msg_OSProcToPackage,
+ kmp_i18n_msg_OBSOLETE22,
+ kmp_i18n_msg_OBSOLETE23,
+ kmp_i18n_msg_OBSOLETE24,
+ kmp_i18n_msg_OBSOLETE25,
+ kmp_i18n_msg_OBSOLETE26,
+ kmp_i18n_msg_OBSOLETE27,
+ kmp_i18n_msg_OBSOLETE28,
+ kmp_i18n_msg_OBSOLETE29,
+ kmp_i18n_msg_OBSOLETE30,
+ kmp_i18n_msg_OSProcMapToPack,
+ kmp_i18n_msg_OBSOLETE31,
+ kmp_i18n_msg_OBSOLETE32,
+ kmp_i18n_msg_OBSOLETE33,
+ kmp_i18n_msg_OBSOLETE34,
+ kmp_i18n_msg_OBSOLETE35,
+ kmp_i18n_msg_BarriersInDifferentOrder,
+ kmp_i18n_msg_FunctionError,
+ kmp_i18n_msg_TopologyExtra,
+ kmp_i18n_msg_WrongMessageCatalog,
+ kmp_i18n_msg_StgIgnored,
+ kmp_i18n_msg_OBSOLETE36,
+ kmp_i18n_msg_CnsBoundToWorksharing,
+ kmp_i18n_msg_CnsDetectedEnd,
+ kmp_i18n_msg_CnsIterationRangeTooLarge,
+ kmp_i18n_msg_CnsLoopIncrZeroProhibited,
+ kmp_i18n_msg_CnsExpectedEnd,
+ kmp_i18n_msg_CnsInvalidNesting,
+ kmp_i18n_msg_CnsMultipleNesting,
+ kmp_i18n_msg_CnsNestingSameName,
+ kmp_i18n_msg_CnsNoOrderedClause,
+ kmp_i18n_msg_CnsNotInTaskConstruct,
+ kmp_i18n_msg_CnsThreadsAtBarrier,
+ kmp_i18n_msg_CantConnect,
+ kmp_i18n_msg_CantConnectUsing,
+ kmp_i18n_msg_LibNotSupport,
+ kmp_i18n_msg_LibNotSupportFor,
+ kmp_i18n_msg_StaticLibNotSupport,
+ kmp_i18n_msg_OBSOLETE37,
+ kmp_i18n_msg_IttUnknownGroup,
+ kmp_i18n_msg_IttEnvVarTooLong,
+ kmp_i18n_msg_AffUseGlobCpuidL11,
+ kmp_i18n_msg_AffNotCapableUseLocCpuidL11,
+ kmp_i18n_msg_AffInfoStr,
+ kmp_i18n_msg_AffInfoStrStr,
+ kmp_i18n_msg_OSProcToPhysicalThreadMap,
+ kmp_i18n_msg_AffUsingFlatOS,
+ kmp_i18n_msg_AffParseFilename,
+ kmp_i18n_msg_MsgExiting,
+ kmp_i18n_msg_IncompatibleLibrary,
+ kmp_i18n_msg_IttFunctionError,
+ kmp_i18n_msg_IttUnknownError,
+ kmp_i18n_msg_EnvMiddleWarn,
+ kmp_i18n_msg_CnsLockNotDestroyed,
+ kmp_i18n_msg_CantLoadBalUsing,
+ kmp_i18n_msg_AffNotCapableUsePthread,
+ kmp_i18n_msg_AffUsePthread,
+ kmp_i18n_msg_OBSOLETE38,
+ kmp_i18n_msg_OBSOLETE39,
+ kmp_i18n_msg_OBSOLETE40,
+ kmp_i18n_msg_OBSOLETE41,
+ kmp_i18n_msg_NthSyntaxError,
+ kmp_i18n_msg_NthSpacesNotAllowed,
+ kmp_i18n_msg_AffStrParseFilename,
+ kmp_i18n_msg_OBSOLETE42,
+ kmp_i18n_msg_AffTypeCantUseMultGroups,
+ kmp_i18n_msg_AffGranCantUseMultGroups,
+ kmp_i18n_msg_AffWindowsProcGroupMap,
+ kmp_i18n_msg_AffOSProcToGroup,
+ kmp_i18n_msg_AffBalancedNotAvail,
+ kmp_i18n_msg_OBSOLETE43,
+ kmp_i18n_msg_EnvLockWarn,
+ kmp_i18n_msg_FutexNotSupported,
+ kmp_i18n_msg_AffGranUsing,
+ kmp_i18n_msg_AffThrPlaceInvalid,
+ kmp_i18n_msg_AffThrPlaceUnsupported,
+ kmp_i18n_msg_AffThrPlaceManyCores,
+ kmp_i18n_msg_SyntaxErrorUsing,
+ kmp_i18n_msg_AdaptiveNotSupported,
+ kmp_i18n_msg_EnvSyntaxError,
+ kmp_i18n_msg_EnvSpacesNotAllowed,
+ kmp_i18n_msg_BoundToOSProcSet,
+ kmp_i18n_msg_CnsLoopIncrIllegal,
+ kmp_i18n_msg_NoGompCancellation,
+ kmp_i18n_msg_AffThrPlaceNonUniform,
+ kmp_i18n_msg_AffThrPlaceNonThreeLevel,
+ kmp_i18n_msg_AffGranTopGroup,
+ kmp_i18n_msg_AffGranGroupType,
+ kmp_i18n_msg_AffThrPlaceManySockets,
+ kmp_i18n_msg_AffThrPlaceDeprecated,
+ kmp_i18n_msg_AffUsingHwloc,
+ kmp_i18n_msg_AffIgnoringHwloc,
+ kmp_i18n_msg_AffHwlocErrorOccurred,
+ kmp_i18n_msg_last,
+
+ // Set #5, hints.
+ kmp_i18n_hnt_first = 327680,
+ kmp_i18n_hnt_SubmitBugReport,
+ kmp_i18n_hnt_OBSOLETE44,
+ kmp_i18n_hnt_ChangeStackLimit,
+ kmp_i18n_hnt_Unset_ALL_THREADS,
+ kmp_i18n_hnt_Set_ALL_THREADPRIVATE,
+ kmp_i18n_hnt_PossibleSystemLimitOnThreads,
+ kmp_i18n_hnt_DuplicateLibrary,
+ kmp_i18n_hnt_NameComesFrom_CPUINFO_FILE,
+ kmp_i18n_hnt_NotEnoughMemory,
+ kmp_i18n_hnt_ValidBoolValues,
+ kmp_i18n_hnt_BufferOverflow,
+ kmp_i18n_hnt_RunningAtMaxPriority,
+ kmp_i18n_hnt_ChangeMonitorStackSize,
+ kmp_i18n_hnt_ChangeWorkerStackSize,
+ kmp_i18n_hnt_IncreaseWorkerStackSize,
+ kmp_i18n_hnt_DecreaseWorkerStackSize,
+ kmp_i18n_hnt_Decrease_NUM_THREADS,
+ kmp_i18n_hnt_IncreaseMonitorStackSize,
+ kmp_i18n_hnt_DecreaseMonitorStackSize,
+ kmp_i18n_hnt_DecreaseNumberOfThreadsInUse,
+ kmp_i18n_hnt_DefaultScheduleKindUsed,
+ kmp_i18n_hnt_GetNewerLibrary,
+ kmp_i18n_hnt_CheckEnvVar,
+ kmp_i18n_hnt_OBSOLETE45,
+ kmp_i18n_hnt_OBSOLETE46,
+ kmp_i18n_hnt_BadExeFormat,
+ kmp_i18n_hnt_SystemLimitOnThreads,
+ kmp_i18n_hnt_last,
+
+ kmp_i18n_xxx_lastest
+
+}; // enum kmp_i18n_id
+
+typedef enum kmp_i18n_id kmp_i18n_id_t;
+
+
+// end of file //
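
The set bases above (65536, 131072, 196608, 262144, 327680) are consecutive multiples of 2^16, so the set and the position of any generated id fall out with one shift and one mask. A tiny self-contained illustration:

#include <stdio.h>

int main( void )
{
    int id    = 131073;       /* kmp_i18n_str_Error: the entry right after kmp_i18n_str_first */
    int set   = id >> 16;     /* 2 -> set #2, strings          */
    int index = id & 0xFFFF;  /* 1 -> first string of that set */
    printf( "set=%d index=%d\n", set, index );
    return 0;
}
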
diff --git a/contrib/libs/cxxsupp/openmp/kmp_import.c b/contrib/libs/cxxsupp/openmp/kmp_import.c
index 6f0105602f..42fba412c1 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_import.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_import.c
@@ -1,42 +1,42 @@
-/*
- * kmp_import.c
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-/*
- ------------------------------------------------------------------------------------------------
- Object generated from this source file is linked to Windows* OS DLL import library (libompmd.lib)
- only! It is not a part of regular static or dynamic OpenMP RTL. Any code that just needs to go
- in the libompmd.lib (but not in libompmt.lib and libompmd.dll) should be placed in this
- file.
- ------------------------------------------------------------------------------------------------
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- These symbols are required for mutual exclusion with Microsoft OpenMP RTL (and compatibility
- with MS Compiler).
-*/
-
-int _You_must_link_with_exactly_one_OpenMP_library = 1;
-int _You_must_link_with_Intel_OpenMP_library = 1;
-int _You_must_link_with_Microsoft_OpenMP_library = 1;
-
-#ifdef __cplusplus
-}
-#endif
-
-// end of file //
+/*
+ * kmp_import.c
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Object generated from this source file is linked to Windows* OS DLL import library (libompmd.lib)
+ only! It is not a part of regular static or dynamic OpenMP RTL. Any code that just needs to go
+ in the libompmd.lib (but not in libompmt.lib and libompmd.dll) should be placed in this
+ file.
+ ------------------------------------------------------------------------------------------------
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ These symbols are required for mutual exclusion with Microsoft OpenMP RTL (and compatibility
+ with MS Compiler).
+*/
+
+int _You_must_link_with_exactly_one_OpenMP_library = 1;
+int _You_must_link_with_Intel_OpenMP_library = 1;
+int _You_must_link_with_Microsoft_OpenMP_library = 1;
+
+#ifdef __cplusplus
+}
+#endif
+
+// end of file //
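
The three guard variables rely on ordinary linker behavior: if two OpenMP runtimes both define the same global, linking them into one image fails with a duplicate-symbol error, and the symbol name itself tells the user what went wrong. A minimal illustration with made-up file names (not part of the RTL):

/* a.c */
int _You_must_link_with_exactly_one_OpenMP_library = 1;
int main( void ) { return 0; }

/* b.c */
int _You_must_link_with_exactly_one_OpenMP_library = 1;

/* cc a.c b.c  ->  "multiple definition of `_You_must_link_with_exactly_one_OpenMP_library'" */
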
diff --git a/contrib/libs/cxxsupp/openmp/kmp_io.c b/contrib/libs/cxxsupp/openmp/kmp_io.c
index bd16a970f8..ef808af8fb 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_io.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_io.c
@@ -1,248 +1,248 @@
-/*
- * KMP_IO.c -- RTL IO
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdarg.h>
-#include <string.h>
-#ifndef __ABSOFT_WIN
-# include <sys/types.h>
-#endif
-
-#include "kmp_os.h"
-#include "kmp_lock.h"
-#include "kmp_str.h"
-#include "kmp_io.h"
-#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc
-
-#if KMP_OS_WINDOWS
-# pragma warning( push )
-# pragma warning( disable: 271 310 )
-# include <windows.h>
-# pragma warning( pop )
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_stdio_lock ); /* Control stdio functions */
-kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_console_lock ); /* Control console initialization */
-
-#if KMP_OS_WINDOWS
-
- # ifdef KMP_DEBUG
- /* __kmp_stdout is used only for dev build */
- static HANDLE __kmp_stdout = NULL;
- # endif
- static HANDLE __kmp_stderr = NULL;
- static int __kmp_console_exists = FALSE;
- static kmp_str_buf_t __kmp_console_buf;
-
- static int
- is_console( void )
- {
- char buffer[ 128 ];
- DWORD rc = 0;
- DWORD err = 0;
- // Try to get console title.
- SetLastError( 0 );
- // GetConsoleTitle does not reset last error in case of success or short buffer,
- // so we need to clear it explicitly.
- rc = GetConsoleTitle( buffer, sizeof( buffer ) );
- if ( rc == 0 ) {
- // rc == 0 means getting console title failed. Let us find out why.
- err = GetLastError();
- // err == 0 means buffer too short (we suppose console exists).
-            // In Windows applications we usually have err == 6 (invalid handle).
- }; // if
- return rc > 0 || err == 0;
- }
-
- void
- __kmp_close_console( void )
- {
- /* wait until user presses return before closing window */
- /* TODO only close if a window was opened */
- if( __kmp_console_exists ) {
- #ifdef KMP_DEBUG
- /* standard out is used only in dev build */
- __kmp_stdout = NULL;
- #endif
- __kmp_stderr = NULL;
- __kmp_str_buf_free( &__kmp_console_buf );
- __kmp_console_exists = FALSE;
- }
- }
-
- /* For windows, call this before stdout, stderr, or stdin are used.
- * It opens a console window and starts processing */
- static void
- __kmp_redirect_output( void )
- {
- __kmp_acquire_bootstrap_lock( &__kmp_console_lock );
-
- if( ! __kmp_console_exists ) {
- #ifdef KMP_DEBUG
- /* standard out is used only in dev build */
- HANDLE ho;
- #endif
- HANDLE he;
-
- __kmp_str_buf_init( &__kmp_console_buf );
-
- AllocConsole();
- // We do not check the result of AllocConsole because
- // 1. the call is harmless
-            // 2. it is not clear how to communicate failure
- // 3. we will detect failure later when we get handle(s)
-
+/*
+ * KMP_IO.c -- RTL IO
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#ifndef __ABSOFT_WIN
+# include <sys/types.h>
+#endif
+
+#include "kmp_os.h"
+#include "kmp_lock.h"
+#include "kmp_str.h"
+#include "kmp_io.h"
+#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc
+
+#if KMP_OS_WINDOWS
+# pragma warning( push )
+# pragma warning( disable: 271 310 )
+# include <windows.h>
+# pragma warning( pop )
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_stdio_lock ); /* Control stdio functions */
+kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_console_lock ); /* Control console initialization */
+
+#if KMP_OS_WINDOWS
+
+ # ifdef KMP_DEBUG
+ /* __kmp_stdout is used only for dev build */
+ static HANDLE __kmp_stdout = NULL;
+ # endif
+ static HANDLE __kmp_stderr = NULL;
+ static int __kmp_console_exists = FALSE;
+ static kmp_str_buf_t __kmp_console_buf;
+
+ static int
+ is_console( void )
+ {
+ char buffer[ 128 ];
+ DWORD rc = 0;
+ DWORD err = 0;
+ // Try to get console title.
+ SetLastError( 0 );
+ // GetConsoleTitle does not reset last error in case of success or short buffer,
+ // so we need to clear it explicitly.
+ rc = GetConsoleTitle( buffer, sizeof( buffer ) );
+ if ( rc == 0 ) {
+ // rc == 0 means getting console title failed. Let us find out why.
+ err = GetLastError();
+ // err == 0 means buffer too short (we suppose console exists).
+            // In Windows applications we usually have err == 6 (invalid handle).
+ }; // if
+ return rc > 0 || err == 0;
+ }
+
+ void
+ __kmp_close_console( void )
+ {
+ /* wait until user presses return before closing window */
+ /* TODO only close if a window was opened */
+ if( __kmp_console_exists ) {
#ifdef KMP_DEBUG
- ho = GetStdHandle( STD_OUTPUT_HANDLE );
- if ( ho == INVALID_HANDLE_VALUE || ho == NULL ) {
-
- DWORD err = GetLastError();
- // TODO: output error somehow (maybe message box)
- __kmp_stdout = NULL;
-
- } else {
-
- __kmp_stdout = ho; // temporary code, need new global for ho
-
- }
- #endif
- he = GetStdHandle( STD_ERROR_HANDLE );
- if ( he == INVALID_HANDLE_VALUE || he == NULL ) {
-
- DWORD err = GetLastError();
- // TODO: output error somehow (maybe message box)
- __kmp_stderr = NULL;
-
- } else {
-
- __kmp_stderr = he; // temporary code, need new global
- }
- __kmp_console_exists = TRUE;
- }
- __kmp_release_bootstrap_lock( &__kmp_console_lock );
- }
-
-#else
- #define __kmp_stderr (stderr)
-#endif /* KMP_OS_WINDOWS */
-
-void
-__kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap )
-{
- #if KMP_OS_WINDOWS
- if( !__kmp_console_exists ) {
- __kmp_redirect_output();
- }
- if( ! __kmp_stderr && __kmp_io == kmp_err ) {
- return;
- }
- #ifdef KMP_DEBUG
- if( ! __kmp_stdout && __kmp_io == kmp_out ) {
- return;
- }
- #endif
- #endif /* KMP_OS_WINDOWS */
-
- if ( __kmp_debug_buf && __kmp_debug_buffer != NULL ) {
-
- int dc = ( __kmp_debug_buf_atomic ?
- KMP_TEST_THEN_INC32( & __kmp_debug_count) : __kmp_debug_count++ )
- % __kmp_debug_buf_lines;
- char *db = & __kmp_debug_buffer[ dc * __kmp_debug_buf_chars ];
- int chars = 0;
-
- #ifdef KMP_DEBUG_PIDS
- chars = KMP_SNPRINTF( db, __kmp_debug_buf_chars, "pid=%d: ", (kmp_int32)getpid() );
- #endif
- chars += KMP_VSNPRINTF( db, __kmp_debug_buf_chars, format, ap );
-
- if ( chars + 1 > __kmp_debug_buf_chars ) {
- if ( chars + 1 > __kmp_debug_buf_warn_chars ) {
- #if KMP_OS_WINDOWS
- DWORD count;
- __kmp_str_buf_print( &__kmp_console_buf,
- "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n",
- chars + 1 );
- WriteFile( __kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, &count, NULL );
- __kmp_str_buf_clear( &__kmp_console_buf );
- #else
- fprintf( __kmp_stderr,
- "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n",
- chars + 1 );
- fflush( __kmp_stderr );
- #endif
- __kmp_debug_buf_warn_chars = chars + 1;
- }
- /* terminate string if overflow occurred */
- db[ __kmp_debug_buf_chars - 2 ] = '\n';
- db[ __kmp_debug_buf_chars - 1 ] = '\0';
- }
- } else {
- #if KMP_OS_WINDOWS
- DWORD count;
- #ifdef KMP_DEBUG_PIDS
- __kmp_str_buf_print( &__kmp_console_buf, "pid=%d: ",
- (kmp_int32)getpid() );
- #endif
- __kmp_str_buf_vprint( &__kmp_console_buf, format, ap );
- WriteFile(
- __kmp_stderr,
- __kmp_console_buf.str,
- __kmp_console_buf.used,
- &count,
- NULL
- );
- __kmp_str_buf_clear( &__kmp_console_buf );
- #else
- #ifdef KMP_DEBUG_PIDS
- fprintf( __kmp_stderr, "pid=%d: ", (kmp_int32)getpid() );
- #endif
- vfprintf( __kmp_stderr, format, ap );
- fflush( __kmp_stderr );
- #endif
- }
-}
-
-void
-__kmp_printf( char const * format, ... )
-{
- va_list ap;
- va_start( ap, format );
-
- __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
- __kmp_vprintf( kmp_err, format, ap );
- __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
-
- va_end( ap );
-}
-
-void
-__kmp_printf_no_lock( char const * format, ... )
-{
- va_list ap;
- va_start( ap, format );
-
- __kmp_vprintf( kmp_err, format, ap );
-
- va_end( ap );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+ /* standard out is used only in dev build */
+ __kmp_stdout = NULL;
+ #endif
+ __kmp_stderr = NULL;
+ __kmp_str_buf_free( &__kmp_console_buf );
+ __kmp_console_exists = FALSE;
+ }
+ }
+
+ /* For windows, call this before stdout, stderr, or stdin are used.
+ * It opens a console window and starts processing */
+ static void
+ __kmp_redirect_output( void )
+ {
+ __kmp_acquire_bootstrap_lock( &__kmp_console_lock );
+
+ if( ! __kmp_console_exists ) {
+ #ifdef KMP_DEBUG
+ /* standard out is used only in dev build */
+ HANDLE ho;
+ #endif
+ HANDLE he;
+
+ __kmp_str_buf_init( &__kmp_console_buf );
+
+ AllocConsole();
+ // We do not check the result of AllocConsole because
+ // 1. the call is harmless
+            // 2. it is not clear how to communicate failure
+ // 3. we will detect failure later when we get handle(s)
+
+ #ifdef KMP_DEBUG
+ ho = GetStdHandle( STD_OUTPUT_HANDLE );
+ if ( ho == INVALID_HANDLE_VALUE || ho == NULL ) {
+
+ DWORD err = GetLastError();
+ // TODO: output error somehow (maybe message box)
+ __kmp_stdout = NULL;
+
+ } else {
+
+ __kmp_stdout = ho; // temporary code, need new global for ho
+
+ }
+ #endif
+ he = GetStdHandle( STD_ERROR_HANDLE );
+ if ( he == INVALID_HANDLE_VALUE || he == NULL ) {
+
+ DWORD err = GetLastError();
+ // TODO: output error somehow (maybe message box)
+ __kmp_stderr = NULL;
+
+ } else {
+
+ __kmp_stderr = he; // temporary code, need new global
+ }
+ __kmp_console_exists = TRUE;
+ }
+ __kmp_release_bootstrap_lock( &__kmp_console_lock );
+ }
+
+#else
+ #define __kmp_stderr (stderr)
+#endif /* KMP_OS_WINDOWS */
+
+void
+__kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap )
+{
+ #if KMP_OS_WINDOWS
+ if( !__kmp_console_exists ) {
+ __kmp_redirect_output();
+ }
+ if( ! __kmp_stderr && __kmp_io == kmp_err ) {
+ return;
+ }
+ #ifdef KMP_DEBUG
+ if( ! __kmp_stdout && __kmp_io == kmp_out ) {
+ return;
+ }
+ #endif
+ #endif /* KMP_OS_WINDOWS */
+
+ if ( __kmp_debug_buf && __kmp_debug_buffer != NULL ) {
+
+ int dc = ( __kmp_debug_buf_atomic ?
+ KMP_TEST_THEN_INC32( & __kmp_debug_count) : __kmp_debug_count++ )
+ % __kmp_debug_buf_lines;
+ char *db = & __kmp_debug_buffer[ dc * __kmp_debug_buf_chars ];
+ int chars = 0;
+
+ #ifdef KMP_DEBUG_PIDS
+ chars = KMP_SNPRINTF( db, __kmp_debug_buf_chars, "pid=%d: ", (kmp_int32)getpid() );
+ #endif
+ chars += KMP_VSNPRINTF( db, __kmp_debug_buf_chars, format, ap );
+
+ if ( chars + 1 > __kmp_debug_buf_chars ) {
+ if ( chars + 1 > __kmp_debug_buf_warn_chars ) {
+ #if KMP_OS_WINDOWS
+ DWORD count;
+ __kmp_str_buf_print( &__kmp_console_buf,
+ "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n",
+ chars + 1 );
+ WriteFile( __kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, &count, NULL );
+ __kmp_str_buf_clear( &__kmp_console_buf );
+ #else
+ fprintf( __kmp_stderr,
+ "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n",
+ chars + 1 );
+ fflush( __kmp_stderr );
+ #endif
+ __kmp_debug_buf_warn_chars = chars + 1;
+ }
+ /* terminate string if overflow occurred */
+ db[ __kmp_debug_buf_chars - 2 ] = '\n';
+ db[ __kmp_debug_buf_chars - 1 ] = '\0';
+ }
+ } else {
+ #if KMP_OS_WINDOWS
+ DWORD count;
+ #ifdef KMP_DEBUG_PIDS
+ __kmp_str_buf_print( &__kmp_console_buf, "pid=%d: ",
+ (kmp_int32)getpid() );
+ #endif
+ __kmp_str_buf_vprint( &__kmp_console_buf, format, ap );
+ WriteFile(
+ __kmp_stderr,
+ __kmp_console_buf.str,
+ __kmp_console_buf.used,
+ &count,
+ NULL
+ );
+ __kmp_str_buf_clear( &__kmp_console_buf );
+ #else
+ #ifdef KMP_DEBUG_PIDS
+ fprintf( __kmp_stderr, "pid=%d: ", (kmp_int32)getpid() );
+ #endif
+ vfprintf( __kmp_stderr, format, ap );
+ fflush( __kmp_stderr );
+ #endif
+ }
+}
+
+void
+__kmp_printf( char const * format, ... )
+{
+ va_list ap;
+ va_start( ap, format );
+
+ __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
+ __kmp_vprintf( kmp_err, format, ap );
+ __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
+
+ va_end( ap );
+}
+
+void
+__kmp_printf_no_lock( char const * format, ... )
+{
+ va_list ap;
+ va_start( ap, format );
+
+ __kmp_vprintf( kmp_err, format, ap );
+
+ va_end( ap );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
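
When the debug buffer is enabled (__kmp_debug_buf), __kmp_vprintf above formats every message into one slot of a fixed pool of __kmp_debug_buf_lines lines, each __kmp_debug_buf_chars wide, chooses the slot from a running counter (atomically if __kmp_debug_buf_atomic), and force-terminates the slot on overflow. A stripped-down sketch of the same circular-slot pattern, with fixed sizes and no atomics assumed:

#include <stdarg.h>
#include <stdio.h>

#define BUF_LINES 16    /* stands in for __kmp_debug_buf_lines */
#define BUF_CHARS 128   /* stands in for __kmp_debug_buf_chars */

static char debug_buffer[ BUF_LINES * BUF_CHARS ];
static int  debug_count = 0;

static void debug_buf_printf( char const * format, ... )
{
    va_list ap;
    int    dc = debug_count++ % BUF_LINES;       /* oldest slot is overwritten           */
    char * db = & debug_buffer[ dc * BUF_CHARS ];
    int    chars;

    va_start( ap, format );
    chars = vsnprintf( db, BUF_CHARS, format, ap );
    va_end( ap );

    if ( chars + 1 > BUF_CHARS ) {               /* truncated: terminate as the RTL does */
        db[ BUF_CHARS - 2 ] = '\n';
        db[ BUF_CHARS - 1 ] = '\0';
    }
}
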
diff --git a/contrib/libs/cxxsupp/openmp/kmp_io.h b/contrib/libs/cxxsupp/openmp/kmp_io.h
index cbc74027c5..a0caa644ee 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_io.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_io.h
@@ -1,44 +1,44 @@
-/*
- * kmp_io.h -- RTL IO header file.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_IO_H
-#define KMP_IO_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-enum kmp_io {
- kmp_out = 0,
- kmp_err
-};
-
-extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */
-extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */
-
-extern void __kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap );
-extern void __kmp_printf( char const * format, ... );
-extern void __kmp_printf_no_lock( char const * format, ... );
-extern void __kmp_close_console( void );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* KMP_IO_H */
-
+/*
+ * kmp_io.h -- RTL IO header file.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_IO_H
+#define KMP_IO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+enum kmp_io {
+ kmp_out = 0,
+ kmp_err
+};
+
+extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */
+extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */
+
+extern void __kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap );
+extern void __kmp_printf( char const * format, ... );
+extern void __kmp_printf_no_lock( char const * format, ... );
+extern void __kmp_close_console( void );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* KMP_IO_H */
+
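
Both printf-style entry points funnel into __kmp_vprintf (see kmp_io.c above); the visible difference is that __kmp_printf takes __kmp_stdio_lock around the call while __kmp_printf_no_lock does not, presumably for paths where the lock is already held or cannot be taken. Illustrative call sites (the gtid variable is made up):

__kmp_printf( "OMP: thread %d reached barrier\n", gtid );  /* serialized on __kmp_stdio_lock */
__kmp_printf_no_lock( "OMP: shutting down\n" );            /* caller handles ordering itself */
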
diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.c b/contrib/libs/cxxsupp/openmp/kmp_itt.c
index 89d665b8b6..486d63550e 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_itt.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_itt.c
@@ -1,144 +1,144 @@
-#include "kmp_config.h"
-
-#if USE_ITT_BUILD
-/*
- * kmp_itt.c -- ITT Notify interface.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp_itt.h"
-
-#if KMP_DEBUG
- #include "kmp_itt.inl"
-#endif
-
-
-#if USE_ITT_NOTIFY
-
- kmp_int32 __kmp_barrier_domain_count;
- kmp_int32 __kmp_region_domain_count;
- __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
- __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
- __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
- kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
- __itt_domain * metadata_domain = NULL;
-
- #include "kmp_version.h"
- #include "kmp_i18n.h"
- #include "kmp_str.h"
-
- KMP_BUILD_ASSERT( sizeof( kmp_itt_mark_t ) == sizeof( __itt_mark_type ) );
-
- /*
- Previously used warnings:
-
- KMP_WARNING( IttAllNotifDisabled );
- KMP_WARNING( IttObjNotifDisabled );
- KMP_WARNING( IttMarkNotifDisabled );
- KMP_WARNING( IttUnloadLibFailed, libittnotify );
- */
-
-
- kmp_int32 __kmp_itt_prepare_delay = 0;
- kmp_bootstrap_lock_t __kmp_itt_debug_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_itt_debug_lock );
-
-#endif // USE_ITT_NOTIFY
-
-void __kmp_itt_initialize() {
-
- // ITTNotify library is loaded and initialized at first call to any ittnotify function,
- // so we do not need to explicitly load it any more.
-    // Just report OMP RTL version to ITTNotify.
-
- #if USE_ITT_NOTIFY
- // Report OpenMP RTL version.
- kmp_str_buf_t buf;
- __itt_mark_type version;
- __kmp_str_buf_init( & buf );
- __kmp_str_buf_print(
- & buf,
- "OMP RTL Version %d.%d.%d",
- __kmp_version_major,
- __kmp_version_minor,
- __kmp_version_build
- );
- if ( __itt_api_version_ptr != NULL ) {
- __kmp_str_buf_print( & buf, ":%s", __itt_api_version() );
- }; // if
- version = __itt_mark_create( buf.str );
- __itt_mark( version, NULL );
- __kmp_str_buf_free( & buf );
- #endif
-
-} // __kmp_itt_initialize
-
-
-void __kmp_itt_destroy() {
- #if USE_ITT_NOTIFY
- __kmp_itt_fini_ittlib();
- #endif
-} // __kmp_itt_destroy
-
-
-extern "C"
-void
-__itt_error_handler(
- __itt_error_code err,
- va_list args
-) {
-
- switch ( err ) {
- case __itt_error_no_module : {
- char const * library = va_arg( args, char const * );
- #if KMP_OS_WINDOWS
- int sys_err = va_arg( args, int );
- __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null );
- #else
- char const * sys_err = va_arg( args, char const * );
- __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRMESG( sys_err ), __kmp_msg_null );
- #endif
- } break;
- case __itt_error_no_symbol : {
- char const * library = va_arg( args, char const * );
- char const * symbol = va_arg( args, char const * );
- KMP_WARNING( IttLookupFailed, symbol, library );
- } break;
- case __itt_error_unknown_group : {
- char const * var = va_arg( args, char const * );
- char const * group = va_arg( args, char const * );
- KMP_WARNING( IttUnknownGroup, var, group );
- } break;
- case __itt_error_env_too_long : {
- char const * var = va_arg( args, char const * );
- size_t act_len = va_arg( args, size_t );
- size_t max_len = va_arg( args, size_t );
- KMP_WARNING( IttEnvVarTooLong, var, (unsigned long) act_len, (unsigned long) max_len );
- } break;
- case __itt_error_cant_read_env : {
- char const * var = va_arg( args, char const * );
- int sys_err = va_arg( args, int );
- __kmp_msg( kmp_ms_warning, KMP_MSG( CantGetEnvVar, var ), KMP_ERR( sys_err ), __kmp_msg_null );
- } break;
- case __itt_error_system : {
- char const * func = va_arg( args, char const * );
- int sys_err = va_arg( args, int );
- __kmp_msg( kmp_ms_warning, KMP_MSG( IttFunctionError, func ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null );
- } break;
- default : {
- KMP_WARNING( IttUnknownError, err );
- };
- }; // switch
-
-} // __itt_error_handler
-
-#endif /* USE_ITT_BUILD */
+#include "kmp_config.h"
+
+#if USE_ITT_BUILD
+/*
+ * kmp_itt.c -- ITT Notify interface.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp_itt.h"
+
+#if KMP_DEBUG
+ #include "kmp_itt.inl"
+#endif
+
+
+#if USE_ITT_NOTIFY
+
+ kmp_int32 __kmp_barrier_domain_count;
+ kmp_int32 __kmp_region_domain_count;
+ __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
+ __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
+ __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
+ kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
+ __itt_domain * metadata_domain = NULL;
+
+ #include "kmp_version.h"
+ #include "kmp_i18n.h"
+ #include "kmp_str.h"
+
+ KMP_BUILD_ASSERT( sizeof( kmp_itt_mark_t ) == sizeof( __itt_mark_type ) );
+
+ /*
+ Previously used warnings:
+
+ KMP_WARNING( IttAllNotifDisabled );
+ KMP_WARNING( IttObjNotifDisabled );
+ KMP_WARNING( IttMarkNotifDisabled );
+ KMP_WARNING( IttUnloadLibFailed, libittnotify );
+ */
+
+
+ kmp_int32 __kmp_itt_prepare_delay = 0;
+ kmp_bootstrap_lock_t __kmp_itt_debug_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_itt_debug_lock );
+
+#endif // USE_ITT_NOTIFY
+
+void __kmp_itt_initialize() {
+
+ // ITTNotify library is loaded and initialized at first call to any ittnotify function,
+ // so we do not need to explicitly load it any more.
+    // Just report OMP RTL version to ITTNotify.
+
+ #if USE_ITT_NOTIFY
+ // Report OpenMP RTL version.
+ kmp_str_buf_t buf;
+ __itt_mark_type version;
+ __kmp_str_buf_init( & buf );
+ __kmp_str_buf_print(
+ & buf,
+ "OMP RTL Version %d.%d.%d",
+ __kmp_version_major,
+ __kmp_version_minor,
+ __kmp_version_build
+ );
+ if ( __itt_api_version_ptr != NULL ) {
+ __kmp_str_buf_print( & buf, ":%s", __itt_api_version() );
+ }; // if
+ version = __itt_mark_create( buf.str );
+ __itt_mark( version, NULL );
+ __kmp_str_buf_free( & buf );
+ #endif
+
+} // __kmp_itt_initialize
+
+
+void __kmp_itt_destroy() {
+ #if USE_ITT_NOTIFY
+ __kmp_itt_fini_ittlib();
+ #endif
+} // __kmp_itt_destroy
+
+
+extern "C"
+void
+__itt_error_handler(
+ __itt_error_code err,
+ va_list args
+) {
+
+ switch ( err ) {
+ case __itt_error_no_module : {
+ char const * library = va_arg( args, char const * );
+ #if KMP_OS_WINDOWS
+ int sys_err = va_arg( args, int );
+ __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null );
+ #else
+ char const * sys_err = va_arg( args, char const * );
+ __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), KMP_SYSERRMESG( sys_err ), __kmp_msg_null );
+ #endif
+ } break;
+ case __itt_error_no_symbol : {
+ char const * library = va_arg( args, char const * );
+ char const * symbol = va_arg( args, char const * );
+ KMP_WARNING( IttLookupFailed, symbol, library );
+ } break;
+ case __itt_error_unknown_group : {
+ char const * var = va_arg( args, char const * );
+ char const * group = va_arg( args, char const * );
+ KMP_WARNING( IttUnknownGroup, var, group );
+ } break;
+ case __itt_error_env_too_long : {
+ char const * var = va_arg( args, char const * );
+ size_t act_len = va_arg( args, size_t );
+ size_t max_len = va_arg( args, size_t );
+ KMP_WARNING( IttEnvVarTooLong, var, (unsigned long) act_len, (unsigned long) max_len );
+ } break;
+ case __itt_error_cant_read_env : {
+ char const * var = va_arg( args, char const * );
+ int sys_err = va_arg( args, int );
+ __kmp_msg( kmp_ms_warning, KMP_MSG( CantGetEnvVar, var ), KMP_ERR( sys_err ), __kmp_msg_null );
+ } break;
+ case __itt_error_system : {
+ char const * func = va_arg( args, char const * );
+ int sys_err = va_arg( args, int );
+ __kmp_msg( kmp_ms_warning, KMP_MSG( IttFunctionError, func ), KMP_SYSERRCODE( sys_err ), __kmp_msg_null );
+ } break;
+ default : {
+ KMP_WARNING( IttUnknownError, err );
+ };
+ }; // switch
+
+} // __itt_error_handler
+
+#endif /* USE_ITT_BUILD */
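
__itt_error_handler above is a variadic callback: the leading error code dictates how many trailing arguments follow and their types, and the handler peels them off with va_arg in that order. Roughly how a caller drives such a handler (illustration only; the real dispatch lives inside the ittnotify library):

#include <stdarg.h>

typedef void ( *example_handler_t )( int err, va_list args );

static void example_report( example_handler_t handler, int err, ... )
{
    va_list args;
    va_start( args, err );
    handler( err, args );   /* handler decodes code-specific arguments via va_arg */
    va_end( args );
}

/* e.g. example_report( handler, some_error_code, "libittnotify.so", "missing_symbol_name" ); */
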
diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.h b/contrib/libs/cxxsupp/openmp/kmp_itt.h
index 8797c57802..925a4f04ca 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_itt.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_itt.h
@@ -1,309 +1,309 @@
-#if USE_ITT_BUILD
-/*
- * kmp_itt.h -- ITT Notify interface.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_ITT_H
-#define KMP_ITT_H
-
-#include "kmp_lock.h"
-
-#define INTEL_ITTNOTIFY_API_PRIVATE
-#include "ittnotify.h"
-#include "legacy/ittnotify.h"
-
-#if KMP_DEBUG
- #define __kmp_inline // Turn off inlining in debug mode.
-#else
- #define __kmp_inline static inline
-#endif
-
-#if USE_ITT_NOTIFY
- extern kmp_int32 __kmp_itt_prepare_delay;
-# ifdef __cplusplus
- extern "C" void __kmp_itt_fini_ittlib(void);
-# else
- extern void __kmp_itt_fini_ittlib(void);
-# endif
-#endif
-
-// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled.
-#define USE_ITT_BUILD_ARG(x) ,x
-
-void __kmp_itt_initialize();
-void __kmp_itt_destroy();
-
-// -------------------------------------------------------------------------------------------------
-// New stuff for reporting high-level constructs.
-// -------------------------------------------------------------------------------------------------
-
-// Note the naming convention:
-// __kmp_itt_xxxing() function should be called before action, while
-// __kmp_itt_xxxed() function should be called after action.
-
-// --- Parallel region reporting ---
-__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads.
-__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads.
- // (*) Note: A thread may execute tasks after this point, though.
-
-// --- Frame reporting ---
-// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
-__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );
-
-// --- Metadata reporting ---
-// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction - if this is a reduction barrier
-__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
-// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size
-__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
-__kmp_inline void __kmp_itt_metadata_single( ident_t * loc );
-
-// --- Barrier reporting ---
-__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
-__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
-__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
-__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );
-
-// --- Taskwait reporting ---
-__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
-__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
-__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );
-
-// --- Task reporting ---
-__kmp_inline void __kmp_itt_task_starting( void * object );
-__kmp_inline void __kmp_itt_task_finished( void * object );
-
-// --- Lock reporting ---
-#if KMP_USE_DYNAMIC_LOCK
-__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
-#else
-__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
-#endif
-__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );
-
-// --- Critical reporting ---
-#if KMP_USE_DYNAMIC_LOCK
-__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
-#else
-__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
-#endif
-__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
-__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );
-
-// --- Single reporting ---
-__kmp_inline void __kmp_itt_single_start( int gtid );
-__kmp_inline void __kmp_itt_single_end( int gtid );
-
-// --- Ordered reporting ---
-__kmp_inline void __kmp_itt_ordered_init( int gtid );
-__kmp_inline void __kmp_itt_ordered_prep( int gtid );
-__kmp_inline void __kmp_itt_ordered_start( int gtid );
-__kmp_inline void __kmp_itt_ordered_end( int gtid );
-
-// --- Threads reporting ---
-__kmp_inline void __kmp_itt_thread_ignore();
-__kmp_inline void __kmp_itt_thread_name( int gtid );
-
-// --- System objects ---
-__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );
-
-// --- Stack stitching ---
-__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
-__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
-__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
-__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
-
-// -------------------------------------------------------------------------------------------------
-// Old stuff for reporting low-level internal synchronization.
-// -------------------------------------------------------------------------------------------------
-
-#if USE_ITT_NOTIFY
-
- /*
- * Support for SSC marks, which are used by SDE
- * http://software.intel.com/en-us/articles/intel-software-development-emulator
- * to mark points in instruction traces that represent spin-loops and are
- * therefore uninteresting when collecting traces for architecture simulation.
- */
- #ifndef INCLUDE_SSC_MARKS
- # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
- #endif
-
- /* Linux 64 only for now */
- #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
- // Portable (at least for gcc and icc) code to insert the necessary instructions
- // to set %ebx and execute the unlikely no-op.
- #if defined( __INTEL_COMPILER )
- # define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
- #else
- # define INSERT_SSC_MARK(tag) \
- __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
- #endif
- #else
- # define INSERT_SSC_MARK(tag) ((void)0)
- #endif
-
-    /* Markers for the start and end of regions that represent polling and
-     * are therefore uninteresting to architectural simulations. 0x4376 and
-     * 0x4377 are arbitrary numbers that should be unique in the space of
-     * SSC tags, but there is no central issuing authority; rather,
-     * randomness is expected to work.
-     */
- #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
- #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
-
- // Markers for architecture simulation.
- // FORKING : Before the master thread forks.
- // JOINING : At the start of the join.
- // INVOKING : Before the threads invoke microtasks.
-    // DISPATCH_INIT: At the start of a dynamically scheduled loop.
-    // DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled loop.
- #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
- #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
- #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
- #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
- #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
-
- // The object is an address that associates a specific set of the prepare, acquire, release,
- // and cancel operations.
-
- /* Sync prepare indicates a thread is going to start waiting for another thread
- to send a release event. This operation should be done just before the thread
- begins checking for the existence of the release event */
-
-    /* Sync cancel indicates a thread is cancelling a wait on another thread and
-       continuing execution without waiting for the other thread to release it */
-
- /* Sync acquired indicates a thread has received a release event from another
- thread and has stopped waiting. This operation must occur only after the release
- event is received. */
-
-    /* Sync release indicates a thread is going to send a release event to another thread
-       so it will stop waiting and continue execution. This operation must happen just before
-       the release event. */
-
- #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) )
- #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) )
- #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) )
- #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )
-
- /*
- In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay
- (and not called at all if waiting time is small). So, in spin loops, do not use
- KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop),
-        KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED().
-        See KMP_WAIT_YIELD() for an example.
- */
-
- #undef KMP_FSYNC_SPIN_INIT
- #define KMP_FSYNC_SPIN_INIT( obj, spin ) \
- int sync_iters = 0; \
- if ( __itt_fsync_prepare_ptr ) { \
- if ( obj == NULL ) { \
- obj = spin; \
- } /* if */ \
- } /* if */ \
- SSC_MARK_SPIN_START()
-
- #undef KMP_FSYNC_SPIN_PREPARE
- #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \
- if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
- ++ sync_iters; \
- if ( sync_iters >= __kmp_itt_prepare_delay ) { \
- KMP_FSYNC_PREPARE( (void*) obj ); \
- } /* if */ \
- } /* if */ \
- } while (0)
- #undef KMP_FSYNC_SPIN_ACQUIRED
- #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \
- SSC_MARK_SPIN_END(); \
- if ( sync_iters >= __kmp_itt_prepare_delay ) { \
- KMP_FSYNC_ACQUIRED( (void*) obj ); \
- } /* if */ \
- } while (0)
-
- /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
- KMP_ITT_IGNORE(
- ptr = malloc( size );
- );
- */
- #define KMP_ITT_IGNORE( statement ) do { \
- __itt_state_t __itt_state_; \
- if ( __itt_state_get_ptr ) { \
- __itt_state_ = __itt_state_get(); \
- __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
- } /* if */ \
- { statement } \
- if ( __itt_state_get_ptr ) { \
- __itt_state_set( __itt_state_ ); \
- } /* if */ \
- } while (0)
-
- const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
- // different OpenMP regions in the user source code).
- extern kmp_int32 __kmp_barrier_domain_count;
- extern kmp_int32 __kmp_region_domain_count;
- extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
- extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
- extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
- extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
- extern __itt_domain * metadata_domain;
-
-#else
-
-// Null definitions of the synchronization tracing functions.
-# define KMP_FSYNC_PREPARE( obj ) ((void)0)
-# define KMP_FSYNC_CANCEL( obj ) ((void)0)
-# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
-# define KMP_FSYNC_RELEASING( obj ) ((void)0)
-
-# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
-# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
-# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)
-
-# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
-
-#endif // USE_ITT_NOTIFY
-
-#if ! KMP_DEBUG
- // In release mode include definitions of inline functions.
- #include "kmp_itt.inl"
-#endif
-
-#endif // KMP_ITT_H
-
-#else /* USE_ITT_BUILD */
-
-// Null definitions of the synchronization tracing functions.
-// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
-// By defining these we avoid unpleasant ifdef tests in many places.
-# define KMP_FSYNC_PREPARE( obj ) ((void)0)
-# define KMP_FSYNC_CANCEL( obj ) ((void)0)
-# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
-# define KMP_FSYNC_RELEASING( obj ) ((void)0)
-
-# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
-# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
-# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)
-
-# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
-
-# define USE_ITT_BUILD_ARG(x)
-
-#endif /* USE_ITT_BUILD */
+#if USE_ITT_BUILD
+/*
+ * kmp_itt.h -- ITT Notify interface.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_ITT_H
+#define KMP_ITT_H
+
+#include "kmp_lock.h"
+
+#define INTEL_ITTNOTIFY_API_PRIVATE
+#include "ittnotify.h"
+#include "legacy/ittnotify.h"
+
+#if KMP_DEBUG
+ #define __kmp_inline // Turn off inlining in debug mode.
+#else
+ #define __kmp_inline static inline
+#endif
+
+#if USE_ITT_NOTIFY
+ extern kmp_int32 __kmp_itt_prepare_delay;
+# ifdef __cplusplus
+ extern "C" void __kmp_itt_fini_ittlib(void);
+# else
+ extern void __kmp_itt_fini_ittlib(void);
+# endif
+#endif
+
+// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled.
+#define USE_ITT_BUILD_ARG(x) ,x
+
+void __kmp_itt_initialize();
+void __kmp_itt_destroy();
+
+// -------------------------------------------------------------------------------------------------
+// New stuff for reporting high-level constructs.
+// -------------------------------------------------------------------------------------------------
+
+// Note the naming convention:
+// __kmp_itt_xxxing() function should be called before action, while
+// __kmp_itt_xxxed() function should be called after action.
+
+// --- Parallel region reporting ---
+__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads.
+__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads.
+ // (*) Note: A thread may execute tasks after this point, though.
+
+// --- Frame reporting ---
+// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
+__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );
+
+// --- Metadata reporting ---
+// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction - if this is a reduction barrier
+__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
+// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size
+__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
+__kmp_inline void __kmp_itt_metadata_single( ident_t * loc );
+
+// --- Barrier reporting ---
+__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
+__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
+__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
+__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );
+
+// --- Taskwait reporting ---
+__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
+__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
+__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );
+
+// --- Task reporting ---
+__kmp_inline void __kmp_itt_task_starting( void * object );
+__kmp_inline void __kmp_itt_task_finished( void * object );
+
+// --- Lock reporting ---
+#if KMP_USE_DYNAMIC_LOCK
+__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
+#else
+__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
+#endif
+__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );
+
+// --- Critical reporting ---
+#if KMP_USE_DYNAMIC_LOCK
+__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
+#else
+__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
+#endif
+__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
+__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );
+
+// --- Single reporting ---
+__kmp_inline void __kmp_itt_single_start( int gtid );
+__kmp_inline void __kmp_itt_single_end( int gtid );
+
+// --- Ordered reporting ---
+__kmp_inline void __kmp_itt_ordered_init( int gtid );
+__kmp_inline void __kmp_itt_ordered_prep( int gtid );
+__kmp_inline void __kmp_itt_ordered_start( int gtid );
+__kmp_inline void __kmp_itt_ordered_end( int gtid );
+
+// --- Threads reporting ---
+__kmp_inline void __kmp_itt_thread_ignore();
+__kmp_inline void __kmp_itt_thread_name( int gtid );
+
+// --- System objects ---
+__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );
+
+// --- Stack stitching ---
+__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
+__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
+__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
+__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
+
+// -------------------------------------------------------------------------------------------------
+// Old stuff for reporting low-level internal synchronization.
+// -------------------------------------------------------------------------------------------------
+
+#if USE_ITT_NOTIFY
+
+ /*
+ * Support for SSC marks, which are used by SDE
+ * http://software.intel.com/en-us/articles/intel-software-development-emulator
+ * to mark points in instruction traces that represent spin-loops and are
+ * therefore uninteresting when collecting traces for architecture simulation.
+ */
+ #ifndef INCLUDE_SSC_MARKS
+ # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
+ #endif
+
+ /* Linux 64 only for now */
+ #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
+ // Portable (at least for gcc and icc) code to insert the necessary instructions
+ // to set %ebx and execute the unlikely no-op.
+ #if defined( __INTEL_COMPILER )
+ # define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
+ #else
+ # define INSERT_SSC_MARK(tag) \
+ __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
+ #endif
+ #else
+ # define INSERT_SSC_MARK(tag) ((void)0)
+ #endif
+
+    /* Markers for the start and end of regions that represent polling and
+     * are therefore uninteresting to architectural simulations. 0x4376 and
+     * 0x4377 are arbitrary numbers that should be unique in the space of
+     * SSC tags, but there is no central issuing authority; rather,
+     * randomness is expected to work.
+     */
+ #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
+ #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
+
+ // Markers for architecture simulation.
+ // FORKING : Before the master thread forks.
+ // JOINING : At the start of the join.
+ // INVOKING : Before the threads invoke microtasks.
+    // DISPATCH_INIT: At the start of a dynamically scheduled loop.
+    // DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled loop.
+ #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
+ #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
+ #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
+ #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
+ #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
+
+ // The object is an address that associates a specific set of the prepare, acquire, release,
+ // and cancel operations.
+
+ /* Sync prepare indicates a thread is going to start waiting for another thread
+ to send a release event. This operation should be done just before the thread
+ begins checking for the existence of the release event */
+
+    /* Sync cancel indicates a thread is cancelling a wait on another thread and
+       continuing execution without waiting for the other thread to release it */
+
+ /* Sync acquired indicates a thread has received a release event from another
+ thread and has stopped waiting. This operation must occur only after the release
+ event is received. */
+
+    /* Sync release indicates a thread is going to send a release event to another thread
+       so it will stop waiting and continue execution. This operation must happen just before
+       the release event. */
+
+ #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) )
+ #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) )
+ #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) )
+ #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )
+
+ /*
+ In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay
+ (and not called at all if waiting time is small). So, in spin loops, do not use
+ KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop),
+        KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED().
+        See KMP_WAIT_YIELD() for an example.
+ */
+
+ #undef KMP_FSYNC_SPIN_INIT
+ #define KMP_FSYNC_SPIN_INIT( obj, spin ) \
+ int sync_iters = 0; \
+ if ( __itt_fsync_prepare_ptr ) { \
+ if ( obj == NULL ) { \
+ obj = spin; \
+ } /* if */ \
+ } /* if */ \
+ SSC_MARK_SPIN_START()
+
+ #undef KMP_FSYNC_SPIN_PREPARE
+ #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \
+ if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
+ ++ sync_iters; \
+ if ( sync_iters >= __kmp_itt_prepare_delay ) { \
+ KMP_FSYNC_PREPARE( (void*) obj ); \
+ } /* if */ \
+ } /* if */ \
+ } while (0)
+ #undef KMP_FSYNC_SPIN_ACQUIRED
+ #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \
+ SSC_MARK_SPIN_END(); \
+ if ( sync_iters >= __kmp_itt_prepare_delay ) { \
+ KMP_FSYNC_ACQUIRED( (void*) obj ); \
+ } /* if */ \
+ } while (0)
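+
+    /* Illustrative sketch only (not code from this file): a spin-wait loop is
+       expected to combine the three macros above roughly as follows, where
+       `obj' / `spin' are the sync-object pointers passed to the macros and
+       `flag_is_set()' is a placeholder for the real wait condition:
+
+           KMP_FSYNC_SPIN_INIT( obj, spin );      // also declares sync_iters
+           while ( ! flag_is_set( spin ) ) {
+               KMP_FSYNC_SPIN_PREPARE( obj );     // delayed __itt_fsync_prepare()
+               KMP_YIELD( TRUE );
+           }
+           KMP_FSYNC_SPIN_ACQUIRED( obj );        // paired __itt_fsync_acquired()
+    */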
+
+ /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
+ KMP_ITT_IGNORE(
+ ptr = malloc( size );
+ );
+ */
+ #define KMP_ITT_IGNORE( statement ) do { \
+ __itt_state_t __itt_state_; \
+ if ( __itt_state_get_ptr ) { \
+ __itt_state_ = __itt_state_get(); \
+ __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
+ } /* if */ \
+ { statement } \
+ if ( __itt_state_get_ptr ) { \
+ __itt_state_set( __itt_state_ ); \
+ } /* if */ \
+ } while (0)
+
+ const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
+ // different OpenMP regions in the user source code).
+ extern kmp_int32 __kmp_barrier_domain_count;
+ extern kmp_int32 __kmp_region_domain_count;
+ extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
+ extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
+ extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
+ extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
+ extern __itt_domain * metadata_domain;
+
+#else
+
+// Null definitions of the synchronization tracing functions.
+# define KMP_FSYNC_PREPARE( obj ) ((void)0)
+# define KMP_FSYNC_CANCEL( obj ) ((void)0)
+# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
+# define KMP_FSYNC_RELEASING( obj ) ((void)0)
+
+# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
+# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
+# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)
+
+# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
+
+#endif // USE_ITT_NOTIFY
+
+#if ! KMP_DEBUG
+ // In release mode include definitions of inline functions.
+ #include "kmp_itt.inl"
+#endif
+
+#endif // KMP_ITT_H
+
+#else /* USE_ITT_BUILD */
+
+// Null definitions of the synchronization tracing functions.
+// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
+// By defining these we avoid unpleasant ifdef tests in many places.
+# define KMP_FSYNC_PREPARE( obj ) ((void)0)
+# define KMP_FSYNC_CANCEL( obj ) ((void)0)
+# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
+# define KMP_FSYNC_RELEASING( obj ) ((void)0)
+
+# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
+# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
+# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)
+
+# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
+
+# define USE_ITT_BUILD_ARG(x)
+
+#endif /* USE_ITT_BUILD */
diff --git a/contrib/libs/cxxsupp/openmp/kmp_itt.inl b/contrib/libs/cxxsupp/openmp/kmp_itt.inl
index 625d879840..6dafa6c16e 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_itt.inl
+++ b/contrib/libs/cxxsupp/openmp/kmp_itt.inl
@@ -1,1130 +1,1130 @@
-#if USE_ITT_BUILD
-/*
- * kmp_itt.inl -- Inline functions of ITT Notify.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-// Inline function definitions. This file should be included into the kmp_itt.h file for a production
-// build (to let the compiler inline functions) or into the kmp_itt.c file for a debug build (to reduce
-// the number of files to recompile and save build time).
-
-
-#include "kmp.h"
-#include "kmp_str.h"
-
-#if KMP_ITT_DEBUG
- extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
- #define KMP_ITT_DEBUG_LOCK() { \
- __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \
- }
- #define KMP_ITT_DEBUG_PRINT( ... ) { \
- fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \
- fprintf( stderr, __VA_ARGS__ ); \
- fflush( stderr ); \
- __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \
- }
-#else
- #define KMP_ITT_DEBUG_LOCK()
- #define KMP_ITT_DEBUG_PRINT( ... )
-#endif // KMP_ITT_DEBUG
-
-// Ensure that the functions are static if they're supposed to be
-// inlined. Otherwise they cannot be used in more than one file,
-// since there will be multiple definitions.
-#if KMP_DEBUG
-# define LINKAGE
-#else
-# define LINKAGE static inline
-#endif
-
-// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this
-// API to support user-defined synchronization primitives, but does not use ZCA;
-// it would be safe to turn this off until wider support becomes available.
-#if USE_ITT_ZCA
-#ifdef __INTEL_COMPILER
-# if __INTEL_COMPILER >= 1200
-# undef __itt_sync_acquired
-# undef __itt_sync_releasing
-# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr)
-# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr)
-# endif
-#endif
-#endif
-
-static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock );
-
-/*
- ------------------------------------------------------------------------------------------------
- Parallel region reporting.
-
-    * __kmp_itt_region_forking should be called by the master thread of a team. The exact moment of
-      the call does not matter, but it should be completed before any thread of this team calls
-      __kmp_itt_region_starting.
-    * __kmp_itt_region_starting should be called by each thread of a team just before entering
-      the parallel region body.
-    * __kmp_itt_region_finished should be called by each thread of a team right after returning
-      from the parallel region body.
-    * __kmp_itt_region_joined should be called by the master thread of a team, after all threads
-      have called __kmp_itt_region_finished.
-
-    Note: A thread waiting at the join barrier (after __kmp_itt_region_finished) can execute some more
-    user code -- such a thread can execute tasks.
-
- Note: The overhead of logging region_starting and region_finished in each thread is too large,
- so these calls are not used.
-
- ------------------------------------------------------------------------------------------------
-*/
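-
-/*
-    Illustrative call order only (a sketch added for clarity, not code from the
-    original file): for one outermost parallel region the reporting calls above
-    would nest roughly as
-
-        master:  __kmp_itt_region_forking( gtid, team_size, barriers );
-        each:    __kmp_itt_region_starting( gtid );   // currently a no-op, see below
-                 ... parallel region body ...
-        each:    __kmp_itt_region_finished( gtid );   // currently a no-op, see below
-        master:  __kmp_itt_region_joined( gtid );
-
-    where gtid, team_size and barriers are the values the caller already has.
-*/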
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) {
-#if USE_ITT_NOTIFY
- kmp_team_t * team = __kmp_team_from_gtid( gtid );
- if (team->t.t_active_level + serialized > 1)
- {
- // The frame notifications are only supported for the outermost teams.
- return;
- }
- ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
- if (loc) {
- // Use the reserved_2 field to store the index to the region domain.
- // Assume that reserved_2 contains zero initially. Since zero is special
- // value here, store the index into domain array increased by 1.
- if (loc->reserved_2 == 0) {
- if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
- //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
- // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc
- //} // AC: this block is to replace next unsynchronized line
-
- // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
- // field but put region index to the low two bytes and barrier indexes to the high
- // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
- loc->reserved_2 |= (frm + 1); // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$parallel@[file:]<line>[:<col>]"
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- if( barriers ) {
- if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
- const char * buff = NULL;
- buff = __kmp_str_format("%s$omp$barrier@%s:%d",
- str_loc.func, str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
- __kmp_str_free( &buff );
- // Save the barrier frame index to the high two bytes.
- loc->reserved_2 |= (frm + 1) << 16;
- }
- }
- __kmp_str_loc_free( &str_loc );
- __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL);
- }
- } else { // Region domain exists for this location
- // Check if team size was changed. Then create new region domain for this location
- int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if( __kmp_itt_region_team_size[frm] != team_size ) {
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- __kmp_str_loc_free( &str_loc );
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
- } else { // Team size was not changed. Use existing domain.
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
- }
- }
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
- gtid, loc->reserved_2, serialized, loc );
- }
-#endif
-} // __kmp_itt_region_forking
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) {
-#if USE_ITT_NOTIFY
- if( region ) {
- kmp_team_t * team = __kmp_team_from_gtid( gtid );
- int serialized = ( region == 2 ? 1 : 0 );
- if (team->t.t_active_level + serialized > 1)
- {
- // The frame notifications are only supported for the outermost teams.
- return;
- }
-        // Check that the region domain has not been created before. Its index is saved in the low two bytes.
- if ((loc->reserved_2 & 0x0000FFFF) == 0) {
- if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
-
- // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
- // field but put region index to the low two bytes and barrier indexes to the high
- // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
- loc->reserved_2 |= (frm + 1); // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- __kmp_str_loc_free( &str_loc );
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
- }
- } else { // Region domain exists for this location
- // Check if team size was changed. Then create new region domain for this location
- int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if( __kmp_itt_region_team_size[frm] != team_size ) {
- const char * buff = NULL;
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.line, str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
-
- __kmp_str_free( &buff );
- __kmp_str_loc_free( &str_loc );
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
- } else { // Team size was not changed. Use existing domain.
- __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
- }
- }
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
- gtid, loc->reserved_2, region, loc, begin, end );
- return;
- } else { // called for barrier reporting
- if (loc) {
- if ((loc->reserved_2 & 0xFFFF0000) == 0) {
- if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
- return; // loc->reserved_2 is still 0
- }
- // Save the barrier frame index to the high two bytes.
- loc->reserved_2 |= (frm + 1) << 16; // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$frame@[file:]<line>[:<col>]"
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- if( imbalance ) {
- const char * buff_imb = NULL;
- buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
- str_loc.func, team_size, str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb );
- __itt_suppress_pop();
- __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end );
- __kmp_str_free( &buff_imb );
- } else {
- const char * buff = NULL;
- buff = __kmp_str_format("%s$omp$barrier@%s:%d",
- str_loc.func, str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
- __itt_suppress_pop();
- __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end );
- __kmp_str_free( &buff );
- }
- __kmp_str_loc_free( &str_loc );
- }
- } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
- if( imbalance ) {
- __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end );
- } else {
- __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end );
- }
- }
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n",
- gtid, loc->reserved_2, loc, begin, end );
- }
- }
-#endif
-} // __kmp_itt_frame_submit
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) {
-#if USE_ITT_NOTIFY
- if( metadata_domain == NULL) {
- __kmp_acquire_bootstrap_lock( & metadata_lock );
- if( metadata_domain == NULL) {
- __itt_suppress_push(__itt_suppress_memory_errors);
- metadata_domain = __itt_domain_create( "OMP Metadata" );
- __itt_suppress_pop();
- }
- __kmp_release_bootstrap_lock( & metadata_lock );
- }
-
- __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance");
-
- kmp_uint64 imbalance_data[ 4 ];
- imbalance_data[ 0 ] = begin;
- imbalance_data[ 1 ] = end;
- imbalance_data[ 2 ] = imbalance;
- imbalance_data[ 3 ] = reduction;
-
- __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data);
-#endif
-} // __kmp_itt_metadata_imbalance
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) {
-#if USE_ITT_NOTIFY
- if( metadata_domain == NULL) {
- __kmp_acquire_bootstrap_lock( & metadata_lock );
- if( metadata_domain == NULL) {
- __itt_suppress_push(__itt_suppress_memory_errors);
- metadata_domain = __itt_domain_create( "OMP Metadata" );
- __itt_suppress_pop();
- }
- __kmp_release_bootstrap_lock( & metadata_lock );
- }
-
- __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop");
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
- kmp_uint64 loop_data[ 5 ];
- loop_data[ 0 ] = str_loc.line;
- loop_data[ 1 ] = str_loc.col;
- loop_data[ 2 ] = sched_type;
- loop_data[ 3 ] = iterations;
- loop_data[ 4 ] = chunk;
-
- __kmp_str_loc_free( &str_loc );
-
- __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data);
-#endif
-} // __kmp_itt_metadata_loop
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_metadata_single( ident_t * loc ) {
-#if USE_ITT_NOTIFY
- if( metadata_domain == NULL) {
- __kmp_acquire_bootstrap_lock( & metadata_lock );
- if( metadata_domain == NULL) {
- __itt_suppress_push(__itt_suppress_memory_errors);
- metadata_domain = __itt_domain_create( "OMP Metadata" );
- __itt_suppress_pop();
- }
- __kmp_release_bootstrap_lock( & metadata_lock );
- }
-
- __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single");
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
- kmp_uint64 single_data[ 2 ];
- single_data[ 0 ] = str_loc.line;
- single_data[ 1 ] = str_loc.col;
-
- __kmp_str_loc_free( &str_loc );
-
- __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data);
-#endif
-} // __kmp_itt_metadata_single
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_starting( int gtid ) {
-#if USE_ITT_NOTIFY
-#endif
-} // __kmp_itt_region_starting
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_finished( int gtid ) {
-#if USE_ITT_NOTIFY
-#endif
-} // __kmp_itt_region_finished
-
-// -------------------------------------------------------------------------------------------------
-
-LINKAGE void
-__kmp_itt_region_joined( int gtid, int serialized ) {
-#if USE_ITT_NOTIFY
- kmp_team_t * team = __kmp_team_from_gtid( gtid );
- if (team->t.t_active_level + serialized > 1)
- {
- // The frame notifications are only supported for the outermost teams.
- return;
- }
- ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
- if (loc && loc->reserved_2)
- {
- int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if(frm < KMP_MAX_FRAME_DOMAINS) {
- KMP_ITT_DEBUG_LOCK();
- __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
- KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
- gtid, loc->reserved_2, serialized, loc );
- }
- }
-#endif
-} // __kmp_itt_region_joined
-
-/*
- ------------------------------------------------------------------------------------------------
- Barriers reporting.
-
- A barrier consists of two phases:
-
-    1. Gather -- master waits for the arrival of all the worker threads; each worker thread
-       registers its arrival and goes further.
-    2. Release -- each worker thread waits until the master lets it go; the master lets the worker
-       threads go.
-
-    The following functions should be called by each thread:
-
- * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
- * __kmp_itt_barrier_middle() -- between gather and release phases.
- * __kmp_itt_barrier_finished() -- after release phase.
-
-    Note: Call __kmp_itt_barrier_object() before the call to __kmp_itt_barrier_starting() and save
-    the result in a local variable. __kmp_itt_barrier_object(), being called too late (e. g. after the
-    gather phase), would return the ITT sync object for the next barrier!
-
-    ITT needs an address (void *) to be specified as a sync object. OpenMP RTL does not have a
-    barrier object or barrier data structure. A barrier is just a counter in the team and thread
-    structures. We could use the address of the team structure as a barrier sync object, but ITT wants
-    different objects for different barriers (even within the same team). So let us use the
-    team address as the barrier sync object for the first barrier, then increase it by one for the next
-    barrier, and so on (but wrap it so as not to use addresses outside of the team structure).
-
- ------------------------------------------------------------------------------------------------
-*/
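-
-/*
-    A sketch of the expected call sequence in each thread (illustrative only;
-    `bt' is the barrier type, e.g. bs_plain_barrier):
-
-        void * obj = __kmp_itt_barrier_object( gtid, bt, 1 );  // get and name the object first
-        __kmp_itt_barrier_starting( gtid, obj );   // before the gather phase
-        ... gather ...
-        __kmp_itt_barrier_middle( gtid, obj );     // between gather and release
-        ... release ...
-        __kmp_itt_barrier_finished( gtid, obj );   // after the release phase
-*/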
-
-void *
-__kmp_itt_barrier_object(
- int gtid,
- int bt,
- int set_name,
- int delta // 0 (current barrier) is default value; specify -1 to get previous barrier.
-) {
- void * object = NULL;
-#if USE_ITT_NOTIFY
- kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
- kmp_team_t * team = thr->th.th_team;
-
- // NOTE:
-    // If the function is called from __kmp_fork_barrier, the team pointer can be NULL. This "if"
-    // helps to avoid a crash. However, this is not a complete solution, and reporting fork/join
-    // barriers to ITT should be revisited.
-
- if ( team != NULL ) {
-
- // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived
- // by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
- kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
- // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of
- // different types do not have the same ids.
- KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier );
-        // This condition is a must (we would have a zero divide otherwise).
-        KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier );
-        // Stronger condition: make sure we have room for at least two different ids
-        // (for each barrier type).
- object =
- reinterpret_cast< void * >(
- kmp_uintptr_t( team )
- + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier
- + bt
- );
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object );
-
- if ( set_name ) {
- ident_t const * loc = NULL;
- char const * src = NULL;
- char const * type = "OMP Barrier";
- switch ( bt ) {
- case bs_plain_barrier : {
- // For plain barrier compiler calls __kmpc_barrier() function, which saves
- // location in thr->th.th_ident.
- loc = thr->th.th_ident;
- // Get the barrier type from flags provided by compiler.
- kmp_int32 expl = 0;
- kmp_uint32 impl = 0;
- if ( loc != NULL ) {
- src = loc->psource;
- expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0;
- impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0;
- }; // if
- if ( impl ) {
- switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) {
- case KMP_IDENT_BARRIER_IMPL_FOR : {
- type = "OMP For Barrier";
- } break;
- case KMP_IDENT_BARRIER_IMPL_SECTIONS : {
- type = "OMP Sections Barrier";
- } break;
- case KMP_IDENT_BARRIER_IMPL_SINGLE : {
- type = "OMP Single Barrier";
- } break;
- case KMP_IDENT_BARRIER_IMPL_WORKSHARE : {
- type = "OMP Workshare Barrier";
- } break;
- default : {
- type = "OMP Implicit Barrier";
- KMP_DEBUG_ASSERT( 0 );
- };
- }; /* switch */
- } else if ( expl ) {
- type = "OMP Explicit Barrier";
- }; /* if */
- } break;
- case bs_forkjoin_barrier : {
- // In case of fork/join barrier we can read thr->th.th_ident, because it
- // contains location of last passed construct (while join barrier is not
- // such one). Use th_ident of master thread instead -- __kmp_join_call()
- // called by the master thread saves location.
- //
- // AC: cannot read from master because __kmp_join_call may be not called
- // yet, so we read the location from team. This is the same location.
- // And team is valid at the enter to join barrier where this happens.
- loc = team->t.t_ident;
- if ( loc != NULL ) {
- src = loc->psource;
- }; // if
- type = "OMP Join Barrier";
- } break;
- }; // switch
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( object, type, src, __itt_attr_barrier );
- KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src );
- }; // if
-
- }; // if
-#endif
- return object;
-} // __kmp_itt_barrier_object
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_barrier_starting( int gtid, void * object ) {
-#if USE_ITT_NOTIFY
- if ( !KMP_MASTER_GTID( gtid ) ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_releasing( object );
- KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object );
- }; // if
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_prepare( object );
- KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object );
-#endif
-} // __kmp_itt_barrier_starting
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_barrier_middle( int gtid, void * object ) {
-#if USE_ITT_NOTIFY
- if ( KMP_MASTER_GTID( gtid ) ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_acquired( object );
- KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_releasing( object );
- KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object );
- } else {
- }; // if
-#endif
-} // __kmp_itt_barrier_middle
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_barrier_finished( int gtid, void * object ) {
-#if USE_ITT_NOTIFY
- if ( KMP_MASTER_GTID( gtid ) ) {
- } else {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_acquired( object );
- KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object );
- }; // if
-#endif
-} // __kmp_itt_barrier_finished
-
-/*
- ------------------------------------------------------------------------------------------------
- Taskwait reporting.
-
-    ITT needs an address (void *) to be specified as a sync object. OpenMP RTL does not have a taskwait
-    structure, so we need to construct something.
-
-*/
-
-void *
-__kmp_itt_taskwait_object( int gtid ) {
- void * object = NULL;
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
- kmp_taskdata_t * taskdata = thread -> th.th_current_task;
- object =
- reinterpret_cast< void * >(
- kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t )
- );
- }; // if
-#endif
- return object;
-} // __kmp_itt_taskwait_object
-
-void
-__kmp_itt_taskwait_starting(
- int gtid,
- void * object
-) {
-#if USE_ITT_NOTIFY
- kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
- kmp_taskdata_t * taskdata = thread -> th.th_current_task;
- ident_t const * loc = taskdata->td_taskwait_ident;
- char const * src = ( loc == NULL? NULL : loc->psource );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( object, "OMP Taskwait", src, 0 );
- KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_prepare( object );
- KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object );
-#endif
-} // __kmp_itt_taskwait_starting
-
-void
-__kmp_itt_taskwait_finished(
- int gtid,
- void * object
-) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_acquired( object );
- KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_destroy( object );
- KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object );
-#endif
-} // __kmp_itt_taskwait_finished
-
-/*
- ------------------------------------------------------------------------------------------------
- Task reporting.
-
-    Only those tasks are reported that are executed by a thread spinning at a barrier (or taskwait).
-    The sync object passed to the function must be the barrier or taskwait the threads are waiting at.
- ------------------------------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_task_starting(
- void * object // ITT sync object: barrier or taskwait.
-) {
-#if USE_ITT_NOTIFY
- if ( object != NULL ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_cancel( object );
- KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object );
- }; // if
-#endif
-} // __kmp_itt_task_starting
-
-// -------------------------------------------------------------------------------------------------
-
-void
-__kmp_itt_task_finished(
- void * object // ITT sync object: barrier or taskwait.
-) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_prepare( object );
- KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object );
-#endif
-} // __kmp_itt_task_finished
-
-// -------------------------------------------------------------------------------------------------
-
-/*
- ------------------------------------------------------------------------------------------------
- Lock reporting.
-
-    * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation
-      (set/unset). It is not a real event shown to the user but just sets a name for the
-      synchronization object. `lock' is the address of the sync object; the same address should be
-      used in all subsequent calls.
-
- * __kmp_itt_lock_acquiring() should be called before setting the lock.
-
- * __kmp_itt_lock_acquired() should be called after setting the lock.
-
-    * __kmp_itt_lock_releasing() should be called before unsetting the lock.
-
-    * __kmp_itt_lock_cancelled() should be called after the thread has cancelled waiting for the lock.
-
-    * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After
-      __kmp_itt_lock_destroyed() all references to the same address will be considered
-      as another sync object, not related to the original one.
- ------------------------------------------------------------------------------------------------
-*/
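-
-/*
-    A sketch of the lock lifecycle as reported to ITT (illustrative only; `lck'
-    stands for the kmp_user_lock_p used by the caller; with KMP_USE_DYNAMIC_LOCK
-    the creating call also takes the ident_t * location):
-
-        __kmp_itt_lock_creating( lck );    // once, before the first set/unset
-        __kmp_itt_lock_acquiring( lck );   // before each set
-        __kmp_itt_lock_acquired( lck );    // after the set, or __kmp_itt_lock_cancelled( lck ) if the wait is abandoned
-        __kmp_itt_lock_releasing( lck );   // before each unset
-        __kmp_itt_lock_destroyed( lck );   // once, after the last operation
-*/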
-
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_USE_DYNAMIC_LOCK
-// Takes location information directly
-__kmp_inline
-void
-___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- char const * src = ( loc == NULL ? NULL : loc->psource );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( lock, type, src, 0 );
- KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
- }
-#endif
-}
-#else // KMP_USE_DYNAMIC_LOCK
-// Internal guts -- common code for locks and critical sections, do not call directly.
-__kmp_inline
-void
-___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- ident_t const * loc = NULL;
- if ( __kmp_get_user_lock_location_ != NULL )
- loc = __kmp_get_user_lock_location_( (lock) );
- char const * src = ( loc == NULL ? NULL : loc->psource );
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( lock, type, src, 0 );
- KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
- }; // if
-#endif
-} // ___kmp_itt_lock_init
-#endif // KMP_USE_DYNAMIC_LOCK
-
-// Internal guts -- common code for locks and critical sections, do not call directly.
-__kmp_inline
-void
-___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_destroy( lock );
- KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock );
-#endif
-} // ___kmp_itt_lock_fini
-
-
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_USE_DYNAMIC_LOCK
-void
-__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) {
- ___kmp_itt_lock_init( lock, "OMP Lock", loc );
-}
-#else
-void
-__kmp_itt_lock_creating( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_init( lock, "OMP Lock" );
-} // __kmp_itt_lock_creating
-#endif
-
-void
-__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- // postpone lock object access
- if ( __itt_sync_prepare_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_prepare( ilk->lock );
- } else {
- __itt_sync_prepare( lock );
- }
- }
-#else
- __itt_sync_prepare( lock );
-#endif
-} // __kmp_itt_lock_acquiring
-
-void
-__kmp_itt_lock_acquired( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- // postpone lock object access
- if ( __itt_sync_acquired_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_acquired( ilk->lock );
- } else {
- __itt_sync_acquired( lock );
- }
- }
-#else
- __itt_sync_acquired( lock );
-#endif
-} // __kmp_itt_lock_acquired
-
-void
-__kmp_itt_lock_releasing( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- if ( __itt_sync_releasing_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_releasing( ilk->lock );
- } else {
- __itt_sync_releasing( lock );
- }
- }
-#else
- __itt_sync_releasing( lock );
-#endif
-} // __kmp_itt_lock_releasing
-
-void
-__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) {
-#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
- if ( __itt_sync_cancel_ptr ) {
- if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __itt_sync_cancel( ilk->lock );
- } else {
- __itt_sync_cancel( lock );
- }
- }
-#else
- __itt_sync_cancel( lock );
-#endif
-} // __kmp_itt_lock_cancelled
-
-void
-__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_fini( lock, "OMP Lock" );
-} // __kmp_itt_lock_destroyed
-
-/*
- ------------------------------------------------------------------------------------------------
- Critical reporting.
-
- Critical sections are treated exactly as locks (but have different object type).
- ------------------------------------------------------------------------------------------------
-*/
-#if KMP_USE_DYNAMIC_LOCK
-void
-__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) {
- ___kmp_itt_lock_init( lock, "OMP Critical", loc);
-}
-#else
-void
-__kmp_itt_critical_creating( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_init( lock, "OMP Critical" );
-} // __kmp_itt_critical_creating
-#endif
-
-void
-__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) {
- __itt_sync_prepare( lock );
-} // __kmp_itt_critical_acquiring
-
-void
-__kmp_itt_critical_acquired( kmp_user_lock_p lock ) {
- __itt_sync_acquired( lock );
-} // __kmp_itt_critical_acquired
-
-void
-__kmp_itt_critical_releasing( kmp_user_lock_p lock ) {
- __itt_sync_releasing( lock );
-} // __kmp_itt_critical_releasing
-
-void
-__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) {
- ___kmp_itt_lock_fini( lock, "OMP Critical" );
-} // __kmp_itt_critical_destroyed
-
-/*
- ------------------------------------------------------------------------------------------------
- Single reporting.
- ------------------------------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_single_start( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
- kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) );
- ident_t * loc = thr->th.th_ident;
- char const * src = ( loc == NULL ? NULL : loc->psource );
- kmp_str_buf_t name;
- __kmp_str_buf_init( & name );
- __kmp_str_buf_print( & name, "OMP Single-%s", src );
- KMP_ITT_DEBUG_LOCK();
- thr->th.th_itt_mark_single = __itt_mark_create( name.str );
- KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single );
- __kmp_str_buf_free( & name );
- KMP_ITT_DEBUG_LOCK();
- __itt_mark( thr->th.th_itt_mark_single, NULL );
- KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single );
- }; // if
-#endif
-} // __kmp_itt_single_start
-
-void
-__kmp_itt_single_end( int gtid ) {
-#if USE_ITT_NOTIFY
- __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single;
- KMP_ITT_DEBUG_LOCK();
- __itt_mark_off( mark );
- KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark );
-#endif
-} // __kmp_itt_single_end
-
-/*
- ------------------------------------------------------------------------------------------------
- Ordered reporting.
-
-    __kmp_itt_ordered_init is called by each thread *before* first using the sync
-    object. The ITT team would like it to be called once, but it requires extra synchronization.
-
- __kmp_itt_ordered_prep is called when thread is going to enter ordered section
- (before synchronization).
-
- __kmp_itt_ordered_start is called just before entering user code (after
- synchronization).
-
- __kmp_itt_ordered_end is called after returning from user code.
-
- Sync object is th->th.th_dispatch->th_dispatch_sh_current.
-
- Events are not generated in case of serialized team.
- ------------------------------------------------------------------------------------------------
-*/
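-
-/*
-    A sketch of the per-thread call order (illustrative only):
-
-        __kmp_itt_ordered_init( gtid );    // before the first use of the sync object
-        __kmp_itt_ordered_prep( gtid );    // before waiting to enter the ordered section
-        __kmp_itt_ordered_start( gtid );   // just before the user code of the section
-        ... ordered user code ...
-        __kmp_itt_ordered_end( gtid );     // after returning from the user code
-*/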
-
-void
-__kmp_itt_ordered_init( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
- ident_t const * loc = thr->th.th_ident;
- char const * src = ( loc == NULL ? NULL : loc->psource );
- __itt_sync_create(
- thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0
- );
- }; // if
-#endif
-} // __kmp_itt_ordered_init
-
-void
-__kmp_itt_ordered_prep( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_team_t * t = __kmp_team_from_gtid( gtid );
- if ( ! t->t.t_serialized ) {
- kmp_info_t * th = __kmp_thread_from_gtid( gtid );
- __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current );
- }; // if
- }; // if
-#endif
-} // __kmp_itt_ordered_prep
-
-void
-__kmp_itt_ordered_start( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_team_t * t = __kmp_team_from_gtid( gtid );
- if ( ! t->t.t_serialized ) {
- kmp_info_t * th = __kmp_thread_from_gtid( gtid );
- __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current );
- }; // if
- }; // if
-#endif
-} // __kmp_itt_ordered_start
-
-void
-__kmp_itt_ordered_end( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr ) {
- kmp_team_t * t = __kmp_team_from_gtid( gtid );
- if ( ! t->t.t_serialized ) {
- kmp_info_t * th = __kmp_thread_from_gtid( gtid );
- __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current );
- }; // if
- }; // if
-#endif
-} // __kmp_itt_ordered_end
-
-
-/*
- ------------------------------------------------------------------------------------------------
- Threads reporting.
- ------------------------------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_thread_ignore() {
- __itt_thr_ignore();
-} // __kmp_itt_thread_ignore
-
-void
-__kmp_itt_thread_name( int gtid ) {
-#if USE_ITT_NOTIFY
- if ( __itt_thr_name_set_ptr ) {
- kmp_str_buf_t name;
- __kmp_str_buf_init( & name );
- if( KMP_MASTER_GTID(gtid) ) {
- __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid );
- } else {
- __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid );
- }
- KMP_ITT_DEBUG_LOCK();
- __itt_thr_name_set( name.str, name.used );
- KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str );
- __kmp_str_buf_free( & name );
- }; // if
-#endif
-} // __kmp_itt_thread_name
-
-
-/*
- --------------------------------------------------------------------------
- System object reporting.
-
- ITT catches operations with system sync objects (like Windows* OS on IA-32
- architecture API critical sections and events). We only need to specify
- name ("OMP Scheduler") for the object to let ITT know it is an object used
- by OpenMP RTL for internal purposes.
- --------------------------------------------------------------------------
-*/
-
-void
-__kmp_itt_system_object_created( void * object, char const * name ) {
-#if USE_ITT_NOTIFY
- KMP_ITT_DEBUG_LOCK();
- __itt_sync_create( object, "OMP Scheduler", name, 0 );
- KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name );
-#endif
-} // __kmp_itt_system_object_created
-
-
-/*
- ------------------------------------------------------------------------------------------------
- Stack stitching api.
-
-    Master calls "create" and puts the stitching id into the team structure.
-    Workers read the stitching id and call the "enter" / "leave" API.
- Master calls "destroy" at the end of the parallel region.
- ------------------------------------------------------------------------------------------------
-*/
-
-__itt_caller
-__kmp_itt_stack_caller_create()
-{
-#if USE_ITT_NOTIFY
- if ( !__itt_stack_caller_create_ptr )
- return NULL;
- KMP_ITT_DEBUG_LOCK();
- __itt_caller id = __itt_stack_caller_create();
- KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id );
- return id;
-#endif
- return NULL;
-}
-
-void
-__kmp_itt_stack_caller_destroy( __itt_caller id )
-{
-#if USE_ITT_NOTIFY
- if ( __itt_stack_caller_destroy_ptr ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_stack_caller_destroy( id );
- KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id );
- }
-#endif
-}
-
-void
-__kmp_itt_stack_callee_enter( __itt_caller id )
-{
-#if USE_ITT_NOTIFY
- if ( __itt_stack_callee_enter_ptr ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_stack_callee_enter( id );
- KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id );
- }
-#endif
-}
-
-void
-__kmp_itt_stack_callee_leave( __itt_caller id )
-{
-#if USE_ITT_NOTIFY
- if ( __itt_stack_callee_leave_ptr ) {
- KMP_ITT_DEBUG_LOCK();
- __itt_stack_callee_leave( id );
- KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id );
- }
-#endif
-}
-
-#endif /* USE_ITT_BUILD */
+#if USE_ITT_BUILD
+/*
+ * kmp_itt.inl -- Inline functions of ITT Notify.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Inline function definitions. This file should be included into the kmp_itt.h file for a production
+// build (to let the compiler inline the functions) or into the kmp_itt.c file for a debug build (to
+// reduce the number of files to recompile and save build time).
+
+
+#include "kmp.h"
+#include "kmp_str.h"
+
+#if KMP_ITT_DEBUG
+ extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
+ #define KMP_ITT_DEBUG_LOCK() { \
+ __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \
+ }
+ #define KMP_ITT_DEBUG_PRINT( ... ) { \
+ fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \
+ fprintf( stderr, __VA_ARGS__ ); \
+ fflush( stderr ); \
+ __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \
+ }
+#else
+ #define KMP_ITT_DEBUG_LOCK()
+ #define KMP_ITT_DEBUG_PRINT( ... )
+#endif // KMP_ITT_DEBUG
+
+// Ensure that the functions are static if they're supposed to be
+// inlined. Otherwise they cannot be used in more than one file,
+// since there will be multiple definitions.
+#if KMP_DEBUG
+# define LINKAGE
+#else
+# define LINKAGE static inline
+#endif
+
+// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this
+// API to support user-defined synchronization primitives, but does not use ZCA;
+// it would be safe to turn this off until wider support becomes available.
+#if USE_ITT_ZCA
+#ifdef __INTEL_COMPILER
+# if __INTEL_COMPILER >= 1200
+# undef __itt_sync_acquired
+# undef __itt_sync_releasing
+# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr)
+# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr)
+# endif
+#endif
+#endif
+
+static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock );
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Parallel region reporting.
+
+ * __kmp_itt_region_forking should be called by master thread of a team. Exact moment of
+ call does not matter, but it should be completed before any thread of this team calls
+ __kmp_itt_region_starting.
+ * __kmp_itt_region_starting should be called by each thread of a team just before entering
+ parallel region body.
+ * __kmp_itt_region_finished should be called by each thread of a team right after returning
+ from parallel region body.
+ * __kmp_itt_region_joined should be called by master thread of a team, after all threads
+ called __kmp_itt_region_finished.
+
+ Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more
+ user code -- such a thread can execute tasks.
+
+ Note: The overhead of logging region_starting and region_finished in each thread is too large,
+ so these calls are not used.
+
+ ------------------------------------------------------------------------------------------------
+*/
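+
+/*
+    A minimal sketch of the intended call order (illustrative only; the per-thread
+    region_starting/region_finished calls are shown although they are disabled, see the note above):
+
+        __kmp_itt_region_forking( gtid, team_size, barriers, serialized );  // master thread
+        __kmp_itt_region_starting( gtid );   // each thread, before the region body
+        // ... parallel region body ...
+        __kmp_itt_region_finished( gtid );   // each thread, after the region body
+        __kmp_itt_region_joined( gtid, serialized );                        // master thread
+*/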
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized ) {
+#if USE_ITT_NOTIFY
+ kmp_team_t * team = __kmp_team_from_gtid( gtid );
+ if (team->t.t_active_level + serialized > 1)
+ {
+ // The frame notifications are only supported for the outermost teams.
+ return;
+ }
+ ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
+ if (loc) {
+        // Use the reserved_2 field to store the index to the region domain.
+        // Assume that reserved_2 contains zero initially. Since zero is a special
+        // value here, store the index into the domain array increased by 1.
+ if (loc->reserved_2 == 0) {
+ if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
+ // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc
+ //} // AC: this block is to replace next unsynchronized line
+
+ // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
+ // field but put region index to the low two bytes and barrier indexes to the high
+ // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
+ loc->reserved_2 |= (frm + 1); // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$parallel@[file:]<line>[:<col>]"
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ if( barriers ) {
+ if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ const char * buff = NULL;
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d",
+ str_loc.func, str_loc.file, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+ __kmp_str_free( &buff );
+ // Save the barrier frame index to the high two bytes.
+ loc->reserved_2 |= (frm + 1) << 16;
+ }
+ }
+ __kmp_str_loc_free( &str_loc );
+ __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL);
+ }
+ } else { // Region domain exists for this location
+ // Check if team size was changed. Then create new region domain for this location
+ int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+ if( __kmp_itt_region_team_size[frm] != team_size ) {
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __kmp_str_loc_free( &str_loc );
+ __kmp_itt_region_team_size[frm] = team_size;
+ __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+ } else { // Team size was not changed. Use existing domain.
+ __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
+ }
+ }
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
+ gtid, loc->reserved_2, serialized, loc );
+ }
+#endif
+} // __kmp_itt_region_forking
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) {
+#if USE_ITT_NOTIFY
+ if( region ) {
+ kmp_team_t * team = __kmp_team_from_gtid( gtid );
+ int serialized = ( region == 2 ? 1 : 0 );
+ if (team->t.t_active_level + serialized > 1)
+ {
+ // The frame notifications are only supported for the outermost teams.
+ return;
+ }
+        // Check that the region domain has not been created before. Its index is saved in the low two bytes.
+ if ((loc->reserved_2 & 0x0000FFFF) == 0) {
+ if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+
+ // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2
+ // field but put region index to the low two bytes and barrier indexes to the high
+ // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512.
+ loc->reserved_2 |= (frm + 1); // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __kmp_str_loc_free( &str_loc );
+ __kmp_itt_region_team_size[frm] = team_size;
+ __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
+ }
+ } else { // Region domain exists for this location
+ // Check if team size was changed. Then create new region domain for this location
+ int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+ if( __kmp_itt_region_team_size[frm] != team_size ) {
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d",
+ str_loc.func, team_size, str_loc.file,
+ str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __kmp_str_loc_free( &str_loc );
+ __kmp_itt_region_team_size[frm] = team_size;
+ __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
+ } else { // Team size was not changed. Use existing domain.
+ __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end );
+ }
+ }
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
+ gtid, loc->reserved_2, region, loc, begin, end );
+ return;
+ } else { // called for barrier reporting
+ if (loc) {
+ if ((loc->reserved_2 & 0xFFFF0000) == 0) {
+ if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ // Save the barrier frame index to the high two bytes.
+ loc->reserved_2 |= (frm + 1) << 16; // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$frame@[file:]<line>[:<col>]"
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ if( imbalance ) {
+ const char * buff_imb = NULL;
+ buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
+ str_loc.func, team_size, str_loc.file, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb );
+ __itt_suppress_pop();
+ __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end );
+ __kmp_str_free( &buff_imb );
+ } else {
+ const char * buff = NULL;
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d",
+ str_loc.func, str_loc.file, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+ __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end );
+ __kmp_str_free( &buff );
+ }
+ __kmp_str_loc_free( &str_loc );
+ }
+ } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
+ if( imbalance ) {
+ __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end );
+ } else {
+ __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end );
+ }
+ }
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n",
+ gtid, loc->reserved_2, loc, begin, end );
+ }
+ }
+#endif
+} // __kmp_itt_frame_submit
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) {
+#if USE_ITT_NOTIFY
+ if( metadata_domain == NULL) {
+ __kmp_acquire_bootstrap_lock( & metadata_lock );
+ if( metadata_domain == NULL) {
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ metadata_domain = __itt_domain_create( "OMP Metadata" );
+ __itt_suppress_pop();
+ }
+ __kmp_release_bootstrap_lock( & metadata_lock );
+ }
+
+ __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_imbalance");
+
+ kmp_uint64 imbalance_data[ 4 ];
+ imbalance_data[ 0 ] = begin;
+ imbalance_data[ 1 ] = end;
+ imbalance_data[ 2 ] = imbalance;
+ imbalance_data[ 3 ] = reduction;
+
+ __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 4, imbalance_data);
+#endif
+} // __kmp_itt_metadata_imbalance
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) {
+#if USE_ITT_NOTIFY
+ if( metadata_domain == NULL) {
+ __kmp_acquire_bootstrap_lock( & metadata_lock );
+ if( metadata_domain == NULL) {
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ metadata_domain = __itt_domain_create( "OMP Metadata" );
+ __itt_suppress_pop();
+ }
+ __kmp_release_bootstrap_lock( & metadata_lock );
+ }
+
+ __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_loop");
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+
+ kmp_uint64 loop_data[ 5 ];
+ loop_data[ 0 ] = str_loc.line;
+ loop_data[ 1 ] = str_loc.col;
+ loop_data[ 2 ] = sched_type;
+ loop_data[ 3 ] = iterations;
+ loop_data[ 4 ] = chunk;
+
+ __kmp_str_loc_free( &str_loc );
+
+ __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 5, loop_data);
+#endif
+} // __kmp_itt_metadata_loop
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_metadata_single( ident_t * loc ) {
+#if USE_ITT_NOTIFY
+ if( metadata_domain == NULL) {
+ __kmp_acquire_bootstrap_lock( & metadata_lock );
+ if( metadata_domain == NULL) {
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ metadata_domain = __itt_domain_create( "OMP Metadata" );
+ __itt_suppress_pop();
+ }
+ __kmp_release_bootstrap_lock( & metadata_lock );
+ }
+
+ __itt_string_handle * string_handle = __itt_string_handle_create( "omp_metadata_single");
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ kmp_uint64 single_data[ 2 ];
+ single_data[ 0 ] = str_loc.line;
+ single_data[ 1 ] = str_loc.col;
+
+ __kmp_str_loc_free( &str_loc );
+
+ __itt_metadata_add(metadata_domain, __itt_null, string_handle, __itt_metadata_u64, 2, single_data);
+#endif
+} // __kmp_itt_metadata_single
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_starting( int gtid ) {
+#if USE_ITT_NOTIFY
+#endif
+} // __kmp_itt_region_starting
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_finished( int gtid ) {
+#if USE_ITT_NOTIFY
+#endif
+} // __kmp_itt_region_finished
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
+__kmp_itt_region_joined( int gtid, int serialized ) {
+#if USE_ITT_NOTIFY
+ kmp_team_t * team = __kmp_team_from_gtid( gtid );
+ if (team->t.t_active_level + serialized > 1)
+ {
+ // The frame notifications are only supported for the outermost teams.
+ return;
+ }
+ ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
+ if (loc && loc->reserved_2)
+ {
+ int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
+ if(frm < KMP_MAX_FRAME_DOMAINS) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
+ KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, serialized:%d, loc:%p\n",
+ gtid, loc->reserved_2, serialized, loc );
+ }
+ }
+#endif
+} // __kmp_itt_region_joined
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Barriers reporting.
+
+ A barrier consists of two phases:
+
+    1. Gather -- the master waits for all the worker threads to arrive; each worker thread
+            registers its arrival and goes on.
+    2. Release -- each worker thread waits until the master lets it go; the master lets the
+            worker threads go.
+
+    The following functions should be called by each thread, in the order sketched below:
+
+    * __kmp_itt_barrier_starting() -- before arriving at the gather phase.
+    * __kmp_itt_barrier_middle() -- between the gather and release phases.
+    * __kmp_itt_barrier_finished() -- after the release phase.
+
+    Note: Call __kmp_itt_barrier_object() before the call to __kmp_itt_barrier_starting() and save
+    the result in a local variable. If __kmp_itt_barrier_object() is called too late (e.g. after the
+    gather phase), it would return the ITT sync object for the next barrier!
+
+    ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have a
+    barrier object or barrier data structure; a barrier is just a counter in the team and thread
+    structures. We could use the address of the team structure as the barrier sync object, but ITT
+    wants different objects for different barriers (even within the same team). So let us use the
+    team address as the sync object for the first barrier, then increase it by one for the next
+    barrier, and so on (but wrap it so as not to use addresses outside of the team structure).
+
+ ------------------------------------------------------------------------------------------------
+*/
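+
+/*
+    A minimal usage sketch (illustrative only; `b' is a local variable as recommended in the note above):
+
+        void * b = __kmp_itt_barrier_object( gtid, bs_plain_barrier, 1, 0 );
+        __kmp_itt_barrier_starting( gtid, b );   // before arriving at the gather phase
+        // ... gather phase ...
+        __kmp_itt_barrier_middle( gtid, b );     // between the gather and release phases
+        // ... release phase ...
+        __kmp_itt_barrier_finished( gtid, b );   // after the release phase
+*/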
+
+void *
+__kmp_itt_barrier_object(
+ int gtid,
+ int bt,
+ int set_name,
+ int delta // 0 (current barrier) is default value; specify -1 to get previous barrier.
+) {
+ void * object = NULL;
+#if USE_ITT_NOTIFY
+ kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
+ kmp_team_t * team = thr->th.th_team;
+
+    // NOTE:
+    // If the function is called from __kmp_fork_barrier, the team pointer can be NULL. This "if"
+    // helps to avoid a crash. However, this is not a complete solution, and reporting fork/join
+    // barriers to ITT should be revisited.
+
+ if ( team != NULL ) {
+
+ // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived
+ // by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
+ kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
+ // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of
+ // different types do not have the same ids.
+ KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier );
+            // This condition is a must (we would have a division by zero otherwise).
+ KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier );
+            // A stronger condition: make sure we have room for at least two different ids
+            // (for each barrier type).
+ object =
+ reinterpret_cast< void * >(
+ kmp_uintptr_t( team )
+ + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier
+ + bt
+ );
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object );
+
+ if ( set_name ) {
+ ident_t const * loc = NULL;
+ char const * src = NULL;
+ char const * type = "OMP Barrier";
+ switch ( bt ) {
+ case bs_plain_barrier : {
+ // For plain barrier compiler calls __kmpc_barrier() function, which saves
+ // location in thr->th.th_ident.
+ loc = thr->th.th_ident;
+ // Get the barrier type from flags provided by compiler.
+ kmp_int32 expl = 0;
+ kmp_uint32 impl = 0;
+ if ( loc != NULL ) {
+ src = loc->psource;
+ expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0;
+ impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0;
+ }; // if
+ if ( impl ) {
+ switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) {
+ case KMP_IDENT_BARRIER_IMPL_FOR : {
+ type = "OMP For Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_SECTIONS : {
+ type = "OMP Sections Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_SINGLE : {
+ type = "OMP Single Barrier";
+ } break;
+ case KMP_IDENT_BARRIER_IMPL_WORKSHARE : {
+ type = "OMP Workshare Barrier";
+ } break;
+ default : {
+ type = "OMP Implicit Barrier";
+ KMP_DEBUG_ASSERT( 0 );
+ };
+ }; /* switch */
+ } else if ( expl ) {
+ type = "OMP Explicit Barrier";
+ }; /* if */
+ } break;
+ case bs_forkjoin_barrier : {
+ // In case of fork/join barrier we can read thr->th.th_ident, because it
+ // contains location of last passed construct (while join barrier is not
+ // such one). Use th_ident of master thread instead -- __kmp_join_call()
+ // called by the master thread saves location.
+ //
+                    // AC: cannot read from master because __kmp_join_call may not have been
+                    //     called yet, so we read the location from the team. This is the same
+                    //     location, and the team is valid at the entry to the join barrier where
+                    //     this happens.
+ loc = team->t.t_ident;
+ if ( loc != NULL ) {
+ src = loc->psource;
+ }; // if
+ type = "OMP Join Barrier";
+ } break;
+ }; // switch
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, type, src, __itt_attr_barrier );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src );
+ }; // if
+
+ }; // if
+#endif
+ return object;
+} // __kmp_itt_barrier_object
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_starting( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( !KMP_MASTER_GTID( gtid ) ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_releasing( object );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object );
+ }; // if
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object );
+#endif
+} // __kmp_itt_barrier_starting
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_middle( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( KMP_MASTER_GTID( gtid ) ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_releasing( object );
+ KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object );
+ } else {
+ }; // if
+#endif
+} // __kmp_itt_barrier_middle
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_barrier_finished( int gtid, void * object ) {
+#if USE_ITT_NOTIFY
+ if ( KMP_MASTER_GTID( gtid ) ) {
+ } else {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object );
+ }; // if
+#endif
+} // __kmp_itt_barrier_finished
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Taskwait reporting.
+
+    ITT needs an address (void *) to be specified as a sync object. The OpenMP RTL does not have a
+    taskwait structure, so we need to construct something (see the sketch below).
+
+*/
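+
+/*
+    A minimal usage sketch (illustrative only):
+
+        void * obj = __kmp_itt_taskwait_object( gtid );
+        __kmp_itt_taskwait_starting( gtid, obj );   // before waiting for child tasks
+        // ... wait for child tasks to complete ...
+        __kmp_itt_taskwait_finished( gtid, obj );   // after the wait; the sync object is destroyed here
+*/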
+
+void *
+__kmp_itt_taskwait_object( int gtid ) {
+ void * object = NULL;
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
+ kmp_taskdata_t * taskdata = thread -> th.th_current_task;
+ object =
+ reinterpret_cast< void * >(
+ kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t )
+ );
+ }; // if
+#endif
+ return object;
+} // __kmp_itt_taskwait_object
+
+void
+__kmp_itt_taskwait_starting(
+ int gtid,
+ void * object
+) {
+#if USE_ITT_NOTIFY
+ kmp_info_t * thread = __kmp_thread_from_gtid( gtid );
+ kmp_taskdata_t * taskdata = thread -> th.th_current_task;
+ ident_t const * loc = taskdata->td_taskwait_ident;
+ char const * src = ( loc == NULL? NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, "OMP Taskwait", src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object );
+#endif
+} // __kmp_itt_taskwait_starting
+
+void
+__kmp_itt_taskwait_finished(
+ int gtid,
+ void * object
+) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_acquired( object );
+ KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_destroy( object );
+ KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object );
+#endif
+} // __kmp_itt_taskwait_finished
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Task reporting.
+
+    Only those tasks are reported which are executed by a thread spinning at a barrier (or taskwait).
+    The sync object passed to the function must be the barrier or taskwait the threads are waiting at
+    (see the sketch below).
+ ------------------------------------------------------------------------------------------------
+*/
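+
+/*
+    A minimal usage sketch (illustrative only; `obj' is the sync object of the barrier or
+    taskwait the thread is spinning at):
+
+        __kmp_itt_task_starting( obj );    // before executing the task picked up while waiting
+        // ... execute the task ...
+        __kmp_itt_task_finished( obj );    // after the task, the thread resumes waiting
+*/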
+
+void
+__kmp_itt_task_starting(
+ void * object // ITT sync object: barrier or taskwait.
+) {
+#if USE_ITT_NOTIFY
+ if ( object != NULL ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_cancel( object );
+ KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object );
+ }; // if
+#endif
+} // __kmp_itt_task_starting
+
+// -------------------------------------------------------------------------------------------------
+
+void
+__kmp_itt_task_finished(
+ void * object // ITT sync object: barrier or taskwait.
+) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_prepare( object );
+ KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object );
+#endif
+} // __kmp_itt_task_finished
+
+// -------------------------------------------------------------------------------------------------
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Lock reporting.
+
+  * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation
+        (set/unset). It is not a real event shown to the user but just sets a name for the
+        synchronization object. `lock' is the address of the sync object; the same address
+        should be used in all subsequent calls.
+
+ * __kmp_itt_lock_acquiring() should be called before setting the lock.
+
+ * __kmp_itt_lock_acquired() should be called after setting the lock.
+
+  * __kmp_itt_lock_releasing() should be called before unsetting the lock.
+
+  * __kmp_itt_lock_cancelled() should be called after a thread has cancelled waiting for the lock.
+
+ * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After
+        __kmp_itt_lock_destroyed(), all references to the same address will be considered
+        a different sync object, not related to the original one.
+ ------------------------------------------------------------------------------------------------
+*/
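+
+/*
+    A minimal lifetime sketch for a user lock (illustrative only; with KMP_USE_DYNAMIC_LOCK
+    the creating variant additionally takes the ident_t * location):
+
+        __kmp_itt_lock_creating( lock );    // name the sync object, before the first set/unset
+        __kmp_itt_lock_acquiring( lock );   // before setting the lock
+        __kmp_itt_lock_acquired( lock );    // after the lock is set
+        __kmp_itt_lock_releasing( lock );   // before unsetting the lock
+        __kmp_itt_lock_destroyed( lock );   // after the last lock operation
+*/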
+
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_USE_DYNAMIC_LOCK
+// Takes location information directly
+__kmp_inline
+void
+___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( lock, type, src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
+ }
+#endif
+}
+#else // KMP_USE_DYNAMIC_LOCK
+// Internal guts -- common code for locks and critical sections, do not call directly.
+__kmp_inline
+void
+___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ ident_t const * loc = NULL;
+ if ( __kmp_get_user_lock_location_ != NULL )
+ loc = __kmp_get_user_lock_location_( (lock) );
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( lock, type, src, 0 );
+ KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
+ }; // if
+#endif
+} // ___kmp_itt_lock_init
+#endif // KMP_USE_DYNAMIC_LOCK
+
+// Internal guts -- common code for locks and critical sections, do not call directly.
+__kmp_inline
+void
+___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_destroy( lock );
+ KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock );
+#endif
+} // ___kmp_itt_lock_fini
+
+
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_USE_DYNAMIC_LOCK
+void
+__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) {
+ ___kmp_itt_lock_init( lock, "OMP Lock", loc );
+}
+#else
+void
+__kmp_itt_lock_creating( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_init( lock, "OMP Lock" );
+} // __kmp_itt_lock_creating
+#endif
+
+void
+__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ // postpone lock object access
+ if ( __itt_sync_prepare_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_prepare( ilk->lock );
+ } else {
+ __itt_sync_prepare( lock );
+ }
+ }
+#else
+ __itt_sync_prepare( lock );
+#endif
+} // __kmp_itt_lock_acquiring
+
+void
+__kmp_itt_lock_acquired( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ // postpone lock object access
+ if ( __itt_sync_acquired_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_acquired( ilk->lock );
+ } else {
+ __itt_sync_acquired( lock );
+ }
+ }
+#else
+ __itt_sync_acquired( lock );
+#endif
+} // __kmp_itt_lock_acquired
+
+void
+__kmp_itt_lock_releasing( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ if ( __itt_sync_releasing_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_releasing( ilk->lock );
+ } else {
+ __itt_sync_releasing( lock );
+ }
+ }
+#else
+ __itt_sync_releasing( lock );
+#endif
+} // __kmp_itt_lock_releasing
+
+void
+__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) {
+#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
+ if ( __itt_sync_cancel_ptr ) {
+ if ( KMP_EXTRACT_D_TAG(lock) == 0 ) {
+ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
+ __itt_sync_cancel( ilk->lock );
+ } else {
+ __itt_sync_cancel( lock );
+ }
+ }
+#else
+ __itt_sync_cancel( lock );
+#endif
+} // __kmp_itt_lock_cancelled
+
+void
+__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_fini( lock, "OMP Lock" );
+} // __kmp_itt_lock_destroyed
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Critical reporting.
+
+ Critical sections are treated exactly as locks (but have different object type).
+ ------------------------------------------------------------------------------------------------
+*/
+#if KMP_USE_DYNAMIC_LOCK
+void
+__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) {
+ ___kmp_itt_lock_init( lock, "OMP Critical", loc);
+}
+#else
+void
+__kmp_itt_critical_creating( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_init( lock, "OMP Critical" );
+} // __kmp_itt_critical_creating
+#endif
+
+void
+__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) {
+ __itt_sync_prepare( lock );
+} // __kmp_itt_critical_acquiring
+
+void
+__kmp_itt_critical_acquired( kmp_user_lock_p lock ) {
+ __itt_sync_acquired( lock );
+} // __kmp_itt_critical_acquired
+
+void
+__kmp_itt_critical_releasing( kmp_user_lock_p lock ) {
+ __itt_sync_releasing( lock );
+} // __kmp_itt_critical_releasing
+
+void
+__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) {
+ ___kmp_itt_lock_fini( lock, "OMP Critical" );
+} // __kmp_itt_critical_destroyed
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Single reporting.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_single_start( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
+ kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) );
+ ident_t * loc = thr->th.th_ident;
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ kmp_str_buf_t name;
+ __kmp_str_buf_init( & name );
+ __kmp_str_buf_print( & name, "OMP Single-%s", src );
+ KMP_ITT_DEBUG_LOCK();
+ thr->th.th_itt_mark_single = __itt_mark_create( name.str );
+ KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single );
+ __kmp_str_buf_free( & name );
+ KMP_ITT_DEBUG_LOCK();
+ __itt_mark( thr->th.th_itt_mark_single, NULL );
+ KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single );
+ }; // if
+#endif
+} // __kmp_itt_single_start
+
+void
+__kmp_itt_single_end( int gtid ) {
+#if USE_ITT_NOTIFY
+ __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_mark_off( mark );
+ KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark );
+#endif
+} // __kmp_itt_single_end
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Ordered reporting.
+
+ __kmp_itt_ordered_init is called by each thread *before* first using sync
+ object. ITT team would like it to be called once, but it requires extra synchronization.
+
+ __kmp_itt_ordered_prep is called when thread is going to enter ordered section
+ (before synchronization).
+
+ __kmp_itt_ordered_start is called just before entering user code (after
+ synchronization).
+
+ __kmp_itt_ordered_end is called after returning from user code.
+
+ Sync object is th->th.th_dispatch->th_dispatch_sh_current.
+
+ Events are not generated in case of serialized team.
+ ------------------------------------------------------------------------------------------------
+*/
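+
+/*
+    A minimal sketch of the call order for one thread (illustrative only):
+
+        __kmp_itt_ordered_init( gtid );    // before the first use of the sync object
+        __kmp_itt_ordered_prep( gtid );    // about to enter the ordered section
+        // ... synchronization ...
+        __kmp_itt_ordered_start( gtid );   // just before entering user code
+        // ... user code ...
+        __kmp_itt_ordered_end( gtid );     // after returning from user code
+*/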
+
+void
+__kmp_itt_ordered_init( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_info_t * thr = __kmp_thread_from_gtid( gtid );
+ ident_t const * loc = thr->th.th_ident;
+ char const * src = ( loc == NULL ? NULL : loc->psource );
+ __itt_sync_create(
+ thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0
+ );
+ }; // if
+#endif
+} // __kmp_itt_ordered_init
+
+void
+__kmp_itt_ordered_prep( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_prep
+
+void
+__kmp_itt_ordered_start( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_start
+
+void
+__kmp_itt_ordered_end( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr ) {
+ kmp_team_t * t = __kmp_team_from_gtid( gtid );
+ if ( ! t->t.t_serialized ) {
+ kmp_info_t * th = __kmp_thread_from_gtid( gtid );
+ __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current );
+ }; // if
+ }; // if
+#endif
+} // __kmp_itt_ordered_end
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Threads reporting.
+ ------------------------------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_thread_ignore() {
+ __itt_thr_ignore();
+} // __kmp_itt_thread_ignore
+
+void
+__kmp_itt_thread_name( int gtid ) {
+#if USE_ITT_NOTIFY
+ if ( __itt_thr_name_set_ptr ) {
+ kmp_str_buf_t name;
+ __kmp_str_buf_init( & name );
+ if( KMP_MASTER_GTID(gtid) ) {
+ __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid );
+ } else {
+ __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid );
+ }
+ KMP_ITT_DEBUG_LOCK();
+ __itt_thr_name_set( name.str, name.used );
+ KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str );
+ __kmp_str_buf_free( & name );
+ }; // if
+#endif
+} // __kmp_itt_thread_name
+
+
+/*
+ --------------------------------------------------------------------------
+ System object reporting.
+
+ ITT catches operations with system sync objects (like Windows* OS on IA-32
+ architecture API critical sections and events). We only need to specify
+ name ("OMP Scheduler") for the object to let ITT know it is an object used
+ by OpenMP RTL for internal purposes.
+ --------------------------------------------------------------------------
+*/
+
+void
+__kmp_itt_system_object_created( void * object, char const * name ) {
+#if USE_ITT_NOTIFY
+ KMP_ITT_DEBUG_LOCK();
+ __itt_sync_create( object, "OMP Scheduler", name, 0 );
+ KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name );
+#endif
+} // __kmp_itt_system_object_created
+
+
+/*
+ ------------------------------------------------------------------------------------------------
+ Stack stitching api.
+
+    Master calls "create" and puts the stitching id into the team structure.
+    Workers read the stitching id and call the "enter" / "leave" API (as sketched below).
+ Master calls "destroy" at the end of the parallel region.
+ ------------------------------------------------------------------------------------------------
+*/
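+
+/*
+    A minimal sketch of the intended flow (illustrative only; where exactly the master stores
+    the id in the team structure is not shown here):
+
+        __itt_caller id = __kmp_itt_stack_caller_create();   // master, at parallel region start
+        __kmp_itt_stack_callee_enter( id );                  // each worker, before user code
+        __kmp_itt_stack_callee_leave( id );                  // each worker, after user code
+        __kmp_itt_stack_caller_destroy( id );                // master, at parallel region end
+*/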
+
+__itt_caller
+__kmp_itt_stack_caller_create()
+{
+#if USE_ITT_NOTIFY
+ if ( !__itt_stack_caller_create_ptr )
+ return NULL;
+ KMP_ITT_DEBUG_LOCK();
+ __itt_caller id = __itt_stack_caller_create();
+ KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id );
+ return id;
+#endif
+ return NULL;
+}
+
+void
+__kmp_itt_stack_caller_destroy( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_caller_destroy_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_caller_destroy( id );
+ KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id );
+ }
+#endif
+}
+
+void
+__kmp_itt_stack_callee_enter( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_callee_enter_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_enter( id );
+ KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id );
+ }
+#endif
+}
+
+void
+__kmp_itt_stack_callee_leave( __itt_caller id )
+{
+#if USE_ITT_NOTIFY
+ if ( __itt_stack_callee_leave_ptr ) {
+ KMP_ITT_DEBUG_LOCK();
+ __itt_stack_callee_leave( id );
+ KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id );
+ }
+#endif
+}
+
+#endif /* USE_ITT_BUILD */
diff --git a/contrib/libs/cxxsupp/openmp/kmp_lock.cpp b/contrib/libs/cxxsupp/openmp/kmp_lock.cpp
index ec884f6a51..becf7eddf6 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_lock.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_lock.cpp
@@ -1,4207 +1,4207 @@
-/*
- * kmp_lock.cpp -- lock-related functions
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include <stddef.h>
-
-#include "kmp.h"
-#include "kmp_itt.h"
-#include "kmp_i18n.h"
-#include "kmp_lock.h"
-#include "kmp_io.h"
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-# include <unistd.h>
-# include <sys/syscall.h>
-// We should really include <futex.h>, but that causes compatibility problems on different
-// Linux* OS distributions that either require that you include (or break when you try to include)
-// <pci/types.h>.
-// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
-// we just define the constants here and don't include <futex.h>
-# ifndef FUTEX_WAIT
-# define FUTEX_WAIT 0
-# endif
-# ifndef FUTEX_WAKE
-# define FUTEX_WAKE 1
-# endif
-#endif
-
-/* Implement spin locks for internal library use. */
-/* The algorithm implemented is Lamport's bakery lock [1974]. */
-
-void
-__kmp_validate_locks( void )
-{
- int i;
- kmp_uint32 x, y;
-
- /* Check to make sure unsigned arithmetic does wraps properly */
- x = ~((kmp_uint32) 0) - 2;
- y = x - 2;
-
- for (i = 0; i < 8; ++i, ++x, ++y) {
- kmp_uint32 z = (x - y);
- KMP_ASSERT( z == 2 );
- }
-
- KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* test and set locks */
-
-//
-// For the non-nested locks, we can only assume that the first 4 bytes were
-// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
-// compiler only allocates a 4 byte pointer on IA-32 architecture. On
-// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
-//
-// gcc reserves >= 8 bytes for nested locks, so we can assume that the
-// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
-//
-
-static kmp_int32
-__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
-{
- return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
-}
-
-static inline bool
-__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
-{
- return lck->lk.depth_locked != -1;
-}
-
-__forceinline static int
-__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- KMP_MB();
-
-#ifdef USE_LOCK_PROFILE
- kmp_uint32 curr = TCR_4( lck->lk.poll );
- if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
- __kmp_printf( "LOCK CONTENTION: %p\n", lck );
- /* else __kmp_printf( "." );*/
-#endif /* USE_LOCK_PROFILE */
-
- if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
- && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
- KMP_FSYNC_ACQUIRED(lck);
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-
- kmp_uint32 spins;
- KMP_FSYNC_PREPARE( lck );
- KMP_INIT_YIELD( spins );
- if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
- __kmp_xproc ) ) {
- KMP_YIELD( TRUE );
- }
- else {
- KMP_YIELD_SPIN( spins );
- }
-
- while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
- ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
- //
- // FIXME - use exponential backoff here
- //
- if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
- __kmp_xproc ) ) {
- KMP_YIELD( TRUE );
- }
- else {
- KMP_YIELD_SPIN( spins );
- }
- }
- KMP_FSYNC_ACQUIRED( lck );
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_acquire_tas_lock_timed_template( lck, gtid );
-}
-
-static int
-__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_lock";
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
- KMP_FATAL( LockIsAlreadyOwned, func );
- }
- return __kmp_acquire_tas_lock( lck, gtid );
-}
-
-int
-__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
- && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
- KMP_FSYNC_ACQUIRED( lck );
- return TRUE;
- }
- return FALSE;
-}
-
-static int
-__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_lock";
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- return __kmp_test_tas_lock( lck, gtid );
-}
-
-int
-__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KMP_FSYNC_RELEASING(lck);
- KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
- __kmp_xproc ) );
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
- && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_tas_lock( lck, gtid );
-}
-
-void
-__kmp_init_tas_lock( kmp_tas_lock_t * lck )
-{
- TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
-}
-
-static void
-__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
-{
- __kmp_init_tas_lock( lck );
-}
-
-void
-__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
-{
- lck->lk.poll = 0;
-}
-
-static void
-__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
-{
- char const * const func = "omp_destroy_lock";
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_tas_lock( lck );
-}
-
-
-//
-// nested test and set locks
-//
-
-int
-__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
- lck->lk.depth_locked += 1;
- return KMP_LOCK_ACQUIRED_NEXT;
- }
- else {
- __kmp_acquire_tas_lock_timed_template( lck, gtid );
- lck->lk.depth_locked = 1;
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-}
-
-static int
-__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_nest_lock";
- if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_acquire_nested_tas_lock( lck, gtid );
-}
-
-int
-__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- int retval;
-
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
- retval = ++lck->lk.depth_locked;
- }
- else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
- retval = 0;
- }
- else {
- KMP_MB();
- retval = lck->lk.depth_locked = 1;
- }
- return retval;
-}
-
-static int
-__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_nest_lock";
- if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_test_nested_tas_lock( lck, gtid );
-}
-
-int
-__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- KMP_MB();
- if ( --(lck->lk.depth_locked) == 0 ) {
- __kmp_release_tas_lock( lck, gtid );
- return KMP_LOCK_RELEASED;
- }
- return KMP_LOCK_STILL_HELD;
-}
-
-static int
-__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_nest_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_nested_tas_lock( lck, gtid );
-}
-
-void
-__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
-{
- __kmp_init_tas_lock( lck );
- lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
-}
-
-static void
-__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
-{
- __kmp_init_nested_tas_lock( lck );
-}
-
-void
-__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
-{
- __kmp_destroy_tas_lock( lck );
- lck->lk.depth_locked = 0;
-}
-
-static void
-__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
-{
- char const * const func = "omp_destroy_nest_lock";
- if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_nested_tas_lock( lck );
-}
-
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-/* ------------------------------------------------------------------------ */
-/* futex locks */
-
-// futex locks are really just test and set locks, with a different method
-// of handling contention. They take the same amount of space as test and
-// set locks, and are allocated the same way (i.e. use the area allocated by
-// the compiler for non-nested locks / allocate nested locks on the heap).
-
-static kmp_int32
-__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
-{
- return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
-}
-
-static inline bool
-__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
-{
- return lck->lk.depth_locked != -1;
-}
-
-__forceinline static int
-__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- kmp_int32 gtid_code = ( gtid + 1 ) << 1;
-
- KMP_MB();
-
-#ifdef USE_LOCK_PROFILE
- kmp_uint32 curr = TCR_4( lck->lk.poll );
- if ( ( curr != 0 ) && ( curr != gtid_code ) )
- __kmp_printf( "LOCK CONTENTION: %p\n", lck );
- /* else __kmp_printf( "." );*/
-#endif /* USE_LOCK_PROFILE */
-
- KMP_FSYNC_PREPARE( lck );
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
- lck, lck->lk.poll, gtid ) );
-
- kmp_int32 poll_val;
-
- while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
- KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
-
- kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
- lck, gtid, poll_val, cond ) );
-
- //
- // NOTE: if you try to use the following condition for this branch
- //
- // if ( poll_val & 1 == 0 )
- //
- // Then the 12.0 compiler has a bug where the following block will
- // always be skipped, regardless of the value of the LSB of poll_val.
- //
- if ( ! cond ) {
- //
- // Try to set the lsb in the poll to indicate to the owner
- // thread that they need to wake this thread up.
- //
- if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
- lck, lck->lk.poll, gtid ) );
- continue;
- }
- poll_val |= KMP_LOCK_BUSY(1, futex);
-
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
- lck, lck->lk.poll, gtid ) );
- }
-
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
- lck, gtid, poll_val ) );
-
- kmp_int32 rc;
- if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
- poll_val, NULL, NULL, 0 ) ) != 0 ) {
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
- lck, gtid, poll_val, rc, errno ) );
- continue;
- }
-
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
- lck, gtid, poll_val ) );
- //
- // This thread has now done a successful futex wait call and was
- // entered on the OS futex queue. We must now perform a futex
- // wake call when releasing the lock, as we have no idea how many
- // other threads are in the queue.
- //
- gtid_code |= 1;
- }
-
- KMP_FSYNC_ACQUIRED( lck );
- KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
- lck, lck->lk.poll, gtid ) );
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_acquire_futex_lock_timed_template( lck, gtid );
-}
-
-static int
-__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_lock";
- if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
- KMP_FATAL( LockIsAlreadyOwned, func );
- }
- return __kmp_acquire_futex_lock( lck, gtid );
-}
-
-int
-__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
- KMP_FSYNC_ACQUIRED( lck );
- return TRUE;
- }
- return FALSE;
-}
-
-static int
-__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_lock";
- if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- return __kmp_test_futex_lock( lck, gtid );
-}
-
-int
-__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
- lck, lck->lk.poll, gtid ) );
-
- KMP_FSYNC_RELEASING(lck);
-
- kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
-
- KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
- lck, gtid, poll_val ) );
-
- if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
- KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
- lck, gtid ) );
- syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
- }
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
- lck, lck->lk.poll, gtid ) );
-
- KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
- __kmp_xproc ) );
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
- && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_futex_lock( lck, gtid );
-}
-
-void
-__kmp_init_futex_lock( kmp_futex_lock_t * lck )
-{
- TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
-}
-
-static void
-__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
-{
- __kmp_init_futex_lock( lck );
-}
-
-void
-__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
-{
- lck->lk.poll = 0;
-}
-
-static void
-__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
-{
- char const * const func = "omp_destroy_lock";
- if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
- && __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_futex_lock( lck );
-}
-
-
-//
-// nested futex locks
-//
-
-int
-__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
- lck->lk.depth_locked += 1;
- return KMP_LOCK_ACQUIRED_NEXT;
- }
- else {
- __kmp_acquire_futex_lock_timed_template( lck, gtid );
- lck->lk.depth_locked = 1;
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-}
-
-static int
-__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_nest_lock";
- if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_acquire_nested_futex_lock( lck, gtid );
-}
-
-int
-__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- int retval;
-
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
- retval = ++lck->lk.depth_locked;
- }
- else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
- retval = 0;
- }
- else {
- KMP_MB();
- retval = lck->lk.depth_locked = 1;
- }
- return retval;
-}
-
-static int
-__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_nest_lock";
- if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_test_nested_futex_lock( lck, gtid );
-}
-
-int
-__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- KMP_MB();
- if ( --(lck->lk.depth_locked) == 0 ) {
- __kmp_release_futex_lock( lck, gtid );
- return KMP_LOCK_RELEASED;
- }
- return KMP_LOCK_STILL_HELD;
-}
-
-static int
-__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_nest_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_nested_futex_lock( lck, gtid );
-}
-
-void
-__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
-{
- __kmp_init_futex_lock( lck );
- lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
-}
-
-static void
-__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
-{
- __kmp_init_nested_futex_lock( lck );
-}
-
-void
-__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
-{
- __kmp_destroy_futex_lock( lck );
- lck->lk.depth_locked = 0;
-}
-
-static void
-__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
-{
- char const * const func = "omp_destroy_nest_lock";
- if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_nested_futex_lock( lck );
-}
-
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
-
-
-/* ------------------------------------------------------------------------ */
-/* ticket (bakery) locks */
-
-static kmp_int32
-__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
-{
- return TCR_4( lck->lk.owner_id ) - 1;
-}
-
-static inline bool
-__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
-{
- return lck->lk.depth_locked != -1;
-}
-
-static kmp_uint32
-__kmp_bakery_check(kmp_uint value, kmp_uint checker)
-{
+/*
+ * kmp_lock.cpp -- lock-related functions
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stddef.h>
+
+#include "kmp.h"
+#include "kmp_itt.h"
+#include "kmp_i18n.h"
+#include "kmp_lock.h"
+#include "kmp_io.h"
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+# include <unistd.h>
+# include <sys/syscall.h>
+// We should really include <futex.h>, but that causes compatibility problems on different
+// Linux* OS distributions that either require that you include (or break when you try to include)
+// <pci/types.h>.
+// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
+// we just define the constants here and don't include <futex.h>
+# ifndef FUTEX_WAIT
+# define FUTEX_WAIT 0
+# endif
+# ifndef FUTEX_WAKE
+# define FUTEX_WAKE 1
+# endif
+#endif
+
+/* Implement spin locks for internal library use. */
+/* The algorithm implemented is Lamport's bakery lock [1974]. */
+
+void
+__kmp_validate_locks( void )
+{
+ int i;
+ kmp_uint32 x, y;
+
+    /* Check to make sure unsigned arithmetic wraps properly */
+ x = ~((kmp_uint32) 0) - 2;
+ y = x - 2;
+
+ for (i = 0; i < 8; ++i, ++x, ++y) {
+ kmp_uint32 z = (x - y);
+ KMP_ASSERT( z == 2 );
+ }
+
+ KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
+}
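
The check above relies on unsigned 32-bit subtraction being modular: x - y stays 2 even while x wraps past zero. A minimal standalone sketch of the same property (a hypothetical test program, not part of the runtime):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint32_t x = UINT32_MAX - 2;          /* about to wrap around */
        uint32_t y = x - 2;
        for (int i = 0; i < 8; ++i, ++x, ++y) {
            /* modular arithmetic keeps the distance constant across the wrap */
            assert((uint32_t)(x - y) == 2);
        }
        return 0;
    }
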
+
+
+/* ------------------------------------------------------------------------ */
+/* test and set locks */
+
+//
+// For the non-nested locks, we can only assume that the first 4 bytes were
+// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
+// compiler only allocates a 4 byte pointer on IA-32 architecture. On
+// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
+//
+// gcc reserves >= 8 bytes for nested locks, so we can assume that the
+// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
+//
+
+static kmp_int32
+__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
+{
+ return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
+}
+
+static inline bool
+__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
+{
+ return lck->lk.depth_locked != -1;
+}
+
+__forceinline static int
+__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_MB();
+
+#ifdef USE_LOCK_PROFILE
+ kmp_uint32 curr = TCR_4( lck->lk.poll );
+ if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
+ __kmp_printf( "LOCK CONTENTION: %p\n", lck );
+ /* else __kmp_printf( "." );*/
+#endif /* USE_LOCK_PROFILE */
+
+ if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
+ && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
+ KMP_FSYNC_ACQUIRED(lck);
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+
+ kmp_uint32 spins;
+ KMP_FSYNC_PREPARE( lck );
+ KMP_INIT_YIELD( spins );
+ if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
+ __kmp_xproc ) ) {
+ KMP_YIELD( TRUE );
+ }
+ else {
+ KMP_YIELD_SPIN( spins );
+ }
+
+ while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
+ ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
+ //
+ // FIXME - use exponential backoff here
+ //
+ if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
+ __kmp_xproc ) ) {
+ KMP_YIELD( TRUE );
+ }
+ else {
+ KMP_YIELD_SPIN( spins );
+ }
+ }
+ KMP_FSYNC_ACQUIRED( lck );
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
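
For reference, the acquire path above is a classic test-and-set spin lock: a fast-path compare-and-swap, then spin and yield until the word looks free, then retry the swap. A minimal sketch with C11 atomics (illustrative only; the KMP_* macros, the tas free/busy encodings and the oversubscription heuristic are not reproduced):

    #include <stdatomic.h>
    #include <sched.h>

    typedef struct { atomic_int poll; } tas_lock_t;   /* 0 = free, gtid+1 = owner */

    static void tas_acquire(tas_lock_t *lck, int gtid) {
        int expected = 0;
        /* fast path: lock looks free, try to grab it with acquire semantics */
        if (atomic_compare_exchange_strong_explicit(&lck->poll, &expected, gtid + 1,
                                                    memory_order_acquire,
                                                    memory_order_relaxed))
            return;
        for (;;) {
            /* spin on a plain load first to avoid hammering the cache line */
            while (atomic_load_explicit(&lck->poll, memory_order_relaxed) != 0)
                sched_yield();                  /* stand-in for KMP_YIELD / KMP_YIELD_SPIN */
            expected = 0;
            if (atomic_compare_exchange_strong_explicit(&lck->poll, &expected, gtid + 1,
                                                        memory_order_acquire,
                                                        memory_order_relaxed))
                return;
        }
    }

The real routine additionally keys its yielding on whether more threads are running than available processors; that heuristic is left out of the sketch.
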
+
+int
+__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_acquire_tas_lock_timed_template( lck, gtid );
+}
+
+static int
+__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_lock";
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
+ KMP_FATAL( LockIsAlreadyOwned, func );
+ }
+ return __kmp_acquire_tas_lock( lck, gtid );
+}
+
+int
+__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
+ && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
+ KMP_FSYNC_ACQUIRED( lck );
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static int
+__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_lock";
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ return __kmp_test_tas_lock( lck, gtid );
+}
+
+int
+__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KMP_FSYNC_RELEASING(lck);
+ KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
+ __kmp_xproc ) );
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
+ && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_tas_lock( lck, gtid );
+}
+
+void
+__kmp_init_tas_lock( kmp_tas_lock_t * lck )
+{
+ TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
+}
+
+static void
+__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
+{
+ __kmp_init_tas_lock( lck );
+}
+
+void
+__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
+{
+ lck->lk.poll = 0;
+}
+
+static void
+__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
+{
+ char const * const func = "omp_destroy_lock";
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_tas_lock( lck );
+}
+
+
+//
+// nested test and set locks
+//
+
+int
+__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
+ lck->lk.depth_locked += 1;
+ return KMP_LOCK_ACQUIRED_NEXT;
+ }
+ else {
+ __kmp_acquire_tas_lock_timed_template( lck, gtid );
+ lck->lk.depth_locked = 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+}
+
+static int
+__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_nest_lock";
+ if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_acquire_nested_tas_lock( lck, gtid );
+}
+
+int
+__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ int retval;
+
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
+ retval = ++lck->lk.depth_locked;
+ }
+ else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
+ retval = 0;
+ }
+ else {
+ KMP_MB();
+ retval = lck->lk.depth_locked = 1;
+ }
+ return retval;
+}
+
+static int
+__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_nest_lock";
+ if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_test_nested_tas_lock( lck, gtid );
+}
+
+int
+__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ KMP_MB();
+ if ( --(lck->lk.depth_locked) == 0 ) {
+ __kmp_release_tas_lock( lck, gtid );
+ return KMP_LOCK_RELEASED;
+ }
+ return KMP_LOCK_STILL_HELD;
+}
+
+static int
+__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_nest_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_nested_tas_lock( lck, gtid );
+}
+
+void
+__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
+{
+ __kmp_init_tas_lock( lck );
+ lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
+}
+
+static void
+__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
+{
+ __kmp_init_nested_tas_lock( lck );
+}
+
+void
+__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
+{
+ __kmp_destroy_tas_lock( lck );
+ lck->lk.depth_locked = 0;
+}
+
+static void
+__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
+{
+ char const * const func = "omp_destroy_nest_lock";
+ if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_nested_tas_lock( lck );
+}
+
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+/* ------------------------------------------------------------------------ */
+/* futex locks */
+
+// futex locks are really just test and set locks, with a different method
+// of handling contention. They take the same amount of space as test and
+// set locks, and are allocated the same way (i.e. use the area allocated by
+// the compiler for non-nested locks / allocate nested locks on the heap).
+
+static kmp_int32
+__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
+{
+ return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
+}
+
+static inline bool
+__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
+{
+ return lck->lk.depth_locked != -1;
+}
+
+__forceinline static int
+__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_int32 gtid_code = ( gtid + 1 ) << 1;
+
+ KMP_MB();
+
+#ifdef USE_LOCK_PROFILE
+ kmp_uint32 curr = TCR_4( lck->lk.poll );
+ if ( ( curr != 0 ) && ( curr != gtid_code ) )
+ __kmp_printf( "LOCK CONTENTION: %p\n", lck );
+ /* else __kmp_printf( "." );*/
+#endif /* USE_LOCK_PROFILE */
+
+ KMP_FSYNC_PREPARE( lck );
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
+ lck, lck->lk.poll, gtid ) );
+
+ kmp_int32 poll_val;
+
+ while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
+ KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
+
+ kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
+ lck, gtid, poll_val, cond ) );
+
+ //
+ // NOTE: if you try to use the following condition for this branch
+ //
+ // if ( poll_val & 1 == 0 )
+ //
+ // Then the 12.0 compiler has a bug where the following block will
+ // always be skipped, regardless of the value of the LSB of poll_val.
+ //
+ if ( ! cond ) {
+ //
+ // Try to set the lsb in the poll to indicate to the owner
+ // thread that they need to wake this thread up.
+ //
+ if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
+ lck, lck->lk.poll, gtid ) );
+ continue;
+ }
+ poll_val |= KMP_LOCK_BUSY(1, futex);
+
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
+ lck, lck->lk.poll, gtid ) );
+ }
+
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
+ lck, gtid, poll_val ) );
+
+ kmp_int32 rc;
+ if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
+ poll_val, NULL, NULL, 0 ) ) != 0 ) {
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
+ lck, gtid, poll_val, rc, errno ) );
+ continue;
+ }
+
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
+ lck, gtid, poll_val ) );
+ //
+ // This thread has now done a successful futex wait call and was
+ // entered on the OS futex queue. We must now perform a futex
+ // wake call when releasing the lock, as we have no idea how many
+ // other threads are in the queue.
+ //
+ gtid_code |= 1;
+ }
+
+ KMP_FSYNC_ACQUIRED( lck );
+ KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
+ lck, lck->lk.poll, gtid ) );
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
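
A note on the comment above about `if ( poll_val & 1 == 0 )`: in standard C, `==` binds tighter than `&`, so that expression parses as `poll_val & (1 == 0)` and is always false. Whether or not the 12.0 compiler also had a defect here, the pre-computed `cond` with explicit masking sidesteps the pitfall. A tiny standalone illustration:

    #include <stdio.h>

    int main(void) {
        unsigned poll_val = 6;                       /* low bit clear */
        int wrong = poll_val & 1 == 0;               /* parses as poll_val & (1 == 0), always 0 */
        int right = (poll_val & 1) == 0;             /* 1: the low bit really is clear */
        printf("wrong=%d right=%d\n", wrong, right); /* prints wrong=0 right=1 */
        return 0;
    }
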
+
+int
+__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_acquire_futex_lock_timed_template( lck, gtid );
+}
+
+static int
+__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_lock";
+ if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
+ KMP_FATAL( LockIsAlreadyOwned, func );
+ }
+ return __kmp_acquire_futex_lock( lck, gtid );
+}
+
+int
+__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
+ KMP_FSYNC_ACQUIRED( lck );
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static int
+__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_lock";
+ if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ return __kmp_test_futex_lock( lck, gtid );
+}
+
+int
+__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
+ lck, lck->lk.poll, gtid ) );
+
+ KMP_FSYNC_RELEASING(lck);
+
+ kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
+
+ KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
+ lck, gtid, poll_val ) );
+
+ if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
+ KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
+ lck, gtid ) );
+ syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
+ }
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
+ lck, lck->lk.poll, gtid ) );
+
+ KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
+ __kmp_xproc ) );
+ return KMP_LOCK_RELEASED;
+}
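
Taken together with the acquire path, the protocol is: the poll word holds (gtid + 1) << 1 while the lock is owned, a contender sets the low bit before parking in futex_wait, and the releaser swaps the word back to free and issues a futex_wake only when that bit was set. A stripped-down sketch of the same idea (assumptions: Linux, glibc syscall(), C11 atomics, no error handling; the KMP_LOCK_* encodings, yields and trace macros are omitted):

    #define _GNU_SOURCE
    #include <stdatomic.h>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #define LOCK_FREE 0

    typedef struct { atomic_int poll; } futex_lock_t;

    static long sys_futex(atomic_int *addr, int op, int val) {
        return syscall(SYS_futex, addr, op, val, NULL, NULL, 0);
    }

    static void futex_lock_acquire(futex_lock_t *l, int gtid) {
        int want = (gtid + 1) << 1;               /* owner id kept in the upper bits */
        int seen;
        for (;;) {
            seen = LOCK_FREE;
            if (atomic_compare_exchange_strong(&l->poll, &seen, want))
                return;                           /* got the lock */
            if (!(seen & 1)) {
                /* tell the owner there is a waiter by setting the low bit */
                if (!atomic_compare_exchange_strong(&l->poll, &seen, seen | 1))
                    continue;                     /* word changed under us, start over */
                seen |= 1;
            }
            /* sleep until the word no longer equals the value we last saw */
            sys_futex(&l->poll, FUTEX_WAIT, seen);
            /* we attempted a wait: conservatively keep the waiter bit set once we own
               the lock, so a later release still wakes anyone queued behind us      */
            want |= 1;
        }
    }

    static void futex_lock_release(futex_lock_t *l) {
        int old = atomic_exchange(&l->poll, LOCK_FREE);
        if (old & 1)                              /* somebody parked: wake one waiter */
            sys_futex(&l->poll, FUTEX_WAKE, 1);
    }
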
+
+static int
+__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
+ && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_futex_lock( lck, gtid );
+}
+
+void
+__kmp_init_futex_lock( kmp_futex_lock_t * lck )
+{
+ TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
+}
+
+static void
+__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
+{
+ __kmp_init_futex_lock( lck );
+}
+
+void
+__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
+{
+ lck->lk.poll = 0;
+}
+
+static void
+__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
+{
+ char const * const func = "omp_destroy_lock";
+ if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
+ && __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_futex_lock( lck );
+}
+
+
+//
+// nested futex locks
+//
+
+int
+__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
+ lck->lk.depth_locked += 1;
+ return KMP_LOCK_ACQUIRED_NEXT;
+ }
+ else {
+ __kmp_acquire_futex_lock_timed_template( lck, gtid );
+ lck->lk.depth_locked = 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+}
+
+static int
+__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_nest_lock";
+ if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_acquire_nested_futex_lock( lck, gtid );
+}
+
+int
+__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ int retval;
+
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
+ retval = ++lck->lk.depth_locked;
+ }
+ else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
+ retval = 0;
+ }
+ else {
+ KMP_MB();
+ retval = lck->lk.depth_locked = 1;
+ }
+ return retval;
+}
+
+static int
+__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_nest_lock";
+ if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_test_nested_futex_lock( lck, gtid );
+}
+
+int
+__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ KMP_MB();
+ if ( --(lck->lk.depth_locked) == 0 ) {
+ __kmp_release_futex_lock( lck, gtid );
+ return KMP_LOCK_RELEASED;
+ }
+ return KMP_LOCK_STILL_HELD;
+}
+
+static int
+__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_nest_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_nested_futex_lock( lck, gtid );
+}
+
+void
+__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
+{
+ __kmp_init_futex_lock( lck );
+ lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
+}
+
+static void
+__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
+{
+ __kmp_init_nested_futex_lock( lck );
+}
+
+void
+__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
+{
+ __kmp_destroy_futex_lock( lck );
+ lck->lk.depth_locked = 0;
+}
+
+static void
+__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
+{
+ char const * const func = "omp_destroy_nest_lock";
+ if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_nested_futex_lock( lck );
+}
+
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+
+
+/* ------------------------------------------------------------------------ */
+/* ticket (bakery) locks */
+
+static kmp_int32
+__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
+{
+ return TCR_4( lck->lk.owner_id ) - 1;
+}
+
+static inline bool
+__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
+{
+ return lck->lk.depth_locked != -1;
+}
+
+static kmp_uint32
+__kmp_bakery_check(kmp_uint value, kmp_uint checker)
+{
kmp_uint32 pause;
-
- if (value == checker) {
- return TRUE;
- }
- for (pause = checker - value; pause != 0; --pause);
- return FALSE;
-}
-
-__forceinline static int
-__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- kmp_uint32 my_ticket;
- KMP_MB();
-
- my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
-
-#ifdef USE_LOCK_PROFILE
- if ( TCR_4( lck->lk.now_serving ) != my_ticket )
- __kmp_printf( "LOCK CONTENTION: %p\n", lck );
- /* else __kmp_printf( "." );*/
-#endif /* USE_LOCK_PROFILE */
-
- if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
- KMP_FSYNC_ACQUIRED(lck);
- return KMP_LOCK_ACQUIRED_FIRST;
- }
- KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
- KMP_FSYNC_ACQUIRED(lck);
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
-}
-
-static int
-__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
- KMP_FATAL( LockIsAlreadyOwned, func );
- }
-
- __kmp_acquire_ticket_lock( lck, gtid );
-
- lck->lk.owner_id = gtid + 1;
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
- if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
- kmp_uint32 next_ticket = my_ticket + 1;
- if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
- my_ticket, next_ticket ) ) {
- KMP_FSYNC_ACQUIRED( lck );
- return TRUE;
- }
- }
- return FALSE;
-}
-
-static int
-__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
-
- int retval = __kmp_test_ticket_lock( lck, gtid );
-
- if ( retval ) {
- lck->lk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-int
-__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- kmp_uint32 distance;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KMP_FSYNC_RELEASING(lck);
- distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
-
- KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KMP_YIELD( distance
- > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
- && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- lck->lk.owner_id = 0;
- return __kmp_release_ticket_lock( lck, gtid );
-}
-
-void
-__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
-{
- lck->lk.location = NULL;
- TCW_4( lck->lk.next_ticket, 0 );
- TCW_4( lck->lk.now_serving, 0 );
- lck->lk.owner_id = 0; // no thread owns the lock.
- lck->lk.depth_locked = -1; // -1 => not a nested lock.
- lck->lk.initialized = (kmp_ticket_lock *)lck;
-}
-
-static void
-__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
-{
- __kmp_init_ticket_lock( lck );
-}
-
-void
-__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
-{
- lck->lk.initialized = NULL;
- lck->lk.location = NULL;
- lck->lk.next_ticket = 0;
- lck->lk.now_serving = 0;
- lck->lk.owner_id = 0;
- lck->lk.depth_locked = -1;
-}
-
-static void
-__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
-{
- char const * const func = "omp_destroy_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_ticket_lock( lck );
-}
-
-
-//
-// nested ticket locks
-//
-
-int
-__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
- lck->lk.depth_locked += 1;
- return KMP_LOCK_ACQUIRED_NEXT;
- }
- else {
- __kmp_acquire_ticket_lock_timed_template( lck, gtid );
- KMP_MB();
- lck->lk.depth_locked = 1;
- KMP_MB();
- lck->lk.owner_id = gtid + 1;
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-}
-
-static int
-__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_acquire_nested_ticket_lock( lck, gtid );
-}
-
-int
-__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- int retval;
-
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
- retval = ++lck->lk.depth_locked;
- }
- else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
- retval = 0;
- }
- else {
- KMP_MB();
- retval = lck->lk.depth_locked = 1;
- KMP_MB();
- lck->lk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-static int
-__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
- kmp_int32 gtid )
-{
- char const * const func = "omp_test_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_test_nested_ticket_lock( lck, gtid );
-}
-
-int
-__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- KMP_MB();
- if ( --(lck->lk.depth_locked) == 0 ) {
- KMP_MB();
- lck->lk.owner_id = 0;
- __kmp_release_ticket_lock( lck, gtid );
- return KMP_LOCK_RELEASED;
- }
- return KMP_LOCK_STILL_HELD;
-}
-
-static int
-__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_nest_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_nested_ticket_lock( lck, gtid );
-}
-
-void
-__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
-{
- __kmp_init_ticket_lock( lck );
- lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
-}
-
-static void
-__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
-{
- __kmp_init_nested_ticket_lock( lck );
-}
-
-void
-__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
-{
- __kmp_destroy_ticket_lock( lck );
- lck->lk.depth_locked = 0;
-}
-
-static void
-__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
-{
- char const * const func = "omp_destroy_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_nested_ticket_lock( lck );
-}
-
-
-//
-// access functions to fields which don't exist for all lock kinds.
-//
-
-static int
-__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
-{
- return lck == lck->lk.initialized;
-}
-
-static const ident_t *
-__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
-{
- return lck->lk.location;
-}
-
-static void
-__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
-{
- lck->lk.location = loc;
-}
-
-static kmp_lock_flags_t
-__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
-{
- return lck->lk.flags;
-}
-
-static void
-__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
-{
- lck->lk.flags = flags;
-}
-
-/* ------------------------------------------------------------------------ */
-/* queuing locks */
-
-/*
- * First the states
- * (head,tail) = 0, 0 means lock is unheld, nobody on queue
- * UINT_MAX or -1, 0 means lock is held, nobody on queue
- * h, h means lock is held or about to transition, 1 element on queue
- * h, t h <> t, means lock is held or about to transition, >1 elements on queue
- *
- * Now the transitions
- * Acquire(0,0) = -1 ,0
- * Release(0,0) = Error
- * Acquire(-1,0) = h ,h h > 0
- * Release(-1,0) = 0 ,0
- * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
- * Release(h,h) = -1 ,0 h > 0
- * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
- * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
- *
- * And pictorially
- *
- *
- * +-----+
- * | 0, 0|------- release -------> Error
- * +-----+
- * | ^
- * acquire| |release
- * | |
- * | |
- * v |
- * +-----+
- * |-1, 0|
- * +-----+
- * | ^
- * acquire| |release
- * | |
- * | |
- * v |
- * +-----+
- * | h, h|
- * +-----+
- * | ^
- * acquire| |release
- * | |
- * | |
- * v |
- * +-----+
- * | h, t|----- acquire, release loopback ---+
- * +-----+ |
- * ^ |
- * | |
- * +------------------------------------+
- *
- */
-
-#ifdef DEBUG_QUEUING_LOCKS
-
-/* Stuff for circular trace buffer */
-#define TRACE_BUF_ELE 1024
-static char traces[TRACE_BUF_ELE][128] = { 0 };
-static int tc = 0;
-#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
-#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
-#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
-
-static void
-__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
- kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
-{
- kmp_int32 t, i;
-
- __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
-
- i = tc % TRACE_BUF_ELE;
- __kmp_printf_no_lock( "%s\n", traces[i] );
- i = (i+1) % TRACE_BUF_ELE;
- while ( i != (tc % TRACE_BUF_ELE) ) {
- __kmp_printf_no_lock( "%s", traces[i] );
- i = (i+1) % TRACE_BUF_ELE;
- }
- __kmp_printf_no_lock( "\n" );
-
- __kmp_printf_no_lock(
- "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
- gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
- head_id, tail_id );
-
- __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
-
- if ( lck->lk.head_id >= 1 ) {
- t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
- while (t > 0) {
- __kmp_printf_no_lock( "-> %d ", t );
- t = __kmp_threads[t-1]->th.th_next_waiting;
- }
- }
- __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
- __kmp_printf_no_lock( "\n\n" );
-}
-
-#endif /* DEBUG_QUEUING_LOCKS */
-
-static kmp_int32
-__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
-{
- return TCR_4( lck->lk.owner_id ) - 1;
-}
-
-static inline bool
-__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
-{
- return lck->lk.depth_locked != -1;
-}
-
-/* Acquire a lock using the queuing lock implementation */
-template <bool takeTime>
-/* [TLW] The unused template above is left behind because of what BEB believes is a
- potential compiler problem with __forceinline. */
-__forceinline static int
-__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
- kmp_int32 gtid )
-{
+
+ if (value == checker) {
+ return TRUE;
+ }
+ for (pause = checker - value; pause != 0; --pause);
+ return FALSE;
+}
+
+__forceinline static int
+__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_uint32 my_ticket;
+ KMP_MB();
+
+ my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
+
+#ifdef USE_LOCK_PROFILE
+ if ( TCR_4( lck->lk.now_serving ) != my_ticket )
+ __kmp_printf( "LOCK CONTENTION: %p\n", lck );
+ /* else __kmp_printf( "." );*/
+#endif /* USE_LOCK_PROFILE */
+
+ if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
+ KMP_FSYNC_ACQUIRED(lck);
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+ KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
+ KMP_FSYNC_ACQUIRED(lck);
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
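
The ticket (bakery) scheme above is a fetch-and-increment on next_ticket followed by waiting for now_serving to reach the ticket, which serves threads in FIFO order. A compact sketch with C11 atomics (illustrative; the real code waits through KMP_WAIT_YIELD and __kmp_bakery_check rather than a raw spin):

    #include <stdatomic.h>
    #include <sched.h>

    typedef struct {
        atomic_uint next_ticket;   /* next number to hand out           */
        atomic_uint now_serving;   /* ticket currently allowed to enter */
    } ticket_lock_t;

    static void ticket_acquire(ticket_lock_t *l) {
        /* take a number; fetch_add makes the order of arrival the order of service */
        unsigned my_ticket = atomic_fetch_add(&l->next_ticket, 1);
        while (atomic_load_explicit(&l->now_serving, memory_order_acquire) != my_ticket)
            sched_yield();
    }

    static void ticket_release(ticket_lock_t *l) {
        /* only the owner calls this, so a plain read-increment-store is enough */
        unsigned next = atomic_load_explicit(&l->now_serving, memory_order_relaxed) + 1;
        atomic_store_explicit(&l->now_serving, next, memory_order_release);
    }

The release step matches __kmp_release_ticket_lock above, which bumps now_serving with a release store so the next ticket holder observes the owner's writes.
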
+
+int
+__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
+}
+
+static int
+__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
+ KMP_FATAL( LockIsAlreadyOwned, func );
+ }
+
+ __kmp_acquire_ticket_lock( lck, gtid );
+
+ lck->lk.owner_id = gtid + 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
+
+int
+__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
+ if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
+ kmp_uint32 next_ticket = my_ticket + 1;
+ if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
+ my_ticket, next_ticket ) ) {
+ KMP_FSYNC_ACQUIRED( lck );
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static int
+__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+
+ int retval = __kmp_test_ticket_lock( lck, gtid );
+
+ if ( retval ) {
+ lck->lk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+int
+__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_uint32 distance;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KMP_FSYNC_RELEASING(lck);
+ distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
+
+ KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KMP_YIELD( distance
+ > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
+ && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ lck->lk.owner_id = 0;
+ return __kmp_release_ticket_lock( lck, gtid );
+}
+
+void
+__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
+{
+ lck->lk.location = NULL;
+ TCW_4( lck->lk.next_ticket, 0 );
+ TCW_4( lck->lk.now_serving, 0 );
+ lck->lk.owner_id = 0; // no thread owns the lock.
+ lck->lk.depth_locked = -1; // -1 => not a nested lock.
+ lck->lk.initialized = (kmp_ticket_lock *)lck;
+}
+
+static void
+__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
+{
+ __kmp_init_ticket_lock( lck );
+}
+
+void
+__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
+{
+ lck->lk.initialized = NULL;
+ lck->lk.location = NULL;
+ lck->lk.next_ticket = 0;
+ lck->lk.now_serving = 0;
+ lck->lk.owner_id = 0;
+ lck->lk.depth_locked = -1;
+}
+
+static void
+__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
+{
+ char const * const func = "omp_destroy_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_ticket_lock( lck );
+}
+
+
+//
+// nested ticket locks
+//
+
+int
+__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
+ lck->lk.depth_locked += 1;
+ return KMP_LOCK_ACQUIRED_NEXT;
+ }
+ else {
+ __kmp_acquire_ticket_lock_timed_template( lck, gtid );
+ KMP_MB();
+ lck->lk.depth_locked = 1;
+ KMP_MB();
+ lck->lk.owner_id = gtid + 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+}
+
+static int
+__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_acquire_nested_ticket_lock( lck, gtid );
+}
+
+int
+__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ int retval;
+
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
+ retval = ++lck->lk.depth_locked;
+ }
+ else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
+ retval = 0;
+ }
+ else {
+ KMP_MB();
+ retval = lck->lk.depth_locked = 1;
+ KMP_MB();
+ lck->lk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+static int
+__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
+ kmp_int32 gtid )
+{
+ char const * const func = "omp_test_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_test_nested_ticket_lock( lck, gtid );
+}
+
+int
+__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ KMP_MB();
+ if ( --(lck->lk.depth_locked) == 0 ) {
+ KMP_MB();
+ lck->lk.owner_id = 0;
+ __kmp_release_ticket_lock( lck, gtid );
+ return KMP_LOCK_RELEASED;
+ }
+ return KMP_LOCK_STILL_HELD;
+}
+
+static int
+__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_nest_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_nested_ticket_lock( lck, gtid );
+}
+
+void
+__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
+{
+ __kmp_init_ticket_lock( lck );
+ lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
+}
+
+static void
+__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
+{
+ __kmp_init_nested_ticket_lock( lck );
+}
+
+void
+__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
+{
+ __kmp_destroy_ticket_lock( lck );
+ lck->lk.depth_locked = 0;
+}
+
+static void
+__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
+{
+ char const * const func = "omp_destroy_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_nested_ticket_lock( lck );
+}
+
+
+//
+// access functions to fields which don't exist for all lock kinds.
+//
+
+static int
+__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
+{
+ return lck == lck->lk.initialized;
+}
+
+static const ident_t *
+__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
+{
+ return lck->lk.location;
+}
+
+static void
+__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
+{
+ lck->lk.location = loc;
+}
+
+static kmp_lock_flags_t
+__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
+{
+ return lck->lk.flags;
+}
+
+static void
+__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
+{
+ lck->lk.flags = flags;
+}
+
+/* ------------------------------------------------------------------------ */
+/* queuing locks */
+
+/*
+ * First the states
+ * (head,tail) = 0, 0 means lock is unheld, nobody on queue
+ * UINT_MAX or -1, 0 means lock is held, nobody on queue
+ * h, h means lock is held or about to transition, 1 element on queue
+ * h, t h <> t, means lock is held or about to transition, >1 elements on queue
+ *
+ * Now the transitions
+ * Acquire(0,0) = -1 ,0
+ * Release(0,0) = Error
+ * Acquire(-1,0) = h ,h h > 0
+ * Release(-1,0) = 0 ,0
+ * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
+ * Release(h,h) = -1 ,0 h > 0
+ * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
+ * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
+ *
+ * And pictorially
+ *
+ *
+ * +-----+
+ * | 0, 0|------- release -------> Error
+ * +-----+
+ * | ^
+ * acquire| |release
+ * | |
+ * | |
+ * v |
+ * +-----+
+ * |-1, 0|
+ * +-----+
+ * | ^
+ * acquire| |release
+ * | |
+ * | |
+ * v |
+ * +-----+
+ * | h, h|
+ * +-----+
+ * | ^
+ * acquire| |release
+ * | |
+ * | |
+ * v |
+ * +-----+
+ * | h, t|----- acquire, release loopback ---+
+ * +-----+ |
+ * ^ |
+ * | |
+ * +------------------------------------+
+ *
+ */
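
One way to read the table and diagram above is to walk a short trace. Assuming three threads whose queue ids (gtid + 1) are 1, 2 and 3, and writing states as (head, tail), one consistent sequence is:

    T1 acquires:  (0,0)  -> (-1,0)   T1 owns, queue empty
    T2 acquires:  (-1,0) -> (2,2)    T1 owns, T2 queued
    T3 acquires:  (2,2)  -> (2,3)    T1 owns, queue is T2 -> T3
    T1 releases:  (2,3)  -> (3,3)    T2 dequeued and now owns, T3 queued
    T2 releases:  (3,3)  -> (-1,0)   T3 dequeued and now owns, queue empty
    T3 releases:  (-1,0) -> (0,0)    unheld, queue empty

Each step is one of the transitions listed above; a release always hands the lock to the thread at the head of the queue.
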
+
+#ifdef DEBUG_QUEUING_LOCKS
+
+/* Stuff for circular trace buffer */
+#define TRACE_BUF_ELE 1024
+static char traces[TRACE_BUF_ELE][128] = { 0 };
+static int tc = 0;
+#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
+#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
+#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
+
+static void
+__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
+ kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
+{
+ kmp_int32 t, i;
+
+ __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
+
+ i = tc % TRACE_BUF_ELE;
+ __kmp_printf_no_lock( "%s\n", traces[i] );
+ i = (i+1) % TRACE_BUF_ELE;
+ while ( i != (tc % TRACE_BUF_ELE) ) {
+ __kmp_printf_no_lock( "%s", traces[i] );
+ i = (i+1) % TRACE_BUF_ELE;
+ }
+ __kmp_printf_no_lock( "\n" );
+
+ __kmp_printf_no_lock(
+ "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
+ gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
+ head_id, tail_id );
+
+ __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
+
+ if ( lck->lk.head_id >= 1 ) {
+ t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
+ while (t > 0) {
+ __kmp_printf_no_lock( "-> %d ", t );
+ t = __kmp_threads[t-1]->th.th_next_waiting;
+ }
+ }
+ __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
+ __kmp_printf_no_lock( "\n\n" );
+}
+
+#endif /* DEBUG_QUEUING_LOCKS */
+
+static kmp_int32
+__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
+{
+ return TCR_4( lck->lk.owner_id ) - 1;
+}
+
+static inline bool
+__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
+{
+ return lck->lk.depth_locked != -1;
+}
+
+/* Acquire a lock using the queuing lock implementation */
+template <bool takeTime>
+/* [TLW] The unused template above is left behind because of what BEB believes is a
+ potential compiler problem with __forceinline. */
+__forceinline static int
+__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
+ kmp_int32 gtid )
+{
kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
- volatile kmp_int32 *head_id_p = & lck->lk.head_id;
- volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
- volatile kmp_uint32 *spin_here_p;
- kmp_int32 need_mf = 1;
-
-#if OMPT_SUPPORT
- ompt_state_t prev_state = ompt_state_undefined;
-#endif
-
- KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
-
- KMP_FSYNC_PREPARE( lck );
- KMP_DEBUG_ASSERT( this_thr != NULL );
- spin_here_p = & this_thr->th.th_spin_here;
-
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "acq ent" );
- if ( *spin_here_p )
- __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
- if ( this_thr->th.th_next_waiting != 0 )
- __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
-#endif
- KMP_DEBUG_ASSERT( !*spin_here_p );
- KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
-
-
- /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
- that may follow, not just in execution order, but also in visibility order. This way,
- when a releasing thread observes the changes to the queue by this thread, it can
- rightly assume that spin_here_p has already been set to TRUE, so that when it sets
- spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
- to FALSE before this thread sets it to TRUE, this thread will hang.
- */
- *spin_here_p = TRUE; /* before enqueuing to prevent race */
-
- while( 1 ) {
- kmp_int32 enqueued;
- kmp_int32 head;
- kmp_int32 tail;
-
- head = *head_id_p;
-
- switch ( head ) {
-
- case -1:
- {
-#ifdef DEBUG_QUEUING_LOCKS
- tail = *tail_id_p;
- TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
-#endif
- tail = 0; /* to make sure next link asynchronously read is not set accidentally;
- this assignment prevents us from entering the if ( t > 0 )
- condition in the enqueued case below, which is not necessary for
- this state transition */
-
- need_mf = 0;
- /* try (-1,0)->(tid,tid) */
- enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
- KMP_PACK_64( -1, 0 ),
- KMP_PACK_64( gtid+1, gtid+1 ) );
-#ifdef DEBUG_QUEUING_LOCKS
- if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
-#endif
- }
- break;
-
- default:
- {
- tail = *tail_id_p;
- KMP_DEBUG_ASSERT( tail != gtid + 1 );
-
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
-#endif
-
- if ( tail == 0 ) {
- enqueued = FALSE;
- }
- else {
- need_mf = 0;
- /* try (h,t) or (h,h)->(h,tid) */
- enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
-
-#ifdef DEBUG_QUEUING_LOCKS
- if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
-#endif
- }
- }
- break;
-
- case 0: /* empty queue */
- {
- kmp_int32 grabbed_lock;
-
-#ifdef DEBUG_QUEUING_LOCKS
- tail = *tail_id_p;
- TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
-#endif
- /* try (0,0)->(-1,0) */
-
- /* only legal transition out of head = 0 is head = -1 with no change to tail */
- grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
-
- if ( grabbed_lock ) {
-
- *spin_here_p = FALSE;
-
- KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
- lck, gtid ));
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
-#endif
-
-#if OMPT_SUPPORT
- if (ompt_enabled && prev_state != ompt_state_undefined) {
- /* change the state before clearing wait_id */
- this_thr->th.ompt_thread_info.state = prev_state;
- this_thr->th.ompt_thread_info.wait_id = 0;
- }
-#endif
-
- KMP_FSYNC_ACQUIRED( lck );
- return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
- }
- enqueued = FALSE;
- }
- break;
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled && prev_state == ompt_state_undefined) {
- /* this thread will spin; set wait_id before entering wait state */
- prev_state = this_thr->th.ompt_thread_info.state;
- this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
- this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
- }
-#endif
-
- if ( enqueued ) {
- if ( tail > 0 ) {
- kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
- KMP_ASSERT( tail_thr != NULL );
- tail_thr->th.th_next_waiting = gtid+1;
- /* corresponding wait for this write in release code */
- }
- KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
-
-
- /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
- * throughput only here.
- */
- KMP_MB();
- KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
-
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "acq spin" );
-
- if ( this_thr->th.th_next_waiting != 0 )
- __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
-#endif
- KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
- KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
- lck, gtid ));
-
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "acq exit 2" );
-#endif
-
-#if OMPT_SUPPORT
- /* change the state before clearing wait_id */
- this_thr->th.ompt_thread_info.state = prev_state;
- this_thr->th.ompt_thread_info.wait_id = 0;
-#endif
-
- /* got lock, we were dequeued by the thread that released lock */
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-
- /* Yield if number of threads > number of logical processors */
- /* ToDo: Not sure why this should only be in oversubscription case,
- maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
- KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
- __kmp_xproc ) );
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "acq retry" );
-#endif
-
- }
- KMP_ASSERT2( 0, "should not get here" );
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
-}
-
-static int
-__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
- kmp_int32 gtid )
-{
- char const * const func = "omp_set_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
- KMP_FATAL( LockIsAlreadyOwned, func );
- }
-
- __kmp_acquire_queuing_lock( lck, gtid );
-
- lck->lk.owner_id = gtid + 1;
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- volatile kmp_int32 *head_id_p = & lck->lk.head_id;
- kmp_int32 head;
-#ifdef KMP_DEBUG
- kmp_info_t *this_thr;
-#endif
-
- KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
- KMP_DEBUG_ASSERT( gtid >= 0 );
-#ifdef KMP_DEBUG
- this_thr = __kmp_thread_from_gtid( gtid );
- KMP_DEBUG_ASSERT( this_thr != NULL );
- KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
-#endif
-
- head = *head_id_p;
-
- if ( head == 0 ) { /* nobody on queue, nobody holding */
-
- /* try (0,0)->(-1,0) */
-
- if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
- KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
- KMP_FSYNC_ACQUIRED(lck);
- return TRUE;
- }
- }
-
- KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
- return FALSE;
-}
-
-static int
-__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
-
- int retval = __kmp_test_queuing_lock( lck, gtid );
-
- if ( retval ) {
- lck->lk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-int
-__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
+ volatile kmp_int32 *head_id_p = & lck->lk.head_id;
+ volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
+ volatile kmp_uint32 *spin_here_p;
+ kmp_int32 need_mf = 1;
+
+#if OMPT_SUPPORT
+ ompt_state_t prev_state = ompt_state_undefined;
+#endif
+
+ KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
+
+ KMP_FSYNC_PREPARE( lck );
+ KMP_DEBUG_ASSERT( this_thr != NULL );
+ spin_here_p = & this_thr->th.th_spin_here;
+
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "acq ent" );
+ if ( *spin_here_p )
+ __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
+ if ( this_thr->th.th_next_waiting != 0 )
+ __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
+#endif
+ KMP_DEBUG_ASSERT( !*spin_here_p );
+ KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
+
+
+ /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
+ that may follow, not just in execution order, but also in visibility order. This way,
+ when a releasing thread observes the changes to the queue by this thread, it can
+ rightly assume that spin_here_p has already been set to TRUE, so that when it sets
+ spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
+ to FALSE before this thread sets it to TRUE, this thread will hang.
+ */
+ *spin_here_p = TRUE; /* before enqueuing to prevent race */
+
+ while( 1 ) {
+ kmp_int32 enqueued;
+ kmp_int32 head;
+ kmp_int32 tail;
+
+ head = *head_id_p;
+
+ switch ( head ) {
+
+ case -1:
+ {
+#ifdef DEBUG_QUEUING_LOCKS
+ tail = *tail_id_p;
+ TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
+#endif
+ tail = 0; /* to make sure next link asynchronously read is not set accidentally;
+ this assignment prevents us from entering the if ( t > 0 )
+ condition in the enqueued case below, which is not necessary for
+ this state transition */
+
+ need_mf = 0;
+ /* try (-1,0)->(tid,tid) */
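+                    // A single 64-bit CAS updates head and tail together here, so the
+                    // (-1,0) -> (gtid+1,gtid+1) transition is atomic: no thread can
+                    // observe the new head without the new tail.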
+ enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
+ KMP_PACK_64( -1, 0 ),
+ KMP_PACK_64( gtid+1, gtid+1 ) );
+#ifdef DEBUG_QUEUING_LOCKS
+ if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
+#endif
+ }
+ break;
+
+ default:
+ {
+ tail = *tail_id_p;
+ KMP_DEBUG_ASSERT( tail != gtid + 1 );
+
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
+#endif
+
+ if ( tail == 0 ) {
+ enqueued = FALSE;
+ }
+ else {
+ need_mf = 0;
+ /* try (h,t) or (h,h)->(h,tid) */
+ enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
+
+#ifdef DEBUG_QUEUING_LOCKS
+ if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
+#endif
+ }
+ }
+ break;
+
+ case 0: /* empty queue */
+ {
+ kmp_int32 grabbed_lock;
+
+#ifdef DEBUG_QUEUING_LOCKS
+ tail = *tail_id_p;
+ TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
+#endif
+ /* try (0,0)->(-1,0) */
+
+ /* only legal transition out of head = 0 is head = -1 with no change to tail */
+ grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
+
+ if ( grabbed_lock ) {
+
+ *spin_here_p = FALSE;
+
+ KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
+ lck, gtid ));
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
+#endif
+
+#if OMPT_SUPPORT
+ if (ompt_enabled && prev_state != ompt_state_undefined) {
+ /* change the state before clearing wait_id */
+ this_thr->th.ompt_thread_info.state = prev_state;
+ this_thr->th.ompt_thread_info.wait_id = 0;
+ }
+#endif
+
+ KMP_FSYNC_ACQUIRED( lck );
+ return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
+ }
+ enqueued = FALSE;
+ }
+ break;
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled && prev_state == ompt_state_undefined) {
+ /* this thread will spin; set wait_id before entering wait state */
+ prev_state = this_thr->th.ompt_thread_info.state;
+ this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
+ }
+#endif
+
+ if ( enqueued ) {
+ if ( tail > 0 ) {
+ kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
+ KMP_ASSERT( tail_thr != NULL );
+ tail_thr->th.th_next_waiting = gtid+1;
+ /* corresponding wait for this write in release code */
+ }
+ KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
+
+
+ /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
+ * throughput only here.
+ */
+ KMP_MB();
+ KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
+
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "acq spin" );
+
+ if ( this_thr->th.th_next_waiting != 0 )
+ __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
+#endif
+ KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
+ KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
+ lck, gtid ));
+
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "acq exit 2" );
+#endif
+
+#if OMPT_SUPPORT
+ /* change the state before clearing wait_id */
+ this_thr->th.ompt_thread_info.state = prev_state;
+ this_thr->th.ompt_thread_info.wait_id = 0;
+#endif
+
+ /* got lock, we were dequeued by the thread that released lock */
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+
+ /* Yield if number of threads > number of logical processors */
+ /* ToDo: Not sure why this should only be in oversubscription case,
+ maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
+ KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
+ __kmp_xproc ) );
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "acq retry" );
+#endif
+
+ }
+ KMP_ASSERT2( 0, "should not get here" );
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
+
+int
+__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
+}
+
+static int
+__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
+ kmp_int32 gtid )
+{
+ char const * const func = "omp_set_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
+ KMP_FATAL( LockIsAlreadyOwned, func );
+ }
+
+ __kmp_acquire_queuing_lock( lck, gtid );
+
+ lck->lk.owner_id = gtid + 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
+
+int
+__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ volatile kmp_int32 *head_id_p = & lck->lk.head_id;
+ kmp_int32 head;
+#ifdef KMP_DEBUG
kmp_info_t *this_thr;
- volatile kmp_int32 *head_id_p = & lck->lk.head_id;
- volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
-
- KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
- KMP_DEBUG_ASSERT( gtid >= 0 );
- this_thr = __kmp_thread_from_gtid( gtid );
- KMP_DEBUG_ASSERT( this_thr != NULL );
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "rel ent" );
-
- if ( this_thr->th.th_spin_here )
- __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
- if ( this_thr->th.th_next_waiting != 0 )
- __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
-#endif
- KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
- KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
-
- KMP_FSYNC_RELEASING(lck);
-
- while( 1 ) {
- kmp_int32 dequeued;
- kmp_int32 head;
- kmp_int32 tail;
-
- head = *head_id_p;
-
-#ifdef DEBUG_QUEUING_LOCKS
- tail = *tail_id_p;
- TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
- if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
-#endif
- KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
-
- if ( head == -1 ) { /* nobody on queue */
-
- /* try (-1,0)->(0,0) */
- if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
- KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
- lck, gtid ));
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
-#endif
-
-#if OMPT_SUPPORT
- /* nothing to do - no other thread is trying to shift blame */
-#endif
-
- return KMP_LOCK_RELEASED;
- }
- dequeued = FALSE;
-
- }
- else {
-
- tail = *tail_id_p;
- if ( head == tail ) { /* only one thread on the queue */
-
-#ifdef DEBUG_QUEUING_LOCKS
- if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
-#endif
- KMP_DEBUG_ASSERT( head > 0 );
-
- /* try (h,h)->(-1,0) */
- dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
- KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
-#endif
-
- }
- else {
- volatile kmp_int32 *waiting_id_p;
- kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
- KMP_DEBUG_ASSERT( head_thr != NULL );
- waiting_id_p = & head_thr->th.th_next_waiting;
-
- /* Does this require synchronous reads? */
-#ifdef DEBUG_QUEUING_LOCKS
- if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
-#endif
- KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
-
- /* try (h,t)->(h',t) or (t,t) */
-
- KMP_MB();
- /* make sure enqueuing thread has time to update next waiting thread field */
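-            /* wait until the enqueuer has published th_next_waiting for the departing
-               head, then install that thread's id (its gtid+1) as the new queue head */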
- *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
-#endif
- dequeued = TRUE;
- }
- }
-
- if ( dequeued ) {
- kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
- KMP_DEBUG_ASSERT( head_thr != NULL );
-
- /* Does this require synchronous reads? */
-#ifdef DEBUG_QUEUING_LOCKS
- if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
-#endif
- KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
-
- /* For clean code only.
- * Thread not released until next statement prevents race with acquire code.
- */
- head_thr->th.th_next_waiting = 0;
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
-#endif
-
- KMP_MB();
- /* reset spin value */
- head_thr->th.th_spin_here = FALSE;
-
- KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
- lck, gtid ));
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "rel exit 2" );
-#endif
- return KMP_LOCK_RELEASED;
- }
- /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
-
-#ifdef DEBUG_QUEUING_LOCKS
- TRACE_LOCK( gtid+1, "rel retry" );
-#endif
-
- } /* while */
- KMP_ASSERT2( 0, "should not get here" );
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
- kmp_int32 gtid )
-{
- char const * const func = "omp_unset_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- lck->lk.owner_id = 0;
- return __kmp_release_queuing_lock( lck, gtid );
-}
-
-void
-__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
-{
- lck->lk.location = NULL;
- lck->lk.head_id = 0;
- lck->lk.tail_id = 0;
- lck->lk.next_ticket = 0;
- lck->lk.now_serving = 0;
- lck->lk.owner_id = 0; // no thread owns the lock.
- lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
- lck->lk.initialized = lck;
-
- KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
-}
-
-static void
-__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
-{
- __kmp_init_queuing_lock( lck );
-}
-
-void
-__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
-{
- lck->lk.initialized = NULL;
- lck->lk.location = NULL;
- lck->lk.head_id = 0;
- lck->lk.tail_id = 0;
- lck->lk.next_ticket = 0;
- lck->lk.now_serving = 0;
- lck->lk.owner_id = 0;
- lck->lk.depth_locked = -1;
-}
-
-static void
-__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
-{
- char const * const func = "omp_destroy_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_queuing_lock( lck );
-}
-
-
-//
-// nested queuing locks
-//
-
-int
-__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
- lck->lk.depth_locked += 1;
- return KMP_LOCK_ACQUIRED_NEXT;
- }
- else {
- __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
- KMP_MB();
- lck->lk.depth_locked = 1;
- KMP_MB();
- lck->lk.owner_id = gtid + 1;
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-}
-
-static int
-__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_acquire_nested_queuing_lock( lck, gtid );
-}
-
-int
-__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- int retval;
-
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
- retval = ++lck->lk.depth_locked;
- }
- else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
- retval = 0;
- }
- else {
- KMP_MB();
- retval = lck->lk.depth_locked = 1;
- KMP_MB();
- lck->lk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-static int
-__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
- kmp_int32 gtid )
-{
- char const * const func = "omp_test_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_test_nested_queuing_lock( lck, gtid );
-}
-
-int
-__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- KMP_MB();
- if ( --(lck->lk.depth_locked) == 0 ) {
- KMP_MB();
- lck->lk.owner_id = 0;
- __kmp_release_queuing_lock( lck, gtid );
- return KMP_LOCK_RELEASED;
- }
- return KMP_LOCK_STILL_HELD;
-}
-
-static int
-__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_nest_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_nested_queuing_lock( lck, gtid );
-}
-
-void
-__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
-{
- __kmp_init_queuing_lock( lck );
- lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
-}
-
-static void
-__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
-{
- __kmp_init_nested_queuing_lock( lck );
-}
-
-void
-__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
-{
- __kmp_destroy_queuing_lock( lck );
- lck->lk.depth_locked = 0;
-}
-
-static void
-__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
-{
- char const * const func = "omp_destroy_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_nested_queuing_lock( lck );
-}
-
-
-//
-// access functions to fields which don't exist for all lock kinds.
-//
-
-static int
-__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
-{
- return lck == lck->lk.initialized;
-}
-
-static const ident_t *
-__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
-{
- return lck->lk.location;
-}
-
-static void
-__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
-{
- lck->lk.location = loc;
-}
-
-static kmp_lock_flags_t
-__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
-{
- return lck->lk.flags;
-}
-
-static void
-__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
-{
- lck->lk.flags = flags;
-}
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-/*
- RTM Adaptive locks
-*/
-
-#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
-
-#include <immintrin.h>
-#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
-
-#else
-
-// Values from the status register after failed speculation.
-#define _XBEGIN_STARTED (~0u)
-#define _XABORT_EXPLICIT (1 << 0)
-#define _XABORT_RETRY (1 << 1)
-#define _XABORT_CONFLICT (1 << 2)
-#define _XABORT_CAPACITY (1 << 3)
-#define _XABORT_DEBUG (1 << 4)
-#define _XABORT_NESTED (1 << 5)
-#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
-
-// Aborts for which it's worth trying again immediately
-#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
-
-#define STRINGIZE_INTERNAL(arg) #arg
-#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
-
-// Access to RTM instructions
-
-/*
- A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
- This is the same definition as the compiler intrinsic that will be supported at some point.
-*/
-static __inline int _xbegin()
-{
- int res = -1;
-
-#if KMP_OS_WINDOWS
-#if KMP_ARCH_X86_64
- _asm {
- _emit 0xC7
- _emit 0xF8
- _emit 2
- _emit 0
- _emit 0
- _emit 0
- jmp L2
- mov res, eax
- L2:
- }
-#else /* IA32 */
- _asm {
- _emit 0xC7
- _emit 0xF8
- _emit 2
- _emit 0
- _emit 0
- _emit 0
- jmp L2
- mov res, eax
- L2:
- }
-#endif // KMP_ARCH_X86_64
-#else
- /* Note that %eax must be noted as killed (clobbered), because
- * the XSR is returned in %eax(%rax) on abort. Other register
- * values are restored, so don't need to be killed.
- *
- * We must also mark 'res' as an input and an output, since otherwise
- * 'res=-1' may be dropped as being dead, whereas we do need the
- * assignment on the successful (i.e., non-abort) path.
- */
- __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
- " .long 1f-1b-6\n"
- " jmp 2f\n"
- "1: movl %%eax,%0\n"
- "2:"
- :"+r"(res)::"memory","%eax");
-#endif // KMP_OS_WINDOWS
- return res;
-}
-
-/*
- Transaction end
-*/
-static __inline void _xend()
-{
-#if KMP_OS_WINDOWS
- __asm {
- _emit 0x0f
- _emit 0x01
- _emit 0xd5
- }
-#else
- __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
-#endif
-}
-
-/*
-    This is a macro; the argument must be a single byte constant which
- can be evaluated by the inline assembler, since it is emitted as a
- byte into the assembly code.
-*/
-#if KMP_OS_WINDOWS
-#define _xabort(ARG) \
- _asm _emit 0xc6 \
- _asm _emit 0xf8 \
- _asm _emit ARG
-#else
-#define _xabort(ARG) \
- __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
-#endif
-
-#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
-
-//
-// Statistics are collected for testing purposes
-//
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-
-// We accumulate speculative lock statistics when the lock is destroyed.
-// We keep locks that haven't been destroyed in the liveLocks list
-// so that we can grab their statistics too.
-static kmp_adaptive_lock_statistics_t destroyedStats;
-
-// To hold the list of live locks.
-static kmp_adaptive_lock_info_t liveLocks;
-
-// A lock so we can safely update the list of locks.
-static kmp_bootstrap_lock_t chain_lock;
-
-// Initialize the list of stats.
-void
-__kmp_init_speculative_stats()
-{
- kmp_adaptive_lock_info_t *lck = &liveLocks;
-
- memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
- lck->stats.next = lck;
- lck->stats.prev = lck;
-
- KMP_ASSERT( lck->stats.next->stats.prev == lck );
- KMP_ASSERT( lck->stats.prev->stats.next == lck );
-
- __kmp_init_bootstrap_lock( &chain_lock );
-
-}
-
-// Insert the lock into the circular list
-static void
-__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
-{
- __kmp_acquire_bootstrap_lock( &chain_lock );
-
- lck->stats.next = liveLocks.stats.next;
- lck->stats.prev = &liveLocks;
-
- liveLocks.stats.next = lck;
- lck->stats.next->stats.prev = lck;
-
- KMP_ASSERT( lck->stats.next->stats.prev == lck );
- KMP_ASSERT( lck->stats.prev->stats.next == lck );
-
- __kmp_release_bootstrap_lock( &chain_lock );
-}
-
-static void
-__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
-{
- KMP_ASSERT( lck->stats.next->stats.prev == lck );
- KMP_ASSERT( lck->stats.prev->stats.next == lck );
-
- kmp_adaptive_lock_info_t * n = lck->stats.next;
- kmp_adaptive_lock_info_t * p = lck->stats.prev;
-
- n->stats.prev = p;
- p->stats.next = n;
-}
-
-static void
-__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
-{
- memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
- __kmp_remember_lock( lck );
-}
-
-static void
-__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
-{
- kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
-
- t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
- t->successfulSpeculations += s->successfulSpeculations;
- t->hardFailedSpeculations += s->hardFailedSpeculations;
- t->softFailedSpeculations += s->softFailedSpeculations;
- t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
- t->lemmingYields += s->lemmingYields;
-}
-
-static void
-__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
-{
- kmp_adaptive_lock_statistics_t *t = &destroyedStats;
-
- __kmp_acquire_bootstrap_lock( &chain_lock );
-
- __kmp_add_stats( &destroyedStats, lck );
- __kmp_forget_lock( lck );
-
- __kmp_release_bootstrap_lock( &chain_lock );
-}
-
-static float
-percent (kmp_uint32 count, kmp_uint32 total)
-{
- return (total == 0) ? 0.0: (100.0 * count)/total;
-}
-
-static
-FILE * __kmp_open_stats_file()
-{
- if (strcmp (__kmp_speculative_statsfile, "-") == 0)
- return stdout;
-
- size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
- char buffer[buffLen];
- KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
- (kmp_int32)getpid());
- FILE * result = fopen(&buffer[0], "w");
-
- // Maybe we should issue a warning here...
- return result ? result : stdout;
-}
-
-void
-__kmp_print_speculative_stats()
-{
- if (__kmp_user_lock_kind != lk_adaptive)
- return;
-
- FILE * statsFile = __kmp_open_stats_file();
-
- kmp_adaptive_lock_statistics_t total = destroyedStats;
- kmp_adaptive_lock_info_t *lck;
-
- for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
- __kmp_add_stats( &total, lck );
- }
- kmp_adaptive_lock_statistics_t *t = &total;
- kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
- kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
- t->softFailedSpeculations;
-
- fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
- fprintf ( statsFile, " Lock parameters: \n"
- " max_soft_retries : %10d\n"
- " max_badness : %10d\n",
- __kmp_adaptive_backoff_params.max_soft_retries,
- __kmp_adaptive_backoff_params.max_badness);
- fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
- fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
- fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
- t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
- fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
- t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
- fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
-
- fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
- fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
- t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
- fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
- t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
- fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
- t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
-
- if (statsFile != stdout)
- fclose( statsFile );
-}
-
-# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
-#else
-# define KMP_INC_STAT(lck,stat)
-
-#endif // KMP_DEBUG_ADAPTIVE_LOCKS
-
-static inline bool
-__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
-{
- // It is enough to check that the head_id is zero.
- // We don't also need to check the tail.
- bool res = lck->lk.head_id == 0;
-
- // We need a fence here, since we must ensure that no memory operations
- // from later in this thread float above that read.
-#if KMP_COMPILER_ICC
- _mm_mfence();
-#else
- __sync_synchronize();
-#endif
-
- return res;
-}
-
-// Functions for manipulating the badness
-static __inline void
-__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
-{
- // Reset the badness to zero so we eagerly try to speculate again
- lck->lk.adaptive.badness = 0;
- KMP_INC_STAT(lck,successfulSpeculations);
-}
-
-// Create a bit mask with one more set bit.
-static __inline void
-__kmp_step_badness( kmp_adaptive_lock_t *lck )
-{
- kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
- if ( newBadness > lck->lk.adaptive.max_badness) {
- return;
- } else {
- lck->lk.adaptive.badness = newBadness;
- }
-}
-
-// Check whether speculation should be attempted.
-static __inline int
-__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
-{
- kmp_uint32 badness = lck->lk.adaptive.badness;
- kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
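-    // badness is a low-bit mask (0, 1, 3, 7, ... -- see __kmp_step_badness), so after
-    // failures speculation is retried only when the low bits of the attempt count are
-    // all zero, i.e. progressively less often.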
- int res = (attempts & badness) == 0;
- return res;
-}
-
-// Attempt to acquire only the speculative lock.
-// Does not back off to the non-speculative lock.
-//
-static int
-__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
-{
- int retries = lck->lk.adaptive.max_soft_retries;
-
- // We don't explicitly count the start of speculation, rather we record
- // the results (success, hard fail, soft fail). The sum of all of those
- // is the total number of times we started speculation since all
- // speculations must end one of those ways.
- do
- {
- kmp_uint32 status = _xbegin();
- // Switch this in to disable actual speculation but exercise
- // at least some of the rest of the code. Useful for debugging...
- // kmp_uint32 status = _XABORT_NESTED;
-
- if (status == _XBEGIN_STARTED )
- { /* We have successfully started speculation
- * Check that no-one acquired the lock for real between when we last looked
- * and now. This also gets the lock cache line into our read-set,
- * which we need so that we'll abort if anyone later claims it for real.
- */
- if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
- {
- // Lock is now visibly acquired, so someone beat us to it.
- // Abort the transaction so we'll restart from _xbegin with the
- // failure status.
- _xabort(0x01);
- KMP_ASSERT2( 0, "should not get here" );
- }
- return 1; // Lock has been acquired (speculatively)
- } else {
- // We have aborted, update the statistics
- if ( status & SOFT_ABORT_MASK)
- {
- KMP_INC_STAT(lck,softFailedSpeculations);
- // and loop round to retry.
- }
- else
- {
- KMP_INC_STAT(lck,hardFailedSpeculations);
- // Give up if we had a hard failure.
- break;
- }
- }
- } while( retries-- ); // Loop while we have retries, and didn't fail hard.
-
- // Either we had a hard failure or we didn't succeed softly after
-    // the full set of attempts, so increase the badness (back off from speculating).
- __kmp_step_badness( lck );
- return 0;
-}
-
-// Attempt to acquire the speculative lock, or back off to the non-speculative one
-// if the speculative lock cannot be acquired.
-// We can succeed speculatively, non-speculatively, or fail.
-static int
-__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
-{
- // First try to acquire the lock speculatively
- if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
- return 1;
-
- // Speculative acquisition failed, so try to acquire it non-speculatively.
- // Count the non-speculative acquire attempt
- lck->lk.adaptive.acquire_attempts++;
-
- // Use base, non-speculative lock.
- if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
- {
- KMP_INC_STAT(lck,nonSpeculativeAcquires);
- return 1; // Lock is acquired (non-speculatively)
- }
- else
- {
- return 0; // Failed to acquire the lock, it's already visibly locked.
- }
-}
-
-static int
-__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_lock";
- if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
-
- int retval = __kmp_test_adaptive_lock( lck, gtid );
-
- if ( retval ) {
- lck->lk.qlk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-// Block until we can acquire a speculative, adaptive lock.
-// We check whether we should be trying to speculate.
-// If we should be, we check the real lock to see if it is free,
-// and, if not, pause without attempting to acquire it until it is.
-// Then we try the speculative acquire.
-// This means that although we suffer from lemmings a little (
-// because we can't acquire the lock speculatively until
-// the queue of threads waiting has cleared), we don't get into a
-// state where we can never acquire the lock speculatively (because we
-// force the queue to clear by preventing new arrivals from entering the
-// queue).
-// This does mean that when we're trying to break lemmings, the lock
-// is no longer fair. However OpenMP makes no guarantee that its
-// locks are fair, so this isn't a real problem.
-static void
-__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
-{
- if ( __kmp_should_speculate( lck, gtid ) )
- {
- if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
- {
- if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
- return;
- // We tried speculation and failed, so give up.
- }
- else
- {
- // We can't try speculation until the lock is free, so we
-            // pause here (without suspending on the queuing lock)
-            // to allow it to drain, then try again.
- // All other threads will also see the same result for
- // shouldSpeculate, so will be doing the same if they
- // try to claim the lock from now on.
- while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
- {
- KMP_INC_STAT(lck,lemmingYields);
- __kmp_yield (TRUE);
- }
-
- if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
- return;
- }
- }
-
- // Speculative acquisition failed, so acquire it non-speculatively.
- // Count the non-speculative acquire attempt
- lck->lk.adaptive.acquire_attempts++;
-
- __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
- // We have acquired the base lock, so count that.
- KMP_INC_STAT(lck,nonSpeculativeAcquires );
-}
-
-static void
-__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_lock";
- if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
- KMP_FATAL( LockIsAlreadyOwned, func );
- }
-
- __kmp_acquire_adaptive_lock( lck, gtid );
-
- lck->lk.qlk.owner_id = gtid + 1;
-}
-
-static int
-__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
-{
- if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
- { // If the lock doesn't look claimed we must be speculating.
- // (Or the user's code is buggy and they're releasing without locking;
- // if we had XTEST we'd be able to check that case...)
- _xend(); // Exit speculation
- __kmp_update_badness_after_success( lck );
- }
- else
- { // Since the lock *is* visibly locked we're not speculating,
- // so should use the underlying lock's release scheme.
- __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
- }
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- lck->lk.qlk.owner_id = 0;
- __kmp_release_adaptive_lock( lck, gtid );
- return KMP_LOCK_RELEASED;
-}
-
-static void
-__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
-{
- __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
- lck->lk.adaptive.badness = 0;
- lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
- lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
- lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
-#if KMP_DEBUG_ADAPTIVE_LOCKS
- __kmp_zero_speculative_stats( &lck->lk.adaptive );
-#endif
- KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
-}
-
-static void
-__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
-{
- __kmp_init_adaptive_lock( lck );
-}
-
-static void
-__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
-{
-#if KMP_DEBUG_ADAPTIVE_LOCKS
- __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
-#endif
- __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
- // Nothing needed for the speculative part.
-}
-
-static void
-__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
-{
- char const * const func = "omp_destroy_lock";
- if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_adaptive_lock( lck );
-}
-
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-
-/* ------------------------------------------------------------------------ */
-/* DRDPA ticket locks */
-/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
-
-static kmp_int32
-__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
-{
- return TCR_4( lck->lk.owner_id ) - 1;
-}
-
-static inline bool
-__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
-{
- return lck->lk.depth_locked != -1;
-}
-
-__forceinline static int
-__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
- kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
- volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
- = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- TCR_PTR(lck->lk.polls); // volatile load
-
-#ifdef USE_LOCK_PROFILE
- if (TCR_8(polls[ticket & mask].poll) != ticket)
- __kmp_printf("LOCK CONTENTION: %p\n", lck);
- /* else __kmp_printf( "." );*/
-#endif /* USE_LOCK_PROFILE */
-
- //
- // Now spin-wait, but reload the polls pointer and mask, in case the
- // polling area has been reconfigured. Unless it is reconfigured, the
- // reloads stay in L1 cache and are cheap.
- //
- // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
- //
- // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
- // and poll to be re-read every spin iteration.
- //
- kmp_uint32 spins;
-
- KMP_FSYNC_PREPARE(lck);
- KMP_INIT_YIELD(spins);
- while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
- // If we are oversubscribed,
- // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
- // CPU Pause is in the macros for yield.
- //
- KMP_YIELD(TCR_4(__kmp_nth)
- > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
- KMP_YIELD_SPIN(spins);
-
- // Re-read the mask and the poll pointer from the lock structure.
- //
- // Make certain that "mask" is read before "polls" !!!
- //
-        // If another thread reconfigures the polling area and updates
- // their values, and we get the new value of mask and the old polls
- // pointer, we could access memory beyond the end of the old polling
- // area.
- //
- mask = TCR_8(lck->lk.mask); // volatile load
- polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- TCR_PTR(lck->lk.polls); // volatile load
- }
-
- //
- // Critical section starts here
- //
- KMP_FSYNC_ACQUIRED(lck);
- KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
- ticket, lck));
- lck->lk.now_serving = ticket; // non-volatile store
-
- //
- // Deallocate a garbage polling area if we know that we are the last
- // thread that could possibly access it.
- //
- // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
- // ticket.
- //
- if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
- __kmp_free((void *)lck->lk.old_polls);
- lck->lk.old_polls = NULL;
- lck->lk.cleanup_ticket = 0;
- }
-
- //
- // Check to see if we should reconfigure the polling area.
- // If there is still a garbage polling area to be deallocated from a
- // previous reconfiguration, let a later thread reconfigure it.
- //
- if (lck->lk.old_polls == NULL) {
- bool reconfigure = false;
- volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
- kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
-
- if (TCR_4(__kmp_nth)
- > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
- //
- // We are in oversubscription mode. Contract the polling area
- // down to a single location, if that hasn't been done already.
- //
- if (num_polls > 1) {
- reconfigure = true;
- num_polls = TCR_4(lck->lk.num_polls);
- mask = 0;
- num_polls = 1;
- polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- __kmp_allocate(num_polls * sizeof(*polls));
- polls[0].poll = ticket;
- }
- }
- else {
- //
- // We are in under/fully subscribed mode. Check the number of
- // threads waiting on the lock. The size of the polling area
- // should be at least the number of threads waiting.
- //
- kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
- if (num_waiting > num_polls) {
- kmp_uint32 old_num_polls = num_polls;
- reconfigure = true;
- do {
- mask = (mask << 1) | 1;
- num_polls *= 2;
- } while (num_polls <= num_waiting);
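-                // num_polls remains a power of two and mask == num_polls - 1, so
-                // (ticket & mask) always indexes within the new polling area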
-
- //
- // Allocate the new polling area, and copy the relevant portion
- // of the old polling area to the new area. __kmp_allocate()
- // zeroes the memory it allocates, and most of the old area is
- // just zero padding, so we only copy the release counters.
- //
- polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- __kmp_allocate(num_polls * sizeof(*polls));
- kmp_uint32 i;
- for (i = 0; i < old_num_polls; i++) {
- polls[i].poll = old_polls[i].poll;
- }
- }
- }
-
- if (reconfigure) {
- //
- // Now write the updated fields back to the lock structure.
- //
- // Make certain that "polls" is written before "mask" !!!
- //
- // If another thread picks up the new value of mask and the old
-            // polls pointer, it could access memory beyond the end of the
- // old polling area.
- //
- // On x86, we need memory fences.
- //
- KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
- ticket, lck, num_polls));
-
- lck->lk.old_polls = old_polls; // non-volatile store
- lck->lk.polls = polls; // volatile store
-
- KMP_MB();
-
- lck->lk.num_polls = num_polls; // non-volatile store
- lck->lk.mask = mask; // volatile store
-
- KMP_MB();
-
- //
- // Only after the new polling area and mask have been flushed
- // to main memory can we update the cleanup ticket field.
- //
- // volatile load / non-volatile store
- //
- lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
- }
- }
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
-}
-
-static int
-__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
- KMP_FATAL( LockIsAlreadyOwned, func );
- }
-
- __kmp_acquire_drdpa_lock( lck, gtid );
-
- lck->lk.owner_id = gtid + 1;
- return KMP_LOCK_ACQUIRED_FIRST;
-}
-
-int
-__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- //
- // First get a ticket, then read the polls pointer and the mask.
- // The polls pointer must be read before the mask!!! (See above)
- //
- kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
- volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
- = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- TCR_PTR(lck->lk.polls); // volatile load
- kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
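-    // The lock is free only if the poll slot already shows the current next_ticket
-    // value being served; try to claim that ticket by advancing next_ticket with a
-    // CAS, which fails if another thread grabbed a ticket in the meantime.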
- if (TCR_8(polls[ticket & mask].poll) == ticket) {
- kmp_uint64 next_ticket = ticket + 1;
- if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
- ticket, next_ticket)) {
- KMP_FSYNC_ACQUIRED(lck);
- KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
- ticket, lck));
- lck->lk.now_serving = ticket; // non-volatile store
-
- //
- // Since no threads are waiting, there is no possibility that
- // we would want to reconfigure the polling area. We might
- // have the cleanup ticket value (which says that it is now
- // safe to deallocate old_polls), but we'll let a later thread
- // which calls __kmp_acquire_lock do that - this routine
- // isn't supposed to block, and we would risk blocks if we
- // called __kmp_free() to do the deallocation.
- //
- return TRUE;
- }
- }
- return FALSE;
-}
-
-static int
-__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
-
- int retval = __kmp_test_drdpa_lock( lck, gtid );
-
- if ( retval ) {
- lck->lk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-int
-__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- //
- // Read the ticket value from the lock data struct, then the polls
- // pointer and the mask. The polls pointer must be read before the
- // mask!!! (See above)
- //
- kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
- volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
- = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- TCR_PTR(lck->lk.polls); // volatile load
- kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
- KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
- ticket - 1, lck));
- KMP_FSYNC_RELEASING(lck);
- KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
- && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- lck->lk.owner_id = 0;
- return __kmp_release_drdpa_lock( lck, gtid );
-}
-
-void
-__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
-{
- lck->lk.location = NULL;
- lck->lk.mask = 0;
- lck->lk.num_polls = 1;
- lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
- __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
- lck->lk.cleanup_ticket = 0;
- lck->lk.old_polls = NULL;
- lck->lk.next_ticket = 0;
- lck->lk.now_serving = 0;
- lck->lk.owner_id = 0; // no thread owns the lock.
- lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
- lck->lk.initialized = lck;
-
- KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
-}
-
-static void
-__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
-{
- __kmp_init_drdpa_lock( lck );
-}
-
-void
-__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
-{
- lck->lk.initialized = NULL;
- lck->lk.location = NULL;
- if (lck->lk.polls != NULL) {
- __kmp_free((void *)lck->lk.polls);
- lck->lk.polls = NULL;
- }
- if (lck->lk.old_polls != NULL) {
- __kmp_free((void *)lck->lk.old_polls);
- lck->lk.old_polls = NULL;
- }
- lck->lk.mask = 0;
- lck->lk.num_polls = 0;
- lck->lk.cleanup_ticket = 0;
- lck->lk.next_ticket = 0;
- lck->lk.now_serving = 0;
- lck->lk.owner_id = 0;
- lck->lk.depth_locked = -1;
-}
-
-static void
-__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
-{
- char const * const func = "omp_destroy_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_drdpa_lock( lck );
-}
-
-
-//
-// nested drdpa ticket locks
-//
-
-int
-__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
- lck->lk.depth_locked += 1;
- return KMP_LOCK_ACQUIRED_NEXT;
- }
- else {
- __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
- KMP_MB();
- lck->lk.depth_locked = 1;
- KMP_MB();
- lck->lk.owner_id = gtid + 1;
- return KMP_LOCK_ACQUIRED_FIRST;
- }
-}
-
-static void
-__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_set_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- __kmp_acquire_nested_drdpa_lock( lck, gtid );
-}
-
-int
-__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- int retval;
-
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
- retval = ++lck->lk.depth_locked;
- }
- else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
- retval = 0;
- }
- else {
- KMP_MB();
- retval = lck->lk.depth_locked = 1;
- KMP_MB();
- lck->lk.owner_id = gtid + 1;
- }
- return retval;
-}
-
-static int
-__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_test_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- return __kmp_test_nested_drdpa_lock( lck, gtid );
-}
-
-int
-__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( gtid >= 0 );
-
- KMP_MB();
- if ( --(lck->lk.depth_locked) == 0 ) {
- KMP_MB();
- lck->lk.owner_id = 0;
- __kmp_release_drdpa_lock( lck, gtid );
- return KMP_LOCK_RELEASED;
- }
- return KMP_LOCK_STILL_HELD;
-}
-
-static int
-__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
-{
- char const * const func = "omp_unset_nest_lock";
- KMP_MB(); /* in case another processor initialized lock */
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
- KMP_FATAL( LockUnsettingFree, func );
- }
- if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
- KMP_FATAL( LockUnsettingSetByAnother, func );
- }
- return __kmp_release_nested_drdpa_lock( lck, gtid );
-}
-
-void
-__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
-{
- __kmp_init_drdpa_lock( lck );
- lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
-}
-
-static void
-__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
-{
- __kmp_init_nested_drdpa_lock( lck );
-}
-
-void
-__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
-{
- __kmp_destroy_drdpa_lock( lck );
- lck->lk.depth_locked = 0;
-}
-
-static void
-__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
-{
- char const * const func = "omp_destroy_nest_lock";
- if ( lck->lk.initialized != lck ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
- KMP_FATAL( LockStillOwned, func );
- }
- __kmp_destroy_nested_drdpa_lock( lck );
-}
-
-
-//
-// access functions to fields which don't exist for all lock kinds.
-//
-
-static int
-__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
-{
- return lck == lck->lk.initialized;
-}
-
-static const ident_t *
-__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
-{
- return lck->lk.location;
-}
-
-static void
-__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
-{
- lck->lk.location = loc;
-}
-
-static kmp_lock_flags_t
-__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
-{
- return lck->lk.flags;
-}
-
-static void
-__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
-{
- lck->lk.flags = flags;
-}
-
-#if KMP_USE_DYNAMIC_LOCK
-
-// Direct lock initializer. It simply writes a tag to the low 8 bits of the lock word.
-static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
-{
- TCW_4(*lck, KMP_GET_D_TAG(seq));
- KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
-}
-
-#if KMP_USE_TSX
-
-// HLE lock functions - imported from the testbed runtime.
-#define HLE_ACQUIRE ".byte 0xf2;"
-#define HLE_RELEASE ".byte 0xf3;"
-
-static inline kmp_uint32
-swap4(kmp_uint32 volatile *p, kmp_uint32 v)
-{
- __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
- : "+r"(v), "+m"(*p)
- :
- : "memory");
- return v;
-}
-
-static void
-__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
-{
- TCW_4(*lck, 0);
-}
-
-static void
-__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
-{
- // Use gtid for KMP_LOCK_BUSY if necessary
- if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
- int delay = 1;
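-        // spin with a bounded backoff (1, 3, then 7 pauses) while the lock looks
-        // held, then retry the elided exchange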
- do {
- while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
- for (int i = delay; i != 0; --i)
- KMP_CPU_PAUSE();
- delay = ((delay << 1) | 1) & 7;
- }
- } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
- }
-}
-
-static void
-__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
-{
- __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
-}
-
-static int
-__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
-{
- __asm__ volatile(HLE_RELEASE "movl %1,%0"
- : "=m"(*lck)
- : "r"(KMP_LOCK_FREE(hle))
- : "memory");
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
-{
- return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
-}
-
-static int
-__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
-{
- return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
-}
-
-static int
-__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
-{
- return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
-}
-
-static void
-__kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
-{
- __kmp_init_queuing_lock(lck);
-}
-
-static void
-__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
-{
- __kmp_destroy_queuing_lock(lck);
-}
-
-static void
-__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
-{
- unsigned retries=3, status;
- do {
- status = _xbegin();
- if (status == _XBEGIN_STARTED) {
- if (__kmp_is_unlocked_queuing_lock(lck))
- return;
- _xabort(0xff);
- }
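-        // an explicit abort with code 0xff means the lock was held inside the
-        // transaction; wait for it to become free before retrying speculation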
- if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
- // Wait until lock becomes free
- while (! __kmp_is_unlocked_queuing_lock(lck))
- __kmp_yield(TRUE);
- }
- else if (!(status & _XABORT_RETRY))
- break;
- } while (retries--);
-
- // Fall-back non-speculative lock (xchg)
- __kmp_acquire_queuing_lock(lck, gtid);
-}
-
-static void
-__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
-{
- __kmp_acquire_rtm_lock(lck, gtid);
-}
-
-static int
-__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
-{
- if (__kmp_is_unlocked_queuing_lock(lck)) {
- // Releasing from speculation
- _xend();
- }
- else {
- // Releasing from a real lock
- __kmp_release_queuing_lock(lck, gtid);
- }
- return KMP_LOCK_RELEASED;
-}
-
-static int
-__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
-{
- return __kmp_release_rtm_lock(lck, gtid);
-}
-
-static int
-__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
-{
- unsigned retries=3, status;
- do {
- status = _xbegin();
- if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
- return 1;
- }
- if (!(status & _XABORT_RETRY))
- break;
- } while (retries--);
-
- return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
-}
-
-static int
-__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
-{
- return __kmp_test_rtm_lock(lck, gtid);
-}
-
-#endif // KMP_USE_TSX
-
-// Entry functions for indirect locks (first element of direct lock jump tables).
-static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
-static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
-static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
-static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
-static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
-static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
-static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
-static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
-
-//
-// Jump tables for the direct lock functions.
-// Only fill in the odd entries; that avoids the need to shift out the low bit.
-//
-
-// init functions
-#define expand(l, op) 0,__kmp_init_direct_lock,
-void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
- = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
-#undef expand
-
-// destroy functions
-#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
-void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
- = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
-#undef expand
-
-// set/acquire functions
-#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
-static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
- = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
-#undef expand
-#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
-static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
- = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
-#undef expand
-
-// unset/release and test functions
-#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
-static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
- = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
-static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
- = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
-#undef expand
-#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
-static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
- = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
-static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
- = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
-#undef expand
-
-// Exposes only one set of jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
-int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
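A minimal, self-contained sketch of the odd-entry dispatch these direct-lock tables rely on, assuming a simplified tag encoding tag = (seq << 1) | 1 and placeholder handlers (the names and the two-type table are hypothetical, not the real KMP_FOREACH_D_LOCK expansion):

#include <cstdio>

typedef void (*init_fn)();
static void init_tas()   { std::puts("init tas");   }
static void init_futex() { std::puts("init futex"); }

int main() {
    // Slot 0 would dispatch to the indirect-lock path; even slots stay unused.
    init_fn table[] = { nullptr, nullptr, nullptr, init_tas, nullptr, init_futex };
    unsigned seq = 2;                // hypothetical "futex" sequence id
    unsigned tag = (seq << 1) | 1;   // low bit set marks a direct lock word
    table[tag]();                    // index with the tag directly, no shift needed
    return 0;
}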
-
-//
-// Jump tables for the indirect lock functions.
-//
-#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
-void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
-void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
-#undef expand
-
-// set/acquire functions
-#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
-static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
-#undef expand
-#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
-static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
-#undef expand
-
-// unset/release and test functions
-#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
-static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
-static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
-#undef expand
-#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
-static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
-static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
-#undef expand
-
-// Exposes only one set of jump tables (*lock or *lock_with_checks).
-void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
-int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
-
-// Lock index table.
-kmp_indirect_lock_table_t __kmp_i_lock_table;
-
-// Size of indirect locks.
-static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
-
-// Jump tables for lock accessor/modifier.
-void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
-void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
-const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
-kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
-
-// Use different lock pools for different lock types.
-static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
-
-// User lock allocator for dynamically dispatched indirect locks.
-// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
-// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
-// object is returned to the reusable pool of locks, unique to each lock type.
-kmp_indirect_lock_t *
-__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
-{
- kmp_indirect_lock_t *lck;
- kmp_lock_index_t idx;
-
- __kmp_acquire_lock(&__kmp_global_lock, gtid);
-
- if (__kmp_indirect_lock_pool[tag] != NULL) {
- // Reuse the allocated and destroyed lock object
- lck = __kmp_indirect_lock_pool[tag];
- if (OMP_LOCK_T_SIZE < sizeof(void *))
- idx = lck->lock->pool.index;
- __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
- KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
- } else {
- idx = __kmp_i_lock_table.next;
- // Check capacity and double the size if it is full
- if (idx == __kmp_i_lock_table.size) {
- // Double up the space for block pointers
- int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
- kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
- __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
- KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
- __kmp_free(old_table);
- // Allocate new objects in the new blocks
- for (int i = row; i < 2*row; ++i)
- *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
- __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
- __kmp_i_lock_table.size = 2*idx;
- }
- __kmp_i_lock_table.next++;
- lck = KMP_GET_I_LOCK(idx);
- // Allocate a new base lock object
- lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
- KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
- }
-
- __kmp_release_lock(&__kmp_global_lock, gtid);
-
- lck->type = tag;
-
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
- } else {
- *((kmp_indirect_lock_t **)user_lock) = lck;
- }
-
- return lck;
-}
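A rough sketch of the index encoding written into a small user lock word above, together with the doubling growth, using plain new/delete in place of __kmp_allocate and a single flat table instead of chunked blocks:

#include <cstring>

struct lock_table {
    void **table;
    unsigned size;
    unsigned next;
};

// Returns the value stored in an index-sized user lock word: the index is
// shifted left once, so indirect lock words are always even, while direct
// lock words keep their low bit set.
unsigned insert_lock(lock_table &t, void *lck) {
    if (t.next == t.size) {                       // full: double the table
        void **bigger = new void *[2 * t.size];
        std::memcpy(bigger, t.table, t.size * sizeof(void *));
        delete[] t.table;
        t.table = bigger;
        t.size *= 2;
    }
    unsigned idx = t.next++;
    t.table[idx] = lck;
    return idx << 1;                              // even value marks an indirect lock
}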
-
-// User lock lookup for dynamically dispatched locks.
-static __forceinline
-kmp_indirect_lock_t *
-__kmp_lookup_indirect_lock(void **user_lock, const char *func)
-{
- if (__kmp_env_consistency_check) {
- kmp_indirect_lock_t *lck = NULL;
- if (user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
- if (idx >= __kmp_i_lock_table.size) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- lck = KMP_GET_I_LOCK(idx);
- } else {
- lck = *((kmp_indirect_lock_t **)user_lock);
- }
- if (lck == NULL) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- return lck;
- } else {
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
- } else {
- return *((kmp_indirect_lock_t **)user_lock);
- }
- }
-}
-
-static void
-__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
-{
-#if KMP_USE_ADAPTIVE_LOCKS
- if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
- KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
- seq = lockseq_queuing;
- }
-#endif
-#if KMP_USE_TSX
- if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
- seq = lockseq_queuing;
- }
-#endif
- kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
- kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
- KMP_I_LOCK_FUNC(l, init)(l->lock);
- KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
-}
-
-static void
-__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
-{
- kmp_uint32 gtid = __kmp_entry_gtid();
- kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
- KMP_I_LOCK_FUNC(l, destroy)(l->lock);
- kmp_indirect_locktag_t tag = l->type;
-
- __kmp_acquire_lock(&__kmp_global_lock, gtid);
-
- // Use the base lock's space to keep the pool chain.
- l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
- }
- __kmp_indirect_lock_pool[tag] = l;
-
- __kmp_release_lock(&__kmp_global_lock, gtid);
-}
-
-static void
-__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
-{
- kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
- KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
-}
-
-static int
-__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
-{
- kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
- return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
-}
-
-static int
-__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
-{
- kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
- return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
-}
-
-static void
-__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
-{
- kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
- KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
-}
-
-static int
-__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
-{
- kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
- return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
-}
-
-static int
-__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
-{
- kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
- return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
-}
-
-kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
-
-// This is used only in kmp_error.c when consistency checking is on.
-kmp_int32
-__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
-{
- switch (seq) {
- case lockseq_tas:
- case lockseq_nested_tas:
- return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
-#if KMP_HAS_FUTEX
- case lockseq_futex:
- case lockseq_nested_futex:
- return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
-#endif
- case lockseq_ticket:
- case lockseq_nested_ticket:
- return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
- case lockseq_queuing:
- case lockseq_nested_queuing:
-#if KMP_USE_ADAPTIVE_LOCKS
- case lockseq_adaptive:
- return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
-#endif
- case lockseq_drdpa:
- case lockseq_nested_drdpa:
- return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
- default:
- return 0;
- }
-}
-
-// Initializes data for dynamic user locks.
-void
-__kmp_init_dynamic_user_locks()
-{
- // Initialize jump table for the lock functions
- if (__kmp_env_consistency_check) {
- __kmp_direct_set = direct_set_check;
- __kmp_direct_unset = direct_unset_check;
- __kmp_direct_test = direct_test_check;
- __kmp_indirect_set = indirect_set_check;
- __kmp_indirect_unset = indirect_unset_check;
- __kmp_indirect_test = indirect_test_check;
- }
- else {
- __kmp_direct_set = direct_set;
- __kmp_direct_unset = direct_unset;
- __kmp_direct_test = direct_test;
- __kmp_indirect_set = indirect_set;
- __kmp_indirect_unset = indirect_unset;
- __kmp_indirect_test = indirect_test;
- }
-
- // Initialize lock index table
- __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
- __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
- *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
- __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
- __kmp_i_lock_table.next = 0;
-
- // Indirect lock size
- __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
- __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
-#if KMP_USE_ADAPTIVE_LOCKS
- __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
-#endif
- __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
-#if KMP_USE_TSX
- __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
-#endif
- __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
-#if KMP_USE_FUTEX
- __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
-#endif
- __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
- __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
- __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
-
- // Initialize lock accessor/modifier
-#define fill_jumps(table, expand, sep) { \
- table[locktag##sep##ticket] = expand(ticket); \
- table[locktag##sep##queuing] = expand(queuing); \
- table[locktag##sep##drdpa] = expand(drdpa); \
-}
-
-#if KMP_USE_ADAPTIVE_LOCKS
-# define fill_table(table, expand) { \
- fill_jumps(table, expand, _); \
- table[locktag_adaptive] = expand(queuing); \
- fill_jumps(table, expand, _nested_); \
-}
-#else
-# define fill_table(table, expand) { \
- fill_jumps(table, expand, _); \
- fill_jumps(table, expand, _nested_); \
-}
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
- fill_table(__kmp_indirect_set_location, expand);
-#undef expand
-#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
- fill_table(__kmp_indirect_set_flags, expand);
-#undef expand
-#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
- fill_table(__kmp_indirect_get_location, expand);
-#undef expand
-#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
- fill_table(__kmp_indirect_get_flags, expand);
-#undef expand
-
- __kmp_init_user_locks = TRUE;
-}
-
-// Clean up the lock table.
-void
-__kmp_cleanup_indirect_user_locks()
-{
- kmp_lock_index_t i;
- int k;
-
- // Clean up locks in the pools first (they were already destroyed before going into the pools).
- for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
- kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
- while (l != NULL) {
- kmp_indirect_lock_t *ll = l;
- l = (kmp_indirect_lock_t *)l->lock->pool.next;
- KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
- __kmp_free(ll->lock);
- ll->lock = NULL;
- }
- }
- // Clean up the remaining undestroyed locks.
- for (i = 0; i < __kmp_i_lock_table.next; i++) {
- kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
- if (l->lock != NULL) {
- // Locks not destroyed explicitly need to be destroyed here.
- KMP_I_LOCK_FUNC(l, destroy)(l->lock);
- KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
- __kmp_free(l->lock);
- }
- }
- // Free the table
- for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
- __kmp_free(__kmp_i_lock_table.table[i]);
- __kmp_free(__kmp_i_lock_table.table);
-
- __kmp_init_user_locks = FALSE;
-}
-
-enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
-int __kmp_num_locks_in_block = 1; // FIXME - tune this value
-
-#else // KMP_USE_DYNAMIC_LOCK
-
-/* ------------------------------------------------------------------------ */
-/* user locks
- *
- * They are implemented as a table of function pointers which are set to the
- * lock functions of the appropriate kind, once that has been determined.
- */
-
-enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
-
-size_t __kmp_base_user_lock_size = 0;
-size_t __kmp_user_lock_size = 0;
-
-kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
-int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
-
-int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
-int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
-void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
-void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
-void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
-int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
-
-int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
-int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
-void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
-void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
-
-int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
-const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
-void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
-kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
-void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
-
-void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
-{
- switch ( user_lock_kind ) {
- case lk_default:
- default:
- KMP_ASSERT( 0 );
-
- case lk_tas: {
- __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
- __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
-
- __kmp_get_user_lock_owner_ =
- ( kmp_int32 ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_tas_lock_owner );
-
- if ( __kmp_env_consistency_check ) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
- KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
- }
- else {
- KMP_BIND_USER_LOCK(tas);
- KMP_BIND_NESTED_USER_LOCK(tas);
- }
-
- __kmp_destroy_user_lock_ =
- ( void ( * )( kmp_user_lock_p ) )
- ( &__kmp_destroy_tas_lock );
-
- __kmp_is_user_lock_initialized_ =
- ( int ( * )( kmp_user_lock_p ) ) NULL;
-
- __kmp_get_user_lock_location_ =
- ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
-
- __kmp_set_user_lock_location_ =
- ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
-
- __kmp_get_user_lock_flags_ =
- ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
-
- __kmp_set_user_lock_flags_ =
- ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
- }
- break;
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
-
- case lk_futex: {
- __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
- __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
-
- __kmp_get_user_lock_owner_ =
- ( kmp_int32 ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_futex_lock_owner );
-
- if ( __kmp_env_consistency_check ) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
- KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
- }
- else {
- KMP_BIND_USER_LOCK(futex);
- KMP_BIND_NESTED_USER_LOCK(futex);
- }
-
- __kmp_destroy_user_lock_ =
- ( void ( * )( kmp_user_lock_p ) )
- ( &__kmp_destroy_futex_lock );
-
- __kmp_is_user_lock_initialized_ =
- ( int ( * )( kmp_user_lock_p ) ) NULL;
-
- __kmp_get_user_lock_location_ =
- ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
-
- __kmp_set_user_lock_location_ =
- ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
-
- __kmp_get_user_lock_flags_ =
- ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
-
- __kmp_set_user_lock_flags_ =
- ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
- }
- break;
-
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
-
- case lk_ticket: {
- __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
- __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
-
- __kmp_get_user_lock_owner_ =
- ( kmp_int32 ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_ticket_lock_owner );
-
- if ( __kmp_env_consistency_check ) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
- KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
- }
- else {
- KMP_BIND_USER_LOCK(ticket);
- KMP_BIND_NESTED_USER_LOCK(ticket);
- }
-
- __kmp_destroy_user_lock_ =
- ( void ( * )( kmp_user_lock_p ) )
- ( &__kmp_destroy_ticket_lock );
-
- __kmp_is_user_lock_initialized_ =
- ( int ( * )( kmp_user_lock_p ) )
- ( &__kmp_is_ticket_lock_initialized );
-
- __kmp_get_user_lock_location_ =
- ( const ident_t * ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_ticket_lock_location );
-
- __kmp_set_user_lock_location_ =
- ( void ( * )( kmp_user_lock_p, const ident_t * ) )
- ( &__kmp_set_ticket_lock_location );
-
- __kmp_get_user_lock_flags_ =
- ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_ticket_lock_flags );
-
- __kmp_set_user_lock_flags_ =
- ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
- ( &__kmp_set_ticket_lock_flags );
- }
- break;
-
- case lk_queuing: {
- __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
- __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
-
- __kmp_get_user_lock_owner_ =
- ( kmp_int32 ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_queuing_lock_owner );
-
- if ( __kmp_env_consistency_check ) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
- KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
- }
- else {
- KMP_BIND_USER_LOCK(queuing);
- KMP_BIND_NESTED_USER_LOCK(queuing);
- }
-
- __kmp_destroy_user_lock_ =
- ( void ( * )( kmp_user_lock_p ) )
- ( &__kmp_destroy_queuing_lock );
-
- __kmp_is_user_lock_initialized_ =
- ( int ( * )( kmp_user_lock_p ) )
- ( &__kmp_is_queuing_lock_initialized );
-
- __kmp_get_user_lock_location_ =
- ( const ident_t * ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_queuing_lock_location );
-
- __kmp_set_user_lock_location_ =
- ( void ( * )( kmp_user_lock_p, const ident_t * ) )
- ( &__kmp_set_queuing_lock_location );
-
- __kmp_get_user_lock_flags_ =
- ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_queuing_lock_flags );
-
- __kmp_set_user_lock_flags_ =
- ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
- ( &__kmp_set_queuing_lock_flags );
- }
- break;
-
-#if KMP_USE_ADAPTIVE_LOCKS
- case lk_adaptive: {
- __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
- __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
-
- __kmp_get_user_lock_owner_ =
- ( kmp_int32 ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_queuing_lock_owner );
-
- if ( __kmp_env_consistency_check ) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
- }
- else {
- KMP_BIND_USER_LOCK(adaptive);
- }
-
- __kmp_destroy_user_lock_ =
- ( void ( * )( kmp_user_lock_p ) )
- ( &__kmp_destroy_adaptive_lock );
-
- __kmp_is_user_lock_initialized_ =
- ( int ( * )( kmp_user_lock_p ) )
- ( &__kmp_is_queuing_lock_initialized );
-
- __kmp_get_user_lock_location_ =
- ( const ident_t * ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_queuing_lock_location );
-
- __kmp_set_user_lock_location_ =
- ( void ( * )( kmp_user_lock_p, const ident_t * ) )
- ( &__kmp_set_queuing_lock_location );
-
- __kmp_get_user_lock_flags_ =
- ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_queuing_lock_flags );
-
- __kmp_set_user_lock_flags_ =
- ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
- ( &__kmp_set_queuing_lock_flags );
-
- }
- break;
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
- case lk_drdpa: {
- __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
- __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
-
- __kmp_get_user_lock_owner_ =
- ( kmp_int32 ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_drdpa_lock_owner );
-
- if ( __kmp_env_consistency_check ) {
- KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
- KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
- }
- else {
- KMP_BIND_USER_LOCK(drdpa);
- KMP_BIND_NESTED_USER_LOCK(drdpa);
- }
-
- __kmp_destroy_user_lock_ =
- ( void ( * )( kmp_user_lock_p ) )
- ( &__kmp_destroy_drdpa_lock );
-
- __kmp_is_user_lock_initialized_ =
- ( int ( * )( kmp_user_lock_p ) )
- ( &__kmp_is_drdpa_lock_initialized );
-
- __kmp_get_user_lock_location_ =
- ( const ident_t * ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_drdpa_lock_location );
-
- __kmp_set_user_lock_location_ =
- ( void ( * )( kmp_user_lock_p, const ident_t * ) )
- ( &__kmp_set_drdpa_lock_location );
-
- __kmp_get_user_lock_flags_ =
- ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
- ( &__kmp_get_drdpa_lock_flags );
-
- __kmp_set_user_lock_flags_ =
- ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
- ( &__kmp_set_drdpa_lock_flags );
- }
- break;
- }
-}
-
-
-// ----------------------------------------------------------------------------
-// User lock table & lock allocation
-
-kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
-kmp_user_lock_p __kmp_lock_pool = NULL;
-
-// Lock block-allocation support.
-kmp_block_of_locks* __kmp_lock_blocks = NULL;
-int __kmp_num_locks_in_block = 1; // FIXME - tune this value
-
-static kmp_lock_index_t
-__kmp_lock_table_insert( kmp_user_lock_p lck )
-{
- // Assume that kmp_global_lock is held upon entry/exit.
- kmp_lock_index_t index;
- if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
- kmp_lock_index_t size;
- kmp_user_lock_p *table;
- // Reallocate lock table.
- if ( __kmp_user_lock_table.allocated == 0 ) {
- size = 1024;
- }
- else {
- size = __kmp_user_lock_table.allocated * 2;
- }
- table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
- KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
- table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
- // We cannot free the previous table now, since it may be in use by other
- // threads. So save the pointer to the previous table in the first element of the
- // new table. All the tables will be organized into a list, and can be freed when
- // the library is shutting down.
- __kmp_user_lock_table.table = table;
- __kmp_user_lock_table.allocated = size;
- }
- KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
- index = __kmp_user_lock_table.used;
- __kmp_user_lock_table.table[ index ] = lck;
- ++ __kmp_user_lock_table.used;
- return index;
-}
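A sketch of the chained-tables idea used above: the old table cannot be freed while other threads may still be reading it, so its pointer is parked in slot 0 of the new table and the whole chain is released at shutdown (simplified types, plain new/delete):

#include <cstring>

struct user_lock_table {
    void **table;        // slot 0 links to the previous, smaller table
    unsigned allocated;
    unsigned used;       // starts at 1; slot 0 is reserved for the chain
};

void grow(user_lock_table &t) {
    unsigned size = t.allocated ? 2 * t.allocated : 1024;
    void **bigger = new void *[size]();
    if (t.table != nullptr)
        std::memcpy(bigger + 1, t.table + 1, (t.used - 1) * sizeof(void *));
    bigger[0] = static_cast<void *>(t.table);   // remember the previous table
    t.table = bigger;
    t.allocated = size;
}

void free_all_tables(user_lock_table &t) {
    void **p = t.table;
    while (p != nullptr) {                      // walk the chain saved in slot 0
        void **prev = static_cast<void **>(p[0]);
        delete[] p;
        p = prev;
    }
    t.table = nullptr;
    t.allocated = 0;
}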
-
-static kmp_user_lock_p
-__kmp_lock_block_allocate()
-{
- // Assume that kmp_global_lock is held upon entry/exit.
- static int last_index = 0;
- if ( ( last_index >= __kmp_num_locks_in_block )
- || ( __kmp_lock_blocks == NULL ) ) {
- // Restart the index.
- last_index = 0;
- // Need to allocate a new block.
- KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
- size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
- char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
- // Set up the new block.
- kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
- new_block->next_block = __kmp_lock_blocks;
- new_block->locks = (void *)buffer;
- // Publish the new block.
- KMP_MB();
- __kmp_lock_blocks = new_block;
- }
- kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
- [ last_index * __kmp_user_lock_size ] ) );
- last_index++;
- return ret;
-}
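A sketch of the block layout built above: one allocation holds all the lock slots followed by the block descriptor itself, so the bookkeeping needs no separate allocation (simplified, plain new in place of __kmp_allocate):

#include <cstddef>
#include <new>

struct lock_block {
    lock_block *next_block;
    void *locks;             // start of the slot area
};

lock_block *allocate_block(std::size_t lock_size, int locks_per_block,
                           lock_block *current_head) {
    std::size_t space_for_locks = lock_size * locks_per_block;
    char *buffer = new char[space_for_locks + sizeof(lock_block)];
    // The descriptor lives at the end of the same buffer, after the slots.
    lock_block *blk = new (buffer + space_for_locks) lock_block;
    blk->locks = buffer;
    blk->next_block = current_head;   // push onto the list of blocks
    return blk;
}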
-
-//
-// Get memory for a lock. It may be freshly allocated memory or reused memory
-// from lock pool.
-//
-kmp_user_lock_p
-__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
- kmp_lock_flags_t flags )
-{
- kmp_user_lock_p lck;
- kmp_lock_index_t index;
- KMP_DEBUG_ASSERT( user_lock );
-
- __kmp_acquire_lock( &__kmp_global_lock, gtid );
-
- if ( __kmp_lock_pool == NULL ) {
- // Lock pool is empty. Allocate new memory.
- if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
- lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
- }
- else {
- lck = __kmp_lock_block_allocate();
- }
-
- // Insert lock in the table so that it can be freed in __kmp_cleanup,
- // and debugger has info on all allocated locks.
- index = __kmp_lock_table_insert( lck );
- }
- else {
- // Pick up lock from pool.
- lck = __kmp_lock_pool;
- index = __kmp_lock_pool->pool.index;
- __kmp_lock_pool = __kmp_lock_pool->pool.next;
- }
-
- //
- // We could potentially differentiate between nested and regular locks
- // here, and do the lock table lookup for regular locks only.
- //
- if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
- * ( (kmp_lock_index_t *) user_lock ) = index;
- }
- else {
- * ( (kmp_user_lock_p *) user_lock ) = lck;
- }
-
- // mark the lock if it is a critical section lock.
- __kmp_set_user_lock_flags( lck, flags );
-
- __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
-
- return lck;
-}
-
-// Return the lock's memory to the pool for reuse.
-void
-__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( user_lock != NULL );
- KMP_DEBUG_ASSERT( lck != NULL );
-
- __kmp_acquire_lock( & __kmp_global_lock, gtid );
-
- lck->pool.next = __kmp_lock_pool;
- __kmp_lock_pool = lck;
- if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
- kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
- KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
- lck->pool.index = index;
- }
-
- __kmp_release_lock( & __kmp_global_lock, gtid );
-}
-
-kmp_user_lock_p
-__kmp_lookup_user_lock( void **user_lock, char const *func )
-{
- kmp_user_lock_p lck = NULL;
-
- if ( __kmp_env_consistency_check ) {
- if ( user_lock == NULL ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- }
-
- if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
- kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
- if ( __kmp_env_consistency_check ) {
- if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- }
- KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
- KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
- lck = __kmp_user_lock_table.table[index];
- }
- else {
- lck = *( (kmp_user_lock_p *)user_lock );
- }
-
- if ( __kmp_env_consistency_check ) {
- if ( lck == NULL ) {
- KMP_FATAL( LockIsUninitialized, func );
- }
- }
-
- return lck;
-}
-
-void
-__kmp_cleanup_user_locks( void )
-{
- //
- // Reset lock pool. Do not worry about locks in the pool -- we will free
- // them when iterating through the lock table (it includes all the locks,
- // dead or alive).
- //
- __kmp_lock_pool = NULL;
-
-#define IS_CRITICAL(lck) \
- ( ( __kmp_get_user_lock_flags_ != NULL ) && \
- ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
-
- //
- // Loop through lock table, free all locks.
- //
- // Do not free item [0], it is reserved for lock tables list.
- //
- // FIXME - we are iterating through a list of (pointers to) objects of
- // type union kmp_user_lock, but we have no way of knowing whether the
- // base type is currently "pool" or whatever the global user lock type
- // is.
- //
- // We are relying on the fact that for all of the user lock types
- // (except "tas"), the first field in the lock struct is the "initialized"
- // field, which is set to the address of the lock object itself when
- // the lock is initialized. When the union is of type "pool", the
- // first field is a pointer to the next object in the free list, which
- // will not be the same address as the object itself.
- //
- // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
- // will fail for "pool" objects on the free list. This must happen as
- // the "location" field of real user locks overlaps the "index" field
- // of "pool" objects.
- //
- // It would be better to run through the free list, and remove all "pool"
- // objects from the lock table before executing this loop. However,
- // "pool" objects do not always have their index field set (only on
- // lin_32e), and I don't want to search the lock table for the address
- // of every "pool" object on the free list.
- //
- while ( __kmp_user_lock_table.used > 1 ) {
- const ident *loc;
-
- //
- // reduce __kmp_user_lock_table.used before freeing the lock,
- // so that the state of the locks stays consistent
- //
- kmp_user_lock_p lck = __kmp_user_lock_table.table[
- --__kmp_user_lock_table.used ];
-
- if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
- ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
- //
- // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
- // initialized AND it is NOT a critical section (user is not
- // responsible for destroying criticals) AND we know source
- // location to report.
- //
- if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
- ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
- ( loc->psource != NULL ) ) {
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
- KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
- __kmp_str_loc_free( &str_loc);
- }
-
-#ifdef KMP_DEBUG
- if ( IS_CRITICAL( lck ) ) {
- KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
- }
- else {
- KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
- }
-#endif // KMP_DEBUG
-
- //
- // Cleanup internal lock dynamic resources
- // (for drdpa locks particularly).
- //
- __kmp_destroy_user_lock( lck );
- }
-
- //
- // Free the lock if block allocation of locks is not used.
- //
- if ( __kmp_lock_blocks == NULL ) {
- __kmp_free( lck );
- }
- }
-
-#undef IS_CRITICAL
-
- //
- // delete lock table(s).
- //
- kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
- __kmp_user_lock_table.table = NULL;
- __kmp_user_lock_table.allocated = 0;
-
- while ( table_ptr != NULL ) {
- //
- // In the first element we saved the pointer to the previous
- // (smaller) lock table.
- //
- kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
- __kmp_free( table_ptr );
- table_ptr = next;
- }
-
- //
- // Free buffers allocated for blocks of locks.
- //
- kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
- __kmp_lock_blocks = NULL;
-
- while ( block_ptr != NULL ) {
- kmp_block_of_locks_t *next = block_ptr->next_block;
- __kmp_free( block_ptr->locks );
- //
- // *block_ptr itself was allocated at the end of the locks vector.
- //
- block_ptr = next;
- }
-
- TCW_4(__kmp_init_user_locks, FALSE);
-}
-
-#endif // KMP_USE_DYNAMIC_LOCK
+#endif
+
+ KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+#ifdef KMP_DEBUG
+ this_thr = __kmp_thread_from_gtid( gtid );
+ KMP_DEBUG_ASSERT( this_thr != NULL );
+ KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
+#endif
+
+ head = *head_id_p;
+
+ if ( head == 0 ) { /* nobody on queue, nobody holding */
+
+ /* try (0,0)->(-1,0) */
+
+ if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
+ KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
+ KMP_FSYNC_ACQUIRED(lck);
+ return TRUE;
+ }
+ }
+
+ KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
+ return FALSE;
+}
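A compact reading of the head_id encoding, inferred from the transitions above, plus a sketch of the test fast path as a single compare-and-swap:

// head_id == 0  : lock free, queue empty      (test tries 0 -> -1)
// head_id == -1 : lock held, nobody queued
// head_id  > 0  : lock held, head/tail hold gtid+1 of the queued threads
#include <atomic>

bool try_lock(std::atomic<int> &head_id) {
    int expected = 0;   // free and nobody queued
    return head_id.compare_exchange_strong(expected, -1,
                                           std::memory_order_acquire);
}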
+
+static int
+__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+
+ int retval = __kmp_test_queuing_lock( lck, gtid );
+
+ if ( retval ) {
+ lck->lk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+int
+__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_info_t *this_thr;
+ volatile kmp_int32 *head_id_p = & lck->lk.head_id;
+ volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
+
+ KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+ this_thr = __kmp_thread_from_gtid( gtid );
+ KMP_DEBUG_ASSERT( this_thr != NULL );
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "rel ent" );
+
+ if ( this_thr->th.th_spin_here )
+ __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
+ if ( this_thr->th.th_next_waiting != 0 )
+ __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
+#endif
+ KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
+ KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
+
+ KMP_FSYNC_RELEASING(lck);
+
+ while( 1 ) {
+ kmp_int32 dequeued;
+ kmp_int32 head;
+ kmp_int32 tail;
+
+ head = *head_id_p;
+
+#ifdef DEBUG_QUEUING_LOCKS
+ tail = *tail_id_p;
+ TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
+ if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
+#endif
+ KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
+
+ if ( head == -1 ) { /* nobody on queue */
+
+ /* try (-1,0)->(0,0) */
+ if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
+ KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
+ lck, gtid ));
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
+#endif
+
+#if OMPT_SUPPORT
+ /* nothing to do - no other thread is trying to shift blame */
+#endif
+
+ return KMP_LOCK_RELEASED;
+ }
+ dequeued = FALSE;
+
+ }
+ else {
+
+ tail = *tail_id_p;
+ if ( head == tail ) { /* only one thread on the queue */
+
+#ifdef DEBUG_QUEUING_LOCKS
+ if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
+#endif
+ KMP_DEBUG_ASSERT( head > 0 );
+
+ /* try (h,h)->(-1,0) */
+ dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
+ KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
+#endif
+
+ }
+ else {
+ volatile kmp_int32 *waiting_id_p;
+ kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
+ KMP_DEBUG_ASSERT( head_thr != NULL );
+ waiting_id_p = & head_thr->th.th_next_waiting;
+
+ /* Does this require synchronous reads? */
+#ifdef DEBUG_QUEUING_LOCKS
+ if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
+#endif
+ KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
+
+ /* try (h,t)->(h',t) or (t,t) */
+
+ KMP_MB();
+ /* make sure enqueuing thread has time to update next waiting thread field */
+ *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
+#endif
+ dequeued = TRUE;
+ }
+ }
+
+ if ( dequeued ) {
+ kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
+ KMP_DEBUG_ASSERT( head_thr != NULL );
+
+ /* Does this require synchronous reads? */
+#ifdef DEBUG_QUEUING_LOCKS
+ if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
+#endif
+ KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
+
+ /* For clean code only.
+ * The waiting thread is not released until the next statement; this prevents a race with the acquire code.
+ */
+ head_thr->th.th_next_waiting = 0;
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
+#endif
+
+ KMP_MB();
+ /* reset spin value */
+ head_thr->th.th_spin_here = FALSE;
+
+ KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
+ lck, gtid ));
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "rel exit 2" );
+#endif
+ return KMP_LOCK_RELEASED;
+ }
+ /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
+
+#ifdef DEBUG_QUEUING_LOCKS
+ TRACE_LOCK( gtid+1, "rel retry" );
+#endif
+
+ } /* while */
+ KMP_ASSERT2( 0, "should not get here" );
+ return KMP_LOCK_RELEASED;
+}
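A sketch of the single-waiter dequeue performed above: head and tail are packed into one 64-bit word so the (h,h) -> (-1,0) transition can happen in a single wide compare-and-swap; which half holds head versus tail here is illustrative only:

#include <atomic>
#include <cstdint>

inline std::uint64_t pack64(std::uint32_t hi, std::uint32_t lo) {
    return (static_cast<std::uint64_t>(hi) << 32) | lo;
}

// The lone waiter (gtid+1 == h) is dequeued and becomes the owner.
bool dequeue_single_waiter(std::atomic<std::uint64_t> &head_tail, std::int32_t h) {
    std::uint64_t expected = pack64(static_cast<std::uint32_t>(h),
                                    static_cast<std::uint32_t>(h));      // (h, h)
    std::uint64_t desired  = pack64(static_cast<std::uint32_t>(-1), 0u); // (-1, 0)
    return head_tail.compare_exchange_strong(expected, desired,
                                             std::memory_order_release);
}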
+
+static int
+__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
+ kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ lck->lk.owner_id = 0;
+ return __kmp_release_queuing_lock( lck, gtid );
+}
+
+void
+__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
+{
+ lck->lk.location = NULL;
+ lck->lk.head_id = 0;
+ lck->lk.tail_id = 0;
+ lck->lk.next_ticket = 0;
+ lck->lk.now_serving = 0;
+ lck->lk.owner_id = 0; // no thread owns the lock.
+ lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
+ lck->lk.initialized = lck;
+
+ KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
+}
+
+static void
+__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
+{
+ __kmp_init_queuing_lock( lck );
+}
+
+void
+__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
+{
+ lck->lk.initialized = NULL;
+ lck->lk.location = NULL;
+ lck->lk.head_id = 0;
+ lck->lk.tail_id = 0;
+ lck->lk.next_ticket = 0;
+ lck->lk.now_serving = 0;
+ lck->lk.owner_id = 0;
+ lck->lk.depth_locked = -1;
+}
+
+static void
+__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
+{
+ char const * const func = "omp_destroy_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_queuing_lock( lck );
+}
+
+
+//
+// nested queuing locks
+//
+
+int
+__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
+ lck->lk.depth_locked += 1;
+ return KMP_LOCK_ACQUIRED_NEXT;
+ }
+ else {
+ __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
+ KMP_MB();
+ lck->lk.depth_locked = 1;
+ KMP_MB();
+ lck->lk.owner_id = gtid + 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+}
+
+static int
+__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_acquire_nested_queuing_lock( lck, gtid );
+}
+
+int
+__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ int retval;
+
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
+ retval = ++lck->lk.depth_locked;
+ }
+ else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
+ retval = 0;
+ }
+ else {
+ KMP_MB();
+ retval = lck->lk.depth_locked = 1;
+ KMP_MB();
+ lck->lk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+static int
+__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
+ kmp_int32 gtid )
+{
+ char const * const func = "omp_test_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_test_nested_queuing_lock( lck, gtid );
+}
+
+int
+__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ KMP_MB();
+ if ( --(lck->lk.depth_locked) == 0 ) {
+ KMP_MB();
+ lck->lk.owner_id = 0;
+ __kmp_release_queuing_lock( lck, gtid );
+ return KMP_LOCK_RELEASED;
+ }
+ return KMP_LOCK_STILL_HELD;
+}
+
+static int
+__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_nest_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_nested_queuing_lock( lck, gtid );
+}
+
+void
+__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
+{
+ __kmp_init_queuing_lock( lck );
+ lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
+}
+
+static void
+__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
+{
+ __kmp_init_nested_queuing_lock( lck );
+}
+
+void
+__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
+{
+ __kmp_destroy_queuing_lock( lck );
+ lck->lk.depth_locked = 0;
+}
+
+static void
+__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
+{
+ char const * const func = "omp_destroy_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_nested_queuing_lock( lck );
+}
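A usage sketch of the nesting semantics implemented above, written against the standard OpenMP user-lock API; the depth comments mirror lk.depth_locked:

#include <omp.h>
#include <cstdio>

int main() {
    omp_nest_lock_t l;
    omp_init_nest_lock(&l);
    omp_set_nest_lock(&l);                                // depth 1 (ACQUIRED_FIRST)
    omp_set_nest_lock(&l);                                // same owner: depth 2 (ACQUIRED_NEXT)
    std::printf("depth: %d\n", omp_test_nest_lock(&l));   // prints 3
    omp_unset_nest_lock(&l);                              // depth 2 (STILL_HELD)
    omp_unset_nest_lock(&l);                              // depth 1 (STILL_HELD)
    omp_unset_nest_lock(&l);                              // depth 0: lock really released
    omp_destroy_nest_lock(&l);
    return 0;
}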
+
+
+//
+// access functions to fields which don't exist for all lock kinds.
+//
+
+static int
+__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
+{
+ return lck == lck->lk.initialized;
+}
+
+static const ident_t *
+__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
+{
+ return lck->lk.location;
+}
+
+static void
+__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
+{
+ lck->lk.location = loc;
+}
+
+static kmp_lock_flags_t
+__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
+{
+ return lck->lk.flags;
+}
+
+static void
+__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
+{
+ lck->lk.flags = flags;
+}
+
+#if KMP_USE_ADAPTIVE_LOCKS
+
+/*
+ RTM Adaptive locks
+*/
+
+#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
+
+#include <immintrin.h>
+#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
+
+#else
+
+// Values from the status register after failed speculation.
+#define _XBEGIN_STARTED (~0u)
+#define _XABORT_EXPLICIT (1 << 0)
+#define _XABORT_RETRY (1 << 1)
+#define _XABORT_CONFLICT (1 << 2)
+#define _XABORT_CAPACITY (1 << 3)
+#define _XABORT_DEBUG (1 << 4)
+#define _XABORT_NESTED (1 << 5)
+#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
+
+// Aborts for which it's worth trying again immediately
+#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
+
+#define STRINGIZE_INTERNAL(arg) #arg
+#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
+
+// Access to RTM instructions
+
+/*
+ A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
+ This is the same definition as the compiler intrinsic that will be supported at some point.
+*/
+static __inline int _xbegin()
+{
+ int res = -1;
+
+#if KMP_OS_WINDOWS
+#if KMP_ARCH_X86_64
+ _asm {
+ _emit 0xC7
+ _emit 0xF8
+ _emit 2
+ _emit 0
+ _emit 0
+ _emit 0
+ jmp L2
+ mov res, eax
+ L2:
+ }
+#else /* IA32 */
+ _asm {
+ _emit 0xC7
+ _emit 0xF8
+ _emit 2
+ _emit 0
+ _emit 0
+ _emit 0
+ jmp L2
+ mov res, eax
+ L2:
+ }
+#endif // KMP_ARCH_X86_64
+#else
+ /* Note that %eax must be noted as killed (clobbered), because
+ * the XSR is returned in %eax(%rax) on abort. Other register
+ * values are restored, so don't need to be killed.
+ *
+ * We must also mark 'res' as an input and an output, since otherwise
+ * 'res=-1' may be dropped as being dead, whereas we do need the
+ * assignment on the successful (i.e., non-abort) path.
+ */
+ __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
+ " .long 1f-1b-6\n"
+ " jmp 2f\n"
+ "1: movl %%eax,%0\n"
+ "2:"
+ :"+r"(res)::"memory","%eax");
+#endif // KMP_OS_WINDOWS
+ return res;
+}
+
+/*
+ Transaction end
+*/
+static __inline void _xend()
+{
+#if KMP_OS_WINDOWS
+ __asm {
+ _emit 0x0f
+ _emit 0x01
+ _emit 0xd5
+ }
+#else
+ __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
+#endif
+}
+
+/*
+ This is a macro; the argument must be a single byte constant which
+ can be evaluated by the inline assembler, since it is emitted as a
+ byte into the assembly code.
+*/
+#if KMP_OS_WINDOWS
+#define _xabort(ARG) \
+ _asm _emit 0xc6 \
+ _asm _emit 0xf8 \
+ _asm _emit ARG
+#else
+#define _xabort(ARG) \
+ __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
+#endif
+
+#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
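A usage sketch of the wrappers above: a transactional region that retries a few times on soft aborts and otherwise falls back to a conventional lock (fallback_lock/fallback_unlock are hypothetical placeholders, not runtime functions):

extern void fallback_lock();     // hypothetical non-speculative fallback
extern void fallback_unlock();

static void locked_update() {
    int retries = 3;
    do {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            // ... speculative critical section ...
            _xend();                       // commit the transaction
            return;
        }
        if (!(status & SOFT_ABORT_MASK))   // hard abort: retrying won't help
            break;
    } while (retries--);
    fallback_lock();
    // ... non-speculative critical section ...
    fallback_unlock();
}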
+
+//
+// Statistics are collected for testing purposes.
+//
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+
+// We accumulate speculative lock statistics when the lock is destroyed.
+// We keep locks that haven't been destroyed in the liveLocks list
+// so that we can grab their statistics too.
+static kmp_adaptive_lock_statistics_t destroyedStats;
+
+// To hold the list of live locks.
+static kmp_adaptive_lock_info_t liveLocks;
+
+// A lock so we can safely update the list of locks.
+static kmp_bootstrap_lock_t chain_lock;
+
+// Initialize the list of stats.
+void
+__kmp_init_speculative_stats()
+{
+ kmp_adaptive_lock_info_t *lck = &liveLocks;
+
+ memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
+ lck->stats.next = lck;
+ lck->stats.prev = lck;
+
+ KMP_ASSERT( lck->stats.next->stats.prev == lck );
+ KMP_ASSERT( lck->stats.prev->stats.next == lck );
+
+ __kmp_init_bootstrap_lock( &chain_lock );
+
+}
+
+// Insert the lock into the circular list
+static void
+__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
+{
+ __kmp_acquire_bootstrap_lock( &chain_lock );
+
+ lck->stats.next = liveLocks.stats.next;
+ lck->stats.prev = &liveLocks;
+
+ liveLocks.stats.next = lck;
+ lck->stats.next->stats.prev = lck;
+
+ KMP_ASSERT( lck->stats.next->stats.prev == lck );
+ KMP_ASSERT( lck->stats.prev->stats.next == lck );
+
+ __kmp_release_bootstrap_lock( &chain_lock );
+}
+
+static void
+__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
+{
+ KMP_ASSERT( lck->stats.next->stats.prev == lck );
+ KMP_ASSERT( lck->stats.prev->stats.next == lck );
+
+ kmp_adaptive_lock_info_t * n = lck->stats.next;
+ kmp_adaptive_lock_info_t * p = lck->stats.prev;
+
+ n->stats.prev = p;
+ p->stats.next = n;
+}
+
+static void
+__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
+{
+ memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
+ __kmp_remember_lock( lck );
+}
+
+static void
+__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
+{
+ kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
+
+ t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
+ t->successfulSpeculations += s->successfulSpeculations;
+ t->hardFailedSpeculations += s->hardFailedSpeculations;
+ t->softFailedSpeculations += s->softFailedSpeculations;
+ t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
+ t->lemmingYields += s->lemmingYields;
+}
+
+static void
+__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
+{
+ kmp_adaptive_lock_statistics_t *t = &destroyedStats;
+
+ __kmp_acquire_bootstrap_lock( &chain_lock );
+
+ __kmp_add_stats( &destroyedStats, lck );
+ __kmp_forget_lock( lck );
+
+ __kmp_release_bootstrap_lock( &chain_lock );
+}
+
+static float
+percent (kmp_uint32 count, kmp_uint32 total)
+{
+ return (total == 0) ? 0.0: (100.0 * count)/total;
+}
+
+static
+FILE * __kmp_open_stats_file()
+{
+ if (strcmp (__kmp_speculative_statsfile, "-") == 0)
+ return stdout;
+
+ size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
+ char buffer[buffLen];
+ KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
+ (kmp_int32)getpid());
+ FILE * result = fopen(&buffer[0], "w");
+
+ // Maybe we should issue a warning here...
+ return result ? result : stdout;
+}
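A sketch of the name expansion performed above: the configured statistics file name is used as a printf-style format, so a "%d" in it is replaced by the process id, and "-" selects stdout (simplified, fixed-size buffer):

#include <cstdio>
#include <cstring>
#include <unistd.h>

std::FILE *open_stats(const char *name_format) {
    if (std::strcmp(name_format, "-") == 0)
        return stdout;
    char buffer[256];
    std::snprintf(buffer, sizeof buffer, name_format, (int)getpid());
    std::FILE *f = std::fopen(buffer, "w");
    return f ? f : stdout;        // fall back to stdout if the open fails
}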
+
+void
+__kmp_print_speculative_stats()
+{
+ if (__kmp_user_lock_kind != lk_adaptive)
+ return;
+
+ FILE * statsFile = __kmp_open_stats_file();
+
+ kmp_adaptive_lock_statistics_t total = destroyedStats;
+ kmp_adaptive_lock_info_t *lck;
+
+ for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
+ __kmp_add_stats( &total, lck );
+ }
+ kmp_adaptive_lock_statistics_t *t = &total;
+ kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
+ kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
+ t->softFailedSpeculations;
+
+ fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
+ fprintf ( statsFile, " Lock parameters: \n"
+ " max_soft_retries : %10d\n"
+ " max_badness : %10d\n",
+ __kmp_adaptive_backoff_params.max_soft_retries,
+ __kmp_adaptive_backoff_params.max_badness);
+ fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
+ fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
+ fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
+ t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
+ fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
+ t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
+ fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
+
+ fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
+ fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
+ t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
+ fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
+ t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
+ fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
+ t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
+
+ if (statsFile != stdout)
+ fclose( statsFile );
+}
+
+# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
+#else
+# define KMP_INC_STAT(lck,stat)
+
+#endif // KMP_DEBUG_ADAPTIVE_LOCKS
+
+static inline bool
+__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
+{
+ // It is enough to check that the head_id is zero.
+ // We don't need to check the tail as well.
+ bool res = lck->lk.head_id == 0;
+
+ // We need a fence here, since we must ensure that no memory operations
+ // from later in this thread float above that read.
+#if KMP_COMPILER_ICC
+ _mm_mfence();
+#else
+ __sync_synchronize();
+#endif
+
+ return res;
+}
+
+// Functions for manipulating the badness
+static __inline void
+__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
+{
+ // Reset the badness to zero so we eagerly try to speculate again
+ lck->lk.adaptive.badness = 0;
+ KMP_INC_STAT(lck,successfulSpeculations);
+}
+
+// Create a bit mask with one more set bit.
+static __inline void
+__kmp_step_badness( kmp_adaptive_lock_t *lck )
+{
+ kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
+ if ( newBadness > lck->lk.adaptive.max_badness) {
+ return;
+ } else {
+ lck->lk.adaptive.badness = newBadness;
+ }
+}
+
+// Check whether speculation should be attempted.
+static __inline int
+__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_uint32 badness = lck->lk.adaptive.badness;
+ kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
+ int res = (attempts & badness) == 0;
+ return res;
+}
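A small worked example of the gating above: badness grows as a low-bit mask (0, 1, 3, 7, ...) after failed speculations, and speculation is attempted only while (acquire_attempts & badness) == 0, i.e. on every attempt at badness 0, every 2nd at 1, every 4th at 3, and so on:

#include <cstdio>

int main() {
    unsigned badness = 3;   // mask after two failures: 0 -> 1 -> 3
    for (unsigned attempts = 1; attempts <= 8; ++attempts)
        std::printf("attempt %u -> %s\n", attempts,
                    (attempts & badness) == 0 ? "speculate" : "skip");
    return 0;
}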
+
+// Attempt to acquire only the speculative lock.
+// Does not back off to the non-speculative lock.
+//
+static int
+__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
+{
+ int retries = lck->lk.adaptive.max_soft_retries;
+
+ // We don't explicitly count the start of speculation; rather, we record
+ // the results (success, hard fail, soft fail). The sum of all of those
+ // is the total number of times we started speculation since all
+ // speculations must end one of those ways.
+ do
+ {
+ kmp_uint32 status = _xbegin();
+ // Switch this in to disable actual speculation but exercise
+ // at least some of the rest of the code. Useful for debugging...
+ // kmp_uint32 status = _XABORT_NESTED;
+
+ if (status == _XBEGIN_STARTED )
+ { /* We have successfully started speculation
+ * Check that no-one acquired the lock for real between when we last looked
+ * and now. This also gets the lock cache line into our read-set,
+ * which we need so that we'll abort if anyone later claims it for real.
+ */
+ if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
+ {
+ // Lock is now visibly acquired, so someone beat us to it.
+ // Abort the transaction so we'll restart from _xbegin with the
+ // failure status.
+ _xabort(0x01);
+ KMP_ASSERT2( 0, "should not get here" );
+ }
+ return 1; // Lock has been acquired (speculatively)
+ } else {
+ // We have aborted, update the statistics
+ if ( status & SOFT_ABORT_MASK)
+ {
+ KMP_INC_STAT(lck,softFailedSpeculations);
+ // and loop round to retry.
+ }
+ else
+ {
+ KMP_INC_STAT(lck,hardFailedSpeculations);
+ // Give up if we had a hard failure.
+ break;
+ }
+ }
+ } while( retries-- ); // Loop while we have retries, and didn't fail hard.
+
+ // Either we had a hard failure or we didn't succeed softly after
+ // the full set of attempts, so back off the badness.
+ __kmp_step_badness( lck );
+ return 0;
+}
+
+// Attempt to acquire the speculative lock, or back off to the non-speculative one
+// if the speculative lock cannot be acquired.
+// We can succeed speculatively, non-speculatively, or fail.
+static int
+__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
+{
+ // First try to acquire the lock speculatively
+ if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
+ return 1;
+
+ // Speculative acquisition failed, so try to acquire it non-speculatively.
+ // Count the non-speculative acquire attempt
+ lck->lk.adaptive.acquire_attempts++;
+
+ // Use base, non-speculative lock.
+ if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
+ {
+ KMP_INC_STAT(lck,nonSpeculativeAcquires);
+ return 1; // Lock is acquired (non-speculatively)
+ }
+ else
+ {
+ return 0; // Failed to acquire the lock, it's already visibly locked.
+ }
+}
+
+static int
+__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_lock";
+ if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+
+ int retval = __kmp_test_adaptive_lock( lck, gtid );
+
+ if ( retval ) {
+ lck->lk.qlk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+// Block until we can acquire a speculative, adaptive lock.
+// We check whether we should be trying to speculate.
+// If we should be, we check the real lock to see if it is free,
+// and, if not, pause without attempting to acquire it until it is.
+// Then we try the speculative acquire.
+// This means that although we suffer from lemmings a little (because
+// we can't acquire the lock speculatively until the queue of waiting
+// threads has cleared), we don't get into a state where we can never
+// acquire the lock speculatively (because we force the queue to clear
+// by preventing new arrivals from entering the queue).
+// This does mean that when we're trying to break lemmings, the lock
+// is no longer fair. However OpenMP makes no guarantee that its
+// locks are fair, so this isn't a real problem.
+static void
+__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
+{
+ if ( __kmp_should_speculate( lck, gtid ) )
+ {
+ if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
+ {
+ if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
+ return;
+ // We tried speculation and failed, so give up.
+ }
+ else
+ {
+ // We can't try speculation until the lock is free, so we
+ // pause here (without suspending on the queuing lock) to
+ // allow it to drain, then try again.
+ // All other threads will also see the same result for
+ // shouldSpeculate, so will be doing the same if they
+ // try to claim the lock from now on.
+ while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
+ {
+ KMP_INC_STAT(lck,lemmingYields);
+ __kmp_yield (TRUE);
+ }
+
+ if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
+ return;
+ }
+ }
+
+ // Speculative acquisition failed, so acquire it non-speculatively.
+ // Count the non-speculative acquire attempt
+ lck->lk.adaptive.acquire_attempts++;
+
+ __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
+ // We have acquired the base lock, so count that.
+ KMP_INC_STAT(lck,nonSpeculativeAcquires );
+}
+
+static void
+__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_lock";
+ if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
+ KMP_FATAL( LockIsAlreadyOwned, func );
+ }
+
+ __kmp_acquire_adaptive_lock( lck, gtid );
+
+ lck->lk.qlk.owner_id = gtid + 1;
+}
+
+static int
+__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
+{
+ if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
+ { // If the lock doesn't look claimed we must be speculating.
+ // (Or the user's code is buggy and they're releasing without locking;
+ // if we had XTEST we'd be able to check that case...)
+ _xend(); // Exit speculation
+ __kmp_update_badness_after_success( lck );
+ }
+ else
+ { // Since the lock *is* visibly locked we're not speculating,
+ // so should use the underlying lock's release scheme.
+ __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
+ }
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ lck->lk.qlk.owner_id = 0;
+ __kmp_release_adaptive_lock( lck, gtid );
+ return KMP_LOCK_RELEASED;
+}
+
+static void
+__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
+{
+ __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
+ lck->lk.adaptive.badness = 0;
+ lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
+ lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
+ lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+ __kmp_zero_speculative_stats( &lck->lk.adaptive );
+#endif
+ KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
+}
+
+static void
+__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
+{
+ __kmp_init_adaptive_lock( lck );
+}
+
+static void
+__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
+{
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+ __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
+#endif
+ __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
+ // Nothing needed for the speculative part.
+}
+
+static void
+__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
+{
+ char const * const func = "omp_destroy_lock";
+ if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_adaptive_lock( lck );
+}
+
+
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+
+/* ------------------------------------------------------------------------ */
+/* DRDPA ticket locks */
+/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
+
+static kmp_int32
+__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
+{
+ return TCR_4( lck->lk.owner_id ) - 1;
+}
+
+static inline bool
+__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
+{
+ return lck->lk.depth_locked != -1;
+}
+
+__forceinline static int
+__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
+ kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
+ volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
+ = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ TCR_PTR(lck->lk.polls); // volatile load
+
+#ifdef USE_LOCK_PROFILE
+ if (TCR_8(polls[ticket & mask].poll) != ticket)
+ __kmp_printf("LOCK CONTENTION: %p\n", lck);
+ /* else __kmp_printf( "." );*/
+#endif /* USE_LOCK_PROFILE */
+
+ //
+ // Now spin-wait, but reload the polls pointer and mask, in case the
+ // polling area has been reconfigured. Unless it is reconfigured, the
+ // reloads stay in L1 cache and are cheap.
+ //
+ // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
+ //
+ // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
+ // and poll to be re-read every spin iteration.
+ //
+ kmp_uint32 spins;
+
+ KMP_FSYNC_PREPARE(lck);
+ KMP_INIT_YIELD(spins);
+ while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
+ // If we are oversubscribed,
+ // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
+ // CPU Pause is in the macros for yield.
+ //
+ KMP_YIELD(TCR_4(__kmp_nth)
+ > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_SPIN(spins);
+
+ // Re-read the mask and the poll pointer from the lock structure.
+ //
+ // Make certain that "mask" is read before "polls" !!!
+ //
+ // If another thread reconfigures the polling area and updates these
+ // values, and we pick up the new value of mask but the old polls
+ // pointer, we could access memory beyond the end of the old polling
+ // area.
+ //
+ mask = TCR_8(lck->lk.mask); // volatile load
+ polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ TCR_PTR(lck->lk.polls); // volatile load
+ }
+
+ //
+ // Critical section starts here
+ //
+ KMP_FSYNC_ACQUIRED(lck);
+ KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
+ ticket, lck));
+ lck->lk.now_serving = ticket; // non-volatile store
+
+ //
+ // Deallocate a garbage polling area if we know that we are the last
+ // thread that could possibly access it.
+ //
+ // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
+ // ticket.
+ //
+ if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
+ __kmp_free((void *)lck->lk.old_polls);
+ lck->lk.old_polls = NULL;
+ lck->lk.cleanup_ticket = 0;
+ }
+
+ //
+ // Check to see if we should reconfigure the polling area.
+ // If there is still a garbage polling area to be deallocated from a
+ // previous reconfiguration, let a later thread reconfigure it.
+ //
+ if (lck->lk.old_polls == NULL) {
+ bool reconfigure = false;
+ volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
+ kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
+
+ if (TCR_4(__kmp_nth)
+ > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
+ //
+ // We are in oversubscription mode. Contract the polling area
+ // down to a single location, if that hasn't been done already.
+ //
+ if (num_polls > 1) {
+ reconfigure = true;
+ num_polls = TCR_4(lck->lk.num_polls);
+ mask = 0;
+ num_polls = 1;
+ polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ __kmp_allocate(num_polls * sizeof(*polls));
+ polls[0].poll = ticket;
+ }
+ }
+ else {
+ //
+ // We are in under/fully subscribed mode. Check the number of
+ // threads waiting on the lock. The size of the polling area
+ // should be at least the number of threads waiting.
+ //
+ kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
+ if (num_waiting > num_polls) {
+ kmp_uint32 old_num_polls = num_polls;
+ reconfigure = true;
+ do {
+ mask = (mask << 1) | 1;
+ num_polls *= 2;
+ } while (num_polls <= num_waiting);
+
+ //
+ // Allocate the new polling area, and copy the relevant portion
+ // of the old polling area to the new area. __kmp_allocate()
+ // zeroes the memory it allocates, and most of the old area is
+ // just zero padding, so we only copy the release counters.
+ //
+ polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ __kmp_allocate(num_polls * sizeof(*polls));
+ kmp_uint32 i;
+ for (i = 0; i < old_num_polls; i++) {
+ polls[i].poll = old_polls[i].poll;
+ }
+ }
+ }
+
+ if (reconfigure) {
+ //
+ // Now write the updated fields back to the lock structure.
+ //
+ // Make certain that "polls" is written before "mask" !!!
+ //
+ // If another thread picks up the new value of mask and the old
+ // polls pointer, it could access memory beyond the end of the
+ // old polling area.
+ //
+ // On x86, we need memory fences.
+ //
+ KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
+ ticket, lck, num_polls));
+
+ lck->lk.old_polls = old_polls; // non-volatile store
+ lck->lk.polls = polls; // volatile store
+
+ KMP_MB();
+
+ lck->lk.num_polls = num_polls; // non-volatile store
+ lck->lk.mask = mask; // volatile store
+
+ KMP_MB();
+
+ //
+ // Only after the new polling area and mask have been flushed
+ // to main memory can we update the cleanup ticket field.
+ //
+ // volatile load / non-volatile store
+ //
+ lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
+ }
+ }
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
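+
+// Editor's note: an illustrative, fixed-size sketch (not part of the original
+// sources) of the distributed-polling idea used above: every waiter takes a
+// ticket and spins on its own slot, polls[ticket & mask], and the releaser
+// wakes exactly one slot. It omits the dynamic reconfiguration and yielding;
+// the mini_* names are hypothetical.
+#if 0
+#include <atomic>
+#include <cstdint>
+
+const unsigned kNumPolls = 8;                      // must be a power of two
+
+struct mini_drdpa {
+    std::atomic<std::uint64_t> next_ticket;
+    std::atomic<std::uint64_t> polls[kNumPolls];   // slot i serves tickets congruent to i
+    std::uint64_t now_serving;                     // written only by the current owner
+};
+
+void mini_init(mini_drdpa &l) {
+    l.next_ticket.store(0);
+    for (unsigned i = 0; i < kNumPolls; ++i) l.polls[i].store(0);
+    l.now_serving = 0;
+}
+
+void mini_acquire(mini_drdpa &l) {
+    std::uint64_t ticket = l.next_ticket.fetch_add(1);
+    // Each waiter polls its own slot, so waiters spin on distinct locations.
+    while (l.polls[ticket & (kNumPolls - 1)].load(std::memory_order_acquire) < ticket)
+        ;                                          // a real lock would pause/yield here
+    l.now_serving = ticket;
+}
+
+void mini_release(mini_drdpa &l) {
+    std::uint64_t next = l.now_serving + 1;
+    l.polls[next & (kNumPolls - 1)].store(next, std::memory_order_release);
+}
+#endif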
+
+int
+__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
+}
+
+static int
+__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
+ KMP_FATAL( LockIsAlreadyOwned, func );
+ }
+
+ __kmp_acquire_drdpa_lock( lck, gtid );
+
+ lck->lk.owner_id = gtid + 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
+
+int
+__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ //
+ // First get a ticket, then read the polls pointer and the mask.
+ // The polls pointer must be read before the mask!!! (See above)
+ //
+ kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
+ volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
+ = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ TCR_PTR(lck->lk.polls); // volatile load
+ kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
+ if (TCR_8(polls[ticket & mask].poll) == ticket) {
+ kmp_uint64 next_ticket = ticket + 1;
+ if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
+ ticket, next_ticket)) {
+ KMP_FSYNC_ACQUIRED(lck);
+ KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
+ ticket, lck));
+ lck->lk.now_serving = ticket; // non-volatile store
+
+ //
+ // Since no threads are waiting, there is no possibility that
+ // we would want to reconfigure the polling area. We might
+ // have the cleanup ticket value (which says that it is now
+ // safe to deallocate old_polls), but we'll let a later thread
+ // which calls __kmp_acquire_lock do that - this routine
+ // isn't supposed to block, and we would risk blocking if we
+ // called __kmp_free() to do the deallocation.
+ //
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static int
+__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+
+ int retval = __kmp_test_drdpa_lock( lck, gtid );
+
+ if ( retval ) {
+ lck->lk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+int
+__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ //
+ // Read the ticket value from the lock data struct, then the polls
+ // pointer and the mask. The polls pointer must be read before the
+ // mask!!! (See above)
+ //
+ kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
+ volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
+ = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ TCR_PTR(lck->lk.polls); // volatile load
+ kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
+ KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
+ ticket - 1, lck));
+ KMP_FSYNC_RELEASING(lck);
+ KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
+ && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ lck->lk.owner_id = 0;
+ return __kmp_release_drdpa_lock( lck, gtid );
+}
+
+void
+__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
+{
+ lck->lk.location = NULL;
+ lck->lk.mask = 0;
+ lck->lk.num_polls = 1;
+ lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
+ __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
+ lck->lk.cleanup_ticket = 0;
+ lck->lk.old_polls = NULL;
+ lck->lk.next_ticket = 0;
+ lck->lk.now_serving = 0;
+ lck->lk.owner_id = 0; // no thread owns the lock.
+ lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
+ lck->lk.initialized = lck;
+
+ KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
+}
+
+static void
+__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
+{
+ __kmp_init_drdpa_lock( lck );
+}
+
+void
+__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
+{
+ lck->lk.initialized = NULL;
+ lck->lk.location = NULL;
+ if (lck->lk.polls != NULL) {
+ __kmp_free((void *)lck->lk.polls);
+ lck->lk.polls = NULL;
+ }
+ if (lck->lk.old_polls != NULL) {
+ __kmp_free((void *)lck->lk.old_polls);
+ lck->lk.old_polls = NULL;
+ }
+ lck->lk.mask = 0;
+ lck->lk.num_polls = 0;
+ lck->lk.cleanup_ticket = 0;
+ lck->lk.next_ticket = 0;
+ lck->lk.now_serving = 0;
+ lck->lk.owner_id = 0;
+ lck->lk.depth_locked = -1;
+}
+
+static void
+__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
+{
+ char const * const func = "omp_destroy_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_drdpa_lock( lck );
+}
+
+
+//
+// nested drdpa ticket locks
+//
+
+int
+__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
+ lck->lk.depth_locked += 1;
+ return KMP_LOCK_ACQUIRED_NEXT;
+ }
+ else {
+ __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
+ KMP_MB();
+ lck->lk.depth_locked = 1;
+ KMP_MB();
+ lck->lk.owner_id = gtid + 1;
+ return KMP_LOCK_ACQUIRED_FIRST;
+ }
+}
+
+static void
+__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_set_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ __kmp_acquire_nested_drdpa_lock( lck, gtid );
+}
+
+int
+__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ int retval;
+
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
+ retval = ++lck->lk.depth_locked;
+ }
+ else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
+ retval = 0;
+ }
+ else {
+ KMP_MB();
+ retval = lck->lk.depth_locked = 1;
+ KMP_MB();
+ lck->lk.owner_id = gtid + 1;
+ }
+ return retval;
+}
+
+static int
+__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_test_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ return __kmp_test_nested_drdpa_lock( lck, gtid );
+}
+
+int
+__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+
+ KMP_MB();
+ if ( --(lck->lk.depth_locked) == 0 ) {
+ KMP_MB();
+ lck->lk.owner_id = 0;
+ __kmp_release_drdpa_lock( lck, gtid );
+ return KMP_LOCK_RELEASED;
+ }
+ return KMP_LOCK_STILL_HELD;
+}
+
+static int
+__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
+{
+ char const * const func = "omp_unset_nest_lock";
+ KMP_MB(); /* in case another processor initialized lock */
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
+ KMP_FATAL( LockUnsettingFree, func );
+ }
+ if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
+ KMP_FATAL( LockUnsettingSetByAnother, func );
+ }
+ return __kmp_release_nested_drdpa_lock( lck, gtid );
+}
+
+void
+__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
+{
+ __kmp_init_drdpa_lock( lck );
+ lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
+}
+
+static void
+__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
+{
+ __kmp_init_nested_drdpa_lock( lck );
+}
+
+void
+__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
+{
+ __kmp_destroy_drdpa_lock( lck );
+ lck->lk.depth_locked = 0;
+}
+
+static void
+__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
+{
+ char const * const func = "omp_destroy_nest_lock";
+ if ( lck->lk.initialized != lck ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
+ KMP_FATAL( LockStillOwned, func );
+ }
+ __kmp_destroy_nested_drdpa_lock( lck );
+}
+
+
+//
+// access functions to fields which don't exist for all lock kinds.
+//
+
+static int
+__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
+{
+ return lck == lck->lk.initialized;
+}
+
+static const ident_t *
+__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
+{
+ return lck->lk.location;
+}
+
+static void
+__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
+{
+ lck->lk.location = loc;
+}
+
+static kmp_lock_flags_t
+__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
+{
+ return lck->lk.flags;
+}
+
+static void
+__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
+{
+ lck->lk.flags = flags;
+}
+
+#if KMP_USE_DYNAMIC_LOCK
+
+// Direct lock initializer. This simply writes a tag into the low 8 bits of the lock word.
+static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
+{
+ TCW_4(*lck, KMP_GET_D_TAG(seq));
+ KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
+}
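+
+// Editor's note: an illustrative sketch (not part of the original sources, and
+// not the actual KMP_GET_D_TAG/KMP_EXTRACT_I_INDEX macros) of the kind of
+// tagged lock word used here and by the jump tables below: direct locks keep an
+// odd tag in the word itself, while dynamically allocated (indirect) locks keep
+// an even value, an index shifted left by one, so the low bit tells them apart.
+#if 0
+#include <cstdint>
+#include <cstdio>
+
+inline std::uint32_t make_direct_word(std::uint32_t seq)   { return (seq << 1) | 1u; }
+inline std::uint32_t make_indirect_word(std::uint32_t idx) { return idx << 1; }
+inline bool is_direct(std::uint32_t word)                  { return (word & 1u) != 0; }
+
+int main() {
+    std::uint32_t a = make_direct_word(3), b = make_indirect_word(5);
+    std::printf("%u is %s, %u is %s\n",
+                a, is_direct(a) ? "direct" : "indirect",
+                b, is_direct(b) ? "direct" : "indirect");
+    return 0;
+}
+#endif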
+
+#if KMP_USE_TSX
+
+// HLE lock functions - imported from the testbed runtime.
+#define HLE_ACQUIRE ".byte 0xf2;"
+#define HLE_RELEASE ".byte 0xf3;"
+
+static inline kmp_uint32
+swap4(kmp_uint32 volatile *p, kmp_uint32 v)
+{
+ __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
+ : "+r"(v), "+m"(*p)
+ :
+ : "memory");
+ return v;
+}
+
+static void
+__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
+{
+ TCW_4(*lck, 0);
+}
+
+static void
+__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
+{
+ // Use gtid for KMP_LOCK_BUSY if necessary
+ if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
+ int delay = 1;
+ do {
+ while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
+ for (int i = delay; i != 0; --i)
+ KMP_CPU_PAUSE();
+ delay = ((delay << 1) | 1) & 7;
+ }
+ } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
+ }
+}
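+
+// Editor's note: the acquire loop above backs off with a capped exponential
+// delay, delay = ((delay << 1) | 1) & 7, i.e. 1, 3, 7, 7, ... pauses between
+// probes of the lock word. A tiny standalone illustration (not part of the
+// original sources) of that sequence:
+#if 0
+#include <cstdio>
+
+int main() {
+    int delay = 1;
+    for (int round = 0; round < 6; ++round) {
+        std::printf("spin round %d: pause %d time(s)\n", round, delay);
+        delay = ((delay << 1) | 1) & 7;   // 1, 3, 7, 7, 7, ... (capped at 7)
+    }
+    return 0;
+}
+#endif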
+
+static void
+__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
+{
+ __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
+}
+
+static int
+__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
+{
+ __asm__ volatile(HLE_RELEASE "movl %1,%0"
+ : "=m"(*lck)
+ : "r"(KMP_LOCK_FREE(hle))
+ : "memory");
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
+{
+ return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
+}
+
+static int
+__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
+{
+ return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
+}
+
+static int
+__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
+{
+ return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
+}
+
+static void
+__kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
+{
+ __kmp_init_queuing_lock(lck);
+}
+
+static void
+__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
+{
+ __kmp_destroy_queuing_lock(lck);
+}
+
+static void
+__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ unsigned retries=3, status;
+ do {
+ status = _xbegin();
+ if (status == _XBEGIN_STARTED) {
+ if (__kmp_is_unlocked_queuing_lock(lck))
+ return;
+ _xabort(0xff);
+ }
+ if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
+ // Wait until lock becomes free
+ while (! __kmp_is_unlocked_queuing_lock(lck))
+ __kmp_yield(TRUE);
+ }
+ else if (!(status & _XABORT_RETRY))
+ break;
+ } while (retries--);
+
+ // Fall-back non-speculative lock (xchg)
+ __kmp_acquire_queuing_lock(lck, gtid);
+}
+
+static void
+__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ __kmp_acquire_rtm_lock(lck, gtid);
+}
+
+static int
+__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ if (__kmp_is_unlocked_queuing_lock(lck)) {
+ // Releasing from speculation
+ _xend();
+ }
+ else {
+ // Releasing from a real lock
+ __kmp_release_queuing_lock(lck, gtid);
+ }
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ return __kmp_release_rtm_lock(lck, gtid);
+}
+
+static int
+__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ unsigned retries=3, status;
+ do {
+ status = _xbegin();
+ if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
+ return 1;
+ }
+ if (!(status & _XABORT_RETRY))
+ break;
+ } while (retries--);
+
+ return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
+}
+
+static int
+__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ return __kmp_test_rtm_lock(lck, gtid);
+}
+
+#endif // KMP_USE_TSX
+
+// Entry functions for indirect locks (first element of direct lock jump tables).
+static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
+static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
+static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
+static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
+static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
+
+//
+// Jump tables for the indirect lock functions.
+// Only the odd entries are filled in; that avoids the need to shift out the low bit.
+//
+
+// init functions
+#define expand(l, op) 0,__kmp_init_direct_lock,
+void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
+ = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
+#undef expand
+
+// destroy functions
+#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
+void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
+ = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
+#undef expand
+
+// set/acquire functions
+#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
+static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
+ = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
+#undef expand
+#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
+static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
+ = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
+#undef expand
+
+// unset/release and test functions
+#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
+static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
+ = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
+static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
+ = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
+#undef expand
+#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
+static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
+ = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
+static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
+ = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
+#undef expand
+
+// Exposes only one set of jump tables (*lock or *lock_with_checks).
+void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
+int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
+
+//
+// Jump tables for the indirect lock functions.
+//
+#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
+void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
+void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
+#undef expand
+
+// set/acquire functions
+#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
+static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
+#undef expand
+#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
+static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
+#undef expand
+
+// unset/release and test functions
+#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
+static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
+static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
+#undef expand
+#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
+static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
+static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
+#undef expand
+
+// Exposes only one set of jump tables (*lock or *lock_with_checks).
+void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
+int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
+int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
+
+// Lock index table.
+kmp_indirect_lock_table_t __kmp_i_lock_table;
+
+// Size of indirect locks.
+static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
+
+// Jump tables for lock accessor/modifier.
+void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
+void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
+const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
+kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
+
+// Use different lock pools for different lock types.
+static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
+
+// User lock allocator for dynamically dispatched indirect locks.
+// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
+// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
+// object is returned to the reusable pool of locks, unique to each lock type.
+kmp_indirect_lock_t *
+__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
+{
+ kmp_indirect_lock_t *lck;
+ kmp_lock_index_t idx;
+
+ __kmp_acquire_lock(&__kmp_global_lock, gtid);
+
+ if (__kmp_indirect_lock_pool[tag] != NULL) {
+ // Reuse the allocated and destroyed lock object
+ lck = __kmp_indirect_lock_pool[tag];
+ if (OMP_LOCK_T_SIZE < sizeof(void *))
+ idx = lck->lock->pool.index;
+ __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
+ KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
+ } else {
+ idx = __kmp_i_lock_table.next;
+ // Check capacity and double the size if it is full
+ if (idx == __kmp_i_lock_table.size) {
+ // Double up the space for block pointers
+ int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
+ kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
+ __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
+ KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
+ __kmp_free(old_table);
+ // Allocate new objects in the new blocks
+ for (int i = row; i < 2*row; ++i)
+ *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
+ __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+ __kmp_i_lock_table.size = 2*idx;
+ }
+ __kmp_i_lock_table.next++;
+ lck = KMP_GET_I_LOCK(idx);
+ // Allocate a new base lock object
+ lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
+ KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
+ }
+
+ __kmp_release_lock(&__kmp_global_lock, gtid);
+
+ lck->type = tag;
+
+ if (OMP_LOCK_T_SIZE < sizeof(void *)) {
+ *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
+ } else {
+ *((kmp_indirect_lock_t **)user_lock) = lck;
+ }
+
+ return lck;
+}
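+
+// Editor's note: an illustrative sketch (not part of the original sources) of
+// the growth scheme used above: the table is an array of block pointers, each
+// block holds a fixed chunk of slots, and when the table is full the array of
+// block pointers is doubled and new blocks are allocated for the new half. The
+// chunked_table/kChunk names are hypothetical stand-ins.
+#if 0
+#include <cstdlib>
+#include <cstring>
+
+const unsigned kChunk = 8;                 // slots per block (stand-in for KMP_I_LOCK_CHUNK)
+
+struct chunked_table {
+    void ***blocks;                        // blocks[i] points to kChunk slots
+    unsigned size;                         // slots currently backed by blocks
+    unsigned next;                         // next unused slot
+};
+
+void table_init(chunked_table &t) {
+    t.blocks = (void ***)std::malloc(sizeof(void **));
+    t.blocks[0] = (void **)std::calloc(kChunk, sizeof(void *));
+    t.size = kChunk;
+    t.next = 0;
+}
+
+// Hands out the next slot, doubling the block-pointer array when the table is full.
+void **table_next_slot(chunked_table &t) {
+    if (t.next == t.size) {
+        unsigned rows = t.size / kChunk;
+        void ***grown = (void ***)std::malloc(2 * rows * sizeof(void **));
+        std::memcpy(grown, t.blocks, rows * sizeof(void **));
+        std::free(t.blocks);
+        for (unsigned i = rows; i < 2 * rows; ++i)
+            grown[i] = (void **)std::calloc(kChunk, sizeof(void *));
+        t.blocks = grown;
+        t.size *= 2;
+    }
+    unsigned idx = t.next++;
+    return &t.blocks[idx / kChunk][idx % kChunk];
+}
+#endif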
+
+// User lock lookup for dynamically dispatched locks.
+static __forceinline
+kmp_indirect_lock_t *
+__kmp_lookup_indirect_lock(void **user_lock, const char *func)
+{
+ if (__kmp_env_consistency_check) {
+ kmp_indirect_lock_t *lck = NULL;
+ if (user_lock == NULL) {
+ KMP_FATAL(LockIsUninitialized, func);
+ }
+ if (OMP_LOCK_T_SIZE < sizeof(void *)) {
+ kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
+ if (idx >= __kmp_i_lock_table.size) {
+ KMP_FATAL(LockIsUninitialized, func);
+ }
+ lck = KMP_GET_I_LOCK(idx);
+ } else {
+ lck = *((kmp_indirect_lock_t **)user_lock);
+ }
+ if (lck == NULL) {
+ KMP_FATAL(LockIsUninitialized, func);
+ }
+ return lck;
+ } else {
+ if (OMP_LOCK_T_SIZE < sizeof(void *)) {
+ return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
+ } else {
+ return *((kmp_indirect_lock_t **)user_lock);
+ }
+ }
+}
+
+static void
+__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
+{
+#if KMP_USE_ADAPTIVE_LOCKS
+ if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
+ KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
+ seq = lockseq_queuing;
+ }
+#endif
+#if KMP_USE_TSX
+ if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
+ seq = lockseq_queuing;
+ }
+#endif
+ kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
+ kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
+ KMP_I_LOCK_FUNC(l, init)(l->lock);
+ KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
+}
+
+static void
+__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
+{
+ kmp_uint32 gtid = __kmp_entry_gtid();
+ kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
+ KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+ kmp_indirect_locktag_t tag = l->type;
+
+ __kmp_acquire_lock(&__kmp_global_lock, gtid);
+
+ // Use the base lock's space to keep the pool chain.
+ l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
+ if (OMP_LOCK_T_SIZE < sizeof(void *)) {
+ l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
+ }
+ __kmp_indirect_lock_pool[tag] = l;
+
+ __kmp_release_lock(&__kmp_global_lock, gtid);
+}
+
+static void
+__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
+{
+ kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
+ KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
+}
+
+static int
+__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
+{
+ kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
+ return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
+}
+
+static int
+__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
+{
+ kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
+ return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
+}
+
+static void
+__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
+{
+ kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
+ KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
+}
+
+static int
+__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
+{
+ kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
+ return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
+}
+
+static int
+__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
+{
+ kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
+ return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
+}
+
+kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
+
+// This is used only in kmp_error.c when consistency checking is on.
+kmp_int32
+__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
+{
+ switch (seq) {
+ case lockseq_tas:
+ case lockseq_nested_tas:
+ return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
+#if KMP_HAS_FUTEX
+ case lockseq_futex:
+ case lockseq_nested_futex:
+ return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
+#endif
+ case lockseq_ticket:
+ case lockseq_nested_ticket:
+ return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
+ case lockseq_queuing:
+ case lockseq_nested_queuing:
+#if KMP_USE_ADAPTIVE_LOCKS
+ case lockseq_adaptive:
+ return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
+#endif
+ case lockseq_drdpa:
+ case lockseq_nested_drdpa:
+ return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
+ default:
+ return 0;
+ }
+}
+
+// Initializes data for dynamic user locks.
+void
+__kmp_init_dynamic_user_locks()
+{
+ // Initialize jump table for the lock functions
+ if (__kmp_env_consistency_check) {
+ __kmp_direct_set = direct_set_check;
+ __kmp_direct_unset = direct_unset_check;
+ __kmp_direct_test = direct_test_check;
+ __kmp_indirect_set = indirect_set_check;
+ __kmp_indirect_unset = indirect_unset_check;
+ __kmp_indirect_test = indirect_test_check;
+ }
+ else {
+ __kmp_direct_set = direct_set;
+ __kmp_direct_unset = direct_unset;
+ __kmp_direct_test = direct_test;
+ __kmp_indirect_set = indirect_set;
+ __kmp_indirect_unset = indirect_unset;
+ __kmp_indirect_test = indirect_test;
+ }
+
+ // Initialize lock index table
+ __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
+ __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+ *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
+ __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+ __kmp_i_lock_table.next = 0;
+
+ // Indirect lock size
+ __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
+ __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
+#if KMP_USE_ADAPTIVE_LOCKS
+ __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
+#endif
+ __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
+#if KMP_USE_TSX
+ __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
+#endif
+ __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
+#if KMP_USE_FUTEX
+ __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
+#endif
+ __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
+ __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
+ __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
+
+ // Initialize lock accessor/modifier
+#define fill_jumps(table, expand, sep) { \
+ table[locktag##sep##ticket] = expand(ticket); \
+ table[locktag##sep##queuing] = expand(queuing); \
+ table[locktag##sep##drdpa] = expand(drdpa); \
+}
+
+#if KMP_USE_ADAPTIVE_LOCKS
+# define fill_table(table, expand) { \
+ fill_jumps(table, expand, _); \
+ table[locktag_adaptive] = expand(queuing); \
+ fill_jumps(table, expand, _nested_); \
+}
+#else
+# define fill_table(table, expand) { \
+ fill_jumps(table, expand, _); \
+ fill_jumps(table, expand, _nested_); \
+}
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
+ fill_table(__kmp_indirect_set_location, expand);
+#undef expand
+#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
+ fill_table(__kmp_indirect_set_flags, expand);
+#undef expand
+#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
+ fill_table(__kmp_indirect_get_location, expand);
+#undef expand
+#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
+ fill_table(__kmp_indirect_get_flags, expand);
+#undef expand
+
+ __kmp_init_user_locks = TRUE;
+}
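+
+// Editor's note: the initializer above binds either the checked or the
+// unchecked jump tables once, so the fast paths pay one indirect call and no
+// per-call branch on __kmp_env_consistency_check. A standalone sketch of that
+// design (not part of the original sources; all names are hypothetical):
+#if 0
+#include <cstdio>
+
+typedef void (*lock_fn)(int);
+
+static void set_plain(int id)   { std::printf("set lock %d\n", id); }
+static void set_checked(int id) { std::printf("set lock %d (with consistency checks)\n", id); }
+
+static lock_fn plain_table[]   = { set_plain };
+static lock_fn checked_table[] = { set_checked };
+static lock_fn *active_table   = 0;        // what the fast path indexes at run time
+
+void init_dispatch(bool consistency_checks) {
+    active_table = consistency_checks ? checked_table : plain_table;
+}
+
+int main() {
+    init_dispatch(true);
+    active_table[0](42);                   // one indirect call, no per-call check branch
+    return 0;
+}
+#endif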
+
+// Clean up the lock table.
+void
+__kmp_cleanup_indirect_user_locks()
+{
+ kmp_lock_index_t i;
+ int k;
+
+ // Clean up locks in the pools first (they were already destroyed before going into the pools).
+ for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
+ kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
+ while (l != NULL) {
+ kmp_indirect_lock_t *ll = l;
+ l = (kmp_indirect_lock_t *)l->lock->pool.next;
+ KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
+ __kmp_free(ll->lock);
+ ll->lock = NULL;
+ }
+ }
+ // Clean up the remaining undestroyed locks.
+ for (i = 0; i < __kmp_i_lock_table.next; i++) {
+ kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
+ if (l->lock != NULL) {
+ // Locks not destroyed explicitly need to be destroyed here.
+ KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+ KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
+ __kmp_free(l->lock);
+ }
+ }
+ // Free the table
+ for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
+ __kmp_free(__kmp_i_lock_table.table[i]);
+ __kmp_free(__kmp_i_lock_table.table);
+
+ __kmp_init_user_locks = FALSE;
+}
+
+enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
+int __kmp_num_locks_in_block = 1; // FIXME - tune this value
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+/* ------------------------------------------------------------------------ */
+/* user locks
+ *
+ * They are implemented as a table of function pointers which are set to the
+ * lock functions of the appropriate kind, once that has been determined.
+ */
+
+enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
+
+size_t __kmp_base_user_lock_size = 0;
+size_t __kmp_user_lock_size = 0;
+
+kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
+int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
+
+int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
+int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
+void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
+void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
+void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
+int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
+
+int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
+int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
+void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
+void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
+
+int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
+const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
+void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
+kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
+void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
+
+void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
+{
+ switch ( user_lock_kind ) {
+ case lk_default:
+ default:
+ KMP_ASSERT( 0 );
+
+ case lk_tas: {
+ __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
+ __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
+
+ __kmp_get_user_lock_owner_ =
+ ( kmp_int32 ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_tas_lock_owner );
+
+ if ( __kmp_env_consistency_check ) {
+ KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
+ KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
+ }
+ else {
+ KMP_BIND_USER_LOCK(tas);
+ KMP_BIND_NESTED_USER_LOCK(tas);
+ }
+
+ __kmp_destroy_user_lock_ =
+ ( void ( * )( kmp_user_lock_p ) )
+ ( &__kmp_destroy_tas_lock );
+
+ __kmp_is_user_lock_initialized_ =
+ ( int ( * )( kmp_user_lock_p ) ) NULL;
+
+ __kmp_get_user_lock_location_ =
+ ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
+
+ __kmp_set_user_lock_location_ =
+ ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
+
+ __kmp_get_user_lock_flags_ =
+ ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
+
+ __kmp_set_user_lock_flags_ =
+ ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
+ }
+ break;
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+
+ case lk_futex: {
+ __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
+ __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
+
+ __kmp_get_user_lock_owner_ =
+ ( kmp_int32 ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_futex_lock_owner );
+
+ if ( __kmp_env_consistency_check ) {
+ KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
+ KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
+ }
+ else {
+ KMP_BIND_USER_LOCK(futex);
+ KMP_BIND_NESTED_USER_LOCK(futex);
+ }
+
+ __kmp_destroy_user_lock_ =
+ ( void ( * )( kmp_user_lock_p ) )
+ ( &__kmp_destroy_futex_lock );
+
+ __kmp_is_user_lock_initialized_ =
+ ( int ( * )( kmp_user_lock_p ) ) NULL;
+
+ __kmp_get_user_lock_location_ =
+ ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
+
+ __kmp_set_user_lock_location_ =
+ ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
+
+ __kmp_get_user_lock_flags_ =
+ ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
+
+ __kmp_set_user_lock_flags_ =
+ ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
+ }
+ break;
+
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+
+ case lk_ticket: {
+ __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
+ __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
+
+ __kmp_get_user_lock_owner_ =
+ ( kmp_int32 ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_ticket_lock_owner );
+
+ if ( __kmp_env_consistency_check ) {
+ KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
+ KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
+ }
+ else {
+ KMP_BIND_USER_LOCK(ticket);
+ KMP_BIND_NESTED_USER_LOCK(ticket);
+ }
+
+ __kmp_destroy_user_lock_ =
+ ( void ( * )( kmp_user_lock_p ) )
+ ( &__kmp_destroy_ticket_lock );
+
+ __kmp_is_user_lock_initialized_ =
+ ( int ( * )( kmp_user_lock_p ) )
+ ( &__kmp_is_ticket_lock_initialized );
+
+ __kmp_get_user_lock_location_ =
+ ( const ident_t * ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_ticket_lock_location );
+
+ __kmp_set_user_lock_location_ =
+ ( void ( * )( kmp_user_lock_p, const ident_t * ) )
+ ( &__kmp_set_ticket_lock_location );
+
+ __kmp_get_user_lock_flags_ =
+ ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_ticket_lock_flags );
+
+ __kmp_set_user_lock_flags_ =
+ ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
+ ( &__kmp_set_ticket_lock_flags );
+ }
+ break;
+
+ case lk_queuing: {
+ __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
+ __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
+
+ __kmp_get_user_lock_owner_ =
+ ( kmp_int32 ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_queuing_lock_owner );
+
+ if ( __kmp_env_consistency_check ) {
+ KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
+ KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
+ }
+ else {
+ KMP_BIND_USER_LOCK(queuing);
+ KMP_BIND_NESTED_USER_LOCK(queuing);
+ }
+
+ __kmp_destroy_user_lock_ =
+ ( void ( * )( kmp_user_lock_p ) )
+ ( &__kmp_destroy_queuing_lock );
+
+ __kmp_is_user_lock_initialized_ =
+ ( int ( * )( kmp_user_lock_p ) )
+ ( &__kmp_is_queuing_lock_initialized );
+
+ __kmp_get_user_lock_location_ =
+ ( const ident_t * ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_queuing_lock_location );
+
+ __kmp_set_user_lock_location_ =
+ ( void ( * )( kmp_user_lock_p, const ident_t * ) )
+ ( &__kmp_set_queuing_lock_location );
+
+ __kmp_get_user_lock_flags_ =
+ ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_queuing_lock_flags );
+
+ __kmp_set_user_lock_flags_ =
+ ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
+ ( &__kmp_set_queuing_lock_flags );
+ }
+ break;
+
+#if KMP_USE_ADAPTIVE_LOCKS
+ case lk_adaptive: {
+ __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
+ __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
+
+ __kmp_get_user_lock_owner_ =
+ ( kmp_int32 ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_queuing_lock_owner );
+
+ if ( __kmp_env_consistency_check ) {
+ KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
+ }
+ else {
+ KMP_BIND_USER_LOCK(adaptive);
+ }
+
+ __kmp_destroy_user_lock_ =
+ ( void ( * )( kmp_user_lock_p ) )
+ ( &__kmp_destroy_adaptive_lock );
+
+ __kmp_is_user_lock_initialized_ =
+ ( int ( * )( kmp_user_lock_p ) )
+ ( &__kmp_is_queuing_lock_initialized );
+
+ __kmp_get_user_lock_location_ =
+ ( const ident_t * ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_queuing_lock_location );
+
+ __kmp_set_user_lock_location_ =
+ ( void ( * )( kmp_user_lock_p, const ident_t * ) )
+ ( &__kmp_set_queuing_lock_location );
+
+ __kmp_get_user_lock_flags_ =
+ ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_queuing_lock_flags );
+
+ __kmp_set_user_lock_flags_ =
+ ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
+ ( &__kmp_set_queuing_lock_flags );
+
+ }
+ break;
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+ case lk_drdpa: {
+ __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
+ __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
+
+ __kmp_get_user_lock_owner_ =
+ ( kmp_int32 ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_drdpa_lock_owner );
+
+ if ( __kmp_env_consistency_check ) {
+ KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
+ KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
+ }
+ else {
+ KMP_BIND_USER_LOCK(drdpa);
+ KMP_BIND_NESTED_USER_LOCK(drdpa);
+ }
+
+ __kmp_destroy_user_lock_ =
+ ( void ( * )( kmp_user_lock_p ) )
+ ( &__kmp_destroy_drdpa_lock );
+
+ __kmp_is_user_lock_initialized_ =
+ ( int ( * )( kmp_user_lock_p ) )
+ ( &__kmp_is_drdpa_lock_initialized );
+
+ __kmp_get_user_lock_location_ =
+ ( const ident_t * ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_drdpa_lock_location );
+
+ __kmp_set_user_lock_location_ =
+ ( void ( * )( kmp_user_lock_p, const ident_t * ) )
+ ( &__kmp_set_drdpa_lock_location );
+
+ __kmp_get_user_lock_flags_ =
+ ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
+ ( &__kmp_get_drdpa_lock_flags );
+
+ __kmp_set_user_lock_flags_ =
+ ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
+ ( &__kmp_set_drdpa_lock_flags );
+ }
+ break;
+ }
+}
+
+
+// ----------------------------------------------------------------------------
+// User lock table & lock allocation
+
+kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
+kmp_user_lock_p __kmp_lock_pool = NULL;
+
+// Lock block-allocation support.
+kmp_block_of_locks* __kmp_lock_blocks = NULL;
+int __kmp_num_locks_in_block = 1; // FIXME - tune this value
+
+static kmp_lock_index_t
+__kmp_lock_table_insert( kmp_user_lock_p lck )
+{
+ // Assume that kmp_global_lock is held upon entry/exit.
+ kmp_lock_index_t index;
+ if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
+ kmp_lock_index_t size;
+ kmp_user_lock_p *table;
+ // Reallocate lock table.
+ if ( __kmp_user_lock_table.allocated == 0 ) {
+ size = 1024;
+ }
+ else {
+ size = __kmp_user_lock_table.allocated * 2;
+ }
+ table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
+ KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
+ table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
+ // We cannot free the previous table now, since it may be in use by other
+ // threads. So save the pointer to the previous table in the first element of
+ // the new table. All the tables will be organized into a list, and can be
+ // freed when the library shuts down.
+ __kmp_user_lock_table.table = table;
+ __kmp_user_lock_table.allocated = size;
+ }
+ KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
+ index = __kmp_user_lock_table.used;
+ __kmp_user_lock_table.table[ index ] = lck;
+ ++ __kmp_user_lock_table.used;
+ return index;
+}
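+
+// Editor's note: an illustrative sketch (not part of the original sources) of
+// the growth trick used above: index 0 of every table is reserved, and when the
+// table is reallocated the old (smaller) table's address is parked in slot 0 of
+// the new one, so readers that still hold the old pointer stay valid and the
+// whole chain can be freed at shutdown. The names are hypothetical; "used"
+// starts at 1 because slot 0 is reserved.
+#if 0
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+struct grow_table {
+    unsigned used;        // next free index; starts at 1, index 0 holds the chain
+    unsigned allocated;
+    void **table;
+};
+
+unsigned table_insert(grow_table &t, void *item) {
+    if (t.used >= t.allocated) {
+        unsigned size = t.allocated ? 2 * t.allocated : 1024;
+        void **bigger = (void **)std::calloc(size, sizeof(void *));
+        if (t.table)
+            std::memcpy(bigger + 1, t.table + 1, (t.used - 1) * sizeof(void *));
+        bigger[0] = t.table;  // park the retired table; walk this chain to free at shutdown
+        t.table = bigger;
+        t.allocated = size;
+    }
+    t.table[t.used] = item;
+    return t.used++;
+}
+
+int main() {
+    grow_table t = { 1, 0, 0 };
+    int x = 0;
+    std::printf("stored at index %u\n", table_insert(t, &x));
+    return 0;
+}
+#endif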
+
+static kmp_user_lock_p
+__kmp_lock_block_allocate()
+{
+ // Assume that kmp_global_lock is held upon entry/exit.
+ static int last_index = 0;
+ if ( ( last_index >= __kmp_num_locks_in_block )
+ || ( __kmp_lock_blocks == NULL ) ) {
+ // Restart the index.
+ last_index = 0;
+ // Need to allocate a new block.
+ KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
+ size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
+ char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
+ // Set up the new block.
+ kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
+ new_block->next_block = __kmp_lock_blocks;
+ new_block->locks = (void *)buffer;
+ // Publish the new block.
+ KMP_MB();
+ __kmp_lock_blocks = new_block;
+ }
+ kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
+ [ last_index * __kmp_user_lock_size ] ) );
+ last_index++;
+ return ret;
+}
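+
+// Editor's note: an illustrative sketch (not part of the original sources) of
+// the block layout used above: one allocation holds space for N locks followed
+// by the block header, and the header records both the chain link and where the
+// lock storage starts. The names are hypothetical; like the code above, it
+// assumes the lock size keeps the trailing header adequately aligned.
+#if 0
+#include <cstdlib>
+#include <cstddef>
+
+struct block_header {
+    block_header *next_block;
+    void *locks;                       // start of the lock storage in the same buffer
+};
+
+block_header *allocate_block(std::size_t lock_size, int locks_per_block) {
+    std::size_t space_for_locks = lock_size * locks_per_block;
+    char *buffer = (char *)std::malloc(space_for_locks + sizeof(block_header));
+    block_header *hdr = (block_header *)(buffer + space_for_locks);  // header lives at the end
+    hdr->next_block = 0;
+    hdr->locks = buffer;
+    return hdr;
+}
+#endif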
+
+//
+// Get memory for a lock. It may be freshly allocated memory or reused memory
+// from the lock pool.
+//
+kmp_user_lock_p
+__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
+ kmp_lock_flags_t flags )
+{
+ kmp_user_lock_p lck;
+ kmp_lock_index_t index;
+ KMP_DEBUG_ASSERT( user_lock );
+
+ __kmp_acquire_lock( &__kmp_global_lock, gtid );
+
+ if ( __kmp_lock_pool == NULL ) {
+ // Lock pool is empty. Allocate new memory.
+ if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
+ lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
+ }
+ else {
+ lck = __kmp_lock_block_allocate();
+ }
+
+ // Insert the lock into the table so that it can be freed in __kmp_cleanup,
+ // and so the debugger has info on all allocated locks.
+ index = __kmp_lock_table_insert( lck );
+ }
+ else {
+ // Pick up lock from pool.
+ lck = __kmp_lock_pool;
+ index = __kmp_lock_pool->pool.index;
+ __kmp_lock_pool = __kmp_lock_pool->pool.next;
+ }
+
+ //
+ // We could potentially differentiate between nested and regular locks
+ // here, and do the lock table lookup for regular locks only.
+ //
+ if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
+ * ( (kmp_lock_index_t *) user_lock ) = index;
+ }
+ else {
+ * ( (kmp_user_lock_p *) user_lock ) = lck;
+ }
+
+ // Mark the lock if it is a critical section lock.
+ __kmp_set_user_lock_flags( lck, flags );
+
+ __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
+
+ return lck;
+}
+
+// Return the lock's memory to the pool for reuse.
+void
+__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( user_lock != NULL );
+ KMP_DEBUG_ASSERT( lck != NULL );
+
+ __kmp_acquire_lock( & __kmp_global_lock, gtid );
+
+ lck->pool.next = __kmp_lock_pool;
+ __kmp_lock_pool = lck;
+ if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
+ kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
+ KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
+ lck->pool.index = index;
+ }
+
+ __kmp_release_lock( & __kmp_global_lock, gtid );
+}
+
+kmp_user_lock_p
+__kmp_lookup_user_lock( void **user_lock, char const *func )
+{
+ kmp_user_lock_p lck = NULL;
+
+ if ( __kmp_env_consistency_check ) {
+ if ( user_lock == NULL ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ }
+
+ if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
+ kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
+ if ( __kmp_env_consistency_check ) {
+ if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ }
+ KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
+ KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
+ lck = __kmp_user_lock_table.table[index];
+ }
+ else {
+ lck = *( (kmp_user_lock_p *)user_lock );
+ }
+
+ if ( __kmp_env_consistency_check ) {
+ if ( lck == NULL ) {
+ KMP_FATAL( LockIsUninitialized, func );
+ }
+ }
+
+ return lck;
+}
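+
+// Editor's note: allocation, free and lookup above all branch on
+// OMP_LOCK_T_SIZE < sizeof(void *): when the user-visible lock is too small to
+// hold a pointer it stores a table index instead, otherwise it stores the
+// pointer to the real lock directly. A hedged standalone sketch of that dual
+// representation (not part of the original sources; names are hypothetical):
+#if 0
+#include <cstdint>
+
+static void *impl_table[1024];                    // side table; index 0 reserved
+
+void encode(void **user_lock, void *impl, std::uint32_t idx, bool lock_is_small) {
+    if (lock_is_small) {
+        impl_table[idx] = impl;
+        *(std::uint32_t *)user_lock = idx;        // only an index fits in the user's lock
+    } else {
+        *(void **)user_lock = impl;               // the pointer fits, store it directly
+    }
+}
+
+void *decode(void **user_lock, bool lock_is_small) {
+    if (lock_is_small)
+        return impl_table[*(std::uint32_t *)user_lock];
+    return *(void **)user_lock;
+}
+#endif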
+
+void
+__kmp_cleanup_user_locks( void )
+{
+ //
+ // Reset the lock pool. Do not worry about locks in the pool -- we will free
+ // them when iterating through the lock table (it includes all the locks,
+ // dead or alive).
+ //
+ __kmp_lock_pool = NULL;
+
+#define IS_CRITICAL(lck) \
+ ( ( __kmp_get_user_lock_flags_ != NULL ) && \
+ ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
+
+ //
+ // Loop through lock table, free all locks.
+ //
+ // Do not free item [0], it is reserved for lock tables list.
+ //
+ // FIXME - we are iterating through a list of (pointers to) objects of
+ // type union kmp_user_lock, but we have no way of knowing whether the
+ // base type is currently "pool" or whatever the global user lock type
+ // is.
+ //
+ // We are relying on the fact that for all of the user lock types
+ // (except "tas"), the first field in the lock struct is the "initialized"
+ // field, which is set to the address of the lock object itself when
+ // the lock is initialized. When the union is of type "pool", the
+ // first field is a pointer to the next object in the free list, which
+ // will not be the same address as the object itself.
+ //
+ // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
+ // will fail for "pool" objects on the free list. This must happen as
+ // the "location" field of real user locks overlaps the "index" field
+ // of "pool" objects.
+ //
+ // It would be better to run through the free list, and remove all "pool"
+ // objects from the lock table before executing this loop. However,
+ // "pool" objects do not always have their index field set (only on
+ // lin_32e), and I don't want to search the lock table for the address
+ // of every "pool" object on the free list.
+ //
+ while ( __kmp_user_lock_table.used > 1 ) {
+ const ident *loc;
+
+ //
+ // reduce __kmp_user_lock_table.used before freeing the lock,
+ // so that state of locks is consistent
+ //
+ kmp_user_lock_p lck = __kmp_user_lock_table.table[
+ --__kmp_user_lock_table.used ];
+
+ if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
+ ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
+ //
+ // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
+ // initialized AND it is NOT a critical section (user is not
+ // responsible for destroying criticals) AND we know source
+ // location to report.
+ //
+ if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
+ ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
+ ( loc->psource != NULL ) ) {
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
+ KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
+ __kmp_str_loc_free( &str_loc);
+ }
+
+#ifdef KMP_DEBUG
+ if ( IS_CRITICAL( lck ) ) {
+ KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
+ }
+ else {
+ KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
+ }
+#endif // KMP_DEBUG
+
+ //
+ // Cleanup internal lock dynamic resources
+ // (for drdpa locks particularly).
+ //
+ __kmp_destroy_user_lock( lck );
+ }
+
+ //
+ // Free the lock if block allocation of locks is not used.
+ //
+ if ( __kmp_lock_blocks == NULL ) {
+ __kmp_free( lck );
+ }
+ }
+
+#undef IS_CRITICAL
+
+ //
+ // delete lock table(s).
+ //
+ kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
+ __kmp_user_lock_table.table = NULL;
+ __kmp_user_lock_table.allocated = 0;
+
+ while ( table_ptr != NULL ) {
+ //
+ // In the first element we saved the pointer to the previous
+ // (smaller) lock table.
+ //
+ kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
+ __kmp_free( table_ptr );
+ table_ptr = next;
+ }
+
+ //
+ // Free buffers allocated for blocks of locks.
+ //
+ kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
+ __kmp_lock_blocks = NULL;
+
+ while ( block_ptr != NULL ) {
+ kmp_block_of_locks_t *next = block_ptr->next_block;
+ __kmp_free( block_ptr->locks );
+ //
+ // *block_ptr itself was allocated at the end of the locks vector.
+ //
+ block_ptr = next;
+ }
+
+ TCW_4(__kmp_init_user_locks, FALSE);
+}
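// --- Editor's sketch (not part of the original patch). The long comment inside
// __kmp_cleanup_user_locks relies on the first field of every non-TAS user lock
// being "initialized", set to the lock's own address, while entries sitting on
// __kmp_lock_pool reuse those same bytes as pool.next. A hedged illustration of
// the discrimination the cleanup loop performs:
static int
demo_is_live_lock( kmp_user_lock_p lck )
{
    // Live ticket/queuing/drdpa lock: the first pointer-sized field equals lck,
    // so the vtbl predicate returns nonzero. Pooled entry: the same bytes hold
    // pool.next, which points elsewhere (or is NULL), so the predicate fails
    // and the entry is only freed, never "destroyed".
    return ( __kmp_is_user_lock_initialized_ != NULL ) &&
           ( *__kmp_is_user_lock_initialized_ )( lck );
}
// --- end of editor's sketch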
+
+#endif // KMP_USE_DYNAMIC_LOCK
diff --git a/contrib/libs/cxxsupp/openmp/kmp_lock.h b/contrib/libs/cxxsupp/openmp/kmp_lock.h
index d79db4ae96..8cd01d3981 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_lock.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_lock.h
@@ -1,1273 +1,1273 @@
-/*
- * kmp_lock.h -- lock header file
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_LOCK_H
-#define KMP_LOCK_H
-
-#include <limits.h> // CHAR_BIT
-#include <stddef.h> // offsetof
-
-#include "kmp_os.h"
-#include "kmp_debug.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-// ----------------------------------------------------------------------------
-// Have to copy these definitions from kmp.h because kmp.h cannot be included
-// due to circular dependencies. Will undef these at end of file.
-
-#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
-#define KMP_GTID_DNE (-2)
-
-// Forward declaration of ident and ident_t
-
-struct ident;
-typedef struct ident ident_t;
-
-// End of copied code.
-// ----------------------------------------------------------------------------
-
-//
-// We need to know the size of the area we can assume that the compiler(s)
- // allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
-// compiler always allocates a pointer-sized area, as does visual studio.
-//
-// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
-// intel archs. It allocates at least 8 bytes for nested lock (more on
-// recent versions), but we are bounded by the pointer-sized chunks that
-// the Intel compiler allocates.
-//
-
-#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
-# define OMP_LOCK_T_SIZE sizeof(int)
-# define OMP_NEST_LOCK_T_SIZE sizeof(void *)
-#else
-# define OMP_LOCK_T_SIZE sizeof(void *)
-# define OMP_NEST_LOCK_T_SIZE sizeof(void *)
-#endif
-
-//
-// The Intel compiler allocates a 32-byte chunk for a critical section.
-// Both gcc and visual studio only allocate enough space for a pointer.
-// Sometimes we know that the space was allocated by the Intel compiler.
-//
-#define OMP_CRITICAL_SIZE sizeof(void *)
-#define INTEL_CRITICAL_SIZE 32
-
-//
-// lock flags
-//
-typedef kmp_uint32 kmp_lock_flags_t;
-
-#define kmp_lf_critical_section 1
-
-//
-// When a lock table is used, the indices are of kmp_lock_index_t
-//
-typedef kmp_uint32 kmp_lock_index_t;
-
-//
-// When memory allocated for locks are on the lock pool (free list),
-// it is treated as structs of this type.
-//
-struct kmp_lock_pool {
- union kmp_user_lock *next;
- kmp_lock_index_t index;
-};
-
-typedef struct kmp_lock_pool kmp_lock_pool_t;
-
-
-extern void __kmp_validate_locks( void );
-
-
-// ----------------------------------------------------------------------------
-//
-// There are 5 lock implementations:
-//
-// 1. Test and set locks.
-// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
-// 3. Ticket (Lamport bakery) locks.
-// 4. Queuing locks (with separate spin fields).
-// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
-//
-// and 3 lock purposes:
-//
-// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time.
-// These do not require non-negative global thread ID's.
-// 2. Internal RTL locks -- Used everywhere else in the RTL
-// 3. User locks (includes critical sections)
-//
-// ----------------------------------------------------------------------------
-
-
-// ============================================================================
-// Lock implementations.
-// ============================================================================
-
-
-// ----------------------------------------------------------------------------
-// Test and set locks.
-//
-// Non-nested test and set locks differ from the other lock kinds (except
-// futex) in that we use the memory allocated by the compiler for the lock,
-// rather than a pointer to it.
-//
-// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
-// bytes, so we have to use a lock table for nested locks, and avoid accessing
-// the depth_locked field for non-nested locks.
-//
-// Information normally available to the tools, such as lock location,
-// lock usage (normal lock vs. critical section), etc. is not available with
-// test and set locks.
-// ----------------------------------------------------------------------------
-
-struct kmp_base_tas_lock {
- volatile kmp_int32 poll; // 0 => unlocked
- // locked: (gtid+1) of owning thread
- kmp_int32 depth_locked; // depth locked, for nested locks only
-};
-
-typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
-
-union kmp_tas_lock {
- kmp_base_tas_lock_t lk;
- kmp_lock_pool_t pool; // make certain struct is large enough
- double lk_align; // use worst case alignment
- // no cache line padding
-};
-
-typedef union kmp_tas_lock kmp_tas_lock_t;
-
-//
-// Static initializer for test and set lock variables. Usage:
-// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
-//
-#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } }
-
-extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck );
-extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck );
-
-extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
-extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
-
-#define KMP_LOCK_RELEASED 1
-#define KMP_LOCK_STILL_HELD 0
-#define KMP_LOCK_ACQUIRED_FIRST 1
-#define KMP_LOCK_ACQUIRED_NEXT 0
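// --- Editor's sketch (not part of the original patch). Conceptual fast path of
// a non-nested test-and-set acquire, consistent with the poll semantics above
// (0 == unlocked, gtid+1 == owner). It mirrors the inlined macro further down
// in this header, minus the consistency checks and yield/backoff policy.
static int
demo_acquire_tas( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    while ( ( lck->lk.poll != 0 ) ||
            ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->lk.poll), 0, gtid + 1 ) ) ) {
        /* spin; the real runtime yields when the machine is oversubscribed */
    }
    return KMP_LOCK_ACQUIRED_FIRST;
}
// --- end of editor's sketch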
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-// ----------------------------------------------------------------------------
-// futex locks. futex locks are only available on Linux* OS.
-//
-// Like non-nested test and set lock, non-nested futex locks use the memory
-// allocated by the compiler for the lock, rather than a pointer to it.
-//
-// Information normally available to the tools, such as lock location,
-// lock usage (normal lock vs. critical section), etc. is not available with
-// test and set locks. With non-nested futex locks, the lock owner is not
-// even available.
-// ----------------------------------------------------------------------------
-
-struct kmp_base_futex_lock {
- volatile kmp_int32 poll; // 0 => unlocked
- // 2*(gtid+1) of owning thread, 0 if unlocked
- // locked: (gtid+1) of owning thread
- kmp_int32 depth_locked; // depth locked, for nested locks only
-};
-
-typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
-
-union kmp_futex_lock {
- kmp_base_futex_lock_t lk;
- kmp_lock_pool_t pool; // make certain struct is large enough
- double lk_align; // use worst case alignment
- // no cache line padding
-};
-
-typedef union kmp_futex_lock kmp_futex_lock_t;
-
-//
-// Static initializer for futex lock variables. Usage:
-// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
-//
-#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }
-
-extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
-extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );
-
-extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
-extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
-
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-
-// ----------------------------------------------------------------------------
-// Ticket locks.
-// ----------------------------------------------------------------------------
-
-struct kmp_base_ticket_lock {
- // `initialized' must be the first entry in the lock data structure!
- volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state
- ident_t const * location; // Source code location of omp_init_lock().
- volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
- volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
- volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
- kmp_int32 depth_locked; // depth locked, for nested locks only
- kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
-};
-
-typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
-
-union KMP_ALIGN_CACHE kmp_ticket_lock {
- kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing.
- kmp_lock_pool_t pool;
- double lk_align; // use worst case alignment
- char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ];
-};
-
-typedef union kmp_ticket_lock kmp_ticket_lock_t;
-
-//
-// Static initializer for simple ticket lock variables. Usage:
-// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
-// Note the macro argument. It is important to make var properly initialized.
-//
-#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } }
-
-extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck );
-extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck );
-
-extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck );
-extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck );
-
-
-// ----------------------------------------------------------------------------
-// Queuing locks.
-// ----------------------------------------------------------------------------
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-struct kmp_adaptive_lock_info;
-
-typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-
-struct kmp_adaptive_lock_statistics {
- /* So we can get stats from locks that haven't been destroyed. */
- kmp_adaptive_lock_info_t * next;
- kmp_adaptive_lock_info_t * prev;
-
- /* Other statistics */
- kmp_uint32 successfulSpeculations;
- kmp_uint32 hardFailedSpeculations;
- kmp_uint32 softFailedSpeculations;
- kmp_uint32 nonSpeculativeAcquires;
- kmp_uint32 nonSpeculativeAcquireAttempts;
- kmp_uint32 lemmingYields;
-};
-
-typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
-
-extern void __kmp_print_speculative_stats();
-extern void __kmp_init_speculative_stats();
-
-#endif // KMP_DEBUG_ADAPTIVE_LOCKS
-
-struct kmp_adaptive_lock_info
-{
- /* Values used for adaptivity.
- * Although these are accessed from multiple threads we don't access them atomically,
- * because if we miss updates it probably doesn't matter much. (It just affects our
- * decision about whether to try speculation on the lock).
- */
- kmp_uint32 volatile badness;
- kmp_uint32 volatile acquire_attempts;
- /* Parameters of the lock. */
- kmp_uint32 max_badness;
- kmp_uint32 max_soft_retries;
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
- kmp_adaptive_lock_statistics_t volatile stats;
-#endif
-};
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-
-struct kmp_base_queuing_lock {
-
- // `initialized' must be the first entry in the lock data structure!
- volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state.
-
- ident_t const * location; // Source code location of omp_init_lock().
-
- KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned!
-
- volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
- // Must be no padding here since head/tail used in 8-byte CAS
- volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
- // Decl order assumes little endian
- // bakery-style lock
- volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
- volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
- volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
- kmp_int32 depth_locked; // depth locked, for nested locks only
-
- kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
-};
-
-typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
-
-KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 );
-
-union KMP_ALIGN_CACHE kmp_queuing_lock {
- kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing.
- kmp_lock_pool_t pool;
- double lk_align; // use worst case alignment
- char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ];
-};
-
-typedef union kmp_queuing_lock kmp_queuing_lock_t;
-
-extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck );
-extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck );
-
-extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck );
-extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck );
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-// ----------------------------------------------------------------------------
-// Adaptive locks.
-// ----------------------------------------------------------------------------
-struct kmp_base_adaptive_lock {
- kmp_base_queuing_lock qlk;
- KMP_ALIGN(CACHE_LINE)
- kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock
-};
-
-typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
-
-union KMP_ALIGN_CACHE kmp_adaptive_lock {
- kmp_base_adaptive_lock_t lk;
- kmp_lock_pool_t pool;
- double lk_align;
- char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ];
-};
-typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
-
-# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk)
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-// ----------------------------------------------------------------------------
-// DRDPA ticket locks.
-// ----------------------------------------------------------------------------
-
-struct kmp_base_drdpa_lock {
- //
- // All of the fields on the first cache line are only written when
- // initializing or reconfiguring the lock. These are relatively rare
- // operations, so data from the first cache line will usually stay
- // resident in the cache of each thread trying to acquire the lock.
- //
- // initialized must be the first entry in the lock data structure!
- //
- KMP_ALIGN_CACHE
-
- volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state
- ident_t const * location; // Source code location of omp_init_lock().
- volatile struct kmp_lock_poll {
- kmp_uint64 poll;
- } * volatile polls;
- volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op
- kmp_uint64 cleanup_ticket; // thread with cleanup ticket
- volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls
- kmp_uint32 num_polls; // must be power of 2
-
- //
- // next_ticket it needs to exist in a separate cache line, as it is
- // invalidated every time a thread takes a new ticket.
- //
- KMP_ALIGN_CACHE
-
- volatile kmp_uint64 next_ticket;
-
- //
- // now_serving is used to store our ticket value while we hold the lock.
- // It has a slightly different meaning in the DRDPA ticket locks (where
- // it is written by the acquiring thread) than it does in the simple
- // ticket locks (where it is written by the releasing thread).
- //
- // Since now_serving is only read and written in the critical section,
- // it is non-volatile, but it needs to exist on a separate cache line,
- // as it is invalidated at every lock acquire.
- //
- // Likewise, the vars used for nested locks (owner_id and depth_locked)
- // are only written by the thread owning the lock, so they are put in
- // this cache line. owner_id is read by other threads, so it must be
- // declared volatile.
- //
- KMP_ALIGN_CACHE
-
- kmp_uint64 now_serving; // doesn't have to be volatile
- volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
- kmp_int32 depth_locked; // depth locked
- kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
-};
-
-typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
-
-union KMP_ALIGN_CACHE kmp_drdpa_lock {
- kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing. */
- kmp_lock_pool_t pool;
- double lk_align; // use worst case alignment
- char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
-};
-
-typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
-
-extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
-extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );
-
-extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
-extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
-extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
-extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
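// --- Editor's sketch (not part of the original patch). How the DRDPA fields
// above cooperate: each acquirer takes a ticket and spins on its own slot of
// the distributed polling area (ticket & mask), which is what keeps the
// spinning traffic per-thread. Heavily simplified; the real routines also
// handle reconfiguration via old_polls/cleanup_ticket. KMP_TEST_THEN_INC64 is
// assumed to be the runtime's 64-bit atomic fetch-and-add.
static void
demo_acquire_drdpa( kmp_drdpa_lock_t *lck )
{
    kmp_uint64 ticket =
        KMP_TEST_THEN_INC64( (volatile kmp_int64 *) &lck->lk.next_ticket );
    kmp_uint64 mask = lck->lk.mask;                  // 2**num_polls - 1
    while ( lck->lk.polls[ ticket & mask ].poll != ticket ) {
        /* spin on our own polling slot */
    }
    lck->lk.now_serving = ticket;                    // remembered for release
}

static void
demo_release_drdpa( kmp_drdpa_lock_t *lck )
{
    kmp_uint64 next = lck->lk.now_serving + 1;
    lck->lk.polls[ next & lck->lk.mask ].poll = next;   // wake the next ticket
}
// --- end of editor's sketch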
-
-
-// ============================================================================
-// Lock purposes.
-// ============================================================================
-
-
-// ----------------------------------------------------------------------------
-// Bootstrap locks.
-// ----------------------------------------------------------------------------
-
-// Bootstrap locks -- very few locks used at library initialization time.
-// Bootstrap locks are currently implemented as ticket locks.
-// They could also be implemented as test and set lock, but cannot be
-// implemented with other lock kinds as they require gtids which are not
-// available at initialization time.
-
-typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
-
-#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )
-
-static inline int
-__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
-{
- return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
-}
-
-static inline int
-__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
-{
- return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
-}
-
-static inline void
-__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
-{
- __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
-}
-
-static inline void
-__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
-{
- __kmp_init_ticket_lock( lck );
-}
-
-static inline void
-__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
-{
- __kmp_destroy_ticket_lock( lck );
-}
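// --- Editor's sketch (not part of the original patch). Typical bootstrap-lock
// usage: these locks guard startup/shutdown paths where no gtid is available
// yet, which is why the wrappers above pass KMP_GTID_DNE. "demo_bootstrap" is
// a hypothetical variable, not an existing runtime lock.
static kmp_bootstrap_lock_t demo_bootstrap =
    KMP_BOOTSTRAP_LOCK_INITIALIZER( demo_bootstrap );

static void
demo_guarded_init( void )
{
    __kmp_acquire_bootstrap_lock( &demo_bootstrap );
    /* one-time initialization work goes here */
    __kmp_release_bootstrap_lock( &demo_bootstrap );
}
// --- end of editor's sketch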
-
-
-// ----------------------------------------------------------------------------
-// Internal RTL locks.
-// ----------------------------------------------------------------------------
-
-//
-// Internal RTL locks are also implemented as ticket locks, for now.
-//
-// FIXME - We should go through and figure out which lock kind works best for
-// each internal lock, and use the type declaration and function calls for
-// that explicit lock kind (and get rid of this section).
-//
-
-typedef kmp_ticket_lock_t kmp_lock_t;
-
-static inline int
-__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_acquire_ticket_lock( lck, gtid );
-}
-
-static inline int
-__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
-{
- return __kmp_test_ticket_lock( lck, gtid );
-}
-
-static inline void
-__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
-{
- __kmp_release_ticket_lock( lck, gtid );
-}
-
-static inline void
-__kmp_init_lock( kmp_lock_t *lck )
-{
- __kmp_init_ticket_lock( lck );
-}
-
-static inline void
-__kmp_destroy_lock( kmp_lock_t *lck )
-{
- __kmp_destroy_ticket_lock( lck );
-}
-
-
-// ----------------------------------------------------------------------------
-// User locks.
-// ----------------------------------------------------------------------------
-
-//
-// Do not allocate objects of type union kmp_user_lock!!!
-// This will waste space unless __kmp_user_lock_kind == lk_drdpa.
-// Instead, check the value of __kmp_user_lock_kind and allocate objects of
-// the type of the appropriate union member, and cast their addresses to
-// kmp_user_lock_p.
-//
-
-enum kmp_lock_kind {
- lk_default = 0,
- lk_tas,
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- lk_futex,
-#endif
-#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
- lk_hle,
- lk_rtm,
-#endif
- lk_ticket,
- lk_queuing,
- lk_drdpa,
-#if KMP_USE_ADAPTIVE_LOCKS
- lk_adaptive
-#endif // KMP_USE_ADAPTIVE_LOCKS
-};
-
-typedef enum kmp_lock_kind kmp_lock_kind_t;
-
-extern kmp_lock_kind_t __kmp_user_lock_kind;
-
-union kmp_user_lock {
- kmp_tas_lock_t tas;
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- kmp_futex_lock_t futex;
-#endif
- kmp_ticket_lock_t ticket;
- kmp_queuing_lock_t queuing;
- kmp_drdpa_lock_t drdpa;
-#if KMP_USE_ADAPTIVE_LOCKS
- kmp_adaptive_lock_t adaptive;
-#endif // KMP_USE_ADAPTIVE_LOCKS
- kmp_lock_pool_t pool;
-};
-
-typedef union kmp_user_lock *kmp_user_lock_p;
-
-#if ! KMP_USE_DYNAMIC_LOCK
-
-extern size_t __kmp_base_user_lock_size;
-extern size_t __kmp_user_lock_size;
-
-extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );
-
-static inline kmp_int32
-__kmp_get_user_lock_owner( kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
- return ( *__kmp_get_user_lock_owner_ )( lck );
-}
-
-extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-#define __kmp_acquire_user_lock_with_checks(lck,gtid) \
- if (__kmp_user_lock_kind == lk_tas) { \
- if ( __kmp_env_consistency_check ) { \
- char const * const func = "omp_set_lock"; \
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \
- && lck->tas.lk.depth_locked != -1 ) { \
- KMP_FATAL( LockNestableUsedAsSimple, func ); \
- } \
- if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \
- KMP_FATAL( LockIsAlreadyOwned, func ); \
- } \
- } \
- if ( ( lck->tas.lk.poll != 0 ) || \
- ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
- kmp_uint32 spins; \
- KMP_FSYNC_PREPARE( lck ); \
- KMP_INIT_YIELD( spins ); \
- if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
- KMP_YIELD( TRUE ); \
- } else { \
- KMP_YIELD_SPIN( spins ); \
- } \
- while ( ( lck->tas.lk.poll != 0 ) || \
- ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
- if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
- KMP_YIELD( TRUE ); \
- } else { \
- KMP_YIELD_SPIN( spins ); \
- } \
- } \
- } \
- KMP_FSYNC_ACQUIRED( lck ); \
- } else { \
- KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \
- ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \
- }
-
-#else
-static inline int
-__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
- return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
-}
-#endif
-
-extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-
-#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
-extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
-static inline int
-__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- if ( __kmp_user_lock_kind == lk_tas ) {
- if ( __kmp_env_consistency_check ) {
- char const * const func = "omp_test_lock";
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
- && lck->tas.lk.depth_locked != -1 ) {
- KMP_FATAL( LockNestableUsedAsSimple, func );
- }
- }
- return ( ( lck->tas.lk.poll == 0 ) &&
- KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
- } else {
- KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
- return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
- }
-}
-#else
-static inline int
-__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
- return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
-}
-#endif
-
-extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-
-static inline void
-__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
- ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid );
-}
-
-extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );
-
-static inline void
-__kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
- ( *__kmp_init_user_lock_with_checks_ )( lck );
-}
-
-//
-// We need a non-checking version of destroy lock for when the RTL is
-// doing the cleanup as it can't always tell if the lock is nested or not.
-//
-extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );
-
-static inline void
-__kmp_destroy_user_lock( kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
- ( *__kmp_destroy_user_lock_ )( lck );
-}
-
-extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );
-
-static inline void
-__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
- ( *__kmp_destroy_user_lock_with_checks_ )( lck );
-}
-
-extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-
-#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \
- if (__kmp_user_lock_kind == lk_tas) { \
- if ( __kmp_env_consistency_check ) { \
- char const * const func = "omp_set_nest_lock"; \
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \
- && lck->tas.lk.depth_locked == -1 ) { \
- KMP_FATAL( LockSimpleUsedAsNestable, func ); \
- } \
- } \
- if ( lck->tas.lk.poll - 1 == gtid ) { \
- lck->tas.lk.depth_locked += 1; \
- *depth = KMP_LOCK_ACQUIRED_NEXT; \
- } else { \
- if ( ( lck->tas.lk.poll != 0 ) || \
- ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
- kmp_uint32 spins; \
- KMP_FSYNC_PREPARE( lck ); \
- KMP_INIT_YIELD( spins ); \
- if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
- KMP_YIELD( TRUE ); \
- } else { \
- KMP_YIELD_SPIN( spins ); \
- } \
- while ( ( lck->tas.lk.poll != 0 ) || \
- ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
- if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
- KMP_YIELD( TRUE ); \
- } else { \
- KMP_YIELD_SPIN( spins ); \
- } \
- } \
- } \
- lck->tas.lk.depth_locked = 1; \
- *depth = KMP_LOCK_ACQUIRED_FIRST; \
- } \
- KMP_FSYNC_ACQUIRED( lck ); \
- } else { \
- KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \
- *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \
- }
-
-#else
-static inline void
-__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth )
-{
- KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
- *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );
-}
-#endif
-
-extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-static inline int
-__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- if ( __kmp_user_lock_kind == lk_tas ) {
- int retval;
- if ( __kmp_env_consistency_check ) {
- char const * const func = "omp_test_nest_lock";
- if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )
- && lck->tas.lk.depth_locked == -1 ) {
- KMP_FATAL( LockSimpleUsedAsNestable, func );
- }
- }
- KMP_DEBUG_ASSERT( gtid >= 0 );
- if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
- return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
- }
- retval = ( ( lck->tas.lk.poll == 0 ) &&
- KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
- if ( retval ) {
- KMP_MB();
- lck->tas.lk.depth_locked = 1;
- }
- return retval;
- } else {
- KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
- return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
- }
-}
-#else
-static inline int
-__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
- return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
-}
-#endif
-
-extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-
-static inline int
-__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
-{
- KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
- return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid );
-}
-
-extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
-
-static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
- ( *__kmp_init_nested_user_lock_with_checks_ )( lck );
-}
-
-extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
-
-static inline void
-__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
-{
- KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
- ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck );
-}
-
-//
-// user lock functions which do not necessarily exist for all lock kinds.
-//
-// The "set" functions usually have wrapper routines that check for a NULL set
-// function pointer and call it if non-NULL.
-//
-// In some cases, it makes sense to have a "get" wrapper function check for a
-// NULL get function pointer and return NULL / invalid value / error code if
-// the function pointer is NULL.
-//
-// In other cases, the calling code really should differentiate between an
-// unimplemented function and one that is implemented but returning NULL /
- // invalid value. If this is the case, no get function wrapper exists.
-//
-
-extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );
-
- // no set function; fields are set during local allocation
-
-extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );
-
-static inline const ident_t *
-__kmp_get_user_lock_location( kmp_user_lock_p lck )
-{
- if ( __kmp_get_user_lock_location_ != NULL ) {
- return ( *__kmp_get_user_lock_location_ )( lck );
- }
- else {
- return NULL;
- }
-}
-
-extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );
-
-static inline void
-__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
-{
- if ( __kmp_set_user_lock_location_ != NULL ) {
- ( *__kmp_set_user_lock_location_ )( lck, loc );
- }
-}
-
-extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );
-
-extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );
-
-static inline void
-__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
-{
- if ( __kmp_set_user_lock_flags_ != NULL ) {
- ( *__kmp_set_user_lock_flags_ )( lck, flags );
- }
-}
-
-//
- // The function which sets up all of the vtbl pointers for kmp_user_lock_t.
-//
-extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );
-
-//
-// Macros for binding user lock functions.
-//
-#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \
- __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
- __kmp_acquire##nest##kind##_##suffix; \
- __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
- __kmp_release##nest##kind##_##suffix; \
- __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
- __kmp_test##nest##kind##_##suffix; \
- __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
- __kmp_init##nest##kind##_##suffix; \
- __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
- __kmp_destroy##nest##kind##_##suffix; \
-}
-
-#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
-#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
-#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
-#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
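// --- Editor's note (not part of the original patch). What one of the binding
// macros above expands to, shown for the ticket kind with the plain "lock"
// suffix; every function on the right-hand side is declared earlier in this
// header.
//
//   KMP_BIND_USER_LOCK(ticket)  ==>
//     __kmp_acquire_user_lock_with_checks_ =
//         ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_acquire_ticket_lock;
//     __kmp_release_user_lock_with_checks_ =
//         ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_release_ticket_lock;
//     __kmp_test_user_lock_with_checks_ =
//         ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_test_ticket_lock;
//     __kmp_init_user_lock_with_checks_ =
//         ( void (*)( kmp_user_lock_p ) ) __kmp_init_ticket_lock;
//     __kmp_destroy_user_lock_with_checks_ =
//         ( void (*)( kmp_user_lock_p ) ) __kmp_destroy_ticket_lock;
// --- end of editor's note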
-
-// ----------------------------------------------------------------------------
-// User lock table & lock allocation
-// ----------------------------------------------------------------------------
-
-/*
- On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of memory for a lock variable, which
- is not enough to store a pointer, so we have to use lock indexes instead of pointers and
- maintain lock table to map indexes to pointers.
-
-
- Note: The first element of the table is not a pointer to lock! It is a pointer to previously
- allocated table (or NULL if it is the first table).
-
- Usage:
-
- if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
- Lock table is fully utilized. User locks are indexes, so table is
- used on user lock operation.
- Note: it may be the case (lin_32) that we don't need to use a lock
- table for regular locks, but do need the table for nested locks.
- }
- else {
- Lock table initialized but not actually used.
- }
-*/
-
-struct kmp_lock_table {
- kmp_lock_index_t used; // Number of used elements
- kmp_lock_index_t allocated; // Number of allocated elements
- kmp_user_lock_p * table; // Lock table.
-};
-
-typedef struct kmp_lock_table kmp_lock_table_t;
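// --- Editor's sketch (not part of the original patch). The growth scheme the
// note above implies, consistent with the cleanup loop in kmp_lock.c earlier
// in this diff: element [0] of each table chains to the previously allocated
// (smaller) table, so old tables survive until shutdown and outstanding
// indexes stay valid. "demo_grow_lock_table" is hypothetical and simplified;
// the real growth happens inside __kmp_lock_table_insert.
static void
demo_grow_lock_table( void )
{
    kmp_lock_index_t new_size = 2 * __kmp_user_lock_table.allocated;
    kmp_user_lock_p *new_table =
        (kmp_user_lock_p *) __kmp_allocate( new_size * sizeof( kmp_user_lock_p ) );
    // Copy the live entries (index 0 is reserved), then stash the old table
    // pointer in slot [0] so cleanup can walk the chain and free it.
    for ( kmp_lock_index_t i = 1; i < __kmp_user_lock_table.used; ++i ) {
        new_table[ i ] = __kmp_user_lock_table.table[ i ];
    }
    new_table[ 0 ] = (kmp_user_lock_p) __kmp_user_lock_table.table;
    __kmp_user_lock_table.table     = new_table;
    __kmp_user_lock_table.allocated = new_size;
}
// --- end of editor's sketch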
-
-extern kmp_lock_table_t __kmp_user_lock_table;
-extern kmp_user_lock_p __kmp_lock_pool;
-
-struct kmp_block_of_locks {
- struct kmp_block_of_locks * next_block;
- void * locks;
-};
-
-typedef struct kmp_block_of_locks kmp_block_of_locks_t;
-
-extern kmp_block_of_locks_t *__kmp_lock_blocks;
-extern int __kmp_num_locks_in_block;
-
-extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
-extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
-extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
-extern void __kmp_cleanup_user_locks();
-
-#define KMP_CHECK_USER_LOCK_INIT() \
- { \
- if ( ! TCR_4( __kmp_init_user_locks ) ) { \
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \
- if ( ! TCR_4( __kmp_init_user_locks ) ) { \
- TCW_4( __kmp_init_user_locks, TRUE ); \
- } \
- __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \
- } \
- }
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-#undef KMP_PAD
-#undef KMP_GTID_DNE
-
-#if KMP_USE_DYNAMIC_LOCK
-
-//
-// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current
-// compatibility. Essential functionality of this new code is dynamic dispatch, but it also
-// implements (or enables implementation of) hinted user lock and critical section which will be
-// part of OMP 4.1 soon.
-//
-// Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock
-// function call on the created lock object requires type extraction and call through jump table
-// using the extracted type. This type information is stored in two different ways depending on
-// the size of the lock object, and we differentiate lock types by this size requirement - direct
-// and indirect locks.
-//
-// Direct locks:
-// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and
-// TAS/Futex lock falls into this category. We use the low byte of the lock object as the storage
-// for the lock type, and appropriate bit operation is required to access the data meaningful to
-// the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB
-// of the lock object. The newly introduced "hle" lock is also a direct lock.
-//
-// Indirect locks:
-// An indirect lock object requires more space than the compiler-generated space, and it should be
-// allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e.,
-// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated
-// indirect lock (void * fits in the object) or an index to the indirect lock table entry that
-// holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly
-// introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock.
-// When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to
-// differentiate the lock from a direct lock, and the remaining part is the actual index to the
-// indirect lock table.
-//
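// --- Editor's note (not part of the original patch). Bit layout implied by the
// description above, using the macros defined further down in this header:
//
//   direct lock cell   : ( lockseq << 1 ) | 1   -- low bit 1, tag in the low byte;
//                        lockseq_tas comes right after lockseq_indirect (0), so
//                        KMP_GET_D_TAG(lockseq_tas) == 3; later tag values depend
//                        on KMP_USE_FUTEX / KMP_USE_TSX.
//   indirect lock cell : ( table index << 1 )   -- low bit 0; the index is
//                        recovered by KMP_EXTRACT_I_INDEX(l), i.e.
//                        *(kmp_lock_index_t *)(l) >> 1.
// --- end of editor's note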
-
-#include <stdint.h> // for uintptr_t
-
-// Shortcuts
-#define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
-#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
-#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
-
-// List of lock definitions; all nested locks are indirect locks.
-// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
-// All nested locks are indirect lock types.
-#if KMP_USE_TSX
-# if KMP_USE_FUTEX
-# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
-# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
- m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
- m(nested_queuing, a) m(nested_drdpa, a)
-# else
-# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
-# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
- m(nested_tas, a) m(nested_ticket, a) \
- m(nested_queuing, a) m(nested_drdpa, a)
-# endif // KMP_USE_FUTEX
-# define KMP_LAST_D_LOCK lockseq_hle
-#else
-# if KMP_USE_FUTEX
-# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
-# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
- m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
- m(nested_queuing, a) m(nested_drdpa, a)
-# define KMP_LAST_D_LOCK lockseq_futex
-# else
-# define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
-# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
- m(nested_tas, a) m(nested_ticket, a) \
- m(nested_queuing, a) m(nested_drdpa, a)
-# define KMP_LAST_D_LOCK lockseq_tas
-# endif // KMP_USE_FUTEX
-#endif // KMP_USE_TSX
-
-// Information used in dynamic dispatch
-#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks
-#define KMP_FIRST_D_LOCK lockseq_tas
-#define KMP_FIRST_I_LOCK lockseq_ticket
-#define KMP_LAST_I_LOCK lockseq_nested_drdpa
-#define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types
-
-// Base type for dynamic locks.
-typedef kmp_uint32 kmp_dyna_lock_t;
-
-// Lock sequence that enumerates all lock kinds.
-// Always make this enumeration consistent with kmp_lockseq_t in the include directory.
-typedef enum {
- lockseq_indirect = 0,
-#define expand_seq(l,a) lockseq_##l,
- KMP_FOREACH_D_LOCK(expand_seq, 0)
- KMP_FOREACH_I_LOCK(expand_seq, 0)
-#undef expand_seq
-} kmp_dyna_lockseq_t;
-
-// Enumerates indirect lock tags.
-typedef enum {
-#define expand_tag(l,a) locktag_##l,
- KMP_FOREACH_I_LOCK(expand_tag, 0)
-#undef expand_tag
-} kmp_indirect_locktag_t;
-
-// Utility macros that extract information from lock sequences.
-#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
-#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
-#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
-#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)
-
-// Enumerates direct lock tags starting from indirect tag.
-typedef enum {
-#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
- KMP_FOREACH_D_LOCK(expand_tag, 0)
-#undef expand_tag
-} kmp_direct_locktag_t;
-
-// Indirect lock type
-typedef struct {
- kmp_user_lock_p lock;
- kmp_indirect_locktag_t type;
-} kmp_indirect_lock_t;
-
-// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
-extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
-extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
-extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
-extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
-
-// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
-extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
-extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
-extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
-extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
-
-// Extracts direct lock tag from a user lock pointer
-#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))
-
-// Extracts indirect lock index from a user lock pointer
-#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
-
-// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
-#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
-
-// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
-#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
-
-// Initializes a direct lock with the given lock pointer and lock sequence.
-#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
-
-// Initializes an indirect lock with the given lock pointer and lock sequence.
-#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
-
-// Returns "free" lock value for the given lock type.
-#define KMP_LOCK_FREE(type) (locktag_##type)
-
-// Returns "busy" lock value for the given lock teyp.
-#define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)
-
-// Returns lock value after removing (shifting) lock tag.
-#define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)
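// --- Editor's note (not part of the original patch). Worked example of the
// three macros above for a TAS direct lock, where locktag_tas ==
// KMP_GET_D_TAG(lockseq_tas) == 3 and KMP_LOCK_SHIFT == 8:
//
//   KMP_LOCK_FREE(tas)           == 3                        // unlocked cell
//   KMP_LOCK_BUSY(gtid + 1, tas) == ((gtid + 1) << 8) | 3    // owner + tag
//   KMP_LOCK_STRIP(cell)         == cell >> 8                // recovers gtid + 1
// --- end of editor's note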
-
-// Initializes global states and data structures for managing dynamic user locks.
-extern void __kmp_init_dynamic_user_locks();
-
-// Allocates and returns an indirect lock with the given indirect lock tag.
-extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
-
-// Cleans up global states and data structures for managing dynamic user locks.
-extern void __kmp_cleanup_indirect_user_locks();
-
-// Default user lock sequence when not using hinted locks.
-extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
-
-// Jump table for "set lock location", available only for indirect locks.
-extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
-#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \
- if (__kmp_indirect_set_location[(lck)->type] != NULL) \
- __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
-}
-
-// Jump table for "set lock flags", available only for indirect locks.
-extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
-#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \
- if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
- __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
-}
-
-// Jump table for "get lock location", available only for indirect locks.
-extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
-#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \
- ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
- : NULL )
-
-// Jump table for "get lock flags", available only for indirect locks.
-extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
-#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \
- ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
- : NULL )
-
-#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together
-
-// Lock table for indirect locks.
-typedef struct kmp_indirect_lock_table {
- kmp_indirect_lock_t **table; // blocks of indirect locks allocated
- kmp_lock_index_t size; // size of the indirect lock table
- kmp_lock_index_t next; // index to the next lock to be allocated
-} kmp_indirect_lock_table_t;
-
-extern kmp_indirect_lock_table_t __kmp_i_lock_table;
-
-// Returns the indirect lock associated with the given index.
-#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)
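// --- Editor's note (not part of the original patch). With KMP_I_LOCK_CHUNK ==
// 1024, KMP_GET_I_LOCK(1500) resolves to block 1, slot 476 of the chunked
// table:
//
//   *(__kmp_i_lock_table.table + 1500/1024) + 1500%1024
//       == __kmp_i_lock_table.table[1] + 476
// --- end of editor's note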
-
-// Number of locks in a lock block, which is fixed to "1" now.
-// TODO: No lock block implementation now. If we do support, we need to manage lock block data
-// structure for each indirect lock type.
-extern int __kmp_num_locks_in_block;
-
-// Fast lock table lookup without consistency checking
-#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \
- ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
- : *((kmp_indirect_lock_t **)(l)) )
-
-// Used once in kmp_error.c
-extern kmp_int32
-__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
-
-#else // KMP_USE_DYNAMIC_LOCK
-
-# define KMP_LOCK_BUSY(v, type) (v)
-# define KMP_LOCK_FREE(type) 0
-# define KMP_LOCK_STRIP(v) (v)
-
-#endif // KMP_USE_DYNAMIC_LOCK
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-
-#endif /* KMP_LOCK_H */
-
+/*
+ * kmp_lock.h -- lock header file
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_LOCK_H
+#define KMP_LOCK_H
+
+#include <limits.h> // CHAR_BIT
+#include <stddef.h> // offsetof
+
+#include "kmp_os.h"
+#include "kmp_debug.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// ----------------------------------------------------------------------------
+// Have to copy these definitions from kmp.h because kmp.h cannot be included
+// due to circular dependencies. Will undef these at end of file.
+
+#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
+#define KMP_GTID_DNE (-2)
+
+// Forward declaration of ident and ident_t
+
+struct ident;
+typedef struct ident ident_t;
+
+// End of copied code.
+// ----------------------------------------------------------------------------
+
+//
+// We need to know the size of the area we can assume that the compiler(s)
+ // allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
+// compiler always allocates a pointer-sized area, as does visual studio.
+//
+// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
+// intel archs. It allocates at least 8 bytes for nested lock (more on
+// recent versions), but we are bounded by the pointer-sized chunks that
+// the Intel compiler allocates.
+//
+
+#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
+# define OMP_LOCK_T_SIZE sizeof(int)
+# define OMP_NEST_LOCK_T_SIZE sizeof(void *)
+#else
+# define OMP_LOCK_T_SIZE sizeof(void *)
+# define OMP_NEST_LOCK_T_SIZE sizeof(void *)
+#endif
+
+//
+// The Intel compiler allocates a 32-byte chunk for a critical section.
+// Both gcc and visual studio only allocate enough space for a pointer.
+// Sometimes we know that the space was allocated by the Intel compiler.
+//
+#define OMP_CRITICAL_SIZE sizeof(void *)
+#define INTEL_CRITICAL_SIZE 32
+
+//
+// lock flags
+//
+typedef kmp_uint32 kmp_lock_flags_t;
+
+#define kmp_lf_critical_section 1
+
+//
+// When a lock table is used, the indices are of kmp_lock_index_t
+//
+typedef kmp_uint32 kmp_lock_index_t;
+
+//
+// When memory allocated for locks are on the lock pool (free list),
+// it is treated as structs of this type.
+//
+struct kmp_lock_pool {
+ union kmp_user_lock *next;
+ kmp_lock_index_t index;
+};
+
+typedef struct kmp_lock_pool kmp_lock_pool_t;
+
+
+extern void __kmp_validate_locks( void );
+
+
+// ----------------------------------------------------------------------------
+//
+// There are 5 lock implementations:
+//
+// 1. Test and set locks.
+// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
+// 3. Ticket (Lamport bakery) locks.
+// 4. Queuing locks (with separate spin fields).
+// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
+//
+// and 3 lock purposes:
+//
+// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time.
+// These do not require non-negative global thread ID's.
+// 2. Internal RTL locks -- Used everywhere else in the RTL
+// 3. User locks (includes critical sections)
+//
+// ----------------------------------------------------------------------------
+
+
+// ============================================================================
+// Lock implementations.
+// ============================================================================
+
+
+// ----------------------------------------------------------------------------
+// Test and set locks.
+//
+// Non-nested test and set locks differ from the other lock kinds (except
+// futex) in that we use the memory allocated by the compiler for the lock,
+// rather than a pointer to it.
+//
+// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
+// bytes, so we have to use a lock table for nested locks, and avoid accessing
+// the depth_locked field for non-nested locks.
+//
+// Information normally available to the tools, such as lock location,
+// lock usage (normal lock vs. critical section), etc. is not available with
+// test and set locks.
+// ----------------------------------------------------------------------------
+
+struct kmp_base_tas_lock {
+ volatile kmp_int32 poll; // 0 => unlocked
+ // locked: (gtid+1) of owning thread
+ kmp_int32 depth_locked; // depth locked, for nested locks only
+};
+
+typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
+
+union kmp_tas_lock {
+ kmp_base_tas_lock_t lk;
+ kmp_lock_pool_t pool; // make certain struct is large enough
+ double lk_align; // use worst case alignment
+ // no cache line padding
+};
+
+typedef union kmp_tas_lock kmp_tas_lock_t;
+
+//
+// Static initializer for test and set lock variables. Usage:
+// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
+//
+#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } }
+
+extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck );
+extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck );
+
+extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
+extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
+
+#define KMP_LOCK_RELEASED 1
+#define KMP_LOCK_STILL_HELD 0
+#define KMP_LOCK_ACQUIRED_FIRST 1
+#define KMP_LOCK_ACQUIRED_NEXT 0
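+
+// A minimal, illustrative usage sketch of the non-nested TAS API declared above
+// (an assumption about typical call order, not code taken from the runtime);
+// `gtid' is assumed to be a valid non-negative global thread id.
+//
+//   kmp_tas_lock_t xlock;
+//   __kmp_init_tas_lock( &xlock );
+//   __kmp_acquire_tas_lock( &xlock, gtid );   // spins until the lock is owned by gtid
+//   /* ... critical section ... */
+//   __kmp_release_tas_lock( &xlock, gtid );
+//   __kmp_destroy_tas_lock( &xlock );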
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+// ----------------------------------------------------------------------------
+// futex locks. futex locks are only available on Linux* OS.
+//
+// Like non-nested test and set lock, non-nested futex locks use the memory
+// allocated by the compiler for the lock, rather than a pointer to it.
+//
+// Information normally available to the tools, such as lock location,
+// lock usage (normal lock vs. critical section), etc. is not available with
+// futex locks. With non-nested futex locks, the lock owner is not
+// even available.
+// ----------------------------------------------------------------------------
+
+struct kmp_base_futex_lock {
+ volatile kmp_int32 poll; // 0 => unlocked
+ // locked: 2*(gtid+1) of owning thread
+ kmp_int32 depth_locked; // depth locked, for nested locks only
+};
+
+typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
+
+union kmp_futex_lock {
+ kmp_base_futex_lock_t lk;
+ kmp_lock_pool_t pool; // make certain struct is large enough
+ double lk_align; // use worst case alignment
+ // no cache line padding
+};
+
+typedef union kmp_futex_lock kmp_futex_lock_t;
+
+//
+// Static initializer for futex lock variables. Usage:
+// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
+//
+#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }
+
+extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
+extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );
+
+extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
+extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
+
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+
+// ----------------------------------------------------------------------------
+// Ticket locks.
+// ----------------------------------------------------------------------------
+
+struct kmp_base_ticket_lock {
+ // `initialized' must be the first entry in the lock data structure!
+ volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state
+ ident_t const * location; // Source code location of omp_init_lock().
+ volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
+ volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
+ volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
+ kmp_int32 depth_locked; // depth locked, for nested locks only
+ kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
+};
+
+typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
+
+union KMP_ALIGN_CACHE kmp_ticket_lock {
+ kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing.
+ kmp_lock_pool_t pool;
+ double lk_align; // use worst case alignment
+ char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ];
+};
+
+typedef union kmp_ticket_lock kmp_ticket_lock_t;
+
+//
+// Static initializer for simple ticket lock variables. Usage:
+// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
+// Note the macro argument: it is required so that the variable is properly initialized.
+//
+#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } }
+
+extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck );
+extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck );
+
+extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck );
+extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck );
+
+
+// ----------------------------------------------------------------------------
+// Queuing locks.
+// ----------------------------------------------------------------------------
+
+#if KMP_USE_ADAPTIVE_LOCKS
+
+struct kmp_adaptive_lock_info;
+
+typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
+
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+
+struct kmp_adaptive_lock_statistics {
+ /* So we can get stats from locks that haven't been destroyed. */
+ kmp_adaptive_lock_info_t * next;
+ kmp_adaptive_lock_info_t * prev;
+
+ /* Other statistics */
+ kmp_uint32 successfulSpeculations;
+ kmp_uint32 hardFailedSpeculations;
+ kmp_uint32 softFailedSpeculations;
+ kmp_uint32 nonSpeculativeAcquires;
+ kmp_uint32 nonSpeculativeAcquireAttempts;
+ kmp_uint32 lemmingYields;
+};
+
+typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
+
+extern void __kmp_print_speculative_stats();
+extern void __kmp_init_speculative_stats();
+
+#endif // KMP_DEBUG_ADAPTIVE_LOCKS
+
+struct kmp_adaptive_lock_info
+{
+ /* Values used for adaptivity.
+ * Although these are accessed from multiple threads we don't access them atomically,
+ * because if we miss updates it probably doesn't matter much. (It just affects our
+ * decision about whether to try speculation on the lock).
+ */
+ kmp_uint32 volatile badness;
+ kmp_uint32 volatile acquire_attempts;
+ /* Parameters of the lock. */
+ kmp_uint32 max_badness;
+ kmp_uint32 max_soft_retries;
+
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+ kmp_adaptive_lock_statistics_t volatile stats;
+#endif
+};
+
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+
+struct kmp_base_queuing_lock {
+
+ // `initialized' must be the first entry in the lock data structure!
+ volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state.
+
+ ident_t const * location; // Source code location of omp_init_lock().
+
+ KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned!
+
+ volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
+ // Must be no padding here since head/tail used in 8-byte CAS
+ volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
+ // Decl order assumes little endian
+ // bakery-style lock
+ volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
+ volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
+ volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
+ kmp_int32 depth_locked; // depth locked, for nested locks only
+
+ kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
+};
+
+typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
+
+KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 );
+
+union KMP_ALIGN_CACHE kmp_queuing_lock {
+ kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing.
+ kmp_lock_pool_t pool;
+ double lk_align; // use worst case alignment
+ char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ];
+};
+
+typedef union kmp_queuing_lock kmp_queuing_lock_t;
+
+extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck );
+extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck );
+
+extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck );
+extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck );
+
+#if KMP_USE_ADAPTIVE_LOCKS
+
+// ----------------------------------------------------------------------------
+// Adaptive locks.
+// ----------------------------------------------------------------------------
+struct kmp_base_adaptive_lock {
+ kmp_base_queuing_lock qlk;
+ KMP_ALIGN(CACHE_LINE)
+ kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock
+};
+
+typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
+
+union KMP_ALIGN_CACHE kmp_adaptive_lock {
+ kmp_base_adaptive_lock_t lk;
+ kmp_lock_pool_t pool;
+ double lk_align;
+ char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ];
+};
+typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
+
+# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk)
+
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+// ----------------------------------------------------------------------------
+// DRDPA ticket locks.
+// ----------------------------------------------------------------------------
+
+struct kmp_base_drdpa_lock {
+ //
+ // All of the fields on the first cache line are only written when
+ // initializing or reconfiguring the lock. These are relatively rare
+ // operations, so data from the first cache line will usually stay
+ // resident in the cache of each thread trying to acquire the lock.
+ //
+ // initialized must be the first entry in the lock data structure!
+ //
+ KMP_ALIGN_CACHE
+
+ volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state
+ ident_t const * location; // Source code location of omp_init_lock().
+ volatile struct kmp_lock_poll {
+ kmp_uint64 poll;
+ } * volatile polls;
+ volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op
+ kmp_uint64 cleanup_ticket; // thread with cleanup ticket
+ volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls
+ kmp_uint32 num_polls; // must be power of 2
+
+ //
+ // next_ticket needs to exist in a separate cache line, as it is
+ // invalidated every time a thread takes a new ticket.
+ //
+ KMP_ALIGN_CACHE
+
+ volatile kmp_uint64 next_ticket;
+
+ //
+ // now_serving is used to store our ticket value while we hold the lock.
+ // It has a slightly different meaning in the DRDPA ticket locks (where
+ // it is written by the acquiring thread) than it does in the simple
+ // ticket locks (where it is written by the releasing thread).
+ //
+ // Since now_serving is only read and written in the critical section,
+ // it is non-volatile, but it needs to exist on a separate cache line,
+ // as it is invalidated at every lock acquire.
+ //
+ // Likewise, the vars used for nested locks (owner_id and depth_locked)
+ // are only written by the thread owning the lock, so they are put in
+ // this cache line. owner_id is read by other threads, so it must be
+ // declared volatile.
+ //
+ KMP_ALIGN_CACHE
+
+ kmp_uint64 now_serving; // doesn't have to be volatile
+ volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
+ kmp_int32 depth_locked; // depth locked
+ kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
+};
+
+typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
+
+union KMP_ALIGN_CACHE kmp_drdpa_lock {
+ kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing.
+ kmp_lock_pool_t pool;
+ double lk_align; // use worst case alignment
+ char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
+};
+
+typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
+
+extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
+extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );
+
+extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
+extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
+extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
+extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
+
+
+// ============================================================================
+// Lock purposes.
+// ============================================================================
+
+
+// ----------------------------------------------------------------------------
+// Bootstrap locks.
+// ----------------------------------------------------------------------------
+
+// Bootstrap locks -- very few locks used at library initialization time.
+// Bootstrap locks are currently implemented as ticket locks.
+// They could also be implemented as test and set locks, but cannot be
+// implemented with the other lock kinds, which require gtids that are not
+// available at initialization time.
+
+typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
+
+#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )
+
+static inline int
+__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
+{
+ return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
+}
+
+static inline int
+__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
+{
+ return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
+}
+
+static inline void
+__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
+{
+ __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
+}
+
+static inline void
+__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
+{
+ __kmp_init_ticket_lock( lck );
+}
+
+static inline void
+__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
+{
+ __kmp_destroy_ticket_lock( lck );
+}
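+
+// An illustrative sketch of typical bootstrap lock usage (the variable name is
+// hypothetical; the call order is an assumption based on the wrappers above,
+// which supply KMP_GTID_DNE so no gtid is required):
+//
+//   static kmp_bootstrap_lock_t my_boot_lock =
+//       KMP_BOOTSTRAP_LOCK_INITIALIZER( my_boot_lock );
+//   ...
+//   __kmp_acquire_bootstrap_lock( &my_boot_lock );
+//   /* ... startup/shutdown-time work ... */
+//   __kmp_release_bootstrap_lock( &my_boot_lock );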
+
+
+// ----------------------------------------------------------------------------
+// Internal RTL locks.
+// ----------------------------------------------------------------------------
+
+//
+// Internal RTL locks are also implemented as ticket locks, for now.
+//
+// FIXME - We should go through and figure out which lock kind works best for
+// each internal lock, and use the type declaration and function calls for
+// that explicit lock kind (and get rid of this section).
+//
+
+typedef kmp_ticket_lock_t kmp_lock_t;
+
+static inline int
+__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_acquire_ticket_lock( lck, gtid );
+}
+
+static inline int
+__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
+{
+ return __kmp_test_ticket_lock( lck, gtid );
+}
+
+static inline void
+__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
+{
+ __kmp_release_ticket_lock( lck, gtid );
+}
+
+static inline void
+__kmp_init_lock( kmp_lock_t *lck )
+{
+ __kmp_init_ticket_lock( lck );
+}
+
+static inline void
+__kmp_destroy_lock( kmp_lock_t *lck )
+{
+ __kmp_destroy_ticket_lock( lck );
+}
+
+
+// ----------------------------------------------------------------------------
+// User locks.
+// ----------------------------------------------------------------------------
+
+//
+// Do not allocate objects of type union kmp_user_lock!!!
+// This will waste space unless __kmp_user_lock_kind == lk_drdpa.
+// Instead, check the value of __kmp_user_lock_kind and allocate objects of
+// the type of the appropriate union member, and cast their addresses to
+// kmp_user_lock_p.
+//
+
+enum kmp_lock_kind {
+ lk_default = 0,
+ lk_tas,
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ lk_futex,
+#endif
+#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
+ lk_hle,
+ lk_rtm,
+#endif
+ lk_ticket,
+ lk_queuing,
+ lk_drdpa,
+#if KMP_USE_ADAPTIVE_LOCKS
+ lk_adaptive
+#endif // KMP_USE_ADAPTIVE_LOCKS
+};
+
+typedef enum kmp_lock_kind kmp_lock_kind_t;
+
+extern kmp_lock_kind_t __kmp_user_lock_kind;
+
+union kmp_user_lock {
+ kmp_tas_lock_t tas;
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ kmp_futex_lock_t futex;
+#endif
+ kmp_ticket_lock_t ticket;
+ kmp_queuing_lock_t queuing;
+ kmp_drdpa_lock_t drdpa;
+#if KMP_USE_ADAPTIVE_LOCKS
+ kmp_adaptive_lock_t adaptive;
+#endif // KMP_USE_ADAPTIVE_LOCKS
+ kmp_lock_pool_t pool;
+};
+
+typedef union kmp_user_lock *kmp_user_lock_p;
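+
+// An illustrative allocation sketch following the rule above (hedged:
+// `__kmp_allocate' is assumed to be the runtime's internal allocator, and the
+// real allocation path also registers the lock in the lock table / pool):
+//
+//   kmp_user_lock_p lck;
+//   if ( __kmp_user_lock_kind == lk_ticket ) {
+//       lck = ( kmp_user_lock_p ) __kmp_allocate( sizeof( kmp_ticket_lock_t ) );
+//   } else if ( __kmp_user_lock_kind == lk_queuing ) {
+//       lck = ( kmp_user_lock_p ) __kmp_allocate( sizeof( kmp_queuing_lock_t ) );
+//   } // ... and so on for the remaining kinds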
+
+#if ! KMP_USE_DYNAMIC_LOCK
+
+extern size_t __kmp_base_user_lock_size;
+extern size_t __kmp_user_lock_size;
+
+extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );
+
+static inline kmp_int32
+__kmp_get_user_lock_owner( kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
+ return ( *__kmp_get_user_lock_owner_ )( lck );
+}
+
+extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+#define __kmp_acquire_user_lock_with_checks(lck,gtid) \
+ if (__kmp_user_lock_kind == lk_tas) { \
+ if ( __kmp_env_consistency_check ) { \
+ char const * const func = "omp_set_lock"; \
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \
+ && lck->tas.lk.depth_locked != -1 ) { \
+ KMP_FATAL( LockNestableUsedAsSimple, func ); \
+ } \
+ if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \
+ KMP_FATAL( LockIsAlreadyOwned, func ); \
+ } \
+ } \
+ if ( ( lck->tas.lk.poll != 0 ) || \
+ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
+ kmp_uint32 spins; \
+ KMP_FSYNC_PREPARE( lck ); \
+ KMP_INIT_YIELD( spins ); \
+ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
+ KMP_YIELD( TRUE ); \
+ } else { \
+ KMP_YIELD_SPIN( spins ); \
+ } \
+ while ( ( lck->tas.lk.poll != 0 ) || \
+ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
+ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
+ KMP_YIELD( TRUE ); \
+ } else { \
+ KMP_YIELD_SPIN( spins ); \
+ } \
+ } \
+ } \
+ KMP_FSYNC_ACQUIRED( lck ); \
+ } else { \
+ KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \
+ ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \
+ }
+
+#else
+static inline int
+__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
+ return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
+}
+#endif
+
+extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+
+#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
+extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
+static inline int
+__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ if ( __kmp_user_lock_kind == lk_tas ) {
+ if ( __kmp_env_consistency_check ) {
+ char const * const func = "omp_test_lock";
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
+ && lck->tas.lk.depth_locked != -1 ) {
+ KMP_FATAL( LockNestableUsedAsSimple, func );
+ }
+ }
+ return ( ( lck->tas.lk.poll == 0 ) &&
+ KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
+ } else {
+ KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
+ return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
+ }
+}
+#else
+static inline int
+__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
+ return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
+}
+#endif
+
+extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
+
+static inline void
+__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
+ ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid );
+}
+
+extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );
+
+static inline void
+__kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
+ ( *__kmp_init_user_lock_with_checks_ )( lck );
+}
+
+//
+// We need a non-checking version of destroy lock for when the RTL is
+// doing the cleanup as it can't always tell if the lock is nested or not.
+//
+extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );
+
+static inline void
+__kmp_destroy_user_lock( kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
+ ( *__kmp_destroy_user_lock_ )( lck );
+}
+
+extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );
+
+static inline void
+__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
+ ( *__kmp_destroy_user_lock_with_checks_ )( lck );
+}
+
+extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
+#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \
+ if (__kmp_user_lock_kind == lk_tas) { \
+ if ( __kmp_env_consistency_check ) { \
+ char const * const func = "omp_set_nest_lock"; \
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \
+ && lck->tas.lk.depth_locked == -1 ) { \
+ KMP_FATAL( LockSimpleUsedAsNestable, func ); \
+ } \
+ } \
+ if ( lck->tas.lk.poll - 1 == gtid ) { \
+ lck->tas.lk.depth_locked += 1; \
+ *depth = KMP_LOCK_ACQUIRED_NEXT; \
+ } else { \
+ if ( ( lck->tas.lk.poll != 0 ) || \
+ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
+ kmp_uint32 spins; \
+ KMP_FSYNC_PREPARE( lck ); \
+ KMP_INIT_YIELD( spins ); \
+ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
+ KMP_YIELD( TRUE ); \
+ } else { \
+ KMP_YIELD_SPIN( spins ); \
+ } \
+ while ( ( lck->tas.lk.poll != 0 ) || \
+ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
+ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
+ KMP_YIELD( TRUE ); \
+ } else { \
+ KMP_YIELD_SPIN( spins ); \
+ } \
+ } \
+ } \
+ lck->tas.lk.depth_locked = 1; \
+ *depth = KMP_LOCK_ACQUIRED_FIRST; \
+ } \
+ KMP_FSYNC_ACQUIRED( lck ); \
+ } else { \
+ KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \
+ *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \
+ }
+
+#else
+static inline void
+__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth )
+{
+ KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
+ *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );
+}
+#endif
+
+extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+static inline int
+__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ if ( __kmp_user_lock_kind == lk_tas ) {
+ int retval;
+ if ( __kmp_env_consistency_check ) {
+ char const * const func = "omp_test_nest_lock";
+ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )
+ && lck->tas.lk.depth_locked == -1 ) {
+ KMP_FATAL( LockSimpleUsedAsNestable, func );
+ }
+ }
+ KMP_DEBUG_ASSERT( gtid >= 0 );
+ if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
+ return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
+ }
+ retval = ( ( lck->tas.lk.poll == 0 ) &&
+ KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
+ if ( retval ) {
+ KMP_MB();
+ lck->tas.lk.depth_locked = 1;
+ }
+ return retval;
+ } else {
+ KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
+ return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
+ }
+}
+#else
+static inline int
+__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
+ return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
+}
+#endif
+
+extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
+
+static inline int
+__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
+{
+ KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
+ return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid );
+}
+
+extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
+
+static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
+ ( *__kmp_init_nested_user_lock_with_checks_ )( lck );
+}
+
+extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
+
+static inline void
+__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
+{
+ KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
+ ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck );
+}
+
+//
+// user lock functions which do not necessarily exist for all lock kinds.
+//
+// The "set" functions usually have wrapper routines that check for a NULL set
+// function pointer and call it if non-NULL.
+//
+// In some cases, it makes sense to have a "get" wrapper function check for a
+// NULL get function pointer and return NULL / invalid value / error code if
+// the function pointer is NULL.
+//
+// In other cases, the calling code really should differentiate between an
+// unimplemented function and one that is implemented but returning NULL /
+// invalid value. If this is the case, no get function wrapper exists.
+//
+
+extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );
+
+// no set function; fields are set during local allocation
+
+extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );
+
+static inline const ident_t *
+__kmp_get_user_lock_location( kmp_user_lock_p lck )
+{
+ if ( __kmp_get_user_lock_location_ != NULL ) {
+ return ( *__kmp_get_user_lock_location_ )( lck );
+ }
+ else {
+ return NULL;
+ }
+}
+
+extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );
+
+static inline void
+__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
+{
+ if ( __kmp_set_user_lock_location_ != NULL ) {
+ ( *__kmp_set_user_lock_location_ )( lck, loc );
+ }
+}
+
+extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );
+
+extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );
+
+static inline void
+__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
+{
+ if ( __kmp_set_user_lock_flags_ != NULL ) {
+ ( *__kmp_set_user_lock_flags_ )( lck, flags );
+ }
+}
+
+//
+// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
+//
+extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );
+
+//
+// Macros for binding user lock functions.
+//
+#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \
+ __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
+ __kmp_acquire##nest##kind##_##suffix; \
+ __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
+ __kmp_release##nest##kind##_##suffix; \
+ __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
+ __kmp_test##nest##kind##_##suffix; \
+ __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
+ __kmp_init##nest##kind##_##suffix; \
+ __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
+ __kmp_destroy##nest##kind##_##suffix; \
+}
+
+#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
+#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
+#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
+#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
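+
+// For illustration, a rough sketch of what one binding expands to (derived
+// mechanically from the template above, not additional runtime code):
+//
+//   KMP_BIND_USER_LOCK(ticket) assigns the ticket lock entry points, e.g.
+//   __kmp_acquire_user_lock_with_checks_ =
+//       ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_acquire_ticket_lock;
+//   __kmp_release_user_lock_with_checks_ =
+//       ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_release_ticket_lock;
+//   ... and similarly for the test/init/destroy pointers.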
+
+// ----------------------------------------------------------------------------
+// User lock table & lock allocation
+// ----------------------------------------------------------------------------
+
+/*
+ On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of memory for a lock variable,
+ which is not enough to store a pointer, so we have to use lock indexes instead of pointers and
+ maintain a lock table to map indexes to pointers.
+
+
+ Note: The first element of the table is not a pointer to a lock! It is a pointer to the previously
+ allocated table (or NULL if it is the first table).
+
+ Usage:
+
+ if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
+ The lock table is fully utilized. User locks are indexes, so the table is
+ used on every user lock operation.
+ Note: it may be the case (lin_32) that we don't need to use a lock
+ table for regular locks, but do need the table for nested locks.
+ }
+ else {
+ Lock table initialized but not actually used.
+ }
+*/
+
+struct kmp_lock_table {
+ kmp_lock_index_t used; // Number of used elements
+ kmp_lock_index_t allocated; // Number of allocated elements
+ kmp_user_lock_p * table; // Lock table.
+};
+
+typedef struct kmp_lock_table kmp_lock_table_t;
+
+extern kmp_lock_table_t __kmp_user_lock_table;
+extern kmp_user_lock_p __kmp_lock_pool;
+
+struct kmp_block_of_locks {
+ struct kmp_block_of_locks * next_block;
+ void * locks;
+};
+
+typedef struct kmp_block_of_locks kmp_block_of_locks_t;
+
+extern kmp_block_of_locks_t *__kmp_lock_blocks;
+extern int __kmp_num_locks_in_block;
+
+extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
+extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
+extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
+extern void __kmp_cleanup_user_locks();
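+
+// An illustrative sketch of what the index-based lookup above conceptually does
+// when locks are indexed (hedged; the real __kmp_lookup_user_lock() also
+// performs consistency checks):
+//
+//   kmp_lock_index_t idx = *(kmp_lock_index_t *)user_lock;   // index, not a pointer
+//   kmp_user_lock_p  lck = __kmp_user_lock_table.table[ idx ];
+//
+// Remember that table[0] is reserved for the pointer to the previously
+// allocated table, so valid lock indexes start at 1.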
+
+#define KMP_CHECK_USER_LOCK_INIT() \
+ { \
+ if ( ! TCR_4( __kmp_init_user_locks ) ) { \
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \
+ if ( ! TCR_4( __kmp_init_user_locks ) ) { \
+ TCW_4( __kmp_init_user_locks, TRUE ); \
+ } \
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \
+ } \
+ }
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+#undef KMP_PAD
+#undef KMP_GTID_DNE
+
+#if KMP_USE_DYNAMIC_LOCK
+
+//
+// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current
+// compatibility. The essential functionality of this new code is dynamic dispatch, but it also
+// implements (or enables implementation of) hinted user locks and critical sections, which will be
+// part of OMP 4.1 soon.
+//
+// The lock type can be decided at creation time (i.e., lock initialization), and each subsequent
+// lock function call on the created lock object requires extracting the type and dispatching
+// through a jump table using the extracted type. This type information is stored in two different
+// ways depending on the size of the lock object, and we differentiate lock types by this size
+// requirement - direct and indirect locks.
+//
+// Direct locks:
+// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and
+// TAS/Futex locks fall into this category. We use the low byte of the lock object as the storage
+// for the lock type, and appropriate bit operations are required to access the data meaningful to
+// the lock algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is written to
+// the LSB of the lock object. The newly introduced "hle" lock is also a direct lock.
+//
+// Indirect locks:
+// An indirect lock object requires more space than the compiler-generated space, and it must be
+// allocated from the heap. Depending on the size of the compiler-generated space for the lock (i.e.,
+// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated
+// indirect lock (when void * fits in the object) or an index to the indirect lock table entry that
+// holds the address. Ticket/Queuing/DRDPA/Adaptive locks fall into this category, and the newly
+// introduced "rtm" lock is also an indirect lock, implemented on top of the Queuing lock.
+// When the omp_lock_t object holds an index (not a lock address), 0 is written to the LSB to
+// differentiate the lock from a direct lock, and the remaining part is the actual index into the
+// indirect lock table.
+//
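+
+// A hedged illustration of the tagging scheme described above, in terms of the
+// macros defined later in this section: given an omp_lock_t word w,
+//
+//   if ( w & 1 )   // direct lock: the low KMP_LOCK_SHIFT bits hold the lock tag
+//   else           // indirect lock: w is either the kmp_indirect_lock_t address
+//                  // or ( w >> 1 ), an index into the indirect lock table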
+
+#include <stdint.h> // for uintptr_t
+
+// Shortcuts
+#define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
+#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
+#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
+
+// List of lock definitions; all nested locks are indirect lock types.
+// The hle lock is an xchg lock prefixed with XACQUIRE/XRELEASE,
+// and is available only when TSX support is compiled in.
+#if KMP_USE_TSX
+# if KMP_USE_FUTEX
+# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
+# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
+ m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
+ m(nested_queuing, a) m(nested_drdpa, a)
+# else
+# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
+# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
+ m(nested_tas, a) m(nested_ticket, a) \
+ m(nested_queuing, a) m(nested_drdpa, a)
+# endif // KMP_USE_FUTEX
+# define KMP_LAST_D_LOCK lockseq_hle
+#else
+# if KMP_USE_FUTEX
+# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
+# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
+ m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
+ m(nested_queuing, a) m(nested_drdpa, a)
+# define KMP_LAST_D_LOCK lockseq_futex
+# else
+# define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
+# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
+ m(nested_tas, a) m(nested_ticket, a) \
+ m(nested_queuing, a) m(nested_drdpa, a)
+# define KMP_LAST_D_LOCK lockseq_tas
+# endif // KMP_USE_FUTEX
+#endif // KMP_USE_TSX
+
+// Information used in dynamic dispatch
+#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks
+#define KMP_FIRST_D_LOCK lockseq_tas
+#define KMP_FIRST_I_LOCK lockseq_ticket
+#define KMP_LAST_I_LOCK lockseq_nested_drdpa
+#define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types
+
+// Base type for dynamic locks.
+typedef kmp_uint32 kmp_dyna_lock_t;
+
+// Lock sequence that enumerates all lock kinds.
+// Always make this enumeration consistent with kmp_lockseq_t in the include directory.
+typedef enum {
+ lockseq_indirect = 0,
+#define expand_seq(l,a) lockseq_##l,
+ KMP_FOREACH_D_LOCK(expand_seq, 0)
+ KMP_FOREACH_I_LOCK(expand_seq, 0)
+#undef expand_seq
+} kmp_dyna_lockseq_t;
+
+// Enumerates indirect lock tags.
+typedef enum {
+#define expand_tag(l,a) locktag_##l,
+ KMP_FOREACH_I_LOCK(expand_tag, 0)
+#undef expand_tag
+} kmp_indirect_locktag_t;
+
+// Utility macros that extract information from lock sequences.
+#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
+#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
+#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
+#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)
+
+// Enumerates direct lock tags starting from indirect tag.
+typedef enum {
+#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
+ KMP_FOREACH_D_LOCK(expand_tag, 0)
+#undef expand_tag
+} kmp_direct_locktag_t;
+
+// Indirect lock type
+typedef struct {
+ kmp_user_lock_p lock;
+ kmp_indirect_locktag_t type;
+} kmp_indirect_lock_t;
+
+// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
+extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
+extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
+extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
+extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
+extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
+
+// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
+extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
+extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
+extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
+
+// Extracts direct lock tag from a user lock pointer
+#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))
+
+// Extracts indirect lock index from a user lock pointer
+#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
+
+// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
+#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
+
+// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
+#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
+
+// Initializes a direct lock with the given lock pointer and lock sequence.
+#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
+
+// Initializes an indirect lock with the given lock pointer and lock sequence.
+#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
+
+// Returns "free" lock value for the given lock type.
+#define KMP_LOCK_FREE(type) (locktag_##type)
+
+// Returns "busy" lock value for the given lock teyp.
+#define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)
+
+// Returns lock value after removing (shifting) lock tag.
+#define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)
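+
+// An illustrative example derived from the macros above (arithmetic only, not
+// additional runtime code): lockseq_tas is the first direct sequence, so
+// locktag_tas == KMP_GET_D_TAG(lockseq_tas) == (1 << 1 | 1) == 3. An unlocked
+// TAS word is therefore KMP_LOCK_FREE(tas) == 3 (LSB set marks "direct"), a
+// word owned by gtid g is KMP_LOCK_BUSY(g + 1, tas) == ((g + 1) << 8) | 3, and
+// KMP_LOCK_STRIP() recovers g + 1 from it.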
+
+// Initializes global states and data structures for managing dynamic user locks.
+extern void __kmp_init_dynamic_user_locks();
+
+// Allocates and returns an indirect lock with the given indirect lock tag.
+extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
+
+// Cleans up global states and data structures for managing dynamic user locks.
+extern void __kmp_cleanup_indirect_user_locks();
+
+// Default user lock sequence when not using hinted locks.
+extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
+
+// Jump table for "set lock location", available only for indirect locks.
+extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
+#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \
+ if (__kmp_indirect_set_location[(lck)->type] != NULL) \
+ __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
+}
+
+// Jump table for "set lock flags", available only for indirect locks.
+extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
+#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \
+ if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
+ __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
+}
+
+// Jump table for "get lock location", available only for indirect locks.
+extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
+#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \
+ ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
+ : NULL )
+
+// Jump table for "get lock flags", available only for indirect locks.
+extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
+#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \
+ ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
+ : NULL )
+
+#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together
+
+// Lock table for indirect locks.
+typedef struct kmp_indirect_lock_table {
+ kmp_indirect_lock_t **table; // blocks of indirect locks allocated
+ kmp_lock_index_t size; // size of the indirect lock table
+ kmp_lock_index_t next; // index to the next lock to be allocated
+} kmp_indirect_lock_table_t;
+
+extern kmp_indirect_lock_table_t __kmp_i_lock_table;
+
+// Returns the indirect lock associated with the given index.
+#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)
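+
+// An illustrative example of the chunked indexing above (plain arithmetic):
+// with KMP_I_LOCK_CHUNK == 1024, index 1500 resolves to the second block and
+// its 476th entry, i.e.
+//   KMP_GET_I_LOCK(1500) == __kmp_i_lock_table.table[1] + 476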
+
+// Number of locks in a lock block, which is fixed to "1" for now.
+// TODO: No lock block implementation yet. If we do support it, we will need to manage a lock block
+// data structure for each indirect lock type.
+extern int __kmp_num_locks_in_block;
+
+// Fast lock table lookup without consistency checking
+#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \
+ ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
+ : *((kmp_indirect_lock_t **)(l)) )
+
+// Used once in kmp_error.c
+extern kmp_int32
+__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+# define KMP_LOCK_BUSY(v, type) (v)
+# define KMP_LOCK_FREE(type) 0
+# define KMP_LOCK_STRIP(v) (v)
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif /* KMP_LOCK_H */
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_omp.h b/contrib/libs/cxxsupp/openmp/kmp_omp.h
index 311d7e67ae..fc4de0f236 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_omp.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_omp.h
@@ -1,233 +1,233 @@
-#if USE_DEBUGGER
-/*
- * kmp_omp.h -- OpenMP definition for kmp_omp_struct_info_t.
- * This is for information about runtime library structures.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-/* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE
- * It should instead be modified in the OpenMP runtime and copied
- * to the interface library code. This way we can minimize the
- * problems that this is sure to cause having two copies of the
- * same file.
- *
- * files live in libomp and libomp_db/src/include
- */
-
-/* CHANGE THIS WHEN STRUCTURES BELOW CHANGE
- * Before we release this to a customer, please don't change this value. After it is released and
- * stable, then any new updates to the structures or data structure traversal algorithms need to
- * change this value.
- */
-#define KMP_OMP_VERSION 9
-
-typedef struct {
- kmp_int32 offset;
- kmp_int32 size;
-} offset_and_size_t;
-
-typedef struct {
- kmp_uint64 addr;
- kmp_int32 size;
- kmp_int32 padding;
-} addr_and_size_t;
-
-typedef struct {
- kmp_uint64 flags; // Flags for future extensions.
- kmp_uint64 file; // Pointer to name of source file where the parallel region is.
- kmp_uint64 func; // Pointer to name of routine where the parallel region is.
- kmp_int32 begin; // Beginning of source line range.
- kmp_int32 end; // End of source line range.
- kmp_int32 num_threads; // Specified number of threads.
-} kmp_omp_nthr_item_t;
-
-typedef struct {
- kmp_int32 num; // Number of items in the arrray.
- kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t.
-} kmp_omp_nthr_info_t;
-
-
-/* This structure is known to the idb interface library */
-typedef struct {
-
- /* Change this only if you make a fundamental data structure change here */
- kmp_int32 lib_version;
-
- /* sanity check. Only should be checked if versions are identical
- * This is also used for backward compatibility to get the runtime
- * structure size if it the runtime is older than the interface */
- kmp_int32 sizeof_this_structure;
-
- /* OpenMP RTL version info. */
- addr_and_size_t major;
- addr_and_size_t minor;
- addr_and_size_t build;
- addr_and_size_t openmp_version;
- addr_and_size_t banner;
-
- /* Various globals. */
- addr_and_size_t threads; // Pointer to __kmp_threads.
- addr_and_size_t roots; // Pointer to __kmp_root.
- addr_and_size_t capacity; // Pointer to __kmp_threads_capacity.
- addr_and_size_t monitor; // Pointer to __kmp_monitor.
-#if ! KMP_USE_DYNAMIC_LOCK
- addr_and_size_t lock_table; // Pointer to __kmp_lock_table.
-#endif
- addr_and_size_t func_microtask;
- addr_and_size_t func_fork;
- addr_and_size_t func_fork_teams;
- addr_and_size_t team_counter;
- addr_and_size_t task_counter;
- addr_and_size_t nthr_info;
- kmp_int32 address_width;
- kmp_int32 indexed_locks;
- kmp_int32 last_barrier; // The end in enum barrier_type
- kmp_int32 deque_size; // TASK_DEQUE_SIZE
-
- /* thread structure information. */
- kmp_int32 th_sizeof_struct;
- offset_and_size_t th_info; // descriptor for thread
- offset_and_size_t th_team; // team for this thread
- offset_and_size_t th_root; // root for this thread
- offset_and_size_t th_serial_team; // serial team under this thread
- offset_and_size_t th_ident; // location for this thread (if available)
- offset_and_size_t th_spin_here; // is thread waiting for lock (if available)
- offset_and_size_t th_next_waiting; // next thread waiting for lock (if available)
- offset_and_size_t th_task_team; // task team struct
- offset_and_size_t th_current_task; // innermost task being executed
- offset_and_size_t th_task_state; // alternating 0/1 for task team identification
- offset_and_size_t th_bar;
- offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it arrives to the barrier
-
-#if OMP_40_ENABLED
- /* teams information */
- offset_and_size_t th_teams_microtask;// entry address for teams construct
- offset_and_size_t th_teams_level; // initial level of teams construct
- offset_and_size_t th_teams_nteams; // number of teams in a league
- offset_and_size_t th_teams_nth; // number of threads in each team of the league
-#endif
-
- /* kmp_desc structure (for info field above) */
- kmp_int32 ds_sizeof_struct;
- offset_and_size_t ds_tid; // team thread id
- offset_and_size_t ds_gtid; // global thread id
- offset_and_size_t ds_thread; // native thread id
-
- /* team structure information */
- kmp_int32 t_sizeof_struct;
- offset_and_size_t t_master_tid; // tid of master in parent team
- offset_and_size_t t_ident; // location of parallel region
- offset_and_size_t t_parent; // parent team
- offset_and_size_t t_nproc; // # team threads
- offset_and_size_t t_threads; // array of threads
- offset_and_size_t t_serialized; // # levels of serialized teams
- offset_and_size_t t_id; // unique team id
- offset_and_size_t t_pkfn;
- offset_and_size_t t_task_team; // task team structure
- offset_and_size_t t_implicit_task; // taskdata for the thread's implicit task
-#if OMP_40_ENABLED
- offset_and_size_t t_cancel_request;
-#endif
- offset_and_size_t t_bar;
- offset_and_size_t t_b_master_arrived; // increased by 1 when master arrives to a barrier
- offset_and_size_t t_b_team_arrived; // increased by one when all the threads arrived
-
- /* root structure information */
- kmp_int32 r_sizeof_struct;
- offset_and_size_t r_root_team; // team at root
- offset_and_size_t r_hot_team; // hot team for this root
- offset_and_size_t r_uber_thread; // root thread
- offset_and_size_t r_root_id; // unique root id (if available)
-
- /* ident structure information */
- kmp_int32 id_sizeof_struct;
- offset_and_size_t id_psource; /* address of string ";file;func;line1;line2;;". */
- offset_and_size_t id_flags;
-
- /* lock structure information */
- kmp_int32 lk_sizeof_struct;
- offset_and_size_t lk_initialized;
- offset_and_size_t lk_location;
- offset_and_size_t lk_tail_id;
- offset_and_size_t lk_head_id;
- offset_and_size_t lk_next_ticket;
- offset_and_size_t lk_now_serving;
- offset_and_size_t lk_owner_id;
- offset_and_size_t lk_depth_locked;
- offset_and_size_t lk_lock_flags;
-
-#if ! KMP_USE_DYNAMIC_LOCK
- /* lock_table_t */
- kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. */
- offset_and_size_t lt_used;
- offset_and_size_t lt_allocated;
- offset_and_size_t lt_table;
-#endif
-
- /* task_team_t */
- kmp_int32 tt_sizeof_struct;
- offset_and_size_t tt_threads_data;
- offset_and_size_t tt_found_tasks;
- offset_and_size_t tt_nproc;
- offset_and_size_t tt_unfinished_threads;
- offset_and_size_t tt_active;
-
- /* kmp_taskdata_t */
- kmp_int32 td_sizeof_struct;
- offset_and_size_t td_task_id; // task id
- offset_and_size_t td_flags; // task flags
- offset_and_size_t td_team; // team for this task
- offset_and_size_t td_parent; // parent task
- offset_and_size_t td_level; // task testing level
- offset_and_size_t td_ident; // task identifier
- offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) not yet deallocated
- offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete
-
- /* Taskwait */
- offset_and_size_t td_taskwait_ident;
- offset_and_size_t td_taskwait_counter;
- offset_and_size_t td_taskwait_thread; // gtid + 1 of thread encountered taskwait
-
-#if OMP_40_ENABLED
- /* Taskgroup */
- offset_and_size_t td_taskgroup; // pointer to the current taskgroup
- offset_and_size_t td_task_count; // number of allocated and not yet complete tasks
- offset_and_size_t td_cancel; // request for cancellation of this taskgroup
-
- /* Task dependency */
- offset_and_size_t td_depnode; // pointer to graph node if the task has dependencies
- offset_and_size_t dn_node;
- offset_and_size_t dn_next;
- offset_and_size_t dn_successors;
- offset_and_size_t dn_task;
- offset_and_size_t dn_npredecessors;
- offset_and_size_t dn_nrefs;
-#endif
- offset_and_size_t dn_routine;
-
- /* kmp_thread_data_t */
- kmp_int32 hd_sizeof_struct;
- offset_and_size_t hd_deque;
- offset_and_size_t hd_deque_head;
- offset_and_size_t hd_deque_tail;
- offset_and_size_t hd_deque_ntasks;
- offset_and_size_t hd_deque_last_stolen;
-
- // The last field of stable version.
- kmp_uint64 last_field;
-
-} kmp_omp_struct_info_t;
-
-#endif /* USE_DEBUGGER */
-
-/* end of file */
+#if USE_DEBUGGER
+/*
+ * kmp_omp.h -- OpenMP definition for kmp_omp_struct_info_t.
+ * This is for information about runtime library structures.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE
+ * It should instead be modified in the OpenMP runtime and copied
+ * to the interface library code. This way we can minimize the
+ * problems that having two copies of the same file is sure
+ * to cause.
+ *
+ * files live in libomp and libomp_db/src/include
+ */
+
+/* CHANGE THIS WHEN STRUCTURES BELOW CHANGE
+ * Before we release this to a customer, please don't change this value. Once it is released and
+ * stable, any new updates to the structures or data structure traversal algorithms need to
+ * change this value.
+ */
+#define KMP_OMP_VERSION 9
+
+typedef struct {
+ kmp_int32 offset;
+ kmp_int32 size;
+} offset_and_size_t;
+
+typedef struct {
+ kmp_uint64 addr;
+ kmp_int32 size;
+ kmp_int32 padding;
+} addr_and_size_t;
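+
+// An illustrative note on how these descriptors are meant to be consumed (an
+// assumption about the debugger side, not runtime code): given the base
+// address of a structure and an offset_and_size_t descriptor f, the field
+// value is the f.size bytes starting at
+//
+//   (char *)base + f.offset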
+
+typedef struct {
+ kmp_uint64 flags; // Flags for future extensions.
+ kmp_uint64 file; // Pointer to name of source file where the parallel region is.
+ kmp_uint64 func; // Pointer to name of routine where the parallel region is.
+ kmp_int32 begin; // Beginning of source line range.
+ kmp_int32 end; // End of source line range.
+ kmp_int32 num_threads; // Specified number of threads.
+} kmp_omp_nthr_item_t;
+
+typedef struct {
+ kmp_int32 num; // Number of items in the array.
+ kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t.
+} kmp_omp_nthr_info_t;
+
+
+/* This structure is known to the idb interface library */
+typedef struct {
+
+ /* Change this only if you make a fundamental data structure change here */
+ kmp_int32 lib_version;
+
+ /* sanity check. Should only be checked if versions are identical.
+ * This is also used for backward compatibility to get the runtime
+ * structure size if the runtime is older than the interface */
+ kmp_int32 sizeof_this_structure;
+
+ /* OpenMP RTL version info. */
+ addr_and_size_t major;
+ addr_and_size_t minor;
+ addr_and_size_t build;
+ addr_and_size_t openmp_version;
+ addr_and_size_t banner;
+
+ /* Various globals. */
+ addr_and_size_t threads; // Pointer to __kmp_threads.
+ addr_and_size_t roots; // Pointer to __kmp_root.
+ addr_and_size_t capacity; // Pointer to __kmp_threads_capacity.
+ addr_and_size_t monitor; // Pointer to __kmp_monitor.
+#if ! KMP_USE_DYNAMIC_LOCK
+ addr_and_size_t lock_table; // Pointer to __kmp_lock_table.
+#endif
+ addr_and_size_t func_microtask;
+ addr_and_size_t func_fork;
+ addr_and_size_t func_fork_teams;
+ addr_and_size_t team_counter;
+ addr_and_size_t task_counter;
+ addr_and_size_t nthr_info;
+ kmp_int32 address_width;
+ kmp_int32 indexed_locks;
+ kmp_int32 last_barrier; // The end in enum barrier_type
+ kmp_int32 deque_size; // TASK_DEQUE_SIZE
+
+ /* thread structure information. */
+ kmp_int32 th_sizeof_struct;
+ offset_and_size_t th_info; // descriptor for thread
+ offset_and_size_t th_team; // team for this thread
+ offset_and_size_t th_root; // root for this thread
+ offset_and_size_t th_serial_team; // serial team under this thread
+ offset_and_size_t th_ident; // location for this thread (if available)
+ offset_and_size_t th_spin_here; // is thread waiting for lock (if available)
+ offset_and_size_t th_next_waiting; // next thread waiting for lock (if available)
+ offset_and_size_t th_task_team; // task team struct
+ offset_and_size_t th_current_task; // innermost task being executed
+ offset_and_size_t th_task_state; // alternating 0/1 for task team identification
+ offset_and_size_t th_bar;
+ offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it arrives at the barrier
+
+#if OMP_40_ENABLED
+ /* teams information */
+ offset_and_size_t th_teams_microtask;// entry address for teams construct
+ offset_and_size_t th_teams_level; // initial level of teams construct
+ offset_and_size_t th_teams_nteams; // number of teams in a league
+ offset_and_size_t th_teams_nth; // number of threads in each team of the league
+#endif
+
+ /* kmp_desc structure (for info field above) */
+ kmp_int32 ds_sizeof_struct;
+ offset_and_size_t ds_tid; // team thread id
+ offset_and_size_t ds_gtid; // global thread id
+ offset_and_size_t ds_thread; // native thread id
+
+ /* team structure information */
+ kmp_int32 t_sizeof_struct;
+ offset_and_size_t t_master_tid; // tid of master in parent team
+ offset_and_size_t t_ident; // location of parallel region
+ offset_and_size_t t_parent; // parent team
+ offset_and_size_t t_nproc; // # team threads
+ offset_and_size_t t_threads; // array of threads
+ offset_and_size_t t_serialized; // # levels of serialized teams
+ offset_and_size_t t_id; // unique team id
+ offset_and_size_t t_pkfn;
+ offset_and_size_t t_task_team; // task team structure
+ offset_and_size_t t_implicit_task; // taskdata for the thread's implicit task
+#if OMP_40_ENABLED
+ offset_and_size_t t_cancel_request;
+#endif
+ offset_and_size_t t_bar;
+ offset_and_size_t t_b_master_arrived; // increased by 1 when the master arrives at a barrier
+ offset_and_size_t t_b_team_arrived; // increased by one when all the threads have arrived
+
+ /* root structure information */
+ kmp_int32 r_sizeof_struct;
+ offset_and_size_t r_root_team; // team at root
+ offset_and_size_t r_hot_team; // hot team for this root
+ offset_and_size_t r_uber_thread; // root thread
+ offset_and_size_t r_root_id; // unique root id (if available)
+
+ /* ident structure information */
+ kmp_int32 id_sizeof_struct;
+ offset_and_size_t id_psource; /* address of string ";file;func;line1;line2;;". */
+ offset_and_size_t id_flags;
+
+ /* lock structure information */
+ kmp_int32 lk_sizeof_struct;
+ offset_and_size_t lk_initialized;
+ offset_and_size_t lk_location;
+ offset_and_size_t lk_tail_id;
+ offset_and_size_t lk_head_id;
+ offset_and_size_t lk_next_ticket;
+ offset_and_size_t lk_now_serving;
+ offset_and_size_t lk_owner_id;
+ offset_and_size_t lk_depth_locked;
+ offset_and_size_t lk_lock_flags;
+
+#if ! KMP_USE_DYNAMIC_LOCK
+ /* lock_table_t */
+ kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. */
+ offset_and_size_t lt_used;
+ offset_and_size_t lt_allocated;
+ offset_and_size_t lt_table;
+#endif
+
+ /* task_team_t */
+ kmp_int32 tt_sizeof_struct;
+ offset_and_size_t tt_threads_data;
+ offset_and_size_t tt_found_tasks;
+ offset_and_size_t tt_nproc;
+ offset_and_size_t tt_unfinished_threads;
+ offset_and_size_t tt_active;
+
+ /* kmp_taskdata_t */
+ kmp_int32 td_sizeof_struct;
+ offset_and_size_t td_task_id; // task id
+ offset_and_size_t td_flags; // task flags
+ offset_and_size_t td_team; // team for this task
+ offset_and_size_t td_parent; // parent task
+ offset_and_size_t td_level; // task nesting level
+ offset_and_size_t td_ident; // task identifier
+ offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) not yet deallocated
+ offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete
+
+ /* Taskwait */
+ offset_and_size_t td_taskwait_ident;
+ offset_and_size_t td_taskwait_counter;
+ offset_and_size_t td_taskwait_thread; // gtid + 1 of the thread that encountered taskwait
+
+#if OMP_40_ENABLED
+ /* Taskgroup */
+ offset_and_size_t td_taskgroup; // pointer to the current taskgroup
+ offset_and_size_t td_task_count; // number of allocated and not yet complete tasks
+ offset_and_size_t td_cancel; // request for cancellation of this taskgroup
+
+ /* Task dependency */
+ offset_and_size_t td_depnode; // pointer to graph node if the task has dependencies
+ offset_and_size_t dn_node;
+ offset_and_size_t dn_next;
+ offset_and_size_t dn_successors;
+ offset_and_size_t dn_task;
+ offset_and_size_t dn_npredecessors;
+ offset_and_size_t dn_nrefs;
+#endif
+ offset_and_size_t dn_routine;
+
+ /* kmp_thread_data_t */
+ kmp_int32 hd_sizeof_struct;
+ offset_and_size_t hd_deque;
+ offset_and_size_t hd_deque_head;
+ offset_and_size_t hd_deque_tail;
+ offset_and_size_t hd_deque_ntasks;
+ offset_and_size_t hd_deque_last_stolen;
+
+ // The last field of the stable version.
+ kmp_uint64 last_field;
+
+} kmp_omp_struct_info_t;
+
+#endif /* USE_DEBUGGER */
+
+/* end of file */
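
A minimal sketch of how a debugger-side consumer might use one of the offset_and_size_t descriptors above to read a field out of the target process. The dbg_offset_and_size_t layout and the read_target_memory() helper below are assumptions for illustration only, not the actual debugger interface (the real descriptor types are defined earlier in this header).

#include <stdint.h>
#include <string.h>

typedef struct { uint64_t offset; uint64_t size; } dbg_offset_and_size_t; /* assumed shape */

/* Assumed helper: copy `size` bytes from the target process into `buf`. */
extern int read_target_memory(uint64_t addr, void *buf, size_t size);

/* Read one described field (e.g. th_task_state) from a thread structure
 * located at `th_base` in the target, widening it into a uint64_t. */
static int read_described_field(uint64_t th_base, dbg_offset_and_size_t d,
                                uint64_t *out)
{
    unsigned char buf[8] = {0};
    uint64_t v = 0;
    if (d.size == 0 || d.size > sizeof(buf))
        return -1;                        /* field absent or too wide */
    if (read_target_memory(th_base + d.offset, buf, (size_t)d.size) != 0)
        return -1;
    memcpy(&v, buf, (size_t)d.size);      /* little-endian target assumed */
    *out = v;
    return 0;
}
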
diff --git a/contrib/libs/cxxsupp/openmp/kmp_os.h b/contrib/libs/cxxsupp/openmp/kmp_os.h
index 90b26d0567..4f89c7379c 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_os.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_os.h
@@ -1,726 +1,726 @@
-/*
- * kmp_os.h -- KPTS runtime header file.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_OS_H
-#define KMP_OS_H
-
-#include "kmp_config.h"
-#include <stdlib.h>
-
-#define KMP_FTN_PLAIN 1
-#define KMP_FTN_APPEND 2
-#define KMP_FTN_UPPER 3
-/*
-#define KMP_FTN_PREPEND 4
-#define KMP_FTN_UAPPEND 5
-*/
-
-#define KMP_PTR_SKIP (sizeof(void*))
-
-/* -------------------------- Compiler variations ------------------------ */
-
-#define KMP_OFF 0
-#define KMP_ON 1
-
-#define KMP_MEM_CONS_VOLATILE 0
-#define KMP_MEM_CONS_FENCE 1
-
-#ifndef KMP_MEM_CONS_MODEL
-# define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE
-#endif
-
-/* ------------------------- Compiler recognition ---------------------- */
-#define KMP_COMPILER_ICC 0
-#define KMP_COMPILER_GCC 0
-#define KMP_COMPILER_CLANG 0
-#define KMP_COMPILER_MSVC 0
-
-#if defined( __INTEL_COMPILER )
-# undef KMP_COMPILER_ICC
-# define KMP_COMPILER_ICC 1
-#elif defined( __clang__ )
-# undef KMP_COMPILER_CLANG
-# define KMP_COMPILER_CLANG 1
-#elif defined( __GNUC__ )
-# undef KMP_COMPILER_GCC
-# define KMP_COMPILER_GCC 1
-#elif defined( _MSC_VER )
-# undef KMP_COMPILER_MSVC
-# define KMP_COMPILER_MSVC 1
-#else
-# error Unknown compiler
-#endif
-
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
-# define KMP_AFFINITY_SUPPORTED 1
-# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
-# define KMP_GROUP_AFFINITY 1
-# else
-# define KMP_GROUP_AFFINITY 0
-# endif
-#else
-# define KMP_AFFINITY_SUPPORTED 0
-# define KMP_GROUP_AFFINITY 0
-#endif
-
-/* Check for quad-precision extension. */
-#define KMP_HAVE_QUAD 0
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-# if KMP_COMPILER_ICC
- /* _Quad is already defined for icc */
-# undef KMP_HAVE_QUAD
-# define KMP_HAVE_QUAD 1
-# elif KMP_COMPILER_CLANG
- /* Clang doesn't support a software-implemented
- 128-bit extended precision type yet */
- typedef long double _Quad;
-# elif KMP_COMPILER_GCC
- typedef __float128 _Quad;
-# undef KMP_HAVE_QUAD
-# define KMP_HAVE_QUAD 1
-# elif KMP_COMPILER_MSVC
- typedef long double _Quad;
-# endif
-#else
-# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC
- typedef long double _Quad;
-# undef KMP_HAVE_QUAD
-# define KMP_HAVE_QUAD 1
-# endif
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#if KMP_OS_WINDOWS
- typedef char kmp_int8;
- typedef unsigned char kmp_uint8;
- typedef short kmp_int16;
- typedef unsigned short kmp_uint16;
- typedef int kmp_int32;
- typedef unsigned int kmp_uint32;
-# define KMP_INT32_SPEC "d"
-# define KMP_UINT32_SPEC "u"
-# ifndef KMP_STRUCT64
- typedef __int64 kmp_int64;
- typedef unsigned __int64 kmp_uint64;
- #define KMP_INT64_SPEC "I64d"
- #define KMP_UINT64_SPEC "I64u"
-# else
- struct kmp_struct64 {
- kmp_int32 a,b;
- };
- typedef struct kmp_struct64 kmp_int64;
- typedef struct kmp_struct64 kmp_uint64;
- /* Not sure what to use for KMP_[U]INT64_SPEC here */
-# endif
-# if KMP_ARCH_X86_64
-# define KMP_INTPTR 1
- typedef __int64 kmp_intptr_t;
- typedef unsigned __int64 kmp_uintptr_t;
-# define KMP_INTPTR_SPEC "I64d"
-# define KMP_UINTPTR_SPEC "I64u"
-# endif
-#endif /* KMP_OS_WINDOWS */
-
-#if KMP_OS_UNIX
- typedef char kmp_int8;
- typedef unsigned char kmp_uint8;
- typedef short kmp_int16;
- typedef unsigned short kmp_uint16;
- typedef int kmp_int32;
- typedef unsigned int kmp_uint32;
- typedef long long kmp_int64;
- typedef unsigned long long kmp_uint64;
-# define KMP_INT32_SPEC "d"
-# define KMP_UINT32_SPEC "u"
-# define KMP_INT64_SPEC "lld"
-# define KMP_UINT64_SPEC "llu"
-#endif /* KMP_OS_UNIX */
-
-#if KMP_ARCH_X86 || KMP_ARCH_ARM
-# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
-#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
-# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
-#else
-# error "Can't determine size_t printf format specifier."
-#endif
-
-#if KMP_ARCH_X86
-# define KMP_SIZE_T_MAX (0xFFFFFFFF)
-#else
-# define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF)
-#endif
-
-typedef size_t kmp_size_t;
-typedef float kmp_real32;
-typedef double kmp_real64;
-
-#ifndef KMP_INTPTR
-# define KMP_INTPTR 1
- typedef long kmp_intptr_t;
- typedef unsigned long kmp_uintptr_t;
-# define KMP_INTPTR_SPEC "ld"
-# define KMP_UINTPTR_SPEC "lu"
-#endif
-
-#ifdef KMP_I8
- typedef kmp_int64 kmp_int;
- typedef kmp_uint64 kmp_uint;
-# define KMP_INT_SPEC KMP_INT64_SPEC
-# define KMP_UINT_SPEC KMP_UINT64_SPEC
-# define KMP_INT_MAX ((kmp_int64)0x7FFFFFFFFFFFFFFFLL)
-# define KMP_INT_MIN ((kmp_int64)0x8000000000000000LL)
-#else
- typedef kmp_int32 kmp_int;
- typedef kmp_uint32 kmp_uint;
-# define KMP_INT_SPEC KMP_INT32_SPEC
-# define KMP_UINT_SPEC KMP_UINT32_SPEC
-# define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF)
-# define KMP_INT_MIN ((kmp_int32)0x80000000)
-#endif /* KMP_I8 */
-
-#ifdef __cplusplus
- //-------------------------------------------------------------------------
- // template for debug prints specification ( d, u, lld, llu ), and to obtain
- // signed/unsigned flavors of a type
- template< typename T >
- struct traits_t {
- typedef T signed_t;
- typedef T unsigned_t;
- typedef T floating_t;
- static char const * spec;
- };
- // int
- template<>
- struct traits_t< signed int > {
- typedef signed int signed_t;
- typedef unsigned int unsigned_t;
- typedef double floating_t;
- static char const * spec;
- };
- // unsigned int
- template<>
- struct traits_t< unsigned int > {
- typedef signed int signed_t;
- typedef unsigned int unsigned_t;
- typedef double floating_t;
- static char const * spec;
- };
- // long long
- template<>
- struct traits_t< signed long long > {
- typedef signed long long signed_t;
- typedef unsigned long long unsigned_t;
- typedef long double floating_t;
- static char const * spec;
- };
- // unsigned long long
- template<>
- struct traits_t< unsigned long long > {
- typedef signed long long signed_t;
- typedef unsigned long long unsigned_t;
- typedef long double floating_t;
- static char const * spec;
- };
- //-------------------------------------------------------------------------
-#endif // __cplusplus
-
-#define KMP_EXPORT extern /* export declaration in guide libraries */
-
-#if __GNUC__ >= 4
- #define __forceinline __inline
-#endif
-
-#define PAGE_SIZE (0x4000)
-#define PAGE_ALIGNED(_addr) ( ! ((size_t) _addr & \
- (size_t)(PAGE_SIZE - 1)))
-#define ALIGN_TO_PAGE(x) (void *)(((size_t)(x)) & ~((size_t)(PAGE_SIZE - 1)))
-
-/* ---------------------- Support for cache alignment, padding, etc. -----------------*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */
-
-/* Define the default size of the cache line */
-#ifndef CACHE_LINE
- #define CACHE_LINE 128 /* cache line size in bytes */
-#else
- #if ( CACHE_LINE < 64 ) && ! defined( KMP_OS_DARWIN )
- // 2006-02-13: This produces too many warnings on OS X*. Disable it for a while...
- #warning CACHE_LINE is too small.
- #endif
-#endif /* CACHE_LINE */
-
-#define KMP_CACHE_PREFETCH(ADDR) /* nothing */
-
-/* Temporary note: if performance testing of this passes, we can remove
- all references to KMP_DO_ALIGN and replace with KMP_ALIGN. */
-#if KMP_OS_UNIX && defined(__GNUC__)
-# define KMP_DO_ALIGN(bytes) __attribute__((aligned(bytes)))
-# define KMP_ALIGN_CACHE __attribute__((aligned(CACHE_LINE)))
-# define KMP_ALIGN_CACHE_INTERNODE __attribute__((aligned(INTERNODE_CACHE_LINE)))
-# define KMP_ALIGN(bytes) __attribute__((aligned(bytes)))
-#else
-# define KMP_DO_ALIGN(bytes) __declspec( align(bytes) )
-# define KMP_ALIGN_CACHE __declspec( align(CACHE_LINE) )
-# define KMP_ALIGN_CACHE_INTERNODE __declspec( align(INTERNODE_CACHE_LINE) )
-# define KMP_ALIGN(bytes) __declspec( align(bytes) )
-#endif
-
-/* General purpose fence types for memory operations */
-enum kmp_mem_fence_type {
- kmp_no_fence, /* No memory fence */
- kmp_acquire_fence, /* Acquire (read) memory fence */
- kmp_release_fence, /* Release (write) memory fence */
- kmp_full_fence /* Full (read+write) memory fence */
-};
-
-
-//
-// Synchronization primitives
-//
-
-#if KMP_ASM_INTRINS && KMP_OS_WINDOWS
-
-#include <Windows.h>
-
-#pragma intrinsic(InterlockedExchangeAdd)
-#pragma intrinsic(InterlockedCompareExchange)
-#pragma intrinsic(InterlockedExchange)
-#pragma intrinsic(InterlockedExchange64)
-
-//
-// Using InterlockedIncrement / InterlockedDecrement causes a library loading
-// ordering problem, so we use InterlockedExchangeAdd instead.
-//
-# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 )
-# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 )
-# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 )
-# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 )
-# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 )
-# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 )
-# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) )
-
-extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v );
-extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v );
-extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v );
-# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) )
-
-# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) )
-# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) )
-
-inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v)
-{
- kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v);
- return *(kmp_real32*)&tmp;
-}
-
-//
-// Routines that we still need to implement in assembly.
-//
-extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v );
-extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v );
-extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v );
-
-extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-
-extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v );
-extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v );
-extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v );
-extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v );
-extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
-# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) )
-
-//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 )
-# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) )
-# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) )
-//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 )
-# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL )
-# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL )
-//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 )
-//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 )
-# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL )
-# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL )
-//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 )
-//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 )
-# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL )
-# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL )
-//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) )
-# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) )
-
-# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) )
-# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) )
-# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) )
-# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) )
-
-# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
-
-# if KMP_ARCH_X86
-# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) )
-# else /* 64 bit pointers */
-# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) )
-# endif /* KMP_ARCH_X86 */
-
-# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) )
-//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) )
-
-# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) );
-# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) );
-//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) );
-//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) );
-//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) );
-# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) );
-
-
-#elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
-# define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add( (kmp_int8 *)(p), (v) )
-
-/* cast p to correct type so that proper intrinsic will be used */
-# define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 )
-# define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or( (kmp_int8 *)(p), (v) )
-# define KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and( (kmp_int8 *)(p), (v) )
-# define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 )
-# define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL )
-# define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL )
-# define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 )
-# define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 )
-# define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL )
-# define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL )
-# define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 )
-# define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 )
-# define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL )
-# define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL )
-# define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add( (kmp_int32 *)(p), (v) )
-# define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add( (kmp_int64 *)(p), (v) )
-
-# define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or( (kmp_int32 *)(p), (v) )
-# define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and( (kmp_int32 *)(p), (v) )
-# define KMP_TEST_THEN_OR64(p, v) __sync_fetch_and_or( (kmp_int64 *)(p), (v) )
-# define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and( (kmp_int64 *)(p), (v) )
-
-# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) )
-# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) )
-# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) )
-# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) )
-# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) )
-# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) )
-# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) )
-# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) )
-# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __sync_bool_compare_and_swap( (volatile void **)(p),(void *)(cv),(void *)(sv) )
-
-# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) )
-# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) )
-# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) )
-# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) )
-
-#define KMP_XCHG_FIXED8(p, v) __sync_lock_test_and_set( (volatile kmp_uint8 *)(p), (kmp_uint8)(v) )
-#define KMP_XCHG_FIXED16(p, v) __sync_lock_test_and_set( (volatile kmp_uint16 *)(p), (kmp_uint16)(v) )
-#define KMP_XCHG_FIXED32(p, v) __sync_lock_test_and_set( (volatile kmp_uint32 *)(p), (kmp_uint32)(v) )
-#define KMP_XCHG_FIXED64(p, v) __sync_lock_test_and_set( (volatile kmp_uint64 *)(p), (kmp_uint64)(v) )
-
-extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v );
-extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v );
-extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v );
-inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v)
-{
- kmp_int32 tmp = __sync_lock_test_and_set( (kmp_int32*)p, *(kmp_int32*)&v);
- return *(kmp_real32*)&tmp;
-}
-
-inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v)
-{
- kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v);
- return *(kmp_real64*)&tmp;
-}
-
-#else
-
-extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v );
-extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v );
-extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v );
-
-extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-
-extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v );
-extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v );
-extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v );
-extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v );
-extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v );
-# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) )
-extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
-
-# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 )
-# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) )
-# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) )
-# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 )
-# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL )
-# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL )
-# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 )
-# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 )
-# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL )
-# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL )
-# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 )
-# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 )
-# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL )
-# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL )
-# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) )
-# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) )
-
-# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) )
-# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) )
-# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) )
-# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) )
-
-# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
-
-# if KMP_ARCH_X86
-# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) )
-# else /* 64 bit pointers */
-# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) )
-# endif /* KMP_ARCH_X86 */
-
-# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) )
-# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) )
-
-# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) );
-# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) );
-# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) );
-# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) );
-# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) );
-# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) );
-
-#endif /* KMP_ASM_INTRINS */
-
-
-/* ------------- relaxed consistency memory model stuff ------------------ */
-
-#if KMP_OS_WINDOWS
-# ifdef __ABSOFT_WIN
-# define KMP_MB() asm ("nop")
-# define KMP_IMB() asm ("nop")
-# else
-# define KMP_MB() /* _asm{ nop } */
-# define KMP_IMB() /* _asm{ nop } */
-# endif
-#endif /* KMP_OS_WINDOWS */
-
-#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64
-# define KMP_MB() __sync_synchronize()
-#endif
-
-#ifndef KMP_MB
-# define KMP_MB() /* nothing to do */
-#endif
-
-#ifndef KMP_IMB
-# define KMP_IMB() /* nothing to do */
-#endif
-
-#ifndef KMP_ST_REL32
-# define KMP_ST_REL32(A,D) ( *(A) = (D) )
-#endif
-
-#ifndef KMP_ST_REL64
-# define KMP_ST_REL64(A,D) ( *(A) = (D) )
-#endif
-
-#ifndef KMP_LD_ACQ32
-# define KMP_LD_ACQ32(A) ( *(A) )
-#endif
-
-#ifndef KMP_LD_ACQ64
-# define KMP_LD_ACQ64(A) ( *(A) )
-#endif
-
-#define TCR_1(a) (a)
-#define TCW_1(a,b) (a) = (b)
-/* ------------------------------------------------------------------------ */
-//
-// FIXME - maybe this should this be
-//
-// #define TCR_4(a) (*(volatile kmp_int32 *)(&a))
-// #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b))
-//
-// #define TCR_8(a) (*(volatile kmp_int64 *)(a))
-// #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b))
-//
-// I'm fairly certain this is the correct thing to do, but I'm afraid
-// of performance regressions.
-//
-
-#define TCR_4(a) (a)
-#define TCW_4(a,b) (a) = (b)
-#define TCR_8(a) (a)
-#define TCW_8(a,b) (a) = (b)
-#define TCR_SYNC_4(a) (a)
-#define TCW_SYNC_4(a,b) (a) = (b)
-#define TCX_SYNC_4(a,b,c) KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), (kmp_int32)(b), (kmp_int32)(c))
-#define TCR_SYNC_8(a) (a)
-#define TCW_SYNC_8(a,b) (a) = (b)
-#define TCX_SYNC_8(a,b,c) KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), (kmp_int64)(b), (kmp_int64)(c))
-
-#if KMP_ARCH_X86
-// What about ARM?
- #define TCR_PTR(a) ((void *)TCR_4(a))
- #define TCW_PTR(a,b) TCW_4((a),(b))
- #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a))
- #define TCW_SYNC_PTR(a,b) TCW_SYNC_4((a),(b))
- #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_4((a),(b),(c)))
-
-#else /* 64 bit pointers */
-
- #define TCR_PTR(a) ((void *)TCR_8(a))
- #define TCW_PTR(a,b) TCW_8((a),(b))
- #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a))
- #define TCW_SYNC_PTR(a,b) TCW_SYNC_8((a),(b))
- #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_8((a),(b),(c)))
-
-#endif /* KMP_ARCH_X86 */
-
-/*
- * If these FTN_{TRUE,FALSE} values change, may need to
- * change several places where they are used to check that
- * language is Fortran, not C.
- */
-
-#ifndef FTN_TRUE
-# define FTN_TRUE TRUE
-#endif
-
-#ifndef FTN_FALSE
-# define FTN_FALSE FALSE
-#endif
-
-typedef void (*microtask_t)( int *gtid, int *npr, ... );
-
-#ifdef USE_VOLATILE_CAST
-# define VOLATILE_CAST(x) (volatile x)
-#else
-# define VOLATILE_CAST(x) (x)
-#endif
-
-#ifdef KMP_I8
-# define KMP_WAIT_YIELD __kmp_wait_yield_8
-# define KMP_EQ __kmp_eq_8
-# define KMP_NEQ __kmp_neq_8
-# define KMP_LT __kmp_lt_8
-# define KMP_GE __kmp_ge_8
-# define KMP_LE __kmp_le_8
-#else
-# define KMP_WAIT_YIELD __kmp_wait_yield_4
-# define KMP_EQ __kmp_eq_4
-# define KMP_NEQ __kmp_neq_4
-# define KMP_LT __kmp_lt_4
-# define KMP_GE __kmp_ge_4
-# define KMP_LE __kmp_le_4
-#endif /* KMP_I8 */
-
-/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
-# define STATIC_EFI2_WORKAROUND
-#else
-# define STATIC_EFI2_WORKAROUND static
-#endif
-
-// Support of BGET usage
-#ifndef KMP_USE_BGET
-#define KMP_USE_BGET 1
-#endif
-
-
-// Switches for OSS builds
-#ifndef USE_SYSFS_INFO
-# define USE_SYSFS_INFO 0
-#endif
-#ifndef USE_CMPXCHG_FIX
-# define USE_CMPXCHG_FIX 1
-#endif
-
-// Enable dynamic user lock
-#if OMP_41_ENABLED
-# define KMP_USE_DYNAMIC_LOCK 1
-#endif
-
-// Enable TSX if dynamic user lock is turned on
-#if KMP_USE_DYNAMIC_LOCK
-// Visual studio can't handle the asm sections in this code
-# define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC
-# ifdef KMP_USE_ADAPTIVE_LOCKS
-# undef KMP_USE_ADAPTIVE_LOCKS
-# endif
-# define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX
-#endif
-
-// Enable tick time conversion of ticks to seconds
-#if KMP_STATS_ENABLED
-# define KMP_HAVE_TICK_TIME (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64))
-#endif
-
-// Warning levels
-enum kmp_warnings_level {
- kmp_warnings_off = 0, /* No warnings */
- kmp_warnings_low, /* Minimal warnings (default) */
- kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */
- kmp_warnings_verbose /* reserved */
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-
-#endif /* KMP_OS_H */
-// Safe C API
-#include "kmp_safe_c_api.h"
-
+/*
+ * kmp_os.h -- KPTS runtime header file.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_OS_H
+#define KMP_OS_H
+
+#include "kmp_config.h"
+#include <stdlib.h>
+
+#define KMP_FTN_PLAIN 1
+#define KMP_FTN_APPEND 2
+#define KMP_FTN_UPPER 3
+/*
+#define KMP_FTN_PREPEND 4
+#define KMP_FTN_UAPPEND 5
+*/
+
+#define KMP_PTR_SKIP (sizeof(void*))
+
+/* -------------------------- Compiler variations ------------------------ */
+
+#define KMP_OFF 0
+#define KMP_ON 1
+
+#define KMP_MEM_CONS_VOLATILE 0
+#define KMP_MEM_CONS_FENCE 1
+
+#ifndef KMP_MEM_CONS_MODEL
+# define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE
+#endif
+
+/* ------------------------- Compiler recognition ---------------------- */
+#define KMP_COMPILER_ICC 0
+#define KMP_COMPILER_GCC 0
+#define KMP_COMPILER_CLANG 0
+#define KMP_COMPILER_MSVC 0
+
+#if defined( __INTEL_COMPILER )
+# undef KMP_COMPILER_ICC
+# define KMP_COMPILER_ICC 1
+#elif defined( __clang__ )
+# undef KMP_COMPILER_CLANG
+# define KMP_COMPILER_CLANG 1
+#elif defined( __GNUC__ )
+# undef KMP_COMPILER_GCC
+# define KMP_COMPILER_GCC 1
+#elif defined( _MSC_VER )
+# undef KMP_COMPILER_MSVC
+# define KMP_COMPILER_MSVC 1
+#else
+# error Unknown compiler
+#endif
+
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
+# define KMP_AFFINITY_SUPPORTED 1
+# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
+# define KMP_GROUP_AFFINITY 1
+# else
+# define KMP_GROUP_AFFINITY 0
+# endif
+#else
+# define KMP_AFFINITY_SUPPORTED 0
+# define KMP_GROUP_AFFINITY 0
+#endif
+
+/* Check for quad-precision extension. */
+#define KMP_HAVE_QUAD 0
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# if KMP_COMPILER_ICC
+ /* _Quad is already defined for icc */
+# undef KMP_HAVE_QUAD
+# define KMP_HAVE_QUAD 1
+# elif KMP_COMPILER_CLANG
+ /* Clang doesn't support a software-implemented
+ 128-bit extended precision type yet */
+ typedef long double _Quad;
+# elif KMP_COMPILER_GCC
+ typedef __float128 _Quad;
+# undef KMP_HAVE_QUAD
+# define KMP_HAVE_QUAD 1
+# elif KMP_COMPILER_MSVC
+ typedef long double _Quad;
+# endif
+#else
+# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC
+ typedef long double _Quad;
+# undef KMP_HAVE_QUAD
+# define KMP_HAVE_QUAD 1
+# endif
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+#if KMP_OS_WINDOWS
+ typedef char kmp_int8;
+ typedef unsigned char kmp_uint8;
+ typedef short kmp_int16;
+ typedef unsigned short kmp_uint16;
+ typedef int kmp_int32;
+ typedef unsigned int kmp_uint32;
+# define KMP_INT32_SPEC "d"
+# define KMP_UINT32_SPEC "u"
+# ifndef KMP_STRUCT64
+ typedef __int64 kmp_int64;
+ typedef unsigned __int64 kmp_uint64;
+ #define KMP_INT64_SPEC "I64d"
+ #define KMP_UINT64_SPEC "I64u"
+# else
+ struct kmp_struct64 {
+ kmp_int32 a,b;
+ };
+ typedef struct kmp_struct64 kmp_int64;
+ typedef struct kmp_struct64 kmp_uint64;
+ /* Not sure what to use for KMP_[U]INT64_SPEC here */
+# endif
+# if KMP_ARCH_X86_64
+# define KMP_INTPTR 1
+ typedef __int64 kmp_intptr_t;
+ typedef unsigned __int64 kmp_uintptr_t;
+# define KMP_INTPTR_SPEC "I64d"
+# define KMP_UINTPTR_SPEC "I64u"
+# endif
+#endif /* KMP_OS_WINDOWS */
+
+#if KMP_OS_UNIX
+ typedef char kmp_int8;
+ typedef unsigned char kmp_uint8;
+ typedef short kmp_int16;
+ typedef unsigned short kmp_uint16;
+ typedef int kmp_int32;
+ typedef unsigned int kmp_uint32;
+ typedef long long kmp_int64;
+ typedef unsigned long long kmp_uint64;
+# define KMP_INT32_SPEC "d"
+# define KMP_UINT32_SPEC "u"
+# define KMP_INT64_SPEC "lld"
+# define KMP_UINT64_SPEC "llu"
+#endif /* KMP_OS_UNIX */
+
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
+# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
+# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
+#else
+# error "Can't determine size_t printf format specifier."
+#endif
+
+#if KMP_ARCH_X86
+# define KMP_SIZE_T_MAX (0xFFFFFFFF)
+#else
+# define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF)
+#endif
+
+typedef size_t kmp_size_t;
+typedef float kmp_real32;
+typedef double kmp_real64;
+
+#ifndef KMP_INTPTR
+# define KMP_INTPTR 1
+ typedef long kmp_intptr_t;
+ typedef unsigned long kmp_uintptr_t;
+# define KMP_INTPTR_SPEC "ld"
+# define KMP_UINTPTR_SPEC "lu"
+#endif
+
+#ifdef KMP_I8
+ typedef kmp_int64 kmp_int;
+ typedef kmp_uint64 kmp_uint;
+# define KMP_INT_SPEC KMP_INT64_SPEC
+# define KMP_UINT_SPEC KMP_UINT64_SPEC
+# define KMP_INT_MAX ((kmp_int64)0x7FFFFFFFFFFFFFFFLL)
+# define KMP_INT_MIN ((kmp_int64)0x8000000000000000LL)
+#else
+ typedef kmp_int32 kmp_int;
+ typedef kmp_uint32 kmp_uint;
+# define KMP_INT_SPEC KMP_INT32_SPEC
+# define KMP_UINT_SPEC KMP_UINT32_SPEC
+# define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF)
+# define KMP_INT_MIN ((kmp_int32)0x80000000)
+#endif /* KMP_I8 */
+
+#ifdef __cplusplus
+ //-------------------------------------------------------------------------
+ // template for debug print format specifiers ( d, u, lld, llu ) and for obtaining
+ // signed/unsigned flavors of a type
+ template< typename T >
+ struct traits_t {
+ typedef T signed_t;
+ typedef T unsigned_t;
+ typedef T floating_t;
+ static char const * spec;
+ };
+ // int
+ template<>
+ struct traits_t< signed int > {
+ typedef signed int signed_t;
+ typedef unsigned int unsigned_t;
+ typedef double floating_t;
+ static char const * spec;
+ };
+ // unsigned int
+ template<>
+ struct traits_t< unsigned int > {
+ typedef signed int signed_t;
+ typedef unsigned int unsigned_t;
+ typedef double floating_t;
+ static char const * spec;
+ };
+ // long long
+ template<>
+ struct traits_t< signed long long > {
+ typedef signed long long signed_t;
+ typedef unsigned long long unsigned_t;
+ typedef long double floating_t;
+ static char const * spec;
+ };
+ // unsigned long long
+ template<>
+ struct traits_t< unsigned long long > {
+ typedef signed long long signed_t;
+ typedef unsigned long long unsigned_t;
+ typedef long double floating_t;
+ static char const * spec;
+ };
+ //-------------------------------------------------------------------------
+#endif // __cplusplus
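
A minimal sketch of how the traits_t<> specializations above are meant to be used: selecting the printf specifier for a type at compile time. example_dump() below is illustrative only; the spec strings themselves ("d", "u", "lld", "llu") are defined elsewhere in the runtime.

#ifdef __cplusplus
#include <cstdio>
  template< typename T >
  void example_dump( char const * name, T value ) {
      char fmt[ 32 ];
      // build "name = %<spec>\n" from the per-type specifier, then print
      std::snprintf( fmt, sizeof( fmt ), "%%s = %%%s\n", traits_t< T >::spec );
      std::printf( fmt, name, value );
  }
  // example_dump( "nproc", 8 ) would print through the "%d" specifier,
  // example_dump( "ticks", 123LL ) through "%lld".
#endif // __cplusplus
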
+
+#define KMP_EXPORT extern /* export declaration in guide libraries */
+
+#if __GNUC__ >= 4
+ #define __forceinline __inline
+#endif
+
+#define PAGE_SIZE (0x4000)
+#define PAGE_ALIGNED(_addr) ( ! ((size_t) _addr & \
+ (size_t)(PAGE_SIZE - 1)))
+#define ALIGN_TO_PAGE(x) (void *)(((size_t)(x)) & ~((size_t)(PAGE_SIZE - 1)))
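
A quick worked example of the page macros above (a sketch; the values assume the 0x4000-byte PAGE_SIZE defined here, which need not match the OS page size):

static inline void example_page_macros(void) {
    char *addr    = (char *)0x12345;
    void *rounded = ALIGN_TO_PAGE(addr);   /* == (void *)0x10000               */
    int   aligned = PAGE_ALIGNED(addr);    /* == 0: the low 14 bits are not 0  */
    (void)rounded; (void)aligned;
    /* PAGE_ALIGNED((char *)0x14000) would be non-zero: exactly on a boundary. */
}
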
+
+/* ---------------------- Support for cache alignment, padding, etc. -----------------*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */
+
+/* Define the default size of the cache line */
+#ifndef CACHE_LINE
+ #define CACHE_LINE 128 /* cache line size in bytes */
+#else
+ #if ( CACHE_LINE < 64 ) && ! defined( KMP_OS_DARWIN )
+ // 2006-02-13: This produces too many warnings on OS X*. Disable it for a while...
+ #warning CACHE_LINE is too small.
+ #endif
+#endif /* CACHE_LINE */
+
+#define KMP_CACHE_PREFETCH(ADDR) /* nothing */
+
+/* Temporary note: if performance testing of this passes, we can remove
+ all references to KMP_DO_ALIGN and replace them with KMP_ALIGN. */
+#if KMP_OS_UNIX && defined(__GNUC__)
+# define KMP_DO_ALIGN(bytes) __attribute__((aligned(bytes)))
+# define KMP_ALIGN_CACHE __attribute__((aligned(CACHE_LINE)))
+# define KMP_ALIGN_CACHE_INTERNODE __attribute__((aligned(INTERNODE_CACHE_LINE)))
+# define KMP_ALIGN(bytes) __attribute__((aligned(bytes)))
+#else
+# define KMP_DO_ALIGN(bytes) __declspec( align(bytes) )
+# define KMP_ALIGN_CACHE __declspec( align(CACHE_LINE) )
+# define KMP_ALIGN_CACHE_INTERNODE __declspec( align(INTERNODE_CACHE_LINE) )
+# define KMP_ALIGN(bytes) __declspec( align(bytes) )
+#endif
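
A hedged usage sketch for the alignment macros above: putting a hot shared counter on its own cache line so concurrent updates do not false-share. The names below are illustrative, not actual runtime globals.

static KMP_ALIGN_CACHE volatile kmp_int64 example_hot_counter = 0;

struct example_padded_indices {
    KMP_ALIGN_CACHE volatile kmp_int32 head; /* each index gets its own cache line */
    KMP_ALIGN_CACHE volatile kmp_int32 tail;
};
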
+
+/* General purpose fence types for memory operations */
+enum kmp_mem_fence_type {
+ kmp_no_fence, /* No memory fence */
+ kmp_acquire_fence, /* Acquire (read) memory fence */
+ kmp_release_fence, /* Release (write) memory fence */
+ kmp_full_fence /* Full (read+write) memory fence */
+};
+
+
+//
+// Synchronization primitives
+//
+
+#if KMP_ASM_INTRINS && KMP_OS_WINDOWS
+
+#include <Windows.h>
+
+#pragma intrinsic(InterlockedExchangeAdd)
+#pragma intrinsic(InterlockedCompareExchange)
+#pragma intrinsic(InterlockedExchange)
+#pragma intrinsic(InterlockedExchange64)
+
+//
+// Using InterlockedIncrement / InterlockedDecrement causes a library loading
+// ordering problem, so we use InterlockedExchangeAdd instead.
+//
+# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 )
+# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 )
+# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 )
+# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 )
+# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 )
+# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 )
+# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) )
+
+extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v );
+# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) )
+
+# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) )
+# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) )
+
+inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v)
+{
+ kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v);
+ return *(kmp_real32*)&tmp;
+}
+
+//
+// Routines that we still need to implement in assembly.
+//
+extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v );
+
+extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+
+extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v );
+extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v );
+extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
+# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) )
+
+//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 )
+# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) )
+# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) )
+//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 )
+# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL )
+# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL )
+//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 )
+//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 )
+# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL )
+# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL )
+//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 )
+//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 )
+# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL )
+# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL )
+//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) )
+# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) )
+
+# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) )
+# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) )
+# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) )
+# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) )
+
+# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+
+# if KMP_ARCH_X86
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) )
+# else /* 64 bit pointers */
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) )
+# endif /* KMP_ARCH_X86 */
+
+# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) )
+//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) )
+
+# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) );
+# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) );
+//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) );
+//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) );
+//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) );
+# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) );
+
+
+#elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add( (kmp_int8 *)(p), (v) )
+
+/* cast p to the correct type so that the proper intrinsic is used */
+# define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 )
+# define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or( (kmp_int8 *)(p), (v) )
+# define KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and( (kmp_int8 *)(p), (v) )
+# define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 )
+# define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL )
+# define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL )
+# define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 )
+# define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 )
+# define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL )
+# define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL )
+# define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 )
+# define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 )
+# define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL )
+# define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL )
+# define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add( (kmp_int32 *)(p), (v) )
+# define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add( (kmp_int64 *)(p), (v) )
+
+# define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or( (kmp_int32 *)(p), (v) )
+# define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and( (kmp_int32 *)(p), (v) )
+# define KMP_TEST_THEN_OR64(p, v) __sync_fetch_and_or( (kmp_int64 *)(p), (v) )
+# define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and( (kmp_int64 *)(p), (v) )
+
+# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) )
+# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) )
+# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) )
+# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) )
+# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) )
+# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) )
+# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) )
+# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) )
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __sync_bool_compare_and_swap( (volatile void **)(p),(void *)(cv),(void *)(sv) )
+
+# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) )
+# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) )
+# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) )
+# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) )
+
+#define KMP_XCHG_FIXED8(p, v) __sync_lock_test_and_set( (volatile kmp_uint8 *)(p), (kmp_uint8)(v) )
+#define KMP_XCHG_FIXED16(p, v) __sync_lock_test_and_set( (volatile kmp_uint16 *)(p), (kmp_uint16)(v) )
+#define KMP_XCHG_FIXED32(p, v) __sync_lock_test_and_set( (volatile kmp_uint32 *)(p), (kmp_uint32)(v) )
+#define KMP_XCHG_FIXED64(p, v) __sync_lock_test_and_set( (volatile kmp_uint64 *)(p), (kmp_uint64)(v) )
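
A minimal sketch, not runtime code, of what the exchange macros above provide: a test-and-set spin lock built from KMP_XCHG_FIXED32 (the runtime's real locks live in kmp_lock.h).

typedef volatile kmp_uint32 example_tas_lock_t;   /* 0 = free, 1 = held */

static inline void example_tas_acquire( example_tas_lock_t *lk ) {
    /* KMP_XCHG_FIXED32 returns the previous value; keep trying until it was 0 */
    while ( KMP_XCHG_FIXED32( lk, 1 ) != 0 ) {
        while ( *lk != 0 ) { /* spin on a plain read to reduce bus traffic */ }
    }
}

static inline void example_tas_release( example_tas_lock_t *lk ) {
    *lk = 0;   /* plain store; production code would add a release fence here */
}
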
+
+extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v );
+inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v)
+{
+ kmp_int32 tmp = __sync_lock_test_and_set( (kmp_int32*)p, *(kmp_int32*)&v);
+ return *(kmp_real32*)&tmp;
+}
+
+inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v)
+{
+ kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v);
+ return *(kmp_real64*)&tmp;
+}
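
The two inlines above exchange the raw bit pattern of a float/double by reinterpreting it as a same-sized integer. A union-based equivalent, shown only as a sketch of the same idea, avoids the pointer cast on the value being stored:

static inline kmp_real64 example_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ) {
    union { kmp_real64 f; kmp_int64 i; } in, out;
    in.f  = v;                                          /* bits of v            */
    out.i = __sync_lock_test_and_set( (volatile kmp_int64 *)p, in.i );
    return out.f;                                       /* bits back to double  */
}
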
+
+#else
+
+extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v );
+
+extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+
+extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v );
+extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v );
+# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) )
+extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
+
+# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 )
+# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) )
+# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) )
+# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 )
+# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL )
+# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL )
+# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 )
+# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 )
+# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL )
+# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL )
+# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 )
+# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 )
+# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL )
+# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL )
+# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) )
+# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) )
+
+# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) )
+# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) )
+# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) )
+# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) )
+
+# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+
+# if KMP_ARCH_X86
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) )
+# else /* 64 bit pointers */
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) )
+# endif /* KMP_ARCH_X86 */
+
+# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) )
+
+# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) );
+# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) );
+# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) );
+# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) );
+# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) );
+# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) );
+
+#endif /* KMP_ASM_INTRINS */
+
+
+/* ------------- relaxed consistency memory model stuff ------------------ */
+
+#if KMP_OS_WINDOWS
+# ifdef __ABSOFT_WIN
+# define KMP_MB() asm ("nop")
+# define KMP_IMB() asm ("nop")
+# else
+# define KMP_MB() /* _asm{ nop } */
+# define KMP_IMB() /* _asm{ nop } */
+# endif
+#endif /* KMP_OS_WINDOWS */
+
+#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64
+# define KMP_MB() __sync_synchronize()
+#endif
+
+#ifndef KMP_MB
+# define KMP_MB() /* nothing to do */
+#endif
+
+#ifndef KMP_IMB
+# define KMP_IMB() /* nothing to do */
+#endif
+
+#ifndef KMP_ST_REL32
+# define KMP_ST_REL32(A,D) ( *(A) = (D) )
+#endif
+
+#ifndef KMP_ST_REL64
+# define KMP_ST_REL64(A,D) ( *(A) = (D) )
+#endif
+
+#ifndef KMP_LD_ACQ32
+# define KMP_LD_ACQ32(A) ( *(A) )
+#endif
+
+#ifndef KMP_LD_ACQ64
+# define KMP_LD_ACQ64(A) ( *(A) )
+#endif
+
+#define TCR_1(a) (a)
+#define TCW_1(a,b) (a) = (b)
+/* ------------------------------------------------------------------------ */
+//
+// FIXME - maybe this should be
+//
+// #define TCR_4(a) (*(volatile kmp_int32 *)(&a))
+// #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b))
+//
+// #define TCR_8(a) (*(volatile kmp_int64 *)(a))
+// #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b))
+//
+// I'm fairly certain this is the correct thing to do, but I'm afraid
+// of performance regressions.
+//
+
+#define TCR_4(a) (a)
+#define TCW_4(a,b) (a) = (b)
+#define TCR_8(a) (a)
+#define TCW_8(a,b) (a) = (b)
+#define TCR_SYNC_4(a) (a)
+#define TCW_SYNC_4(a,b) (a) = (b)
+#define TCX_SYNC_4(a,b,c) KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), (kmp_int32)(b), (kmp_int32)(c))
+#define TCR_SYNC_8(a) (a)
+#define TCW_SYNC_8(a,b) (a) = (b)
+#define TCX_SYNC_8(a,b,c) KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), (kmp_int64)(b), (kmp_int64)(c))
+
+#if KMP_ARCH_X86
+// What about ARM?
+ #define TCR_PTR(a) ((void *)TCR_4(a))
+ #define TCW_PTR(a,b) TCW_4((a),(b))
+ #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a))
+ #define TCW_SYNC_PTR(a,b) TCW_SYNC_4((a),(b))
+ #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_4((a),(b),(c)))
+
+#else /* 64 bit pointers */
+
+ #define TCR_PTR(a) ((void *)TCR_8(a))
+ #define TCW_PTR(a,b) TCW_8((a),(b))
+ #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a))
+ #define TCW_SYNC_PTR(a,b) TCW_SYNC_8((a),(b))
+ #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_8((a),(b),(c)))
+
+#endif /* KMP_ARCH_X86 */
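Illustrative aside, not part of the patch hunk above: the FIXME comment proposes routing TCR_4/TCW_4 through a volatile cast so the compiler always re-reads and re-writes the underlying memory. A minimal sketch of that proposal, using hypothetical helper names (tcr_4/tcw_4 here are not the real macros):

    #include <stdint.h>

    /* Force a real load/store on every call by casting through volatile. */
    static inline int32_t tcr_4(const int32_t *a)      { return *(volatile const int32_t *)a; }
    static inline void    tcw_4(int32_t *a, int32_t b) { *(volatile int32_t *)a = b; }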
+
+/*
+ * If these FTN_{TRUE,FALSE} values change, we may need to
+ * change several places where they are used to check that
+ * the language is Fortran, not C.
+ */
+
+#ifndef FTN_TRUE
+# define FTN_TRUE TRUE
+#endif
+
+#ifndef FTN_FALSE
+# define FTN_FALSE FALSE
+#endif
+
+typedef void (*microtask_t)( int *gtid, int *npr, ... );
+
+#ifdef USE_VOLATILE_CAST
+# define VOLATILE_CAST(x) (volatile x)
+#else
+# define VOLATILE_CAST(x) (x)
+#endif
+
+#ifdef KMP_I8
+# define KMP_WAIT_YIELD __kmp_wait_yield_8
+# define KMP_EQ __kmp_eq_8
+# define KMP_NEQ __kmp_neq_8
+# define KMP_LT __kmp_lt_8
+# define KMP_GE __kmp_ge_8
+# define KMP_LE __kmp_le_8
+#else
+# define KMP_WAIT_YIELD __kmp_wait_yield_4
+# define KMP_EQ __kmp_eq_4
+# define KMP_NEQ __kmp_neq_4
+# define KMP_LT __kmp_lt_4
+# define KMP_GE __kmp_ge_4
+# define KMP_LE __kmp_le_4
+#endif /* KMP_I8 */
+
+/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */
+#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
+# define STATIC_EFI2_WORKAROUND
+#else
+# define STATIC_EFI2_WORKAROUND static
+#endif
+
+// Support of BGET usage
+#ifndef KMP_USE_BGET
+#define KMP_USE_BGET 1
+#endif
+
+
+// Switches for OSS builds
+#ifndef USE_SYSFS_INFO
+# define USE_SYSFS_INFO 0
+#endif
+#ifndef USE_CMPXCHG_FIX
+# define USE_CMPXCHG_FIX 1
+#endif
+
+// Enable dynamic user lock
+#if OMP_41_ENABLED
+# define KMP_USE_DYNAMIC_LOCK 1
+#endif
+
+// Enable TSX if dynamic user lock is turned on
+#if KMP_USE_DYNAMIC_LOCK
+// Visual Studio can't handle the asm sections in this code
+# define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC
+# ifdef KMP_USE_ADAPTIVE_LOCKS
+# undef KMP_USE_ADAPTIVE_LOCKS
+# endif
+# define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX
+#endif
+
+// Enable tick time conversion of ticks to seconds
+#if KMP_STATS_ENABLED
+# define KMP_HAVE_TICK_TIME (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64))
+#endif
+
+// Warning levels
+enum kmp_warnings_level {
+ kmp_warnings_off = 0, /* No warnings */
+ kmp_warnings_low, /* Minimal warnings (default) */
+ kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */
+ kmp_warnings_verbose /* reserved */
+};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif /* KMP_OS_H */
+// Safe C API
+#include "kmp_safe_c_api.h"
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_platform.h b/contrib/libs/cxxsupp/openmp/kmp_platform.h
index a7c734cab9..0707f2b40a 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_platform.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_platform.h
@@ -1,168 +1,168 @@
-/*
- * kmp_platform.h -- header for determining operating system and architecture
- */
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_PLATFORM_H
-#define KMP_PLATFORM_H
-
-/* ---------------------- Operating system recognition ------------------- */
-
-#define KMP_OS_LINUX 0
-#define KMP_OS_FREEBSD 0
-#define KMP_OS_NETBSD 0
-#define KMP_OS_DARWIN 0
-#define KMP_OS_WINDOWS 0
-#define KMP_OS_CNK 0
-#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */
-
-
-#ifdef _WIN32
-# undef KMP_OS_WINDOWS
-# define KMP_OS_WINDOWS 1
-#endif
-
-#if ( defined __APPLE__ && defined __MACH__ )
-# undef KMP_OS_DARWIN
-# define KMP_OS_DARWIN 1
-#endif
-
-// in some ppc64 linux installations, only the second condition is met
-#if ( defined __linux )
-# undef KMP_OS_LINUX
-# define KMP_OS_LINUX 1
-#elif ( defined __linux__)
-# undef KMP_OS_LINUX
-# define KMP_OS_LINUX 1
-#else
-#endif
-
-#if ( defined __FreeBSD__ )
-# undef KMP_OS_FREEBSD
-# define KMP_OS_FREEBSD 1
-#endif
-
-#if ( defined __NetBSD__ )
-# undef KMP_OS_NETBSD
-# define KMP_OS_NETBSD 1
-#endif
-
-#if ( defined __bgq__ )
-# undef KMP_OS_CNK
-# define KMP_OS_CNK 1
-#endif
-
-#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS)
-# error Unknown OS
-#endif
-
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN
-# undef KMP_OS_UNIX
-# define KMP_OS_UNIX 1
-#endif
-
-/* ---------------------- Architecture recognition ------------------- */
-
-#define KMP_ARCH_X86 0
-#define KMP_ARCH_X86_64 0
-#define KMP_ARCH_AARCH64 0
-#define KMP_ARCH_PPC64_BE 0
-#define KMP_ARCH_PPC64_LE 0
-#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE)
-
-#if KMP_OS_WINDOWS
-# if defined _M_AMD64
-# undef KMP_ARCH_X86_64
-# define KMP_ARCH_X86_64 1
-# else
-# undef KMP_ARCH_X86
-# define KMP_ARCH_X86 1
-# endif
-#endif
-
-#if KMP_OS_UNIX
-# if defined __x86_64
-# undef KMP_ARCH_X86_64
-# define KMP_ARCH_X86_64 1
-# elif defined __i386
-# undef KMP_ARCH_X86
-# define KMP_ARCH_X86 1
-# elif defined __powerpc64__
-# if defined __LITTLE_ENDIAN__
-# undef KMP_ARCH_PPC64_LE
-# define KMP_ARCH_PPC64_LE 1
-# else
-# undef KMP_ARCH_PPC64_BE
-# define KMP_ARCH_PPC64_BE 1
-# endif
-# elif defined __aarch64__
-# undef KMP_ARCH_AARCH64
-# define KMP_ARCH_AARCH64 1
-# endif
-#endif
-
-#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \
- defined(__ARM_ARCH_7A__)
-# define KMP_ARCH_ARMV7 1
-#endif
-
-#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \
- defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
- defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \
- defined(__ARM_ARCH_6ZK__)
-# define KMP_ARCH_ARMV6 1
-#endif
-
-#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \
- defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
- defined(__ARM_ARCH_5TEJ__)
-# define KMP_ARCH_ARMV5 1
-#endif
-
-#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \
- defined(__ARM_ARCH_4T__)
-# define KMP_ARCH_ARMV4 1
-#endif
-
-#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \
- defined(__ARM_ARCH_3M__)
-# define KMP_ARCH_ARMV3 1
-#endif
-
-#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__)
-# define KMP_ARCH_ARMV2 1
-#endif
-
-#if defined(KMP_ARCH_ARMV2)
-# define KMP_ARCH_ARM 1
-#endif
-
-#if defined(__MIC__) || defined(__MIC2__)
-# define KMP_MIC 1
-# if __MIC2__ || __KNC__
-# define KMP_MIC1 0
-# define KMP_MIC2 1
-# else
-# define KMP_MIC1 1
-# define KMP_MIC2 0
-# endif
-#else
-# define KMP_MIC 0
-# define KMP_MIC1 0
-# define KMP_MIC2 0
-#endif
-
-// TODO: Fixme - This is clever, but really fugly
-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64)
-# error Unknown or unsupported architecture
-#endif
-
-#endif // KMP_PLATFORM_H
+/*
+ * kmp_platform.h -- header for determining operating system and architecture
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_PLATFORM_H
+#define KMP_PLATFORM_H
+
+/* ---------------------- Operating system recognition ------------------- */
+
+#define KMP_OS_LINUX 0
+#define KMP_OS_FREEBSD 0
+#define KMP_OS_NETBSD 0
+#define KMP_OS_DARWIN 0
+#define KMP_OS_WINDOWS 0
+#define KMP_OS_CNK 0
+#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */
+
+
+#ifdef _WIN32
+# undef KMP_OS_WINDOWS
+# define KMP_OS_WINDOWS 1
+#endif
+
+#if ( defined __APPLE__ && defined __MACH__ )
+# undef KMP_OS_DARWIN
+# define KMP_OS_DARWIN 1
+#endif
+
+// in some ppc64 linux installations, only the second condition is met
+#if ( defined __linux )
+# undef KMP_OS_LINUX
+# define KMP_OS_LINUX 1
+#elif ( defined __linux__)
+# undef KMP_OS_LINUX
+# define KMP_OS_LINUX 1
+#else
+#endif
+
+#if ( defined __FreeBSD__ )
+# undef KMP_OS_FREEBSD
+# define KMP_OS_FREEBSD 1
+#endif
+
+#if ( defined __NetBSD__ )
+# undef KMP_OS_NETBSD
+# define KMP_OS_NETBSD 1
+#endif
+
+#if ( defined __bgq__ )
+# undef KMP_OS_CNK
+# define KMP_OS_CNK 1
+#endif
+
+#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS)
+# error Unknown OS
+#endif
+
+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN
+# undef KMP_OS_UNIX
+# define KMP_OS_UNIX 1
+#endif
+
+/* ---------------------- Architecture recognition ------------------- */
+
+#define KMP_ARCH_X86 0
+#define KMP_ARCH_X86_64 0
+#define KMP_ARCH_AARCH64 0
+#define KMP_ARCH_PPC64_BE 0
+#define KMP_ARCH_PPC64_LE 0
+#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE)
+
+#if KMP_OS_WINDOWS
+# if defined _M_AMD64
+# undef KMP_ARCH_X86_64
+# define KMP_ARCH_X86_64 1
+# else
+# undef KMP_ARCH_X86
+# define KMP_ARCH_X86 1
+# endif
+#endif
+
+#if KMP_OS_UNIX
+# if defined __x86_64
+# undef KMP_ARCH_X86_64
+# define KMP_ARCH_X86_64 1
+# elif defined __i386
+# undef KMP_ARCH_X86
+# define KMP_ARCH_X86 1
+# elif defined __powerpc64__
+# if defined __LITTLE_ENDIAN__
+# undef KMP_ARCH_PPC64_LE
+# define KMP_ARCH_PPC64_LE 1
+# else
+# undef KMP_ARCH_PPC64_BE
+# define KMP_ARCH_PPC64_BE 1
+# endif
+# elif defined __aarch64__
+# undef KMP_ARCH_AARCH64
+# define KMP_ARCH_AARCH64 1
+# endif
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \
+ defined(__ARM_ARCH_7A__)
+# define KMP_ARCH_ARMV7 1
+#endif
+
+#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_6ZK__)
+# define KMP_ARCH_ARMV6 1
+#endif
+
+#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \
+ defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
+ defined(__ARM_ARCH_5TEJ__)
+# define KMP_ARCH_ARMV5 1
+#endif
+
+#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \
+ defined(__ARM_ARCH_4T__)
+# define KMP_ARCH_ARMV4 1
+#endif
+
+#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \
+ defined(__ARM_ARCH_3M__)
+# define KMP_ARCH_ARMV3 1
+#endif
+
+#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__)
+# define KMP_ARCH_ARMV2 1
+#endif
+
+#if defined(KMP_ARCH_ARMV2)
+# define KMP_ARCH_ARM 1
+#endif
+
+#if defined(__MIC__) || defined(__MIC2__)
+# define KMP_MIC 1
+# if __MIC2__ || __KNC__
+# define KMP_MIC1 0
+# define KMP_MIC2 1
+# else
+# define KMP_MIC1 1
+# define KMP_MIC2 0
+# endif
+#else
+# define KMP_MIC 0
+# define KMP_MIC1 0
+# define KMP_MIC2 0
+#endif
+
+// TODO: Fixme - This is clever, but really fugly
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64)
+# error Unknown or unsupported architecture
+#endif
+
+#endif // KMP_PLATFORM_H
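Illustrative aside, not part of the patch above: kmp_platform.h uses a "default every flag to 0, #undef and redefine the matching one to 1, then verify that exactly one flag is set" pattern for both OS and architecture detection. The same pattern reduced to two hypothetical flags:

    #define MY_OS_WINDOWS 0
    #define MY_OS_LINUX   0

    #if defined(_WIN32)
    # undef  MY_OS_WINDOWS
    # define MY_OS_WINDOWS 1
    #elif defined(__linux__)
    # undef  MY_OS_LINUX
    # define MY_OS_LINUX 1
    #endif

    /* Exactly one flag must have been promoted to 1. */
    #if (1 != MY_OS_WINDOWS + MY_OS_LINUX)
    # error exactly one target OS must be detected
    #endif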
diff --git a/contrib/libs/cxxsupp/openmp/kmp_runtime.c b/contrib/libs/cxxsupp/openmp/kmp_runtime.c
index 015278a1ca..4749934808 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_runtime.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_runtime.c
@@ -1,7654 +1,7654 @@
-/*
- * kmp_runtime.c -- KPTS runtime support library
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_atomic.h"
-#include "kmp_wrapper_getpid.h"
-#include "kmp_environment.h"
-#include "kmp_itt.h"
-#include "kmp_str.h"
-#include "kmp_settings.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_error.h"
-#include "kmp_stats.h"
-#include "kmp_wait_release.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-/* these are temporary issues to be dealt with */
-#define KMP_USE_PRCTL 0
-#define KMP_USE_POOLED_ALLOC 0
-
-#if KMP_OS_WINDOWS
-#include <process.h>
-#endif
-
-#if defined(KMP_GOMP_COMPAT)
-char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
-#endif /* defined(KMP_GOMP_COMPAT) */
-
-char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
-#if OMP_40_ENABLED
- "4.0 (201307)";
-#else
- "3.1 (201107)";
-#endif
-
-#ifdef KMP_DEBUG
-char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
-#endif /* KMP_DEBUG */
-
-
-#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-kmp_info_t __kmp_monitor;
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* Forward declarations */
-
-void __kmp_cleanup( void );
-
-static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
-static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
-static void __kmp_partition_places( kmp_team_t *team );
-#endif
-static void __kmp_do_serial_initialize( void );
-void __kmp_fork_barrier( int gtid, int tid );
-void __kmp_join_barrier( int gtid );
-void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );
-
-#ifdef USE_LOAD_BALANCE
-static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
-#endif
-
-static int __kmp_expand_threads(int nWish, int nNeed);
-#if KMP_OS_WINDOWS
-static int __kmp_unregister_root_other_thread( int gtid );
-#endif
-static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
-static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
-static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* Calculate the identifier of the current thread */
-/* fast (and somewhat portable) way to get unique */
-/* identifier of executing thread. */
-/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
-
-int
-__kmp_get_global_thread_id( )
-{
- int i;
- kmp_info_t **other_threads;
- size_t stack_data;
- char *stack_addr;
- size_t stack_size;
- char *stack_base;
-
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
- __kmp_nth, __kmp_all_nth ));
-
- /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
- parallel region, made it return KMP_GTID_DNE to force serial_initialize by
- caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
- __kmp_init_gtid for this to work. */
-
- if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
-
-#ifdef KMP_TDATA_GTID
- if ( TCR_4(__kmp_gtid_mode) >= 3) {
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
- return __kmp_gtid;
- }
-#endif
- if ( TCR_4(__kmp_gtid_mode) >= 2) {
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
- return __kmp_gtid_get_specific();
- }
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
-
- stack_addr = (char*) & stack_data;
- other_threads = __kmp_threads;
-
- /*
- ATT: The code below is a source of potential bugs due to unsynchronized access to
- __kmp_threads array. For example:
- 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
- 2. Current thread is suspended by OS.
- 3. Another thread unregisters and finishes (debug versions of free() may fill memory
- with something like 0xEF).
- 4. Current thread is resumed.
- 5. Current thread reads junk from *thr.
- TODO: Fix it.
- --ln
- */
-
- for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
-
- kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
- if( !thr ) continue;
-
- stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
- stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
-
- /* stack grows down -- search through all of the active threads */
-
- if( stack_addr <= stack_base ) {
- size_t stack_diff = stack_base - stack_addr;
-
- if( stack_diff <= stack_size ) {
- /* The only way we can be closer than the allocated */
- /* stack size is if we are running on this thread. */
- KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
- return i;
- }
- }
- }
-
- /* get specific to try and determine our gtid */
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
- "thread, using TLS\n" ));
- i = __kmp_gtid_get_specific();
-
- /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
-
-    /* if we haven't been assigned a gtid, then return code */
- if( i<0 ) return i;
-
- /* dynamically updated stack window for uber threads to avoid get_specific call */
- if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
- KMP_FATAL( StackOverflow, i );
- }
-
- stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
- if( stack_addr > stack_base ) {
- TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
- TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
- other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
- } else {
- TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
- }
-
- /* Reprint stack bounds for ubermaster since they have been refined */
- if ( __kmp_storage_map ) {
- char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
- char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
- __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
- other_threads[i]->th.th_info.ds.ds_stacksize,
- "th_%d stack (refinement)", i );
- }
- return i;
-}
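Illustrative aside, not part of the patch above: the "internal algorithm" in __kmp_get_global_thread_id identifies the calling thread by testing whether the current stack address falls inside a registered thread's stack. That containment test, isolated into a hypothetical helper and assuming a downward-growing stack:

    #include <stddef.h>

    /* Returns 1 if addr lies within [stack_base - stack_size, stack_base]. */
    static int stack_contains(const char *stack_base, size_t stack_size, const char *addr)
    {
        return addr <= stack_base && (size_t)(stack_base - addr) <= stack_size;
    }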
-
-int
-__kmp_get_global_thread_id_reg( )
-{
- int gtid;
-
- if ( !__kmp_init_serial ) {
- gtid = KMP_GTID_DNE;
- } else
-#ifdef KMP_TDATA_GTID
- if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
- gtid = __kmp_gtid;
- } else
-#endif
- if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
- gtid = __kmp_gtid_get_specific();
- } else {
- KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
- gtid = __kmp_get_global_thread_id();
- }
-
- /* we must be a new uber master sibling thread */
- if( gtid == KMP_GTID_DNE ) {
- KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
- "Registering a new gtid.\n" ));
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
- if( !__kmp_init_serial ) {
- __kmp_do_serial_initialize();
- gtid = __kmp_gtid_get_specific();
- } else {
- gtid = __kmp_register_root(FALSE);
- }
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
- }
-
- KMP_DEBUG_ASSERT( gtid >=0 );
-
- return gtid;
-}
-
-/* caller must hold forkjoin_lock */
-void
-__kmp_check_stack_overlap( kmp_info_t *th )
-{
- int f;
- char *stack_beg = NULL;
- char *stack_end = NULL;
- int gtid;
-
- KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
- if ( __kmp_storage_map ) {
- stack_end = (char *) th->th.th_info.ds.ds_stackbase;
- stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
-
- gtid = __kmp_gtid_from_thread( th );
-
- if (gtid == KMP_GTID_MONITOR) {
- __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
- "th_%s stack (%s)", "mon",
- ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
- } else {
- __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
- "th_%d stack (%s)", gtid,
- ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
- }
- }
-
- /* No point in checking ubermaster threads since they use refinement and cannot overlap */
- gtid = __kmp_gtid_from_thread( th );
- if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
- {
- KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
- if ( stack_beg == NULL ) {
- stack_end = (char *) th->th.th_info.ds.ds_stackbase;
- stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
- }
-
- for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
- kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
-
- if( f_th && f_th != th ) {
- char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
- char *other_stack_beg = other_stack_end -
- (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
- if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
- (stack_end > other_stack_beg && stack_end < other_stack_end)) {
-
- /* Print the other stack values before the abort */
- if ( __kmp_storage_map )
- __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
- (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
- "th_%d stack (overlapped)",
- __kmp_gtid_from_thread( f_th ) );
-
- __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
- }
- }
- }
- }
- KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_infinite_loop( void )
-{
- static int done = FALSE;
-
- while (! done) {
- KMP_YIELD( 1 );
- }
-}
-
-#define MAX_MESSAGE 512
-
-void
-__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
- char buffer[MAX_MESSAGE];
- va_list ap;
-
- va_start( ap, format);
- KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
- __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
- __kmp_vprintf( kmp_err, buffer, ap );
-#if KMP_PRINT_DATA_PLACEMENT
- int node;
- if(gtid >= 0) {
- if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
- if( __kmp_storage_map_verbose ) {
- node = __kmp_get_host_node(p1);
- if(node < 0) /* doesn't work, so don't try this next time */
- __kmp_storage_map_verbose = FALSE;
- else {
- char *last;
- int lastNode;
- int localProc = __kmp_get_cpu_from_gtid(gtid);
-
- p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
- p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
- if(localProc >= 0)
- __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
- else
- __kmp_printf_no_lock(" GTID %d\n", gtid);
-# if KMP_USE_PRCTL
-/* The more elaborate format is disabled for now because of the prctl hanging bug. */
- do {
- last = p1;
- lastNode = node;
- /* This loop collates adjacent pages with the same host node. */
- do {
- (char*)p1 += PAGE_SIZE;
- } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
- __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
- (char*)p1 - 1, lastNode);
- } while(p1 <= p2);
-# else
- __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
- (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
- if(p1 < p2) {
- __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
- (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
- }
-# endif
- }
- }
- } else
- __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
- }
-#endif /* KMP_PRINT_DATA_PLACEMENT */
- __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
-}
-
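Illustrative aside, not part of the patch above: the data-placement report in __kmp_print_storage_map_gtid rounds the region bounds down to page boundaries with a mask before querying the host node. The same rounding as a hypothetical helper, assuming a power-of-two page size:

    #include <stddef.h>
    #include <stdint.h>

    /* Round p down to the start of its page; page_size must be a power of two. */
    static void *page_align_down(void *p, size_t page_size)
    {
        return (void *)((uintptr_t)p & ~(uintptr_t)(page_size - 1));
    }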
-void
-__kmp_warn( char const * format, ... )
-{
- char buffer[MAX_MESSAGE];
- va_list ap;
-
- if ( __kmp_generate_warnings == kmp_warnings_off ) {
- return;
- }
-
- va_start( ap, format );
-
- KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
- __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
- __kmp_vprintf( kmp_err, buffer, ap );
- __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
-
- va_end( ap );
-}
-
-void
-__kmp_abort_process()
-{
-
- // Later threads may stall here, but that's ok because abort() will kill them.
- __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
-
- if ( __kmp_debug_buf ) {
- __kmp_dump_debug_buffer();
- }; // if
-
- if ( KMP_OS_WINDOWS ) {
- // Let other threads know of abnormal termination and prevent deadlock
- // if abort happened during library initialization or shutdown
- __kmp_global.g.g_abort = SIGABRT;
-
- /*
-            On Windows* OS, abort() by default causes a pop-up error box, which stalls nightly
-            testing. Unfortunately, we cannot reliably suppress pop-up error boxes.
-            _set_abort_behavior() works well, but this function is not available in VS7 (this is
-            not a problem for the DLL, but it is a problem for the static OpenMP RTL).
-            SetErrorMode (and so the timelimit utility) does not help, at least in some versions
-            of the MS C RTL.
-
-            It seems the following sequence is the only way to simulate abort() and avoid the
-            pop-up error box.
- */
- raise( SIGABRT );
- _exit( 3 ); // Just in case, if signal ignored, exit anyway.
- } else {
- abort();
- }; // if
-
- __kmp_infinite_loop();
- __kmp_release_bootstrap_lock( & __kmp_exit_lock );
-
-} // __kmp_abort_process
-
-void
-__kmp_abort_thread( void )
-{
- // TODO: Eliminate g_abort global variable and this function.
- // In case of abort just call abort(), it will kill all the threads.
- __kmp_infinite_loop();
-} // __kmp_abort_thread
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * Print out the storage map for the major kmp_info_t thread data structures
- * that are allocated together.
- */
-
-static void
-__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
-{
- __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
-
- __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
- "th_%d.th_info", gtid );
-
- __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
- "th_%d.th_local", gtid );
-
- __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
- sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
-
- __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
- &thr->th.th_bar[bs_plain_barrier+1],
- sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
-
- __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
- &thr->th.th_bar[bs_forkjoin_barrier+1],
- sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
-
- #if KMP_FAST_REDUCTION_BARRIER
- __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
- &thr->th.th_bar[bs_reduction_barrier+1],
- sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
- #endif // KMP_FAST_REDUCTION_BARRIER
-}
-
-/*
- * Print out the storage map for the major kmp_team_t team data structures
- * that are allocated together.
- */
-
-static void
-__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
-{
- int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
- __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
- header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
- sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
-
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
- sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
- sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
-
- #if KMP_FAST_REDUCTION_BARRIER
- __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
- sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
- #endif // KMP_FAST_REDUCTION_BARRIER
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
- sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
- sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
- sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
- header, team_id );
-
- /*
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr],
- sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr],
- sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr],
- sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr],
- sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr],
- sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr],
- sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
-
- //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr],
- // sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id );
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr],
- sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id );
-#if OMP_40_ENABLED
- __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr],
- sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id );
-#endif
- */
-
- __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
- sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
-}
-
-static void __kmp_init_allocator() {}
-static void __kmp_fini_allocator() {}
-
-/* ------------------------------------------------------------------------ */
-
-#ifdef KMP_DYNAMIC_LIB
-# if KMP_OS_WINDOWS
-
-
-static void
-__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
- // TODO: Change to __kmp_break_bootstrap_lock().
- __kmp_init_bootstrap_lock( lck ); // make the lock released
-}
-
-static void
-__kmp_reset_locks_on_process_detach( int gtid_req ) {
- int i;
- int thread_count;
-
- // PROCESS_DETACH is expected to be called by a thread
- // that executes ProcessExit() or FreeLibrary().
- // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
- // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
-    // However, in fact, some threads can still be alive here, although they are about to be terminated.
- // The threads in the array with ds_thread==0 are most suspicious.
- // Actually, it can be not safe to access the __kmp_threads[].
-
- // TODO: does it make sense to check __kmp_roots[] ?
-
- // Let's check that there are no other alive threads registered with the OMP lib.
- while( 1 ) {
- thread_count = 0;
- for( i = 0; i < __kmp_threads_capacity; ++i ) {
- if( !__kmp_threads ) continue;
- kmp_info_t* th = __kmp_threads[ i ];
- if( th == NULL ) continue;
- int gtid = th->th.th_info.ds.ds_gtid;
- if( gtid == gtid_req ) continue;
- if( gtid < 0 ) continue;
- DWORD exit_val;
- int alive = __kmp_is_thread_alive( th, &exit_val );
- if( alive ) {
- ++thread_count;
- }
- }
- if( thread_count == 0 ) break; // success
- }
-
- // Assume that I'm alone.
-
-    // Now it is probably safe to check and reset locks.
- // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
- __kmp_reset_lock( &__kmp_forkjoin_lock );
- #ifdef KMP_DEBUG
- __kmp_reset_lock( &__kmp_stdio_lock );
- #endif // KMP_DEBUG
-
-
-}
-
-BOOL WINAPI
-DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
- //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
-
- switch( fdwReason ) {
-
- case DLL_PROCESS_ATTACH:
- KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
-
- return TRUE;
-
- case DLL_PROCESS_DETACH:
- KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
- __kmp_gtid_get_specific() ));
-
- if( lpReserved != NULL )
- {
- // lpReserved is used for telling the difference:
- // lpReserved == NULL when FreeLibrary() was called,
- // lpReserved != NULL when the process terminates.
- // When FreeLibrary() is called, worker threads remain alive.
- // So they will release the forkjoin lock by themselves.
- // When the process terminates, worker threads disappear triggering
- // the problem of unreleased forkjoin lock as described below.
-
- // A worker thread can take the forkjoin lock
- // in __kmp_suspend_template()->__kmp_rml_decrease_load_before_sleep().
- // The problem comes up if that worker thread becomes dead
- // before it releases the forkjoin lock.
- // The forkjoin lock remains taken, while the thread
- // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
- // will try to take the forkjoin lock and will always fail,
- // so that the application will never finish [normally].
- // This scenario is possible if __kmpc_end() has not been executed.
- // It looks like it's not a corner case, but common cases:
- // - the main function was compiled by an alternative compiler;
- // - the main function was compiled by icl but without /Qopenmp (application with plugins);
- // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
- // - alive foreign thread prevented __kmpc_end from doing cleanup.
-
- // This is a hack to work around the problem.
- // TODO: !!! to figure out something better.
- __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
- }
-
- __kmp_internal_end_library( __kmp_gtid_get_specific() );
-
- return TRUE;
-
- case DLL_THREAD_ATTACH:
- KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
-
- /* if we wanted to register new siblings all the time here call
- * __kmp_get_gtid(); */
- return TRUE;
-
- case DLL_THREAD_DETACH:
- KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
- __kmp_gtid_get_specific() ));
-
- __kmp_internal_end_thread( __kmp_gtid_get_specific() );
- return TRUE;
- }
-
- return TRUE;
-}
-
-# endif /* KMP_OS_WINDOWS */
-#endif /* KMP_DYNAMIC_LIB */
-
-
-/* ------------------------------------------------------------------------ */
-
-/* Change the library type to "status" and return the old type */
-/* called from within initialization routines where __kmp_initz_lock is held */
-int
-__kmp_change_library( int status )
-{
- int old_status;
-
- old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
-
- if (status) {
- __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
- }
- else {
- __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
- }
-
- return old_status; // return previous setting of whether KMP_LIBRARY=throughput
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* __kmp_parallel_deo --
- * Wait until it's our turn.
- */
-void
-__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- int gtid = *gtid_ref;
-#ifdef BUILD_PARALLEL_ORDERED
- kmp_team_t *team = __kmp_team_from_gtid( gtid );
-#endif /* BUILD_PARALLEL_ORDERED */
-
- if( __kmp_env_consistency_check ) {
- if( __kmp_threads[gtid]->th.th_root->r.r_active )
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
-#else
- __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
-#endif
- }
-#ifdef BUILD_PARALLEL_ORDERED
- if( !team->t.t_serialized ) {
- KMP_MB();
- KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
- KMP_MB();
- }
-#endif /* BUILD_PARALLEL_ORDERED */
-}
-
-/* __kmp_parallel_dxo --
- * Signal the next task.
- */
-
-void
-__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- int gtid = *gtid_ref;
-#ifdef BUILD_PARALLEL_ORDERED
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_team_t *team = __kmp_team_from_gtid( gtid );
-#endif /* BUILD_PARALLEL_ORDERED */
-
- if( __kmp_env_consistency_check ) {
- if( __kmp_threads[gtid]->th.th_root->r.r_active )
- __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
- }
-#ifdef BUILD_PARALLEL_ORDERED
- if ( ! team->t.t_serialized ) {
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* use the tid of the next thread in this team */
-        /* TODO replace with general release procedure */
- team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
-
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
- /* accept blame for "ordered" waiting */
- kmp_info_t *this_thread = __kmp_threads[gtid];
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
- this_thread->th.ompt_thread_info.wait_id);
- }
-#endif
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
-#endif /* BUILD_PARALLEL_ORDERED */
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* The BARRIER for a SINGLE process section is always explicit */
-
-int
-__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
-{
- int status;
- kmp_info_t *th;
- kmp_team_t *team;
-
- if( ! TCR_4(__kmp_init_parallel) )
- __kmp_parallel_initialize();
-
- th = __kmp_threads[ gtid ];
- team = th->th.th_team;
- status = 0;
-
- th->th.th_ident = id_ref;
-
- if ( team->t.t_serialized ) {
- status = 1;
- } else {
- kmp_int32 old_this = th->th.th_local.this_construct;
-
- ++th->th.th_local.this_construct;
- /* try to set team count to thread count--success means thread got the
- single block
- */
- /* TODO: Should this be acquire or release? */
- status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
- th->th.th_local.this_construct);
-#if USE_ITT_BUILD
- if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
-#if OMP_40_ENABLED
- th->th.th_teams_microtask == NULL &&
-#endif
- team->t.t_active_level == 1 )
- { // Only report metadata by master of active team at level 1
- __kmp_itt_metadata_single( id_ref );
- }
-#endif /* USE_ITT_BUILD */
- }
-
- if( __kmp_env_consistency_check ) {
- if (status && push_ws) {
- __kmp_push_workshare( gtid, ct_psingle, id_ref );
- } else {
- __kmp_check_workshare( gtid, ct_psingle, id_ref );
- }
- }
-#if USE_ITT_BUILD
- if ( status ) {
- __kmp_itt_single_start( gtid );
- }
-#endif /* USE_ITT_BUILD */
- return status;
-}
-
-void
-__kmp_exit_single( int gtid )
-{
-#if USE_ITT_BUILD
- __kmp_itt_single_end( gtid );
-#endif /* USE_ITT_BUILD */
- if( __kmp_env_consistency_check )
- __kmp_pop_workshare( gtid, ct_psingle, NULL );
-}
-
-
-/*
- * determine if we can go parallel or must use a serialized parallel region and
- * how many threads we can use
- * set_nthreads is the number of threads requested for the team
- * returns 1 if we should serialize or only use one thread,
- * otherwise the number of threads to use
- * The forkjoin lock is held by the caller.
- */
-static int
-__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
- int master_tid, int set_nthreads
-#if OMP_40_ENABLED
- , int enter_teams
-#endif /* OMP_40_ENABLED */
-)
-{
- int capacity;
- int new_nthreads;
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- KMP_DEBUG_ASSERT( root && parent_team );
-
- //
- // If dyn-var is set, dynamically adjust the number of desired threads,
- // according to the method specified by dynamic_mode.
- //
- new_nthreads = set_nthreads;
- if ( ! get__dynamic_2( parent_team, master_tid ) ) {
- ;
- }
-#ifdef USE_LOAD_BALANCE
- else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
- new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
- if ( new_nthreads == 1 ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
- master_tid ));
- return 1;
- }
- if ( new_nthreads < set_nthreads ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
- master_tid, new_nthreads ));
- }
- }
-#endif /* USE_LOAD_BALANCE */
- else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
- new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
- : root->r.r_hot_team->t.t_nproc);
- if ( new_nthreads <= 1 ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
- master_tid ));
- return 1;
- }
- if ( new_nthreads < set_nthreads ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
- master_tid, new_nthreads ));
- }
- else {
- new_nthreads = set_nthreads;
- }
- }
- else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
- if ( set_nthreads > 2 ) {
- new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
- new_nthreads = ( new_nthreads % set_nthreads ) + 1;
- if ( new_nthreads == 1 ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
- master_tid ));
- return 1;
- }
- if ( new_nthreads < set_nthreads ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
- master_tid, new_nthreads ));
- }
- }
- }
- else {
- KMP_ASSERT( 0 );
- }
-
- //
- // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
- //
- if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
- root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
- int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
- root->r.r_hot_team->t.t_nproc );
- if ( tl_nthreads <= 0 ) {
- tl_nthreads = 1;
- }
-
- //
- // If dyn-var is false, emit a 1-time warning.
- //
- if ( ! get__dynamic_2( parent_team, master_tid )
- && ( ! __kmp_reserve_warn ) ) {
- __kmp_reserve_warn = 1;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
- KMP_HNT( Unset_ALL_THREADS ),
- __kmp_msg_null
- );
- }
- if ( tl_nthreads == 1 ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
- master_tid ));
- return 1;
- }
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
- master_tid, tl_nthreads ));
- new_nthreads = tl_nthreads;
- }
-
-
- //
- // Check if the threads array is large enough, or needs expanding.
- //
- // See comment in __kmp_register_root() about the adjustment if
- // __kmp_threads[0] == NULL.
- //
- capacity = __kmp_threads_capacity;
- if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
- --capacity;
- }
- if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
- root->r.r_hot_team->t.t_nproc ) > capacity ) {
- //
- // Expand the threads array.
- //
- int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
- root->r.r_hot_team->t.t_nproc ) - capacity;
- int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
- if ( slotsAdded < slotsRequired ) {
- //
- // The threads array was not expanded enough.
- //
- new_nthreads -= ( slotsRequired - slotsAdded );
- KMP_ASSERT( new_nthreads >= 1 );
-
- //
- // If dyn-var is false, emit a 1-time warning.
- //
- if ( ! get__dynamic_2( parent_team, master_tid )
- && ( ! __kmp_reserve_warn ) ) {
- __kmp_reserve_warn = 1;
- if ( __kmp_tp_cached ) {
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
- KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
- KMP_HNT( PossibleSystemLimitOnThreads ),
- __kmp_msg_null
- );
- }
- else {
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
- KMP_HNT( SystemLimitOnThreads ),
- __kmp_msg_null
- );
- }
- }
- }
- }
-
- if ( new_nthreads == 1 ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
- __kmp_get_gtid(), set_nthreads ) );
- return 1;
- }
-
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
- __kmp_get_gtid(), new_nthreads, set_nthreads ));
- return new_nthreads;
-}
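Illustrative aside, not part of the patch above: under dynamic_random, __kmp_reserve_threads maps a random value into the range 1..set_nthreads and keeps small requests unchanged. The same clamp as a standalone sketch, with rand() standing in for __kmp_get_random() (pick_random_nthreads is a hypothetical name):

    #include <stdlib.h>

    /* Pick a team size in [1, set_nthreads]; requests of 1 or 2 are kept as-is,
       matching the "set_nthreads > 2" guard above. */
    static int pick_random_nthreads(int set_nthreads)
    {
        if (set_nthreads <= 2)
            return set_nthreads;
        return (rand() % set_nthreads) + 1;
    }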
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* allocate threads from the thread pool and assign them to the new team */
-/* we are assured that there are enough threads available, because we
- * checked on that earlier within critical section forkjoin */
-
-static void
-__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
- kmp_info_t *master_th, int master_gtid )
-{
- int i;
- int use_hot_team;
-
- KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
- KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
- KMP_MB();
-
- /* first, let's setup the master thread */
- master_th->th.th_info.ds.ds_tid = 0;
- master_th->th.th_team = team;
- master_th->th.th_team_nproc = team->t.t_nproc;
- master_th->th.th_team_master = master_th;
- master_th->th.th_team_serialized = FALSE;
- master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];
-
- /* make sure we are not the optimized hot team */
-#if KMP_NESTED_HOT_TEAMS
- use_hot_team = 0;
- kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
- if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
- int level = team->t.t_active_level - 1; // index in array of hot teams
- if( master_th->th.th_teams_microtask ) { // are we inside the teams?
- if( master_th->th.th_teams_size.nteams > 1 ) {
- ++level; // level was not increased in teams construct for team_of_masters
- }
- if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
- master_th->th.th_teams_level == team->t.t_level ) {
- ++level; // level was not increased in teams construct for team_of_workers before the parallel
- } // team->t.t_level will be increased inside parallel
- }
- if( level < __kmp_hot_teams_max_level ) {
- if( hot_teams[level].hot_team ) {
- // hot team has already been allocated for given level
- KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
- use_hot_team = 1; // the team is ready to use
- } else {
- use_hot_team = 0; // AC: threads are not allocated yet
- hot_teams[level].hot_team = team; // remember new hot team
- hot_teams[level].hot_team_nth = team->t.t_nproc;
- }
- } else {
- use_hot_team = 0;
- }
- }
-#else
- use_hot_team = team == root->r.r_hot_team;
-#endif
- if ( !use_hot_team ) {
-
- /* install the master thread */
- team->t.t_threads[ 0 ] = master_th;
- __kmp_initialize_info( master_th, team, 0, master_gtid );
-
- /* now, install the worker threads */
- for ( i=1 ; i < team->t.t_nproc ; i++ ) {
-
- /* fork or reallocate a new thread and install it in team */
- kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
- team->t.t_threads[ i ] = thr;
- KMP_DEBUG_ASSERT( thr );
- KMP_DEBUG_ASSERT( thr->th.th_team == team );
- /* align team and thread arrived states */
- KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
- __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
- __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
- team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
- team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
-#if OMP_40_ENABLED
- thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
- thr->th.th_teams_level = master_th->th.th_teams_level;
- thr->th.th_teams_size = master_th->th.th_teams_size;
-#endif
- { // Initialize threads' barrier data.
- int b;
- kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
- for ( b = 0; b < bs_last_barrier; ++ b ) {
- balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
-#endif
- }; // for b
- }
- }
-
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- __kmp_partition_places( team );
-#endif
-
- }
-
- KMP_MB();
-}
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-//
-// Propagate any changes to the floating point control registers out to the team.
-// We try to avoid unnecessary writes to the relevant cache line in the team structure,
-// so we don't make changes unless they are needed.
-//
-inline static void
-propagateFPControl(kmp_team_t * team)
-{
- if ( __kmp_inherit_fp_control ) {
- kmp_int16 x87_fpu_control_word;
- kmp_uint32 mxcsr;
-
- // Get master values of FPU control flags (both X87 and vector)
- __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
- __kmp_store_mxcsr( &mxcsr );
- mxcsr &= KMP_X86_MXCSR_MASK;
-
- // There is no point looking at t_fp_control_saved here.
- // If it is TRUE, we still have to update the values if they are different from those we now have.
- // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
- // that the values in the team are the same as those we have.
- // So, this code achieves what we need whether or not t_fp_control_saved is true.
- // By checking whether the value needs updating we avoid unnecessary writes that would put the
- // cache-line into a written state, causing all threads in the team to have to read it again.
- if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
- team->t.t_x87_fpu_control_word = x87_fpu_control_word;
- }
- if ( team->t.t_mxcsr != mxcsr ) {
- team->t.t_mxcsr = mxcsr;
- }
- // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
- // So we must ensure it is correct.
- if (!team->t.t_fp_control_saved) {
- team->t.t_fp_control_saved = TRUE;
- }
- }
- else {
- // Similarly here. Don't write to this cache-line in the team structure unless we have to.
- if (team->t.t_fp_control_saved)
- team->t.t_fp_control_saved = FALSE;
- }
-}
-
-// Do the opposite, setting the hardware registers to the updated values from the team.
-inline static void
-updateHWFPControl(kmp_team_t * team)
-{
- if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
- //
-        // Only reset the fp control regs if they have been changed in the team
-        // during the parallel region that we are exiting.
- //
- kmp_int16 x87_fpu_control_word;
- kmp_uint32 mxcsr;
- __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
- __kmp_store_mxcsr( &mxcsr );
- mxcsr &= KMP_X86_MXCSR_MASK;
-
- if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
- __kmp_clear_x87_fpu_status_word();
- __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
- }
-
- if ( team->t.t_mxcsr != mxcsr ) {
- __kmp_load_mxcsr( &team->t.t_mxcsr );
- }
- }
-}
-#else
-# define propagateFPControl(x) ((void)0)
-# define updateHWFPControl(x) ((void)0)
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
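Illustrative aside, not part of the patch above: propagateFPControl deliberately writes the team fields only when a value actually changes, so an unchanged FP state never dirties a cache line read by every thread in the team. The idiom in isolation, as a hypothetical helper:

    /* Store v into *shared only if it differs, so an unchanged value never
       invalidates the readers' cached copy of the line. */
    static void store_if_changed(volatile unsigned *shared, unsigned v)
    {
        if (*shared != v)
            *shared = v;
    }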
-
-static void
-__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
-
-/*
- * Run a parallel region that has been serialized, so it runs only in a team of the single master thread.
- */
-void
-__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
-{
- kmp_info_t *this_thr;
- kmp_team_t *serial_team;
-
- KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
-
- /* Skip all this code for autopar serialized loops since it results in
- unacceptable overhead */
- if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
- return;
-
- if( ! TCR_4( __kmp_init_parallel ) )
- __kmp_parallel_initialize();
-
- this_thr = __kmp_threads[ global_tid ];
- serial_team = this_thr->th.th_serial_team;
-
- /* utilize the serialized team held by this thread */
- KMP_DEBUG_ASSERT( serial_team );
- KMP_MB();
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
- KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
- KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
- global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
- this_thr->th.th_task_team = NULL;
- }
-
-#if OMP_40_ENABLED
- kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
- if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
- proc_bind = proc_bind_false;
- }
- else if ( proc_bind == proc_bind_default ) {
- //
- // No proc_bind clause was specified, so use the current value
- // of proc-bind-var for this parallel region.
- //
- proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
- }
- //
- // Reset for next parallel region
- //
- this_thr->th.th_set_proc_bind = proc_bind_default;
-#endif /* OMP_40_ENABLED */
-
- if( this_thr->th.th_team != serial_team ) {
- // Nested level will be an index in the nested nthreads array
- int level = this_thr->th.th_team->t.t_level;
-
- if( serial_team->t.t_serialized ) {
- /* this serial team was already used
-             * TODO increase performance by making these locks more specific */
- kmp_team_t *new_team;
-
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-#endif
-
- new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
-#if OMPT_SUPPORT
- ompt_parallel_id,
-#endif
-#if OMP_40_ENABLED
- proc_bind,
-#endif
- & this_thr->th.th_current_task->td_icvs,
- 0 USE_NESTED_HOT_ARG(NULL) );
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
- KMP_ASSERT( new_team );
-
- /* setup new serialized team and install it */
- new_team->t.t_threads[0] = this_thr;
- new_team->t.t_parent = this_thr->th.th_team;
- serial_team = new_team;
- this_thr->th.th_serial_team = serial_team;
-
- KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
- global_tid, serial_team ) );
-
-
- /* TODO the above breaks the requirement that if we run out of
- * resources, then we can still guarantee that serialized teams
- * are ok, since we may need to allocate a new one */
- } else {
- KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
- global_tid, serial_team ) );
- }
-
- /* we have to initialize this serial team */
- KMP_DEBUG_ASSERT( serial_team->t.t_threads );
- KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
- KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
- serial_team->t.t_ident = loc;
- serial_team->t.t_serialized = 1;
- serial_team->t.t_nproc = 1;
- serial_team->t.t_parent = this_thr->th.th_team;
- serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
- this_thr->th.th_team = serial_team;
- serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
-
- KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
- global_tid, this_thr->th.th_current_task ) );
- KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
- this_thr->th.th_current_task->td_flags.executing = 0;
-
- __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
-
- /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
- each serialized task represented by team->t.t_serialized? */
- copy_icvs(
- & this_thr->th.th_current_task->td_icvs,
- & this_thr->th.th_current_task->td_parent->td_icvs );
-
- // Thread value exists in the nested nthreads array for the next nested level
- if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
- this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
- }
-
-#if OMP_40_ENABLED
- if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
- this_thr->th.th_current_task->td_icvs.proc_bind
- = __kmp_nested_proc_bind.bind_types[ level + 1 ];
- }
-#endif /* OMP_40_ENABLED */
-
-#if USE_DEBUGGER
- serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
-#endif
- this_thr->th.th_info.ds.ds_tid = 0;
-
- /* set thread cache values */
- this_thr->th.th_team_nproc = 1;
- this_thr->th.th_team_master = this_thr;
- this_thr->th.th_team_serialized = 1;
-
- serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
- serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
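- // Note: t_level counts every nesting level, whereas t_active_level only counts
- // levels that are actually active (run with more than one thread), so a
- // serialized region inherits the parent's active level unchanged.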
-
- propagateFPControl (serial_team);
-
- /* check if we need to allocate dispatch buffers stack */
- KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
- if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
- serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
- __kmp_allocate( sizeof( dispatch_private_info_t ) );
- }
- this_thr->th.th_dispatch = serial_team->t.t_dispatch;
-
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
- __ompt_team_assign_id(serial_team, ompt_parallel_id);
-#endif
-
- KMP_MB();
-
- } else {
- /* this serialized team is already being used,
- * that's fine, just add another nested level */
- KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
- KMP_DEBUG_ASSERT( serial_team->t.t_threads );
- KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
- ++ serial_team->t.t_serialized;
- this_thr->th.th_team_serialized = serial_team->t.t_serialized;
-
- // Nested level will be an index in the nested nthreads array
- int level = this_thr->th.th_team->t.t_level;
- // Thread value exists in the nested nthreads array for the next nested level
- if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
- this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
- }
- serial_team->t.t_level++;
- KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
- global_tid, serial_team, serial_team->t.t_level ) );
-
- /* allocate/push dispatch buffers stack */
- KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
- {
- dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
- __kmp_allocate( sizeof( dispatch_private_info_t ) );
- disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
- serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
- }
- this_thr->th.th_dispatch = serial_team->t.t_dispatch;
-
- KMP_MB();
- }
-
- if ( __kmp_env_consistency_check )
- __kmp_push_parallel( global_tid, NULL );
-
-#if USE_ITT_BUILD
- // Mark the start of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
- if ( serial_team->t.t_level == 1
-#if OMP_40_ENABLED
- && this_thr->th.th_teams_microtask == NULL
-#endif
- ) {
-#if USE_ITT_NOTIFY
- // Save the start of the "parallel" region for VTune; this also marks the frame begin.
- if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
- ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
- {
- serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
- } else // only one notification scheme (either "submit" or "forking/joined", not both)
-#endif
- if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
- __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )
- {
- this_thr->th.th_ident = loc;
- // 0 - no barriers; 1 - serialized parallel
- __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
- }
- }
-#endif /* USE_ITT_BUILD */
-}
-
-/* most of the work for a fork */
-/* return true if we really went parallel, false if serialized */
-int
-__kmp_fork_call(
- ident_t * loc,
- int gtid,
- enum fork_context_e call_context, // Intel, GNU, ...
- kmp_int32 argc,
-#if OMPT_SUPPORT
- void *unwrapped_task,
-#endif
- microtask_t microtask,
- launch_t invoker,
-/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- va_list * ap
-#else
- va_list ap
-#endif
- )
-{
- void **argv;
- int i;
- int master_tid;
- int master_this_cons;
- kmp_team_t *team;
- kmp_team_t *parent_team;
- kmp_info_t *master_th;
- kmp_root_t *root;
- int nthreads;
- int master_active;
- int master_set_numthreads;
- int level;
-#if OMP_40_ENABLED
- int active_level;
- int teams_level;
-#endif
-#if KMP_NESTED_HOT_TEAMS
- kmp_hot_team_ptr_t **p_hot_teams;
-#endif
- { // KMP_TIME_BLOCK
- KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
- KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
-
- KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
- if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
- /* Some systems prefer the stack for the root thread(s) to start with */
- /* some gap from the parent stack to prevent false sharing. */
- void *dummy = KMP_ALLOCA(__kmp_stkpadding);
- /* These 2 lines below are so this does not get optimized out */
- if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
- __kmp_stkpadding += (short)((kmp_int64)dummy);
- }
-
- /* initialize if needed */
- KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
- if( ! TCR_4(__kmp_init_parallel) )
- __kmp_parallel_initialize();
-
- /* setup current data */
- master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
- parent_team = master_th->th.th_team;
- master_tid = master_th->th.th_info.ds.ds_tid;
- master_this_cons = master_th->th.th_local.this_construct;
- root = master_th->th.th_root;
- master_active = root->r.r_active;
- master_set_numthreads = master_th->th.th_set_nproc;
-
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id;
- ompt_task_id_t ompt_task_id;
- ompt_frame_t *ompt_frame;
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
-
- if (ompt_enabled) {
- ompt_parallel_id = __ompt_parallel_id_new(gtid);
- ompt_task_id = __ompt_get_task_id_internal(0);
- ompt_frame = __ompt_get_task_frame_internal(0);
- }
-#endif
-
- // Nested level will be an index in the nested nthreads array
- level = parent_team->t.t_level;
-#if OMP_40_ENABLED
- active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
- teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
-#endif
-#if KMP_NESTED_HOT_TEAMS
- p_hot_teams = &master_th->th.th_hot_teams;
- if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
- *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
- sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
- (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
- (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
- }
-#endif
-
-#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
- int team_size = master_set_numthreads;
-
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
- ompt_task_id, ompt_frame, ompt_parallel_id,
- team_size, unwrapped_task, OMPT_INVOKER(call_context));
- }
-#endif
-
- master_th->th.th_ident = loc;
-
-#if OMP_40_ENABLED
- if ( master_th->th.th_teams_microtask &&
- ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
- // AC: This is start of parallel that is nested inside teams construct.
- // The team is actual (hot), all workers are ready at the fork barrier.
- // No lock needed to initialize the team a bit, then free workers.
- parent_team->t.t_ident = loc;
- parent_team->t.t_argc = argc;
- argv = (void**)parent_team->t.t_argv;
- for( i=argc-1; i >= 0; --i )
-/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- *argv++ = va_arg( *ap, void * );
-#else
- *argv++ = va_arg( ap, void * );
-#endif
- /* Increment our nested depth levels, but do not increase the serialization */
- if ( parent_team == master_th->th.th_serial_team ) {
- // AC: we are in serialized parallel
- __kmpc_serialized_parallel(loc, gtid);
- KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
- parent_team->t.t_serialized--; // AC: need this so that enquiry functions
- // work correctly; will restore at join time
-
-#if OMPT_SUPPORT
- void *dummy;
- void **exit_runtime_p;
-
- ompt_lw_taskteam_t lw_taskteam;
-
- if (ompt_enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
- /* OMPT implicit task begin */
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
- }
-#endif
-
- /* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
- } else {
- exit_runtime_p = &dummy;
- }
-#endif
-
- {
- KMP_TIME_BLOCK(OMP_work);
- __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
-#if OMPT_SUPPORT
- , exit_runtime_p
-#endif
- );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
-#if OMPT_TRACE
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
-
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- ompt_parallel_id, ompt_task_id);
- }
-
- __ompt_lw_taskteam_unlink(master_th);
- // clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-#endif
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id,
- OMPT_INVOKER(call_context));
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- return TRUE;
- }
-
- parent_team->t.t_pkfn = microtask;
-#if OMPT_SUPPORT
- parent_team->t.ompt_team_info.microtask = unwrapped_task;
-#endif
- parent_team->t.t_invoke = invoker;
- KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
- parent_team->t.t_active_level ++;
- parent_team->t.t_level ++;
-
- /* Change number of threads in the team if requested */
- if ( master_set_numthreads ) { // The parallel has num_threads clause
- if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
- // AC: only can reduce the number of threads dynamically, cannot increase
- kmp_info_t **other_threads = parent_team->t.t_threads;
- parent_team->t.t_nproc = master_set_numthreads;
- for ( i = 0; i < master_set_numthreads; ++i ) {
- other_threads[i]->th.th_team_nproc = master_set_numthreads;
- }
- // Keep extra threads hot in the team for possible next parallels
- }
- master_th->th.th_set_nproc = 0;
- }
-
-#if USE_DEBUGGER
- if ( __kmp_debugging ) { // Let debugger override number of threads.
- int nth = __kmp_omp_num_threads( loc );
- if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
- master_set_numthreads = nth;
- }; // if
- }; // if
-#endif
-
- KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
- __kmp_internal_fork( loc, gtid, parent_team );
- KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
-
- /* Invoke microtask for MASTER thread */
- KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
- gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
-
- {
- KMP_TIME_BLOCK(OMP_work);
- if (! parent_team->t.t_invoke( gtid )) {
- KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
- }
- }
- KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
- gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
-
- return TRUE;
- } // Parallel closely nested in teams construct
-#endif /* OMP_40_ENABLED */
-
-#if KMP_DEBUG
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
- }
-#endif
-
- if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
- nthreads = 1;
- } else {
-#if OMP_40_ENABLED
- int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
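- // enter_teams appears to be true either for the teams construct itself
- // (ap == NULL at the outermost level) or for a parallel nested directly
- // inside a teams region (teams_level == level).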
-#endif
- nthreads = master_set_numthreads ?
- master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
-
- // Check if we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
- // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
- if (nthreads > 1) {
- if ( ( !get__nested(master_th) && (root->r.r_in_parallel
-#if OMP_40_ENABLED
- && !enter_teams
-#endif /* OMP_40_ENABLED */
- ) ) || ( __kmp_library == library_serial ) ) {
- KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
- gtid, nthreads ));
- nthreads = 1;
- }
- }
- if ( nthreads > 1 ) {
- /* determine how many new threads we can use */
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
- nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
-#if OMP_40_ENABLED
-/* AC: If we execute teams from a parallel region (on the host), then teams should be created
- but each can only have 1 thread if nesting is disabled. If teams are called from a serial region,
- then teams and their threads should be created regardless of the nesting setting. */
- , enter_teams
-#endif /* OMP_40_ENABLED */
- );
- if ( nthreads == 1 ) {
- // Free lock for single thread execution here;
- // for multi-thread execution it will be freed later
- // after team of threads created and initialized
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
- }
- }
- }
- KMP_DEBUG_ASSERT( nthreads > 0 );
-
- /* If we temporarily changed the set number of threads then restore it now */
- master_th->th.th_set_nproc = 0;
-
- /* create a serialized parallel region? */
- if ( nthreads == 1 ) {
- /* josh todo: hypothetical question: what do we do for OS X*? */
-#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- void * args[ argc ];
-#else
- void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
-#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
-
- KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
-
- __kmpc_serialized_parallel(loc, gtid);
-
- if ( call_context == fork_context_intel ) {
- /* TODO this sucks, use the compiler itself to pass args! :) */
- master_th->th.th_serial_team->t.t_ident = loc;
-#if OMP_40_ENABLED
- if ( !ap ) {
- // revert change made in __kmpc_serialized_parallel()
- master_th->th.th_serial_team->t.t_level--;
- // Get args from parent team for teams construct
-
-#if OMPT_SUPPORT
- void *dummy;
- void **exit_runtime_p;
-
- ompt_lw_taskteam_t lw_taskteam;
-
- if (ompt_enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- ompt_parallel_id, my_task_id);
- }
-#endif
-
- /* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
- } else {
- exit_runtime_p = &dummy;
- }
-#endif
-
- {
- KMP_TIME_BLOCK(OMP_work);
- __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
-#if OMPT_SUPPORT
- , exit_runtime_p
-#endif
- );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
-
-#if OMPT_TRACE
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- ompt_parallel_id, ompt_task_id);
- }
-#endif
-
- __ompt_lw_taskteam_unlink(master_th);
- // clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id,
- OMPT_INVOKER(call_context));
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- } else if ( microtask == (microtask_t)__kmp_teams_master ) {
- KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
- team = master_th->th.th_team;
- //team->t.t_pkfn = microtask;
- team->t.t_invoke = invoker;
- __kmp_alloc_argv_entries( argc, team, TRUE );
- team->t.t_argc = argc;
- argv = (void**) team->t.t_argv;
- if ( ap ) {
- for( i=argc-1; i >= 0; --i )
-// TODO: revert workaround for Intel(R) 64 tracker #96
-# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- *argv++ = va_arg( *ap, void * );
-# else
- *argv++ = va_arg( ap, void * );
-# endif
- } else {
- for( i=0; i < argc; ++i )
- // Get args from parent team for teams construct
- argv[i] = parent_team->t.t_argv[i];
- }
- // AC: revert change made in __kmpc_serialized_parallel()
- // because initial code in teams should have level=0
- team->t.t_level--;
- // AC: call special invoker for outer "parallel" of the teams construct
- {
- KMP_TIME_BLOCK(OMP_work);
- invoker(gtid);
- }
- } else {
-#endif /* OMP_40_ENABLED */
- argv = args;
- for( i=argc-1; i >= 0; --i )
-// TODO: revert workaround for Intel(R) 64 tracker #96
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- *argv++ = va_arg( *ap, void * );
-#else
- *argv++ = va_arg( ap, void * );
-#endif
- KMP_MB();
-
-#if OMPT_SUPPORT
- void *dummy;
- void **exit_runtime_p;
-
- ompt_lw_taskteam_t lw_taskteam;
-
- if (ompt_enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
-
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
- /* OMPT implicit task begin */
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- my_parallel_id = ompt_parallel_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
- }
-#endif
-
- /* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
- } else {
- exit_runtime_p = &dummy;
- }
-#endif
-
- {
- KMP_TIME_BLOCK(OMP_work);
- __kmp_invoke_microtask( microtask, gtid, 0, argc, args
-#if OMPT_SUPPORT
- , exit_runtime_p
-#endif
- );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
-#if OMPT_TRACE
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
-
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- my_parallel_id, my_task_id);
- }
-#endif
-
- __ompt_lw_taskteam_unlink(master_th);
- // clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id,
- OMPT_INVOKER(call_context));
- }
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
-#if OMP_40_ENABLED
- }
-#endif /* OMP_40_ENABLED */
- }
- else if ( call_context == fork_context_gnu ) {
-#if OMPT_SUPPORT
- ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
- __kmp_allocate(sizeof(ompt_lw_taskteam_t));
- __ompt_lw_taskteam_init(lwt, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
-
- lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
- lwt->ompt_task_info.frame.exit_runtime_frame = 0;
- __ompt_lw_taskteam_link(lwt, master_th);
-#endif
-
- // we were called from GNU native code
- KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
- return FALSE;
- }
- else {
- KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
- }
-
-
- KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
- KMP_MB();
- return FALSE;
- }
-
- // GEH: only modify the executing flag in the case when not serialized
- // serialized case is handled in kmpc_serialized_parallel
- KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
- parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
- master_th->th.th_current_task->td_icvs.max_active_levels ) );
- // TODO: GEH - cannot do this assertion because root thread not set up as executing
- // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
- master_th->th.th_current_task->td_flags.executing = 0;
-
-#if OMP_40_ENABLED
- if ( !master_th->th.th_teams_microtask || level > teams_level )
-#endif /* OMP_40_ENABLED */
- {
- /* Increment our nested depth level */
- KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
- }
-
- // See if we need to make a copy of the ICVs.
- int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
- if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
- nthreads_icv = __kmp_nested_nth.nth[level+1];
- }
- else {
- nthreads_icv = 0; // don't update
- }
-
-#if OMP_40_ENABLED
- // Figure out the proc_bind_policy for the new team.
- kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
- kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
- if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
- proc_bind = proc_bind_false;
- }
- else {
- if (proc_bind == proc_bind_default) {
- // No proc_bind clause specified; use current proc-bind-var for this parallel region
- proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
- }
- /* else: The proc_bind policy was specified explicitly on parallel clause. This
- overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
- // Figure the value of proc-bind-var for the child threads.
- if ((level+1 < __kmp_nested_proc_bind.used)
- && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
- proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
- }
- }
-
- // Reset for next parallel region
- master_th->th.th_set_proc_bind = proc_bind_default;
-#endif /* OMP_40_ENABLED */
-
- if ((nthreads_icv > 0)
-#if OMP_40_ENABLED
- || (proc_bind_icv != proc_bind_default)
-#endif /* OMP_40_ENABLED */
- ) {
- kmp_internal_control_t new_icvs;
- copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
- new_icvs.next = NULL;
- if (nthreads_icv > 0) {
- new_icvs.nproc = nthreads_icv;
- }
-
-#if OMP_40_ENABLED
- if (proc_bind_icv != proc_bind_default) {
- new_icvs.proc_bind = proc_bind_icv;
- }
-#endif /* OMP_40_ENABLED */
-
- /* allocate a new parallel team */
- KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
- team = __kmp_allocate_team(root, nthreads, nthreads,
-#if OMPT_SUPPORT
- ompt_parallel_id,
-#endif
-#if OMP_40_ENABLED
- proc_bind,
-#endif
- &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
- } else {
- /* allocate a new parallel team */
- KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
- team = __kmp_allocate_team(root, nthreads, nthreads,
-#if OMPT_SUPPORT
- ompt_parallel_id,
-#endif
-#if OMP_40_ENABLED
- proc_bind,
-#endif
- &master_th->th.th_current_task->td_icvs, argc
- USE_NESTED_HOT_ARG(master_th) );
- }
- KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
-
- /* setup the new team */
- team->t.t_master_tid = master_tid;
- team->t.t_master_this_cons = master_this_cons;
- team->t.t_ident = loc;
- team->t.t_parent = parent_team;
- TCW_SYNC_PTR(team->t.t_pkfn, microtask);
-#if OMPT_SUPPORT
- TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);
-#endif
- team->t.t_invoke = invoker; /* TODO move this to root, maybe */
- // TODO: parent_team->t.t_level == INT_MAX ???
-#if OMP_40_ENABLED
- if ( !master_th->th.th_teams_microtask || level > teams_level ) {
-#endif /* OMP_40_ENABLED */
- team->t.t_level = parent_team->t.t_level + 1;
- team->t.t_active_level = parent_team->t.t_active_level + 1;
-#if OMP_40_ENABLED
- } else {
- // AC: Do not increase parallel level at start of the teams construct
- team->t.t_level = parent_team->t.t_level;
- team->t.t_active_level = parent_team->t.t_active_level;
- }
-#endif /* OMP_40_ENABLED */
- team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule
-
-#if OMP_40_ENABLED
- team->t.t_cancel_request = cancel_noreq;
-#endif
-
- // Update the floating point rounding in the team if required.
- propagateFPControl(team);
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Set master's task team to the team's task team. Unless this is a hot team, it should be NULL.
-#if 0
- // Patch out an assertion that trips while the runtime seems to operate correctly.
- // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
- KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
-#endif
- KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
- __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
- parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
- if (level) {
- // Take a memo of master's task_state
- KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
- if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
- kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
- kmp_uint8 *old_stack, *new_stack;
- kmp_uint32 i;
- new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
- for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
- new_stack[i] = master_th->th.th_task_state_memo_stack[i];
- }
- for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
- new_stack[i] = 0;
- }
- old_stack = master_th->th.th_task_state_memo_stack;
- master_th->th.th_task_state_memo_stack = new_stack;
- master_th->th.th_task_state_stack_sz = new_size;
- __kmp_free(old_stack);
- }
- // Store master's task_state on stack
- master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
- master_th->th.th_task_state_top++;
-#if KMP_NESTED_HOT_TEAMS
- if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
- master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
- }
- else {
-#endif
- master_th->th.th_task_state = 0;
-#if KMP_NESTED_HOT_TEAMS
- }
-#endif
- }
-#if !KMP_NESTED_HOT_TEAMS
- KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
-#endif
- }
-
- KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
- gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
- KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
- ( team->t.t_master_tid == 0 &&
- ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
- KMP_MB();
-
- /* now, setup the arguments */
- argv = (void**)team->t.t_argv;
-#if OMP_40_ENABLED
- if ( ap ) {
-#endif /* OMP_40_ENABLED */
- for ( i=argc-1; i >= 0; --i )
-// TODO: revert workaround for Intel(R) 64 tracker #96
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- *argv++ = va_arg( *ap, void * );
-#else
- *argv++ = va_arg( ap, void * );
-#endif
-#if OMP_40_ENABLED
- } else {
- for ( i=0; i < argc; ++i )
- // Get args from parent team for teams construct
- argv[i] = team->t.t_parent->t.t_argv[i];
- }
-#endif /* OMP_40_ENABLED */
-
- /* now actually fork the threads */
- team->t.t_master_active = master_active;
- if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
- root->r.r_active = TRUE;
-
- __kmp_fork_team_threads( root, team, master_th, gtid );
- __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
-
-#if OMPT_SUPPORT
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
-#endif
-
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
-
-
-#if USE_ITT_BUILD
- if ( team->t.t_active_level == 1 // only report frames at level 1
-# if OMP_40_ENABLED
- && !master_th->th.th_teams_microtask // not in teams construct
-# endif /* OMP_40_ENABLED */
- ) {
-#if USE_ITT_NOTIFY
- if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
- ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
- {
- kmp_uint64 tmp_time = 0;
- if ( __itt_get_timestamp_ptr )
- tmp_time = __itt_get_timestamp();
- // Internal fork - report frame begin
- master_th->th.th_frame_time = tmp_time;
- if ( __kmp_forkjoin_frames_mode == 3 )
- team->t.t_region_time = tmp_time;
- } else // only one notification scheme (either "submit" or "forking/joined", not both)
-#endif /* USE_ITT_NOTIFY */
- if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
- __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
- { // Mark start of "parallel" region for VTune.
- __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
- }
- }
-#endif /* USE_ITT_BUILD */
-
- /* now go on and do the work */
- KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
- KMP_MB();
- KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
- root, team, master_th, gtid));
-
-#if USE_ITT_BUILD
- if ( __itt_stack_caller_create_ptr ) {
- team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
- }
-#endif /* USE_ITT_BUILD */
-
-#if OMP_40_ENABLED
- if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
-#endif /* OMP_40_ENABLED */
- {
- __kmp_internal_fork( loc, gtid, team );
- KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
- root, team, master_th, gtid));
- }
-
- if (call_context == fork_context_gnu) {
- KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
- return TRUE;
- }
-
- /* Invoke microtask for MASTER thread */
- KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
- gtid, team->t.t_id, team->t.t_pkfn ) );
- } // END of timer KMP_fork_call block
-
- {
- KMP_TIME_BLOCK(OMP_work);
- // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
- if (! team->t.t_invoke( gtid )) {
- KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
- }
- }
- KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
- gtid, team->t.t_id, team->t.t_pkfn ) );
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
-
- return TRUE;
-}
-
-#if OMPT_SUPPORT
-static inline void
-__kmp_join_restore_state(
- kmp_info_t *thread,
- kmp_team_t *team)
-{
- // restore state outside the region
- thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
- ompt_state_work_serial : ompt_state_work_parallel);
-}
-
-static inline void
-__kmp_join_ompt(
- kmp_info_t *thread,
- kmp_team_t *team,
- ompt_parallel_id_t parallel_id,
- fork_context_e fork_context)
-{
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
- }
-
- __kmp_join_restore_state(thread,team);
-}
-#endif
-
-void
-__kmp_join_call(ident_t *loc, int gtid
-#if OMPT_SUPPORT
- , enum fork_context_e fork_context
-#endif
-#if OMP_40_ENABLED
- , int exit_teams
-#endif /* OMP_40_ENABLED */
-)
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
- kmp_team_t *team;
- kmp_team_t *parent_team;
- kmp_info_t *master_th;
- kmp_root_t *root;
- int master_active;
- int i;
-
- KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
-
- /* setup current data */
- master_th = __kmp_threads[ gtid ];
- root = master_th->th.th_root;
- team = master_th->th.th_team;
- parent_team = team->t.t_parent;
-
- master_th->th.th_ident = loc;
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
-
-#if KMP_DEBUG
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
- __kmp_gtid_from_thread( master_th ), team,
- team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
- KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
- }
-#endif
-
- if( team->t.t_serialized ) {
-#if OMP_40_ENABLED
- if ( master_th->th.th_teams_microtask ) {
- // We are in teams construct
- int level = team->t.t_level;
- int tlevel = master_th->th.th_teams_level;
- if ( level == tlevel ) {
- // AC: we haven't incremented it earlier at start of teams construct,
- // so do it here - at the end of teams construct
- team->t.t_level++;
- } else if ( level == tlevel + 1 ) {
- // AC: we are exiting parallel inside teams, need to increment serialization
- // in order to restore it in the next call to __kmpc_end_serialized_parallel
- team->t.t_serialized++;
- }
- }
-#endif /* OMP_40_ENABLED */
- __kmpc_end_serialized_parallel( loc, gtid );
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_restore_state(master_th, parent_team);
- }
-#endif
-
- return;
- }
-
- master_active = team->t.t_master_active;
-
-#if OMP_40_ENABLED
- if (!exit_teams)
-#endif /* OMP_40_ENABLED */
- {
- // AC: No barrier for internal teams at exit from the teams construct,
- // but there is a barrier for the external team (league).
- __kmp_internal_join( loc, gtid, team );
- }
-#if OMP_40_ENABLED
- else {
- master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
- }
-#endif /* OMP_40_ENABLED */
-
- KMP_MB();
-
-#if OMPT_SUPPORT
- ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
-#endif
-
-#if USE_ITT_BUILD
- if ( __itt_stack_caller_create_ptr ) {
- __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
- }
-
- // Mark end of "parallel" region for VTune.
- if ( team->t.t_active_level == 1
-# if OMP_40_ENABLED
- && !master_th->th.th_teams_microtask /* not in teams construct */
-# endif /* OMP_40_ENABLED */
- ) {
- master_th->th.th_ident = loc;
- // only one notification scheme (either "submit" or "forking/joined", not both)
- if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
- __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
- 0, loc, master_th->th.th_team_nproc, 1 );
- else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
- ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
- __kmp_itt_region_joined( gtid );
- } // active_level == 1
-#endif /* USE_ITT_BUILD */
-
-#if OMP_40_ENABLED
- if ( master_th->th.th_teams_microtask &&
- !exit_teams &&
- team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
- team->t.t_level == master_th->th.th_teams_level + 1 ) {
- // AC: We need to leave the team structure intact at the end
- // of a parallel inside the teams construct, so that at the next
- // parallel the same (hot) team works; only adjust nesting levels.
-
- /* Decrement our nested depth level */
- team->t.t_level --;
- team->t.t_active_level --;
- KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
-
- /* Restore number of threads in the team if needed */
- if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
- int old_num = master_th->th.th_team_nproc;
- int new_num = master_th->th.th_teams_size.nth;
- kmp_info_t **other_threads = team->t.t_threads;
- team->t.t_nproc = new_num;
- for ( i = 0; i < old_num; ++i ) {
- other_threads[i]->th.th_team_nproc = new_num;
- }
- // Adjust states of non-used threads of the team
- for ( i = old_num; i < new_num; ++i ) {
- // Re-initialize thread's barrier data.
- int b;
- kmp_balign_t * balign = other_threads[i]->th.th_bar;
- for ( b = 0; b < bs_last_barrier; ++ b ) {
- balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
- KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
-#endif
- }
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Synchronize thread's task state
- other_threads[i]->th.th_task_state = master_th->th.th_task_state;
- }
- }
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
- }
-#endif
-
- return;
- }
-#endif /* OMP_40_ENABLED */
-
- /* do cleanup and restore the parent team */
- master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
- master_th->th.th_local.this_construct = team->t.t_master_this_cons;
-
- master_th->th.th_dispatch =
- & parent_team->t.t_dispatch[ team->t.t_master_tid ];
-
- /* jc: The following lock has instructions with REL and ACQ semantics,
- separating the parallel user code called in this parallel region
- from the serial user code called after this function returns.
- */
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
-#if OMP_40_ENABLED
- if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
-#endif /* OMP_40_ENABLED */
- {
- /* Decrement our nested depth level */
- KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
- }
- KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
-
- KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
- 0, master_th, team ) );
- __kmp_pop_current_task_from_thread( master_th );
-
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- //
- // Restore master thread's partition.
- //
- master_th->th.th_first_place = team->t.t_first_place;
- master_th->th.th_last_place = team->t.t_last_place;
-#endif /* OMP_40_ENABLED */
-
- updateHWFPControl (team);
-
- if ( root->r.r_active != master_active )
- root->r.r_active = master_active;
-
- __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
-
- /* This race was fun to find. Make sure the following is in the critical
- * region, otherwise assertions may fail occasionally since the old team
- * may be reallocated and the hierarchy appears inconsistent. It is
- * actually safe to run and won't cause any bugs, but will cause those
- * assertion failures. It's only one deref&assign, so we might as well put this
- * in the critical region. */
- master_th->th.th_team = parent_team;
- master_th->th.th_team_nproc = parent_team->t.t_nproc;
- master_th->th.th_team_master = parent_team->t.t_threads[0];
- master_th->th.th_team_serialized = parent_team->t.t_serialized;
-
- /* restore serialized team, if need be */
- if( parent_team->t.t_serialized &&
- parent_team != master_th->th.th_serial_team &&
- parent_team != root->r.r_root_team ) {
- __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
- master_th->th.th_serial_team = parent_team;
- }
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
- KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
- // Remember master's state if we re-use this nested hot team
- master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
- --master_th->th.th_task_state_top; // pop
- // Now restore state at this level
- master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
- }
- // Copy the task team from the parent team to the master thread
- master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
- KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
- __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
- }
-
- // TODO: GEH - cannot do this assertion because root thread not set up as executing
- // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
- master_th->th.th_current_task->td_flags.executing = 1;
-
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
- }
-#endif
-
- KMP_MB();
- KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* Check whether we should push an internal control record onto the
- serial team stack. If so, do it. */
-void
-__kmp_save_internal_controls ( kmp_info_t * thread )
-{
-
- if ( thread->th.th_team != thread->th.th_serial_team ) {
- return;
- }
- if (thread->th.th_team->t.t_serialized > 1) {
- int push = 0;
-
- if (thread->th.th_team->t.t_control_stack_top == NULL) {
- push = 1;
- } else {
- if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
- thread->th.th_team->t.t_serialized ) {
- push = 1;
- }
- }
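- // At most one control record is pushed per serialization level: if the record
- // on top of the stack already belongs to the current t_serialized level, it is
- // reused instead of pushing a new one.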
- if (push) { /* push a record on the serial team's stack */
- kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
-
- copy_icvs( control, & thread->th.th_current_task->td_icvs );
-
- control->serial_nesting_level = thread->th.th_team->t.t_serialized;
-
- control->next = thread->th.th_team->t.t_control_stack_top;
- thread->th.th_team->t.t_control_stack_top = control;
- }
- }
-}
-
-/* Changes set_nproc */
-void
-__kmp_set_num_threads( int new_nth, int gtid )
-{
- kmp_info_t *thread;
- kmp_root_t *root;
-
- KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- if (new_nth < 1)
- new_nth = 1;
- else if (new_nth > __kmp_max_nth)
- new_nth = __kmp_max_nth;
-
- KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
- thread = __kmp_threads[gtid];
-
- __kmp_save_internal_controls( thread );
-
- set__nproc( thread, new_nth );
-
- //
- // If this omp_set_num_threads() call will cause the hot team size to be
- // reduced (in the absence of a num_threads clause), then reduce it now,
- // rather than waiting for the next parallel region.
- //
- root = thread->th.th_root;
- if ( __kmp_init_parallel && ( ! root->r.r_active )
- && ( root->r.r_hot_team->t.t_nproc > new_nth )
-#if KMP_NESTED_HOT_TEAMS
- && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
-#endif
- ) {
- kmp_team_t *hot_team = root->r.r_hot_team;
- int f;
-
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
-
- // Release the extra threads we don't need any more.
- for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
- KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
- if ( __kmp_tasking_mode != tskm_immediate_exec) {
- // When decreasing team size, threads no longer in the team should unref task team.
- hot_team->t.t_threads[f]->th.th_task_team = NULL;
- }
- __kmp_free_thread( hot_team->t.t_threads[f] );
- hot_team->t.t_threads[f] = NULL;
- }
- hot_team->t.t_nproc = new_nth;
-#if KMP_NESTED_HOT_TEAMS
- if( thread->th.th_hot_teams ) {
- KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
- thread->th.th_hot_teams[0].hot_team_nth = new_nth;
- }
-#endif
-
-
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
-
- //
- // Update the t_nproc field in the threads that are still active.
- //
- for( f=0 ; f < new_nth; f++ ) {
- KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
- hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
- }
- // Special flag in case of an omp_set_num_threads() call
- hot_team->t.t_size_changed = -1;
- }
-}
-
-/* Changes max_active_levels */
-void
-__kmp_set_max_active_levels( int gtid, int max_active_levels )
-{
- kmp_info_t *thread;
-
- KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- // validate max_active_levels
- if( max_active_levels < 0 ) {
- KMP_WARNING( ActiveLevelsNegative, max_active_levels );
- // We ignore this call if the user has specified a negative value.
- // The current setting won't be changed. The last valid setting will be used.
- // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
- KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
- return;
- }
- if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
- // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
- // We allow a zero value. (implementation defined behavior)
- } else {
- KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
- max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
- // Current upper limit is MAX_INT. (implementation defined behavior)
- // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
- // Actually, the flow should never get here until we use MAX_INT limit.
- }
- KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
-
- thread = __kmp_threads[ gtid ];
-
- __kmp_save_internal_controls( thread );
-
- set__max_active_levels( thread, max_active_levels );
-
-}
-
-/* Gets max_active_levels */
-int
-__kmp_get_max_active_levels( int gtid )
-{
- kmp_info_t *thread;
-
- KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- thread = __kmp_threads[ gtid ];
- KMP_DEBUG_ASSERT( thread->th.th_current_task );
- KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
- gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
- return thread->th.th_current_task->td_icvs.max_active_levels;
-}
-
-/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
-void
-__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
-{
- kmp_info_t *thread;
-// kmp_team_t *team;
-
- KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- // Check if the kind parameter is valid, correct if needed.
- // Valid parameters should fit in one of two intervals - standard or extended:
- // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
- // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
- if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
- ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
- {
- // TODO: Hint needs attention in case we change the default schedule.
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( ScheduleKindOutOfRange, kind ),
- KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
- __kmp_msg_null
- );
- kind = kmp_sched_default;
- chunk = 0; // ignore chunk value in case of bad kind
- }
-
- thread = __kmp_threads[ gtid ];
-
- __kmp_save_internal_controls( thread );
-
- if ( kind < kmp_sched_upper_std ) {
- if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
- // differentiate static chunked vs. unchunked:
- // chunk should be invalid to indicate an unchunked schedule (which is the default)
- thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
- } else {
- thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
- }
- } else {
- // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
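- // The index arithmetic maps an extended schedule kind onto the __kmp_sch_map
- // entries that immediately follow the four standard kinds (see the enumerator
- // ranges noted in the validation comment above).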
- thread->th.th_current_task->td_icvs.sched.r_sched_type =
- __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
- }
- if ( kind == kmp_sched_auto ) {
- // ignore parameter chunk for schedule auto
- thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
- } else {
- thread->th.th_current_task->td_icvs.sched.chunk = chunk;
- }
-}
-
-/* Gets def_sched_var ICV values */
-void
-__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
-{
- kmp_info_t *thread;
- enum sched_type th_type;
-
- KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- thread = __kmp_threads[ gtid ];
-
- //th_type = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type;
- th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
-
- switch ( th_type ) {
- case kmp_sch_static:
- case kmp_sch_static_greedy:
- case kmp_sch_static_balanced:
- *kind = kmp_sched_static;
- *chunk = 0; // chunk was not set, try to show this fact via zero value
- return;
- case kmp_sch_static_chunked:
- *kind = kmp_sched_static;
- break;
- case kmp_sch_dynamic_chunked:
- *kind = kmp_sched_dynamic;
- break;
- case kmp_sch_guided_chunked:
- case kmp_sch_guided_iterative_chunked:
- case kmp_sch_guided_analytical_chunked:
- *kind = kmp_sched_guided;
- break;
- case kmp_sch_auto:
- *kind = kmp_sched_auto;
- break;
- case kmp_sch_trapezoidal:
- *kind = kmp_sched_trapezoidal;
- break;
-/*
- case kmp_sch_static_steal:
- *kind = kmp_sched_static_steal;
- break;
-*/
- default:
- KMP_FATAL( UnknownSchedulingType, th_type );
- }
-
- //*chunk = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk;
- *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
-}
-
-int
-__kmp_get_ancestor_thread_num( int gtid, int level ) {
-
- int ii, dd;
- kmp_team_t *team;
- kmp_info_t *thr;
-
- KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- // validate level
- if( level == 0 ) return 0;
- if( level < 0 ) return -1;
- thr = __kmp_threads[ gtid ];
- team = thr->th.th_team;
- ii = team->t.t_level;
- if( level > ii ) return -1;
-
-#if OMP_40_ENABLED
- if( thr->th.th_teams_microtask ) {
- // AC: we are in teams region where multiple nested teams have same level
- int tlevel = thr->th.th_teams_level; // the level of the teams construct
- if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
- KMP_DEBUG_ASSERT( ii >= tlevel );
- // AC: As we need to pass by the teams league, we need to artificially increase ii
- if ( ii == tlevel ) {
- ii += 2; // three teams have same level
- } else {
- ii ++; // two teams have same level
- }
- }
- }
-#endif
-
- if( ii == level ) return __kmp_tid_from_gtid( gtid );
-
- dd = team->t.t_serialized;
- level++;
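- // Walk up the team tree: consecutive serialized regions share a single team whose
- // t_serialized count records how many nesting levels it represents, so those
- // levels are consumed first before stepping to the parent team.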
- while( ii > level )
- {
- for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
- {
- }
- if( ( team->t.t_serialized ) && ( !dd ) ) {
- team = team->t.t_parent;
- continue;
- }
- if( ii > level ) {
- team = team->t.t_parent;
- dd = team->t.t_serialized;
- ii--;
- }
- }
-
- return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
-}
-
-int
-__kmp_get_team_size( int gtid, int level ) {
-
- int ii, dd;
- kmp_team_t *team;
- kmp_info_t *thr;
-
- KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
- KMP_DEBUG_ASSERT( __kmp_init_serial );
-
- // validate level
- if( level == 0 ) return 1;
- if( level < 0 ) return -1;
- thr = __kmp_threads[ gtid ];
- team = thr->th.th_team;
- ii = team->t.t_level;
- if( level > ii ) return -1;
-
-#if OMP_40_ENABLED
- if( thr->th.th_teams_microtask ) {
- // AC: we are in teams region where multiple nested teams have same level
- int tlevel = thr->th.th_teams_level; // the level of the teams construct
- if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
- KMP_DEBUG_ASSERT( ii >= tlevel );
- // AC: As we need to pass by the teams league, we need to artificially increase ii
- if ( ii == tlevel ) {
- ii += 2; // three teams have same level
- } else {
- ii ++; // two teams have same level
- }
- }
- }
-#endif
-
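- // Same walk as in __kmp_get_ancestor_thread_num: serialized levels within a team
- // are consumed via t_serialized before stepping up to the parent team.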
- while( ii > level )
- {
- for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
- {
- }
- if( team->t.t_serialized && ( !dd ) ) {
- team = team->t.t_parent;
- continue;
- }
- if( ii > level ) {
- team = team->t.t_parent;
- ii--;
- }
- }
-
- return team->t.t_nproc;
-}
-
-kmp_r_sched_t
-__kmp_get_schedule_global() {
-// This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
-// may be changed by kmp_set_defaults independently, so one can get the updated schedule here.
-
- kmp_r_sched_t r_sched;
-
- // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
- // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
- // and thus have different run-time schedules in different roots (even in OMP 2.5)
- if ( __kmp_sched == kmp_sch_static ) {
- r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
- } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
- r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
- } else {
- r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
- }
-
- if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
- r_sched.chunk = KMP_DEFAULT_CHUNK;
- } else {
- r_sched.chunk = __kmp_chunk;
- }
-
- return r_sched;
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
-/*
- * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
- * at least argc *t_argv entries for the requested team.
- */
-static void
-__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
-{
-
- KMP_DEBUG_ASSERT( team );
- if( !realloc || argc > team->t.t_max_argc ) {
-
- KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
- team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
- /* if previously allocated heap space for args, free them */
- if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
- __kmp_free( (void *) team->t.t_argv );
-
- if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
- /* use unused space in the cache line for arguments */
- team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
- KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
- team->t.t_id, team->t.t_max_argc ));
- team->t.t_argv = &team->t.t_inline_argv[0];
- if ( __kmp_storage_map ) {
- __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
- &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
- (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
- "team_%d.t_inline_argv",
- team->t.t_id );
- }
- } else {
- /* allocate space for arguments in the heap */
- team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
- KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
- KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
- team->t.t_id, team->t.t_max_argc ));
- team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
- if ( __kmp_storage_map ) {
- __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
- sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
- team->t.t_id );
- }
- }
- }
-}
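
The sizing policy above keeps small argument lists in the team's inline, cache-line-resident storage and otherwise takes a heap allocation that is at least KMP_MIN_MALLOC_ARGV_ENTRIES and grows as 2*argc. A reduced, stand-alone sketch of that decision (the helper name choose_argv_capacity and the constant values are illustrative, not taken from the runtime):

/* Minimal model of the argv capacity policy in __kmp_alloc_argv_entries.
 * INLINE_ENTRIES and MIN_MALLOC_ENTRIES stand in for KMP_INLINE_ARGV_ENTRIES
 * and KMP_MIN_MALLOC_ARGV_ENTRIES; the actual values differ in the runtime. */
enum { INLINE_ENTRIES = 10, MIN_MALLOC_ENTRIES = 100 };

static int choose_argv_capacity(int argc) {
    if (argc <= INLINE_ENTRIES)
        return INLINE_ENTRIES;                /* reuse the inline slots in the team struct */
    return (argc <= (MIN_MALLOC_ENTRIES >> 1))
               ? MIN_MALLOC_ENTRIES           /* small heap request: round up to the minimum */
               : 2 * argc;                    /* larger request: allocate with headroom */
}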
-
-static void
-__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
-{
- int i;
- int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
-#if KMP_USE_POOLED_ALLOC
- // AC: TODO: fix bug here: size of t_disp_buffer should not be multiplied by max_nth!
- char *ptr = __kmp_allocate(max_nth *
- ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff
- + sizeof(kmp_disp_t) + sizeof(int)*6
- //+ sizeof(int)
- + sizeof(kmp_r_sched_t)
- + sizeof(kmp_taskdata_t) ) );
-
- team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
- team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
- ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
- team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
- team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
- team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
- team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
- team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
- team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
- team->t.t_set_bt_set = (int*) ptr;
- ptr += sizeof(int) * max_nth;
- //team->t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth;
- team->t.t_set_sched = (kmp_r_sched_t*) ptr;
- ptr += sizeof(kmp_r_sched_t) * max_nth;
- team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
- ptr += sizeof(kmp_taskdata_t) * max_nth;
-#else
-
- team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
- team->t.t_disp_buffer = (dispatch_shared_info_t*)
- __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
- team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
- //team->t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth );
- //team->t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth );
- team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
-#endif
- team->t.t_max_nproc = max_nth;
-
- /* setup dispatch buffers */
- for(i = 0 ; i < num_disp_buff; ++i)
- team->t.t_disp_buffer[i].buffer_index = i;
-}
-
-static void
-__kmp_free_team_arrays(kmp_team_t *team) {
- /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
- int i;
- for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
- if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
- __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
- team->t.t_dispatch[ i ].th_disp_buffer = NULL;
- }; // if
- }; // for
- __kmp_free(team->t.t_threads);
- #if !KMP_USE_POOLED_ALLOC
- __kmp_free(team->t.t_disp_buffer);
- __kmp_free(team->t.t_dispatch);
- //__kmp_free(team->t.t_set_max_active_levels);
- //__kmp_free(team->t.t_set_sched);
- __kmp_free(team->t.t_implicit_task_taskdata);
- #endif
- team->t.t_threads = NULL;
- team->t.t_disp_buffer = NULL;
- team->t.t_dispatch = NULL;
- //team->t.t_set_sched = 0;
- //team->t.t_set_max_active_levels = 0;
- team->t.t_implicit_task_taskdata = 0;
-}
-
-static void
-__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
- kmp_info_t **oldThreads = team->t.t_threads;
-
- #if !KMP_USE_POOLED_ALLOC
- __kmp_free(team->t.t_disp_buffer);
- __kmp_free(team->t.t_dispatch);
- //__kmp_free(team->t.t_set_max_active_levels);
- //__kmp_free(team->t.t_set_sched);
- __kmp_free(team->t.t_implicit_task_taskdata);
- #endif
- __kmp_allocate_team_arrays(team, max_nth);
-
- KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
-
- __kmp_free(oldThreads);
-}
-
-static kmp_internal_control_t
-__kmp_get_global_icvs( void ) {
-
- kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
-
-#if OMP_40_ENABLED
- KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
-#endif /* OMP_40_ENABLED */
-
- kmp_internal_control_t g_icvs = {
- 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
- (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
- (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
- (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
- __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
- __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
- __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
- // (use a max ub on value if __kmp_parallel_initialize not called yet)
- __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
- r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
-#if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0],
-#endif /* OMP_40_ENABLED */
- NULL //struct kmp_internal_control *next;
- };
-
- return g_icvs;
-}
-
-static kmp_internal_control_t
-__kmp_get_x_global_icvs( const kmp_team_t *team ) {
-
- kmp_internal_control_t gx_icvs;
- gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
- copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
- gx_icvs.next = NULL;
-
- return gx_icvs;
-}
-
-static void
-__kmp_initialize_root( kmp_root_t *root )
-{
- int f;
- kmp_team_t *root_team;
- kmp_team_t *hot_team;
- int hot_team_max_nth;
- kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
- kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
- KMP_DEBUG_ASSERT( root );
- KMP_ASSERT( ! root->r.r_begin );
-
- /* setup the root state structure */
- __kmp_init_lock( &root->r.r_begin_lock );
- root->r.r_begin = FALSE;
- root->r.r_active = FALSE;
- root->r.r_in_parallel = 0;
- root->r.r_blocktime = __kmp_dflt_blocktime;
- root->r.r_nested = __kmp_dflt_nested;
-
- /* setup the root team for this task */
- /* allocate the root team structure */
- KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
-
- root_team =
- __kmp_allocate_team(
- root,
- 1, // new_nproc
- 1, // max_nproc
-#if OMPT_SUPPORT
- 0, // root parallel id
-#endif
-#if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0],
-#endif
- &r_icvs,
- 0 // argc
- USE_NESTED_HOT_ARG(NULL) // master thread is unknown
- );
-#if USE_DEBUGGER
- // Non-NULL value should be assigned to make the debugger display the root team.
- TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
-#endif
-
- KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
-
- root->r.r_root_team = root_team;
- root_team->t.t_control_stack_top = NULL;
-
- /* initialize root team */
- root_team->t.t_threads[0] = NULL;
- root_team->t.t_nproc = 1;
- root_team->t.t_serialized = 1;
- // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
- root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
- root_team->t.t_sched.chunk = r_sched.chunk;
- KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
- root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
-
- /* setup the hot team for this task */
- /* allocate the hot team structure */
- KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
-
- hot_team =
- __kmp_allocate_team(
- root,
- 1, // new_nproc
- __kmp_dflt_team_nth_ub * 2, // max_nproc
-#if OMPT_SUPPORT
- 0, // root parallel id
-#endif
-#if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0],
-#endif
- &r_icvs,
- 0 // argc
- USE_NESTED_HOT_ARG(NULL) // master thread is unknown
- );
- KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
-
- root->r.r_hot_team = hot_team;
- root_team->t.t_control_stack_top = NULL;
-
- /* first-time initialization */
- hot_team->t.t_parent = root_team;
-
- /* initialize hot team */
- hot_team_max_nth = hot_team->t.t_max_nproc;
- for ( f = 0; f < hot_team_max_nth; ++ f ) {
- hot_team->t.t_threads[ f ] = NULL;
- }; // for
- hot_team->t.t_nproc = 1;
- // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
- hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
- hot_team->t.t_sched.chunk = r_sched.chunk;
- hot_team->t.t_size_changed = 0;
-}
-
-#ifdef KMP_DEBUG
-
-
-typedef struct kmp_team_list_item {
- kmp_team_p const * entry;
- struct kmp_team_list_item * next;
-} kmp_team_list_item_t;
-typedef kmp_team_list_item_t * kmp_team_list_t;
-
-
-static void
-__kmp_print_structure_team_accum( // Add team to list of teams.
- kmp_team_list_t list, // List of teams.
- kmp_team_p const * team // Team to add.
-) {
-
- // List must terminate with item where both entry and next are NULL.
- // Team is added to the list only once.
- // List is sorted in ascending order by team id.
- // Team id is *not* a key.
-
- kmp_team_list_t l;
-
- KMP_DEBUG_ASSERT( list != NULL );
- if ( team == NULL ) {
- return;
- }; // if
-
- __kmp_print_structure_team_accum( list, team->t.t_parent );
- __kmp_print_structure_team_accum( list, team->t.t_next_pool );
-
- // Search list for the team.
- l = list;
- while ( l->next != NULL && l->entry != team ) {
- l = l->next;
- }; // while
- if ( l->next != NULL ) {
- return; // Team has been added before, exit.
- }; // if
-
- // Team is not found. Search list again for insertion point.
- l = list;
- while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
- l = l->next;
- }; // while
-
- // Insert team.
- {
- kmp_team_list_item_t * item =
- (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
- * item = * l;
- l->entry = team;
- l->next = item;
- }
-
-}
-
-static void
-__kmp_print_structure_team(
- char const * title,
- kmp_team_p const * team
-
-) {
- __kmp_printf( "%s", title );
- if ( team != NULL ) {
- __kmp_printf( "%2x %p\n", team->t.t_id, team );
- } else {
- __kmp_printf( " - (nil)\n" );
- }; // if
-}
-
-static void
-__kmp_print_structure_thread(
- char const * title,
- kmp_info_p const * thread
-
-) {
- __kmp_printf( "%s", title );
- if ( thread != NULL ) {
- __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
- } else {
- __kmp_printf( " - (nil)\n" );
- }; // if
-}
-
-void
-__kmp_print_structure(
- void
-) {
-
- kmp_team_list_t list;
-
- // Initialize list of teams.
- list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
- list->entry = NULL;
- list->next = NULL;
-
- __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
- {
- int gtid;
- for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
- __kmp_printf( "%2d", gtid );
- if ( __kmp_threads != NULL ) {
- __kmp_printf( " %p", __kmp_threads[ gtid ] );
- }; // if
- if ( __kmp_root != NULL ) {
- __kmp_printf( " %p", __kmp_root[ gtid ] );
- }; // if
- __kmp_printf( "\n" );
- }; // for gtid
- }
-
- // Print out __kmp_threads array.
- __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
- if ( __kmp_threads != NULL ) {
- int gtid;
- for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
- kmp_info_t const * thread = __kmp_threads[ gtid ];
- if ( thread != NULL ) {
- __kmp_printf( "GTID %2d %p:\n", gtid, thread );
- __kmp_printf( " Our Root: %p\n", thread->th.th_root );
- __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
- __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
- __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
- __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
- __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
- __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
-#if OMP_40_ENABLED
- __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
-#endif
- __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
- __kmp_printf( "\n" );
- __kmp_print_structure_team_accum( list, thread->th.th_team );
- __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
- }; // if
- }; // for gtid
- } else {
- __kmp_printf( "Threads array is not allocated.\n" );
- }; // if
-
- // Print out __kmp_root array.
- __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
- if ( __kmp_root != NULL ) {
- int gtid;
- for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
- kmp_root_t const * root = __kmp_root[ gtid ];
- if ( root != NULL ) {
- __kmp_printf( "GTID %2d %p:\n", gtid, root );
- __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
- __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
- __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
- __kmp_printf( " Active?: %2d\n", root->r.r_active );
- __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
- __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
- __kmp_printf( "\n" );
- __kmp_print_structure_team_accum( list, root->r.r_root_team );
- __kmp_print_structure_team_accum( list, root->r.r_hot_team );
- }; // if
- }; // for gtid
- } else {
- __kmp_printf( "Ubers array is not allocated.\n" );
- }; // if
-
- __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
- while ( list->next != NULL ) {
- kmp_team_p const * team = list->entry;
- int i;
- __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
- __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
- __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
- __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
- __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
- __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
- for ( i = 0; i < team->t.t_nproc; ++ i ) {
- __kmp_printf( " Thread %2d: ", i );
- __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
- }; // for i
- __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
- __kmp_printf( "\n" );
- list = list->next;
- }; // while
-
- // Print out __kmp_thread_pool and __kmp_team_pool.
- __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
- __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
- __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
- __kmp_printf( "\n" );
-
- // Free team list.
- while ( list != NULL ) {
- kmp_team_list_item_t * item = list;
- list = list->next;
- KMP_INTERNAL_FREE( item );
- }; // while
-
-}
-
-#endif
-
-
-//---------------------------------------------------------------------------
-// Stuff for per-thread fast random number generator
-// Table of primes
-
-static const unsigned __kmp_primes[] = {
- 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
- 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
- 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
- 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
- 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
- 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
- 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
- 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
- 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
- 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
- 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
- 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
- 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
- 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
- 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
- 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
-};
-
-//---------------------------------------------------------------------------
-// __kmp_get_random: Get a random number using a linear congruential method.
-
-unsigned short
-__kmp_get_random( kmp_info_t * thread )
-{
- unsigned x = thread->th.th_x;
- unsigned short r = x>>16;
-
- thread->th.th_x = x*thread->th.th_a+1;
-
- KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
- thread->th.th_info.ds.ds_tid, r) );
-
- return r;
-}
-//--------------------------------------------------------
-// __kmp_init_random: Initialize a random number generator
-
-void
-__kmp_init_random( kmp_info_t * thread )
-{
- unsigned seed = thread->th.th_info.ds.ds_tid;
-
- thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
- thread->th.th_x = (seed+1)*thread->th.th_a+1;
- KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
-}
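
Taken together, the two routines above give each thread its own linear congruential generator: the multiplier a is picked from the prime table using the thread id, the state advances as x = a*x + 1 (mod 2^32), and each call returns the upper 16 bits of the previous state. A minimal stand-alone sketch of the same scheme (the names lcg_state, lcg_init and lcg_next are illustrative, not runtime symbols):

#include <stdio.h>

typedef struct { unsigned a, x; } lcg_state;

/* the first few entries of the __kmp_primes table above */
static const unsigned demo_primes[] = { 0x9e3779b1u, 0xffe6cc59u, 0x2109f6ddu };

static void lcg_init(lcg_state *s, unsigned seed) {
    s->a = demo_primes[seed % (sizeof(demo_primes) / sizeof(demo_primes[0]))];
    s->x = (seed + 1) * s->a + 1;
}

static unsigned short lcg_next(lcg_state *s) {
    unsigned short r = (unsigned short)(s->x >> 16); /* high 16 bits of the current state */
    s->x = s->x * s->a + 1;                          /* advance: x = a*x + 1 (unsigned wrap) */
    return r;
}

int main(void) {
    lcg_state s;
    lcg_init(&s, 0); /* seed with a thread id, e.g. tid 0 */
    for (int i = 0; i < 4; ++i)
        printf("%u\n", (unsigned)lcg_next(&s));
    return 0;
}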
-
-
-#if KMP_OS_WINDOWS
-/* reclaim array entries for root threads that are already dead, returns number reclaimed */
-static int
-__kmp_reclaim_dead_roots(void) {
- int i, r = 0;
-
- for(i = 0; i < __kmp_threads_capacity; ++i) {
- if( KMP_UBER_GTID( i ) &&
- !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
- !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots that died in a non-active state
- r += __kmp_unregister_root_other_thread(i);
- }
- }
- return r;
-}
-#endif
-
-/*
- This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
- free entries generated.
-
- For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
- already dead.
-
- On all platforms, expansion is attempted on the arrays __kmp_threads and __kmp_root, with appropriate
- update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
- __kmp_tp_capacity, if a threadprivate cache array has been created.
- Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
-
- After any dead root reclamation, if the clipping value allows array expansion to result in the generation
- of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
- array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
- Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
- a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
- as many free slots as possible up to nWish.
-
- If any argument is negative, the behavior is undefined.
-*/
-static int
-__kmp_expand_threads(int nWish, int nNeed) {
- int added = 0;
- int old_tp_cached;
- int __kmp_actual_max_nth;
-
- if(nNeed > nWish) /* normalize the arguments */
- nWish = nNeed;
-#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
-/* only for Windows static library */
- /* reclaim array entries for root threads that are already dead */
- added = __kmp_reclaim_dead_roots();
-
- if(nNeed) {
- nNeed -= added;
- if(nNeed < 0)
- nNeed = 0;
- }
- if(nWish) {
- nWish -= added;
- if(nWish < 0)
- nWish = 0;
- }
-#endif
- if(nWish <= 0)
- return added;
-
- while(1) {
- int nTarget;
- int minimumRequiredCapacity;
- int newCapacity;
- kmp_info_t **newThreads;
- kmp_root_t **newRoot;
-
- //
- // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
- // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
- // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
- // become > __kmp_max_nth in one of two ways:
- //
- // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
- // may not be reused by another thread, so we may need to increase
- // __kmp_threads_capacity to __kmp_max_threads + 1.
- //
- // 2) New foreign root(s) are encountered. We always register new
- // foreign roots. This may cause a smaller # of threads to be
- // allocated at subsequent parallel regions, but the worker threads
- // hang around (and eventually go to sleep) and need slots in the
- // __kmp_threads[] array.
- //
- // Anyway, that is the reason for moving the check to see if
- // __kmp_max_threads was exceeded into __kmp_reserve_threads()
- // instead of having it performed here. -BB
- //
- old_tp_cached = __kmp_tp_cached;
- __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
- KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
-
- /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
- nTarget = nWish;
- if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
- /* can't fulfil nWish, so try nNeed */
- if(nNeed) {
- nTarget = nNeed;
- if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
- /* possible expansion too small -- give up */
- break;
- }
- } else {
- /* best-effort */
- nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
- if(!nTarget) {
- /* can't expand at all -- give up */
- break;
- }
- }
- }
- minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
-
- newCapacity = __kmp_threads_capacity;
- do{
- newCapacity =
- newCapacity <= (__kmp_actual_max_nth >> 1) ?
- (newCapacity << 1) :
- __kmp_actual_max_nth;
- } while(newCapacity < minimumRequiredCapacity);
- newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
- newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
- KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
- KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
- memset(newThreads + __kmp_threads_capacity, 0,
- (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
- memset(newRoot + __kmp_threads_capacity, 0,
- (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
-
- if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
- /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
- while we were allocating the expanded array, and our new capacity is larger than the threadprivate
- cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
- of a double-check pair.
- */
- __kmp_free(newThreads);
- continue; /* start over and try again */
- }
- __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
- if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
- /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
- __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
- __kmp_free(newThreads);
- continue; /* start over and try again */
- } else {
- /* success */
- // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
- //
- *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
- *(kmp_root_t**volatile*)&__kmp_root = newRoot;
- added += newCapacity - __kmp_threads_capacity;
- *(volatile int*)&__kmp_threads_capacity = newCapacity;
- __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
- break; /* succeeded, so we can exit the loop */
- }
- }
- return added;
-}
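
The expansion loop above grows the capacity by repeated doubling, clipped at the effective maximum (__kmp_tp_capacity when a threadprivate cache exists, __kmp_sys_max_nth otherwise). A stand-alone sketch of just that growth rule (the helper name grow_capacity is illustrative; it assumes minimum_required <= actual_max, which the headroom check above guarantees):

/* Minimal model of the capacity doubling in __kmp_expand_threads:
 * double until minimum_required is covered, never exceeding actual_max.
 * Precondition (ensured by the caller's headroom check): minimum_required <= actual_max. */
static int grow_capacity(int current, int minimum_required, int actual_max) {
    int new_capacity = current;
    do {
        new_capacity = (new_capacity <= (actual_max >> 1))
                           ? (new_capacity << 1)
                           : actual_max;
    } while (new_capacity < minimum_required);
    return new_capacity;
}
/* e.g. grow_capacity(32, 100, 512) == 128; grow_capacity(300, 400, 512) == 512 */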
-
-/* register the current thread as a root thread and obtain our gtid */
-/* we must have the __kmp_initz_lock held at this point */
-/* Argument TRUE only if we are the thread that calls from __kmp_do_serial_initialize() */
-int
-__kmp_register_root( int initial_thread )
-{
- kmp_info_t *root_thread;
- kmp_root_t *root;
- int gtid;
- int capacity;
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
- KA_TRACE( 20, ("__kmp_register_root: entered\n"));
- KMP_MB();
-
-
- /*
- 2007-03-02:
-
- If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an initial one,
- the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
- return false (meaning there is at least one empty slot in the __kmp_threads array), but it
- is possible that the only free slot is #0, which is reserved for the initial thread and so cannot
- be used for this one. The following code works around this bug.
-
- However, the right solution seems to be not reserving slot #0 for the initial thread, because:
- (1) there is no magic in slot #0,
- (2) we cannot detect the initial thread reliably (the first thread which does serial
- initialization may not be a real initial thread).
- */
- capacity = __kmp_threads_capacity;
- if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
- -- capacity;
- }; // if
-
- /* see if there are too many threads */
- if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
- if ( __kmp_tp_cached ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantRegisterNewThread ),
- KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
- KMP_HNT( PossibleSystemLimitOnThreads ),
- __kmp_msg_null
- );
- }
- else {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantRegisterNewThread ),
- KMP_HNT( SystemLimitOnThreads ),
- __kmp_msg_null
- );
- }
- }; // if
-
- /* find an available thread slot */
- /* Don't reassign the zero slot since we need that to only be used by initial
- thread */
- for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
- ;
- KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
- KMP_ASSERT( gtid < __kmp_threads_capacity );
-
- /* update global accounting */
- __kmp_all_nth ++;
- TCW_4(__kmp_nth, __kmp_nth + 1);
-
- //
- // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
- // for low numbers of procs, and method #2 (keyed API call) for higher
- // numbers of procs.
- //
- if ( __kmp_adjust_gtid_mode ) {
- if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
- if ( TCR_4(__kmp_gtid_mode) != 2) {
- TCW_4(__kmp_gtid_mode, 2);
- }
- }
- else {
- if (TCR_4(__kmp_gtid_mode) != 1 ) {
- TCW_4(__kmp_gtid_mode, 1);
- }
- }
- }
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime to zero if necessary */
- /* Middle initialization might not have occurred yet */
- if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
- if ( __kmp_nth > __kmp_avail_proc ) {
- __kmp_zero_bt = TRUE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- /* setup this new hierarchy */
- if( ! ( root = __kmp_root[gtid] )) {
- root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
- KMP_DEBUG_ASSERT( ! root->r.r_root_team );
- }
-
- __kmp_initialize_root( root );
-
- /* setup new root thread structure */
- if( root->r.r_uber_thread ) {
- root_thread = root->r.r_uber_thread;
- } else {
- root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
- if ( __kmp_storage_map ) {
- __kmp_print_thread_storage_map( root_thread, gtid );
- }
- root_thread->th.th_info .ds.ds_gtid = gtid;
- root_thread->th.th_root = root;
- if( __kmp_env_consistency_check ) {
- root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
- }
- #if USE_FAST_MEMORY
- __kmp_initialize_fast_memory( root_thread );
- #endif /* USE_FAST_MEMORY */
-
- #if KMP_USE_BGET
- KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
- __kmp_initialize_bget( root_thread );
- #endif
- __kmp_init_random( root_thread ); // Initialize random number generator
- }
-
- /* setup the serial team held in reserve by the root thread */
- if( ! root_thread->th.th_serial_team ) {
- kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
- KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
-
- root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
-#if OMPT_SUPPORT
- 0, // root parallel id
-#endif
-#if OMP_40_ENABLED
- proc_bind_default,
-#endif
- &r_icvs,
- 0 USE_NESTED_HOT_ARG(NULL) );
- }
- KMP_ASSERT( root_thread->th.th_serial_team );
- KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
- root_thread->th.th_serial_team ) );
-
- /* drop root_thread into place */
- TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
-
- root->r.r_root_team->t.t_threads[0] = root_thread;
- root->r.r_hot_team ->t.t_threads[0] = root_thread;
- root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
- root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
- root->r.r_uber_thread = root_thread;
-
- /* initialize the thread, get it ready to go */
- __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
-
- /* prepare the master thread for get_gtid() */
- __kmp_gtid_set_specific( gtid );
-
- __kmp_itt_thread_name( gtid );
-
- #ifdef KMP_TDATA_GTID
- __kmp_gtid = gtid;
- #endif
- __kmp_create_worker( gtid, root_thread, __kmp_stksize );
- KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
- TCW_4(__kmp_init_gtid, TRUE);
-
- KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
- gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
- root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
- KMP_INIT_BARRIER_STATE ) );
- { // Initialize barrier data.
- int b;
- for ( b = 0; b < bs_last_barrier; ++ b ) {
- root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
-#if USE_DEBUGGER
- root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
-#endif
- }; // for
- }
- KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
-
-#if KMP_AFFINITY_SUPPORTED
- if ( TCR_4(__kmp_init_middle) ) {
- __kmp_affinity_set_init_mask( gtid, TRUE );
- }
-#endif /* KMP_AFFINITY_SUPPORTED */
-
- __kmp_root_counter ++;
-
- KMP_MB();
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
-
- return gtid;
-}
-
-#if KMP_NESTED_HOT_TEAMS
-static int
-__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
-{
- int i, n, nth;
- kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
- if( !hot_teams || !hot_teams[level].hot_team ) {
- return 0;
- }
- KMP_DEBUG_ASSERT( level < max_level );
- kmp_team_t *team = hot_teams[level].hot_team;
- nth = hot_teams[level].hot_team_nth;
- n = nth - 1; // master is not freed
- if( level < max_level - 1 ) {
- for( i = 0; i < nth; ++i ) {
- kmp_info_t *th = team->t.t_threads[i];
- n += __kmp_free_hot_teams( root, th, level + 1, max_level );
- if( i > 0 && th->th.th_hot_teams ) {
- __kmp_free( th->th.th_hot_teams );
- th->th.th_hot_teams = NULL;
- }
- }
- }
- __kmp_free_team( root, team, NULL );
- return n;
-}
-#endif
-
-/* Resets a root thread and clears its root and hot teams.
- Returns the number of __kmp_threads entries directly and indirectly freed.
-*/
-static int
-__kmp_reset_root(int gtid, kmp_root_t *root)
-{
- kmp_team_t * root_team = root->r.r_root_team;
- kmp_team_t * hot_team = root->r.r_hot_team;
- int n = hot_team->t.t_nproc;
- int i;
-
- KMP_DEBUG_ASSERT( ! root->r.r_active );
-
- root->r.r_root_team = NULL;
- root->r.r_hot_team = NULL;
- // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
- // to __kmp_free_team().
- __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
-#if KMP_NESTED_HOT_TEAMS
- if( __kmp_hot_teams_max_level > 1 ) { // need to free nested hot teams and their threads if any
- for( i = 0; i < hot_team->t.t_nproc; ++i ) {
- kmp_info_t *th = hot_team->t.t_threads[i];
- n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
- if( th->th.th_hot_teams ) {
- __kmp_free( th->th.th_hot_teams );
- th->th.th_hot_teams = NULL;
- }
- }
- }
-#endif
- __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
-
- //
- // Before we can reap the thread, we need to make certain that all
- // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
- //
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- __kmp_wait_to_unref_task_teams();
- }
-
- #if KMP_OS_WINDOWS
- /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
- KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
- (LPVOID)&(root->r.r_uber_thread->th),
- root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
- __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
- #endif /* KMP_OS_WINDOWS */
-
-#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
- int gtid = __kmp_get_gtid();
- __ompt_thread_end(ompt_thread_initial, gtid);
- }
-#endif
-
- TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
- __kmp_reap_thread( root->r.r_uber_thread, 1 );
-
- // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
- root->r.r_uber_thread = NULL;
- /* mark root as no longer in use */
- root->r.r_begin = FALSE;
-
- return n;
-}
-
-void
-__kmp_unregister_root_current_thread( int gtid )
-{
- KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
- /* this lock should be ok, since unregister_root_current_thread is never called during
- * an abort, only during a normal close. Furthermore, if you have the
- * forkjoin lock, you should never try to get the initz lock */
-
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
- if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
- KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
- return;
- }
- kmp_root_t *root = __kmp_root[gtid];
-
- KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
- KMP_ASSERT( KMP_UBER_GTID( gtid ));
- KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
- KMP_ASSERT( root->r.r_active == FALSE );
-
-
- KMP_MB();
-
-#if OMP_41_ENABLED
- kmp_info_t * thread = __kmp_threads[gtid];
- kmp_team_t * team = thread->th.th_team;
- kmp_task_team_t * task_team = thread->th.th_task_team;
-
- // we need to wait for the proxy tasks before finishing the thread
- if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
-#if OMPT_SUPPORT
- // the runtime is shutting down so we won't report any events
- thread->th.ompt_thread_info.state = ompt_state_undefined;
-#endif
- __kmp_task_team_wait(thread, team, NULL );
- }
-#endif
-
- __kmp_reset_root(gtid, root);
-
- /* free up this thread slot */
- __kmp_gtid_set_specific( KMP_GTID_DNE );
-#ifdef KMP_TDATA_GTID
- __kmp_gtid = KMP_GTID_DNE;
-#endif
-
- KMP_MB();
- KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
-
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
-}
-
-#if KMP_OS_WINDOWS
-/* __kmp_forkjoin_lock must be already held
- Unregisters a root thread that is not the current thread. Returns the number of
- __kmp_threads entries freed as a result.
- */
-static int
-__kmp_unregister_root_other_thread( int gtid )
-{
- kmp_root_t *root = __kmp_root[gtid];
- int r;
-
- KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
- KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
- KMP_ASSERT( KMP_UBER_GTID( gtid ));
- KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
- KMP_ASSERT( root->r.r_active == FALSE );
-
- r = __kmp_reset_root(gtid, root);
- KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
- return r;
-}
-#endif
-
-#if KMP_DEBUG
-void __kmp_task_info() {
-
- kmp_int32 gtid = __kmp_entry_gtid();
- kmp_int32 tid = __kmp_tid_from_gtid( gtid );
- kmp_info_t *this_thr = __kmp_threads[ gtid ];
- kmp_team_t *steam = this_thr->th.th_serial_team;
- kmp_team_t *team = this_thr->th.th_team;
-
- __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
- gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
-}
-#endif // KMP_DEBUG
-
-/* TODO optimize with one big memclr, take out what isn't needed,
- * split responsibility to workers as much as possible, and delay
- * initialization of features as much as possible */
-static void
-__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
-{
- /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
- * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
- kmp_info_t *master = team->t.t_threads[0];
- KMP_DEBUG_ASSERT( this_thr != NULL );
- KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
- KMP_DEBUG_ASSERT( team );
- KMP_DEBUG_ASSERT( team->t.t_threads );
- KMP_DEBUG_ASSERT( team->t.t_dispatch );
- KMP_DEBUG_ASSERT( master );
- KMP_DEBUG_ASSERT( master->th.th_root );
-
- KMP_MB();
-
- TCW_SYNC_PTR(this_thr->th.th_team, team);
-
- this_thr->th.th_info.ds.ds_tid = tid;
- this_thr->th.th_set_nproc = 0;
-#if OMP_40_ENABLED
- this_thr->th.th_set_proc_bind = proc_bind_default;
-# if KMP_AFFINITY_SUPPORTED
- this_thr->th.th_new_place = this_thr->th.th_current_place;
-# endif
-#endif
- this_thr->th.th_root = master->th.th_root;
-
- /* setup the thread's cache of the team structure */
- this_thr->th.th_team_nproc = team->t.t_nproc;
- this_thr->th.th_team_master = master;
- this_thr->th.th_team_serialized = team->t.t_serialized;
- TCW_PTR(this_thr->th.th_sleep_loc, NULL);
-
- KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
-
- KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
- tid, gtid, this_thr, this_thr->th.th_current_task ) );
-
- __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
-
- KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
- tid, gtid, this_thr, this_thr->th.th_current_task ) );
- // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
-
- /* TODO no worksharing in speculative threads */
- this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
-
- this_thr->th.th_local.this_construct = 0;
-
-#ifdef BUILD_TV
- this_thr->th.th_local.tv_data = 0;
-#endif
-
- if ( ! this_thr->th.th_pri_common ) {
- this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
- if ( __kmp_storage_map ) {
- __kmp_print_storage_map_gtid(
- gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
- sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
- );
- }; // if
- this_thr->th.th_pri_head = NULL;
- }; // if
-
- /* Initialize dynamic dispatch */
- {
- volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
- /*
- * Use team max_nproc since this will never change for the team.
- */
- size_t disp_size = sizeof( dispatch_private_info_t ) *
- ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
- KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
- KMP_ASSERT( dispatch );
- KMP_DEBUG_ASSERT( team->t.t_dispatch );
- KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
-
- dispatch->th_disp_index = 0;
-
- if( ! dispatch->th_disp_buffer ) {
- dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
-
- if ( __kmp_storage_map ) {
- __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
- &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
- disp_size, "th_%d.th_dispatch.th_disp_buffer "
- "(team_%d.t_dispatch[%d].th_disp_buffer)",
- gtid, team->t.t_id, gtid );
- }
- } else {
- memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
- }
-
- dispatch->th_dispatch_pr_current = 0;
- dispatch->th_dispatch_sh_current = 0;
-
- dispatch->th_deo_fcn = 0; /* ORDERED */
- dispatch->th_dxo_fcn = 0; /* END ORDERED */
- }
-
- this_thr->th.th_next_pool = NULL;
-
- if (!this_thr->th.th_task_state_memo_stack) {
- size_t i;
- this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
- this_thr->th.th_task_state_top = 0;
- this_thr->th.th_task_state_stack_sz = 4;
- for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
- this_thr->th.th_task_state_memo_stack[i] = 0;
- }
-
- KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
- KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
-
- KMP_MB();
-}
-
-
-/* allocate a new thread for the requesting team. this is only called from within a
- * forkjoin critical section. we will first try to get an available thread from the
- * thread pool. if none is available, we will fork a new one, assuming we are able
- * to create one; this should be assured, as the caller should have checked this
- * first.
- */
-kmp_info_t *
-__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
-{
- kmp_team_t *serial_team;
- kmp_info_t *new_thr;
- int new_gtid;
-
- KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
- KMP_DEBUG_ASSERT( root && team );
-#if !KMP_NESTED_HOT_TEAMS
- KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
-#endif
- KMP_MB();
-
- /* first, try to get one from the thread pool */
- if ( __kmp_thread_pool ) {
-
- new_thr = (kmp_info_t*)__kmp_thread_pool;
- __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
- if ( new_thr == __kmp_thread_pool_insert_pt ) {
- __kmp_thread_pool_insert_pt = NULL;
- }
- TCW_4(new_thr->th.th_in_pool, FALSE);
- //
- // Don't touch th_active_in_pool or th_active.
- // The worker thread adjusts those flags as it sleeps/awakens.
- //
-
- __kmp_thread_pool_nth--;
-
- KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
- __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
- KMP_ASSERT( ! new_thr->th.th_team );
- KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
- KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
-
- /* setup the thread structure */
- __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
- KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
-
- TCW_4(__kmp_nth, __kmp_nth + 1);
-
- new_thr->th.th_task_state = 0;
- new_thr->th.th_task_state_top = 0;
- new_thr->th.th_task_state_stack_sz = 4;
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime back to zero if necessary */
- /* Middle initialization might not have occurred yet */
- if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
- if ( __kmp_nth > __kmp_avail_proc ) {
- __kmp_zero_bt = TRUE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
-#if KMP_DEBUG
- // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
- int b;
- kmp_balign_t * balign = new_thr->th.th_bar;
- for( b = 0; b < bs_last_barrier; ++ b )
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#endif
-
- KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
- __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
-
- KMP_MB();
- return new_thr;
- }
-
-
- /* no, we'll fork a new one */
- KMP_ASSERT( __kmp_nth == __kmp_all_nth );
- KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
-
- //
- // If this is the first worker thread the RTL is creating, then also
- // launch the monitor thread. We try to do this as early as possible.
- //
- if ( ! TCR_4( __kmp_init_monitor ) ) {
- __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
- if ( ! TCR_4( __kmp_init_monitor ) ) {
- KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
- TCW_4( __kmp_init_monitor, 1 );
- __kmp_create_monitor( & __kmp_monitor );
- KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
- #if KMP_OS_WINDOWS
- // AC: wait until monitor has started. This is a fix for CQ232808.
- // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
- // work in between, then there is a high probability that the monitor thread starts after
- // the library shutdown. At shutdown it is too late to cope with the problem, because
- // when the master is in DllMain (process detach) the monitor has no chance to start
- // (it is blocked), and the master has no means to inform the monitor that the library has gone,
- // because all the memory which the monitor can access is going to be released/reset.
- while ( TCR_4(__kmp_init_monitor) < 2 ) {
- KMP_YIELD( TRUE );
- }
- KF_TRACE( 10, ( "after monitor thread has started\n" ) );
- #endif
- }
- __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
- }
-
- KMP_MB();
- for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
- KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
- }
-
- /* allocate space for it. */
- new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
-
- TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
-
- if ( __kmp_storage_map ) {
- __kmp_print_thread_storage_map( new_thr, new_gtid );
- }
-
- /* add the reserve serialized team, initialized from the team's master thread */
- {
- kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
- KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
-
- new_thr->th.th_serial_team = serial_team =
- (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
-#if OMPT_SUPPORT
- 0, // root parallel id
-#endif
-#if OMP_40_ENABLED
- proc_bind_default,
-#endif
- &r_icvs,
- 0 USE_NESTED_HOT_ARG(NULL) );
- }
- KMP_ASSERT ( serial_team );
- serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
- serial_team->t.t_threads[0] = new_thr;
- KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
- new_thr ) );
-
- /* setup the thread structures */
- __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
-
- #if USE_FAST_MEMORY
- __kmp_initialize_fast_memory( new_thr );
- #endif /* USE_FAST_MEMORY */
-
- #if KMP_USE_BGET
- KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
- __kmp_initialize_bget( new_thr );
- #endif
-
- __kmp_init_random( new_thr ); // Initialize random number generator
-
- /* Initialize these only once when thread is grabbed for a team allocation */
- KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
- __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
-
- int b;
- kmp_balign_t * balign = new_thr->th.th_bar;
- for(b=0; b<bs_last_barrier; ++b) {
- balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
- balign[b].bb.team = NULL;
- balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
- balign[b].bb.use_oncore_barrier = 0;
- }
-
- new_thr->th.th_spin_here = FALSE;
- new_thr->th.th_next_waiting = 0;
-
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
- new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
- new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
- new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
-#endif
-
- TCW_4(new_thr->th.th_in_pool, FALSE);
- new_thr->th.th_active_in_pool = FALSE;
- TCW_4(new_thr->th.th_active, TRUE);
-
- /* adjust the global counters */
- __kmp_all_nth ++;
- __kmp_nth ++;
-
- //
- // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
- // for low numbers of procs, and method #2 (keyed API call) for higher
- // numbers of procs.
- //
- if ( __kmp_adjust_gtid_mode ) {
- if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
- if ( TCR_4(__kmp_gtid_mode) != 2) {
- TCW_4(__kmp_gtid_mode, 2);
- }
- }
- else {
- if (TCR_4(__kmp_gtid_mode) != 1 ) {
- TCW_4(__kmp_gtid_mode, 1);
- }
- }
- }
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime back to zero if necessary */
- /* Middle initialization might not have occurred yet */
- if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
- if ( __kmp_nth > __kmp_avail_proc ) {
- __kmp_zero_bt = TRUE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- /* actually fork it and create the new worker thread */
- KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
- __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
- KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
-
-
- KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
- KMP_MB();
- return new_thr;
-}
-
-/*
- * reinitialize team for reuse.
- *
- * The hot team code calls this routine at every fork barrier, so EPCC barrier
- * tests are extremely sensitive to changes in it, esp. writes to the team
- * struct, which cause a cache invalidation in all threads.
- *
- * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
- */
-static void
-__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
- KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
- team->t.t_threads[0], team ) );
- KMP_DEBUG_ASSERT( team && new_icvs);
- KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
- team->t.t_ident = loc;
-
- team->t.t_id = KMP_GEN_TEAM_ID();
-
- // Copy ICVs to the master thread's implicit taskdata
- __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
- copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
-
- KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
- team->t.t_threads[0], team ) );
-}
-
-
-/* initialize the team data structure
- * this assumes the t_threads and t_max_nproc are already set
- * also, we don't touch the arguments */
-static void
-__kmp_initialize_team(
- kmp_team_t * team,
- int new_nproc,
- kmp_internal_control_t * new_icvs,
- ident_t * loc
-) {
- KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
-
- /* verify */
- KMP_DEBUG_ASSERT( team );
- KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
- KMP_DEBUG_ASSERT( team->t.t_threads );
- KMP_MB();
-
- team->t.t_master_tid = 0; /* not needed */
- /* team->t.t_master_bar; not needed */
- team->t.t_serialized = new_nproc > 1 ? 0 : 1;
- team->t.t_nproc = new_nproc;
-
- /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
- team->t.t_next_pool = NULL;
- /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
-
- TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
- team->t.t_invoke = NULL; /* not needed */
-
- // TODO???: team->t.t_max_active_levels = new_max_active_levels;
- team->t.t_sched = new_icvs->sched;
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- team->t.t_fp_control_saved = FALSE; /* not needed */
- team->t.t_x87_fpu_control_word = 0; /* not needed */
- team->t.t_mxcsr = 0; /* not needed */
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- team->t.t_construct = 0;
- __kmp_init_lock( & team->t.t_single_lock );
-
- team->t.t_ordered .dt.t_value = 0;
- team->t.t_master_active = FALSE;
-
- memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
-
-#ifdef KMP_DEBUG
- team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
-#endif
- team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
-
- team->t.t_control_stack_top = NULL;
-
- __kmp_reinitialize_team( team, new_icvs, loc );
-
- KMP_MB();
- KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
-}
-
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
-/* Sets full mask for thread and returns old mask, no changes to structures. */
-static void
-__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
-{
- if ( KMP_AFFINITY_CAPABLE() ) {
- int status;
- if ( old_mask != NULL ) {
- status = __kmp_get_system_affinity( old_mask, TRUE );
- int error = errno;
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( ChangeThreadAffMaskError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- }
- __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
- }
-}
-#endif
-
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
-
-//
-// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
-// It calculates the worker + master thread's partition based upon the parent
-// thread's partition, and binds each worker to a place in its partition.
-// The master thread's partition should already include its current binding.
-//
-static void
-__kmp_partition_places( kmp_team_t *team )
-{
- //
- // Copy the master thread's place partition to the team struct
- //
- kmp_info_t *master_th = team->t.t_threads[0];
- KMP_DEBUG_ASSERT( master_th != NULL );
- kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
- int first_place = master_th->th.th_first_place;
- int last_place = master_th->th.th_last_place;
- int masters_place = master_th->th.th_current_place;
- team->t.t_first_place = first_place;
- team->t.t_last_place = last_place;
-
- KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
- proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
- masters_place, first_place, last_place ) );
-
- switch ( proc_bind ) {
-
- case proc_bind_default:
- //
- // serial teams might have the proc_bind policy set to
- // proc_bind_default. It doesn't matter, as we don't
- // rebind the master thread for any proc_bind policy.
- //
- KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
- break;
-
- case proc_bind_master:
- {
- int f;
- int n_th = team->t.t_nproc;
- for ( f = 1; f < n_th; f++ ) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT( th != NULL );
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = masters_place;
-
- KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
- __kmp_gtid_from_thread( team->t.t_threads[f] ),
- team->t.t_id, f, masters_place, first_place, last_place ) );
- }
- }
- break;
-
- case proc_bind_close:
- {
- int f;
- int n_th = team->t.t_nproc;
- int n_places;
- if ( first_place <= last_place ) {
- n_places = last_place - first_place + 1;
- }
- else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
- }
- if ( n_th <= n_places ) {
- int place = masters_place;
- for ( f = 1; f < n_th; f++ ) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT( th != NULL );
-
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = place;
-
- KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
- __kmp_gtid_from_thread( team->t.t_threads[f] ),
- team->t.t_id, f, place, first_place, last_place ) );
- }
- }
- else {
- int S, rem, gap, s_count;
- S = n_th / n_places;
- s_count = 0;
- rem = n_th - ( S * n_places );
- gap = rem > 0 ? n_places/rem : n_places;
- int place = masters_place;
- int gap_ct = gap;
- for ( f = 0; f < n_th; f++ ) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT( th != NULL );
-
- th->th.th_first_place = first_place;
- th->th.th_last_place = last_place;
- th->th.th_new_place = place;
- s_count++;
-
- if ( (s_count == S) && rem && (gap_ct == gap) ) {
- // do nothing, add an extra thread to place on next iteration
- }
- else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
- // we added an extra thread to this place; move to next place
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- s_count = 0;
- gap_ct = 1;
- rem--;
- }
- else if (s_count == S) { // place full; don't add extra
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- gap_ct++;
- s_count = 0;
- }
-
- KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
- __kmp_gtid_from_thread( team->t.t_threads[f] ),
- team->t.t_id, f, th->th.th_new_place, first_place,
- last_place ) );
- }
- KMP_DEBUG_ASSERT( place == masters_place );
- }
- }
- break;
-
- case proc_bind_spread:
- {
- int f;
- int n_th = team->t.t_nproc;
- int n_places;
- if ( first_place <= last_place ) {
- n_places = last_place - first_place + 1;
- }
- else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
- }
- if ( n_th <= n_places ) {
- int place = masters_place;
- int S = n_places/n_th;
- int s_count, rem, gap, gap_ct;
- rem = n_places - n_th*S;
- gap = rem ? n_th/rem : 1;
- gap_ct = gap;
- for ( f = 0; f < n_th; f++ ) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT( th != NULL );
-
- th->th.th_first_place = place;
- th->th.th_new_place = place;
- s_count = 1;
- while (s_count < S) {
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- s_count++;
- }
- if (rem && (gap_ct == gap)) {
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- rem--;
- gap_ct = 0;
- }
- th->th.th_last_place = place;
- gap_ct++;
-
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
-
- KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
- __kmp_gtid_from_thread( team->t.t_threads[f] ),
- team->t.t_id, f, th->th.th_new_place,
- th->th.th_first_place, th->th.th_last_place ) );
- }
- KMP_DEBUG_ASSERT( place == masters_place );
- }
- else {
- int S, rem, gap, s_count;
- S = n_th / n_places;
- s_count = 0;
- rem = n_th - ( S * n_places );
- gap = rem > 0 ? n_places/rem : n_places;
- int place = masters_place;
- int gap_ct = gap;
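-                // More threads than places: bind each thread to a single place, S threads
-                // per place, with every gap-th place taking one of the rem leftover threads.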
- for ( f = 0; f < n_th; f++ ) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT( th != NULL );
-
- th->th.th_first_place = place;
- th->th.th_last_place = place;
- th->th.th_new_place = place;
- s_count++;
-
- if ( (s_count == S) && rem && (gap_ct == gap) ) {
- // do nothing, add an extra thread to place on next iteration
- }
- else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
- // we added an extra thread to this place; move on to next place
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- s_count = 0;
- gap_ct = 1;
- rem--;
- }
- else if (s_count == S) { // place is full; don't add extra thread
- if ( place == last_place ) {
- place = first_place;
- }
- else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
- place = 0;
- }
- else {
- place++;
- }
- gap_ct++;
- s_count = 0;
- }
-
- KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
- __kmp_gtid_from_thread( team->t.t_threads[f] ),
- team->t.t_id, f, th->th.th_new_place,
- th->th.th_first_place, th->th.th_last_place) );
- }
- KMP_DEBUG_ASSERT( place == masters_place );
- }
- }
- break;
-
- default:
- break;
- }
-
- KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
-}
-
-#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
-
-/* Allocate a new team data structure to use; take one from the free pool if available. */
-kmp_team_t *
-__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
-#endif
-#if OMP_40_ENABLED
- kmp_proc_bind_t new_proc_bind,
-#endif
- kmp_internal_control_t *new_icvs,
- int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
-{
- KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
- int f;
- kmp_team_t *team;
- int use_hot_team = ! root->r.r_active;
- int level = 0;
-
- KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
- KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
- KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
- KMP_MB();
-
-#if KMP_NESTED_HOT_TEAMS
- kmp_hot_team_ptr_t *hot_teams;
- if( master ) {
- team = master->th.th_team;
- level = team->t.t_active_level;
- if( master->th.th_teams_microtask ) { // in teams construct?
- if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
- team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
- master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
-                ++level; // do not increment if #teams==1 or for the outer fork of the teams; increment otherwise
- }
- }
- hot_teams = master->th.th_hot_teams;
- if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
- { // hot team has already been allocated for given level
- use_hot_team = 1;
- } else {
- use_hot_team = 0;
- }
- }
-#endif
- // Optimization to use a "hot" team
- if( use_hot_team && new_nproc > 1 ) {
- KMP_DEBUG_ASSERT( new_nproc == max_nproc );
-#if KMP_NESTED_HOT_TEAMS
- team = hot_teams[level].hot_team;
-#else
- team = root->r.r_hot_team;
-#endif
-#if KMP_DEBUG
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
- team->t.t_task_team[0], team->t.t_task_team[1] ));
- }
-#endif
-
- // Has the number of threads changed?
- /* Let's assume the most common case is that the number of threads is unchanged, and
- put that case first. */
- if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
- KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
- // This case can mean that omp_set_num_threads() was called and the hot team size
- // was already reduced, so we check the special flag
- if ( team->t.t_size_changed == -1 ) {
- team->t.t_size_changed = 1;
- } else {
- team->t.t_size_changed = 0;
- }
-
- // TODO???: team->t.t_max_active_levels = new_max_active_levels;
- team->t.t_sched = new_icvs->sched;
-
- __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
-
- KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team ) );
- __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
-
-#if OMP_40_ENABLED
-# if KMP_AFFINITY_SUPPORTED
- if ( ( team->t.t_size_changed == 0 )
- && ( team->t.t_proc_bind == new_proc_bind ) ) {
- KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
- team->t.t_id, new_proc_bind, team->t.t_first_place,
- team->t.t_last_place ) );
- }
- else {
- team->t.t_proc_bind = new_proc_bind;
- __kmp_partition_places( team );
- }
-# else
- if ( team->t.t_proc_bind != new_proc_bind ) {
- team->t.t_proc_bind = new_proc_bind;
- }
-# endif /* KMP_AFFINITY_SUPPORTED */
-#endif /* OMP_40_ENABLED */
- }
- else if( team->t.t_nproc > new_nproc ) {
- KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
-
- team->t.t_size_changed = 1;
-#if KMP_NESTED_HOT_TEAMS
- if( __kmp_hot_teams_mode == 0 ) {
- // AC: saved number of threads should correspond to team's value in this mode,
- // can be bigger in mode 1, when hot team has some threads in reserve
- KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
- hot_teams[level].hot_team_nth = new_nproc;
-#endif // KMP_NESTED_HOT_TEAMS
- /* release the extra threads we don't need any more */
- for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
- KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
- if ( __kmp_tasking_mode != tskm_immediate_exec) {
- // When decreasing team size, threads no longer in the team should unref task team.
- team->t.t_threads[f]->th.th_task_team = NULL;
- }
- __kmp_free_thread( team->t.t_threads[ f ] );
- team->t.t_threads[ f ] = NULL;
- }
-#if KMP_NESTED_HOT_TEAMS
- } // (__kmp_hot_teams_mode == 0)
-#endif // KMP_NESTED_HOT_TEAMS
- team->t.t_nproc = new_nproc;
- // TODO???: team->t.t_max_active_levels = new_max_active_levels;
- team->t.t_sched = new_icvs->sched;
- __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
-
- /* update the remaining threads */
- for(f = 0; f < new_nproc; ++f) {
- team->t.t_threads[f]->th.th_team_nproc = new_nproc;
- }
- // restore the current task state of the master thread: should be the implicit task
- KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team ) );
-
- __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
-
-#ifdef KMP_DEBUG
- for ( f = 0; f < team->t.t_nproc; f++ ) {
- KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
- team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
- }
-#endif
-
-#if OMP_40_ENABLED
- team->t.t_proc_bind = new_proc_bind;
-# if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places( team );
-# endif
-#endif
- }
- else { // team->t.t_nproc < new_nproc
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
- kmp_affin_mask_t *old_mask;
- if ( KMP_AFFINITY_CAPABLE() ) {
- KMP_CPU_ALLOC(old_mask);
- }
-#endif
-
- KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
-
- team->t.t_size_changed = 1;
-
-
-#if KMP_NESTED_HOT_TEAMS
- int avail_threads = hot_teams[level].hot_team_nth;
- if( new_nproc < avail_threads )
- avail_threads = new_nproc;
- kmp_info_t **other_threads = team->t.t_threads;
- for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
- // Adjust barrier data of reserved threads (if any) of the team
- // Other data will be set in __kmp_initialize_info() below.
- int b;
- kmp_balign_t * balign = other_threads[f]->th.th_bar;
- for ( b = 0; b < bs_last_barrier; ++ b ) {
- balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
-#endif
- }
- }
- if( hot_teams[level].hot_team_nth >= new_nproc ) {
- // we have all needed threads in reserve, no need to allocate any
-                // this is only possible in mode 1; there cannot be reserved threads in mode 0
- KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
- team->t.t_nproc = new_nproc; // just get reserved threads involved
- } else {
- // we may have some threads in reserve, but not enough
- team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
- hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
-#endif // KMP_NESTED_HOT_TEAMS
- if(team->t.t_max_nproc < new_nproc) {
- /* reallocate larger arrays */
- __kmp_reallocate_team_arrays(team, new_nproc);
- __kmp_reinitialize_team( team, new_icvs, NULL );
- }
-
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
- /* Temporarily set full mask for master thread before
- creation of workers. The reason is that workers inherit
-               the affinity from the master, so if many workers are
-               created on a single core quickly, they don't get
- a chance to set their own affinity for a long time.
- */
- __kmp_set_thread_affinity_mask_full_tmp( old_mask );
-#endif
-
- /* allocate new threads for the hot team */
- for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
- kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
- KMP_DEBUG_ASSERT( new_worker );
- team->t.t_threads[ f ] = new_worker;
-
- KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
- team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
- team->t.t_bar[bs_forkjoin_barrier].b_arrived,
- team->t.t_bar[bs_plain_barrier].b_arrived ) );
-
- { // Initialize barrier data for new threads.
- int b;
- kmp_balign_t * balign = new_worker->th.th_bar;
- for( b = 0; b < bs_last_barrier; ++ b ) {
- balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
-#endif
- }
- }
- }
-
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
- if ( KMP_AFFINITY_CAPABLE() ) {
- /* Restore initial master thread's affinity mask */
- __kmp_set_system_affinity( old_mask, TRUE );
- KMP_CPU_FREE(old_mask);
- }
-#endif
-#if KMP_NESTED_HOT_TEAMS
- } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
-#endif // KMP_NESTED_HOT_TEAMS
-        /* make sure everyone is synchronized */
- int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
- __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
-
- /* reinitialize the threads */
- KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
- for (f=0; f < team->t.t_nproc; ++f)
- __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
- if (level) { // set th_task_state for new threads in nested hot team
- // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
- // th_task_state for the new threads. th_task_state for master thread will not be accurate until
- // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
- for (f=old_nproc; f < team->t.t_nproc; ++f)
- team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
- }
- else { // set th_task_state for new threads in non-nested hot team
- int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
- for (f=old_nproc; f < team->t.t_nproc; ++f)
- team->t.t_threads[f]->th.th_task_state = old_state;
- }
-
-#ifdef KMP_DEBUG
- for ( f = 0; f < team->t.t_nproc; ++ f ) {
- KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
- team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
- }
-#endif
-
-#if OMP_40_ENABLED
- team->t.t_proc_bind = new_proc_bind;
-# if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places( team );
-# endif
-#endif
- } // Check changes in number of threads
-
-#if OMP_40_ENABLED
- kmp_info_t *master = team->t.t_threads[0];
- if( master->th.th_teams_microtask ) {
- for( f = 1; f < new_nproc; ++f ) {
- // propagate teams construct specific info to workers
- kmp_info_t *thr = team->t.t_threads[f];
- thr->th.th_teams_microtask = master->th.th_teams_microtask;
- thr->th.th_teams_level = master->th.th_teams_level;
- thr->th.th_teams_size = master->th.th_teams_size;
- }
- }
-#endif /* OMP_40_ENABLED */
-#if KMP_NESTED_HOT_TEAMS
- if( level ) {
- // Sync barrier state for nested hot teams, not needed for outermost hot team.
- for( f = 1; f < new_nproc; ++f ) {
- kmp_info_t *thr = team->t.t_threads[f];
- int b;
- kmp_balign_t * balign = thr->th.th_bar;
- for( b = 0; b < bs_last_barrier; ++ b ) {
- balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
- KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
-#if USE_DEBUGGER
- balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
-#endif
- }
- }
- }
-#endif // KMP_NESTED_HOT_TEAMS
-
- /* reallocate space for arguments if necessary */
- __kmp_alloc_argv_entries( argc, team, TRUE );
- team->t.t_argc = argc;
- //
- // The hot team re-uses the previous task team,
- // if untouched during the previous release->gather phase.
- //
-
- KF_TRACE( 10, ( " hot_team = %p\n", team ) );
-
-#if KMP_DEBUG
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
- team->t.t_task_team[0], team->t.t_task_team[1] ));
- }
-#endif
-
-#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
-#endif
-
- KMP_MB();
-
- return team;
- }
-
- /* next, let's try to take one from the team pool */
- KMP_MB();
- for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
- {
- /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
- if ( team->t.t_max_nproc >= max_nproc ) {
- /* take this team from the team pool */
- __kmp_team_pool = team->t.t_next_pool;
-
- /* setup the team for fresh use */
- __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
-
- KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
- &team->t.t_task_team[0], &team->t.t_task_team[1]) );
- team->t.t_task_team[0] = NULL;
- team->t.t_task_team[1] = NULL;
-
- /* reallocate space for arguments if necessary */
- __kmp_alloc_argv_entries( argc, team, TRUE );
- team->t.t_argc = argc;
-
- KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
- team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
- { // Initialize barrier data.
- int b;
- for ( b = 0; b < bs_last_barrier; ++ b) {
- team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
-#if USE_DEBUGGER
- team->t.t_bar[ b ].b_master_arrived = 0;
- team->t.t_bar[ b ].b_team_arrived = 0;
-#endif
- }
- }
-
-#if OMP_40_ENABLED
- team->t.t_proc_bind = new_proc_bind;
-#endif
-
- KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
-
-#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
-#endif
-
- KMP_MB();
-
- return team;
- }
-
- /* reap team if it is too small, then loop back and check the next one */
-        /* not sure if this is wise, but it will be redone during the hot-teams rewrite. */
- /* TODO: Use technique to find the right size hot-team, don't reap them */
- team = __kmp_reap_team( team );
- __kmp_team_pool = team;
- }
-
- /* nothing available in the pool, no matter, make a new team! */
- KMP_MB();
- team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
-
- /* and set it up */
- team->t.t_max_nproc = max_nproc;
-    /* NOTE well: for some reason, allocating one big buffer and dividing it
-     * up seems to hurt performance significantly on the P4, so let's not use
-     * this... */
- __kmp_allocate_team_arrays( team, max_nproc );
-
- KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
- __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
-
- KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
- &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
- team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
- team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
-
- if ( __kmp_storage_map ) {
- __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
- }
-
- /* allocate space for arguments */
- __kmp_alloc_argv_entries( argc, team, FALSE );
- team->t.t_argc = argc;
-
- KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
- team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
- { // Initialize barrier data.
- int b;
- for ( b = 0; b < bs_last_barrier; ++ b ) {
- team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
-#if USE_DEBUGGER
- team->t.t_bar[ b ].b_master_arrived = 0;
- team->t.t_bar[ b ].b_team_arrived = 0;
-#endif
- }
- }
-
-#if OMP_40_ENABLED
- team->t.t_proc_bind = new_proc_bind;
-#endif
-
-#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
- team->t.ompt_serialized_team_info = NULL;
-#endif
-
- KMP_MB();
-
- KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
-
- return team;
-}
-
-/* TODO implement hot-teams at all levels */
-/* TODO implement lazy thread release on demand (disband request) */
-
-/* free the team. return it to the team pool. release all the threads
- * associated with it */
-void
-__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
-{
- int f;
- KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
-
- /* verify state */
- KMP_DEBUG_ASSERT( root );
- KMP_DEBUG_ASSERT( team );
- KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
- KMP_DEBUG_ASSERT( team->t.t_threads );
-
- int use_hot_team = team == root->r.r_hot_team;
-#if KMP_NESTED_HOT_TEAMS
- int level;
- kmp_hot_team_ptr_t *hot_teams;
- if( master ) {
- level = team->t.t_active_level - 1;
- if( master->th.th_teams_microtask ) { // in teams construct?
- if( master->th.th_teams_size.nteams > 1 ) {
- ++level; // level was not increased in teams construct for team_of_masters
- }
- if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
- master->th.th_teams_level == team->t.t_level ) {
- ++level; // level was not increased in teams construct for team_of_workers before the parallel
- } // team->t.t_level will be increased inside parallel
- }
- hot_teams = master->th.th_hot_teams;
- if( level < __kmp_hot_teams_max_level ) {
- KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
- use_hot_team = 1;
- }
- }
-#endif // KMP_NESTED_HOT_TEAMS
-
- /* team is done working */
- TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
- team->t.t_copyin_counter = 0; // init counter for possible reuse
- // Do not reset pointer to parent team to NULL for hot teams.
-
- /* if we are non-hot team, release our threads */
- if( ! use_hot_team ) {
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // Delete task teams
- int tt_idx;
- for (tt_idx=0; tt_idx<2; ++tt_idx) {
- kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
- if ( task_team != NULL ) {
- for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
- team->t.t_threads[f]->th.th_task_team = NULL;
- }
- KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
-#if KMP_NESTED_HOT_TEAMS
- __kmp_free_task_team( master, task_team );
-#endif
- team->t.t_task_team[tt_idx] = NULL;
- }
- }
- }
-
- // Reset pointer to parent team only for non-hot teams.
- team->t.t_parent = NULL;
-
-
- /* free the worker threads */
- for ( f = 1; f < team->t.t_nproc; ++ f ) {
- KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
- __kmp_free_thread( team->t.t_threads[ f ] );
- team->t.t_threads[ f ] = NULL;
- }
-
-
- /* put the team back in the team pool */
- /* TODO limit size of team pool, call reap_team if pool too large */
- team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
- __kmp_team_pool = (volatile kmp_team_t*) team;
- }
-
- KMP_MB();
-}
-
-
-/* reap the team. destroy it, reclaim all its resources and free its memory */
-kmp_team_t *
-__kmp_reap_team( kmp_team_t *team )
-{
- kmp_team_t *next_pool = team->t.t_next_pool;
-
- KMP_DEBUG_ASSERT( team );
- KMP_DEBUG_ASSERT( team->t.t_dispatch );
- KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
- KMP_DEBUG_ASSERT( team->t.t_threads );
- KMP_DEBUG_ASSERT( team->t.t_argv );
-
- /* TODO clean the threads that are a part of this? */
-
- /* free stuff */
-
- __kmp_free_team_arrays( team );
- if ( team->t.t_argv != &team->t.t_inline_argv[0] )
- __kmp_free( (void*) team->t.t_argv );
- __kmp_free( team );
-
- KMP_MB();
- return next_pool;
-}
-
-//
-// Free the thread. Don't reap it, just place it on the pool of available
-// threads.
-//
-// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
-// binding for the affinity mechanism to be useful.
-//
-// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
-// However, we want to avoid the potential performance problem of always
-// scanning through the list to find the correct point at which to insert
-// the thread (potential N**2 behavior). To do this we keep track of the
-// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
-// With single-level parallelism, threads will always be added to the tail
-// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
-// parallelism, all bets are off and we may need to scan through the entire
-// free list.
-//
-// This change also has a potentially large performance benefit, for some
-// applications. Previously, as threads were freed from the hot team, they
-// would be placed back on the free list in inverse order. If the hot team
-// grew back to its original size, then the freed thread would be placed
-// back on the hot team in reverse order. This could cause bad cache
-// locality problems on programs where the size of the hot team regularly
-// grew and shrunk.
-//
-// Now, for single-level parallelism, the OMP tid is always == gtid.
-//
-void
-__kmp_free_thread( kmp_info_t *this_th )
-{
- int gtid;
- kmp_info_t **scan;
-
- KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
- __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
-
- KMP_DEBUG_ASSERT( this_th );
-
-    // When moving a thread to the pool, switch it to waiting on its own b_go flag and to an uninitialized (NULL) team.
- int b;
- kmp_balign_t *balign = this_th->th.th_bar;
- for (b=0; b<bs_last_barrier; ++b) {
- if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
- balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
- balign[b].bb.team = NULL;
- }
- this_th->th.th_task_state = 0;
-
-
- /* put thread back on the free pool */
- TCW_PTR(this_th->th.th_team, NULL);
- TCW_PTR(this_th->th.th_root, NULL);
- TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
-
- //
- // If the __kmp_thread_pool_insert_pt is already past the new insert
- // point, then we need to re-scan the entire list.
- //
- gtid = this_th->th.th_info.ds.ds_gtid;
- if ( __kmp_thread_pool_insert_pt != NULL ) {
- KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
- if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
- __kmp_thread_pool_insert_pt = NULL;
- }
- }
-
- //
- // Scan down the list to find the place to insert the thread.
- // scan is the address of a link in the list, possibly the address of
- // __kmp_thread_pool itself.
- //
-    // In the absence of nested parallelism, the for loop will have 0 iterations.
- //
- if ( __kmp_thread_pool_insert_pt != NULL ) {
- scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
- }
- else {
- scan = (kmp_info_t **)&__kmp_thread_pool;
- }
- for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
- scan = &( (*scan)->th.th_next_pool ) );
-
- //
- // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
- // to its address.
- //
- TCW_PTR(this_th->th.th_next_pool, *scan);
- __kmp_thread_pool_insert_pt = *scan = this_th;
- KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
- || ( this_th->th.th_info.ds.ds_gtid
- < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
- TCW_4(this_th->th.th_in_pool, TRUE);
- __kmp_thread_pool_nth++;
-
- TCW_4(__kmp_nth, __kmp_nth - 1);
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime back to user setting or default if necessary */
- /* Middle initialization might never have occurred */
- if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
- KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
- if ( __kmp_nth <= __kmp_avail_proc ) {
- __kmp_zero_bt = FALSE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- KMP_MB();
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-void *
-__kmp_launch_thread( kmp_info_t *this_thr )
-{
- int gtid = this_thr->th.th_info.ds.ds_gtid;
-/* void *stack_data;*/
- kmp_team_t *(*volatile pteam);
-
- KMP_MB();
- KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
-
- if( __kmp_env_consistency_check ) {
- this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- this_thr->th.ompt_thread_info.wait_id = 0;
- this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
- if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
- __ompt_thread_begin(ompt_thread_worker, gtid);
- }
- }
-#endif
-
- /* This is the place where threads wait for work */
- while( ! TCR_4(__kmp_global.g.g_done) ) {
- KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
- KMP_MB();
-
- /* wait for work to do */
- KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_idle;
- }
-#endif
-
- /* No tid yet since not part of a team */
- __kmp_fork_barrier( gtid, KMP_GTID_DNE );
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
-
- pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
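-        // pteam is the address of this thread's team pointer; once a team has been
-        // assigned (checked just below), this thread invokes that team's microtask.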
-
- /* have we been allocated? */
- if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
- /* we were just woken up, so run our new task */
- if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
- int rc;
- KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
- gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
-
- updateHWFPControl (*pteam);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- // Initialize OMPT task id for implicit task.
- int tid = __kmp_tid_from_gtid(gtid);
- (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id =
- __ompt_task_id_new(tid);
- }
-#endif
-
- KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
- {
- KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
- rc = (*pteam)->t.t_invoke( gtid );
- }
- KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
- KMP_ASSERT( rc );
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- /* no frame set while outside task */
- int tid = __kmp_tid_from_gtid(gtid);
- (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
-
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
- KMP_MB();
- KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
- gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
- }
- /* join barrier after parallel region */
- __kmp_join_barrier( gtid );
- }
- }
- TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
-
-#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
- __ompt_thread_end(ompt_thread_worker, gtid);
- }
-#endif
-
- this_thr->th.th_task_team = NULL;
- /* run the destructors for the threadprivate data for this thread */
- __kmp_common_destroy_gtid( gtid );
-
- KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
- KMP_MB();
- return this_thr;
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_internal_end_dest( void *specific_gtid )
-{
- #if KMP_COMPILER_ICC
- #pragma warning( push )
- #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
- #endif
- // Make sure no significant bits are lost
- int gtid = (kmp_intptr_t)specific_gtid - 1;
- #if KMP_COMPILER_ICC
- #pragma warning( pop )
- #endif
-
- KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
-    /* NOTE: the gtid is stored as gtid+1 in thread-local storage,
-     * because 0 is reserved for the nothing-stored case */
-
- /* josh: One reason for setting the gtid specific data even when it is being
- destroyed by pthread is to allow gtid lookup through thread specific data
- (__kmp_gtid_get_specific). Some of the code, especially stat code,
- that gets executed in the call to __kmp_internal_end_thread, actually
- gets the gtid through the thread specific data. Setting it here seems
- rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
- to run smoothly.
- todo: get rid of this after we remove the dependence on
- __kmp_gtid_get_specific
- */
- if(gtid >= 0 && KMP_UBER_GTID(gtid))
- __kmp_gtid_set_specific( gtid );
- #ifdef KMP_TDATA_GTID
- __kmp_gtid = gtid;
- #endif
- __kmp_internal_end_thread( gtid );
-}
-
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
-
-// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
-// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker
-// option in makefile.mk works fine.
-
-__attribute__(( destructor ))
-void
-__kmp_internal_end_dtor( void )
-{
- __kmp_internal_end_atexit();
-}
-
-void
-__kmp_internal_end_fini( void )
-{
- __kmp_internal_end_atexit();
-}
-
-#endif
-
-/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
-void
-__kmp_internal_end_atexit( void )
-{
- KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
- /* [Windows]
-       josh: ideally, we want to completely shut down the library in this atexit handler, but
- stat code that depends on thread specific data for gtid fails because that data becomes
- unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
- instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
-       stat code and use __kmp_internal_end_library to cleanly shut down the library.
-
-// TODO: Can some of this comment about GVS be removed?
- I suspect that the offending stat code is executed when the calling thread tries to
- clean up a dead root thread's data structures, resulting in GVS code trying to close
- the GVS structures for that thread, but since the stat code uses
- __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
- cleaning up itself instead of another thread, it gets confused. This happens because
- allowing a thread to unregister and cleanup another thread is a recent modification for
- addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
- thread may end up trying to unregister another thread only if thread death does not
- trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
- specific data destructor function to detect thread death. For Windows dynamic, there
- is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
- workaround is applicable only for Windows static stat library.
- */
- __kmp_internal_end_library( -1 );
- #if KMP_OS_WINDOWS
- __kmp_close_console();
- #endif
-}
-
-static void
-__kmp_reap_thread(
- kmp_info_t * thread,
- int is_root
-) {
-
- // It is assumed __kmp_forkjoin_lock is acquired.
-
- int gtid;
-
- KMP_DEBUG_ASSERT( thread != NULL );
-
- gtid = thread->th.th_info.ds.ds_gtid;
-
- if ( ! is_root ) {
-
- if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
- /* Assume the threads are at the fork barrier here */
- KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
- /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
- kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
- __kmp_release_64(&flag);
- }; // if
-
-
- // Terminate OS thread.
- __kmp_reap_worker( thread );
-
- //
- // The thread was killed asynchronously. If it was actively
-        // spinning in the thread pool, decrement the global count.
- //
- // There is a small timing hole here - if the worker thread was
-        // just waking up after sleeping in the pool, had reset its
- // th_active_in_pool flag but not decremented the global counter
- // __kmp_thread_pool_active_nth yet, then the global counter
- // might not get updated.
- //
- // Currently, this can only happen as the library is unloaded,
- // so there are no harmful side effects.
- //
- if ( thread->th.th_active_in_pool ) {
- thread->th.th_active_in_pool = FALSE;
- KMP_TEST_THEN_DEC32(
- (kmp_int32 *) &__kmp_thread_pool_active_nth );
- KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
- }
-
- // Decrement # of [worker] threads in the pool.
- KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
- --__kmp_thread_pool_nth;
- }; // if
-
- // Free the fast memory for tasking
- #if USE_FAST_MEMORY
- __kmp_free_fast_memory( thread );
- #endif /* USE_FAST_MEMORY */
-
- __kmp_suspend_uninitialize_thread( thread );
-
- KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
- TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
-
- -- __kmp_all_nth;
- // __kmp_nth was decremented when thread is added to the pool.
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime back to user setting or default if necessary */
- /* Middle initialization might never have occurred */
- if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
- KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
- if ( __kmp_nth <= __kmp_avail_proc ) {
- __kmp_zero_bt = FALSE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- /* free the memory being used */
- if( __kmp_env_consistency_check ) {
- if ( thread->th.th_cons ) {
- __kmp_free_cons_stack( thread->th.th_cons );
- thread->th.th_cons = NULL;
- }; // if
- }
-
- if ( thread->th.th_pri_common != NULL ) {
- __kmp_free( thread->th.th_pri_common );
- thread->th.th_pri_common = NULL;
- }; // if
-
- if (thread->th.th_task_state_memo_stack != NULL) {
- __kmp_free(thread->th.th_task_state_memo_stack);
- thread->th.th_task_state_memo_stack = NULL;
- }
-
- #if KMP_USE_BGET
- if ( thread->th.th_local.bget_data != NULL ) {
- __kmp_finalize_bget( thread );
- }; // if
- #endif
-
-#if KMP_AFFINITY_SUPPORTED
- if ( thread->th.th_affin_mask != NULL ) {
- KMP_CPU_FREE( thread->th.th_affin_mask );
- thread->th.th_affin_mask = NULL;
- }; // if
-#endif /* KMP_AFFINITY_SUPPORTED */
-
- __kmp_reap_team( thread->th.th_serial_team );
- thread->th.th_serial_team = NULL;
- __kmp_free( thread );
-
- KMP_MB();
-
-} // __kmp_reap_thread
-
-static void
-__kmp_internal_end(void)
-{
- int i;
-
- /* First, unregister the library */
- __kmp_unregister_library();
-
- #if KMP_OS_WINDOWS
- /* In Win static library, we can't tell when a root actually dies, so we
- reclaim the data structures for any root threads that have died but not
- unregistered themselves, in order to shut down cleanly.
- In Win dynamic library we also can't tell when a thread dies.
- */
- __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
- #endif
-
- for( i=0 ; i<__kmp_threads_capacity ; i++ )
- if( __kmp_root[i] )
- if( __kmp_root[i]->r.r_active )
- break;
- KMP_MB(); /* Flush all pending memory write invalidates. */
- TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
-
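-    // If the loop above broke early, some other root is still active, so below we only
-    // reap the monitor thread; otherwise the worker and team pools are fully torn down.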
- if ( i < __kmp_threads_capacity ) {
- // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- //
- // Need to check that monitor was initialized before reaping it.
-        // If we are called from __kmp_atfork_child (which sets
- // __kmp_init_parallel = 0), then __kmp_monitor will appear to
- // contain valid data, but it is only valid in the parent process,
- // not the child.
- //
- // New behavior (201008): instead of keying off of the flag
- // __kmp_init_parallel, the monitor thread creation is keyed off
- // of the new flag __kmp_init_monitor.
- //
- __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
- if ( TCR_4( __kmp_init_monitor ) ) {
- __kmp_reap_monitor( & __kmp_monitor );
- TCW_4( __kmp_init_monitor, 0 );
- }
- __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
- KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
- } else {
- /* TODO move this to cleanup code */
- #ifdef KMP_DEBUG
- /* make sure that everything has properly ended */
- for ( i = 0; i < __kmp_threads_capacity; i++ ) {
- if( __kmp_root[i] ) {
-// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
- KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
- }
- }
- #endif
-
- KMP_MB();
-
- // Reap the worker threads.
- // This is valid for now, but be careful if threads are reaped sooner.
-        while ( __kmp_thread_pool != NULL ) {    // Loop through all the threads in the pool.
- // Get the next thread from the pool.
- kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
- __kmp_thread_pool = thread->th.th_next_pool;
- // Reap it.
- thread->th.th_next_pool = NULL;
- thread->th.th_in_pool = FALSE;
- __kmp_reap_thread( thread, 0 );
- }; // while
- __kmp_thread_pool_insert_pt = NULL;
-
- // Reap teams.
-        while ( __kmp_team_pool != NULL ) {      // Loop through all the teams in the pool.
- // Get the next team from the pool.
- kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
- __kmp_team_pool = team->t.t_next_pool;
- // Reap it.
- team->t.t_next_pool = NULL;
- __kmp_reap_team( team );
- }; // while
-
- __kmp_reap_task_teams( );
-
- for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
- // TBD: Add some checking...
- // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
- }
-
- /* Make sure all threadprivate destructors get run by joining with all worker
- threads before resetting this flag */
- TCW_SYNC_4(__kmp_init_common, FALSE);
-
- KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
- KMP_MB();
-
- //
- // See note above: One of the possible fixes for CQ138434 / CQ140126
- //
- // FIXME: push both code fragments down and CSE them?
- // push them into __kmp_cleanup() ?
- //
- __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
- if ( TCR_4( __kmp_init_monitor ) ) {
- __kmp_reap_monitor( & __kmp_monitor );
- TCW_4( __kmp_init_monitor, 0 );
- }
- __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
- KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
-
- } /* else !__kmp_global.t_active */
- TCW_4(__kmp_init_gtid, FALSE);
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
-
- __kmp_cleanup();
-#if OMPT_SUPPORT
- ompt_fini();
-#endif
-}
-
-void
-__kmp_internal_end_library( int gtid_req )
-{
- /* if we have already cleaned up, don't try again, it wouldn't be pretty */
- /* this shouldn't be a race condition because __kmp_internal_end() is the
- * only place to clear __kmp_serial_init */
- /* we'll check this later too, after we get the lock */
-    // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
- // because the next check will work in any case.
- if( __kmp_global.g.g_abort ) {
- KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
- /* TODO abort? */
- return;
- }
- if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
- KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
- return;
- }
-
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* find out who we are and what we should do */
- {
- int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
- KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
- if( gtid == KMP_GTID_SHUTDOWN ) {
- KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
- return;
- } else if( gtid == KMP_GTID_MONITOR ) {
- KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
- return;
- } else if( gtid == KMP_GTID_DNE ) {
- KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
-            /* we don't know who we are, but we may still shut down the library */
- } else if( KMP_UBER_GTID( gtid )) {
- /* unregister ourselves as an uber thread. gtid is no longer valid */
- if( __kmp_root[gtid]->r.r_active ) {
- __kmp_global.g.g_abort = -1;
- TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
- KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
- return;
- } else {
- KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
- __kmp_unregister_root_current_thread( gtid );
- }
- } else {
- /* worker threads may call this function through the atexit handler, if they call exit() */
- /* For now, skip the usual subsequent processing and just dump the debug buffer.
- TODO: do a thorough shutdown instead
- */
- #ifdef DUMP_DEBUG_ON_EXIT
- if ( __kmp_debug_buf )
- __kmp_dump_debug_buffer( );
- #endif
- return;
- }
- }
- /* synchronize the termination process */
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
-
- /* have we already finished */
- if( __kmp_global.g.g_abort ) {
- KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
- /* TODO abort? */
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- }
- if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- }
-
- /* We need this lock to enforce mutex between this reading of
- __kmp_threads_capacity and the writing by __kmp_register_root.
- Alternatively, we can use a counter of roots that is
- atomically updated by __kmp_get_global_thread_id_reg,
- __kmp_do_serial_initialize and __kmp_internal_end_*.
- */
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
- /* now we can safely conduct the actual termination */
- __kmp_internal_end();
-
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
-
- KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
-
- #ifdef DUMP_DEBUG_ON_EXIT
- if ( __kmp_debug_buf )
- __kmp_dump_debug_buffer();
- #endif
-
- #if KMP_OS_WINDOWS
- __kmp_close_console();
- #endif
-
- __kmp_fini_allocator();
-
-} // __kmp_internal_end_library
-
-void
-__kmp_internal_end_thread( int gtid_req )
-{
- int i;
-
- /* if we have already cleaned up, don't try again, it wouldn't be pretty */
- /* this shouldn't be a race condition because __kmp_internal_end() is the
- * only place to clear __kmp_serial_init */
- /* we'll check this later too, after we get the lock */
- // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
- // because the next check will work in any case.
- if( __kmp_global.g.g_abort ) {
- KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
- /* TODO abort? */
- return;
- }
- if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
- KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
- return;
- }
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* find out who we are and what we should do */
- {
- int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
- KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
- if( gtid == KMP_GTID_SHUTDOWN ) {
- KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
- return;
- } else if( gtid == KMP_GTID_MONITOR ) {
- KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
- return;
- } else if( gtid == KMP_GTID_DNE ) {
- KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
- return;
- /* we don't know who we are */
- } else if( KMP_UBER_GTID( gtid )) {
- /* unregister ourselves as an uber thread. gtid is no longer valid */
- if( __kmp_root[gtid]->r.r_active ) {
- __kmp_global.g.g_abort = -1;
- TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
- KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
- return;
- } else {
- KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
- __kmp_unregister_root_current_thread( gtid );
- }
- } else {
- /* just a worker thread, let's leave */
- KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
-
- if ( gtid >= 0 ) {
- __kmp_threads[gtid]->th.th_task_team = NULL;
- }
-
- KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
- return;
- }
- }
- #if defined KMP_DYNAMIC_LIB
-    // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
-    // because it is better to shut down later in the library destructor.
-    // The reason for this change is a performance problem when a non-OpenMP thread
-    // in a loop forks and joins many OpenMP threads. We can save a lot of time by
-    // keeping worker threads alive until the program shuts down.
-    // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
-    // Windows (DPD200287443) that occurs when using critical sections from foreign threads.
- KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
- return;
- #endif
- /* synchronize the termination process */
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
-
- /* have we already finished */
- if( __kmp_global.g.g_abort ) {
- KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
- /* TODO abort? */
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- }
- if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- }
-
- /* We need this lock to enforce mutex between this reading of
- __kmp_threads_capacity and the writing by __kmp_register_root.
- Alternatively, we can use a counter of roots that is
- atomically updated by __kmp_get_global_thread_id_reg,
- __kmp_do_serial_initialize and __kmp_internal_end_*.
- */
-
- /* should we finish the run-time? are all siblings done? */
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
- for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
- if ( KMP_UBER_GTID( i ) ) {
- KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- };
- }
-
- /* now we can safely conduct the actual termination */
-
- __kmp_internal_end();
-
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
-
- KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
-
- #ifdef DUMP_DEBUG_ON_EXIT
- if ( __kmp_debug_buf )
- __kmp_dump_debug_buffer();
- #endif
-} // __kmp_internal_end_thread
-
-// -------------------------------------------------------------------------------------------------
-// Library registration stuff.
-
-static long __kmp_registration_flag = 0;
- // Random value used to indicate library initialization.
-static char * __kmp_registration_str = NULL;
- // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
-
-
-static inline
-char *
-__kmp_reg_status_name() {
- /*
- On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
- If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
-        the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
- */
- return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
-} // __kmp_reg_status_name
-
-
-void
-__kmp_register_library_startup(
- void
-) {
-
- char * name = __kmp_reg_status_name(); // Name of the environment variable.
- int done = 0;
- union {
- double dtime;
- long ltime;
- } time;
- #if KMP_OS_WINDOWS
- __kmp_initialize_system_tick();
- #endif
- __kmp_read_system_time( & time.dtime );
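-    // Build a quasi-unique marker for this copy of the runtime: 0xCAFE in the
-    // upper 16 bits, the low 16 bits of the current time in the lower half.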
- __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
- __kmp_registration_str =
- __kmp_str_format(
- "%p-%lx-%s",
- & __kmp_registration_flag,
- __kmp_registration_flag,
- KMP_LIBRARY_FILE
- );
-
- KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
-
- while ( ! done ) {
-
- char * value = NULL; // Actual value of the environment variable.
-
-        // Set the environment variable, but do not overwrite it if it already exists.
- __kmp_env_set( name, __kmp_registration_str, 0 );
-        // Check that the variable was written.
- value = __kmp_env_get( name );
- if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
-
- done = 1; // Ok, environment variable set successfully, exit the loop.
-
- } else {
-
- // Oops. Write failed. Another copy of OpenMP RTL is in memory.
-            // Check whether it is alive or dead.
- int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
- char * tail = value;
- char * flag_addr_str = NULL;
- char * flag_val_str = NULL;
- char const * file_name = NULL;
- __kmp_str_split( tail, '-', & flag_addr_str, & tail );
- __kmp_str_split( tail, '-', & flag_val_str, & tail );
- file_name = tail;
- if ( tail != NULL ) {
- long * flag_addr = 0;
- long flag_val = 0;
- KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
- KMP_SSCANF( flag_val_str, "%lx", & flag_val );
- if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
-                    // First, check whether the environment-encoded address is mapped into the address space.
- // If so, dereference it to see if it still has the right value.
-
- if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
- neighbor = 1;
- } else {
- // If not, then we know the other copy of the library is no longer running.
- neighbor = 2;
- }; // if
- }; // if
- }; // if
- switch ( neighbor ) {
- case 0 : // Cannot parse environment variable -- neighbor status unknown.
-                    // Assume it is an incompatible format from a future version of the library.
- // Assume the other library is alive.
- // WARN( ... ); // TODO: Issue a warning.
- file_name = "unknown library";
-                    // Attention! Falling through to the next case. That's intentional.
- case 1 : { // Neighbor is alive.
- // Check it is allowed.
+/*
+ * kmp_runtime.c -- KPTS runtime support library
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_atomic.h"
+#include "kmp_wrapper_getpid.h"
+#include "kmp_environment.h"
+#include "kmp_itt.h"
+#include "kmp_str.h"
+#include "kmp_settings.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_error.h"
+#include "kmp_stats.h"
+#include "kmp_wait_release.h"
+
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
+
+/* these are temporary issues to be dealt with */
+#define KMP_USE_PRCTL 0
+#define KMP_USE_POOLED_ALLOC 0
+
+#if KMP_OS_WINDOWS
+#include <process.h>
+#endif
+
+#if defined(KMP_GOMP_COMPAT)
+char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
+#endif /* defined(KMP_GOMP_COMPAT) */
+
+char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
+#if OMP_40_ENABLED
+ "4.0 (201307)";
+#else
+ "3.1 (201107)";
+#endif
+
+#ifdef KMP_DEBUG
+char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
+#endif /* KMP_DEBUG */
+
+
+#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+kmp_info_t __kmp_monitor;
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* Forward declarations */
+
+void __kmp_cleanup( void );
+
+static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
+static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+static void __kmp_partition_places( kmp_team_t *team );
+#endif
+static void __kmp_do_serial_initialize( void );
+void __kmp_fork_barrier( int gtid, int tid );
+void __kmp_join_barrier( int gtid );
+void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );
+
+#ifdef USE_LOAD_BALANCE
+static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
+#endif
+
+static int __kmp_expand_threads(int nWish, int nNeed);
+#if KMP_OS_WINDOWS
+static int __kmp_unregister_root_other_thread( int gtid );
+#endif
+static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
+static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
+static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* Calculate the identifier of the current thread */
+/* fast (and somewhat portable) way to get unique */
+/* identifier of executing thread. */
+/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
+
+int
+__kmp_get_global_thread_id( )
+{
+ int i;
+ kmp_info_t **other_threads;
+ size_t stack_data;
+ char *stack_addr;
+ size_t stack_size;
+ char *stack_base;
+
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
+ __kmp_nth, __kmp_all_nth ));
+
+ /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
+ parallel region, made it return KMP_GTID_DNE to force serial_initialize by
+ caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
+ __kmp_init_gtid for this to work. */
+
+ if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
+
+#ifdef KMP_TDATA_GTID
+ if ( TCR_4(__kmp_gtid_mode) >= 3) {
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
+ return __kmp_gtid;
+ }
+#endif
+ if ( TCR_4(__kmp_gtid_mode) >= 2) {
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
+ return __kmp_gtid_get_specific();
+ }
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
+
+ stack_addr = (char*) & stack_data;
+ other_threads = __kmp_threads;
+
+ /*
+ ATT: The code below is a source of potential bugs due to unsynchronized access to
+ __kmp_threads array. For example:
+ 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
+ 2. Current thread is suspended by OS.
+ 3. Another thread unregisters and finishes (debug versions of free() may fill memory
+ with something like 0xEF).
+ 4. Current thread is resumed.
+ 5. Current thread reads junk from *thr.
+ TODO: Fix it.
+ --ln
+ */
+
+ for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
+
+ kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
+ if( !thr ) continue;
+
+ stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
+ stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
+
+ /* stack grows down -- search through all of the active threads */
+
+ if( stack_addr <= stack_base ) {
+ size_t stack_diff = stack_base - stack_addr;
+
+ if( stack_diff <= stack_size ) {
+ /* The only way we can be closer than the allocated */
+ /* stack size is if we are running on this thread. */
+ KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
+ return i;
+ }
+ }
+ }
+
+ /* get specific to try and determine our gtid */
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
+ "thread, using TLS\n" ));
+ i = __kmp_gtid_get_specific();
+
+ /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
+
+    /* if we haven't been assigned a gtid, then return the error code */
+ if( i<0 ) return i;
+
+ /* dynamically updated stack window for uber threads to avoid get_specific call */
+ if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
+ KMP_FATAL( StackOverflow, i );
+ }
+
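+    /* Grow the recorded stack bounds for this uber thread to include the current
+       stack address, so later lookups can succeed via the fast search above. */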
+ stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
+ if( stack_addr > stack_base ) {
+ TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
+ TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
+ other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
+ } else {
+ TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
+ }
+
+ /* Reprint stack bounds for ubermaster since they have been refined */
+ if ( __kmp_storage_map ) {
+ char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
+ char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
+ __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
+ other_threads[i]->th.th_info.ds.ds_stacksize,
+ "th_%d stack (refinement)", i );
+ }
+ return i;
+}
+
+int
+__kmp_get_global_thread_id_reg( )
+{
+ int gtid;
+
+ if ( !__kmp_init_serial ) {
+ gtid = KMP_GTID_DNE;
+ } else
+#ifdef KMP_TDATA_GTID
+ if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
+ gtid = __kmp_gtid;
+ } else
+#endif
+ if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
+ gtid = __kmp_gtid_get_specific();
+ } else {
+ KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
+ gtid = __kmp_get_global_thread_id();
+ }
+
+ /* we must be a new uber master sibling thread */
+ if( gtid == KMP_GTID_DNE ) {
+ KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
+ "Registering a new gtid.\n" ));
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+ if( !__kmp_init_serial ) {
+ __kmp_do_serial_initialize();
+ gtid = __kmp_gtid_get_specific();
+ } else {
+ gtid = __kmp_register_root(FALSE);
+ }
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
+ }
+
+ KMP_DEBUG_ASSERT( gtid >=0 );
+
+ return gtid;
+}
+
+/* caller must hold forkjoin_lock */
+void
+__kmp_check_stack_overlap( kmp_info_t *th )
+{
+ int f;
+ char *stack_beg = NULL;
+ char *stack_end = NULL;
+ int gtid;
+
+ KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
+ if ( __kmp_storage_map ) {
+ stack_end = (char *) th->th.th_info.ds.ds_stackbase;
+ stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
+
+ gtid = __kmp_gtid_from_thread( th );
+
+ if (gtid == KMP_GTID_MONITOR) {
+ __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
+ "th_%s stack (%s)", "mon",
+ ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
+ } else {
+ __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
+ "th_%d stack (%s)", gtid,
+ ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
+ }
+ }
+
+ /* No point in checking ubermaster threads since they use refinement and cannot overlap */
+ gtid = __kmp_gtid_from_thread( th );
+ if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
+ {
+ KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
+ if ( stack_beg == NULL ) {
+ stack_end = (char *) th->th.th_info.ds.ds_stackbase;
+ stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
+ }
+
+ for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
+ kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
+
+ if( f_th && f_th != th ) {
+ char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
+ char *other_stack_beg = other_stack_end -
+ (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
+ if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
+ (stack_end > other_stack_beg && stack_end < other_stack_end)) {
+
+ /* Print the other stack values before the abort */
+ if ( __kmp_storage_map )
+ __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
+ (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
+ "th_%d stack (overlapped)",
+ __kmp_gtid_from_thread( f_th ) );
+
+ __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
+ }
+ }
+ }
+ }
+ KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
+}
+
+
+/* ------------------------------------------------------------------------ */
+
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_infinite_loop( void )
+{
+ static int done = FALSE;
+
+ while (! done) {
+ KMP_YIELD( 1 );
+ }
+}
+
+#define MAX_MESSAGE 512
+
+void
+__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
+ char buffer[MAX_MESSAGE];
+ va_list ap;
+
+ va_start( ap, format);
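+    /* Embed the caller's format string after the address/size prefix, then expand
+       the caller's varargs against the combined format via __kmp_vprintf(). */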
+ KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
+ __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
+ __kmp_vprintf( kmp_err, buffer, ap );
+#if KMP_PRINT_DATA_PLACEMENT
+ int node;
+ if(gtid >= 0) {
+ if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
+ if( __kmp_storage_map_verbose ) {
+ node = __kmp_get_host_node(p1);
+ if(node < 0) /* doesn't work, so don't try this next time */
+ __kmp_storage_map_verbose = FALSE;
+ else {
+ char *last;
+ int lastNode;
+ int localProc = __kmp_get_cpu_from_gtid(gtid);
+
+ p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
+ p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
+ if(localProc >= 0)
+ __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
+ else
+ __kmp_printf_no_lock(" GTID %d\n", gtid);
+# if KMP_USE_PRCTL
+/* The more elaborate format is disabled for now because of the prctl hanging bug. */
+ do {
+ last = p1;
+ lastNode = node;
+ /* This loop collates adjacent pages with the same host node. */
+ do {
+ (char*)p1 += PAGE_SIZE;
+ } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
+ __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
+ (char*)p1 - 1, lastNode);
+ } while(p1 <= p2);
+# else
+ __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
+ (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
+ if(p1 < p2) {
+ __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
+ (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
+ }
+# endif
+ }
+ }
+ } else
+ __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
+ }
+#endif /* KMP_PRINT_DATA_PLACEMENT */
+ __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
+}
+
+void
+__kmp_warn( char const * format, ... )
+{
+ char buffer[MAX_MESSAGE];
+ va_list ap;
+
+ if ( __kmp_generate_warnings == kmp_warnings_off ) {
+ return;
+ }
+
+ va_start( ap, format );
+
+ KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
+ __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
+ __kmp_vprintf( kmp_err, buffer, ap );
+ __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
+
+ va_end( ap );
+}
+
+void
+__kmp_abort_process()
+{
+
+ // Later threads may stall here, but that's ok because abort() will kill them.
+ __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
+
+ if ( __kmp_debug_buf ) {
+ __kmp_dump_debug_buffer();
+ }; // if
+
+ if ( KMP_OS_WINDOWS ) {
+ // Let other threads know of abnormal termination and prevent deadlock
+ // if abort happened during library initialization or shutdown
+ __kmp_global.g.g_abort = SIGABRT;
+
+ /*
+            On Windows* OS, abort() by default causes a pop-up error box, which stalls nightly
+            testing. Unfortunately, we cannot reliably suppress pop-up error boxes.
+            _set_abort_behavior() works well, but this function is not available in VS7 (this is
+            not a problem for the DLL, but it is a problem for the static OpenMP RTL). SetErrorMode
+            (and so, the timelimit utility) does not help, at least in some versions of the MS C RTL.
+
+            It seems the following sequence is the only way to simulate abort() and avoid the
+            pop-up error box.
+ */
+ raise( SIGABRT );
+ _exit( 3 ); // Just in case, if signal ignored, exit anyway.
+ } else {
+ abort();
+ }; // if
+
+ __kmp_infinite_loop();
+ __kmp_release_bootstrap_lock( & __kmp_exit_lock );
+
+} // __kmp_abort_process
+
+void
+__kmp_abort_thread( void )
+{
+ // TODO: Eliminate g_abort global variable and this function.
+ // In case of abort just call abort(), it will kill all the threads.
+ __kmp_infinite_loop();
+} // __kmp_abort_thread
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Print out the storage map for the major kmp_info_t thread data structures
+ * that are allocated together.
+ */
+
+static void
+__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
+{
+ __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
+
+ __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
+ "th_%d.th_info", gtid );
+
+ __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
+ "th_%d.th_local", gtid );
+
+ __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
+ sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
+
+ __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
+ &thr->th.th_bar[bs_plain_barrier+1],
+ sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
+
+ __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
+ &thr->th.th_bar[bs_forkjoin_barrier+1],
+ sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
+
+ #if KMP_FAST_REDUCTION_BARRIER
+ __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
+ &thr->th.th_bar[bs_reduction_barrier+1],
+ sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
+ #endif // KMP_FAST_REDUCTION_BARRIER
+}
+
+/*
+ * Print out the storage map for the major kmp_team_t team data structures
+ * that are allocated together.
+ */
+
+static void
+__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
+{
+ int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
+ __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
+ header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
+ sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
+
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
+ sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
+ sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
+
+ #if KMP_FAST_REDUCTION_BARRIER
+ __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
+ sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
+ #endif // KMP_FAST_REDUCTION_BARRIER
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
+ sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
+ sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
+ sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
+ header, team_id );
+
+ /*
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr],
+ sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr],
+ sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr],
+ sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr],
+ sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr],
+ sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr],
+ sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
+
+ //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr],
+ // sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id );
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr],
+ sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id );
+#if OMP_40_ENABLED
+ __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr],
+ sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id );
+#endif
+ */
+
+ __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
+ sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
+}
+
+static void __kmp_init_allocator() {}
+static void __kmp_fini_allocator() {}
+
+/* ------------------------------------------------------------------------ */
+
+#ifdef KMP_DYNAMIC_LIB
+# if KMP_OS_WINDOWS
+
+
+static void
+__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
+ // TODO: Change to __kmp_break_bootstrap_lock().
+ __kmp_init_bootstrap_lock( lck ); // make the lock released
+}
+
+static void
+__kmp_reset_locks_on_process_detach( int gtid_req ) {
+ int i;
+ int thread_count;
+
+ // PROCESS_DETACH is expected to be called by a thread
+ // that executes ProcessExit() or FreeLibrary().
+ // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
+ // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
+    // However, in fact, some threads may still be alive here, even though they are about to be terminated.
+    // The threads in the array with ds_thread==0 are the most suspicious.
+    // Actually, it may not be safe to access __kmp_threads[].
+
+ // TODO: does it make sense to check __kmp_roots[] ?
+
+ // Let's check that there are no other alive threads registered with the OMP lib.
+ while( 1 ) {
+ thread_count = 0;
+ for( i = 0; i < __kmp_threads_capacity; ++i ) {
+ if( !__kmp_threads ) continue;
+ kmp_info_t* th = __kmp_threads[ i ];
+ if( th == NULL ) continue;
+ int gtid = th->th.th_info.ds.ds_gtid;
+ if( gtid == gtid_req ) continue;
+ if( gtid < 0 ) continue;
+ DWORD exit_val;
+ int alive = __kmp_is_thread_alive( th, &exit_val );
+ if( alive ) {
+ ++thread_count;
+ }
+ }
+ if( thread_count == 0 ) break; // success
+ }
+
+ // Assume that I'm alone.
+
+    // Now it is probably safe to check and reset the locks.
+ // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
+ __kmp_reset_lock( &__kmp_forkjoin_lock );
+ #ifdef KMP_DEBUG
+ __kmp_reset_lock( &__kmp_stdio_lock );
+ #endif // KMP_DEBUG
+
+
+}
+
+BOOL WINAPI
+DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
+ //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+
+ switch( fdwReason ) {
+
+ case DLL_PROCESS_ATTACH:
+ KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
+
+ return TRUE;
+
+ case DLL_PROCESS_DETACH:
+ KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
+ __kmp_gtid_get_specific() ));
+
+ if( lpReserved != NULL )
+ {
+ // lpReserved is used for telling the difference:
+ // lpReserved == NULL when FreeLibrary() was called,
+ // lpReserved != NULL when the process terminates.
+ // When FreeLibrary() is called, worker threads remain alive.
+ // So they will release the forkjoin lock by themselves.
+ // When the process terminates, worker threads disappear triggering
+ // the problem of unreleased forkjoin lock as described below.
+
+ // A worker thread can take the forkjoin lock
+ // in __kmp_suspend_template()->__kmp_rml_decrease_load_before_sleep().
+ // The problem comes up if that worker thread becomes dead
+ // before it releases the forkjoin lock.
+ // The forkjoin lock remains taken, while the thread
+ // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
+ // will try to take the forkjoin lock and will always fail,
+ // so that the application will never finish [normally].
+ // This scenario is possible if __kmpc_end() has not been executed.
+            // It looks like this is not a corner case; common cases include:
+ // - the main function was compiled by an alternative compiler;
+ // - the main function was compiled by icl but without /Qopenmp (application with plugins);
+ // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
+ // - alive foreign thread prevented __kmpc_end from doing cleanup.
+
+ // This is a hack to work around the problem.
+ // TODO: !!! to figure out something better.
+ __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
+ }
+
+ __kmp_internal_end_library( __kmp_gtid_get_specific() );
+
+ return TRUE;
+
+ case DLL_THREAD_ATTACH:
+ KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
+
+ /* if we wanted to register new siblings all the time here call
+ * __kmp_get_gtid(); */
+ return TRUE;
+
+ case DLL_THREAD_DETACH:
+ KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
+ __kmp_gtid_get_specific() ));
+
+ __kmp_internal_end_thread( __kmp_gtid_get_specific() );
+ return TRUE;
+ }
+
+ return TRUE;
+}
+
+# endif /* KMP_OS_WINDOWS */
+#endif /* KMP_DYNAMIC_LIB */
+
+
+/* ------------------------------------------------------------------------ */
+
+/* Change the library type to "status" and return the old type */
+/* called from within initialization routines where __kmp_initz_lock is held */
+int
+__kmp_change_library( int status )
+{
+ int old_status;
+
+ old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
+
+ if (status) {
+ __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
+ }
+ else {
+ __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
+ }
+
+ return old_status; // return previous setting of whether KMP_LIBRARY=throughput
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* __kmp_parallel_deo --
+ * Wait until it's our turn.
+ */
+void
+__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ int gtid = *gtid_ref;
+#ifdef BUILD_PARALLEL_ORDERED
+ kmp_team_t *team = __kmp_team_from_gtid( gtid );
+#endif /* BUILD_PARALLEL_ORDERED */
+
+ if( __kmp_env_consistency_check ) {
+ if( __kmp_threads[gtid]->th.th_root->r.r_active )
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
+#else
+ __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
+#endif
+ }
+#ifdef BUILD_PARALLEL_ORDERED
+ if( !team->t.t_serialized ) {
+ KMP_MB();
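+        /* spin until the team's ordered counter equals our tid, i.e. until it is our turn */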
+ KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
+ KMP_MB();
+ }
+#endif /* BUILD_PARALLEL_ORDERED */
+}
+
+/* __kmp_parallel_dxo --
+ * Signal the next task.
+ */
+
+void
+__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ int gtid = *gtid_ref;
+#ifdef BUILD_PARALLEL_ORDERED
+ int tid = __kmp_tid_from_gtid( gtid );
+ kmp_team_t *team = __kmp_team_from_gtid( gtid );
+#endif /* BUILD_PARALLEL_ORDERED */
+
+ if( __kmp_env_consistency_check ) {
+ if( __kmp_threads[gtid]->th.th_root->r.r_active )
+ __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
+ }
+#ifdef BUILD_PARALLEL_ORDERED
+ if ( ! team->t.t_serialized ) {
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* use the tid of the next thread in this team */
+        /* TODO: replace with a general release procedure */
+ team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
+
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
+ /* accept blame for "ordered" waiting */
+ kmp_info_t *this_thread = __kmp_threads[gtid];
+ ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
+ this_thread->th.ompt_thread_info.wait_id);
+ }
+#endif
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
+#endif /* BUILD_PARALLEL_ORDERED */
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* The BARRIER for a SINGLE process section is always explicit */
+
+int
+__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
+{
+ int status;
+ kmp_info_t *th;
+ kmp_team_t *team;
+
+ if( ! TCR_4(__kmp_init_parallel) )
+ __kmp_parallel_initialize();
+
+ th = __kmp_threads[ gtid ];
+ team = th->th.th_team;
+ status = 0;
+
+ th->th.th_ident = id_ref;
+
+ if ( team->t.t_serialized ) {
+ status = 1;
+ } else {
+ kmp_int32 old_this = th->th.th_local.this_construct;
+
+ ++th->th.th_local.this_construct;
+ /* try to set team count to thread count--success means thread got the
+ single block
+ */
+ /* TODO: Should this be acquire or release? */
+ status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
+ th->th.th_local.this_construct);
+#if USE_ITT_BUILD
+ if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
+#if OMP_40_ENABLED
+ th->th.th_teams_microtask == NULL &&
+#endif
+ team->t.t_active_level == 1 )
+ { // Only report metadata by master of active team at level 1
+ __kmp_itt_metadata_single( id_ref );
+ }
+#endif /* USE_ITT_BUILD */
+ }
+
+ if( __kmp_env_consistency_check ) {
+ if (status && push_ws) {
+ __kmp_push_workshare( gtid, ct_psingle, id_ref );
+ } else {
+ __kmp_check_workshare( gtid, ct_psingle, id_ref );
+ }
+ }
+#if USE_ITT_BUILD
+ if ( status ) {
+ __kmp_itt_single_start( gtid );
+ }
+#endif /* USE_ITT_BUILD */
+ return status;
+}
+
+void
+__kmp_exit_single( int gtid )
+{
+#if USE_ITT_BUILD
+ __kmp_itt_single_end( gtid );
+#endif /* USE_ITT_BUILD */
+ if( __kmp_env_consistency_check )
+ __kmp_pop_workshare( gtid, ct_psingle, NULL );
+}
+
+
+/*
+ * Determine whether we can go parallel or must use a serialized parallel region,
+ * and how many threads we can use.
+ * set_nthreads is the number of threads requested for the team.
+ * Returns 1 if we should serialize or use only one thread,
+ * otherwise the number of threads to use.
+ * The forkjoin lock is held by the caller.
+ */
+static int
+__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
+ int master_tid, int set_nthreads
+#if OMP_40_ENABLED
+ , int enter_teams
+#endif /* OMP_40_ENABLED */
+)
+{
+ int capacity;
+ int new_nthreads;
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ KMP_DEBUG_ASSERT( root && parent_team );
+
+ //
+ // If dyn-var is set, dynamically adjust the number of desired threads,
+ // according to the method specified by dynamic_mode.
+ //
+ new_nthreads = set_nthreads;
+ if ( ! get__dynamic_2( parent_team, master_tid ) ) {
+ ;
+ }
+#ifdef USE_LOAD_BALANCE
+ else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
+ new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
+ if ( new_nthreads == 1 ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
+ master_tid ));
+ return 1;
+ }
+ if ( new_nthreads < set_nthreads ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
+ master_tid, new_nthreads ));
+ }
+ }
+#endif /* USE_LOAD_BALANCE */
+ else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
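+        // dynamic_thread_limit: limit the reservation to processors not already in use
+        // by existing threads; the threads this team will reuse (the master, or the
+        // whole hot team when the root is not active) are added back.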
+ new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
+ : root->r.r_hot_team->t.t_nproc);
+ if ( new_nthreads <= 1 ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
+ master_tid ));
+ return 1;
+ }
+ if ( new_nthreads < set_nthreads ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
+ master_tid, new_nthreads ));
+ }
+ else {
+ new_nthreads = set_nthreads;
+ }
+ }
+ else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
+ if ( set_nthreads > 2 ) {
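+            // pick a pseudo-random team size in the range [1, set_nthreads]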
+ new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
+ new_nthreads = ( new_nthreads % set_nthreads ) + 1;
+ if ( new_nthreads == 1 ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
+ master_tid ));
+ return 1;
+ }
+ if ( new_nthreads < set_nthreads ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
+ master_tid, new_nthreads ));
+ }
+ }
+ }
+ else {
+ KMP_ASSERT( 0 );
+ }
+
+ //
+ // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
+ //
+ if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
+ root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
+ int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
+ root->r.r_hot_team->t.t_nproc );
+ if ( tl_nthreads <= 0 ) {
+ tl_nthreads = 1;
+ }
+
+ //
+ // If dyn-var is false, emit a 1-time warning.
+ //
+ if ( ! get__dynamic_2( parent_team, master_tid )
+ && ( ! __kmp_reserve_warn ) ) {
+ __kmp_reserve_warn = 1;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
+ KMP_HNT( Unset_ALL_THREADS ),
+ __kmp_msg_null
+ );
+ }
+ if ( tl_nthreads == 1 ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
+ master_tid ));
+ return 1;
+ }
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
+ master_tid, tl_nthreads ));
+ new_nthreads = tl_nthreads;
+ }
+
+
+ //
+ // Check if the threads array is large enough, or needs expanding.
+ //
+ // See comment in __kmp_register_root() about the adjustment if
+ // __kmp_threads[0] == NULL.
+ //
+ capacity = __kmp_threads_capacity;
+ if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
+ --capacity;
+ }
+ if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
+ root->r.r_hot_team->t.t_nproc ) > capacity ) {
+ //
+ // Expand the threads array.
+ //
+ int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
+ root->r.r_hot_team->t.t_nproc ) - capacity;
+ int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
+ if ( slotsAdded < slotsRequired ) {
+ //
+ // The threads array was not expanded enough.
+ //
+ new_nthreads -= ( slotsRequired - slotsAdded );
+ KMP_ASSERT( new_nthreads >= 1 );
+
+ //
+ // If dyn-var is false, emit a 1-time warning.
+ //
+ if ( ! get__dynamic_2( parent_team, master_tid )
+ && ( ! __kmp_reserve_warn ) ) {
+ __kmp_reserve_warn = 1;
+ if ( __kmp_tp_cached ) {
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
+ KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
+ KMP_HNT( PossibleSystemLimitOnThreads ),
+ __kmp_msg_null
+ );
+ }
+ else {
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
+ KMP_HNT( SystemLimitOnThreads ),
+ __kmp_msg_null
+ );
+ }
+ }
+ }
+ }
+
+ if ( new_nthreads == 1 ) {
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
+ __kmp_get_gtid(), set_nthreads ) );
+ return 1;
+ }
+
+ KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
+ __kmp_get_gtid(), new_nthreads, set_nthreads ));
+ return new_nthreads;
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* allocate threads from the thread pool and assign them to the new team */
+/* we are assured that there are enough threads available, because we
+ * checked on that earlier within the forkjoin critical section */
+
+static void
+__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
+ kmp_info_t *master_th, int master_gtid )
+{
+ int i;
+ int use_hot_team;
+
+ KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
+ KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
+ KMP_MB();
+
+ /* first, let's setup the master thread */
+ master_th->th.th_info.ds.ds_tid = 0;
+ master_th->th.th_team = team;
+ master_th->th.th_team_nproc = team->t.t_nproc;
+ master_th->th.th_team_master = master_th;
+ master_th->th.th_team_serialized = FALSE;
+ master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];
+
+ /* make sure we are not the optimized hot team */
+#if KMP_NESTED_HOT_TEAMS
+ use_hot_team = 0;
+ kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
+ if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
+ int level = team->t.t_active_level - 1; // index in array of hot teams
+ if( master_th->th.th_teams_microtask ) { // are we inside the teams?
+ if( master_th->th.th_teams_size.nteams > 1 ) {
+ ++level; // level was not increased in teams construct for team_of_masters
+ }
+ if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
+ master_th->th.th_teams_level == team->t.t_level ) {
+ ++level; // level was not increased in teams construct for team_of_workers before the parallel
+ } // team->t.t_level will be increased inside parallel
+ }
+ if( level < __kmp_hot_teams_max_level ) {
+ if( hot_teams[level].hot_team ) {
+ // hot team has already been allocated for given level
+ KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
+ use_hot_team = 1; // the team is ready to use
+ } else {
+ use_hot_team = 0; // AC: threads are not allocated yet
+ hot_teams[level].hot_team = team; // remember new hot team
+ hot_teams[level].hot_team_nth = team->t.t_nproc;
+ }
+ } else {
+ use_hot_team = 0;
+ }
+ }
+#else
+ use_hot_team = team == root->r.r_hot_team;
+#endif
+ if ( !use_hot_team ) {
+
+ /* install the master thread */
+ team->t.t_threads[ 0 ] = master_th;
+ __kmp_initialize_info( master_th, team, 0, master_gtid );
+
+ /* now, install the worker threads */
+ for ( i=1 ; i < team->t.t_nproc ; i++ ) {
+
+ /* fork or reallocate a new thread and install it in team */
+ kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
+ team->t.t_threads[ i ] = thr;
+ KMP_DEBUG_ASSERT( thr );
+ KMP_DEBUG_ASSERT( thr->th.th_team == team );
+ /* align team and thread arrived states */
+ KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
+ __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
+ __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
+ team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
+ team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
+#if OMP_40_ENABLED
+ thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
+ thr->th.th_teams_level = master_th->th.th_teams_level;
+ thr->th.th_teams_size = master_th->th.th_teams_size;
+#endif
+ { // Initialize threads' barrier data.
+ int b;
+ kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
+ for ( b = 0; b < bs_last_barrier; ++ b ) {
+ balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
+ KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
+#if USE_DEBUGGER
+ balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
+#endif
+ }; // for b
+ }
+ }
+
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ __kmp_partition_places( team );
+#endif
+
+ }
+
+ KMP_MB();
+}
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+//
+// Propagate any changes to the floating point control registers out to the team.
+// We try to avoid unnecessary writes to the relevant cache line in the team structure,
+// so we don't make changes unless they are needed.
+//
+inline static void
+propagateFPControl(kmp_team_t * team)
+{
+ if ( __kmp_inherit_fp_control ) {
+ kmp_int16 x87_fpu_control_word;
+ kmp_uint32 mxcsr;
+
+ // Get master values of FPU control flags (both X87 and vector)
+ __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
+ __kmp_store_mxcsr( &mxcsr );
+ mxcsr &= KMP_X86_MXCSR_MASK;
+
+ // There is no point looking at t_fp_control_saved here.
+ // If it is TRUE, we still have to update the values if they are different from those we now have.
+ // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
+ // that the values in the team are the same as those we have.
+ // So, this code achieves what we need whether or not t_fp_control_saved is true.
+ // By checking whether the value needs updating we avoid unnecessary writes that would put the
+ // cache-line into a written state, causing all threads in the team to have to read it again.
+ if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
+ team->t.t_x87_fpu_control_word = x87_fpu_control_word;
+ }
+ if ( team->t.t_mxcsr != mxcsr ) {
+ team->t.t_mxcsr = mxcsr;
+ }
+ // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
+ // So we must ensure it is correct.
+ if (!team->t.t_fp_control_saved) {
+ team->t.t_fp_control_saved = TRUE;
+ }
+ }
+ else {
+ // Similarly here. Don't write to this cache-line in the team structure unless we have to.
+ if (team->t.t_fp_control_saved)
+ team->t.t_fp_control_saved = FALSE;
+ }
+}
+
+// Do the opposite, setting the hardware registers to the updated values from the team.
+inline static void
+updateHWFPControl(kmp_team_t * team)
+{
+ if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
+ //
+        // Only reset the fp control regs if they have been changed in the team
+        // during the parallel region that we are exiting.
+ //
+ kmp_int16 x87_fpu_control_word;
+ kmp_uint32 mxcsr;
+ __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
+ __kmp_store_mxcsr( &mxcsr );
+ mxcsr &= KMP_X86_MXCSR_MASK;
+
+ if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
+ __kmp_clear_x87_fpu_status_word();
+ __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
+ }
+
+ if ( team->t.t_mxcsr != mxcsr ) {
+ __kmp_load_mxcsr( &team->t.t_mxcsr );
+ }
+ }
+}
+#else
+# define propagateFPControl(x) ((void)0)
+# define updateHWFPControl(x) ((void)0)
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+static void
+__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
+
+/*
+ * Run a parallel region that has been serialized, so it runs only in a team of the single master thread.
+ */
+void
+__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
+{
+ kmp_info_t *this_thr;
+ kmp_team_t *serial_team;
+
+ KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
+
+ /* Skip all this code for autopar serialized loops since it results in
+ unacceptable overhead */
+ if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
+ return;
+
+ if( ! TCR_4( __kmp_init_parallel ) )
+ __kmp_parallel_initialize();
+
+ this_thr = __kmp_threads[ global_tid ];
+ serial_team = this_thr->th.th_serial_team;
+
+ /* utilize the serialized team held by this thread */
+ KMP_DEBUG_ASSERT( serial_team );
+ KMP_MB();
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
+ KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
+ KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
+ global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
+ this_thr->th.th_task_team = NULL;
+ }
+
+#if OMP_40_ENABLED
+ kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
+ if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
+ proc_bind = proc_bind_false;
+ }
+ else if ( proc_bind == proc_bind_default ) {
+ //
+ // No proc_bind clause was specified, so use the current value
+ // of proc-bind-var for this parallel region.
+ //
+ proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
+ }
+ //
+ // Reset for next parallel region
+ //
+ this_thr->th.th_set_proc_bind = proc_bind_default;
+#endif /* OMP_40_ENABLED */
+
+ if( this_thr->th.th_team != serial_team ) {
+ // Nested level will be an index in the nested nthreads array
+ int level = this_thr->th.th_team->t.t_level;
+
+ if( serial_team->t.t_serialized ) {
+ /* this serial team was already used
+             * TODO: increase performance by making these locks more specific */
+ kmp_team_t *new_team;
+
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+#if OMPT_SUPPORT
+ ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
+#endif
+
+ new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
+#if OMPT_SUPPORT
+ ompt_parallel_id,
+#endif
+#if OMP_40_ENABLED
+ proc_bind,
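+        /* Compare our [stack_beg, stack_end) range against every other registered
+           thread's stack; any intersection is reported as a fatal error. */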
+#endif
+ & this_thr->th.th_current_task->td_icvs,
+ 0 USE_NESTED_HOT_ARG(NULL) );
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ KMP_ASSERT( new_team );
+
+ /* setup new serialized team and install it */
+ new_team->t.t_threads[0] = this_thr;
+ new_team->t.t_parent = this_thr->th.th_team;
+ serial_team = new_team;
+ this_thr->th.th_serial_team = serial_team;
+
+ KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
+ global_tid, serial_team ) );
+
+
+ /* TODO the above breaks the requirement that if we run out of
+ * resources, then we can still guarantee that serialized teams
+ * are ok, since we may need to allocate a new one */
+ } else {
+ KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
+ global_tid, serial_team ) );
+ }
+
+ /* we have to initialize this serial team */
+ KMP_DEBUG_ASSERT( serial_team->t.t_threads );
+ KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
+ KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
+ serial_team->t.t_ident = loc;
+ serial_team->t.t_serialized = 1;
+ serial_team->t.t_nproc = 1;
+ serial_team->t.t_parent = this_thr->th.th_team;
+ serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
+ this_thr->th.th_team = serial_team;
+ serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
+
+        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
+ global_tid, this_thr->th.th_current_task ) );
+ KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
+ this_thr->th.th_current_task->td_flags.executing = 0;
+
+ __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
+
+ /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
+ each serialized task represented by team->t.t_serialized? */
+ copy_icvs(
+ & this_thr->th.th_current_task->td_icvs,
+ & this_thr->th.th_current_task->td_parent->td_icvs );
+
+ // Thread value exists in the nested nthreads array for the next nested level
+ if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
+ this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
+ }
+
+#if OMP_40_ENABLED
+ if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
+ this_thr->th.th_current_task->td_icvs.proc_bind
+ = __kmp_nested_proc_bind.bind_types[ level + 1 ];
+ }
+#endif /* OMP_40_ENABLED */
+
+#if USE_DEBUGGER
+ serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
+#endif
+ this_thr->th.th_info.ds.ds_tid = 0;
+
+ /* set thread cache values */
+ this_thr->th.th_team_nproc = 1;
+ this_thr->th.th_team_master = this_thr;
+ this_thr->th.th_team_serialized = 1;
+
+ serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
+ serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
+
+ propagateFPControl (serial_team);
+
+ /* check if we need to allocate dispatch buffers stack */
+ KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
+ if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
+ serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
+ __kmp_allocate( sizeof( dispatch_private_info_t ) );
+ }
+ this_thr->th.th_dispatch = serial_team->t.t_dispatch;
+
+#if OMPT_SUPPORT
+ ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
+ __ompt_team_assign_id(serial_team, ompt_parallel_id);
+#endif
+
+ KMP_MB();
+
+ } else {
+ /* this serialized team is already being used,
+ * that's fine, just add another nested level */
+ KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
+ KMP_DEBUG_ASSERT( serial_team->t.t_threads );
+ KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
+ ++ serial_team->t.t_serialized;
+ this_thr->th.th_team_serialized = serial_team->t.t_serialized;
+
+ // Nested level will be an index in the nested nthreads array
+ int level = this_thr->th.th_team->t.t_level;
+ // Thread value exists in the nested nthreads array for the next nested level
+ if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
+ this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
+ }
+ serial_team->t.t_level++;
+ KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
+ global_tid, serial_team, serial_team->t.t_level ) );
+
+ /* allocate/push dispatch buffers stack */
+ KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
+ {
+ dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
+ __kmp_allocate( sizeof( dispatch_private_info_t ) );
+ disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
+ serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
+ }
+ this_thr->th.th_dispatch = serial_team->t.t_dispatch;
+
+ KMP_MB();
+ }
+
+ if ( __kmp_env_consistency_check )
+ __kmp_push_parallel( global_tid, NULL );
+
+#if USE_ITT_BUILD
+    // Mark the start of the "parallel" region for VTune. Only use one frame notification scheme at the moment.
+ if ( serial_team->t.t_level == 1
+#if OMP_40_ENABLED
+ && this_thr->th.th_teams_microtask == NULL
+#endif
+ ) {
+#if USE_ITT_NOTIFY
+        // Save the start of the "parallel" region for VTune; this also serves as the frame begin time.
+ if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
+ ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
+ {
+ serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
+ } else // only one notification scheme (either "submit" or "forking/joined", not both)
+#endif
+ if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
+ __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )
+ {
+ this_thr->th.th_ident = loc;
+ // 0 - no barriers; 1 - serialized parallel
+ __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
+ }
+ }
+#endif /* USE_ITT_BUILD */
+}
+
+/* most of the work for a fork */
+/* return true if we really went parallel, false if serialized */
+int
+__kmp_fork_call(
+ ident_t * loc,
+ int gtid,
+ enum fork_context_e call_context, // Intel, GNU, ...
+ kmp_int32 argc,
+#if OMPT_SUPPORT
+ void *unwrapped_task,
+#endif
+ microtask_t microtask,
+ launch_t invoker,
+/* TODO: revert workaround for Intel(R) 64 tracker #96 */
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ va_list * ap
+#else
+ va_list ap
+#endif
+ )
+{
+ void **argv;
+ int i;
+ int master_tid;
+ int master_this_cons;
+ kmp_team_t *team;
+ kmp_team_t *parent_team;
+ kmp_info_t *master_th;
+ kmp_root_t *root;
+ int nthreads;
+ int master_active;
+ int master_set_numthreads;
+ int level;
+#if OMP_40_ENABLED
+ int active_level;
+ int teams_level;
+#endif
+#if KMP_NESTED_HOT_TEAMS
+ kmp_hot_team_ptr_t **p_hot_teams;
+#endif
+ { // KMP_TIME_BLOCK
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
+ KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
+
+ KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
+ if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
+ /* Some systems prefer the stack for the root thread(s) to start with */
+ /* some gap from the parent stack to prevent false sharing. */
+ void *dummy = KMP_ALLOCA(__kmp_stkpadding);
+ /* These 2 lines below are so this does not get optimized out */
+ if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
+ __kmp_stkpadding += (short)((kmp_int64)dummy);
+ }
+
+ /* initialize if needed */
+ KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
+ if( ! TCR_4(__kmp_init_parallel) )
+ __kmp_parallel_initialize();
+
+ /* setup current data */
+ master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
+ parent_team = master_th->th.th_team;
+ master_tid = master_th->th.th_info.ds.ds_tid;
+ master_this_cons = master_th->th.th_local.this_construct;
+ root = master_th->th.th_root;
+ master_active = root->r.r_active;
+ master_set_numthreads = master_th->th.th_set_nproc;
+
+#if OMPT_SUPPORT
+ ompt_parallel_id_t ompt_parallel_id;
+ ompt_task_id_t ompt_task_id;
+ ompt_frame_t *ompt_frame;
+ ompt_task_id_t my_task_id;
+ ompt_parallel_id_t my_parallel_id;
+
+ if (ompt_enabled) {
+ ompt_parallel_id = __ompt_parallel_id_new(gtid);
+ ompt_task_id = __ompt_get_task_id_internal(0);
+ ompt_frame = __ompt_get_task_frame_internal(0);
+ }
+#endif
+
+ // Nested level will be an index in the nested nthreads array
+ level = parent_team->t.t_level;
+#if OMP_40_ENABLED
+ active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
+ teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
+#endif
+#if KMP_NESTED_HOT_TEAMS
+ p_hot_teams = &master_th->th.th_hot_teams;
+ if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
+ *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
+ sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
+ (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
+ (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
+ }
+#endif
+
+#if OMPT_SUPPORT
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
+ int team_size = master_set_numthreads;
+
+ ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
+ ompt_task_id, ompt_frame, ompt_parallel_id,
+ team_size, unwrapped_task, OMPT_INVOKER(call_context));
+ }
+#endif
+
+ master_th->th.th_ident = loc;
+
+#if OMP_40_ENABLED
+ if ( master_th->th.th_teams_microtask &&
+ ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
+ // AC: This is start of parallel that is nested inside teams construct.
+ // The team is actual (hot), all workers are ready at the fork barrier.
+ // No lock needed to initialize the team a bit, then free workers.
+ parent_team->t.t_ident = loc;
+ parent_team->t.t_argc = argc;
+ argv = (void**)parent_team->t.t_argv;
+ for( i=argc-1; i >= 0; --i )
+/* TODO: revert workaround for Intel(R) 64 tracker #96 */
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ *argv++ = va_arg( *ap, void * );
+#else
+ *argv++ = va_arg( ap, void * );
+#endif
+            /* Increment our nested depth level, but do not increase the serialization */
+ if ( parent_team == master_th->th.th_serial_team ) {
+ // AC: we are in serialized parallel
+ __kmpc_serialized_parallel(loc, gtid);
+ KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
+                parent_team->t.t_serialized--; // AC: need this so that enquiry functions
+                // work correctly; will restore at join time
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_runtime_p;
+
+ ompt_lw_taskteam_t lw_taskteam;
+
+ if (ompt_enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ unwrapped_task, ompt_parallel_id);
+ lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+ exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+
+#if OMPT_TRACE
+ /* OMPT implicit task begin */
+ my_task_id = lw_taskteam.ompt_task_info.task_id;
+ my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+
+ /* OMPT state */
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ } else {
+ exit_runtime_p = &dummy;
+ }
+#endif
+
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
+#if OMPT_SUPPORT
+ , exit_runtime_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+#if OMPT_TRACE
+ lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
+
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
+ ompt_parallel_id, ompt_task_id);
+ }
+
+ __ompt_lw_taskteam_unlink(master_th);
+                    // clear the task id only after unlinking the task
+ lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
+#endif
+
+ if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
+ ompt_parallel_id, ompt_task_id,
+ OMPT_INVOKER(call_context));
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ return TRUE;
+ }
+
+ parent_team->t.t_pkfn = microtask;
+#if OMPT_SUPPORT
+ parent_team->t.ompt_team_info.microtask = unwrapped_task;
+#endif
+ parent_team->t.t_invoke = invoker;
+ KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
+ parent_team->t.t_active_level ++;
+ parent_team->t.t_level ++;
+
+ /* Change number of threads in the team if requested */
+ if ( master_set_numthreads ) { // The parallel has num_threads clause
+ if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
+                    // AC: can only reduce the number of threads dynamically, cannot increase
+ kmp_info_t **other_threads = parent_team->t.t_threads;
+ parent_team->t.t_nproc = master_set_numthreads;
+ for ( i = 0; i < master_set_numthreads; ++i ) {
+ other_threads[i]->th.th_team_nproc = master_set_numthreads;
+ }
+ // Keep extra threads hot in the team for possible next parallels
+ }
+ master_th->th.th_set_nproc = 0;
+ }
+
+#if USE_DEBUGGER
+ if ( __kmp_debugging ) { // Let debugger override number of threads.
+ int nth = __kmp_omp_num_threads( loc );
+ if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
+ master_set_numthreads = nth;
+ }; // if
+ }; // if
+#endif
+
+ KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
+ __kmp_internal_fork( loc, gtid, parent_team );
+ KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
+
+ /* Invoke microtask for MASTER thread */
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
+ gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
+
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ if (! parent_team->t.t_invoke( gtid )) {
+ KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
+ }
+ }
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
+ gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
+
+ return TRUE;
+ } // Parallel closely nested in teams construct
+#endif /* OMP_40_ENABLED */
+
+#if KMP_DEBUG
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
+ }
+#endif
+
+ if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
+ nthreads = 1;
+ } else {
+#if OMP_40_ENABLED
+ int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
+#endif
+ nthreads = master_set_numthreads ?
+ master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
+
+            // Check whether we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
+            // This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
+ if (nthreads > 1) {
+ if ( ( !get__nested(master_th) && (root->r.r_in_parallel
+#if OMP_40_ENABLED
+ && !enter_teams
+#endif /* OMP_40_ENABLED */
+ ) ) || ( __kmp_library == library_serial ) ) {
+ KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
+ gtid, nthreads ));
+ nthreads = 1;
+ }
+ }
+ if ( nthreads > 1 ) {
+ /* determine how many new threads we can use */
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+ nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
+#if OMP_40_ENABLED
+/* AC: If we execute teams from a parallel region (on the host), then teams should be created
+ but each can only have 1 thread if nesting is disabled. If teams is called from a serial region,
+ then teams and their threads should be created regardless of the nesting setting. */
+ , enter_teams
+#endif /* OMP_40_ENABLED */
+ );
+ if ( nthreads == 1 ) {
+ // Free lock for single thread execution here;
+ // for multi-thread execution it will be freed later
+ // after team of threads created and initialized
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ }
+ }
+ }
+ KMP_DEBUG_ASSERT( nthreads > 0 );
+
+ /* If we temporarily changed the set number of threads then restore it now */
+ master_th->th.th_set_nproc = 0;
+
+ /* create a serialized parallel region? */
+ if ( nthreads == 1 ) {
+ /* josh todo: hypothetical question: what do we do for OS X*? */
+#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+ void * args[ argc ];
+#else
+ void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
+#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
+
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
+
+ __kmpc_serialized_parallel(loc, gtid);
+
+ if ( call_context == fork_context_intel ) {
+ /* TODO this sucks, use the compiler itself to pass args! :) */
+ master_th->th.th_serial_team->t.t_ident = loc;
+#if OMP_40_ENABLED
+ if ( !ap ) {
+ // revert change made in __kmpc_serialized_parallel()
+ master_th->th.th_serial_team->t.t_level--;
+ // Get args from parent team for teams construct
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_runtime_p;
+
+ ompt_lw_taskteam_t lw_taskteam;
+
+ if (ompt_enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ unwrapped_task, ompt_parallel_id);
+ lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+ exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+
+#if OMPT_TRACE
+ my_task_id = lw_taskteam.ompt_task_info.task_id;
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
+ ompt_parallel_id, my_task_id);
+ }
+#endif
+
+ /* OMPT state */
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ } else {
+ exit_runtime_p = &dummy;
+ }
+#endif
+
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
+#if OMPT_SUPPORT
+ , exit_runtime_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
+
+#if OMPT_TRACE
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
+ ompt_parallel_id, ompt_task_id);
+ }
+#endif
+
+ __ompt_lw_taskteam_unlink(master_th);
+                    // clear the task id only after unlinking the task
+ lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
+
+ if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
+ ompt_parallel_id, ompt_task_id,
+ OMPT_INVOKER(call_context));
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ } else if ( microtask == (microtask_t)__kmp_teams_master ) {
+ KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
+ team = master_th->th.th_team;
+ //team->t.t_pkfn = microtask;
+ team->t.t_invoke = invoker;
+ __kmp_alloc_argv_entries( argc, team, TRUE );
+ team->t.t_argc = argc;
+ argv = (void**) team->t.t_argv;
+ if ( ap ) {
+ for( i=argc-1; i >= 0; --i )
+// TODO: revert workaround for Intel(R) 64 tracker #96
+# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ *argv++ = va_arg( *ap, void * );
+# else
+ *argv++ = va_arg( ap, void * );
+# endif
+ } else {
+ for( i=0; i < argc; ++i )
+ // Get args from parent team for teams construct
+ argv[i] = parent_team->t.t_argv[i];
+ }
+ // AC: revert change made in __kmpc_serialized_parallel()
+ // because initial code in teams should have level=0
+ team->t.t_level--;
+ // AC: call special invoker for outer "parallel" of the teams construct
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ invoker(gtid);
+ }
+ } else {
+#endif /* OMP_40_ENABLED */
+ argv = args;
+ for( i=argc-1; i >= 0; --i )
+// TODO: revert workaround for Intel(R) 64 tracker #96
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ *argv++ = va_arg( *ap, void * );
+#else
+ *argv++ = va_arg( ap, void * );
+#endif
+ KMP_MB();
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_runtime_p;
+
+ ompt_lw_taskteam_t lw_taskteam;
+
+ if (ompt_enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ unwrapped_task, ompt_parallel_id);
+ lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+ exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+
+#if OMPT_TRACE
+ /* OMPT implicit task begin */
+ my_task_id = lw_taskteam.ompt_task_info.task_id;
+ my_parallel_id = ompt_parallel_id;
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+
+ /* OMPT state */
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ } else {
+ exit_runtime_p = &dummy;
+ }
+#endif
+
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ __kmp_invoke_microtask( microtask, gtid, 0, argc, args
+#if OMPT_SUPPORT
+ , exit_runtime_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+#if OMPT_TRACE
+ lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
+
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+
+ __ompt_lw_taskteam_unlink(master_th);
+                // clear the task id only after unlinking the task
+ lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
+
+ if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
+ ompt_parallel_id, ompt_task_id,
+ OMPT_INVOKER(call_context));
+ }
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+#if OMP_40_ENABLED
+ }
+#endif /* OMP_40_ENABLED */
+ }
+ else if ( call_context == fork_context_gnu ) {
+#if OMPT_SUPPORT
+ ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
+ __kmp_allocate(sizeof(ompt_lw_taskteam_t));
+ __ompt_lw_taskteam_init(lwt, master_th, gtid,
+ unwrapped_task, ompt_parallel_id);
+
+ lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
+ lwt->ompt_task_info.frame.exit_runtime_frame = 0;
+ __ompt_lw_taskteam_link(lwt, master_th);
+#endif
+
+ // we were called from GNU native code
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
+ return FALSE;
+ }
+ else {
+ KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
+ }
+
+
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
+ KMP_MB();
+ return FALSE;
+ }
+
+ // GEH: only modify the executing flag in the case when not serialized
+ // serialized case is handled in kmpc_serialized_parallel
+ KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
+ parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
+ master_th->th.th_current_task->td_icvs.max_active_levels ) );
+ // TODO: GEH - cannot do this assertion because root thread not set up as executing
+ // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
+ master_th->th.th_current_task->td_flags.executing = 0;
+
+#if OMP_40_ENABLED
+ if ( !master_th->th.th_teams_microtask || level > teams_level )
+#endif /* OMP_40_ENABLED */
+ {
+ /* Increment our nested depth level */
+ KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
+ }
+
+ // See if we need to make a copy of the ICVs.
+ int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
+ if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
+ nthreads_icv = __kmp_nested_nth.nth[level+1];
+ }
+ else {
+ nthreads_icv = 0; // don't update
+ }
+
+#if OMP_40_ENABLED
+ // Figure out the proc_bind_policy for the new team.
+ kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
+ if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
+ proc_bind = proc_bind_false;
+ }
+ else {
+ if (proc_bind == proc_bind_default) {
+ // No proc_bind clause specified; use current proc-bind-var for this parallel region
+ proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
+ }
+ /* else: The proc_bind policy was specified explicitly on parallel clause. This
+ overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
+ // Figure the value of proc-bind-var for the child threads.
+ if ((level+1 < __kmp_nested_proc_bind.used)
+ && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
+ }
+ }
+
+ // Reset for next parallel region
+ master_th->th.th_set_proc_bind = proc_bind_default;
+#endif /* OMP_40_ENABLED */
+
+ if ((nthreads_icv > 0)
+#if OMP_40_ENABLED
+ || (proc_bind_icv != proc_bind_default)
+#endif /* OMP_40_ENABLED */
+ ) {
+ kmp_internal_control_t new_icvs;
+ copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
+ new_icvs.next = NULL;
+ if (nthreads_icv > 0) {
+ new_icvs.nproc = nthreads_icv;
+ }
+
+#if OMP_40_ENABLED
+ if (proc_bind_icv != proc_bind_default) {
+ new_icvs.proc_bind = proc_bind_icv;
+ }
+#endif /* OMP_40_ENABLED */
+
+ /* allocate a new parallel team */
+ KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
+ team = __kmp_allocate_team(root, nthreads, nthreads,
+#if OMPT_SUPPORT
+ ompt_parallel_id,
+#endif
+#if OMP_40_ENABLED
+ proc_bind,
+#endif
+ &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
+ } else {
+ /* allocate a new parallel team */
+ KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
+ team = __kmp_allocate_team(root, nthreads, nthreads,
+#if OMPT_SUPPORT
+ ompt_parallel_id,
+#endif
+#if OMP_40_ENABLED
+ proc_bind,
+#endif
+ &master_th->th.th_current_task->td_icvs, argc
+ USE_NESTED_HOT_ARG(master_th) );
+ }
+ KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
+
+ /* setup the new team */
+ team->t.t_master_tid = master_tid;
+ team->t.t_master_this_cons = master_this_cons;
+ team->t.t_ident = loc;
+ team->t.t_parent = parent_team;
+ TCW_SYNC_PTR(team->t.t_pkfn, microtask);
+#if OMPT_SUPPORT
+ TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);
+#endif
+ team->t.t_invoke = invoker; /* TODO move this to root, maybe */
+ // TODO: parent_team->t.t_level == INT_MAX ???
+#if OMP_40_ENABLED
+ if ( !master_th->th.th_teams_microtask || level > teams_level ) {
+#endif /* OMP_40_ENABLED */
+ team->t.t_level = parent_team->t.t_level + 1;
+ team->t.t_active_level = parent_team->t.t_active_level + 1;
+#if OMP_40_ENABLED
+ } else {
+ // AC: Do not increase parallel level at start of the teams construct
+ team->t.t_level = parent_team->t.t_level;
+ team->t.t_active_level = parent_team->t.t_active_level;
+ }
+#endif /* OMP_40_ENABLED */
+ team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule
+
+#if OMP_40_ENABLED
+ team->t.t_cancel_request = cancel_noreq;
+#endif
+
+ // Update the floating point rounding in the team if required.
+ propagateFPControl(team);
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // Set master's task team to team's task team. Unless this is hot team, it should be NULL.
+#if 0
+ // Patch out an assertion that trips while the runtime seems to operate correctly.
+ // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
+ KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
+#endif
+ KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
+ __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
+ parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
+ if (level) {
+ // Take a memo of master's task_state
+ KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+ if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
+ kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
+ kmp_uint8 *old_stack, *new_stack;
+ kmp_uint32 i;
+ new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
+ for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
+ new_stack[i] = master_th->th.th_task_state_memo_stack[i];
+ }
+ for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
+ new_stack[i] = 0;
+ }
+ old_stack = master_th->th.th_task_state_memo_stack;
+ master_th->th.th_task_state_memo_stack = new_stack;
+ master_th->th.th_task_state_stack_sz = new_size;
+ __kmp_free(old_stack);
+ }
+ // Store master's task_state on stack
+ master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
+ master_th->th.th_task_state_top++;
+#if KMP_NESTED_HOT_TEAMS
+ if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
+    gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in __kmp_save_internal_controls
+ }
+ else {
+#endif
+ master_th->th.th_task_state = 0;
+#if KMP_NESTED_HOT_TEAMS
+ }
+#endif
+ }
+#if !KMP_NESTED_HOT_TEAMS
+ KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
+#endif
+ }
+
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
+ gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
+ KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
+ ( team->t.t_master_tid == 0 &&
+ ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
+ KMP_MB();
+
+ /* now, setup the arguments */
+ argv = (void**)team->t.t_argv;
+#if OMP_40_ENABLED
+ if ( ap ) {
+#endif /* OMP_40_ENABLED */
+ for ( i=argc-1; i >= 0; --i )
+// TODO: revert workaround for Intel(R) 64 tracker #96
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
+ *argv++ = va_arg( *ap, void * );
+#else
+ *argv++ = va_arg( ap, void * );
+#endif
+#if OMP_40_ENABLED
+ } else {
+ for ( i=0; i < argc; ++i )
+ // Get args from parent team for teams construct
+ argv[i] = team->t.t_parent->t.t_argv[i];
+ }
+#endif /* OMP_40_ENABLED */
+
+ /* now actually fork the threads */
+ team->t.t_master_active = master_active;
+ if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
+ root->r.r_active = TRUE;
+
+ __kmp_fork_team_threads( root, team, master_th, gtid );
+ __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
+
+#if OMPT_SUPPORT
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+#endif
+
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+
+
+#if USE_ITT_BUILD
+ if ( team->t.t_active_level == 1 // only report frames at level 1
+# if OMP_40_ENABLED
+ && !master_th->th.th_teams_microtask // not in teams construct
+# endif /* OMP_40_ENABLED */
+ ) {
+#if USE_ITT_NOTIFY
+ if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
+ ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
+ {
+ kmp_uint64 tmp_time = 0;
+ if ( __itt_get_timestamp_ptr )
+ tmp_time = __itt_get_timestamp();
+ // Internal fork - report frame begin
+ master_th->th.th_frame_time = tmp_time;
+ if ( __kmp_forkjoin_frames_mode == 3 )
+ team->t.t_region_time = tmp_time;
+ } else // only one notification scheme (either "submit" or "forking/joined", not both)
+#endif /* USE_ITT_NOTIFY */
+ if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
+ __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
+ { // Mark start of "parallel" region for VTune.
+ __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
+ }
+ }
+#endif /* USE_ITT_BUILD */
+
+ /* now go on and do the work */
+ KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
+ KMP_MB();
+ KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
+ root, team, master_th, gtid));
+
+#if USE_ITT_BUILD
+ if ( __itt_stack_caller_create_ptr ) {
+ team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
+ }
+#endif /* USE_ITT_BUILD */
+
+#if OMP_40_ENABLED
+ if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
+#endif /* OMP_40_ENABLED */
+ {
+ __kmp_internal_fork( loc, gtid, team );
+ KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
+ root, team, master_th, gtid));
+ }
+
+ if (call_context == fork_context_gnu) {
+ KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
+ return TRUE;
+ }
+
+ /* Invoke microtask for MASTER thread */
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
+ gtid, team->t.t_id, team->t.t_pkfn ) );
+ } // END of timer KMP_fork_call block
+
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
+ if (! team->t.t_invoke( gtid )) {
+ KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
+ }
+ }
+ KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
+ gtid, team->t.t_id, team->t.t_pkfn ) );
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+
+ return TRUE;
+}
+
+#if OMPT_SUPPORT
+static inline void
+__kmp_join_restore_state(
+ kmp_info_t *thread,
+ kmp_team_t *team)
+{
+ // restore state outside the region
+ thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
+ ompt_state_work_serial : ompt_state_work_parallel);
+}
+
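+// Fire the OMPT parallel_end callback for the given parallel region (if one is
+// registered) and then restore the thread's OMPT state for the parent team.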
+static inline void
+__kmp_join_ompt(
+ kmp_info_t *thread,
+ kmp_team_t *team,
+ ompt_parallel_id_t parallel_id,
+ fork_context_e fork_context)
+{
+ if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
+ ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
+ ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
+ parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
+ }
+
+ __kmp_join_restore_state(thread,team);
+}
+#endif
+
+void
+__kmp_join_call(ident_t *loc, int gtid
+#if OMPT_SUPPORT
+ , enum fork_context_e fork_context
+#endif
+#if OMP_40_ENABLED
+ , int exit_teams
+#endif /* OMP_40_ENABLED */
+)
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
+ kmp_team_t *team;
+ kmp_team_t *parent_team;
+ kmp_info_t *master_th;
+ kmp_root_t *root;
+ int master_active;
+ int i;
+
+ KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
+
+ /* setup current data */
+ master_th = __kmp_threads[ gtid ];
+ root = master_th->th.th_root;
+ team = master_th->th.th_team;
+ parent_team = team->t.t_parent;
+
+ master_th->th.th_ident = loc;
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+
+#if KMP_DEBUG
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
+ __kmp_gtid_from_thread( master_th ), team,
+ team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
+ KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
+ }
+#endif
+
+ if( team->t.t_serialized ) {
+#if OMP_40_ENABLED
+ if ( master_th->th.th_teams_microtask ) {
+ // We are in teams construct
+ int level = team->t.t_level;
+ int tlevel = master_th->th.th_teams_level;
+ if ( level == tlevel ) {
+ // AC: we haven't incremented it earlier at start of teams construct,
+ // so do it here - at the end of teams construct
+ team->t.t_level++;
+ } else if ( level == tlevel + 1 ) {
+ // AC: we are exiting parallel inside teams, need to increment serialization
+ // in order to restore it in the next call to __kmpc_end_serialized_parallel
+ team->t.t_serialized++;
+ }
+ }
+#endif /* OMP_40_ENABLED */
+ __kmpc_end_serialized_parallel( loc, gtid );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ __kmp_join_restore_state(master_th, parent_team);
+ }
+#endif
+
+ return;
+ }
+
+ master_active = team->t.t_master_active;
+
+#if OMP_40_ENABLED
+ if (!exit_teams)
+#endif /* OMP_40_ENABLED */
+ {
+ // AC: No barrier for internal teams at exit from teams construct.
+ // But there is barrier for external team (league).
+ __kmp_internal_join( loc, gtid, team );
+ }
+#if OMP_40_ENABLED
+ else {
+ master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
+ }
+#endif /* OMP_40_ENABLED */
+
+ KMP_MB();
+
+#if OMPT_SUPPORT
+ ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
+#endif
+
+#if USE_ITT_BUILD
+ if ( __itt_stack_caller_create_ptr ) {
+ __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
+ }
+
+ // Mark end of "parallel" region for VTune.
+ if ( team->t.t_active_level == 1
+# if OMP_40_ENABLED
+ && !master_th->th.th_teams_microtask /* not in teams construct */
+# endif /* OMP_40_ENABLED */
+ ) {
+ master_th->th.th_ident = loc;
+ // only one notification scheme (either "submit" or "forking/joined", not both)
+ if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
+ __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
+ 0, loc, master_th->th.th_team_nproc, 1 );
+ else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
+ ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
+ __kmp_itt_region_joined( gtid );
+ } // active_level == 1
+#endif /* USE_ITT_BUILD */
+
+#if OMP_40_ENABLED
+ if ( master_th->th.th_teams_microtask &&
+ !exit_teams &&
+ team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
+ team->t.t_level == master_th->th.th_teams_level + 1 ) {
+ // AC: We need to leave the team structure intact at the end
+ // of parallel inside the teams construct, so that at the next
+ // parallel same (hot) team works, only adjust nesting levels
+
+ /* Decrement our nested depth level */
+ team->t.t_level --;
+ team->t.t_active_level --;
+ KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
+
+ /* Restore number of threads in the team if needed */
+ if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
+ int old_num = master_th->th.th_team_nproc;
+ int new_num = master_th->th.th_teams_size.nth;
+ kmp_info_t **other_threads = team->t.t_threads;
+ team->t.t_nproc = new_num;
+ for ( i = 0; i < old_num; ++i ) {
+ other_threads[i]->th.th_team_nproc = new_num;
+ }
+ // Adjust states of non-used threads of the team
+ for ( i = old_num; i < new_num; ++i ) {
+ // Re-initialize thread's barrier data.
+ int b;
+ kmp_balign_t * balign = other_threads[i]->th.th_bar;
+ for ( b = 0; b < bs_last_barrier; ++ b ) {
+ balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
+ KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
+#if USE_DEBUGGER
+ balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
+#endif
+ }
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // Synchronize thread's task state
+ other_threads[i]->th.th_task_state = master_th->th.th_task_state;
+ }
+ }
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+ }
+#endif
+
+ return;
+ }
+#endif /* OMP_40_ENABLED */
+
+ /* do cleanup and restore the parent team */
+    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
+ master_th->th.th_local.this_construct = team->t.t_master_this_cons;
+
+ master_th->th.th_dispatch =
+ & parent_team->t.t_dispatch[ team->t.t_master_tid ];
+
+ /* jc: The following lock has instructions with REL and ACQ semantics,
+ separating the parallel user code called in this parallel region
+ from the serial user code called after this function returns.
+ */
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+#if OMP_40_ENABLED
+ if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
+#endif /* OMP_40_ENABLED */
+ {
+ /* Decrement our nested depth level */
+ KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
+ }
+ KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
+
+ KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
+ 0, master_th, team ) );
+ __kmp_pop_current_task_from_thread( master_th );
+
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ //
+ // Restore master thread's partition.
+ //
+ master_th->th.th_first_place = team->t.t_first_place;
+ master_th->th.th_last_place = team->t.t_last_place;
+#endif /* OMP_40_ENABLED */
+
+ updateHWFPControl (team);
+
+ if ( root->r.r_active != master_active )
+ root->r.r_active = master_active;
+
+ __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
+
+ /* this race was fun to find. make sure the following is in the critical
+ * region otherwise assertions may fail occasionally since the old team
+ * may be reallocated and the hierarchy appears inconsistent. it is
+ * actually safe to run and won't cause any bugs, but will cause those
+ * assertion failures. it's only one deref&assign so might as well put this
+ * in the critical region */
+ master_th->th.th_team = parent_team;
+ master_th->th.th_team_nproc = parent_team->t.t_nproc;
+ master_th->th.th_team_master = parent_team->t.t_threads[0];
+ master_th->th.th_team_serialized = parent_team->t.t_serialized;
+
+ /* restore serialized team, if need be */
+ if( parent_team->t.t_serialized &&
+ parent_team != master_th->th.th_serial_team &&
+ parent_team != root->r.r_root_team ) {
+ __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
+ master_th->th.th_serial_team = parent_team;
+ }
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
+ KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+ // Remember master's state if we re-use this nested hot team
+ master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
+ --master_th->th.th_task_state_top; // pop
+ // Now restore state at this level
+ master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
+ }
+ // Copy the task team from the parent team to the master thread
+ master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
+ KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
+ __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
+ }
+
+ // TODO: GEH - cannot do this assertion because root thread not set up as executing
+ // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
+ master_th->th.th_current_task->td_flags.executing = 1;
+
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+ }
+#endif
+
+ KMP_MB();
+ KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* Check whether we should push an internal control record onto the
+ serial team stack. If so, do it. */
+void
+__kmp_save_internal_controls ( kmp_info_t * thread )
+{
+
+ if ( thread->th.th_team != thread->th.th_serial_team ) {
+ return;
+ }
+ if (thread->th.th_team->t.t_serialized > 1) {
+ int push = 0;
+
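+        // Push a new record only if there is no record yet for the current
+        // serialized nesting level; otherwise the record on top of the stack
+        // already covers this level and is reused.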
+ if (thread->th.th_team->t.t_control_stack_top == NULL) {
+ push = 1;
+ } else {
+ if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
+ thread->th.th_team->t.t_serialized ) {
+ push = 1;
+ }
+ }
+ if (push) { /* push a record on the serial team's stack */
+ kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
+
+ copy_icvs( control, & thread->th.th_current_task->td_icvs );
+
+ control->serial_nesting_level = thread->th.th_team->t.t_serialized;
+
+ control->next = thread->th.th_team->t.t_control_stack_top;
+ thread->th.th_team->t.t_control_stack_top = control;
+ }
+ }
+}
+
+/* Changes set_nproc */
+void
+__kmp_set_num_threads( int new_nth, int gtid )
+{
+ kmp_info_t *thread;
+ kmp_root_t *root;
+
+ KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ if (new_nth < 1)
+ new_nth = 1;
+ else if (new_nth > __kmp_max_nth)
+ new_nth = __kmp_max_nth;
+
+ KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
+ thread = __kmp_threads[gtid];
+
+ __kmp_save_internal_controls( thread );
+
+ set__nproc( thread, new_nth );
+
+ //
+ // If this omp_set_num_threads() call will cause the hot team size to be
+ // reduced (in the absence of a num_threads clause), then reduce it now,
+ // rather than waiting for the next parallel region.
+ //
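+    // Illustrative example (not from the original source): after a parallel
+    // region has run with 8 threads, a later omp_set_num_threads(2) on an
+    // inactive root releases the 6 surplus hot-team threads here rather than
+    // at the next parallel region.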
+ root = thread->th.th_root;
+ if ( __kmp_init_parallel && ( ! root->r.r_active )
+ && ( root->r.r_hot_team->t.t_nproc > new_nth )
+#if KMP_NESTED_HOT_TEAMS
+ && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
+#endif
+ ) {
+ kmp_team_t *hot_team = root->r.r_hot_team;
+ int f;
+
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+
+ // Release the extra threads we don't need any more.
+ for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
+ KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
+ if ( __kmp_tasking_mode != tskm_immediate_exec) {
+ // When decreasing team size, threads no longer in the team should unref task team.
+ hot_team->t.t_threads[f]->th.th_task_team = NULL;
+ }
+ __kmp_free_thread( hot_team->t.t_threads[f] );
+ hot_team->t.t_threads[f] = NULL;
+ }
+ hot_team->t.t_nproc = new_nth;
+#if KMP_NESTED_HOT_TEAMS
+ if( thread->th.th_hot_teams ) {
+ KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
+ thread->th.th_hot_teams[0].hot_team_nth = new_nth;
+ }
+#endif
+
+
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+
+ //
+ // Update the t_nproc field in the threads that are still active.
+ //
+ for( f=0 ; f < new_nth; f++ ) {
+ KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
+ hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
+ }
+ // Special flag in case omp_set_num_threads() call
+ hot_team->t.t_size_changed = -1;
+ }
+}
+
+/* Changes max_active_levels */
+void
+__kmp_set_max_active_levels( int gtid, int max_active_levels )
+{
+ kmp_info_t *thread;
+
+ KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ // validate max_active_levels
+ if( max_active_levels < 0 ) {
+ KMP_WARNING( ActiveLevelsNegative, max_active_levels );
+ // We ignore this call if the user has specified a negative value.
+ // The current setting won't be changed. The last valid setting will be used.
+ // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
+ KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
+ return;
+ }
+ if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
+ // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
+ // We allow a zero value. (implementation defined behavior)
+ } else {
+ KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
+ max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
+ // Current upper limit is MAX_INT. (implementation defined behavior)
+ // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
+        // Actually, the flow can never reach here while the upper limit is MAX_INT.
+ }
+ KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
+
+ thread = __kmp_threads[ gtid ];
+
+ __kmp_save_internal_controls( thread );
+
+ set__max_active_levels( thread, max_active_levels );
+
+}
+
+/* Gets max_active_levels */
+int
+__kmp_get_max_active_levels( int gtid )
+{
+ kmp_info_t *thread;
+
+ KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ thread = __kmp_threads[ gtid ];
+ KMP_DEBUG_ASSERT( thread->th.th_current_task );
+ KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
+ gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
+ return thread->th.th_current_task->td_icvs.max_active_levels;
+}
+
+/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
+void
+__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
+{
+ kmp_info_t *thread;
+// kmp_team_t *team;
+
+ KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ // Check if the kind parameter is valid, correct if needed.
+ // Valid parameters should fit in one of two intervals - standard or extended:
+ // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
+ // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
+ if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
+ ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
+ {
+ // TODO: Hint needs attention in case we change the default schedule.
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( ScheduleKindOutOfRange, kind ),
+ KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
+ __kmp_msg_null
+ );
+ kind = kmp_sched_default;
+ chunk = 0; // ignore chunk value in case of bad kind
+ }
+
+ thread = __kmp_threads[ gtid ];
+
+ __kmp_save_internal_controls( thread );
+
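+    // Map the user-visible kmp_sched_t value onto the internal sched_type via
+    // __kmp_sch_map; "static" without a valid chunk keeps the unchunked
+    // kmp_sch_static encoding.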
+ if ( kind < kmp_sched_upper_std ) {
+ if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
+            // distinguish static chunked vs. unchunked:
+ // chunk should be invalid to indicate unchunked schedule (which is the default)
+ thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
+ } else {
+ thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
+ }
+ } else {
+ // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
+ thread->th.th_current_task->td_icvs.sched.r_sched_type =
+ __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
+ }
+ if ( kind == kmp_sched_auto ) {
+ // ignore parameter chunk for schedule auto
+ thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
+ } else {
+ thread->th.th_current_task->td_icvs.sched.chunk = chunk;
+ }
+}
+
+/* Gets def_sched_var ICV values */
+void
+__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
+{
+ kmp_info_t *thread;
+ enum sched_type th_type;
+
+ KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ thread = __kmp_threads[ gtid ];
+
+ //th_type = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type;
+ th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
+
+ switch ( th_type ) {
+ case kmp_sch_static:
+ case kmp_sch_static_greedy:
+ case kmp_sch_static_balanced:
+ *kind = kmp_sched_static;
+ *chunk = 0; // chunk was not set, try to show this fact via zero value
+ return;
+ case kmp_sch_static_chunked:
+ *kind = kmp_sched_static;
+ break;
+ case kmp_sch_dynamic_chunked:
+ *kind = kmp_sched_dynamic;
+ break;
+ case kmp_sch_guided_chunked:
+ case kmp_sch_guided_iterative_chunked:
+ case kmp_sch_guided_analytical_chunked:
+ *kind = kmp_sched_guided;
+ break;
+ case kmp_sch_auto:
+ *kind = kmp_sched_auto;
+ break;
+ case kmp_sch_trapezoidal:
+ *kind = kmp_sched_trapezoidal;
+ break;
+/*
+ case kmp_sch_static_steal:
+ *kind = kmp_sched_static_steal;
+ break;
+*/
+ default:
+ KMP_FATAL( UnknownSchedulingType, th_type );
+ }
+
+ //*chunk = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk;
+ *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
+}
+
+int
+__kmp_get_ancestor_thread_num( int gtid, int level ) {
+
+ int ii, dd;
+ kmp_team_t *team;
+ kmp_info_t *thr;
+
+ KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ // validate level
+ if( level == 0 ) return 0;
+ if( level < 0 ) return -1;
+ thr = __kmp_threads[ gtid ];
+ team = thr->th.th_team;
+ ii = team->t.t_level;
+ if( level > ii ) return -1;
+
+#if OMP_40_ENABLED
+ if( thr->th.th_teams_microtask ) {
+ // AC: we are in teams region where multiple nested teams have same level
+ int tlevel = thr->th.th_teams_level; // the level of the teams construct
+ if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
+ KMP_DEBUG_ASSERT( ii >= tlevel );
+ // AC: As we need to pass by the teams league, we need to artificially increase ii
+ if ( ii == tlevel ) {
+ ii += 2; // three teams have same level
+ } else {
+ ii ++; // two teams have same level
+ }
+ }
+ }
+#endif
+
+ if( ii == level ) return __kmp_tid_from_gtid( gtid );
+
+ dd = team->t.t_serialized;
+ level++;
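+    // Walk up the team tree: first consume the serialized nesting levels of
+    // the current team, then step to the parent team, until the requested
+    // ancestor level is reached.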
+ while( ii > level )
+ {
+ for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
+ {
+ }
+ if( ( team->t.t_serialized ) && ( !dd ) ) {
+ team = team->t.t_parent;
+ continue;
+ }
+ if( ii > level ) {
+ team = team->t.t_parent;
+ dd = team->t.t_serialized;
+ ii--;
+ }
+ }
+
+ return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
+}
+
+int
+__kmp_get_team_size( int gtid, int level ) {
+
+ int ii, dd;
+ kmp_team_t *team;
+ kmp_info_t *thr;
+
+ KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+
+ // validate level
+ if( level == 0 ) return 1;
+ if( level < 0 ) return -1;
+ thr = __kmp_threads[ gtid ];
+ team = thr->th.th_team;
+ ii = team->t.t_level;
+ if( level > ii ) return -1;
+
+#if OMP_40_ENABLED
+ if( thr->th.th_teams_microtask ) {
+ // AC: we are in teams region where multiple nested teams have same level
+ int tlevel = thr->th.th_teams_level; // the level of the teams construct
+ if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
+ KMP_DEBUG_ASSERT( ii >= tlevel );
+ // AC: As we need to pass by the teams league, we need to artificially increase ii
+ if ( ii == tlevel ) {
+ ii += 2; // three teams have same level
+ } else {
+ ii ++; // two teams have same level
+ }
+ }
+ }
+#endif
+
+ while( ii > level )
+ {
+ for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
+ {
+ }
+ if( team->t.t_serialized && ( !dd ) ) {
+ team = team->t.t_parent;
+ continue;
+ }
+ if( ii > level ) {
+ team = team->t.t_parent;
+ ii--;
+ }
+ }
+
+ return team->t.t_nproc;
+}
+
+kmp_r_sched_t
+__kmp_get_schedule_global() {
+// This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
+// may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
+
+ kmp_r_sched_t r_sched;
+
+ // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
+ // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
+ // and thus have different run-time schedules in different roots (even in OMP 2.5)
+ if ( __kmp_sched == kmp_sch_static ) {
+ r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
+ } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
+ r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
+ } else {
+ r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
+ }
+
+ if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
+ r_sched.chunk = KMP_DEFAULT_CHUNK;
+ } else {
+ r_sched.chunk = __kmp_chunk;
+ }
+
+ return r_sched;
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+
+/*
+ * Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
+ * at least argc number of *t_argv entries for the requested team.
+ */
+static void
+__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
+{
+
+ KMP_DEBUG_ASSERT( team );
+ if( !realloc || argc > team->t.t_max_argc ) {
+
+ KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
+ team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
+ /* if previously allocated heap space for args, free them */
+ if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
+ __kmp_free( (void *) team->t.t_argv );
+
+ if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
+ /* use unused space in the cache line for arguments */
+ team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
+ KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
+ team->t.t_id, team->t.t_max_argc ));
+ team->t.t_argv = &team->t.t_inline_argv[0];
+ if ( __kmp_storage_map ) {
+ __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
+ &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
+ (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
+ "team_%d.t_inline_argv",
+ team->t.t_id );
+ }
+ } else {
+ /* allocate space for arguments in the heap */
+ team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
+ KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
+ KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
+ team->t.t_id, team->t.t_max_argc ));
+ team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
+ if ( __kmp_storage_map ) {
+ __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
+ sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
+ team->t.t_id );
+ }
+ }
+ }
+}
+
+static void
+__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
+{
+ int i;
+ int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
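+    // Teams that may hold more than one thread get the full set of dispatch
+    // buffers; a single-thread team only needs two.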
+#if KMP_USE_POOLED_ALLOC
+ // AC: TODO: fix bug here: size of t_disp_buffer should not be multiplied by max_nth!
+ char *ptr = __kmp_allocate(max_nth *
+            ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff
+ + sizeof(kmp_disp_t) + sizeof(int)*6
+ //+ sizeof(int)
+ + sizeof(kmp_r_sched_t)
+ + sizeof(kmp_taskdata_t) ) );
+
+ team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
+ team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
+ ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
+ team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
+ team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
+ team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
+ team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
+ team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
+ team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
+ team->t.t_set_bt_set = (int*) ptr;
+ ptr += sizeof(int) * max_nth;
+ //team->t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth;
+ team->t.t_set_sched = (kmp_r_sched_t*) ptr;
+ ptr += sizeof(kmp_r_sched_t) * max_nth;
+ team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
+ ptr += sizeof(kmp_taskdata_t) * max_nth;
+#else
+
+ team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
+ team->t.t_disp_buffer = (dispatch_shared_info_t*)
+ __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
+ team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
+ //team->t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth );
+ //team->t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth );
+ team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
+#endif
+ team->t.t_max_nproc = max_nth;
+
+ /* setup dispatch buffers */
+ for(i = 0 ; i < num_disp_buff; ++i)
+ team->t.t_disp_buffer[i].buffer_index = i;
+}
+
+static void
+__kmp_free_team_arrays(kmp_team_t *team) {
+ /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
+ int i;
+ for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
+ if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
+ __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
+ team->t.t_dispatch[ i ].th_disp_buffer = NULL;
+ }; // if
+ }; // for
+ __kmp_free(team->t.t_threads);
+ #if !KMP_USE_POOLED_ALLOC
+ __kmp_free(team->t.t_disp_buffer);
+ __kmp_free(team->t.t_dispatch);
+ //__kmp_free(team->t.t_set_max_active_levels);
+ //__kmp_free(team->t.t_set_sched);
+ __kmp_free(team->t.t_implicit_task_taskdata);
+ #endif
+ team->t.t_threads = NULL;
+ team->t.t_disp_buffer = NULL;
+ team->t.t_dispatch = NULL;
+ //team->t.t_set_sched = 0;
+ //team->t.t_set_max_active_levels = 0;
+ team->t.t_implicit_task_taskdata = 0;
+}
+
+static void
+__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
+ kmp_info_t **oldThreads = team->t.t_threads;
+
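+    // Free the old per-team arrays except the thread-pointer array, allocate
+    // new arrays sized for max_nth, copy the existing thread pointers over,
+    // and only then release the old thread-pointer array.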
+ #if !KMP_USE_POOLED_ALLOC
+ __kmp_free(team->t.t_disp_buffer);
+ __kmp_free(team->t.t_dispatch);
+ //__kmp_free(team->t.t_set_max_active_levels);
+ //__kmp_free(team->t.t_set_sched);
+ __kmp_free(team->t.t_implicit_task_taskdata);
+ #endif
+ __kmp_allocate_team_arrays(team, max_nth);
+
+ KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
+
+ __kmp_free(oldThreads);
+}
+
+static kmp_internal_control_t
+__kmp_get_global_icvs( void ) {
+
+ kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
+
+#if OMP_40_ENABLED
+ KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
+#endif /* OMP_40_ENABLED */
+
+ kmp_internal_control_t g_icvs = {
+ 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
+ (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
+ (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
+ (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
+ __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
+ __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
+ __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread)
+ // (use a max ub on value if __kmp_parallel_initialize not called yet)
+ __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
+ r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
+#if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0],
+#endif /* OMP_40_ENABLED */
+ NULL //struct kmp_internal_control *next;
+ };
+
+ return g_icvs;
+}
+
+static kmp_internal_control_t
+__kmp_get_x_global_icvs( const kmp_team_t *team ) {
+
+ kmp_internal_control_t gx_icvs;
+ gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
+ copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
+ gx_icvs.next = NULL;
+
+ return gx_icvs;
+}
+
+static void
+__kmp_initialize_root( kmp_root_t *root )
+{
+ int f;
+ kmp_team_t *root_team;
+ kmp_team_t *hot_team;
+ int hot_team_max_nth;
+ kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
+ kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
+ KMP_DEBUG_ASSERT( root );
+ KMP_ASSERT( ! root->r.r_begin );
+
+ /* setup the root state structure */
+ __kmp_init_lock( &root->r.r_begin_lock );
+ root->r.r_begin = FALSE;
+ root->r.r_active = FALSE;
+ root->r.r_in_parallel = 0;
+ root->r.r_blocktime = __kmp_dflt_blocktime;
+ root->r.r_nested = __kmp_dflt_nested;
+
+ /* setup the root team for this task */
+ /* allocate the root team structure */
+ KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
+
+ root_team =
+ __kmp_allocate_team(
+ root,
+ 1, // new_nproc
+ 1, // max_nproc
+#if OMPT_SUPPORT
+ 0, // root parallel id
+#endif
+#if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0],
+#endif
+ &r_icvs,
+ 0 // argc
+ USE_NESTED_HOT_ARG(NULL) // master thread is unknown
+ );
+#if USE_DEBUGGER
+ // Non-NULL value should be assigned to make the debugger display the root team.
+ TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
+#endif
+
+ KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
+
+ root->r.r_root_team = root_team;
+ root_team->t.t_control_stack_top = NULL;
+
+ /* initialize root team */
+ root_team->t.t_threads[0] = NULL;
+ root_team->t.t_nproc = 1;
+ root_team->t.t_serialized = 1;
+ // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
+ root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
+ root_team->t.t_sched.chunk = r_sched.chunk;
+ KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
+ root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
+
+ /* setup the hot team for this task */
+ /* allocate the hot team structure */
+ KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
+
+ hot_team =
+ __kmp_allocate_team(
+ root,
+ 1, // new_nproc
+ __kmp_dflt_team_nth_ub * 2, // max_nproc
+#if OMPT_SUPPORT
+ 0, // root parallel id
+#endif
+#if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0],
+#endif
+ &r_icvs,
+ 0 // argc
+ USE_NESTED_HOT_ARG(NULL) // master thread is unknown
+ );
+ KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
+
+ root->r.r_hot_team = hot_team;
+ root_team->t.t_control_stack_top = NULL;
+
+ /* first-time initialization */
+ hot_team->t.t_parent = root_team;
+
+ /* initialize hot team */
+ hot_team_max_nth = hot_team->t.t_max_nproc;
+ for ( f = 0; f < hot_team_max_nth; ++ f ) {
+ hot_team->t.t_threads[ f ] = NULL;
+ }; // for
+ hot_team->t.t_nproc = 1;
+ // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
+ hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
+ hot_team->t.t_sched.chunk = r_sched.chunk;
+ hot_team->t.t_size_changed = 0;
+}
+
+#ifdef KMP_DEBUG
+
+
+typedef struct kmp_team_list_item {
+ kmp_team_p const * entry;
+ struct kmp_team_list_item * next;
+} kmp_team_list_item_t;
+typedef kmp_team_list_item_t * kmp_team_list_t;
+
+
+static void
+__kmp_print_structure_team_accum( // Add team to list of teams.
+ kmp_team_list_t list, // List of teams.
+ kmp_team_p const * team // Team to add.
+) {
+
+ // List must terminate with item where both entry and next are NULL.
+ // Team is added to the list only once.
+ // List is sorted in ascending order by team id.
+ // Team id is *not* a key.
+
+ kmp_team_list_t l;
+
+ KMP_DEBUG_ASSERT( list != NULL );
+ if ( team == NULL ) {
+ return;
+ }; // if
+
+ __kmp_print_structure_team_accum( list, team->t.t_parent );
+ __kmp_print_structure_team_accum( list, team->t.t_next_pool );
+
+ // Search list for the team.
+ l = list;
+ while ( l->next != NULL && l->entry != team ) {
+ l = l->next;
+ }; // while
+ if ( l->next != NULL ) {
+ return; // Team has been added before, exit.
+ }; // if
+
+ // Team is not found. Search list again for insertion point.
+ l = list;
+ while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
+ l = l->next;
+ }; // while
+
+ // Insert team.
+ {
+ kmp_team_list_item_t * item =
+ (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
+ * item = * l;
+ l->entry = team;
+ l->next = item;
+ }
+
+}
+
+static void
+__kmp_print_structure_team(
+ char const * title,
+ kmp_team_p const * team
+
+) {
+ __kmp_printf( "%s", title );
+ if ( team != NULL ) {
+ __kmp_printf( "%2x %p\n", team->t.t_id, team );
+ } else {
+ __kmp_printf( " - (nil)\n" );
+ }; // if
+}
+
+static void
+__kmp_print_structure_thread(
+ char const * title,
+ kmp_info_p const * thread
+
+) {
+ __kmp_printf( "%s", title );
+ if ( thread != NULL ) {
+ __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
+ } else {
+ __kmp_printf( " - (nil)\n" );
+ }; // if
+}
+
+void
+__kmp_print_structure(
+ void
+) {
+
+ kmp_team_list_t list;
+
+ // Initialize list of teams.
+ list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
+ list->entry = NULL;
+ list->next = NULL;
+
+ __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
+ {
+ int gtid;
+ for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
+ __kmp_printf( "%2d", gtid );
+ if ( __kmp_threads != NULL ) {
+ __kmp_printf( " %p", __kmp_threads[ gtid ] );
+ }; // if
+ if ( __kmp_root != NULL ) {
+ __kmp_printf( " %p", __kmp_root[ gtid ] );
+ }; // if
+ __kmp_printf( "\n" );
+ }; // for gtid
+ }
+
+ // Print out __kmp_threads array.
+ __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
+ if ( __kmp_threads != NULL ) {
+ int gtid;
+ for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
+ kmp_info_t const * thread = __kmp_threads[ gtid ];
+ if ( thread != NULL ) {
+ __kmp_printf( "GTID %2d %p:\n", gtid, thread );
+ __kmp_printf( " Our Root: %p\n", thread->th.th_root );
+ __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
+ __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
+ __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
+ __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
+ __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
+ __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
+#if OMP_40_ENABLED
+ __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
+#endif
+ __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
+ __kmp_printf( "\n" );
+ __kmp_print_structure_team_accum( list, thread->th.th_team );
+ __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
+ }; // if
+ }; // for gtid
+ } else {
+ __kmp_printf( "Threads array is not allocated.\n" );
+ }; // if
+
+ // Print out __kmp_root array.
+ __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
+ if ( __kmp_root != NULL ) {
+ int gtid;
+ for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
+ kmp_root_t const * root = __kmp_root[ gtid ];
+ if ( root != NULL ) {
+ __kmp_printf( "GTID %2d %p:\n", gtid, root );
+ __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
+ __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
+ __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
+ __kmp_printf( " Active?: %2d\n", root->r.r_active );
+ __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
+ __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
+ __kmp_printf( "\n" );
+ __kmp_print_structure_team_accum( list, root->r.r_root_team );
+ __kmp_print_structure_team_accum( list, root->r.r_hot_team );
+ }; // if
+ }; // for gtid
+ } else {
+ __kmp_printf( "Ubers array is not allocated.\n" );
+ }; // if
+
+ __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
+ while ( list->next != NULL ) {
+ kmp_team_p const * team = list->entry;
+ int i;
+ __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
+ __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
+ __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
+ __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
+ __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
+ __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
+ for ( i = 0; i < team->t.t_nproc; ++ i ) {
+ __kmp_printf( " Thread %2d: ", i );
+ __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
+ }; // for i
+ __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
+ __kmp_printf( "\n" );
+ list = list->next;
+ }; // while
+
+ // Print out __kmp_thread_pool and __kmp_team_pool.
+ __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
+ __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
+ __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
+ __kmp_printf( "\n" );
+
+ // Free team list.
+ while ( list != NULL ) {
+ kmp_team_list_item_t * item = list;
+ list = list->next;
+ KMP_INTERNAL_FREE( item );
+ }; // while
+
+}
+
+#endif
+
+
+//---------------------------------------------------------------------------
+// Stuff for per-thread fast random number generator
+// Table of primes
+
+static const unsigned __kmp_primes[] = {
+ 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
+ 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
+ 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
+ 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
+ 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
+ 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
+ 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
+ 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
+ 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
+ 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
+ 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
+ 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
+ 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
+ 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
+ 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
+ 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
+};
+
+//---------------------------------------------------------------------------
+// __kmp_get_random: Get a random number using a linear congruential method.
+
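+// State update: x' = a*x + 1 (mod 2^32), with the multiplier a chosen per
+// thread from the prime table above; the upper 16 bits of the previous state
+// are returned as the random value.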
+unsigned short
+__kmp_get_random( kmp_info_t * thread )
+{
+ unsigned x = thread->th.th_x;
+ unsigned short r = x>>16;
+
+ thread->th.th_x = x*thread->th.th_a+1;
+
+ KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
+ thread->th.th_info.ds.ds_tid, r) );
+
+ return r;
+}
+//--------------------------------------------------------
+// __kmp_init_random: Initialize a random number generator
+
+void
+__kmp_init_random( kmp_info_t * thread )
+{
+ unsigned seed = thread->th.th_info.ds.ds_tid;
+
+ thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
+ thread->th.th_x = (seed+1)*thread->th.th_a+1;
+ KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
+}
+
+
+#if KMP_OS_WINDOWS
+/* reclaim array entries for root threads that are already dead, returns number reclaimed */
+static int
+__kmp_reclaim_dead_roots(void) {
+ int i, r = 0;
+
+ for(i = 0; i < __kmp_threads_capacity; ++i) {
+ if( KMP_UBER_GTID( i ) &&
+ !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
+            !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots that died in a non-active state
+ r += __kmp_unregister_root_other_thread(i);
+ }
+ }
+ return r;
+}
+#endif
+
+/*
+ This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
+ free entries generated.
+
+ For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
+ already dead.
+
+ On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
+ update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
+ __kmp_tp_capacity, if threadprivate cache array has been created.
+ Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
+
+ After any dead root reclamation, if the clipping value allows array expansion to result in the generation
+ of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
+ array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
+ Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
+ a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
+ as many free slots as possible up to nWish.
+
+ If any argument is negative, the behavior is undefined.
+*/
+static int
+__kmp_expand_threads(int nWish, int nNeed) {
+ int added = 0;
+ int old_tp_cached;
+ int __kmp_actual_max_nth;
+
+ if(nNeed > nWish) /* normalize the arguments */
+ nWish = nNeed;
+#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
+/* only for Windows static library */
+ /* reclaim array entries for root threads that are already dead */
+ added = __kmp_reclaim_dead_roots();
+
+ if(nNeed) {
+ nNeed -= added;
+ if(nNeed < 0)
+ nNeed = 0;
+ }
+ if(nWish) {
+ nWish -= added;
+ if(nWish < 0)
+ nWish = 0;
+ }
+#endif
+ if(nWish <= 0)
+ return added;
+
+ while(1) {
+ int nTarget;
+ int minimumRequiredCapacity;
+ int newCapacity;
+ kmp_info_t **newThreads;
+ kmp_root_t **newRoot;
+
+ //
+ // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
+ // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
+ // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
+ // become > __kmp_max_nth in one of two ways:
+ //
+ // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
+ // may not be reused by another thread, so we may need to increase
+ // __kmp_threads_capacity to __kmp_max_nth + 1.
+ //
+ // 2) New foreign root(s) are encountered. We always register new
+ // foreign roots. This may cause a smaller # of threads to be
+ // allocated at subsequent parallel regions, but the worker threads
+ // hang around (and eventually go to sleep) and need slots in the
+ // __kmp_threads[] array.
+ //
+ // Anyway, that is the reason for moving the check to see if
+ // __kmp_max_nth was exceeded into __kmp_reserve_threads()
+ // instead of having it performed here. -BB
+ //
+ old_tp_cached = __kmp_tp_cached;
+ __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
+ KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
+
+ /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
+ nTarget = nWish;
+ if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
+ /* can't fulfil nWish, so try nNeed */
+ if(nNeed) {
+ nTarget = nNeed;
+ if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
+ /* possible expansion too small -- give up */
+ break;
+ }
+ } else {
+ /* best-effort */
+ nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
+ if(!nTarget) {
+ /* cannot expand at all -- give up */
+ break;
+ }
+ }
+ }
+ minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
+
+ newCapacity = __kmp_threads_capacity;
+ do {
+ newCapacity =
+ newCapacity <= (__kmp_actual_max_nth >> 1) ?
+ (newCapacity << 1) :
+ __kmp_actual_max_nth;
+ } while(newCapacity < minimumRequiredCapacity);
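+ /* Illustrative example (not from the original source): with
+ __kmp_threads_capacity == 32 and nTarget == 40, minimumRequiredCapacity is 72
+ and the loop above grows newCapacity 32 -> 64 -> 128; if a doubling would pass
+ __kmp_actual_max_nth, newCapacity is clipped to __kmp_actual_max_nth instead. */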
+ newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
+ newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
+ KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
+ KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
+ memset(newThreads + __kmp_threads_capacity, 0,
+ (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
+ memset(newRoot + __kmp_threads_capacity, 0,
+ (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
+
+ if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
+ /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
+ while we were allocating the expanded array, and our new capacity is larger than the threadprivate
+ cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
+ of a double-check pair.
+ */
+ __kmp_free(newThreads);
+ continue; /* start over and try again */
+ }
+ __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
+ if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
+ /* Same check as above, but this time with the lock held, so we can be sure whether we will succeed. */
+ __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
+ __kmp_free(newThreads);
+ continue; /* start over and try again */
+ } else {
+ /* success */
+ // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Needs to be investigated.
+ //
+ *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
+ *(kmp_root_t**volatile*)&__kmp_root = newRoot;
+ added += newCapacity - __kmp_threads_capacity;
+ *(volatile int*)&__kmp_threads_capacity = newCapacity;
+ __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
+ break; /* succeeded, so we can exit the loop */
+ }
+ }
+ return added;
+}
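+
+/*
+ Usage note (illustrative; based on the call site in __kmp_register_root() below):
+ when the threads array is full, root registration calls __kmp_expand_threads(1, 1),
+ i.e. it both wishes for and needs exactly one extra slot, and treats a zero
+ return as a fatal CantRegisterNewThread condition.
+*/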
+
+/* register the current thread as a root thread and obtain our gtid */
+/* we must have the __kmp_initz_lock held at this point */
+/* Argument TRUE only if we are the thread that calls from __kmp_do_serial_initialize() */
+int
+__kmp_register_root( int initial_thread )
+{
+ kmp_info_t *root_thread;
+ kmp_root_t *root;
+ int gtid;
+ int capacity;
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+ KA_TRACE( 20, ("__kmp_register_root: entered\n"));
+ KMP_MB();
+
+
+ /*
+ 2007-03-02:
+
+ If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an initial one,
+ the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
+ return false (meaning there is at least one empty slot in the __kmp_threads array), but it
+ is possible that the only free slot is #0, which is reserved for the initial thread and so cannot be
+ used for this one. The following code works around this bug.
+
+ However, the right solution seems to be not reserving slot #0 for the initial thread because:
+ (1) there is no magic in slot #0,
+ (2) we cannot detect the initial thread reliably (the first thread which does serial
+ initialization may not be a real initial thread).
+ */
+ capacity = __kmp_threads_capacity;
+ if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
+ -- capacity;
+ }; // if
+
+ /* see if there are too many threads */
+ if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
+ if ( __kmp_tp_cached ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantRegisterNewThread ),
+ KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
+ KMP_HNT( PossibleSystemLimitOnThreads ),
+ __kmp_msg_null
+ );
+ }
+ else {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantRegisterNewThread ),
+ KMP_HNT( SystemLimitOnThreads ),
+ __kmp_msg_null
+ );
+ }
+ }; // if
+
+ /* find an available thread slot */
+ /* Don't reassign the zero slot since we need that to only be used by initial
+ thread */
+ for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
+ ;
+ KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
+ KMP_ASSERT( gtid < __kmp_threads_capacity );
+
+ /* update global accounting */
+ __kmp_all_nth ++;
+ TCW_4(__kmp_nth, __kmp_nth + 1);
+
+ //
+ // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
+ // for low numbers of procs, and method #2 (keyed API call) for higher
+ // numbers of procs.
+ //
+ if ( __kmp_adjust_gtid_mode ) {
+ if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
+ if ( TCR_4(__kmp_gtid_mode) != 2) {
+ TCW_4(__kmp_gtid_mode, 2);
+ }
+ }
+ else {
+ if (TCR_4(__kmp_gtid_mode) != 1 ) {
+ TCW_4(__kmp_gtid_mode, 1);
+ }
+ }
+ }
+
+#ifdef KMP_ADJUST_BLOCKTIME
+ /* Adjust blocktime to zero if necessary */
+ /* Middle initialization might not have occurred yet */
+ if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
+ if ( __kmp_nth > __kmp_avail_proc ) {
+ __kmp_zero_bt = TRUE;
+ }
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+ /* setup this new hierarchy */
+ if( ! ( root = __kmp_root[gtid] )) {
+ root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
+ KMP_DEBUG_ASSERT( ! root->r.r_root_team );
+ }
+
+ __kmp_initialize_root( root );
+
+ /* setup new root thread structure */
+ if( root->r.r_uber_thread ) {
+ root_thread = root->r.r_uber_thread;
+ } else {
+ root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
+ if ( __kmp_storage_map ) {
+ __kmp_print_thread_storage_map( root_thread, gtid );
+ }
+ root_thread->th.th_info .ds.ds_gtid = gtid;
+ root_thread->th.th_root = root;
+ if( __kmp_env_consistency_check ) {
+ root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
+ }
+ #if USE_FAST_MEMORY
+ __kmp_initialize_fast_memory( root_thread );
+ #endif /* USE_FAST_MEMORY */
+
+ #if KMP_USE_BGET
+ KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
+ __kmp_initialize_bget( root_thread );
+ #endif
+ __kmp_init_random( root_thread ); // Initialize random number generator
+ }
+
+ /* setup the serial team held in reserve by the root thread */
+ if( ! root_thread->th.th_serial_team ) {
+ kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
+ KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
+
+ root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
+#if OMPT_SUPPORT
+ 0, // root parallel id
+#endif
+#if OMP_40_ENABLED
+ proc_bind_default,
+#endif
+ &r_icvs,
+ 0 USE_NESTED_HOT_ARG(NULL) );
+ }
+ KMP_ASSERT( root_thread->th.th_serial_team );
+ KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
+ root_thread->th.th_serial_team ) );
+
+ /* drop root_thread into place */
+ TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
+
+ root->r.r_root_team->t.t_threads[0] = root_thread;
+ root->r.r_hot_team ->t.t_threads[0] = root_thread;
+ root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
+ root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
+ root->r.r_uber_thread = root_thread;
+
+ /* initialize the thread, get it ready to go */
+ __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
+
+ /* prepare the master thread for get_gtid() */
+ __kmp_gtid_set_specific( gtid );
+
+ __kmp_itt_thread_name( gtid );
+
+ #ifdef KMP_TDATA_GTID
+ __kmp_gtid = gtid;
+ #endif
+ __kmp_create_worker( gtid, root_thread, __kmp_stksize );
+ KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
+ TCW_4(__kmp_init_gtid, TRUE);
+
+ KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
+ gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
+ root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
+ KMP_INIT_BARRIER_STATE ) );
+ { // Initialize barrier data.
+ int b;
+ for ( b = 0; b < bs_last_barrier; ++ b ) {
+ root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
+#if USE_DEBUGGER
+ root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
+#endif
+ }; // for
+ }
+ KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
+
+#if KMP_AFFINITY_SUPPORTED
+ if ( TCR_4(__kmp_init_middle) ) {
+ __kmp_affinity_set_init_mask( gtid, TRUE );
+ }
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+ __kmp_root_counter ++;
+
+ KMP_MB();
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+
+ return gtid;
+}
+
+#if KMP_NESTED_HOT_TEAMS
+static int
+__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
+{
+ int i, n, nth;
+ kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
+ if( !hot_teams || !hot_teams[level].hot_team ) {
+ return 0;
+ }
+ KMP_DEBUG_ASSERT( level < max_level );
+ kmp_team_t *team = hot_teams[level].hot_team;
+ nth = hot_teams[level].hot_team_nth;
+ n = nth - 1; // master is not freed
+ if( level < max_level - 1 ) {
+ for( i = 0; i < nth; ++i ) {
+ kmp_info_t *th = team->t.t_threads[i];
+ n += __kmp_free_hot_teams( root, th, level + 1, max_level );
+ if( i > 0 && th->th.th_hot_teams ) {
+ __kmp_free( th->th.th_hot_teams );
+ th->th.th_hot_teams = NULL;
+ }
+ }
+ }
+ __kmp_free_team( root, team, NULL );
+ return n;
+}
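+
+/*
+ Illustrative example (not from the original source): with
+ __kmp_hot_teams_max_level == 2, a call at level 1 on a worker whose level-1
+ hot team holds 4 threads frees that team and returns 3; the team's master
+ slot is not counted, and there is no deeper recursion because level (1) is
+ not less than max_level - 1 (1).
+*/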
+#endif
+
+/* Resets a root thread and clears its root and hot teams.
+ Returns the number of __kmp_threads entries directly and indirectly freed.
+*/
+static int
+__kmp_reset_root(int gtid, kmp_root_t *root)
+{
+ kmp_team_t * root_team = root->r.r_root_team;
+ kmp_team_t * hot_team = root->r.r_hot_team;
+ int n = hot_team->t.t_nproc;
+ int i;
+
+ KMP_DEBUG_ASSERT( ! root->r.r_active );
+
+ root->r.r_root_team = NULL;
+ root->r.r_hot_team = NULL;
+ // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
+ // to __kmp_free_team().
+ __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
+#if KMP_NESTED_HOT_TEAMS
+ if( __kmp_hot_teams_max_level > 1 ) { // need to free nested hot teams and their threads if any
+ for( i = 0; i < hot_team->t.t_nproc; ++i ) {
+ kmp_info_t *th = hot_team->t.t_threads[i];
+ n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
+ if( th->th.th_hot_teams ) {
+ __kmp_free( th->th.th_hot_teams );
+ th->th.th_hot_teams = NULL;
+ }
+ }
+ }
+#endif
+ __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
+
+ //
+ // Before we can reap the thread, we need to make certain that all
+ // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
+ //
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ __kmp_wait_to_unref_task_teams();
+ }
+
+ #if KMP_OS_WINDOWS
+ /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
+ KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
+ (LPVOID)&(root->r.r_uber_thread->th),
+ root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
+ __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
+ #endif /* KMP_OS_WINDOWS */
+
+#if OMPT_SUPPORT
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
+ int gtid = __kmp_get_gtid();
+ __ompt_thread_end(ompt_thread_initial, gtid);
+ }
+#endif
+
+ TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
+ __kmp_reap_thread( root->r.r_uber_thread, 1 );
+
+ // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
+ root->r.r_uber_thread = NULL;
+ /* mark root as no longer in use */
+ root->r.r_begin = FALSE;
+
+ return n;
+}
+
+void
+__kmp_unregister_root_current_thread( int gtid )
+{
+ KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
+ /* this lock should be ok, since unregister_root_current_thread is never called during
+ * an abort, only during a normal close. furthermore, if you have the
+ * forkjoin lock, you should never try to get the initz lock */
+
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+ if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
+ KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ return;
+ }
+ kmp_root_t *root = __kmp_root[gtid];
+
+ KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
+ KMP_ASSERT( KMP_UBER_GTID( gtid ));
+ KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
+ KMP_ASSERT( root->r.r_active == FALSE );
+
+
+ KMP_MB();
+
+#if OMP_41_ENABLED
+ kmp_info_t * thread = __kmp_threads[gtid];
+ kmp_team_t * team = thread->th.th_team;
+ kmp_task_team_t * task_team = thread->th.th_task_team;
+
+ // we need to wait for the proxy tasks before finishing the thread
+ if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
+#if OMPT_SUPPORT
+ // the runtime is shutting down so we won't report any events
+ thread->th.ompt_thread_info.state = ompt_state_undefined;
+#endif
+ __kmp_task_team_wait(thread, team, NULL );
+ }
+#endif
+
+ __kmp_reset_root(gtid, root);
+
+ /* free up this thread slot */
+ __kmp_gtid_set_specific( KMP_GTID_DNE );
+#ifdef KMP_TDATA_GTID
+ __kmp_gtid = KMP_GTID_DNE;
+#endif
+
+ KMP_MB();
+ KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
+
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+}
+
+#if KMP_OS_WINDOWS
+/* __kmp_forkjoin_lock must be already held
+ Unregisters a root thread that is not the current thread. Returns the number of
+ __kmp_threads entries freed as a result.
+ */
+static int
+__kmp_unregister_root_other_thread( int gtid )
+{
+ kmp_root_t *root = __kmp_root[gtid];
+ int r;
+
+ KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
+ KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
+ KMP_ASSERT( KMP_UBER_GTID( gtid ));
+ KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
+ KMP_ASSERT( root->r.r_active == FALSE );
+
+ r = __kmp_reset_root(gtid, root);
+ KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
+ return r;
+}
+#endif
+
+#if KMP_DEBUG
+void __kmp_task_info() {
+
+ kmp_int32 gtid = __kmp_entry_gtid();
+ kmp_int32 tid = __kmp_tid_from_gtid( gtid );
+ kmp_info_t *this_thr = __kmp_threads[ gtid ];
+ kmp_team_t *steam = this_thr->th.th_serial_team;
+ kmp_team_t *team = this_thr->th.th_team;
+
+ __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
+ gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
+}
+#endif // KMP_DEBUG
+
+/* TODO optimize with one big memclr, take out what isn't needed,
+ * split responsibility to workers as much as possible, and delay
+ * initialization of features as much as possible */
+static void
+__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
+{
+ /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
+ * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
+ kmp_info_t *master = team->t.t_threads[0];
+ KMP_DEBUG_ASSERT( this_thr != NULL );
+ KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
+ KMP_DEBUG_ASSERT( team );
+ KMP_DEBUG_ASSERT( team->t.t_threads );
+ KMP_DEBUG_ASSERT( team->t.t_dispatch );
+ KMP_DEBUG_ASSERT( master );
+ KMP_DEBUG_ASSERT( master->th.th_root );
+
+ KMP_MB();
+
+ TCW_SYNC_PTR(this_thr->th.th_team, team);
+
+ this_thr->th.th_info.ds.ds_tid = tid;
+ this_thr->th.th_set_nproc = 0;
+#if OMP_40_ENABLED
+ this_thr->th.th_set_proc_bind = proc_bind_default;
+# if KMP_AFFINITY_SUPPORTED
+ this_thr->th.th_new_place = this_thr->th.th_current_place;
+# endif
+#endif
+ this_thr->th.th_root = master->th.th_root;
+
+ /* setup the thread's cache of the team structure */
+ this_thr->th.th_team_nproc = team->t.t_nproc;
+ this_thr->th.th_team_master = master;
+ this_thr->th.th_team_serialized = team->t.t_serialized;
+ TCW_PTR(this_thr->th.th_sleep_loc, NULL);
+
+ KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
+
+ KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
+ tid, gtid, this_thr, this_thr->th.th_current_task ) );
+
+ __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
+
+ KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
+ tid, gtid, this_thr, this_thr->th.th_current_task ) );
+ // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
+
+ /* TODO no worksharing in speculative threads */
+ this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
+
+ this_thr->th.th_local.this_construct = 0;
+
+#ifdef BUILD_TV
+ this_thr->th.th_local.tv_data = 0;
+#endif
+
+ if ( ! this_thr->th.th_pri_common ) {
+ this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
+ if ( __kmp_storage_map ) {
+ __kmp_print_storage_map_gtid(
+ gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
+ sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
+ );
+ }; // if
+ this_thr->th.th_pri_head = NULL;
+ }; // if
+
+ /* Initialize dynamic dispatch */
+ {
+ volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
+ /*
+ * Use team max_nproc since this will never change for the team.
+ */
+ size_t disp_size = sizeof( dispatch_private_info_t ) *
+ ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
+ KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
+ KMP_ASSERT( dispatch );
+ KMP_DEBUG_ASSERT( team->t.t_dispatch );
+ KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
+
+ dispatch->th_disp_index = 0;
+
+ if( ! dispatch->th_disp_buffer ) {
+ dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
+
+ if ( __kmp_storage_map ) {
+ __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
+ &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
+ disp_size, "th_%d.th_dispatch.th_disp_buffer "
+ "(team_%d.t_dispatch[%d].th_disp_buffer)",
+ gtid, team->t.t_id, gtid );
+ }
+ } else {
+ memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
+ }
+
+ dispatch->th_dispatch_pr_current = 0;
+ dispatch->th_dispatch_sh_current = 0;
+
+ dispatch->th_deo_fcn = 0; /* ORDERED */
+ dispatch->th_dxo_fcn = 0; /* END ORDERED */
+ }
+
+ this_thr->th.th_next_pool = NULL;
+
+ if (!this_thr->th.th_task_state_memo_stack) {
+ size_t i;
+ this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
+ this_thr->th.th_task_state_top = 0;
+ this_thr->th.th_task_state_stack_sz = 4;
+ for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
+ this_thr->th.th_task_state_memo_stack[i] = 0;
+ }
+
+ KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
+ KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
+
+ KMP_MB();
+}
+
+
+/* allocate a new thread for the requesting team. this is only called from within a
+ * forkjoin critical section. we will first try to get an available thread from the
+ * thread pool. if none is available, we will fork a new one assuming we are able
+ * to create a new one. this should be assured, as the caller should check on this
+ * first.
+ */
+kmp_info_t *
+__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
+{
+ kmp_team_t *serial_team;
+ kmp_info_t *new_thr;
+ int new_gtid;
+
+ KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
+ KMP_DEBUG_ASSERT( root && team );
+#if !KMP_NESTED_HOT_TEAMS
+ KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
+#endif
+ KMP_MB();
+
+ /* first, try to get one from the thread pool */
+ if ( __kmp_thread_pool ) {
+
+ new_thr = (kmp_info_t*)__kmp_thread_pool;
+ __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
+ if ( new_thr == __kmp_thread_pool_insert_pt ) {
+ __kmp_thread_pool_insert_pt = NULL;
+ }
+ TCW_4(new_thr->th.th_in_pool, FALSE);
+ //
+ // Don't touch th_active_in_pool or th_active.
+ // The worker thread adjusts those flags as it sleeps/awakens.
+ //
+
+ __kmp_thread_pool_nth--;
+
+ KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
+ __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
+ KMP_ASSERT( ! new_thr->th.th_team );
+ KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
+ KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
+
+ /* setup the thread structure */
+ __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
+ KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
+
+ TCW_4(__kmp_nth, __kmp_nth + 1);
+
+ new_thr->th.th_task_state = 0;
+ new_thr->th.th_task_state_top = 0;
+ new_thr->th.th_task_state_stack_sz = 4;
+
+#ifdef KMP_ADJUST_BLOCKTIME
+ /* Adjust blocktime back to zero if necessary */
+ /* Middle initialization might not have occurred yet */
+ if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
+ if ( __kmp_nth > __kmp_avail_proc ) {
+ __kmp_zero_bt = TRUE;
+ }
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+#if KMP_DEBUG
+ // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG.
+ int b;
+ kmp_balign_t * balign = new_thr->th.th_bar;
+ for( b = 0; b < bs_last_barrier; ++ b )
+ KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
+#endif
+
+ KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
+ __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
+
+ KMP_MB();
+ return new_thr;
+ }
+
+
+ /* no, we'll fork a new one */
+ KMP_ASSERT( __kmp_nth == __kmp_all_nth );
+ KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
+
+ //
+ // If this is the first worker thread the RTL is creating, then also
+ // launch the monitor thread. We try to do this as early as possible.
+ //
+ if ( ! TCR_4( __kmp_init_monitor ) ) {
+ __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
+ if ( ! TCR_4( __kmp_init_monitor ) ) {
+ KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
+ TCW_4( __kmp_init_monitor, 1 );
+ __kmp_create_monitor( & __kmp_monitor );
+ KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
+ #if KMP_OS_WINDOWS
+ // AC: wait until monitor has started. This is a fix for CQ232808.
+ // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
+ // work in between, then there is a high probability that the monitor thread starts after
+ // the library shutdown. At shutdown it is too late to cope with the problem, because
+ // when the master is in DllMain (process detach) the monitor has no chance to start
+ // (it is blocked), and the master has no means to inform the monitor that the library has gone,
+ // because all the memory the monitor can access is about to be released/reset.
+ while ( TCR_4(__kmp_init_monitor) < 2 ) {
+ KMP_YIELD( TRUE );
+ }
+ KF_TRACE( 10, ( "after monitor thread has started\n" ) );
+ #endif
+ }
+ __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
+ }
+
+ KMP_MB();
+ for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
+ KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
+ }
+
+ /* allocate space for it. */
+ new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
+
+ TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
+
+ if ( __kmp_storage_map ) {
+ __kmp_print_thread_storage_map( new_thr, new_gtid );
+ }
+
+ /* add the reserve serialized team, initialized from the team's master thread */
+ {
+ kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
+ KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
+
+ new_thr->th.th_serial_team = serial_team =
+ (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
+#if OMPT_SUPPORT
+ 0, // root parallel id
+#endif
+#if OMP_40_ENABLED
+ proc_bind_default,
+#endif
+ &r_icvs,
+ 0 USE_NESTED_HOT_ARG(NULL) );
+ }
+ KMP_ASSERT ( serial_team );
+ serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
+ serial_team->t.t_threads[0] = new_thr;
+ KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
+ new_thr ) );
+
+ /* setup the thread structures */
+ __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
+
+ #if USE_FAST_MEMORY
+ __kmp_initialize_fast_memory( new_thr );
+ #endif /* USE_FAST_MEMORY */
+
+ #if KMP_USE_BGET
+ KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
+ __kmp_initialize_bget( new_thr );
+ #endif
+
+ __kmp_init_random( new_thr ); // Initialize random number generator
+
+ /* Initialize these only once when thread is grabbed for a team allocation */
+ KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
+ __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
+
+ int b;
+ kmp_balign_t * balign = new_thr->th.th_bar;
+ for(b=0; b<bs_last_barrier; ++b) {
+ balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
+ balign[b].bb.team = NULL;
+ balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
+ balign[b].bb.use_oncore_barrier = 0;
+ }
+
+ new_thr->th.th_spin_here = FALSE;
+ new_thr->th.th_next_waiting = 0;
+
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+ new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
+ new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
+ new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
+ new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
+#endif
+
+ TCW_4(new_thr->th.th_in_pool, FALSE);
+ new_thr->th.th_active_in_pool = FALSE;
+ TCW_4(new_thr->th.th_active, TRUE);
+
+ /* adjust the global counters */
+ __kmp_all_nth ++;
+ __kmp_nth ++;
+
+ //
+ // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
+ // for low numbers of procs, and method #2 (keyed API call) for higher
+ // numbers of procs.
+ //
+ if ( __kmp_adjust_gtid_mode ) {
+ if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
+ if ( TCR_4(__kmp_gtid_mode) != 2) {
+ TCW_4(__kmp_gtid_mode, 2);
+ }
+ }
+ else {
+ if (TCR_4(__kmp_gtid_mode) != 1 ) {
+ TCW_4(__kmp_gtid_mode, 1);
+ }
+ }
+ }
+
+#ifdef KMP_ADJUST_BLOCKTIME
+ /* Adjust blocktime back to zero if necessary */
+ /* Middle initialization might not have occurred yet */
+ if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
+ if ( __kmp_nth > __kmp_avail_proc ) {
+ __kmp_zero_bt = TRUE;
+ }
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+ /* actually fork it and create the new worker thread */
+ KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
+ __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
+ KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
+
+
+ KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
+ KMP_MB();
+ return new_thr;
+}
+
+/*
+ * reinitialize team for reuse.
+ *
+ * The hot team code calls this routine at every fork barrier, so EPCC barrier
+ * tests are extremely sensitive to changes in it, esp. writes to the team
+ * struct, which cause a cache invalidation in all threads.
+ *
+ * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
+ */
+static void
+__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
+ KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
+ team->t.t_threads[0], team ) );
+ KMP_DEBUG_ASSERT( team && new_icvs);
+ KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
+ team->t.t_ident = loc;
+
+ team->t.t_id = KMP_GEN_TEAM_ID();
+
+ // Copy ICVs to the master thread's implicit taskdata
+ __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
+ copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
+
+ KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
+ team->t.t_threads[0], team ) );
+}
+
+
+/* initialize the team data structure
+ * this assumes the t_threads and t_max_nproc are already set
+ * also, we don't touch the arguments */
+static void
+__kmp_initialize_team(
+ kmp_team_t * team,
+ int new_nproc,
+ kmp_internal_control_t * new_icvs,
+ ident_t * loc
+) {
+ KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
+
+ /* verify */
+ KMP_DEBUG_ASSERT( team );
+ KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
+ KMP_DEBUG_ASSERT( team->t.t_threads );
+ KMP_MB();
+
+ team->t.t_master_tid = 0; /* not needed */
+ /* team->t.t_master_bar; not needed */
+ team->t.t_serialized = new_nproc > 1 ? 0 : 1;
+ team->t.t_nproc = new_nproc;
+
+ /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
+ team->t.t_next_pool = NULL;
+ /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
+
+ TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
+ team->t.t_invoke = NULL; /* not needed */
+
+ // TODO???: team->t.t_max_active_levels = new_max_active_levels;
+ team->t.t_sched = new_icvs->sched;
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ team->t.t_fp_control_saved = FALSE; /* not needed */
+ team->t.t_x87_fpu_control_word = 0; /* not needed */
+ team->t.t_mxcsr = 0; /* not needed */
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ team->t.t_construct = 0;
+ __kmp_init_lock( & team->t.t_single_lock );
+
+ team->t.t_ordered .dt.t_value = 0;
+ team->t.t_master_active = FALSE;
+
+ memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
+
+#ifdef KMP_DEBUG
+ team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
+#endif
+ team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
+
+ team->t.t_control_stack_top = NULL;
+
+ __kmp_reinitialize_team( team, new_icvs, loc );
+
+ KMP_MB();
+ KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
+}
+
+#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+/* Sets full mask for thread and returns old mask, no changes to structures. */
+static void
+__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
+{
+ if ( KMP_AFFINITY_CAPABLE() ) {
+ int status;
+ if ( old_mask != NULL ) {
+ status = __kmp_get_system_affinity( old_mask, TRUE );
+ int error = errno;
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( ChangeThreadAffMaskError ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ }
+ __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
+ }
+}
+#endif
+
+#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+
+//
+// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
+// It calculates the worker + master thread's partition based upon the parent
+// thread's partition, and binds each worker to a thread in their partition.
+// The master thread's partition should already include its current binding.
+//
+static void
+__kmp_partition_places( kmp_team_t *team )
+{
+ //
+ // Copy the master thread's place partition to the team struct
+ //
+ kmp_info_t *master_th = team->t.t_threads[0];
+ KMP_DEBUG_ASSERT( master_th != NULL );
+ kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
+ int first_place = master_th->th.th_first_place;
+ int last_place = master_th->th.th_last_place;
+ int masters_place = master_th->th.th_current_place;
+ team->t.t_first_place = first_place;
+ team->t.t_last_place = last_place;
+
+ KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
+ proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
+ masters_place, first_place, last_place ) );
+
+ switch ( proc_bind ) {
+
+ case proc_bind_default:
+ //
+ // serial teams might have the proc_bind policy set to
+ // proc_bind_default. It doesn't matter, as we don't
+ // rebind the master thread for any proc_bind policy.
+ //
+ KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
+ break;
+
+ case proc_bind_master:
+ {
+ int f;
+ int n_th = team->t.t_nproc;
+ for ( f = 1; f < n_th; f++ ) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT( th != NULL );
+ th->th.th_first_place = first_place;
+ th->th.th_last_place = last_place;
+ th->th.th_new_place = masters_place;
+
+ KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
+ __kmp_gtid_from_thread( team->t.t_threads[f] ),
+ team->t.t_id, f, masters_place, first_place, last_place ) );
+ }
+ }
+ break;
+
+ case proc_bind_close:
+ {
+ int f;
+ int n_th = team->t.t_nproc;
+ int n_places;
+ if ( first_place <= last_place ) {
+ n_places = last_place - first_place + 1;
+ }
+ else {
+ n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ }
+ if ( n_th <= n_places ) {
+ int place = masters_place;
+ for ( f = 1; f < n_th; f++ ) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT( th != NULL );
+
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ th->th.th_first_place = first_place;
+ th->th.th_last_place = last_place;
+ th->th.th_new_place = place;
+
+ KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
+ __kmp_gtid_from_thread( team->t.t_threads[f] ),
+ team->t.t_id, f, place, first_place, last_place ) );
+ }
+ }
+ else {
+ int S, rem, gap, s_count;
+ S = n_th / n_places;
+ s_count = 0;
+ rem = n_th - ( S * n_places );
+ gap = rem > 0 ? n_places/rem : n_places;
+ int place = masters_place;
+ int gap_ct = gap;
+ for ( f = 0; f < n_th; f++ ) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT( th != NULL );
+
+ th->th.th_first_place = first_place;
+ th->th.th_last_place = last_place;
+ th->th.th_new_place = place;
+ s_count++;
+
+ if ( (s_count == S) && rem && (gap_ct == gap) ) {
+ // do nothing, add an extra thread to place on next iteration
+ }
+ else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
+ // we added an extra thread to this place; move to next place
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ s_count = 0;
+ gap_ct = 1;
+ rem--;
+ }
+ else if (s_count == S) { // place full; don't add extra
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ gap_ct++;
+ s_count = 0;
+ }
+
+ KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
+ __kmp_gtid_from_thread( team->t.t_threads[f] ),
+ team->t.t_id, f, th->th.th_new_place, first_place,
+ last_place ) );
+ }
+ KMP_DEBUG_ASSERT( place == masters_place );
+ }
+ }
+ break;
+
+ case proc_bind_spread:
+ {
+ int f;
+ int n_th = team->t.t_nproc;
+ int n_places;
+ if ( first_place <= last_place ) {
+ n_places = last_place - first_place + 1;
+ }
+ else {
+ n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ }
+ if ( n_th <= n_places ) {
+ int place = masters_place;
+ int S = n_places/n_th;
+ int s_count, rem, gap, gap_ct;
+ rem = n_places - n_th*S;
+ gap = rem ? n_th/rem : 1;
+ gap_ct = gap;
+ for ( f = 0; f < n_th; f++ ) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT( th != NULL );
+
+ th->th.th_first_place = place;
+ th->th.th_new_place = place;
+ s_count = 1;
+ while (s_count < S) {
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ s_count++;
+ }
+ if (rem && (gap_ct == gap)) {
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ rem--;
+ gap_ct = 0;
+ }
+ th->th.th_last_place = place;
+ gap_ct++;
+
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+
+ KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
+ __kmp_gtid_from_thread( team->t.t_threads[f] ),
+ team->t.t_id, f, th->th.th_new_place,
+ th->th.th_first_place, th->th.th_last_place ) );
+ }
+ KMP_DEBUG_ASSERT( place == masters_place );
+ }
+ else {
+ int S, rem, gap, s_count;
+ S = n_th / n_places;
+ s_count = 0;
+ rem = n_th - ( S * n_places );
+ gap = rem > 0 ? n_places/rem : n_places;
+ int place = masters_place;
+ int gap_ct = gap;
+ for ( f = 0; f < n_th; f++ ) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT( th != NULL );
+
+ th->th.th_first_place = place;
+ th->th.th_last_place = place;
+ th->th.th_new_place = place;
+ s_count++;
+
+ if ( (s_count == S) && rem && (gap_ct == gap) ) {
+ // do nothing, add an extra thread to place on next iteration
+ }
+ else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
+ // we added an extra thread to this place; move on to next place
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ s_count = 0;
+ gap_ct = 1;
+ rem--;
+ }
+ else if (s_count == S) { // place is full; don't add extra thread
+ if ( place == last_place ) {
+ place = first_place;
+ }
+ else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
+ place = 0;
+ }
+ else {
+ place++;
+ }
+ gap_ct++;
+ s_count = 0;
+ }
+
+ KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
+ __kmp_gtid_from_thread( team->t.t_threads[f] ),
+ team->t.t_id, f, th->th.th_new_place,
+ th->th.th_first_place, th->th.th_last_place) );
+ }
+ KMP_DEBUG_ASSERT( place == masters_place );
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
+}
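+
+/*
+ Worked example (illustrative, not from the original source) for the
+ "more threads than places" branch of proc_bind_close above, assuming the
+ master sits on first_place: with n_th == 10 and n_places == 4, S == 2,
+ rem == 2 and gap == 2, so consecutive places receive 3, 2, 3 and 2 threads,
+ and place wraps back to masters_place, which the final KMP_DEBUG_ASSERT
+ verifies.
+*/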
+
+#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
+
+/* allocate a new team data structure to use. take one off of the free pool if available */
+kmp_team_t *
+__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
+#if OMPT_SUPPORT
+ ompt_parallel_id_t ompt_parallel_id,
+#endif
+#if OMP_40_ENABLED
+ kmp_proc_bind_t new_proc_bind,
+#endif
+ kmp_internal_control_t *new_icvs,
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
+{
+ KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
+ int f;
+ kmp_team_t *team;
+ int use_hot_team = ! root->r.r_active;
+ int level = 0;
+
+ KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
+ KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
+ KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
+ KMP_MB();
+
+#if KMP_NESTED_HOT_TEAMS
+ kmp_hot_team_ptr_t *hot_teams;
+ if( master ) {
+ team = master->th.th_team;
+ level = team->t.t_active_level;
+ if( master->th.th_teams_microtask ) { // in teams construct?
+ if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1
+ team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams
+ master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams
+ ++level; // do not increment if #teams==1 or for the outer fork of the teams; increment otherwise
+ }
+ }
+ hot_teams = master->th.th_hot_teams;
+ if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
+ { // hot team has already been allocated for given level
+ use_hot_team = 1;
+ } else {
+ use_hot_team = 0;
+ }
+ }
+#endif
+ // Optimization to use a "hot" team
+ if( use_hot_team && new_nproc > 1 ) {
+ KMP_DEBUG_ASSERT( new_nproc == max_nproc );
+#if KMP_NESTED_HOT_TEAMS
+ team = hot_teams[level].hot_team;
+#else
+ team = root->r.r_hot_team;
+#endif
+#if KMP_DEBUG
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
+ team->t.t_task_team[0], team->t.t_task_team[1] ));
+ }
+#endif
+
+ // Has the number of threads changed?
+ /* Let's assume the most common case is that the number of threads is unchanged, and
+ put that case first. */
+ if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
+ KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
+ // This case can mean that omp_set_num_threads() was called and the hot team size
+ // was already reduced, so we check the special flag
+ if ( team->t.t_size_changed == -1 ) {
+ team->t.t_size_changed = 1;
+ } else {
+ team->t.t_size_changed = 0;
+ }
+
+ // TODO???: team->t.t_max_active_levels = new_max_active_levels;
+ team->t.t_sched = new_icvs->sched;
+
+ __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
+
+ KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
+ 0, team->t.t_threads[0], team ) );
+ __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
+
+#if OMP_40_ENABLED
+# if KMP_AFFINITY_SUPPORTED
+ if ( ( team->t.t_size_changed == 0 )
+ && ( team->t.t_proc_bind == new_proc_bind ) ) {
+ KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
+ team->t.t_id, new_proc_bind, team->t.t_first_place,
+ team->t.t_last_place ) );
+ }
+ else {
+ team->t.t_proc_bind = new_proc_bind;
+ __kmp_partition_places( team );
+ }
+# else
+ if ( team->t.t_proc_bind != new_proc_bind ) {
+ team->t.t_proc_bind = new_proc_bind;
+ }
+# endif /* KMP_AFFINITY_SUPPORTED */
+#endif /* OMP_40_ENABLED */
+ }
+ else if( team->t.t_nproc > new_nproc ) {
+ KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
+
+ team->t.t_size_changed = 1;
+#if KMP_NESTED_HOT_TEAMS
+ if( __kmp_hot_teams_mode == 0 ) {
+ // AC: saved number of threads should correspond to team's value in this mode,
+ // can be bigger in mode 1, when hot team has some threads in reserve
+ KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
+ hot_teams[level].hot_team_nth = new_nproc;
+#endif // KMP_NESTED_HOT_TEAMS
+ /* release the extra threads we don't need any more */
+ for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
+ KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
+ if ( __kmp_tasking_mode != tskm_immediate_exec) {
+ // When decreasing team size, threads no longer in the team should unref task team.
+ team->t.t_threads[f]->th.th_task_team = NULL;
+ }
+ __kmp_free_thread( team->t.t_threads[ f ] );
+ team->t.t_threads[ f ] = NULL;
+ }
+#if KMP_NESTED_HOT_TEAMS
+ } // (__kmp_hot_teams_mode == 0)
+#endif // KMP_NESTED_HOT_TEAMS
+ team->t.t_nproc = new_nproc;
+ // TODO???: team->t.t_max_active_levels = new_max_active_levels;
+ team->t.t_sched = new_icvs->sched;
+ __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
+
+ /* update the remaining threads */
+ for(f = 0; f < new_nproc; ++f) {
+ team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+ }
+ // restore the current task state of the master thread: should be the implicit task
+ KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
+ 0, team->t.t_threads[0], team ) );
+
+ __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
+
+#ifdef KMP_DEBUG
+ for ( f = 0; f < team->t.t_nproc; f++ ) {
+ KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
+ team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
+ }
+#endif
+
+#if OMP_40_ENABLED
+ team->t.t_proc_bind = new_proc_bind;
+# if KMP_AFFINITY_SUPPORTED
+ __kmp_partition_places( team );
+# endif
+#endif
+ }
+ else { // team->t.t_nproc < new_nproc
+#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+ kmp_affin_mask_t *old_mask;
+ if ( KMP_AFFINITY_CAPABLE() ) {
+ KMP_CPU_ALLOC(old_mask);
+ }
+#endif
+
+ KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
+
+ team->t.t_size_changed = 1;
+
+
+#if KMP_NESTED_HOT_TEAMS
+ int avail_threads = hot_teams[level].hot_team_nth;
+ if( new_nproc < avail_threads )
+ avail_threads = new_nproc;
+ kmp_info_t **other_threads = team->t.t_threads;
+ for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
+ // Adjust barrier data of reserved threads (if any) of the team
+ // Other data will be set in __kmp_initialize_info() below.
+ int b;
+ kmp_balign_t * balign = other_threads[f]->th.th_bar;
+ for ( b = 0; b < bs_last_barrier; ++ b ) {
+ balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
+ KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
+#if USE_DEBUGGER
+ balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
+#endif
+ }
+ }
+ if( hot_teams[level].hot_team_nth >= new_nproc ) {
+ // we have all needed threads in reserve, no need to allocate any
+ // this is only possible in mode 1; there cannot be reserved threads in mode 0
+ KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
+ team->t.t_nproc = new_nproc; // just get reserved threads involved
+ } else {
+ // we may have some threads in reserve, but not enough
+ team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any
+ hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
+#endif // KMP_NESTED_HOT_TEAMS
+ if(team->t.t_max_nproc < new_nproc) {
+ /* reallocate larger arrays */
+ __kmp_reallocate_team_arrays(team, new_nproc);
+ __kmp_reinitialize_team( team, new_icvs, NULL );
+ }
+
+#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+ /* Temporarily set full mask for master thread before
+ creation of workers. The reason is that workers inherit
+ the affinity from master, so if a lot of workers are
+ created on a single core quickly, they don't get
+ a chance to set their own affinity for a long time.
+ */
+ __kmp_set_thread_affinity_mask_full_tmp( old_mask );
+#endif
+
+ /* allocate new threads for the hot team */
+ for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
+ kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
+ KMP_DEBUG_ASSERT( new_worker );
+ team->t.t_threads[ f ] = new_worker;
+
+ KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
+ team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
+ team->t.t_bar[bs_forkjoin_barrier].b_arrived,
+ team->t.t_bar[bs_plain_barrier].b_arrived ) );
+
+ { // Initialize barrier data for new threads.
+ int b;
+ kmp_balign_t * balign = new_worker->th.th_bar;
+ for( b = 0; b < bs_last_barrier; ++ b ) {
+ balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
+ KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
+#if USE_DEBUGGER
+ balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
+#endif
+ }
+ }
+ }
+
+#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+ if ( KMP_AFFINITY_CAPABLE() ) {
+ /* Restore initial master thread's affinity mask */
+ __kmp_set_system_affinity( old_mask, TRUE );
+ KMP_CPU_FREE(old_mask);
+ }
+#endif
+#if KMP_NESTED_HOT_TEAMS
+ } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
+#endif // KMP_NESTED_HOT_TEAMS
+ /* make sure everyone is synchronized */
+ int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
+ __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
+
+ /* reinitialize the threads */
+ KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
+ for (f=0; f < team->t.t_nproc; ++f)
+ __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+ if (level) { // set th_task_state for new threads in nested hot team
+ // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
+ // th_task_state for the new threads. th_task_state for master thread will not be accurate until
+ // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
+ for (f=old_nproc; f < team->t.t_nproc; ++f)
+ team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
+ }
+ else { // set th_task_state for new threads in non-nested hot team
+ int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
+ for (f=old_nproc; f < team->t.t_nproc; ++f)
+ team->t.t_threads[f]->th.th_task_state = old_state;
+ }
+
+#ifdef KMP_DEBUG
+ for ( f = 0; f < team->t.t_nproc; ++ f ) {
+ KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
+ team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
+ }
+#endif
+
+#if OMP_40_ENABLED
+ team->t.t_proc_bind = new_proc_bind;
+# if KMP_AFFINITY_SUPPORTED
+ __kmp_partition_places( team );
+# endif
+#endif
+ } // Check changes in number of threads
+
+#if OMP_40_ENABLED
+ kmp_info_t *master = team->t.t_threads[0];
+ if( master->th.th_teams_microtask ) {
+ for( f = 1; f < new_nproc; ++f ) {
+ // propagate teams construct specific info to workers
+ kmp_info_t *thr = team->t.t_threads[f];
+ thr->th.th_teams_microtask = master->th.th_teams_microtask;
+ thr->th.th_teams_level = master->th.th_teams_level;
+ thr->th.th_teams_size = master->th.th_teams_size;
+ }
+ }
+#endif /* OMP_40_ENABLED */
+#if KMP_NESTED_HOT_TEAMS
+ if( level ) {
+ // Sync barrier state for nested hot teams, not needed for outermost hot team.
+ for( f = 1; f < new_nproc; ++f ) {
+ kmp_info_t *thr = team->t.t_threads[f];
+ int b;
+ kmp_balign_t * balign = thr->th.th_bar;
+ for( b = 0; b < bs_last_barrier; ++ b ) {
+ balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
+ KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
+#if USE_DEBUGGER
+ balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
+#endif
+ }
+ }
+ }
+#endif // KMP_NESTED_HOT_TEAMS
+
+ /* reallocate space for arguments if necessary */
+ __kmp_alloc_argv_entries( argc, team, TRUE );
+ team->t.t_argc = argc;
+ //
+ // The hot team re-uses the previous task team,
+ // if untouched during the previous release->gather phase.
+ //
+
+ KF_TRACE( 10, ( " hot_team = %p\n", team ) );
+
+#if KMP_DEBUG
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
+ team->t.t_task_team[0], team->t.t_task_team[1] ));
+ }
+#endif
+
+#if OMPT_SUPPORT
+ __ompt_team_assign_id(team, ompt_parallel_id);
+#endif
+
+ KMP_MB();
+
+ return team;
+ }
+
+ /* next, let's try to take one from the team pool */
+ KMP_MB();
+ for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
+ {
+ /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
+ if ( team->t.t_max_nproc >= max_nproc ) {
+ /* take this team from the team pool */
+ __kmp_team_pool = team->t.t_next_pool;
+
+ /* setup the team for fresh use */
+ __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
+
+ KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
+ &team->t.t_task_team[0], &team->t.t_task_team[1]) );
+ team->t.t_task_team[0] = NULL;
+ team->t.t_task_team[1] = NULL;
+
+ /* reallocate space for arguments if necessary */
+ __kmp_alloc_argv_entries( argc, team, TRUE );
+ team->t.t_argc = argc;
+
+ KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
+ team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
+ { // Initialize barrier data.
+ int b;
+ for ( b = 0; b < bs_last_barrier; ++ b) {
+ team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
+#if USE_DEBUGGER
+ team->t.t_bar[ b ].b_master_arrived = 0;
+ team->t.t_bar[ b ].b_team_arrived = 0;
+#endif
+ }
+ }
+
+#if OMP_40_ENABLED
+ team->t.t_proc_bind = new_proc_bind;
+#endif
+
+ KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
+
+#if OMPT_SUPPORT
+ __ompt_team_assign_id(team, ompt_parallel_id);
+#endif
+
+ KMP_MB();
+
+ return team;
+ }
+
+ /* reap team if it is too small, then loop back and check the next one */
+ /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */
+ /* TODO: Use technique to find the right size hot-team, don't reap them */
+ team = __kmp_reap_team( team );
+ __kmp_team_pool = team;
+ }
+
+ /* nothing available in the pool, no matter, make a new team! */
+ KMP_MB();
+ team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
+
+ /* and set it up */
+ team->t.t_max_nproc = max_nproc;
+ /* NOTE well, for some reason allocating one big buffer and dividing it
+ * up seems to really hurt performance a lot on the P4, so, let's not use
+ * this... */
+ __kmp_allocate_team_arrays( team, max_nproc );
+
+ KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
+ __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
+
+ KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
+ &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
+ team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
+ team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
+
+ if ( __kmp_storage_map ) {
+ __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
+ }
+
+ /* allocate space for arguments */
+ __kmp_alloc_argv_entries( argc, team, FALSE );
+ team->t.t_argc = argc;
+
+ KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
+ team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
+ { // Initialize barrier data.
+ int b;
+ for ( b = 0; b < bs_last_barrier; ++ b ) {
+ team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
+#if USE_DEBUGGER
+ team->t.t_bar[ b ].b_master_arrived = 0;
+ team->t.t_bar[ b ].b_team_arrived = 0;
+#endif
+ }
+ }
+
+#if OMP_40_ENABLED
+ team->t.t_proc_bind = new_proc_bind;
+#endif
+
+#if OMPT_SUPPORT
+ __ompt_team_assign_id(team, ompt_parallel_id);
+ team->t.ompt_serialized_team_info = NULL;
+#endif
+
+ KMP_MB();
+
+ KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
+
+ return team;
+}
+
+/* TODO implement hot-teams at all levels */
+/* TODO implement lazy thread release on demand (disband request) */
+
+/* free the team. return it to the team pool. release all the threads
+ * associated with it */
+void
+__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
+{
+ int f;
+ KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
+
+ /* verify state */
+ KMP_DEBUG_ASSERT( root );
+ KMP_DEBUG_ASSERT( team );
+ KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
+ KMP_DEBUG_ASSERT( team->t.t_threads );
+
+ int use_hot_team = team == root->r.r_hot_team;
+#if KMP_NESTED_HOT_TEAMS
+ int level;
+ kmp_hot_team_ptr_t *hot_teams;
+ if( master ) {
+ level = team->t.t_active_level - 1;
+ if( master->th.th_teams_microtask ) { // in teams construct?
+ if( master->th.th_teams_size.nteams > 1 ) {
+ ++level; // level was not increased in teams construct for team_of_masters
+ }
+ if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
+ master->th.th_teams_level == team->t.t_level ) {
+ ++level; // level was not increased in teams construct for team_of_workers before the parallel
+ } // team->t.t_level will be increased inside parallel
+ }
+ hot_teams = master->th.th_hot_teams;
+ if( level < __kmp_hot_teams_max_level ) {
+ KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
+ use_hot_team = 1;
+ }
+ }
+#endif // KMP_NESTED_HOT_TEAMS
+
+ /* team is done working */
+ TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
+ team->t.t_copyin_counter = 0; // init counter for possible reuse
+ // Do not reset pointer to parent team to NULL for hot teams.
+
+ /* if we are non-hot team, release our threads */
+ if( ! use_hot_team ) {
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // Delete task teams
+ int tt_idx;
+ for (tt_idx=0; tt_idx<2; ++tt_idx) {
+ kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+ if ( task_team != NULL ) {
+ for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
+ team->t.t_threads[f]->th.th_task_team = NULL;
+ }
+ KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
+#if KMP_NESTED_HOT_TEAMS
+ __kmp_free_task_team( master, task_team );
+#endif
+ team->t.t_task_team[tt_idx] = NULL;
+ }
+ }
+ }
+
+ // Reset pointer to parent team only for non-hot teams.
+ team->t.t_parent = NULL;
+
+
+ /* free the worker threads */
+ for ( f = 1; f < team->t.t_nproc; ++ f ) {
+ KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
+ __kmp_free_thread( team->t.t_threads[ f ] );
+ team->t.t_threads[ f ] = NULL;
+ }
+
+
+ /* put the team back in the team pool */
+ /* TODO limit size of team pool, call reap_team if pool too large */
+ team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
+ __kmp_team_pool = (volatile kmp_team_t*) team;
+ }
+
+ KMP_MB();
+}
+
+
+/* reap the team. destroy it, reclaim all its resources and free its memory */
+kmp_team_t *
+__kmp_reap_team( kmp_team_t *team )
+{
+ kmp_team_t *next_pool = team->t.t_next_pool;
+
+ KMP_DEBUG_ASSERT( team );
+ KMP_DEBUG_ASSERT( team->t.t_dispatch );
+ KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
+ KMP_DEBUG_ASSERT( team->t.t_threads );
+ KMP_DEBUG_ASSERT( team->t.t_argv );
+
+ /* TODO clean the threads that are a part of this? */
+
+ /* free stuff */
+
+ __kmp_free_team_arrays( team );
+ if ( team->t.t_argv != &team->t.t_inline_argv[0] )
+ __kmp_free( (void*) team->t.t_argv );
+ __kmp_free( team );
+
+ KMP_MB();
+ return next_pool;
+}
+
+//
+// Free the thread. Don't reap it, just place it on the pool of available
+// threads.
+//
+// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
+// binding for the affinity mechanism to be useful.
+//
+// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
+// However, we want to avoid a potential performance problem by always
+// scanning through the list to find the correct point at which to insert
+// the thread (potential N**2 behavior). To do this we keep track of the
+// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
+// With single-level parallelism, threads will always be added to the tail
+// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
+// parallelism, all bets are off and we may need to scan through the entire
+// free list.
+//
+// This change also has a potentially large performance benefit, for some
+// applications. Previously, as threads were freed from the hot team, they
+// would be placed back on the free list in inverse order. If the hot team
+// grew back to its original size, then the freed thread would be placed
+// back on the hot team in reverse order. This could cause bad cache
+// locality problems on programs where the size of the hot team regularly
+// grew and shrunk.
+//
+// Now, for single-level parallelism, the OMP tid is always == gtid.
+//
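+// Illustrative sketch of the sorted-insert invariant (hypothetical gtids):
+// with the pool 1 -> 3 -> 7 and __kmp_thread_pool_insert_pt at 3, freeing gtid 5
+// starts the scan at 3's link, splices 5 in between 3 and 7, and moves the insert
+// point to 5, so a subsequent free of gtid 6 scans only from 5 onward.
+//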
+void
+__kmp_free_thread( kmp_info_t *this_th )
+{
+ int gtid;
+ kmp_info_t **scan;
+
+ KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
+ __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
+
+ KMP_DEBUG_ASSERT( this_th );
+
+ // When moving a thread to the pool, switch it to wait on its own b_go flag and clear its (now stale) team pointer.
+ int b;
+ kmp_balign_t *balign = this_th->th.th_bar;
+ for (b=0; b<bs_last_barrier; ++b) {
+ if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
+ balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
+ balign[b].bb.team = NULL;
+ }
+ this_th->th.th_task_state = 0;
+
+
+ /* put thread back on the free pool */
+ TCW_PTR(this_th->th.th_team, NULL);
+ TCW_PTR(this_th->th.th_root, NULL);
+ TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
+
+ //
+ // If the __kmp_thread_pool_insert_pt is already past the new insert
+ // point, then we need to re-scan the entire list.
+ //
+ gtid = this_th->th.th_info.ds.ds_gtid;
+ if ( __kmp_thread_pool_insert_pt != NULL ) {
+ KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
+ if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
+ __kmp_thread_pool_insert_pt = NULL;
+ }
+ }
+
+ //
+ // Scan down the list to find the place to insert the thread.
+ // scan is the address of a link in the list, possibly the address of
+ // __kmp_thread_pool itself.
+ //
+ // In the absence of nested parallelism, the for loop will have 0 iterations.
+ //
+ if ( __kmp_thread_pool_insert_pt != NULL ) {
+ scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
+ }
+ else {
+ scan = (kmp_info_t **)&__kmp_thread_pool;
+ }
+ for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
+ scan = &( (*scan)->th.th_next_pool ) );
+
+ //
+ // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
+ // to its address.
+ //
+ TCW_PTR(this_th->th.th_next_pool, *scan);
+ __kmp_thread_pool_insert_pt = *scan = this_th;
+ KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
+ || ( this_th->th.th_info.ds.ds_gtid
+ < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
+ TCW_4(this_th->th.th_in_pool, TRUE);
+ __kmp_thread_pool_nth++;
+
+ TCW_4(__kmp_nth, __kmp_nth - 1);
+
+#ifdef KMP_ADJUST_BLOCKTIME
+ /* Adjust blocktime back to user setting or default if necessary */
+ /* Middle initialization might never have occurred */
+ if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
+ KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
+ if ( __kmp_nth <= __kmp_avail_proc ) {
+ __kmp_zero_bt = FALSE;
+ }
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+ KMP_MB();
+}
+
+
+/* ------------------------------------------------------------------------ */
+
+void *
+__kmp_launch_thread( kmp_info_t *this_thr )
+{
+ int gtid = this_thr->th.th_info.ds.ds_gtid;
+/* void *stack_data;*/
+ kmp_team_t *(*volatile pteam);
+
+ KMP_MB();
+ KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
+
+ if( __kmp_env_consistency_check ) {
+ this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ this_thr->th.ompt_thread_info.wait_id = 0;
+ this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
+ if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
+ __ompt_thread_begin(ompt_thread_worker, gtid);
+ }
+ }
+#endif
+
+ /* This is the place where threads wait for work */
+ while( ! TCR_4(__kmp_global.g.g_done) ) {
+ KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
+ KMP_MB();
+
+ /* wait for work to do */
+ KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ this_thr->th.ompt_thread_info.state = ompt_state_idle;
+ }
+#endif
+
+ /* No tid yet since not part of a team */
+ __kmp_fork_barrier( gtid, KMP_GTID_DNE );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+
+ pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
+
+ /* have we been allocated? */
+ if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
+ /* we were just woken up, so run our new task */
+ if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
+ int rc;
+ KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
+ gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
+
+ updateHWFPControl (*pteam);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+ // Initialize OMPT task id for implicit task.
+ int tid = __kmp_tid_from_gtid(gtid);
+ (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id =
+ __ompt_task_id_new(tid);
+ }
+#endif
+
+ KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
+ rc = (*pteam)->t.t_invoke( gtid );
+ }
+ KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
+ KMP_ASSERT( rc );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ /* no frame set while outside task */
+ int tid = __kmp_tid_from_gtid(gtid);
+ (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
+
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ }
+#endif
+ KMP_MB();
+ KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
+ gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
+ }
+ /* join barrier after parallel region */
+ __kmp_join_barrier( gtid );
+ }
+ }
+ TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
+ __ompt_thread_end(ompt_thread_worker, gtid);
+ }
+#endif
+
+ this_thr->th.th_task_team = NULL;
+ /* run the destructors for the threadprivate data for this thread */
+ __kmp_common_destroy_gtid( gtid );
+
+ KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
+ KMP_MB();
+ return this_thr;
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_internal_end_dest( void *specific_gtid )
+{
+ #if KMP_COMPILER_ICC
+ #pragma warning( push )
+ #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
+ #endif
+ // Make sure no significant bits are lost
+ int gtid = (kmp_intptr_t)specific_gtid - 1;
+ #if KMP_COMPILER_ICC
+ #pragma warning( pop )
+ #endif
+
+ KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
+ /* NOTE: the gtid is stored as gtid+1 in the thread-local storage,
+ * because 0 is reserved for the nothing-stored case (so a stored value of N denotes gtid N-1) */
+
+ /* josh: One reason for setting the gtid specific data even when it is being
+ destroyed by pthread is to allow gtid lookup through thread specific data
+ (__kmp_gtid_get_specific). Some of the code, especially stat code,
+ that gets executed in the call to __kmp_internal_end_thread, actually
+ gets the gtid through the thread specific data. Setting it here seems
+ rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
+ to run smoothly.
+ todo: get rid of this after we remove the dependence on
+ __kmp_gtid_get_specific
+ */
+ if(gtid >= 0 && KMP_UBER_GTID(gtid))
+ __kmp_gtid_set_specific( gtid );
+ #ifdef KMP_TDATA_GTID
+ __kmp_gtid = gtid;
+ #endif
+ __kmp_internal_end_thread( gtid );
+}
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+
+// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases destructors work
+// perfectly, but in a real libomp.so I have no evidence it is ever called. However, the -fini linker
+// option in makefile.mk works fine.
+
+__attribute__(( destructor ))
+void
+__kmp_internal_end_dtor( void )
+{
+ __kmp_internal_end_atexit();
+}
+
+void
+__kmp_internal_end_fini( void )
+{
+ __kmp_internal_end_atexit();
+}
+
+#endif
+
+/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
+void
+__kmp_internal_end_atexit( void )
+{
+ KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
+ /* [Windows]
+ josh: ideally, we want to completely shutdown the library in this atexit handler, but
+ stat code that depends on thread specific data for gtid fails because that data becomes
+ unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
+ instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
+ stat code and use __kmp_internal_end_library to cleanly shutdown the library.
+
+// TODO: Can some of this comment about GVS be removed?
+ I suspect that the offending stat code is executed when the calling thread tries to
+ clean up a dead root thread's data structures, resulting in GVS code trying to close
+ the GVS structures for that thread, but since the stat code uses
+ __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
+ cleaning up itself instead of another thread, it gets confused. This happens because
+ allowing a thread to unregister and cleanup another thread is a recent modification for
+ addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
+ thread may end up trying to unregister another thread only if thread death does not
+ trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
+ specific data destructor function to detect thread death. For Windows dynamic, there
+ is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
+ workaround is applicable only to the Windows static stat library.
+ */
+ __kmp_internal_end_library( -1 );
+ #if KMP_OS_WINDOWS
+ __kmp_close_console();
+ #endif
+}
+
+static void
+__kmp_reap_thread(
+ kmp_info_t * thread,
+ int is_root
+) {
+
+ // It is assumed __kmp_forkjoin_lock is acquired.
+
+ int gtid;
+
+ KMP_DEBUG_ASSERT( thread != NULL );
+
+ gtid = thread->th.th_info.ds.ds_gtid;
+
+ if ( ! is_root ) {
+
+ if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
+ /* Assume the threads are at the fork barrier here */
+ KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
+ /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
+ kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
+ __kmp_release_64(&flag);
+ }; // if
+
+
+ // Terminate OS thread.
+ __kmp_reap_worker( thread );
+
+ //
+ // The thread was killed asynchronously. If it was actively
+ // spinning in the thread pool, decrement the global count.
+ //
+ // There is a small timing hole here - if the worker thread was
+ // just waking up after sleeping in the pool, had reset its
+ // th_active_in_pool flag but not decremented the global counter
+ // __kmp_thread_pool_active_nth yet, then the global counter
+ // might not get updated.
+ //
+ // Currently, this can only happen as the library is unloaded,
+ // so there are no harmful side effects.
+ //
+ if ( thread->th.th_active_in_pool ) {
+ thread->th.th_active_in_pool = FALSE;
+ KMP_TEST_THEN_DEC32(
+ (kmp_int32 *) &__kmp_thread_pool_active_nth );
+ KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
+ }
+
+ // Decrement # of [worker] threads in the pool.
+ KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
+ --__kmp_thread_pool_nth;
+ }; // if
+
+ // Free the fast memory for tasking
+ #if USE_FAST_MEMORY
+ __kmp_free_fast_memory( thread );
+ #endif /* USE_FAST_MEMORY */
+
+ __kmp_suspend_uninitialize_thread( thread );
+
+ KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
+ TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
+
+ -- __kmp_all_nth;
+ // __kmp_nth was decremented when thread is added to the pool.
+
+#ifdef KMP_ADJUST_BLOCKTIME
+ /* Adjust blocktime back to user setting or default if necessary */
+ /* Middle initialization might never have occurred */
+ if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
+ KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
+ if ( __kmp_nth <= __kmp_avail_proc ) {
+ __kmp_zero_bt = FALSE;
+ }
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+ /* free the memory being used */
+ if( __kmp_env_consistency_check ) {
+ if ( thread->th.th_cons ) {
+ __kmp_free_cons_stack( thread->th.th_cons );
+ thread->th.th_cons = NULL;
+ }; // if
+ }
+
+ if ( thread->th.th_pri_common != NULL ) {
+ __kmp_free( thread->th.th_pri_common );
+ thread->th.th_pri_common = NULL;
+ }; // if
+
+ if (thread->th.th_task_state_memo_stack != NULL) {
+ __kmp_free(thread->th.th_task_state_memo_stack);
+ thread->th.th_task_state_memo_stack = NULL;
+ }
+
+ #if KMP_USE_BGET
+ if ( thread->th.th_local.bget_data != NULL ) {
+ __kmp_finalize_bget( thread );
+ }; // if
+ #endif
+
+#if KMP_AFFINITY_SUPPORTED
+ if ( thread->th.th_affin_mask != NULL ) {
+ KMP_CPU_FREE( thread->th.th_affin_mask );
+ thread->th.th_affin_mask = NULL;
+ }; // if
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+ __kmp_reap_team( thread->th.th_serial_team );
+ thread->th.th_serial_team = NULL;
+ __kmp_free( thread );
+
+ KMP_MB();
+
+} // __kmp_reap_thread
+
+static void
+__kmp_internal_end(void)
+{
+ int i;
+
+ /* First, unregister the library */
+ __kmp_unregister_library();
+
+ #if KMP_OS_WINDOWS
+ /* In Win static library, we can't tell when a root actually dies, so we
+ reclaim the data structures for any root threads that have died but not
+ unregistered themselves, in order to shut down cleanly.
+ In Win dynamic library we also can't tell when a thread dies.
+ */
+ __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
+ #endif
+
+ for( i=0 ; i<__kmp_threads_capacity ; i++ )
+ if( __kmp_root[i] )
+ if( __kmp_root[i]->r.r_active )
+ break;
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
+
+ if ( i < __kmp_threads_capacity ) {
+ // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ //
+ // Need to check that monitor was initialized before reaping it.
+ // If we are called form __kmp_atfork_child (which sets
+ // __kmp_init_parallel = 0), then __kmp_monitor will appear to
+ // contain valid data, but it is only valid in the parent process,
+ // not the child.
+ //
+ // New behavior (201008): instead of keying off of the flag
+ // __kmp_init_parallel, the monitor thread creation is keyed off
+ // of the new flag __kmp_init_monitor.
+ //
+ __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
+ if ( TCR_4( __kmp_init_monitor ) ) {
+ __kmp_reap_monitor( & __kmp_monitor );
+ TCW_4( __kmp_init_monitor, 0 );
+ }
+ __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
+ KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
+ } else {
+ /* TODO move this to cleanup code */
+ #ifdef KMP_DEBUG
+ /* make sure that everything has properly ended */
+ for ( i = 0; i < __kmp_threads_capacity; i++ ) {
+ if( __kmp_root[i] ) {
+// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here
+ KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active?
+ }
+ }
+ #endif
+
+ KMP_MB();
+
+ // Reap the worker threads.
+ // This is valid for now, but be careful if threads are reaped sooner.
+ while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool.
+ // Get the next thread from the pool.
+ kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
+ __kmp_thread_pool = thread->th.th_next_pool;
+ // Reap it.
+ thread->th.th_next_pool = NULL;
+ thread->th.th_in_pool = FALSE;
+ __kmp_reap_thread( thread, 0 );
+ }; // while
+ __kmp_thread_pool_insert_pt = NULL;
+
+ // Reap teams.
+ while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
+ // Get the next team from the pool.
+ kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
+ __kmp_team_pool = team->t.t_next_pool;
+ // Reap it.
+ team->t.t_next_pool = NULL;
+ __kmp_reap_team( team );
+ }; // while
+
+ __kmp_reap_task_teams( );
+
+ for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
+ // TBD: Add some checking...
+ // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
+ }
+
+ /* Make sure all threadprivate destructors get run by joining with all worker
+ threads before resetting this flag */
+ TCW_SYNC_4(__kmp_init_common, FALSE);
+
+ KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
+ KMP_MB();
+
+ //
+ // See note above: One of the possible fixes for CQ138434 / CQ140126
+ //
+ // FIXME: push both code fragments down and CSE them?
+ // push them into __kmp_cleanup() ?
+ //
+ __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
+ if ( TCR_4( __kmp_init_monitor ) ) {
+ __kmp_reap_monitor( & __kmp_monitor );
+ TCW_4( __kmp_init_monitor, 0 );
+ }
+ __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
+ KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
+
+ } /* else !__kmp_global.t_active */
+ TCW_4(__kmp_init_gtid, FALSE);
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+
+ __kmp_cleanup();
+#if OMPT_SUPPORT
+ ompt_fini();
+#endif
+}
+
+void
+__kmp_internal_end_library( int gtid_req )
+{
+ /* if we have already cleaned up, don't try again, it wouldn't be pretty */
+ /* this shouldn't be a race condition because __kmp_internal_end() is the
+ * only place to clear __kmp_init_serial */
+ /* we'll check this later too, after we get the lock */
+ // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
+ // because the next check will work in any case.
+ if( __kmp_global.g.g_abort ) {
+ KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
+ /* TODO abort? */
+ return;
+ }
+ if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
+ KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
+ return;
+ }
+
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* find out who we are and what we should do */
+ {
+ int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
+ KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
+ if( gtid == KMP_GTID_SHUTDOWN ) {
+ KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
+ return;
+ } else if( gtid == KMP_GTID_MONITOR ) {
+ KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
+ return;
+ } else if( gtid == KMP_GTID_DNE ) {
+ KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
+ /* we don't know who we are, but we may still shutdown the library */
+ } else if( KMP_UBER_GTID( gtid )) {
+ /* unregister ourselves as an uber thread. gtid is no longer valid */
+ if( __kmp_root[gtid]->r.r_active ) {
+ __kmp_global.g.g_abort = -1;
+ TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
+ KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
+ return;
+ } else {
+ KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
+ __kmp_unregister_root_current_thread( gtid );
+ }
+ } else {
+ /* worker threads may call this function through the atexit handler, if they call exit() */
+ /* For now, skip the usual subsequent processing and just dump the debug buffer.
+ TODO: do a thorough shutdown instead
+ */
+ #ifdef DUMP_DEBUG_ON_EXIT
+ if ( __kmp_debug_buf )
+ __kmp_dump_debug_buffer( );
+ #endif
+ return;
+ }
+ }
+ /* synchronize the termination process */
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+
+ /* have we already finished */
+ if( __kmp_global.g.g_abort ) {
+ KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
+ /* TODO abort? */
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ }
+ if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ }
+
+ /* We need this lock to enforce mutual exclusion between this reading of
+ __kmp_threads_capacity and the writing by __kmp_register_root.
+ Alternatively, we can use a counter of roots that is
+ atomically updated by __kmp_get_global_thread_id_reg,
+ __kmp_do_serial_initialize and __kmp_internal_end_*.
+ */
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+ /* now we can safely conduct the actual termination */
+ __kmp_internal_end();
+
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+
+ KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
+
+ #ifdef DUMP_DEBUG_ON_EXIT
+ if ( __kmp_debug_buf )
+ __kmp_dump_debug_buffer();
+ #endif
+
+ #if KMP_OS_WINDOWS
+ __kmp_close_console();
+ #endif
+
+ __kmp_fini_allocator();
+
+} // __kmp_internal_end_library
+
+void
+__kmp_internal_end_thread( int gtid_req )
+{
+ int i;
+
+ /* if we have already cleaned up, don't try again, it wouldn't be pretty */
+ /* this shouldn't be a race condition because __kmp_internal_end() is the
+ * only place to clear __kmp_init_serial */
+ /* we'll check this later too, after we get the lock */
+ // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
+ // because the next check will work in any case.
+ if( __kmp_global.g.g_abort ) {
+ KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
+ /* TODO abort? */
+ return;
+ }
+ if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
+ return;
+ }
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* find out who we are and what we should do */
+ {
+ int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
+ KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
+ if( gtid == KMP_GTID_SHUTDOWN ) {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
+ return;
+ } else if( gtid == KMP_GTID_MONITOR ) {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
+ return;
+ } else if( gtid == KMP_GTID_DNE ) {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
+ return;
+ /* we don't know who we are */
+ } else if( KMP_UBER_GTID( gtid )) {
+ /* unregister ourselves as an uber thread. gtid is no longer valid */
+ if( __kmp_root[gtid]->r.r_active ) {
+ __kmp_global.g.g_abort = -1;
+ TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
+ KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
+ return;
+ } else {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
+ __kmp_unregister_root_current_thread( gtid );
+ }
+ } else {
+ /* just a worker thread, let's leave */
+ KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
+
+ if ( gtid >= 0 ) {
+ __kmp_threads[gtid]->th.th_task_team = NULL;
+ }
+
+ KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
+ return;
+ }
+ }
+ #if defined KMP_DYNAMIC_LIB
+ // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber thread,
+ // because it is better to shut down later, in the library destructor.
+ // The reason for this change is a performance problem when a non-OpenMP thread
+ // forks and joins many OpenMP threads in a loop. We can save a lot of time by
+ // keeping worker threads alive until program shutdown.
+ // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
+ // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
+ KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
+ return;
+ #endif
+ /* synchronize the termination process */
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+
+ /* have we already finished */
+ if( __kmp_global.g.g_abort ) {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
+ /* TODO abort? */
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ }
+ if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ }
+
+ /* We need this lock to enforce mutual exclusion between this reading of
+ __kmp_threads_capacity and the writing by __kmp_register_root.
+ Alternatively, we can use a counter of roots that is
+ atomically updated by __kmp_get_global_thread_id_reg,
+ __kmp_do_serial_initialize and __kmp_internal_end_*.
+ */
+
+ /* should we finish the run-time? are all siblings done? */
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+ for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
+ if ( KMP_UBER_GTID( i ) ) {
+ KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ };
+ }
+
+ /* now we can safely conduct the actual termination */
+
+ __kmp_internal_end();
+
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+
+ KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
+
+ #ifdef DUMP_DEBUG_ON_EXIT
+ if ( __kmp_debug_buf )
+ __kmp_dump_debug_buffer();
+ #endif
+} // __kmp_internal_end_thread
+
+// -------------------------------------------------------------------------------------------------
+// Library registration stuff.
+
+static long __kmp_registration_flag = 0;
+ // Random value used to indicate library initialization.
+static char * __kmp_registration_str = NULL;
+ // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
+
+
+static inline
+char *
+__kmp_reg_status_name() {
+ /*
+ On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
+ If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
+ the name of the registered_lib_env env var cannot be found, because the name will contain a different pid.
+ */
+ return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
+} // __kmp_reg_status_name
+
+
+void
+__kmp_register_library_startup(
+ void
+) {
+
+ char * name = __kmp_reg_status_name(); // Name of the environment variable.
+ int done = 0;
+ union {
+ double dtime;
+ long ltime;
+ } time;
+ #if KMP_OS_WINDOWS
+ __kmp_initialize_system_tick();
+ #endif
+ __kmp_read_system_time( & time.dtime );
+ __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
+ __kmp_registration_str =
+ __kmp_str_format(
+ "%p-%lx-%s",
+ & __kmp_registration_flag,
+ __kmp_registration_flag,
+ KMP_LIBRARY_FILE
+ );
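+ // For illustration only (hypothetical values): with a flag of 0xCAFE1234 the string
+ // looks like "0x7ffd1234abcd-cafe1234-libomp.so", and it is stored under the env var
+ // name produced by __kmp_reg_status_name(), e.g. __KMP_REGISTERED_LIB_4242.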
+
+ KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
+
+ while ( ! done ) {
+
+ char * value = NULL; // Actual value of the environment variable.
+
+ // Set the environment variable, but do not overwrite it if it already exists.
+ __kmp_env_set( name, __kmp_registration_str, 0 );
+ // Check that the variable was actually written.
+ value = __kmp_env_get( name );
+ if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
+
+ done = 1; // Ok, environment variable set successfully, exit the loop.
+
+ } else {
+
+ // Oops. Write failed. Another copy of OpenMP RTL is in memory.
+ // Check whether it is alive or dead.
+ int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
+ char * tail = value;
+ char * flag_addr_str = NULL;
+ char * flag_val_str = NULL;
+ char const * file_name = NULL;
+ __kmp_str_split( tail, '-', & flag_addr_str, & tail );
+ __kmp_str_split( tail, '-', & flag_val_str, & tail );
+ file_name = tail;
+ if ( tail != NULL ) {
+ long * flag_addr = 0;
+ long flag_val = 0;
+ KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
+ KMP_SSCANF( flag_val_str, "%lx", & flag_val );
+ if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
+ // First, check whether environment-encoded address is mapped into addr space.
+ // If so, dereference it to see if it still has the right value.
+
+ if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
+ neighbor = 1;
+ } else {
+ // If not, then we know the other copy of the library is no longer running.
+ neighbor = 2;
+ }; // if
+ }; // if
+ }; // if
+ switch ( neighbor ) {
+ case 0 : // Cannot parse environment variable -- neighbor status unknown.
+ // Assume it is the incompatible format of a future version of the library.
+ // Assume the other library is alive.
+ // WARN( ... ); // TODO: Issue a warning.
+ file_name = "unknown library";
+ // Attention! Falling through to the next case. That's intentional.
+ case 1 : { // Neighbor is alive.
+ // Check it is allowed.
char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
- if ( ! __kmp_str_match_true( duplicate_ok ) ) {
- // That's not allowed. Issue fatal error.
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
- KMP_HNT( DuplicateLibrary ),
- __kmp_msg_null
- );
- }; // if
- KMP_INTERNAL_FREE( duplicate_ok );
- __kmp_duplicate_library_ok = 1;
- done = 1; // Exit the loop.
- } break;
- case 2 : { // Neighbor is dead.
- // Clear the variable and try to register library again.
- __kmp_env_unset( name );
- } break;
- default : {
- KMP_DEBUG_ASSERT( 0 );
- } break;
- }; // switch
-
- }; // if
- KMP_INTERNAL_FREE( (void *) value );
-
- }; // while
- KMP_INTERNAL_FREE( (void *) name );
-
-} // func __kmp_register_library_startup
-
-
-void
-__kmp_unregister_library( void ) {
-
- char * name = __kmp_reg_status_name();
- char * value = __kmp_env_get( name );
-
- KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
- KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
- if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
- // Ok, this is our variable. Delete it.
- __kmp_env_unset( name );
- }; // if
-
- KMP_INTERNAL_FREE( __kmp_registration_str );
- KMP_INTERNAL_FREE( value );
- KMP_INTERNAL_FREE( name );
-
- __kmp_registration_flag = 0;
- __kmp_registration_str = NULL;
-
-} // __kmp_unregister_library
-
-
-// End of Library registration stuff.
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-
-static void __kmp_check_mic_type()
-{
- kmp_cpuid_t cpuid_state = {0};
- kmp_cpuid_t * cs_p = &cpuid_state;
- __kmp_x86_cpuid(1, 0, cs_p);
- // We don't support mic1 at the moment
- if( (cs_p->eax & 0xff0) == 0xB10 ) {
- __kmp_mic_type = mic2;
- } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
- __kmp_mic_type = mic3;
- } else {
- __kmp_mic_type = non_mic;
- }
-}
-
-#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
-
-static void
-__kmp_do_serial_initialize( void )
-{
- int i, gtid;
- int size;
-
- KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
-
- KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
- KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
- KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
- KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
- KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
-
-#if OMPT_SUPPORT
- ompt_pre_init();
-#endif
-
- __kmp_validate_locks();
-
- /* Initialize internal memory allocator */
- __kmp_init_allocator();
-
- /* Register the library startup via an environment variable
- and check to see whether another copy of the library is already
- registered. */
-
- __kmp_register_library_startup( );
-
- /* TODO reinitialization of library */
- if( TCR_4(__kmp_global.g.g_done) ) {
- KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
- }
-
- __kmp_global.g.g_abort = 0;
- TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
-
- /* initialize the locks */
-#if KMP_USE_ADAPTIVE_LOCKS
-#if KMP_DEBUG_ADAPTIVE_LOCKS
- __kmp_init_speculative_stats();
-#endif
-#endif
-#if KMP_STATS_ENABLED
- __kmp_init_tas_lock( & __kmp_stats_lock );
-#endif
- __kmp_init_lock( & __kmp_global_lock );
- __kmp_init_queuing_lock( & __kmp_dispatch_lock );
- __kmp_init_lock( & __kmp_debug_lock );
- __kmp_init_atomic_lock( & __kmp_atomic_lock );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
- __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
- __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
- __kmp_init_bootstrap_lock( & __kmp_exit_lock );
- __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
- __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
-
- /* conduct initialization and initial setup of configuration */
-
- __kmp_runtime_initialize();
-
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- __kmp_check_mic_type();
-#endif
-
- // Some global variable initialization moved here from kmp_env_initialize()
-#ifdef KMP_DEBUG
- kmp_diag = 0;
-#endif
- __kmp_abort_delay = 0;
-
- // From __kmp_init_dflt_team_nth()
- /* assume the entire machine will be used */
- __kmp_dflt_team_nth_ub = __kmp_xproc;
- if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
- __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
- }
- if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
- __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
- }
- __kmp_max_nth = __kmp_sys_max_nth;
-
- // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
- __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
- __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
- __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
- // From "KMP_LIBRARY" part of __kmp_env_initialize()
- __kmp_library = library_throughput;
- // From KMP_SCHEDULE initialization
- __kmp_static = kmp_sch_static_balanced;
- // AC: do not use analytical here, because it is non-monotonic
- //__kmp_guided = kmp_sch_guided_iterative_chunked;
- //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
- // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
- // control parts
- #if KMP_FAST_REDUCTION_BARRIER
- #define kmp_reduction_barrier_gather_bb ((int)1)
- #define kmp_reduction_barrier_release_bb ((int)1)
- #define kmp_reduction_barrier_gather_pat bp_hyper_bar
- #define kmp_reduction_barrier_release_pat bp_hyper_bar
- #endif // KMP_FAST_REDUCTION_BARRIER
- for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
- __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
- __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
- __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
- __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
- #if KMP_FAST_REDUCTION_BARRIER
- if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
- __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
- __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
- __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
- __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
- }
- #endif // KMP_FAST_REDUCTION_BARRIER
- }
- #if KMP_FAST_REDUCTION_BARRIER
- #undef kmp_reduction_barrier_release_pat
- #undef kmp_reduction_barrier_gather_pat
- #undef kmp_reduction_barrier_release_bb
- #undef kmp_reduction_barrier_gather_bb
- #endif // KMP_FAST_REDUCTION_BARRIER
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic ) {
- // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
- __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
- __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
- __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
- __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
- }
-#if KMP_FAST_REDUCTION_BARRIER
- if( __kmp_mic_type != non_mic ) {
- __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
- __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
- }
-#endif
-#endif
-
- // From KMP_CHECKS initialization
-#ifdef KMP_DEBUG
- __kmp_env_checks = TRUE; /* development versions have the extra checks */
-#else
- __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
-#endif
-
- // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
- __kmp_foreign_tp = TRUE;
-
- __kmp_global.g.g_dynamic = FALSE;
- __kmp_global.g.g_dynamic_mode = dynamic_default;
-
- __kmp_env_initialize( NULL );
-
- // Print all messages in message catalog for testing purposes.
- #ifdef KMP_DEBUG
- char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
- if ( __kmp_str_match_true( val ) ) {
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
- __kmp_i18n_dump_catalog( & buffer );
- __kmp_printf( "%s", buffer.str );
- __kmp_str_buf_free( & buffer );
- }; // if
- __kmp_env_free( & val );
- #endif
-
- __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
- // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
- __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
-
- // If the library is shut down properly, both pools must be NULL. Just in case, set them
- // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
- KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
- KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
- KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
- __kmp_thread_pool = NULL;
- __kmp_thread_pool_insert_pt = NULL;
- __kmp_team_pool = NULL;
-
- /* Allocate all of the variable sized records */
- /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
- /* Since allocation is cache-aligned, just add extra padding at the end */
- size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
- __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
- __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
-
- /* init thread counts */
- KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
- KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
- __kmp_all_nth = 0;
- __kmp_nth = 0;
-
- /* setup the uber master thread and hierarchy */
- gtid = __kmp_register_root( TRUE );
- KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
- KMP_ASSERT( KMP_UBER_GTID( gtid ) );
- KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- __kmp_common_initialize();
-
- #if KMP_OS_UNIX
- /* invoke the child fork handler */
- __kmp_register_atfork();
- #endif
-
- #if ! defined KMP_DYNAMIC_LIB
- {
- /* Invoke the exit handler when the program finishes, only for static library.
- For dynamic library, we already have _fini and DllMain.
- */
- int rc = atexit( __kmp_internal_end_atexit );
- if ( rc != 0 ) {
- __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
- }; // if
- }
- #endif
-
- #if KMP_HANDLE_SIGNALS
- #if KMP_OS_UNIX
- /* NOTE: make sure that this is called before the user installs
- * their own signal handlers so that the user handlers
- * are called first. This way they can return false,
- * not call our handler, avoid terminating the library,
- * and continue execution where they left off. */
- __kmp_install_signals( FALSE );
- #endif /* KMP_OS_UNIX */
- #if KMP_OS_WINDOWS
- __kmp_install_signals( TRUE );
- #endif /* KMP_OS_WINDOWS */
- #endif
-
- /* we have finished the serial initialization */
- __kmp_init_counter ++;
-
- __kmp_init_serial = TRUE;
-
- if (__kmp_settings) {
- __kmp_env_print();
- }
-
-#if OMP_40_ENABLED
- if (__kmp_display_env || __kmp_display_env_verbose) {
- __kmp_env_print_2();
- }
-#endif // OMP_40_ENABLED
-
-#if OMPT_SUPPORT
- ompt_post_init();
-#endif
-
- KMP_MB();
-
- KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
-}
-
-void
-__kmp_serial_initialize( void )
-{
- if ( __kmp_init_serial ) {
- return;
- }
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
- if ( __kmp_init_serial ) {
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- }
- __kmp_do_serial_initialize();
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
-}
-
-static void
-__kmp_do_middle_initialize( void )
-{
- int i, j;
- int prev_dflt_team_nth;
-
- if( !__kmp_init_serial ) {
- __kmp_do_serial_initialize();
- }
-
- KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
-
- //
- // Save the previous value for the __kmp_dflt_team_nth so that
- // we can avoid some reinitialization if it hasn't changed.
- //
- prev_dflt_team_nth = __kmp_dflt_team_nth;
-
-#if KMP_AFFINITY_SUPPORTED
- //
- // __kmp_affinity_initialize() will try to set __kmp_ncores to the
- // number of cores on the machine.
- //
- __kmp_affinity_initialize();
-
- //
- // Run through the __kmp_threads array and set the affinity mask
- // for each root thread that is currently registered with the RTL.
- //
- for ( i = 0; i < __kmp_threads_capacity; i++ ) {
- if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
- __kmp_affinity_set_init_mask( i, TRUE );
- }
- }
-#endif /* KMP_AFFINITY_SUPPORTED */
-
- KMP_ASSERT( __kmp_xproc > 0 );
- if ( __kmp_avail_proc == 0 ) {
- __kmp_avail_proc = __kmp_xproc;
- }
-
- // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
- j = 0;
- while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
- __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
- j++;
- }
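- // Worked example (hypothetical): with OMP_NUM_THREADS=,,2,3 and __kmp_avail_proc == 8,
- // the two leading empty entries become 8, giving the list 8,8,2,3; __kmp_dflt_team_nth
- // and __kmp_dflt_team_nth_ub are set to 8 as a side effect of the loop above.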
-
- if ( __kmp_dflt_team_nth == 0 ) {
-#ifdef KMP_DFLT_NTH_CORES
- //
- // Default #threads = #cores
- //
- __kmp_dflt_team_nth = __kmp_ncores;
- KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
- __kmp_dflt_team_nth ) );
-#else
- //
- // Default #threads = #available OS procs
- //
- __kmp_dflt_team_nth = __kmp_avail_proc;
- KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
- __kmp_dflt_team_nth ) );
-#endif /* KMP_DFLT_NTH_CORES */
- }
-
- if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
- __kmp_dflt_team_nth = KMP_MIN_NTH;
- }
- if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
- __kmp_dflt_team_nth = __kmp_sys_max_nth;
- }
-
- //
- // There's no harm in continuing if the following check fails,
- // but it indicates an error in the previous logic.
- //
- KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
-
- if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
- //
- // Run through the __kmp_threads array and set the num threads icv
- // for each root thread that is currently registered with the RTL
- // (which has not already explicitly set its nthreads-var with a
- // call to omp_set_num_threads()).
- //
- for ( i = 0; i < __kmp_threads_capacity; i++ ) {
- kmp_info_t *thread = __kmp_threads[ i ];
- if ( thread == NULL ) continue;
- if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
-
- set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
- }
- }
- KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
- __kmp_dflt_team_nth) );
-
-#ifdef KMP_ADJUST_BLOCKTIME
- /* Adjust blocktime to zero if necessary */
- /* now that __kmp_avail_proc is set */
- if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
- KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
- if ( __kmp_nth > __kmp_avail_proc ) {
- __kmp_zero_bt = TRUE;
- }
- }
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- /* we have finished middle initialization */
- TCW_SYNC_4(__kmp_init_middle, TRUE);
-
- KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
-}
-
-void
-__kmp_middle_initialize( void )
-{
- if ( __kmp_init_middle ) {
- return;
- }
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
- if ( __kmp_init_middle ) {
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
- return;
- }
- __kmp_do_middle_initialize();
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
-}
-
-void
-__kmp_parallel_initialize( void )
-{
- int gtid = __kmp_entry_gtid(); // this might be a new root
-
- /* synchronize parallel initialization (for sibling) */
- if( TCR_4(__kmp_init_parallel) ) return;
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
- if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
-
- /* TODO reinitialization after we have already shut down */
- if( TCR_4(__kmp_global.g.g_done) ) {
- KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
- __kmp_infinite_loop();
- }
-
- /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
- would cause a deadlock. So we call __kmp_do_serial_initialize directly.
- */
- if( !__kmp_init_middle ) {
- __kmp_do_middle_initialize();
- }
-
- /* begin initialization */
- KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
- KMP_ASSERT( KMP_UBER_GTID( gtid ) );
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- //
- // Save the FP control regs.
- // Worker threads will set theirs to these values at thread startup.
- //
- __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
- __kmp_store_mxcsr( &__kmp_init_mxcsr );
- __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#if KMP_OS_UNIX
-# if KMP_HANDLE_SIGNALS
- /* must be after __kmp_serial_initialize */
- __kmp_install_signals( TRUE );
-# endif
-#endif
-
- __kmp_suspend_initialize();
-
-# if defined(USE_LOAD_BALANCE)
- if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
- __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
- }
-#else
- if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
- __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
- }
-#endif
-
- if ( __kmp_version ) {
- __kmp_print_version_2();
- }
-
- /* we have finished parallel initialization */
- TCW_SYNC_4(__kmp_init_parallel, TRUE);
-
- KMP_MB();
- KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
-
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
- kmp_team_t *team )
-{
- kmp_disp_t *dispatch;
-
- KMP_MB();
-
- /* none of the threads have encountered any constructs, yet. */
- this_thr->th.th_local.this_construct = 0;
-#if KMP_CACHE_MANAGE
- KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
-#endif /* KMP_CACHE_MANAGE */
- dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
- KMP_DEBUG_ASSERT( dispatch );
- KMP_DEBUG_ASSERT( team->t.t_dispatch );
- //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
-
- dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
-
- if( __kmp_env_consistency_check )
- __kmp_push_parallel( gtid, team->t.t_ident );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-}
-
-void
-__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
- kmp_team_t *team )
-{
- if( __kmp_env_consistency_check )
- __kmp_pop_parallel( gtid, team->t.t_ident );
-}
-
-int
-__kmp_invoke_task_func( int gtid )
-{
- int rc;
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_info_t *this_thr = __kmp_threads[ gtid ];
- kmp_team_t *team = this_thr->th.th_team;
-
- __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
-#if USE_ITT_BUILD
- if ( __itt_stack_caller_create_ptr ) {
- __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
- }
-#endif /* USE_ITT_BUILD */
-#if INCLUDE_SSC_MARKS
- SSC_MARK_INVOKING();
-#endif
-
-#if OMPT_SUPPORT
- void *dummy;
- void **exit_runtime_p;
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
-
- if (ompt_enabled) {
- exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
- ompt_task_info.frame.exit_runtime_frame);
- } else {
- exit_runtime_p = &dummy;
- }
-
-#if OMPT_TRACE
- my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
- my_parallel_id = team->t.ompt_team_info.parallel_id;
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
- }
-#endif
-#endif
-
- {
- KMP_TIME_BLOCK(OMP_work);
- rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
- gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
-#if OMPT_SUPPORT
- , exit_runtime_p
-#endif
- );
- }
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- my_parallel_id, my_task_id);
- }
- // the implicit task is not dead yet, so we can't clear its task id here
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
- }
-#endif
-
-#if USE_ITT_BUILD
- if ( __itt_stack_caller_create_ptr ) {
- __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
- }
-#endif /* USE_ITT_BUILD */
- __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
-
- return rc;
-}
-
-#if OMP_40_ENABLED
-void
-__kmp_teams_master( int gtid )
-{
- // This routine is called by all master threads in teams construct
- kmp_info_t *thr = __kmp_threads[ gtid ];
- kmp_team_t *team = thr->th.th_team;
- ident_t *loc = team->t.t_ident;
- thr->th.th_set_nproc = thr->th.th_teams_size.nth;
- KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
- KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
- KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
- gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
- // Launch the league of teams now, but do not let workers execute
- // (they hang on the fork barrier until the next parallel region)
-#if INCLUDE_SSC_MARKS
- SSC_MARK_FORKING();
-#endif
- __kmp_fork_call( loc, gtid, fork_context_intel,
- team->t.t_argc,
-#if OMPT_SUPPORT
- (void *)thr->th.th_teams_microtask, // "unwrapped" task
-#endif
- (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
- VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
- NULL );
-#if INCLUDE_SSC_MARKS
- SSC_MARK_JOINING();
-#endif
-
- // AC: last parameter "1" eliminates join barrier which won't work because
- // worker threads are in a fork barrier waiting for more parallel regions
- __kmp_join_call( loc, gtid
-#if OMPT_SUPPORT
- , fork_context_intel
-#endif
- , 1 );
-}
-
-int
-__kmp_invoke_teams_master( int gtid )
-{
- kmp_info_t *this_thr = __kmp_threads[ gtid ];
- kmp_team_t *team = this_thr->th.th_team;
- #if KMP_DEBUG
- if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
- KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
- #endif
- __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
- __kmp_teams_master( gtid );
- __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
- return 1;
-}
-#endif /* OMP_40_ENABLED */
-
-/* this sets the requested number of threads for the next parallel region
- * encountered by this team */
-/* since this should be enclosed in the forkjoin critical section it
- * should avoid race conditions with asymmetrical nested parallelism */
-
-void
-__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
-{
- kmp_info_t *thr = __kmp_threads[gtid];
-
- if( num_threads > 0 )
- thr->th.th_set_nproc = num_threads;
-}
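-// A minimal usage sketch (hypothetical call sequence): for a construct such as
-// #pragma omp parallel num_threads(4), the compiler-generated entry point is expected
-// to call __kmp_push_num_threads(loc, gtid, 4) shortly before the fork, so that only
-// the very next parallel region sees th_set_nproc == 4.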
-
-#if OMP_40_ENABLED
-
-/* this sets the requested number of teams for the teams region and/or
- * the number of threads for the next parallel region encountered */
-void
-__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
-{
- kmp_info_t *thr = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(num_teams >= 0);
- KMP_DEBUG_ASSERT(num_threads >= 0);
-
- if( num_teams == 0 )
- num_teams = 1; // default number of teams is 1.
- if( num_teams > __kmp_max_nth ) { // if too many teams requested?
- if ( !__kmp_reserve_warn ) {
- __kmp_reserve_warn = 1;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
- KMP_HNT( Unset_ALL_THREADS ),
- __kmp_msg_null
- );
- }
- num_teams = __kmp_max_nth;
- }
- // Set number of teams (number of threads in the outer "parallel" of the teams)
- thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
-
- // Remember the number of threads for inner parallel regions
- if( num_threads == 0 ) {
- if( !TCR_4(__kmp_init_middle) )
- __kmp_middle_initialize(); // get __kmp_avail_proc calculated
- num_threads = __kmp_avail_proc / num_teams;
- if( num_teams * num_threads > __kmp_max_nth ) {
- // adjust num_threads w/o warning as it is not a user setting
- num_threads = __kmp_max_nth / num_teams;
- }
- } else {
- if( num_teams * num_threads > __kmp_max_nth ) {
- int new_threads = __kmp_max_nth / num_teams;
- if ( !__kmp_reserve_warn ) { // user asked for too many threads
- __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
- KMP_HNT( Unset_ALL_THREADS ),
- __kmp_msg_null
- );
- }
- num_threads = new_threads;
- }
- }
- thr->th.th_teams_size.nth = num_threads;
-}
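-// Worked example of the defaulting and clamping above (hypothetical numbers): with
-// __kmp_avail_proc == 16, __kmp_max_nth == 8, num_teams == 4 and num_threads == 0,
-// num_threads first defaults to 16/4 == 4; since 4*4 > 8 it is reduced to 8/4 == 2,
-// so each of the 4 teams runs with 2 threads.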
-
-
-//
-// Set the proc_bind var to use in the following parallel region.
-//
-void
-__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
-{
- kmp_info_t *thr = __kmp_threads[gtid];
- thr->th.th_set_proc_bind = proc_bind;
-}
-
-#endif /* OMP_40_ENABLED */
-
-/* Launch the worker threads into the microtask. */
-
-void
-__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
-{
- kmp_info_t *this_thr = __kmp_threads[gtid];
-
-#ifdef KMP_DEBUG
- int f;
-#endif /* KMP_DEBUG */
-
- KMP_DEBUG_ASSERT( team );
- KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
- KMP_ASSERT( KMP_MASTER_GTID(gtid) );
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- team->t.t_construct = 0; /* no single directives seen yet */
- team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
-
- /* Reset the identifiers on the dispatch buffer */
- KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
- if ( team->t.t_max_nproc > 1 ) {
- int i;
- for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
- team->t.t_disp_buffer[ i ].buffer_index = i;
- } else {
- team->t.t_disp_buffer[ 0 ].buffer_index = 0;
- }
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- KMP_ASSERT( this_thr->th.th_team == team );
-
-#ifdef KMP_DEBUG
- for( f=0 ; f<team->t.t_nproc ; f++ ) {
- KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
- team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
- }
-#endif /* KMP_DEBUG */
-
- /* release the worker threads so they may begin working */
- __kmp_fork_barrier( gtid, 0 );
-}
-
-
-void
-__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
-{
- kmp_info_t *this_thr = __kmp_threads[gtid];
-
- KMP_DEBUG_ASSERT( team );
- KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
- KMP_ASSERT( KMP_MASTER_GTID(gtid) );
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* Join barrier after fork */
-
-#ifdef KMP_DEBUG
- if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
- __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
- __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
- gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
- __kmp_print_structure();
- }
- KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
- __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
-#endif /* KMP_DEBUG */
-
- __kmp_join_barrier( gtid ); /* wait for everyone */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- KMP_ASSERT( this_thr->th.th_team == team );
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef USE_LOAD_BALANCE
-
-//
-// Return the number of worker threads actively spinning in the hot team
-// if we are at the outermost level of parallelism. Otherwise, return 0.
-//
-static int
-__kmp_active_hot_team_nproc( kmp_root_t *root )
-{
- int i;
- int retval;
- kmp_team_t *hot_team;
-
- if ( root->r.r_active ) {
- return 0;
- }
- hot_team = root->r.r_hot_team;
- if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
- return hot_team->t.t_nproc - 1; // Don't count master thread
- }
-
- //
- // Skip the master thread - it is accounted for elsewhere.
- //
- retval = 0;
- for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
- if ( hot_team->t.t_threads[i]->th.th_active ) {
- retval++;
- }
- }
- return retval;
-}
-
-//
-// Perform an automatic adjustment to the number of
-// threads used by the next parallel region.
-//
-static int
-__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
-{
- int retval;
- int pool_active;
- int hot_team_active;
- int team_curr_active;
- int system_active;
-
- KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
- root, set_nproc ) );
- KMP_DEBUG_ASSERT( root );
- KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
- KMP_DEBUG_ASSERT( set_nproc > 1 );
-
- if ( set_nproc == 1) {
- KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
- return 1;
- }
-
- //
- // Threads that are active in the thread pool, active in the hot team
- // for this particular root (if we are at the outer par level), and
- // the currently executing thread (to become the master) are available
- // to add to the new team, but are currently contributing to the system
- // load, and must be accounted for.
- //
- pool_active = TCR_4(__kmp_thread_pool_active_nth);
- hot_team_active = __kmp_active_hot_team_nproc( root );
- team_curr_active = pool_active + hot_team_active + 1;
-
- //
- // Check the system load.
- //
- system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
- KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
- system_active, pool_active, hot_team_active ) );
-
- if ( system_active < 0 ) {
- //
- // There was an error reading the necessary info from /proc,
- // so use the thread limit algorithm instead. Once we set
- // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
- // we shouldn't wind up getting back here.
- //
- __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
- KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
-
- //
- // Make this call behave like the thread limit algorithm.
- //
- retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
- : root->r.r_hot_team->t.t_nproc);
- if ( retval > set_nproc ) {
- retval = set_nproc;
- }
- if ( retval < KMP_MIN_NTH ) {
- retval = KMP_MIN_NTH;
- }
-
- KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
- return retval;
- }
-
- //
- // There is a slight delay in the load balance algorithm in detecting
- // new running procs. The real system load at this instant should be
- // at least as large as the number of active OMP threads that are available to
- // add to the team.
- //
- if ( system_active < team_curr_active ) {
- system_active = team_curr_active;
- }
- retval = __kmp_avail_proc - system_active + team_curr_active;
- if ( retval > set_nproc ) {
- retval = set_nproc;
- }
- if ( retval < KMP_MIN_NTH ) {
- retval = KMP_MIN_NTH;
- }
-
- KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
- return retval;
-} // __kmp_load_balance_nproc()
-
-#endif /* USE_LOAD_BALANCE */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* NOTE: this is called with the __kmp_init_lock held */
-void
-__kmp_cleanup( void )
-{
- int f;
-
- KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
-
- if (TCR_4(__kmp_init_parallel)) {
-#if KMP_HANDLE_SIGNALS
- __kmp_remove_signals();
-#endif
- TCW_4(__kmp_init_parallel, FALSE);
- }
-
- if (TCR_4(__kmp_init_middle)) {
-#if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_uninitialize();
-#endif /* KMP_AFFINITY_SUPPORTED */
- __kmp_cleanup_hierarchy();
- TCW_4(__kmp_init_middle, FALSE);
- }
-
- KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
-
- if (__kmp_init_serial) {
- __kmp_runtime_destroy();
- __kmp_init_serial = FALSE;
- }
-
- for ( f = 0; f < __kmp_threads_capacity; f++ ) {
- if ( __kmp_root[ f ] != NULL ) {
- __kmp_free( __kmp_root[ f ] );
- __kmp_root[ f ] = NULL;
- }
- }
- __kmp_free( __kmp_threads );
- // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is no need
- // to free __kmp_root separately.
- __kmp_threads = NULL;
- __kmp_root = NULL;
- __kmp_threads_capacity = 0;
-
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_cleanup_indirect_user_locks();
-#else
- __kmp_cleanup_user_locks();
-#endif
-
- #if KMP_AFFINITY_SUPPORTED
- KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
- __kmp_cpuinfo_file = NULL;
- #endif /* KMP_AFFINITY_SUPPORTED */
-
- #if KMP_USE_ADAPTIVE_LOCKS
- #if KMP_DEBUG_ADAPTIVE_LOCKS
- __kmp_print_speculative_stats();
- #endif
- #endif
- KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
- __kmp_nested_nth.nth = NULL;
- __kmp_nested_nth.size = 0;
- __kmp_nested_nth.used = 0;
-
- __kmp_i18n_catclose();
-
-#if KMP_STATS_ENABLED
- __kmp_accumulate_stats_at_exit();
- __kmp_stats_list.deallocate();
-#endif
-
- KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-int
-__kmp_ignore_mppbeg( void )
-{
- char *env;
-
- if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
- if (__kmp_str_match_false( env ))
- return FALSE;
- }
- // By default __kmpc_begin() is no-op.
- return TRUE;
-}
-
-int
-__kmp_ignore_mppend( void )
-{
- char *env;
-
- if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
- if (__kmp_str_match_false( env ))
- return FALSE;
- }
- // By default __kmpc_end() is no-op.
- return TRUE;
-}
-
-void
-__kmp_internal_begin( void )
-{
- int gtid;
- kmp_root_t *root;
-
- /* this is a very important step as it will register new sibling threads
- * and assign these new uber threads a new gtid */
- gtid = __kmp_entry_gtid();
- root = __kmp_threads[ gtid ]->th.th_root;
- KMP_ASSERT( KMP_UBER_GTID( gtid ));
-
- if( root->r.r_begin ) return;
- __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
- if( root->r.r_begin ) {
- __kmp_release_lock( & root->r.r_begin_lock, gtid );
- return;
- }
-
- root->r.r_begin = TRUE;
-
- __kmp_release_lock( & root->r.r_begin_lock, gtid );
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_user_set_library (enum library_type arg)
-{
- int gtid;
- kmp_root_t *root;
- kmp_info_t *thread;
-
- /* first, make sure we are initialized so we can get our gtid */
-
- gtid = __kmp_entry_gtid();
- thread = __kmp_threads[ gtid ];
-
- root = thread->th.th_root;
-
- KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
- if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
- KMP_WARNING( SetLibraryIncorrectCall );
- return;
- }
-
- switch ( arg ) {
- case library_serial :
- thread->th.th_set_nproc = 0;
- set__nproc( thread, 1 );
- break;
- case library_turnaround :
- thread->th.th_set_nproc = 0;
- set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
- break;
- case library_throughput :
- thread->th.th_set_nproc = 0;
- set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
- break;
- default:
- KMP_FATAL( UnknownLibraryType, arg );
- }
-
- __kmp_aux_set_library ( arg );
-}
-
-void
-__kmp_aux_set_stacksize( size_t arg )
-{
- if (! __kmp_init_serial)
- __kmp_serial_initialize();
-
-#if KMP_OS_DARWIN
- if (arg & (0x1000 - 1)) {
- arg &= ~(0x1000 - 1);
- if(arg + 0x1000) /* check for overflow if we round up */
- arg += 0x1000;
- }
-#endif
- __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
-
- /* only change the default stacksize before the first parallel region */
- if (! TCR_4(__kmp_init_parallel)) {
- size_t value = arg; /* argument is in bytes */
-
- if (value < __kmp_sys_min_stksize )
- value = __kmp_sys_min_stksize ;
- else if (value > KMP_MAX_STKSIZE)
- value = KMP_MAX_STKSIZE;
-
- __kmp_stksize = value;
-
- __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
- }
-
- __kmp_release_bootstrap_lock( &__kmp_initz_lock );
-}
-
-/* set the behaviour of the runtime library */
-/* TODO this can cause some odd behaviour with sibling parallelism... */
-void
-__kmp_aux_set_library (enum library_type arg)
-{
- __kmp_library = arg;
-
- switch ( __kmp_library ) {
- case library_serial :
- {
- KMP_INFORM( LibraryIsSerial );
- (void) __kmp_change_library( TRUE );
- }
- break;
- case library_turnaround :
- (void) __kmp_change_library( TRUE );
- break;
- case library_throughput :
- (void) __kmp_change_library( FALSE );
- break;
- default:
- KMP_FATAL( UnknownLibraryType, arg );
- }
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
-{
- int blocktime = arg; /* argument is in milliseconds */
- int bt_intervals;
- int bt_set;
-
- __kmp_save_internal_controls( thread );
-
- /* Normalize and set blocktime for the teams */
- if (blocktime < KMP_MIN_BLOCKTIME)
- blocktime = KMP_MIN_BLOCKTIME;
- else if (blocktime > KMP_MAX_BLOCKTIME)
- blocktime = KMP_MAX_BLOCKTIME;
-
- set__blocktime_team( thread->th.th_team, tid, blocktime );
- set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
-
- /* Calculate and set blocktime intervals for the teams */
- bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
-
- set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
- set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
-
- /* Record that blocktime has been explicitly set */
- bt_set = TRUE;
-
- set__bt_set_team( thread->th.th_team, tid, bt_set );
- set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
- KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
- __kmp_gtid_from_tid(tid, thread->th.th_team),
- thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
-}
-
-void
-__kmp_aux_set_defaults(
- char const * str,
- int len
-) {
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- };
- __kmp_env_initialize( str );
-
- if (__kmp_settings
-#if OMP_40_ENABLED
- || __kmp_display_env || __kmp_display_env_verbose
-#endif // OMP_40_ENABLED
- ) {
- __kmp_env_print();
- }
-} // __kmp_aux_set_defaults
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * internal fast reduction routines
- */
-
-PACKED_REDUCTION_METHOD_T
-__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck )
-{
-
- // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
- // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
- // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
- // Finally, it's up to the OpenMP RTL to decide which method to select among those generated by PAROPT.
-
- PACKED_REDUCTION_METHOD_T retval;
-
- int team_size;
-
- KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
- KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
-
- #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
- #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
-
- retval = critical_reduce_block;
-
- team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
-
- if( team_size == 1 ) {
-
- retval = empty_reduce_block;
-
- } else {
-
- int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
- int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
-
- #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
-
- #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
-
- int teamsize_cutoff = 4;
-
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic ) {
- teamsize_cutoff = 8;
- }
-#endif
- if( tree_available ) {
- if( team_size <= teamsize_cutoff ) {
- if ( atomic_available ) {
- retval = atomic_reduce_block;
- }
- } else {
- retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
- }
- } else if ( atomic_available ) {
- retval = atomic_reduce_block;
- }
- #else
- #error "Unknown or unsupported OS"
- #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
-
- #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
-
- #if KMP_OS_LINUX || KMP_OS_WINDOWS
-
- // basic tuning
-
- if( atomic_available ) {
- if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
- retval = atomic_reduce_block;
- }
- } // otherwise: use critical section
-
- #elif KMP_OS_DARWIN
-
- if( atomic_available && ( num_vars <= 3 ) ) {
- retval = atomic_reduce_block;
- } else if( tree_available ) {
- if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
- retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
- }
- } // otherwise: use critical section
-
- #else
- #error "Unknown or unsupported OS"
- #endif
-
- #else
- #error "Unknown or unsupported architecture"
- #endif
-
- }
-
- // KMP_FORCE_REDUCTION
-
- // If the team is serialized (team_size == 1), ignore the forced reduction
- // method and stay with the unsynchronized method (empty_reduce_block)
- if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
-
- PACKED_REDUCTION_METHOD_T forced_retval;
-
- int atomic_available, tree_available;
-
- switch( ( forced_retval = __kmp_force_reduction_method ) )
- {
- case critical_reduce_block:
- KMP_ASSERT( lck ); // lck should be != 0
- break;
-
- case atomic_reduce_block:
- atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
- KMP_ASSERT( atomic_available ); // atomic_available should be != 0
- break;
-
- case tree_reduce_block:
- tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
- KMP_ASSERT( tree_available ); // tree_available should be != 0
- #if KMP_FAST_REDUCTION_BARRIER
- forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
- #endif
- break;
-
- default:
- KMP_ASSERT( 0 ); // "unsupported method specified"
- }
-
- retval = forced_retval;
- }
-
- KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
-
- #undef FAST_REDUCTION_TREE_METHOD_GENERATED
- #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
-
- return ( retval );
-}
-
-// this function is for testing set/get/determine reduce method
-kmp_int32
-__kmp_get_reduce_method( void ) {
- return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
-}
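__kmp_get_reduce_method above recovers the chosen method by shifting the packed value right by 8 bits, which implies the reduction method occupies the upper bits of packed_reduction_method while the low byte carries auxiliary information. A minimal sketch of such a packing scheme, assuming that layout (the helper names and the meaning of the low byte are illustrative, not taken from this source):

/* Illustrative sketch only: assumes the method id sits in bits 8 and above and
 * the low byte holds auxiliary flags, mirroring the ">> 8" accessor above. */
#include <stdint.h>

static inline int32_t pack_reduction_method(int32_t method, int32_t low_byte) {
    return (method << 8) | (low_byte & 0xff);  /* method in the high bits */
}

static inline int32_t unpack_reduction_method(int32_t packed) {
    return packed >> 8;                        /* same operation as __kmp_get_reduce_method */
}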
-
-/* ------------------------------------------------------------------------ */
+ if ( ! __kmp_str_match_true( duplicate_ok ) ) {
+ // That's not allowed. Issue fatal error.
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
+ KMP_HNT( DuplicateLibrary ),
+ __kmp_msg_null
+ );
+ }; // if
+ KMP_INTERNAL_FREE( duplicate_ok );
+ __kmp_duplicate_library_ok = 1;
+ done = 1; // Exit the loop.
+ } break;
+ case 2 : { // Neighbor is dead.
+ // Clear the variable and try to register library again.
+ __kmp_env_unset( name );
+ } break;
+ default : {
+ KMP_DEBUG_ASSERT( 0 );
+ } break;
+ }; // switch
+
+ }; // if
+ KMP_INTERNAL_FREE( (void *) value );
+
+ }; // while
+ KMP_INTERNAL_FREE( (void *) name );
+
+} // func __kmp_register_library_startup
+
+
+void
+__kmp_unregister_library( void ) {
+
+ char * name = __kmp_reg_status_name();
+ char * value = __kmp_env_get( name );
+
+ KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
+ KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
+ if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
+ // Ok, this is our variable. Delete it.
+ __kmp_env_unset( name );
+ }; // if
+
+ KMP_INTERNAL_FREE( __kmp_registration_str );
+ KMP_INTERNAL_FREE( value );
+ KMP_INTERNAL_FREE( name );
+
+ __kmp_registration_flag = 0;
+ __kmp_registration_str = NULL;
+
+} // __kmp_unregister_library
+
+
+// End of Library registration stuff.
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+
+static void __kmp_check_mic_type()
+{
+ kmp_cpuid_t cpuid_state = {0};
+ kmp_cpuid_t * cs_p = &cpuid_state;
+ __kmp_x86_cpuid(1, 0, cs_p);
+ // We don't support mic1 at the moment
+ if( (cs_p->eax & 0xff0) == 0xB10 ) {
+ __kmp_mic_type = mic2;
+ } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
+ __kmp_mic_type = mic3;
+ } else {
+ __kmp_mic_type = non_mic;
+ }
+}
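For reference, the masks above compare raw CPUID leaf-1 EAX signatures: 0xB10 corresponds to family 0x0B, model 1 (KNC), and 0x50670 to family 6, model 0x57 (KNL). A small sketch that decodes those fields explicitly using the standard leaf-1 bit layout (the helper is illustrative and not part of this file):

/* Sketch: decode family/model from CPUID.1:EAX the way the masks above imply. */
static void decode_cpuid_signature(unsigned eax, unsigned *family, unsigned *model) {
    unsigned base_family = (eax >> 8)  & 0xf;
    unsigned base_model  = (eax >> 4)  & 0xf;
    unsigned ext_model   = (eax >> 16) & 0xf;
    *family = base_family;                               /* extended family not needed here */
    *model  = (base_family == 0x6 || base_family == 0xf)
                ? (ext_model << 4) | base_model          /* extended model applies */
                : base_model;
}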
+
+#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
+
+static void
+__kmp_do_serial_initialize( void )
+{
+ int i, gtid;
+ int size;
+
+ KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
+
+ KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
+ KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
+ KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
+ KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
+ KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
+
+#if OMPT_SUPPORT
+ ompt_pre_init();
+#endif
+
+ __kmp_validate_locks();
+
+ /* Initialize internal memory allocator */
+ __kmp_init_allocator();
+
+ /* Register the library startup via an environment variable
+ and check to see whether another copy of the library is already
+ registered. */
+
+ __kmp_register_library_startup( );
+
+ /* TODO reinitialization of library */
+ if( TCR_4(__kmp_global.g.g_done) ) {
+ KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
+ }
+
+ __kmp_global.g.g_abort = 0;
+ TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
+
+ /* initialize the locks */
+#if KMP_USE_ADAPTIVE_LOCKS
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+ __kmp_init_speculative_stats();
+#endif
+#endif
+#if KMP_STATS_ENABLED
+ __kmp_init_tas_lock( & __kmp_stats_lock );
+#endif
+ __kmp_init_lock( & __kmp_global_lock );
+ __kmp_init_queuing_lock( & __kmp_dispatch_lock );
+ __kmp_init_lock( & __kmp_debug_lock );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
+ __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
+ __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
+ __kmp_init_bootstrap_lock( & __kmp_exit_lock );
+ __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
+ __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
+
+ /* conduct initialization and initial setup of configuration */
+
+ __kmp_runtime_initialize();
+
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ __kmp_check_mic_type();
+#endif
+
+ // Some global variable initialization moved here from kmp_env_initialize()
+#ifdef KMP_DEBUG
+ kmp_diag = 0;
+#endif
+ __kmp_abort_delay = 0;
+
+ // From __kmp_init_dflt_team_nth()
+ /* assume the entire machine will be used */
+ __kmp_dflt_team_nth_ub = __kmp_xproc;
+ if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
+ __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
+ }
+ if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
+ __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
+ }
+ __kmp_max_nth = __kmp_sys_max_nth;
+
+ // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
+ __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
+ __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
+ __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
+ // From "KMP_LIBRARY" part of __kmp_env_initialize()
+ __kmp_library = library_throughput;
+ // From KMP_SCHEDULE initialization
+ __kmp_static = kmp_sch_static_balanced;
+ // AC: do not use analytical here, because it is non-monotonic
+ //__kmp_guided = kmp_sch_guided_iterative_chunked;
+ //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeat the assignment
+ // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
+ // control parts
+ #if KMP_FAST_REDUCTION_BARRIER
+ #define kmp_reduction_barrier_gather_bb ((int)1)
+ #define kmp_reduction_barrier_release_bb ((int)1)
+ #define kmp_reduction_barrier_gather_pat bp_hyper_bar
+ #define kmp_reduction_barrier_release_pat bp_hyper_bar
+ #endif // KMP_FAST_REDUCTION_BARRIER
+ for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
+ __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
+ __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
+ __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
+ __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
+ #if KMP_FAST_REDUCTION_BARRIER
+ if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
+ __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
+ __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
+ __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
+ __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
+ }
+ #endif // KMP_FAST_REDUCTION_BARRIER
+ }
+ #if KMP_FAST_REDUCTION_BARRIER
+ #undef kmp_reduction_barrier_release_pat
+ #undef kmp_reduction_barrier_gather_pat
+ #undef kmp_reduction_barrier_release_bb
+ #undef kmp_reduction_barrier_gather_bb
+ #endif // KMP_FAST_REDUCTION_BARRIER
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic ) {
+ // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
+ __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather
+ __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
+ __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+ __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+ }
+#if KMP_FAST_REDUCTION_BARRIER
+ if( __kmp_mic_type != non_mic ) {
+ __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
+ __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
+ }
+#endif
+#endif
+
+ // From KMP_CHECKS initialization
+#ifdef KMP_DEBUG
+ __kmp_env_checks = TRUE; /* development versions have the extra checks */
+#else
+ __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
+#endif
+
+ // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
+ __kmp_foreign_tp = TRUE;
+
+ __kmp_global.g.g_dynamic = FALSE;
+ __kmp_global.g.g_dynamic_mode = dynamic_default;
+
+ __kmp_env_initialize( NULL );
+
+ // Print all messages in message catalog for testing purposes.
+ #ifdef KMP_DEBUG
+ char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
+ if ( __kmp_str_match_true( val ) ) {
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init( & buffer );
+ __kmp_i18n_dump_catalog( & buffer );
+ __kmp_printf( "%s", buffer.str );
+ __kmp_str_buf_free( & buffer );
+ }; // if
+ __kmp_env_free( & val );
+ #endif
+
+ __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
+ // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
+ __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
+
+ // If the library is shut down properly, both pools must be NULL. Just in case, set them
+ // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
+ KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
+ KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
+ KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
+ __kmp_thread_pool = NULL;
+ __kmp_thread_pool_insert_pt = NULL;
+ __kmp_team_pool = NULL;
+
+ /* Allocate all of the variable sized records */
+ /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
+ /* Since allocation is cache-aligned, just add extra padding at the end */
+ size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
+ __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
+ __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
+
+ /* init thread counts */
+ KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
+ KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
+ __kmp_all_nth = 0;
+ __kmp_nth = 0;
+
+ /* setup the uber master thread and hierarchy */
+ gtid = __kmp_register_root( TRUE );
+ KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
+ KMP_ASSERT( KMP_UBER_GTID( gtid ) );
+ KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ __kmp_common_initialize();
+
+ #if KMP_OS_UNIX
+ /* invoke the child fork handler */
+ __kmp_register_atfork();
+ #endif
+
+ #if ! defined KMP_DYNAMIC_LIB
+ {
+ /* Invoke the exit handler when the program finishes, only for the static library.
+ For the dynamic library, we already have _fini and DllMain.
+ */
+ int rc = atexit( __kmp_internal_end_atexit );
+ if ( rc != 0 ) {
+ __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
+ }; // if
+ }
+ #endif
+
+ #if KMP_HANDLE_SIGNALS
+ #if KMP_OS_UNIX
+ /* NOTE: make sure that this is called before the user installs
+ * their own signal handlers so that the user handlers
+ * are called first. This way they can return false,
+ * not call our handler, avoid terminating the library,
+ * and continue execution where they left off. */
+ __kmp_install_signals( FALSE );
+ #endif /* KMP_OS_UNIX */
+ #if KMP_OS_WINDOWS
+ __kmp_install_signals( TRUE );
+ #endif /* KMP_OS_WINDOWS */
+ #endif
+
+ /* we have finished the serial initialization */
+ __kmp_init_counter ++;
+
+ __kmp_init_serial = TRUE;
+
+ if (__kmp_settings) {
+ __kmp_env_print();
+ }
+
+#if OMP_40_ENABLED
+ if (__kmp_display_env || __kmp_display_env_verbose) {
+ __kmp_env_print_2();
+ }
+#endif // OMP_40_ENABLED
+
+#if OMPT_SUPPORT
+ ompt_post_init();
+#endif
+
+ KMP_MB();
+
+ KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
+}
+
+void
+__kmp_serial_initialize( void )
+{
+ if ( __kmp_init_serial ) {
+ return;
+ }
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+ if ( __kmp_init_serial ) {
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ }
+ __kmp_do_serial_initialize();
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+}
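__kmp_serial_initialize (and __kmp_middle_initialize / __kmp_parallel_initialize below) all follow the same double-checked pattern: an unsynchronized fast-path test, acquisition of the bootstrap lock, then a re-check before performing the one-time work. A generic, self-contained sketch of that shape, using a pthread mutex in place of the runtime's bootstrap lock and TCR_4/TCW_SYNC_4 accessors (names are placeholders, not part of this file):

#include <pthread.h>

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile int initialized = 0;

static void do_initialize(void) { /* heavyweight one-time setup would go here */ }

void ensure_initialized(void) {
    if (initialized)                  /* fast path: already done, skip the lock */
        return;
    pthread_mutex_lock(&init_lock);
    if (!initialized) {               /* re-check under the lock: another thread may have won */
        do_initialize();
        initialized = 1;
    }
    pthread_mutex_unlock(&init_lock);
}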
+
+static void
+__kmp_do_middle_initialize( void )
+{
+ int i, j;
+ int prev_dflt_team_nth;
+
+ if( !__kmp_init_serial ) {
+ __kmp_do_serial_initialize();
+ }
+
+ KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
+
+ //
+ // Save the previous value for the __kmp_dflt_team_nth so that
+ // we can avoid some reinitialization if it hasn't changed.
+ //
+ prev_dflt_team_nth = __kmp_dflt_team_nth;
+
+#if KMP_AFFINITY_SUPPORTED
+ //
+ // __kmp_affinity_initialize() will try to set __kmp_ncores to the
+ // number of cores on the machine.
+ //
+ __kmp_affinity_initialize();
+
+ //
+ // Run through the __kmp_threads array and set the affinity mask
+ // for each root thread that is currently registered with the RTL.
+ //
+ for ( i = 0; i < __kmp_threads_capacity; i++ ) {
+ if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
+ __kmp_affinity_set_init_mask( i, TRUE );
+ }
+ }
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+ KMP_ASSERT( __kmp_xproc > 0 );
+ if ( __kmp_avail_proc == 0 ) {
+ __kmp_avail_proc = __kmp_xproc;
+ }
+
+ // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
+ j = 0;
+ while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
+ __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
+ j++;
+ }
+
+ if ( __kmp_dflt_team_nth == 0 ) {
+#ifdef KMP_DFLT_NTH_CORES
+ //
+ // Default #threads = #cores
+ //
+ __kmp_dflt_team_nth = __kmp_ncores;
+ KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
+ __kmp_dflt_team_nth ) );
+#else
+ //
+ // Default #threads = #available OS procs
+ //
+ __kmp_dflt_team_nth = __kmp_avail_proc;
+ KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
+ __kmp_dflt_team_nth ) );
+#endif /* KMP_DFLT_NTH_CORES */
+ }
+
+ if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
+ __kmp_dflt_team_nth = KMP_MIN_NTH;
+ }
+ if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
+ __kmp_dflt_team_nth = __kmp_sys_max_nth;
+ }
+
+ //
+ // There's no harm in continuing if the following check fails,
+ // but it indicates an error in the previous logic.
+ //
+ KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
+
+ if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
+ //
+ // Run through the __kmp_threads array and set the num threads icv
+ // for each root thread that is currently registered with the RTL
+ // (which has not already explicitly set its nthreads-var with a
+ // call to omp_set_num_threads()).
+ //
+ for ( i = 0; i < __kmp_threads_capacity; i++ ) {
+ kmp_info_t *thread = __kmp_threads[ i ];
+ if ( thread == NULL ) continue;
+ if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
+
+ set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
+ }
+ }
+ KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
+ __kmp_dflt_team_nth) );
+
+#ifdef KMP_ADJUST_BLOCKTIME
+ /* Adjust blocktime to zero if necessary */
+ /* now that __kmp_avail_proc is set */
+ if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
+ KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
+ if ( __kmp_nth > __kmp_avail_proc ) {
+ __kmp_zero_bt = TRUE;
+ }
+ }
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+ /* we have finished middle initialization */
+ TCW_SYNC_4(__kmp_init_middle, TRUE);
+
+ KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
+}
+
+void
+__kmp_middle_initialize( void )
+{
+ if ( __kmp_init_middle ) {
+ return;
+ }
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+ if ( __kmp_init_middle ) {
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+ return;
+ }
+ __kmp_do_middle_initialize();
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+}
+
+void
+__kmp_parallel_initialize( void )
+{
+ int gtid = __kmp_entry_gtid(); // this might be a new root
+
+ /* synchronize parallel initialization (for sibling) */
+ if( TCR_4(__kmp_init_parallel) ) return;
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+ if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
+
+ /* TODO reinitialization after we have already shut down */
+ if( TCR_4(__kmp_global.g.g_done) ) {
+ KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
+ __kmp_infinite_loop();
+ }
+
+ /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
+ would cause a deadlock. So we call __kmp_do_serial_initialize directly.
+ */
+ if( !__kmp_init_middle ) {
+ __kmp_do_middle_initialize();
+ }
+
+ /* begin initialization */
+ KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
+ KMP_ASSERT( KMP_UBER_GTID( gtid ) );
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ //
+ // Save the FP control regs.
+ // Worker threads will set theirs to these values at thread startup.
+ //
+ __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
+ __kmp_store_mxcsr( &__kmp_init_mxcsr );
+ __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+#if KMP_OS_UNIX
+# if KMP_HANDLE_SIGNALS
+ /* must be after __kmp_serial_initialize */
+ __kmp_install_signals( TRUE );
+# endif
+#endif
+
+ __kmp_suspend_initialize();
+
+# if defined(USE_LOAD_BALANCE)
+ if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
+ __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
+ }
+#else
+ if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
+ __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
+ }
+#endif
+
+ if ( __kmp_version ) {
+ __kmp_print_version_2();
+ }
+
+ /* we have finished parallel initialization */
+ TCW_SYNC_4(__kmp_init_parallel, TRUE);
+
+ KMP_MB();
+ KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
+
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+}
+
+
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
+ kmp_team_t *team )
+{
+ kmp_disp_t *dispatch;
+
+ KMP_MB();
+
+ /* none of the threads have encountered any constructs, yet. */
+ this_thr->th.th_local.this_construct = 0;
+#if KMP_CACHE_MANAGE
+ KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
+#endif /* KMP_CACHE_MANAGE */
+ dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
+ KMP_DEBUG_ASSERT( dispatch );
+ KMP_DEBUG_ASSERT( team->t.t_dispatch );
+ //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
+
+ dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
+
+ if( __kmp_env_consistency_check )
+ __kmp_push_parallel( gtid, team->t.t_ident );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+}
+
+void
+__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
+ kmp_team_t *team )
+{
+ if( __kmp_env_consistency_check )
+ __kmp_pop_parallel( gtid, team->t.t_ident );
+}
+
+int
+__kmp_invoke_task_func( int gtid )
+{
+ int rc;
+ int tid = __kmp_tid_from_gtid( gtid );
+ kmp_info_t *this_thr = __kmp_threads[ gtid ];
+ kmp_team_t *team = this_thr->th.th_team;
+
+ __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
+#if USE_ITT_BUILD
+ if ( __itt_stack_caller_create_ptr ) {
+ __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
+ }
+#endif /* USE_ITT_BUILD */
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_INVOKING();
+#endif
+
+#if OMPT_SUPPORT
+ void *dummy;
+ void **exit_runtime_p;
+ ompt_task_id_t my_task_id;
+ ompt_parallel_id_t my_parallel_id;
+
+ if (ompt_enabled) {
+ exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
+ ompt_task_info.frame.exit_runtime_frame);
+ } else {
+ exit_runtime_p = &dummy;
+ }
+
+#if OMPT_TRACE
+ my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
+ my_parallel_id = team->t.ompt_team_info.parallel_id;
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+#endif
+
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
+ gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
+#if OMPT_SUPPORT
+ , exit_runtime_p
+#endif
+ );
+ }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled) {
+ if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
+ my_parallel_id, my_task_id);
+ }
+ // the implicit task is not dead yet, so we can't clear its task id here
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
+ }
+#endif
+
+#if USE_ITT_BUILD
+ if ( __itt_stack_caller_create_ptr ) {
+ __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
+ }
+#endif /* USE_ITT_BUILD */
+ __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
+
+ return rc;
+}
+
+#if OMP_40_ENABLED
+void
+__kmp_teams_master( int gtid )
+{
+ // This routine is called by all master threads in teams construct
+ kmp_info_t *thr = __kmp_threads[ gtid ];
+ kmp_team_t *team = thr->th.th_team;
+ ident_t *loc = team->t.t_ident;
+ thr->th.th_set_nproc = thr->th.th_teams_size.nth;
+ KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
+ KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
+ KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
+ gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
+ // Launch league of teams now, but do not let workers execute
+ // (they hang on fork barrier until next parallel)
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_FORKING();
+#endif
+ __kmp_fork_call( loc, gtid, fork_context_intel,
+ team->t.t_argc,
+#if OMPT_SUPPORT
+ (void *)thr->th.th_teams_microtask, // "unwrapped" task
+#endif
+ (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
+ VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
+ NULL );
+#if INCLUDE_SSC_MARKS
+ SSC_MARK_JOINING();
+#endif
+
+ // AC: last parameter "1" eliminates join barrier which won't work because
+ // worker threads are in a fork barrier waiting for more parallel regions
+ __kmp_join_call( loc, gtid
+#if OMPT_SUPPORT
+ , fork_context_intel
+#endif
+ , 1 );
+}
+
+int
+__kmp_invoke_teams_master( int gtid )
+{
+ kmp_info_t *this_thr = __kmp_threads[ gtid ];
+ kmp_team_t *team = this_thr->th.th_team;
+ #if KMP_DEBUG
+ if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
+ KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
+ #endif
+ __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
+ __kmp_teams_master( gtid );
+ __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
+ return 1;
+}
+#endif /* OMP_40_ENABLED */
+
+/* this sets the requested number of threads for the next parallel region
+ * encountered by this team */
+/* since this should be enclosed in the forkjoin critical section it
+ * should avoid race conditions with asymmetrical nested parallelism */
+
+void
+__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
+{
+ kmp_info_t *thr = __kmp_threads[gtid];
+
+ if( num_threads > 0 )
+ thr->th.th_set_nproc = num_threads;
+}
+
+#if OMP_40_ENABLED
+
+/* this sets the requested number of teams for the teams region and/or
+ * the number of threads for the next parallel region encountered */
+void
+__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
+{
+ kmp_info_t *thr = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(num_teams >= 0);
+ KMP_DEBUG_ASSERT(num_threads >= 0);
+
+ if( num_teams == 0 )
+ num_teams = 1; // default number of teams is 1.
+ if( num_teams > __kmp_max_nth ) { // too many teams requested?
+ if ( !__kmp_reserve_warn ) {
+ __kmp_reserve_warn = 1;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
+ KMP_HNT( Unset_ALL_THREADS ),
+ __kmp_msg_null
+ );
+ }
+ num_teams = __kmp_max_nth;
+ }
+ // Set number of teams (number of threads in the outer "parallel" of the teams)
+ thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
+
+ // Remember the number of threads for inner parallel regions
+ if( num_threads == 0 ) {
+ if( !TCR_4(__kmp_init_middle) )
+ __kmp_middle_initialize(); // get __kmp_avail_proc calculated
+ num_threads = __kmp_avail_proc / num_teams;
+ if( num_teams * num_threads > __kmp_max_nth ) {
+ // adjust num_threads w/o warning as it is not a user setting
+ num_threads = __kmp_max_nth / num_teams;
+ }
+ } else {
+ if( num_teams * num_threads > __kmp_max_nth ) {
+ int new_threads = __kmp_max_nth / num_teams;
+ if ( !__kmp_reserve_warn ) { // user asked for too many threads
+ __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
+ KMP_HNT( Unset_ALL_THREADS ),
+ __kmp_msg_null
+ );
+ }
+ num_threads = new_threads;
+ }
+ }
+ thr->th.th_teams_size.nth = num_threads;
+}
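Stripped of the warnings and the middle-initialization call, the sizing logic above reduces to a few clamps: default the teams count to 1, cap it at __kmp_max_nth, derive a default thread count from the available processors, and keep num_teams * num_threads within the same limit. A side-effect-free sketch of that arithmetic (the helper is hypothetical; max_nth and avail_proc stand in for __kmp_max_nth and __kmp_avail_proc):

static void clamp_teams_size(int *num_teams, int *num_threads,
                             int max_nth, int avail_proc) {
    if (*num_teams == 0)
        *num_teams = 1;                              /* default: one team */
    if (*num_teams > max_nth)
        *num_teams = max_nth;                        /* too many teams requested */

    if (*num_threads == 0)
        *num_threads = avail_proc / *num_teams;      /* split available procs evenly */
    if (*num_teams * *num_threads > max_nth)
        *num_threads = max_nth / *num_teams;         /* keep the product within the limit */
}
/* Example: max_nth=8, avail_proc=8, num_teams=3, num_threads=0 -> 2 threads per team. */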
+
+
+//
+// Set the proc_bind var to use in the following parallel region.
+//
+void
+__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
+{
+ kmp_info_t *thr = __kmp_threads[gtid];
+ thr->th.th_set_proc_bind = proc_bind;
+}
+
+#endif /* OMP_40_ENABLED */
+
+/* Launch the worker threads into the microtask. */
+
+void
+__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
+{
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+
+#ifdef KMP_DEBUG
+ int f;
+#endif /* KMP_DEBUG */
+
+ KMP_DEBUG_ASSERT( team );
+ KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
+ KMP_ASSERT( KMP_MASTER_GTID(gtid) );
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ team->t.t_construct = 0; /* no single directives seen yet */
+ team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
+
+ /* Reset the identifiers on the dispatch buffer */
+ KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
+ if ( team->t.t_max_nproc > 1 ) {
+ int i;
+ for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
+ team->t.t_disp_buffer[ i ].buffer_index = i;
+ } else {
+ team->t.t_disp_buffer[ 0 ].buffer_index = 0;
+ }
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ KMP_ASSERT( this_thr->th.th_team == team );
+
+#ifdef KMP_DEBUG
+ for( f=0 ; f<team->t.t_nproc ; f++ ) {
+ KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
+ team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
+ }
+#endif /* KMP_DEBUG */
+
+ /* release the worker threads so they may begin working */
+ __kmp_fork_barrier( gtid, 0 );
+}
+
+
+void
+__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
+{
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+
+ KMP_DEBUG_ASSERT( team );
+ KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
+ KMP_ASSERT( KMP_MASTER_GTID(gtid) );
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* Join barrier after fork */
+
+#ifdef KMP_DEBUG
+ if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
+ __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
+ __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
+ gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
+ __kmp_print_structure();
+ }
+ KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
+ __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
+#endif /* KMP_DEBUG */
+
+ __kmp_join_barrier( gtid ); /* wait for everyone */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ KMP_ASSERT( this_thr->th.th_team == team );
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef USE_LOAD_BALANCE
+
+//
+// Return the number of worker threads actively spinning in the hot team
+// if we are at the outermost level of parallelism. Otherwise, return 0.
+//
+static int
+__kmp_active_hot_team_nproc( kmp_root_t *root )
+{
+ int i;
+ int retval;
+ kmp_team_t *hot_team;
+
+ if ( root->r.r_active ) {
+ return 0;
+ }
+ hot_team = root->r.r_hot_team;
+ if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
+ return hot_team->t.t_nproc - 1; // Don't count master thread
+ }
+
+ //
+ // Skip the master thread - it is accounted for elsewhere.
+ //
+ retval = 0;
+ for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
+ if ( hot_team->t.t_threads[i]->th.th_active ) {
+ retval++;
+ }
+ }
+ return retval;
+}
+
+//
+// Perform an automatic adjustment to the number of
+// threads used by the next parallel region.
+//
+static int
+__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
+{
+ int retval;
+ int pool_active;
+ int hot_team_active;
+ int team_curr_active;
+ int system_active;
+
+ KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
+ root, set_nproc ) );
+ KMP_DEBUG_ASSERT( root );
+ KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
+ KMP_DEBUG_ASSERT( set_nproc > 1 );
+
+ if ( set_nproc == 1) {
+ KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
+ return 1;
+ }
+
+ //
+ // Threads that are active in the thread pool, active in the hot team
+ // for this particular root (if we are at the outer par level), and
+ // the currently executing thread (to become the master) are available
+ // to add to the new team, but are currently contributing to the system
+ // load, and must be accounted for.
+ //
+ pool_active = TCR_4(__kmp_thread_pool_active_nth);
+ hot_team_active = __kmp_active_hot_team_nproc( root );
+ team_curr_active = pool_active + hot_team_active + 1;
+
+ //
+ // Check the system load.
+ //
+ system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
+ KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
+ system_active, pool_active, hot_team_active ) );
+
+ if ( system_active < 0 ) {
+ //
+ // There was an error reading the necessary info from /proc,
+ // so use the thread limit algorithm instead. Once we set
+ // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
+ // we shouldn't wind up getting back here.
+ //
+ __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
+ KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
+
+ //
+ // Make this call behave like the thread limit algorithm.
+ //
+ retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
+ : root->r.r_hot_team->t.t_nproc);
+ if ( retval > set_nproc ) {
+ retval = set_nproc;
+ }
+ if ( retval < KMP_MIN_NTH ) {
+ retval = KMP_MIN_NTH;
+ }
+
+ KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
+ return retval;
+ }
+
+ //
+ // There is a slight delay in the load balance algorithm in detecting
+ // new running procs. The real system load at this instant should be
+ // at least as large as the number of active OMP threads that are available to
+ // add to the team.
+ //
+ if ( system_active < team_curr_active ) {
+ system_active = team_curr_active;
+ }
+ retval = __kmp_avail_proc - system_active + team_curr_active;
+ if ( retval > set_nproc ) {
+ retval = set_nproc;
+ }
+ if ( retval < KMP_MIN_NTH ) {
+ retval = KMP_MIN_NTH;
+ }
+
+ KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
+ return retval;
+} // __kmp_load_balance_nproc()
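In the normal (non-error) path the computation above is simple arithmetic: take the free processors (__kmp_avail_proc minus the observed system load), add back the threads this root already owns (team_curr_active), and clamp the result between KMP_MIN_NTH and the requested set_nproc. A worked sketch of just that step (helper and parameter names are illustrative):

static int load_balance_nproc(int avail_proc, int system_active,
                              int team_curr_active, int set_nproc, int min_nth) {
    if (system_active < team_curr_active)
        system_active = team_curr_active;         /* load readings can lag behind */
    int retval = avail_proc - system_active + team_curr_active;
    if (retval > set_nproc) retval = set_nproc;   /* never exceed the request */
    if (retval < min_nth)   retval = min_nth;     /* never drop below the minimum */
    return retval;
}
/* Example: 16 procs, 10 active system-wide, 4 of them ours, request 12 -> 16 - 10 + 4 = 10. */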
+
+#endif /* USE_LOAD_BALANCE */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* NOTE: this is called with the __kmp_init_lock held */
+void
+__kmp_cleanup( void )
+{
+ int f;
+
+ KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
+
+ if (TCR_4(__kmp_init_parallel)) {
+#if KMP_HANDLE_SIGNALS
+ __kmp_remove_signals();
+#endif
+ TCW_4(__kmp_init_parallel, FALSE);
+ }
+
+ if (TCR_4(__kmp_init_middle)) {
+#if KMP_AFFINITY_SUPPORTED
+ __kmp_affinity_uninitialize();
+#endif /* KMP_AFFINITY_SUPPORTED */
+ __kmp_cleanup_hierarchy();
+ TCW_4(__kmp_init_middle, FALSE);
+ }
+
+ KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
+
+ if (__kmp_init_serial) {
+ __kmp_runtime_destroy();
+ __kmp_init_serial = FALSE;
+ }
+
+ for ( f = 0; f < __kmp_threads_capacity; f++ ) {
+ if ( __kmp_root[ f ] != NULL ) {
+ __kmp_free( __kmp_root[ f ] );
+ __kmp_root[ f ] = NULL;
+ }
+ }
+ __kmp_free( __kmp_threads );
+ // __kmp_threads and __kmp_root were allocated at once, as a single block, so there is no need
+ // to free __kmp_root separately.
+ __kmp_threads = NULL;
+ __kmp_root = NULL;
+ __kmp_threads_capacity = 0;
+
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_cleanup_indirect_user_locks();
+#else
+ __kmp_cleanup_user_locks();
+#endif
+
+ #if KMP_AFFINITY_SUPPORTED
+ KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
+ __kmp_cpuinfo_file = NULL;
+ #endif /* KMP_AFFINITY_SUPPORTED */
+
+ #if KMP_USE_ADAPTIVE_LOCKS
+ #if KMP_DEBUG_ADAPTIVE_LOCKS
+ __kmp_print_speculative_stats();
+ #endif
+ #endif
+ KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
+ __kmp_nested_nth.nth = NULL;
+ __kmp_nested_nth.size = 0;
+ __kmp_nested_nth.used = 0;
+
+ __kmp_i18n_catclose();
+
+#if KMP_STATS_ENABLED
+ __kmp_accumulate_stats_at_exit();
+ __kmp_stats_list.deallocate();
+#endif
+
+ KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+int
+__kmp_ignore_mppbeg( void )
+{
+ char *env;
+
+ if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
+ if (__kmp_str_match_false( env ))
+ return FALSE;
+ }
+ // By default __kmpc_begin() is no-op.
+ return TRUE;
+}
+
+int
+__kmp_ignore_mppend( void )
+{
+ char *env;
+
+ if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
+ if (__kmp_str_match_false( env ))
+ return FALSE;
+ }
+ // By default __kmpc_end() is no-op.
+ return TRUE;
+}
+
+void
+__kmp_internal_begin( void )
+{
+ int gtid;
+ kmp_root_t *root;
+
+ /* this is a very important step as it will register new sibling threads
+ * and assign these new uber threads a new gtid */
+ gtid = __kmp_entry_gtid();
+ root = __kmp_threads[ gtid ]->th.th_root;
+ KMP_ASSERT( KMP_UBER_GTID( gtid ));
+
+ if( root->r.r_begin ) return;
+ __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
+ if( root->r.r_begin ) {
+ __kmp_release_lock( & root->r.r_begin_lock, gtid );
+ return;
+ }
+
+ root->r.r_begin = TRUE;
+
+ __kmp_release_lock( & root->r.r_begin_lock, gtid );
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_user_set_library (enum library_type arg)
+{
+ int gtid;
+ kmp_root_t *root;
+ kmp_info_t *thread;
+
+ /* first, make sure we are initialized so we can get our gtid */
+
+ gtid = __kmp_entry_gtid();
+ thread = __kmp_threads[ gtid ];
+
+ root = thread->th.th_root;
+
+ KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
+ if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
+ KMP_WARNING( SetLibraryIncorrectCall );
+ return;
+ }
+
+ switch ( arg ) {
+ case library_serial :
+ thread->th.th_set_nproc = 0;
+ set__nproc( thread, 1 );
+ break;
+ case library_turnaround :
+ thread->th.th_set_nproc = 0;
+ set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
+ break;
+ case library_throughput :
+ thread->th.th_set_nproc = 0;
+ set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
+ break;
+ default:
+ KMP_FATAL( UnknownLibraryType, arg );
+ }
+
+ __kmp_aux_set_library ( arg );
+}
+
+void
+__kmp_aux_set_stacksize( size_t arg )
+{
+ if (! __kmp_init_serial)
+ __kmp_serial_initialize();
+
+#if KMP_OS_DARWIN
+ if (arg & (0x1000 - 1)) {
+ arg &= ~(0x1000 - 1);
+ if(arg + 0x1000) /* check for overflow if we round up */
+ arg += 0x1000;
+ }
+#endif
+ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
+
+ /* only change the default stacksize before the first parallel region */
+ if (! TCR_4(__kmp_init_parallel)) {
+ size_t value = arg; /* argument is in bytes */
+
+ if (value < __kmp_sys_min_stksize )
+ value = __kmp_sys_min_stksize ;
+ else if (value > KMP_MAX_STKSIZE)
+ value = KMP_MAX_STKSIZE;
+
+ __kmp_stksize = value;
+
+ __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
+ }
+
+ __kmp_release_bootstrap_lock( &__kmp_initz_lock );
+}
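The KMP_OS_DARWIN branch above rounds the requested stack size up to the next 4 KiB page, with a guard against overflow of the unsigned size. The same rounding written as a standalone helper (a sketch; the function name is illustrative):

#include <stddef.h>

static size_t round_up_to_page(size_t bytes) {
    const size_t page = 0x1000;          /* 4 KiB page, as in the code above */
    if (bytes & (page - 1)) {            /* not already page-aligned */
        bytes &= ~(page - 1);            /* drop the partial page */
        if (bytes + page)                /* skip the add if it would wrap around */
            bytes += page;               /* round up to the next page */
    }
    return bytes;
}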
+
+/* set the behaviour of the runtime library */
+/* TODO this can cause some odd behaviour with sibling parallelism... */
+void
+__kmp_aux_set_library (enum library_type arg)
+{
+ __kmp_library = arg;
+
+ switch ( __kmp_library ) {
+ case library_serial :
+ {
+ KMP_INFORM( LibraryIsSerial );
+ (void) __kmp_change_library( TRUE );
+ }
+ break;
+ case library_turnaround :
+ (void) __kmp_change_library( TRUE );
+ break;
+ case library_throughput :
+ (void) __kmp_change_library( FALSE );
+ break;
+ default:
+ KMP_FATAL( UnknownLibraryType, arg );
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
+{
+ int blocktime = arg; /* argument is in milliseconds */
+ int bt_intervals;
+ int bt_set;
+
+ __kmp_save_internal_controls( thread );
+
+ /* Normalize and set blocktime for the teams */
+ if (blocktime < KMP_MIN_BLOCKTIME)
+ blocktime = KMP_MIN_BLOCKTIME;
+ else if (blocktime > KMP_MAX_BLOCKTIME)
+ blocktime = KMP_MAX_BLOCKTIME;
+
+ set__blocktime_team( thread->th.th_team, tid, blocktime );
+ set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
+
+ /* Calculate and set blocktime intervals for the teams */
+ bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
+
+ set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
+ set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
+
+ /* Record that blocktime has been explicitly set */
+ bt_set = TRUE;
+
+ set__bt_set_team( thread->th.th_team, tid, bt_set );
+ set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
+ KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
+ __kmp_gtid_from_tid(tid, thread->th.th_team),
+ thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
+}
+
+void
+__kmp_aux_set_defaults(
+ char const * str,
+ int len
+) {
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ };
+ __kmp_env_initialize( str );
+
+ if (__kmp_settings
+#if OMP_40_ENABLED
+ || __kmp_display_env || __kmp_display_env_verbose
+#endif // OMP_40_ENABLED
+ ) {
+ __kmp_env_print();
+ }
+} // __kmp_aux_set_defaults
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * internal fast reduction routines
+ */
+
+PACKED_REDUCTION_METHOD_T
+__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck )
+{
+
+ // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
+ // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
+ // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
+ // Finally, it is up to the OpenMP RTL to decide which of the methods generated by PAROPT to select.
+
+ PACKED_REDUCTION_METHOD_T retval;
+
+ int team_size;
+
+ KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
+ KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
+
+ #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
+ #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
+
+ retval = critical_reduce_block;
+
+ team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower
+
+ if( team_size == 1 ) {
+
+ retval = empty_reduce_block;
+
+ } else {
+
+ int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
+ int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
+
+ #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
+
+ #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
+
+ int teamsize_cutoff = 4;
+
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic ) {
+ teamsize_cutoff = 8;
+ }
+#endif
+ if( tree_available ) {
+ if( team_size <= teamsize_cutoff ) {
+ if ( atomic_available ) {
+ retval = atomic_reduce_block;
+ }
+ } else {
+ retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
+ }
+ } else if ( atomic_available ) {
+ retval = atomic_reduce_block;
+ }
+ #else
+ #error "Unknown or unsupported OS"
+ #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
+
+ #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
+
+ #if KMP_OS_LINUX || KMP_OS_WINDOWS
+
+ // basic tuning
+
+ if( atomic_available ) {
+ if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
+ retval = atomic_reduce_block;
+ }
+ } // otherwise: use critical section
+
+ #elif KMP_OS_DARWIN
+
+ if( atomic_available && ( num_vars <= 3 ) ) {
+ retval = atomic_reduce_block;
+ } else if( tree_available ) {
+ if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
+ retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
+ }
+ } // otherwise: use critical section
+
+ #else
+ #error "Unknown or unsupported OS"
+ #endif
+
+ #else
+ #error "Unknown or unsupported architecture"
+ #endif
+
+ }
+
+ // KMP_FORCE_REDUCTION
+
+ // If the team is serialized (team_size == 1), ignore the forced reduction
+ // method and stay with the unsynchronized method (empty_reduce_block)
+ if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
+
+ PACKED_REDUCTION_METHOD_T forced_retval;
+
+ int atomic_available, tree_available;
+
+ switch( ( forced_retval = __kmp_force_reduction_method ) )
+ {
+ case critical_reduce_block:
+ KMP_ASSERT( lck ); // lck should be != 0
+ break;
+
+ case atomic_reduce_block:
+ atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
+ KMP_ASSERT( atomic_available ); // atomic_available should be != 0
+ break;
+
+ case tree_reduce_block:
+ tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
+ KMP_ASSERT( tree_available ); // tree_available should be != 0
+ #if KMP_FAST_REDUCTION_BARRIER
+ forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
+ #endif
+ break;
+
+ default:
+ KMP_ASSERT( 0 ); // "unsupported method specified"
+ }
+
+ retval = forced_retval;
+ }
+
+ KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
+
+ #undef FAST_REDUCTION_TREE_METHOD_GENERATED
+ #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
+
+ return ( retval );
+}
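/* Simplified restatement of the selection logic above for the 64-bit branch,
   written as a standalone function. The enum and the cutoff parameter are
   local stand-ins for the runtime's PACKED_REDUCTION_METHOD_T values, used
   only to make the decision order explicit. */
enum reduce_method_sketch {
    critical_block_sk, atomic_block_sk, tree_block_sk, empty_block_sk
};

static enum reduce_method_sketch
pick_reduce_method(int team_size, int atomic_available, int tree_available,
                   int teamsize_cutoff /* 4, or 8 on MIC */) {
    if (team_size == 1)
        return empty_block_sk;                    /* serialized: no sync needed */
    if (tree_available) {
        if (team_size <= teamsize_cutoff)
            return atomic_available ? atomic_block_sk : critical_block_sk;
        return tree_block_sk;                     /* large teams use the tree   */
    }
    return atomic_available ? atomic_block_sk : critical_block_sk;
}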
+
+// this function is for testing set/get/determine reduce method
+kmp_int32
+__kmp_get_reduce_method( void ) {
+ return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
+}
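/* The getter above shifts the packed value right by 8, suggesting the method
   is kept in the upper bits with barrier information in the low byte. A
   hypothetical pack/unpack pair, only to illustrate that shift; the real
   encoding of PACKED_REDUCTION_METHOD_T may differ. */
static unsigned pack_reduction_method(unsigned method, unsigned barrier_kind) {
    return (method << 8) | (barrier_kind & 0xffu);
}
static unsigned unpack_reduction_method(unsigned packed) {
    return packed >> 8;   /* mirrors __kmp_get_reduce_method() */
}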
+
+/* ------------------------------------------------------------------------ */
diff --git a/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h b/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h
index 6ba814703b..c1df64c608 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_safe_c_api.h
@@ -1,62 +1,62 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_SAFE_C_API_H
-#define KMP_SAFE_C_API_H
-
-//
-// Replacement for banned C API
-//
-
-// Not every unsafe call listed here is handled now, but keeping everything
-// in one place should be handy for future maintenance.
-#if KMP_OS_WINDOWS
-
-# define RSIZE_MAX_STR ( 4UL << 10 ) // 4KB
-
-// _malloca was suggested, but it is not a drop-in replacement for _alloca
-# define KMP_ALLOCA _alloca
-
-# define KMP_MEMCPY_S memcpy_s
-# define KMP_SNPRINTF sprintf_s
-# define KMP_SSCANF sscanf_s
-# define KMP_STRCPY_S strcpy_s
-# define KMP_STRNCPY_S strncpy_s
-
-// Use this only when buffer size is unknown
-# define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt)
-
-# define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR)
-
-// Use this only when buffer size is unknown
-# define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt)
-
-// _TRUNCATE insures buffer size > max string to print.
-# define KMP_VSNPRINTF(dst, cnt, fmt, arg) vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg)
-
-#else // KMP_OS_WINDOWS
-
-// For now, these macros use the existing API.
-
-# define KMP_ALLOCA alloca
-# define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt)
-# define KMP_SNPRINTF snprintf
-# define KMP_SSCANF sscanf
-# define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src)
-# define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt)
-# define KMP_VSNPRINTF vsnprintf
-# define KMP_STRNCPY strncpy
-# define KMP_STRLEN strlen
-# define KMP_MEMCPY memcpy
-
-#endif // KMP_OS_WINDOWS
-
-#endif // KMP_SAFE_C_API_H
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_SAFE_C_API_H
+#define KMP_SAFE_C_API_H
+
+//
+// Replacement for banned C API
+//
+
+// Not every unsafe call listed here is handled now, but keeping everything
+// in one place should be handy for future maintenance.
+#if KMP_OS_WINDOWS
+
+# define RSIZE_MAX_STR ( 4UL << 10 ) // 4KB
+
+// _malloca was suggested, but it is not a drop-in replacement for _alloca
+# define KMP_ALLOCA _alloca
+
+# define KMP_MEMCPY_S memcpy_s
+# define KMP_SNPRINTF sprintf_s
+# define KMP_SSCANF sscanf_s
+# define KMP_STRCPY_S strcpy_s
+# define KMP_STRNCPY_S strncpy_s
+
+// Use this only when buffer size is unknown
+# define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt)
+
+# define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR)
+
+// Use this only when buffer size is unknown
+# define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt)
+
+// _TRUNCATE ensures buffer size > max string to print.
+# define KMP_VSNPRINTF(dst, cnt, fmt, arg) vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg)
+
+#else // KMP_OS_WINDOWS
+
+// For now, these macros use the existing API.
+
+# define KMP_ALLOCA alloca
+# define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt)
+# define KMP_SNPRINTF snprintf
+# define KMP_SSCANF sscanf
+# define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src)
+# define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt)
+# define KMP_VSNPRINTF vsnprintf
+# define KMP_STRNCPY strncpy
+# define KMP_STRLEN strlen
+# define KMP_MEMCPY memcpy
+
+#endif // KMP_OS_WINDOWS
+
+#endif // KMP_SAFE_C_API_H
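/* Minimal usage sketch, assuming the header above is included: the KMP_*
   wrappers expand to the *_s functions on Windows and to the plain libc calls
   elsewhere, so call sites stay identical. copy_name() is illustrative only
   and not part of the runtime. */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static void copy_name(char *dst, size_t dst_size, const char *src) {
    KMP_STRCPY_S(dst, dst_size, src);   /* strcpy_s(dst, dst_size, src) or strcpy(dst, src) */
    if (KMP_STRLEN(dst) == 0) {         /* strnlen_s(dst, RSIZE_MAX_STR) or strlen(dst)     */
        KMP_SNPRINTF(dst, dst_size, "%s", "unnamed");
    }
}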
diff --git a/contrib/libs/cxxsupp/openmp/kmp_sched.cpp b/contrib/libs/cxxsupp/openmp/kmp_sched.cpp
index 798ed0e206..80ad960a8d 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_sched.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_sched.cpp
@@ -1,940 +1,940 @@
-/*
- * kmp_sched.c -- static scheduling -- iteration initialization
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-/*
- * Static scheduling initialization.
- *
- * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
- * it may change values between parallel regions. __kmp_max_nth
- * is the largest value __kmp_nth may take, 1 is the smallest.
- *
- */
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_str.h"
-#include "kmp_error.h"
-#include "kmp_stats.h"
-#include "kmp_itt.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-// template for type limits
-template< typename T >
-struct i_maxmin {
- static const T mx;
- static const T mn;
-};
-template<>
-struct i_maxmin< int > {
- static const int mx = 0x7fffffff;
- static const int mn = 0x80000000;
-};
-template<>
-struct i_maxmin< unsigned int > {
- static const unsigned int mx = 0xffffffff;
- static const unsigned int mn = 0x00000000;
-};
-template<>
-struct i_maxmin< long long > {
- static const long long mx = 0x7fffffffffffffffLL;
- static const long long mn = 0x8000000000000000LL;
-};
-template<>
-struct i_maxmin< unsigned long long > {
- static const unsigned long long mx = 0xffffffffffffffffLL;
- static const unsigned long long mn = 0x0000000000000000LL;
-};
-//-------------------------------------------------------------------------
-#ifdef KMP_DEBUG
-//-------------------------------------------------------------------------
-// template for debug prints specification ( d, u, lld, llu )
- char const * traits_t< int >::spec = "d";
- char const * traits_t< unsigned int >::spec = "u";
- char const * traits_t< long long >::spec = "lld";
- char const * traits_t< unsigned long long >::spec = "llu";
-//-------------------------------------------------------------------------
-#endif
-
-template< typename T >
-static void
-__kmp_for_static_init(
- ident_t *loc,
- kmp_int32 global_tid,
- kmp_int32 schedtype,
- kmp_int32 *plastiter,
- T *plower,
- T *pupper,
- typename traits_t< T >::signed_t *pstride,
- typename traits_t< T >::signed_t incr,
- typename traits_t< T >::signed_t chunk
-) {
- KMP_COUNT_BLOCK(OMP_FOR_static);
- KMP_TIME_BLOCK (FOR_static_scheduling);
-
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
- /* this all has to be changed back to TID and such.. */
+/*
+ * kmp_sched.c -- static scheduling -- iteration initialization
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ * Static scheduling initialization.
+ *
+ * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
+ * it may change values between parallel regions. __kmp_max_nth
+ * is the largest value __kmp_nth may take, 1 is the smallest.
+ *
+ */
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_str.h"
+#include "kmp_error.h"
+#include "kmp_stats.h"
+#include "kmp_itt.h"
+
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
+
+// template for type limits
+template< typename T >
+struct i_maxmin {
+ static const T mx;
+ static const T mn;
+};
+template<>
+struct i_maxmin< int > {
+ static const int mx = 0x7fffffff;
+ static const int mn = 0x80000000;
+};
+template<>
+struct i_maxmin< unsigned int > {
+ static const unsigned int mx = 0xffffffff;
+ static const unsigned int mn = 0x00000000;
+};
+template<>
+struct i_maxmin< long long > {
+ static const long long mx = 0x7fffffffffffffffLL;
+ static const long long mn = 0x8000000000000000LL;
+};
+template<>
+struct i_maxmin< unsigned long long > {
+ static const unsigned long long mx = 0xffffffffffffffffLL;
+ static const unsigned long long mn = 0x0000000000000000LL;
+};
+//-------------------------------------------------------------------------
+#ifdef KMP_DEBUG
+//-------------------------------------------------------------------------
+// template for debug prints specification ( d, u, lld, llu )
+ char const * traits_t< int >::spec = "d";
+ char const * traits_t< unsigned int >::spec = "u";
+ char const * traits_t< long long >::spec = "lld";
+ char const * traits_t< unsigned long long >::spec = "llu";
+//-------------------------------------------------------------------------
+#endif
+
+template< typename T >
+static void
+__kmp_for_static_init(
+ ident_t *loc,
+ kmp_int32 global_tid,
+ kmp_int32 schedtype,
+ kmp_int32 *plastiter,
+ T *plower,
+ T *pupper,
+ typename traits_t< T >::signed_t *pstride,
+ typename traits_t< T >::signed_t incr,
+ typename traits_t< T >::signed_t chunk
+) {
+ KMP_COUNT_BLOCK(OMP_FOR_static);
+ KMP_TIME_BLOCK (FOR_static_scheduling);
+
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
+ /* this all has to be changed back to TID and such.. */
kmp_int32 gtid = global_tid;
kmp_uint32 tid;
kmp_uint32 nth;
UT trip_count;
kmp_team_t *team;
kmp_info_t *th = __kmp_threads[ gtid ];
-
-#if OMPT_SUPPORT && OMPT_TRACE
- ompt_team_info_t *team_info = NULL;
- ompt_task_info_t *task_info = NULL;
-
- if (ompt_enabled) {
- // Only fully initialize variables needed by OMPT if OMPT is enabled.
- team_info = __ompt_get_teaminfo(0, NULL);
- task_info = __ompt_get_taskinfo(0);
- }
-#endif
-
- KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
- KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
- " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
- traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
- KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
- *plower, *pupper, *pstride, incr, chunk ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- if ( __kmp_env_consistency_check ) {
- __kmp_push_workshare( global_tid, ct_pdo, loc );
- if ( incr == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
- }
- }
- /* special handling for zero-trip loops */
- if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
- if( plastiter != NULL )
- *plastiter = FALSE;
- /* leave pupper and plower set to entire iteration space */
- *pstride = incr; /* value should never be used */
- // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
- upper=0,stride=1) - JPH June 23, 2009.
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
- KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
- __kmp_str_free( &buff );
- }
- #endif
- KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
- team_info->parallel_id, task_info->task_id,
- team_info->microtask);
- }
-#endif
- KMP_COUNT_VALUE (FOR_static_iterations, 0);
- return;
- }
-
- #if OMP_40_ENABLED
- if ( schedtype > kmp_ord_upper ) {
- // we are in DISTRIBUTE construct
- schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
- tid = th->th.th_team->t.t_master_tid;
- team = th->th.th_team->t.t_parent;
- } else
- #endif
- {
- tid = __kmp_tid_from_gtid( global_tid );
- team = th->th.th_team;
- }
-
- /* determine if "for" loop is an active worksharing construct */
- if ( team -> t.t_serialized ) {
- /* serialized parallel, each thread executes whole iteration space */
- if( plastiter != NULL )
- *plastiter = TRUE;
- /* leave pupper and plower set to entire iteration space */
- *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
-
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
- KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
- __kmp_str_free( &buff );
- }
- #endif
- KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
- team_info->parallel_id, task_info->task_id,
- team_info->microtask);
- }
-#endif
- return;
- }
- nth = team->t.t_nproc;
- if ( nth == 1 ) {
- if( plastiter != NULL )
- *plastiter = TRUE;
- *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
- KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
- __kmp_str_free( &buff );
- }
- #endif
- KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
- team_info->parallel_id, task_info->task_id,
- team_info->microtask);
- }
-#endif
- return;
- }
-
- /* compute trip count */
- if ( incr == 1 ) {
- trip_count = *pupper - *plower + 1;
- } else if (incr == -1) {
- trip_count = *plower - *pupper + 1;
- } else {
- if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
- trip_count = (*pupper - *plower) / incr + 1;
- } else {
- trip_count = (*plower - *pupper) / ( -incr ) + 1;
- }
- }
-
- if ( __kmp_env_consistency_check ) {
- /* tripcount overflow? */
- if ( trip_count == 0 && *pupper != *plower ) {
- __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
- }
- }
- KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
-
- /* compute remaining parameters */
- switch ( schedtype ) {
- case kmp_sch_static:
- {
- if ( trip_count < nth ) {
- KMP_DEBUG_ASSERT(
- __kmp_static == kmp_sch_static_greedy || \
- __kmp_static == kmp_sch_static_balanced
- ); // Unknown static scheduling type.
- if ( tid < trip_count ) {
- *pupper = *plower = *plower + tid * incr;
- } else {
- *plower = *pupper + incr;
- }
- if( plastiter != NULL )
- *plastiter = ( tid == trip_count - 1 );
- } else {
- if ( __kmp_static == kmp_sch_static_balanced ) {
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ ompt_team_info_t *team_info = NULL;
+ ompt_task_info_t *task_info = NULL;
+
+ if (ompt_enabled) {
+ // Only fully initialize variables needed by OMPT if OMPT is enabled.
+ team_info = __ompt_get_teaminfo(0, NULL);
+ task_info = __ompt_get_taskinfo(0);
+ }
+#endif
+
+ KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
+ KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
+ " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
+ traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
+ KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
+ *plower, *pupper, *pstride, incr, chunk ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ if ( __kmp_env_consistency_check ) {
+ __kmp_push_workshare( global_tid, ct_pdo, loc );
+ if ( incr == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
+ }
+ }
+ /* special handling for zero-trip loops */
+ if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
+ if( plastiter != NULL )
+ *plastiter = FALSE;
+ /* leave pupper and plower set to entire iteration space */
+ *pstride = incr; /* value should never be used */
+ // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
+ upper=0,stride=1) - JPH June 23, 2009.
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
+ KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
+ team_info->parallel_id, task_info->task_id,
+ team_info->microtask);
+ }
+#endif
+ KMP_COUNT_VALUE (FOR_static_iterations, 0);
+ return;
+ }
+
+ #if OMP_40_ENABLED
+ if ( schedtype > kmp_ord_upper ) {
+ // we are in DISTRIBUTE construct
+ schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
+ tid = th->th.th_team->t.t_master_tid;
+ team = th->th.th_team->t.t_parent;
+ } else
+ #endif
+ {
+ tid = __kmp_tid_from_gtid( global_tid );
+ team = th->th.th_team;
+ }
+
+ /* determine if "for" loop is an active worksharing construct */
+ if ( team -> t.t_serialized ) {
+ /* serialized parallel, each thread executes whole iteration space */
+ if( plastiter != NULL )
+ *plastiter = TRUE;
+ /* leave pupper and plower set to entire iteration space */
+ *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
+
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
+ KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
+ team_info->parallel_id, task_info->task_id,
+ team_info->microtask);
+ }
+#endif
+ return;
+ }
+ nth = team->t.t_nproc;
+ if ( nth == 1 ) {
+ if( plastiter != NULL )
+ *plastiter = TRUE;
+ *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
+ KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
+ team_info->parallel_id, task_info->task_id,
+ team_info->microtask);
+ }
+#endif
+ return;
+ }
+
+ /* compute trip count */
+ if ( incr == 1 ) {
+ trip_count = *pupper - *plower + 1;
+ } else if (incr == -1) {
+ trip_count = *plower - *pupper + 1;
+ } else {
+ if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
+ trip_count = (*pupper - *plower) / incr + 1;
+ } else {
+ trip_count = (*plower - *pupper) / ( -incr ) + 1;
+ }
+ }
+
+ if ( __kmp_env_consistency_check ) {
+ /* tripcount overflow? */
+ if ( trip_count == 0 && *pupper != *plower ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
+ }
+ }
+ KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
+
+ /* compute remaining parameters */
+ switch ( schedtype ) {
+ case kmp_sch_static:
+ {
+ if ( trip_count < nth ) {
+ KMP_DEBUG_ASSERT(
+ __kmp_static == kmp_sch_static_greedy || \
+ __kmp_static == kmp_sch_static_balanced
+ ); // Unknown static scheduling type.
+ if ( tid < trip_count ) {
+ *pupper = *plower = *plower + tid * incr;
+ } else {
+ *plower = *pupper + incr;
+ }
+ if( plastiter != NULL )
+ *plastiter = ( tid == trip_count - 1 );
+ } else {
+ if ( __kmp_static == kmp_sch_static_balanced ) {
UT small_chunk = trip_count / nth;
UT extras = trip_count % nth;
- *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
- *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
- if( plastiter != NULL )
- *plastiter = ( tid == nth - 1 );
- } else {
+ *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
+ *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
+ if( plastiter != NULL )
+ *plastiter = ( tid == nth - 1 );
+ } else {
T big_chunk_inc_count = ( trip_count/nth +
- ( ( trip_count % nth ) ? 1 : 0) ) * incr;
+ ( ( trip_count % nth ) ? 1 : 0) ) * incr;
T old_upper = *pupper;
-
- KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
- // Unknown static scheduling type.
-
- *plower += tid * big_chunk_inc_count;
- *pupper = *plower + big_chunk_inc_count - incr;
- if ( incr > 0 ) {
- if( *pupper < *plower )
- *pupper = i_maxmin< T >::mx;
- if( plastiter != NULL )
- *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
- if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
- } else {
- if( *pupper > *plower )
- *pupper = i_maxmin< T >::mn;
- if( plastiter != NULL )
- *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
- if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
- }
- }
- }
- break;
- }
- case kmp_sch_static_chunked:
- {
+
+ KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
+ // Unknown static scheduling type.
+
+ *plower += tid * big_chunk_inc_count;
+ *pupper = *plower + big_chunk_inc_count - incr;
+ if ( incr > 0 ) {
+ if( *pupper < *plower )
+ *pupper = i_maxmin< T >::mx;
+ if( plastiter != NULL )
+ *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
+ if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
+ } else {
+ if( *pupper > *plower )
+ *pupper = i_maxmin< T >::mn;
+ if( plastiter != NULL )
+ *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
+ if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
+ }
+ }
+ }
+ break;
+ }
+ case kmp_sch_static_chunked:
+ {
ST span;
- if ( chunk < 1 ) {
- chunk = 1;
- }
- span = chunk * incr;
- *pstride = span * nth;
- *plower = *plower + (span * tid);
- *pupper = *plower + span - incr;
- if( plastiter != NULL )
- *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
- break;
- }
- default:
- KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
- break;
- }
-
-#if USE_ITT_BUILD
- // Report loop metadata
- if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
-#if OMP_40_ENABLED
- th->th.th_teams_microtask == NULL &&
-#endif
- team->t.t_active_level == 1 )
- {
- kmp_uint64 cur_chunk = chunk;
- // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
- if ( schedtype == kmp_sch_static ) {
- cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
- }
- // 0 - "static" schedule
- __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
- }
-#endif
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
- KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
- __kmp_str_free( &buff );
- }
- #endif
- KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
- team_info->parallel_id, task_info->task_id, team_info->microtask);
- }
-#endif
-
- return;
-}
-
-template< typename T >
-static void
-__kmp_dist_for_static_init(
- ident_t *loc,
- kmp_int32 gtid,
- kmp_int32 schedule,
- kmp_int32 *plastiter,
- T *plower,
- T *pupper,
- T *pupperDist,
- typename traits_t< T >::signed_t *pstride,
- typename traits_t< T >::signed_t incr,
- typename traits_t< T >::signed_t chunk
-) {
- KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
+ if ( chunk < 1 ) {
+ chunk = 1;
+ }
+ span = chunk * incr;
+ *pstride = span * nth;
+ *plower = *plower + (span * tid);
+ *pupper = *plower + span - incr;
+ if( plastiter != NULL )
+ *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
+ break;
+ }
+ default:
+ KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
+ break;
+ }
+
+#if USE_ITT_BUILD
+ // Report loop metadata
+ if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
+#if OMP_40_ENABLED
+ th->th.th_teams_microtask == NULL &&
+#endif
+ team->t.t_active_level == 1 )
+ {
+ kmp_uint64 cur_chunk = chunk;
+ // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
+ if ( schedtype == kmp_sch_static ) {
+ cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
+ }
+ // 0 - "static" schedule
+ __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
+ }
+#endif
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
+ KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
+ team_info->parallel_id, task_info->task_id, team_info->microtask);
+ }
+#endif
+
+ return;
+}
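/* Worked sketch of the kmp_sch_static_balanced arithmetic above for the
   unit-stride case: with trip_count iterations and nth threads, the first
   (trip_count % nth) threads receive one extra iteration. Not the runtime's
   code, just the same formula applied to concrete numbers. */
#include <stdio.h>

static void balanced_bounds(long lower, long trip_count, long nth, long tid,
                            long *lo, long *hi) {
    long small_chunk = trip_count / nth;
    long extras      = trip_count % nth;
    *lo = lower + tid * small_chunk + (tid < extras ? tid : extras);
    *hi = *lo + small_chunk - (tid < extras ? 0 : 1);
}

int main(void) {
    long lo, hi, tid;
    for (tid = 0; tid < 4; ++tid) {
        balanced_bounds(0, 10, 4, tid, &lo, &hi);
        printf("tid %ld: [%ld, %ld]\n", tid, lo, hi);  /* [0,2] [3,5] [6,7] [8,9] */
    }
    return 0;
}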
+
+template< typename T >
+static void
+__kmp_dist_for_static_init(
+ ident_t *loc,
+ kmp_int32 gtid,
+ kmp_int32 schedule,
+ kmp_int32 *plastiter,
+ T *plower,
+ T *pupper,
+ T *pupperDist,
+ typename traits_t< T >::signed_t *pstride,
+ typename traits_t< T >::signed_t incr,
+ typename traits_t< T >::signed_t chunk
+) {
+ KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
kmp_uint32 tid;
kmp_uint32 nth;
kmp_uint32 team_id;
kmp_uint32 nteams;
UT trip_count;
kmp_team_t *team;
- kmp_info_t * th;
-
- KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
- KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
- "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
- traits_t< ST >::spec, traits_t< T >::spec );
- KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
- *plower, *pupper, incr, chunk ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- if( __kmp_env_consistency_check ) {
- __kmp_push_workshare( gtid, ct_pdo, loc );
- if( incr == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
- }
- if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
- // The loop is illegal.
- // Some zero-trip loops maintained by compiler, e.g.:
- // for(i=10;i<0;++i) // lower >= upper - run-time check
- // for(i=0;i>10;--i) // lower <= upper - run-time check
- // for(i=0;i>10;++i) // incr > 0 - compile-time check
- // for(i=10;i<0;--i) // incr < 0 - compile-time check
- // Compiler does not check the following illegal loops:
- // for(i=0;i<10;i+=incr) // where incr<0
- // for(i=10;i>0;i-=incr) // where incr<0
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
- }
- }
- tid = __kmp_tid_from_gtid( gtid );
- th = __kmp_threads[gtid];
- nth = th->th.th_team_nproc;
- team = th->th.th_team;
- #if OMP_40_ENABLED
- KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
- nteams = th->th.th_teams_size.nteams;
- #endif
- team_id = team->t.t_master_tid;
- KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
-
- // compute global trip count
- if( incr == 1 ) {
- trip_count = *pupper - *plower + 1;
- } else if(incr == -1) {
- trip_count = *plower - *pupper + 1;
- } else {
- trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
- }
-
- *pstride = *pupper - *plower; // just in case (can be unused)
- if( trip_count <= nteams ) {
- KMP_DEBUG_ASSERT(
- __kmp_static == kmp_sch_static_greedy || \
- __kmp_static == kmp_sch_static_balanced
- ); // Unknown static scheduling type.
- // only masters of some teams get single iteration, other threads get nothing
- if( team_id < trip_count && tid == 0 ) {
- *pupper = *pupperDist = *plower = *plower + team_id * incr;
- } else {
- *pupperDist = *pupper;
- *plower = *pupper + incr; // compiler should skip loop body
- }
- if( plastiter != NULL )
- *plastiter = ( tid == 0 && team_id == trip_count - 1 );
- } else {
- // Get the team's chunk first (each team gets at most one chunk)
- if( __kmp_static == kmp_sch_static_balanced ) {
+ kmp_info_t * th;
+
+ KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
+ KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
+ "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
+ traits_t< ST >::spec, traits_t< T >::spec );
+ KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
+ *plower, *pupper, incr, chunk ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ if( __kmp_env_consistency_check ) {
+ __kmp_push_workshare( gtid, ct_pdo, loc );
+ if( incr == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
+ }
+ if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
+ // The loop is illegal.
+ // Some zero-trip loops maintained by compiler, e.g.:
+ // for(i=10;i<0;++i) // lower >= upper - run-time check
+ // for(i=0;i>10;--i) // lower <= upper - run-time check
+ // for(i=0;i>10;++i) // incr > 0 - compile-time check
+ // for(i=10;i<0;--i) // incr < 0 - compile-time check
+ // Compiler does not check the following illegal loops:
+ // for(i=0;i<10;i+=incr) // where incr<0
+ // for(i=10;i>0;i-=incr) // where incr<0
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
+ }
+ }
+ tid = __kmp_tid_from_gtid( gtid );
+ th = __kmp_threads[gtid];
+ nth = th->th.th_team_nproc;
+ team = th->th.th_team;
+ #if OMP_40_ENABLED
+ KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
+ nteams = th->th.th_teams_size.nteams;
+ #endif
+ team_id = team->t.t_master_tid;
+ KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
+
+ // compute global trip count
+ if( incr == 1 ) {
+ trip_count = *pupper - *plower + 1;
+ } else if(incr == -1) {
+ trip_count = *plower - *pupper + 1;
+ } else {
+ trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
+ }
+
+ *pstride = *pupper - *plower; // just in case (can be unused)
+ if( trip_count <= nteams ) {
+ KMP_DEBUG_ASSERT(
+ __kmp_static == kmp_sch_static_greedy || \
+ __kmp_static == kmp_sch_static_balanced
+ ); // Unknown static scheduling type.
+ // only masters of some teams get single iteration, other threads get nothing
+ if( team_id < trip_count && tid == 0 ) {
+ *pupper = *pupperDist = *plower = *plower + team_id * incr;
+ } else {
+ *pupperDist = *pupper;
+ *plower = *pupper + incr; // compiler should skip loop body
+ }
+ if( plastiter != NULL )
+ *plastiter = ( tid == 0 && team_id == trip_count - 1 );
+ } else {
+ // Get the team's chunk first (each team gets at most one chunk)
+ if( __kmp_static == kmp_sch_static_balanced ) {
UT chunkD = trip_count / nteams;
UT extras = trip_count % nteams;
- *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
- *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
- if( plastiter != NULL )
- *plastiter = ( team_id == nteams - 1 );
- } else {
+ *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
+ *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
+ if( plastiter != NULL )
+ *plastiter = ( team_id == nteams - 1 );
+ } else {
T chunk_inc_count =
- ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
+ ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
T upper = *pupper;
- KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
- // Unknown static scheduling type.
- *plower += team_id * chunk_inc_count;
- *pupperDist = *plower + chunk_inc_count - incr;
- // Check/correct bounds if needed
- if( incr > 0 ) {
- if( *pupperDist < *plower )
- *pupperDist = i_maxmin< T >::mx;
- if( plastiter != NULL )
- *plastiter = *plower <= upper && *pupperDist > upper - incr;
- if( *pupperDist > upper )
- *pupperDist = upper; // tracker C73258
- if( *plower > *pupperDist ) {
- *pupper = *pupperDist; // no iterations available for the team
- goto end;
- }
- } else {
- if( *pupperDist > *plower )
- *pupperDist = i_maxmin< T >::mn;
- if( plastiter != NULL )
- *plastiter = *plower >= upper && *pupperDist < upper - incr;
- if( *pupperDist < upper )
- *pupperDist = upper; // tracker C73258
- if( *plower < *pupperDist ) {
- *pupper = *pupperDist; // no iterations available for the team
- goto end;
- }
- }
- }
- // Get the parallel loop chunk now (for thread)
- // compute trip count for team's chunk
- if( incr == 1 ) {
- trip_count = *pupperDist - *plower + 1;
- } else if(incr == -1) {
- trip_count = *plower - *pupperDist + 1;
- } else {
- trip_count = (ST)(*pupperDist - *plower) / incr + 1;
- }
- KMP_DEBUG_ASSERT( trip_count );
- switch( schedule ) {
- case kmp_sch_static:
- {
- if( trip_count <= nth ) {
- KMP_DEBUG_ASSERT(
- __kmp_static == kmp_sch_static_greedy || \
- __kmp_static == kmp_sch_static_balanced
- ); // Unknown static scheduling type.
- if( tid < trip_count )
- *pupper = *plower = *plower + tid * incr;
- else
- *plower = *pupper + incr; // no iterations available
- if( plastiter != NULL )
- if( *plastiter != 0 && !( tid == trip_count - 1 ) )
- *plastiter = 0;
- } else {
- if( __kmp_static == kmp_sch_static_balanced ) {
+ KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
+ // Unknown static scheduling type.
+ *plower += team_id * chunk_inc_count;
+ *pupperDist = *plower + chunk_inc_count - incr;
+ // Check/correct bounds if needed
+ if( incr > 0 ) {
+ if( *pupperDist < *plower )
+ *pupperDist = i_maxmin< T >::mx;
+ if( plastiter != NULL )
+ *plastiter = *plower <= upper && *pupperDist > upper - incr;
+ if( *pupperDist > upper )
+ *pupperDist = upper; // tracker C73258
+ if( *plower > *pupperDist ) {
+ *pupper = *pupperDist; // no iterations available for the team
+ goto end;
+ }
+ } else {
+ if( *pupperDist > *plower )
+ *pupperDist = i_maxmin< T >::mn;
+ if( plastiter != NULL )
+ *plastiter = *plower >= upper && *pupperDist < upper - incr;
+ if( *pupperDist < upper )
+ *pupperDist = upper; // tracker C73258
+ if( *plower < *pupperDist ) {
+ *pupper = *pupperDist; // no iterations available for the team
+ goto end;
+ }
+ }
+ }
+ // Get the parallel loop chunk now (for thread)
+ // compute trip count for team's chunk
+ if( incr == 1 ) {
+ trip_count = *pupperDist - *plower + 1;
+ } else if(incr == -1) {
+ trip_count = *plower - *pupperDist + 1;
+ } else {
+ trip_count = (ST)(*pupperDist - *plower) / incr + 1;
+ }
+ KMP_DEBUG_ASSERT( trip_count );
+ switch( schedule ) {
+ case kmp_sch_static:
+ {
+ if( trip_count <= nth ) {
+ KMP_DEBUG_ASSERT(
+ __kmp_static == kmp_sch_static_greedy || \
+ __kmp_static == kmp_sch_static_balanced
+ ); // Unknown static scheduling type.
+ if( tid < trip_count )
+ *pupper = *plower = *plower + tid * incr;
+ else
+ *plower = *pupper + incr; // no iterations available
+ if( plastiter != NULL )
+ if( *plastiter != 0 && !( tid == trip_count - 1 ) )
+ *plastiter = 0;
+ } else {
+ if( __kmp_static == kmp_sch_static_balanced ) {
UT chunkL = trip_count / nth;
UT extras = trip_count % nth;
- *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
- *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
- if( plastiter != NULL )
- if( *plastiter != 0 && !( tid == nth - 1 ) )
- *plastiter = 0;
- } else {
+ *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
+ *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
+ if( plastiter != NULL )
+ if( *plastiter != 0 && !( tid == nth - 1 ) )
+ *plastiter = 0;
+ } else {
T chunk_inc_count =
- ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
+ ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
T upper = *pupperDist;
- KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
- // Unknown static scheduling type.
- *plower += tid * chunk_inc_count;
- *pupper = *plower + chunk_inc_count - incr;
- if( incr > 0 ) {
- if( *pupper < *plower )
- *pupper = i_maxmin< T >::mx;
- if( plastiter != NULL )
- if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
- *plastiter = 0;
- if( *pupper > upper )
- *pupper = upper;//tracker C73258
- } else {
- if( *pupper > *plower )
- *pupper = i_maxmin< T >::mn;
- if( plastiter != NULL )
- if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
- *plastiter = 0;
- if( *pupper < upper )
- *pupper = upper;//tracker C73258
- }
- }
- }
- break;
- }
- case kmp_sch_static_chunked:
- {
+ KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
+ // Unknown static scheduling type.
+ *plower += tid * chunk_inc_count;
+ *pupper = *plower + chunk_inc_count - incr;
+ if( incr > 0 ) {
+ if( *pupper < *plower )
+ *pupper = i_maxmin< T >::mx;
+ if( plastiter != NULL )
+ if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
+ *plastiter = 0;
+ if( *pupper > upper )
+ *pupper = upper;//tracker C73258
+ } else {
+ if( *pupper > *plower )
+ *pupper = i_maxmin< T >::mn;
+ if( plastiter != NULL )
+ if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
+ *plastiter = 0;
+ if( *pupper < upper )
+ *pupper = upper;//tracker C73258
+ }
+ }
+ }
+ break;
+ }
+ case kmp_sch_static_chunked:
+ {
ST span;
- if( chunk < 1 )
- chunk = 1;
- span = chunk * incr;
- *pstride = span * nth;
- *plower = *plower + (span * tid);
- *pupper = *plower + span - incr;
- if( plastiter != NULL )
- if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
- *plastiter = 0;
- break;
- }
- default:
- KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
- break;
- }
- }
- end:;
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format(
- "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
- "stride=%%%s signed?<%s>\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
- traits_t< ST >::spec, traits_t< T >::spec );
- KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
- __kmp_str_free( &buff );
- }
- #endif
- KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
- return;
-}
-
-template< typename T >
-static void
-__kmp_team_static_init(
- ident_t *loc,
- kmp_int32 gtid,
- kmp_int32 *p_last,
- T *p_lb,
- T *p_ub,
- typename traits_t< T >::signed_t *p_st,
- typename traits_t< T >::signed_t incr,
- typename traits_t< T >::signed_t chunk
-) {
- // The routine returns the first chunk distributed to the team and
- // stride for next chunks calculation.
- // Last iteration flag set for the team that will execute
- // the last iteration of the loop.
- // The routine is called for dist_schedue(static,chunk) only.
- typedef typename traits_t< T >::unsigned_t UT;
- typedef typename traits_t< T >::signed_t ST;
- kmp_uint32 team_id;
- kmp_uint32 nteams;
- UT trip_count;
- T lower;
- T upper;
- ST span;
- kmp_team_t *team;
- kmp_info_t *th;
-
- KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
- KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
- "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
- traits_t< ST >::spec, traits_t< T >::spec );
- KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
- __kmp_str_free( &buff );
- }
- #endif
-
- lower = *p_lb;
- upper = *p_ub;
- if( __kmp_env_consistency_check ) {
- if( incr == 0 ) {
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
- }
- if( incr > 0 ? (upper < lower) : (lower < upper) ) {
- // The loop is illegal.
- // Some zero-trip loops maintained by compiler, e.g.:
- // for(i=10;i<0;++i) // lower >= upper - run-time check
- // for(i=0;i>10;--i) // lower <= upper - run-time check
- // for(i=0;i>10;++i) // incr > 0 - compile-time check
- // for(i=10;i<0;--i) // incr < 0 - compile-time check
- // Compiler does not check the following illegal loops:
- // for(i=0;i<10;i+=incr) // where incr<0
- // for(i=10;i>0;i-=incr) // where incr<0
- __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
- }
- }
- th = __kmp_threads[gtid];
- team = th->th.th_team;
- #if OMP_40_ENABLED
- KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
- nteams = th->th.th_teams_size.nteams;
- #endif
- team_id = team->t.t_master_tid;
- KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
-
- // compute trip count
- if( incr == 1 ) {
- trip_count = upper - lower + 1;
- } else if(incr == -1) {
- trip_count = lower - upper + 1;
- } else {
- trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
- }
- if( chunk < 1 )
- chunk = 1;
- span = chunk * incr;
- *p_st = span * nteams;
- *p_lb = lower + (span * team_id);
- *p_ub = *p_lb + span - incr;
- if ( p_last != NULL )
- *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
- // Correct upper bound if needed
- if( incr > 0 ) {
- if( *p_ub < *p_lb ) // overflow?
- *p_ub = i_maxmin< T >::mx;
- if( *p_ub > upper )
- *p_ub = upper; // tracker C73258
- } else { // incr < 0
- if( *p_ub > *p_lb )
- *p_ub = i_maxmin< T >::mn;
- if( *p_ub < upper )
- *p_ub = upper; // tracker C73258
- }
- #ifdef KMP_DEBUG
- {
- const char * buff;
- // create format specifiers before the debug output
- buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
- "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
- traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
- traits_t< ST >::spec );
- KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
- __kmp_str_free( &buff );
- }
- #endif
-}
-
-//--------------------------------------------------------------------------------------
-extern "C" {
-
-/*!
-@ingroup WORK_SHARING
-@param loc Source code location
-@param gtid Global thread id of this thread
-@param schedtype Scheduling type
-@param plastiter Pointer to the "last iteration" flag
-@param plower Pointer to the lower bound
-@param pupper Pointer to the upper bound
-@param pstride Pointer to the stride
-@param incr Loop increment
-@param chunk The chunk size
-
-Each of the four functions here are identical apart from the argument types.
-
-The functions compute the upper and lower bounds and stride to be used for the set of iterations
-to be executed by the current thread from the statically scheduled loop that is described by the
-initial values of the bounds, stride, increment and chunk size.
-
-@{
-*/
-void
-__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
- kmp_int32 *plower, kmp_int32 *pupper,
- kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
-{
- __kmp_for_static_init< kmp_int32 >(
- loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_for_static_init_4
- */
-void
-__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
- kmp_uint32 *plower, kmp_uint32 *pupper,
- kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
-{
- __kmp_for_static_init< kmp_uint32 >(
- loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_for_static_init_4
- */
-void
-__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
- kmp_int64 *plower, kmp_int64 *pupper,
- kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
-{
- __kmp_for_static_init< kmp_int64 >(
- loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_for_static_init_4
- */
-void
-__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
- kmp_uint64 *plower, kmp_uint64 *pupper,
- kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
-{
- __kmp_for_static_init< kmp_uint64 >(
- loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
-}
-/*!
-@}
-*/
-
-/*!
-@ingroup WORK_SHARING
-@param loc Source code location
-@param gtid Global thread id of this thread
-@param schedule Scheduling type for the parallel loop
-@param plastiter Pointer to the "last iteration" flag
-@param plower Pointer to the lower bound
-@param pupper Pointer to the upper bound of loop chunk
-@param pupperD Pointer to the upper bound of dist_chunk
-@param pstride Pointer to the stride for parallel loop
-@param incr Loop increment
-@param chunk The chunk size for the parallel loop
-
-Each of the four functions here are identical apart from the argument types.
-
-The functions compute the upper and lower bounds and strides to be used for the set of iterations
-to be executed by the current thread from the statically scheduled loop that is described by the
-initial values of the bounds, strides, increment and chunks for parallel loop and distribute
-constructs.
-
-@{
-*/
-void
-__kmpc_dist_for_static_init_4(
- ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
- kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
- kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
-{
- __kmp_dist_for_static_init< kmp_int32 >(
- loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_dist_for_static_init_4
- */
-void
-__kmpc_dist_for_static_init_4u(
- ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
- kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
- kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
-{
- __kmp_dist_for_static_init< kmp_uint32 >(
- loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_dist_for_static_init_4
- */
-void
-__kmpc_dist_for_static_init_8(
- ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
- kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
- kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
-{
- __kmp_dist_for_static_init< kmp_int64 >(
- loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_dist_for_static_init_4
- */
-void
-__kmpc_dist_for_static_init_8u(
- ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
- kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
- kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
-{
- __kmp_dist_for_static_init< kmp_uint64 >(
- loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
-}
-/*!
-@}
-*/
-
-//-----------------------------------------------------------------------------------------
-// Auxiliary routines for Distribute Parallel Loop construct implementation
-// Transfer call to template< type T >
-// __kmp_team_static_init( ident_t *loc, int gtid,
-// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
-
-/*!
-@ingroup WORK_SHARING
-@{
-@param loc Source location
-@param gtid Global thread id
-@param p_last pointer to last iteration flag
-@param p_lb pointer to Lower bound
-@param p_ub pointer to Upper bound
-@param p_st Step (or increment if you prefer)
-@param incr Loop increment
-@param chunk The chunk size to block with
-
-The functions compute the upper and lower bounds and stride to be used for the set of iterations
-to be executed by the current team from the statically scheduled loop that is described by the
-initial values of the bounds, stride, increment and chunk for the distribute construct as part of
-composite distribute parallel loop construct.
-These functions are all identical apart from the types of the arguments.
-*/
-
-void
-__kmpc_team_static_init_4(
- ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_team_static_init_4
- */
-void
-__kmpc_team_static_init_4u(
- ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_team_static_init_4
- */
-void
-__kmpc_team_static_init_8(
- ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
-}
-
-/*!
- See @ref __kmpc_team_static_init_4
- */
-void
-__kmpc_team_static_init_8u(
- ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
- kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
-{
- KMP_DEBUG_ASSERT( __kmp_init_serial );
- __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
-}
-/*!
-@}
-*/
-
-} // extern "C"
-
+ if( chunk < 1 )
+ chunk = 1;
+ span = chunk * incr;
+ *pstride = span * nth;
+ *plower = *plower + (span * tid);
+ *pupper = *plower + span - incr;
+ if( plastiter != NULL )
+ if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
+ *plastiter = 0;
+ break;
+ }
+ default:
+ KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
+ break;
+ }
+ }
+ end:;
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format(
+ "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
+ "stride=%%%s signed?<%s>\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
+ traits_t< ST >::spec, traits_t< T >::spec );
+ KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+ KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
+ return;
+}
+
+template< typename T >
+static void
+__kmp_team_static_init(
+ ident_t *loc,
+ kmp_int32 gtid,
+ kmp_int32 *p_last,
+ T *p_lb,
+ T *p_ub,
+ typename traits_t< T >::signed_t *p_st,
+ typename traits_t< T >::signed_t incr,
+ typename traits_t< T >::signed_t chunk
+) {
+ // The routine returns the first chunk distributed to the team and the
+ // stride to be used for computing subsequent chunks.
+ // The last-iteration flag is set for the team that will execute
+ // the last iteration of the loop.
+ // The routine is called for dist_schedule(static,chunk) only.
+ typedef typename traits_t< T >::unsigned_t UT;
+ typedef typename traits_t< T >::signed_t ST;
+ kmp_uint32 team_id;
+ kmp_uint32 nteams;
+ UT trip_count;
+ T lower;
+ T upper;
+ ST span;
+ kmp_team_t *team;
+ kmp_info_t *th;
+
+ KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
+ KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
+ "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
+ traits_t< ST >::spec, traits_t< T >::spec );
+ KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+
+ lower = *p_lb;
+ upper = *p_ub;
+ if( __kmp_env_consistency_check ) {
+ if( incr == 0 ) {
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
+ }
+ if( incr > 0 ? (upper < lower) : (lower < upper) ) {
+ // The loop is illegal.
+ // Some zero-trip loops maintained by compiler, e.g.:
+ // for(i=10;i<0;++i) // lower >= upper - run-time check
+ // for(i=0;i>10;--i) // lower <= upper - run-time check
+ // for(i=0;i>10;++i) // incr > 0 - compile-time check
+ // for(i=10;i<0;--i) // incr < 0 - compile-time check
+ // Compiler does not check the following illegal loops:
+ // for(i=0;i<10;i+=incr) // where incr<0
+ // for(i=10;i>0;i-=incr) // where incr<0
+ __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
+ }
+ }
+ th = __kmp_threads[gtid];
+ team = th->th.th_team;
+ #if OMP_40_ENABLED
+ KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
+ nteams = th->th.th_teams_size.nteams;
+ #endif
+ team_id = team->t.t_master_tid;
+ KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
+
+ // compute trip count
+ if( incr == 1 ) {
+ trip_count = upper - lower + 1;
+ } else if(incr == -1) {
+ trip_count = lower - upper + 1;
+ } else {
+ trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
+ }
+ if( chunk < 1 )
+ chunk = 1;
+ span = chunk * incr;
+ *p_st = span * nteams;
+ *p_lb = lower + (span * team_id);
+ *p_ub = *p_lb + span - incr;
+ if ( p_last != NULL )
+ *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
+ // Correct upper bound if needed
+ if( incr > 0 ) {
+ if( *p_ub < *p_lb ) // overflow?
+ *p_ub = i_maxmin< T >::mx;
+ if( *p_ub > upper )
+ *p_ub = upper; // tracker C73258
+ } else { // incr < 0
+ if( *p_ub > *p_lb )
+ *p_ub = i_maxmin< T >::mn;
+ if( *p_ub < upper )
+ *p_ub = upper; // tracker C73258
+ }
+ #ifdef KMP_DEBUG
+ {
+ const char * buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
+ "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
+ traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
+ traits_t< ST >::spec );
+ KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
+ __kmp_str_free( &buff );
+ }
+ #endif
+}
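+
+// Worked example for the arithmetic above (hypothetical values, not taken
+// from this file): with lower=0, upper=99, incr=1, chunk=10 and nteams=4,
+// trip_count=100, span=10 and *p_st=40, so team 0 starts with iterations
+// [0,9], team 1 with [10,19], team 2 with [20,29], team 3 with [30,39], and
+// each team advances by 40 iterations per chunk. (trip_count-1)/chunk % nteams
+// == 9 % 4 == 1, so team 1 owns the last chunk [90,99] and gets *p_last set.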
+
+//--------------------------------------------------------------------------------------
+extern "C" {
+
+/*!
+@ingroup WORK_SHARING
+@param loc Source code location
+@param gtid Global thread id of this thread
+@param schedtype Scheduling type
+@param plastiter Pointer to the "last iteration" flag
+@param plower Pointer to the lower bound
+@param pupper Pointer to the upper bound
+@param pstride Pointer to the stride
+@param incr Loop increment
+@param chunk The chunk size
+
+The four functions here are identical apart from the argument types.
+
+The functions compute the upper and lower bounds and stride to be used for the set of iterations
+to be executed by the current thread from the statically scheduled loop that is described by the
+initial values of the bounds, stride, increment and chunk size.
+
+@{
+*/
+void
+__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
+ kmp_int32 *plower, kmp_int32 *pupper,
+ kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
+{
+ __kmp_for_static_init< kmp_int32 >(
+ loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
+}
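+
+// A minimal usage sketch for the entry point above, roughly what a compiler
+// could emit for "#pragma omp for schedule(static)". It is illustrative only:
+// `loc`, `gtid` and the `kmp_sch_static` scheduling constant are assumed to be
+// available from elsewhere in the runtime and are not defined in this file.
+//
+//   kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
+//   __kmpc_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
+//                             &lb, &ub, &st, /* incr */ 1, /* chunk */ 1 );
+//   for ( kmp_int32 i = lb; i <= ub; ++i ) {
+//       /* loop body for iteration i */
+//   }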
+
+/*!
+ See @ref __kmpc_for_static_init_4
+ */
+void
+__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
+ kmp_uint32 *plower, kmp_uint32 *pupper,
+ kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
+{
+ __kmp_for_static_init< kmp_uint32 >(
+ loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_for_static_init_4
+ */
+void
+__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
+ kmp_int64 *plower, kmp_int64 *pupper,
+ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
+{
+ __kmp_for_static_init< kmp_int64 >(
+ loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_for_static_init_4
+ */
+void
+__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
+ kmp_uint64 *plower, kmp_uint64 *pupper,
+ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
+{
+ __kmp_for_static_init< kmp_uint64 >(
+ loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
+}
+/*!
+@}
+*/
+
+/*!
+@ingroup WORK_SHARING
+@param loc Source code location
+@param gtid Global thread id of this thread
+@param schedule Scheduling type for the parallel loop
+@param plastiter Pointer to the "last iteration" flag
+@param plower Pointer to the lower bound
+@param pupper Pointer to the upper bound of loop chunk
+@param pupperD Pointer to the upper bound of dist_chunk
+@param pstride Pointer to the stride for parallel loop
+@param incr Loop increment
+@param chunk The chunk size for the parallel loop
+
+The four functions here are identical apart from the argument types.
+
+The functions compute the upper and lower bounds and strides to be used for the set of iterations
+to be executed by the current thread from the statically scheduled loop that is described by the
+initial values of the bounds, strides, increment and chunks for the parallel loop and distribute
+constructs.
+
+@{
+*/
+void
+__kmpc_dist_for_static_init_4(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
+ kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
+ kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
+{
+ __kmp_dist_for_static_init< kmp_int32 >(
+ loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_dist_for_static_init_4
+ */
+void
+__kmpc_dist_for_static_init_4u(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
+ kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
+ kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
+{
+ __kmp_dist_for_static_init< kmp_uint32 >(
+ loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_dist_for_static_init_4
+ */
+void
+__kmpc_dist_for_static_init_8(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
+ kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
+ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
+{
+ __kmp_dist_for_static_init< kmp_int64 >(
+ loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_dist_for_static_init_4
+ */
+void
+__kmpc_dist_for_static_init_8u(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
+ kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
+ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
+{
+ __kmp_dist_for_static_init< kmp_uint64 >(
+ loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
+}
+/*!
+@}
+*/
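+
+// A hedged sketch for the dist_for entry points above: compared to
+// __kmpc_for_static_init_4 the only interface difference is the extra output
+// for the upper bound of the team's distribute chunk. `loc`, `gtid` and
+// `kmp_sch_static` are assumptions carried over from the previous sketch.
+//
+//   kmp_int32 last = 0, lb = 0, ub = N - 1, ubDist = N - 1, st = 1;
+//   __kmpc_dist_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
+//                                  &lb, &ub, &ubDist, &st,
+//                                  /* incr */ 1, /* chunk */ 1 );
+//   // lb, ub, st : iterations of the calling thread in the parallel loop
+//   // ubDist     : upper bound of the chunk given to this team by distribute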
+
+//-----------------------------------------------------------------------------------------
+// Auxiliary routines for Distribute Parallel Loop construct implementation
+// Transfer call to template< typename T >
+// __kmp_team_static_init( ident_t *loc, int gtid,
+// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
+
+/*!
+@ingroup WORK_SHARING
+@{
+@param loc Source location
+@param gtid Global thread id
+@param p_last Pointer to the last iteration flag
+@param p_lb Pointer to the lower bound
+@param p_ub Pointer to the upper bound
+@param p_st Step (or increment if you prefer)
+@param incr Loop increment
+@param chunk The chunk size to block with
+
+The functions compute the upper and lower bounds and stride to be used for the set of iterations
+to be executed by the current team from the statically scheduled loop that is described by the
+initial values of the bounds, stride, increment and chunk for the distribute construct as part of
+a composite distribute parallel loop construct.
+These functions are all identical apart from the types of the arguments.
+*/
+
+void
+__kmpc_team_static_init_4(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_team_static_init_4
+ */
+void
+__kmpc_team_static_init_4u(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_team_static_init_4
+ */
+void
+__kmpc_team_static_init_8(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
+}
+
+/*!
+ See @ref __kmpc_team_static_init_4
+ */
+void
+__kmpc_team_static_init_8u(
+ ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+ kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
+{
+ KMP_DEBUG_ASSERT( __kmp_init_serial );
+ __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
+}
+/*!
+@}
+*/
+
+} // extern "C"
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_settings.c b/contrib/libs/cxxsupp/openmp/kmp_settings.c
index 2bc312f6f1..067574f242 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_settings.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_settings.c
@@ -1,5469 +1,5469 @@
-/*
- * kmp_settings.c -- Initialize environment variables
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_wrapper_getpid.h"
-#include "kmp_environment.h"
-#include "kmp_atomic.h"
-#include "kmp_itt.h"
-#include "kmp_str.h"
-#include "kmp_settings.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-
-static int __kmp_env_toPrint( char const * name, int flag );
-
-bool __kmp_env_format = 0; // 0 - old format; 1 - new format
-// -------------------------------------------------------------------------------------------------
-// Helper string functions. Subject to move to kmp_str.
-// -------------------------------------------------------------------------------------------------
-
-static double
-__kmp_convert_to_double( char const * s )
-{
- double result;
-
- if ( KMP_SSCANF( s, "%lf", &result ) < 1 ) {
- result = 0.0;
- }
-
- return result;
-}
-
-#ifdef KMP_DEBUG
-static unsigned int
-__kmp_readstr_with_sentinel(char *dest, char const * src, size_t len, char sentinel) {
- unsigned int i;
- for (i = 0; i < len; i++) {
- if ((*src == '\0') || (*src == sentinel)) {
- break;
- }
- *(dest++) = *(src++);
- }
- *dest = '\0';
- return i;
-}
-#endif
-
-static int
-__kmp_match_with_sentinel( char const * a, char const * b, size_t len, char sentinel ) {
- size_t l = 0;
-
- if(a == NULL)
- a = "";
- if(b == NULL)
- b = "";
- while(*a && *b && *b != sentinel) {
- char ca = *a, cb = *b;
-
- if(ca >= 'a' && ca <= 'z')
- ca -= 'a' - 'A';
- if(cb >= 'a' && cb <= 'z')
- cb -= 'a' - 'A';
- if(ca != cb)
- return FALSE;
- ++l;
- ++a;
- ++b;
- }
- return l >= len;
-}
-
-//
-// Expected usage:
-// token is the token to check for.
-// buf is the string being parsed.
-// *end returns the char after the end of the token.
-// it is not modified unless a match occurs.
-//
-//
-// Example 1:
-//
-//     const char *end;
-//     if (__kmp_match_str("token", buf, &end)) {
-//         <do something>
-//         buf = end;
-//     }
-//
-// Example 2:
-//
-//     const char *end;
-//     if (__kmp_match_str("token", buf, &end)) {
-//         char save = *end;
-//         *(char *)end = sentinel;
-//         <use any of the __kmp*_with_sentinel() functions>
-//         *(char *)end = save;
-//         buf = end;
-//     }
-//
-
-static int
-__kmp_match_str( char const *token, char const *buf, const char **end) {
-
- KMP_ASSERT(token != NULL);
- KMP_ASSERT(buf != NULL);
- KMP_ASSERT(end != NULL);
-
- while (*token && *buf) {
- char ct = *token, cb = *buf;
-
- if(ct >= 'a' && ct <= 'z')
- ct -= 'a' - 'A';
- if(cb >= 'a' && cb <= 'z')
- cb -= 'a' - 'A';
- if (ct != cb)
- return FALSE;
- ++token;
- ++buf;
- }
- if (*token) {
- return FALSE;
- }
- *end = buf;
- return TRUE;
-}
-
-
-static size_t
-__kmp_round4k( size_t size ) {
- size_t _4k = 4 * 1024;
- if ( size & ( _4k - 1 ) ) {
- size &= ~ ( _4k - 1 );
- if ( size <= KMP_SIZE_T_MAX - _4k ) {
- size += _4k; // Round up if there is no overflow.
- }; // if
- }; // if
- return size;
-} // __kmp_round4k
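-
-// Illustrative values for the rounding above: __kmp_round4k( 5000 ) clears the
-// low 12 bits (giving 4096) and then adds 4096, returning 8192; sizes that are
-// already multiples of 4096 are returned unchanged.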
-
-
-/*
- Here, multipliers are like __kmp_convert_to_seconds, but floating-point
- values are allowed, and the return value is in milliseconds. The default
- multiplier is milliseconds. Returns INT_MAX only if the value specified
- matches "infinit*". Returns -1 if specified string is invalid.
-*/
-int
-__kmp_convert_to_milliseconds( char const * data )
-{
- int ret, nvalues, factor;
- char mult, extra;
- double value;
-
- if (data == NULL) return (-1);
- if ( __kmp_str_match( "infinit", -1, data)) return (INT_MAX);
- value = (double) 0.0;
- mult = '\0';
- nvalues = KMP_SSCANF (data, "%lf%c%c", &value, &mult, &extra);
- if (nvalues < 1) return (-1);
- if (nvalues == 1) mult = '\0';
- if (nvalues == 3) return (-1);
-
- if (value < 0) return (-1);
-
- switch (mult) {
- case '\0':
- /* default is milliseconds */
- factor = 1;
- break;
- case 's': case 'S':
- factor = 1000;
- break;
- case 'm': case 'M':
- factor = 1000 * 60;
- break;
- case 'h': case 'H':
- factor = 1000 * 60 * 60;
- break;
- case 'd': case 'D':
- factor = 1000 * 24 * 60 * 60;
- break;
- default:
- return (-1);
- }
-
- if ( value >= ( (INT_MAX-1) / factor) )
- ret = INT_MAX-1; /* Don't allow infinite value here */
- else
- ret = (int) (value * (double) factor); /* truncate to int */
-
- return ret;
-}
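-
-// Illustrative conversions for the routine above (derived from the code, not
-// from documentation):
-//   "200"      -> 200      (default unit is milliseconds)
-//   "2.5s"     -> 2500     (seconds, factor 1000)
-//   "3m"       -> 180000   (minutes, factor 60000)
-//   "infinite" -> INT_MAX  (matches "infinit*")
-//   "5x"       -> -1       (unknown multiplier)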
-
-
-static int
-__kmp_strcasecmp_with_sentinel( char const * a, char const * b, char sentinel ) {
- if(a == NULL)
- a = "";
- if(b == NULL)
- b = "";
- while(*a && *b && *b != sentinel) {
- char ca = *a, cb = *b;
-
- if(ca >= 'a' && ca <= 'z')
- ca -= 'a' - 'A';
- if(cb >= 'a' && cb <= 'z')
- cb -= 'a' - 'A';
- if(ca != cb)
- return (int)(unsigned char)*a - (int)(unsigned char)*b;
- ++a;
- ++b;
- }
- return *a ?
- (*b && *b != sentinel) ? (int)(unsigned char)*a - (int)(unsigned char)*b : 1 :
- (*b && *b != sentinel) ? -1 : 0;
-}
-
-
-// =================================================================================================
-// Table structures and helper functions.
-// =================================================================================================
-
-typedef struct __kmp_setting kmp_setting_t;
-typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t;
-typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t;
-typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t;
-
-typedef void ( * kmp_stg_parse_func_t )( char const * name, char const * value, void * data );
-typedef void ( * kmp_stg_print_func_t )( kmp_str_buf_t * buffer, char const * name, void * data );
-
-struct __kmp_setting {
- char const * name; // Name of setting (environment variable).
- kmp_stg_parse_func_t parse; // Parser function.
- kmp_stg_print_func_t print; // Print function.
- void * data; // Data passed to parser and printer.
- int set; // Variable set during this "session"
- // (__kmp_env_initialize() or kmp_set_defaults() call).
- int defined; // Variable set in any "session".
-}; // struct __kmp_setting
-
-struct __kmp_stg_ss_data {
- size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others.
- kmp_setting_t * * rivals; // Array of pointers to rivals (including itself).
-}; // struct __kmp_stg_ss_data
-
-struct __kmp_stg_wp_data {
- int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY.
- kmp_setting_t * * rivals; // Array of pointers to rivals (including itself).
-}; // struct __kmp_stg_wp_data
-
-struct __kmp_stg_fr_data {
- int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION.
- kmp_setting_t * * rivals; // Array of pointers to rivals (including itself).
-}; // struct __kmp_stg_fr_data
-
-static int
-__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found.
- char const * name, // Name of variable.
- char const * value, // Value of the variable.
- kmp_setting_t * * rivals // List of rival settings (the list must include current one).
-);
-
-
-// -------------------------------------------------------------------------------------------------
-// Helper parse functions.
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_bool(
- char const * name,
- char const * value,
- int * out
-) {
- if ( __kmp_str_match_true( value ) ) {
- * out = TRUE;
- } else if (__kmp_str_match_false( value ) ) {
- * out = FALSE;
- } else {
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( BadBoolValue, name, value ),
- KMP_HNT( ValidBoolValues ),
- __kmp_msg_null
- );
- }; // if
-} // __kmp_stg_parse_bool
-
-static void
-__kmp_stg_parse_size(
- char const * name,
- char const * value,
- size_t size_min,
- size_t size_max,
- int * is_specified,
- size_t * out,
- size_t factor
-) {
- char const * msg = NULL;
- #if KMP_OS_DARWIN
- size_min = __kmp_round4k( size_min );
- size_max = __kmp_round4k( size_max );
- #endif // KMP_OS_DARWIN
- if ( value ) {
- if ( is_specified != NULL ) {
- * is_specified = 1;
- }; // if
- __kmp_str_to_size( value, out, factor, & msg );
- if ( msg == NULL ) {
- if ( * out > size_max ) {
- * out = size_max;
- msg = KMP_I18N_STR( ValueTooLarge );
- } else if ( * out < size_min ) {
- * out = size_min;
- msg = KMP_I18N_STR( ValueTooSmall );
- } else {
- #if KMP_OS_DARWIN
- size_t round4k = __kmp_round4k( * out );
- if ( * out != round4k ) {
- * out = round4k;
- msg = KMP_I18N_STR( NotMultiple4K );
- }; // if
- #endif
- }; // if
- } else {
- // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to size_max silently.
- if ( * out < size_min ) {
- * out = size_max;
- }
- else if ( * out > size_max ) {
- * out = size_max;
- }; // if
- }; // if
- if ( msg != NULL ) {
- // Message is not empty. Print warning.
- kmp_str_buf_t buf;
- __kmp_str_buf_init( & buf );
- __kmp_str_buf_print_size( & buf, * out );
- KMP_WARNING( ParseSizeIntWarn, name, value, msg );
- KMP_INFORM( Using_str_Value, name, buf.str );
- __kmp_str_buf_free( & buf );
- }; // if
- }; // if
-} // __kmp_stg_parse_size
-
-#if KMP_AFFINITY_SUPPORTED
-static void
-__kmp_stg_parse_str(
- char const * name,
- char const * value,
- char const * * out
-) {
- KMP_INTERNAL_FREE( (void *) * out );
- * out = __kmp_str_format( "%s", value );
-} // __kmp_stg_parse_str
-#endif
-
-static void
-__kmp_stg_parse_int(
- char const * name, // I: Name of environment variable (used in warning messages).
- char const * value, // I: Value of environment variable to parse.
- int min, // I: Minimal allowed value.
- int max, // I: Maximum allowed value.
- int * out // O: Output (parsed) value.
-) {
- char const * msg = NULL;
- kmp_uint64 uint = * out;
- __kmp_str_to_uint( value, & uint, & msg );
- if ( msg == NULL ) {
- if ( uint < (unsigned int)min ) {
- msg = KMP_I18N_STR( ValueTooSmall );
- uint = min;
- } else if ( uint > (unsigned int)max ) {
- msg = KMP_I18N_STR( ValueTooLarge );
- uint = max;
- }; // if
- } else {
- // If overflow occurred, msg contains the error message and uint is very big.
- // Clamp it to the allowed range.
- if ( uint < (unsigned int)min ) {
- uint = min;
- }
- else if ( uint > (unsigned int)max ) {
- uint = max;
- }; // if
- }; // if
- if ( msg != NULL ) {
- // Message is not empty. Print warning.
- kmp_str_buf_t buf;
- KMP_WARNING( ParseSizeIntWarn, name, value, msg );
- __kmp_str_buf_init( & buf );
- __kmp_str_buf_print( &buf, "%" KMP_UINT64_SPEC "", uint );
- KMP_INFORM( Using_uint64_Value, name, buf.str );
- __kmp_str_buf_free( &buf );
- }; // if
- * out = uint;
-} // __kmp_stg_parse_int
-
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-static void
-__kmp_stg_parse_file(
- char const * name,
- char const * value,
- char * suffix,
- char * * out
-) {
- char buffer[256];
- char *t;
- int hasSuffix;
- KMP_INTERNAL_FREE( (void *) * out );
- t = (char *) strrchr(value, '.');
- hasSuffix = t && __kmp_str_eqf( t, suffix );
- t = __kmp_str_format( "%s%s", value, hasSuffix ? "" : suffix );
- __kmp_expand_file_name( buffer, sizeof(buffer), t);
- KMP_INTERNAL_FREE(t);
- * out = __kmp_str_format( "%s", buffer );
-} // __kmp_stg_parse_file
-#endif
-
-#ifdef KMP_DEBUG
-static char * par_range_to_print = NULL;
-
-static void
-__kmp_stg_parse_par_range(
- char const * name,
- char const * value,
- int * out_range,
- char * out_routine,
- char * out_file,
- int * out_lb,
- int * out_ub
-) {
- size_t len = KMP_STRLEN( value + 1 );
- par_range_to_print = (char *) KMP_INTERNAL_MALLOC( len +1 );
- KMP_STRNCPY_S( par_range_to_print, len + 1, value, len + 1);
- __kmp_par_range = +1;
- __kmp_par_range_lb = 0;
- __kmp_par_range_ub = INT_MAX;
- for (;;) {
- unsigned int len;
- if (( value == NULL ) || ( *value == '\0' )) {
- break;
- }
- if ( ! __kmp_strcasecmp_with_sentinel( "routine", value, '=' )) {
- value = strchr( value, '=' ) + 1;
- len = __kmp_readstr_with_sentinel( out_routine,
- value, KMP_PAR_RANGE_ROUTINE_LEN - 1, ',' );
- if ( len == 0 ) {
- goto par_range_error;
- }
- value = strchr( value, ',' );
- if ( value != NULL ) {
- value++;
- }
- continue;
- }
- if ( ! __kmp_strcasecmp_with_sentinel( "filename", value, '=' )) {
- value = strchr( value, '=' ) + 1;
- len = __kmp_readstr_with_sentinel( out_file,
- value, KMP_PAR_RANGE_FILENAME_LEN - 1, ',' );
- if ( len == 0) {
- goto par_range_error;
- }
- value = strchr( value, ',' );
- if ( value != NULL ) {
- value++;
- }
- continue;
- }
- if (( ! __kmp_strcasecmp_with_sentinel( "range", value, '=' ))
- || ( ! __kmp_strcasecmp_with_sentinel( "incl_range", value, '=' ))) {
- value = strchr( value, '=' ) + 1;
- if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub ) != 2 ) {
- goto par_range_error;
- }
- *out_range = +1;
- value = strchr( value, ',' );
- if ( value != NULL ) {
- value++;
- }
- continue;
- }
- if ( ! __kmp_strcasecmp_with_sentinel( "excl_range", value, '=' )) {
- value = strchr( value, '=' ) + 1;
- if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub) != 2 ) {
- goto par_range_error;
- }
- *out_range = -1;
- value = strchr( value, ',' );
- if ( value != NULL ) {
- value++;
- }
- continue;
- }
- par_range_error:
- KMP_WARNING( ParRangeSyntax, name );
- __kmp_par_range = 0;
- break;
- }
-} // __kmp_stg_parse_par_range
-#endif
-
-int
-__kmp_initial_threads_capacity( int req_nproc )
-{
- int nth = 32;
-
- /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth) */
- if (nth < (4 * req_nproc))
- nth = (4 * req_nproc);
- if (nth < (4 * __kmp_xproc))
- nth = (4 * __kmp_xproc);
-
- if (nth > __kmp_max_nth)
- nth = __kmp_max_nth;
-
- return nth;
-}
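-
-// Worked example (hypothetical machine): with req_nproc == 2, __kmp_xproc == 16
-// and a large __kmp_max_nth, the result is MIN( MAX( 32, 8, 64 ), __kmp_max_nth )
-// == 64 initial thread-table entries.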
-
-
-int
-__kmp_default_tp_capacity( int req_nproc, int max_nth, int all_threads_specified) {
- int nth = 128;
-
- if(all_threads_specified)
- return max_nth;
- /* MIN( MAX (128, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth ) */
- if (nth < (4 * req_nproc))
- nth = (4 * req_nproc);
- if (nth < (4 * __kmp_xproc))
- nth = (4 * __kmp_xproc);
-
- if (nth > __kmp_max_nth)
- nth = __kmp_max_nth;
-
- return nth;
-}
-
-
-// -------------------------------------------------------------------------------------------------
-// Helper print functions.
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_print_bool( kmp_str_buf_t * buffer, char const * name, int value ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_BOOL;
- } else {
- __kmp_str_buf_print( buffer, " %s=%s\n", name, value ? "true" : "false" );
- }
-} // __kmp_stg_print_bool
-
-static void
-__kmp_stg_print_int( kmp_str_buf_t * buffer, char const * name, int value ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_INT;
- } else {
- __kmp_str_buf_print( buffer, " %s=%d\n", name, value );
- }
-} // __kmp_stg_print_int
-
-static void
-__kmp_stg_print_uint64( kmp_str_buf_t * buffer, char const * name, kmp_uint64 value ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_UINT64;
- } else {
- __kmp_str_buf_print( buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value );
- }
-} // __kmp_stg_print_uint64
-
-static void
-__kmp_stg_print_str( kmp_str_buf_t * buffer, char const * name, char const * value ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_STR;
- } else {
- __kmp_str_buf_print( buffer, " %s=%s\n", name, value );
- }
-} // __kmp_stg_print_str
-
-static void
-__kmp_stg_print_size( kmp_str_buf_t * buffer, char const * name, size_t value ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(name);
- __kmp_str_buf_print_size( buffer, value );
- __kmp_str_buf_print( buffer, "'\n" );
- } else {
- __kmp_str_buf_print( buffer, " %s=", name );
- __kmp_str_buf_print_size( buffer, value );
- __kmp_str_buf_print( buffer, "\n" );
- return;
- }
-} // __kmp_stg_print_size
-
-
-// =================================================================================================
-// Parse and print functions.
-// =================================================================================================
-
-// -------------------------------------------------------------------------------------------------
-// KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_all_threads( char const * name, char const * value, void * data ) {
-
- kmp_setting_t * * rivals = (kmp_setting_t * *) data;
- int rc;
- rc = __kmp_stg_check_rivals( name, value, rivals );
- if ( rc ) {
- return;
- }; // if
- if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) {
- __kmp_max_nth = __kmp_xproc;
- __kmp_allThreadsSpecified = 1;
- } else {
- __kmp_stg_parse_int( name, value, 1, __kmp_sys_max_nth, & __kmp_max_nth );
- __kmp_allThreadsSpecified = 0;
- }
- K_DIAG( 1, ( "__kmp_max_nth == %d\n", __kmp_max_nth ) );
-
-} // __kmp_stg_parse_all_threads
-
-static void
-__kmp_stg_print_all_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_max_nth );
-} // __kmp_stg_print_all_threads
-
-// -------------------------------------------------------------------------------------------------
-// KMP_BLOCKTIME
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_blocktime( char const * name, char const * value, void * data ) {
- __kmp_dflt_blocktime = __kmp_convert_to_milliseconds( value );
- if ( __kmp_dflt_blocktime < 0 ) {
- __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
- __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidValue, name, value ), __kmp_msg_null );
- KMP_INFORM( Using_int_Value, name, __kmp_dflt_blocktime );
- __kmp_env_blocktime = FALSE; // Revert to default as if var not set.
- } else {
- if ( __kmp_dflt_blocktime < KMP_MIN_BLOCKTIME ) {
- __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME;
- __kmp_msg( kmp_ms_warning, KMP_MSG( SmallValue, name, value ), __kmp_msg_null );
- KMP_INFORM( MinValueUsing, name, __kmp_dflt_blocktime );
- } else if ( __kmp_dflt_blocktime > KMP_MAX_BLOCKTIME ) {
- __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME;
- __kmp_msg( kmp_ms_warning, KMP_MSG( LargeValue, name, value ), __kmp_msg_null );
- KMP_INFORM( MaxValueUsing, name, __kmp_dflt_blocktime );
- }; // if
- __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified.
- }; // if
- // Calculate the number of monitor thread wakeup intervals corresponding to blocktime.
- __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
- __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
- K_DIAG( 1, ( "__kmp_env_blocktime == %d\n", __kmp_env_blocktime ) );
- if ( __kmp_env_blocktime ) {
- K_DIAG( 1, ( "__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime ) );
- }
-} // __kmp_stg_parse_blocktime
-
-static void
-__kmp_stg_print_blocktime( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_dflt_blocktime );
-} // __kmp_stg_print_blocktime
-
-// -------------------------------------------------------------------------------------------------
-// KMP_DUPLICATE_LIB_OK
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_duplicate_lib_ok( char const * name, char const * value, void * data ) {
- /* This variable is not actually supported; it is kept here for compatibility
- with earlier builds and for the static/dynamic combination. */
- __kmp_stg_parse_bool( name, value, & __kmp_duplicate_library_ok );
-} // __kmp_stg_parse_duplicate_lib_ok
-
-static void
-__kmp_stg_print_duplicate_lib_ok( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_duplicate_library_ok );
-} // __kmp_stg_print_duplicate_lib_ok
-
-// -------------------------------------------------------------------------------------------------
-// KMP_INHERIT_FP_CONTROL
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-static void
-__kmp_stg_parse_inherit_fp_control( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_inherit_fp_control );
-} // __kmp_stg_parse_inherit_fp_control
-
-static void
-__kmp_stg_print_inherit_fp_control( kmp_str_buf_t * buffer, char const * name, void * data ) {
-#if KMP_DEBUG
- __kmp_stg_print_bool( buffer, name, __kmp_inherit_fp_control );
-#endif /* KMP_DEBUG */
-} // __kmp_stg_print_inherit_fp_control
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-// -------------------------------------------------------------------------------------------------
-// KMP_LIBRARY, OMP_WAIT_POLICY
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_wait_policy( char const * name, char const * value, void * data ) {
-
- kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, wait->rivals );
- if ( rc ) {
- return;
- }; // if
-
- if ( wait->omp ) {
- if ( __kmp_str_match( "ACTIVE", 1, value ) ) {
- __kmp_library = library_turnaround;
- } else if ( __kmp_str_match( "PASSIVE", 1, value ) ) {
- __kmp_library = library_throughput;
- } else {
- KMP_WARNING( StgInvalidValue, name, value );
- }; // if
- } else {
- if ( __kmp_str_match( "serial", 1, value ) ) { /* S */
- __kmp_library = library_serial;
- } else if ( __kmp_str_match( "throughput", 2, value ) ) { /* TH */
- __kmp_library = library_throughput;
- } else if ( __kmp_str_match( "turnaround", 2, value ) ) { /* TU */
- __kmp_library = library_turnaround;
- } else if ( __kmp_str_match( "dedicated", 1, value ) ) { /* D */
- __kmp_library = library_turnaround;
- } else if ( __kmp_str_match( "multiuser", 1, value ) ) { /* M */
- __kmp_library = library_throughput;
- } else {
- KMP_WARNING( StgInvalidValue, name, value );
- }; // if
- }; // if
- __kmp_aux_set_library( __kmp_library );
-
-} // __kmp_stg_parse_wait_policy
-
-static void
-__kmp_stg_print_wait_policy( kmp_str_buf_t * buffer, char const * name, void * data ) {
-
- kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data;
- char const * value = NULL;
-
- if ( wait->omp ) {
- switch ( __kmp_library ) {
- case library_turnaround : {
- value = "ACTIVE";
- } break;
- case library_throughput : {
- value = "PASSIVE";
- } break;
- }; // switch
- } else {
- switch ( __kmp_library ) {
- case library_serial : {
- value = "serial";
- } break;
- case library_turnaround : {
- value = "turnaround";
- } break;
- case library_throughput : {
- value = "throughput";
- } break;
- }; // switch
- }; // if
- if ( value != NULL ) {
- __kmp_stg_print_str( buffer, name, value );
- }; // if
-
-} // __kmp_stg_print_wait_policy
-
-// -------------------------------------------------------------------------------------------------
-// KMP_MONITOR_STACKSIZE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_monitor_stacksize( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_size(
- name,
- value,
- __kmp_sys_min_stksize,
- KMP_MAX_STKSIZE,
- NULL,
- & __kmp_monitor_stksize,
- 1
- );
-} // __kmp_stg_parse_monitor_stacksize
-
-static void
-__kmp_stg_print_monitor_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if( __kmp_env_format ) {
- if ( __kmp_monitor_stksize > 0 )
- KMP_STR_BUF_PRINT_NAME_EX(name);
- else
- KMP_STR_BUF_PRINT_NAME;
- } else {
- __kmp_str_buf_print( buffer, " %s", name );
- }
- if ( __kmp_monitor_stksize > 0 ) {
- __kmp_str_buf_print_size( buffer, __kmp_monitor_stksize );
- } else {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- if( __kmp_env_format && __kmp_monitor_stksize ) {
- __kmp_str_buf_print( buffer, "'\n");
- }
-
-} // __kmp_stg_print_monitor_stacksize
-
-// -------------------------------------------------------------------------------------------------
-// KMP_SETTINGS
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_settings( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_settings );
-} // __kmp_stg_parse_settings
-
-static void
-__kmp_stg_print_settings( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_settings );
-} // __kmp_stg_print_settings
-
-// -------------------------------------------------------------------------------------------------
-// KMP_STACKPAD
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_stackpad( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int(
- name, // Env var name
- value, // Env var value
- KMP_MIN_STKPADDING, // Min value
- KMP_MAX_STKPADDING, // Max value
- & __kmp_stkpadding // Var to initialize
- );
-} // __kmp_stg_parse_stackpad
-
-static void
-__kmp_stg_print_stackpad( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_stkpadding );
-} // __kmp_stg_print_stackpad
-
-// -------------------------------------------------------------------------------------------------
-// KMP_STACKOFFSET
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_stackoffset( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_size(
- name, // Env var name
- value, // Env var value
- KMP_MIN_STKOFFSET, // Min value
- KMP_MAX_STKOFFSET, // Max value
- NULL, //
- & __kmp_stkoffset, // Var to initialize
- 1
- );
-} // __kmp_stg_parse_stackoffset
-
-static void
-__kmp_stg_print_stackoffset( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_size( buffer, name, __kmp_stkoffset );
-} // __kmp_stg_print_stackoffset
-
-// -------------------------------------------------------------------------------------------------
-// KMP_STACKSIZE, OMP_STACKSIZE, GOMP_STACKSIZE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_stacksize( char const * name, char const * value, void * data ) {
-
- kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, stacksize->rivals );
- if ( rc ) {
- return;
- }; // if
- __kmp_stg_parse_size(
- name, // Env var name
- value, // Env var value
- __kmp_sys_min_stksize, // Min value
- KMP_MAX_STKSIZE, // Max value
- & __kmp_env_stksize, //
- & __kmp_stksize, // Var to initialize
- stacksize->factor
- );
-
-} // __kmp_stg_parse_stacksize
-
-// This function is called for printing both KMP_STACKSIZE (factor is 1) and OMP_STACKSIZE (factor is 1024).
-// Currently it is not possible to print the OMP_STACKSIZE value in bytes. We can consider adding this
-// possibility upon customer request in the future.
-static void
-__kmp_stg_print_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) {
- kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data;
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(name);
- __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize );
- __kmp_str_buf_print( buffer, "'\n" );
- } else {
- __kmp_str_buf_print( buffer, " %s=", name );
- __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize );
- __kmp_str_buf_print( buffer, "\n" );
- }
-} // __kmp_stg_print_stacksize
-
-// -------------------------------------------------------------------------------------------------
-// KMP_VERSION
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_version( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_version );
-} // __kmp_stg_parse_version
-
-static void
-__kmp_stg_print_version( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_version );
-} // __kmp_stg_print_version
-
-// -------------------------------------------------------------------------------------------------
-// KMP_WARNINGS
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_warnings( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_generate_warnings );
- if (__kmp_generate_warnings != kmp_warnings_off) { // AC: we have only 0/1 values documented,
- __kmp_generate_warnings = kmp_warnings_explicit; // so reset it to explicit in order to
- } // distinguish from default setting
-} // __kmp_stg_parse_warnings
-
-static void
-__kmp_stg_print_warnings( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_generate_warnings ); // AC: TODO: change to print_int?
-} // __kmp_stg_print_warnings // (needs documentation change)...
-
-// -------------------------------------------------------------------------------------------------
-// OMP_NESTED, OMP_NUM_THREADS
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_nested( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_dflt_nested );
-} // __kmp_stg_parse_nested
-
-static void
-__kmp_stg_print_nested( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_dflt_nested );
-} // __kmp_stg_print_nested
-
-static void
-__kmp_parse_nested_num_threads( const char *var, const char *env, kmp_nested_nthreads_t *nth_array )
-{
- const char *next = env;
- const char *scan = next;
-
- int total = 0; // Count elements that were set. It'll be used as an array size
- int prev_comma = FALSE; // For correct processing of sequential commas
-
- // Count the number of values in the env. var string
- for ( ; ; ) {
- SKIP_WS( next );
-
- if ( *next == '\0' ) {
- break;
- }
- // Next character is not an integer or not a comma => end of list
- if ( ( ( *next < '0' ) || ( *next > '9' ) ) && ( *next !=',') ) {
- KMP_WARNING( NthSyntaxError, var, env );
- return;
- }
- // The next character is ','
- if ( *next == ',' ) {
- // ',' is the first character
- if ( total == 0 || prev_comma ) {
- total++;
- }
- prev_comma = TRUE;
- next++; //skip ','
- SKIP_WS( next );
- }
- // Next character is a digit
- if ( *next >= '0' && *next <= '9' ) {
- prev_comma = FALSE;
- SKIP_DIGITS( next );
- total++;
- const char *tmp = next;
- SKIP_WS( tmp );
- if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) {
- KMP_WARNING( NthSpacesNotAllowed, var, env );
- return;
- }
- }
- }
- KMP_DEBUG_ASSERT( total > 0 );
- if( total <= 0 ) {
- KMP_WARNING( NthSyntaxError, var, env );
- return;
- }
-
- // Check if the nested nthreads array exists
- if ( ! nth_array->nth ) {
- // Allocate an array of double size
- nth_array->nth = ( int * )KMP_INTERNAL_MALLOC( sizeof( int ) * total * 2 );
- if ( nth_array->nth == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }
- nth_array->size = total * 2;
- } else {
- if ( nth_array->size < total ) {
- // Increase the array size
- do {
- nth_array->size *= 2;
- } while ( nth_array->size < total );
-
- nth_array->nth = (int *) KMP_INTERNAL_REALLOC(
- nth_array->nth, sizeof( int ) * nth_array->size );
- if ( nth_array->nth == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }
- }
- }
- nth_array->used = total;
- int i = 0;
-
- prev_comma = FALSE;
- total = 0;
- // Save values in the array
- for ( ; ; ) {
- SKIP_WS( scan );
- if ( *scan == '\0' ) {
- break;
- }
- // The next character is ','
- if ( *scan == ',' ) {
- // ',' in the beginning of the list
- if ( total == 0 ) {
- // The value is supposed to be equal to __kmp_avail_proc but it is unknown at the moment.
- // So let's put a placeholder (#threads = 0) to correct it later.
- nth_array->nth[i++] = 0;
- total++;
- }else if ( prev_comma ) {
- // Num threads is inherited from the previous level
- nth_array->nth[i] = nth_array->nth[i - 1];
- i++;
- total++;
- }
- prev_comma = TRUE;
- scan++; //skip ','
- SKIP_WS( scan );
- }
- // Next character is a digit
- if ( *scan >= '0' && *scan <= '9' ) {
- int num;
- const char *buf = scan;
- char const * msg = NULL;
- prev_comma = FALSE;
- SKIP_DIGITS( scan );
- total++;
-
- num = __kmp_str_to_int( buf, *scan );
- if ( num < KMP_MIN_NTH ) {
- msg = KMP_I18N_STR( ValueTooSmall );
- num = KMP_MIN_NTH;
- } else if ( num > __kmp_sys_max_nth ) {
- msg = KMP_I18N_STR( ValueTooLarge );
- num = __kmp_sys_max_nth;
- }
- if ( msg != NULL ) {
- // Message is not empty. Print warning.
- KMP_WARNING( ParseSizeIntWarn, var, env, msg );
- KMP_INFORM( Using_int_Value, var, num );
- }
- nth_array->nth[i++] = num;
- }
- }
-}
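-
-// Parsing examples for the routine above (illustrative, derived from the code):
-//   "4,3,2"  -> nth = {4, 3, 2}, used = 3
-//   ",2"     -> nth = {0, 2},    used = 2   (0 is a placeholder corrected later
-//                                            to __kmp_avail_proc)
-//   "4,,2"   -> nth = {4, 4, 2}, used = 3   (empty entry repeats previous level)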
-
-static void
-__kmp_stg_parse_num_threads( char const * name, char const * value, void * data ) {
- // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers!
- if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) {
- // The array of 1 element
- __kmp_nested_nth.nth = ( int* )KMP_INTERNAL_MALLOC( sizeof( int ) );
- __kmp_nested_nth.size = __kmp_nested_nth.used = 1;
- __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_xproc;
- } else {
- __kmp_parse_nested_num_threads( name, value, & __kmp_nested_nth );
- if ( __kmp_nested_nth.nth ) {
- __kmp_dflt_team_nth = __kmp_nested_nth.nth[0];
- if ( __kmp_dflt_team_nth_ub < __kmp_dflt_team_nth ) {
- __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth;
- }
- }
- }; // if
- K_DIAG( 1, ( "__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth ) );
-} // __kmp_stg_parse_num_threads
-
-static void
-__kmp_stg_print_num_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME;
- } else {
- __kmp_str_buf_print( buffer, " %s", name );
- }
- if ( __kmp_nested_nth.used ) {
- kmp_str_buf_t buf;
- __kmp_str_buf_init( &buf );
- for ( int i = 0; i < __kmp_nested_nth.used; i++) {
- __kmp_str_buf_print( &buf, "%d", __kmp_nested_nth.nth[i] );
- if ( i < __kmp_nested_nth.used - 1 ) {
- __kmp_str_buf_print( &buf, "," );
- }
- }
- __kmp_str_buf_print( buffer, "='%s'\n", buf.str );
- __kmp_str_buf_free(&buf);
- } else {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
-} // __kmp_stg_print_num_threads
-
-// -------------------------------------------------------------------------------------------------
-// OpenMP 3.0: KMP_TASKING, OMP_MAX_ACTIVE_LEVELS,
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_tasking( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 0, (int)tskm_max, (int *)&__kmp_tasking_mode );
-} // __kmp_stg_parse_tasking
-
-static void
-__kmp_stg_print_tasking( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_tasking_mode );
-} // __kmp_stg_print_tasking
-
-static void
-__kmp_stg_parse_task_stealing( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 0, 1, (int *)&__kmp_task_stealing_constraint );
-} // __kmp_stg_parse_task_stealing
-
-static void
-__kmp_stg_print_task_stealing( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_task_stealing_constraint );
-} // __kmp_stg_print_task_stealing
-
-static void
-__kmp_stg_parse_max_active_levels( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_dflt_max_active_levels );
-} // __kmp_stg_parse_max_active_levels
-
-static void
-__kmp_stg_print_max_active_levels( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_dflt_max_active_levels );
-} // __kmp_stg_print_max_active_levels
-
-#if KMP_NESTED_HOT_TEAMS
-// -------------------------------------------------------------------------------------------------
-// KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_hot_teams_level( char const * name, char const * value, void * data ) {
- if ( TCR_4(__kmp_init_parallel) ) {
- KMP_WARNING( EnvParallelWarn, name );
- return;
- } // read value before first parallel only
- __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_max_level );
-} // __kmp_stg_parse_hot_teams_level
-
-static void
-__kmp_stg_print_hot_teams_level( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_hot_teams_max_level );
-} // __kmp_stg_print_hot_teams_level
-
-static void
-__kmp_stg_parse_hot_teams_mode( char const * name, char const * value, void * data ) {
- if ( TCR_4(__kmp_init_parallel) ) {
- KMP_WARNING( EnvParallelWarn, name );
- return;
- } // read value before first parallel only
- __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_mode );
-} // __kmp_stg_parse_hot_teams_mode
-
-static void
-__kmp_stg_print_hot_teams_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_hot_teams_mode );
-} // __kmp_stg_print_hot_teams_mode
-
-#endif // KMP_NESTED_HOT_TEAMS
-
-// -------------------------------------------------------------------------------------------------
-// KMP_HANDLE_SIGNALS
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_HANDLE_SIGNALS
-
-static void
-__kmp_stg_parse_handle_signals( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_handle_signals );
-} // __kmp_stg_parse_handle_signals
-
-static void
-__kmp_stg_print_handle_signals( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_handle_signals );
-} // __kmp_stg_print_handle_signals
-
-#endif // KMP_HANDLE_SIGNALS
-
-// -------------------------------------------------------------------------------------------------
-// KMP_X_DEBUG, KMP_DEBUG, KMP_DEBUG_BUF_*, KMP_DIAG
-// -------------------------------------------------------------------------------------------------
-
-#ifdef KMP_DEBUG
-
-#define KMP_STG_X_DEBUG( x ) \
- static void __kmp_stg_parse_##x##_debug( char const * name, char const * value, void * data ) { \
- __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_##x##_debug ); \
- } /* __kmp_stg_parse_x_debug */ \
- static void __kmp_stg_print_##x##_debug( kmp_str_buf_t * buffer, char const * name, void * data ) { \
- __kmp_stg_print_int( buffer, name, kmp_##x##_debug ); \
- } /* __kmp_stg_print_x_debug */
-
-KMP_STG_X_DEBUG( a )
-KMP_STG_X_DEBUG( b )
-KMP_STG_X_DEBUG( c )
-KMP_STG_X_DEBUG( d )
-KMP_STG_X_DEBUG( e )
-KMP_STG_X_DEBUG( f )
-
-#undef KMP_STG_X_DEBUG
-
-static void
-__kmp_stg_parse_debug( char const * name, char const * value, void * data ) {
- int debug = 0;
- __kmp_stg_parse_int( name, value, 0, INT_MAX, & debug );
- if ( kmp_a_debug < debug ) {
- kmp_a_debug = debug;
- }; // if
- if ( kmp_b_debug < debug ) {
- kmp_b_debug = debug;
- }; // if
- if ( kmp_c_debug < debug ) {
- kmp_c_debug = debug;
- }; // if
- if ( kmp_d_debug < debug ) {
- kmp_d_debug = debug;
- }; // if
- if ( kmp_e_debug < debug ) {
- kmp_e_debug = debug;
- }; // if
- if ( kmp_f_debug < debug ) {
- kmp_f_debug = debug;
- }; // if
-} // __kmp_stg_parse_debug
-
-static void
-__kmp_stg_parse_debug_buf( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_debug_buf );
- // !!! TODO: Move buffer initialization out of this file! It may work incorrectly if
- // KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or KMP_DEBUG_BUF_CHARS.
- if ( __kmp_debug_buf ) {
- int i;
- int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars;
-
- /* allocate and initialize all entries in debug buffer to empty */
- __kmp_debug_buffer = (char *) __kmp_page_allocate( elements * sizeof( char ) );
- for ( i = 0; i < elements; i += __kmp_debug_buf_chars )
- __kmp_debug_buffer[i] = '\0';
-
- __kmp_debug_count = 0;
- }
- K_DIAG( 1, ( "__kmp_debug_buf = %d\n", __kmp_debug_buf ) );
-} // __kmp_stg_parse_debug_buf
-
-static void
-__kmp_stg_print_debug_buf( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_debug_buf );
-} // __kmp_stg_print_debug_buf
-
-static void
-__kmp_stg_parse_debug_buf_atomic( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_debug_buf_atomic );
-} // __kmp_stg_parse_debug_buf_atomic
-
-static void
-__kmp_stg_print_debug_buf_atomic( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_debug_buf_atomic );
-} // __kmp_stg_print_debug_buf_atomic
-
-static void
-__kmp_stg_parse_debug_buf_chars( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int(
- name,
- value,
- KMP_DEBUG_BUF_CHARS_MIN,
- INT_MAX,
- & __kmp_debug_buf_chars
- );
-} // __kmp_stg_parse_debug_buf_chars
-
-static void
-__kmp_stg_print_debug_buf_chars( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_debug_buf_chars );
-} // __kmp_stg_print_debug_buf_chars
-
-static void
-__kmp_stg_parse_debug_buf_lines( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int(
- name,
- value,
- KMP_DEBUG_BUF_LINES_MIN,
- INT_MAX,
- & __kmp_debug_buf_lines
- );
-} // __kmp_stg_parse_debug_buf_lines
-
-static void
-__kmp_stg_print_debug_buf_lines( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_debug_buf_lines );
-} // __kmp_stg_print_debug_buf_lines
-
-static void
-__kmp_stg_parse_diag( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_diag );
-} // __kmp_stg_parse_diag
-
-static void
-__kmp_stg_print_diag( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, kmp_diag );
-} // __kmp_stg_print_diag
-
-#endif // KMP_DEBUG
-
-// -------------------------------------------------------------------------------------------------
-// KMP_ALIGN_ALLOC
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_align_alloc( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_size(
- name,
- value,
- CACHE_LINE,
- INT_MAX,
- NULL,
- & __kmp_align_alloc,
- 1
- );
-} // __kmp_stg_parse_align_alloc
-
-static void
-__kmp_stg_print_align_alloc( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_size( buffer, name, __kmp_align_alloc );
-} // __kmp_stg_print_align_alloc
-
-// -------------------------------------------------------------------------------------------------
-// KMP_PLAIN_BARRIER, KMP_FORKJOIN_BARRIER, KMP_REDUCTION_BARRIER
-// -------------------------------------------------------------------------------------------------
-
-// TODO: Remove the __kmp_barrier_branch_bit_env_name variable, remove loops from parse and print
-// functions, pass required info through data argument.
-
-static void
-__kmp_stg_parse_barrier_branch_bit( char const * name, char const * value, void * data ) {
- const char *var;
-
- /* ---------- Barrier branch bit control ------------ */
- for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
- var = __kmp_barrier_branch_bit_env_name[ i ];
- if ( ( strcmp( var, name) == 0 ) && ( value != 0 ) ) {
- char *comma;
-
- comma = (char *) strchr( value, ',' );
- __kmp_barrier_gather_branch_bits[ i ] = ( kmp_uint32 ) __kmp_str_to_int( value, ',' );
- /* is there a specified release parameter? */
- if ( comma == NULL ) {
- __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
- } else {
- __kmp_barrier_release_branch_bits[ i ] = (kmp_uint32) __kmp_str_to_int( comma + 1, 0 );
-
- if ( __kmp_barrier_release_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) {
- __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null );
- __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
- }
- }
- if ( __kmp_barrier_gather_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) {
- KMP_WARNING( BarrGatherValueInvalid, name, value );
- KMP_INFORM( Using_uint_Value, name, __kmp_barrier_gather_bb_dflt );
- __kmp_barrier_gather_branch_bits[ i ] = __kmp_barrier_gather_bb_dflt;
- }
- }
- K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[ i ], \
- __kmp_barrier_gather_branch_bits [ i ], \
- __kmp_barrier_release_branch_bits [ i ]))
- }
-} // __kmp_stg_parse_barrier_branch_bit
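-
-// Example (illustrative): KMP_FORKJOIN_BARRIER="3,2" sets the gather branch
-// bits of the fork/join barrier to 3 and the release branch bits to 2; with no
-// comma (e.g. "3") the release part falls back to __kmp_barrier_release_bb_dflt.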
-
-static void
-__kmp_stg_print_barrier_branch_bit( kmp_str_buf_t * buffer, char const * name, void * data ) {
- const char *var;
- for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
- var = __kmp_barrier_branch_bit_env_name[ i ];
- if ( strcmp( var, name) == 0 ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_branch_bit_env_name[ i ]);
- } else {
- __kmp_str_buf_print( buffer, " %s='", __kmp_barrier_branch_bit_env_name[ i ] );
- }
- __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_barrier_gather_branch_bits [ i ], __kmp_barrier_release_branch_bits [ i ]);
- }
- }
-} // __kmp_stg_print_barrier_branch_bit
-
-
-// -------------------------------------------------------------------------------------------------
-// KMP_PLAIN_BARRIER_PATTERN, KMP_FORKJOIN_BARRIER_PATTERN, KMP_REDUCTION_BARRIER_PATTERN
-// -------------------------------------------------------------------------------------------------
-
-// TODO: Remove __kmp_barrier_pattern_name variable, remove loops from parse and print functions,
-// pass required data to functions through data argument.
-
-static void
-__kmp_stg_parse_barrier_pattern( char const * name, char const * value, void * data ) {
- const char *var;
- /* ---------- Barrier method control ------------ */
-
- for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
- var = __kmp_barrier_pattern_env_name[ i ];
-
- if ( ( strcmp ( var, name ) == 0 ) && ( value != 0 ) ) {
- int j;
- char *comma = (char *) strchr( value, ',' );
-
- /* handle first parameter: gather pattern */
- for ( j = bp_linear_bar; j<bp_last_bar; j++ ) {
- if (__kmp_match_with_sentinel( __kmp_barrier_pattern_name[j], value, 1, ',' )) {
- __kmp_barrier_gather_pattern[ i ] = (kmp_bar_pat_e) j;
- break;
- }
- }
- if ( j == bp_last_bar ) {
- KMP_WARNING( BarrGatherValueInvalid, name, value );
- KMP_INFORM( Using_str_Value, name, __kmp_barrier_pattern_name[ bp_linear_bar ] );
- }
-
- /* handle second parameter: release pattern */
- if ( comma != NULL ) {
- for ( j = bp_linear_bar; j < bp_last_bar; j++ ) {
- if ( __kmp_str_match( __kmp_barrier_pattern_name[j], 1, comma + 1 ) ) {
- __kmp_barrier_release_pattern[ i ] = (kmp_bar_pat_e) j;
- break;
- }
- }
- if (j == bp_last_bar) {
- __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null );
- KMP_INFORM( Using_str_Value, name, __kmp_barrier_pattern_name[ bp_linear_bar ] );
- }
- }
- }
- }
-} // __kmp_stg_parse_barrier_pattern
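-
-// Illustrative usage (pattern names assumed from __kmp_barrier_pattern_name): a setting such as
-// KMP_PLAIN_BARRIER_PATTERN="hyper,hyper" selects the gather pattern before the comma and the
-// release pattern after it; an unrecognized name triggers the warnings above, which report the
-// linear pattern as the value being used.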
-
-static void
-__kmp_stg_print_barrier_pattern( kmp_str_buf_t * buffer, char const * name, void * data ) {
- const char *var;
- for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
- var = __kmp_barrier_pattern_env_name[ i ];
- if ( strcmp ( var, name ) == 0 ) {
- int j = __kmp_barrier_gather_pattern [ i ];
- int k = __kmp_barrier_release_pattern [ i ];
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_pattern_env_name[ i ]);
- } else {
- __kmp_str_buf_print( buffer, " %s='", __kmp_barrier_pattern_env_name[ i ] );
- }
- __kmp_str_buf_print( buffer, "%s,%s'\n", __kmp_barrier_pattern_name [ j ], __kmp_barrier_pattern_name [ k ]);
- }
- }
-} // __kmp_stg_print_barrier_pattern
-
-// -------------------------------------------------------------------------------------------------
-// KMP_ABORT_DELAY
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_abort_delay( char const * name, char const * value, void * data ) {
- // Units of KMP_ABORT_DELAY are seconds; units of __kmp_abort_delay are milliseconds.
- int delay = __kmp_abort_delay / 1000;
- __kmp_stg_parse_int( name, value, 0, INT_MAX / 1000, & delay );
- __kmp_abort_delay = delay * 1000;
-} // __kmp_stg_parse_abort_delay
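-
-// Worked example (value assumed): KMP_ABORT_DELAY=10 is parsed as 10 seconds and stored as
-// __kmp_abort_delay == 10000 milliseconds; the INT_MAX/1000 upper bound above keeps the
-// multiplication by 1000 from overflowing.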
-
-static void
-__kmp_stg_print_abort_delay( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_abort_delay );
-} // __kmp_stg_print_abort_delay
-
-// -------------------------------------------------------------------------------------------------
-// KMP_CPUINFO_FILE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_cpuinfo_file( char const * name, char const * value, void * data ) {
- #if KMP_AFFINITY_SUPPORTED
- __kmp_stg_parse_str( name, value, & __kmp_cpuinfo_file );
- K_DIAG( 1, ( "__kmp_cpuinfo_file == %s\n", __kmp_cpuinfo_file ) );
- #endif
-} //__kmp_stg_parse_cpuinfo_file
-
-static void
-__kmp_stg_print_cpuinfo_file( kmp_str_buf_t * buffer, char const * name, void * data ) {
- #if KMP_AFFINITY_SUPPORTED
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME;
- } else {
- __kmp_str_buf_print( buffer, " %s", name );
- }
- if ( __kmp_cpuinfo_file ) {
- __kmp_str_buf_print( buffer, "='%s'\n", __kmp_cpuinfo_file );
- } else {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- #endif
-} //__kmp_stg_print_cpuinfo_file
-
-// -------------------------------------------------------------------------------------------------
-// KMP_FORCE_REDUCTION, KMP_DETERMINISTIC_REDUCTION
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_force_reduction( char const * name, char const * value, void * data )
-{
- kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, reduction->rivals );
- if ( rc ) {
- return;
- }; // if
- if ( reduction->force ) {
- if( value != 0 ) {
- if( __kmp_str_match( "critical", 0, value ) )
- __kmp_force_reduction_method = critical_reduce_block;
- else if( __kmp_str_match( "atomic", 0, value ) )
- __kmp_force_reduction_method = atomic_reduce_block;
- else if( __kmp_str_match( "tree", 0, value ) )
- __kmp_force_reduction_method = tree_reduce_block;
- else {
- KMP_FATAL( UnknownForceReduction, name, value );
- }
- }
- } else {
- __kmp_stg_parse_bool( name, value, & __kmp_determ_red );
- if( __kmp_determ_red ) {
- __kmp_force_reduction_method = tree_reduce_block;
- } else {
- __kmp_force_reduction_method = reduction_method_not_defined;
- }
- }
- K_DIAG( 1, ( "__kmp_force_reduction_method == %d\n", __kmp_force_reduction_method ) );
-} // __kmp_stg_parse_force_reduction
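-
-// Illustrative usage (example values assumed from the matching above): KMP_FORCE_REDUCTION="atomic"
-// forces atomic_reduce_block, while the non-force variant (KMP_DETERMINISTIC_REDUCTION) is parsed
-// as a boolean that maps "true" to tree_reduce_block and "false" to reduction_method_not_defined.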
-
-static void
-__kmp_stg_print_force_reduction( kmp_str_buf_t * buffer, char const * name, void * data ) {
-
- kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data;
- if ( reduction->force ) {
- if( __kmp_force_reduction_method == critical_reduce_block) {
- __kmp_stg_print_str( buffer, name, "critical");
- } else if ( __kmp_force_reduction_method == atomic_reduce_block ) {
- __kmp_stg_print_str( buffer, name, "atomic");
- } else if ( __kmp_force_reduction_method == tree_reduce_block ) {
- __kmp_stg_print_str( buffer, name, "tree");
- } else {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME;
- } else {
- __kmp_str_buf_print( buffer, " %s", name );
- }
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- } else {
- __kmp_stg_print_bool( buffer, name, __kmp_determ_red );
- }
-
-
-} // __kmp_stg_print_force_reduction
-
-// -------------------------------------------------------------------------------------------------
-// KMP_STORAGE_MAP
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_storage_map( char const * name, char const * value, void * data ) {
- if ( __kmp_str_match( "verbose", 1, value ) ) {
- __kmp_storage_map = TRUE;
- __kmp_storage_map_verbose = TRUE;
- __kmp_storage_map_verbose_specified = TRUE;
-
- } else {
- __kmp_storage_map_verbose = FALSE;
- __kmp_stg_parse_bool( name, value, & __kmp_storage_map ); // !!!
- }; // if
-} // __kmp_stg_parse_storage_map
-
-static void
-__kmp_stg_print_storage_map( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if ( __kmp_storage_map_verbose || __kmp_storage_map_verbose_specified ) {
- __kmp_stg_print_str( buffer, name, "verbose" );
- } else {
- __kmp_stg_print_bool( buffer, name, __kmp_storage_map );
- }
-} // __kmp_stg_print_storage_map
-
-// -------------------------------------------------------------------------------------------------
-// KMP_ALL_THREADPRIVATE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_all_threadprivate( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, __kmp_allThreadsSpecified ? __kmp_max_nth : 1, __kmp_max_nth,
- & __kmp_tp_capacity );
-} // __kmp_stg_parse_all_threadprivate
-
-static void
-__kmp_stg_print_all_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_tp_capacity );
-
-}
-
-// -------------------------------------------------------------------------------------------------
-// KMP_FOREIGN_THREADS_THREADPRIVATE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_foreign_threads_threadprivate( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_foreign_tp );
-} // __kmp_stg_parse_foreign_threads_threadprivate
-
-static void
-__kmp_stg_print_foreign_threads_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_foreign_tp );
-} // __kmp_stg_print_foreign_threads_threadprivate
-
-
-// -------------------------------------------------------------------------------------------------
-// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_AFFINITY_SUPPORTED
-//
-// Parse the proc id list. Return TRUE if successful, FALSE otherwise.
-//
-static int
-__kmp_parse_affinity_proc_id_list( const char *var, const char *env,
- const char **nextEnv, char **proclist )
-{
- const char *scan = env;
- const char *next = scan;
- int empty = TRUE;
-
- *proclist = NULL;
-
- for (;;) {
- int start, end, stride;
-
- SKIP_WS(scan);
- next = scan;
- if (*next == '\0') {
- break;
- }
-
- if (*next == '{') {
- int num;
- next++; // skip '{'
- SKIP_WS(next);
- scan = next;
-
- //
- // Read the first integer in the set.
- //
- if ((*next < '0') || (*next > '9')) {
- KMP_WARNING( AffSyntaxError, var );
- return FALSE;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(num >= 0);
-
- for (;;) {
- //
- // Check for end of set.
- //
- SKIP_WS(next);
- if (*next == '}') {
- next++; // skip '}'
- break;
- }
-
- //
- // Skip optional comma.
- //
- if (*next == ',') {
- next++;
- }
- SKIP_WS(next);
-
- //
- // Read the next integer in the set.
- //
- scan = next;
- if ((*next < '0') || (*next > '9')) {
- KMP_WARNING( AffSyntaxError, var );
- return FALSE;
- }
-
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(num >= 0);
- }
- empty = FALSE;
-
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
- }
-
- //
- // Next character is not an integer => end of list
- //
- if ((*next < '0') || (*next > '9')) {
- if (empty) {
- KMP_WARNING( AffSyntaxError, var );
- return FALSE;
- }
- break;
- }
-
- //
- // Read the first integer.
- //
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(start >= 0);
- SKIP_WS(next);
-
- //
- // If this isn't a range, then go on.
- //
- if (*next != '-') {
- empty = FALSE;
-
- //
- // Skip optional comma.
- //
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
- }
-
- //
- // This is a range. Skip over the '-' and read in the 2nd int.
- //
- next++; // skip '-'
- SKIP_WS(next);
- scan = next;
- if ((*next < '0') || (*next > '9')) {
- KMP_WARNING( AffSyntaxError, var );
- return FALSE;
- }
- SKIP_DIGITS(next);
- end = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(end >= 0);
-
- //
- // Check for a stride parameter
- //
- stride = 1;
- SKIP_WS(next);
- if (*next == ':') {
- //
- // A stride is specified. Skip over the ':' and read the 3rd int.
- //
- int sign = +1;
- next++; // skip ':'
- SKIP_WS(next);
- scan = next;
- if (*next == '-') {
- sign = -1;
- next++;
- SKIP_WS(next);
- scan = next;
- }
- if ((*next < '0') || (*next > '9')) {
- KMP_WARNING( AffSyntaxError, var );
- return FALSE;
- }
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(stride >= 0);
- stride *= sign;
- }
-
- //
- // Do some range checks.
- //
- if (stride == 0) {
- KMP_WARNING( AffZeroStride, var );
- return FALSE;
- }
- if (stride > 0) {
- if (start > end) {
- KMP_WARNING( AffStartGreaterEnd, var, start, end );
- return FALSE;
- }
- }
- else {
- if (start < end) {
- KMP_WARNING( AffStrideLessZero, var, start, end );
- return FALSE;
- }
- }
- if ((end - start) / stride > 65536 ) {
- KMP_WARNING( AffRangeTooBig, var, end, start, stride );
- return FALSE;
- }
-
- empty = FALSE;
-
- //
- // Skip optional comma.
- //
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
- scan = next;
- }
-
- *nextEnv = next;
-
- {
- int len = next - env;
- char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char));
- KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char));
- retlist[len] = '\0';
- *proclist = retlist;
- }
- return TRUE;
-}
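-
-// Illustrative proc id list accepted by this parser (example values assumed):
-// "0-3:1,5,{8,9,10}" -- a range with an explicit stride, a single id, and a brace-enclosed set.
-// The returned *proclist is simply a verified copy of the consumed portion of the input string.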
-
-
-//
-// If KMP_AFFINITY is specified without a type, then
-// __kmp_affinity_notype should point to its setting.
-//
-static kmp_setting_t *__kmp_affinity_notype = NULL;
-
-static void
-__kmp_parse_affinity_env( char const * name, char const * value,
- enum affinity_type * out_type,
- char ** out_proclist,
- int * out_verbose,
- int * out_warn,
- int * out_respect,
- enum affinity_gran * out_gran,
- int * out_gran_levels,
- int * out_dups,
- int * out_compact,
- int * out_offset
-)
-{
- char * buffer = NULL; // Copy of env var value.
- char * buf = NULL; // Buffer for strtok_r() function.
- char * next = NULL; // end of token / start of next.
- const char * start; // start of current token (for err msgs)
- int count = 0; // Counter of parsed integer numbers.
- int number[ 2 ]; // Parsed numbers.
-
- // Guards.
- int type = 0;
- int proclist = 0;
- int max_proclist = 0;
- int verbose = 0;
- int warnings = 0;
- int respect = 0;
- int gran = 0;
- int dups = 0;
-
- KMP_ASSERT( value != NULL );
-
- if ( TCR_4(__kmp_init_middle) ) {
- KMP_WARNING( EnvMiddleWarn, name );
- __kmp_env_toPrint( name, 0 );
- return;
- }
- __kmp_env_toPrint( name, 1 );
-
- buffer = __kmp_str_format( "%s", value ); // Copy env var to keep original intact.
- buf = buffer;
- SKIP_WS(buf);
-
- // Helper macros.
-
- //
- // If we see a parse error, emit a warning and scan to the next ",".
- //
- // FIXME - there's got to be a better way to print an error
- // message, hopefully without overwriting pieces of buf.
- //
- #define EMIT_WARN(skip,errlist) \
- { \
- char ch; \
- if (skip) { \
- SKIP_TO(next, ','); \
- } \
- ch = *next; \
- *next = '\0'; \
- KMP_WARNING errlist; \
- *next = ch; \
- if (skip) { \
- if (ch == ',') next++; \
- } \
- buf = next; \
- }
-
- #define _set_param(_guard,_var,_val) \
- { \
- if ( _guard == 0 ) { \
- _var = _val; \
- } else { \
- EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \
- }; \
- ++ _guard; \
- }
-
- #define set_type(val) _set_param( type, *out_type, val )
- #define set_verbose(val) _set_param( verbose, *out_verbose, val )
- #define set_warnings(val) _set_param( warnings, *out_warn, val )
- #define set_respect(val) _set_param( respect, *out_respect, val )
- #define set_dups(val) _set_param( dups, *out_dups, val )
- #define set_proclist(val) _set_param( proclist, *out_proclist, val )
-
- #define set_gran(val,levels) \
- { \
- if ( gran == 0 ) { \
- *out_gran = val; \
- *out_gran_levels = levels; \
- } else { \
- EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \
- }; \
- ++ gran; \
- }
-
-# if OMP_40_ENABLED
- KMP_DEBUG_ASSERT( ( __kmp_nested_proc_bind.bind_types != NULL )
- && ( __kmp_nested_proc_bind.used > 0 ) );
-# endif
-
- while ( *buf != '\0' ) {
- start = next = buf;
-
- if (__kmp_match_str("none", buf, (const char **)&next)) {
- set_type( affinity_none );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-# endif
- buf = next;
- } else if (__kmp_match_str("scatter", buf, (const char **)&next)) {
- set_type( affinity_scatter );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- buf = next;
- } else if (__kmp_match_str("compact", buf, (const char **)&next)) {
- set_type( affinity_compact );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- buf = next;
- } else if (__kmp_match_str("logical", buf, (const char **)&next)) {
- set_type( affinity_logical );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- buf = next;
- } else if (__kmp_match_str("physical", buf, (const char **)&next)) {
- set_type( affinity_physical );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- buf = next;
- } else if (__kmp_match_str("explicit", buf, (const char **)&next)) {
- set_type( affinity_explicit );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- buf = next;
- } else if (__kmp_match_str("balanced", buf, (const char **)&next)) {
- set_type( affinity_balanced );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- buf = next;
- } else if (__kmp_match_str("disabled", buf, (const char **)&next)) {
- set_type( affinity_disabled );
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-# endif
- buf = next;
- } else if (__kmp_match_str("verbose", buf, (const char **)&next)) {
- set_verbose( TRUE );
- buf = next;
- } else if (__kmp_match_str("noverbose", buf, (const char **)&next)) {
- set_verbose( FALSE );
- buf = next;
- } else if (__kmp_match_str("warnings", buf, (const char **)&next)) {
- set_warnings( TRUE );
- buf = next;
- } else if (__kmp_match_str("nowarnings", buf, (const char **)&next)) {
- set_warnings( FALSE );
- buf = next;
- } else if (__kmp_match_str("respect", buf, (const char **)&next)) {
- set_respect( TRUE );
- buf = next;
- } else if (__kmp_match_str("norespect", buf, (const char **)&next)) {
- set_respect( FALSE );
- buf = next;
- } else if (__kmp_match_str("duplicates", buf, (const char **)&next)
- || __kmp_match_str("dups", buf, (const char **)&next)) {
- set_dups( TRUE );
- buf = next;
- } else if (__kmp_match_str("noduplicates", buf, (const char **)&next)
- || __kmp_match_str("nodups", buf, (const char **)&next)) {
- set_dups( FALSE );
- buf = next;
- } else if (__kmp_match_str("granularity", buf, (const char **)&next)
- || __kmp_match_str("gran", buf, (const char **)&next)) {
- SKIP_WS(next);
- if (*next != '=') {
- EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
- continue;
- }
- next++; // skip '='
- SKIP_WS(next);
-
- buf = next;
- if (__kmp_match_str("fine", buf, (const char **)&next)) {
- set_gran( affinity_gran_fine, -1 );
- buf = next;
- } else if (__kmp_match_str("thread", buf, (const char **)&next)) {
- set_gran( affinity_gran_thread, -1 );
- buf = next;
- } else if (__kmp_match_str("core", buf, (const char **)&next)) {
- set_gran( affinity_gran_core, -1 );
- buf = next;
- } else if (__kmp_match_str("package", buf, (const char **)&next)) {
- set_gran( affinity_gran_package, -1 );
- buf = next;
- } else if (__kmp_match_str("node", buf, (const char **)&next)) {
- set_gran( affinity_gran_node, -1 );
- buf = next;
-# if KMP_GROUP_AFFINITY
- } else if (__kmp_match_str("group", buf, (const char **)&next)) {
- set_gran( affinity_gran_group, -1 );
- buf = next;
-# endif /* KMP_GROUP_AFFINITY */
- } else if ((*buf >= '0') && (*buf <= '9')) {
- int n;
- next = buf;
- SKIP_DIGITS(next);
- n = __kmp_str_to_int( buf, *next );
- KMP_ASSERT(n >= 0);
- buf = next;
- set_gran( affinity_gran_default, n );
- } else {
- EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
- continue;
- }
- } else if (__kmp_match_str("proclist", buf, (const char **)&next)) {
- char *temp_proclist;
-
- SKIP_WS(next);
- if (*next != '=') {
- EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
- continue;
- }
- next++; // skip '='
- SKIP_WS(next);
- if (*next != '[') {
- EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
- continue;
- }
- next++; // skip '['
- buf = next;
- if (! __kmp_parse_affinity_proc_id_list(name, buf,
- (const char **)&next, &temp_proclist)) {
- //
- // warning already emitted.
- //
- SKIP_TO(next, ']');
- if (*next == ']') next++;
- SKIP_TO(next, ',');
- if (*next == ',') next++;
- buf = next;
- continue;
- }
- if (*next != ']') {
- EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
- continue;
- }
- next++; // skip ']'
- set_proclist( temp_proclist );
- } else if ((*buf >= '0') && (*buf <= '9')) {
- // Parse integer numbers -- permute and offset.
- int n;
- next = buf;
- SKIP_DIGITS(next);
- n = __kmp_str_to_int( buf, *next );
- KMP_ASSERT(n >= 0);
- buf = next;
- if ( count < 2 ) {
- number[ count ] = n;
- } else {
- KMP_WARNING( AffManyParams, name, start );
- }; // if
- ++ count;
- } else {
- EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
- continue;
- }
-
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- SKIP_WS(next);
- }
- else if (*next != '\0') {
- const char *temp = next;
- EMIT_WARN( TRUE, ( ParseExtraCharsWarn, name, temp ) );
- continue;
- }
- buf = next;
- } // while
-
- #undef EMIT_WARN
- #undef _set_param
- #undef set_type
- #undef set_verbose
- #undef set_warnings
- #undef set_respect
- #undef set_dups
- #undef set_proclist
- #undef set_gran
-
- KMP_INTERNAL_FREE( buffer );
-
- if ( proclist ) {
- if ( ! type ) {
- KMP_WARNING( AffProcListNoType, name );
- __kmp_affinity_type = affinity_explicit;
- }
- else if ( __kmp_affinity_type != affinity_explicit ) {
- KMP_WARNING( AffProcListNotExplicit, name );
- KMP_ASSERT( *out_proclist != NULL );
- KMP_INTERNAL_FREE( *out_proclist );
- *out_proclist = NULL;
- }
- }
- switch ( *out_type ) {
- case affinity_logical:
- case affinity_physical: {
- if ( count > 0 ) {
- *out_offset = number[ 0 ];
- }; // if
- if ( count > 1 ) {
- KMP_WARNING( AffManyParamsForLogic, name, number[ 1 ] );
- }; // if
- } break;
- case affinity_balanced: {
- if ( count > 0 ) {
- *out_compact = number[ 0 ];
- }; // if
- if ( count > 1 ) {
- *out_offset = number[ 1 ];
- }; // if
-
- if ( __kmp_affinity_gran == affinity_gran_default ) {
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic ) {
- if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
- KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" );
- }
- __kmp_affinity_gran = affinity_gran_fine;
- } else
-#endif
- {
- if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
- KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" );
- }
- __kmp_affinity_gran = affinity_gran_core;
- }
- }
- } break;
- case affinity_scatter:
- case affinity_compact: {
- if ( count > 0 ) {
- *out_compact = number[ 0 ];
- }; // if
- if ( count > 1 ) {
- *out_offset = number[ 1 ];
- }; // if
- } break;
- case affinity_explicit: {
- if ( *out_proclist == NULL ) {
- KMP_WARNING( AffNoProcList, name );
- __kmp_affinity_type = affinity_none;
- }
- if ( count > 0 ) {
- KMP_WARNING( AffNoParam, name, "explicit" );
- }
- } break;
- case affinity_none: {
- if ( count > 0 ) {
- KMP_WARNING( AffNoParam, name, "none" );
- }; // if
- } break;
- case affinity_disabled: {
- if ( count > 0 ) {
- KMP_WARNING( AffNoParam, name, "disabled" );
- }; // if
- } break;
- case affinity_default: {
- if ( count > 0 ) {
- KMP_WARNING( AffNoParam, name, "default" );
- }; // if
- } break;
- default: {
- KMP_ASSERT( 0 );
- };
- }; // switch
-} // __kmp_parse_affinity_env
-
-static void
-__kmp_stg_parse_affinity( char const * name, char const * value, void * data )
-{
- kmp_setting_t **rivals = (kmp_setting_t **) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, rivals );
- if ( rc ) {
- return;
- }
-
- __kmp_parse_affinity_env( name, value, & __kmp_affinity_type,
- & __kmp_affinity_proclist, & __kmp_affinity_verbose,
- & __kmp_affinity_warnings, & __kmp_affinity_respect_mask,
- & __kmp_affinity_gran, & __kmp_affinity_gran_levels,
- & __kmp_affinity_dups, & __kmp_affinity_compact,
- & __kmp_affinity_offset );
-
-} // __kmp_stg_parse_affinity
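-
-// Illustrative usage (example value assumed): KMP_AFFINITY="verbose,granularity=fine,compact,1,0"
-// enables verbose output, sets fine granularity, selects the compact type, and supplies the two
-// trailing integers as the compact/permute and offset parameters handled in the switch above.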
-
-static void
-__kmp_stg_print_affinity( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(name);
- } else {
- __kmp_str_buf_print( buffer, " %s='", name );
- }
- if ( __kmp_affinity_verbose ) {
- __kmp_str_buf_print( buffer, "%s,", "verbose");
- } else {
- __kmp_str_buf_print( buffer, "%s,", "noverbose");
- }
- if ( __kmp_affinity_warnings ) {
- __kmp_str_buf_print( buffer, "%s,", "warnings");
- } else {
- __kmp_str_buf_print( buffer, "%s,", "nowarnings");
- }
- if ( KMP_AFFINITY_CAPABLE() ) {
- if ( __kmp_affinity_respect_mask ) {
- __kmp_str_buf_print( buffer, "%s,", "respect");
- } else {
- __kmp_str_buf_print( buffer, "%s,", "norespect");
- }
- switch ( __kmp_affinity_gran ) {
- case affinity_gran_default:
- __kmp_str_buf_print( buffer, "%s", "granularity=default,");
- break;
- case affinity_gran_fine:
- __kmp_str_buf_print( buffer, "%s", "granularity=fine,");
- break;
- case affinity_gran_thread:
- __kmp_str_buf_print( buffer, "%s", "granularity=thread,");
- break;
- case affinity_gran_core:
- __kmp_str_buf_print( buffer, "%s", "granularity=core,");
- break;
- case affinity_gran_package:
- __kmp_str_buf_print( buffer, "%s", "granularity=package,");
- break;
- case affinity_gran_node:
- __kmp_str_buf_print( buffer, "%s", "granularity=node,");
- break;
-# if KMP_GROUP_AFFINITY
- case affinity_gran_group:
- __kmp_str_buf_print( buffer, "%s", "granularity=group,");
- break;
-# endif /* KMP_GROUP_AFFINITY */
- }
- if ( __kmp_affinity_dups ) {
- __kmp_str_buf_print( buffer, "%s,", "duplicates");
- } else {
- __kmp_str_buf_print( buffer, "%s,", "noduplicates");
- }
- }
- if ( ! KMP_AFFINITY_CAPABLE() ) {
- __kmp_str_buf_print( buffer, "%s", "disabled" );
- }
- else switch ( __kmp_affinity_type ){
- case affinity_none:
- __kmp_str_buf_print( buffer, "%s", "none");
- break;
- case affinity_physical:
- __kmp_str_buf_print( buffer, "%s,%d", "physical",
- __kmp_affinity_offset );
- break;
- case affinity_logical:
- __kmp_str_buf_print( buffer, "%s,%d", "logical",
- __kmp_affinity_offset );
- break;
- case affinity_compact:
- __kmp_str_buf_print( buffer, "%s,%d,%d", "compact",
- __kmp_affinity_compact, __kmp_affinity_offset );
- break;
- case affinity_scatter:
- __kmp_str_buf_print( buffer, "%s,%d,%d", "scatter",
- __kmp_affinity_compact, __kmp_affinity_offset );
- break;
- case affinity_explicit:
- __kmp_str_buf_print( buffer, "%s=[%s],%s", "proclist",
- __kmp_affinity_proclist, "explicit" );
- break;
- case affinity_balanced:
- __kmp_str_buf_print( buffer, "%s,%d,%d", "balanced",
- __kmp_affinity_compact, __kmp_affinity_offset );
- break;
- case affinity_disabled:
- __kmp_str_buf_print( buffer, "%s", "disabled");
- break;
- case affinity_default:
- __kmp_str_buf_print( buffer, "%s", "default");
- break;
- default:
- __kmp_str_buf_print( buffer, "%s", "<unknown>");
- break;
- }
- __kmp_str_buf_print( buffer, "'\n" );
-} //__kmp_stg_print_affinity
-
-# ifdef KMP_GOMP_COMPAT
-
-static void
-__kmp_stg_parse_gomp_cpu_affinity( char const * name, char const * value, void * data )
-{
- const char * next = NULL;
- char * temp_proclist;
- kmp_setting_t **rivals = (kmp_setting_t **) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, rivals );
- if ( rc ) {
- return;
- }
-
- if ( TCR_4(__kmp_init_middle) ) {
- KMP_WARNING( EnvMiddleWarn, name );
- __kmp_env_toPrint( name, 0 );
- return;
- }
-
- __kmp_env_toPrint( name, 1 );
-
- if ( __kmp_parse_affinity_proc_id_list( name, value, &next,
- &temp_proclist )) {
- SKIP_WS(next);
- if (*next == '\0') {
- //
- // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=...
- //
- __kmp_affinity_proclist = temp_proclist;
- __kmp_affinity_type = affinity_explicit;
- __kmp_affinity_gran = affinity_gran_fine;
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- }
- else {
- KMP_WARNING( AffSyntaxError, name );
- if (temp_proclist != NULL) {
- KMP_INTERNAL_FREE((void *)temp_proclist);
- }
- }
- }
- else {
- //
- // Warning already emitted
- //
- __kmp_affinity_type = affinity_none;
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-# endif
- }
-} // __kmp_stg_parse_gomp_cpu_affinity
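-
-// Illustrative usage (example value assumed): GOMP_CPU_AFFINITY="0 3-5 8" is parsed with the same
-// proc id list parser as above and, if it parses cleanly, is treated as
-// granularity=fine,explicit,proclist=<list>, mirroring the GNU OpenMP semantics.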
-
-# endif /* KMP_GOMP_COMPAT */
-
-
-# if OMP_40_ENABLED
-
-/*-----------------------------------------------------------------------------
-
-The OMP_PLACES proc id list parser. Here is the grammar:
-
-place_list := place
-place_list := place , place_list
-place := num
-place := place : num
-place := place : num : signed
-place := { subplace_list }
-place := ! place // (lowest priority)
-subplace_list := subplace
-subplace_list := subplace , subplace_list
-subplace := num
-subplace := num : num
-subplace := num : num : signed
-signed := num
-signed := + signed
-signed := - signed
-
------------------------------------------------------------------------------*/
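-
-// Illustrative place lists that match the grammar above (example values assumed):
-// "{0,1,2,3},{4,5,6,7}" -- two explicit subplace sets;
-// "{0:4}:4:4"           -- a subplace of 4 consecutive ids, replicated 4 times with a stride of 4.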
-
-static int
-__kmp_parse_subplace_list( const char *var, const char **scan )
-{
- const char *next;
-
- for (;;) {
- int start, count, stride;
-
- //
- // Read in the starting proc id
- //
- SKIP_WS(*scan);
- if ((**scan < '0') || (**scan > '9')) {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- next = *scan;
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(start >= 0);
- *scan = next;
-
- //
- // valid follow sets are ',' ':' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}') {
- break;
- }
- if (**scan == ',') {
- (*scan)++; // skip ','
- continue;
- }
- if (**scan != ':') {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- (*scan)++; // skip ':'
-
- //
- // Read count parameter
- //
- SKIP_WS(*scan);
- if ((**scan < '0') || (**scan > '9')) {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- next = *scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(count >= 0);
- *scan = next;
-
- //
- // valid follow sets are ',' ':' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}') {
- break;
- }
- if (**scan == ',') {
- (*scan)++; // skip ','
- continue;
- }
- if (**scan != ':') {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- (*scan)++; // skip ':'
-
- //
- // Read stride parameter
- //
- int sign = +1;
- for (;;) {
- SKIP_WS(*scan);
- if (**scan == '+') {
- (*scan)++; // skip '+'
- continue;
- }
- if (**scan == '-') {
- sign *= -1;
- (*scan)++; // skip '-'
- continue;
- }
- break;
- }
- SKIP_WS(*scan);
- if ((**scan < '0') || (**scan > '9')) {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- next = *scan;
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(stride >= 0);
- *scan = next;
- stride *= sign;
-
- //
- // valid follow sets are ',' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}') {
- break;
- }
- if (**scan == ',') {
- (*scan)++; // skip ','
- continue;
- }
-
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- return TRUE;
-}
-
-static int
-__kmp_parse_place( const char *var, const char ** scan )
-{
- const char *next;
-
- //
- // valid follow sets are '{' '!' and num
- //
- SKIP_WS(*scan);
- if (**scan == '{') {
- (*scan)++; // skip '{'
- if (! __kmp_parse_subplace_list(var, scan)) {
- return FALSE;
- }
- if (**scan != '}') {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- (*scan)++; // skip '}'
- }
- else if (**scan == '!') {
- (*scan)++; // skip '!'
- return __kmp_parse_place(var, scan); //'!' has lower precedence than ':'
- }
- else if ((**scan >= '0') && (**scan <= '9')) {
- next = *scan;
- SKIP_DIGITS(next);
- int proc = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(proc >= 0);
- *scan = next;
- }
- else {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- return TRUE;
-}
-
-static int
-__kmp_parse_place_list( const char *var, const char *env, char **place_list )
-{
- const char *scan = env;
- const char *next = scan;
-
- for (;;) {
- int start, count, stride;
-
- if (! __kmp_parse_place(var, &scan)) {
- return FALSE;
- }
-
- //
- // valid follow sets are ',' ':' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0') {
- break;
- }
- if (*scan == ',') {
- scan++; // skip ','
- continue;
- }
- if (*scan != ':') {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- scan++; // skip ':'
-
- //
- // Read count parameter
- //
- SKIP_WS(scan);
- if ((*scan < '0') || (*scan > '9')) {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- next = scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(count >= 0);
- scan = next;
-
- //
- // valid follow sets are ',' ':' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0') {
- break;
- }
- if (*scan == ',') {
- scan++; // skip ','
- continue;
- }
- if (*scan != ':') {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- scan++; // skip ':'
-
- //
- // Read stride parameter
- //
- int sign = +1;
- for (;;) {
- SKIP_WS(scan);
- if (*scan == '+') {
- scan++; // skip '+'
- continue;
- }
- if (*scan == '-') {
- sign *= -1;
- scan++; // skip '-'
- continue;
- }
- break;
- }
- SKIP_WS(scan);
- if ((*scan < '0') || (*scan > '9')) {
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
- next = scan;
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(stride >= 0);
- scan = next;
- stride *= sign;
-
- //
- // valid follow sets are ',' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0') {
- break;
- }
- if (*scan == ',') {
- scan++; // skip ','
- continue;
- }
-
- KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
- return FALSE;
- }
-
- {
- int len = scan - env;
- char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char));
- KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char));
- retlist[len] = '\0';
- *place_list = retlist;
- }
- return TRUE;
-}
-
-static void
-__kmp_stg_parse_places( char const * name, char const * value, void * data )
-{
- int count;
- const char *scan = value;
- const char *next = scan;
- const char *kind = "\"threads\"";
- kmp_setting_t **rivals = (kmp_setting_t **) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, rivals );
- if ( rc ) {
- return;
- }
-
- //
- // If OMP_PROC_BIND is not specified but OMP_PLACES is,
- // then let OMP_PROC_BIND default to true.
- //
- if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) {
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
- }
-
- //__kmp_affinity_num_places = 0;
-
- if ( __kmp_match_str( "threads", scan, &next ) ) {
- scan = next;
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_gran = affinity_gran_thread;
- __kmp_affinity_dups = FALSE;
- kind = "\"threads\"";
- }
- else if ( __kmp_match_str( "cores", scan, &next ) ) {
- scan = next;
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_gran = affinity_gran_core;
- __kmp_affinity_dups = FALSE;
- kind = "\"cores\"";
- }
- else if ( __kmp_match_str( "sockets", scan, &next ) ) {
- scan = next;
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_gran = affinity_gran_package;
- __kmp_affinity_dups = FALSE;
- kind = "\"sockets\"";
- }
- else {
- if ( __kmp_affinity_proclist != NULL ) {
- KMP_INTERNAL_FREE( (void *)__kmp_affinity_proclist );
- __kmp_affinity_proclist = NULL;
- }
- if ( __kmp_parse_place_list( name, value, &__kmp_affinity_proclist ) ) {
- __kmp_affinity_type = affinity_explicit;
- __kmp_affinity_gran = affinity_gran_fine;
- __kmp_affinity_dups = FALSE;
- if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) {
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
- }
- }
- return;
- }
-
- if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) {
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
- }
-
- SKIP_WS(scan);
- if ( *scan == '\0' ) {
- return;
- }
-
- //
- // Parse option count parameter in parentheses
- //
- if ( *scan != '(' ) {
- KMP_WARNING( SyntaxErrorUsing, name, kind );
- return;
- }
- scan++; // skip '('
-
- SKIP_WS(scan);
- next = scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(count >= 0);
- scan = next;
-
- SKIP_WS(scan);
- if ( *scan != ')' ) {
- KMP_WARNING( SyntaxErrorUsing, name, kind );
- return;
- }
- scan++; // skip ')'
-
- SKIP_WS(scan);
- if ( *scan != '\0' ) {
- KMP_WARNING( ParseExtraCharsWarn, name, scan );
- }
- __kmp_affinity_num_places = count;
-}
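-
-// Illustrative usage (example values assumed): OMP_PLACES="cores(4)" selects the compact affinity
-// type with core granularity and sets __kmp_affinity_num_places to 4 via the parenthesized count
-// parsed above; OMP_PLACES="{0,1},{2,3}" instead takes the explicit place-list path.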
-
-static void
-__kmp_stg_print_places( kmp_str_buf_t * buffer, char const * name,
- void * data )
-{
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME;
- } else {
- __kmp_str_buf_print( buffer, " %s", name );
- }
- if ( ( __kmp_nested_proc_bind.used == 0 )
- || ( __kmp_nested_proc_bind.bind_types == NULL )
- || ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_false ) ) {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- else if ( __kmp_affinity_type == affinity_explicit ) {
- if ( __kmp_affinity_proclist != NULL ) {
- __kmp_str_buf_print( buffer, "='%s'\n", __kmp_affinity_proclist );
- }
- else {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- }
- else if ( __kmp_affinity_type == affinity_compact ) {
- int num;
- if ( __kmp_affinity_num_masks > 0 ) {
- num = __kmp_affinity_num_masks;
- }
- else if ( __kmp_affinity_num_places > 0 ) {
- num = __kmp_affinity_num_places;
- }
- else {
- num = 0;
- }
- if ( __kmp_affinity_gran == affinity_gran_thread ) {
- if ( num > 0 ) {
- __kmp_str_buf_print( buffer, "='threads(%d)'\n", num );
- }
- else {
- __kmp_str_buf_print( buffer, "='threads'\n" );
- }
- }
- else if ( __kmp_affinity_gran == affinity_gran_core ) {
- if ( num > 0 ) {
- __kmp_str_buf_print( buffer, "='cores(%d)' \n", num );
- }
- else {
- __kmp_str_buf_print( buffer, "='cores'\n" );
- }
- }
- else if ( __kmp_affinity_gran == affinity_gran_package ) {
- if ( num > 0 ) {
- __kmp_str_buf_print( buffer, "='sockets(%d)'\n", num );
- }
- else {
- __kmp_str_buf_print( buffer, "='sockets'\n" );
- }
- }
- else {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- }
- else {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
-}
-
-# endif /* OMP_40_ENABLED */
-
-# if (! OMP_40_ENABLED)
-
-static void
-__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data )
-{
- int enabled;
- kmp_setting_t **rivals = (kmp_setting_t **) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, rivals );
- if ( rc ) {
- return;
- }
-
- //
- // in OMP 3.1, OMP_PROC_BIND is strictly a boolean
- //
- __kmp_stg_parse_bool( name, value, & enabled );
- if ( enabled ) {
- //
- // OMP_PROC_BIND => granularity=fine,scatter on MIC
- // OMP_PROC_BIND => granularity=core,scatter elsewhere
- //
- __kmp_affinity_type = affinity_scatter;
-# if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic )
- __kmp_affinity_gran = affinity_gran_fine;
- else
-# endif
- __kmp_affinity_gran = affinity_gran_core;
- }
- else {
- __kmp_affinity_type = affinity_none;
- }
-} // __kmp_parse_proc_bind
-
-# endif /* if (! OMP_40_ENABLED) */
-
-
-static void
-__kmp_stg_parse_topology_method( char const * name, char const * value,
- void * data ) {
- if ( __kmp_str_match( "all", 1, value ) ) {
- __kmp_affinity_top_method = affinity_top_method_all;
- }
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
- else if ( __kmp_str_match( "x2apic id", 9, value )
- || __kmp_str_match( "x2apic_id", 9, value )
- || __kmp_str_match( "x2apic-id", 9, value )
- || __kmp_str_match( "x2apicid", 8, value )
- || __kmp_str_match( "cpuid leaf 11", 13, value )
- || __kmp_str_match( "cpuid_leaf_11", 13, value )
- || __kmp_str_match( "cpuid-leaf-11", 13, value )
- || __kmp_str_match( "cpuid leaf11", 12, value )
- || __kmp_str_match( "cpuid_leaf11", 12, value )
- || __kmp_str_match( "cpuid-leaf11", 12, value )
- || __kmp_str_match( "cpuidleaf 11", 12, value )
- || __kmp_str_match( "cpuidleaf_11", 12, value )
- || __kmp_str_match( "cpuidleaf-11", 12, value )
- || __kmp_str_match( "cpuidleaf11", 11, value )
- || __kmp_str_match( "cpuid 11", 8, value )
- || __kmp_str_match( "cpuid_11", 8, value )
- || __kmp_str_match( "cpuid-11", 8, value )
- || __kmp_str_match( "cpuid11", 7, value )
- || __kmp_str_match( "leaf 11", 7, value )
- || __kmp_str_match( "leaf_11", 7, value )
- || __kmp_str_match( "leaf-11", 7, value )
- || __kmp_str_match( "leaf11", 6, value ) ) {
- __kmp_affinity_top_method = affinity_top_method_x2apicid;
- }
- else if ( __kmp_str_match( "apic id", 7, value )
- || __kmp_str_match( "apic_id", 7, value )
- || __kmp_str_match( "apic-id", 7, value )
- || __kmp_str_match( "apicid", 6, value )
- || __kmp_str_match( "cpuid leaf 4", 12, value )
- || __kmp_str_match( "cpuid_leaf_4", 12, value )
- || __kmp_str_match( "cpuid-leaf-4", 12, value )
- || __kmp_str_match( "cpuid leaf4", 11, value )
- || __kmp_str_match( "cpuid_leaf4", 11, value )
- || __kmp_str_match( "cpuid-leaf4", 11, value )
- || __kmp_str_match( "cpuidleaf 4", 11, value )
- || __kmp_str_match( "cpuidleaf_4", 11, value )
- || __kmp_str_match( "cpuidleaf-4", 11, value )
- || __kmp_str_match( "cpuidleaf4", 10, value )
- || __kmp_str_match( "cpuid 4", 7, value )
- || __kmp_str_match( "cpuid_4", 7, value )
- || __kmp_str_match( "cpuid-4", 7, value )
- || __kmp_str_match( "cpuid4", 6, value )
- || __kmp_str_match( "leaf 4", 6, value )
- || __kmp_str_match( "leaf_4", 6, value )
- || __kmp_str_match( "leaf-4", 6, value )
- || __kmp_str_match( "leaf4", 5, value ) ) {
- __kmp_affinity_top_method = affinity_top_method_apicid;
- }
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- else if ( __kmp_str_match( "/proc/cpuinfo", 2, value )
- || __kmp_str_match( "cpuinfo", 5, value )) {
- __kmp_affinity_top_method = affinity_top_method_cpuinfo;
- }
-# if KMP_GROUP_AFFINITY
- else if ( __kmp_str_match( "group", 1, value ) ) {
- __kmp_affinity_top_method = affinity_top_method_group;
- }
-# endif /* KMP_GROUP_AFFINITY */
- else if ( __kmp_str_match( "flat", 1, value ) ) {
- __kmp_affinity_top_method = affinity_top_method_flat;
- }
-# if KMP_USE_HWLOC
- else if ( __kmp_str_match( "hwloc", 1, value) ) {
- __kmp_affinity_top_method = affinity_top_method_hwloc;
- }
-# endif
- else {
- KMP_WARNING( StgInvalidValue, name, value );
- }
-} // __kmp_stg_parse_topology_method
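-
-// Illustrative usage (example value assumed): KMP_TOPOLOGY_METHOD="cpuinfo" forces the topology to
-// be read from /proc/cpuinfo, while "flat" or "group" (where supported) bypasses the
-// x2APIC/APIC-id based detection matched above.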
-
-static void
-__kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name,
- void * data ) {
-# if KMP_DEBUG
- char const * value = NULL;
-
- switch ( __kmp_affinity_top_method ) {
- case affinity_top_method_default:
- value = "default";
- break;
-
- case affinity_top_method_all:
- value = "all";
- break;
-
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
- case affinity_top_method_x2apicid:
- value = "x2APIC id";
- break;
-
- case affinity_top_method_apicid:
- value = "APIC id";
- break;
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- case affinity_top_method_cpuinfo:
- value = "cpuinfo";
- break;
-
-# if KMP_GROUP_AFFINITY
- case affinity_top_method_group:
- value = "group";
- break;
-# endif /* KMP_GROUP_AFFINITY */
-
- case affinity_top_method_flat:
- value = "flat";
- break;
- }
-
- if ( value != NULL ) {
- __kmp_stg_print_str( buffer, name, value );
- }
-# endif /* KMP_DEBUG */
-} // __kmp_stg_print_topology_method
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-
-#if OMP_40_ENABLED
-
-//
-// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X*
-// OMP_PLACES / place-partition-var is not.
-//
-static void
-__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data )
-{
- kmp_setting_t **rivals = (kmp_setting_t **) data;
- int rc;
-
- rc = __kmp_stg_check_rivals( name, value, rivals );
- if ( rc ) {
- return;
- }
-
- //
- // in OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types.
- //
- KMP_DEBUG_ASSERT( (__kmp_nested_proc_bind.bind_types != NULL)
- && ( __kmp_nested_proc_bind.used > 0 ) );
-
- const char *buf = value;
- const char *next;
- int num;
- SKIP_WS( buf );
- if ( (*buf >= '0') && (*buf <= '9') ) {
- next = buf;
- SKIP_DIGITS( next );
- num = __kmp_str_to_int( buf, *next );
- KMP_ASSERT( num >= 0 );
- buf = next;
- SKIP_WS( buf );
- }
- else {
- num = -1;
- }
-
- next = buf;
- if ( __kmp_match_str( "disabled", buf, &next ) ) {
- buf = next;
- SKIP_WS( buf );
-# if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_type = affinity_disabled;
-# endif /* KMP_AFFINITY_SUPPORTED */
- __kmp_nested_proc_bind.used = 1;
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
- }
- else if ( ( num == (int)proc_bind_false )
- || __kmp_match_str( "false", buf, &next ) ) {
- buf = next;
- SKIP_WS( buf );
-# if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_type = affinity_none;
-# endif /* KMP_AFFINITY_SUPPORTED */
- __kmp_nested_proc_bind.used = 1;
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
- }
- else if ( ( num == (int)proc_bind_true )
- || __kmp_match_str( "true", buf, &next ) ) {
- buf = next;
- SKIP_WS( buf );
- __kmp_nested_proc_bind.used = 1;
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
- }
- else {
- //
- // Count the number of values in the env var string
- //
- const char *scan;
- int nelem = 1;
- for ( scan = buf; *scan != '\0'; scan++ ) {
- if ( *scan == ',' ) {
- nelem++;
- }
- }
-
- //
- // Create / expand the nested proc_bind array as needed
- //
- if ( __kmp_nested_proc_bind.size < nelem ) {
- __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *)
- KMP_INTERNAL_REALLOC( __kmp_nested_proc_bind.bind_types,
- sizeof(kmp_proc_bind_t) * nelem );
- if ( __kmp_nested_proc_bind.bind_types == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }
- __kmp_nested_proc_bind.size = nelem;
- }
- __kmp_nested_proc_bind.used = nelem;
-
- //
- // Save values in the nested proc_bind array
- //
- int i = 0;
- for (;;) {
- enum kmp_proc_bind_t bind;
-
- if ( ( num == (int)proc_bind_master )
- || __kmp_match_str( "master", buf, &next ) ) {
- buf = next;
- SKIP_WS( buf );
- bind = proc_bind_master;
- }
- else if ( ( num == (int)proc_bind_close )
- || __kmp_match_str( "close", buf, &next ) ) {
- buf = next;
- SKIP_WS( buf );
- bind = proc_bind_close;
- }
- else if ( ( num == (int)proc_bind_spread )
- || __kmp_match_str( "spread", buf, &next ) ) {
- buf = next;
- SKIP_WS( buf );
- bind = proc_bind_spread;
- }
- else {
- KMP_WARNING( StgInvalidValue, name, value );
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
- __kmp_nested_proc_bind.used = 1;
- return;
- }
-
- __kmp_nested_proc_bind.bind_types[i++] = bind;
- if ( i >= nelem ) {
- break;
- }
- KMP_DEBUG_ASSERT( *buf == ',' );
- buf++;
- SKIP_WS( buf );
-
- //
- // Read next value if it was specified as an integer
- //
- if ( (*buf >= '0') && (*buf <= '9') ) {
- next = buf;
- SKIP_DIGITS( next );
- num = __kmp_str_to_int( buf, *next );
- KMP_ASSERT( num >= 0 );
- buf = next;
- SKIP_WS( buf );
- }
- else {
- num = -1;
- }
- }
- SKIP_WS( buf );
- }
- if ( *buf != '\0' ) {
- KMP_WARNING( ParseExtraCharsWarn, name, buf );
- }
-}
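-
-// Illustrative usage (example value assumed): OMP_PROC_BIND="spread,close" fills
-// __kmp_nested_proc_bind.bind_types with { proc_bind_spread, proc_bind_close }, applying "spread"
-// at the outermost parallel level and "close" one level deeper, as the loop above stores them.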
-
-
-static void
-__kmp_stg_print_proc_bind( kmp_str_buf_t * buffer, char const * name,
- void * data )
-{
- int nelem = __kmp_nested_proc_bind.used;
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME;
- } else {
- __kmp_str_buf_print( buffer, " %s", name );
- }
- if ( nelem == 0 ) {
- __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
- }
- else {
- int i;
- __kmp_str_buf_print( buffer, "='", name );
- for ( i = 0; i < nelem; i++ ) {
- switch ( __kmp_nested_proc_bind.bind_types[i] ) {
- case proc_bind_false:
- __kmp_str_buf_print( buffer, "false" );
- break;
-
- case proc_bind_true:
- __kmp_str_buf_print( buffer, "true" );
- break;
-
- case proc_bind_master:
- __kmp_str_buf_print( buffer, "master" );
- break;
-
- case proc_bind_close:
- __kmp_str_buf_print( buffer, "close" );
- break;
-
- case proc_bind_spread:
- __kmp_str_buf_print( buffer, "spread" );
- break;
-
- case proc_bind_intel:
- __kmp_str_buf_print( buffer, "intel" );
- break;
-
- case proc_bind_default:
- __kmp_str_buf_print( buffer, "default" );
- break;
- }
- if ( i < nelem - 1 ) {
- __kmp_str_buf_print( buffer, "," );
- }
- }
- __kmp_str_buf_print( buffer, "'\n" );
- }
-}
-
-#endif /* OMP_40_ENABLED */
-
-
-// -------------------------------------------------------------------------------------------------
-// OMP_DYNAMIC
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_omp_dynamic( char const * name, char const * value, void * data )
-{
- __kmp_stg_parse_bool( name, value, & (__kmp_global.g.g_dynamic) );
-} // __kmp_stg_parse_omp_dynamic
-
-static void
-__kmp_stg_print_omp_dynamic( kmp_str_buf_t * buffer, char const * name, void * data )
-{
- __kmp_stg_print_bool( buffer, name, __kmp_global.g.g_dynamic );
-} // __kmp_stg_print_omp_dynamic
-
-static void
-__kmp_stg_parse_kmp_dynamic_mode( char const * name, char const * value, void * data )
-{
- if ( TCR_4(__kmp_init_parallel) ) {
- KMP_WARNING( EnvParallelWarn, name );
- __kmp_env_toPrint( name, 0 );
- return;
- }
-#ifdef USE_LOAD_BALANCE
- else if ( __kmp_str_match( "load balance", 2, value )
- || __kmp_str_match( "load_balance", 2, value )
- || __kmp_str_match( "load-balance", 2, value )
- || __kmp_str_match( "loadbalance", 2, value )
- || __kmp_str_match( "balance", 1, value ) ) {
- __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
- }
-#endif /* USE_LOAD_BALANCE */
- else if ( __kmp_str_match( "thread limit", 1, value )
- || __kmp_str_match( "thread_limit", 1, value )
- || __kmp_str_match( "thread-limit", 1, value )
- || __kmp_str_match( "threadlimit", 1, value )
- || __kmp_str_match( "limit", 2, value ) ) {
- __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
- }
- else if ( __kmp_str_match( "random", 1, value ) ) {
- __kmp_global.g.g_dynamic_mode = dynamic_random;
- }
- else {
- KMP_WARNING( StgInvalidValue, name, value );
- }
-} //__kmp_stg_parse_kmp_dynamic_mode
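-
-// Illustrative usage (example values assumed): KMP_DYNAMIC_MODE="load balance" selects
-// dynamic_load_balance (when USE_LOAD_BALANCE is built in), while "thread limit" and "random"
-// select the corresponding modes matched above.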
-
-static void
-__kmp_stg_print_kmp_dynamic_mode( kmp_str_buf_t * buffer, char const * name, void * data )
-{
-#if KMP_DEBUG
- if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
- __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
- }
-# ifdef USE_LOAD_BALANCE
- else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
- __kmp_stg_print_str( buffer, name, "load balance" );
- }
-# endif /* USE_LOAD_BALANCE */
- else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
- __kmp_stg_print_str( buffer, name, "thread limit" );
- }
- else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
- __kmp_stg_print_str( buffer, name, "random" );
- }
- else {
- KMP_ASSERT(0);
- }
-#endif /* KMP_DEBUG */
-} // __kmp_stg_print_kmp_dynamic_mode
-
-
-#ifdef USE_LOAD_BALANCE
-
-// -------------------------------------------------------------------------------------------------
-// KMP_LOAD_BALANCE_INTERVAL
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_ld_balance_interval( char const * name, char const * value, void * data )
-{
- double interval = __kmp_convert_to_double( value );
- if ( interval >= 0 ) {
- __kmp_load_balance_interval = interval;
- } else {
- KMP_WARNING( StgInvalidValue, name, value );
- }; // if
-} // __kmp_stg_parse_ld_balance_interval
-
-static void
-__kmp_stg_print_ld_balance_interval( kmp_str_buf_t * buffer, char const * name, void * data ) {
-#if KMP_DEBUG
- __kmp_str_buf_print( buffer, " %s=%8.6f\n", name, __kmp_load_balance_interval );
-#endif /* KMP_DEBUG */
-} // __kmp_stg_print_ld_balance_interval
-
-#endif /* USE_LOAD_BALANCE */
-
-// -------------------------------------------------------------------------------------------------
-// KMP_INIT_AT_FORK
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_init_at_fork( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_need_register_atfork );
- if ( __kmp_need_register_atfork ) {
- __kmp_need_register_atfork_specified = TRUE;
- };
-} // __kmp_stg_parse_init_at_fork
-
-static void
-__kmp_stg_print_init_at_fork( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_need_register_atfork_specified );
-} // __kmp_stg_print_init_at_fork
-
-// -------------------------------------------------------------------------------------------------
-// KMP_SCHEDULE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_schedule( char const * name, char const * value, void * data ) {
-
- if ( value != NULL ) {
- size_t length = KMP_STRLEN( value );
- if ( length > INT_MAX ) {
- KMP_WARNING( LongValue, name );
- } else {
- char *semicolon;
- if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'' )
- KMP_WARNING( UnbalancedQuotes, name );
- do {
- char sentinel;
-
- semicolon = (char *) strchr( value, ';' );
- if( *value && semicolon != value ) {
- char *comma = (char *) strchr( value, ',' );
-
- if ( comma ) {
- ++comma;
- sentinel = ',';
- } else
- sentinel = ';';
- if ( !__kmp_strcasecmp_with_sentinel( "static", value, sentinel ) ) {
- if( !__kmp_strcasecmp_with_sentinel( "greedy", comma, ';' ) ) {
- __kmp_static = kmp_sch_static_greedy;
- continue;
- } else if( !__kmp_strcasecmp_with_sentinel( "balanced", comma, ';' ) ) {
- __kmp_static = kmp_sch_static_balanced;
- continue;
- }
- } else if ( !__kmp_strcasecmp_with_sentinel( "guided", value, sentinel ) ) {
- if ( !__kmp_strcasecmp_with_sentinel( "iterative", comma, ';' ) ) {
- __kmp_guided = kmp_sch_guided_iterative_chunked;
- continue;
- } else if ( !__kmp_strcasecmp_with_sentinel( "analytical", comma, ';' ) ) {
- /* analytical not allowed for too many threads */
- __kmp_guided = kmp_sch_guided_analytical_chunked;
- continue;
- }
- }
- KMP_WARNING( InvalidClause, name, value );
- } else
- KMP_WARNING( EmptyClause, name );
- } while ( (value = semicolon ? semicolon + 1 : NULL) );
- }
- }; // if
-
-} // __kmp_stg_parse_schedule
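-
-// Illustrative usage (example value assumed): KMP_SCHEDULE="static,greedy;guided,iterative" is
-// split on ';' into clauses and each clause on ',' into kind and variant, yielding
-// __kmp_static == kmp_sch_static_greedy and __kmp_guided == kmp_sch_guided_iterative_chunked.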
-
-static void
-__kmp_stg_print_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(name);
- } else {
- __kmp_str_buf_print( buffer, " %s='", name );
- }
- if ( __kmp_static == kmp_sch_static_greedy ) {
- __kmp_str_buf_print( buffer, "%s", "static,greedy");
- } else if ( __kmp_static == kmp_sch_static_balanced ) {
- __kmp_str_buf_print ( buffer, "%s", "static,balanced");
- }
- if ( __kmp_guided == kmp_sch_guided_iterative_chunked ) {
- __kmp_str_buf_print( buffer, ";%s'\n", "guided,iterative");
- } else if ( __kmp_guided == kmp_sch_guided_analytical_chunked ) {
- __kmp_str_buf_print( buffer, ";%s'\n", "guided,analytical");
- }
-} // __kmp_stg_print_schedule
-
-// -------------------------------------------------------------------------------------------------
-// OMP_SCHEDULE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_omp_schedule( char const * name, char const * value, void * data )
-{
- size_t length;
- if( value ) {
- length = KMP_STRLEN( value );
- if( length ) {
- char *comma = (char *) strchr( value, ',' );
- if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'')
- KMP_WARNING( UnbalancedQuotes, name );
- /* get the specified scheduling style */
- if (!__kmp_strcasecmp_with_sentinel("dynamic", value, ',')) /* DYNAMIC */
- __kmp_sched = kmp_sch_dynamic_chunked;
- else if (!__kmp_strcasecmp_with_sentinel("guided", value, ',')) /* GUIDED */
- __kmp_sched = kmp_sch_guided_chunked;
-// AC: TODO: add AUTO schedule, and probably remove TRAPEZOIDAL (OMP 3.0 does not allow it)
- else if (!__kmp_strcasecmp_with_sentinel("auto", value, ',')) { /* AUTO */
- __kmp_sched = kmp_sch_auto;
- if( comma ) {
- __kmp_msg( kmp_ms_warning, KMP_MSG( IgnoreChunk, name, comma ), __kmp_msg_null );
- comma = NULL;
- }
- }
- else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", value, ',')) /* TRAPEZOIDAL */
- __kmp_sched = kmp_sch_trapezoidal;
- else if (!__kmp_strcasecmp_with_sentinel("static", value, ',')) /* STATIC */
- __kmp_sched = kmp_sch_static;
-#ifdef KMP_STATIC_STEAL_ENABLED
- else if (KMP_ARCH_X86_64 &&
- !__kmp_strcasecmp_with_sentinel("static_steal", value, ','))
- __kmp_sched = kmp_sch_static_steal;
-#endif
- else {
- KMP_WARNING( StgInvalidValue, name, value );
- value = NULL; /* skip processing of comma */
- }
- if( value && comma ) {
- __kmp_env_chunk = TRUE;
-
- if(__kmp_sched == kmp_sch_static)
- __kmp_sched = kmp_sch_static_chunked;
- ++comma;
- __kmp_chunk = __kmp_str_to_int( comma, 0 );
- if ( __kmp_chunk < 1 ) {
- __kmp_chunk = KMP_DEFAULT_CHUNK;
- __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidChunk, name, comma ), __kmp_msg_null );
- KMP_INFORM( Using_int_Value, name, __kmp_chunk );
-// AC: next block commented out until KMP_DEFAULT_CHUNK != KMP_MIN_CHUNK (to improve code coverage :)
-// The default chunk size is 1 according to the standard, thus making KMP_MIN_CHUNK anything other than 1 would introduce a mess:
-// a wrong chunk would become 1, but it would be impossible to explicitly set 1, because it becomes KMP_MIN_CHUNK...
-// } else if ( __kmp_chunk < KMP_MIN_CHUNK ) {
-// __kmp_chunk = KMP_MIN_CHUNK;
- } else if ( __kmp_chunk > KMP_MAX_CHUNK ) {
- __kmp_chunk = KMP_MAX_CHUNK;
- __kmp_msg( kmp_ms_warning, KMP_MSG( LargeChunk, name, comma ), __kmp_msg_null );
- KMP_INFORM( Using_int_Value, name, __kmp_chunk );
- }
- } else
- __kmp_env_chunk = FALSE;
- } else
- KMP_WARNING( EmptyString, name );
- }
- K_DIAG(1, ("__kmp_static == %d\n", __kmp_static))
- K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided))
- K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched))
- K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk))
-} // __kmp_stg_parse_omp_schedule
-
-static void
-__kmp_stg_print_omp_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(name);
- } else {
- __kmp_str_buf_print( buffer, " %s='", name );
- }
- if ( __kmp_chunk ) {
- switch ( __kmp_sched ) {
- case kmp_sch_dynamic_chunked:
- __kmp_str_buf_print( buffer, "%s,%d'\n", "dynamic", __kmp_chunk);
- break;
- case kmp_sch_guided_iterative_chunked:
- case kmp_sch_guided_analytical_chunked:
- __kmp_str_buf_print( buffer, "%s,%d'\n", "guided", __kmp_chunk);
- break;
- case kmp_sch_trapezoidal:
- __kmp_str_buf_print( buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk);
- break;
- case kmp_sch_static:
- case kmp_sch_static_chunked:
- case kmp_sch_static_balanced:
- case kmp_sch_static_greedy:
- __kmp_str_buf_print( buffer, "%s,%d'\n", "static", __kmp_chunk);
- break;
- case kmp_sch_static_steal:
- __kmp_str_buf_print( buffer, "%s,%d'\n", "static_steal", __kmp_chunk);
- break;
- case kmp_sch_auto:
- __kmp_str_buf_print( buffer, "%s,%d'\n", "auto", __kmp_chunk);
- break;
- }
- } else {
- switch ( __kmp_sched ) {
- case kmp_sch_dynamic_chunked:
- __kmp_str_buf_print( buffer, "%s'\n", "dynamic");
- break;
- case kmp_sch_guided_iterative_chunked:
- case kmp_sch_guided_analytical_chunked:
- __kmp_str_buf_print( buffer, "%s'\n", "guided");
- break;
- case kmp_sch_trapezoidal:
- __kmp_str_buf_print( buffer, "%s'\n", "trapezoidal");
- break;
- case kmp_sch_static:
- case kmp_sch_static_chunked:
- case kmp_sch_static_balanced:
- case kmp_sch_static_greedy:
- __kmp_str_buf_print( buffer, "%s'\n", "static");
- break;
- case kmp_sch_static_steal:
- __kmp_str_buf_print( buffer, "%s'\n", "static_steal");
- break;
- case kmp_sch_auto:
- __kmp_str_buf_print( buffer, "%s'\n", "auto");
- break;
- }
- }
-} // __kmp_stg_print_omp_schedule
-
-// -------------------------------------------------------------------------------------------------
-// KMP_ATOMIC_MODE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_atomic_mode( char const * name, char const * value, void * data ) {
- // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP compatibility mode.
- int mode = 0;
- int max = 1;
- #ifdef KMP_GOMP_COMPAT
- max = 2;
- #endif /* KMP_GOMP_COMPAT */
- __kmp_stg_parse_int( name, value, 0, max, & mode );
- // TODO: parse_int is not very suitable for this case. In case of overflow it is better to use
- // 0 rather than the max value.
- if ( mode > 0 ) {
- __kmp_atomic_mode = mode;
- }; // if
-} // __kmp_stg_parse_atomic_mode
-
-static void
-__kmp_stg_print_atomic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_atomic_mode );
-} // __kmp_stg_print_atomic_mode
-
-
-// -------------------------------------------------------------------------------------------------
-// KMP_CONSISTENCY_CHECK
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_consistency_check( char const * name, char const * value, void * data ) {
- if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) {
- // Note, this will not work from kmp_set_defaults because the th_cons stack was not allocated
- // for existing thread(s), thus the first __kmp_push_<construct> will break with an assertion.
- // TODO: allocate th_cons if called from kmp_set_defaults.
- __kmp_env_consistency_check = TRUE;
- } else if ( ! __kmp_strcasecmp_with_sentinel( "none", value, 0 ) ) {
- __kmp_env_consistency_check = FALSE;
- } else {
- KMP_WARNING( StgInvalidValue, name, value );
- }; // if
-} // __kmp_stg_parse_consistency_check
-
-static void
-__kmp_stg_print_consistency_check( kmp_str_buf_t * buffer, char const * name, void * data ) {
-#if KMP_DEBUG
- const char *value = NULL;
-
- if ( __kmp_env_consistency_check ) {
- value = "all";
- } else {
- value = "none";
- }
-
- if ( value != NULL ) {
- __kmp_stg_print_str( buffer, name, value );
- }
-#endif /* KMP_DEBUG */
-} // __kmp_stg_print_consistency_check
-
-
-#if USE_ITT_BUILD
-// -------------------------------------------------------------------------------------------------
-// KMP_ITT_PREPARE_DELAY
-// -------------------------------------------------------------------------------------------------
-
-#if USE_ITT_NOTIFY
-
-static void
-__kmp_stg_parse_itt_prepare_delay( char const * name, char const * value, void * data )
-{
- // Experimental code: KMP_ITT_PREPARE_DELAY specifies number of loop iterations.
- int delay = 0;
- __kmp_stg_parse_int( name, value, 0, INT_MAX, & delay );
- __kmp_itt_prepare_delay = delay;
-} // __kmp_stg_parse_itt_prepare_delay
-
-static void
-__kmp_stg_print_itt_prepare_delay( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_uint64( buffer, name, __kmp_itt_prepare_delay );
-
-} // __kmp_stg_print_itt_prepare_delay
-
-#endif // USE_ITT_NOTIFY
-#endif /* USE_ITT_BUILD */
-
-// -------------------------------------------------------------------------------------------------
-// KMP_MALLOC_POOL_INCR
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_malloc_pool_incr( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_size(
- name,
- value,
- KMP_MIN_MALLOC_POOL_INCR,
- KMP_MAX_MALLOC_POOL_INCR,
- NULL,
- & __kmp_malloc_pool_incr,
- 1
- );
-} // __kmp_stg_parse_malloc_pool_incr
-
-static void
-__kmp_stg_print_malloc_pool_incr( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_size( buffer, name, __kmp_malloc_pool_incr );
-
-} // __kmp_stg_print_malloc_pool_incr
-
-
-#ifdef KMP_DEBUG
-
-// -------------------------------------------------------------------------------------------------
-// KMP_PAR_RANGE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_par_range_env( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_par_range(
- name,
- value,
- & __kmp_par_range,
- __kmp_par_range_routine,
- __kmp_par_range_filename,
- & __kmp_par_range_lb,
- & __kmp_par_range_ub
- );
-} // __kmp_stg_parse_par_range_env
-
-static void
-__kmp_stg_print_par_range_env( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if (__kmp_par_range != 0) {
- __kmp_stg_print_str( buffer, name, par_range_to_print );
- }
-} // __kmp_stg_print_par_range_env
-
-// -------------------------------------------------------------------------------------------------
-// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_yield_cycle( char const * name, char const * value, void * data ) {
- int flag = __kmp_yield_cycle;
- __kmp_stg_parse_bool( name, value, & flag );
- __kmp_yield_cycle = flag;
-} // __kmp_stg_parse_yield_cycle
-
-static void
-__kmp_stg_print_yield_cycle( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_yield_cycle );
-} // __kmp_stg_print_yield_cycle
-
-static void
-__kmp_stg_parse_yield_on( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_on_count );
-} // __kmp_stg_parse_yield_on
-
-static void
-__kmp_stg_print_yield_on( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_yield_on_count );
-} // __kmp_stg_print_yield_on
-
-static void
-__kmp_stg_parse_yield_off( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_off_count );
-} // __kmp_stg_parse_yield_off
-
-static void
-__kmp_stg_print_yield_off( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_yield_off_count );
-} // __kmp_stg_print_yield_off
-
-#endif
-
-// -------------------------------------------------------------------------------------------------
-// KMP_INIT_WAIT, KMP_NEXT_WAIT
-// -------------------------------------------------------------------------------------------------
-
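-// Both values are kept even: the parser works on half the current value and stores twice the
-// parsed result, which then also seeds __kmp_yield_init / __kmp_yield_next.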
-static void
-__kmp_stg_parse_init_wait( char const * name, char const * value, void * data ) {
- int wait;
- KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 );
- wait = __kmp_init_wait / 2;
- __kmp_stg_parse_int( name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, & wait );
- __kmp_init_wait = wait * 2;
- KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 );
- __kmp_yield_init = __kmp_init_wait;
-} // __kmp_stg_parse_init_wait
-
-static void
-__kmp_stg_print_init_wait( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_init_wait );
-} // __kmp_stg_print_init_wait
-
-static void
-__kmp_stg_parse_next_wait( char const * name, char const * value, void * data ) {
- int wait;
- KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 );
- wait = __kmp_next_wait / 2;
- __kmp_stg_parse_int( name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, & wait );
- __kmp_next_wait = wait * 2;
- KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 );
- __kmp_yield_next = __kmp_next_wait;
-} // __kmp_stg_parse_next_wait
-
-static void
-__kmp_stg_print_next_wait( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_next_wait );
-} //__kmp_stg_print_next_wait
-
-
-// -------------------------------------------------------------------------------------------------
-// KMP_GTID_MODE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_gtid_mode( char const * name, char const * value, void * data ) {
- //
- // Modes:
- // 0 -- do not change default
- // 1 -- sp search
- // 2 -- use "keyed" TLS var, i.e.
- // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS)
- // 3 -- __declspec(thread) TLS var in tdata section
- //
- int mode = 0;
- int max = 2;
- #ifdef KMP_TDATA_GTID
- max = 3;
- #endif /* KMP_TDATA_GTID */
- __kmp_stg_parse_int( name, value, 0, max, & mode );
- // TODO: parse_int is not very suitable for this case. In case of overflow it is better to use
- // 0 rather than the max value.
- if ( mode == 0 ) {
- __kmp_adjust_gtid_mode = TRUE;
- }
- else {
- __kmp_gtid_mode = mode;
- __kmp_adjust_gtid_mode = FALSE;
- }; // if
-} // __kmp_stg_parse_gtid_mode
-
-static void
-__kmp_stg_print_gtid_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if ( __kmp_adjust_gtid_mode ) {
- __kmp_stg_print_int( buffer, name, 0 );
- }
- else {
- __kmp_stg_print_int( buffer, name, __kmp_gtid_mode );
- }
-} // __kmp_stg_print_gtid_mode
-
-
-// -------------------------------------------------------------------------------------------------
-// KMP_NUM_LOCKS_IN_BLOCK
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_lock_block( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 0, KMP_INT_MAX, & __kmp_num_locks_in_block );
-} // __kmp_stg_parse_lock_block
-
-static void
-__kmp_stg_print_lock_block( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_num_locks_in_block );
-} // __kmp_stg_print_lock_block
-
-// -------------------------------------------------------------------------------------------------
-// KMP_LOCK_KIND
-// -------------------------------------------------------------------------------------------------
-
-#if KMP_USE_DYNAMIC_LOCK
-# define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a)
-#else
-# define KMP_STORE_LOCK_SEQ(a)
-#endif
-
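-// Recognized KMP_LOCK_KIND values (per the matching below): "tas" / "test and set" variants,
-// "futex" (Linux* OS on x86/x86_64/ARM only), "ticket", "queuing"/"queue", "drdpa" variants,
-// "adaptive" (KMP_USE_ADAPTIVE_LOCKS), and "rtm"/"hle" (dynamic locks with TSX).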
-static void
-__kmp_stg_parse_lock_kind( char const * name, char const * value, void * data ) {
- if ( __kmp_init_user_locks ) {
- KMP_WARNING( EnvLockWarn, name );
- return;
- }
-
- if ( __kmp_str_match( "tas", 2, value )
- || __kmp_str_match( "test and set", 2, value )
- || __kmp_str_match( "test_and_set", 2, value )
- || __kmp_str_match( "test-and-set", 2, value )
- || __kmp_str_match( "test andset", 2, value )
- || __kmp_str_match( "test_andset", 2, value )
- || __kmp_str_match( "test-andset", 2, value )
- || __kmp_str_match( "testand set", 2, value )
- || __kmp_str_match( "testand_set", 2, value )
- || __kmp_str_match( "testand-set", 2, value )
- || __kmp_str_match( "testandset", 2, value ) ) {
- __kmp_user_lock_kind = lk_tas;
- KMP_STORE_LOCK_SEQ(tas);
- }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
- else if ( __kmp_str_match( "futex", 1, value ) ) {
- if ( __kmp_futex_determine_capable() ) {
- __kmp_user_lock_kind = lk_futex;
- KMP_STORE_LOCK_SEQ(futex);
- }
- else {
- KMP_WARNING( FutexNotSupported, name, value );
- }
- }
-#endif
- else if ( __kmp_str_match( "ticket", 2, value ) ) {
- __kmp_user_lock_kind = lk_ticket;
- KMP_STORE_LOCK_SEQ(ticket);
- }
- else if ( __kmp_str_match( "queuing", 1, value )
- || __kmp_str_match( "queue", 1, value ) ) {
- __kmp_user_lock_kind = lk_queuing;
- KMP_STORE_LOCK_SEQ(queuing);
- }
- else if ( __kmp_str_match( "drdpa ticket", 1, value )
- || __kmp_str_match( "drdpa_ticket", 1, value )
- || __kmp_str_match( "drdpa-ticket", 1, value )
- || __kmp_str_match( "drdpaticket", 1, value )
- || __kmp_str_match( "drdpa", 1, value ) ) {
- __kmp_user_lock_kind = lk_drdpa;
- KMP_STORE_LOCK_SEQ(drdpa);
- }
-#if KMP_USE_ADAPTIVE_LOCKS
- else if ( __kmp_str_match( "adaptive", 1, value ) ) {
- if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here?
- __kmp_user_lock_kind = lk_adaptive;
- KMP_STORE_LOCK_SEQ(adaptive);
- } else {
- KMP_WARNING( AdaptiveNotSupported, name, value );
- __kmp_user_lock_kind = lk_queuing;
- KMP_STORE_LOCK_SEQ(queuing);
- }
- }
-#endif // KMP_USE_ADAPTIVE_LOCKS
-#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
- else if ( __kmp_str_match("rtm", 1, value) ) {
- if ( __kmp_cpuinfo.rtm ) {
- __kmp_user_lock_kind = lk_rtm;
- KMP_STORE_LOCK_SEQ(rtm);
- } else {
- KMP_WARNING( AdaptiveNotSupported, name, value );
- __kmp_user_lock_kind = lk_queuing;
- KMP_STORE_LOCK_SEQ(queuing);
- }
- }
- else if ( __kmp_str_match("hle", 1, value) ) {
- __kmp_user_lock_kind = lk_hle;
- KMP_STORE_LOCK_SEQ(hle);
- }
-#endif
- else {
- KMP_WARNING( StgInvalidValue, name, value );
- }
-}
-
-static void
-__kmp_stg_print_lock_kind( kmp_str_buf_t * buffer, char const * name, void * data ) {
- const char *value = NULL;
-
- switch ( __kmp_user_lock_kind ) {
- case lk_default:
- value = "default";
- break;
-
- case lk_tas:
- value = "tas";
- break;
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
- case lk_futex:
- value = "futex";
- break;
-#endif
-
-#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
- case lk_rtm:
- value = "rtm";
- break;
-
- case lk_hle:
- value = "hle";
- break;
-#endif
-
- case lk_ticket:
- value = "ticket";
- break;
-
- case lk_queuing:
- value = "queuing";
- break;
-
- case lk_drdpa:
- value = "drdpa";
- break;
-#if KMP_USE_ADAPTIVE_LOCKS
- case lk_adaptive:
- value = "adaptive";
- break;
-#endif
- }
-
- if ( value != NULL ) {
- __kmp_stg_print_str( buffer, name, value );
- }
-}
-
-#if KMP_USE_ADAPTIVE_LOCKS
-
-// -------------------------------------------------------------------------------------------------
-// KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE
-// -------------------------------------------------------------------------------------------------
-
-// Parse out values for the tunable parameters from a string of the form
-// KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness]
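-// E.g. (illustrative) KMP_ADAPTIVE_LOCK_PROPS="1000,500" sets max_soft_retries=1000 and max_badness=500.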
-static void
-__kmp_stg_parse_adaptive_lock_props( const char *name, const char *value, void *data )
-{
- int max_retries = 0;
- int max_badness = 0;
-
- const char *next = value;
-
- int total = 0; // Count elements that were set. It'll be used as an array size
- int prev_comma = FALSE; // For correct processing sequential commas
- int i;
-
- // Save values in the structure __kmp_speculative_backoff_params
- // Run only 3 iterations because it is enough to read two values or find a syntax error
- for ( i = 0; i < 3 ; i++) {
- SKIP_WS( next );
-
- if ( *next == '\0' ) {
- break;
- }
- // Next character is neither a digit nor a comma, OR the number of values > 2 => syntax error
- if ( ( ( *next < '0' || *next > '9' ) && *next !=',' ) || total > 2 ) {
- KMP_WARNING( EnvSyntaxError, name, value );
- return;
- }
- // The next character is ','
- if ( *next == ',' ) {
- // ',' is the first character, or we have sequential commas: count an empty element
- if ( total == 0 || prev_comma ) {
- total++;
- }
- prev_comma = TRUE;
- next++; //skip ','
- SKIP_WS( next );
- }
- // Next character is a digit
- if ( *next >= '0' && *next <= '9' ) {
- int num;
- const char *buf = next;
- char const * msg = NULL;
- prev_comma = FALSE;
- SKIP_DIGITS( next );
- total++;
-
- const char *tmp = next;
- SKIP_WS( tmp );
- if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) {
- KMP_WARNING( EnvSpacesNotAllowed, name, value );
- return;
- }
-
- num = __kmp_str_to_int( buf, *next );
- if ( num < 0 ) { // The number of retries should be >= 0
- msg = KMP_I18N_STR( ValueTooSmall );
- num = 1;
- } else if ( num > KMP_INT_MAX ) {
- msg = KMP_I18N_STR( ValueTooLarge );
- num = KMP_INT_MAX;
- }
- if ( msg != NULL ) {
- // Message is not empty. Print warning.
- KMP_WARNING( ParseSizeIntWarn, name, value, msg );
- KMP_INFORM( Using_int_Value, name, num );
- }
- if( total == 1 ) {
- max_retries = num;
- } else if( total == 2 ) {
- max_badness = num;
- }
- }
- }
- KMP_DEBUG_ASSERT( total > 0 );
- if( total <= 0 ) {
- KMP_WARNING( EnvSyntaxError, name, value );
- return;
- }
- __kmp_adaptive_backoff_params.max_soft_retries = max_retries;
- __kmp_adaptive_backoff_params.max_badness = max_badness;
-}
-
-
-static void
-__kmp_stg_print_adaptive_lock_props(kmp_str_buf_t * buffer, char const * name, void * data )
-{
- if( __kmp_env_format ) {
- KMP_STR_BUF_PRINT_NAME_EX(name);
- } else {
- __kmp_str_buf_print( buffer, " %s='", name );
- }
- __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_adaptive_backoff_params.max_soft_retries,
- __kmp_adaptive_backoff_params.max_badness );
-} // __kmp_stg_print_adaptive_lock_props
-
-#if KMP_DEBUG_ADAPTIVE_LOCKS
-
-static void
-__kmp_stg_parse_speculative_statsfile( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_file( name, value, "", & __kmp_speculative_statsfile );
-} // __kmp_stg_parse_speculative_statsfile
-
-static void
-__kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if ( __kmp_str_match( "-", 0, __kmp_speculative_statsfile ) ) {
- __kmp_stg_print_str( buffer, name, "stdout" );
- } else {
- __kmp_stg_print_str( buffer, name, __kmp_speculative_statsfile );
- }
-
-} // __kmp_stg_print_speculative_statsfile
-
-#endif // KMP_DEBUG_ADAPTIVE_LOCKS
-
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
-// -------------------------------------------------------------------------------------------------
-// KMP_PLACE_THREADS
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) {
- // Value example: 5Cx2Tx15O
- // Which means "use 5 cores with offset 15, 2 threads per core"
- // AC: extended to sockets level, examples of
- // "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core":
- // 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t
- // To avoid breaking legacy code the core offset can be last;
- // either the postfix "o" or the prefix "@" can serve as the offset designator.
- // Note: not all syntax errors are analyzed, some may be skipped.
-#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x')
- int num;
- int single_warning = 0;
- int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0;
- const char *next = value;
- const char *prev;
-
- SKIP_WS(next); // skip white spaces
- if (*next == '\0')
- return; // no data provided, retain default values
- // Get num_sockets first (or whatever specified)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 's' || *next == 'S') { // e.g. "2s"
- __kmp_place_num_sockets = num;
- flagS = 1; // got num sockets
- next++;
- if (*next == '@') { // socket offset, e.g. "2s@4"
- flagSO = 1;
- prev = ++next; // don't allow spaces for simplicity
- if (!(*next >= '0' && *next <= '9')) {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- __kmp_place_socket_offset = num;
- }
- } else if (*next == 'c' || *next == 'C') {
- __kmp_place_num_cores = num;
- flagS = flagC = 1; // sockets were not specified - use default
- next++;
- if (*next == '@') { // core offset, e.g. "2c@6"
- flagCO = 1;
- prev = ++next; // don't allow spaces for simplicity
- if (!(*next >= '0' && *next <= '9')) {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- __kmp_place_core_offset = num;
- }
- } else if (CHECK_DELIM(next)) {
- __kmp_place_num_cores = num; // no letter-designator - num cores
- flagS = flagC = 1; // sockets were not specified - use default
- next++;
- } else if (*next == 't' || *next == 'T') {
- __kmp_place_num_threads_per_core = num;
- // sockets, cores were not specified - use default
- return; // we ignore offset value in case all cores are used
- } else if (*next == '\0') {
- __kmp_place_num_cores = num;
- return; // the only value provided - set num cores
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
- SKIP_WS(next);
- if (*next == '\0')
- return; // " n " - something like this
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get second value (could be offset, num_cores, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 'c' || *next == 'C') {
- KMP_DEBUG_ASSERT(flagC == 0);
- __kmp_place_num_cores = num;
- flagC = 1;
- next++;
- if (*next == '@') { // core offset, e.g. "2c@6"
- flagCO = 1;
- prev = ++next; // don't allow spaces for simplicity
- if (!(*next >= '0' && *next <= '9')) {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- __kmp_place_core_offset = num;
- }
- } else if (*next == 'o' || *next == 'O') { // offset specified
- KMP_WARNING(AffThrPlaceDeprecated);
- single_warning = 1;
- if (flagC) { // whether num_cores already specified (sockets skipped)
- KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both
- __kmp_place_core_offset = num;
- } else {
- KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both
- __kmp_place_socket_offset = num;
- }
- next++;
- } else if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- flagC = 1; // num_cores could be skipped ?
- flagT = 1;
- next++; // can have core-offset specified after num threads
- } else if (*next == '\0') {
- KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
- __kmp_place_num_threads_per_core = num;
- return; // two values provided without letter-designator
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- SKIP_WS(next);
- if (*next == '\0')
- return; // " Ns,Nc " - something like this
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get third value (could be core-offset, num_cores, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- if (flagC == 0)
- return; // num_cores could be skipped (e.g. 2s,4o,2t)
- flagT = 1;
- next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
- } else if (*next == 'c' || *next == 'C') {
- KMP_DEBUG_ASSERT(flagC == 0);
- __kmp_place_num_cores = num;
- flagC = 1;
- next++;
- //KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator
- } else if (*next == 'o' || *next == 'O') {
- KMP_WARNING(AffThrPlaceDeprecated);
- single_warning = 1;
- KMP_DEBUG_ASSERT(flagC);
- //KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
- __kmp_place_core_offset = num;
- next++;
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- KMP_DEBUG_ASSERT(flagC);
- SKIP_WS(next);
- if ( *next == '\0' )
- return;
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get 4-th value (could be core-offset, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 'o' || *next == 'O') {
- if (!single_warning) { // warn once
- KMP_WARNING(AffThrPlaceDeprecated);
- }
- KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
- __kmp_place_core_offset = num;
- next++;
- } else if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- flagT = 1;
- next++; // can have core-offset specified after num threads
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- return;
- }
- SKIP_WS(next);
- if ( *next == '\0' )
- return;
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get 5-th value (could be core-offset, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 'o' || *next == 'O') {
- if (!single_warning) { // warn once
- KMP_WARNING(AffThrPlaceDeprecated);
- }
- KMP_DEBUG_ASSERT(flagT);
- KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
- __kmp_place_core_offset = num;
- } else if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- }
- } else {
- KMP_WARNING(AffThrPlaceInvalid, name, value);
- }
- return;
-#undef CHECK_DELIM
-}
-
-static void
-__kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
- int comma = 0;
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
- if(__kmp_env_format)
- KMP_STR_BUF_PRINT_NAME_EX(name);
- else
- __kmp_str_buf_print(buffer, " %s='", name);
- if (__kmp_place_num_sockets) {
- __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
- if (__kmp_place_socket_offset)
- __kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset);
- comma = 1;
- }
- if (__kmp_place_num_cores) {
- __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
- if (__kmp_place_core_offset)
- __kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset);
- comma = 1;
- }
- if (__kmp_place_num_threads_per_core)
- __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
- __kmp_str_buf_print(buffer, "%s'\n", buf.str );
- __kmp_str_buf_free(&buf);
-/*
- } else {
- __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
-*/
- }
-}
-
-#if USE_ITT_BUILD
-// -------------------------------------------------------------------------------------------------
-// KMP_FORKJOIN_FRAMES
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_forkjoin_frames( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_bool( name, value, & __kmp_forkjoin_frames );
-} // __kmp_stg_parse_forkjoin_frames
-
-static void
-__kmp_stg_print_forkjoin_frames( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_forkjoin_frames );
-} // __kmp_stg_print_forkjoin_frames
-
-// -------------------------------------------------------------------------------------------------
-// KMP_FORKJOIN_FRAMES_MODE
-// -------------------------------------------------------------------------------------------------
-
-static void
-__kmp_stg_parse_forkjoin_frames_mode( char const * name, char const * value, void * data ) {
- __kmp_stg_parse_int( name, value, 0, 3, & __kmp_forkjoin_frames_mode );
-} // __kmp_stg_parse_forkjoin_frames
-
-static void
-__kmp_stg_print_forkjoin_frames_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_int( buffer, name, __kmp_forkjoin_frames_mode );
-} // __kmp_stg_print_forkjoin_frames
-#endif /* USE_ITT_BUILD */
-
-// -------------------------------------------------------------------------------------------------
-// OMP_DISPLAY_ENV
-// -------------------------------------------------------------------------------------------------
-
-#if OMP_40_ENABLED
-
-static void
-__kmp_stg_parse_omp_display_env( char const * name, char const * value, void * data )
-{
- if ( __kmp_str_match( "VERBOSE", 1, value ) )
- {
- __kmp_display_env_verbose = TRUE;
- } else {
- __kmp_stg_parse_bool( name, value, & __kmp_display_env );
- }
-
-} // __kmp_stg_parse_omp_display_env
-
-static void
-__kmp_stg_print_omp_display_env( kmp_str_buf_t * buffer, char const * name, void * data )
-{
- if ( __kmp_display_env_verbose )
- {
- __kmp_stg_print_str( buffer, name, "VERBOSE" );
- } else {
- __kmp_stg_print_bool( buffer, name, __kmp_display_env );
- }
-} // __kmp_stg_print_omp_display_env
-
-static void
-__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) {
- if ( TCR_4(__kmp_init_parallel) ) {
- KMP_WARNING( EnvParallelWarn, name );
- return;
- } // read value before first parallel only
- __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation );
-} // __kmp_stg_parse_omp_cancellation
-
-static void
-__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) {
- __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation );
-} // __kmp_stg_print_omp_cancellation
-
-#endif
-
-// -------------------------------------------------------------------------------------------------
-// Table.
-// -------------------------------------------------------------------------------------------------
-
-
-static kmp_setting_t __kmp_stg_table[] = {
-
- { "KMP_ALL_THREADS", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 },
- { "KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, NULL, 0, 0 },
- { "KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0 },
- { "KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 },
- { "KMP_MAX_THREADS", __kmp_stg_parse_all_threads, NULL, NULL, 0, 0 }, // For backward compatibility
- { "KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, __kmp_stg_print_monitor_stacksize, NULL, 0, 0 },
- { "KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, 0, 0 },
- { "KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, __kmp_stg_print_stackoffset, NULL, 0, 0 },
- { "KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 },
- { "KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, 0, 0 },
- { "KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, 0 },
- { "KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, 0, 0 },
-
- { "OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0 },
- { "OMP_NUM_THREADS", __kmp_stg_parse_num_threads, __kmp_stg_print_num_threads, NULL, 0, 0 },
- { "OMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 },
-
- { "KMP_TASKING", __kmp_stg_parse_tasking, __kmp_stg_print_tasking, NULL, 0, 0 },
- { "KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, __kmp_stg_print_task_stealing, NULL, 0, 0 },
- { "OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, __kmp_stg_print_max_active_levels, NULL, 0, 0 },
- { "OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 },
- { "OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 },
-#if KMP_NESTED_HOT_TEAMS
- { "KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, __kmp_stg_print_hot_teams_level, NULL, 0, 0 },
- { "KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, __kmp_stg_print_hot_teams_mode, NULL, 0, 0 },
-#endif // KMP_NESTED_HOT_TEAMS
-
-#if KMP_HANDLE_SIGNALS
- { "KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, __kmp_stg_print_handle_signals, NULL, 0, 0 },
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- { "KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, __kmp_stg_print_inherit_fp_control, NULL, 0, 0 },
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#ifdef KMP_GOMP_COMPAT
- { "GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0 },
-#endif
-
-#ifdef KMP_DEBUG
- { "KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, 0 },
- { "KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, 0 },
- { "KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, 0 },
- { "KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, 0 },
- { "KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, 0 },
- { "KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, 0 },
- { "KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0 },
- { "KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, NULL, 0, 0 },
- { "KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, __kmp_stg_print_debug_buf_atomic, NULL, 0, 0 },
- { "KMP_DEBUG_BUF_CHARS", __kmp_stg_parse_debug_buf_chars, __kmp_stg_print_debug_buf_chars, NULL, 0, 0 },
- { "KMP_DEBUG_BUF_LINES", __kmp_stg_parse_debug_buf_lines, __kmp_stg_print_debug_buf_lines, NULL, 0, 0 },
- { "KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0 },
-
- { "KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, __kmp_stg_print_par_range_env, NULL, 0, 0 },
- { "KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, __kmp_stg_print_yield_cycle, NULL, 0, 0 },
- { "KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, 0, 0 },
- { "KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, NULL, 0, 0 },
-#endif // KMP_DEBUG
-
- { "KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, __kmp_stg_print_align_alloc, NULL, 0, 0 },
-
- { "KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
- { "KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
- { "KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
- { "KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
-#if KMP_FAST_REDUCTION_BARRIER
- { "KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
- { "KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
-#endif
-
- { "KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, __kmp_stg_print_abort_delay, NULL, 0, 0 },
- { "KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, __kmp_stg_print_cpuinfo_file, NULL, 0, 0 },
- { "KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 },
- { "KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 },
- { "KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, __kmp_stg_print_storage_map, NULL, 0, 0 },
- { "KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, __kmp_stg_print_all_threadprivate, NULL, 0, 0 },
- { "KMP_FOREIGN_THREADS_THREADPRIVATE", __kmp_stg_parse_foreign_threads_threadprivate, __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0 },
-
-#if KMP_AFFINITY_SUPPORTED
- { "KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, 0, 0 },
-# ifdef KMP_GOMP_COMPAT
- { "GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, /* no print */ NULL, 0, 0 },
-# endif /* KMP_GOMP_COMPAT */
-# if OMP_40_ENABLED
- { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 },
- { "OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0 },
-# else
- { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, 0 },
-# endif /* OMP_40_ENABLED */
-
- { "KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0 },
-
-#else
-
- //
- // KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES.
- // OMP_PROC_BIND and proc-bind-var are supported, however.
- //
-# if OMP_40_ENABLED
- { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 },
-# endif
-
-#endif // KMP_AFFINITY_SUPPORTED
-
- { "KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0 },
- { "KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, 0, 0 },
- { "OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, NULL, 0, 0 },
- { "KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, __kmp_stg_print_atomic_mode, NULL, 0, 0 },
- { "KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, __kmp_stg_print_consistency_check, NULL, 0, 0 },
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- { "KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, __kmp_stg_print_itt_prepare_delay, NULL, 0, 0 },
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- { "KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, __kmp_stg_print_malloc_pool_incr, NULL, 0, 0 },
- { "KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, NULL, 0, 0 },
- { "KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, NULL, 0, 0 },
- { "KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, NULL, 0, 0 },
- { "OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, NULL, 0, 0 },
- { "KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0 },
-
-#ifdef USE_LOAD_BALANCE
- { "KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval,__kmp_stg_print_ld_balance_interval,NULL, 0, 0 },
-#endif
-
- { "KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, __kmp_stg_print_lock_block, NULL, 0, 0 },
- { "KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, NULL, 0, 0 },
-#if KMP_USE_ADAPTIVE_LOCKS
- { "KMP_ADAPTIVE_LOCK_PROPS", __kmp_stg_parse_adaptive_lock_props,__kmp_stg_print_adaptive_lock_props, NULL, 0, 0 },
-#if KMP_DEBUG_ADAPTIVE_LOCKS
- { "KMP_SPECULATIVE_STATSFILE", __kmp_stg_parse_speculative_statsfile,__kmp_stg_print_speculative_statsfile, NULL, 0, 0 },
-#endif
-#endif // KMP_USE_ADAPTIVE_LOCKS
- { "KMP_PLACE_THREADS", __kmp_stg_parse_place_threads, __kmp_stg_print_place_threads, NULL, 0, 0 },
-#if USE_ITT_BUILD
- { "KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, __kmp_stg_print_forkjoin_frames, NULL, 0, 0 },
- { "KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0 },
-#endif
-
-# if OMP_40_ENABLED
- { "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 },
- { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 },
-#endif
- { "", NULL, NULL, NULL, 0, 0 }
-}; // settings
-
-static int const __kmp_stg_count = sizeof( __kmp_stg_table ) / sizeof( kmp_setting_t );
-
-static inline
-kmp_setting_t *
-__kmp_stg_find( char const * name ) {
-
- int i;
- if ( name != NULL ) {
- for ( i = 0; i < __kmp_stg_count; ++ i ) {
- if ( strcmp( __kmp_stg_table[ i ].name, name ) == 0 ) {
- return & __kmp_stg_table[ i ];
- }; // if
- }; // for
- }; // if
- return NULL;
-
-} // __kmp_stg_find
-
-
-static int
-__kmp_stg_cmp( void const * _a, void const * _b ) {
- kmp_setting_t * a = (kmp_setting_t *) _a;
- kmp_setting_t * b = (kmp_setting_t *) _b;
-
- //
- // Process KMP_AFFINITY last.
- // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY.
- //
- if ( strcmp( a->name, "KMP_AFFINITY" ) == 0 ) {
- if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) {
- return 0;
- }
- return 1;
- }
- else if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) {
- return -1;
- }
- return strcmp( a->name, b->name );
-} // __kmp_stg_cmp
-
-
-static void
-__kmp_stg_init( void ) {
-
- static int initialized = 0;
-
- if ( ! initialized ) {
-
- // Sort table.
- qsort( __kmp_stg_table, __kmp_stg_count - 1, sizeof( kmp_setting_t ), __kmp_stg_cmp );
-
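- // Each rivals array below lists mutually exclusive settings in priority order (terminated by
- // NULL); __kmp_stg_check_rivals() later warns and ignores a variable when a higher-priority
- // rival has already been set.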
- { // Initialize *_STACKSIZE data.
-
- kmp_setting_t * kmp_stacksize = __kmp_stg_find( "KMP_STACKSIZE" ); // 1st priority.
-#ifdef KMP_GOMP_COMPAT
- kmp_setting_t * gomp_stacksize = __kmp_stg_find( "GOMP_STACKSIZE" ); // 2nd priority.
-#endif
- kmp_setting_t * omp_stacksize = __kmp_stg_find( "OMP_STACKSIZE" ); // 3rd priority.
-
- // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
- // !!! Compiler does not understand rivals is used and optimizes out assignments
- // !!! rivals[ i ++ ] = ...;
- static kmp_setting_t * volatile rivals[ 4 ];
- static kmp_stg_ss_data_t kmp_data = { 1, (kmp_setting_t **)rivals };
-#ifdef KMP_GOMP_COMPAT
- static kmp_stg_ss_data_t gomp_data = { 1024, (kmp_setting_t **)rivals };
-#endif
- static kmp_stg_ss_data_t omp_data = { 1024, (kmp_setting_t **)rivals };
- int i = 0;
-
- rivals[ i ++ ] = kmp_stacksize;
-#ifdef KMP_GOMP_COMPAT
- if ( gomp_stacksize != NULL ) {
- rivals[ i ++ ] = gomp_stacksize;
- }; // if
-#endif
- rivals[ i ++ ] = omp_stacksize;
- rivals[ i ++ ] = NULL;
-
- kmp_stacksize->data = & kmp_data;
-#ifdef KMP_GOMP_COMPAT
- if ( gomp_stacksize != NULL ) {
- gomp_stacksize->data = & gomp_data;
- }; // if
-#endif
- omp_stacksize->data = & omp_data;
-
- }
-
- { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data.
-
- kmp_setting_t * kmp_library = __kmp_stg_find( "KMP_LIBRARY" ); // 1st priority.
- kmp_setting_t * omp_wait_policy = __kmp_stg_find( "OMP_WAIT_POLICY" ); // 2nd priority.
-
- // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
- static kmp_setting_t * volatile rivals[ 3 ];
- static kmp_stg_wp_data_t kmp_data = { 0, (kmp_setting_t **)rivals };
- static kmp_stg_wp_data_t omp_data = { 1, (kmp_setting_t **)rivals };
- int i = 0;
-
- rivals[ i ++ ] = kmp_library;
- if ( omp_wait_policy != NULL ) {
- rivals[ i ++ ] = omp_wait_policy;
- }; // if
- rivals[ i ++ ] = NULL;
-
- kmp_library->data = & kmp_data;
- if ( omp_wait_policy != NULL ) {
- omp_wait_policy->data = & omp_data;
- }; // if
-
- }
-
- { // Initialize KMP_ALL_THREADS, KMP_MAX_THREADS, and OMP_THREAD_LIMIT data.
-
- kmp_setting_t * kmp_all_threads = __kmp_stg_find( "KMP_ALL_THREADS" ); // 1st priority.
- kmp_setting_t * kmp_max_threads = __kmp_stg_find( "KMP_MAX_THREADS" ); // 2nd priority.
- kmp_setting_t * omp_thread_limit = __kmp_stg_find( "OMP_THREAD_LIMIT" ); // 3rd priority.
-
- // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
- static kmp_setting_t * volatile rivals[ 4 ];
- int i = 0;
-
- rivals[ i ++ ] = kmp_all_threads;
- rivals[ i ++ ] = kmp_max_threads;
- if ( omp_thread_limit != NULL ) {
- rivals[ i ++ ] = omp_thread_limit;
- }; // if
- rivals[ i ++ ] = NULL;
-
- kmp_all_threads->data = (void*)& rivals;
- kmp_max_threads->data = (void*)& rivals;
- if ( omp_thread_limit != NULL ) {
- omp_thread_limit->data = (void*)& rivals;
- }; // if
-
- }
-
-#if KMP_AFFINITY_SUPPORTED
- { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data.
-
- kmp_setting_t * kmp_affinity = __kmp_stg_find( "KMP_AFFINITY" ); // 1st priority.
- KMP_DEBUG_ASSERT( kmp_affinity != NULL );
-
-# ifdef KMP_GOMP_COMPAT
- kmp_setting_t * gomp_cpu_affinity = __kmp_stg_find( "GOMP_CPU_AFFINITY" ); // 2nd priority.
- KMP_DEBUG_ASSERT( gomp_cpu_affinity != NULL );
-# endif
-
- kmp_setting_t * omp_proc_bind = __kmp_stg_find( "OMP_PROC_BIND" ); // 3rd priority.
- KMP_DEBUG_ASSERT( omp_proc_bind != NULL );
-
- // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
- static kmp_setting_t * volatile rivals[ 4 ];
- int i = 0;
-
- rivals[ i ++ ] = kmp_affinity;
-
-# ifdef KMP_GOMP_COMPAT
- rivals[ i ++ ] = gomp_cpu_affinity;
- gomp_cpu_affinity->data = (void*)& rivals;
-# endif
-
- rivals[ i ++ ] = omp_proc_bind;
- omp_proc_bind->data = (void*)& rivals;
- rivals[ i ++ ] = NULL;
-
-# if OMP_40_ENABLED
- static kmp_setting_t * volatile places_rivals[ 4 ];
- i = 0;
-
- kmp_setting_t * omp_places = __kmp_stg_find( "OMP_PLACES" ); // 3rd priority.
- KMP_DEBUG_ASSERT( omp_places != NULL );
-
- places_rivals[ i ++ ] = kmp_affinity;
-# ifdef KMP_GOMP_COMPAT
- places_rivals[ i ++ ] = gomp_cpu_affinity;
-# endif
- places_rivals[ i ++ ] = omp_places;
- omp_places->data = (void*)& places_rivals;
- places_rivals[ i ++ ] = NULL;
-# endif
- }
-#else
- // KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals.
- // OMP_PLACES not supported yet.
-#endif // KMP_AFFINITY_SUPPORTED
-
- { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data.
-
- kmp_setting_t * kmp_force_red = __kmp_stg_find( "KMP_FORCE_REDUCTION" ); // 1st priority.
- kmp_setting_t * kmp_determ_red = __kmp_stg_find( "KMP_DETERMINISTIC_REDUCTION" ); // 2nd priority.
-
- // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
- static kmp_setting_t * volatile rivals[ 3 ];
- static kmp_stg_fr_data_t force_data = { 1, (kmp_setting_t **)rivals };
- static kmp_stg_fr_data_t determ_data = { 0, (kmp_setting_t **)rivals };
- int i = 0;
-
- rivals[ i ++ ] = kmp_force_red;
- if ( kmp_determ_red != NULL ) {
- rivals[ i ++ ] = kmp_determ_red;
- }; // if
- rivals[ i ++ ] = NULL;
-
- kmp_force_red->data = & force_data;
- if ( kmp_determ_red != NULL ) {
- kmp_determ_red->data = & determ_data;
- }; // if
- }
-
- initialized = 1;
-
- }; // if
-
- // Reset flags.
- int i;
- for ( i = 0; i < __kmp_stg_count; ++ i ) {
- __kmp_stg_table[ i ].set = 0;
- }; // for
-
-} // __kmp_stg_init
-
-
-static void
-__kmp_stg_parse(
- char const * name,
- char const * value
-) {
-
- // On Windows* OS there are some nameless variables like "C:=C:\" (yeah, really nameless: they are
- // presented in the environment block as "=C:=C\\\x00=D:=D:\\\x00..."), so let us skip them.
- if ( name[ 0 ] == 0 ) {
- return;
- }; // if
-
- if ( value != NULL ) {
- kmp_setting_t * setting = __kmp_stg_find( name );
- if ( setting != NULL ) {
- setting->parse( name, value, setting->data );
- setting->defined = 1;
- }; // if
- }; // if
-
-} // __kmp_stg_parse
-
-
-static int
-__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found.
- char const * name, // Name of variable.
- char const * value, // Value of the variable.
- kmp_setting_t * * rivals // List of rival settings (the list must include current one).
-) {
-
- if ( rivals == NULL ) {
- return 0;
- }
-
- // Loop through higher-priority settings (listed before the current one).
- int i = 0;
- for ( ; strcmp( rivals[ i ]->name, name ) != 0; i++ ) {
- KMP_DEBUG_ASSERT( rivals[ i ] != NULL );
-
-#if KMP_AFFINITY_SUPPORTED
- if ( rivals[ i ] == __kmp_affinity_notype ) {
- //
- // If KMP_AFFINITY is specified without a type name,
- // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY.
- //
- continue;
- }
-#endif
-
- if ( rivals[ i ]->set ) {
- KMP_WARNING( StgIgnored, name, rivals[ i ]->name );
- return 1;
- }; // if
- }; // for
-
- ++ i; // Skip current setting.
- return 0;
-
-}; // __kmp_stg_check_rivals
-
-
-static int
-__kmp_env_toPrint( char const * name, int flag ) {
- int rc = 0;
- kmp_setting_t * setting = __kmp_stg_find( name );
- if ( setting != NULL ) {
- rc = setting->defined;
- if ( flag >= 0 ) {
- setting->defined = flag;
- }; // if
- }; // if
- return rc;
-}
-
-
-static void
-__kmp_aux_env_initialize( kmp_env_blk_t* block ) {
-
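- // For each variable that is present in the environment block, push the value that was
- // already parsed into the corresponding runtime setter.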
- char const * value;
-
- /* OMP_NUM_THREADS */
- value = __kmp_env_blk_var( block, "OMP_NUM_THREADS" );
- if ( value ) {
- ompc_set_num_threads( __kmp_dflt_team_nth );
- }
-
- /* KMP_BLOCKTIME */
- value = __kmp_env_blk_var( block, "KMP_BLOCKTIME" );
- if ( value ) {
- kmpc_set_blocktime( __kmp_dflt_blocktime );
- }
-
- /* OMP_NESTED */
- value = __kmp_env_blk_var( block, "OMP_NESTED" );
- if ( value ) {
- ompc_set_nested( __kmp_dflt_nested );
- }
-
- /* OMP_DYNAMIC */
- value = __kmp_env_blk_var( block, "OMP_DYNAMIC" );
- if ( value ) {
- ompc_set_dynamic( __kmp_global.g.g_dynamic );
- }
-
-}
-
-void
-__kmp_env_initialize( char const * string ) {
-
- kmp_env_blk_t block;
- int i;
-
- __kmp_stg_init();
-
- // Hack!!!
- if ( string == NULL ) {
- // __kmp_max_nth = __kmp_sys_max_nth;
- __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
- }; // if
- __kmp_env_blk_init( & block, string );
-
- //
- // update the set flag on all entries that have an env var
- //
- for ( i = 0; i < block.count; ++ i ) {
- if (( block.vars[ i ].name == NULL )
- || ( *block.vars[ i ].name == '\0')) {
- continue;
- }
- if ( block.vars[ i ].value == NULL ) {
- continue;
- }
- kmp_setting_t * setting = __kmp_stg_find( block.vars[ i ].name );
- if ( setting != NULL ) {
- setting->set = 1;
- }
- }; // for i
-
- // Special case. If we parse environment, not a string, process KMP_WARNINGS first.
- if ( string == NULL ) {
- char const * name = "KMP_WARNINGS";
- char const * value = __kmp_env_blk_var( & block, name );
- __kmp_stg_parse( name, value );
- }; // if
-
-#if KMP_AFFINITY_SUPPORTED
- //
- // Special case. KMP_AFFINITY is not a rival to other affinity env vars
- // if no affinity type is specified. We want to allow
- // KMP_AFFINITY=[no],verbose/[no]warnings/etc. to be enabled when
- // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0
- // affinity mechanism.
- //
- __kmp_affinity_notype = NULL;
- char const *aff_str = __kmp_env_blk_var( & block, "KMP_AFFINITY" );
- if ( aff_str != NULL ) {
- //
- // Check if the KMP_AFFINITY type is specified in the string.
- // We just search the string for "compact", "scatter", etc.
- // without really parsing the string. The syntax of the
- // KMP_AFFINITY env var is such that none of the affinity
- // type names can appear anywhere other that the type
- // type names can appear anywhere other than the type
- //
- // I can't find a case-insensitive version of strstr on Windows* OS.
- // Use the case-sensitive version for now.
- //
-
-# if KMP_OS_WINDOWS
-# define FIND strstr
-# else
-# define FIND strcasestr
-# endif
-
- if ( ( FIND( aff_str, "none" ) == NULL )
- && ( FIND( aff_str, "physical" ) == NULL )
- && ( FIND( aff_str, "logical" ) == NULL )
- && ( FIND( aff_str, "compact" ) == NULL )
- && ( FIND( aff_str, "scatter" ) == NULL )
- && ( FIND( aff_str, "explicit" ) == NULL )
- && ( FIND( aff_str, "balanced" ) == NULL )
- && ( FIND( aff_str, "disabled" ) == NULL ) ) {
- __kmp_affinity_notype = __kmp_stg_find( "KMP_AFFINITY" );
- }
- else {
- //
- // A new affinity type is specified.
- // Reset the affinity flags to their default values,
- // in case this is called from kmp_set_defaults().
- //
- __kmp_affinity_type = affinity_default;
- __kmp_affinity_gran = affinity_gran_default;
- __kmp_affinity_top_method = affinity_top_method_default;
- __kmp_affinity_respect_mask = affinity_respect_mask_default;
- }
-# undef FIND
-
-#if OMP_40_ENABLED
- //
- // Also reset the affinity flags if OMP_PROC_BIND is specified.
- //
- aff_str = __kmp_env_blk_var( & block, "OMP_PROC_BIND" );
- if ( aff_str != NULL ) {
- __kmp_affinity_type = affinity_default;
- __kmp_affinity_gran = affinity_gran_default;
- __kmp_affinity_top_method = affinity_top_method_default;
- __kmp_affinity_respect_mask = affinity_respect_mask_default;
- }
-#endif /* OMP_40_ENABLED */
- }
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
-#if OMP_40_ENABLED
- //
- // Set up the nested proc bind type vector.
- //
- if ( __kmp_nested_proc_bind.bind_types == NULL ) {
- __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *)
- KMP_INTERNAL_MALLOC( sizeof(kmp_proc_bind_t) );
- if ( __kmp_nested_proc_bind.bind_types == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }
- __kmp_nested_proc_bind.size = 1;
- __kmp_nested_proc_bind.used = 1;
-# if KMP_AFFINITY_SUPPORTED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_default;
-# else
- // default proc bind is false if affinity not supported
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-# endif
-
- }
-#endif /* OMP_40_ENABLED */
-
- //
- // Now process all of the settings.
- //
- for ( i = 0; i < block.count; ++ i ) {
- __kmp_stg_parse( block.vars[ i ].name, block.vars[ i ].value );
- }; // for i
-
- //
- // If user locks have already been allocated, don't reset the lock vptr table.
- //
- if ( ! __kmp_init_user_locks ) {
- if ( __kmp_user_lock_kind == lk_default ) {
- __kmp_user_lock_kind = lk_queuing;
- }
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_init_dynamic_user_locks();
-#else
- __kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
-#endif
- }
- else {
- KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called
- KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default );
- // Binds lock functions again to follow the transition between different
- // KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long
- // as we do not allow lock kind changes after making a call to any
- // user lock functions (true).
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_init_dynamic_user_locks();
-#else
- __kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
-#endif
- }
-
-#if KMP_AFFINITY_SUPPORTED
-
- if ( ! TCR_4(__kmp_init_middle) ) {
- //
- // Determine if the machine/OS is actually capable of supporting
- // affinity.
- //
- const char *var = "KMP_AFFINITY";
-# if KMP_USE_HWLOC
- if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if(__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
- }
- hwloc_topology_ignore_type(__kmp_hwloc_topology, HWLOC_OBJ_CACHE);
-# endif
- if ( __kmp_affinity_type == affinity_disabled ) {
- KMP_AFFINITY_DISABLE();
- }
- else if ( ! KMP_AFFINITY_CAPABLE() ) {
-# if KMP_USE_HWLOC
- const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
- if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if(__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
- }
- // Is the system capable of setting/getting this thread's affinity?
- // also, is topology discovery possible? (pu indicates ability to discover processing units)
- // and finally, were there no errors when calling any hwloc_* API functions?
- if(topology_support->cpubind->set_thisthread_cpubind &&
- topology_support->cpubind->get_thisthread_cpubind &&
- topology_support->discovery->pu &&
- !__kmp_hwloc_error)
- {
- // enables affinity according to KMP_AFFINITY_CAPABLE() macro
- KMP_AFFINITY_ENABLE(TRUE);
- } else {
- // indicate that hwloc didn't work and disable affinity
- __kmp_hwloc_error = TRUE;
- KMP_AFFINITY_DISABLE();
- }
-# else
- __kmp_affinity_determine_capable( var );
-# endif // KMP_USE_HWLOC
- if ( ! KMP_AFFINITY_CAPABLE() ) {
- if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings
- && ( __kmp_affinity_type != affinity_default )
- && ( __kmp_affinity_type != affinity_none )
- && ( __kmp_affinity_type != affinity_disabled ) ) ) {
- KMP_WARNING( AffNotSupported, var );
- }
- __kmp_affinity_type = affinity_disabled;
- __kmp_affinity_respect_mask = 0;
- __kmp_affinity_gran = affinity_gran_fine;
- }
- }
-
-# if OMP_40_ENABLED
- if ( __kmp_affinity_type == affinity_disabled ) {
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
- }
- else if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_true ) {
- //
- // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread.
- //
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread;
- }
-# endif /* OMP_40_ENABLED */
-
- if ( KMP_AFFINITY_CAPABLE() ) {
-
-# if KMP_GROUP_AFFINITY
-
- //
- // Handle the Win 64 group affinity stuff if there are multiple
- // processor groups, or if the user requested it, and OMP 4.0
- // affinity is not in effect.
- //
- if ( ( ( __kmp_num_proc_groups > 1 )
- && ( __kmp_affinity_type == affinity_default )
-# if OMP_40_ENABLED
- && ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) )
-# endif
- || ( __kmp_affinity_top_method == affinity_top_method_group ) ) {
- if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) {
- __kmp_affinity_respect_mask = FALSE;
- }
- if ( __kmp_affinity_type == affinity_default ) {
- __kmp_affinity_type = affinity_compact;
-# if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
- }
- if ( __kmp_affinity_top_method == affinity_top_method_default ) {
- if ( __kmp_affinity_gran == affinity_gran_default ) {
- __kmp_affinity_top_method = affinity_top_method_group;
- __kmp_affinity_gran = affinity_gran_group;
- }
- else if ( __kmp_affinity_gran == affinity_gran_group ) {
- __kmp_affinity_top_method = affinity_top_method_group;
- }
- else {
- __kmp_affinity_top_method = affinity_top_method_all;
- }
- }
- else if ( __kmp_affinity_top_method == affinity_top_method_group ) {
- if ( __kmp_affinity_gran == affinity_gran_default ) {
- __kmp_affinity_gran = affinity_gran_group;
- }
- else if ( ( __kmp_affinity_gran != affinity_gran_group )
- && ( __kmp_affinity_gran != affinity_gran_fine )
- && ( __kmp_affinity_gran != affinity_gran_thread ) ) {
- char *str = NULL;
- switch ( __kmp_affinity_gran ) {
- case affinity_gran_core: str = "core"; break;
- case affinity_gran_package: str = "package"; break;
- case affinity_gran_node: str = "node"; break;
- default: KMP_DEBUG_ASSERT( 0 );
- }
- KMP_WARNING( AffGranTopGroup, var, str );
- __kmp_affinity_gran = affinity_gran_fine;
- }
- }
- else {
- if ( __kmp_affinity_gran == affinity_gran_default ) {
- __kmp_affinity_gran = affinity_gran_core;
- }
- else if ( __kmp_affinity_gran == affinity_gran_group ) {
- char *str = NULL;
- switch ( __kmp_affinity_type ) {
- case affinity_physical: str = "physical"; break;
- case affinity_logical: str = "logical"; break;
- case affinity_compact: str = "compact"; break;
- case affinity_scatter: str = "scatter"; break;
- case affinity_explicit: str = "explicit"; break;
- // No MIC on windows, so no affinity_balanced case
- default: KMP_DEBUG_ASSERT( 0 );
- }
- KMP_WARNING( AffGranGroupType, var, str );
- __kmp_affinity_gran = affinity_gran_core;
- }
- }
- }
- else
-
-# endif /* KMP_GROUP_AFFINITY */
-
- {
- if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) {
-# if KMP_GROUP_AFFINITY
- if ( __kmp_num_proc_groups > 1 ) {
- __kmp_affinity_respect_mask = FALSE;
- }
- else
-# endif /* KMP_GROUP_AFFINITY */
- {
- __kmp_affinity_respect_mask = TRUE;
- }
- }
-# if OMP_40_ENABLED
- if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
- && ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ) ) {
- if ( __kmp_affinity_type == affinity_default ) {
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_dups = FALSE;
- }
- }
- else
-# endif /* OMP_40_ENABLED */
- if ( __kmp_affinity_type == affinity_default ) {
-#if OMP_40_ENABLED
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic ) {
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
- } else
-#endif
- {
- __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
- }
-#endif /* OMP_40_ENABLED */
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic ) {
- __kmp_affinity_type = affinity_scatter;
- } else
-#endif
- {
- __kmp_affinity_type = affinity_none;
- }
-
- }
- if ( ( __kmp_affinity_gran == affinity_gran_default )
- && ( __kmp_affinity_gran_levels < 0 ) ) {
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
- if( __kmp_mic_type != non_mic ) {
- __kmp_affinity_gran = affinity_gran_fine;
- } else
-#endif
- {
- __kmp_affinity_gran = affinity_gran_core;
- }
- }
- if ( __kmp_affinity_top_method == affinity_top_method_default ) {
- __kmp_affinity_top_method = affinity_top_method_all;
- }
- }
- }
-
- K_DIAG( 1, ( "__kmp_affinity_type == %d\n", __kmp_affinity_type ) );
- K_DIAG( 1, ( "__kmp_affinity_compact == %d\n", __kmp_affinity_compact ) );
- K_DIAG( 1, ( "__kmp_affinity_offset == %d\n", __kmp_affinity_offset ) );
- K_DIAG( 1, ( "__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose ) );
- K_DIAG( 1, ( "__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings ) );
- K_DIAG( 1, ( "__kmp_affinity_respect_mask == %d\n", __kmp_affinity_respect_mask ) );
- K_DIAG( 1, ( "__kmp_affinity_gran == %d\n", __kmp_affinity_gran ) );
-
- KMP_DEBUG_ASSERT( __kmp_affinity_type != affinity_default);
-# if OMP_40_ENABLED
- KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default );
-# endif
- }
-
-#endif /* KMP_AFFINITY_SUPPORTED */
-
- if ( __kmp_version ) {
- __kmp_print_version_1();
- }; // if
-
-    // Post-initialization step: some env. vars need further processing of their values
- if ( string != NULL) { // kmp_set_defaults() was called
- __kmp_aux_env_initialize( &block );
- }
-
- __kmp_env_blk_free( & block );
-
- KMP_MB();
-
-} // __kmp_env_initialize
-
-
-void
-__kmp_env_print() {
-
- kmp_env_blk_t block;
- int i;
- kmp_str_buf_t buffer;
-
- __kmp_stg_init();
- __kmp_str_buf_init( & buffer );
-
- __kmp_env_blk_init( & block, NULL );
- __kmp_env_blk_sort( & block );
-
- // Print real environment values.
- __kmp_str_buf_print( & buffer, "\n%s\n\n", KMP_I18N_STR( UserSettings ) );
- for ( i = 0; i < block.count; ++ i ) {
- char const * name = block.vars[ i ].name;
- char const * value = block.vars[ i ].value;
- if (
- ( KMP_STRLEN( name ) > 4 && strncmp( name, "KMP_", 4 ) == 0 )
- || strncmp( name, "OMP_", 4 ) == 0
- #ifdef KMP_GOMP_COMPAT
- || strncmp( name, "GOMP_", 5 ) == 0
- #endif // KMP_GOMP_COMPAT
- ) {
- __kmp_str_buf_print( & buffer, " %s=%s\n", name, value );
- }; // if
- }; // for
- __kmp_str_buf_print( & buffer, "\n" );
-
- // Print internal (effective) settings.
- __kmp_str_buf_print( & buffer, "%s\n\n", KMP_I18N_STR( EffectiveSettings ) );
- for ( int i = 0; i < __kmp_stg_count; ++ i ) {
- if ( __kmp_stg_table[ i ].print != NULL ) {
- __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data );
- }; // if
- }; // for
-
- __kmp_printf( "%s", buffer.str );
-
- __kmp_env_blk_free( & block );
- __kmp_str_buf_free( & buffer );
-
- __kmp_printf("\n");
-
-} // __kmp_env_print
-
-
-#if OMP_40_ENABLED
-void
-__kmp_env_print_2() {
-
- kmp_env_blk_t block;
- kmp_str_buf_t buffer;
-
- __kmp_env_format = 1;
-
- __kmp_stg_init();
- __kmp_str_buf_init( & buffer );
-
- __kmp_env_blk_init( & block, NULL );
- __kmp_env_blk_sort( & block );
-
- __kmp_str_buf_print( & buffer, "\n%s\n", KMP_I18N_STR( DisplayEnvBegin ) );
- __kmp_str_buf_print( & buffer, " _OPENMP='%d'\n", __kmp_openmp_version );
-
- for ( int i = 0; i < __kmp_stg_count; ++ i ) {
- if ( __kmp_stg_table[ i ].print != NULL &&
- ( ( __kmp_display_env && strncmp( __kmp_stg_table[ i ].name, "OMP_", 4 ) == 0 ) || __kmp_display_env_verbose ) ) {
- __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data );
- }; // if
- }; // for
-
- __kmp_str_buf_print( & buffer, "%s\n", KMP_I18N_STR( DisplayEnvEnd ) );
- __kmp_str_buf_print( & buffer, "\n" );
-
- __kmp_printf( "%s", buffer.str );
-
- __kmp_env_blk_free( & block );
- __kmp_str_buf_free( & buffer );
-
- __kmp_printf("\n");
-
-} // __kmp_env_print_2
-#endif // OMP_40_ENABLED
-
-// end of file
-
+/*
+ * kmp_settings.c -- Initialize environment variables
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_wrapper_getpid.h"
+#include "kmp_environment.h"
+#include "kmp_atomic.h"
+#include "kmp_itt.h"
+#include "kmp_str.h"
+#include "kmp_settings.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+
+static int __kmp_env_toPrint( char const * name, int flag );
+
+bool __kmp_env_format = 0; // 0 - old format; 1 - new format
+// -------------------------------------------------------------------------------------------------
+// Helper string functions. Subject to move to kmp_str.
+// -------------------------------------------------------------------------------------------------
+
+static double
+__kmp_convert_to_double( char const * s )
+{
+ double result;
+
+ if ( KMP_SSCANF( s, "%lf", &result ) < 1 ) {
+ result = 0.0;
+ }
+
+ return result;
+}
+
+#ifdef KMP_DEBUG
+static unsigned int
+__kmp_readstr_with_sentinel(char *dest, char const * src, size_t len, char sentinel) {
+ unsigned int i;
+ for (i = 0; i < len; i++) {
+ if ((*src == '\0') || (*src == sentinel)) {
+ break;
+ }
+ *(dest++) = *(src++);
+ }
+ *dest = '\0';
+ return i;
+}
+#endif
+
+static int
+__kmp_match_with_sentinel( char const * a, char const * b, size_t len, char sentinel ) {
+ size_t l = 0;
+
+ if(a == NULL)
+ a = "";
+ if(b == NULL)
+ b = "";
+ while(*a && *b && *b != sentinel) {
+ char ca = *a, cb = *b;
+
+ if(ca >= 'a' && ca <= 'z')
+ ca -= 'a' - 'A';
+ if(cb >= 'a' && cb <= 'z')
+ cb -= 'a' - 'A';
+ if(ca != cb)
+ return FALSE;
+ ++l;
+ ++a;
+ ++b;
+ }
+ return l >= len;
+}
+
+//
+// Expected usage:
+// token is the token to check for.
+// buf is the string being parsed.
+// *end returns the char after the end of the token.
+//        It is not modified unless a match occurs.
+//
+//
+// Example 1:
+//
+//     if (__kmp_match_str("token", buf, &end)) {
+// <do something>
+// buf = end;
+// }
+//
+// Example 2:
+//
+//     if (__kmp_match_str("token", buf, &end)) {
+//         char save = *end;
+//         *(char *)end = sentinel;
+//         <use any of the __kmp*_with_sentinel() functions>
+//         *(char *)end = save;
+// buf = end;
+// }
+//
+
+static int
+__kmp_match_str( char const *token, char const *buf, const char **end) {
+
+ KMP_ASSERT(token != NULL);
+ KMP_ASSERT(buf != NULL);
+ KMP_ASSERT(end != NULL);
+
+ while (*token && *buf) {
+ char ct = *token, cb = *buf;
+
+ if(ct >= 'a' && ct <= 'z')
+ ct -= 'a' - 'A';
+ if(cb >= 'a' && cb <= 'z')
+ cb -= 'a' - 'A';
+ if (ct != cb)
+ return FALSE;
+ ++token;
+ ++buf;
+ }
+ if (*token) {
+ return FALSE;
+ }
+ *end = buf;
+ return TRUE;
+}
+
+
+static size_t
+__kmp_round4k( size_t size ) {
+ size_t _4k = 4 * 1024;
+ if ( size & ( _4k - 1 ) ) {
+ size &= ~ ( _4k - 1 );
+ if ( size <= KMP_SIZE_T_MAX - _4k ) {
+ size += _4k; // Round up if there is no overflow.
+ }; // if
+ }; // if
+ return size;
+} // __kmp_round4k
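+// For illustration, assuming the 4 KB granularity used above:
+//     __kmp_round4k( 4096 ) == 4096   (already a multiple of 4K, left as is)
+//     __kmp_round4k( 5000 ) == 8192   (rounded up to the next 4K boundary)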
+
+
+/*
+    Here, the multipliers work like those in __kmp_convert_to_seconds, but
+    floating-point values are allowed, and the return value is in milliseconds.
+    The default multiplier is milliseconds.  Returns INT_MAX only if the value
+    specified matches "infinit*".  Returns -1 if the specified string is invalid.
+*/
+int
+__kmp_convert_to_milliseconds( char const * data )
+{
+ int ret, nvalues, factor;
+ char mult, extra;
+ double value;
+
+ if (data == NULL) return (-1);
+ if ( __kmp_str_match( "infinit", -1, data)) return (INT_MAX);
+ value = (double) 0.0;
+ mult = '\0';
+ nvalues = KMP_SSCANF (data, "%lf%c%c", &value, &mult, &extra);
+ if (nvalues < 1) return (-1);
+ if (nvalues == 1) mult = '\0';
+ if (nvalues == 3) return (-1);
+
+ if (value < 0) return (-1);
+
+ switch (mult) {
+ case '\0':
+ /* default is milliseconds */
+ factor = 1;
+ break;
+ case 's': case 'S':
+ factor = 1000;
+ break;
+ case 'm': case 'M':
+ factor = 1000 * 60;
+ break;
+ case 'h': case 'H':
+ factor = 1000 * 60 * 60;
+ break;
+ case 'd': case 'D':
+ factor = 1000 * 24 * 60 * 60;
+ break;
+ default:
+ return (-1);
+ }
+
+ if ( value >= ( (INT_MAX-1) / factor) )
+ ret = INT_MAX-1; /* Don't allow infinite value here */
+ else
+ ret = (int) (value * (double) factor); /* truncate to int */
+
+ return ret;
+}
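+// For illustration, a few conversions the routine above produces, derived from
+// the rules in the comment (not an exhaustive list):
+//     "500"      -> 500       (default multiplier is milliseconds)
+//     "2s"       -> 2000
+//     "1.5m"     -> 90000
+//     "infinite" -> INT_MAX
+//     "2x"       -> -1        (unknown multiplier suffix)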
+
+
+static int
+__kmp_strcasecmp_with_sentinel( char const * a, char const * b, char sentinel ) {
+ if(a == NULL)
+ a = "";
+ if(b == NULL)
+ b = "";
+ while(*a && *b && *b != sentinel) {
+ char ca = *a, cb = *b;
+
+ if(ca >= 'a' && ca <= 'z')
+ ca -= 'a' - 'A';
+ if(cb >= 'a' && cb <= 'z')
+ cb -= 'a' - 'A';
+ if(ca != cb)
+ return (int)(unsigned char)*a - (int)(unsigned char)*b;
+ ++a;
+ ++b;
+ }
+ return *a ?
+ (*b && *b != sentinel) ? (int)(unsigned char)*a - (int)(unsigned char)*b : 1 :
+ (*b && *b != sentinel) ? -1 : 0;
+}
+
+
+// =================================================================================================
+// Table structures and helper functions.
+// =================================================================================================
+
+typedef struct __kmp_setting kmp_setting_t;
+typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t;
+typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t;
+typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t;
+
+typedef void ( * kmp_stg_parse_func_t )( char const * name, char const * value, void * data );
+typedef void ( * kmp_stg_print_func_t )( kmp_str_buf_t * buffer, char const * name, void * data );
+
+struct __kmp_setting {
+ char const * name; // Name of setting (environment variable).
+ kmp_stg_parse_func_t parse; // Parser function.
+ kmp_stg_print_func_t print; // Print function.
+ void * data; // Data passed to parser and printer.
+ int set; // Variable set during this "session"
+ // (__kmp_env_initialize() or kmp_set_defaults() call).
+ int defined; // Variable set in any "session".
+}; // struct __kmp_setting
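+// A sketch of how an entry in the settings table (defined later in this file)
+// might look, with fields in the order declared above; KMP_SETTINGS and its
+// parse/print helpers (defined further down) are used only as an example:
+//
+//     { "KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings,
+//       NULL, 0, 0 },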
+
+struct __kmp_stg_ss_data {
+ size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others.
+ kmp_setting_t * * rivals; // Array of pointers to rivals (including itself).
+}; // struct __kmp_stg_ss_data
+
+struct __kmp_stg_wp_data {
+ int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY.
+ kmp_setting_t * * rivals; // Array of pointers to rivals (including itself).
+}; // struct __kmp_stg_wp_data
+
+struct __kmp_stg_fr_data {
+ int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION.
+ kmp_setting_t * * rivals; // Array of pointers to rivals (including itself).
+}; // struct __kmp_stg_fr_data
+
+static int
+__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found.
+ char const * name, // Name of variable.
+ char const * value, // Value of the variable.
+ kmp_setting_t * * rivals // List of rival settings (the list must include current one).
+);
+
+
+// -------------------------------------------------------------------------------------------------
+// Helper parse functions.
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_bool(
+ char const * name,
+ char const * value,
+ int * out
+) {
+ if ( __kmp_str_match_true( value ) ) {
+ * out = TRUE;
+ } else if (__kmp_str_match_false( value ) ) {
+ * out = FALSE;
+ } else {
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( BadBoolValue, name, value ),
+ KMP_HNT( ValidBoolValues ),
+ __kmp_msg_null
+ );
+ }; // if
+} // __kmp_stg_parse_bool
+
+static void
+__kmp_stg_parse_size(
+ char const * name,
+ char const * value,
+ size_t size_min,
+ size_t size_max,
+ int * is_specified,
+ size_t * out,
+ size_t factor
+) {
+ char const * msg = NULL;
+ #if KMP_OS_DARWIN
+ size_min = __kmp_round4k( size_min );
+ size_max = __kmp_round4k( size_max );
+ #endif // KMP_OS_DARWIN
+ if ( value ) {
+ if ( is_specified != NULL ) {
+ * is_specified = 1;
+ }; // if
+ __kmp_str_to_size( value, out, factor, & msg );
+ if ( msg == NULL ) {
+ if ( * out > size_max ) {
+ * out = size_max;
+ msg = KMP_I18N_STR( ValueTooLarge );
+ } else if ( * out < size_min ) {
+ * out = size_min;
+ msg = KMP_I18N_STR( ValueTooSmall );
+ } else {
+ #if KMP_OS_DARWIN
+ size_t round4k = __kmp_round4k( * out );
+ if ( * out != round4k ) {
+ * out = round4k;
+ msg = KMP_I18N_STR( NotMultiple4K );
+ }; // if
+ #endif
+ }; // if
+ } else {
+ // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to size_max silently.
+ if ( * out < size_min ) {
+ * out = size_max;
+ }
+ else if ( * out > size_max ) {
+ * out = size_max;
+ }; // if
+ }; // if
+ if ( msg != NULL ) {
+ // Message is not empty. Print warning.
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init( & buf );
+ __kmp_str_buf_print_size( & buf, * out );
+ KMP_WARNING( ParseSizeIntWarn, name, value, msg );
+ KMP_INFORM( Using_str_Value, name, buf.str );
+ __kmp_str_buf_free( & buf );
+ }; // if
+ }; // if
+} // __kmp_stg_parse_size
+
+#if KMP_AFFINITY_SUPPORTED
+static void
+__kmp_stg_parse_str(
+ char const * name,
+ char const * value,
+ char const * * out
+) {
+ KMP_INTERNAL_FREE( (void *) * out );
+ * out = __kmp_str_format( "%s", value );
+} // __kmp_stg_parse_str
+#endif
+
+static void
+__kmp_stg_parse_int(
+ char const * name, // I: Name of environment variable (used in warning messages).
+ char const * value, // I: Value of environment variable to parse.
+    int min,             // I: Minimal allowed value.
+ int max, // I: Maximum allowed value.
+ int * out // O: Output (parsed) value.
+) {
+ char const * msg = NULL;
+ kmp_uint64 uint = * out;
+ __kmp_str_to_uint( value, & uint, & msg );
+ if ( msg == NULL ) {
+ if ( uint < (unsigned int)min ) {
+ msg = KMP_I18N_STR( ValueTooSmall );
+ uint = min;
+ } else if ( uint > (unsigned int)max ) {
+ msg = KMP_I18N_STR( ValueTooLarge );
+ uint = max;
+ }; // if
+ } else {
+        // If overflow occurred, msg contains the error message and uint is very big.
+        // Clamp it to the allowed range.
+ if ( uint < (unsigned int)min ) {
+ uint = min;
+ }
+ else if ( uint > (unsigned int)max ) {
+ uint = max;
+ }; // if
+ }; // if
+ if ( msg != NULL ) {
+ // Message is not empty. Print warning.
+ kmp_str_buf_t buf;
+ KMP_WARNING( ParseSizeIntWarn, name, value, msg );
+ __kmp_str_buf_init( & buf );
+ __kmp_str_buf_print( &buf, "%" KMP_UINT64_SPEC "", uint );
+ KMP_INFORM( Using_uint64_Value, name, buf.str );
+ __kmp_str_buf_free( &buf );
+ }; // if
+ * out = uint;
+} // __kmp_stg_parse_int
+
+
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+static void
+__kmp_stg_parse_file(
+ char const * name,
+ char const * value,
+ char * suffix,
+ char * * out
+) {
+ char buffer[256];
+ char *t;
+ int hasSuffix;
+ KMP_INTERNAL_FREE( (void *) * out );
+ t = (char *) strrchr(value, '.');
+ hasSuffix = t && __kmp_str_eqf( t, suffix );
+ t = __kmp_str_format( "%s%s", value, hasSuffix ? "" : suffix );
+ __kmp_expand_file_name( buffer, sizeof(buffer), t);
+ KMP_INTERNAL_FREE(t);
+ * out = __kmp_str_format( "%s", buffer );
+} // __kmp_stg_parse_file
+#endif
+
+#ifdef KMP_DEBUG
+static char * par_range_to_print = NULL;
+
+static void
+__kmp_stg_parse_par_range(
+ char const * name,
+ char const * value,
+ int * out_range,
+ char * out_routine,
+ char * out_file,
+ int * out_lb,
+ int * out_ub
+) {
+ size_t len = KMP_STRLEN( value + 1 );
+ par_range_to_print = (char *) KMP_INTERNAL_MALLOC( len +1 );
+ KMP_STRNCPY_S( par_range_to_print, len + 1, value, len + 1);
+ __kmp_par_range = +1;
+ __kmp_par_range_lb = 0;
+ __kmp_par_range_ub = INT_MAX;
+ for (;;) {
+ unsigned int len;
+ if (( value == NULL ) || ( *value == '\0' )) {
+ break;
+ }
+ if ( ! __kmp_strcasecmp_with_sentinel( "routine", value, '=' )) {
+ value = strchr( value, '=' ) + 1;
+ len = __kmp_readstr_with_sentinel( out_routine,
+ value, KMP_PAR_RANGE_ROUTINE_LEN - 1, ',' );
+ if ( len == 0 ) {
+ goto par_range_error;
+ }
+ value = strchr( value, ',' );
+ if ( value != NULL ) {
+ value++;
+ }
+ continue;
+ }
+ if ( ! __kmp_strcasecmp_with_sentinel( "filename", value, '=' )) {
+ value = strchr( value, '=' ) + 1;
+ len = __kmp_readstr_with_sentinel( out_file,
+ value, KMP_PAR_RANGE_FILENAME_LEN - 1, ',' );
+ if ( len == 0) {
+ goto par_range_error;
+ }
+ value = strchr( value, ',' );
+ if ( value != NULL ) {
+ value++;
+ }
+ continue;
+ }
+ if (( ! __kmp_strcasecmp_with_sentinel( "range", value, '=' ))
+ || ( ! __kmp_strcasecmp_with_sentinel( "incl_range", value, '=' ))) {
+ value = strchr( value, '=' ) + 1;
+ if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub ) != 2 ) {
+ goto par_range_error;
+ }
+ *out_range = +1;
+ value = strchr( value, ',' );
+ if ( value != NULL ) {
+ value++;
+ }
+ continue;
+ }
+ if ( ! __kmp_strcasecmp_with_sentinel( "excl_range", value, '=' )) {
+ value = strchr( value, '=' ) + 1;
+ if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub) != 2 ) {
+ goto par_range_error;
+ }
+ *out_range = -1;
+ value = strchr( value, ',' );
+ if ( value != NULL ) {
+ value++;
+ }
+ continue;
+ }
+ par_range_error:
+ KMP_WARNING( ParRangeSyntax, name );
+ __kmp_par_range = 0;
+ break;
+ }
+} // __kmp_stg_parse_par_range
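+// For illustration, a value the parser above accepts; "foo" and "bar.c" are
+// placeholder names:
+//     routine=foo,filename=bar.c,range=5:10
+// This stores "foo" in out_routine, "bar.c" in out_file, and sets the
+// inclusive bounds out_lb = 5 and out_ub = 10 (*out_range == +1).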
+#endif
+
+int
+__kmp_initial_threads_capacity( int req_nproc )
+{
+ int nth = 32;
+
+ /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth) */
+ if (nth < (4 * req_nproc))
+ nth = (4 * req_nproc);
+ if (nth < (4 * __kmp_xproc))
+ nth = (4 * __kmp_xproc);
+
+ if (nth > __kmp_max_nth)
+ nth = __kmp_max_nth;
+
+ return nth;
+}
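+// For illustration, with req_nproc = 16, __kmp_xproc = 8 and __kmp_max_nth
+// large enough, the routine above returns MAX( 32, 64, 32 ) = 64.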
+
+
+int
+__kmp_default_tp_capacity( int req_nproc, int max_nth, int all_threads_specified) {
+ int nth = 128;
+
+ if(all_threads_specified)
+ return max_nth;
+ /* MIN( MAX (128, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth ) */
+ if (nth < (4 * req_nproc))
+ nth = (4 * req_nproc);
+ if (nth < (4 * __kmp_xproc))
+ nth = (4 * __kmp_xproc);
+
+ if (nth > __kmp_max_nth)
+ nth = __kmp_max_nth;
+
+ return nth;
+}
+
+
+// -------------------------------------------------------------------------------------------------
+// Helper print functions.
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_print_bool( kmp_str_buf_t * buffer, char const * name, int value ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_BOOL;
+ } else {
+ __kmp_str_buf_print( buffer, " %s=%s\n", name, value ? "true" : "false" );
+ }
+} // __kmp_stg_print_bool
+
+static void
+__kmp_stg_print_int( kmp_str_buf_t * buffer, char const * name, int value ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_INT;
+ } else {
+ __kmp_str_buf_print( buffer, " %s=%d\n", name, value );
+ }
+} // __kmp_stg_print_int
+
+static void
+__kmp_stg_print_uint64( kmp_str_buf_t * buffer, char const * name, kmp_uint64 value ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_UINT64;
+ } else {
+ __kmp_str_buf_print( buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value );
+ }
+} // __kmp_stg_print_uint64
+
+static void
+__kmp_stg_print_str( kmp_str_buf_t * buffer, char const * name, char const * value ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_STR;
+ } else {
+ __kmp_str_buf_print( buffer, " %s=%s\n", name, value );
+ }
+} // __kmp_stg_print_str
+
+static void
+__kmp_stg_print_size( kmp_str_buf_t * buffer, char const * name, size_t value ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ __kmp_str_buf_print_size( buffer, value );
+ __kmp_str_buf_print( buffer, "'\n" );
+ } else {
+ __kmp_str_buf_print( buffer, " %s=", name );
+ __kmp_str_buf_print_size( buffer, value );
+ __kmp_str_buf_print( buffer, "\n" );
+ return;
+ }
+} // __kmp_stg_print_size
+
+
+// =================================================================================================
+// Parse and print functions.
+// =================================================================================================
+
+// -------------------------------------------------------------------------------------------------
+// KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_all_threads( char const * name, char const * value, void * data ) {
+
+ kmp_setting_t * * rivals = (kmp_setting_t * *) data;
+ int rc;
+ rc = __kmp_stg_check_rivals( name, value, rivals );
+ if ( rc ) {
+ return;
+ }; // if
+ if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) {
+ __kmp_max_nth = __kmp_xproc;
+ __kmp_allThreadsSpecified = 1;
+ } else {
+ __kmp_stg_parse_int( name, value, 1, __kmp_sys_max_nth, & __kmp_max_nth );
+ __kmp_allThreadsSpecified = 0;
+ }
+ K_DIAG( 1, ( "__kmp_max_nth == %d\n", __kmp_max_nth ) );
+
+} // __kmp_stg_parse_all_threads
+
+static void
+__kmp_stg_print_all_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_max_nth );
+} // __kmp_stg_print_all_threads
+
+// -------------------------------------------------------------------------------------------------
+// KMP_BLOCKTIME
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_blocktime( char const * name, char const * value, void * data ) {
+ __kmp_dflt_blocktime = __kmp_convert_to_milliseconds( value );
+ if ( __kmp_dflt_blocktime < 0 ) {
+ __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
+ __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidValue, name, value ), __kmp_msg_null );
+ KMP_INFORM( Using_int_Value, name, __kmp_dflt_blocktime );
+ __kmp_env_blocktime = FALSE; // Revert to default as if var not set.
+ } else {
+ if ( __kmp_dflt_blocktime < KMP_MIN_BLOCKTIME ) {
+ __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME;
+ __kmp_msg( kmp_ms_warning, KMP_MSG( SmallValue, name, value ), __kmp_msg_null );
+ KMP_INFORM( MinValueUsing, name, __kmp_dflt_blocktime );
+ } else if ( __kmp_dflt_blocktime > KMP_MAX_BLOCKTIME ) {
+ __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME;
+ __kmp_msg( kmp_ms_warning, KMP_MSG( LargeValue, name, value ), __kmp_msg_null );
+ KMP_INFORM( MaxValueUsing, name, __kmp_dflt_blocktime );
+ }; // if
+ __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified.
+ }; // if
+    // Calculate the number of monitor thread wakeup intervals corresponding to blocktime.
+ __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
+ __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
+ K_DIAG( 1, ( "__kmp_env_blocktime == %d\n", __kmp_env_blocktime ) );
+ if ( __kmp_env_blocktime ) {
+ K_DIAG( 1, ( "__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime ) );
+ }
+} // __kmp_stg_parse_blocktime
+
+static void
+__kmp_stg_print_blocktime( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_dflt_blocktime );
+} // __kmp_stg_print_blocktime
+
+// -------------------------------------------------------------------------------------------------
+// KMP_DUPLICATE_LIB_OK
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_duplicate_lib_ok( char const * name, char const * value, void * data ) {
+    /* This variable is not actually supported; it is kept here for compatibility
+       with earlier builds and for the static/dynamic combination */
+ __kmp_stg_parse_bool( name, value, & __kmp_duplicate_library_ok );
+} // __kmp_stg_parse_duplicate_lib_ok
+
+static void
+__kmp_stg_print_duplicate_lib_ok( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_duplicate_library_ok );
+} // __kmp_stg_print_duplicate_lib_ok
+
+// -------------------------------------------------------------------------------------------------
+// KMP_INHERIT_FP_CONTROL
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+static void
+__kmp_stg_parse_inherit_fp_control( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_inherit_fp_control );
+} // __kmp_stg_parse_inherit_fp_control
+
+static void
+__kmp_stg_print_inherit_fp_control( kmp_str_buf_t * buffer, char const * name, void * data ) {
+#if KMP_DEBUG
+ __kmp_stg_print_bool( buffer, name, __kmp_inherit_fp_control );
+#endif /* KMP_DEBUG */
+} // __kmp_stg_print_inherit_fp_control
+
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+// -------------------------------------------------------------------------------------------------
+// KMP_LIBRARY, OMP_WAIT_POLICY
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_wait_policy( char const * name, char const * value, void * data ) {
+
+ kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, wait->rivals );
+ if ( rc ) {
+ return;
+ }; // if
+
+ if ( wait->omp ) {
+ if ( __kmp_str_match( "ACTIVE", 1, value ) ) {
+ __kmp_library = library_turnaround;
+ } else if ( __kmp_str_match( "PASSIVE", 1, value ) ) {
+ __kmp_library = library_throughput;
+ } else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }; // if
+ } else {
+ if ( __kmp_str_match( "serial", 1, value ) ) { /* S */
+ __kmp_library = library_serial;
+ } else if ( __kmp_str_match( "throughput", 2, value ) ) { /* TH */
+ __kmp_library = library_throughput;
+ } else if ( __kmp_str_match( "turnaround", 2, value ) ) { /* TU */
+ __kmp_library = library_turnaround;
+ } else if ( __kmp_str_match( "dedicated", 1, value ) ) { /* D */
+ __kmp_library = library_turnaround;
+ } else if ( __kmp_str_match( "multiuser", 1, value ) ) { /* M */
+ __kmp_library = library_throughput;
+ } else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }; // if
+ }; // if
+ __kmp_aux_set_library( __kmp_library );
+
+} // __kmp_stg_parse_wait_policy
+
+static void
+__kmp_stg_print_wait_policy( kmp_str_buf_t * buffer, char const * name, void * data ) {
+
+ kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data;
+ char const * value = NULL;
+
+ if ( wait->omp ) {
+ switch ( __kmp_library ) {
+ case library_turnaround : {
+ value = "ACTIVE";
+ } break;
+ case library_throughput : {
+ value = "PASSIVE";
+ } break;
+ }; // switch
+ } else {
+ switch ( __kmp_library ) {
+ case library_serial : {
+ value = "serial";
+ } break;
+ case library_turnaround : {
+ value = "turnaround";
+ } break;
+ case library_throughput : {
+ value = "throughput";
+ } break;
+ }; // switch
+ }; // if
+ if ( value != NULL ) {
+ __kmp_stg_print_str( buffer, name, value );
+ }; // if
+
+} // __kmp_stg_print_wait_policy
+
+// -------------------------------------------------------------------------------------------------
+// KMP_MONITOR_STACKSIZE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_monitor_stacksize( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_size(
+ name,
+ value,
+ __kmp_sys_min_stksize,
+ KMP_MAX_STKSIZE,
+ NULL,
+ & __kmp_monitor_stksize,
+ 1
+ );
+} // __kmp_stg_parse_monitor_stacksize
+
+static void
+__kmp_stg_print_monitor_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if( __kmp_env_format ) {
+ if ( __kmp_monitor_stksize > 0 )
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ else
+ KMP_STR_BUF_PRINT_NAME;
+ } else {
+ __kmp_str_buf_print( buffer, " %s", name );
+ }
+ if ( __kmp_monitor_stksize > 0 ) {
+ __kmp_str_buf_print_size( buffer, __kmp_monitor_stksize );
+ } else {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ if( __kmp_env_format && __kmp_monitor_stksize ) {
+ __kmp_str_buf_print( buffer, "'\n");
+ }
+
+} // __kmp_stg_print_monitor_stacksize
+
+// -------------------------------------------------------------------------------------------------
+// KMP_SETTINGS
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_settings( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_settings );
+} // __kmp_stg_parse_settings
+
+static void
+__kmp_stg_print_settings( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_settings );
+} // __kmp_stg_print_settings
+
+// -------------------------------------------------------------------------------------------------
+// KMP_STACKPAD
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_stackpad( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int(
+ name, // Env var name
+ value, // Env var value
+ KMP_MIN_STKPADDING, // Min value
+ KMP_MAX_STKPADDING, // Max value
+ & __kmp_stkpadding // Var to initialize
+ );
+} // __kmp_stg_parse_stackpad
+
+static void
+__kmp_stg_print_stackpad( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_stkpadding );
+} // __kmp_stg_print_stackpad
+
+// -------------------------------------------------------------------------------------------------
+// KMP_STACKOFFSET
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_stackoffset( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_size(
+ name, // Env var name
+ value, // Env var value
+ KMP_MIN_STKOFFSET, // Min value
+ KMP_MAX_STKOFFSET, // Max value
+ NULL, //
+ & __kmp_stkoffset, // Var to initialize
+ 1
+ );
+} // __kmp_stg_parse_stackoffset
+
+static void
+__kmp_stg_print_stackoffset( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_size( buffer, name, __kmp_stkoffset );
+} // __kmp_stg_print_stackoffset
+
+// -------------------------------------------------------------------------------------------------
+// KMP_STACKSIZE, OMP_STACKSIZE, GOMP_STACKSIZE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_stacksize( char const * name, char const * value, void * data ) {
+
+ kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, stacksize->rivals );
+ if ( rc ) {
+ return;
+ }; // if
+ __kmp_stg_parse_size(
+ name, // Env var name
+ value, // Env var value
+ __kmp_sys_min_stksize, // Min value
+ KMP_MAX_STKSIZE, // Max value
+ & __kmp_env_stksize, //
+ & __kmp_stksize, // Var to initialize
+ stacksize->factor
+ );
+
+} // __kmp_stg_parse_stacksize
+
+// This function is called for printing both KMP_STACKSIZE (factor is 1) and OMP_STACKSIZE (factor is 1024).
+// Currently it is not possible to print the OMP_STACKSIZE value in bytes. This possibility could be
+// added later upon customer request.
+static void
+__kmp_stg_print_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data;
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize );
+ __kmp_str_buf_print( buffer, "'\n" );
+ } else {
+ __kmp_str_buf_print( buffer, " %s=", name );
+ __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize );
+ __kmp_str_buf_print( buffer, "\n" );
+ }
+} // __kmp_stg_print_stacksize
+
+// -------------------------------------------------------------------------------------------------
+// KMP_VERSION
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_version( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_version );
+} // __kmp_stg_parse_version
+
+static void
+__kmp_stg_print_version( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_version );
+} // __kmp_stg_print_version
+
+// -------------------------------------------------------------------------------------------------
+// KMP_WARNINGS
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_warnings( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_generate_warnings );
+ if (__kmp_generate_warnings != kmp_warnings_off) { // AC: we have only 0/1 values documented,
+ __kmp_generate_warnings = kmp_warnings_explicit; // so reset it to explicit in order to
+ } // distinguish from default setting
+} // __kmp_stg_parse_warnings
+
+static void
+__kmp_stg_print_warnings( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_generate_warnings ); // AC: TODO: change to print_int?
+} // __kmp_stg_print_warnings // (needs documentation change)...
+
+// -------------------------------------------------------------------------------------------------
+// OMP_NESTED, OMP_NUM_THREADS
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_nested( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_dflt_nested );
+} // __kmp_stg_parse_nested
+
+static void
+__kmp_stg_print_nested( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_dflt_nested );
+} // __kmp_stg_print_nested
+
+static void
+__kmp_parse_nested_num_threads( const char *var, const char *env, kmp_nested_nthreads_t *nth_array )
+{
+ const char *next = env;
+ const char *scan = next;
+
+    int total = 0;          // Count elements that were set. It'll be used as the array size
+    int prev_comma = FALSE; // For correct processing of sequential commas
+
+ // Count the number of values in the env. var string
+ for ( ; ; ) {
+ SKIP_WS( next );
+
+ if ( *next == '\0' ) {
+ break;
+ }
+ // Next character is not an integer or not a comma => end of list
+ if ( ( ( *next < '0' ) || ( *next > '9' ) ) && ( *next !=',') ) {
+ KMP_WARNING( NthSyntaxError, var, env );
+ return;
+ }
+ // The next character is ','
+ if ( *next == ',' ) {
+            // ',' is the first character or follows another ','
+ if ( total == 0 || prev_comma ) {
+ total++;
+ }
+ prev_comma = TRUE;
+ next++; //skip ','
+ SKIP_WS( next );
+ }
+ // Next character is a digit
+ if ( *next >= '0' && *next <= '9' ) {
+ prev_comma = FALSE;
+ SKIP_DIGITS( next );
+ total++;
+ const char *tmp = next;
+ SKIP_WS( tmp );
+ if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) {
+ KMP_WARNING( NthSpacesNotAllowed, var, env );
+ return;
+ }
+ }
+ }
+ KMP_DEBUG_ASSERT( total > 0 );
+ if( total <= 0 ) {
+ KMP_WARNING( NthSyntaxError, var, env );
+ return;
+ }
+
+ // Check if the nested nthreads array exists
+ if ( ! nth_array->nth ) {
+ // Allocate an array of double size
+ nth_array->nth = ( int * )KMP_INTERNAL_MALLOC( sizeof( int ) * total * 2 );
+ if ( nth_array->nth == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }
+ nth_array->size = total * 2;
+ } else {
+ if ( nth_array->size < total ) {
+ // Increase the array size
+ do {
+ nth_array->size *= 2;
+ } while ( nth_array->size < total );
+
+ nth_array->nth = (int *) KMP_INTERNAL_REALLOC(
+ nth_array->nth, sizeof( int ) * nth_array->size );
+ if ( nth_array->nth == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }
+ }
+ }
+ nth_array->used = total;
+ int i = 0;
+
+ prev_comma = FALSE;
+ total = 0;
+ // Save values in the array
+ for ( ; ; ) {
+ SKIP_WS( scan );
+ if ( *scan == '\0' ) {
+ break;
+ }
+ // The next character is ','
+ if ( *scan == ',' ) {
+ // ',' in the beginning of the list
+ if ( total == 0 ) {
+ // The value is supposed to be equal to __kmp_avail_proc but it is unknown at the moment.
+ // So let's put a placeholder (#threads = 0) to correct it later.
+ nth_array->nth[i++] = 0;
+ total++;
+ }else if ( prev_comma ) {
+ // Num threads is inherited from the previous level
+ nth_array->nth[i] = nth_array->nth[i - 1];
+ i++;
+ total++;
+ }
+ prev_comma = TRUE;
+ scan++; //skip ','
+ SKIP_WS( scan );
+ }
+ // Next character is a digit
+ if ( *scan >= '0' && *scan <= '9' ) {
+ int num;
+ const char *buf = scan;
+ char const * msg = NULL;
+ prev_comma = FALSE;
+ SKIP_DIGITS( scan );
+ total++;
+
+ num = __kmp_str_to_int( buf, *scan );
+ if ( num < KMP_MIN_NTH ) {
+ msg = KMP_I18N_STR( ValueTooSmall );
+ num = KMP_MIN_NTH;
+ } else if ( num > __kmp_sys_max_nth ) {
+ msg = KMP_I18N_STR( ValueTooLarge );
+ num = __kmp_sys_max_nth;
+ }
+ if ( msg != NULL ) {
+ // Message is not empty. Print warning.
+ KMP_WARNING( ParseSizeIntWarn, var, env, msg );
+ KMP_INFORM( Using_int_Value, var, num );
+ }
+ nth_array->nth[i++] = num;
+ }
+ }
+}
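+// For illustration, a few lists the parser above accepts (e.g. via OMP_NUM_THREADS):
+//     "4,3,2" -> nth = { 4, 3, 2 }
+//     "4,,2"  -> nth = { 4, 4, 2 }   (an empty item inherits the previous level)
+//     ",2"    -> nth = { 0, 2 }      (leading comma: placeholder 0, corrected to
+//                                     __kmp_avail_proc later)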
+
+static void
+__kmp_stg_parse_num_threads( char const * name, char const * value, void * data ) {
+ // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers!
+ if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) {
+ // The array of 1 element
+ __kmp_nested_nth.nth = ( int* )KMP_INTERNAL_MALLOC( sizeof( int ) );
+ __kmp_nested_nth.size = __kmp_nested_nth.used = 1;
+ __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_xproc;
+ } else {
+ __kmp_parse_nested_num_threads( name, value, & __kmp_nested_nth );
+ if ( __kmp_nested_nth.nth ) {
+ __kmp_dflt_team_nth = __kmp_nested_nth.nth[0];
+ if ( __kmp_dflt_team_nth_ub < __kmp_dflt_team_nth ) {
+ __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth;
+ }
+ }
+ }; // if
+ K_DIAG( 1, ( "__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth ) );
+} // __kmp_stg_parse_num_threads
+
+static void
+__kmp_stg_print_num_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME;
+ } else {
+ __kmp_str_buf_print( buffer, " %s", name );
+ }
+ if ( __kmp_nested_nth.used ) {
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init( &buf );
+ for ( int i = 0; i < __kmp_nested_nth.used; i++) {
+ __kmp_str_buf_print( &buf, "%d", __kmp_nested_nth.nth[i] );
+ if ( i < __kmp_nested_nth.used - 1 ) {
+ __kmp_str_buf_print( &buf, "," );
+ }
+ }
+ __kmp_str_buf_print( buffer, "='%s'\n", buf.str );
+ __kmp_str_buf_free(&buf);
+ } else {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+} // __kmp_stg_print_num_threads
+
+// -------------------------------------------------------------------------------------------------
+// OpenMP 3.0: KMP_TASKING, OMP_MAX_ACTIVE_LEVELS,
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_tasking( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 0, (int)tskm_max, (int *)&__kmp_tasking_mode );
+} // __kmp_stg_parse_tasking
+
+static void
+__kmp_stg_print_tasking( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_tasking_mode );
+} // __kmp_stg_print_tasking
+
+static void
+__kmp_stg_parse_task_stealing( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 0, 1, (int *)&__kmp_task_stealing_constraint );
+} // __kmp_stg_parse_task_stealing
+
+static void
+__kmp_stg_print_task_stealing( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_task_stealing_constraint );
+} // __kmp_stg_print_task_stealing
+
+static void
+__kmp_stg_parse_max_active_levels( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_dflt_max_active_levels );
+} // __kmp_stg_parse_max_active_levels
+
+static void
+__kmp_stg_print_max_active_levels( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_dflt_max_active_levels );
+} // __kmp_stg_print_max_active_levels
+
+#if KMP_NESTED_HOT_TEAMS
+// -------------------------------------------------------------------------------------------------
+// KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_hot_teams_level( char const * name, char const * value, void * data ) {
+ if ( TCR_4(__kmp_init_parallel) ) {
+ KMP_WARNING( EnvParallelWarn, name );
+ return;
+ } // read value before first parallel only
+ __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_max_level );
+} // __kmp_stg_parse_hot_teams_level
+
+static void
+__kmp_stg_print_hot_teams_level( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_hot_teams_max_level );
+} // __kmp_stg_print_hot_teams_level
+
+static void
+__kmp_stg_parse_hot_teams_mode( char const * name, char const * value, void * data ) {
+ if ( TCR_4(__kmp_init_parallel) ) {
+ KMP_WARNING( EnvParallelWarn, name );
+ return;
+ } // read value before first parallel only
+ __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_mode );
+} // __kmp_stg_parse_hot_teams_mode
+
+static void
+__kmp_stg_print_hot_teams_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_hot_teams_mode );
+} // __kmp_stg_print_hot_teams_mode
+
+#endif // KMP_NESTED_HOT_TEAMS
+
+// -------------------------------------------------------------------------------------------------
+// KMP_HANDLE_SIGNALS
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_HANDLE_SIGNALS
+
+static void
+__kmp_stg_parse_handle_signals( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_handle_signals );
+} // __kmp_stg_parse_handle_signals
+
+static void
+__kmp_stg_print_handle_signals( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_handle_signals );
+} // __kmp_stg_print_handle_signals
+
+#endif // KMP_HANDLE_SIGNALS
+
+// -------------------------------------------------------------------------------------------------
+// KMP_X_DEBUG, KMP_DEBUG, KMP_DEBUG_BUF_*, KMP_DIAG
+// -------------------------------------------------------------------------------------------------
+
+#ifdef KMP_DEBUG
+
+#define KMP_STG_X_DEBUG( x ) \
+ static void __kmp_stg_parse_##x##_debug( char const * name, char const * value, void * data ) { \
+ __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_##x##_debug ); \
+ } /* __kmp_stg_parse_x_debug */ \
+ static void __kmp_stg_print_##x##_debug( kmp_str_buf_t * buffer, char const * name, void * data ) { \
+ __kmp_stg_print_int( buffer, name, kmp_##x##_debug ); \
+ } /* __kmp_stg_print_x_debug */
+
+KMP_STG_X_DEBUG( a )
+KMP_STG_X_DEBUG( b )
+KMP_STG_X_DEBUG( c )
+KMP_STG_X_DEBUG( d )
+KMP_STG_X_DEBUG( e )
+KMP_STG_X_DEBUG( f )
+
+#undef KMP_STG_X_DEBUG
+
+static void
+__kmp_stg_parse_debug( char const * name, char const * value, void * data ) {
+ int debug = 0;
+ __kmp_stg_parse_int( name, value, 0, INT_MAX, & debug );
+ if ( kmp_a_debug < debug ) {
+ kmp_a_debug = debug;
+ }; // if
+ if ( kmp_b_debug < debug ) {
+ kmp_b_debug = debug;
+ }; // if
+ if ( kmp_c_debug < debug ) {
+ kmp_c_debug = debug;
+ }; // if
+ if ( kmp_d_debug < debug ) {
+ kmp_d_debug = debug;
+ }; // if
+ if ( kmp_e_debug < debug ) {
+ kmp_e_debug = debug;
+ }; // if
+ if ( kmp_f_debug < debug ) {
+ kmp_f_debug = debug;
+ }; // if
+} // __kmp_stg_parse_debug
+
+static void
+__kmp_stg_parse_debug_buf( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_debug_buf );
+    // !!! TODO: Move buffer initialization out of this file! It may work incorrectly if
+ // KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or KMP_DEBUG_BUF_CHARS.
+ if ( __kmp_debug_buf ) {
+ int i;
+ int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars;
+
+ /* allocate and initialize all entries in debug buffer to empty */
+ __kmp_debug_buffer = (char *) __kmp_page_allocate( elements * sizeof( char ) );
+ for ( i = 0; i < elements; i += __kmp_debug_buf_chars )
+ __kmp_debug_buffer[i] = '\0';
+
+ __kmp_debug_count = 0;
+ }
+ K_DIAG( 1, ( "__kmp_debug_buf = %d\n", __kmp_debug_buf ) );
+} // __kmp_stg_parse_debug_buf
+
+static void
+__kmp_stg_print_debug_buf( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_debug_buf );
+} // __kmp_stg_print_debug_buf
+
+static void
+__kmp_stg_parse_debug_buf_atomic( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_debug_buf_atomic );
+} // __kmp_stg_parse_debug_buf_atomic
+
+static void
+__kmp_stg_print_debug_buf_atomic( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_debug_buf_atomic );
+} // __kmp_stg_print_debug_buf_atomic
+
+static void
+__kmp_stg_parse_debug_buf_chars( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int(
+ name,
+ value,
+ KMP_DEBUG_BUF_CHARS_MIN,
+ INT_MAX,
+ & __kmp_debug_buf_chars
+ );
+} // __kmp_stg_parse_debug_buf_chars
+
+static void
+__kmp_stg_print_debug_buf_chars( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_debug_buf_chars );
+} // __kmp_stg_print_debug_buf_chars
+
+static void
+__kmp_stg_parse_debug_buf_lines( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int(
+ name,
+ value,
+ KMP_DEBUG_BUF_LINES_MIN,
+ INT_MAX,
+ & __kmp_debug_buf_lines
+ );
+} // __kmp_stg_parse_debug_buf_lines
+
+static void
+__kmp_stg_print_debug_buf_lines( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_debug_buf_lines );
+} // __kmp_stg_print_debug_buf_lines
+
+static void
+__kmp_stg_parse_diag( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_diag );
+} // __kmp_stg_parse_diag
+
+static void
+__kmp_stg_print_diag( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, kmp_diag );
+} // __kmp_stg_print_diag
+
+#endif // KMP_DEBUG
+
+// -------------------------------------------------------------------------------------------------
+// KMP_ALIGN_ALLOC
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_align_alloc( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_size(
+ name,
+ value,
+ CACHE_LINE,
+ INT_MAX,
+ NULL,
+ & __kmp_align_alloc,
+ 1
+ );
+} // __kmp_stg_parse_align_alloc
+
+static void
+__kmp_stg_print_align_alloc( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_size( buffer, name, __kmp_align_alloc );
+} // __kmp_stg_print_align_alloc
+
+// -------------------------------------------------------------------------------------------------
+// KMP_PLAIN_BARRIER, KMP_FORKJOIN_BARRIER, KMP_REDUCTION_BARRIER
+// -------------------------------------------------------------------------------------------------
+
+// TODO: Remove the __kmp_barrier_branch_bit_env_name variable, remove loops from parse and print
+// functions, pass required info through data argument.
+
+static void
+__kmp_stg_parse_barrier_branch_bit( char const * name, char const * value, void * data ) {
+ const char *var;
+
+ /* ---------- Barrier branch bit control ------------ */
+ for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
+ var = __kmp_barrier_branch_bit_env_name[ i ];
+ if ( ( strcmp( var, name) == 0 ) && ( value != 0 ) ) {
+ char *comma;
+
+ comma = (char *) strchr( value, ',' );
+ __kmp_barrier_gather_branch_bits[ i ] = ( kmp_uint32 ) __kmp_str_to_int( value, ',' );
+ /* is there a specified release parameter? */
+ if ( comma == NULL ) {
+ __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
+ } else {
+ __kmp_barrier_release_branch_bits[ i ] = (kmp_uint32) __kmp_str_to_int( comma + 1, 0 );
+
+ if ( __kmp_barrier_release_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) {
+ __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null );
+ __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
+ }
+ }
+ if ( __kmp_barrier_gather_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) {
+ KMP_WARNING( BarrGatherValueInvalid, name, value );
+ KMP_INFORM( Using_uint_Value, name, __kmp_barrier_gather_bb_dflt );
+ __kmp_barrier_gather_branch_bits[ i ] = __kmp_barrier_gather_bb_dflt;
+ }
+ }
+ K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[ i ], \
+ __kmp_barrier_gather_branch_bits [ i ], \
+ __kmp_barrier_release_branch_bits [ i ]))
+ }
+} // __kmp_stg_parse_barrier_branch_bit
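+// For illustration, with the plain barrier variable from the heading above:
+//     KMP_PLAIN_BARRIER="3,2"
+// sets the gather branch bits to 3 and the release branch bits to 2; without
+// the comma, the release bits fall back to __kmp_barrier_release_bb_dflt.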
+
+static void
+__kmp_stg_print_barrier_branch_bit( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ const char *var;
+ for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
+ var = __kmp_barrier_branch_bit_env_name[ i ];
+ if ( strcmp( var, name) == 0 ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_branch_bit_env_name[ i ]);
+ } else {
+ __kmp_str_buf_print( buffer, " %s='", __kmp_barrier_branch_bit_env_name[ i ] );
+ }
+ __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_barrier_gather_branch_bits [ i ], __kmp_barrier_release_branch_bits [ i ]);
+ }
+ }
+} // __kmp_stg_print_barrier_branch_bit
+
+
+// -------------------------------------------------------------------------------------------------
+// KMP_PLAIN_BARRIER_PATTERN, KMP_FORKJOIN_BARRIER_PATTERN, KMP_REDUCTION_BARRIER_PATTERN
+// -------------------------------------------------------------------------------------------------
+
+// TODO: Remove __kmp_barrier_pattern_name variable, remove loops from parse and print functions,
+// pass required data to functions through data argument.
+
+static void
+__kmp_stg_parse_barrier_pattern( char const * name, char const * value, void * data ) {
+ const char *var;
+ /* ---------- Barrier method control ------------ */
+
+ for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
+ var = __kmp_barrier_pattern_env_name[ i ];
+
+ if ( ( strcmp ( var, name ) == 0 ) && ( value != 0 ) ) {
+ int j;
+ char *comma = (char *) strchr( value, ',' );
+
+ /* handle first parameter: gather pattern */
+ for ( j = bp_linear_bar; j<bp_last_bar; j++ ) {
+ if (__kmp_match_with_sentinel( __kmp_barrier_pattern_name[j], value, 1, ',' )) {
+ __kmp_barrier_gather_pattern[ i ] = (kmp_bar_pat_e) j;
+ break;
+ }
+ }
+ if ( j == bp_last_bar ) {
+ KMP_WARNING( BarrGatherValueInvalid, name, value );
+ KMP_INFORM( Using_str_Value, name, __kmp_barrier_pattern_name[ bp_linear_bar ] );
+ }
+
+ /* handle second parameter: release pattern */
+ if ( comma != NULL ) {
+ for ( j = bp_linear_bar; j < bp_last_bar; j++ ) {
+ if ( __kmp_str_match( __kmp_barrier_pattern_name[j], 1, comma + 1 ) ) {
+ __kmp_barrier_release_pattern[ i ] = (kmp_bar_pat_e) j;
+ break;
+ }
+ }
+ if (j == bp_last_bar) {
+ __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null );
+ KMP_INFORM( Using_str_Value, name, __kmp_barrier_pattern_name[ bp_linear_bar ] );
+ }
+ }
+ }
+ }
+} // __kmp_stg_parse_barrier_pattern
+
+static void
+__kmp_stg_print_barrier_pattern( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ const char *var;
+ for ( int i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
+ var = __kmp_barrier_pattern_env_name[ i ];
+ if ( strcmp ( var, name ) == 0 ) {
+ int j = __kmp_barrier_gather_pattern [ i ];
+ int k = __kmp_barrier_release_pattern [ i ];
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_pattern_env_name[ i ]);
+ } else {
+ __kmp_str_buf_print( buffer, " %s='", __kmp_barrier_pattern_env_name[ i ] );
+ }
+ __kmp_str_buf_print( buffer, "%s,%s'\n", __kmp_barrier_pattern_name [ j ], __kmp_barrier_pattern_name [ k ]);
+ }
+ }
+} // __kmp_stg_print_barrier_pattern
+
+// -------------------------------------------------------------------------------------------------
+// KMP_ABORT_DELAY
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_abort_delay( char const * name, char const * value, void * data ) {
+    // Units of KMP_ABORT_DELAY are seconds, units of __kmp_abort_delay are milliseconds.
+ int delay = __kmp_abort_delay / 1000;
+ __kmp_stg_parse_int( name, value, 0, INT_MAX / 1000, & delay );
+ __kmp_abort_delay = delay * 1000;
+} // __kmp_stg_parse_abort_delay
+
+static void
+__kmp_stg_print_abort_delay( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_abort_delay );
+} // __kmp_stg_print_abort_delay
+
+// -------------------------------------------------------------------------------------------------
+// KMP_CPUINFO_FILE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_cpuinfo_file( char const * name, char const * value, void * data ) {
+ #if KMP_AFFINITY_SUPPORTED
+ __kmp_stg_parse_str( name, value, & __kmp_cpuinfo_file );
+ K_DIAG( 1, ( "__kmp_cpuinfo_file == %s\n", __kmp_cpuinfo_file ) );
+ #endif
+} //__kmp_stg_parse_cpuinfo_file
+
+static void
+__kmp_stg_print_cpuinfo_file( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ #if KMP_AFFINITY_SUPPORTED
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME;
+ } else {
+ __kmp_str_buf_print( buffer, " %s", name );
+ }
+ if ( __kmp_cpuinfo_file ) {
+ __kmp_str_buf_print( buffer, "='%s'\n", __kmp_cpuinfo_file );
+ } else {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ #endif
+} //__kmp_stg_print_cpuinfo_file
+
+// -------------------------------------------------------------------------------------------------
+// KMP_FORCE_REDUCTION, KMP_DETERMINISTIC_REDUCTION
+// -------------------------------------------------------------------------------------------------
+
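+// Illustrative examples: KMP_FORCE_REDUCTION=atomic selects atomic_reduce_block, while
+// KMP_DETERMINISTIC_REDUCTION=true (the non-force variant) selects tree_reduce_block; which
+// variant applies is carried in the kmp_stg_fr_data_t "force" flag passed through data.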
+static void
+__kmp_stg_parse_force_reduction( char const * name, char const * value, void * data )
+{
+ kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, reduction->rivals );
+ if ( rc ) {
+ return;
+ }; // if
+ if ( reduction->force ) {
+ if( value != 0 ) {
+ if( __kmp_str_match( "critical", 0, value ) )
+ __kmp_force_reduction_method = critical_reduce_block;
+ else if( __kmp_str_match( "atomic", 0, value ) )
+ __kmp_force_reduction_method = atomic_reduce_block;
+ else if( __kmp_str_match( "tree", 0, value ) )
+ __kmp_force_reduction_method = tree_reduce_block;
+ else {
+ KMP_FATAL( UnknownForceReduction, name, value );
+ }
+ }
+ } else {
+ __kmp_stg_parse_bool( name, value, & __kmp_determ_red );
+ if( __kmp_determ_red ) {
+ __kmp_force_reduction_method = tree_reduce_block;
+ } else {
+ __kmp_force_reduction_method = reduction_method_not_defined;
+ }
+ }
+ K_DIAG( 1, ( "__kmp_force_reduction_method == %d\n", __kmp_force_reduction_method ) );
+} // __kmp_stg_parse_force_reduction
+
+static void
+__kmp_stg_print_force_reduction( kmp_str_buf_t * buffer, char const * name, void * data ) {
+
+ kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data;
+ if ( reduction->force ) {
+ if( __kmp_force_reduction_method == critical_reduce_block) {
+ __kmp_stg_print_str( buffer, name, "critical");
+ } else if ( __kmp_force_reduction_method == atomic_reduce_block ) {
+ __kmp_stg_print_str( buffer, name, "atomic");
+ } else if ( __kmp_force_reduction_method == tree_reduce_block ) {
+ __kmp_stg_print_str( buffer, name, "tree");
+ } else {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME;
+ } else {
+ __kmp_str_buf_print( buffer, " %s", name );
+ }
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ } else {
+ __kmp_stg_print_bool( buffer, name, __kmp_determ_red );
+ }
+
+
+} // __kmp_stg_print_force_reduction
+
+// -------------------------------------------------------------------------------------------------
+// KMP_STORAGE_MAP
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_storage_map( char const * name, char const * value, void * data ) {
+ if ( __kmp_str_match( "verbose", 1, value ) ) {
+ __kmp_storage_map = TRUE;
+ __kmp_storage_map_verbose = TRUE;
+ __kmp_storage_map_verbose_specified = TRUE;
+
+ } else {
+ __kmp_storage_map_verbose = FALSE;
+ __kmp_stg_parse_bool( name, value, & __kmp_storage_map ); // !!!
+ }; // if
+} // __kmp_stg_parse_storage_map
+
+static void
+__kmp_stg_print_storage_map( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if ( __kmp_storage_map_verbose || __kmp_storage_map_verbose_specified ) {
+ __kmp_stg_print_str( buffer, name, "verbose" );
+ } else {
+ __kmp_stg_print_bool( buffer, name, __kmp_storage_map );
+ }
+} // __kmp_stg_print_storage_map
+
+// -------------------------------------------------------------------------------------------------
+// KMP_ALL_THREADPRIVATE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_all_threadprivate( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, __kmp_allThreadsSpecified ? __kmp_max_nth : 1, __kmp_max_nth,
+ & __kmp_tp_capacity );
+} // __kmp_stg_parse_all_threadprivate
+
+static void
+__kmp_stg_print_all_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_tp_capacity );
+} // __kmp_stg_print_all_threadprivate
+
+// -------------------------------------------------------------------------------------------------
+// KMP_FOREIGN_THREADS_THREADPRIVATE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_foreign_threads_threadprivate( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_foreign_tp );
+} // __kmp_stg_parse_foreign_threads_threadprivate
+
+static void
+__kmp_stg_print_foreign_threads_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_foreign_tp );
+} // __kmp_stg_print_foreign_threads_threadprivate
+
+
+// -------------------------------------------------------------------------------------------------
+// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_AFFINITY_SUPPORTED
+//
+// Parse the proc id list. Return TRUE if successful, FALSE otherwise.
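+// Accepted syntax (illustrative): a whitespace/comma separated sequence of entries, where each
+// entry is a single proc id, a range "start-end" with an optional ":stride", or a braced set
+// such as "{0,1,2}"; e.g. "0,2-6:2,{8,9}".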
+//
+static int
+__kmp_parse_affinity_proc_id_list( const char *var, const char *env,
+ const char **nextEnv, char **proclist )
+{
+ const char *scan = env;
+ const char *next = scan;
+ int empty = TRUE;
+
+ *proclist = NULL;
+
+ for (;;) {
+ int start, end, stride;
+
+ SKIP_WS(scan);
+ next = scan;
+ if (*next == '\0') {
+ break;
+ }
+
+ if (*next == '{') {
+ int num;
+ next++; // skip '{'
+ SKIP_WS(next);
+ scan = next;
+
+ //
+ // Read the first integer in the set.
+ //
+ if ((*next < '0') || (*next > '9')) {
+ KMP_WARNING( AffSyntaxError, var );
+ return FALSE;
+ }
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(num >= 0);
+
+ for (;;) {
+ //
+ // Check for end of set.
+ //
+ SKIP_WS(next);
+ if (*next == '}') {
+ next++; // skip '}'
+ break;
+ }
+
+ //
+ // Skip optional comma.
+ //
+ if (*next == ',') {
+ next++;
+ }
+ SKIP_WS(next);
+
+ //
+ // Read the next integer in the set.
+ //
+ scan = next;
+ if ((*next < '0') || (*next > '9')) {
+ KMP_WARNING( AffSyntaxError, var );
+ return FALSE;
+ }
+
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(num >= 0);
+ }
+ empty = FALSE;
+
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ continue;
+ }
+
+ //
+ // Next character is not an integer => end of list
+ //
+ if ((*next < '0') || (*next > '9')) {
+ if (empty) {
+ KMP_WARNING( AffSyntaxError, var );
+ return FALSE;
+ }
+ break;
+ }
+
+ //
+ // Read the first integer.
+ //
+ SKIP_DIGITS(next);
+ start = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(start >= 0);
+ SKIP_WS(next);
+
+ //
+ // If this isn't a range, then go on.
+ //
+ if (*next != '-') {
+ empty = FALSE;
+
+ //
+ // Skip optional comma.
+ //
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ continue;
+ }
+
+ //
+ // This is a range. Skip over the '-' and read in the 2nd int.
+ //
+ next++; // skip '-'
+ SKIP_WS(next);
+ scan = next;
+ if ((*next < '0') || (*next > '9')) {
+ KMP_WARNING( AffSyntaxError, var );
+ return FALSE;
+ }
+ SKIP_DIGITS(next);
+ end = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(end >= 0);
+
+ //
+ // Check for a stride parameter
+ //
+ stride = 1;
+ SKIP_WS(next);
+ if (*next == ':') {
+ //
+            // A stride is specified. Skip over the ':' and read the 3rd int.
+ //
+ int sign = +1;
+ next++; // skip ':'
+ SKIP_WS(next);
+ scan = next;
+ if (*next == '-') {
+ sign = -1;
+ next++;
+ SKIP_WS(next);
+ scan = next;
+ }
+ if ((*next < '0') || (*next > '9')) {
+ KMP_WARNING( AffSyntaxError, var );
+ return FALSE;
+ }
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(stride >= 0);
+ stride *= sign;
+ }
+
+ //
+ // Do some range checks.
+ //
+ if (stride == 0) {
+ KMP_WARNING( AffZeroStride, var );
+ return FALSE;
+ }
+ if (stride > 0) {
+ if (start > end) {
+ KMP_WARNING( AffStartGreaterEnd, var, start, end );
+ return FALSE;
+ }
+ }
+ else {
+ if (start < end) {
+ KMP_WARNING( AffStrideLessZero, var, start, end );
+ return FALSE;
+ }
+ }
+ if ((end - start) / stride > 65536 ) {
+ KMP_WARNING( AffRangeTooBig, var, end, start, stride );
+ return FALSE;
+ }
+
+ empty = FALSE;
+
+ //
+ // Skip optional comma.
+ //
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ }
+
+ *nextEnv = next;
+
+ {
+ int len = next - env;
+ char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char));
+ KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char));
+ retlist[len] = '\0';
+ *proclist = retlist;
+ }
+ return TRUE;
+}
+
+
+//
+// If KMP_AFFINITY is specified without a type, then
+// __kmp_affinity_notype should point to its setting.
+//
+static kmp_setting_t *__kmp_affinity_notype = NULL;
+
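+// Illustrative example: KMP_AFFINITY="verbose,granularity=fine,compact,1,0" sets the verbose
+// flag, the granularity, and the affinity type, and the two trailing integers are picked up by
+// the numeric branch below as the permute and offset parameters.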
+static void
+__kmp_parse_affinity_env( char const * name, char const * value,
+ enum affinity_type * out_type,
+ char ** out_proclist,
+ int * out_verbose,
+ int * out_warn,
+ int * out_respect,
+ enum affinity_gran * out_gran,
+ int * out_gran_levels,
+ int * out_dups,
+ int * out_compact,
+ int * out_offset
+)
+{
+ char * buffer = NULL; // Copy of env var value.
+ char * buf = NULL; // Buffer for strtok_r() function.
+ char * next = NULL; // end of token / start of next.
+ const char * start; // start of current token (for err msgs)
+ int count = 0; // Counter of parsed integer numbers.
+ int number[ 2 ]; // Parsed numbers.
+
+ // Guards.
+ int type = 0;
+ int proclist = 0;
+ int max_proclist = 0;
+ int verbose = 0;
+ int warnings = 0;
+ int respect = 0;
+ int gran = 0;
+ int dups = 0;
+
+ KMP_ASSERT( value != NULL );
+
+ if ( TCR_4(__kmp_init_middle) ) {
+ KMP_WARNING( EnvMiddleWarn, name );
+ __kmp_env_toPrint( name, 0 );
+ return;
+ }
+ __kmp_env_toPrint( name, 1 );
+
+ buffer = __kmp_str_format( "%s", value ); // Copy env var to keep original intact.
+ buf = buffer;
+ SKIP_WS(buf);
+
+ // Helper macros.
+
+ //
+ // If we see a parse error, emit a warning and scan to the next ",".
+ //
+ // FIXME - there's got to be a better way to print an error
+    // message, hopefully without overwriting pieces of buf.
+ //
+ #define EMIT_WARN(skip,errlist) \
+ { \
+ char ch; \
+ if (skip) { \
+ SKIP_TO(next, ','); \
+ } \
+ ch = *next; \
+ *next = '\0'; \
+ KMP_WARNING errlist; \
+ *next = ch; \
+ if (skip) { \
+ if (ch == ',') next++; \
+ } \
+ buf = next; \
+ }
+
+ #define _set_param(_guard,_var,_val) \
+ { \
+ if ( _guard == 0 ) { \
+ _var = _val; \
+ } else { \
+ EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \
+ }; \
+ ++ _guard; \
+ }
+
+ #define set_type(val) _set_param( type, *out_type, val )
+ #define set_verbose(val) _set_param( verbose, *out_verbose, val )
+ #define set_warnings(val) _set_param( warnings, *out_warn, val )
+ #define set_respect(val) _set_param( respect, *out_respect, val )
+ #define set_dups(val) _set_param( dups, *out_dups, val )
+ #define set_proclist(val) _set_param( proclist, *out_proclist, val )
+
+ #define set_gran(val,levels) \
+ { \
+ if ( gran == 0 ) { \
+ *out_gran = val; \
+ *out_gran_levels = levels; \
+ } else { \
+ EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \
+ }; \
+ ++ gran; \
+ }
+
+# if OMP_40_ENABLED
+ KMP_DEBUG_ASSERT( ( __kmp_nested_proc_bind.bind_types != NULL )
+ && ( __kmp_nested_proc_bind.used > 0 ) );
+# endif
+
+ while ( *buf != '\0' ) {
+ start = next = buf;
+
+ if (__kmp_match_str("none", buf, (const char **)&next)) {
+ set_type( affinity_none );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("scatter", buf, (const char **)&next)) {
+ set_type( affinity_scatter );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("compact", buf, (const char **)&next)) {
+ set_type( affinity_compact );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("logical", buf, (const char **)&next)) {
+ set_type( affinity_logical );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("physical", buf, (const char **)&next)) {
+ set_type( affinity_physical );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("explicit", buf, (const char **)&next)) {
+ set_type( affinity_explicit );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("balanced", buf, (const char **)&next)) {
+ set_type( affinity_balanced );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("disabled", buf, (const char **)&next)) {
+ set_type( affinity_disabled );
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+# endif
+ buf = next;
+ } else if (__kmp_match_str("verbose", buf, (const char **)&next)) {
+ set_verbose( TRUE );
+ buf = next;
+ } else if (__kmp_match_str("noverbose", buf, (const char **)&next)) {
+ set_verbose( FALSE );
+ buf = next;
+ } else if (__kmp_match_str("warnings", buf, (const char **)&next)) {
+ set_warnings( TRUE );
+ buf = next;
+ } else if (__kmp_match_str("nowarnings", buf, (const char **)&next)) {
+ set_warnings( FALSE );
+ buf = next;
+ } else if (__kmp_match_str("respect", buf, (const char **)&next)) {
+ set_respect( TRUE );
+ buf = next;
+ } else if (__kmp_match_str("norespect", buf, (const char **)&next)) {
+ set_respect( FALSE );
+ buf = next;
+ } else if (__kmp_match_str("duplicates", buf, (const char **)&next)
+ || __kmp_match_str("dups", buf, (const char **)&next)) {
+ set_dups( TRUE );
+ buf = next;
+ } else if (__kmp_match_str("noduplicates", buf, (const char **)&next)
+ || __kmp_match_str("nodups", buf, (const char **)&next)) {
+ set_dups( FALSE );
+ buf = next;
+ } else if (__kmp_match_str("granularity", buf, (const char **)&next)
+ || __kmp_match_str("gran", buf, (const char **)&next)) {
+ SKIP_WS(next);
+ if (*next != '=') {
+ EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
+ continue;
+ }
+ next++; // skip '='
+ SKIP_WS(next);
+
+ buf = next;
+ if (__kmp_match_str("fine", buf, (const char **)&next)) {
+ set_gran( affinity_gran_fine, -1 );
+ buf = next;
+ } else if (__kmp_match_str("thread", buf, (const char **)&next)) {
+ set_gran( affinity_gran_thread, -1 );
+ buf = next;
+ } else if (__kmp_match_str("core", buf, (const char **)&next)) {
+ set_gran( affinity_gran_core, -1 );
+ buf = next;
+ } else if (__kmp_match_str("package", buf, (const char **)&next)) {
+ set_gran( affinity_gran_package, -1 );
+ buf = next;
+ } else if (__kmp_match_str("node", buf, (const char **)&next)) {
+ set_gran( affinity_gran_node, -1 );
+ buf = next;
+# if KMP_GROUP_AFFINITY
+ } else if (__kmp_match_str("group", buf, (const char **)&next)) {
+ set_gran( affinity_gran_group, -1 );
+ buf = next;
+# endif /* KMP_GROUP_AFFINITY */
+ } else if ((*buf >= '0') && (*buf <= '9')) {
+ int n;
+ next = buf;
+ SKIP_DIGITS(next);
+ n = __kmp_str_to_int( buf, *next );
+ KMP_ASSERT(n >= 0);
+ buf = next;
+ set_gran( affinity_gran_default, n );
+ } else {
+ EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
+ continue;
+ }
+ } else if (__kmp_match_str("proclist", buf, (const char **)&next)) {
+ char *temp_proclist;
+
+ SKIP_WS(next);
+ if (*next != '=') {
+ EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
+ continue;
+ }
+ next++; // skip '='
+ SKIP_WS(next);
+ if (*next != '[') {
+ EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
+ continue;
+ }
+ next++; // skip '['
+ buf = next;
+ if (! __kmp_parse_affinity_proc_id_list(name, buf,
+ (const char **)&next, &temp_proclist)) {
+ //
+ // warning already emitted.
+ //
+ SKIP_TO(next, ']');
+ if (*next == ']') next++;
+ SKIP_TO(next, ',');
+ if (*next == ',') next++;
+ buf = next;
+ continue;
+ }
+ if (*next != ']') {
+ EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
+ continue;
+ }
+ next++; // skip ']'
+ set_proclist( temp_proclist );
+ } else if ((*buf >= '0') && (*buf <= '9')) {
+ // Parse integer numbers -- permute and offset.
+ int n;
+ next = buf;
+ SKIP_DIGITS(next);
+ n = __kmp_str_to_int( buf, *next );
+ KMP_ASSERT(n >= 0);
+ buf = next;
+ if ( count < 2 ) {
+ number[ count ] = n;
+ } else {
+ KMP_WARNING( AffManyParams, name, start );
+ }; // if
+ ++ count;
+ } else {
+ EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) );
+ continue;
+ }
+
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ SKIP_WS(next);
+ }
+ else if (*next != '\0') {
+ const char *temp = next;
+ EMIT_WARN( TRUE, ( ParseExtraCharsWarn, name, temp ) );
+ continue;
+ }
+ buf = next;
+ } // while
+
+    #undef EMIT_WARN
+    #undef _set_param
+    #undef set_type
+    #undef set_verbose
+    #undef set_warnings
+    #undef set_respect
+    #undef set_dups
+    #undef set_proclist
+    #undef set_gran
+
+ KMP_INTERNAL_FREE( buffer );
+
+ if ( proclist ) {
+ if ( ! type ) {
+ KMP_WARNING( AffProcListNoType, name );
+ __kmp_affinity_type = affinity_explicit;
+ }
+ else if ( __kmp_affinity_type != affinity_explicit ) {
+ KMP_WARNING( AffProcListNotExplicit, name );
+ KMP_ASSERT( *out_proclist != NULL );
+ KMP_INTERNAL_FREE( *out_proclist );
+ *out_proclist = NULL;
+ }
+ }
+ switch ( *out_type ) {
+ case affinity_logical:
+ case affinity_physical: {
+ if ( count > 0 ) {
+ *out_offset = number[ 0 ];
+ }; // if
+ if ( count > 1 ) {
+ KMP_WARNING( AffManyParamsForLogic, name, number[ 1 ] );
+ }; // if
+ } break;
+ case affinity_balanced: {
+ if ( count > 0 ) {
+ *out_compact = number[ 0 ];
+ }; // if
+ if ( count > 1 ) {
+ *out_offset = number[ 1 ];
+ }; // if
+
+ if ( __kmp_affinity_gran == affinity_gran_default ) {
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic ) {
+ if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+ KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" );
+ }
+ __kmp_affinity_gran = affinity_gran_fine;
+ } else
+#endif
+ {
+ if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+ KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" );
+ }
+ __kmp_affinity_gran = affinity_gran_core;
+ }
+ }
+ } break;
+ case affinity_scatter:
+ case affinity_compact: {
+ if ( count > 0 ) {
+ *out_compact = number[ 0 ];
+ }; // if
+ if ( count > 1 ) {
+ *out_offset = number[ 1 ];
+ }; // if
+ } break;
+ case affinity_explicit: {
+ if ( *out_proclist == NULL ) {
+ KMP_WARNING( AffNoProcList, name );
+ __kmp_affinity_type = affinity_none;
+ }
+ if ( count > 0 ) {
+ KMP_WARNING( AffNoParam, name, "explicit" );
+ }
+ } break;
+ case affinity_none: {
+ if ( count > 0 ) {
+ KMP_WARNING( AffNoParam, name, "none" );
+ }; // if
+ } break;
+ case affinity_disabled: {
+ if ( count > 0 ) {
+ KMP_WARNING( AffNoParam, name, "disabled" );
+ }; // if
+ } break;
+ case affinity_default: {
+ if ( count > 0 ) {
+ KMP_WARNING( AffNoParam, name, "default" );
+ }; // if
+ } break;
+ default: {
+ KMP_ASSERT( 0 );
+ };
+ }; // switch
+} // __kmp_parse_affinity_env
+
+static void
+__kmp_stg_parse_affinity( char const * name, char const * value, void * data )
+{
+ kmp_setting_t **rivals = (kmp_setting_t **) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, rivals );
+ if ( rc ) {
+ return;
+ }
+
+ __kmp_parse_affinity_env( name, value, & __kmp_affinity_type,
+ & __kmp_affinity_proclist, & __kmp_affinity_verbose,
+ & __kmp_affinity_warnings, & __kmp_affinity_respect_mask,
+ & __kmp_affinity_gran, & __kmp_affinity_gran_levels,
+ & __kmp_affinity_dups, & __kmp_affinity_compact,
+ & __kmp_affinity_offset );
+
+} // __kmp_stg_parse_affinity
+
+static void
+__kmp_stg_print_affinity( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ } else {
+ __kmp_str_buf_print( buffer, " %s='", name );
+ }
+ if ( __kmp_affinity_verbose ) {
+ __kmp_str_buf_print( buffer, "%s,", "verbose");
+ } else {
+ __kmp_str_buf_print( buffer, "%s,", "noverbose");
+ }
+ if ( __kmp_affinity_warnings ) {
+ __kmp_str_buf_print( buffer, "%s,", "warnings");
+ } else {
+ __kmp_str_buf_print( buffer, "%s,", "nowarnings");
+ }
+ if ( KMP_AFFINITY_CAPABLE() ) {
+ if ( __kmp_affinity_respect_mask ) {
+ __kmp_str_buf_print( buffer, "%s,", "respect");
+ } else {
+ __kmp_str_buf_print( buffer, "%s,", "norespect");
+ }
+ switch ( __kmp_affinity_gran ) {
+ case affinity_gran_default:
+ __kmp_str_buf_print( buffer, "%s", "granularity=default,");
+ break;
+ case affinity_gran_fine:
+ __kmp_str_buf_print( buffer, "%s", "granularity=fine,");
+ break;
+ case affinity_gran_thread:
+ __kmp_str_buf_print( buffer, "%s", "granularity=thread,");
+ break;
+ case affinity_gran_core:
+ __kmp_str_buf_print( buffer, "%s", "granularity=core,");
+ break;
+ case affinity_gran_package:
+ __kmp_str_buf_print( buffer, "%s", "granularity=package,");
+ break;
+ case affinity_gran_node:
+ __kmp_str_buf_print( buffer, "%s", "granularity=node,");
+ break;
+# if KMP_GROUP_AFFINITY
+ case affinity_gran_group:
+ __kmp_str_buf_print( buffer, "%s", "granularity=group,");
+ break;
+# endif /* KMP_GROUP_AFFINITY */
+ }
+ if ( __kmp_affinity_dups ) {
+ __kmp_str_buf_print( buffer, "%s,", "duplicates");
+ } else {
+ __kmp_str_buf_print( buffer, "%s,", "noduplicates");
+ }
+ }
+ if ( ! KMP_AFFINITY_CAPABLE() ) {
+ __kmp_str_buf_print( buffer, "%s", "disabled" );
+ }
+ else switch ( __kmp_affinity_type ){
+ case affinity_none:
+ __kmp_str_buf_print( buffer, "%s", "none");
+ break;
+ case affinity_physical:
+ __kmp_str_buf_print( buffer, "%s,%d", "physical",
+ __kmp_affinity_offset );
+ break;
+ case affinity_logical:
+ __kmp_str_buf_print( buffer, "%s,%d", "logical",
+ __kmp_affinity_offset );
+ break;
+ case affinity_compact:
+ __kmp_str_buf_print( buffer, "%s,%d,%d", "compact",
+ __kmp_affinity_compact, __kmp_affinity_offset );
+ break;
+ case affinity_scatter:
+ __kmp_str_buf_print( buffer, "%s,%d,%d", "scatter",
+ __kmp_affinity_compact, __kmp_affinity_offset );
+ break;
+ case affinity_explicit:
+ __kmp_str_buf_print( buffer, "%s=[%s],%s", "proclist",
+ __kmp_affinity_proclist, "explicit" );
+ break;
+ case affinity_balanced:
+ __kmp_str_buf_print( buffer, "%s,%d,%d", "balanced",
+ __kmp_affinity_compact, __kmp_affinity_offset );
+ break;
+ case affinity_disabled:
+ __kmp_str_buf_print( buffer, "%s", "disabled");
+ break;
+ case affinity_default:
+ __kmp_str_buf_print( buffer, "%s", "default");
+ break;
+ default:
+ __kmp_str_buf_print( buffer, "%s", "<unknown>");
+ break;
+ }
+ __kmp_str_buf_print( buffer, "'\n" );
+} //__kmp_stg_print_affinity
+
+# ifdef KMP_GOMP_COMPAT
+
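+// Illustrative example: GOMP_CPU_AFFINITY="0 3 1-2" is parsed with the same proc id list parser
+// and, on success, is treated like granularity=fine,explicit,proclist=[0 3 1-2].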
+static void
+__kmp_stg_parse_gomp_cpu_affinity( char const * name, char const * value, void * data )
+{
+ const char * next = NULL;
+ char * temp_proclist;
+ kmp_setting_t **rivals = (kmp_setting_t **) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, rivals );
+ if ( rc ) {
+ return;
+ }
+
+ if ( TCR_4(__kmp_init_middle) ) {
+ KMP_WARNING( EnvMiddleWarn, name );
+ __kmp_env_toPrint( name, 0 );
+ return;
+ }
+
+ __kmp_env_toPrint( name, 1 );
+
+ if ( __kmp_parse_affinity_proc_id_list( name, value, &next,
+ &temp_proclist )) {
+ SKIP_WS(next);
+ if (*next == '\0') {
+ //
+ // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=...
+ //
+ __kmp_affinity_proclist = temp_proclist;
+ __kmp_affinity_type = affinity_explicit;
+ __kmp_affinity_gran = affinity_gran_fine;
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ }
+ else {
+ KMP_WARNING( AffSyntaxError, name );
+ if (temp_proclist != NULL) {
+ KMP_INTERNAL_FREE((void *)temp_proclist);
+ }
+ }
+ }
+ else {
+ //
+ // Warning already emitted
+ //
+ __kmp_affinity_type = affinity_none;
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+# endif
+ }
+} // __kmp_stg_parse_gomp_cpu_affinity
+
+# endif /* KMP_GOMP_COMPAT */
+
+
+# if OMP_40_ENABLED
+
+/*-----------------------------------------------------------------------------
+
+The OMP_PLACES proc id list parser. Here is the grammar:
+
+place_list := place
+place_list := place , place_list
+place := num
+place := place : num
+place := place : num : signed
+place := { subplacelist }
+place := ! place // (lowest priority)
+subplace_list := subplace
+subplace_list := subplace , subplace_list
+subplace := num
+subplace := num : num
+subplace := num : num : signed
+signed := num
+signed := + signed
+signed := - signed
+
+-----------------------------------------------------------------------------*/
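+// Illustrative examples: OMP_PLACES="{0,1,2,3},{4,5,6,7}" matches the grammar above and is kept
+// as an explicit place list, while OMP_PLACES="threads(4)", "cores" or "sockets" take the
+// abstract-name path handled in __kmp_stg_parse_places() below.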
+
+static int
+__kmp_parse_subplace_list( const char *var, const char **scan )
+{
+ const char *next;
+
+ for (;;) {
+ int start, count, stride;
+
+ //
+ // Read in the starting proc id
+ //
+ SKIP_WS(*scan);
+ if ((**scan < '0') || (**scan > '9')) {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ next = *scan;
+ SKIP_DIGITS(next);
+ start = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(start >= 0);
+ *scan = next;
+
+ //
+ // valid follow sets are ',' ':' and '}'
+ //
+ SKIP_WS(*scan);
+ if (**scan == '}') {
+ break;
+ }
+ if (**scan == ',') {
+ (*scan)++; // skip ','
+ continue;
+ }
+ if (**scan != ':') {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ (*scan)++; // skip ':'
+
+ //
+ // Read count parameter
+ //
+ SKIP_WS(*scan);
+ if ((**scan < '0') || (**scan > '9')) {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ next = *scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(count >= 0);
+ *scan = next;
+
+ //
+ // valid follow sets are ',' ':' and '}'
+ //
+ SKIP_WS(*scan);
+ if (**scan == '}') {
+ break;
+ }
+ if (**scan == ',') {
+ (*scan)++; // skip ','
+ continue;
+ }
+ if (**scan != ':') {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ (*scan)++; // skip ':'
+
+ //
+ // Read stride parameter
+ //
+ int sign = +1;
+ for (;;) {
+ SKIP_WS(*scan);
+ if (**scan == '+') {
+ (*scan)++; // skip '+'
+ continue;
+ }
+ if (**scan == '-') {
+ sign *= -1;
+ (*scan)++; // skip '-'
+ continue;
+ }
+ break;
+ }
+ SKIP_WS(*scan);
+ if ((**scan < '0') || (**scan > '9')) {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ next = *scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(stride >= 0);
+ *scan = next;
+ stride *= sign;
+
+ //
+ // valid follow sets are ',' and '}'
+ //
+ SKIP_WS(*scan);
+ if (**scan == '}') {
+ break;
+ }
+ if (**scan == ',') {
+ (*scan)++; // skip ','
+ continue;
+ }
+
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static int
+__kmp_parse_place( const char *var, const char ** scan )
+{
+ const char *next;
+
+ //
+ // valid follow sets are '{' '!' and num
+ //
+ SKIP_WS(*scan);
+ if (**scan == '{') {
+ (*scan)++; // skip '{'
+ if (! __kmp_parse_subplace_list(var, scan)) {
+ return FALSE;
+ }
+ if (**scan != '}') {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ (*scan)++; // skip '}'
+ }
+ else if (**scan == '!') {
+ (*scan)++; // skip '!'
+ return __kmp_parse_place(var, scan); //'!' has lower precedence than ':'
+ }
+ else if ((**scan >= '0') && (**scan <= '9')) {
+ next = *scan;
+ SKIP_DIGITS(next);
+ int proc = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(proc >= 0);
+ *scan = next;
+ }
+ else {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static int
+__kmp_parse_place_list( const char *var, const char *env, char **place_list )
+{
+ const char *scan = env;
+ const char *next = scan;
+
+ for (;;) {
+ int start, count, stride;
+
+ if (! __kmp_parse_place(var, &scan)) {
+ return FALSE;
+ }
+
+ //
+ // valid follow sets are ',' ':' and EOL
+ //
+ SKIP_WS(scan);
+ if (*scan == '\0') {
+ break;
+ }
+ if (*scan == ',') {
+ scan++; // skip ','
+ continue;
+ }
+ if (*scan != ':') {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ scan++; // skip ':'
+
+ //
+ // Read count parameter
+ //
+ SKIP_WS(scan);
+ if ((*scan < '0') || (*scan > '9')) {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ next = scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(count >= 0);
+ scan = next;
+
+ //
+ // valid follow sets are ',' ':' and EOL
+ //
+ SKIP_WS(scan);
+ if (*scan == '\0') {
+ break;
+ }
+ if (*scan == ',') {
+ scan++; // skip ','
+ continue;
+ }
+ if (*scan != ':') {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ scan++; // skip ':'
+
+ //
+ // Read stride parameter
+ //
+ int sign = +1;
+ for (;;) {
+ SKIP_WS(scan);
+ if (*scan == '+') {
+ scan++; // skip '+'
+ continue;
+ }
+ if (*scan == '-') {
+ sign *= -1;
+ scan++; // skip '-'
+ continue;
+ }
+ break;
+ }
+ SKIP_WS(scan);
+ if ((*scan < '0') || (*scan > '9')) {
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+ next = scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(stride >= 0);
+ scan = next;
+ stride *= sign;
+
+ //
+ // valid follow sets are ',' and EOL
+ //
+ SKIP_WS(scan);
+ if (*scan == '\0') {
+ break;
+ }
+ if (*scan == ',') {
+ scan++; // skip ','
+ continue;
+ }
+
+ KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" );
+ return FALSE;
+ }
+
+ {
+ int len = scan - env;
+ char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char));
+ KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char));
+ retlist[len] = '\0';
+ *place_list = retlist;
+ }
+ return TRUE;
+}
+
+static void
+__kmp_stg_parse_places( char const * name, char const * value, void * data )
+{
+ int count;
+ const char *scan = value;
+ const char *next = scan;
+ const char *kind = "\"threads\"";
+ kmp_setting_t **rivals = (kmp_setting_t **) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, rivals );
+ if ( rc ) {
+ return;
+ }
+
+ //
+ // If OMP_PROC_BIND is not specified but OMP_PLACES is,
+ // then let OMP_PROC_BIND default to true.
+ //
+ if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) {
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
+ }
+
+ //__kmp_affinity_num_places = 0;
+
+ if ( __kmp_match_str( "threads", scan, &next ) ) {
+ scan = next;
+ __kmp_affinity_type = affinity_compact;
+ __kmp_affinity_gran = affinity_gran_thread;
+ __kmp_affinity_dups = FALSE;
+ kind = "\"threads\"";
+ }
+ else if ( __kmp_match_str( "cores", scan, &next ) ) {
+ scan = next;
+ __kmp_affinity_type = affinity_compact;
+ __kmp_affinity_gran = affinity_gran_core;
+ __kmp_affinity_dups = FALSE;
+ kind = "\"cores\"";
+ }
+ else if ( __kmp_match_str( "sockets", scan, &next ) ) {
+ scan = next;
+ __kmp_affinity_type = affinity_compact;
+ __kmp_affinity_gran = affinity_gran_package;
+ __kmp_affinity_dups = FALSE;
+ kind = "\"sockets\"";
+ }
+ else {
+ if ( __kmp_affinity_proclist != NULL ) {
+ KMP_INTERNAL_FREE( (void *)__kmp_affinity_proclist );
+ __kmp_affinity_proclist = NULL;
+ }
+ if ( __kmp_parse_place_list( name, value, &__kmp_affinity_proclist ) ) {
+ __kmp_affinity_type = affinity_explicit;
+ __kmp_affinity_gran = affinity_gran_fine;
+ __kmp_affinity_dups = FALSE;
+ if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) {
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
+ }
+ }
+ return;
+ }
+
+ if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) {
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
+ }
+
+ SKIP_WS(scan);
+ if ( *scan == '\0' ) {
+ return;
+ }
+
+ //
+ // Parse option count parameter in parentheses
+ //
+ if ( *scan != '(' ) {
+ KMP_WARNING( SyntaxErrorUsing, name, kind );
+ return;
+ }
+ scan++; // skip '('
+
+ SKIP_WS(scan);
+ next = scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(count >= 0);
+ scan = next;
+
+ SKIP_WS(scan);
+ if ( *scan != ')' ) {
+ KMP_WARNING( SyntaxErrorUsing, name, kind );
+ return;
+ }
+ scan++; // skip ')'
+
+ SKIP_WS(scan);
+ if ( *scan != '\0' ) {
+ KMP_WARNING( ParseExtraCharsWarn, name, scan );
+ }
+ __kmp_affinity_num_places = count;
+}
+
+static void
+__kmp_stg_print_places( kmp_str_buf_t * buffer, char const * name,
+ void * data )
+{
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME;
+ } else {
+ __kmp_str_buf_print( buffer, " %s", name );
+ }
+ if ( ( __kmp_nested_proc_bind.used == 0 )
+ || ( __kmp_nested_proc_bind.bind_types == NULL )
+ || ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_false ) ) {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ else if ( __kmp_affinity_type == affinity_explicit ) {
+ if ( __kmp_affinity_proclist != NULL ) {
+ __kmp_str_buf_print( buffer, "='%s'\n", __kmp_affinity_proclist );
+ }
+ else {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ }
+ else if ( __kmp_affinity_type == affinity_compact ) {
+ int num;
+ if ( __kmp_affinity_num_masks > 0 ) {
+ num = __kmp_affinity_num_masks;
+ }
+ else if ( __kmp_affinity_num_places > 0 ) {
+ num = __kmp_affinity_num_places;
+ }
+ else {
+ num = 0;
+ }
+ if ( __kmp_affinity_gran == affinity_gran_thread ) {
+ if ( num > 0 ) {
+ __kmp_str_buf_print( buffer, "='threads(%d)'\n", num );
+ }
+ else {
+ __kmp_str_buf_print( buffer, "='threads'\n" );
+ }
+ }
+ else if ( __kmp_affinity_gran == affinity_gran_core ) {
+ if ( num > 0 ) {
+                __kmp_str_buf_print( buffer, "='cores(%d)'\n", num );
+ }
+ else {
+ __kmp_str_buf_print( buffer, "='cores'\n" );
+ }
+ }
+ else if ( __kmp_affinity_gran == affinity_gran_package ) {
+ if ( num > 0 ) {
+ __kmp_str_buf_print( buffer, "='sockets(%d)'\n", num );
+ }
+ else {
+ __kmp_str_buf_print( buffer, "='sockets'\n" );
+ }
+ }
+ else {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ }
+ else {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+}
+
+# endif /* OMP_40_ENABLED */
+
+# if (! OMP_40_ENABLED)
+
+static void
+__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data )
+{
+ int enabled;
+ kmp_setting_t **rivals = (kmp_setting_t **) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, rivals );
+ if ( rc ) {
+ return;
+ }
+
+ //
+ // in OMP 3.1, OMP_PROC_BIND is strictly a boolean
+ //
+ __kmp_stg_parse_bool( name, value, & enabled );
+ if ( enabled ) {
+ //
+ // OMP_PROC_BIND => granularity=fine,scatter on MIC
+ // OMP_PROC_BIND => granularity=core,scatter elsewhere
+ //
+ __kmp_affinity_type = affinity_scatter;
+# if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic )
+ __kmp_affinity_gran = affinity_gran_fine;
+ else
+# endif
+ __kmp_affinity_gran = affinity_gran_core;
+ }
+ else {
+ __kmp_affinity_type = affinity_none;
+ }
+} // __kmp_stg_parse_proc_bind
+
+# endif /* if (! OMP_40_ENABLED) */
+
+
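+// Illustrative examples: KMP_TOPOLOGY_METHOD=cpuinfo selects affinity_top_method_cpuinfo and
+// KMP_TOPOLOGY_METHOD=flat selects affinity_top_method_flat; unrecognized values only produce
+// a warning.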
+static void
+__kmp_stg_parse_topology_method( char const * name, char const * value,
+ void * data ) {
+ if ( __kmp_str_match( "all", 1, value ) ) {
+ __kmp_affinity_top_method = affinity_top_method_all;
+ }
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ else if ( __kmp_str_match( "x2apic id", 9, value )
+ || __kmp_str_match( "x2apic_id", 9, value )
+ || __kmp_str_match( "x2apic-id", 9, value )
+ || __kmp_str_match( "x2apicid", 8, value )
+ || __kmp_str_match( "cpuid leaf 11", 13, value )
+ || __kmp_str_match( "cpuid_leaf_11", 13, value )
+ || __kmp_str_match( "cpuid-leaf-11", 13, value )
+ || __kmp_str_match( "cpuid leaf11", 12, value )
+ || __kmp_str_match( "cpuid_leaf11", 12, value )
+ || __kmp_str_match( "cpuid-leaf11", 12, value )
+ || __kmp_str_match( "cpuidleaf 11", 12, value )
+ || __kmp_str_match( "cpuidleaf_11", 12, value )
+ || __kmp_str_match( "cpuidleaf-11", 12, value )
+ || __kmp_str_match( "cpuidleaf11", 11, value )
+ || __kmp_str_match( "cpuid 11", 8, value )
+ || __kmp_str_match( "cpuid_11", 8, value )
+ || __kmp_str_match( "cpuid-11", 8, value )
+ || __kmp_str_match( "cpuid11", 7, value )
+ || __kmp_str_match( "leaf 11", 7, value )
+ || __kmp_str_match( "leaf_11", 7, value )
+ || __kmp_str_match( "leaf-11", 7, value )
+ || __kmp_str_match( "leaf11", 6, value ) ) {
+ __kmp_affinity_top_method = affinity_top_method_x2apicid;
+ }
+ else if ( __kmp_str_match( "apic id", 7, value )
+ || __kmp_str_match( "apic_id", 7, value )
+ || __kmp_str_match( "apic-id", 7, value )
+ || __kmp_str_match( "apicid", 6, value )
+ || __kmp_str_match( "cpuid leaf 4", 12, value )
+ || __kmp_str_match( "cpuid_leaf_4", 12, value )
+ || __kmp_str_match( "cpuid-leaf-4", 12, value )
+ || __kmp_str_match( "cpuid leaf4", 11, value )
+ || __kmp_str_match( "cpuid_leaf4", 11, value )
+ || __kmp_str_match( "cpuid-leaf4", 11, value )
+ || __kmp_str_match( "cpuidleaf 4", 11, value )
+ || __kmp_str_match( "cpuidleaf_4", 11, value )
+ || __kmp_str_match( "cpuidleaf-4", 11, value )
+ || __kmp_str_match( "cpuidleaf4", 10, value )
+ || __kmp_str_match( "cpuid 4", 7, value )
+ || __kmp_str_match( "cpuid_4", 7, value )
+ || __kmp_str_match( "cpuid-4", 7, value )
+ || __kmp_str_match( "cpuid4", 6, value )
+ || __kmp_str_match( "leaf 4", 6, value )
+ || __kmp_str_match( "leaf_4", 6, value )
+ || __kmp_str_match( "leaf-4", 6, value )
+ || __kmp_str_match( "leaf4", 5, value ) ) {
+ __kmp_affinity_top_method = affinity_top_method_apicid;
+ }
+# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+ else if ( __kmp_str_match( "/proc/cpuinfo", 2, value )
+ || __kmp_str_match( "cpuinfo", 5, value )) {
+ __kmp_affinity_top_method = affinity_top_method_cpuinfo;
+ }
+# if KMP_GROUP_AFFINITY
+ else if ( __kmp_str_match( "group", 1, value ) ) {
+ __kmp_affinity_top_method = affinity_top_method_group;
+ }
+# endif /* KMP_GROUP_AFFINITY */
+ else if ( __kmp_str_match( "flat", 1, value ) ) {
+ __kmp_affinity_top_method = affinity_top_method_flat;
+ }
+# if KMP_USE_HWLOC
+ else if ( __kmp_str_match( "hwloc", 1, value) ) {
+ __kmp_affinity_top_method = affinity_top_method_hwloc;
+ }
+# endif
+ else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }
+} // __kmp_stg_parse_topology_method
+
+static void
+__kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name,
+ void * data ) {
+# if KMP_DEBUG
+ char const * value = NULL;
+
+ switch ( __kmp_affinity_top_method ) {
+ case affinity_top_method_default:
+ value = "default";
+ break;
+
+ case affinity_top_method_all:
+ value = "all";
+ break;
+
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ case affinity_top_method_x2apicid:
+ value = "x2APIC id";
+ break;
+
+ case affinity_top_method_apicid:
+ value = "APIC id";
+ break;
+# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ case affinity_top_method_cpuinfo:
+ value = "cpuinfo";
+ break;
+
+# if KMP_GROUP_AFFINITY
+ case affinity_top_method_group:
+ value = "group";
+ break;
+# endif /* KMP_GROUP_AFFINITY */
+
+    case affinity_top_method_flat:
+        value = "flat";
+        break;
+
+# if KMP_USE_HWLOC
+    case affinity_top_method_hwloc:
+        value = "hwloc";
+        break;
+# endif
+    }
+
+ if ( value != NULL ) {
+ __kmp_stg_print_str( buffer, name, value );
+ }
+# endif /* KMP_DEBUG */
+} // __kmp_stg_print_topology_method
+
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+
+#if OMP_40_ENABLED
+
+//
+// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X*
+// OMP_PLACES / place-partition-var is not.
+//
+static void
+__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data )
+{
+ kmp_setting_t **rivals = (kmp_setting_t **) data;
+ int rc;
+
+ rc = __kmp_stg_check_rivals( name, value, rivals );
+ if ( rc ) {
+ return;
+ }
+
+ //
+ // in OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types.
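+    // Illustrative example: OMP_PROC_BIND="spread,close" stores proc_bind_spread and
+    // proc_bind_close in bind_types[0] and bind_types[1] for the two outermost nesting levels.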
+ //
+ KMP_DEBUG_ASSERT( (__kmp_nested_proc_bind.bind_types != NULL)
+ && ( __kmp_nested_proc_bind.used > 0 ) );
+
+ const char *buf = value;
+ const char *next;
+ int num;
+ SKIP_WS( buf );
+ if ( (*buf >= '0') && (*buf <= '9') ) {
+ next = buf;
+ SKIP_DIGITS( next );
+ num = __kmp_str_to_int( buf, *next );
+ KMP_ASSERT( num >= 0 );
+ buf = next;
+ SKIP_WS( buf );
+ }
+ else {
+ num = -1;
+ }
+
+ next = buf;
+ if ( __kmp_match_str( "disabled", buf, &next ) ) {
+ buf = next;
+ SKIP_WS( buf );
+# if KMP_AFFINITY_SUPPORTED
+ __kmp_affinity_type = affinity_disabled;
+# endif /* KMP_AFFINITY_SUPPORTED */
+ __kmp_nested_proc_bind.used = 1;
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+ }
+ else if ( ( num == (int)proc_bind_false )
+ || __kmp_match_str( "false", buf, &next ) ) {
+ buf = next;
+ SKIP_WS( buf );
+# if KMP_AFFINITY_SUPPORTED
+ __kmp_affinity_type = affinity_none;
+# endif /* KMP_AFFINITY_SUPPORTED */
+ __kmp_nested_proc_bind.used = 1;
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+ }
+ else if ( ( num == (int)proc_bind_true )
+ || __kmp_match_str( "true", buf, &next ) ) {
+ buf = next;
+ SKIP_WS( buf );
+ __kmp_nested_proc_bind.used = 1;
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
+ }
+ else {
+ //
+ // Count the number of values in the env var string
+ //
+ const char *scan;
+ int nelem = 1;
+ for ( scan = buf; *scan != '\0'; scan++ ) {
+ if ( *scan == ',' ) {
+ nelem++;
+ }
+ }
+
+ //
+ // Create / expand the nested proc_bind array as needed
+ //
+ if ( __kmp_nested_proc_bind.size < nelem ) {
+ __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *)
+ KMP_INTERNAL_REALLOC( __kmp_nested_proc_bind.bind_types,
+ sizeof(kmp_proc_bind_t) * nelem );
+ if ( __kmp_nested_proc_bind.bind_types == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }
+ __kmp_nested_proc_bind.size = nelem;
+ }
+ __kmp_nested_proc_bind.used = nelem;
+
+ //
+ // Save values in the nested proc_bind array
+ //
+ int i = 0;
+ for (;;) {
+ enum kmp_proc_bind_t bind;
+
+ if ( ( num == (int)proc_bind_master )
+ || __kmp_match_str( "master", buf, &next ) ) {
+ buf = next;
+ SKIP_WS( buf );
+ bind = proc_bind_master;
+ }
+ else if ( ( num == (int)proc_bind_close )
+ || __kmp_match_str( "close", buf, &next ) ) {
+ buf = next;
+ SKIP_WS( buf );
+ bind = proc_bind_close;
+ }
+ else if ( ( num == (int)proc_bind_spread )
+ || __kmp_match_str( "spread", buf, &next ) ) {
+ buf = next;
+ SKIP_WS( buf );
+ bind = proc_bind_spread;
+ }
+ else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+ __kmp_nested_proc_bind.used = 1;
+ return;
+ }
+
+ __kmp_nested_proc_bind.bind_types[i++] = bind;
+ if ( i >= nelem ) {
+ break;
+ }
+ KMP_DEBUG_ASSERT( *buf == ',' );
+ buf++;
+ SKIP_WS( buf );
+
+ //
+ // Read next value if it was specified as an integer
+ //
+ if ( (*buf >= '0') && (*buf <= '9') ) {
+ next = buf;
+ SKIP_DIGITS( next );
+ num = __kmp_str_to_int( buf, *next );
+ KMP_ASSERT( num >= 0 );
+ buf = next;
+ SKIP_WS( buf );
+ }
+ else {
+ num = -1;
+ }
+ }
+ SKIP_WS( buf );
+ }
+ if ( *buf != '\0' ) {
+ KMP_WARNING( ParseExtraCharsWarn, name, buf );
+ }
+}
+
+
+static void
+__kmp_stg_print_proc_bind( kmp_str_buf_t * buffer, char const * name,
+ void * data )
+{
+ int nelem = __kmp_nested_proc_bind.used;
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME;
+ } else {
+ __kmp_str_buf_print( buffer, " %s", name );
+ }
+ if ( nelem == 0 ) {
+ __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) );
+ }
+ else {
+ int i;
+        __kmp_str_buf_print( buffer, "='" );
+ for ( i = 0; i < nelem; i++ ) {
+ switch ( __kmp_nested_proc_bind.bind_types[i] ) {
+ case proc_bind_false:
+ __kmp_str_buf_print( buffer, "false" );
+ break;
+
+ case proc_bind_true:
+ __kmp_str_buf_print( buffer, "true" );
+ break;
+
+ case proc_bind_master:
+ __kmp_str_buf_print( buffer, "master" );
+ break;
+
+ case proc_bind_close:
+ __kmp_str_buf_print( buffer, "close" );
+ break;
+
+ case proc_bind_spread:
+ __kmp_str_buf_print( buffer, "spread" );
+ break;
+
+ case proc_bind_intel:
+ __kmp_str_buf_print( buffer, "intel" );
+ break;
+
+ case proc_bind_default:
+ __kmp_str_buf_print( buffer, "default" );
+ break;
+ }
+ if ( i < nelem - 1 ) {
+ __kmp_str_buf_print( buffer, "," );
+ }
+ }
+ __kmp_str_buf_print( buffer, "'\n" );
+ }
+}
+
+#endif /* OMP_40_ENABLED */
+
+
+// -------------------------------------------------------------------------------------------------
+// OMP_DYNAMIC
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_omp_dynamic( char const * name, char const * value, void * data )
+{
+ __kmp_stg_parse_bool( name, value, & (__kmp_global.g.g_dynamic) );
+} // __kmp_stg_parse_omp_dynamic
+
+static void
+__kmp_stg_print_omp_dynamic( kmp_str_buf_t * buffer, char const * name, void * data )
+{
+ __kmp_stg_print_bool( buffer, name, __kmp_global.g.g_dynamic );
+} // __kmp_stg_print_omp_dynamic
+
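+// Illustrative examples (assuming the setting is registered as KMP_DYNAMIC_MODE):
+// "thread_limit" selects dynamic_thread_limit and "random" selects dynamic_random;
+// "load balance" is only accepted when USE_LOAD_BALANCE is defined.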
+static void
+__kmp_stg_parse_kmp_dynamic_mode( char const * name, char const * value, void * data )
+{
+ if ( TCR_4(__kmp_init_parallel) ) {
+ KMP_WARNING( EnvParallelWarn, name );
+ __kmp_env_toPrint( name, 0 );
+ return;
+ }
+#ifdef USE_LOAD_BALANCE
+ else if ( __kmp_str_match( "load balance", 2, value )
+ || __kmp_str_match( "load_balance", 2, value )
+ || __kmp_str_match( "load-balance", 2, value )
+ || __kmp_str_match( "loadbalance", 2, value )
+ || __kmp_str_match( "balance", 1, value ) ) {
+ __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
+ }
+#endif /* USE_LOAD_BALANCE */
+ else if ( __kmp_str_match( "thread limit", 1, value )
+ || __kmp_str_match( "thread_limit", 1, value )
+ || __kmp_str_match( "thread-limit", 1, value )
+ || __kmp_str_match( "threadlimit", 1, value )
+ || __kmp_str_match( "limit", 2, value ) ) {
+ __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
+ }
+ else if ( __kmp_str_match( "random", 1, value ) ) {
+ __kmp_global.g.g_dynamic_mode = dynamic_random;
+ }
+ else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }
+} //__kmp_stg_parse_kmp_dynamic_mode
+
+static void
+__kmp_stg_print_kmp_dynamic_mode( kmp_str_buf_t * buffer, char const * name, void * data )
+{
+#if KMP_DEBUG
+ if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
+ __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
+ }
+# ifdef USE_LOAD_BALANCE
+ else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
+ __kmp_stg_print_str( buffer, name, "load balance" );
+ }
+# endif /* USE_LOAD_BALANCE */
+ else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
+ __kmp_stg_print_str( buffer, name, "thread limit" );
+ }
+ else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
+ __kmp_stg_print_str( buffer, name, "random" );
+ }
+ else {
+ KMP_ASSERT(0);
+ }
+#endif /* KMP_DEBUG */
+} // __kmp_stg_print_kmp_dynamic_mode
+
+
+#ifdef USE_LOAD_BALANCE
+
+// -------------------------------------------------------------------------------------------------
+// KMP_LOAD_BALANCE_INTERVAL
+// -------------------------------------------------------------------------------------------------
+
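+// Illustrative example: a value such as "0.2" is converted with __kmp_convert_to_double and
+// stored in __kmp_load_balance_interval; negative values are rejected with a warning.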
+static void
+__kmp_stg_parse_ld_balance_interval( char const * name, char const * value, void * data )
+{
+ double interval = __kmp_convert_to_double( value );
+ if ( interval >= 0 ) {
+ __kmp_load_balance_interval = interval;
+ } else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }; // if
+} // __kmp_stg_parse_ld_balance_interval
+
+static void
+__kmp_stg_print_ld_balance_interval( kmp_str_buf_t * buffer, char const * name, void * data ) {
+#if KMP_DEBUG
+ __kmp_str_buf_print( buffer, " %s=%8.6f\n", name, __kmp_load_balance_interval );
+#endif /* KMP_DEBUG */
+} // __kmp_stg_print_ld_balance_interval
+
+#endif /* USE_LOAD_BALANCE */
+
+// -------------------------------------------------------------------------------------------------
+// KMP_INIT_AT_FORK
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_init_at_fork( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_need_register_atfork );
+ if ( __kmp_need_register_atfork ) {
+ __kmp_need_register_atfork_specified = TRUE;
+ };
+} // __kmp_stg_parse_init_at_fork
+
+static void
+__kmp_stg_print_init_at_fork( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_need_register_atfork_specified );
+} // __kmp_stg_print_init_at_fork
+
+// -------------------------------------------------------------------------------------------------
+// KMP_SCHEDULE
+// -------------------------------------------------------------------------------------------------
+
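+// Illustrative example: KMP_SCHEDULE="static,balanced;guided,analytical" selects
+// kmp_sch_static_balanced for static loops and kmp_sch_guided_analytical_chunked for guided
+// loops; clauses are separated by ';' and each clause has the form "<kind>,<modifier>".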
+static void
+__kmp_stg_parse_schedule( char const * name, char const * value, void * data ) {
+
+ if ( value != NULL ) {
+ size_t length = KMP_STRLEN( value );
+ if ( length > INT_MAX ) {
+ KMP_WARNING( LongValue, name );
+ } else {
+ char *semicolon;
+ if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'' )
+ KMP_WARNING( UnbalancedQuotes, name );
+ do {
+ char sentinel;
+
+ semicolon = (char *) strchr( value, ';' );
+ if( *value && semicolon != value ) {
+ char *comma = (char *) strchr( value, ',' );
+
+ if ( comma ) {
+ ++comma;
+ sentinel = ',';
+ } else
+ sentinel = ';';
+ if ( !__kmp_strcasecmp_with_sentinel( "static", value, sentinel ) ) {
+ if( !__kmp_strcasecmp_with_sentinel( "greedy", comma, ';' ) ) {
+ __kmp_static = kmp_sch_static_greedy;
+ continue;
+ } else if( !__kmp_strcasecmp_with_sentinel( "balanced", comma, ';' ) ) {
+ __kmp_static = kmp_sch_static_balanced;
+ continue;
+ }
+ } else if ( !__kmp_strcasecmp_with_sentinel( "guided", value, sentinel ) ) {
+ if ( !__kmp_strcasecmp_with_sentinel( "iterative", comma, ';' ) ) {
+ __kmp_guided = kmp_sch_guided_iterative_chunked;
+ continue;
+ } else if ( !__kmp_strcasecmp_with_sentinel( "analytical", comma, ';' ) ) {
+ /* analytical not allowed for too many threads */
+ __kmp_guided = kmp_sch_guided_analytical_chunked;
+ continue;
+ }
+ }
+ KMP_WARNING( InvalidClause, name, value );
+ } else
+ KMP_WARNING( EmptyClause, name );
+ } while ( (value = semicolon ? semicolon + 1 : NULL) );
+ }
+ }; // if
+
+} // __kmp_stg_parse_schedule
+
+static void
+__kmp_stg_print_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ } else {
+ __kmp_str_buf_print( buffer, " %s='", name );
+ }
+ if ( __kmp_static == kmp_sch_static_greedy ) {
+ __kmp_str_buf_print( buffer, "%s", "static,greedy");
+ } else if ( __kmp_static == kmp_sch_static_balanced ) {
+ __kmp_str_buf_print ( buffer, "%s", "static,balanced");
+ }
+ if ( __kmp_guided == kmp_sch_guided_iterative_chunked ) {
+ __kmp_str_buf_print( buffer, ";%s'\n", "guided,iterative");
+ } else if ( __kmp_guided == kmp_sch_guided_analytical_chunked ) {
+ __kmp_str_buf_print( buffer, ";%s'\n", "guided,analytical");
+ }
+} // __kmp_stg_print_schedule
+
+// -------------------------------------------------------------------------------------------------
+// OMP_SCHEDULE
+// -------------------------------------------------------------------------------------------------
+
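+// Illustrative example: OMP_SCHEDULE="guided,4" sets __kmp_sched to kmp_sch_guided_chunked and
+// __kmp_chunk to 4; a chunk below 1 falls back to KMP_DEFAULT_CHUNK and a chunk above
+// KMP_MAX_CHUNK is clamped to KMP_MAX_CHUNK.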
+static void
+__kmp_stg_parse_omp_schedule( char const * name, char const * value, void * data )
+{
+ size_t length;
+ if( value ) {
+ length = KMP_STRLEN( value );
+ if( length ) {
+ char *comma = (char *) strchr( value, ',' );
+ if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'')
+ KMP_WARNING( UnbalancedQuotes, name );
+ /* get the specified scheduling style */
+ if (!__kmp_strcasecmp_with_sentinel("dynamic", value, ',')) /* DYNAMIC */
+ __kmp_sched = kmp_sch_dynamic_chunked;
+ else if (!__kmp_strcasecmp_with_sentinel("guided", value, ',')) /* GUIDED */
+ __kmp_sched = kmp_sch_guided_chunked;
+// AC: TODO: add AUTO schedule, and probably remove TRAPEZOIDAL (OMP 3.0 does not allow it)
+ else if (!__kmp_strcasecmp_with_sentinel("auto", value, ',')) { /* AUTO */
+ __kmp_sched = kmp_sch_auto;
+ if( comma ) {
+ __kmp_msg( kmp_ms_warning, KMP_MSG( IgnoreChunk, name, comma ), __kmp_msg_null );
+ comma = NULL;
+ }
+ }
+ else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", value, ',')) /* TRAPEZOIDAL */
+ __kmp_sched = kmp_sch_trapezoidal;
+ else if (!__kmp_strcasecmp_with_sentinel("static", value, ',')) /* STATIC */
+ __kmp_sched = kmp_sch_static;
+#ifdef KMP_STATIC_STEAL_ENABLED
+ else if (KMP_ARCH_X86_64 &&
+ !__kmp_strcasecmp_with_sentinel("static_steal", value, ','))
+ __kmp_sched = kmp_sch_static_steal;
+#endif
+ else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ value = NULL; /* skip processing of comma */
+ }
+ if( value && comma ) {
+ __kmp_env_chunk = TRUE;
+
+ if(__kmp_sched == kmp_sch_static)
+ __kmp_sched = kmp_sch_static_chunked;
+ ++comma;
+ __kmp_chunk = __kmp_str_to_int( comma, 0 );
+ if ( __kmp_chunk < 1 ) {
+ __kmp_chunk = KMP_DEFAULT_CHUNK;
+ __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidChunk, name, comma ), __kmp_msg_null );
+ KMP_INFORM( Using_int_Value, name, __kmp_chunk );
+// AC: next block commented out until KMP_DEFAULT_CHUNK != KMP_MIN_CHUNK (to improve code coverage :)
+// The default chunk size is 1 according to the standard, so making KMP_MIN_CHUNK anything other than 1
+// would introduce a mess: a wrong chunk would become 1, but it would be impossible to explicitly set 1,
+// because it would become KMP_MIN_CHUNK...
+// } else if ( __kmp_chunk < KMP_MIN_CHUNK ) {
+// __kmp_chunk = KMP_MIN_CHUNK;
+ } else if ( __kmp_chunk > KMP_MAX_CHUNK ) {
+ __kmp_chunk = KMP_MAX_CHUNK;
+ __kmp_msg( kmp_ms_warning, KMP_MSG( LargeChunk, name, comma ), __kmp_msg_null );
+ KMP_INFORM( Using_int_Value, name, __kmp_chunk );
+ }
+ } else
+ __kmp_env_chunk = FALSE;
+ } else
+ KMP_WARNING( EmptyString, name );
+ }
+ K_DIAG(1, ("__kmp_static == %d\n", __kmp_static))
+ K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided))
+ K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched))
+ K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk))
+} // __kmp_stg_parse_omp_schedule
+
+static void
+__kmp_stg_print_omp_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ } else {
+ __kmp_str_buf_print( buffer, " %s='", name );
+ }
+ if ( __kmp_chunk ) {
+ switch ( __kmp_sched ) {
+ case kmp_sch_dynamic_chunked:
+ __kmp_str_buf_print( buffer, "%s,%d'\n", "dynamic", __kmp_chunk);
+ break;
+ case kmp_sch_guided_iterative_chunked:
+ case kmp_sch_guided_analytical_chunked:
+ __kmp_str_buf_print( buffer, "%s,%d'\n", "guided", __kmp_chunk);
+ break;
+ case kmp_sch_trapezoidal:
+ __kmp_str_buf_print( buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk);
+ break;
+ case kmp_sch_static:
+ case kmp_sch_static_chunked:
+ case kmp_sch_static_balanced:
+ case kmp_sch_static_greedy:
+ __kmp_str_buf_print( buffer, "%s,%d'\n", "static", __kmp_chunk);
+ break;
+ case kmp_sch_static_steal:
+ __kmp_str_buf_print( buffer, "%s,%d'\n", "static_steal", __kmp_chunk);
+ break;
+ case kmp_sch_auto:
+ __kmp_str_buf_print( buffer, "%s,%d'\n", "auto", __kmp_chunk);
+ break;
+ }
+ } else {
+ switch ( __kmp_sched ) {
+ case kmp_sch_dynamic_chunked:
+ __kmp_str_buf_print( buffer, "%s'\n", "dynamic");
+ break;
+ case kmp_sch_guided_iterative_chunked:
+ case kmp_sch_guided_analytical_chunked:
+ __kmp_str_buf_print( buffer, "%s'\n", "guided");
+ break;
+ case kmp_sch_trapezoidal:
+ __kmp_str_buf_print( buffer, "%s'\n", "trapezoidal");
+ break;
+ case kmp_sch_static:
+ case kmp_sch_static_chunked:
+ case kmp_sch_static_balanced:
+ case kmp_sch_static_greedy:
+ __kmp_str_buf_print( buffer, "%s'\n", "static");
+ break;
+ case kmp_sch_static_steal:
+ __kmp_str_buf_print( buffer, "%s'\n", "static_steal");
+ break;
+ case kmp_sch_auto:
+ __kmp_str_buf_print( buffer, "%s'\n", "auto");
+ break;
+ }
+ }
+} // __kmp_stg_print_omp_schedule
+
+// -------------------------------------------------------------------------------------------------
+// KMP_ATOMIC_MODE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_atomic_mode( char const * name, char const * value, void * data ) {
+ // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP compatibility mode.
+ int mode = 0;
+ int max = 1;
+ #ifdef KMP_GOMP_COMPAT
+ max = 2;
+ #endif /* KMP_GOMP_COMPAT */
+ __kmp_stg_parse_int( name, value, 0, max, & mode );
+    // TODO: parse_int is not very suitable for this case. In case of overflow it is better to use
+    // 0 rather than the max value.
+ if ( mode > 0 ) {
+ __kmp_atomic_mode = mode;
+ }; // if
+} // __kmp_stg_parse_atomic_mode
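+// For example (illustrative values): KMP_ATOMIC_MODE=2 selects the GOMP compatibility mode (only accepted
+// when the library is built with KMP_GOMP_COMPAT), KMP_ATOMIC_MODE=1 selects the Intel performance mode,
+// and KMP_ATOMIC_MODE=0 leaves the built-in default untouched.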
+
+static void
+__kmp_stg_print_atomic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_atomic_mode );
+} // __kmp_stg_print_atomic_mode
+
+
+// -------------------------------------------------------------------------------------------------
+// KMP_CONSISTENCY_CHECK
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_consistency_check( char const * name, char const * value, void * data ) {
+ if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) {
+        // Note: this will not work from kmp_set_defaults because the th_cons stack was not allocated
+        // for existing thread(s), so the first __kmp_push_<construct> will fail with an assertion.
+ // TODO: allocate th_cons if called from kmp_set_defaults.
+ __kmp_env_consistency_check = TRUE;
+ } else if ( ! __kmp_strcasecmp_with_sentinel( "none", value, 0 ) ) {
+ __kmp_env_consistency_check = FALSE;
+ } else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }; // if
+} // __kmp_stg_parse_consistency_check
+
+static void
+__kmp_stg_print_consistency_check( kmp_str_buf_t * buffer, char const * name, void * data ) {
+#if KMP_DEBUG
+ const char *value = NULL;
+
+ if ( __kmp_env_consistency_check ) {
+ value = "all";
+ } else {
+ value = "none";
+ }
+
+ if ( value != NULL ) {
+ __kmp_stg_print_str( buffer, name, value );
+ }
+#endif /* KMP_DEBUG */
+} // __kmp_stg_print_consistency_check
+
+
+#if USE_ITT_BUILD
+// -------------------------------------------------------------------------------------------------
+// KMP_ITT_PREPARE_DELAY
+// -------------------------------------------------------------------------------------------------
+
+#if USE_ITT_NOTIFY
+
+static void
+__kmp_stg_parse_itt_prepare_delay( char const * name, char const * value, void * data )
+{
+    // Experimental code: KMP_ITT_PREPARE_DELAY specifies the number of loop iterations.
+ int delay = 0;
+ __kmp_stg_parse_int( name, value, 0, INT_MAX, & delay );
+ __kmp_itt_prepare_delay = delay;
+} // __kmp_stg_parse_itt_prepare_delay
+
+static void
+__kmp_stg_print_itt_prepare_delay( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_uint64( buffer, name, __kmp_itt_prepare_delay );
+
+} // __kmp_stg_print_itt_prepare_delay
+
+#endif // USE_ITT_NOTIFY
+#endif /* USE_ITT_BUILD */
+
+// -------------------------------------------------------------------------------------------------
+// KMP_MALLOC_POOL_INCR
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_malloc_pool_incr( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_size(
+ name,
+ value,
+ KMP_MIN_MALLOC_POOL_INCR,
+ KMP_MAX_MALLOC_POOL_INCR,
+ NULL,
+ & __kmp_malloc_pool_incr,
+ 1
+ );
+} // __kmp_stg_parse_malloc_pool_incr
+
+static void
+__kmp_stg_print_malloc_pool_incr( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_size( buffer, name, __kmp_malloc_pool_incr );
+
+} // __kmp_stg_print_malloc_pool_incr
+
+
+#ifdef KMP_DEBUG
+
+// -------------------------------------------------------------------------------------------------
+// KMP_PAR_RANGE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_par_range_env( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_par_range(
+ name,
+ value,
+ & __kmp_par_range,
+ __kmp_par_range_routine,
+ __kmp_par_range_filename,
+ & __kmp_par_range_lb,
+ & __kmp_par_range_ub
+ );
+} // __kmp_stg_parse_par_range_env
+
+static void
+__kmp_stg_print_par_range_env( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if (__kmp_par_range != 0) {
+ __kmp_stg_print_str( buffer, name, par_range_to_print );
+ }
+} // __kmp_stg_print_par_range_env
+
+// -------------------------------------------------------------------------------------------------
+// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_yield_cycle( char const * name, char const * value, void * data ) {
+ int flag = __kmp_yield_cycle;
+ __kmp_stg_parse_bool( name, value, & flag );
+ __kmp_yield_cycle = flag;
+} // __kmp_stg_parse_yield_cycle
+
+static void
+__kmp_stg_print_yield_cycle( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_yield_cycle );
+} // __kmp_stg_print_yield_cycle
+
+static void
+__kmp_stg_parse_yield_on( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_on_count );
+} // __kmp_stg_parse_yield_on
+
+static void
+__kmp_stg_print_yield_on( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_yield_on_count );
+} // __kmp_stg_print_yield_on
+
+static void
+__kmp_stg_parse_yield_off( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_off_count );
+} // __kmp_stg_parse_yield_off
+
+static void
+__kmp_stg_print_yield_off( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_yield_off_count );
+} // __kmp_stg_print_yield_off
+
+#endif
+
+// -------------------------------------------------------------------------------------------------
+// KMP_INIT_WAIT, KMP_NEXT_WAIT
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_init_wait( char const * name, char const * value, void * data ) {
+ int wait;
+ KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 );
+ wait = __kmp_init_wait / 2;
+ __kmp_stg_parse_int( name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, & wait );
+ __kmp_init_wait = wait * 2;
+ KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 );
+ __kmp_yield_init = __kmp_init_wait;
+} // __kmp_stg_parse_init_wait
+
+static void
+__kmp_stg_print_init_wait( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_init_wait );
+} // __kmp_stg_print_init_wait
+
+static void
+__kmp_stg_parse_next_wait( char const * name, char const * value, void * data ) {
+ int wait;
+ KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 );
+ wait = __kmp_next_wait / 2;
+ __kmp_stg_parse_int( name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, & wait );
+ __kmp_next_wait = wait * 2;
+ KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 );
+ __kmp_yield_next = __kmp_next_wait;
+} // __kmp_stg_parse_next_wait
+
+static void
+__kmp_stg_print_next_wait( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_next_wait );
+} //__kmp_stg_print_next_wait
+
+
+// -------------------------------------------------------------------------------------------------
+// KMP_GTID_MODE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_gtid_mode( char const * name, char const * value, void * data ) {
+ //
+ // Modes:
+ // 0 -- do not change default
+ // 1 -- sp search
+ // 2 -- use "keyed" TLS var, i.e.
+ // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS)
+ // 3 -- __declspec(thread) TLS var in tdata section
+ //
+ int mode = 0;
+ int max = 2;
+ #ifdef KMP_TDATA_GTID
+ max = 3;
+ #endif /* KMP_TDATA_GTID */
+ __kmp_stg_parse_int( name, value, 0, max, & mode );
+    // TODO: parse_int is not well suited for this case. In case of overflow it is better to use
+    // 0 rather than the max value.
+ if ( mode == 0 ) {
+ __kmp_adjust_gtid_mode = TRUE;
+ }
+ else {
+ __kmp_gtid_mode = mode;
+ __kmp_adjust_gtid_mode = FALSE;
+ }; // if
+} // __kmp_stg_parse_gtid_mode
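+// For example (illustrative values): KMP_GTID_MODE=0 re-enables automatic mode selection via
+// __kmp_adjust_gtid_mode, while KMP_GTID_MODE=3 selects the __declspec(thread) TLS storage and is only
+// accepted when the library is built with KMP_TDATA_GTID (otherwise the maximum accepted value is 2).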
+
+static void
+__kmp_stg_print_gtid_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if ( __kmp_adjust_gtid_mode ) {
+ __kmp_stg_print_int( buffer, name, 0 );
+ }
+ else {
+ __kmp_stg_print_int( buffer, name, __kmp_gtid_mode );
+ }
+} // __kmp_stg_print_gtid_mode
+
+
+// -------------------------------------------------------------------------------------------------
+// KMP_NUM_LOCKS_IN_BLOCK
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_lock_block( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 0, KMP_INT_MAX, & __kmp_num_locks_in_block );
+} // __kmp_stg_parse_lock_block
+
+static void
+__kmp_stg_print_lock_block( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_num_locks_in_block );
+} // __kmp_stg_print_lock_block
+
+// -------------------------------------------------------------------------------------------------
+// KMP_LOCK_KIND
+// -------------------------------------------------------------------------------------------------
+
+#if KMP_USE_DYNAMIC_LOCK
+# define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a)
+#else
+# define KMP_STORE_LOCK_SEQ(a)
+#endif
+
+static void
+__kmp_stg_parse_lock_kind( char const * name, char const * value, void * data ) {
+ if ( __kmp_init_user_locks ) {
+ KMP_WARNING( EnvLockWarn, name );
+ return;
+ }
+
+ if ( __kmp_str_match( "tas", 2, value )
+ || __kmp_str_match( "test and set", 2, value )
+ || __kmp_str_match( "test_and_set", 2, value )
+ || __kmp_str_match( "test-and-set", 2, value )
+ || __kmp_str_match( "test andset", 2, value )
+ || __kmp_str_match( "test_andset", 2, value )
+ || __kmp_str_match( "test-andset", 2, value )
+ || __kmp_str_match( "testand set", 2, value )
+ || __kmp_str_match( "testand_set", 2, value )
+ || __kmp_str_match( "testand-set", 2, value )
+ || __kmp_str_match( "testandset", 2, value ) ) {
+ __kmp_user_lock_kind = lk_tas;
+ KMP_STORE_LOCK_SEQ(tas);
+ }
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+ else if ( __kmp_str_match( "futex", 1, value ) ) {
+ if ( __kmp_futex_determine_capable() ) {
+ __kmp_user_lock_kind = lk_futex;
+ KMP_STORE_LOCK_SEQ(futex);
+ }
+ else {
+ KMP_WARNING( FutexNotSupported, name, value );
+ }
+ }
+#endif
+ else if ( __kmp_str_match( "ticket", 2, value ) ) {
+ __kmp_user_lock_kind = lk_ticket;
+ KMP_STORE_LOCK_SEQ(ticket);
+ }
+ else if ( __kmp_str_match( "queuing", 1, value )
+ || __kmp_str_match( "queue", 1, value ) ) {
+ __kmp_user_lock_kind = lk_queuing;
+ KMP_STORE_LOCK_SEQ(queuing);
+ }
+ else if ( __kmp_str_match( "drdpa ticket", 1, value )
+ || __kmp_str_match( "drdpa_ticket", 1, value )
+ || __kmp_str_match( "drdpa-ticket", 1, value )
+ || __kmp_str_match( "drdpaticket", 1, value )
+ || __kmp_str_match( "drdpa", 1, value ) ) {
+ __kmp_user_lock_kind = lk_drdpa;
+ KMP_STORE_LOCK_SEQ(drdpa);
+ }
+#if KMP_USE_ADAPTIVE_LOCKS
+ else if ( __kmp_str_match( "adaptive", 1, value ) ) {
+ if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here?
+ __kmp_user_lock_kind = lk_adaptive;
+ KMP_STORE_LOCK_SEQ(adaptive);
+ } else {
+ KMP_WARNING( AdaptiveNotSupported, name, value );
+ __kmp_user_lock_kind = lk_queuing;
+ KMP_STORE_LOCK_SEQ(queuing);
+ }
+ }
+#endif // KMP_USE_ADAPTIVE_LOCKS
+#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
+ else if ( __kmp_str_match("rtm", 1, value) ) {
+ if ( __kmp_cpuinfo.rtm ) {
+ __kmp_user_lock_kind = lk_rtm;
+ KMP_STORE_LOCK_SEQ(rtm);
+ } else {
+ KMP_WARNING( AdaptiveNotSupported, name, value );
+ __kmp_user_lock_kind = lk_queuing;
+ KMP_STORE_LOCK_SEQ(queuing);
+ }
+ }
+ else if ( __kmp_str_match("hle", 1, value) ) {
+ __kmp_user_lock_kind = lk_hle;
+ KMP_STORE_LOCK_SEQ(hle);
+ }
+#endif
+ else {
+ KMP_WARNING( StgInvalidValue, name, value );
+ }
+}
+
+static void
+__kmp_stg_print_lock_kind( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ const char *value = NULL;
+
+ switch ( __kmp_user_lock_kind ) {
+ case lk_default:
+ value = "default";
+ break;
+
+ case lk_tas:
+ value = "tas";
+ break;
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+ case lk_futex:
+ value = "futex";
+ break;
+#endif
+
+#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
+ case lk_rtm:
+ value = "rtm";
+ break;
+
+ case lk_hle:
+ value = "hle";
+ break;
+#endif
+
+ case lk_ticket:
+ value = "ticket";
+ break;
+
+ case lk_queuing:
+ value = "queuing";
+ break;
+
+ case lk_drdpa:
+ value = "drdpa";
+ break;
+#if KMP_USE_ADAPTIVE_LOCKS
+ case lk_adaptive:
+ value = "adaptive";
+ break;
+#endif
+ }
+
+ if ( value != NULL ) {
+ __kmp_stg_print_str( buffer, name, value );
+ }
+}
+
+#if KMP_USE_ADAPTIVE_LOCKS
+
+// -------------------------------------------------------------------------------------------------
+// KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE
+// -------------------------------------------------------------------------------------------------
+
+// Parse out values for the tunable parameters from a string of the form
+// KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness]
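+// For example (illustrative values): KMP_ADAPTIVE_LOCK_PROPS="1000,500" sets max_soft_retries to 1000 and
+// max_badness to 500, while KMP_ADAPTIVE_LOCK_PROPS="1000" sets only max_soft_retries and max_badness is
+// then stored as 0.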
+static void
+__kmp_stg_parse_adaptive_lock_props( const char *name, const char *value, void *data )
+{
+ int max_retries = 0;
+ int max_badness = 0;
+
+ const char *next = value;
+
+ int total = 0; // Count elements that were set. It'll be used as an array size
+    int prev_comma = FALSE; // For correct processing of sequential commas
+ int i;
+
+ // Save values in the structure __kmp_speculative_backoff_params
+ // Run only 3 iterations because it is enough to read two values or find a syntax error
+ for ( i = 0; i < 3 ; i++) {
+ SKIP_WS( next );
+
+ if ( *next == '\0' ) {
+ break;
+ }
+        // Next character is neither a digit nor a comma, or more than two values were given => syntax error
+ if ( ( ( *next < '0' || *next > '9' ) && *next !=',' ) || total > 2 ) {
+ KMP_WARNING( EnvSyntaxError, name, value );
+ return;
+ }
+ // The next character is ','
+ if ( *next == ',' ) {
+            // ',' is the first character or follows another ','
+ if ( total == 0 || prev_comma ) {
+ total++;
+ }
+ prev_comma = TRUE;
+ next++; //skip ','
+ SKIP_WS( next );
+ }
+ // Next character is a digit
+ if ( *next >= '0' && *next <= '9' ) {
+ int num;
+ const char *buf = next;
+ char const * msg = NULL;
+ prev_comma = FALSE;
+ SKIP_DIGITS( next );
+ total++;
+
+ const char *tmp = next;
+ SKIP_WS( tmp );
+ if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) {
+ KMP_WARNING( EnvSpacesNotAllowed, name, value );
+ return;
+ }
+
+ num = __kmp_str_to_int( buf, *next );
+ if ( num < 0 ) { // The number of retries should be >= 0
+ msg = KMP_I18N_STR( ValueTooSmall );
+ num = 1;
+ } else if ( num > KMP_INT_MAX ) {
+ msg = KMP_I18N_STR( ValueTooLarge );
+ num = KMP_INT_MAX;
+ }
+ if ( msg != NULL ) {
+ // Message is not empty. Print warning.
+ KMP_WARNING( ParseSizeIntWarn, name, value, msg );
+ KMP_INFORM( Using_int_Value, name, num );
+ }
+ if( total == 1 ) {
+ max_retries = num;
+ } else if( total == 2 ) {
+ max_badness = num;
+ }
+ }
+ }
+ KMP_DEBUG_ASSERT( total > 0 );
+ if( total <= 0 ) {
+ KMP_WARNING( EnvSyntaxError, name, value );
+ return;
+ }
+ __kmp_adaptive_backoff_params.max_soft_retries = max_retries;
+ __kmp_adaptive_backoff_params.max_badness = max_badness;
+}
+
+
+static void
+__kmp_stg_print_adaptive_lock_props(kmp_str_buf_t * buffer, char const * name, void * data )
+{
+ if( __kmp_env_format ) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ } else {
+ __kmp_str_buf_print( buffer, " %s='", name );
+ }
+ __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_adaptive_backoff_params.max_soft_retries,
+ __kmp_adaptive_backoff_params.max_badness );
+} // __kmp_stg_print_adaptive_lock_props
+
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+
+static void
+__kmp_stg_parse_speculative_statsfile( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_file( name, value, "", & __kmp_speculative_statsfile );
+} // __kmp_stg_parse_speculative_statsfile
+
+static void
+__kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if ( __kmp_str_match( "-", 0, __kmp_speculative_statsfile ) ) {
+ __kmp_stg_print_str( buffer, name, "stdout" );
+ } else {
+ __kmp_stg_print_str( buffer, name, __kmp_speculative_statsfile );
+ }
+
+} // __kmp_stg_print_speculative_statsfile
+
+#endif // KMP_DEBUG_ADAPTIVE_LOCKS
+
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+// -------------------------------------------------------------------------------------------------
+// KMP_PLACE_THREADS
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) {
+ // Value example: 5Cx2Tx15O
+ // Which means "use 5 cores with offset 15, 2 threads per core"
+ // AC: extended to sockets level, examples of
+ // "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core":
+ // 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t
+    // To avoid breaking legacy code the core offset can come last;
+    // the postfix "o" or the prefix "@" can serve as the offset designator.
+ // Note: not all syntax errors are analyzed, some may be skipped.
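+    // For example (illustrative value): "2s@6,2c@2,2t" is parsed below into __kmp_place_num_sockets = 2,
+    // __kmp_place_socket_offset = 6, __kmp_place_num_cores = 2, __kmp_place_core_offset = 2 and
+    // __kmp_place_num_threads_per_core = 2.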
+#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x')
+ int num;
+ int single_warning = 0;
+ int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0;
+ const char *next = value;
+ const char *prev;
+
+ SKIP_WS(next); // skip white spaces
+ if (*next == '\0')
+ return; // no data provided, retain default values
+ // Get num_sockets first (or whatever specified)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 's' || *next == 'S') { // e.g. "2s"
+ __kmp_place_num_sockets = num;
+ flagS = 1; // got num sockets
+ next++;
+ if (*next == '@') { // socket offset, e.g. "2s@4"
+ flagSO = 1;
+ prev = ++next; // don't allow spaces for simplicity
+ if (!(*next >= '0' && *next <= '9')) {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ __kmp_place_socket_offset = num;
+ }
+ } else if (*next == 'c' || *next == 'C') {
+ __kmp_place_num_cores = num;
+ flagS = flagC = 1; // sockets were not specified - use default
+ next++;
+ if (*next == '@') { // core offset, e.g. "2c@6"
+ flagCO = 1;
+ prev = ++next; // don't allow spaces for simplicity
+ if (!(*next >= '0' && *next <= '9')) {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ __kmp_place_core_offset = num;
+ }
+ } else if (CHECK_DELIM(next)) {
+ __kmp_place_num_cores = num; // no letter-designator - num cores
+ flagS = flagC = 1; // sockets were not specified - use default
+ next++;
+ } else if (*next == 't' || *next == 'T') {
+ __kmp_place_num_threads_per_core = num;
+ // sockets, cores were not specified - use default
+ return; // we ignore offset value in case all cores are used
+ } else if (*next == '\0') {
+ __kmp_place_num_cores = num;
+ return; // the only value provided - set num cores
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
+ SKIP_WS(next);
+ if (*next == '\0')
+ return; // " n " - something like this
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
+ }
+
+ // Get second value (could be offset, num_cores, num_threads)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'c' || *next == 'C') {
+ KMP_DEBUG_ASSERT(flagC == 0);
+ __kmp_place_num_cores = num;
+ flagC = 1;
+ next++;
+ if (*next == '@') { // core offset, e.g. "2c@6"
+ flagCO = 1;
+ prev = ++next; // don't allow spaces for simplicity
+ if (!(*next >= '0' && *next <= '9')) {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ __kmp_place_core_offset = num;
+ }
+ } else if (*next == 'o' || *next == 'O') { // offset specified
+ KMP_WARNING(AffThrPlaceDeprecated);
+ single_warning = 1;
+ if (flagC) { // whether num_cores already specified (sockets skipped)
+ KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both
+ __kmp_place_core_offset = num;
+ } else {
+ KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both
+ __kmp_place_socket_offset = num;
+ }
+ next++;
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
+ __kmp_place_num_threads_per_core = num;
+ flagC = 1; // num_cores could be skipped ?
+ flagT = 1;
+ next++; // can have core-offset specified after num threads
+ } else if (*next == '\0') {
+ KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
+ __kmp_place_num_threads_per_core = num;
+ return; // two values provided without letter-designator
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ SKIP_WS(next);
+ if (*next == '\0')
+ return; // " Ns,Nc " - something like this
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
+ }
+
+ // Get third value (could be core-offset, num_cores, num_threads)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
+ __kmp_place_num_threads_per_core = num;
+ if (flagC == 0)
+ return; // num_cores could be skipped (e.g. 2s,4o,2t)
+ flagT = 1;
+ next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
+ } else if (*next == 'c' || *next == 'C') {
+ KMP_DEBUG_ASSERT(flagC == 0);
+ __kmp_place_num_cores = num;
+ flagC = 1;
+ next++;
+ //KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator
+ } else if (*next == 'o' || *next == 'O') {
+ KMP_WARNING(AffThrPlaceDeprecated);
+ single_warning = 1;
+ KMP_DEBUG_ASSERT(flagC);
+ //KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
+ __kmp_place_core_offset = num;
+ next++;
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ KMP_DEBUG_ASSERT(flagC);
+ SKIP_WS(next);
+ if ( *next == '\0' )
+ return;
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
+ }
+
+ // Get 4-th value (could be core-offset, num_threads)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'o' || *next == 'O') {
+ if (!single_warning) { // warn once
+ KMP_WARNING(AffThrPlaceDeprecated);
+ }
+ KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
+ __kmp_place_core_offset = num;
+ next++;
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
+ __kmp_place_num_threads_per_core = num;
+ flagT = 1;
+ next++; // can have core-offset specified after num threads
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ SKIP_WS(next);
+ if ( *next == '\0' )
+ return;
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
+ }
+
+ // Get 5-th value (could be core-offset, num_threads)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'o' || *next == 'O') {
+ if (!single_warning) { // warn once
+ KMP_WARNING(AffThrPlaceDeprecated);
+ }
+ KMP_DEBUG_ASSERT(flagT);
+ KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
+ __kmp_place_core_offset = num;
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
+ __kmp_place_num_threads_per_core = num;
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ }
+ return;
+#undef CHECK_DELIM
+}
+
+static void
+__kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
+ int comma = 0;
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ if(__kmp_env_format)
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ else
+ __kmp_str_buf_print(buffer, " %s='", name);
+ if (__kmp_place_num_sockets) {
+ __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
+ if (__kmp_place_socket_offset)
+ __kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset);
+ comma = 1;
+ }
+ if (__kmp_place_num_cores) {
+ __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
+ if (__kmp_place_core_offset)
+ __kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset);
+ comma = 1;
+ }
+ if (__kmp_place_num_threads_per_core)
+ __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
+ __kmp_str_buf_print(buffer, "%s'\n", buf.str );
+ __kmp_str_buf_free(&buf);
+/*
+ } else {
+ __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
+*/
+ }
+}
+
+#if USE_ITT_BUILD
+// -------------------------------------------------------------------------------------------------
+// KMP_FORKJOIN_FRAMES
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_forkjoin_frames( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_forkjoin_frames );
+} // __kmp_stg_parse_forkjoin_frames
+
+static void
+__kmp_stg_print_forkjoin_frames( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_forkjoin_frames );
+} // __kmp_stg_print_forkjoin_frames
+
+// -------------------------------------------------------------------------------------------------
+// KMP_FORKJOIN_FRAMES_MODE
+// -------------------------------------------------------------------------------------------------
+
+static void
+__kmp_stg_parse_forkjoin_frames_mode( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_int( name, value, 0, 3, & __kmp_forkjoin_frames_mode );
+} // __kmp_stg_parse_forkjoin_frames_mode
+
+static void
+__kmp_stg_print_forkjoin_frames_mode( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_int( buffer, name, __kmp_forkjoin_frames_mode );
+} // __kmp_stg_print_forkjoin_frames_mode
+#endif /* USE_ITT_BUILD */
+
+// -------------------------------------------------------------------------------------------------
+// OMP_DISPLAY_ENV
+// -------------------------------------------------------------------------------------------------
+
+#if OMP_40_ENABLED
+
+static void
+__kmp_stg_parse_omp_display_env( char const * name, char const * value, void * data )
+{
+ if ( __kmp_str_match( "VERBOSE", 1, value ) )
+ {
+ __kmp_display_env_verbose = TRUE;
+ } else {
+ __kmp_stg_parse_bool( name, value, & __kmp_display_env );
+ }
+
+} // __kmp_stg_parse_omp_display_env
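+// For example (illustrative values): OMP_DISPLAY_ENV=VERBOSE enables the verbose listing via
+// __kmp_display_env_verbose, while OMP_DISPLAY_ENV=TRUE (or any other boolean value) only toggles
+// __kmp_display_env through __kmp_stg_parse_bool.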
+
+static void
+__kmp_stg_print_omp_display_env( kmp_str_buf_t * buffer, char const * name, void * data )
+{
+ if ( __kmp_display_env_verbose )
+ {
+ __kmp_stg_print_str( buffer, name, "VERBOSE" );
+ } else {
+ __kmp_stg_print_bool( buffer, name, __kmp_display_env );
+ }
+} // __kmp_stg_print_omp_display_env
+
+static void
+__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) {
+ if ( TCR_4(__kmp_init_parallel) ) {
+ KMP_WARNING( EnvParallelWarn, name );
+ return;
+ } // read value before first parallel only
+ __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation );
+} // __kmp_stg_parse_omp_cancellation
+
+static void
+__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation );
+} // __kmp_stg_print_omp_cancellation
+
+#endif
+
+// -------------------------------------------------------------------------------------------------
+// Table.
+// -------------------------------------------------------------------------------------------------
+
+
+static kmp_setting_t __kmp_stg_table[] = {
+
+ { "KMP_ALL_THREADS", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 },
+ { "KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, NULL, 0, 0 },
+ { "KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0 },
+ { "KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 },
+ { "KMP_MAX_THREADS", __kmp_stg_parse_all_threads, NULL, NULL, 0, 0 }, // For backward compatibility
+ { "KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, __kmp_stg_print_monitor_stacksize, NULL, 0, 0 },
+ { "KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, 0, 0 },
+ { "KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, __kmp_stg_print_stackoffset, NULL, 0, 0 },
+ { "KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 },
+ { "KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, 0, 0 },
+ { "KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, 0 },
+ { "KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, 0, 0 },
+
+ { "OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0 },
+ { "OMP_NUM_THREADS", __kmp_stg_parse_num_threads, __kmp_stg_print_num_threads, NULL, 0, 0 },
+ { "OMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 },
+
+ { "KMP_TASKING", __kmp_stg_parse_tasking, __kmp_stg_print_tasking, NULL, 0, 0 },
+ { "KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, __kmp_stg_print_task_stealing, NULL, 0, 0 },
+ { "OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, __kmp_stg_print_max_active_levels, NULL, 0, 0 },
+ { "OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 },
+ { "OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 },
+#if KMP_NESTED_HOT_TEAMS
+ { "KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, __kmp_stg_print_hot_teams_level, NULL, 0, 0 },
+ { "KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, __kmp_stg_print_hot_teams_mode, NULL, 0, 0 },
+#endif // KMP_NESTED_HOT_TEAMS
+
+#if KMP_HANDLE_SIGNALS
+ { "KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, __kmp_stg_print_handle_signals, NULL, 0, 0 },
+#endif
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ { "KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, __kmp_stg_print_inherit_fp_control, NULL, 0, 0 },
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+#ifdef KMP_GOMP_COMPAT
+ { "GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0 },
+#endif
+
+#ifdef KMP_DEBUG
+ { "KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, 0 },
+ { "KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, 0 },
+ { "KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, 0 },
+ { "KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, 0 },
+ { "KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, 0 },
+ { "KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, 0 },
+ { "KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0 },
+ { "KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, NULL, 0, 0 },
+ { "KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, __kmp_stg_print_debug_buf_atomic, NULL, 0, 0 },
+ { "KMP_DEBUG_BUF_CHARS", __kmp_stg_parse_debug_buf_chars, __kmp_stg_print_debug_buf_chars, NULL, 0, 0 },
+ { "KMP_DEBUG_BUF_LINES", __kmp_stg_parse_debug_buf_lines, __kmp_stg_print_debug_buf_lines, NULL, 0, 0 },
+ { "KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0 },
+
+ { "KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, __kmp_stg_print_par_range_env, NULL, 0, 0 },
+ { "KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, __kmp_stg_print_yield_cycle, NULL, 0, 0 },
+ { "KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, 0, 0 },
+ { "KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, NULL, 0, 0 },
+#endif // KMP_DEBUG
+
+ { "KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, __kmp_stg_print_align_alloc, NULL, 0, 0 },
+
+ { "KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
+ { "KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
+ { "KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
+ { "KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
+#if KMP_FAST_REDUCTION_BARRIER
+ { "KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 },
+ { "KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 },
+#endif
+
+ { "KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, __kmp_stg_print_abort_delay, NULL, 0, 0 },
+ { "KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, __kmp_stg_print_cpuinfo_file, NULL, 0, 0 },
+ { "KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 },
+ { "KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 },
+ { "KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, __kmp_stg_print_storage_map, NULL, 0, 0 },
+ { "KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, __kmp_stg_print_all_threadprivate, NULL, 0, 0 },
+ { "KMP_FOREIGN_THREADS_THREADPRIVATE", __kmp_stg_parse_foreign_threads_threadprivate, __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0 },
+
+#if KMP_AFFINITY_SUPPORTED
+ { "KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, 0, 0 },
+# ifdef KMP_GOMP_COMPAT
+ { "GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, /* no print */ NULL, 0, 0 },
+# endif /* KMP_GOMP_COMPAT */
+# if OMP_40_ENABLED
+ { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 },
+ { "OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0 },
+# else
+ { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, 0 },
+# endif /* OMP_40_ENABLED */
+
+ { "KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0 },
+
+#else
+
+ //
+ // KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES.
+ // OMP_PROC_BIND and proc-bind-var are supported, however.
+ //
+# if OMP_40_ENABLED
+ { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 },
+# endif
+
+#endif // KMP_AFFINITY_SUPPORTED
+
+ { "KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0 },
+ { "KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, 0, 0 },
+ { "OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, NULL, 0, 0 },
+ { "KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, __kmp_stg_print_atomic_mode, NULL, 0, 0 },
+ { "KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, __kmp_stg_print_consistency_check, NULL, 0, 0 },
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ { "KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, __kmp_stg_print_itt_prepare_delay, NULL, 0, 0 },
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ { "KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, __kmp_stg_print_malloc_pool_incr, NULL, 0, 0 },
+ { "KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, NULL, 0, 0 },
+ { "KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, NULL, 0, 0 },
+ { "KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, NULL, 0, 0 },
+ { "OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, NULL, 0, 0 },
+ { "KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0 },
+
+#ifdef USE_LOAD_BALANCE
+ { "KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval,__kmp_stg_print_ld_balance_interval,NULL, 0, 0 },
+#endif
+
+ { "KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, __kmp_stg_print_lock_block, NULL, 0, 0 },
+ { "KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, NULL, 0, 0 },
+#if KMP_USE_ADAPTIVE_LOCKS
+ { "KMP_ADAPTIVE_LOCK_PROPS", __kmp_stg_parse_adaptive_lock_props,__kmp_stg_print_adaptive_lock_props, NULL, 0, 0 },
+#if KMP_DEBUG_ADAPTIVE_LOCKS
+ { "KMP_SPECULATIVE_STATSFILE", __kmp_stg_parse_speculative_statsfile,__kmp_stg_print_speculative_statsfile, NULL, 0, 0 },
+#endif
+#endif // KMP_USE_ADAPTIVE_LOCKS
+ { "KMP_PLACE_THREADS", __kmp_stg_parse_place_threads, __kmp_stg_print_place_threads, NULL, 0, 0 },
+#if USE_ITT_BUILD
+ { "KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, __kmp_stg_print_forkjoin_frames, NULL, 0, 0 },
+ { "KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0 },
+#endif
+
+# if OMP_40_ENABLED
+ { "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 },
+ { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 },
+#endif
+ { "", NULL, NULL, NULL, 0, 0 }
+}; // settings
+
+static int const __kmp_stg_count = sizeof( __kmp_stg_table ) / sizeof( kmp_setting_t );
+
+static inline
+kmp_setting_t *
+__kmp_stg_find( char const * name ) {
+
+ int i;
+ if ( name != NULL ) {
+ for ( i = 0; i < __kmp_stg_count; ++ i ) {
+ if ( strcmp( __kmp_stg_table[ i ].name, name ) == 0 ) {
+ return & __kmp_stg_table[ i ];
+ }; // if
+ }; // for
+ }; // if
+ return NULL;
+
+} // __kmp_stg_find
+
+
+static int
+__kmp_stg_cmp( void const * _a, void const * _b ) {
+ kmp_setting_t * a = (kmp_setting_t *) _a;
+ kmp_setting_t * b = (kmp_setting_t *) _b;
+
+ //
+ // Process KMP_AFFINITY last.
+ // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY.
+ //
+ if ( strcmp( a->name, "KMP_AFFINITY" ) == 0 ) {
+ if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) {
+ return 0;
+ }
+ return 1;
+ }
+ else if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) {
+ return -1;
+ }
+ return strcmp( a->name, b->name );
+} // __kmp_stg_cmp
+
+
+static void
+__kmp_stg_init( void
+) {
+
+ static int initialized = 0;
+
+ if ( ! initialized ) {
+
+ // Sort table.
+ qsort( __kmp_stg_table, __kmp_stg_count - 1, sizeof( kmp_setting_t ), __kmp_stg_cmp );
+
+ { // Initialize *_STACKSIZE data.
+
+ kmp_setting_t * kmp_stacksize = __kmp_stg_find( "KMP_STACKSIZE" ); // 1st priority.
+#ifdef KMP_GOMP_COMPAT
+ kmp_setting_t * gomp_stacksize = __kmp_stg_find( "GOMP_STACKSIZE" ); // 2nd priority.
+#endif
+ kmp_setting_t * omp_stacksize = __kmp_stg_find( "OMP_STACKSIZE" ); // 3rd priority.
+
+            // !!! The volatile keyword is a workaround for Intel (R) C Compiler bug CQ49908.
+            // !!! The compiler does not understand that rivals is used and optimizes out the assignments
+ // !!! rivals[ i ++ ] = ...;
+ static kmp_setting_t * volatile rivals[ 4 ];
+ static kmp_stg_ss_data_t kmp_data = { 1, (kmp_setting_t **)rivals };
+#ifdef KMP_GOMP_COMPAT
+ static kmp_stg_ss_data_t gomp_data = { 1024, (kmp_setting_t **)rivals };
+#endif
+ static kmp_stg_ss_data_t omp_data = { 1024, (kmp_setting_t **)rivals };
+ int i = 0;
+
+ rivals[ i ++ ] = kmp_stacksize;
+#ifdef KMP_GOMP_COMPAT
+ if ( gomp_stacksize != NULL ) {
+ rivals[ i ++ ] = gomp_stacksize;
+ }; // if
+#endif
+ rivals[ i ++ ] = omp_stacksize;
+ rivals[ i ++ ] = NULL;
+
+ kmp_stacksize->data = & kmp_data;
+#ifdef KMP_GOMP_COMPAT
+ if ( gomp_stacksize != NULL ) {
+ gomp_stacksize->data = & gomp_data;
+ }; // if
+#endif
+ omp_stacksize->data = & omp_data;
+
+ }
+
+ { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data.
+
+ kmp_setting_t * kmp_library = __kmp_stg_find( "KMP_LIBRARY" ); // 1st priority.
+ kmp_setting_t * omp_wait_policy = __kmp_stg_find( "OMP_WAIT_POLICY" ); // 2nd priority.
+
+            // !!! The volatile keyword is a workaround for Intel (R) C Compiler bug CQ49908.
+ static kmp_setting_t * volatile rivals[ 3 ];
+ static kmp_stg_wp_data_t kmp_data = { 0, (kmp_setting_t **)rivals };
+ static kmp_stg_wp_data_t omp_data = { 1, (kmp_setting_t **)rivals };
+ int i = 0;
+
+ rivals[ i ++ ] = kmp_library;
+ if ( omp_wait_policy != NULL ) {
+ rivals[ i ++ ] = omp_wait_policy;
+ }; // if
+ rivals[ i ++ ] = NULL;
+
+ kmp_library->data = & kmp_data;
+ if ( omp_wait_policy != NULL ) {
+ omp_wait_policy->data = & omp_data;
+ }; // if
+
+ }
+
+ { // Initialize KMP_ALL_THREADS, KMP_MAX_THREADS, and OMP_THREAD_LIMIT data.
+
+ kmp_setting_t * kmp_all_threads = __kmp_stg_find( "KMP_ALL_THREADS" ); // 1st priority.
+ kmp_setting_t * kmp_max_threads = __kmp_stg_find( "KMP_MAX_THREADS" ); // 2nd priority.
+ kmp_setting_t * omp_thread_limit = __kmp_stg_find( "OMP_THREAD_LIMIT" ); // 3rd priority.
+
+            // !!! The volatile keyword is a workaround for Intel (R) C Compiler bug CQ49908.
+ static kmp_setting_t * volatile rivals[ 4 ];
+ int i = 0;
+
+ rivals[ i ++ ] = kmp_all_threads;
+ rivals[ i ++ ] = kmp_max_threads;
+ if ( omp_thread_limit != NULL ) {
+ rivals[ i ++ ] = omp_thread_limit;
+ }; // if
+ rivals[ i ++ ] = NULL;
+
+ kmp_all_threads->data = (void*)& rivals;
+ kmp_max_threads->data = (void*)& rivals;
+ if ( omp_thread_limit != NULL ) {
+ omp_thread_limit->data = (void*)& rivals;
+ }; // if
+
+ }
+
+#if KMP_AFFINITY_SUPPORTED
+ { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data.
+
+ kmp_setting_t * kmp_affinity = __kmp_stg_find( "KMP_AFFINITY" ); // 1st priority.
+ KMP_DEBUG_ASSERT( kmp_affinity != NULL );
+
+# ifdef KMP_GOMP_COMPAT
+ kmp_setting_t * gomp_cpu_affinity = __kmp_stg_find( "GOMP_CPU_AFFINITY" ); // 2nd priority.
+ KMP_DEBUG_ASSERT( gomp_cpu_affinity != NULL );
+# endif
+
+ kmp_setting_t * omp_proc_bind = __kmp_stg_find( "OMP_PROC_BIND" ); // 3rd priority.
+ KMP_DEBUG_ASSERT( omp_proc_bind != NULL );
+
+            // !!! The volatile keyword is a workaround for Intel (R) C Compiler bug CQ49908.
+ static kmp_setting_t * volatile rivals[ 4 ];
+ int i = 0;
+
+ rivals[ i ++ ] = kmp_affinity;
+
+# ifdef KMP_GOMP_COMPAT
+ rivals[ i ++ ] = gomp_cpu_affinity;
+ gomp_cpu_affinity->data = (void*)& rivals;
+# endif
+
+ rivals[ i ++ ] = omp_proc_bind;
+ omp_proc_bind->data = (void*)& rivals;
+ rivals[ i ++ ] = NULL;
+
+# if OMP_40_ENABLED
+ static kmp_setting_t * volatile places_rivals[ 4 ];
+ i = 0;
+
+ kmp_setting_t * omp_places = __kmp_stg_find( "OMP_PLACES" ); // 3rd priority.
+ KMP_DEBUG_ASSERT( omp_places != NULL );
+
+ places_rivals[ i ++ ] = kmp_affinity;
+# ifdef KMP_GOMP_COMPAT
+ places_rivals[ i ++ ] = gomp_cpu_affinity;
+# endif
+ places_rivals[ i ++ ] = omp_places;
+ omp_places->data = (void*)& places_rivals;
+ places_rivals[ i ++ ] = NULL;
+# endif
+ }
+#else
+ // KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals.
+ // OMP_PLACES not supported yet.
+#endif // KMP_AFFINITY_SUPPORTED
+
+ { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data.
+
+ kmp_setting_t * kmp_force_red = __kmp_stg_find( "KMP_FORCE_REDUCTION" ); // 1st priority.
+ kmp_setting_t * kmp_determ_red = __kmp_stg_find( "KMP_DETERMINISTIC_REDUCTION" ); // 2nd priority.
+
+            // !!! The volatile keyword is a workaround for Intel (R) C Compiler bug CQ49908.
+ static kmp_setting_t * volatile rivals[ 3 ];
+ static kmp_stg_fr_data_t force_data = { 1, (kmp_setting_t **)rivals };
+ static kmp_stg_fr_data_t determ_data = { 0, (kmp_setting_t **)rivals };
+ int i = 0;
+
+ rivals[ i ++ ] = kmp_force_red;
+ if ( kmp_determ_red != NULL ) {
+ rivals[ i ++ ] = kmp_determ_red;
+ }; // if
+ rivals[ i ++ ] = NULL;
+
+ kmp_force_red->data = & force_data;
+ if ( kmp_determ_red != NULL ) {
+ kmp_determ_red->data = & determ_data;
+ }; // if
+ }
+
+ initialized = 1;
+
+ }; // if
+
+ // Reset flags.
+ int i;
+ for ( i = 0; i < __kmp_stg_count; ++ i ) {
+ __kmp_stg_table[ i ].set = 0;
+ }; // for
+
+} // __kmp_stg_init
+
+
+static void
+__kmp_stg_parse(
+ char const * name,
+ char const * value
+) {
+
+    // On Windows* OS there are some nameless variables like "C:=C:\" (yes, really nameless: they are
+    // presented in the environment block as "=C:=C\\\x00=D:=D:\\\x00..."), so let us skip them.
+ if ( name[ 0 ] == 0 ) {
+ return;
+ }; // if
+
+ if ( value != NULL ) {
+ kmp_setting_t * setting = __kmp_stg_find( name );
+ if ( setting != NULL ) {
+ setting->parse( name, value, setting->data );
+ setting->defined = 1;
+ }; // if
+ }; // if
+
+} // __kmp_stg_parse
+
+
+static int
+__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found.
+ char const * name, // Name of variable.
+ char const * value, // Value of the variable.
+ kmp_setting_t * * rivals // List of rival settings (the list must include current one).
+) {
+
+ if ( rivals == NULL ) {
+ return 0;
+ }
+
+    // Loop through higher-priority settings (listed before the current one).
+ int i = 0;
+ for ( ; strcmp( rivals[ i ]->name, name ) != 0; i++ ) {
+ KMP_DEBUG_ASSERT( rivals[ i ] != NULL );
+
+#if KMP_AFFINITY_SUPPORTED
+ if ( rivals[ i ] == __kmp_affinity_notype ) {
+ //
+ // If KMP_AFFINITY is specified without a type name,
+ // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY.
+ //
+ continue;
+ }
+#endif
+
+ if ( rivals[ i ]->set ) {
+ KMP_WARNING( StgIgnored, name, rivals[ i ]->name );
+ return 1;
+ }; // if
+    }; // for
+
+ ++ i; // Skip current setting.
+ return 0;
+
+}; // __kmp_stg_check_rivals
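+// For example (illustrative scenario): KMP_STACKSIZE, GOMP_STACKSIZE and OMP_STACKSIZE are registered as
+// rivals in that priority order by __kmp_stg_init() above, so if both KMP_STACKSIZE and OMP_STACKSIZE are
+// set, parsing OMP_STACKSIZE finds the higher-priority rival already set and issues a StgIgnored warning.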
+
+
+static int
+__kmp_env_toPrint( char const * name, int flag ) {
+ int rc = 0;
+ kmp_setting_t * setting = __kmp_stg_find( name );
+ if ( setting != NULL ) {
+ rc = setting->defined;
+ if ( flag >= 0 ) {
+ setting->defined = flag;
+ }; // if
+ }; // if
+ return rc;
+}
+
+
+static void
+__kmp_aux_env_initialize( kmp_env_blk_t* block ) {
+
+ char const * value;
+
+ /* OMP_NUM_THREADS */
+ value = __kmp_env_blk_var( block, "OMP_NUM_THREADS" );
+ if ( value ) {
+ ompc_set_num_threads( __kmp_dflt_team_nth );
+ }
+
+ /* KMP_BLOCKTIME */
+ value = __kmp_env_blk_var( block, "KMP_BLOCKTIME" );
+ if ( value ) {
+ kmpc_set_blocktime( __kmp_dflt_blocktime );
+ }
+
+ /* OMP_NESTED */
+ value = __kmp_env_blk_var( block, "OMP_NESTED" );
+ if ( value ) {
+ ompc_set_nested( __kmp_dflt_nested );
+ }
+
+ /* OMP_DYNAMIC */
+ value = __kmp_env_blk_var( block, "OMP_DYNAMIC" );
+ if ( value ) {
+ ompc_set_dynamic( __kmp_global.g.g_dynamic );
+ }
+
+}
+
+void
+__kmp_env_initialize( char const * string ) {
+
+ kmp_env_blk_t block;
+ int i;
+
+ __kmp_stg_init();
+
+ // Hack!!!
+ if ( string == NULL ) {
+ // __kmp_max_nth = __kmp_sys_max_nth;
+ __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
+ }; // if
+ __kmp_env_blk_init( & block, string );
+
+ //
+ // update the set flag on all entries that have an env var
+ //
+ for ( i = 0; i < block.count; ++ i ) {
+ if (( block.vars[ i ].name == NULL )
+ || ( *block.vars[ i ].name == '\0')) {
+ continue;
+ }
+ if ( block.vars[ i ].value == NULL ) {
+ continue;
+ }
+ kmp_setting_t * setting = __kmp_stg_find( block.vars[ i ].name );
+ if ( setting != NULL ) {
+ setting->set = 1;
+ }
+ }; // for i
+
+    // Special case: if we are parsing the environment (not a string), process KMP_WARNINGS first.
+ if ( string == NULL ) {
+ char const * name = "KMP_WARNINGS";
+ char const * value = __kmp_env_blk_var( & block, name );
+ __kmp_stg_parse( name, value );
+ }; // if
+
+#if KMP_AFFINITY_SUPPORTED
+ //
+ // Special case. KMP_AFFINITY is not a rival to other affinity env vars
+ // if no affinity type is specified. We want to allow
+    // KMP_AFFINITY=[no]verbose/[no]warnings/etc. to be enabled when
+ // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0
+ // affinity mechanism.
+ //
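+    // For example (illustrative values): KMP_AFFINITY="verbose" alone contains no type name, so
+    // __kmp_affinity_notype is set below and it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY,
+    // whereas KMP_AFFINITY="compact" names a type and therefore does.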
+ __kmp_affinity_notype = NULL;
+ char const *aff_str = __kmp_env_blk_var( & block, "KMP_AFFINITY" );
+ if ( aff_str != NULL ) {
+ //
+ // Check if the KMP_AFFINITY type is specified in the string.
+ // We just search the string for "compact", "scatter", etc.
+ // without really parsing the string. The syntax of the
+ // KMP_AFFINITY env var is such that none of the affinity
+        // type names can appear anywhere other than as the type
+ // specifier, even as substrings.
+ //
+ // I can't find a case-insensitive version of strstr on Windows* OS.
+ // Use the case-sensitive version for now.
+ //
+
+# if KMP_OS_WINDOWS
+# define FIND strstr
+# else
+# define FIND strcasestr
+# endif
+
+ if ( ( FIND( aff_str, "none" ) == NULL )
+ && ( FIND( aff_str, "physical" ) == NULL )
+ && ( FIND( aff_str, "logical" ) == NULL )
+ && ( FIND( aff_str, "compact" ) == NULL )
+ && ( FIND( aff_str, "scatter" ) == NULL )
+ && ( FIND( aff_str, "explicit" ) == NULL )
+ && ( FIND( aff_str, "balanced" ) == NULL )
+ && ( FIND( aff_str, "disabled" ) == NULL ) ) {
+ __kmp_affinity_notype = __kmp_stg_find( "KMP_AFFINITY" );
+ }
+ else {
+ //
+ // A new affinity type is specified.
+ // Reset the affinity flags to their default values,
+ // in case this is called from kmp_set_defaults().
+ //
+ __kmp_affinity_type = affinity_default;
+ __kmp_affinity_gran = affinity_gran_default;
+ __kmp_affinity_top_method = affinity_top_method_default;
+ __kmp_affinity_respect_mask = affinity_respect_mask_default;
+ }
+# undef FIND
+
+#if OMP_40_ENABLED
+ //
+ // Also reset the affinity flags if OMP_PROC_BIND is specified.
+ //
+ aff_str = __kmp_env_blk_var( & block, "OMP_PROC_BIND" );
+ if ( aff_str != NULL ) {
+ __kmp_affinity_type = affinity_default;
+ __kmp_affinity_gran = affinity_gran_default;
+ __kmp_affinity_top_method = affinity_top_method_default;
+ __kmp_affinity_respect_mask = affinity_respect_mask_default;
+ }
+#endif /* OMP_40_ENABLED */
+ }
+
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+#if OMP_40_ENABLED
+ //
+ // Set up the nested proc bind type vector.
+ //
+ if ( __kmp_nested_proc_bind.bind_types == NULL ) {
+ __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *)
+ KMP_INTERNAL_MALLOC( sizeof(kmp_proc_bind_t) );
+ if ( __kmp_nested_proc_bind.bind_types == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }
+ __kmp_nested_proc_bind.size = 1;
+ __kmp_nested_proc_bind.used = 1;
+# if KMP_AFFINITY_SUPPORTED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_default;
+# else
+ // default proc bind is false if affinity not supported
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+# endif
+
+ }
+#endif /* OMP_40_ENABLED */
+
+ //
+ // Now process all of the settings.
+ //
+ for ( i = 0; i < block.count; ++ i ) {
+ __kmp_stg_parse( block.vars[ i ].name, block.vars[ i ].value );
+ }; // for i
+
+ //
+    // If user locks have not been allocated yet, pick the default lock kind and set up the lock vptr table.
+ //
+ if ( ! __kmp_init_user_locks ) {
+ if ( __kmp_user_lock_kind == lk_default ) {
+ __kmp_user_lock_kind = lk_queuing;
+ }
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_init_dynamic_user_locks();
+#else
+ __kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
+#endif
+ }
+ else {
+ KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called
+ KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default );
+        // Bind the lock functions again to follow the transition between different
+        // KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long
+        // as we do not allow the lock kind to change after a call to any
+        // user lock function has been made (which we do not).
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_init_dynamic_user_locks();
+#else
+ __kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
+#endif
+ }
+
+#if KMP_AFFINITY_SUPPORTED
+
+ if ( ! TCR_4(__kmp_init_middle) ) {
+ //
+ // Determine if the machine/OS is actually capable of supporting
+ // affinity.
+ //
+ const char *var = "KMP_AFFINITY";
+# if KMP_USE_HWLOC
+ if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
+ __kmp_hwloc_error = TRUE;
+ if(__kmp_affinity_verbose)
+ KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
+ }
+ hwloc_topology_ignore_type(__kmp_hwloc_topology, HWLOC_OBJ_CACHE);
+# endif
+ if ( __kmp_affinity_type == affinity_disabled ) {
+ KMP_AFFINITY_DISABLE();
+ }
+ else if ( ! KMP_AFFINITY_CAPABLE() ) {
+# if KMP_USE_HWLOC
+ const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
+ if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
+ __kmp_hwloc_error = TRUE;
+ if(__kmp_affinity_verbose)
+ KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
+ }
+ // Is the system capable of setting/getting this thread's affinity?
+ // also, is topology discovery possible? (pu indicates ability to discover processing units)
+ // and finally, were there no errors when calling any hwloc_* API functions?
+ if(topology_support->cpubind->set_thisthread_cpubind &&
+ topology_support->cpubind->get_thisthread_cpubind &&
+ topology_support->discovery->pu &&
+ !__kmp_hwloc_error)
+ {
+ // enables affinity according to KMP_AFFINITY_CAPABLE() macro
+ KMP_AFFINITY_ENABLE(TRUE);
+ } else {
+ // indicate that hwloc didn't work and disable affinity
+ __kmp_hwloc_error = TRUE;
+ KMP_AFFINITY_DISABLE();
+ }
+# else
+ __kmp_affinity_determine_capable( var );
+# endif // KMP_USE_HWLOC
+ if ( ! KMP_AFFINITY_CAPABLE() ) {
+ if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings
+ && ( __kmp_affinity_type != affinity_default )
+ && ( __kmp_affinity_type != affinity_none )
+ && ( __kmp_affinity_type != affinity_disabled ) ) ) {
+ KMP_WARNING( AffNotSupported, var );
+ }
+ __kmp_affinity_type = affinity_disabled;
+ __kmp_affinity_respect_mask = 0;
+ __kmp_affinity_gran = affinity_gran_fine;
+ }
+ }
+
+# if OMP_40_ENABLED
+ if ( __kmp_affinity_type == affinity_disabled ) {
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+ }
+ else if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_true ) {
+ //
+ // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread.
+ //
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread;
+ }
+# endif /* OMP_40_ENABLED */
+
+ if ( KMP_AFFINITY_CAPABLE() ) {
+
+# if KMP_GROUP_AFFINITY
+
+ //
+ // Handle the Win 64 group affinity stuff if there are multiple
+ // processor groups, or if the user requested it, and OMP 4.0
+ // affinity is not in effect.
+ //
+ if ( ( ( __kmp_num_proc_groups > 1 )
+ && ( __kmp_affinity_type == affinity_default )
+# if OMP_40_ENABLED
+ && ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) )
+# endif
+ || ( __kmp_affinity_top_method == affinity_top_method_group ) ) {
+ if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) {
+ __kmp_affinity_respect_mask = FALSE;
+ }
+ if ( __kmp_affinity_type == affinity_default ) {
+ __kmp_affinity_type = affinity_compact;
+# if OMP_40_ENABLED
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+# endif
+ }
+ if ( __kmp_affinity_top_method == affinity_top_method_default ) {
+ if ( __kmp_affinity_gran == affinity_gran_default ) {
+ __kmp_affinity_top_method = affinity_top_method_group;
+ __kmp_affinity_gran = affinity_gran_group;
+ }
+ else if ( __kmp_affinity_gran == affinity_gran_group ) {
+ __kmp_affinity_top_method = affinity_top_method_group;
+ }
+ else {
+ __kmp_affinity_top_method = affinity_top_method_all;
+ }
+ }
+ else if ( __kmp_affinity_top_method == affinity_top_method_group ) {
+ if ( __kmp_affinity_gran == affinity_gran_default ) {
+ __kmp_affinity_gran = affinity_gran_group;
+ }
+ else if ( ( __kmp_affinity_gran != affinity_gran_group )
+ && ( __kmp_affinity_gran != affinity_gran_fine )
+ && ( __kmp_affinity_gran != affinity_gran_thread ) ) {
+ char *str = NULL;
+ switch ( __kmp_affinity_gran ) {
+ case affinity_gran_core: str = "core"; break;
+ case affinity_gran_package: str = "package"; break;
+ case affinity_gran_node: str = "node"; break;
+ default: KMP_DEBUG_ASSERT( 0 );
+ }
+ KMP_WARNING( AffGranTopGroup, var, str );
+ __kmp_affinity_gran = affinity_gran_fine;
+ }
+ }
+ else {
+ if ( __kmp_affinity_gran == affinity_gran_default ) {
+ __kmp_affinity_gran = affinity_gran_core;
+ }
+ else if ( __kmp_affinity_gran == affinity_gran_group ) {
+ char *str = NULL;
+ switch ( __kmp_affinity_type ) {
+ case affinity_physical: str = "physical"; break;
+ case affinity_logical: str = "logical"; break;
+ case affinity_compact: str = "compact"; break;
+ case affinity_scatter: str = "scatter"; break;
+ case affinity_explicit: str = "explicit"; break;
+                        // No MIC on Windows, so no affinity_balanced case
+ default: KMP_DEBUG_ASSERT( 0 );
+ }
+ KMP_WARNING( AffGranGroupType, var, str );
+ __kmp_affinity_gran = affinity_gran_core;
+ }
+ }
+ }
+ else
+
+# endif /* KMP_GROUP_AFFINITY */
+
+ {
+ if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) {
+# if KMP_GROUP_AFFINITY
+ if ( __kmp_num_proc_groups > 1 ) {
+ __kmp_affinity_respect_mask = FALSE;
+ }
+ else
+# endif /* KMP_GROUP_AFFINITY */
+ {
+ __kmp_affinity_respect_mask = TRUE;
+ }
+ }
+# if OMP_40_ENABLED
+ if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
+ && ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ) ) {
+ if ( __kmp_affinity_type == affinity_default ) {
+ __kmp_affinity_type = affinity_compact;
+ __kmp_affinity_dups = FALSE;
+ }
+ }
+ else
+# endif /* OMP_40_ENABLED */
+ if ( __kmp_affinity_type == affinity_default ) {
+#if OMP_40_ENABLED
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic ) {
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+ } else
+#endif
+ {
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+ }
+#endif /* OMP_40_ENABLED */
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic ) {
+ __kmp_affinity_type = affinity_scatter;
+ } else
+#endif
+ {
+ __kmp_affinity_type = affinity_none;
+ }
+
+ }
+ if ( ( __kmp_affinity_gran == affinity_gran_default )
+ && ( __kmp_affinity_gran_levels < 0 ) ) {
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+ if( __kmp_mic_type != non_mic ) {
+ __kmp_affinity_gran = affinity_gran_fine;
+ } else
+#endif
+ {
+ __kmp_affinity_gran = affinity_gran_core;
+ }
+ }
+ if ( __kmp_affinity_top_method == affinity_top_method_default ) {
+ __kmp_affinity_top_method = affinity_top_method_all;
+ }
+ }
+ }
+
+ K_DIAG( 1, ( "__kmp_affinity_type == %d\n", __kmp_affinity_type ) );
+ K_DIAG( 1, ( "__kmp_affinity_compact == %d\n", __kmp_affinity_compact ) );
+ K_DIAG( 1, ( "__kmp_affinity_offset == %d\n", __kmp_affinity_offset ) );
+ K_DIAG( 1, ( "__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose ) );
+ K_DIAG( 1, ( "__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings ) );
+ K_DIAG( 1, ( "__kmp_affinity_respect_mask == %d\n", __kmp_affinity_respect_mask ) );
+ K_DIAG( 1, ( "__kmp_affinity_gran == %d\n", __kmp_affinity_gran ) );
+
+ KMP_DEBUG_ASSERT( __kmp_affinity_type != affinity_default);
+# if OMP_40_ENABLED
+ KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default );
+# endif
+ }
+
+#endif /* KMP_AFFINITY_SUPPORTED */
+
+ if ( __kmp_version ) {
+ __kmp_print_version_1();
+ }; // if
+
+ // Post-initialization step: some environment variables need further processing of their values
+ if ( string != NULL) { // kmp_set_defaults() was called
+ __kmp_aux_env_initialize( &block );
+ }
+
+ __kmp_env_blk_free( & block );
+
+ KMP_MB();
+
+} // __kmp_env_initialize
+
+
+void
+__kmp_env_print() {
+
+ kmp_env_blk_t block;
+ int i;
+ kmp_str_buf_t buffer;
+
+ __kmp_stg_init();
+ __kmp_str_buf_init( & buffer );
+
+ __kmp_env_blk_init( & block, NULL );
+ __kmp_env_blk_sort( & block );
+
+ // Print real environment values.
+ __kmp_str_buf_print( & buffer, "\n%s\n\n", KMP_I18N_STR( UserSettings ) );
+ for ( i = 0; i < block.count; ++ i ) {
+ char const * name = block.vars[ i ].name;
+ char const * value = block.vars[ i ].value;
+ if (
+ ( KMP_STRLEN( name ) > 4 && strncmp( name, "KMP_", 4 ) == 0 )
+ || strncmp( name, "OMP_", 4 ) == 0
+ #ifdef KMP_GOMP_COMPAT
+ || strncmp( name, "GOMP_", 5 ) == 0
+ #endif // KMP_GOMP_COMPAT
+ ) {
+ __kmp_str_buf_print( & buffer, " %s=%s\n", name, value );
+ }; // if
+ }; // for
+ __kmp_str_buf_print( & buffer, "\n" );
+
+ // Print internal (effective) settings.
+ __kmp_str_buf_print( & buffer, "%s\n\n", KMP_I18N_STR( EffectiveSettings ) );
+ for ( int i = 0; i < __kmp_stg_count; ++ i ) {
+ if ( __kmp_stg_table[ i ].print != NULL ) {
+ __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data );
+ }; // if
+ }; // for
+
+ __kmp_printf( "%s", buffer.str );
+
+ __kmp_env_blk_free( & block );
+ __kmp_str_buf_free( & buffer );
+
+ __kmp_printf("\n");
+
+} // __kmp_env_print
+
+
+#if OMP_40_ENABLED
+void
+__kmp_env_print_2() {
+
+ kmp_env_blk_t block;
+ kmp_str_buf_t buffer;
+
+ __kmp_env_format = 1;
+
+ __kmp_stg_init();
+ __kmp_str_buf_init( & buffer );
+
+ __kmp_env_blk_init( & block, NULL );
+ __kmp_env_blk_sort( & block );
+
+ __kmp_str_buf_print( & buffer, "\n%s\n", KMP_I18N_STR( DisplayEnvBegin ) );
+ __kmp_str_buf_print( & buffer, " _OPENMP='%d'\n", __kmp_openmp_version );
+
+ for ( int i = 0; i < __kmp_stg_count; ++ i ) {
+ if ( __kmp_stg_table[ i ].print != NULL &&
+ ( ( __kmp_display_env && strncmp( __kmp_stg_table[ i ].name, "OMP_", 4 ) == 0 ) || __kmp_display_env_verbose ) ) {
+ __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data );
+ }; // if
+ }; // for
+
+ __kmp_str_buf_print( & buffer, "%s\n", KMP_I18N_STR( DisplayEnvEnd ) );
+ __kmp_str_buf_print( & buffer, "\n" );
+
+ __kmp_printf( "%s", buffer.str );
+
+ __kmp_env_blk_free( & block );
+ __kmp_str_buf_free( & buffer );
+
+ __kmp_printf("\n");
+
+} // __kmp_env_print_2
+#endif // OMP_40_ENABLED
+
+// end of file
+
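For orientation, here is a condensed reading of the default-resolution cascade in __kmp_env_initialize above, for the common case of affinity-capable hardware, a single processor group, and no user-supplied affinity or proc-bind settings (a sketch derived from the code above, not a normative statement):

    // Effective defaults chosen when the user sets nothing
    // (single processor group, __kmp_mic_type == non_mic):
    //   __kmp_affinity_respect_mask          = TRUE
    //   __kmp_nested_proc_bind.bind_types[0] = proc_bind_false    (OMP 4.0 builds)
    //   __kmp_affinity_type                  = affinity_none
    //   __kmp_affinity_gran                  = affinity_gran_core
    //   __kmp_affinity_top_method            = affinity_top_method_all
    // When __kmp_mic_type != non_mic (Xeon Phi targets) the same cascade instead
    // selects proc_bind_intel, affinity_scatter and affinity_gran_fine.
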
diff --git a/contrib/libs/cxxsupp/openmp/kmp_settings.h b/contrib/libs/cxxsupp/openmp/kmp_settings.h
index ff355d7c3e..7232e61975 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_settings.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_settings.h
@@ -1,50 +1,50 @@
-/*
- * kmp_settings.h -- Initialize environment variables
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_SETTINGS_H
-#define KMP_SETTINGS_H
-
-void __kmp_reset_global_vars( void );
-void __kmp_env_initialize( char const * );
-void __kmp_env_print();
-#if OMP_40_ENABLED
-void __kmp_env_print_2();
-#endif // OMP_40_ENABLED
-
-int __kmp_initial_threads_capacity( int req_nproc );
-void __kmp_init_dflt_team_nth();
-int __kmp_convert_to_milliseconds( char const * );
-int __kmp_default_tp_capacity( int, int, int);
-
-#if KMP_MIC
-#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Device), name )
-#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Device), x )
-#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ? "TRUE" : "FALSE" );
-#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, value )
-#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Device), name, value );
-#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value )
-#else
-#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Host), name )
-#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Host), x )
-#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ? "TRUE" : "FALSE" );
-#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value )
-#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Host), name, value );
-#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value )
-#endif
-
-#endif // KMP_SETTINGS_H
-
-// end of file //
-
+/*
+ * kmp_settings.h -- Initialize environment variables
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_SETTINGS_H
+#define KMP_SETTINGS_H
+
+void __kmp_reset_global_vars( void );
+void __kmp_env_initialize( char const * );
+void __kmp_env_print();
+#if OMP_40_ENABLED
+void __kmp_env_print_2();
+#endif // OMP_40_ENABLED
+
+int __kmp_initial_threads_capacity( int req_nproc );
+void __kmp_init_dflt_team_nth();
+int __kmp_convert_to_milliseconds( char const * );
+int __kmp_default_tp_capacity( int, int, int);
+
+#if KMP_MIC
+#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Device), name )
+#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Device), x )
+#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ? "TRUE" : "FALSE" );
+#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, value )
+#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Device), name, value );
+#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value )
+#else
+#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Host), name )
+#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Host), x )
+#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ? "TRUE" : "FALSE" );
+#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value )
+#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Host), name, value );
+#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value )
+#endif
+
+#endif // KMP_SETTINGS_H
+
+// end of file //
+
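As a hedged illustration of how the KMP_STR_BUF_PRINT_* helpers above are used by the per-setting print callbacks (the setting name and value are hypothetical, and KMP_I18N_STR(Host) is assumed to resolve to the string "Host"):

    // Inside a print callback with `buffer`, `name` == "KMP_BLOCKTIME" and `value` == 200 in scope:
    KMP_STR_BUF_PRINT_INT;
    // appends a line of the form
    //    Host KMP_BLOCKTIME='200'
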
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats.cpp b/contrib/libs/cxxsupp/openmp/kmp_stats.cpp
index c2b8c8c489..d1f43afe4a 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_stats.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_stats.cpp
@@ -1,609 +1,609 @@
-/** @file kmp_stats.cpp
- * Statistics gathering and processing.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp.h"
-#include "kmp_str.h"
-#include "kmp_lock.h"
-#include "kmp_stats.h"
-
-#include <algorithm>
-#include <sstream>
-#include <iomanip>
-#include <stdlib.h> // for atexit
-
-#define STRINGIZE2(x) #x
-#define STRINGIZE(x) STRINGIZE2(x)
-
-#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
-statInfo timeStat::timerInfo[] = {
- KMP_FOREACH_TIMER(expandName,0)
- {0,0}
-};
-const statInfo counter::counterInfo[] = {
- KMP_FOREACH_COUNTER(expandName,0)
- {0,0}
-};
-#undef expandName
-
-#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0},
-kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
- KMP_FOREACH_TIMER(expandName,0)
- {0.0,0.0,0.0}
-};
-#undef expandName
-
-const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = {
- {1.0, 0.0, 0.0}, // red
- {1.0, 0.6, 0.0}, // orange
- {1.0, 1.0, 0.0}, // yellow
- {0.0, 1.0, 0.0}, // green
- {0.0, 0.0, 1.0}, // blue
- {0.6, 0.2, 0.8}, // purple
- {1.0, 0.0, 1.0}, // magenta
- {0.0, 0.4, 0.2}, // dark green
- {1.0, 1.0, 0.6}, // light yellow
- {0.6, 0.4, 0.6}, // dirty purple
- {0.0, 1.0, 1.0}, // cyan
- {1.0, 0.4, 0.8}, // pink
- {0.5, 0.5, 0.5}, // grey
- {0.8, 0.7, 0.5}, // brown
- {0.6, 0.6, 1.0}, // light blue
- {1.0, 0.7, 0.5}, // peach
- {0.8, 0.5, 1.0}, // lavender
- {0.6, 0.0, 0.0}, // dark red
- {0.7, 0.6, 0.0}, // gold
- {0.0, 0.0, 0.0} // black
-};
-
-// Ensure that the atexit handler only runs once.
-static uint32_t statsPrinted = 0;
-
-// output interface
-static kmp_stats_output_module __kmp_stats_global_output;
-
-/* ****************************************************** */
-/* ************* statistic member functions ************* */
-
-void statistic::addSample(double sample)
-{
- double delta = sample - meanVal;
-
- sampleCount = sampleCount + 1;
- meanVal = meanVal + delta/sampleCount;
- m2 = m2 + delta*(sample - meanVal);
-
- minVal = std::min(minVal, sample);
- maxVal = std::max(maxVal, sample);
-}
-
-statistic & statistic::operator+= (const statistic & other)
-{
- if (sampleCount == 0)
- {
- *this = other;
- return *this;
- }
-
- uint64_t newSampleCount = sampleCount + other.sampleCount;
- double dnsc = double(newSampleCount);
- double dsc = double(sampleCount);
- double dscBydnsc = dsc/dnsc;
- double dosc = double(other.sampleCount);
- double delta = other.meanVal - meanVal;
-
- // Try to order these calculations to avoid overflows.
- // If this were Fortran, then the compiler would not be able to re-order over brackets.
- // In C++ it may be legal to do that (we certainly hope it doesn't, and The C++ Programming Language, 2nd edition
- // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation
- // really is associative (which floating addition isn't...)).
- meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc);
- m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta;
- minVal = std::min (minVal, other.minVal);
- maxVal = std::max (maxVal, other.maxVal);
- sampleCount = newSampleCount;
-
-
- return *this;
-}
-
-void statistic::scale(double factor)
-{
- minVal = minVal*factor;
- maxVal = maxVal*factor;
- meanVal= meanVal*factor;
- m2 = m2*factor*factor;
- return;
-}
-
-std::string statistic::format(char unit, bool total) const
-{
- std::string result = formatSI(sampleCount,9,' ');
-
- result = result + std::string(", ") + formatSI(minVal, 9, unit);
- result = result + std::string(", ") + formatSI(meanVal, 9, unit);
- result = result + std::string(", ") + formatSI(maxVal, 9, unit);
- if (total)
- result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit);
- result = result + std::string(", ") + formatSI(getSD(), 9, unit);
-
- return result;
-}
-
-/* ********************************************************** */
-/* ************* explicitTimer member functions ************* */
-
-void explicitTimer::start(timer_e timerEnumValue) {
- startTime = tsc_tick_count::now();
- if(timeStat::logEvent(timerEnumValue)) {
- __kmp_stats_thread_ptr->incrementNestValue();
- }
- return;
-}
-
-void explicitTimer::stop(timer_e timerEnumValue) {
- if (startTime.getValue() == 0)
- return;
-
- tsc_tick_count finishTime = tsc_tick_count::now();
-
- //stat->addSample ((tsc_tick_count::now() - startTime).ticks());
- stat->addSample ((finishTime - startTime).ticks());
-
- if(timeStat::logEvent(timerEnumValue)) {
- __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
- __kmp_stats_thread_ptr->decrementNestValue();
- }
-
- /* We accept the risk that we drop a sample because it really did start at t==0. */
- startTime = 0;
- return;
-}
-
-/* ******************************************************************* */
-/* ************* kmp_stats_event_vector member functions ************* */
-
-void kmp_stats_event_vector::deallocate() {
- __kmp_free(events);
- internal_size = 0;
- allocated_size = 0;
- events = NULL;
-}
-
-// This function is for qsort() which requires the compare function to return
-// either a negative number if event1 < event2, a positive number if event1 > event2
-// or zero if event1 == event2.
-// This sorts by start time (lowest to highest).
-int compare_two_events(const void* event1, const void* event2) {
- kmp_stats_event* ev1 = (kmp_stats_event*)event1;
- kmp_stats_event* ev2 = (kmp_stats_event*)event2;
-
- if(ev1->getStart() < ev2->getStart()) return -1;
- else if(ev1->getStart() > ev2->getStart()) return 1;
- else return 0;
-}
-
-void kmp_stats_event_vector::sort() {
- qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
-}
-
-/* *********************************************************** */
-/* ************* kmp_stats_list member functions ************* */
-
-// returns a pointer to newly created stats node
-kmp_stats_list* kmp_stats_list::push_back(int gtid) {
- kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list));
- // placement new: we already have raw space and a pointer, so this just runs the constructor (hence __kmp_allocate instead of C++ new)
- new (newnode) kmp_stats_list();
- newnode->setGtid(gtid);
- newnode->prev = this->prev;
- newnode->next = this;
- newnode->prev->next = newnode;
- newnode->next->prev = newnode;
- return newnode;
-}
-void kmp_stats_list::deallocate() {
- kmp_stats_list* ptr = this->next;
- kmp_stats_list* delptr = this->next;
- while(ptr != this) {
- delptr = ptr;
- ptr=ptr->next;
- // placement new means we have to explicitly call destructor.
- delptr->_event_vector.deallocate();
- delptr->~kmp_stats_list();
- __kmp_free(delptr);
- }
-}
-kmp_stats_list::iterator kmp_stats_list::begin() {
- kmp_stats_list::iterator it;
- it.ptr = this->next;
- return it;
-}
-kmp_stats_list::iterator kmp_stats_list::end() {
- kmp_stats_list::iterator it;
- it.ptr = this;
- return it;
-}
-int kmp_stats_list::size() {
- int retval;
- kmp_stats_list::iterator it;
- for(retval=0, it=begin(); it!=end(); it++, retval++) {}
- return retval;
-}
-
-/* ********************************************************************* */
-/* ************* kmp_stats_list::iterator member functions ************* */
-
-kmp_stats_list::iterator::iterator() : ptr(NULL) {}
-kmp_stats_list::iterator::~iterator() {}
-kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
- this->ptr = this->ptr->next;
- return *this;
-}
-kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
- this->ptr = this->ptr->next;
- return *this;
-}
-kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
- this->ptr = this->ptr->prev;
- return *this;
-}
-kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
- this->ptr = this->ptr->prev;
- return *this;
-}
-bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) {
- return this->ptr!=rhs.ptr;
-}
-bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) {
- return this->ptr==rhs.ptr;
-}
-kmp_stats_list* kmp_stats_list::iterator::operator*() const {
- return this->ptr;
-}
-
-/* *************************************************************** */
-/* ************* kmp_stats_output_module functions ************** */
-
-const char* kmp_stats_output_module::outputFileName = NULL;
-const char* kmp_stats_output_module::eventsFileName = NULL;
-const char* kmp_stats_output_module::plotFileName = NULL;
-int kmp_stats_output_module::printPerThreadFlag = 0;
-int kmp_stats_output_module::printPerThreadEventsFlag = 0;
-
- // init() is called very early in execution, from the constructor of __kmp_stats_global_output
-void kmp_stats_output_module::init()
-{
- char * statsFileName = getenv("KMP_STATS_FILE");
- eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
- plotFileName = getenv("KMP_STATS_PLOT_FILE");
- char * threadStats = getenv("KMP_STATS_THREADS");
- char * threadEvents = getenv("KMP_STATS_EVENTS");
-
- // set the stats output filenames based on environment variables and defaults
- outputFileName = statsFileName;
- eventsFileName = eventsFileName ? eventsFileName : "events.dat";
- plotFileName = plotFileName ? plotFileName : "events.plt";
-
- // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes
- printPerThreadFlag = __kmp_str_match_true(threadStats);
- printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
-
- if(printPerThreadEventsFlag) {
- // assigns a color to each timer for printing
- setupEventColors();
- } else {
- // will clear flag so that no event will be logged
- timeStat::clearEventFlags();
- }
-
- return;
-}
-
-void kmp_stats_output_module::setupEventColors() {
- int i;
- int globalColorIndex = 0;
- int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
- for(i=0;i<TIMER_LAST;i++) {
- if(timeStat::logEvent((timer_e)i)) {
- timerColorInfo[i] = globalColorArray[globalColorIndex];
- globalColorIndex = (globalColorIndex+1)%numGlobalColors;
- }
- }
- return;
-}
-
-void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers)
-{
- if (areTimers)
- {
- // Check whether we have any useful timers: since we don't print zero-value timers, we need to avoid
- // printing a header followed by no data.
- bool haveTimers = false;
- for (int s = 0; s<TIMER_LAST; s++)
- {
- if (theStats[s].getCount() != 0)
- {
- haveTimers = true;
- break;
- }
- }
- if (!haveTimers)
- return;
- }
-
- // Print
- const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,";
- fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title);
- if (areTimers) {
- for (int s = 0; s<TIMER_LAST; s++) {
- statistic const * stat = &theStats[s];
- if (stat->getCount() != 0) {
- char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
- fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str());
- }
- }
- } else { // Counters
- for (int s = 0; s<COUNTER_LAST; s++) {
- statistic const * stat = &theStats[s];
- fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str());
- }
- }
-}
-
-void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters)
-{
- // We print all the counters even if they are zero.
- // That makes it easier to slice them into a spreadsheet if you need to.
- fprintf (statsOut, "\nCounter, Count\n");
- for (int c = 0; c<COUNTER_LAST; c++) {
- counter const * stat = &theCounters[c];
- fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str());
- }
-}
-
-void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) {
- // sort by start time before printing
- theEvents->sort();
- for (int i = 0; i < theEvents->size(); i++) {
- kmp_stats_event ev = theEvents->at(i);
- rgb_color color = getEventColor(ev.getTimerName());
- fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n",
- gtid,
- ev.getStart(),
- ev.getStop(),
- 1.2 - (ev.getNestLevel() * 0.2),
- color.r, color.g, color.b,
- timeStat::name(ev.getTimerName())
- );
- }
- return;
-}
-
-void kmp_stats_output_module::windupExplicitTimers()
-{
- // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explicit timers in all threads
- // and say "it's over".
- // If the timer wasn't running, this won't record anything anyway.
- kmp_stats_list::iterator it;
- for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
- for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
- (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
- }
- }
-}
-
-void kmp_stats_output_module::printPloticusFile() {
- int i;
- int size = __kmp_stats_list.size();
- FILE* plotOut = fopen(plotFileName, "w+");
-
- fprintf(plotOut, "#proc page\n"
- " pagesize: 15 10\n"
- " scale: 1.0\n\n");
-
- fprintf(plotOut, "#proc getdata\n"
- " file: %s\n\n",
- eventsFileName);
-
- fprintf(plotOut, "#proc areadef\n"
- " title: OpenMP Sampling Timeline\n"
- " titledetails: align=center size=16\n"
- " rectangle: 1 1 13 9\n"
- " xautorange: datafield=2,3\n"
- " yautorange: -1 %d\n\n",
- size);
-
- fprintf(plotOut, "#proc xaxis\n"
- " stubs: inc\n"
- " stubdetails: size=12\n"
- " label: Time (ticks)\n"
- " labeldetails: size=14\n\n");
-
- fprintf(plotOut, "#proc yaxis\n"
- " stubs: inc 1\n"
- " stubrange: 0 %d\n"
- " stubdetails: size=12\n"
- " label: Thread #\n"
- " labeldetails: size=14\n\n",
- size-1);
-
- fprintf(plotOut, "#proc bars\n"
- " exactcolorfield: 5\n"
- " axis: x\n"
- " locfield: 1\n"
- " segmentfields: 2 3\n"
- " barwidthfield: 4\n\n");
-
- // create legend entries corresponding to the timer color
- for(i=0;i<TIMER_LAST;i++) {
- if(timeStat::logEvent((timer_e)i)) {
- rgb_color c = getEventColor((timer_e)i);
- fprintf(plotOut, "#proc legendentry\n"
- " sampletype: color\n"
- " label: %s\n"
- " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
- timeStat::name((timer_e)i),
- c.r, c.g, c.b);
-
- }
- }
-
- fprintf(plotOut, "#proc legend\n"
- " format: down\n"
- " location: max max\n\n");
- fclose(plotOut);
- return;
-}
-
-void kmp_stats_output_module::outputStats(const char* heading)
-{
- statistic allStats[TIMER_LAST];
- statistic allCounters[COUNTER_LAST];
-
- // stop all the explicit timers for all threads
- windupExplicitTimers();
-
- FILE * eventsOut;
- FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr;
-
- if (eventPrintingEnabled()) {
- eventsOut = fopen(eventsFileName, "w+");
- }
-
- if (!statsOut)
- statsOut = stderr;
-
- fprintf(statsOut, "%s\n",heading);
- // Accumulate across threads.
- kmp_stats_list::iterator it;
- for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
- int t = (*it)->getGtid();
- // Output per thread stats if requested.
- if (perThreadPrintingEnabled()) {
- fprintf (statsOut, "Thread %d\n", t);
- printStats(statsOut, (*it)->getTimers(), true);
- printCounters(statsOut, (*it)->getCounters());
- fprintf(statsOut,"\n");
- }
- // Output per thread events if requested.
- if (eventPrintingEnabled()) {
- kmp_stats_event_vector events = (*it)->getEventVector();
- printEvents(eventsOut, &events, t);
- }
-
- for (int s = 0; s<TIMER_LAST; s++) {
- // See if we should ignore this timer when aggregating
- if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
- (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master
- timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it.
- )
- {
- continue;
- }
-
- statistic * threadStat = (*it)->getTimer(timer_e(s));
- allStats[s] += *threadStat;
- }
-
- // Special handling for synthesized statistics.
- // These just have to be coded specially here for now.
- // At present we only have a few:
- // The total parallel work done in each thread.
- // The variance here makes it easy to see load imbalance over the whole program (though, of course,
- // it's possible to have a code with awful load balance in every parallel region but perfect load
- // balance over the whole program.)
- // The time spent in barriers in each thread.
- allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
-
- // Time in explicit barriers.
- allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
-
- for (int c = 0; c<COUNTER_LAST; c++) {
- if (counter::masterOnly(counter_e(c)) && t != 0)
- continue;
- allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue());
- }
- }
-
- if (eventPrintingEnabled()) {
- printPloticusFile();
- fclose(eventsOut);
- }
-
- fprintf (statsOut, "Aggregate for all threads\n");
- printStats (statsOut, &allStats[0], true);
- fprintf (statsOut, "\n");
- printStats (statsOut, &allCounters[0], false);
-
- if (statsOut != stderr)
- fclose(statsOut);
-
-}
-
-/* ************************************************** */
-/* ************* exported C functions ************** */
-
- // No name mangling for these functions; we want the C files to be able to get at them.
-extern "C" {
-
-void __kmp_reset_stats()
-{
- kmp_stats_list::iterator it;
- for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
- timeStat * timers = (*it)->getTimers();
- counter * counters = (*it)->getCounters();
- explicitTimer * eTimers = (*it)->getExplicitTimers();
-
- for (int t = 0; t<TIMER_LAST; t++)
- timers[t].reset();
-
- for (int c = 0; c<COUNTER_LAST; c++)
- counters[c].reset();
-
- for (int t=0; t<EXPLICIT_TIMER_LAST; t++)
- eTimers[t].reset();
-
- // reset the event vector so all previous events are "erased"
- (*it)->resetEventVector();
-
- // May need to restart the explicit timers in thread zero?
- }
- KMP_START_EXPLICIT_TIMER(OMP_serial);
- KMP_START_EXPLICIT_TIMER(OMP_start_end);
-}
-
-// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
-void __kmp_output_stats(const char * heading)
-{
- __kmp_stats_global_output.outputStats(heading);
- __kmp_reset_stats();
-}
-
-void __kmp_accumulate_stats_at_exit(void)
-{
- // Only do this once.
- if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
- return;
-
- __kmp_output_stats("Statistics on exit");
- return;
-}
-
-void __kmp_stats_init(void)
-{
- return;
-}
-
-} // extern "C"
-
+/** @file kmp_stats.cpp
+ * Statistics gathering and processing.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp.h"
+#include "kmp_str.h"
+#include "kmp_lock.h"
+#include "kmp_stats.h"
+
+#include <algorithm>
+#include <sstream>
+#include <iomanip>
+#include <stdlib.h> // for atexit
+
+#define STRINGIZE2(x) #x
+#define STRINGIZE(x) STRINGIZE2(x)
+
+#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
+statInfo timeStat::timerInfo[] = {
+ KMP_FOREACH_TIMER(expandName,0)
+ {0,0}
+};
+const statInfo counter::counterInfo[] = {
+ KMP_FOREACH_COUNTER(expandName,0)
+ {0,0}
+};
+#undef expandName
+
+#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0},
+kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
+ KMP_FOREACH_TIMER(expandName,0)
+ {0.0,0.0,0.0}
+};
+#undef expandName
+
+const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = {
+ {1.0, 0.0, 0.0}, // red
+ {1.0, 0.6, 0.0}, // orange
+ {1.0, 1.0, 0.0}, // yellow
+ {0.0, 1.0, 0.0}, // green
+ {0.0, 0.0, 1.0}, // blue
+ {0.6, 0.2, 0.8}, // purple
+ {1.0, 0.0, 1.0}, // magenta
+ {0.0, 0.4, 0.2}, // dark green
+ {1.0, 1.0, 0.6}, // light yellow
+ {0.6, 0.4, 0.6}, // dirty purple
+ {0.0, 1.0, 1.0}, // cyan
+ {1.0, 0.4, 0.8}, // pink
+ {0.5, 0.5, 0.5}, // grey
+ {0.8, 0.7, 0.5}, // brown
+ {0.6, 0.6, 1.0}, // light blue
+ {1.0, 0.7, 0.5}, // peach
+ {0.8, 0.5, 1.0}, // lavender
+ {0.6, 0.0, 0.0}, // dark red
+ {0.7, 0.6, 0.0}, // gold
+ {0.0, 0.0, 0.0} // black
+};
+
+// Ensure that the atexit handler only runs once.
+static uint32_t statsPrinted = 0;
+
+// output interface
+static kmp_stats_output_module __kmp_stats_global_output;
+
+/* ****************************************************** */
+/* ************* statistic member functions ************* */
+
+void statistic::addSample(double sample)
+{
+ double delta = sample - meanVal;
+
+ sampleCount = sampleCount + 1;
+ meanVal = meanVal + delta/sampleCount;
+ m2 = m2 + delta*(sample - meanVal);
+
+ minVal = std::min(minVal, sample);
+ maxVal = std::max(maxVal, sample);
+}
+
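// For reference, a sketch of the recurrence addSample() above implements (Welford's
// online algorithm; kmp_stats.h points at the Wikipedia article on online variance):
//     delta_n = x_n - mean_{n-1}
//     mean_n  = mean_{n-1} + delta_n / n
//     M2_n    = M2_{n-1} + delta_n * (x_n - mean_n)
// so the population variance is m2 / sampleCount, which is exactly what
// getSD() (sqrt(m2/sampleCount) in kmp_stats.h) takes the square root of.
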
+statistic & statistic::operator+= (const statistic & other)
+{
+ if (sampleCount == 0)
+ {
+ *this = other;
+ return *this;
+ }
+
+ uint64_t newSampleCount = sampleCount + other.sampleCount;
+ double dnsc = double(newSampleCount);
+ double dsc = double(sampleCount);
+ double dscBydnsc = dsc/dnsc;
+ double dosc = double(other.sampleCount);
+ double delta = other.meanVal - meanVal;
+
+ // Try to order these calculations to avoid overflows.
+ // If this were Fortran, then the compiler would not be able to re-order over brackets.
+ // In C++ it may be legal to do that (we certainly hope it doesn't, and The C++ Programming Language, 2nd edition
+ // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation
+ // really is associative (which floating addition isn't...)).
+ meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc);
+ m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta;
+ minVal = std::min (minVal, other.minVal);
+ maxVal = std::max (maxVal, other.maxVal);
+ sampleCount = newSampleCount;
+
+
+ return *this;
+}
+
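// Likewise, a sketch of the pairwise combination operator+= above performs (the form
// usually attributed to Chan et al., with n_a, n_b the two sample counts, n = n_a + n_b):
//     delta = mean_b - mean_a
//     mean  = mean_a*(n_a/n) + mean_b*(n_b/n)
//     M2    = M2_a + M2_b + (n_a*n_b/n)*delta^2
// In the code, dscBydnsc is n_a/n and dosc is n_b, so dscBydnsc*dosc*delta*delta
// supplies the last term.
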
+void statistic::scale(double factor)
+{
+ minVal = minVal*factor;
+ maxVal = maxVal*factor;
+ meanVal= meanVal*factor;
+ m2 = m2*factor*factor;
+ return;
+}
+
+std::string statistic::format(char unit, bool total) const
+{
+ std::string result = formatSI(sampleCount,9,' ');
+
+ result = result + std::string(", ") + formatSI(minVal, 9, unit);
+ result = result + std::string(", ") + formatSI(meanVal, 9, unit);
+ result = result + std::string(", ") + formatSI(maxVal, 9, unit);
+ if (total)
+ result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit);
+ result = result + std::string(", ") + formatSI(getSD(), 9, unit);
+
+ return result;
+}
+
+/* ********************************************************** */
+/* ************* explicitTimer member functions ************* */
+
+void explicitTimer::start(timer_e timerEnumValue) {
+ startTime = tsc_tick_count::now();
+ if(timeStat::logEvent(timerEnumValue)) {
+ __kmp_stats_thread_ptr->incrementNestValue();
+ }
+ return;
+}
+
+void explicitTimer::stop(timer_e timerEnumValue) {
+ if (startTime.getValue() == 0)
+ return;
+
+ tsc_tick_count finishTime = tsc_tick_count::now();
+
+ //stat->addSample ((tsc_tick_count::now() - startTime).ticks());
+ stat->addSample ((finishTime - startTime).ticks());
+
+ if(timeStat::logEvent(timerEnumValue)) {
+ __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
+ __kmp_stats_thread_ptr->decrementNestValue();
+ }
+
+ /* We accept the risk that we drop a sample because it really did start at t==0. */
+ startTime = 0;
+ return;
+}
+
+/* ******************************************************************* */
+/* ************* kmp_stats_event_vector member functions ************* */
+
+void kmp_stats_event_vector::deallocate() {
+ __kmp_free(events);
+ internal_size = 0;
+ allocated_size = 0;
+ events = NULL;
+}
+
+// This function is for qsort() which requires the compare function to return
+// either a negative number if event1 < event2, a positive number if event1 > event2
+// or zero if event1 == event2.
+// This sorts by start time (lowest to highest).
+int compare_two_events(const void* event1, const void* event2) {
+ kmp_stats_event* ev1 = (kmp_stats_event*)event1;
+ kmp_stats_event* ev2 = (kmp_stats_event*)event2;
+
+ if(ev1->getStart() < ev2->getStart()) return -1;
+ else if(ev1->getStart() > ev2->getStart()) return 1;
+ else return 0;
+}
+
+void kmp_stats_event_vector::sort() {
+ qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
+}
+
+/* *********************************************************** */
+/* ************* kmp_stats_list member functions ************* */
+
+// returns a pointer to newly created stats node
+kmp_stats_list* kmp_stats_list::push_back(int gtid) {
+ kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list));
+ // placement new: we already have raw space and a pointer, so this just runs the constructor (hence __kmp_allocate instead of C++ new)
+ new (newnode) kmp_stats_list();
+ newnode->setGtid(gtid);
+ newnode->prev = this->prev;
+ newnode->next = this;
+ newnode->prev->next = newnode;
+ newnode->next->prev = newnode;
+ return newnode;
+}
+void kmp_stats_list::deallocate() {
+ kmp_stats_list* ptr = this->next;
+ kmp_stats_list* delptr = this->next;
+ while(ptr != this) {
+ delptr = ptr;
+ ptr=ptr->next;
+ // placement new means we have to explicitly call destructor.
+ delptr->_event_vector.deallocate();
+ delptr->~kmp_stats_list();
+ __kmp_free(delptr);
+ }
+}
+kmp_stats_list::iterator kmp_stats_list::begin() {
+ kmp_stats_list::iterator it;
+ it.ptr = this->next;
+ return it;
+}
+kmp_stats_list::iterator kmp_stats_list::end() {
+ kmp_stats_list::iterator it;
+ it.ptr = this;
+ return it;
+}
+int kmp_stats_list::size() {
+ int retval;
+ kmp_stats_list::iterator it;
+ for(retval=0, it=begin(); it!=end(); it++, retval++) {}
+ return retval;
+}
+
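// A descriptive note, inferred from push_back()/begin()/end()/deallocate() above rather
// than stated in the original: kmp_stats_list is a circular, doubly-linked intrusive list
// in which the head object itself is the sentinel -- begin() is head->next, end() is the
// head, push_back() links the new node in just before the head, and deallocate() walks
// forward until it comes back around to the head.
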
+/* ********************************************************************* */
+/* ************* kmp_stats_list::iterator member functions ************* */
+
+kmp_stats_list::iterator::iterator() : ptr(NULL) {}
+kmp_stats_list::iterator::~iterator() {}
+kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
+ this->ptr = this->ptr->next;
+ return *this;
+}
+kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
+ this->ptr = this->ptr->next;
+ return *this;
+}
+kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
+ this->ptr = this->ptr->prev;
+ return *this;
+}
+kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
+ this->ptr = this->ptr->prev;
+ return *this;
+}
+bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) {
+ return this->ptr!=rhs.ptr;
+}
+bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) {
+ return this->ptr==rhs.ptr;
+}
+kmp_stats_list* kmp_stats_list::iterator::operator*() const {
+ return this->ptr;
+}
+
+/* *************************************************************** */
+/* ************* kmp_stats_output_module functions ************** */
+
+const char* kmp_stats_output_module::outputFileName = NULL;
+const char* kmp_stats_output_module::eventsFileName = NULL;
+const char* kmp_stats_output_module::plotFileName = NULL;
+int kmp_stats_output_module::printPerThreadFlag = 0;
+int kmp_stats_output_module::printPerThreadEventsFlag = 0;
+
+ // init() is called very early in execution, from the constructor of __kmp_stats_global_output
+void kmp_stats_output_module::init()
+{
+ char * statsFileName = getenv("KMP_STATS_FILE");
+ eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
+ plotFileName = getenv("KMP_STATS_PLOT_FILE");
+ char * threadStats = getenv("KMP_STATS_THREADS");
+ char * threadEvents = getenv("KMP_STATS_EVENTS");
+
+ // set the stats output filenames based on environment variables and defaults
+ outputFileName = statsFileName;
+ eventsFileName = eventsFileName ? eventsFileName : "events.dat";
+ plotFileName = plotFileName ? plotFileName : "events.plt";
+
+ // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes
+ printPerThreadFlag = __kmp_str_match_true(threadStats);
+ printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
+
+ if(printPerThreadEventsFlag) {
+ // assigns a color to each timer for printing
+ setupEventColors();
+ } else {
+ // will clear flag so that no event will be logged
+ timeStat::clearEventFlags();
+ }
+
+ return;
+}
+
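// A hedged configuration example for init() above (the values are hypothetical; the
// behaviour follows from the defaults and file handling elsewhere in this file):
//   KMP_STATS_FILE=stats.txt  -> aggregate statistics are appended to stats.txt
//                                (if unset, they are written to stderr)
//   KMP_STATS_THREADS=true    -> per-thread statistics are printed as well
//   KMP_STATS_EVENTS=true     -> per-thread events go to events.dat (or to
//                                KMP_STATS_EVENTS_FILE if set), together with a
//                                Ploticus script events.plt (or KMP_STATS_PLOT_FILE).
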
+void kmp_stats_output_module::setupEventColors() {
+ int i;
+ int globalColorIndex = 0;
+ int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
+ for(i=0;i<TIMER_LAST;i++) {
+ if(timeStat::logEvent((timer_e)i)) {
+ timerColorInfo[i] = globalColorArray[globalColorIndex];
+ globalColorIndex = (globalColorIndex+1)%numGlobalColors;
+ }
+ }
+ return;
+}
+
+void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers)
+{
+ if (areTimers)
+ {
+ // Check whether we have any useful timers: since we don't print zero-value timers, we need to avoid
+ // printing a header followed by no data.
+ bool haveTimers = false;
+ for (int s = 0; s<TIMER_LAST; s++)
+ {
+ if (theStats[s].getCount() != 0)
+ {
+ haveTimers = true;
+ break;
+ }
+ }
+ if (!haveTimers)
+ return;
+ }
+
+ // Print
+ const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,";
+ fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title);
+ if (areTimers) {
+ for (int s = 0; s<TIMER_LAST; s++) {
+ statistic const * stat = &theStats[s];
+ if (stat->getCount() != 0) {
+ char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
+ fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str());
+ }
+ }
+ } else { // Counters
+ for (int s = 0; s<COUNTER_LAST; s++) {
+ statistic const * stat = &theStats[s];
+ fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str());
+ }
+ }
+}
+
+void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters)
+{
+ // We print all the counters even if they are zero.
+ // That makes it easier to slice them into a spreadsheet if you need to.
+ fprintf (statsOut, "\nCounter, Count\n");
+ for (int c = 0; c<COUNTER_LAST; c++) {
+ counter const * stat = &theCounters[c];
+ fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str());
+ }
+}
+
+void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) {
+ // sort by start time before printing
+ theEvents->sort();
+ for (int i = 0; i < theEvents->size(); i++) {
+ kmp_stats_event ev = theEvents->at(i);
+ rgb_color color = getEventColor(ev.getTimerName());
+ fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n",
+ gtid,
+ ev.getStart(),
+ ev.getStop(),
+ 1.2 - (ev.getNestLevel() * 0.2),
+ color.r, color.g, color.b,
+ timeStat::name(ev.getTimerName())
+ );
+ }
+ return;
+}
+
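// For reference, each record written by printEvents() above has the form (values hypothetical):
//   <gtid> <start tick> <stop tick> <bar width> rgb(<r>,<g>,<b>) <timer name>
// for example
//   0 1200 5400 1.2 rgb(1.0,0.0,0.0) KMP_fork_barrier
// These are the fields 1-5 consumed by the "#proc bars" section that
// printPloticusFile() below writes into the plot description.
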
+void kmp_stats_output_module::windupExplicitTimers()
+{
+ // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explicit timers in all threads
+ // and say "it's over".
+ // If the timer wasn't running, this won't record anything anyway.
+ kmp_stats_list::iterator it;
+ for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
+ (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
+ }
+ }
+}
+
+void kmp_stats_output_module::printPloticusFile() {
+ int i;
+ int size = __kmp_stats_list.size();
+ FILE* plotOut = fopen(plotFileName, "w+");
+
+ fprintf(plotOut, "#proc page\n"
+ " pagesize: 15 10\n"
+ " scale: 1.0\n\n");
+
+ fprintf(plotOut, "#proc getdata\n"
+ " file: %s\n\n",
+ eventsFileName);
+
+ fprintf(plotOut, "#proc areadef\n"
+ " title: OpenMP Sampling Timeline\n"
+ " titledetails: align=center size=16\n"
+ " rectangle: 1 1 13 9\n"
+ " xautorange: datafield=2,3\n"
+ " yautorange: -1 %d\n\n",
+ size);
+
+ fprintf(plotOut, "#proc xaxis\n"
+ " stubs: inc\n"
+ " stubdetails: size=12\n"
+ " label: Time (ticks)\n"
+ " labeldetails: size=14\n\n");
+
+ fprintf(plotOut, "#proc yaxis\n"
+ " stubs: inc 1\n"
+ " stubrange: 0 %d\n"
+ " stubdetails: size=12\n"
+ " label: Thread #\n"
+ " labeldetails: size=14\n\n",
+ size-1);
+
+ fprintf(plotOut, "#proc bars\n"
+ " exactcolorfield: 5\n"
+ " axis: x\n"
+ " locfield: 1\n"
+ " segmentfields: 2 3\n"
+ " barwidthfield: 4\n\n");
+
+ // create legend entries corresponding to the timer color
+ for(i=0;i<TIMER_LAST;i++) {
+ if(timeStat::logEvent((timer_e)i)) {
+ rgb_color c = getEventColor((timer_e)i);
+ fprintf(plotOut, "#proc legendentry\n"
+ " sampletype: color\n"
+ " label: %s\n"
+ " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
+ timeStat::name((timer_e)i),
+ c.r, c.g, c.b);
+
+ }
+ }
+
+ fprintf(plotOut, "#proc legend\n"
+ " format: down\n"
+ " location: max max\n\n");
+ fclose(plotOut);
+ return;
+}
+
+void kmp_stats_output_module::outputStats(const char* heading)
+{
+ statistic allStats[TIMER_LAST];
+ statistic allCounters[COUNTER_LAST];
+
+ // stop all the explicit timers for all threads
+ windupExplicitTimers();
+
+ FILE * eventsOut;
+ FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr;
+
+ if (eventPrintingEnabled()) {
+ eventsOut = fopen(eventsFileName, "w+");
+ }
+
+ if (!statsOut)
+ statsOut = stderr;
+
+ fprintf(statsOut, "%s\n",heading);
+ // Accumulate across threads.
+ kmp_stats_list::iterator it;
+ for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ int t = (*it)->getGtid();
+ // Output per thread stats if requested.
+ if (perThreadPrintingEnabled()) {
+ fprintf (statsOut, "Thread %d\n", t);
+ printStats(statsOut, (*it)->getTimers(), true);
+ printCounters(statsOut, (*it)->getCounters());
+ fprintf(statsOut,"\n");
+ }
+ // Output per thread events if requested.
+ if (eventPrintingEnabled()) {
+ kmp_stats_event_vector events = (*it)->getEventVector();
+ printEvents(eventsOut, &events, t);
+ }
+
+ for (int s = 0; s<TIMER_LAST; s++) {
+ // See if we should ignore this timer when aggregating
+ if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
+ (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master
+ timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it.
+ )
+ {
+ continue;
+ }
+
+ statistic * threadStat = (*it)->getTimer(timer_e(s));
+ allStats[s] += *threadStat;
+ }
+
+ // Special handling for synthesized statistics.
+ // These just have to be coded specially here for now.
+ // At present we only have a few:
+ // The total parallel work done in each thread.
+ // The variance here makes it easy to see load imbalance over the whole program (though, of course,
+ // it's possible to have a code with awful load balance in every parallel region but perfect load
+ // balance over the whole program.)
+ // The time spent in barriers in each thread.
+ allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
+
+ // Time in explicit barriers.
+ allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
+
+ for (int c = 0; c<COUNTER_LAST; c++) {
+ if (counter::masterOnly(counter_e(c)) && t != 0)
+ continue;
+ allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue());
+ }
+ }
+
+ if (eventPrintingEnabled()) {
+ printPloticusFile();
+ fclose(eventsOut);
+ }
+
+ fprintf (statsOut, "Aggregate for all threads\n");
+ printStats (statsOut, &allStats[0], true);
+ fprintf (statsOut, "\n");
+ printStats (statsOut, &allCounters[0], false);
+
+ if (statsOut != stderr)
+ fclose(statsOut);
+
+}
+
+/* ************************************************** */
+/* ************* exported C functions ************** */
+
+ // No name mangling for these functions; we want the C files to be able to get at them.
+extern "C" {
+
+void __kmp_reset_stats()
+{
+ kmp_stats_list::iterator it;
+ for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ timeStat * timers = (*it)->getTimers();
+ counter * counters = (*it)->getCounters();
+ explicitTimer * eTimers = (*it)->getExplicitTimers();
+
+ for (int t = 0; t<TIMER_LAST; t++)
+ timers[t].reset();
+
+ for (int c = 0; c<COUNTER_LAST; c++)
+ counters[c].reset();
+
+ for (int t=0; t<EXPLICIT_TIMER_LAST; t++)
+ eTimers[t].reset();
+
+ // reset the event vector so all previous events are "erased"
+ (*it)->resetEventVector();
+
+ // May need to restart the explicit timers in thread zero?
+ }
+ KMP_START_EXPLICIT_TIMER(OMP_serial);
+ KMP_START_EXPLICIT_TIMER(OMP_start_end);
+}
+
+// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
+void __kmp_output_stats(const char * heading)
+{
+ __kmp_stats_global_output.outputStats(heading);
+ __kmp_reset_stats();
+}
+
+void __kmp_accumulate_stats_at_exit(void)
+{
+ // Only do this once.
+ if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
+ return;
+
+ __kmp_output_stats("Statistics on exit");
+ return;
+}
+
+void __kmp_stats_init(void)
+{
+ return;
+}
+
+} // extern "C"
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats.h b/contrib/libs/cxxsupp/openmp/kmp_stats.h
index c52c964456..20cec3efcc 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_stats.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_stats.h
@@ -1,748 +1,748 @@
-#ifndef KMP_STATS_H
-#define KMP_STATS_H
-
-/** @file kmp_stats.h
- * Functions for collecting statistics.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp_config.h"
-
-#if KMP_STATS_ENABLED
-/*
- * Statistics accumulator.
- * Accumulates number of samples and computes min, max, mean, standard deviation on the fly.
- *
- * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
- */
-
-#include <limits>
-#include <math.h>
-#include <string>
-#include <stdint.h>
-#include <new> // placement new
-#include "kmp_stats_timing.h"
-
-/*
- * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
- * are intended for the runtime library developer.
- */
-// #define KMP_DEVELOPER_STATS 1
-
-/*!
- * @ingroup STATS_GATHERING
- * \brief flags to describe the statistic ( timers or counter )
- *
-*/
-class stats_flags_e {
- public:
- const static int onlyInMaster = 1<<0; //!< statistic is valid only for master
- const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output
- const static int synthesized = 1<<2; //!< statistic's value is created atexit time in the __kmp_output_stats function
- const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads
- const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers)
-};
-
-/*!
- * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
- *
- * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
- * @param arg a user defined argument to send to the user defined macro
- *
- * \details A counter counts the occurrence of some event.
- * Each thread accumulates its own count; at the end of execution the counts are aggregated, treating each thread
- * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
- * The min,mean,max are therefore the values for the threads.
- * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do.
- * All of the tables and printing is generated from this macro.
- * Format is "macro(name, flags, arg)"
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_FOREACH_COUNTER(macro, arg) \
- macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \
- macro (OMP_NESTED_PARALLEL, 0, arg) \
- macro (OMP_FOR_static, 0, arg) \
- macro (OMP_FOR_dynamic, 0, arg) \
- macro (OMP_DISTRIBUTE, 0, arg) \
- macro (OMP_BARRIER, 0, arg) \
- macro (OMP_CRITICAL,0, arg) \
- macro (OMP_SINGLE, 0, arg) \
- macro (OMP_MASTER, 0, arg) \
- macro (OMP_TEAMS, 0, arg) \
- macro (OMP_set_lock, 0, arg) \
- macro (OMP_test_lock, 0, arg) \
- macro (REDUCE_wait, 0, arg) \
- macro (REDUCE_nowait, 0, arg) \
- macro (OMP_TASKYIELD, 0, arg) \
- macro (TASK_executed, 0, arg) \
- macro (TASK_cancelled, 0, arg) \
- macro (TASK_stolen, 0, arg) \
- macro (LAST,0,arg)
-
-// OMP_PARALLEL_args -- the number of arguments passed to a fork
-// FOR_static_iterations -- Number of available parallel chunks of work in a static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
-// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
-
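// A hedged example of the workflow described above (the counter name is hypothetical;
// KMP_BLOCK_COUNTER is the counting macro the comment above refers to):
//   1. add a line to KMP_FOREACH_COUNTER:
//        macro (OMP_TASKWAIT, 0, arg)                          \
//   2. count the event at the point of interest:
//        KMP_BLOCK_COUNTER(OMP_TASKWAIT);
// The enum value COUNTER_OMP_TASKWAIT, its printable name and the per-thread
// aggregation then all fall out of the ENUMERATE/expandName expansions of the
// same macro list.
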
-/*!
- * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
- *
- * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
- * @param arg a user defined argument to send to the user defined macro
- *
- * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads.
- * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
- * as well).
- * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
- * Format is "macro(name, flags, arg)"
- *
- * @ingroup STATS_GATHERING2
- */
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \
- macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \
- macro (OMP_work, 0, arg) \
- macro (Total_work, stats_flags_e::synthesized, arg) \
- macro (OMP_barrier, 0, arg) \
- macro (Total_barrier, stats_flags_e::synthesized, arg) \
- macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \
- macro (FOR_static_scheduling, 0, arg) \
- macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
- macro (FOR_dynamic_scheduling, 0, arg) \
- macro (TASK_execution, 0, arg) \
- macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \
- macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \
- macro (OMP_single, 0, arg) \
- macro (OMP_master, 0, arg) \
- KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
- macro (LAST,0, arg)
-
-
-// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit
-// OMP_serial -- thread zero time executing serial code
-// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread)
-// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed.
-// OMP_barrier -- time at "real" barriers
-// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread
-// FOR_static_scheduling -- time spent doing scheduling for a static "for"
-// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for"
-
-#if (KMP_DEVELOPER_STATS)
-// Timers which are of interest to runtime library developers, not end users.
-// These have to be explicitly enabled in addition to the other stats.
-
-// KMP_fork_barrier -- time in __kmp_fork_barrier
-// KMP_join_barrier -- time in __kmp_join_barrier
-// KMP_barrier -- time in __kmp_barrier
-// KMP_end_split_barrier -- time in __kmp_end_split_barrier
-// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy
-// KMP_icv_copy -- start/stop timer for any ICV copying
-// KMP_linear_gather -- time in __kmp_linear_barrier_gather
-// KMP_linear_release -- time in __kmp_linear_barrier_release
-// KMP_tree_gather -- time in __kmp_tree_barrier_gather
-// KMP_tree_release -- time in __kmp_tree_barrier_release
-// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
-// KMP_hyper_release -- time in __kmp_hyper_barrier_release
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
- macro (KMP_fork_call, 0, arg) \
- macro (KMP_join_call, 0, arg) \
- macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_barrier, 0, arg) \
- macro (KMP_end_split_barrier, 0, arg) \
- macro (KMP_hier_gather, 0, arg) \
- macro (KMP_hier_release, 0, arg) \
- macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
- macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
- macro (KMP_linear_gather, 0, arg) \
- macro (KMP_linear_release, 0, arg) \
- macro (KMP_tree_gather, 0, arg) \
- macro (KMP_tree_release, 0, arg) \
- macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_resume, stats_flags_e::logEvent, arg) \
- macro (USER_suspend, stats_flags_e::logEvent, arg) \
- macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
- macro (KMP_allocate_team, 0, arg) \
- macro (KMP_setup_icv_copy, 0, arg) \
- macro (USER_icv_copy, 0, arg)
-#else
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
-#endif
-
-/*!
- * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
- *
- * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
- * @param arg a user defined argument to send to the user defined macro
- *
- * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN!
- *
- * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics).
- * We allocate these on a per-thread basis, and explicitly start and stop them.
- * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't
- * need to be defined here.
- * The name here should be the same as that of a timer above.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_start_end, 0, arg) \
- macro(OMP_single, 0, arg) \
- macro(OMP_master, 0, arg) \
- KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
- macro(LAST, 0, arg)
-
-#if (KMP_DEVELOPER_STATS)
-# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \
- macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
-#else
-# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
-#endif
-
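// A minimal sketch of the explicit-timer workflow described above. KMP_START_EXPLICIT_TIMER
// is used this way in kmp_stats.cpp; KMP_STOP_EXPLICIT_TIMER is assumed here to be its
// counterpart for ending the interval:
//   KMP_START_EXPLICIT_TIMER(OMP_serial);   // start the per-thread OMP_serial timer
//   /* ... region that is not conveniently block-scoped ... */
//   KMP_STOP_EXPLICIT_TIMER(OMP_serial);    // record one sample into the OMP_serial statistic
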
-#define ENUMERATE(name,ignore,prefix) prefix##name,
-enum timer_e {
- KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
-};
-
-enum explicit_timer_e {
- KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
-};
-
-enum counter_e {
- KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
-};
-#undef ENUMERATE
-
-class statistic
-{
- double minVal;
- double maxVal;
- double meanVal;
- double m2;
- uint64_t sampleCount;
-
- public:
- statistic() { reset(); }
- statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}
-
- double getMin() const { return minVal; }
- double getMean() const { return meanVal; }
- double getMax() const { return maxVal; }
- uint64_t getCount() const { return sampleCount; }
- double getSD() const { return sqrt(m2/sampleCount); }
- double getTotal() const { return sampleCount*meanVal; }
-
- void reset()
- {
- minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
- meanVal= 0.0;
- m2 = 0.0;
- sampleCount = 0;
- }
- void addSample(double sample);
- void scale (double factor);
- void scaleDown(double f) { scale (1./f); }
- statistic & operator+= (statistic const & other);
-
- std::string format(char unit, bool total=false) const;
-};
-
-struct statInfo
-{
- const char * name;
- uint32_t flags;
-};
-
-class timeStat : public statistic
-{
- static statInfo timerInfo[];
-
- public:
- timeStat() : statistic() {}
- static const char * name(timer_e e) { return timerInfo[e].name; }
- static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; }
- static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; }
- static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; }
- static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; }
- static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; }
- static void clearEventFlags() {
- int i;
- for(i=0;i<TIMER_LAST;i++) {
- timerInfo[i].flags &= (~(stats_flags_e::logEvent));
- }
- }
-};
-
-// Where we need to start and end the timer explicitly, this version can be used.
-// Since these timers normally aren't nicely scoped and don't have a good place to live
-// on the stack of the thread, they're more work to use.
-class explicitTimer
-{
- timeStat * stat;
- tsc_tick_count startTime;
-
- public:
- explicitTimer () : stat(0), startTime(0) { }
- explicitTimer (timeStat * s) : stat(s), startTime() { }
-
- void setStat (timeStat *s) { stat = s; }
- void start(timer_e timerEnumValue);
- void stop(timer_e timerEnumValue);
- void reset() { startTime = 0; }
-};
-
-// Where all you need is to time a block, this is enough.
-// (It avoids the need to have an explicit end, leaving the scope suffices.)
-class blockTimer : public explicitTimer
-{
- timer_e timerEnumValue;
- public:
- blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
- ~blockTimer() { stop(timerEnumValue); }
-};
-
-// If all you want is a count, then you can use this...
-// The individual per-thread counts will be aggregated into a statistic at program exit.
-class counter
-{
- uint64_t value;
- static const statInfo counterInfo[];
-
- public:
- counter() : value(0) {}
- void increment() { value++; }
- uint64_t getValue() const { return value; }
- void reset() { value = 0; }
- static const char * name(counter_e e) { return counterInfo[e].name; }
- static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; }
-};
-
-/* ****************************************************************
- Class to implement an event
-
-   There are four components to an event: start time, stop time,
-   nest_level, and timer_name.
- The start and stop time should be obvious (recorded in clock ticks).
- The nest_level relates to the bar width in the timeline graph.
- The timer_name is used to determine which timer event triggered this event.
-
-   The interface to this class is through four read-only operations:
- 1) getStart() -- returns the start time as 64 bit integer
- 2) getStop() -- returns the stop time as 64 bit integer
- 3) getNestLevel() -- returns the nest level of the event
- 4) getTimerName() -- returns the timer name that triggered event
-
- *MORE ON NEST_LEVEL*
- The nest level is used in the bar graph that represents the timeline.
-   Its main purpose is for showing how events are nested inside each other.
-   For example, say events A, B, and C are recorded. If the timeline
- looks like this:
-
-Begin -------------------------------------------------------------> Time
- | | | | | |
- A B C C B A
- start start start end end end
-
- Then A, B, C will have a nest level of 1, 2, 3 respectively.
-   These values are then used to calculate the bar width so you can
- see that inside A, B has occurred, and inside B, C has occurred.
- Currently, this is shown with A's bar width being larger than B's
- bar width, and B's bar width being larger than C's bar width.
-
-**************************************************************** */
-class kmp_stats_event {
- uint64_t start;
- uint64_t stop;
- int nest_level;
- timer_e timer_name;
- public:
- kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
- kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
- inline uint64_t getStart() const { return start; }
- inline uint64_t getStop() const { return stop; }
- inline int getNestLevel() const { return nest_level; }
- inline timer_e getTimerName() const { return timer_name; }
-};
-
-/* ****************************************************************
- Class to implement a dynamically expandable array of events
-
- ---------------------------------------------------------
- | event 1 | event 2 | event 3 | event 4 | ... | event N |
- ---------------------------------------------------------
-
- An event is pushed onto the back of this array at every
- explicitTimer->stop() call. The event records the thread #,
- start time, stop time, and nest level related to the bar width.
-
- The event vector starts at size INIT_SIZE and grows (doubles in size)
- if needed. An implication of this behavior is that log(N)
- reallocations are needed (where N is number of events). If you want
- to avoid reallocations, then set INIT_SIZE to a large value.
-
-   The interface to this class is through six operations:
- 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory
- 2) size() -- returns the number of valid elements in the vector
- 3) push_back(start, stop, nest, timer_name) -- pushes an event onto
- the back of the array
- 4) deallocate() -- frees all memory associated with the vector
- 5) sort() -- sorts the vector by start time
- 6) operator[index] or at(index) -- returns event reference at that index
-
-**************************************************************** */
-class kmp_stats_event_vector {
- kmp_stats_event* events;
- int internal_size;
- int allocated_size;
- static const int INIT_SIZE = 1024;
- public:
- kmp_stats_event_vector() {
- events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE);
- internal_size = 0;
- allocated_size = INIT_SIZE;
- }
- ~kmp_stats_event_vector() {}
- inline void reset() { internal_size = 0; }
- inline int size() const { return internal_size; }
- void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
- int i;
- if(internal_size == allocated_size) {
- kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2);
- for(i=0;i<internal_size;i++) tmp[i] = events[i];
- __kmp_free(events);
- events = tmp;
- allocated_size*=2;
- }
- events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
- internal_size++;
- return;
- }
- void deallocate();
- void sort();
- const kmp_stats_event & operator[](int index) const { return events[index]; }
- kmp_stats_event & operator[](int index) { return events[index]; }
- const kmp_stats_event & at(int index) const { return events[index]; }
- kmp_stats_event & at(int index) { return events[index]; }
-};
-
-/* ****************************************************************
- Class to implement a doubly-linked, circular, statistics list
-
- |---| ---> |---| ---> |---| ---> |---| ---> ... next
- | | | | | | | |
- |---| <--- |---| <--- |---| <--- |---| <--- ... prev
- Sentinel first second third
- Node node node node
-
- The Sentinel Node is the user handle on the list.
- The first node corresponds to thread 0's statistics.
- The second node corresponds to thread 1's statistics and so on...
-
- Each node has a _timers, _counters, and _explicitTimers array to
- hold that thread's statistics. The _explicitTimers
- point to the correct _timer and update its statistics at every stop() call.
- The explicitTimers' pointers are set up in the constructor.
- Each node also has an event vector to hold that thread's timing events.
- The event vector expands as necessary and records the start-stop times
- for each timer.
-
- The nestLevel variable is for plotting events and is related
- to the bar width in the timeline graph.
-
- Every thread will have a __thread local pointer to its node in
- the list. The sentinel node is used by the master thread to
- store "dummy" statistics before __kmp_create_worker() is called.
-
-**************************************************************** */
-class kmp_stats_list {
- int gtid;
- timeStat _timers[TIMER_LAST+1];
- counter _counters[COUNTER_LAST+1];
- explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
- int _nestLevel; // one per thread
- kmp_stats_event_vector _event_vector;
- kmp_stats_list* next;
- kmp_stats_list* prev;
- public:
- kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) {
-#define doInit(name,ignore1,ignore2) \
- getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name));
- KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
-#undef doInit
- }
- ~kmp_stats_list() { }
- inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
- inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
- inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
- inline timeStat * getTimers() { return _timers; }
- inline counter * getCounters() { return _counters; }
- inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
- inline kmp_stats_event_vector & getEventVector() { return _event_vector; }
- inline void resetEventVector() { _event_vector.reset(); }
- inline void incrementNestValue() { _nestLevel++; }
- inline int getNestValue() { return _nestLevel; }
- inline void decrementNestValue() { _nestLevel--; }
- inline int getGtid() const { return gtid; }
- inline void setGtid(int newgtid) { gtid = newgtid; }
- kmp_stats_list* push_back(int gtid); // returns newly created list node
- inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
- _event_vector.push_back(start_time, stop_time, nest_level, name);
- }
- void deallocate();
- class iterator;
- kmp_stats_list::iterator begin();
- kmp_stats_list::iterator end();
- int size();
- class iterator {
- kmp_stats_list* ptr;
- friend kmp_stats_list::iterator kmp_stats_list::begin();
- friend kmp_stats_list::iterator kmp_stats_list::end();
- public:
- iterator();
- ~iterator();
- iterator operator++();
- iterator operator++(int dummy);
- iterator operator--();
- iterator operator--(int dummy);
- bool operator!=(const iterator & rhs);
- bool operator==(const iterator & rhs);
- kmp_stats_list* operator*() const; // dereference operator
- };
-};
-
-/* ****************************************************************
- Class to encapsulate all output functions and the environment variables
-
- This module holds filenames for various outputs (normal stats, events, plot file),
- as well as coloring information for the plot file.
-
- The filenames and flags variables are read from environment variables.
- These are read once by the constructor of the global variable __kmp_stats_output
- which calls init().
-
- During this init() call, event flags for the timeStat::timerInfo[] global array
- are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
-
- The only interface function that is public is outputStats(heading). This function
- should print out everything it needs to, either to files or stderr,
- depending on the environment variables described below
-
- ENVIRONMENT VARIABLES:
- KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file,
- otherwise, print to stderr
- KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either
- KMP_STATS_FILE or stderr
- KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
- otherwise, the plot file is sent to "events.plt"
- KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events
- KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
- otherwise, output is sent to "events.dat"
-
-**************************************************************** */
-class kmp_stats_output_module {
-
- public:
- struct rgb_color {
- float r;
- float g;
- float b;
- };
-
- private:
- static const char* outputFileName;
- static const char* eventsFileName;
- static const char* plotFileName;
- static int printPerThreadFlag;
- static int printPerThreadEventsFlag;
- static const rgb_color globalColorArray[];
- static rgb_color timerColorInfo[];
-
- void init();
- static void setupEventColors();
- static void printPloticusFile();
- static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers);
- static void printCounters(FILE * statsOut, counter const * theCounters);
- static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid);
- static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
- static void windupExplicitTimers();
- bool eventPrintingEnabled() {
- if(printPerThreadEventsFlag) return true;
- else return false;
- }
- bool perThreadPrintingEnabled() {
- if(printPerThreadFlag) return true;
- else return false;
- }
-
- public:
- kmp_stats_output_module() { init(); }
- void outputStats(const char* heading);
-};
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-void __kmp_stats_init();
-void __kmp_reset_stats();
-void __kmp_output_stats(const char *);
-void __kmp_accumulate_stats_at_exit(void);
-// thread local pointer to stats node within list
-extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
-// head to stats list.
-extern kmp_stats_list __kmp_stats_list;
-// lock for __kmp_stats_list
-extern kmp_tas_lock_t __kmp_stats_lock;
-// reference start time
-extern tsc_tick_count __kmp_stats_start_time;
-// interface to output
-extern kmp_stats_output_module __kmp_stats_output;
-
-#ifdef __cplusplus
-}
-#endif
-
-// Simple, standard interfaces that drop out completely if stats aren't enabled
-
-
-/*!
- * \brief Uses specified timer (name) to time code block.
- *
- * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
- *
- * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block
- * and use the destructor to stop the timer. Convenient!
- * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block.
- * I don't think that's a problem.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_TIME_BLOCK(name) \
- blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)
-
-/*!
- * \brief Adds value to specified timer (name).
- *
- * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
- * @param value double precision sample value to add to statistics for the timer
- *
- * \details Use the KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer's statistics.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_COUNT_VALUE(name, value) \
- __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
-
-/*!
- * \brief Increments specified counter (name).
- *
- * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro
- *
- * \details Use the KMP_COUNT_BLOCK(name) macro to increment a statistics counter for the executing thread.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_COUNT_BLOCK(name) \
- __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
-
-/*!
- * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro.
- *
- * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
- *
- * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER()
- * macro to stop the timer, unlike the KMP_TIME_BLOCK(name) macro, which stops its timer implicitly at the end
- * of the code block. All explicit timers are stopped at library exit time before the final statistics are output.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_START_EXPLICIT_TIMER(name) \
- __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)
-
-/*!
- * \brief "Stops" an explicit timer.
- *
- * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
- *
- * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name)
- * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset.
- * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_STOP_EXPLICIT_TIMER(name) \
- __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)
-
-/*!
- * \brief Outputs the current thread statistics and resets them.
- *
- * @param heading_string heading put above the final stats output
- *
- * \details Explicitly stops all timers and outputs all stats.
- * The environment variable `OMPTB_STATSFILE=filename` can be used to output the stats to a file instead of stderr.
- * The environment variable `OMPTB_STATSTHREADS=true|undefined` can be used to output thread-specific stats.
- * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out
- * thread-specific stats, or it can be undefined (not specified in the environment), in which case thread-specific stats won't be printed.
- * Note that all statistics are reset when this macro is called.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_OUTPUT_STATS(heading_string) \
- __kmp_output_stats(heading_string)
-
-/*!
- * \brief Resets all stats (counters to 0, timers to 0 elapsed ticks).
- *
- * \details Reset all stats for all threads.
- *
- * @ingroup STATS_GATHERING
-*/
-#define KMP_RESET_STATS() __kmp_reset_stats()
-
-#if (KMP_DEVELOPER_STATS)
-# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
-# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
-# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
-# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
-# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
-#else
-// Null definitions
-# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
-# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
-# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
-# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-#endif
-
-#else // KMP_STATS_ENABLED
-
-// Null definitions
-#define KMP_TIME_BLOCK(n) ((void)0)
-#define KMP_COUNT_VALUE(n,v) ((void)0)
-#define KMP_COUNT_BLOCK(n) ((void)0)
-#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
-#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)
-
-#define KMP_OUTPUT_STATS(heading_string) ((void)0)
-#define KMP_RESET_STATS() ((void)0)
-
-#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
-#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
-#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
-#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-#endif // KMP_STATS_ENABLED
-
-#endif // KMP_STATS_H
+#ifndef KMP_STATS_H
+#define KMP_STATS_H
+
+/** @file kmp_stats.h
+ * Functions for collecting statistics.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp_config.h"
+
+#if KMP_STATS_ENABLED
+/*
+ * Statistics accumulator.
+ * Accumulates the number of samples and computes min, max, mean, and standard deviation on the fly.
+ *
+ * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
+ */
+
+#include <limits>
+#include <math.h>
+#include <string>
+#include <stdint.h>
+#include <new> // placement new
+#include "kmp_stats_timing.h"
+
+/*
+ * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
+ * are intended for the runtime library developer.
+ */
+// #define KMP_DEVELOPER_STATS 1
+
+/*!
+ * @ingroup STATS_GATHERING
+ * \brief Flags to describe the statistic (timer or counter).
+ *
+*/
+class stats_flags_e {
+ public:
+ const static int onlyInMaster = 1<<0; //!< statistic is valid only for master
+ const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output
+    const static int synthesized  = 1<<2; //!< statistic's value is created at exit time in the __kmp_output_stats function
+ const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads
+ const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers)
+};
+
+/*!
+ * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
+ *
+ * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
+ * @param arg a user defined argument to send to the user defined macro
+ *
+ * \details A counter counts the occurrence of some event.
+ * Each thread accumulates its own count; at the end of execution the counts are aggregated treating each thread
+ * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement.)
+ * The min, mean, and max are therefore the values over the threads.
+ * Adding the counter here and then putting a KMP_COUNT_BLOCK(name) at the point you want to count is all you need to do
+ * (a short usage sketch follows the counter list below). All of the tables and printing are generated from this macro.
+ * Format is "macro(name, flags, arg)"
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_FOREACH_COUNTER(macro, arg) \
+ macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \
+ macro (OMP_NESTED_PARALLEL, 0, arg) \
+ macro (OMP_FOR_static, 0, arg) \
+ macro (OMP_FOR_dynamic, 0, arg) \
+ macro (OMP_DISTRIBUTE, 0, arg) \
+ macro (OMP_BARRIER, 0, arg) \
+ macro (OMP_CRITICAL,0, arg) \
+ macro (OMP_SINGLE, 0, arg) \
+ macro (OMP_MASTER, 0, arg) \
+ macro (OMP_TEAMS, 0, arg) \
+ macro (OMP_set_lock, 0, arg) \
+ macro (OMP_test_lock, 0, arg) \
+ macro (REDUCE_wait, 0, arg) \
+ macro (REDUCE_nowait, 0, arg) \
+ macro (OMP_TASKYIELD, 0, arg) \
+ macro (TASK_executed, 0, arg) \
+ macro (TASK_cancelled, 0, arg) \
+ macro (TASK_stolen, 0, arg) \
+ macro (LAST,0,arg)
+
+// OMP_PARALLEL_args -- the number of arguments passed to a fork
+// FOR_static_iterations -- Number of available parallel chunks of work in a static for
+// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
+// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
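As an illustration of the two-step pattern described in the counter documentation above (a sketch only; OMP_HYPOTHETICAL_EVENT is an invented name, not a counter that exists in this header):

    // 1) Add one entry to the X-macro list above:
    //        macro (OMP_HYPOTHETICAL_EVENT, 0, arg) \
    // 2) Count occurrences at the interesting point in the runtime:
    //        KMP_COUNT_BLOCK(OMP_HYPOTHETICAL_EVENT);
    // The enumerator COUNTER_OMP_HYPOTHETICAL_EVENT and its name/flags table entry
    // are then generated automatically by the ENUMERATE expansion further down.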
+
+/*!
+ * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
+ *
+ * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
+ * @param arg a user defined argument to send to the user defined macro
+ *
+ * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads.
+ * The count is normally a time (in ticks), hence the name "timer". (But it can be any value, so we also use this for
+ * "number of arguments passed to fork".)
+ * For timers the threads are not significant; it's the individual observations that count, so the statistics are kept at that level.
+ * Format is "macro(name, flags, arg)".
+ *
+ * @ingroup STATS_GATHERING2
+ */
+#define KMP_FOREACH_TIMER(macro, arg) \
+ macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \
+ macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \
+ macro (OMP_work, 0, arg) \
+ macro (Total_work, stats_flags_e::synthesized, arg) \
+ macro (OMP_barrier, 0, arg) \
+ macro (Total_barrier, stats_flags_e::synthesized, arg) \
+ macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \
+ macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
+ macro (FOR_dynamic_scheduling, 0, arg) \
+ macro (TASK_execution, 0, arg) \
+ macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \
+ macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+ macro (LAST,0, arg)
+
+
+// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit
+// OMP_serial -- thread zero time executing serial code
+// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread)
+// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed.
+// OMP_barrier -- time at "real" barriers
+// Total_barrier -- a synthesized statistic summarizing how much time was spent at real barriers in each thread
+// FOR_static_scheduling -- time spent doing scheduling for a static "for"
+// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for"
+
+#if (KMP_DEVELOPER_STATS)
+// Timers which are of interest to runtime library developers, not end users.
+// These have to be explicitly enabled in addition to the other stats.
+
+// KMP_fork_barrier -- time in __kmp_fork_barrier
+// KMP_join_barrier -- time in __kmp_join_barrier
+// KMP_barrier -- time in __kmp_barrier
+// KMP_end_split_barrier -- time in __kmp_end_split_barrier
+// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy
+// KMP_icv_copy -- start/stop timer for any ICV copying
+// KMP_linear_gather -- time in __kmp_linear_barrier_gather
+// KMP_linear_release -- time in __kmp_linear_barrier_release
+// KMP_tree_gather -- time in __kmp_tree_barrier_gather
+// KMP_tree_release -- time in __kmp_tree_barrier_release
+// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
+// KMP_hyper_release -- time in __kmp_hyper_barrier_release
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+ macro (KMP_fork_call, 0, arg) \
+ macro (KMP_join_call, 0, arg) \
+ macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
+ macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
+ macro (KMP_barrier, 0, arg) \
+ macro (KMP_end_split_barrier, 0, arg) \
+ macro (KMP_hier_gather, 0, arg) \
+ macro (KMP_hier_release, 0, arg) \
+ macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
+ macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
+ macro (KMP_linear_gather, 0, arg) \
+ macro (KMP_linear_release, 0, arg) \
+ macro (KMP_tree_gather, 0, arg) \
+ macro (KMP_tree_release, 0, arg) \
+ macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
+ macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
+ macro (USER_resume, stats_flags_e::logEvent, arg) \
+ macro (USER_suspend, stats_flags_e::logEvent, arg) \
+ macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
+ macro (KMP_allocate_team, 0, arg) \
+ macro (KMP_setup_icv_copy, 0, arg) \
+ macro (USER_icv_copy, 0, arg)
+#else
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+#endif
+
+/*!
+ * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
+ *
+ * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
+ * @param arg a user defined argument to send to the user defined macro
+ *
+ * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN!
+ *
+ * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics).
+ * We allocate these on a per-thread basis, and explicitly start and stop them.
+ * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't
+ * need to be defined here.
+ * The name here should be the same as that of a timer above.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ macro(OMP_serial, 0, arg) \
+ macro(OMP_start_end, 0, arg) \
+ macro(OMP_single, 0, arg) \
+ macro(OMP_master, 0, arg) \
+ KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
+ macro(LAST, 0, arg)
+
+#if (KMP_DEVELOPER_STATS)
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \
+ macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
+#else
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
+#endif
+
+#define ENUMERATE(name,ignore,prefix) prefix##name,
+enum timer_e {
+ KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
+};
+
+enum explicit_timer_e {
+ KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
+};
+
+enum counter_e {
+ KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
+};
+#undef ENUMERATE
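For clarity, a sketch of what the ENUMERATE expansion produces for the timer list (abbreviated; the actual enumerators follow the KMP_FOREACH_TIMER entries above exactly):

    // enum timer_e {
    //     TIMER_OMP_start_end,
    //     TIMER_OMP_serial,
    //     TIMER_OMP_work,
    //     /* ...one enumerator per macro entry, including the developer timers... */
    //     TIMER_LAST
    // };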
+
+class statistic
+{
+ double minVal;
+ double maxVal;
+ double meanVal;
+ double m2;
+ uint64_t sampleCount;
+
+ public:
+ statistic() { reset(); }
+ statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}
+
+ double getMin() const { return minVal; }
+ double getMean() const { return meanVal; }
+ double getMax() const { return maxVal; }
+ uint64_t getCount() const { return sampleCount; }
+ double getSD() const { return sqrt(m2/sampleCount); }
+ double getTotal() const { return sampleCount*meanVal; }
+
+ void reset()
+ {
+ minVal = std::numeric_limits<double>::max();
+ maxVal = -std::numeric_limits<double>::max();
+ meanVal= 0.0;
+ m2 = 0.0;
+ sampleCount = 0;
+ }
+ void addSample(double sample);
+ void scale (double factor);
+ void scaleDown(double f) { scale (1./f); }
+ statistic & operator+= (statistic const & other);
+
+ std::string format(char unit, bool total=false) const;
+};
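addSample() and scale() are defined elsewhere (not in this header); a minimal sketch of addSample(), assuming it follows the online variance algorithm cited at the top of this file, might look like this (not the library's actual definition):

    void statistic::addSample(double sample)
    {
        if (sample < minVal) minVal = sample;
        if (sample > maxVal) maxVal = sample;

        sampleCount++;
        double delta = sample - meanVal;
        meanVal += delta / sampleCount;       // running mean
        m2 += delta * (sample - meanVal);     // sum of squared deviations, so getSD() == sqrt(m2/sampleCount)
    }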
+
+struct statInfo
+{
+ const char * name;
+ uint32_t flags;
+};
+
+class timeStat : public statistic
+{
+ static statInfo timerInfo[];
+
+ public:
+ timeStat() : statistic() {}
+ static const char * name(timer_e e) { return timerInfo[e].name; }
+ static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; }
+ static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; }
+ static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; }
+ static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; }
+ static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; }
+ static void clearEventFlags() {
+ int i;
+ for(i=0;i<TIMER_LAST;i++) {
+ timerInfo[i].flags &= (~(stats_flags_e::logEvent));
+ }
+ }
+};
+
+// Where we need to start and end the timer explicitly, this version can be used.
+// Since these timers normally aren't nicely scoped and don't have a good place to live
+// on the stack of the thread, they're more work to use.
+class explicitTimer
+{
+ timeStat * stat;
+ tsc_tick_count startTime;
+
+ public:
+ explicitTimer () : stat(0), startTime(0) { }
+ explicitTimer (timeStat * s) : stat(s), startTime() { }
+
+ void setStat (timeStat *s) { stat = s; }
+ void start(timer_e timerEnumValue);
+ void stop(timer_e timerEnumValue);
+ void reset() { startTime = 0; }
+};
+
+// Where all you need is to time a block, this is enough.
+// (It avoids the need to have an explicit end, leaving the scope suffices.)
+class blockTimer : public explicitTimer
+{
+ timer_e timerEnumValue;
+ public:
+ blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
+ ~blockTimer() { stop(timerEnumValue); }
+};
+
+// If all you want is a count, then you can use this...
+// The individual per-thread counts will be aggregated into a statistic at program exit.
+class counter
+{
+ uint64_t value;
+ static const statInfo counterInfo[];
+
+ public:
+ counter() : value(0) {}
+ void increment() { value++; }
+ uint64_t getValue() const { return value; }
+ void reset() { value = 0; }
+ static const char * name(counter_e e) { return counterInfo[e].name; }
+ static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; }
+};
+
+/* ****************************************************************
+ Class to implement an event
+
+   There are four components to an event: start time, stop time,
+   nest_level, and timer_name.
+ The start and stop time should be obvious (recorded in clock ticks).
+ The nest_level relates to the bar width in the timeline graph.
+ The timer_name is used to determine which timer event triggered this event.
+
+   The interface to this class is through four read-only operations:
+ 1) getStart() -- returns the start time as 64 bit integer
+ 2) getStop() -- returns the stop time as 64 bit integer
+ 3) getNestLevel() -- returns the nest level of the event
+ 4) getTimerName() -- returns the timer name that triggered event
+
+ *MORE ON NEST_LEVEL*
+ The nest level is used in the bar graph that represents the timeline.
+   Its main purpose is for showing how events are nested inside each other.
+   For example, say events A, B, and C are recorded. If the timeline
+ looks like this:
+
+Begin -------------------------------------------------------------> Time
+ | | | | | |
+ A B C C B A
+ start start start end end end
+
+ Then A, B, C will have a nest level of 1, 2, 3 respectively.
+   These values are then used to calculate the bar width so you can
+ see that inside A, B has occurred, and inside B, C has occurred.
+ Currently, this is shown with A's bar width being larger than B's
+ bar width, and B's bar width being larger than C's bar width.
+
+**************************************************************** */
+class kmp_stats_event {
+ uint64_t start;
+ uint64_t stop;
+ int nest_level;
+ timer_e timer_name;
+ public:
+ kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
+ kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
+ inline uint64_t getStart() const { return start; }
+ inline uint64_t getStop() const { return stop; }
+ inline int getNestLevel() const { return nest_level; }
+ inline timer_e getTimerName() const { return timer_name; }
+};
+
+/* ****************************************************************
+ Class to implement a dynamically expandable array of events
+
+ ---------------------------------------------------------
+ | event 1 | event 2 | event 3 | event 4 | ... | event N |
+ ---------------------------------------------------------
+
+ An event is pushed onto the back of this array at every
+ explicitTimer->stop() call. The event records the thread #,
+ start time, stop time, and nest level related to the bar width.
+
+ The event vector starts at size INIT_SIZE and grows (doubles in size)
+ if needed. An implication of this behavior is that log(N)
+ reallocations are needed (where N is number of events). If you want
+ to avoid reallocations, then set INIT_SIZE to a large value.
+
+   The interface to this class is through six operations:
+ 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory
+ 2) size() -- returns the number of valid elements in the vector
+ 3) push_back(start, stop, nest, timer_name) -- pushes an event onto
+ the back of the array
+ 4) deallocate() -- frees all memory associated with the vector
+ 5) sort() -- sorts the vector by start time
+ 6) operator[index] or at(index) -- returns event reference at that index
+
+**************************************************************** */
+class kmp_stats_event_vector {
+ kmp_stats_event* events;
+ int internal_size;
+ int allocated_size;
+ static const int INIT_SIZE = 1024;
+ public:
+ kmp_stats_event_vector() {
+ events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE);
+ internal_size = 0;
+ allocated_size = INIT_SIZE;
+ }
+ ~kmp_stats_event_vector() {}
+ inline void reset() { internal_size = 0; }
+ inline int size() const { return internal_size; }
+ void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
+ int i;
+ if(internal_size == allocated_size) {
+ kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2);
+ for(i=0;i<internal_size;i++) tmp[i] = events[i];
+ __kmp_free(events);
+ events = tmp;
+ allocated_size*=2;
+ }
+ events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
+ internal_size++;
+ return;
+ }
+ void deallocate();
+ void sort();
+ const kmp_stats_event & operator[](int index) const { return events[index]; }
+ kmp_stats_event & operator[](int index) { return events[index]; }
+ const kmp_stats_event & at(int index) const { return events[index]; }
+ kmp_stats_event & at(int index) { return events[index]; }
+};
+
+/* ****************************************************************
+ Class to implement a doubly-linked, circular, statistics list
+
+ |---| ---> |---| ---> |---| ---> |---| ---> ... next
+ | | | | | | | |
+ |---| <--- |---| <--- |---| <--- |---| <--- ... prev
+ Sentinel first second third
+ Node node node node
+
+ The Sentinel Node is the user handle on the list.
+ The first node corresponds to thread 0's statistics.
+ The second node corresponds to thread 1's statistics and so on...
+
+ Each node has a _timers, _counters, and _explicitTimers array to
+ hold that thread's statistics. The _explicitTimers
+ point to the correct _timer and update its statistics at every stop() call.
+ The explicitTimers' pointers are set up in the constructor.
+ Each node also has an event vector to hold that thread's timing events.
+ The event vector expands as necessary and records the start-stop times
+ for each timer.
+
+ The nestLevel variable is for plotting events and is related
+ to the bar width in the timeline graph.
+
+ Every thread will have a __thread local pointer to its node in
+ the list. The sentinel node is used by the master thread to
+ store "dummy" statistics before __kmp_create_worker() is called.
+
+**************************************************************** */
+class kmp_stats_list {
+ int gtid;
+ timeStat _timers[TIMER_LAST+1];
+ counter _counters[COUNTER_LAST+1];
+ explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
+ int _nestLevel; // one per thread
+ kmp_stats_event_vector _event_vector;
+ kmp_stats_list* next;
+ kmp_stats_list* prev;
+ public:
+ kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) {
+#define doInit(name,ignore1,ignore2) \
+ getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name));
+ KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
+#undef doInit
+ }
+ ~kmp_stats_list() { }
+ inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
+ inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
+ inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
+ inline timeStat * getTimers() { return _timers; }
+ inline counter * getCounters() { return _counters; }
+ inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
+ inline kmp_stats_event_vector & getEventVector() { return _event_vector; }
+ inline void resetEventVector() { _event_vector.reset(); }
+ inline void incrementNestValue() { _nestLevel++; }
+ inline int getNestValue() { return _nestLevel; }
+ inline void decrementNestValue() { _nestLevel--; }
+ inline int getGtid() const { return gtid; }
+ inline void setGtid(int newgtid) { gtid = newgtid; }
+ kmp_stats_list* push_back(int gtid); // returns newly created list node
+ inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
+ _event_vector.push_back(start_time, stop_time, nest_level, name);
+ }
+ void deallocate();
+ class iterator;
+ kmp_stats_list::iterator begin();
+ kmp_stats_list::iterator end();
+ int size();
+ class iterator {
+ kmp_stats_list* ptr;
+ friend kmp_stats_list::iterator kmp_stats_list::begin();
+ friend kmp_stats_list::iterator kmp_stats_list::end();
+ public:
+ iterator();
+ ~iterator();
+ iterator operator++();
+ iterator operator++(int dummy);
+ iterator operator--();
+ iterator operator--(int dummy);
+ bool operator!=(const iterator & rhs);
+ bool operator==(const iterator & rhs);
+ kmp_stats_list* operator*() const; // dereference operator
+ };
+};
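A small usage sketch (assuming the iterator walks the circular list of per-thread nodes, as the aggregation code that consumes this class does; the helper name is invented for illustration):

    // Sum the number of OMP_work samples recorded across all registered threads.
    static inline uint64_t total_omp_work_samples(kmp_stats_list & list)
    {
        uint64_t total = 0;
        for (kmp_stats_list::iterator it = list.begin(); it != list.end(); ++it)
            total += (*it)->getTimer(TIMER_OMP_work)->getCount();
        return total;
    }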
+
+/* ****************************************************************
+ Class to encapsulate all output functions and the environment variables
+
+ This module holds filenames for various outputs (normal stats, events, plot file),
+ as well as coloring information for the plot file.
+
+ The filenames and flags variables are read from environment variables.
+ These are read once by the constructor of the global variable __kmp_stats_output
+ which calls init().
+
+ During this init() call, event flags for the timeStat::timerInfo[] global array
+ are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
+
+ The only interface function that is public is outputStats(heading). This function
+ should print out everything it needs to, either to files or stderr,
+ depending on the environment variables described below
+
+ ENVIRONMENT VARIABLES:
+ KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file,
+ otherwise, print to stderr
+ KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either
+ KMP_STATS_FILE or stderr
+ KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
+ otherwise, the plot file is sent to "events.plt"
+ KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events
+ KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
+ otherwise, output is sent to "events.dat"
+
+**************************************************************** */
+class kmp_stats_output_module {
+
+ public:
+ struct rgb_color {
+ float r;
+ float g;
+ float b;
+ };
+
+ private:
+ static const char* outputFileName;
+ static const char* eventsFileName;
+ static const char* plotFileName;
+ static int printPerThreadFlag;
+ static int printPerThreadEventsFlag;
+ static const rgb_color globalColorArray[];
+ static rgb_color timerColorInfo[];
+
+ void init();
+ static void setupEventColors();
+ static void printPloticusFile();
+ static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers);
+ static void printCounters(FILE * statsOut, counter const * theCounters);
+ static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid);
+ static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
+ static void windupExplicitTimers();
+ bool eventPrintingEnabled() {
+ if(printPerThreadEventsFlag) return true;
+ else return false;
+ }
+ bool perThreadPrintingEnabled() {
+ if(printPerThreadFlag) return true;
+ else return false;
+ }
+
+ public:
+ kmp_stats_output_module() { init(); }
+ void outputStats(const char* heading);
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void __kmp_stats_init();
+void __kmp_reset_stats();
+void __kmp_output_stats(const char *);
+void __kmp_accumulate_stats_at_exit(void);
+// thread local pointer to stats node within list
+extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
+// head to stats list.
+extern kmp_stats_list __kmp_stats_list;
+// lock for __kmp_stats_list
+extern kmp_tas_lock_t __kmp_stats_lock;
+// reference start time
+extern tsc_tick_count __kmp_stats_start_time;
+// interface to output
+extern kmp_stats_output_module __kmp_stats_output;
+
+#ifdef __cplusplus
+}
+#endif
+
+// Simple, standard interfaces that drop out completely if stats aren't enabled
+
+
+/*!
+ * \brief Uses specified timer (name) to time code block.
+ *
+ * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
+ *
+ * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block
+ * and use the destructor to stop the timer. Convenient!
+ * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block.
+ * I don't think that's a problem.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_TIME_BLOCK(name) \
+ blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)
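A usage sketch (the enclosing function is invented for illustration; it assumes the rest of this header, including the __kmp_stats_thread_ptr declaration below, is in scope):

    static void hypothetical_timed_region()
    {
        KMP_TIME_BLOCK(OMP_work);     // constructs a blockTimer on the stack and starts TIMER_OMP_work
        /* ...code being timed... */
    }                                 // the blockTimer destructor stops the timer when the scope exits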
+
+/*!
+ * \brief Adds value to specified timer (name).
+ *
+ * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
+ * @param value double precision sample value to add to statistics for the timer
+ *
+ * \details Use the KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer's statistics.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_COUNT_VALUE(name, value) \
+ __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
+
+/*!
+ * \brief Increments specified counter (name).
+ *
+ * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro
+ *
+ * \details Use the KMP_COUNT_BLOCK(name) macro to increment a statistics counter for the executing thread.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_COUNT_BLOCK(name) \
+ __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
+
+/*!
+ * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro.
+ *
+ * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
+ *
+ * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER()
+ * macro to stop the timer, unlike the KMP_TIME_BLOCK(name) macro, which stops its timer implicitly at the end
+ * of the code block. All explicit timers are stopped at library exit time before the final statistics are output.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_START_EXPLICIT_TIMER(name) \
+ __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)
+
+/*!
+ * \brief "Stops" an explicit timer.
+ *
+ * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
+ *
+ * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name)
+ * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset.
+ * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_STOP_EXPLICIT_TIMER(name) \
+ __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)
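A sketch of the start/stop pairing for a region that does not map onto a single C++ scope (the surrounding context is invented for illustration):

    // KMP_START_EXPLICIT_TIMER(OMP_serial);   // e.g. when thread 0 returns to serial execution
    //     /* ...an arbitrary amount of work, possibly spanning several functions... */
    // KMP_STOP_EXPLICIT_TIMER(OMP_serial);    // e.g. when the next parallel region forks
    // The elapsed ticks between the two calls are added to TIMER_OMP_serial and the timer is reset.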
+
+/*!
+ * \brief Outputs the current thread statistics and resets them.
+ *
+ * @param heading_string heading put above the final stats output
+ *
+ * \details Explicitly stops all timers and outputs all stats.
+ * The environment variable `OMPTB_STATSFILE=filename` can be used to output the stats to a file instead of stderr.
+ * The environment variable `OMPTB_STATSTHREADS=true|undefined` can be used to output thread-specific stats.
+ * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out
+ * thread-specific stats, or it can be undefined (not specified in the environment), in which case thread-specific stats won't be printed.
+ * Note that all statistics are reset when this macro is called.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_OUTPUT_STATS(heading_string) \
+ __kmp_output_stats(heading_string)
+
+/*!
+ * \brief Resets all stats (counters to 0, timers to 0 elapsed ticks).
+ *
+ * \details Reset all stats for all threads.
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_RESET_STATS() __kmp_reset_stats()
+
+#if (KMP_DEVELOPER_STATS)
+# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
+# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
+#else
+// Null definitions
+# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
+# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#endif
+
+#else // KMP_STATS_ENABLED
+
+// Null definitions
+#define KMP_TIME_BLOCK(n) ((void)0)
+#define KMP_COUNT_VALUE(n,v) ((void)0)
+#define KMP_COUNT_BLOCK(n) ((void)0)
+#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)
+
+#define KMP_OUTPUT_STATS(heading_string) ((void)0)
+#define KMP_RESET_STATS() ((void)0)
+
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#endif // KMP_STATS_ENABLED
+
+#endif // KMP_STATS_H
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp
index 33e032fc76..40e29eb0d8 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.cpp
@@ -1,168 +1,168 @@
-/** @file kmp_stats_timing.cpp
- * Timing functions
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <iostream>
-#include <iomanip>
-#include <sstream>
-
-#include "kmp.h"
-#include "kmp_stats_timing.h"
-
-using namespace std;
-
-#if KMP_HAVE_TICK_TIME
-# if KMP_MIC
-double tsc_tick_count::tick_time()
-{
- // pretty bad assumption of 1GHz clock for MIC
- return 1/((double)1000*1.e6);
-}
-# elif KMP_ARCH_X86 || KMP_ARCH_X86_64
-# include <string.h>
-// Extract the value from the CPUID information
-double tsc_tick_count::tick_time()
-{
- static double result = 0.0;
-
- if (result == 0.0)
- {
- kmp_cpuid_t cpuinfo;
- char brand[256];
-
- __kmp_x86_cpuid(0x80000000, 0, &cpuinfo);
- memset(brand, 0, sizeof(brand));
- int ids = cpuinfo.eax;
-
- for (unsigned int i=2; i<(ids^0x80000000)+2; i++)
- __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t)));
-
- char * start = &brand[0];
- for (;*start == ' '; start++)
- ;
-
- char * end = brand + KMP_STRLEN(brand) - 3;
- uint64_t multiplier;
-
- if (*end == 'M') multiplier = 1000LL*1000LL;
- else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL;
- else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL;
- else
- {
- cout << "Error determining multiplier '" << *end << "'\n";
- exit (-1);
- }
- *end = 0;
- while (*end != ' ') end--;
- end++;
-
- double freq = strtod(end, &start);
- if (freq == 0.0)
- {
- cout << "Error calculating frequency " << end << "\n";
- exit (-1);
- }
-
- result = ((double)1.0)/(freq * multiplier);
- }
- return result;
-}
-# endif
-#endif
-
-static bool useSI = true;
-
-// Return a formatted string after normalising the value into
-// engineering style and using a suitable unit prefix (e.g. ms, us, ns).
-std::string formatSI(double interval, int width, char unit)
-{
- std::stringstream os;
-
- if (useSI)
- {
- // Preserve accuracy for small numbers, since we only multiply and the positive powers
- // of ten are precisely representable.
- static struct { double scale; char prefix; } ranges[] = {
- {1.e12,'f'},
- {1.e9, 'p'},
- {1.e6, 'n'},
- {1.e3, 'u'},
- {1.0, 'm'},
- {1.e-3,' '},
- {1.e-6,'k'},
- {1.e-9,'M'},
- {1.e-12,'G'},
- {1.e-15,'T'},
- {1.e-18,'P'},
- {1.e-21,'E'},
- {1.e-24,'Z'},
- {1.e-27,'Y'}
- };
-
- if (interval == 0.0)
- {
- os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit;
- return os.str();
- }
-
- bool negative = false;
- if (interval < 0.0)
- {
- negative = true;
- interval = -interval;
- }
-
- for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++)
- {
- if (interval*ranges[i].scale < 1.e0)
- {
- interval = interval * 1000.e0 * ranges[i].scale;
- os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right <<
- (negative ? -interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit;
-
- return os.str();
- }
- }
- }
- os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit;
-
- return os.str();
-}
-
-tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes)
-{
- timePair lastTimes = times[0];
- tsc_tick_count * startp = lastTimes.get_startp();
- tsc_tick_count * endp = lastTimes.get_endp();
-
- for (int i=1; i<nTimes; i++)
- {
- (*startp) = startp->later(times[i].get_start());
- (*endp) = endp->later (times[i].get_end());
- }
-
- return lastTimes.duration();
-}
-
-std::string timePair::format() const
-{
- std::ostringstream oss;
-
- oss << start.getValue() << ":" << end.getValue() << " = " << (end-start).getValue();
-
- return oss.str();
-}
+/** @file kmp_stats_timing.cpp
+ * Timing functions
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+
+#include "kmp.h"
+#include "kmp_stats_timing.h"
+
+using namespace std;
+
+#if KMP_HAVE_TICK_TIME
+# if KMP_MIC
+double tsc_tick_count::tick_time()
+{
+ // pretty bad assumption of 1GHz clock for MIC
+ return 1/((double)1000*1.e6);
+}
+# elif KMP_ARCH_X86 || KMP_ARCH_X86_64
+# include <string.h>
+// Extract the value from the CPUID information
+double tsc_tick_count::tick_time()
+{
+ static double result = 0.0;
+
+ if (result == 0.0)
+ {
+ kmp_cpuid_t cpuinfo;
+ char brand[256];
+
+ __kmp_x86_cpuid(0x80000000, 0, &cpuinfo);
+ memset(brand, 0, sizeof(brand));
+ int ids = cpuinfo.eax;
+
+ for (unsigned int i=2; i<(ids^0x80000000)+2; i++)
+ __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t)));
+
+ char * start = &brand[0];
+ for (;*start == ' '; start++)
+ ;
+
+ char * end = brand + KMP_STRLEN(brand) - 3;
+ uint64_t multiplier;
+
+ if (*end == 'M') multiplier = 1000LL*1000LL;
+ else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL;
+ else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL;
+ else
+ {
+ cout << "Error determining multiplier '" << *end << "'\n";
+ exit (-1);
+ }
+ *end = 0;
+ while (*end != ' ') end--;
+ end++;
+
+ double freq = strtod(end, &start);
+ if (freq == 0.0)
+ {
+ cout << "Error calculating frequency " << end << "\n";
+ exit (-1);
+ }
+
+ result = ((double)1.0)/(freq * multiplier);
+ }
+ return result;
+}
+# endif
+#endif
+
+static bool useSI = true;
+
+// Return a formatted string after normalising the value into
+// engineering style and using a suitable unit prefix (e.g. ms, us, ns).
+std::string formatSI(double interval, int width, char unit)
+{
+ std::stringstream os;
+
+ if (useSI)
+ {
+ // Preserve accuracy for small numbers, since we only multiply and the positive powers
+ // of ten are precisely representable.
+ static struct { double scale; char prefix; } ranges[] = {
+ {1.e12,'f'},
+ {1.e9, 'p'},
+ {1.e6, 'n'},
+ {1.e3, 'u'},
+ {1.0, 'm'},
+ {1.e-3,' '},
+ {1.e-6,'k'},
+ {1.e-9,'M'},
+ {1.e-12,'G'},
+ {1.e-15,'T'},
+ {1.e-18,'P'},
+ {1.e-21,'E'},
+ {1.e-24,'Z'},
+ {1.e-27,'Y'}
+ };
+
+ if (interval == 0.0)
+ {
+ os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit;
+ return os.str();
+ }
+
+ bool negative = false;
+ if (interval < 0.0)
+ {
+ negative = true;
+ interval = -interval;
+ }
+
+ for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++)
+ {
+ if (interval*ranges[i].scale < 1.e0)
+ {
+ interval = interval * 1000.e0 * ranges[i].scale;
+ os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right <<
+ (negative ? -interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit;
+
+ return os.str();
+ }
+ }
+ }
+ os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit;
+
+ return os.str();
+}
+
+tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes)
+{
+ timePair lastTimes = times[0];
+ tsc_tick_count * startp = lastTimes.get_startp();
+ tsc_tick_count * endp = lastTimes.get_endp();
+
+ for (int i=1; i<nTimes; i++)
+ {
+ (*startp) = startp->later(times[i].get_start());
+ (*endp) = endp->later (times[i].get_end());
+ }
+
+ return lastTimes.duration();
+}
+
+std::string timePair::format() const
+{
+ std::ostringstream oss;
+
+ oss << start.getValue() << ":" << end.getValue() << " = " << (end-start).getValue();
+
+ return oss.str();
+}
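For orientation, a minimal usage sketch of the formatting helpers defined above; it is not part of the diff and assumes the runtime's own headers (kmp.h, kmp_stats_timing.h) are on the include path:

#include <cstdio>
#include <string>
#include "kmp_stats_timing.h" // declares formatSI(), formatSeconds(), formatTicks()

static void format_demo()
{
    // 123 microseconds in a 10-character field with unit 'S'; formatSI() normalises the
    // value into engineering form, producing something like "123.00 uS".
    std::string s = formatSI(0.000123, 10, 'S');
    std::printf("%s\n", s.c_str());

    // The wrappers simply pass 'S' (seconds) or 'T' (ticks) as the unit character.
    std::printf("%s\n", formatSeconds(0.000123, 10).c_str());
    std::printf("%s\n", formatTicks(1.0e6, 10).c_str());
}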
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h
index 03b0c92b3b..83fb85bea3 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_stats_timing.h
@@ -1,110 +1,110 @@
-#ifndef KMP_STATS_TIMING_H
-#define KMP_STATS_TIMING_H
-
-/** @file kmp_stats_timing.h
- * Access to real time clock and timers.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-
-#include <stdint.h>
-#include <string>
-#include <limits>
-#include "kmp_os.h"
-
-class tsc_tick_count {
- private:
- int64_t my_count;
-
- public:
- class tsc_interval_t {
- int64_t value;
- explicit tsc_interval_t(int64_t _value) : value(_value) {}
- public:
- tsc_interval_t() : value(0) {}; // Construct 0 time duration
-#if KMP_HAVE_TICK_TIME
- double seconds() const; // Return the length of a time interval in seconds
-#endif
- double ticks() const { return double(value); }
- int64_t getValue() const { return value; }
-
- friend class tsc_tick_count;
-
- friend tsc_interval_t operator-(
- const tsc_tick_count t1, const tsc_tick_count t0);
- };
-
- tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())) {};
- tsc_tick_count(int64_t value) : my_count(value) {};
- int64_t getValue() const { return my_count; }
- tsc_tick_count later (tsc_tick_count const other) const {
- return my_count > other.my_count ? (*this) : other;
- }
- tsc_tick_count earlier(tsc_tick_count const other) const {
- return my_count < other.my_count ? (*this) : other;
- }
-#if KMP_HAVE_TICK_TIME
- static double tick_time(); // returns seconds per cycle (period) of clock
-#endif
- static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value
- friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0);
-};
-
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0)
-{
- return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count );
-}
-
-#if KMP_HAVE_TICK_TIME
-inline double tsc_tick_count::tsc_interval_t::seconds() const
-{
- return value*tick_time();
-}
-#endif
-
-extern std::string formatSI(double interval, int width, char unit);
-
-inline std::string formatSeconds(double interval, int width)
-{
- return formatSI(interval, width, 'S');
-}
-
-inline std::string formatTicks(double interval, int width)
-{
- return formatSI(interval, width, 'T');
-}
-
-class timePair
-{
- tsc_tick_count KMP_ALIGN_CACHE start;
- tsc_tick_count end;
-
-public:
- timePair() : start(-std::numeric_limits<int64_t>::max()), end(-std::numeric_limits<int64_t>::max()) {}
- tsc_tick_count get_start() const { return start; }
- tsc_tick_count get_end() const { return end; }
- tsc_tick_count * get_startp() { return &start; }
- tsc_tick_count * get_endp() { return &end; }
-
- void markStart() { start = tsc_tick_count::now(); }
- void markEnd() { end = tsc_tick_count::now(); }
- void set_start(tsc_tick_count s) { start = s; }
- void set_end (tsc_tick_count e) { end = e; }
-
- tsc_tick_count::tsc_interval_t duration() const { return end-start; }
- std::string format() const;
-
-};
-
-extern tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes);
-#endif // KMP_STATS_TIMING_H
+#ifndef KMP_STATS_TIMING_H
+#define KMP_STATS_TIMING_H
+
+/** @file kmp_stats_timing.h
+ * Access to real time clock and timers.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include <stdint.h>
+#include <string>
+#include <limits>
+#include "kmp_os.h"
+
+class tsc_tick_count {
+ private:
+ int64_t my_count;
+
+ public:
+ class tsc_interval_t {
+ int64_t value;
+ explicit tsc_interval_t(int64_t _value) : value(_value) {}
+ public:
+ tsc_interval_t() : value(0) {}; // Construct 0 time duration
+#if KMP_HAVE_TICK_TIME
+ double seconds() const; // Return the length of a time interval in seconds
+#endif
+ double ticks() const { return double(value); }
+ int64_t getValue() const { return value; }
+
+ friend class tsc_tick_count;
+
+ friend tsc_interval_t operator-(
+ const tsc_tick_count t1, const tsc_tick_count t0);
+ };
+
+ tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())) {};
+ tsc_tick_count(int64_t value) : my_count(value) {};
+ int64_t getValue() const { return my_count; }
+ tsc_tick_count later (tsc_tick_count const other) const {
+ return my_count > other.my_count ? (*this) : other;
+ }
+ tsc_tick_count earlier(tsc_tick_count const other) const {
+ return my_count < other.my_count ? (*this) : other;
+ }
+#if KMP_HAVE_TICK_TIME
+ static double tick_time(); // returns seconds per cycle (period) of clock
+#endif
+ static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value
+ friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0);
+};
+
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0)
+{
+ return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count );
+}
+
+#if KMP_HAVE_TICK_TIME
+inline double tsc_tick_count::tsc_interval_t::seconds() const
+{
+ return value*tick_time();
+}
+#endif
+
+extern std::string formatSI(double interval, int width, char unit);
+
+inline std::string formatSeconds(double interval, int width)
+{
+ return formatSI(interval, width, 'S');
+}
+
+inline std::string formatTicks(double interval, int width)
+{
+ return formatSI(interval, width, 'T');
+}
+
+class timePair
+{
+ tsc_tick_count KMP_ALIGN_CACHE start;
+ tsc_tick_count end;
+
+public:
+ timePair() : start(-std::numeric_limits<int64_t>::max()), end(-std::numeric_limits<int64_t>::max()) {}
+ tsc_tick_count get_start() const { return start; }
+ tsc_tick_count get_end() const { return end; }
+ tsc_tick_count * get_startp() { return &start; }
+ tsc_tick_count * get_endp() { return &end; }
+
+ void markStart() { start = tsc_tick_count::now(); }
+ void markEnd() { end = tsc_tick_count::now(); }
+ void set_start(tsc_tick_count s) { start = s; }
+ void set_end (tsc_tick_count e) { end = e; }
+
+ tsc_tick_count::tsc_interval_t duration() const { return end-start; }
+ std::string format() const;
+
+};
+
+extern tsc_tick_count::tsc_interval_t computeLastInLastOutInterval(timePair * times, int nTimes);
+#endif // KMP_STATS_TIMING_H
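As a usage illustration of the interface declared above (again a sketch, not part of the diff; it assumes a build where KMP_HAVE_TICK_TIME is defined so that seconds() is available):

#include <cstdio>
#include "kmp_stats_timing.h"

static void time_one_region(timePair * slot)
{
    slot->markStart();
    // ... the region being measured ...
    slot->markEnd();
}

static void report(timePair * times, int nTimes)
{
    // Span from the latest start to the latest end across all records ("last in, last out").
    tsc_tick_count::tsc_interval_t span = computeLastInLastOutInterval(times, nTimes);

    // ticks() is always available; seconds() converts via tick_time().
    std::printf("%s (%s)\n",
                formatTicks(span.ticks(), 12).c_str(),
                formatSeconds(span.seconds(), 12).c_str());
}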
diff --git a/contrib/libs/cxxsupp/openmp/kmp_str.c b/contrib/libs/cxxsupp/openmp/kmp_str.c
index 8adf3e3876..b5f700551e 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_str.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_str.c
@@ -1,883 +1,883 @@
-/*
- * kmp_str.c -- String manipulation routines.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp_str.h"
-
-#include <stdarg.h> // va_*
-#include <stdio.h> // vsnprintf()
-#include <stdlib.h> // malloc(), realloc()
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-
-/*
- ------------------------------------------------------------------------------------------------
- String buffer.
- ------------------------------------------------------------------------------------------------
-
- Usage:
-
- // Declare buffer and initialize it.
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
-
- // Print to buffer.
- __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 );
- __kmp_str_buf_print( & buffer, " <%s>\n", line );
-
- // Use buffer contents. buffer.str is a pointer to data, buffer.used is a number of printed
- // characters (not including terminating zero).
- write( fd, buffer.str, buffer.used );
-
- // Free buffer.
- __kmp_str_buf_free( & buffer );
-
- // Alternatively, you can detach allocated memory from buffer:
- __kmp_str_buf_detach( & buffer );
- return buffer.str; // That memory should be freed eventually.
-
-
- Notes:
-
- * Buffer users may use buffer.str and buffer.used. Users should not change any fields of
- buffer directly.
-
- * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string ("").
-
- * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is
- exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc()
- as amount of used memory grows.
-
- * Buffer doubles amount of allocated memory each time it is exhausted.
-
- ------------------------------------------------------------------------------------------------
-*/
-
-// TODO: __kmp_str_buf_print() can use thread local memory allocator.
-
-#define KMP_STR_BUF_INVARIANT( b ) \
- { \
- KMP_DEBUG_ASSERT( (b)->str != NULL ); \
- KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) ); \
- KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 ); \
- KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size ); \
- KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 ); \
- KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 ); \
- }
-
-void
- __kmp_str_buf_clear(
- kmp_str_buf_t * buffer
-) {
- KMP_STR_BUF_INVARIANT( buffer );
- if ( buffer->used > 0 ) {
- buffer->used = 0;
- buffer->str[ 0 ] = 0;
- }; // if
- KMP_STR_BUF_INVARIANT( buffer );
-} // __kmp_str_buf_clear
-
-
-void
-__kmp_str_buf_reserve(
- kmp_str_buf_t * buffer,
- int size
-) {
-
- KMP_STR_BUF_INVARIANT( buffer );
- KMP_DEBUG_ASSERT( size >= 0 );
-
- if ( buffer->size < (unsigned int)size ) {
-
- // Calculate buffer size.
- do {
- buffer->size *= 2;
- } while ( buffer->size < (unsigned int)size );
-
- // Enlarge buffer.
- if ( buffer->str == & buffer->bulk[ 0 ] ) {
- buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
- if ( buffer->str == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
- } else {
- buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size );
- if ( buffer->str == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- }; // if
-
- }; // if
-
- KMP_DEBUG_ASSERT( buffer->size > 0 );
- KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size );
- KMP_STR_BUF_INVARIANT( buffer );
-
-} // __kmp_str_buf_reserve
-
-
-void
-__kmp_str_buf_detach(
- kmp_str_buf_t * buffer
-) {
-
- KMP_STR_BUF_INVARIANT( buffer );
-
- // If internal bulk is used, allocate memory and copy it.
- if ( buffer->size <= sizeof( buffer->bulk ) ) {
- buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
- if ( buffer->str == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
- }; // if
-
-} // __kmp_str_buf_detach
-
-
-void
-__kmp_str_buf_free(
- kmp_str_buf_t * buffer
-) {
- KMP_STR_BUF_INVARIANT( buffer );
- if ( buffer->size > sizeof( buffer->bulk ) ) {
- KMP_INTERNAL_FREE( buffer->str );
- }; // if
- buffer->str = buffer->bulk;
- buffer->size = sizeof( buffer->bulk );
- buffer->used = 0;
- KMP_STR_BUF_INVARIANT( buffer );
-} // __kmp_str_buf_free
-
-
-void
-__kmp_str_buf_cat(
- kmp_str_buf_t * buffer,
- char const * str,
- int len
-) {
- KMP_STR_BUF_INVARIANT( buffer );
- KMP_DEBUG_ASSERT( str != NULL );
- KMP_DEBUG_ASSERT( len >= 0 );
- __kmp_str_buf_reserve( buffer, buffer->used + len + 1 );
- KMP_MEMCPY( buffer->str + buffer->used, str, len );
- buffer->str[ buffer->used + len ] = 0;
- buffer->used += len;
- KMP_STR_BUF_INVARIANT( buffer );
-} // __kmp_str_buf_cat
-
-
-void
-__kmp_str_buf_vprint(
- kmp_str_buf_t * buffer,
- char const * format,
- va_list args
-) {
-
- KMP_STR_BUF_INVARIANT( buffer );
-
- for ( ; ; ) {
-
- int const free = buffer->size - buffer->used;
- int rc;
- int size;
-
- // Try to format string.
- {
- /*
- On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it
- is called for the second time with the same args. To prevent the crash, we have to
- pass a fresh intact copy of args to vsnprintf() on each iteration.
-
- Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it
- seems vsnprintf() does not modify args argument on Windows* OS.
- */
-
- #if ! KMP_OS_WINDOWS
- va_list _args;
- __va_copy( _args, args ); // Make copy of args.
- #define args _args // Substitute args with its copy, _args.
- #endif // KMP_OS_WINDOWS
- rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args );
- #if ! KMP_OS_WINDOWS
- #undef args // Remove substitution.
- va_end( _args );
- #endif // KMP_OS_WINDOWS
- }
-
- // No errors, string has been formatted.
- if ( rc >= 0 && rc < free ) {
- buffer->used += rc;
- break;
- }; // if
-
- // Error occurred, buffer is too small.
- if ( rc >= 0 ) {
- // C99-conforming implementation of vsnprintf returns required buffer size.
- size = buffer->used + rc + 1;
- } else {
- // Older implementations just return -1. Double buffer size.
- size = buffer->size * 2;
- }; // if
-
- // Enlarge buffer.
- __kmp_str_buf_reserve( buffer, size );
-
- // And try again.
-
- }; // forever
-
- KMP_DEBUG_ASSERT( buffer->size > 0 );
- KMP_STR_BUF_INVARIANT( buffer );
-
-} // __kmp_str_buf_vprint
-
-
-void
-__kmp_str_buf_print(
- kmp_str_buf_t * buffer,
- char const * format,
- ...
-) {
-
- va_list args;
- va_start( args, format );
- __kmp_str_buf_vprint( buffer, format, args );
- va_end( args );
-
-} // __kmp_str_buf_print
-
-
-/*
- The function prints specified size to buffer. Size is expressed using biggest possible unit, for
- example 1024 is printed as "1k".
-*/
-
-void
-__kmp_str_buf_print_size(
- kmp_str_buf_t * buf,
- size_t size
-) {
-
- char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" };
- int const units = sizeof( names ) / sizeof( char const * );
- int u = 0;
- if ( size > 0 ) {
- while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) {
- size = size / 1024;
- ++ u;
- }; // while
- }; // if
-
- __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] );
-
-} // __kmp_str_buf_print_size
-
-
-void
-__kmp_str_fname_init(
- kmp_str_fname_t * fname,
- char const * path
-) {
-
- fname->path = NULL;
- fname->dir = NULL;
- fname->base = NULL;
-
- if ( path != NULL ) {
- char * slash = NULL; // Pointer to the last character of dir.
- char * base = NULL; // Pointer to the beginning of basename.
- fname->path = __kmp_str_format( "%s", path );
- // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it
- // causes an assertion in the debug heap, so I had to replace strdup() with __kmp_str_format().
- if ( KMP_OS_WINDOWS ) {
- __kmp_str_replace( fname->path, '\\', '/' );
- }; // if
- fname->dir = __kmp_str_format( "%s", fname->path );
- slash = strrchr( fname->dir, '/' );
- if ( KMP_OS_WINDOWS && slash == NULL ) { // On Windows* OS, if slash not found,
- char first = TOLOWER( fname->dir[ 0 ] ); // look for drive.
- if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) {
- slash = & fname->dir[ 1 ];
- }; // if
- }; // if
- base = ( slash == NULL ? fname->dir : slash + 1 );
- fname->base = __kmp_str_format( "%s", base ); // Copy basename
- * base = 0; // and truncate dir.
- }; // if
-
-} // kmp_str_fname_init
-
-
-void
-__kmp_str_fname_free(
- kmp_str_fname_t * fname
-) {
- __kmp_str_free( (char const **)( & fname->path ) );
- __kmp_str_free( (char const **)( & fname->dir ) );
- __kmp_str_free( (char const **)( & fname->base ) );
-} // kmp_str_fname_free
-
-
-int
-__kmp_str_fname_match(
- kmp_str_fname_t const * fname,
- char const * pattern
-) {
-
- int dir_match = 1;
- int base_match = 1;
-
- if ( pattern != NULL ) {
- kmp_str_fname_t ptrn;
- __kmp_str_fname_init( & ptrn, pattern );
- dir_match =
- strcmp( ptrn.dir, "*/" ) == 0
- ||
- ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) );
- base_match =
- strcmp( ptrn.base, "*" ) == 0
- ||
- ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) );
- __kmp_str_fname_free( & ptrn );
- }; // if
-
- return dir_match && base_match;
-
-} // __kmp_str_fname_match
-
-
-kmp_str_loc_t
-__kmp_str_loc_init(
- char const * psource,
- int init_fname
-) {
-
- kmp_str_loc_t loc;
-
- loc._bulk = NULL;
- loc.file = NULL;
- loc.func = NULL;
- loc.line = 0;
- loc.col = 0;
-
- if ( psource != NULL ) {
-
- char * str = NULL;
- char * dummy = NULL;
- char * line = NULL;
- char * col = NULL;
-
- // Copy psource to keep it intact.
- loc._bulk = __kmp_str_format( "%s", psource );
-
- // Parse psource string: ";file;func;line;col;;"
- str = loc._bulk;
- __kmp_str_split( str, ';', & dummy, & str );
- __kmp_str_split( str, ';', & loc.file, & str );
- __kmp_str_split( str, ';', & loc.func, & str );
- __kmp_str_split( str, ';', & line, & str );
- __kmp_str_split( str, ';', & col, & str );
-
- // Convert line and col into numeric values.
- if ( line != NULL ) {
- loc.line = atoi( line );
- if ( loc.line < 0 ) {
- loc.line = 0;
- }; // if
- }; // if
- if ( col != NULL ) {
- loc.col = atoi( col );
- if ( loc.col < 0 ) {
- loc.col = 0;
- }; // if
- }; // if
-
- }; // if
-
- __kmp_str_fname_init( & loc.fname, init_fname ? loc.file : NULL );
-
- return loc;
-
-} // kmp_str_loc_init
-
-
-void
-__kmp_str_loc_free(
- kmp_str_loc_t * loc
-) {
- __kmp_str_fname_free( & loc->fname );
- KMP_INTERNAL_FREE( loc->_bulk );
- loc->_bulk = NULL;
- loc->file = NULL;
- loc->func = NULL;
-} // kmp_str_loc_free
-
-
-
-/*
- This function is intended to compare file names. On Windows* OS file names are case-insensitive,
- so the function performs a case-insensitive comparison. On Linux* OS it performs a case-sensitive
- comparison.
- Note: The function returns *true* if strings are *equal*.
-*/
-
-int
-__kmp_str_eqf( // True, if strings are equal, false otherwise.
- char const * lhs, // First string.
- char const * rhs // Second string.
-) {
- int result;
- #if KMP_OS_WINDOWS
- result = ( _stricmp( lhs, rhs ) == 0 );
- #else
- result = ( strcmp( lhs, rhs ) == 0 );
- #endif
- return result;
-} // __kmp_str_eqf
-
-
-/*
- This function is like sprintf(), but it *allocates* a new buffer, which must eventually be freed
- by __kmp_str_free(). It is very convenient for constructing strings: it can replace strdup() and
- strcat(), frees the programmer from manual buffer management, and helps to avoid buffer
- overflows. Examples:
-
- str = __kmp_str_format( "%s", orig ); // strdup(), do not care about buffer size.
- __kmp_str_free( & str );
- str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), do not care about buffer size.
- __kmp_str_free( & str );
- str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
- __kmp_str_free( & str );
-
- Performance note:
- This function allocates memory with malloc() calls, so do not call it from
- performance-critical code. In performance-critical code consider using kmp_str_buf_t
- instead, since it uses stack-allocated buffer for short strings.
-
- Why does this function use malloc()?
- 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There is no
- reason to use __kmp_allocate() for strings: it adds extra overhead, and cache-aligned
- memory is not necessary here.
- 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure.
- We need to perform string operations during library startup (for example, in
- __kmp_register_library_startup()) when no thread structures are allocated yet.
- So standard malloc() is the only available option.
-*/
-
-// TODO: Find and replace all regular free() with __kmp_str_free().
-
-char *
-__kmp_str_format( // Allocated string.
- char const * format, // Format string.
- ... // Other parameters.
-) {
-
- va_list args;
- int size = 512;
- char * buffer = NULL;
- int rc;
-
- // Allocate buffer.
- buffer = (char *) KMP_INTERNAL_MALLOC( size );
- if ( buffer == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
-
- for ( ; ; ) {
-
- // Try to format string.
- va_start( args, format );
- rc = KMP_VSNPRINTF( buffer, size, format, args );
- va_end( args );
-
- // No errors, string has been formatted.
- if ( rc >= 0 && rc < size ) {
- break;
- }; // if
-
- // Error occurred, buffer is too small.
- if ( rc >= 0 ) {
- // C99-conforming implementation of vsnprintf returns required buffer size.
- size = rc + 1;
- } else {
- // Older implementations just return -1.
- size = size * 2;
- }; // if
-
- // Enlarge buffer and try again.
- buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size );
- if ( buffer == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
-
- }; // forever
-
- return buffer;
-
-} // func __kmp_str_format
-
-
-void
-__kmp_str_free(
- char const * * str
-) {
- KMP_DEBUG_ASSERT( str != NULL );
- KMP_INTERNAL_FREE( (void *) * str );
- * str = NULL;
-} // func __kmp_str_free
-
-
-/* If len is zero, returns true iff target and data have exact case-insensitive match.
- If len is negative, returns true iff target is a case-insensitive substring of data.
- If len is positive, returns true iff target is a case-insensitive substring of data or
- vice versa, and neither is shorter than len.
-*/
-int
-__kmp_str_match(
- char const * target,
- int len,
- char const * data
-) {
- int i;
- if ( target == NULL || data == NULL ) {
- return FALSE;
- }; // if
- for ( i = 0; target[i] && data[i]; ++ i ) {
- if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) {
- return FALSE;
- }; // if
- }; // for i
- return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! data[i] ) ) );
-} // __kmp_str_match
-
-
-int
-__kmp_str_match_false( char const * data ) {
- int result =
- __kmp_str_match( "false", 1, data ) ||
- __kmp_str_match( "off", 2, data ) ||
- __kmp_str_match( "0", 1, data ) ||
- __kmp_str_match( ".false.", 2, data ) ||
- __kmp_str_match( ".f.", 2, data ) ||
- __kmp_str_match( "no", 1, data );
- return result;
-} // __kmp_str_match_false
-
-
-int
-__kmp_str_match_true( char const * data ) {
- int result =
- __kmp_str_match( "true", 1, data ) ||
- __kmp_str_match( "on", 2, data ) ||
- __kmp_str_match( "1", 1, data ) ||
- __kmp_str_match( ".true.", 2, data ) ||
- __kmp_str_match( ".t.", 2, data ) ||
- __kmp_str_match( "yes", 1, data );
- return result;
-} // __kmp_str_match_true
-
-void
-__kmp_str_replace(
- char * str,
- char search_for,
- char replace_with
-) {
-
- char * found = NULL;
-
- found = strchr( str, search_for );
- while ( found ) {
- * found = replace_with;
- found = strchr( found + 1, search_for );
- }; // while
-
-} // __kmp_str_replace
-
-
-void
-__kmp_str_split(
- char * str, // I: String to split.
- char delim, // I: Character to split on.
- char ** head, // O: Pointer to head (may be NULL).
- char ** tail // O: Pointer to tail (may be NULL).
-) {
- char * h = str;
- char * t = NULL;
- if ( str != NULL ) {
- char * ptr = strchr( str, delim );
- if ( ptr != NULL ) {
- * ptr = 0;
- t = ptr + 1;
- }; // if
- }; // if
- if ( head != NULL ) {
- * head = h;
- }; // if
- if ( tail != NULL ) {
- * tail = t;
- }; // if
-} // __kmp_str_split
-
-/*
- strtok_r() is not available on Windows* OS. This function reimplements strtok_r().
-*/
-char *
-__kmp_str_token(
- char * str, // String to split into tokens. Note: String *is* modified!
- char const * delim, // Delimiters.
- char ** buf // Internal buffer.
-) {
- char * token = NULL;
- #if KMP_OS_WINDOWS
- // On Windows* OS there is no strtok_r() function. Let us implement it.
- if ( str != NULL ) {
- * buf = str; // First call, initialize buf.
- }; // if
- * buf += strspn( * buf, delim ); // Skip leading delimiters.
- if ( ** buf != 0 ) { // Rest of the string is not yet empty.
- token = * buf; // Use it as result.
- * buf += strcspn( * buf, delim ); // Skip non-delimiters.
- if ( ** buf != 0 ) { // Rest of the string is not yet empty.
- ** buf = 0; // Terminate token here.
- * buf += 1; // Advance buf to start with the next token next time.
- }; // if
- }; // if
- #else
- // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
- token = strtok_r( str, delim, buf );
- #endif
- return token;
-}; // __kmp_str_token
-
-
-int
-__kmp_str_to_int(
- char const * str,
- char sentinel
-) {
- int result, factor;
- char const * t;
-
- result = 0;
-
- for (t = str; *t != '\0'; ++t) {
- if (*t < '0' || *t > '9')
- break;
- result = (result * 10) + (*t - '0');
- }
-
- switch (*t) {
- case '\0': /* the current default for no suffix is bytes */
- factor = 1;
- break;
- case 'b': case 'B': /* bytes */
- ++t;
- factor = 1;
- break;
- case 'k': case 'K': /* kilo-bytes */
- ++t;
- factor = 1024;
- break;
- case 'm': case 'M': /* mega-bytes */
- ++t;
- factor = (1024 * 1024);
- break;
- default:
- if(*t != sentinel)
- return (-1);
- t = "";
- factor = 1;
- }
-
- if (result > (INT_MAX / factor))
- result = INT_MAX;
- else
- result *= factor;
-
- return (*t != 0 ? 0 : result);
-
-} // __kmp_str_to_int
-
-
-/*
- The routine parses the input string. It is expected to be an unsigned integer with an optional
- unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ...,
- "yb" or "y" for yottabytes. :-) Unit names are case-insensitive. On success *error is set to
- NULL and *out receives the parsed value; otherwise *error points to an error message (not a
- number, bad unit, illegal trailing characters, or value too large). In case of overflow *out is
- set to KMP_SIZE_T_MAX.
-*/
-void
-__kmp_str_to_size( // R: Error code.
- char const * str, // I: String of characters, unsigned number and unit ("b", "kb", etc).
- size_t * out, // O: Parsed number.
- size_t dfactor, // I: The factor if none of the letters specified.
- char const * * error // O: Null if everything is ok, error message otherwise.
-) {
-
- size_t value = 0;
- size_t factor = 0;
- int overflow = 0;
- int i = 0;
- int digit;
-
-
- KMP_DEBUG_ASSERT( str != NULL );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t') {
- ++ i;
- }; // while
-
- // Parse number.
- if ( str[ i ] < '0' || str[ i ] > '9' ) {
- * error = KMP_I18N_STR( NotANumber );
- return;
- }; // if
- do {
- digit = str[ i ] - '0';
- overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
- value = ( value * 10 ) + digit;
- ++ i;
- } while ( str[ i ] >= '0' && str[ i ] <= '9' );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- // Parse unit.
- #define _case( ch, exp ) \
- case ch : \
- case ch - ( 'a' - 'A' ) : { \
- size_t shift = (exp) * 10; \
- ++ i; \
- if ( shift < sizeof( size_t ) * 8 ) { \
- factor = (size_t)( 1 ) << shift; \
- } else { \
- overflow = 1; \
- }; \
- } break;
- switch ( str[ i ] ) {
- _case( 'k', 1 ); // Kilo
- _case( 'm', 2 ); // Mega
- _case( 'g', 3 ); // Giga
- _case( 't', 4 ); // Tera
- _case( 'p', 5 ); // Peta
- _case( 'e', 6 ); // Exa
- _case( 'z', 7 ); // Zetta
- _case( 'y', 8 ); // Yotta
- // Oops. No more units...
- }; // switch
- #undef _case
- if ( str[ i ] == 'b' || str[ i ] == 'B' ) { // Skip optional "b".
- if ( factor == 0 ) {
- factor = 1;
- }
- ++ i;
- }; // if
- if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit
- * error = KMP_I18N_STR( BadUnit );
- return;
- }; // if
-
- if ( factor == 0 ) {
- factor = dfactor;
- }
-
- // Apply factor.
- overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) );
- value *= factor;
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- if ( str[ i ] != 0 ) {
- * error = KMP_I18N_STR( IllegalCharacters );
- return;
- }; // if
-
- if ( overflow ) {
- * error = KMP_I18N_STR( ValueTooLarge );
- * out = KMP_SIZE_T_MAX;
- return;
- }; // if
-
- * error = NULL;
- * out = value;
-
-} // __kmp_str_to_size
-
-
-void
-__kmp_str_to_uint( // R: Error code.
- char const * str, // I: String of characters, unsigned number.
- kmp_uint64 * out, // O: Parsed number.
- char const * * error // O: Null if everything is ok, error message otherwise.
-) {
-
- size_t value = 0;
- int overflow = 0;
- int i = 0;
- int digit;
-
-
- KMP_DEBUG_ASSERT( str != NULL );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- // Parse number.
- if ( str[ i ] < '0' || str[ i ] > '9' ) {
- * error = KMP_I18N_STR( NotANumber );
- return;
- }; // if
- do {
- digit = str[ i ] - '0';
- overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
- value = ( value * 10 ) + digit;
- ++ i;
- } while ( str[ i ] >= '0' && str[ i ] <= '9' );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- if ( str[ i ] != 0 ) {
- * error = KMP_I18N_STR( IllegalCharacters );
- return;
- }; // if
-
- if ( overflow ) {
- * error = KMP_I18N_STR( ValueTooLarge );
- * out = (kmp_uint64) -1;
- return;
- }; // if
-
- * error = NULL;
- * out = value;
-
-} // __kmp_str_to_uint
-
-
-
-// end of file //
+/*
+ * kmp_str.c -- String manipulation routines.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp_str.h"
+
+#include <stdarg.h> // va_*
+#include <stdio.h> // vsnprintf()
+#include <stdlib.h> // malloc(), realloc()
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+
+/*
+ ------------------------------------------------------------------------------------------------
+ String buffer.
+ ------------------------------------------------------------------------------------------------
+
+ Usage:
+
+ // Declare buffer and initialize it.
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init( & buffer );
+
+ // Print to buffer.
+ __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 );
+ __kmp_str_buf_print( & buffer, " <%s>\n", line );
+
+ // Use buffer contents. buffer.str is a pointer to data, buffer.used is a number of printed
+ // characters (not including terminating zero).
+ write( fd, buffer.str, buffer.used );
+
+ // Free buffer.
+ __kmp_str_buf_free( & buffer );
+
+ // Alternatively, you can detach allocated memory from buffer:
+ __kmp_str_buf_detach( & buffer );
+ return buffer.str; // That memory should be freed eventually.
+
+
+ Notes:
+
+ * Buffer users may use buffer.str and buffer.used. Users should not change any fields of
+ buffer directly.
+
+ * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string ("").
+
+ * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is
+ exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc()
+ as amount of used memory grows.
+
+ * Buffer doubles amount of allocated memory each time it is exhausted.
+
+ ------------------------------------------------------------------------------------------------
+*/
+
+// TODO: __kmp_str_buf_print() can use thread local memory allocator.
+
+#define KMP_STR_BUF_INVARIANT( b ) \
+ { \
+ KMP_DEBUG_ASSERT( (b)->str != NULL ); \
+ KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) ); \
+ KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 ); \
+ KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size ); \
+ KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 ); \
+ KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 ); \
+ }
+
+void
+ __kmp_str_buf_clear(
+ kmp_str_buf_t * buffer
+) {
+ KMP_STR_BUF_INVARIANT( buffer );
+ if ( buffer->used > 0 ) {
+ buffer->used = 0;
+ buffer->str[ 0 ] = 0;
+ }; // if
+ KMP_STR_BUF_INVARIANT( buffer );
+} // __kmp_str_buf_clear
+
+
+void
+__kmp_str_buf_reserve(
+ kmp_str_buf_t * buffer,
+ int size
+) {
+
+ KMP_STR_BUF_INVARIANT( buffer );
+ KMP_DEBUG_ASSERT( size >= 0 );
+
+ if ( buffer->size < (unsigned int)size ) {
+
+ // Calculate buffer size.
+ do {
+ buffer->size *= 2;
+ } while ( buffer->size < (unsigned int)size );
+
+ // Enlarge buffer.
+ if ( buffer->str == & buffer->bulk[ 0 ] ) {
+ buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
+ if ( buffer->str == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+ KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
+ } else {
+ buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size );
+ if ( buffer->str == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+ }; // if
+
+ }; // if
+
+ KMP_DEBUG_ASSERT( buffer->size > 0 );
+ KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size );
+ KMP_STR_BUF_INVARIANT( buffer );
+
+} // __kmp_str_buf_reserve
+
+
+void
+__kmp_str_buf_detach(
+ kmp_str_buf_t * buffer
+) {
+
+ KMP_STR_BUF_INVARIANT( buffer );
+
+ // If internal bulk is used, allocate memory and copy it.
+ if ( buffer->size <= sizeof( buffer->bulk ) ) {
+ buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
+ if ( buffer->str == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+ KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
+ }; // if
+
+} // __kmp_str_buf_detach
+
+
+void
+__kmp_str_buf_free(
+ kmp_str_buf_t * buffer
+) {
+ KMP_STR_BUF_INVARIANT( buffer );
+ if ( buffer->size > sizeof( buffer->bulk ) ) {
+ KMP_INTERNAL_FREE( buffer->str );
+ }; // if
+ buffer->str = buffer->bulk;
+ buffer->size = sizeof( buffer->bulk );
+ buffer->used = 0;
+ KMP_STR_BUF_INVARIANT( buffer );
+} // __kmp_str_buf_free
+
+
+void
+__kmp_str_buf_cat(
+ kmp_str_buf_t * buffer,
+ char const * str,
+ int len
+) {
+ KMP_STR_BUF_INVARIANT( buffer );
+ KMP_DEBUG_ASSERT( str != NULL );
+ KMP_DEBUG_ASSERT( len >= 0 );
+ __kmp_str_buf_reserve( buffer, buffer->used + len + 1 );
+ KMP_MEMCPY( buffer->str + buffer->used, str, len );
+ buffer->str[ buffer->used + len ] = 0;
+ buffer->used += len;
+ KMP_STR_BUF_INVARIANT( buffer );
+} // __kmp_str_buf_cat
+
+
+void
+__kmp_str_buf_vprint(
+ kmp_str_buf_t * buffer,
+ char const * format,
+ va_list args
+) {
+
+ KMP_STR_BUF_INVARIANT( buffer );
+
+ for ( ; ; ) {
+
+ int const free = buffer->size - buffer->used;
+ int rc;
+ int size;
+
+ // Try to format string.
+ {
+ /*
+ On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it
+ is called for the second time with the same args. To prevent the crash, we have to
+ pass a fresh intact copy of args to vsnprintf() on each iteration.
+
+ Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it
+ seems vsnprintf() does not modify args argument on Windows* OS.
+ */
+
+ #if ! KMP_OS_WINDOWS
+ va_list _args;
+ __va_copy( _args, args ); // Make copy of args.
+ #define args _args // Substitute args with its copy, _args.
+ #endif // KMP_OS_WINDOWS
+ rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args );
+ #if ! KMP_OS_WINDOWS
+ #undef args // Remove substitution.
+ va_end( _args );
+ #endif // KMP_OS_WINDOWS
+ }
+
+ // No errors, string has been formatted.
+ if ( rc >= 0 && rc < free ) {
+ buffer->used += rc;
+ break;
+ }; // if
+
+ // Error occurred, buffer is too small.
+ if ( rc >= 0 ) {
+ // C99-conforming implementation of vsnprintf returns required buffer size.
+ size = buffer->used + rc + 1;
+ } else {
+ // Older implementations just return -1. Double buffer size.
+ size = buffer->size * 2;
+ }; // if
+
+ // Enlarge buffer.
+ __kmp_str_buf_reserve( buffer, size );
+
+ // And try again.
+
+ }; // forever
+
+ KMP_DEBUG_ASSERT( buffer->size > 0 );
+ KMP_STR_BUF_INVARIANT( buffer );
+
+} // __kmp_str_buf_vprint
+
+
+void
+__kmp_str_buf_print(
+ kmp_str_buf_t * buffer,
+ char const * format,
+ ...
+) {
+
+ va_list args;
+ va_start( args, format );
+ __kmp_str_buf_vprint( buffer, format, args );
+ va_end( args );
+
+} // __kmp_str_buf_print
+
+
+/*
+ The function prints specified size to buffer. Size is expressed using biggest possible unit, for
+ example 1024 is printed as "1k".
+*/
+
+void
+__kmp_str_buf_print_size(
+ kmp_str_buf_t * buf,
+ size_t size
+) {
+
+ char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" };
+ int const units = sizeof( names ) / sizeof( char const * );
+ int u = 0;
+ if ( size > 0 ) {
+ while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) {
+ size = size / 1024;
+ ++ u;
+ }; // while
+ }; // if
+
+ __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] );
+
+} // __kmp_str_buf_print_size
+
+
+void
+__kmp_str_fname_init(
+ kmp_str_fname_t * fname,
+ char const * path
+) {
+
+ fname->path = NULL;
+ fname->dir = NULL;
+ fname->base = NULL;
+
+ if ( path != NULL ) {
+ char * slash = NULL; // Pointer to the last character of dir.
+ char * base = NULL; // Pointer to the beginning of basename.
+ fname->path = __kmp_str_format( "%s", path );
+ // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it
+ // causes an assertion in the debug heap, so I had to replace strdup() with __kmp_str_format().
+ if ( KMP_OS_WINDOWS ) {
+ __kmp_str_replace( fname->path, '\\', '/' );
+ }; // if
+ fname->dir = __kmp_str_format( "%s", fname->path );
+ slash = strrchr( fname->dir, '/' );
+ if ( KMP_OS_WINDOWS && slash == NULL ) { // On Windows* OS, if slash not found,
+ char first = TOLOWER( fname->dir[ 0 ] ); // look for drive.
+ if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) {
+ slash = & fname->dir[ 1 ];
+ }; // if
+ }; // if
+ base = ( slash == NULL ? fname->dir : slash + 1 );
+ fname->base = __kmp_str_format( "%s", base ); // Copy basename
+ * base = 0; // and truncate dir.
+ }; // if
+
+} // kmp_str_fname_init
+
+
+void
+__kmp_str_fname_free(
+ kmp_str_fname_t * fname
+) {
+ __kmp_str_free( (char const **)( & fname->path ) );
+ __kmp_str_free( (char const **)( & fname->dir ) );
+ __kmp_str_free( (char const **)( & fname->base ) );
+} // kmp_str_fname_free
+
+
+int
+__kmp_str_fname_match(
+ kmp_str_fname_t const * fname,
+ char const * pattern
+) {
+
+ int dir_match = 1;
+ int base_match = 1;
+
+ if ( pattern != NULL ) {
+ kmp_str_fname_t ptrn;
+ __kmp_str_fname_init( & ptrn, pattern );
+ dir_match =
+ strcmp( ptrn.dir, "*/" ) == 0
+ ||
+ ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) );
+ base_match =
+ strcmp( ptrn.base, "*" ) == 0
+ ||
+ ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) );
+ __kmp_str_fname_free( & ptrn );
+ }; // if
+
+ return dir_match && base_match;
+
+} // __kmp_str_fname_match
+
+
+kmp_str_loc_t
+__kmp_str_loc_init(
+ char const * psource,
+ int init_fname
+) {
+
+ kmp_str_loc_t loc;
+
+ loc._bulk = NULL;
+ loc.file = NULL;
+ loc.func = NULL;
+ loc.line = 0;
+ loc.col = 0;
+
+ if ( psource != NULL ) {
+
+ char * str = NULL;
+ char * dummy = NULL;
+ char * line = NULL;
+ char * col = NULL;
+
+ // Copy psource to keep it intact.
+ loc._bulk = __kmp_str_format( "%s", psource );
+
+ // Parse psource string: ";file;func;line;col;;"
+ str = loc._bulk;
+ __kmp_str_split( str, ';', & dummy, & str );
+ __kmp_str_split( str, ';', & loc.file, & str );
+ __kmp_str_split( str, ';', & loc.func, & str );
+ __kmp_str_split( str, ';', & line, & str );
+ __kmp_str_split( str, ';', & col, & str );
+
+ // Convert line and col into numeric values.
+ if ( line != NULL ) {
+ loc.line = atoi( line );
+ if ( loc.line < 0 ) {
+ loc.line = 0;
+ }; // if
+ }; // if
+ if ( col != NULL ) {
+ loc.col = atoi( col );
+ if ( loc.col < 0 ) {
+ loc.col = 0;
+ }; // if
+ }; // if
+
+ }; // if
+
+ __kmp_str_fname_init( & loc.fname, init_fname ? loc.file : NULL );
+
+ return loc;
+
+} // kmp_str_loc_init
+
+
+void
+__kmp_str_loc_free(
+ kmp_str_loc_t * loc
+) {
+ __kmp_str_fname_free( & loc->fname );
+ KMP_INTERNAL_FREE( loc->_bulk );
+ loc->_bulk = NULL;
+ loc->file = NULL;
+ loc->func = NULL;
+} // kmp_str_loc_free
+
+
+
+/*
+ This function is intended to compare file names. On Windows* OS file names are case-insensitive,
+ so the function performs a case-insensitive comparison. On Linux* OS it performs a case-sensitive
+ comparison.
+ Note: The function returns *true* if strings are *equal*.
+*/
+
+int
+__kmp_str_eqf( // True, if strings are equal, false otherwise.
+ char const * lhs, // First string.
+ char const * rhs // Second string.
+) {
+ int result;
+ #if KMP_OS_WINDOWS
+ result = ( _stricmp( lhs, rhs ) == 0 );
+ #else
+ result = ( strcmp( lhs, rhs ) == 0 );
+ #endif
+ return result;
+} // __kmp_str_eqf
+
+
+/*
+ This function is like sprintf(), but it *allocates* a new buffer, which must eventually be freed
+ by __kmp_str_free(). It is very convenient for constructing strings: it can replace strdup() and
+ strcat(), frees the programmer from manual buffer management, and helps to avoid buffer
+ overflows. Examples:
+
+ str = __kmp_str_format( "%s", orig ); // strdup(), do not care about buffer size.
+ __kmp_str_free( & str );
+ str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), do not care about buffer size.
+ __kmp_str_free( & str );
+ str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
+ __kmp_str_free( & str );
+
+ Performance note:
+ This function allocates memory with malloc() calls, so do not call it from
+ performance-critical code. In performance-critical code consider using kmp_str_buf_t
+ instead, since it uses stack-allocated buffer for short strings.
+
+ Why does this function use malloc()?
+ 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There is no
+ reason to use __kmp_allocate() for strings: it adds extra overhead, and cache-aligned
+ memory is not necessary here.
+ 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure.
+ We need to perform string operations during library startup (for example, in
+ __kmp_register_library_startup()) when no thread structures are allocated yet.
+ So standard malloc() is the only available option.
+*/
+
+// TODO: Find and replace all regular free() with __kmp_str_free().
+
+char *
+__kmp_str_format( // Allocated string.
+ char const * format, // Format string.
+ ... // Other parameters.
+) {
+
+ va_list args;
+ int size = 512;
+ char * buffer = NULL;
+ int rc;
+
+ // Allocate buffer.
+ buffer = (char *) KMP_INTERNAL_MALLOC( size );
+ if ( buffer == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+
+ for ( ; ; ) {
+
+ // Try to format string.
+ va_start( args, format );
+ rc = KMP_VSNPRINTF( buffer, size, format, args );
+ va_end( args );
+
+ // No errors, string has been formatted.
+ if ( rc >= 0 && rc < size ) {
+ break;
+ }; // if
+
+ // Error occurred, buffer is too small.
+ if ( rc >= 0 ) {
+ // C99-conforming implementation of vsnprintf returns required buffer size.
+ size = rc + 1;
+ } else {
+ // Older implementations just return -1.
+ size = size * 2;
+ }; // if
+
+ // Enlarge buffer and try again.
+ buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size );
+ if ( buffer == NULL ) {
+ KMP_FATAL( MemoryAllocFailed );
+ }; // if
+
+ }; // forever
+
+ return buffer;
+
+} // func __kmp_str_format
+
+
+void
+__kmp_str_free(
+ char const * * str
+) {
+ KMP_DEBUG_ASSERT( str != NULL );
+ KMP_INTERNAL_FREE( (void *) * str );
+ * str = NULL;
+} // func __kmp_str_free
+
+
+/* If len is zero, returns true iff target and data have exact case-insensitive match.
+ If len is negative, returns true iff target is a case-insensitive substring of data.
+ If len is positive, returns true iff target is a case-insensitive substring of data or
+ vice versa, and neither is shorter than len.
+*/
+int
+__kmp_str_match(
+ char const * target,
+ int len,
+ char const * data
+) {
+ int i;
+ if ( target == NULL || data == NULL ) {
+ return FALSE;
+ }; // if
+ for ( i = 0; target[i] && data[i]; ++ i ) {
+ if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) {
+ return FALSE;
+ }; // if
+ }; // for i
+ return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! data[i] ) ) );
+} // __kmp_str_match
+
+
+int
+__kmp_str_match_false( char const * data ) {
+ int result =
+ __kmp_str_match( "false", 1, data ) ||
+ __kmp_str_match( "off", 2, data ) ||
+ __kmp_str_match( "0", 1, data ) ||
+ __kmp_str_match( ".false.", 2, data ) ||
+ __kmp_str_match( ".f.", 2, data ) ||
+ __kmp_str_match( "no", 1, data );
+ return result;
+} // __kmp_str_match_false
+
+
+int
+__kmp_str_match_true( char const * data ) {
+ int result =
+ __kmp_str_match( "true", 1, data ) ||
+ __kmp_str_match( "on", 2, data ) ||
+ __kmp_str_match( "1", 1, data ) ||
+ __kmp_str_match( ".true.", 2, data ) ||
+ __kmp_str_match( ".t.", 2, data ) ||
+ __kmp_str_match( "yes", 1, data );
+ return result;
+} // __kmp_str_match_true
+
+void
+__kmp_str_replace(
+ char * str,
+ char search_for,
+ char replace_with
+) {
+
+ char * found = NULL;
+
+ found = strchr( str, search_for );
+ while ( found ) {
+ * found = replace_with;
+ found = strchr( found + 1, search_for );
+ }; // while
+
+} // __kmp_str_replace
+
+
+void
+__kmp_str_split(
+ char * str, // I: String to split.
+ char delim, // I: Character to split on.
+ char ** head, // O: Pointer to head (may be NULL).
+ char ** tail // O: Pointer to tail (may be NULL).
+) {
+ char * h = str;
+ char * t = NULL;
+ if ( str != NULL ) {
+ char * ptr = strchr( str, delim );
+ if ( ptr != NULL ) {
+ * ptr = 0;
+ t = ptr + 1;
+ }; // if
+ }; // if
+ if ( head != NULL ) {
+ * head = h;
+ }; // if
+ if ( tail != NULL ) {
+ * tail = t;
+ }; // if
+} // __kmp_str_split
+
+/*
+ strtok_r() is not available on Windows* OS. This function reimplements strtok_r().
+*/
+char *
+__kmp_str_token(
+ char * str, // String to split into tokens. Note: String *is* modified!
+ char const * delim, // Delimiters.
+ char ** buf // Internal buffer.
+) {
+ char * token = NULL;
+ #if KMP_OS_WINDOWS
+ // On Windows* OS there is no strtok_r() function. Let us implement it.
+ if ( str != NULL ) {
+ * buf = str; // First call, initialize buf.
+ }; // if
+ * buf += strspn( * buf, delim ); // Skip leading delimiters.
+ if ( ** buf != 0 ) { // Rest of the string is not yet empty.
+ token = * buf; // Use it as result.
+ * buf += strcspn( * buf, delim ); // Skip non-delimiters.
+ if ( ** buf != 0 ) { // Rest of the string is not yet empty.
+ ** buf = 0; // Terminate token here.
+ * buf += 1; // Advance buf to start with the next token next time.
+ }; // if
+ }; // if
+ #else
+ // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
+ token = strtok_r( str, delim, buf );
+ #endif
+ return token;
+}; // __kmp_str_token
+
+
+int
+__kmp_str_to_int(
+ char const * str,
+ char sentinel
+) {
+ int result, factor;
+ char const * t;
+
+ result = 0;
+
+ for (t = str; *t != '\0'; ++t) {
+ if (*t < '0' || *t > '9')
+ break;
+ result = (result * 10) + (*t - '0');
+ }
+
+ switch (*t) {
+ case '\0': /* the current default for no suffix is bytes */
+ factor = 1;
+ break;
+ case 'b': case 'B': /* bytes */
+ ++t;
+ factor = 1;
+ break;
+ case 'k': case 'K': /* kilo-bytes */
+ ++t;
+ factor = 1024;
+ break;
+ case 'm': case 'M': /* mega-bytes */
+ ++t;
+ factor = (1024 * 1024);
+ break;
+ default:
+ if(*t != sentinel)
+ return (-1);
+ t = "";
+ factor = 1;
+ }
+
+ if (result > (INT_MAX / factor))
+ result = INT_MAX;
+ else
+ result *= factor;
+
+ return (*t != 0 ? 0 : result);
+
+} // __kmp_str_to_int
+
+
+/*
+ The routine parses the input string. It is expected to be an unsigned integer with an optional
+ unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ...,
+ "yb" or "y" for yottabytes. :-) Unit names are case-insensitive. On success *error is set to
+ NULL and *out receives the parsed value; otherwise *error points to an error message (not a
+ number, bad unit, illegal trailing characters, or value too large). In case of overflow *out is
+ set to KMP_SIZE_T_MAX.
+*/
+void
+__kmp_str_to_size( // R: Error code.
+ char const * str, // I: String of characters, unsigned number and unit ("b", "kb", etc).
+ size_t * out, // O: Parsed number.
+ size_t dfactor, // I: The factor if none of the letters specified.
+ char const * * error // O: Null if everything is ok, error message otherwise.
+) {
+
+ size_t value = 0;
+ size_t factor = 0;
+ int overflow = 0;
+ int i = 0;
+ int digit;
+
+
+ KMP_DEBUG_ASSERT( str != NULL );
+
+ // Skip spaces.
+ while ( str[ i ] == ' ' || str[ i ] == '\t') {
+ ++ i;
+ }; // while
+
+ // Parse number.
+ if ( str[ i ] < '0' || str[ i ] > '9' ) {
+ * error = KMP_I18N_STR( NotANumber );
+ return;
+ }; // if
+ do {
+ digit = str[ i ] - '0';
+ overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
+ value = ( value * 10 ) + digit;
+ ++ i;
+ } while ( str[ i ] >= '0' && str[ i ] <= '9' );
+
+ // Skip spaces.
+ while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
+ ++ i;
+ }; // while
+
+ // Parse unit.
+ #define _case( ch, exp ) \
+ case ch : \
+ case ch - ( 'a' - 'A' ) : { \
+ size_t shift = (exp) * 10; \
+ ++ i; \
+ if ( shift < sizeof( size_t ) * 8 ) { \
+ factor = (size_t)( 1 ) << shift; \
+ } else { \
+ overflow = 1; \
+ }; \
+ } break;
+ switch ( str[ i ] ) {
+ _case( 'k', 1 ); // Kilo
+ _case( 'm', 2 ); // Mega
+ _case( 'g', 3 ); // Giga
+ _case( 't', 4 ); // Tera
+ _case( 'p', 5 ); // Peta
+ _case( 'e', 6 ); // Exa
+ _case( 'z', 7 ); // Zetta
+ _case( 'y', 8 ); // Yotta
+ // Oops. No more units...
+ }; // switch
+ #undef _case
+ if ( str[ i ] == 'b' || str[ i ] == 'B' ) { // Skip optional "b".
+ if ( factor == 0 ) {
+ factor = 1;
+ }
+ ++ i;
+ }; // if
+ if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit
+ * error = KMP_I18N_STR( BadUnit );
+ return;
+ }; // if
+
+ if ( factor == 0 ) {
+ factor = dfactor;
+ }
+
+ // Apply factor.
+ overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) );
+ value *= factor;
+
+ // Skip spaces.
+ while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
+ ++ i;
+ }; // while
+
+ if ( str[ i ] != 0 ) {
+ * error = KMP_I18N_STR( IllegalCharacters );
+ return;
+ }; // if
+
+ if ( overflow ) {
+ * error = KMP_I18N_STR( ValueTooLarge );
+ * out = KMP_SIZE_T_MAX;
+ return;
+ }; // if
+
+ * error = NULL;
+ * out = value;
+
+} // __kmp_str_to_size
+
+
+void
+__kmp_str_to_uint( // R: Error code.
+ char const * str, // I: String of characters, unsigned number.
+ kmp_uint64 * out, // O: Parsed number.
+ char const * * error // O: Null if everything is ok, error message otherwise.
+) {
+
+ size_t value = 0;
+ int overflow = 0;
+ int i = 0;
+ int digit;
+
+
+ KMP_DEBUG_ASSERT( str != NULL );
+
+ // Skip spaces.
+ while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
+ ++ i;
+ }; // while
+
+ // Parse number.
+ if ( str[ i ] < '0' || str[ i ] > '9' ) {
+ * error = KMP_I18N_STR( NotANumber );
+ return;
+ }; // if
+ do {
+ digit = str[ i ] - '0';
+ overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
+ value = ( value * 10 ) + digit;
+ ++ i;
+ } while ( str[ i ] >= '0' && str[ i ] <= '9' );
+
+ // Skip spaces.
+ while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
+ ++ i;
+ }; // while
+
+ if ( str[ i ] != 0 ) {
+ * error = KMP_I18N_STR( IllegalCharacters );
+ return;
+ }; // if
+
+ if ( overflow ) {
+ * error = KMP_I18N_STR( ValueTooLarge );
+ * out = (kmp_uint64) -1;
+ return;
+ }; // if
+
+ * error = NULL;
+ * out = value;
+
+} // __kmp_str_to_uint
+
+
+
+// end of file //
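To make the string-buffer and size-parsing routines above concrete, a hypothetical caller (a sketch only, not part of the diff; it assumes kmp.h and kmp_str.h from this runtime) might look like:

#include <cstdio>
#include "kmp_str.h"

static void str_demo(void)
{
    // The buffer uses its 512-byte stack bulk first and only falls back to the heap when needed.
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "threads=%d", 8);
    std::printf("%s (%d chars)\n", buf.str, buf.used);
    __kmp_str_buf_free(&buf);

    // "16m" parses as 16 * 2^20; dfactor = 1 means a bare number is taken as-is.
    size_t bytes = 0;
    char const * err = NULL;
    __kmp_str_to_size("16m", &bytes, 1, &err);
    if (err == NULL)
        std::printf("%u MB\n", (unsigned)(bytes >> 20));
}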
diff --git a/contrib/libs/cxxsupp/openmp/kmp_str.h b/contrib/libs/cxxsupp/openmp/kmp_str.h
index 80de47694b..ba71bbaa34 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_str.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_str.h
@@ -1,119 +1,119 @@
-/*
- * kmp_str.h -- String manipulation routines.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_STR_H
-#define KMP_STR_H
-
-#include <string.h>
-#include <stdarg.h>
-
-#include "kmp_os.h"
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-#if KMP_OS_WINDOWS
-# define strdup _strdup
-#endif
-
-/* some macros to replace ctype.h functions */
-#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
-
-struct kmp_str_buf {
- char * str; // Pointer to buffer content, read only.
- unsigned int size; // Do not change this field!
- int used; // Number of characters printed to buffer, read only.
- char bulk[ 512 ]; // Do not use this field!
-}; // struct kmp_str_buf
-typedef struct kmp_str_buf kmp_str_buf_t;
-
-#define __kmp_str_buf_init( b ) { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; }
-
-void __kmp_str_buf_clear( kmp_str_buf_t * buffer );
-void __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size );
-void __kmp_str_buf_detach( kmp_str_buf_t * buffer );
-void __kmp_str_buf_free( kmp_str_buf_t * buffer );
-void __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len );
-void __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args );
-void __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... );
-void __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size );
-
-/*
- File name parser. Usage:
-
- kmp_str_fname_t fname; __kmp_str_fname_init( & fname, path );
- // Use fname.path (copy of original path ), fname.dir, fname.base.
- // Note fname.dir concatenated with fname.base gives exact copy of path.
- __kmp_str_fname_free( & fname );
-
-*/
-struct kmp_str_fname {
- char * path;
- char * dir;
- char * base;
-}; // struct kmp_str_fname
-typedef struct kmp_str_fname kmp_str_fname_t;
-void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path );
-void __kmp_str_fname_free( kmp_str_fname_t * fname );
-// Compares a file name with the specified pattern. If pattern is NULL, any fname matches.
-int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern );
-
-/*
- The compiler provides source locations in string form ";file;func;line;col;;". This is not
- convenient for manipulation. This structure keeps the source location in a more convenient form.
- Usage:
-
- kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
- // use loc.file, loc.func, loc.line, loc.col.
- // loc.fname is available if the second argument of __kmp_str_loc_init is true.
- __kmp_str_loc_free( & loc );
-
- If psource is NULL or does not follow format above, file and/or func may be NULL pointers.
-*/
-struct kmp_str_loc {
- char * _bulk; // Do not use this field.
- kmp_str_fname_t fname; // Will be initialized if init_fname is true.
- char * file;
- char * func;
- int line;
- int col;
-}; // struct kmp_str_loc
-typedef struct kmp_str_loc kmp_str_loc_t;
-kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname );
-void __kmp_str_loc_free( kmp_str_loc_t * loc );
-
-int __kmp_str_eqf( char const * lhs, char const * rhs );
-char * __kmp_str_format( char const * format, ... );
-void __kmp_str_free( char const * * str );
-int __kmp_str_match( char const * target, int len, char const * data );
-int __kmp_str_match_false( char const * data );
-int __kmp_str_match_true( char const * data );
-void __kmp_str_replace( char * str, char search_for, char replace_with );
-void __kmp_str_split( char * str, char delim, char ** head, char ** tail );
-char * __kmp_str_token( char * str, char const * delim, char ** buf );
-int __kmp_str_to_int( char const * str, char sentinel );
-
-void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error );
-void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error );
-
-#ifdef __cplusplus
- } // extern "C"
-#endif // __cplusplus
-
-#endif // KMP_STR_H
-
-// end of file //
-
+/*
+ * kmp_str.h -- String manipulation routines.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_STR_H
+#define KMP_STR_H
+
+#include <string.h>
+#include <stdarg.h>
+
+#include "kmp_os.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+#if KMP_OS_WINDOWS
+# define strdup _strdup
+#endif
+
+/* some macros to replace ctype.h functions */
+#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
+
+struct kmp_str_buf {
+ char * str; // Pointer to buffer content, read only.
+ unsigned int size; // Do not change this field!
+ int used; // Number of characters printed to buffer, read only.
+ char bulk[ 512 ]; // Do not use this field!
+}; // struct kmp_str_buf
+typedef struct kmp_str_buf kmp_str_buf_t;
+
+#define __kmp_str_buf_init( b ) { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; }
+
+void __kmp_str_buf_clear( kmp_str_buf_t * buffer );
+void __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size );
+void __kmp_str_buf_detach( kmp_str_buf_t * buffer );
+void __kmp_str_buf_free( kmp_str_buf_t * buffer );
+void __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len );
+void __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args );
+void __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... );
+void __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size );
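For orientation, a minimal usage sketch of the buffer API declared above; the caller code and the format string are hypothetical, and the behavior notes follow from the comments on struct kmp_str_buf:

    kmp_str_buf_t buf;
    __kmp_str_buf_init( & buf );                 // buf.str points at the 512-byte bulk storage, buf.used == 0
    __kmp_str_buf_print( & buf, "T#%d: ", 7 );   // printf-style append; presumably reserves more space as needed
    __kmp_str_buf_cat( & buf, "hello", 5 );      // append a counted string
    // buf.str now holds "T#7: hello", buf.used its length
    __kmp_str_buf_free( & buf );                 // release any heap storage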
+
+/*
+ File name parser. Usage:
+
+ kmp_str_fname_t fname = __kmp_str_fname_init( path );
+ // Use fname.path (copy of original path ), fname.dir, fname.base.
+ // Note fname.dir concatenated with fname.base gives exact copy of path.
+ __kmp_str_fname_free( & fname );
+
+*/
+struct kmp_str_fname {
+ char * path;
+ char * dir;
+ char * base;
+}; // struct kmp_str_fname
+typedef struct kmp_str_fname kmp_str_fname_t;
+void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path );
+void __kmp_str_fname_free( kmp_str_fname_t * fname );
+// Compares the file name with the specified pattern. If pattern is NULL, any fname matches.
+int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern );
+
+/*
+    The compiler provides source locations in string form ";file;func;line;col;;". That form is
+    not convenient to manipulate. This structure keeps the source location in a more convenient form.
+ Usage:
+
+ kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
+ // use loc.file, loc.func, loc.line, loc.col.
+ // loc.fname is available if the second argument of __kmp_str_loc_init is true.
+ __kmp_str_loc_free( & loc );
+
+ If psource is NULL or does not follow format above, file and/or func may be NULL pointers.
+*/
+struct kmp_str_loc {
+    char * _bulk; // Do not use this field.
+ kmp_str_fname_t fname; // Will be initialized if init_fname is true.
+ char * file;
+ char * func;
+ int line;
+ int col;
+}; // struct kmp_str_loc
+typedef struct kmp_str_loc kmp_str_loc_t;
+kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname );
+void __kmp_str_loc_free( kmp_str_loc_t * loc );
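Concretely, given the ";file;func;line;col;;" format described above, a call like the following would be expected to decompose the string into its fields (illustrative values, not taken from this file):

    kmp_str_loc_t loc = __kmp_str_loc_init( ";file.c;foo;12;3;;", 0 );
    // loc.file == "file.c", loc.func == "foo", loc.line == 12, loc.col == 3
    // loc.fname is not initialized here because init_fname is 0
    __kmp_str_loc_free( & loc );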
+
+int __kmp_str_eqf( char const * lhs, char const * rhs );
+char * __kmp_str_format( char const * format, ... );
+void __kmp_str_free( char const * * str );
+int __kmp_str_match( char const * target, int len, char const * data );
+int __kmp_str_match_false( char const * data );
+int __kmp_str_match_true( char const * data );
+void __kmp_str_replace( char * str, char search_for, char replace_with );
+void __kmp_str_split( char * str, char delim, char ** head, char ** tail );
+char * __kmp_str_token( char * str, char const * delim, char ** buf );
+int __kmp_str_to_int( char const * str, char sentinel );
+
+void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error );
+void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error );
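Both converters report problems through the error out-parameter rather than a return code. A hedged sketch of the calling convention; the meaning of dfactor as a default multiplier and the "4m" size suffix are assumptions, not confirmed by this header:

    size_t       size  = 0;
    kmp_uint64   value = 0;
    char const * error = NULL;
    __kmp_str_to_size( "4m", & size, 1, & error );   // dfactor assumed to be a default multiplier
    __kmp_str_to_uint( "1000", & value, & error );
    if ( error != NULL ) {
        // error presumably describes or points at the offending input
    }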
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif // __cplusplus
+
+#endif // KMP_STR_H
+
+// end of file //
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stub.c b/contrib/libs/cxxsupp/openmp/kmp_stub.c
index b1379a770b..1e0953a0fc 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_stub.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_stub.c
@@ -1,252 +1,252 @@
-/*
- * kmp_stub.c -- stub versions of user-callable OpenMP RT functions.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include <stdlib.h>
-#include <limits.h>
-#include <errno.h>
-
-#include "omp.h" // Function renamings.
-#include "kmp.h" // KMP_DEFAULT_STKSIZE
-#include "kmp_stub.h"
-
-#if KMP_OS_WINDOWS
- #include <windows.h>
-#else
- #include <sys/time.h>
-#endif
-
-// Moved from omp.h
-#define omp_set_max_active_levels ompc_set_max_active_levels
-#define omp_set_schedule ompc_set_schedule
-#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
-#define omp_get_team_size ompc_get_team_size
-
-#define omp_set_num_threads ompc_set_num_threads
-#define omp_set_dynamic ompc_set_dynamic
-#define omp_set_nested ompc_set_nested
-#define kmp_set_stacksize kmpc_set_stacksize
-#define kmp_set_stacksize_s kmpc_set_stacksize_s
-#define kmp_set_blocktime kmpc_set_blocktime
-#define kmp_set_library kmpc_set_library
-#define kmp_set_defaults kmpc_set_defaults
-#define kmp_malloc kmpc_malloc
-#define kmp_calloc kmpc_calloc
-#define kmp_realloc kmpc_realloc
-#define kmp_free kmpc_free
-
-static double frequency = 0.0;
-
-// Helper functions.
-static size_t __kmps_init() {
- static int initialized = 0;
- static size_t dummy = 0;
- if ( ! initialized ) {
-
- // TODO: Analyze KMP_VERSION environment variable, print __kmp_version_copyright and
- // __kmp_version_build_time.
- // WARNING: Do not use "fprintf( stderr, ... )" because it will cause unresolved "__iob"
- // symbol (see C70080). We need to extract __kmp_printf() stuff from kmp_runtime.c and use
- // it.
-
- // Trick with dummy variable forces linker to keep __kmp_version_copyright and
- // __kmp_version_build_time strings in executable file (in case of static linkage).
- // When KMP_VERSION analyze is implemented, dummy variable should be deleted, function
- // should return void.
- dummy = __kmp_version_copyright - __kmp_version_build_time;
-
- #if KMP_OS_WINDOWS
- LARGE_INTEGER freq;
- BOOL status = QueryPerformanceFrequency( & freq );
- if ( status ) {
- frequency = double( freq.QuadPart );
- }; // if
- #endif
-
- initialized = 1;
- }; // if
- return dummy;
-}; // __kmps_init
-
-#define i __kmps_init();
-
-/* set API functions */
-void omp_set_num_threads( omp_int_t num_threads ) { i; }
-void omp_set_dynamic( omp_int_t dynamic ) { i; __kmps_set_dynamic( dynamic ); }
-void omp_set_nested( omp_int_t nested ) { i; __kmps_set_nested( nested ); }
-void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; }
-void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); }
-int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); }
-int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); }
-int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-
-/* kmp API functions */
-void kmp_set_stacksize( omp_int_t arg ) { i; __kmps_set_stacksize( arg ); }
-void kmp_set_stacksize_s( size_t arg ) { i; __kmps_set_stacksize( arg ); }
-void kmp_set_blocktime( omp_int_t arg ) { i; __kmps_set_blocktime( arg ); }
-void kmp_set_library( omp_int_t arg ) { i; __kmps_set_library( arg ); }
-void kmp_set_defaults( char const * str ) { i; }
-
-/* KMP memory management functions. */
-void * kmp_malloc( size_t size ) { i; return malloc( size ); }
-void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); }
-void * kmp_realloc( void *ptr, size_t size ) { i; return realloc( ptr, size ); }
-void kmp_free( void * ptr ) { i; free( ptr ); }
-
-static int __kmps_blocktime = INT_MAX;
-
-void __kmps_set_blocktime( int arg ) {
- i;
- __kmps_blocktime = arg;
-} // __kmps_set_blocktime
-
-int __kmps_get_blocktime( void ) {
- i;
- return __kmps_blocktime;
-} // __kmps_get_blocktime
-
-static int __kmps_dynamic = 0;
-
-void __kmps_set_dynamic( int arg ) {
- i;
- __kmps_dynamic = arg;
-} // __kmps_set_dynamic
-
-int __kmps_get_dynamic( void ) {
- i;
- return __kmps_dynamic;
-} // __kmps_get_dynamic
-
-static int __kmps_library = 1000;
-
-void __kmps_set_library( int arg ) {
- i;
- __kmps_library = arg;
-} // __kmps_set_library
-
-int __kmps_get_library( void ) {
- i;
- return __kmps_library;
-} // __kmps_get_library
-
-static int __kmps_nested = 0;
-
-void __kmps_set_nested( int arg ) {
- i;
- __kmps_nested = arg;
-} // __kmps_set_nested
-
-int __kmps_get_nested( void ) {
- i;
- return __kmps_nested;
-} // __kmps_get_nested
-
-static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE;
-
-void __kmps_set_stacksize( int arg ) {
- i;
- __kmps_stacksize = arg;
-} // __kmps_set_stacksize
-
-int __kmps_get_stacksize( void ) {
- i;
- return __kmps_stacksize;
-} // __kmps_get_stacksize
-
-static kmp_sched_t __kmps_sched_kind = kmp_sched_default;
-static int __kmps_sched_modifier = 0;
-
- void __kmps_set_schedule( kmp_sched_t kind, int modifier ) {
- i;
- __kmps_sched_kind = kind;
- __kmps_sched_modifier = modifier;
- } // __kmps_set_schedule
-
- void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) {
- i;
- *kind = __kmps_sched_kind;
- *modifier = __kmps_sched_modifier;
- } // __kmps_get_schedule
-
-#if OMP_40_ENABLED
-
-static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false;
-
-void __kmps_set_proc_bind( kmp_proc_bind_t arg ) {
- i;
- __kmps_proc_bind = arg;
-} // __kmps_set_proc_bind
-
-kmp_proc_bind_t __kmps_get_proc_bind( void ) {
- i;
- return __kmps_proc_bind;
-} // __kmps_get_proc_bind
-
-#endif /* OMP_40_ENABLED */
-
-double __kmps_get_wtime( void ) {
- // Elapsed wall clock time (in second) from "sometime in the past".
- double wtime = 0.0;
- i;
- #if KMP_OS_WINDOWS
- if ( frequency > 0.0 ) {
- LARGE_INTEGER now;
- BOOL status = QueryPerformanceCounter( & now );
- if ( status ) {
- wtime = double( now.QuadPart ) / frequency;
- }; // if
- }; // if
- #else
- // gettimeofday() returns seconds and microseconds since the Epoch.
- struct timeval tval;
- int rc;
- rc = gettimeofday( & tval, NULL );
- if ( rc == 0 ) {
- wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec );
- } else {
- // TODO: Assert or abort here.
- }; // if
- #endif
- return wtime;
-}; // __kmps_get_wtime
-
-double __kmps_get_wtick( void ) {
- // Number of seconds between successive clock ticks.
- double wtick = 0.0;
- i;
- #if KMP_OS_WINDOWS
- {
- DWORD increment;
- DWORD adjustment;
- BOOL disabled;
- BOOL rc;
- rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled );
- if ( rc ) {
- wtick = 1.0E-07 * (double)( disabled ? increment : adjustment );
- } else {
- // TODO: Assert or abort here.
- wtick = 1.0E-03;
- }; // if
- }
- #else
- // TODO: gettimeofday() returns in microseconds, but what the precision?
- wtick = 1.0E-06;
- #endif
- return wtick;
-}; // __kmps_get_wtick
-
-// end of file //
-
+/*
+ * kmp_stub.c -- stub versions of user-callable OpenMP RT functions.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+
+#include "omp.h" // Function renamings.
+#include "kmp.h" // KMP_DEFAULT_STKSIZE
+#include "kmp_stub.h"
+
+#if KMP_OS_WINDOWS
+ #include <windows.h>
+#else
+ #include <sys/time.h>
+#endif
+
+// Moved from omp.h
+#define omp_set_max_active_levels ompc_set_max_active_levels
+#define omp_set_schedule ompc_set_schedule
+#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
+#define omp_get_team_size ompc_get_team_size
+
+#define omp_set_num_threads ompc_set_num_threads
+#define omp_set_dynamic ompc_set_dynamic
+#define omp_set_nested ompc_set_nested
+#define kmp_set_stacksize kmpc_set_stacksize
+#define kmp_set_stacksize_s kmpc_set_stacksize_s
+#define kmp_set_blocktime kmpc_set_blocktime
+#define kmp_set_library kmpc_set_library
+#define kmp_set_defaults kmpc_set_defaults
+#define kmp_malloc kmpc_malloc
+#define kmp_calloc kmpc_calloc
+#define kmp_realloc kmpc_realloc
+#define kmp_free kmpc_free
+
+static double frequency = 0.0;
+
+// Helper functions.
+static size_t __kmps_init() {
+ static int initialized = 0;
+ static size_t dummy = 0;
+ if ( ! initialized ) {
+
+ // TODO: Analyze KMP_VERSION environment variable, print __kmp_version_copyright and
+ // __kmp_version_build_time.
+ // WARNING: Do not use "fprintf( stderr, ... )" because it will cause unresolved "__iob"
+ // symbol (see C70080). We need to extract __kmp_printf() stuff from kmp_runtime.c and use
+ // it.
+
+ // Trick with dummy variable forces linker to keep __kmp_version_copyright and
+ // __kmp_version_build_time strings in executable file (in case of static linkage).
+        // When KMP_VERSION analysis is implemented, the dummy variable should be deleted and the function
+ // should return void.
+ dummy = __kmp_version_copyright - __kmp_version_build_time;
+
+ #if KMP_OS_WINDOWS
+ LARGE_INTEGER freq;
+ BOOL status = QueryPerformanceFrequency( & freq );
+ if ( status ) {
+ frequency = double( freq.QuadPart );
+ }; // if
+ #endif
+
+ initialized = 1;
+ }; // if
+ return dummy;
+}; // __kmps_init
+
+#define i __kmps_init();
+
+/* set API functions */
+void omp_set_num_threads( omp_int_t num_threads ) { i; }
+void omp_set_dynamic( omp_int_t dynamic ) { i; __kmps_set_dynamic( dynamic ); }
+void omp_set_nested( omp_int_t nested ) { i; __kmps_set_nested( nested ); }
+void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; }
+void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); }
+int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); }
+int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); }
+int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
+int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
+int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
+
+/* kmp API functions */
+void kmp_set_stacksize( omp_int_t arg ) { i; __kmps_set_stacksize( arg ); }
+void kmp_set_stacksize_s( size_t arg ) { i; __kmps_set_stacksize( arg ); }
+void kmp_set_blocktime( omp_int_t arg ) { i; __kmps_set_blocktime( arg ); }
+void kmp_set_library( omp_int_t arg ) { i; __kmps_set_library( arg ); }
+void kmp_set_defaults( char const * str ) { i; }
+
+/* KMP memory management functions. */
+void * kmp_malloc( size_t size ) { i; return malloc( size ); }
+void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); }
+void * kmp_realloc( void *ptr, size_t size ) { i; return realloc( ptr, size ); }
+void kmp_free( void * ptr ) { i; free( ptr ); }
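Taken together, the stubs above give a sequential program the single-threaded behavior one would expect; a hypothetical caller (using the omp_/kmp_ names that the #define block maps onto these entry points):

    omp_set_num_threads( 8 );            // accepted, silently ignored
    int size = omp_get_team_size( 0 );   // 1 at level 0, -1 for any other level
    void * p = kmp_malloc( 128 );        // plain malloc underneath
    kmp_free( p );                       // plain free underneath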
+
+static int __kmps_blocktime = INT_MAX;
+
+void __kmps_set_blocktime( int arg ) {
+ i;
+ __kmps_blocktime = arg;
+} // __kmps_set_blocktime
+
+int __kmps_get_blocktime( void ) {
+ i;
+ return __kmps_blocktime;
+} // __kmps_get_blocktime
+
+static int __kmps_dynamic = 0;
+
+void __kmps_set_dynamic( int arg ) {
+ i;
+ __kmps_dynamic = arg;
+} // __kmps_set_dynamic
+
+int __kmps_get_dynamic( void ) {
+ i;
+ return __kmps_dynamic;
+} // __kmps_get_dynamic
+
+static int __kmps_library = 1000;
+
+void __kmps_set_library( int arg ) {
+ i;
+ __kmps_library = arg;
+} // __kmps_set_library
+
+int __kmps_get_library( void ) {
+ i;
+ return __kmps_library;
+} // __kmps_get_library
+
+static int __kmps_nested = 0;
+
+void __kmps_set_nested( int arg ) {
+ i;
+ __kmps_nested = arg;
+} // __kmps_set_nested
+
+int __kmps_get_nested( void ) {
+ i;
+ return __kmps_nested;
+} // __kmps_get_nested
+
+static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE;
+
+void __kmps_set_stacksize( int arg ) {
+ i;
+ __kmps_stacksize = arg;
+} // __kmps_set_stacksize
+
+int __kmps_get_stacksize( void ) {
+ i;
+ return __kmps_stacksize;
+} // __kmps_get_stacksize
+
+static kmp_sched_t __kmps_sched_kind = kmp_sched_default;
+static int __kmps_sched_modifier = 0;
+
+ void __kmps_set_schedule( kmp_sched_t kind, int modifier ) {
+ i;
+ __kmps_sched_kind = kind;
+ __kmps_sched_modifier = modifier;
+ } // __kmps_set_schedule
+
+ void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) {
+ i;
+ *kind = __kmps_sched_kind;
+ *modifier = __kmps_sched_modifier;
+ } // __kmps_get_schedule
+
+#if OMP_40_ENABLED
+
+static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false;
+
+void __kmps_set_proc_bind( kmp_proc_bind_t arg ) {
+ i;
+ __kmps_proc_bind = arg;
+} // __kmps_set_proc_bind
+
+kmp_proc_bind_t __kmps_get_proc_bind( void ) {
+ i;
+ return __kmps_proc_bind;
+} // __kmps_get_proc_bind
+
+#endif /* OMP_40_ENABLED */
+
+double __kmps_get_wtime( void ) {
+    // Elapsed wall clock time (in seconds) from "sometime in the past".
+ double wtime = 0.0;
+ i;
+ #if KMP_OS_WINDOWS
+ if ( frequency > 0.0 ) {
+ LARGE_INTEGER now;
+ BOOL status = QueryPerformanceCounter( & now );
+ if ( status ) {
+ wtime = double( now.QuadPart ) / frequency;
+ }; // if
+ }; // if
+ #else
+ // gettimeofday() returns seconds and microseconds since the Epoch.
+ struct timeval tval;
+ int rc;
+ rc = gettimeofday( & tval, NULL );
+ if ( rc == 0 ) {
+ wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec );
+ } else {
+ // TODO: Assert or abort here.
+ }; // if
+ #endif
+ return wtime;
+}; // __kmps_get_wtime
+
+double __kmps_get_wtick( void ) {
+ // Number of seconds between successive clock ticks.
+ double wtick = 0.0;
+ i;
+ #if KMP_OS_WINDOWS
+ {
+ DWORD increment;
+ DWORD adjustment;
+ BOOL disabled;
+ BOOL rc;
+ rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled );
+ if ( rc ) {
+ wtick = 1.0E-07 * (double)( disabled ? increment : adjustment );
+ } else {
+ // TODO: Assert or abort here.
+ wtick = 1.0E-03;
+ }; // if
+ }
+ #else
+        // TODO: gettimeofday() reports microseconds, but what is its actual precision?
+ wtick = 1.0E-06;
+ #endif
+ return wtick;
+}; // __kmps_get_wtick
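The two timing stubs compose in the usual way; a small illustration (caller code assumed):

    double t0 = __kmps_get_wtime();
    /* ... region being measured ... */
    double elapsed = __kmps_get_wtime() - t0;   // seconds; resolution is bounded by __kmps_get_wtick()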
+
+// end of file //
+
diff --git a/contrib/libs/cxxsupp/openmp/kmp_stub.h b/contrib/libs/cxxsupp/openmp/kmp_stub.h
index 2d357b854a..cdcffa3d8c 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_stub.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_stub.h
@@ -1,61 +1,61 @@
-/*
- * kmp_stub.h
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_STUB_H
-#define KMP_STUB_H
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-void __kmps_set_blocktime( int arg );
-int __kmps_get_blocktime( void );
-void __kmps_set_dynamic( int arg );
-int __kmps_get_dynamic( void );
-void __kmps_set_library( int arg );
-int __kmps_get_library( void );
-void __kmps_set_nested( int arg );
-int __kmps_get_nested( void );
-void __kmps_set_stacksize( int arg );
-int __kmps_get_stacksize();
-
-#ifndef KMP_SCHED_TYPE_DEFINED
-#define KMP_SCHED_TYPE_DEFINED
-typedef enum kmp_sched {
- kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
- kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
- kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
- kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
- kmp_sched_default = kmp_sched_static // default scheduling
-} kmp_sched_t;
-#endif
-void __kmps_set_schedule( kmp_sched_t kind, int modifier );
-void __kmps_get_schedule( kmp_sched_t *kind, int *modifier );
-
-#if OMP_40_ENABLED
-void __kmps_set_proc_bind( kmp_proc_bind_t arg );
-kmp_proc_bind_t __kmps_get_proc_bind( void );
-#endif /* OMP_40_ENABLED */
-
-double __kmps_get_wtime();
-double __kmps_get_wtick();
-
-#ifdef __cplusplus
- } // extern "C"
-#endif // __cplusplus
-
-#endif // KMP_STUB_H
-
-// end of file //
+/*
+ * kmp_stub.h
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_STUB_H
+#define KMP_STUB_H
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+void __kmps_set_blocktime( int arg );
+int __kmps_get_blocktime( void );
+void __kmps_set_dynamic( int arg );
+int __kmps_get_dynamic( void );
+void __kmps_set_library( int arg );
+int __kmps_get_library( void );
+void __kmps_set_nested( int arg );
+int __kmps_get_nested( void );
+void __kmps_set_stacksize( int arg );
+int __kmps_get_stacksize();
+
+#ifndef KMP_SCHED_TYPE_DEFINED
+#define KMP_SCHED_TYPE_DEFINED
+typedef enum kmp_sched {
+ kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
+ kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
+ kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
+ kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
+ kmp_sched_default = kmp_sched_static // default scheduling
+} kmp_sched_t;
+#endif
+void __kmps_set_schedule( kmp_sched_t kind, int modifier );
+void __kmps_get_schedule( kmp_sched_t *kind, int *modifier );
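Since the stub implementation simply stores and returns the last values set (see kmp_stub.c above), a set followed by a get round-trips:

    kmp_sched_t kind;
    int         modifier;
    __kmps_set_schedule( kmp_sched_guided, 16 );
    __kmps_get_schedule( & kind, & modifier );   // kind == kmp_sched_guided, modifier == 16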
+
+#if OMP_40_ENABLED
+void __kmps_set_proc_bind( kmp_proc_bind_t arg );
+kmp_proc_bind_t __kmps_get_proc_bind( void );
+#endif /* OMP_40_ENABLED */
+
+double __kmps_get_wtime();
+double __kmps_get_wtick();
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif // __cplusplus
+
+#endif // KMP_STUB_H
+
+// end of file //
diff --git a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp
index 7251f50b0a..da085ce50c 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_taskdeps.cpp
@@ -1,513 +1,513 @@
-/*
- * kmp_taskdeps.cpp
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-//#define KMP_SUPPORT_GRAPH_OUTPUT 1
-
-#include "kmp.h"
-#include "kmp_io.h"
-#include "kmp_wait_release.h"
-
-#if OMP_40_ENABLED
-
-//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use list finished list entries?
-//TODO: don't use atomic ref counters for stack-allocated nodes.
-//TODO: find an alternate to atomic refs for heap-allocated nodes?
-//TODO: Finish graph output support
-//TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other runtime locks
-//TODO: Any ITT support needed?
-
-#ifdef KMP_SUPPORT_GRAPH_OUTPUT
-static kmp_int32 kmp_node_id_seed = 0;
-#endif
-
-static void
-__kmp_init_node ( kmp_depnode_t *node )
-{
- node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed
- node->dn.successors = NULL;
- __kmp_init_lock(&node->dn.lock);
- node->dn.nrefs = 1; // init creates the first reference to the node
-#ifdef KMP_SUPPORT_GRAPH_OUTPUT
- node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
-#endif
-}
-
-static inline kmp_depnode_t *
-__kmp_node_ref ( kmp_depnode_t *node )
-{
- KMP_TEST_THEN_INC32(&node->dn.nrefs);
- return node;
-}
-
-static inline void
-__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node )
-{
- if (!node) return;
-
- kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
- if ( n == 0 ) {
- KMP_ASSERT(node->dn.nrefs == 0);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread,node);
-#else
- __kmp_thread_free(thread,node);
-#endif
- }
-}
-
-#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid))
-#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid))
-
-static void
-__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );
-
-static const kmp_int32 kmp_dephash_log2 = 6;
-static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2);
-
-static inline kmp_int32
-__kmp_dephash_hash ( kmp_intptr_t addr )
-{
- //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets );
- return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size;
-}
-
-static kmp_dephash_t *
-__kmp_dephash_create ( kmp_info_t *thread )
-{
- kmp_dephash_t *h;
-
- kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t);
-
-#if USE_FAST_MEMORY
- h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
-#else
- h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
-#endif
-
-#ifdef KMP_DEBUG
- h->nelements = 0;
-#endif
- h->buckets = (kmp_dephash_entry **)(h+1);
-
- for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ )
- h->buckets[i] = 0;
-
- return h;
-}
-
-static void
-__kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h )
-{
- for ( kmp_int32 i=0; i < kmp_dephash_size; i++ ) {
- if ( h->buckets[i] ) {
- kmp_dephash_entry_t *next;
- for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) {
- next = entry->next_in_bucket;
- __kmp_depnode_list_free(thread,entry->last_ins);
- __kmp_node_deref(thread,entry->last_out);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread,entry);
-#else
- __kmp_thread_free(thread,entry);
-#endif
- }
- }
- }
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread,h);
-#else
- __kmp_thread_free(thread,h);
-#endif
-}
-
-static kmp_dephash_entry *
-__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
-{
- kmp_int32 bucket = __kmp_dephash_hash(addr);
-
- kmp_dephash_entry_t *entry;
- for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
- if ( entry->addr == addr ) break;
-
- if ( entry == NULL ) {
- // create entry. This is only done by one thread so no locking required
-#if USE_FAST_MEMORY
- entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) );
-#else
- entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) );
-#endif
- entry->addr = addr;
- entry->last_out = NULL;
- entry->last_ins = NULL;
- entry->next_in_bucket = h->buckets[bucket];
- h->buckets[bucket] = entry;
-#ifdef KMP_DEBUG
- h->nelements++;
- if ( entry->next_in_bucket ) h->nconflicts++;
-#endif
- }
- return entry;
-}
-
-static kmp_depnode_list_t *
-__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node )
-{
- kmp_depnode_list_t *new_head;
-
-#if USE_FAST_MEMORY
- new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
-#else
- new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
-#endif
-
- new_head->node = __kmp_node_ref(node);
- new_head->next = list;
-
- return new_head;
-}
-
-static void
-__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list )
-{
- kmp_depnode_list *next;
-
- for ( ; list ; list = next ) {
- next = list->next;
-
- __kmp_node_deref(thread,list->node);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread,list);
-#else
- __kmp_thread_free(thread,list);
-#endif
- }
-}
-
-static inline void
-__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink )
-{
-#ifdef KMP_SUPPORT_GRAPH_OUTPUT
- kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
- kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink->dn.task); // this can be NULL when if(0) ...
-
- __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource);
-#endif
-}
-
-template< bool filter >
-static inline kmp_int32
-__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
- bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list)
-{
- KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d depencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) );
-
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_int32 npredecessors=0;
- for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
- const kmp_depend_info_t * dep = &dep_list[i];
-
- KMP_DEBUG_ASSERT(dep->flags.in);
-
- if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries
-
- kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
- kmp_depnode_t *last_out = info->last_out;
-
- if ( dep->flags.out && info->last_ins ) {
- for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
- kmp_depnode_t * indep = p->node;
- if ( indep->dn.task ) {
- KMP_ACQUIRE_DEPNODE(gtid,indep);
- if ( indep->dn.task ) {
- __kmp_track_dependence(indep,node);
- indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node);
- KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
- filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
- npredecessors++;
- }
- KMP_RELEASE_DEPNODE(gtid,indep);
- }
- }
-
- __kmp_depnode_list_free(thread,info->last_ins);
- info->last_ins = NULL;
-
- } else if ( last_out && last_out->dn.task ) {
- KMP_ACQUIRE_DEPNODE(gtid,last_out);
- if ( last_out->dn.task ) {
- __kmp_track_dependence(last_out,node);
- last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node);
- KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
- filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
-
- npredecessors++;
- }
- KMP_RELEASE_DEPNODE(gtid,last_out);
- }
-
- if ( dep_barrier ) {
- // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
- // the execution of this task so the previous output nodes can be cleared.
- __kmp_node_deref(thread,last_out);
- info->last_out = NULL;
- } else {
- if ( dep->flags.out ) {
- __kmp_node_deref(thread,last_out);
- info->last_out = __kmp_node_ref(node);
- } else
- info->last_ins = __kmp_add_node(thread, info->last_ins, node);
- }
-
- }
-
- KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
-
- return npredecessors;
-}
-
-#define NO_DEP_BARRIER (false)
-#define DEP_BARRIER (true)
-
-// returns true if the task has any outstanding dependence
-static bool
-__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier,
- kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
- int i;
-
-#if KMP_DEBUG
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-#endif
- KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased depedencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) );
-
- // Filter deps in dep_list
- // TODO: Different algorithm for large dep_list ( > 10 ? )
- for ( i = 0; i < ndeps; i ++ ) {
- if ( dep_list[i].base_addr != 0 )
- for ( int j = i+1; j < ndeps; j++ )
- if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
- dep_list[i].flags.in |= dep_list[j].flags.in;
- dep_list[i].flags.out |= dep_list[j].flags.out;
- dep_list[j].base_addr = 0; // Mark j element as void
- }
- }
-
- // doesn't need to be atomic as no other thread is going to be accessing this node just yet
- // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
- node->dn.npredecessors = -1;
-
- // used to pack all npredecessors additions into a single atomic operation at the end
- int npredecessors;
-
- npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps, dep_list);
- npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list);
-
- node->dn.task = task;
- KMP_MB();
-
- // Account for our initial fake value
- npredecessors++;
-
- // Update predecessors and obtain current value to check if there are still any outstandig dependences (some tasks may have finished while we processed the dependences)
- npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors;
-
- KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) );
-
- // beyond this point the task could be queued (and executed) by a releasing task...
- return npredecessors > 0 ? true : false;
-}
-
-void
-__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
-{
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_depnode_t *node = task->td_depnode;
-
- if ( task->td_dephash ) {
- KA_TRACE(40, ("__kmp_realease_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
- __kmp_dephash_free(thread,task->td_dephash);
- }
-
- if ( !node ) return;
-
- KA_TRACE(20, ("__kmp_realease_deps: T#%d notifying succesors of task %p.\n", gtid, task ) );
-
- KMP_ACQUIRE_DEPNODE(gtid,node);
- node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
- KMP_RELEASE_DEPNODE(gtid,node);
-
- kmp_depnode_list_t *next;
- for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
- kmp_depnode_t *successor = p->node;
- kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;
-
- // successor task can be NULL for wait_depends or because deps are still being processed
- if ( npredecessors == 0 ) {
- KMP_MB();
- if ( successor->dn.task ) {
- KA_TRACE(20, ("__kmp_realease_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
- __kmp_omp_task(gtid,successor->dn.task,false);
- }
- }
-
- next = p->next;
- __kmp_node_deref(thread,p->node);
-#if USE_FAST_MEMORY
- __kmp_fast_free(thread,p);
-#else
- __kmp_thread_free(thread,p);
-#endif
- }
-
- __kmp_node_deref(thread,node);
-
- KA_TRACE(20, ("__kmp_realease_deps: T#%d all successors of %p notified of completation\n", gtid, task ) );
-}
-
-/*!
-@ingroup TASKING
-@param loc_ref location of the original task directive
-@param gtid Global Thread ID of encountering thread
-@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
-@param ndeps Number of depend items with possible aliasing
-@param dep_list List of depend items with possible aliasing
-@param ndeps_noalias Number of depend items with no aliasing
-@param noalias_dep_list List of depend items with no aliasing
-
-@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspendend and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
-
-Schedule a non-thread-switchable task with dependences for execution
-*/
-kmp_int32
-__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
- kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
-
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
-
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * current_task = thread->th.th_current_task;
-
- bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
-#if OMP_41_ENABLED
- serial = serial && !(new_taskdata->td_flags.proxy == TASK_PROXY);
-#endif
-
- if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
- /* if no dependencies have been tracked yet, create the dependence hash */
- if ( current_task->td_dephash == NULL )
- current_task->td_dephash = __kmp_dephash_create(thread);
-
-#if USE_FAST_MEMORY
- kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
-#else
- kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t));
-#endif
-
- __kmp_init_node(node);
- new_taskdata->td_depnode = node;
-
- if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER,
- ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) {
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: "
- "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
- new_taskdata ) );
- return TASK_CURRENT_NOT_QUEUED;
- }
- } else {
-#if OMP_41_ENABLED
- kmp_task_team_t * task_team = thread->th.th_task_team;
- if ( task_team && task_team->tt.tt_found_proxy_tasks )
- __kmpc_omp_wait_deps ( loc_ref, gtid, ndeps, dep_list, ndeps_noalias, noalias_dep_list );
- else
-#endif
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)"
- "loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) );
- }
-
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : "
- "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref,
- new_taskdata ) );
-
- return __kmpc_omp_task(loc_ref,gtid,new_task);
-}
-
-/*!
-@ingroup TASKING
-@param loc_ref location of the original task directive
-@param gtid Global Thread ID of encountering thread
-@param ndeps Number of depend items with possible aliasing
-@param dep_list List of depend items with possible aliasing
-@param ndeps_noalias Number of depend items with no aliasing
-@param noalias_dep_list List of depend items with no aliasing
-
-Blocks the current task until all specifies dependencies have been fulfilled.
-*/
-void
-__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
- KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) );
-
- if ( ndeps == 0 && ndeps_noalias == 0 ) {
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) );
- return;
- }
-
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * current_task = thread->th.th_current_task;
-
- // We can return immediately as:
- // - dependences are not computed in serial teams (except if we have proxy tasks)
- // - if the dephash is not yet created it means we have nothing to wait for
- bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
-#if OMP_41_ENABLED
- ignore = ignore && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
-#endif
- ignore = ignore || current_task->td_dephash == NULL;
-
- if ( ignore ) {
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
- return;
- }
-
- kmp_depnode_t node;
- __kmp_init_node(&node);
-
- if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER,
- ndeps, dep_list, ndeps_noalias, noalias_dep_list )) {
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
- return;
- }
-
- int thread_finished = FALSE;
- kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
- while ( node.dn.npredecessors > 0 ) {
- flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- NULL,
-#endif
- __kmp_task_stealing_constraint );
- }
-
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) );
-}
-
-#endif /* OMP_40_ENABLED */
-
+/*
+ * kmp_taskdeps.cpp
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//#define KMP_SUPPORT_GRAPH_OUTPUT 1
+
+#include "kmp.h"
+#include "kmp_io.h"
+#include "kmp_wait_release.h"
+
+#if OMP_40_ENABLED
+
+//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use finished list entries?
+//TODO: don't use atomic ref counters for stack-allocated nodes.
+//TODO: find an alternate to atomic refs for heap-allocated nodes?
+//TODO: Finish graph output support
+//TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other runtime locks
+//TODO: Any ITT support needed?
+
+#ifdef KMP_SUPPORT_GRAPH_OUTPUT
+static kmp_int32 kmp_node_id_seed = 0;
+#endif
+
+static void
+__kmp_init_node ( kmp_depnode_t *node )
+{
+ node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed
+ node->dn.successors = NULL;
+ __kmp_init_lock(&node->dn.lock);
+ node->dn.nrefs = 1; // init creates the first reference to the node
+#ifdef KMP_SUPPORT_GRAPH_OUTPUT
+ node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
+#endif
+}
+
+static inline kmp_depnode_t *
+__kmp_node_ref ( kmp_depnode_t *node )
+{
+ KMP_TEST_THEN_INC32(&node->dn.nrefs);
+ return node;
+}
+
+static inline void
+__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node )
+{
+ if (!node) return;
+
+ kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
+ if ( n == 0 ) {
+ KMP_ASSERT(node->dn.nrefs == 0);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread,node);
+#else
+ __kmp_thread_free(thread,node);
+#endif
+ }
+}
+
+#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid))
+#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid))
+
+static void
+__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );
+
+static const kmp_int32 kmp_dephash_log2 = 6;
+static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2);
+
+static inline kmp_int32
+__kmp_dephash_hash ( kmp_intptr_t addr )
+{
+ //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets );
+ return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size;
+}
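A quick worked example of the bucket computation above, with kmp_dephash_log2 == 6 and kmp_dephash_size == 64 (address value chosen purely for illustration):

    // addr = 0x1040: ((0x1040 >> 6) ^ 0x1040) % 64
    //               = (0x0041 ^ 0x1040) % 64
    //               = 0x1001 % 64  ==  4097 % 64  ==  1   -> bucket 1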
+
+static kmp_dephash_t *
+__kmp_dephash_create ( kmp_info_t *thread )
+{
+ kmp_dephash_t *h;
+
+ kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t);
+
+#if USE_FAST_MEMORY
+ h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
+#else
+ h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
+#endif
+
+#ifdef KMP_DEBUG
+ h->nelements = 0;
+#endif
+ h->buckets = (kmp_dephash_entry **)(h+1);
+
+ for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ )
+ h->buckets[i] = 0;
+
+ return h;
+}
+
+static void
+__kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h )
+{
+ for ( kmp_int32 i=0; i < kmp_dephash_size; i++ ) {
+ if ( h->buckets[i] ) {
+ kmp_dephash_entry_t *next;
+ for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) {
+ next = entry->next_in_bucket;
+ __kmp_depnode_list_free(thread,entry->last_ins);
+ __kmp_node_deref(thread,entry->last_out);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread,entry);
+#else
+ __kmp_thread_free(thread,entry);
+#endif
+ }
+ }
+ }
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread,h);
+#else
+ __kmp_thread_free(thread,h);
+#endif
+}
+
+static kmp_dephash_entry *
+__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
+{
+ kmp_int32 bucket = __kmp_dephash_hash(addr);
+
+ kmp_dephash_entry_t *entry;
+ for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
+ if ( entry->addr == addr ) break;
+
+ if ( entry == NULL ) {
+ // create entry. This is only done by one thread so no locking required
+#if USE_FAST_MEMORY
+ entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) );
+#else
+ entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) );
+#endif
+ entry->addr = addr;
+ entry->last_out = NULL;
+ entry->last_ins = NULL;
+ entry->next_in_bucket = h->buckets[bucket];
+ h->buckets[bucket] = entry;
+#ifdef KMP_DEBUG
+ h->nelements++;
+ if ( entry->next_in_bucket ) h->nconflicts++;
+#endif
+ }
+ return entry;
+}
+
+static kmp_depnode_list_t *
+__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node )
+{
+ kmp_depnode_list_t *new_head;
+
+#if USE_FAST_MEMORY
+ new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
+#else
+ new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
+#endif
+
+ new_head->node = __kmp_node_ref(node);
+ new_head->next = list;
+
+ return new_head;
+}
+
+static void
+__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list )
+{
+ kmp_depnode_list *next;
+
+ for ( ; list ; list = next ) {
+ next = list->next;
+
+ __kmp_node_deref(thread,list->node);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread,list);
+#else
+ __kmp_thread_free(thread,list);
+#endif
+ }
+}
+
+static inline void
+__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink )
+{
+#ifdef KMP_SUPPORT_GRAPH_OUTPUT
+ kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
+ kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink->dn.task); // this can be NULL when if(0) ...
+
+ __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource);
+#endif
+}
+
+template< bool filter >
+static inline kmp_int32
+__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
+ bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list)
+{
+    KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) );
+
+ kmp_info_t *thread = __kmp_threads[ gtid ];
+ kmp_int32 npredecessors=0;
+ for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
+ const kmp_depend_info_t * dep = &dep_list[i];
+
+ KMP_DEBUG_ASSERT(dep->flags.in);
+
+ if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries
+
+ kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
+ kmp_depnode_t *last_out = info->last_out;
+
+ if ( dep->flags.out && info->last_ins ) {
+ for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
+ kmp_depnode_t * indep = p->node;
+ if ( indep->dn.task ) {
+ KMP_ACQUIRE_DEPNODE(gtid,indep);
+ if ( indep->dn.task ) {
+ __kmp_track_dependence(indep,node);
+ indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node);
+ KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
+ filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
+ npredecessors++;
+ }
+ KMP_RELEASE_DEPNODE(gtid,indep);
+ }
+ }
+
+ __kmp_depnode_list_free(thread,info->last_ins);
+ info->last_ins = NULL;
+
+ } else if ( last_out && last_out->dn.task ) {
+ KMP_ACQUIRE_DEPNODE(gtid,last_out);
+ if ( last_out->dn.task ) {
+ __kmp_track_dependence(last_out,node);
+ last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node);
+ KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
+ filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
+
+ npredecessors++;
+ }
+ KMP_RELEASE_DEPNODE(gtid,last_out);
+ }
+
+ if ( dep_barrier ) {
+ // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
+ // the execution of this task so the previous output nodes can be cleared.
+ __kmp_node_deref(thread,last_out);
+ info->last_out = NULL;
+ } else {
+ if ( dep->flags.out ) {
+ __kmp_node_deref(thread,last_out);
+ info->last_out = __kmp_node_ref(node);
+ } else
+ info->last_ins = __kmp_add_node(thread, info->last_ins, node);
+ }
+
+ }
+
+ KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
+
+ return npredecessors;
+}
+
+#define NO_DEP_BARRIER (false)
+#define DEP_BARRIER (true)
+
+// returns true if the task has any outstanding dependence
+static bool
+__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
+{
+ int i;
+
+#if KMP_DEBUG
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+#endif
+    KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased dependencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) );
+
+ // Filter deps in dep_list
+ // TODO: Different algorithm for large dep_list ( > 10 ? )
+ for ( i = 0; i < ndeps; i ++ ) {
+ if ( dep_list[i].base_addr != 0 )
+ for ( int j = i+1; j < ndeps; j++ )
+ if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
+ dep_list[i].flags.in |= dep_list[j].flags.in;
+ dep_list[i].flags.out |= dep_list[j].flags.out;
+ dep_list[j].base_addr = 0; // Mark j element as void
+ }
+ }
+
+ // doesn't need to be atomic as no other thread is going to be accessing this node just yet
+    // npredecessors is set to -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
+ node->dn.npredecessors = -1;
+
+ // used to pack all npredecessors additions into a single atomic operation at the end
+ int npredecessors;
+
+ npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps, dep_list);
+ npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list);
+
+ node->dn.task = task;
+ KMP_MB();
+
+ // Account for our initial fake value
+ npredecessors++;
+
+    // Update predecessors and obtain current value to check if there are still any outstanding dependences (some tasks may have finished while we processed the dependences)
+ npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors;
+
+ KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) );
+
+ // beyond this point the task could be queued (and executed) by a releasing task...
+ return npredecessors > 0 ? true : false;
+}
+
+void
+__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
+{
+ kmp_info_t *thread = __kmp_threads[ gtid ];
+ kmp_depnode_t *node = task->td_depnode;
+
+ if ( task->td_dephash ) {
+        KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
+ __kmp_dephash_free(thread,task->td_dephash);
+ }
+
+ if ( !node ) return;
+
+    KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) );
+
+ KMP_ACQUIRE_DEPNODE(gtid,node);
+ node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
+ KMP_RELEASE_DEPNODE(gtid,node);
+
+ kmp_depnode_list_t *next;
+ for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
+ kmp_depnode_t *successor = p->node;
+ kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;
+
+ // successor task can be NULL for wait_depends or because deps are still being processed
+ if ( npredecessors == 0 ) {
+ KMP_MB();
+ if ( successor->dn.task ) {
+                KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
+ __kmp_omp_task(gtid,successor->dn.task,false);
+ }
+ }
+
+ next = p->next;
+ __kmp_node_deref(thread,p->node);
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread,p);
+#else
+ __kmp_thread_free(thread,p);
+#endif
+ }
+
+ __kmp_node_deref(thread,node);
+
+    KA_TRACE(20, ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", gtid, task ) );
+}
+
+/*!
+@ingroup TASKING
+@param loc_ref location of the original task directive
+@param gtid Global Thread ID of encountering thread
+@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
+@param ndeps Number of depend items with possible aliasing
+@param dep_list List of depend items with possible aliasing
+@param ndeps_noalias Number of depend items with no aliasing
+@param noalias_dep_list List of depend items with no aliasing
+
+@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
+
+Schedule a non-thread-switchable task with dependences for execution
+*/
+kmp_int32
+__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
+{
+
+ kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata ) );
+
+ kmp_info_t *thread = __kmp_threads[ gtid ];
+ kmp_taskdata_t * current_task = thread->th.th_current_task;
+
+ bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
+#if OMP_41_ENABLED
+ serial = serial && !(new_taskdata->td_flags.proxy == TASK_PROXY);
+#endif
+
+ if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
+ /* if no dependencies have been tracked yet, create the dependence hash */
+ if ( current_task->td_dephash == NULL )
+ current_task->td_dephash = __kmp_dephash_create(thread);
+
+#if USE_FAST_MEMORY
+ kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
+#else
+ kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t));
+#endif
+
+ __kmp_init_node(node);
+ new_taskdata->td_depnode = node;
+
+ if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER,
+ ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) {
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: "
+ "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
+ new_taskdata ) );
+ return TASK_CURRENT_NOT_QUEUED;
+ }
+ } else {
+#if OMP_41_ENABLED
+ kmp_task_team_t * task_team = thread->th.th_task_team;
+ if ( task_team && task_team->tt.tt_found_proxy_tasks )
+ __kmpc_omp_wait_deps ( loc_ref, gtid, ndeps, dep_list, ndeps_noalias, noalias_dep_list );
+ else
+#endif
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)"
+ "loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) );
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : "
+ "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref,
+ new_taskdata ) );
+
+ return __kmpc_omp_task(loc_ref,gtid,new_task);
+}
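For context, this is roughly the call sequence a compiler might emit for a task with depend clauses; the field layout of kmp_depend_info_t (base_addr, len, flags.in/flags.out) and the surrounding names (loc, gtid, new_task, x, y) are assumptions here, not taken from this file:

    //   #pragma omp task depend(in: x) depend(inout: y)
    kmp_depend_info_t deps[2];
    deps[0].base_addr = (kmp_intptr_t) & x;   deps[0].len = sizeof( x );
    deps[0].flags.in  = 1;                    deps[0].flags.out = 0;
    deps[1].base_addr = (kmp_intptr_t) & y;   deps[1].len = sizeof( y );
    deps[1].flags.in  = 1;                    deps[1].flags.out = 1;   // 'out' entries also carry 'in', per KMP_DEBUG_ASSERT(dep->flags.in) above
    __kmpc_omp_task_with_deps( & loc, gtid, new_task, 2, deps, 0, NULL );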
+
+/*!
+@ingroup TASKING
+@param loc_ref location of the original task directive
+@param gtid Global Thread ID of encountering thread
+@param ndeps Number of depend items with possible aliasing
+@param dep_list List of depend items with possible aliasing
+@param ndeps_noalias Number of depend items with no aliasing
+@param noalias_dep_list List of depend items with no aliasing
+
+Blocks the current task until all specified dependencies have been fulfilled.
+*/
+void
+__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
+{
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) );
+
+ if ( ndeps == 0 && ndeps_noalias == 0 ) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) );
+ return;
+ }
+
+ kmp_info_t *thread = __kmp_threads[ gtid ];
+ kmp_taskdata_t * current_task = thread->th.th_current_task;
+
+ // We can return immediately as:
+ // - dependences are not computed in serial teams (except if we have proxy tasks)
+ // - if the dephash is not yet created it means we have nothing to wait for
+ bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
+#if OMP_41_ENABLED
+ ignore = ignore && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
+#endif
+ ignore = ignore || current_task->td_dephash == NULL;
+
+ if ( ignore ) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
+ return;
+ }
+
+ kmp_depnode_t node;
+ __kmp_init_node(&node);
+
+ if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER,
+ ndeps, dep_list, ndeps_noalias, noalias_dep_list )) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
+ return;
+ }
+
+ int thread_finished = FALSE;
+ kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
+ while ( node.dn.npredecessors > 0 ) {
+ flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
+#if USE_ITT_BUILD
+ NULL,
+#endif
+ __kmp_task_stealing_constraint );
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) );
+}
+
+#endif /* OMP_40_ENABLED */
+
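For orientation, the following is a minimal user-level sketch (not part of the diff) of OpenMP code that exercises the dependence entry points restored above. It assumes a Clang/LLVM-style lowering, which this diff does not state: depend clauses on a deferred task are handled by __kmpc_omp_task_with_deps, while an undeferred task (for example one with if(0)) that carries depend clauses first blocks in __kmpc_omp_wait_deps.

    /* Illustrative sketch only; the mapping to the runtime entry points is an
       assumption about a Clang/LLVM-style compiler, not something this diff states. */
    #include <stdio.h>

    int main(void) {
        int x = 0;
        #pragma omp parallel
        #pragma omp single
        {
            #pragma omp task depend(out: x)       /* producer: deferred task with an out dependence */
            x = 42;

            #pragma omp task if(0) depend(in: x)  /* undeferred consumer: must wait on the producer */
            printf("x = %d\n", x);                /* prints 42 once the dependence is satisfied */
        }
        return 0;
    }

Read against the serialized branch of __kmpc_omp_task_with_deps above, this also shows why dependences can be ignored outright when the team is serialized and no proxy tasks have been found: every task runs immediately in program order, so the ordering requested by the depend clauses already holds.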
diff --git a/contrib/libs/cxxsupp/openmp/kmp_tasking.c b/contrib/libs/cxxsupp/openmp/kmp_tasking.c
index ec588c33f6..432f919231 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_tasking.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_tasking.c
@@ -1,2860 +1,2860 @@
-/*
- * kmp_tasking.c -- OpenMP 3.0 tasking support.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_itt.h"
-#include "kmp_wait_release.h"
-#include "kmp_stats.h"
-
-#if OMPT_SUPPORT
-#include "ompt-specific.h"
-#endif
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
-/* forward declaration */
-static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
-static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
-static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
-
-#ifdef OMP_41_ENABLED
-static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
-#endif
-
-static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
- if (!flag) return;
- // Attempt to wake up a thread: examine its type and call appropriate template
- switch (((kmp_flag_64 *)flag)->get_type()) {
- case flag32: __kmp_resume_32(gtid, NULL); break;
- case flag64: __kmp_resume_64(gtid, NULL); break;
- case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
- }
-}
-
-#ifdef BUILD_TIED_TASK_STACK
-
-//---------------------------------------------------------------------------
-// __kmp_trace_task_stack: print the tied tasks from the task stack in order
-// from top to bottom
-//
-// gtid: global thread identifier for thread containing stack
-// thread_data: thread data for task team thread containing stack
-// threshold: value above which the trace statement triggers
-// location: string identifying call site of this function (for trace)
-
-static void
-__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
-{
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
- kmp_taskdata_t **stack_top = task_stack -> ts_top;
- kmp_int32 entries = task_stack -> ts_entries;
- kmp_taskdata_t *tied_task;
-
- KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
- "first_block = %p, stack_top = %p \n",
- location, gtid, entries, task_stack->ts_first_block, stack_top ) );
-
- KMP_DEBUG_ASSERT( stack_top != NULL );
- KMP_DEBUG_ASSERT( entries > 0 );
-
- while ( entries != 0 )
- {
- KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
- // fix up ts_top if we need to pop from previous block
- if ( entries & TASK_STACK_INDEX_MASK == 0 )
- {
- kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
-
- stack_block = stack_block -> sb_prev;
- stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
- }
-
- // finish bookkeeping
- stack_top--;
- entries--;
-
- tied_task = * stack_top;
-
- KMP_DEBUG_ASSERT( tied_task != NULL );
- KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
-
- KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
- "stack_top=%p, tied_task=%p\n",
- location, gtid, entries, stack_top, tied_task ) );
- }
- KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
-
- KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
- location, gtid ) );
-}
-
-//---------------------------------------------------------------------------
-// __kmp_init_task_stack: initialize the task stack for the first time
-// after a thread_data structure is created.
-// It should not be necessary to do this again (assuming the stack works).
-//
-// gtid: global thread identifier of calling thread
-// thread_data: thread data for task team thread containing stack
-
-static void
-__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
-{
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
- kmp_stack_block_t *first_block;
-
- // set up the first block of the stack
- first_block = & task_stack -> ts_first_block;
- task_stack -> ts_top = (kmp_taskdata_t **) first_block;
- memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
-
- // initialize the stack to be empty
- task_stack -> ts_entries = TASK_STACK_EMPTY;
- first_block -> sb_next = NULL;
- first_block -> sb_prev = NULL;
-}
-
-
-//---------------------------------------------------------------------------
-// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
-//
-// gtid: global thread identifier for calling thread
-// thread_data: thread info for thread containing stack
-
-static void
-__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
-{
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
- kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
-
- KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
- // free from the second block of the stack
- while ( stack_block != NULL ) {
- kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
-
- stack_block -> sb_next = NULL;
- stack_block -> sb_prev = NULL;
- if (stack_block != & task_stack -> ts_first_block) {
- __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
- }
- stack_block = next_block;
- }
- // initialize the stack to be empty
- task_stack -> ts_entries = 0;
- task_stack -> ts_top = NULL;
-}
-
-
-//---------------------------------------------------------------------------
-// __kmp_push_task_stack: Push the tied task onto the task stack.
-// Grow the stack if necessary by allocating another block.
-//
-// gtid: global thread identifier for calling thread
-// thread: thread info for thread containing stack
-// tied_task: the task to push on the stack
-
-static void
-__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
-{
- // GEH - need to consider what to do if tt_threads_data not allocated yet
- kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
- tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
-
- if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
- return; // Don't push anything on stack if team or team tasks are serialized
- }
-
- KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
- KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
-
- KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
- gtid, thread, tied_task ) );
- // Store entry
- * (task_stack -> ts_top) = tied_task;
-
- // Do bookkeeping for next push
- task_stack -> ts_top++;
- task_stack -> ts_entries++;
-
- if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
- {
- // Find beginning of this task block
- kmp_stack_block_t *stack_block =
- (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
-
- // Check if we already have a block
- if ( stack_block -> sb_next != NULL )
- { // reset ts_top to beginning of next block
- task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
- }
- else
- { // Alloc new block and link it up
- kmp_stack_block_t *new_block = (kmp_stack_block_t *)
- __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
-
- task_stack -> ts_top = & new_block -> sb_block[0];
- stack_block -> sb_next = new_block;
- new_block -> sb_prev = stack_block;
- new_block -> sb_next = NULL;
-
- KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
- gtid, tied_task, new_block ) );
- }
- }
- KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
-}
-
-//---------------------------------------------------------------------------
-// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
-// the task, just check to make sure it matches the ending task passed in.
-//
-// gtid: global thread identifier for the calling thread
-// thread: thread info structure containing stack
-// tied_task: the task popped off the stack
-// ending_task: the task that is ending (should match popped task)
-
-static void
-__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
-{
- // GEH - need to consider what to do if tt_threads_data not allocated yet
- kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
- kmp_taskdata_t *tied_task;
-
- if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
- return; // Don't pop anything from stack if team or team tasks are serialized
- }
-
- KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
- KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
-
- KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
-
- // fix up ts_top if we need to pop from previous block
- if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
- {
- kmp_stack_block_t *stack_block =
- (kmp_stack_block_t *) (task_stack -> ts_top) ;
-
- stack_block = stack_block -> sb_prev;
- task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
- }
-
- // finish bookkeeping
- task_stack -> ts_top--;
- task_stack -> ts_entries--;
-
- tied_task = * (task_stack -> ts_top );
-
- KMP_DEBUG_ASSERT( tied_task != NULL );
- KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
- KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
-
- KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
- return;
-}
-#endif /* BUILD_TIED_TASK_STACK */
-
-//---------------------------------------------------
-// __kmp_push_task: Add a task to the thread's deque
-
-static kmp_int32
-__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
-{
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_task_team_t * task_team = thread->th.th_task_team;
- kmp_int32 tid = __kmp_tid_from_gtid( gtid );
- kmp_thread_data_t * thread_data;
-
- KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
-
- // The first check avoids building task_team thread data if serialized
- if ( taskdata->td_flags.task_serial ) {
- KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
- gtid, taskdata ) );
- return TASK_NOT_PUSHED;
- }
-
- // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- if ( ! KMP_TASKING_ENABLED(task_team) ) {
- __kmp_enable_tasking( task_team, thread );
- }
- KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
- KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
-
- // Find tasking deque specific to encountering thread
- thread_data = & task_team -> tt.tt_threads_data[ tid ];
-
- // No lock needed since only owner can allocate
- if (thread_data -> td.td_deque == NULL ) {
- __kmp_alloc_task_deque( thread, thread_data );
- }
-
- // Check if deque is full
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
- {
- KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
- gtid, taskdata ) );
- return TASK_NOT_PUSHED;
- }
-
- // Lock the deque for the task push operation
- __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
-#if OMP_41_ENABLED
- // Need to recheck as we can get a proxy task from a thread outside of OpenMP
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
- {
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
- gtid, taskdata ) );
- return TASK_NOT_PUSHED;
- }
-#else
- // Must have room since no thread other than the calling thread can add tasks
- KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
-#endif
-
- thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
- // Wrap index.
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
- TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
-
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
- KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
- "task=%p ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, thread_data->td.td_deque_ntasks,
- thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
-
- return TASK_SUCCESSFULLY_PUSHED;
-}
-
-
-//-----------------------------------------------------------------------------------------
-// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
-// this_thr: thread structure to set current_task in.
-
-void
-__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
-{
- KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
- "curtask_parent=%p\n",
- 0, this_thr, this_thr -> th.th_current_task,
- this_thr -> th.th_current_task -> td_parent ) );
-
- this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
-
- KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
- "curtask_parent=%p\n",
- 0, this_thr, this_thr -> th.th_current_task,
- this_thr -> th.th_current_task -> td_parent ) );
-}
-
-
-//---------------------------------------------------------------------------------------
-// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
-// this_thr: thread structure to set up
-// team: team for implicit task data
-// tid: thread within team to set up
-
-void
-__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
-{
- // the current task of this thread becomes the parent of the newly created implicit tasks of the new team
- KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
- "parent_task=%p\n",
- tid, this_thr, this_thr->th.th_current_task,
- team->t.t_implicit_task_taskdata[tid].td_parent ) );
-
- KMP_DEBUG_ASSERT (this_thr != NULL);
-
- if( tid == 0 ) {
- if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
- team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
- this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
- }
- } else {
- team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
- this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
- }
-
- KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
- "parent_task=%p\n",
- tid, this_thr, this_thr->th.th_current_task,
- team->t.t_implicit_task_taskdata[tid].td_parent ) );
-}
-
-
-//----------------------------------------------------------------------
-// __kmp_task_start: bookkeeping for a task starting execution
-// GTID: global thread id of calling thread
-// task: task starting execution
-// current_task: task suspending
-
-static void
-__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_info_t * thread = __kmp_threads[ gtid ];
-
- KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
- gtid, taskdata, current_task) );
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
-
- // mark currently executing task as suspended
- // TODO: GEH - make sure root team implicit task is initialized properly.
- // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
- current_task -> td_flags.executing = 0;
-
- // Add task to stack if tied
-#ifdef BUILD_TIED_TASK_STACK
- if ( taskdata -> td_flags.tiedness == TASK_TIED )
- {
- __kmp_push_task_stack( gtid, thread, taskdata );
- }
-#endif /* BUILD_TIED_TASK_STACK */
-
- // mark starting task as executing and as current task
- thread -> th.th_current_task = taskdata;
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
- taskdata -> td_flags.started = 1;
- taskdata -> td_flags.executing = 1;
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
- // GEH TODO: shouldn't we pass some sort of location identifier here?
- // APT: yes, we will pass location here.
- // need to store current thread state (in a thread or taskdata structure)
- // before setting work_state, otherwise wrong state is set after end of task
-
- KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
- gtid, taskdata ) );
-
-#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
- kmp_taskdata_t *parent = taskdata->td_parent;
- ompt_callbacks.ompt_callback(ompt_event_task_begin)(
- parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
- parent ? &(parent->ompt_task_info.frame) : NULL,
- taskdata->ompt_task_info.task_id,
- taskdata->ompt_task_info.function);
- }
-#endif
-
- return;
-}
-
-
-//----------------------------------------------------------------------
-// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
-// loc_ref: source location information; points to beginning of task block.
-// gtid: global thread number.
-// task: task thunk for the started task.
-
-void
-__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-
- KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
- gtid, loc_ref, taskdata, current_task ) );
-
- taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
- __kmp_task_start( gtid, task, current_task );
-
- KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
- gtid, loc_ref, taskdata ) );
-
- return;
-}
-
-#ifdef TASK_UNUSED
-//----------------------------------------------------------------------
-// __kmpc_omp_task_begin: report that a given task has started execution
-// NEVER GENERATED BY COMPILER, DEPRECATED!!!
-
-void
-__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
-{
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-
- KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
-
- __kmp_task_start( gtid, task, current_task );
-
- KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- return;
-}
-#endif // TASK_UNUSED
-
-
-//-------------------------------------------------------------------------------------
-// __kmp_free_task: free the current task space and the space for shareds
-// gtid: Global thread ID of calling thread
-// taskdata: task to free
-// thread: thread data structure of caller
-
-static void
-__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
-{
- KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
- gtid, taskdata) );
-
- // Check to make sure all flags and counters have the correct values
- KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
- KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
- KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
- KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
- KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
- KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
-
- taskdata->td_flags.freed = 1;
- // deallocate the taskdata and shared variable blocks associated with this task
- #if USE_FAST_MEMORY
- __kmp_fast_free( thread, taskdata );
- #else /* ! USE_FAST_MEMORY */
- __kmp_thread_free( thread, taskdata );
- #endif
-
- KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
- gtid, taskdata) );
-}
-
-//-------------------------------------------------------------------------------------
-// __kmp_free_task_and_ancestors: free the current task and ancestors without children
-//
-// gtid: Global thread ID of calling thread
-// taskdata: task to free
-// thread: thread data structure of caller
-
-static void
-__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
-{
- kmp_int32 children = 0;
- kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
-
- if ( !team_or_tasking_serialized ) {
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
- }
-
- // Now, go up the ancestor tree to see if any ancestors can now be freed.
- while ( children == 0 )
- {
- kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
-
- KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
- "and freeing itself\n", gtid, taskdata) );
-
- // --- Deallocate my ancestor task ---
- __kmp_free_task( gtid, taskdata, thread );
-
- taskdata = parent_taskdata;
-
- // Stop checking ancestors at implicit task or if tasking serialized
- // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
- if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
- return;
-
- if ( !team_or_tasking_serialized ) {
- // Predecrement simulated by "- 1" calculation
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
- }
- }
-
- KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
- "not freeing it yet\n", gtid, taskdata, children) );
-}
-
-//---------------------------------------------------------------------
-// __kmp_task_finish: bookkeeping to do when a task finishes execution
-// gtid: global thread ID for calling thread
-// task: task to be finished
-// resumed_task: task to be resumed. (may be NULL if task is serialized)
-
-static void
-__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_int32 children = 0;
-
-#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_end)) {
- kmp_taskdata_t *parent = taskdata->td_parent;
- ompt_callbacks.ompt_callback(ompt_event_task_end)(
- taskdata->ompt_task_info.task_id);
- }
-#endif
-
- KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
- gtid, taskdata, resumed_task) );
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
-
- // Pop task from stack if tied
-#ifdef BUILD_TIED_TASK_STACK
- if ( taskdata -> td_flags.tiedness == TASK_TIED )
- {
- __kmp_pop_task_stack( gtid, thread, taskdata );
- }
-#endif /* BUILD_TIED_TASK_STACK */
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
- taskdata -> td_flags.complete = 1; // mark the task as completed
- KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
- // Only need to keep track of count if team parallel and tasking not serialized
- if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
- // Predecrement simulated by "- 1" calculation
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
-#if OMP_40_ENABLED
- if ( taskdata->td_taskgroup )
- KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
- __kmp_release_deps(gtid,taskdata);
-#endif
- }
-
- // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
- // Otherwise, if a task is executed immediately from the release_deps code
- // the flag will be reset to 1 again by this same function
- KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
- taskdata -> td_flags.executing = 0; // suspend the finishing task
-
- KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
- gtid, taskdata, children) );
-
-#if OMP_40_ENABLED
- /* If the task's destructor thunk flag has been set, we need to invoke the
- destructor thunk that has been generated by the compiler.
- The code is placed here, since at this point other tasks might have been released
- hence overlapping the destructor invocations with some other work in the
- released tasks. The OpenMP spec is not specific on when the destructors are
- invoked, so we should be free to choose.
- */
- if (taskdata->td_flags.destructors_thunk) {
- kmp_routine_entry_t destr_thunk = task->destructors;
- KMP_ASSERT(destr_thunk);
- destr_thunk(gtid, task);
- }
-#endif // OMP_40_ENABLED
-
- // bookkeeping for resuming task:
- // GEH - note tasking_ser => task_serial
- KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
- taskdata->td_flags.task_serial);
- if ( taskdata->td_flags.task_serial )
- {
- if (resumed_task == NULL) {
- resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
- }
- else {
- // verify resumed task passed in points to parent
- KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
- }
- }
- else {
- KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
- }
-
- // Free this task and then ancestor tasks if they have no children.
- __kmp_free_task_and_ancestors(gtid, taskdata, thread);
-
- // FIXME johnmc: I think this statement should be before the last one so if an
- // asynchronous inquiry peers into the runtime system it doesn't see the freed
- // task as the current task
- __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
-
- // TODO: GEH - make sure root team implicit task is initialized properly.
- // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
- resumed_task->td_flags.executing = 1; // resume previous task
-
- KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
- gtid, taskdata, resumed_task) );
-
- return;
-}
-
-//---------------------------------------------------------------------
-// __kmpc_omp_task_complete_if0: report that a task has completed execution
-// loc_ref: source location information; points to end of task block.
-// gtid: global thread number.
-// task: task thunk for the completed task.
-
-void
-__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
-{
- KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
-
- KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- return;
-}
-
-#ifdef TASK_UNUSED
-//---------------------------------------------------------------------
-// __kmpc_omp_task_complete: report that a task has completed execution
-// NEVER GENERATED BY COMPILER, DEPRECATED!!!
-
-void
-__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
-{
- KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
-
- KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
- return;
-}
-#endif // TASK_UNUSED
-
-
-#if OMPT_SUPPORT
-//----------------------------------------------------------------------------------------------------
-// __kmp_task_init_ompt:
-// Initialize OMPT fields maintained by a task. This will only be called after
-// ompt_tool, so we already know whether ompt is enabled or not.
-
-static inline void
-__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
-{
- if (ompt_enabled) {
- task->ompt_task_info.task_id = __ompt_task_id_new(tid);
- task->ompt_task_info.function = function;
- task->ompt_task_info.frame.exit_runtime_frame = NULL;
- task->ompt_task_info.frame.reenter_runtime_frame = NULL;
- }
-}
-#endif
-
-
-//----------------------------------------------------------------------------------------------------
-// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
-//
-// loc_ref: reference to source location of parallel region
-// this_thr: thread data structure corresponding to implicit task
-// team: team for this_thr
-// tid: thread id of given thread within team
-// set_curr_task: TRUE if need to push current task to thread
-// NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
-// TODO: Get better loc_ref. Value passed in may be NULL
-
-void
-__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
-{
- kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
-
- KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
- tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
-
- task->td_task_id = KMP_GEN_TASK_ID();
- task->td_team = team;
-// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
- task->td_ident = loc_ref;
- task->td_taskwait_ident = NULL;
- task->td_taskwait_counter = 0;
- task->td_taskwait_thread = 0;
-
- task->td_flags.tiedness = TASK_TIED;
- task->td_flags.tasktype = TASK_IMPLICIT;
-#if OMP_41_ENABLED
- task->td_flags.proxy = TASK_FULL;
-#endif
-
- // All implicit tasks are executed immediately, not deferred
- task->td_flags.task_serial = 1;
- task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
- task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
-
- task->td_flags.started = 1;
- task->td_flags.executing = 1;
- task->td_flags.complete = 0;
- task->td_flags.freed = 0;
-
-#if OMP_40_ENABLED
- task->td_dephash = NULL;
- task->td_depnode = NULL;
-#endif
-
- if (set_curr_task) { // only do this initialization the first time a thread is created
- task->td_incomplete_child_tasks = 0;
- task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
-#if OMP_40_ENABLED
- task->td_taskgroup = NULL; // An implicit task does not have taskgroup
-#endif
- __kmp_push_current_task_to_thread( this_thr, team, tid );
- } else {
- KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
- KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
- }
-
-#if OMPT_SUPPORT
- __kmp_task_init_ompt(task, tid, NULL);
-#endif
-
- KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
- tid, team, task ) );
-}
-
- // Round a size up to a multiple of val, where val is a power of two
-// Used to insert padding between structures co-allocated using a single malloc() call
-static size_t
-__kmp_round_up_to_val( size_t size, size_t val ) {
- if ( size & ( val - 1 ) ) {
- size &= ~ ( val - 1 );
- if ( size <= KMP_SIZE_T_MAX - val ) {
- size += val; // Round up if there is no overflow.
- }; // if
- }; // if
- return size;
-} // __kmp_round_up_to_val
-
-
-//---------------------------------------------------------------------------------
-// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
-//
-// loc_ref: source location information
-// gtid: global thread number.
-// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
-// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
-// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
-// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
-// task_entry: Pointer to task code entry point generated by compiler.
-// returns: a pointer to the allocated kmp_task_t structure (task).
-
-kmp_task_t *
-__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry )
-{
- kmp_task_t *task;
- kmp_taskdata_t *taskdata;
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_team_t *team = thread->th.th_team;
- kmp_taskdata_t *parent_task = thread->th.th_current_task;
- size_t shareds_offset;
-
- KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
- "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
- gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
- sizeof_shareds, task_entry) );
-
- if ( parent_task->td_flags.final ) {
- if (flags->merged_if0) {
- }
- flags->final = 1;
- }
-
-#if OMP_41_ENABLED
- if ( flags->proxy == TASK_PROXY ) {
- flags->tiedness = TASK_UNTIED;
- flags->merged_if0 = 1;
-
- /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
- if ( (thread->th.th_task_team) == NULL ) {
- /* This should only happen if the team is serialized
- setup a task team and propagate it to the thread
- */
- KMP_DEBUG_ASSERT(team->t.t_serialized);
- KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
- __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
- thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
- }
- kmp_task_team_t * task_team = thread->th.th_task_team;
-
- /* tasking must be enabled now as the task might not be pushed */
- if ( !KMP_TASKING_ENABLED( task_team ) ) {
- KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
- __kmp_enable_tasking( task_team, thread );
- kmp_int32 tid = thread->th.th_info.ds.ds_tid;
- kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
- // No lock needed since only owner can allocate
- if (thread_data -> td.td_deque == NULL ) {
- __kmp_alloc_task_deque( thread, thread_data );
- }
- }
-
- if ( task_team->tt.tt_found_proxy_tasks == FALSE )
- TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
- }
-#endif
-
- // Calculate shared structure offset including padding after kmp_task_t struct
- // to align pointers in shared struct
- shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
- shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
-
- // Allocate a kmp_taskdata_t block and a kmp_task_t block.
- KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
- gtid, shareds_offset) );
- KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
- gtid, sizeof_shareds) );
-
- // Avoid double allocation here by combining shareds with taskdata
- #if USE_FAST_MEMORY
- taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
- #else /* ! USE_FAST_MEMORY */
- taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
- #endif /* USE_FAST_MEMORY */
-
- task = KMP_TASKDATA_TO_TASK(taskdata);
-
- // Make sure task & taskdata are aligned appropriately
-#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
-#else
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
-#endif
- if (sizeof_shareds > 0) {
- // Avoid double allocation here by combining shareds with taskdata
- task->shareds = & ((char *) taskdata)[ shareds_offset ];
- // Make sure shareds struct is aligned to pointer size
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
- } else {
- task->shareds = NULL;
- }
- task->routine = task_entry;
- task->part_id = 0; // AC: Always start with 0 part id
-
- taskdata->td_task_id = KMP_GEN_TASK_ID();
- taskdata->td_team = team;
- taskdata->td_alloc_thread = thread;
- taskdata->td_parent = parent_task;
- taskdata->td_level = parent_task->td_level + 1; // increment nesting level
- taskdata->td_ident = loc_ref;
- taskdata->td_taskwait_ident = NULL;
- taskdata->td_taskwait_counter = 0;
- taskdata->td_taskwait_thread = 0;
- KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
-#if OMP_41_ENABLED
- // avoid copying icvs for proxy tasks
- if ( flags->proxy == TASK_FULL )
-#endif
- copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
-
- taskdata->td_flags.tiedness = flags->tiedness;
- taskdata->td_flags.final = flags->final;
- taskdata->td_flags.merged_if0 = flags->merged_if0;
-#if OMP_40_ENABLED
- taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
-#endif // OMP_40_ENABLED
-#if OMP_41_ENABLED
- taskdata->td_flags.proxy = flags->proxy;
-#endif
- taskdata->td_flags.tasktype = TASK_EXPLICIT;
-
- // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
- taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
-
- // GEH - TODO: fix this to copy parent task's value of team_serial flag
- taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
-
- // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
- // tasks are not left until program termination to execute. Also, it helps locality to execute
- // immediately.
- taskdata->td_flags.task_serial = ( parent_task->td_flags.final
- || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
-
- taskdata->td_flags.started = 0;
- taskdata->td_flags.executing = 0;
- taskdata->td_flags.complete = 0;
- taskdata->td_flags.freed = 0;
-
- taskdata->td_flags.native = flags->native;
-
- taskdata->td_incomplete_child_tasks = 0;
- taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
-#if OMP_40_ENABLED
- taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
- taskdata->td_dephash = NULL;
- taskdata->td_depnode = NULL;
-#endif
-
- // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
-#if OMP_41_ENABLED
- if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
-#else
- if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
-#endif
- {
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
-#if OMP_40_ENABLED
- if ( parent_task->td_taskgroup )
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
-#endif
- // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
- if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
- }
- }
-
- KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
- gtid, taskdata, taskdata->td_parent) );
-
-#if OMPT_SUPPORT
- __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
-#endif
-
- return task;
-}
-
-
-kmp_task_t *
-__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry )
-{
- kmp_task_t *retval;
- kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
-
- input_flags->native = FALSE;
- // __kmp_task_alloc() sets up all other runtime flags
-
-#if OMP_41_ENABLED
- KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
- "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
- gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
- input_flags->proxy ? "proxy" : "",
- sizeof_kmp_task_t, sizeof_shareds, task_entry) );
-#else
- KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
- "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
- gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
- sizeof_kmp_task_t, sizeof_shareds, task_entry) );
-#endif
-
- retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
- sizeof_shareds, task_entry );
-
- KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
-
- return retval;
-}
-
-//-----------------------------------------------------------
-// __kmp_invoke_task: invoke the specified task
-//
-// gtid: global thread ID of caller
-// task: the task to invoke
-// current_task: the task to resume after task invocation
-
-static void
-__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-#if OMP_40_ENABLED
- int discard = 0 /* false */;
-#endif
- KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
- gtid, taskdata, current_task) );
- KMP_DEBUG_ASSERT(task);
-#if OMP_41_ENABLED
- if ( taskdata->td_flags.proxy == TASK_PROXY &&
- taskdata->td_flags.complete == 1)
- {
- // This is a proxy task that was already completed but it needs to run
- // its bottom-half finish
- KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
- gtid, taskdata) );
-
- __kmp_bottom_half_finish_proxy(gtid,task);
-
- KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
-
- return;
- }
-#endif
-
-#if OMP_41_ENABLED
- // Proxy tasks are not handled by the runtime
- if ( taskdata->td_flags.proxy != TASK_PROXY )
-#endif
- __kmp_task_start( gtid, task, current_task );
-
-#if OMPT_SUPPORT
- ompt_thread_info_t oldInfo;
- kmp_info_t * thread;
- if (ompt_enabled) {
- // Store the threads states and restore them after the task
- thread = __kmp_threads[ gtid ];
- oldInfo = thread->th.ompt_thread_info;
- thread->th.ompt_thread_info.wait_id = 0;
- thread->th.ompt_thread_info.state = ompt_state_work_parallel;
- taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
- }
-#endif
-
-#if OMP_40_ENABLED
- // TODO: cancel tasks if the parallel region has also been cancelled
- // TODO: check if this sequence can be hoisted above __kmp_task_start
- // if cancellation has been enabled for this run ...
- if (__kmp_omp_cancellation) {
- kmp_info_t *this_thr = __kmp_threads [ gtid ];
- kmp_team_t * this_team = this_thr->th.th_team;
- kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
- if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
- KMP_COUNT_BLOCK(TASK_cancelled);
- // this task belongs to a task group and we need to cancel it
- discard = 1 /* true */;
- }
- }
-
- //
- // Invoke the task routine and pass in relevant data.
- // Thunks generated by gcc take a different argument list.
- //
- if (!discard) {
- KMP_COUNT_BLOCK(TASK_executed);
- KMP_TIME_BLOCK (TASK_execution);
-#endif // OMP_40_ENABLED
-
-#if OMPT_SUPPORT && OMPT_TRACE
- /* let OMPT know that we're about to run this task */
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_switch))
- {
- ompt_callbacks.ompt_callback(ompt_event_task_switch)(
- current_task->ompt_task_info.task_id,
- taskdata->ompt_task_info.task_id);
- }
-#endif
-
-#ifdef KMP_GOMP_COMPAT
- if (taskdata->td_flags.native) {
- ((void (*)(void *))(*(task->routine)))(task->shareds);
- }
- else
-#endif /* KMP_GOMP_COMPAT */
- {
- (*(task->routine))(gtid, task);
- }
-
-#if OMPT_SUPPORT && OMPT_TRACE
- /* let OMPT know that we're returning to the callee task */
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_switch))
- {
- ompt_callbacks.ompt_callback(ompt_event_task_switch)(
- taskdata->ompt_task_info.task_id,
- current_task->ompt_task_info.task_id);
- }
-#endif
-
-#if OMP_40_ENABLED
- }
-#endif // OMP_40_ENABLED
-
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- thread->th.ompt_thread_info = oldInfo;
- taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
- }
-#endif
-
-#if OMP_41_ENABLED
- // Proxy tasks are not handled by the runtime
- if ( taskdata->td_flags.proxy != TASK_PROXY )
-#endif
- __kmp_task_finish( gtid, task, current_task );
-
- KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
- gtid, taskdata, current_task) );
- return;
-}
-
-//-----------------------------------------------------------------------
-// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
-//
-// loc_ref: location of original task pragma (ignored)
-// gtid: Global Thread ID of encountering thread
-// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
-// Returns:
-// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-
-kmp_int32
-__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
-{
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
-
- KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
-
- /* Should we execute the new task or queue it? For now, let's just always try to
- queue it. If the queue fills up, then we'll execute it. */
-
- if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
- { // Execute this task immediately
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
- new_taskdata->td_flags.task_serial = 1;
- __kmp_invoke_task( gtid, new_task, current_task );
- }
-
- KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
- "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
- new_taskdata ) );
-
- return TASK_CURRENT_NOT_QUEUED;
-}
-
-//---------------------------------------------------------------------
-// __kmp_omp_task: Schedule a non-thread-switchable task for execution
-// gtid: Global Thread ID of encountering thread
-// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
-// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
-// returns:
-//
-// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-kmp_int32
-__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
-{
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
- __builtin_frame_address(0);
- }
-#endif
-
- /* Should we execute the new task or queue it? For now, let's just always try to
- queue it. If the queue fills up, then we'll execute it. */
-#if OMP_41_ENABLED
- if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
-#else
- if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
-#endif
- { // Execute this task immediately
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
- if ( serialize_immediate )
- new_taskdata -> td_flags.task_serial = 1;
- __kmp_invoke_task( gtid, new_task, current_task );
- }
-
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
- }
-#endif
-
- return TASK_CURRENT_NOT_QUEUED;
-}
-
-//---------------------------------------------------------------------
-// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
-// the parent thread only!
-// loc_ref: location of original task pragma (ignored)
-// gtid: Global Thread ID of encountering thread
-// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
-// returns:
-//
-// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-
-kmp_int32
-__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
-{
- kmp_int32 res;
-
-#if KMP_DEBUG
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
-#endif
- KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
-
- res = __kmp_omp_task(gtid,new_task,true);
-
- KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
- return res;
-}
-
-//-------------------------------------------------------------------------------------
-// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
-
-kmp_int32
-__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
-{
- kmp_taskdata_t * taskdata;
- kmp_info_t * thread;
- int thread_finished = FALSE;
-
- KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
-
- thread = __kmp_threads[ gtid ];
- taskdata = thread -> th.th_current_task;
-
-#if OMPT_SUPPORT && OMPT_TRACE
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
-
- if (ompt_enabled) {
- kmp_team_t *team = thread->th.th_team;
- my_task_id = taskdata->ompt_task_info.task_id;
- my_parallel_id = team->t.ompt_team_info.parallel_id;
-
- if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
- my_parallel_id, my_task_id);
- }
- }
-#endif
-
-#if USE_ITT_BUILD
- // Note: These values are used by ITT events as well.
-#endif /* USE_ITT_BUILD */
- taskdata->td_taskwait_counter += 1;
- taskdata->td_taskwait_ident = loc_ref;
- taskdata->td_taskwait_thread = gtid + 1;
-
-#if USE_ITT_BUILD
- void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
-#if OMP_41_ENABLED
- if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
-#else
- if ( ! taskdata->td_flags.team_serial )
-#endif
- {
- // GEH: if team serialized, avoid reading the volatile variable below.
- kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
- while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
- flag.execute_tasks(thread, gtid, FALSE, &thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
- }
- }
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
- taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
-
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
- ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
- my_parallel_id, my_task_id);
- }
-#endif
- }
-
- KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
- "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
-
- return TASK_CURRENT_NOT_QUEUED;
-}
-
-
-//-------------------------------------------------
-// __kmpc_omp_taskyield: switch to a different task
-
-kmp_int32
-__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
-{
- kmp_taskdata_t * taskdata;
- kmp_info_t * thread;
- int thread_finished = FALSE;
-
- KMP_COUNT_BLOCK(OMP_TASKYIELD);
-
- KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
- gtid, loc_ref, end_part) );
-
- if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
-
- thread = __kmp_threads[ gtid ];
- taskdata = thread -> th.th_current_task;
- // Should we model this as a task wait or not?
-#if USE_ITT_BUILD
- // Note: These values are used by ITT events as well.
-#endif /* USE_ITT_BUILD */
- taskdata->td_taskwait_counter += 1;
- taskdata->td_taskwait_ident = loc_ref;
- taskdata->td_taskwait_thread = gtid + 1;
-
-#if USE_ITT_BUILD
- void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
-#endif /* USE_ITT_BUILD */
- if ( ! taskdata->td_flags.team_serial ) {
- kmp_task_team_t * task_team = thread->th.th_task_team;
- if (task_team != NULL) {
- if (KMP_TASKING_ENABLED(task_team)) {
- __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
- }
- }
- }
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
- taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
- }
-
- KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
- "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
-
- return TASK_CURRENT_NOT_QUEUED;
-}
-
-
-#if OMP_40_ENABLED
-//-------------------------------------------------------------------------------------
-// __kmpc_taskgroup: Start a new taskgroup
-
-void
-__kmpc_taskgroup( ident_t* loc, int gtid )
-{
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * taskdata = thread->th.th_current_task;
- kmp_taskgroup_t * tg_new =
- (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
- KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
- tg_new->count = 0;
- tg_new->cancel_request = cancel_noreq;
- tg_new->parent = taskdata->td_taskgroup;
- taskdata->td_taskgroup = tg_new;
-}
-
-
-//-------------------------------------------------------------------------------------
-// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
-// and its descendants are complete
-
-void
-__kmpc_end_taskgroup( ident_t* loc, int gtid )
-{
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * taskdata = thread->th.th_current_task;
- kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
- int thread_finished = FALSE;
-
- KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
- KMP_DEBUG_ASSERT( taskgroup != NULL );
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-#if USE_ITT_BUILD
- // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
- void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
-#if OMP_41_ENABLED
- if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
-#else
- if ( ! taskdata->td_flags.team_serial )
-#endif
- {
- kmp_flag_32 flag(&(taskgroup->count), 0U);
- while ( TCR_4(taskgroup->count) != 0 ) {
- flag.execute_tasks(thread, gtid, FALSE, &thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
- }
- }
-
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
-#endif /* USE_ITT_BUILD */
- }
- KMP_DEBUG_ASSERT( taskgroup->count == 0 );
-
- // Restore parent taskgroup for the current task
- taskdata->td_taskgroup = taskgroup->parent;
- __kmp_thread_free( thread, taskgroup );
-
- KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
-}
-#endif
-
-
-//------------------------------------------------------
-// __kmp_remove_my_task: remove a task from my own deque
-
-static kmp_task_t *
-__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
- kmp_int32 is_constrained )
-{
- kmp_task_t * task;
- kmp_taskdata_t * taskdata;
- kmp_thread_data_t *thread_data;
- kmp_uint32 tail;
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
-
- thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
-
- KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
-
- if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
- KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
- return NULL;
- }
-
- __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
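-    // Re-check under the lock: a thief may have emptied the deque since the unlocked check above.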
- if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
- return NULL;
- }
-
- tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
- taskdata = thread_data -> td.td_deque[ tail ];
-
- if (is_constrained) {
-        // we need to check if the candidate obeys the task scheduling constraint:
-        // only a child of the current task can be scheduled
- kmp_taskdata_t * current = thread->th.th_current_task;
- kmp_int32 level = current->td_level;
- kmp_taskdata_t * parent = taskdata->td_parent;
- while ( parent != current && parent->td_level > level ) {
- parent = parent->td_parent; // check generation up to the level of the current task
- KMP_DEBUG_ASSERT(parent != NULL);
- }
- if ( parent != current ) {
-            // If the tail task is not a child, then no other children can appear in the deque.
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
- return NULL;
- }
- }
-
- thread_data -> td.td_deque_tail = tail;
- TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
-
- __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
-
- KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
-
- task = KMP_TASKDATA_TO_TASK( taskdata );
- return task;
-}
-
-
-//-----------------------------------------------------------
-// __kmp_steal_task: remove a task from another thread's deque
-// Assumes that the calling thread has already checked that the
-// task_team thread_data exists before calling this routine.
-
-static kmp_task_t *
-__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
- volatile kmp_uint32 *unfinished_threads, int *thread_finished,
- kmp_int32 is_constrained )
-{
- kmp_task_t * task;
- kmp_taskdata_t * taskdata;
- kmp_thread_data_t *victim_td, *threads_data;
- kmp_int32 victim_tid;
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
- threads_data = task_team -> tt.tt_threads_data;
- KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
-
- victim_tid = victim->th.th_info.ds.ds_tid;
- victim_td = & threads_data[ victim_tid ];
-
- KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
- "head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
-
- if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
- (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
- {
- KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
- return NULL;
- }
-
- __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
-
- // Check again after we acquire the lock
- if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
- (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
- {
- __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
- return NULL;
- }
-
- KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
-
- if ( !is_constrained ) {
- taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
- // Bump head pointer and Wrap.
- victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
- } else {
- // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
- kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
- taskdata = victim_td -> td.td_deque[ tail ];
-        // we need to check if the candidate obeys the task scheduling constraint:
-        // only a child of the current task can be scheduled
- kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
- kmp_int32 level = current->td_level;
- kmp_taskdata_t * parent = taskdata->td_parent;
- while ( parent != current && parent->td_level > level ) {
- parent = parent->td_parent; // check generation up to the level of the current task
- KMP_DEBUG_ASSERT(parent != NULL);
- }
- if ( parent != current ) {
-            // If the tail task is not a child, then no other children can appear in the deque (?).
- __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
- task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
- return NULL;
- }
- victim_td -> td.td_deque_tail = tail;
- }
- if (*thread_finished) {
- // We need to un-mark this victim as a finished victim. This must be done before
- // releasing the lock, or else other threads (starting with the master victim)
- // might be prematurely released from the barrier!!!
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
-
- KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
- gtid, count + 1, task_team) );
-
- *thread_finished = FALSE;
- }
- TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
-
- __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
-
- KMP_COUNT_BLOCK(TASK_stolen);
- KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
- victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
- victim_td->td.td_deque_tail) );
-
- task = KMP_TASKDATA_TO_TASK( taskdata );
- return task;
-}
-
-
-//-----------------------------------------------------------------------------
-// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
-// is satisfied (return true) or there are none left (return false).
-// final_spin is TRUE if this is the spin at the release barrier.
-// thread_finished indicates whether the thread is finished executing all
-// the tasks it has on its deque, and is at the release barrier.
-// spinner is the location on which to spin.
-// spinner == NULL means only execute a single task and return.
-// checker is the value to check to terminate the spin.
-template <class C>
-static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- kmp_task_team_t * task_team;
- kmp_thread_data_t * threads_data;
- kmp_task_t * task;
- kmp_taskdata_t * current_task = thread -> th.th_current_task;
- volatile kmp_uint32 * unfinished_threads;
- kmp_int32 nthreads, last_stolen, k, tid;
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
-
- task_team = thread -> th.th_task_team;
- if (task_team == NULL) return FALSE;
-
- KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
- gtid, final_spin, *thread_finished) );
-
- threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
- KMP_DEBUG_ASSERT( threads_data != NULL );
-
- nthreads = task_team -> tt.tt_nproc;
- unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
-#if OMP_41_ENABLED
- KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
-#else
- KMP_DEBUG_ASSERT( nthreads > 1 );
-#endif
- KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
-
- // Choose tasks from our own work queue.
- start:
- while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // If this thread is only partway through the barrier and the condition
- // is met, then return now, so that the barrier gather/release pattern can proceed.
- // If this thread is in the last spin loop in the barrier, waiting to be
-        // released, we know that the termination condition will not be satisfied,
- // so don't waste any cycles checking it.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- }
-
- // This thread's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
-#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
-#endif
- {
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // It is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- }
-
- if (thread->th.th_task_team == NULL) return FALSE;
-#if OMP_41_ENABLED
- // check if there are other threads to steal from, otherwise go back
- if ( nthreads == 1 )
- goto start;
-#endif
-
- // Try to steal from the last place I stole from successfully.
- tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
- last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
-
- if (last_stolen != -1) {
- kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
-
- while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
- thread_finished, is_constrained )) != NULL)
- {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // Check to see if this thread can proceed.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
- gtid) );
- return TRUE;
- }
-
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- // If the execution of the stolen task resulted in more tasks being
- // placed on our run queue, then restart the whole process.
- if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
- gtid) );
- goto start;
- }
- }
-
- // Don't give priority to stealing from this thread anymore.
- threads_data[ tid ].td.td_deque_last_stolen = -1;
-
-        // The victim's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
-#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
-#endif
- {
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
- "task_team=%p\n", gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // If __kmp_tasking_mode != tskm_immediate_exec
- // then it is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
- gtid) );
- return TRUE;
- }
- }
- if (thread->th.th_task_team == NULL) return FALSE;
- }
-
- // Find a different thread to steal work from. Pick a random thread.
- // My initial plan was to cycle through all the threads, and only return
- // if we tried to steal from every thread, and failed. Arch says that's
- // not such a great idea.
- // GEH - need yield code in this loop for throughput library mode?
- new_victim:
- k = __kmp_get_random( thread ) % (nthreads - 1);
- if ( k >= thread -> th.th_info.ds.ds_tid ) {
- ++k; // Adjusts random distribution to exclude self
- }
- {
- kmp_info_t *other_thread = threads_data[k].td.td_thr;
- int first;
-
- // There is a slight chance that __kmp_enable_tasking() did not wake up
- // all threads waiting at the barrier. If this thread is sleeping, then
- // wake it up. Since we were going to pay the cache miss penalty
- // for referencing another thread's kmp_info_t struct anyway, the check
- // shouldn't cost too much performance at this point.
- // In extra barrier mode, tasks do not sleep at the separate tasking
- // barrier, so this isn't a problem.
- if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
- (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
- (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
- {
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
-            // A sleeping thread should not have any tasks on its queue.
- // There is a slight possibility that it resumes, steals a task from
- // another thread, which spawns more tasks, all in the time that it takes
- // this thread to check => don't write an assertion that the victim's
- // queue is empty. Try stealing from a different thread.
- goto new_victim;
- }
-
- // Now try to steal work from the selected thread
- first = TRUE;
- while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
- thread_finished, is_constrained )) != NULL)
- {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // Try stealing from this victim again, in the future.
- if (first) {
- threads_data[ tid ].td.td_deque_last_stolen = k;
- first = FALSE;
- }
-
- // Check to see if this thread can proceed.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
- gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
-
- // If the execution of the stolen task resulted in more tasks being
- // placed on our run queue, then restart the whole process.
- if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
- gtid) );
- goto start;
- }
- }
-
-        // The victim's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
- // Going on and finding a new victim to steal from is expensive, as it
- // involves a lot of cache misses, so we definitely want to re-check the
- // termination condition before doing that.
-#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
-#endif
- {
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
- "task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // If __kmp_tasking_mode != tskm_immediate_exec,
- // then it is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- }
- if (thread->th.th_task_team == NULL) return FALSE;
- }
-
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
- return FALSE;
-}
-
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
-
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
-
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
-
-
-
-//-----------------------------------------------------------------------------
-// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
-// next barrier so they can assist in executing enqueued tasks.
-// The first thread to arrive allocates the task team atomically.
-
-static void
-__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
-{
- kmp_thread_data_t *threads_data;
- int nthreads, i, is_init_thread;
-
- KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
- __kmp_gtid_from_thread( this_thr ) ) );
-
- KMP_DEBUG_ASSERT(task_team != NULL);
- KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
-
- nthreads = task_team->tt.tt_nproc;
- KMP_DEBUG_ASSERT(nthreads > 0);
- KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
-
- // Allocate or increase the size of threads_data if necessary
- is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
-
- if (!is_init_thread) {
- // Some other thread already set up the array.
- KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
- __kmp_gtid_from_thread( this_thr ) ) );
- return;
- }
- threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
- KMP_DEBUG_ASSERT( threads_data != NULL );
-
- if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
- ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
- {
- // Release any threads sleeping at the barrier, so that they can steal
- // tasks and execute them. In extra barrier mode, tasks do not sleep
- // at the separate tasking barrier, so this isn't a problem.
- for (i = 0; i < nthreads; i++) {
- volatile void *sleep_loc;
- kmp_info_t *thread = threads_data[i].td.td_thr;
-
- if (i == this_thr->th.th_info.ds.ds_tid) {
- continue;
- }
- // Since we haven't locked the thread's suspend mutex lock at this
- // point, there is a small window where a thread might be putting
- // itself to sleep, but hasn't set the th_sleep_loc field yet.
-            // To work around this, __kmp_execute_tasks_template() periodically checks
-            // to see if other threads are sleeping (using the same random
- // mechanism that is used for task stealing) and awakens them if
- // they are.
- if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
- {
- KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
- __kmp_gtid_from_thread( this_thr ),
- __kmp_gtid_from_thread( thread ) ) );
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
- }
- else {
- KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
- __kmp_gtid_from_thread( this_thr ),
- __kmp_gtid_from_thread( thread ) ) );
- }
- }
- }
-
- KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
- __kmp_gtid_from_thread( this_thr ) ) );
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* // TODO: Check the comment consistency
- * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
- * like a shadow of the kmp_team_t data struct, with a different lifetime.
- * After a child thread checks into a barrier and calls __kmp_release() from
- * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
- * longer assume that the kmp_team_t structure is intact (at any moment, the
- * master thread may exit the barrier code and free the team data structure,
- * and return the threads to the thread pool).
- *
- * This does not work with the tasking code, as the thread is still
- * expected to participate in the execution of any tasks that may have been
- * spawned by a member of the team, and the thread still needs access to
- * each thread in the team, so that it can steal work from it.
- *
- * Enter the existence of the kmp_task_team_t struct. It employs a reference
- * counting mechanism, and is allocated by the master thread before calling
- * __kmp_<barrier_kind>_release, and then is released by the last thread to
- * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
- * of the kmp_task_team_t structs for consecutive barriers can overlap
- * (and will, unless the master thread is the last thread to exit the barrier
- * release phase, which is not typical).
- *
- * The existence of such a struct is useful outside the context of tasking,
- * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
- * so that any performance differences show up when comparing the 2.5 vs. 3.0
- * libraries.
- *
- * We currently use the existence of the threads array as an indicator that
- * tasks were spawned since the last barrier. If the structure is to be
- * useful outside the context of tasking, then this will have to change, but
- * not setting the field minimizes the performance impact of tasking on
- * barriers, when no explicit tasks were spawned (pushed, actually).
- */
-
-
-static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
-// Lock for task team data structures
-static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
-
-
-//------------------------------------------------------------------------------
-// __kmp_alloc_task_deque:
-// Allocates a task deque for a particular thread, and initializes the necessary
-// data structures relating to the deque. This only happens once per thread
-// per task team since task teams are recycled.
-// No lock is needed during allocation since each thread allocates its own
-// deque.
-
-static void
-__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
-{
- __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
-
- // Initialize last stolen task field to "none"
- thread_data -> td.td_deque_last_stolen = -1;
-
- KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
- KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
- KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
-
- KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
- __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
- // Allocate space for task deque, and zero the deque
- // Cannot use __kmp_thread_calloc() because threads not around for
- // kmp_reap_task_team( ).
- thread_data -> td.td_deque = (kmp_taskdata_t **)
- __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_free_task_deque:
-// Deallocates a task deque for a particular thread.
-// Happens at library deallocation, so there is no need to reset all thread data fields.
-
-static void
-__kmp_free_task_deque( kmp_thread_data_t *thread_data )
-{
- __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
- if ( thread_data -> td.td_deque != NULL ) {
- TCW_4(thread_data -> td.td_deque_ntasks, 0);
- __kmp_free( thread_data -> td.td_deque );
- thread_data -> td.td_deque = NULL;
- }
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
-#ifdef BUILD_TIED_TASK_STACK
- // GEH: Figure out what to do here for td_susp_tied_tasks
- if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
- __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
- }
-#endif // BUILD_TIED_TASK_STACK
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_realloc_task_threads_data:
-// Allocates a threads_data array for a task team, either by allocating an initial
-// array or enlarging an existing array. Only the first thread to get the lock
-// allocates or enlarges the array and re-initializes the array elements.
-// That thread returns "TRUE", the rest return "FALSE".
-// Assumes that the new array size is given by task_team -> tt.tt_nproc.
-// The current size is given by task_team -> tt.tt_max_threads.
-
-static int
-__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
-{
- kmp_thread_data_t ** threads_data_p;
- kmp_int32 nthreads, maxthreads;
- int is_init_thread = FALSE;
-
- if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
- // Already reallocated and initialized.
- return FALSE;
- }
-
- threads_data_p = & task_team -> tt.tt_threads_data;
- nthreads = task_team -> tt.tt_nproc;
- maxthreads = task_team -> tt.tt_max_threads;
-
- // All threads must lock when they encounter the first task of the implicit task
- // region to make sure threads_data fields are (re)initialized before used.
- __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-
- if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
- // first thread to enable tasking
- kmp_team_t *team = thread -> th.th_team;
- int i;
-
- is_init_thread = TRUE;
- if ( maxthreads < nthreads ) {
-
- if ( *threads_data_p != NULL ) {
- kmp_thread_data_t *old_data = *threads_data_p;
- kmp_thread_data_t *new_data = NULL;
-
- KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
- "threads data for task_team %p, new_size = %d, old_size = %d\n",
- __kmp_gtid_from_thread( thread ), task_team,
- nthreads, maxthreads ) );
- // Reallocate threads_data to have more elements than current array
- // Cannot use __kmp_thread_realloc() because threads not around for
- // kmp_reap_task_team( ). Note all new array entries are initialized
- // to zero by __kmp_allocate().
- new_data = (kmp_thread_data_t *)
- __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
- // copy old data to new data
- KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
- (void *) old_data,
-                          maxthreads * sizeof(kmp_thread_data_t) );
-
-#ifdef BUILD_TIED_TASK_STACK
- // GEH: Figure out if this is the right thing to do
- for (i = maxthreads; i < nthreads; i++) {
- kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
- __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
- }
-#endif // BUILD_TIED_TASK_STACK
- // Install the new data and free the old data
- (*threads_data_p) = new_data;
- __kmp_free( old_data );
- }
- else {
- KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
- "threads data for task_team %p, size = %d\n",
- __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
- // Make the initial allocate for threads_data array, and zero entries
- // Cannot use __kmp_thread_calloc() because threads not around for
- // kmp_reap_task_team( ).
- *threads_data_p = (kmp_thread_data_t *)
- __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
-#ifdef BUILD_TIED_TASK_STACK
- // GEH: Figure out if this is the right thing to do
- for (i = 0; i < nthreads; i++) {
- kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
- __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
- }
-#endif // BUILD_TIED_TASK_STACK
- }
- task_team -> tt.tt_max_threads = nthreads;
- }
- else {
- // If array has (more than) enough elements, go ahead and use it
- KMP_DEBUG_ASSERT( *threads_data_p != NULL );
- }
-
- // initialize threads_data pointers back to thread_info structures
- for (i = 0; i < nthreads; i++) {
- kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
- thread_data -> td.td_thr = team -> t.t_threads[i];
-
- if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
- // The last stolen field survives across teams / barrier, and the number
- // of threads may have changed. It's possible (likely?) that a new
- // parallel region will exhibit the same behavior as the previous region.
- thread_data -> td.td_deque_last_stolen = -1;
- }
- }
-
- KMP_MB();
- TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
- }
-
- __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
- return is_init_thread;
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_free_task_threads_data:
-// Deallocates a threads_data array for a task team, including any attached
-// tasking deques. Only occurs at library shutdown.
-
-static void
-__kmp_free_task_threads_data( kmp_task_team_t *task_team )
-{
- __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
- if ( task_team -> tt.tt_threads_data != NULL ) {
- int i;
- for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
- __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
- }
- __kmp_free( task_team -> tt.tt_threads_data );
- task_team -> tt.tt_threads_data = NULL;
- }
- __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_allocate_task_team:
-// Allocates a task team associated with a specific team, taking it from
-// the global task team free list if possible. Also initializes data structures.
-
-static kmp_task_team_t *
-__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
-{
- kmp_task_team_t *task_team = NULL;
- int nthreads;
-
- KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
- (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
-
- if (TCR_PTR(__kmp_free_task_teams) != NULL) {
- // Take a task team from the task team pool
- __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
- if (__kmp_free_task_teams != NULL) {
- task_team = __kmp_free_task_teams;
- TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
- task_team -> tt.tt_next = NULL;
- }
- __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
- }
-
- if (task_team == NULL) {
- KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
- "task team for team %p\n",
- __kmp_gtid_from_thread( thread ), team ) );
- // Allocate a new task team if one is not available.
- // Cannot use __kmp_thread_malloc() because threads not around for
- // kmp_reap_task_team( ).
- task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
- __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
- //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
- //task_team -> tt.tt_max_threads = 0;
- //task_team -> tt.tt_next = NULL;
- }
-
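-    // (Re)initialize the task team fields for the upcoming parallel region.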
- TCW_4(task_team -> tt.tt_found_tasks, FALSE);
-#if OMP_41_ENABLED
- TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
-#endif
- task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
-
- TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
- TCW_4( task_team -> tt.tt_active, TRUE );
-
- KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
- (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
- return task_team;
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_free_task_team:
-// Frees the task team associated with a specific thread, and adds it
-// to the global task team free list.
-
-void
-__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
-{
- KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
- thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
-
- // Put task team back on free list
- __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
-
- KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
- task_team -> tt.tt_next = __kmp_free_task_teams;
- TCW_PTR(__kmp_free_task_teams, task_team);
-
- __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_reap_task_teams:
-// Free all the task teams on the task team free list.
-// Should only be done during library shutdown.
-// Cannot do anything that needs a thread structure or gtid since they are already gone.
-
-void
-__kmp_reap_task_teams( void )
-{
- kmp_task_team_t *task_team;
-
- if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
- // Free all task_teams on the free list
- __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
- while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
- __kmp_free_task_teams = task_team -> tt.tt_next;
- task_team -> tt.tt_next = NULL;
-
- // Free threads_data if necessary
- if ( task_team -> tt.tt_threads_data != NULL ) {
- __kmp_free_task_threads_data( task_team );
- }
- __kmp_free( task_team );
- }
- __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
- }
-}
-
-//------------------------------------------------------------------------------
-// __kmp_wait_to_unref_task_teams:
-// Some threads could still be in the fork barrier release code, possibly
-// trying to steal tasks. Wait for each thread to unreference its task team.
-//
-void
-__kmp_wait_to_unref_task_teams(void)
-{
- kmp_info_t *thread;
- kmp_uint32 spins;
- int done;
-
- KMP_INIT_YIELD( spins );
-
-
- for (;;) {
- done = TRUE;
-
-        // TODO: GEH - this may be wrong because some sync would be necessary
- // in case threads are added to the pool during the traversal.
- // Need to verify that lock for thread pool is held when calling
- // this routine.
- for (thread = (kmp_info_t *)__kmp_thread_pool;
- thread != NULL;
- thread = thread->th.th_next_pool)
- {
-#if KMP_OS_WINDOWS
- DWORD exit_val;
-#endif
- if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
- KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
- __kmp_gtid_from_thread( thread ) ) );
- continue;
- }
-#if KMP_OS_WINDOWS
- // TODO: GEH - add this check for Linux* OS / OS X* as well?
- if (!__kmp_is_thread_alive(thread, &exit_val)) {
- thread->th.th_task_team = NULL;
- continue;
- }
-#endif
-
- done = FALSE; // Because th_task_team pointer is not NULL for this thread
-
- KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
- __kmp_gtid_from_thread( thread ) ) );
-
- if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
- volatile void *sleep_loc;
- // If the thread is sleeping, awaken it.
- if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
- KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
- __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
- }
- }
- }
- if (done) {
- break;
- }
-
- // If we are oversubscribed,
- // or have waited a bit (and library mode is throughput), yield.
- // Pause is in the following code.
- KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
- KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
- }
-
-
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_task_team_setup: Create a task_team for the current team, but use
-// an already created, unused one if it already exists.
-void
-__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
-{
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
- // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
- // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
- if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
- team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
- KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
- __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
- ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
- }
-
- // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
- // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
- // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
- // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
- // serialized teams.
- if (team->t.t_nproc > 1) {
- int other_team = 1 - this_thr->th.th_task_state;
- if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
- team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
- KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
- __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
- ((team != NULL) ? team->t.t_id : -1), other_team ));
- }
- else { // Leave the old task team struct in place for the upcoming region; adjust as needed
- kmp_task_team_t *task_team = team->t.t_task_team[other_team];
- if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
- TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
- TCW_4(task_team->tt.tt_found_tasks, FALSE);
-#if OMP_41_ENABLED
- TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
-#endif
- TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
- TCW_4(task_team->tt.tt_active, TRUE );
- }
- // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
- KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
- __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
- ((team != NULL) ? team->t.t_id : -1), other_team ));
- }
- }
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_task_team_sync: Propagation of task team data from team to threads
-// which happens just after the release phase of a team barrier. This may be
-// called by any thread, but only for teams with # threads > 1.
-
-void
-__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
-{
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
- // Toggle the th_task_state field, to switch which task_team this thread refers to
- this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
- // It is now safe to propagate the task team pointer from the team struct to the current thread.
- TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
- KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
- __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
- ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
-}
-
-
-//--------------------------------------------------------------------------------------------
-// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
-// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
-// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
-// optionally as the last argument. When wait is zero, master thread does not wait for
-// unfinished_threads to reach 0.
-void
-__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
- USE_ITT_BUILD_ARG(void * itt_sync_obj)
- , int wait)
-{
- kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
-
- if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
- if (wait) {
- KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
- __kmp_gtid_from_thread(this_thr), task_team));
- // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
- // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
- kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
- flag.wait(this_thr, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
- KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
- "setting active to false, setting local and team's pointer to NULL\n",
- __kmp_gtid_from_thread(this_thr), task_team));
-#if OMP_41_ENABLED
- KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
- TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
-#else
- KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
-#endif
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
-
- TCW_PTR(this_thr->th.th_task_team, NULL);
- }
-}
-
-
-//------------------------------------------------------------------------------
-// __kmp_tasking_barrier:
-// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
-// Internal function to execute all tasks prior to a regular barrier or a
-// join barrier. It is a full barrier itself, which unfortunately turns
-// regular barriers into double barriers and join barriers into 1 1/2
-// barriers.
-void
-__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
-{
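-    // Spin on the number of unfinished threads in the current task team, executing tasks while waiting.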
- volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
- int flag = FALSE;
- KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
-
-#if USE_ITT_BUILD
- KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
-#endif /* USE_ITT_BUILD */
- kmp_flag_32 spin_flag(spin, 0U);
- while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
- USE_ITT_BUILD_ARG(NULL), 0 ) ) {
-#if USE_ITT_BUILD
- // TODO: What about itt_sync_obj??
- KMP_FSYNC_SPIN_PREPARE( spin );
-#endif /* USE_ITT_BUILD */
-
- if( TCR_4(__kmp_global.g.g_done) ) {
- if( __kmp_global.g.g_abort )
- __kmp_abort_thread( );
- break;
- }
- KMP_YIELD( TRUE ); // GH: We always yield here
- }
-#if USE_ITT_BUILD
- KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
-#endif /* USE_ITT_BUILD */
-}
-
-
-#if OMP_41_ENABLED
-
-/* __kmp_give_task puts a task into a given thread queue if:
-    - the queue for that thread has been created
- - there's space in that queue
-
- Because of this, __kmp_push_task needs to check if there's space after getting the lock
- */
-static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
-{
- kmp_task_team_t * task_team = thread->th.th_task_team;
- kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- bool result = false;
-
- KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
-
- // assert tasking is enabled? what if not?
- KMP_DEBUG_ASSERT( task_team != NULL );
-
- if (thread_data -> td.td_deque == NULL ) {
- // There's no queue in this thread, go find another one
- // We're guaranteed that at least one thread has a queue
- KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
- return result;
- }
-
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
- {
- KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
- return result;
- }
-
- __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
-
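-    // Re-check the deque size now that the lock is held; other threads may be pushing tasks concurrently.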
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
- {
- KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
- goto release_and_exit;
- }
-
- thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
- // Wrap index.
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
- TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
-
- result = true;
- KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
-
-release_and_exit:
- __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
-
- return result;
-}
-
-
-/* The finish of a proxy task is divided into two pieces:
-    - the top half is the part that can be done from a thread outside the team
-    - the bottom half must be run from a thread within the team
-
-   In order to run the bottom half the task gets queued back into one of the threads of the team.
-   Once the td_incomplete_child_tasks counter of the parent is decremented the threads can leave the barriers.
-   So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
-    - things that can be run before queuing the bottom half
-    - things that must be run after queuing the bottom half
-
-   This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this
-   we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
-*/
-
-static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
-{
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
- taskdata -> td_flags.complete = 1; // mark the task as completed
-
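-    // If the proxy task belongs to a taskgroup, decrement that taskgroup's pending task count.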
- if ( taskdata->td_taskgroup )
- KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
-
-    // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
- TCR_4(taskdata->td_incomplete_child_tasks++);
-}
-
-static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
-{
- kmp_int32 children = 0;
-
- // Predecrement simulated by "- 1" calculation
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
-
-    // Remove the imaginary child
- TCR_4(taskdata->td_incomplete_child_tasks--);
-}
-
-static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
- kmp_info_t * thread = __kmp_threads[ gtid ];
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
-
- // We need to wait to make sure the top half is finished
- // Spinning here should be ok as this should happen quickly
- while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
-
- __kmp_release_deps(gtid,taskdata);
- __kmp_free_task_and_ancestors(gtid, taskdata, thread);
-}
-
-/*!
-@ingroup TASKING
-@param gtid Global Thread ID of encountering thread
-@param ptask Task whose execution is completed
-
-Execute the completion of a proxy task from a thread that is part of the team. Runs the top and bottom halves directly.
-*/
-void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
-{
- KMP_DEBUG_ASSERT( ptask != NULL );
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
- KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
-
- KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
-
- __kmp_first_top_half_finish_proxy(taskdata);
- __kmp_second_top_half_finish_proxy(taskdata);
- __kmp_bottom_half_finish_proxy(gtid,ptask);
-
- KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
-}
-
-/*!
-@ingroup TASKING
-@param ptask Task whose execution is completed
-
-Execute the completion of a proxy task from a thread that may not belong to the team.
-*/
-void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
-{
- KMP_DEBUG_ASSERT( ptask != NULL );
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
-
- KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
-
- KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
-
- __kmp_first_top_half_finish_proxy(taskdata);
-
-    // Enqueue the task so that the bottom half is completed from a thread within the corresponding team
- kmp_team_t * team = taskdata->td_team;
- kmp_int32 nthreads = team->t.t_nproc;
- kmp_info_t *thread;
- kmp_int32 k = 0;
-
- do {
- //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
- //For now we're just linearly trying to find a thread
- k = (k+1) % nthreads;
- thread = team->t.t_threads[k];
- } while ( !__kmp_give_task( thread, k, ptask ) );
-
- __kmp_second_top_half_finish_proxy(taskdata);
-
- KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
-}
-
-#endif
+/*
+ * kmp_tasking.c -- OpenMP 3.0 tasking support.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_itt.h"
+#include "kmp_wait_release.h"
+#include "kmp_stats.h"
+
+#if OMPT_SUPPORT
+#include "ompt-specific.h"
+#endif
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+
+/* forward declaration */
+static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
+static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
+static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
+
+#ifdef OMP_41_ENABLED
+static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
+#endif
+
+static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
+ if (!flag) return;
+ // Attempt to wake up a thread: examine its type and call appropriate template
+ switch (((kmp_flag_64 *)flag)->get_type()) {
+ case flag32: __kmp_resume_32(gtid, NULL); break;
+ case flag64: __kmp_resume_64(gtid, NULL); break;
+ case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
+ }
+}
+
+#ifdef BUILD_TIED_TASK_STACK
+
+//---------------------------------------------------------------------------
+// __kmp_trace_task_stack: print the tied tasks from the task stack in order
+// from top to bottom
+//
+// gtid: global thread identifier for thread containing stack
+// thread_data: thread data for task team thread containing stack
+// threshold: value above which the trace statement triggers
+// location: string identifying call site of this function (for trace)
+
+static void
+__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
+{
+ kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
+ kmp_taskdata_t **stack_top = task_stack -> ts_top;
+ kmp_int32 entries = task_stack -> ts_entries;
+ kmp_taskdata_t *tied_task;
+
+ KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
+ "first_block = %p, stack_top = %p \n",
+ location, gtid, entries, task_stack->ts_first_block, stack_top ) );
+
+ KMP_DEBUG_ASSERT( stack_top != NULL );
+ KMP_DEBUG_ASSERT( entries > 0 );
+
+ while ( entries != 0 )
+ {
+ KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
+ // fix up ts_top if we need to pop from previous block
+        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
+ {
+ kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
+
+ stack_block = stack_block -> sb_prev;
+ stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
+ }
+
+ // finish bookkeeping
+ stack_top--;
+ entries--;
+
+ tied_task = * stack_top;
+
+ KMP_DEBUG_ASSERT( tied_task != NULL );
+ KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
+
+ KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
+ "stack_top=%p, tied_task=%p\n",
+ location, gtid, entries, stack_top, tied_task ) );
+ }
+ KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
+
+ KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
+ location, gtid ) );
+}
+
+//---------------------------------------------------------------------------
+// __kmp_init_task_stack: initialize the task stack for the first time
+// after a thread_data structure is created.
+// It should not be necessary to do this again (assuming the stack works).
+//
+// gtid: global thread identifier of calling thread
+// thread_data: thread data for task team thread containing stack
+
+static void
+__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
+{
+ kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
+ kmp_stack_block_t *first_block;
+
+ // set up the first block of the stack
+ first_block = & task_stack -> ts_first_block;
+ task_stack -> ts_top = (kmp_taskdata_t **) first_block;
+ memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
+
+ // initialize the stack to be empty
+ task_stack -> ts_entries = TASK_STACK_EMPTY;
+ first_block -> sb_next = NULL;
+ first_block -> sb_prev = NULL;
+}
+
+
+//---------------------------------------------------------------------------
+// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
+//
+// gtid: global thread identifier for calling thread
+// thread_data: thread info for thread containing stack
+
+static void
+__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
+{
+ kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
+ kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
+
+ KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
+ // free from the second block of the stack
+ while ( stack_block != NULL ) {
+ kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
+
+ stack_block -> sb_next = NULL;
+ stack_block -> sb_prev = NULL;
+ if (stack_block != & task_stack -> ts_first_block) {
+            __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
+ }
+ stack_block = next_block;
+ }
+ // initialize the stack to be empty
+ task_stack -> ts_entries = 0;
+ task_stack -> ts_top = NULL;
+}
+
+
+//---------------------------------------------------------------------------
+// __kmp_push_task_stack: Push the tied task onto the task stack.
+// Grow the stack if necessary by allocating another block.
+//
+// gtid: global thread identifier for calling thread
+// thread: thread info for thread containing stack
+// tied_task: the task to push on the stack
+
+static void
+__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
+{
+ // GEH - need to consider what to do if tt_threads_data not allocated yet
+ kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
+ tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
+ kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
+
+ if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
+ return; // Don't push anything on stack if team or team tasks are serialized
+ }
+
+ KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
+ KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
+
+ KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
+ gtid, thread, tied_task ) );
+ // Store entry
+ * (task_stack -> ts_top) = tied_task;
+
+ // Do bookkeeping for next push
+ task_stack -> ts_top++;
+ task_stack -> ts_entries++;
+
+    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
+ {
+ // Find beginning of this task block
+ kmp_stack_block_t *stack_block =
+ (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
+
+ // Check if we already have a block
+ if ( stack_block -> sb_next != NULL )
+ { // reset ts_top to beginning of next block
+ task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
+ }
+ else
+ { // Alloc new block and link it up
+ kmp_stack_block_t *new_block = (kmp_stack_block_t *)
+ __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
+
+ task_stack -> ts_top = & new_block -> sb_block[0];
+ stack_block -> sb_next = new_block;
+ new_block -> sb_prev = stack_block;
+ new_block -> sb_next = NULL;
+
+ KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
+ gtid, tied_task, new_block ) );
+ }
+ }
+ KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
+}
+
+//---------------------------------------------------------------------------
+// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
+// the task, just check to make sure it matches the ending task passed in.
+//
+// gtid: global thread identifier for the calling thread
+// thread: thread info structure containing stack
+// tied_task: the task popped off the stack
+// ending_task: the task that is ending (should match popped task)
+
+static void
+__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
+{
+ // GEH - need to consider what to do if tt_threads_data not allocated yet
+    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
+ kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
+ kmp_taskdata_t *tied_task;
+
+ if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
+ return; // Don't pop anything from stack if team or team tasks are serialized
+ }
+
+ KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
+ KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
+
+ KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
+
+ // fix up ts_top if we need to pop from previous block
+    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
+ {
+ kmp_stack_block_t *stack_block =
+ (kmp_stack_block_t *) (task_stack -> ts_top) ;
+
+ stack_block = stack_block -> sb_prev;
+ task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
+ }
+
+ // finish bookkeeping
+ task_stack -> ts_top--;
+ task_stack -> ts_entries--;
+
+ tied_task = * (task_stack -> ts_top );
+
+ KMP_DEBUG_ASSERT( tied_task != NULL );
+ KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
+ KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
+
+ KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
+ return;
+}
+#endif /* BUILD_TIED_TASK_STACK */
+
+//---------------------------------------------------
+// __kmp_push_task: Add a task to the thread's deque
+
+static kmp_int32
+__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
+{
+ kmp_info_t * thread = __kmp_threads[ gtid ];
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_task_team_t * task_team = thread->th.th_task_team;
+ kmp_int32 tid = __kmp_tid_from_gtid( gtid );
+ kmp_thread_data_t * thread_data;
+
+ KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
+
+ // The first check avoids building task_team thread data if serialized
+ if ( taskdata->td_flags.task_serial ) {
+ KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata ) );
+ return TASK_NOT_PUSHED;
+ }
+
+ // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+ if ( ! KMP_TASKING_ENABLED(task_team) ) {
+ __kmp_enable_tasking( task_team, thread );
+ }
+ KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
+ KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
+
+ // Find tasking deque specific to encountering thread
+ thread_data = & task_team -> tt.tt_threads_data[ tid ];
+
+ // No lock needed since only owner can allocate
+ if (thread_data -> td.td_deque == NULL ) {
+ __kmp_alloc_task_deque( thread, thread_data );
+ }
+
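+    // The check below is an unlocked fast path; under OMP_41_ENABLED it is repeated
+    // while holding the deque lock, since proxy tasks may be pushed into this deque
+    // by threads outside of OpenMP.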
+ // Check if deque is full
+ if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
+ {
+ KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata ) );
+ return TASK_NOT_PUSHED;
+ }
+
+ // Lock the deque for the task push operation
+ __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
+
+#if OMP_41_ENABLED
+ // Need to recheck as we can get a proxy task from a thread outside of OpenMP
+ if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
+ {
+ __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+ KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata ) );
+ return TASK_NOT_PUSHED;
+ }
+#else
+    // Must have room, since no thread other than the calling thread can add tasks to this deque
+ KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
+#endif
+
+ thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
+ // Wrap index.
+ thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
+ TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
+
+ __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+
+ KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
+ "task=%p ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
+
+ return TASK_SUCCESSFULLY_PUSHED;
+}
+
+
+//-----------------------------------------------------------------------------------------
+// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
+// this_thr: thread structure to set current_task in.
+
+void
+__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
+{
+ KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
+ "curtask_parent=%p\n",
+ 0, this_thr, this_thr -> th.th_current_task,
+ this_thr -> th.th_current_task -> td_parent ) );
+
+ this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
+
+ KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
+ "curtask_parent=%p\n",
+ 0, this_thr, this_thr -> th.th_current_task,
+ this_thr -> th.th_current_task -> td_parent ) );
+}
+
+
+//---------------------------------------------------------------------------------------
+// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
+// this_thr: thread structure to set up
+// team: team for implicit task data
+// tid: thread within team to set up
+
+void
+__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
+{
+    // The current task of this thread becomes the parent of the implicit tasks just created for the new team
+ KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
+ "parent_task=%p\n",
+ tid, this_thr, this_thr->th.th_current_task,
+ team->t.t_implicit_task_taskdata[tid].td_parent ) );
+
+ KMP_DEBUG_ASSERT (this_thr != NULL);
+
+ if( tid == 0 ) {
+ if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
+ team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
+ this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
+ }
+ } else {
+ team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
+ this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
+ }
+
+ KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
+ "parent_task=%p\n",
+ tid, this_thr, this_thr->th.th_current_task,
+ team->t.t_implicit_task_taskdata[tid].td_parent ) );
+}
+
+
+//----------------------------------------------------------------------
+// __kmp_task_start: bookkeeping for a task starting execution
+// GTID: global thread id of calling thread
+// task: task starting execution
+// current_task: task suspending
+
+static void
+__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
+{
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_info_t * thread = __kmp_threads[ gtid ];
+
+ KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
+ gtid, taskdata, current_task) );
+
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+
+ // mark currently executing task as suspended
+ // TODO: GEH - make sure root team implicit task is initialized properly.
+ // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
+ current_task -> td_flags.executing = 0;
+
+ // Add task to stack if tied
+#ifdef BUILD_TIED_TASK_STACK
+ if ( taskdata -> td_flags.tiedness == TASK_TIED )
+ {
+ __kmp_push_task_stack( gtid, thread, taskdata );
+ }
+#endif /* BUILD_TIED_TASK_STACK */
+
+ // mark starting task as executing and as current task
+ thread -> th.th_current_task = taskdata;
+
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
+ taskdata -> td_flags.started = 1;
+ taskdata -> td_flags.executing = 1;
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
+
+ // GEH TODO: shouldn't we pass some sort of location identifier here?
+ // APT: yes, we will pass location here.
+ // need to store current thread state (in a thread or taskdata structure)
+ // before setting work_state, otherwise wrong state is set after end of task
+
+ KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
+ gtid, taskdata ) );
+
+#if OMPT_SUPPORT
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ ompt_callbacks.ompt_callback(ompt_event_task_begin)(
+ parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
+ parent ? &(parent->ompt_task_info.frame) : NULL,
+ taskdata->ompt_task_info.task_id,
+ taskdata->ompt_task_info.function);
+ }
+#endif
+
+ return;
+}
+
+
+//----------------------------------------------------------------------
+// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
+// loc_ref: source location information; points to beginning of task block.
+// gtid: global thread number.
+// task: task thunk for the started task.
+
+void
+__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
+{
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
+
+ KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
+ gtid, loc_ref, taskdata, current_task ) );
+
+ taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
+ __kmp_task_start( gtid, task, current_task );
+
+ KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
+ gtid, loc_ref, taskdata ) );
+
+ return;
+}
+
+#ifdef TASK_UNUSED
+//----------------------------------------------------------------------
+// __kmpc_omp_task_begin: report that a given task has started execution
+// NEVER GENERATED BY COMPILER, DEPRECATED!!!
+
+void
+__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
+{
+ kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
+
+ KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
+
+ __kmp_task_start( gtid, task, current_task );
+
+ KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
+
+ return;
+}
+#endif // TASK_UNUSED
+
+
+//-------------------------------------------------------------------------------------
+// __kmp_free_task: free the current task space and the space for shareds
+// gtid: Global thread ID of calling thread
+// taskdata: task to free
+// thread: thread data structure of caller
+
+static void
+__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
+{
+ KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
+ gtid, taskdata) );
+
+ // Check to make sure all flags and counters have the correct values
+ KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
+ KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
+ KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
+ KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
+ KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
+ KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
+
+ taskdata->td_flags.freed = 1;
+ // deallocate the taskdata and shared variable blocks associated with this task
+ #if USE_FAST_MEMORY
+ __kmp_fast_free( thread, taskdata );
+ #else /* ! USE_FAST_MEMORY */
+ __kmp_thread_free( thread, taskdata );
+ #endif
+
+ KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
+ gtid, taskdata) );
+}
+
+//-------------------------------------------------------------------------------------
+// __kmp_free_task_and_ancestors: free the current task and ancestors without children
+//
+// gtid: Global thread ID of calling thread
+// taskdata: task to free
+// thread: thread data structure of caller
+
+static void
+__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
+{
+ kmp_int32 children = 0;
+ kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
+
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+
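+    // Drop this task's own reference in its allocated-child-task count (the count
+    // starts at one for the task itself); when it reaches zero the task can be freed.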
+ if ( !team_or_tasking_serialized ) {
+ children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
+ KMP_DEBUG_ASSERT( children >= 0 );
+ }
+
+ // Now, go up the ancestor tree to see if any ancestors can now be freed.
+ while ( children == 0 )
+ {
+ kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
+
+ KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
+ "and freeing itself\n", gtid, taskdata) );
+
+ // --- Deallocate my ancestor task ---
+ __kmp_free_task( gtid, taskdata, thread );
+
+ taskdata = parent_taskdata;
+
+        // Stop checking ancestors at the implicit task, or if tasking is serialized,
+        // rather than walking further up the ancestor tree, to avoid premature deallocation of ancestors.
+ if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
+ return;
+
+ if ( !team_or_tasking_serialized ) {
+ // Predecrement simulated by "- 1" calculation
+ children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
+ KMP_DEBUG_ASSERT( children >= 0 );
+ }
+ }
+
+ KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
+ "not freeing it yet\n", gtid, taskdata, children) );
+}
+
+//---------------------------------------------------------------------
+// __kmp_task_finish: bookkeeping to do when a task finishes execution
+// gtid: global thread ID for calling thread
+// task: task to be finished
+// resumed_task: task to be resumed. (may be NULL if task is serialized)
+
+static void
+__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
+{
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_info_t * thread = __kmp_threads[ gtid ];
+ kmp_int32 children = 0;
+
+#if OMPT_SUPPORT
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_task_end)) {
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ ompt_callbacks.ompt_callback(ompt_event_task_end)(
+ taskdata->ompt_task_info.task_id);
+ }
+#endif
+
+ KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
+ gtid, taskdata, resumed_task) );
+
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+
+ // Pop task from stack if tied
+#ifdef BUILD_TIED_TASK_STACK
+ if ( taskdata -> td_flags.tiedness == TASK_TIED )
+ {
+ __kmp_pop_task_stack( gtid, thread, taskdata );
+ }
+#endif /* BUILD_TIED_TASK_STACK */
+
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
+ taskdata -> td_flags.complete = 1; // mark the task as completed
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
+
+ // Only need to keep track of count if team parallel and tasking not serialized
+ if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
+ // Predecrement simulated by "- 1" calculation
+ children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
+ KMP_DEBUG_ASSERT( children >= 0 );
+#if OMP_40_ENABLED
+ if ( taskdata->td_taskgroup )
+ KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
+ __kmp_release_deps(gtid,taskdata);
+#endif
+ }
+
+ // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
+    // Otherwise, if a task is executed immediately from the release_deps code
+ // the flag will be reset to 1 again by this same function
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
+ taskdata -> td_flags.executing = 0; // suspend the finishing task
+
+ KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
+ gtid, taskdata, children) );
+
+#if OMP_40_ENABLED
+    /* If the task's destructor thunk flag has been set, we need to invoke the
+       destructor thunk generated by the compiler. The code is placed here, since at
+       this point other tasks might have been released, so the destructor invocations
+       can overlap with other work in the released tasks. The OpenMP spec does not
+       specify when the destructors are invoked, so we are free to choose.
+    */
+ if (taskdata->td_flags.destructors_thunk) {
+ kmp_routine_entry_t destr_thunk = task->destructors;
+ KMP_ASSERT(destr_thunk);
+ destr_thunk(gtid, task);
+ }
+#endif // OMP_40_ENABLED
+
+ // bookkeeping for resuming task:
+ // GEH - note tasking_ser => task_serial
+ KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
+ taskdata->td_flags.task_serial);
+ if ( taskdata->td_flags.task_serial )
+ {
+ if (resumed_task == NULL) {
+ resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
+ }
+ else {
+ // verify resumed task passed in points to parent
+ KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
+ }
+ }
+ else {
+        KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that the resumed task is passed as an argument
+ }
+
+ // Free this task and then ancestor tasks if they have no children.
+ __kmp_free_task_and_ancestors(gtid, taskdata, thread);
+
+    // FIXME johnmc: I think this statement should come before the previous one, so that if an
+    // asynchronous inquiry peers into the runtime system it doesn't see the freed
+    // task as the current task.
+ __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
+
+ // TODO: GEH - make sure root team implicit task is initialized properly.
+ // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
+ resumed_task->td_flags.executing = 1; // resume previous task
+
+ KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
+ gtid, taskdata, resumed_task) );
+
+ return;
+}
+
+//---------------------------------------------------------------------
+// __kmpc_omp_task_complete_if0: report that a task has completed execution
+// loc_ref: source location information; points to end of task block.
+// gtid: global thread number.
+// task: task thunk for the completed task.
+
+void
+__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
+{
+ KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
+
+ __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
+
+ KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
+
+ return;
+}
+
+#ifdef TASK_UNUSED
+//---------------------------------------------------------------------
+// __kmpc_omp_task_complete: report that a task has completed execution
+// NEVER GENERATED BY COMPILER, DEPRECATED!!!
+
+void
+__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
+{
+ KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
+
+ __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
+
+ KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
+ return;
+}
+#endif // TASK_UNUSED
+
+
+#if OMPT_SUPPORT
+//----------------------------------------------------------------------------------------------------
+// __kmp_task_init_ompt:
+// Initialize OMPT fields maintained by a task. This will only be called after
+// ompt_tool, so we already know whether ompt is enabled or not.
+
+static inline void
+__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
+{
+ if (ompt_enabled) {
+ task->ompt_task_info.task_id = __ompt_task_id_new(tid);
+ task->ompt_task_info.function = function;
+ task->ompt_task_info.frame.exit_runtime_frame = NULL;
+ task->ompt_task_info.frame.reenter_runtime_frame = NULL;
+ }
+}
+#endif
+
+
+//----------------------------------------------------------------------------------------------------
+// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
+//
+// loc_ref: reference to source location of parallel region
+// this_thr: thread data structure corresponding to implicit task
+// team: team for this_thr
+// tid: thread id of given thread within team
+// set_curr_task: TRUE if need to push current task to thread
+// NOTE: This routine does not set up the implicit task's ICVs. This is assumed to have already been done elsewhere.
+// TODO: Get better loc_ref. Value passed in may be NULL
+
+void
+__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
+{
+ kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
+
+ KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
+ tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
+
+ task->td_task_id = KMP_GEN_TASK_ID();
+ task->td_team = team;
+// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
+ task->td_ident = loc_ref;
+ task->td_taskwait_ident = NULL;
+ task->td_taskwait_counter = 0;
+ task->td_taskwait_thread = 0;
+
+ task->td_flags.tiedness = TASK_TIED;
+ task->td_flags.tasktype = TASK_IMPLICIT;
+#if OMP_41_ENABLED
+ task->td_flags.proxy = TASK_FULL;
+#endif
+
+ // All implicit tasks are executed immediately, not deferred
+ task->td_flags.task_serial = 1;
+ task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
+ task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
+
+ task->td_flags.started = 1;
+ task->td_flags.executing = 1;
+ task->td_flags.complete = 0;
+ task->td_flags.freed = 0;
+
+#if OMP_40_ENABLED
+ task->td_dephash = NULL;
+ task->td_depnode = NULL;
+#endif
+
+ if (set_curr_task) { // only do this initialization the first time a thread is created
+ task->td_incomplete_child_tasks = 0;
+ task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
+#if OMP_40_ENABLED
+ task->td_taskgroup = NULL; // An implicit task does not have taskgroup
+#endif
+ __kmp_push_current_task_to_thread( this_thr, team, tid );
+ } else {
+ KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
+ KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
+ }
+
+#if OMPT_SUPPORT
+ __kmp_task_init_ompt(task, tid, NULL);
+#endif
+
+ KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
+ tid, team, task ) );
+}
+
+// Round up a size to a multiple of val, where val is a power of two
+// Used to insert padding between structures co-allocated using a single malloc() call
+static size_t
+__kmp_round_up_to_val( size_t size, size_t val ) {
+ if ( size & ( val - 1 ) ) {
+ size &= ~ ( val - 1 );
+ if ( size <= KMP_SIZE_T_MAX - val ) {
+ size += val; // Round up if there is no overflow.
+ }; // if
+ }; // if
+ return size;
+} // __kmp_round_up_to_val
+
+
+//---------------------------------------------------------------------------------
+// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
+//
+// loc_ref: source location information
+// gtid: global thread number.
+// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
+// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
+// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
+// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
+// task_entry: Pointer to task code entry point generated by compiler.
+// returns: a pointer to the allocated kmp_task_t structure (task).
+
+kmp_task_t *
+__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry )
+{
+ kmp_task_t *task;
+ kmp_taskdata_t *taskdata;
+ kmp_info_t *thread = __kmp_threads[ gtid ];
+ kmp_team_t *team = thread->th.th_team;
+ kmp_taskdata_t *parent_task = thread->th.th_current_task;
+ size_t shareds_offset;
+
+ KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
+ "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+ gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
+ sizeof_shareds, task_entry) );
+
+ if ( parent_task->td_flags.final ) {
+ if (flags->merged_if0) {
+ }
+ flags->final = 1;
+ }
+
+#if OMP_41_ENABLED
+ if ( flags->proxy == TASK_PROXY ) {
+ flags->tiedness = TASK_UNTIED;
+ flags->merged_if0 = 1;
+
+        /* We may be running in a serialized parallel region or in tskm_immediate_exec mode; either way, tasking support must be enabled */
+ if ( (thread->th.th_task_team) == NULL ) {
+            /* This should only happen if the team is serialized;
+               set up a task team and propagate it to the thread.
+            */
+ KMP_DEBUG_ASSERT(team->t.t_serialized);
+ KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
+ __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
+ thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
+ }
+ kmp_task_team_t * task_team = thread->th.th_task_team;
+
+ /* tasking must be enabled now as the task might not be pushed */
+ if ( !KMP_TASKING_ENABLED( task_team ) ) {
+ KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
+ __kmp_enable_tasking( task_team, thread );
+ kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+ kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
+ // No lock needed since only owner can allocate
+ if (thread_data -> td.td_deque == NULL ) {
+ __kmp_alloc_task_deque( thread, thread_data );
+ }
+ }
+
+ if ( task_team->tt.tt_found_proxy_tasks == FALSE )
+ TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
+ }
+#endif
+
+ // Calculate shared structure offset including padding after kmp_task_t struct
+ // to align pointers in shared struct
+ shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
+ shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
+
+ // Allocate a kmp_taskdata_t block and a kmp_task_t block.
+ KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
+ gtid, shareds_offset) );
+ KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
+ gtid, sizeof_shareds) );
+
+ // Avoid double allocation here by combining shareds with taskdata
+ #if USE_FAST_MEMORY
+ taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
+ #else /* ! USE_FAST_MEMORY */
+ taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
+ #endif /* USE_FAST_MEMORY */
+
+ task = KMP_TASKDATA_TO_TASK(taskdata);
+
+ // Make sure task & taskdata are aligned appropriately
+#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
+ KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
+ KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
+#else
+ KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
+ KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
+#endif
+ if (sizeof_shareds > 0) {
+ // Avoid double allocation here by combining shareds with taskdata
+ task->shareds = & ((char *) taskdata)[ shareds_offset ];
+ // Make sure shareds struct is aligned to pointer size
+ KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
+ } else {
+ task->shareds = NULL;
+ }
+ task->routine = task_entry;
+ task->part_id = 0; // AC: Always start with 0 part id
+
+ taskdata->td_task_id = KMP_GEN_TASK_ID();
+ taskdata->td_team = team;
+ taskdata->td_alloc_thread = thread;
+ taskdata->td_parent = parent_task;
+ taskdata->td_level = parent_task->td_level + 1; // increment nesting level
+ taskdata->td_ident = loc_ref;
+ taskdata->td_taskwait_ident = NULL;
+ taskdata->td_taskwait_counter = 0;
+ taskdata->td_taskwait_thread = 0;
+ KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
+#if OMP_41_ENABLED
+ // avoid copying icvs for proxy tasks
+ if ( flags->proxy == TASK_FULL )
+#endif
+ copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
+
+ taskdata->td_flags.tiedness = flags->tiedness;
+ taskdata->td_flags.final = flags->final;
+ taskdata->td_flags.merged_if0 = flags->merged_if0;
+#if OMP_40_ENABLED
+ taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
+#endif // OMP_40_ENABLED
+#if OMP_41_ENABLED
+ taskdata->td_flags.proxy = flags->proxy;
+#endif
+ taskdata->td_flags.tasktype = TASK_EXPLICIT;
+
+ // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
+ taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
+
+ // GEH - TODO: fix this to copy parent task's value of team_serial flag
+ taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
+
+ // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
+ // tasks are not left until program termination to execute. Also, it helps locality to execute
+ // immediately.
+ taskdata->td_flags.task_serial = ( parent_task->td_flags.final
+ || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
+
+ taskdata->td_flags.started = 0;
+ taskdata->td_flags.executing = 0;
+ taskdata->td_flags.complete = 0;
+ taskdata->td_flags.freed = 0;
+
+ taskdata->td_flags.native = flags->native;
+
+ taskdata->td_incomplete_child_tasks = 0;
+ taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
+#if OMP_40_ENABLED
+ taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
+ taskdata->td_dephash = NULL;
+ taskdata->td_depnode = NULL;
+#endif
+
+ // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
+#if OMP_41_ENABLED
+ if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
+#else
+ if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
+#endif
+ {
+ KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
+#if OMP_40_ENABLED
+ if ( parent_task->td_taskgroup )
+ KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
+#endif
+ // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
+ if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
+ KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
+ }
+ }
+
+ KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
+ gtid, taskdata, taskdata->td_parent) );
+
+#if OMPT_SUPPORT
+ __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
+#endif
+
+ return task;
+}
+
+
+kmp_task_t *
+__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry )
+{
+ kmp_task_t *retval;
+ kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
+
+ input_flags->native = FALSE;
+ // __kmp_task_alloc() sets up all other runtime flags
+
+#if OMP_41_ENABLED
+ KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
+ "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+ gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
+ input_flags->proxy ? "proxy" : "",
+ sizeof_kmp_task_t, sizeof_shareds, task_entry) );
+#else
+ KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
+ "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+ gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
+ sizeof_kmp_task_t, sizeof_shareds, task_entry) );
+#endif
+
+ retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
+ sizeof_shareds, task_entry );
+
+ KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
+
+ return retval;
+}
+
+//-----------------------------------------------------------
+// __kmp_invoke_task: invoke the specified task
+//
+// gtid: global thread ID of caller
+// task: the task to invoke
+// current_task: the task to resume after task invocation
+
+static void
+__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
+{
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+#if OMP_40_ENABLED
+ int discard = 0 /* false */;
+#endif
+ KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
+ gtid, taskdata, current_task) );
+ KMP_DEBUG_ASSERT(task);
+#if OMP_41_ENABLED
+ if ( taskdata->td_flags.proxy == TASK_PROXY &&
+ taskdata->td_flags.complete == 1)
+ {
+ // This is a proxy task that was already completed but it needs to run
+ // its bottom-half finish
+ KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
+ gtid, taskdata) );
+
+ __kmp_bottom_half_finish_proxy(gtid,task);
+
+ KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
+
+ return;
+ }
+#endif
+
+#if OMP_41_ENABLED
+ // Proxy tasks are not handled by the runtime
+ if ( taskdata->td_flags.proxy != TASK_PROXY )
+#endif
+ __kmp_task_start( gtid, task, current_task );
+
+#if OMPT_SUPPORT
+ ompt_thread_info_t oldInfo;
+ kmp_info_t * thread;
+ if (ompt_enabled) {
+ // Store the threads states and restore them after the task
+ thread = __kmp_threads[ gtid ];
+ oldInfo = thread->th.ompt_thread_info;
+ thread->th.ompt_thread_info.wait_id = 0;
+ thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+ taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
+ }
+#endif
+
+#if OMP_40_ENABLED
+ // TODO: cancel tasks if the parallel region has also been cancelled
+ // TODO: check if this sequence can be hoisted above __kmp_task_start
+ // if cancellation has been enabled for this run ...
+ if (__kmp_omp_cancellation) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+ kmp_team_t * this_team = this_thr->th.th_team;
+ kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
+ if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+ KMP_COUNT_BLOCK(TASK_cancelled);
+ // this task belongs to a task group and we need to cancel it
+ discard = 1 /* true */;
+ }
+ }
+
+ //
+ // Invoke the task routine and pass in relevant data.
+ // Thunks generated by gcc take a different argument list.
+ //
+ if (!discard) {
+ KMP_COUNT_BLOCK(TASK_executed);
+ KMP_TIME_BLOCK (TASK_execution);
+#endif // OMP_40_ENABLED
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ /* let OMPT know that we're about to run this task */
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_task_switch))
+ {
+ ompt_callbacks.ompt_callback(ompt_event_task_switch)(
+ current_task->ompt_task_info.task_id,
+ taskdata->ompt_task_info.task_id);
+ }
+#endif
+
+#ifdef KMP_GOMP_COMPAT
+ if (taskdata->td_flags.native) {
+ ((void (*)(void *))(*(task->routine)))(task->shareds);
+ }
+ else
+#endif /* KMP_GOMP_COMPAT */
+ {
+ (*(task->routine))(gtid, task);
+ }
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ /* let OMPT know that we're returning to the callee task */
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_task_switch))
+ {
+ ompt_callbacks.ompt_callback(ompt_event_task_switch)(
+ taskdata->ompt_task_info.task_id,
+ current_task->ompt_task_info.task_id);
+ }
+#endif
+
+#if OMP_40_ENABLED
+ }
+#endif // OMP_40_ENABLED
+
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ thread->th.ompt_thread_info = oldInfo;
+ taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
+ }
+#endif
+
+#if OMP_41_ENABLED
+ // Proxy tasks are not handled by the runtime
+ if ( taskdata->td_flags.proxy != TASK_PROXY )
+#endif
+ __kmp_task_finish( gtid, task, current_task );
+
+ KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
+ gtid, taskdata, current_task) );
+ return;
+}
+
+//-----------------------------------------------------------------------
+// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
+//
+// loc_ref: location of original task pragma (ignored)
+// gtid: Global Thread ID of encountering thread
+// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
+// Returns:
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
+
+kmp_int32
+__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
+{
+ kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+
+ KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata ) );
+
+ /* Should we execute the new task or queue it? For now, let's just always try to
+ queue it. If the queue fills up, then we'll execute it. */
+
+ if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+ { // Execute this task immediately
+ kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
+ new_taskdata->td_flags.task_serial = 1;
+ __kmp_invoke_task( gtid, new_task, current_task );
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
+ "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
+ new_taskdata ) );
+
+ return TASK_CURRENT_NOT_QUEUED;
+}
+
+//---------------------------------------------------------------------
+// __kmp_omp_task: Schedule a non-thread-switchable task for execution
+// gtid: Global Thread ID of encountering thread
+// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
+// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
+// returns:
+//
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
+kmp_int32
+__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
+{
+ kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
+ __builtin_frame_address(0);
+ }
+#endif
+
+ /* Should we execute the new task or queue it? For now, let's just always try to
+ queue it. If the queue fills up, then we'll execute it. */
+#if OMP_41_ENABLED
+ if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+#else
+ if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+#endif
+ { // Execute this task immediately
+ kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
+ if ( serialize_immediate )
+ new_taskdata -> td_flags.task_serial = 1;
+ __kmp_invoke_task( gtid, new_task, current_task );
+ }
+
+#if OMPT_SUPPORT
+ if (ompt_enabled) {
+ new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
+ }
+#endif
+
+ return TASK_CURRENT_NOT_QUEUED;
+}
+
+//---------------------------------------------------------------------
+// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
+// the parent thread only!
+// loc_ref: location of original task pragma (ignored)
+// gtid: Global Thread ID of encountering thread
+// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
+// returns:
+//
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
+
+kmp_int32
+__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
+{
+ kmp_int32 res;
+
+#if KMP_DEBUG
+ kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+#endif
+ KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata ) );
+
+ res = __kmp_omp_task(gtid,new_task,true);
+
+ KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata ) );
+ return res;
+}
+
+//-------------------------------------------------------------------------------------
+// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
+
+kmp_int32
+__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
+{
+ kmp_taskdata_t * taskdata;
+ kmp_info_t * thread;
+ int thread_finished = FALSE;
+
+ KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
+
+ thread = __kmp_threads[ gtid ];
+ taskdata = thread -> th.th_current_task;
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ ompt_task_id_t my_task_id;
+ ompt_parallel_id_t my_parallel_id;
+
+ if (ompt_enabled) {
+ kmp_team_t *team = thread->th.th_team;
+ my_task_id = taskdata->ompt_task_info.task_id;
+ my_parallel_id = team->t.ompt_team_info.parallel_id;
+
+ if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
+ my_parallel_id, my_task_id);
+ }
+ }
+#endif
+
+#if USE_ITT_BUILD
+ // Note: These values are used by ITT events as well.
+#endif /* USE_ITT_BUILD */
+ taskdata->td_taskwait_counter += 1;
+ taskdata->td_taskwait_ident = loc_ref;
+ taskdata->td_taskwait_thread = gtid + 1;
+
+#if USE_ITT_BUILD
+ void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+#if OMP_41_ENABLED
+ if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
+#else
+ if ( ! taskdata->td_flags.team_serial )
+#endif
+ {
+ // GEH: if team serialized, avoid reading the volatile variable below.
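+            // Wait for all incomplete child tasks of the current task, executing other
+            // queued tasks while spinning so the waiting thread stays productive.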
+ kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
+ while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
+ flag.execute_tasks(thread, gtid, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
+ }
+ }
+#if USE_ITT_BUILD
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
+ taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
+
+#if OMPT_SUPPORT && OMPT_TRACE
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
+ my_parallel_id, my_task_id);
+ }
+#endif
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
+ "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
+
+ return TASK_CURRENT_NOT_QUEUED;
+}
+
+
+//-------------------------------------------------
+// __kmpc_omp_taskyield: switch to a different task
+
+kmp_int32
+__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
+{
+ kmp_taskdata_t * taskdata;
+ kmp_info_t * thread;
+ int thread_finished = FALSE;
+
+ KMP_COUNT_BLOCK(OMP_TASKYIELD);
+
+ KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
+ gtid, loc_ref, end_part) );
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
+
+ thread = __kmp_threads[ gtid ];
+ taskdata = thread -> th.th_current_task;
+ // Should we model this as a task wait or not?
+#if USE_ITT_BUILD
+ // Note: These values are used by ITT events as well.
+#endif /* USE_ITT_BUILD */
+ taskdata->td_taskwait_counter += 1;
+ taskdata->td_taskwait_ident = loc_ref;
+ taskdata->td_taskwait_thread = gtid + 1;
+
+#if USE_ITT_BUILD
+ void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+ if ( ! taskdata->td_flags.team_serial ) {
+ kmp_task_team_t * task_team = thread->th.th_task_team;
+ if (task_team != NULL) {
+ if (KMP_TASKING_ENABLED(task_team)) {
+ __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
+ }
+ }
+ }
+#if USE_ITT_BUILD
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
+ taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
+ "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
+
+ return TASK_CURRENT_NOT_QUEUED;
+}
+
+
+#if OMP_40_ENABLED
+//-------------------------------------------------------------------------------------
+// __kmpc_taskgroup: Start a new taskgroup
+
+void
+__kmpc_taskgroup( ident_t* loc, int gtid )
+{
+ kmp_info_t * thread = __kmp_threads[ gtid ];
+ kmp_taskdata_t * taskdata = thread->th.th_current_task;
+ kmp_taskgroup_t * tg_new =
+ (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
+ KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
+ tg_new->count = 0;
+ tg_new->cancel_request = cancel_noreq;
+ tg_new->parent = taskdata->td_taskgroup;
+ taskdata->td_taskgroup = tg_new;
+}
+
+
+//-------------------------------------------------------------------------------------
+// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
+// and its descendants are complete
+
+void
+__kmpc_end_taskgroup( ident_t* loc, int gtid )
+{
+ kmp_info_t * thread = __kmp_threads[ gtid ];
+ kmp_taskdata_t * taskdata = thread->th.th_current_task;
+ kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
+ int thread_finished = FALSE;
+
+ KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
+ KMP_DEBUG_ASSERT( taskgroup != NULL );
+
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+#if USE_ITT_BUILD
+ // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
+ void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+#if OMP_41_ENABLED
+ if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
+#else
+ if ( ! taskdata->td_flags.team_serial )
+#endif
+ {
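+            // Help execute queued tasks until the taskgroup's pending-task count
+            // drains to zero.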
+ kmp_flag_32 flag(&(taskgroup->count), 0U);
+ while ( TCR_4(taskgroup->count) != 0 ) {
+ flag.execute_tasks(thread, gtid, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
+ }
+ }
+
+#if USE_ITT_BUILD
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+ }
+ KMP_DEBUG_ASSERT( taskgroup->count == 0 );
+
+ // Restore parent taskgroup for the current task
+ taskdata->td_taskgroup = taskgroup->parent;
+ __kmp_thread_free( thread, taskgroup );
+
+ KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
+}
+#endif
+
+
+//------------------------------------------------------
+// __kmp_remove_my_task: remove a task from my own deque
+
+static kmp_task_t *
+__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
+ kmp_int32 is_constrained )
+{
+ kmp_task_t * task;
+ kmp_taskdata_t * taskdata;
+ kmp_thread_data_t *thread_data;
+ kmp_uint32 tail;
+
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+ KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
+
+ thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
+
+ KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
+ thread_data->td.td_deque_tail) );
+
+ if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
+ KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
+ thread_data->td.td_deque_tail) );
+ return NULL;
+ }
+
+ __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
+
+ if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
+ __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+ KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
+ thread_data->td.td_deque_tail) );
+ return NULL;
+ }
+
+ tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
+ taskdata = thread_data -> td.td_deque[ tail ];
+
+ if (is_constrained) {
+ // we need to check if the candidate obeys task scheduling constraint:
+ // only child of current task can be scheduled
+ kmp_taskdata_t * current = thread->th.th_current_task;
+ kmp_int32 level = current->td_level;
+ kmp_taskdata_t * parent = taskdata->td_parent;
+ while ( parent != current && parent->td_level > level ) {
+ parent = parent->td_parent; // check generation up to the level of the current task
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if ( parent != current ) {
+            // If the tail task is not a child, then no other children can appear in the deque.
+ __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+ KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
+ thread_data->td.td_deque_tail) );
+ return NULL;
+ }
+ }
+
+ thread_data -> td.td_deque_tail = tail;
+ TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
+
+ __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
+
+ KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
+ thread_data->td.td_deque_tail) );
+
+ task = KMP_TASKDATA_TO_TASK( taskdata );
+ return task;
+}
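+
+// Note (illustrative sketch, not library code): the per-thread deque above is a
+// fixed-size ring buffer of TASK_DEQUE_SIZE kmp_taskdata_t pointers, assumed to be
+// a power of two so that TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1 and indices wrap
+// with a bitwise AND instead of a modulo:
+//
+//     tail = ( tail - 1 ) & TASK_DEQUE_MASK;   // owner pops the newest task (LIFO)
+//     head = ( head + 1 ) & TASK_DEQUE_MASK;   // thief takes the oldest task (FIFO)
+//
+// The owner working from the tail keeps recently spawned tasks warm in cache, while
+// thieves taking from the head (see __kmp_steal_task below) tend to pick up the
+// older, typically larger pieces of work.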
+
+
+//-----------------------------------------------------------
+// __kmp_steal_task: remove a task from another thread's deque
+// Assume that calling thread has already checked existence of
+// task_team thread_data before calling this routine.
+
+static kmp_task_t *
+__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
+ volatile kmp_uint32 *unfinished_threads, int *thread_finished,
+ kmp_int32 is_constrained )
+{
+ kmp_task_t * task;
+ kmp_taskdata_t * taskdata;
+ kmp_thread_data_t *victim_td, *threads_data;
+ kmp_int32 victim_tid;
+
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+
+ threads_data = task_team -> tt.tt_threads_data;
+ KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
+
+ victim_tid = victim->th.th_info.ds.ds_tid;
+ victim_td = & threads_data[ victim_tid ];
+
+ KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
+ "head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
+
+ if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
+ (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
+ {
+ KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
+ return NULL;
+ }
+
+ __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
+
+ // Check again after we acquire the lock
+ if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
+ (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
+ {
+ __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
+ KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
+ return NULL;
+ }
+
+ KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
+
+ if ( !is_constrained ) {
+ taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
+ // Bump head pointer and Wrap.
+ victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
+ } else {
+ // While there are postponed tasks, steal from the tail of the deque (smaller tasks)
+ kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
+ taskdata = victim_td -> td.td_deque[ tail ];
+ // we need to check if the candidate obeys the task scheduling constraint:
+ // only a child of the current task can be scheduled
+ kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
+ kmp_int32 level = current->td_level;
+ kmp_taskdata_t * parent = taskdata->td_parent;
+ while ( parent != current && parent->td_level > level ) {
+ parent = parent->td_parent; // check generation up to the level of the current task
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if ( parent != current ) {
+ // If the tail task is not a child, then no other children can appear in the deque (?).
+ __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
+ KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
+ task_team, victim_td->td.td_deque_ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
+ return NULL;
+ }
+ victim_td -> td.td_deque_tail = tail;
+ }
+ if (*thread_finished) {
+ // We need to un-mark this victim as a finished victim. This must be done before
+ // releasing the lock, or else other threads (starting with the master victim)
+ // might be prematurely released from the barrier!!!
+ kmp_uint32 count;
+
+ count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
+
+ KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
+ gtid, count + 1, task_team) );
+
+ *thread_finished = FALSE;
+ }
+ TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
+
+ __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
+
+ KMP_COUNT_BLOCK(TASK_stolen);
+ KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
+ victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+ victim_td->td.td_deque_tail) );
+
+ task = KMP_TASKDATA_TO_TASK( taskdata );
+ return task;
+}
+
+
+//-----------------------------------------------------------------------------
+// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
+// is satisfied (return true) or there are none left (return false).
+// final_spin is TRUE if this is the spin at the release barrier.
+// thread_finished indicates whether the thread is finished executing all
+// the tasks it has on its deque, and is at the release barrier.
+// spinner is the location on which to spin.
+// spinner == NULL means only execute a single task and return.
+// checker is the value to check to terminate the spin.
+template <class C>
+static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
+{
+ kmp_task_team_t * task_team;
+ kmp_thread_data_t * threads_data;
+ kmp_task_t * task;
+ kmp_taskdata_t * current_task = thread -> th.th_current_task;
+ volatile kmp_uint32 * unfinished_threads;
+ kmp_int32 nthreads, last_stolen, k, tid;
+
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+ KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
+
+ task_team = thread -> th.th_task_team;
+ if (task_team == NULL) return FALSE;
+
+ KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
+ gtid, final_spin, *thread_finished) );
+
+ threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
+ KMP_DEBUG_ASSERT( threads_data != NULL );
+
+ nthreads = task_team -> tt.tt_nproc;
+ unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
+#if OMP_41_ENABLED
+ KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
+#else
+ KMP_DEBUG_ASSERT( nthreads > 1 );
+#endif
+ KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
+
+ // Choose tasks from our own work queue.
+ start:
+ while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
+ if ( itt_sync_obj == NULL ) {
+ // we are at fork barrier where we could not get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+ }
+ __kmp_itt_task_starting( itt_sync_obj );
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ __kmp_invoke_task( gtid, task, current_task );
+#if USE_ITT_BUILD
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_task_finished( itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+ // If this thread is only partway through the barrier and the condition
+ // is met, then return now, so that the barrier gather/release pattern can proceed.
+ // If this thread is in the last spin loop in the barrier, waiting to be
+ // released, we know that the termination condition will not be satisfied,
+ // so don't waste any cycles checking it.
+ if (flag == NULL || (!final_spin && flag->done_check())) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
+ return TRUE;
+ }
+ if (thread->th.th_task_team == NULL) break;
+ KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
+ }
+
+ // This thread's work queue is empty. If we are in the final spin loop
+ // of the barrier, check and see if the termination condition is satisfied.
+#if OMP_41_ENABLED
+ // The work queue may be empty but there might be proxy tasks still executing
+ if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
+#else
+ if (final_spin)
+#endif
+ {
+ // First, decrement the #unfinished threads, if that has not already
+ // been done. This decrement might be to the spin location, and
+ // result in the termination condition being satisfied.
+ if (! *thread_finished) {
+ kmp_uint32 count;
+
+ count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
+ KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
+ gtid, count, task_team) );
+ *thread_finished = TRUE;
+ }
+
+ // It is now unsafe to reference thread->th.th_team !!!
+ // Decrementing task_team->tt.tt_unfinished_threads can allow the master
+ // thread to pass through the barrier, where it might reset each thread's
+ // th.th_team field for the next parallel region.
+ // If we can steal more work, we know that this has not happened yet.
+ if (flag != NULL && flag->done_check()) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
+ return TRUE;
+ }
+ }
+
+ if (thread->th.th_task_team == NULL) return FALSE;
+#if OMP_41_ENABLED
+ // check if there are other threads to steal from, otherwise go back
+ if ( nthreads == 1 )
+ goto start;
+#endif
+
+ // Try to steal from the last place I stole from successfully.
+ tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
+ last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
+
+ if (last_stolen != -1) {
+ kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
+
+ while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
+ thread_finished, is_constrained )) != NULL)
+ {
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
+ if ( itt_sync_obj == NULL ) {
+ // we are at fork barrier where we could not get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+ }
+ __kmp_itt_task_starting( itt_sync_obj );
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ __kmp_invoke_task( gtid, task, current_task );
+#if USE_ITT_BUILD
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_task_finished( itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+ // Check to see if this thread can proceed.
+ if (flag == NULL || (!final_spin && flag->done_check())) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
+ gtid) );
+ return TRUE;
+ }
+
+ if (thread->th.th_task_team == NULL) break;
+ KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
+ // If the execution of the stolen task resulted in more tasks being
+ // placed on our run queue, then restart the whole process.
+ if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
+ gtid) );
+ goto start;
+ }
+ }
+
+ // Don't give priority to stealing from this thread anymore.
+ threads_data[ tid ].td.td_deque_last_stolen = -1;
+
+ // The victim's work queue is empty. If we are in the final spin loop
+ // of the barrier, check and see if the termination condition is satisfied.
+#if OMP_41_ENABLED
+ // The work queue may be empty but there might be proxy tasks still executing
+ if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
+#else
+ if (final_spin)
+#endif
+ {
+ // First, decrement the #unfinished threads, if that has not already
+ // been done. This decrement might be to the spin location, and
+ // result in the termination condition being satisfied.
+ if (! *thread_finished) {
+ kmp_uint32 count;
+
+ count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
+ KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
+ "task_team=%p\n", gtid, count, task_team) );
+ *thread_finished = TRUE;
+ }
+
+ // If __kmp_tasking_mode != tskm_immediate_exec
+ // then it is now unsafe to reference thread->th.th_team !!!
+ // Decrementing task_team->tt.tt_unfinished_threads can allow the master
+ // thread to pass through the barrier, where it might reset each thread's
+ // th.th_team field for the next parallel region.
+ // If we can steal more work, we know that this has not happened yet.
+ if (flag != NULL && flag->done_check()) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
+ gtid) );
+ return TRUE;
+ }
+ }
+ if (thread->th.th_task_team == NULL) return FALSE;
+ }
+
+ // Find a different thread to steal work from. Pick a random thread.
+ // My initial plan was to cycle through all the threads, and only return
+ // if we tried to steal from every thread, and failed. Arch says that's
+ // not such a great idea.
+ // GEH - need yield code in this loop for throughput library mode?
+ new_victim:
+ k = __kmp_get_random( thread ) % (nthreads - 1);
+ if ( k >= thread -> th.th_info.ds.ds_tid ) {
+ ++k; // Adjusts random distribution to exclude self
+ }
+ {
+ kmp_info_t *other_thread = threads_data[k].td.td_thr;
+ int first;
+
+ // There is a slight chance that __kmp_enable_tasking() did not wake up
+ // all threads waiting at the barrier. If this thread is sleeping, then
+ // wake it up. Since we were going to pay the cache miss penalty
+ // for referencing another thread's kmp_info_t struct anyway, the check
+ // shouldn't cost too much performance at this point.
+ // In extra barrier mode, tasks do not sleep at the separate tasking
+ // barrier, so this isn't a problem.
+ if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
+ (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
+ (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
+ {
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
+ // A sleeping thread should not have any tasks on its queue.
+ // There is a slight possibility that it resumes, steals a task from
+ // another thread, which spawns more tasks, all in the time that it takes
+ // this thread to check => don't write an assertion that the victim's
+ // queue is empty. Try stealing from a different thread.
+ goto new_victim;
+ }
+
+ // Now try to steal work from the selected thread
+ first = TRUE;
+ while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
+ thread_finished, is_constrained )) != NULL)
+ {
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
+ if ( itt_sync_obj == NULL ) {
+ // we are at fork barrier where we could not get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+ }
+ __kmp_itt_task_starting( itt_sync_obj );
+ }
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ __kmp_invoke_task( gtid, task, current_task );
+#if USE_ITT_BUILD
+ if ( itt_sync_obj != NULL )
+ __kmp_itt_task_finished( itt_sync_obj );
+#endif /* USE_ITT_BUILD */
+
+ // Try stealing from this victim again, in the future.
+ if (first) {
+ threads_data[ tid ].td.td_deque_last_stolen = k;
+ first = FALSE;
+ }
+
+ // Check to see if this thread can proceed.
+ if (flag == NULL || (!final_spin && flag->done_check())) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
+ gtid) );
+ return TRUE;
+ }
+ if (thread->th.th_task_team == NULL) break;
+ KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
+
+ // If the execution of the stolen task resulted in more tasks being
+ // placed on our run queue, then restart the whole process.
+ if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
+ gtid) );
+ goto start;
+ }
+ }
+
+ // The victim's work queue is empty. If we are in the final spin loop
+ // of the barrier, check and see if the termination condition is satisfied.
+ // Going on and finding a new victim to steal from is expensive, as it
+ // involves a lot of cache misses, so we definitely want to re-check the
+ // termination condition before doing that.
+#if OMP_41_ENABLED
+ // The work queue may be empty but there might be proxy tasks still executing
+ if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
+#else
+ if (final_spin)
+#endif
+ {
+ // First, decrement the #unfinished threads, if that has not already
+ // been done. This decrement might be to the spin location, and
+ // result in the termination condition being satisfied.
+ if (! *thread_finished) {
+ kmp_uint32 count;
+
+ count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
+ KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
+ "task_team=%p\n",
+ gtid, count, task_team) );
+ *thread_finished = TRUE;
+ }
+
+ // If __kmp_tasking_mode != tskm_immediate_exec,
+ // then it is now unsafe to reference thread->th.th_team !!!
+ // Decrementing task_team->tt.tt_unfinished_threads can allow the master
+ // thread to pass through the barrier, where it might reset each thread's
+ // th.th_team field for the next parallel region.
+ // If we can steal more work, we know that this has not happened yet.
+ if (flag != NULL && flag->done_check()) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
+ return TRUE;
+ }
+ }
+ if (thread->th.th_task_team == NULL) return FALSE;
+ }
+
+ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
+ return FALSE;
+}
+
+int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
+{
+ return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
+int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
+{
+ return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
+int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
+{
+ return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
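+
+// Usage note (illustrative sketch; the names are taken from code elsewhere in this
+// file): these wrappers are normally reached through the flag objects, whose
+// execute_tasks() methods forward here with the flag itself acting as the
+// termination checker. A spin loop that drains tasks while waiting for a 32-bit
+// counter to reach its checker value therefore looks roughly like the following
+// (compare __kmp_tasking_barrier further below):
+//
+//     kmp_flag_32 spin_flag( &task_team->tt.tt_unfinished_threads, 0U );
+//     while ( ! spin_flag.execute_tasks( thread, gtid, TRUE, &thread_finished
+//                                        USE_ITT_BUILD_ARG(NULL), 0 ) ) {
+//         // not done yet: check for shutdown/abort, then yield and retry
+//     }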
+
+
+
+//-----------------------------------------------------------------------------
+// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
+// next barrier so they can assist in executing enqueued tasks.
+// First thread in allocates the task team atomically.
+
+static void
+__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
+{
+ kmp_thread_data_t *threads_data;
+ int nthreads, i, is_init_thread;
+
+ KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
+ __kmp_gtid_from_thread( this_thr ) ) );
+
+ KMP_DEBUG_ASSERT(task_team != NULL);
+ KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
+
+ nthreads = task_team->tt.tt_nproc;
+ KMP_DEBUG_ASSERT(nthreads > 0);
+ KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
+
+ // Allocate or increase the size of threads_data if necessary
+ is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
+
+ if (!is_init_thread) {
+ // Some other thread already set up the array.
+ KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
+ __kmp_gtid_from_thread( this_thr ) ) );
+ return;
+ }
+ threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
+ KMP_DEBUG_ASSERT( threads_data != NULL );
+
+ if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
+ ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
+ {
+ // Release any threads sleeping at the barrier, so that they can steal
+ // tasks and execute them. In extra barrier mode, tasks do not sleep
+ // at the separate tasking barrier, so this isn't a problem.
+ for (i = 0; i < nthreads; i++) {
+ volatile void *sleep_loc;
+ kmp_info_t *thread = threads_data[i].td.td_thr;
+
+ if (i == this_thr->th.th_info.ds.ds_tid) {
+ continue;
+ }
+ // Since we haven't locked the thread's suspend mutex lock at this
+ // point, there is a small window where a thread might be putting
+ // itself to sleep, but hasn't set the th_sleep_loc field yet.
+ // To work around this, __kmp_execute_tasks_template() periodically checks to
+ // see if other threads are sleeping (using the same random
+ // mechanism that is used for task stealing) and awakens them if
+ // they are.
+ if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
+ {
+ KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
+ __kmp_gtid_from_thread( this_thr ),
+ __kmp_gtid_from_thread( thread ) ) );
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ }
+ else {
+ KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
+ __kmp_gtid_from_thread( this_thr ),
+ __kmp_gtid_from_thread( thread ) ) );
+ }
+ }
+ }
+
+ KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
+ __kmp_gtid_from_thread( this_thr ) ) );
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* // TODO: Check the comment consistency
+ * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
+ * like a shadow of the kmp_team_t data struct, with a different lifetime.
+ * After a child thread checks into a barrier and calls __kmp_release() from
+ * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
+ * longer assume that the kmp_team_t structure is intact (at any moment, the
+ * master thread may exit the barrier code and free the team data structure,
+ * and return the threads to the thread pool).
+ *
+ * This does not work with the tasking code, as the thread is still
+ * expected to participate in the execution of any tasks that may have been
+ * spawned by a member of the team, and the thread still needs access
+ * to each thread in the team, so that it can steal work from it.
+ *
+ * Enter the existence of the kmp_task_team_t struct. It employs a reference
+ * counting mechanism, and is allocated by the master thread before calling
+ * __kmp_<barrier_kind>_release, and then is released by the last thread to
+ * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
+ * of the kmp_task_team_t structs for consecutive barriers can overlap
+ * (and will, unless the master thread is the last thread to exit the barrier
+ * release phase, which is not typical).
+ *
+ * The existence of such a struct is useful outside the context of tasking,
+ * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
+ * so that any performance differences show up when comparing the 2.5 vs. 3.0
+ * libraries.
+ *
+ * We currently use the existence of the threads array as an indicator that
+ * tasks were spawned since the last barrier. If the structure is to be
+ * useful outside the context of tasking, then this will have to change, but
+ * not setting the field minimizes the performance impact of tasking on
+ * barriers, when no explicit tasks were spawned (pushed, actually).
+ */
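+
+/*
+ * Summary sketch (illustrative only, distilled from __kmp_task_team_setup(),
+ * __kmp_task_team_sync() and __kmp_task_team_wait() below): each kmp_team_t
+ * carries two task team slots, t.t_task_team[0] and t.t_task_team[1], and every
+ * thread keeps a one-bit parity in th.th_task_state. The master prepares the
+ * slot for the upcoming region in __kmp_task_team_setup(); after the barrier
+ * release each thread flips its parity and re-points its th_task_team in
+ * __kmp_task_team_sync(), roughly:
+ *
+ *     this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
+ *     this_thr->th.th_task_team  = team->t.t_task_team[ this_thr->th.th_task_state ];
+ *
+ * and the master deactivates the old slot in __kmp_task_team_wait(). This lets
+ * the task teams of consecutive barriers overlap without any thread having to
+ * dereference a kmp_team_t that the master may already have recycled.
+ */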
+
+
+static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
+// Lock for task team data structures
+static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
+
+
+//------------------------------------------------------------------------------
+// __kmp_alloc_task_deque:
+// Allocates a task deque for a particular thread, and initializes the necessary
+// data structures relating to the deque. This only happens once per thread
+// per task team since task teams are recycled.
+// No lock is needed during allocation since each thread allocates its own
+// deque.
+
+static void
+__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
+{
+ __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
+ KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
+
+ // Initialize last stolen task field to "none"
+ thread_data -> td.td_deque_last_stolen = -1;
+
+ KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
+ KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
+ KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
+
+ KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
+ __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
+ // Allocate space for task deque, and zero the deque
+ // Cannot use __kmp_thread_calloc() because threads not around for
+ // kmp_reap_task_team( ).
+ thread_data -> td.td_deque = (kmp_taskdata_t **)
+ __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_free_task_deque:
+// Deallocates a task deque for a particular thread.
+// Happens at library deallocation, so there is no need to reset all thread data fields.
+
+static void
+__kmp_free_task_deque( kmp_thread_data_t *thread_data )
+{
+ __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
+
+ if ( thread_data -> td.td_deque != NULL ) {
+ TCW_4(thread_data -> td.td_deque_ntasks, 0);
+ __kmp_free( thread_data -> td.td_deque );
+ thread_data -> td.td_deque = NULL;
+ }
+ __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+
+#ifdef BUILD_TIED_TASK_STACK
+ // GEH: Figure out what to do here for td_susp_tied_tasks
+ if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
+ __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
+ }
+#endif // BUILD_TIED_TASK_STACK
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_realloc_task_threads_data:
+// Allocates a threads_data array for a task team, either by allocating an initial
+// array or enlarging an existing array. Only the first thread to get the lock
+// allocates or enlarges the array and re-initializes the array elements.
+// That thread returns "TRUE", the rest return "FALSE".
+// Assumes that the new array size is given by task_team -> tt.tt_nproc.
+// The current size is given by task_team -> tt.tt_max_threads.
+
+static int
+__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
+{
+ kmp_thread_data_t ** threads_data_p;
+ kmp_int32 nthreads, maxthreads;
+ int is_init_thread = FALSE;
+
+ if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
+ // Already reallocated and initialized.
+ return FALSE;
+ }
+
+ threads_data_p = & task_team -> tt.tt_threads_data;
+ nthreads = task_team -> tt.tt_nproc;
+ maxthreads = task_team -> tt.tt_max_threads;
+
+ // All threads must lock when they encounter the first task of the implicit task
+ // region to make sure threads_data fields are (re)initialized before used.
+ __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+
+ if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
+ // first thread to enable tasking
+ kmp_team_t *team = thread -> th.th_team;
+ int i;
+
+ is_init_thread = TRUE;
+ if ( maxthreads < nthreads ) {
+
+ if ( *threads_data_p != NULL ) {
+ kmp_thread_data_t *old_data = *threads_data_p;
+ kmp_thread_data_t *new_data = NULL;
+
+ KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
+ "threads data for task_team %p, new_size = %d, old_size = %d\n",
+ __kmp_gtid_from_thread( thread ), task_team,
+ nthreads, maxthreads ) );
+ // Reallocate threads_data to have more elements than current array
+ // Cannot use __kmp_thread_realloc() because threads not around for
+ // kmp_reap_task_team( ). Note all new array entries are initialized
+ // to zero by __kmp_allocate().
+ new_data = (kmp_thread_data_t *)
+ __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
+ // copy old data to new data
+ KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
+ (void *) old_data,
+ maxthreads * sizeof(kmp_taskdata_t *) );
+
+#ifdef BUILD_TIED_TASK_STACK
+ // GEH: Figure out if this is the right thing to do
+ for (i = maxthreads; i < nthreads; i++) {
+ kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
+ __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
+ }
+#endif // BUILD_TIED_TASK_STACK
+ // Install the new data and free the old data
+ (*threads_data_p) = new_data;
+ __kmp_free( old_data );
+ }
+ else {
+ KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
+ "threads data for task_team %p, size = %d\n",
+ __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
+ // Make the initial allocate for threads_data array, and zero entries
+ // Cannot use __kmp_thread_calloc() because threads not around for
+ // kmp_reap_task_team( ).
+ *threads_data_p = (kmp_thread_data_t *)
+ __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
+#ifdef BUILD_TIED_TASK_STACK
+ // GEH: Figure out if this is the right thing to do
+ for (i = 0; i < nthreads; i++) {
+ kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
+ __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
+ }
+#endif // BUILD_TIED_TASK_STACK
+ }
+ task_team -> tt.tt_max_threads = nthreads;
+ }
+ else {
+ // If array has (more than) enough elements, go ahead and use it
+ KMP_DEBUG_ASSERT( *threads_data_p != NULL );
+ }
+
+ // initialize threads_data pointers back to thread_info structures
+ for (i = 0; i < nthreads; i++) {
+ kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
+ thread_data -> td.td_thr = team -> t.t_threads[i];
+
+ if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
+ // The last stolen field survives across teams / barrier, and the number
+ // of threads may have changed. It's possible (likely?) that a new
+ // parallel region will exhibit the same behavior as the previous region.
+ thread_data -> td.td_deque_last_stolen = -1;
+ }
+ }
+
+ KMP_MB();
+ TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
+ }
+
+ __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+ return is_init_thread;
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_free_task_threads_data:
+// Deallocates a threads_data array for a task team, including any attached
+// tasking deques. Only occurs at library shutdown.
+
+static void
+__kmp_free_task_threads_data( kmp_task_team_t *task_team )
+{
+ __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+ if ( task_team -> tt.tt_threads_data != NULL ) {
+ int i;
+ for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
+ __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
+ }
+ __kmp_free( task_team -> tt.tt_threads_data );
+ task_team -> tt.tt_threads_data = NULL;
+ }
+ __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_allocate_task_team:
+// Allocates a task team associated with a specific team, taking it from
+// the global task team free list if possible. Also initializes data structures.
+
+static kmp_task_team_t *
+__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
+{
+ kmp_task_team_t *task_team = NULL;
+ int nthreads;
+
+ KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
+ (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
+
+ if (TCR_PTR(__kmp_free_task_teams) != NULL) {
+ // Take a task team from the task team pool
+ __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
+ if (__kmp_free_task_teams != NULL) {
+ task_team = __kmp_free_task_teams;
+ TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
+ task_team -> tt.tt_next = NULL;
+ }
+ __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
+ }
+
+ if (task_team == NULL) {
+ KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
+ "task team for team %p\n",
+ __kmp_gtid_from_thread( thread ), team ) );
+ // Allocate a new task team if one is not available.
+ // Cannot use __kmp_thread_malloc() because threads not around for
+ // kmp_reap_task_team( ).
+ task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
+ __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+ //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
+ //task_team -> tt.tt_max_threads = 0;
+ //task_team -> tt.tt_next = NULL;
+ }
+
+ TCW_4(task_team -> tt.tt_found_tasks, FALSE);
+#if OMP_41_ENABLED
+ TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
+#endif
+ task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
+
+ TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
+ TCW_4( task_team -> tt.tt_active, TRUE );
+
+ KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
+ (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
+ return task_team;
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_free_task_team:
+// Frees the task team associated with a specific thread, and adds it
+// to the global task team free list.
+
+void
+__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
+{
+ KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
+ thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
+
+ // Put task team back on free list
+ __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
+
+ KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
+ task_team -> tt.tt_next = __kmp_free_task_teams;
+ TCW_PTR(__kmp_free_task_teams, task_team);
+
+ __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_reap_task_teams:
+// Free all the task teams on the task team free list.
+// Should only be done during library shutdown.
+// Cannot do anything that needs a thread structure or gtid since they are already gone.
+
+void
+__kmp_reap_task_teams( void )
+{
+ kmp_task_team_t *task_team;
+
+ if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
+ // Free all task_teams on the free list
+ __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
+ while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
+ __kmp_free_task_teams = task_team -> tt.tt_next;
+ task_team -> tt.tt_next = NULL;
+
+ // Free threads_data if necessary
+ if ( task_team -> tt.tt_threads_data != NULL ) {
+ __kmp_free_task_threads_data( task_team );
+ }
+ __kmp_free( task_team );
+ }
+ __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
+ }
+}
+
+//------------------------------------------------------------------------------
+// __kmp_wait_to_unref_task_teams:
+// Some threads could still be in the fork barrier release code, possibly
+// trying to steal tasks. Wait for each thread to unreference its task team.
+//
+void
+__kmp_wait_to_unref_task_teams(void)
+{
+ kmp_info_t *thread;
+ kmp_uint32 spins;
+ int done;
+
+ KMP_INIT_YIELD( spins );
+
+
+ for (;;) {
+ done = TRUE;
+
+ // TODO: GEH - this may be wrong because some sync would be necessary
+ // in case threads are added to the pool during the traversal.
+ // Need to verify that lock for thread pool is held when calling
+ // this routine.
+ for (thread = (kmp_info_t *)__kmp_thread_pool;
+ thread != NULL;
+ thread = thread->th.th_next_pool)
+ {
+#if KMP_OS_WINDOWS
+ DWORD exit_val;
+#endif
+ if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
+ KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
+ __kmp_gtid_from_thread( thread ) ) );
+ continue;
+ }
+#if KMP_OS_WINDOWS
+ // TODO: GEH - add this check for Linux* OS / OS X* as well?
+ if (!__kmp_is_thread_alive(thread, &exit_val)) {
+ thread->th.th_task_team = NULL;
+ continue;
+ }
+#endif
+
+ done = FALSE; // Because th_task_team pointer is not NULL for this thread
+
+ KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
+ __kmp_gtid_from_thread( thread ) ) );
+
+ if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
+ volatile void *sleep_loc;
+ // If the thread is sleeping, awaken it.
+ if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
+ KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
+ __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ }
+ }
+ }
+ if (done) {
+ break;
+ }
+
+ // If we are oversubscribed,
+ // or have waited a bit (and library mode is throughput), yield.
+ // Pause is in the following code.
+ KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
+ KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
+ }
+
+
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_task_team_setup: Create a task_team for the current team, but use
+// an already created, unused one if it already exists.
+void
+__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
+{
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+
+ // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
+ // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
+ if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
+ team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
+ __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
+ ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
+ }
+
+ // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
+ // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
+ // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
+ // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
+ // serialized teams.
+ if (team->t.t_nproc > 1) {
+ int other_team = 1 - this_thr->th.th_task_state;
+ if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
+ team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
+ __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
+ ((team != NULL) ? team->t.t_id : -1), other_team ));
+ }
+ else { // Leave the old task team struct in place for the upcoming region; adjust as needed
+ kmp_task_team_t *task_team = team->t.t_task_team[other_team];
+ if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
+ TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
+ TCW_4(task_team->tt.tt_found_tasks, FALSE);
+#if OMP_41_ENABLED
+ TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+#endif
+ TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
+ TCW_4(task_team->tt.tt_active, TRUE );
+ }
+ // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
+ __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
+ ((team != NULL) ? team->t.t_id : -1), other_team ));
+ }
+ }
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_task_team_sync: Propagation of task team data from team to threads
+// which happens just after the release phase of a team barrier. This may be
+// called by any thread, but only for teams with # threads > 1.
+
+void
+__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
+{
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+
+ // Toggle the th_task_state field, to switch which task_team this thread refers to
+ this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
+ // It is now safe to propagate the task team pointer from the team struct to the current thread.
+ TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
+ KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
+ __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
+ ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
+}
+
+
+//--------------------------------------------------------------------------------------------
+// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
+// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
+// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
+// optionally as the last argument. When wait is zero, master thread does not wait for
+// unfinished_threads to reach 0.
+void
+__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
+ USE_ITT_BUILD_ARG(void * itt_sync_obj)
+ , int wait)
+{
+ kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
+
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+ KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
+
+ if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
+ if (wait) {
+ KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
+ __kmp_gtid_from_thread(this_thr), task_team));
+ // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
+ // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
+ kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
+ flag.wait(this_thr, TRUE
+ USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
+ KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
+ "setting active to false, setting local and team's pointer to NULL\n",
+ __kmp_gtid_from_thread(this_thr), task_team));
+#if OMP_41_ENABLED
+ KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
+ TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
+#else
+ KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
+#endif
+ TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+ KMP_MB();
+
+ TCW_PTR(this_thr->th.th_task_team, NULL);
+ }
+}
+
+
+//------------------------------------------------------------------------------
+// __kmp_tasking_barrier:
+// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
+// Internal function to execute all tasks prior to a regular barrier or a
+// join barrier. It is a full barrier itself, which unfortunately turns
+// regular barriers into double barriers and join barriers into 1 1/2
+// barriers.
+void
+__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
+{
+ volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
+ int flag = FALSE;
+ KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
+
+#if USE_ITT_BUILD
+ KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
+#endif /* USE_ITT_BUILD */
+ kmp_flag_32 spin_flag(spin, 0U);
+ while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
+ USE_ITT_BUILD_ARG(NULL), 0 ) ) {
+#if USE_ITT_BUILD
+ // TODO: What about itt_sync_obj??
+ KMP_FSYNC_SPIN_PREPARE( spin );
+#endif /* USE_ITT_BUILD */
+
+ if( TCR_4(__kmp_global.g.g_done) ) {
+ if( __kmp_global.g.g_abort )
+ __kmp_abort_thread( );
+ break;
+ }
+ KMP_YIELD( TRUE ); // GH: We always yield here
+ }
+#if USE_ITT_BUILD
+ KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
+#endif /* USE_ITT_BUILD */
+}
+
+
+#if OMP_41_ENABLED
+
+/* __kmp_give_task puts a task into a given thread's queue if:
+ - the queue for that thread was created
+ - there's space in that queue
+
+ Because of this, __kmp_push_task needs to check if there's space after getting the lock
+ */
+static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
+{
+ kmp_task_team_t * task_team = thread->th.th_task_team;
+ kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+ bool result = false;
+
+ KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
+
+ // assert tasking is enabled? what if not?
+ KMP_DEBUG_ASSERT( task_team != NULL );
+
+ if (thread_data -> td.td_deque == NULL ) {
+ // There's no queue in this thread, go find another one
+ // We're guaranteed that at least one thread has a queue
+ KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
+ return result;
+ }
+
+ if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
+ {
+ KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
+ return result;
+ }
+
+ __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
+
+ if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
+ {
+ KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
+ goto release_and_exit;
+ }
+
+ thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
+ // Wrap index.
+ thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
+ TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
+
+ result = true;
+ KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
+
+release_and_exit:
+ __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
+
+ return result;
+}
+
+
+/* The finish of a proxy task is divided in two pieces:
+ - the top half is the one that can be done from a thread outside the team
+ - the bottom half must be run from a thread within the team
+
+ In order to run the bottom half the task gets queued back into one of the threads of the team.
+ Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
+ So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
+ - things that can be run before queuing the bottom half
+ - things that must be run after queuing the bottom half
+
+ This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
+ we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
+*/
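+
+/* Ordering sketch (illustrative summary of the helpers below): for a proxy task
+   completed by a thread outside the team, __kmpc_proxy_task_completed_ooo() runs
+
+       __kmp_first_top_half_finish_proxy( taskdata );   // mark complete, add the imaginary child
+       __kmp_give_task( thread, k, ptask );             // re-queue the task so a team thread runs the bottom half
+       __kmp_second_top_half_finish_proxy( taskdata );  // decrement the parent, remove the imaginary child
+
+   while __kmpc_proxy_task_completed(), called from inside the team, can run the
+   bottom half directly. The imaginary child added by the first top half is what
+   keeps the bottom half from freeing the task before the second top half has run.
+*/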
+
+static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
+{
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
+
+ taskdata -> td_flags.complete = 1; // mark the task as completed
+
+ if ( taskdata->td_taskgroup )
+ KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
+
+ // Create an imaginary child for this task so that the bottom half cannot release the task before we have completed the second top half
+ TCR_4(taskdata->td_incomplete_child_tasks++);
+}
+
+static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
+{
+ kmp_int32 children = 0;
+
+ // Predecrement simulated by "- 1" calculation
+ children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
+ KMP_DEBUG_ASSERT( children >= 0 );
+
+ // Remove the imaginary child
+ TCR_4(taskdata->td_incomplete_child_tasks--);
+}
+
+static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
+{
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
+ kmp_info_t * thread = __kmp_threads[ gtid ];
+
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
+ KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
+
+ // We need to wait to make sure the top half is finished
+ // Spinning here should be ok as this should happen quickly
+ while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
+
+ __kmp_release_deps(gtid,taskdata);
+ __kmp_free_task_and_ancestors(gtid, taskdata, thread);
+}
+
+/*!
+@ingroup TASKING
+@param gtid Global Thread ID of encountering thread
+@param ptask Task which execution is completed
+
+Execute the completion of a proxy task from a thread that is part of the team. Run the first and bottom halves directly.
+*/
+void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
+{
+ KMP_DEBUG_ASSERT( ptask != NULL );
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
+ KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
+
+ KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
+
+ __kmp_first_top_half_finish_proxy(taskdata);
+ __kmp_second_top_half_finish_proxy(taskdata);
+ __kmp_bottom_half_finish_proxy(gtid,ptask);
+
+ KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
+}
+
+/*!
+@ingroup TASKING
+@param ptask Task which execution is completed
+
+Execute the completion of a proxy task from a thread that may not belong to the team.
+*/
+void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
+{
+ KMP_DEBUG_ASSERT( ptask != NULL );
+ kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
+
+ KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
+
+ KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
+
+ __kmp_first_top_half_finish_proxy(taskdata);
+
+ // Enqueue the task so that a thread within the corresponding team completes the bottom half
+ kmp_team_t * team = taskdata->td_team;
+ kmp_int32 nthreads = team->t.t_nproc;
+ kmp_info_t *thread;
+ kmp_int32 k = 0;
+
+ do {
+ // This should be similar to k = __kmp_get_random( thread ) % nthreads, but we cannot use __kmp_get_random here.
+ // For now we just try threads linearly until one accepts the task.
+ k = (k+1) % nthreads;
+ thread = team->t.t_threads[k];
+ } while ( !__kmp_give_task( thread, k, ptask ) );
+
+ __kmp_second_top_half_finish_proxy(taskdata);
+
+ KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
+}
+
+#endif
diff --git a/contrib/libs/cxxsupp/openmp/kmp_taskq.c b/contrib/libs/cxxsupp/openmp/kmp_taskq.c
index 57564a308b..3079d45974 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_taskq.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_taskq.c
@@ -1,2032 +1,2032 @@
-/*
- * kmp_taskq.c -- TASKQ support for OpenMP.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_error.h"
-
-#define MAX_MESSAGE 512
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Taskq routines and global variables
- */
-
-#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);
-
-#define THREAD_ALLOC_FOR_TASKQ
-
-static int
-in_parallel_context( kmp_team_t *team )
-{
- return ! team -> t.t_serialized;
-}
-
-static void
-__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- int gtid = *gtid_ref;
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_uint32 my_token;
- kmpc_task_queue_t *taskq;
- kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
-
- if ( __kmp_env_consistency_check )
-#if KMP_USE_DYNAMIC_LOCK
- __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 );
-#else
- __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
-#endif
-
- if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* GEH - need check here under stats to make sure */
- /* inside task (curr_thunk[*tid_ref] != NULL) */
-
- my_token =tq->tq_curr_thunk[ tid ]-> th_tasknum;
-
- taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;
-
- KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
- KMP_MB();
- }
-}
-
-static void
-__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- int gtid = *gtid_ref;
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_uint32 my_token;
- kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );
-
- if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* GEH - need check here under stats to make sure */
- /* inside task (curr_thunk[tid] != NULL) */
-
- my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
-}
-
-static void
-__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
-{
- kmp_uint32 my_token;
- kmpc_task_queue_t *taskq;
-
- /* assume we are always called from an active parallel context */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- my_token = thunk -> th_tasknum;
-
- taskq = thunk -> th.th_shareds -> sv_queue;
-
- if(taskq->tq_tasknum_serving <= my_token) {
- KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
- KMP_MB();
- taskq->tq_tasknum_serving = my_token +1;
- KMP_MB();
- }
-}
-
-#ifdef KMP_DEBUG
-
-static void
-__kmp_dump_TQF(kmp_int32 flags)
-{
- if (flags & TQF_IS_ORDERED)
- __kmp_printf("ORDERED ");
- if (flags & TQF_IS_LASTPRIVATE)
- __kmp_printf("LAST_PRIV ");
- if (flags & TQF_IS_NOWAIT)
- __kmp_printf("NOWAIT ");
- if (flags & TQF_HEURISTICS)
- __kmp_printf("HEURIST ");
- if (flags & TQF_INTERFACE_RESERVED1)
- __kmp_printf("RESERV1 ");
- if (flags & TQF_INTERFACE_RESERVED2)
- __kmp_printf("RESERV2 ");
- if (flags & TQF_INTERFACE_RESERVED3)
- __kmp_printf("RESERV3 ");
- if (flags & TQF_INTERFACE_RESERVED4)
- __kmp_printf("RESERV4 ");
- if (flags & TQF_IS_LAST_TASK)
- __kmp_printf("LAST_TASK ");
- if (flags & TQF_TASKQ_TASK)
- __kmp_printf("TASKQ_TASK ");
- if (flags & TQF_RELEASE_WORKERS)
- __kmp_printf("RELEASE ");
- if (flags & TQF_ALL_TASKS_QUEUED)
- __kmp_printf("ALL_QUEUED ");
- if (flags & TQF_PARALLEL_CONTEXT)
- __kmp_printf("PARALLEL ");
- if (flags & TQF_DEALLOCATED)
- __kmp_printf("DEALLOC ");
- if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
- __kmp_printf("(NONE)");
-}
-
-static void
-__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
-{
- int i;
- int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
-
- __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);
-
- if (thunk != NULL) {
- for (i = 0; i < nproc; i++) {
- if( tq->tq_curr_thunk[i] == thunk ) {
- __kmp_printf("[%i] ", i);
- }
- }
- __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
- __kmp_printf("th_task=%p, ", thunk->th_task);
- __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
- __kmp_printf("th_status=%d, ", thunk->th_status);
- __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
- __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);
- }
-
- __kmp_printf("\n");
-}
-
-static void
-__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
-{
- kmpc_thunk_t *th;
-
- __kmp_printf(" Thunk stack for T#%d: ", thread_num);
-
- for (th = thunk; th != NULL; th = th->th_encl_thunk )
- __kmp_printf("%p ", th);
-
- __kmp_printf("\n");
-}
-
-static void
-__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
-{
- int qs, count, i;
- kmpc_thunk_t *thunk;
- kmpc_task_queue_t *taskq;
-
- __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
-
- if (queue != NULL) {
- int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
-
- if ( __kmp_env_consistency_check ) {
- __kmp_printf(" tq_loc : ");
- }
- if (in_parallel) {
-
- //if (queue->tq.tq_parent != 0)
- //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
- __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
- __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
- __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
- __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);
-
- //__kmp_release_lock(& queue->tq_link_lck, global_tid);
-
- //if (queue->tq.tq_parent != 0)
- //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
- //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- __kmp_printf(" tq_shareds : ");
- for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
- __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
- __kmp_printf("\n");
-
- if (in_parallel) {
- __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
- __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
- }
-
- __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
- __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
- __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);
-
- __kmp_printf(" tq_free_thunks : ");
- for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
- __kmp_printf("%p ", thunk);
- __kmp_printf("\n");
-
- __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
- __kmp_printf(" tq_head : %d\n", queue->tq_head);
- __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
- __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
- __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
- __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags);
- __kmp_printf("\n");
-
- if (in_parallel) {
- __kmp_printf(" tq_th_thunks : ");
- for (i = 0; i < queue->tq_nproc; i++) {
- __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
- }
- __kmp_printf("\n");
- }
-
- __kmp_printf("\n");
- __kmp_printf(" Queue slots:\n");
-
-
- qs = queue->tq_tail;
- for ( count = 0; count < queue->tq_nfull; ++count ) {
- __kmp_printf("(%d)", qs);
- __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
- qs = (qs+1) % queue->tq_nslots;
- }
-
- __kmp_printf("\n");
-
- if (in_parallel) {
- if (queue->tq_taskq_slot != NULL) {
- __kmp_printf(" TaskQ slot:\n");
- __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
- __kmp_printf("\n");
- }
- //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
- //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
- }
- }
-
- __kmp_printf(" Taskq freelist: ");
-
- //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
- __kmp_printf("%p ", taskq);
-
- //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
-
- __kmp_printf("\n\n");
-}
-
-static void
-__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
-{
- int i, count, qs;
- int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
- kmpc_task_queue_t *queue = curr_queue;
-
- if (curr_queue == NULL)
- return;
-
- __kmp_printf(" ");
-
- for (i=0; i<level; i++)
- __kmp_printf(" ");
-
- __kmp_printf("%p", curr_queue);
-
- for (i = 0; i < nproc; i++) {
- if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
- __kmp_printf(" [%i]", i);
- }
- }
-
- __kmp_printf(":");
-
- //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- qs = curr_queue->tq_tail;
-
- for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
- __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
- qs = (qs+1) % curr_queue->tq_nslots;
- }
-
- //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
-
- __kmp_printf("\n");
-
- if (curr_queue->tq_first_child) {
- //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- if (curr_queue->tq_first_child) {
- for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
- queue != NULL;
- queue = queue->tq_next_child) {
- __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
- }
- }
-
- //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- }
-}
-
-static void
-__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
-{
- __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
-
- __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
-
- __kmp_printf("\n");
-}
-#endif
-
-/* --------------------------------------------------------------------------- */
-
-/*
- New taskq storage routines that try to minimize overhead of mallocs but
- still provide cache line alignment.
-*/
-
-
-static void *
-__kmp_taskq_allocate(size_t size, kmp_int32 global_tid)
-{
- void *addr, *orig_addr;
- size_t bytes;
-
- KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );
-
- bytes = sizeof(void *) + CACHE_LINE + size;
-
-#ifdef THREAD_ALLOC_FOR_TASKQ
- orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
-#else
- KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
- orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
-#endif /* THREAD_ALLOC_FOR_TASKQ */
-
- if (orig_addr == 0)
- KMP_FATAL( OutOfHeapMemory );
-
- addr = orig_addr;
-
- if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
- KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
- addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
- }
-
- (* (void **) addr) = orig_addr;
-
- KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
- orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
- (int) size, global_tid ));
-
- return ( ((void **) addr) + 1 );
-}
-
-static void
-__kmpc_taskq_free(void *p, kmp_int32 global_tid)
-{
- KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );
-
- KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));
-
-#ifdef THREAD_ALLOC_FOR_TASKQ
- __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
-#else
- KMP_INTERNAL_FREE( *( ((void **) p)-1) );
-#endif /* THREAD_ALLOC_FOR_TASKQ */
-}
-
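Taken together, __kmp_taskq_allocate() and __kmpc_taskq_free() implement a standard over-allocate-and-align idiom: reserve room for one pointer plus a full cache line, round the base address up to a line boundary, stash the original allocation pointer at that boundary, and hand back the slot just past it so the free path can recover what to release. A minimal standalone sketch of the same idiom, using plain malloc/free and an assumed 64-byte line size in place of the KMP wrappers and CACHE_LINE:

    #include <stdint.h>
    #include <stdlib.h>

    #define LINE 64u   /* assumed cache-line size; the runtime uses CACHE_LINE */

    static void *hdr_alloc(size_t size) {
        void *orig = malloc(sizeof(void *) + LINE + size);
        if (orig == NULL)
            return NULL;
        uintptr_t p = (uintptr_t)orig;
        if (p & (LINE - 1))                        /* round up to the next line boundary */
            p = (p + LINE) & ~(uintptr_t)(LINE - 1);
        *(void **)p = orig;                        /* remember the real malloc() result */
        return (void **)p + 1;                     /* caller's block starts after the header */
    }

    static void hdr_free(void *p) {
        free(*((void **)p - 1));                   /* recover and release the real pointer */
    }

As with the paired routines above, a block obtained from hdr_alloc() must go back through hdr_free(), never directly to free().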
-/* --------------------------------------------------------------------------- */
-
-/*
- * Keep freed kmpc_task_queue_t on an internal freelist and recycle since
- * they're of constant size.
- */
-
-static kmpc_task_queue_t *
-__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
- kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
- size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
-{
- kmp_int32 i;
- size_t bytes;
- kmpc_task_queue_t *new_queue;
- kmpc_aligned_shared_vars_t *shared_var_array;
- char *shared_var_storage;
- char *pt; /* for doing byte-adjusted address computations */
-
- __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- if( tq->tq_freelist ) {
- new_queue = tq -> tq_freelist;
- tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;
-
- KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
-
- new_queue->tq_flags = 0;
-
- __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
- }
- else {
- __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
-
- new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
- new_queue->tq_flags = 0;
- }
-
- /* space in the task queue for queue slots (allocate as one big chunk */
- /* of storage including new_taskq_task space) */
-
- sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
- pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
- new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
- *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
-
- /* chain the allocated thunks into a freelist for this queue */
-
- new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
-
- for (i = 0; i < (nthunks - 2); i++) {
- ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
-#ifdef KMP_DEBUG
- ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
-#endif
- }
-
- ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
-#ifdef KMP_DEBUG
- ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
-#endif
-
- /* initialize the locks */
-
- if (in_parallel) {
- __kmp_init_lock( & new_queue->tq_link_lck );
- __kmp_init_lock( & new_queue->tq_free_thunks_lck );
- __kmp_init_lock( & new_queue->tq_queue_lck );
- }
-
- /* now allocate the slots */
-
- bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
- new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );
-
- /* space for array of pointers to shared variable structures */
- sizeof_shareds += sizeof(kmpc_task_queue_t *);
- sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */
-
- bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
- shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);
-
- bytes = nshareds * sizeof_shareds;
- shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);
-
- for (i=0; i<nshareds; i++) {
- shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
- shared_var_array[i].ai_data->sv_queue = new_queue;
- }
- new_queue->tq_shareds = shared_var_array;
-
-
- /* array for number of outstanding thunks per thread */
-
- if (in_parallel) {
- bytes = nproc * sizeof(kmpc_aligned_int32_t);
- new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
- new_queue->tq_nproc = nproc;
-
- for (i=0; i<nproc; i++)
- new_queue->tq_th_thunks[i].ai_data = 0;
- }
-
- return new_queue;
-}
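One detail of __kmp_alloc_taskq() worth calling out: the per-queue thunks are not allocated one by one, but carved out of a single cache-line-padded block and threaded into a freelist (in the runtime the last record is held back to become the taskq task's thunk). A reduced, self-contained version of that carving, with illustrative names and the assumption that rec_size is pointer-aligned, which the runtime guarantees by padding sizeof_thunk up to CACHE_LINE:

    #include <stdlib.h>

    struct node { struct node *next; /* payload follows within the same record */ };

    /* Carve `count` records of `rec_size` bytes out of one allocation and chain
       them into a freelist; *base_out keeps the block for the eventual free(). */
    static struct node *carve_freelist(char **base_out, size_t rec_size, int count) {
        if (count <= 0)
            return NULL;
        char *base = malloc((size_t)count * rec_size);
        if (base == NULL)
            return NULL;
        for (int i = 0; i < count - 1; i++)
            ((struct node *)(base + i * rec_size))->next =
                (struct node *)(base + (i + 1) * rec_size);
        ((struct node *)(base + (count - 1) * rec_size))->next = NULL;
        *base_out = base;
        return (struct node *)base;
    }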
-
-static void
-__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
-{
- __kmpc_taskq_free(p->tq_thunk_space, global_tid);
- __kmpc_taskq_free(p->tq_queue, global_tid);
-
- /* free shared var structure storage */
- __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);
-
- /* free array of pointers to shared vars storage */
- __kmpc_taskq_free(p->tq_shareds, global_tid);
-
-#ifdef KMP_DEBUG
- p->tq_first_child = NULL;
- p->tq_next_child = NULL;
- p->tq_prev_child = NULL;
- p->tq_ref_count = -10;
- p->tq_shareds = NULL;
- p->tq_tasknum_queuing = 0;
- p->tq_tasknum_serving = 0;
- p->tq_queue = NULL;
- p->tq_thunk_space = NULL;
- p->tq_taskq_slot = NULL;
- p->tq_free_thunks = NULL;
- p->tq_nslots = 0;
- p->tq_head = 0;
- p->tq_tail = 0;
- p->tq_nfull = 0;
- p->tq_hiwat = 0;
-
- if (in_parallel) {
- int i;
-
- for (i=0; i<p->tq_nproc; i++)
- p->tq_th_thunks[i].ai_data = 0;
- }
- if ( __kmp_env_consistency_check )
- p->tq_loc = NULL;
- KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
- p->tq_flags = TQF_DEALLOCATED;
-#endif /* KMP_DEBUG */
-
- if (in_parallel) {
- __kmpc_taskq_free(p->tq_th_thunks, global_tid);
- __kmp_destroy_lock(& p->tq_link_lck);
- __kmp_destroy_lock(& p->tq_queue_lck);
- __kmp_destroy_lock(& p->tq_free_thunks_lck);
- }
-#ifdef KMP_DEBUG
- p->tq_th_thunks = NULL;
-#endif /* KMP_DEBUG */
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
- p->tq.tq_next_free = tq->tq_freelist;
-
- tq->tq_freelist = p;
- __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
-}
-
-/*
- * Once a group of thunks has been allocated for use in a particular queue,
- * these are managed via a per-queue freelist.
- * We force a check that there's always a thunk free if we need one.
- */
-
-static kmpc_thunk_t *
-__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
-{
- kmpc_thunk_t *fl;
-
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- fl = queue->tq_free_thunks;
-
- KMP_DEBUG_ASSERT (fl != NULL);
-
- queue->tq_free_thunks = fl->th.th_next_free;
- fl->th_flags = 0;
-
- if (in_parallel)
- __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
-
- return fl;
-}
-
-static void
-__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
-{
-#ifdef KMP_DEBUG
- p->th_task = 0;
- p->th_encl_thunk = 0;
- p->th_status = 0;
- p->th_tasknum = 0;
- /* Also could zero pointers to private vars */
-#endif
-
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- p->th.th_next_free = queue->tq_free_thunks;
- queue->tq_free_thunks = p;
-
-#ifdef KMP_DEBUG
- p->th_flags = TQF_DEALLOCATED;
-#endif
-
- if (in_parallel)
- __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
-}
-
-/* --------------------------------------------------------------------------- */
-
-/* returns nonzero if the queue just became full after the enqueue */
-
-static kmp_int32
-__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
-{
- kmp_int32 ret;
-
- /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then) */
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots); /* check queue not full */
-
- queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
-
- if (queue->tq_head >= queue->tq_nslots)
- queue->tq_head = 0;
-
- (queue->tq_nfull)++;
-
- KMP_MB(); /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */
-
- ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
-
- if (in_parallel) {
- /* don't need to wait until workers are released before unlocking */
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-
- if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
- /* If just creating the root queue, the worker threads are waiting at */
- /* a join barrier until now, when there's something in the queue for */
- /* them to do; release them now to do work. */
- /* This should only be done when this is the first task enqueued, */
- /* so reset the flag here also. */
-
- tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers are still in spin mode */
-
- KMP_MB(); /* avoid releasing barrier twice if taskq_task switches threads */
-
- __kmpc_end_barrier_master( NULL, global_tid);
- }
- }
-
- return ret;
-}
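The enqueue path above and the dequeue path below share a simple ring-buffer discipline: tq_head is the insertion index, tq_tail the extraction index, and tq_nfull the occupancy count that tells a full queue apart from an empty one. A tiny self-contained model of just that index arithmetic (no locks, thunks, or barrier release):

    #define NSLOTS 8                              /* illustrative slot count only */

    struct ring { int head, tail, nfull, slot[NSLOTS]; };

    static int ring_put(struct ring *r, int v) {  /* returns 1 if the put filled the ring */
        r->slot[r->head++] = v;
        if (r->head >= NSLOTS)
            r->head = 0;
        return ++r->nfull == NSLOTS;
    }

    static int ring_get(struct ring *r) {         /* caller must check r->nfull > 0 first */
        int v = r->slot[r->tail++];
        if (r->tail >= NSLOTS)
            r->tail = 0;
        r->nfull--;
        return v;
    }

The real routines layer the queue lock, the TQF_RELEASE_WORKERS barrier release, and per-thread outstanding-thunk accounting on top of this skeleton.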
-
-static kmpc_thunk_t *
-__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
-{
- kmpc_thunk_t *pt;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- KMP_DEBUG_ASSERT (queue->tq_nfull > 0); /* check queue not empty */
-
- if (queue->tq.tq_parent != NULL && in_parallel) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- ct = ++(queue->tq_ref_count);
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
- }
-
- pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
-
- if (queue->tq_tail >= queue->tq_nslots)
- queue->tq_tail = 0;
-
- if (in_parallel) {
- queue->tq_th_thunks[tid].ai_data++;
-
- KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */
-
- KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
- global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
- }
-
- (queue->tq_nfull)--;
-
-#ifdef KMP_DEBUG
- KMP_MB();
-
- /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */
-
- KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
-
- if (in_parallel) {
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
- }
-#endif
-
- return pt;
-}
-
-/*
- * Find the next (non-null) task to dequeue and return it.
- * This is never called unless in_parallel=TRUE
- *
- * Here are the rules for deciding which queue to take the task from:
- * 1. Walk up the task queue tree from the current queue's parent and look
- * on the way up (for loop, below).
- * 2. Do a depth-first search back down the tree from the root and
- * look (find_task_in_descendant_queue()).
- *
- * Here are the rules for deciding which task to take from a queue
- * (__kmp_find_task_in_queue ()):
- * 1. Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task
- * must be staged to make sure we execute the last one with
- * TQF_IS_LAST_TASK at the end of task queue execution.
- * 2. If the queue length is below some high water mark and the taskq task
- * is enqueued, prefer running the taskq task.
- * 3. Otherwise, take a (normal) task from the queue.
- *
- * If we do all this and return pt == NULL at the bottom of this routine,
- * this means there are no more tasks to execute (except possibly for
- * TQF_IS_LASTPRIVATE).
- */
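Condensed into a single decision ladder, the priority order described above looks roughly like the sketch below. The struct is only a stand-in for the handful of kmpc_task_queue_t fields the real check reads under tq_queue_lck, and THUNKS_PER_TH stands in for __KMP_TASKQ_THUNKS_PER_TH:

    #define THUNKS_PER_TH 1               /* stand-in for __KMP_TASKQ_THUNKS_PER_TH */

    struct q_view {                       /* simplified view of one queue */
        int has_taskq_task;               /* tq_taskq_slot != NULL                 */
        int nfull, hiwat;                 /* tq_nfull, tq_hiwat                    */
        int my_outstanding;               /* tq_th_thunks[tid].ai_data             */
        int is_lastprivate;               /* tq_flags & TQF_IS_LASTPRIVATE         */
        int is_last_task;                 /* tq_flags & TQF_IS_LAST_TASK           */
    };

    enum pick { PICK_TASKQ_TASK, PICK_NORMAL_TASK, PICK_LAST_TASK, PICK_NOTHING };

    static enum pick pick_next(const struct q_view *q) {
        if (q->has_taskq_task && q->nfull <= q->hiwat)
            return PICK_TASKQ_TASK;               /* room to refill: run the dispatcher */
        if (q->nfull == 0 || q->my_outstanding >= THUNKS_PER_TH)
            return PICK_NOTHING;                  /* nothing runnable for this thread  */
        if (q->nfull > 1 || !q->is_lastprivate)
            return PICK_NORMAL_TASK;              /* safe even if a last task is staged */
        if (q->is_last_task)
            return PICK_LAST_TASK;                /* single remaining task, marked last */
        return PICK_NOTHING;                      /* hold back the lastprivate task     */
    }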
-
-static kmpc_thunk_t *
-__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
-{
- kmpc_thunk_t *pt = NULL;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- /* To prevent deadlock from tq_queue_lck if queue already deallocated */
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- /* Check again to avoid race in __kmpc_end_taskq() */
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
- /* if there's enough room in the queue and the dispatcher */
- /* (taskq task) is available, schedule more tasks */
- pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
- queue->tq_taskq_slot = NULL;
- }
- else if (queue->tq_nfull == 0 ||
- queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
- /* do nothing if no thunks available or this thread can't */
- /* run any because it already is executing too many */
-
- pt = NULL;
- }
- else if (queue->tq_nfull > 1) {
- /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */
-
- pt = __kmp_dequeue_task (global_tid, queue, TRUE);
- }
- else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
- /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */
-
- pt = __kmp_dequeue_task (global_tid, queue, TRUE);
- }
- else if (queue->tq_flags & TQF_IS_LAST_TASK) {
- /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */
- /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
- /* instrumentation does copy-out. */
-
- pt = __kmp_dequeue_task (global_tid, queue, TRUE);
- pt->th_flags |= TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
- }
- }
-
- /* GEH - What happens here if is lastprivate, but not last task? */
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
-
- return pt;
-}
-
-/*
- * Walk a tree of queues starting at queue's first child
- * and return a non-NULL thunk if one can be scheduled.
- * Must only be called when in_parallel=TRUE
- */
-
-static kmpc_thunk_t *
-__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
-{
- kmpc_thunk_t *pt = NULL;
- kmpc_task_queue_t *queue = curr_queue;
-
- if (curr_queue->tq_first_child != NULL) {
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
- if (queue == NULL) {
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- return NULL;
- }
-
- while (queue != NULL) {
- int ct;
- kmpc_task_queue_t *next;
-
- ct= ++(queue->tq_ref_count);
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
-
- pt = __kmp_find_task_in_queue (global_tid, queue);
-
- if (pt != NULL) {
- int ct;
-
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
-
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-
- return pt;
- }
-
- /* although reference count stays active during descendant walk, shouldn't matter */
- /* since if children still exist, reference counts aren't being monitored anyway */
-
- pt = __kmp_find_task_in_descendant_queue (global_tid, queue);
-
- if (pt != NULL) {
- int ct;
-
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
-
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-
- return pt;
- }
-
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- next = queue->tq_next_child;
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
-
- queue = next;
- }
-
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- }
-
- return pt;
-}
-
-/*
- * Walk up the taskq tree looking for a task to execute.
- * If we get to the root, search the tree for a descendent queue task.
- * Must only be called when in_parallel=TRUE
- */
-
-static kmpc_thunk_t *
-__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
-{
- kmpc_task_queue_t *queue;
- kmpc_thunk_t *pt;
-
- pt = NULL;
-
- if (curr_queue->tq.tq_parent != NULL) {
- queue = curr_queue->tq.tq_parent;
-
- while (queue != NULL) {
- if (queue->tq.tq_parent != NULL) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = ++(queue->tq_ref_count);
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
- }
-
- pt = __kmp_find_task_in_queue (global_tid, queue);
- if (pt != NULL) {
- if (queue->tq.tq_parent != NULL) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
-
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- }
-
- return pt;
- }
-
- if (queue->tq.tq_parent != NULL) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
- }
- queue = queue->tq.tq_parent;
-
- if (queue != NULL)
- __kmp_release_lock(& queue->tq_link_lck, global_tid);
- }
-
- }
-
- pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );
-
- return pt;
-}
-
-static int
-__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
-{
- int i;
-
- /* KMP_MB(); *//* is this really necessary? */
-
- for (i=0; i<queue->tq_nproc; i++) {
- if (queue->tq_th_thunks[i].ai_data != 0)
- return FALSE;
- }
-
- return TRUE;
-}
-
-static int
-__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
-{
- return (queue->tq_first_child != NULL);
-}
-
-static void
-__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
-{
-#ifdef KMP_DEBUG
- kmp_int32 i;
- kmpc_thunk_t *thunk;
-#endif
-
- KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
- KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));
-
- /* sub-queue in a recursion, not the root task queue */
- KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);
-
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
-
- /* unlink queue from its siblings if any at this level */
- if (queue->tq_prev_child != NULL)
- queue->tq_prev_child->tq_next_child = queue->tq_next_child;
- if (queue->tq_next_child != NULL)
- queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
- if (queue->tq.tq_parent->tq_first_child == queue)
- queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
-
- queue->tq_prev_child = NULL;
- queue->tq_next_child = NULL;
-
- if (in_parallel) {
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
- __LINE__, global_tid, queue, queue->tq_ref_count));
-
- /* wait until all other threads have stopped accessing this queue */
- while (queue->tq_ref_count > 1) {
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);
-
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- }
-
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
- __LINE__, global_tid, queue));
-
-#ifdef KMP_DEBUG
- KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
- KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
-
- for (i=0; i<queue->tq_nproc; i++) {
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
- }
-
- i = 0;
- for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
- ++i;
-
- KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
-#endif
-
- /* release storage for queue entry */
- __kmp_free_taskq ( tq, queue, TRUE, global_tid );
-
- KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
- KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
-}
-
-/*
- * Starting from indicated queue, proceed downward through tree and
- * remove all taskqs which are finished, but only go down to taskqs
- * which have the "nowait" clause present. Assume this is only called
- * when in_parallel=TRUE.
- */
-
-static void
-__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
-{
- kmpc_task_queue_t *queue = curr_queue;
-
- if (curr_queue->tq_first_child != NULL) {
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
-        if (queue == NULL) {
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- return;
- }
-
- while (queue != NULL) {
- kmpc_task_queue_t *next;
- int ct = ++(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
-
-
- /* although reference count stays active during descendant walk, */
- /* shouldn't matter since if children still exist, reference */
- /* counts aren't being monitored anyway */
-
- if (queue->tq_flags & TQF_IS_NOWAIT) {
- __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );
-
- if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
- __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {
-
- /*
- Only remove this if we have not already marked it for deallocation.
- This should prevent multiple threads from trying to free this.
- */
-
- if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-
- __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
-
- /* Can't do any more here since can't be sure where sibling queue is so just exit this level */
- return;
- }
- else {
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
- }
- /* otherwise, just fall through and decrement reference count */
- }
- }
-
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- next = queue->tq_next_child;
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
-
- queue = next;
- }
-
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- }
-}
-
-/*
- * Starting from indicated queue, proceed downward through tree and
- * remove all taskq's assuming all are finished and
- * assuming NO other threads are executing at this point.
- */
-
-static void
-__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
-{
- kmpc_task_queue_t *next_child;
-
- queue = (kmpc_task_queue_t *) queue->tq_first_child;
-
- while (queue != NULL) {
- __kmp_remove_all_child_taskq ( tq, global_tid, queue );
-
- next_child = queue->tq_next_child;
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
- queue = next_child;
- }
-}
-
-static void
-__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
-{
- kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
- kmp_int32 tid = __kmp_tid_from_gtid( global_tid );
-
- KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
- KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
- /*
-     * For the taskq task, the curr_thunk push and pop pairs are set up as follows:
- *
- * happens exactly once:
- * 1) __kmpc_taskq : push (if returning thunk only)
- * 4) __kmpc_end_taskq_task : pop
- *
- * optionally happens *each* time taskq task is dequeued/enqueued:
- * 2) __kmpc_taskq_task : pop
- * 3) __kmp_execute_task_from_queue : push
- *
- * execution ordering: 1,(2,3)*,4
- */
-
- if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
- kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
- thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;
-
- if ( __kmp_env_consistency_check ) {
- __kmp_push_workshare( global_tid,
- (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
- queue->tq_loc );
- }
- }
- else {
- if ( __kmp_env_consistency_check )
- __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
- }
-
- if (in_parallel) {
- thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
- tq->tq_curr_thunk[tid] = thunk;
-
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
-
- KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
- thunk->th_task (global_tid, thunk);
- KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
-
- if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
- queue->tq_loc );
-
- if (in_parallel) {
- tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
- thunk->th_encl_thunk = NULL;
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
-
- if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
- __kmp_taskq_check_ordered(global_tid, thunk);
- }
-
- __kmp_free_thunk (queue, thunk, in_parallel, global_tid);
-
- KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
- if (in_parallel) {
- KMP_MB(); /* needed so thunk put on free list before outstanding thunk count is decremented */
-
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
-
- KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
- global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));
-
- queue->tq_th_thunks[tid].ai_data--;
-
- /* KMP_MB(); */ /* is MB really necessary ? */
- }
-
- if (queue->tq.tq_parent != NULL && in_parallel) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- ct = --(queue->tq_ref_count);
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
- }
- }
-}
-
-/* --------------------------------------------------------------------------- */
-
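Before the individual entry points, it may help to have the overall calling shape in one place. The outline below is only an inference from the comments in this file (the push/pop ordering note in __kmp_execute_task_from_queue() and the instrumentation-sequence note in __kmpc_task()); it is pseudo-code, not a compilable unit, and every identifier other than the __kmpc_* entry points (outlined_taskq_region, taskq_task_fn, task_fn, the size and flag arguments) is a placeholder. Re-enqueueing the generator through __kmpc_taskq_task() is omitted for brevity:

    /* One plausible shape of the compiler-outlined code around these entry points. */
    static void outlined_taskq_region(ident_t *loc, kmp_int32 gtid) {
        kmpc_shared_vars_t *shareds;
        kmpc_thunk_t *tq_thunk =
            __kmpc_taskq(loc, gtid, taskq_task_fn,          /* generator routine */
                         sizeof_thunk, sizeof_shareds,      /* placeholder sizes */
                         flags, &shareds);

        /* In a parallel region only one thread receives a non-NULL thunk and runs
           the generator; the others fall through to __kmpc_end_taskq() and spend
           their time dequeueing and executing tasks there. */
        if (tq_thunk != NULL)
            taskq_task_fn(gtid, tq_thunk);

        __kmpc_end_taskq(loc, gtid, tq_thunk);              /* last thread out frees the queue */
    }

    /* Generator body: the instrumentation sequence noted in __kmpc_task(). */
    static kmp_int32 taskq_task_fn(kmp_int32 gtid, kmpc_thunk_t *taskq_thunk) {
        while (more_tasks_to_generate()) {                  /* placeholder condition */
            kmpc_thunk_t *t = __kmpc_task_buffer(loc, gtid, taskq_thunk, task_fn);
            /* ... initialize the task's private variables inside *t ... */
            __kmpc_task(loc, gtid, t);
        }
        __kmpc_end_taskq_task(loc, gtid, taskq_thunk);      /* done generating tasks */
        return 0;
    }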
-/* starts a taskq; creates and returns a thunk for the taskq_task */
-/* also, returns pointer to shared vars for this thread in "shareds" arg */
-
-kmpc_thunk_t *
-__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
- size_t sizeof_thunk, size_t sizeof_shareds,
- kmp_int32 flags, kmpc_shared_vars_t **shareds )
-{
- int in_parallel;
- kmp_int32 nslots, nthunks, nshareds, nproc;
- kmpc_task_queue_t *new_queue, *curr_queue;
- kmpc_thunk_t *new_taskq_thunk;
- kmp_info_t *th;
- kmp_team_t *team;
- kmp_taskq_t *tq;
- kmp_int32 tid;
-
- KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));
-
- th = __kmp_threads[ global_tid ];
- team = th -> th.th_team;
- tq = & team -> t.t_taskq;
- nproc = team -> t.t_nproc;
- tid = __kmp_tid_from_gtid( global_tid );
-
- /* find out whether this is a parallel taskq or serialized one. */
- in_parallel = in_parallel_context( team );
-
- if( ! tq->tq_root ) {
- if (in_parallel) {
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
-
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
- }
-
- if (in_parallel) {
- /* This shouldn't be a barrier region boundary, it will confuse the user. */
- /* Need the boundary to be at the end taskq instead. */
- if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
- /* Creating the active root queue, and we are not the master thread. */
- /* The master thread below created the queue and tasks have been */
- /* enqueued, and the master thread released this barrier. This */
- /* worker thread can now proceed and execute tasks. See also the */
- /* TQF_RELEASE_WORKERS which is used to handle this case. */
-
- *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;
-
- KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
-
- return NULL;
- }
- }
-
- /* master thread only executes this code */
-
- if( tq->tq_curr_thunk_capacity < nproc ) {
- if(tq->tq_curr_thunk)
- __kmp_free(tq->tq_curr_thunk);
- else {
- /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */
- __kmp_init_lock( & tq->tq_freelist_lck );
- }
-
- tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
- tq -> tq_curr_thunk_capacity = nproc;
- }
-
- if (in_parallel)
- tq->tq_global_flags = TQF_RELEASE_WORKERS;
- }
-
- /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
- /* on some heuristics (e.g., depth of queue nesting?). */
-
- nslots = (in_parallel) ? (2 * nproc) : 1;
-
- /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
- /* jobs being executed by other threads, and one extra for taskq slot */
-
- nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;
-
- /* Only the root taskq gets a per-thread array of shareds. */
- /* The rest of the taskq's only get one copy of the shared vars. */
-
- nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;
-
- /* create overall queue data structure and its components that require allocation */
-
- new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
- sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );
-
- /* rest of new_queue initializations */
-
- new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;
-
- if (in_parallel) {
- new_queue->tq_tasknum_queuing = 0;
- new_queue->tq_tasknum_serving = 0;
- new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
- }
-
- new_queue->tq_taskq_slot = NULL;
- new_queue->tq_nslots = nslots;
- new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
- new_queue->tq_nfull = 0;
- new_queue->tq_head = 0;
- new_queue->tq_tail = 0;
- new_queue->tq_loc = loc;
-
- if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
- /* prepare to serve the first-queued task's ORDERED directive */
- new_queue->tq_tasknum_serving = 1;
-
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
-
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
- }
-
- /* create a new thunk for the taskq_task in the new_queue */
- *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;
-
- new_taskq_thunk->th.th_shareds = *shareds;
- new_taskq_thunk->th_task = taskq_task;
- new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
- new_taskq_thunk->th_status = 0;
-
- KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
-
- /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */
-
- /* insert the new task queue into the tree, but only after all fields initialized */
-
- if (in_parallel) {
- if( ! tq->tq_root ) {
- new_queue->tq.tq_parent = NULL;
- new_queue->tq_first_child = NULL;
- new_queue->tq_next_child = NULL;
- new_queue->tq_prev_child = NULL;
- new_queue->tq_ref_count = 1;
- tq->tq_root = new_queue;
- }
- else {
- curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
- new_queue->tq.tq_parent = curr_queue;
- new_queue->tq_first_child = NULL;
- new_queue->tq_prev_child = NULL;
-            new_queue->tq_ref_count = 1;      /* for the thread that built the queue */
-
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
- __LINE__, global_tid, new_queue, new_queue->tq_ref_count));
-
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;
-
- if (curr_queue->tq_first_child != NULL)
- curr_queue->tq_first_child->tq_prev_child = new_queue;
-
- curr_queue->tq_first_child = new_queue;
-
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- }
-
- /* set up thunk stack only after code that determines curr_queue above */
- new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
- tq->tq_curr_thunk[tid] = new_taskq_thunk;
-
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
- else {
- new_taskq_thunk->th_encl_thunk = 0;
- new_queue->tq.tq_parent = NULL;
- new_queue->tq_first_child = NULL;
- new_queue->tq_next_child = NULL;
- new_queue->tq_prev_child = NULL;
- new_queue->tq_ref_count = 1;
- }
-
-#ifdef KMP_DEBUG
- KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
- KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));
-
- if (in_parallel) {
- KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
- } else {
- KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
- }
-
- KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));
-
- if (in_parallel) {
- KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
- }
-#endif /* KMP_DEBUG */
-
- if ( __kmp_env_consistency_check )
- __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );
-
- KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
-
- return new_taskq_thunk;
-}
-
-
-/* ends a taskq; last thread out destroys the queue */
-
-void
-__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
-{
-#ifdef KMP_DEBUG
- kmp_int32 i;
-#endif
- kmp_taskq_t *tq;
- int in_parallel;
- kmp_info_t *th;
- kmp_int32 is_outermost;
- kmpc_task_queue_t *queue;
- kmpc_thunk_t *thunk;
- int nproc;
-
- KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));
-
- tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
-
- /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */
- queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;
-
- KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
- is_outermost = (queue == tq->tq_root);
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
-
- if (in_parallel) {
- kmp_uint32 spins;
-
- /* this is just a safeguard to release the waiting threads if */
- /* the outermost taskq never queues a task */
-
- if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
- if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
- /* no lock needed, workers are still in spin mode */
- tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
-
- __kmp_end_split_barrier( bs_plain_barrier, global_tid );
- }
- }
-
- /* keep dequeueing work until all tasks are queued and dequeued */
-
- do {
- /* wait until something is available to dequeue */
- KMP_INIT_YIELD(spins);
-
- while ( (queue->tq_nfull == 0)
- && (queue->tq_taskq_slot == NULL)
- && (! __kmp_taskq_has_any_children(queue) )
- && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
- ) {
- KMP_YIELD_WHEN( TRUE, spins );
- }
-
- /* check to see if we can execute tasks in the queue */
- while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
- && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
- ) {
- KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
-            /* see if work can be found in a descendant queue */
- if ( (__kmp_taskq_has_any_children(queue))
- && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
- ) {
-
- KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));
-
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
- } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
- || (queue->tq_nfull != 0)
- );
-
- KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));
-
-        /* keep executing stolen work while not all tasks are finished and more
-           work can be found in descendant queues */
-
- while ( (!__kmp_taskq_tasks_finished(queue))
- && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
- ) {
-
- KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
-
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
-        KF_TRACE(50, ("No work found in descendant queues or all work finished in queue: %p (%d)\n", queue, global_tid));
-
- if (!is_outermost) {
- /* need to return if NOWAIT present and not outermost taskq */
-
- if (queue->tq_flags & TQF_IS_NOWAIT) {
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- queue->tq_ref_count--;
- KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
-
- return;
- }
-
- __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
-
- /* WAIT until all tasks are finished and no child queues exist before proceeding */
- KMP_INIT_YIELD(spins);
-
- while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
- thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );
-
- if (thunk != NULL) {
- KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
- KMP_YIELD_WHEN( thunk == NULL, spins );
-
- __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
- }
-
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
- queue->tq_flags |= TQF_DEALLOCATED;
- }
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-
- /* only the allocating thread can deallocate the queue */
- if (taskq_thunk != NULL) {
- __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
- }
-
- KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));
-
- return;
- }
-
- /* Outermost Queue: steal work from descendants until all tasks are finished */
-
- KMP_INIT_YIELD(spins);
-
- while (!__kmp_taskq_tasks_finished(queue)) {
- thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
-
- if (thunk != NULL) {
- KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
-
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
- KMP_YIELD_WHEN( thunk == NULL, spins );
- }
-
- /* Need this barrier to prevent destruction of queue before threads have all executed above code */
- /* This may need to be done earlier when NOWAIT is implemented for the outermost level */
-
- if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
- /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */
- /* for right now, everybody waits, and the master thread destroys the */
- /* remaining queues. */
-
- __kmp_remove_all_child_taskq( tq, global_tid, queue );
-
- /* Now destroy the root queue */
- KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
-#ifdef KMP_DEBUG
- /* the root queue entry */
- KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));
-
- /* children must all be gone by now because of barrier above */
- KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
-
- for (i=0; i<nproc; i++) {
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
- }
-
- for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);
-
- KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
-
- for (i = 0; i < nproc; i++) {
- KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
- }
-#endif
- /* unlink the root queue entry */
- tq -> tq_root = NULL;
-
- /* release storage for root queue entry */
- KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));
-
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
-
- KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
-
- /* release the workers now that the data structures are up to date */
- __kmp_end_split_barrier( bs_plain_barrier, global_tid );
- }
-
- th = __kmp_threads[ global_tid ];
-
- /* Reset ORDERED SECTION to parallel version */
- th->th.th_dispatch->th_deo_fcn = 0;
-
- /* Reset ORDERED SECTION to parallel version */
- th->th.th_dispatch->th_dxo_fcn = 0;
- }
- else {
- /* in serial execution context, dequeue the last task */
- /* and execute it, if there were any tasks encountered */
-
- if (queue->tq_nfull > 0) {
- KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
-
- thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
-
- if (queue->tq_flags & TQF_IS_LAST_TASK) {
- /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
- /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
- /* instrumentation does copy-out. */
-
- /* no need for test_then_or call since already locked */
- thunk->th_flags |= TQF_IS_LAST_TASK;
- }
-
- KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));
-
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
- /* destroy the unattached serial queue now that there is no more work to do */
- KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
-#ifdef KMP_DEBUG
- i = 0;
- for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
- ++i;
- KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);
-#endif
- /* release storage for unattached serial queue */
- KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
-
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
- }
-
- KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
-}
-
-/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */
-/* Returns nonzero if just filled up queue */
-
-kmp_int32
-__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
-{
- kmp_int32 ret;
- kmpc_task_queue_t *queue;
- int in_parallel;
- kmp_taskq_t *tq;
-
- KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));
-
- KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */
-
- tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- queue = thunk->th.th_shareds->sv_queue;
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
-
- if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
- thunk->th_tasknum = ++queue->tq_tasknum_queuing;
-
- /* For serial execution dequeue the preceding task and execute it, if one exists */
- /* This cannot be the last task. That one is handled in __kmpc_end_taskq */
-
- if (!in_parallel && queue->tq_nfull > 0) {
- kmpc_thunk_t *prev_thunk;
-
- KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
-
- prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
-
- KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));
-
- __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
- }
-
- /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */
- /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */
- /* task queue is not full and allocates a thunk (which is then passed to */
- /* __kmpc_task()). So, the enqueue below should never fail due to a full queue. */
-
- KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
-
- ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );
-
- KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
- KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));
-
- return ret;
-}
-
-/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */
-/* this should never be called unless in a parallel context */
-
-void
-__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
-{
- kmpc_task_queue_t *queue;
- kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
- KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
-
- queue = thunk->th.th_shareds->sv_queue;
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, ct_taskq, loc );
-
- /* thunk->th_task is the taskq_task */
- KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);
-
- /* not supposed to call __kmpc_taskq_task if it's already enqueued */
- KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);
-
- /* dequeue taskq thunk from curr_thunk stack */
- tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
- thunk->th_encl_thunk = NULL;
-
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
-
- thunk->th_status = status;
-
- KMP_MB(); /* flush thunk->th_status before taskq_task enqueued to avoid race condition */
-
- /* enqueue taskq_task in thunk into special slot in queue */
- /* GEH - probably don't need to lock taskq slot since only one */
- /* thread enqueues & already a lock set at dequeue point */
-
- queue->tq_taskq_slot = thunk;
-
- KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
-}
-
-/* ends a taskq_task; done generating tasks */
-
-void
-__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
-{
- kmp_taskq_t *tq;
- kmpc_task_queue_t *queue;
- int in_parallel;
- int tid;
-
- KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
-
- tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- queue = thunk->th.th_shareds->sv_queue;
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
- tid = __kmp_tid_from_gtid( global_tid );
-
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, ct_taskq, loc );
-
- if (in_parallel) {
-#if KMP_ARCH_X86 || \
- KMP_ARCH_X86_64
-
- KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
-#else
- {
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
-
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
-#endif
- }
-
- if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
- /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */
- /* queue if TQF_IS_LASTPRIVATE so we can positively identify that last task */
- /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. When */
- /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so */
- /* we know the last one in the queue is the lastprivate task. Mark the queue */
- /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that */
- /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */
- /* bit signals the instrumented code to do copy-outs after execution). */
-
- if (! in_parallel) {
- /* No synchronization needed for serial context */
- queue->tq_flags |= TQF_IS_LAST_TASK;
- }
- else {
-#if KMP_ARCH_X86 || \
- KMP_ARCH_X86_64
-
- KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
-#else
- {
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
-
- queue->tq_flags |= TQF_IS_LAST_TASK;
-
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
-#endif
- /* to prevent race condition where last task is dequeued but */
- /* flag isn't visible yet (not sure about this) */
- KMP_MB();
- }
- }
-
- /* dequeue taskq thunk from curr_thunk stack */
- if (in_parallel) {
- tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
- thunk->th_encl_thunk = NULL;
-
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
-
- KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
-}
-
-/* returns thunk for a regular task based on taskq_thunk */
-/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */
-
-kmpc_thunk_t *
-__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
-{
- kmp_taskq_t *tq;
- kmpc_task_queue_t *queue;
- kmpc_thunk_t *new_thunk;
- int in_parallel;
-
- KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));
-
- KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */
-
- tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- queue = taskq_thunk->th.th_shareds->sv_queue;
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
-
- /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */
- /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */
- /* task queue is not full and allocates a thunk (which is then passed to */
- /* __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be */
- /* the next to be enqueued in __kmpc_task(). */
-
- new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
- new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
- new_thunk->th_encl_thunk = NULL;
- new_thunk->th_task = task;
-
- /* GEH - shouldn't need to lock the read of tq_flags here */
- new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;
-
- new_thunk->th_status = 0;
-
- KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));
-
- KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));
-
- KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));
-
- return new_thunk;
-}
-
-/* --------------------------------------------------------------------------- */
+/*
+ * kmp_taskq.c -- TASKQ support for OpenMP.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_error.h"
+
+#define MAX_MESSAGE 512
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Taskq routines and global variables
+ */
+
+#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);
+
+#define THREAD_ALLOC_FOR_TASKQ
+
+static int
+in_parallel_context( kmp_team_t *team )
+{
+ return ! team -> t.t_serialized;
+}
+
+static void
+__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ int gtid = *gtid_ref;
+ int tid = __kmp_tid_from_gtid( gtid );
+ kmp_uint32 my_token;
+ kmpc_task_queue_t *taskq;
+ kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
+
+ if ( __kmp_env_consistency_check )
+#if KMP_USE_DYNAMIC_LOCK
+ __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 );
+#else
+ __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
+#endif
+
+ if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* GEH - need check here under stats to make sure */
+ /* inside task (curr_thunk[*tid_ref] != NULL) */
+
+ my_token = tq->tq_curr_thunk[ tid ]-> th_tasknum;
+
+ taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;
+
+ KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
+ KMP_MB();
+ }
+}
+
+static void
+__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
+{
+ int gtid = *gtid_ref;
+ int tid = __kmp_tid_from_gtid( gtid );
+ kmp_uint32 my_token;
+ kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );
+
+ if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* GEH - need check here under stats to make sure */
+ /* inside task (curr_thunk[tid] != NULL) */
+
+ my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
+}
+
+static void
+__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
+{
+ kmp_uint32 my_token;
+ kmpc_task_queue_t *taskq;
+
+ /* assume we are always called from an active parallel context */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ my_token = thunk -> th_tasknum;
+
+ taskq = thunk -> th.th_shareds -> sv_queue;
+
+ if(taskq->tq_tasknum_serving <= my_token) {
+ KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
+ KMP_MB();
+ taskq->tq_tasknum_serving = my_token +1;
+ KMP_MB();
+ }
+}
+
+#ifdef KMP_DEBUG
+
+static void
+__kmp_dump_TQF(kmp_int32 flags)
+{
+ if (flags & TQF_IS_ORDERED)
+ __kmp_printf("ORDERED ");
+ if (flags & TQF_IS_LASTPRIVATE)
+ __kmp_printf("LAST_PRIV ");
+ if (flags & TQF_IS_NOWAIT)
+ __kmp_printf("NOWAIT ");
+ if (flags & TQF_HEURISTICS)
+ __kmp_printf("HEURIST ");
+ if (flags & TQF_INTERFACE_RESERVED1)
+ __kmp_printf("RESERV1 ");
+ if (flags & TQF_INTERFACE_RESERVED2)
+ __kmp_printf("RESERV2 ");
+ if (flags & TQF_INTERFACE_RESERVED3)
+ __kmp_printf("RESERV3 ");
+ if (flags & TQF_INTERFACE_RESERVED4)
+ __kmp_printf("RESERV4 ");
+ if (flags & TQF_IS_LAST_TASK)
+ __kmp_printf("LAST_TASK ");
+ if (flags & TQF_TASKQ_TASK)
+ __kmp_printf("TASKQ_TASK ");
+ if (flags & TQF_RELEASE_WORKERS)
+ __kmp_printf("RELEASE ");
+ if (flags & TQF_ALL_TASKS_QUEUED)
+ __kmp_printf("ALL_QUEUED ");
+ if (flags & TQF_PARALLEL_CONTEXT)
+ __kmp_printf("PARALLEL ");
+ if (flags & TQF_DEALLOCATED)
+ __kmp_printf("DEALLOC ");
+ if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
+ __kmp_printf("(NONE)");
+}
+
+static void
+__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
+{
+ int i;
+ int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
+
+ __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);
+
+ if (thunk != NULL) {
+ for (i = 0; i < nproc; i++) {
+ if( tq->tq_curr_thunk[i] == thunk ) {
+ __kmp_printf("[%i] ", i);
+ }
+ }
+ __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
+ __kmp_printf("th_task=%p, ", thunk->th_task);
+ __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
+ __kmp_printf("th_status=%d, ", thunk->th_status);
+ __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
+ __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);
+ }
+
+ __kmp_printf("\n");
+}
+
+static void
+__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
+{
+ kmpc_thunk_t *th;
+
+ __kmp_printf(" Thunk stack for T#%d: ", thread_num);
+
+ for (th = thunk; th != NULL; th = th->th_encl_thunk )
+ __kmp_printf("%p ", th);
+
+ __kmp_printf("\n");
+}
+
+static void
+__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
+{
+ int qs, count, i;
+ kmpc_thunk_t *thunk;
+ kmpc_task_queue_t *taskq;
+
+ __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
+
+ if (queue != NULL) {
+ int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
+
+ if ( __kmp_env_consistency_check ) {
+ __kmp_printf(" tq_loc : ");
+ }
+ if (in_parallel) {
+
+ //if (queue->tq.tq_parent != 0)
+ //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
+ __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
+ __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
+ __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
+ __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);
+
+ //__kmp_release_lock(& queue->tq_link_lck, global_tid);
+
+ //if (queue->tq.tq_parent != 0)
+ //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
+ //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ }
+
+ __kmp_printf(" tq_shareds : ");
+ for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
+ __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
+ __kmp_printf("\n");
+
+ if (in_parallel) {
+ __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
+ __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
+ }
+
+ __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
+ __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
+ __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);
+
+ __kmp_printf(" tq_free_thunks : ");
+ for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
+ __kmp_printf("%p ", thunk);
+ __kmp_printf("\n");
+
+ __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
+ __kmp_printf(" tq_head : %d\n", queue->tq_head);
+ __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
+ __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
+ __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
+ __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags);
+ __kmp_printf("\n");
+
+ if (in_parallel) {
+ __kmp_printf(" tq_th_thunks : ");
+ for (i = 0; i < queue->tq_nproc; i++) {
+ __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
+ }
+ __kmp_printf("\n");
+ }
+
+ __kmp_printf("\n");
+ __kmp_printf(" Queue slots:\n");
+
+
+ qs = queue->tq_tail;
+ for ( count = 0; count < queue->tq_nfull; ++count ) {
+ __kmp_printf("(%d)", qs);
+ __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
+ qs = (qs+1) % queue->tq_nslots;
+ }
+
+ __kmp_printf("\n");
+
+ if (in_parallel) {
+ if (queue->tq_taskq_slot != NULL) {
+ __kmp_printf(" TaskQ slot:\n");
+ __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
+ __kmp_printf("\n");
+ }
+ //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+ }
+ }
+
+ __kmp_printf(" Taskq freelist: ");
+
+ //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
+ __kmp_printf("%p ", taskq);
+
+ //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+
+ __kmp_printf("\n\n");
+}
+
+static void
+__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
+{
+ int i, count, qs;
+ int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
+ kmpc_task_queue_t *queue = curr_queue;
+
+ if (curr_queue == NULL)
+ return;
+
+ __kmp_printf(" ");
+
+ for (i=0; i<level; i++)
+ __kmp_printf(" ");
+
+ __kmp_printf("%p", curr_queue);
+
+ for (i = 0; i < nproc; i++) {
+ if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
+ __kmp_printf(" [%i]", i);
+ }
+ }
+
+ __kmp_printf(":");
+
+ //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ qs = curr_queue->tq_tail;
+
+ for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
+ __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
+ qs = (qs+1) % curr_queue->tq_nslots;
+ }
+
+ //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
+
+ __kmp_printf("\n");
+
+ if (curr_queue->tq_first_child) {
+ //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ if (curr_queue->tq_first_child) {
+ for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+ queue != NULL;
+ queue = queue->tq_next_child) {
+ __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
+ }
+ }
+
+ //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ }
+}
+
+static void
+__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
+{
+ __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
+
+ __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
+
+ __kmp_printf("\n");
+}
+#endif
+
+/* --------------------------------------------------------------------------- */
+
+/*
+ New taskq storage routines that try to minimize overhead of mallocs but
+ still provide cache line alignment.
+*/
+
+
+static void *
+__kmp_taskq_allocate(size_t size, kmp_int32 global_tid)
+{
+ void *addr, *orig_addr;
+ size_t bytes;
+
+ KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );
+
+ bytes = sizeof(void *) + CACHE_LINE + size;
+
+#ifdef THREAD_ALLOC_FOR_TASKQ
+ orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
+#else
+ KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
+ orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
+#endif /* THREAD_ALLOC_FOR_TASKQ */
+
+ if (orig_addr == 0)
+ KMP_FATAL( OutOfHeapMemory );
+
+ addr = orig_addr;
+
+ if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
+ KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
+ addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
+ }
+
+ (* (void **) addr) = orig_addr;
+
+ KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
+ orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
+ (int) size, global_tid ));
+
+ return ( ((void **) addr) + 1 );
+}
+
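+/*
+ * Illustrative summary of the layout produced by __kmp_taskq_allocate() above
+ * and consumed by __kmpc_taskq_free() below (descriptive sketch only, no new
+ * behavior):
+ *
+ *   orig_addr             addr (cache-line boundary)     pointer returned
+ *   |                     |                               |
+ *   v                     v                               v
+ *   [ alignment padding ][ orig_addr stashed (void *)   ][ size usable bytes ... ]
+ *
+ * The word immediately preceding the returned pointer holds orig_addr, so the
+ * free routine can recover the original allocation with *(((void **) p) - 1).
+ */
+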
+static void
+__kmpc_taskq_free(void *p, kmp_int32 global_tid)
+{
+ KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );
+
+ KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));
+
+#ifdef THREAD_ALLOC_FOR_TASKQ
+ __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
+#else
+ KMP_INTERNAL_FREE( *( ((void **) p)-1) );
+#endif /* THREAD_ALLOC_FOR_TASKQ */
+}
+
+/* --------------------------------------------------------------------------- */
+
+/*
+ * Keep freed kmpc_task_queue_t on an internal freelist and recycle since
+ * they're of constant size.
+ */
+
+static kmpc_task_queue_t *
+__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
+ kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
+ size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
+{
+ kmp_int32 i;
+ size_t bytes;
+ kmpc_task_queue_t *new_queue;
+ kmpc_aligned_shared_vars_t *shared_var_array;
+ char *shared_var_storage;
+ char *pt; /* for doing byte-adjusted address computations */
+
+ __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ if( tq->tq_freelist ) {
+ new_queue = tq -> tq_freelist;
+ tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;
+
+ KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
+
+ new_queue->tq_flags = 0;
+
+ __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+ }
+ else {
+ __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+
+ new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
+ new_queue->tq_flags = 0;
+ }
+
+ /* space in the task queue for queue slots (allocate as one big chunk */
+ /* of storage including new_taskq_task space) */
+
+ sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
+ pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
+ new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
+ *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
+
+ /* chain the allocated thunks into a freelist for this queue */
+
+ new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
+
+ for (i = 0; i < (nthunks - 2); i++) {
+ ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
+#ifdef KMP_DEBUG
+ ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
+#endif
+ }
+
+ ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
+#ifdef KMP_DEBUG
+ ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
+#endif
+
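+ /*
+ * Sketch of the thunk space chained above (descriptive only): nthunks thunks
+ * of sizeof_thunk bytes each are carved out of the single allocation at pt;
+ * thunks 0 .. nthunks-2 are linked onto tq_free_thunks, while the last slot,
+ * pt + (nthunks - 1) * sizeof_thunk, is returned through *new_taskq_thunk and
+ * never appears on the per-queue freelist.
+ */
+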
+ /* initialize the locks */
+
+ if (in_parallel) {
+ __kmp_init_lock( & new_queue->tq_link_lck );
+ __kmp_init_lock( & new_queue->tq_free_thunks_lck );
+ __kmp_init_lock( & new_queue->tq_queue_lck );
+ }
+
+ /* now allocate the slots */
+
+ bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
+ new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );
+
+ /* space for array of pointers to shared variable structures */
+ sizeof_shareds += sizeof(kmpc_task_queue_t *);
+ sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */
+
+ bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
+ shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);
+
+ bytes = nshareds * sizeof_shareds;
+ shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);
+
+ for (i=0; i<nshareds; i++) {
+ shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
+ shared_var_array[i].ai_data->sv_queue = new_queue;
+ }
+ new_queue->tq_shareds = shared_var_array;
+
+
+ /* array for number of outstanding thunks per thread */
+
+ if (in_parallel) {
+ bytes = nproc * sizeof(kmpc_aligned_int32_t);
+ new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
+ new_queue->tq_nproc = nproc;
+
+ for (i=0; i<nproc; i++)
+ new_queue->tq_th_thunks[i].ai_data = 0;
+ }
+
+ return new_queue;
+}
+
+static void
+__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
+{
+ __kmpc_taskq_free(p->tq_thunk_space, global_tid);
+ __kmpc_taskq_free(p->tq_queue, global_tid);
+
+ /* free shared var structure storage */
+ __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);
+
+ /* free array of pointers to shared vars storage */
+ __kmpc_taskq_free(p->tq_shareds, global_tid);
+
+#ifdef KMP_DEBUG
+ p->tq_first_child = NULL;
+ p->tq_next_child = NULL;
+ p->tq_prev_child = NULL;
+ p->tq_ref_count = -10;
+ p->tq_shareds = NULL;
+ p->tq_tasknum_queuing = 0;
+ p->tq_tasknum_serving = 0;
+ p->tq_queue = NULL;
+ p->tq_thunk_space = NULL;
+ p->tq_taskq_slot = NULL;
+ p->tq_free_thunks = NULL;
+ p->tq_nslots = 0;
+ p->tq_head = 0;
+ p->tq_tail = 0;
+ p->tq_nfull = 0;
+ p->tq_hiwat = 0;
+
+ if (in_parallel) {
+ int i;
+
+ for (i=0; i<p->tq_nproc; i++)
+ p->tq_th_thunks[i].ai_data = 0;
+ }
+ if ( __kmp_env_consistency_check )
+ p->tq_loc = NULL;
+ KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
+ p->tq_flags = TQF_DEALLOCATED;
+#endif /* KMP_DEBUG */
+
+ if (in_parallel) {
+ __kmpc_taskq_free(p->tq_th_thunks, global_tid);
+ __kmp_destroy_lock(& p->tq_link_lck);
+ __kmp_destroy_lock(& p->tq_queue_lck);
+ __kmp_destroy_lock(& p->tq_free_thunks_lck);
+ }
+#ifdef KMP_DEBUG
+ p->tq_th_thunks = NULL;
+#endif /* KMP_DEBUG */
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
+ p->tq.tq_next_free = tq->tq_freelist;
+
+ tq->tq_freelist = p;
+ __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+}
+
+/*
+ * Once a group of thunks has been allocated for use in a particular queue,
+ * these are managed via a per-queue freelist.
+ * We force a check that there's always a thunk free if we need one.
+ */
+
+static kmpc_thunk_t *
+__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
+{
+ kmpc_thunk_t *fl;
+
+ if (in_parallel) {
+ __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ }
+
+ fl = queue->tq_free_thunks;
+
+ KMP_DEBUG_ASSERT (fl != NULL);
+
+ queue->tq_free_thunks = fl->th.th_next_free;
+ fl->th_flags = 0;
+
+ if (in_parallel)
+ __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+
+ return fl;
+}
+
+static void
+__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
+{
+#ifdef KMP_DEBUG
+ p->th_task = 0;
+ p->th_encl_thunk = 0;
+ p->th_status = 0;
+ p->th_tasknum = 0;
+ /* Also could zero pointers to private vars */
+#endif
+
+ if (in_parallel) {
+ __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ }
+
+ p->th.th_next_free = queue->tq_free_thunks;
+ queue->tq_free_thunks = p;
+
+#ifdef KMP_DEBUG
+ p->th_flags = TQF_DEALLOCATED;
+#endif
+
+ if (in_parallel)
+ __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+}
+
+/* --------------------------------------------------------------------------- */
+
+/* returns nonzero if the queue just became full after the enqueue */
+
+static kmp_int32
+__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
+{
+ kmp_int32 ret;
+
+ /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then) */
+ if (in_parallel) {
+ __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ }
+
+ KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots); /* check queue not full */
+
+ queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
+
+ if (queue->tq_head >= queue->tq_nslots)
+ queue->tq_head = 0;
+
+ (queue->tq_nfull)++;
+
+ KMP_MB(); /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */
+
+ ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
+
+ if (in_parallel) {
+ /* don't need to wait until workers are released before unlocking */
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+
+ if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
+ /* If just creating the root queue, the worker threads are waiting at */
+ /* a join barrier until now, when there's something in the queue for */
+ /* them to do; release them now to do work. */
+ /* This should only be done when this is the first task enqueued, */
+ /* so reset the flag here also. */
+
+ tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers are still in spin mode */
+
+ KMP_MB(); /* avoid releasing barrier twice if taskq_task switches threads */
+
+ __kmpc_end_barrier_master( NULL, global_tid);
+ }
+ }
+
+ return ret;
+}
+
+static kmpc_thunk_t *
+__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
+{
+ kmpc_thunk_t *pt;
+ int tid = __kmp_tid_from_gtid( global_tid );
+
+ KMP_DEBUG_ASSERT (queue->tq_nfull > 0); /* check queue not empty */
+
+ if (queue->tq.tq_parent != NULL && in_parallel) {
+ int ct;
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ ct = ++(queue->tq_ref_count);
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
+ __LINE__, global_tid, queue, ct));
+ }
+
+ pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
+
+ if (queue->tq_tail >= queue->tq_nslots)
+ queue->tq_tail = 0;
+
+ if (in_parallel) {
+ queue->tq_th_thunks[tid].ai_data++;
+
+ KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */
+
+ KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
+ global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
+ }
+
+ (queue->tq_nfull)--;
+
+#ifdef KMP_DEBUG
+ KMP_MB();
+
+ /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */
+
+ KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
+
+ if (in_parallel) {
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
+ }
+#endif
+
+ return pt;
+}
+
+/*
+ * Find the next (non-null) task to dequeue and return it.
+ * This is never called unless in_parallel=TRUE
+ *
+ * Here are the rules for deciding which queue to take the task from:
+ * 1. Walk up the task queue tree from the current queue's parent and look
+ * on the way up (for loop, below).
+ * 2. Do a depth-first search back down the tree from the root and
+ * look (find_task_in_descendant_queue()).
+ *
+ * Here are the rules for deciding which task to take from a queue
+ * (__kmp_find_task_in_queue ()):
+ * 1. Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task
+ * must be staged to make sure we execute the last one with
+ * TQF_IS_LAST_TASK at the end of task queue execution.
+ * 2. If the queue length is below some high water mark and the taskq task
+ * is enqueued, prefer running the taskq task.
+ * 3. Otherwise, take a (normal) task from the queue.
+ *
+ * If we do all this and return pt == NULL at the bottom of this routine,
+ * this means there are no more tasks to execute (except possibly for
+ * TQF_IS_LASTPRIVATE).
+ */
+
+static kmpc_thunk_t *
+__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
+{
+ kmpc_thunk_t *pt = NULL;
+ int tid = __kmp_tid_from_gtid( global_tid );
+
+ /* To prevent deadlock from tq_queue_lck if queue already deallocated */
+ if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
+
+ __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+
+ /* Check again to avoid race in __kmpc_end_taskq() */
+ if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
+ /* if there's enough room in the queue and the dispatcher */
+ /* (taskq task) is available, schedule more tasks */
+ pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
+ queue->tq_taskq_slot = NULL;
+ }
+ else if (queue->tq_nfull == 0 ||
+ queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
+ /* do nothing if no thunks available or this thread can't */
+ /* run any because it already is executing too many */
+
+ pt = NULL;
+ }
+ else if (queue->tq_nfull > 1) {
+ /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */
+
+ pt = __kmp_dequeue_task (global_tid, queue, TRUE);
+ }
+ else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
+ /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */
+
+ pt = __kmp_dequeue_task (global_tid, queue, TRUE);
+ }
+ else if (queue->tq_flags & TQF_IS_LAST_TASK) {
+ /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */
+ /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
+ /* instrumentation does copy-out. */
+
+ pt = __kmp_dequeue_task (global_tid, queue, TRUE);
+ pt->th_flags |= TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
+ }
+ }
+
+ /* GEH - What happens here if this is lastprivate, but not the last task? */
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ }
+
+ return pt;
+}
+
+/*
+ * Walk a tree of queues starting at queue's first child
+ * and return a non-NULL thunk if one can be scheduled.
+ * Must only be called when in_parallel=TRUE
+ */
+
+static kmpc_thunk_t *
+__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
+{
+ kmpc_thunk_t *pt = NULL;
+ kmpc_task_queue_t *queue = curr_queue;
+
+ if (curr_queue->tq_first_child != NULL) {
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
+ if (queue == NULL) {
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ return NULL;
+ }
+
+ while (queue != NULL) {
+ int ct;
+ kmpc_task_queue_t *next;
+
+ ct= ++(queue->tq_ref_count);
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
+ __LINE__, global_tid, queue, ct));
+
+ pt = __kmp_find_task_in_queue (global_tid, queue);
+
+ if (pt != NULL) {
+ int ct;
+
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
+
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+
+ return pt;
+ }
+
+ /* although reference count stays active during descendant walk, shouldn't matter */
+ /* since if children still exist, reference counts aren't being monitored anyway */
+
+ pt = __kmp_find_task_in_descendant_queue (global_tid, queue);
+
+ if (pt != NULL) {
+ int ct;
+
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( ct >= 0 );
+
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+
+ return pt;
+ }
+
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ next = queue->tq_next_child;
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( ct >= 0 );
+
+ queue = next;
+ }
+
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ }
+
+ return pt;
+}
+
+/*
+ * Walk up the taskq tree looking for a task to execute.
+ * If we get to the root, search the tree for a descendent queue task.
+ * Must only be called when in_parallel=TRUE
+ */
+
+static kmpc_thunk_t *
+__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
+{
+ kmpc_task_queue_t *queue;
+ kmpc_thunk_t *pt;
+
+ pt = NULL;
+
+ if (curr_queue->tq.tq_parent != NULL) {
+ queue = curr_queue->tq.tq_parent;
+
+ while (queue != NULL) {
+ if (queue->tq.tq_parent != NULL) {
+ int ct;
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ ct = ++(queue->tq_ref_count);
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
+ __LINE__, global_tid, queue, ct));
+ }
+
+ pt = __kmp_find_task_in_queue (global_tid, queue);
+ if (pt != NULL) {
+ if (queue->tq.tq_parent != NULL) {
+ int ct;
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( ct >= 0 );
+
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ }
+
+ return pt;
+ }
+
+ if (queue->tq.tq_parent != NULL) {
+ int ct;
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( ct >= 0 );
+ }
+ queue = queue->tq.tq_parent;
+
+ if (queue != NULL)
+ __kmp_release_lock(& queue->tq_link_lck, global_tid);
+ }
+
+ }
+
+ pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );
+
+ return pt;
+}
+
+static int
+__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
+{
+ int i;
+
+ /* KMP_MB(); */ /* is this really necessary? */
+
+ for (i=0; i<queue->tq_nproc; i++) {
+ if (queue->tq_th_thunks[i].ai_data != 0)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static int
+__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
+{
+ return (queue->tq_first_child != NULL);
+}
+
+static void
+__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
+{
+#ifdef KMP_DEBUG
+ kmp_int32 i;
+ kmpc_thunk_t *thunk;
+#endif
+
+ KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
+ KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));
+
+ /* sub-queue in a recursion, not the root task queue */
+ KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);
+
+ if (in_parallel) {
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ }
+
+ KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
+
+ /* unlink queue from its siblings if any at this level */
+ if (queue->tq_prev_child != NULL)
+ queue->tq_prev_child->tq_next_child = queue->tq_next_child;
+ if (queue->tq_next_child != NULL)
+ queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
+ if (queue->tq.tq_parent->tq_first_child == queue)
+ queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
+
+ queue->tq_prev_child = NULL;
+ queue->tq_next_child = NULL;
+
+ if (in_parallel) {
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
+ __LINE__, global_tid, queue, queue->tq_ref_count));
+
+ /* wait until all other threads have stopped accessing this queue */
+ while (queue->tq_ref_count > 1) {
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);
+
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ }
+
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ }
+
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
+ __LINE__, global_tid, queue));
+
+#ifdef KMP_DEBUG
+ KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
+ KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
+
+ for (i=0; i<queue->tq_nproc; i++) {
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
+ }
+
+ i = 0;
+ for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
+ ++i;
+
+ KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
+#endif
+
+ /* release storage for queue entry */
+ __kmp_free_taskq ( tq, queue, TRUE, global_tid );
+
+ KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
+ KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+}
+
+/*
+ * Starting from indicated queue, proceed downward through tree and
+ * remove all taskqs which are finished, but only go down to taskqs
+ * which have the "nowait" clause present. Assume this is only called
+ * when in_parallel=TRUE.
+ */
+
+static void
+__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
+{
+ kmpc_task_queue_t *queue = curr_queue;
+
+ if (curr_queue->tq_first_child != NULL) {
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
+ if (queue == NULL) {
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ return;
+ }
+
+ while (queue != NULL) {
+ kmpc_task_queue_t *next;
+ int ct = ++(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
+ __LINE__, global_tid, queue, ct));
+
+
+ /* although reference count stays active during descendant walk, */
+ /* shouldn't matter since if children still exist, reference */
+ /* counts aren't being monitored anyway */
+
+ if (queue->tq_flags & TQF_IS_NOWAIT) {
+ __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );
+
+ if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
+ __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {
+
+ /*
+ Only remove this if we have not already marked it for deallocation.
+ This should prevent multiple threads from trying to free this.
+ */
+
+ if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
+ if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+
+ __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
+
+ /* Can't do any more here since can't be sure where sibling queue is so just exit this level */
+ return;
+ }
+ else {
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ }
+ }
+ /* otherwise, just fall through and decrement reference count */
+ }
+ }
+
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ next = queue->tq_next_child;
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( ct >= 0 );
+
+ queue = next;
+ }
+
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ }
+}
+
+/*
+ * Starting from indicated queue, proceed downward through tree and
+ * remove all taskq's assuming all are finished and
+ * assuming NO other threads are executing at this point.
+ */
+
+static void
+__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
+{
+ kmpc_task_queue_t *next_child;
+
+ queue = (kmpc_task_queue_t *) queue->tq_first_child;
+
+ while (queue != NULL) {
+ __kmp_remove_all_child_taskq ( tq, global_tid, queue );
+
+ next_child = queue->tq_next_child;
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
+ queue = next_child;
+ }
+}
+
+static void
+__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
+{
+ kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
+ kmp_int32 tid = __kmp_tid_from_gtid( global_tid );
+
+ KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+ KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
+ KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+
+ /*
+ * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows:
+ *
+ * happens exactly once:
+ * 1) __kmpc_taskq : push (if returning thunk only)
+ * 4) __kmpc_end_taskq_task : pop
+ *
+ * optionally happens *each* time taskq task is dequeued/enqueued:
+ * 2) __kmpc_taskq_task : pop
+ * 3) __kmp_execute_task_from_queue : push
+ *
+ * execution ordering: 1,(2,3)*,4
+ */
+
+ if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
+ kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
+ thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;
+
+ if ( __kmp_env_consistency_check ) {
+ __kmp_push_workshare( global_tid,
+ (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
+ queue->tq_loc );
+ }
+ }
+ else {
+ if ( __kmp_env_consistency_check )
+ __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
+ }
+
+ if (in_parallel) {
+ thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
+ tq->tq_curr_thunk[tid] = thunk;
+
+ KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+ }
+
+ KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
+ thunk->th_task (global_tid, thunk);
+ KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
+
+ if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
+ queue->tq_loc );
+
+ if (in_parallel) {
+ tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+ thunk->th_encl_thunk = NULL;
+ KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+ }
+
+ if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
+ __kmp_taskq_check_ordered(global_tid, thunk);
+ }
+
+ __kmp_free_thunk (queue, thunk, in_parallel, global_tid);
+
+ KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
+ KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+
+ if (in_parallel) {
+ KMP_MB(); /* needed so thunk put on free list before outstanding thunk count is decremented */
+
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
+
+ KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
+ global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));
+
+ queue->tq_th_thunks[tid].ai_data--;
+
+ /* KMP_MB(); */ /* is MB really necessary ? */
+ }
+
+ if (queue->tq.tq_parent != NULL && in_parallel) {
+ int ct;
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ ct = --(queue->tq_ref_count);
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
+ __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT( ct >= 0 );
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------- */
+
+/* starts a taskq; creates and returns a thunk for the taskq_task */
+/* also, returns pointer to shared vars for this thread in "shareds" arg */
+
+kmpc_thunk_t *
+__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
+ size_t sizeof_thunk, size_t sizeof_shareds,
+ kmp_int32 flags, kmpc_shared_vars_t **shareds )
+{
+ int in_parallel;
+ kmp_int32 nslots, nthunks, nshareds, nproc;
+ kmpc_task_queue_t *new_queue, *curr_queue;
+ kmpc_thunk_t *new_taskq_thunk;
+ kmp_info_t *th;
+ kmp_team_t *team;
+ kmp_taskq_t *tq;
+ kmp_int32 tid;
+
+ KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));
+
+ th = __kmp_threads[ global_tid ];
+ team = th -> th.th_team;
+ tq = & team -> t.t_taskq;
+ nproc = team -> t.t_nproc;
+ tid = __kmp_tid_from_gtid( global_tid );
+
+ /* find out whether this is a parallel taskq or serialized one. */
+ in_parallel = in_parallel_context( team );
+
+ if( ! tq->tq_root ) {
+ if (in_parallel) {
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
+
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
+ }
+
+ if (in_parallel) {
+ /* This shouldn't be a barrier region boundary, it will confuse the user. */
+ /* Need the boundary to be at the end of the taskq instead. */
+ if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
+ /* Creating the active root queue, and we are not the master thread. */
+ /* The master thread below created the queue and tasks have been */
+ /* enqueued, and the master thread released this barrier. This */
+ /* worker thread can now proceed and execute tasks. See also the */
+ /* TQF_RELEASE_WORKERS which is used to handle this case. */
+
+ *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;
+
+ KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
+
+ return NULL;
+ }
+ }
+
+ /* master thread only executes this code */
+
+ if( tq->tq_curr_thunk_capacity < nproc ) {
+ if(tq->tq_curr_thunk)
+ __kmp_free(tq->tq_curr_thunk);
+ else {
+ /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */
+ __kmp_init_lock( & tq->tq_freelist_lck );
+ }
+
+ tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
+ tq -> tq_curr_thunk_capacity = nproc;
+ }
+
+ if (in_parallel)
+ tq->tq_global_flags = TQF_RELEASE_WORKERS;
+ }
+
+ /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
+ /* on some heuristics (e.g., depth of queue nesting?). */
+
+ nslots = (in_parallel) ? (2 * nproc) : 1;
+
+ /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
+ /* jobs being executed by other threads, and one extra for taskq slot */
+
+ nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;
+
+ /* Only the root taskq gets a per-thread array of shareds. */
+ /* The rest of the taskq's only get one copy of the shared vars. */
+
+ nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;
+
+ /* create overall queue data structure and its components that require allocation */
+
+ new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
+ sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );
+
+ /* rest of new_queue initializations */
+
+ new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;
+
+ if (in_parallel) {
+ new_queue->tq_tasknum_queuing = 0;
+ new_queue->tq_tasknum_serving = 0;
+ new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
+ }
+
+ new_queue->tq_taskq_slot = NULL;
+ new_queue->tq_nslots = nslots;
+ new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
+ new_queue->tq_nfull = 0;
+ new_queue->tq_head = 0;
+ new_queue->tq_tail = 0;
+ new_queue->tq_loc = loc;
+
+ if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
+ /* prepare to serve the first-queued task's ORDERED directive */
+ new_queue->tq_tasknum_serving = 1;
+
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
+
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
+ }
+
+ /* create a new thunk for the taskq_task in the new_queue */
+ *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;
+
+ new_taskq_thunk->th.th_shareds = *shareds;
+ new_taskq_thunk->th_task = taskq_task;
+ new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
+ new_taskq_thunk->th_status = 0;
+
+ KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
+
+ /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */
+
+ /* insert the new task queue into the tree, but only after all fields initialized */
+
+ if (in_parallel) {
+ if( ! tq->tq_root ) {
+ new_queue->tq.tq_parent = NULL;
+ new_queue->tq_first_child = NULL;
+ new_queue->tq_next_child = NULL;
+ new_queue->tq_prev_child = NULL;
+ new_queue->tq_ref_count = 1;
+ tq->tq_root = new_queue;
+ }
+ else {
+ curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
+ new_queue->tq.tq_parent = curr_queue;
+ new_queue->tq_first_child = NULL;
+ new_queue->tq_prev_child = NULL;
+ new_queue->tq_ref_count = 1; /* for the thread that built the queue */
+
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
+ __LINE__, global_tid, new_queue, new_queue->tq_ref_count));
+
+ __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;
+
+ if (curr_queue->tq_first_child != NULL)
+ curr_queue->tq_first_child->tq_prev_child = new_queue;
+
+ curr_queue->tq_first_child = new_queue;
+
+ __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ }
+
+ /* set up thunk stack only after code that determines curr_queue above */
+ new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
+ tq->tq_curr_thunk[tid] = new_taskq_thunk;
+
+ KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+ }
+ else {
+ new_taskq_thunk->th_encl_thunk = 0;
+ new_queue->tq.tq_parent = NULL;
+ new_queue->tq_first_child = NULL;
+ new_queue->tq_next_child = NULL;
+ new_queue->tq_prev_child = NULL;
+ new_queue->tq_ref_count = 1;
+ }
+
+#ifdef KMP_DEBUG
+ KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
+ KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));
+
+ if (in_parallel) {
+ KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
+ } else {
+ KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
+ }
+
+ KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));
+
+ if (in_parallel) {
+ KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+ }
+#endif /* KMP_DEBUG */
+
+ if ( __kmp_env_consistency_check )
+ __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );
+
+ KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
+
+ return new_taskq_thunk;
+}
+
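+/*
+ * The taskq public entry points, in outline (descriptive summary collected
+ * from the comments in this file; the exact sequence of calls is generated by
+ * the compiler and may interleave differently):
+ *
+ *   __kmpc_taskq()           creates the queue and returns the taskq_task
+ *                            thunk (worker threads get NULL and go execute
+ *                            queued tasks instead)
+ *   __kmpc_taskq_task()      enqueues the taskq_task thunk into the queue's
+ *                            special tq_taskq_slot (parallel context only)
+ *   __kmpc_task_buffer()     allocates a thunk for one task; the caller then
+ *                            initializes its private variables and passes the
+ *                            thunk to __kmpc_task() for enqueueing
+ *   __kmpc_end_taskq_task()  marks that all tasks have been generated
+ *   __kmpc_end_taskq()       drains the queue; the last thread out destroys it
+ */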
+
+/* ends a taskq; last thread out destroys the queue */
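+/*
+ * In outline (descriptive only): in a parallel context each thread keeps
+ * dequeueing and executing work from this queue (and steals from descendant
+ * queues) until all tasks have been queued and run; a non-outermost queue
+ * then either returns immediately (NOWAIT) or waits until its tasks are
+ * finished and its children are gone, after which the allocating thread
+ * unlinks it from the tree; for the outermost queue the threads meet at a
+ * barrier and the remaining queues, including the root, are destroyed.  In a
+ * serialized context the single queued task, if any, is executed here.
+ */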
+
+void
+__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
+{
+#ifdef KMP_DEBUG
+ kmp_int32 i;
+#endif
+ kmp_taskq_t *tq;
+ int in_parallel;
+ kmp_info_t *th;
+ kmp_int32 is_outermost;
+ kmpc_task_queue_t *queue;
+ kmpc_thunk_t *thunk;
+ int nproc;
+
+ KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));
+
+ tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
+ nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
+
+ /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */
+ queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;
+
+ KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
+ is_outermost = (queue == tq->tq_root);
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+
+ if (in_parallel) {
+ kmp_uint32 spins;
+
+ /* this is just a safeguard to release the waiting threads if */
+ /* the outermost taskq never queues a task */
+
+ if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
+ if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
+ /* no lock needed, workers are still in spin mode */
+ tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
+
+ __kmp_end_split_barrier( bs_plain_barrier, global_tid );
+ }
+ }
+
+ /* keep dequeueing work until all tasks are queued and dequeued */
+
+ do {
+ /* wait until something is available to dequeue */
+ KMP_INIT_YIELD(spins);
+
+ while ( (queue->tq_nfull == 0)
+ && (queue->tq_taskq_slot == NULL)
+ && (! __kmp_taskq_has_any_children(queue) )
+ && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
+ ) {
+ KMP_YIELD_WHEN( TRUE, spins );
+ }
+
+ /* check to see if we can execute tasks in the queue */
+ while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
+ && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
+ ) {
+ KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
+ __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ }
+
+ /* see if work can be found in a descendant queue */
+ if ( (__kmp_taskq_has_any_children(queue))
+ && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
+ ) {
+
+ KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));
+
+ __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ }
+
+ } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
+ || (queue->tq_nfull != 0)
+ );
+
+ KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));
+
+ /* wait while all tasks are not finished and more work found
+ in descendant queues */
+
+ while ( (!__kmp_taskq_tasks_finished(queue))
+ && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
+ ) {
+
+ KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+
+ __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ }
+
+ KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));
+
+ if (!is_outermost) {
+ /* need to return if NOWAIT present and not outermost taskq */
+
+ if (queue->tq_flags & TQF_IS_NOWAIT) {
+ __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ queue->tq_ref_count--;
+ KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
+ __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+
+ KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
+
+ return;
+ }
+
+ __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
+
+ /* WAIT until all tasks are finished and no child queues exist before proceeding */
+ KMP_INIT_YIELD(spins);
+
+ while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
+ thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );
+
+ if (thunk != NULL) {
+ KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+ __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ }
+
+ KMP_YIELD_WHEN( thunk == NULL, spins );
+
+ __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
+ }
+
+ __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+ if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
+ queue->tq_flags |= TQF_DEALLOCATED;
+ }
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+
+ /* only the allocating thread can deallocate the queue */
+ if (taskq_thunk != NULL) {
+ __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
+ }
+
+ KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));
+
+ return;
+ }
+
+ /* Outermost Queue: steal work from descendants until all tasks are finished */
+
+ KMP_INIT_YIELD(spins);
+
+ while (!__kmp_taskq_tasks_finished(queue)) {
+ thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
+
+ if (thunk != NULL) {
+ KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+
+ __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ }
+
+ KMP_YIELD_WHEN( thunk == NULL, spins );
+ }
+
+ /* Need this barrier to prevent destruction of queue before threads have all executed above code */
+ /* This may need to be done earlier when NOWAIT is implemented for the outermost level */
+
+ if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
+ /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */
+ /* for right now, everybody waits, and the master thread destroys the */
+ /* remaining queues. */
+
+ __kmp_remove_all_child_taskq( tq, global_tid, queue );
+
+ /* Now destroy the root queue */
+ KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
+ KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+
+#ifdef KMP_DEBUG
+ /* the root queue entry */
+ KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));
+
+ /* children must all be gone by now because of barrier above */
+ KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
+
+ for (i=0; i<nproc; i++) {
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
+ }
+
+ for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);
+
+ KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
+
+ for (i = 0; i < nproc; i++) {
+ KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
+ }
+#endif
+ /* unlink the root queue entry */
+ tq -> tq_root = NULL;
+
+ /* release storage for root queue entry */
+ KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));
+
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
+
+ KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+
+ /* release the workers now that the data structures are up to date */
+ __kmp_end_split_barrier( bs_plain_barrier, global_tid );
+ }
+
+ th = __kmp_threads[ global_tid ];
+
+ /* Reset ORDERED SECTION to parallel version */
+ th->th.th_dispatch->th_deo_fcn = 0;
+
+ /* Reset ORDERED SECTION to parallel version */
+ th->th.th_dispatch->th_dxo_fcn = 0;
+ }
+ else {
+ /* in serial execution context, dequeue the last task */
+ /* and execute it, if there were any tasks encountered */
+
+ if (queue->tq_nfull > 0) {
+ KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
+
+ thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
+
+ if (queue->tq_flags & TQF_IS_LAST_TASK) {
+ /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
+ /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
+ /* instrumentation does copy-out. */
+
+ /* no need for test_then_or call since already locked */
+ thunk->th_flags |= TQF_IS_LAST_TASK;
+ }
+
+ KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));
+
+ __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ }
+
+ /* destroy the unattached serial queue now that there is no more work to do */
+ KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
+ KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+
+#ifdef KMP_DEBUG
+ i = 0;
+ for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
+ ++i;
+ KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);
+#endif
+ /* release storage for unattached serial queue */
+ KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
+
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
+ }
+
+ KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
+}
+
+/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */
+/* Returns nonzero if this enqueue just filled up the queue */
+
+kmp_int32
+__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
+{
+ kmp_int32 ret;
+ kmpc_task_queue_t *queue;
+ int in_parallel;
+ kmp_taskq_t *tq;
+
+ KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));
+
+ KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */
+
+ tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
+ queue = thunk->th.th_shareds->sv_queue;
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+
+ if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
+ thunk->th_tasknum = ++queue->tq_tasknum_queuing;
+
+ /* For serial execution dequeue the preceding task and execute it, if one exists */
+ /* This cannot be the last task. That one is handled in __kmpc_end_taskq */
+
+ if (!in_parallel && queue->tq_nfull > 0) {
+ kmpc_thunk_t *prev_thunk;
+
+ KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
+
+ prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
+
+ KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));
+
+ __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
+ }
+
+ /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */
+ /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */
+ /* task queue is not full and allocates a thunk (which is then passed to */
+ /* __kmpc_task()). So, the enqueue below should never fail due to a full queue. */
+
+ KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+
+ ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );
+
+ KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+
+ KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));
+
+ return ret;
+}
+
+/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */
+/* this should never be called unless in a parallel context */
+
+void
+__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
+{
+ kmpc_task_queue_t *queue;
+ kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
+ int tid = __kmp_tid_from_gtid( global_tid );
+
+ KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
+ KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+
+ queue = thunk->th.th_shareds->sv_queue;
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_workshare( global_tid, ct_taskq, loc );
+
+ /* thunk->th_task is the taskq_task */
+ KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);
+
+ /* not supposed to call __kmpc_taskq_task if it's already enqueued */
+ KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);
+
+ /* dequeue taskq thunk from curr_thunk stack */
+ tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+ thunk->th_encl_thunk = NULL;
+
+ KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+
+ thunk->th_status = status;
+
+ KMP_MB(); /* flush thunk->th_status before taskq_task enqueued to avoid race condition */
+
+ /* enqueue taskq_task in thunk into special slot in queue */
+ /* GEH - probably don't need to lock taskq slot since only one */
+ /* thread enqueues & already a lock set at dequeue point */
+
+ queue->tq_taskq_slot = thunk;
+
+ KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
+}
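The enqueue just above follows a publish pattern: the payload (th_status) is written first, KMP_MB() orders that write, and only then is the thunk made visible through tq_taskq_slot. A standalone sketch of the same ordering using C11 atomics rather than the runtime's macros (illustrative only, not runtime code):

#include <stdatomic.h>

struct payload { int status; };

static struct payload the_payload;
static _Atomic(struct payload *) slot = NULL;    /* plays the role of tq_taskq_slot */

static void publish(int status)
{
    the_payload.status = status;                 /* write the payload first          */
    atomic_store_explicit(&slot, &the_payload,   /* release store stands in for      */
                          memory_order_release); /* KMP_MB() + plain pointer store   */
}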
+
+/* ends a taskq_task; done generating tasks */
+
+void
+__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
+{
+ kmp_taskq_t *tq;
+ kmpc_task_queue_t *queue;
+ int in_parallel;
+ int tid;
+
+ KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
+
+ tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
+ queue = thunk->th.th_shareds->sv_queue;
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+ tid = __kmp_tid_from_gtid( global_tid );
+
+ if ( __kmp_env_consistency_check )
+ __kmp_pop_workshare( global_tid, ct_taskq, loc );
+
+ if (in_parallel) {
+#if KMP_ARCH_X86 || \
+ KMP_ARCH_X86_64
+
+ KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
+#else
+ {
+ __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+
+ queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
+
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ }
+#endif
+ }
+
+ if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
+ /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */
+ /* queue if TQF_IS_LASTPRIVATE so we can positively identify that last task */
+ /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. When */
+ /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so */
+ /* we know the last one in the queue is the lastprivate task. Mark the queue */
+ /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that */
+ /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */
+ /* bit signals the instrumented code to do copy-outs after execution). */
+
+ if (! in_parallel) {
+ /* No synchronization needed for serial context */
+ queue->tq_flags |= TQF_IS_LAST_TASK;
+ }
+ else {
+#if KMP_ARCH_X86 || \
+ KMP_ARCH_X86_64
+
+ KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
+#else
+ {
+ __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+
+ KMP_MB(); /* make sure data structures are in consistent state before querying them */
+ /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
+
+ queue->tq_flags |= TQF_IS_LAST_TASK;
+
+ __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ }
+#endif
+ /* to prevent race condition where last task is dequeued but */
+ /* flag isn't visible yet (not sure about this) */
+ KMP_MB();
+ }
+ }
+
+ /* dequeue taskq thunk from curr_thunk stack */
+ if (in_parallel) {
+ tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+ thunk->th_encl_thunk = NULL;
+
+ KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+ }
+
+ KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
+}
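The flag updates above branch on architecture: x86/x86_64 uses a single atomic fetch-OR (KMP_TEST_THEN_OR32), while other targets fall back to a lock around a plain OR plus a memory barrier. A minimal standalone sketch of the lock-free variant in portable C11; the flag value below is illustrative, not the runtime's definition:

#include <stdatomic.h>

#define EX_ALL_TASKS_QUEUED 0x200u      /* illustrative bit, not the real TQF value */

static _Atomic unsigned example_tq_flags;

static void mark_all_tasks_queued(void)
{
    /* one atomic read-modify-write; readers see the bit without taking a lock */
    atomic_fetch_or_explicit(&example_tq_flags, EX_ALL_TASKS_QUEUED,
                             memory_order_release);
}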
+
+/* returns thunk for a regular task based on taskq_thunk */
+/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */
+
+kmpc_thunk_t *
+__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
+{
+ kmp_taskq_t *tq;
+ kmpc_task_queue_t *queue;
+ kmpc_thunk_t *new_thunk;
+ int in_parallel;
+
+ KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));
+
+ KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */
+
+ tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
+ queue = taskq_thunk->th.th_shareds->sv_queue;
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+
+ /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */
+ /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */
+ /* task queue is not full and allocates a thunk (which is then passed to */
+ /* __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be */
+ /* the next to be enqueued in __kmpc_task(). */
+
+ new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
+ new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
+ new_thunk->th_encl_thunk = NULL;
+ new_thunk->th_task = task;
+
+ /* GEH - shouldn't need to lock the read of tq_flags here */
+ new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;
+
+ new_thunk->th_status = 0;
+
+ KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));
+
+ KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));
+
+ KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));
+
+ return new_thunk;
+}
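The instrumentation sequence noted in the comments above (__kmpc_task_buffer(), initialize the task's private variables, then __kmpc_task()) can be sketched as a plain C loop. This is a hedged illustration only: the entry-point signatures come from this file and kmp.h declarations are assumed to be in scope, but the task routine, the thunk-private layout, and the loc/gtid parameters are assumptions, not code from this commit.

/* Hypothetical task-generating loop emitted for a taskq construct (sketch). */
static void example_generate_tasks(ident_t *loc, kmp_int32 gtid,
                                   kmpc_thunk_t *taskq_thunk,
                                   kmpc_task_t task_routine, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        /* 1. reserve a thunk; the queue is guaranteed not to be full here */
        kmpc_thunk_t *t = __kmpc_task_buffer(loc, gtid, taskq_thunk, task_routine);

        /* 2. initialize the task's private/firstprivate data in the thunk
              (layout is compiler-defined and omitted in this sketch)      */

        /* 3. enqueue; the return value reports whether the queue just filled up */
        (void) __kmpc_task(loc, gtid, t);
    }
}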
+
+/* --------------------------------------------------------------------------- */
diff --git a/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c b/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c
index 58010e2596..240319fd9f 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_threadprivate.c
@@ -1,733 +1,733 @@
-/*
- * kmp_threadprivate.c -- OpenMP threadprivate support library
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_itt.h"
-#include "kmp_i18n.h"
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#define USE_CHECKS_COMMON
-
-#define KMP_INLINE_SUBR 1
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-
-struct shared_table __kmp_threadprivate_d_table;
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-static
-#ifdef KMP_INLINE_SUBR
-__forceinline
-#endif
-struct private_common *
-__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr )
-
-{
- struct private_common *tn;
-
-#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n",
- gtid, pc_addr ) );
- dump_list();
-#endif
-
- for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
- if (tn->gbl_addr == pc_addr) {
-#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n",
- gtid, pc_addr ) );
-#endif
- return tn;
- }
- }
- return 0;
-}
-
-static
-#ifdef KMP_INLINE_SUBR
-__forceinline
-#endif
-struct shared_common *
-__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr )
-{
- struct shared_common *tn;
-
- for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
- if (tn->gbl_addr == pc_addr) {
-#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
- gtid, pc_addr ) );
-#endif
- return tn;
- }
- }
- return 0;
-}
-
-
-/*
- * Create a template for the data initialized storage.
- * Either the template is NULL indicating zero fill,
- * or the template is a copy of the original data.
- */
-
-static struct private_data *
-__kmp_init_common_data( void *pc_addr, size_t pc_size )
-{
- struct private_data *d;
- size_t i;
- char *p;
-
- d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) );
-/*
- d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d->next = 0;
-*/
- d->size = pc_size;
- d->more = 1;
-
- p = (char*)pc_addr;
-
- for (i = pc_size; i > 0; --i) {
- if (*p++ != '\0') {
- d->data = __kmp_allocate( pc_size );
- KMP_MEMCPY( d->data, pc_addr, pc_size );
- break;
- }
- }
-
- return d;
-}
-
-/*
- * Initialize the data area from the template.
- */
-
-static void
-__kmp_copy_common_data( void *pc_addr, struct private_data *d )
-{
- char *addr = (char *) pc_addr;
- int i, offset;
-
- for (offset = 0; d != 0; d = d->next) {
- for (i = d->more; i > 0; --i) {
- if (d->data == 0)
- memset( & addr[ offset ], '\0', d->size );
- else
- KMP_MEMCPY( & addr[ offset ], d->data, d->size );
- offset += d->size;
- }
- }
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
-void
-__kmp_common_initialize( void )
-{
- if( ! TCR_4(__kmp_init_common) ) {
- int q;
-#ifdef KMP_DEBUG
- int gtid;
-#endif
-
- __kmp_threadpriv_cache_list = NULL;
-
-#ifdef KMP_DEBUG
- /* verify the uber masters were initialized */
- for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ )
- if( __kmp_root[gtid] ) {
- KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread );
- for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q)
- KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] );
-/* __kmp_root[ gitd ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/
- }
-#endif /* KMP_DEBUG */
-
- for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
- __kmp_threadprivate_d_table.data[ q ] = 0;
-
- TCW_4(__kmp_init_common, TRUE);
- }
-}
-
-/* Call all destructors for threadprivate data belonging to all threads.
- Currently unused! */
-void
-__kmp_common_destroy( void )
-{
- if( TCR_4(__kmp_init_common) ) {
- int q;
-
- TCW_4(__kmp_init_common, FALSE);
-
- for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
- int gtid;
- struct private_common *tn;
- struct shared_common *d_tn;
-
- /* C++ destructors need to be called once per thread before exiting */
- /* don't call destructors for master thread though unless we used copy constructor */
-
- for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) {
- if (d_tn->is_vec) {
- if (d_tn->dt.dtorv != 0) {
- for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
- if( __kmp_threads[gtid] ) {
- if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
- (! KMP_UBER_GTID (gtid)) ) {
- tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
- gtid, d_tn->gbl_addr );
- if (tn) {
- (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
- }
- }
- }
- }
- if (d_tn->obj_init != 0) {
- (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
- }
- }
- } else {
- if (d_tn->dt.dtor != 0) {
- for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
- if( __kmp_threads[gtid] ) {
- if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
- (! KMP_UBER_GTID (gtid)) ) {
- tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
- gtid, d_tn->gbl_addr );
- if (tn) {
- (*d_tn->dt.dtor) (tn->par_addr);
- }
- }
- }
- }
- if (d_tn->obj_init != 0) {
- (*d_tn->dt.dtor) (d_tn->obj_init);
- }
- }
- }
- }
- __kmp_threadprivate_d_table.data[ q ] = 0;
- }
- }
-}
-
-/* Call all destructors for threadprivate data belonging to this thread */
-void
-__kmp_common_destroy_gtid( int gtid )
-{
- struct private_common *tn;
- struct shared_common *d_tn;
-
- KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) );
- if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
- (! KMP_UBER_GTID (gtid)) ) {
-
- if( TCR_4(__kmp_init_common) ) {
-
- /* Cannot do this here since not all threads have destroyed their data */
- /* TCW_4(__kmp_init_common, FALSE); */
-
- for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) {
-
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- gtid, tn->gbl_addr );
-
- KMP_DEBUG_ASSERT( d_tn );
-
- if (d_tn->is_vec) {
- if (d_tn->dt.dtorv != 0) {
- (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
- }
- if (d_tn->obj_init != 0) {
- (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
- }
- } else {
- if (d_tn->dt.dtor != 0) {
- (void) (*d_tn->dt.dtor) (tn->par_addr);
- }
- if (d_tn->obj_init != 0) {
- (void) (*d_tn->dt.dtor) (d_tn->obj_init);
- }
- }
- }
- KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n",
- gtid ) );
- }
- }
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef KMP_TASK_COMMON_DEBUG
-static void
-dump_list( void )
-{
- int p, q;
-
- for (p = 0; p < __kmp_all_nth; ++p) {
- if( !__kmp_threads[p] ) continue;
- for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
- if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) {
- struct private_common *tn;
-
- KC_TRACE( 10, ( "\tdump_list: gtid:%d addresses\n", p ) );
-
- for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next) {
- KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
- tn->gbl_addr, tn->par_addr ) );
- }
- }
- }
- }
-}
-#endif /* KMP_TASK_COMMON_DEBUG */
-
-
-/*
- * NOTE: this routine is to be called only from the serial part of the program.
- */
-
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
-{
- struct shared_common **lnk_tn, *d_tn;
- KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] &&
- __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 );
-
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- gtid, pc_addr );
-
- if (d_tn == 0) {
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
-
- d_tn->gbl_addr = pc_addr;
- d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
-/*
- d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->ct.ctor = 0;
- d_tn->cct.cctor = 0;;
- d_tn->dt.dtor = 0;
- d_tn->is_vec = FALSE;
- d_tn->vec_len = 0L;
-*/
- d_tn->cmn_size = pc_size;
-
- __kmp_acquire_lock( &__kmp_global_lock, gtid );
-
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
-
- __kmp_release_lock( &__kmp_global_lock, gtid );
- }
-}
-
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
-{
- struct private_common *tn, **tt;
- struct shared_common *d_tn;
-
- /* +++++++++ START OF CRITICAL SECTION +++++++++ */
-
- __kmp_acquire_lock( & __kmp_global_lock, gtid );
-
- tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) );
-
- tn->gbl_addr = pc_addr;
-
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- gtid, pc_addr ); /* Only the MASTER data table exists. */
-
- if (d_tn != 0) {
- /* This threadprivate variable has already been seen. */
-
- if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) {
- d_tn->cmn_size = pc_size;
-
- if (d_tn->is_vec) {
- if (d_tn->ct.ctorv != 0) {
- /* Construct from scratch so no prototype exists */
- d_tn->obj_init = 0;
- }
- else if (d_tn->cct.cctorv != 0) {
- /* Now data initialize the prototype since it was previously registered */
- d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
- (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len);
- }
- else {
- d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
- }
- } else {
- if (d_tn->ct.ctor != 0) {
- /* Construct from scratch so no prototype exists */
- d_tn->obj_init = 0;
- }
- else if (d_tn->cct.cctor != 0) {
- /* Now data initialize the prototype since it was previously registered */
- d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
- (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr);
- }
- else {
- d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
- }
- }
- }
- }
- else {
- struct shared_common **lnk_tn;
-
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
- d_tn->gbl_addr = pc_addr;
- d_tn->cmn_size = pc_size;
- d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
-/*
- d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->ct.ctor = 0;
- d_tn->cct.cctor = 0;
- d_tn->dt.dtor = 0;
- d_tn->is_vec = FALSE;
- d_tn->vec_len = 0L;
-*/
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
- }
-
- tn->cmn_size = d_tn->cmn_size;
-
- if ( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) {
- tn->par_addr = (void *) pc_addr;
- }
- else {
- tn->par_addr = (void *) __kmp_allocate( tn->cmn_size );
- }
-
- __kmp_release_lock( & __kmp_global_lock, gtid );
-
- /* +++++++++ END OF CRITICAL SECTION +++++++++ */
-
-#ifdef USE_CHECKS_COMMON
- if (pc_size > d_tn->cmn_size) {
- KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%"
- KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
- pc_addr, pc_size, d_tn->cmn_size ) );
- KMP_FATAL( TPCommonBlocksInconsist );
- }
-#endif /* USE_CHECKS_COMMON */
-
- tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]);
-
-#ifdef KMP_TASK_COMMON_DEBUG
- if (*tt != 0) {
- KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
- gtid, pc_addr ) );
- }
-#endif
- tn->next = *tt;
- *tt = tn;
-
-#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
- gtid, pc_addr ) );
- dump_list( );
-#endif
-
- /* Link the node into a simple list */
-
- tn->link = __kmp_threads[ gtid ]->th.th_pri_head;
- __kmp_threads[ gtid ]->th.th_pri_head = tn;
-
-#ifdef BUILD_TV
- __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr );
-#endif
-
- if( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) )
- return tn;
-
- /*
- * if C++ object with copy constructor, use it;
- * else if C++ object with constructor, use it for the non-master copies only;
- * else use pod_init and memcpy
- *
- * C++ constructors need to be called once for each non-master thread on allocate
- * C++ copy constructors need to be called once for each thread on allocate
- */
-
- /*
- * C++ object with constructors/destructors;
- * don't call constructors for master thread though
- */
- if (d_tn->is_vec) {
- if ( d_tn->ct.ctorv != 0) {
- (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len);
- } else if (d_tn->cct.cctorv != 0) {
- (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len);
- } else if (tn->par_addr != tn->gbl_addr) {
- __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
- }
- } else {
- if ( d_tn->ct.ctor != 0 ) {
- (void) (*d_tn->ct.ctor) (tn->par_addr);
- } else if (d_tn->cct.cctor != 0) {
- (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init);
- } else if (tn->par_addr != tn->gbl_addr) {
- __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
- }
- }
-/* !BUILD_OPENMP_C
- if (tn->par_addr != tn->gbl_addr)
- __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
-
- return tn;
-}
-
-/* ------------------------------------------------------------------------ */
-/* We are currently parallel, and we know the thread id. */
-/* ------------------------------------------------------------------------ */
-
-/*!
- @ingroup THREADPRIVATE
-
- @param loc source location information
- @param data pointer to data being privatized
- @param ctor pointer to constructor function for data
- @param cctor pointer to copy constructor function for data
- @param dtor pointer to destructor function for data
-
- Register constructors and destructors for thread private data.
- This function is called when executing in parallel, when we know the thread id.
-*/
-void
-__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor)
-{
- struct shared_common *d_tn, **lnk_tn;
-
- KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) );
-
-#ifdef USE_CHECKS_COMMON
- /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
- KMP_ASSERT( cctor == 0);
-#endif /* USE_CHECKS_COMMON */
-
- /* Only the global data table exists. */
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data );
-
- if (d_tn == 0) {
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
- d_tn->gbl_addr = data;
-
- d_tn->ct.ctor = ctor;
- d_tn->cct.cctor = cctor;
- d_tn->dt.dtor = dtor;
-/*
- d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->vec_len = 0L;
- d_tn->obj_init = 0;
- d_tn->pod_init = 0;
-*/
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
- }
-}
-
-void *
-__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size)
-{
- void *ret;
- struct private_common *tn;
-
- KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) );
-
-#ifdef USE_CHECKS_COMMON
- if (! __kmp_init_serial)
- KMP_FATAL( RTLNotInitialized );
-#endif /* USE_CHECKS_COMMON */
-
- if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! __kmp_foreign_tp ) {
- /* The parallel address will NEVER overlap with the data_address */
- /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */
-
- KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) );
- kmp_threadprivate_insert_private_data( global_tid, data, data, size );
-
- ret = data;
- }
- else {
- KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
- global_tid, data ) );
- tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data );
-
- if ( tn ) {
- KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) );
-#ifdef USE_CHECKS_COMMON
- if ((size_t) size > tn->cmn_size) {
- KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
- data, size, tn->cmn_size ) );
- KMP_FATAL( TPCommonBlocksInconsist );
- }
-#endif /* USE_CHECKS_COMMON */
- }
- else {
- /* The parallel address will NEVER overlap with the data_address */
- /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */
- KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) );
- tn = kmp_threadprivate_insert( global_tid, data, data, size );
- }
-
- ret = tn->par_addr;
- }
- KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
- global_tid, ret ) );
-
- return ret;
-}
-
-/*!
- @ingroup THREADPRIVATE
- @param loc source location information
- @param global_tid global thread number
- @param data pointer to data to privatize
- @param size size of data to privatize
- @param cache pointer to cache
- @return pointer to private storage
-
- Allocate private storage for threadprivate data.
-*/
-void *
-__kmpc_threadprivate_cached(
- ident_t * loc,
- kmp_int32 global_tid, // gtid.
- void * data, // Pointer to original global variable.
- size_t size, // Size of original global variable.
- void *** cache
-) {
- KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %"
- KMP_SIZE_T_SPEC "\n",
- global_tid, *cache, data, size ) );
-
- if ( TCR_PTR(*cache) == 0) {
- __kmp_acquire_lock( & __kmp_global_lock, global_tid );
-
- if ( TCR_PTR(*cache) == 0) {
- __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
- __kmp_tp_cached = 1;
- __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
- void ** my_cache;
- KMP_ITT_IGNORE(
- my_cache = (void**)
- __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t ));
- );
- // No need to zero the allocated memory; __kmp_allocate does that.
- KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n",
- global_tid, my_cache ) );
-
- /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */
- /* Add address of mycache to linked list for cleanup later */
- kmp_cached_addr_t *tp_cache_addr;
-
- tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity];
- tp_cache_addr -> addr = my_cache;
- tp_cache_addr -> next = __kmp_threadpriv_cache_list;
- __kmp_threadpriv_cache_list = tp_cache_addr;
-
- KMP_MB();
-
- TCW_PTR( *cache, my_cache);
-
- KMP_MB();
- }
-
- __kmp_release_lock( & __kmp_global_lock, global_tid );
- }
-
- void *ret;
- if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) {
- ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size);
-
- TCW_PTR( (*cache)[ global_tid ], ret);
- }
- KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
- global_tid, ret ) );
-
- return ret;
-}
-
-/*!
- @ingroup THREADPRIVATE
- @param loc source location information
- @param data pointer to data being privatized
- @param ctor pointer to constructor function for data
- @param cctor pointer to copy constructor function for data
- @param dtor pointer to destructor function for data
- @param vector_length length of the vector (bytes or elements?)
- Register vector constructors and destructors for thread private data.
-*/
-void
-__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor,
- kmpc_cctor_vec cctor, kmpc_dtor_vec dtor,
- size_t vector_length )
-{
- struct shared_common *d_tn, **lnk_tn;
-
- KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) );
-
-#ifdef USE_CHECKS_COMMON
- /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
- KMP_ASSERT( cctor == 0);
-#endif /* USE_CHECKS_COMMON */
-
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- -1, data ); /* Only the global data table exists. */
-
- if (d_tn == 0) {
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
- d_tn->gbl_addr = data;
-
- d_tn->ct.ctorv = ctor;
- d_tn->cct.cctorv = cctor;
- d_tn->dt.dtorv = dtor;
- d_tn->is_vec = TRUE;
- d_tn->vec_len = (size_t) vector_length;
-/*
- d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->pod_init = 0;
-*/
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
- }
-}
+/*
+ * kmp_threadprivate.c -- OpenMP threadprivate support library
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_itt.h"
+#include "kmp_i18n.h"
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#define USE_CHECKS_COMMON
+
+#define KMP_INLINE_SUBR 1
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+struct private_common *
+kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+
+struct shared_table __kmp_threadprivate_d_table;
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+static
+#ifdef KMP_INLINE_SUBR
+__forceinline
+#endif
+struct private_common *
+__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr )
+
+{
+ struct private_common *tn;
+
+#ifdef KMP_TASK_COMMON_DEBUG
+ KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n",
+ gtid, pc_addr ) );
+ dump_list();
+#endif
+
+ for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
+ if (tn->gbl_addr == pc_addr) {
+#ifdef KMP_TASK_COMMON_DEBUG
+ KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n",
+ gtid, pc_addr ) );
+#endif
+ return tn;
+ }
+ }
+ return 0;
+}
+
+static
+#ifdef KMP_INLINE_SUBR
+__forceinline
+#endif
+struct shared_common *
+__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr )
+{
+ struct shared_common *tn;
+
+ for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
+ if (tn->gbl_addr == pc_addr) {
+#ifdef KMP_TASK_COMMON_DEBUG
+ KC_TRACE( 10, ( "__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
+ gtid, pc_addr ) );
+#endif
+ return tn;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Create a template for the data initialized storage.
+ * Either the template is NULL indicating zero fill,
+ * or the template is a copy of the original data.
+ */
+
+static struct private_data *
+__kmp_init_common_data( void *pc_addr, size_t pc_size )
+{
+ struct private_data *d;
+ size_t i;
+ char *p;
+
+ d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) );
+/*
+ d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory
+ d->next = 0;
+*/
+ d->size = pc_size;
+ d->more = 1;
+
+ p = (char*)pc_addr;
+
+ for (i = pc_size; i > 0; --i) {
+ if (*p++ != '\0') {
+ d->data = __kmp_allocate( pc_size );
+ KMP_MEMCPY( d->data, pc_addr, pc_size );
+ break;
+ }
+ }
+
+ return d;
+}
+
+/*
+ * Initialize the data area from the template.
+ */
+
+static void
+__kmp_copy_common_data( void *pc_addr, struct private_data *d )
+{
+ char *addr = (char *) pc_addr;
+ int i, offset;
+
+ for (offset = 0; d != 0; d = d->next) {
+ for (i = d->more; i > 0; --i) {
+ if (d->data == 0)
+ memset( & addr[ offset ], '\0', d->size );
+ else
+ KMP_MEMCPY( & addr[ offset ], d->data, d->size );
+ offset += d->size;
+ }
+ }
+}
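Together, __kmp_init_common_data() and __kmp_copy_common_data() implement a copy-template scheme: if the original block is all zero bytes, no template data is stored and later copies are simply zero-filled; otherwise a byte-for-byte copy is kept and replayed with memcpy. A small standalone sketch of that idea using plain libc instead of the kmp allocator:

#include <stdlib.h>
#include <string.h>

/* Build a template: NULL means "zero-fill later", otherwise a private copy of src. */
static void *make_template(const void *src, size_t size)
{
    const char *p = (const char *) src;
    size_t i;
    for (i = 0; i < size; i++) {
        if (p[i] != '\0') {
            void *copy = malloc(size);
            memcpy(copy, src, size);
            return copy;
        }
    }
    return NULL;                                  /* all zeroes */
}

/* Initialize a per-thread block from the template. */
static void init_from_template(void *dst, const void *tmpl, size_t size)
{
    if (tmpl == NULL)
        memset(dst, 0, size);
    else
        memcpy(dst, tmpl, size);
}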
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
+void
+__kmp_common_initialize( void )
+{
+ if( ! TCR_4(__kmp_init_common) ) {
+ int q;
+#ifdef KMP_DEBUG
+ int gtid;
+#endif
+
+ __kmp_threadpriv_cache_list = NULL;
+
+#ifdef KMP_DEBUG
+ /* verify the uber masters were initialized */
+ for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ )
+ if( __kmp_root[gtid] ) {
+ KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread );
+ for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q)
+ KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] );
+/* __kmp_root[ gtid ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/
+ }
+#endif /* KMP_DEBUG */
+
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
+ __kmp_threadprivate_d_table.data[ q ] = 0;
+
+ TCW_4(__kmp_init_common, TRUE);
+ }
+}
+
+/* Call all destructors for threadprivate data belonging to all threads.
+ Currently unused! */
+void
+__kmp_common_destroy( void )
+{
+ if( TCR_4(__kmp_init_common) ) {
+ int q;
+
+ TCW_4(__kmp_init_common, FALSE);
+
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
+ int gtid;
+ struct private_common *tn;
+ struct shared_common *d_tn;
+
+ /* C++ destructors need to be called once per thread before exiting */
+ /* don't call destructors for master thread though unless we used copy constructor */
+
+ for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) {
+ if (d_tn->is_vec) {
+ if (d_tn->dt.dtorv != 0) {
+ for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
+ if( __kmp_threads[gtid] ) {
+ if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
+ (! KMP_UBER_GTID (gtid)) ) {
+ tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
+ gtid, d_tn->gbl_addr );
+ if (tn) {
+ (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
+ }
+ }
+ }
+ }
+ if (d_tn->obj_init != 0) {
+ (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
+ }
+ }
+ } else {
+ if (d_tn->dt.dtor != 0) {
+ for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
+ if( __kmp_threads[gtid] ) {
+ if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
+ (! KMP_UBER_GTID (gtid)) ) {
+ tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
+ gtid, d_tn->gbl_addr );
+ if (tn) {
+ (*d_tn->dt.dtor) (tn->par_addr);
+ }
+ }
+ }
+ }
+ if (d_tn->obj_init != 0) {
+ (*d_tn->dt.dtor) (d_tn->obj_init);
+ }
+ }
+ }
+ }
+ __kmp_threadprivate_d_table.data[ q ] = 0;
+ }
+ }
+}
+
+/* Call all destructors for threadprivate data belonging to this thread */
+void
+__kmp_common_destroy_gtid( int gtid )
+{
+ struct private_common *tn;
+ struct shared_common *d_tn;
+
+ KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) );
+ if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
+ (! KMP_UBER_GTID (gtid)) ) {
+
+ if( TCR_4(__kmp_init_common) ) {
+
+ /* Cannot do this here since not all threads have destroyed their data */
+ /* TCW_4(__kmp_init_common, FALSE); */
+
+ for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) {
+
+ d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
+ gtid, tn->gbl_addr );
+
+ KMP_DEBUG_ASSERT( d_tn );
+
+ if (d_tn->is_vec) {
+ if (d_tn->dt.dtorv != 0) {
+ (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
+ }
+ if (d_tn->obj_init != 0) {
+ (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
+ }
+ } else {
+ if (d_tn->dt.dtor != 0) {
+ (void) (*d_tn->dt.dtor) (tn->par_addr);
+ }
+ if (d_tn->obj_init != 0) {
+ (void) (*d_tn->dt.dtor) (d_tn->obj_init);
+ }
+ }
+ }
+ KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n",
+ gtid ) );
+ }
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef KMP_TASK_COMMON_DEBUG
+static void
+dump_list( void )
+{
+ int p, q;
+
+ for (p = 0; p < __kmp_all_nth; ++p) {
+ if( !__kmp_threads[p] ) continue;
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
+ if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) {
+ struct private_common *tn;
+
+ KC_TRACE( 10, ( "\tdump_list: gtid:%d addresses\n", p ) );
+
+ for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next) {
+ KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
+ tn->gbl_addr, tn->par_addr ) );
+ }
+ }
+ }
+ }
+}
+#endif /* KMP_TASK_COMMON_DEBUG */
+
+
+/*
+ * NOTE: this routine is to be called only from the serial part of the program.
+ */
+
+void
+kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
+{
+ struct shared_common **lnk_tn, *d_tn;
+ KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] &&
+ __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 );
+
+ d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
+ gtid, pc_addr );
+
+ if (d_tn == 0) {
+ d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
+
+ d_tn->gbl_addr = pc_addr;
+ d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
+/*
+ d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
+ d_tn->ct.ctor = 0;
+ d_tn->cct.cctor = 0;
+ d_tn->dt.dtor = 0;
+ d_tn->is_vec = FALSE;
+ d_tn->vec_len = 0L;
+*/
+ d_tn->cmn_size = pc_size;
+
+ __kmp_acquire_lock( &__kmp_global_lock, gtid );
+
+ lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+
+ __kmp_release_lock( &__kmp_global_lock, gtid );
+ }
+}
+
+struct private_common *
+kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
+{
+ struct private_common *tn, **tt;
+ struct shared_common *d_tn;
+
+ /* +++++++++ START OF CRITICAL SECTION +++++++++ */
+
+ __kmp_acquire_lock( & __kmp_global_lock, gtid );
+
+ tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) );
+
+ tn->gbl_addr = pc_addr;
+
+ d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
+ gtid, pc_addr ); /* Only the MASTER data table exists. */
+
+ if (d_tn != 0) {
+ /* This threadprivate variable has already been seen. */
+
+ if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) {
+ d_tn->cmn_size = pc_size;
+
+ if (d_tn->is_vec) {
+ if (d_tn->ct.ctorv != 0) {
+ /* Construct from scratch so no prototype exists */
+ d_tn->obj_init = 0;
+ }
+ else if (d_tn->cct.cctorv != 0) {
+ /* Now data initialize the prototype since it was previously registered */
+ d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
+ (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len);
+ }
+ else {
+ d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
+ }
+ } else {
+ if (d_tn->ct.ctor != 0) {
+ /* Construct from scratch so no prototype exists */
+ d_tn->obj_init = 0;
+ }
+ else if (d_tn->cct.cctor != 0) {
+ /* Now data initialize the prototype since it was previously registered */
+ d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
+ (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr);
+ }
+ else {
+ d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
+ }
+ }
+ }
+ }
+ else {
+ struct shared_common **lnk_tn;
+
+ d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
+ d_tn->gbl_addr = pc_addr;
+ d_tn->cmn_size = pc_size;
+ d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
+/*
+ d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
+ d_tn->ct.ctor = 0;
+ d_tn->cct.cctor = 0;
+ d_tn->dt.dtor = 0;
+ d_tn->is_vec = FALSE;
+ d_tn->vec_len = 0L;
+*/
+ lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+ }
+
+ tn->cmn_size = d_tn->cmn_size;
+
+ if ( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) {
+ tn->par_addr = (void *) pc_addr;
+ }
+ else {
+ tn->par_addr = (void *) __kmp_allocate( tn->cmn_size );
+ }
+
+ __kmp_release_lock( & __kmp_global_lock, gtid );
+
+ /* +++++++++ END OF CRITICAL SECTION +++++++++ */
+
+#ifdef USE_CHECKS_COMMON
+ if (pc_size > d_tn->cmn_size) {
+ KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%"
+ KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
+ pc_addr, pc_size, d_tn->cmn_size ) );
+ KMP_FATAL( TPCommonBlocksInconsist );
+ }
+#endif /* USE_CHECKS_COMMON */
+
+ tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]);
+
+#ifdef KMP_TASK_COMMON_DEBUG
+ if (*tt != 0) {
+ KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
+ gtid, pc_addr ) );
+ }
+#endif
+ tn->next = *tt;
+ *tt = tn;
+
+#ifdef KMP_TASK_COMMON_DEBUG
+ KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
+ gtid, pc_addr ) );
+ dump_list( );
+#endif
+
+ /* Link the node into a simple list */
+
+ tn->link = __kmp_threads[ gtid ]->th.th_pri_head;
+ __kmp_threads[ gtid ]->th.th_pri_head = tn;
+
+#ifdef BUILD_TV
+ __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr );
+#endif
+
+ if( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) )
+ return tn;
+
+ /*
+ * if C++ object with copy constructor, use it;
+ * else if C++ object with constructor, use it for the non-master copies only;
+ * else use pod_init and memcpy
+ *
+ * C++ constructors need to be called once for each non-master thread on allocate
+ * C++ copy constructors need to be called once for each thread on allocate
+ */
+
+ /*
+ * C++ object with constructors/destructors;
+ * don't call constructors for master thread though
+ */
+ if (d_tn->is_vec) {
+ if ( d_tn->ct.ctorv != 0) {
+ (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len);
+ } else if (d_tn->cct.cctorv != 0) {
+ (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len);
+ } else if (tn->par_addr != tn->gbl_addr) {
+ __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
+ }
+ } else {
+ if ( d_tn->ct.ctor != 0 ) {
+ (void) (*d_tn->ct.ctor) (tn->par_addr);
+ } else if (d_tn->cct.cctor != 0) {
+ (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init);
+ } else if (tn->par_addr != tn->gbl_addr) {
+ __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
+ }
+ }
+/* !BUILD_OPENMP_C
+ if (tn->par_addr != tn->gbl_addr)
+ __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
+
+ return tn;
+}
+
+/* ------------------------------------------------------------------------ */
+/* We are currently parallel, and we know the thread id. */
+/* ------------------------------------------------------------------------ */
+
+/*!
+ @ingroup THREADPRIVATE
+
+ @param loc source location information
+ @param data pointer to data being privatized
+ @param ctor pointer to constructor function for data
+ @param cctor pointer to copy constructor function for data
+ @param dtor pointer to destructor function for data
+
+ Register constructors and destructors for thread private data.
+ This function is called when executing in parallel, when we know the thread id.
+*/
+void
+__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor)
+{
+ struct shared_common *d_tn, **lnk_tn;
+
+ KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) );
+
+#ifdef USE_CHECKS_COMMON
+ /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
+ KMP_ASSERT( cctor == 0);
+#endif /* USE_CHECKS_COMMON */
+
+ /* Only the global data table exists. */
+ d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data );
+
+ if (d_tn == 0) {
+ d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
+ d_tn->gbl_addr = data;
+
+ d_tn->ct.ctor = ctor;
+ d_tn->cct.cctor = cctor;
+ d_tn->dt.dtor = dtor;
+/*
+ d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate zeroes the memory
+ d_tn->vec_len = 0L;
+ d_tn->obj_init = 0;
+ d_tn->pod_init = 0;
+*/
+ lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+ }
+}
+
+void *
+__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size)
+{
+ void *ret;
+ struct private_common *tn;
+
+ KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) );
+
+#ifdef USE_CHECKS_COMMON
+ if (! __kmp_init_serial)
+ KMP_FATAL( RTLNotInitialized );
+#endif /* USE_CHECKS_COMMON */
+
+ if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! __kmp_foreign_tp ) {
+ /* The parallel address will NEVER overlap with the data_address */
+ /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */
+
+ KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) );
+ kmp_threadprivate_insert_private_data( global_tid, data, data, size );
+
+ ret = data;
+ }
+ else {
+ KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
+ global_tid, data ) );
+ tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data );
+
+ if ( tn ) {
+ KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) );
+#ifdef USE_CHECKS_COMMON
+ if ((size_t) size > tn->cmn_size) {
+ KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
+ data, size, tn->cmn_size ) );
+ KMP_FATAL( TPCommonBlocksInconsist );
+ }
+#endif /* USE_CHECKS_COMMON */
+ }
+ else {
+ /* The parallel address will NEVER overlap with the data_address */
+ /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */
+ KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) );
+ tn = kmp_threadprivate_insert( global_tid, data, data, size );
+ }
+
+ ret = tn->par_addr;
+ }
+ KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
+ global_tid, ret ) );
+
+ return ret;
+}
+
+/*!
+ @ingroup THREADPRIVATE
+ @param loc source location information
+ @param global_tid global thread number
+ @param data pointer to data to privatize
+ @param size size of data to privatize
+ @param cache pointer to cache
+ @return pointer to private storage
+
+ Allocate private storage for threadprivate data.
+*/
+void *
+__kmpc_threadprivate_cached(
+ ident_t * loc,
+ kmp_int32 global_tid, // gtid.
+ void * data, // Pointer to original global variable.
+ size_t size, // Size of original global variable.
+ void *** cache
+) {
+ KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %"
+ KMP_SIZE_T_SPEC "\n",
+ global_tid, *cache, data, size ) );
+
+ if ( TCR_PTR(*cache) == 0) {
+ __kmp_acquire_lock( & __kmp_global_lock, global_tid );
+
+ if ( TCR_PTR(*cache) == 0) {
+ __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
+ __kmp_tp_cached = 1;
+ __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
+ void ** my_cache;
+ KMP_ITT_IGNORE(
+ my_cache = (void**)
+ __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t ));
+ );
+ // No need to zero the allocated memory; __kmp_allocate does that.
+ KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n",
+ global_tid, my_cache ) );
+
+ /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */
+ /* Add address of mycache to linked list for cleanup later */
+ kmp_cached_addr_t *tp_cache_addr;
+
+ tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity];
+ tp_cache_addr -> addr = my_cache;
+ tp_cache_addr -> next = __kmp_threadpriv_cache_list;
+ __kmp_threadpriv_cache_list = tp_cache_addr;
+
+ KMP_MB();
+
+ TCW_PTR( *cache, my_cache);
+
+ KMP_MB();
+ }
+
+ __kmp_release_lock( & __kmp_global_lock, global_tid );
+ }
+
+ void *ret;
+ if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) {
+ ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size);
+
+ TCW_PTR( (*cache)[ global_tid ], ret);
+ }
+ KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
+ global_tid, ret ) );
+
+ return ret;
+}
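A hedged sketch of how a caller typically uses this entry point: one static cache pointer per threadprivate variable, passed by address so the runtime can install the cache on first use. Only the signature above is taken from this file; the variable names and the way loc/gtid are obtained are assumptions for illustration, and kmp.h declarations are assumed to be in scope.

/* Illustrative only: per-variable cache handed to the runtime. */
static int my_tp_var;                 /* the original (threadprivate) variable  */
static void **my_tp_cache = NULL;     /* filled in by the runtime on first call */

static int *get_my_tp_copy(ident_t *loc, kmp_int32 gtid)
{
    /* returns this thread's private copy of my_tp_var */
    return (int *) __kmpc_threadprivate_cached(loc, gtid, &my_tp_var,
                                               sizeof(my_tp_var), &my_tp_cache);
}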
+
+/*!
+ @ingroup THREADPRIVATE
+ @param loc source location information
+ @param data pointer to data being privatized
+ @param ctor pointer to constructor function for data
+ @param cctor pointer to copy constructor function for data
+ @param dtor pointer to destructor function for data
+ @param vector_length length of the vector (bytes or elements?)
+ Register vector constructors and destructors for thread private data.
+*/
+void
+__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor,
+ kmpc_cctor_vec cctor, kmpc_dtor_vec dtor,
+ size_t vector_length )
+{
+ struct shared_common *d_tn, **lnk_tn;
+
+ KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) );
+
+#ifdef USE_CHECKS_COMMON
+ /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
+ KMP_ASSERT( cctor == 0);
+#endif /* USE_CHECKS_COMMON */
+
+ d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
+ -1, data ); /* Only the global data table exists. */
+
+ if (d_tn == 0) {
+ d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
+ d_tn->gbl_addr = data;
+
+ d_tn->ct.ctorv = ctor;
+ d_tn->cct.cctorv = cctor;
+ d_tn->dt.dtorv = dtor;
+ d_tn->is_vec = TRUE;
+ d_tn->vec_len = (size_t) vector_length;
+/*
+ d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
+ d_tn->pod_init = 0;
+*/
+ lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+ }
+}
diff --git a/contrib/libs/cxxsupp/openmp/kmp_utility.c b/contrib/libs/cxxsupp/openmp/kmp_utility.c
index 311b2031d8..c777d7dc0c 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_utility.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_utility.c
@@ -1,440 +1,440 @@
-/*
- * kmp_utility.c -- Utility routines for the OpenMP support library.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_wrapper_getpid.h"
-#include "kmp_str.h"
-#include <float.h>
-#include "kmp_i18n.h"
-
+/*
+ * kmp_utility.c -- Utility routines for the OpenMP support library.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_wrapper_getpid.h"
+#include "kmp_str.h"
+#include <float.h>
+#include "kmp_i18n.h"
+
#include <util/system/types.h>
const char* CpuBrand(ui32 store[12]) noexcept; //defined in <util/system/cpu_id.h>
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-static const char *unknown = "unknown";
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */
-/* the debugging package has not been initialized yet, and only "0" will print */
-/* debugging output since the environment variables have not been read. */
-
-#ifdef KMP_DEBUG
-static int trace_level = 5;
-#endif
-
-/*
- * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
- * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
- * PHY_ID = APIC_ID >> LOG_ID_BITS
- */
-int
-__kmp_get_physical_id( int log_per_phy, int apic_id )
-{
- int index_lsb, index_msb, temp;
-
- if (log_per_phy > 1) {
- index_lsb = 0;
- index_msb = 31;
-
- temp = log_per_phy;
- while ( (temp & 1) == 0 ) {
- temp >>= 1;
- index_lsb++;
- }
-
- temp = log_per_phy;
- while ( (temp & 0x80000000)==0 ) {
- temp <<= 1;
- index_msb--;
- }
-
- /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
- if (index_lsb != index_msb) index_msb++;
-
- return ( (int) (apic_id >> index_msb) );
- }
-
- return apic_id;
-}
-
-
-/*
- * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
- * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
- * LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 )
- */
-int
-__kmp_get_logical_id( int log_per_phy, int apic_id )
-{
- unsigned current_bit;
- int bits_seen;
-
- if (log_per_phy <= 1) return ( 0 );
-
- bits_seen = 0;
-
- for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
- if ( log_per_phy & current_bit ) {
- log_per_phy &= ~current_bit;
- bits_seen++;
- }
- }
-
- /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
- if (bits_seen == 1) {
- current_bit >>= 1;
- }
-
- return ( (int) ((current_bit - 1) & apic_id) );
-}
-
-
-static
-kmp_uint64
-__kmp_parse_frequency( // R: Frequency in Hz.
- char const * frequency // I: Float number and unit: MHz, GHz, or THz.
-) {
-
- double value = 0.0;
- char const * unit = NULL;
- kmp_uint64 result = ~ 0;
-
- if ( frequency == NULL ) {
- return result;
- }; // if
- value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char const *".
- if ( 0 < value && value <= DBL_MAX ) { // Good value (not overflow, underflow, etc).
- if ( strcmp( unit, "MHz" ) == 0 ) {
- value = value * 1.0E+6;
- } else if ( strcmp( unit, "GHz" ) == 0 ) {
- value = value * 1.0E+9;
- } else if ( strcmp( unit, "THz" ) == 0 ) {
- value = value * 1.0E+12;
- } else { // Wrong unit.
- return result;
- }; // if
- result = value;
- }; // if
- return result;
-
-}; // func __kmp_parse_frequency
-
-void
-__kmp_query_cpuid( kmp_cpuinfo_t *p )
-{
- struct kmp_cpuid buf;
- int max_arg;
- int log_per_phy;
-#ifdef KMP_DEBUG
- int cflush_size;
-#endif
-
- memset(&buf, 0, sizeof(buf));
-
- p->initialized = 1;
-
- p->sse2 = 1; // Assume SSE2 by default.
-
- __kmp_x86_cpuid( 0, 0, &buf );
-
- KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
- 0, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
-
- max_arg = buf.eax;
-
- p->apic_id = -1;
-
- if (max_arg >= 1) {
- int i;
- kmp_uint32 t, data[ 4 ];
-
- __kmp_x86_cpuid( 1, 0, &buf );
- KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
- 1, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
-
- {
-#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask ) )
-
- p->signature = buf.eax;
- p->family = get_value( buf.eax, 20, 0xff ) + get_value( buf.eax, 8, 0x0f );
- p->model = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f );
- p->stepping = get_value( buf.eax, 0, 0x0f );
-
-#undef get_value
-
- KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) );
- }
-
- for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) {
- data[ i ] = (t & 0xff);
- }; // for
-
- p->sse2 = ( buf.edx >> 26 ) & 1;
-
-#ifdef KMP_DEBUG
-
- if ( (buf.edx >> 4) & 1 ) {
- /* TSC - Timestamp Counter Available */
- KA_TRACE( trace_level, (" TSC" ) );
- }
- if ( (buf.edx >> 8) & 1 ) {
- /* CX8 - CMPXCHG8B Instruction Available */
- KA_TRACE( trace_level, (" CX8" ) );
- }
- if ( (buf.edx >> 9) & 1 ) {
- /* APIC - Local APIC Present (multi-processor operation support) */
- KA_TRACE( trace_level, (" APIC" ) );
- }
- if ( (buf.edx >> 15) & 1 ) {
- /* CMOV - Conditional MOVe Instruction Available */
- KA_TRACE( trace_level, (" CMOV" ) );
- }
- if ( (buf.edx >> 18) & 1 ) {
- /* PSN - Processor Serial Number Available */
- KA_TRACE( trace_level, (" PSN" ) );
- }
- if ( (buf.edx >> 19) & 1 ) {
- /* CLFLUSH - Cache Flush Instruction Available */
- cflush_size = data[ 1 ] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
- KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) );
-
- }
- if ( (buf.edx >> 21) & 1 ) {
- /* DTES - Debug Trace & EMON Store */
- KA_TRACE( trace_level, (" DTES" ) );
- }
- if ( (buf.edx >> 22) & 1 ) {
- /* ACPI - ACPI Support Available */
- KA_TRACE( trace_level, (" ACPI" ) );
- }
- if ( (buf.edx >> 23) & 1 ) {
- /* MMX - Multimedia Extensions */
- KA_TRACE( trace_level, (" MMX" ) );
- }
- if ( (buf.edx >> 25) & 1 ) {
- /* SSE - SSE Instructions */
- KA_TRACE( trace_level, (" SSE" ) );
- }
- if ( (buf.edx >> 26) & 1 ) {
- /* SSE2 - SSE2 Instructions */
- KA_TRACE( trace_level, (" SSE2" ) );
- }
- if ( (buf.edx >> 27) & 1 ) {
- /* SLFSNP - Self-Snooping Cache */
- KA_TRACE( trace_level, (" SLFSNP" ) );
- }
-#endif /* KMP_DEBUG */
-
- if ( (buf.edx >> 28) & 1 ) {
- /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
- log_per_phy = data[ 2 ];
- p->apic_id = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */
- KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) );
-
- if( log_per_phy > 1 ) {
- /* default to 1k FOR JT-enabled processors (4k on OS X*) */
-#if KMP_OS_DARWIN
- p->cpu_stackoffset = 4 * 1024;
-#else
- p->cpu_stackoffset = 1 * 1024;
-#endif
- }
-
- p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id );
- p->logical_id = __kmp_get_logical_id( log_per_phy, p->apic_id );
- }
-#ifdef KMP_DEBUG
- if ( (buf.edx >> 29) & 1 ) {
- /* ATHROTL - Automatic Throttle Control */
- KA_TRACE( trace_level, (" ATHROTL" ) );
- }
- KA_TRACE( trace_level, (" ]\n" ) );
-
- for (i = 2; i <= max_arg; ++i) {
- __kmp_x86_cpuid( i, 0, &buf );
- KA_TRACE( trace_level,
- ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
- i, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
- }
-#endif
-#if KMP_USE_ADAPTIVE_LOCKS
- p->rtm = 0;
- if (max_arg > 7)
- {
- /* RTM bit CPUID.07:EBX, bit 11 */
- __kmp_x86_cpuid(7, 0, &buf);
- p->rtm = (buf.ebx >> 11) & 1;
- KA_TRACE( trace_level, (" RTM" ) );
- }
-#endif
- }; // if
-
- { // Parse CPU brand string for frequency.
-
- union kmp_cpu_brand_string {
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+static const char *unknown = "unknown";
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */
+/* the debugging package has not been initialized yet, and only "0" will print */
+/* debugging output since the environment variables have not been read. */
+
+#ifdef KMP_DEBUG
+static int trace_level = 5;
+#endif
+
+/*
+ * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
+ * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
+ * PHY_ID = APIC_ID >> LOG_ID_BITS
+ */
+int
+__kmp_get_physical_id( int log_per_phy, int apic_id )
+{
+ int index_lsb, index_msb, temp;
+
+ if (log_per_phy > 1) {
+ index_lsb = 0;
+ index_msb = 31;
+
+ temp = log_per_phy;
+ while ( (temp & 1) == 0 ) {
+ temp >>= 1;
+ index_lsb++;
+ }
+
+ temp = log_per_phy;
+ while ( (temp & 0x80000000)==0 ) {
+ temp <<= 1;
+ index_msb--;
+ }
+
+ /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
+ if (index_lsb != index_msb) index_msb++;
+
+ return ( (int) (apic_id >> index_msb) );
+ }
+
+ return apic_id;
+}
+
+
+/*
+ * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
+ * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
+ * LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 )
+ */
+int
+__kmp_get_logical_id( int log_per_phy, int apic_id )
+{
+ unsigned current_bit;
+ int bits_seen;
+
+ if (log_per_phy <= 1) return ( 0 );
+
+ bits_seen = 0;
+
+ for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
+ if ( log_per_phy & current_bit ) {
+ log_per_phy &= ~current_bit;
+ bits_seen++;
+ }
+ }
+
+ /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
+ if (bits_seen == 1) {
+ current_bit >>= 1;
+ }
+
+ return ( (int) ((current_bit - 1) & apic_id) );
+}
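Editor's note: a quick worked example of the split the two routines above compute. With log_per_phy = 2, LOG_ID_BITS = 1, so an initial APIC id of 5 (binary 101) yields physical id 2 and logical id 1. The standalone check below only encodes that expectation using the stated formulas; it does not call the library:

#include <assert.h>
int main(void) {
    int log_per_phy = 2;   /* 2 logical processors per physical package      */
    int log_id_bits = 1;   /* 1 + floor(log2(max(log_per_phy - 1, 1))) = 1   */
    int apic_id     = 5;   /* binary 101                                     */
    assert((apic_id >> log_id_bits) == 2);              /* PHY_ID, as above  */
    assert((apic_id & ((1 << log_id_bits) - 1)) == 1);  /* LOG_ID, as above  */
    return 0;
}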
+
+
+static
+kmp_uint64
+__kmp_parse_frequency( // R: Frequency in Hz.
+ char const * frequency // I: Float number and unit: MHz, GHz, or THz.
+) {
+
+ double value = 0.0;
+ char const * unit = NULL;
+ kmp_uint64 result = ~ 0;
+
+ if ( frequency == NULL ) {
+ return result;
+ }; // if
+ value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char const *".
+ if ( 0 < value && value <= DBL_MAX ) { // Good value (not overflow, underflow, etc).
+ if ( strcmp( unit, "MHz" ) == 0 ) {
+ value = value * 1.0E+6;
+ } else if ( strcmp( unit, "GHz" ) == 0 ) {
+ value = value * 1.0E+9;
+ } else if ( strcmp( unit, "THz" ) == 0 ) {
+ value = value * 1.0E+12;
+ } else { // Wrong unit.
+ return result;
+ }; // if
+ result = value;
+ }; // if
+ return result;
+
+}; // func __kmp_parse_frequency
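Editor's note: the parser above only examines the last space-separated token of the CPU brand string. A minimal standalone sketch of that idea, using an assumed example brand string (this is not the library code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void) {
    /* Assumed example brand string for illustration. */
    const char *brand = "Intel(R) Xeon(R) CPU E5-2660 v4 @ 2.00GHz";
    const char *tail  = strrchr(brand, ' ');   /* -> " 2.00GHz"              */
    char *unit = NULL;
    double value = strtod(tail, &unit);        /* value = 2.0, unit -> "GHz" */
    unsigned long long hz = 0;
    if (value > 0) {
        if      (strcmp(unit, "MHz") == 0) hz = (unsigned long long)(value * 1.0E+6);
        else if (strcmp(unit, "GHz") == 0) hz = (unsigned long long)(value * 1.0E+9);
        else if (strcmp(unit, "THz") == 0) hz = (unsigned long long)(value * 1.0E+12);
    }
    printf("%llu Hz\n", hz);                   /* prints: 2000000000 Hz      */
    return 0;
}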
+
+void
+__kmp_query_cpuid( kmp_cpuinfo_t *p )
+{
+ struct kmp_cpuid buf;
+ int max_arg;
+ int log_per_phy;
+#ifdef KMP_DEBUG
+ int cflush_size;
+#endif
+
+ memset(&buf, 0, sizeof(buf));
+
+ p->initialized = 1;
+
+ p->sse2 = 1; // Assume SSE2 by default.
+
+ __kmp_x86_cpuid( 0, 0, &buf );
+
+ KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+ 0, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+
+ max_arg = buf.eax;
+
+ p->apic_id = -1;
+
+ if (max_arg >= 1) {
+ int i;
+ kmp_uint32 t, data[ 4 ];
+
+ __kmp_x86_cpuid( 1, 0, &buf );
+ KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+ 1, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+
+ {
+#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask ) )
+
+ p->signature = buf.eax;
+ p->family = get_value( buf.eax, 20, 0xff ) + get_value( buf.eax, 8, 0x0f );
+ p->model = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f );
+ p->stepping = get_value( buf.eax, 0, 0x0f );
+
+#undef get_value
+
+ KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) );
+ }
+
+ for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) {
+ data[ i ] = (t & 0xff);
+ }; // for
+
+ p->sse2 = ( buf.edx >> 26 ) & 1;
+
+#ifdef KMP_DEBUG
+
+ if ( (buf.edx >> 4) & 1 ) {
+ /* TSC - Timestamp Counter Available */
+ KA_TRACE( trace_level, (" TSC" ) );
+ }
+ if ( (buf.edx >> 8) & 1 ) {
+ /* CX8 - CMPXCHG8B Instruction Available */
+ KA_TRACE( trace_level, (" CX8" ) );
+ }
+ if ( (buf.edx >> 9) & 1 ) {
+ /* APIC - Local APIC Present (multi-processor operation support) */
+ KA_TRACE( trace_level, (" APIC" ) );
+ }
+ if ( (buf.edx >> 15) & 1 ) {
+ /* CMOV - Conditional MOVe Instruction Available */
+ KA_TRACE( trace_level, (" CMOV" ) );
+ }
+ if ( (buf.edx >> 18) & 1 ) {
+ /* PSN - Processor Serial Number Available */
+ KA_TRACE( trace_level, (" PSN" ) );
+ }
+ if ( (buf.edx >> 19) & 1 ) {
+ /* CLFLUSH - Cache Flush Instruction Available */
+ cflush_size = data[ 1 ] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
+ KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) );
+
+ }
+ if ( (buf.edx >> 21) & 1 ) {
+ /* DTES - Debug Trace & EMON Store */
+ KA_TRACE( trace_level, (" DTES" ) );
+ }
+ if ( (buf.edx >> 22) & 1 ) {
+ /* ACPI - ACPI Support Available */
+ KA_TRACE( trace_level, (" ACPI" ) );
+ }
+ if ( (buf.edx >> 23) & 1 ) {
+ /* MMX - Multimedia Extensions */
+ KA_TRACE( trace_level, (" MMX" ) );
+ }
+ if ( (buf.edx >> 25) & 1 ) {
+ /* SSE - SSE Instructions */
+ KA_TRACE( trace_level, (" SSE" ) );
+ }
+ if ( (buf.edx >> 26) & 1 ) {
+ /* SSE2 - SSE2 Instructions */
+ KA_TRACE( trace_level, (" SSE2" ) );
+ }
+ if ( (buf.edx >> 27) & 1 ) {
+ /* SLFSNP - Self-Snooping Cache */
+ KA_TRACE( trace_level, (" SLFSNP" ) );
+ }
+#endif /* KMP_DEBUG */
+
+ if ( (buf.edx >> 28) & 1 ) {
+ /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
+ log_per_phy = data[ 2 ];
+ p->apic_id = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */
+ KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) );
+
+ if( log_per_phy > 1 ) {
+ /* default to 1k FOR JT-enabled processors (4k on OS X*) */
+#if KMP_OS_DARWIN
+ p->cpu_stackoffset = 4 * 1024;
+#else
+ p->cpu_stackoffset = 1 * 1024;
+#endif
+ }
+
+ p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id );
+ p->logical_id = __kmp_get_logical_id( log_per_phy, p->apic_id );
+ }
+#ifdef KMP_DEBUG
+ if ( (buf.edx >> 29) & 1 ) {
+ /* ATHROTL - Automatic Throttle Control */
+ KA_TRACE( trace_level, (" ATHROTL" ) );
+ }
+ KA_TRACE( trace_level, (" ]\n" ) );
+
+ for (i = 2; i <= max_arg; ++i) {
+ __kmp_x86_cpuid( i, 0, &buf );
+ KA_TRACE( trace_level,
+ ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+ i, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+ }
+#endif
+#if KMP_USE_ADAPTIVE_LOCKS
+ p->rtm = 0;
+ if (max_arg > 7)
+ {
+ /* RTM bit CPUID.07:EBX, bit 11 */
+ __kmp_x86_cpuid(7, 0, &buf);
+ p->rtm = (buf.ebx >> 11) & 1;
+ KA_TRACE( trace_level, (" RTM" ) );
+ }
+#endif
+ }; // if
+
+ { // Parse CPU brand string for frequency.
+
+ union kmp_cpu_brand_string {
ui32 buf[ 12 ];
char string[ sizeof( ui32 ) * 12 + 1 ];
- }; // union kmp_cpu_brand_string
- union kmp_cpu_brand_string brand;
-
- memset(&brand, 0, sizeof(brand));
-
- p->frequency = 0;
-
- // Get CPU brand string.
+ }; // union kmp_cpu_brand_string
+ union kmp_cpu_brand_string brand;
+
+ memset(&brand, 0, sizeof(brand));
+
+ p->frequency = 0;
+
+ // Get CPU brand string.
CpuBrand(brand.buf);
- brand.string[ sizeof( brand.string ) - 1 ] = 0; // Just in case. ;-)
- KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", brand.string ) );
-
- // Parse frequency.
- p->frequency = __kmp_parse_frequency( strrchr( brand.string, ' ' ) );
- KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) );
- }
-}
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-/* ------------------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------------------ */
-
-void
-__kmp_expand_host_name( char *buffer, size_t size )
-{
- KMP_DEBUG_ASSERT(size >= sizeof(unknown));
-#if KMP_OS_WINDOWS
- {
- DWORD s = size;
-
- if (! GetComputerNameA( buffer, & s ))
- KMP_STRCPY_S( buffer, size, unknown );
- }
-#else
- buffer[size - 2] = 0;
- if (gethostname( buffer, size ) || buffer[size - 2] != 0)
- KMP_STRCPY_S( buffer, size, unknown );
-#endif
-}
-
-/* Expand the meta characters in the filename:
- *
- * Currently defined characters are:
- *
- * %H the hostname
- * %P the number of threads used.
- * %I the unique identifier for this run.
- */
-
-void
-__kmp_expand_file_name( char *result, size_t rlen, char *pattern )
-{
- char *pos = result, *end = result + rlen - 1;
- char buffer[256];
- int default_cpu_width = 1;
- int snp_result;
-
- KMP_DEBUG_ASSERT(rlen > 0);
- *end = 0;
- {
- int i;
- for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width);
- }
-
- if (pattern != NULL) {
- while (*pattern != '\0' && pos < end) {
- if (*pattern != '%') {
- *pos++ = *pattern++;
- } else {
- char *old_pattern = pattern;
- int width = 1;
- int cpu_width = default_cpu_width;
-
- ++pattern;
-
- if (*pattern >= '0' && *pattern <= '9') {
- width = 0;
- do {
- width = (width * 10) + *pattern++ - '0';
- } while (*pattern >= '0' && *pattern <= '9');
- if (width < 0 || width > 1024)
- width = 1;
-
- cpu_width = width;
- }
-
- switch (*pattern) {
- case 'H':
- case 'h':
- {
- __kmp_expand_host_name( buffer, sizeof( buffer ) );
- KMP_STRNCPY( pos, buffer, end - pos + 1);
- if(*end == 0) {
- while ( *pos )
- ++pos;
- ++pattern;
- } else
- pos = end;
- }
- break;
- case 'P':
- case 'p':
- {
- snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth );
- if(snp_result >= 0 && snp_result <= end - pos) {
- while ( *pos )
- ++pos;
- ++pattern;
- } else
- pos = end;
- }
- break;
- case 'I':
- case 'i':
- {
- pid_t id = getpid();
- snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id );
- if(snp_result >= 0 && snp_result <= end - pos) {
- while ( *pos )
- ++pos;
- ++pattern;
- } else
- pos = end;
- break;
- }
- case '%':
- {
- *pos++ = '%';
- ++pattern;
- break;
- }
- default:
- {
- *pos++ = '%';
- pattern = old_pattern + 1;
- break;
- }
- }
- }
- }
- /* TODO: How do we get rid of this? */
- if(*pattern != '\0')
- KMP_FATAL( FileNameTooLong );
- }
-
- *pos = '\0';
-}
+ brand.string[ sizeof( brand.string ) - 1 ] = 0; // Just in case. ;-)
+ KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", brand.string ) );
+
+ // Parse frequency.
+ p->frequency = __kmp_parse_frequency( strrchr( brand.string, ' ' ) );
+ KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) );
+ }
+}
+
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+/* ------------------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------------------ */
+
+void
+__kmp_expand_host_name( char *buffer, size_t size )
+{
+ KMP_DEBUG_ASSERT(size >= sizeof(unknown));
+#if KMP_OS_WINDOWS
+ {
+ DWORD s = size;
+
+ if (! GetComputerNameA( buffer, & s ))
+ KMP_STRCPY_S( buffer, size, unknown );
+ }
+#else
+ buffer[size - 2] = 0;
+ if (gethostname( buffer, size ) || buffer[size - 2] != 0)
+ KMP_STRCPY_S( buffer, size, unknown );
+#endif
+}
+
+/* Expand the meta characters in the filename:
+ *
+ * Currently defined characters are:
+ *
+ * %H the hostname
+ * %P the number of threads used.
+ * %I the unique identifier for this run.
+ */
+
+void
+__kmp_expand_file_name( char *result, size_t rlen, char *pattern )
+{
+ char *pos = result, *end = result + rlen - 1;
+ char buffer[256];
+ int default_cpu_width = 1;
+ int snp_result;
+
+ KMP_DEBUG_ASSERT(rlen > 0);
+ *end = 0;
+ {
+ int i;
+ for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width);
+ }
+
+ if (pattern != NULL) {
+ while (*pattern != '\0' && pos < end) {
+ if (*pattern != '%') {
+ *pos++ = *pattern++;
+ } else {
+ char *old_pattern = pattern;
+ int width = 1;
+ int cpu_width = default_cpu_width;
+
+ ++pattern;
+
+ if (*pattern >= '0' && *pattern <= '9') {
+ width = 0;
+ do {
+ width = (width * 10) + *pattern++ - '0';
+ } while (*pattern >= '0' && *pattern <= '9');
+ if (width < 0 || width > 1024)
+ width = 1;
+
+ cpu_width = width;
+ }
+
+ switch (*pattern) {
+ case 'H':
+ case 'h':
+ {
+ __kmp_expand_host_name( buffer, sizeof( buffer ) );
+ KMP_STRNCPY( pos, buffer, end - pos + 1);
+ if(*end == 0) {
+ while ( *pos )
+ ++pos;
+ ++pattern;
+ } else
+ pos = end;
+ }
+ break;
+ case 'P':
+ case 'p':
+ {
+ snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth );
+ if(snp_result >= 0 && snp_result <= end - pos) {
+ while ( *pos )
+ ++pos;
+ ++pattern;
+ } else
+ pos = end;
+ }
+ break;
+ case 'I':
+ case 'i':
+ {
+ pid_t id = getpid();
+ snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id );
+ if(snp_result >= 0 && snp_result <= end - pos) {
+ while ( *pos )
+ ++pos;
+ ++pattern;
+ } else
+ pos = end;
+ break;
+ }
+ case '%':
+ {
+ *pos++ = '%';
+ ++pattern;
+ break;
+ }
+ default:
+ {
+ *pos++ = '%';
+ pattern = old_pattern + 1;
+ break;
+ }
+ }
+ }
+ }
+ /* TODO: How do we get rid of this? */
+ if(*pattern != '\0')
+ KMP_FATAL( FileNameTooLong );
+ }
+
+ *pos = '\0';
+}
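Editor's note: to illustrate the meta characters documented above (hypothetical values, not taken from the source): on a host named "node01" with a default team size of 16 and process id 1234, the pattern "out_%H_%3P_%I.log" expands to "out_node01_016_1234.log" -- %H substitutes the host name, the digit prefix in %3P zero-pads the thread count to three places, and %I substitutes the process id (unpadded here, since no width was given).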
diff --git a/contrib/libs/cxxsupp/openmp/kmp_version.c b/contrib/libs/cxxsupp/openmp/kmp_version.c
index 8b7598c46b..2ddd76d3ad 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_version.c
+++ b/contrib/libs/cxxsupp/openmp/kmp_version.c
@@ -1,211 +1,211 @@
-/*
- * kmp_version.c
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_io.h"
-#include "kmp_version.h"
-
-// Replace with snapshot date YYYYMMDD for promotion build.
-#define KMP_VERSION_BUILD 20140926
-
-// Helper macros to convert value of macro to string literal.
-#define _stringer( x ) #x
-#define stringer( x ) _stringer( x )
-
-// Detect compiler.
-#if KMP_COMPILER_ICC
- #if __INTEL_COMPILER == 1010
- #define KMP_COMPILER "Intel C++ Compiler 10.1"
- #elif __INTEL_COMPILER == 1100
- #define KMP_COMPILER "Intel C++ Compiler 11.0"
- #elif __INTEL_COMPILER == 1110
- #define KMP_COMPILER "Intel C++ Compiler 11.1"
- #elif __INTEL_COMPILER == 1200
- #define KMP_COMPILER "Intel C++ Compiler 12.0"
- #elif __INTEL_COMPILER == 1210
- #define KMP_COMPILER "Intel C++ Compiler 12.1"
- #elif __INTEL_COMPILER == 1300
- #define KMP_COMPILER "Intel C++ Compiler 13.0"
- #elif __INTEL_COMPILER == 1310
- #define KMP_COMPILER "Intel C++ Compiler 13.1"
- #elif __INTEL_COMPILER == 1400
- #define KMP_COMPILER "Intel C++ Compiler 14.0"
- #elif __INTEL_COMPILER == 1410
- #define KMP_COMPILER "Intel C++ Compiler 14.1"
- #elif __INTEL_COMPILER == 1500
- #define KMP_COMPILER "Intel C++ Compiler 15.0"
- #elif __INTEL_COMPILER == 1600
- #define KMP_COMPILER "Intel C++ Compiler 16.0"
- #elif __INTEL_COMPILER == 9999
- #define KMP_COMPILER "Intel C++ Compiler mainline"
- #endif
-#elif KMP_COMPILER_CLANG
- #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ )
-#elif KMP_COMPILER_GCC
- #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ )
-#elif KMP_COMPILER_MSVC
- #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER )
-#endif
-#ifndef KMP_COMPILER
- #warning "Unknown compiler"
- #define KMP_COMPILER "unknown compiler"
-#endif
-
-// Detect library type (perf, stub).
-#ifdef KMP_STUB
- #define KMP_LIB_TYPE "stub"
-#else
- #define KMP_LIB_TYPE "performance"
-#endif // KMP_LIB_TYPE
-
-// Detect link type (static, dynamic).
-#ifdef KMP_DYNAMIC_LIB
- #define KMP_LINK_TYPE "dynamic"
-#else
- #define KMP_LINK_TYPE "static"
-#endif // KMP_LINK_TYPE
-
-// Finally, define strings.
-#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")"
-#define KMP_COPYRIGHT ""
-
-int const __kmp_version_major = KMP_VERSION_MAJOR;
-int const __kmp_version_minor = KMP_VERSION_MINOR;
-int const __kmp_version_build = KMP_VERSION_BUILD;
-int const __kmp_openmp_version =
- #if OMP_40_ENABLED
- 201307;
- #else
- 201107;
- #endif
-
-/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for a
- specific format; the recognition routine there needs to be updated before
- this format is changed.
-*/
-char const __kmp_copyright[] =
- KMP_VERSION_PREFIX KMP_LIBRARY
- " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR )
- "." stringer( KMP_VERSION_BUILD ) " "
- KMP_COPYRIGHT;
-
-char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT;
-char const __kmp_version_lib_ver[] = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD );
-char const __kmp_version_lib_type[] = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE;
-char const __kmp_version_link_type[] = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE;
-char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " __DATE__ " " __TIME__;
-#if KMP_MIC2
- char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2";
-#endif
-char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER;
-
-//
-// Called at serial initialization time.
-//
-static int __kmp_version_1_printed = FALSE;
-
-void
-__kmp_print_version_1( void )
-{
- if ( __kmp_version_1_printed ) {
- return;
- }; // if
- __kmp_version_1_printed = TRUE;
-
- #ifndef KMP_STUB
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
- // Print version strings skipping initial magic.
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] );
- #if KMP_MIC
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] );
- #endif
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] );
- #if defined(KMP_GOMP_COMPAT)
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] );
- #endif /* defined(KMP_GOMP_COMPAT) */
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" ) );
- #ifdef KMP_DEBUG
- for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
- __kmp_str_buf_print(
- & buffer,
- "%s%s barrier branch bits: gather=%u, release=%u\n",
- KMP_VERSION_PREF_STR,
- __kmp_barrier_type_name[ i ],
- __kmp_barrier_gather_branch_bits[ i ],
- __kmp_barrier_release_branch_bits[ i ]
- ); // __kmp_str_buf_print
- }; // for i
- for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
- __kmp_str_buf_print(
- & buffer,
- "%s%s barrier pattern: gather=%s, release=%s\n",
- KMP_VERSION_PREF_STR,
- __kmp_barrier_type_name[ i ],
- __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ],
- __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ]
- ); // __kmp_str_buf_print
- }; // for i
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] );
- #endif
- __kmp_str_buf_print(
- & buffer,
- "%sthread affinity support: %s\n",
- KMP_VERSION_PREF_STR,
- #if KMP_AFFINITY_SUPPORTED
- (
- KMP_AFFINITY_CAPABLE()
- ?
- (
- __kmp_affinity_type == affinity_none
- ?
- "not used"
- :
- "yes"
- )
- :
- "no"
- )
- #else
- "no"
- #endif
- );
- __kmp_printf( "%s", buffer.str );
- __kmp_str_buf_free( & buffer );
- K_DIAG( 1, ( "KMP_VERSION is true\n" ) );
- #endif // KMP_STUB
-} // __kmp_print_version_1
-
-//
-// Called at parallel initialization time.
-//
-static int __kmp_version_2_printed = FALSE;
-
-void
-__kmp_print_version_2( void ) {
- if ( __kmp_version_2_printed ) {
- return;
- }; // if
- __kmp_version_2_printed = TRUE;
-
- #ifndef KMP_STUB
- #endif // KMP_STUB
-} // __kmp_print_version_2
-
-// end of file //
+/*
+ * kmp_version.c
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_io.h"
+#include "kmp_version.h"
+
+// Replace with snapshot date YYYYMMDD for promotion build.
+#define KMP_VERSION_BUILD 20140926
+
+// Helper macros to convert value of macro to string literal.
+#define _stringer( x ) #x
+#define stringer( x ) _stringer( x )
+
+// Detect compiler.
+#if KMP_COMPILER_ICC
+ #if __INTEL_COMPILER == 1010
+ #define KMP_COMPILER "Intel C++ Compiler 10.1"
+ #elif __INTEL_COMPILER == 1100
+ #define KMP_COMPILER "Intel C++ Compiler 11.0"
+ #elif __INTEL_COMPILER == 1110
+ #define KMP_COMPILER "Intel C++ Compiler 11.1"
+ #elif __INTEL_COMPILER == 1200
+ #define KMP_COMPILER "Intel C++ Compiler 12.0"
+ #elif __INTEL_COMPILER == 1210
+ #define KMP_COMPILER "Intel C++ Compiler 12.1"
+ #elif __INTEL_COMPILER == 1300
+ #define KMP_COMPILER "Intel C++ Compiler 13.0"
+ #elif __INTEL_COMPILER == 1310
+ #define KMP_COMPILER "Intel C++ Compiler 13.1"
+ #elif __INTEL_COMPILER == 1400
+ #define KMP_COMPILER "Intel C++ Compiler 14.0"
+ #elif __INTEL_COMPILER == 1410
+ #define KMP_COMPILER "Intel C++ Compiler 14.1"
+ #elif __INTEL_COMPILER == 1500
+ #define KMP_COMPILER "Intel C++ Compiler 15.0"
+ #elif __INTEL_COMPILER == 1600
+ #define KMP_COMPILER "Intel C++ Compiler 16.0"
+ #elif __INTEL_COMPILER == 9999
+ #define KMP_COMPILER "Intel C++ Compiler mainline"
+ #endif
+#elif KMP_COMPILER_CLANG
+ #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ )
+#elif KMP_COMPILER_GCC
+ #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ )
+#elif KMP_COMPILER_MSVC
+ #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER )
+#endif
+#ifndef KMP_COMPILER
+ #warning "Unknown compiler"
+ #define KMP_COMPILER "unknown compiler"
+#endif
+
+// Detect library type (perf, stub).
+#ifdef KMP_STUB
+ #define KMP_LIB_TYPE "stub"
+#else
+ #define KMP_LIB_TYPE "performance"
+#endif // KMP_LIB_TYPE
+
+// Detect link type (static, dynamic).
+#ifdef KMP_DYNAMIC_LIB
+ #define KMP_LINK_TYPE "dynamic"
+#else
+ #define KMP_LINK_TYPE "static"
+#endif // KMP_LINK_TYPE
+
+// Finally, define strings.
+#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")"
+#define KMP_COPYRIGHT ""
+
+int const __kmp_version_major = KMP_VERSION_MAJOR;
+int const __kmp_version_minor = KMP_VERSION_MINOR;
+int const __kmp_version_build = KMP_VERSION_BUILD;
+int const __kmp_openmp_version =
+ #if OMP_40_ENABLED
+ 201307;
+ #else
+ 201107;
+ #endif
+
+/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for a
+ specific format; the recognition routine there needs to be updated before
+ this format is changed.
+*/
+char const __kmp_copyright[] =
+ KMP_VERSION_PREFIX KMP_LIBRARY
+ " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR )
+ "." stringer( KMP_VERSION_BUILD ) " "
+ KMP_COPYRIGHT;
+
+char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT;
+char const __kmp_version_lib_ver[] = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD );
+char const __kmp_version_lib_type[] = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE;
+char const __kmp_version_link_type[] = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE;
+char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " __DATE__ " " __TIME__;
+#if KMP_MIC2
+ char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2";
+#endif
+char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER;
+
+//
+// Called at serial initialization time.
+//
+static int __kmp_version_1_printed = FALSE;
+
+void
+__kmp_print_version_1( void )
+{
+ if ( __kmp_version_1_printed ) {
+ return;
+ }; // if
+ __kmp_version_1_printed = TRUE;
+
+ #ifndef KMP_STUB
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init( & buffer );
+ // Print version strings skipping initial magic.
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] );
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] );
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] );
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] );
+ #if KMP_MIC
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] );
+ #endif
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] );
+ #if defined(KMP_GOMP_COMPAT)
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] );
+ #endif /* defined(KMP_GOMP_COMPAT) */
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] );
+ __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" ) );
+ #ifdef KMP_DEBUG
+ for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
+ __kmp_str_buf_print(
+ & buffer,
+ "%s%s barrier branch bits: gather=%u, release=%u\n",
+ KMP_VERSION_PREF_STR,
+ __kmp_barrier_type_name[ i ],
+ __kmp_barrier_gather_branch_bits[ i ],
+ __kmp_barrier_release_branch_bits[ i ]
+ ); // __kmp_str_buf_print
+ }; // for i
+ for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
+ __kmp_str_buf_print(
+ & buffer,
+ "%s%s barrier pattern: gather=%s, release=%s\n",
+ KMP_VERSION_PREF_STR,
+ __kmp_barrier_type_name[ i ],
+ __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ],
+ __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ]
+ ); // __kmp_str_buf_print
+ }; // for i
+ __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] );
+ #endif
+ __kmp_str_buf_print(
+ & buffer,
+ "%sthread affinity support: %s\n",
+ KMP_VERSION_PREF_STR,
+ #if KMP_AFFINITY_SUPPORTED
+ (
+ KMP_AFFINITY_CAPABLE()
+ ?
+ (
+ __kmp_affinity_type == affinity_none
+ ?
+ "not used"
+ :
+ "yes"
+ )
+ :
+ "no"
+ )
+ #else
+ "no"
+ #endif
+ );
+ __kmp_printf( "%s", buffer.str );
+ __kmp_str_buf_free( & buffer );
+ K_DIAG( 1, ( "KMP_VERSION is true\n" ) );
+ #endif // KMP_STUB
+} // __kmp_print_version_1
+
+//
+// Called at parallel initialization time.
+//
+static int __kmp_version_2_printed = FALSE;
+
+void
+__kmp_print_version_2( void ) {
+ if ( __kmp_version_2_printed ) {
+ return;
+ }; // if
+ __kmp_version_2_printed = TRUE;
+
+ #ifndef KMP_STUB
+ #endif // KMP_STUB
+} // __kmp_print_version_2
+
+// end of file //
diff --git a/contrib/libs/cxxsupp/openmp/kmp_version.h b/contrib/libs/cxxsupp/openmp/kmp_version.h
index ba7c1b949a..212853b8e2 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_version.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_version.h
@@ -1,68 +1,68 @@
-/*
- * kmp_version.h -- version number for this release
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_VERSION_H
-#define KMP_VERSION_H
-
-#ifdef __cplusplus
- extern "C" {
-#endif // __cplusplus
-
-#ifndef KMP_VERSION_MAJOR
- #error KMP_VERSION_MAJOR macro is not defined.
-#endif
-#define KMP_VERSION_MINOR 0
-/*
- Using "magic" prefix in all the version strings is rather convenient to get static version info
- from binaries by using standard utilities "strings" and "grep", e. g.:
- $ strings libomp.so | grep "@(#)"
- gives clean list of all version strings in the library. Leading zero helps to keep version
- string separate from printable characters which may occurs just before version string.
-*/
-#define KMP_VERSION_MAGIC_STR "\x00@(#) "
-#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
-#define KMP_VERSION_PREF_STR "Intel(R) OMP "
-#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
-
-/* declare all the version string constants for KMP_VERSION env. variable */
-extern int const __kmp_version_major;
-extern int const __kmp_version_minor;
-extern int const __kmp_version_build;
-extern int const __kmp_openmp_version;
-extern char const __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP.
-extern char const __kmp_version_copyright[];
-extern char const __kmp_version_lib_ver[];
-extern char const __kmp_version_lib_type[];
-extern char const __kmp_version_link_type[];
-extern char const __kmp_version_build_time[];
-extern char const __kmp_version_target_env[];
-extern char const __kmp_version_build_compiler[];
-extern char const __kmp_version_alt_comp[];
-extern char const __kmp_version_omp_api[];
-// ??? extern char const __kmp_version_debug[];
-extern char const __kmp_version_lock[];
-extern char const __kmp_version_nested_stats_reporting[];
-extern char const __kmp_version_ftnstdcall[];
-extern char const __kmp_version_ftncdecl[];
-extern char const __kmp_version_ftnextra[];
-
-void __kmp_print_version_1( void );
-void __kmp_print_version_2( void );
-
-#ifdef __cplusplus
- } // extern "C"
-#endif // __cplusplus
-
-#endif /* KMP_VERSION_H */
+/*
+ * kmp_version.h -- version number for this release
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_VERSION_H
+#define KMP_VERSION_H
+
+#ifdef __cplusplus
+ extern "C" {
+#endif // __cplusplus
+
+#ifndef KMP_VERSION_MAJOR
+ #error KMP_VERSION_MAJOR macro is not defined.
+#endif
+#define KMP_VERSION_MINOR 0
+/*
+ Using "magic" prefix in all the version strings is rather convenient to get static version info
+ from binaries by using standard utilities "strings" and "grep", e. g.:
+ $ strings libomp.so | grep "@(#)"
+ gives clean list of all version strings in the library. Leading zero helps to keep version
+ string separate from printable characters which may occurs just before version string.
+*/
+#define KMP_VERSION_MAGIC_STR "\x00@(#) "
+#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
+#define KMP_VERSION_PREF_STR "Intel(R) OMP "
+#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
+
+/* declare all the version string constants for KMP_VERSION env. variable */
+extern int const __kmp_version_major;
+extern int const __kmp_version_minor;
+extern int const __kmp_version_build;
+extern int const __kmp_openmp_version;
+extern char const __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP.
+extern char const __kmp_version_copyright[];
+extern char const __kmp_version_lib_ver[];
+extern char const __kmp_version_lib_type[];
+extern char const __kmp_version_link_type[];
+extern char const __kmp_version_build_time[];
+extern char const __kmp_version_target_env[];
+extern char const __kmp_version_build_compiler[];
+extern char const __kmp_version_alt_comp[];
+extern char const __kmp_version_omp_api[];
+// ??? extern char const __kmp_version_debug[];
+extern char const __kmp_version_lock[];
+extern char const __kmp_version_nested_stats_reporting[];
+extern char const __kmp_version_ftnstdcall[];
+extern char const __kmp_version_ftncdecl[];
+extern char const __kmp_version_ftnextra[];
+
+void __kmp_print_version_1( void );
+void __kmp_print_version_2( void );
+
+#ifdef __cplusplus
+ } // extern "C"
+#endif // __cplusplus
+
+#endif /* KMP_VERSION_H */
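Editor's note: a minimal standalone sketch of the embedding trick the header documents -- a constant that begins with the unprintable magic so `strings | grep "@(#)"` finds it, while run-time printing skips the first KMP_VERSION_MAGIC_LEN (6) characters. The version text below is an assumed sample, not the real libomp string:

#include <stdio.h>
#define MAGIC_STR "\x00@(#) "   /* NUL + "@(#) " -> 6 characters, as above */
#define MAGIC_LEN 6
static const char version_lib_ver[] =
    MAGIC_STR "Intel(R) OMP version: 5.0.20140926";  /* assumed sample text */
int main(void) {
    /* Print the human-readable part, skipping the magic prefix -- the same
       indexing __kmp_print_version_1() does with KMP_VERSION_MAGIC_LEN.   */
    printf("%s\n", &version_lib_ver[MAGIC_LEN]);
    return 0;
}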
diff --git a/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp b/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp
index 7758e18991..d865bf6d46 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp
+++ b/contrib/libs/cxxsupp/openmp/kmp_wait_release.cpp
@@ -1,50 +1,50 @@
-/*
- * kmp_wait_release.cpp -- Wait/Release implementation
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp_wait_release.h"
-
-void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- __kmp_wait_template(this_thr, flag, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-}
-
-void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- __kmp_wait_template(this_thr, flag, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-}
-
-void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- __kmp_wait_template(this_thr, flag, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj) );
-}
-
-
-
-void __kmp_release_32(kmp_flag_32 *flag) {
- __kmp_release_template(flag);
-}
-
-void __kmp_release_64(kmp_flag_64 *flag) {
- __kmp_release_template(flag);
-}
-
-void __kmp_release_oncore(kmp_flag_oncore *flag) {
- __kmp_release_template(flag);
-}
+/*
+ * kmp_wait_release.cpp -- Wait/Release implementation
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp_wait_release.h"
+
+void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ __kmp_wait_template(this_thr, flag, final_spin
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+}
+
+void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ __kmp_wait_template(this_thr, flag, final_spin
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+}
+
+void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ __kmp_wait_template(this_thr, flag, final_spin
+ USE_ITT_BUILD_ARG(itt_sync_obj) );
+}
+
+
+
+void __kmp_release_32(kmp_flag_32 *flag) {
+ __kmp_release_template(flag);
+}
+
+void __kmp_release_64(kmp_flag_64 *flag) {
+ __kmp_release_template(flag);
+}
+
+void __kmp_release_oncore(kmp_flag_oncore *flag) {
+ __kmp_release_template(flag);
+}
diff --git a/contrib/libs/cxxsupp/openmp/kmp_wait_release.h b/contrib/libs/cxxsupp/openmp/kmp_wait_release.h
index 60a0f45f3d..92db155eb5 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_wait_release.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_wait_release.h
@@ -1,564 +1,564 @@
-/*
- * kmp_wait_release.h -- Wait/Release implementation
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_WAIT_RELEASE_H
-#define KMP_WAIT_RELEASE_H
-
-#include "kmp.h"
-#include "kmp_itt.h"
-
+/*
+ * kmp_wait_release.h -- Wait/Release implementation
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_WAIT_RELEASE_H
+#define KMP_WAIT_RELEASE_H
+
+#include "kmp.h"
+#include "kmp_itt.h"
+
+/*!
+@defgroup WAIT_RELEASE Wait/Release operations
+
+The definitions and functions here implement the lowest level thread
+synchronizations of suspending a thread and waking it. They are used
+to build higher level operations such as barriers and fork/join.
+*/
+
+/*!
+@ingroup WAIT_RELEASE
+@{
+*/
+
/*!
-@defgroup WAIT_RELEASE Wait/Release operations
-
-The definitions and functions here implement the lowest level thread
-synchronizations of suspending a thread and waking it. They are used
-to build higher level operations such as barriers and fork/join.
-*/
-
-/*!
-@ingroup WAIT_RELEASE
-@{
-*/
-
-/*!
- * The flag_type describes the storage used for the flag.
- */
-enum flag_type {
- flag32, /**< 32 bit flags */
- flag64, /**< 64 bit flags */
- flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
-};
-
-/*!
- * Base class for wait/release volatile flag
- */
-template <typename P>
-class kmp_flag {
- volatile P * loc; /**< Pointer to the flag storage that is modified by another thread */
- flag_type t; /**< "Type" of the flag in loc */
- public:
- typedef P flag_t;
- kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
- /*!
- * @result the pointer to the actual flag
- */
- volatile P * get() { return loc; }
- /*!
- * @param new_loc in set loc to point at new_loc
- */
- void set(volatile P *new_loc) { loc = new_loc; }
- /*!
- * @result the flag_type
- */
- flag_type get_type() { return t; }
- // Derived classes must provide the following:
- /*
- kmp_info_t * get_waiter(kmp_uint32 i);
- kmp_uint32 get_num_waiters();
- bool done_check();
- bool done_check_val(P old_loc);
- bool notdone_check();
- P internal_release();
- void suspend(int th_gtid);
- void resume(int th_gtid);
- P set_sleeping();
- P unset_sleeping();
- bool is_sleeping();
- bool is_any_sleeping();
- bool is_sleeping_val(P old_loc);
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained);
- */
-};
-
-/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
- must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
-template <class C>
-static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- // NOTE: We may not belong to a team at this point.
- volatile typename C::flag_t *spin = flag->get();
- kmp_uint32 spins;
- kmp_uint32 hibernate;
- int th_gtid;
- int tasks_completed = FALSE;
-
- KMP_FSYNC_SPIN_INIT(spin, NULL);
- if (flag->done_check()) {
- KMP_FSYNC_SPIN_ACQUIRED(spin);
- return;
- }
- th_gtid = this_thr->th.th_info.ds.ds_gtid;
- KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
-
-#if OMPT_SUPPORT && OMPT_BLAME
- ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
- if (ompt_enabled &&
- ompt_state != ompt_state_undefined) {
- if (ompt_state == ompt_state_idle) {
- if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
- KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
- ompt_state == ompt_state_wait_barrier_implicit ||
- ompt_state == ompt_state_wait_barrier_explicit);
-
- ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
- ompt_parallel_id_t pId;
- ompt_task_id_t tId;
- if (team){
- pId = team->ompt_team_info.parallel_id;
- tId = team->ompt_task_info.task_id;
- } else {
- pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
- tId = this_thr->th.th_current_task->ompt_task_info.task_id;
- }
- ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
- }
- }
-#endif
-
- // Setup for waiting
- KMP_INIT_YIELD(spins);
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- // The worker threads cannot rely on the team struct existing at this point.
- // Use the bt values cached in the thread struct instead.
-#ifdef KMP_ADJUST_BLOCKTIME
- if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
- // Force immediate suspend if not set by user and more threads than available procs
- hibernate = 0;
- else
- hibernate = this_thr->th.th_team_bt_intervals;
-#else
- hibernate = this_thr->th.th_team_bt_intervals;
-#endif /* KMP_ADJUST_BLOCKTIME */
-
- /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
- of the specified #intervals, plus up to one interval more. This increment makes
- certain that this thread doesn't go to sleep too soon. */
- if (hibernate != 0)
- hibernate++;
-
- // Add in the current time value.
- hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
- KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
- th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
- hibernate - __kmp_global.g.g_time.dt.t_value));
- }
-
- KMP_MB();
-
- // Main wait spin loop
- while (flag->notdone_check()) {
- int in_pool;
-
- /* If the task team is NULL, it means one of things:
- 1) A newly-created thread is first being released by __kmp_fork_barrier(), and
- its task team has not been set up yet.
- 2) All tasks have been executed to completion, this thread has decremented the task
- team's ref ct and possibly deallocated it, and should no longer reference it.
- 3) Tasking is off for this region. This could be because we are in a serialized region
- (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
- kmp_task_team_t * task_team = NULL;
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- task_team = this_thr->th.th_task_team;
- if (task_team != NULL) {
- if (TCR_SYNC_4(task_team->tt.tt_active)) {
- if (KMP_TASKING_ENABLED(task_team))
- flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
- USE_ITT_BUILD_ARG(itt_sync_obj), 0);
- }
- else {
- KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
- this_thr->th.th_task_team = NULL;
- }
- } // if
- } // if
-
- KMP_FSYNC_SPIN_PREPARE(spin);
- if (TCR_4(__kmp_global.g.g_done)) {
- if (__kmp_global.g.g_abort)
- __kmp_abort_thread();
- break;
- }
-
- // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- // TODO: Should it be number of cores instead of thread contexts? Like:
- // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
- // Need performance improvement data to make the change...
- KMP_YIELD_SPIN(spins);
-
- // Check if this thread was transferred from a team
- // to the thread pool (or vice-versa) while spinning.
- in_pool = !!TCR_4(this_thr->th.th_in_pool);
- if (in_pool != !!this_thr->th.th_active_in_pool) {
- if (in_pool) { // Recently transferred from team to pool
- KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
- this_thr->th.th_active_in_pool = TRUE;
- /* Here, we cannot assert that:
- KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
- __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
- lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
- by the workers. The two can get out of sync for brief periods of time. */
- }
- else { // Recently transferred from pool to team
- KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
- KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
- this_thr->th.th_active_in_pool = FALSE;
- }
- }
-
- // Don't suspend if KMP_BLOCKTIME is set to "infinite"
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
- continue;
-
- // Don't suspend if there is a likelihood of new tasks being spawned.
- if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
- continue;
-
- // If we have waited a bit more, fall asleep
- if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
- continue;
-
- KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
-
- flag->suspend(th_gtid);
-
- if (TCR_4(__kmp_global.g.g_done)) {
- if (__kmp_global.g.g_abort)
- __kmp_abort_thread();
- break;
- }
- // TODO: If thread is done with work and times out, disband/free
- }
-
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_state != ompt_state_undefined) {
- if (ompt_state == ompt_state_idle) {
- if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
- ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
- KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
- ompt_state == ompt_state_wait_barrier_implicit ||
- ompt_state == ompt_state_wait_barrier_explicit);
-
- ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
- ompt_parallel_id_t pId;
- ompt_task_id_t tId;
- if (team){
- pId = team->ompt_team_info.parallel_id;
- tId = team->ompt_task_info.task_id;
- } else {
- pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
- tId = this_thr->th.th_current_task->ompt_task_info.task_id;
- }
- ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
- }
- }
-#endif
-
- KMP_FSYNC_SPIN_ACQUIRED(spin);
-}
-
-/* Release any threads specified as waiting on the flag by releasing the flag and resuming the waiting thread
- if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake
- up the potentially sleeping thread and prevent deadlocks! */
-template <class C>
-static inline void __kmp_release_template(C *flag)
-{
-#ifdef KMP_DEBUG
- int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
-#endif
- KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
- KMP_DEBUG_ASSERT(flag->get());
- KMP_FSYNC_RELEASING(flag->get());
-
- flag->internal_release();
-
- KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get())));
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- // Only need to check sleep stuff if infinite block time not set
- if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping?
- for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
- kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag
- if (waiter) {
- int wait_gtid = waiter->th.th_info.ds.ds_gtid;
- // Wake up thread if needed
- KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
- gtid, wait_gtid, flag->get()));
- flag->resume(wait_gtid); // unsets flag's current_waiter when done
- }
- }
- }
- }
-}
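Editor's note: the wait/release templates above encode a contract -- the waiter escalates from busy spinning to yielding to actually suspending, and the releasing thread must always run the release path so a suspended waiter is woken. Below is a generic, self-contained C11 sketch of that escalation pattern with hypothetical toy_* names; the real templates additionally execute tasks while spinning, honor KMP_BLOCKTIME, and suspend on the flag's sleep bit instead of sleeping blindly:

#include <stdatomic.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <stdio.h>

static void toy_wait(atomic_int *flag) {
    int spins = 0;
    while (!atomic_load_explicit(flag, memory_order_acquire)) {
        if (spins++ < 1000) {
            /* stage 1: pure busy spin (libomp also issues pause hints)      */
        } else if (spins < 2000) {
            sched_yield();                     /* stage 2: yield the CPU     */
        } else {
            struct timespec ts = { 0, 1000000 };
            nanosleep(&ts, NULL);              /* stage 3: block ~1 ms; libomp
                instead suspends on the flag's sleep bit and is woken
                explicitly by the releasing thread                           */
        }
    }
}

static void *releaser(void *arg) {
    /* The releasing side must always run, or the waiter can hang forever --
       the same deadlock rule stated for __kmp_wait_* / __kmp_release above. */
    atomic_store_explicit((atomic_int *)arg, 1, memory_order_release);
    return NULL;
}

int main(void) {
    atomic_int flag = 0;
    pthread_t t;
    pthread_create(&t, NULL, releaser, &flag);
    toy_wait(&flag);
    pthread_join(t, NULL);
    puts("released");
    return 0;
}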
-
-template <typename FlagType>
-struct flag_traits {};
-
-template <>
-struct flag_traits<kmp_uint32> {
- typedef kmp_uint32 flag_t;
- static const flag_type t = flag32;
- static inline flag_t tcr(flag_t f) { return TCR_4(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
-};
-
-template <>
-struct flag_traits<kmp_uint64> {
- typedef kmp_uint64 flag_t;
- static const flag_type t = flag64;
- static inline flag_t tcr(flag_t f) { return TCR_8(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
-};
-
-template <typename FlagType>
-class kmp_basic_flag : public kmp_flag<FlagType> {
- typedef flag_traits<FlagType> traits_type;
- FlagType checker; /**< Value to compare flag to to check if flag has been released. */
- kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this thread. */
- kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this thread. */
- public:
- kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
- kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
- waiting_threads[0] = thr;
- }
- kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
- /*!
- * @param i in index into waiting_threads
- * @result the thread that is waiting at index i
- */
- kmp_info_t * get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i<num_waiting_threads);
- return waiting_threads[i];
- }
- /*!
- * @result num_waiting_threads
- */
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- /*!
- * @param thr in the thread which is now waiting
- *
- * Insert a waiting thread at index 0.
- */
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- /*!
- * @result true if the flag object has been released.
- */
- bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
- /*!
- * @param old_loc in old value of flag
- * @result true if the flag's old value indicates it was released.
- */
- bool done_check_val(FlagType old_loc) { return old_loc == checker; }
- /*!
- * @result true if the flag object is not yet released.
- * Used in __kmp_wait_template like:
- * @code
- * while (flag.notdone_check()) { pause(); }
- * @endcode
- */
- bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
- /*!
- * @result Actual flag value before release was applied.
- * Trigger all waiting threads to run by modifying flag to release state.
- */
- void internal_release() {
- (void) traits_type::test_then_add4((volatile FlagType *)this->get());
- }
- /*!
- * @result Actual flag value before sleep bit(s) set.
- * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s).
- */
- FlagType set_sleeping() {
- return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @result Actual flag value before sleep bit(s) cleared.
- * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s).
- */
- FlagType unset_sleeping() {
- return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @param old_loc in old value of flag
- * Test whether there are threads sleeping on the flag's old value in old_loc.
- */
- bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
- /*!
- * Test whether there are threads sleeping on the flag.
- */
- bool is_sleeping() { return is_sleeping_val(*(this->get())); }
- bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
- kmp_uint8 *get_stolen() { return NULL; }
- enum barrier_type get_bt() { return bs_last_barrier; }
-};
-
-class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
- public:
- kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
- kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
- kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
- void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
- void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
- return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- void wait(kmp_info_t *this_thr, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
- __kmp_wait_template(this_thr, this, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- flag_type get_ptr_type() { return flag32; }
-};
-
-class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
- public:
- kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
- kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
- kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
- void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
- void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
- return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- void wait(kmp_info_t *this_thr, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
- __kmp_wait_template(this_thr, this, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- flag_type get_ptr_type() { return flag64; }
-};
-
-// Hierarchical 64-bit on-core barrier instantiation
-class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
- kmp_uint64 checker;
- kmp_info_t * waiting_threads[1];
- kmp_uint32 num_waiting_threads;
- kmp_uint32 offset; /**< Portion of flag that is of interest for an operation. */
- bool flag_switch; /**< Indicates a switch in flag location. */
- enum barrier_type bt; /**< Barrier type. */
- kmp_info_t * this_thr; /**< Thread that may be redirected to different flag location. */
-#if USE_ITT_BUILD
- void *itt_sync_obj; /**< ITT object that must be passed to new flag location. */
-#endif
- unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
-public:
- kmp_flag_oncore(volatile kmp_uint64 *p)
- : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
- kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
- : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
- kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
- kmp_info_t * thr
-#if USE_ITT_BUILD
- , void *itt
-#endif
- )
- : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
- flag_switch(false), bt(bar_t), this_thr(thr)
-#if USE_ITT_BUILD
- , itt_sync_obj(itt)
-#endif
- {}
+ * The flag_type describes the storage used for the flag.
+ */
+enum flag_type {
+ flag32, /**< 32 bit flags */
+ flag64, /**< 64 bit flags */
+ flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
+};
+
+/*!
+ * Base class for wait/release volatile flag
+ */
+template <typename P>
+class kmp_flag {
+ volatile P * loc; /**< Pointer to the flag storage that is modified by another thread */
+ flag_type t; /**< "Type" of the flag in loc */
+ public:
+ typedef P flag_t;
+ kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
+ /*!
+ * @result the pointer to the actual flag
+ */
+ volatile P * get() { return loc; }
+ /*!
+ * @param new_loc in set loc to point at new_loc
+ */
+ void set(volatile P *new_loc) { loc = new_loc; }
+ /*!
+ * @result the flag_type
+ */
+ flag_type get_type() { return t; }
+ // Derived classes must provide the following:
+ /*
+ kmp_info_t * get_waiter(kmp_uint32 i);
+ kmp_uint32 get_num_waiters();
+ bool done_check();
+ bool done_check_val(P old_loc);
+ bool notdone_check();
+ P internal_release();
+ void suspend(int th_gtid);
+ void resume(int th_gtid);
+ P set_sleeping();
+ P unset_sleeping();
+ bool is_sleeping();
+ bool is_any_sleeping();
+ bool is_sleeping_val(P old_loc);
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained);
+ */
+};
+
+/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
+ must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
+template <class C>
+static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
+ USE_ITT_BUILD_ARG(void * itt_sync_obj) )
+{
+ // NOTE: We may not belong to a team at this point.
+ volatile typename C::flag_t *spin = flag->get();
+ kmp_uint32 spins;
+ kmp_uint32 hibernate;
+ int th_gtid;
+ int tasks_completed = FALSE;
+
+ KMP_FSYNC_SPIN_INIT(spin, NULL);
+ if (flag->done_check()) {
+ KMP_FSYNC_SPIN_ACQUIRED(spin);
+ return;
+ }
+ th_gtid = this_thr->th.th_info.ds.ds_gtid;
+ KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
+
+#if OMPT_SUPPORT && OMPT_BLAME
+ ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
+ if (ompt_enabled &&
+ ompt_state != ompt_state_undefined) {
+ if (ompt_state == ompt_state_idle) {
+ if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
+ }
+ } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
+ KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
+ ompt_state == ompt_state_wait_barrier_implicit ||
+ ompt_state == ompt_state_wait_barrier_explicit);
+
+ ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
+ ompt_parallel_id_t pId;
+ ompt_task_id_t tId;
+ if (team){
+ pId = team->ompt_team_info.parallel_id;
+ tId = team->ompt_task_info.task_id;
+ } else {
+ pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
+ tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+ }
+ ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
+ }
+ }
+#endif
+
+ // Setup for waiting
+ KMP_INIT_YIELD(spins);
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ // The worker threads cannot rely on the team struct existing at this point.
+ // Use the bt values cached in the thread struct instead.
+#ifdef KMP_ADJUST_BLOCKTIME
+ if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
+ // Force immediate suspend if not set by user and more threads than available procs
+ hibernate = 0;
+ else
+ hibernate = this_thr->th.th_team_bt_intervals;
+#else
+ hibernate = this_thr->th.th_team_bt_intervals;
+#endif /* KMP_ADJUST_BLOCKTIME */
+
+ /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
+ of the specified #intervals, plus up to one interval more. This increment makes
+ certain that this thread doesn't go to sleep too soon. */
+ if (hibernate != 0)
+ hibernate++;
+
+ // Add in the current time value.
+ hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
+ KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
+ th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
+ hibernate - __kmp_global.g.g_time.dt.t_value));
+ }
+
+ KMP_MB();
+
+ // Main wait spin loop
+ while (flag->notdone_check()) {
+ int in_pool;
+
+ /* If the task team is NULL, it means one of three things:
+ 1) A newly-created thread is first being released by __kmp_fork_barrier(), and
+ its task team has not been set up yet.
+ 2) All tasks have been executed to completion, this thread has decremented the task
+ team's ref ct and possibly deallocated it, and should no longer reference it.
+ 3) Tasking is off for this region. This could be because we are in a serialized region
+ (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
+ kmp_task_team_t * task_team = NULL;
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ task_team = this_thr->th.th_task_team;
+ if (task_team != NULL) {
+ if (TCR_SYNC_4(task_team->tt.tt_active)) {
+ if (KMP_TASKING_ENABLED(task_team))
+ flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
+ USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+ }
+ else {
+ KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
+ this_thr->th.th_task_team = NULL;
+ }
+ } // if
+ } // if
+
+ KMP_FSYNC_SPIN_PREPARE(spin);
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
+ }
+
+ // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
+ KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
+ // TODO: Should it be number of cores instead of thread contexts? Like:
+ // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
+ // Need performance improvement data to make the change...
+ KMP_YIELD_SPIN(spins);
+
+ // Check if this thread was transferred from a team
+ // to the thread pool (or vice-versa) while spinning.
+ in_pool = !!TCR_4(this_thr->th.th_in_pool);
+ if (in_pool != !!this_thr->th.th_active_in_pool) {
+ if (in_pool) { // Recently transferred from team to pool
+ KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
+ this_thr->th.th_active_in_pool = TRUE;
+ /* Here, we cannot assert that:
+ KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
+ __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
+ lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
+ by the workers. The two can get out of sync for brief periods of time. */
+ }
+ else { // Recently transferred from pool to team
+ KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
+ KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
+ this_thr->th.th_active_in_pool = FALSE;
+ }
+ }
+
+ // Don't suspend if KMP_BLOCKTIME is set to "infinite"
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+ continue;
+
+ // Don't suspend if there is a likelihood of new tasks being spawned.
+ if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
+ continue;
+
+ // If we have waited a bit more, fall asleep
+ if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
+ continue;
+
+ KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
+
+ flag->suspend(th_gtid);
+
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
+ }
+ // TODO: If thread is done with work and times out, disband/free
+ }
+
+#if OMPT_SUPPORT && OMPT_BLAME
+ if (ompt_enabled &&
+ ompt_state != ompt_state_undefined) {
+ if (ompt_state == ompt_state_idle) {
+ if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
+ }
+ } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
+ KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
+ ompt_state == ompt_state_wait_barrier_implicit ||
+ ompt_state == ompt_state_wait_barrier_explicit);
+
+ ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
+ ompt_parallel_id_t pId;
+ ompt_task_id_t tId;
+ if (team){
+ pId = team->ompt_team_info.parallel_id;
+ tId = team->ompt_task_info.task_id;
+ } else {
+ pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
+ tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+ }
+ ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
+ }
+ }
+#endif
+
+ KMP_FSYNC_SPIN_ACQUIRED(spin);
+}
+
+/* Release any threads specified as waiting on the flag by releasing the flag and resume the waiting thread
+ if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake
+ up the potentially sleeping thread and prevent deadlocks! */
+template <class C>
+static inline void __kmp_release_template(C *flag)
+{
+#ifdef KMP_DEBUG
+ int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
+#endif
+ KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
+ KMP_DEBUG_ASSERT(flag->get());
+ KMP_FSYNC_RELEASING(flag->get());
+
+ flag->internal_release();
+
+ KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get())));
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ // Only need to check sleep stuff if infinite block time not set
+ if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping?
+ for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
+ kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag
+ if (waiter) {
+ int wait_gtid = waiter->th.th_info.ds.ds_gtid;
+ // Wake up thread if needed
+ KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
+ gtid, wait_gtid, flag->get()));
+ flag->resume(wait_gtid); // unsets flag's current_waiter when done
+ }
+ }
+ }
+ }
+}
+
+template <typename FlagType>
+struct flag_traits {};
+
+template <>
+struct flag_traits<kmp_uint32> {
+ typedef kmp_uint32 flag_t;
+ static const flag_type t = flag32;
+ static inline flag_t tcr(flag_t f) { return TCR_4(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
+};
+
+template <>
+struct flag_traits<kmp_uint64> {
+ typedef kmp_uint64 flag_t;
+ static const flag_type t = flag64;
+ static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
+};
+
+template <typename FlagType>
+class kmp_basic_flag : public kmp_flag<FlagType> {
+ typedef flag_traits<FlagType> traits_type;
+ FlagType checker; /**< Value the flag is compared against to check whether it has been released. */
+ kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this thread. */
+ kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this thread. */
+ public:
+ kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
+ kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
+ waiting_threads[0] = thr;
+ }
+ kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
+ /*!
+ * @param i in index into waiting_threads
+ * @result the thread that is waiting at index i
+ */
kmp_info_t * get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i<num_waiting_threads);
+ KMP_DEBUG_ASSERT(i<num_waiting_threads);
return waiting_threads[i];
- }
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ }
+ /*!
+ * @result num_waiting_threads
+ */
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ /*!
+ * @param thr in the thread which is now waiting
+ *
+ * Insert a waiting thread at index 0.
+ */
void set_waiter(kmp_info_t *thr) {
waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
- bool done_check() { return done_check_val(*get()); }
- bool notdone_check() {
- // Calculate flag_switch
- if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
- flag_switch = true;
- if (byteref(get(),offset) != 1 && !flag_switch)
- return true;
- else if (flag_switch) {
- this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
- kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
- __kmp_wait_64(this_thr, &flag, TRUE
-#if USE_ITT_BUILD
- , itt_sync_obj
-#endif
- );
- }
- return false;
- }
- void internal_release() {
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
- byteref(get(),offset) = 1;
- }
- else {
- kmp_uint64 mask=0;
- byteref(&mask,offset) = 1;
- (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
- }
- }
- kmp_uint64 set_sleeping() {
- return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
- }
- kmp_uint64 unset_sleeping() {
- return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
- bool is_sleeping() { return is_sleeping_val(*get()); }
- bool is_any_sleeping() { return is_sleeping_val(*get()); }
- void wait(kmp_info_t *this_thr, int final_spin) {
- __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
- void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
- return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- kmp_uint8 *get_stolen() { return NULL; }
- enum barrier_type get_bt() { return bt; }
- flag_type get_ptr_type() { return flag_oncore; }
-};
-
-
-/*!
-@}
-*/
-
-#endif // KMP_WAIT_RELEASE_H
+ num_waiting_threads = 1;
+ }
+ /*!
+ * @result true if the flag object has been released.
+ */
+ bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
+ /*!
+ * @param old_loc in old value of flag
+ * @result true if the flag's old value indicates it was released.
+ */
+ bool done_check_val(FlagType old_loc) { return old_loc == checker; }
+ /*!
+ * @result true if the flag object is not yet released.
+ * Used in __kmp_wait_template like:
+ * @code
+ * while (flag.notdone_check()) { pause(); }
+ * @endcode
+ */
+ bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
+ /*!
+ * @result Actual flag value before release was applied.
+ * Trigger all waiting threads to run by modifying flag to release state.
+ */
+ void internal_release() {
+ (void) traits_type::test_then_add4((volatile FlagType *)this->get());
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) set.
+ * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s).
+ */
+ FlagType set_sleeping() {
+ return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) cleared.
+ * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s).
+ */
+ FlagType unset_sleeping() {
+ return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @param old_loc in old value of flag
+ * Test whether there are threads sleeping on the flag's old value in old_loc.
+ */
+ bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
+ /*!
+ * Test whether there are threads sleeping on the flag.
+ */
+ bool is_sleeping() { return is_sleeping_val(*(this->get())); }
+ bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
+ kmp_uint8 *get_stolen() { return NULL; }
+ enum barrier_type get_bt() { return bs_last_barrier; }
+};
+
+class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
+ public:
+ kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
+ kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
+ kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
+ void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ void wait(kmp_info_t *this_thr, int final_spin
+ USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
+ __kmp_wait_template(this_thr, this, final_spin
+ USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ flag_type get_ptr_type() { return flag32; }
+};
+
+class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
+ public:
+ kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
+ kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
+ kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
+ void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ void wait(kmp_info_t *this_thr, int final_spin
+ USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
+ __kmp_wait_template(this_thr, this, final_spin
+ USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ flag_type get_ptr_type() { return flag64; }
+};
+
+// Hierarchical 64-bit on-core barrier instantiation
+class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
+ kmp_uint64 checker;
+ kmp_info_t * waiting_threads[1];
+ kmp_uint32 num_waiting_threads;
+ kmp_uint32 offset; /**< Portion of flag that is of interest for an operation. */
+ bool flag_switch; /**< Indicates a switch in flag location. */
+ enum barrier_type bt; /**< Barrier type. */
+ kmp_info_t * this_thr; /**< Thread that may be redirected to different flag location. */
+#if USE_ITT_BUILD
+ void *itt_sync_obj; /**< ITT object that must be passed to new flag location. */
+#endif
+ unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
+public:
+ kmp_flag_oncore(volatile kmp_uint64 *p)
+ : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
+ kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
+ : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
+ kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
+ kmp_info_t * thr
+#if USE_ITT_BUILD
+ , void *itt
+#endif
+ )
+ : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
+ flag_switch(false), bt(bar_t), this_thr(thr)
+#if USE_ITT_BUILD
+ , itt_sync_obj(itt)
+#endif
+ {}
+ kmp_info_t * get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i<num_waiting_threads);
+ return waiting_threads[i];
+ }
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
+ bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
+ bool done_check() { return done_check_val(*get()); }
+ bool notdone_check() {
+ // Calculate flag_switch
+ if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
+ flag_switch = true;
+ if (byteref(get(),offset) != 1 && !flag_switch)
+ return true;
+ else if (flag_switch) {
+ this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
+ kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
+ __kmp_wait_64(this_thr, &flag, TRUE
+#if USE_ITT_BUILD
+ , itt_sync_obj
+#endif
+ );
+ }
+ return false;
+ }
+ void internal_release() {
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
+ byteref(get(),offset) = 1;
+ }
+ else {
+ kmp_uint64 mask=0;
+ byteref(&mask,offset) = 1;
+ (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
+ }
+ }
+ kmp_uint64 set_sleeping() {
+ return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
+ }
+ kmp_uint64 unset_sleeping() {
+ return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
+ }
+ bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
+ bool is_sleeping() { return is_sleeping_val(*get()); }
+ bool is_any_sleeping() { return is_sleeping_val(*get()); }
+ void wait(kmp_info_t *this_thr, int final_spin) {
+ __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
+ USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ kmp_uint8 *get_stolen() { return NULL; }
+ enum barrier_type get_bt() { return bt; }
+ flag_type get_ptr_type() { return flag_oncore; }
+};
+
+
+/*!
+@}
+*/
+
+#endif // KMP_WAIT_RELEASE_H
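At its core, the wait/release machinery above reduces to one thread spinning (notdone_check()) on a shared word until it reaches an expected value, while another thread bumps that word (internal_release() via test_then_add4()) and wakes any sleeper. The following is a minimal, self-contained C++ sketch of just that protocol; basic_flag64 and wait_template() are illustrative stand-ins rather than libomp code, and the yielding, sleeping, tasking and OMPT logic of the real __kmp_wait_template() is deliberately omitted.

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

// Counterpart of kmp_basic_flag<kmp_uint64>: a flag word plus the value
// ("checker") that means the flag has been released.
struct basic_flag64 {
    std::atomic<uint64_t> *loc;  // flag storage, like kmp_flag<P>::loc
    uint64_t checker;            // value that signals "released"
    bool done_check() const { return loc->load(std::memory_order_acquire) == checker; }
    void internal_release() { loc->fetch_add(4, std::memory_order_release); }  // mirrors test_then_add4()
};

// Stripped-down analogue of __kmp_wait_template(): spin until the flag is done.
static void wait_template(basic_flag64 &flag) {
    while (!flag.done_check())
        std::this_thread::yield();  // stands in for KMP_YIELD/KMP_YIELD_SPIN and suspend()
}

int main() {
    std::atomic<uint64_t> go{0};
    basic_flag64 flag{&go, 4};  // released once the word reaches 4
    std::thread waiter([&] { wait_template(flag); std::puts("waiter woken"); });
    flag.internal_release();    // __kmp_release_template() analogue
    waiter.join();
    return 0;
}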
diff --git a/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h b/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h
index 87c6f37f16..61a046c37d 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_wrapper_getpid.h
@@ -1,56 +1,56 @@
-/*
- * kmp_wrapper_getpid.h -- getpid() declaration.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_WRAPPER_GETPID_H
-#define KMP_WRAPPER_GETPID_H
-
-#if KMP_OS_UNIX
-
- // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers.
- #include <sys/types.h>
- #include <unistd.h>
-
-#elif KMP_OS_WINDOWS
-
- // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h".
- #include <process.h>
- // Let us simulate Unix.
- typedef int pid_t;
- #define getpid _getpid
-
-#else
-
- #error Unknown or unsupported OS.
-
-#endif
-
-/*
- TODO: All the libomp source code uses the pid_t type for storing the result of getpid(), which is good.
- But often it is printed as "%d", which is not good, because it ignores the pid_t definition (may pid_t
- be longer than int?). It seems all pid prints should be rewritten as
-
- printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid );
-
- or (at least) as
-
- printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid );
-
- (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UINT32_SPEC are defined in "kmp_os.h".)
-
-*/
-
-#endif // KMP_WRAPPER_GETPID_H
-
-// end of file //
+/*
+ * kmp_wrapper_getpid.h -- getpid() declaration.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_WRAPPER_GETPID_H
+#define KMP_WRAPPER_GETPID_H
+
+#if KMP_OS_UNIX
+
+ // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers.
+ #include <sys/types.h>
+ #include <unistd.h>
+
+#elif KMP_OS_WINDOWS
+
+ // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h".
+ #include <process.h>
+ // Let us simulate Unix.
+ typedef int pid_t;
+ #define getpid _getpid
+
+#else
+
+ #error Unknown or unsupported OS.
+
+#endif
+
+/*
+ TODO: All the libomp source code uses the pid_t type for storing the result of getpid(), which is good.
+ But often it is printed as "%d", which is not good, because it ignores the pid_t definition (may pid_t
+ be longer than int?). It seems all pid prints should be rewritten as
+
+ printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid );
+
+ or (at least) as
+
+ printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid );
+
+ (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UINT32_SPEC are defined in "kmp_os.h".)
+
+*/
+
+#endif // KMP_WRAPPER_GETPID_H
+
+// end of file //
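A small, self-contained sketch of the printing style the TODO above recommends, written with the standard <cinttypes> spellings on the assumption that kmp_uint64 and KMP_UINT64_SPEC correspond to uint64_t and PRIu64; it also assumes a Unix-like system (the KMP_OS_UNIX branch), where getpid() comes from <unistd.h>.

#include <cinttypes>   // PRIu64
#include <cstdint>
#include <cstdio>
#include <sys/types.h>
#include <unistd.h>    // getpid() on Unix-like systems

int main() {
    pid_t pid = getpid();
    // Widen before printing so the format never truncates pid_t,
    // whatever its width on the current platform.
    std::printf("pid = %" PRIu64 "\n", (uint64_t)pid);
    return 0;
}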
diff --git a/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h b/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h
index ce9ae3f40a..453d1ef5e7 100644
--- a/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h
+++ b/contrib/libs/cxxsupp/openmp/kmp_wrapper_malloc.h
@@ -1,205 +1,205 @@
-/*
- * kmp_wrapper_malloc.h -- Wrappers for memory allocation routines
- * (malloc(), free(), and others).
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef KMP_WRAPPER_MALLOC_H
-#define KMP_WRAPPER_MALLOC_H
-
-/*
- This header serves for 3 purposes:
-
- 1. Declaring standard memory allocation routines in an OS-independent way.
- 2. Passing source location info through memory allocation wrappers.
- 3. Enabling native memory debugging capabilities.
-
-
- 1. Declaring standard memory allocation routines in an OS-independent way.
- -----------------------------------------------------------------------
-
- On Linux* OS, alloca() function is declared in <alloca.h> header, while on Windows* OS there is no
- <alloca.h> header, function _alloca() (note underscore!) is declared in <malloc.h>. This header
- eliminates these differences, so client code including "kmp_wrapper_malloc.h" can rely on
- following routines:
-
- malloc
- calloc
- realloc
- free
- alloca
-
- in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently
- it is available only on Windows* OS.)
-
-
- 2. Passing source location info through memory allocation wrappers.
- -------------------------------------------------------------------
-
- Some tools may help debugging memory errors, for example, report memory leaks. However, memory
- allocation wrappers may hinder source location.
-
- For example:
-
- void * aligned_malloc( int size ) {
- void * ptr = malloc( size ); // All the memory leaks will be reported at this line.
- // some adjustments...
- return ptr;
- };
-
- ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-(
-
- To overcome the problem, information about original source location should be passed through all
- the memory allocation wrappers, for example:
-
- void * aligned_malloc( int size, char const * file, int line ) {
- void * ptr = _malloc_dbg( size, file, line );
- // some adjustments...
- return ptr;
- };
-
- void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
-
- This is a good idea for debug, but passing additional arguments impacts performance. Disabling
- extra arguments in the release version of the software introduces too much conditional compilation,
- which makes code unreadable. This header defines a few macros and functions to facilitate this:
-
- void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
- void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
- // some adjustments...
- return ptr;
- };
- #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
- // Use macro instead of direct call to function.
-
- void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be reported at this line.
-
-
- 3. Enabling native memory debugging capabilities.
- -------------------------------------------------
-
- Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft
- RTL tracks all memory allocations and can report memory leaks. This header enables this, and
- makes report more useful (see "Passing source location info through memory allocation
- wrappers").
-
-*/
-
-#include <stdlib.h>
-
-#include "kmp_os.h"
-
-// Include alloca() declaration.
-#if KMP_OS_WINDOWS
- #include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
- #define alloca _alloca // Allows using alloca() without the underscore.
-#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
- // Declared in "stdlib.h".
-#elif KMP_OS_UNIX
- #include <alloca.h> // Linux* OS and OS X*: alloca() declared in <alloca.h>.
-#else
- #error Unknown or unsupported OS.
-#endif
-
-/*
- KMP_SRC_LOC_DECL -- Declaring source location parameters, to be used in function declaration.
- KMP_SRC_LOC_PARM -- Source location parameters, to be used to pass parameters to underlying
- levels.
- KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at
- top-level.
-
- Typical usage:
-
- void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
- // Note: Comma is missed before KMP_SRC_LOC_DECL.
- KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
- ...
- }
- #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
- // Use macro instead of direct call to function -- macro passes info about current
- // source location to the func.
-*/
-#if KMP_DEBUG
- #define KMP_SRC_LOC_DECL , char const * _file_, int _line_
- #define KMP_SRC_LOC_PARM , _file_, _line_
- #define KMP_SRC_LOC_CURR , __FILE__, __LINE__
-#else
- #define KMP_SRC_LOC_DECL
- #define KMP_SRC_LOC_PARM
- #define KMP_SRC_LOC_CURR
-#endif // KMP_DEBUG
-
-/*
- malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) which accept extra
- arguments (source location info) in debug mode. They should be used in place of malloc() and
- free(); this allows enabling native memory debugging capabilities (if any).
-
- Typical usage:
-
- ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
- // Inside memory allocation wrapper, or
- ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
- // Outside of memory allocation wrapper.
-
-
-*/
-#define malloc_src_loc( args ) _malloc_src_loc( args )
-#define free_src_loc( args ) _free_src_loc( args )
- /*
- Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3
- parameters, but calls to malloc_src_loc() are always the same:
-
- ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
-
- Compiler issues warning/error "too few arguments in macro invocation". Declaring two
- macros, malloc_src_loc() and _malloc_src_loc(), overcomes the problem.
- */
-
-#if KMP_DEBUG
-
- #if KMP_OS_WINDOWS && _DEBUG
- // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined.
-
- // Windows* OS has native memory debugging capabilities. Enable them.
-
- #include <crtdbg.h>
-
- #define KMP_MEM_BLOCK _CLIENT_BLOCK
- #define malloc( size ) _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
- #define calloc( num, size ) _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
- #define realloc( ptr, size ) _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
- #define free( ptr ) _free_dbg( (ptr), KMP_MEM_BLOCK )
-
- #define _malloc_src_loc( size, file, line ) _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) )
- #define _free_src_loc( ptr, file, line ) _free_dbg( (ptr), KMP_MEM_BLOCK )
-
- #else
-
- // Linux* OS, OS X*, or non-debug Windows* OS.
-
- #define _malloc_src_loc( size, file, line ) malloc( (size) )
- #define _free_src_loc( ptr, file, line ) free( (ptr) )
-
- #endif
-
-#else
-
- // In release build malloc_src_loc() and free_src_loc() do not have extra parameters.
- #define _malloc_src_loc( size ) malloc( (size) )
- #define _free_src_loc( ptr ) free( (ptr) )
-
-#endif // KMP_DEBUG
-
-#endif // KMP_WRAPPER_MALLOC_H
-
-// end of file //
+/*
+ * kmp_wrapper_malloc.h -- Wrappers for memory allocation routines
+ * (malloc(), free(), and others).
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef KMP_WRAPPER_MALLOC_H
+#define KMP_WRAPPER_MALLOC_H
+
+/*
+ This header serves for 3 purposes:
+
+ 1. Declaring standard memory allocation routines in an OS-independent way.
+ 2. Passing source location info through memory allocation wrappers.
+ 3. Enabling native memory debugging capabilities.
+
+
+ 1. Declaring standard memory allocation routines in an OS-independent way.
+ -----------------------------------------------------------------------
+
+ On Linux* OS, alloca() function is declared in <alloca.h> header, while on Windows* OS there is no
+ <alloca.h> header, function _alloca() (note underscore!) is declared in <malloc.h>. This header
+ eliminates these differences, so client code including "kmp_wrapper_malloc.h" can rely on
+ following routines:
+
+ malloc
+ calloc
+ realloc
+ free
+ alloca
+
+ in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently
+ it is available only on Windows* OS.)
+
+
+ 2. Passing source location info through memory allocation wrappers.
+ -------------------------------------------------------------------
+
+ Some tools may help debugging memory errors, for example, report memory leaks. However, memory
+ allocation wrappers may hinder source location.
+
+ For example:
+
+ void * aligned_malloc( int size ) {
+ void * ptr = malloc( size ); // All the memory leaks will be reported at this line.
+ // some adjustments...
+ return ptr;
+ };
+
+ ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-(
+
+ To overcome the problem, information about original source location should be passed through all
+ the memory allocation wrappers, for example:
+
+ void * aligned_malloc( int size, char const * file, int line ) {
+ void * ptr = _malloc_dbg( size, file, line );
+ // some adjustments...
+ return ptr;
+ };
+
+ void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
+
+ This is a good idea for debug, but passing additional arguments impacts performance. Disabling
+ extra arguments in the release version of the software introduces too much conditional compilation,
+ which makes code unreadable. This header defines a few macros and functions to facilitate this:
+
+ void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
+ void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
+ // some adjustments...
+ return ptr;
+ };
+ #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
+ // Use macro instead of direct call to function.
+
+ void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be reported at this line.
+
+
+ 3. Enabling native memory debugging capabilities.
+ -------------------------------------------------
+
+ Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft
+ RTL tracks all memory allocations and can report memory leaks. This header enables this, and
+ makes report more useful (see "Passing source location info through memory allocation
+ wrappers").
+
+*/
+
+#include <stdlib.h>
+
+#include "kmp_os.h"
+
+// Include alloca() declaration.
+#if KMP_OS_WINDOWS
+ #include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
+ #define alloca _alloca // Allows using alloca() without the underscore.
+#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
+ // Declared in "stdlib.h".
+#elif KMP_OS_UNIX
+ #include <alloca.h> // Linux* OS and OS X*: alloca() declared in <alloca.h>.
+#else
+ #error Unknown or unsupported OS.
+#endif
+
+/*
+ KMP_SRC_LOC_DECL -- Declaring source location parameters, to be used in function declaration.
+ KMP_SRC_LOC_PARM -- Source location parameters, to be used to pass parameters to underlying
+ levels.
+ KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at
+ top-level.
+
+ Typical usage:
+
+ void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
+ // Note: Comma is missed before KMP_SRC_LOC_DECL.
+ KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
+ ...
+ }
+ #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
+ // Use macro instead of direct call to function -- macro passes info about current
+ // source location to the func.
+*/
+#if KMP_DEBUG
+ #define KMP_SRC_LOC_DECL , char const * _file_, int _line_
+ #define KMP_SRC_LOC_PARM , _file_, _line_
+ #define KMP_SRC_LOC_CURR , __FILE__, __LINE__
+#else
+ #define KMP_SRC_LOC_DECL
+ #define KMP_SRC_LOC_PARM
+ #define KMP_SRC_LOC_CURR
+#endif // KMP_DEBUG
+
+/*
+ malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) which accept extra
+ arguments (source location info) in debug mode. They should be used in place of malloc() and
+ free(); this allows enabling native memory debugging capabilities (if any).
+
+ Typical usage:
+
+ ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
+ // Inside memory allocation wrapper, or
+ ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
+ // Outside of memory allocation wrapper.
+
+
+*/
+#define malloc_src_loc( args ) _malloc_src_loc( args )
+#define free_src_loc( args ) _free_src_loc( args )
+ /*
+ Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3
+ parameters, but calls to malloc_src_loc() are always the same:
+
+ ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
+
+ Compiler issues warning/error "too few arguments in macro invocation". Declaring two
+ macros, malloc_src_loc() and _malloc_src_loc(), overcomes the problem.
+ */
+
+#if KMP_DEBUG
+
+ #if KMP_OS_WINDOWS && _DEBUG
+ // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined.
+
+ // Windows* OS has native memory debugging capabilities. Enable them.
+
+ #include <crtdbg.h>
+
+ #define KMP_MEM_BLOCK _CLIENT_BLOCK
+ #define malloc( size ) _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
+ #define calloc( num, size ) _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
+ #define realloc( ptr, size ) _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
+ #define free( ptr ) _free_dbg( (ptr), KMP_MEM_BLOCK )
+
+ #define _malloc_src_loc( size, file, line ) _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) )
+ #define _free_src_loc( ptr, file, line ) _free_dbg( (ptr), KMP_MEM_BLOCK )
+
+ #else
+
+ // Linux* OS, OS X*, or non-debug Windows* OS.
+
+ #define _malloc_src_loc( size, file, line ) malloc( (size) )
+ #define _free_src_loc( ptr, file, line ) free( (ptr) )
+
+ #endif
+
+#else
+
+ // In release build malloc_src_loc() and free_src_loc() do not have extra parameters.
+ #define _malloc_src_loc( size ) malloc( (size) )
+ #define _free_src_loc( ptr ) free( (ptr) )
+
+#endif // KMP_DEBUG
+
+#endif // KMP_WRAPPER_MALLOC_H
+
+// end of file //
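The source-location-forwarding pattern described above can be demonstrated with an ordinary standalone program. In this sketch DBG stands in for KMP_DEBUG, and the SRC_LOC_* macros, my_alloc_impl() and my_alloc() are illustrative counterparts of KMP_SRC_LOC_DECL/PARM/CURR and malloc_src_loc(); none of them are the real kmp_* definitions.

#include <cstddef>
#include <cstdio>
#include <cstdlib>

#define DBG 1
#if DBG
  #define SRC_LOC_DECL , const char *file_, int line_
  #define SRC_LOC_PARM , file_, line_
  #define SRC_LOC_CURR , __FILE__, __LINE__
#else
  #define SRC_LOC_DECL
  #define SRC_LOC_PARM
  #define SRC_LOC_CURR
#endif

// The wrapper takes the caller's location as trailing parameters; note the
// leading comma hidden inside SRC_LOC_DECL, exactly as in the header above.
// SRC_LOC_PARM would forward the location one more level down, as in the
// header's aligned_malloc example.
static void *my_alloc_impl(std::size_t size SRC_LOC_DECL) {
#if DBG
    std::printf("alloc of %zu bytes requested at %s:%d\n", size, file_, line_);
#endif
    return std::malloc(size);  // a debug RTL could pass file_/line_ to its debug allocator here
}

// Call sites go through the macro, so the caller's file/line reaches the wrapper.
#define my_alloc(size) my_alloc_impl((size) SRC_LOC_CURR)

int main() {
    void *p = my_alloc(128);  // reported against this line, not a line inside the wrapper
    std::free(p);
    return 0;
}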
diff --git a/contrib/libs/cxxsupp/openmp/libomp.rc.var b/contrib/libs/cxxsupp/openmp/libomp.rc.var
index fcc64c9f2f..cf6a9c9efa 100644
--- a/contrib/libs/cxxsupp/openmp/libomp.rc.var
+++ b/contrib/libs/cxxsupp/openmp/libomp.rc.var
@@ -1,70 +1,70 @@
-// libomp.rc.var
-
-//
-////===----------------------------------------------------------------------===//
-////
-//// The LLVM Compiler Infrastructure
-////
-//// This file is dual licensed under the MIT and the University of Illinois Open
-//// Source Licenses. See LICENSE.txt for details.
-////
-////===----------------------------------------------------------------------===//
-//
-
-#include "winres.h"
-#include "kmp_config.h"
-
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) resources
-#pragma code_page(1252)
-
-VS_VERSION_INFO VERSIONINFO
- // Parts of FILEVERSION and PRODUCTVERSION are 16-bit fields, entire build date yyyymmdd
- // does not fit into one version part, so we need to split it into yyyy and mmdd:
- FILEVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@
- PRODUCTVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@
- FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
- FILEFLAGS 0
-#if KMP_DEBUG
- | VS_FF_DEBUG
-#endif
-#if @LIBOMP_VERSION_BUILD@ == 0
- | VS_FF_PRIVATEBUILD | VS_FF_PRERELEASE
-#endif
- FILEOS VOS_NT_WINDOWS32 // Windows* Server* 2003, XP*, 2000, or NT*
- FILETYPE VFT_DLL
- BEGIN
- BLOCK "StringFileInfo"
- BEGIN
- BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200)
- BEGIN
-
- // FileDescription and LegalCopyright should be short.
- VALUE "FileDescription", "LLVM* OpenMP* Runtime Library\0"
- // Following values may be relatively long.
- VALUE "CompanyName", "LLVM\0"
- // VALUE "LegalTrademarks", "\0" // Not used for now.
- VALUE "ProductName", "LLVM* OpenMP* Runtime Library\0"
- VALUE "ProductVersion", "@LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@\0"
- VALUE "FileVersion", "@LIBOMP_VERSION_BUILD@\0"
- VALUE "InternalName", "@LIBOMP_LIB_FILE@\0"
- VALUE "OriginalFilename", "@LIBOMP_LIB_FILE@\0"
- VALUE "Comments",
- "LLVM* OpenMP* @LIBOMP_LEGAL_TYPE@ Library "
- "version @LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@.@LIBOMP_VERSION_BUILD@ "
- "for @LIBOMP_LEGAL_ARCH@ architecture built on @LIBOMP_BUILD_DATE@.\0"
-#if @LIBOMP_VERSION_BUILD@ == 0
- VALUE "PrivateBuild",
- "This is a development build.\0"
-#endif
- // VALUE "SpecialBuild", "\0" // Not used for now.
-
- END
- END
- BLOCK "VarFileInfo"
- BEGIN
- VALUE "Translation", 1033, 1200
- // 1033 -- U.S. English, 1200 -- Unicode
- END
- END
-
-// end of file //
+// libomp.rc.var
+
+//
+////===----------------------------------------------------------------------===//
+////
+//// The LLVM Compiler Infrastructure
+////
+//// This file is dual licensed under the MIT and the University of Illinois Open
+//// Source Licenses. See LICENSE.txt for details.
+////
+////===----------------------------------------------------------------------===//
+//
+
+#include "winres.h"
+#include "kmp_config.h"
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) resources
+#pragma code_page(1252)
+
+VS_VERSION_INFO VERSIONINFO
+ // Parts of FILEVERSION and PRODUCTVERSION are 16-bit fields, entire build date yyyymmdd
+ // does not fit into one version part, so we need to split it into yyyy and mmdd:
+ FILEVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@
+ PRODUCTVERSION @LIBOMP_VERSION_MAJOR@,@LIBOMP_VERSION_MINOR@,@LIBOMP_VERSION_BUILD_YEAR@,@LIBOMP_VERSION_BUILD_MONTH_DAY@
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS 0
+#if KMP_DEBUG
+ | VS_FF_DEBUG
+#endif
+#if @LIBOMP_VERSION_BUILD@ == 0
+ | VS_FF_PRIVATEBUILD | VS_FF_PRERELEASE
+#endif
+ FILEOS VOS_NT_WINDOWS32 // Windows* Server* 2003, XP*, 2000, or NT*
+ FILETYPE VFT_DLL
+ BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200)
+ BEGIN
+
+ // FileDescription and LegalCopyright should be short.
+ VALUE "FileDescription", "LLVM* OpenMP* Runtime Library\0"
+ // Following values may be relatively long.
+ VALUE "CompanyName", "LLVM\0"
+ // VALUE "LegalTrademarks", "\0" // Not used for now.
+ VALUE "ProductName", "LLVM* OpenMP* Runtime Library\0"
+ VALUE "ProductVersion", "@LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@\0"
+ VALUE "FileVersion", "@LIBOMP_VERSION_BUILD@\0"
+ VALUE "InternalName", "@LIBOMP_LIB_FILE@\0"
+ VALUE "OriginalFilename", "@LIBOMP_LIB_FILE@\0"
+ VALUE "Comments",
+ "LLVM* OpenMP* @LIBOMP_LEGAL_TYPE@ Library "
+ "version @LIBOMP_VERSION_MAJOR@.@LIBOMP_VERSION_MINOR@.@LIBOMP_VERSION_BUILD@ "
+ "for @LIBOMP_LEGAL_ARCH@ architecture built on @LIBOMP_BUILD_DATE@.\0"
+#if @LIBOMP_VERSION_BUILD@ == 0
+ VALUE "PrivateBuild",
+ "This is a development build.\0"
+#endif
+ // VALUE "SpecialBuild", "\0" // Not used for now.
+
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 1033, 1200
+ // 1033 -- U.S. English, 1200 -- Unicode
+ END
+ END
+
+// end of file //
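As a concrete illustration of the 16-bit-field comment in the VERSIONINFO block: assuming the same 5.0 version that omp.h below declares (KMP_VERSION_MAJOR/MINOR) and its example build date 20140926, the configured line would come out roughly as FILEVERSION 5,0,2014,0926. The yyyymmdd date is split into yyyy (2014) and mmdd (0926) because 20140926 itself would overflow a 16-bit field, while each half fits comfortably.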
diff --git a/contrib/libs/cxxsupp/openmp/omp.h b/contrib/libs/cxxsupp/openmp/omp.h
index eadd69db43..2dee5600ec 100644
--- a/contrib/libs/cxxsupp/openmp/omp.h
+++ b/contrib/libs/cxxsupp/openmp/omp.h
@@ -1,183 +1,183 @@
-/*
- * include/41/omp.h.var
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef __OMP_H
-# define __OMP_H
-
-# define KMP_VERSION_MAJOR 5
-# define KMP_VERSION_MINOR 0
-# define KMP_VERSION_BUILD 20140926
-# define KMP_BUILD_DATE "No_Timestamp"
-
-# ifdef __cplusplus
- extern "C" {
-# endif
-
-# if defined(_WIN32)
-# define __KAI_KMPC_CONVENTION __cdecl
-# else
-# define __KAI_KMPC_CONVENTION
-# endif
-
- /* schedule kind constants */
- typedef enum omp_sched_t {
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
- } omp_sched_t;
-
- /* set API functions */
- extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
- extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
- extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
- extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
- extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
-
- /* query API functions */
- extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
- extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
- extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
- extern int __KAI_KMPC_CONVENTION omp_in_final (void);
- extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_level (void);
- extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
- extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
- extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
- extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
- extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
-
- /* lock API functions */
- typedef struct omp_lock_t {
- void * _lk;
- } omp_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
-
- /* nested lock API functions */
- typedef struct omp_nest_lock_t {
- void * _lk;
- } omp_nest_lock_t;
-
- extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
- extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
- extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
-
- /* lock hint type for dynamic user lock */
- typedef enum omp_lock_hint_t {
- omp_lock_hint_none = 0,
- omp_lock_hint_uncontended = 1,
- omp_lock_hint_contended = (1<<1 ),
- omp_lock_hint_nonspeculative = (1<<2 ),
- omp_lock_hint_speculative = (1<<3 ),
- kmp_lock_hint_hle = (1<<16),
- kmp_lock_hint_rtm = (1<<17),
- kmp_lock_hint_adaptive = (1<<18)
- } omp_lock_hint_t;
-
- /* hinted lock initializers */
- extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);
- extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);
-
- /* time API functions */
- extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
- extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
-
- /* OpenMP 4.0 */
- extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);
- extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);
- extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
- extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
- extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
- extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
-
+/*
+ * include/41/omp.h.var
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef __OMP_H
+# define __OMP_H
+
+# define KMP_VERSION_MAJOR 5
+# define KMP_VERSION_MINOR 0
+# define KMP_VERSION_BUILD 20140926
+# define KMP_BUILD_DATE "No_Timestamp"
+
+# ifdef __cplusplus
+ extern "C" {
+# endif
+
+# if defined(_WIN32)
+# define __KAI_KMPC_CONVENTION __cdecl
+# else
+# define __KAI_KMPC_CONVENTION
+# endif
+
+ /* schedule kind constants */
+ typedef enum omp_sched_t {
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+ } omp_sched_t;
+
+ /* set API functions */
+ extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_nested (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);
+ extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);
+
+ /* query API functions */
+ extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_nested (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);
+ extern int __KAI_KMPC_CONVENTION omp_in_final (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_level (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);
+ extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);
+ extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);
+
+ /* lock API functions */
+ typedef struct omp_lock_t {
+ void * _lk;
+ } omp_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);
+
+ /* nested lock API functions */
+ typedef struct omp_nest_lock_t {
+ void * _lk;
+ } omp_nest_lock_t;
+
+ extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);
+ extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
+ extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
+
+ /* lock hint type for dynamic user lock */
+ typedef enum omp_lock_hint_t {
+ omp_lock_hint_none = 0,
+ omp_lock_hint_uncontended = 1,
+ omp_lock_hint_contended = (1<<1 ),
+ omp_lock_hint_nonspeculative = (1<<2 ),
+ omp_lock_hint_speculative = (1<<3 ),
+ kmp_lock_hint_hle = (1<<16),
+ kmp_lock_hint_rtm = (1<<17),
+ kmp_lock_hint_adaptive = (1<<18)
+ } omp_lock_hint_t;
+
+ /* hinted lock initializers */
+ extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);
+ extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);
+
+ /* time API functions */
+ extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
+ extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);
+
+ /* OpenMP 4.0 */
+ extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);
+ extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);
+ extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
+
#if 0 // !defined(NORUNTIME) && !defined(USE_STL_SYSTEM)
// We need to list all possible dependencies to prevent flaky ("blinking") builds:
// every stdlib.h that could be referenced here on a given platform.
# include <contrib/libs/cxxsupp/libcxx/include/stdlib.h>
#else
-# include <stdlib.h>
+# include <stdlib.h>
#endif
- /* kmp API functions */
- extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
- extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
- extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
- extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
- extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
-
- /* Intel affinity API */
- typedef void * kmp_affinity_mask_t;
-
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
- extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
- extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
- extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
-
- /* OpenMP 4.0 affinity API */
- typedef enum omp_proc_bind_t {
- omp_proc_bind_false = 0,
- omp_proc_bind_true = 1,
- omp_proc_bind_master = 2,
- omp_proc_bind_close = 3,
- omp_proc_bind_spread = 4
- } omp_proc_bind_t;
-
- extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);
-
- extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
- extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
- extern void __KAI_KMPC_CONVENTION kmp_free (void *);
-
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
- extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
-
-# undef __KAI_KMPC_CONVENTION
-
- /* Warning:
- The following typedefs are not standard, deprecated and will be removed in a future release.
- */
- typedef int omp_int_t;
- typedef double omp_wtime_t;
-
-# ifdef __cplusplus
- }
-# endif
-
-#endif /* __OMP_H */
-
+ /* kmp API functions */
+ extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);
+ extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);
+ extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);
+ extern int __KAI_KMPC_CONVENTION kmp_get_library (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library (int);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);
+
+ /* Intel affinity API */
+ typedef void * kmp_affinity_mask_t;
+
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);
+ extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);
+ extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);
+ extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);
+
+ /* OpenMP 4.0 affinity API */
+ typedef enum omp_proc_bind_t {
+ omp_proc_bind_false = 0,
+ omp_proc_bind_true = 1,
+ omp_proc_bind_master = 2,
+ omp_proc_bind_close = 3,
+ omp_proc_bind_spread = 4
+ } omp_proc_bind_t;
+
+ extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);
+
+ extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);
+ extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);
+ extern void __KAI_KMPC_CONVENTION kmp_free (void *);
+
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
+ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
+
+# undef __KAI_KMPC_CONVENTION
+
+ /* Warning:
+ The following typedefs are not standard, deprecated and will be removed in a future release.
+ */
+ typedef int omp_int_t;
+ typedef double omp_wtime_t;
+
+# ifdef __cplusplus
+ }
+# endif
+
+#endif /* __OMP_H */
+
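The header restored above is libomp's public omp.h: schedule kinds, plain and nested locks, the hinted lock initializers, wall-clock timing, and the OpenMP 4.0 device/team queries. A minimal consumer sketch follows, assuming a compiler invoked with OpenMP support and this header on the include path; the variable names and the lock-guarded accumulation are illustrative only and not part of the diff:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
        omp_lock_t lock;
        /* Hinted initializer declared above; the hint is only advisory. */
        omp_init_lock_with_hint(&lock, omp_lock_hint_contended);
        omp_set_num_threads(4);

        double start = omp_get_wtime();
        int sum = 0;
    #pragma omp parallel
        {
            omp_set_lock(&lock);              /* serialize the update */
            sum += omp_get_thread_num();
            omp_unset_lock(&lock);
        }
        printf("max_threads=%d sum=%d elapsed=%.6f s\n",
               omp_get_max_threads(), sum, omp_get_wtime() - start);

        omp_destroy_lock(&lock);
        return 0;
    }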
diff --git a/contrib/libs/cxxsupp/openmp/ompt-event-specific.h b/contrib/libs/cxxsupp/openmp/ompt-event-specific.h
index 71e3cf2aa3..28c1512ac2 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-event-specific.h
+++ b/contrib/libs/cxxsupp/openmp/ompt-event-specific.h
@@ -1,144 +1,144 @@
-#ifndef __OMPT_EVENT_SPECIFIC_H__
-#define __OMPT_EVENT_SPECIFIC_H__
-
-/******************************************************************************
- * File: ompt-event-specific.h
- *
- * Description:
- *
- * specify which of the OMPT events are implemented by this runtime system,
- * and at what level each one is implemented.
- *****************************************************************************/
-
-#define _ompt_tokenpaste_helper(x,y) x ## y
-#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y)
-#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented)
-
-
-/*----------------------------------------------------------------------------
- | Specify whether an event may occur or not, and whether event callbacks
- | never, sometimes, or always occur.
- |
- | The values for these constants are defined in section 6.1.2 of
- | the OMPT TR. They are exposed to tools through ompt_set_callback.
- +--------------------------------------------------------------------------*/
-
-#define ompt_event_NEVER ompt_set_result_event_never_occurs
-#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
-#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
-#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
-
-#if OMPT_TRACE
-#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
-#else
-#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
-#endif
-
-#if OMPT_BLAME
-#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
-#else
-#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
-#endif
-
-/*----------------------------------------------------------------------------
- | Mandatory Events
- +--------------------------------------------------------------------------*/
-
-#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
-
-#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
-
-#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
-
-#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
-
-#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
-
-
-/*----------------------------------------------------------------------------
- | Optional Events (blame shifting)
- +--------------------------------------------------------------------------*/
-
-#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
-
-#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
-
-#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
-
-
-/*----------------------------------------------------------------------------
- | Optional Events (synchronous events)
- +--------------------------------------------------------------------------*/
-
-#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_next_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-
-#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
-
-#endif
+#ifndef __OMPT_EVENT_SPECIFIC_H__
+#define __OMPT_EVENT_SPECIFIC_H__
+
+/******************************************************************************
+ * File: ompt-event-specific.h
+ *
+ * Description:
+ *
+ * specify which of the OMPT events are implemented by this runtime system,
+ * and at what level each one is implemented.
+ *****************************************************************************/
+
+#define _ompt_tokenpaste_helper(x,y) x ## y
+#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y)
+#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented)
+
+
+/*----------------------------------------------------------------------------
+ | Specify whether an event may occur or not, and whether event callbacks
+ | never, sometimes, or always occur.
+ |
+ | The values for these constants are defined in section 6.1.2 of
+ | the OMPT TR. They are exposed to tools through ompt_set_callback.
+ +--------------------------------------------------------------------------*/
+
+#define ompt_event_NEVER ompt_set_result_event_never_occurs
+#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
+#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
+#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
+
+#if OMPT_TRACE
+#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
+#else
+#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
+#endif
+
+#if OMPT_BLAME
+#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
+#else
+#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
+#endif
+
+/*----------------------------------------------------------------------------
+ | Mandatory Events
+ +--------------------------------------------------------------------------*/
+
+#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
+
+#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
+
+#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
+
+#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
+
+#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
+
+
+/*----------------------------------------------------------------------------
+ | Optional Events (blame shifting)
+ +--------------------------------------------------------------------------*/
+
+#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
+
+#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
+
+#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
+
+
+/*----------------------------------------------------------------------------
+ | Optional Events (synchronous events)
+ +--------------------------------------------------------------------------*/
+
+#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_nest_lock_next_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+
+#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
+
+#endif
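Every OMPT event above is mapped to one of the ompt_set_result_* constants by pasting _implemented onto the event name. A standalone sketch of that expansion, using demo_* stand-ins instead of the real ompt.h constants and event list:

    #include <stdio.h>

    #define _ompt_tokenpaste_helper(x,y) x ## y
    #define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y)
    #define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented)

    /* Stand-ins: in the runtime these come from ompt.h and the table above. */
    #define demo_event_MAY_ALWAYS 4
    #define demo_event_parallel_begin_implemented demo_event_MAY_ALWAYS

    int main(void) {
        /* Expands to demo_event_parallel_begin_implemented, i.e. 4. */
        printf("%d\n", ompt_event_implementation_status(demo_event_parallel_begin));
        return 0;
    }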
diff --git a/contrib/libs/cxxsupp/openmp/ompt-general.c b/contrib/libs/cxxsupp/openmp/ompt-general.c
index 6dda24418a..4daae81917 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-general.c
+++ b/contrib/libs/cxxsupp/openmp/ompt-general.c
@@ -1,535 +1,535 @@
-/*****************************************************************************
- * system include files
- ****************************************************************************/
-
-#include <assert.h>
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-
-/*****************************************************************************
- * ompt include files
- ****************************************************************************/
-
-#include "ompt-specific.c"
-
-
-
-/*****************************************************************************
- * macros
- ****************************************************************************/
-
-#define ompt_get_callback_success 1
-#define ompt_get_callback_failure 0
-
-#define no_tool_present 0
-
-#define OMPT_API_ROUTINE static
-
-#ifndef OMPT_STR_MATCH
-#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
-#endif
-
-
-/*****************************************************************************
- * types
- ****************************************************************************/
-
-typedef struct {
- const char *state_name;
- ompt_state_t state_id;
-} ompt_state_info_t;
-
-
-enum tool_setting_e {
- omp_tool_error,
- omp_tool_unset,
- omp_tool_disabled,
- omp_tool_enabled
-};
-
-
-typedef void (*ompt_initialize_t) (
- ompt_function_lookup_t ompt_fn_lookup,
- const char *version,
- unsigned int ompt_version
-);
-
-
-
-/*****************************************************************************
- * global variables
- ****************************************************************************/
-
-int ompt_enabled = 0;
-
-ompt_state_info_t ompt_state_info[] = {
-#define ompt_state_macro(state, code) { # state, state },
- FOREACH_OMPT_STATE(ompt_state_macro)
-#undef ompt_state_macro
-};
-
-ompt_callbacks_t ompt_callbacks;
-
-static ompt_initialize_t ompt_initialize_fn = NULL;
-
-
-
-/*****************************************************************************
- * forward declarations
- ****************************************************************************/
-
-static ompt_interface_fn_t ompt_fn_lookup(const char *s);
-
-OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void);
-
-
-/*****************************************************************************
- * initialization and finalization (private operations)
- ****************************************************************************/
-
-/* On Unix-like systems that support weak symbols the following implementation
- * of ompt_tool() will be used in case no tool-supplied implementation of
- * this function is present in the address space of a process.
- *
- * On Windows, the ompt_tool_windows function is used to find the
- * ompt_tool symbol across all modules loaded by a process. If ompt_tool is
- * found, ompt_tool's return value is used to initialize the tool. Otherwise,
- * NULL is returned and OMPT won't be enabled */
-#if OMPT_HAVE_WEAK_ATTRIBUTE
-_OMP_EXTERN
-__attribute__ (( weak ))
-ompt_initialize_t ompt_tool()
-{
-#if OMPT_DEBUG
- printf("ompt_tool() is called from the RTL\n");
-#endif
- return NULL;
-}
-
-#elif OMPT_HAVE_PSAPI
-
-#include <psapi.h>
-#pragma comment(lib, "psapi.lib")
-#define ompt_tool ompt_tool_windows
-
-// The number of loaded modules to start enumeration with EnumProcessModules()
-#define NUM_MODULES 128
-
-static
-ompt_initialize_t ompt_tool_windows()
-{
- int i;
- DWORD needed, new_size;
- HMODULE *modules;
- HANDLE process = GetCurrentProcess();
- modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) );
- ompt_initialize_t (*ompt_tool_p)() = NULL;
-
-#if OMPT_DEBUG
- printf("ompt_tool_windows(): looking for ompt_tool\n");
-#endif
- if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE),
- &needed)) {
- // Regardless of the error reason use the stub initialization function
- free(modules);
- return NULL;
- }
- // Check if NUM_MODULES is enough to list all modules
- new_size = needed / sizeof(HMODULE);
- if (new_size > NUM_MODULES) {
-#if OMPT_DEBUG
- printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed);
-#endif
- modules = (HMODULE*)realloc( modules, needed );
- // If resizing failed use the stub function.
- if (!EnumProcessModules(process, modules, needed, &needed)) {
- free(modules);
- return NULL;
- }
- }
- for (i = 0; i < new_size; ++i) {
- (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
- if (ompt_tool_p) {
-#if OMPT_DEBUG
- TCHAR modName[MAX_PATH];
- if (GetModuleFileName(modules[i], modName, MAX_PATH))
- printf("ompt_tool_windows(): ompt_tool found in module %s\n",
- modName);
-#endif
- free(modules);
- return ompt_tool_p();
- }
-#if OMPT_DEBUG
- else {
- TCHAR modName[MAX_PATH];
- if (GetModuleFileName(modules[i], modName, MAX_PATH))
- printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
- modName);
- }
-#endif
- }
- free(modules);
- return NULL;
-}
-#else
-# error Either __attribute__((weak)) or psapi.dll are required for OMPT support
-#endif // OMPT_HAVE_WEAK_ATTRIBUTE
-
-void ompt_pre_init()
-{
- //--------------------------------------------------
- // Execute the pre-initialization logic only once.
- //--------------------------------------------------
- static int ompt_pre_initialized = 0;
-
- if (ompt_pre_initialized) return;
-
- ompt_pre_initialized = 1;
-
- //--------------------------------------------------
- // Use a tool iff a tool is enabled and available.
- //--------------------------------------------------
- const char *ompt_env_var = getenv("OMP_TOOL");
- tool_setting_e tool_setting = omp_tool_error;
-
- if (!ompt_env_var || !strcmp(ompt_env_var, ""))
- tool_setting = omp_tool_unset;
- else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
- tool_setting = omp_tool_disabled;
- else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
- tool_setting = omp_tool_enabled;
-
-#if OMPT_DEBUG
- printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
-#endif
- switch(tool_setting) {
- case omp_tool_disabled:
- break;
-
- case omp_tool_unset:
- case omp_tool_enabled:
- ompt_initialize_fn = ompt_tool();
- if (ompt_initialize_fn) {
- ompt_enabled = 1;
- }
- break;
-
- case omp_tool_error:
- fprintf(stderr,
- "Warning: OMP_TOOL has invalid value \"%s\".\n"
- " legal values are (NULL,\"\",\"disabled\","
- "\"enabled\").\n", ompt_env_var);
- break;
- }
-#if OMPT_DEBUG
- printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
-#endif
-}
-
-
-void ompt_post_init()
-{
- //--------------------------------------------------
- // Execute the post-initialization logic only once.
- //--------------------------------------------------
- static int ompt_post_initialized = 0;
-
- if (ompt_post_initialized) return;
-
- ompt_post_initialized = 1;
-
- //--------------------------------------------------
- // Initialize the tool if so indicated.
- //--------------------------------------------------
- if (ompt_enabled) {
- ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
- OMPT_VERSION);
-
- ompt_thread_t *root_thread = ompt_get_thread();
-
- ompt_set_thread_state(root_thread, ompt_state_overhead);
-
- if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_thread_begin)
- (ompt_thread_initial, ompt_get_thread_id());
- }
-
- ompt_set_thread_state(root_thread, ompt_state_work_serial);
- }
-}
-
-
-void ompt_fini()
-{
- if (ompt_enabled) {
- if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
- ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
- }
- }
-
- ompt_enabled = 0;
-}
-
-
-/*****************************************************************************
- * interface operations
- ****************************************************************************/
-
-/*****************************************************************************
- * state
- ****************************************************************************/
-
-OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
- const char **next_state_name)
-{
- const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
- int i = 0;
-
- for (i = 0; i < len - 1; i++) {
- if (ompt_state_info[i].state_id == current_state) {
- *next_state = ompt_state_info[i+1].state_id;
- *next_state_name = ompt_state_info[i+1].state_name;
- return 1;
- }
- }
-
- return 0;
-}
-
-
-
-/*****************************************************************************
- * callbacks
- ****************************************************************************/
-
-OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb)
-{
- switch (evid) {
-
-#define ompt_event_macro(event_name, callback_type, event_id) \
- case event_name: \
- if (ompt_event_implementation_status(event_name)) { \
- ompt_callbacks.ompt_callback(event_name) = (callback_type) cb; \
- } \
- return ompt_event_implementation_status(event_name);
-
- FOREACH_OMPT_EVENT(ompt_event_macro)
-
-#undef ompt_event_macro
-
- default: return ompt_set_result_registration_error;
- }
-}
-
-
-OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb)
-{
- switch (evid) {
-
-#define ompt_event_macro(event_name, callback_type, event_id) \
- case event_name: \
- if (ompt_event_implementation_status(event_name)) { \
- ompt_callback_t mycb = \
- (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \
- if (mycb) { \
- *cb = mycb; \
- return ompt_get_callback_success; \
- } \
- } \
- return ompt_get_callback_failure;
-
- FOREACH_OMPT_EVENT(ompt_event_macro)
-
-#undef ompt_event_macro
-
- default: return ompt_get_callback_failure;
- }
-}
-
-
-/*****************************************************************************
- * parallel regions
- ****************************************************************************/
-
-OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level)
-{
- return __ompt_get_parallel_id_internal(ancestor_level);
-}
-
-
-OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level)
-{
- return __ompt_get_parallel_team_size_internal(ancestor_level);
-}
-
-
-OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level)
-{
- return __ompt_get_parallel_function_internal(ancestor_level);
-}
-
-
-OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id)
-{
- ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
-
- if (thread_state == ompt_state_undefined) {
- thread_state = ompt_state_work_serial;
- }
-
- return thread_state;
-}
-
-
-
-/*****************************************************************************
- * threads
- ****************************************************************************/
-
-
-OMPT_API_ROUTINE void *ompt_get_idle_frame()
-{
- return __ompt_get_idle_frame_internal();
-}
-
-
-
-/*****************************************************************************
- * tasks
- ****************************************************************************/
-
-
-OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void)
-{
- return __ompt_get_thread_id_internal();
-}
-
-OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth)
-{
- return __ompt_get_task_id_internal(depth);
-}
-
-
-OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth)
-{
- return __ompt_get_task_frame_internal(depth);
-}
-
-
-OMPT_API_ROUTINE void *ompt_get_task_function(int depth)
-{
- return __ompt_get_task_function_internal(depth);
-}
-
-
-/*****************************************************************************
- * placeholders
- ****************************************************************************/
-
-// Don't define this as static. The loader may choose to eliminate the symbol
-// even though it is needed by tools.
-#define OMPT_API_PLACEHOLDER
-
-// Ensure that placeholders don't have mangled names in the symbol table.
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-OMPT_API_PLACEHOLDER void ompt_idle(void)
-{
- // This function is a placeholder used to represent the calling context of
- // idle OpenMP worker threads. It is not meant to be invoked.
- assert(0);
-}
-
-
-OMPT_API_PLACEHOLDER void ompt_overhead(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads working in the OpenMP runtime. It is not meant to be invoked.
- assert(0);
-}
-
-
-OMPT_API_PLACEHOLDER void ompt_barrier_wait(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads waiting for a barrier in the OpenMP runtime. It is not meant
- // to be invoked.
- assert(0);
-}
-
-
-OMPT_API_PLACEHOLDER void ompt_task_wait(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads waiting for a task in the OpenMP runtime. It is not meant
- // to be invoked.
- assert(0);
-}
-
-
-OMPT_API_PLACEHOLDER void ompt_mutex_wait(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads waiting for a mutex in the OpenMP runtime. It is not meant
- // to be invoked.
- assert(0);
-}
-
-#ifdef __cplusplus
-};
-#endif
-
-
-/*****************************************************************************
- * compatibility
- ****************************************************************************/
-
-OMPT_API_ROUTINE int ompt_get_ompt_version()
-{
- return OMPT_VERSION;
-}
-
-
-
-/*****************************************************************************
- * application-facing API
- ****************************************************************************/
-
-
-/*----------------------------------------------------------------------------
- | control
- ---------------------------------------------------------------------------*/
-
-_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier)
-{
- if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
- ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
- }
-}
-
-
-
-/*****************************************************************************
- * API inquiry for tool
- ****************************************************************************/
-
-static ompt_interface_fn_t ompt_fn_lookup(const char *s)
-{
-
-#define ompt_interface_fn(fn) \
- if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn;
-
- FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
-
- FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn)
-
- return (ompt_interface_fn_t) 0;
-}
+/*****************************************************************************
+ * system include files
+ ****************************************************************************/
+
+#include <assert.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+
+/*****************************************************************************
+ * ompt include files
+ ****************************************************************************/
+
+#include "ompt-specific.c"
+
+
+
+/*****************************************************************************
+ * macros
+ ****************************************************************************/
+
+#define ompt_get_callback_success 1
+#define ompt_get_callback_failure 0
+
+#define no_tool_present 0
+
+#define OMPT_API_ROUTINE static
+
+#ifndef OMPT_STR_MATCH
+#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
+#endif
+
+
+/*****************************************************************************
+ * types
+ ****************************************************************************/
+
+typedef struct {
+ const char *state_name;
+ ompt_state_t state_id;
+} ompt_state_info_t;
+
+
+enum tool_setting_e {
+ omp_tool_error,
+ omp_tool_unset,
+ omp_tool_disabled,
+ omp_tool_enabled
+};
+
+
+typedef void (*ompt_initialize_t) (
+ ompt_function_lookup_t ompt_fn_lookup,
+ const char *version,
+ unsigned int ompt_version
+);
+
+
+
+/*****************************************************************************
+ * global variables
+ ****************************************************************************/
+
+int ompt_enabled = 0;
+
+ompt_state_info_t ompt_state_info[] = {
+#define ompt_state_macro(state, code) { # state, state },
+ FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
+};
+
+ompt_callbacks_t ompt_callbacks;
+
+static ompt_initialize_t ompt_initialize_fn = NULL;
+
+
+
+/*****************************************************************************
+ * forward declarations
+ ****************************************************************************/
+
+static ompt_interface_fn_t ompt_fn_lookup(const char *s);
+
+OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void);
+
+
+/*****************************************************************************
+ * initialization and finalization (private operations)
+ ****************************************************************************/
+
+/* On Unix-like systems that support weak symbols the following implementation
+ * of ompt_tool() will be used in case no tool-supplied implementation of
+ * this function is present in the address space of a process.
+ *
+ * On Windows, the ompt_tool_windows function is used to find the
+ * ompt_tool symbol across all modules loaded by a process. If ompt_tool is
+ * found, ompt_tool's return value is used to initialize the tool. Otherwise,
+ * NULL is returned and OMPT won't be enabled */
+#if OMPT_HAVE_WEAK_ATTRIBUTE
+_OMP_EXTERN
+__attribute__ (( weak ))
+ompt_initialize_t ompt_tool()
+{
+#if OMPT_DEBUG
+ printf("ompt_tool() is called from the RTL\n");
+#endif
+ return NULL;
+}
+
+#elif OMPT_HAVE_PSAPI
+
+#include <psapi.h>
+#pragma comment(lib, "psapi.lib")
+#define ompt_tool ompt_tool_windows
+
+// The number of loaded modules to start enumeration with EnumProcessModules()
+#define NUM_MODULES 128
+
+static
+ompt_initialize_t ompt_tool_windows()
+{
+ int i;
+ DWORD needed, new_size;
+ HMODULE *modules;
+ HANDLE process = GetCurrentProcess();
+ modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) );
+ ompt_initialize_t (*ompt_tool_p)() = NULL;
+
+#if OMPT_DEBUG
+ printf("ompt_tool_windows(): looking for ompt_tool\n");
+#endif
+ if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE),
+ &needed)) {
+ // Regardless of the error reason use the stub initialization function
+ free(modules);
+ return NULL;
+ }
+ // Check if NUM_MODULES is enough to list all modules
+ new_size = needed / sizeof(HMODULE);
+ if (new_size > NUM_MODULES) {
+#if OMPT_DEBUG
+ printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed);
+#endif
+ modules = (HMODULE*)realloc( modules, needed );
+ // If resizing failed use the stub function.
+ if (!EnumProcessModules(process, modules, needed, &needed)) {
+ free(modules);
+ return NULL;
+ }
+ }
+ for (i = 0; i < new_size; ++i) {
+ (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
+ if (ompt_tool_p) {
+#if OMPT_DEBUG
+ TCHAR modName[MAX_PATH];
+ if (GetModuleFileName(modules[i], modName, MAX_PATH))
+ printf("ompt_tool_windows(): ompt_tool found in module %s\n",
+ modName);
+#endif
+ free(modules);
+ return ompt_tool_p();
+ }
+#if OMPT_DEBUG
+ else {
+ TCHAR modName[MAX_PATH];
+ if (GetModuleFileName(modules[i], modName, MAX_PATH))
+ printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
+ modName);
+ }
+#endif
+ }
+ free(modules);
+ return NULL;
+}
+#else
+# error Either __attribute__((weak)) or psapi.dll are required for OMPT support
+#endif // OMPT_HAVE_WEAK_ATTRIBUTE
+
+void ompt_pre_init()
+{
+ //--------------------------------------------------
+ // Execute the pre-initialization logic only once.
+ //--------------------------------------------------
+ static int ompt_pre_initialized = 0;
+
+ if (ompt_pre_initialized) return;
+
+ ompt_pre_initialized = 1;
+
+ //--------------------------------------------------
+ // Use a tool iff a tool is enabled and available.
+ //--------------------------------------------------
+ const char *ompt_env_var = getenv("OMP_TOOL");
+ tool_setting_e tool_setting = omp_tool_error;
+
+ if (!ompt_env_var || !strcmp(ompt_env_var, ""))
+ tool_setting = omp_tool_unset;
+ else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
+ tool_setting = omp_tool_disabled;
+ else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
+ tool_setting = omp_tool_enabled;
+
+#if OMPT_DEBUG
+ printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
+#endif
+ switch(tool_setting) {
+ case omp_tool_disabled:
+ break;
+
+ case omp_tool_unset:
+ case omp_tool_enabled:
+ ompt_initialize_fn = ompt_tool();
+ if (ompt_initialize_fn) {
+ ompt_enabled = 1;
+ }
+ break;
+
+ case omp_tool_error:
+ fprintf(stderr,
+ "Warning: OMP_TOOL has invalid value \"%s\".\n"
+ " legal values are (NULL,\"\",\"disabled\","
+ "\"enabled\").\n", ompt_env_var);
+ break;
+ }
+#if OMPT_DEBUG
+ printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
+#endif
+}
+
+
+void ompt_post_init()
+{
+ //--------------------------------------------------
+ // Execute the post-initialization logic only once.
+ //--------------------------------------------------
+ static int ompt_post_initialized = 0;
+
+ if (ompt_post_initialized) return;
+
+ ompt_post_initialized = 1;
+
+ //--------------------------------------------------
+ // Initialize the tool if so indicated.
+ //--------------------------------------------------
+ if (ompt_enabled) {
+ ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
+ OMPT_VERSION);
+
+ ompt_thread_t *root_thread = ompt_get_thread();
+
+ ompt_set_thread_state(root_thread, ompt_state_overhead);
+
+ if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_thread_begin)
+ (ompt_thread_initial, ompt_get_thread_id());
+ }
+
+ ompt_set_thread_state(root_thread, ompt_state_work_serial);
+ }
+}
+
+
+void ompt_fini()
+{
+ if (ompt_enabled) {
+ if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
+ ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
+ }
+ }
+
+ ompt_enabled = 0;
+}
+
+
+/*****************************************************************************
+ * interface operations
+ ****************************************************************************/
+
+/*****************************************************************************
+ * state
+ ****************************************************************************/
+
+OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
+ const char **next_state_name)
+{
+ const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
+ int i = 0;
+
+ for (i = 0; i < len - 1; i++) {
+ if (ompt_state_info[i].state_id == current_state) {
+ *next_state = ompt_state_info[i+1].state_id;
+ *next_state_name = ompt_state_info[i+1].state_name;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+
+
+/*****************************************************************************
+ * callbacks
+ ****************************************************************************/
+
+OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb)
+{
+ switch (evid) {
+
+#define ompt_event_macro(event_name, callback_type, event_id) \
+ case event_name: \
+ if (ompt_event_implementation_status(event_name)) { \
+ ompt_callbacks.ompt_callback(event_name) = (callback_type) cb; \
+ } \
+ return ompt_event_implementation_status(event_name);
+
+ FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+ default: return ompt_set_result_registration_error;
+ }
+}
+
+
+OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb)
+{
+ switch (evid) {
+
+#define ompt_event_macro(event_name, callback_type, event_id) \
+ case event_name: \
+ if (ompt_event_implementation_status(event_name)) { \
+ ompt_callback_t mycb = \
+ (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \
+ if (mycb) { \
+ *cb = mycb; \
+ return ompt_get_callback_success; \
+ } \
+ } \
+ return ompt_get_callback_failure;
+
+ FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+ default: return ompt_get_callback_failure;
+ }
+}
+
+
+/*****************************************************************************
+ * parallel regions
+ ****************************************************************************/
+
+OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level)
+{
+ return __ompt_get_parallel_id_internal(ancestor_level);
+}
+
+
+OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level)
+{
+ return __ompt_get_parallel_team_size_internal(ancestor_level);
+}
+
+
+OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level)
+{
+ return __ompt_get_parallel_function_internal(ancestor_level);
+}
+
+
+OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id)
+{
+ ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
+
+ if (thread_state == ompt_state_undefined) {
+ thread_state = ompt_state_work_serial;
+ }
+
+ return thread_state;
+}
+
+
+
+/*****************************************************************************
+ * threads
+ ****************************************************************************/
+
+
+OMPT_API_ROUTINE void *ompt_get_idle_frame()
+{
+ return __ompt_get_idle_frame_internal();
+}
+
+
+
+/*****************************************************************************
+ * tasks
+ ****************************************************************************/
+
+
+OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void)
+{
+ return __ompt_get_thread_id_internal();
+}
+
+OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth)
+{
+ return __ompt_get_task_id_internal(depth);
+}
+
+
+OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth)
+{
+ return __ompt_get_task_frame_internal(depth);
+}
+
+
+OMPT_API_ROUTINE void *ompt_get_task_function(int depth)
+{
+ return __ompt_get_task_function_internal(depth);
+}
+
+
+/*****************************************************************************
+ * placeholders
+ ****************************************************************************/
+
+// Don't define this as static. The loader may choose to eliminate the symbol
+// even though it is needed by tools.
+#define OMPT_API_PLACEHOLDER
+
+// Ensure that placeholders don't have mangled names in the symbol table.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+OMPT_API_PLACEHOLDER void ompt_idle(void)
+{
+ // This function is a placeholder used to represent the calling context of
+ // idle OpenMP worker threads. It is not meant to be invoked.
+ assert(0);
+}
+
+
+OMPT_API_PLACEHOLDER void ompt_overhead(void)
+{
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads working in the OpenMP runtime. It is not meant to be invoked.
+ assert(0);
+}
+
+
+OMPT_API_PLACEHOLDER void ompt_barrier_wait(void)
+{
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads waiting for a barrier in the OpenMP runtime. It is not meant
+ // to be invoked.
+ assert(0);
+}
+
+
+OMPT_API_PLACEHOLDER void ompt_task_wait(void)
+{
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads waiting for a task in the OpenMP runtime. It is not meant
+ // to be invoked.
+ assert(0);
+}
+
+
+OMPT_API_PLACEHOLDER void ompt_mutex_wait(void)
+{
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads waiting for a mutex in the OpenMP runtime. It is not meant
+ // to be invoked.
+ assert(0);
+}
+
+#ifdef __cplusplus
+};
+#endif
+
+
+/*****************************************************************************
+ * compatibility
+ ****************************************************************************/
+
+OMPT_API_ROUTINE int ompt_get_ompt_version()
+{
+ return OMPT_VERSION;
+}
+
+
+
+/*****************************************************************************
+ * application-facing API
+ ****************************************************************************/
+
+
+/*----------------------------------------------------------------------------
+ | control
+ ---------------------------------------------------------------------------*/
+
+_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier)
+{
+ if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
+ ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
+ }
+}
+
+
+
+/*****************************************************************************
+ * API inquiry for tool
+ ****************************************************************************/
+
+static ompt_interface_fn_t ompt_fn_lookup(const char *s)
+{
+
+#define ompt_interface_fn(fn) \
+ if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn;
+
+ FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
+
+ FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn)
+
+ return (ompt_interface_fn_t) 0;
+}
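In the file above, ompt_pre_init() decides from OMP_TOOL and the weak ompt_tool() hook whether a tool is present, and ompt_post_init() then hands the tool ompt_fn_lookup, through which it can fetch ompt_set_callback and the other inquiry functions. A sketch of a minimal tool written against that flow; the local typedefs mirror the signatures in this hunk, the lookup string matches the FOREACH_OMPT_INQUIRY_FN table, and the my_* names are hypothetical:

    #include <stdio.h>
    #include "ompt.h"

    /* Local typedefs matching the runtime-side signatures shown above. */
    typedef void (*tool_initialize_fn)(ompt_function_lookup_t lookup,
                                       const char *runtime_version,
                                       unsigned int ompt_version);
    typedef int (*set_callback_fn)(ompt_event_t evid, ompt_callback_t cb);

    static void my_thread_begin(ompt_thread_type_t type, ompt_thread_id_t id) {
        printf("OMPT: thread %llu begins (type %d)\n",
               (unsigned long long) id, (int) type);
    }

    static void my_initialize(ompt_function_lookup_t lookup,
                              const char *runtime_version,
                              unsigned int ompt_version) {
        set_callback_fn set_cb = (set_callback_fn) lookup("ompt_set_callback");
        if (set_cb)
            set_cb(ompt_event_thread_begin, (ompt_callback_t) my_thread_begin);
    }

    /* A strong definition overrides the weak ompt_tool() above; returning a
     * non-NULL initializer makes ompt_pre_init() set ompt_enabled = 1. */
    tool_initialize_fn ompt_tool(void) { return my_initialize; }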
diff --git a/contrib/libs/cxxsupp/openmp/ompt-internal.h b/contrib/libs/cxxsupp/openmp/ompt-internal.h
index 6466c3bc22..64e8d2e8fd 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-internal.h
+++ b/contrib/libs/cxxsupp/openmp/ompt-internal.h
@@ -1,79 +1,79 @@
-#ifndef __OMPT_INTERNAL_H__
-#define __OMPT_INTERNAL_H__
-
-#include "ompt.h"
-#include "ompt-event-specific.h"
-
-#define OMPT_VERSION 1
-
-#define _OMP_EXTERN extern "C"
-
-#define OMPT_INVOKER(x) \
- ((x == fork_context_gnu) ? ompt_invoker_program : ompt_invoker_runtime)
-
-
-#define ompt_callback(e) e ## _callback
-
-
-typedef struct ompt_callbacks_s {
-#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event);
-
- FOREACH_OMPT_EVENT(ompt_event_macro)
-
-#undef ompt_event_macro
-} ompt_callbacks_t;
-
-
-
-typedef struct {
- ompt_frame_t frame;
- void* function;
- ompt_task_id_t task_id;
-} ompt_task_info_t;
-
-
-typedef struct {
- ompt_parallel_id_t parallel_id;
- void *microtask;
-} ompt_team_info_t;
-
-
-typedef struct ompt_lw_taskteam_s {
- ompt_team_info_t ompt_team_info;
- ompt_task_info_t ompt_task_info;
- struct ompt_lw_taskteam_s *parent;
-} ompt_lw_taskteam_t;
-
-
-typedef struct ompt_parallel_info_s {
- ompt_task_id_t parent_task_id; /* id of parent task */
- ompt_parallel_id_t parallel_id; /* id of parallel region */
- ompt_frame_t *parent_task_frame; /* frame data of parent task */
- void *parallel_function; /* pointer to outlined function */
-} ompt_parallel_info_t;
-
-
-typedef struct {
- ompt_state_t state;
- ompt_wait_id_t wait_id;
- void *idle_frame;
-} ompt_thread_info_t;
-
-
-extern ompt_callbacks_t ompt_callbacks;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void ompt_pre_init(void);
-void ompt_post_init(void);
-void ompt_fini(void);
-
-extern int ompt_enabled;
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif
+#ifndef __OMPT_INTERNAL_H__
+#define __OMPT_INTERNAL_H__
+
+#include "ompt.h"
+#include "ompt-event-specific.h"
+
+#define OMPT_VERSION 1
+
+#define _OMP_EXTERN extern "C"
+
+#define OMPT_INVOKER(x) \
+ ((x == fork_context_gnu) ? ompt_invoker_program : ompt_invoker_runtime)
+
+
+#define ompt_callback(e) e ## _callback
+
+
+typedef struct ompt_callbacks_s {
+#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event);
+
+ FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_callbacks_t;
+
+
+
+typedef struct {
+ ompt_frame_t frame;
+ void* function;
+ ompt_task_id_t task_id;
+} ompt_task_info_t;
+
+
+typedef struct {
+ ompt_parallel_id_t parallel_id;
+ void *microtask;
+} ompt_team_info_t;
+
+
+typedef struct ompt_lw_taskteam_s {
+ ompt_team_info_t ompt_team_info;
+ ompt_task_info_t ompt_task_info;
+ struct ompt_lw_taskteam_s *parent;
+} ompt_lw_taskteam_t;
+
+
+typedef struct ompt_parallel_info_s {
+ ompt_task_id_t parent_task_id; /* id of parent task */
+ ompt_parallel_id_t parallel_id; /* id of parallel region */
+ ompt_frame_t *parent_task_frame; /* frame data of parent task */
+ void *parallel_function; /* pointer to outlined function */
+} ompt_parallel_info_t;
+
+
+typedef struct {
+ ompt_state_t state;
+ ompt_wait_id_t wait_id;
+ void *idle_frame;
+} ompt_thread_info_t;
+
+
+extern ompt_callbacks_t ompt_callbacks;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void ompt_pre_init(void);
+void ompt_post_init(void);
+void ompt_fini(void);
+
+extern int ompt_enabled;
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
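ompt_callbacks_t above is generated by expanding FOREACH_OMPT_EVENT: each event contributes one struct member whose name is the event name with _callback pasted on (the ompt_callback(e) macro), and ompt-general.c registers and invokes tool callbacks through those members. A standalone sketch of that naming scheme, with demo_* stand-ins rather than the real event list:

    #include <stdio.h>

    #define ompt_callback(e) e ## _callback

    typedef void (*demo_thread_callback_t)(int thread_id);

    /* One member per event, named <event>_callback, as in ompt_callbacks_s. */
    typedef struct demo_callbacks_s {
        demo_thread_callback_t ompt_callback(demo_event_thread_begin);
    } demo_callbacks_t;

    static void on_thread_begin(int id) { printf("thread %d begins\n", id); }

    int main(void) {
        demo_callbacks_t cbs = {0};
        cbs.ompt_callback(demo_event_thread_begin) = on_thread_begin;
        /* Guarded dispatch, mirroring the ompt_enabled checks in the runtime. */
        if (cbs.demo_event_thread_begin_callback)
            cbs.demo_event_thread_begin_callback(1);
        return 0;
    }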
diff --git a/contrib/libs/cxxsupp/openmp/ompt-specific.c b/contrib/libs/cxxsupp/openmp/ompt-specific.c
index f718470c64..49f668af10 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-specific.c
+++ b/contrib/libs/cxxsupp/openmp/ompt-specific.c
@@ -1,332 +1,332 @@
-//******************************************************************************
-// include files
-//******************************************************************************
-
-#include "kmp.h"
-#include "ompt-internal.h"
-#include "ompt-specific.h"
-
-//******************************************************************************
-// macros
-//******************************************************************************
-
-#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0)
-
-#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
-
-#define OMPT_THREAD_ID_BITS 16
-
-// 2013 08 24 - John Mellor-Crummey
-// ideally, a thread should assign its own ids based on thread private data.
-// however, the way the intel runtime reinitializes thread data structures
-// when it creates teams makes it difficult to maintain persistent thread
-// data. using a shared variable instead is simple. I leave it to intel to
-// sort out how to implement a higher performance version in their runtime.
-
-// when using fetch_and_add to generate the IDs, there isn't any reason to waste
-// bits for thread id.
-#if 0
-#define NEXT_ID(id_ptr,tid) \
- ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
-#else
-#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
-#endif
-
-//******************************************************************************
-// private operations
-//******************************************************************************
-
-//----------------------------------------------------------
-// traverse the team and task hierarchy
-// note: __ompt_get_teaminfo and __ompt_get_taskinfo
-// traverse the hierarchy similarly and need to be
-// kept consistent
-//----------------------------------------------------------
-
-ompt_team_info_t *
-__ompt_get_teaminfo(int depth, int *size)
-{
- kmp_info_t *thr = ompt_get_thread();
-
- if (thr) {
- kmp_team *team = thr->th.th_team;
- if (team == NULL) return NULL;
-
- ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
-
- while(depth > 0) {
- // next lightweight team (if any)
- if (lwt) lwt = lwt->parent;
-
- // next heavyweight team (if any) after
- // lightweight teams are exhausted
- if (!lwt && team) team=team->t.t_parent;
-
- depth--;
- }
-
- if (lwt) {
- // lightweight teams have one task
- if (size) *size = 1;
-
- // return team info for lightweight team
- return &lwt->ompt_team_info;
- } else if (team) {
- // extract size from heavyweight team
- if (size) *size = team->t.t_nproc;
-
- // return team info for heavyweight team
- return &team->t.ompt_team_info;
- }
- }
-
- return NULL;
-}
-
-
-ompt_task_info_t *
-__ompt_get_taskinfo(int depth)
-{
- ompt_task_info_t *info = NULL;
- kmp_info_t *thr = ompt_get_thread();
-
- if (thr) {
- kmp_taskdata_t *taskdata = thr->th.th_current_task;
- ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
-
- while (depth > 0) {
- // next lightweight team (if any)
- if (lwt) lwt = lwt->parent;
-
- // next heavyweight team (if any) after
- // lightweight teams are exhausted
- if (!lwt && taskdata) {
- taskdata = taskdata->td_parent;
- if (taskdata) {
- lwt = LWT_FROM_TEAM(taskdata->td_team);
- }
- }
- depth--;
- }
-
- if (lwt) {
- info = &lwt->ompt_task_info;
- } else if (taskdata) {
- info = &taskdata->ompt_task_info;
- }
- }
-
- return info;
-}
-
-
-
-//******************************************************************************
-// interface operations
-//******************************************************************************
-
-//----------------------------------------------------------
-// thread support
-//----------------------------------------------------------
-
-ompt_parallel_id_t
-__ompt_thread_id_new()
-{
- static uint64_t ompt_thread_id = 1;
- return NEXT_ID(&ompt_thread_id, 0);
-}
-
-void
-__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid)
-{
- ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
- thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
-}
-
-
-void
-__ompt_thread_end(ompt_thread_type_t thread_type, int gtid)
-{
- ompt_callbacks.ompt_callback(ompt_event_thread_end)(
- thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
-}
-
-
-ompt_thread_id_t
-__ompt_get_thread_id_internal()
-{
- // FIXME
- // until we have a better way of assigning ids, use __kmp_get_gtid
- // since the return value might be negative, we need to test that before
- // assigning it to an ompt_thread_id_t, which is unsigned.
- int id = __kmp_get_gtid();
- assert(id >= 0);
-
- return GTID_TO_OMPT_THREAD_ID(id);
-}
-
-//----------------------------------------------------------
-// state support
-//----------------------------------------------------------
-
-void
-__ompt_thread_assign_wait_id(void *variable)
-{
- int gtid = __kmp_gtid_get_specific();
- kmp_info_t *ti = ompt_get_thread_gtid(gtid);
-
- ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable;
-}
-
-ompt_state_t
-__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id)
-{
- kmp_info_t *ti = ompt_get_thread();
-
- if (ti) {
- if (ompt_wait_id)
- *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
- return ti->th.ompt_thread_info.state;
- }
- return ompt_state_undefined;
-}
-
-//----------------------------------------------------------
-// idle frame support
-//----------------------------------------------------------
-
-void *
-__ompt_get_idle_frame_internal(void)
-{
- kmp_info_t *ti = ompt_get_thread();
- return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
-}
-
-
-//----------------------------------------------------------
-// parallel region support
-//----------------------------------------------------------
-
-ompt_parallel_id_t
-__ompt_parallel_id_new(int gtid)
-{
- static uint64_t ompt_parallel_id = 1;
- return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
-}
-
-
-void *
-__ompt_get_parallel_function_internal(int depth)
-{
- ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
- void *function = info ? info->microtask : NULL;
- return function;
-}
-
-
-ompt_parallel_id_t
-__ompt_get_parallel_id_internal(int depth)
-{
- ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
- ompt_parallel_id_t id = info ? info->parallel_id : 0;
- return id;
-}
-
-
-int
-__ompt_get_parallel_team_size_internal(int depth)
-{
- // initialize the return value with the error value.
- // if there is a team at the specified depth, the default
- // value will be overwritten with the size of that team.
- int size = -1;
- (void) __ompt_get_teaminfo(depth, &size);
- return size;
-}
-
-
-//----------------------------------------------------------
-// lightweight task team support
-//----------------------------------------------------------
-
-void
-__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
- int gtid, void *microtask,
- ompt_parallel_id_t ompt_pid)
-{
- lwt->ompt_team_info.parallel_id = ompt_pid;
- lwt->ompt_team_info.microtask = microtask;
- lwt->ompt_task_info.task_id = 0;
- lwt->ompt_task_info.frame.reenter_runtime_frame = 0;
- lwt->ompt_task_info.frame.exit_runtime_frame = 0;
- lwt->ompt_task_info.function = NULL;
- lwt->parent = 0;
-}
-
-
-void
-__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr)
-{
- ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
- lwt->parent = my_parent;
- thr->th.th_team->t.ompt_serialized_team_info = lwt;
-}
-
-
-ompt_lw_taskteam_t *
-__ompt_lw_taskteam_unlink(kmp_info_t *thr)
-{
- ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
- if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
- return lwtask;
-}
-
-
-//----------------------------------------------------------
-// task support
-//----------------------------------------------------------
-
-ompt_task_id_t
-__ompt_task_id_new(int gtid)
-{
- static uint64_t ompt_task_id = 1;
- return NEXT_ID(&ompt_task_id, gtid);
-}
-
-
-ompt_task_id_t
-__ompt_get_task_id_internal(int depth)
-{
- ompt_task_info_t *info = __ompt_get_taskinfo(depth);
- ompt_task_id_t task_id = info ? info->task_id : 0;
- return task_id;
-}
-
-
-void *
-__ompt_get_task_function_internal(int depth)
-{
- ompt_task_info_t *info = __ompt_get_taskinfo(depth);
- void *function = info ? info->function : NULL;
- return function;
-}
-
-
-ompt_frame_t *
-__ompt_get_task_frame_internal(int depth)
-{
- ompt_task_info_t *info = __ompt_get_taskinfo(depth);
- ompt_frame_t *frame = info ? &info->frame : NULL;
- return frame;
-}
-
-
-//----------------------------------------------------------
-// team support
-//----------------------------------------------------------
-
-void
-__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid)
-{
- team->t.ompt_team_info.parallel_id = ompt_pid;
-}
+//******************************************************************************
+// include files
+//******************************************************************************
+
+#include "kmp.h"
+#include "ompt-internal.h"
+#include "ompt-specific.h"
+
+//******************************************************************************
+// macros
+//******************************************************************************
+
+#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0)
+
+#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
+
+#define OMPT_THREAD_ID_BITS 16
+
+// 2013 08 24 - John Mellor-Crummey
+// ideally, a thread should assign its own ids based on thread private data.
+// however, the way the intel runtime reinitializes thread data structures
+// when it creates teams makes it difficult to maintain persistent thread
+// data. using a shared variable instead is simple. I leave it to intel to
+// sort out how to implement a higher performance version in their runtime.
+
+// when using fetch_and_add to generate the IDs, there isn't any reason to waste
+// bits for thread id.
+#if 0
+#define NEXT_ID(id_ptr,tid) \
+ ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
+#else
+#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
+#endif
+
+//******************************************************************************
+// private operations
+//******************************************************************************
+
+//----------------------------------------------------------
+// traverse the team and task hierarchy
+// note: __ompt_get_teaminfo and __ompt_get_taskinfo
+// traverse the hierarchy similarly and need to be
+// kept consistent
+//----------------------------------------------------------
+
+ompt_team_info_t *
+__ompt_get_teaminfo(int depth, int *size)
+{
+ kmp_info_t *thr = ompt_get_thread();
+
+ if (thr) {
+ kmp_team *team = thr->th.th_team;
+ if (team == NULL) return NULL;
+
+ ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
+
+ while(depth > 0) {
+ // next lightweight team (if any)
+ if (lwt) lwt = lwt->parent;
+
+ // next heavyweight team (if any) after
+ // lightweight teams are exhausted
+ if (!lwt && team) team=team->t.t_parent;
+
+ depth--;
+ }
+
+ if (lwt) {
+ // lightweight teams have one task
+ if (size) *size = 1;
+
+ // return team info for lightweight team
+ return &lwt->ompt_team_info;
+ } else if (team) {
+ // extract size from heavyweight team
+ if (size) *size = team->t.t_nproc;
+
+ // return team info for heavyweight team
+ return &team->t.ompt_team_info;
+ }
+ }
+
+ return NULL;
+}
+
+
+ompt_task_info_t *
+__ompt_get_taskinfo(int depth)
+{
+ ompt_task_info_t *info = NULL;
+ kmp_info_t *thr = ompt_get_thread();
+
+ if (thr) {
+ kmp_taskdata_t *taskdata = thr->th.th_current_task;
+ ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+ while (depth > 0) {
+ // next lightweight team (if any)
+ if (lwt) lwt = lwt->parent;
+
+ // next heavyweight team (if any) after
+ // lightweight teams are exhausted
+ if (!lwt && taskdata) {
+ taskdata = taskdata->td_parent;
+ if (taskdata) {
+ lwt = LWT_FROM_TEAM(taskdata->td_team);
+ }
+ }
+ depth--;
+ }
+
+ if (lwt) {
+ info = &lwt->ompt_task_info;
+ } else if (taskdata) {
+ info = &taskdata->ompt_task_info;
+ }
+ }
+
+ return info;
+}
+
+
+
+//******************************************************************************
+// interface operations
+//******************************************************************************
+
+//----------------------------------------------------------
+// thread support
+//----------------------------------------------------------
+
+ompt_parallel_id_t
+__ompt_thread_id_new()
+{
+ static uint64_t ompt_thread_id = 1;
+ return NEXT_ID(&ompt_thread_id, 0);
+}
+
+void
+__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid)
+{
+ ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
+ thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
+}
+
+
+void
+__ompt_thread_end(ompt_thread_type_t thread_type, int gtid)
+{
+ ompt_callbacks.ompt_callback(ompt_event_thread_end)(
+ thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
+}
+
+
+ompt_thread_id_t
+__ompt_get_thread_id_internal()
+{
+ // FIXME
+ // until we have a better way of assigning ids, use __kmp_get_gtid
+ // since the return value might be negative, we need to test that before
+ // assigning it to an ompt_thread_id_t, which is unsigned.
+ int id = __kmp_get_gtid();
+ assert(id >= 0);
+
+ return GTID_TO_OMPT_THREAD_ID(id);
+}
+
+//----------------------------------------------------------
+// state support
+//----------------------------------------------------------
+
+void
+__ompt_thread_assign_wait_id(void *variable)
+{
+ int gtid = __kmp_gtid_get_specific();
+ kmp_info_t *ti = ompt_get_thread_gtid(gtid);
+
+ ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable;
+}
+
+ompt_state_t
+__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id)
+{
+ kmp_info_t *ti = ompt_get_thread();
+
+ if (ti) {
+ if (ompt_wait_id)
+ *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
+ return ti->th.ompt_thread_info.state;
+ }
+ return ompt_state_undefined;
+}
+
+//----------------------------------------------------------
+// idle frame support
+//----------------------------------------------------------
+
+void *
+__ompt_get_idle_frame_internal(void)
+{
+ kmp_info_t *ti = ompt_get_thread();
+ return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
+}
+
+
+//----------------------------------------------------------
+// parallel region support
+//----------------------------------------------------------
+
+ompt_parallel_id_t
+__ompt_parallel_id_new(int gtid)
+{
+ static uint64_t ompt_parallel_id = 1;
+ return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
+}
+
+
+void *
+__ompt_get_parallel_function_internal(int depth)
+{
+ ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
+ void *function = info ? info->microtask : NULL;
+ return function;
+}
+
+
+ompt_parallel_id_t
+__ompt_get_parallel_id_internal(int depth)
+{
+ ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
+ ompt_parallel_id_t id = info ? info->parallel_id : 0;
+ return id;
+}
+
+
+int
+__ompt_get_parallel_team_size_internal(int depth)
+{
+ // initialize the return value with the error value.
+ // if there is a team at the specified depth, the default
+ // value will be overwritten with the size of that team.
+ int size = -1;
+ (void) __ompt_get_teaminfo(depth, &size);
+ return size;
+}
+
+
+//----------------------------------------------------------
+// lightweight task team support
+//----------------------------------------------------------
+
+void
+__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
+ int gtid, void *microtask,
+ ompt_parallel_id_t ompt_pid)
+{
+ lwt->ompt_team_info.parallel_id = ompt_pid;
+ lwt->ompt_team_info.microtask = microtask;
+ lwt->ompt_task_info.task_id = 0;
+ lwt->ompt_task_info.frame.reenter_runtime_frame = 0;
+ lwt->ompt_task_info.frame.exit_runtime_frame = 0;
+ lwt->ompt_task_info.function = NULL;
+ lwt->parent = 0;
+}
+
+
+void
+__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr)
+{
+ ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
+ lwt->parent = my_parent;
+ thr->th.th_team->t.ompt_serialized_team_info = lwt;
+}
+
+
+ompt_lw_taskteam_t *
+__ompt_lw_taskteam_unlink(kmp_info_t *thr)
+{
+ ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
+ if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
+ return lwtask;
+}
+
+
+//----------------------------------------------------------
+// task support
+//----------------------------------------------------------
+
+ompt_task_id_t
+__ompt_task_id_new(int gtid)
+{
+ static uint64_t ompt_task_id = 1;
+ return NEXT_ID(&ompt_task_id, gtid);
+}
+
+
+ompt_task_id_t
+__ompt_get_task_id_internal(int depth)
+{
+ ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+ ompt_task_id_t task_id = info ? info->task_id : 0;
+ return task_id;
+}
+
+
+void *
+__ompt_get_task_function_internal(int depth)
+{
+ ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+ void *function = info ? info->function : NULL;
+ return function;
+}
+
+
+ompt_frame_t *
+__ompt_get_task_frame_internal(int depth)
+{
+ ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+ ompt_frame_t *frame = info ? &info->frame : NULL;
+ return frame;
+}
+
+
+//----------------------------------------------------------
+// team support
+//----------------------------------------------------------
+
+void
+__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid)
+{
+ team->t.ompt_team_info.parallel_id = ompt_pid;
+}
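All three ID generators above (__ompt_thread_id_new, __ompt_parallel_id_new, __ompt_task_id_new) boil down to the NEXT_ID macro: an atomic fetch-and-increment on a shared 64-bit counter that starts at 1, so the value 0 stays free to mean "no/unknown id". A minimal standalone sketch of the same scheme, using C11 atomics instead of KMP_TEST_THEN_INC64 (the names below are illustrative, not from the runtime):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* shared counter; starts at 1 so 0 can be reserved as the invalid id */
static _Atomic uint64_t next_ompt_id = 1;

static uint64_t ompt_id_new(void)
{
    /* fetch-and-add returns the value held *before* the increment */
    return atomic_fetch_add_explicit(&next_ompt_id, 1, memory_order_relaxed);
}

int main(void)
{
    printf("%llu %llu %llu\n",
           (unsigned long long)ompt_id_new(),
           (unsigned long long)ompt_id_new(),
           (unsigned long long)ompt_id_new()); /* prints: 1 2 3 */
    return 0;
}

The disabled #if 0 variant of NEXT_ID additionally packs the caller's thread id into the low OMPT_THREAD_ID_BITS bits of each id; the active variant simply returns the raw counter value.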
diff --git a/contrib/libs/cxxsupp/openmp/ompt-specific.h b/contrib/libs/cxxsupp/openmp/ompt-specific.h
index 2cc213ff14..e8f84a9a58 100644
--- a/contrib/libs/cxxsupp/openmp/ompt-specific.h
+++ b/contrib/libs/cxxsupp/openmp/ompt-specific.h
@@ -1,90 +1,90 @@
-#ifndef OMPT_SPECIFIC_H
-#define OMPT_SPECIFIC_H
-
-#include "kmp.h"
-
-/*****************************************************************************
- * types
- ****************************************************************************/
-
-typedef kmp_info_t ompt_thread_t;
-
-
-
-/*****************************************************************************
- * forward declarations
- ****************************************************************************/
-
-void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid);
-void __ompt_thread_assign_wait_id(void *variable);
-
-void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr,
- int gtid, void *microtask,
- ompt_parallel_id_t ompt_pid);
-
-void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
-
-ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr);
-
-ompt_parallel_id_t __ompt_parallel_id_new(int gtid);
-ompt_task_id_t __ompt_task_id_new(int gtid);
-
-ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size);
-
-ompt_task_info_t *__ompt_get_taskinfo(int depth);
-
-void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid);
-
-void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid);
-
-int __ompt_get_parallel_team_size_internal(int ancestor_level);
-
-ompt_task_id_t __ompt_get_task_id_internal(int depth);
-
-ompt_frame_t *__ompt_get_task_frame_internal(int depth);
-
-
-
-/*****************************************************************************
- * macros
- ****************************************************************************/
-
-#define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE
-#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
-#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
-
-
-
-//******************************************************************************
-// inline functions
-//******************************************************************************
-
-inline ompt_thread_t *
-ompt_get_thread_gtid(int gtid)
-{
- return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL;
-}
-
-
-inline ompt_thread_t *
-ompt_get_thread()
-{
- int gtid = __kmp_gtid_get_specific();
- return ompt_get_thread_gtid(gtid);
-}
-
-
-inline void
-ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state)
-{
- thread->th.ompt_thread_info.state = state;
-}
-
-
-inline const char *
-ompt_get_runtime_version()
-{
- return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
-}
-
-#endif
+#ifndef OMPT_SPECIFIC_H
+#define OMPT_SPECIFIC_H
+
+#include "kmp.h"
+
+/*****************************************************************************
+ * types
+ ****************************************************************************/
+
+typedef kmp_info_t ompt_thread_t;
+
+
+
+/*****************************************************************************
+ * forward declarations
+ ****************************************************************************/
+
+void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid);
+void __ompt_thread_assign_wait_id(void *variable);
+
+void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr,
+ int gtid, void *microtask,
+ ompt_parallel_id_t ompt_pid);
+
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
+
+ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr);
+
+ompt_parallel_id_t __ompt_parallel_id_new(int gtid);
+ompt_task_id_t __ompt_task_id_new(int gtid);
+
+ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size);
+
+ompt_task_info_t *__ompt_get_taskinfo(int depth);
+
+void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid);
+
+void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid);
+
+int __ompt_get_parallel_team_size_internal(int ancestor_level);
+
+ompt_task_id_t __ompt_get_task_id_internal(int depth);
+
+ompt_frame_t *__ompt_get_task_frame_internal(int depth);
+
+
+
+/*****************************************************************************
+ * macros
+ ****************************************************************************/
+
+#define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE
+#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
+#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
+
+
+
+//******************************************************************************
+// inline functions
+//******************************************************************************
+
+inline ompt_thread_t *
+ompt_get_thread_gtid(int gtid)
+{
+ return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL;
+}
+
+
+inline ompt_thread_t *
+ompt_get_thread()
+{
+ int gtid = __kmp_gtid_get_specific();
+ return ompt_get_thread_gtid(gtid);
+}
+
+
+inline void
+ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state)
+{
+ thread->th.ompt_thread_info.state = state;
+}
+
+
+inline const char *
+ompt_get_runtime_version()
+{
+ return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
+}
+
+#endif
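The two lookup routines declared here, __ompt_get_teaminfo and __ompt_get_taskinfo, both treat their depth argument as "walk this many ancestor levels", preferring the chain of lightweight (serialized) teams before falling back to the real team or task parents, and returning NULL once the chain runs out. Stripped of the kmp data structures, the traversal reduces to the following self-contained sketch (node_t and ancestor are hypothetical names used only for illustration):

#include <stddef.h>
#include <stdio.h>

typedef struct node {
    struct node *parent;
    int size;
} node_t;

/* walk `depth` ancestors; NULL once the chain is exhausted, which is how the
 * internal lookups report "no team/task at that ancestor level" */
static node_t *ancestor(node_t *n, int depth)
{
    while (n != NULL && depth > 0) {
        n = n->parent;
        depth--;
    }
    return n;
}

int main(void)
{
    node_t root = { NULL, 4 };
    node_t child = { &root, 1 };
    printf("%d\n", ancestor(&child, 1)->size);               /* 4 */
    printf("%s\n", ancestor(&child, 5) ? "found" : "(null)"); /* (null) */
    return 0;
}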
diff --git a/contrib/libs/cxxsupp/openmp/test-touch.c b/contrib/libs/cxxsupp/openmp/test-touch.c
index f724702df5..6ce529ae23 100644
--- a/contrib/libs/cxxsupp/openmp/test-touch.c
+++ b/contrib/libs/cxxsupp/openmp/test-touch.c
@@ -1,31 +1,31 @@
-// test-touch.c //
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-extern double omp_get_wtime();
-extern int omp_get_num_threads();
-extern int omp_get_max_threads();
-#ifdef __cplusplus
-}
-#endif
-
-int main() {
- omp_get_wtime();
- omp_get_num_threads();
- omp_get_max_threads();
- return 0;
-}
-
-// end of file //
+// test-touch.c //
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern double omp_get_wtime();
+extern int omp_get_num_threads();
+extern int omp_get_max_threads();
+#ifdef __cplusplus
+}
+#endif
+
+int main() {
+ omp_get_wtime();
+ omp_get_num_threads();
+ omp_get_max_threads();
+ return 0;
+}
+
+// end of file //
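test-touch.c only has to reference the three entry points so that linking the object against the freshly built runtime proves the exported symbols resolve; nothing is verified at run time. For reference, a small program exercising the same functions through the regular <omp.h> interface (the behavior noted in the comments follows the OpenMP API; build flags and toolchain are deliberately not assumed here):

#include <omp.h>
#include <stdio.h>

int main(void)
{
    double t0 = omp_get_wtime();       /* wall-clock time in seconds */
    int num = omp_get_num_threads();   /* team size: 1 outside any parallel region */
    int max = omp_get_max_threads();   /* upper bound for the next parallel region's team */

    printf("num=%d max=%d elapsed=%g s\n", num, max, omp_get_wtime() - t0);
    return 0;
}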
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h
index 50ccc374d9..4b242fdd8f 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/disable_warnings.h
@@ -1,29 +1,29 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ittnotify_config.h"
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-
-#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */
-#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */
-#pragma warning (disable: 174) /* expression has no effect */
-#pragma warning (disable: 4127) /* conditional expression is constant */
-#pragma warning (disable: 4306) /* conversion from '?' to '?' of greater size */
-
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#if defined __INTEL_COMPILER
-
-#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */
-#pragma warning (disable: 1418) /* external function definition with no prior declaration */
-#pragma warning (disable: 1419) /* external declaration in primary source file */
-
-#endif /* __INTEL_COMPILER */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+
+#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */
+#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */
+#pragma warning (disable: 174) /* expression has no effect */
+#pragma warning (disable: 4127) /* conditional expression is constant */
+#pragma warning (disable: 4306) /* conversion from '?' to '?' of greater size */
+
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if defined __INTEL_COMPILER
+
+#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */
+#pragma warning (disable: 1418) /* external function definition with no prior declaration */
+#pragma warning (disable: 1419) /* external declaration in primary source file */
+
+#endif /* __INTEL_COMPILER */
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h
index 0d0ad7938a..c3792f30a0 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify.h
@@ -1,3804 +1,3804 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _ITTNOTIFY_H_
-#define _ITTNOTIFY_H_
-
-/**
-@file
-@brief Public User API functions and types
-@mainpage
-
-The ITT API is used to annotate a user's program with additional information
-that can be used by correctness and performance tools. The user inserts
-calls in their program. Those calls generate information that is collected
-at runtime, and used by Intel(R) Threading Tools.
-
-@section API Concepts
-The following general concepts are used throughout the API.
-
-@subsection Unicode Support
-Many API functions take character string arguments. On Windows, there
-are two versions of each such function. The function name is suffixed
-by W if Unicode support is enabled, and by A otherwise. Any API function
-that takes a character string argument adheres to this convention.
-
-@subsection Conditional Compilation
-Many users prefer having an option to modify ITT API code when linking it
-inside their runtimes. ITT API header file provides a mechanism to replace
-ITT API function names inside your code with empty strings. To do this,
-define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
-static library from the linker script.
-
-@subsection Domains
-[see domains]
-Domains provide a way to separate notification for different modules or
-libraries in a program. Domains are specified by dotted character strings,
-e.g. TBB.Internal.Control.
-
-A mechanism (to be specified) is provided to enable and disable
-domains. By default, all domains are enabled.
-@subsection Named Entities and Instances
-Named entities (frames, regions, tasks, and markers) communicate
-information about the program to the analysis tools. A named entity often
-refers to a section of program code, or to some set of logical concepts
-that the programmer wants to group together.
-
-Named entities relate to the programmer's static view of the program. When
-the program actually executes, many instances of a given named entity
-may be created.
-
-The API annotations denote instances of named entities. The actual
-named entities are displayed using the analysis tools. In other words,
-the named entities come into existence when instances are created.
-
-Instances of named entities may have instance identifiers (IDs). Some
-API calls use instance identifiers to create relationships between
-different instances of named entities. Other API calls associate data
-with instances of named entities.
-
-Some named entities must always have instance IDs. In particular, regions
-and frames always have IDs. Tasks and markers need IDs only if the ID is
-needed in another API call (such as adding a relation or metadata).
-
-The lifetime of instance IDs is distinct from the lifetime of
-instances. This allows various relationships to be specified separate
-from the actual execution of instances. This flexibility comes at the
-expense of extra API calls.
-
-The same ID may not be reused for different instances, unless a previous
-[ref] __itt_id_destroy call for that ID has been issued.
-*/
-
-/** @cond exclude_from_documentation */
-#ifndef ITT_OS_WIN
-# define ITT_OS_WIN 1
-#endif /* ITT_OS_WIN */
-
-#ifndef ITT_OS_LINUX
-# define ITT_OS_LINUX 2
-#endif /* ITT_OS_LINUX */
-
-#ifndef ITT_OS_MAC
-# define ITT_OS_MAC 3
-#endif /* ITT_OS_MAC */
-
-#ifndef ITT_OS
-# if defined WIN32 || defined _WIN32
-# define ITT_OS ITT_OS_WIN
-# elif defined( __APPLE__ ) && defined( __MACH__ )
-# define ITT_OS ITT_OS_MAC
-# else
-# define ITT_OS ITT_OS_LINUX
-# endif
-#endif /* ITT_OS */
-
-#ifndef ITT_PLATFORM_WIN
-# define ITT_PLATFORM_WIN 1
-#endif /* ITT_PLATFORM_WIN */
-
-#ifndef ITT_PLATFORM_POSIX
-# define ITT_PLATFORM_POSIX 2
-#endif /* ITT_PLATFORM_POSIX */
-
-#ifndef ITT_PLATFORM_MAC
-# define ITT_PLATFORM_MAC 3
-#endif /* ITT_PLATFORM_MAC */
-
-#ifndef ITT_PLATFORM
-# if ITT_OS==ITT_OS_WIN
-# define ITT_PLATFORM ITT_PLATFORM_WIN
-# elif ITT_OS==ITT_OS_MAC
-# define ITT_PLATFORM ITT_PLATFORM_MAC
-# else
-# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif
-#endif /* ITT_PLATFORM */
-
-#if defined(_UNICODE) && !defined(UNICODE)
-#define UNICODE
-#endif
-
-#include <stddef.h>
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#include <tchar.h>
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <stdint.h>
-#if defined(UNICODE) || defined(_UNICODE)
-#include <wchar.h>
-#endif /* UNICODE || _UNICODE */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#ifndef CDECL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define CDECL __cdecl
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_IX86 || defined __i386__
-# define CDECL __attribute__ ((cdecl))
-# else /* _M_IX86 || __i386__ */
-# define CDECL /* actual only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* CDECL */
-
-#ifndef STDCALL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define STDCALL __stdcall
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _ITTNOTIFY_H_
+#define _ITTNOTIFY_H_
+
+/**
+@file
+@brief Public User API functions and types
+@mainpage
+
+The ITT API is used to annotate a user's program with additional information
+that can be used by correctness and performance tools. The user inserts
+calls in their program. Those calls generate information that is collected
+at runtime, and used by Intel(R) Threading Tools.
+
+@section API Concepts
+The following general concepts are used throughout the API.
+
+@subsection Unicode Support
+Many API functions take character string arguments. On Windows, there
+are two versions of each such function. The function name is suffixed
+by W if Unicode support is enabled, and by A otherwise. Any API function
+that takes a character string argument adheres to this convention.
+
+@subsection Conditional Compilation
+Many users prefer having an option to modify ITT API code when linking it
+inside their runtimes. ITT API header file provides a mechanism to replace
+ITT API function names inside your code with empty strings. To do this,
+define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
+static library from the linker script.
+
+@subsection Domains
+[see domains]
+Domains provide a way to separate notification for different modules or
+libraries in a program. Domains are specified by dotted character strings,
+e.g. TBB.Internal.Control.
+
+A mechanism (to be specified) is provided to enable and disable
+domains. By default, all domains are enabled.
+@subsection Named Entities and Instances
+Named entities (frames, regions, tasks, and markers) communicate
+information about the program to the analysis tools. A named entity often
+refers to a section of program code, or to some set of logical concepts
+that the programmer wants to group together.
+
+Named entities relate to the programmer's static view of the program. When
+the program actually executes, many instances of a given named entity
+may be created.
+
+The API annotations denote instances of named entities. The actual
+named entities are displayed using the analysis tools. In other words,
+the named entities come into existence when instances are created.
+
+Instances of named entities may have instance identifiers (IDs). Some
+API calls use instance identifiers to create relationships between
+different instances of named entities. Other API calls associate data
+with instances of named entities.
+
+Some named entities must always have instance IDs. In particular, regions
+and frames always have IDs. Tasks and markers need IDs only if the ID is
+needed in another API call (such as adding a relation or metadata).
+
+The lifetime of instance IDs is distinct from the lifetime of
+instances. This allows various relationships to be specified separate
+from the actual execution of instances. This flexibility comes at the
+expense of extra API calls.
+
+The same ID may not be reused for different instances, unless a previous
+[ref] __itt_id_destroy call for that ID has been issued.
+*/
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
# if defined _M_IX86 || defined __i386__
-# define STDCALL __attribute__ ((stdcall))
-# else /* _M_IX86 || __i386__ */
-# define STDCALL /* supported only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* STDCALL */
-
-#define ITTAPI CDECL
-#define LIBITTAPI CDECL
-
-/* TODO: Temporary for compatibility! */
-#define ITTAPI_CALL CDECL
-#define LIBITTAPI_CALL CDECL
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
-#define ITT_INLINE_ATTRIBUTE /* nothing */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/*
- * Generally, functions are not inlined unless optimization is specified.
- * For functions declared inline, this attribute inlines the function even
- * if no optimization level was specified.
- */
-#ifdef __STRICT_ANSI__
-#define ITT_INLINE static
-#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
-#else /* __STRICT_ANSI__ */
-#define ITT_INLINE static inline
-#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
-#endif /* __STRICT_ANSI__ */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/** @endcond */
-
-#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# include "legacy/ittnotify.h"
-#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
-
-/** @cond exclude_from_documentation */
-/* Helper macro for joining tokens */
-#define ITT_JOIN_AUX(p,n) p##n
-#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
-
-#ifdef ITT_MAJOR
-#undef ITT_MAJOR
-#endif
-#ifdef ITT_MINOR
-#undef ITT_MINOR
-#endif
-#define ITT_MAJOR 3
-#define ITT_MINOR 0
-
-/* Standard versioning of a token with major and minor version numbers */
-#define ITT_VERSIONIZE(x) \
- ITT_JOIN(x, \
- ITT_JOIN(_, \
- ITT_JOIN(ITT_MAJOR, \
- ITT_JOIN(_, ITT_MINOR))))
-
-#ifndef INTEL_ITTNOTIFY_PREFIX
-# define INTEL_ITTNOTIFY_PREFIX __itt_
-#endif /* INTEL_ITTNOTIFY_PREFIX */
-#ifndef INTEL_ITTNOTIFY_POSTFIX
-# define INTEL_ITTNOTIFY_POSTFIX _ptr_
-#endif /* INTEL_ITTNOTIFY_POSTFIX */
-
-#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
-#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
-
-#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
-#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
-
-#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-
-#ifdef ITT_STUB
-#undef ITT_STUB
-#endif
-#ifdef ITT_STUBV
-#undef ITT_STUBV
-#endif
-#define ITT_STUBV(api,type,name,args) \
- typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
- extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
-#define ITT_STUB ITT_STUBV
-/** @endcond */
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/** @cond exclude_from_gpa_documentation */
-/**
- * @defgroup public Public API
- * @{
- * @}
- */
-
-/**
- * @defgroup control Collection Control
- * @ingroup public
- * General behavior: application continues to run, but no profiling information is being collected
- *
- * Pausing occurs not only for the current thread but for the whole process as well as spawned processes
- * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
- * - Does not analyze or report errors that involve memory access.
- * - Other errors are reported as usual. Pausing data collection in
- * Intel(R) Parallel Inspector and Intel(R) Inspector XE
- * only pauses tracing and analyzing memory access.
- * It does not pause tracing or analyzing threading APIs.
- * .
- * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
- * - Does continue to record when new threads are started.
- * .
- * - Other effects:
- * - Possible reduction of runtime overhead.
- * .
- * @{
- */
-/** @brief Pause collection */
-void ITTAPI __itt_pause(void);
-/** @brief Resume collection */
-void ITTAPI __itt_resume(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, pause, (void))
-ITT_STUBV(ITTAPI, void, resume, (void))
-#define __itt_pause ITTNOTIFY_VOID(pause)
-#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
-#define __itt_resume ITTNOTIFY_VOID(resume)
-#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_pause()
-#define __itt_pause_ptr 0
-#define __itt_resume()
-#define __itt_resume_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_pause_ptr 0
-#define __itt_resume_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} control group */
-/** @endcond */
-
-/**
- * @defgroup threads Threads
- * @ingroup public
- * Give names to threads
- * @{
- */
-/**
- * @brief Sets thread name of calling thread
- * @param[in] name - name of thread
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_thread_set_nameA(const char *name);
-void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_thread_set_name __itt_thread_set_nameW
-# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
-#else /* UNICODE */
-# define __itt_thread_set_name __itt_thread_set_nameA
-# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_thread_set_name(const char *name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name))
-ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA)
-#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
-#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW)
-#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name)
-#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_thread_set_nameA(name)
-#define __itt_thread_set_nameA_ptr 0
-#define __itt_thread_set_nameW(name)
-#define __itt_thread_set_nameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_thread_set_name(name)
-#define __itt_thread_set_name_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_thread_set_nameA_ptr 0
-#define __itt_thread_set_nameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_thread_set_name_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @cond exclude_from_gpa_documentation */
-
-/**
- * @brief Mark current thread as ignored from this point on, for the duration of its existence.
- */
-void ITTAPI __itt_thread_ignore(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, thread_ignore, (void))
-#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore)
-#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_thread_ignore()
-#define __itt_thread_ignore_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_thread_ignore_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} threads group */
-
-/**
- * @defgroup suppress Error suppression
- * @ingroup public
- * General behavior: application continues to run, but errors are suppressed
- *
- * @{
- */
-
-/*****************************************************************//**
- * @name group of functions used for error suppression in correctness tools
- *********************************************************************/
-/** @{ */
-/**
- * @hideinitializer
- * @brief possible value for suppression mask
- */
-#define __itt_suppress_all_errors 0x7fffffff
-
-/**
- * @hideinitializer
- * @brief possible value for suppression mask (suppresses errors from threading analysis)
- */
-#define __itt_suppress_threading_errors 0x000000ff
-
-/**
- * @hideinitializer
- * @brief possible value for suppression mask (suppresses errors from memory analysis)
- */
-#define __itt_suppress_memory_errors 0x0000ff00
-
-/**
- * @brief Start suppressing errors identified in mask on this thread
- */
-void ITTAPI __itt_suppress_push(unsigned int mask);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
-#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push)
-#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_push(mask)
-#define __itt_suppress_push_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_push_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Undo the effects of the matching call to __itt_suppress_push
- */
-void ITTAPI __itt_suppress_pop(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_pop, (void))
-#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
-#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_pop()
-#define __itt_suppress_pop_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_pop_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @enum __itt_model_disable
- * @brief Enumerator for the disable methods
- */
-typedef enum __itt_suppress_mode {
- __itt_unsuppress_range,
- __itt_suppress_range
-} __itt_suppress_mode_t;
-
-/**
- * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
- */
-void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
-#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
-#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_mark_range(mask)
-#define __itt_suppress_mark_range_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_mark_range_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
- * call is found, nothing is changed.
- */
-void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
-#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
-#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_suppress_clear_range(mask)
-#define __itt_suppress_clear_range_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_suppress_clear_range_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} */
-/** @} suppress group */
-
-/**
- * @defgroup sync Synchronization
- * @ingroup public
- * Indicate user-written synchronization code
- * @{
- */
-/**
+# define CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# include "legacy/ittnotify.h"
+#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR 3
+#define ITT_MINOR 0
+
+/* Standard versioning of a token with major and minor version numbers */
+#define ITT_VERSIONIZE(x) \
+ ITT_JOIN(x, \
+ ITT_JOIN(_, \
+ ITT_JOIN(ITT_MAJOR, \
+ ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+# define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+# define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
+
+#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args) \
+ typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
+ extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup public Public API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup control Collection Control
+ * @ingroup public
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing occurs not only for the current thread but for the whole process as well as spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ * - Does not analyze or report errors that involve memory access.
+ * - Other errors are reported as usual. Pausing data collection in
+ * Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ * only pauses tracing and analyzing memory access.
+ * It does not pause tracing or analyzing threading APIs.
+ * .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Does continue to record when new threads are started.
+ * .
+ * - Other effects:
+ * - Possible reduction of runtime overhead.
+ * .
+ * @{
+ */
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause, (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+#define __itt_pause ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
+#define __itt_resume ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr 0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr 0
+#define __itt_resume_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} control group */
+/** @endcond */
+
+/**
+ * @defgroup threads Threads
+ * @ingroup public
+ * Give names to threads
+ * @{
+ */
+/**
+ * @brief Sets thread name of calling thread
+ * @param[in] name - name of thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_thread_set_nameA(const char *name);
+void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_thread_set_name __itt_thread_set_nameW
+# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
+#else /* UNICODE */
+# define __itt_thread_set_name __itt_thread_set_nameA
+# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_thread_set_name(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name))
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA)
+#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
+#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW)
+#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name)
+#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA(name)
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW(name)
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name(name)
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
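+
+/*
+ * Illustrative usage sketch (not part of the upstream header): name a worker
+ * thread at the start of its thread function so profiling tools show a
+ * readable label instead of a numeric thread ID. "Render worker" is an
+ * arbitrary example name.
+ *
+ *     __itt_thread_set_name("Render worker");
+ *
+ * On Windows builds the call resolves to __itt_thread_set_nameA or
+ * __itt_thread_set_nameW depending on UNICODE, as defined above.
+ */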
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void ITTAPI __itt_thread_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, thread_ignore, (void))
+#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore)
+#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thread_ignore()
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} threads group */
+
+/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
* @hideinitializer
- * @brief possible value of attribute argument for sync object type
- */
-#define __itt_attr_barrier 1
-
-/**
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
* @hideinitializer
- * @brief possible value of attribute argument for sync object type
- */
-#define __itt_attr_mutex 2
-
-/**
-@brief Name a synchronization object
-@param[in] addr Handle for the synchronization object. You should
-use a real address to uniquely identify the synchronization object.
-@param[in] objtype null-terminated object type string. If NULL is
-passed, the name will be "User Synchronization".
-@param[in] objname null-terminated object name string. If NULL,
-no name will be assigned to the object.
-@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex]
- */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute);
-void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_sync_create __itt_sync_createW
-# define __itt_sync_create_ptr __itt_sync_createW_ptr
-#else /* UNICODE */
-# define __itt_sync_create __itt_sync_createA
-# define __itt_sync_create_ptr __itt_sync_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute))
-ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA)
-#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
-#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW)
-#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_create ITTNOTIFY_VOID(sync_create)
-#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_createA(addr, objtype, objname, attribute)
-#define __itt_sync_createA_ptr 0
-#define __itt_sync_createW(addr, objtype, objname, attribute)
-#define __itt_sync_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_create(addr, objtype, objname, attribute)
-#define __itt_sync_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_createA_ptr 0
-#define __itt_sync_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
-@brief Rename a synchronization object
-
-You can use the rename call to assign or reassign a name to a given
-synchronization object.
-@param[in] addr handle for the synchronization object.
-@param[in] name null-terminated object name string.
-*/
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_sync_renameA(void *addr, const char *name);
-void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_sync_rename __itt_sync_renameW
-# define __itt_sync_rename_ptr __itt_sync_renameW_ptr
-#else /* UNICODE */
-# define __itt_sync_rename __itt_sync_renameA
-# define __itt_sync_rename_ptr __itt_sync_renameA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_sync_rename(void *addr, const char *name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name))
-ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA)
-#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
-#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW)
-#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename)
-#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_renameA(addr, name)
-#define __itt_sync_renameA_ptr 0
-#define __itt_sync_renameW(addr, name)
-#define __itt_sync_renameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_rename(addr, name)
-#define __itt_sync_rename_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_renameA_ptr 0
-#define __itt_sync_renameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_rename_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- @brief Destroy a synchronization object.
- @param addr Handle for the synchronization object.
- */
-void ITTAPI __itt_sync_destroy(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
-#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy)
-#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_destroy(addr)
-#define __itt_sync_destroy_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_destroy_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
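-
-/*
- * Illustrative usage sketch (not part of the upstream header): announce a
- * user-level mutex to the analysis tools for the lifetime of the object.
- * my_lock is a placeholder for an application lock object.
- *
- *     __itt_sync_create(&my_lock, "MyLock", "frame queue lock", __itt_attr_mutex);
- *     __itt_sync_rename(&my_lock, "frame queue lock (resized)");
- *     ...
- *     __itt_sync_destroy(&my_lock);
- */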
-
-/*****************************************************************//**
- * @name group of functions used for performance measurement tools
- *********************************************************************/
-/** @{ */
-/**
- * @brief Enter spin loop on user-defined sync object
- */
-void ITTAPI __itt_sync_prepare(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
-#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare)
-#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_prepare(addr)
-#define __itt_sync_prepare_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_prepare_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Quit spin loop without acquiring spin object
- */
-void ITTAPI __itt_sync_cancel(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
-#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel)
-#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_cancel(addr)
-#define __itt_sync_cancel_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_cancel_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Successful spin loop completion (sync object acquired)
- */
-void ITTAPI __itt_sync_acquired(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
-#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired)
-#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_acquired(addr)
-#define __itt_sync_acquired_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_acquired_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Start sync object releasing code. It is called before the lock release call.
- */
-void ITTAPI __itt_sync_releasing(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
-#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing)
-#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_sync_releasing(addr)
-#define __itt_sync_releasing_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_sync_releasing_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
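-
-/*
- * Illustrative usage sketch (not part of the upstream header): annotate a
- * user-written spin lock so wait time is attributed correctly. my_spin_lock
- * and try_acquire()/release() are placeholders for application code.
- *
- *     __itt_sync_prepare(&my_spin_lock);        // about to spin on the object
- *     while (!try_acquire(&my_spin_lock))
- *         ;                                     // spin
- *     __itt_sync_acquired(&my_spin_lock);       // spin finished, lock held
- *     ...
- *     __itt_sync_releasing(&my_spin_lock);      // just before the release
- *     release(&my_spin_lock);
- */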
-/** @} */
-
-/** @} sync group */
-
-/**************************************************************//**
- * @name group of functions used for correctness checking tools
- ******************************************************************/
-/** @{ */
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- * there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- * in dynamic library.
- * @see void __itt_sync_prepare(void* addr);
- */
-void ITTAPI __itt_fsync_prepare(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
-#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare)
-#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_prepare(addr)
-#define __itt_fsync_prepare_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_prepare_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- * there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- * in dynamic library.
- * @see void __itt_sync_cancel(void *addr);
- */
-void ITTAPI __itt_fsync_cancel(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
-#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel)
-#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_cancel(addr)
-#define __itt_fsync_cancel_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_cancel_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- * there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- * in dynamic library.
- * @see void __itt_sync_acquired(void *addr);
- */
-void ITTAPI __itt_fsync_acquired(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
-#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired)
-#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_acquired(addr)
-#define __itt_fsync_acquired_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_acquired_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup legacy
- * @deprecated Legacy API
- * @brief Fast synchronization which does not require spinning.
- * - This special function is to be used by TBB and OpenMP libraries only when they know
- * there is no spin but they need to suppress TC warnings about shared variable modifications.
- * - It only has corresponding pointers in static library and does not have corresponding function
- * in dynamic library.
- * @see void __itt_sync_releasing(void* addr);
- */
-void ITTAPI __itt_fsync_releasing(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
-#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing)
-#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_fsync_releasing(addr)
-#define __itt_fsync_releasing_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_fsync_releasing_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} */
-
-/**
- * @defgroup model Modeling by Intel(R) Parallel Advisor
- * @ingroup public
- * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
- * This API is called ONLY using annotate.h, by "Annotation" macros
- * the user places in their sources during the parallelism modeling steps.
- *
- * site_begin/end and task_begin/end take the address of handle variables,
- * which are writeable by the API. Handles must be 0 initialized prior
- * to the first call to begin, or may cause a run-time failure.
- * The handles are initialized in a multi-thread safe way by the API if
- * the handle is 0. The commonly expected idiom is one static handle to
- * identify a site or task. If a site or task of the same name has already
- * been started during this collection, the same handle MAY be returned,
- * but is not required to be - it is unspecified if data merging is done
- * based on name. These routines also take an instance variable. Like
- * the lexical instance, these must be 0 initialized. Unlike the lexical
- * instance, this is used to track a single dynamic instance.
- *
- * API used by the Intel(R) Parallel Advisor to describe potential concurrency
- * and related activities. User-added source annotations expand to calls
- * to these procedures to enable modeling of a hypothetical concurrent
- * execution serially.
- * @{
- */
-#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
-
-typedef void* __itt_model_site; /*!< @brief handle for lexical site */
-typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */
-typedef void* __itt_model_task; /*!< @brief handle for lexical site */
-typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */
-
-/**
- * @enum __itt_model_disable
- * @brief Enumerator for the disable methods
- */
-typedef enum {
- __itt_model_disable_observation,
- __itt_model_disable_collection
-} __itt_model_disable;
-
-#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
-
-/**
- * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
- *
- * site_begin/end model a potential concurrency site.
- * site instances may be recursively nested with themselves.
- * site_end exits the most recently started but unended site for the current
- * thread. The handle passed to end may be used to validate structure.
- * Instances of a site encountered on different threads concurrently
- * are considered completely distinct. If the site name for two different
- * lexical sites match, it is unspecified whether they are treated as the
- * same or different for data presentation.
- */
-void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_model_site_beginW(const wchar_t *name);
-#endif
-void ITTAPI __itt_model_site_beginA(const char *name);
-void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
-void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
-void ITTAPI __itt_model_site_end_2(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
-#endif
-ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
-ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
-ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
-ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
-#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
-#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
-#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
-#endif
-#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
-#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
-#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
-#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
-#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
-#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
-#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
-#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_site_begin(site, instance, name)
-#define __itt_model_site_begin_ptr 0
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_site_beginW(name)
-#define __itt_model_site_beginW_ptr 0
-#endif
-#define __itt_model_site_beginA(name)
-#define __itt_model_site_beginA_ptr 0
-#define __itt_model_site_beginAL(name, siteNameLen)
-#define __itt_model_site_beginAL_ptr 0
-#define __itt_model_site_end(site, instance)
-#define __itt_model_site_end_ptr 0
-#define __itt_model_site_end_2()
-#define __itt_model_site_end_2_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_site_begin_ptr 0
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_site_beginW_ptr 0
-#endif
-#define __itt_model_site_beginA_ptr 0
-#define __itt_model_site_beginAL_ptr 0
-#define __itt_model_site_end_ptr 0
-#define __itt_model_site_end_2_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
- *
- * task_begin/end model a potential task, which is contained within the most
- * closely enclosing dynamic site. task_end exits the most recently started
- * but unended task. The handle passed to end may be used to validate
- * structure. It is unspecified if bad dynamic nesting is detected. If it
- * is, it should be encoded in the resulting data collection. The collector
- * should not fail due to construct nesting issues, nor attempt to directly
- * indicate the problem.
- */
-void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_model_task_beginW(const wchar_t *name);
-void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
-#endif
-void ITTAPI __itt_model_task_beginA(const char *name);
-void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
-void ITTAPI __itt_model_iteration_taskA(const char *name);
-void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
-void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance);
-void ITTAPI __itt_model_task_end_2(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name))
-ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
-#endif
-ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name))
-ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen))
-ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name))
-ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen))
-ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance))
-ITT_STUBV(ITTAPI, void, model_task_end_2, (void))
-#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin)
-#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin)
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW)
-#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
-#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW)
-#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
-#endif
-#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA)
-#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
-#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL)
-#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
-#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA)
-#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
-#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL)
-#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
-#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end)
-#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end)
-#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2)
-#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_task_begin(task, instance, name)
-#define __itt_model_task_begin_ptr 0
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_task_beginW(name)
-#define __itt_model_task_beginW_ptr 0
-#endif
-#define __itt_model_task_beginA(name)
-#define __itt_model_task_beginA_ptr 0
-#define __itt_model_task_beginAL(name, siteNameLen)
-#define __itt_model_task_beginAL_ptr 0
-#define __itt_model_iteration_taskA(name)
-#define __itt_model_iteration_taskA_ptr 0
-#define __itt_model_iteration_taskAL(name, siteNameLen)
-#define __itt_model_iteration_taskAL_ptr 0
-#define __itt_model_task_end(task, instance)
-#define __itt_model_task_end_ptr 0
-#define __itt_model_task_end_2()
-#define __itt_model_task_end_2_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_task_begin_ptr 0
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_model_task_beginW_ptr 0
-#endif
-#define __itt_model_task_beginA_ptr 0
-#define __itt_model_task_beginAL_ptr 0
-#define __itt_model_iteration_taskA_ptr 0
-#define __itt_model_iteration_taskAL_ptr 0
-#define __itt_model_task_end_ptr 0
-#define __itt_model_task_end_2_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
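-
-/*
- * Illustrative usage sketch (not part of the upstream header): model a
- * candidate parallel site containing one task per loop iteration. The static
- * handles start zero-initialized, as required above; process() is a
- * placeholder for application code.
- *
- *     static __itt_model_site          site;
- *     static __itt_model_site_instance site_inst;
- *     static __itt_model_task          task;
- *     static __itt_model_task_instance task_inst;
- *
- *     __itt_model_site_begin(&site, &site_inst, "loop_site");
- *     for (int i = 0; i < n; i++) {
- *         __itt_model_task_begin(&task, &task_inst, "loop_task");
- *         process(i);
- *         __itt_model_task_end(&task, &task_inst);
- *     }
- *     __itt_model_site_end(&site, &site_inst);
- */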
-
-/**
- * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
- *
- * lock_acquire/release model a potential lock for both lockset and
- * performance modeling. Each unique address is modeled as a separate
- * lock, with invalid addresses being valid lock IDs. Specifically:
- * no storage is accessed by the API at the specified address - it is only
- * used for lock identification. Lock acquires may be self-nested and are
- * unlocked by a corresponding number of releases.
- * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
- * but may not have identical semantics.)
- */
-void ITTAPI __itt_model_lock_acquire(void *lock);
-void ITTAPI __itt_model_lock_acquire_2(void *lock);
-void ITTAPI __itt_model_lock_release(void *lock);
-void ITTAPI __itt_model_lock_release_2(void *lock);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
-ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
-ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
-ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
-#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire)
-#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
-#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2)
-#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
-#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release)
-#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
-#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2)
-#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_lock_acquire(lock)
-#define __itt_model_lock_acquire_ptr 0
-#define __itt_model_lock_acquire_2(lock)
-#define __itt_model_lock_acquire_2_ptr 0
-#define __itt_model_lock_release(lock)
-#define __itt_model_lock_release_ptr 0
-#define __itt_model_lock_release_2(lock)
-#define __itt_model_lock_release_2_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_lock_acquire_ptr 0
-#define __itt_model_lock_acquire_2_ptr 0
-#define __itt_model_lock_release_ptr 0
-#define __itt_model_lock_release_2_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
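-
-/*
- * Illustrative usage sketch (not part of the upstream header): model the
- * lock protecting a shared counter. Only the address identifies the lock;
- * counter_lock is a placeholder object and no storage at that address is
- * accessed by the API.
- *
- *     __itt_model_lock_acquire(&counter_lock);
- *     counter++;
- *     __itt_model_lock_release(&counter_lock);
- */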
-
-/**
- * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
- *
- * record_allocation/deallocation describe user-defined memory allocator
- * behavior, which may be required for correctness modeling to understand
- * when storage is not expected to be actually reused across threads.
- */
-void ITTAPI __itt_model_record_allocation (void *addr, size_t size);
-void ITTAPI __itt_model_record_deallocation(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size))
-ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
-#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation)
-#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation)
-#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation)
-#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_record_allocation(addr, size)
-#define __itt_model_record_allocation_ptr 0
-#define __itt_model_record_deallocation(addr)
-#define __itt_model_record_deallocation_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_record_allocation_ptr 0
-#define __itt_model_record_deallocation_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief ANNOTATE_INDUCTION_USES support
- *
- * Note that particular storage is inductive through the end of the current site
- */
-void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
-#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses)
-#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_induction_uses(addr, size)
-#define __itt_model_induction_uses_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_induction_uses_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief ANNOTATE_REDUCTION_USES support
- *
- * Note that particular storage is used for reduction through the end
- * of the current site
- */
-void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
-#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses)
-#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_reduction_uses(addr, size)
-#define __itt_model_reduction_uses_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_reduction_uses_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
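-
-/*
- * Illustrative usage sketch (not part of the upstream header): mark a sum
- * variable as a reduction for the remainder of the enclosing modeled site,
- * so accumulating into it from modeled tasks is not reported as a conflict.
- *
- *     double sum = 0.0;
- *     __itt_model_reduction_uses(&sum, sizeof(sum));
- *     ...                             // modeled tasks accumulate into sum
- *     __itt_model_clear_uses(&sum);   // end the special handling early, if desired
- */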
-
-/**
- * @brief ANNOTATE_OBSERVE_USES support
- *
- * Have correctness modeling record observations about uses of storage
- * through the end of the current site
- */
-void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
-#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses)
-#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_observe_uses(addr, size)
-#define __itt_model_observe_uses_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_observe_uses_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief ANNOTATE_CLEAR_USES support
- *
- * Clear the special handling of a piece of storage related to induction,
- * reduction or observe_uses
- */
-void ITTAPI __itt_model_clear_uses(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
-#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses)
-#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_clear_uses(addr)
-#define __itt_model_clear_uses_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_clear_uses_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
- *
- * disable_push/disable_pop push and pop disabling based on a parameter.
- * Disabling observations stops processing of memory references during
- * correctness modeling, and all annotations that occur in the disabled
- * region. This allows description of code that is expected to be handled
- * specially during conversion to parallelism or that is not recognized
- * by tools (e.g. some kinds of synchronization operations.)
- * This mechanism causes all annotations in the disabled region, other
- * than disable_push and disable_pop, to be ignored. (For example, this
- * might validly be used to disable an entire parallel site and the contained
- * tasks and locking in it for data collection purposes.)
- * The disable for collection is a more expensive operation, but reduces
- * collector overhead significantly. This applies to BOTH correctness data
- * collection and performance data collection. For example, a site
- * containing a task might only enable data collection for the first 10
- * iterations. Both performance and correctness data should reflect this,
- * and the program should run as close to full speed as possible when
- * collection is disabled.
- */
-void ITTAPI __itt_model_disable_push(__itt_model_disable x);
-void ITTAPI __itt_model_disable_pop(void);
-void ITTAPI __itt_model_aggregate_task(size_t x);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
-ITT_STUBV(ITTAPI, void, model_disable_pop, (void))
-ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
-#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push)
-#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
-#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop)
-#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop)
-#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task)
-#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_model_disable_push(x)
-#define __itt_model_disable_push_ptr 0
-#define __itt_model_disable_pop()
-#define __itt_model_disable_pop_ptr 0
-#define __itt_model_aggregate_task(x)
-#define __itt_model_aggregate_task_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_model_disable_push_ptr 0
-#define __itt_model_disable_pop_ptr 0
-#define __itt_model_aggregate_task_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
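-
-/*
- * Illustrative usage sketch (not part of the upstream header): turn off
- * observation around a custom synchronization primitive that the tools do
- * not recognize, then restore the previous state. custom_barrier_wait() is a
- * placeholder for such code.
- *
- *     __itt_model_disable_push(__itt_model_disable_observation);
- *     custom_barrier_wait(&b);
- *     __itt_model_disable_pop();
- */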
-/** @} model group */
-
-/**
- * @defgroup heap Heap
- * @ingroup public
- * Heap group
- * @{
- */
-
-typedef void* __itt_heap_function;
-
-/**
- * @brief Create an identifier for a heap function
- * @return non-zero identifier or NULL
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain);
-__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_heap_function_create __itt_heap_function_createW
-# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
-#else
-# define __itt_heap_function_create __itt_heap_function_createA
-# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain))
-ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA)
-#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
-#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW)
-#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create)
-#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
-#define __itt_heap_function_createA_ptr 0
-#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
-#define __itt_heap_function_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_heap_function_create(name, domain) (__itt_heap_function)0
-#define __itt_heap_function_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_heap_function_createA_ptr 0
-#define __itt_heap_function_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_heap_function_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record an allocation begin occurrence.
- */
-void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
-#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin)
-#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_allocate_begin(h, size, initialized)
-#define __itt_heap_allocate_begin_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_allocate_begin_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record an allocation end occurrence.
- */
-void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
-#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end)
-#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_allocate_end(h, addr, size, initialized)
-#define __itt_heap_allocate_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_allocate_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record a free begin occurrence.
- */
-void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
-#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin)
-#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_free_begin(h, addr)
-#define __itt_heap_free_begin_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_free_begin_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record a free end occurrence.
- */
-void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
-#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end)
-#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_free_end(h, addr)
-#define __itt_heap_free_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_free_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
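-
-/*
- * Illustrative usage sketch (not part of the upstream header): instrument a
- * wrapper around malloc/free so the tools can track a custom allocator. The
- * handles would normally be created once at startup; names and the "my_allocator"
- * domain string are arbitrary examples.
- *
- *     static __itt_heap_function alloc_fn, free_fn;
- *     alloc_fn = __itt_heap_function_create("my_malloc", "my_allocator");
- *     free_fn  = __itt_heap_function_create("my_free",  "my_allocator");
- *
- *     __itt_heap_allocate_begin(alloc_fn, size, 0);
- *     void* p = malloc(size);
- *     __itt_heap_allocate_end(alloc_fn, &p, size, 0);
- *     ...
- *     __itt_heap_free_begin(free_fn, p);
- *     free(p);
- *     __itt_heap_free_end(free_fn, p);
- */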
-
-/**
- * @brief Record a reallocation begin occurrence.
- */
-void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
-#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin)
-#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
-#define __itt_heap_reallocate_begin_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_reallocate_begin_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record a reallocation end occurrence.
- */
-void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
-#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end)
-#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
-#define __itt_heap_reallocate_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_reallocate_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @brief internal access begin */
-void ITTAPI __itt_heap_internal_access_begin(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
-#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin)
-#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_internal_access_begin()
-#define __itt_heap_internal_access_begin_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_internal_access_begin_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @brief internal access end */
-void ITTAPI __itt_heap_internal_access_end(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
-#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end)
-#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_internal_access_end()
-#define __itt_heap_internal_access_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_internal_access_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @brief record memory growth begin */
-void ITTAPI __itt_heap_record_memory_growth_begin(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
-#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin)
-#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_record_memory_growth_begin()
-#define __itt_heap_record_memory_growth_begin_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_record_memory_growth_begin_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @brief record memory growth end */
-void ITTAPI __itt_heap_record_memory_growth_end(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
-#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end)
-#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_record_memory_growth_end()
-#define __itt_heap_record_memory_growth_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_record_memory_growth_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Specify the type of heap detection/reporting to modify.
- */
-/**
- * @hideinitializer
- * @brief Report on memory leaks.
- */
-#define __itt_heap_leaks 0x00000001
-
-/**
- * @hideinitializer
- * @brief Report on memory growth.
- */
-#define __itt_heap_growth 0x00000002
-
-
-/** @brief heap reset detection */
-void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask))
-#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection)
-#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_reset_detection(reset_mask)
-#define __itt_heap_reset_detection_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_reset_detection_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @brief report */
-void ITTAPI __itt_heap_record(unsigned int record_mask);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
-#define __itt_heap_record ITTNOTIFY_VOID(heap_record)
-#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_heap_record(record_mask)
-#define __itt_heap_record_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_heap_record_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
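-
-/*
- * Illustrative usage sketch (not part of the upstream header): record memory
- * growth across one phase of the program using the masks defined above.
- * run_phase() is a placeholder workload.
- *
- *     __itt_heap_reset_detection(__itt_heap_growth);   // discard earlier growth data
- *     run_phase();
- *     __itt_heap_record(__itt_heap_growth);            // report growth seen so far
- */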
-
-/** @} heap group */
-/** @endcond */
-/* ========================================================================== */
-
-/**
- * @defgroup domains Domains
- * @ingroup public
- * Domains group
- * @{
- */
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_domain
-{
- volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
- const char* nameA; /*!< Copy of original name in ASCII. */
-#if defined(UNICODE) || defined(_UNICODE)
- const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
-#else /* UNICODE || _UNICODE */
- void* nameW;
-#endif /* UNICODE || _UNICODE */
- int extra1; /*!< Reserved to the runtime */
- void* extra2; /*!< Reserved to the runtime */
- struct ___itt_domain* next;
-} __itt_domain;
-
-#pragma pack(pop)
-/** @endcond */
-
-/**
- * @ingroup domains
- * @brief Create a domain.
- * Create a domain using some domain name: the URI naming style is recommended.
- * Because the set of domains is expected to be static over the application's
- * execution time, there is no mechanism to destroy a domain.
- * Any domain can be accessed by any thread in the process, regardless of
- * which thread created the domain. This call is thread-safe.
- * @param[in] name name of domain
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_domain* ITTAPI __itt_domain_createA(const char *name);
-__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_domain_create __itt_domain_createW
-# define __itt_domain_create_ptr __itt_domain_createW_ptr
-#else /* UNICODE */
-# define __itt_domain_create __itt_domain_createA
-# define __itt_domain_create_ptr __itt_domain_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_domain* ITTAPI __itt_domain_create(const char *name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name))
-ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA)
-#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
-#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW)
-#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_domain_create ITTNOTIFY_DATA(domain_create)
-#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_domain_createA(name) (__itt_domain*)0
-#define __itt_domain_createA_ptr 0
-#define __itt_domain_createW(name) (__itt_domain*)0
-#define __itt_domain_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_domain_create(name) (__itt_domain*)0
-#define __itt_domain_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_domain_createA_ptr 0
-#define __itt_domain_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_domain_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
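-
-/*
- * Illustrative usage sketch (not part of the upstream header): create one
- * process-wide domain, typically at startup, and keep the pointer for later
- * calls. The URI-style name "com.example.myapp" is an arbitrary example.
- *
- *     static __itt_domain* domain;
- *     domain = __itt_domain_create("com.example.myapp");
- */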
-/** @} domains group */
-
-/**
- * @defgroup ids IDs
- * @ingroup public
- * IDs group
- * @{
- */
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_id
-{
- unsigned long long d1, d2, d3;
-} __itt_id;
-
-#pragma pack(pop)
-/** @endcond */
-
-static const __itt_id __itt_null = { 0, 0, 0 };
-
-/**
- * @ingroup ids
- * @brief A convenience function is provided to create an ID without domain control.
- * It initializes an __itt_id structure.
- * After you make the ID with this function, you still must create it with the
- * __itt_id_create function before using the ID to identify a named entity.
- * @param[in] addr The address of object; high QWORD of the ID value.
- * @param[in] extra The extra data to uniquely identify the object; low QWORD of the ID value.
- */
-
-ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
-ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
-{
- __itt_id id = __itt_null;
- id.d1 = (unsigned long long)((uintptr_t)addr);
- id.d2 = (unsigned long long)extra;
- id.d3 = (unsigned long long)0; /* Reserved. Must be zero */
- return id;
-}
-
-/**
- * @ingroup ids
- * @brief Create an instance of identifier.
- * This establishes the beginning of the lifetime of an instance of
- * the given ID in the trace. Once this lifetime starts, the ID
- * can be used to tag named entity instances in calls such as
- * __itt_task_begin, and to specify relationships among
- * identified named entity instances, using the \ref relations APIs.
- * Instance IDs are not domain specific!
- * @param[in] domain The domain controlling the execution of this call.
- * @param[in] id The ID to create.
- */
-void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
-#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
-#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_id_create(domain,id)
-#define __itt_id_create_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_id_create_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup ids
- * @brief Destroy an instance of identifier.
- * This ends the lifetime of the current instance of the given ID value in the trace.
- * Any relationships that are established after this lifetime ends are invalid.
- * This call must be performed before the given ID value can be reused for a different
- * named entity instance.
- * @param[in] domain The domain controlling the execution of this call.
- * @param[in] id The ID to destroy.
- */
-void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
-#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
-#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_id_destroy(domain,id)
-#define __itt_id_destroy_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_id_destroy_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
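-
-/*
- * Illustrative usage sketch (not part of the upstream header): build an ID
- * from an object address, register it, and destroy it when the object goes
- * away. 'domain' and 'obj' are placeholders from application code.
- *
- *     __itt_id id = __itt_id_make(&obj, 0);
- *     __itt_id_create(domain, id);
- *     ...
- *     __itt_id_destroy(domain, id);
- */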
-/** @} ids group */
-
-/**
- * @defgroup handless String Handles
- * @ingroup public
- * String Handles group
- * @{
- */
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_string_handle
-{
- const char* strA; /*!< Copy of original string in ASCII. */
-#if defined(UNICODE) || defined(_UNICODE)
- const wchar_t* strW; /*!< Copy of original string in UNICODE. */
-#else /* UNICODE || _UNICODE */
- void* strW;
-#endif /* UNICODE || _UNICODE */
- int extra1; /*!< Reserved. Must be zero */
- void* extra2; /*!< Reserved. Must be zero */
- struct ___itt_string_handle* next;
-} __itt_string_handle;
-
-#pragma pack(pop)
-/** @endcond */
-
-/**
- * @ingroup handles
- * @brief Create a string handle.
- * Create and return a handle value that can be associated with a string.
- * Consecutive calls to __itt_string_handle_create with the same name
- * return the same value. Because the set of string handles is expected to remain
- * static during the application's execution time, there is no mechanism to destroy a string handle.
- * Any string handle can be accessed by any thread in the process, regardless of which thread created
- * the string handle. This call is thread-safe.
- * @param[in] name The input string
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name);
-__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_string_handle_create __itt_string_handle_createW
-# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
-#else /* UNICODE */
-# define __itt_string_handle_create __itt_string_handle_createA
-# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name))
-ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA)
-#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
-#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW)
-#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create)
-#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_string_handle_createA(name) (__itt_string_handle*)0
-#define __itt_string_handle_createA_ptr 0
-#define __itt_string_handle_createW(name) (__itt_string_handle*)0
-#define __itt_string_handle_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_string_handle_create(name) (__itt_string_handle*)0
-#define __itt_string_handle_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_string_handle_createA_ptr 0
-#define __itt_string_handle_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_string_handle_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
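-
-/**
- * Illustrative sketch (editor addition): string handles are created once and
- * cached, since there is no destroy call. Assumes a non-UNICODE build and
- * __itt_domain_create() declared earlier in this header; the names are hypothetical.
- * @code
- *   static const __itt_domain *g_domain;
- *   static __itt_string_handle *g_name;
- *
- *   void init_tracing(void)
- *   {
- *       g_domain = __itt_domain_create("Example.Domain");
- *       g_name   = __itt_string_handle_create("compute_step"); // same name -> same handle
- *   }
- * @endcode
- */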
-/** @} handles group */
-
-/** @cond exclude_from_documentation */
-typedef unsigned long long __itt_timestamp;
-/** @endcond */
-
-static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL;
-
-/** @cond exclude_from_gpa_documentation */
-
-/**
- * @ingroup timestamps
- * @brief Return timestamp corresponding to the current moment.
- * This returns the timestamp in the format that is the most relevant for the current
- * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
- * compare __itt_timestamp values.
- */
-__itt_timestamp ITTAPI __itt_get_timestamp(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
-#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp)
-#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_get_timestamp() ((__itt_timestamp)0)
-#define __itt_get_timestamp_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_get_timestamp_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
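-
-/**
- * Illustrative sketch (editor addition): capturing two timestamps around a
- * hypothetical workload; __itt_frame_submit_v3() below can consume such a pair.
- * @code
- *   __itt_timestamp t0 = __itt_get_timestamp();
- *   do_work();                                  // hypothetical workload
- *   __itt_timestamp t1 = __itt_get_timestamp();
- *   int later = (t0 < t1);                      // "<" orders timestamps in the platform clock
- * @endcode
- */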
-/** @} timestamps */
-/** @endcond */
-
-/** @cond exclude_from_gpa_documentation */
-
-/**
- * @defgroup regions Regions
- * @ingroup public
- * Regions group
- * @{
- */
-/**
- * @ingroup regions
- * @brief Begin of region instance.
- * Successive calls to __itt_region_begin with the same ID are ignored
- * until a call to __itt_region_end with the same ID
- * @param[in] domain The domain for this region instance
- * @param[in] id The instance ID for this region instance. Must not be __itt_null
- * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
- * @param[in] name The name of this region
- */
-void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
-
-/**
- * @ingroup regions
- * @brief End of region instance.
- * The first call to __itt_region_end with a given ID ends the
- * region. Successive calls with the same ID are ignored, as are
- * calls that do not have a matching __itt_region_begin call.
- * @param[in] domain The domain for this region instance
- * @param[in] id The instance ID for this region instance
- */
-void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
-ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id))
-#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
-#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin)
-#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x)
-#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_region_begin(d,x,y,z)
-#define __itt_region_begin_ptr 0
-#define __itt_region_end(d,x)
-#define __itt_region_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_region_begin_ptr 0
-#define __itt_region_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
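-
-/**
- * Illustrative sketch (editor addition): bracketing one region instance per
- * hypothetical request object, reusing the g_domain/g_name handles from the
- * sketch above and deriving the instance ID from the object's address.
- * @code
- *   void serve_request(void *req)
- *   {
- *       __itt_id id = __itt_id_make(req, 0);
- *       __itt_region_begin(g_domain, id, __itt_null, g_name);
- *       handle_request(req);                    // hypothetical work
- *       __itt_region_end(g_domain, id);
- *   }
- * @endcode
- */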
-/** @} regions group */
-
-/**
- * @defgroup frames Frames
- * @ingroup public
- * Frames are similar to regions, but are intended to be easier to use and to implement.
- * In particular:
- * - Frames always represent periods of elapsed time
- * - By default, frames have no nesting relationships
- * @{
- */
-
-/**
- * @ingroup frames
- * @brief Begin a frame instance.
- * Successive calls to __itt_frame_begin with the
- * same ID are ignored until a call to __itt_frame_end with the same ID.
- * @param[in] domain The domain for this frame instance
- * @param[in] id The instance ID for this frame instance or NULL
- */
-void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
-
-/**
- * @ingroup frames
- * @brief End a frame instance.
- * The first call to __itt_frame_end with a given ID
- * ends the frame. Successive calls with the same ID are ignored, as are
- * calls that do not have a matching __itt_frame_begin call.
- * @param[in] domain The domain for this frame instance
- * @param[in] id The instance ID for this frame instance or NULL for current
- */
-void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
-
-/**
- * @ingroup frames
- * @brief Submits a frame instance.
- * Successive calls to __itt_frame_begin or __itt_frame_submit with the
- * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
- * with the same ID.
- * Passing the special __itt_timestamp_none value as the "end" argument means
- * that the current timestamp is taken as the end timestamp.
- * @param[in] domain The domain for this frame instance
- * @param[in] id The instance ID for this frame instance or NULL
- * @param[in] begin Timestamp of the beginning of the frame
- * @param[in] end Timestamp of the end of the frame
- */
-void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
- __itt_timestamp begin, __itt_timestamp end);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
-ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id))
-ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
-#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
-#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3)
-#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
-#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3)
-#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
-#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_frame_begin_v3(domain,id)
-#define __itt_frame_begin_v3_ptr 0
-#define __itt_frame_end_v3(domain,id)
-#define __itt_frame_end_v3_ptr 0
-#define __itt_frame_submit_v3(domain,id,begin,end)
-#define __itt_frame_submit_v3_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_frame_begin_v3_ptr 0
-#define __itt_frame_end_v3_ptr 0
-#define __itt_frame_submit_v3_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
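-
-/**
- * Illustrative sketch (editor addition): one frame per iteration of a
- * hypothetical render loop, plus a retrospective submit that uses
- * __itt_timestamp_none to close the interval "now".
- * @code
- *   while (app_running) {                       // hypothetical loop condition
- *       __itt_frame_begin_v3(g_domain, NULL);
- *       render_one_frame();                     // hypothetical per-frame work
- *       __itt_frame_end_v3(g_domain, NULL);
- *   }
- *
- *   __itt_timestamp t0 = __itt_get_timestamp();
- *   load_assets();                              // hypothetical work to report later
- *   __itt_frame_submit_v3(g_domain, NULL, t0, __itt_timestamp_none);
- * @endcode
- */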
-/** @} frames group */
-/** @endcond */
-
-/**
- * @defgroup taskgroup Task Group
- * @ingroup public
- * Task Group
- * @{
- */
-/**
- * @ingroup task_groups
- * @brief Denotes a task_group instance.
- * Successive calls to __itt_task_group with the same ID are ignored.
- * @param[in] domain The domain for this task_group instance
- * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
- * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
- * @param[in] name The name of this task_group
- */
-void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
-#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
-#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_task_group(d,x,y,z)
-#define __itt_task_group_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_task_group_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
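-
-/**
- * Illustrative sketch (editor addition): naming a group whose ID later task
- * instances can reference as their parent. Uses g_domain/g_name from the
- * earlier sketch; the group ID is derived from a static variable's address.
- * @code
- *   static __itt_id group_id;
- *
- *   void start_group(void)
- *   {
- *       group_id = __itt_id_make(&group_id, 0);
- *       __itt_id_create(g_domain, group_id);
- *       __itt_task_group(g_domain, group_id, __itt_null, g_name);
- *   }
- * @endcode
- */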
-/** @} taskgroup group */
-
-/**
- * @defgroup tasks Tasks
- * @ingroup public
- * A task instance represents a piece of work performed by a particular
- * thread for a period of time. A call to __itt_task_begin creates a
- * task instance. This becomes the current instance for that task on that
- * thread. A following call to __itt_task_end on the same thread ends the
- * instance. There may be multiple simultaneous instances of tasks with the
- * same name on different threads. If an ID is specified, the task instance
- * receives that ID. Nested tasks are allowed.
- *
- * Note: The task is defined by the bracketing of __itt_task_begin and
- * __itt_task_end on the same thread. If some scheduling mechanism causes
- * task switching (the thread executes a different user task) or thread
- * switching (the user task switches to a different thread), then this breaks
- * the notion of current instance. Additional API calls are required to
- * deal with that possibility.
- * @{
- */
-
-/**
- * @ingroup tasks
- * @brief Begin a task instance.
- * @param[in] domain The domain for this task
- * @param[in] taskid The instance ID for this task instance, or __itt_null
- * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
- * @param[in] name The name of this task
- */
-void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
-
-/**
- * @ingroup tasks
- * @brief Begin a task instance.
- * @param[in] domain The domain for this task
- * @param[in] taskid The identifier for this task instance (may be 0)
- * @param[in] parentid The parent of this task (may be 0)
- * @param[in] fn The pointer to the function you are tracing
- */
-void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
-
-/**
- * @ingroup tasks
- * @brief End the current task instance.
- * @param[in] domain The domain for this task
- */
-void ITTAPI __itt_task_end(const __itt_domain *domain);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
-ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
-ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain))
-#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
-#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin)
-#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
-#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn)
-#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d)
-#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_task_begin(domain,id,parentid,name)
-#define __itt_task_begin_ptr 0
-#define __itt_task_begin_fn(domain,id,parentid,fn)
-#define __itt_task_begin_fn_ptr 0
-#define __itt_task_end(domain)
-#define __itt_task_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_task_begin_ptr 0
-#define __itt_task_begin_fn_ptr 0
-#define __itt_task_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
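-
-/**
- * Illustrative sketch (editor addition): nested task instances on one thread.
- * h_outer and h_inner stand for hypothetical string handles created as in the
- * string-handle sketch; g_domain is reused from there as well.
- * @code
- *   void inner(void)
- *   {
- *       __itt_task_begin(g_domain, __itt_null, __itt_null, h_inner);
- *       do_work();                              // hypothetical work
- *       __itt_task_end(g_domain);               // ends inner's instance
- *   }
- *
- *   void outer(void)
- *   {
- *       __itt_task_begin(g_domain, __itt_null, __itt_null, h_outer);
- *       inner();                                // nested task
- *       __itt_task_end(g_domain);               // ends outer's instance
- *   }
- * @endcode
- */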
-/** @} tasks group */
-
-/**
- * @defgroup counters Counters
- * @ingroup public
- * Counters are user-defined objects with a monotonically increasing
- * value. Counter values are 64-bit unsigned integers. Counter values
- * are tracked per-thread. Counters have names that can be displayed in
- * the tools.
- * @{
- */
-
-/**
- * @ingroup counters
- * @brief Increment a counter by one.
- * The first call with a given name creates a counter by that name and sets its
- * value to zero on every thread. Successive calls increment the counter value
- * on the thread on which the call is issued.
- * @param[in] domain The domain controlling the call. Counter names are not domain specific.
- * The domain argument is used only to enable or disable the API calls.
- * @param[in] name The name of the counter
- */
-void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
-
-/**
- * @ingroup counters
- * @brief Increment a counter by the value specified in delta.
- * @param[in] domain The domain controlling the call. Counter names are not domain specific.
- * The domain argument is used only to enable or disable the API calls.
- * @param[in] name The name of the counter
- * @param[in] delta The amount by which to increment the counter
- */
-void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name))
-ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
-#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
-#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3)
-#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
-#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_counter_inc_v3(domain,name)
-#define __itt_counter_inc_v3_ptr 0
-#define __itt_counter_inc_delta_v3(domain,name,delta)
-#define __itt_counter_inc_delta_v3_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_counter_inc_v3_ptr 0
-#define __itt_counter_inc_delta_v3_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
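-
-/**
- * Illustrative sketch (editor addition): per-thread counting with the v3
- * counter calls; the counter name and domain reuse the hypothetical handles
- * from the earlier sketches.
- * @code
- *   __itt_string_handle *c_items = __itt_string_handle_create("items_processed");
- *
- *   __itt_counter_inc_v3(g_domain, c_items);            // +1 on this thread
- *   __itt_counter_inc_delta_v3(g_domain, c_items, 16);  // +16 on this thread
- * @endcode
- */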
-/** @} counters group */
-
-/**
- * @defgroup markers Markers
- * Markers represent a single discrete event in time. Markers have a scope,
- * described by an enumerated type __itt_scope. Markers are created by
- * the API call __itt_marker. A marker instance can be given an ID for use in
- * adding metadata.
- * @{
- */
-
-/**
- * @brief Describes the scope of an event object in the trace.
- */
-typedef enum
-{
- __itt_scope_unknown = 0,
- __itt_scope_global,
- __itt_scope_track_group,
- __itt_scope_track,
- __itt_scope_task,
- __itt_scope_marker
-} __itt_scope;
-
-/** @cond exclude_from_documentation */
-#define __itt_marker_scope_unknown __itt_scope_unknown
-#define __itt_marker_scope_global __itt_scope_global
-#define __itt_marker_scope_process __itt_scope_track_group
-#define __itt_marker_scope_thread __itt_scope_track
-#define __itt_marker_scope_task __itt_scope_task
-/** @endcond */
-
-/**
- * @ingroup markers
- * @brief Create a marker instance
- * @param[in] domain The domain for this marker
- * @param[in] id The instance ID for this marker or __itt_null
- * @param[in] name The name for this marker
- * @param[in] scope The scope for this marker
- */
-void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
-#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
-#define __itt_marker_ptr ITTNOTIFY_NAME(marker)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_marker(domain,id,name,scope)
-#define __itt_marker_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_marker_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
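-
-/**
- * Illustrative sketch (editor addition): a global marker without an ID, and a
- * task-scoped marker carrying an ID so metadata can be attached later.
- * h_checkpoint is a hypothetical string handle; g_domain is from the earlier sketch.
- * @code
- *   __itt_marker(g_domain, __itt_null, h_checkpoint, __itt_scope_global);
- *
- *   static __itt_id m;
- *   m = __itt_id_make(&m, 0);                   // any unique address works
- *   __itt_id_create(g_domain, m);
- *   __itt_marker(g_domain, m, h_checkpoint, __itt_scope_task);
- * @endcode
- */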
-/** @} markers group */
-
-/**
- * @defgroup metadata Metadata
- * The metadata API is used to attach extra information to named
- * entities. Metadata can be attached to an identified named entity by ID,
- * or to the current entity (which is always a task).
- *
- * Conceptually metadata has a type (what kind of metadata), a key (the
- * name of the metadata), and a value (the actual data). The encoding of
- * the value depends on the type of the metadata.
- *
- * The type of metadata is specified by the enumerated type __itt_metadata_type.
- * @{
- */
-
-/**
- * @ingroup parameters
- * @brief describes the type of metadata
- */
-typedef enum {
- __itt_metadata_unknown = 0,
- __itt_metadata_u64, /**< Unsigned 64-bit integer */
- __itt_metadata_s64, /**< Signed 64-bit integer */
- __itt_metadata_u32, /**< Unsigned 32-bit integer */
- __itt_metadata_s32, /**< Signed 32-bit integer */
- __itt_metadata_u16, /**< Unsigned 16-bit integer */
- __itt_metadata_s16, /**< Signed 16-bit integer */
- __itt_metadata_float, /**< Signed 32-bit floating-point */
- __itt_metadata_double /**< Signed 64-bit floating-point */
-} __itt_metadata_type;
-
-/**
- * @ingroup parameters
- * @brief Add metadata to an instance of a named entity.
- * @param[in] domain The domain controlling the call
- * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
- * @param[in] key The name of the metadata
- * @param[in] type The type of the metadata
- * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
- * @param[in] data The metadata itself
-*/
-void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
-#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
-#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_metadata_add(d,x,y,z,a,b)
-#define __itt_metadata_add_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_metadata_add_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup parameters
- * @brief Add string metadata to an instance of a named entity.
- * @param[in] domain The domain controlling the call
- * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
- * @param[in] key The name of the metadata
- * @param[in] data The metadata itself
- * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
-*/
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
-void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_metadata_str_add __itt_metadata_str_addW
-# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
-#else /* UNICODE */
-# define __itt_metadata_str_add __itt_metadata_str_addA
-# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
-#endif
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
-ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
-#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA)
-#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
-#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
-#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_metadata_str_addA(d,x,y,z,a)
-#define __itt_metadata_str_addA_ptr 0
-#define __itt_metadata_str_addW(d,x,y,z,a)
-#define __itt_metadata_str_addW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_metadata_str_add(d,x,y,z,a)
-#define __itt_metadata_str_add_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_metadata_str_addA_ptr 0
-#define __itt_metadata_str_addW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_metadata_str_add_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup parameters
- * @brief Add metadata to an instance of a named entity.
- * @param[in] domain The domain controlling the call
- * @param[in] scope The scope of the instance to which the metadata is to be added
- * @param[in] key The name of the metadata
- * @param[in] type The type of the metadata
- * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
- * @param[in] data The metadata itself
-*/
-void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
-#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
-#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
-#define __itt_metadata_add_with_scope_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_metadata_add_with_scope_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup parameters
- * @brief Add string metadata to an instance of a named entity.
- * @param[in] domain The domain controlling the call
- * @param[in] scope The scope of the instance to which the metadata is to be added
- * @param[in] key The name of the metadata
- * @param[in] data The metadata itself
- * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
-*/
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
-void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW
-# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
-#else /* UNICODE */
-# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA
-# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
-#endif
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
-ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
-#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
-#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
-#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
-#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
-#define __itt_metadata_str_add_with_scopeA_ptr 0
-#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
-#define __itt_metadata_str_add_with_scopeW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
-#define __itt_metadata_str_add_with_scope_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_metadata_str_add_with_scopeA_ptr 0
-#define __itt_metadata_str_add_with_scopeW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_metadata_str_add_with_scope_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
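-/**
- * Illustrative sketch (editor addition): attaching numeric and string metadata
- * to the current task (id == __itt_null). h_bytes and h_file are hypothetical
- * string handles; a non-UNICODE build is assumed so __itt_metadata_str_add
- * maps to the ASCII variant.
- * @code
- *   unsigned long long bytes = 4096;
- *   __itt_metadata_add(g_domain, __itt_null, h_bytes, __itt_metadata_u64, 1, &bytes);
- *   __itt_metadata_str_add(g_domain, __itt_null, h_file, "input.bin", 9); // 9 chars, terminator not counted
- * @endcode
- */
-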
-/** @} metadata group */
-
-/**
- * @defgroup relations Relations
- * Instances of named entities can be explicitly associated with other
- * instances using instance IDs and the relationship API calls.
- *
- * @{
- */
-
-/**
- * @ingroup relations
- * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
- * Relations between instances can be added with an API call. The relation
- * API uses instance IDs. Relations can be added before or after the actual
- * instances are created and persist independently of the instances. This
- * is the motivation for having different lifetimes for instance IDs and
- * the actual instances.
- */
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_suppress_mode
+ * @brief Enumerator for the suppression modes
+ */
+typedef enum __itt_suppress_mode {
+ __itt_unsuppress_range,
+ __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_mark_range(mode,mask,address,size)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
+ * call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_clear_range(mode,mask,address,size)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
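+
+/**
+ * Illustrative sketch (editor addition): silencing findings around a known
+ * benign race, and marking a custom allocator's arena so memory findings
+ * inside it are suppressed. racy_read() and the arena pointer/size are hypothetical.
+ * @code
+ *   __itt_suppress_push(__itt_suppress_threading_errors);
+ *   racy_read();                                 // deliberately unsynchronized read
+ *   __itt_suppress_pop();
+ *
+ *   __itt_suppress_mark_range(__itt_suppress_range, __itt_suppress_memory_errors,
+ *                             arena_base, arena_size);
+ *   // ... later, when the arena is torn down:
+ *   __itt_suppress_clear_range(__itt_suppress_range, __itt_suppress_memory_errors,
+ *                              arena_base, arena_size);
+ * @endcode
+ */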
+/** @} */
+/** @} suppress group */
+
+/**
+ * @defgroup sync Synchronization
+ * @ingroup public
+ * Indicate user-written synchronization code
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex 2
+
+/**
+@brief Name a synchronization object
+@param[in] addr Handle for the synchronization object. You should
+use a real address to uniquely identify the synchronization object.
+@param[in] objtype null-terminated object type string. If NULL is
+passed, the name will be "User Synchronization".
+@param[in] objname null-terminated object name string. If NULL,
+no name will be assigned to the object.
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex]
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute);
+void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_create __itt_sync_createW
+# define __itt_sync_create_ptr __itt_sync_createW_ptr
+#else /* UNICODE */
+# define __itt_sync_create __itt_sync_createA
+# define __itt_sync_create_ptr __itt_sync_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA)
+#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
+#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW)
+#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create ITTNOTIFY_VOID(sync_create)
+#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA(addr, objtype, objname, attribute)
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW(addr, objtype, objname, attribute)
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create(addr, objtype, objname, attribute)
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+@brief Rename a synchronization object
+
+You can use the rename call to assign or reassign a name to a given
+synchronization object.
+@param[in] addr handle for the synchronization object.
+@param[in] name null-terminated object name string.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_renameA(void *addr, const char *name);
+void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_rename __itt_sync_renameW
+# define __itt_sync_rename_ptr __itt_sync_renameW_ptr
+#else /* UNICODE */
+# define __itt_sync_rename __itt_sync_renameA
+# define __itt_sync_rename_ptr __itt_sync_renameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_rename(void *addr, const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name))
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA)
+#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
+#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW)
+#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename)
+#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA(addr, name)
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW(addr, name)
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename(addr, name)
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ @brief Destroy a synchronization object.
+ @param addr Handle for the synchronization object.
+ */
+void ITTAPI __itt_sync_destroy(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
+#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy)
+#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_destroy(addr)
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/*****************************************************************//**
+ * @name Group of functions used by performance measurement tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @brief Enter spin loop on user-defined sync object
+ */
+void ITTAPI __itt_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
+#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare)
+#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_prepare(addr)
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Quit spin loop without acquiring spin object
+ */
+void ITTAPI __itt_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
+#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel)
+#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_cancel(addr)
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Successful spin loop completion (sync object acquired)
+ */
+void ITTAPI __itt_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
+#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired)
+#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_acquired(addr)
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Start of sync object releasing code. Called just before the lock release call.
+ */
+void ITTAPI __itt_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
+#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing)
+#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_releasing(addr)
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
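+
+/**
+ * Illustrative sketch (editor addition): naming a user-written spin lock and
+ * reporting its contention points. The lock type and try_acquire()/release()
+ * are hypothetical; only the __itt_sync_* calls come from this header, and a
+ * non-UNICODE build is assumed for the char* names.
+ * @code
+ *   static my_spinlock lock;                     // hypothetical lock object
+ *
+ *   __itt_sync_create(&lock, "my_spinlock", "queue guard", __itt_attr_mutex);
+ *
+ *   __itt_sync_prepare(&lock);                   // about to spin
+ *   while (!try_acquire(&lock)) { }              // spin
+ *   __itt_sync_acquired(&lock);                  // lock obtained
+ *
+ *   __itt_sync_releasing(&lock);                 // just before unlock
+ *   release(&lock);
+ *
+ *   __itt_sync_destroy(&lock);                   // the object goes away
+ * @endcode
+ */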
+/** @} */
+
+/** @} sync group */
+
+/**************************************************************//**
+ * @name Group of functions used by correctness checking tools
+ ******************************************************************/
+/** @{ */
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_prepare(void* addr);
+ */
+void ITTAPI __itt_fsync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
+#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare)
+#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_prepare(addr)
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_cancel(void *addr);
+ */
+void ITTAPI __itt_fsync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
+#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel)
+#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_cancel(addr)
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_acquired(void *addr);
+ */
+void ITTAPI __itt_fsync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
+#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired)
+#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_acquired(addr)
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_releasing(void* addr);
+ */
+void ITTAPI __itt_fsync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
+#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing)
+#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_releasing(addr)
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/**
+ * @defgroup model Modeling by Intel(R) Parallel Advisor
+ * @ingroup public
+ * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
+ * This API is called ONLY using annotate.h, by "Annotation" macros
+ * the user places in their sources during the parallelism modeling steps.
+ *
+ * site_begin/end and task_begin/end take the address of handle variables,
+ * which are writeable by the API. Handles must be 0-initialized prior
+ * to the first call to begin; otherwise a run-time failure may occur.
+ * The handles are initialized in a multi-thread safe way by the API if
+ * the handle is 0. The commonly expected idiom is one static handle to
+ * identify a site or task. If a site or task of the same name has already
+ * been started during this collection, the same handle MAY be returned,
+ * but is not required to be - it is unspecified if data merging is done
+ * based on name. These routines also take an instance variable. Like
+ * the lexical instance, these must be 0 initialized. Unlike the lexical
+ * instance, this is used to track a single dynamic instance.
+ *
+ * API used by the Intel(R) Parallel Advisor to describe potential concurrency
+ * and related activities. User-added source annotations expand to calls
+ * to these procedures to enable modeling of a hypothetical concurrent
+ * execution serially.
+ * @{
+ */
+#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
+
+typedef void* __itt_model_site; /*!< @brief handle for lexical site */
+typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */
+typedef void* __itt_model_task; /*!< @brief handle for lexical task */
+typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ */
+typedef enum {
+ __itt_model_disable_observation,
+ __itt_model_disable_collection
+} __itt_model_disable;
+
+#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
+
+/**
+ * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
+ *
+ * site_begin/end model a potential concurrency site.
+ * site instances may be recursively nested with themselves.
+ * site_end exits the most recently started but unended site for the current
+ * thread. The handle passed to end may be used to validate structure.
+ * Instances of a site encountered on different threads concurrently
+ * are considered completely distinct. If the site name for two different
+ * lexical sites match, it is unspecified whether they are treated as the
+ * same or different for data presentation.
+ */
+void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_site_beginW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
+void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
+void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
+ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
+#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
+#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
+#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
+#endif
+#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
+#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
+#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
+#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
+#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_site_begin(site, instance, name)
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW(name)
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL(name, siteNameLen)
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end(site, instance)
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
+ *
+ * task_begin/end model a potential task, which is contained within the most
+ * closely enclosing dynamic site. task_end exits the most recently started
+ * but unended task. The handle passed to end may be used to validate
+ * structure. It is unspecified if bad dynamic nesting is detected. If it
+ * is, it should be encoded in the resulting data collection. The collector
+ * should not fail due to construct nesting issues, nor attempt to directly
+ * indicate the problem.
+ */
+void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
+void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void))
+#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin)
+#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW)
+#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
+#endif
+#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
+#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL)
+#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
+#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end)
+#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_task_begin(task, instance, name)
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW(name)
+#define __itt_model_task_beginW_ptr 0
+#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL(name, taskNameLen)
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL(name, taskNameLen)
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end(task, instance)
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW_ptr 0
+#endif
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
+ *
+ * lock_acquire/release model a potential lock for both lockset and
+ * performance modeling. Each unique address is modeled as a separate
+ * lock, with invalid addresses being valid lock IDs. Specifically:
+ * no storage is accessed by the API at the specified address - it is only
+ * used for lock identification. Lock acquires may be self-nested and are
+ * unlocked by a corresponding number of releases.
+ * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
+ * but may not have identical semantics.)
+ */
+void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
+void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
+
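+/*
+ * Usage sketch (illustrative only): any stable address may serve as the lock
+ * ID; here the address of a hypothetical mutex object identifies the lock.
+ *
+ *     __itt_model_lock_acquire(&my_mutex);
+ *     // ... critical section ...
+ *     __itt_model_lock_release(&my_mutex);
+ */
+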
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
+#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire)
+#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
+#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release)
+#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_lock_acquire(lock)
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release(lock)
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
+ *
+ * record_allocation/deallocation describe user-defined memory allocator
+ * behavior, which may be required for correctness modeling to understand
+ * when storage is not expected to be actually reused across threads.
+ */
+void ITTAPI __itt_model_record_allocation (void *addr, size_t size);
+void ITTAPI __itt_model_record_deallocation(void *addr);
+
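+/*
+ * Usage sketch (illustrative only) for a custom pool allocator; pool_get()
+ * and pool_put() are hypothetical allocator functions.
+ *
+ *     void* p = pool_get(pool, n);
+ *     __itt_model_record_allocation(p, n);
+ *     // ... use p ...
+ *     __itt_model_record_deallocation(p);
+ *     pool_put(pool, p);
+ */
+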
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size))
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
+#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation)
+#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation)
+#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation)
+#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_record_allocation(addr, size)
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation(addr)
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_INDUCTION_USES support
+ *
+ * Note that particular storage is inductive through the end of the current site.
+ */
+void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
+#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses)
+#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_induction_uses(addr, size)
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_REDUCTION_USES support
+ *
+ * Note that particular storage is used for reduction through the end
+ * of the current site.
+ */
+void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
+
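+/*
+ * Usage sketch (illustrative only): mark a loop counter as induction storage
+ * and an accumulator as reduction storage for the enclosing site; i and sum
+ * are hypothetical local variables of the annotated loop.
+ *
+ *     __itt_model_induction_uses(&i, sizeof(i));
+ *     __itt_model_reduction_uses(&sum, sizeof(sum));
+ */
+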
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
+#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses)
+#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_reduction_uses(addr, size)
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_OBSERVE_USES support
+ *
+ * Have correctness modeling record observations about uses of storage
+ * through the end of the current site
+ */
+void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
+#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses)
+#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_observe_uses(addr, size)
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_CLEAR_USES support
+ *
+ * Clear the special handling of a piece of storage related to induction,
+ * reduction or observe_uses
+ */
+void ITTAPI __itt_model_clear_uses(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
+#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses)
+#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_clear_uses(addr)
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
+ *
+ * disable_push/disable_pop push and pop disabling based on a parameter.
+ * Disabling observations stops processing of memory references during
+ * correctness modeling, and all annotations that occur in the disabled
+ * region. This allows description of code that is expected to be handled
+ * specially during conversion to parallelism or that is not recognized
+ * by tools (e.g. some kinds of synchronization operations.)
+ * This mechanism causes all annotations in the disabled region, other
+ * than disable_push and disable_pop, to be ignored. (For example, this
+ * might validly be used to disable an entire parallel site and the contained
+ * tasks and locking in it for data collection purposes.)
+ * The disable for collection is a more expensive operation, but reduces
+ * collector overhead significantly. This applies to BOTH correctness data
+ * collection and performance data collection. For example, a site
+ * containing a task might only enable data collection for the first 10
+ * iterations. Both performance and correctness data should reflect this,
+ * and the program should run as close to full speed as possible when
+ * collection is disabled.
+ */
+void ITTAPI __itt_model_disable_push(__itt_model_disable x);
+void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
+
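+/*
+ * Usage sketch (illustrative only): bracket a region whose annotations and
+ * memory references should be ignored. disable_kind stands for an
+ * __itt_model_disable value declared earlier in this header.
+ *
+ *     __itt_model_disable_push(disable_kind);
+ *     // ... annotations and references here are ignored ...
+ *     __itt_model_disable_pop();
+ */
+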
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
+#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push)
+#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
+#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop)
+#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_disable_push(x)
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop()
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} model group */
+
+/**
+ * @defgroup heap Heap
+ * @ingroup public
+ * Heap group
+ * @{
+ */
+
+typedef void* __itt_heap_function;
+
+/**
+ * @brief Create an identification for heap function
+ * @return non-zero identifier or NULL
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain);
+__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_heap_function_create __itt_heap_function_createW
+# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
+#else
+# define __itt_heap_function_create __itt_heap_function_createA
+# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain))
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA)
+#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
+#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW)
+#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create)
+#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation begin occurrence.
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation end occurrence.
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
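+/*
+ * Usage sketch (illustrative only, narrow-string form): instrumenting a
+ * custom malloc-like allocator. my_malloc() is a hypothetical wrapper and
+ * h_alloc a handle obtained once from __itt_heap_function_create.
+ *
+ *     static __itt_heap_function h_alloc;  // = __itt_heap_function_create("my_malloc", "mylib")
+ *
+ *     void* my_malloc(size_t size)
+ *     {
+ *         void* p;
+ *         __itt_heap_allocate_begin(h_alloc, size, 0);   // 0: contents not initialized
+ *         p = malloc(size);
+ *         __itt_heap_allocate_end(h_alloc, &p, size, 0);
+ *         return p;
+ *     }
+ */
+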
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free begin occurrence.
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free end occurrence.
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
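+/*
+ * Usage sketch (illustrative only): the matching free-side instrumentation
+ * for the allocator above; my_free() and h_free are hypothetical.
+ *
+ *     void my_free(void* p)
+ *     {
+ *         __itt_heap_free_begin(h_free, p);
+ *         free(p);
+ *         __itt_heap_free_end(h_free, p);
+ *     }
+ */
+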
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation begin occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end)
+#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access begin */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
+#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin)
+#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_begin()
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access end */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end)
+#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_end()
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief record memory growth begin */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
+#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief record memory growth end */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/** @brief Reset heap detection for the types selected by reset_mask */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask))
+#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reset_detection(reset_mask)
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Report heap data for the detection types selected by record_mask */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
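+/*
+ * Usage sketch (illustrative only): restart growth tracking around a phase
+ * of interest and then report what was recorded; run_phase() is a
+ * hypothetical workload function.
+ *
+ *     __itt_heap_reset_detection(__itt_heap_growth);
+ *     run_phase();
+ *     __itt_heap_record(__itt_heap_growth);
+ */
+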
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record(record_mask)
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
+/* ========================================================================== */
+
+/**
+ * @defgroup domains Domains
+ * @ingroup public
+ * Domains group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_domain
+{
+ volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_domain* next;
+} __itt_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup domains
+ * @brief Create a domain.
+ * Create domain using some domain name: the URI naming style is recommended.
+ * Because the set of domains is expected to be static over the application's
+ * execution time, there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of
+ * which thread created the domain. This call is thread-safe.
+ * @param[in] name name of domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_domain* ITTAPI __itt_domain_createA(const char *name);
+__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_domain_create __itt_domain_createW
+# define __itt_domain_create_ptr __itt_domain_createW_ptr
+#else /* UNICODE */
+# define __itt_domain_create __itt_domain_createA
+# define __itt_domain_create_ptr __itt_domain_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_domain* ITTAPI __itt_domain_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
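+/*
+ * Usage sketch (illustrative only, narrow-string form): create a process-wide
+ * domain once and cache the pointer; the URI-style name is a convention.
+ *
+ *     static __itt_domain* g_domain;  // = __itt_domain_create("com.example.myapp")
+ */
+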
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA)
+#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
+#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW)
+#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create ITTNOTIFY_DATA(domain_create)
+#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA(name) (__itt_domain*)0
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW(name) (__itt_domain*)0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create(name) (__itt_domain*)0
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} domains group */
+
+/**
+ * @defgroup ids IDs
+ * @ingroup public
+ * IDs group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_id
+{
+ unsigned long long d1, d2, d3;
+} __itt_id;
+
+#pragma pack(pop)
+/** @endcond */
+
+static const __itt_id __itt_null = { 0, 0, 0 };
+
+/**
+ * @ingroup ids
+ * @brief Convenience function to initialize an __itt_id structure, creating an ID without domain control.
+ * After you make the ID with this function, you still must create it with the
+ * __itt_id_create function before using the ID to identify a named entity.
+ * @param[in] addr The address of the object; high QWORD of the ID value.
+ * @param[in] extra Extra data to uniquely identify the object; low QWORD of the ID value.
+ */
+
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+{
+ __itt_id id = __itt_null;
+ id.d1 = (unsigned long long)((uintptr_t)addr);
+ id.d2 = (unsigned long long)extra;
+ id.d3 = (unsigned long long)0; /* Reserved. Must be zero */
+ return id;
+}
+
+/**
+ * @ingroup ids
+ * @brief Create an instance of identifier.
+ * This establishes the beginning of the lifetime of an instance of
+ * the given ID in the trace. Once this lifetime starts, the ID
+ * can be used to tag named entity instances in calls such as
+ * __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * Instance IDs are not domain specific!
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
+#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create(domain,id)
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup ids
+ * @brief Destroy an instance of identifier.
+ * This ends the lifetime of the current instance of the given ID value in the trace.
+ * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
+
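+/*
+ * Usage sketch (illustrative only): build an ID from an object address, open
+ * its lifetime, use it to tag named entity instances, then close the
+ * lifetime. obj stands for any application object and g_domain for a
+ * previously created domain.
+ *
+ *     __itt_id id = __itt_id_make(obj, 0);
+ *     __itt_id_create(g_domain, id);
+ *     // ... pass id to __itt_task_begin, relations, metadata, ...
+ *     __itt_id_destroy(g_domain, id);
+ */
+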
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
+#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_destroy(domain,id)
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} ids group */
+
+/**
+ * @defgroup handles String Handles
+ * @ingroup public
+ * String Handles group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_string_handle
+{
+ const char* strA; /*!< Copy of original string in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* strW; /*!< Copy of original string in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* strW;
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_string_handle* next;
+} __itt_string_handle;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup handles
+ * @brief Create a string handle.
+ * Create and return handle value that can be associated with a string.
+ * Consecutive calls to __itt_string_handle_create with the same name
+ * return the same value. Because the set of string handles is expected to remain
+ * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created
+ * the string handle. This call is thread-safe.
+ * @param[in] name The input string
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name);
+__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_string_handle_create __itt_string_handle_createW
+# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
+#else /* UNICODE */
+# define __itt_string_handle_create __itt_string_handle_createA
+# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA)
+#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
+#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW)
+#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create)
+#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA(name) (__itt_string_handle*)0
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW(name) (__itt_string_handle*)0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create(name) (__itt_string_handle*)0
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} handles group */
+
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp;
+/** @endcond */
+
+static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL;
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to the current moment.
+ * This returns the timestamp in the format that is the most relevant for the current
+ * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
+ * compare __itt_timestamp values.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_get_timestamp() (__itt_timestamp)0
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @defgroup regions Regions
+ * @ingroup public
+ * Regions group
+ * @{
+ */
+/**
+ * @ingroup regions
+ * @brief Begin of region instance.
+ * Successive calls to __itt_region_begin with the same ID are ignored
+ * until a call to __itt_region_end with the same ID
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance. Must not be __itt_null
+ * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
+ * @param[in] name The name of this region
+ */
+void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup regions
+ * @brief End of region instance.
+ * The first call to __itt_region_end with a given ID ends the
+ * region. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance
+ */
+void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id))
+#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
+#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin)
+#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x)
+#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_region_begin(d,x,y,z)
+#define __itt_region_begin_ptr 0
+#define __itt_region_end(d,x)
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_region_begin_ptr 0
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} regions group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup public
+ * Frames are similar to regions, but are intended to be easier to use and to implement.
+ * In particular:
+ * - Frames always represent periods of elapsed time
+ * - By default, frames have no nesting relationships
+ * @{
+ */
+
+/**
+ * @ingroup frames
+ * @brief Begin a frame instance.
+ * Successive calls to __itt_frame_begin with the
+ * same ID are ignored until a call to __itt_frame_end with the same ID.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ */
+void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief End a frame instance.
+ * The first call to __itt_frame_end with a given ID
+ * ends the frame. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_frame_begin call.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL for current
+ */
+void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing the special __itt_timestamp_none value as the "end" argument means
+ * that the current timestamp is taken as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+ __itt_timestamp begin, __itt_timestamp end);
+
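+/*
+ * Usage sketch (illustrative only): per-frame bracketing, plus submitting a
+ * frame from a captured begin timestamp; g_domain is a previously created
+ * domain and render() a hypothetical work function.
+ *
+ *     __itt_frame_begin_v3(g_domain, NULL);
+ *     render();
+ *     __itt_frame_end_v3(g_domain, NULL);
+ *
+ *     __itt_timestamp t0 = __itt_get_timestamp();
+ *     render();
+ *     __itt_frame_submit_v3(g_domain, NULL, t0, __itt_timestamp_none);
+ */
+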
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
+#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
+#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3)
+#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
+#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_frame_begin_v3(domain,id)
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3(domain,id)
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+/** @endcond */
+
+/**
+ * @defgroup taskgroup Task Group
+ * @ingroup public
+ * Task Group
+ * @{
+ */
+/**
+ * @ingroup taskgroup
+ * @brief Denotes a task_group instance.
+ * Successive calls to __itt_task_group with the same ID are ignored.
+ * @param[in] domain The domain for this task_group instance
+ * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
+ * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_group(d,x,y,z)
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or thread
+ * switching (the user task switches to a different thread), then this breaks
+ * the notion of current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The identifier for this task instance (may be 0)
+ * @param[in] parentid The parent of this task (may be 0)
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
+
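+/*
+ * Usage sketch (illustrative only, narrow-string form): one cached domain,
+ * one cached string handle, and begin/end bracketing on the same thread;
+ * compute() is a hypothetical work function.
+ *
+ *     static __itt_domain* g_domain;            // = __itt_domain_create("com.example.myapp")
+ *     static __itt_string_handle* g_h_compute;  // = __itt_string_handle_create("compute")
+ *
+ *     __itt_task_begin(g_domain, __itt_null, __itt_null, g_h_compute);
+ *     compute();
+ *     __itt_task_end(g_domain);
+ */
+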
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain))
+#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin)
+#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn)
+#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d)
+#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin(domain,id,parentid,name)
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn(domain,id,parentid,fn)
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end(domain)
+#define __itt_task_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+/**
+ * @defgroup counters Counters
+ * @ingroup public
+ * Counters are user-defined objects with a monotonically increasing
+ * value. Counter values are 64-bit unsigned integers. Counter values
+ * are tracked per-thread. Counters have names that can be displayed in
+ * the tools.
+ * @{
+ */
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero on every thread. Successive calls increment the counter value
+ * on the thread on which the call is issued.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+
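+/*
+ * Usage sketch (illustrative only): bump a named per-thread counter by one,
+ * or by a computed amount; g_domain, g_h_items and g_h_bytes are a domain
+ * and string handles created as in the task example, and nbytes is a
+ * hypothetical local value.
+ *
+ *     __itt_counter_inc_v3(g_domain, g_h_items);
+ *     __itt_counter_inc_delta_v3(g_domain, g_h_bytes, (unsigned long long)nbytes);
+ */
+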
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
+#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3)
+#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
+#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_v3(domain,name)
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3(domain,name,delta)
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
+
+/**
+ * @defgroup markers Markers
+ * A marker represents a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace.
+ */
+typedef enum
+{
+ __itt_scope_unknown = 0,
+ __itt_scope_global,
+ __itt_scope_track_group,
+ __itt_scope_track,
+ __itt_scope_task,
+ __itt_scope_marker
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown __itt_scope_unknown
+#define __itt_marker_scope_global __itt_scope_global
+#define __itt_marker_scope_process __itt_scope_track_group
+#define __itt_marker_scope_thread __itt_scope_track
+#define __itt_marker_scope_task __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
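+/*
+ * Usage sketch (illustrative only): drop a globally scoped marker with no
+ * instance ID; g_domain and g_h_checkpoint are a domain and string handle
+ * created as in the examples above.
+ *
+ *     __itt_marker(g_domain, __itt_null, g_h_checkpoint, __itt_scope_global);
+ */
+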
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#define __itt_marker_ptr ITTNOTIFY_NAME(marker)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker(domain,id,name,scope)
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief describes the type of metadata
+ */
+typedef enum {
+ __itt_metadata_unknown = 0,
+ __itt_metadata_u64, /**< Unsigned 64-bit integer */
+ __itt_metadata_s64, /**< Signed 64-bit integer */
+ __itt_metadata_u32, /**< Unsigned 32-bit integer */
+ __itt_metadata_s32, /**< Signed 32-bit integer */
+ __itt_metadata_u16, /**< Unsigned 16-bit integer */
+ __itt_metadata_s16, /**< Signed 16-bit integer */
+ __itt_metadata_float, /**< Signed 32-bit floating-point */
+    __itt_metadata_double /**< Signed 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
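+/*
+ * Usage sketch (illustrative only): attach a single 64-bit value to the
+ * current task (id == __itt_null); g_domain and g_h_bytes are as in the
+ * earlier examples.
+ *
+ *     unsigned long long bytes = 1024;
+ *     __itt_metadata_add(g_domain, __itt_null, g_h_bytes, __itt_metadata_u64, 1, &bytes);
+ */
+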
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
+#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add(d,x,y,z,a,b)
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add __itt_metadata_str_addW
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add __itt_metadata_str_addA
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA)
+#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
+#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW(d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
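/* Illustrative sketch (editorial, not part of this patch): attaching a numeric value
 * to the current task with __itt_metadata_add_with_scope as declared above. The domain
 * name "my.app" and key name "bytes_processed" are assumptions; __itt_domain_create and
 * __itt_string_handle_create are the regular ITT creation calls declared earlier in
 * this header (non-UNICODE build assumed). */
#include "ittnotify.h"

static void report_progress(unsigned long long bytes)
{
    static __itt_domain* domain;
    static __itt_string_handle* key;
    if (!domain) {
        domain = __itt_domain_create("my.app");                 /* assumed name */
        key    = __itt_string_handle_create("bytes_processed"); /* assumed name */
    }
    /* Attach a single unsigned 64-bit value, scoped to the current task. */
    __itt_metadata_add_with_scope(domain, __itt_scope_task, key,
                                  __itt_metadata_u64, 1, &bytes);
}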
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
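/* Illustrative sketch (editorial, not part of this patch): recording a process-wide
 * string value with __itt_metadata_str_add_with_scope, declared above. The domain and
 * key are assumed to have been created by the caller; non-UNICODE build assumed. */
#include <string.h>
#include "ittnotify.h"

static void note_build_id(const __itt_domain* domain, __itt_string_handle* key,
                          const char* build_id)
{
    /* Global scope attaches the value once for the whole process. */
    __itt_metadata_str_add_with_scope(domain, __itt_scope_global, key,
                                      build_id, strlen(build_id));
}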
+
+/** @} metadata group */
+
+/**
+ * @defgroup relations Relations
+ * Instances of named entities can be explicitly associated with other
+ * instances using instance IDs and the relationship API calls.
+ *
+ * @{
+ */
+
+/**
+ * @ingroup relations
+ * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
+ * Relations between instances can be added with an API call. The relation
+ * API uses instance IDs. Relations can be added before or after the actual
+ * instances are created and persist independently of the instances. This
+ * is the motivation for having different lifetimes for instance IDs and
+ * the actual instances.
+ */
+typedef enum
+{
+ __itt_relation_is_unknown = 0,
+ __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */
+ __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
+ __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */
+ __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */
+ __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
+ __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
+ __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
+} __itt_relation;
+
+/**
+ * @ingroup relations
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup relations
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
+#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current)
+#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
+#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current(d,x,y)
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add(d,x,y,z)
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} relations group */
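/* Illustrative sketch (editorial, not part of this patch): expressing a dependency
 * between two identified instances with the relation calls declared above. The domain
 * name and the pointer values used to build IDs are placeholders; __itt_id_make is the
 * ID-construction helper assumed to be declared earlier in this header. */
#include "ittnotify.h"

static void record_dependency(void)
{
    static __itt_domain* domain;
    if (!domain) domain = __itt_domain_create("my.app");   /* assumed name */

    __itt_id id_a = __itt_id_make((void*)0x1, 0);           /* placeholder IDs */
    __itt_id id_b = __itt_id_make((void*)0x2, 0);

    /* "B is dependent on A": B cannot start until A completes. */
    __itt_relation_add(domain, id_b, __itt_relation_is_dependent_on, id_a);

    /* The currently running task can also be made the head of a relation. */
    __itt_relation_add_to_current(domain, __itt_relation_is_dependent_on, id_a);
}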
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_info
+{
+ unsigned long long clock_freq; /*!< Clock domain frequency */
+ unsigned long long clock_base; /*!< Clock domain base timestamp */
+} __itt_clock_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_domain
+{
+ __itt_clock_info info; /*!< Most recent clock domain info */
+ __itt_get_clock_info_fn fn; /*!< Callback function pointer */
+ void* fn_data; /*!< Input argument for the callback function */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_clock_domain* next;
+} __itt_clock_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Create a clock domain.
+ * Certain applications require the capability to trace their application using
+ * a clock domain different than the CPU, for instance the instrumentation of events
+ * that occur on a GPU.
+ * Because the set of domains is expected to be static over the application's execution time,
+ * there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of which thread created
+ * the domain. This call is thread-safe.
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
+ * @param[in] fn_data Argument for a callback function; may be NULL
+ */
+__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
+#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create)
+#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Recalculate clock domain frequencies and clock base timestamps.
+ */
+void ITTAPI __itt_clock_domain_reset(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
+#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset)
+#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_reset()
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
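/* Illustrative sketch (editorial, not part of this patch): registering a clock domain
 * backed by a device timer via __itt_clock_domain_create, then forcing a re-query with
 * __itt_clock_domain_reset. my_device_read_ticks and the 1 GHz frequency are assumptions
 * standing in for whatever timer the application actually exposes. */
#include <stddef.h>
#include "ittnotify.h"

extern unsigned long long my_device_read_ticks(void);   /* assumed user function */

static void ITTAPI my_clock_info(__itt_clock_info* info, void* data)
{
    (void)data;
    info->clock_freq = 1000000000ULL;          /* assumed 1 GHz device clock */
    info->clock_base = my_device_read_ticks(); /* base timestamp at query time */
}

static __itt_clock_domain* make_device_clock_domain(void)
{
    __itt_clock_domain* cd = __itt_clock_domain_create(my_clock_info, NULL);
    /* If clocks may have drifted, ask the collector to re-invoke all registered
       callbacks and recompute frequencies and base timestamps. */
    __itt_clock_domain_reset();
    return cd;
}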
+
+/**
+ * @ingroup clockdomain
+ * @brief Create an instance of an identifier. This establishes the beginning of the lifetime of
+ * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
+ * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/**
+ * @ingroup clockdomain
+ * @brief Destroy an instance of an identifier. This ends the lifetime of the current instance of the
+ * given ID value in the trace. Any relationships that are established after this lifetime ends are
+ * invalid. This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
+#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex)
+#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
+#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
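/* Illustrative sketch (editorial, not part of this patch): bracketing an ID lifetime on
 * a user clock domain with __itt_id_create_ex / __itt_id_destroy_ex. The object pointer
 * and timestamps are caller-supplied; __itt_id_make is assumed to be declared earlier
 * in this header. */
#include "ittnotify.h"

static void trace_object_lifetime(const __itt_domain* domain, __itt_clock_domain* cd,
                                  unsigned long long t_begin, unsigned long long t_end,
                                  void* obj)
{
    __itt_id id = __itt_id_make(obj, 0);
    __itt_id_create_ex(domain, cd, t_begin, id);   /* lifetime starts at t_begin */
    /* ... between these calls the id can tag tasks, markers and relations ... */
    __itt_id_destroy_ex(domain, cd, t_end, id);    /* lifetime ends; id may be reused */
}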
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, or __itt_null
+ * @param[in] parentid The parent of this task, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup clockdomain
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ */
+void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
+#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
+#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex)
+#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
+#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex)
+#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
+#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex(domain,clock_domain,timestamp)
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
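/* Illustrative sketch (editorial, not part of this patch): recording a task with
 * user-supplied timestamps via __itt_task_begin_ex / __itt_task_end_ex. The task name
 * "device_kernel" is a placeholder; the domain, clock domain and timestamps are assumed
 * to come from the caller's own instrumentation. */
#include "ittnotify.h"

static void record_kernel(const __itt_domain* domain, __itt_clock_domain* cd,
                          unsigned long long t_start, unsigned long long t_stop)
{
    static __itt_string_handle* name;
    if (!name) name = __itt_string_handle_create("device_kernel"); /* assumed name */

    /* __itt_null for taskid/parentid leaves the instance anonymous. */
    __itt_task_begin_ex(domain, cd, t_start, __itt_null, __itt_null, name);
    __itt_task_end_ex(domain, cd, t_stop);
}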
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance.
+ * @param[in] domain The domain for this marker
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The instance ID for this marker, or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
+#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
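/* Illustrative sketch (editorial, not part of this patch): dropping a globally scoped
 * marker at a device timestamp with __itt_marker_ex. The marker name is a placeholder. */
#include "ittnotify.h"

static void mark_frame_boundary(const __itt_domain* domain, __itt_clock_domain* cd,
                                unsigned long long ts)
{
    static __itt_string_handle* name;
    if (!name) name = __itt_string_handle_create("frame_boundary"); /* assumed name */
    __itt_marker_ex(domain, cd, ts, __itt_null, name, __itt_scope_global);
}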
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
+#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex)
+#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
+#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
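/* Illustrative sketch (editorial, not part of this patch): relating two timestamped
 * instances on a clock domain with __itt_relation_add_ex. The IDs and timestamp are
 * assumed to have been created by the caller (e.g. via __itt_id_create_ex above). */
#include "ittnotify.h"

static void link_submit_to_complete(const __itt_domain* domain, __itt_clock_domain* cd,
                                    unsigned long long ts,
                                    __itt_id submit_id, __itt_id complete_id)
{
    /* "complete is continuation of submit": complete assumes submit's dependencies. */
    __itt_relation_add_ex(domain, cd, ts, complete_id,
                          __itt_relation_is_continuation_of, submit_id);
}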
+
+/** @cond exclude_from_documentation */
+typedef enum ___itt_track_group_type
+{
+ __itt_track_group_type_normal = 0
+} __itt_track_group_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_track_group
+{
+ __itt_string_handle* name; /*!< Name of the track group */
+ struct ___itt_track* track; /*!< List of child tracks */
+ __itt_track_group_type tgtype; /*!< Type of the track group */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track_group* next;
+} __itt_track_group;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Placeholder for custom track types. Currently, "normal" custom track
+ * is the only available track type.
+ */
+typedef enum ___itt_track_type
+{
+ __itt_track_type_normal = 0
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+ , __itt_track_type_queue
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+} __itt_track_type;
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_track
+{
+ __itt_string_handle* name; /*!< Name of the track group */
+ __itt_track_group* group; /*!< Parent group to a track */
+ __itt_track_type ttype; /*!< Type of the track */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track* next;
+} __itt_track;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create logical track group.
+ */
+__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
+#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create)
+#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create logical track.
+ */
+__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
+#define __itt_track_create ITTNOTIFY_DATA(track_create)
+#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_track_create(track_group,name,track_type) (__itt_track*)0
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the logical track.
+ */
+void ITTAPI __itt_set_track(__itt_track* track);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
+#define __itt_set_track ITTNOTIFY_VOID(set_track)
+#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_set_track(track)
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
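/* Illustrative sketch (editorial, not part of this patch): creating a track group and a
 * track, then selecting the track with __itt_set_track. The names "GPU" and "Queue 0"
 * are placeholders; only the "normal" group/track types are available here. */
#include "ittnotify.h"

static void select_gpu_track(void)
{
    static __itt_track* track;
    if (!track) {
        __itt_track_group* group =
            __itt_track_group_create(__itt_string_handle_create("GPU"),    /* assumed */
                                     __itt_track_group_type_normal);
        track = __itt_track_create(group,
                                   __itt_string_handle_create("Queue 0"),  /* assumed */
                                   __itt_track_type_normal);
    }
    /* Subsequent calls from this point are associated with the selected track. */
    __itt_set_track(track);
}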
+
+/* ========================================================================== */
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup events Events
+ * @ingroup public
+ * Events group
+ * @{
+ */
+/** @brief user event type */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Fails if name or namelen is null, if namelen does not match the length of name, or if the user event feature is not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_event_create __itt_event_createW
+# define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+# define __itt_event_create __itt_event_createA
+# define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen) (__itt_event)0
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @note Calling this is optional for events that do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} events group */
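/* Illustrative sketch (editorial, not part of this patch): the legacy event API declared
 * above. Assumes a non-UNICODE build; the event name "phase" is a placeholder. Per the
 * documentation, a successful create returns a non-zero identifier, so zero is used here
 * as the "not yet created" state. */
#include <string.h>
#include "ittnotify.h"

static void run_phase(void (*phase_fn)(void))
{
    static __itt_event ev;
    if (!ev) {
        const char* name = "phase";                      /* assumed name */
        ev = __itt_event_create(name, (int)strlen(name));
    }
    __itt_event_start(ev);   /* returns __itt_err on failure */
    phase_fn();
    __itt_event_end(ev);     /* optional for events without a duration */
}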
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines types of arrays data (for C/C++ intrinsic types)
+ */
typedef enum
-{
- __itt_relation_is_unknown = 0,
- __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */
- __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
- __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */
- __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */
- __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
- __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
- __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
-} __itt_relation;
-
-/**
- * @ingroup relations
- * @brief Add a relation to the current task instance.
- * The current task instance is the head of the relation.
- * @param[in] domain The domain controlling this call
- * @param[in] relation The kind of relation
- * @param[in] tail The ID for the tail of the relation
- */
-void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
-
-/**
- * @ingroup relations
- * @brief Add a relation between two instance identifiers.
- * @param[in] domain The domain controlling this call
- * @param[in] head The ID for the head of the relation
- * @param[in] relation The kind of relation
- * @param[in] tail The ID for the tail of the relation
- */
-void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
-ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
-#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
-#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current)
-#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
-#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_relation_add_to_current(d,x,y)
-#define __itt_relation_add_to_current_ptr 0
-#define __itt_relation_add(d,x,y,z)
-#define __itt_relation_add_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_relation_add_to_current_ptr 0
-#define __itt_relation_add_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} relations group */
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_clock_info
-{
- unsigned long long clock_freq; /*!< Clock domain frequency */
- unsigned long long clock_base; /*!< Clock domain base timestamp */
-} __itt_clock_info;
-
-#pragma pack(pop)
-/** @endcond */
-
-/** @cond exclude_from_documentation */
-typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data);
-/** @endcond */
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_clock_domain
-{
- __itt_clock_info info; /*!< Most recent clock domain info */
- __itt_get_clock_info_fn fn; /*!< Callback function pointer */
- void* fn_data; /*!< Input argument for the callback function */
- int extra1; /*!< Reserved. Must be zero */
- void* extra2; /*!< Reserved. Must be zero */
- struct ___itt_clock_domain* next;
-} __itt_clock_domain;
-
-#pragma pack(pop)
-/** @endcond */
-
-/**
- * @ingroup clockdomains
- * @brief Create a clock domain.
- * Certain applications require the capability to trace their application using
- * a clock domain different than the CPU, for instance the instrumentation of events
- * that occur on a GPU.
- * Because the set of domains is expected to be static over the application's execution time,
- * there is no mechanism to destroy a domain.
- * Any domain can be accessed by any thread in the process, regardless of which thread created
- * the domain. This call is thread-safe.
- * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
- * @param[in] fn_data Argument for a callback function; may be NULL
- */
-__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
-#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create)
-#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0
-#define __itt_clock_domain_create_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_clock_domain_create_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup clockdomains
- * @brief Recalculate clock domains frequences and clock base timestamps.
- */
-void ITTAPI __itt_clock_domain_reset(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
-#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset)
-#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_clock_domain_reset()
-#define __itt_clock_domain_reset_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_clock_domain_reset_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup clockdomain
- * @brief Create an instance of identifier. This establishes the beginning of the lifetime of
- * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
- * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
- * identified named entity instances, using the \ref relations APIs.
- * @param[in] domain The domain controlling the execution of this call.
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] id The ID to create.
- */
-void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
-
-/**
- * @ingroup clockdomain
- * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the
- * given ID value in the trace. Any relationships that are established after this lifetime ends are
- * invalid. This call must be performed before the given ID value can be reused for a different
- * named entity instance.
- * @param[in] domain The domain controlling the execution of this call.
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] id The ID to destroy.
- */
-void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
-ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
-#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
-#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex)
-#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
-#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
-#define __itt_id_create_ex_ptr 0
-#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
-#define __itt_id_destroy_ex_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_id_create_ex_ptr 0
-#define __itt_id_destroy_ex_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup clockdomain
- * @brief Begin a task instance.
- * @param[in] domain The domain for this task
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] taskid The instance ID for this task instance, or __itt_null
- * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
- * @param[in] name The name of this task
- */
-void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
-
-/**
- * @ingroup clockdomain
- * @brief Begin a task instance.
- * @param[in] domain The domain for this task
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] taskid The identifier for this task instance, or __itt_null
- * @param[in] parentid The parent of this task, or __itt_null
- * @param[in] fn The pointer to the function you are tracing
- */
-void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
-
-/**
- * @ingroup clockdomain
- * @brief End the current task instance.
- * @param[in] domain The domain for this task
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- */
-void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
-ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
-ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
-#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
-#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex)
-#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
-#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex)
-#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
-#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
-#define __itt_task_begin_ex_ptr 0
-#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
-#define __itt_task_begin_fn_ex_ptr 0
-#define __itt_task_end_ex(domain,clock_domain,timestamp)
-#define __itt_task_end_ex_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_task_begin_ex_ptr 0
-#define __itt_task_begin_fn_ex_ptr 0
-#define __itt_task_end_ex_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup markers
- * @brief Create a marker instance.
- * @param[in] domain The domain for this marker
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] id The instance ID for this marker, or __itt_null
- * @param[in] name The name for this marker
- * @param[in] scope The scope for this marker
- */
-void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
-#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
-#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
-#define __itt_marker_ex_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_marker_ex_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @ingroup clockdomain
- * @brief Add a relation to the current task instance.
- * The current task instance is the head of the relation.
- * @param[in] domain The domain controlling this call
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] relation The kind of relation
- * @param[in] tail The ID for the tail of the relation
- */
-void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
-
-/**
- * @ingroup clockdomain
- * @brief Add a relation between two instance identifiers.
- * @param[in] domain The domain controlling this call
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] head The ID for the head of the relation
- * @param[in] relation The kind of relation
- * @param[in] tail The ID for the tail of the relation
- */
-void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
-ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
-#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
-#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex)
-#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
-#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_relation_add_to_current_ex(domain,clock_domain,timestame,relation,tail)
-#define __itt_relation_add_to_current_ex_ptr 0
-#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
-#define __itt_relation_add_ex_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_relation_add_to_current_ex_ptr 0
-#define __itt_relation_add_ex_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @cond exclude_from_documentation */
-typedef enum ___itt_track_group_type
-{
- __itt_track_group_type_normal = 0
-} __itt_track_group_type;
-/** @endcond */
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_track_group
-{
- __itt_string_handle* name; /*!< Name of the track group */
- struct ___itt_track* track; /*!< List of child tracks */
- __itt_track_group_type tgtype; /*!< Type of the track group */
- int extra1; /*!< Reserved. Must be zero */
- void* extra2; /*!< Reserved. Must be zero */
- struct ___itt_track_group* next;
-} __itt_track_group;
-
-#pragma pack(pop)
-/** @endcond */
-
-/**
- * @brief Placeholder for custom track types. Currently, "normal" custom track
- * is the only available track type.
- */
-typedef enum ___itt_track_type
-{
- __itt_track_type_normal = 0
-#ifdef INTEL_ITTNOTIFY_API_PRIVATE
- , __itt_track_type_queue
-#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
-} __itt_track_type;
-
-/** @cond exclude_from_documentation */
-#pragma pack(push, 8)
-
-typedef struct ___itt_track
-{
- __itt_string_handle* name; /*!< Name of the track group */
- __itt_track_group* group; /*!< Parent group to a track */
- __itt_track_type ttype; /*!< Type of the track */
- int extra1; /*!< Reserved. Must be zero */
- void* extra2; /*!< Reserved. Must be zero */
- struct ___itt_track* next;
-} __itt_track;
-
-#pragma pack(pop)
-/** @endcond */
-
-/**
- * @brief Create logical track group.
- */
-__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
-#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create)
-#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_track_group_create(name) (__itt_track_group*)0
-#define __itt_track_group_create_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_track_group_create_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Create logical track.
- */
-__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
-#define __itt_track_create ITTNOTIFY_DATA(track_create)
-#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_track_create(track_group,name,track_type) (__itt_track*)0
-#define __itt_track_create_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_track_create_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Set the logical track.
- */
-void ITTAPI __itt_set_track(__itt_track* track);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
-#define __itt_set_track ITTNOTIFY_VOID(set_track)
-#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_set_track(track)
-#define __itt_set_track_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_set_track_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/* ========================================================================== */
-/** @cond exclude_from_gpa_documentation */
-/**
- * @defgroup events Events
- * @ingroup public
- * Events group
- * @{
- */
-/** @brief user event type */
-typedef int __itt_event;
-
-/**
- * @brief Create an event notification
- * @note name or namelen being null/name and namelen not matching, user event feature not enabled
- * @return non-zero event identifier upon success and __itt_err otherwise
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
-__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_event_create __itt_event_createW
-# define __itt_event_create_ptr __itt_event_createW_ptr
-#else
-# define __itt_event_create __itt_event_createA
-# define __itt_event_create_ptr __itt_event_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
-ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
-#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
-#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
-#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_event_create ITTNOTIFY_DATA(event_create)
-#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_event_createA(name, namelen) (__itt_event)0
-#define __itt_event_createA_ptr 0
-#define __itt_event_createW(name, namelen) (__itt_event)0
-#define __itt_event_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_event_create(name, namelen) (__itt_event)0
-#define __itt_event_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_event_createA_ptr 0
-#define __itt_event_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_event_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record an event occurrence.
- * @return __itt_err upon failure (invalid event id/user event feature not enabled)
- */
-int LIBITTAPI __itt_event_start(__itt_event event);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
-#define __itt_event_start ITTNOTIFY_DATA(event_start)
-#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_event_start(event) (int)0
-#define __itt_event_start_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_event_start_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record an event end occurrence.
- * @note It is optional if events do not have durations.
- * @return __itt_err upon failure (invalid event id/user event feature not enabled)
- */
-int LIBITTAPI __itt_event_end(__itt_event event);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
-#define __itt_event_end ITTNOTIFY_DATA(event_end)
-#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_event_end(event) (int)0
-#define __itt_event_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_event_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} events group */
-
-
-/**
- * @defgroup arrays Arrays Visualizer
- * @ingroup public
- * Visualize arrays
- * @{
- */
-
-/**
- * @enum __itt_av_data_type
- * @brief Defines types of arrays data (for C/C++ intrinsic types)
- */
-typedef enum
-{
- __itt_e_first = 0,
- __itt_e_char = 0, /* 1-byte integer */
- __itt_e_uchar, /* 1-byte unsigned integer */
- __itt_e_int16, /* 2-byte integer */
- __itt_e_uint16, /* 2-byte unsigned integer */
- __itt_e_int32, /* 4-byte integer */
- __itt_e_uint32, /* 4-byte unsigned integer */
- __itt_e_int64, /* 8-byte integer */
- __itt_e_uint64, /* 8-byte unsigned integer */
- __itt_e_float, /* 4-byte floating */
- __itt_e_double, /* 8-byte floating */
- __itt_e_last = __itt_e_double
-} __itt_av_data_type;
-
-/**
- * @brief Save an array data to a file.
- * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
- * @param[in] data - pointer to the array data
- * @param[in] rank - the rank of the array
- * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
- * The size of dimensions must be equal to the rank
- * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
- * @param[in] filePath - the file path; the output format is defined by the file extension
- * @param[in] columnOrder - defines how the array is stored in the linear memory.
- * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C).
- */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
-int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_av_save __itt_av_saveW
-# define __itt_av_save_ptr __itt_av_saveW_ptr
-#else /* UNICODE */
-# define __itt_av_save __itt_av_saveA
-# define __itt_av_save_ptr __itt_av_saveA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
-ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA)
-#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
-#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW)
-#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_av_save ITTNOTIFY_DATA(av_save)
-#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_av_saveA(name)
-#define __itt_av_saveA_ptr 0
-#define __itt_av_saveW(name)
-#define __itt_av_saveW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_av_save(name)
-#define __itt_av_save_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_av_saveA_ptr 0
-#define __itt_av_saveW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_av_save_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-void ITTAPI __itt_enable_attach(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, enable_attach, (void))
-#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach)
-#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_enable_attach()
-#define __itt_enable_attach_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_enable_attach_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @cond exclude_from_gpa_documentation */
-
-/** @} arrays group */
-
-/** @endcond */
-
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* _ITTNOTIFY_H_ */
-
-#ifdef INTEL_ITTNOTIFY_API_PRIVATE
-
-#ifndef _ITTNOTIFY_PRIVATE_
-#define _ITTNOTIFY_PRIVATE_
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @ingroup tasks
- * @brief Begin an overlapped task instance.
- * @param[in] domain The domain for this task.
- * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
- * @param[in] parentid The parent of this task, or __itt_null.
- * @param[in] name The name of this task.
- */
-void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
-
-/**
- * @ingroup clockdomain
- * @brief Begin an overlapped task instance.
- * @param[in] domain The domain for this task
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
- * @param[in] parentid The parent of this task, or __itt_null.
- * @param[in] name The name of this task.
- */
-void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
-
-/**
- * @ingroup tasks
- * @brief End an overlapped task instance.
- * @param[in] domain The domain for this task
- * @param[in] taskid Explicit ID of finished task
- */
-void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
-
-/**
- * @ingroup clockdomain
- * @brief End an overlapped task instance.
- * @param[in] domain The domain for this task
- * @param[in] clock_domain The clock domain controlling the execution of this call.
- * @param[in] timestamp The user defined timestamp.
- * @param[in] taskid Explicit ID of finished task
- */
-void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
-ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
-ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid))
-ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
-#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
-#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped)
-#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
-#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex)
-#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
-#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped)
-#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
-#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
-#define __itt_task_begin_overlapped_ptr 0
-#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
-#define __itt_task_begin_overlapped_ex_ptr 0
-#define __itt_task_end_overlapped(domain,taskid)
-#define __itt_task_end_overlapped_ptr 0
-#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
-#define __itt_task_end_overlapped_ex_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_task_begin_overlapped_ptr 0
-#define __itt_task_begin_overlapped_ex_ptr 0
-#define __itt_task_end_overlapped_ptr 0
-#define __itt_task_end_overlapped_ex_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @defgroup makrs_internal Marks
- * @ingroup internal
- * Marks group
- * @warning Internal API:
- * - It is not shipped to outside of Intel
- * - It is delivered to internal Intel teams using e-mail or SVN access only
- * @{
- */
-/** @brief user mark type */
-typedef int __itt_mark_type;
-
-/**
- * @brief Creates a user mark type with the specified name using char or Unicode string.
- * @param[in] name - name of mark to create
- * @return Returns a handle to the mark type
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_mark_type ITTAPI __itt_mark_createA(const char *name);
-__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_mark_create __itt_mark_createW
-# define __itt_mark_create_ptr __itt_mark_createW_ptr
-#else /* UNICODE */
-# define __itt_mark_create __itt_mark_createA
-# define __itt_mark_create_ptr __itt_mark_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_mark_type ITTAPI __itt_mark_create(const char *name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name))
-ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA)
-#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
-#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW)
-#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_create ITTNOTIFY_DATA(mark_create)
-#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_mark_createA(name) (__itt_mark_type)0
-#define __itt_mark_createA_ptr 0
-#define __itt_mark_createW(name) (__itt_mark_type)0
-#define __itt_mark_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_create(name) (__itt_mark_type)0
-#define __itt_mark_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_mark_createA_ptr 0
-#define __itt_mark_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string.
- *
- * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign.
- * - The call is "synchronous" - function returns after mark is actually added to results.
- * - This function is useful, for example, to mark different phases of application
- * (beginning of the next mark automatically meand end of current region).
- * - Can be used together with "continuous" marks (see below) at the same collection session
- * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
- * @param[in] parameter - string parameter of mark
- * @return Returns zero value in case of success, non-zero value otherwise.
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter);
-int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_mark __itt_markW
-# define __itt_mark_ptr __itt_markW_ptr
-#else /* UNICODE */
-# define __itt_mark __itt_markA
-# define __itt_mark_ptr __itt_markA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter))
-ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_markA ITTNOTIFY_DATA(markA)
-#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
-#define __itt_markW ITTNOTIFY_DATA(markW)
-#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark ITTNOTIFY_DATA(mark)
-#define __itt_mark_ptr ITTNOTIFY_NAME(mark)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_markA(mt, parameter) (int)0
-#define __itt_markA_ptr 0
-#define __itt_markW(mt, parameter) (int)0
-#define __itt_markW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark(mt, parameter) (int)0
-#define __itt_mark_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_markA_ptr 0
-#define __itt_markW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Use this if necessary to create a "discrete" user event (mark) for the whole process
- * rather than for one thread
- * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter);
-int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_mark_global __itt_mark_globalW
-# define __itt_mark_global_ptr __itt_mark_globalW_ptr
-#else /* UNICODE */
-# define __itt_mark_global __itt_mark_globalA
-# define __itt_mark_global_ptr __itt_mark_globalA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter))
-ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA)
-#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
-#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW)
-#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_global ITTNOTIFY_DATA(mark_global)
-#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_mark_globalA(mt, parameter) (int)0
-#define __itt_mark_globalA_ptr 0
-#define __itt_mark_globalW(mt, parameter) (int)0
-#define __itt_mark_globalW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_global(mt, parameter) (int)0
-#define __itt_mark_global_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_mark_globalA_ptr 0
-#define __itt_mark_globalW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_mark_global_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Creates an "end" point for "continuous" mark with specified name.
- *
- * - Returns zero value in case of success, non-zero value otherwise.
- * Also returns non-zero value when preceding "begin" point for the
- * mark with the same name failed to be created or not created.
- * - The mark of "continuous" type is placed to collection results in
- * case of success. It appears in overtime view(s) as a special tick
- * sign (different from "discrete" mark) together with line from
- * corresponding "begin" mark to "end" mark.
- * @note Continuous marks can overlap and be nested inside each other.
- * Discrete mark can be nested inside marked region
- * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
- * @return Returns zero value in case of success, non-zero value otherwise.
- */
-int ITTAPI __itt_mark_off(__itt_mark_type mt);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
-#define __itt_mark_off ITTNOTIFY_DATA(mark_off)
-#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_mark_off(mt) (int)0
-#define __itt_mark_off_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_mark_off_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Use this if necessary to create an "end" point for a mark of the whole process
- * @see int __itt_mark_off(__itt_mark_type mt);
- */
-int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
-#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off)
-#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_mark_global_off(mt) (int)0
-#define __itt_mark_global_off_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_mark_global_off_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} marks group */
-
-/**
- * @defgroup counters_internal Counters
- * @ingroup internal
- * Counters group
- * @{
- */
-/**
- * @brief opaque structure for counter identification
- */
-typedef struct ___itt_counter *__itt_counter;
-
-/**
- * @brief Create a counter with the given name/domain for the calling thread
- *
- * After __itt_counter_create() is called, __itt_counter_inc() / __itt_counter_inc_delta() can be used
- * to increment the counter on any thread
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain);
-__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_counter_create __itt_counter_createW
-# define __itt_counter_create_ptr __itt_counter_createW_ptr
-#else /* UNICODE */
-# define __itt_counter_create __itt_counter_createA
-# define __itt_counter_create_ptr __itt_counter_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain))
-ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA)
-#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
-#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW)
-#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_counter_create ITTNOTIFY_DATA(counter_create)
-#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_counter_createA(name, domain)
-#define __itt_counter_createA_ptr 0
-#define __itt_counter_createW(name, domain)
-#define __itt_counter_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_counter_create(name, domain)
-#define __itt_counter_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_counter_createA_ptr 0
-#define __itt_counter_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_counter_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create()
- */
-void ITTAPI __itt_counter_destroy(__itt_counter id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
-#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy)
-#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_counter_destroy(id)
-#define __itt_counter_destroy_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_counter_destroy_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Increment the counter value
- */
-void ITTAPI __itt_counter_inc(__itt_counter id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
-#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc)
-#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_counter_inc(id)
-#define __itt_counter_inc_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_counter_inc_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Increment the counter value by the specified amount
- */
-void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
-#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta)
-#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_counter_inc_delta(id, value)
-#define __itt_counter_inc_delta_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_counter_inc_delta_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} counters group */
-
-/**
- * @defgroup stitch Stack Stitching
- * @ingroup internal
- * Stack Stitching group
- * @{
- */
-/**
- * @brief opaque structure for stitch point identification
- */
-typedef struct ___itt_caller *__itt_caller;
-
-/**
- * @brief Create the stitch point, i.e. a point in the call stack where other stacks should be stitched to.
- * The function returns a unique identifier which is used to match the cut points with the corresponding stitch points.
- */
-__itt_caller ITTAPI __itt_stack_caller_create(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
-#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create)
-#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_stack_caller_create() (__itt_caller)0
-#define __itt_stack_caller_create_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_stack_caller_create_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Destroy the information about the stitch point identified by the pointer previously returned by __itt_stack_caller_create()
- */
-void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
-#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy)
-#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_stack_caller_destroy(id)
-#define __itt_stack_caller_destroy_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_stack_caller_destroy_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Sets the cut point. The stack from each event that occurs after this call will be cut
- * at the stack level at which this function was called, and stitched to the corresponding stitch point.
- */
-void ITTAPI __itt_stack_callee_enter(__itt_caller id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
-#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter)
-#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_stack_callee_enter(id)
-#define __itt_stack_callee_enter_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_stack_callee_enter_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief This function eliminates the cut point that was set by the latest __itt_stack_callee_enter().
- */
-void ITTAPI __itt_stack_callee_leave(__itt_caller id);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
-#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave)
-#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_stack_callee_leave(id)
-#define __itt_stack_callee_leave_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_stack_callee_leave_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @} stitch group */
-
-/* ***************************************************************************************************************************** */
-
-#include <stdarg.h>
-
-/** @cond exclude_from_documentation */
-typedef enum __itt_error_code
-{
- __itt_error_success = 0, /*!< no error */
- __itt_error_no_module = 1, /*!< module can't be loaded */
- /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
- __itt_error_no_symbol = 2, /*!< symbol not found */
- /* %1$s -- library name, %2$s -- symbol name. */
- __itt_error_unknown_group = 3, /*!< unknown group specified */
- /* %1$s -- env var name, %2$s -- group name. */
- __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
- /* %1$s -- env var name, %2$d -- system error. */
- __itt_error_env_too_long = 5, /*!< variable value too long */
- /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
- __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */
- /* %1$s -- function name, %2$d -- errno. */
-} __itt_error_code;
-
-typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
-__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
-
-const char* ITTAPI __itt_api_version(void);
-/** @endcond */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
-void __itt_error_handler(__itt_error_code code, va_list args);
-extern const int ITTNOTIFY_NAME(err);
-#define __itt_err ITTNOTIFY_NAME(err)
-ITT_STUB(ITTAPI, const char*, api_version, (void))
-#define __itt_api_version ITTNOTIFY_DATA(api_version)
-#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_api_version() (const char*)0
-#define __itt_api_version_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_api_version_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* _ITTNOTIFY_PRIVATE_ */
-
-#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+{
+ __itt_e_first = 0,
+ __itt_e_char = 0, /* 1-byte integer */
+ __itt_e_uchar, /* 1-byte unsigned integer */
+ __itt_e_int16, /* 2-byte integer */
+ __itt_e_uint16, /* 2-byte unsigned integer */
+ __itt_e_int32, /* 4-byte integer */
+ __itt_e_uint32, /* 4-byte unsigned integer */
+ __itt_e_int64, /* 8-byte integer */
+ __itt_e_uint64, /* 8-byte unsigned integer */
+ __itt_e_float, /* 4-byte floating */
+ __itt_e_double, /* 8-byte floating */
+ __itt_e_last = __itt_e_double
+} __itt_av_data_type;
+
+/**
+ * @brief Save array data to a file.
+ * The output format is defined by the file extension. The csv and bmp formats are supported (bmp for 2-dimensional arrays only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 for row-major order (e.g. in C).
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_av_save __itt_av_saveW
+# define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+# define __itt_av_save __itt_av_saveA
+# define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(name)
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(name)
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(name)
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
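To make the save entry point above concrete, here is a minimal usage sketch that dumps a 2-D float matrix through the char-string variant; the matrix, dimensions and file name are illustrative, and on Windows with UNICODE defined __itt_av_save maps to the wchar_t variant instead.

    #include "ittnotify.h"

    /* Illustrative sketch: dump a rows x cols float matrix to CSV.
       rank = 2, row-major storage (columnOrder = 0); the .csv extension
       selects the output format. */
    static int dump_matrix(float *matrix, int rows, int cols)
    {
        int dims[2] = { rows, cols };
        return __itt_av_save(matrix, 2, dims, __itt_e_float, "matrix.csv", 0);
    }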
+
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_H_ */
+
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+
+#ifndef _ITTNOTIFY_PRIVATE_
+#define _ITTNOTIFY_PRIVATE_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/**
+ * @ingroup clockdomain
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid))
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
+#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped)
+#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
+#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex)
+#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped)
+#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
+#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped(domain,taskid)
+#define __itt_task_end_overlapped_ptr 0
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped_ptr 0
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
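As a rough illustration of the overlapped-task calls declared above (a sketch only, assuming the __itt_domain_create, __itt_string_handle_create, __itt_id_make and __itt_null declarations from the public part of this header; the domain and handle names are invented):

    #include "ittnotify.h"

    /* Two task instances of the same kind can be open at the same time;
       the explicit __itt_id is what matches each end to its begin. */
    static void handle_two_requests(void *req_a, void *req_b)
    {
        const __itt_domain  *d = __itt_domain_create("example.server");
        __itt_string_handle *h = __itt_string_handle_create("handle_request");
        __itt_id a = __itt_id_make(req_a, 0);
        __itt_id b = __itt_id_make(req_b, 0);

        __itt_task_begin_overlapped(d, a, __itt_null, h);
        __itt_task_begin_overlapped(d, b, __itt_null, h);
        /* ... both requests are processed concurrently ... */
        __itt_task_end_overlapped(d, a);
        __itt_task_end_overlapped(d, b);
    }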
+
+/**
+ * @defgroup makrs_internal Marks
+ * @ingroup internal
+ * Marks group
+ * @warning Internal API:
+ * - It is not shipped to outside of Intel
+ * - It is delivered to internal Intel teams using e-mail or SVN access only
+ * @{
+ */
+/** @brief user mark type */
+typedef int __itt_mark_type;
+
+/**
+ * @brief Creates a user mark type with the specified name using char or Unicode string.
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_create __itt_mark_createW
+# define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+# define __itt_mark_create __itt_mark_createA
+# define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name) (__itt_mark_type)0
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string.
+ *
+ * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous" - function returns after mark is actually added to results.
+ * - This function is useful, for example, to mark different phases of application
+ * (beginning of the next mark automatically meand end of current region).
+ * - Can be used together with "continuous" marks (see below) at the same collection session
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of mark
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark __itt_markW
+# define __itt_mark_ptr __itt_markW_ptr
+#else /* UNICODE */
+# define __itt_mark __itt_markA
+# define __itt_mark_ptr __itt_markA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA ITTNOTIFY_DATA(markA)
+#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
+#define __itt_markW ITTNOTIFY_DATA(markW)
+#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark ITTNOTIFY_DATA(mark)
+#define __itt_mark_ptr ITTNOTIFY_NAME(mark)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA(mt, parameter) (int)0
+#define __itt_markA_ptr 0
+#define __itt_markW(mt, parameter) (int)0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark(mt, parameter) (int)0
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA_ptr 0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create a "discrete" user event (mark) for the whole process
+ * rather than for one thread
+ * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_global __itt_mark_globalW
+# define __itt_mark_global_ptr __itt_mark_globalW_ptr
+#else /* UNICODE */
+# define __itt_mark_global __itt_mark_globalA
+# define __itt_mark_global_ptr __itt_mark_globalA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA)
+#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
+#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW)
+#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global ITTNOTIFY_DATA(mark_global)
+#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA(mt, parameter) (int)0
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW(mt, parameter) (int)0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global(mt, parameter) (int)0
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates an "end" point for "continuous" mark with specified name.
+ *
+ * - Returns zero value in case of success, non-zero value otherwise.
+ * Also returns non-zero value when preceding "begin" point for the
+ * mark with the same name failed to be created or not created.
+ * - The mark of "continuous" type is placed to collection results in
+ * case of success. It appears in overtime view(s) as a special tick
+ * sign (different from "discrete" mark) together with line from
+ * corresponding "begin" mark to "end" mark.
+ * @note Continuous marks can overlap and be nested inside each other.
+ * Discrete mark can be nested inside marked region
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+int ITTAPI __itt_mark_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
+#define __itt_mark_off ITTNOTIFY_DATA(mark_off)
+#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_off(mt) (int)0
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create an "end" point for a mark of the whole process
+ * @see int __itt_mark_off(__itt_mark_type mt);
+ */
+int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
+#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off)
+#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_global_off(mt) (int)0
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} marks group */
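A short usage sketch for the mark calls in this group; the mark type name and phase strings are invented, and the int error codes returned by the calls are ignored for brevity:

    #include "ittnotify.h"

    static void run_phases(void)
    {
        __itt_mark_type phase = __itt_mark_create("phase");

        __itt_mark(phase, "parsing");   /* discrete mark: start of the parsing phase */
        /* ... parse ... */
        __itt_mark(phase, "solving");   /* the next mark implicitly ends the previous phase */
        /* ... solve ... */

        __itt_mark_off(phase);          /* "end" point, pairing with a preceding "begin" point
                                           of a continuous mark of this type, if one exists */
    }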
+
+/**
+ * @defgroup counters_internal Counters
+ * @ingroup internal
+ * Counters group
+ * @{
+ */
+/**
+ * @brief opaque structure for counter identification
+ */
+typedef struct ___itt_counter *__itt_counter;
+
+/**
+ * @brief Create a counter with the given name/domain for the calling thread
+ *
+ * After __itt_counter_create() is called, __itt_counter_inc() / __itt_counter_inc_delta() can be used
+ * to increment the counter on any thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain);
+__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create __itt_counter_createW
+# define __itt_counter_create_ptr __itt_counter_createW_ptr
+#else /* UNICODE */
+# define __itt_counter_create __itt_counter_createA
+# define __itt_counter_create_ptr __itt_counter_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA)
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
+#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW)
+#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create ITTNOTIFY_DATA(counter_create)
+#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA(name, domain)
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW(name, domain)
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create(name, domain)
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create()
+ */
+void ITTAPI __itt_counter_destroy(__itt_counter id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
+#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy)
+#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_destroy(id)
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the counter value
+ */
+void ITTAPI __itt_counter_inc(__itt_counter id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
+#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc)
+#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc(id)
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the counter value by the specified amount
+ */
+void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta)
+#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_delta(id, value)
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
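The counter entry points above form a simple create/increment/destroy lifecycle; the sketch below shows it with the char-string variant, using an invented counter name and domain:

    #include "ittnotify.h"

    static void process_items(int n)
    {
        __itt_counter items = __itt_counter_create("items_processed", "example.domain");
        int i;
        for (i = 0; i < n; ++i) {
            /* ... process one item ... */
            __itt_counter_inc(items);          /* +1 per item */
        }
        __itt_counter_inc_delta(items, 100);   /* or bump by a batch size at once */
        __itt_counter_destroy(items);
    }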
+
+/**
+ * @defgroup stitch Stack Stitching
+ * @ingroup internal
+ * Stack Stitching group
+ * @{
+ */
+/**
+ * @brief opaque structure for stitch point identification
+ */
+typedef struct ___itt_caller *__itt_caller;
+
+/**
+ * @brief Create the stitch point, i.e. a point in the call stack where other stacks should be stitched to.
+ * The function returns a unique identifier which is used to match the cut points with the corresponding stitch points.
+ */
+__itt_caller ITTAPI __itt_stack_caller_create(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
+#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create)
+#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_create() (__itt_caller)0
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the information about the stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ */
+void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
+#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy)
+#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_destroy(id)
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Sets the cut point. The stack from each event that occurs after this call will be cut
+ * at the stack level at which this function was called, and stitched to the corresponding stitch point.
+ */
+void ITTAPI __itt_stack_callee_enter(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
+#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter)
+#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_enter(id)
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief This function eliminates the cut point that was set by the latest __itt_stack_callee_enter().
+ */
+void ITTAPI __itt_stack_callee_leave(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
+#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave)
+#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_leave(id)
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} stitch group */
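A sketch of how the stack-stitching calls are typically paired around deferred work; the task structure and scheduler shape are invented, and only the __itt_stack_* calls follow the declarations above:

    #include "ittnotify.h"

    typedef void (*task_fn)(void *);

    struct task {
        task_fn      fn;
        void        *arg;
        __itt_caller stitch;   /* remembers the stack of the submitting thread */
    };

    static void submit_task(struct task *t, task_fn fn, void *arg)
    {
        t->fn     = fn;
        t->arg    = arg;
        t->stitch = __itt_stack_caller_create();
    }

    static void execute_task(struct task *t)
    {
        __itt_stack_callee_enter(t->stitch);   /* stacks below here are stitched to the submit point */
        t->fn(t->arg);
        __itt_stack_callee_leave(t->stitch);
        __itt_stack_caller_destroy(t->stitch);
    }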
+
+/* ***************************************************************************************************************************** */
+
+#include <stdarg.h>
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_error_code
+{
+ __itt_error_success = 0, /*!< no error */
+ __itt_error_no_module = 1, /*!< module can't be loaded */
+ /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
+ __itt_error_no_symbol = 2, /*!< symbol not found */
+ /* %1$s -- library name, %2$s -- symbol name. */
+ __itt_error_unknown_group = 3, /*!< unknown group specified */
+ /* %1$s -- env var name, %2$s -- group name. */
+ __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
+ /* %1$s -- env var name, %2$d -- system error. */
+ __itt_error_env_too_long = 5, /*!< variable value too long */
+ /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
+ __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */
+ /* %1$s -- function name, %2$d -- errno. */
+} __itt_error_code;
+
+typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
+__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
+
+const char* ITTAPI __itt_api_version(void);
+/** @endcond */
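A minimal sketch of installing an error handler through the declarations above; it only prints the numeric code and ignores the per-code va_list arguments described in the enum comments:

    #include <stdarg.h>
    #include <stdio.h>
    #include "ittnotify.h"

    static void report_itt_error(__itt_error_code code, va_list args)
    {
        (void)args;   /* per-code arguments are not formatted in this sketch */
        fprintf(stderr, "ittnotify error: %d\n", (int)code);
    }

    static void install_itt_error_handler(void)
    {
        __itt_set_error_handler(report_itt_error);
    }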
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
+void __itt_error_handler(__itt_error_code code, va_list args);
+extern const int ITTNOTIFY_NAME(err);
+#define __itt_err ITTNOTIFY_NAME(err)
+ITT_STUB(ITTAPI, const char*, api_version, (void))
+#define __itt_api_version ITTNOTIFY_DATA(api_version)
+#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_api_version() (const char*)0
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_PRIVATE_ */
+
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h
index 3a2aee76cf..710bbe9226 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_config.h
@@ -1,478 +1,478 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _ITTNOTIFY_CONFIG_H_
-#define _ITTNOTIFY_CONFIG_H_
-
-/** @cond exclude_from_documentation */
-#ifndef ITT_OS_WIN
-# define ITT_OS_WIN 1
-#endif /* ITT_OS_WIN */
-
-#ifndef ITT_OS_LINUX
-# define ITT_OS_LINUX 2
-#endif /* ITT_OS_LINUX */
-
-#ifndef ITT_OS_MAC
-# define ITT_OS_MAC 3
-#endif /* ITT_OS_MAC */
-
-#ifndef ITT_OS
-# if defined WIN32 || defined _WIN32
-# define ITT_OS ITT_OS_WIN
-# elif defined( __APPLE__ ) && defined( __MACH__ )
-# define ITT_OS ITT_OS_MAC
-# else
-# define ITT_OS ITT_OS_LINUX
-# endif
-#endif /* ITT_OS */
-
-#ifndef ITT_PLATFORM_WIN
-# define ITT_PLATFORM_WIN 1
-#endif /* ITT_PLATFORM_WIN */
-
-#ifndef ITT_PLATFORM_POSIX
-# define ITT_PLATFORM_POSIX 2
-#endif /* ITT_PLATFORM_POSIX */
-
-#ifndef ITT_PLATFORM_MAC
-# define ITT_PLATFORM_MAC 3
-#endif /* ITT_PLATFORM_MAC */
-
-#ifndef ITT_PLATFORM
-# if ITT_OS==ITT_OS_WIN
-# define ITT_PLATFORM ITT_PLATFORM_WIN
-# elif ITT_OS==ITT_OS_MAC
-# define ITT_PLATFORM ITT_PLATFORM_MAC
-# else
-# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif
-#endif /* ITT_PLATFORM */
-
-#if defined(_UNICODE) && !defined(UNICODE)
-#define UNICODE
-#endif
-
-#include <stddef.h>
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#include <tchar.h>
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <stdint.h>
-#if defined(UNICODE) || defined(_UNICODE)
-#include <wchar.h>
-#endif /* UNICODE || _UNICODE */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#ifndef CDECL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define CDECL __cdecl
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_IX86 || defined __i386__
-# define CDECL __attribute__ ((cdecl))
-# else /* _M_IX86 || __i386__ */
-# define CDECL /* actual only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* CDECL */
-
-#ifndef STDCALL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define STDCALL __stdcall
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _ITTNOTIFY_CONFIG_H_
+#define _ITTNOTIFY_CONFIG_H_
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
# if defined _M_IX86 || defined __i386__
-# define STDCALL __attribute__ ((stdcall))
-# else /* _M_IX86 || __i386__ */
-# define STDCALL /* supported only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* STDCALL */
-
-#define ITTAPI CDECL
-#define LIBITTAPI CDECL
-
-/* TODO: Temporary for compatibility! */
-#define ITTAPI_CALL CDECL
-#define LIBITTAPI_CALL CDECL
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
-#define ITT_INLINE_ATTRIBUTE /* nothing */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/*
- * Generally, functions are not inlined unless optimization is specified.
- * For functions declared inline, this attribute inlines the function even
- * if no optimization level was specified.
- */
-#ifdef __STRICT_ANSI__
-#define ITT_INLINE static
-#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
-#else /* __STRICT_ANSI__ */
-#define ITT_INLINE static inline
-#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
-#endif /* __STRICT_ANSI__ */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/** @endcond */
-
-#ifndef ITT_ARCH_IA32
-# define ITT_ARCH_IA32 1
-#endif /* ITT_ARCH_IA32 */
-
-#ifndef ITT_ARCH_IA32E
-# define ITT_ARCH_IA32E 2
-#endif /* ITT_ARCH_IA32E */
-
-/* Was there a magical reason we didn't have 3 here before? */
-#ifndef ITT_ARCH_AARCH64
-# define ITT_ARCH_AARCH64 3
-#endif /* ITT_ARCH_AARCH64 */
-
-#ifndef ITT_ARCH_ARM
-# define ITT_ARCH_ARM 4
-#endif /* ITT_ARCH_ARM */
-
-#ifndef ITT_ARCH_PPC64
-# define ITT_ARCH_PPC64 5
-#endif /* ITT_ARCH_PPC64 */
-
-
-#ifndef ITT_ARCH
-# if defined _M_IX86 || defined __i386__
-# define ITT_ARCH ITT_ARCH_IA32
-# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define ITT_ARCH ITT_ARCH_IA32E
-# elif defined _M_IA64 || defined __ia64__
-# define ITT_ARCH ITT_ARCH_IA64
-# elif defined _M_ARM || __arm__
-# define ITT_ARCH ITT_ARCH_ARM
-# elif defined __powerpc64__
-# define ITT_ARCH ITT_ARCH_PPC64
-# elif defined __aarch64__
-# define ITT_ARCH ITT_ARCH_AARCH64
-# endif
-#endif
-
-#ifdef __cplusplus
-# define ITT_EXTERN_C extern "C"
-# define ITT_EXTERN_C_BEGIN extern "C" {
-# define ITT_EXTERN_C_END }
-#else
-# define ITT_EXTERN_C /* nothing */
-# define ITT_EXTERN_C_BEGIN /* nothing */
-# define ITT_EXTERN_C_END /* nothing */
-#endif /* __cplusplus */
-
-#define ITT_TO_STR_AUX(x) #x
-#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
-
-#define __ITT_BUILD_ASSERT(expr, suffix) do { \
- static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
- __itt_build_check_##suffix[0] = 0; \
-} while(0)
-#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
-#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
-
-#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
-
-/* Replace with snapshot date YYYYMMDD for promotion build. */
-#define API_VERSION_BUILD 20111111
-
-#ifndef API_VERSION_NUM
-#define API_VERSION_NUM 0.0.0
-#endif /* API_VERSION_NUM */
-
-#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
- " (" ITT_TO_STR(API_VERSION_BUILD) ")"
-
-/* OS communication functions */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#include <windows.h>
-typedef HMODULE lib_t;
-typedef DWORD TIDT;
-typedef CRITICAL_SECTION mutex_t;
-#define MUTEX_INITIALIZER { 0 }
-#define strong_alias(name, aliasname) /* empty for Windows */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <dlfcn.h>
-#if defined(UNICODE) || defined(_UNICODE)
-#include <wchar.h>
-#endif /* UNICODE */
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
-#endif /* _GNU_SOURCE */
-#ifndef __USE_UNIX98
-#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
-#endif /*__USE_UNIX98*/
-#include <pthread.h>
-typedef void* lib_t;
-typedef pthread_t TIDT;
-typedef pthread_mutex_t mutex_t;
-#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
-#define _strong_alias(name, aliasname) \
- extern __typeof (name) aliasname __attribute__ ((alias (#name)));
-#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
-#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex)
-#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex)
-#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
-#define __itt_load_lib(name) LoadLibraryA(name)
-#define __itt_unload_lib(handle) FreeLibrary(handle)
-#define __itt_system_error() (int)GetLastError()
-#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2)
-#define __itt_fstrlen(s) lstrlenA(s)
-#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l)
-#define __itt_fstrdup(s) _strdup(s)
-#define __itt_thread_id() GetCurrentThreadId()
-#define __itt_thread_yield() SwitchToThread()
-#ifndef ITT_SIMPLE_INIT
-ITT_INLINE long
-__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
-ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
-{
- return InterlockedIncrement(ptr);
-}
-#endif /* ITT_SIMPLE_INIT */
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-#define __itt_get_proc(lib, name) dlsym(lib, name)
-#define __itt_mutex_init(mutex) {\
- pthread_mutexattr_t mutex_attr; \
- int error_code = pthread_mutexattr_init(&mutex_attr); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
- error_code); \
- error_code = pthread_mutexattr_settype(&mutex_attr, \
- PTHREAD_MUTEX_RECURSIVE); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
- error_code); \
- error_code = pthread_mutex_init(mutex, &mutex_attr); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutex_init", \
- error_code); \
- error_code = pthread_mutexattr_destroy(&mutex_attr); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
- error_code); \
-}
-#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
-#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
-#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
-#define __itt_unload_lib(handle) dlclose(handle)
-#define __itt_system_error() errno
-#define __itt_fstrcmp(s1, s2) strcmp(s1, s2)
-#define __itt_fstrlen(s) strlen(s)
-#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l)
-#define __itt_fstrdup(s) strdup(s)
-#define __itt_thread_id() pthread_self()
-#define __itt_thread_yield() sched_yield()
-#if ITT_ARCH==ITT_ARCH_IA64
-#ifdef __INTEL_COMPILER
-#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
-#else /* __INTEL_COMPILER */
-/* TODO: Add Support for not Intel compilers for IA-64 architecture */
-#endif /* __INTEL_COMPILER */
-#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */
-ITT_INLINE long
-__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
-ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
-{
- long result;
- __asm__ __volatile__("lock\nxadd %0,%1"
- : "=r"(result),"=m"(*(int*)ptr)
- : "0"(addend), "m"(*(int*)ptr)
- : "memory");
- return result;
-}
-#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64
-#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
-#endif /* ITT_ARCH==ITT_ARCH_IA64 */
-#ifndef ITT_SIMPLE_INIT
-ITT_INLINE long
-__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
-ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
-{
- return __TBB_machine_fetchadd4(ptr, 1) + 1L;
-}
-#endif /* ITT_SIMPLE_INIT */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-typedef enum {
- __itt_collection_normal = 0,
- __itt_collection_paused = 1
-} __itt_collection_state;
-
-typedef enum {
- __itt_thread_normal = 0,
- __itt_thread_ignored = 1
-} __itt_thread_state;
-
-#pragma pack(push, 8)
-
-typedef struct ___itt_thread_info
-{
- const char* nameA; /*!< Copy of original name in ASCII. */
-#if defined(UNICODE) || defined(_UNICODE)
- const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
-#else /* UNICODE || _UNICODE */
- void* nameW;
-#endif /* UNICODE || _UNICODE */
- TIDT tid;
- __itt_thread_state state; /*!< Thread state (paused or normal) */
- int extra1; /*!< Reserved to the runtime */
- void* extra2; /*!< Reserved to the runtime */
- struct ___itt_thread_info* next;
-} __itt_thread_info;
-
-#include "ittnotify_types.h" /* For __itt_group_id definition */
-
-typedef struct ___itt_api_info_20101001
-{
- const char* name;
- void** func_ptr;
- void* init_func;
- __itt_group_id group;
-} __itt_api_info_20101001;
-
-typedef struct ___itt_api_info
-{
- const char* name;
- void** func_ptr;
- void* init_func;
- void* null_func;
- __itt_group_id group;
-} __itt_api_info;
-
-struct ___itt_domain;
-struct ___itt_string_handle;
-
-typedef struct ___itt_global
-{
- unsigned char magic[8];
- unsigned long version_major;
- unsigned long version_minor;
- unsigned long version_build;
- volatile long api_initialized;
- volatile long mutex_initialized;
- volatile long atomic_counter;
- mutex_t mutex;
- lib_t lib;
- void* error_handler;
- const char** dll_path_ptr;
- __itt_api_info* api_list_ptr;
- struct ___itt_global* next;
- /* Joinable structures below */
- __itt_thread_info* thread_list;
- struct ___itt_domain* domain_list;
- struct ___itt_string_handle* string_list;
- __itt_collection_state state;
-} __itt_global;
-
-#pragma pack(pop)
-
-#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \
- h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
- if (h != NULL) { \
- h->tid = t; \
- h->nameA = NULL; \
- h->nameW = n ? _wcsdup(n) : NULL; \
- h->state = s; \
- h->extra1 = 0; /* reserved */ \
- h->extra2 = NULL; /* reserved */ \
- h->next = NULL; \
- if (h_tail == NULL) \
- (gptr)->thread_list = h; \
- else \
- h_tail->next = h; \
- } \
-}
-
-#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \
- h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
- if (h != NULL) { \
- h->tid = t; \
- h->nameA = n ? __itt_fstrdup(n) : NULL; \
- h->nameW = NULL; \
- h->state = s; \
- h->extra1 = 0; /* reserved */ \
- h->extra2 = NULL; /* reserved */ \
- h->next = NULL; \
- if (h_tail == NULL) \
- (gptr)->thread_list = h; \
- else \
- h_tail->next = h; \
- } \
-}
-
-#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \
- h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
- if (h != NULL) { \
- h->flags = 0; /* domain is disabled by default */ \
- h->nameA = NULL; \
- h->nameW = name ? _wcsdup(name) : NULL; \
- h->extra1 = 0; /* reserved */ \
- h->extra2 = NULL; /* reserved */ \
- h->next = NULL; \
- if (h_tail == NULL) \
- (gptr)->domain_list = h; \
- else \
- h_tail->next = h; \
- } \
-}
-
-#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \
- h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
- if (h != NULL) { \
- h->flags = 0; /* domain is disabled by default */ \
- h->nameA = name ? __itt_fstrdup(name) : NULL; \
- h->nameW = NULL; \
- h->extra1 = 0; /* reserved */ \
- h->extra2 = NULL; /* reserved */ \
- h->next = NULL; \
- if (h_tail == NULL) \
- (gptr)->domain_list = h; \
- else \
- h_tail->next = h; \
- } \
-}
-
-#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \
- h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
- if (h != NULL) { \
- h->strA = NULL; \
- h->strW = name ? _wcsdup(name) : NULL; \
- h->extra1 = 0; /* reserved */ \
- h->extra2 = NULL; /* reserved */ \
- h->next = NULL; \
- if (h_tail == NULL) \
- (gptr)->string_list = h; \
- else \
- h_tail->next = h; \
- } \
-}
-
-#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
- h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
- if (h != NULL) { \
- h->strA = name ? __itt_fstrdup(name) : NULL; \
- h->strW = NULL; \
- h->extra1 = 0; /* reserved */ \
- h->extra2 = NULL; /* reserved */ \
- h->next = NULL; \
- if (h_tail == NULL) \
- (gptr)->string_list = h; \
- else \
- h_tail->next = h; \
- } \
-}
-
-#endif /* _ITTNOTIFY_CONFIG_H_ */
+# define CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifndef ITT_ARCH_IA32
+# define ITT_ARCH_IA32 1
+#endif /* ITT_ARCH_IA32 */
+
+#ifndef ITT_ARCH_IA32E
+# define ITT_ARCH_IA32E 2
+#endif /* ITT_ARCH_IA32E */
+
+/* Was there a magical reason we didn't have 3 here before? */
+#ifndef ITT_ARCH_AARCH64
+# define ITT_ARCH_AARCH64 3
+#endif /* ITT_ARCH_AARCH64 */
+
+#ifndef ITT_ARCH_ARM
+# define ITT_ARCH_ARM 4
+#endif /* ITT_ARCH_ARM */
+
+#ifndef ITT_ARCH_PPC64
+# define ITT_ARCH_PPC64 5
+#endif /* ITT_ARCH_PPC64 */
+
+
+#ifndef ITT_ARCH
+# if defined _M_IX86 || defined __i386__
+# define ITT_ARCH ITT_ARCH_IA32
+# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define ITT_ARCH ITT_ARCH_IA32E
+# elif defined _M_IA64 || defined __ia64__
+# define ITT_ARCH ITT_ARCH_IA64
+# elif defined _M_ARM || __arm__
+# define ITT_ARCH ITT_ARCH_ARM
+# elif defined __powerpc64__
+# define ITT_ARCH ITT_ARCH_PPC64
+# elif defined __aarch64__
+# define ITT_ARCH ITT_ARCH_AARCH64
+# endif
+#endif
+
+#ifdef __cplusplus
+# define ITT_EXTERN_C extern "C"
+# define ITT_EXTERN_C_BEGIN extern "C" {
+# define ITT_EXTERN_C_END }
+#else
+# define ITT_EXTERN_C /* nothing */
+# define ITT_EXTERN_C_BEGIN /* nothing */
+# define ITT_EXTERN_C_END /* nothing */
+#endif /* __cplusplus */
+
+#define ITT_TO_STR_AUX(x) #x
+#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
+
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+ static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+ __itt_build_check_##suffix[0] = 0; \
+} while(0)
+#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
+#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
+
+#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
+
+/* Replace with snapshot date YYYYMMDD for promotion build. */
+#define API_VERSION_BUILD 20111111
+
+#ifndef API_VERSION_NUM
+#define API_VERSION_NUM 0.0.0
+#endif /* API_VERSION_NUM */
+
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+ " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+
+/* OS communication functions */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+typedef HMODULE lib_t;
+typedef DWORD TIDT;
+typedef CRITICAL_SECTION mutex_t;
+#define MUTEX_INITIALIZER { 0 }
+#define strong_alias(name, aliasname) /* empty for Windows */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <dlfcn.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
+#endif /* _GNU_SOURCE */
+#ifndef __USE_UNIX98
+#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
+#endif /*__USE_UNIX98*/
+#include <pthread.h>
+typedef void* lib_t;
+typedef pthread_t TIDT;
+typedef pthread_mutex_t mutex_t;
+#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
+#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex)
+#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex)
+#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_load_lib(name) LoadLibraryA(name)
+#define __itt_unload_lib(handle) FreeLibrary(handle)
+#define __itt_system_error() (int)GetLastError()
+#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2)
+#define __itt_fstrlen(s) lstrlenA(s)
+#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l)
+#define __itt_fstrdup(s) _strdup(s)
+#define __itt_thread_id() GetCurrentThreadId()
+#define __itt_thread_yield() SwitchToThread()
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return InterlockedIncrement(ptr);
+}
+#endif /* ITT_SIMPLE_INIT */
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#define __itt_get_proc(lib, name) dlsym(lib, name)
+#define __itt_mutex_init(mutex) {\
+ pthread_mutexattr_t mutex_attr; \
+ int error_code = pthread_mutexattr_init(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
+ error_code); \
+ error_code = pthread_mutexattr_settype(&mutex_attr, \
+ PTHREAD_MUTEX_RECURSIVE); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+ error_code); \
+ error_code = pthread_mutex_init(mutex, &mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutex_init", \
+ error_code); \
+ error_code = pthread_mutexattr_destroy(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+ error_code); \
+}
+#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
+#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
+#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
+#define __itt_unload_lib(handle) dlclose(handle)
+#define __itt_system_error() errno
+#define __itt_fstrcmp(s1, s2) strcmp(s1, s2)
+#define __itt_fstrlen(s) strlen(s)
+#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l)
+#define __itt_fstrdup(s) strdup(s)
+#define __itt_thread_id() pthread_self()
+#define __itt_thread_yield() sched_yield()
+#if ITT_ARCH==ITT_ARCH_IA64
+#ifdef __INTEL_COMPILER
+#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
+#else /* __INTEL_COMPILER */
+/* TODO: Add Support for not Intel compilers for IA-64 architecture */
+#endif /* __INTEL_COMPILER */
+#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+{
+ long result;
+ __asm__ __volatile__("lock\nxadd %0,%1"
+ : "=r"(result),"=m"(*(int*)ptr)
+ : "0"(addend), "m"(*(int*)ptr)
+ : "memory");
+ return result;
+}
+#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64
+#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
+#endif /* ITT_ARCH==ITT_ARCH_IA64 */
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return __TBB_machine_fetchadd4(ptr, 1) + 1L;
+}
+#endif /* ITT_SIMPLE_INIT */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+typedef enum {
+ __itt_collection_normal = 0,
+ __itt_collection_paused = 1
+} __itt_collection_state;
+
+typedef enum {
+ __itt_thread_normal = 0,
+ __itt_thread_ignored = 1
+} __itt_thread_state;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_thread_info
+{
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ TIDT tid;
+ __itt_thread_state state; /*!< Thread state (paused or normal) */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_thread_info* next;
+} __itt_thread_info;
+
+#include "ittnotify_types.h" /* For __itt_group_id definition */
+
+typedef struct ___itt_api_info_20101001
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ __itt_group_id group;
+} __itt_api_info_20101001;
+
+typedef struct ___itt_api_info
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ void* null_func;
+ __itt_group_id group;
+} __itt_api_info;
+
+struct ___itt_domain;
+struct ___itt_string_handle;
+
+typedef struct ___itt_global
+{
+ unsigned char magic[8];
+ unsigned long version_major;
+ unsigned long version_minor;
+ unsigned long version_build;
+ volatile long api_initialized;
+ volatile long mutex_initialized;
+ volatile long atomic_counter;
+ mutex_t mutex;
+ lib_t lib;
+ void* error_handler;
+ const char** dll_path_ptr;
+ __itt_api_info* api_list_ptr;
+ struct ___itt_global* next;
+ /* Joinable structures below */
+ __itt_thread_info* thread_list;
+ struct ___itt_domain* domain_list;
+ struct ___itt_string_handle* string_list;
+ __itt_collection_state state;
+} __itt_global;
+
+#pragma pack(pop)
+
+#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = NULL; \
+ h->nameW = n ? _wcsdup(n) : NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = n ? __itt_fstrdup(n) : NULL; \
+ h->nameW = NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = NULL; \
+ h->strW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = name ? __itt_fstrdup(name) : NULL; \
+ h->strW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#endif /* _ITTNOTIFY_CONFIG_H_ */
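The ittnotify_config.h content restored above carries the compile-time assertion used by the static stubs: ITT_BUILD_ASSERT expands to a char array whose size is 1 when the condition holds and -1 when it does not, so a false condition fails the build instead of failing at run time. A standalone sketch of the same trick follows; MY_BUILD_ASSERT and size_checks are illustrative names, not part of the header:

#include <stddef.h>

/* Same negative-array-size technique as ITT_BUILD_ASSERT above: a false
 * expression makes the array size -1, which is a compile error. */
#define MY_BUILD_ASSERT(expr, suffix) do { \
    static char my_build_check_##suffix[(expr) ? 1 : -1]; \
    my_build_check_##suffix[0] = 0; \
} while (0)

static void size_checks(void)
{
    /* Holds on common ABIs, so this compiles cleanly. */
    MY_BUILD_ASSERT(sizeof(long) >= sizeof(int), long_not_narrower_than_int);

    /* A false condition, e.g. MY_BUILD_ASSERT(sizeof(char) > 1, char_wide),
     * would stop compilation right here. */
}

The two-level _ITT_BUILD_ASSERT / __ITT_BUILD_ASSERT wrapping in the header exists so that __LINE__ is expanded before token pasting, giving each check a unique array name; the sketch sidesteps that by taking an explicit suffix.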
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c
index 27f84c50e8..a2723aa670 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.c
@@ -1,1051 +1,1051 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "kmp_config.h"
-#include "ittnotify_config.h"
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define PATH_MAX 512
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-#include <limits.h>
-#include <dlfcn.h>
-#include <errno.h>
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-
-#define INTEL_NO_MACRO_BODY
-#define INTEL_ITTNOTIFY_API_PRIVATE
-#include "ittnotify.h"
-#include "legacy/ittnotify.h"
-
-#include "disable_warnings.h"
-
-static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 43375 $\n";
-
-#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
-
-#if ITT_OS==ITT_OS_WIN
-static const char* ittnotify_lib_name = "libittnotify.dll";
-#elif ITT_OS==ITT_OS_LINUX
-static const char* ittnotify_lib_name = "libittnotify.so";
-#elif ITT_OS==ITT_OS_MAC
-static const char* ittnotify_lib_name = "libittnotify.dylib";
-#else
-#error Unsupported or unknown OS.
-#endif
-
-#ifdef __ANDROID__
-#include <android/log.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <linux/limits.h>
-
-#ifdef ITT_ANDROID_LOG
- #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI"
- #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__))
- #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__))
- #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__))
- #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__))
-#else
- #define ITT_ANDROID_LOGI(...)
- #define ITT_ANDROID_LOGW(...)
- #define ITT_ANDROID_LOGE(...)
- #define ITT_ANDROID_LOGD(...)
-#endif
-
-/* default location of userapi collector on Android */
-#define ANDROID_ITTNOTIFY_DEFAULT_PATH "/data/data/com.intel.vtune/intel/libittnotify.so"
-#endif
-
-
-#ifndef LIB_VAR_NAME
-#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM
-#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32
-#else
-#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64
-#endif
-#endif /* LIB_VAR_NAME */
-
-#define ITT_MUTEX_INIT_AND_LOCK(p) { \
- if (!p.mutex_initialized) \
- { \
- if (__itt_interlocked_increment(&p.atomic_counter) == 1) \
- { \
- __itt_mutex_init(&p.mutex); \
- p.mutex_initialized = 1; \
- } \
- else \
- while (!p.mutex_initialized) \
- __itt_thread_yield(); \
- } \
- __itt_mutex_lock(&p.mutex); \
-}
-
-const int _N_(err) = 0;
-
-typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id);
-
-/* this define used to control initialization function name. */
-#ifndef __itt_init_ittlib_name
-ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id);
-static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib);
-#define __itt_init_ittlib_name __itt_init_ittlib_ptr
-#endif /* __itt_init_ittlib_name */
-
-typedef void (__itt_fini_ittlib_t)(void);
-
-/* this define used to control finalization function name. */
-#ifndef __itt_fini_ittlib_name
-ITT_EXTERN_C void _N_(fini_ittlib)(void);
-static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib);
-#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr
-#endif /* __itt_fini_ittlib_name */
-
-/* building pointers to imported funcs */
-#undef ITT_STUBV
-#undef ITT_STUB
-#define ITT_STUB(api,type,name,args,params,ptr,group,format) \
-static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
-typedef type api ITT_JOIN(_N_(name),_t) args; \
-ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \
-static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \
-{ \
- __itt_init_ittlib_name(NULL, __itt_group_all); \
- if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \
- return ITTNOTIFY_NAME(name) params; \
- else \
- return (type)0; \
-}
-
-#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \
-static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
-typedef type api ITT_JOIN(_N_(name),_t) args; \
-ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \
-static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \
-{ \
- __itt_init_ittlib_name(NULL, __itt_group_all); \
- if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \
- ITTNOTIFY_NAME(name) params; \
- else \
- return; \
-}
-
-#undef __ITT_INTERNAL_INIT
-#include "ittnotify_static.h"
-
-#undef ITT_STUB
-#undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,ptr,group,format) \
-static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
-typedef type api ITT_JOIN(_N_(name),_t) args; \
-ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END
-
-#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \
-static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
-typedef type api ITT_JOIN(_N_(name),_t) args; \
-ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END
-
-#define __ITT_INTERNAL_INIT
-#include "ittnotify_static.h"
-#undef __ITT_INTERNAL_INIT
-
-ITT_GROUP_LIST(group_list);
-
-#pragma pack(push, 8)
-
-typedef struct ___itt_group_alias
-{
- const char* env_var;
- __itt_group_id groups;
-} __itt_group_alias;
-
-static __itt_group_alias group_alias[] = {
- { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) },
- { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) },
- { NULL, (__itt_group_none) },
- { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */
-};
-
-#pragma pack(pop)
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#pragma warning(push)
-#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-static __itt_api_info api_list[] = {
-/* Define functions with static implementation */
-#undef ITT_STUB
-#undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
-#define ITT_STUBV ITT_STUB
-#define __ITT_INTERNAL_INIT
-#include "ittnotify_static.h"
-#undef __ITT_INTERNAL_INIT
-/* Define functions without static implementation */
-#undef ITT_STUB
-#undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
-#define ITT_STUBV ITT_STUB
-#include "ittnotify_static.h"
- {NULL, NULL, NULL, NULL, __itt_group_none}
-};
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#pragma warning(pop)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/* private, init thread info item. used for internal purposes */
-static __itt_thread_info init_thread_info = {
- (const char*)NULL, /* nameA */
-#if defined(UNICODE) || defined(_UNICODE)
- (const wchar_t*)NULL, /* nameW */
-#else
- (void*)NULL, /* nameW */
-#endif
- 0, /* tid */
- __itt_thread_normal, /* state */
- 0, /* extra1 */
- (void*)NULL, /* extra2 */
- (__itt_thread_info*)NULL /* next */
-};
-
-/* private, NULL domain item. used for internal purposes */
-static __itt_domain null_domain = {
- 0, /* flags: disabled by default */
- (const char*)NULL, /* nameA */
-#if defined(UNICODE) || defined(_UNICODE)
- (const wchar_t*)NULL, /* nameW */
-#else
- (void*)NULL, /* nameW */
-#endif
- 0, /* extra1 */
- (void*)NULL, /* extra2 */
- (__itt_domain*)NULL /* next */
-};
-
-/* private, NULL string handle item. used for internal purposes */
-static __itt_string_handle null_string_handle = {
- (const char*)NULL, /* strA */
-#if defined(UNICODE) || defined(_UNICODE)
- (const wchar_t*)NULL, /* strW */
-#else
- (void*)NULL, /* strW */
-#endif
- 0, /* extra1 */
- (void*)NULL, /* extra2 */
- (__itt_string_handle*)NULL /* next */
-};
-
-static const char dll_path[PATH_MAX] = { 0 };
-
-/* static part descriptor which handles. all notification api attributes. */
-__itt_global _N_(_ittapi_global) = {
- ITT_MAGIC, /* identification info */
- ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */
- 0, /* api_initialized */
- 0, /* mutex_initialized */
- 0, /* atomic_counter */
- MUTEX_INITIALIZER, /* mutex */
- NULL, /* dynamic library handle */
- NULL, /* error_handler */
- (const char**)&dll_path, /* dll_path_ptr */
- (__itt_api_info*)&api_list, /* api_list_ptr */
- NULL, /* next __itt_global */
- (__itt_thread_info*)&init_thread_info, /* thread_list */
- (__itt_domain*)&null_domain, /* domain_list */
- (__itt_string_handle*)&null_string_handle, /* string_list */
- __itt_collection_normal /* collection state */
-};
-
-typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id);
-typedef void (__itt_api_fini_t)(__itt_global*);
-
-/* ========================================================================= */
-
-#ifdef ITT_NOTIFY_EXT_REPORT
-ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args);
-#endif /* ITT_NOTIFY_EXT_REPORT */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#pragma warning(push)
-#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-static void __itt_report_error(__itt_error_code code, ...)
-{
- va_list args;
- va_start(args, code);
- if (_N_(_ittapi_global).error_handler != NULL)
- {
- __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
- handler(code, args);
- }
-#ifdef ITT_NOTIFY_EXT_REPORT
- _N_(error_handler)(code, args);
-#endif /* ITT_NOTIFY_EXT_REPORT */
- va_end(args);
-}
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#pragma warning(pop)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name)
-{
- __itt_domain *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
- if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init)))
- return ITTNOTIFY_NAME(domain_createW)(name);
- }
-
- if (name == NULL)
- return _N_(_ittapi_global).domain_list;
-
- ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
- for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
- if (h->nameW != NULL && !wcscmp(h->nameW, name))
- break;
- if (h == NULL) {
- NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name);
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
- return h;
-}
-
-static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name)
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-{
- __itt_domain *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init)))
- return ITTNOTIFY_NAME(domain_createA)(name);
-#else
- if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init)))
- return ITTNOTIFY_NAME(domain_create)(name);
-#endif
- }
-
- if (name == NULL)
- return _N_(_ittapi_global).domain_list;
-
- ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
- for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
- if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name))
- break;
- if (h == NULL) {
- NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name);
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
- return h;
-}
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name)
-{
- __itt_string_handle *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
- if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init)))
- return ITTNOTIFY_NAME(string_handle_createW)(name);
- }
-
- if (name == NULL)
- return _N_(_ittapi_global).string_list;
-
- ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
- for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
- if (h->strW != NULL && !wcscmp(h->strW, name))
- break;
- if (h == NULL) {
- NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name);
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
- return h;
-}
-
-static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name)
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-{
- __itt_string_handle *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init)))
- return ITTNOTIFY_NAME(string_handle_createA)(name);
-#else
- if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init)))
- return ITTNOTIFY_NAME(string_handle_create)(name);
-#endif
- }
-
- if (name == NULL)
- return _N_(_ittapi_global).string_list;
-
- ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
- for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
- if (h->strA != NULL && !__itt_fstrcmp(h->strA, name))
- break;
- if (h == NULL) {
- NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name);
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
- return h;
-}
-
-/* -------------------------------------------------------------------------- */
-
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void)
-{
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
- if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init)))
- {
- ITTNOTIFY_NAME(pause)();
- return;
- }
- }
- _N_(_ittapi_global).state = __itt_collection_paused;
-}
-
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void)
-{
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
- if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init)))
- {
- ITTNOTIFY_NAME(resume)();
- return;
- }
- }
- _N_(_ittapi_global).state = __itt_collection_normal;
-}
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name)
-{
- TIDT tid = __itt_thread_id();
- __itt_thread_info *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
- if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init)))
- {
- ITTNOTIFY_NAME(thread_set_nameW)(name);
- return;
- }
- }
-
- __itt_mutex_lock(&_N_(_ittapi_global).mutex);
- for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
- if (h->tid == tid)
- break;
- if (h == NULL) {
- NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
- }
- else
- {
- h->nameW = name ? _wcsdup(name) : NULL;
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
-}
-
-static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen)
-{
- namelen = namelen;
- ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name);
- return 0;
-}
-
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-{
- TIDT tid = __itt_thread_id();
- __itt_thread_info *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init)))
- {
- ITTNOTIFY_NAME(thread_set_nameA)(name);
- return;
- }
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init)))
- {
- ITTNOTIFY_NAME(thread_set_name)(name);
- return;
- }
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- }
-
- __itt_mutex_lock(&_N_(_ittapi_global).mutex);
- for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
- if (h->tid == tid)
- break;
- if (h == NULL) {
- NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
- }
- else
- {
- h->nameA = name ? __itt_fstrdup(name) : NULL;
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
-}
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen)
-{
- namelen = namelen;
- ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name);
- return 0;
-}
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen)
-{
- namelen = namelen;
- ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name);
- return 0;
-}
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void)
-{
- TIDT tid = __itt_thread_id();
- __itt_thread_info *h_tail, *h;
-
- if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
- {
- __itt_init_ittlib_name(NULL, __itt_group_all);
- if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init)))
- {
- ITTNOTIFY_NAME(thread_ignore)();
- return;
- }
- }
-
- __itt_mutex_lock(&_N_(_ittapi_global).mutex);
- for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
- if (h->tid == tid)
- break;
- if (h == NULL) {
- static const char* name = "unknown";
- NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name);
- }
- else
- {
- h->state = __itt_thread_ignored;
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
-}
-
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void)
-{
- ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))();
-}
-
-static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void)
-{
-#ifdef __ANDROID__
- /*
- * if LIB_VAR_NAME env variable were set before then stay previous value
- * else set default path
- */
- setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0);
-#endif
-}
-
-/* -------------------------------------------------------------------------- */
-
-static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len)
-{
- int i;
- int j;
-
- if (!s || !sep || !out || !len)
- return NULL;
-
- for (i = 0; s[i]; i++)
- {
- int b = 0;
- for (j = 0; sep[j]; j++)
- if (s[i] == sep[j])
- {
- b = 1;
- break;
- }
- if (!b)
- break;
- }
-
- if (!s[i])
- return NULL;
-
- *len = 0;
- *out = &s[i];
-
- for (; s[i]; i++, (*len)++)
- {
- int b = 0;
- for (j = 0; sep[j]; j++)
- if (s[i] == sep[j])
- {
- b = 1;
- break;
- }
- if (b)
- break;
- }
-
- for (; s[i]; i++)
- {
- int b = 0;
- for (j = 0; sep[j]; j++)
- if (s[i] == sep[j])
- {
- b = 1;
- break;
- }
- if (!b)
- break;
- }
-
- return &s[i];
-}
-
-/* This function return value of env variable that placed into static buffer.
- * !!! The same static buffer is used for subsequent calls. !!!
- * This was done to aviod dynamic allocation for few calls.
- * Actually we need this function only four times.
- */
-static const char* __itt_get_env_var(const char* name)
-{
-#define MAX_ENV_VALUE_SIZE 4086
- static char env_buff[MAX_ENV_VALUE_SIZE];
- static char* env_value = (char*)env_buff;
-
- if (name != NULL)
- {
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff);
- DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len);
- if (rc >= max_len)
- __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1));
- else if (rc > 0)
- {
- const char* ret = (const char*)env_value;
- env_value += rc + 1;
- return ret;
- }
- else
- {
- /* If environment variable is empty, GetEnvirornmentVariables()
- * returns zero (number of characters (not including terminating null),
- * and GetLastError() returns ERROR_SUCCESS. */
- DWORD err = GetLastError();
- if (err == ERROR_SUCCESS)
- return env_value;
-
- if (err != ERROR_ENVVAR_NOT_FOUND)
- __itt_report_error(__itt_error_cant_read_env, name, (int)err);
- }
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
- char* env = getenv(name);
- if (env != NULL)
- {
- size_t len = strlen(env);
- size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff);
- if (len < max_len)
- {
- const char* ret = (const char*)env_value;
- strncpy(env_value, env, len + 1);
- env_value += len + 1;
- return ret;
- } else
- __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1));
- }
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- }
- return NULL;
-}
-
-static const char* __itt_get_lib_name(void)
-{
- const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
-
-#ifdef __ANDROID__
- if (lib_name == NULL)
- {
- const char* const system_wide_marker_filename = "/data/local/tmp/com.intel.itt.collector_lib";
- int itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY);
- ssize_t res = 0;
-
- if (itt_marker_file_fd == -1)
- {
- const pid_t my_pid = getpid();
- char cmdline_path[PATH_MAX] = {0};
- char package_name[PATH_MAX] = {0};
- char app_sandbox_file[PATH_MAX] = {0};
- int cmdline_fd = 0;
-
- ITT_ANDROID_LOGI("Unable to open system-wide marker file.");
- snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid);
- ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path);
- cmdline_fd = open(cmdline_path, O_RDONLY);
- if (cmdline_fd == -1)
- {
- ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path);
- return lib_name;
- }
- res = read(cmdline_fd, package_name, PATH_MAX - 1);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path);
- res = close(cmdline_fd);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path);
- }
- return lib_name;
- }
- res = close(cmdline_fd);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path);
- return lib_name;
- }
- ITT_ANDROID_LOGI("Package name: %s\n", package_name);
- snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/com.intel.itt.collector_lib", package_name);
- ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file);
- itt_marker_file_fd = open(app_sandbox_file, O_RDONLY);
- if (itt_marker_file_fd == -1)
- {
- ITT_ANDROID_LOGE("Unable to open app marker file!");
- return lib_name;
- }
- }
-
- {
- char itt_lib_name[PATH_MAX] = {0};
-
- res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd);
- res = close(itt_marker_file_fd);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd);
- }
- return lib_name;
- }
- ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name);
- res = close(itt_marker_file_fd);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd);
- return lib_name;
- }
- ITT_ANDROID_LOGI("Set env");
- res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0);
- if (res == -1)
- {
- ITT_ANDROID_LOGE("Unable to set env var!");
- return lib_name;
- }
- lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
- ITT_ANDROID_LOGI("ITT Lib path from env: %s", itt_lib_name);
- }
- }
-#endif
-
- return lib_name;
-}
-
-#ifndef min
-#define min(a,b) (a) < (b) ? (a) : (b)
-#endif /* min */
-
-static __itt_group_id __itt_get_groups(void)
-{
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp_config.h"
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define PATH_MAX 512
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#include <limits.h>
+#include <dlfcn.h>
+#include <errno.h>
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define INTEL_NO_MACRO_BODY
+#define INTEL_ITTNOTIFY_API_PRIVATE
+#include "ittnotify.h"
+#include "legacy/ittnotify.h"
+
+#include "disable_warnings.h"
+
+static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 43375 $\n";
+
+#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+
+#if ITT_OS==ITT_OS_WIN
+static const char* ittnotify_lib_name = "libittnotify.dll";
+#elif ITT_OS==ITT_OS_LINUX
+static const char* ittnotify_lib_name = "libittnotify.so";
+#elif ITT_OS==ITT_OS_MAC
+static const char* ittnotify_lib_name = "libittnotify.dylib";
+#else
+#error Unsupported or unknown OS.
+#endif
+
+#ifdef __ANDROID__
+#include <android/log.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+
+#ifdef ITT_ANDROID_LOG
+ #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI"
+ #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+ #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+ #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+ #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+#else
+ #define ITT_ANDROID_LOGI(...)
+ #define ITT_ANDROID_LOGW(...)
+ #define ITT_ANDROID_LOGE(...)
+ #define ITT_ANDROID_LOGD(...)
+#endif
+
+/* default location of userapi collector on Android */
+#define ANDROID_ITTNOTIFY_DEFAULT_PATH "/data/data/com.intel.vtune/intel/libittnotify.so"
+#endif
+
+
+#ifndef LIB_VAR_NAME
+#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM
+#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32
+#else
+#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64
+#endif
+#endif /* LIB_VAR_NAME */
+
+#define ITT_MUTEX_INIT_AND_LOCK(p) { \
+ if (!p.mutex_initialized) \
+ { \
+ if (__itt_interlocked_increment(&p.atomic_counter) == 1) \
+ { \
+ __itt_mutex_init(&p.mutex); \
+ p.mutex_initialized = 1; \
+ } \
+ else \
+ while (!p.mutex_initialized) \
+ __itt_thread_yield(); \
+ } \
+ __itt_mutex_lock(&p.mutex); \
+}
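/*
 * Editor's note: a minimal standalone sketch of the lazy-initialization pattern
 * that ITT_MUTEX_INIT_AND_LOCK implements above, assuming POSIX threads and
 * GCC/Clang __sync builtins. All names below (lazy_lock_t, lazy_lock) are
 * illustrative and are not part of ittnotify.
 */
#include <pthread.h>
#include <sched.h>

typedef struct {
    pthread_mutex_t mutex;
    volatile int    mutex_initialized;
    volatile int    atomic_counter;
} lazy_lock_t;

static void lazy_lock(lazy_lock_t* p)
{
    if (!p->mutex_initialized)
    {
        /* The first thread to bump the counter initializes the mutex;
         * every other thread yields until the flag is published. */
        if (__sync_add_and_fetch(&p->atomic_counter, 1) == 1)
        {
            pthread_mutex_init(&p->mutex, NULL);
            p->mutex_initialized = 1;
        }
        else
            while (!p->mutex_initialized)
                sched_yield();
    }
    pthread_mutex_lock(&p->mutex);
}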
+
+const int _N_(err) = 0;
+
+typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id);
+
+/* This define is used to control the initialization function name. */
+#ifndef __itt_init_ittlib_name
+ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id);
+static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib);
+#define __itt_init_ittlib_name __itt_init_ittlib_ptr
+#endif /* __itt_init_ittlib_name */
+
+typedef void (__itt_fini_ittlib_t)(void);
+
+/* This define is used to control the finalization function name. */
+#ifndef __itt_fini_ittlib_name
+ITT_EXTERN_C void _N_(fini_ittlib)(void);
+static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib);
+#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr
+#endif /* __itt_fini_ittlib_name */
+
+/* building pointers to imported funcs */
+#undef ITT_STUBV
+#undef ITT_STUB
+#define ITT_STUB(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \
+{ \
+ __itt_init_ittlib_name(NULL, __itt_group_all); \
+ if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \
+ return ITTNOTIFY_NAME(name) params; \
+ else \
+ return (type)0; \
+}
+
+#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \
+{ \
+ __itt_init_ittlib_name(NULL, __itt_group_all); \
+ if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \
+ ITTNOTIFY_NAME(name) params; \
+ else \
+ return; \
+}
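/*
 * Editor's note: roughly what one ITT_STUB expansion above produces for a single
 * entry point, with the versioned names simplified for readability; this is a
 * sketch, not the exact generated code. The exported slot starts out pointing at
 * the _init stub, which triggers library loading and then forwards the call if a
 * real implementation has replaced the slot.
 */
static __itt_domain* domain_create_init(const char* name);
typedef __itt_domain* (domain_create_fn)(const char* name);
static domain_create_fn* domain_create_slot = domain_create_init;
static __itt_domain* domain_create_init(const char* name)
{
    __itt_init_ittlib_name(NULL, __itt_group_all);
    if (domain_create_slot && domain_create_slot != domain_create_init)
        return domain_create_slot(name);    /* forward to the loaded collector */
    return (__itt_domain*)0;                /* no collector loaded */
}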
+
+#undef __ITT_INTERNAL_INIT
+#include "ittnotify_static.h"
+
+#undef ITT_STUB
+#undef ITT_STUBV
+#define ITT_STUB(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END
+
+#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END
+
+#define __ITT_INTERNAL_INIT
+#include "ittnotify_static.h"
+#undef __ITT_INTERNAL_INIT
+
+ITT_GROUP_LIST(group_list);
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_alias
+{
+ const char* env_var;
+ __itt_group_id groups;
+} __itt_group_alias;
+
+static __itt_group_alias group_alias[] = {
+ { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) },
+ { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) },
+ { NULL, (__itt_group_none) },
+ { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */
+};
+
+#pragma pack(pop)
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(push)
+#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static __itt_api_info api_list[] = {
+/* Define functions with static implementation */
+#undef ITT_STUB
+#undef ITT_STUBV
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
+#define ITT_STUBV ITT_STUB
+#define __ITT_INTERNAL_INIT
+#include "ittnotify_static.h"
+#undef __ITT_INTERNAL_INIT
+/* Define functions without static implementation */
+#undef ITT_STUB
+#undef ITT_STUBV
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
+#define ITT_STUBV ITT_STUB
+#include "ittnotify_static.h"
+ {NULL, NULL, NULL, NULL, __itt_group_none}
+};
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(pop)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/* private, init thread info item. used for internal purposes */
+static __itt_thread_info init_thread_info = {
+ (const char*)NULL, /* nameA */
+#if defined(UNICODE) || defined(_UNICODE)
+ (const wchar_t*)NULL, /* nameW */
+#else
+ (void*)NULL, /* nameW */
+#endif
+ 0, /* tid */
+ __itt_thread_normal, /* state */
+ 0, /* extra1 */
+ (void*)NULL, /* extra2 */
+ (__itt_thread_info*)NULL /* next */
+};
+
+/* private, NULL domain item. used for internal purposes */
+static __itt_domain null_domain = {
+ 0, /* flags: disabled by default */
+ (const char*)NULL, /* nameA */
+#if defined(UNICODE) || defined(_UNICODE)
+ (const wchar_t*)NULL, /* nameW */
+#else
+ (void*)NULL, /* nameW */
+#endif
+ 0, /* extra1 */
+ (void*)NULL, /* extra2 */
+ (__itt_domain*)NULL /* next */
+};
+
+/* private, NULL string handle item. used for internal purposes */
+static __itt_string_handle null_string_handle = {
+ (const char*)NULL, /* strA */
+#if defined(UNICODE) || defined(_UNICODE)
+ (const wchar_t*)NULL, /* strW */
+#else
+ (void*)NULL, /* strW */
+#endif
+ 0, /* extra1 */
+ (void*)NULL, /* extra2 */
+ (__itt_string_handle*)NULL /* next */
+};
+
+static const char dll_path[PATH_MAX] = { 0 };
+
+/* static part descriptor which handles all notification api attributes. */
+__itt_global _N_(_ittapi_global) = {
+ ITT_MAGIC, /* identification info */
+ ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */
+ 0, /* api_initialized */
+ 0, /* mutex_initialized */
+ 0, /* atomic_counter */
+ MUTEX_INITIALIZER, /* mutex */
+ NULL, /* dynamic library handle */
+ NULL, /* error_handler */
+ (const char**)&dll_path, /* dll_path_ptr */
+ (__itt_api_info*)&api_list, /* api_list_ptr */
+ NULL, /* next __itt_global */
+ (__itt_thread_info*)&init_thread_info, /* thread_list */
+ (__itt_domain*)&null_domain, /* domain_list */
+ (__itt_string_handle*)&null_string_handle, /* string_list */
+ __itt_collection_normal /* collection state */
+};
+
+typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id);
+typedef void (__itt_api_fini_t)(__itt_global*);
+
+/* ========================================================================= */
+
+#ifdef ITT_NOTIFY_EXT_REPORT
+ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args);
+#endif /* ITT_NOTIFY_EXT_REPORT */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(push)
+#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static void __itt_report_error(__itt_error_code code, ...)
+{
+ va_list args;
+ va_start(args, code);
+ if (_N_(_ittapi_global).error_handler != NULL)
+ {
+ __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+ handler(code, args);
+ }
+#ifdef ITT_NOTIFY_EXT_REPORT
+ _N_(error_handler)(code, args);
+#endif /* ITT_NOTIFY_EXT_REPORT */
+ va_end(args);
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(pop)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name)
+{
+ __itt_domain *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init)))
+ return ITTNOTIFY_NAME(domain_createW)(name);
+ }
+
+ if (name == NULL)
+ return _N_(_ittapi_global).domain_list;
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
+ if (h->nameW != NULL && !wcscmp(h->nameW, name))
+ break;
+ if (h == NULL) {
+ NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_domain *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init)))
+ return ITTNOTIFY_NAME(domain_createA)(name);
+#else
+ if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init)))
+ return ITTNOTIFY_NAME(domain_create)(name);
+#endif
+ }
+
+ if (name == NULL)
+ return _N_(_ittapi_global).domain_list;
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
+ if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name))
+ break;
+ if (h == NULL) {
+ NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name)
+{
+ __itt_string_handle *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init)))
+ return ITTNOTIFY_NAME(string_handle_createW)(name);
+ }
+
+ if (name == NULL)
+ return _N_(_ittapi_global).string_list;
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
+ if (h->strW != NULL && !wcscmp(h->strW, name))
+ break;
+ if (h == NULL) {
+ NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_string_handle *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init)))
+ return ITTNOTIFY_NAME(string_handle_createA)(name);
+#else
+ if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init)))
+ return ITTNOTIFY_NAME(string_handle_create)(name);
+#endif
+ }
+
+ if (name == NULL)
+ return _N_(_ittapi_global).string_list;
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
+ if (h->strA != NULL && !__itt_fstrcmp(h->strA, name))
+ break;
+ if (h == NULL) {
+ NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+/* -------------------------------------------------------------------------- */
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void)
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init)))
+ {
+ ITTNOTIFY_NAME(pause)();
+ return;
+ }
+ }
+ _N_(_ittapi_global).state = __itt_collection_paused;
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void)
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init)))
+ {
+ ITTNOTIFY_NAME(resume)();
+ return;
+ }
+ }
+ _N_(_ittapi_global).state = __itt_collection_normal;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name)
+{
+ TIDT tid = __itt_thread_id();
+ __itt_thread_info *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init)))
+ {
+ ITTNOTIFY_NAME(thread_set_nameW)(name);
+ return;
+ }
+ }
+
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
+ if (h->tid == tid)
+ break;
+ if (h == NULL) {
+ NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
+ }
+ else
+ {
+ h->nameW = name ? _wcsdup(name) : NULL;
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+}
+
+static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen)
+{
+ namelen = namelen;
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name);
+ return 0;
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ TIDT tid = __itt_thread_id();
+ __itt_thread_info *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init)))
+ {
+ ITTNOTIFY_NAME(thread_set_nameA)(name);
+ return;
+ }
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init)))
+ {
+ ITTNOTIFY_NAME(thread_set_name)(name);
+ return;
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
+ if (h->tid == tid)
+ break;
+ if (h == NULL) {
+ NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
+ }
+ else
+ {
+ h->nameA = name ? __itt_fstrdup(name) : NULL;
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen)
+{
+ namelen = namelen;
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name);
+ return 0;
+}
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen)
+{
+ namelen = namelen;
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name);
+ return 0;
+}
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void)
+{
+ TIDT tid = __itt_thread_id();
+ __itt_thread_info *h_tail, *h;
+
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init)))
+ {
+ ITTNOTIFY_NAME(thread_ignore)();
+ return;
+ }
+ }
+
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
+ if (h->tid == tid)
+ break;
+ if (h == NULL) {
+ static const char* name = "unknown";
+ NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name);
+ }
+ else
+ {
+ h->state = __itt_thread_ignored;
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void)
+{
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))();
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void)
+{
+#ifdef __ANDROID__
+ /*
+ * If the LIB_VAR_NAME environment variable was already set, keep its value;
+ * otherwise set the default path.
+ */
+ setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0);
+#endif
+}
+
+/* -------------------------------------------------------------------------- */
+
+static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len)
+{
int i;
- __itt_group_id res = __itt_group_none;
- const char* var_name = "INTEL_ITTNOTIFY_GROUPS";
- const char* group_str = __itt_get_env_var(var_name);
-
- if (group_str != NULL)
- {
- int len;
- char gr[255];
- const char* chunk;
- while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL)
- {
- __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1);
- gr[min(len, (int)(sizeof(gr) - 1))] = 0;
-
- for (i = 0; group_list[i].name != NULL; i++)
- {
- if (!__itt_fstrcmp(gr, group_list[i].name))
- {
- res = (__itt_group_id)(res | group_list[i].id);
- break;
- }
- }
- }
- /* TODO: !!! Workaround for bug with warning for unknown group !!!
- * Should be fixed in new initialization scheme.
- * Now the following groups should be set always. */
- for (i = 0; group_list[i].id != __itt_group_none; i++)
- if (group_list[i].id != __itt_group_all &&
- group_list[i].id > __itt_group_splitter_min &&
- group_list[i].id < __itt_group_splitter_max)
- res = (__itt_group_id)(res | group_list[i].id);
- return res;
- }
- else
- {
- for (i = 0; group_alias[i].env_var != NULL; i++)
- if (__itt_get_env_var(group_alias[i].env_var) != NULL)
- return group_alias[i].groups;
- }
-
- return res;
-}
-
-static int __itt_lib_version(lib_t lib)
-{
- if (lib == NULL)
- return 0;
- if (__itt_get_proc(lib, "__itt_api_init"))
- return 2;
- if (__itt_get_proc(lib, "__itt_api_version"))
- return 1;
- return 0;
-}
-
-/* It's not used right now! Comment it out to avoid warnings.
-static void __itt_reinit_all_pointers(void)
-{
+ int j;
+
+ if (!s || !sep || !out || !len)
+ return NULL;
+
+ for (i = 0; s[i]; i++)
+ {
+ int b = 0;
+ for (j = 0; sep[j]; j++)
+ if (s[i] == sep[j])
+ {
+ b = 1;
+ break;
+ }
+ if (!b)
+ break;
+ }
+
+ if (!s[i])
+ return NULL;
+
+ *len = 0;
+ *out = &s[i];
+
+ for (; s[i]; i++, (*len)++)
+ {
+ int b = 0;
+ for (j = 0; sep[j]; j++)
+ if (s[i] == sep[j])
+ {
+ b = 1;
+ break;
+ }
+ if (b)
+ break;
+ }
+
+ for (; s[i]; i++)
+ {
+ int b = 0;
+ for (j = 0; sep[j]; j++)
+ if (s[i] == sep[j])
+ {
+ b = 1;
+ break;
+ }
+ if (!b)
+ break;
+ }
+
+ return &s[i];
+}
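/*
 * Editor's note: a hedged usage sketch for __itt_fsplit above. The function skips
 * leading separators, reports the next token through *out/*len (not
 * null-terminated), and returns a pointer just past the trailing separators, or
 * NULL when the input is exhausted, so callers loop over it the way
 * __itt_get_groups does below. The buffer size and input string are illustrative.
 */
static void fsplit_example(void)
{
    const char* s = "control, thread; sync";
    const char* chunk;
    int len;
    char token[64];

    while ((s = __itt_fsplit(s, ",; ", &chunk, &len)) != NULL)
    {
        if (len >= (int)sizeof(token))
            len = (int)sizeof(token) - 1;
        memcpy(token, chunk, (size_t)len);
        token[len] = '\0';
        printf("token: %s\n", token);   /* prints control, thread, sync */
    }
}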
+
+/* This function returns the value of an environment variable, placed into a static buffer.
+ * !!! The same static buffer is used for subsequent calls. !!!
+ * This avoids dynamic allocation for the few lookups we need.
+ * In practice this function is needed only about four times.
+ */
+static const char* __itt_get_env_var(const char* name)
+{
+#define MAX_ENV_VALUE_SIZE 4086
+ static char env_buff[MAX_ENV_VALUE_SIZE];
+ static char* env_value = (char*)env_buff;
+
+ if (name != NULL)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff);
+ DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len);
+ if (rc >= max_len)
+ __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1));
+ else if (rc > 0)
+ {
+ const char* ret = (const char*)env_value;
+ env_value += rc + 1;
+ return ret;
+ }
+ else
+ {
+ /* If the environment variable is empty, GetEnvironmentVariableA()
+ * returns zero (the number of characters, not including the terminating null)
+ * and GetLastError() returns ERROR_SUCCESS. */
+ DWORD err = GetLastError();
+ if (err == ERROR_SUCCESS)
+ return env_value;
+
+ if (err != ERROR_ENVVAR_NOT_FOUND)
+ __itt_report_error(__itt_error_cant_read_env, name, (int)err);
+ }
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ char* env = getenv(name);
+ if (env != NULL)
+ {
+ size_t len = strlen(env);
+ size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff);
+ if (len < max_len)
+ {
+ const char* ret = (const char*)env_value;
+ strncpy(env_value, env, len + 1);
+ env_value += len + 1;
+ return ret;
+ } else
+ __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1));
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+ return NULL;
+}
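/*
 * Editor's note: illustrative only. Because __itt_get_env_var above hands out
 * slices of a single static 4086-byte buffer, each successful lookup permanently
 * consumes strlen(value)+1 bytes of it; the returned pointers stay valid across
 * later calls, but only a handful of lookups fit, which is why the function is
 * reserved for the few variables ittnotify reads at startup. The variable names
 * below are placeholders.
 */
static void env_buffer_example(void)
{
    const char* a = __itt_get_env_var("HOME");
    const char* b = __itt_get_env_var("LANG");
    if (a != NULL && b != NULL)
        printf("HOME=%s LANG=%s\n", a, b);  /* both remain valid simultaneously */
}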
+
+static const char* __itt_get_lib_name(void)
+{
+ const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
+
+#ifdef __ANDROID__
+ if (lib_name == NULL)
+ {
+ const char* const system_wide_marker_filename = "/data/local/tmp/com.intel.itt.collector_lib";
+ int itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY);
+ ssize_t res = 0;
+
+ if (itt_marker_file_fd == -1)
+ {
+ const pid_t my_pid = getpid();
+ char cmdline_path[PATH_MAX] = {0};
+ char package_name[PATH_MAX] = {0};
+ char app_sandbox_file[PATH_MAX] = {0};
+ int cmdline_fd = 0;
+
+ ITT_ANDROID_LOGI("Unable to open system-wide marker file.");
+ snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid);
+ ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path);
+ cmdline_fd = open(cmdline_path, O_RDONLY);
+ if (cmdline_fd == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path);
+ return lib_name;
+ }
+ res = read(cmdline_fd, package_name, PATH_MAX - 1);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path);
+ res = close(cmdline_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path);
+ }
+ return lib_name;
+ }
+ res = close(cmdline_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path);
+ return lib_name;
+ }
+ ITT_ANDROID_LOGI("Package name: %s\n", package_name);
+ snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/com.intel.itt.collector_lib", package_name);
+ ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file);
+ itt_marker_file_fd = open(app_sandbox_file, O_RDONLY);
+ if (itt_marker_file_fd == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to open app marker file!");
+ return lib_name;
+ }
+ }
+
+ {
+ char itt_lib_name[PATH_MAX] = {0};
+
+ res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd);
+ res = close(itt_marker_file_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd);
+ }
+ return lib_name;
+ }
+ ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name);
+ res = close(itt_marker_file_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd);
+ return lib_name;
+ }
+ ITT_ANDROID_LOGI("Set env");
+ res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to set env var!");
+ return lib_name;
+ }
+ lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
+ ITT_ANDROID_LOGI("ITT Lib path from env: %s", itt_lib_name);
+ }
+ }
+#endif
+
+ return lib_name;
+}
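/*
 * Editor's note: a hedged usage sketch. __itt_get_lib_name above resolves the
 * collector path from the INTEL_LIBITTNOTIFY32 / INTEL_LIBITTNOTIFY64 environment
 * variable (LIB_VAR_NAME), and falls back to the Android marker files only when
 * that variable is unset. The path below is a placeholder, not a real file, and
 * setenv assumes a POSIX environment.
 */
static void select_collector_example(void)
{
    setenv(ITT_TO_STR(LIB_VAR_NAME), "/opt/collector/libittnotify_collector.so", 1);
    /* The next call into the ITT API will load this library via init_ittlib. */
}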
+
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif /* min */
+
+static __itt_group_id __itt_get_groups(void)
+{
int i;
- // Fill all pointers with initial stubs
- for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
- *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func;
-}
-*/
-
-static void __itt_nullify_all_pointers(void)
-{
+ __itt_group_id res = __itt_group_none;
+ const char* var_name = "INTEL_ITTNOTIFY_GROUPS";
+ const char* group_str = __itt_get_env_var(var_name);
+
+ if (group_str != NULL)
+ {
+ int len;
+ char gr[255];
+ const char* chunk;
+ while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL)
+ {
+ __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1);
+ gr[min(len, (int)(sizeof(gr) - 1))] = 0;
+
+ for (i = 0; group_list[i].name != NULL; i++)
+ {
+ if (!__itt_fstrcmp(gr, group_list[i].name))
+ {
+ res = (__itt_group_id)(res | group_list[i].id);
+ break;
+ }
+ }
+ }
+ /* TODO: !!! Workaround for a bug with a warning for an unknown group !!!
+ * Should be fixed in the new initialization scheme.
+ * For now the following groups must always be set. */
+ for (i = 0; group_list[i].id != __itt_group_none; i++)
+ if (group_list[i].id != __itt_group_all &&
+ group_list[i].id > __itt_group_splitter_min &&
+ group_list[i].id < __itt_group_splitter_max)
+ res = (__itt_group_id)(res | group_list[i].id);
+ return res;
+ }
+ else
+ {
+ for (i = 0; group_alias[i].env_var != NULL; i++)
+ if (__itt_get_env_var(group_alias[i].env_var) != NULL)
+ return group_alias[i].groups;
+ }
+
+ return res;
+}
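/*
 * Editor's note: a hedged usage sketch for __itt_get_groups above. It parses the
 * INTEL_ITTNOTIFY_GROUPS variable as a ","/";"/" "-separated list of group names
 * and ORs the matching bits together; "sync" and "thread" are assumed to be names
 * present in group_list, and setenv assumes a POSIX environment.
 */
static __itt_group_id groups_example(void)
{
    setenv("INTEL_ITTNOTIFY_GROUPS", "sync,thread", 1);
    /* Result: __itt_group_sync | __itt_group_thread, plus the groups between
     * the splitter markers that the workaround above always enables. */
    return __itt_get_groups();
}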
+
+static int __itt_lib_version(lib_t lib)
+{
+ if (lib == NULL)
+ return 0;
+ if (__itt_get_proc(lib, "__itt_api_init"))
+ return 2;
+ if (__itt_get_proc(lib, "__itt_api_version"))
+ return 1;
+ return 0;
+}
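/*
 * Editor's note (summary of how init_ittlib below consumes this value):
 *   0 - legacy collector: only the legacy group is wired up via per-symbol lookup;
 *   1 - per-symbol lookup of every api_list entry in the requested groups;
 *   2 - the collector's own __itt_api_init() is called to patch the pointer table.
 */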
+
+/* This is not used right now; it is commented out to avoid warnings.
+static void __itt_reinit_all_pointers(void)
+{
int i;
- /* Nulify all pointers except domain_create and string_handle_create */
- for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
- *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
-}
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#pragma warning(push)
-#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
-#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-ITT_EXTERN_C void _N_(fini_ittlib)(void)
-{
- __itt_api_fini_t* __itt_api_fini_ptr;
- static volatile TIDT current_thread = 0;
-
- if (_N_(_ittapi_global).api_initialized)
- {
- __itt_mutex_lock(&_N_(_ittapi_global).mutex);
- if (_N_(_ittapi_global).api_initialized)
- {
- if (current_thread == 0)
- {
- current_thread = __itt_thread_id();
- __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini");
- if (__itt_api_fini_ptr)
- __itt_api_fini_ptr(&_N_(_ittapi_global));
-
- __itt_nullify_all_pointers();
-
- /* TODO: !!! not safe !!! don't support unload so far.
- * if (_N_(_ittapi_global).lib != NULL)
- * __itt_unload_lib(_N_(_ittapi_global).lib);
- * _N_(_ittapi_global).lib = NULL;
- */
- _N_(_ittapi_global).api_initialized = 0;
- current_thread = 0;
- }
- }
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
- }
-}
-
-ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups)
-{
+ // Fill all pointers with initial stubs
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func;
+}
+*/
+
+static void __itt_nullify_all_pointers(void)
+{
int i;
- __itt_group_id groups;
-#ifdef ITT_COMPLETE_GROUP
- __itt_group_id zero_group = __itt_group_none;
-#endif /* ITT_COMPLETE_GROUP */
- static volatile TIDT current_thread = 0;
-
- if (!_N_(_ittapi_global).api_initialized)
- {
-#ifndef ITT_SIMPLE_INIT
- ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
-#endif /* ITT_SIMPLE_INIT */
-
- if (!_N_(_ittapi_global).api_initialized)
- {
- if (current_thread == 0)
- {
- current_thread = __itt_thread_id();
- _N_(_ittapi_global).thread_list->tid = current_thread;
- if (lib_name == NULL)
- lib_name = __itt_get_lib_name();
- groups = __itt_get_groups();
- if (groups != __itt_group_none || lib_name != NULL)
- {
- _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
-
- if (_N_(_ittapi_global).lib != NULL)
- {
- __itt_api_init_t* __itt_api_init_ptr;
- int lib_version = __itt_lib_version(_N_(_ittapi_global).lib);
-
- switch (lib_version) {
- case 0:
- groups = __itt_group_legacy;
- case 1:
- /* Fill all pointers from dynamic library */
- for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
- {
- if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups)
- {
- *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name);
- if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL)
- {
- /* Restore pointers for function with static implementation */
- *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
- __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name);
-#ifdef ITT_COMPLETE_GROUP
- zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
-#endif /* ITT_COMPLETE_GROUP */
- }
- }
- else
- *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
- }
-
- if (groups == __itt_group_legacy)
- {
- /* Compatibility with legacy tools */
- ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore);
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA);
- ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW);
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
- ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare);
- ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel);
- ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired);
- ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing);
- }
-
-#ifdef ITT_COMPLETE_GROUP
- for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
- if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group)
- *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
-#endif /* ITT_COMPLETE_GROUP */
- break;
- case 2:
- __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init");
- if (__itt_api_init_ptr)
- __itt_api_init_ptr(&_N_(_ittapi_global), init_groups);
- break;
- }
- }
- else
- {
- __itt_nullify_all_pointers();
-
- __itt_report_error(__itt_error_no_module, lib_name,
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- __itt_system_error()
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- dlerror()
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- );
- }
- }
- else
- {
- __itt_nullify_all_pointers();
- }
- _N_(_ittapi_global).api_initialized = 1;
- current_thread = 0;
- /* !!! Just to avoid unused code elimination !!! */
- if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0;
- }
- }
-
-#ifndef ITT_SIMPLE_INIT
- __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
-#endif /* ITT_SIMPLE_INIT */
- }
-
- /* Evaluating if any function ptr is non empty and it's in init_groups */
- for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
- if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func &&
- _N_(_ittapi_global).api_list_ptr[i].group & init_groups)
- return 1;
- return 0;
-}
-
-ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler)
-{
- __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
- _N_(_ittapi_global).error_handler = (void*)(size_t)handler;
- return prev;
-}
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#pragma warning(pop)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
+ /* Nullify all pointers except domain_create and string_handle_create */
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(push)
+#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
+#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_EXTERN_C void _N_(fini_ittlib)(void)
+{
+ __itt_api_fini_t* __itt_api_fini_ptr;
+ static volatile TIDT current_thread = 0;
+
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ if (current_thread == 0)
+ {
+ current_thread = __itt_thread_id();
+ __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini");
+ if (__itt_api_fini_ptr)
+ __itt_api_fini_ptr(&_N_(_ittapi_global));
+
+ __itt_nullify_all_pointers();
+
+ /* TODO: !!! not safe !!! library unload is not supported so far.
+ * if (_N_(_ittapi_global).lib != NULL)
+ * __itt_unload_lib(_N_(_ittapi_global).lib);
+ * _N_(_ittapi_global).lib = NULL;
+ */
+ _N_(_ittapi_global).api_initialized = 0;
+ current_thread = 0;
+ }
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ }
+}
+
+ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups)
+{
+ int i;
+ __itt_group_id groups;
+#ifdef ITT_COMPLETE_GROUP
+ __itt_group_id zero_group = __itt_group_none;
+#endif /* ITT_COMPLETE_GROUP */
+ static volatile TIDT current_thread = 0;
+
+ if (!_N_(_ittapi_global).api_initialized)
+ {
+#ifndef ITT_SIMPLE_INIT
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+#endif /* ITT_SIMPLE_INIT */
+
+ if (!_N_(_ittapi_global).api_initialized)
+ {
+ if (current_thread == 0)
+ {
+ current_thread = __itt_thread_id();
+ _N_(_ittapi_global).thread_list->tid = current_thread;
+ if (lib_name == NULL)
+ lib_name = __itt_get_lib_name();
+ groups = __itt_get_groups();
+ if (groups != __itt_group_none || lib_name != NULL)
+ {
+ _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
+
+ if (_N_(_ittapi_global).lib != NULL)
+ {
+ __itt_api_init_t* __itt_api_init_ptr;
+ int lib_version = __itt_lib_version(_N_(_ittapi_global).lib);
+
+ switch (lib_version) {
+ case 0:
+ groups = __itt_group_legacy;
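+ /* fall through: a legacy collector is wired up via the same per-symbol lookup as version 1 */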
+ case 1:
+ /* Fill all pointers from dynamic library */
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ {
+ if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups)
+ {
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name);
+ if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL)
+ {
+ /* Restore pointers for functions with a static implementation */
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+ __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name);
+#ifdef ITT_COMPLETE_GROUP
+ zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
+#endif /* ITT_COMPLETE_GROUP */
+ }
+ }
+ else
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+ }
+
+ if (groups == __itt_group_legacy)
+ {
+ /* Compatibility with legacy tools */
+ ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA);
+ ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW);
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare);
+ ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel);
+ ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired);
+ ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing);
+ }
+
+#ifdef ITT_COMPLETE_GROUP
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+#endif /* ITT_COMPLETE_GROUP */
+ break;
+ case 2:
+ __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init");
+ if (__itt_api_init_ptr)
+ __itt_api_init_ptr(&_N_(_ittapi_global), init_groups);
+ break;
+ }
+ }
+ else
+ {
+ __itt_nullify_all_pointers();
+
+ __itt_report_error(__itt_error_no_module, lib_name,
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ __itt_system_error()
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlerror()
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ );
+ }
+ }
+ else
+ {
+ __itt_nullify_all_pointers();
+ }
+ _N_(_ittapi_global).api_initialized = 1;
+ current_thread = 0;
+ /* !!! Just to avoid unused code elimination !!! */
+ if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0;
+ }
+ }
+
+#ifndef ITT_SIMPLE_INIT
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif /* ITT_SIMPLE_INIT */
+ }
+
+ /* Check whether any function pointer is non-empty and belongs to init_groups */
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func &&
+ _N_(_ittapi_global).api_list_ptr[i].group & init_groups)
+ return 1;
+ return 0;
+}
+
+ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler)
+{
+ __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+ _N_(_ittapi_global).error_handler = (void*)(size_t)handler;
+ return prev;
+}
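/*
 * Editor's note: a hedged usage sketch for the hook installed by
 * _N_(set_error_handler) above, which is exported to users as
 * __itt_set_error_handler. The handler receives the error code and a va_list
 * whose layout depends on the code; this example only prints the code.
 */
static void my_itt_error_handler(__itt_error_code code, va_list args)
{
    (void)args;  /* argument layout varies per error code */
    fprintf(stderr, "ittnotify error: %d\n", (int)code);
}
/* At startup: __itt_set_error_handler(my_itt_error_handler); */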
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(pop)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h
index d49dfc6dae..a218cc87bf 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_static.h
@@ -1,316 +1,316 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ittnotify_config.h"
-
-#ifndef ITT_FORMAT_DEFINED
-# ifndef ITT_FORMAT
-# define ITT_FORMAT
-# endif /* ITT_FORMAT */
-# ifndef ITT_NO_PARAMS
-# define ITT_NO_PARAMS
-# endif /* ITT_NO_PARAMS */
-#endif /* ITT_FORMAT_DEFINED */
-
-/*
- * parameters for macro expected:
- * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt)
- */
-#ifdef __ITT_INTERNAL_INIT
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"")
-ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"")
-ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args")
-ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args")
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"")
-ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, "\"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args")
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
-ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args")
-#endif /* __ITT_INTERNAL_BODY */
-
-ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
-
-#else /* __ITT_INTERNAL_INIT */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x")
-ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x")
-ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"")
-ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x")
-ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p")
-
-ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p")
-ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p")
-ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p")
-ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p")
-
-ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p")
-ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args")
-ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d")
-ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d")
-
-ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p")
-ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p")
-ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p")
-ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p")
-
-ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"")
-ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p")
-ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"")
-ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p")
-ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), model_lock_acquire, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), model_lock_release, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d")
-ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d")
-ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d")
-ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d")
-ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p")
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"")
-ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"")
-ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"")
-ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d")
-ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"")
-ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d")
-ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"")
-ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d")
-ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args")
-ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args")
-ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d")
-ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
-#endif /* __ITT_INTERNAL_BODY */
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"")
-ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"")
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", \"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
-ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d")
-ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d")
-ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p")
-ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p")
-ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d")
-ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d")
-ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args")
-ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args")
-ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args")
-ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args")
-ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u")
-ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u")
-
-ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu")
-ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu")
-
-ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args")
-
-ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu")
-
-#ifndef __ITT_INTERNAL_BODY
-ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p")
-ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p")
-ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu")
-#endif /* __ITT_INTERNAL_BODY */
-
-ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p")
-
-ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, __itt_group_structure, "%p, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p")
-
-ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p")
-ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu")
-
-ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d")
-
-ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p")
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu")
-ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p")
-ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p")
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d")
-ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d")
-ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d")
-#endif /* __ITT_INTERNAL_BODY */
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x")
-ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", \"%S\", %x")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x")
-ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
-ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
-ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
-ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *p), (ITT_FORMAT p), notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
-#endif /* __ITT_INTERNAL_BODY */
-
-ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu")
-ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu")
-ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu")
-
-ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args")
-ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d")
-ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d")
-ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d")
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"")
-ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"")
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
-ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p")
-ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p")
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"")
-ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"")
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
-ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p")
-ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p")
-ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu")
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"")
-ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"")
-ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, __itt_group_mark, "%d, \"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d")
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"")
-ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d")
-
-#ifndef __ITT_INTERNAL_BODY
-ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args")
-#endif /* __ITT_INTERNAL_BODY */
-ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p")
-ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p")
-ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p")
-
-ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p")
-ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args")
-ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu")
-ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu")
-ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu")
-ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
-ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, __itt_group_structure, "%p, %lu")
-ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu")
-ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d")
-ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p")
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu")
-ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu")
-ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu")
-ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d")
-ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d")
-ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p")
-
-#ifndef __ITT_INTERNAL_BODY
-ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args")
-#endif /* __ITT_INTERNAL_BODY */
-
-#ifndef __ITT_INTERNAL_BODY
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
-ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d")
-#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
-
-#endif /* __ITT_INTERNAL_INIT */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ittnotify_config.h"
+
+#ifndef ITT_FORMAT_DEFINED
+# ifndef ITT_FORMAT
+# define ITT_FORMAT
+# endif /* ITT_FORMAT */
+# ifndef ITT_NO_PARAMS
+# define ITT_NO_PARAMS
+# endif /* ITT_NO_PARAMS */
+#endif /* ITT_FORMAT_DEFINED */
+
+/*
+ * parameters for macro expected:
+ * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt)
+ */
+#ifdef __ITT_INTERNAL_INIT
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args")
+ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args")
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"")
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args")
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
+ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+
+ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
+
+#else /* __ITT_INTERNAL_INIT */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x")
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x")
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"")
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x")
+ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p")
+
+ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p")
+
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p")
+ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args")
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d")
+ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d")
+
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p")
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p")
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p")
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p")
+
+ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"")
+ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p")
+ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"")
+ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p")
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), model_lock_acquire, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), model_lock_release, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d")
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"")
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p")
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p")
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u")
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u")
+
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu")
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu")
+
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args")
+
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu")
+
+#ifndef __ITT_INTERNAL_BODY
+ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu")
+#endif /* __ITT_INTERNAL_BODY */
+
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p")
+
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p")
+
+ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu")
+
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d")
+
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p")
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu")
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p")
+ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d")
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d")
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d")
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x")
+ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", \"%S\", %x")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x")
+ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *p), (ITT_FORMAT p), notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+#endif /* __ITT_INTERNAL_BODY */
+
+ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu")
+ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu")
+ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu")
+
+ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args")
+ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d")
+ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d")
+ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p")
+ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"")
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p")
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p")
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"")
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, __itt_group_mark, "%d, \"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d")
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"")
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d")
+
+#ifndef __ITT_INTERNAL_BODY
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p")
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p")
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p")
+
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args")
+ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu")
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu")
+ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu")
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, __itt_group_structure, "%p, %lu")
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu")
+ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d")
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p")
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu")
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu")
+ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu")
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d")
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d")
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p")
+
+#ifndef __ITT_INTERNAL_BODY
+ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+
+#endif /* __ITT_INTERNAL_INIT */
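The restored block above (ittnotify_static.h) is an X-macro table: every ITT_STUB/ITT_STUBV line describes one API entry point (linkage, return type, name, argument list, forwarding pack, exported name, group mask, and a printf-style format for tracing the arguments), and the translation unit that includes the header supplies its own definition of ITT_STUB/ITT_STUBV to generate whatever it needs from the same table. The following is a minimal, self-contained sketch of that technique only; it is not the real ittnotify machinery, and all names, the simplified entry format, and the two expansions chosen are illustrative assumptions.

/* sketch.c -- illustrative only; entry format and names are hypothetical
 * and much simpler than the ITT_STUB entries above. */
#include <stdio.h>

/* One table of entries: STUB(ret, name, args, printf_fmt). */
#define API_TABLE(STUB) \
    STUB(void, task_begin, (const char *name), "\"%s\"") \
    STUB(void, task_end,   (void),             "no args")

/* Expansion 1: one function pointer per entry (left unset here). */
#define DECLARE_PTR(ret, name, args, fmt) static ret (*name##_ptr) args = NULL;
API_TABLE(DECLARE_PTR)
#undef DECLARE_PTR

/* Expansion 2: a name/format table, e.g. for tracing or lookup by name. */
typedef struct { const char *name; const char *fmt; } api_entry;
#define LIST_ENTRY(ret, name, args, fmt) { #name, fmt },
static const api_entry api_list[] = { API_TABLE(LIST_ENTRY) { NULL, NULL } };
#undef LIST_ENTRY

int main(void)
{
    (void)task_begin_ptr; (void)task_end_ptr;   /* silence unused-variable warnings */
    for (const api_entry *e = api_list; e->name != NULL; ++e)
        printf("%s expects args: %s\n", e->name, e->fmt);
    return 0;
}

Expanding one table several times keeps the declarations, the dispatch pointers, and the tracing metadata in sync by construction, which is presumably why the header above can be reused under both __ITT_INTERNAL_INIT and __ITT_INTERNAL_BODY.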
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h
index 56efda5333..3695a67089 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/ittnotify_types.h
@@ -1,67 +1,67 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _ITTNOTIFY_TYPES_H_
-#define _ITTNOTIFY_TYPES_H_
-
-typedef enum ___itt_group_id
-{
- __itt_group_none = 0,
- __itt_group_legacy = 1<<0,
- __itt_group_control = 1<<1,
- __itt_group_thread = 1<<2,
- __itt_group_mark = 1<<3,
- __itt_group_sync = 1<<4,
- __itt_group_fsync = 1<<5,
- __itt_group_jit = 1<<6,
- __itt_group_model = 1<<7,
- __itt_group_splitter_min = 1<<7,
- __itt_group_counter = 1<<8,
- __itt_group_frame = 1<<9,
- __itt_group_stitch = 1<<10,
- __itt_group_heap = 1<<11,
- __itt_group_splitter_max = 1<<12,
- __itt_group_structure = 1<<12,
- __itt_group_suppress = 1<<13,
- __itt_group_arrays = 1<<14,
- __itt_group_all = -1
-} __itt_group_id;
-
-#pragma pack(push, 8)
-
-typedef struct ___itt_group_list
-{
- __itt_group_id id;
- const char* name;
-} __itt_group_list;
-
-#pragma pack(pop)
-
-#define ITT_GROUP_LIST(varname) \
- static __itt_group_list varname[] = { \
- { __itt_group_all, "all" }, \
- { __itt_group_control, "control" }, \
- { __itt_group_thread, "thread" }, \
- { __itt_group_mark, "mark" }, \
- { __itt_group_sync, "sync" }, \
- { __itt_group_fsync, "fsync" }, \
- { __itt_group_jit, "jit" }, \
- { __itt_group_model, "model" }, \
- { __itt_group_counter, "counter" }, \
- { __itt_group_frame, "frame" }, \
- { __itt_group_stitch, "stitch" }, \
- { __itt_group_heap, "heap" }, \
- { __itt_group_structure, "structure" }, \
- { __itt_group_suppress, "suppress" }, \
- { __itt_group_arrays, "arrays" }, \
- { __itt_group_none, NULL } \
- }
-
-#endif /* _ITTNOTIFY_TYPES_H_ */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _ITTNOTIFY_TYPES_H_
+#define _ITTNOTIFY_TYPES_H_
+
+typedef enum ___itt_group_id
+{
+ __itt_group_none = 0,
+ __itt_group_legacy = 1<<0,
+ __itt_group_control = 1<<1,
+ __itt_group_thread = 1<<2,
+ __itt_group_mark = 1<<3,
+ __itt_group_sync = 1<<4,
+ __itt_group_fsync = 1<<5,
+ __itt_group_jit = 1<<6,
+ __itt_group_model = 1<<7,
+ __itt_group_splitter_min = 1<<7,
+ __itt_group_counter = 1<<8,
+ __itt_group_frame = 1<<9,
+ __itt_group_stitch = 1<<10,
+ __itt_group_heap = 1<<11,
+ __itt_group_splitter_max = 1<<12,
+ __itt_group_structure = 1<<12,
+ __itt_group_suppress = 1<<13,
+ __itt_group_arrays = 1<<14,
+ __itt_group_all = -1
+} __itt_group_id;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_list
+{
+ __itt_group_id id;
+ const char* name;
+} __itt_group_list;
+
+#pragma pack(pop)
+
+#define ITT_GROUP_LIST(varname) \
+ static __itt_group_list varname[] = { \
+ { __itt_group_all, "all" }, \
+ { __itt_group_control, "control" }, \
+ { __itt_group_thread, "thread" }, \
+ { __itt_group_mark, "mark" }, \
+ { __itt_group_sync, "sync" }, \
+ { __itt_group_fsync, "fsync" }, \
+ { __itt_group_jit, "jit" }, \
+ { __itt_group_model, "model" }, \
+ { __itt_group_counter, "counter" }, \
+ { __itt_group_frame, "frame" }, \
+ { __itt_group_stitch, "stitch" }, \
+ { __itt_group_heap, "heap" }, \
+ { __itt_group_structure, "structure" }, \
+ { __itt_group_suppress, "suppress" }, \
+ { __itt_group_arrays, "arrays" }, \
+ { __itt_group_none, NULL } \
+ }
+
+#endif /* _ITTNOTIFY_TYPES_H_ */
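ittnotify_types.h above defines each instrumentation group as a single-bit enum value so that any subset can be OR-ed into one mask (each ITT_STUB entry in the previous header carries such a mask, e.g. __itt_group_sync | __itt_group_fsync), and ITT_GROUP_LIST expands to a name-to-bit table. The sketch below shows how such a table could be used to turn a comma-separated group specification into a combined mask; parse_groups() is an illustrative helper, not part of the ittnotify API, and the include of the header shown above is assumed to be on the include path.

/* groups_sketch.c -- hedged example built only on the definitions above. */
#include <stdio.h>
#include <string.h>
#include "ittnotify_types.h"   /* assumed include path for the header above */

static __itt_group_id parse_groups(const char *spec)
{
    ITT_GROUP_LIST(group_list);          /* static __itt_group_list group_list[] = {...} */
    unsigned int mask = __itt_group_none;
    while (spec != NULL && *spec != '\0') {
        size_t len = strcspn(spec, ",");         /* length of the next token */
        for (int i = 0; group_list[i].name != NULL; i++) {
            if (strlen(group_list[i].name) == len &&
                strncmp(spec, group_list[i].name, len) == 0) {
                mask |= (unsigned int)group_list[i].id;
                break;
            }
        }
        spec += len;
        if (*spec == ',') spec++;                /* skip the separator */
    }
    return (__itt_group_id)mask;
}

int main(void)
{
    __itt_group_id mask = parse_groups("sync,heap");
    printf("mask = 0x%x\n", (unsigned int)mask);
    return 0;
}

With the values above, "sync,heap" resolves to (1<<4)|(1<<11) = 0x810, while __itt_group_all (-1) turns every bit on at once.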
diff --git a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h
index e0410a76f2..4cf81db634 100644
--- a/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h
+++ b/contrib/libs/cxxsupp/openmp/thirdparty/ittnotify/legacy/ittnotify.h
@@ -1,972 +1,972 @@
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LEGACY_ITTNOTIFY_H_
-#define _LEGACY_ITTNOTIFY_H_
-
-/**
- * @file
- * @brief Legacy User API functions and types
- */
-
-/** @cond exclude_from_documentation */
-#ifndef ITT_OS_WIN
-# define ITT_OS_WIN 1
-#endif /* ITT_OS_WIN */
-
-#ifndef ITT_OS_LINUX
-# define ITT_OS_LINUX 2
-#endif /* ITT_OS_LINUX */
-
-#ifndef ITT_OS_MAC
-# define ITT_OS_MAC 3
-#endif /* ITT_OS_MAC */
-
-#ifndef ITT_OS
-# if defined WIN32 || defined _WIN32
-# define ITT_OS ITT_OS_WIN
-# elif defined( __APPLE__ ) && defined( __MACH__ )
-# define ITT_OS ITT_OS_MAC
-# else
-# define ITT_OS ITT_OS_LINUX
-# endif
-#endif /* ITT_OS */
-
-#ifndef ITT_PLATFORM_WIN
-# define ITT_PLATFORM_WIN 1
-#endif /* ITT_PLATFORM_WIN */
-
-#ifndef ITT_PLATFORM_POSIX
-# define ITT_PLATFORM_POSIX 2
-#endif /* ITT_PLATFORM_POSIX */
-
-#ifndef ITT_PLATFORM_MAC
-# define ITT_PLATFORM_MAC 3
-#endif /* ITT_PLATFORM_MAC */
-
-#ifndef ITT_PLATFORM
-# if ITT_OS==ITT_OS_WIN
-# define ITT_PLATFORM ITT_PLATFORM_WIN
-# elif ITT_OS==ITT_OS_MAC
-# define ITT_PLATFORM ITT_PLATFORM_MAC
-# else
-# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif
-#endif /* ITT_PLATFORM */
-
-#if defined(_UNICODE) && !defined(UNICODE)
-#define UNICODE
-#endif
-
-#include <stddef.h>
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#include <tchar.h>
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <stdint.h>
-#if defined(UNICODE) || defined(_UNICODE)
-#include <wchar.h>
-#endif /* UNICODE || _UNICODE */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-#ifndef CDECL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define CDECL __cdecl
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_IX86 || defined __i386__
-# define CDECL __attribute__ ((cdecl))
-# else /* _M_IX86 || __i386__ */
-# define CDECL /* actual only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* CDECL */
-
-#ifndef STDCALL
-# if ITT_PLATFORM==ITT_PLATFORM_WIN
-# define STDCALL __stdcall
-# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LEGACY_ITTNOTIFY_H_
+#define _LEGACY_ITTNOTIFY_H_
+
+/**
+ * @file
+ * @brief Legacy User API functions and types
+ */
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
# if defined _M_IX86 || defined __i386__
-# define STDCALL __attribute__ ((stdcall))
-# else /* _M_IX86 || __i386__ */
-# define STDCALL /* supported only on x86 platform */
-# endif /* _M_IX86 || __i386__ */
-# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* STDCALL */
-
-#define ITTAPI CDECL
-#define LIBITTAPI CDECL
-
-/* TODO: Temporary for compatibility! */
-#define ITTAPI_CALL CDECL
-#define LIBITTAPI_CALL CDECL
-
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
-#define ITT_INLINE_ATTRIBUTE /* nothing */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/*
- * Generally, functions are not inlined unless optimization is specified.
- * For functions declared inline, this attribute inlines the function even
- * if no optimization level was specified.
- */
-#ifdef __STRICT_ANSI__
-#define ITT_INLINE static
-#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
-#else /* __STRICT_ANSI__ */
-#define ITT_INLINE static inline
-#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
-#endif /* __STRICT_ANSI__ */
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-/** @endcond */
-
-/** @cond exclude_from_documentation */
-/* Helper macro for joining tokens */
-#define ITT_JOIN_AUX(p,n) p##n
-#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
-
-#ifdef ITT_MAJOR
-#undef ITT_MAJOR
-#endif
-#ifdef ITT_MINOR
-#undef ITT_MINOR
-#endif
-#define ITT_MAJOR 3
-#define ITT_MINOR 0
-
-/* Standard versioning of a token with major and minor version numbers */
-#define ITT_VERSIONIZE(x) \
- ITT_JOIN(x, \
- ITT_JOIN(_, \
- ITT_JOIN(ITT_MAJOR, \
- ITT_JOIN(_, ITT_MINOR))))
-
-#ifndef INTEL_ITTNOTIFY_PREFIX
-# define INTEL_ITTNOTIFY_PREFIX __itt_
-#endif /* INTEL_ITTNOTIFY_PREFIX */
-#ifndef INTEL_ITTNOTIFY_POSTFIX
-# define INTEL_ITTNOTIFY_POSTFIX _ptr_
-#endif /* INTEL_ITTNOTIFY_POSTFIX */
-
-#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
-#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
-
-#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
-#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
-
-#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
-#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
-#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
-#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
-#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
-#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
-#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
-
-#ifdef ITT_STUB
-#undef ITT_STUB
-#endif
-#ifdef ITT_STUBV
-#undef ITT_STUBV
-#endif
-#define ITT_STUBV(api,type,name,args) \
- typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
- extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
-#define ITT_STUB ITT_STUBV
-/** @endcond */
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @defgroup legacy Legacy API
- * @{
- * @}
- */
-
-/**
- * @defgroup legacy_control Collection Control
- * @ingroup legacy
- * General behavior: application continues to run, but no profiling information is being collected
- *
- * Pausing occurs not only for the current thread but for all process as well as spawned processes
- * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
- * - Does not analyze or report errors that involve memory access.
- * - Other errors are reported as usual. Pausing data collection in
- * Intel(R) Parallel Inspector and Intel(R) Inspector XE
- * only pauses tracing and analyzing memory access.
- * It does not pause tracing or analyzing threading APIs.
- * .
- * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
- * - Does continue to record when new threads are started.
- * .
- * - Other effects:
- * - Possible reduction of runtime overhead.
- * .
- * @{
- */
-#ifndef _ITTNOTIFY_H_
-/** @brief Pause collection */
-void ITTAPI __itt_pause(void);
-/** @brief Resume collection */
-void ITTAPI __itt_resume(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, pause, (void))
-ITT_STUBV(ITTAPI, void, resume, (void))
-#define __itt_pause ITTNOTIFY_VOID(pause)
-#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
-#define __itt_resume ITTNOTIFY_VOID(resume)
-#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_pause()
-#define __itt_pause_ptr 0
-#define __itt_resume()
-#define __itt_resume_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_pause_ptr 0
-#define __itt_resume_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-#endif /* _ITTNOTIFY_H_ */
-/** @} legacy_control group */
-
-/**
- * @defgroup legacy_threads Threads
- * @ingroup legacy
- * Threads group
- * @warning Legacy API
- * @{
- */
-/**
- * @deprecated Legacy API
- * @brief Set name to be associated with thread in analysis GUI.
- * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched)
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen);
-int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_thr_name_set __itt_thr_name_setW
-# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr
-#else
-# define __itt_thr_name_set __itt_thr_name_setA
-# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-int LIBITTAPI __itt_thr_name_set(const char *name, int namelen);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen))
-ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA)
-#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA)
-#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW)
-#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set)
-#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_thr_name_setA(name, namelen)
-#define __itt_thr_name_setA_ptr 0
-#define __itt_thr_name_setW(name, namelen)
-#define __itt_thr_name_setW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_thr_name_set(name, namelen)
-#define __itt_thr_name_set_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_thr_name_setA_ptr 0
-#define __itt_thr_name_setW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_thr_name_set_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Mark current thread as ignored from this point on, for the duration of its existence.
- */
-void LIBITTAPI __itt_thr_ignore(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, thr_ignore, (void))
-#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore)
-#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_thr_ignore()
-#define __itt_thr_ignore_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_thr_ignore_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} legacy_threads group */
-
-/**
- * @defgroup legacy_sync Synchronization
- * @ingroup legacy
- * Synchronization group
- * @warning Legacy API
- * @{
- */
-/**
- * @hideinitializer
- * @brief possible value of attribute argument for sync object type
- */
-#define __itt_attr_barrier 1
-
-/**
- * @hideinitializer
- * @brief possible value of attribute argument for sync object type
- */
-#define __itt_attr_mutex 2
-
-/**
- * @deprecated Legacy API
- * @brief Assign a name to a sync object using char or Unicode string
- * @param[in] addr - pointer to the sync object. You should use a real pointer to your object
- * to make sure that the values don't clash with other object addresses
- * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will
- * be assumed to be of generic "User Synchronization" type
- * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned
- * to the object -- you can use the __itt_sync_rename call later to assign
- * the name
- * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the
- * exact semantics of how prepare/acquired/releasing calls work.
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute);
-void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_sync_set_name __itt_sync_set_nameW
-# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr
-#else /* UNICODE */
-# define __itt_sync_set_name __itt_sync_set_nameA
-# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute))
-ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA)
-#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA)
-#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW)
-#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_set_name ITTNOTIFY_VOID(sync_set_name)
-#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_set_nameA(addr, objtype, objname, attribute)
-#define __itt_sync_set_nameA_ptr 0
-#define __itt_sync_set_nameW(addr, objtype, objname, attribute)
-#define __itt_sync_set_nameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_set_name(addr, objtype, objname, attribute)
-#define __itt_sync_set_name_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_sync_set_nameA_ptr 0
-#define __itt_sync_set_nameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_sync_set_name_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Assign a name and type to a sync object using char or Unicode string
- * @param[in] addr - pointer to the sync object. You should use a real pointer to your object
- * to make sure that the values don't clash with other object addresses
- * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will
- * be assumed to be of generic "User Synchronization" type
- * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned
- * to the object -- you can use the __itt_sync_rename call later to assign
- * the name
- * @param[in] typelen, namelen - a length of string for appropriate objtype and objname parameter
- * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the
- * exact semantics of how prepare/acquired/releasing calls work.
- * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched)
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute);
-int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_notify_sync_name __itt_notify_sync_nameW
-#else
-# define __itt_notify_sync_name __itt_notify_sync_nameA
-#endif
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute))
-ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA)
-#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA)
-#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW)
-#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name)
-#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute)
-#define __itt_notify_sync_nameA_ptr 0
-#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute)
-#define __itt_notify_sync_nameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute)
-#define __itt_notify_sync_name_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_notify_sync_nameA_ptr 0
-#define __itt_notify_sync_nameW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_notify_sync_name_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Enter spin loop on user-defined sync object
- */
-void LIBITTAPI __itt_notify_sync_prepare(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr))
-#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare)
-#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_notify_sync_prepare(addr)
-#define __itt_notify_sync_prepare_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_notify_sync_prepare_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Quit spin loop without acquiring spin object
- */
-void LIBITTAPI __itt_notify_sync_cancel(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr))
-#define __itt_notify_sync_cancel ITTNOTIFY_VOID(notify_sync_cancel)
-#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_notify_sync_cancel(addr)
-#define __itt_notify_sync_cancel_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_notify_sync_cancel_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Successful spin loop completion (sync object acquired)
- */
-void LIBITTAPI __itt_notify_sync_acquired(void *addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr))
-#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired)
-#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_notify_sync_acquired(addr)
-#define __itt_notify_sync_acquired_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_notify_sync_acquired_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Start sync object releasing code. Is called before the lock release call.
- */
-void LIBITTAPI __itt_notify_sync_releasing(void* addr);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr))
-#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing)
-#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_notify_sync_releasing(addr)
-#define __itt_notify_sync_releasing_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_notify_sync_releasing_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} legacy_sync group */
-
-#ifndef _ITTNOTIFY_H_
-/**
- * @defgroup legacy_events Events
- * @ingroup legacy
- * Events group
- * @{
- */
-
-/** @brief user event type */
-typedef int __itt_event;
-
-/**
- * @brief Create an event notification
- * @note name or namelen being null/name and namelen not matching, user event feature not enabled
- * @return non-zero event identifier upon success and __itt_err otherwise
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
-__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_event_create __itt_event_createW
-# define __itt_event_create_ptr __itt_event_createW_ptr
-#else
-# define __itt_event_create __itt_event_createA
-# define __itt_event_create_ptr __itt_event_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
-ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
-#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
-#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
-#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_event_create ITTNOTIFY_DATA(event_create)
-#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_event_createA(name, namelen) (__itt_event)0
-#define __itt_event_createA_ptr 0
-#define __itt_event_createW(name, namelen) (__itt_event)0
-#define __itt_event_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_event_create(name, namelen) (__itt_event)0
-#define __itt_event_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_event_createA_ptr 0
-#define __itt_event_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_event_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record an event occurrence.
- * @return __itt_err upon failure (invalid event id/user event feature not enabled)
- */
-int LIBITTAPI __itt_event_start(__itt_event event);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
-#define __itt_event_start ITTNOTIFY_DATA(event_start)
-#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_event_start(event) (int)0
-#define __itt_event_start_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_event_start_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @brief Record an event end occurrence.
- * @note It is optional if events do not have durations.
- * @return __itt_err upon failure (invalid event id/user event feature not enabled)
- */
-int LIBITTAPI __itt_event_end(__itt_event event);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
-#define __itt_event_end ITTNOTIFY_DATA(event_end)
-#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_event_end(event) (int)0
-#define __itt_event_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_event_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} legacy_events group */
-#endif /* _ITTNOTIFY_H_ */
-
-/**
- * @defgroup legacy_memory Memory Accesses
- * @ingroup legacy
- */
-
-/**
- * @deprecated Legacy API
- * @brief Inform the tool of memory accesses on reading
- */
-void LIBITTAPI __itt_memory_read(void *addr, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size))
-#define __itt_memory_read ITTNOTIFY_VOID(memory_read)
-#define __itt_memory_read_ptr ITTNOTIFY_NAME(memory_read)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_memory_read(addr, size)
-#define __itt_memory_read_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_memory_read_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Inform the tool of memory accesses on writing
- */
-void LIBITTAPI __itt_memory_write(void *addr, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size))
-#define __itt_memory_write ITTNOTIFY_VOID(memory_write)
-#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_memory_write(addr, size)
-#define __itt_memory_write_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_memory_write_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief Inform the tool of memory accesses on updating
- */
-void LIBITTAPI __itt_memory_update(void *address, size_t size);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size))
-#define __itt_memory_update ITTNOTIFY_VOID(memory_update)
-#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_memory_update(addr, size)
-#define __itt_memory_update_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_memory_update_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} legacy_memory group */
-
-/**
- * @defgroup legacy_state Thread and Object States
- * @ingroup legacy
- */
-
-/** @brief state type */
-typedef int __itt_state_t;
-
-/** @cond exclude_from_documentation */
-typedef enum __itt_obj_state {
- __itt_obj_state_err = 0,
- __itt_obj_state_clr = 1,
- __itt_obj_state_set = 2,
- __itt_obj_state_use = 3
-} __itt_obj_state_t;
-
-typedef enum __itt_thr_state {
- __itt_thr_state_err = 0,
- __itt_thr_state_clr = 1,
- __itt_thr_state_set = 2
-} __itt_thr_state_t;
-
-typedef enum __itt_obj_prop {
- __itt_obj_prop_watch = 1,
- __itt_obj_prop_ignore = 2,
- __itt_obj_prop_sharable = 3
-} __itt_obj_prop_t;
-
-typedef enum __itt_thr_prop {
- __itt_thr_prop_quiet = 1
-} __itt_thr_prop_t;
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief managing thread and object states
- */
-__itt_state_t LIBITTAPI __itt_state_get(void);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_state_t, state_get, (void))
-#define __itt_state_get ITTNOTIFY_DATA(state_get)
-#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_state_get(void) (__itt_state_t)0
-#define __itt_state_get_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_state_get_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief managing thread and object states
- */
-__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s))
-#define __itt_state_set ITTNOTIFY_DATA(state_set)
-#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_state_set(s) (__itt_state_t)0
-#define __itt_state_set_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_state_set_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief managing thread and object modes
- */
-__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, __itt_thr_state_t s);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s))
-#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set)
-#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0
-#define __itt_thr_mode_set_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_thr_mode_set_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/**
- * @deprecated Legacy API
- * @brief managing thread and object modes
- */
-__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s))
-#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set)
-#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0
-#define __itt_obj_mode_set_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_obj_mode_set_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} legacy_state group */
-
-/**
- * @defgroup frames Frames
- * @ingroup legacy
- * Frames group
- * @{
- */
-/**
- * @brief opaque structure for frame identification
- */
-typedef struct __itt_frame_t *__itt_frame;
-
-/**
- * @brief Create a global frame with given domain
- */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-__itt_frame ITTAPI __itt_frame_createA(const char *domain);
-__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain);
-#if defined(UNICODE) || defined(_UNICODE)
-# define __itt_frame_create __itt_frame_createW
-# define __itt_frame_create_ptr __itt_frame_createW_ptr
-#else /* UNICODE */
-# define __itt_frame_create __itt_frame_createA
-# define __itt_frame_create_ptr __itt_frame_createA_ptr
-#endif /* UNICODE */
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-__itt_frame ITTAPI __itt_frame_create(const char *domain);
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain))
-ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain))
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain))
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA)
-#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA)
-#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW)
-#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW)
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_frame_create ITTNOTIFY_DATA(frame_create)
-#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create)
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#else /* INTEL_NO_ITTNOTIFY_API */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_frame_createA(domain)
-#define __itt_frame_createA_ptr 0
-#define __itt_frame_createW(domain)
-#define __itt_frame_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_frame_create(domain)
-#define __itt_frame_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-#define __itt_frame_createA_ptr 0
-#define __itt_frame_createW_ptr 0
-#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#define __itt_frame_create_ptr 0
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-
-/** @brief Record an frame begin occurrence. */
-void ITTAPI __itt_frame_begin(__itt_frame frame);
-/** @brief Record an frame end occurrence. */
-void ITTAPI __itt_frame_end (__itt_frame frame);
-
-/** @cond exclude_from_documentation */
-#ifndef INTEL_NO_MACRO_BODY
-#ifndef INTEL_NO_ITTNOTIFY_API
-ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame))
-ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame))
-#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin)
-#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin)
-#define __itt_frame_end ITTNOTIFY_VOID(frame_end)
-#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end)
-#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_frame_begin(frame)
-#define __itt_frame_begin_ptr 0
-#define __itt_frame_end(frame)
-#define __itt_frame_end_ptr 0
-#endif /* INTEL_NO_ITTNOTIFY_API */
-#else /* INTEL_NO_MACRO_BODY */
-#define __itt_frame_begin_ptr 0
-#define __itt_frame_end_ptr 0
-#endif /* INTEL_NO_MACRO_BODY */
-/** @endcond */
-/** @} frames group */
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* _LEGACY_ITTNOTIFY_H_ */
+# define CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define CDECL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR 3
+#define ITT_MINOR 0
+
+/* Standard versioning of a token with major and minor version numbers */
+#define ITT_VERSIONIZE(x) \
+ ITT_JOIN(x, \
+ ITT_JOIN(_, \
+ ITT_JOIN(ITT_MAJOR, \
+ ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+# define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+# define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
+
+#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args) \
+ typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
+ extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
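An illustrative expansion of the dispatch macros defined above (not part of the header), assuming the default INTEL_ITTNOTIFY_PREFIX/INTEL_ITTNOTIFY_POSTFIX and ITT_MAJOR/ITT_MINOR values of 3 and 0; the include path is an assumption:

#include "legacy/ittnotify.h"   /* include path is an assumption */

int main(void)
{
    /* ITTNOTIFY_NAME(pause) resolves step by step:
     *   ITT_JOIN(pause, _ptr_)            -> pause_ptr_
     *   ITTNOTIFY_NAME_AUX(pause_ptr_)    -> __itt_pause_ptr_
     *   ITT_VERSIONIZE(__itt_pause_ptr_)  -> __itt_pause_ptr__3_0
     * so the statement below calls the collector only when that
     * versioned global function pointer has been filled in. */
    ITTNOTIFY_VOID(pause)();
    return 0;
}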
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @defgroup legacy Legacy API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup legacy_control Collection Control
+ * @ingroup legacy
+ * General behavior: the application continues to run, but no profiling information is collected
+ *
+ * Pausing occurs not only for the current thread but for the whole process, as well as for spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ * - Does not analyze or report errors that involve memory access.
+ * - Other errors are reported as usual. Pausing data collection in
+ * Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ * only pauses tracing and analyzing memory access.
+ * It does not pause tracing or analyzing threading APIs.
+ * .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Does continue to record when new threads are started.
+ * .
+ * - Other effects:
+ * - Possible reduction of runtime overhead.
+ * .
+ * @{
+ */
+#ifndef _ITTNOTIFY_H_
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause, (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+#define __itt_pause ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
+#define __itt_resume ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr 0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr 0
+#define __itt_resume_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+#endif /* _ITTNOTIFY_H_ */
+/** @} legacy_control group */
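A minimal usage sketch of the collection-control calls above; the include path and the warm_up/hot_loop helpers are hypothetical:

#include "legacy/ittnotify.h"   /* include path is an assumption */

static void warm_up(void)  { /* hypothetical setup we do not want profiled */ }
static void hot_loop(void) { /* hypothetical region we do want profiled    */ }

int main(void)
{
    __itt_pause();    /* suspend data collection during setup */
    warm_up();
    __itt_resume();   /* collect data for the region of interest */
    hot_loop();
    return 0;
}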
+
+/**
+ * @defgroup legacy_threads Threads
+ * @ingroup legacy
+ * Threads group
+ * @warning Legacy API
+ * @{
+ */
+/**
+ * @deprecated Legacy API
+ * @brief Set the name to be associated with the thread in the analysis GUI.
+ * @return __itt_err upon failure (name or namelen is null, or name and namelen are mismatched)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen);
+int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_thr_name_set __itt_thr_name_setW
+# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr
+#else
+# define __itt_thr_name_set __itt_thr_name_setA
+# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int LIBITTAPI __itt_thr_name_set(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA)
+#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA)
+#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW)
+#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set)
+#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thr_name_setA(name, namelen)
+#define __itt_thr_name_setA_ptr 0
+#define __itt_thr_name_setW(name, namelen)
+#define __itt_thr_name_setW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thr_name_set(name, namelen)
+#define __itt_thr_name_set_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thr_name_setA_ptr 0
+#define __itt_thr_name_setW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thr_name_set_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void LIBITTAPI __itt_thr_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, thr_ignore, (void))
+#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore)
+#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thr_ignore()
+#define __itt_thr_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thr_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_threads group */
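A minimal sketch of naming the calling thread via the legacy call above, assuming a POSIX or non-UNICODE build so the narrow-character entry point is selected; the include path is an assumption:

#include <string.h>
#include "legacy/ittnotify.h"   /* include path is an assumption */

int main(void)
{
    const char *name = "io-worker";
    int rc = __itt_thr_name_set(name, (int)strlen(name));
    (void)rc;   /* __itt_err on failure, per the doc above */
    return 0;
}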
+
+/**
+ * @defgroup legacy_sync Synchronization
+ * @ingroup legacy
+ * Synchronization group
+ * @warning Legacy API
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex 2
+
+/**
+ * @deprecated Legacy API
+ * @brief Assign a name to a sync object using char or Unicode string
+ * @param[in] addr - pointer to the sync object. You should use a real pointer to your object
+ * to make sure that the values don't clash with other object addresses
+ * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will
+ * be assumed to be of generic "User Synchronization" type
+ * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned
+ * to the object -- you can use the __itt_sync_rename call later to assign
+ * the name
+ * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the
+ * exact semantics of how prepare/acquired/releasing calls work.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute);
+void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_set_name __itt_sync_set_nameW
+# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr
+#else /* UNICODE */
+# define __itt_sync_set_name __itt_sync_set_nameA
+# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA)
+#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA)
+#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW)
+#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_set_name ITTNOTIFY_VOID(sync_set_name)
+#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_set_nameA(addr, objtype, objname, attribute)
+#define __itt_sync_set_nameA_ptr 0
+#define __itt_sync_set_nameW(addr, objtype, objname, attribute)
+#define __itt_sync_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_set_name(addr, objtype, objname, attribute)
+#define __itt_sync_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_set_nameA_ptr 0
+#define __itt_sync_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Assign a name and type to a sync object using char or Unicode string
+ * @param[in] addr - pointer to the sync object. You should use a real pointer to your object
+ * to make sure that the values don't clash with other object addresses
+ * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will
+ * be assumed to be of generic "User Synchronization" type
+ * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned
+ * to the object -- you can use the __itt_sync_rename call later to assign
+ * the name
+ * @param[in] typelen, namelen - the lengths of the corresponding objtype and objname strings
+ * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the
+ * exact semantics of how prepare/acquired/releasing calls work.
+ * @return __itt_err upon failure (name or namelen is null, or name and namelen are mismatched)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute);
+int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_notify_sync_name __itt_notify_sync_nameW
+#else
+# define __itt_notify_sync_name __itt_notify_sync_nameA
+#endif
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute))
+ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA)
+#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA)
+#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW)
+#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name)
+#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute)
+#define __itt_notify_sync_nameA_ptr 0
+#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute)
+#define __itt_notify_sync_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute)
+#define __itt_notify_sync_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_notify_sync_nameA_ptr 0
+#define __itt_notify_sync_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_notify_sync_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Enter spin loop on user-defined sync object
+ */
+void LIBITTAPI __itt_notify_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr))
+#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare)
+#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_prepare(addr)
+#define __itt_notify_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Quit spin loop without acquiring spin object
+ */
+void LIBITTAPI __itt_notify_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr))
+#define __itt_notify_sync_cancel ITTNOTIFY_VOID(notify_sync_cancel)
+#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_cancel(addr)
+#define __itt_notify_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Successful spin loop completion (sync object acquired)
+ */
+void LIBITTAPI __itt_notify_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr))
+#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired)
+#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_acquired(addr)
+#define __itt_notify_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Start sync object releasing code. This is called before the lock release call.
+ */
+void LIBITTAPI __itt_notify_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr))
+#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing)
+#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_releasing(addr)
+#define __itt_notify_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_sync group */
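A minimal sketch tying the sync-object calls above to a toy C11 spin lock; the lock itself, the object names, and the include path are assumptions (a non-UNICODE build is assumed so the narrow-character set_name is selected):

#include <stdatomic.h>
#include "legacy/ittnotify.h"   /* include path is an assumption */

static atomic_flag lock = ATOMIC_FLAG_INIT;

static void enter(void)
{
    __itt_notify_sync_prepare(&lock);      /* entering the spin loop */
    while (atomic_flag_test_and_set(&lock))
        ;                                  /* spinning */
    __itt_notify_sync_acquired(&lock);     /* spin finished, lock held */
}

static void leave(void)
{
    __itt_notify_sync_releasing(&lock);    /* about to release the lock */
    atomic_flag_clear(&lock);
}

int main(void)
{
    /* Label the object once so the tool can display it by name. */
    __itt_sync_set_name(&lock, "spin lock", "demo lock", __itt_attr_mutex);
    enter();
    /* ... critical section ... */
    leave();
    return 0;
}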
+
+#ifndef _ITTNOTIFY_H_
+/**
+ * @defgroup legacy_events Events
+ * @ingroup legacy
+ * Events group
+ * @{
+ */
+
+/** @brief user event type */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Failure cases: name or namelen is null, name and namelen are mismatched, or the user event feature is not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_event_create __itt_event_createW
+# define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+# define __itt_event_create __itt_event_createA
+# define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen) (__itt_event)0
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @note Calling this is optional if events do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_events group */
+#endif /* _ITTNOTIFY_H_ */
+
+/**
+ * @defgroup legacy_memory Memory Accesses
+ * @ingroup legacy
+ */
+
+/**
+ * @deprecated Legacy API
+ * @brief Inform the tool of memory accesses on reading
+ */
+void LIBITTAPI __itt_memory_read(void *addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size))
+#define __itt_memory_read ITTNOTIFY_VOID(memory_read)
+#define __itt_memory_read_ptr ITTNOTIFY_NAME(memory_read)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_memory_read(addr, size)
+#define __itt_memory_read_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_memory_read_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Inform the tool of memory accesses on writing
+ */
+void LIBITTAPI __itt_memory_write(void *addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size))
+#define __itt_memory_write ITTNOTIFY_VOID(memory_write)
+#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_memory_write(addr, size)
+#define __itt_memory_write_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_memory_write_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Inform the tool of memory accesses on updating
+ */
+void LIBITTAPI __itt_memory_update(void *address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size))
+#define __itt_memory_update ITTNOTIFY_VOID(memory_update)
+#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_memory_update(addr, size)
+#define __itt_memory_update_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_memory_update_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_memory group */
+
+/**
+ * @defgroup legacy_state Thread and Object States
+ * @ingroup legacy
+ */
+
+/** @brief state type */
+typedef int __itt_state_t;
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_obj_state {
+ __itt_obj_state_err = 0,
+ __itt_obj_state_clr = 1,
+ __itt_obj_state_set = 2,
+ __itt_obj_state_use = 3
+} __itt_obj_state_t;
+
+typedef enum __itt_thr_state {
+ __itt_thr_state_err = 0,
+ __itt_thr_state_clr = 1,
+ __itt_thr_state_set = 2
+} __itt_thr_state_t;
+
+typedef enum __itt_obj_prop {
+ __itt_obj_prop_watch = 1,
+ __itt_obj_prop_ignore = 2,
+ __itt_obj_prop_sharable = 3
+} __itt_obj_prop_t;
+
+typedef enum __itt_thr_prop {
+ __itt_thr_prop_quiet = 1
+} __itt_thr_prop_t;
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object states
+ */
+__itt_state_t LIBITTAPI __itt_state_get(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_state_t, state_get, (void))
+#define __itt_state_get ITTNOTIFY_DATA(state_get)
+#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_state_get(void) (__itt_state_t)0
+#define __itt_state_get_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_state_get_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object states
+ */
+__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s))
+#define __itt_state_set ITTNOTIFY_DATA(state_set)
+#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_state_set(s) (__itt_state_t)0
+#define __itt_state_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_state_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object modes
+ */
+__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, __itt_thr_state_t s);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s))
+#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set)
+#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0
+#define __itt_thr_mode_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thr_mode_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object modes
+ */
+__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s))
+#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set)
+#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0
+#define __itt_obj_mode_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_obj_mode_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_state group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup legacy
+ * Frames group
+ * @{
+ */
+/**
+ * @brief opaque structure for frame identification
+ */
+typedef struct __itt_frame_t *__itt_frame;
+
+/**
+ * @brief Create a global frame with given domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_frame ITTAPI __itt_frame_createA(const char *domain);
+__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_frame_create __itt_frame_createW
+# define __itt_frame_create_ptr __itt_frame_createW_ptr
+#else /* UNICODE */
+# define __itt_frame_create __itt_frame_createA
+# define __itt_frame_create_ptr __itt_frame_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_frame ITTAPI __itt_frame_create(const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain))
+ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA)
+#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA)
+#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW)
+#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create ITTNOTIFY_DATA(frame_create)
+#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA(domain) (__itt_frame)0
+#define __itt_frame_createA_ptr 0
+#define __itt_frame_createW(domain) (__itt_frame)0
+#define __itt_frame_createW_ptr 0
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create(domain)  (__itt_frame)0
+#define __itt_frame_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA_ptr 0
+#define __itt_frame_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Record a frame begin occurrence. */
+void ITTAPI __itt_frame_begin(__itt_frame frame);
+/** @brief Record a frame end occurrence. */
+void ITTAPI __itt_frame_end  (__itt_frame frame);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame))
+ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame))
+#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin)
+#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin)
+#define __itt_frame_end ITTNOTIFY_VOID(frame_end)
+#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_frame_begin(frame)
+#define __itt_frame_begin_ptr 0
+#define __itt_frame_end(frame)
+#define __itt_frame_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_frame_begin_ptr 0
+#define __itt_frame_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _LEGACY_ITTNOTIFY_H_ */
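For orientation, here is a minimal usage sketch of the legacy user-event API declared above. It is a hypothetical snippet, not part of the patch: the include path and the event name are illustrative, a non-UNICODE build is assumed (so __itt_event_create takes a char* name), and a real caller might compare the returned id against __itt_err.

    /* hypothetical example of the legacy user-event API declared above */
    #include "ittnotify.h"   /* assumed include; use the vendored header's actual path */

    void run_measured_step(void)
    {
        /* create a named user event: name plus its length, per the declaration above */
        __itt_event ev = __itt_event_create("measured_step", 13);

        __itt_event_start(ev);   /* record the event begin */
        /* ... work being measured ... */
        __itt_event_end(ev);     /* optional when the event carries no duration */
    }

When the API is stubbed out (INTEL_NO_ITTNOTIFY_API or INTEL_NO_MACRO_BODY), the calls expand to the no-op macro bodies shown above, so a sketch like this compiles either way.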
diff --git a/contrib/libs/cxxsupp/openmp/ya.make b/contrib/libs/cxxsupp/openmp/ya.make
index 6b5090ce68..2c64468375 100644
--- a/contrib/libs/cxxsupp/openmp/ya.make
+++ b/contrib/libs/cxxsupp/openmp/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
+LIBRARY()
+
LICENSE(
MIT AND
NCSA
@@ -14,13 +14,13 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
-NO_UTIL()
-NO_PLATFORM()
+NO_UTIL()
+
+NO_PLATFORM()
+
+NO_COMPILER_WARNINGS()
-NO_COMPILER_WARNINGS()
-
IF (SANITIZER_TYPE == thread)
NO_SANITIZE()
CFLAGS(
@@ -29,62 +29,62 @@ IF (SANITIZER_TYPE == thread)
ENDIF()
IF (SANITIZER_TYPE == memory)
- NO_SANITIZE()
+ NO_SANITIZE()
CFLAGS(
-fPIC
)
ENDIF()
-
+
COMPILE_C_AS_CXX()
-
+
CXXFLAGS(-fno-exceptions)
-
+
SET_APPEND(CFLAGS -fno-lto)
ADDINCL(
GLOBAL contrib/libs/cxxsupp/openmp
)
-
-ADDINCL(
- contrib/libs/cxxsupp/openmp/i18n
- contrib/libs/cxxsupp/openmp/include/41
- contrib/libs/cxxsupp/openmp/thirdparty/ittnotify
-)
-
-SRCS(
- kmp_alloc.c
- kmp_atomic.c
- kmp_csupport.c
- kmp_debug.c
- kmp_itt.c
- kmp_environment.c
- kmp_error.c
- kmp_global.c
- kmp_i18n.c
- kmp_io.c
- kmp_runtime.c
- kmp_settings.c
- kmp_str.c
- kmp_tasking.c
- kmp_taskq.c
- kmp_threadprivate.c
- kmp_utility.c
- z_Linux_util.c
- kmp_gsupport.c
- asm.S
- thirdparty/ittnotify/ittnotify_static.c
- kmp_barrier.cpp
- kmp_wait_release.cpp
- kmp_affinity.cpp
- kmp_dispatch.cpp
- kmp_lock.cpp
- kmp_sched.cpp
- kmp_taskdeps.cpp
- kmp_cancel.cpp
- kmp_ftn_cdecl.c
- kmp_ftn_extra.c
- kmp_version.c
- #ompt-general.c
-)
-
-END()
+
+ADDINCL(
+ contrib/libs/cxxsupp/openmp/i18n
+ contrib/libs/cxxsupp/openmp/include/41
+ contrib/libs/cxxsupp/openmp/thirdparty/ittnotify
+)
+
+SRCS(
+ kmp_alloc.c
+ kmp_atomic.c
+ kmp_csupport.c
+ kmp_debug.c
+ kmp_itt.c
+ kmp_environment.c
+ kmp_error.c
+ kmp_global.c
+ kmp_i18n.c
+ kmp_io.c
+ kmp_runtime.c
+ kmp_settings.c
+ kmp_str.c
+ kmp_tasking.c
+ kmp_taskq.c
+ kmp_threadprivate.c
+ kmp_utility.c
+ z_Linux_util.c
+ kmp_gsupport.c
+ asm.S
+ thirdparty/ittnotify/ittnotify_static.c
+ kmp_barrier.cpp
+ kmp_wait_release.cpp
+ kmp_affinity.cpp
+ kmp_dispatch.cpp
+ kmp_lock.cpp
+ kmp_sched.cpp
+ kmp_taskdeps.cpp
+ kmp_cancel.cpp
+ kmp_ftn_cdecl.c
+ kmp_ftn_extra.c
+ kmp_version.c
+ #ompt-general.c
+)
+
+END()
diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_asm.s b/contrib/libs/cxxsupp/openmp/z_Linux_asm.s
index c56182f900..7f649b849e 100644
--- a/contrib/libs/cxxsupp/openmp/z_Linux_asm.s
+++ b/contrib/libs/cxxsupp/openmp/z_Linux_asm.s
@@ -1,1445 +1,1445 @@
-// z_Linux_asm.s: - microtasking routines specifically
-// written for Intel platforms running Linux* OS
-
-//
-////===----------------------------------------------------------------------===//
-////
-//// The LLVM Compiler Infrastructure
-////
-//// This file is dual licensed under the MIT and the University of Illinois Open
-//// Source Licenses. See LICENSE.txt for details.
-////
-////===----------------------------------------------------------------------===//
-//
-
-// -----------------------------------------------------------------------
-// macros
-// -----------------------------------------------------------------------
-
-#include "kmp_config.h"
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-# if KMP_MIC
-//
-// the 'delay r16/r32/r64' should be used instead of the 'pause'.
-// The delay operation has the effect of removing the current thread from
-// the round-robin HT mechanism, and therefore speeds up the issue rate of
-// the other threads on the same core.
-//
-// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
-// barrier time to increase greatly for 3 or more threads per core.
-//
-// A value of 100 works pretty well for up to 4 threads per core, but isn't
-// quite as fast as 0 for 2 threads per core.
-//
-// We need to check what happens for oversubscription / > 4 threads per core.
-// It is possible that we need to pass the delay value in as a parameter
-// that the caller determines based on the total # threads / # cores.
-//
-//.macro pause_op
-// mov $100, %rax
-// delay %rax
-//.endm
-# else
-# define pause_op .byte 0xf3,0x90
-# endif // KMP_MIC
-
-# if KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
-# define KMP_LABEL(x) L_##x // form the name of label
-.macro KMP_CFI_DEF_OFFSET
-.endmacro
-.macro KMP_CFI_OFFSET
-.endmacro
-.macro KMP_CFI_REGISTER
-.endmacro
-.macro KMP_CFI_DEF
-.endmacro
-.macro ALIGN
- .align $0
-.endmacro
-.macro DEBUG_INFO
-/* Not sure what .size does in icc, not sure if we need to do something
- similar for OS X*.
-*/
-.endmacro
-.macro PROC
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE($0)
-KMP_PREFIX_UNDERSCORE($0):
-.endmacro
-# else // KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
-// Format labels so that they don't override function names in gdb's backtraces
-// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*)
-# if KMP_MIC
-# define KMP_LABEL(x) L_##x // local label
-# else
-# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
-# endif // KMP_MIC
-.macro ALIGN size
- .align 1<<(\size)
-.endm
-.macro DEBUG_INFO proc
- .cfi_endproc
-// Not sure why we need .type and .size for the functions
- .align 16
- .type \proc,@function
- .size \proc,.-\proc
-.endm
-.macro PROC proc
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE(\proc)
-KMP_PREFIX_UNDERSCORE(\proc):
- .cfi_startproc
-.endm
-.macro KMP_CFI_DEF_OFFSET sz
- .cfi_def_cfa_offset \sz
-.endm
-.macro KMP_CFI_OFFSET reg, sz
- .cfi_offset \reg,\sz
-.endm
-.macro KMP_CFI_REGISTER reg
- .cfi_def_cfa_register \reg
-.endm
-.macro KMP_CFI_DEF reg, sz
- .cfi_def_cfa \reg,\sz
-.endm
-# endif // KMP_OS_DARWIN
-#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
-
-
-// -----------------------------------------------------------------------
-// data
-// -----------------------------------------------------------------------
-
-#ifdef KMP_GOMP_COMPAT
-
-//
-// Support for unnamed common blocks.
-//
-// Because the symbol ".gomp_critical_user_" contains a ".", we have to
-// put this stuff in assembly.
-//
-
-# if KMP_ARCH_X86
-# if KMP_OS_DARWIN
- .data
- .comm .gomp_critical_user_,32
- .data
- .globl ___kmp_unnamed_critical_addr
-___kmp_unnamed_critical_addr:
- .long .gomp_critical_user_
-# else /* Linux* OS */
- .data
- .comm .gomp_critical_user_,32,8
- .data
- ALIGN 4
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .4byte .gomp_critical_user_
- .type __kmp_unnamed_critical_addr,@object
- .size __kmp_unnamed_critical_addr,4
-# endif /* KMP_OS_DARWIN */
-# endif /* KMP_ARCH_X86 */
-
-# if KMP_ARCH_X86_64
-# if KMP_OS_DARWIN
- .data
- .comm .gomp_critical_user_,32
- .data
- .globl ___kmp_unnamed_critical_addr
-___kmp_unnamed_critical_addr:
- .quad .gomp_critical_user_
-# else /* Linux* OS */
- .data
- .comm .gomp_critical_user_,32,8
- .data
- ALIGN 8
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .8byte .gomp_critical_user_
- .type __kmp_unnamed_critical_addr,@object
- .size __kmp_unnamed_critical_addr,8
-# endif /* KMP_OS_DARWIN */
-# endif /* KMP_ARCH_X86_64 */
-
-#endif /* KMP_GOMP_COMPAT */
-
-
-#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
-
-// -----------------------------------------------------------------------
-// microtasking routines specifically written for IA-32 architecture
-// running Linux* OS
-// -----------------------------------------------------------------------
-//
-
- .ident "Intel Corporation"
- .data
- ALIGN 4
-// void
-// __kmp_x86_pause( void );
-//
-
- .text
- PROC __kmp_x86_pause
-
- pause_op
- ret
-
- DEBUG_INFO __kmp_x86_pause
-
-//
-// void
-// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
-//
- PROC __kmp_x86_cpuid
-
- pushl %ebp
- movl %esp,%ebp
- pushl %edi
- pushl %ebx
- pushl %ecx
- pushl %edx
-
- movl 8(%ebp), %eax
- movl 12(%ebp), %ecx
- cpuid // Query the CPUID for the current processor
-
- movl 16(%ebp), %edi
- movl %eax, 0(%edi)
- movl %ebx, 4(%edi)
- movl %ecx, 8(%edi)
- movl %edx, 12(%edi)
-
- popl %edx
- popl %ecx
- popl %ebx
- popl %edi
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_x86_cpuid
-
-
-# if !KMP_ASM_INTRINS
-
-//------------------------------------------------------------------------
-//
-// kmp_int32
-// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-//
-
- PROC __kmp_test_then_add32
-
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- lock
- xaddl %eax,(%ecx)
- ret
-
- DEBUG_INFO __kmp_test_then_add32
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed8
-//
-// kmp_int32
-// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
-//
-// parameters:
-// p: 4(%esp)
-// d: 8(%esp)
-//
-// return: %al
-
- PROC __kmp_xchg_fixed8
-
- movl 4(%esp), %ecx // "p"
- movb 8(%esp), %al // "d"
-
- lock
- xchgb %al,(%ecx)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed8
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed16
-//
-// kmp_int16
-// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
-//
-// parameters:
-// p: 4(%esp)
-// d: 8(%esp)
-// return: %ax
-
- PROC __kmp_xchg_fixed16
-
- movl 4(%esp), %ecx // "p"
- movw 8(%esp), %ax // "d"
-
- lock
- xchgw %ax,(%ecx)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed16
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed32
-//
-// kmp_int32
-// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
-//
-// parameters:
-// p: 4(%esp)
-// d: 8(%esp)
-//
-// return: %eax
-
- PROC __kmp_xchg_fixed32
-
- movl 4(%esp), %ecx // "p"
- movl 8(%esp), %eax // "d"
-
- lock
- xchgl %eax,(%ecx)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed32
-
-
-//
-// kmp_int8
-// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-
- PROC __kmp_compare_and_store8
-
- movl 4(%esp), %ecx
- movb 8(%esp), %al
- movb 12(%esp), %dl
- lock
- cmpxchgb %dl,(%ecx)
- sete %al // if %al == (%ecx) set %al = 1 else set %al = 0
- and $1, %eax // sign extend previous instruction
- ret
-
- DEBUG_INFO __kmp_compare_and_store8
-
-//
-// kmp_int16
-// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-
- PROC __kmp_compare_and_store16
-
- movl 4(%esp), %ecx
- movw 8(%esp), %ax
- movw 12(%esp), %dx
- lock
- cmpxchgw %dx,(%ecx)
- sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0
- and $1, %eax // sign extend previous instruction
- ret
-
- DEBUG_INFO __kmp_compare_and_store16
-
-//
-// kmp_int32
-// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-
- PROC __kmp_compare_and_store32
-
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- movl 12(%esp), %edx
- lock
- cmpxchgl %edx,(%ecx)
- sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0
- and $1, %eax // sign extend previous instruction
- ret
-
- DEBUG_INFO __kmp_compare_and_store32
-
-//
-// kmp_int32
-// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
- PROC __kmp_compare_and_store64
-
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- pushl %edi
- movl 8(%ebp), %edi
- movl 12(%ebp), %eax // "cv" low order word
- movl 16(%ebp), %edx // "cv" high order word
- movl 20(%ebp), %ebx // "sv" low order word
- movl 24(%ebp), %ecx // "sv" high order word
- lock
- cmpxchg8b (%edi)
- sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0
- and $1, %eax // sign extend previous instruction
- popl %edi
- popl %ebx
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_compare_and_store64
-
-//
-// kmp_int8
-// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-
- PROC __kmp_compare_and_store_ret8
-
- movl 4(%esp), %ecx
- movb 8(%esp), %al
- movb 12(%esp), %dl
- lock
- cmpxchgb %dl,(%ecx)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret8
-
-//
-// kmp_int16
-// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-
- PROC __kmp_compare_and_store_ret16
-
- movl 4(%esp), %ecx
- movw 8(%esp), %ax
- movw 12(%esp), %dx
- lock
- cmpxchgw %dx,(%ecx)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret16
-
-//
-// kmp_int32
-// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-
- PROC __kmp_compare_and_store_ret32
-
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- movl 12(%esp), %edx
- lock
- cmpxchgl %edx,(%ecx)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret32
-
-//
-// kmp_int64
-// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
- PROC __kmp_compare_and_store_ret64
-
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- pushl %edi
- movl 8(%ebp), %edi
- movl 12(%ebp), %eax // "cv" low order word
- movl 16(%ebp), %edx // "cv" high order word
- movl 20(%ebp), %ebx // "sv" low order word
- movl 24(%ebp), %ecx // "sv" high order word
- lock
- cmpxchg8b (%edi)
- popl %edi
- popl %ebx
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret64
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_real32
-//
-// kmp_real32
-// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
-//
-// parameters:
-// addr: 4(%esp)
-// data: 8(%esp)
-//
-// return: %eax
-
-
- PROC __kmp_xchg_real32
-
- pushl %ebp
- movl %esp, %ebp
- subl $4, %esp
- pushl %esi
-
- movl 4(%ebp), %esi
- flds (%esi)
- // load <addr>
- fsts -4(%ebp)
- // store old value
-
- movl 8(%ebp), %eax
-
- lock
- xchgl %eax, (%esi)
-
- flds -4(%ebp)
- // return old value
-
- popl %esi
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_xchg_real32
-
-# endif /* !KMP_ASM_INTRINS */
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_load_x87_fpu_control_word
-//
-// void
-// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: 4(%esp)
-//
-
- PROC __kmp_load_x87_fpu_control_word
-
- movl 4(%esp), %eax
- fldcw (%eax)
- ret
-
- DEBUG_INFO __kmp_load_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_store_x87_fpu_control_word
-//
-// void
-// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: 4(%esp)
-//
-
- PROC __kmp_store_x87_fpu_control_word
-
- movl 4(%esp), %eax
- fstcw (%eax)
- ret
-
- DEBUG_INFO __kmp_store_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_clear_x87_fpu_status_word
-//
-// void
-// __kmp_clear_x87_fpu_status_word();
-//
-//
-
- PROC __kmp_clear_x87_fpu_status_word
-
- fnclex
- ret
-
- DEBUG_INFO __kmp_clear_x87_fpu_status_word
-
-
-//------------------------------------------------------------------------
-//
-// typedef void (*microtask_t)( int *gtid, int *tid, ... );
-//
-// int
-// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
-// int argc, void *p_argv[] ) {
-// (*pkfn)( & gtid, & gtid, argv[0], ... );
-// return 1;
-// }
-
-// -- Begin __kmp_invoke_microtask
-// mark_begin;
- PROC __kmp_invoke_microtask
-
- pushl %ebp
- KMP_CFI_DEF_OFFSET 8
- KMP_CFI_OFFSET ebp,-8
- movl %esp,%ebp // establish the base pointer for this routine.
- KMP_CFI_REGISTER ebp
- subl $8,%esp // allocate space for two local variables.
-                                  // These variables are:
- // argv: -4(%ebp)
- // temp: -8(%ebp)
- //
- pushl %ebx // save %ebx to use during this routine
- //
-#if OMPT_SUPPORT
- movl 28(%ebp),%ebx // get exit_frame address
- movl %ebp,(%ebx) // save exit_frame
-#endif
-
- movl 20(%ebp),%ebx // Stack alignment - # args
- addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
- shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
- movl %esp,%eax //
- subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
- movl %eax,%ebx // Save to %ebx
- andl $0xFFFFFF80,%eax // mask off 7 bits
- subl %eax,%ebx // Amount to subtract from %esp
- subl %ebx,%esp // Prepare the stack ptr --
- // now it will be aligned on 128-byte boundary at the call
-
- movl 24(%ebp),%eax // copy from p_argv[]
- movl %eax,-4(%ebp) // into the local variable *argv.
-
- movl 20(%ebp),%ebx // argc is 20(%ebp)
- shll $2,%ebx
-
-KMP_LABEL(invoke_2):
- cmpl $0,%ebx
- jg KMP_LABEL(invoke_4)
- jmp KMP_LABEL(invoke_3)
- ALIGN 2
-KMP_LABEL(invoke_4):
- movl -4(%ebp),%eax
- subl $4,%ebx // decrement argc.
- addl %ebx,%eax // index into argv.
- movl (%eax),%edx
- pushl %edx
-
- jmp KMP_LABEL(invoke_2)
- ALIGN 2
-KMP_LABEL(invoke_3):
- leal 16(%ebp),%eax // push & tid
- pushl %eax
-
- leal 12(%ebp),%eax // push & gtid
- pushl %eax
-
- movl 8(%ebp),%ebx
- call *%ebx // call (*pkfn)();
-
- movl $1,%eax // return 1;
-
- movl -12(%ebp),%ebx // restore %ebx
- leave
- KMP_CFI_DEF esp,4
- ret
-
- DEBUG_INFO __kmp_invoke_microtask
-// -- End __kmp_invoke_microtask
-
-
-// kmp_uint64
-// __kmp_hardware_timestamp(void)
- PROC __kmp_hardware_timestamp
- rdtsc
- ret
-
- DEBUG_INFO __kmp_hardware_timestamp
-// -- End __kmp_hardware_timestamp
-
-// -----------------------------------------------------------------------
-#endif /* KMP_ARCH_X86 */
-
-
-#if KMP_ARCH_X86_64
-
-// -----------------------------------------------------------------------
-// microtasking routines specifically written for IA-32 architecture and
-// Intel(R) 64 running Linux* OS
-// -----------------------------------------------------------------------
-
-// -- Machine type P
-// mark_description "Intel Corporation";
- .ident "Intel Corporation"
-// -- .file "z_Linux_asm.s"
- .data
- ALIGN 4
-
-// To prevent getting our code into the .data section, .text is added to every routine definition for x86_64.
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_x86_cpuid
-//
-// void
-// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
-//
-// parameters:
-// mode: %edi
-// mode2: %esi
-// cpuid_buffer: %rdx
-
- .text
- PROC __kmp_x86_cpuid
-
- pushq %rbp
- movq %rsp,%rbp
- pushq %rbx // callee-save register
-
- movl %esi, %ecx // "mode2"
- movl %edi, %eax // "mode"
- movq %rdx, %rsi // cpuid_buffer
- cpuid // Query the CPUID for the current processor
-
- movl %eax, 0(%rsi) // store results into buffer
- movl %ebx, 4(%rsi)
- movl %ecx, 8(%rsi)
- movl %edx, 12(%rsi)
-
- popq %rbx // callee-save register
- movq %rbp, %rsp
- popq %rbp
- ret
-
- DEBUG_INFO __kmp_x86_cpuid
-
-
-
-# if !KMP_ASM_INTRINS
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_test_then_add32
-//
-// kmp_int32
-// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-//
-// parameters:
-// p: %rdi
-// d: %esi
-//
-// return: %eax
-
- .text
- PROC __kmp_test_then_add32
-
- movl %esi, %eax // "d"
- lock
- xaddl %eax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_test_then_add32
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_test_then_add64
-//
-// kmp_int64
-// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
-//
-// parameters:
-// p: %rdi
-// d: %rsi
-// return: %rax
-
- .text
- PROC __kmp_test_then_add64
-
- movq %rsi, %rax // "d"
- lock
- xaddq %rax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_test_then_add64
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed8
-//
-// kmp_int32
-// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
-//
-// parameters:
-// p: %rdi
-// d: %sil
-//
-// return: %al
-
- .text
- PROC __kmp_xchg_fixed8
-
- movb %sil, %al // "d"
-
- lock
- xchgb %al,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed8
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed16
-//
-// kmp_int16
-// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
-//
-// parameters:
-// p: %rdi
-// d: %si
-// return: %ax
-
- .text
- PROC __kmp_xchg_fixed16
-
- movw %si, %ax // "d"
-
- lock
- xchgw %ax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed16
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed32
-//
-// kmp_int32
-// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
-//
-// parameters:
-// p: %rdi
-// d: %esi
-//
-// return: %eax
-
- .text
- PROC __kmp_xchg_fixed32
-
- movl %esi, %eax // "d"
-
- lock
- xchgl %eax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed32
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_fixed64
-//
-// kmp_int64
-// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
-//
-// parameters:
-// p: %rdi
-// d: %rsi
-// return: %rax
-
- .text
- PROC __kmp_xchg_fixed64
-
- movq %rsi, %rax // "d"
-
- lock
- xchgq %rax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed64
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store8
-//
-// kmp_int8
-// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store8
-
- movb %sil, %al // "cv"
- lock
- cmpxchgb %dl,(%rdi)
- sete %al // if %al == (%rdi) set %al = 1 else set %al = 0
- andq $1, %rax // sign extend previous instruction for return value
- ret
-
- DEBUG_INFO __kmp_compare_and_store8
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store16
-//
-// kmp_int16
-// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %si
-// sv: %dx
-//
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store16
-
- movw %si, %ax // "cv"
- lock
- cmpxchgw %dx,(%rdi)
- sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0
- andq $1, %rax // sign extend previous instruction for return value
- ret
-
- DEBUG_INFO __kmp_compare_and_store16
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store32
-//
-// kmp_int32
-// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store32
-
- movl %esi, %eax // "cv"
- lock
- cmpxchgl %edx,(%rdi)
- sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0
- andq $1, %rax // sign extend previous instruction for return value
- ret
-
- DEBUG_INFO __kmp_compare_and_store32
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store64
-//
-// kmp_int32
-// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %rsi
-// sv: %rdx
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store64
-
- movq %rsi, %rax // "cv"
- lock
- cmpxchgq %rdx,(%rdi)
- sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0
- andq $1, %rax // sign extend previous instruction for return value
- ret
-
- DEBUG_INFO __kmp_compare_and_store64
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store_ret8
-//
-// kmp_int8
-// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store_ret8
-
- movb %sil, %al // "cv"
- lock
- cmpxchgb %dl,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret8
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store_ret16
-//
-// kmp_int16
-// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %si
-// sv: %dx
-//
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store_ret16
-
- movw %si, %ax // "cv"
- lock
- cmpxchgw %dx,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret16
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store_ret32
-//
-// kmp_int32
-// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store_ret32
-
- movl %esi, %eax // "cv"
- lock
- cmpxchgl %edx,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret32
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_compare_and_store_ret64
-//
-// kmp_int64
-// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %rsi
-// sv: %rdx
-// return: %eax
-
- .text
- PROC __kmp_compare_and_store_ret64
-
- movq %rsi, %rax // "cv"
- lock
- cmpxchgq %rdx,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret64
-
-# endif /* !KMP_ASM_INTRINS */
-
-
-# if !KMP_MIC
-
-# if !KMP_ASM_INTRINS
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_real32
-//
-// kmp_real32
-// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
-//
-// parameters:
-// addr: %rdi
-// data: %xmm0 (lower 4 bytes)
-//
-// return: %xmm0 (lower 4 bytes)
-
- .text
- PROC __kmp_xchg_real32
-
- movd %xmm0, %eax // load "data" to eax
-
- lock
- xchgl %eax, (%rdi)
-
- movd %eax, %xmm0 // load old value into return register
-
- ret
-
- DEBUG_INFO __kmp_xchg_real32
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_xchg_real64
-//
-// kmp_real64
-// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
-//
-// parameters:
-// addr: %rdi
-// data: %xmm0 (lower 8 bytes)
-// return: %xmm0 (lower 8 bytes)
-//
-
- .text
- PROC __kmp_xchg_real64
-
- movd %xmm0, %rax // load "data" to rax
-
- lock
- xchgq %rax, (%rdi)
-
- movd %rax, %xmm0 // load old value into return register
- ret
-
- DEBUG_INFO __kmp_xchg_real64
-
-
-# endif /* !KMP_MIC */
-
-# endif /* !KMP_ASM_INTRINS */
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_load_x87_fpu_control_word
-//
-// void
-// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: %rdi
-//
-
- .text
- PROC __kmp_load_x87_fpu_control_word
-
- fldcw (%rdi)
- ret
-
- DEBUG_INFO __kmp_load_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_store_x87_fpu_control_word
-//
-// void
-// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: %rdi
-//
-
- .text
- PROC __kmp_store_x87_fpu_control_word
-
- fstcw (%rdi)
- ret
-
- DEBUG_INFO __kmp_store_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_clear_x87_fpu_status_word
-//
-// void
-// __kmp_clear_x87_fpu_status_word();
-//
-//
-
- .text
- PROC __kmp_clear_x87_fpu_status_word
-
-#if KMP_MIC
-// TODO: remove the workaround for problem with fnclex instruction (no CQ known)
- fstenv -32(%rsp) // store FP env
- andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW
- fldenv -32(%rsp) // load FP env back
- ret
-#else
- fnclex
- ret
-#endif
-
- DEBUG_INFO __kmp_clear_x87_fpu_status_word
-
-
-//------------------------------------------------------------------------
-//
-// typedef void (*microtask_t)( int *gtid, int *tid, ... );
-//
-// int
-// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
-// int gtid, int tid,
-// int argc, void *p_argv[] ) {
-// (*pkfn)( & gtid, & tid, argv[0], ... );
-// return 1;
-// }
-//
-// note:
-// at call to pkfn must have %rsp 128-byte aligned for compiler
-//
-// parameters:
-// %rdi: pkfn
-// %esi: gtid
-// %edx: tid
-// %ecx: argc
-// %r8: p_argv
-// %r9: &exit_frame
-//
-// locals:
-// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
-// __tid: tid parm pushed on stack so can pass &tid to pkfn
-//
-// reg temps:
-// %rax: used all over the place
-// %rdx: used in stack pointer alignment calculation
-// %r11: used to traverse p_argv array
-// %rsi: used as temporary for stack parameters
-// used as temporary for number of pkfn parms to push
-// %rbx: used to hold pkfn address, and zero constant, callee-save
-//
-// return: %eax (always 1/TRUE)
-//
-
-__gtid = -16
-__tid = -24
-
-// -- Begin __kmp_invoke_microtask
-// mark_begin;
- .text
- PROC __kmp_invoke_microtask
-
- pushq %rbp // save base pointer
- KMP_CFI_DEF_OFFSET 16
- KMP_CFI_OFFSET rbp,-16
- movq %rsp,%rbp // establish the base pointer for this routine.
- KMP_CFI_REGISTER rbp
-
-#if OMPT_SUPPORT
- movq %rbp, (%r9) // save exit_frame
-#endif
-
- pushq %rbx // %rbx is callee-saved register
-	pushq 	%rsi			// Put gtid on stack so we can pass &gtid to pkfn
-	pushq 	%rdx			// Put tid on stack so we can pass &tid to pkfn
-
- movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
- movq $0, %rbx // constant for cmovs later
- subq $4, %rax // subtract four args passed in registers to pkfn
-#if KMP_MIC
- js KMP_LABEL(kmp_0) // jump to movq
- jmp KMP_LABEL(kmp_0_exit) // jump ahead
-KMP_LABEL(kmp_0):
- movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
-KMP_LABEL(kmp_0_exit):
-#else
- cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
-#endif // KMP_MIC
-
- movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
- shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
-
- movq %rsp, %rdx //
- subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
- // without align, stack ptr would be this
- movq %rdx, %rax // Save to %rax
-
- andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
- subq %rax, %rdx // Amount to subtract from %rsp
- subq %rdx, %rsp // Prepare the stack ptr --
- // now %rsp will align to 128-byte boundary at call site
-
- // setup pkfn parameter reg and stack
- movq %rcx, %rax // argc -> %rax
- cmpq $0, %rsi
- je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
- shlq $3, %rcx // argc*8 -> %rcx
- movq %r8, %rdx // p_argv -> %rdx
- addq %rcx, %rdx // &p_argv[argc] -> %rdx
-
- movq %rsi, %rcx // max (0, argc-4) -> %rcx
-
-KMP_LABEL(kmp_invoke_push_parms):
- // push nth - 7th parms to pkfn on stack
- subq $8, %rdx // decrement p_argv pointer to previous parm
- movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
- pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
- subl $1, %ecx
-
-// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
-// if the name of the label that is an operand of this jecxz starts with a dot (".");
-// Apple's linker does not support 1-byte length relocation;
-// Resolution: replace all .labelX entries with L_labelX.
-
- jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
- jmp KMP_LABEL(kmp_invoke_push_parms)
- ALIGN 3
-KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
- // order here is important to avoid trashing
- // registers used for both input and output parms!
- movq %rdi, %rbx // pkfn -> %rbx
- leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
- leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
-
- movq %r8, %r11 // p_argv -> %r11
-
-#if KMP_MIC
- cmpq $4, %rax // argc >= 4?
- jns KMP_LABEL(kmp_4) // jump to movq
- jmp KMP_LABEL(kmp_4_exit) // jump ahead
-KMP_LABEL(kmp_4):
- movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
-KMP_LABEL(kmp_4_exit):
-
- cmpq $3, %rax // argc >= 3?
- jns KMP_LABEL(kmp_3) // jump to movq
- jmp KMP_LABEL(kmp_3_exit) // jump ahead
-KMP_LABEL(kmp_3):
- movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
-KMP_LABEL(kmp_3_exit):
-
- cmpq $2, %rax // argc >= 2?
- jns KMP_LABEL(kmp_2) // jump to movq
- jmp KMP_LABEL(kmp_2_exit) // jump ahead
-KMP_LABEL(kmp_2):
- movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
-KMP_LABEL(kmp_2_exit):
-
- cmpq $1, %rax // argc >= 1?
- jns KMP_LABEL(kmp_1) // jump to movq
- jmp KMP_LABEL(kmp_1_exit) // jump ahead
-KMP_LABEL(kmp_1):
- movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
-KMP_LABEL(kmp_1_exit):
-#else
- cmpq $4, %rax // argc >= 4?
- cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
-
- cmpq $3, %rax // argc >= 3?
- cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
-
- cmpq $2, %rax // argc >= 2?
- cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
-
- cmpq $1, %rax // argc >= 1?
- cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
-#endif // KMP_MIC
-
- call *%rbx // call (*pkfn)();
- movq $1, %rax // move 1 into return register;
-
- movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
- movq %rbp, %rsp // restore stack pointer
- popq %rbp // restore frame pointer
- KMP_CFI_DEF rsp,8
- ret
-
- DEBUG_INFO __kmp_invoke_microtask
-// -- End __kmp_invoke_microtask
-
-// kmp_uint64
-// __kmp_hardware_timestamp(void)
- .text
- PROC __kmp_hardware_timestamp
- rdtsc
- shlq $32, %rdx
- orq %rdx, %rax
- ret
-
- DEBUG_INFO __kmp_hardware_timestamp
-// -- End __kmp_hardware_timestamp
-
-//------------------------------------------------------------------------
-//
-// FUNCTION __kmp_bsr32
-//
-// int
-// __kmp_bsr32( int );
-//
-
- .text
- PROC __kmp_bsr32
-
- bsr %edi,%eax
- ret
-
- DEBUG_INFO __kmp_bsr32
-
-
-// -----------------------------------------------------------------------
-#endif /* KMP_ARCH_X86_64 */
-
-#if KMP_ARCH_ARM
- .data
- .comm .gomp_critical_user_,32,8
- .data
- .align 4
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .4byte .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,4
-#endif /* KMP_ARCH_ARM */
-
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
- .data
- .comm .gomp_critical_user_,32,8
- .data
- .align 8
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .8byte .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,8
-#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
-
-#if KMP_OS_LINUX
-# if KMP_ARCH_ARM
-.section .note.GNU-stack,"",%progbits
-# else
-.section .note.GNU-stack,"",@progbits
-# endif
-#endif
+// z_Linux_asm.s: - microtasking routines specifically
+// written for Intel platforms running Linux* OS
+
+//
+////===----------------------------------------------------------------------===//
+////
+//// The LLVM Compiler Infrastructure
+////
+//// This file is dual licensed under the MIT and the University of Illinois Open
+//// Source Licenses. See LICENSE.txt for details.
+////
+////===----------------------------------------------------------------------===//
+//
+
+// -----------------------------------------------------------------------
+// macros
+// -----------------------------------------------------------------------
+
+#include "kmp_config.h"
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+# if KMP_MIC
+//
+// the 'delay r16/r32/r64' should be used instead of the 'pause'.
+// The delay operation has the effect of removing the current thread from
+// the round-robin HT mechanism, and therefore speeds up the issue rate of
+// the other threads on the same core.
+//
+// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
+// barrier time to increase greatly for 3 or more threads per core.
+//
+// A value of 100 works pretty well for up to 4 threads per core, but isn't
+// quite as fast as 0 for 2 threads per core.
+//
+// We need to check what happens for oversubscription / > 4 threads per core.
+// It is possible that we need to pass the delay value in as a parameter
+// that the caller determines based on the total # threads / # cores.
+//
+//.macro pause_op
+// mov $100, %rax
+// delay %rax
+//.endm
+# else
+# define pause_op .byte 0xf3,0x90
+# endif // KMP_MIC
+
+# if KMP_OS_DARWIN
+# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
+# define KMP_LABEL(x) L_##x // form the name of label
+.macro KMP_CFI_DEF_OFFSET
+.endmacro
+.macro KMP_CFI_OFFSET
+.endmacro
+.macro KMP_CFI_REGISTER
+.endmacro
+.macro KMP_CFI_DEF
+.endmacro
+.macro ALIGN
+ .align $0
+.endmacro
+.macro DEBUG_INFO
+/* Not sure what .size does in icc, not sure if we need to do something
+ similar for OS X*.
+*/
+.endmacro
+.macro PROC
+ ALIGN 4
+ .globl KMP_PREFIX_UNDERSCORE($0)
+KMP_PREFIX_UNDERSCORE($0):
+.endmacro
+# else // KMP_OS_DARWIN
+# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
+// Format labels so that they don't override function names in gdb's backtraces
+// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*)
+# if KMP_MIC
+# define KMP_LABEL(x) L_##x // local label
+# else
+# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
+# endif // KMP_MIC
+.macro ALIGN size
+ .align 1<<(\size)
+.endm
+.macro DEBUG_INFO proc
+ .cfi_endproc
+// Not sure why we need .type and .size for the functions
+ .align 16
+ .type \proc,@function
+ .size \proc,.-\proc
+.endm
+.macro PROC proc
+ ALIGN 4
+ .globl KMP_PREFIX_UNDERSCORE(\proc)
+KMP_PREFIX_UNDERSCORE(\proc):
+ .cfi_startproc
+.endm
+.macro KMP_CFI_DEF_OFFSET sz
+ .cfi_def_cfa_offset \sz
+.endm
+.macro KMP_CFI_OFFSET reg, sz
+ .cfi_offset \reg,\sz
+.endm
+.macro KMP_CFI_REGISTER reg
+ .cfi_def_cfa_register \reg
+.endm
+.macro KMP_CFI_DEF reg, sz
+ .cfi_def_cfa \reg,\sz
+.endm
+# endif // KMP_OS_DARWIN
+#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
+
+
+// -----------------------------------------------------------------------
+// data
+// -----------------------------------------------------------------------
+
+#ifdef KMP_GOMP_COMPAT
+
+//
+// Support for unnamed common blocks.
+//
+// Because the symbol ".gomp_critical_user_" contains a ".", we have to
+// put this stuff in assembly.
+//
+
+# if KMP_ARCH_X86
+# if KMP_OS_DARWIN
+ .data
+ .comm .gomp_critical_user_,32
+ .data
+ .globl ___kmp_unnamed_critical_addr
+___kmp_unnamed_critical_addr:
+ .long .gomp_critical_user_
+# else /* Linux* OS */
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ ALIGN 4
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .4byte .gomp_critical_user_
+ .type __kmp_unnamed_critical_addr,@object
+ .size __kmp_unnamed_critical_addr,4
+# endif /* KMP_OS_DARWIN */
+# endif /* KMP_ARCH_X86 */
+
+# if KMP_ARCH_X86_64
+# if KMP_OS_DARWIN
+ .data
+ .comm .gomp_critical_user_,32
+ .data
+ .globl ___kmp_unnamed_critical_addr
+___kmp_unnamed_critical_addr:
+ .quad .gomp_critical_user_
+# else /* Linux* OS */
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ ALIGN 8
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .8byte .gomp_critical_user_
+ .type __kmp_unnamed_critical_addr,@object
+ .size __kmp_unnamed_critical_addr,8
+# endif /* KMP_OS_DARWIN */
+# endif /* KMP_ARCH_X86_64 */
+
+#endif /* KMP_GOMP_COMPAT */
+
+
+#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
+
+// -----------------------------------------------------------------------
+// microtasking routines specifically written for IA-32 architecture
+// running Linux* OS
+// -----------------------------------------------------------------------
+//
+
+ .ident "Intel Corporation"
+ .data
+ ALIGN 4
+// void
+// __kmp_x86_pause( void );
+//
+
+ .text
+ PROC __kmp_x86_pause
+
+ pause_op
+ ret
+
+ DEBUG_INFO __kmp_x86_pause
+
+//
+// void
+// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
+//
+ PROC __kmp_x86_cpuid
+
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %edi
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+ movl 8(%ebp), %eax
+ movl 12(%ebp), %ecx
+ cpuid // Query the CPUID for the current processor
+
+ movl 16(%ebp), %edi
+ movl %eax, 0(%edi)
+ movl %ebx, 4(%edi)
+ movl %ecx, 8(%edi)
+ movl %edx, 12(%edi)
+
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %edi
+ movl %ebp, %esp
+ popl %ebp
+ ret
+
+ DEBUG_INFO __kmp_x86_cpuid
+
+
+# if !KMP_ASM_INTRINS
+
+//------------------------------------------------------------------------
+//
+// kmp_int32
+// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
+//
+
+ PROC __kmp_test_then_add32
+
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ lock
+ xaddl %eax,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_test_then_add32
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed8
+//
+// kmp_int32
+// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
+//
+// parameters:
+// p: 4(%esp)
+// d: 8(%esp)
+//
+// return: %al
+
+ PROC __kmp_xchg_fixed8
+
+ movl 4(%esp), %ecx // "p"
+ movb 8(%esp), %al // "d"
+
+ lock
+ xchgb %al,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed8
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed16
+//
+// kmp_int16
+// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
+//
+// parameters:
+// p: 4(%esp)
+// d: 8(%esp)
+// return: %ax
+
+ PROC __kmp_xchg_fixed16
+
+ movl 4(%esp), %ecx // "p"
+ movw 8(%esp), %ax // "d"
+
+ lock
+ xchgw %ax,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed16
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed32
+//
+// kmp_int32
+// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
+//
+// parameters:
+// p: 4(%esp)
+// d: 8(%esp)
+//
+// return: %eax
+
+ PROC __kmp_xchg_fixed32
+
+ movl 4(%esp), %ecx // "p"
+ movl 8(%esp), %eax // "d"
+
+ lock
+ xchgl %eax,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed32
+
+
+//
+// kmp_int8
+// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+//
+
+ PROC __kmp_compare_and_store8
+
+ movl 4(%esp), %ecx
+ movb 8(%esp), %al
+ movb 12(%esp), %dl
+ lock
+ cmpxchgb %dl,(%ecx)
+ sete %al // if %al == (%ecx) set %al = 1 else set %al = 0
+ and $1, %eax // sign extend previous instruction
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store8
+
+//
+// kmp_int16
+// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+//
+
+ PROC __kmp_compare_and_store16
+
+ movl 4(%esp), %ecx
+ movw 8(%esp), %ax
+ movw 12(%esp), %dx
+ lock
+ cmpxchgw %dx,(%ecx)
+ sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0
+ and $1, %eax // sign extend previous instruction
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store16
+
+//
+// kmp_int32
+// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+//
+
+ PROC __kmp_compare_and_store32
+
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ lock
+ cmpxchgl %edx,(%ecx)
+ sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0
+ and $1, %eax // sign extend previous instruction
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store32
+
+//
+// kmp_int32
+// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+//
+ PROC __kmp_compare_and_store64
+
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %ebx
+ pushl %edi
+ movl 8(%ebp), %edi
+ movl 12(%ebp), %eax // "cv" low order word
+ movl 16(%ebp), %edx // "cv" high order word
+ movl 20(%ebp), %ebx // "sv" low order word
+ movl 24(%ebp), %ecx // "sv" high order word
+ lock
+ cmpxchg8b (%edi)
+ sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0
+ and $1, %eax // sign extend previous instruction
+ popl %edi
+ popl %ebx
+ movl %ebp, %esp
+ popl %ebp
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store64
+
+//
+// kmp_int8
+// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+//
+
+ PROC __kmp_compare_and_store_ret8
+
+ movl 4(%esp), %ecx
+ movb 8(%esp), %al
+ movb 12(%esp), %dl
+ lock
+ cmpxchgb %dl,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret8
+
+//
+// kmp_int16
+// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+//
+
+ PROC __kmp_compare_and_store_ret16
+
+ movl 4(%esp), %ecx
+ movw 8(%esp), %ax
+ movw 12(%esp), %dx
+ lock
+ cmpxchgw %dx,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret16
+
+//
+// kmp_int32
+// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+//
+
+ PROC __kmp_compare_and_store_ret32
+
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ lock
+ cmpxchgl %edx,(%ecx)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret32
+
+//
+// kmp_int64
+// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+//
+ PROC __kmp_compare_and_store_ret64
+
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %ebx
+ pushl %edi
+ movl 8(%ebp), %edi
+ movl 12(%ebp), %eax // "cv" low order word
+ movl 16(%ebp), %edx // "cv" high order word
+ movl 20(%ebp), %ebx // "sv" low order word
+ movl 24(%ebp), %ecx // "sv" high order word
+ lock
+ cmpxchg8b (%edi)
+ popl %edi
+ popl %ebx
+ movl %ebp, %esp
+ popl %ebp
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret64
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_real32
+//
+// kmp_real32
+// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
+//
+// parameters:
+// addr: 4(%esp)
+// data: 8(%esp)
+//
+// return: %eax
+
+
+ PROC __kmp_xchg_real32
+
+ pushl %ebp
+ movl %esp, %ebp
+ subl $4, %esp
+ pushl %esi
+
+ movl 4(%ebp), %esi
+ flds (%esi)
+ // load <addr>
+ fsts -4(%ebp)
+ // store old value
+
+ movl 8(%ebp), %eax
+
+ lock
+ xchgl %eax, (%esi)
+
+ flds -4(%ebp)
+ // return old value
+
+ popl %esi
+ movl %ebp, %esp
+ popl %ebp
+ ret
+
+ DEBUG_INFO __kmp_xchg_real32
+
+# endif /* !KMP_ASM_INTRINS */
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_load_x87_fpu_control_word
+//
+// void
+// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
+//
+// parameters:
+// p: 4(%esp)
+//
+
+ PROC __kmp_load_x87_fpu_control_word
+
+ movl 4(%esp), %eax
+ fldcw (%eax)
+ ret
+
+ DEBUG_INFO __kmp_load_x87_fpu_control_word
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_store_x87_fpu_control_word
+//
+// void
+// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
+//
+// parameters:
+// p: 4(%esp)
+//
+
+ PROC __kmp_store_x87_fpu_control_word
+
+ movl 4(%esp), %eax
+ fstcw (%eax)
+ ret
+
+ DEBUG_INFO __kmp_store_x87_fpu_control_word
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_clear_x87_fpu_status_word
+//
+// void
+// __kmp_clear_x87_fpu_status_word();
+//
+//
+
+ PROC __kmp_clear_x87_fpu_status_word
+
+ fnclex
+ ret
+
+ DEBUG_INFO __kmp_clear_x87_fpu_status_word
+
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
+// int
+// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
+// int argc, void *p_argv[] ) {
+// (*pkfn)( & gtid, & gtid, argv[0], ... );
+// return 1;
+// }
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ PROC __kmp_invoke_microtask
+
+ pushl %ebp
+ KMP_CFI_DEF_OFFSET 8
+ KMP_CFI_OFFSET ebp,-8
+ movl %esp,%ebp // establish the base pointer for this routine.
+ KMP_CFI_REGISTER ebp
+ subl $8,%esp // allocate space for two local variables.
+                                // These variables are:
+ // argv: -4(%ebp)
+ // temp: -8(%ebp)
+ //
+ pushl %ebx // save %ebx to use during this routine
+ //
+#if OMPT_SUPPORT
+ movl 28(%ebp),%ebx // get exit_frame address
+ movl %ebp,(%ebx) // save exit_frame
+#endif
+
+ movl 20(%ebp),%ebx // Stack alignment - # args
+ addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
+ shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
+ movl %esp,%eax //
+ subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
+ movl %eax,%ebx // Save to %ebx
+        andl $0xFFFFFF80,%eax   // mask off the low 7 bits (round down to a 128-byte boundary)
+ subl %eax,%ebx // Amount to subtract from %esp
+ subl %ebx,%esp // Prepare the stack ptr --
+ // now it will be aligned on 128-byte boundary at the call
+
+ movl 24(%ebp),%eax // copy from p_argv[]
+ movl %eax,-4(%ebp) // into the local variable *argv.
+
+ movl 20(%ebp),%ebx // argc is 20(%ebp)
+ shll $2,%ebx
+
+KMP_LABEL(invoke_2):
+ cmpl $0,%ebx
+ jg KMP_LABEL(invoke_4)
+ jmp KMP_LABEL(invoke_3)
+ ALIGN 2
+KMP_LABEL(invoke_4):
+ movl -4(%ebp),%eax
+ subl $4,%ebx // decrement argc.
+ addl %ebx,%eax // index into argv.
+ movl (%eax),%edx
+ pushl %edx
+
+ jmp KMP_LABEL(invoke_2)
+ ALIGN 2
+KMP_LABEL(invoke_3):
+ leal 16(%ebp),%eax // push & tid
+ pushl %eax
+
+ leal 12(%ebp),%eax // push & gtid
+ pushl %eax
+
+ movl 8(%ebp),%ebx
+ call *%ebx // call (*pkfn)();
+
+ movl $1,%eax // return 1;
+
+ movl -12(%ebp),%ebx // restore %ebx
+ leave
+ KMP_CFI_DEF esp,4
+ ret
+
+ DEBUG_INFO __kmp_invoke_microtask
+// -- End __kmp_invoke_microtask
+
+
+// kmp_uint64
+// __kmp_hardware_timestamp(void)
+ PROC __kmp_hardware_timestamp
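+        // rdtsc places the low half of the timestamp in %eax and the high half in %edx, which is exactly the %edx:%eax pair used to return a 64-bit value on IA-32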
+ rdtsc
+ ret
+
+ DEBUG_INFO __kmp_hardware_timestamp
+// -- End __kmp_hardware_timestamp
+
+// -----------------------------------------------------------------------
+#endif /* KMP_ARCH_X86 */
+
+
+#if KMP_ARCH_X86_64
+
+// -----------------------------------------------------------------------
+// microtasking routines specifically written for IA-32 architecture and
+// Intel(R) 64 running Linux* OS
+// -----------------------------------------------------------------------
+
+// -- Machine type P
+// mark_description "Intel Corporation";
+ .ident "Intel Corporation"
+// -- .file "z_Linux_asm.s"
+ .data
+ ALIGN 4
+
+// To keep this code out of the .data section, .text is added to every routine definition for x86_64.
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_x86_cpuid
+//
+// void
+// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
+//
+// parameters:
+// mode: %edi
+// mode2: %esi
+// cpuid_buffer: %rdx
+
+ .text
+ PROC __kmp_x86_cpuid
+
+ pushq %rbp
+ movq %rsp,%rbp
+ pushq %rbx // callee-save register
+
+ movl %esi, %ecx // "mode2"
+ movl %edi, %eax // "mode"
+ movq %rdx, %rsi // cpuid_buffer
+ cpuid // Query the CPUID for the current processor
+
+ movl %eax, 0(%rsi) // store results into buffer
+ movl %ebx, 4(%rsi)
+ movl %ecx, 8(%rsi)
+ movl %edx, 12(%rsi)
+
+ popq %rbx // callee-save register
+ movq %rbp, %rsp
+ popq %rbp
+ ret
+
+ DEBUG_INFO __kmp_x86_cpuid
+
+
+
+# if !KMP_ASM_INTRINS
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_test_then_add32
+//
+// kmp_int32
+// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
+//
+// parameters:
+// p: %rdi
+// d: %esi
+//
+// return: %eax
+
+ .text
+ PROC __kmp_test_then_add32
+
+ movl %esi, %eax // "d"
+ lock
+ xaddl %eax,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_test_then_add32
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_test_then_add64
+//
+// kmp_int64
+// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
+//
+// parameters:
+// p: %rdi
+// d: %rsi
+// return: %rax
+
+ .text
+ PROC __kmp_test_then_add64
+
+ movq %rsi, %rax // "d"
+ lock
+ xaddq %rax,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_test_then_add64
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed8
+//
+// kmp_int32
+// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
+//
+// parameters:
+// p: %rdi
+// d: %sil
+//
+// return: %al
+
+ .text
+ PROC __kmp_xchg_fixed8
+
+ movb %sil, %al // "d"
+
+ lock
+ xchgb %al,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed8
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed16
+//
+// kmp_int16
+// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
+//
+// parameters:
+// p: %rdi
+// d: %si
+// return: %ax
+
+ .text
+ PROC __kmp_xchg_fixed16
+
+ movw %si, %ax // "d"
+
+ lock
+ xchgw %ax,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed16
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed32
+//
+// kmp_int32
+// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
+//
+// parameters:
+// p: %rdi
+// d: %esi
+//
+// return: %eax
+
+ .text
+ PROC __kmp_xchg_fixed32
+
+ movl %esi, %eax // "d"
+
+ lock
+ xchgl %eax,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed32
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_fixed64
+//
+// kmp_int64
+// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
+//
+// parameters:
+// p: %rdi
+// d: %rsi
+// return: %rax
+
+ .text
+ PROC __kmp_xchg_fixed64
+
+ movq %rsi, %rax // "d"
+
+ lock
+ xchgq %rax,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_xchg_fixed64
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store8
+//
+// kmp_int8
+// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %esi
+// sv: %edx
+//
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store8
+
+ movb %sil, %al // "cv"
+ lock
+ cmpxchgb %dl,(%rdi)
+ sete %al // if %al == (%rdi) set %al = 1 else set %al = 0
+        andq $1, %rax           // zero the upper bits of %rax so the return value is exactly 0 or 1
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store8
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store16
+//
+// kmp_int16
+// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %si
+// sv: %dx
+//
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store16
+
+ movw %si, %ax // "cv"
+ lock
+ cmpxchgw %dx,(%rdi)
+ sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0
+        andq $1, %rax           // zero the upper bits of %rax so the return value is exactly 0 or 1
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store16
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store32
+//
+// kmp_int32
+// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %esi
+// sv: %edx
+//
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store32
+
+ movl %esi, %eax // "cv"
+ lock
+ cmpxchgl %edx,(%rdi)
+ sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0
+        andq $1, %rax           // zero the upper bits of %rax so the return value is exactly 0 or 1
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store32
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store64
+//
+// kmp_int32
+// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %rsi
+// sv: %rdx
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store64
+
+ movq %rsi, %rax // "cv"
+ lock
+ cmpxchgq %rdx,(%rdi)
+ sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0
+        andq $1, %rax           // zero the upper bits of %rax so the return value is exactly 0 or 1
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store64
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store_ret8
+//
+// kmp_int8
+// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %esi
+// sv: %edx
+//
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store_ret8
+
+ movb %sil, %al // "cv"
+ lock
+ cmpxchgb %dl,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret8
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store_ret16
+//
+// kmp_int16
+// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %si
+// sv: %dx
+//
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store_ret16
+
+ movw %si, %ax // "cv"
+ lock
+ cmpxchgw %dx,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret16
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store_ret32
+//
+// kmp_int32
+// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %esi
+// sv: %edx
+//
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store_ret32
+
+ movl %esi, %eax // "cv"
+ lock
+ cmpxchgl %edx,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret32
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_compare_and_store_ret64
+//
+// kmp_int64
+// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+//
+// parameters:
+// p: %rdi
+// cv: %rsi
+// sv: %rdx
+// return: %eax
+
+ .text
+ PROC __kmp_compare_and_store_ret64
+
+ movq %rsi, %rax // "cv"
+ lock
+ cmpxchgq %rdx,(%rdi)
+ ret
+
+ DEBUG_INFO __kmp_compare_and_store_ret64
+
+# endif /* !KMP_ASM_INTRINS */
+
+
+# if !KMP_MIC
+
+# if !KMP_ASM_INTRINS
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_real32
+//
+// kmp_real32
+// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
+//
+// parameters:
+// addr: %rdi
+// data: %xmm0 (lower 4 bytes)
+//
+// return: %xmm0 (lower 4 bytes)
+
+ .text
+ PROC __kmp_xchg_real32
+
+ movd %xmm0, %eax // load "data" to eax
+
+ lock
+ xchgl %eax, (%rdi)
+
+ movd %eax, %xmm0 // load old value into return register
+
+ ret
+
+ DEBUG_INFO __kmp_xchg_real32
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_xchg_real64
+//
+// kmp_real64
+// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
+//
+// parameters:
+// addr: %rdi
+// data: %xmm0 (lower 8 bytes)
+// return: %xmm0 (lower 8 bytes)
+//
+
+ .text
+ PROC __kmp_xchg_real64
+
+ movd %xmm0, %rax // load "data" to rax
+
+ lock
+ xchgq %rax, (%rdi)
+
+ movd %rax, %xmm0 // load old value into return register
+ ret
+
+ DEBUG_INFO __kmp_xchg_real64
+
+
+# endif /* !KMP_ASM_INTRINS */
+
+# endif /* !KMP_MIC */
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_load_x87_fpu_control_word
+//
+// void
+// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
+//
+// parameters:
+// p: %rdi
+//
+
+ .text
+ PROC __kmp_load_x87_fpu_control_word
+
+ fldcw (%rdi)
+ ret
+
+ DEBUG_INFO __kmp_load_x87_fpu_control_word
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_store_x87_fpu_control_word
+//
+// void
+// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
+//
+// parameters:
+// p: %rdi
+//
+
+ .text
+ PROC __kmp_store_x87_fpu_control_word
+
+ fstcw (%rdi)
+ ret
+
+ DEBUG_INFO __kmp_store_x87_fpu_control_word
+
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_clear_x87_fpu_status_word
+//
+// void
+// __kmp_clear_x87_fpu_status_word();
+//
+//
+
+ .text
+ PROC __kmp_clear_x87_fpu_status_word
+
+#if KMP_MIC
+// TODO: remove the workaround for problem with fnclex instruction (no CQ known)
+ fstenv -32(%rsp) // store FP env
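+        // fstenv writes a 28-byte FP environment in which the status word sits at offset 4, hence the 4-32(%rsp) operand below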
+ andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW
+ fldenv -32(%rsp) // load FP env back
+ ret
+#else
+ fnclex
+ ret
+#endif
+
+ DEBUG_INFO __kmp_clear_x87_fpu_status_word
+
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
+// int gtid, int tid,
+// int argc, void *p_argv[] ) {
+// (*pkfn)( & gtid, & tid, argv[0], ... );
+// return 1;
+// }
+//
+// note:
+//      at the call to pkfn, %rsp must be 128-byte aligned for the compiler
+//
+// parameters:
+// %rdi: pkfn
+// %esi: gtid
+// %edx: tid
+// %ecx: argc
+// %r8: p_argv
+// %r9: &exit_frame
+//
+// locals:
+// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
+// __tid: tid parm pushed on stack so can pass &tid to pkfn
+//
+// reg temps:
+// %rax: used all over the place
+// %rdx: used in stack pointer alignment calculation
+// %r11: used to traverse p_argv array
+// %rsi: used as temporary for stack parameters
+// used as temporary for number of pkfn parms to push
+// %rbx: used to hold pkfn address, and zero constant, callee-save
+//
+// return: %eax (always 1/TRUE)
+//
+
+__gtid = -16
+__tid = -24
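+// These are %rbp-relative frame offsets: pushq %rbx lands at -8(%rbp), the pushed gtid at -16(%rbp)
+// and tid at -24(%rbp), matching the leaq __gtid(%rbp)/__tid(%rbp) used below.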
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ .text
+ PROC __kmp_invoke_microtask
+
+ pushq %rbp // save base pointer
+ KMP_CFI_DEF_OFFSET 16
+ KMP_CFI_OFFSET rbp,-16
+ movq %rsp,%rbp // establish the base pointer for this routine.
+ KMP_CFI_REGISTER rbp
+
+#if OMPT_SUPPORT
+ movq %rbp, (%r9) // save exit_frame
+#endif
+
+ pushq %rbx // %rbx is callee-saved register
+        pushq %rsi      // Put gtid on stack so can pass &gtid to pkfn
+ pushq %rdx // Put tid on stack so can pass &tid to pkfn
+
+ movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
+ movq $0, %rbx // constant for cmovs later
+ subq $4, %rax // subtract four args passed in registers to pkfn
+#if KMP_MIC
+ js KMP_LABEL(kmp_0) // jump to movq
+ jmp KMP_LABEL(kmp_0_exit) // jump ahead
+KMP_LABEL(kmp_0):
+ movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
+KMP_LABEL(kmp_0_exit):
+#else
+ cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
+#endif // KMP_MIC
+
+ movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
+ shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
+
+ movq %rsp, %rdx //
+ subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
+ // without align, stack ptr would be this
+ movq %rdx, %rax // Save to %rax
+
+ andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
+ subq %rax, %rdx // Amount to subtract from %rsp
+ subq %rdx, %rsp // Prepare the stack ptr --
+ // now %rsp will align to 128-byte boundary at call site
+
+ // setup pkfn parameter reg and stack
+ movq %rcx, %rax // argc -> %rax
+ cmpq $0, %rsi
+ je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
+ shlq $3, %rcx // argc*8 -> %rcx
+ movq %r8, %rdx // p_argv -> %rdx
+ addq %rcx, %rdx // &p_argv[argc] -> %rdx
+
+ movq %rsi, %rcx // max (0, argc-4) -> %rcx
+
+KMP_LABEL(kmp_invoke_push_parms):
+ // push nth - 7th parms to pkfn on stack
+ subq $8, %rdx // decrement p_argv pointer to previous parm
+ movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
+ pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
+ subl $1, %ecx
+
+// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
+// if the name of the label that is an operand of this jecxz starts with a dot (".");
+// Apple's linker does not support 1-byte length relocation;
+// Resolution: replace all .labelX entries with L_labelX.
+
+ jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
+ jmp KMP_LABEL(kmp_invoke_push_parms)
+ ALIGN 3
+KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
+ // order here is important to avoid trashing
+ // registers used for both input and output parms!
+ movq %rdi, %rbx // pkfn -> %rbx
+ leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
+ leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
+
+ movq %r8, %r11 // p_argv -> %r11
+
+#if KMP_MIC
+ cmpq $4, %rax // argc >= 4?
+ jns KMP_LABEL(kmp_4) // jump to movq
+ jmp KMP_LABEL(kmp_4_exit) // jump ahead
+KMP_LABEL(kmp_4):
+ movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
+KMP_LABEL(kmp_4_exit):
+
+ cmpq $3, %rax // argc >= 3?
+ jns KMP_LABEL(kmp_3) // jump to movq
+ jmp KMP_LABEL(kmp_3_exit) // jump ahead
+KMP_LABEL(kmp_3):
+ movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
+KMP_LABEL(kmp_3_exit):
+
+ cmpq $2, %rax // argc >= 2?
+ jns KMP_LABEL(kmp_2) // jump to movq
+ jmp KMP_LABEL(kmp_2_exit) // jump ahead
+KMP_LABEL(kmp_2):
+ movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
+KMP_LABEL(kmp_2_exit):
+
+ cmpq $1, %rax // argc >= 1?
+ jns KMP_LABEL(kmp_1) // jump to movq
+ jmp KMP_LABEL(kmp_1_exit) // jump ahead
+KMP_LABEL(kmp_1):
+ movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
+KMP_LABEL(kmp_1_exit):
+#else
+ cmpq $4, %rax // argc >= 4?
+ cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
+
+ cmpq $3, %rax // argc >= 3?
+ cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
+
+ cmpq $2, %rax // argc >= 2?
+ cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
+
+ cmpq $1, %rax // argc >= 1?
+ cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
+#endif // KMP_MIC
+
+ call *%rbx // call (*pkfn)();
+ movq $1, %rax // move 1 into return register;
+
+ movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
+ movq %rbp, %rsp // restore stack pointer
+ popq %rbp // restore frame pointer
+ KMP_CFI_DEF rsp,8
+ ret
+
+ DEBUG_INFO __kmp_invoke_microtask
+// -- End __kmp_invoke_microtask
+
+// kmp_uint64
+// __kmp_hardware_timestamp(void)
+ .text
+ PROC __kmp_hardware_timestamp
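+        // rdtsc splits the timestamp across %edx (high) and %eax (low); the shift/or below folds both halves into %rax for the 64-bit return value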
+ rdtsc
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+
+ DEBUG_INFO __kmp_hardware_timestamp
+// -- End __kmp_hardware_timestamp
+
+//------------------------------------------------------------------------
+//
+// FUNCTION __kmp_bsr32
+//
+// int
+// __kmp_bsr32( int );
+//
+
+ .text
+ PROC __kmp_bsr32
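+        // bsr places the bit index of the most significant set bit of %edi in %eax (result undefined when the input is zero)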
+
+ bsr %edi,%eax
+ ret
+
+ DEBUG_INFO __kmp_bsr32
+
+
+// -----------------------------------------------------------------------
+#endif /* KMP_ARCH_X86_64 */
+
+#if KMP_ARCH_ARM
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ .align 4
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .4byte .gomp_critical_user_
+ .size __kmp_unnamed_critical_addr,4
+#endif /* KMP_ARCH_ARM */
+
+#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ .align 8
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .8byte .gomp_critical_user_
+ .size __kmp_unnamed_critical_addr,8
+#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
+
+#if KMP_OS_LINUX
+# if KMP_ARCH_ARM
+.section .note.GNU-stack,"",%progbits
+# else
+.section .note.GNU-stack,"",@progbits
+# endif
+#endif
diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_util.c b/contrib/libs/cxxsupp/openmp/z_Linux_util.c
index 67129797c9..237677b24c 100644
--- a/contrib/libs/cxxsupp/openmp/z_Linux_util.c
+++ b/contrib/libs/cxxsupp/openmp/z_Linux_util.c
@@ -1,2706 +1,2706 @@
-/*
- * z_Linux_util.c -- platform specific routines.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_wrapper_getpid.h"
-#include "kmp_itt.h"
-#include "kmp_str.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_stats.h"
-#include "kmp_wait_release.h"
-
-#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
-# include <alloca.h>
-#endif
-#include <unistd.h>
-#include <math.h> // HUGE_VAL.
-#include <sys/time.h>
-#include <sys/times.h>
-#include <sys/resource.h>
-#include <sys/syscall.h>
-
-#if KMP_OS_LINUX && !KMP_OS_CNK
-# include <sys/sysinfo.h>
-# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
-// We should really include <futex.h>, but that causes compatibility problems on different
-// Linux* OS distributions that either require that you include (or break when you try to include)
-// <pci/types.h>.
-// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
-// we just define the constants here and don't include <futex.h>
-# ifndef FUTEX_WAIT
-# define FUTEX_WAIT 0
-# endif
-# ifndef FUTEX_WAKE
-# define FUTEX_WAKE 1
-# endif
-# endif
-#elif KMP_OS_DARWIN
-# include <sys/sysctl.h>
-# include <mach/mach.h>
-#elif KMP_OS_FREEBSD
-# include <pthread_np.h>
-#endif
-
-
-#include <dirent.h>
-#include <ctype.h>
-#include <fcntl.h>
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-struct kmp_sys_timer {
- struct timespec start;
-};
-
-// Convert timespec to nanoseconds.
-#define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec)
-
-static struct kmp_sys_timer __kmp_sys_timer_data;
-
-#if KMP_HANDLE_SIGNALS
- typedef void (* sig_func_t )( int );
- STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[ NSIG ];
- static sigset_t __kmp_sigset;
-#endif
-
-static int __kmp_init_runtime = FALSE;
-
-static int __kmp_fork_count = 0;
-
-static pthread_condattr_t __kmp_suspend_cond_attr;
-static pthread_mutexattr_t __kmp_suspend_mutex_attr;
-
-static kmp_cond_align_t __kmp_wait_cv;
-static kmp_mutex_align_t __kmp_wait_mx;
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef DEBUG_SUSPEND
-static void
-__kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
-{
- KMP_SNPRINTF( buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
- cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
- cond->c_cond.__c_waiting );
-}
-#endif
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
-
-/*
- * Affinity support
- */
-
-/*
- * On some of the older OS's that we build on, these constants aren't present
- * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
- * all systems of the same arch where they are defined, and they cannot change;
- * they are carved in stone forever.
- */
-
-# if KMP_ARCH_X86 || KMP_ARCH_ARM
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 241
-# elif __NR_sched_setaffinity != 241
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 242
-# elif __NR_sched_getaffinity != 242
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-# elif KMP_ARCH_AARCH64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 122
-# elif __NR_sched_setaffinity != 122
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 123
-# elif __NR_sched_getaffinity != 123
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-# elif KMP_ARCH_X86_64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 203
-# elif __NR_sched_setaffinity != 203
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 204
-# elif __NR_sched_getaffinity != 204
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-# elif KMP_ARCH_PPC64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 222
-# elif __NR_sched_setaffinity != 222
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 223
-# elif __NR_sched_getaffinity != 223
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-
-# else
-# error Unknown or unsupported architecture
-
-# endif /* KMP_ARCH_* */
-
-int
-__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
-{
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
-#if KMP_USE_HWLOC
- int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-#else
- int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
-#endif
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FatalSysError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
-}
-
-int
-__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
-{
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
-
-#if KMP_USE_HWLOC
- int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-#else
- int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
-#endif
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FatalSysError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
-}
-
-void
-__kmp_affinity_bind_thread( int which )
-{
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
-
- kmp_affin_mask_t *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
- KMP_CPU_SET(which, mask);
- __kmp_set_system_affinity(mask, TRUE);
- KMP_CPU_FREE_FROM_STACK(mask);
-}
-
-/*
- * Determine if we can access affinity functionality on this version of
- * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
- * __kmp_affin_mask_size to the appropriate value (0 means not capable).
- */
-void
-__kmp_affinity_determine_capable(const char *env_var)
-{
- //
- // Check and see if the OS supports thread affinity.
- //
-
-# define KMP_CPU_SET_SIZE_LIMIT (1024*1024)
-
- int gCode;
- int sCode;
- kmp_affin_mask_t *buf;
- buf = ( kmp_affin_mask_t * ) KMP_INTERNAL_MALLOC( KMP_CPU_SET_SIZE_LIMIT );
-
- // If Linux* OS:
- // If the syscall fails or returns a suggestion for the size,
- // then we don't have to search for an appropriate size.
- gCode = syscall( __NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf );
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "initial getaffinity call returned %d errno = %d\n",
- gCode, errno));
-
- //if ((gCode < 0) && (errno == ENOSYS))
- if (gCode < 0) {
- //
- // System call not supported
- //
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)
- && (__kmp_affinity_type != affinity_default)
- && (__kmp_affinity_type != affinity_disabled))) {
- int error = errno;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( GetAffSysCallNotSupported, env_var ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- KMP_AFFINITY_DISABLE();
- KMP_INTERNAL_FREE(buf);
- return;
- }
- if (gCode > 0) { // Linux* OS only
- // The optimal situation: the OS returns the size of the buffer
- // it expects.
- //
-        // A verification of correct behavior is that setaffinity on a NULL
- // buffer with the same size fails with errno set to EFAULT.
- sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL );
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "setaffinity for mask size %d returned %d errno = %d\n",
- gCode, sCode, errno));
- if (sCode < 0) {
- if (errno == ENOSYS) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)
- && (__kmp_affinity_type != affinity_default)
- && (__kmp_affinity_type != affinity_disabled))) {
- int error = errno;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( SetAffSysCallNotSupported, env_var ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- KMP_AFFINITY_DISABLE();
- KMP_INTERNAL_FREE(buf);
- }
- if (errno == EFAULT) {
- KMP_AFFINITY_ENABLE(gCode);
- KA_TRACE(10, ( "__kmp_affinity_determine_capable: "
- "affinity supported (mask size %d)\n",
- (int)__kmp_affin_mask_size));
- KMP_INTERNAL_FREE(buf);
- return;
- }
- }
- }
-
- //
- // Call the getaffinity system call repeatedly with increasing set sizes
- // until we succeed, or reach an upper bound on the search.
- //
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "searching for proper set size\n"));
- int size;
- for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
- gCode = syscall( __NR_sched_getaffinity, 0, size, buf );
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "getaffinity for mask size %d returned %d errno = %d\n", size,
- gCode, errno));
-
- if (gCode < 0) {
- if ( errno == ENOSYS )
- {
- //
- // We shouldn't get here
- //
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n",
- size));
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)
- && (__kmp_affinity_type != affinity_default)
- && (__kmp_affinity_type != affinity_disabled))) {
- int error = errno;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( GetAffSysCallNotSupported, env_var ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- KMP_AFFINITY_DISABLE();
- KMP_INTERNAL_FREE(buf);
- return;
- }
- continue;
- }
-
- sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL );
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "setaffinity for mask size %d returned %d errno = %d\n",
- gCode, sCode, errno));
- if (sCode < 0) {
- if (errno == ENOSYS) { // Linux* OS only
- //
- // We shouldn't get here
- //
- KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
- "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n",
- size));
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)
- && (__kmp_affinity_type != affinity_default)
- && (__kmp_affinity_type != affinity_disabled))) {
- int error = errno;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( SetAffSysCallNotSupported, env_var ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- KMP_AFFINITY_DISABLE();
- KMP_INTERNAL_FREE(buf);
- return;
- }
- if (errno == EFAULT) {
- KMP_AFFINITY_ENABLE(gCode);
- KA_TRACE(10, ( "__kmp_affinity_determine_capable: "
- "affinity supported (mask size %d)\n",
- (int)__kmp_affin_mask_size));
- KMP_INTERNAL_FREE(buf);
- return;
- }
- }
- }
- //int error = errno; // save uncaught error code
- KMP_INTERNAL_FREE(buf);
- // errno = error; // restore uncaught error code, will be printed at the next KMP_WARNING below
-
- //
- // Affinity is not supported
- //
- KMP_AFFINITY_DISABLE();
- KA_TRACE(10, ( "__kmp_affinity_determine_capable: "
- "cannot determine mask size - affinity not supported\n"));
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)
- && (__kmp_affinity_type != affinity_default)
- && (__kmp_affinity_type != affinity_disabled))) {
- KMP_WARNING( AffCantGetMaskSize, env_var );
- }
-}
-
-#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && !KMP_OS_CNK
-
-int
-__kmp_futex_determine_capable()
-{
- int loc = 0;
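-    // Probe for futex support with a harmless FUTEX_WAKE on a local word: if the syscall exists
-    // it simply reports that nobody was woken; only ENOSYS means the call is unavailable.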
- int rc = syscall( __NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0 );
- int retval = ( rc == 0 ) || ( errno != ENOSYS );
-
- KA_TRACE(10, ( "__kmp_futex_determine_capable: rc = %d errno = %d\n", rc,
- errno ) );
- KA_TRACE(10, ( "__kmp_futex_determine_capable: futex syscall%s supported\n",
- retval ? "" : " not" ) );
-
- return retval;
-}
-
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS)
-/*
- * The IA-32 architecture only provides a 32-bit "add-exchange" instruction, which
- * forces us to use compare_and_store loops for these routines
- */
-
-kmp_int8
-__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d )
-{
- kmp_int8 old_value, new_value;
-
- old_value = TCR_1( *p );
- new_value = old_value | d;
-
- while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_1( *p );
- new_value = old_value | d;
- }
- return old_value;
-}
-
-kmp_int8
-__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d )
-{
- kmp_int8 old_value, new_value;
-
- old_value = TCR_1( *p );
- new_value = old_value & d;
-
- while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_1( *p );
- new_value = old_value & d;
- }
- return old_value;
-}
-
-kmp_int32
-__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d )
-{
- kmp_int32 old_value, new_value;
-
- old_value = TCR_4( *p );
- new_value = old_value | d;
-
- while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_4( *p );
- new_value = old_value | d;
- }
- return old_value;
-}
-
-kmp_int32
-__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
-{
- kmp_int32 old_value, new_value;
-
- old_value = TCR_4( *p );
- new_value = old_value & d;
-
- while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_4( *p );
- new_value = old_value & d;
- }
- return old_value;
-}
-
-# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
-kmp_int8
-__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d )
-{
- kmp_int8 old_value, new_value;
-
- old_value = TCR_1( *p );
- new_value = old_value + d;
-
- while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_1( *p );
- new_value = old_value + d;
- }
- return old_value;
-}
-
-kmp_int64
-__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_8( *p );
- new_value = old_value + d;
-
- while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_8( *p );
- new_value = old_value + d;
- }
- return old_value;
-}
-# endif /* KMP_ARCH_X86 */
-
-kmp_int64
-__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_8( *p );
- new_value = old_value | d;
- while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_8( *p );
- new_value = old_value | d;
- }
- return old_value;
-}
-
-kmp_int64
-__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_8( *p );
- new_value = old_value & d;
- while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_8( *p );
- new_value = old_value & d;
- }
- return old_value;
-}
-
-#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
-
-void
-__kmp_terminate_thread( int gtid )
-{
- int status;
- kmp_info_t *th = __kmp_threads[ gtid ];
-
- if ( !th ) return;
-
- #ifdef KMP_CANCEL_THREADS
- KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );
- status = pthread_cancel( th->th.th_info.ds.ds_thread );
- if ( status != 0 && status != ESRCH ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantTerminateWorkerThread ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
- #endif
- __kmp_yield( TRUE );
-} //
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Set thread stack info according to values returned by
- * pthread_getattr_np().
- * If values are unreasonable, assume call failed and use
- * incremental stack refinement method instead.
- * Returns TRUE if the stack parameters could be determined exactly,
- * FALSE if incremental refinement is necessary.
- */
-static kmp_int32
-__kmp_set_stack_info( int gtid, kmp_info_t *th )
-{
- int stack_data;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
- /* Linux* OS only -- no pthread_getattr_np support on OS X* */
- pthread_attr_t attr;
- int status;
- size_t size = 0;
- void * addr = 0;
-
- /* Always do incremental stack refinement for ubermaster threads since the initial
- thread stack range can be reduced by sibling thread creation so pthread_attr_getstack
- may cause thread gtid aliasing */
- if ( ! KMP_UBER_GTID(gtid) ) {
-
- /* Fetch the real thread attributes */
- status = pthread_attr_init( &attr );
- KMP_CHECK_SYSFAIL( "pthread_attr_init", status );
-#if KMP_OS_FREEBSD || KMP_OS_NETBSD
- status = pthread_attr_get_np( pthread_self(), &attr );
- KMP_CHECK_SYSFAIL( "pthread_attr_get_np", status );
-#else
- status = pthread_getattr_np( pthread_self(), &attr );
- KMP_CHECK_SYSFAIL( "pthread_getattr_np", status );
-#endif
- status = pthread_attr_getstack( &attr, &addr, &size );
- KMP_CHECK_SYSFAIL( "pthread_attr_getstack", status );
- KA_TRACE( 60, ( "__kmp_set_stack_info: T#%d pthread_attr_getstack returned size: %lu, "
- "low addr: %p\n",
- gtid, size, addr ));
-
- status = pthread_attr_destroy( &attr );
- KMP_CHECK_SYSFAIL( "pthread_attr_destroy", status );
- }
-
- if ( size != 0 && addr != 0 ) { /* was stack parameter determination successful? */
- /* Store the correct base and size */
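-        /* pthread_attr_getstack reports the low end of the stack; the stack grows downwards, so addr + size is recorded as the base */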
- TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
- TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
- TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
- return TRUE;
- }
-#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */
- /* Use incremental refinement starting from initial conservative estimate */
- TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
- TCW_PTR(th -> th.th_info.ds.ds_stackbase, &stack_data);
- TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
- return FALSE;
-}
-
-static void*
-__kmp_launch_worker( void *thr )
-{
- int status, old_type, old_state;
-#ifdef KMP_BLOCK_SIGNALS
- sigset_t new_set, old_set;
-#endif /* KMP_BLOCK_SIGNALS */
- void *exit_val;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
- void * volatile padding = 0;
-#endif
- int gtid;
-
- gtid = ((kmp_info_t*)thr) -> th.th_info.ds.ds_gtid;
- __kmp_gtid_set_specific( gtid );
-#ifdef KMP_TDATA_GTID
- __kmp_gtid = gtid;
-#endif
-#if KMP_STATS_ENABLED
- // set __thread local index to point to thread-specific stats
- __kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats;
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_thread_name( gtid );
-#endif /* USE_ITT_BUILD */
-
-#if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_set_init_mask( gtid, FALSE );
-#endif
-
-#ifdef KMP_CANCEL_THREADS
- status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type );
- KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status );
- /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */
- status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state );
- KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
-#endif
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- //
- // Set the FP control regs to be a copy of
- // the parallel initialization thread's.
- //
- __kmp_clear_x87_fpu_status_word();
- __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
- __kmp_load_mxcsr( &__kmp_init_mxcsr );
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-#ifdef KMP_BLOCK_SIGNALS
- status = sigfillset( & new_set );
- KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status );
- status = pthread_sigmask( SIG_BLOCK, & new_set, & old_set );
- KMP_CHECK_SYSFAIL( "pthread_sigmask", status );
-#endif /* KMP_BLOCK_SIGNALS */
-
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
- if ( __kmp_stkoffset > 0 && gtid > 0 ) {
- padding = KMP_ALLOCA( gtid * __kmp_stkoffset );
- }
-#endif
-
- KMP_MB();
- __kmp_set_stack_info( gtid, (kmp_info_t*)thr );
-
- __kmp_check_stack_overlap( (kmp_info_t*)thr );
-
- exit_val = __kmp_launch_thread( (kmp_info_t *) thr );
-
-#ifdef KMP_BLOCK_SIGNALS
- status = pthread_sigmask( SIG_SETMASK, & old_set, NULL );
- KMP_CHECK_SYSFAIL( "pthread_sigmask", status );
-#endif /* KMP_BLOCK_SIGNALS */
-
- return exit_val;
-}
-
-
-/* The monitor thread controls all of the threads in the complex */
-
-static void*
-__kmp_launch_monitor( void *thr )
-{
- int status, old_type, old_state;
-#ifdef KMP_BLOCK_SIGNALS
- sigset_t new_set;
-#endif /* KMP_BLOCK_SIGNALS */
- struct timespec interval;
- int yield_count;
- int yield_cycles = 0;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #1 launched\n" ) );
-
- /* register us as the monitor thread */
- __kmp_gtid_set_specific( KMP_GTID_MONITOR );
-#ifdef KMP_TDATA_GTID
- __kmp_gtid = KMP_GTID_MONITOR;
-#endif
-
- KMP_MB();
-
-#if USE_ITT_BUILD
- __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread.
-#endif /* USE_ITT_BUILD */
-
- __kmp_set_stack_info( ((kmp_info_t*)thr)->th.th_info.ds.ds_gtid, (kmp_info_t*)thr );
-
- __kmp_check_stack_overlap( (kmp_info_t*)thr );
-
-#ifdef KMP_CANCEL_THREADS
- status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type );
- KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status );
- /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */
- status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state );
- KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
-#endif
-
- #if KMP_REAL_TIME_FIX
-    // This is a potential fix which allows applications with a real-time scheduling policy to work.
-    // However, a decision about the fix has not been made yet, so it is disabled by default.
-    { // Was the program started with a real-time scheduling policy?
- int sched = sched_getscheduler( 0 );
- if ( sched == SCHED_FIFO || sched == SCHED_RR ) {
-            // Yes, we are part of a real-time application. Try to increase the priority of the
- // monitor.
- struct sched_param param;
- int max_priority = sched_get_priority_max( sched );
- int rc;
- KMP_WARNING( RealTimeSchedNotSupported );
- sched_getparam( 0, & param );
- if ( param.sched_priority < max_priority ) {
- param.sched_priority += 1;
- rc = sched_setscheduler( 0, sched, & param );
- if ( rc != 0 ) {
- int error = errno;
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantChangeMonitorPriority ),
- KMP_ERR( error ),
- KMP_MSG( MonitorWillStarve ),
- __kmp_msg_null
- );
- }; // if
- } else {
-                // We cannot abort here, because the number of CPUs may be enough for all the threads,
-                // including the monitor thread, so the application could potentially work...
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( RunningAtMaxPriority ),
- KMP_MSG( MonitorWillStarve ),
- KMP_HNT( RunningAtMaxPriority ),
- __kmp_msg_null
- );
- }; // if
- }; // if
- TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); // AC: free thread that waits for monitor started
- }
- #endif // KMP_REAL_TIME_FIX
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- if ( __kmp_monitor_wakeups == 1 ) {
- interval.tv_sec = 1;
- interval.tv_nsec = 0;
- } else {
- interval.tv_sec = 0;
- interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
- }
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #2 monitor\n" ) );
-
- if (__kmp_yield_cycle) {
- __kmp_yielding_on = 0; /* Start out with yielding shut off */
- yield_count = __kmp_yield_off_count;
- } else {
- __kmp_yielding_on = 1; /* Yielding is on permanently */
- }
-
- while( ! TCR_4( __kmp_global.g.g_done ) ) {
- struct timespec now;
- struct timeval tval;
-
- /* This thread monitors the state of the system */
-
- KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );
-
- status = gettimeofday( &tval, NULL );
- KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
- TIMEVAL_TO_TIMESPEC( &tval, &now );
-
- now.tv_sec += interval.tv_sec;
- now.tv_nsec += interval.tv_nsec;
-
- if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
- now.tv_sec += 1;
- now.tv_nsec -= KMP_NSEC_PER_SEC;
- }
-
- status = pthread_mutex_lock( & __kmp_wait_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
- // AC: the monitor should not fall asleep if g_done has been set
- if ( !TCR_4(__kmp_global.g.g_done) ) { // check once more under mutex
- status = pthread_cond_timedwait( &__kmp_wait_cv.c_cond, &__kmp_wait_mx.m_mutex, &now );
- if ( status != 0 ) {
- if ( status != ETIMEDOUT && status != EINTR ) {
- KMP_SYSFAIL( "pthread_cond_timedwait", status );
- };
- };
- };
- status = pthread_mutex_unlock( & __kmp_wait_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
-
- if (__kmp_yield_cycle) {
- yield_cycles++;
- if ( (yield_cycles % yield_count) == 0 ) {
- if (__kmp_yielding_on) {
- __kmp_yielding_on = 0; /* Turn it off now */
- yield_count = __kmp_yield_off_count;
- } else {
- __kmp_yielding_on = 1; /* Turn it on now */
- yield_count = __kmp_yield_on_count;
- }
- yield_cycles = 0;
- }
- } else {
- __kmp_yielding_on = 1;
- }
-
- TCW_4( __kmp_global.g.g_time.dt.t_value,
- TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #3 cleanup\n" ) );
-
-#ifdef KMP_BLOCK_SIGNALS
- status = sigfillset( & new_set );
- KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status );
- status = pthread_sigmask( SIG_UNBLOCK, & new_set, NULL );
- KMP_CHECK_SYSFAIL( "pthread_sigmask", status );
-#endif /* KMP_BLOCK_SIGNALS */
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #4 finished\n" ) );
-
- if( __kmp_global.g.g_abort != 0 ) {
- /* now we need to terminate the worker threads */
- /* the value of t_abort is the signal we caught */
-
- int gtid;
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", __kmp_global.g.g_abort ) );
-
- /* terminate the OpenMP worker threads */
- /* TODO this is not valid for sibling threads!!
- * the uber master might not be 0 anymore.. */
- for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
- __kmp_terminate_thread( gtid );
-
- __kmp_cleanup();
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #6 raise sig=%d\n", __kmp_global.g.g_abort ) );
-
- if (__kmp_global.g.g_abort > 0)
- raise( __kmp_global.g.g_abort );
-
- }
-
- KA_TRACE( 10, ("__kmp_launch_monitor: #7 exit\n" ) );
-
- return thr;
-}
-
-void
-__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
-{
- pthread_t handle;
- pthread_attr_t thread_attr;
- int status;
-
-
- th->th.th_info.ds.ds_gtid = gtid;
-
-#if KMP_STATS_ENABLED
- // sets up worker thread stats
- __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);
-
-    // th->th.th_stats is used to transfer the thread-specific stats pointer to __kmp_launch_worker,
-    // so when the thread is created (and enters __kmp_launch_worker) it will
-    // set its __thread local pointer to th->th.th_stats
- th->th.th_stats = __kmp_stats_list.push_back(gtid);
- if(KMP_UBER_GTID(gtid)) {
- __kmp_stats_start_time = tsc_tick_count::now();
- __kmp_stats_thread_ptr = th->th.th_stats;
- __kmp_stats_init();
- KMP_START_EXPLICIT_TIMER(OMP_serial);
- KMP_START_EXPLICIT_TIMER(OMP_start_end);
- }
- __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
-
-#endif // KMP_STATS_ENABLED
-
- if ( KMP_UBER_GTID(gtid) ) {
- KA_TRACE( 10, ("__kmp_create_worker: uber thread (%d)\n", gtid ) );
- th -> th.th_info.ds.ds_thread = pthread_self();
- __kmp_set_stack_info( gtid, th );
- __kmp_check_stack_overlap( th );
- return;
- }; // if
-
- KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
-#ifdef KMP_THREAD_ATTR
- {
- status = pthread_attr_init( &thread_attr );
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantInitThreadAttrs ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
- status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE );
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetWorkerState ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
-
- /* Set stack size for this thread now.
- * The multiple of 2 is there because on some machines, requesting an unusual stacksize
- * causes the thread to have an offset before the dummy alloca() takes place to create the
- * offset. Since we want the user to have a sufficient stacksize AND support a stack offset, we
- * alloca() twice the offset so that the upcoming alloca() does not eliminate any premade
- * offset, and also gives the user the stack space they requested for all threads */
- stack_size += gtid * __kmp_stkoffset * 2;
-
- KA_TRACE( 10, ( "__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
- "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
- gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) );
-
-# ifdef _POSIX_THREAD_ATTR_STACKSIZE
- status = pthread_attr_setstacksize( & thread_attr, stack_size );
-# ifdef KMP_BACKUP_STKSIZE
- if ( status != 0 ) {
- if ( ! __kmp_env_stksize ) {
- stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
- __kmp_stksize = KMP_BACKUP_STKSIZE;
- KA_TRACE( 10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
- "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
- "bytes\n",
- gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size )
- );
- status = pthread_attr_setstacksize( &thread_attr, stack_size );
- }; // if
- }; // if
-# endif /* KMP_BACKUP_STKSIZE */
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetWorkerStackSize, stack_size ),
- KMP_ERR( status ),
- KMP_HNT( ChangeWorkerStackSize ),
- __kmp_msg_null
- );
- }; // if
-# endif /* _POSIX_THREAD_ATTR_STACKSIZE */
- }
-#endif /* KMP_THREAD_ATTR */
-
- {
- status = pthread_create( & handle, & thread_attr, __kmp_launch_worker, (void *) th );
- if ( status != 0 || ! handle ) { // ??? Why do we check handle??
-#ifdef _POSIX_THREAD_ATTR_STACKSIZE
- if ( status == EINVAL ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetWorkerStackSize, stack_size ),
- KMP_ERR( status ),
- KMP_HNT( IncreaseWorkerStackSize ),
- __kmp_msg_null
- );
- };
- if ( status == ENOMEM ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetWorkerStackSize, stack_size ),
- KMP_ERR( status ),
- KMP_HNT( DecreaseWorkerStackSize ),
- __kmp_msg_null
- );
- };
-#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
- if ( status == EAGAIN ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( NoResourcesForWorkerThread ),
- KMP_ERR( status ),
- KMP_HNT( Decrease_NUM_THREADS ),
- __kmp_msg_null
- );
- }; // if
- KMP_SYSFAIL( "pthread_create", status );
- }; // if
-
- th->th.th_info.ds.ds_thread = handle;
- }
-
-#ifdef KMP_THREAD_ATTR
- {
- status = pthread_attr_destroy( & thread_attr );
- if ( status ) {
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantDestroyThreadAttrs ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
- }
-#endif /* KMP_THREAD_ATTR */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
-
-} // __kmp_create_worker
-
-
-void
-__kmp_create_monitor( kmp_info_t *th )
-{
- pthread_t handle;
- pthread_attr_t thread_attr;
- size_t size;
- int status;
- int auto_adj_size = FALSE;
-
- KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
- th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
- #if KMP_REAL_TIME_FIX
- TCW_4( __kmp_global.g.g_time.dt.t_value, -1 ); // Will use it for synchronization a bit later.
- #else
- TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );
- #endif // KMP_REAL_TIME_FIX
-
- #ifdef KMP_THREAD_ATTR
- if ( __kmp_monitor_stksize == 0 ) {
- __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
- auto_adj_size = TRUE;
- }
- status = pthread_attr_init( &thread_attr );
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantInitThreadAttrs ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
- status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE );
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetMonitorState ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
-
- #ifdef _POSIX_THREAD_ATTR_STACKSIZE
- status = pthread_attr_getstacksize( & thread_attr, & size );
- KMP_CHECK_SYSFAIL( "pthread_attr_getstacksize", status );
- #else
- size = __kmp_sys_min_stksize;
- #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
- #endif /* KMP_THREAD_ATTR */
-
- if ( __kmp_monitor_stksize == 0 ) {
- __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
- }
- if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
- __kmp_monitor_stksize = __kmp_sys_min_stksize;
- }
-
- KA_TRACE( 10, ( "__kmp_create_monitor: default stacksize = %lu bytes,"
- "requested stacksize = %lu bytes\n",
- size, __kmp_monitor_stksize ) );
-
- retry:
-
- /* Set stack size for this thread now. */
-
- #ifdef _POSIX_THREAD_ATTR_STACKSIZE
- KA_TRACE( 10, ( "__kmp_create_monitor: setting stacksize = %lu bytes,",
- __kmp_monitor_stksize ) );
- status = pthread_attr_setstacksize( & thread_attr, __kmp_monitor_stksize );
- if ( status != 0 ) {
- if ( auto_adj_size ) {
- __kmp_monitor_stksize *= 2;
- goto retry;
- }
- __kmp_msg(
- kmp_ms_warning, // should this be fatal? BB
- KMP_MSG( CantSetMonitorStackSize, (long int) __kmp_monitor_stksize ),
- KMP_ERR( status ),
- KMP_HNT( ChangeMonitorStackSize ),
- __kmp_msg_null
- );
- }; // if
- #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
-
- status = pthread_create( &handle, & thread_attr, __kmp_launch_monitor, (void *) th );
-
- if ( status != 0 ) {
- #ifdef _POSIX_THREAD_ATTR_STACKSIZE
- if ( status == EINVAL ) {
- if ( auto_adj_size && ( __kmp_monitor_stksize < (size_t)0x40000000 ) ) {
- __kmp_monitor_stksize *= 2;
- goto retry;
- }
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ),
- KMP_ERR( status ),
- KMP_HNT( IncreaseMonitorStackSize ),
- __kmp_msg_null
- );
- }; // if
- if ( status == ENOMEM ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ),
- KMP_ERR( status ),
- KMP_HNT( DecreaseMonitorStackSize ),
- __kmp_msg_null
- );
- }; // if
- #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
- if ( status == EAGAIN ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( NoResourcesForMonitorThread ),
- KMP_ERR( status ),
- KMP_HNT( DecreaseNumberOfThreadsInUse ),
- __kmp_msg_null
- );
- }; // if
- KMP_SYSFAIL( "pthread_create", status );
- }; // if
-
- th->th.th_info.ds.ds_thread = handle;
-
- #if KMP_REAL_TIME_FIX
-        // Wait until the monitor thread has really started and set its *priority*.
- KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == sizeof( __kmp_global.g.g_time.dt.t_value ) );
- __kmp_wait_yield_4(
- (kmp_uint32 volatile *) & __kmp_global.g.g_time.dt.t_value, -1, & __kmp_neq_4, NULL
- );
- #endif // KMP_REAL_TIME_FIX
-
- #ifdef KMP_THREAD_ATTR
- status = pthread_attr_destroy( & thread_attr );
- if ( status != 0 ) {
- __kmp_msg( //
- kmp_ms_warning,
- KMP_MSG( CantDestroyThreadAttrs ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }; // if
- #endif
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 10, ( "__kmp_create_monitor: monitor created %#.8lx\n", th->th.th_info.ds.ds_thread ) );
-
-} // __kmp_create_monitor
-
-void
-__kmp_exit_thread(
- int exit_status
-) {
- pthread_exit( (void *)(intptr_t) exit_status );
-} // __kmp_exit_thread
-
-void __kmp_resume_monitor();
-
-void
-__kmp_reap_monitor( kmp_info_t *th )
-{
- int status;
- void *exit_val;
-
- KA_TRACE( 10, ("__kmp_reap_monitor: try to reap monitor thread with handle %#.8lx\n",
- th->th.th_info.ds.ds_thread ) );
-
- // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
- // If both tid and gtid are 0, it means the monitor did not ever start.
- // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
- KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
- if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
- return;
- }; // if
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
-
- /* First, check to see whether the monitor thread exists. This could prevent a hang,
- but if the monitor dies after the pthread_kill call and before the pthread_join
- call, it will still hang. */
-
- status = pthread_kill( th->th.th_info.ds.ds_thread, 0 );
- if (status == ESRCH) {
-
- KA_TRACE( 10, ("__kmp_reap_monitor: monitor does not exist, returning\n") );
-
- } else
- {
- __kmp_resume_monitor(); // Wake up the monitor thread
- status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val);
- if (exit_val != th) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( ReapMonitorError ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }
- }
-
- th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
- th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
-
- KA_TRACE( 10, ("__kmp_reap_monitor: done reaping monitor thread with handle %#.8lx\n",
- th->th.th_info.ds.ds_thread ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
-}
-
-void
-__kmp_reap_worker( kmp_info_t *th )
-{
- int status;
- void *exit_val;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid ) );
-
- /* First, check to see whether the worker thread exists. This could prevent a hang,
- but if the worker dies after the pthread_kill call and before the pthread_join
- call, it will still hang. */
-
- {
- status = pthread_kill( th->th.th_info.ds.ds_thread, 0 );
- if (status == ESRCH) {
- KA_TRACE( 10, ("__kmp_reap_worker: worker T#%d does not exist, returning\n",
- th->th.th_info.ds.ds_gtid ) );
- }
- else {
- KA_TRACE( 10, ("__kmp_reap_worker: try to join with worker T#%d\n",
- th->th.th_info.ds.ds_gtid ) );
-
- status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val);
-#ifdef KMP_DEBUG
- /* Don't expose these to the user until we understand when they trigger */
- if ( status != 0 ) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( ReapWorkerError ),
- KMP_ERR( status ),
- __kmp_msg_null
- );
- }
- if ( exit_val != th ) {
- KA_TRACE( 10, ( "__kmp_reap_worker: worker T#%d did not reap properly, "
- "exit_val = %p\n",
- th->th.th_info.ds.ds_gtid, exit_val ) );
- }
-#endif /* KMP_DEBUG */
- }
- }
-
- KA_TRACE( 10, ("__kmp_reap_worker: done reaping T#%d\n", th->th.th_info.ds.ds_gtid ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if KMP_HANDLE_SIGNALS
-
-
-static void
-__kmp_null_handler( int signo )
-{
- // Do nothing, for doing SIG_IGN-type actions.
-} // __kmp_null_handler
-
-
-static void
-__kmp_team_handler( int signo )
-{
- if ( __kmp_global.g.g_abort == 0 ) {
- /* Stage 1 signal handler, let's shut down all of the threads */
- #ifdef KMP_DEBUG
- __kmp_debug_printf( "__kmp_team_handler: caught signal = %d\n", signo );
- #endif
- switch ( signo ) {
- case SIGHUP :
- case SIGINT :
- case SIGQUIT :
- case SIGILL :
- case SIGABRT :
- case SIGFPE :
- case SIGBUS :
- case SIGSEGV :
- #ifdef SIGSYS
- case SIGSYS :
- #endif
- case SIGTERM :
- if ( __kmp_debug_buf ) {
- __kmp_dump_debug_buffer( );
- }; // if
- KMP_MB(); // Flush all pending memory write invalidates.
- TCW_4( __kmp_global.g.g_abort, signo );
- KMP_MB(); // Flush all pending memory write invalidates.
- TCW_4( __kmp_global.g.g_done, TRUE );
- KMP_MB(); // Flush all pending memory write invalidates.
- break;
- default:
- #ifdef KMP_DEBUG
- __kmp_debug_printf( "__kmp_team_handler: unknown signal type" );
- #endif
- break;
- }; // switch
- }; // if
-} // __kmp_team_handler
-
-
-static
-void __kmp_sigaction( int signum, const struct sigaction * act, struct sigaction * oldact ) {
- int rc = sigaction( signum, act, oldact );
- KMP_CHECK_SYSFAIL_ERRNO( "sigaction", rc );
-}
-
-
-static void
-__kmp_install_one_handler( int sig, sig_func_t handler_func, int parallel_init )
-{
- KMP_MB(); // Flush all pending memory write invalidates.
- KB_TRACE( 60, ( "__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init ) );
- if ( parallel_init ) {
- struct sigaction new_action;
- struct sigaction old_action;
- new_action.sa_handler = handler_func;
- new_action.sa_flags = 0;
- sigfillset( & new_action.sa_mask );
- __kmp_sigaction( sig, & new_action, & old_action );
- if ( old_action.sa_handler == __kmp_sighldrs[ sig ].sa_handler ) {
- sigaddset( & __kmp_sigset, sig );
- } else {
-            // Restore/keep the user's handler if one was previously installed.
- __kmp_sigaction( sig, & old_action, NULL );
- }; // if
- } else {
-        // Save the initial/system signal handlers to see if user handlers were installed.
- __kmp_sigaction( sig, NULL, & __kmp_sighldrs[ sig ] );
- }; // if
- KMP_MB(); // Flush all pending memory write invalidates.
-} // __kmp_install_one_handler
-
-
-static void
-__kmp_remove_one_handler( int sig )
-{
- KB_TRACE( 60, ( "__kmp_remove_one_handler( %d )\n", sig ) );
- if ( sigismember( & __kmp_sigset, sig ) ) {
- struct sigaction old;
- KMP_MB(); // Flush all pending memory write invalidates.
- __kmp_sigaction( sig, & __kmp_sighldrs[ sig ], & old );
- if ( ( old.sa_handler != __kmp_team_handler ) && ( old.sa_handler != __kmp_null_handler ) ) {
-            // Restore the user's signal handler.
- KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
- __kmp_sigaction( sig, & old, NULL );
- }; // if
- sigdelset( & __kmp_sigset, sig );
- KMP_MB(); // Flush all pending memory write invalidates.
- }; // if
-} // __kmp_remove_one_handler
-
-
-void
-__kmp_install_signals( int parallel_init )
-{
- KB_TRACE( 10, ( "__kmp_install_signals( %d )\n", parallel_init ) );
- if ( __kmp_handle_signals || ! parallel_init ) {
-        // If ! parallel_init, we do not install handlers; we just save the original handlers.
-        // Let us do it even if __kmp_handle_signals is 0.
- sigemptyset( & __kmp_sigset );
- __kmp_install_one_handler( SIGHUP, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGQUIT, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGBUS, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
- #ifdef SIGSYS
- __kmp_install_one_handler( SIGSYS, __kmp_team_handler, parallel_init );
- #endif // SIGSYS
- __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
- #ifdef SIGPIPE
- __kmp_install_one_handler( SIGPIPE, __kmp_team_handler, parallel_init );
- #endif // SIGPIPE
- }; // if
-} // __kmp_install_signals
-
-
-void
-__kmp_remove_signals( void )
-{
- int sig;
- KB_TRACE( 10, ( "__kmp_remove_signals()\n" ) );
- for ( sig = 1; sig < NSIG; ++ sig ) {
- __kmp_remove_one_handler( sig );
- }; // for sig
-} // __kmp_remove_signals
-
-
-#endif // KMP_HANDLE_SIGNALS
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_enable( int new_state )
-{
- #ifdef KMP_CANCEL_THREADS
- int status, old_state;
- status = pthread_setcancelstate( new_state, & old_state );
- KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
- KMP_DEBUG_ASSERT( old_state == PTHREAD_CANCEL_DISABLE );
- #endif
-}
-
-void
-__kmp_disable( int * old_state )
-{
- #ifdef KMP_CANCEL_THREADS
- int status;
- status = pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, old_state );
- KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
- #endif
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-static void
-__kmp_atfork_prepare (void)
-{
- /* nothing to do */
-}
-
-static void
-__kmp_atfork_parent (void)
-{
- /* nothing to do */
-}
-
-/*
- Reset the library so execution in the child starts "all over again" with
- clean data structures in initial states. Don't worry about freeing memory
- allocated by parent, just abandon it to be safe.
-*/
-static void
-__kmp_atfork_child (void)
-{
- /* TODO make sure this is done right for nested/sibling */
-    // ATT: Are there memory leaks here? TODO: Check and fix.
- /* KMP_ASSERT( 0 ); */
-
- ++__kmp_fork_count;
-
- __kmp_init_runtime = FALSE;
- __kmp_init_monitor = 0;
- __kmp_init_parallel = FALSE;
- __kmp_init_middle = FALSE;
- __kmp_init_serial = FALSE;
- TCW_4(__kmp_init_gtid, FALSE);
- __kmp_init_common = FALSE;
-
- TCW_4(__kmp_init_user_locks, FALSE);
-#if ! KMP_USE_DYNAMIC_LOCK
- __kmp_user_lock_table.used = 1;
- __kmp_user_lock_table.allocated = 0;
- __kmp_user_lock_table.table = NULL;
- __kmp_lock_blocks = NULL;
-#endif
-
- __kmp_all_nth = 0;
- TCW_4(__kmp_nth, 0);
-
- /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate here
- so threadprivate doesn't use stale data */
- KA_TRACE( 10, ( "__kmp_atfork_child: checking cache address list %p\n",
- __kmp_threadpriv_cache_list ) );
-
- while ( __kmp_threadpriv_cache_list != NULL ) {
-
- if ( *__kmp_threadpriv_cache_list -> addr != NULL ) {
- KC_TRACE( 50, ( "__kmp_atfork_child: zeroing cache at address %p\n",
- &(*__kmp_threadpriv_cache_list -> addr) ) );
-
- *__kmp_threadpriv_cache_list -> addr = NULL;
- }
- __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list -> next;
- }
-
- __kmp_init_runtime = FALSE;
-
- /* reset statically initialized locks */
- __kmp_init_bootstrap_lock( &__kmp_initz_lock );
- __kmp_init_bootstrap_lock( &__kmp_stdio_lock );
- __kmp_init_bootstrap_lock( &__kmp_console_lock );
-
- /* This is necessary to make sure no stale data is left around */
-    /* AC: customers complain that we use unsafe routines in the atfork
-       handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
-       in dynamic_link when checking for the presence of the shared tbbmalloc
-       library. The suggestion is to make the library initialization lazier,
-       similar to what is done for __kmpc_begin(). */
- // TODO: synchronize all static initializations with regular library
- // startup; look at kmp_global.c and etc.
- //__kmp_internal_begin ();
-
-}
-
-void
-__kmp_register_atfork(void) {
- if ( __kmp_need_register_atfork ) {
- int status = pthread_atfork( __kmp_atfork_prepare, __kmp_atfork_parent, __kmp_atfork_child );
- KMP_CHECK_SYSFAIL( "pthread_atfork", status );
- __kmp_need_register_atfork = FALSE;
- }
-}
-
-void
-__kmp_suspend_initialize( void )
-{
- int status;
- status = pthread_mutexattr_init( &__kmp_suspend_mutex_attr );
- KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status );
- status = pthread_condattr_init( &__kmp_suspend_cond_attr );
- KMP_CHECK_SYSFAIL( "pthread_condattr_init", status );
-}
-
-static void
-__kmp_suspend_initialize_thread( kmp_info_t *th )
-{
- if ( th->th.th_suspend_init_count <= __kmp_fork_count ) {
- /* this means we haven't initialized the suspension pthread objects for this thread
- in this instance of the process */
- int status;
- status = pthread_cond_init( &th->th.th_suspend_cv.c_cond, &__kmp_suspend_cond_attr );
- KMP_CHECK_SYSFAIL( "pthread_cond_init", status );
- status = pthread_mutex_init( &th->th.th_suspend_mx.m_mutex, & __kmp_suspend_mutex_attr );
- KMP_CHECK_SYSFAIL( "pthread_mutex_init", status );
- *(volatile int*)&th->th.th_suspend_init_count = __kmp_fork_count + 1;
- };
-}
-
-void
-__kmp_suspend_uninitialize_thread( kmp_info_t *th )
-{
- if(th->th.th_suspend_init_count > __kmp_fork_count) {
-        /* this means we have initialized the suspension pthread objects for this thread
-           in this instance of the process */
- int status;
-
- status = pthread_cond_destroy( &th->th.th_suspend_cv.c_cond );
- if ( status != 0 && status != EBUSY ) {
- KMP_SYSFAIL( "pthread_cond_destroy", status );
- };
- status = pthread_mutex_destroy( &th->th.th_suspend_mx.m_mutex );
- if ( status != 0 && status != EBUSY ) {
- KMP_SYSFAIL( "pthread_mutex_destroy", status );
- };
- --th->th.th_suspend_init_count;
- KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count);
- }
-}
-
-/* This routine puts the calling thread to sleep after setting the
- * sleep bit for the indicated flag variable to true.
- */
-template <class C>
-static inline void __kmp_suspend_template( int th_gtid, C *flag )
-{
- KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
- kmp_info_t *th = __kmp_threads[th_gtid];
- int status;
- typename C::flag_t old_spin;
-
- KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid, flag->get() ) );
-
- __kmp_suspend_initialize_thread( th );
-
- status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
-
- KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
- th_gtid, flag->get() ) );
-
- /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
- gets called first?
- */
- old_spin = flag->set_sleeping();
-
- KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x, was %x\n",
- th_gtid, flag->get(), *(flag->get()), old_spin ) );
-
- if ( flag->done_check_val(old_spin) ) {
- old_spin = flag->unset_sleeping();
- KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for spin(%p)\n",
- th_gtid, flag->get()) );
- } else {
- /* Encapsulate in a loop as the documentation states that this may
- * "with low probability" return when the condition variable has
- * not been signaled or broadcast
- */
- int deactivated = FALSE;
- TCW_PTR(th->th.th_sleep_loc, (void *)flag);
- while ( flag->is_sleeping() ) {
-#ifdef DEBUG_SUSPEND
- char buffer[128];
- __kmp_suspend_count++;
- __kmp_print_cond( buffer, &th->th.th_suspend_cv );
- __kmp_printf( "__kmp_suspend_template: suspending T#%d: %s\n", th_gtid, buffer );
-#endif
- // Mark the thread as no longer active (only in the first iteration of the loop).
- if ( ! deactivated ) {
- th->th.th_active = FALSE;
- if ( th->th.th_active_in_pool ) {
- th->th.th_active_in_pool = FALSE;
- KMP_TEST_THEN_DEC32(
- (kmp_int32 *) &__kmp_thread_pool_active_nth );
- KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
- }
- deactivated = TRUE;
-
-
- }
-
-#if USE_SUSPEND_TIMEOUT
- struct timespec now;
- struct timeval tval;
- int msecs;
-
- status = gettimeofday( &tval, NULL );
- KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
- TIMEVAL_TO_TIMESPEC( &tval, &now );
-
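-            // Build an absolute deadline roughly four blocktime periods (in milliseconds)
-            // plus a 200 ms margin in the future for pthread_cond_timedwait() below.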
- msecs = (4*__kmp_dflt_blocktime) + 200;
- now.tv_sec += msecs / 1000;
- now.tv_nsec += (msecs % 1000)*1000;
-
- KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_timedwait\n",
- th_gtid ) );
- status = pthread_cond_timedwait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex, & now );
-#else
- KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_wait\n",
- th_gtid ) );
- status = pthread_cond_wait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex );
-#endif
-
- if ( (status != 0) && (status != EINTR) && (status != ETIMEDOUT) ) {
- KMP_SYSFAIL( "pthread_cond_wait", status );
- }
-#ifdef KMP_DEBUG
- if (status == ETIMEDOUT) {
- if ( flag->is_sleeping() ) {
- KF_TRACE( 100, ( "__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid ) );
- } else {
- KF_TRACE( 2, ( "__kmp_suspend_template: T#%d timeout wakeup, sleep bit not set!\n",
- th_gtid ) );
- }
- } else if ( flag->is_sleeping() ) {
- KF_TRACE( 100, ( "__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ) );
- }
-#endif
- } // while
-
-        // Mark the thread as active again (if it was previously marked as inactive)
- if ( deactivated ) {
- th->th.th_active = TRUE;
- if ( TCR_4(th->th.th_in_pool) ) {
- KMP_TEST_THEN_INC32( (kmp_int32 *) &__kmp_thread_pool_active_nth );
- th->th.th_active_in_pool = TRUE;
- }
- }
- }
-
-#ifdef DEBUG_SUSPEND
- {
- char buffer[128];
- __kmp_print_cond( buffer, &th->th.th_suspend_cv);
- __kmp_printf( "__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid, buffer );
- }
-#endif
-
-
- status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
-
- KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) );
-}
-
-void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
-
-
-/* This routine signals the thread specified by target_gtid to wake up
- * after setting the sleep bit indicated by the flag argument to FALSE.
- * The target thread must already have called __kmp_suspend_template()
- */
-template <class C>
-static inline void __kmp_resume_template( int target_gtid, C *flag )
-{
- KMP_TIME_DEVELOPER_BLOCK(USER_resume);
- kmp_info_t *th = __kmp_threads[target_gtid];
- int status;
-
-#ifdef KMP_DEBUG
- int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
-#endif
-
- KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) );
- KMP_DEBUG_ASSERT( gtid != target_gtid );
-
- __kmp_suspend_initialize_thread( th );
-
- status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
-
- if (!flag) { // coming from __kmp_null_resume_wrapper
- flag = (C *)th->th.th_sleep_loc;
- }
-
- // First, check if the flag is null or its type has changed. If so, someone else woke it up.
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to
- KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p)\n",
- gtid, target_gtid, NULL ) );
- status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
- return;
- }
- else { // if multiple threads are sleeping, flag should be internally referring to a specific thread here
- typename C::flag_t old_spin = flag->unset_sleeping();
- if ( ! flag->is_sleeping_val(old_spin) ) {
- KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p): "
- "%u => %u\n",
- gtid, target_gtid, flag->get(), old_spin, *flag->get() ) );
-
- status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
- return;
- }
- KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p): "
- "%u => %u\n",
- gtid, target_gtid, flag->get(), old_spin, *flag->get() ) );
- }
- TCW_PTR(th->th.th_sleep_loc, NULL);
-
-
-#ifdef DEBUG_SUSPEND
- {
- char buffer[128];
- __kmp_print_cond( buffer, &th->th.th_suspend_cv );
- __kmp_printf( "__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid, target_gtid, buffer );
- }
-#endif
-
-
- status = pthread_cond_signal( &th->th.th_suspend_cv.c_cond );
- KMP_CHECK_SYSFAIL( "pthread_cond_signal", status );
- status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
- KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n",
- gtid, target_gtid ) );
-}
-
-void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
-void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
-void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
-
-void
-__kmp_resume_monitor()
-{
- int status;
-#ifdef KMP_DEBUG
- int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
- KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n",
- gtid, KMP_GTID_MONITOR ) );
- KMP_DEBUG_ASSERT( gtid != KMP_GTID_MONITOR );
-#endif
- status = pthread_mutex_lock( &__kmp_wait_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
-#ifdef DEBUG_SUSPEND
- {
- char buffer[128];
- __kmp_print_cond( buffer, &__kmp_wait_cv.c_cond );
- __kmp_printf( "__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid, KMP_GTID_MONITOR, buffer );
- }
-#endif
- status = pthread_cond_signal( &__kmp_wait_cv.c_cond );
- KMP_CHECK_SYSFAIL( "pthread_cond_signal", status );
- status = pthread_mutex_unlock( &__kmp_wait_mx.m_mutex );
- KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
- KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d exiting after signaling wake up for T#%d\n",
- gtid, KMP_GTID_MONITOR ) );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_yield( int cond )
-{
- if (cond && __kmp_yielding_on) {
- sched_yield();
- }
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_gtid_set_specific( int gtid )
-{
- int status;
- KMP_ASSERT( __kmp_init_runtime );
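-    // The gtid is stored biased by +1 so that a NULL (0) value returned by
-    // pthread_getspecific() can be distinguished from a valid gtid of 0;
-    // __kmp_gtid_get_specific() removes the bias again.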
- status = pthread_setspecific( __kmp_gtid_threadprivate_key, (void*)(intptr_t)(gtid+1) );
- KMP_CHECK_SYSFAIL( "pthread_setspecific", status );
-}
-
-int
-__kmp_gtid_get_specific()
-{
- int gtid;
- if ( !__kmp_init_runtime ) {
-        KA_TRACE( 50, ("__kmp_gtid_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
- return KMP_GTID_SHUTDOWN;
- }
- gtid = (int)(size_t)pthread_getspecific( __kmp_gtid_threadprivate_key );
- if ( gtid == 0 ) {
- gtid = KMP_GTID_DNE;
- }
- else {
- gtid--;
- }
- KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
- __kmp_gtid_threadprivate_key, gtid ));
- return gtid;
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-double
-__kmp_read_cpu_time( void )
-{
- /*clock_t t;*/
- struct tms buffer;
-
- /*t =*/ times( & buffer );
-
- return (buffer.tms_utime + buffer.tms_cutime) / (double) CLOCKS_PER_SEC;
-}
-
-int
-__kmp_read_system_info( struct kmp_sys_info *info )
-{
- int status;
- struct rusage r_usage;
-
- memset( info, 0, sizeof( *info ) );
-
- status = getrusage( RUSAGE_SELF, &r_usage);
- KMP_CHECK_SYSFAIL_ERRNO( "getrusage", status );
-
- info->maxrss = r_usage.ru_maxrss; /* the maximum resident set size utilized (in kilobytes) */
- info->minflt = r_usage.ru_minflt; /* the number of page faults serviced without any I/O */
- info->majflt = r_usage.ru_majflt; /* the number of page faults serviced that required I/O */
- info->nswap = r_usage.ru_nswap; /* the number of times a process was "swapped" out of memory */
- info->inblock = r_usage.ru_inblock; /* the number of times the file system had to perform input */
- info->oublock = r_usage.ru_oublock; /* the number of times the file system had to perform output */
-    info->nvcsw    = r_usage.ru_nvcsw;    /* the number of voluntary context switches */
-    info->nivcsw   = r_usage.ru_nivcsw;   /* the number of involuntary (forced) context switches */
-
- return (status != 0);
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_read_system_time( double *delta )
-{
- double t_ns;
- struct timeval tval;
- struct timespec stop;
- int status;
-
- status = gettimeofday( &tval, NULL );
- KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
- TIMEVAL_TO_TIMESPEC( &tval, &stop );
- t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start);
- *delta = (t_ns * 1e-9);
-}
-
-void
-__kmp_clear_system_time( void )
-{
- struct timeval tval;
- int status;
- status = gettimeofday( &tval, NULL );
- KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
- TIMEVAL_TO_TIMESPEC( &tval, &__kmp_sys_timer_data.start );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#ifdef BUILD_TV
-
-void
-__kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr )
-{
- struct tv_data *p;
-
- p = (struct tv_data *) __kmp_allocate( sizeof( *p ) );
-
- p->u.tp.global_addr = global_addr;
- p->u.tp.thread_addr = thread_addr;
-
- p->type = (void *) 1;
-
- p->next = th->th.th_local.tv_data;
- th->th.th_local.tv_data = p;
-
- if ( p->next == 0 ) {
- int rc = pthread_setspecific( __kmp_tv_key, p );
- KMP_CHECK_SYSFAIL( "pthread_setspecific", rc );
- }
-}
-
-#endif /* BUILD_TV */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-static int
-__kmp_get_xproc( void ) {
-
- int r = 0;
-
- #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
-
- r = sysconf( _SC_NPROCESSORS_ONLN );
-
- #elif KMP_OS_DARWIN
-
- // Bug C77011 High "OpenMP Threads and number of active cores".
-
- // Find the number of available CPUs.
- kern_return_t rc;
- host_basic_info_data_t info;
- mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
- rc = host_info( mach_host_self(), HOST_BASIC_INFO, (host_info_t) & info, & num );
- if ( rc == 0 && num == HOST_BASIC_INFO_COUNT ) {
- // Cannot use KA_TRACE() here because this code works before trace support is
- // initialized.
- r = info.avail_cpus;
- } else {
- KMP_WARNING( CantGetNumAvailCPU );
- KMP_INFORM( AssumedNumCPU );
- }; // if
-
- #else
-
- #error "Unknown or unsupported OS."
-
- #endif
-
- return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
-
-} // __kmp_get_xproc
-
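-// Read formatted data from a file with vfscanf(); returns the number of items
-// converted, or 0 if the file cannot be opened. A typical (illustrative) use is
-// reading a single integer from a /proc or /sys entry, e.g.
-//   __kmp_read_from_file( "/proc/sys/kernel/threads-max", "%d", &n );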
-int
-__kmp_read_from_file( char const *path, char const *format, ... )
-{
- int result;
- va_list args;
-
- va_start(args, format);
- FILE *f = fopen(path, "rb");
-    if ( f == NULL ) {
-        va_end(args);
-        return 0;
-    }
-    result = vfscanf(f, format, args);
-    fclose(f);
-    va_end(args);
-
-    return result;
-}
-
-void
-__kmp_runtime_initialize( void )
-{
- int status;
- pthread_mutexattr_t mutex_attr;
- pthread_condattr_t cond_attr;
-
- if ( __kmp_init_runtime ) {
- return;
- }; // if
-
- #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
- if ( ! __kmp_cpuinfo.initialized ) {
- __kmp_query_cpuid( &__kmp_cpuinfo );
- }; // if
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- __kmp_xproc = __kmp_get_xproc();
-
- if ( sysconf( _SC_THREADS ) ) {
-
- /* Query the maximum number of threads */
- __kmp_sys_max_nth = sysconf( _SC_THREAD_THREADS_MAX );
- if ( __kmp_sys_max_nth == -1 ) {
- /* Unlimited threads for NPTL */
- __kmp_sys_max_nth = INT_MAX;
- }
- else if ( __kmp_sys_max_nth <= 1 ) {
- /* Can't tell, just use PTHREAD_THREADS_MAX */
- __kmp_sys_max_nth = KMP_MAX_NTH;
- }
-
- /* Query the minimum stack size */
- __kmp_sys_min_stksize = sysconf( _SC_THREAD_STACK_MIN );
- if ( __kmp_sys_min_stksize <= 1 ) {
- __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
- }
- }
-
- /* Set up minimum number of threads to switch to TLS gtid */
- __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
-
- #ifdef BUILD_TV
- {
- int rc = pthread_key_create( & __kmp_tv_key, 0 );
- KMP_CHECK_SYSFAIL( "pthread_key_create", rc );
- }
- #endif
-
- status = pthread_key_create( &__kmp_gtid_threadprivate_key, __kmp_internal_end_dest );
- KMP_CHECK_SYSFAIL( "pthread_key_create", status );
- status = pthread_mutexattr_init( & mutex_attr );
- KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status );
- status = pthread_mutex_init( & __kmp_wait_mx.m_mutex, & mutex_attr );
- KMP_CHECK_SYSFAIL( "pthread_mutex_init", status );
- status = pthread_condattr_init( & cond_attr );
- KMP_CHECK_SYSFAIL( "pthread_condattr_init", status );
- status = pthread_cond_init( & __kmp_wait_cv.c_cond, & cond_attr );
- KMP_CHECK_SYSFAIL( "pthread_cond_init", status );
-#if USE_ITT_BUILD
- __kmp_itt_initialize();
-#endif /* USE_ITT_BUILD */
-
- __kmp_init_runtime = TRUE;
-}
-
-void
-__kmp_runtime_destroy( void )
-{
- int status;
-
- if ( ! __kmp_init_runtime ) {
- return; // Nothing to do.
- };
-
-#if USE_ITT_BUILD
- __kmp_itt_destroy();
-#endif /* USE_ITT_BUILD */
-
- status = pthread_key_delete( __kmp_gtid_threadprivate_key );
- KMP_CHECK_SYSFAIL( "pthread_key_delete", status );
- #ifdef BUILD_TV
- status = pthread_key_delete( __kmp_tv_key );
- KMP_CHECK_SYSFAIL( "pthread_key_delete", status );
- #endif
-
- status = pthread_mutex_destroy( & __kmp_wait_mx.m_mutex );
- if ( status != 0 && status != EBUSY ) {
- KMP_SYSFAIL( "pthread_mutex_destroy", status );
- }
- status = pthread_cond_destroy( & __kmp_wait_cv.c_cond );
- if ( status != 0 && status != EBUSY ) {
- KMP_SYSFAIL( "pthread_cond_destroy", status );
- }
- #if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_uninitialize();
- #endif
-
- __kmp_init_runtime = FALSE;
-}
-
-
-/* Put the thread to sleep for a time period */
-/* NOTE: not currently used anywhere */
-void
-__kmp_thread_sleep( int millis )
-{
- sleep( ( millis + 500 ) / 1000 );
-}
-
-/* Calculate the elapsed wall clock time for the user */
-void
-__kmp_elapsed( double *t )
-{
- int status;
-# ifdef FIX_SGI_CLOCK
- struct timespec ts;
-
- status = clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts );
- KMP_CHECK_SYSFAIL_ERRNO( "clock_gettime", status );
- *t = (double) ts.tv_nsec * (1.0 / (double) KMP_NSEC_PER_SEC) +
- (double) ts.tv_sec;
-# else
- struct timeval tv;
-
- status = gettimeofday( & tv, NULL );
- KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
- *t = (double) tv.tv_usec * (1.0 / (double) KMP_USEC_PER_SEC) +
- (double) tv.tv_sec;
-# endif
-}
-
-/* Calculate the elapsed wall clock tick for the user */
-void
-__kmp_elapsed_tick( double *t )
-{
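-    /* On XSI-conformant systems CLOCKS_PER_SEC is 1000000, so the reported
-       tick is one microsecond. */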
- *t = 1 / (double) CLOCKS_PER_SEC;
-}
-
-/*
- Determine whether the given address is mapped into the current address space.
-*/
-
-int
-__kmp_is_address_mapped( void * addr ) {
-
- int found = 0;
- int rc;
-
- #if KMP_OS_LINUX || KMP_OS_FREEBSD
-
- /*
- On Linux* OS, read the /proc/<pid>/maps pseudo-file to get all the address ranges mapped
- into the address space.
- */
-
- char * name = __kmp_str_format( "/proc/%d/maps", getpid() );
- FILE * file = NULL;
-
- file = fopen( name, "r" );
- KMP_ASSERT( file != NULL );
-
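-    // Each line of /proc/<pid>/maps looks roughly like:
-    //     00400000-0040b000 r-xp 00000000 08:01 1234567  /bin/cat
-    // Only the address range and the permission string are parsed below.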
- for ( ; ; ) {
-
- void * beginning = NULL;
- void * ending = NULL;
- char perms[ 5 ];
-
- rc = fscanf( file, "%p-%p %4s %*[^\n]\n", & beginning, & ending, perms );
- if ( rc == EOF ) {
- break;
- }; // if
- KMP_ASSERT( rc == 3 && KMP_STRLEN( perms ) == 4 ); // Make sure all fields are read.
-
- // Ending address is not included in the region, but beginning is.
- if ( ( addr >= beginning ) && ( addr < ending ) ) {
-            perms[ 2 ] = 0;    // 3rd and 4th characters do not matter.
- if ( strcmp( perms, "rw" ) == 0 ) {
- // Memory we are looking for should be readable and writable.
- found = 1;
- }; // if
- break;
- }; // if
-
- }; // forever
-
- // Free resources.
- fclose( file );
- KMP_INTERNAL_FREE( name );
-
- #elif KMP_OS_DARWIN
-
- /*
- On OS X*, /proc pseudo filesystem is not available. Try to read memory using vm
- interface.
- */
-
- int buffer;
- vm_size_t count;
- rc =
- vm_read_overwrite(
- mach_task_self(), // Task to read memory of.
- (vm_address_t)( addr ), // Address to read from.
- 1, // Number of bytes to be read.
- (vm_address_t)( & buffer ), // Address of buffer to save read bytes in.
- & count // Address of var to save number of read bytes in.
- );
- if ( rc == 0 ) {
- // Memory successfully read.
- found = 1;
- }; // if
-
- #elif KMP_OS_FREEBSD || KMP_OS_NETBSD
-
- // FIXME(FreeBSD, NetBSD): Implement this
- found = 1;
-
- #else
-
- #error "Unknown or unsupported OS"
-
- #endif
-
- return found;
-
-} // __kmp_is_address_mapped
-
-#ifdef USE_LOAD_BALANCE
-
-
-# if KMP_OS_DARWIN
-
-// The function returns the rounded value of the system load average
-// during a given time interval, which depends on the value of the
-// __kmp_load_balance_interval variable (default is 60 sec; other values
-// may be 300 sec or 900 sec).
-// It returns -1 in case of error.
-int
-__kmp_get_load_balance( int max )
-{
- double averages[3];
- int ret_avg = 0;
-
- int res = getloadavg( averages, 3 );
-
-    // Check __kmp_load_balance_interval to determine which of the averages to use.
-    // getloadavg() may return fewer samples than requested, i.e. fewer than 3.
- if ( __kmp_load_balance_interval < 180 && ( res >= 1 ) ) {
- ret_avg = averages[0];// 1 min
- } else if ( ( __kmp_load_balance_interval >= 180
- && __kmp_load_balance_interval < 600 ) && ( res >= 2 ) ) {
- ret_avg = averages[1];// 5 min
- } else if ( ( __kmp_load_balance_interval >= 600 ) && ( res == 3 ) ) {
- ret_avg = averages[2];// 15 min
- } else {// Error occurred
- return -1;
- }
-
- return ret_avg;
-}
-
-# else // Linux* OS
-
-// The function returns the number of running (not sleeping) threads, or -1 in case of error.
-// An error may be reported if the Linux* OS kernel is too old (without "/proc" support).
-// Counting running threads stops once max running threads are encountered.
-int
-__kmp_get_load_balance( int max )
-{
- static int permanent_error = 0;
-
-    static int    glb_running_threads  = 0;  /* Saved count of the running threads for the thread balance algorithm */
- static double glb_call_time = 0; /* Thread balance algorithm call time */
-
- int running_threads = 0; // Number of running threads in the system.
-
- DIR * proc_dir = NULL; // Handle of "/proc/" directory.
- struct dirent * proc_entry = NULL;
-
- kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
- DIR * task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
- struct dirent * task_entry = NULL;
- int task_path_fixed_len;
-
- kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
- int stat_file = -1;
- int stat_path_fixed_len;
-
- int total_processes = 0; // Total number of processes in system.
- int total_threads = 0; // Total number of threads in system.
-
- double call_time = 0.0;
-
- __kmp_str_buf_init( & task_path );
- __kmp_str_buf_init( & stat_path );
-
- __kmp_elapsed( & call_time );
-
- if ( glb_call_time &&
- ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
- running_threads = glb_running_threads;
- goto finish;
- }
-
- glb_call_time = call_time;
-
- // Do not spend time on scanning "/proc/" if we have a permanent error.
- if ( permanent_error ) {
- running_threads = -1;
- goto finish;
- }; // if
-
- if ( max <= 0 ) {
- max = INT_MAX;
- }; // if
-
- // Open "/proc/" directory.
- proc_dir = opendir( "/proc" );
- if ( proc_dir == NULL ) {
-        // Cannot open "/proc/". Probably the kernel does not support it. Return an error now and
- // in subsequent calls.
- running_threads = -1;
- permanent_error = 1;
- goto finish;
- }; // if
-
- // Initialize fixed part of task_path. This part will not change.
- __kmp_str_buf_cat( & task_path, "/proc/", 6 );
- task_path_fixed_len = task_path.used; // Remember number of used characters.
-
- proc_entry = readdir( proc_dir );
- while ( proc_entry != NULL ) {
- // Proc entry is a directory and name starts with a digit. Assume it is a process'
- // directory.
- if ( proc_entry->d_type == DT_DIR && isdigit( proc_entry->d_name[ 0 ] ) ) {
-
- ++ total_processes;
- // Make sure init process is the very first in "/proc", so we can replace
- // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes == 1.
- // We are going to check that total_processes == 1 => d_name == "1" is true (where
- // "=>" is implication). Since C++ does not have => operator, let us replace it with its
- // equivalent: a => b == ! a || b.
- KMP_DEBUG_ASSERT( total_processes != 1 || strcmp( proc_entry->d_name, "1" ) == 0 );
-
- // Construct task_path.
- task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
- __kmp_str_buf_cat( & task_path, proc_entry->d_name, KMP_STRLEN( proc_entry->d_name ) );
- __kmp_str_buf_cat( & task_path, "/task", 5 );
-
- task_dir = opendir( task_path.str );
- if ( task_dir == NULL ) {
-                // A process can finish between reading its "/proc/" directory entry and opening its
-                // "task/" directory. So, in the general case, we should not complain; we just skip
-                // this process and read the next one.
-                // But on systems with no "task/" support we would spend a lot of time re-scanning the
-                // "/proc/" tree again and again without any benefit. The "init" process (its pid is 1)
-                // should always exist, so if we cannot open the "/proc/1/task/" directory, it means
-                // "task/" is not supported by the kernel. Report an error now and in the future.
- if ( strcmp( proc_entry->d_name, "1" ) == 0 ) {
- running_threads = -1;
- permanent_error = 1;
- goto finish;
- }; // if
- } else {
- // Construct fixed part of stat file path.
- __kmp_str_buf_clear( & stat_path );
- __kmp_str_buf_cat( & stat_path, task_path.str, task_path.used );
- __kmp_str_buf_cat( & stat_path, "/", 1 );
- stat_path_fixed_len = stat_path.used;
-
- task_entry = readdir( task_dir );
- while ( task_entry != NULL ) {
- // It is a directory and name starts with a digit.
- if ( proc_entry->d_type == DT_DIR && isdigit( task_entry->d_name[ 0 ] ) ) {
-
- ++ total_threads;
-
-                    // Construct the complete stat file path. The easiest way would be:
-                    //  __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, task_entry->d_name );
-                    //  but a series of __kmp_str_buf_cat calls works a bit faster.
- stat_path.used = stat_path_fixed_len; // Reset stat path to its fixed part.
- __kmp_str_buf_cat( & stat_path, task_entry->d_name, KMP_STRLEN( task_entry->d_name ) );
- __kmp_str_buf_cat( & stat_path, "/stat", 5 );
-
- // Note: Low-level API (open/read/close) is used. High-level API
- // (fopen/fclose) works ~ 30 % slower.
- stat_file = open( stat_path.str, O_RDONLY );
- if ( stat_file == -1 ) {
- // We cannot report an error because task (thread) can terminate just
- // before reading this file.
- } else {
- /*
- Content of "stat" file looks like:
-
- 24285 (program) S ...
-
-                            It is a single line (if the program name does not include funny
-                            symbols). The first number is a thread id, then the executable file
-                            name in parentheses, then the state of the thread. We need just the
-                            thread state.
-
-                            Good news: Length of program name is 15 characters max. Longer
-                            names are truncated.
-
-                            Thus, we need a rather short buffer: 15 chars for the program name +
-                            2 parentheses + 3 spaces + ~7 digits of pid = 37.
-
-                            Bad news: Program name may contain special symbols like space,
-                            closing parenthesis, or even new line. This makes parsing the "stat"
-                            file not 100 % reliable. In case of funny program names parsing
-                            may fail (report an incorrect thread state).
-
-                            Parsing the "status" file looks more promising (due to a different
-                            file structure and escaping of special symbols) but reading and
-                            parsing of the "status" file works slower.
-
- -- ln
- */
- char buffer[ 65 ];
- int len;
- len = read( stat_file, buffer, sizeof( buffer ) - 1 );
- if ( len >= 0 ) {
- buffer[ len ] = 0;
- // Using scanf:
- // sscanf( buffer, "%*d (%*s) %c ", & state );
- // looks very nice, but searching for a closing parenthesis works a
- // bit faster.
- char * close_parent = strstr( buffer, ") " );
- if ( close_parent != NULL ) {
- char state = * ( close_parent + 2 );
- if ( state == 'R' ) {
- ++ running_threads;
- if ( running_threads >= max ) {
- goto finish;
- }; // if
- }; // if
- }; // if
- }; // if
- close( stat_file );
- stat_file = -1;
- }; // if
- }; // if
- task_entry = readdir( task_dir );
- }; // while
- closedir( task_dir );
- task_dir = NULL;
- }; // if
- }; // if
- proc_entry = readdir( proc_dir );
- }; // while
-
- //
- // There _might_ be a timing hole where the thread executing this
-    // code gets skipped in the load balance, and running_threads is 0.
- // Assert in the debug builds only!!!
- //
- KMP_DEBUG_ASSERT( running_threads > 0 );
- if ( running_threads <= 0 ) {
- running_threads = 1;
- }
-
- finish: // Clean up and exit.
- if ( proc_dir != NULL ) {
- closedir( proc_dir );
- }; // if
- __kmp_str_buf_free( & task_path );
- if ( task_dir != NULL ) {
- closedir( task_dir );
- }; // if
- __kmp_str_buf_free( & stat_path );
- if ( stat_file != -1 ) {
- close( stat_file );
- }; // if
-
- glb_running_threads = running_threads;
-
- return running_threads;
-
-} // __kmp_get_load_balance
-
-# endif // KMP_OS_DARWIN
-
-#endif // USE_LOAD_BALANCE
-
-#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC)
-
-// we really only need the case with 1 argument, because CLANG always builds
-// a struct of pointers to the shared variables referenced in the outlined function
-int
-__kmp_invoke_microtask( microtask_t pkfn,
- int gtid, int tid,
- int argc, void *p_argv[]
-#if OMPT_SUPPORT
- , void **exit_frame_ptr
-#endif
-)
-{
-#if OMPT_SUPPORT
- *exit_frame_ptr = __builtin_frame_address(0);
-#endif
-
- switch (argc) {
- default:
- fprintf(stderr, "Too many args to microtask: %d!\n", argc);
- fflush(stderr);
- exit(-1);
- case 0:
- (*pkfn)(&gtid, &tid);
- break;
- case 1:
- (*pkfn)(&gtid, &tid, p_argv[0]);
- break;
- case 2:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
- break;
- case 3:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
- break;
- case 4:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
- break;
- case 5:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
- break;
- case 6:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5]);
- break;
- case 7:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6]);
- break;
- case 8:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7]);
- break;
- case 9:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
- break;
- case 10:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
- break;
- case 11:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
- break;
- case 12:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
- p_argv[11]);
- break;
- case 13:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
- p_argv[11], p_argv[12]);
- break;
- case 14:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
- p_argv[11], p_argv[12], p_argv[13]);
- break;
- case 15:
- (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
- p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
- p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
- break;
- }
-
-#if OMPT_SUPPORT
- *exit_frame_ptr = 0;
-#endif
-
- return 1;
-}
-
-#endif
-
-// end of file //
-
+/*
+ * z_Linux_util.c -- platform specific routines.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_wrapper_getpid.h"
+#include "kmp_itt.h"
+#include "kmp_str.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_stats.h"
+#include "kmp_wait_release.h"
+
+#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
+# include <alloca.h>
+#endif
+#include <unistd.h>
+#include <math.h> // HUGE_VAL.
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+
+#if KMP_OS_LINUX && !KMP_OS_CNK
+# include <sys/sysinfo.h>
+# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+// We should really include <futex.h>, but that causes compatibility problems on different
+// Linux* OS distributions that either require that you include (or break when you try to include)
+// <pci/types.h>.
+// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
+// we just define the constants here and don't include <futex.h>
+# ifndef FUTEX_WAIT
+# define FUTEX_WAIT 0
+# endif
+# ifndef FUTEX_WAKE
+# define FUTEX_WAKE 1
+# endif
+# endif
+#elif KMP_OS_DARWIN
+# include <sys/sysctl.h>
+# include <mach/mach.h>
+#elif KMP_OS_FREEBSD
+# include <pthread_np.h>
+#endif
+
+
+#include <dirent.h>
+#include <ctype.h>
+#include <fcntl.h>
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+struct kmp_sys_timer {
+ struct timespec start;
+};
+
+// Convert timespec to nanoseconds.
+#define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec)
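+// For example, { tv_sec = 2, tv_nsec = 500000 } yields 2 * 1e9 + 500000 =
+// 2.0005e9 ns; the 1e9 literal makes the result a double.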
+
+static struct kmp_sys_timer __kmp_sys_timer_data;
+
+#if KMP_HANDLE_SIGNALS
+ typedef void (* sig_func_t )( int );
+ STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[ NSIG ];
+ static sigset_t __kmp_sigset;
+#endif
+
+static int __kmp_init_runtime = FALSE;
+
+static int __kmp_fork_count = 0;
+
+static pthread_condattr_t __kmp_suspend_cond_attr;
+static pthread_mutexattr_t __kmp_suspend_mutex_attr;
+
+static kmp_cond_align_t __kmp_wait_cv;
+static kmp_mutex_align_t __kmp_wait_mx;
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef DEBUG_SUSPEND
+static void
+__kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
+{
+ KMP_SNPRINTF( buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
+ cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
+ cond->c_cond.__c_waiting );
+}
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
+
+/*
+ * Affinity support
+ */
+
+/*
+ * On some of the older OS's that we build on, these constants aren't present
+ * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
+ * all systems of the same arch where they are defined, and they cannot change;
+ * they are carved in stone forever.
+ */
+
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 241
+# elif __NR_sched_setaffinity != 241
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 242
+# elif __NR_sched_getaffinity != 242
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+
+# elif KMP_ARCH_AARCH64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 122
+# elif __NR_sched_setaffinity != 122
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 123
+# elif __NR_sched_getaffinity != 123
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+
+# elif KMP_ARCH_X86_64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 203
+# elif __NR_sched_setaffinity != 203
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 204
+# elif __NR_sched_getaffinity != 204
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+
+# elif KMP_ARCH_PPC64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 222
+# elif __NR_sched_setaffinity != 222
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 223
+# elif __NR_sched_getaffinity != 223
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+
+
+# else
+# error Unknown or unsupported architecture
+
+# endif /* KMP_ARCH_* */
+
+int
+__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
+{
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal set affinity operation when not capable");
+#if KMP_USE_HWLOC
+ int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
+#else
+ int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
+#endif
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FatalSysError ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ return error;
+}
+
+int
+__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
+{
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+
+#if KMP_USE_HWLOC
+ int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
+#else
+ int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
+#endif
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FatalSysError ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ return error;
+}
+
+void
+__kmp_affinity_bind_thread( int which )
+{
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal set affinity operation when not capable");
+
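+    // Build a mask with only CPU 'which' set and bind the calling thread to it.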
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(which, mask);
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
+}
+
+/*
+ * Determine if we can access affinity functionality on this version of
+ * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
+ * __kmp_affin_mask_size to the appropriate value (0 means not capable).
+ */
+void
+__kmp_affinity_determine_capable(const char *env_var)
+{
+ //
+ // Check and see if the OS supports thread affinity.
+ //
+
+# define KMP_CPU_SET_SIZE_LIMIT (1024*1024)
+
+ int gCode;
+ int sCode;
+ kmp_affin_mask_t *buf;
+ buf = ( kmp_affin_mask_t * ) KMP_INTERNAL_MALLOC( KMP_CPU_SET_SIZE_LIMIT );
+
+ // If Linux* OS:
+ // If the syscall fails or returns a suggestion for the size,
+ // then we don't have to search for an appropriate size.
+ gCode = syscall( __NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf );
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "initial getaffinity call returned %d errno = %d\n",
+ gCode, errno));
+
+ //if ((gCode < 0) && (errno == ENOSYS))
+ if (gCode < 0) {
+ //
+ // System call not supported
+ //
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none)
+ && (__kmp_affinity_type != affinity_default)
+ && (__kmp_affinity_type != affinity_disabled))) {
+ int error = errno;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( GetAffSysCallNotSupported, env_var ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ KMP_AFFINITY_DISABLE();
+ KMP_INTERNAL_FREE(buf);
+ return;
+ }
+ if (gCode > 0) { // Linux* OS only
+ // The optimal situation: the OS returns the size of the buffer
+ // it expects.
+ //
+        // A verification of correct behavior is that setaffinity on a NULL
+ // buffer with the same size fails with errno set to EFAULT.
+ sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL );
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "setaffinity for mask size %d returned %d errno = %d\n",
+ gCode, sCode, errno));
+ if (sCode < 0) {
+ if (errno == ENOSYS) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none)
+ && (__kmp_affinity_type != affinity_default)
+ && (__kmp_affinity_type != affinity_disabled))) {
+ int error = errno;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( SetAffSysCallNotSupported, env_var ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ KMP_AFFINITY_DISABLE();
+ KMP_INTERNAL_FREE(buf);
+ }
+ if (errno == EFAULT) {
+ KMP_AFFINITY_ENABLE(gCode);
+ KA_TRACE(10, ( "__kmp_affinity_determine_capable: "
+ "affinity supported (mask size %d)\n",
+ (int)__kmp_affin_mask_size));
+ KMP_INTERNAL_FREE(buf);
+ return;
+ }
+ }
+ }
+
+ //
+ // Call the getaffinity system call repeatedly with increasing set sizes
+ // until we succeed, or reach an upper bound on the search.
+ //
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "searching for proper set size\n"));
+ int size;
+ for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
+ gCode = syscall( __NR_sched_getaffinity, 0, size, buf );
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "getaffinity for mask size %d returned %d errno = %d\n", size,
+ gCode, errno));
+
+ if (gCode < 0) {
+ if ( errno == ENOSYS )
+ {
+ //
+ // We shouldn't get here
+ //
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n",
+ size));
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none)
+ && (__kmp_affinity_type != affinity_default)
+ && (__kmp_affinity_type != affinity_disabled))) {
+ int error = errno;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( GetAffSysCallNotSupported, env_var ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ KMP_AFFINITY_DISABLE();
+ KMP_INTERNAL_FREE(buf);
+ return;
+ }
+ continue;
+ }
+
+ sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL );
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "setaffinity for mask size %d returned %d errno = %d\n",
+ gCode, sCode, errno));
+ if (sCode < 0) {
+ if (errno == ENOSYS) { // Linux* OS only
+ //
+ // We shouldn't get here
+ //
+ KA_TRACE(30, ( "__kmp_affinity_determine_capable: "
+ "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n",
+ size));
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none)
+ && (__kmp_affinity_type != affinity_default)
+ && (__kmp_affinity_type != affinity_disabled))) {
+ int error = errno;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( SetAffSysCallNotSupported, env_var ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ KMP_AFFINITY_DISABLE();
+ KMP_INTERNAL_FREE(buf);
+ return;
+ }
+ if (errno == EFAULT) {
+ KMP_AFFINITY_ENABLE(gCode);
+ KA_TRACE(10, ( "__kmp_affinity_determine_capable: "
+ "affinity supported (mask size %d)\n",
+ (int)__kmp_affin_mask_size));
+ KMP_INTERNAL_FREE(buf);
+ return;
+ }
+ }
+ }
+ //int error = errno; // save uncaught error code
+ KMP_INTERNAL_FREE(buf);
+ // errno = error; // restore uncaught error code, will be printed at the next KMP_WARNING below
+
+ //
+ // Affinity is not supported
+ //
+ KMP_AFFINITY_DISABLE();
+ KA_TRACE(10, ( "__kmp_affinity_determine_capable: "
+ "cannot determine mask size - affinity not supported\n"));
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none)
+ && (__kmp_affinity_type != affinity_default)
+ && (__kmp_affinity_type != affinity_disabled))) {
+ KMP_WARNING( AffCantGetMaskSize, env_var );
+ }
+}
+
+#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && !KMP_OS_CNK
+
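+/* Probe for futex support by issuing a harmless FUTEX_WAKE on a local
+   variable: if the syscall succeeds, or fails with any errno other than
+   ENOSYS, the futex system call is available. */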
+int
+__kmp_futex_determine_capable()
+{
+ int loc = 0;
+ int rc = syscall( __NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0 );
+ int retval = ( rc == 0 ) || ( errno != ENOSYS );
+
+ KA_TRACE(10, ( "__kmp_futex_determine_capable: rc = %d errno = %d\n", rc,
+ errno ) );
+ KA_TRACE(10, ( "__kmp_futex_determine_capable: futex syscall%s supported\n",
+ retval ? "" : " not" ) );
+
+ return retval;
+}
+
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS)
+/*
+ * Only a 32-bit "add-exchange" instruction is available on the IA-32
+ * architecture, so we implement these routines with compare_and_store loops.
+ */
+
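+/* Each routine below emulates an atomic read-modify-write: read the old
+   value, compute the new value, and retry the compare-and-store until it
+   succeeds, returning the old value. */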
+kmp_int8
+__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d )
+{
+ kmp_int8 old_value, new_value;
+
+ old_value = TCR_1( *p );
+ new_value = old_value | d;
+
+ while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_1( *p );
+ new_value = old_value | d;
+ }
+ return old_value;
+}
+
+kmp_int8
+__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d )
+{
+ kmp_int8 old_value, new_value;
+
+ old_value = TCR_1( *p );
+ new_value = old_value & d;
+
+ while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_1( *p );
+ new_value = old_value & d;
+ }
+ return old_value;
+}
+
+kmp_int32
+__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d )
+{
+ kmp_int32 old_value, new_value;
+
+ old_value = TCR_4( *p );
+ new_value = old_value | d;
+
+ while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_4( *p );
+ new_value = old_value | d;
+ }
+ return old_value;
+}
+
+kmp_int32
+__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
+{
+ kmp_int32 old_value, new_value;
+
+ old_value = TCR_4( *p );
+ new_value = old_value & d;
+
+ while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_4( *p );
+ new_value = old_value & d;
+ }
+ return old_value;
+}
+
+# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
+kmp_int8
+__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d )
+{
+ kmp_int8 old_value, new_value;
+
+ old_value = TCR_1( *p );
+ new_value = old_value + d;
+
+ while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_1( *p );
+ new_value = old_value + d;
+ }
+ return old_value;
+}
+
+kmp_int64
+__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_8( *p );
+ new_value = old_value + d;
+
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_8( *p );
+ new_value = old_value + d;
+ }
+ return old_value;
+}
+# endif /* KMP_ARCH_X86 */
+
+kmp_int64
+__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_8( *p );
+ new_value = old_value | d;
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_8( *p );
+ new_value = old_value | d;
+ }
+ return old_value;
+}
+
+kmp_int64
+__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_8( *p );
+ new_value = old_value & d;
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_8( *p );
+ new_value = old_value & d;
+ }
+ return old_value;
+}
+
+#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
+
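+/* Ask the worker thread with the given gtid to terminate: if thread
+   cancellation is compiled in (KMP_CANCEL_THREADS), request pthread_cancel
+   on it, then yield. */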
+void
+__kmp_terminate_thread( int gtid )
+{
+ int status;
+ kmp_info_t *th = __kmp_threads[ gtid ];
+
+ if ( !th ) return;
+
+ #ifdef KMP_CANCEL_THREADS
+ KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );
+ status = pthread_cancel( th->th.th_info.ds.ds_thread );
+ if ( status != 0 && status != ESRCH ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantTerminateWorkerThread ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+ #endif
+ __kmp_yield( TRUE );
+} //
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Set thread stack info according to values returned by
+ * pthread_getattr_np().
+ * If values are unreasonable, assume call failed and use
+ * incremental stack refinement method instead.
+ * Returns TRUE if the stack parameters could be determined exactly,
+ * FALSE if incremental refinement is necessary.
+ */
+static kmp_int32
+__kmp_set_stack_info( int gtid, kmp_info_t *th )
+{
+ int stack_data;
+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
+ /* Linux* OS only -- no pthread_getattr_np support on OS X* */
+ pthread_attr_t attr;
+ int status;
+ size_t size = 0;
+ void * addr = 0;
+
+ /* Always do incremental stack refinement for ubermaster threads since the initial
+ thread stack range can be reduced by sibling thread creation so pthread_attr_getstack
+ may cause thread gtid aliasing */
+ if ( ! KMP_UBER_GTID(gtid) ) {
+
+ /* Fetch the real thread attributes */
+ status = pthread_attr_init( &attr );
+ KMP_CHECK_SYSFAIL( "pthread_attr_init", status );
+#if KMP_OS_FREEBSD || KMP_OS_NETBSD
+ status = pthread_attr_get_np( pthread_self(), &attr );
+ KMP_CHECK_SYSFAIL( "pthread_attr_get_np", status );
+#else
+ status = pthread_getattr_np( pthread_self(), &attr );
+ KMP_CHECK_SYSFAIL( "pthread_getattr_np", status );
+#endif
+ status = pthread_attr_getstack( &attr, &addr, &size );
+ KMP_CHECK_SYSFAIL( "pthread_attr_getstack", status );
+ KA_TRACE( 60, ( "__kmp_set_stack_info: T#%d pthread_attr_getstack returned size: %lu, "
+ "low addr: %p\n",
+ gtid, size, addr ));
+
+ status = pthread_attr_destroy( &attr );
+ KMP_CHECK_SYSFAIL( "pthread_attr_destroy", status );
+ }
+
+ if ( size != 0 && addr != 0 ) { /* was stack parameter determination successful? */
+ /* Store the correct base and size */
+ TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
+ TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
+ TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
+ return TRUE;
+ }
+#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */
+ /* Use incremental refinement starting from initial conservative estimate */
+ TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
+ TCW_PTR(th -> th.th_info.ds.ds_stackbase, &stack_data);
+ TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
+ return FALSE;
+}
+
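+/* Entry point for worker threads created by __kmp_create_worker: registers
+   the gtid, sets the initial affinity mask, FP control state, and signal
+   mask, records stack info, then runs __kmp_launch_thread(). */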
+static void*
+__kmp_launch_worker( void *thr )
+{
+ int status, old_type, old_state;
+#ifdef KMP_BLOCK_SIGNALS
+ sigset_t new_set, old_set;
+#endif /* KMP_BLOCK_SIGNALS */
+ void *exit_val;
+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
+ void * volatile padding = 0;
+#endif
+ int gtid;
+
+ gtid = ((kmp_info_t*)thr) -> th.th_info.ds.ds_gtid;
+ __kmp_gtid_set_specific( gtid );
+#ifdef KMP_TDATA_GTID
+ __kmp_gtid = gtid;
+#endif
+#if KMP_STATS_ENABLED
+ // set __thread local index to point to thread-specific stats
+ __kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats;
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_thread_name( gtid );
+#endif /* USE_ITT_BUILD */
+
+#if KMP_AFFINITY_SUPPORTED
+ __kmp_affinity_set_init_mask( gtid, FALSE );
+#endif
+
+#ifdef KMP_CANCEL_THREADS
+ status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type );
+ KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status );
+ /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */
+ status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state );
+ KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
+#endif
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ //
+ // Set the FP control regs to be a copy of
+ // the parallel initialization thread's.
+ //
+ __kmp_clear_x87_fpu_status_word();
+ __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
+ __kmp_load_mxcsr( &__kmp_init_mxcsr );
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+#ifdef KMP_BLOCK_SIGNALS
+ status = sigfillset( & new_set );
+ KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status );
+ status = pthread_sigmask( SIG_BLOCK, & new_set, & old_set );
+ KMP_CHECK_SYSFAIL( "pthread_sigmask", status );
+#endif /* KMP_BLOCK_SIGNALS */
+
+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
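+    /* Offset this worker's stack by gtid * __kmp_stkoffset via alloca so
+       that each worker starts at a different stack depth. */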
+ if ( __kmp_stkoffset > 0 && gtid > 0 ) {
+ padding = KMP_ALLOCA( gtid * __kmp_stkoffset );
+ }
+#endif
+
+ KMP_MB();
+ __kmp_set_stack_info( gtid, (kmp_info_t*)thr );
+
+ __kmp_check_stack_overlap( (kmp_info_t*)thr );
+
+ exit_val = __kmp_launch_thread( (kmp_info_t *) thr );
+
+#ifdef KMP_BLOCK_SIGNALS
+ status = pthread_sigmask( SIG_SETMASK, & old_set, NULL );
+ KMP_CHECK_SYSFAIL( "pthread_sigmask", status );
+#endif /* KMP_BLOCK_SIGNALS */
+
+ return exit_val;
+}
+
+
+/* The monitor thread controls all of the threads in the complex */
+
+static void*
+__kmp_launch_monitor( void *thr )
+{
+ int status, old_type, old_state;
+#ifdef KMP_BLOCK_SIGNALS
+ sigset_t new_set;
+#endif /* KMP_BLOCK_SIGNALS */
+ struct timespec interval;
+ int yield_count;
+ int yield_cycles = 0;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #1 launched\n" ) );
+
+ /* register us as the monitor thread */
+ __kmp_gtid_set_specific( KMP_GTID_MONITOR );
+#ifdef KMP_TDATA_GTID
+ __kmp_gtid = KMP_GTID_MONITOR;
+#endif
+
+ KMP_MB();
+
+#if USE_ITT_BUILD
+ __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread.
+#endif /* USE_ITT_BUILD */
+
+ __kmp_set_stack_info( ((kmp_info_t*)thr)->th.th_info.ds.ds_gtid, (kmp_info_t*)thr );
+
+ __kmp_check_stack_overlap( (kmp_info_t*)thr );
+
+#ifdef KMP_CANCEL_THREADS
+ status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type );
+ KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status );
+ /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */
+ status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state );
+ KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
+#endif
+
+ #if KMP_REAL_TIME_FIX
+    // This is a potential fix which allows applications with a real-time scheduling policy to work.
+    // However, a decision about the fix has not been made yet, so it is disabled by default.
+    { // Was the program started with a real-time scheduling policy?
+ int sched = sched_getscheduler( 0 );
+ if ( sched == SCHED_FIFO || sched == SCHED_RR ) {
+            // Yes, we are part of a real-time application. Try to increase the priority of the
+            // monitor.
+ struct sched_param param;
+ int max_priority = sched_get_priority_max( sched );
+ int rc;
+ KMP_WARNING( RealTimeSchedNotSupported );
+ sched_getparam( 0, & param );
+ if ( param.sched_priority < max_priority ) {
+ param.sched_priority += 1;
+ rc = sched_setscheduler( 0, sched, & param );
+ if ( rc != 0 ) {
+ int error = errno;
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantChangeMonitorPriority ),
+ KMP_ERR( error ),
+ KMP_MSG( MonitorWillStarve ),
+ __kmp_msg_null
+ );
+ }; // if
+ } else {
+                // We cannot abort here, because the number of CPUs may be enough for all the threads,
+                // including the monitor thread, so the application could potentially work...
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( RunningAtMaxPriority ),
+ KMP_MSG( MonitorWillStarve ),
+ KMP_HNT( RunningAtMaxPriority ),
+ __kmp_msg_null
+ );
+ }; // if
+ }; // if
+ TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); // AC: free thread that waits for monitor started
+ }
+ #endif // KMP_REAL_TIME_FIX
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
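+    /* Compute the monitor wakeup interval from __kmp_monitor_wakeups
+       (wakeups per second). */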
+ if ( __kmp_monitor_wakeups == 1 ) {
+ interval.tv_sec = 1;
+ interval.tv_nsec = 0;
+ } else {
+ interval.tv_sec = 0;
+ interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
+ }
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #2 monitor\n" ) );
+
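+    /* When yield cycling is enabled, the monitor toggles __kmp_yielding_on
+       every __kmp_yield_off_count / __kmp_yield_on_count wakeups; otherwise
+       yielding stays on permanently. */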
+ if (__kmp_yield_cycle) {
+ __kmp_yielding_on = 0; /* Start out with yielding shut off */
+ yield_count = __kmp_yield_off_count;
+ } else {
+ __kmp_yielding_on = 1; /* Yielding is on permanently */
+ }
+
+ while( ! TCR_4( __kmp_global.g.g_done ) ) {
+ struct timespec now;
+ struct timeval tval;
+
+ /* This thread monitors the state of the system */
+
+ KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );
+
+ status = gettimeofday( &tval, NULL );
+ KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
+ TIMEVAL_TO_TIMESPEC( &tval, &now );
+
+ now.tv_sec += interval.tv_sec;
+ now.tv_nsec += interval.tv_nsec;
+
+ if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
+ now.tv_sec += 1;
+ now.tv_nsec -= KMP_NSEC_PER_SEC;
+ }
+
+ status = pthread_mutex_lock( & __kmp_wait_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
+ // AC: the monitor should not fall asleep if g_done has been set
+ if ( !TCR_4(__kmp_global.g.g_done) ) { // check once more under mutex
+ status = pthread_cond_timedwait( &__kmp_wait_cv.c_cond, &__kmp_wait_mx.m_mutex, &now );
+ if ( status != 0 ) {
+ if ( status != ETIMEDOUT && status != EINTR ) {
+ KMP_SYSFAIL( "pthread_cond_timedwait", status );
+ };
+ };
+ };
+ status = pthread_mutex_unlock( & __kmp_wait_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
+
+ if (__kmp_yield_cycle) {
+ yield_cycles++;
+ if ( (yield_cycles % yield_count) == 0 ) {
+ if (__kmp_yielding_on) {
+ __kmp_yielding_on = 0; /* Turn it off now */
+ yield_count = __kmp_yield_off_count;
+ } else {
+ __kmp_yielding_on = 1; /* Turn it on now */
+ yield_count = __kmp_yield_on_count;
+ }
+ yield_cycles = 0;
+ }
+ } else {
+ __kmp_yielding_on = 1;
+ }
+
+ TCW_4( __kmp_global.g.g_time.dt.t_value,
+ TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #3 cleanup\n" ) );
+
+#ifdef KMP_BLOCK_SIGNALS
+ status = sigfillset( & new_set );
+ KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status );
+ status = pthread_sigmask( SIG_UNBLOCK, & new_set, NULL );
+ KMP_CHECK_SYSFAIL( "pthread_sigmask", status );
+#endif /* KMP_BLOCK_SIGNALS */
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #4 finished\n" ) );
+
+ if( __kmp_global.g.g_abort != 0 ) {
+ /* now we need to terminate the worker threads */
+ /* the value of t_abort is the signal we caught */
+
+ int gtid;
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", __kmp_global.g.g_abort ) );
+
+ /* terminate the OpenMP worker threads */
+ /* TODO this is not valid for sibling threads!!
+ * the uber master might not be 0 anymore.. */
+ for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
+ __kmp_terminate_thread( gtid );
+
+ __kmp_cleanup();
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #6 raise sig=%d\n", __kmp_global.g.g_abort ) );
+
+ if (__kmp_global.g.g_abort > 0)
+ raise( __kmp_global.g.g_abort );
+
+ }
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: #7 exit\n" ) );
+
+ return thr;
+}
+
+void
+__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
+{
+ pthread_t handle;
+ pthread_attr_t thread_attr;
+ int status;
+
+
+ th->th.th_info.ds.ds_gtid = gtid;
+
+#if KMP_STATS_ENABLED
+ // sets up worker thread stats
+ __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);
+
+    // th->th.th_stats is used to transfer the thread-specific stats pointer to __kmp_launch_worker.
+    // So when the thread is created (goes into __kmp_launch_worker) it will
+    // set its __thread local pointer to th->th.th_stats
+ th->th.th_stats = __kmp_stats_list.push_back(gtid);
+ if(KMP_UBER_GTID(gtid)) {
+ __kmp_stats_start_time = tsc_tick_count::now();
+ __kmp_stats_thread_ptr = th->th.th_stats;
+ __kmp_stats_init();
+ KMP_START_EXPLICIT_TIMER(OMP_serial);
+ KMP_START_EXPLICIT_TIMER(OMP_start_end);
+ }
+ __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
+
+#endif // KMP_STATS_ENABLED
+
+ if ( KMP_UBER_GTID(gtid) ) {
+ KA_TRACE( 10, ("__kmp_create_worker: uber thread (%d)\n", gtid ) );
+ th -> th.th_info.ds.ds_thread = pthread_self();
+ __kmp_set_stack_info( gtid, th );
+ __kmp_check_stack_overlap( th );
+ return;
+ }; // if
+
+ KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+#ifdef KMP_THREAD_ATTR
+ {
+ status = pthread_attr_init( &thread_attr );
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantInitThreadAttrs ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+ status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE );
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetWorkerState ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+
+ /* Set stack size for this thread now.
+ * The multiple of 2 is there because on some machines, requesting an unusual stacksize
+ * causes the thread to have an offset before the dummy alloca() takes place to create the
+ * offset. Since we want the user to have a sufficient stacksize AND support a stack offset, we
+ * alloca() twice the offset so that the upcoming alloca() does not eliminate any premade
+ * offset, and also gives the user the stack space they requested for all threads */
+ stack_size += gtid * __kmp_stkoffset * 2;
+
+ KA_TRACE( 10, ( "__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
+ "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
+ gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) );
+
+# ifdef _POSIX_THREAD_ATTR_STACKSIZE
+ status = pthread_attr_setstacksize( & thread_attr, stack_size );
+# ifdef KMP_BACKUP_STKSIZE
+ if ( status != 0 ) {
+ if ( ! __kmp_env_stksize ) {
+ stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
+ __kmp_stksize = KMP_BACKUP_STKSIZE;
+ KA_TRACE( 10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
+ "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
+ "bytes\n",
+ gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size )
+ );
+ status = pthread_attr_setstacksize( &thread_attr, stack_size );
+ }; // if
+ }; // if
+# endif /* KMP_BACKUP_STKSIZE */
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetWorkerStackSize, stack_size ),
+ KMP_ERR( status ),
+ KMP_HNT( ChangeWorkerStackSize ),
+ __kmp_msg_null
+ );
+ }; // if
+# endif /* _POSIX_THREAD_ATTR_STACKSIZE */
+ }
+#endif /* KMP_THREAD_ATTR */
+
+ {
+ status = pthread_create( & handle, & thread_attr, __kmp_launch_worker, (void *) th );
+ if ( status != 0 || ! handle ) { // ??? Why do we check handle??
+#ifdef _POSIX_THREAD_ATTR_STACKSIZE
+ if ( status == EINVAL ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetWorkerStackSize, stack_size ),
+ KMP_ERR( status ),
+ KMP_HNT( IncreaseWorkerStackSize ),
+ __kmp_msg_null
+ );
+ };
+ if ( status == ENOMEM ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetWorkerStackSize, stack_size ),
+ KMP_ERR( status ),
+ KMP_HNT( DecreaseWorkerStackSize ),
+ __kmp_msg_null
+ );
+ };
+#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
+ if ( status == EAGAIN ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( NoResourcesForWorkerThread ),
+ KMP_ERR( status ),
+ KMP_HNT( Decrease_NUM_THREADS ),
+ __kmp_msg_null
+ );
+ }; // if
+ KMP_SYSFAIL( "pthread_create", status );
+ }; // if
+
+ th->th.th_info.ds.ds_thread = handle;
+ }
+
+#ifdef KMP_THREAD_ATTR
+ {
+ status = pthread_attr_destroy( & thread_attr );
+ if ( status ) {
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantDestroyThreadAttrs ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+ }
+#endif /* KMP_THREAD_ATTR */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
+
+} // __kmp_create_worker
+
+
+void
+__kmp_create_monitor( kmp_info_t *th )
+{
+ pthread_t handle;
+ pthread_attr_t thread_attr;
+ size_t size;
+ int status;
+ int auto_adj_size = FALSE;
+
+ KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
+ th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
+ #if KMP_REAL_TIME_FIX
+ TCW_4( __kmp_global.g.g_time.dt.t_value, -1 ); // Will use it for synchronization a bit later.
+ #else
+ TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );
+ #endif // KMP_REAL_TIME_FIX
+
+ #ifdef KMP_THREAD_ATTR
+ if ( __kmp_monitor_stksize == 0 ) {
+ __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
+ auto_adj_size = TRUE;
+ }
+ status = pthread_attr_init( &thread_attr );
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantInitThreadAttrs ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+ status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE );
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetMonitorState ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+
+ #ifdef _POSIX_THREAD_ATTR_STACKSIZE
+ status = pthread_attr_getstacksize( & thread_attr, & size );
+ KMP_CHECK_SYSFAIL( "pthread_attr_getstacksize", status );
+ #else
+ size = __kmp_sys_min_stksize;
+ #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
+ #endif /* KMP_THREAD_ATTR */
+
+ if ( __kmp_monitor_stksize == 0 ) {
+ __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
+ }
+ if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
+ __kmp_monitor_stksize = __kmp_sys_min_stksize;
+ }
+
+ KA_TRACE( 10, ( "__kmp_create_monitor: default stacksize = %lu bytes,"
+ "requested stacksize = %lu bytes\n",
+ size, __kmp_monitor_stksize ) );
+
+ retry:
+
+ /* Set stack size for this thread now. */
+
+ #ifdef _POSIX_THREAD_ATTR_STACKSIZE
+ KA_TRACE( 10, ( "__kmp_create_monitor: setting stacksize = %lu bytes,",
+ __kmp_monitor_stksize ) );
+ status = pthread_attr_setstacksize( & thread_attr, __kmp_monitor_stksize );
+ if ( status != 0 ) {
+ if ( auto_adj_size ) {
+ __kmp_monitor_stksize *= 2;
+ goto retry;
+ }
+ __kmp_msg(
+ kmp_ms_warning, // should this be fatal? BB
+ KMP_MSG( CantSetMonitorStackSize, (long int) __kmp_monitor_stksize ),
+ KMP_ERR( status ),
+ KMP_HNT( ChangeMonitorStackSize ),
+ __kmp_msg_null
+ );
+ }; // if
+ #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
+
+ status = pthread_create( &handle, & thread_attr, __kmp_launch_monitor, (void *) th );
+
+ if ( status != 0 ) {
+ #ifdef _POSIX_THREAD_ATTR_STACKSIZE
+ if ( status == EINVAL ) {
+ if ( auto_adj_size && ( __kmp_monitor_stksize < (size_t)0x40000000 ) ) {
+ __kmp_monitor_stksize *= 2;
+ goto retry;
+ }
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ),
+ KMP_ERR( status ),
+ KMP_HNT( IncreaseMonitorStackSize ),
+ __kmp_msg_null
+ );
+ }; // if
+ if ( status == ENOMEM ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ),
+ KMP_ERR( status ),
+ KMP_HNT( DecreaseMonitorStackSize ),
+ __kmp_msg_null
+ );
+ }; // if
+ #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
+ if ( status == EAGAIN ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( NoResourcesForMonitorThread ),
+ KMP_ERR( status ),
+ KMP_HNT( DecreaseNumberOfThreadsInUse ),
+ __kmp_msg_null
+ );
+ }; // if
+ KMP_SYSFAIL( "pthread_create", status );
+ }; // if
+
+ th->th.th_info.ds.ds_thread = handle;
+
+ #if KMP_REAL_TIME_FIX
+        // Wait until the monitor thread has really started and set its *priority*.
+ KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == sizeof( __kmp_global.g.g_time.dt.t_value ) );
+ __kmp_wait_yield_4(
+ (kmp_uint32 volatile *) & __kmp_global.g.g_time.dt.t_value, -1, & __kmp_neq_4, NULL
+ );
+ #endif // KMP_REAL_TIME_FIX
+
+ #ifdef KMP_THREAD_ATTR
+ status = pthread_attr_destroy( & thread_attr );
+ if ( status != 0 ) {
+ __kmp_msg( //
+ kmp_ms_warning,
+ KMP_MSG( CantDestroyThreadAttrs ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }; // if
+ #endif
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 10, ( "__kmp_create_monitor: monitor created %#.8lx\n", th->th.th_info.ds.ds_thread ) );
+
+} // __kmp_create_monitor
+
+void
+__kmp_exit_thread(
+ int exit_status
+) {
+ pthread_exit( (void *)(intptr_t) exit_status );
+} // __kmp_exit_thread
+
+void __kmp_resume_monitor();
+
+void
+__kmp_reap_monitor( kmp_info_t *th )
+{
+ int status;
+ void *exit_val;
+
+ KA_TRACE( 10, ("__kmp_reap_monitor: try to reap monitor thread with handle %#.8lx\n",
+ th->th.th_info.ds.ds_thread ) );
+
+ // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
+ // If both tid and gtid are 0, it means the monitor did not ever start.
+ // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
+ KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
+ if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
+ return;
+ }; // if
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+
+ /* First, check to see whether the monitor thread exists. This could prevent a hang,
+ but if the monitor dies after the pthread_kill call and before the pthread_join
+ call, it will still hang. */
+
+ status = pthread_kill( th->th.th_info.ds.ds_thread, 0 );
+ if (status == ESRCH) {
+
+ KA_TRACE( 10, ("__kmp_reap_monitor: monitor does not exist, returning\n") );
+
+ } else
+ {
+ __kmp_resume_monitor(); // Wake up the monitor thread
+ status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val);
+ if (exit_val != th) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( ReapMonitorError ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }
+ }
+
+ th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
+ th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
+
+ KA_TRACE( 10, ("__kmp_reap_monitor: done reaping monitor thread with handle %#.8lx\n",
+ th->th.th_info.ds.ds_thread ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+}
+
+void
+__kmp_reap_worker( kmp_info_t *th )
+{
+ int status;
+ void *exit_val;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid ) );
+
+ /* First, check to see whether the worker thread exists. This could prevent a hang,
+ but if the worker dies after the pthread_kill call and before the pthread_join
+ call, it will still hang. */
+
+ {
+ status = pthread_kill( th->th.th_info.ds.ds_thread, 0 );
+ if (status == ESRCH) {
+ KA_TRACE( 10, ("__kmp_reap_worker: worker T#%d does not exist, returning\n",
+ th->th.th_info.ds.ds_gtid ) );
+ }
+ else {
+ KA_TRACE( 10, ("__kmp_reap_worker: try to join with worker T#%d\n",
+ th->th.th_info.ds.ds_gtid ) );
+
+ status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val);
+#ifdef KMP_DEBUG
+ /* Don't expose these to the user until we understand when they trigger */
+ if ( status != 0 ) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( ReapWorkerError ),
+ KMP_ERR( status ),
+ __kmp_msg_null
+ );
+ }
+ if ( exit_val != th ) {
+ KA_TRACE( 10, ( "__kmp_reap_worker: worker T#%d did not reap properly, "
+ "exit_val = %p\n",
+ th->th.th_info.ds.ds_gtid, exit_val ) );
+ }
+#endif /* KMP_DEBUG */
+ }
+ }
+
+ KA_TRACE( 10, ("__kmp_reap_worker: done reaping T#%d\n", th->th.th_info.ds.ds_gtid ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if KMP_HANDLE_SIGNALS
+
+
+static void
+__kmp_null_handler( int signo )
+{
+    // Do nothing; used for SIG_IGN-type actions.
+} // __kmp_null_handler
+
+
+static void
+__kmp_team_handler( int signo )
+{
+ if ( __kmp_global.g.g_abort == 0 ) {
+ /* Stage 1 signal handler, let's shut down all of the threads */
+ #ifdef KMP_DEBUG
+ __kmp_debug_printf( "__kmp_team_handler: caught signal = %d\n", signo );
+ #endif
+ switch ( signo ) {
+ case SIGHUP :
+ case SIGINT :
+ case SIGQUIT :
+ case SIGILL :
+ case SIGABRT :
+ case SIGFPE :
+ case SIGBUS :
+ case SIGSEGV :
+ #ifdef SIGSYS
+ case SIGSYS :
+ #endif
+ case SIGTERM :
+ if ( __kmp_debug_buf ) {
+ __kmp_dump_debug_buffer( );
+ }; // if
+ KMP_MB(); // Flush all pending memory write invalidates.
+ TCW_4( __kmp_global.g.g_abort, signo );
+ KMP_MB(); // Flush all pending memory write invalidates.
+ TCW_4( __kmp_global.g.g_done, TRUE );
+ KMP_MB(); // Flush all pending memory write invalidates.
+ break;
+ default:
+ #ifdef KMP_DEBUG
+ __kmp_debug_printf( "__kmp_team_handler: unknown signal type" );
+ #endif
+ break;
+ }; // switch
+ }; // if
+} // __kmp_team_handler
+
+
+static
+void __kmp_sigaction( int signum, const struct sigaction * act, struct sigaction * oldact ) {
+ int rc = sigaction( signum, act, oldact );
+ KMP_CHECK_SYSFAIL_ERRNO( "sigaction", rc );
+}
+
+
+static void
+__kmp_install_one_handler( int sig, sig_func_t handler_func, int parallel_init )
+{
+ KMP_MB(); // Flush all pending memory write invalidates.
+ KB_TRACE( 60, ( "__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init ) );
+ if ( parallel_init ) {
+ struct sigaction new_action;
+ struct sigaction old_action;
+ new_action.sa_handler = handler_func;
+ new_action.sa_flags = 0;
+ sigfillset( & new_action.sa_mask );
+ __kmp_sigaction( sig, & new_action, & old_action );
+ if ( old_action.sa_handler == __kmp_sighldrs[ sig ].sa_handler ) {
+ sigaddset( & __kmp_sigset, sig );
+ } else {
+            // Restore/keep the user's handler if one was previously installed.
+ __kmp_sigaction( sig, & old_action, NULL );
+ }; // if
+ } else {
+        // Save initial/system signal handlers to see if user handlers are installed.
+ __kmp_sigaction( sig, NULL, & __kmp_sighldrs[ sig ] );
+ }; // if
+ KMP_MB(); // Flush all pending memory write invalidates.
+} // __kmp_install_one_handler
+
+
+static void
+__kmp_remove_one_handler( int sig )
+{
+ KB_TRACE( 60, ( "__kmp_remove_one_handler( %d )\n", sig ) );
+ if ( sigismember( & __kmp_sigset, sig ) ) {
+ struct sigaction old;
+ KMP_MB(); // Flush all pending memory write invalidates.
+ __kmp_sigaction( sig, & __kmp_sighldrs[ sig ], & old );
+ if ( ( old.sa_handler != __kmp_team_handler ) && ( old.sa_handler != __kmp_null_handler ) ) {
+            // Restore the user's signal handler.
+ KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
+ __kmp_sigaction( sig, & old, NULL );
+ }; // if
+ sigdelset( & __kmp_sigset, sig );
+ KMP_MB(); // Flush all pending memory write invalidates.
+ }; // if
+} // __kmp_remove_one_handler
+
+
+void
+__kmp_install_signals( int parallel_init )
+{
+ KB_TRACE( 10, ( "__kmp_install_signals( %d )\n", parallel_init ) );
+ if ( __kmp_handle_signals || ! parallel_init ) {
+        // If ! parallel_init, we do not install handlers, just save the original handlers.
+        // Let us do it even if __kmp_handle_signals is 0.
+ sigemptyset( & __kmp_sigset );
+ __kmp_install_one_handler( SIGHUP, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGQUIT, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGBUS, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
+ #ifdef SIGSYS
+ __kmp_install_one_handler( SIGSYS, __kmp_team_handler, parallel_init );
+ #endif // SIGSYS
+ __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
+ #ifdef SIGPIPE
+ __kmp_install_one_handler( SIGPIPE, __kmp_team_handler, parallel_init );
+ #endif // SIGPIPE
+ }; // if
+} // __kmp_install_signals
+
+
+void
+__kmp_remove_signals( void )
+{
+ int sig;
+ KB_TRACE( 10, ( "__kmp_remove_signals()\n" ) );
+ for ( sig = 1; sig < NSIG; ++ sig ) {
+ __kmp_remove_one_handler( sig );
+ }; // for sig
+} // __kmp_remove_signals
+
+
+#endif // KMP_HANDLE_SIGNALS
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_enable( int new_state )
+{
+ #ifdef KMP_CANCEL_THREADS
+ int status, old_state;
+ status = pthread_setcancelstate( new_state, & old_state );
+ KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
+ KMP_DEBUG_ASSERT( old_state == PTHREAD_CANCEL_DISABLE );
+ #endif
+}
+
+void
+__kmp_disable( int * old_state )
+{
+ #ifdef KMP_CANCEL_THREADS
+ int status;
+ status = pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, old_state );
+ KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status );
+ #endif
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+static void
+__kmp_atfork_prepare (void)
+{
+ /* nothing to do */
+}
+
+static void
+__kmp_atfork_parent (void)
+{
+ /* nothing to do */
+}
+
+/*
+ Reset the library so execution in the child starts "all over again" with
+ clean data structures in initial states. Don't worry about freeing memory
+ allocated by parent, just abandon it to be safe.
+*/
+static void
+__kmp_atfork_child (void)
+{
+ /* TODO make sure this is done right for nested/sibling */
+ // ATT: Memory leaks are here? TODO: Check it and fix.
+ /* KMP_ASSERT( 0 ); */
+
+ ++__kmp_fork_count;
+
+ __kmp_init_runtime = FALSE;
+ __kmp_init_monitor = 0;
+ __kmp_init_parallel = FALSE;
+ __kmp_init_middle = FALSE;
+ __kmp_init_serial = FALSE;
+ TCW_4(__kmp_init_gtid, FALSE);
+ __kmp_init_common = FALSE;
+
+ TCW_4(__kmp_init_user_locks, FALSE);
+#if ! KMP_USE_DYNAMIC_LOCK
+ __kmp_user_lock_table.used = 1;
+ __kmp_user_lock_table.allocated = 0;
+ __kmp_user_lock_table.table = NULL;
+ __kmp_lock_blocks = NULL;
+#endif
+
+ __kmp_all_nth = 0;
+ TCW_4(__kmp_nth, 0);
+
+ /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate here
+ so threadprivate doesn't use stale data */
+ KA_TRACE( 10, ( "__kmp_atfork_child: checking cache address list %p\n",
+ __kmp_threadpriv_cache_list ) );
+
+ while ( __kmp_threadpriv_cache_list != NULL ) {
+
+ if ( *__kmp_threadpriv_cache_list -> addr != NULL ) {
+ KC_TRACE( 50, ( "__kmp_atfork_child: zeroing cache at address %p\n",
+ &(*__kmp_threadpriv_cache_list -> addr) ) );
+
+ *__kmp_threadpriv_cache_list -> addr = NULL;
+ }
+ __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list -> next;
+ }
+
+ __kmp_init_runtime = FALSE;
+
+ /* reset statically initialized locks */
+ __kmp_init_bootstrap_lock( &__kmp_initz_lock );
+ __kmp_init_bootstrap_lock( &__kmp_stdio_lock );
+ __kmp_init_bootstrap_lock( &__kmp_console_lock );
+
+ /* This is necessary to make sure no stale data is left around */
+    /* AC: customers complain that we use unsafe routines in the atfork
+       handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
+       in dynamic_link when checking the presence of the shared tbbmalloc library.
+       The suggestion is to make the library initialization lazier, similar
+       to what is done for __kmpc_begin(). */
+ // TODO: synchronize all static initializations with regular library
+ // startup; look at kmp_global.c and etc.
+ //__kmp_internal_begin ();
+
+}
+
+void
+__kmp_register_atfork(void) {
+ if ( __kmp_need_register_atfork ) {
+ int status = pthread_atfork( __kmp_atfork_prepare, __kmp_atfork_parent, __kmp_atfork_child );
+ KMP_CHECK_SYSFAIL( "pthread_atfork", status );
+ __kmp_need_register_atfork = FALSE;
+ }
+}
+
+void
+__kmp_suspend_initialize( void )
+{
+ int status;
+ status = pthread_mutexattr_init( &__kmp_suspend_mutex_attr );
+ KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status );
+ status = pthread_condattr_init( &__kmp_suspend_cond_attr );
+ KMP_CHECK_SYSFAIL( "pthread_condattr_init", status );
+}
+
+static void
+__kmp_suspend_initialize_thread( kmp_info_t *th )
+{
+ if ( th->th.th_suspend_init_count <= __kmp_fork_count ) {
+ /* this means we haven't initialized the suspension pthread objects for this thread
+ in this instance of the process */
+ int status;
+ status = pthread_cond_init( &th->th.th_suspend_cv.c_cond, &__kmp_suspend_cond_attr );
+ KMP_CHECK_SYSFAIL( "pthread_cond_init", status );
+ status = pthread_mutex_init( &th->th.th_suspend_mx.m_mutex, & __kmp_suspend_mutex_attr );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_init", status );
+ *(volatile int*)&th->th.th_suspend_init_count = __kmp_fork_count + 1;
+ };
+}
+
+void
+__kmp_suspend_uninitialize_thread( kmp_info_t *th )
+{
+ if(th->th.th_suspend_init_count > __kmp_fork_count) {
+        /* this means we have initialized the suspension pthread objects for this thread
+           in this instance of the process */
+ int status;
+
+ status = pthread_cond_destroy( &th->th.th_suspend_cv.c_cond );
+ if ( status != 0 && status != EBUSY ) {
+ KMP_SYSFAIL( "pthread_cond_destroy", status );
+ };
+ status = pthread_mutex_destroy( &th->th.th_suspend_mx.m_mutex );
+ if ( status != 0 && status != EBUSY ) {
+ KMP_SYSFAIL( "pthread_mutex_destroy", status );
+ };
+ --th->th.th_suspend_init_count;
+ KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count);
+ }
+}
+
+/* This routine puts the calling thread to sleep after setting the
+ * sleep bit for the indicated flag variable to true.
+ */
+template <class C>
+static inline void __kmp_suspend_template( int th_gtid, C *flag )
+{
+ KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
+ kmp_info_t *th = __kmp_threads[th_gtid];
+ int status;
+ typename C::flag_t old_spin;
+
+ KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid, flag->get() ) );
+
+ __kmp_suspend_initialize_thread( th );
+
+ status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
+
+ KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
+ th_gtid, flag->get() ) );
+
+ /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
+ gets called first?
+ */
+ old_spin = flag->set_sleeping();
+
+ KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x, was %x\n",
+ th_gtid, flag->get(), *(flag->get()), old_spin ) );
+
+ if ( flag->done_check_val(old_spin) ) {
+ old_spin = flag->unset_sleeping();
+ KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for spin(%p)\n",
+ th_gtid, flag->get()) );
+ } else {
+ /* Encapsulate in a loop as the documentation states that this may
+ * "with low probability" return when the condition variable has
+ * not been signaled or broadcast
+ */
+ int deactivated = FALSE;
+ TCW_PTR(th->th.th_sleep_loc, (void *)flag);
+ while ( flag->is_sleeping() ) {
+#ifdef DEBUG_SUSPEND
+ char buffer[128];
+ __kmp_suspend_count++;
+ __kmp_print_cond( buffer, &th->th.th_suspend_cv );
+ __kmp_printf( "__kmp_suspend_template: suspending T#%d: %s\n", th_gtid, buffer );
+#endif
+ // Mark the thread as no longer active (only in the first iteration of the loop).
+ if ( ! deactivated ) {
+ th->th.th_active = FALSE;
+ if ( th->th.th_active_in_pool ) {
+ th->th.th_active_in_pool = FALSE;
+ KMP_TEST_THEN_DEC32(
+ (kmp_int32 *) &__kmp_thread_pool_active_nth );
+ KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
+ }
+ deactivated = TRUE;
+
+
+ }
+
+#if USE_SUSPEND_TIMEOUT
+ struct timespec now;
+ struct timeval tval;
+ int msecs;
+
+ status = gettimeofday( &tval, NULL );
+ KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
+ TIMEVAL_TO_TIMESPEC( &tval, &now );
+
+ msecs = (4*__kmp_dflt_blocktime) + 200;
+ now.tv_sec += msecs / 1000;
+ now.tv_nsec += (msecs % 1000)*1000;
+
+ KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_timedwait\n",
+ th_gtid ) );
+ status = pthread_cond_timedwait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex, & now );
+#else
+ KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_wait\n",
+ th_gtid ) );
+ status = pthread_cond_wait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex );
+#endif
+
+ if ( (status != 0) && (status != EINTR) && (status != ETIMEDOUT) ) {
+ KMP_SYSFAIL( "pthread_cond_wait", status );
+ }
+#ifdef KMP_DEBUG
+ if (status == ETIMEDOUT) {
+ if ( flag->is_sleeping() ) {
+ KF_TRACE( 100, ( "__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid ) );
+ } else {
+ KF_TRACE( 2, ( "__kmp_suspend_template: T#%d timeout wakeup, sleep bit not set!\n",
+ th_gtid ) );
+ }
+ } else if ( flag->is_sleeping() ) {
+ KF_TRACE( 100, ( "__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ) );
+ }
+#endif
+ } // while
+
+        // Mark the thread as active again (if it was previously marked as inactive)
+ if ( deactivated ) {
+ th->th.th_active = TRUE;
+ if ( TCR_4(th->th.th_in_pool) ) {
+ KMP_TEST_THEN_INC32( (kmp_int32 *) &__kmp_thread_pool_active_nth );
+ th->th.th_active_in_pool = TRUE;
+ }
+ }
+ }
+
+#ifdef DEBUG_SUSPEND
+ {
+ char buffer[128];
+ __kmp_print_cond( buffer, &th->th.th_suspend_cv);
+ __kmp_printf( "__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid, buffer );
+ }
+#endif
+
+
+ status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
+
+ KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) );
+}
+
+void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
+void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
+void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
+
+
+/* This routine signals the thread specified by target_gtid to wake up
+ * after setting the sleep bit indicated by the flag argument to FALSE.
+ * The target thread must already have called __kmp_suspend_template()
+ */
+template <class C>
+static inline void __kmp_resume_template( int target_gtid, C *flag )
+{
+ KMP_TIME_DEVELOPER_BLOCK(USER_resume);
+ kmp_info_t *th = __kmp_threads[target_gtid];
+ int status;
+
+#ifdef KMP_DEBUG
+ int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
+#endif
+
+ KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) );
+ KMP_DEBUG_ASSERT( gtid != target_gtid );
+
+ __kmp_suspend_initialize_thread( th );
+
+ status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
+
+ if (!flag) { // coming from __kmp_null_resume_wrapper
+ flag = (C *)th->th.th_sleep_loc;
+ }
+
+ // First, check if the flag is null or its type has changed. If so, someone else woke it up.
+ if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to
+ KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p)\n",
+ gtid, target_gtid, NULL ) );
+ status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
+ return;
+ }
+ else { // if multiple threads are sleeping, flag should be internally referring to a specific thread here
+ typename C::flag_t old_spin = flag->unset_sleeping();
+ if ( ! flag->is_sleeping_val(old_spin) ) {
+ KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p): "
+ "%u => %u\n",
+ gtid, target_gtid, flag->get(), old_spin, *flag->get() ) );
+
+ status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
+ return;
+ }
+ KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p): "
+ "%u => %u\n",
+ gtid, target_gtid, flag->get(), old_spin, *flag->get() ) );
+ }
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+
+
+#ifdef DEBUG_SUSPEND
+ {
+ char buffer[128];
+ __kmp_print_cond( buffer, &th->th.th_suspend_cv );
+ __kmp_printf( "__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid, target_gtid, buffer );
+ }
+#endif
+
+
+ status = pthread_cond_signal( &th->th.th_suspend_cv.c_cond );
+ KMP_CHECK_SYSFAIL( "pthread_cond_signal", status );
+ status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
+ KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n",
+ gtid, target_gtid ) );
+}
+
+void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
+void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
+void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
+
+void
+__kmp_resume_monitor()
+{
+ int status;
+#ifdef KMP_DEBUG
+ int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
+ KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n",
+ gtid, KMP_GTID_MONITOR ) );
+ KMP_DEBUG_ASSERT( gtid != KMP_GTID_MONITOR );
+#endif
+ status = pthread_mutex_lock( &__kmp_wait_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status );
+#ifdef DEBUG_SUSPEND
+ {
+ char buffer[128];
+ __kmp_print_cond( buffer, &__kmp_wait_cv.c_cond );
+ __kmp_printf( "__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid, KMP_GTID_MONITOR, buffer );
+ }
+#endif
+ status = pthread_cond_signal( &__kmp_wait_cv.c_cond );
+ KMP_CHECK_SYSFAIL( "pthread_cond_signal", status );
+ status = pthread_mutex_unlock( &__kmp_wait_mx.m_mutex );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status );
+ KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d exiting after signaling wake up for T#%d\n",
+ gtid, KMP_GTID_MONITOR ) );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_yield( int cond )
+{
+ if (cond && __kmp_yielding_on) {
+ sched_yield();
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_gtid_set_specific( int gtid )
+{
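+    /* The gtid is stored biased by +1 so that a missing key value (0) can be
+       distinguished from gtid 0; __kmp_gtid_get_specific() undoes the bias. */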
+ int status;
+ KMP_ASSERT( __kmp_init_runtime );
+ status = pthread_setspecific( __kmp_gtid_threadprivate_key, (void*)(intptr_t)(gtid+1) );
+ KMP_CHECK_SYSFAIL( "pthread_setspecific", status );
+}
+
+int
+__kmp_gtid_get_specific()
+{
+ int gtid;
+ if ( !__kmp_init_runtime ) {
+ KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
+ return KMP_GTID_SHUTDOWN;
+ }
+ gtid = (int)(size_t)pthread_getspecific( __kmp_gtid_threadprivate_key );
+ if ( gtid == 0 ) {
+ gtid = KMP_GTID_DNE;
+ }
+ else {
+ gtid--;
+ }
+ KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
+ __kmp_gtid_threadprivate_key, gtid ));
+ return gtid;
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+double
+__kmp_read_cpu_time( void )
+{
+ /*clock_t t;*/
+ struct tms buffer;
+
+ /*t =*/ times( & buffer );
+
+ return (buffer.tms_utime + buffer.tms_cutime) / (double) CLOCKS_PER_SEC;
+}
+
+int
+__kmp_read_system_info( struct kmp_sys_info *info )
+{
+ int status;
+ struct rusage r_usage;
+
+ memset( info, 0, sizeof( *info ) );
+
+ status = getrusage( RUSAGE_SELF, &r_usage);
+ KMP_CHECK_SYSFAIL_ERRNO( "getrusage", status );
+
+ info->maxrss = r_usage.ru_maxrss; /* the maximum resident set size utilized (in kilobytes) */
+ info->minflt = r_usage.ru_minflt; /* the number of page faults serviced without any I/O */
+ info->majflt = r_usage.ru_majflt; /* the number of page faults serviced that required I/O */
+ info->nswap = r_usage.ru_nswap; /* the number of times a process was "swapped" out of memory */
+ info->inblock = r_usage.ru_inblock; /* the number of times the file system had to perform input */
+ info->oublock = r_usage.ru_oublock; /* the number of times the file system had to perform output */
+ info->nvcsw = r_usage.ru_nvcsw; /* the number of times a context switch was voluntarily */
+ info->nivcsw = r_usage.ru_nivcsw; /* the number of times a context switch was forced */
+
+ return (status != 0);
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_read_system_time( double *delta )
+{
+ double t_ns;
+ struct timeval tval;
+ struct timespec stop;
+ int status;
+
+ status = gettimeofday( &tval, NULL );
+ KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
+ TIMEVAL_TO_TIMESPEC( &tval, &stop );
+ t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start);
+ *delta = (t_ns * 1e-9);
+}
+
+void
+__kmp_clear_system_time( void )
+{
+ struct timeval tval;
+ int status;
+ status = gettimeofday( &tval, NULL );
+ KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
+ TIMEVAL_TO_TIMESPEC( &tval, &__kmp_sys_timer_data.start );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#ifdef BUILD_TV
+
+void
+__kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr )
+{
+ struct tv_data *p;
+
+ p = (struct tv_data *) __kmp_allocate( sizeof( *p ) );
+
+ p->u.tp.global_addr = global_addr;
+ p->u.tp.thread_addr = thread_addr;
+
+ p->type = (void *) 1;
+
+ p->next = th->th.th_local.tv_data;
+ th->th.th_local.tv_data = p;
+
+ if ( p->next == 0 ) {
+ int rc = pthread_setspecific( __kmp_tv_key, p );
+ KMP_CHECK_SYSFAIL( "pthread_setspecific", rc );
+ }
+}
+
+#endif /* BUILD_TV */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+static int
+__kmp_get_xproc( void ) {
+
+ int r = 0;
+
+ #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
+
+ r = sysconf( _SC_NPROCESSORS_ONLN );
+
+ #elif KMP_OS_DARWIN
+
+ // Bug C77011 High "OpenMP Threads and number of active cores".
+
+ // Find the number of available CPUs.
+ kern_return_t rc;
+ host_basic_info_data_t info;
+ mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
+ rc = host_info( mach_host_self(), HOST_BASIC_INFO, (host_info_t) & info, & num );
+ if ( rc == 0 && num == HOST_BASIC_INFO_COUNT ) {
+ // Cannot use KA_TRACE() here because this code works before trace support is
+ // initialized.
+ r = info.avail_cpus;
+ } else {
+ KMP_WARNING( CantGetNumAvailCPU );
+ KMP_INFORM( AssumedNumCPU );
+ }; // if
+
+ #else
+
+ #error "Unknown or unsupported OS."
+
+ #endif
+
+ return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
+
+} // __kmp_get_xproc
+
+int
+__kmp_read_from_file( char const *path, char const *format, ... )
+{
+    int result;
+    va_list args;
+
+    va_start(args, format);
+    FILE *f = fopen(path, "rb");
+    if ( f == NULL ) {
+        va_end(args);
+        return 0;
+    }
+    result = vfscanf(f, format, args);
+    va_end(args);
+    fclose(f);
+
+    return result;
+}
+
+void
+__kmp_runtime_initialize( void )
+{
+ int status;
+ pthread_mutexattr_t mutex_attr;
+ pthread_condattr_t cond_attr;
+
+ if ( __kmp_init_runtime ) {
+ return;
+ }; // if
+
+ #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
+ if ( ! __kmp_cpuinfo.initialized ) {
+ __kmp_query_cpuid( &__kmp_cpuinfo );
+ }; // if
+ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ __kmp_xproc = __kmp_get_xproc();
+
+ if ( sysconf( _SC_THREADS ) ) {
+
+ /* Query the maximum number of threads */
+ __kmp_sys_max_nth = sysconf( _SC_THREAD_THREADS_MAX );
+ if ( __kmp_sys_max_nth == -1 ) {
+ /* Unlimited threads for NPTL */
+ __kmp_sys_max_nth = INT_MAX;
+ }
+ else if ( __kmp_sys_max_nth <= 1 ) {
+ /* Can't tell, just use PTHREAD_THREADS_MAX */
+ __kmp_sys_max_nth = KMP_MAX_NTH;
+ }
+
+ /* Query the minimum stack size */
+ __kmp_sys_min_stksize = sysconf( _SC_THREAD_STACK_MIN );
+ if ( __kmp_sys_min_stksize <= 1 ) {
+ __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
+ }
+ }
+
+ /* Set up minimum number of threads to switch to TLS gtid */
+ __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
+
+ #ifdef BUILD_TV
+ {
+ int rc = pthread_key_create( & __kmp_tv_key, 0 );
+ KMP_CHECK_SYSFAIL( "pthread_key_create", rc );
+ }
+ #endif
+
+ status = pthread_key_create( &__kmp_gtid_threadprivate_key, __kmp_internal_end_dest );
+ KMP_CHECK_SYSFAIL( "pthread_key_create", status );
+ status = pthread_mutexattr_init( & mutex_attr );
+ KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status );
+ status = pthread_mutex_init( & __kmp_wait_mx.m_mutex, & mutex_attr );
+ KMP_CHECK_SYSFAIL( "pthread_mutex_init", status );
+ status = pthread_condattr_init( & cond_attr );
+ KMP_CHECK_SYSFAIL( "pthread_condattr_init", status );
+ status = pthread_cond_init( & __kmp_wait_cv.c_cond, & cond_attr );
+ KMP_CHECK_SYSFAIL( "pthread_cond_init", status );
+#if USE_ITT_BUILD
+ __kmp_itt_initialize();
+#endif /* USE_ITT_BUILD */
+
+ __kmp_init_runtime = TRUE;
+}
+
+void
+__kmp_runtime_destroy( void )
+{
+ int status;
+
+ if ( ! __kmp_init_runtime ) {
+ return; // Nothing to do.
+ };
+
+#if USE_ITT_BUILD
+ __kmp_itt_destroy();
+#endif /* USE_ITT_BUILD */
+
+ status = pthread_key_delete( __kmp_gtid_threadprivate_key );
+ KMP_CHECK_SYSFAIL( "pthread_key_delete", status );
+ #ifdef BUILD_TV
+ status = pthread_key_delete( __kmp_tv_key );
+ KMP_CHECK_SYSFAIL( "pthread_key_delete", status );
+ #endif
+
+ status = pthread_mutex_destroy( & __kmp_wait_mx.m_mutex );
+ if ( status != 0 && status != EBUSY ) {
+ KMP_SYSFAIL( "pthread_mutex_destroy", status );
+ }
+ status = pthread_cond_destroy( & __kmp_wait_cv.c_cond );
+ if ( status != 0 && status != EBUSY ) {
+ KMP_SYSFAIL( "pthread_cond_destroy", status );
+ }
+ #if KMP_AFFINITY_SUPPORTED
+ __kmp_affinity_uninitialize();
+ #endif
+
+ __kmp_init_runtime = FALSE;
+}
+
+
+/* Put the thread to sleep for a time period */
+/* NOTE: not currently used anywhere */
+void
+__kmp_thread_sleep( int millis )
+{
+ sleep( ( millis + 500 ) / 1000 );
+}
+
+/* Calculate the elapsed wall clock time for the user */
+void
+__kmp_elapsed( double *t )
+{
+ int status;
+# ifdef FIX_SGI_CLOCK
+ struct timespec ts;
+
+ status = clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts );
+ KMP_CHECK_SYSFAIL_ERRNO( "clock_gettime", status );
+ *t = (double) ts.tv_nsec * (1.0 / (double) KMP_NSEC_PER_SEC) +
+ (double) ts.tv_sec;
+# else
+ struct timeval tv;
+
+ status = gettimeofday( & tv, NULL );
+ KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status );
+ *t = (double) tv.tv_usec * (1.0 / (double) KMP_USEC_PER_SEC) +
+ (double) tv.tv_sec;
+# endif
+}
+
+/* Calculate the elapsed wall clock tick for the user */
+void
+__kmp_elapsed_tick( double *t )
+{
+ *t = 1 / (double) CLOCKS_PER_SEC;
+}
+
+/*
+ Determine whether the given address is mapped into the current address space.
+*/
+
+int
+__kmp_is_address_mapped( void * addr ) {
+
+ int found = 0;
+ int rc;
+
+ #if KMP_OS_LINUX || KMP_OS_FREEBSD
+
+ /*
+ On Linux* OS, read the /proc/<pid>/maps pseudo-file to get all the address ranges mapped
+ into the address space.
+ */
+
+ char * name = __kmp_str_format( "/proc/%d/maps", getpid() );
+ FILE * file = NULL;
+
+ file = fopen( name, "r" );
+ KMP_ASSERT( file != NULL );
+
+ for ( ; ; ) {
+
+ void * beginning = NULL;
+ void * ending = NULL;
+ char perms[ 5 ];
+
+ rc = fscanf( file, "%p-%p %4s %*[^\n]\n", & beginning, & ending, perms );
+ if ( rc == EOF ) {
+ break;
+ }; // if
+ KMP_ASSERT( rc == 3 && KMP_STRLEN( perms ) == 4 ); // Make sure all fields are read.
+
+ // Ending address is not included in the region, but beginning is.
+ if ( ( addr >= beginning ) && ( addr < ending ) ) {
+            perms[ 2 ] = 0;    // 3rd and 4th characters do not matter.
+ if ( strcmp( perms, "rw" ) == 0 ) {
+ // Memory we are looking for should be readable and writable.
+ found = 1;
+ }; // if
+ break;
+ }; // if
+
+ }; // forever
+
+ // Free resources.
+ fclose( file );
+ KMP_INTERNAL_FREE( name );
+
+ #elif KMP_OS_DARWIN
+
+ /*
+ On OS X*, /proc pseudo filesystem is not available. Try to read memory using vm
+ interface.
+ */
+
+ int buffer;
+ vm_size_t count;
+ rc =
+ vm_read_overwrite(
+ mach_task_self(), // Task to read memory of.
+ (vm_address_t)( addr ), // Address to read from.
+ 1, // Number of bytes to be read.
+ (vm_address_t)( & buffer ), // Address of buffer to save read bytes in.
+ & count // Address of var to save number of read bytes in.
+ );
+ if ( rc == 0 ) {
+ // Memory successfully read.
+ found = 1;
+ }; // if
+
+ #elif KMP_OS_FREEBSD || KMP_OS_NETBSD
+
+ // FIXME(FreeBSD, NetBSD): Implement this
+ found = 1;
+
+ #else
+
+ #error "Unknown or unsupported OS"
+
+ #endif
+
+ return found;
+
+} // __kmp_is_address_mapped
+
+#ifdef USE_LOAD_BALANCE
+
+
+# if KMP_OS_DARWIN
+
+// The function returns the rounded value of the system load average over the given
+// time interval, which depends on the value of the __kmp_load_balance_interval
+// variable (default is 60 sec; other values may be 300 sec or 900 sec).
+// It returns -1 in case of error.
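+// Illustrative note (added for clarity): getloadavg( averages, 3 ) fills averages[0..2]
+// with the 1-, 5- and 15-minute load averages and returns the number of samples it
+// actually retrieved (or -1 on failure), which is why the result is range-checked below.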
+int
+__kmp_get_load_balance( int max )
+{
+ double averages[3];
+ int ret_avg = 0;
+
+ int res = getloadavg( averages, 3 );
+
+ // Check __kmp_load_balance_interval to determine which of the averages to use.
+ // getloadavg() may return fewer samples than requested, i.e. fewer than 3.
+ if ( __kmp_load_balance_interval < 180 && ( res >= 1 ) ) {
+ ret_avg = averages[0]; // 1 min
+ } else if ( ( __kmp_load_balance_interval >= 180
+ && __kmp_load_balance_interval < 600 ) && ( res >= 2 ) ) {
+ ret_avg = averages[1]; // 5 min
+ } else if ( ( __kmp_load_balance_interval >= 600 ) && ( res == 3 ) ) {
+ ret_avg = averages[2]; // 15 min
+ } else { // Error occurred.
+ return -1;
+ }
+
+ return ret_avg;
+}
+
+# else // Linux* OS
+
+// The function returns the number of running (not sleeping) threads, or -1 in case of error.
+// An error can be reported if the Linux* OS kernel is too old (without "/proc" support).
+// Counting running threads stops once max running threads have been encountered.
+int
+__kmp_get_load_balance( int max )
+{
+ static int permanent_error = 0;
+
+ static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algorithm */
+ static double glb_call_time = 0; /* Thread balance algorithm call time */
+
+ int running_threads = 0; // Number of running threads in the system.
+
+ DIR * proc_dir = NULL; // Handle of "/proc/" directory.
+ struct dirent * proc_entry = NULL;
+
+ kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
+ DIR * task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
+ struct dirent * task_entry = NULL;
+ int task_path_fixed_len;
+
+ kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
+ int stat_file = -1;
+ int stat_path_fixed_len;
+
+ int total_processes = 0; // Total number of processes in system.
+ int total_threads = 0; // Total number of threads in system.
+
+ double call_time = 0.0;
+
+ __kmp_str_buf_init( & task_path );
+ __kmp_str_buf_init( & stat_path );
+
+ __kmp_elapsed( & call_time );
+
+ if ( glb_call_time &&
+ ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
+ running_threads = glb_running_threads;
+ goto finish;
+ }
+
+ glb_call_time = call_time;
+
+ // Do not spend time on scanning "/proc/" if we have a permanent error.
+ if ( permanent_error ) {
+ running_threads = -1;
+ goto finish;
+ }; // if
+
+ if ( max <= 0 ) {
+ max = INT_MAX;
+ }; // if
+
+ // Open "/proc/" directory.
+ proc_dir = opendir( "/proc" );
+ if ( proc_dir == NULL ) {
+ // Cannot open "/proc/". Probably the kernel does not support it. Return an error now and
+ // in subsequent calls.
+ running_threads = -1;
+ permanent_error = 1;
+ goto finish;
+ }; // if
+
+ // Initialize fixed part of task_path. This part will not change.
+ __kmp_str_buf_cat( & task_path, "/proc/", 6 );
+ task_path_fixed_len = task_path.used; // Remember number of used characters.
+
+ proc_entry = readdir( proc_dir );
+ while ( proc_entry != NULL ) {
+ // The proc entry is a directory whose name starts with a digit. Assume it is a
+ // process' directory.
+ if ( proc_entry->d_type == DT_DIR && isdigit( proc_entry->d_name[ 0 ] ) ) {
+
+ ++ total_processes;
+ // Make sure the init process is the very first entry in "/proc", so we can replace
+ // strcmp( proc_entry->d_name, "1" ) == 0 with the simpler total_processes == 1.
+ // We are going to check that total_processes == 1 => d_name == "1" holds (where
+ // "=>" is implication). Since C++ does not have an => operator, we replace it with its
+ // equivalent: a => b == ! a || b.
+ KMP_DEBUG_ASSERT( total_processes != 1 || strcmp( proc_entry->d_name, "1" ) == 0 );
+
+ // Construct task_path.
+ task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
+ __kmp_str_buf_cat( & task_path, proc_entry->d_name, KMP_STRLEN( proc_entry->d_name ) );
+ __kmp_str_buf_cat( & task_path, "/task", 5 );
+
+ task_dir = opendir( task_path.str );
+ if ( task_dir == NULL ) {
+ // A process can finish between reading the "/proc/" directory entry and opening the
+ // process' "task/" directory. So, in the general case we should not complain, but
+ // simply skip this process and read the next one.
+ // However, on systems with no "task/" support we would spend a lot of time scanning
+ // the "/proc/" tree again and again without any benefit. The "init" process (its pid
+ // is 1) should always exist, so if we cannot open the "/proc/1/task/" directory, it
+ // means "task/" is not supported by the kernel. Report an error now and in subsequent calls.
+ if ( strcmp( proc_entry->d_name, "1" ) == 0 ) {
+ running_threads = -1;
+ permanent_error = 1;
+ goto finish;
+ }; // if
+ } else {
+ // Construct fixed part of stat file path.
+ __kmp_str_buf_clear( & stat_path );
+ __kmp_str_buf_cat( & stat_path, task_path.str, task_path.used );
+ __kmp_str_buf_cat( & stat_path, "/", 1 );
+ stat_path_fixed_len = stat_path.used;
+
+ task_entry = readdir( task_dir );
+ while ( task_entry != NULL ) {
+ // It is a directory whose name starts with a digit.
+ if ( proc_entry->d_type == DT_DIR && isdigit( task_entry->d_name[ 0 ] ) ) {
+
+ ++ total_threads;
+
+ // Construct the complete stat file path. The easiest way would be:
+ // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, task_entry->d_name );
+ // but a series of __kmp_str_buf_cat() calls works a bit faster.
+ stat_path.used = stat_path_fixed_len; // Reset stat path to its fixed part.
+ __kmp_str_buf_cat( & stat_path, task_entry->d_name, KMP_STRLEN( task_entry->d_name ) );
+ __kmp_str_buf_cat( & stat_path, "/stat", 5 );
+
+ // Note: Low-level API (open/read/close) is used. High-level API
+ // (fopen/fclose) works ~ 30 % slower.
+ stat_file = open( stat_path.str, O_RDONLY );
+ if ( stat_file == -1 ) {
+ // We cannot report an error because the task (thread) can terminate just
+ // before we read this file.
+ } else {
+ /*
+ Content of "stat" file looks like:
+
+ 24285 (program) S ...
+
+ It is a single line (if the program name does not include funny
+ symbols). The first number is the thread id, then the executable file
+ name in parentheses, then the state of the thread. We need just the
+ thread state.
+
+ Good news: the program name is at most 15 characters long. Longer
+ names are truncated.
+
+ Thus, we need a rather short buffer: 15 chars for the program name +
+ 2 parentheses + 3 spaces + ~7 digits of pid = 37.
+
+ Bad news: the program name may contain special symbols like space,
+ closing parenthesis, or even newline. This makes parsing the "stat"
+ file not 100 % reliable. For funny program names the parsing may
+ fail (reporting an incorrect thread state).
+
+ Parsing the "status" file looks more promising (due to its different
+ file structure and escaping of special symbols), but reading and
+ parsing the "status" file is slower.
+
+ -- ln
+ */
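+ /*
+ Illustrative example (added for clarity): for a thread whose name is "a) R x"
+ the stat line would read
+ 1234 (a) R x) S ...
+ and the ") " search below would report state 'R' instead of the real state 'S'.
+ */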
+ char buffer[ 65 ];
+ int len;
+ len = read( stat_file, buffer, sizeof( buffer ) - 1 );
+ if ( len >= 0 ) {
+ buffer[ len ] = 0;
+ // Using scanf:
+ // sscanf( buffer, "%*d (%*s) %c ", & state );
+ // looks very nice, but searching for a closing parenthesis works a
+ // bit faster.
+ char * close_parent = strstr( buffer, ") " );
+ if ( close_parent != NULL ) {
+ char state = * ( close_parent + 2 );
+ if ( state == 'R' ) {
+ ++ running_threads;
+ if ( running_threads >= max ) {
+ goto finish;
+ }; // if
+ }; // if
+ }; // if
+ }; // if
+ close( stat_file );
+ stat_file = -1;
+ }; // if
+ }; // if
+ task_entry = readdir( task_dir );
+ }; // while
+ closedir( task_dir );
+ task_dir = NULL;
+ }; // if
+ }; // if
+ proc_entry = readdir( proc_dir );
+ }; // while
+
+ //
+ // There _might_ be a timing hole where the thread executing this
+ // code gets skipped in the load balance, and running_threads is 0.
+ // Assert in the debug builds only!!!
+ //
+ KMP_DEBUG_ASSERT( running_threads > 0 );
+ if ( running_threads <= 0 ) {
+ running_threads = 1;
+ }
+
+ finish: // Clean up and exit.
+ if ( proc_dir != NULL ) {
+ closedir( proc_dir );
+ }; // if
+ __kmp_str_buf_free( & task_path );
+ if ( task_dir != NULL ) {
+ closedir( task_dir );
+ }; // if
+ __kmp_str_buf_free( & stat_path );
+ if ( stat_file != -1 ) {
+ close( stat_file );
+ }; // if
+
+ glb_running_threads = running_threads;
+
+ return running_threads;
+
+} // __kmp_get_load_balance
+
+# endif // KMP_OS_DARWIN
+
+#endif // USE_LOAD_BALANCE
+
+#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC)
+
+// We really only need the case with 1 argument, because clang always builds
+// a struct of pointers to the shared variables referenced in the outlined function.
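+// Illustrative sketch (an assumption about typical clang code generation, not taken
+// from this file): with a single captured-variables struct the invocation reduces to
+//   (*pkfn)( &gtid, &tid, p_argv[0] );
+// where p_argv[0] points to a compiler-generated struct holding the addresses of the
+// shared variables used inside the outlined region.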
+int
+__kmp_invoke_microtask( microtask_t pkfn,
+ int gtid, int tid,
+ int argc, void *p_argv[]
+#if OMPT_SUPPORT
+ , void **exit_frame_ptr
+#endif
+)
+{
+#if OMPT_SUPPORT
+ *exit_frame_ptr = __builtin_frame_address(0);
+#endif
+
+ switch (argc) {
+ default:
+ fprintf(stderr, "Too many args to microtask: %d!\n", argc);
+ fflush(stderr);
+ exit(-1);
+ case 0:
+ (*pkfn)(&gtid, &tid);
+ break;
+ case 1:
+ (*pkfn)(&gtid, &tid, p_argv[0]);
+ break;
+ case 2:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
+ break;
+ case 3:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
+ break;
+ case 4:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
+ break;
+ case 5:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
+ break;
+ case 6:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5]);
+ break;
+ case 7:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6]);
+ break;
+ case 8:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7]);
+ break;
+ case 9:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
+ break;
+ case 10:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
+ break;
+ case 11:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
+ break;
+ case 12:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11]);
+ break;
+ case 13:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11], p_argv[12]);
+ break;
+ case 14:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11], p_argv[12], p_argv[13]);
+ break;
+ case 15:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
+ break;
+ }
+
+#if OMPT_SUPPORT
+ *exit_frame_ptr = 0;
+#endif
+
+ return 1;
+}
+
+#endif
+
+// end of file //
+
diff --git a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm
index 507d093778..a4f9a38ae7 100644
--- a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm
+++ b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_asm.asm
@@ -1,1402 +1,1402 @@
-; z_Windows_NT-586_asm.asm: - microtasking routines specifically
-; written for IA-32 architecture and Intel(R) 64 running Windows* OS
-
-;
-;//===----------------------------------------------------------------------===//
-;//
-;// The LLVM Compiler Infrastructure
-;//
-;// This file is dual licensed under the MIT and the University of Illinois Open
-;// Source Licenses. See LICENSE.txt for details.
-;//
-;//===----------------------------------------------------------------------===//
-;
-
- TITLE z_Windows_NT-586_asm.asm
-
-; ============================= IA-32 architecture ==========================
-ifdef _M_IA32
-
- .586P
-
-if @Version gt 510
- .model HUGE
-else
-_TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
-_TEXT ENDS
-_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
-_DATA ENDS
-CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
-CONST ENDS
-_BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
-_BSS ENDS
-$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM'
-$$SYMBOLS ENDS
-$$TYPES SEGMENT BYTE USE32 'DEBTYP'
-$$TYPES ENDS
-_TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
-_TLS ENDS
-FLAT GROUP _DATA, CONST, _BSS
- ASSUME CS: FLAT, DS: FLAT, SS: FLAT
-endif
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_x86_pause
-;
-; void
-; __kmp_x86_pause( void )
-;
-
-PUBLIC ___kmp_x86_pause
-_p$ = 4
-_d$ = 8
-_TEXT SEGMENT
- ALIGN 16
-___kmp_x86_pause PROC NEAR
-
- db 0f3H
- db 090H ;; pause
- ret
-
-___kmp_x86_pause ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_x86_cpuid
-;
-; void
-; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
-;
-
-PUBLIC ___kmp_x86_cpuid
-_TEXT SEGMENT
- ALIGN 16
-_mode$ = 8
-_mode2$ = 12
-_p$ = 16
-_eax$ = 0
-_ebx$ = 4
-_ecx$ = 8
-_edx$ = 12
-
-___kmp_x86_cpuid PROC NEAR
-
- push ebp
- mov ebp, esp
-
- push edi
- push ebx
- push ecx
- push edx
-
- mov eax, DWORD PTR _mode$[ebp]
- mov ecx, DWORD PTR _mode2$[ebp]
- cpuid ; Query the CPUID for the current processor
-
- mov edi, DWORD PTR _p$[ebp]
- mov DWORD PTR _eax$[ edi ], eax
- mov DWORD PTR _ebx$[ edi ], ebx
- mov DWORD PTR _ecx$[ edi ], ecx
- mov DWORD PTR _edx$[ edi ], edx
-
- pop edx
- pop ecx
- pop ebx
- pop edi
-
- mov esp, ebp
- pop ebp
- ret
-
-___kmp_x86_cpuid ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_test_then_add32
-;
-; kmp_int32
-; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-;
-
-PUBLIC ___kmp_test_then_add32
-_p$ = 4
-_d$ = 8
-_TEXT SEGMENT
- ALIGN 16
-___kmp_test_then_add32 PROC NEAR
-
- mov eax, DWORD PTR _d$[esp]
- mov ecx, DWORD PTR _p$[esp]
-lock xadd DWORD PTR [ecx], eax
- ret
-
-___kmp_test_then_add32 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store8
-;
-; kmp_int8
-; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-;
-
-PUBLIC ___kmp_compare_and_store8
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_cv$ = 8
-_sv$ = 12
-
-___kmp_compare_and_store8 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov al, BYTE PTR _cv$[esp]
- mov dl, BYTE PTR _sv$[esp]
-lock cmpxchg BYTE PTR [ecx], dl
- sete al ; if al == [ecx] set al = 1 else set al = 0
- and eax, 1 ; sign extend previous instruction
- ret
-
-___kmp_compare_and_store8 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store16
-;
-; kmp_int16
-; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-;
-
-PUBLIC ___kmp_compare_and_store16
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_cv$ = 8
-_sv$ = 12
-
-___kmp_compare_and_store16 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov ax, WORD PTR _cv$[esp]
- mov dx, WORD PTR _sv$[esp]
-lock cmpxchg WORD PTR [ecx], dx
- sete al ; if ax == [ecx] set al = 1 else set al = 0
- and eax, 1 ; sign extend previous instruction
- ret
-
-___kmp_compare_and_store16 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store32
-;
-; kmp_int32
-; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-;
-
-PUBLIC ___kmp_compare_and_store32
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_cv$ = 8
-_sv$ = 12
-
-___kmp_compare_and_store32 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov eax, DWORD PTR _cv$[esp]
- mov edx, DWORD PTR _sv$[esp]
-lock cmpxchg DWORD PTR [ecx], edx
- sete al ; if eax == [ecx] set al = 1 else set al = 0
- and eax, 1 ; sign extend previous instruction
- ret
-
-___kmp_compare_and_store32 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store64
-;
-; kmp_int32
-; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-;
-
-PUBLIC ___kmp_compare_and_store64
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 8
-_cv_low$ = 12
-_cv_high$ = 16
-_sv_low$ = 20
-_sv_high$ = 24
-
-___kmp_compare_and_store64 PROC NEAR
-
- push ebp
- mov ebp, esp
- push ebx
- push edi
- mov edi, DWORD PTR _p$[ebp]
- mov eax, DWORD PTR _cv_low$[ebp]
- mov edx, DWORD PTR _cv_high$[ebp]
- mov ebx, DWORD PTR _sv_low$[ebp]
- mov ecx, DWORD PTR _sv_high$[ebp]
-lock cmpxchg8b QWORD PTR [edi]
- sete al ; if edx:eax == [edi] set al = 1 else set al = 0
- and eax, 1 ; sign extend previous instruction
- pop edi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
-
-___kmp_compare_and_store64 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed8
-;
-; kmp_int8
-; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
-;
-
-PUBLIC ___kmp_xchg_fixed8
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_d$ = 8
-
-___kmp_xchg_fixed8 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov al, BYTE PTR _d$[esp]
-lock xchg BYTE PTR [ecx], al
- ret
-
-___kmp_xchg_fixed8 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed16
-;
-; kmp_int16
-; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
-;
-
-PUBLIC ___kmp_xchg_fixed16
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_d$ = 8
-
-___kmp_xchg_fixed16 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov ax, WORD PTR _d$[esp]
-lock xchg WORD PTR [ecx], ax
- ret
-
-___kmp_xchg_fixed16 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed32
-;
-; kmp_int32
-; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
-;
-
-PUBLIC ___kmp_xchg_fixed32
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_d$ = 8
-
-___kmp_xchg_fixed32 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov eax, DWORD PTR _d$[esp]
-lock xchg DWORD PTR [ecx], eax
- ret
-
-___kmp_xchg_fixed32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_real32
-;
-; kmp_real32
-; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
-;
-
-PUBLIC ___kmp_xchg_real32
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 8
-_d$ = 12
-_old_value$ = -4
-
-___kmp_xchg_real32 PROC NEAR
-
- push ebp
- mov ebp, esp
- sub esp, 4
- push esi
- mov esi, DWORD PTR _p$[ebp]
-
- fld DWORD PTR [esi]
- ;; load <addr>
- fst DWORD PTR _old_value$[ebp]
- ;; store into old_value
-
- mov eax, DWORD PTR _d$[ebp]
-
-lock xchg DWORD PTR [esi], eax
-
- fld DWORD PTR _old_value$[ebp]
- ;; return old_value
- pop esi
- mov esp, ebp
- pop ebp
- ret
-
-___kmp_xchg_real32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store_ret8
-;
-; kmp_int8
-; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-;
-
-PUBLIC ___kmp_compare_and_store_ret8
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_cv$ = 8
-_sv$ = 12
-
-___kmp_compare_and_store_ret8 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov al, BYTE PTR _cv$[esp]
- mov dl, BYTE PTR _sv$[esp]
-lock cmpxchg BYTE PTR [ecx], dl
- ret
-
-___kmp_compare_and_store_ret8 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store_ret16
-;
-; kmp_int16
-; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-;
-
-PUBLIC ___kmp_compare_and_store_ret16
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_cv$ = 8
-_sv$ = 12
-
-___kmp_compare_and_store_ret16 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov ax, WORD PTR _cv$[esp]
- mov dx, WORD PTR _sv$[esp]
-lock cmpxchg WORD PTR [ecx], dx
- ret
-
-___kmp_compare_and_store_ret16 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store_ret32
-;
-; kmp_int32
-; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-;
-
-PUBLIC ___kmp_compare_and_store_ret32
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-_cv$ = 8
-_sv$ = 12
-
-___kmp_compare_and_store_ret32 PROC NEAR
-
- mov ecx, DWORD PTR _p$[esp]
- mov eax, DWORD PTR _cv$[esp]
- mov edx, DWORD PTR _sv$[esp]
-lock cmpxchg DWORD PTR [ecx], edx
- ret
-
-___kmp_compare_and_store_ret32 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_compare_and_store_ret64
-;
-; kmp_int64
-; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-;
-
-PUBLIC ___kmp_compare_and_store_ret64
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 8
-_cv_low$ = 12
-_cv_high$ = 16
-_sv_low$ = 20
-_sv_high$ = 24
-
-___kmp_compare_and_store_ret64 PROC NEAR
-
- push ebp
- mov ebp, esp
- push ebx
- push edi
- mov edi, DWORD PTR _p$[ebp]
- mov eax, DWORD PTR _cv_low$[ebp]
- mov edx, DWORD PTR _cv_high$[ebp]
- mov ebx, DWORD PTR _sv_low$[ebp]
- mov ecx, DWORD PTR _sv_high$[ebp]
-lock cmpxchg8b QWORD PTR [edi]
- pop edi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
-
-___kmp_compare_and_store_ret64 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_load_x87_fpu_control_word
-;
-; void
-; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-;
-; parameters:
-; p: 4(%esp)
-
-PUBLIC ___kmp_load_x87_fpu_control_word
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-
-___kmp_load_x87_fpu_control_word PROC NEAR
-
- mov eax, DWORD PTR _p$[esp]
- fldcw WORD PTR [eax]
- ret
-
-___kmp_load_x87_fpu_control_word ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_store_x87_fpu_control_word
-;
-; void
-; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-;
-; parameters:
-; p: 4(%esp)
-
-PUBLIC ___kmp_store_x87_fpu_control_word
-_TEXT SEGMENT
- ALIGN 16
-_p$ = 4
-
-___kmp_store_x87_fpu_control_word PROC NEAR
-
- mov eax, DWORD PTR _p$[esp]
- fstcw WORD PTR [eax]
- ret
-
-___kmp_store_x87_fpu_control_word ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_clear_x87_fpu_status_word
-;
-; void
-; __kmp_clear_x87_fpu_status_word();
-;
-
-PUBLIC ___kmp_clear_x87_fpu_status_word
-_TEXT SEGMENT
- ALIGN 16
-
-___kmp_clear_x87_fpu_status_word PROC NEAR
-
- fnclex
- ret
-
-___kmp_clear_x87_fpu_status_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_invoke_microtask
-;
-; typedef void (*microtask_t)( int *gtid, int *tid, ... );
-;
-; int
-; __kmp_invoke_microtask( microtask_t pkfn,
-; int gtid, int tid,
-; int argc, void *p_argv[] )
-;
-
-PUBLIC ___kmp_invoke_microtask
-_TEXT SEGMENT
- ALIGN 16
-_pkfn$ = 8
-_gtid$ = 12
-_tid$ = 16
-_argc$ = 20
-_argv$ = 24
-if OMPT_SUPPORT
-_exit_frame$ = 28
-endif
-_i$ = -8
-_stk_adj$ = -16
-_vptr$ = -12
-_qptr$ = -4
-
-___kmp_invoke_microtask PROC NEAR
-; Line 102
- push ebp
- mov ebp, esp
- sub esp, 16 ; 00000010H
- push ebx
- push esi
- push edi
-if OMPT_SUPPORT
- mov eax, DWORD PTR _exit_frame$[ebp]
- mov DWORD PTR [eax], ebp
-endif
-; Line 114
- mov eax, DWORD PTR _argc$[ebp]
- mov DWORD PTR _i$[ebp], eax
-
-;; ------------------------------------------------------------
- lea edx, DWORD PTR [eax*4+8]
- mov ecx, esp ; Save current SP into ECX
- mov eax,edx ; Save the size of the args in eax
- sub ecx,edx ; esp-((#args+2)*4) -> ecx -- without mods, stack ptr would be this
- mov edx,ecx ; Save to edx
- and ecx,-128 ; Mask off 7 bits
- sub edx,ecx ; Amount to subtract from esp
- sub esp,edx ; Prepare stack ptr-- Now it will be aligned on 128-byte boundary at the call
-
- add edx,eax ; Calculate total size of the stack decrement.
- mov DWORD PTR _stk_adj$[ebp], edx
-;; ------------------------------------------------------------
-
- jmp SHORT $L22237
-$L22238:
- mov ecx, DWORD PTR _i$[ebp]
- sub ecx, 1
- mov DWORD PTR _i$[ebp], ecx
-$L22237:
- cmp DWORD PTR _i$[ebp], 0
- jle SHORT $L22239
-; Line 116
- mov edx, DWORD PTR _i$[ebp]
- mov eax, DWORD PTR _argv$[ebp]
- mov ecx, DWORD PTR [eax+edx*4-4]
- mov DWORD PTR _vptr$[ebp], ecx
-; Line 123
- mov eax, DWORD PTR _vptr$[ebp]
-; Line 124
- push eax
-; Line 127
- jmp SHORT $L22238
-$L22239:
-; Line 129
- lea edx, DWORD PTR _tid$[ebp]
- mov DWORD PTR _vptr$[ebp], edx
-; Line 130
- lea eax, DWORD PTR _gtid$[ebp]
- mov DWORD PTR _qptr$[ebp], eax
-; Line 143
- mov eax, DWORD PTR _vptr$[ebp]
-; Line 144
- push eax
-; Line 145
- mov eax, DWORD PTR _qptr$[ebp]
-; Line 146
- push eax
-; Line 147
- call DWORD PTR _pkfn$[ebp]
-; Line 148
- add esp, DWORD PTR _stk_adj$[ebp]
-; Line 152
- mov eax, 1
-; Line 153
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret 0
-___kmp_invoke_microtask ENDP
-_TEXT ENDS
-
-endif
-
-; ==================================== Intel(R) 64 ===================================
-
-ifdef _M_AMD64
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_x86_cpuid
-;
-; void
-; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
-;
-; parameters:
-; mode: ecx
-; mode2: edx
-; cpuid_buffer: r8
-
-PUBLIC __kmp_x86_cpuid
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_x86_cpuid PROC FRAME ;NEAR
-
- push rbp
- .pushreg rbp
- mov rbp, rsp
- .setframe rbp, 0
- push rbx ; callee-save register
- .pushreg rbx
- .ENDPROLOG
-
- mov r10, r8 ; p parameter
- mov eax, ecx ; mode parameter
- mov ecx, edx ; mode2 parameter
- cpuid ; Query the CPUID for the current processor
-
- mov DWORD PTR 0[ r10 ], eax ; store results into buffer
- mov DWORD PTR 4[ r10 ], ebx
- mov DWORD PTR 8[ r10 ], ecx
- mov DWORD PTR 12[ r10 ], edx
-
- pop rbx ; callee-save register
- mov rsp, rbp
- pop rbp
- ret
-
-__kmp_x86_cpuid ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_test_then_add32
-;
-; kmp_int32
-; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-;
-; parameters:
-; p: rcx
-; d: edx
-;
-; return: eax
-
-PUBLIC __kmp_test_then_add32
-_TEXT SEGMENT
- ALIGN 16
-__kmp_test_then_add32 PROC ;NEAR
-
- mov eax, edx
-lock xadd DWORD PTR [rcx], eax
- ret
-
-__kmp_test_then_add32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_test_then_add64
-;
-; kmp_int32
-; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
-;
-; parameters:
-; p: rcx
-; d: rdx
-;
-; return: rax
-
-PUBLIC __kmp_test_then_add64
-_TEXT SEGMENT
- ALIGN 16
-__kmp_test_then_add64 PROC ;NEAR
-
- mov rax, rdx
-lock xadd QWORD PTR [rcx], rax
- ret
-
-__kmp_test_then_add64 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store8
-;
-; kmp_int8
-; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store8
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store8 PROC ;NEAR
-
- mov al, dl ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg BYTE PTR [rcx], dl
- sete al ; if al == [rcx] set al = 1 else set al = 0
- and rax, 1 ; sign extend previous instruction
- ret
-
-__kmp_compare_and_store8 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store16
-;
-; kmp_int16
-; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store16
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store16 PROC ;NEAR
-
- mov ax, dx ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg WORD PTR [rcx], dx
- sete al ; if ax == [rcx] set al = 1 else set al = 0
- and rax, 1 ; sign extend previous instruction
- ret
-
-__kmp_compare_and_store16 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store32
-;
-; kmp_int32
-; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store32
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store32 PROC ;NEAR
-
- mov eax, edx ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg DWORD PTR [rcx], edx
- sete al ; if eax == [rcx] set al = 1 else set al = 0
- and rax, 1 ; sign extend previous instruction
- ret
-
-__kmp_compare_and_store32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store64
-;
-; kmp_int32
-; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-; parameters:
-; p: rcx
-; cv: rdx
-; sv: r8
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store64
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store64 PROC ;NEAR
-
- mov rax, rdx ; "cv"
- mov rdx, r8 ; "sv"
-lock cmpxchg QWORD PTR [rcx], rdx
- sete al ; if rax == [rcx] set al = 1 else set al = 0
- and rax, 1 ; sign extend previous instruction
- ret
-
-__kmp_compare_and_store64 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed8
-;
-; kmp_int8
-; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
-;
-; parameters:
-; p: rcx
-; d: dl
-;
-; return: al
-
-PUBLIC __kmp_xchg_fixed8
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_xchg_fixed8 PROC ;NEAR
-
- mov al, dl
-lock xchg BYTE PTR [rcx], al
- ret
-
-__kmp_xchg_fixed8 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed16
-;
-; kmp_int16
-; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
-;
-; parameters:
-; p: rcx
-; d: dx
-;
-; return: ax
-
-PUBLIC __kmp_xchg_fixed16
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_xchg_fixed16 PROC ;NEAR
-
- mov ax, dx
-lock xchg WORD PTR [rcx], ax
- ret
-
-__kmp_xchg_fixed16 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed32
-;
-; kmp_int32
-; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
-;
-; parameters:
-; p: rcx
-; d: edx
-;
-; return: eax
-
-PUBLIC __kmp_xchg_fixed32
-_TEXT SEGMENT
- ALIGN 16
-__kmp_xchg_fixed32 PROC ;NEAR
-
- mov eax, edx
-lock xchg DWORD PTR [rcx], eax
- ret
-
-__kmp_xchg_fixed32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION ___kmp_xchg_fixed64
-;
-; kmp_int64
-; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
-;
-; parameters:
-; p: rcx
-; d: rdx
-;
-; return: rax
-
-PUBLIC __kmp_xchg_fixed64
-_TEXT SEGMENT
- ALIGN 16
-__kmp_xchg_fixed64 PROC ;NEAR
-
- mov rax, rdx
-lock xchg QWORD PTR [rcx], rax
- ret
-
-__kmp_xchg_fixed64 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store_ret8
-;
-; kmp_int8
-; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store_ret8
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store_ret8 PROC ;NEAR
- mov al, dl ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg BYTE PTR [rcx], dl
- ; Compare AL with [rcx]. If equal set
- ; ZF and exchange DL with [rcx]. Else, clear
- ; ZF and load [rcx] into AL.
- ret
-
-__kmp_compare_and_store_ret8 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store_ret16
-;
-; kmp_int16
-; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store_ret16
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store_ret16 PROC ;NEAR
-
- mov ax, dx ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg WORD PTR [rcx], dx
- ret
-
-__kmp_compare_and_store_ret16 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store_ret32
-;
-; kmp_int32
-; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: eax
-
-PUBLIC __kmp_compare_and_store_ret32
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store_ret32 PROC ;NEAR
-
- mov eax, edx ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg DWORD PTR [rcx], edx
- ret
-
-__kmp_compare_and_store_ret32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store_ret64
-;
-; kmp_int64
-; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-; parameters:
-; p: rcx
-; cv: rdx
-; sv: r8
-;
-; return: rax
-
-PUBLIC __kmp_compare_and_store_ret64
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store_ret64 PROC ;NEAR
-
- mov rax, rdx ; "cv"
- mov rdx, r8 ; "sv"
-lock cmpxchg QWORD PTR [rcx], rdx
- ret
-
-__kmp_compare_and_store_ret64 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_compare_and_store_loop8
-;
-; kmp_int8
-; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-; parameters:
-; p: rcx
-; cv: edx
-; sv: r8d
-;
-; return: al
-
-PUBLIC __kmp_compare_and_store_loop8
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_compare_and_store_loop8 PROC ;NEAR
-$__kmp_loop:
- mov al, dl ; "cv"
- mov edx, r8d ; "sv"
-lock cmpxchg BYTE PTR [rcx], dl
- ; Compare AL with [rcx]. If equal set
- ; ZF and exchange DL with [rcx]. Else, clear
- ; ZF and load [rcx] into AL.
- jz SHORT $__kmp_success
-
- db 0f3H
- db 090H ; pause
-
- jmp SHORT $__kmp_loop
-
-$__kmp_success:
- ret
-
-__kmp_compare_and_store_loop8 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_xchg_real32
-;
-; kmp_real32
-; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
-;
-; parameters:
-; p: rcx
-; d: xmm1 (lower 4 bytes)
-;
-; return: xmm0 (lower 4 bytes)
-
-PUBLIC __kmp_xchg_real32
-_TEXT SEGMENT
- ALIGN 16
-__kmp_xchg_real32 PROC ;NEAR
-
- movd eax, xmm1 ; load d
-
-lock xchg DWORD PTR [rcx], eax
-
- movd xmm0, eax ; load old value into return register
- ret
-
-__kmp_xchg_real32 ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_xchg_real64
-;
-; kmp_real64
-; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
-;
-; parameters:
-; p: rcx
-; d: xmm1 (lower 8 bytes)
-;
-; return: xmm0 (lower 8 bytes)
-
-PUBLIC __kmp_xchg_real64
-_TEXT SEGMENT
- ALIGN 16
-__kmp_xchg_real64 PROC ;NEAR
-
- movd rax, xmm1 ; load "d"
-
-lock xchg QWORD PTR [rcx], rax
-
- movd xmm0, rax ; load old value into return register
- ret
-
-__kmp_xchg_real64 ENDP
-_TEXT ENDS
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_load_x87_fpu_control_word
-;
-; void
-; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-;
-; parameters:
-; p: rcx
-;
-
-PUBLIC __kmp_load_x87_fpu_control_word
-_TEXT SEGMENT
- ALIGN 16
-__kmp_load_x87_fpu_control_word PROC ;NEAR
-
- fldcw WORD PTR [rcx]
- ret
-
-__kmp_load_x87_fpu_control_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_store_x87_fpu_control_word
-;
-; void
-; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-;
-; parameters:
-; p: rcx
-;
-
-PUBLIC __kmp_store_x87_fpu_control_word
-_TEXT SEGMENT
- ALIGN 16
-__kmp_store_x87_fpu_control_word PROC ;NEAR
-
- fstcw WORD PTR [rcx]
- ret
-
-__kmp_store_x87_fpu_control_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_clear_x87_fpu_status_word
-;
-; void
-; __kmp_clear_x87_fpu_status_word()
-;
-
-PUBLIC __kmp_clear_x87_fpu_status_word
-_TEXT SEGMENT
- ALIGN 16
-__kmp_clear_x87_fpu_status_word PROC ;NEAR
-
- fnclex
- ret
-
-__kmp_clear_x87_fpu_status_word ENDP
-_TEXT ENDS
-
-
-;------------------------------------------------------------------------
-;
-; FUNCTION __kmp_invoke_microtask
-;
-; typedef void (*microtask_t)( int *gtid, int *tid, ... );
-;
-; int
-; __kmp_invoke_microtask( microtask_t pkfn,
-; int gtid, int tid,
-; int argc, void *p_argv[] ) {
-;
-; (*pkfn) ( &gtid, &tid, argv[0], ... );
-; return 1;
-; }
-;
-; note:
-; just before call to pkfn must have rsp 128-byte aligned for compiler
-;
-; parameters:
-; rcx: pkfn 16[rbp]
-; edx: gtid 24[rbp]
-; r8d: tid 32[rbp]
-; r9d: argc 40[rbp]
-; [st]: p_argv 48[rbp]
-;
-; reg temps:
-; rax: used all over the place
-; rdx: used all over the place
-; rcx: used as argument counter for push parms loop
-; r10: used to hold pkfn function pointer argument
-;
-; return: eax (always 1/TRUE)
-;
-
-$_pkfn = 16
-$_gtid = 24
-$_tid = 32
-$_argc = 40
-$_p_argv = 48
-if OMPT_SUPPORT
-$_exit_frame = 56
-endif
-
-PUBLIC __kmp_invoke_microtask
-_TEXT SEGMENT
- ALIGN 16
-
-__kmp_invoke_microtask PROC FRAME ;NEAR
- mov QWORD PTR 16[rsp], rdx ; home gtid parameter
- mov QWORD PTR 24[rsp], r8 ; home tid parameter
- push rbp ; save base pointer
- .pushreg rbp
- sub rsp, 0 ; no fixed allocation necessary - end prolog
-
- lea rbp, QWORD PTR [rsp] ; establish the base pointer
- .setframe rbp, 0
- .ENDPROLOG
-if OMPT_SUPPORT
- mov rax, QWORD PTR $_exit_frame[rbp]
- mov QWORD PTR [rax], rbp
-endif
- mov r10, rcx ; save pkfn pointer for later
-
-;; ------------------------------------------------------------
- mov rax, r9 ; rax <= argc
- cmp rax, 2
- jge SHORT $_kmp_invoke_stack_align
- mov rax, 2 ; set 4 homes if less than 2 parms
-$_kmp_invoke_stack_align:
- lea rdx, QWORD PTR [rax*8+16] ; rax <= (argc + 2) * 8
- mov rax, rsp ; Save current SP into rax
- sub rax, rdx ; rsp - ((argc+2)*8) -> rax
- ; without align, rsp would be this
- and rax, -128 ; Mask off 7 bits (128-byte align)
- add rax, rdx ; add space for push's in a loop below
- mov rsp, rax ; Prepare the stack ptr
- ; Now it will align to 128-byte at the call
-;; ------------------------------------------------------------
- ; setup pkfn parameter stack
- mov rax, r9 ; rax <= argc
- shl rax, 3 ; rax <= argc*8
- mov rdx, QWORD PTR $_p_argv[rbp] ; rdx <= p_argv
- add rdx, rax ; rdx <= &p_argv[argc]
- mov rcx, r9 ; rcx <= argc
- jecxz SHORT $_kmp_invoke_pass_parms ; nothing to push if argc=0
- cmp ecx, 1 ; if argc=1 branch ahead
- je SHORT $_kmp_invoke_one_parm
- sub ecx, 2 ; if argc=2 branch ahead, subtract two from
- je SHORT $_kmp_invoke_two_parms
-
-$_kmp_invoke_push_parms: ; push last - 5th parms to pkfn on stack
- sub rdx, 8 ; decrement p_argv pointer to previous parm
- mov r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
- push r8 ; push p_argv[rcx-1] onto stack (reverse order)
- sub ecx, 1
- jecxz SHORT $_kmp_invoke_two_parms
- jmp SHORT $_kmp_invoke_push_parms
-
-$_kmp_invoke_two_parms:
- sub rdx, 8 ; put 4th parm to pkfn in r9
- mov r9, QWORD PTR [rdx] ; r9 <= p_argv[1]
-
-$_kmp_invoke_one_parm:
- sub rdx, 8 ; put 3rd parm to pkfn in r8
- mov r8, QWORD PTR [rdx] ; r8 <= p_argv[0]
-
-$_kmp_invoke_pass_parms: ; put 1st & 2nd parms to pkfn in registers
- lea rdx, QWORD PTR $_tid[rbp] ; rdx <= &tid (2nd parm to pkfn)
- lea rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
- sub rsp, 32 ; add stack space for first four parms
- mov rax, r10 ; rax <= pkfn
- call rax ; call (*pkfn)()
- mov rax, 1 ; move 1 into return register;
-
- lea rsp, QWORD PTR [rbp] ; restore stack pointer
-
-; add rsp, 0 ; no fixed allocation necessary - start epilog
- pop rbp ; restore frame pointer
- ret
-__kmp_invoke_microtask ENDP
-_TEXT ENDS
-
-endif
-
-END
+; z_Windows_NT-586_asm.asm: - microtasking routines specifically
+; written for IA-32 architecture and Intel(R) 64 running Windows* OS
+
+;
+;//===----------------------------------------------------------------------===//
+;//
+;// The LLVM Compiler Infrastructure
+;//
+;// This file is dual licensed under the MIT and the University of Illinois Open
+;// Source Licenses. See LICENSE.txt for details.
+;//
+;//===----------------------------------------------------------------------===//
+;
+
+ TITLE z_Windows_NT-586_asm.asm
+
+; ============================= IA-32 architecture ==========================
+ifdef _M_IA32
+
+ .586P
+
+if @Version gt 510
+ .model HUGE
+else
+_TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
+_TEXT ENDS
+_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
+_DATA ENDS
+CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
+CONST ENDS
+_BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
+_BSS ENDS
+$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM'
+$$SYMBOLS ENDS
+$$TYPES SEGMENT BYTE USE32 'DEBTYP'
+$$TYPES ENDS
+_TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
+_TLS ENDS
+FLAT GROUP _DATA, CONST, _BSS
+ ASSUME CS: FLAT, DS: FLAT, SS: FLAT
+endif
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_x86_pause
+;
+; void
+; __kmp_x86_pause( void )
+;
+
+PUBLIC ___kmp_x86_pause
+_p$ = 4
+_d$ = 8
+_TEXT SEGMENT
+ ALIGN 16
+___kmp_x86_pause PROC NEAR
+
+ db 0f3H
+ db 090H ;; pause
+ ret
+
+___kmp_x86_pause ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_x86_cpuid
+;
+; void
+; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
+;
+
+PUBLIC ___kmp_x86_cpuid
+_TEXT SEGMENT
+ ALIGN 16
+_mode$ = 8
+_mode2$ = 12
+_p$ = 16
+_eax$ = 0
+_ebx$ = 4
+_ecx$ = 8
+_edx$ = 12
+
+___kmp_x86_cpuid PROC NEAR
+
+ push ebp
+ mov ebp, esp
+
+ push edi
+ push ebx
+ push ecx
+ push edx
+
+ mov eax, DWORD PTR _mode$[ebp]
+ mov ecx, DWORD PTR _mode2$[ebp]
+ cpuid ; Query the CPUID for the current processor
+
+ mov edi, DWORD PTR _p$[ebp]
+ mov DWORD PTR _eax$[ edi ], eax
+ mov DWORD PTR _ebx$[ edi ], ebx
+ mov DWORD PTR _ecx$[ edi ], ecx
+ mov DWORD PTR _edx$[ edi ], edx
+
+ pop edx
+ pop ecx
+ pop ebx
+ pop edi
+
+ mov esp, ebp
+ pop ebp
+ ret
+
+___kmp_x86_cpuid ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_test_then_add32
+;
+; kmp_int32
+; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
+;
+
+PUBLIC ___kmp_test_then_add32
+_p$ = 4
+_d$ = 8
+_TEXT SEGMENT
+ ALIGN 16
+___kmp_test_then_add32 PROC NEAR
+
+ mov eax, DWORD PTR _d$[esp]
+ mov ecx, DWORD PTR _p$[esp]
+lock xadd DWORD PTR [ecx], eax
+ ret
+
+___kmp_test_then_add32 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store8
+;
+; kmp_int8
+; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+;
+
+PUBLIC ___kmp_compare_and_store8
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_cv$ = 8
+_sv$ = 12
+
+___kmp_compare_and_store8 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov al, BYTE PTR _cv$[esp]
+ mov dl, BYTE PTR _sv$[esp]
+lock cmpxchg BYTE PTR [ecx], dl
+ sete al ; if al == [ecx] set al = 1 else set al = 0
+ and eax, 1 ; sign extend previous instruction
+ ret
+
+___kmp_compare_and_store8 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store16
+;
+; kmp_int16
+; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+;
+
+PUBLIC ___kmp_compare_and_store16
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_cv$ = 8
+_sv$ = 12
+
+___kmp_compare_and_store16 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov ax, WORD PTR _cv$[esp]
+ mov dx, WORD PTR _sv$[esp]
+lock cmpxchg WORD PTR [ecx], dx
+ sete al ; if ax == [ecx] set al = 1 else set al = 0
+ and eax, 1 ; sign extend previous instruction
+ ret
+
+___kmp_compare_and_store16 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store32
+;
+; kmp_int32
+; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+;
+
+PUBLIC ___kmp_compare_and_store32
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_cv$ = 8
+_sv$ = 12
+
+___kmp_compare_and_store32 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov eax, DWORD PTR _cv$[esp]
+ mov edx, DWORD PTR _sv$[esp]
+lock cmpxchg DWORD PTR [ecx], edx
+ sete al ; if eax == [ecx] set al = 1 else set al = 0
+ and eax, 1 ; sign extend previous instruction
+ ret
+
+___kmp_compare_and_store32 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store64
+;
+; kmp_int32
+; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+;
+
+PUBLIC ___kmp_compare_and_store64
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 8
+_cv_low$ = 12
+_cv_high$ = 16
+_sv_low$ = 20
+_sv_high$ = 24
+
+___kmp_compare_and_store64 PROC NEAR
+
+ push ebp
+ mov ebp, esp
+ push ebx
+ push edi
+ mov edi, DWORD PTR _p$[ebp]
+ mov eax, DWORD PTR _cv_low$[ebp]
+ mov edx, DWORD PTR _cv_high$[ebp]
+ mov ebx, DWORD PTR _sv_low$[ebp]
+ mov ecx, DWORD PTR _sv_high$[ebp]
+lock cmpxchg8b QWORD PTR [edi]
+ sete al ; if edx:eax == [edi] set al = 1 else set al = 0
+ and eax, 1 ; sign extend previous instruction
+ pop edi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+
+___kmp_compare_and_store64 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_xchg_fixed8
+;
+; kmp_int8
+; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
+;
+
+PUBLIC ___kmp_xchg_fixed8
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_d$ = 8
+
+___kmp_xchg_fixed8 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov al, BYTE PTR _d$[esp]
+lock xchg BYTE PTR [ecx], al
+ ret
+
+___kmp_xchg_fixed8 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_xchg_fixed16
+;
+; kmp_int16
+; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
+;
+
+PUBLIC ___kmp_xchg_fixed16
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_d$ = 8
+
+___kmp_xchg_fixed16 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov ax, WORD PTR _d$[esp]
+lock xchg WORD PTR [ecx], ax
+ ret
+
+___kmp_xchg_fixed16 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_xchg_fixed32
+;
+; kmp_int32
+; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
+;
+
+PUBLIC ___kmp_xchg_fixed32
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_d$ = 8
+
+___kmp_xchg_fixed32 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov eax, DWORD PTR _d$[esp]
+lock xchg DWORD PTR [ecx], eax
+ ret
+
+___kmp_xchg_fixed32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_xchg_real32
+;
+; kmp_real32
+; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
+;
+
+PUBLIC ___kmp_xchg_real32
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 8
+_d$ = 12
+_old_value$ = -4
+
+___kmp_xchg_real32 PROC NEAR
+
+ push ebp
+ mov ebp, esp
+ sub esp, 4
+ push esi
+ mov esi, DWORD PTR _p$[ebp]
+
+ fld DWORD PTR [esi]
+ ;; load <addr>
+ fst DWORD PTR _old_value$[ebp]
+ ;; store into old_value
+
+ mov eax, DWORD PTR _d$[ebp]
+
+lock xchg DWORD PTR [esi], eax
+
+ fld DWORD PTR _old_value$[ebp]
+ ;; return old_value
+ pop esi
+ mov esp, ebp
+ pop ebp
+ ret
+
+___kmp_xchg_real32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store_ret8
+;
+; kmp_int8
+; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+;
+
+PUBLIC ___kmp_compare_and_store_ret8
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_cv$ = 8
+_sv$ = 12
+
+___kmp_compare_and_store_ret8 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov al, BYTE PTR _cv$[esp]
+ mov dl, BYTE PTR _sv$[esp]
+lock cmpxchg BYTE PTR [ecx], dl
+ ret
+
+___kmp_compare_and_store_ret8 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store_ret16
+;
+; kmp_int16
+; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+;
+
+PUBLIC ___kmp_compare_and_store_ret16
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_cv$ = 8
+_sv$ = 12
+
+___kmp_compare_and_store_ret16 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov ax, WORD PTR _cv$[esp]
+ mov dx, WORD PTR _sv$[esp]
+lock cmpxchg WORD PTR [ecx], dx
+ ret
+
+___kmp_compare_and_store_ret16 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store_ret32
+;
+; kmp_int32
+; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+;
+
+PUBLIC ___kmp_compare_and_store_ret32
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+_cv$ = 8
+_sv$ = 12
+
+___kmp_compare_and_store_ret32 PROC NEAR
+
+ mov ecx, DWORD PTR _p$[esp]
+ mov eax, DWORD PTR _cv$[esp]
+ mov edx, DWORD PTR _sv$[esp]
+lock cmpxchg DWORD PTR [ecx], edx
+ ret
+
+___kmp_compare_and_store_ret32 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_compare_and_store_ret64
+;
+; kmp_int64
+; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+;
+
+PUBLIC ___kmp_compare_and_store_ret64
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 8
+_cv_low$ = 12
+_cv_high$ = 16
+_sv_low$ = 20
+_sv_high$ = 24
+
+___kmp_compare_and_store_ret64 PROC NEAR
+
+ push ebp
+ mov ebp, esp
+ push ebx
+ push edi
+ mov edi, DWORD PTR _p$[ebp]
+ mov eax, DWORD PTR _cv_low$[ebp]
+ mov edx, DWORD PTR _cv_high$[ebp]
+ mov ebx, DWORD PTR _sv_low$[ebp]
+ mov ecx, DWORD PTR _sv_high$[ebp]
+lock cmpxchg8b QWORD PTR [edi]
+ pop edi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+
+___kmp_compare_and_store_ret64 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_load_x87_fpu_control_word
+;
+; void
+; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
+;
+; parameters:
+; p: 4(%esp)
+
+PUBLIC ___kmp_load_x87_fpu_control_word
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+
+___kmp_load_x87_fpu_control_word PROC NEAR
+
+ mov eax, DWORD PTR _p$[esp]
+ fldcw WORD PTR [eax]
+ ret
+
+___kmp_load_x87_fpu_control_word ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_store_x87_fpu_control_word
+;
+; void
+; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
+;
+; parameters:
+; p: 4(%esp)
+
+PUBLIC ___kmp_store_x87_fpu_control_word
+_TEXT SEGMENT
+ ALIGN 16
+_p$ = 4
+
+___kmp_store_x87_fpu_control_word PROC NEAR
+
+ mov eax, DWORD PTR _p$[esp]
+ fstcw WORD PTR [eax]
+ ret
+
+___kmp_store_x87_fpu_control_word ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_clear_x87_fpu_status_word
+;
+; void
+; __kmp_clear_x87_fpu_status_word();
+;
+
+PUBLIC ___kmp_clear_x87_fpu_status_word
+_TEXT SEGMENT
+ ALIGN 16
+
+___kmp_clear_x87_fpu_status_word PROC NEAR
+
+ fnclex
+ ret
+
+___kmp_clear_x87_fpu_status_word ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION ___kmp_invoke_microtask
+;
+; typedef void (*microtask_t)( int *gtid, int *tid, ... );
+;
+; int
+; __kmp_invoke_microtask( microtask_t pkfn,
+; int gtid, int tid,
+; int argc, void *p_argv[] )
+;
+
+PUBLIC ___kmp_invoke_microtask
+_TEXT SEGMENT
+ ALIGN 16
+_pkfn$ = 8
+_gtid$ = 12
+_tid$ = 16
+_argc$ = 20
+_argv$ = 24
+if OMPT_SUPPORT
+_exit_frame$ = 28
+endif
+_i$ = -8
+_stk_adj$ = -16
+_vptr$ = -12
+_qptr$ = -4
+
+___kmp_invoke_microtask PROC NEAR
+; Line 102
+ push ebp
+ mov ebp, esp
+ sub esp, 16 ; 00000010H
+ push ebx
+ push esi
+ push edi
+if OMPT_SUPPORT
+ mov eax, DWORD PTR _exit_frame$[ebp]
+ mov DWORD PTR [eax], ebp
+endif
+; Line 114
+ mov eax, DWORD PTR _argc$[ebp]
+ mov DWORD PTR _i$[ebp], eax
+
+;; ------------------------------------------------------------
+ lea edx, DWORD PTR [eax*4+8]
+ mov ecx, esp ; Save current SP into ECX
+ mov eax,edx ; Save the size of the args in eax
+ sub ecx,edx ; esp-((#args+2)*4) -> ecx -- without mods, stack ptr would be this
+ mov edx,ecx ; Save to edx
+ and ecx,-128 ; Mask off 7 bits
+ sub edx,ecx ; Amount to subtract from esp
+ sub esp,edx ; Prepare stack ptr-- Now it will be aligned on 128-byte boundary at the call
+
+ add edx,eax ; Calculate total size of the stack decrement.
+ mov DWORD PTR _stk_adj$[ebp], edx
+;; ------------------------------------------------------------
+
+ jmp SHORT $L22237
+$L22238:
+ mov ecx, DWORD PTR _i$[ebp]
+ sub ecx, 1
+ mov DWORD PTR _i$[ebp], ecx
+$L22237:
+ cmp DWORD PTR _i$[ebp], 0
+ jle SHORT $L22239
+; Line 116
+ mov edx, DWORD PTR _i$[ebp]
+ mov eax, DWORD PTR _argv$[ebp]
+ mov ecx, DWORD PTR [eax+edx*4-4]
+ mov DWORD PTR _vptr$[ebp], ecx
+; Line 123
+ mov eax, DWORD PTR _vptr$[ebp]
+; Line 124
+ push eax
+; Line 127
+ jmp SHORT $L22238
+$L22239:
+; Line 129
+ lea edx, DWORD PTR _tid$[ebp]
+ mov DWORD PTR _vptr$[ebp], edx
+; Line 130
+ lea eax, DWORD PTR _gtid$[ebp]
+ mov DWORD PTR _qptr$[ebp], eax
+; Line 143
+ mov eax, DWORD PTR _vptr$[ebp]
+; Line 144
+ push eax
+; Line 145
+ mov eax, DWORD PTR _qptr$[ebp]
+; Line 146
+ push eax
+; Line 147
+ call DWORD PTR _pkfn$[ebp]
+; Line 148
+ add esp, DWORD PTR _stk_adj$[ebp]
+; Line 152
+ mov eax, 1
+; Line 153
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret 0
+___kmp_invoke_microtask ENDP
+_TEXT ENDS
+
+endif
+
+; ==================================== Intel(R) 64 ===================================
+
+ifdef _M_AMD64
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_x86_cpuid
+;
+; void
+; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
+;
+; parameters:
+; mode: ecx
+; mode2: edx
+; cpuid_buffer: r8
+
+PUBLIC __kmp_x86_cpuid
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_x86_cpuid PROC FRAME ;NEAR
+
+ push rbp
+ .pushreg rbp
+ mov rbp, rsp
+ .setframe rbp, 0
+ push rbx ; callee-save register
+ .pushreg rbx
+ .ENDPROLOG
+
+ mov r10, r8 ; p parameter
+ mov eax, ecx ; mode parameter
+ mov ecx, edx ; mode2 parameter
+ cpuid ; Query the CPUID for the current processor
+
+ mov DWORD PTR 0[ r10 ], eax ; store results into buffer
+ mov DWORD PTR 4[ r10 ], ebx
+ mov DWORD PTR 8[ r10 ], ecx
+ mov DWORD PTR 12[ r10 ], edx
+
+ pop rbx ; callee-save register
+ mov rsp, rbp
+ pop rbp
+ ret
+
+__kmp_x86_cpuid ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_test_then_add32
+;
+; kmp_int32
+; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
+;
+; parameters:
+; p: rcx
+; d: edx
+;
+; return: eax
+
+PUBLIC __kmp_test_then_add32
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_test_then_add32 PROC ;NEAR
+
+ mov eax, edx
+lock xadd DWORD PTR [rcx], eax
+ ret
+
+__kmp_test_then_add32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_test_then_add64
+;
+; kmp_int32
+; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
+;
+; parameters:
+; p: rcx
+; d: rdx
+;
+; return: rax
+
+PUBLIC __kmp_test_then_add64
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_test_then_add64 PROC ;NEAR
+
+ mov rax, rdx
+lock xadd QWORD PTR [rcx], rax
+ ret
+
+__kmp_test_then_add64 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store8
+;
+; kmp_int8
+; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: eax
+
+PUBLIC __kmp_compare_and_store8
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store8 PROC ;NEAR
+
+ mov al, dl ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg BYTE PTR [rcx], dl
+ sete al ; if al == [rcx] set al = 1 else set al = 0
+ and rax, 1 ; sign extend previous instruction
+ ret
+
+__kmp_compare_and_store8 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store16
+;
+; kmp_int16
+; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: eax
+
+PUBLIC __kmp_compare_and_store16
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store16 PROC ;NEAR
+
+ mov ax, dx ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg WORD PTR [rcx], dx
+ sete al ; if ax == [rcx] set al = 1 else set al = 0
+        and     rax, 1                  ; zero-extend the sete result (keep only the low bit)
+ ret
+
+__kmp_compare_and_store16 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store32
+;
+; kmp_int32
+; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: eax
+
+PUBLIC __kmp_compare_and_store32
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store32 PROC ;NEAR
+
+ mov eax, edx ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg DWORD PTR [rcx], edx
+ sete al ; if eax == [rcx] set al = 1 else set al = 0
+        and     rax, 1                  ; zero-extend the sete result (keep only the low bit)
+ ret
+
+__kmp_compare_and_store32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store64
+;
+; kmp_int32
+; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+; parameters:
+; p: rcx
+; cv: rdx
+; sv: r8
+;
+; return: eax
+
+PUBLIC __kmp_compare_and_store64
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store64 PROC ;NEAR
+
+ mov rax, rdx ; "cv"
+ mov rdx, r8 ; "sv"
+lock cmpxchg QWORD PTR [rcx], rdx
+ sete al ; if rax == [rcx] set al = 1 else set al = 0
+        and     rax, 1                  ; zero-extend the sete result (keep only the low bit)
+ ret
+
+__kmp_compare_and_store64 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_xchg_fixed8
+;
+; kmp_int8
+; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
+;
+; parameters:
+; p: rcx
+; d: dl
+;
+; return: al
+
+PUBLIC __kmp_xchg_fixed8
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_xchg_fixed8 PROC ;NEAR
+
+ mov al, dl
+lock xchg BYTE PTR [rcx], al
+ ret
+
+__kmp_xchg_fixed8 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_xchg_fixed16
+;
+; kmp_int16
+; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
+;
+; parameters:
+; p: rcx
+; d: dx
+;
+; return: ax
+
+PUBLIC __kmp_xchg_fixed16
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_xchg_fixed16 PROC ;NEAR
+
+ mov ax, dx
+lock xchg WORD PTR [rcx], ax
+ ret
+
+__kmp_xchg_fixed16 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_xchg_fixed32
+;
+; kmp_int32
+; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
+;
+; parameters:
+; p: rcx
+; d: edx
+;
+; return: eax
+
+PUBLIC __kmp_xchg_fixed32
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_xchg_fixed32 PROC ;NEAR
+
+ mov eax, edx
+lock xchg DWORD PTR [rcx], eax
+ ret
+
+__kmp_xchg_fixed32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_xchg_fixed64
+;
+; kmp_int64
+; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
+;
+; parameters:
+; p: rcx
+; d: rdx
+;
+; return: rax
+
+PUBLIC __kmp_xchg_fixed64
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_xchg_fixed64 PROC ;NEAR
+
+ mov rax, rdx
+lock xchg QWORD PTR [rcx], rax
+ ret
+
+__kmp_xchg_fixed64 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store_ret8
+;
+; kmp_int8
+; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: eax
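+;
+; note: unlike __kmp_compare_and_store8, this returns the previous value of
+; *p rather than a success flag; the _ret16/_ret32/_ret64 variants below
+; behave the same way for their operand sizes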
+
+PUBLIC __kmp_compare_and_store_ret8
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store_ret8 PROC ;NEAR
+ mov al, dl ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg BYTE PTR [rcx], dl
+ ; Compare AL with [rcx]. If equal set
+ ; ZF and exchange DL with [rcx]. Else, clear
+ ; ZF and load [rcx] into AL.
+ ret
+
+__kmp_compare_and_store_ret8 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store_ret16
+;
+; kmp_int16
+; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: eax
+
+PUBLIC __kmp_compare_and_store_ret16
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store_ret16 PROC ;NEAR
+
+ mov ax, dx ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg WORD PTR [rcx], dx
+ ret
+
+__kmp_compare_and_store_ret16 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store_ret32
+;
+; kmp_int32
+; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: eax
+
+PUBLIC __kmp_compare_and_store_ret32
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store_ret32 PROC ;NEAR
+
+ mov eax, edx ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg DWORD PTR [rcx], edx
+ ret
+
+__kmp_compare_and_store_ret32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store_ret64
+;
+; kmp_int64
+; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+; parameters:
+; p: rcx
+; cv: rdx
+; sv: r8
+;
+; return: rax
+
+PUBLIC __kmp_compare_and_store_ret64
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store_ret64 PROC ;NEAR
+
+ mov rax, rdx ; "cv"
+ mov rdx, r8 ; "sv"
+lock cmpxchg QWORD PTR [rcx], rdx
+ ret
+
+__kmp_compare_and_store_ret64 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_compare_and_store_loop8
+;
+; kmp_int8
+; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+; parameters:
+; p: rcx
+; cv: edx
+; sv: r8d
+;
+; return: al
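+;
+; i.e. keep retrying (issuing a pause between attempts) until *p equals cv,
+; then atomically replace it with sv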
+
+PUBLIC __kmp_compare_and_store_loop8
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_compare_and_store_loop8 PROC ;NEAR
+$__kmp_loop:
+ mov al, dl ; "cv"
+ mov edx, r8d ; "sv"
+lock cmpxchg BYTE PTR [rcx], dl
+ ; Compare AL with [rcx]. If equal set
+ ; ZF and exchange DL with [rcx]. Else, clear
+ ; ZF and load [rcx] into AL.
+ jz SHORT $__kmp_success
+
+ db 0f3H
+ db 090H ; pause
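+                ; (0F3h 90h is the encoding of the "pause" instruction,
+                ;  emitted as raw bytes, presumably for assemblers that do
+                ;  not recognize the mnemonic)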
+
+ jmp SHORT $__kmp_loop
+
+$__kmp_success:
+ ret
+
+__kmp_compare_and_store_loop8 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_xchg_real32
+;
+; kmp_real32
+; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
+;
+; parameters:
+; p: rcx
+; d: xmm1 (lower 4 bytes)
+;
+; return: xmm0 (lower 4 bytes)
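+;
+; the float bits travel through the integer registers: they are moved from
+; xmm1 to eax, exchanged atomically with [rcx], and the previous contents
+; are returned in xmm0 (the 64-bit variant below works the same way)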
+
+PUBLIC __kmp_xchg_real32
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_xchg_real32 PROC ;NEAR
+
+ movd eax, xmm1 ; load d
+
+lock xchg DWORD PTR [rcx], eax
+
+ movd xmm0, eax ; load old value into return register
+ ret
+
+__kmp_xchg_real32 ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_xchg_real64
+;
+; kmp_real64
+; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
+;
+; parameters:
+; p: rcx
+; d: xmm1 (lower 8 bytes)
+;
+; return: xmm0 (lower 8 bytes)
+
+PUBLIC __kmp_xchg_real64
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_xchg_real64 PROC ;NEAR
+
+ movd rax, xmm1 ; load "d"
+
+lock xchg QWORD PTR [rcx], rax
+
+ movd xmm0, rax ; load old value into return register
+ ret
+
+__kmp_xchg_real64 ENDP
+_TEXT ENDS
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_load_x87_fpu_control_word
+;
+; void
+; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
+;
+; parameters:
+; p: rcx
+;
+
+PUBLIC __kmp_load_x87_fpu_control_word
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_load_x87_fpu_control_word PROC ;NEAR
+
+ fldcw WORD PTR [rcx]
+ ret
+
+__kmp_load_x87_fpu_control_word ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_store_x87_fpu_control_word
+;
+; void
+; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
+;
+; parameters:
+; p: rcx
+;
+
+PUBLIC __kmp_store_x87_fpu_control_word
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_store_x87_fpu_control_word PROC ;NEAR
+
+ fstcw WORD PTR [rcx]
+ ret
+
+__kmp_store_x87_fpu_control_word ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_clear_x87_fpu_status_word
+;
+; void
+; __kmp_clear_x87_fpu_status_word()
+;
+
+PUBLIC __kmp_clear_x87_fpu_status_word
+_TEXT SEGMENT
+ ALIGN 16
+__kmp_clear_x87_fpu_status_word PROC ;NEAR
+
+ fnclex
+ ret
+
+__kmp_clear_x87_fpu_status_word ENDP
+_TEXT ENDS
+
+
+;------------------------------------------------------------------------
+;
+; FUNCTION __kmp_invoke_microtask
+;
+; typedef void (*microtask_t)( int *gtid, int *tid, ... );
+;
+; int
+; __kmp_invoke_microtask( microtask_t pkfn,
+; int gtid, int tid,
+; int argc, void *p_argv[] ) {
+;
+; (*pkfn) ( &gtid, &tid, argv[0], ... );
+; return 1;
+; }
+;
+; note:
+;      just before the call to pkfn, rsp must be 128-byte aligned (required by the compiler)
+;
+; parameters:
+; rcx: pkfn 16[rbp]
+; edx: gtid 24[rbp]
+; r8d: tid 32[rbp]
+; r9d: argc 40[rbp]
+; [st]: p_argv 48[rbp]
+;
+; reg temps:
+; rax: used all over the place
+; rdx: used all over the place
+; rcx: used as argument counter for push parms loop
+; r10: used to hold pkfn function pointer argument
+;
+; return: eax (always 1/TRUE)
+;
+
+$_pkfn = 16
+$_gtid = 24
+$_tid = 32
+$_argc = 40
+$_p_argv = 48
+if OMPT_SUPPORT
+$_exit_frame = 56
+endif
+
+PUBLIC __kmp_invoke_microtask
+_TEXT SEGMENT
+ ALIGN 16
+
+__kmp_invoke_microtask PROC FRAME ;NEAR
+ mov QWORD PTR 16[rsp], rdx ; home gtid parameter
+ mov QWORD PTR 24[rsp], r8 ; home tid parameter
+ push rbp ; save base pointer
+ .pushreg rbp
+ sub rsp, 0 ; no fixed allocation necessary - end prolog
+
+ lea rbp, QWORD PTR [rsp] ; establish the base pointer
+ .setframe rbp, 0
+ .ENDPROLOG
+if OMPT_SUPPORT
+ mov rax, QWORD PTR $_exit_frame[rbp]
+ mov QWORD PTR [rax], rbp
+endif
+ mov r10, rcx ; save pkfn pointer for later
+
+;; ------------------------------------------------------------
+ mov rax, r9 ; rax <= argc
+ cmp rax, 2
+ jge SHORT $_kmp_invoke_stack_align
+ mov rax, 2 ; set 4 homes if less than 2 parms
+$_kmp_invoke_stack_align:
+        lea     rdx, QWORD PTR [rax*8+16]   ; rdx <= (argc + 2) * 8
+ mov rax, rsp ; Save current SP into rax
+ sub rax, rdx ; rsp - ((argc+2)*8) -> rax
+ ; without align, rsp would be this
+ and rax, -128 ; Mask off 7 bits (128-byte align)
+ add rax, rdx ; add space for push's in a loop below
+ mov rsp, rax ; Prepare the stack ptr
+ ; Now it will align to 128-byte at the call
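+                                        ; e.g. argc=5: rdx=(5+2)*8=56; the three
+                                        ; pushes below (24 bytes) plus the 32-byte
+                                        ; home area consume exactly rdx bytes, so
+                                        ; rsp hits the 128-byte boundary at the call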
+;; ------------------------------------------------------------
+ ; setup pkfn parameter stack
+ mov rax, r9 ; rax <= argc
+ shl rax, 3 ; rax <= argc*8
+ mov rdx, QWORD PTR $_p_argv[rbp] ; rdx <= p_argv
+ add rdx, rax ; rdx <= &p_argv[argc]
+ mov rcx, r9 ; rcx <= argc
+ jecxz SHORT $_kmp_invoke_pass_parms ; nothing to push if argc=0
+ cmp ecx, 1 ; if argc=1 branch ahead
+ je SHORT $_kmp_invoke_one_parm
+        sub     ecx, 2                      ; if argc=2 branch ahead; subtract two from the parm count
+ je SHORT $_kmp_invoke_two_parms
+
+$_kmp_invoke_push_parms: ; push last - 5th parms to pkfn on stack
+ sub rdx, 8 ; decrement p_argv pointer to previous parm
+ mov r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
+ push r8 ; push p_argv[rcx-1] onto stack (reverse order)
+ sub ecx, 1
+ jecxz SHORT $_kmp_invoke_two_parms
+ jmp SHORT $_kmp_invoke_push_parms
+
+$_kmp_invoke_two_parms:
+ sub rdx, 8 ; put 4th parm to pkfn in r9
+ mov r9, QWORD PTR [rdx] ; r9 <= p_argv[1]
+
+$_kmp_invoke_one_parm:
+ sub rdx, 8 ; put 3rd parm to pkfn in r8
+ mov r8, QWORD PTR [rdx] ; r8 <= p_argv[0]
+
+$_kmp_invoke_pass_parms: ; put 1st & 2nd parms to pkfn in registers
+ lea rdx, QWORD PTR $_tid[rbp] ; rdx <= &tid (2nd parm to pkfn)
+ lea rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
+ sub rsp, 32 ; add stack space for first four parms
+ mov rax, r10 ; rax <= pkfn
+ call rax ; call (*pkfn)()
+ mov rax, 1 ; move 1 into return register;
+
+ lea rsp, QWORD PTR [rbp] ; restore stack pointer
+
+; add rsp, 0 ; no fixed allocation necessary - start epilog
+ pop rbp ; restore frame pointer
+ ret
+__kmp_invoke_microtask ENDP
+_TEXT ENDS
+
+endif
+
+END
diff --git a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c
index 8aa07f0b4c..3aeafae910 100644
--- a/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c
+++ b/contrib/libs/cxxsupp/openmp/z_Windows_NT-586_util.c
@@ -1,163 +1,163 @@
-/*
- * z_Windows_NT-586_util.c -- platform specific routines.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-
-#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to
- * use compare_and_store for these routines
- */
-
-kmp_int8
-__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d )
-{
- kmp_int8 old_value, new_value;
-
- old_value = TCR_1( *p );
- new_value = old_value | d;
-
- while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_1( *p );
- new_value = old_value | d;
- }
- return old_value;
-}
-
-kmp_int8
-__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d )
-{
- kmp_int8 old_value, new_value;
-
- old_value = TCR_1( *p );
- new_value = old_value & d;
-
- while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_1( *p );
- new_value = old_value & d;
- }
- return old_value;
-}
-
-kmp_int32
-__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d )
-{
- kmp_int32 old_value, new_value;
-
- old_value = TCR_4( *p );
- new_value = old_value | d;
-
- while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_4( *p );
- new_value = old_value | d;
- }
- return old_value;
-}
-
-kmp_int32
-__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
-{
- kmp_int32 old_value, new_value;
-
- old_value = TCR_4( *p );
- new_value = old_value & d;
-
- while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_4( *p );
- new_value = old_value & d;
- }
- return old_value;
-}
-
-kmp_int8
-__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_1( *p );
- new_value = old_value + d;
- while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_1( *p );
- new_value = old_value + d;
- }
- return old_value;
-}
-
-#if KMP_ARCH_X86
-kmp_int64
-__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_8( *p );
- new_value = old_value + d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_8( *p );
- new_value = old_value + d;
- }
- return old_value;
-}
-#endif /* KMP_ARCH_X86 */
-
-kmp_int64
-__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_8( *p );
- new_value = old_value | d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_8( *p );
- new_value = old_value | d;
- }
-
- return old_value;
-}
-
-kmp_int64
-__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d )
-{
- kmp_int64 old_value, new_value;
-
- old_value = TCR_8( *p );
- new_value = old_value & d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_8( *p );
- new_value = old_value & d;
- }
-
- return old_value;
-}
-
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
+/*
+ * z_Windows_NT-586_util.c -- platform specific routines.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+
+#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to
+ * use compare_and_store for these routines
+ */
+
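+/* Each routine below follows the same retry pattern: read the current value,
+ * compute the new value, and repeat the compare_and_store (pausing between
+ * attempts) until no other thread has changed *p since the read. The value
+ * observed before the successful update is returned (fetch-and-op semantics).
+ */
+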
+kmp_int8
+__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d )
+{
+ kmp_int8 old_value, new_value;
+
+ old_value = TCR_1( *p );
+ new_value = old_value | d;
+
+ while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_1( *p );
+ new_value = old_value | d;
+ }
+ return old_value;
+}
+
+kmp_int8
+__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d )
+{
+ kmp_int8 old_value, new_value;
+
+ old_value = TCR_1( *p );
+ new_value = old_value & d;
+
+ while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_1( *p );
+ new_value = old_value & d;
+ }
+ return old_value;
+}
+
+kmp_int32
+__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d )
+{
+ kmp_int32 old_value, new_value;
+
+ old_value = TCR_4( *p );
+ new_value = old_value | d;
+
+ while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_4( *p );
+ new_value = old_value | d;
+ }
+ return old_value;
+}
+
+kmp_int32
+__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
+{
+ kmp_int32 old_value, new_value;
+
+ old_value = TCR_4( *p );
+ new_value = old_value & d;
+
+ while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_4( *p );
+ new_value = old_value & d;
+ }
+ return old_value;
+}
+
+kmp_int8
+__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_1( *p );
+ new_value = old_value + d;
+ while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_1( *p );
+ new_value = old_value + d;
+ }
+ return old_value;
+}
+
+#if KMP_ARCH_X86
+kmp_int64
+__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_8( *p );
+ new_value = old_value + d;
+ while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_8( *p );
+ new_value = old_value + d;
+ }
+ return old_value;
+}
+#endif /* KMP_ARCH_X86 */
+
+kmp_int64
+__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_8( *p );
+ new_value = old_value | d;
+ while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_8( *p );
+ new_value = old_value | d;
+ }
+
+ return old_value;
+}
+
+kmp_int64
+__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d )
+{
+ kmp_int64 old_value, new_value;
+
+ old_value = TCR_8( *p );
+ new_value = old_value & d;
+ while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ {
+ KMP_CPU_PAUSE();
+ old_value = TCR_8( *p );
+ new_value = old_value & d;
+ }
+
+ return old_value;
+}
+
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
diff --git a/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c b/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c
index f678ba320e..03a4afe5e1 100644
--- a/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c
+++ b/contrib/libs/cxxsupp/openmp/z_Windows_NT_util.c
@@ -1,1932 +1,1932 @@
-/*
- * z_Windows_NT_util.c -- platform specific routines.
- */
-
-
-//===----------------------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "kmp.h"
-#include "kmp_itt.h"
-#include "kmp_i18n.h"
-#include "kmp_io.h"
-#include "kmp_wait_release.h"
-
-
-
-/* ----------------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------------------- */
-
-/* This code is related to NtQuerySystemInformation() function. This function
- is used in the Load balance algorithm for OMP_DYNAMIC=true to find the
- number of running threads in the system. */
-
-#include <ntstatus.h>
-#include <ntsecapi.h> // UNICODE_STRING
-
-enum SYSTEM_INFORMATION_CLASS {
- SystemProcessInformation = 5
-}; // SYSTEM_INFORMATION_CLASS
-
-struct CLIENT_ID {
- HANDLE UniqueProcess;
- HANDLE UniqueThread;
-}; // struct CLIENT_ID
-
-enum THREAD_STATE {
- StateInitialized,
- StateReady,
- StateRunning,
- StateStandby,
- StateTerminated,
- StateWait,
- StateTransition,
- StateUnknown
-}; // enum THREAD_STATE
-
-struct VM_COUNTERS {
- SIZE_T PeakVirtualSize;
- SIZE_T VirtualSize;
- ULONG PageFaultCount;
- SIZE_T PeakWorkingSetSize;
- SIZE_T WorkingSetSize;
- SIZE_T QuotaPeakPagedPoolUsage;
- SIZE_T QuotaPagedPoolUsage;
- SIZE_T QuotaPeakNonPagedPoolUsage;
- SIZE_T QuotaNonPagedPoolUsage;
- SIZE_T PagefileUsage;
- SIZE_T PeakPagefileUsage;
- SIZE_T PrivatePageCount;
-}; // struct VM_COUNTERS
-
-struct SYSTEM_THREAD {
- LARGE_INTEGER KernelTime;
- LARGE_INTEGER UserTime;
- LARGE_INTEGER CreateTime;
- ULONG WaitTime;
- LPVOID StartAddress;
- CLIENT_ID ClientId;
- DWORD Priority;
- LONG BasePriority;
- ULONG ContextSwitchCount;
- THREAD_STATE State;
- ULONG WaitReason;
-}; // SYSTEM_THREAD
-
-KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 );
-#if KMP_ARCH_X86
- KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 );
-#else
- KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 );
-#endif
-
-struct SYSTEM_PROCESS_INFORMATION {
- ULONG NextEntryOffset;
- ULONG NumberOfThreads;
- LARGE_INTEGER Reserved[ 3 ];
- LARGE_INTEGER CreateTime;
- LARGE_INTEGER UserTime;
- LARGE_INTEGER KernelTime;
- UNICODE_STRING ImageName;
- DWORD BasePriority;
- HANDLE ProcessId;
- HANDLE ParentProcessId;
- ULONG HandleCount;
- ULONG Reserved2[ 2 ];
- VM_COUNTERS VMCounters;
- IO_COUNTERS IOCounters;
- SYSTEM_THREAD Threads[ 1 ];
-}; // SYSTEM_PROCESS_INFORMATION
-typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION;
-
-KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 );
-KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 );
-KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 );
-#if KMP_ARCH_X86
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 );
-#else
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 );
- KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 );
-#endif
-
-typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG );
-NtQuerySystemInformation_t NtQuerySystemInformation = NULL;
-
-HMODULE ntdll = NULL;
-
-/* End of NtQuerySystemInformation()-related code */
-
-#if KMP_GROUP_AFFINITY
-static HMODULE kernel32 = NULL;
-#endif /* KMP_GROUP_AFFINITY */
-
-/* ----------------------------------------------------------------------------------- */
-/* ----------------------------------------------------------------------------------- */
-
-#if KMP_HANDLE_SIGNALS
- typedef void (* sig_func_t )( int );
- static sig_func_t __kmp_sighldrs[ NSIG ];
- static int __kmp_siginstalled[ NSIG ];
-#endif
-
-static HANDLE __kmp_monitor_ev;
-static kmp_int64 __kmp_win32_time;
-double __kmp_win32_tick;
-
-int __kmp_init_runtime = FALSE;
-CRITICAL_SECTION __kmp_win32_section;
-
-void
-__kmp_win32_mutex_init( kmp_win32_mutex_t *mx )
-{
- InitializeCriticalSection( & mx->cs );
-#if USE_ITT_BUILD
- __kmp_itt_system_object_created( & mx->cs, "Critical Section" );
-#endif /* USE_ITT_BUILD */
-}
-
-void
-__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx )
-{
- DeleteCriticalSection( & mx->cs );
-}
-
-void
-__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx )
-{
- EnterCriticalSection( & mx->cs );
-}
-
-void
-__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx )
-{
- LeaveCriticalSection( & mx->cs );
-}
-
-void
-__kmp_win32_cond_init( kmp_win32_cond_t *cv )
-{
- cv->waiters_count_ = 0;
- cv->wait_generation_count_ = 0;
- cv->release_count_ = 0;
-
- /* Initialize the critical section */
- __kmp_win32_mutex_init( & cv->waiters_count_lock_ );
-
- /* Create a manual-reset event. */
- cv->event_ = CreateEvent( NULL, // no security
- TRUE, // manual-reset
- FALSE, // non-signaled initially
- NULL ); // unnamed
-#if USE_ITT_BUILD
- __kmp_itt_system_object_created( cv->event_, "Event" );
-#endif /* USE_ITT_BUILD */
-}
-
-void
-__kmp_win32_cond_destroy( kmp_win32_cond_t *cv )
-{
- __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ );
- __kmp_free_handle( cv->event_ );
- memset( cv, '\0', sizeof( *cv ) );
-}
-
-/* TODO associate cv with a team instead of a thread so as to optimize
- * the case where we wake up a whole team */
-
-void
-__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load )
-{
- int my_generation;
- int last_waiter;
-
- /* Avoid race conditions */
- __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
-
- /* Increment count of waiters */
- cv->waiters_count_++;
-
- /* Store current generation in our activation record. */
- my_generation = cv->wait_generation_count_;
-
- __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
- __kmp_win32_mutex_unlock( mx );
-
-
- for (;;) {
- int wait_done;
-
- /* Wait until the event is signaled */
- WaitForSingleObject( cv->event_, INFINITE );
-
- __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
-
- /* Exit the loop when the <cv->event_> is signaled and
- * there are still waiting threads from this <wait_generation>
- * that haven't been released from this wait yet. */
- wait_done = ( cv->release_count_ > 0 ) &&
- ( cv->wait_generation_count_ != my_generation );
-
- __kmp_win32_mutex_unlock( &cv->waiters_count_lock_);
-
- /* there used to be a semicolon after the if statement,
- * it looked like a bug, so i removed it */
- if( wait_done )
- break;
- }
-
- __kmp_win32_mutex_lock( mx );
- __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
-
- cv->waiters_count_--;
- cv->release_count_--;
-
- last_waiter = ( cv->release_count_ == 0 );
-
- __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
-
- if( last_waiter ) {
- /* We're the last waiter to be notified, so reset the manual event. */
- ResetEvent( cv->event_ );
- }
-}
-
-void
-__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv )
-{
- __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
-
- if( cv->waiters_count_ > 0 ) {
- SetEvent( cv->event_ );
- /* Release all the threads in this generation. */
-
- cv->release_count_ = cv->waiters_count_;
-
- /* Start a new generation. */
- cv->wait_generation_count_++;
- }
-
- __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
-}
-
-void
-__kmp_win32_cond_signal( kmp_win32_cond_t *cv )
-{
- __kmp_win32_cond_broadcast( cv );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_enable( int new_state )
-{
- if (__kmp_init_runtime)
- LeaveCriticalSection( & __kmp_win32_section );
-}
-
-void
-__kmp_disable( int *old_state )
-{
- *old_state = 0;
-
- if (__kmp_init_runtime)
- EnterCriticalSection( & __kmp_win32_section );
-}
-
-void
-__kmp_suspend_initialize( void )
-{
- /* do nothing */
-}
-
-static void
-__kmp_suspend_initialize_thread( kmp_info_t *th )
-{
- if ( ! TCR_4( th->th.th_suspend_init ) ) {
- /* this means we haven't initialized the suspension pthread objects for this thread
- in this instance of the process */
- __kmp_win32_cond_init( &th->th.th_suspend_cv );
- __kmp_win32_mutex_init( &th->th.th_suspend_mx );
- TCW_4( th->th.th_suspend_init, TRUE );
- }
-}
-
-void
-__kmp_suspend_uninitialize_thread( kmp_info_t *th )
-{
- if ( TCR_4( th->th.th_suspend_init ) ) {
- /* this means we have initialize the suspension pthread objects for this thread
- in this instance of the process */
- __kmp_win32_cond_destroy( & th->th.th_suspend_cv );
- __kmp_win32_mutex_destroy( & th->th.th_suspend_mx );
- TCW_4( th->th.th_suspend_init, FALSE );
- }
-}
-
-/* This routine puts the calling thread to sleep after setting the
- * sleep bit for the indicated flag variable to true.
- */
-template <class C>
-static inline void __kmp_suspend_template( int th_gtid, C *flag )
-{
- kmp_info_t *th = __kmp_threads[th_gtid];
- int status;
- typename C::flag_t old_spin;
-
- KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) );
-
- __kmp_suspend_initialize_thread( th );
- __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
-
- KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n",
- th_gtid, flag->get() ) );
-
- /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
- gets called first?
- */
- old_spin = flag->set_sleeping();
-
- KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n",
- th_gtid, flag->get(), *(flag->get()) ) );
-
- if ( flag->done_check_val(old_spin) ) {
- old_spin = flag->unset_sleeping();
- KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n",
- th_gtid, flag->get()) );
- } else {
-#ifdef DEBUG_SUSPEND
- __kmp_suspend_count++;
-#endif
- /* Encapsulate in a loop as the documentation states that this may
- * "with low probability" return when the condition variable has
- * not been signaled or broadcast
- */
- int deactivated = FALSE;
- TCW_PTR(th->th.th_sleep_loc, (void *)flag);
- while ( flag->is_sleeping() ) {
- KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n",
- th_gtid ) );
- // Mark the thread as no longer active (only in the first iteration of the loop).
- if ( ! deactivated ) {
- th->th.th_active = FALSE;
- if ( th->th.th_active_in_pool ) {
- th->th.th_active_in_pool = FALSE;
- KMP_TEST_THEN_DEC32(
- (kmp_int32 *) &__kmp_thread_pool_active_nth );
- KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
- }
- deactivated = TRUE;
-
-
- __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
- }
- else {
- __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
- }
-
-#ifdef KMP_DEBUG
- if( flag->is_sleeping() ) {
- KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ));
- }
-#endif /* KMP_DEBUG */
-
- } // while
-
- // Mark the thread as active again (if it was previous marked as inactive)
- if ( deactivated ) {
- th->th.th_active = TRUE;
- if ( TCR_4(th->th.th_in_pool) ) {
- KMP_TEST_THEN_INC32(
- (kmp_int32 *) &__kmp_thread_pool_active_nth );
- th->th.th_active_in_pool = TRUE;
- }
- }
- }
-
-
- __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
-
- KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) );
-}
-
-void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
-void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
-
-
-/* This routine signals the thread specified by target_gtid to wake up
- * after setting the sleep bit indicated by the flag argument to FALSE
- */
-template <class C>
-static inline void __kmp_resume_template( int target_gtid, C *flag )
-{
- kmp_info_t *th = __kmp_threads[target_gtid];
- int status;
-
-#ifdef KMP_DEBUG
- int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
-#endif
-
- KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) );
-
- __kmp_suspend_initialize_thread( th );
- __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
-
- if (!flag) { // coming from __kmp_null_resume_wrapper
- flag = (C *)th->th.th_sleep_loc;
- }
-
- // First, check if the flag is null or its type has changed. If so, someone else woke it up.
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to
- KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n",
- gtid, target_gtid, NULL ) );
- __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
- return;
- }
- else {
- typename C::flag_t old_spin = flag->unset_sleeping();
- if ( !flag->is_sleeping_val(old_spin) ) {
- KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): "
- "%u => %u\n",
- gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) );
- __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
- return;
- }
- }
- TCW_PTR(th->th.th_sleep_loc, NULL);
-
- KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n",
- gtid, target_gtid, flag->get() ) );
-
-
- __kmp_win32_cond_signal( &th->th.th_suspend_cv );
- __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
-
- KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n",
- gtid, target_gtid ) );
-}
-
-void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
-void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
-void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_yield( int cond )
-{
- if (cond)
- Sleep(0);
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_gtid_set_specific( int gtid )
-{
- KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n",
- gtid, __kmp_gtid_threadprivate_key ));
- KMP_ASSERT( __kmp_init_runtime );
- if( ! TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) )
- KMP_FATAL( TLSSetValueFailed );
-}
-
-int
-__kmp_gtid_get_specific()
-{
- int gtid;
- if( !__kmp_init_runtime ) {
- KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
- return KMP_GTID_SHUTDOWN;
- }
- gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key );
- if ( gtid == 0 ) {
- gtid = KMP_GTID_DNE;
- }
- else {
- gtid--;
- }
- KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
- __kmp_gtid_threadprivate_key, gtid ));
- return gtid;
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if KMP_GROUP_AFFINITY
-
-//
-// Only 1 DWORD in the mask should have any procs set.
-// Return the appropriate index, or -1 for an invalid mask.
-//
-int
-__kmp_get_proc_group( kmp_affin_mask_t const *mask )
-{
- int i;
- int group = -1;
- for (i = 0; i < __kmp_num_proc_groups; i++) {
- if (mask[i] == 0) {
- continue;
- }
- if (group >= 0) {
- return -1;
- }
- group = i;
- }
- return group;
-}
-
-#endif /* KMP_GROUP_AFFINITY */
-
-int
-__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
-{
-
-#if KMP_GROUP_AFFINITY
-
- if (__kmp_num_proc_groups > 1) {
- //
- // Check for a valid mask.
- //
- GROUP_AFFINITY ga;
- int group = __kmp_get_proc_group( mask );
- if (group < 0) {
- if (abort_on_error) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- return -1;
- }
-
- //
- // Transform the bit vector into a GROUP_AFFINITY struct
- // and make the system call to set affinity.
- //
- ga.Group = group;
- ga.Mask = mask[group];
- ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
-
- KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
- if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
- }
- }
- else
-
-#endif /* KMP_GROUP_AFFINITY */
-
- {
- if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
- }
- }
- return 0;
-}
-
-int
-__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
-{
-
-#if KMP_GROUP_AFFINITY
-
- if (__kmp_num_proc_groups > 1) {
- KMP_CPU_ZERO(mask);
- GROUP_AFFINITY ga;
- KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
-
- if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- return error;
- }
-
- if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups)
- || (ga.Mask == 0)) {
- return -1;
- }
-
- mask[ga.Group] = ga.Mask;
- }
- else
-
-#endif /* KMP_GROUP_AFFINITY */
-
- {
- kmp_affin_mask_t newMask, sysMask, retval;
-
- if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- return error;
- }
- retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
- if (! retval) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- return error;
- }
- newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
- if (! newMask) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- }
- *mask = retval;
- }
- return 0;
-}
-
-void
-__kmp_affinity_bind_thread( int proc )
-{
-
-#if KMP_GROUP_AFFINITY
-
- if (__kmp_num_proc_groups > 1) {
- //
- // Form the GROUP_AFFINITY struct directly, rather than filling
- // out a bit vector and calling __kmp_set_system_affinity().
- //
- GROUP_AFFINITY ga;
- KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups
- * CHAR_BIT * sizeof(DWORD_PTR))));
- ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR));
- ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR)));
- ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
-
- KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
- if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
- DWORD error = GetLastError();
- if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
- __kmp_msg(
- kmp_ms_warning,
- KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- }
- }
- else
-
-#endif /* KMP_GROUP_AFFINITY */
-
- {
- kmp_affin_mask_t mask;
- KMP_CPU_ZERO(&mask);
- KMP_CPU_SET(proc, &mask);
- __kmp_set_system_affinity(&mask, TRUE);
- }
-}
-
-void
-__kmp_affinity_determine_capable( const char *env_var )
-{
- //
- // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask().
- //
-
-#if KMP_GROUP_AFFINITY
- KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(kmp_affin_mask_t));
-#else
- KMP_AFFINITY_ENABLE(sizeof(kmp_affin_mask_t));
-#endif
-
- KA_TRACE( 10, (
- "__kmp_affinity_determine_capable: "
- "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n",
- __kmp_affin_mask_size
- ) );
-}
-
-double
-__kmp_read_cpu_time( void )
-{
- FILETIME CreationTime, ExitTime, KernelTime, UserTime;
- int status;
- double cpu_time;
-
- cpu_time = 0;
-
- status = GetProcessTimes( GetCurrentProcess(), &CreationTime,
- &ExitTime, &KernelTime, &UserTime );
-
- if (status) {
- double sec = 0;
-
- sec += KernelTime.dwHighDateTime;
- sec += UserTime.dwHighDateTime;
-
- /* Shift left by 32 bits */
- sec *= (double) (1 << 16) * (double) (1 << 16);
-
- sec += KernelTime.dwLowDateTime;
- sec += UserTime.dwLowDateTime;
-
- cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC;
- }
-
- return cpu_time;
-}
-
-int
-__kmp_read_system_info( struct kmp_sys_info *info )
-{
- info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */
- info->minflt = 0; /* the number of page faults serviced without any I/O */
- info->majflt = 0; /* the number of page faults serviced that required I/O */
- info->nswap = 0; /* the number of times a process was "swapped" out of memory */
- info->inblock = 0; /* the number of times the file system had to perform input */
- info->oublock = 0; /* the number of times the file system had to perform output */
- info->nvcsw = 0; /* the number of times a context switch was voluntarily */
- info->nivcsw = 0; /* the number of times a context switch was forced */
-
- return 1;
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
-void
-__kmp_runtime_initialize( void )
-{
- SYSTEM_INFO info;
- kmp_str_buf_t path;
- UINT path_size;
-
- if ( __kmp_init_runtime ) {
- return;
- };
-
-#if KMP_DYNAMIC_LIB
- /* Pin dynamic library for the lifetime of application */
- {
- // First, turn off error message boxes
- UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
- HMODULE h;
- BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
- |GET_MODULE_HANDLE_EX_FLAG_PIN,
- (LPCTSTR)&__kmp_serial_initialize, &h);
- KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded");
- SetErrorMode (err_mode); // Restore error mode
- KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") );
- }
-#endif
-
- InitializeCriticalSection( & __kmp_win32_section );
-#if USE_ITT_BUILD
- __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" );
-#endif /* USE_ITT_BUILD */
- __kmp_initialize_system_tick();
-
- #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
- if ( ! __kmp_cpuinfo.initialized ) {
- __kmp_query_cpuid( & __kmp_cpuinfo );
- }; // if
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- /* Set up minimum number of threads to switch to TLS gtid */
- #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB
- // Windows* OS, static library.
- /*
- New thread may use stack space previously used by another thread, currently terminated.
- On Windows* OS, in case of static linking, we do not know the moment of thread termination,
- and our structures (__kmp_threads and __kmp_root arrays) are still keep info about dead
- threads. This leads to problem in __kmp_get_global_thread_id() function: it wrongly
- finds gtid (by searching through stack addresses of all known threads) for unregistered
- foreign tread.
-
- Setting __kmp_tls_gtid_min to 0 workarounds this problem: __kmp_get_global_thread_id()
- does not search through stacks, but get gtid from TLS immediately.
-
- --ln
- */
- __kmp_tls_gtid_min = 0;
- #else
- __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
- #endif
-
- /* for the static library */
- if ( !__kmp_gtid_threadprivate_key ) {
- __kmp_gtid_threadprivate_key = TlsAlloc();
- if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) {
- KMP_FATAL( TLSOutOfIndexes );
- }
- }
-
-
- //
- // Load ntdll.dll.
- //
- /*
- Simple
- GetModuleHandle( "ntdll.dl" )
- is not suitable due to security issue (see
- http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full
- path to the library.
- */
- __kmp_str_buf_init( & path );
- path_size = GetSystemDirectory( path.str, path.size );
- KMP_DEBUG_ASSERT( path_size > 0 );
- if ( path_size >= path.size ) {
- //
- // Buffer is too short. Expand the buffer and try again.
- //
- __kmp_str_buf_reserve( & path, path_size );
- path_size = GetSystemDirectory( path.str, path.size );
- KMP_DEBUG_ASSERT( path_size > 0 );
- }; // if
- if ( path_size > 0 && path_size < path.size ) {
- //
- // Now we have system directory name in the buffer.
- // Append backslash and name of dll to form full path,
- //
- path.used = path_size;
- __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" );
-
- //
- // Now load ntdll using full path.
- //
- ntdll = GetModuleHandle( path.str );
- }
-
- KMP_DEBUG_ASSERT( ntdll != NULL );
- if ( ntdll != NULL ) {
- NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" );
- }
- KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL );
-
-#if KMP_GROUP_AFFINITY
- //
- // Load kernel32.dll.
- // Same caveat - must use full system path name.
- //
- if ( path_size > 0 && path_size < path.size ) {
- //
- // Truncate the buffer back to just the system path length,
- // discarding "\\ntdll.dll", and replacing it with "kernel32.dll".
- //
- path.used = path_size;
- __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" );
-
- //
- // Load kernel32.dll using full path.
- //
- kernel32 = GetModuleHandle( path.str );
- KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) );
-
- //
- // Load the function pointers to kernel32.dll routines
- // that may or may not exist on this system.
- //
- if ( kernel32 != NULL ) {
- __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" );
- __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" );
- __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" );
- __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" );
-
- KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) );
- KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) );
- KA_TRACE( 10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) );
- KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) );
- KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) );
-
- //
- // See if group affinity is supported on this system.
- // If so, calculate the #groups and #procs.
- //
- // Group affinity was introduced with Windows* 7 OS and
- // Windows* Server 2008 R2 OS.
- //
- if ( ( __kmp_GetActiveProcessorCount != NULL )
- && ( __kmp_GetActiveProcessorGroupCount != NULL )
- && ( __kmp_GetThreadGroupAffinity != NULL )
- && ( __kmp_SetThreadGroupAffinity != NULL )
- && ( ( __kmp_num_proc_groups
- = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) {
- //
- // Calculate the total number of active OS procs.
- //
- int i;
-
- KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
-
- __kmp_xproc = 0;
-
- for ( i = 0; i < __kmp_num_proc_groups; i++ ) {
- DWORD size = __kmp_GetActiveProcessorCount( i );
- __kmp_xproc += size;
- KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) );
- }
- }
- else {
- KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
- }
- }
- }
- if ( __kmp_num_proc_groups <= 1 ) {
- GetSystemInfo( & info );
- __kmp_xproc = info.dwNumberOfProcessors;
- }
-#else
- GetSystemInfo( & info );
- __kmp_xproc = info.dwNumberOfProcessors;
-#endif /* KMP_GROUP_AFFINITY */
-
- //
- // If the OS said there were 0 procs, take a guess and use a value of 2.
- // This is done for Linux* OS, also. Do we need error / warning?
- //
- if ( __kmp_xproc <= 0 ) {
- __kmp_xproc = 2;
- }
-
- KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) );
-
- __kmp_str_buf_free( & path );
-
-#if USE_ITT_BUILD
- __kmp_itt_initialize();
-#endif /* USE_ITT_BUILD */
-
- __kmp_init_runtime = TRUE;
-} // __kmp_runtime_initialize
-
-void
-__kmp_runtime_destroy( void )
-{
- if ( ! __kmp_init_runtime ) {
- return;
- }
-
-#if USE_ITT_BUILD
- __kmp_itt_destroy();
-#endif /* USE_ITT_BUILD */
-
- /* we can't DeleteCriticalsection( & __kmp_win32_section ); */
- /* due to the KX_TRACE() commands */
- KA_TRACE( 40, ("__kmp_runtime_destroy\n" ));
-
- if( __kmp_gtid_threadprivate_key ) {
- TlsFree( __kmp_gtid_threadprivate_key );
- __kmp_gtid_threadprivate_key = 0;
- }
-
- __kmp_affinity_uninitialize();
- DeleteCriticalSection( & __kmp_win32_section );
-
- ntdll = NULL;
- NtQuerySystemInformation = NULL;
-
-#if KMP_ARCH_X86_64
- kernel32 = NULL;
- __kmp_GetActiveProcessorCount = NULL;
- __kmp_GetActiveProcessorGroupCount = NULL;
- __kmp_GetThreadGroupAffinity = NULL;
- __kmp_SetThreadGroupAffinity = NULL;
-#endif // KMP_ARCH_X86_64
-
- __kmp_init_runtime = FALSE;
-}
-
-
-void
-__kmp_terminate_thread( int gtid )
-{
- kmp_info_t *th = __kmp_threads[ gtid ];
-
- if( !th ) return;
-
- KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );
-
- if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) {
- /* It's OK, the thread may have exited already */
- }
- __kmp_free_handle( th->th.th_info.ds.ds_thread );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_clear_system_time( void )
-{
- BOOL status;
- LARGE_INTEGER time;
- status = QueryPerformanceCounter( & time );
- __kmp_win32_time = (kmp_int64) time.QuadPart;
-}
-
-void
-__kmp_initialize_system_tick( void )
-{
- {
- BOOL status;
- LARGE_INTEGER freq;
-
- status = QueryPerformanceFrequency( & freq );
- if (! status) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
-
- }
- else {
- __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart;
- }
- }
-}
-
-/* Calculate the elapsed wall clock time for the user */
-
-void
-__kmp_elapsed( double *t )
-{
- BOOL status;
- LARGE_INTEGER now;
- status = QueryPerformanceCounter( & now );
- *t = ((double) now.QuadPart) * __kmp_win32_tick;
-}
-
-/* Calculate the elapsed wall clock tick for the user */
-
-void
-__kmp_elapsed_tick( double *t )
-{
- *t = __kmp_win32_tick;
-}
-
-void
-__kmp_read_system_time( double *delta )
-{
-
- if (delta != NULL) {
- BOOL status;
- LARGE_INTEGER now;
-
- status = QueryPerformanceCounter( & now );
-
- *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time))
- * __kmp_win32_tick;
- }
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void * __stdcall
-__kmp_launch_worker( void *arg )
-{
- volatile void *stack_data;
- void *exit_val;
- void *padding = 0;
- kmp_info_t *this_thr = (kmp_info_t *) arg;
- int gtid;
-
- gtid = this_thr->th.th_info.ds.ds_gtid;
- __kmp_gtid_set_specific( gtid );
-#ifdef KMP_TDATA_GTID
- #error "This define causes problems with LoadLibrary() + declspec(thread) " \
- "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
- "reference: http://support.microsoft.com/kb/118816"
- //__kmp_gtid = gtid;
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_thread_name( gtid );
-#endif /* USE_ITT_BUILD */
-
- __kmp_affinity_set_init_mask( gtid, FALSE );
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- //
- // Set the FP control regs to be a copy of
- // the parallel initialization thread's.
- //
- __kmp_clear_x87_fpu_status_word();
- __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
- __kmp_load_mxcsr( &__kmp_init_mxcsr );
-#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
- if ( __kmp_stkoffset > 0 && gtid > 0 ) {
- padding = KMP_ALLOCA( gtid * __kmp_stkoffset );
- }
-
- KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
- this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
- TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );
-
- if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
- TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data);
- KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE );
- __kmp_check_stack_overlap( this_thr );
- }
- KMP_MB();
- exit_val = __kmp_launch_thread( this_thr );
- KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
- TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
- KMP_MB();
- return exit_val;
-}
-
-
-/* The monitor thread controls all of the threads in the complex */
-
-void * __stdcall
-__kmp_launch_monitor( void *arg )
-{
- DWORD wait_status;
- kmp_thread_t monitor;
- int status;
- int interval;
- kmp_info_t *this_thr = (kmp_info_t *) arg;
-
- KMP_DEBUG_ASSERT(__kmp_init_monitor);
- TCW_4( __kmp_init_monitor, 2 ); // AC: Signal the library that monitor has started
- // TODO: hide "2" in enum (like {true,false,started})
- this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
- TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) );
-
- monitor = GetCurrentThread();
-
- /* set thread priority */
- status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST );
- if (! status) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetThreadPriority ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
-
- /* register us as monitor */
- __kmp_gtid_set_specific( KMP_GTID_MONITOR );
-#ifdef KMP_TDATA_GTID
- #error "This define causes problems with LoadLibrary() + declspec(thread) " \
- "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
- "reference: http://support.microsoft.com/kb/118816"
- //__kmp_gtid = KMP_GTID_MONITOR;
-#endif
-
-#if USE_ITT_BUILD
- __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread.
-#endif /* USE_ITT_BUILD */
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */
-
- while (! TCR_4(__kmp_global.g.g_done)) {
- /* This thread monitors the state of the system */
-
- KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );
-
- wait_status = WaitForSingleObject( __kmp_monitor_ev, interval );
-
- if (wait_status == WAIT_TIMEOUT) {
- TCW_4( __kmp_global.g.g_time.dt.t_value,
- TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
- }
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
-
- KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) );
-
- status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL );
- if (! status) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetThreadPriority ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
-
- if (__kmp_global.g.g_abort != 0) {
- /* now we need to terminate the worker threads */
- /* the value of t_abort is the signal we caught */
-
- int gtid;
-
- KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) );
-
- /* terminate the OpenMP worker threads */
- /* TODO this is not valid for sibling threads!!
- * the uber master might not be 0 anymore.. */
- for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
- __kmp_terminate_thread( gtid );
-
- __kmp_cleanup();
-
- Sleep( 0 );
-
- KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) );
-
- if (__kmp_global.g.g_abort > 0) {
- raise( __kmp_global.g.g_abort );
- }
- }
-
- TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
-
- KMP_MB();
- return arg;
-}
-
-void
-__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
-{
- kmp_thread_t handle;
- DWORD idThread;
-
- KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );
-
- th->th.th_info.ds.ds_gtid = gtid;
-
- if ( KMP_UBER_GTID(gtid) ) {
- int stack_data;
-
- /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use.
- Is it appropriate to just use GetCurrentThread? When should we close this handle? When
- unregistering the root?
- */
- {
- BOOL rc;
- rc = DuplicateHandle(
- GetCurrentProcess(),
- GetCurrentThread(),
- GetCurrentProcess(),
- &th->th.th_info.ds.ds_thread,
- 0,
- FALSE,
- DUPLICATE_SAME_ACCESS
- );
- KMP_ASSERT( rc );
- KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
- (LPVOID)th,
- th->th.th_info.ds.ds_thread ) );
- th->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
- }
- if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
- /* we will dynamically update the stack range if gtid_mode == 1 */
- TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
- TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
- TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
- __kmp_check_stack_overlap( th );
- }
- }
- else {
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- /* Set stack size for this thread now. */
- KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC
- " bytes\n", stack_size ) );
-
- stack_size += gtid * __kmp_stkoffset;
-
- TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size);
- TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
-
- KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %"
- KMP_SIZE_T_SPEC
- " bytes, &__kmp_launch_worker = %p, th = %p, "
- "&idThread = %p\n",
- (SIZE_T) stack_size,
- (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
- (LPVOID) th, &idThread ) );
-
- {
- handle = CreateThread( NULL, (SIZE_T) stack_size,
- (LPTHREAD_START_ROUTINE) __kmp_launch_worker,
- (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
- }
-
- KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %"
- KMP_SIZE_T_SPEC
- " bytes, &__kmp_launch_worker = %p, th = %p, "
- "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
- (SIZE_T) stack_size,
- (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
- (LPVOID) th, idThread, handle ) );
-
- {
- if ( handle == 0 ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantCreateThread ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- } else {
- th->th.th_info.ds.ds_thread = handle;
- }
- }
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
-
- KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
-}
-
-int
-__kmp_still_running(kmp_info_t *th) {
- return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0));
-}
-
-void
-__kmp_create_monitor( kmp_info_t *th )
-{
- kmp_thread_t handle;
- DWORD idThread;
- int ideal, new_ideal;
-
- KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL );
- if ( __kmp_monitor_ev == NULL ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantCreateEvent ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
-#if USE_ITT_BUILD
- __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" );
-#endif /* USE_ITT_BUILD */
-
- th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
- th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
-
- // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how
- // to automatically expand stacksize based on CreateThread error code.
- if ( __kmp_monitor_stksize == 0 ) {
- __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
- }
- if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
- __kmp_monitor_stksize = __kmp_sys_min_stksize;
- }
-
- KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
- (int) __kmp_monitor_stksize ) );
-
- TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );
-
- handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize,
- (LPTHREAD_START_ROUTINE) __kmp_launch_monitor,
- (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
- if (handle == 0) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantCreateThread ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- else
- th->th.th_info.ds.ds_thread = handle;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n",
- (void *) th->th.th_info.ds.ds_thread ) );
-}
-
-/*
- Check to see if thread is still alive.
-
-    NOTE:  The ExitProcess(code) system call causes all threads to terminate
-           with exit_val = code.  Because of this we cannot rely on exit_val
-           having any particular value, so this routine may report STILL_ACTIVE
-           in exit_val even after the thread is dead.
-*/
-
-int
-__kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val )
-{
- DWORD rc;
- rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val );
- if ( rc == 0 ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FunctionError, "GetExitCodeThread()" ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }; // if
- return ( *exit_val == STILL_ACTIVE );
-}
-
-
-void
-__kmp_exit_thread(
- int exit_status
-) {
- ExitThread( exit_status );
-} // __kmp_exit_thread
-
-/*
- This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor().
-*/
-static void
-__kmp_reap_common( kmp_info_t * th )
-{
- DWORD exit_val;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) );
-
- /*
- 2006-10-19:
-
- There are two opposite situations:
-
-        1. Windows* OS keeps the thread alive after it resets the ds_alive flag and exits from the
-           thread function. (For example, see C70770/Q394281 "unloading of dll based on OMP is very
-           slow".)
-        2. Windows* OS may kill the thread before it resets the ds_alive flag.
-
-        The right solution seems to be to wait for *either* thread termination *or* the resetting of ds_alive.
-
- */
-
- {
- // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to
- // cover this usage also.
- void * obj = NULL;
+/*
+ * z_Windows_NT_util.c -- platform specific routines.
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_itt.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_wait_release.h"
+
+
+
+/* ----------------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------------------- */
+
+/* This code is related to the NtQuerySystemInformation() function. That function
+   is used in the load balance algorithm for OMP_DYNAMIC=true to find the
+   number of running threads in the system. */
+
+#include <ntstatus.h>
+#include <ntsecapi.h> // UNICODE_STRING
+
+enum SYSTEM_INFORMATION_CLASS {
+ SystemProcessInformation = 5
+}; // SYSTEM_INFORMATION_CLASS
+
+struct CLIENT_ID {
+ HANDLE UniqueProcess;
+ HANDLE UniqueThread;
+}; // struct CLIENT_ID
+
+enum THREAD_STATE {
+ StateInitialized,
+ StateReady,
+ StateRunning,
+ StateStandby,
+ StateTerminated,
+ StateWait,
+ StateTransition,
+ StateUnknown
+}; // enum THREAD_STATE
+
+struct VM_COUNTERS {
+ SIZE_T PeakVirtualSize;
+ SIZE_T VirtualSize;
+ ULONG PageFaultCount;
+ SIZE_T PeakWorkingSetSize;
+ SIZE_T WorkingSetSize;
+ SIZE_T QuotaPeakPagedPoolUsage;
+ SIZE_T QuotaPagedPoolUsage;
+ SIZE_T QuotaPeakNonPagedPoolUsage;
+ SIZE_T QuotaNonPagedPoolUsage;
+ SIZE_T PagefileUsage;
+ SIZE_T PeakPagefileUsage;
+ SIZE_T PrivatePageCount;
+}; // struct VM_COUNTERS
+
+struct SYSTEM_THREAD {
+ LARGE_INTEGER KernelTime;
+ LARGE_INTEGER UserTime;
+ LARGE_INTEGER CreateTime;
+ ULONG WaitTime;
+ LPVOID StartAddress;
+ CLIENT_ID ClientId;
+ DWORD Priority;
+ LONG BasePriority;
+ ULONG ContextSwitchCount;
+ THREAD_STATE State;
+ ULONG WaitReason;
+}; // SYSTEM_THREAD
+
+KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 );
+#if KMP_ARCH_X86
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 );
+#else
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 );
+#endif
+
+struct SYSTEM_PROCESS_INFORMATION {
+ ULONG NextEntryOffset;
+ ULONG NumberOfThreads;
+ LARGE_INTEGER Reserved[ 3 ];
+ LARGE_INTEGER CreateTime;
+ LARGE_INTEGER UserTime;
+ LARGE_INTEGER KernelTime;
+ UNICODE_STRING ImageName;
+ DWORD BasePriority;
+ HANDLE ProcessId;
+ HANDLE ParentProcessId;
+ ULONG HandleCount;
+ ULONG Reserved2[ 2 ];
+ VM_COUNTERS VMCounters;
+ IO_COUNTERS IOCounters;
+ SYSTEM_THREAD Threads[ 1 ];
+}; // SYSTEM_PROCESS_INFORMATION
+typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION;
+
+KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 );
+KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 );
+KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 );
+#if KMP_ARCH_X86
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 );
+#else
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 );
+ KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 );
+#endif
+
+typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG );
+NtQuerySystemInformation_t NtQuerySystemInformation = NULL;
+
+HMODULE ntdll = NULL;
+
+/* End of NtQuerySystemInformation()-related code */
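+/*
+   Illustrative sketch (an assumption about usage, not additional runtime code):
+   NtQuerySystemInformation is resolved from ntdll at runtime and is normally
+   called in a grow-the-buffer loop, because the required buffer size is not
+   known in advance:
+
+       void * buffer = NULL;
+       ULONG buff_size = 100 * 1024, info_size = 0;
+       NTSTATUS status;
+       do {
+           buffer = KMP_INTERNAL_REALLOC( buffer, buff_size );
+           status = NtQuerySystemInformation( SystemProcessInformation,
+                                              buffer, buff_size, &info_size );
+           if ( status == STATUS_INFO_LENGTH_MISMATCH ) buff_size *= 2;
+       } while ( status == STATUS_INFO_LENGTH_MISMATCH );
+
+   __kmp_get_load_balance() in this file follows this pattern.
+*/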
+
+#if KMP_GROUP_AFFINITY
+static HMODULE kernel32 = NULL;
+#endif /* KMP_GROUP_AFFINITY */
+
+/* ----------------------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------------------- */
+
+#if KMP_HANDLE_SIGNALS
+ typedef void (* sig_func_t )( int );
+ static sig_func_t __kmp_sighldrs[ NSIG ];
+ static int __kmp_siginstalled[ NSIG ];
+#endif
+
+static HANDLE __kmp_monitor_ev;
+static kmp_int64 __kmp_win32_time;
+double __kmp_win32_tick;
+
+int __kmp_init_runtime = FALSE;
+CRITICAL_SECTION __kmp_win32_section;
+
+void
+__kmp_win32_mutex_init( kmp_win32_mutex_t *mx )
+{
+ InitializeCriticalSection( & mx->cs );
+#if USE_ITT_BUILD
+ __kmp_itt_system_object_created( & mx->cs, "Critical Section" );
+#endif /* USE_ITT_BUILD */
+}
+
+void
+__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx )
+{
+ DeleteCriticalSection( & mx->cs );
+}
+
+void
+__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx )
+{
+ EnterCriticalSection( & mx->cs );
+}
+
+void
+__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx )
+{
+ LeaveCriticalSection( & mx->cs );
+}
+
+void
+__kmp_win32_cond_init( kmp_win32_cond_t *cv )
+{
+ cv->waiters_count_ = 0;
+ cv->wait_generation_count_ = 0;
+ cv->release_count_ = 0;
+
+ /* Initialize the critical section */
+ __kmp_win32_mutex_init( & cv->waiters_count_lock_ );
+
+ /* Create a manual-reset event. */
+ cv->event_ = CreateEvent( NULL, // no security
+ TRUE, // manual-reset
+ FALSE, // non-signaled initially
+ NULL ); // unnamed
+#if USE_ITT_BUILD
+ __kmp_itt_system_object_created( cv->event_, "Event" );
+#endif /* USE_ITT_BUILD */
+}
+
+void
+__kmp_win32_cond_destroy( kmp_win32_cond_t *cv )
+{
+ __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ );
+ __kmp_free_handle( cv->event_ );
+ memset( cv, '\0', sizeof( *cv ) );
+}
+
+/* TODO associate cv with a team instead of a thread so as to optimize
+ * the case where we wake up a whole team */
+
+void
+__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load )
+{
+ int my_generation;
+ int last_waiter;
+
+ /* Avoid race conditions */
+ __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
+
+ /* Increment count of waiters */
+ cv->waiters_count_++;
+
+ /* Store current generation in our activation record. */
+ my_generation = cv->wait_generation_count_;
+
+ __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
+ __kmp_win32_mutex_unlock( mx );
+
+
+ for (;;) {
+ int wait_done;
+
+ /* Wait until the event is signaled */
+ WaitForSingleObject( cv->event_, INFINITE );
+
+ __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
+
+ /* Exit the loop when the <cv->event_> is signaled and
+ * there are still waiting threads from this <wait_generation>
+ * that haven't been released from this wait yet. */
+ wait_done = ( cv->release_count_ > 0 ) &&
+ ( cv->wait_generation_count_ != my_generation );
+
+ __kmp_win32_mutex_unlock( &cv->waiters_count_lock_);
+
+        /* There used to be a semicolon after the if statement below;
+         * it looked like a bug, so it was removed. */
+ if( wait_done )
+ break;
+ }
+
+ __kmp_win32_mutex_lock( mx );
+ __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
+
+ cv->waiters_count_--;
+ cv->release_count_--;
+
+ last_waiter = ( cv->release_count_ == 0 );
+
+ __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
+
+ if( last_waiter ) {
+ /* We're the last waiter to be notified, so reset the manual event. */
+ ResetEvent( cv->event_ );
+ }
+}
+
+void
+__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv )
+{
+ __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
+
+ if( cv->waiters_count_ > 0 ) {
+ SetEvent( cv->event_ );
+ /* Release all the threads in this generation. */
+
+ cv->release_count_ = cv->waiters_count_;
+
+ /* Start a new generation. */
+ cv->wait_generation_count_++;
+ }
+
+ __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
+}
+
+void
+__kmp_win32_cond_signal( kmp_win32_cond_t *cv )
+{
+ __kmp_win32_cond_broadcast( cv );
+}
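+/*
+   Usage sketch (an assumed pattern, not additional runtime code): the condition
+   variable above is intended to be used with its mutex in the usual monitor style,
+
+       __kmp_win32_mutex_lock( &mx );
+       while ( ! predicate )
+           __kmp_win32_cond_wait( &cv, &mx, 0, 0 );
+       __kmp_win32_mutex_unlock( &mx );
+
+   while the waking side sets the predicate under the same mutex and calls
+   __kmp_win32_cond_broadcast( &cv ).
+*/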
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_enable( int new_state )
+{
+ if (__kmp_init_runtime)
+ LeaveCriticalSection( & __kmp_win32_section );
+}
+
+void
+__kmp_disable( int *old_state )
+{
+ *old_state = 0;
+
+ if (__kmp_init_runtime)
+ EnterCriticalSection( & __kmp_win32_section );
+}
+
+void
+__kmp_suspend_initialize( void )
+{
+ /* do nothing */
+}
+
+static void
+__kmp_suspend_initialize_thread( kmp_info_t *th )
+{
+ if ( ! TCR_4( th->th.th_suspend_init ) ) {
+        /* this means we haven't initialized the suspension synchronization objects for this
+           thread in this instance of the process */
+ __kmp_win32_cond_init( &th->th.th_suspend_cv );
+ __kmp_win32_mutex_init( &th->th.th_suspend_mx );
+ TCW_4( th->th.th_suspend_init, TRUE );
+ }
+}
+
+void
+__kmp_suspend_uninitialize_thread( kmp_info_t *th )
+{
+ if ( TCR_4( th->th.th_suspend_init ) ) {
+        /* this means we have initialized the suspension synchronization objects for this
+           thread in this instance of the process */
+ __kmp_win32_cond_destroy( & th->th.th_suspend_cv );
+ __kmp_win32_mutex_destroy( & th->th.th_suspend_mx );
+ TCW_4( th->th.th_suspend_init, FALSE );
+ }
+}
+
+/* This routine puts the calling thread to sleep after setting the
+ * sleep bit for the indicated flag variable to true.
+ */
+template <class C>
+static inline void __kmp_suspend_template( int th_gtid, C *flag )
+{
+ kmp_info_t *th = __kmp_threads[th_gtid];
+ int status;
+ typename C::flag_t old_spin;
+
+ KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) );
+
+ __kmp_suspend_initialize_thread( th );
+ __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
+
+ KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n",
+ th_gtid, flag->get() ) );
+
+ /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
+ gets called first?
+ */
+ old_spin = flag->set_sleeping();
+
+ KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n",
+ th_gtid, flag->get(), *(flag->get()) ) );
+
+ if ( flag->done_check_val(old_spin) ) {
+ old_spin = flag->unset_sleeping();
+ KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n",
+ th_gtid, flag->get()) );
+ } else {
+#ifdef DEBUG_SUSPEND
+ __kmp_suspend_count++;
+#endif
+ /* Encapsulate in a loop as the documentation states that this may
+ * "with low probability" return when the condition variable has
+ * not been signaled or broadcast
+ */
+ int deactivated = FALSE;
+ TCW_PTR(th->th.th_sleep_loc, (void *)flag);
+ while ( flag->is_sleeping() ) {
+ KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n",
+ th_gtid ) );
+ // Mark the thread as no longer active (only in the first iteration of the loop).
+ if ( ! deactivated ) {
+ th->th.th_active = FALSE;
+ if ( th->th.th_active_in_pool ) {
+ th->th.th_active_in_pool = FALSE;
+ KMP_TEST_THEN_DEC32(
+ (kmp_int32 *) &__kmp_thread_pool_active_nth );
+ KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
+ }
+ deactivated = TRUE;
+
+
+ __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
+ }
+ else {
+ __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
+ }
+
+#ifdef KMP_DEBUG
+ if( flag->is_sleeping() ) {
+ KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ));
+ }
+#endif /* KMP_DEBUG */
+
+ } // while
+
+        // Mark the thread as active again (if it was previously marked as inactive)
+ if ( deactivated ) {
+ th->th.th_active = TRUE;
+ if ( TCR_4(th->th.th_in_pool) ) {
+ KMP_TEST_THEN_INC32(
+ (kmp_int32 *) &__kmp_thread_pool_active_nth );
+ th->th.th_active_in_pool = TRUE;
+ }
+ }
+ }
+
+
+ __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
+
+ KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) );
+}
+
+void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
+void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
+void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
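+/* Note on the protocol above: the sleep bit is set and re-checked while holding
+   th_suspend_mx, and __kmp_resume_template() below clears it and signals
+   th_suspend_cv under the same mutex, so a wakeup that races with going to
+   sleep cannot be lost. */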
+
+
+/* This routine signals the thread specified by target_gtid to wake up
+ * after setting the sleep bit indicated by the flag argument to FALSE
+ */
+template <class C>
+static inline void __kmp_resume_template( int target_gtid, C *flag )
+{
+ kmp_info_t *th = __kmp_threads[target_gtid];
+ int status;
+
+#ifdef KMP_DEBUG
+ int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
+#endif
+
+ KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) );
+
+ __kmp_suspend_initialize_thread( th );
+ __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
+
+ if (!flag) { // coming from __kmp_null_resume_wrapper
+ flag = (C *)th->th.th_sleep_loc;
+ }
+
+ // First, check if the flag is null or its type has changed. If so, someone else woke it up.
+ if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to
+ KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n",
+ gtid, target_gtid, NULL ) );
+ __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
+ return;
+ }
+ else {
+ typename C::flag_t old_spin = flag->unset_sleeping();
+ if ( !flag->is_sleeping_val(old_spin) ) {
+ KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): "
+ "%u => %u\n",
+ gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) );
+ __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
+ return;
+ }
+ }
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+
+ KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n",
+ gtid, target_gtid, flag->get() ) );
+
+
+ __kmp_win32_cond_signal( &th->th.th_suspend_cv );
+ __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
+
+ KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n",
+ gtid, target_gtid ) );
+}
+
+void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
+void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
+void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_yield( int cond )
+{
+ if (cond)
+ Sleep(0);
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_gtid_set_specific( int gtid )
+{
+ KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n",
+ gtid, __kmp_gtid_threadprivate_key ));
+ KMP_ASSERT( __kmp_init_runtime );
+ if( ! TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) )
+ KMP_FATAL( TLSSetValueFailed );
+}
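+/* Note: the gtid is stored in TLS biased by +1, so the 0 that TlsGetValue()
+   returns for an unset slot can be distinguished from a legitimate gtid of 0;
+   __kmp_gtid_get_specific() below removes the bias again. */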
+
+int
+__kmp_gtid_get_specific()
+{
+ int gtid;
+ if( !__kmp_init_runtime ) {
+ KA_TRACE( 50, ("__kmp_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
+ return KMP_GTID_SHUTDOWN;
+ }
+ gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key );
+ if ( gtid == 0 ) {
+ gtid = KMP_GTID_DNE;
+ }
+ else {
+ gtid--;
+ }
+ KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
+ __kmp_gtid_threadprivate_key, gtid ));
+ return gtid;
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if KMP_GROUP_AFFINITY
+
+//
+// Only 1 DWORD in the mask should have any procs set.
+// Return the appropriate index, or -1 for an invalid mask.
+//
+int
+__kmp_get_proc_group( kmp_affin_mask_t const *mask )
+{
+ int i;
+ int group = -1;
+ for (i = 0; i < __kmp_num_proc_groups; i++) {
+ if (mask[i] == 0) {
+ continue;
+ }
+ if (group >= 0) {
+ return -1;
+ }
+ group = i;
+ }
+ return group;
+}
+
+#endif /* KMP_GROUP_AFFINITY */
+
+int
+__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
+{
+
+#if KMP_GROUP_AFFINITY
+
+ if (__kmp_num_proc_groups > 1) {
+ //
+ // Check for a valid mask.
+ //
+ GROUP_AFFINITY ga;
+ int group = __kmp_get_proc_group( mask );
+ if (group < 0) {
+ if (abort_on_error) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+ return -1;
+ }
+
+ //
+ // Transform the bit vector into a GROUP_AFFINITY struct
+ // and make the system call to set affinity.
+ //
+ ga.Group = group;
+ ga.Mask = mask[group];
+ ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
+
+ KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
+ if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetThreadAffMask ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ return error;
+ }
+ }
+ else
+
+#endif /* KMP_GROUP_AFFINITY */
+
+ {
+ if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetThreadAffMask ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ return error;
+ }
+ }
+ return 0;
+}
+
+int
+__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
+{
+
+#if KMP_GROUP_AFFINITY
+
+ if (__kmp_num_proc_groups > 1) {
+ KMP_CPU_ZERO(mask);
+ GROUP_AFFINITY ga;
+ KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
+
+ if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
+ KMP_ERR(error),
+ __kmp_msg_null
+ );
+ }
+ return error;
+ }
+
+ if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups)
+ || (ga.Mask == 0)) {
+ return -1;
+ }
+
+ mask[ga.Group] = ga.Mask;
+ }
+ else
+
+#endif /* KMP_GROUP_AFFINITY */
+
+ {
+ kmp_affin_mask_t newMask, sysMask, retval;
+
+ if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
+ KMP_ERR(error),
+ __kmp_msg_null
+ );
+ }
+ return error;
+ }
+ retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
+ if (! retval) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+ KMP_ERR(error),
+ __kmp_msg_null
+ );
+ }
+ return error;
+ }
+ newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
+ if (! newMask) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+ KMP_ERR(error),
+ __kmp_msg_null
+ );
+ }
+ }
+ *mask = retval;
+ }
+ return 0;
+}
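+/* Note on the non-group path above: the classic Win32 API has no direct query
+   for a thread's affinity mask, so the mask is recovered indirectly: the thread
+   affinity is set to the full process mask, the previous mask returned by
+   SetThreadAffinityMask() is captured as the result, and a second
+   SetThreadAffinityMask() call restores the original mask. */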
+
+void
+__kmp_affinity_bind_thread( int proc )
+{
+
+#if KMP_GROUP_AFFINITY
+
+ if (__kmp_num_proc_groups > 1) {
+ //
+ // Form the GROUP_AFFINITY struct directly, rather than filling
+ // out a bit vector and calling __kmp_set_system_affinity().
+ //
+ GROUP_AFFINITY ga;
+ KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups
+ * CHAR_BIT * sizeof(DWORD_PTR))));
+ ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR));
+ ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR)));
+ ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
+
+ KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
+ if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
+ DWORD error = GetLastError();
+ if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
+ __kmp_msg(
+ kmp_ms_warning,
+ KMP_MSG( CantSetThreadAffMask ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ }
+ }
+ else
+
+#endif /* KMP_GROUP_AFFINITY */
+
+ {
+ kmp_affin_mask_t mask;
+ KMP_CPU_ZERO(&mask);
+ KMP_CPU_SET(proc, &mask);
+ __kmp_set_system_affinity(&mask, TRUE);
+ }
+}
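+/* Worked example for the group path above (assuming 64-bit DWORD_PTR, i.e. 64
+   procs per group): binding to proc 70 gives ga.Group = 70 / 64 = 1 and
+   ga.Mask = 1ULL << (70 % 64) = 1ULL << 6. */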
+
+void
+__kmp_affinity_determine_capable( const char *env_var )
+{
+ //
+ // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask().
+ //
+
+#if KMP_GROUP_AFFINITY
+ KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(kmp_affin_mask_t));
+#else
+ KMP_AFFINITY_ENABLE(sizeof(kmp_affin_mask_t));
+#endif
+
+ KA_TRACE( 10, (
+ "__kmp_affinity_determine_capable: "
+ "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n",
+ __kmp_affin_mask_size
+ ) );
+}
+
+double
+__kmp_read_cpu_time( void )
+{
+ FILETIME CreationTime, ExitTime, KernelTime, UserTime;
+ int status;
+ double cpu_time;
+
+ cpu_time = 0;
+
+ status = GetProcessTimes( GetCurrentProcess(), &CreationTime,
+ &ExitTime, &KernelTime, &UserTime );
+
+ if (status) {
+ double sec = 0;
+
+ sec += KernelTime.dwHighDateTime;
+ sec += UserTime.dwHighDateTime;
+
+ /* Shift left by 32 bits */
+ sec *= (double) (1 << 16) * (double) (1 << 16);
+
+ sec += KernelTime.dwLowDateTime;
+ sec += UserTime.dwLowDateTime;
+
+ cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC;
+ }
+
+ return cpu_time;
+}
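+/* Note on the arithmetic above: FILETIME values count 100-nanosecond intervals,
+   so the 64-bit kernel + user total reassembled from the high/low DWORDs is
+   multiplied by 100 (giving nanoseconds) and divided by KMP_NSEC_PER_SEC to
+   yield seconds of CPU time. */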
+
+int
+__kmp_read_system_info( struct kmp_sys_info *info )
+{
+ info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */
+ info->minflt = 0; /* the number of page faults serviced without any I/O */
+ info->majflt = 0; /* the number of page faults serviced that required I/O */
+ info->nswap = 0; /* the number of times a process was "swapped" out of memory */
+ info->inblock = 0; /* the number of times the file system had to perform input */
+ info->oublock = 0; /* the number of times the file system had to perform output */
+    info->nvcsw = 0; /* the number of times a context switch was voluntary */
+ info->nivcsw = 0; /* the number of times a context switch was forced */
+
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+
+void
+__kmp_runtime_initialize( void )
+{
+ SYSTEM_INFO info;
+ kmp_str_buf_t path;
+ UINT path_size;
+
+ if ( __kmp_init_runtime ) {
+ return;
+ };
+
+#if KMP_DYNAMIC_LIB
+ /* Pin dynamic library for the lifetime of application */
+ {
+ // First, turn off error message boxes
+ UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
+ HMODULE h;
+ BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
+ |GET_MODULE_HANDLE_EX_FLAG_PIN,
+ (LPCTSTR)&__kmp_serial_initialize, &h);
+ KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded");
+ SetErrorMode (err_mode); // Restore error mode
+ KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") );
+ }
+#endif
+
+ InitializeCriticalSection( & __kmp_win32_section );
+#if USE_ITT_BUILD
+ __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" );
+#endif /* USE_ITT_BUILD */
+ __kmp_initialize_system_tick();
+
+ #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+ if ( ! __kmp_cpuinfo.initialized ) {
+ __kmp_query_cpuid( & __kmp_cpuinfo );
+ }; // if
+ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ /* Set up minimum number of threads to switch to TLS gtid */
+ #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB
+ // Windows* OS, static library.
+ /*
+        A new thread may use stack space previously used by another, now terminated, thread.
+        On Windows* OS, in the case of static linking, we do not know the moment of thread
+        termination, and our structures (the __kmp_threads and __kmp_root arrays) still keep
+        info about dead threads.  This leads to a problem in __kmp_get_global_thread_id():
+        it wrongly finds a gtid (by searching through the stack addresses of all known
+        threads) for an unregistered foreign thread.
+
+        Setting __kmp_tls_gtid_min to 0 works around this problem: __kmp_get_global_thread_id()
+        does not search through stacks, but gets the gtid from TLS immediately.
+
+ --ln
+ */
+ __kmp_tls_gtid_min = 0;
+ #else
+ __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
+ #endif
+
+ /* for the static library */
+ if ( !__kmp_gtid_threadprivate_key ) {
+ __kmp_gtid_threadprivate_key = TlsAlloc();
+ if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) {
+ KMP_FATAL( TLSOutOfIndexes );
+ }
+ }
+
+
+ //
+ // Load ntdll.dll.
+ //
+ /*
+        A simple
+            GetModuleHandle( "ntdll.dll" )
+        is not suitable due to a security issue (see
+        http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify the
+        full path to the library.
+ */
+ __kmp_str_buf_init( & path );
+ path_size = GetSystemDirectory( path.str, path.size );
+ KMP_DEBUG_ASSERT( path_size > 0 );
+ if ( path_size >= path.size ) {
+ //
+ // Buffer is too short. Expand the buffer and try again.
+ //
+ __kmp_str_buf_reserve( & path, path_size );
+ path_size = GetSystemDirectory( path.str, path.size );
+ KMP_DEBUG_ASSERT( path_size > 0 );
+ }; // if
+ if ( path_size > 0 && path_size < path.size ) {
+ //
+        // Now we have the system directory name in the buffer.
+        // Append a backslash and the dll name to form the full path.
+ //
+ path.used = path_size;
+ __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" );
+
+ //
+ // Now load ntdll using full path.
+ //
+ ntdll = GetModuleHandle( path.str );
+ }
+
+ KMP_DEBUG_ASSERT( ntdll != NULL );
+ if ( ntdll != NULL ) {
+ NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" );
+ }
+ KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL );
+
+#if KMP_GROUP_AFFINITY
+ //
+ // Load kernel32.dll.
+ // Same caveat - must use full system path name.
+ //
+ if ( path_size > 0 && path_size < path.size ) {
+ //
+ // Truncate the buffer back to just the system path length,
+ // discarding "\\ntdll.dll", and replacing it with "kernel32.dll".
+ //
+ path.used = path_size;
+ __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" );
+
+ //
+ // Load kernel32.dll using full path.
+ //
+ kernel32 = GetModuleHandle( path.str );
+ KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) );
+
+ //
+ // Load the function pointers to kernel32.dll routines
+ // that may or may not exist on this system.
+ //
+ if ( kernel32 != NULL ) {
+ __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" );
+ __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" );
+ __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" );
+ __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" );
+
+ KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) );
+ KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) );
+ KA_TRACE( 10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) );
+ KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) );
+ KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) );
+
+ //
+ // See if group affinity is supported on this system.
+ // If so, calculate the #groups and #procs.
+ //
+ // Group affinity was introduced with Windows* 7 OS and
+ // Windows* Server 2008 R2 OS.
+ //
+ if ( ( __kmp_GetActiveProcessorCount != NULL )
+ && ( __kmp_GetActiveProcessorGroupCount != NULL )
+ && ( __kmp_GetThreadGroupAffinity != NULL )
+ && ( __kmp_SetThreadGroupAffinity != NULL )
+ && ( ( __kmp_num_proc_groups
+ = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) {
+ //
+ // Calculate the total number of active OS procs.
+ //
+ int i;
+
+ KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
+
+ __kmp_xproc = 0;
+
+ for ( i = 0; i < __kmp_num_proc_groups; i++ ) {
+ DWORD size = __kmp_GetActiveProcessorCount( i );
+ __kmp_xproc += size;
+ KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) );
+ }
+ }
+ else {
+ KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
+ }
+ }
+ }
+ if ( __kmp_num_proc_groups <= 1 ) {
+ GetSystemInfo( & info );
+ __kmp_xproc = info.dwNumberOfProcessors;
+ }
+#else
+ GetSystemInfo( & info );
+ __kmp_xproc = info.dwNumberOfProcessors;
+#endif /* KMP_GROUP_AFFINITY */
+
+ //
+ // If the OS said there were 0 procs, take a guess and use a value of 2.
+    // This is also done for Linux* OS.  Do we need an error / warning?
+ //
+ if ( __kmp_xproc <= 0 ) {
+ __kmp_xproc = 2;
+ }
+
+ KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) );
+
+ __kmp_str_buf_free( & path );
+
+#if USE_ITT_BUILD
+ __kmp_itt_initialize();
+#endif /* USE_ITT_BUILD */
+
+ __kmp_init_runtime = TRUE;
+} // __kmp_runtime_initialize
+
+void
+__kmp_runtime_destroy( void )
+{
+ if ( ! __kmp_init_runtime ) {
+ return;
+ }
+
+#if USE_ITT_BUILD
+ __kmp_itt_destroy();
+#endif /* USE_ITT_BUILD */
+
+    /* we can't DeleteCriticalSection( & __kmp_win32_section ); */
+ /* due to the KX_TRACE() commands */
+ KA_TRACE( 40, ("__kmp_runtime_destroy\n" ));
+
+ if( __kmp_gtid_threadprivate_key ) {
+ TlsFree( __kmp_gtid_threadprivate_key );
+ __kmp_gtid_threadprivate_key = 0;
+ }
+
+ __kmp_affinity_uninitialize();
+ DeleteCriticalSection( & __kmp_win32_section );
+
+ ntdll = NULL;
+ NtQuerySystemInformation = NULL;
+
+#if KMP_ARCH_X86_64
+ kernel32 = NULL;
+ __kmp_GetActiveProcessorCount = NULL;
+ __kmp_GetActiveProcessorGroupCount = NULL;
+ __kmp_GetThreadGroupAffinity = NULL;
+ __kmp_SetThreadGroupAffinity = NULL;
+#endif // KMP_ARCH_X86_64
+
+ __kmp_init_runtime = FALSE;
+}
+
+
+void
+__kmp_terminate_thread( int gtid )
+{
+ kmp_info_t *th = __kmp_threads[ gtid ];
+
+ if( !th ) return;
+
+ KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );
+
+ if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) {
+ /* It's OK, the thread may have exited already */
+ }
+ __kmp_free_handle( th->th.th_info.ds.ds_thread );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void
+__kmp_clear_system_time( void )
+{
+ BOOL status;
+ LARGE_INTEGER time;
+ status = QueryPerformanceCounter( & time );
+ __kmp_win32_time = (kmp_int64) time.QuadPart;
+}
+
+void
+__kmp_initialize_system_tick( void )
+{
+ {
+ BOOL status;
+ LARGE_INTEGER freq;
+
+ status = QueryPerformanceFrequency( & freq );
+ if (! status) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+
+ }
+ else {
+ __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart;
+ }
+ }
+}
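+/* __kmp_win32_tick is thus the duration of one QueryPerformanceCounter tick in
+   seconds (1 / frequency); __kmp_elapsed() below converts a raw counter value
+   to seconds by multiplying by it. */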
+
+/* Calculate the elapsed wall clock time for the user */
+
+void
+__kmp_elapsed( double *t )
+{
+ BOOL status;
+ LARGE_INTEGER now;
+ status = QueryPerformanceCounter( & now );
+ *t = ((double) now.QuadPart) * __kmp_win32_tick;
+}
+
+/* Calculate the elapsed wall clock tick for the user */
+
+void
+__kmp_elapsed_tick( double *t )
+{
+ *t = __kmp_win32_tick;
+}
+
+void
+__kmp_read_system_time( double *delta )
+{
+
+ if (delta != NULL) {
+ BOOL status;
+ LARGE_INTEGER now;
+
+ status = QueryPerformanceCounter( & now );
+
+ *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time))
+ * __kmp_win32_tick;
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+void * __stdcall
+__kmp_launch_worker( void *arg )
+{
+ volatile void *stack_data;
+ void *exit_val;
+ void *padding = 0;
+ kmp_info_t *this_thr = (kmp_info_t *) arg;
+ int gtid;
+
+ gtid = this_thr->th.th_info.ds.ds_gtid;
+ __kmp_gtid_set_specific( gtid );
+#ifdef KMP_TDATA_GTID
+ #error "This define causes problems with LoadLibrary() + declspec(thread) " \
+ "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
+ "reference: http://support.microsoft.com/kb/118816"
+ //__kmp_gtid = gtid;
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_thread_name( gtid );
+#endif /* USE_ITT_BUILD */
+
+ __kmp_affinity_set_init_mask( gtid, FALSE );
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ //
+ // Set the FP control regs to be a copy of
+ // the parallel initialization thread's.
+ //
+ __kmp_clear_x87_fpu_status_word();
+ __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
+ __kmp_load_mxcsr( &__kmp_init_mxcsr );
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+ if ( __kmp_stkoffset > 0 && gtid > 0 ) {
+ padding = KMP_ALLOCA( gtid * __kmp_stkoffset );
+ }
+
+ KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
+ this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
+ TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );
+
+ if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
+ TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data);
+ KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE );
+ __kmp_check_stack_overlap( this_thr );
+ }
+ KMP_MB();
+ exit_val = __kmp_launch_thread( this_thr );
+ KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
+ TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
+ KMP_MB();
+ return exit_val;
+}
+
+
+/* The monitor thread controls all of the threads in the complex */
+
+void * __stdcall
+__kmp_launch_monitor( void *arg )
+{
+ DWORD wait_status;
+ kmp_thread_t monitor;
+ int status;
+ int interval;
+ kmp_info_t *this_thr = (kmp_info_t *) arg;
+
+ KMP_DEBUG_ASSERT(__kmp_init_monitor);
+ TCW_4( __kmp_init_monitor, 2 ); // AC: Signal the library that monitor has started
+ // TODO: hide "2" in enum (like {true,false,started})
+ this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
+ TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) );
+
+ monitor = GetCurrentThread();
+
+ /* set thread priority */
+ status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST );
+ if (! status) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetThreadPriority ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+
+ /* register us as monitor */
+ __kmp_gtid_set_specific( KMP_GTID_MONITOR );
+#ifdef KMP_TDATA_GTID
+ #error "This define causes problems with LoadLibrary() + declspec(thread) " \
+ "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
+ "reference: http://support.microsoft.com/kb/118816"
+ //__kmp_gtid = KMP_GTID_MONITOR;
+#endif
+
+#if USE_ITT_BUILD
+ __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread.
+#endif /* USE_ITT_BUILD */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */
+
+ while (! TCR_4(__kmp_global.g.g_done)) {
+ /* This thread monitors the state of the system */
+
+ KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );
+
+ wait_status = WaitForSingleObject( __kmp_monitor_ev, interval );
+
+ if (wait_status == WAIT_TIMEOUT) {
+ TCW_4( __kmp_global.g.g_time.dt.t_value,
+ TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
+ }
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) );
+
+ status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL );
+ if (! status) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetThreadPriority ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+
+ if (__kmp_global.g.g_abort != 0) {
+ /* now we need to terminate the worker threads */
+ /* the value of t_abort is the signal we caught */
+
+ int gtid;
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) );
+
+ /* terminate the OpenMP worker threads */
+ /* TODO this is not valid for sibling threads!!
+ * the uber master might not be 0 anymore.. */
+ for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
+ __kmp_terminate_thread( gtid );
+
+ __kmp_cleanup();
+
+ Sleep( 0 );
+
+ KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) );
+
+ if (__kmp_global.g.g_abort > 0) {
+ raise( __kmp_global.g.g_abort );
+ }
+ }
+
+ TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
+
+ KMP_MB();
+ return arg;
+}
+
+void
+__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
+{
+ kmp_thread_t handle;
+ DWORD idThread;
+
+ KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );
+
+ th->th.th_info.ds.ds_gtid = gtid;
+
+ if ( KMP_UBER_GTID(gtid) ) {
+ int stack_data;
+
+ /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use.
+ Is it appropriate to just use GetCurrentThread? When should we close this handle? When
+ unregistering the root?
+ */
+ {
+ BOOL rc;
+ rc = DuplicateHandle(
+ GetCurrentProcess(),
+ GetCurrentThread(),
+ GetCurrentProcess(),
+ &th->th.th_info.ds.ds_thread,
+ 0,
+ FALSE,
+ DUPLICATE_SAME_ACCESS
+ );
+ KMP_ASSERT( rc );
+ KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
+ (LPVOID)th,
+ th->th.th_info.ds.ds_thread ) );
+ th->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
+ }
+ if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
+ /* we will dynamically update the stack range if gtid_mode == 1 */
+ TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
+ TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
+ TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
+ __kmp_check_stack_overlap( th );
+ }
+ }
+ else {
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ /* Set stack size for this thread now. */
+ KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC
+ " bytes\n", stack_size ) );
+
+ stack_size += gtid * __kmp_stkoffset;
+
+ TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size);
+ TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
+
+ KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %"
+ KMP_SIZE_T_SPEC
+ " bytes, &__kmp_launch_worker = %p, th = %p, "
+ "&idThread = %p\n",
+ (SIZE_T) stack_size,
+ (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
+ (LPVOID) th, &idThread ) );
+
+ {
+ handle = CreateThread( NULL, (SIZE_T) stack_size,
+ (LPTHREAD_START_ROUTINE) __kmp_launch_worker,
+ (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
+ }
+
+ KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %"
+ KMP_SIZE_T_SPEC
+ " bytes, &__kmp_launch_worker = %p, th = %p, "
+ "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
+ (SIZE_T) stack_size,
+ (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
+ (LPVOID) th, idThread, handle ) );
+
+ {
+ if ( handle == 0 ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantCreateThread ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ } else {
+ th->th.th_info.ds.ds_thread = handle;
+ }
+ }
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
+
+ KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
+}
+
+int
+__kmp_still_running(kmp_info_t *th) {
+ return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0));
+}
+
+void
+__kmp_create_monitor( kmp_info_t *th )
+{
+ kmp_thread_t handle;
+ DWORD idThread;
+ int ideal, new_ideal;
+
+ KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL );
+ if ( __kmp_monitor_ev == NULL ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantCreateEvent ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+#if USE_ITT_BUILD
+ __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" );
+#endif /* USE_ITT_BUILD */
+
+ th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
+ th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
+
+ // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how
+ // to automatically expand stacksize based on CreateThread error code.
+ if ( __kmp_monitor_stksize == 0 ) {
+ __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
+ }
+ if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
+ __kmp_monitor_stksize = __kmp_sys_min_stksize;
+ }
+
+ KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
+ (int) __kmp_monitor_stksize ) );
+
+ TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );
+
+ handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize,
+ (LPTHREAD_START_ROUTINE) __kmp_launch_monitor,
+ (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
+ if (handle == 0) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantCreateThread ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ else
+ th->th.th_info.ds.ds_thread = handle;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n",
+ (void *) th->th.th_info.ds.ds_thread ) );
+}
+
+/*
+ Check to see if thread is still alive.
+
+    NOTE:  The ExitProcess(code) system call causes all threads to terminate
+           with exit_val = code.  Because of this we cannot rely on exit_val
+           having any particular value, so this routine may report STILL_ACTIVE
+           in exit_val even after the thread is dead.
+*/
+
+int
+__kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val )
+{
+ DWORD rc;
+ rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val );
+ if ( rc == 0 ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FunctionError, "GetExitCodeThread()" ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }; // if
+ return ( *exit_val == STILL_ACTIVE );
+}
+
+
+void
+__kmp_exit_thread(
+ int exit_status
+) {
+ ExitThread( exit_status );
+} // __kmp_exit_thread
+
+/*
+ This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor().
+*/
+static void
+__kmp_reap_common( kmp_info_t * th )
+{
+ DWORD exit_val;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) );
+
+ /*
+ 2006-10-19:
+
+ There are two opposite situations:
+
+        1. Windows* OS keeps the thread alive after it resets the ds_alive flag and exits from the
+           thread function. (For example, see C70770/Q394281 "unloading of dll based on OMP is very
+           slow".)
+        2. Windows* OS may kill the thread before it resets the ds_alive flag.
+
+        The right solution seems to be to wait for *either* thread termination *or* the resetting of ds_alive.
+
+ */
+
+ {
+ // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to
+ // cover this usage also.
+ void * obj = NULL;
kmp_uint32 spins;
-#if USE_ITT_BUILD
- KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive );
-#endif /* USE_ITT_BUILD */
- KMP_INIT_YIELD( spins );
- do {
-#if USE_ITT_BUILD
- KMP_FSYNC_SPIN_PREPARE( obj );
-#endif /* USE_ITT_BUILD */
- __kmp_is_thread_alive( th, &exit_val );
- KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
- KMP_YIELD_SPIN( spins );
- } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) );
-#if USE_ITT_BUILD
- if ( exit_val == STILL_ACTIVE ) {
- KMP_FSYNC_CANCEL( obj );
- } else {
- KMP_FSYNC_SPIN_ACQUIRED( obj );
- }; // if
-#endif /* USE_ITT_BUILD */
- }
-
- __kmp_free_handle( th->th.th_info.ds.ds_thread );
-
- /*
-     * NOTE: The ExitProcess(code) system call causes all threads to terminate
-     *       with exit_val = code.  Because of this we cannot rely on
-     *       exit_val having any particular value.
- */
- if ( exit_val == STILL_ACTIVE ) {
- KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) );
- } else if ( (void *) exit_val != (void *) th) {
- KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) );
- }; // if
-
- KA_TRACE( 10,
- (
- "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n",
- th->th.th_info.ds.ds_gtid,
- th->th.th_info.ds.ds_thread
- )
- );
-
- th->th.th_info.ds.ds_thread = 0;
- th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
- th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
- th->th.th_info.ds.ds_thread_id = 0;
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-}
-
-void
-__kmp_reap_monitor( kmp_info_t *th )
-{
- int status;
-
- KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n",
- (void *) th->th.th_info.ds.ds_thread ) );
-
- // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
- // If both tid and gtid are 0, it means the monitor did not ever start.
- // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
- KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
- if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
- return;
- }; // if
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- status = SetEvent( __kmp_monitor_ev );
- if ( status == FALSE ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetEvent ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
- __kmp_reap_common( th );
-
- __kmp_free_handle( __kmp_monitor_ev );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-}
-
-void
-__kmp_reap_worker( kmp_info_t * th )
-{
- KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
- __kmp_reap_common( th );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-#if KMP_HANDLE_SIGNALS
-
-
-static void
-__kmp_team_handler( int signo )
-{
- if ( __kmp_global.g.g_abort == 0 ) {
- // Stage 1 signal handler, let's shut down all of the threads.
- if ( __kmp_debug_buf ) {
- __kmp_dump_debug_buffer();
- }; // if
- KMP_MB(); // Flush all pending memory write invalidates.
- TCW_4( __kmp_global.g.g_abort, signo );
- KMP_MB(); // Flush all pending memory write invalidates.
- TCW_4( __kmp_global.g.g_done, TRUE );
- KMP_MB(); // Flush all pending memory write invalidates.
- }
-} // __kmp_team_handler
-
-
-
-static
-sig_func_t __kmp_signal( int signum, sig_func_t handler ) {
- sig_func_t old = signal( signum, handler );
- if ( old == SIG_ERR ) {
- int error = errno;
- __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null );
- }; // if
- return old;
-}
-
-static void
-__kmp_install_one_handler(
- int sig,
- sig_func_t handler,
- int parallel_init
-) {
- sig_func_t old;
- KMP_MB(); /* Flush all pending memory write invalidates. */
- KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) );
- if ( parallel_init ) {
- old = __kmp_signal( sig, handler );
-        // SIG_DFL on Windows* OS is NULL or 0.
- if ( old == __kmp_sighldrs[ sig ] ) {
- __kmp_siginstalled[ sig ] = 1;
- } else {
-            // Restore/keep user's handler if one was previously installed.
- old = __kmp_signal( sig, old );
- }; // if
- } else {
-        // Save the initial/system signal handlers to see if user handlers were installed.
-        // 2009-09-23: This is dead code. On Windows* OS __kmp_install_signals is called once with
-        // parallel_init == TRUE.
- old = __kmp_signal( sig, SIG_DFL );
- __kmp_sighldrs[ sig ] = old;
- __kmp_signal( sig, old );
- }; // if
- KMP_MB(); /* Flush all pending memory write invalidates. */
-} // __kmp_install_one_handler
-
-static void
-__kmp_remove_one_handler( int sig ) {
- if ( __kmp_siginstalled[ sig ] ) {
- sig_func_t old;
- KMP_MB(); // Flush all pending memory write invalidates.
- KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) );
- old = __kmp_signal( sig, __kmp_sighldrs[ sig ] );
- if ( old != __kmp_team_handler ) {
- KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
- old = __kmp_signal( sig, old );
- }; // if
- __kmp_sighldrs[ sig ] = NULL;
- __kmp_siginstalled[ sig ] = 0;
- KMP_MB(); // Flush all pending memory write invalidates.
- }; // if
-} // __kmp_remove_one_handler
-
-
-void
-__kmp_install_signals( int parallel_init )
-{
- KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) );
- if ( ! __kmp_handle_signals ) {
- KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) );
- return;
- }; // if
- __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
- __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
-} // __kmp_install_signals
-
-
-void
-__kmp_remove_signals( void )
-{
- int sig;
- KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) );
- for ( sig = 1; sig < NSIG; ++ sig ) {
- __kmp_remove_one_handler( sig );
- }; // for sig
-} // __kmp_remove_signals
-
-
-#endif // KMP_HANDLE_SIGNALS
-
-/* Put the thread to sleep for a time period */
-void
-__kmp_thread_sleep( int millis )
-{
- DWORD status;
-
- status = SleepEx( (DWORD) millis, FALSE );
- if ( status ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FunctionError, "SleepEx()" ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
-}
-
-/* Determine whether the given address is mapped into the current address space. */
-int
-__kmp_is_address_mapped( void * addr )
-{
- DWORD status;
- MEMORY_BASIC_INFORMATION lpBuffer;
- SIZE_T dwLength;
-
- dwLength = sizeof(MEMORY_BASIC_INFORMATION);
-
- status = VirtualQuery( addr, &lpBuffer, dwLength );
-
- return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) ||
- (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE )));
-}
-
-kmp_uint64
-__kmp_hardware_timestamp(void)
-{
- kmp_uint64 r = 0;
-
- QueryPerformanceCounter((LARGE_INTEGER*) &r);
- return r;
-}
-
-/* Free handle and check the error code */
-void
-__kmp_free_handle( kmp_thread_t tHandle )
-{
-/* Also called with a parameter of type HANDLE, so kmp_thread_t is assumed to be defined as HANDLE */
- BOOL rc;
- rc = CloseHandle( tHandle );
- if ( !rc ) {
- DWORD error = GetLastError();
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantCloseHandle ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
-}
-
-int
-__kmp_get_load_balance( int max ) {
-
- static ULONG glb_buff_size = 100 * 1024;
-
-    static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algorithm */
- static double glb_call_time = 0; /* Thread balance algorithm call time */
-
- int running_threads = 0; // Number of running threads in the system.
- NTSTATUS status = 0;
- ULONG buff_size = 0;
- ULONG info_size = 0;
- void * buffer = NULL;
- PSYSTEM_PROCESS_INFORMATION spi = NULL;
- int first_time = 1;
-
- double call_time = 0.0; //start, finish;
-
- __kmp_elapsed( & call_time );
-
- if ( glb_call_time &&
- ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
- running_threads = glb_running_threads;
- goto finish;
- }
- glb_call_time = call_time;
-
- // Do not spend time on running algorithm if we have a permanent error.
- if ( NtQuerySystemInformation == NULL ) {
- running_threads = -1;
- goto finish;
- }; // if
-
- if ( max <= 0 ) {
- max = INT_MAX;
- }; // if
-
- do {
-
- if ( first_time ) {
- buff_size = glb_buff_size;
- } else {
- buff_size = 2 * buff_size;
- }
-
- buffer = KMP_INTERNAL_REALLOC( buffer, buff_size );
- if ( buffer == NULL ) {
- running_threads = -1;
- goto finish;
- }; // if
- status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size );
- first_time = 0;
-
- } while ( status == STATUS_INFO_LENGTH_MISMATCH );
- glb_buff_size = buff_size;
-
- #define CHECK( cond ) \
- { \
- KMP_DEBUG_ASSERT( cond ); \
- if ( ! ( cond ) ) { \
- running_threads = -1; \
- goto finish; \
- } \
- }
-
- CHECK( buff_size >= info_size );
- spi = PSYSTEM_PROCESS_INFORMATION( buffer );
- for ( ; ; ) {
- ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer );
- CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size );
- HANDLE pid = spi->ProcessId;
- ULONG num = spi->NumberOfThreads;
- CHECK( num >= 1 );
- size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 );
- CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer.
- if ( spi->NextEntryOffset != 0 ) {
- CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record.
- }; // if
- // pid == 0 corresponds to the System Idle Process. It always has running threads
- // on all cores. So, we don't consider the running threads of this process.
- if ( pid != 0 ) {
- for ( int i = 0; i < num; ++ i ) {
- THREAD_STATE state = spi->Threads[ i ].State;
- // Count threads that have Ready or Running state.
- // !!! TODO: Why does the comment not match the code???
- if ( state == StateRunning ) {
- ++ running_threads;
- // Stop counting running threads if the number is already greater than
- // the number of available cores
- if ( running_threads >= max ) {
- goto finish;
- }
- } // if
- }; // for i
- } // if
- if ( spi->NextEntryOffset == 0 ) {
- break;
- }; // if
- spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset );
- }; // forever
-
- #undef CHECK
-
- finish: // Clean up and exit.
-
- if ( buffer != NULL ) {
- KMP_INTERNAL_FREE( buffer );
- }; // if
-
- glb_running_threads = running_threads;
-
- return running_threads;
-
-} //__kmp_get_load_balance()
-
+#if USE_ITT_BUILD
+ KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive );
+#endif /* USE_ITT_BUILD */
+ KMP_INIT_YIELD( spins );
+ do {
+#if USE_ITT_BUILD
+ KMP_FSYNC_SPIN_PREPARE( obj );
+#endif /* USE_ITT_BUILD */
+ __kmp_is_thread_alive( th, &exit_val );
+ KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
+ KMP_YIELD_SPIN( spins );
+ } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) );
+#if USE_ITT_BUILD
+ if ( exit_val == STILL_ACTIVE ) {
+ KMP_FSYNC_CANCEL( obj );
+ } else {
+ KMP_FSYNC_SPIN_ACQUIRED( obj );
+ }; // if
+#endif /* USE_ITT_BUILD */
+ }
+
+ __kmp_free_handle( th->th.th_info.ds.ds_thread );
+
+ /*
+ * NOTE: The ExitProcess(code) system call causes all threads to terminate
+ * with an exit_val = code. Because of this we cannot rely on
+ * exit_val having any particular value.
+ */
+ if ( exit_val == STILL_ACTIVE ) {
+ KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) );
+ } else if ( (void *) exit_val != (void *) th) {
+ KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) );
+ }; // if
+
+ KA_TRACE( 10,
+ (
+ "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n",
+ th->th.th_info.ds.ds_gtid,
+ th->th.th_info.ds.ds_thread
+ )
+ );
+
+ th->th.th_info.ds.ds_thread = 0;
+ th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
+ th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
+ th->th.th_info.ds.ds_thread_id = 0;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+}
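
For context, __kmp_reap_common above spins until the thread stops reporting STILL_ACTIVE and then releases its handle via __kmp_free_handle. A minimal standalone sketch of that wait-then-close pattern (hypothetical names; it polls GetExitCodeThread directly instead of going through KMP's __kmp_is_thread_alive and yield macros) might look like:

#include <windows.h>

// Sketch only: wait until a Windows thread stops reporting STILL_ACTIVE,
// then close its handle. Production code would normally prefer
// WaitForSingleObject() over a polling loop.
static DWORD reap_thread_sketch(HANDLE thread) {
    DWORD exit_code = STILL_ACTIVE;
    while (GetExitCodeThread(thread, &exit_code) && exit_code == STILL_ACTIVE) {
        Sleep(1);  // back off between polls, as the KMP_YIELD macros do above
    }
    CloseHandle(thread);
    return exit_code;
}
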
+
+void
+__kmp_reap_monitor( kmp_info_t *th )
+{
+ int status;
+
+ KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n",
+ (void *) th->th.th_info.ds.ds_thread ) );
+
+ // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
+ // If both tid and gtid are 0, it means the monitor did not ever start.
+ // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
+ KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
+ if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
+ return;
+ }; // if
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ status = SetEvent( __kmp_monitor_ev );
+ if ( status == FALSE ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantSetEvent ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
+ __kmp_reap_common( th );
+
+ __kmp_free_handle( __kmp_monitor_ev );
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+}
+
+void
+__kmp_reap_worker( kmp_info_t * th )
+{
+ KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
+ __kmp_reap_common( th );
+}
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+#if KMP_HANDLE_SIGNALS
+
+
+static void
+__kmp_team_handler( int signo )
+{
+ if ( __kmp_global.g.g_abort == 0 ) {
+ // Stage 1 signal handler, let's shut down all of the threads.
+ if ( __kmp_debug_buf ) {
+ __kmp_dump_debug_buffer();
+ }; // if
+ KMP_MB(); // Flush all pending memory write invalidates.
+ TCW_4( __kmp_global.g.g_abort, signo );
+ KMP_MB(); // Flush all pending memory write invalidates.
+ TCW_4( __kmp_global.g.g_done, TRUE );
+ KMP_MB(); // Flush all pending memory write invalidates.
+ }
+} // __kmp_team_handler
+
+
+
+static
+sig_func_t __kmp_signal( int signum, sig_func_t handler ) {
+ sig_func_t old = signal( signum, handler );
+ if ( old == SIG_ERR ) {
+ int error = errno;
+ __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null );
+ }; // if
+ return old;
+}
+
+static void
+__kmp_install_one_handler(
+ int sig,
+ sig_func_t handler,
+ int parallel_init
+) {
+ sig_func_t old;
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) );
+ if ( parallel_init ) {
+ old = __kmp_signal( sig, handler );
+ // SIG_DFL on Windows* OS is NULL or 0.
+ if ( old == __kmp_sighldrs[ sig ] ) {
+ __kmp_siginstalled[ sig ] = 1;
+ } else {
+ // Restore/keep user's handler if one previously installed.
+ old = __kmp_signal( sig, old );
+ }; // if
+ } else {
+ // Save initial/system signal handlers to see if user handlers are installed.
+ // 2009-09-23: This is dead code. On Windows* OS __kmp_install_signals is called only once,
+ // with parallel_init == TRUE.
+ old = __kmp_signal( sig, SIG_DFL );
+ __kmp_sighldrs[ sig ] = old;
+ __kmp_signal( sig, old );
+ }; // if
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+} // __kmp_install_one_handler
+
+static void
+__kmp_remove_one_handler( int sig ) {
+ if ( __kmp_siginstalled[ sig ] ) {
+ sig_func_t old;
+ KMP_MB(); // Flush all pending memory write invalidates.
+ KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) );
+ old = __kmp_signal( sig, __kmp_sighldrs[ sig ] );
+ if ( old != __kmp_team_handler ) {
+ KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
+ old = __kmp_signal( sig, old );
+ }; // if
+ __kmp_sighldrs[ sig ] = NULL;
+ __kmp_siginstalled[ sig ] = 0;
+ KMP_MB(); // Flush all pending memory write invalidates.
+ }; // if
+} // __kmp_remove_one_handler
+
+
+void
+__kmp_install_signals( int parallel_init )
+{
+ KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) );
+ if ( ! __kmp_handle_signals ) {
+ KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) );
+ return;
+ }; // if
+ __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
+ __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
+} // __kmp_install_signals
+
+
+void
+__kmp_remove_signals( void )
+{
+ int sig;
+ KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) );
+ for ( sig = 1; sig < NSIG; ++ sig ) {
+ __kmp_remove_one_handler( sig );
+ }; // for sig
+} // __kmp_remove_signals
+
+
+#endif // KMP_HANDLE_SIGNALS
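
The handlers above are installed only when nothing else has claimed the signal: __kmp_install_one_handler swaps in the team handler and, if the previous handler is not the saved default, immediately puts the user's handler back. A simplified sketch of the same idea (comparing against SIG_DFL directly instead of the __kmp_sighldrs table; names are illustrative, not the library's):

#include <csignal>

typedef void (*handler_t)(int);

// Install 'team_handler' for 'sig' only if the application has not set its
// own handler; otherwise restore the user's handler and report failure.
static bool install_if_default(int sig, handler_t team_handler) {
    handler_t old = std::signal(sig, team_handler);
    if (old == SIG_ERR) {
        return false;        // could not query or replace the handler
    }
    if (old == SIG_DFL) {
        return true;         // default handler was active; keep ours installed
    }
    std::signal(sig, old);   // a user handler existed; put it back
    return false;
}
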
+
+/* Put the thread to sleep for a time period */
+void
+__kmp_thread_sleep( int millis )
+{
+ DWORD status;
+
+ status = SleepEx( (DWORD) millis, FALSE );
+ if ( status ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FunctionError, "SleepEx()" ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+}
+
+/* Determine whether the given address is mapped into the current address space. */
+int
+__kmp_is_address_mapped( void * addr )
+{
+ DWORD status;
+ MEMORY_BASIC_INFORMATION lpBuffer;
+ SIZE_T dwLength;
+
+ dwLength = sizeof(MEMORY_BASIC_INFORMATION);
+
+ status = VirtualQuery( addr, &lpBuffer, dwLength );
+
+ return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) ||
+ (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE )));
+}
+
+kmp_uint64
+__kmp_hardware_timestamp(void)
+{
+ kmp_uint64 r = 0;
+
+ QueryPerformanceCounter((LARGE_INTEGER*) &r);
+ return r;
+}
+
+/* Free handle and check the error code */
+void
+__kmp_free_handle( kmp_thread_t tHandle )
+{
+/* also called with an argument of type HANDLE, so kmp_thread_t is assumed to be defined as HANDLE */
+ BOOL rc;
+ rc = CloseHandle( tHandle );
+ if ( !rc ) {
+ DWORD error = GetLastError();
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( CantCloseHandle ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+}
+
+int
+__kmp_get_load_balance( int max ) {
+
+ static ULONG glb_buff_size = 100 * 1024;
+
+ static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algorithm */
+ static double glb_call_time = 0; /* Thread balance algorithm call time */
+
+ int running_threads = 0; // Number of running threads in the system.
+ NTSTATUS status = 0;
+ ULONG buff_size = 0;
+ ULONG info_size = 0;
+ void * buffer = NULL;
+ PSYSTEM_PROCESS_INFORMATION spi = NULL;
+ int first_time = 1;
+
+ double call_time = 0.0; //start, finish;
+
+ __kmp_elapsed( & call_time );
+
+ if ( glb_call_time &&
+ ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
+ running_threads = glb_running_threads;
+ goto finish;
+ }
+ glb_call_time = call_time;
+
+ // Do not spend time on running algorithm if we have a permanent error.
+ if ( NtQuerySystemInformation == NULL ) {
+ running_threads = -1;
+ goto finish;
+ }; // if
+
+ if ( max <= 0 ) {
+ max = INT_MAX;
+ }; // if
+
+ do {
+
+ if ( first_time ) {
+ buff_size = glb_buff_size;
+ } else {
+ buff_size = 2 * buff_size;
+ }
+
+ buffer = KMP_INTERNAL_REALLOC( buffer, buff_size );
+ if ( buffer == NULL ) {
+ running_threads = -1;
+ goto finish;
+ }; // if
+ status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size );
+ first_time = 0;
+
+ } while ( status == STATUS_INFO_LENGTH_MISMATCH );
+ glb_buff_size = buff_size;
+
+ #define CHECK( cond ) \
+ { \
+ KMP_DEBUG_ASSERT( cond ); \
+ if ( ! ( cond ) ) { \
+ running_threads = -1; \
+ goto finish; \
+ } \
+ }
+
+ CHECK( buff_size >= info_size );
+ spi = PSYSTEM_PROCESS_INFORMATION( buffer );
+ for ( ; ; ) {
+ ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer );
+ CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size );
+ HANDLE pid = spi->ProcessId;
+ ULONG num = spi->NumberOfThreads;
+ CHECK( num >= 1 );
+ size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 );
+ CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer.
+ if ( spi->NextEntryOffset != 0 ) {
+ CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record.
+ }; // if
+ // pid == 0 corresponds to the System Idle Process. It always has running threads
+ // on all cores. So, we don't consider the running threads of this process.
+ if ( pid != 0 ) {
+ for ( int i = 0; i < num; ++ i ) {
+ THREAD_STATE state = spi->Threads[ i ].State;
+ // Count threads that have Ready or Running state.
+ // !!! TODO: Why does the comment not match the code???
+ if ( state == StateRunning ) {
+ ++ running_threads;
+ // Stop counting running threads if the number is already greater than
+ // the number of available cores
+ if ( running_threads >= max ) {
+ goto finish;
+ }
+ } // if
+ }; // for i
+ } // if
+ if ( spi->NextEntryOffset == 0 ) {
+ break;
+ }; // if
+ spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset );
+ }; // forever
+
+ #undef CHECK
+
+ finish: // Clean up and exit.
+
+ if ( buffer != NULL ) {
+ KMP_INTERNAL_FREE( buffer );
+ }; // if
+
+ glb_running_threads = running_threads;
+
+ return running_threads;
+
+} //__kmp_get_load_balance()
+
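
For context on the hunk above: __kmp_get_load_balance keeps doubling its buffer until NtQuerySystemInformation stops returning STATUS_INFO_LENGTH_MISMATCH, then walks the SYSTEM_PROCESS_INFORMATION records through NextEntryOffset and counts StateRunning threads. A minimal sketch of just the grow-and-retry part (illustrative, not the library's code; assumes <winternl.h> and linking against ntdll):

#include <windows.h>
#include <winternl.h>
#include <vector>

#ifndef STATUS_INFO_LENGTH_MISMATCH
#define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L)
#endif

// Fetch the system process snapshot, doubling the buffer on
// STATUS_INFO_LENGTH_MISMATCH, exactly as the loop above does.
static std::vector<unsigned char> query_process_snapshot() {
    std::vector<unsigned char> buf(100 * 1024);
    for (;;) {
        ULONG needed = 0;
        NTSTATUS st = NtQuerySystemInformation(
            SystemProcessInformation, buf.data(), (ULONG)buf.size(), &needed);
        if (st != STATUS_INFO_LENGTH_MISMATCH) {
            return buf;   // success, or a hard error the caller must inspect
        }
        buf.resize(buf.size() * 2);
    }
}
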
diff --git a/contrib/libs/cxxsupp/system_stl/ya.make b/contrib/libs/cxxsupp/system_stl/ya.make
index 3b4327f877..75317898ac 100644
--- a/contrib/libs/cxxsupp/system_stl/ya.make
+++ b/contrib/libs/cxxsupp/system_stl/ya.make
@@ -3,7 +3,7 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
LICENSE(YandexOpen)
-
+
OWNER(
g:contrib
g:cpp-contrib
diff --git a/contrib/libs/cxxsupp/ya.make b/contrib/libs/cxxsupp/ya.make
index 5f77c46f13..6cd299ac8a 100644
--- a/contrib/libs/cxxsupp/ya.make
+++ b/contrib/libs/cxxsupp/ya.make
@@ -1,4 +1,4 @@
-LIBRARY()
+LIBRARY()
WITHOUT_LICENSE_TEXTS()
@@ -9,7 +9,7 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
NO_PLATFORM()
DISABLE(OPENSOURCE_EXPORT)
@@ -23,7 +23,7 @@ ELSE()
)
ENDIF()
-END()
+END()
RECURSE(
libcxx
diff --git a/contrib/libs/double-conversion/bignum-dtoa.cc b/contrib/libs/double-conversion/bignum-dtoa.cc
index e6b43a5d6f..d99ac2aaf9 100644
--- a/contrib/libs/double-conversion/bignum-dtoa.cc
+++ b/contrib/libs/double-conversion/bignum-dtoa.cc
@@ -1,641 +1,641 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#include <cmath>
-
-#include "bignum-dtoa.h"
-
-#include "bignum.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-static int NormalizedExponent(uint64_t significand, int exponent) {
- ASSERT(significand != 0);
- while ((significand & Double::kHiddenBit) == 0) {
- significand = significand << 1;
- exponent = exponent - 1;
- }
- return exponent;
-}
-
-
-// Forward declarations:
-// Returns an estimation of k such that 10^(k-1) <= v < 10^k.
-static int EstimatePower(int exponent);
-// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
-// and denominator.
-static void InitialScaledStartValues(uint64_t significand,
- int exponent,
- bool lower_boundary_is_closer,
- int estimated_power,
- bool need_boundary_deltas,
- Bignum* numerator,
- Bignum* denominator,
- Bignum* delta_minus,
- Bignum* delta_plus);
- // Multiplies numerator/denominator so that its value lies in the range 1-10.
-// Returns decimal_point s.t.
-// v = numerator'/denominator' * 10^(decimal_point-1)
-// where numerator' and denominator' are the values of numerator and
-// denominator after the call to this function.
-static void FixupMultiply10(int estimated_power, bool is_even,
- int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus);
-// Generates digits from the left to the right and stops when the generated
-// digits yield the shortest decimal representation of v.
-static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus,
- bool is_even,
- Vector<char> buffer, int* length);
-// Generates 'requested_digits' after the decimal point.
-static void BignumToFixed(int requested_digits, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length);
-// Generates 'count' digits of numerator/denominator.
-// Once 'count' digits have been produced rounds the result depending on the
-// remainder (remainders of exactly .5 round upwards). Might update the
-// decimal_point when rounding up (for example for 0.9999).
-static void GenerateCountedDigits(int count, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length);
-
-
-void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
- Vector<char> buffer, int* length, int* decimal_point) {
- ASSERT(v > 0);
- ASSERT(!Double(v).IsSpecial());
- uint64_t significand;
- int exponent;
- bool lower_boundary_is_closer;
- if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) {
- float f = static_cast<float>(v);
- ASSERT(f == v);
- significand = Single(f).Significand();
- exponent = Single(f).Exponent();
- lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser();
- } else {
- significand = Double(v).Significand();
- exponent = Double(v).Exponent();
- lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser();
- }
- bool need_boundary_deltas =
- (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE);
-
- bool is_even = (significand & 1) == 0;
- int normalized_exponent = NormalizedExponent(significand, exponent);
- // estimated_power might be too low by 1.
- int estimated_power = EstimatePower(normalized_exponent);
-
- // Shortcut for Fixed.
- // The requested digits correspond to the digits after the point. If the
- // number is much too small, then there is no need to try to get any
- // digits.
- if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) {
- buffer[0] = '\0';
- *length = 0;
- // Set decimal-point to -requested_digits. This is what Gay does.
- // Note that it should not have any effect anyways since the string is
- // empty.
- *decimal_point = -requested_digits;
- return;
- }
-
- Bignum numerator;
- Bignum denominator;
- Bignum delta_minus;
- Bignum delta_plus;
- // Make sure the bignum can grow large enough. The smallest double equals
- // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
- // The maximum double is 1.7976931348623157e308 which needs fewer than
- // 308*4 binary digits.
- ASSERT(Bignum::kMaxSignificantBits >= 324*4);
- InitialScaledStartValues(significand, exponent, lower_boundary_is_closer,
- estimated_power, need_boundary_deltas,
- &numerator, &denominator,
- &delta_minus, &delta_plus);
- // We now have v = (numerator / denominator) * 10^estimated_power.
- FixupMultiply10(estimated_power, is_even, decimal_point,
- &numerator, &denominator,
- &delta_minus, &delta_plus);
- // We now have v = (numerator / denominator) * 10^(decimal_point-1), and
- // 1 <= (numerator + delta_plus) / denominator < 10
- switch (mode) {
- case BIGNUM_DTOA_SHORTEST:
- case BIGNUM_DTOA_SHORTEST_SINGLE:
- GenerateShortestDigits(&numerator, &denominator,
- &delta_minus, &delta_plus,
- is_even, buffer, length);
- break;
- case BIGNUM_DTOA_FIXED:
- BignumToFixed(requested_digits, decimal_point,
- &numerator, &denominator,
- buffer, length);
- break;
- case BIGNUM_DTOA_PRECISION:
- GenerateCountedDigits(requested_digits, decimal_point,
- &numerator, &denominator,
- buffer, length);
- break;
- default:
- UNREACHABLE();
- }
- buffer[*length] = '\0';
-}
-
-
-// The procedure starts generating digits from the left to the right and stops
-// when the generated digits yield the shortest decimal representation of v. A
-// decimal representation of v is a number lying closer to v than to any other
-// double, so it converts to v when read.
-//
-// This is true if d, the decimal representation, is between m- and m+, the
-// upper and lower boundaries. d must be strictly between them if !is_even.
-// m- := (numerator - delta_minus) / denominator
-// m+ := (numerator + delta_plus) / denominator
-//
-// Precondition: 0 <= (numerator+delta_plus) / denominator < 10.
-// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit
-// will be produced. This should be the standard precondition.
-static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus,
- bool is_even,
- Vector<char> buffer, int* length) {
- // Small optimization: if delta_minus and delta_plus are the same just reuse
- // one of the two bignums.
- if (Bignum::Equal(*delta_minus, *delta_plus)) {
- delta_plus = delta_minus;
- }
- *length = 0;
- for (;;) {
- uint16_t digit;
- digit = numerator->DivideModuloIntBignum(*denominator);
- ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
- // digit = numerator / denominator (integer division).
- // numerator = numerator % denominator.
- buffer[(*length)++] = static_cast<char>(digit + '0');
-
- // Can we stop already?
- // If the remainder of the division is less than the distance to the lower
- // boundary we can stop. In this case we simply round down (discarding the
- // remainder).
- // Similarly we test if we can round up (using the upper boundary).
- bool in_delta_room_minus;
- bool in_delta_room_plus;
- if (is_even) {
- in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus);
- } else {
- in_delta_room_minus = Bignum::Less(*numerator, *delta_minus);
- }
- if (is_even) {
- in_delta_room_plus =
- Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
- } else {
- in_delta_room_plus =
- Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
- }
- if (!in_delta_room_minus && !in_delta_room_plus) {
- // Prepare for next iteration.
- numerator->Times10();
- delta_minus->Times10();
- // We optimized delta_plus to be equal to delta_minus (if they share the
- // same value). So don't multiply delta_plus if they point to the same
- // object.
- if (delta_minus != delta_plus) {
- delta_plus->Times10();
- }
- } else if (in_delta_room_minus && in_delta_room_plus) {
- // Let's see if 2*numerator < denominator.
- // If yes, then the next digit would be < 5 and we can round down.
- int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator);
- if (compare < 0) {
- // Remaining digits are less than .5. -> Round down (== do nothing).
- } else if (compare > 0) {
- // Remaining digits are more than .5 of denominator. -> Round up.
- // Note that the last digit could not be a '9' as otherwise the whole
- // loop would have stopped earlier.
- // We still have an assert here in case the preconditions were not
- // satisfied.
- ASSERT(buffer[(*length) - 1] != '9');
- buffer[(*length) - 1]++;
- } else {
- // Halfway case.
- // TODO(floitsch): need a way to solve half-way cases.
- // For now let's round towards even (since this is what Gay seems to
- // do).
-
- if ((buffer[(*length) - 1] - '0') % 2 == 0) {
- // Round down => Do nothing.
- } else {
- ASSERT(buffer[(*length) - 1] != '9');
- buffer[(*length) - 1]++;
- }
- }
- return;
- } else if (in_delta_room_minus) {
- // Round down (== do nothing).
- return;
- } else { // in_delta_room_plus
- // Round up.
- // Note again that the last digit could not be '9' since this would have
- // stopped the loop earlier.
- // We still have an ASSERT here, in case the preconditions were not
- // satisfied.
- ASSERT(buffer[(*length) -1] != '9');
- buffer[(*length) - 1]++;
- return;
- }
- }
-}
-
-
-// Let v = numerator / denominator < 10.
-// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point)
- // from left to right. Once 'count' digits have been produced we decide whether
-// to round up or down. Remainders of exactly .5 round upwards. Numbers such
-// as 9.999999 propagate a carry all the way, and change the
-// exponent (decimal_point), when rounding upwards.
-static void GenerateCountedDigits(int count, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char> buffer, int* length) {
- ASSERT(count >= 0);
- for (int i = 0; i < count - 1; ++i) {
- uint16_t digit;
- digit = numerator->DivideModuloIntBignum(*denominator);
- ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
- // digit = numerator / denominator (integer division).
- // numerator = numerator % denominator.
- buffer[i] = static_cast<char>(digit + '0');
- // Prepare for next iteration.
- numerator->Times10();
- }
- // Generate the last digit.
- uint16_t digit;
- digit = numerator->DivideModuloIntBignum(*denominator);
- if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
- digit++;
- }
- ASSERT(digit <= 10);
- buffer[count - 1] = static_cast<char>(digit + '0');
- // Correct bad digits (in case we had a sequence of '9's). Propagate the
- // carry until we hit a non-'9' or until we reach the first digit.
- for (int i = count - 1; i > 0; --i) {
- if (buffer[i] != '0' + 10) break;
- buffer[i] = '0';
- buffer[i - 1]++;
- }
- if (buffer[0] == '0' + 10) {
- // Propagate a carry past the top place.
- buffer[0] = '1';
- (*decimal_point)++;
- }
- *length = count;
-}
-
-
-// Generates 'requested_digits' after the decimal point. It might omit
-// trailing '0's. If the input number is too small then no digits at all are
-// generated (ex.: 2 fixed digits for 0.00001).
-//
-// Input verifies: 1 <= (numerator + delta) / denominator < 10.
-static void BignumToFixed(int requested_digits, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length) {
- // Note that we have to look at more than just the requested_digits, since
- // a number could be rounded up. Example: v=0.5 with requested_digits=0.
- // Even though the power of v equals 0 we can't just stop here.
- if (-(*decimal_point) > requested_digits) {
- // The number is definitively too small.
- // Ex: 0.001 with requested_digits == 1.
- // Set decimal-point to -requested_digits. This is what Gay does.
- // Note that it should not have any effect anyways since the string is
- // empty.
- *decimal_point = -requested_digits;
- *length = 0;
- return;
- } else if (-(*decimal_point) == requested_digits) {
- // We only need to verify if the number rounds down or up.
- // Ex: 0.04 and 0.06 with requested_digits == 1.
- ASSERT(*decimal_point == -requested_digits);
- // Initially the fraction lies in range (1, 10]. Multiply the denominator
- // by 10 so that we can compare more easily.
- denominator->Times10();
- if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
- // If the fraction is >= 0.5 then we have to include the rounded
- // digit.
- buffer[0] = '1';
- *length = 1;
- (*decimal_point)++;
- } else {
- // Note that we caught most of similar cases earlier.
- *length = 0;
- }
- return;
- } else {
- // The requested digits correspond to the digits after the point.
- // The variable 'needed_digits' includes the digits before the point.
- int needed_digits = (*decimal_point) + requested_digits;
- GenerateCountedDigits(needed_digits, decimal_point,
- numerator, denominator,
- buffer, length);
- }
-}
-
-
-// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
-// v = f * 2^exponent and 2^52 <= f < 2^53.
-// v is hence a normalized double with the given exponent. The output is an
- // approximation for the exponent of the decimal approximation .digits * 10^k.
-//
-// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
-// Note: this property holds for v's upper boundary m+ too.
-// 10^k <= m+ < 10^k+1.
-// (see explanation below).
-//
-// Examples:
-// EstimatePower(0) => 16
-// EstimatePower(-52) => 0
-//
- // Note: e >= 0 => EstimatePower(e) > 0. No similar claim can be made for e<0.
-static int EstimatePower(int exponent) {
- // This function estimates log10 of v where v = f*2^e (with e == exponent).
- // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)).
- // Note that f is bounded by its container size. Let p = 53 (the double's
- // significand size). Then 2^(p-1) <= f < 2^p.
- //
- // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close
- // to log2(v) the function is simplified to (e+len(f)-1)/log2(10).
- // The computed number undershoots by less than 0.631 (when we compute log3
- // and not log10).
- //
- // Optimization: since we only need an approximated result this computation
- // can be performed on 64 bit integers. On x86/x64 architecture the speedup is
- // not really measurable, though.
- //
- // Since we want to avoid overshooting we decrement by 1e-10 so that
- // floating-point imprecisions don't affect us.
- //
- // Explanation for v's boundary m+: the computation takes advantage of
- // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement
- // (even for denormals where the delta can be much more important).
-
- const double k1Log10 = 0.30102999566398114; // 1/lg(10)
-
- // For doubles len(f) == 53 (don't forget the hidden bit).
- const int kSignificandSize = Double::kSignificandSize;
- double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
- return static_cast<int>(estimate);
-}
-
-
-// See comments for InitialScaledStartValues.
-static void InitialScaledStartValuesPositiveExponent(
- uint64_t significand, int exponent,
- int estimated_power, bool need_boundary_deltas,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- // A positive exponent implies a positive power.
- ASSERT(estimated_power >= 0);
- // Since the estimated_power is positive we simply multiply the denominator
- // by 10^estimated_power.
-
- // numerator = v.
- numerator->AssignUInt64(significand);
- numerator->ShiftLeft(exponent);
- // denominator = 10^estimated_power.
- denominator->AssignPowerUInt16(10, estimated_power);
-
- if (need_boundary_deltas) {
- // Introduce a common denominator so that the deltas to the boundaries are
- // integers.
- denominator->ShiftLeft(1);
- numerator->ShiftLeft(1);
- // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
- // denominator (of 2) delta_plus equals 2^e.
- delta_plus->AssignUInt16(1);
- delta_plus->ShiftLeft(exponent);
- // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
- delta_minus->AssignUInt16(1);
- delta_minus->ShiftLeft(exponent);
- }
-}
-
-
-// See comments for InitialScaledStartValues
-static void InitialScaledStartValuesNegativeExponentPositivePower(
- uint64_t significand, int exponent,
- int estimated_power, bool need_boundary_deltas,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- // v = f * 2^e with e < 0, and with estimated_power >= 0.
- // This means that e is close to 0 (have a look at how estimated_power is
- // computed).
-
- // numerator = significand
- // since v = significand * 2^exponent this is equivalent to
- // numerator = v * 2^-exponent
- numerator->AssignUInt64(significand);
- // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
- denominator->AssignPowerUInt16(10, estimated_power);
- denominator->ShiftLeft(-exponent);
-
- if (need_boundary_deltas) {
- // Introduce a common denominator so that the deltas to the boundaries are
- // integers.
- denominator->ShiftLeft(1);
- numerator->ShiftLeft(1);
- // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
- // denominator (of 2) delta_plus equals 2^e.
- // Given that the denominator already includes v's exponent the distance
- // to the boundaries is simply 1.
- delta_plus->AssignUInt16(1);
- // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
- delta_minus->AssignUInt16(1);
- }
-}
-
-
-// See comments for InitialScaledStartValues
-static void InitialScaledStartValuesNegativeExponentNegativePower(
- uint64_t significand, int exponent,
- int estimated_power, bool need_boundary_deltas,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- // Instead of multiplying the denominator with 10^estimated_power we
- // multiply all values (numerator and deltas) by 10^-estimated_power.
-
- // Use numerator as temporary container for power_ten.
- Bignum* power_ten = numerator;
- power_ten->AssignPowerUInt16(10, -estimated_power);
-
- if (need_boundary_deltas) {
- // Since power_ten == numerator we must make a copy of 10^estimated_power
- // before we complete the computation of the numerator.
- // delta_plus = delta_minus = 10^estimated_power
- delta_plus->AssignBignum(*power_ten);
- delta_minus->AssignBignum(*power_ten);
- }
-
- // numerator = significand * 2 * 10^-estimated_power
- // since v = significand * 2^exponent this is equivalent to
- // numerator = v * 10^-estimated_power * 2 * 2^-exponent.
- // Remember: numerator has been abused as power_ten. So no need to assign it
- // to itself.
- ASSERT(numerator == power_ten);
- numerator->MultiplyByUInt64(significand);
-
- // denominator = 2 * 2^-exponent with exponent < 0.
- denominator->AssignUInt16(1);
- denominator->ShiftLeft(-exponent);
-
- if (need_boundary_deltas) {
- // Introduce a common denominator so that the deltas to the boundaries are
- // integers.
- numerator->ShiftLeft(1);
- denominator->ShiftLeft(1);
- // With this shift the boundaries have their correct value, since
- // delta_plus = 10^-estimated_power, and
- // delta_minus = 10^-estimated_power.
- // These assignments have been done earlier.
- // The adjustments if f == 2^p-1 (lower boundary is closer) are done later.
- }
-}
-
-
-// Let v = significand * 2^exponent.
-// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
-// and denominator. The functions GenerateShortestDigits and
-// GenerateCountedDigits will then convert this ratio to its decimal
-// representation d, with the required accuracy.
-// Then d * 10^estimated_power is the representation of v.
-// (Note: the fraction and the estimated_power might get adjusted before
-// generating the decimal representation.)
-//
-// The initial start values consist of:
-// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power.
-// - a scaled (common) denominator.
-// optionally (used by GenerateShortestDigits to decide if it has the shortest
-// decimal converting back to v):
-// - v - m-: the distance to the lower boundary.
-// - m+ - v: the distance to the upper boundary.
-//
-// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator.
-//
-// Let ep == estimated_power, then the returned values will satisfy:
-// v / 10^ep = numerator / denominator.
-// v's boundarys m- and m+:
-// m- / 10^ep == v / 10^ep - delta_minus / denominator
-// m+ / 10^ep == v / 10^ep + delta_plus / denominator
-// Or in other words:
-// m- == v - delta_minus * 10^ep / denominator;
-// m+ == v + delta_plus * 10^ep / denominator;
-//
-// Since 10^(k-1) <= v < 10^k (with k == estimated_power)
-// or 10^k <= v < 10^(k+1)
-// we then have 0.1 <= numerator/denominator < 1
-// or 1 <= numerator/denominator < 10
-//
-// It is then easy to kickstart the digit-generation routine.
-//
-// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST
-// or BIGNUM_DTOA_SHORTEST_SINGLE.
-
-static void InitialScaledStartValues(uint64_t significand,
- int exponent,
- bool lower_boundary_is_closer,
- int estimated_power,
- bool need_boundary_deltas,
- Bignum* numerator,
- Bignum* denominator,
- Bignum* delta_minus,
- Bignum* delta_plus) {
- if (exponent >= 0) {
- InitialScaledStartValuesPositiveExponent(
- significand, exponent, estimated_power, need_boundary_deltas,
- numerator, denominator, delta_minus, delta_plus);
- } else if (estimated_power >= 0) {
- InitialScaledStartValuesNegativeExponentPositivePower(
- significand, exponent, estimated_power, need_boundary_deltas,
- numerator, denominator, delta_minus, delta_plus);
- } else {
- InitialScaledStartValuesNegativeExponentNegativePower(
- significand, exponent, estimated_power, need_boundary_deltas,
- numerator, denominator, delta_minus, delta_plus);
- }
-
- if (need_boundary_deltas && lower_boundary_is_closer) {
- // The lower boundary is closer at half the distance of "normal" numbers.
- // Increase the common denominator and adapt all but the delta_minus.
- denominator->ShiftLeft(1); // *2
- numerator->ShiftLeft(1); // *2
- delta_plus->ShiftLeft(1); // *2
- }
-}
-
-
- // This routine multiplies numerator/denominator so that its value lies in the
- // range 1-10. That is, after a call to this function we have:
-// 1 <= (numerator + delta_plus) /denominator < 10.
-// Let numerator the input before modification and numerator' the argument
-// after modification, then the output-parameter decimal_point is such that
-// numerator / denominator * 10^estimated_power ==
-// numerator' / denominator' * 10^(decimal_point - 1)
-// In some cases estimated_power was too low, and this is already the case. We
-// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k ==
-// estimated_power) but do not touch the numerator or denominator.
-// Otherwise the routine multiplies the numerator and the deltas by 10.
-static void FixupMultiply10(int estimated_power, bool is_even,
- int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- bool in_range;
- if (is_even) {
- // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
- // are rounded to the closest floating-point number with even significand.
- in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
- } else {
- in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
- }
- if (in_range) {
- // Since numerator + delta_plus >= denominator we already have
- // 1 <= numerator/denominator < 10. Simply update the estimated_power.
- *decimal_point = estimated_power + 1;
- } else {
- *decimal_point = estimated_power;
- numerator->Times10();
- if (Bignum::Equal(*delta_minus, *delta_plus)) {
- delta_minus->Times10();
- delta_plus->AssignBignum(*delta_minus);
- } else {
- delta_minus->Times10();
- delta_plus->Times10();
- }
- }
-}
-
-} // namespace double_conversion
+
+#include "bignum-dtoa.h"
+
+#include "bignum.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+static int NormalizedExponent(uint64_t significand, int exponent) {
+ ASSERT(significand != 0);
+ while ((significand & Double::kHiddenBit) == 0) {
+ significand = significand << 1;
+ exponent = exponent - 1;
+ }
+ return exponent;
+}
+
+
+// Forward declarations:
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k.
+static int EstimatePower(int exponent);
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator.
+static void InitialScaledStartValues(uint64_t significand,
+ int exponent,
+ bool lower_boundary_is_closer,
+ int estimated_power,
+ bool need_boundary_deltas,
+ Bignum* numerator,
+ Bignum* denominator,
+ Bignum* delta_minus,
+ Bignum* delta_plus);
+ // Multiplies numerator/denominator so that its value lies in the range 1-10.
+// Returns decimal_point s.t.
+// v = numerator'/denominator' * 10^(decimal_point-1)
+// where numerator' and denominator' are the values of numerator and
+// denominator after the call to this function.
+static void FixupMultiply10(int estimated_power, bool is_even,
+ int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus);
+// Generates digits from the left to the right and stops when the generated
+// digits yield the shortest decimal representation of v.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus,
+ bool is_even,
+ Vector<char> buffer, int* length);
+// Generates 'requested_digits' after the decimal point.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length);
+// Generates 'count' digits of numerator/denominator.
+// Once 'count' digits have been produced rounds the result depending on the
+// remainder (remainders of exactly .5 round upwards). Might update the
+// decimal_point when rounding up (for example for 0.9999).
+static void GenerateCountedDigits(int count, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length);
+
+
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+ Vector<char> buffer, int* length, int* decimal_point) {
+ ASSERT(v > 0);
+ ASSERT(!Double(v).IsSpecial());
+ uint64_t significand;
+ int exponent;
+ bool lower_boundary_is_closer;
+ if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) {
+ float f = static_cast<float>(v);
+ ASSERT(f == v);
+ significand = Single(f).Significand();
+ exponent = Single(f).Exponent();
+ lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser();
+ } else {
+ significand = Double(v).Significand();
+ exponent = Double(v).Exponent();
+ lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser();
+ }
+ bool need_boundary_deltas =
+ (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE);
+
+ bool is_even = (significand & 1) == 0;
+ int normalized_exponent = NormalizedExponent(significand, exponent);
+ // estimated_power might be too low by 1.
+ int estimated_power = EstimatePower(normalized_exponent);
+
+ // Shortcut for Fixed.
+ // The requested digits correspond to the digits after the point. If the
+ // number is much too small, then there is no need to try to get any
+ // digits.
+ if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) {
+ buffer[0] = '\0';
+ *length = 0;
+ // Set decimal-point to -requested_digits. This is what Gay does.
+ // Note that it should not have any effect anyways since the string is
+ // empty.
+ *decimal_point = -requested_digits;
+ return;
+ }
+
+ Bignum numerator;
+ Bignum denominator;
+ Bignum delta_minus;
+ Bignum delta_plus;
+ // Make sure the bignum can grow large enough. The smallest double equals
+ // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
+ // The maximum double is 1.7976931348623157e308 which needs fewer than
+ // 308*4 binary digits.
+ ASSERT(Bignum::kMaxSignificantBits >= 324*4);
+ InitialScaledStartValues(significand, exponent, lower_boundary_is_closer,
+ estimated_power, need_boundary_deltas,
+ &numerator, &denominator,
+ &delta_minus, &delta_plus);
+ // We now have v = (numerator / denominator) * 10^estimated_power.
+ FixupMultiply10(estimated_power, is_even, decimal_point,
+ &numerator, &denominator,
+ &delta_minus, &delta_plus);
+ // We now have v = (numerator / denominator) * 10^(decimal_point-1), and
+ // 1 <= (numerator + delta_plus) / denominator < 10
+ switch (mode) {
+ case BIGNUM_DTOA_SHORTEST:
+ case BIGNUM_DTOA_SHORTEST_SINGLE:
+ GenerateShortestDigits(&numerator, &denominator,
+ &delta_minus, &delta_plus,
+ is_even, buffer, length);
+ break;
+ case BIGNUM_DTOA_FIXED:
+ BignumToFixed(requested_digits, decimal_point,
+ &numerator, &denominator,
+ buffer, length);
+ break;
+ case BIGNUM_DTOA_PRECISION:
+ GenerateCountedDigits(requested_digits, decimal_point,
+ &numerator, &denominator,
+ buffer, length);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ buffer[*length] = '\0';
+}
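
As a usage illustration (a hypothetical driver, not part of this file): the buffer receives bare digits and decimal_point is chosen so that the value equals 0.<digits> * 10^decimal_point, so the shortest form of 3.5 should come back as the digit string "35" with decimal_point == 1. Assuming the library's internal Vector<char> from utils.h:

#include <cstdio>
#include "bignum-dtoa.h"
#include "utils.h"

// Illustrative only; relies on double-conversion's internal headers.
static void bignum_dtoa_demo() {
    using namespace double_conversion;
    char digits[32];
    int length = 0;
    int point = 0;
    BignumDtoa(3.5, BIGNUM_DTOA_SHORTEST, 0,
               Vector<char>(digits, 32), &length, &point);
    // Expected: digits == "35", length == 2, point == 1 (0.35 * 10^1 == 3.5).
    std::printf("%.*s * 10^%d\n", length, digits, point);
}
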
+
+
+// The procedure starts generating digits from the left to the right and stops
+// when the generated digits yield the shortest decimal representation of v. A
+// decimal representation of v is a number lying closer to v than to any other
+// double, so it converts to v when read.
+//
+// This is true if d, the decimal representation, is between m- and m+, the
+// upper and lower boundaries. d must be strictly between them if !is_even.
+// m- := (numerator - delta_minus) / denominator
+// m+ := (numerator + delta_plus) / denominator
+//
+// Precondition: 0 <= (numerator+delta_plus) / denominator < 10.
+// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit
+// will be produced. This should be the standard precondition.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus,
+ bool is_even,
+ Vector<char> buffer, int* length) {
+ // Small optimization: if delta_minus and delta_plus are the same just reuse
+ // one of the two bignums.
+ if (Bignum::Equal(*delta_minus, *delta_plus)) {
+ delta_plus = delta_minus;
+ }
+ *length = 0;
+ for (;;) {
+ uint16_t digit;
+ digit = numerator->DivideModuloIntBignum(*denominator);
+ ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
+ // digit = numerator / denominator (integer division).
+ // numerator = numerator % denominator.
+ buffer[(*length)++] = static_cast<char>(digit + '0');
+
+ // Can we stop already?
+ // If the remainder of the division is less than the distance to the lower
+ // boundary we can stop. In this case we simply round down (discarding the
+ // remainder).
+ // Similarly we test if we can round up (using the upper boundary).
+ bool in_delta_room_minus;
+ bool in_delta_room_plus;
+ if (is_even) {
+ in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus);
+ } else {
+ in_delta_room_minus = Bignum::Less(*numerator, *delta_minus);
+ }
+ if (is_even) {
+ in_delta_room_plus =
+ Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+ } else {
+ in_delta_room_plus =
+ Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+ }
+ if (!in_delta_room_minus && !in_delta_room_plus) {
+ // Prepare for next iteration.
+ numerator->Times10();
+ delta_minus->Times10();
+ // We optimized delta_plus to be equal to delta_minus (if they share the
+ // same value). So don't multiply delta_plus if they point to the same
+ // object.
+ if (delta_minus != delta_plus) {
+ delta_plus->Times10();
+ }
+ } else if (in_delta_room_minus && in_delta_room_plus) {
+ // Let's see if 2*numerator < denominator.
+ // If yes, then the next digit would be < 5 and we can round down.
+ int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator);
+ if (compare < 0) {
+ // Remaining digits are less than .5. -> Round down (== do nothing).
+ } else if (compare > 0) {
+ // Remaining digits are more than .5 of denominator. -> Round up.
+ // Note that the last digit could not be a '9' as otherwise the whole
+ // loop would have stopped earlier.
+ // We still have an assert here in case the preconditions were not
+ // satisfied.
+ ASSERT(buffer[(*length) - 1] != '9');
+ buffer[(*length) - 1]++;
+ } else {
+ // Halfway case.
+ // TODO(floitsch): need a way to solve half-way cases.
+ // For now let's round towards even (since this is what Gay seems to
+ // do).
+
+ if ((buffer[(*length) - 1] - '0') % 2 == 0) {
+ // Round down => Do nothing.
+ } else {
+ ASSERT(buffer[(*length) - 1] != '9');
+ buffer[(*length) - 1]++;
+ }
+ }
+ return;
+ } else if (in_delta_room_minus) {
+ // Round down (== do nothing).
+ return;
+ } else { // in_delta_room_plus
+ // Round up.
+ // Note again that the last digit could not be '9' since this would have
+ // stopped the loop earlier.
+ // We still have an ASSERT here, in case the preconditions were not
+ // satisfied.
+ ASSERT(buffer[(*length) -1] != '9');
+ buffer[(*length) - 1]++;
+ return;
+ }
+ }
+}
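
For intuition, an illustrative example (not from the sources): for the double nearest to 0.3, the very first generated digit, '3' with decimal_point == 0, already lies strictly between the boundaries m- and m+, so the loop above emits a single digit and returns; that is why the shortest round-trip form of that double prints back as 0.3.
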
+
+
+// Let v = numerator / denominator < 10.
+// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point)
+ // from left to right. Once 'count' digits have been produced we decide whether
+// to round up or down. Remainders of exactly .5 round upwards. Numbers such
+// as 9.999999 propagate a carry all the way, and change the
+// exponent (decimal_point), when rounding upwards.
+static void GenerateCountedDigits(int count, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char> buffer, int* length) {
+ ASSERT(count >= 0);
+ for (int i = 0; i < count - 1; ++i) {
+ uint16_t digit;
+ digit = numerator->DivideModuloIntBignum(*denominator);
+ ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
+ // digit = numerator / denominator (integer division).
+ // numerator = numerator % denominator.
+ buffer[i] = static_cast<char>(digit + '0');
+ // Prepare for next iteration.
+ numerator->Times10();
+ }
+ // Generate the last digit.
+ uint16_t digit;
+ digit = numerator->DivideModuloIntBignum(*denominator);
+ if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+ digit++;
+ }
+ ASSERT(digit <= 10);
+ buffer[count - 1] = static_cast<char>(digit + '0');
+ // Correct bad digits (in case we had a sequence of '9's). Propagate the
+ // carry until we hit a non-'9' or until we reach the first digit.
+ for (int i = count - 1; i > 0; --i) {
+ if (buffer[i] != '0' + 10) break;
+ buffer[i] = '0';
+ buffer[i - 1]++;
+ }
+ if (buffer[0] == '0' + 10) {
+ // Propagate a carry past the top place.
+ buffer[0] = '1';
+ (*decimal_point)++;
+ }
+ *length = count;
+}
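
A quick worked example of the carry handling above (illustrative, not from the original sources): generating three counted digits of a ratio equal to 9.996 produces 9 and 9, and the last digit starts as 9 with a remainder of .6, so it is bumped to 10; the carry then propagates left, the buffer ends up as "100", and decimal_point is incremented, which is exactly the 9.999... case mentioned in the function comment.
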
+
+
+// Generates 'requested_digits' after the decimal point. It might omit
+// trailing '0's. If the input number is too small then no digits at all are
+// generated (ex.: 2 fixed digits for 0.00001).
+//
+// Input verifies: 1 <= (numerator + delta) / denominator < 10.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length) {
+ // Note that we have to look at more than just the requested_digits, since
+ // a number could be rounded up. Example: v=0.5 with requested_digits=0.
+ // Even though the power of v equals 0 we can't just stop here.
+ if (-(*decimal_point) > requested_digits) {
+ // The number is definitively too small.
+ // Ex: 0.001 with requested_digits == 1.
+ // Set decimal-point to -requested_digits. This is what Gay does.
+ // Note that it should not have any effect anyways since the string is
+ // empty.
+ *decimal_point = -requested_digits;
+ *length = 0;
+ return;
+ } else if (-(*decimal_point) == requested_digits) {
+ // We only need to verify if the number rounds down or up.
+ // Ex: 0.04 and 0.06 with requested_digits == 1.
+ ASSERT(*decimal_point == -requested_digits);
+ // Initially the fraction lies in range (1, 10]. Multiply the denominator
+ // by 10 so that we can compare more easily.
+ denominator->Times10();
+ if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+ // If the fraction is >= 0.5 then we have to include the rounded
+ // digit.
+ buffer[0] = '1';
+ *length = 1;
+ (*decimal_point)++;
+ } else {
+ // Note that we caught most of similar cases earlier.
+ *length = 0;
+ }
+ return;
+ } else {
+ // The requested digits correspond to the digits after the point.
+ // The variable 'needed_digits' includes the digits before the point.
+ int needed_digits = (*decimal_point) + requested_digits;
+ GenerateCountedDigits(needed_digits, decimal_point,
+ numerator, denominator,
+ buffer, length);
+ }
+}
+
+
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
+// v = f * 2^exponent and 2^52 <= f < 2^53.
+// v is hence a normalized double with the given exponent. The output is an
+ // approximation for the exponent of the decimal approximation .digits * 10^k.
+//
+// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
+// Note: this property holds for v's upper boundary m+ too.
+// 10^k <= m+ < 10^k+1.
+// (see explanation below).
+//
+// Examples:
+// EstimatePower(0) => 16
+// EstimatePower(-52) => 0
+//
+ // Note: e >= 0 => EstimatePower(e) > 0. No similar claim can be made for e<0.
+static int EstimatePower(int exponent) {
+ // This function estimates log10 of v where v = f*2^e (with e == exponent).
+ // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)).
+ // Note that f is bounded by its container size. Let p = 53 (the double's
+ // significand size). Then 2^(p-1) <= f < 2^p.
+ //
+ // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close
+ // to log2(v) the function is simplified to (e+len(f)-1)/log2(10).
+ // The computed number undershoots by less than 0.631 (when we compute log3
+ // and not log10).
+ //
+ // Optimization: since we only need an approximated result this computation
+ // can be performed on 64 bit integers. On x86/x64 architecture the speedup is
+ // not really measurable, though.
+ //
+ // Since we want to avoid overshooting we decrement by 1e-10 so that
+ // floating-point imprecisions don't affect us.
+ //
+ // Explanation for v's boundary m+: the computation takes advantage of
+ // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement
+ // (even for denormals where the delta can be much more important).
+
+ const double k1Log10 = 0.30102999566398114; // 1/lg(10)
+
+ // For doubles len(f) == 53 (don't forget the hidden bit).
+ const int kSignificandSize = Double::kSignificandSize;
+ double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
+ return static_cast<int>(estimate);
+}
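
A quick standalone check of the two examples quoted in the comment above (a sketch that just reproduces the same formula; it is not library code):

#include <cmath>

// k = ceil((e + p - 1) * log10(2) - 1e-10), with p == 53 for doubles.
static int estimate_power_sketch(int exponent) {
    const double k1Log10 = 0.30102999566398114;  // 1/lg(10)
    return static_cast<int>(std::ceil((exponent + 53 - 1) * k1Log10 - 1e-10));
}

// estimate_power_sketch(0)   == 16   (matches "EstimatePower(0) => 16")
// estimate_power_sketch(-52) == 0    (matches "EstimatePower(-52) => 0")
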
+
+
+// See comments for InitialScaledStartValues.
+static void InitialScaledStartValuesPositiveExponent(
+ uint64_t significand, int exponent,
+ int estimated_power, bool need_boundary_deltas,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ // A positive exponent implies a positive power.
+ ASSERT(estimated_power >= 0);
+ // Since the estimated_power is positive we simply multiply the denominator
+ // by 10^estimated_power.
+
+ // numerator = v.
+ numerator->AssignUInt64(significand);
+ numerator->ShiftLeft(exponent);
+ // denominator = 10^estimated_power.
+ denominator->AssignPowerUInt16(10, estimated_power);
+
+ if (need_boundary_deltas) {
+ // Introduce a common denominator so that the deltas to the boundaries are
+ // integers.
+ denominator->ShiftLeft(1);
+ numerator->ShiftLeft(1);
+ // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+ // denominator (of 2) delta_plus equals 2^e.
+ delta_plus->AssignUInt16(1);
+ delta_plus->ShiftLeft(exponent);
+ // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+ delta_minus->AssignUInt16(1);
+ delta_minus->ShiftLeft(exponent);
+ }
+}
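
A toy illustration of the positive-exponent case (an editorial note, not part of the patch): for v = 3 * 2^2 = 12 with estimated_power = 2, the code above yields numerator = 3 << 2 = 12 and denominator = 10^2 = 100, so numerator/denominator = 0.12 = v / 10^2. With boundary deltas requested, both sides are doubled (24/200) and delta_plus = delta_minus = 2^2 = 4, which over the common denominator of 200 represents exactly the half-step 1/2 * 2^2 = 2 away from v.
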
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentPositivePower(
+ uint64_t significand, int exponent,
+ int estimated_power, bool need_boundary_deltas,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ // v = f * 2^e with e < 0, and with estimated_power >= 0.
+ // This means that e is close to 0 (have a look at how estimated_power is
+ // computed).
+
+ // numerator = significand
+ // since v = significand * 2^exponent this is equivalent to
+  //  numerator = v * 2^-exponent
+ numerator->AssignUInt64(significand);
+ // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
+ denominator->AssignPowerUInt16(10, estimated_power);
+ denominator->ShiftLeft(-exponent);
+
+ if (need_boundary_deltas) {
+ // Introduce a common denominator so that the deltas to the boundaries are
+ // integers.
+ denominator->ShiftLeft(1);
+ numerator->ShiftLeft(1);
+ // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+ // denominator (of 2) delta_plus equals 2^e.
+ // Given that the denominator already includes v's exponent the distance
+ // to the boundaries is simply 1.
+ delta_plus->AssignUInt16(1);
+ // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+ delta_minus->AssignUInt16(1);
+ }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentNegativePower(
+ uint64_t significand, int exponent,
+ int estimated_power, bool need_boundary_deltas,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ // Instead of multiplying the denominator with 10^estimated_power we
+ // multiply all values (numerator and deltas) by 10^-estimated_power.
+
+ // Use numerator as temporary container for power_ten.
+ Bignum* power_ten = numerator;
+ power_ten->AssignPowerUInt16(10, -estimated_power);
+
+ if (need_boundary_deltas) {
+ // Since power_ten == numerator we must make a copy of 10^estimated_power
+ // before we complete the computation of the numerator.
+ // delta_plus = delta_minus = 10^estimated_power
+ delta_plus->AssignBignum(*power_ten);
+ delta_minus->AssignBignum(*power_ten);
+ }
+
+ // numerator = significand * 2 * 10^-estimated_power
+ // since v = significand * 2^exponent this is equivalent to
+ // numerator = v * 10^-estimated_power * 2 * 2^-exponent.
+ // Remember: numerator has been abused as power_ten. So no need to assign it
+ // to itself.
+ ASSERT(numerator == power_ten);
+ numerator->MultiplyByUInt64(significand);
+
+ // denominator = 2 * 2^-exponent with exponent < 0.
+ denominator->AssignUInt16(1);
+ denominator->ShiftLeft(-exponent);
+
+ if (need_boundary_deltas) {
+ // Introduce a common denominator so that the deltas to the boundaries are
+ // integers.
+ numerator->ShiftLeft(1);
+ denominator->ShiftLeft(1);
+ // With this shift the boundaries have their correct value, since
+ // delta_plus = 10^-estimated_power, and
+ // delta_minus = 10^-estimated_power.
+ // These assignments have been done earlier.
+ // The adjustments if f == 2^p-1 (lower boundary is closer) are done later.
+ }
+}
+
+
+// Let v = significand * 2^exponent.
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator. The functions GenerateShortestDigits and
+// GenerateCountedDigits will then convert this ratio to its decimal
+// representation d, with the required accuracy.
+// Then d * 10^estimated_power is the representation of v.
+// (Note: the fraction and the estimated_power might get adjusted before
+// generating the decimal representation.)
+//
+// The initial start values consist of:
+// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power.
+// - a scaled (common) denominator.
+// optionally (used by GenerateShortestDigits to decide if it has the shortest
+// decimal converting back to v):
+// - v - m-: the distance to the lower boundary.
+// - m+ - v: the distance to the upper boundary.
+//
+// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator.
+//
+// Let ep == estimated_power, then the returned values will satisfy:
+// v / 10^ep = numerator / denominator.
+// v's boundaries m- and m+:
+// m- / 10^ep == v / 10^ep - delta_minus / denominator
+// m+ / 10^ep == v / 10^ep + delta_plus / denominator
+// Or in other words:
+// m- == v - delta_minus * 10^ep / denominator;
+// m+ == v + delta_plus * 10^ep / denominator;
+//
+// Since 10^(k-1) <= v < 10^k (with k == estimated_power)
+// or 10^k <= v < 10^(k+1)
+// we then have 0.1 <= numerator/denominator < 1
+// or 1 <= numerator/denominator < 10
+//
+// It is then easy to kickstart the digit-generation routine.
+//
+// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST
+// or BIGNUM_DTOA_SHORTEST_SINGLE.
+
+static void InitialScaledStartValues(uint64_t significand,
+ int exponent,
+ bool lower_boundary_is_closer,
+ int estimated_power,
+ bool need_boundary_deltas,
+ Bignum* numerator,
+ Bignum* denominator,
+ Bignum* delta_minus,
+ Bignum* delta_plus) {
+ if (exponent >= 0) {
+ InitialScaledStartValuesPositiveExponent(
+ significand, exponent, estimated_power, need_boundary_deltas,
+ numerator, denominator, delta_minus, delta_plus);
+ } else if (estimated_power >= 0) {
+ InitialScaledStartValuesNegativeExponentPositivePower(
+ significand, exponent, estimated_power, need_boundary_deltas,
+ numerator, denominator, delta_minus, delta_plus);
+ } else {
+ InitialScaledStartValuesNegativeExponentNegativePower(
+ significand, exponent, estimated_power, need_boundary_deltas,
+ numerator, denominator, delta_minus, delta_plus);
+ }
+
+ if (need_boundary_deltas && lower_boundary_is_closer) {
+ // The lower boundary is closer at half the distance of "normal" numbers.
+ // Increase the common denominator and adapt all but the delta_minus.
+ denominator->ShiftLeft(1); // *2
+ numerator->ShiftLeft(1); // *2
+ delta_plus->ShiftLeft(1); // *2
+ }
+}
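
A brief editorial note on the final adjustment: when the significand is a power of two, the lower neighbor is only half as far away as the upper one. For example, for v = 1.0 (f = 2^52, e = -52) the upper boundary is m+ = 1 + 2^-53 while the lower boundary is m- = 1 - 2^-54. Doubling numerator, denominator and delta_plus while leaving delta_minus untouched therefore makes delta_minus stand for exactly half the distance that delta_plus stands for.
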
+
+
+// This routine multiplies numerator/denominator so that its value lies in the
+// range [1, 10). That is, after a call to this function we have:
+//    1 <= (numerator + delta_plus) / denominator < 10.
+// Let numerator be the input before modification and numerator' the value
+// after modification; then the output parameter decimal_point is such that
+// numerator / denominator * 10^estimated_power ==
+// numerator' / denominator' * 10^(decimal_point - 1)
+// In some cases estimated_power was too low and the ratio already lies in this
+// range. We then simply adjust the power so that 10^(k-1) <= v < 10^k (with
+// k == estimated_power) but do not touch the numerator or denominator.
+// Otherwise the routine multiplies the numerator and the deltas by 10.
+static void FixupMultiply10(int estimated_power, bool is_even,
+ int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ bool in_range;
+ if (is_even) {
+ // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
+ // are rounded to the closest floating-point number with even significand.
+ in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+ } else {
+ in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+ }
+ if (in_range) {
+ // Since numerator + delta_plus >= denominator we already have
+ // 1 <= numerator/denominator < 10. Simply update the estimated_power.
+ *decimal_point = estimated_power + 1;
+ } else {
+ *decimal_point = estimated_power;
+ numerator->Times10();
+ if (Bignum::Equal(*delta_minus, *delta_plus)) {
+ delta_minus->Times10();
+ delta_plus->AssignBignum(*delta_minus);
+ } else {
+ delta_minus->Times10();
+ delta_plus->Times10();
+ }
+ }
+}
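
Continuing the toy v = 12 example (editorial note): after scaling we have numerator = 24, denominator = 200 and delta_plus = 4, so numerator + delta_plus = 28 < 200 and the ratio is not yet in range. The routine therefore sets decimal_point = estimated_power = 2 and multiplies the numerator and both deltas by 10, giving 240/200 = 1.2; digit generation then produces "12" with decimal_point = 2, i.e. 0.12 * 10^2 = 12.
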
+
+} // namespace double_conversion
diff --git a/contrib/libs/double-conversion/bignum-dtoa.h b/contrib/libs/double-conversion/bignum-dtoa.h
index 25904a5788..34b961992d 100644
--- a/contrib/libs/double-conversion/bignum-dtoa.h
+++ b/contrib/libs/double-conversion/bignum-dtoa.h
@@ -1,84 +1,84 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_
-#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-enum BignumDtoaMode {
- // Return the shortest correct representation.
- // For example the output of 0.299999999999999988897 is (the less accurate but
- // correct) 0.3.
- BIGNUM_DTOA_SHORTEST,
- // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats.
- BIGNUM_DTOA_SHORTEST_SINGLE,
- // Return a fixed number of digits after the decimal point.
- // For instance fixed(0.1, 4) becomes 0.1000
- // If the input number is big, the output will be big.
- BIGNUM_DTOA_FIXED,
- // Return a fixed number of digits, no matter what the exponent is.
- BIGNUM_DTOA_PRECISION
-};
-
-// Converts the given double 'v' to ascii.
-// The result should be interpreted as buffer * 10^(point-length).
-// The buffer will be null-terminated.
-//
-// The input v must be > 0 and different from NaN, and Infinity.
-//
-// The output depends on the given mode:
-// - SHORTEST: produce the least amount of digits for which the internal
-// identity requirement is still satisfied. If the digits are printed
-// (together with the correct exponent) then reading this number will give
-// 'v' again. The buffer will choose the representation that is closest to
-//    'v'. If there are two at the same distance, then the number is rounded up.
-// In this mode the 'requested_digits' parameter is ignored.
-// - FIXED: produces digits necessary to print a given number with
-// 'requested_digits' digits after the decimal point. The produced digits
-// might be too short in which case the caller has to fill the gaps with '0's.
-// Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
-// Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns
-// buffer="2", point=0.
-// Note: the length of the returned buffer has no meaning wrt the significance
-// of its digits. That is, just because it contains '0's does not mean that
-// any other digit would not satisfy the internal identity requirement.
-// - PRECISION: produces 'requested_digits' where the first digit is not '0'.
-// Even though the length of produced digits usually equals
-// 'requested_digits', the function is allowed to return fewer digits, in
-// which case the caller has to fill the missing digits with '0's.
-// Halfway cases are again rounded up.
-// 'BignumDtoa' expects the given buffer to be big enough to hold all digits
-// and a terminating null-character.
-void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
- Vector<char> buffer, int* length, int* point);
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+enum BignumDtoaMode {
+ // Return the shortest correct representation.
+ // For example the output of 0.299999999999999988897 is (the less accurate but
+ // correct) 0.3.
+ BIGNUM_DTOA_SHORTEST,
+ // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats.
+ BIGNUM_DTOA_SHORTEST_SINGLE,
+ // Return a fixed number of digits after the decimal point.
+ // For instance fixed(0.1, 4) becomes 0.1000
+ // If the input number is big, the output will be big.
+ BIGNUM_DTOA_FIXED,
+ // Return a fixed number of digits, no matter what the exponent is.
+ BIGNUM_DTOA_PRECISION
+};
+
+// Converts the given double 'v' to ascii.
+// The result should be interpreted as buffer * 10^(point-length).
+// The buffer will be null-terminated.
+//
+// The input v must be > 0 and different from NaN, and Infinity.
+//
+// The output depends on the given mode:
+// - SHORTEST: produce the least amount of digits for which the internal
+// identity requirement is still satisfied. If the digits are printed
+// (together with the correct exponent) then reading this number will give
+// 'v' again. The buffer will choose the representation that is closest to
+//    'v'. If there are two at the same distance, then the number is rounded up.
+// In this mode the 'requested_digits' parameter is ignored.
+// - FIXED: produces digits necessary to print a given number with
+// 'requested_digits' digits after the decimal point. The produced digits
+// might be too short in which case the caller has to fill the gaps with '0's.
+// Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
+// Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns
+// buffer="2", point=0.
+// Note: the length of the returned buffer has no meaning wrt the significance
+// of its digits. That is, just because it contains '0's does not mean that
+// any other digit would not satisfy the internal identity requirement.
+// - PRECISION: produces 'requested_digits' where the first digit is not '0'.
+// Even though the length of produced digits usually equals
+// 'requested_digits', the function is allowed to return fewer digits, in
+// which case the caller has to fill the missing digits with '0's.
+// Halfway cases are again rounded up.
+// 'BignumDtoa' expects the given buffer to be big enough to hold all digits
+// and a terminating null-character.
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+ Vector<char> buffer, int* length, int* point);
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
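
For orientation, a minimal editorial sketch of driving the interface declared above (assuming the Vector(pointer, length) constructor from utils.h and a comfortably large buffer; not part of the patch):

#include <cstdio>
#include "bignum-dtoa.h"

int main() {
  using namespace double_conversion;
  char digits[64];
  int length = 0;
  int point = 0;
  // Shortest form of 0.1; the result reads as buffer * 10^(point - length).
  BignumDtoa(0.1, BIGNUM_DTOA_SHORTEST, 0,
             Vector<char>(digits, 64), &length, &point);
  std::printf("%s * 10^%d\n", digits, point - length);  // prints "1 * 10^-1"
  return 0;
}
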
diff --git a/contrib/libs/double-conversion/bignum.cc b/contrib/libs/double-conversion/bignum.cc
index 490071facd..d077eef3f5 100644
--- a/contrib/libs/double-conversion/bignum.cc
+++ b/contrib/libs/double-conversion/bignum.cc
@@ -1,767 +1,767 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "bignum.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-Bignum::Bignum()
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "bignum.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+Bignum::Bignum()
: bigits_buffer_(), bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) {
- for (int i = 0; i < kBigitCapacity; ++i) {
- bigits_[i] = 0;
- }
-}
-
-
-template<typename S>
-static int BitSize(S value) {
- (void) value; // Mark variable as used.
- return 8 * sizeof(value);
-}
-
-// Guaranteed to lie in one Bigit.
-void Bignum::AssignUInt16(uint16_t value) {
- ASSERT(kBigitSize >= BitSize(value));
- Zero();
- if (value == 0) return;
-
- EnsureCapacity(1);
- bigits_[0] = value;
- used_digits_ = 1;
-}
-
-
-void Bignum::AssignUInt64(uint64_t value) {
- const int kUInt64Size = 64;
-
- Zero();
- if (value == 0) return;
-
- int needed_bigits = kUInt64Size / kBigitSize + 1;
- EnsureCapacity(needed_bigits);
- for (int i = 0; i < needed_bigits; ++i) {
- bigits_[i] = value & kBigitMask;
- value = value >> kBigitSize;
- }
- used_digits_ = needed_bigits;
- Clamp();
-}
-
-
-void Bignum::AssignBignum(const Bignum& other) {
- exponent_ = other.exponent_;
- for (int i = 0; i < other.used_digits_; ++i) {
- bigits_[i] = other.bigits_[i];
- }
- // Clear the excess digits (if there were any).
- for (int i = other.used_digits_; i < used_digits_; ++i) {
- bigits_[i] = 0;
- }
- used_digits_ = other.used_digits_;
-}
-
-
-static uint64_t ReadUInt64(Vector<const char> buffer,
- int from,
- int digits_to_read) {
- uint64_t result = 0;
- for (int i = from; i < from + digits_to_read; ++i) {
- int digit = buffer[i] - '0';
- ASSERT(0 <= digit && digit <= 9);
- result = result * 10 + digit;
- }
- return result;
-}
-
-
-void Bignum::AssignDecimalString(Vector<const char> value) {
- // 2^64 = 18446744073709551616 > 10^19
- const int kMaxUint64DecimalDigits = 19;
- Zero();
- int length = value.length();
- unsigned int pos = 0;
- // Let's just say that each digit needs 4 bits.
- while (length >= kMaxUint64DecimalDigits) {
- uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
- pos += kMaxUint64DecimalDigits;
- length -= kMaxUint64DecimalDigits;
- MultiplyByPowerOfTen(kMaxUint64DecimalDigits);
- AddUInt64(digits);
- }
- uint64_t digits = ReadUInt64(value, pos, length);
- MultiplyByPowerOfTen(length);
- AddUInt64(digits);
- Clamp();
-}
-
-
-static int HexCharValue(char c) {
- if ('0' <= c && c <= '9') return c - '0';
- if ('a' <= c && c <= 'f') return 10 + c - 'a';
- ASSERT('A' <= c && c <= 'F');
- return 10 + c - 'A';
-}
-
-
-void Bignum::AssignHexString(Vector<const char> value) {
- Zero();
- int length = value.length();
-
- int needed_bigits = length * 4 / kBigitSize + 1;
- EnsureCapacity(needed_bigits);
- int string_index = length - 1;
- for (int i = 0; i < needed_bigits - 1; ++i) {
- // These bigits are guaranteed to be "full".
- Chunk current_bigit = 0;
- for (int j = 0; j < kBigitSize / 4; j++) {
- current_bigit += HexCharValue(value[string_index--]) << (j * 4);
- }
- bigits_[i] = current_bigit;
- }
- used_digits_ = needed_bigits - 1;
-
- Chunk most_significant_bigit = 0; // Could be = 0;
- for (int j = 0; j <= string_index; ++j) {
- most_significant_bigit <<= 4;
- most_significant_bigit += HexCharValue(value[j]);
- }
- if (most_significant_bigit != 0) {
- bigits_[used_digits_] = most_significant_bigit;
- used_digits_++;
- }
- Clamp();
-}
-
-
-void Bignum::AddUInt64(uint64_t operand) {
- if (operand == 0) return;
- Bignum other;
- other.AssignUInt64(operand);
- AddBignum(other);
-}
-
-
-void Bignum::AddBignum(const Bignum& other) {
- ASSERT(IsClamped());
- ASSERT(other.IsClamped());
-
- // If this has a greater exponent than other append zero-bigits to this.
- // After this call exponent_ <= other.exponent_.
- Align(other);
-
- // There are two possibilities:
- // aaaaaaaaaaa 0000 (where the 0s represent a's exponent)
- // bbbbb 00000000
- // ----------------
- // ccccccccccc 0000
- // or
- // aaaaaaaaaa 0000
- // bbbbbbbbb 0000000
- // -----------------
- // cccccccccccc 0000
- // In both cases we might need a carry bigit.
-
- EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_);
- Chunk carry = 0;
- int bigit_pos = other.exponent_ - exponent_;
- ASSERT(bigit_pos >= 0);
- for (int i = 0; i < other.used_digits_; ++i) {
- Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry;
- bigits_[bigit_pos] = sum & kBigitMask;
- carry = sum >> kBigitSize;
- bigit_pos++;
- }
-
- while (carry != 0) {
- Chunk sum = bigits_[bigit_pos] + carry;
- bigits_[bigit_pos] = sum & kBigitMask;
- carry = sum >> kBigitSize;
- bigit_pos++;
- }
- used_digits_ = Max(bigit_pos, used_digits_);
- ASSERT(IsClamped());
-}
-
-
-void Bignum::SubtractBignum(const Bignum& other) {
- ASSERT(IsClamped());
- ASSERT(other.IsClamped());
- // We require this to be bigger than other.
- ASSERT(LessEqual(other, *this));
-
- Align(other);
-
- int offset = other.exponent_ - exponent_;
- Chunk borrow = 0;
- int i;
- for (i = 0; i < other.used_digits_; ++i) {
- ASSERT((borrow == 0) || (borrow == 1));
- Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow;
- bigits_[i + offset] = difference & kBigitMask;
- borrow = difference >> (kChunkSize - 1);
- }
- while (borrow != 0) {
- Chunk difference = bigits_[i + offset] - borrow;
- bigits_[i + offset] = difference & kBigitMask;
- borrow = difference >> (kChunkSize - 1);
- ++i;
- }
- Clamp();
-}
-
-
-void Bignum::ShiftLeft(int shift_amount) {
- if (used_digits_ == 0) return;
- exponent_ += shift_amount / kBigitSize;
- int local_shift = shift_amount % kBigitSize;
- EnsureCapacity(used_digits_ + 1);
- BigitsShiftLeft(local_shift);
-}
-
-
-void Bignum::MultiplyByUInt32(uint32_t factor) {
- if (factor == 1) return;
- if (factor == 0) {
- Zero();
- return;
- }
- if (used_digits_ == 0) return;
-
- // The product of a bigit with the factor is of size kBigitSize + 32.
- // Assert that this number + 1 (for the carry) fits into double chunk.
- ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1);
- DoubleChunk carry = 0;
- for (int i = 0; i < used_digits_; ++i) {
- DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry;
- bigits_[i] = static_cast<Chunk>(product & kBigitMask);
- carry = (product >> kBigitSize);
- }
- while (carry != 0) {
- EnsureCapacity(used_digits_ + 1);
- bigits_[used_digits_] = carry & kBigitMask;
- used_digits_++;
- carry >>= kBigitSize;
- }
-}
-
-
-void Bignum::MultiplyByUInt64(uint64_t factor) {
- if (factor == 1) return;
- if (factor == 0) {
- Zero();
- return;
- }
- ASSERT(kBigitSize < 32);
- uint64_t carry = 0;
- uint64_t low = factor & 0xFFFFFFFF;
- uint64_t high = factor >> 32;
- for (int i = 0; i < used_digits_; ++i) {
- uint64_t product_low = low * bigits_[i];
- uint64_t product_high = high * bigits_[i];
- uint64_t tmp = (carry & kBigitMask) + product_low;
- bigits_[i] = tmp & kBigitMask;
- carry = (carry >> kBigitSize) + (tmp >> kBigitSize) +
- (product_high << (32 - kBigitSize));
- }
- while (carry != 0) {
- EnsureCapacity(used_digits_ + 1);
- bigits_[used_digits_] = carry & kBigitMask;
- used_digits_++;
- carry >>= kBigitSize;
- }
-}
-
-
-void Bignum::MultiplyByPowerOfTen(int exponent) {
- const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d);
- const uint16_t kFive1 = 5;
- const uint16_t kFive2 = kFive1 * 5;
- const uint16_t kFive3 = kFive2 * 5;
- const uint16_t kFive4 = kFive3 * 5;
- const uint16_t kFive5 = kFive4 * 5;
- const uint16_t kFive6 = kFive5 * 5;
- const uint32_t kFive7 = kFive6 * 5;
- const uint32_t kFive8 = kFive7 * 5;
- const uint32_t kFive9 = kFive8 * 5;
- const uint32_t kFive10 = kFive9 * 5;
- const uint32_t kFive11 = kFive10 * 5;
- const uint32_t kFive12 = kFive11 * 5;
- const uint32_t kFive13 = kFive12 * 5;
- const uint32_t kFive1_to_12[] =
- { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6,
- kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 };
-
- ASSERT(exponent >= 0);
- if (exponent == 0) return;
- if (used_digits_ == 0) return;
-
- // We shift by exponent at the end just before returning.
- int remaining_exponent = exponent;
- while (remaining_exponent >= 27) {
- MultiplyByUInt64(kFive27);
- remaining_exponent -= 27;
- }
- while (remaining_exponent >= 13) {
- MultiplyByUInt32(kFive13);
- remaining_exponent -= 13;
- }
- if (remaining_exponent > 0) {
- MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]);
- }
- ShiftLeft(exponent);
-}
-
-
-void Bignum::Square() {
- ASSERT(IsClamped());
- int product_length = 2 * used_digits_;
- EnsureCapacity(product_length);
-
- // Comba multiplication: compute each column separately.
- // Example: r = a2a1a0 * b2b1b0.
- // r = 1 * a0b0 +
- // 10 * (a1b0 + a0b1) +
- // 100 * (a2b0 + a1b1 + a0b2) +
- // 1000 * (a2b1 + a1b2) +
- // 10000 * a2b2
- //
- // In the worst case we have to accumulate nb-digits products of digit*digit.
- //
- // Assert that the additional number of bits in a DoubleChunk are enough to
- // sum up used_digits of Bigit*Bigit.
- if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) {
- UNIMPLEMENTED();
- }
- DoubleChunk accumulator = 0;
- // First shift the digits so we don't overwrite them.
- int copy_offset = used_digits_;
- for (int i = 0; i < used_digits_; ++i) {
- bigits_[copy_offset + i] = bigits_[i];
- }
- // We have two loops to avoid some 'if's in the loop.
- for (int i = 0; i < used_digits_; ++i) {
- // Process temporary digit i with power i.
- // The sum of the two indices must be equal to i.
- int bigit_index1 = i;
- int bigit_index2 = 0;
- // Sum all of the sub-products.
- while (bigit_index1 >= 0) {
- Chunk chunk1 = bigits_[copy_offset + bigit_index1];
- Chunk chunk2 = bigits_[copy_offset + bigit_index2];
- accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
- bigit_index1--;
- bigit_index2++;
- }
- bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
- accumulator >>= kBigitSize;
- }
- for (int i = used_digits_; i < product_length; ++i) {
- int bigit_index1 = used_digits_ - 1;
- int bigit_index2 = i - bigit_index1;
- // Invariant: sum of both indices is again equal to i.
- // Inner loop runs 0 times on last iteration, emptying accumulator.
- while (bigit_index2 < used_digits_) {
- Chunk chunk1 = bigits_[copy_offset + bigit_index1];
- Chunk chunk2 = bigits_[copy_offset + bigit_index2];
- accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
- bigit_index1--;
- bigit_index2++;
- }
- // The overwritten bigits_[i] will never be read in further loop iterations,
- // because bigit_index1 and bigit_index2 are always greater
- // than i - used_digits_.
- bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
- accumulator >>= kBigitSize;
- }
- // Since the result was guaranteed to lie inside the number the
- // accumulator must be 0 now.
- ASSERT(accumulator == 0);
-
- // Don't forget to update the used_digits and the exponent.
- used_digits_ = product_length;
- exponent_ *= 2;
- Clamp();
-}
-
-
-void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) {
- ASSERT(base != 0);
- ASSERT(power_exponent >= 0);
- if (power_exponent == 0) {
- AssignUInt16(1);
- return;
- }
- Zero();
- int shifts = 0;
- // We expect base to be in range 2-32, and most often to be 10.
- // It does not make much sense to implement different algorithms for counting
- // the bits.
- while ((base & 1) == 0) {
- base >>= 1;
- shifts++;
- }
- int bit_size = 0;
- int tmp_base = base;
- while (tmp_base != 0) {
- tmp_base >>= 1;
- bit_size++;
- }
- int final_size = bit_size * power_exponent;
- // 1 extra bigit for the shifting, and one for rounded final_size.
- EnsureCapacity(final_size / kBigitSize + 2);
-
- // Left to Right exponentiation.
- int mask = 1;
- while (power_exponent >= mask) mask <<= 1;
-
- // The mask is now pointing to the bit above the most significant 1-bit of
- // power_exponent.
- // Get rid of first 1-bit;
- mask >>= 2;
- uint64_t this_value = base;
-
+ for (int i = 0; i < kBigitCapacity; ++i) {
+ bigits_[i] = 0;
+ }
+}
+
+
+template<typename S>
+static int BitSize(S value) {
+ (void) value; // Mark variable as used.
+ return 8 * sizeof(value);
+}
+
+// Guaranteed to lie in one Bigit.
+void Bignum::AssignUInt16(uint16_t value) {
+ ASSERT(kBigitSize >= BitSize(value));
+ Zero();
+ if (value == 0) return;
+
+ EnsureCapacity(1);
+ bigits_[0] = value;
+ used_digits_ = 1;
+}
+
+
+void Bignum::AssignUInt64(uint64_t value) {
+ const int kUInt64Size = 64;
+
+ Zero();
+ if (value == 0) return;
+
+ int needed_bigits = kUInt64Size / kBigitSize + 1;
+ EnsureCapacity(needed_bigits);
+ for (int i = 0; i < needed_bigits; ++i) {
+ bigits_[i] = value & kBigitMask;
+ value = value >> kBigitSize;
+ }
+ used_digits_ = needed_bigits;
+ Clamp();
+}
+
+
+void Bignum::AssignBignum(const Bignum& other) {
+ exponent_ = other.exponent_;
+ for (int i = 0; i < other.used_digits_; ++i) {
+ bigits_[i] = other.bigits_[i];
+ }
+ // Clear the excess digits (if there were any).
+ for (int i = other.used_digits_; i < used_digits_; ++i) {
+ bigits_[i] = 0;
+ }
+ used_digits_ = other.used_digits_;
+}
+
+
+static uint64_t ReadUInt64(Vector<const char> buffer,
+ int from,
+ int digits_to_read) {
+ uint64_t result = 0;
+ for (int i = from; i < from + digits_to_read; ++i) {
+ int digit = buffer[i] - '0';
+ ASSERT(0 <= digit && digit <= 9);
+ result = result * 10 + digit;
+ }
+ return result;
+}
+
+
+void Bignum::AssignDecimalString(Vector<const char> value) {
+ // 2^64 = 18446744073709551616 > 10^19
+ const int kMaxUint64DecimalDigits = 19;
+ Zero();
+ int length = value.length();
+ unsigned int pos = 0;
+ // Let's just say that each digit needs 4 bits.
+ while (length >= kMaxUint64DecimalDigits) {
+ uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
+ pos += kMaxUint64DecimalDigits;
+ length -= kMaxUint64DecimalDigits;
+ MultiplyByPowerOfTen(kMaxUint64DecimalDigits);
+ AddUInt64(digits);
+ }
+ uint64_t digits = ReadUInt64(value, pos, length);
+ MultiplyByPowerOfTen(length);
+ AddUInt64(digits);
+ Clamp();
+}
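
Editorial note on the chunking above: a 25-digit string such as "1234567890123456789012345" is consumed as one 19-digit chunk plus a 6-digit tail. The loop first parses 1234567890123456789 into a uint64_t and adds it; the tail pass then multiplies the accumulated bignum by 10^6 and adds 012345, reconstructing the full value without ever overflowing 64-bit arithmetic.
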
+
+
+static int HexCharValue(char c) {
+ if ('0' <= c && c <= '9') return c - '0';
+ if ('a' <= c && c <= 'f') return 10 + c - 'a';
+ ASSERT('A' <= c && c <= 'F');
+ return 10 + c - 'A';
+}
+
+
+void Bignum::AssignHexString(Vector<const char> value) {
+ Zero();
+ int length = value.length();
+
+ int needed_bigits = length * 4 / kBigitSize + 1;
+ EnsureCapacity(needed_bigits);
+ int string_index = length - 1;
+ for (int i = 0; i < needed_bigits - 1; ++i) {
+ // These bigits are guaranteed to be "full".
+ Chunk current_bigit = 0;
+ for (int j = 0; j < kBigitSize / 4; j++) {
+ current_bigit += HexCharValue(value[string_index--]) << (j * 4);
+ }
+ bigits_[i] = current_bigit;
+ }
+ used_digits_ = needed_bigits - 1;
+
+ Chunk most_significant_bigit = 0; // Could be = 0;
+ for (int j = 0; j <= string_index; ++j) {
+ most_significant_bigit <<= 4;
+ most_significant_bigit += HexCharValue(value[j]);
+ }
+ if (most_significant_bigit != 0) {
+ bigits_[used_digits_] = most_significant_bigit;
+ used_digits_++;
+ }
+ Clamp();
+}
+
+
+void Bignum::AddUInt64(uint64_t operand) {
+ if (operand == 0) return;
+ Bignum other;
+ other.AssignUInt64(operand);
+ AddBignum(other);
+}
+
+
+void Bignum::AddBignum(const Bignum& other) {
+ ASSERT(IsClamped());
+ ASSERT(other.IsClamped());
+
+ // If this has a greater exponent than other append zero-bigits to this.
+ // After this call exponent_ <= other.exponent_.
+ Align(other);
+
+ // There are two possibilities:
+ // aaaaaaaaaaa 0000 (where the 0s represent a's exponent)
+ // bbbbb 00000000
+ // ----------------
+ // ccccccccccc 0000
+ // or
+ // aaaaaaaaaa 0000
+ // bbbbbbbbb 0000000
+ // -----------------
+ // cccccccccccc 0000
+ // In both cases we might need a carry bigit.
+
+ EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_);
+ Chunk carry = 0;
+ int bigit_pos = other.exponent_ - exponent_;
+ ASSERT(bigit_pos >= 0);
+ for (int i = 0; i < other.used_digits_; ++i) {
+ Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry;
+ bigits_[bigit_pos] = sum & kBigitMask;
+ carry = sum >> kBigitSize;
+ bigit_pos++;
+ }
+
+ while (carry != 0) {
+ Chunk sum = bigits_[bigit_pos] + carry;
+ bigits_[bigit_pos] = sum & kBigitMask;
+ carry = sum >> kBigitSize;
+ bigit_pos++;
+ }
+ used_digits_ = Max(bigit_pos, used_digits_);
+ ASSERT(IsClamped());
+}
+
+
+void Bignum::SubtractBignum(const Bignum& other) {
+ ASSERT(IsClamped());
+ ASSERT(other.IsClamped());
+ // We require this to be bigger than other.
+ ASSERT(LessEqual(other, *this));
+
+ Align(other);
+
+ int offset = other.exponent_ - exponent_;
+ Chunk borrow = 0;
+ int i;
+ for (i = 0; i < other.used_digits_; ++i) {
+ ASSERT((borrow == 0) || (borrow == 1));
+ Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow;
+ bigits_[i + offset] = difference & kBigitMask;
+ borrow = difference >> (kChunkSize - 1);
+ }
+ while (borrow != 0) {
+ Chunk difference = bigits_[i + offset] - borrow;
+ bigits_[i + offset] = difference & kBigitMask;
+ borrow = difference >> (kChunkSize - 1);
+ ++i;
+ }
+ Clamp();
+}
+
+
+void Bignum::ShiftLeft(int shift_amount) {
+ if (used_digits_ == 0) return;
+ exponent_ += shift_amount / kBigitSize;
+ int local_shift = shift_amount % kBigitSize;
+ EnsureCapacity(used_digits_ + 1);
+ BigitsShiftLeft(local_shift);
+}
+
+
+void Bignum::MultiplyByUInt32(uint32_t factor) {
+ if (factor == 1) return;
+ if (factor == 0) {
+ Zero();
+ return;
+ }
+ if (used_digits_ == 0) return;
+
+ // The product of a bigit with the factor is of size kBigitSize + 32.
+ // Assert that this number + 1 (for the carry) fits into double chunk.
+ ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1);
+ DoubleChunk carry = 0;
+ for (int i = 0; i < used_digits_; ++i) {
+ DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry;
+ bigits_[i] = static_cast<Chunk>(product & kBigitMask);
+ carry = (product >> kBigitSize);
+ }
+ while (carry != 0) {
+ EnsureCapacity(used_digits_ + 1);
+ bigits_[used_digits_] = carry & kBigitMask;
+ used_digits_++;
+ carry >>= kBigitSize;
+ }
+}
+
+
+void Bignum::MultiplyByUInt64(uint64_t factor) {
+ if (factor == 1) return;
+ if (factor == 0) {
+ Zero();
+ return;
+ }
+ ASSERT(kBigitSize < 32);
+ uint64_t carry = 0;
+ uint64_t low = factor & 0xFFFFFFFF;
+ uint64_t high = factor >> 32;
+ for (int i = 0; i < used_digits_; ++i) {
+ uint64_t product_low = low * bigits_[i];
+ uint64_t product_high = high * bigits_[i];
+ uint64_t tmp = (carry & kBigitMask) + product_low;
+ bigits_[i] = tmp & kBigitMask;
+ carry = (carry >> kBigitSize) + (tmp >> kBigitSize) +
+ (product_high << (32 - kBigitSize));
+ }
+ while (carry != 0) {
+ EnsureCapacity(used_digits_ + 1);
+ bigits_[used_digits_] = carry & kBigitMask;
+ used_digits_++;
+ carry >>= kBigitSize;
+ }
+}
+
+
+void Bignum::MultiplyByPowerOfTen(int exponent) {
+ const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d);
+ const uint16_t kFive1 = 5;
+ const uint16_t kFive2 = kFive1 * 5;
+ const uint16_t kFive3 = kFive2 * 5;
+ const uint16_t kFive4 = kFive3 * 5;
+ const uint16_t kFive5 = kFive4 * 5;
+ const uint16_t kFive6 = kFive5 * 5;
+ const uint32_t kFive7 = kFive6 * 5;
+ const uint32_t kFive8 = kFive7 * 5;
+ const uint32_t kFive9 = kFive8 * 5;
+ const uint32_t kFive10 = kFive9 * 5;
+ const uint32_t kFive11 = kFive10 * 5;
+ const uint32_t kFive12 = kFive11 * 5;
+ const uint32_t kFive13 = kFive12 * 5;
+ const uint32_t kFive1_to_12[] =
+ { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6,
+ kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 };
+
+ ASSERT(exponent >= 0);
+ if (exponent == 0) return;
+ if (used_digits_ == 0) return;
+
+ // We shift by exponent at the end just before returning.
+ int remaining_exponent = exponent;
+ while (remaining_exponent >= 27) {
+ MultiplyByUInt64(kFive27);
+ remaining_exponent -= 27;
+ }
+ while (remaining_exponent >= 13) {
+ MultiplyByUInt32(kFive13);
+ remaining_exponent -= 13;
+ }
+ if (remaining_exponent > 0) {
+ MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]);
+ }
+ ShiftLeft(exponent);
+}
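
The routine above relies on 10^e = 5^e * 2^e: the powers of five are applied with the precomputed 32/64-bit constants and the power of two is folded into the cheap ShiftLeft at the end. A small editorial sketch of the same decomposition on plain integers (small e only, to avoid overflow):

#include <cstdint>
#include <cstdio>

static uint64_t TimesPowerOfTenSketch(uint64_t value, int e) {
  for (int i = 0; i < e; ++i) value *= 5;  // multiply in the 5^e part ...
  return value << e;                       // ... then shift in the 2^e part
}

int main() {
  std::printf("%llu\n",
              static_cast<unsigned long long>(TimesPowerOfTenSketch(7, 3)));  // 7000
  return 0;
}
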
+
+
+void Bignum::Square() {
+ ASSERT(IsClamped());
+ int product_length = 2 * used_digits_;
+ EnsureCapacity(product_length);
+
+ // Comba multiplication: compute each column separately.
+ // Example: r = a2a1a0 * b2b1b0.
+ // r = 1 * a0b0 +
+ // 10 * (a1b0 + a0b1) +
+ // 100 * (a2b0 + a1b1 + a0b2) +
+ // 1000 * (a2b1 + a1b2) +
+ // 10000 * a2b2
+ //
+ // In the worst case we have to accumulate nb-digits products of digit*digit.
+ //
+ // Assert that the additional number of bits in a DoubleChunk are enough to
+ // sum up used_digits of Bigit*Bigit.
+ if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) {
+ UNIMPLEMENTED();
+ }
+ DoubleChunk accumulator = 0;
+ // First shift the digits so we don't overwrite them.
+ int copy_offset = used_digits_;
+ for (int i = 0; i < used_digits_; ++i) {
+ bigits_[copy_offset + i] = bigits_[i];
+ }
+ // We have two loops to avoid some 'if's in the loop.
+ for (int i = 0; i < used_digits_; ++i) {
+ // Process temporary digit i with power i.
+ // The sum of the two indices must be equal to i.
+ int bigit_index1 = i;
+ int bigit_index2 = 0;
+ // Sum all of the sub-products.
+ while (bigit_index1 >= 0) {
+ Chunk chunk1 = bigits_[copy_offset + bigit_index1];
+ Chunk chunk2 = bigits_[copy_offset + bigit_index2];
+ accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
+ bigit_index1--;
+ bigit_index2++;
+ }
+ bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
+ accumulator >>= kBigitSize;
+ }
+ for (int i = used_digits_; i < product_length; ++i) {
+ int bigit_index1 = used_digits_ - 1;
+ int bigit_index2 = i - bigit_index1;
+ // Invariant: sum of both indices is again equal to i.
+ // Inner loop runs 0 times on last iteration, emptying accumulator.
+ while (bigit_index2 < used_digits_) {
+ Chunk chunk1 = bigits_[copy_offset + bigit_index1];
+ Chunk chunk2 = bigits_[copy_offset + bigit_index2];
+ accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
+ bigit_index1--;
+ bigit_index2++;
+ }
+ // The overwritten bigits_[i] will never be read in further loop iterations,
+ // because bigit_index1 and bigit_index2 are always greater
+ // than i - used_digits_.
+ bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
+ accumulator >>= kBigitSize;
+ }
+ // Since the result was guaranteed to lie inside the number the
+ // accumulator must be 0 now.
+ ASSERT(accumulator == 0);
+
+ // Don't forget to update the used_digits and the exponent.
+ used_digits_ = product_length;
+ exponent_ *= 2;
+ Clamp();
+}
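
Editorial note: in the column-by-column (Comba) scheme above, squaring a two-bigit number (a1 a0) accumulates column 0 = a0*a0, column 1 = a1*a0 + a0*a1 and column 2 = a1*a1; after each column the low kBigitSize bits are stored and the remainder stays in the accumulator as the carry into the next column.
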
+
+
+void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) {
+ ASSERT(base != 0);
+ ASSERT(power_exponent >= 0);
+ if (power_exponent == 0) {
+ AssignUInt16(1);
+ return;
+ }
+ Zero();
+ int shifts = 0;
+ // We expect base to be in range 2-32, and most often to be 10.
+ // It does not make much sense to implement different algorithms for counting
+ // the bits.
+ while ((base & 1) == 0) {
+ base >>= 1;
+ shifts++;
+ }
+ int bit_size = 0;
+ int tmp_base = base;
+ while (tmp_base != 0) {
+ tmp_base >>= 1;
+ bit_size++;
+ }
+ int final_size = bit_size * power_exponent;
+ // 1 extra bigit for the shifting, and one for rounded final_size.
+ EnsureCapacity(final_size / kBigitSize + 2);
+
+ // Left to Right exponentiation.
+ int mask = 1;
+ while (power_exponent >= mask) mask <<= 1;
+
+ // The mask is now pointing to the bit above the most significant 1-bit of
+ // power_exponent.
+ // Get rid of first 1-bit;
+ mask >>= 2;
+ uint64_t this_value = base;
+
bool delayed_multiplication = false;
- const uint64_t max_32bits = 0xFFFFFFFF;
- while (mask != 0 && this_value <= max_32bits) {
- this_value = this_value * this_value;
- // Verify that there is enough space in this_value to perform the
- // multiplication. The first bit_size bits must be 0.
- if ((power_exponent & mask) != 0) {
+ const uint64_t max_32bits = 0xFFFFFFFF;
+ while (mask != 0 && this_value <= max_32bits) {
+ this_value = this_value * this_value;
+ // Verify that there is enough space in this_value to perform the
+ // multiplication. The first bit_size bits must be 0.
+ if ((power_exponent & mask) != 0) {
ASSERT(bit_size > 0);
- uint64_t base_bits_mask =
- ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1);
- bool high_bits_zero = (this_value & base_bits_mask) == 0;
- if (high_bits_zero) {
- this_value *= base;
- } else {
+ uint64_t base_bits_mask =
+ ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1);
+ bool high_bits_zero = (this_value & base_bits_mask) == 0;
+ if (high_bits_zero) {
+ this_value *= base;
+ } else {
delayed_multiplication = true;
- }
- }
- mask >>= 1;
- }
- AssignUInt64(this_value);
+ }
+ }
+ mask >>= 1;
+ }
+ AssignUInt64(this_value);
if (delayed_multiplication) {
- MultiplyByUInt32(base);
- }
-
- // Now do the same thing as a bignum.
- while (mask != 0) {
- Square();
- if ((power_exponent & mask) != 0) {
- MultiplyByUInt32(base);
- }
- mask >>= 1;
- }
-
- // And finally add the saved shifts.
- ShiftLeft(shifts * power_exponent);
-}
-
-
-// Precondition: this/other < 16bit.
-uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
- ASSERT(IsClamped());
- ASSERT(other.IsClamped());
- ASSERT(other.used_digits_ > 0);
-
-  // Easy case: if we have fewer digits than the divisor then the result is 0.
- // Note: this handles the case where this == 0, too.
- if (BigitLength() < other.BigitLength()) {
- return 0;
- }
-
- Align(other);
-
- uint16_t result = 0;
-
- // Start by removing multiples of 'other' until both numbers have the same
- // number of digits.
- while (BigitLength() > other.BigitLength()) {
- // This naive approach is extremely inefficient if `this` divided by other
- // is big. This function is implemented for doubleToString where
- // the result should be small (less than 10).
- ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
- ASSERT(bigits_[used_digits_ - 1] < 0x10000);
- // Remove the multiples of the first digit.
- // Example this = 23 and other equals 9. -> Remove 2 multiples.
- result += static_cast<uint16_t>(bigits_[used_digits_ - 1]);
- SubtractTimes(other, bigits_[used_digits_ - 1]);
- }
-
- ASSERT(BigitLength() == other.BigitLength());
-
- // Both bignums are at the same length now.
- // Since other has more than 0 digits we know that the access to
- // bigits_[used_digits_ - 1] is safe.
- Chunk this_bigit = bigits_[used_digits_ - 1];
- Chunk other_bigit = other.bigits_[other.used_digits_ - 1];
-
- if (other.used_digits_ == 1) {
- // Shortcut for easy (and common) case.
- int quotient = this_bigit / other_bigit;
- bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
- ASSERT(quotient < 0x10000);
- result += static_cast<uint16_t>(quotient);
- Clamp();
- return result;
- }
-
- int division_estimate = this_bigit / (other_bigit + 1);
- ASSERT(division_estimate < 0x10000);
- result += static_cast<uint16_t>(division_estimate);
- SubtractTimes(other, division_estimate);
-
- if (other_bigit * (division_estimate + 1) > this_bigit) {
- // No need to even try to subtract. Even if other's remaining digits were 0
- // another subtraction would be too much.
- return result;
- }
-
- while (LessEqual(other, *this)) {
- SubtractBignum(other);
- result++;
- }
- return result;
-}
-
-
-template<typename S>
-static int SizeInHexChars(S number) {
- ASSERT(number > 0);
- int result = 0;
- while (number != 0) {
- number >>= 4;
- result++;
- }
- return result;
-}
-
-
-static char HexCharOfValue(int value) {
- ASSERT(0 <= value && value <= 16);
- if (value < 10) return static_cast<char>(value + '0');
- return static_cast<char>(value - 10 + 'A');
-}
-
-
-bool Bignum::ToHexString(char* buffer, int buffer_size) const {
- ASSERT(IsClamped());
- // Each bigit must be printable as separate hex-character.
- ASSERT(kBigitSize % 4 == 0);
- const int kHexCharsPerBigit = kBigitSize / 4;
-
- if (used_digits_ == 0) {
- if (buffer_size < 2) return false;
- buffer[0] = '0';
- buffer[1] = '\0';
- return true;
- }
- // We add 1 for the terminating '\0' character.
- int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit +
- SizeInHexChars(bigits_[used_digits_ - 1]) + 1;
- if (needed_chars > buffer_size) return false;
- int string_index = needed_chars - 1;
- buffer[string_index--] = '\0';
- for (int i = 0; i < exponent_; ++i) {
- for (int j = 0; j < kHexCharsPerBigit; ++j) {
- buffer[string_index--] = '0';
- }
- }
- for (int i = 0; i < used_digits_ - 1; ++i) {
- Chunk current_bigit = bigits_[i];
- for (int j = 0; j < kHexCharsPerBigit; ++j) {
- buffer[string_index--] = HexCharOfValue(current_bigit & 0xF);
- current_bigit >>= 4;
- }
- }
- // And finally the last bigit.
- Chunk most_significant_bigit = bigits_[used_digits_ - 1];
- while (most_significant_bigit != 0) {
- buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF);
- most_significant_bigit >>= 4;
- }
- return true;
-}
-
-
-Bignum::Chunk Bignum::BigitAt(int index) const {
- if (index >= BigitLength()) return 0;
- if (index < exponent_) return 0;
- return bigits_[index - exponent_];
-}
-
-
-int Bignum::Compare(const Bignum& a, const Bignum& b) {
- ASSERT(a.IsClamped());
- ASSERT(b.IsClamped());
- int bigit_length_a = a.BigitLength();
- int bigit_length_b = b.BigitLength();
- if (bigit_length_a < bigit_length_b) return -1;
- if (bigit_length_a > bigit_length_b) return +1;
- for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) {
- Chunk bigit_a = a.BigitAt(i);
- Chunk bigit_b = b.BigitAt(i);
- if (bigit_a < bigit_b) return -1;
- if (bigit_a > bigit_b) return +1;
- // Otherwise they are equal up to this digit. Try the next digit.
- }
- return 0;
-}
-
-
-int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) {
- ASSERT(a.IsClamped());
- ASSERT(b.IsClamped());
- ASSERT(c.IsClamped());
- if (a.BigitLength() < b.BigitLength()) {
- return PlusCompare(b, a, c);
- }
- if (a.BigitLength() + 1 < c.BigitLength()) return -1;
- if (a.BigitLength() > c.BigitLength()) return +1;
- // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than
- // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one
- // of 'a'.
- if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) {
- return -1;
- }
-
- Chunk borrow = 0;
- // Starting at min_exponent all digits are == 0. So no need to compare them.
- int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_);
- for (int i = c.BigitLength() - 1; i >= min_exponent; --i) {
- Chunk chunk_a = a.BigitAt(i);
- Chunk chunk_b = b.BigitAt(i);
- Chunk chunk_c = c.BigitAt(i);
- Chunk sum = chunk_a + chunk_b;
- if (sum > chunk_c + borrow) {
- return +1;
- } else {
- borrow = chunk_c + borrow - sum;
- if (borrow > 1) return -1;
- borrow <<= kBigitSize;
- }
- }
- if (borrow == 0) return 0;
- return -1;
-}
-
-
-void Bignum::Clamp() {
- while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) {
- used_digits_--;
- }
- if (used_digits_ == 0) {
- // Zero.
- exponent_ = 0;
- }
-}
-
-
-bool Bignum::IsClamped() const {
- return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0;
-}
-
-
-void Bignum::Zero() {
- for (int i = 0; i < used_digits_; ++i) {
- bigits_[i] = 0;
- }
- used_digits_ = 0;
- exponent_ = 0;
-}
-
-
-void Bignum::Align(const Bignum& other) {
- if (exponent_ > other.exponent_) {
- // If "X" represents a "hidden" digit (by the exponent) then we are in the
- // following case (a == this, b == other):
- // a: aaaaaaXXXX or a: aaaaaXXX
- // b: bbbbbbX b: bbbbbbbbXX
- // We replace some of the hidden digits (X) of a with 0 digits.
- // a: aaaaaa000X or a: aaaaa0XX
- int zero_digits = exponent_ - other.exponent_;
- EnsureCapacity(used_digits_ + zero_digits);
- for (int i = used_digits_ - 1; i >= 0; --i) {
- bigits_[i + zero_digits] = bigits_[i];
- }
- for (int i = 0; i < zero_digits; ++i) {
- bigits_[i] = 0;
- }
- used_digits_ += zero_digits;
- exponent_ -= zero_digits;
- ASSERT(used_digits_ >= 0);
- ASSERT(exponent_ >= 0);
- }
-}
-
-
-void Bignum::BigitsShiftLeft(int shift_amount) {
- ASSERT(shift_amount < kBigitSize);
- ASSERT(shift_amount >= 0);
- Chunk carry = 0;
- for (int i = 0; i < used_digits_; ++i) {
- Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount);
- bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask;
- carry = new_carry;
- }
- if (carry != 0) {
- bigits_[used_digits_] = carry;
- used_digits_++;
- }
-}
-
-
-void Bignum::SubtractTimes(const Bignum& other, int factor) {
- ASSERT(exponent_ <= other.exponent_);
- if (factor < 3) {
- for (int i = 0; i < factor; ++i) {
- SubtractBignum(other);
- }
- return;
- }
- Chunk borrow = 0;
- int exponent_diff = other.exponent_ - exponent_;
- for (int i = 0; i < other.used_digits_; ++i) {
- DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i];
- DoubleChunk remove = borrow + product;
- Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask);
- bigits_[i + exponent_diff] = difference & kBigitMask;
- borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) +
- (remove >> kBigitSize));
- }
- for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) {
- if (borrow == 0) return;
- Chunk difference = bigits_[i] - borrow;
- bigits_[i] = difference & kBigitMask;
- borrow = difference >> (kChunkSize - 1);
- }
- Clamp();
-}
-
-
-} // namespace double_conversion
+ MultiplyByUInt32(base);
+ }
+
+ // Now do the same thing as a bignum.
+ while (mask != 0) {
+ Square();
+ if ((power_exponent & mask) != 0) {
+ MultiplyByUInt32(base);
+ }
+ mask >>= 1;
+ }
+
+ // And finally add the saved shifts.
+ ShiftLeft(shifts * power_exponent);
+}
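
The exponentiation above is the left-to-right (most-significant-bit-first) binary method, carried out in 64-bit arithmetic while the value still fits and as a bignum afterwards. A compact editorial sketch of the same scan on plain integers: square at every bit, multiply by the base whenever the bit is set.

#include <cstdint>
#include <cstdio>

static uint64_t PowerSketch(uint64_t base, int exponent) {
  if (exponent == 0) return 1;
  int mask = 1;
  while (exponent >= mask) mask <<= 1;
  mask >>= 2;  // skip the leading 1-bit: the result starts out as 'base'
  uint64_t result = base;
  while (mask != 0) {
    result = result * result;
    if (exponent & mask) result *= base;
    mask >>= 1;
  }
  return result;
}

int main() {
  std::printf("%llu\n",
              static_cast<unsigned long long>(PowerSketch(10, 7)));  // 10000000
  return 0;
}
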
+
+
+// Precondition: this/other < 16bit.
+uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
+ ASSERT(IsClamped());
+ ASSERT(other.IsClamped());
+ ASSERT(other.used_digits_ > 0);
+
+  // Easy case: if we have fewer digits than the divisor then the result is 0.
+ // Note: this handles the case where this == 0, too.
+ if (BigitLength() < other.BigitLength()) {
+ return 0;
+ }
+
+ Align(other);
+
+ uint16_t result = 0;
+
+ // Start by removing multiples of 'other' until both numbers have the same
+ // number of digits.
+ while (BigitLength() > other.BigitLength()) {
+ // This naive approach is extremely inefficient if `this` divided by other
+ // is big. This function is implemented for doubleToString where
+ // the result should be small (less than 10).
+ ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
+ ASSERT(bigits_[used_digits_ - 1] < 0x10000);
+ // Remove the multiples of the first digit.
+ // Example this = 23 and other equals 9. -> Remove 2 multiples.
+ result += static_cast<uint16_t>(bigits_[used_digits_ - 1]);
+ SubtractTimes(other, bigits_[used_digits_ - 1]);
+ }
+
+ ASSERT(BigitLength() == other.BigitLength());
+
+ // Both bignums are at the same length now.
+ // Since other has more than 0 digits we know that the access to
+ // bigits_[used_digits_ - 1] is safe.
+ Chunk this_bigit = bigits_[used_digits_ - 1];
+ Chunk other_bigit = other.bigits_[other.used_digits_ - 1];
+
+ if (other.used_digits_ == 1) {
+ // Shortcut for easy (and common) case.
+ int quotient = this_bigit / other_bigit;
+ bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
+ ASSERT(quotient < 0x10000);
+ result += static_cast<uint16_t>(quotient);
+ Clamp();
+ return result;
+ }
+
+ int division_estimate = this_bigit / (other_bigit + 1);
+ ASSERT(division_estimate < 0x10000);
+ result += static_cast<uint16_t>(division_estimate);
+ SubtractTimes(other, division_estimate);
+
+ if (other_bigit * (division_estimate + 1) > this_bigit) {
+ // No need to even try to subtract. Even if other's remaining digits were 0
+ // another subtraction would be too much.
+ return result;
+ }
+
+ while (LessEqual(other, *this)) {
+ SubtractBignum(other);
+ result++;
+ }
+ return result;
+}
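
Editorial note on the quotient estimate above: dividing the leading bigits, say this_bigit = 17 and other_bigit = 5, gives division_estimate = 17 / (5 + 1) = 2, which can only underestimate the true quotient digit; those multiples are subtracted immediately and the trailing LessEqual loop supplies any remaining subtractions, so the returned quotient stays a small uint16_t as the precondition promises.
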
+
+
+template<typename S>
+static int SizeInHexChars(S number) {
+ ASSERT(number > 0);
+ int result = 0;
+ while (number != 0) {
+ number >>= 4;
+ result++;
+ }
+ return result;
+}
+
+
+static char HexCharOfValue(int value) {
+ ASSERT(0 <= value && value <= 16);
+ if (value < 10) return static_cast<char>(value + '0');
+ return static_cast<char>(value - 10 + 'A');
+}
+
+
+bool Bignum::ToHexString(char* buffer, int buffer_size) const {
+ ASSERT(IsClamped());
+ // Each bigit must be printable as separate hex-character.
+ ASSERT(kBigitSize % 4 == 0);
+ const int kHexCharsPerBigit = kBigitSize / 4;
+
+ if (used_digits_ == 0) {
+ if (buffer_size < 2) return false;
+ buffer[0] = '0';
+ buffer[1] = '\0';
+ return true;
+ }
+ // We add 1 for the terminating '\0' character.
+ int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit +
+ SizeInHexChars(bigits_[used_digits_ - 1]) + 1;
+ if (needed_chars > buffer_size) return false;
+ int string_index = needed_chars - 1;
+ buffer[string_index--] = '\0';
+ for (int i = 0; i < exponent_; ++i) {
+ for (int j = 0; j < kHexCharsPerBigit; ++j) {
+ buffer[string_index--] = '0';
+ }
+ }
+ for (int i = 0; i < used_digits_ - 1; ++i) {
+ Chunk current_bigit = bigits_[i];
+ for (int j = 0; j < kHexCharsPerBigit; ++j) {
+ buffer[string_index--] = HexCharOfValue(current_bigit & 0xF);
+ current_bigit >>= 4;
+ }
+ }
+ // And finally the last bigit.
+ Chunk most_significant_bigit = bigits_[used_digits_ - 1];
+ while (most_significant_bigit != 0) {
+ buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF);
+ most_significant_bigit >>= 4;
+ }
+ return true;
+}
+
+
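
A hypothetical usage sketch of ToHexString, assuming bignum.h and the library sources are available to the build; the expected output follows from the hex-digit loops above:

    #include <cstdio>
    #include "bignum.h"  // double-conversion's Bignum (assumed to be on the include path).

    int main() {
      double_conversion::Bignum n;
      n.AssignUInt64(0xDEADBEEFULL);
      char buf[64];
      if (n.ToHexString(buf, static_cast<int>(sizeof(buf)))) {
        std::printf("%s\n", buf);  // Expected to print DEADBEEF.
      }
      return 0;
    }
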
+Bignum::Chunk Bignum::BigitAt(int index) const {
+ if (index >= BigitLength()) return 0;
+ if (index < exponent_) return 0;
+ return bigits_[index - exponent_];
+}
+
+
+int Bignum::Compare(const Bignum& a, const Bignum& b) {
+ ASSERT(a.IsClamped());
+ ASSERT(b.IsClamped());
+ int bigit_length_a = a.BigitLength();
+ int bigit_length_b = b.BigitLength();
+ if (bigit_length_a < bigit_length_b) return -1;
+ if (bigit_length_a > bigit_length_b) return +1;
+ for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) {
+ Chunk bigit_a = a.BigitAt(i);
+ Chunk bigit_b = b.BigitAt(i);
+ if (bigit_a < bigit_b) return -1;
+ if (bigit_a > bigit_b) return +1;
+ // Otherwise they are equal up to this digit. Try the next digit.
+ }
+ return 0;
+}
+
+
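
A small usage sketch for Compare and its convenience wrappers, again assuming the library is linked; the comparison looks at the bigit lengths first and at individual bigits only when the lengths match:

    #include <cassert>
    #include "bignum.h"  // assumed available

    int main() {
      using double_conversion::Bignum;
      Bignum a, b;
      a.AssignUInt64(1000000000001ULL);
      b.AssignUInt64(1000000000000ULL);
      assert(Bignum::Compare(a, b) == +1);  // a > b
      assert(Bignum::Less(b, a) && Bignum::LessEqual(a, a));
      return 0;
    }
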
+int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) {
+ ASSERT(a.IsClamped());
+ ASSERT(b.IsClamped());
+ ASSERT(c.IsClamped());
+ if (a.BigitLength() < b.BigitLength()) {
+ return PlusCompare(b, a, c);
+ }
+ if (a.BigitLength() + 1 < c.BigitLength()) return -1;
+ if (a.BigitLength() > c.BigitLength()) return +1;
+ // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than
+ // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one
+ // of 'a'.
+ if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) {
+ return -1;
+ }
+
+ Chunk borrow = 0;
+ // Starting at min_exponent all digits are == 0. So no need to compare them.
+ int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_);
+ for (int i = c.BigitLength() - 1; i >= min_exponent; --i) {
+ Chunk chunk_a = a.BigitAt(i);
+ Chunk chunk_b = b.BigitAt(i);
+ Chunk chunk_c = c.BigitAt(i);
+ Chunk sum = chunk_a + chunk_b;
+ if (sum > chunk_c + borrow) {
+ return +1;
+ } else {
+ borrow = chunk_c + borrow - sum;
+ if (borrow > 1) return -1;
+ borrow <<= kBigitSize;
+ }
+ }
+ if (borrow == 0) return 0;
+ return -1;
+}
+
+
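
PlusCompare decides how a + b relates to c without materializing the sum: it walks the bigits from the most significant end and carries a borrow downwards. A hypothetical usage sketch, assuming the library is linked:

    #include <cassert>
    #include "bignum.h"  // assumed available

    int main() {
      using double_conversion::Bignum;
      Bignum a, b, c;
      a.AssignUInt64(10000000000ULL);   // 1e10
      b.AssignUInt16(1);
      c.AssignUInt64(10000000001ULL);   // 1e10 + 1
      assert(Bignum::PlusEqual(a, b, c));          // a + b == c
      assert(Bignum::PlusLess(a, a, c) == false);  // 2e10 is not < 1e10 + 1
      return 0;
    }
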
+void Bignum::Clamp() {
+ while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) {
+ used_digits_--;
+ }
+ if (used_digits_ == 0) {
+ // Zero.
+ exponent_ = 0;
+ }
+}
+
+
+bool Bignum::IsClamped() const {
+ return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0;
+}
+
+
+void Bignum::Zero() {
+ for (int i = 0; i < used_digits_; ++i) {
+ bigits_[i] = 0;
+ }
+ used_digits_ = 0;
+ exponent_ = 0;
+}
+
+
+void Bignum::Align(const Bignum& other) {
+ if (exponent_ > other.exponent_) {
+ // If "X" represents a "hidden" digit (by the exponent) then we are in the
+ // following case (a == this, b == other):
+ // a: aaaaaaXXXX or a: aaaaaXXX
+ // b: bbbbbbX b: bbbbbbbbXX
+ // We replace some of the hidden digits (X) of a with 0 digits.
+ // a: aaaaaa000X or a: aaaaa0XX
+ int zero_digits = exponent_ - other.exponent_;
+ EnsureCapacity(used_digits_ + zero_digits);
+ for (int i = used_digits_ - 1; i >= 0; --i) {
+ bigits_[i + zero_digits] = bigits_[i];
+ }
+ for (int i = 0; i < zero_digits; ++i) {
+ bigits_[i] = 0;
+ }
+ used_digits_ += zero_digits;
+ exponent_ -= zero_digits;
+ ASSERT(used_digits_ >= 0);
+ ASSERT(exponent_ >= 0);
+ }
+}
+
+
+void Bignum::BigitsShiftLeft(int shift_amount) {
+ ASSERT(shift_amount < kBigitSize);
+ ASSERT(shift_amount >= 0);
+ Chunk carry = 0;
+ for (int i = 0; i < used_digits_; ++i) {
+ Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount);
+ bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask;
+ carry = new_carry;
+ }
+ if (carry != 0) {
+ bigits_[used_digits_] = carry;
+ used_digits_++;
+ }
+}
+
+
+void Bignum::SubtractTimes(const Bignum& other, int factor) {
+ ASSERT(exponent_ <= other.exponent_);
+ if (factor < 3) {
+ for (int i = 0; i < factor; ++i) {
+ SubtractBignum(other);
+ }
+ return;
+ }
+ Chunk borrow = 0;
+ int exponent_diff = other.exponent_ - exponent_;
+ for (int i = 0; i < other.used_digits_; ++i) {
+ DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i];
+ DoubleChunk remove = borrow + product;
+ Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask);
+ bigits_[i + exponent_diff] = difference & kBigitMask;
+ borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) +
+ (remove >> kBigitSize));
+ }
+ for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) {
+ if (borrow == 0) return;
+ Chunk difference = bigits_[i] - borrow;
+ bigits_[i] = difference & kBigitMask;
+ borrow = difference >> (kChunkSize - 1);
+ }
+ Clamp();
+}
+
+
+} // namespace double_conversion
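
SubtractTimes above removes other * factor in a single pass: the per-bigit product lives in a DoubleChunk, and the underflow bit of the 28-bit subtraction plus the product's carry are folded into one running borrow. A standalone sketch of the same bookkeeping on a plain vector of 28-bit digits (illustrative only, not the library code; it assumes the minuend is at least other * factor):

    #include <cstdint>
    #include <vector>

    void subtract_times(std::vector<uint32_t>& digits,
                        const std::vector<uint32_t>& other, uint32_t factor) {
      const int kBigitSize = 28;
      const uint32_t kBigitMask = (1u << kBigitSize) - 1;
      uint32_t borrow = 0;
      for (size_t i = 0; i < digits.size(); ++i) {
        uint64_t product = (i < other.size())
            ? static_cast<uint64_t>(factor) * other[i] : 0;
        uint64_t remove = borrow + product;
        uint32_t difference =
            digits[i] - static_cast<uint32_t>(remove & kBigitMask);
        digits[i] = difference & kBigitMask;
        // Bit 31 of `difference` flags an underflow of the low 28 bits; the high
        // part of `remove` carries the rest of the product into the next digit.
        borrow = static_cast<uint32_t>((difference >> 31) + (remove >> kBigitSize));
      }
    }

Keeping the digits at 28 bits leaves headroom in the 32-bit chunk, which is why the underflow flag and the product carry can share a single uint32_t borrow.
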
diff --git a/contrib/libs/double-conversion/bignum.h b/contrib/libs/double-conversion/bignum.h
index eabfd1d7fd..7c289fa2f6 100644
--- a/contrib/libs/double-conversion/bignum.h
+++ b/contrib/libs/double-conversion/bignum.h
@@ -1,144 +1,144 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_BIGNUM_H_
-#define DOUBLE_CONVERSION_BIGNUM_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-class Bignum {
- public:
- // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
- // This bignum can encode much bigger numbers, since it contains an
- // exponent.
- static const int kMaxSignificantBits = 3584;
-
- Bignum();
- void AssignUInt16(uint16_t value);
- void AssignUInt64(uint64_t value);
- void AssignBignum(const Bignum& other);
-
- void AssignDecimalString(Vector<const char> value);
- void AssignHexString(Vector<const char> value);
-
- void AssignPowerUInt16(uint16_t base, int exponent);
-
- void AddUInt64(uint64_t operand);
- void AddBignum(const Bignum& other);
- // Precondition: this >= other.
- void SubtractBignum(const Bignum& other);
-
- void Square();
- void ShiftLeft(int shift_amount);
- void MultiplyByUInt32(uint32_t factor);
- void MultiplyByUInt64(uint64_t factor);
- void MultiplyByPowerOfTen(int exponent);
- void Times10() { return MultiplyByUInt32(10); }
- // Pseudocode:
- // int result = this / other;
- // this = this % other;
- // In the worst case this function is in O(this/other).
- uint16_t DivideModuloIntBignum(const Bignum& other);
-
- bool ToHexString(char* buffer, int buffer_size) const;
-
- // Returns
- // -1 if a < b,
- // 0 if a == b, and
- // +1 if a > b.
- static int Compare(const Bignum& a, const Bignum& b);
- static bool Equal(const Bignum& a, const Bignum& b) {
- return Compare(a, b) == 0;
- }
- static bool LessEqual(const Bignum& a, const Bignum& b) {
- return Compare(a, b) <= 0;
- }
- static bool Less(const Bignum& a, const Bignum& b) {
- return Compare(a, b) < 0;
- }
- // Returns Compare(a + b, c);
- static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c);
- // Returns a + b == c
- static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
- return PlusCompare(a, b, c) == 0;
- }
- // Returns a + b <= c
- static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
- return PlusCompare(a, b, c) <= 0;
- }
- // Returns a + b < c
- static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) {
- return PlusCompare(a, b, c) < 0;
- }
- private:
- typedef uint32_t Chunk;
- typedef uint64_t DoubleChunk;
-
- static const int kChunkSize = sizeof(Chunk) * 8;
- static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
- // With bigit size of 28 we loose some bits, but a double still fits easily
- // into two chunks, and more importantly we can use the Comba multiplication.
- static const int kBigitSize = 28;
- static const Chunk kBigitMask = (1 << kBigitSize) - 1;
- // Every instance allocates kBigitLength chunks on the stack. Bignums cannot
- // grow. There are no checks if the stack-allocated space is sufficient.
- static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;
-
- void EnsureCapacity(int size) {
- if (size > kBigitCapacity) {
- UNREACHABLE();
- }
- }
- void Align(const Bignum& other);
- void Clamp();
- bool IsClamped() const;
- void Zero();
- // Requires this to have enough capacity (no tests done).
- // Updates used_digits_ if necessary.
- // shift_amount must be < kBigitSize.
- void BigitsShiftLeft(int shift_amount);
- // BigitLength includes the "hidden" digits encoded in the exponent.
- int BigitLength() const { return used_digits_ + exponent_; }
- Chunk BigitAt(int index) const;
- void SubtractTimes(const Bignum& other, int factor);
-
- Chunk bigits_buffer_[kBigitCapacity];
- // A vector backed by bigits_buffer_. This way accesses to the array are
- // checked for out-of-bounds errors.
- Vector<Chunk> bigits_;
- int used_digits_;
- // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
- int exponent_;
-
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_BIGNUM_H_
+#define DOUBLE_CONVERSION_BIGNUM_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+class Bignum {
+ public:
+ // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
+ // This bignum can encode much bigger numbers, since it contains an
+ // exponent.
+ static const int kMaxSignificantBits = 3584;
+
+ Bignum();
+ void AssignUInt16(uint16_t value);
+ void AssignUInt64(uint64_t value);
+ void AssignBignum(const Bignum& other);
+
+ void AssignDecimalString(Vector<const char> value);
+ void AssignHexString(Vector<const char> value);
+
+ void AssignPowerUInt16(uint16_t base, int exponent);
+
+ void AddUInt64(uint64_t operand);
+ void AddBignum(const Bignum& other);
+ // Precondition: this >= other.
+ void SubtractBignum(const Bignum& other);
+
+ void Square();
+ void ShiftLeft(int shift_amount);
+ void MultiplyByUInt32(uint32_t factor);
+ void MultiplyByUInt64(uint64_t factor);
+ void MultiplyByPowerOfTen(int exponent);
+ void Times10() { return MultiplyByUInt32(10); }
+ // Pseudocode:
+ // int result = this / other;
+ // this = this % other;
+ // In the worst case this function is in O(this/other).
+ uint16_t DivideModuloIntBignum(const Bignum& other);
+
+ bool ToHexString(char* buffer, int buffer_size) const;
+
+ // Returns
+ // -1 if a < b,
+ // 0 if a == b, and
+ // +1 if a > b.
+ static int Compare(const Bignum& a, const Bignum& b);
+ static bool Equal(const Bignum& a, const Bignum& b) {
+ return Compare(a, b) == 0;
+ }
+ static bool LessEqual(const Bignum& a, const Bignum& b) {
+ return Compare(a, b) <= 0;
+ }
+ static bool Less(const Bignum& a, const Bignum& b) {
+ return Compare(a, b) < 0;
+ }
+ // Returns Compare(a + b, c);
+ static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c);
+ // Returns a + b == c
+ static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
+ return PlusCompare(a, b, c) == 0;
+ }
+ // Returns a + b <= c
+ static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
+ return PlusCompare(a, b, c) <= 0;
+ }
+ // Returns a + b < c
+ static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) {
+ return PlusCompare(a, b, c) < 0;
+ }
+ private:
+ typedef uint32_t Chunk;
+ typedef uint64_t DoubleChunk;
+
+ static const int kChunkSize = sizeof(Chunk) * 8;
+ static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
+  // With a bigit size of 28 we lose some bits, but a double still fits easily
+ // into two chunks, and more importantly we can use the Comba multiplication.
+ static const int kBigitSize = 28;
+ static const Chunk kBigitMask = (1 << kBigitSize) - 1;
+  // Every instance allocates kBigitCapacity chunks on the stack. Bignums cannot
+ // grow. There are no checks if the stack-allocated space is sufficient.
+ static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;
+
+ void EnsureCapacity(int size) {
+ if (size > kBigitCapacity) {
+ UNREACHABLE();
+ }
+ }
+ void Align(const Bignum& other);
+ void Clamp();
+ bool IsClamped() const;
+ void Zero();
+ // Requires this to have enough capacity (no tests done).
+ // Updates used_digits_ if necessary.
+ // shift_amount must be < kBigitSize.
+ void BigitsShiftLeft(int shift_amount);
+ // BigitLength includes the "hidden" digits encoded in the exponent.
+ int BigitLength() const { return used_digits_ + exponent_; }
+ Chunk BigitAt(int index) const;
+ void SubtractTimes(const Bignum& other, int factor);
+
+ Chunk bigits_buffer_[kBigitCapacity];
+ // A vector backed by bigits_buffer_. This way accesses to the array are
+ // checked for out-of-bounds errors.
+ Vector<Chunk> bigits_;
+ int used_digits_;
+ // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
+ int exponent_;
+
DC_DISALLOW_COPY_AND_ASSIGN(Bignum);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_BIGNUM_H_
+};
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_BIGNUM_H_
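
The comments above describe the representation as value(bigits_) * 2^(exponent_ * kBigitSize), with 28-bit bigits stored least significant first. A standalone sketch that reconstructs that value as a double, purely to illustrate the layout (it loses precision for anything large):

    #include <cmath>
    #include <cstdint>
    #include <vector>

    double bignum_value(const std::vector<uint32_t>& bigits, int exponent) {
      const int kBigitSize = 28;
      double value = 0.0;
      // bigits[0] is the least significant 28-bit digit.
      for (size_t i = bigits.size(); i-- > 0;) {
        value = value * std::ldexp(1.0, kBigitSize) + bigits[i];
      }
      return std::ldexp(value, exponent * kBigitSize);
    }
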
diff --git a/contrib/libs/double-conversion/cached-powers.cc b/contrib/libs/double-conversion/cached-powers.cc
index bec831d863..8ab281a1ba 100644
--- a/contrib/libs/double-conversion/cached-powers.cc
+++ b/contrib/libs/double-conversion/cached-powers.cc
@@ -1,175 +1,175 @@
-// Copyright 2006-2008 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Copyright 2006-2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#include <climits>
#include <cmath>
#include <cstdarg>
-
-#include "utils.h"
-
-#include "cached-powers.h"
-
-namespace double_conversion {
-
-struct CachedPower {
- uint64_t significand;
- int16_t binary_exponent;
- int16_t decimal_exponent;
-};
-
-static const CachedPower kCachedPowers[] = {
- {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
- {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
- {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
- {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
- {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
- {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
- {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
- {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
- {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
- {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
- {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
- {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
- {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
- {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
- {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
- {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
- {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
- {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
- {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
- {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
- {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
- {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
- {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
- {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
- {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
- {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
- {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
- {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
- {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
- {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
- {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
- {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
- {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
- {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
- {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
- {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
- {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
- {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
- {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
- {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
- {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
- {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
- {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
- {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
- {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
- {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
- {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
- {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
- {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
- {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
- {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
- {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
- {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
- {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
- {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
- {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
- {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
- {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
- {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
- {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
- {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
- {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
- {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
- {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
- {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
- {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
- {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
- {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
- {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
- {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
- {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
- {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
- {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
- {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
- {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
- {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
- {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
- {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
- {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
- {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
- {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
- {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
- {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
- {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
- {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
- {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
- {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
-};
-
-static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
-static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
-// Difference between the decimal exponents in the table above.
-const int PowersOfTenCache::kDecimalExponentDistance = 8;
-const int PowersOfTenCache::kMinDecimalExponent = -348;
-const int PowersOfTenCache::kMaxDecimalExponent = 340;
-
-void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
- int min_exponent,
- int max_exponent,
- DiyFp* power,
- int* decimal_exponent) {
- int kQ = DiyFp::kSignificandSize;
- double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
- int foo = kCachedPowersOffset;
- int index =
- (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
- ASSERT(0 <= index && index < static_cast<int>(ARRAY_SIZE(kCachedPowers)));
- CachedPower cached_power = kCachedPowers[index];
- ASSERT(min_exponent <= cached_power.binary_exponent);
- (void) max_exponent; // Mark variable as used.
- ASSERT(cached_power.binary_exponent <= max_exponent);
- *decimal_exponent = cached_power.decimal_exponent;
- *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
-}
-
-
-void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
- DiyFp* power,
- int* found_exponent) {
- ASSERT(kMinDecimalExponent <= requested_exponent);
- ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
- int index =
- (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
- CachedPower cached_power = kCachedPowers[index];
- *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
- *found_exponent = cached_power.decimal_exponent;
- ASSERT(*found_exponent <= requested_exponent);
- ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
-}
-
-} // namespace double_conversion
+
+#include "utils.h"
+
+#include "cached-powers.h"
+
+namespace double_conversion {
+
+struct CachedPower {
+ uint64_t significand;
+ int16_t binary_exponent;
+ int16_t decimal_exponent;
+};
+
+static const CachedPower kCachedPowers[] = {
+ {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
+ {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
+ {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
+ {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
+ {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
+ {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
+ {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
+ {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
+ {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
+ {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
+ {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
+ {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
+ {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
+ {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
+ {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
+ {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
+ {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
+ {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
+ {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
+ {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
+ {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
+ {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
+ {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
+ {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
+ {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
+ {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
+ {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
+ {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
+ {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
+ {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
+ {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
+ {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
+ {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
+ {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
+ {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
+ {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
+ {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
+ {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
+ {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
+ {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
+ {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
+ {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
+ {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
+ {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
+ {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
+ {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
+ {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
+ {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
+ {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
+ {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
+ {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
+ {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
+ {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
+ {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
+ {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
+ {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
+ {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
+ {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
+ {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
+ {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
+ {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
+ {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
+ {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
+ {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
+ {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
+ {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
+ {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
+ {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
+ {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
+ {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
+ {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
+ {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
+ {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
+ {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
+ {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
+ {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
+ {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
+ {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
+ {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
+ {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
+ {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
+ {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
+ {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
+ {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
+ {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
+ {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
+ {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
+};
+
+static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
+static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
+// Difference between the decimal exponents in the table above.
+const int PowersOfTenCache::kDecimalExponentDistance = 8;
+const int PowersOfTenCache::kMinDecimalExponent = -348;
+const int PowersOfTenCache::kMaxDecimalExponent = 340;
+
+void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+ int min_exponent,
+ int max_exponent,
+ DiyFp* power,
+ int* decimal_exponent) {
+ int kQ = DiyFp::kSignificandSize;
+ double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
+ int foo = kCachedPowersOffset;
+ int index =
+ (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
+ ASSERT(0 <= index && index < static_cast<int>(ARRAY_SIZE(kCachedPowers)));
+ CachedPower cached_power = kCachedPowers[index];
+ ASSERT(min_exponent <= cached_power.binary_exponent);
+ (void) max_exponent; // Mark variable as used.
+ ASSERT(cached_power.binary_exponent <= max_exponent);
+ *decimal_exponent = cached_power.decimal_exponent;
+ *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+}
+
+
+void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
+ DiyFp* power,
+ int* found_exponent) {
+ ASSERT(kMinDecimalExponent <= requested_exponent);
+ ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
+ int index =
+ (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
+ CachedPower cached_power = kCachedPowers[index];
+ *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+ *found_exponent = cached_power.decimal_exponent;
+ ASSERT(*found_exponent <= requested_exponent);
+ ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
+}
+
+} // namespace double_conversion
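
GetCachedPowerForBinaryExponentRange maps a minimal binary exponent to a table slot via k = ceil((min_exponent + 63) * 1/lg(10)) and a fixed offset. A standalone sketch that replays that arithmetic for one input, with the constants copied from the code above:

    #include <cmath>
    #include <cstdio>

    int main() {
      const int kCachedPowersOffset = 348;
      const double kD_1_LOG2_10 = 0.30102999566398114;  // 1 / lg(10)
      const int kDecimalExponentDistance = 8;
      const int kQ = 64;  // DiyFp::kSignificandSize
      int min_exponent = 0;
      double k = std::ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
      int index = (kCachedPowersOffset + static_cast<int>(k) - 1) /
                      kDecimalExponentDistance + 1;
      // For min_exponent == 0 this yields k = 19 and index = 46, i.e. the table
      // entry with decimal exponent 20 and binary exponent 3.
      std::printf("k = %.0f, index = %d\n", k, index);
      return 0;
    }
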
diff --git a/contrib/libs/double-conversion/cached-powers.h b/contrib/libs/double-conversion/cached-powers.h
index b37a93d082..61a50614cf 100644
--- a/contrib/libs/double-conversion/cached-powers.h
+++ b/contrib/libs/double-conversion/cached-powers.h
@@ -1,64 +1,64 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_
-#define DOUBLE_CONVERSION_CACHED_POWERS_H_
-
-#include "diy-fp.h"
-
-namespace double_conversion {
-
-class PowersOfTenCache {
- public:
-
- // Not all powers of ten are cached. The decimal exponent of two neighboring
- // cached numbers will differ by kDecimalExponentDistance.
- static const int kDecimalExponentDistance;
-
- static const int kMinDecimalExponent;
- static const int kMaxDecimalExponent;
-
- // Returns a cached power-of-ten with a binary exponent in the range
- // [min_exponent; max_exponent] (boundaries included).
- static void GetCachedPowerForBinaryExponentRange(int min_exponent,
- int max_exponent,
- DiyFp* power,
- int* decimal_exponent);
-
- // Returns a cached power of ten x ~= 10^k such that
- // k <= decimal_exponent < k + kCachedPowersDecimalDistance.
- // The given decimal_exponent must satisfy
- // kMinDecimalExponent <= requested_exponent, and
- // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance.
- static void GetCachedPowerForDecimalExponent(int requested_exponent,
- DiyFp* power,
- int* found_exponent);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_
+#define DOUBLE_CONVERSION_CACHED_POWERS_H_
+
+#include "diy-fp.h"
+
+namespace double_conversion {
+
+class PowersOfTenCache {
+ public:
+
+ // Not all powers of ten are cached. The decimal exponent of two neighboring
+ // cached numbers will differ by kDecimalExponentDistance.
+ static const int kDecimalExponentDistance;
+
+ static const int kMinDecimalExponent;
+ static const int kMaxDecimalExponent;
+
+ // Returns a cached power-of-ten with a binary exponent in the range
+ // [min_exponent; max_exponent] (boundaries included).
+ static void GetCachedPowerForBinaryExponentRange(int min_exponent,
+ int max_exponent,
+ DiyFp* power,
+ int* decimal_exponent);
+
+ // Returns a cached power of ten x ~= 10^k such that
+  //   k <= requested_exponent < k + kDecimalExponentDistance.
+  // The given requested_exponent must satisfy
+ // kMinDecimalExponent <= requested_exponent, and
+ // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance.
+ static void GetCachedPowerForDecimalExponent(int requested_exponent,
+ DiyFp* power,
+ int* found_exponent);
+};
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_
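
A hypothetical usage sketch of GetCachedPowerForDecimalExponent, assuming the double-conversion sources are built alongside it. Because only every 8th decimal exponent is cached, requesting 10^5 is expected to return the entry for 10^4:

    #include <cstdio>
    #include "cached-powers.h"
    #include "diy-fp.h"

    int main() {
      using namespace double_conversion;
      DiyFp power;
      int found_exponent;
      PowersOfTenCache::GetCachedPowerForDecimalExponent(5, &power, &found_exponent);
      // found_exponent should be 4, and power.f() * 2^power.e() ~= 1e4
      // (significand 0x9c40 << 48 with binary exponent -50).
      std::printf("found_exponent = %d\n", found_exponent);
      return 0;
    }
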
diff --git a/contrib/libs/double-conversion/diy-fp.cc b/contrib/libs/double-conversion/diy-fp.cc
index ea0b55f716..ddd1891b16 100644
--- a/contrib/libs/double-conversion/diy-fp.cc
+++ b/contrib/libs/double-conversion/diy-fp.cc
@@ -1,57 +1,57 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-#include "diy-fp.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-void DiyFp::Multiply(const DiyFp& other) {
- // Simply "emulates" a 128 bit multiplication.
- // However: the resulting number only contains 64 bits. The least
- // significant 64 bits are only used for rounding the most significant 64
- // bits.
- const uint64_t kM32 = 0xFFFFFFFFU;
- uint64_t a = f_ >> 32;
- uint64_t b = f_ & kM32;
- uint64_t c = other.f_ >> 32;
- uint64_t d = other.f_ & kM32;
- uint64_t ac = a * c;
- uint64_t bc = b * c;
- uint64_t ad = a * d;
- uint64_t bd = b * d;
- uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
- // By adding 1U << 31 to tmp we round the final result.
- // Halfway cases will be round up.
- tmp += 1U << 31;
- uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
- e_ += other.e_ + 64;
- f_ = result_f;
-}
-
-} // namespace double_conversion
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#include "diy-fp.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+void DiyFp::Multiply(const DiyFp& other) {
+ // Simply "emulates" a 128 bit multiplication.
+ // However: the resulting number only contains 64 bits. The least
+ // significant 64 bits are only used for rounding the most significant 64
+ // bits.
+ const uint64_t kM32 = 0xFFFFFFFFU;
+ uint64_t a = f_ >> 32;
+ uint64_t b = f_ & kM32;
+ uint64_t c = other.f_ >> 32;
+ uint64_t d = other.f_ & kM32;
+ uint64_t ac = a * c;
+ uint64_t bc = b * c;
+ uint64_t ad = a * d;
+ uint64_t bd = b * d;
+ uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
+ // By adding 1U << 31 to tmp we round the final result.
+  // Halfway cases will be rounded up.
+ tmp += 1U << 31;
+ uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
+ e_ += other.e_ + 64;
+ f_ = result_f;
+}
+
+} // namespace double_conversion
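
DiyFp::Multiply assembles the rounded upper 64 bits of a 128-bit product from four 32x32 partial products. A small check against the compiler's 128-bit integer type; __int128 is a GCC/Clang extension, so this is only a test sketch, and it assumes diy-fp.h and diy-fp.cc are available to the build:

    #include <cassert>
    #include <cstdint>
    #include "diy-fp.h"

    using double_conversion::DiyFp;

    // Reference: high 64 bits of x * y, with the halfway bit rounded up,
    // which is what Multiply computes via partial products.
    static uint64_t high64_rounded(uint64_t x, uint64_t y) {
      unsigned __int128 product = static_cast<unsigned __int128>(x) * y;
      return static_cast<uint64_t>(
          (product + (static_cast<unsigned __int128>(1) << 63)) >> 64);
    }

    int main() {
      DiyFp a(0xFFC0000000000001ULL, -3);
      DiyFp b(0x8000000000000123ULL, 10);
      DiyFp c = DiyFp::Times(a, b);
      assert(c.f() == high64_rounded(a.f(), b.f()));
      assert(c.e() == -3 + 10 + 64);  // Exponents add, plus 64 for the dropped low word.
      return 0;
    }
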
diff --git a/contrib/libs/double-conversion/diy-fp.h b/contrib/libs/double-conversion/diy-fp.h
index b88ff7e1fb..2edf34674e 100644
--- a/contrib/libs/double-conversion/diy-fp.h
+++ b/contrib/libs/double-conversion/diy-fp.h
@@ -1,118 +1,118 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_DIY_FP_H_
-#define DOUBLE_CONVERSION_DIY_FP_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-// This "Do It Yourself Floating Point" class implements a floating-point number
-// with a uint64 significand and an int exponent. Normalized DiyFp numbers will
-// have the most significant bit of the significand set.
-// Multiplication and Subtraction do not normalize their results.
-// DiyFp are not designed to contain special doubles (NaN and Infinity).
-class DiyFp {
- public:
- static const int kSignificandSize = 64;
-
- DiyFp() : f_(0), e_(0) {}
- DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {}
-
- // this = this - other.
- // The exponents of both numbers must be the same and the significand of this
- // must be bigger than the significand of other.
- // The result will not be normalized.
- void Subtract(const DiyFp& other) {
- ASSERT(e_ == other.e_);
- ASSERT(f_ >= other.f_);
- f_ -= other.f_;
- }
-
- // Returns a - b.
- // The exponents of both numbers must be the same and this must be bigger
- // than other. The result will not be normalized.
- static DiyFp Minus(const DiyFp& a, const DiyFp& b) {
- DiyFp result = a;
- result.Subtract(b);
- return result;
- }
-
-
- // this = this * other.
- void Multiply(const DiyFp& other);
-
- // returns a * b;
- static DiyFp Times(const DiyFp& a, const DiyFp& b) {
- DiyFp result = a;
- result.Multiply(b);
- return result;
- }
-
- void Normalize() {
- ASSERT(f_ != 0);
- uint64_t significand = f_;
- int exponent = e_;
-
- // This method is mainly called for normalizing boundaries. In general
- // boundaries need to be shifted by 10 bits. We thus optimize for this case.
- const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
- while ((significand & k10MSBits) == 0) {
- significand <<= 10;
- exponent -= 10;
- }
- while ((significand & kUint64MSB) == 0) {
- significand <<= 1;
- exponent--;
- }
- f_ = significand;
- e_ = exponent;
- }
-
- static DiyFp Normalize(const DiyFp& a) {
- DiyFp result = a;
- result.Normalize();
- return result;
- }
-
- uint64_t f() const { return f_; }
- int e() const { return e_; }
-
- void set_f(uint64_t new_value) { f_ = new_value; }
- void set_e(int new_value) { e_ = new_value; }
-
- private:
- static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000);
-
- uint64_t f_;
- int e_;
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_DIY_FP_H_
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DIY_FP_H_
+#define DOUBLE_CONVERSION_DIY_FP_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// This "Do It Yourself Floating Point" class implements a floating-point number
+// with a uint64 significand and an int exponent. Normalized DiyFp numbers will
+// have the most significant bit of the significand set.
+// Multiplication and Subtraction do not normalize their results.
+// DiyFp is not designed to contain special doubles (NaN and Infinity).
+class DiyFp {
+ public:
+ static const int kSignificandSize = 64;
+
+ DiyFp() : f_(0), e_(0) {}
+ DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {}
+
+ // this = this - other.
+ // The exponents of both numbers must be the same and the significand of this
+ // must be bigger than the significand of other.
+ // The result will not be normalized.
+ void Subtract(const DiyFp& other) {
+ ASSERT(e_ == other.e_);
+ ASSERT(f_ >= other.f_);
+ f_ -= other.f_;
+ }
+
+ // Returns a - b.
+ // The exponents of both numbers must be the same and this must be bigger
+ // than other. The result will not be normalized.
+ static DiyFp Minus(const DiyFp& a, const DiyFp& b) {
+ DiyFp result = a;
+ result.Subtract(b);
+ return result;
+ }
+
+
+ // this = this * other.
+ void Multiply(const DiyFp& other);
+
+ // returns a * b;
+ static DiyFp Times(const DiyFp& a, const DiyFp& b) {
+ DiyFp result = a;
+ result.Multiply(b);
+ return result;
+ }
+
+ void Normalize() {
+ ASSERT(f_ != 0);
+ uint64_t significand = f_;
+ int exponent = e_;
+
+ // This method is mainly called for normalizing boundaries. In general
+ // boundaries need to be shifted by 10 bits. We thus optimize for this case.
+ const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
+ while ((significand & k10MSBits) == 0) {
+ significand <<= 10;
+ exponent -= 10;
+ }
+ while ((significand & kUint64MSB) == 0) {
+ significand <<= 1;
+ exponent--;
+ }
+ f_ = significand;
+ e_ = exponent;
+ }
+
+ static DiyFp Normalize(const DiyFp& a) {
+ DiyFp result = a;
+ result.Normalize();
+ return result;
+ }
+
+ uint64_t f() const { return f_; }
+ int e() const { return e_; }
+
+ void set_f(uint64_t new_value) { f_ = new_value; }
+ void set_e(int new_value) { e_ = new_value; }
+
+ private:
+ static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000);
+
+ uint64_t f_;
+ int e_;
+};
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_DIY_FP_H_
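
A hypothetical usage sketch of Normalize, assuming the header is on the include path: the significand is shifted left until its top bit is set, and the exponent is decreased by the same amount so the represented value does not change.

    #include <cassert>
    #include <cstdint>
    #include "diy-fp.h"

    int main() {
      using double_conversion::DiyFp;
      DiyFp x(1, 0);                    // value 1 * 2^0
      DiyFp n = DiyFp::Normalize(x);
      assert(n.f() == (static_cast<uint64_t>(1) << 63));
      assert(n.e() == -63);             // 2^63 * 2^-63 == 1, value preserved.
      return 0;
    }
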
diff --git a/contrib/libs/double-conversion/double-conversion.cc b/contrib/libs/double-conversion/double-conversion.cc
index 2b6627e7eb..6ee6feb09b 100644
--- a/contrib/libs/double-conversion/double-conversion.cc
+++ b/contrib/libs/double-conversion/double-conversion.cc
@@ -1,420 +1,420 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#include <climits>
#include <locale>
#include <cmath>
-
-#include "double-conversion.h"
-
-#include "bignum-dtoa.h"
-#include "fast-dtoa.h"
-#include "fixed-dtoa.h"
-#include "ieee.h"
-#include "strtod.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
- int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
- static DoubleToStringConverter converter(flags,
- "Infinity",
- "NaN",
- 'e',
- -6, 21,
- 6, 0);
- return converter;
-}
-
-
-bool DoubleToStringConverter::HandleSpecialValues(
- double value,
- StringBuilder* result_builder) const {
- Double double_inspect(value);
- if (double_inspect.IsInfinite()) {
- if (infinity_symbol_ == NULL) return false;
- if (value < 0) {
- result_builder->AddCharacter('-');
- }
- result_builder->AddString(infinity_symbol_);
- return true;
- }
- if (double_inspect.IsNan()) {
- if (nan_symbol_ == NULL) return false;
- result_builder->AddString(nan_symbol_);
- return true;
- }
- return false;
-}
-
-
-void DoubleToStringConverter::CreateExponentialRepresentation(
- const char* decimal_digits,
- int length,
- int exponent,
- StringBuilder* result_builder) const {
- ASSERT(length != 0);
- result_builder->AddCharacter(decimal_digits[0]);
- if (length != 1) {
- result_builder->AddCharacter('.');
- result_builder->AddSubstring(&decimal_digits[1], length-1);
- }
- result_builder->AddCharacter(exponent_character_);
- if (exponent < 0) {
- result_builder->AddCharacter('-');
- exponent = -exponent;
- } else {
- if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
- result_builder->AddCharacter('+');
- }
- }
- if (exponent == 0) {
- result_builder->AddCharacter('0');
- return;
- }
- ASSERT(exponent < 1e4);
- const int kMaxExponentLength = 5;
- char buffer[kMaxExponentLength + 1];
- buffer[kMaxExponentLength] = '\0';
- int first_char_pos = kMaxExponentLength;
- while (exponent > 0) {
- buffer[--first_char_pos] = '0' + (exponent % 10);
- exponent /= 10;
- }
- result_builder->AddSubstring(&buffer[first_char_pos],
- kMaxExponentLength - first_char_pos);
-}
-
-
-void DoubleToStringConverter::CreateDecimalRepresentation(
- const char* decimal_digits,
- int length,
- int decimal_point,
- int digits_after_point,
- StringBuilder* result_builder) const {
- // Create a representation that is padded with zeros if needed.
- if (decimal_point <= 0) {
- // "0.00000decimal_rep" or "0.000decimal_rep00".
- result_builder->AddCharacter('0');
- if (digits_after_point > 0) {
- result_builder->AddCharacter('.');
- result_builder->AddPadding('0', -decimal_point);
- ASSERT(length <= digits_after_point - (-decimal_point));
- result_builder->AddSubstring(decimal_digits, length);
- int remaining_digits = digits_after_point - (-decimal_point) - length;
- result_builder->AddPadding('0', remaining_digits);
- }
- } else if (decimal_point >= length) {
- // "decimal_rep0000.00000" or "decimal_rep.0000".
- result_builder->AddSubstring(decimal_digits, length);
- result_builder->AddPadding('0', decimal_point - length);
- if (digits_after_point > 0) {
- result_builder->AddCharacter('.');
- result_builder->AddPadding('0', digits_after_point);
- }
- } else {
- // "decima.l_rep000".
- ASSERT(digits_after_point > 0);
- result_builder->AddSubstring(decimal_digits, decimal_point);
- result_builder->AddCharacter('.');
- ASSERT(length - decimal_point <= digits_after_point);
- result_builder->AddSubstring(&decimal_digits[decimal_point],
- length - decimal_point);
- int remaining_digits = digits_after_point - (length - decimal_point);
- result_builder->AddPadding('0', remaining_digits);
- }
- if (digits_after_point == 0) {
- if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
- result_builder->AddCharacter('.');
- }
- if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
- result_builder->AddCharacter('0');
- }
- }
-}
-
-
-bool DoubleToStringConverter::ToShortestIeeeNumber(
- double value,
- StringBuilder* result_builder,
- DoubleToStringConverter::DtoaMode mode) const {
- ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- int decimal_point;
- bool sign;
- const int kDecimalRepCapacity = kBase10MaximalLength + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
-
- bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- int exponent = decimal_point - 1;
- if ((decimal_in_shortest_low_ <= exponent) &&
- (exponent < decimal_in_shortest_high_)) {
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
- decimal_point,
- Max(0, decimal_rep_length - decimal_point),
- result_builder);
- } else {
- CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
- result_builder);
- }
- return true;
-}
-
-
-bool DoubleToStringConverter::ToFixed(double value,
- int requested_digits,
- StringBuilder* result_builder) const {
- ASSERT(kMaxFixedDigitsBeforePoint == 60);
- const double kFirstNonFixed = 1e60;
-
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
- if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
-
- // Find a sufficiently precise decimal representation of n.
- int decimal_point;
- bool sign;
- // Add space for the '\0' byte.
- const int kDecimalRepCapacity =
- kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
- DoubleToAscii(value, FIXED, requested_digits,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
- requested_digits, result_builder);
- return true;
-}
-
-
-bool DoubleToStringConverter::ToExponential(
- double value,
- int requested_digits,
- StringBuilder* result_builder) const {
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (requested_digits < -1) return false;
- if (requested_digits > kMaxExponentialDigits) return false;
-
- int decimal_point;
- bool sign;
- // Add space for digit before the decimal point and the '\0' character.
- const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
- ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- if (requested_digits == -1) {
- DoubleToAscii(value, SHORTEST, 0,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- } else {
- DoubleToAscii(value, PRECISION, requested_digits + 1,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- ASSERT(decimal_rep_length <= requested_digits + 1);
-
- for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
- decimal_rep[i] = '0';
- }
- decimal_rep_length = requested_digits + 1;
- }
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- int exponent = decimal_point - 1;
- CreateExponentialRepresentation(decimal_rep,
- decimal_rep_length,
- exponent,
- result_builder);
- return true;
-}
-
-
-bool DoubleToStringConverter::ToPrecision(double value,
- int precision,
- StringBuilder* result_builder) const {
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
- return false;
- }
-
- // Find a sufficiently precise decimal representation of n.
- int decimal_point;
- bool sign;
- // Add one for the terminating null character.
- const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- DoubleToAscii(value, PRECISION, precision,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- ASSERT(decimal_rep_length <= precision);
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- // The exponent if we print the number as x.xxeyyy. That is with the
- // decimal point after the first digit.
- int exponent = decimal_point - 1;
-
- int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
- if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
- (decimal_point - precision + extra_zero >
- max_trailing_padding_zeroes_in_precision_mode_)) {
- // Fill buffer to contain 'precision' digits.
- // Usually the buffer is already at the correct length, but 'DoubleToAscii'
- // is allowed to return fewer characters.
- for (int i = decimal_rep_length; i < precision; ++i) {
- decimal_rep[i] = '0';
- }
-
- CreateExponentialRepresentation(decimal_rep,
- precision,
- exponent,
- result_builder);
- } else {
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
- Max(0, precision - decimal_point),
- result_builder);
- }
- return true;
-}
-
-
-static BignumDtoaMode DtoaToBignumDtoaMode(
- DoubleToStringConverter::DtoaMode dtoa_mode) {
- switch (dtoa_mode) {
- case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST;
- case DoubleToStringConverter::SHORTEST_SINGLE:
- return BIGNUM_DTOA_SHORTEST_SINGLE;
- case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED;
- case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
- default:
- UNREACHABLE();
- }
-}
-
-
-void DoubleToStringConverter::DoubleToAscii(double v,
- DtoaMode mode,
- int requested_digits,
- char* buffer,
- int buffer_length,
- bool* sign,
- int* length,
- int* point) {
- Vector<char> vector(buffer, buffer_length);
- ASSERT(!Double(v).IsSpecial());
- ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
-
- if (Double(v).Sign() < 0) {
- *sign = true;
- v = -v;
- } else {
- *sign = false;
- }
-
- if (mode == PRECISION && requested_digits == 0) {
- vector[0] = '\0';
- *length = 0;
- return;
- }
-
- if (v == 0) {
- vector[0] = '0';
- vector[1] = '\0';
- *length = 1;
- *point = 1;
- return;
- }
-
- bool fast_worked;
- switch (mode) {
- case SHORTEST:
- fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
- break;
- case SHORTEST_SINGLE:
- fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
- vector, length, point);
- break;
- case FIXED:
- fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
- break;
- case PRECISION:
- fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
- vector, length, point);
- break;
- default:
- fast_worked = false;
- UNREACHABLE();
- }
- if (fast_worked) return;
-
- // If the fast dtoa didn't succeed use the slower bignum version.
- BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
- BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
- vector[*length] = '\0';
-}
-
-
+
+#include "double-conversion.h"
+
+#include "bignum-dtoa.h"
+#include "fast-dtoa.h"
+#include "fixed-dtoa.h"
+#include "ieee.h"
+#include "strtod.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
+ int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
+ static DoubleToStringConverter converter(flags,
+ "Infinity",
+ "NaN",
+ 'e',
+ -6, 21,
+ 6, 0);
+ return converter;
+}
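
As a quick orientation for this hunk, here is a minimal sketch of how the converter configured above is meant to be used. It assumes the public ToShortest wrapper and the StringBuilder helper declared in this library's headers (double-conversion.h and utils.h); the helper function name is purely illustrative.

#include "double-conversion.h"

// Illustrative only; not part of this patch.
static const char* FormatLikeEcmaScript(double v, char* buf, int size) {
  double_conversion::StringBuilder builder(buf, size);
  const double_conversion::DoubleToStringConverter& conv =
      double_conversion::DoubleToStringConverter::EcmaScriptConverter();
  conv.ToShortest(v, &builder);  // e.g. 0.000001 -> "0.000001", 0.0000001 -> "1e-7"
  return builder.Finalize();     // NUL-terminates and returns buf
}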
+
+
+bool DoubleToStringConverter::HandleSpecialValues(
+ double value,
+ StringBuilder* result_builder) const {
+ Double double_inspect(value);
+ if (double_inspect.IsInfinite()) {
+ if (infinity_symbol_ == NULL) return false;
+ if (value < 0) {
+ result_builder->AddCharacter('-');
+ }
+ result_builder->AddString(infinity_symbol_);
+ return true;
+ }
+ if (double_inspect.IsNan()) {
+ if (nan_symbol_ == NULL) return false;
+ result_builder->AddString(nan_symbol_);
+ return true;
+ }
+ return false;
+}
+
+
+void DoubleToStringConverter::CreateExponentialRepresentation(
+ const char* decimal_digits,
+ int length,
+ int exponent,
+ StringBuilder* result_builder) const {
+ ASSERT(length != 0);
+ result_builder->AddCharacter(decimal_digits[0]);
+ if (length != 1) {
+ result_builder->AddCharacter('.');
+ result_builder->AddSubstring(&decimal_digits[1], length-1);
+ }
+ result_builder->AddCharacter(exponent_character_);
+ if (exponent < 0) {
+ result_builder->AddCharacter('-');
+ exponent = -exponent;
+ } else {
+ if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
+ result_builder->AddCharacter('+');
+ }
+ }
+ if (exponent == 0) {
+ result_builder->AddCharacter('0');
+ return;
+ }
+ ASSERT(exponent < 1e4);
+ const int kMaxExponentLength = 5;
+ char buffer[kMaxExponentLength + 1];
+ buffer[kMaxExponentLength] = '\0';
+ int first_char_pos = kMaxExponentLength;
+ while (exponent > 0) {
+ buffer[--first_char_pos] = '0' + (exponent % 10);
+ exponent /= 10;
+ }
+ result_builder->AddSubstring(&buffer[first_char_pos],
+ kMaxExponentLength - first_char_pos);
+}
+
+
+void DoubleToStringConverter::CreateDecimalRepresentation(
+ const char* decimal_digits,
+ int length,
+ int decimal_point,
+ int digits_after_point,
+ StringBuilder* result_builder) const {
+ // Create a representation that is padded with zeros if needed.
+ if (decimal_point <= 0) {
+ // "0.00000decimal_rep" or "0.000decimal_rep00".
+ result_builder->AddCharacter('0');
+ if (digits_after_point > 0) {
+ result_builder->AddCharacter('.');
+ result_builder->AddPadding('0', -decimal_point);
+ ASSERT(length <= digits_after_point - (-decimal_point));
+ result_builder->AddSubstring(decimal_digits, length);
+ int remaining_digits = digits_after_point - (-decimal_point) - length;
+ result_builder->AddPadding('0', remaining_digits);
+ }
+ } else if (decimal_point >= length) {
+ // "decimal_rep0000.00000" or "decimal_rep.0000".
+ result_builder->AddSubstring(decimal_digits, length);
+ result_builder->AddPadding('0', decimal_point - length);
+ if (digits_after_point > 0) {
+ result_builder->AddCharacter('.');
+ result_builder->AddPadding('0', digits_after_point);
+ }
+ } else {
+ // "decima.l_rep000".
+ ASSERT(digits_after_point > 0);
+ result_builder->AddSubstring(decimal_digits, decimal_point);
+ result_builder->AddCharacter('.');
+ ASSERT(length - decimal_point <= digits_after_point);
+ result_builder->AddSubstring(&decimal_digits[decimal_point],
+ length - decimal_point);
+ int remaining_digits = digits_after_point - (length - decimal_point);
+ result_builder->AddPadding('0', remaining_digits);
+ }
+ if (digits_after_point == 0) {
+ if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
+ result_builder->AddCharacter('.');
+ }
+ if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
+ result_builder->AddCharacter('0');
+ }
+ }
+}
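
To make the three branches above concrete, here is how the same digit string "123" is laid out for representative parameter values (a worked illustration, not code added by the patch):

//  decimal_point <= 0:       digits "123", decimal_point -2, digits_after_point 7
//                            -> "0.0012300"
//  decimal_point >= length:  digits "123", decimal_point  5, digits_after_point 2
//                            -> "12300.00"
//  otherwise:                digits "123", decimal_point  1, digits_after_point 4
//                            -> "1.2300"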
+
+
+bool DoubleToStringConverter::ToShortestIeeeNumber(
+ double value,
+ StringBuilder* result_builder,
+ DoubleToStringConverter::DtoaMode mode) const {
+ ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ int decimal_point;
+ bool sign;
+ const int kDecimalRepCapacity = kBase10MaximalLength + 1;
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+
+ DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+
+ bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ int exponent = decimal_point - 1;
+ if ((decimal_in_shortest_low_ <= exponent) &&
+ (exponent < decimal_in_shortest_high_)) {
+ CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
+ decimal_point,
+ Max(0, decimal_rep_length - decimal_point),
+ result_builder);
+ } else {
+ CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
+ result_builder);
+ }
+ return true;
+}
+
+
+bool DoubleToStringConverter::ToFixed(double value,
+ int requested_digits,
+ StringBuilder* result_builder) const {
+ ASSERT(kMaxFixedDigitsBeforePoint == 60);
+ const double kFirstNonFixed = 1e60;
+
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
+ if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
+
+ // Find a sufficiently precise decimal representation of n.
+ int decimal_point;
+ bool sign;
+ // Add space for the '\0' byte.
+ const int kDecimalRepCapacity =
+ kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+ DoubleToAscii(value, FIXED, requested_digits,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+
+ bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+ requested_digits, result_builder);
+ return true;
+}
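
A short usage sketch of ToFixed with the ECMAScript converter from above (StringBuilder comes from utils.h; the calls and buffer size are illustrative):

char buf[64];
double_conversion::StringBuilder sb(buf, sizeof(buf));
const double_conversion::DoubleToStringConverter& conv =
    double_conversion::DoubleToStringConverter::EcmaScriptConverter();
conv.ToFixed(3.14159, 2, &sb);   // sb now holds "3.14"
// Out-of-range requests fail instead of truncating: values of 1e60 or more,
// or more than 60 requested digits, make ToFixed() return false.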
+
+
+bool DoubleToStringConverter::ToExponential(
+ double value,
+ int requested_digits,
+ StringBuilder* result_builder) const {
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ if (requested_digits < -1) return false;
+ if (requested_digits > kMaxExponentialDigits) return false;
+
+ int decimal_point;
+ bool sign;
+ // Add space for digit before the decimal point and the '\0' character.
+ const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
+ ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+
+ if (requested_digits == -1) {
+ DoubleToAscii(value, SHORTEST, 0,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+ } else {
+ DoubleToAscii(value, PRECISION, requested_digits + 1,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+ ASSERT(decimal_rep_length <= requested_digits + 1);
+
+ for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
+ decimal_rep[i] = '0';
+ }
+ decimal_rep_length = requested_digits + 1;
+ }
+
+ bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ int exponent = decimal_point - 1;
+ CreateExponentialRepresentation(decimal_rep,
+ decimal_rep_length,
+ exponent,
+ result_builder);
+ return true;
+}
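
For reference, the expected output shapes with the ECMAScript converter (illustrative values, matching what the code above produces):

//  ToExponential(3.14159, 2, &sb)   -> "3.14e+0"
//  ToExponential(0.0, 2, &sb)       -> "0.00e+0"
//  ToExponential(123456.0, -1, &sb) -> "1.23456e+5"   (-1 = shortest digit string)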
+
+
+bool DoubleToStringConverter::ToPrecision(double value,
+ int precision,
+ StringBuilder* result_builder) const {
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
+ return false;
+ }
+
+ // Find a sufficiently precise decimal representation of n.
+ int decimal_point;
+ bool sign;
+ // Add one for the terminating null character.
+ const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+
+ DoubleToAscii(value, PRECISION, precision,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+ ASSERT(decimal_rep_length <= precision);
+
+ bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ // The exponent if we print the number as x.xxeyyy. That is with the
+ // decimal point after the first digit.
+ int exponent = decimal_point - 1;
+
+ int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
+ if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
+ (decimal_point - precision + extra_zero >
+ max_trailing_padding_zeroes_in_precision_mode_)) {
+ // Fill buffer to contain 'precision' digits.
+ // Usually the buffer is already at the correct length, but 'DoubleToAscii'
+ // is allowed to return fewer characters.
+ for (int i = decimal_rep_length; i < precision; ++i) {
+ decimal_rep[i] = '0';
+ }
+
+ CreateExponentialRepresentation(decimal_rep,
+ precision,
+ exponent,
+ result_builder);
+ } else {
+ CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+ Max(0, precision - decimal_point),
+ result_builder);
+ }
+ return true;
+}
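
The padding limits decide between the two branches. With the ECMAScript converter (at most 6 leading and 0 trailing padding zeroes), for example:

//  ToPrecision(0.0000012345, 2, &sb)  -> "0.0000012"  (decimal form still allowed)
//  ToPrecision(0.00000012345, 2, &sb) -> "1.2e-7"     (too many leading zeroes)
//  ToPrecision(123456.0, 3, &sb)      -> "1.23e+5"    (would need trailing zeroes)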
+
+
+static BignumDtoaMode DtoaToBignumDtoaMode(
+ DoubleToStringConverter::DtoaMode dtoa_mode) {
+ switch (dtoa_mode) {
+ case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST;
+ case DoubleToStringConverter::SHORTEST_SINGLE:
+ return BIGNUM_DTOA_SHORTEST_SINGLE;
+ case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED;
+ case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
+ default:
+ UNREACHABLE();
+ }
+}
+
+
+void DoubleToStringConverter::DoubleToAscii(double v,
+ DtoaMode mode,
+ int requested_digits,
+ char* buffer,
+ int buffer_length,
+ bool* sign,
+ int* length,
+ int* point) {
+ Vector<char> vector(buffer, buffer_length);
+ ASSERT(!Double(v).IsSpecial());
+ ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
+
+ if (Double(v).Sign() < 0) {
+ *sign = true;
+ v = -v;
+ } else {
+ *sign = false;
+ }
+
+ if (mode == PRECISION && requested_digits == 0) {
+ vector[0] = '\0';
+ *length = 0;
+ return;
+ }
+
+ if (v == 0) {
+ vector[0] = '0';
+ vector[1] = '\0';
+ *length = 1;
+ *point = 1;
+ return;
+ }
+
+ bool fast_worked;
+ switch (mode) {
+ case SHORTEST:
+ fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
+ break;
+ case SHORTEST_SINGLE:
+ fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
+ vector, length, point);
+ break;
+ case FIXED:
+ fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
+ break;
+ case PRECISION:
+ fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
+ vector, length, point);
+ break;
+ default:
+ fast_worked = false;
+ UNREACHABLE();
+ }
+ if (fast_worked) return;
+
+ // If the fast dtoa didn't succeed use the slower bignum version.
+ BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
+ BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
+ vector[*length] = '\0';
+}
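
A sketch of the digits/point contract of DoubleToAscii (the call itself is only an illustration):

char digits[32];
bool sign;
int length, point;
double_conversion::DoubleToStringConverter::DoubleToAscii(
    -0.25, double_conversion::DoubleToStringConverter::SHORTEST, 0,
    digits, 32, &sign, &length, &point);
// digits == "25", sign == true, length == 2, point == 0,
// i.e. the value is -(25 * 10^(point - length)) = -0.25.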
+
+
namespace {
inline char ToLower(char ch) {
@@ -443,20 +443,20 @@ static inline bool ConsumeSubStringImpl(Iterator* current,
return true;
}
-// Consumes the given substring from the iterator.
-// Returns false, if the substring does not match.
-template <class Iterator>
-static bool ConsumeSubString(Iterator* current,
- Iterator end,
+// Consumes the given substring from the iterator.
+// Returns false, if the substring does not match.
+template <class Iterator>
+static bool ConsumeSubString(Iterator* current,
+ Iterator end,
const char* substring,
bool allow_case_insensibility) {
if (allow_case_insensibility) {
return ConsumeSubStringImpl(current, end, substring, ToLower);
} else {
return ConsumeSubStringImpl(current, end, substring, Pass);
- }
-}
-
+ }
+}
+
// Checks whether the first character of str is equal to ch (ignoring case if requested).
inline bool ConsumeFirstCharacter(char ch,
const char* str,
@@ -464,93 +464,93 @@ inline bool ConsumeFirstCharacter(char ch,
return case_insensibility ? ToLower(ch) == str[0] : ch == str[0];
}
} // namespace
-
-// Maximum number of significant digits in decimal representation.
-// The longest possible double in decimal representation is
-// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
-// (768 digits). If we parse a number whose first digits are equal to a
-// mean of 2 adjacent doubles (that could have up to 769 digits) the result
-// must be rounded to the bigger one unless the tail consists of zeros, so
-// we don't need to preserve all the digits.
-const int kMaxSignificantDigits = 772;
-
-
-static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 };
-static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7);
-
-
-static const uc16 kWhitespaceTable16[] = {
- 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195,
- 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279
-};
-static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16);
-
-
-static bool isWhitespace(int x) {
- if (x < 128) {
- for (int i = 0; i < kWhitespaceTable7Length; i++) {
- if (kWhitespaceTable7[i] == x) return true;
- }
- } else {
- for (int i = 0; i < kWhitespaceTable16Length; i++) {
- if (kWhitespaceTable16[i] == x) return true;
- }
- }
- return false;
-}
-
-
-// Returns true if a non-space character was found and false if the end has been reached.
-template <class Iterator>
-static inline bool AdvanceToNonspace(Iterator* current, Iterator end) {
- while (*current != end) {
- if (!isWhitespace(**current)) return true;
- ++*current;
- }
- return false;
-}
-
-
-static bool isDigit(int x, int radix) {
- return (x >= '0' && x <= '9' && x < '0' + radix)
- || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
- || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
-}
-
-
-static double SignedZero(bool sign) {
- return sign ? -0.0 : 0.0;
-}
-
-
-// Returns true if 'c' is a decimal digit that is valid for the given radix.
-//
-// The function is small and could be inlined, but VS2012 emitted a warning
-// because it constant-propagated the radix and concluded that the last
-// condition was always true. By moving it into a separate function the
-// compiler wouldn't warn anymore.
-#if _MSC_VER
-#pragma optimize("",off)
-static bool IsDecimalDigitForRadix(int c, int radix) {
- return '0' <= c && c <= '9' && (c - '0') < radix;
-}
-#pragma optimize("",on)
-#else
-static bool inline IsDecimalDigitForRadix(int c, int radix) {
+
+// Maximum number of significant digits in decimal representation.
+// The longest possible double in decimal representation is
+// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
+// (768 digits). If we parse a number whose first digits are equal to a
+// mean of 2 adjacent doubles (that could have up to 769 digits) the result
+// must be rounded to the bigger one unless the tail consists of zeros, so
+// we don't need to preserve all the digits.
+const int kMaxSignificantDigits = 772;
+
+
+static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 };
+static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7);
+
+
+static const uc16 kWhitespaceTable16[] = {
+ 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195,
+ 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279
+};
+static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16);
+
+
+static bool isWhitespace(int x) {
+ if (x < 128) {
+ for (int i = 0; i < kWhitespaceTable7Length; i++) {
+ if (kWhitespaceTable7[i] == x) return true;
+ }
+ } else {
+ for (int i = 0; i < kWhitespaceTable16Length; i++) {
+ if (kWhitespaceTable16[i] == x) return true;
+ }
+ }
+ return false;
+}
+
+
+// Returns true if a non-space character was found and false if the end has been reached.
+template <class Iterator>
+static inline bool AdvanceToNonspace(Iterator* current, Iterator end) {
+ while (*current != end) {
+ if (!isWhitespace(**current)) return true;
+ ++*current;
+ }
+ return false;
+}
+
+
+static bool isDigit(int x, int radix) {
+ return (x >= '0' && x <= '9' && x < '0' + radix)
+ || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
+ || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
+}
+
+
+static double SignedZero(bool sign) {
+ return sign ? -0.0 : 0.0;
+}
+
+
+// Returns true if 'c' is a decimal digit that is valid for the given radix.
+//
+// The function is small and could be inlined, but VS2012 emitted a warning
+// because it constant-propagated the radix and concluded that the last
+// condition was always true. By moving it into a separate function the
+// compiler wouldn't warn anymore.
+#if _MSC_VER
+#pragma optimize("",off)
+static bool IsDecimalDigitForRadix(int c, int radix) {
+ return '0' <= c && c <= '9' && (c - '0') < radix;
+}
+#pragma optimize("",on)
+#else
+static bool inline IsDecimalDigitForRadix(int c, int radix) {
return '0' <= c && c <= '9' && (c - '0') < radix;
-}
-#endif
-// Returns true if 'c' is a character digit that is valid for the given radix.
-// The 'a_character' should be 'a' or 'A'.
-//
-// The function is small and could be inlined, but VS2012 emitted a warning
-// because it constant-propagated the radix and concluded that the first
-// condition was always false. By moving it into a separate function the
-// compiler wouldn't warn anymore.
-static bool IsCharacterDigitForRadix(int c, int radix, char a_character) {
- return radix > 10 && c >= a_character && c < a_character + radix - 10;
-}
-
+}
+#endif
+// Returns true if 'c' is a character digit that is valid for the given radix.
+// The 'a_character' should be 'a' or 'A'.
+//
+// The function is small and could be inlined, but VS2012 emitted a warning
+// because it constant-propagated the radix and concluded that the first
+// condition was always false. By moving it into a separate function the
+// compiler wouldn't warn anymore.
+static bool IsCharacterDigitForRadix(int c, int radix, char a_character) {
+ return radix > 10 && c >= a_character && c < a_character + radix - 10;
+}
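
Taken together, the two helpers accept exactly the digit set of the radix; for radix 16, for instance:

//  IsDecimalDigitForRadix('7', 16)         -> true
//  IsCharacterDigitForRadix('c', 16, 'a')  -> true   ('c' has digit value 12)
//  IsCharacterDigitForRadix('g', 16, 'a')  -> false  (only 'a'..'f' are valid)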
+
// Returns true when the iterator is equal to end.
template<class Iterator>
static bool Advance (Iterator* it, char separator, int base, Iterator& end) {
@@ -570,7 +570,7 @@ static bool Advance (Iterator* it, char separator, int base, Iterator& end) {
}
return *it == end;
}
-
+
// Checks whether the string in the range start-end is a hex-float string.
// This function assumes that the leading '0x'/'0X' is already consumed.
//
@@ -614,30 +614,30 @@ static bool IsHexFloatString(Iterator start,
}
-// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
+// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
//
// If parse_as_hex_float is true, then the string must be a valid
// hex-float.
-template <int radix_log_2, class Iterator>
-static double RadixStringToIeee(Iterator* current,
- Iterator end,
- bool sign,
+template <int radix_log_2, class Iterator>
+static double RadixStringToIeee(Iterator* current,
+ Iterator end,
+ bool sign,
char separator,
bool parse_as_hex_float,
- bool allow_trailing_junk,
- double junk_string_value,
- bool read_as_double,
- bool* result_is_junk) {
- ASSERT(*current != end);
+ bool allow_trailing_junk,
+ double junk_string_value,
+ bool read_as_double,
+ bool* result_is_junk) {
+ ASSERT(*current != end);
ASSERT(!parse_as_hex_float ||
IsHexFloatString(*current, end, separator, allow_trailing_junk));
-
- const int kDoubleSize = Double::kSignificandSize;
- const int kSingleSize = Single::kSignificandSize;
- const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
-
- *result_is_junk = true;
-
+
+ const int kDoubleSize = Double::kSignificandSize;
+ const int kSingleSize = Single::kSignificandSize;
+ const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
+
+ *result_is_junk = true;
+
int64_t number = 0;
int exponent = 0;
const int radix = (1 << radix_log_2);
@@ -645,24 +645,24 @@ static double RadixStringToIeee(Iterator* current,
// Only relevant if parse_as_hex_float is true.
bool post_decimal = false;
- // Skip leading 0s.
- while (**current == '0') {
+ // Skip leading 0s.
+ while (**current == '0') {
if (Advance(current, separator, radix, end)) {
- *result_is_junk = false;
- return SignedZero(sign);
- }
- }
-
+ *result_is_junk = false;
+ return SignedZero(sign);
+ }
+ }
+
while (true) {
- int digit;
- if (IsDecimalDigitForRadix(**current, radix)) {
- digit = static_cast<char>(**current) - '0';
+ int digit;
+ if (IsDecimalDigitForRadix(**current, radix)) {
+ digit = static_cast<char>(**current) - '0';
if (post_decimal) exponent -= radix_log_2;
- } else if (IsCharacterDigitForRadix(**current, radix, 'a')) {
- digit = static_cast<char>(**current) - 'a' + 10;
+ } else if (IsCharacterDigitForRadix(**current, radix, 'a')) {
+ digit = static_cast<char>(**current) - 'a' + 10;
if (post_decimal) exponent -= radix_log_2;
- } else if (IsCharacterDigitForRadix(**current, radix, 'A')) {
- digit = static_cast<char>(**current) - 'A' + 10;
+ } else if (IsCharacterDigitForRadix(**current, radix, 'A')) {
+ digit = static_cast<char>(**current) - 'A' + 10;
if (post_decimal) exponent -= radix_log_2;
} else if (parse_as_hex_float && **current == '.') {
post_decimal = true;
@@ -671,32 +671,32 @@ static double RadixStringToIeee(Iterator* current,
continue;
} else if (parse_as_hex_float && (**current == 'p' || **current == 'P')) {
break;
- } else {
- if (allow_trailing_junk || !AdvanceToNonspace(current, end)) {
- break;
- } else {
- return junk_string_value;
- }
- }
-
- number = number * radix + digit;
- int overflow = static_cast<int>(number >> kSignificandSize);
- if (overflow != 0) {
- // Overflow occurred. Need to determine which direction to round the
- // result.
- int overflow_bits_count = 1;
- while (overflow > 1) {
- overflow_bits_count++;
- overflow >>= 1;
- }
-
- int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
- int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
- number >>= overflow_bits_count;
+ } else {
+ if (allow_trailing_junk || !AdvanceToNonspace(current, end)) {
+ break;
+ } else {
+ return junk_string_value;
+ }
+ }
+
+ number = number * radix + digit;
+ int overflow = static_cast<int>(number >> kSignificandSize);
+ if (overflow != 0) {
+ // Overflow occurred. Need to determine which direction to round the
+ // result.
+ int overflow_bits_count = 1;
+ while (overflow > 1) {
+ overflow_bits_count++;
+ overflow >>= 1;
+ }
+
+ int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
+ int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
+ number >>= overflow_bits_count;
exponent += overflow_bits_count;
-
- bool zero_tail = true;
- for (;;) {
+
+ bool zero_tail = true;
+ for (;;) {
if (Advance(current, separator, radix, end)) break;
if (parse_as_hex_float && **current == '.') {
// Just run over the '.'. We are just trying to see whether there is
@@ -706,42 +706,42 @@ static double RadixStringToIeee(Iterator* current,
post_decimal = true;
}
if (!isDigit(**current, radix)) break;
- zero_tail = zero_tail && **current == '0';
+ zero_tail = zero_tail && **current == '0';
if (!post_decimal) exponent += radix_log_2;
- }
-
+ }
+
if (!parse_as_hex_float &&
!allow_trailing_junk &&
AdvanceToNonspace(current, end)) {
- return junk_string_value;
- }
-
- int middle_value = (1 << (overflow_bits_count - 1));
- if (dropped_bits > middle_value) {
- number++; // Rounding up.
- } else if (dropped_bits == middle_value) {
- // Rounding to even for consistency with decimals: the half-way case rounds
- // up if the significant part is odd and down otherwise.
- if ((number & 1) != 0 || !zero_tail) {
- number++; // Rounding up.
- }
- }
-
- // Rounding up may cause overflow.
- if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
- exponent++;
- number >>= 1;
- }
- break;
- }
+ return junk_string_value;
+ }
+
+ int middle_value = (1 << (overflow_bits_count - 1));
+ if (dropped_bits > middle_value) {
+ number++; // Rounding up.
+ } else if (dropped_bits == middle_value) {
+ // Rounding to even for consistency with decimals: the half-way case rounds
+ // up if the significant part is odd and down otherwise.
+ if ((number & 1) != 0 || !zero_tail) {
+ number++; // Rounding up.
+ }
+ }
+
+ // Rounding up may cause overflow.
+ if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
+ exponent++;
+ number >>= 1;
+ }
+ break;
+ }
if (Advance(current, separator, radix, end)) break;
}
-
- ASSERT(number < ((int64_t)1 << kSignificandSize));
- ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
-
- *result_is_junk = false;
-
+
+ ASSERT(number < ((int64_t)1 << kSignificandSize));
+ ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
+
+ *result_is_junk = false;
+
if (parse_as_hex_float) {
ASSERT(**current == 'p' || **current == 'P');
Advance(current, separator, radix, end);
@@ -765,133 +765,133 @@ static double RadixStringToIeee(Iterator* current,
}
if (exponent == 0 || number == 0) {
- if (sign) {
- if (number == 0) return -0.0;
- number = -number;
- }
- return static_cast<double>(number);
- }
-
- ASSERT(number != 0);
+ if (sign) {
+ if (number == 0) return -0.0;
+ number = -number;
+ }
+ return static_cast<double>(number);
+ }
+
+ ASSERT(number != 0);
double result = Double(DiyFp(number, exponent)).value();
return sign ? -result : result;
-}
-
-template <class Iterator>
-double StringToDoubleConverter::StringToIeee(
- Iterator input,
- int length,
- bool read_as_double,
- int* processed_characters_count) const {
- Iterator current = input;
- Iterator end = input + length;
-
- *processed_characters_count = 0;
-
- const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
- const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
- const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
- const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
+}
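
As a worked example of the bookkeeping above, parsing the hex-float "0x1.8p1" (with ALLOW_HEX_FLOATS, after the caller has consumed the "0x" prefix) proceeds as sketched here; the trace is illustrative, not code from the patch:

//  '1'   -> number = 0x1,  exponent = 0
//  '.'   -> post_decimal = true
//  '8'   -> number = 0x18, exponent = -4   (each hex digit after '.' subtracts 4)
//  "p1"  -> exponent += 1, so exponent = -3
//  result = Double(DiyFp(0x18, -3)).value() = 24 * 2^-3 = 3.0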
+
+template <class Iterator>
+double StringToDoubleConverter::StringToIeee(
+ Iterator input,
+ int length,
+ bool read_as_double,
+ int* processed_characters_count) const {
+ Iterator current = input;
+ Iterator end = input + length;
+
+ *processed_characters_count = 0;
+
+ const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
+ const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
+ const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
+ const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
const bool allow_case_insensibility = (flags_ & ALLOW_CASE_INSENSIBILITY) != 0;
-
- // To make sure that iterator dereferencing is valid the following
- // convention is used:
- // 1. Each '++current' statement is followed by check for equality to 'end'.
- // 2. If AdvanceToNonspace returned false then current == end.
- // 3. If 'current' becomes equal to 'end' the function returns or goes to
- // 'parsing_done'.
- // 4. 'current' is not dereferenced after the 'parsing_done' label.
- // 5. Code before 'parsing_done' may rely on 'current != end'.
- if (current == end) return empty_string_value_;
-
- if (allow_leading_spaces || allow_trailing_spaces) {
- if (!AdvanceToNonspace(&current, end)) {
- *processed_characters_count = static_cast<int>(current - input);
- return empty_string_value_;
- }
- if (!allow_leading_spaces && (input != current)) {
- // No leading spaces allowed, but AdvanceToNonspace moved forward.
- return junk_string_value_;
- }
- }
-
- // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
- const int kBufferSize = kMaxSignificantDigits + 10;
- char buffer[kBufferSize]; // NOLINT: size is known at compile time.
- int buffer_pos = 0;
-
- // Exponent will be adjusted if insignificant digits of the integer part
- // or insignificant leading zeros of the fractional part are dropped.
- int exponent = 0;
- int significant_digits = 0;
- int insignificant_digits = 0;
- bool nonzero_digit_dropped = false;
-
- bool sign = false;
-
- if (*current == '+' || *current == '-') {
- sign = (*current == '-');
- ++current;
- Iterator next_non_space = current;
- // Skip following spaces (if allowed).
- if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
- if (!allow_spaces_after_sign && (current != next_non_space)) {
- return junk_string_value_;
- }
- current = next_non_space;
- }
-
- if (infinity_symbol_ != NULL) {
+
+ // To make sure that iterator dereferencing is valid the following
+ // convention is used:
+ // 1. Each '++current' statement is followed by check for equality to 'end'.
+ // 2. If AdvanceToNonspace returned false then current == end.
+ // 3. If 'current' becomes equal to 'end' the function returns or goes to
+ // 'parsing_done'.
+ // 4. 'current' is not dereferenced after the 'parsing_done' label.
+ // 5. Code before 'parsing_done' may rely on 'current != end'.
+ if (current == end) return empty_string_value_;
+
+ if (allow_leading_spaces || allow_trailing_spaces) {
+ if (!AdvanceToNonspace(&current, end)) {
+ *processed_characters_count = static_cast<int>(current - input);
+ return empty_string_value_;
+ }
+ if (!allow_leading_spaces && (input != current)) {
+ // No leading spaces allowed, but AdvanceToNonspace moved forward.
+ return junk_string_value_;
+ }
+ }
+
+ // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
+ const int kBufferSize = kMaxSignificantDigits + 10;
+ char buffer[kBufferSize]; // NOLINT: size is known at compile time.
+ int buffer_pos = 0;
+
+ // Exponent will be adjusted if insignificant digits of the integer part
+ // or insignificant leading zeros of the fractional part are dropped.
+ int exponent = 0;
+ int significant_digits = 0;
+ int insignificant_digits = 0;
+ bool nonzero_digit_dropped = false;
+
+ bool sign = false;
+
+ if (*current == '+' || *current == '-') {
+ sign = (*current == '-');
+ ++current;
+ Iterator next_non_space = current;
+ // Skip following spaces (if allowed).
+ if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
+ if (!allow_spaces_after_sign && (current != next_non_space)) {
+ return junk_string_value_;
+ }
+ current = next_non_space;
+ }
+
+ if (infinity_symbol_ != NULL) {
if (ConsumeFirstCharacter(*current, infinity_symbol_, allow_case_insensibility)) {
if (!ConsumeSubString(&current, end, infinity_symbol_, allow_case_insensibility)) {
- return junk_string_value_;
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
- return junk_string_value_;
- }
-
- ASSERT(buffer_pos == 0);
- *processed_characters_count = static_cast<int>(current - input);
- return sign ? -Double::Infinity() : Double::Infinity();
- }
- }
-
- if (nan_symbol_ != NULL) {
+ return junk_string_value_;
+ }
+
+ if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+ return junk_string_value_;
+ }
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value_;
+ }
+
+ ASSERT(buffer_pos == 0);
+ *processed_characters_count = static_cast<int>(current - input);
+ return sign ? -Double::Infinity() : Double::Infinity();
+ }
+ }
+
+ if (nan_symbol_ != NULL) {
if (ConsumeFirstCharacter(*current, nan_symbol_, allow_case_insensibility)) {
if (!ConsumeSubString(&current, end, nan_symbol_, allow_case_insensibility)) {
- return junk_string_value_;
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
- return junk_string_value_;
- }
-
- ASSERT(buffer_pos == 0);
- *processed_characters_count = static_cast<int>(current - input);
- return sign ? -Double::NaN() : Double::NaN();
- }
- }
-
- bool leading_zero = false;
- if (*current == '0') {
+ return junk_string_value_;
+ }
+
+ if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+ return junk_string_value_;
+ }
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value_;
+ }
+
+ ASSERT(buffer_pos == 0);
+ *processed_characters_count = static_cast<int>(current - input);
+ return sign ? -Double::NaN() : Double::NaN();
+ }
+ }
+
+ bool leading_zero = false;
+ if (*current == '0') {
if (Advance(&current, separator_, 10, end)) {
- *processed_characters_count = static_cast<int>(current - input);
- return SignedZero(sign);
- }
-
- leading_zero = true;
-
- // It could be hexadecimal value.
+ *processed_characters_count = static_cast<int>(current - input);
+ return SignedZero(sign);
+ }
+
+ leading_zero = true;
+
+ // It could be hexadecimal value.
if (((flags_ & ALLOW_HEX) || (flags_ & ALLOW_HEX_FLOATS)) &&
(*current == 'x' || *current == 'X')) {
- ++current;
+ ++current;
bool parse_as_hex_float = (flags_ & ALLOW_HEX_FLOATS) &&
IsHexFloatString(current, end, separator_, allow_trailing_junk);
@@ -899,240 +899,240 @@ double StringToDoubleConverter::StringToIeee(
if (current == end) return junk_string_value_; // "0x"
if (!parse_as_hex_float && !isDigit(*current, 16)) {
return junk_string_value_;
- }
-
- bool result_is_junk;
- double result = RadixStringToIeee<4>(&current,
- end,
- sign,
+ }
+
+ bool result_is_junk;
+ double result = RadixStringToIeee<4>(&current,
+ end,
+ sign,
separator_,
parse_as_hex_float,
- allow_trailing_junk,
- junk_string_value_,
- read_as_double,
- &result_is_junk);
- if (!result_is_junk) {
- if (allow_trailing_spaces) AdvanceToNonspace(&current, end);
- *processed_characters_count = static_cast<int>(current - input);
- }
- return result;
- }
-
- // Ignore leading zeros in the integer part.
- while (*current == '0') {
+ allow_trailing_junk,
+ junk_string_value_,
+ read_as_double,
+ &result_is_junk);
+ if (!result_is_junk) {
+ if (allow_trailing_spaces) AdvanceToNonspace(&current, end);
+ *processed_characters_count = static_cast<int>(current - input);
+ }
+ return result;
+ }
+
+ // Ignore leading zeros in the integer part.
+ while (*current == '0') {
if (Advance(&current, separator_, 10, end)) {
- *processed_characters_count = static_cast<int>(current - input);
- return SignedZero(sign);
- }
- }
- }
-
- bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
-
- // Copy significant digits of the integer part (if any) to the buffer.
- while (*current >= '0' && *current <= '9') {
- if (significant_digits < kMaxSignificantDigits) {
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos++] = static_cast<char>(*current);
- significant_digits++;
- // Will later check if it's an octal in the buffer.
- } else {
- insignificant_digits++; // Move the digit into the exponential part.
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
- }
- octal = octal && *current < '8';
+ *processed_characters_count = static_cast<int>(current - input);
+ return SignedZero(sign);
+ }
+ }
+ }
+
+ bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
+
+ // Copy significant digits of the integer part (if any) to the buffer.
+ while (*current >= '0' && *current <= '9') {
+ if (significant_digits < kMaxSignificantDigits) {
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos++] = static_cast<char>(*current);
+ significant_digits++;
+ // Will later check if it's an octal in the buffer.
+ } else {
+ insignificant_digits++; // Move the digit into the exponential part.
+ nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+ }
+ octal = octal && *current < '8';
if (Advance(&current, separator_, 10, end)) goto parsing_done;
- }
-
- if (significant_digits == 0) {
- octal = false;
- }
-
- if (*current == '.') {
- if (octal && !allow_trailing_junk) return junk_string_value_;
- if (octal) goto parsing_done;
-
+ }
+
+ if (significant_digits == 0) {
+ octal = false;
+ }
+
+ if (*current == '.') {
+ if (octal && !allow_trailing_junk) return junk_string_value_;
+ if (octal) goto parsing_done;
+
if (Advance(&current, separator_, 10, end)) {
- if (significant_digits == 0 && !leading_zero) {
- return junk_string_value_;
- } else {
- goto parsing_done;
- }
- }
-
- if (significant_digits == 0) {
- // octal = false;
- // Integer part consists of 0 or is absent. Significant digits start after
- // leading zeros (if any).
- while (*current == '0') {
+ if (significant_digits == 0 && !leading_zero) {
+ return junk_string_value_;
+ } else {
+ goto parsing_done;
+ }
+ }
+
+ if (significant_digits == 0) {
+ // octal = false;
+ // Integer part consists of 0 or is absent. Significant digits start after
+ // leading zeros (if any).
+ while (*current == '0') {
if (Advance(&current, separator_, 10, end)) {
- *processed_characters_count = static_cast<int>(current - input);
- return SignedZero(sign);
- }
- exponent--; // Move this 0 into the exponent.
- }
- }
-
- // There is a fractional part.
- // We don't emit a '.', but adjust the exponent instead.
- while (*current >= '0' && *current <= '9') {
- if (significant_digits < kMaxSignificantDigits) {
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos++] = static_cast<char>(*current);
- significant_digits++;
- exponent--;
- } else {
- // Ignore insignificant digits in the fractional part.
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
- }
+ *processed_characters_count = static_cast<int>(current - input);
+ return SignedZero(sign);
+ }
+ exponent--; // Move this 0 into the exponent.
+ }
+ }
+
+ // There is a fractional part.
+ // We don't emit a '.', but adjust the exponent instead.
+ while (*current >= '0' && *current <= '9') {
+ if (significant_digits < kMaxSignificantDigits) {
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos++] = static_cast<char>(*current);
+ significant_digits++;
+ exponent--;
+ } else {
+ // Ignore insignificant digits in the fractional part.
+ nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+ }
if (Advance(&current, separator_, 10, end)) goto parsing_done;
- }
- }
-
- if (!leading_zero && exponent == 0 && significant_digits == 0) {
- // If leading_zeros is true then the string contains zeros.
- // If exponent < 0 then string was [+-]\.0*...
- // If significant_digits != 0 the string is not equal to 0.
- // Otherwise there are no digits in the string.
- return junk_string_value_;
- }
-
- // Parse exponential part.
- if (*current == 'e' || *current == 'E') {
- if (octal && !allow_trailing_junk) return junk_string_value_;
- if (octal) goto parsing_done;
+ }
+ }
+
+ if (!leading_zero && exponent == 0 && significant_digits == 0) {
+ // If leading_zeros is true then the string contains zeros.
+ // If exponent < 0 then string was [+-]\.0*...
+ // If significant_digits != 0 the string is not equal to 0.
+ // Otherwise there are no digits in the string.
+ return junk_string_value_;
+ }
+
+ // Parse exponential part.
+ if (*current == 'e' || *current == 'E') {
+ if (octal && !allow_trailing_junk) return junk_string_value_;
+ if (octal) goto parsing_done;
Iterator junk_begin = current;
- ++current;
- if (current == end) {
- if (allow_trailing_junk) {
+ ++current;
+ if (current == end) {
+ if (allow_trailing_junk) {
current = junk_begin;
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
- char exponen_sign = '+';
- if (*current == '+' || *current == '-') {
- exponen_sign = static_cast<char>(*current);
- ++current;
- if (current == end) {
- if (allow_trailing_junk) {
+ goto parsing_done;
+ } else {
+ return junk_string_value_;
+ }
+ }
+ char exponen_sign = '+';
+ if (*current == '+' || *current == '-') {
+ exponen_sign = static_cast<char>(*current);
+ ++current;
+ if (current == end) {
+ if (allow_trailing_junk) {
current = junk_begin;
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
- }
-
- if (current == end || *current < '0' || *current > '9') {
- if (allow_trailing_junk) {
+ goto parsing_done;
+ } else {
+ return junk_string_value_;
+ }
+ }
+ }
+
+ if (current == end || *current < '0' || *current > '9') {
+ if (allow_trailing_junk) {
current = junk_begin;
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
-
- const int max_exponent = INT_MAX / 2;
- ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
- int num = 0;
- do {
- // Check overflow.
- int digit = *current - '0';
- if (num >= max_exponent / 10
- && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
- num = max_exponent;
- } else {
- num = num * 10 + digit;
- }
- ++current;
- } while (current != end && *current >= '0' && *current <= '9');
-
- exponent += (exponen_sign == '-' ? -num : num);
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
- return junk_string_value_;
- }
- if (allow_trailing_spaces) {
- AdvanceToNonspace(&current, end);
- }
-
- parsing_done:
- exponent += insignificant_digits;
-
- if (octal) {
- double result;
- bool result_is_junk;
- char* start = buffer;
- result = RadixStringToIeee<3>(&start,
- buffer + buffer_pos,
- sign,
+ goto parsing_done;
+ } else {
+ return junk_string_value_;
+ }
+ }
+
+ const int max_exponent = INT_MAX / 2;
+ ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
+ int num = 0;
+ do {
+ // Check overflow.
+ int digit = *current - '0';
+ if (num >= max_exponent / 10
+ && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
+ num = max_exponent;
+ } else {
+ num = num * 10 + digit;
+ }
+ ++current;
+ } while (current != end && *current >= '0' && *current <= '9');
+
+ exponent += (exponen_sign == '-' ? -num : num);
+ }
+
+ if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+ return junk_string_value_;
+ }
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value_;
+ }
+ if (allow_trailing_spaces) {
+ AdvanceToNonspace(&current, end);
+ }
+
+ parsing_done:
+ exponent += insignificant_digits;
+
+ if (octal) {
+ double result;
+ bool result_is_junk;
+ char* start = buffer;
+ result = RadixStringToIeee<3>(&start,
+ buffer + buffer_pos,
+ sign,
separator_,
false, // Don't parse as hex_float.
- allow_trailing_junk,
- junk_string_value_,
- read_as_double,
- &result_is_junk);
- ASSERT(!result_is_junk);
- *processed_characters_count = static_cast<int>(current - input);
- return result;
- }
-
- if (nonzero_digit_dropped) {
- buffer[buffer_pos++] = '1';
- exponent--;
- }
-
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos] = '\0';
-
- double converted;
- if (read_as_double) {
- converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
- } else {
- converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
- }
- *processed_characters_count = static_cast<int>(current - input);
- return sign? -converted: converted;
-}
-
-
-double StringToDoubleConverter::StringToDouble(
- const char* buffer,
- int length,
- int* processed_characters_count) const {
- return StringToIeee(buffer, length, true, processed_characters_count);
-}
-
-
-double StringToDoubleConverter::StringToDouble(
- const uc16* buffer,
- int length,
- int* processed_characters_count) const {
- return StringToIeee(buffer, length, true, processed_characters_count);
-}
-
-
-float StringToDoubleConverter::StringToFloat(
- const char* buffer,
- int length,
- int* processed_characters_count) const {
- return static_cast<float>(StringToIeee(buffer, length, false,
- processed_characters_count));
-}
-
-
-float StringToDoubleConverter::StringToFloat(
- const uc16* buffer,
- int length,
- int* processed_characters_count) const {
- return static_cast<float>(StringToIeee(buffer, length, false,
- processed_characters_count));
-}
-
-} // namespace double_conversion
+ allow_trailing_junk,
+ junk_string_value_,
+ read_as_double,
+ &result_is_junk);
+ ASSERT(!result_is_junk);
+ *processed_characters_count = static_cast<int>(current - input);
+ return result;
+ }
+
+ if (nonzero_digit_dropped) {
+ buffer[buffer_pos++] = '1';
+ exponent--;
+ }
+
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos] = '\0';
+
+ double converted;
+ if (read_as_double) {
+ converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
+ } else {
+ converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
+ }
+ *processed_characters_count = static_cast<int>(current - input);
+ return sign? -converted: converted;
+}
+
+
+double StringToDoubleConverter::StringToDouble(
+ const char* buffer,
+ int length,
+ int* processed_characters_count) const {
+ return StringToIeee(buffer, length, true, processed_characters_count);
+}
+
+
+double StringToDoubleConverter::StringToDouble(
+ const uc16* buffer,
+ int length,
+ int* processed_characters_count) const {
+ return StringToIeee(buffer, length, true, processed_characters_count);
+}
+
+
+float StringToDoubleConverter::StringToFloat(
+ const char* buffer,
+ int length,
+ int* processed_characters_count) const {
+ return static_cast<float>(StringToIeee(buffer, length, false,
+ processed_characters_count));
+}
+
+
+float StringToDoubleConverter::StringToFloat(
+ const uc16* buffer,
+ int length,
+ int* processed_characters_count) const {
+ return static_cast<float>(StringToIeee(buffer, length, false,
+ processed_characters_count));
+}
+
+} // namespace double_conversion
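
To round off the .cc hunk, a small caller sketch for the string-to-double direction. The flag names and the StringToDouble overload are the ones used above; the constructor arguments and the helper itself are illustrative (the converter's full interface is in double-conversion.h, shown in the next hunk).

#include <limits>
#include "double-conversion.h"

// Illustrative only; not part of this patch.
static double ParseLoose(const char* s, int len) {
  using double_conversion::StringToDoubleConverter;
  const int flags = StringToDoubleConverter::ALLOW_LEADING_SPACES |
                    StringToDoubleConverter::ALLOW_TRAILING_SPACES;
  StringToDoubleConverter conv(flags,
                               0.0,  // value returned for an empty string
                               std::numeric_limits<double>::quiet_NaN(),  // junk
                               "Infinity", "NaN");
  int consumed = 0;
  return conv.StringToDouble(s, len, &consumed);
}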
diff --git a/contrib/libs/double-conversion/double-conversion.h b/contrib/libs/double-conversion/double-conversion.h
index 0b4cce51b2..6dbc0997c6 100644
--- a/contrib/libs/double-conversion/double-conversion.h
+++ b/contrib/libs/double-conversion/double-conversion.h
@@ -1,435 +1,435 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
-#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-class DoubleToStringConverter {
- public:
- // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint
- // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the
- // function returns false.
- static const int kMaxFixedDigitsBeforePoint = 60;
- static const int kMaxFixedDigitsAfterPoint = 60;
-
- // When calling ToExponential with a requested_digits
- // parameter > kMaxExponentialDigits then the function returns false.
- static const int kMaxExponentialDigits = 120;
-
- // When calling ToPrecision with a requested_digits
- // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits
- // then the function returns false.
- static const int kMinPrecisionDigits = 1;
- static const int kMaxPrecisionDigits = 120;
-
- enum Flags {
- NO_FLAGS = 0,
- EMIT_POSITIVE_EXPONENT_SIGN = 1,
- EMIT_TRAILING_DECIMAL_POINT = 2,
- EMIT_TRAILING_ZERO_AFTER_POINT = 4,
- UNIQUE_ZERO = 8
- };
-
- // Flags should be a bit-or combination of the possible Flags-enum.
- // - NO_FLAGS: no special flags.
- // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent
- // form, emits a '+' for positive exponents. Example: 1.2e+2.
- // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is
- // converted into decimal format then a trailing decimal point is appended.
- // Example: 2345.0 is converted to "2345.".
- // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point
- // emits a trailing '0'-character. This flag requires the
- //   EMIT_TRAILING_DECIMAL_POINT flag.
- // Example: 2345.0 is converted to "2345.0".
- // - UNIQUE_ZERO: "-0.0" is converted to "0.0".
- //
- // Infinity symbol and nan_symbol provide the string representation for these
- // special values. If the string is NULL and the special value is encountered
- // then the conversion functions return false.
- //
- // The exponent_character is used in exponential representations. It is
- // usually 'e' or 'E'.
- //
- // When converting to the shortest representation the converter will
- // represent input numbers in decimal format if they are in the interval
- // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[
- // (lower boundary included, greater boundary excluded).
- // Example: with decimal_in_shortest_low = -6 and
- // decimal_in_shortest_high = 21:
- // ToShortest(0.000001) -> "0.000001"
- // ToShortest(0.0000001) -> "1e-7"
- // ToShortest(111111111111111111111.0) -> "111111111111111110000"
- // ToShortest(100000000000000000000.0) -> "100000000000000000000"
- // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
- //
- // When converting to precision mode the converter may add
- // max_leading_padding_zeroes before returning the number in exponential
- // format.
- // Example with max_leading_padding_zeroes_in_precision_mode = 6.
- // ToPrecision(0.0000012345, 2) -> "0.0000012"
- // ToPrecision(0.00000012345, 2) -> "1.2e-7"
- // Similarly the converter may add up to
- // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
- // returning an exponential representation. A zero added by the
- // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
- // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
- // ToPrecision(230.0, 2) -> "230"
- // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT.
- // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
- DoubleToStringConverter(int flags,
- const char* infinity_symbol,
- const char* nan_symbol,
- char exponent_character,
- int decimal_in_shortest_low,
- int decimal_in_shortest_high,
- int max_leading_padding_zeroes_in_precision_mode,
- int max_trailing_padding_zeroes_in_precision_mode)
- : flags_(flags),
- infinity_symbol_(infinity_symbol),
- nan_symbol_(nan_symbol),
- exponent_character_(exponent_character),
- decimal_in_shortest_low_(decimal_in_shortest_low),
- decimal_in_shortest_high_(decimal_in_shortest_high),
- max_leading_padding_zeroes_in_precision_mode_(
- max_leading_padding_zeroes_in_precision_mode),
- max_trailing_padding_zeroes_in_precision_mode_(
- max_trailing_padding_zeroes_in_precision_mode) {
- // When 'trailing zero after the point' is set, then 'trailing point'
- // must be set too.
- ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) ||
- !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0));
- }
-
- // Returns a converter following the EcmaScript specification.
- static const DoubleToStringConverter& EcmaScriptConverter();
-
- // Computes the shortest string of digits that correctly represent the input
- // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high
- // (see constructor) it then either returns a decimal representation, or an
- // exponential representation.
- // Example with decimal_in_shortest_low = -6,
- // decimal_in_shortest_high = 21,
- // EMIT_POSITIVE_EXPONENT_SIGN activated, and
- //          EMIT_TRAILING_DECIMAL_POINT deactivated:
- // ToShortest(0.000001) -> "0.000001"
- // ToShortest(0.0000001) -> "1e-7"
- // ToShortest(111111111111111111111.0) -> "111111111111111110000"
- // ToShortest(100000000000000000000.0) -> "100000000000000000000"
- // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
- //
- // Note: the conversion may round the output if the returned string
- // is accurate enough to uniquely identify the input-number.
- // For example the most precise representation of the double 9e59 equals
- // "899999999999999918767229449717619953810131273674690656206848", but
- // the converter will return the shorter (but still correct) "9e59".
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except when the input value is special and no infinity_symbol or
- // nan_symbol has been given to the constructor.
- bool ToShortest(double value, StringBuilder* result_builder) const {
- return ToShortestIeeeNumber(value, result_builder, SHORTEST);
- }
-
- // Same as ToShortest, but for single-precision floats.
- bool ToShortestSingle(float value, StringBuilder* result_builder) const {
- return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE);
- }
-
-
- // Computes a decimal representation with a fixed number of digits after the
- // decimal point. The last emitted digit is rounded.
- //
- // Examples:
- // ToFixed(3.12, 1) -> "3.1"
- // ToFixed(3.1415, 3) -> "3.142"
- // ToFixed(1234.56789, 4) -> "1234.5679"
- // ToFixed(1.23, 5) -> "1.23000"
- // ToFixed(0.1, 4) -> "0.1000"
- // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00"
- // ToFixed(0.1, 30) -> "0.100000000000000005551115123126"
- // ToFixed(0.1, 17) -> "0.10000000000000001"
- //
- // If requested_digits equals 0, then the tail of the result depends on
- // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT.
- // Examples, for requested_digits == 0,
- // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be
- // - false and false: then 123.45 -> 123
- // 0.678 -> 1
- // - true and false: then 123.45 -> 123.
- // 0.678 -> 1.
- // - true and true: then 123.45 -> 123.0
- // 0.678 -> 1.0
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except for the following cases:
- // - the input value is special and no infinity_symbol or nan_symbol has
- // been provided to the constructor,
- // - 'value' > 10^kMaxFixedDigitsBeforePoint, or
- // - 'requested_digits' > kMaxFixedDigitsAfterPoint.
- // The last two conditions imply that the result will never contain more than
- // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
- // (one additional character for the sign, and one for the decimal point).
- bool ToFixed(double value,
- int requested_digits,
- StringBuilder* result_builder) const;
-
- // Computes a representation in exponential format with requested_digits
- // after the decimal point. The last emitted digit is rounded.
- // If requested_digits equals -1, then the shortest exponential representation
- // is computed.
- //
- // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and
- // exponent_character set to 'e'.
- // ToExponential(3.12, 1) -> "3.1e0"
- // ToExponential(5.0, 3) -> "5.000e0"
- // ToExponential(0.001, 2) -> "1.00e-3"
- // ToExponential(3.1415, -1) -> "3.1415e0"
- // ToExponential(3.1415, 4) -> "3.1415e0"
- // ToExponential(3.1415, 3) -> "3.142e0"
- // ToExponential(123456789000000, 3) -> "1.235e14"
- // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30"
- // ToExponential(1000000000000000019884624838656.0, 32) ->
- // "1.00000000000000001988462483865600e30"
- // ToExponential(1234, 0) -> "1e3"
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except for the following cases:
- // - the input value is special and no infinity_symbol or nan_symbol has
- // been provided to the constructor,
- // - 'requested_digits' > kMaxExponentialDigits.
- // The last condition implies that the result will never contain more than
- // kMaxExponentialDigits + 8 characters (the sign, the digit before the
- // decimal point, the decimal point, the exponent character, the
- // exponent's sign, and at most 3 exponent digits).
- bool ToExponential(double value,
- int requested_digits,
- StringBuilder* result_builder) const;
-
- // Computes 'precision' leading digits of the given 'value' and returns them
- // either in exponential or decimal format, depending on
- // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the
- // constructor).
- // The last computed digit is rounded.
- //
- // Example with max_leading_padding_zeroes_in_precision_mode = 6.
- // ToPrecision(0.0000012345, 2) -> "0.0000012"
- // ToPrecision(0.00000012345, 2) -> "1.2e-7"
- //   Similarly the converter may add up to
- // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
- // returning an exponential representation. A zero added by the
- // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
- // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
- // ToPrecision(230.0, 2) -> "230"
- // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT.
- // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
- // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no
- // EMIT_TRAILING_ZERO_AFTER_POINT:
- // ToPrecision(123450.0, 6) -> "123450"
- // ToPrecision(123450.0, 5) -> "123450"
- // ToPrecision(123450.0, 4) -> "123500"
- // ToPrecision(123450.0, 3) -> "123000"
- // ToPrecision(123450.0, 2) -> "1.2e5"
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except for the following cases:
- // - the input value is special and no infinity_symbol or nan_symbol has
- // been provided to the constructor,
- //  - precision < kMinPrecisionDigits
- // - precision > kMaxPrecisionDigits
- // The last condition implies that the result will never contain more than
- // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the
- // exponent character, the exponent's sign, and at most 3 exponent digits).
- bool ToPrecision(double value,
- int precision,
- StringBuilder* result_builder) const;
-
- enum DtoaMode {
- // Produce the shortest correct representation.
- // For example the output of 0.299999999999999988897 is (the less accurate
- // but correct) 0.3.
- SHORTEST,
- // Same as SHORTEST, but for single-precision floats.
- SHORTEST_SINGLE,
- // Produce a fixed number of digits after the decimal point.
- // For instance fixed(0.1, 4) becomes 0.1000
- // If the input number is big, the output will be big.
- FIXED,
- // Fixed number of digits (independent of the decimal point).
- PRECISION
- };
-
- // The maximal number of digits that are needed to emit a double in base 10.
- // A higher precision can be achieved by using more digits, but the shortest
- // accurate representation of any double will never use more digits than
- // kBase10MaximalLength.
- // Note that DoubleToAscii null-terminates its input. So the given buffer
- // should be at least kBase10MaximalLength + 1 characters long.
- static const int kBase10MaximalLength = 17;
-
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+class DoubleToStringConverter {
+ public:
+ // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint
+ // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the
+ // function returns false.
+ static const int kMaxFixedDigitsBeforePoint = 60;
+ static const int kMaxFixedDigitsAfterPoint = 60;
+
+ // When calling ToExponential with a requested_digits
+ // parameter > kMaxExponentialDigits then the function returns false.
+ static const int kMaxExponentialDigits = 120;
+
+ // When calling ToPrecision with a requested_digits
+ // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits
+ // then the function returns false.
+ static const int kMinPrecisionDigits = 1;
+ static const int kMaxPrecisionDigits = 120;
+
+ enum Flags {
+ NO_FLAGS = 0,
+ EMIT_POSITIVE_EXPONENT_SIGN = 1,
+ EMIT_TRAILING_DECIMAL_POINT = 2,
+ EMIT_TRAILING_ZERO_AFTER_POINT = 4,
+ UNIQUE_ZERO = 8
+ };
+
+ // Flags should be a bit-or combination of the possible Flags-enum.
+ // - NO_FLAGS: no special flags.
+ // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent
+ // form, emits a '+' for positive exponents. Example: 1.2e+2.
+ // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is
+ // converted into decimal format then a trailing decimal point is appended.
+ // Example: 2345.0 is converted to "2345.".
+ // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point
+ // emits a trailing '0'-character. This flag requires the
+ //   EMIT_TRAILING_DECIMAL_POINT flag.
+ // Example: 2345.0 is converted to "2345.0".
+ // - UNIQUE_ZERO: "-0.0" is converted to "0.0".
+ //
+ // Infinity symbol and nan_symbol provide the string representation for these
+ // special values. If the string is NULL and the special value is encountered
+ // then the conversion functions return false.
+ //
+ // The exponent_character is used in exponential representations. It is
+ // usually 'e' or 'E'.
+ //
+ // When converting to the shortest representation the converter will
+ // represent input numbers in decimal format if they are in the interval
+ // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[
+ // (lower boundary included, greater boundary excluded).
+ // Example: with decimal_in_shortest_low = -6 and
+ // decimal_in_shortest_high = 21:
+ // ToShortest(0.000001) -> "0.000001"
+ // ToShortest(0.0000001) -> "1e-7"
+ // ToShortest(111111111111111111111.0) -> "111111111111111110000"
+ // ToShortest(100000000000000000000.0) -> "100000000000000000000"
+ // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
+ //
+ // When converting to precision mode the converter may add
+ // max_leading_padding_zeroes before returning the number in exponential
+ // format.
+ // Example with max_leading_padding_zeroes_in_precision_mode = 6.
+ // ToPrecision(0.0000012345, 2) -> "0.0000012"
+ // ToPrecision(0.00000012345, 2) -> "1.2e-7"
+ // Similarly the converter may add up to
+ // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
+ // returning an exponential representation. A zero added by the
+ // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
+ // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
+ // ToPrecision(230.0, 2) -> "230"
+ // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT.
+ // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+ DoubleToStringConverter(int flags,
+ const char* infinity_symbol,
+ const char* nan_symbol,
+ char exponent_character,
+ int decimal_in_shortest_low,
+ int decimal_in_shortest_high,
+ int max_leading_padding_zeroes_in_precision_mode,
+ int max_trailing_padding_zeroes_in_precision_mode)
+ : flags_(flags),
+ infinity_symbol_(infinity_symbol),
+ nan_symbol_(nan_symbol),
+ exponent_character_(exponent_character),
+ decimal_in_shortest_low_(decimal_in_shortest_low),
+ decimal_in_shortest_high_(decimal_in_shortest_high),
+ max_leading_padding_zeroes_in_precision_mode_(
+ max_leading_padding_zeroes_in_precision_mode),
+ max_trailing_padding_zeroes_in_precision_mode_(
+ max_trailing_padding_zeroes_in_precision_mode) {
+ // When 'trailing zero after the point' is set, then 'trailing point'
+ // must be set too.
+ ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) ||
+ !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0));
+ }
+
+ // Returns a converter following the EcmaScript specification.
+ static const DoubleToStringConverter& EcmaScriptConverter();
+
+ // Computes the shortest string of digits that correctly represent the input
+ // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high
+ // (see constructor) it then either returns a decimal representation, or an
+ // exponential representation.
+ // Example with decimal_in_shortest_low = -6,
+ // decimal_in_shortest_high = 21,
+ // EMIT_POSITIVE_EXPONENT_SIGN activated, and
+ //          EMIT_TRAILING_DECIMAL_POINT deactivated:
+ // ToShortest(0.000001) -> "0.000001"
+ // ToShortest(0.0000001) -> "1e-7"
+ // ToShortest(111111111111111111111.0) -> "111111111111111110000"
+ // ToShortest(100000000000000000000.0) -> "100000000000000000000"
+ // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
+ //
+ // Note: the conversion may round the output if the returned string
+ // is accurate enough to uniquely identify the input-number.
+ // For example the most precise representation of the double 9e59 equals
+ // "899999999999999918767229449717619953810131273674690656206848", but
+ // the converter will return the shorter (but still correct) "9e59".
+ //
+ // Returns true if the conversion succeeds. The conversion always succeeds
+ // except when the input value is special and no infinity_symbol or
+ // nan_symbol has been given to the constructor.
+ bool ToShortest(double value, StringBuilder* result_builder) const {
+ return ToShortestIeeeNumber(value, result_builder, SHORTEST);
+ }
+
+ // Same as ToShortest, but for single-precision floats.
+ bool ToShortestSingle(float value, StringBuilder* result_builder) const {
+ return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE);
+ }
+
+
+ // Computes a decimal representation with a fixed number of digits after the
+ // decimal point. The last emitted digit is rounded.
+ //
+ // Examples:
+ // ToFixed(3.12, 1) -> "3.1"
+ // ToFixed(3.1415, 3) -> "3.142"
+ // ToFixed(1234.56789, 4) -> "1234.5679"
+ // ToFixed(1.23, 5) -> "1.23000"
+ // ToFixed(0.1, 4) -> "0.1000"
+ // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00"
+ // ToFixed(0.1, 30) -> "0.100000000000000005551115123126"
+ // ToFixed(0.1, 17) -> "0.10000000000000001"
+ //
+ // If requested_digits equals 0, then the tail of the result depends on
+ // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT.
+ // Examples, for requested_digits == 0,
+ // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be
+ // - false and false: then 123.45 -> 123
+ // 0.678 -> 1
+ // - true and false: then 123.45 -> 123.
+ // 0.678 -> 1.
+ // - true and true: then 123.45 -> 123.0
+ // 0.678 -> 1.0
+ //
+ // Returns true if the conversion succeeds. The conversion always succeeds
+ // except for the following cases:
+ // - the input value is special and no infinity_symbol or nan_symbol has
+ // been provided to the constructor,
+ // - 'value' > 10^kMaxFixedDigitsBeforePoint, or
+ // - 'requested_digits' > kMaxFixedDigitsAfterPoint.
+ // The last two conditions imply that the result will never contain more than
+ // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
+ // (one additional character for the sign, and one for the decimal point).
+ bool ToFixed(double value,
+ int requested_digits,
+ StringBuilder* result_builder) const;
+
+ // Computes a representation in exponential format with requested_digits
+ // after the decimal point. The last emitted digit is rounded.
+ // If requested_digits equals -1, then the shortest exponential representation
+ // is computed.
+ //
+ // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and
+ // exponent_character set to 'e'.
+ // ToExponential(3.12, 1) -> "3.1e0"
+ // ToExponential(5.0, 3) -> "5.000e0"
+ // ToExponential(0.001, 2) -> "1.00e-3"
+ // ToExponential(3.1415, -1) -> "3.1415e0"
+ // ToExponential(3.1415, 4) -> "3.1415e0"
+ // ToExponential(3.1415, 3) -> "3.142e0"
+ // ToExponential(123456789000000, 3) -> "1.235e14"
+ // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30"
+ // ToExponential(1000000000000000019884624838656.0, 32) ->
+ // "1.00000000000000001988462483865600e30"
+ // ToExponential(1234, 0) -> "1e3"
+ //
+ // Returns true if the conversion succeeds. The conversion always succeeds
+ // except for the following cases:
+ // - the input value is special and no infinity_symbol or nan_symbol has
+ // been provided to the constructor,
+ // - 'requested_digits' > kMaxExponentialDigits.
+ // The last condition implies that the result will never contain more than
+ // kMaxExponentialDigits + 8 characters (the sign, the digit before the
+ // decimal point, the decimal point, the exponent character, the
+ // exponent's sign, and at most 3 exponent digits).
+ bool ToExponential(double value,
+ int requested_digits,
+ StringBuilder* result_builder) const;
+
+ // Computes 'precision' leading digits of the given 'value' and returns them
+ // either in exponential or decimal format, depending on
+ // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the
+ // constructor).
+ // The last computed digit is rounded.
+ //
+ // Example with max_leading_padding_zeroes_in_precision_mode = 6.
+ // ToPrecision(0.0000012345, 2) -> "0.0000012"
+ // ToPrecision(0.00000012345, 2) -> "1.2e-7"
+ //   Similarly the converter may add up to
+ // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
+ // returning an exponential representation. A zero added by the
+ // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
+ // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
+ // ToPrecision(230.0, 2) -> "230"
+ // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT.
+ // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+ // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no
+ // EMIT_TRAILING_ZERO_AFTER_POINT:
+ // ToPrecision(123450.0, 6) -> "123450"
+ // ToPrecision(123450.0, 5) -> "123450"
+ // ToPrecision(123450.0, 4) -> "123500"
+ // ToPrecision(123450.0, 3) -> "123000"
+ // ToPrecision(123450.0, 2) -> "1.2e5"
+ //
+ // Returns true if the conversion succeeds. The conversion always succeeds
+ // except for the following cases:
+ // - the input value is special and no infinity_symbol or nan_symbol has
+ // been provided to the constructor,
+ //  - precision < kMinPrecisionDigits
+ // - precision > kMaxPrecisionDigits
+ // The last condition implies that the result will never contain more than
+ // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the
+ // exponent character, the exponent's sign, and at most 3 exponent digits).
+ bool ToPrecision(double value,
+ int precision,
+ StringBuilder* result_builder) const;
+
+ enum DtoaMode {
+ // Produce the shortest correct representation.
+ // For example the output of 0.299999999999999988897 is (the less accurate
+ // but correct) 0.3.
+ SHORTEST,
+ // Same as SHORTEST, but for single-precision floats.
+ SHORTEST_SINGLE,
+ // Produce a fixed number of digits after the decimal point.
+ // For instance fixed(0.1, 4) becomes 0.1000
+ // If the input number is big, the output will be big.
+ FIXED,
+ // Fixed number of digits (independent of the decimal point).
+ PRECISION
+ };
+
+ // The maximal number of digits that are needed to emit a double in base 10.
+ // A higher precision can be achieved by using more digits, but the shortest
+ // accurate representation of any double will never use more digits than
+ // kBase10MaximalLength.
+ // Note that DoubleToAscii null-terminates its input. So the given buffer
+ // should be at least kBase10MaximalLength + 1 characters long.
+ static const int kBase10MaximalLength = 17;
+
// Converts the given double 'v' to digit characters. 'v' must not be NaN,
// +Infinity, or -Infinity. In SHORTEST_SINGLE-mode this restriction also
// applies to 'v' after it has been casted to a single-precision float. That
// is, in this mode static_cast<float>(v) must not be NaN, +Infinity or
// -Infinity.
- //
- // The result should be interpreted as buffer * 10^(point-length).
- //
+ //
+ // The result should be interpreted as buffer * 10^(point-length).
+ //
// The digits are written to the buffer in the platform's charset, which is
// often UTF-8 (with ASCII-range digits) but may be another charset, such
// as EBCDIC.
//
- // The output depends on the given mode:
- // - SHORTEST: produce the least amount of digits for which the internal
- // identity requirement is still satisfied. If the digits are printed
- // (together with the correct exponent) then reading this number will give
- // 'v' again. The buffer will choose the representation that is closest to
- //    'v'. If there are two at the same distance, then the one farther away
- // from 0 is chosen (halfway cases - ending with 5 - are rounded up).
- // In this mode the 'requested_digits' parameter is ignored.
- // - SHORTEST_SINGLE: same as SHORTEST but with single-precision.
- // - FIXED: produces digits necessary to print a given number with
- // 'requested_digits' digits after the decimal point. The produced digits
- // might be too short in which case the caller has to fill the remainder
- // with '0's.
- // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
- // Halfway cases are rounded towards +/-Infinity (away from 0). The call
- // toFixed(0.15, 2) thus returns buffer="2", point=0.
- // The returned buffer may contain digits that would be truncated from the
- // shortest representation of the input.
- // - PRECISION: produces 'requested_digits' where the first digit is not '0'.
- // Even though the length of produced digits usually equals
- // 'requested_digits', the function is allowed to return fewer digits, in
- // which case the caller has to fill the missing digits with '0's.
- // Halfway cases are again rounded away from 0.
- // DoubleToAscii expects the given buffer to be big enough to hold all
- // digits and a terminating null-character. In SHORTEST-mode it expects a
- // buffer of at least kBase10MaximalLength + 1. In all other modes the
- // requested_digits parameter and the padding-zeroes limit the size of the
- // output. Don't forget the decimal point, the exponent character and the
- // terminating null-character when computing the maximal output size.
- // The given length is only used in debug mode to ensure the buffer is big
- // enough.
- static void DoubleToAscii(double v,
- DtoaMode mode,
- int requested_digits,
- char* buffer,
- int buffer_length,
- bool* sign,
- int* length,
- int* point);
-
- private:
- // Implementation for ToShortest and ToShortestSingle.
- bool ToShortestIeeeNumber(double value,
- StringBuilder* result_builder,
- DtoaMode mode) const;
-
- // If the value is a special value (NaN or Infinity) constructs the
- // corresponding string using the configured infinity/nan-symbol.
- // If either of them is NULL or the value is not special then the
- // function returns false.
- bool HandleSpecialValues(double value, StringBuilder* result_builder) const;
- // Constructs an exponential representation (i.e. 1.234e56).
- // The given exponent assumes a decimal point after the first decimal digit.
- void CreateExponentialRepresentation(const char* decimal_digits,
- int length,
- int exponent,
- StringBuilder* result_builder) const;
- // Creates a decimal representation (i.e 1234.5678).
- void CreateDecimalRepresentation(const char* decimal_digits,
- int length,
- int decimal_point,
- int digits_after_point,
- StringBuilder* result_builder) const;
-
- const int flags_;
- const char* const infinity_symbol_;
- const char* const nan_symbol_;
- const char exponent_character_;
- const int decimal_in_shortest_low_;
- const int decimal_in_shortest_high_;
- const int max_leading_padding_zeroes_in_precision_mode_;
- const int max_trailing_padding_zeroes_in_precision_mode_;
-
+ // The output depends on the given mode:
+ // - SHORTEST: produce the least amount of digits for which the internal
+ // identity requirement is still satisfied. If the digits are printed
+ // (together with the correct exponent) then reading this number will give
+ // 'v' again. The buffer will choose the representation that is closest to
+ //    'v'. If there are two at the same distance, then the one farther away
+ // from 0 is chosen (halfway cases - ending with 5 - are rounded up).
+ // In this mode the 'requested_digits' parameter is ignored.
+ // - SHORTEST_SINGLE: same as SHORTEST but with single-precision.
+ // - FIXED: produces digits necessary to print a given number with
+ // 'requested_digits' digits after the decimal point. The produced digits
+ // might be too short in which case the caller has to fill the remainder
+ // with '0's.
+ // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
+ // Halfway cases are rounded towards +/-Infinity (away from 0). The call
+ // toFixed(0.15, 2) thus returns buffer="2", point=0.
+ // The returned buffer may contain digits that would be truncated from the
+ // shortest representation of the input.
+ // - PRECISION: produces 'requested_digits' where the first digit is not '0'.
+ // Even though the length of produced digits usually equals
+ // 'requested_digits', the function is allowed to return fewer digits, in
+ // which case the caller has to fill the missing digits with '0's.
+ // Halfway cases are again rounded away from 0.
+ // DoubleToAscii expects the given buffer to be big enough to hold all
+ // digits and a terminating null-character. In SHORTEST-mode it expects a
+ // buffer of at least kBase10MaximalLength + 1. In all other modes the
+ // requested_digits parameter and the padding-zeroes limit the size of the
+ // output. Don't forget the decimal point, the exponent character and the
+ // terminating null-character when computing the maximal output size.
+ // The given length is only used in debug mode to ensure the buffer is big
+ // enough.
+ static void DoubleToAscii(double v,
+ DtoaMode mode,
+ int requested_digits,
+ char* buffer,
+ int buffer_length,
+ bool* sign,
+ int* length,
+ int* point);
+
+ private:
+ // Implementation for ToShortest and ToShortestSingle.
+ bool ToShortestIeeeNumber(double value,
+ StringBuilder* result_builder,
+ DtoaMode mode) const;
+
+ // If the value is a special value (NaN or Infinity) constructs the
+ // corresponding string using the configured infinity/nan-symbol.
+ // If either of them is NULL or the value is not special then the
+ // function returns false.
+ bool HandleSpecialValues(double value, StringBuilder* result_builder) const;
+ // Constructs an exponential representation (i.e. 1.234e56).
+ // The given exponent assumes a decimal point after the first decimal digit.
+ void CreateExponentialRepresentation(const char* decimal_digits,
+ int length,
+ int exponent,
+ StringBuilder* result_builder) const;
+ // Creates a decimal representation (i.e 1234.5678).
+ void CreateDecimalRepresentation(const char* decimal_digits,
+ int length,
+ int decimal_point,
+ int digits_after_point,
+ StringBuilder* result_builder) const;
+
+ const int flags_;
+ const char* const infinity_symbol_;
+ const char* const nan_symbol_;
+ const char exponent_character_;
+ const int decimal_in_shortest_low_;
+ const int decimal_in_shortest_high_;
+ const int max_leading_padding_zeroes_in_precision_mode_;
+ const int max_trailing_padding_zeroes_in_precision_mode_;
+
DC_DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter);
-};
-
-
-class StringToDoubleConverter {
- public:
- // Enumeration for allowing octals and ignoring junk when converting
- // strings to numbers.
- enum Flags {
- NO_FLAGS = 0,
- ALLOW_HEX = 1,
- ALLOW_OCTALS = 2,
- ALLOW_TRAILING_JUNK = 4,
- ALLOW_LEADING_SPACES = 8,
- ALLOW_TRAILING_SPACES = 16,
+};
+
+
+class StringToDoubleConverter {
+ public:
+ // Enumeration for allowing octals and ignoring junk when converting
+ // strings to numbers.
+ enum Flags {
+ NO_FLAGS = 0,
+ ALLOW_HEX = 1,
+ ALLOW_OCTALS = 2,
+ ALLOW_TRAILING_JUNK = 4,
+ ALLOW_LEADING_SPACES = 8,
+ ALLOW_TRAILING_SPACES = 16,
ALLOW_SPACES_AFTER_SIGN = 32,
ALLOW_CASE_INSENSIBILITY = 64,
ALLOW_HEX_FLOATS = 128,
- };
-
+ };
+
static const uc16 kNoSeparator = '\0';
- // Flags should be a bit-or combination of the possible Flags-enum.
- // - NO_FLAGS: no special flags.
- // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers.
- // Ex: StringToDouble("0x1234") -> 4660.0
- // In StringToDouble("0x1234.56") the characters ".56" are trailing
- // junk. The result of the call is hence dependent on
- // the ALLOW_TRAILING_JUNK flag and/or the junk value.
- // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK,
- // the string will not be parsed as "0" followed by junk.
- //
- // - ALLOW_OCTALS: recognizes the prefix "0" for octals:
- // If a sequence of octal digits starts with '0', then the number is
- // read as octal integer. Octal numbers may only be integers.
- // Ex: StringToDouble("01234") -> 668.0
- // StringToDouble("012349") -> 12349.0 // Not a sequence of octal
- // // digits.
- // In StringToDouble("01234.56") the characters ".56" are trailing
- // junk. The result of the call is hence dependent on
- // the ALLOW_TRAILING_JUNK flag and/or the junk value.
- // In StringToDouble("01234e56") the characters "e56" are trailing
- // junk, too.
- // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
- // a double literal.
- // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces,
- // new-lines, and tabs.
- // - ALLOW_TRAILING_SPACES: ignore trailing whitespace.
- // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign.
- // Ex: StringToDouble("- 123.2") -> -123.2.
- // StringToDouble("+ 123.2") -> 123.2
+ // Flags should be a bit-or combination of the possible Flags-enum.
+ // - NO_FLAGS: no special flags.
+ // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers.
+ // Ex: StringToDouble("0x1234") -> 4660.0
+ // In StringToDouble("0x1234.56") the characters ".56" are trailing
+ // junk. The result of the call is hence dependent on
+ // the ALLOW_TRAILING_JUNK flag and/or the junk value.
+ // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK,
+ // the string will not be parsed as "0" followed by junk.
+ //
+ // - ALLOW_OCTALS: recognizes the prefix "0" for octals:
+ // If a sequence of octal digits starts with '0', then the number is
+ // read as octal integer. Octal numbers may only be integers.
+ // Ex: StringToDouble("01234") -> 668.0
+ // StringToDouble("012349") -> 12349.0 // Not a sequence of octal
+ // // digits.
+ // In StringToDouble("01234.56") the characters ".56" are trailing
+ // junk. The result of the call is hence dependent on
+ // the ALLOW_TRAILING_JUNK flag and/or the junk value.
+ // In StringToDouble("01234e56") the characters "e56" are trailing
+ // junk, too.
+ // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
+ // a double literal.
+ // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces,
+ // new-lines, and tabs.
+ // - ALLOW_TRAILING_SPACES: ignore trailing whitespace.
+ // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign.
+ // Ex: StringToDouble("- 123.2") -> -123.2.
+ // StringToDouble("+ 123.2") -> 123.2
// - ALLOW_CASE_INSENSIBILITY: ignore case of characters for special values:
// infinity and nan.
// - ALLOW_HEX_FLOATS: allows hexadecimal float literals.
@@ -437,75 +437,75 @@ class StringToDoubleConverter {
// Examples: 0x1.2p3 == 9.0
// 0x10.1p0 == 16.0625
// ALLOW_HEX and ALLOW_HEX_FLOATS are independent.
- //
- // empty_string_value is returned when an empty string is given as input.
- // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string
- // containing only spaces is converted to the 'empty_string_value', too.
- //
- // junk_string_value is returned when
- // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not
- // part of a double-literal) is found.
- // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a
- // double literal.
- //
- // infinity_symbol and nan_symbol are strings that are used to detect
- // inputs that represent infinity and NaN. They can be null, in which case
- // they are ignored.
- // The conversion routine first reads any possible signs. Then it compares the
- // following character of the input-string with the first character of
- // the infinity, and nan-symbol. If either matches, the function assumes that
- // a match has been found, and expects the following input characters to match
- // the remaining characters of the special-value symbol.
- // This means that the following restrictions apply to special-value symbols:
- // - they must not start with signs ('+', or '-'),
- // - they must not have the same first character.
- // - they must not start with digits.
- //
+ //
+ // empty_string_value is returned when an empty string is given as input.
+ // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string
+ // containing only spaces is converted to the 'empty_string_value', too.
+ //
+ // junk_string_value is returned when
+ // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not
+ // part of a double-literal) is found.
+ // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a
+ // double literal.
+ //
+ // infinity_symbol and nan_symbol are strings that are used to detect
+ // inputs that represent infinity and NaN. They can be null, in which case
+ // they are ignored.
+ // The conversion routine first reads any possible signs. Then it compares the
+ // following character of the input-string with the first character of
+ // the infinity, and nan-symbol. If either matches, the function assumes that
+ // a match has been found, and expects the following input characters to match
+ // the remaining characters of the special-value symbol.
+ // This means that the following restrictions apply to special-value symbols:
+ // - they must not start with signs ('+', or '-'),
+ // - they must not have the same first character.
+ // - they must not start with digits.
+ //
// If the separator character is not kNoSeparator, then that specific
// character is ignored when in between two valid digits of the significand.
// It is not allowed to appear in the exponent.
// It is not allowed to lead or trail the number.
// It is not allowed to appear twice next to each other.
//
- // Examples:
- // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK,
- // empty_string_value = 0.0,
- // junk_string_value = NaN,
- // infinity_symbol = "infinity",
- // nan_symbol = "nan":
- // StringToDouble("0x1234") -> 4660.0.
- // StringToDouble("0x1234K") -> 4660.0.
- // StringToDouble("") -> 0.0 // empty_string_value.
- // StringToDouble(" ") -> NaN // junk_string_value.
- // StringToDouble(" 1") -> NaN // junk_string_value.
- // StringToDouble("0x") -> NaN // junk_string_value.
- // StringToDouble("-123.45") -> -123.45.
- // StringToDouble("--123.45") -> NaN // junk_string_value.
- // StringToDouble("123e45") -> 123e45.
- // StringToDouble("123E45") -> 123e45.
- // StringToDouble("123e+45") -> 123e45.
- // StringToDouble("123E-45") -> 123e-45.
- // StringToDouble("123e") -> 123.0 // trailing junk ignored.
- // StringToDouble("123e-") -> 123.0 // trailing junk ignored.
- // StringToDouble("+NaN") -> NaN // NaN string literal.
- // StringToDouble("-infinity") -> -inf. // infinity literal.
- // StringToDouble("Infinity") -> NaN // junk_string_value.
- //
- // flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES,
- // empty_string_value = 0.0,
- // junk_string_value = NaN,
- // infinity_symbol = NULL,
- // nan_symbol = NULL:
- // StringToDouble("0x1234") -> NaN // junk_string_value.
- // StringToDouble("01234") -> 668.0.
- // StringToDouble("") -> 0.0 // empty_string_value.
- // StringToDouble(" ") -> 0.0 // empty_string_value.
- // StringToDouble(" 1") -> 1.0
- // StringToDouble("0x") -> NaN // junk_string_value.
- // StringToDouble("0123e45") -> NaN // junk_string_value.
- // StringToDouble("01239E45") -> 1239e45.
- // StringToDouble("-infinity") -> NaN // junk_string_value.
- // StringToDouble("NaN") -> NaN // junk_string_value.
+ // Examples:
+ // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK,
+ // empty_string_value = 0.0,
+ // junk_string_value = NaN,
+ // infinity_symbol = "infinity",
+ // nan_symbol = "nan":
+ // StringToDouble("0x1234") -> 4660.0.
+ // StringToDouble("0x1234K") -> 4660.0.
+ // StringToDouble("") -> 0.0 // empty_string_value.
+ // StringToDouble(" ") -> NaN // junk_string_value.
+ // StringToDouble(" 1") -> NaN // junk_string_value.
+ // StringToDouble("0x") -> NaN // junk_string_value.
+ // StringToDouble("-123.45") -> -123.45.
+ // StringToDouble("--123.45") -> NaN // junk_string_value.
+ // StringToDouble("123e45") -> 123e45.
+ // StringToDouble("123E45") -> 123e45.
+ // StringToDouble("123e+45") -> 123e45.
+ // StringToDouble("123E-45") -> 123e-45.
+ // StringToDouble("123e") -> 123.0 // trailing junk ignored.
+ // StringToDouble("123e-") -> 123.0 // trailing junk ignored.
+ // StringToDouble("+NaN") -> NaN // NaN string literal.
+ // StringToDouble("-infinity") -> -inf. // infinity literal.
+ // StringToDouble("Infinity") -> NaN // junk_string_value.
+ //
+ // flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES,
+ // empty_string_value = 0.0,
+ // junk_string_value = NaN,
+ // infinity_symbol = NULL,
+ // nan_symbol = NULL:
+ // StringToDouble("0x1234") -> NaN // junk_string_value.
+ // StringToDouble("01234") -> 668.0.
+ // StringToDouble("") -> 0.0 // empty_string_value.
+ // StringToDouble(" ") -> 0.0 // empty_string_value.
+ // StringToDouble(" 1") -> 1.0
+ // StringToDouble("0x") -> NaN // junk_string_value.
+ // StringToDouble("0123e45") -> NaN // junk_string_value.
+ // StringToDouble("01239E45") -> 1239e45.
+ // StringToDouble("-infinity") -> NaN // junk_string_value.
+ // StringToDouble("NaN") -> NaN // junk_string_value.
//
// flags = NO_FLAGS,
// separator = ' ':
@@ -514,63 +514,63 @@ class StringToDoubleConverter {
// StringToDouble("1 000 000.0") -> 1000000.0
// StringToDouble("1.000 000") -> 1.0
// StringToDouble("1.0e1 000") -> NaN // junk_string_value
- StringToDoubleConverter(int flags,
- double empty_string_value,
- double junk_string_value,
- const char* infinity_symbol,
+ StringToDoubleConverter(int flags,
+ double empty_string_value,
+ double junk_string_value,
+ const char* infinity_symbol,
const char* nan_symbol,
uc16 separator = kNoSeparator)
- : flags_(flags),
- empty_string_value_(empty_string_value),
- junk_string_value_(junk_string_value),
- infinity_symbol_(infinity_symbol),
+ : flags_(flags),
+ empty_string_value_(empty_string_value),
+ junk_string_value_(junk_string_value),
+ infinity_symbol_(infinity_symbol),
nan_symbol_(nan_symbol),
separator_(separator) {
- }
-
- // Performs the conversion.
- // The output parameter 'processed_characters_count' is set to the number
- // of characters that have been processed to read the number.
- // Spaces that are processed with ALLOW_{LEADING|TRAILING}_SPACES are included
- // in the 'processed_characters_count'. Trailing junk is never included.
- double StringToDouble(const char* buffer,
- int length,
- int* processed_characters_count) const;
-
- // Same as StringToDouble above but for 16 bit characters.
- double StringToDouble(const uc16* buffer,
- int length,
- int* processed_characters_count) const;
-
- // Same as StringToDouble but reads a float.
- // Note that this is not equivalent to static_cast<float>(StringToDouble(...))
- // due to potential double-rounding.
- float StringToFloat(const char* buffer,
- int length,
- int* processed_characters_count) const;
-
- // Same as StringToFloat above but for 16 bit characters.
- float StringToFloat(const uc16* buffer,
- int length,
- int* processed_characters_count) const;
-
- private:
- const int flags_;
- const double empty_string_value_;
- const double junk_string_value_;
- const char* const infinity_symbol_;
- const char* const nan_symbol_;
+ }
+
+ // Performs the conversion.
+ // The output parameter 'processed_characters_count' is set to the number
+ // of characters that have been processed to read the number.
+ // Spaces that are processed with ALLOW_{LEADING|TRAILING}_SPACES are included
+ // in the 'processed_characters_count'. Trailing junk is never included.
+ double StringToDouble(const char* buffer,
+ int length,
+ int* processed_characters_count) const;
+
+ // Same as StringToDouble above but for 16 bit characters.
+ double StringToDouble(const uc16* buffer,
+ int length,
+ int* processed_characters_count) const;
+
+ // Same as StringToDouble but reads a float.
+ // Note that this is not equivalent to static_cast<float>(StringToDouble(...))
+ // due to potential double-rounding.
+ float StringToFloat(const char* buffer,
+ int length,
+ int* processed_characters_count) const;
+
+ // Same as StringToFloat above but for 16 bit characters.
+ float StringToFloat(const uc16* buffer,
+ int length,
+ int* processed_characters_count) const;
+
+ private:
+ const int flags_;
+ const double empty_string_value_;
+ const double junk_string_value_;
+ const char* const infinity_symbol_;
+ const char* const nan_symbol_;
const uc16 separator_;
-
- template <class Iterator>
- double StringToIeee(Iterator start_pointer,
- int length,
- bool read_as_double,
- int* processed_characters_count) const;
-
+
+ template <class Iterator>
+ double StringToIeee(Iterator start_pointer,
+ int length,
+ bool read_as_double,
+ int* processed_characters_count) const;
+
DC_DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+};
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
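
For the double-to-string direction declared in the header above, here is a similarly hedged sketch (again not part of the patch). It assumes that StringBuilder, which comes from the included utils.h, provides the usual (char*, int) constructor and Finalize() accessor, as in upstream double-conversion.

    #include <cstdio>
    #include "double-conversion.h"

    int main() {
      using double_conversion::DoubleToStringConverter;
      using double_conversion::StringBuilder;  // assumed to come from utils.h
      char buf[64];
      StringBuilder builder(buf, sizeof(buf));
      // EcmaScriptConverter() is declared in the header above; it applies the
      // JavaScript formatting rules (shortest round-trippable representation).
      const DoubleToStringConverter& conv =
          DoubleToStringConverter::EcmaScriptConverter();
      if (conv.ToShortest(0.1 + 0.2, &builder)) {
        // Expected output: "0.30000000000000004", the shortest digit string
        // that uniquely identifies this double.
        std::printf("%s\n", builder.Finalize());
      }
      return 0;
    }
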
diff --git a/contrib/libs/double-conversion/fast-dtoa.cc b/contrib/libs/double-conversion/fast-dtoa.cc
index deb757d5eb..61350383a9 100644
--- a/contrib/libs/double-conversion/fast-dtoa.cc
+++ b/contrib/libs/double-conversion/fast-dtoa.cc
@@ -1,665 +1,665 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "fast-dtoa.h"
-
-#include "cached-powers.h"
-#include "diy-fp.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-// The minimal and maximal target exponent define the range of w's binary
-// exponent, where 'w' is the result of multiplying the input by a cached power
-// of ten.
-//
-// A different range might be chosen on a different platform, to optimize digit
-// generation, but a smaller range requires more powers of ten to be cached.
-static const int kMinimalTargetExponent = -60;
-static const int kMaximalTargetExponent = -32;
-
-
-// Adjusts the last digit of the generated number, and screens out generated
-// solutions that may be inaccurate. A solution may be inaccurate if it is
-// outside the safe interval, or if we cannot prove that it is closer to the
-// input than a neighboring representation of the same length.
-//
-// Input: * buffer containing the digits of too_high / 10^kappa
-// * the buffer's length
-// * distance_too_high_w == (too_high - w).f() * unit
-// * unsafe_interval == (too_high - too_low).f() * unit
-// * rest = (too_high - buffer * 10^kappa).f() * unit
-// * ten_kappa = 10^kappa * unit
-// * unit = the common multiplier
-// Output: returns true if the buffer is guaranteed to contain the closest
-// representable number to the input.
-// Modifies the generated digits in the buffer to approach (round towards) w.
-static bool RoundWeed(Vector<char> buffer,
- int length,
- uint64_t distance_too_high_w,
- uint64_t unsafe_interval,
- uint64_t rest,
- uint64_t ten_kappa,
- uint64_t unit) {
- uint64_t small_distance = distance_too_high_w - unit;
- uint64_t big_distance = distance_too_high_w + unit;
- // Let w_low = too_high - big_distance, and
- // w_high = too_high - small_distance.
- // Note: w_low < w < w_high
- //
- // The real w (* unit) must lie somewhere inside the interval
- // ]w_low; w_high[ (often written as "(w_low; w_high)")
-
- // Basically the buffer currently contains a number in the unsafe interval
- // ]too_low; too_high[ with too_low < w < too_high
- //
- // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- // ^v 1 unit ^ ^ ^ ^
- // boundary_high --------------------- . . . .
- // ^v 1 unit . . . .
- // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . .
- // . . ^ . .
- // . big_distance . . .
- // . . . . rest
- // small_distance . . . .
- // v . . . .
- // w_high - - - - - - - - - - - - - - - - - - . . . .
- // ^v 1 unit . . . .
- // w ---------------------------------------- . . . .
- // ^v 1 unit v . . .
- // w_low - - - - - - - - - - - - - - - - - - - - - . . .
- // . . v
- // buffer --------------------------------------------------+-------+--------
- // . .
- // safe_interval .
- // v .
- // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .
- // ^v 1 unit .
- // boundary_low ------------------------- unsafe_interval
- // ^v 1 unit v
- // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- //
- //
- // Note that the value of buffer could lie anywhere inside the range too_low
- // to too_high.
- //
- // boundary_low, boundary_high and w are approximations of the real boundaries
- // and v (the input number). They are guaranteed to be precise up to one unit.
- // In fact the error is guaranteed to be strictly less than one unit.
- //
- // Anything that lies outside the unsafe interval is guaranteed not to round
- // to v when read again.
- // Anything that lies inside the safe interval is guaranteed to round to v
- // when read again.
- // If the number inside the buffer lies inside the unsafe interval but not
- // inside the safe interval then we simply do not know and bail out (returning
- // false).
- //
- // Similarly we have to take into account the imprecision of 'w' when finding
- // the closest representation of 'w'. If we have two potential
- // representations, and one is closer to both w_low and w_high, then we know
- // it is closer to the actual value v.
- //
- // By generating the digits of too_high we got the largest (closest to
- // too_high) buffer that is still in the unsafe interval. In the case where
- // w_high < buffer < too_high we try to decrement the buffer.
- // This way the buffer approaches (rounds towards) w.
- // There are 3 conditions that stop the decrementation process:
- // 1) the buffer is already below w_high
- // 2) decrementing the buffer would make it leave the unsafe interval
- // 3) decrementing the buffer would yield a number below w_high and farther
- // away than the current number. In other words:
- // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high
- // Instead of using the buffer directly we use its distance to too_high.
- // Conceptually rest ~= too_high - buffer
- // We need to do the following tests in this order to avoid over- and
- // underflows.
- ASSERT(rest <= unsafe_interval);
- while (rest < small_distance && // Negated condition 1
- unsafe_interval - rest >= ten_kappa && // Negated condition 2
- (rest + ten_kappa < small_distance || // buffer{-1} > w_high
- small_distance - rest >= rest + ten_kappa - small_distance)) {
- buffer[length - 1]--;
- rest += ten_kappa;
- }
-
- // We have approached w+ as much as possible. We now test if approaching w-
- // would require changing the buffer. If yes, then we have two possible
- // representations close to w, but we cannot decide which one is closer.
- if (rest < big_distance &&
- unsafe_interval - rest >= ten_kappa &&
- (rest + ten_kappa < big_distance ||
- big_distance - rest > rest + ten_kappa - big_distance)) {
- return false;
- }
-
- // Weeding test.
- // The safe interval is [too_low + 2 ulp; too_high - 2 ulp]
- // Since too_low = too_high - unsafe_interval this is equivalent to
- // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp]
- // Conceptually we have: rest ~= too_high - buffer
- return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit);
-}
-
-
-// Rounds the buffer upwards if the result is closer to v by possibly adding
-// 1 to the buffer. If the precision of the calculation is not sufficient to
-// round correctly, return false.
-// The rounding might shift the whole buffer in which case the kappa is
-// adjusted. For example "99", kappa = 3 might become "10", kappa = 4.
-//
-// If 2*rest > ten_kappa then the buffer needs to be rounded up.
-// rest can have an error of +/- 1 unit. This function accounts for the
-// imprecision and returns false, if the rounding direction cannot be
-// unambiguously determined.
-//
-// Precondition: rest < ten_kappa.
-static bool RoundWeedCounted(Vector<char> buffer,
- int length,
- uint64_t rest,
- uint64_t ten_kappa,
- uint64_t unit,
- int* kappa) {
- ASSERT(rest < ten_kappa);
- // The following tests are done in a specific order to avoid overflows. They
- // will work correctly with any uint64 values of rest < ten_kappa and unit.
- //
- // If the unit is too big, then we don't know which way to round. For example
- // a unit of 50 means that the real number lies within rest +/- 50. If
- // 10^kappa == 40 then there is no way to tell which way to round.
- if (unit >= ten_kappa) return false;
- // Even if unit is just half the size of 10^kappa we are already completely
- // lost. (And after the previous test we know that the expression will not
- // over/underflow.)
- if (ten_kappa - unit <= unit) return false;
- // If 2 * (rest + unit) <= 10^kappa we can safely round down.
- if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) {
- return true;
- }
- // If 2 * (rest - unit) >= 10^kappa, then we can safely round up.
- if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) {
- // Increment the last digit recursively until we find a non '9' digit.
- buffer[length - 1]++;
- for (int i = length - 1; i > 0; --i) {
- if (buffer[i] != '0' + 10) break;
- buffer[i] = '0';
- buffer[i - 1]++;
- }
- // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the
- // exception of the first digit all digits are now '0'. Simply switch the
- // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and
- // the power (the kappa) is increased.
- if (buffer[0] == '0' + 10) {
- buffer[0] = '1';
- (*kappa) += 1;
- }
- return true;
- }
- return false;
-}
-
-// Returns the biggest power of ten that is less than or equal to the given
-// number. We furthermore receive the maximum number of bits 'number' has.
-//
-// Returns power == 10^(exponent_plus_one-1) such that
-// power <= number < power * 10.
-// If number_bits == 0 then 0^(0-1) is returned.
-// The number of bits must be <= 32.
-// Precondition: number < (1 << (number_bits + 1)).
-
-// Inspired by the method for finding an integer log base 10 from here:
-// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-static unsigned int const kSmallPowersOfTen[] =
- {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
- 1000000000};
-
-static void BiggestPowerTen(uint32_t number,
- int number_bits,
- uint32_t* power,
- int* exponent_plus_one) {
- ASSERT(number < (1u << (number_bits + 1)));
- // 1233/4096 is approximately 1/lg(10).
- int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12);
- // We increment to skip over the first entry in the kPowersOf10 table.
- // Note: kPowersOf10[i] == 10^(i-1).
- exponent_plus_one_guess++;
- // We don't have any guarantees that 2^number_bits <= number.
- if (number < kSmallPowersOfTen[exponent_plus_one_guess]) {
- exponent_plus_one_guess--;
- }
- *power = kSmallPowersOfTen[exponent_plus_one_guess];
- *exponent_plus_one = exponent_plus_one_guess;
-}
-
-// Generates the digits of input number w.
-// w is a floating-point number (DiyFp), consisting of a significand and an
-// exponent. Its exponent is bounded by kMinimalTargetExponent and
-// kMaximalTargetExponent.
-// Hence -60 <= w.e() <= -32.
-//
-// Returns false if it fails, in which case the generated digits in the buffer
-// should not be used.
-// Preconditions:
-// * low, w and high are correct up to 1 ulp (unit in the last place). That
-// is, their error must be less than a unit of their last digits.
-// * low.e() == w.e() == high.e()
-// * low < w < high, and taking into account their error: low~ <= high~
-// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
-// Postconditions: returns false if procedure fails.
-// otherwise:
-// * buffer is not null-terminated, but len contains the number of digits.
-// * buffer contains the shortest possible decimal digit-sequence
-// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the
-// correct values of low and high (without their error).
-// * if more than one decimal representation gives the minimal number of
-// decimal digits then the one closest to W (where W is the correct value
-// of w) is chosen.
-// Remark: this procedure takes into account the imprecision of its input
-// numbers. If the precision is not enough to guarantee all the postconditions
-// then false is returned. This happens rarely (roughly 0.5% of cases).
-//
-// Say, for the sake of example, that
-// w.e() == -48, and w.f() == 0x1234567890abcdef
-// w's value can be computed by w.f() * 2^w.e()
-// We can obtain w's integral digits by simply shifting w.f() by -w.e().
-// -> w's integral part is 0x1234
-// w's fractional part is therefore 0x567890abcdef.
-// Printing w's integral part is easy (simply print 0x1234 in decimal).
-// In order to print its fraction we repeatedly multiply the fraction by 10 and
-// get each digit. Example: the first digit after the point would be computed by
-// (0x567890abcdef * 10) >> 48. -> 3
-// The whole thing becomes slightly more complicated because we want to stop
-// once we have enough digits. That is, once the digits inside the buffer
-// represent 'w' we can stop. Everything inside the interval low - high
-// represents w. However we have to pay attention to low, high and w's
-// imprecision.
-static bool DigitGen(DiyFp low,
- DiyFp w,
- DiyFp high,
- Vector<char> buffer,
- int* length,
- int* kappa) {
- ASSERT(low.e() == w.e() && w.e() == high.e());
- ASSERT(low.f() + 1 <= high.f() - 1);
- ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
- // low, w and high are imprecise, but by less than one ulp (unit in the last
- // place).
- // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that
- // the new numbers are outside of the interval we want the final
- // representation to lie in.
- // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield
- // numbers that are certain to lie in the interval. We will use this fact
- // later on.
- // We will now start by generating the digits within the uncertain
- // interval. Later we will weed out representations that lie outside the safe
- // interval and thus _might_ lie outside the correct interval.
- uint64_t unit = 1;
- DiyFp too_low = DiyFp(low.f() - unit, low.e());
- DiyFp too_high = DiyFp(high.f() + unit, high.e());
- // too_low and too_high are guaranteed to lie outside the interval we want the
- // generated number in.
- DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low);
- // We now cut the input number into two parts: the integral digits and the
- // fractionals. We will not write any decimal separator though, but adapt
- // kappa instead.
- // Reminder: we are currently computing the digits (stored inside the buffer)
- // such that: too_low < buffer * 10^kappa < too_high
- // We use too_high for the digit_generation and stop as soon as possible.
- // If we stop early we effectively round down.
- DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
- // Division by one is a shift.
- uint32_t integrals = static_cast<uint32_t>(too_high.f() >> -one.e());
- // Modulo by one is an and.
- uint64_t fractionals = too_high.f() & (one.f() - 1);
- uint32_t divisor;
- int divisor_exponent_plus_one;
- BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
- &divisor, &divisor_exponent_plus_one);
- *kappa = divisor_exponent_plus_one;
- *length = 0;
- // Loop invariant: buffer = too_high / 10^kappa (integer division)
- // The invariant holds for the first iteration: kappa has been initialized
- // with the divisor exponent + 1. And the divisor is the biggest power of ten
- // that is smaller than integrals.
- while (*kappa > 0) {
- int digit = integrals / divisor;
- ASSERT(digit <= 9);
- buffer[*length] = static_cast<char>('0' + digit);
- (*length)++;
- integrals %= divisor;
- (*kappa)--;
- // Note that kappa now equals the exponent of the divisor and that the
- // invariant thus holds again.
- uint64_t rest =
- (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
- // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e())
- // Reminder: unsafe_interval.e() == one.e()
- if (rest < unsafe_interval.f()) {
- // Rounding down (by not emitting the remaining digits) yields a number
- // that lies within the unsafe interval.
- return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(),
- unsafe_interval.f(), rest,
- static_cast<uint64_t>(divisor) << -one.e(), unit);
- }
- divisor /= 10;
- }
-
- // The integrals have been generated. We are at the point of the decimal
- // separator. In the following loop we simply multiply the remaining digits by
- // 10 and divide by one. We just need to pay attention to multiply associated
- // data (like the interval or 'unit'), too.
- // Note that the multiplication by 10 does not overflow, because w.e >= -60
- // and thus one.e >= -60.
- ASSERT(one.e() >= -60);
- ASSERT(fractionals < one.f());
- ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
- for (;;) {
- fractionals *= 10;
- unit *= 10;
- unsafe_interval.set_f(unsafe_interval.f() * 10);
- // Integer division by one.
- int digit = static_cast<int>(fractionals >> -one.e());
- ASSERT(digit <= 9);
- buffer[*length] = static_cast<char>('0' + digit);
- (*length)++;
- fractionals &= one.f() - 1; // Modulo by one.
- (*kappa)--;
- if (fractionals < unsafe_interval.f()) {
- return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit,
- unsafe_interval.f(), fractionals, one.f(), unit);
- }
- }
-}
-
-
-
-// Generates (at most) requested_digits digits of input number w.
-// w is a floating-point number (DiyFp), consisting of a significand and an
-// exponent. Its exponent is bounded by kMinimalTargetExponent and
-// kMaximalTargetExponent.
-// Hence -60 <= w.e() <= -32.
-//
-// Returns false if it fails, in which case the generated digits in the buffer
-// should not be used.
-// Preconditions:
-// * w is correct up to 1 ulp (unit in the last place). That
-// is, its error must be strictly less than a unit of its last digit.
-// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
-//
-// Postconditions: returns false if procedure fails.
-// otherwise:
-// * buffer is not null-terminated, but length contains the number of
-// digits.
-// * the representation in buffer is the most precise representation of
-// requested_digits digits.
-// * buffer contains at most requested_digits digits of w. If there are less
-// than requested_digits digits then some trailing '0's have been removed.
-// * kappa is such that
-// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2.
-//
-// Remark: This procedure takes into account the imprecision of its input
-// numbers. If the precision is not enough to guarantee all the postconditions
-// then false is returned. This usually happens rarely, but the failure-rate
-// increases with higher requested_digits.
-static bool DigitGenCounted(DiyFp w,
- int requested_digits,
- Vector<char> buffer,
- int* length,
- int* kappa) {
- ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
- ASSERT(kMinimalTargetExponent >= -60);
- ASSERT(kMaximalTargetExponent <= -32);
- // w is assumed to have an error less than 1 unit. Whenever w is scaled we
- // also scale its error.
- uint64_t w_error = 1;
- // We cut the input number into two parts: the integral digits and the
- // fractional digits. We don't emit any decimal separator, but adapt kappa
- // instead. Example: instead of writing "1.2" we put "12" into the buffer and
- // increase kappa by 1.
- DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
- // Division by one is a shift.
- uint32_t integrals = static_cast<uint32_t>(w.f() >> -one.e());
- // Modulo by one is an and.
- uint64_t fractionals = w.f() & (one.f() - 1);
- uint32_t divisor;
- int divisor_exponent_plus_one;
- BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
- &divisor, &divisor_exponent_plus_one);
- *kappa = divisor_exponent_plus_one;
- *length = 0;
-
- // Loop invariant: buffer = w / 10^kappa (integer division)
- // The invariant holds for the first iteration: kappa has been initialized
- // with the divisor exponent + 1. And the divisor is the biggest power of ten
- // that is smaller than 'integrals'.
- while (*kappa > 0) {
- int digit = integrals / divisor;
- ASSERT(digit <= 9);
- buffer[*length] = static_cast<char>('0' + digit);
- (*length)++;
- requested_digits--;
- integrals %= divisor;
- (*kappa)--;
- // Note that kappa now equals the exponent of the divisor and that the
- // invariant thus holds again.
- if (requested_digits == 0) break;
- divisor /= 10;
- }
-
- if (requested_digits == 0) {
- uint64_t rest =
- (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
- return RoundWeedCounted(buffer, *length, rest,
- static_cast<uint64_t>(divisor) << -one.e(), w_error,
- kappa);
- }
-
- // The integrals have been generated. We are at the point of the decimal
- // separator. In the following loop we simply multiply the remaining digits by
- // 10 and divide by one. We just need to pay attention to multiply associated
- // data (the 'unit'), too.
- // Note that the multiplication by 10 does not overflow, because w.e >= -60
- // and thus one.e >= -60.
- ASSERT(one.e() >= -60);
- ASSERT(fractionals < one.f());
- ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
- while (requested_digits > 0 && fractionals > w_error) {
- fractionals *= 10;
- w_error *= 10;
- // Integer division by one.
- int digit = static_cast<int>(fractionals >> -one.e());
- ASSERT(digit <= 9);
- buffer[*length] = static_cast<char>('0' + digit);
- (*length)++;
- requested_digits--;
- fractionals &= one.f() - 1; // Modulo by one.
- (*kappa)--;
- }
- if (requested_digits != 0) return false;
- return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error,
- kappa);
-}
-
-
-// Provides a decimal representation of v.
-// Returns true if it succeeds, otherwise the result cannot be trusted.
-// There will be *length digits inside the buffer (not null-terminated).
-// If the function returns true then
-// v == (double) (buffer * 10^decimal_exponent).
-// The digits in the buffer are the shortest representation possible: no
-// 0.09999999999999999 instead of 0.1. The shorter representation will be
-// chosen even if the longer one would be closer to v.
-// The last digit will be closest to the actual v. That is, even if several
-// digits might correctly yield 'v' when read again, the closest will be
-// computed.
-static bool Grisu3(double v,
- FastDtoaMode mode,
- Vector<char> buffer,
- int* length,
- int* decimal_exponent) {
- DiyFp w = Double(v).AsNormalizedDiyFp();
- // boundary_minus and boundary_plus are the boundaries between v and its
- // closest floating-point neighbors. Any number strictly between
-  // boundary_minus and boundary_plus will round to v when converted to a double.
- // Grisu3 will never output representations that lie exactly on a boundary.
- DiyFp boundary_minus, boundary_plus;
- if (mode == FAST_DTOA_SHORTEST) {
- Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
- } else {
- ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE);
- float single_v = static_cast<float>(v);
- Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
- }
- ASSERT(boundary_plus.e() == w.e());
- DiyFp ten_mk; // Cached power of ten: 10^-k
- int mk; // -k
- int ten_mk_minimal_binary_exponent =
- kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
- int ten_mk_maximal_binary_exponent =
- kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
- PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
- ten_mk_minimal_binary_exponent,
- ten_mk_maximal_binary_exponent,
- &ten_mk, &mk);
- ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
- DiyFp::kSignificandSize) &&
- (kMaximalTargetExponent >= w.e() + ten_mk.e() +
- DiyFp::kSignificandSize));
- // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
- // 64 bit significand and ten_mk is thus only precise up to 64 bits.
-
- // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
-  // too. The variables scaled_w and scaled_boundary_minus/plus are now
- // off by a small amount.
- // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
- // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
- // (f-1) * 2^e < w*10^k < (f+1) * 2^e
- DiyFp scaled_w = DiyFp::Times(w, ten_mk);
- ASSERT(scaled_w.e() ==
- boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize);
- // In theory it would be possible to avoid some recomputations by computing
- // the difference between w and boundary_minus/plus (a power of 2) and to
- // compute scaled_boundary_minus/plus by subtracting/adding from
- // scaled_w. However the code becomes much less readable and the speed
-  // enhancements are not terrific.
- DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk);
- DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk);
-
- // DigitGen will generate the digits of scaled_w. Therefore we have
- // v == (double) (scaled_w * 10^-mk).
- // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an
-  // integer then it will be updated. For instance if scaled_w == 1.23 then
-  // the buffer will be filled with "123" and the decimal_exponent will be
- // decreased by 2.
- int kappa;
- bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus,
- buffer, length, &kappa);
- *decimal_exponent = -mk + kappa;
- return result;
-}
-
-
-// The "counted" version of grisu3 (see above) only generates requested_digits
-// number of digits. This version does not generate the shortest representation,
-// and with enough requested digits 0.1 will at some point print as 0.9999999...
-// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and
-// therefore the rounding strategy for halfway cases is irrelevant.
-static bool Grisu3Counted(double v,
- int requested_digits,
- Vector<char> buffer,
- int* length,
- int* decimal_exponent) {
- DiyFp w = Double(v).AsNormalizedDiyFp();
- DiyFp ten_mk; // Cached power of ten: 10^-k
- int mk; // -k
- int ten_mk_minimal_binary_exponent =
- kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
- int ten_mk_maximal_binary_exponent =
- kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
- PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
- ten_mk_minimal_binary_exponent,
- ten_mk_maximal_binary_exponent,
- &ten_mk, &mk);
- ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
- DiyFp::kSignificandSize) &&
- (kMaximalTargetExponent >= w.e() + ten_mk.e() +
- DiyFp::kSignificandSize));
- // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
- // 64 bit significand and ten_mk is thus only precise up to 64 bits.
-
- // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
-  // too. The variables scaled_w and scaled_boundary_minus/plus are now
- // off by a small amount.
- // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
- // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
- // (f-1) * 2^e < w*10^k < (f+1) * 2^e
- DiyFp scaled_w = DiyFp::Times(w, ten_mk);
-
- // We now have (double) (scaled_w * 10^-mk).
- // DigitGen will generate the first requested_digits digits of scaled_w and
- // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It
- // will not always be exactly the same since DigitGenCounted only produces a
- // limited number of digits.)
- int kappa;
- bool result = DigitGenCounted(scaled_w, requested_digits,
- buffer, length, &kappa);
- *decimal_exponent = -mk + kappa;
- return result;
-}
-
-
-bool FastDtoa(double v,
- FastDtoaMode mode,
- int requested_digits,
- Vector<char> buffer,
- int* length,
- int* decimal_point) {
- ASSERT(v > 0);
- ASSERT(!Double(v).IsSpecial());
-
- bool result = false;
- int decimal_exponent = 0;
- switch (mode) {
- case FAST_DTOA_SHORTEST:
- case FAST_DTOA_SHORTEST_SINGLE:
- result = Grisu3(v, mode, buffer, length, &decimal_exponent);
- break;
- case FAST_DTOA_PRECISION:
- result = Grisu3Counted(v, requested_digits,
- buffer, length, &decimal_exponent);
- break;
- default:
- UNREACHABLE();
- }
- if (result) {
- *decimal_point = *length + decimal_exponent;
- buffer[*length] = '\0';
- }
- return result;
-}
-
-} // namespace double_conversion
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "fast-dtoa.h"
+
+#include "cached-powers.h"
+#include "diy-fp.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// The minimal and maximal target exponent define the range of w's binary
+// exponent, where 'w' is the result of multiplying the input by a cached power
+// of ten.
+//
+// A different range might be chosen on a different platform, to optimize digit
+// generation, but a smaller range requires more powers of ten to be cached.
+static const int kMinimalTargetExponent = -60;
+static const int kMaximalTargetExponent = -32;
+
+
+// Adjusts the last digit of the generated number, and screens out generated
+// solutions that may be inaccurate. A solution may be inaccurate if it is
+// outside the safe interval, or if we cannot prove that it is closer to the
+// input than a neighboring representation of the same length.
+//
+// Input: * buffer containing the digits of too_high / 10^kappa
+// * the buffer's length
+// * distance_too_high_w == (too_high - w).f() * unit
+// * unsafe_interval == (too_high - too_low).f() * unit
+// * rest = (too_high - buffer * 10^kappa).f() * unit
+// * ten_kappa = 10^kappa * unit
+// * unit = the common multiplier
+// Output: returns true if the buffer is guaranteed to contain the closest
+// representable number to the input.
+// Modifies the generated digits in the buffer to approach (round towards) w.
+static bool RoundWeed(Vector<char> buffer,
+ int length,
+ uint64_t distance_too_high_w,
+ uint64_t unsafe_interval,
+ uint64_t rest,
+ uint64_t ten_kappa,
+ uint64_t unit) {
+ uint64_t small_distance = distance_too_high_w - unit;
+ uint64_t big_distance = distance_too_high_w + unit;
+ // Let w_low = too_high - big_distance, and
+ // w_high = too_high - small_distance.
+ // Note: w_low < w < w_high
+ //
+ // The real w (* unit) must lie somewhere inside the interval
+ // ]w_low; w_high[ (often written as "(w_low; w_high)")
+
+ // Basically the buffer currently contains a number in the unsafe interval
+ // ]too_low; too_high[ with too_low < w < too_high
+ //
+ // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ // ^v 1 unit ^ ^ ^ ^
+ // boundary_high --------------------- . . . .
+ // ^v 1 unit . . . .
+ // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . .
+ // . . ^ . .
+ // . big_distance . . .
+ // . . . . rest
+ // small_distance . . . .
+ // v . . . .
+ // w_high - - - - - - - - - - - - - - - - - - . . . .
+ // ^v 1 unit . . . .
+ // w ---------------------------------------- . . . .
+ // ^v 1 unit v . . .
+ // w_low - - - - - - - - - - - - - - - - - - - - - . . .
+ // . . v
+ // buffer --------------------------------------------------+-------+--------
+ // . .
+ // safe_interval .
+ // v .
+ // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .
+ // ^v 1 unit .
+ // boundary_low ------------------------- unsafe_interval
+ // ^v 1 unit v
+ // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ //
+ //
+ // Note that the value of buffer could lie anywhere inside the range too_low
+ // to too_high.
+ //
+ // boundary_low, boundary_high and w are approximations of the real boundaries
+ // and v (the input number). They are guaranteed to be precise up to one unit.
+ // In fact the error is guaranteed to be strictly less than one unit.
+ //
+ // Anything that lies outside the unsafe interval is guaranteed not to round
+ // to v when read again.
+ // Anything that lies inside the safe interval is guaranteed to round to v
+ // when read again.
+ // If the number inside the buffer lies inside the unsafe interval but not
+ // inside the safe interval then we simply do not know and bail out (returning
+ // false).
+ //
+ // Similarly we have to take into account the imprecision of 'w' when finding
+ // the closest representation of 'w'. If we have two potential
+ // representations, and one is closer to both w_low and w_high, then we know
+ // it is closer to the actual value v.
+ //
+ // By generating the digits of too_high we got the largest (closest to
+ // too_high) buffer that is still in the unsafe interval. In the case where
+ // w_high < buffer < too_high we try to decrement the buffer.
+ // This way the buffer approaches (rounds towards) w.
+ // There are 3 conditions that stop the decrementation process:
+ // 1) the buffer is already below w_high
+ // 2) decrementing the buffer would make it leave the unsafe interval
+ // 3) decrementing the buffer would yield a number below w_high and farther
+ // away than the current number. In other words:
+ // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high
+ // Instead of using the buffer directly we use its distance to too_high.
+ // Conceptually rest ~= too_high - buffer
+ // We need to do the following tests in this order to avoid over- and
+ // underflows.
+ ASSERT(rest <= unsafe_interval);
+ while (rest < small_distance && // Negated condition 1
+ unsafe_interval - rest >= ten_kappa && // Negated condition 2
+ (rest + ten_kappa < small_distance || // buffer{-1} > w_high
+ small_distance - rest >= rest + ten_kappa - small_distance)) {
+ buffer[length - 1]--;
+ rest += ten_kappa;
+ }
+
+ // We have approached w+ as much as possible. We now test if approaching w-
+ // would require changing the buffer. If yes, then we have two possible
+ // representations close to w, but we cannot decide which one is closer.
+ if (rest < big_distance &&
+ unsafe_interval - rest >= ten_kappa &&
+ (rest + ten_kappa < big_distance ||
+ big_distance - rest > rest + ten_kappa - big_distance)) {
+ return false;
+ }
+
+ // Weeding test.
+ // The safe interval is [too_low + 2 ulp; too_high - 2 ulp]
+ // Since too_low = too_high - unsafe_interval this is equivalent to
+ // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp]
+ // Conceptually we have: rest ~= too_high - buffer
+ return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit);
+}
+
+
+// Rounds the buffer upwards if the result is closer to v by possibly adding
+// 1 to the buffer. If the precision of the calculation is not sufficient to
+// round correctly, return false.
+// The rounding might shift the whole buffer in which case the kappa is
+// adjusted. For example "99", kappa = 3 might become "10", kappa = 4.
+//
+// If 2*rest > ten_kappa then the buffer needs to be rounded up.
+// rest can have an error of +/- 1 unit. This function accounts for the
+// imprecision and returns false, if the rounding direction cannot be
+// unambiguously determined.
+//
+// Precondition: rest < ten_kappa.
+static bool RoundWeedCounted(Vector<char> buffer,
+ int length,
+ uint64_t rest,
+ uint64_t ten_kappa,
+ uint64_t unit,
+ int* kappa) {
+ ASSERT(rest < ten_kappa);
+ // The following tests are done in a specific order to avoid overflows. They
+ // will work correctly with any uint64 values of rest < ten_kappa and unit.
+ //
+ // If the unit is too big, then we don't know which way to round. For example
+ // a unit of 50 means that the real number lies within rest +/- 50. If
+ // 10^kappa == 40 then there is no way to tell which way to round.
+ if (unit >= ten_kappa) return false;
+ // Even if unit is just half the size of 10^kappa we are already completely
+ // lost. (And after the previous test we know that the expression will not
+ // over/underflow.)
+ if (ten_kappa - unit <= unit) return false;
+ // If 2 * (rest + unit) <= 10^kappa we can safely round down.
+ if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) {
+ return true;
+ }
+ // If 2 * (rest - unit) >= 10^kappa, then we can safely round up.
+ if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) {
+ // Increment the last digit recursively until we find a non '9' digit.
+ buffer[length - 1]++;
+ for (int i = length - 1; i > 0; --i) {
+ if (buffer[i] != '0' + 10) break;
+ buffer[i] = '0';
+ buffer[i - 1]++;
+ }
+ // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the
+ // exception of the first digit all digits are now '0'. Simply switch the
+ // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and
+ // the power (the kappa) is increased.
+ if (buffer[0] == '0' + 10) {
+ buffer[0] = '1';
+ (*kappa) += 1;
+ }
+ return true;
+ }
+ return false;
+}
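The round-up branch above is the one place where the digit buffer can conceptually overflow: incrementing the last digit may carry all the way to the front, and an all-'9' buffer turns into "10...0" with kappa bumped by one. As an illustration, a small standalone C++ sketch of just that step (not part of this diff; the helper name is hypothetical):

#include <cstdio>

// Hypothetical helper mirroring the carry propagation in RoundWeedCounted.
static void RoundUpSketch(char* buf, int length, int* kappa) {
  buf[length - 1]++;
  for (int i = length - 1; i > 0; --i) {
    if (buf[i] != '0' + 10) break;   // no carry out of this position
    buf[i] = '0';
    buf[i - 1]++;
  }
  if (buf[0] == '0' + 10) {          // every digit was '9'
    buf[0] = '1';
    (*kappa) += 1;                   // absorb the overflow into one more power of ten
  }
}

int main() {
  char buf[] = "99";
  int kappa = 3;
  RoundUpSketch(buf, 2, &kappa);
  printf("%s kappa=%d\n", buf, kappa);   // prints: 10 kappa=4
}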
+
+// Returns the biggest power of ten that is less than or equal to the given
+// number. We furthermore receive the maximum number of bits 'number' has.
+//
+// Returns power == 10^(exponent_plus_one-1) such that
+// power <= number < power * 10.
+// If number_bits == 0 then 0^(0-1) is returned.
+// The number of bits must be <= 32.
+// Precondition: number < (1 << (number_bits + 1)).
+
+// Inspired by the method for finding an integer log base 10 from here:
+// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+static unsigned int const kSmallPowersOfTen[] =
+ {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+ 1000000000};
+
+static void BiggestPowerTen(uint32_t number,
+ int number_bits,
+ uint32_t* power,
+ int* exponent_plus_one) {
+ ASSERT(number < (1u << (number_bits + 1)));
+ // 1233/4096 is approximately 1/lg(10).
+ int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12);
+ // We increment to skip over the first entry in the kPowersOf10 table.
+ // Note: kPowersOf10[i] == 10^(i-1).
+ exponent_plus_one_guess++;
+ // We don't have any guarantees that 2^number_bits <= number.
+ if (number < kSmallPowersOfTen[exponent_plus_one_guess]) {
+ exponent_plus_one_guess--;
+ }
+ *power = kSmallPowersOfTen[exponent_plus_one_guess];
+ *exponent_plus_one = exponent_plus_one_guess;
+}
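The 1233/4096 constant used above is a fixed-point approximation of log10(2) (1233/4096 ~= 0.30103), so multiplying a bit count by it estimates the decimal digit count, with the estimate at most one too high. A standalone C++ sketch of the same idea (not part of this diff); it assumes a GCC/Clang-style __builtin_clz and uses illustrative names:

#include <cstdint>
#include <cstdio>

static const uint32_t kPow10Sketch[] = {
    1, 10, 100, 1000, 10000, 100000, 1000000,
    10000000, 100000000, 1000000000};

// Estimates the number of decimal digits of a 32-bit value, then corrects the
// possibly one-too-high guess by comparing against a power-of-ten table.
static int DecimalDigits(uint32_t n) {
  if (n == 0) return 1;
  int bits = 32 - __builtin_clz(n);          // index of highest set bit, plus one
  int guess = ((bits * 1233) >> 12) + 1;     // bits * log10(2), rounded down, plus one
  if (n < kPow10Sketch[guess - 1]) guess--;  // the estimate was one too high
  return guess;
}

int main() {
  printf("%d %d %d\n", DecimalDigits(9), DecimalDigits(10),
         DecimalDigits(4294967295u));        // prints: 1 2 10
}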
+
+// Generates the digits of input number w.
+// w is a floating-point number (DiyFp), consisting of a significand and an
+// exponent. Its exponent is bounded by kMinimalTargetExponent and
+// kMaximalTargetExponent.
+// Hence -60 <= w.e() <= -32.
+//
+// Returns false if it fails, in which case the generated digits in the buffer
+// should not be used.
+// Preconditions:
+// * low, w and high are correct up to 1 ulp (unit in the last place). That
+// is, their error must be less than a unit of their last digits.
+// * low.e() == w.e() == high.e()
+// * low < w < high, and taking into account their error: low~ <= high~
+// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
+// Postconditions: returns false if procedure fails.
+// otherwise:
+// * buffer is not null-terminated, but len contains the number of digits.
+// * buffer contains the shortest possible decimal digit-sequence
+// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the
+// correct values of low and high (without their error).
+// * if more than one decimal representation gives the minimal number of
+// decimal digits then the one closest to W (where W is the correct value
+// of w) is chosen.
+// Remark: this procedure takes into account the imprecision of its input
+// numbers. If the precision is not enough to guarantee all the postconditions
+// then false is returned. This happens rarely (roughly 0.5% of cases).
+//
+// Say, for the sake of example, that
+// w.e() == -48, and w.f() == 0x1234567890abcdef
+// w's value can be computed by w.f() * 2^w.e()
+// We can obtain w's integral digits by simply shifting w.f() by -w.e().
+// -> w's integral part is 0x1234
+// w's fractional part is therefore 0x567890abcdef.
+// Printing w's integral part is easy (simply print 0x1234 in decimal).
+// In order to print its fraction we repeatedly multiply the fraction by 10 and
+// get each digit. Example: the first digit after the point would be computed by
+// (0x567890abcdef * 10) >> 48. -> 3
+// The whole thing becomes slightly more complicated because we want to stop
+// once we have enough digits. That is, once the digits inside the buffer
+// represent 'w' we can stop. Everything inside the interval low - high
+// represents w. However we have to pay attention to low, high and w's
+// imprecision.
+static bool DigitGen(DiyFp low,
+ DiyFp w,
+ DiyFp high,
+ Vector<char> buffer,
+ int* length,
+ int* kappa) {
+ ASSERT(low.e() == w.e() && w.e() == high.e());
+ ASSERT(low.f() + 1 <= high.f() - 1);
+ ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
+ // low, w and high are imprecise, but by less than one ulp (unit in the last
+ // place).
+ // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that
+ // the new numbers are outside of the interval we want the final
+ // representation to lie in.
+ // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield
+ // numbers that are certain to lie in the interval. We will use this fact
+ // later on.
+ // We will now start by generating the digits within the uncertain
+ // interval. Later we will weed out representations that lie outside the safe
+ // interval and thus _might_ lie outside the correct interval.
+ uint64_t unit = 1;
+ DiyFp too_low = DiyFp(low.f() - unit, low.e());
+ DiyFp too_high = DiyFp(high.f() + unit, high.e());
+ // too_low and too_high are guaranteed to lie outside the interval we want the
+ // generated number in.
+ DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low);
+ // We now cut the input number into two parts: the integral digits and the
+ // fractionals. We will not write any decimal separator though, but adapt
+ // kappa instead.
+ // Reminder: we are currently computing the digits (stored inside the buffer)
+ // such that: too_low < buffer * 10^kappa < too_high
+ // We use too_high for the digit_generation and stop as soon as possible.
+ // If we stop early we effectively round down.
+ DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
+ // Division by one is a shift.
+ uint32_t integrals = static_cast<uint32_t>(too_high.f() >> -one.e());
+ // Modulo by one is an and.
+ uint64_t fractionals = too_high.f() & (one.f() - 1);
+ uint32_t divisor;
+ int divisor_exponent_plus_one;
+ BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
+ &divisor, &divisor_exponent_plus_one);
+ *kappa = divisor_exponent_plus_one;
+ *length = 0;
+ // Loop invariant: buffer = too_high / 10^kappa (integer division)
+ // The invariant holds for the first iteration: kappa has been initialized
+ // with the divisor exponent + 1. And the divisor is the biggest power of ten
+ // that is smaller than integrals.
+ while (*kappa > 0) {
+ int digit = integrals / divisor;
+ ASSERT(digit <= 9);
+ buffer[*length] = static_cast<char>('0' + digit);
+ (*length)++;
+ integrals %= divisor;
+ (*kappa)--;
+ // Note that kappa now equals the exponent of the divisor and that the
+ // invariant thus holds again.
+ uint64_t rest =
+ (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
+ // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e())
+ // Reminder: unsafe_interval.e() == one.e()
+ if (rest < unsafe_interval.f()) {
+ // Rounding down (by not emitting the remaining digits) yields a number
+ // that lies within the unsafe interval.
+ return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(),
+ unsafe_interval.f(), rest,
+ static_cast<uint64_t>(divisor) << -one.e(), unit);
+ }
+ divisor /= 10;
+ }
+
+ // The integrals have been generated. We are at the point of the decimal
+ // separator. In the following loop we simply multiply the remaining digits by
+ // 10 and divide by one. We just need to pay attention to multiply associated
+ // data (like the interval or 'unit'), too.
+ // Note that the multiplication by 10 does not overflow, because w.e >= -60
+ // and thus one.e >= -60.
+ ASSERT(one.e() >= -60);
+ ASSERT(fractionals < one.f());
+ ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
+ for (;;) {
+ fractionals *= 10;
+ unit *= 10;
+ unsafe_interval.set_f(unsafe_interval.f() * 10);
+ // Integer division by one.
+ int digit = static_cast<int>(fractionals >> -one.e());
+ ASSERT(digit <= 9);
+ buffer[*length] = static_cast<char>('0' + digit);
+ (*length)++;
+ fractionals &= one.f() - 1; // Modulo by one.
+ (*kappa)--;
+ if (fractionals < unsafe_interval.f()) {
+ return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit,
+ unsafe_interval.f(), fractionals, one.f(), unit);
+ }
+ }
+}
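The "division by one is a shift, modulo by one is an and" trick in DigitGen is easiest to see with the worked example from the comment above (w.e() == -48, w.f() == 0x1234567890abcdef). A standalone C++ sketch reproducing that split and the first fractional digit (not part of this diff):

#include <cstdint>
#include <cstdio>

int main() {
  const int e = 48;                                    // -w.e()
  const uint64_t f = 0x1234567890abcdefULL;            // w.f()
  const uint64_t one = static_cast<uint64_t>(1) << e;  // the DiyFp "one"
  uint32_t integrals = static_cast<uint32_t>(f >> e);  // division by one == shift
  uint64_t fractionals = f & (one - 1);                // modulo by one == mask
  printf("integral part = 0x%x\n", integrals);                    // 0x1234
  printf("fraction part = 0x%llx\n",
         static_cast<unsigned long long>(fractionals));           // 0x567890abcdef
  // First decimal digit of the fraction: multiply by 10, keep the new integral.
  printf("first fraction digit = %d\n",
         static_cast<int>((fractionals * 10) >> e));              // 3
}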
+
+
+
+// Generates (at most) requested_digits digits of input number w.
+// w is a floating-point number (DiyFp), consisting of a significand and an
+// exponent. Its exponent is bounded by kMinimalTargetExponent and
+// kMaximalTargetExponent.
+// Hence -60 <= w.e() <= -32.
+//
+// Returns false if it fails, in which case the generated digits in the buffer
+// should not be used.
+// Preconditions:
+// * w is correct up to 1 ulp (unit in the last place). That
+// is, its error must be strictly less than a unit of its last digit.
+// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
+//
+// Postconditions: returns false if procedure fails.
+// otherwise:
+// * buffer is not null-terminated, but length contains the number of
+// digits.
+// * the representation in buffer is the most precise representation of
+// requested_digits digits.
+// * buffer contains at most requested_digits digits of w. If there are less
+// than requested_digits digits then some trailing '0's have been removed.
+// * kappa is such that
+// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2.
+//
+// Remark: This procedure takes into account the imprecision of its input
+// numbers. If the precision is not enough to guarantee all the postconditions
+// then false is returned. This usually happens rarely, but the failure-rate
+// increases with higher requested_digits.
+static bool DigitGenCounted(DiyFp w,
+ int requested_digits,
+ Vector<char> buffer,
+ int* length,
+ int* kappa) {
+ ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
+ ASSERT(kMinimalTargetExponent >= -60);
+ ASSERT(kMaximalTargetExponent <= -32);
+ // w is assumed to have an error less than 1 unit. Whenever w is scaled we
+ // also scale its error.
+ uint64_t w_error = 1;
+ // We cut the input number into two parts: the integral digits and the
+ // fractional digits. We don't emit any decimal separator, but adapt kappa
+ // instead. Example: instead of writing "1.2" we put "12" into the buffer and
+ // increase kappa by 1.
+ DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
+ // Division by one is a shift.
+ uint32_t integrals = static_cast<uint32_t>(w.f() >> -one.e());
+ // Modulo by one is an and.
+ uint64_t fractionals = w.f() & (one.f() - 1);
+ uint32_t divisor;
+ int divisor_exponent_plus_one;
+ BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
+ &divisor, &divisor_exponent_plus_one);
+ *kappa = divisor_exponent_plus_one;
+ *length = 0;
+
+ // Loop invariant: buffer = w / 10^kappa (integer division)
+ // The invariant holds for the first iteration: kappa has been initialized
+ // with the divisor exponent + 1. And the divisor is the biggest power of ten
+ // that is smaller than 'integrals'.
+ while (*kappa > 0) {
+ int digit = integrals / divisor;
+ ASSERT(digit <= 9);
+ buffer[*length] = static_cast<char>('0' + digit);
+ (*length)++;
+ requested_digits--;
+ integrals %= divisor;
+ (*kappa)--;
+ // Note that kappa now equals the exponent of the divisor and that the
+ // invariant thus holds again.
+ if (requested_digits == 0) break;
+ divisor /= 10;
+ }
+
+ if (requested_digits == 0) {
+ uint64_t rest =
+ (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
+ return RoundWeedCounted(buffer, *length, rest,
+ static_cast<uint64_t>(divisor) << -one.e(), w_error,
+ kappa);
+ }
+
+ // The integrals have been generated. We are at the point of the decimal
+ // separator. In the following loop we simply multiply the remaining digits by
+ // 10 and divide by one. We just need to pay attention to multiply associated
+ // data (the 'unit'), too.
+ // Note that the multiplication by 10 does not overflow, because w.e >= -60
+ // and thus one.e >= -60.
+ ASSERT(one.e() >= -60);
+ ASSERT(fractionals < one.f());
+ ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
+ while (requested_digits > 0 && fractionals > w_error) {
+ fractionals *= 10;
+ w_error *= 10;
+ // Integer division by one.
+ int digit = static_cast<int>(fractionals >> -one.e());
+ ASSERT(digit <= 9);
+ buffer[*length] = static_cast<char>('0' + digit);
+ (*length)++;
+ requested_digits--;
+ fractionals &= one.f() - 1; // Modulo by one.
+ (*kappa)--;
+ }
+ if (requested_digits != 0) return false;
+ return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error,
+ kappa);
+}
+
+
+// Provides a decimal representation of v.
+// Returns true if it succeeds, otherwise the result cannot be trusted.
+// There will be *length digits inside the buffer (not null-terminated).
+// If the function returns true then
+// v == (double) (buffer * 10^decimal_exponent).
+// The digits in the buffer are the shortest representation possible: no
+// 0.09999999999999999 instead of 0.1. The shorter representation will be
+// chosen even if the longer one would be closer to v.
+// The last digit will be closest to the actual v. That is, even if several
+// digits might correctly yield 'v' when read again, the closest will be
+// computed.
+static bool Grisu3(double v,
+ FastDtoaMode mode,
+ Vector<char> buffer,
+ int* length,
+ int* decimal_exponent) {
+ DiyFp w = Double(v).AsNormalizedDiyFp();
+ // boundary_minus and boundary_plus are the boundaries between v and its
+ // closest floating-point neighbors. Any number strictly between
+  // boundary_minus and boundary_plus will round to v when converted to a double.
+ // Grisu3 will never output representations that lie exactly on a boundary.
+ DiyFp boundary_minus, boundary_plus;
+ if (mode == FAST_DTOA_SHORTEST) {
+ Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
+ } else {
+ ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE);
+ float single_v = static_cast<float>(v);
+ Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
+ }
+ ASSERT(boundary_plus.e() == w.e());
+ DiyFp ten_mk; // Cached power of ten: 10^-k
+ int mk; // -k
+ int ten_mk_minimal_binary_exponent =
+ kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+ int ten_mk_maximal_binary_exponent =
+ kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+ PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+ ten_mk_minimal_binary_exponent,
+ ten_mk_maximal_binary_exponent,
+ &ten_mk, &mk);
+ ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
+ DiyFp::kSignificandSize) &&
+ (kMaximalTargetExponent >= w.e() + ten_mk.e() +
+ DiyFp::kSignificandSize));
+ // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
+ // 64 bit significand and ten_mk is thus only precise up to 64 bits.
+
+ // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
+  // too. The variables scaled_w and scaled_boundary_minus/plus are now
+ // off by a small amount.
+ // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
+ // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
+ // (f-1) * 2^e < w*10^k < (f+1) * 2^e
+ DiyFp scaled_w = DiyFp::Times(w, ten_mk);
+ ASSERT(scaled_w.e() ==
+ boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize);
+ // In theory it would be possible to avoid some recomputations by computing
+ // the difference between w and boundary_minus/plus (a power of 2) and to
+ // compute scaled_boundary_minus/plus by subtracting/adding from
+ // scaled_w. However the code becomes much less readable and the speed
+  // enhancements are not terrific.
+ DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk);
+ DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk);
+
+ // DigitGen will generate the digits of scaled_w. Therefore we have
+ // v == (double) (scaled_w * 10^-mk).
+ // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an
+  // integer then it will be updated. For instance if scaled_w == 1.23 then
+  // the buffer will be filled with "123" and the decimal_exponent will be
+ // decreased by 2.
+ int kappa;
+ bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus,
+ buffer, length, &kappa);
+ *decimal_exponent = -mk + kappa;
+ return result;
+}
+
+
+// The "counted" version of grisu3 (see above) only generates requested_digits
+// number of digits. This version does not generate the shortest representation,
+// and with enough requested digits 0.1 will at some point print as 0.9999999...
+// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and
+// therefore the rounding strategy for halfway cases is irrelevant.
+static bool Grisu3Counted(double v,
+ int requested_digits,
+ Vector<char> buffer,
+ int* length,
+ int* decimal_exponent) {
+ DiyFp w = Double(v).AsNormalizedDiyFp();
+ DiyFp ten_mk; // Cached power of ten: 10^-k
+ int mk; // -k
+ int ten_mk_minimal_binary_exponent =
+ kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+ int ten_mk_maximal_binary_exponent =
+ kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+ PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+ ten_mk_minimal_binary_exponent,
+ ten_mk_maximal_binary_exponent,
+ &ten_mk, &mk);
+ ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
+ DiyFp::kSignificandSize) &&
+ (kMaximalTargetExponent >= w.e() + ten_mk.e() +
+ DiyFp::kSignificandSize));
+ // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
+ // 64 bit significand and ten_mk is thus only precise up to 64 bits.
+
+ // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
+  // too. The variables scaled_w and scaled_boundary_minus/plus are now
+ // off by a small amount.
+ // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
+ // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
+ // (f-1) * 2^e < w*10^k < (f+1) * 2^e
+ DiyFp scaled_w = DiyFp::Times(w, ten_mk);
+
+ // We now have (double) (scaled_w * 10^-mk).
+ // DigitGen will generate the first requested_digits digits of scaled_w and
+ // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It
+ // will not always be exactly the same since DigitGenCounted only produces a
+ // limited number of digits.)
+ int kappa;
+ bool result = DigitGenCounted(scaled_w, requested_digits,
+ buffer, length, &kappa);
+ *decimal_exponent = -mk + kappa;
+ return result;
+}
+
+
+bool FastDtoa(double v,
+ FastDtoaMode mode,
+ int requested_digits,
+ Vector<char> buffer,
+ int* length,
+ int* decimal_point) {
+ ASSERT(v > 0);
+ ASSERT(!Double(v).IsSpecial());
+
+ bool result = false;
+ int decimal_exponent = 0;
+ switch (mode) {
+ case FAST_DTOA_SHORTEST:
+ case FAST_DTOA_SHORTEST_SINGLE:
+ result = Grisu3(v, mode, buffer, length, &decimal_exponent);
+ break;
+ case FAST_DTOA_PRECISION:
+ result = Grisu3Counted(v, requested_digits,
+ buffer, length, &decimal_exponent);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ if (result) {
+ *decimal_point = *length + decimal_exponent;
+ buffer[*length] = '\0';
+ }
+ return result;
+}
+
+} // namespace double_conversion
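For orientation, a hedged usage sketch of the FastDtoa entry point defined above (not part of this diff): the include path and the Vector<char>(char*, int) constructor are assumptions based on the surrounding library sources, so adjust them if the local headers differ.

#include <cstdio>
#include "fast-dtoa.h"

int main() {
  using namespace double_conversion;
  char digits[kFastDtoaMaximalLength + 1];  // room for the trailing '\0'
  int length = 0;
  int decimal_point = 0;
  const double v = 0.1;                     // must be strictly positive and finite
  bool ok = FastDtoa(v, FAST_DTOA_SHORTEST, /*requested_digits=*/0,
                     Vector<char>(digits, static_cast<int>(sizeof(digits))),
                     &length, &decimal_point);
  if (ok) {
    // The value reads as digits * 10^(decimal_point - length); for 0.1 the
    // shortest form is "1" with decimal_point == 0.
    printf("digits=%s length=%d point=%d\n", digits, length, decimal_point);
  } else {
    // Grisu3 bails out on a small fraction of inputs; callers then fall back
    // to a slower, always-correct algorithm.
    printf("FastDtoa gave up; use a fallback dtoa\n");
  }
}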
diff --git a/contrib/libs/double-conversion/fast-dtoa.h b/contrib/libs/double-conversion/fast-dtoa.h
index b701103500..5f1e8eee5e 100644
--- a/contrib/libs/double-conversion/fast-dtoa.h
+++ b/contrib/libs/double-conversion/fast-dtoa.h
@@ -1,88 +1,88 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_
-#define DOUBLE_CONVERSION_FAST_DTOA_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-enum FastDtoaMode {
- // Computes the shortest representation of the given input. The returned
- // result will be the most accurate number of this length. Longer
- // representations might be more accurate.
- FAST_DTOA_SHORTEST,
- // Same as FAST_DTOA_SHORTEST but for single-precision floats.
- FAST_DTOA_SHORTEST_SINGLE,
- // Computes a representation where the precision (number of digits) is
- // given as input. The precision is independent of the decimal point.
- FAST_DTOA_PRECISION
-};
-
-// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not
-// include the terminating '\0' character.
-static const int kFastDtoaMaximalLength = 17;
-// Same for single-precision numbers.
-static const int kFastDtoaMaximalSingleLength = 9;
-
-// Provides a decimal representation of v.
-// The result should be interpreted as buffer * 10^(point - length).
-//
-// Precondition:
-// * v must be a strictly positive finite double.
-//
-// Returns true if it succeeds, otherwise the result cannot be trusted.
-// There will be *length digits inside the buffer followed by a null terminator.
-// If the function returns true and mode equals
-// - FAST_DTOA_SHORTEST, then
-// the parameter requested_digits is ignored.
-// The result satisfies
-// v == (double) (buffer * 10^(point - length)).
-// The digits in the buffer are the shortest representation possible. E.g.
-// if 0.099999999999 and 0.1 represent the same double then "1" is returned
-// with point = 0.
-// The last digit will be closest to the actual v. That is, even if several
-// digits might correctly yield 'v' when read again, the buffer will contain
-// the one closest to v.
-// - FAST_DTOA_PRECISION, then
-// the buffer contains requested_digits digits.
-// the difference v - (buffer * 10^(point-length)) is closest to zero for
-// all possible representations of requested_digits digits.
-// If there are two values that are equally close, then FastDtoa returns
-// false.
-// For both modes the buffer must be large enough to hold the result.
-bool FastDtoa(double d,
- FastDtoaMode mode,
- int requested_digits,
- Vector<char> buffer,
- int* length,
- int* decimal_point);
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_FAST_DTOA_H_
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_
+#define DOUBLE_CONVERSION_FAST_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+enum FastDtoaMode {
+ // Computes the shortest representation of the given input. The returned
+ // result will be the most accurate number of this length. Longer
+ // representations might be more accurate.
+ FAST_DTOA_SHORTEST,
+ // Same as FAST_DTOA_SHORTEST but for single-precision floats.
+ FAST_DTOA_SHORTEST_SINGLE,
+ // Computes a representation where the precision (number of digits) is
+ // given as input. The precision is independent of the decimal point.
+ FAST_DTOA_PRECISION
+};
+
+// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not
+// include the terminating '\0' character.
+static const int kFastDtoaMaximalLength = 17;
+// Same for single-precision numbers.
+static const int kFastDtoaMaximalSingleLength = 9;
+
+// Provides a decimal representation of v.
+// The result should be interpreted as buffer * 10^(point - length).
+//
+// Precondition:
+// * v must be a strictly positive finite double.
+//
+// Returns true if it succeeds, otherwise the result cannot be trusted.
+// There will be *length digits inside the buffer followed by a null terminator.
+// If the function returns true and mode equals
+// - FAST_DTOA_SHORTEST, then
+// the parameter requested_digits is ignored.
+// The result satisfies
+// v == (double) (buffer * 10^(point - length)).
+// The digits in the buffer are the shortest representation possible. E.g.
+// if 0.099999999999 and 0.1 represent the same double then "1" is returned
+// with point = 0.
+// The last digit will be closest to the actual v. That is, even if several
+// digits might correctly yield 'v' when read again, the buffer will contain
+// the one closest to v.
+// - FAST_DTOA_PRECISION, then
+// the buffer contains requested_digits digits.
+// the difference v - (buffer * 10^(point-length)) is closest to zero for
+// all possible representations of requested_digits digits.
+// If there are two values that are equally close, then FastDtoa returns
+// false.
+// For both modes the buffer must be large enough to hold the result.
+bool FastDtoa(double d,
+ FastDtoaMode mode,
+ int requested_digits,
+ Vector<char> buffer,
+ int* length,
+ int* decimal_point);
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_FAST_DTOA_H_
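
For context, a minimal usage sketch of the FastDtoa API declared above. This is an illustrative sketch, not part of the diff: it assumes the double-conversion headers restored in this commit (fast-dtoa.h and the Vector<char> helper from utils.h) are on the include path, exactly as declared in the hunk above.

    #include <cstdio>
    #include "fast-dtoa.h"

    int main() {
      using namespace double_conversion;
      // Room for the shortest representation plus the terminating '\0'.
      char digits[kFastDtoaMaximalLength + 1];
      int length = 0;
      int decimal_point = 0;
      const double v = 0.1;  // FastDtoa requires a strictly positive, finite double.
      bool ok = FastDtoa(v, FAST_DTOA_SHORTEST, 0,  // requested_digits is ignored in SHORTEST mode
                         Vector<char>(digits, kFastDtoaMaximalLength + 1),
                         &length, &decimal_point);
      if (ok) {
        // Interpret the result as digits * 10^(decimal_point - length),
        // e.g. "1" with decimal_point == 0 for v == 0.1.
        std::printf("%sE%d\n", digits, decimal_point - length);
      }
      return 0;
    }
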
diff --git a/contrib/libs/double-conversion/fixed-dtoa.cc b/contrib/libs/double-conversion/fixed-dtoa.cc
index eb5b27d777..0f989bceaf 100644
--- a/contrib/libs/double-conversion/fixed-dtoa.cc
+++ b/contrib/libs/double-conversion/fixed-dtoa.cc
@@ -1,405 +1,405 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#include <cmath>
-
-#include "fixed-dtoa.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-// Represents a 128bit type. This class should be replaced by a native type on
-// platforms that support 128bit integers.
-class UInt128 {
- public:
- UInt128() : high_bits_(0), low_bits_(0) { }
- UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { }
-
- void Multiply(uint32_t multiplicand) {
- uint64_t accumulator;
-
- accumulator = (low_bits_ & kMask32) * multiplicand;
- uint32_t part = static_cast<uint32_t>(accumulator & kMask32);
- accumulator >>= 32;
- accumulator = accumulator + (low_bits_ >> 32) * multiplicand;
- low_bits_ = (accumulator << 32) + part;
- accumulator >>= 32;
- accumulator = accumulator + (high_bits_ & kMask32) * multiplicand;
- part = static_cast<uint32_t>(accumulator & kMask32);
- accumulator >>= 32;
- accumulator = accumulator + (high_bits_ >> 32) * multiplicand;
- high_bits_ = (accumulator << 32) + part;
- ASSERT((accumulator >> 32) == 0);
- }
-
- void Shift(int shift_amount) {
- ASSERT(-64 <= shift_amount && shift_amount <= 64);
- if (shift_amount == 0) {
- return;
- } else if (shift_amount == -64) {
- high_bits_ = low_bits_;
- low_bits_ = 0;
- } else if (shift_amount == 64) {
- low_bits_ = high_bits_;
- high_bits_ = 0;
- } else if (shift_amount <= 0) {
- high_bits_ <<= -shift_amount;
- high_bits_ += low_bits_ >> (64 + shift_amount);
- low_bits_ <<= -shift_amount;
- } else {
- low_bits_ >>= shift_amount;
- low_bits_ += high_bits_ << (64 - shift_amount);
- high_bits_ >>= shift_amount;
- }
- }
-
- // Modifies *this to *this MOD (2^power).
- // Returns *this DIV (2^power).
- int DivModPowerOf2(int power) {
- if (power >= 64) {
- int result = static_cast<int>(high_bits_ >> (power - 64));
- high_bits_ -= static_cast<uint64_t>(result) << (power - 64);
- return result;
- } else {
- uint64_t part_low = low_bits_ >> power;
- uint64_t part_high = high_bits_ << (64 - power);
- int result = static_cast<int>(part_low + part_high);
- high_bits_ = 0;
- low_bits_ -= part_low << power;
- return result;
- }
- }
-
- bool IsZero() const {
- return high_bits_ == 0 && low_bits_ == 0;
- }
-
- int BitAt(int position) const {
- if (position >= 64) {
- return static_cast<int>(high_bits_ >> (position - 64)) & 1;
- } else {
- return static_cast<int>(low_bits_ >> position) & 1;
- }
- }
-
- private:
- static const uint64_t kMask32 = 0xFFFFFFFF;
- // Value == (high_bits_ << 64) + low_bits_
- uint64_t high_bits_;
- uint64_t low_bits_;
-};
-
-
-static const int kDoubleSignificandSize = 53; // Includes the hidden bit.
-
-
-static void FillDigits32FixedLength(uint32_t number, int requested_length,
- Vector<char> buffer, int* length) {
- for (int i = requested_length - 1; i >= 0; --i) {
- buffer[(*length) + i] = '0' + number % 10;
- number /= 10;
- }
- *length += requested_length;
-}
-
-
-static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
- int number_length = 0;
- // We fill the digits in reverse order and exchange them afterwards.
- while (number != 0) {
- int digit = number % 10;
- number /= 10;
- buffer[(*length) + number_length] = static_cast<char>('0' + digit);
- number_length++;
- }
- // Exchange the digits.
- int i = *length;
- int j = *length + number_length - 1;
- while (i < j) {
- char tmp = buffer[i];
- buffer[i] = buffer[j];
- buffer[j] = tmp;
- i++;
- j--;
- }
- *length += number_length;
-}
-
-
-static void FillDigits64FixedLength(uint64_t number,
- Vector<char> buffer, int* length) {
- const uint32_t kTen7 = 10000000;
- // For efficiency cut the number into 3 uint32_t parts, and print those.
- uint32_t part2 = static_cast<uint32_t>(number % kTen7);
- number /= kTen7;
- uint32_t part1 = static_cast<uint32_t>(number % kTen7);
- uint32_t part0 = static_cast<uint32_t>(number / kTen7);
-
- FillDigits32FixedLength(part0, 3, buffer, length);
- FillDigits32FixedLength(part1, 7, buffer, length);
- FillDigits32FixedLength(part2, 7, buffer, length);
-}
-
-
-static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) {
- const uint32_t kTen7 = 10000000;
- // For efficiency cut the number into 3 uint32_t parts, and print those.
- uint32_t part2 = static_cast<uint32_t>(number % kTen7);
- number /= kTen7;
- uint32_t part1 = static_cast<uint32_t>(number % kTen7);
- uint32_t part0 = static_cast<uint32_t>(number / kTen7);
-
- if (part0 != 0) {
- FillDigits32(part0, buffer, length);
- FillDigits32FixedLength(part1, 7, buffer, length);
- FillDigits32FixedLength(part2, 7, buffer, length);
- } else if (part1 != 0) {
- FillDigits32(part1, buffer, length);
- FillDigits32FixedLength(part2, 7, buffer, length);
- } else {
- FillDigits32(part2, buffer, length);
- }
-}
-
-
-static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) {
- // An empty buffer represents 0.
- if (*length == 0) {
- buffer[0] = '1';
- *decimal_point = 1;
- *length = 1;
- return;
- }
- // Round the last digit until we either have a digit that is not '9' or we
- // reach the first digit.
- buffer[(*length) - 1]++;
- for (int i = (*length) - 1; i > 0; --i) {
- if (buffer[i] != '0' + 10) {
- return;
- }
- buffer[i] = '0';
- buffer[i - 1]++;
- }
- // If the first digit is now '0' + 10, we would need to set it to '0' and add
- // a '1' in front. However we reach the first digit only if all following
- // digits had been '9' before rounding up. Now all trailing digits are '0' and
- // we simply switch the first digit to '1' and update the decimal-point
- // (indicating that the point is now one digit to the right).
- if (buffer[0] == '0' + 10) {
- buffer[0] = '1';
- (*decimal_point)++;
- }
-}
-
-
-// The given fractionals number represents a fixed-point number with binary
-// point at bit (-exponent).
-// Preconditions:
-// -128 <= exponent <= 0.
-// 0 <= fractionals * 2^exponent < 1
-// The buffer holds the result.
-// The function will round its result. During the rounding-process digits not
-// generated by this function might be updated, and the decimal-point variable
-// might be updated. If this function generates the digits 99 and the buffer
-// already contained "199" (thus yielding a buffer of "19999") then a
-// rounding-up will change the contents of the buffer to "20000".
-static void FillFractionals(uint64_t fractionals, int exponent,
- int fractional_count, Vector<char> buffer,
- int* length, int* decimal_point) {
- ASSERT(-128 <= exponent && exponent <= 0);
- // 'fractionals' is a fixed-point number, with binary point at bit
- // (-exponent). Inside the function the non-converted remainder of fractionals
- // is a fixed-point number, with binary point at bit 'point'.
- if (-exponent <= 64) {
- // One 64 bit number is sufficient.
- ASSERT(fractionals >> 56 == 0);
- int point = -exponent;
- for (int i = 0; i < fractional_count; ++i) {
- if (fractionals == 0) break;
- // Instead of multiplying by 10 we multiply by 5 and adjust the point
- // location. This way the fractionals variable will not overflow.
- // Invariant at the beginning of the loop: fractionals < 2^point.
- // Initially we have: point <= 64 and fractionals < 2^56
- // After each iteration the point is decremented by one.
- // Note that 5^3 = 125 < 128 = 2^7.
- // Therefore three iterations of this loop will not overflow fractionals
- // (even without the subtraction at the end of the loop body). At this
- // time point will satisfy point <= 61 and therefore fractionals < 2^point
- // and any further multiplication of fractionals by 5 will not overflow.
- fractionals *= 5;
- point--;
- int digit = static_cast<int>(fractionals >> point);
- ASSERT(digit <= 9);
- buffer[*length] = static_cast<char>('0' + digit);
- (*length)++;
- fractionals -= static_cast<uint64_t>(digit) << point;
- }
- // If the first bit after the point is set we have to round up.
- ASSERT(fractionals == 0 || point - 1 >= 0);
- if ((fractionals != 0) && ((fractionals >> (point - 1)) & 1) == 1) {
- RoundUp(buffer, length, decimal_point);
- }
- } else { // We need 128 bits.
- ASSERT(64 < -exponent && -exponent <= 128);
- UInt128 fractionals128 = UInt128(fractionals, 0);
- fractionals128.Shift(-exponent - 64);
- int point = 128;
- for (int i = 0; i < fractional_count; ++i) {
- if (fractionals128.IsZero()) break;
- // As before: instead of multiplying by 10 we multiply by 5 and adjust the
- // point location.
- // This multiplication will not overflow for the same reasons as before.
- fractionals128.Multiply(5);
- point--;
- int digit = fractionals128.DivModPowerOf2(point);
- ASSERT(digit <= 9);
- buffer[*length] = static_cast<char>('0' + digit);
- (*length)++;
- }
- if (fractionals128.BitAt(point - 1) == 1) {
- RoundUp(buffer, length, decimal_point);
- }
- }
-}
-
-
-// Removes leading and trailing zeros.
-// If leading zeros are removed then the decimal point position is adjusted.
-static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) {
- while (*length > 0 && buffer[(*length) - 1] == '0') {
- (*length)--;
- }
- int first_non_zero = 0;
- while (first_non_zero < *length && buffer[first_non_zero] == '0') {
- first_non_zero++;
- }
- if (first_non_zero != 0) {
- for (int i = first_non_zero; i < *length; ++i) {
- buffer[i - first_non_zero] = buffer[i];
- }
- *length -= first_non_zero;
- *decimal_point -= first_non_zero;
- }
-}
-
-
-bool FastFixedDtoa(double v,
- int fractional_count,
- Vector<char> buffer,
- int* length,
- int* decimal_point) {
- const uint32_t kMaxUInt32 = 0xFFFFFFFF;
- uint64_t significand = Double(v).Significand();
- int exponent = Double(v).Exponent();
- // v = significand * 2^exponent (with significand a 53bit integer).
- // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we
- // don't know how to compute the representation. 2^73 ~= 9.5*10^21.
- // If necessary this limit could probably be increased, but we don't need
- // more.
- if (exponent > 20) return false;
- if (fractional_count > 20) return false;
- *length = 0;
- // At most kDoubleSignificandSize bits of the significand are non-zero.
- // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero
- // bits: 0..11*..0xxx..53*..xx
- if (exponent + kDoubleSignificandSize > 64) {
- // The exponent must be > 11.
- //
- // We know that v = significand * 2^exponent.
- // And the exponent > 11.
- // We simplify the task by dividing v by 10^17.
- // The quotient delivers the first digits, and the remainder fits into a 64
- // bit number.
- // Dividing by 10^17 is equivalent to dividing by 5^17*2^17.
- const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17
- uint64_t divisor = kFive17;
- int divisor_power = 17;
- uint64_t dividend = significand;
- uint32_t quotient;
- uint64_t remainder;
- // Let v = f * 2^e with f == significand and e == exponent.
- // Then need q (quotient) and r (remainder) as follows:
- // v = q * 10^17 + r
- // f * 2^e = q * 10^17 + r
- // f * 2^e = q * 5^17 * 2^17 + r
- // If e > 17 then
- // f * 2^(e-17) = q * 5^17 + r/2^17
- // else
- // f = q * 5^17 * 2^(17-e) + r/2^e
- if (exponent > divisor_power) {
- // We only allow exponents of up to 20 and therefore (e - 17) <= 3
- dividend <<= exponent - divisor_power;
- quotient = static_cast<uint32_t>(dividend / divisor);
- remainder = (dividend % divisor) << divisor_power;
- } else {
- divisor <<= divisor_power - exponent;
- quotient = static_cast<uint32_t>(dividend / divisor);
- remainder = (dividend % divisor) << exponent;
- }
- FillDigits32(quotient, buffer, length);
- FillDigits64FixedLength(remainder, buffer, length);
- *decimal_point = *length;
- } else if (exponent >= 0) {
- // 0 <= exponent <= 11
- significand <<= exponent;
- FillDigits64(significand, buffer, length);
- *decimal_point = *length;
- } else if (exponent > -kDoubleSignificandSize) {
- // We have to cut the number.
- uint64_t integrals = significand >> -exponent;
- uint64_t fractionals = significand - (integrals << -exponent);
- if (integrals > kMaxUInt32) {
- FillDigits64(integrals, buffer, length);
- } else {
- FillDigits32(static_cast<uint32_t>(integrals), buffer, length);
- }
- *decimal_point = *length;
- FillFractionals(fractionals, exponent, fractional_count,
- buffer, length, decimal_point);
- } else if (exponent < -128) {
- // This configuration (with at most 20 digits) means that all digits must be
- // 0.
- ASSERT(fractional_count <= 20);
- buffer[0] = '\0';
- *length = 0;
- *decimal_point = -fractional_count;
- } else {
- *decimal_point = 0;
- FillFractionals(significand, exponent, fractional_count,
- buffer, length, decimal_point);
- }
- TrimZeros(buffer, length, decimal_point);
- buffer[*length] = '\0';
- if ((*length) == 0) {
- // The string is empty and the decimal_point thus has no importance. Mimic
- // Gay's dtoa and set it to -fractional_count.
- *decimal_point = -fractional_count;
- }
- return true;
-}
-
-} // namespace double_conversion
+
+#include "fixed-dtoa.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// Represents a 128bit type. This class should be replaced by a native type on
+// platforms that support 128bit integers.
+class UInt128 {
+ public:
+ UInt128() : high_bits_(0), low_bits_(0) { }
+ UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { }
+
+ void Multiply(uint32_t multiplicand) {
+ uint64_t accumulator;
+
+ accumulator = (low_bits_ & kMask32) * multiplicand;
+ uint32_t part = static_cast<uint32_t>(accumulator & kMask32);
+ accumulator >>= 32;
+ accumulator = accumulator + (low_bits_ >> 32) * multiplicand;
+ low_bits_ = (accumulator << 32) + part;
+ accumulator >>= 32;
+ accumulator = accumulator + (high_bits_ & kMask32) * multiplicand;
+ part = static_cast<uint32_t>(accumulator & kMask32);
+ accumulator >>= 32;
+ accumulator = accumulator + (high_bits_ >> 32) * multiplicand;
+ high_bits_ = (accumulator << 32) + part;
+ ASSERT((accumulator >> 32) == 0);
+ }
+
+ void Shift(int shift_amount) {
+ ASSERT(-64 <= shift_amount && shift_amount <= 64);
+ if (shift_amount == 0) {
+ return;
+ } else if (shift_amount == -64) {
+ high_bits_ = low_bits_;
+ low_bits_ = 0;
+ } else if (shift_amount == 64) {
+ low_bits_ = high_bits_;
+ high_bits_ = 0;
+ } else if (shift_amount <= 0) {
+ high_bits_ <<= -shift_amount;
+ high_bits_ += low_bits_ >> (64 + shift_amount);
+ low_bits_ <<= -shift_amount;
+ } else {
+ low_bits_ >>= shift_amount;
+ low_bits_ += high_bits_ << (64 - shift_amount);
+ high_bits_ >>= shift_amount;
+ }
+ }
+
+ // Modifies *this to *this MOD (2^power).
+ // Returns *this DIV (2^power).
+ int DivModPowerOf2(int power) {
+ if (power >= 64) {
+ int result = static_cast<int>(high_bits_ >> (power - 64));
+ high_bits_ -= static_cast<uint64_t>(result) << (power - 64);
+ return result;
+ } else {
+ uint64_t part_low = low_bits_ >> power;
+ uint64_t part_high = high_bits_ << (64 - power);
+ int result = static_cast<int>(part_low + part_high);
+ high_bits_ = 0;
+ low_bits_ -= part_low << power;
+ return result;
+ }
+ }
+
+ bool IsZero() const {
+ return high_bits_ == 0 && low_bits_ == 0;
+ }
+
+ int BitAt(int position) const {
+ if (position >= 64) {
+ return static_cast<int>(high_bits_ >> (position - 64)) & 1;
+ } else {
+ return static_cast<int>(low_bits_ >> position) & 1;
+ }
+ }
+
+ private:
+ static const uint64_t kMask32 = 0xFFFFFFFF;
+ // Value == (high_bits_ << 64) + low_bits_
+ uint64_t high_bits_;
+ uint64_t low_bits_;
+};
+
+
+static const int kDoubleSignificandSize = 53; // Includes the hidden bit.
+
+
+static void FillDigits32FixedLength(uint32_t number, int requested_length,
+ Vector<char> buffer, int* length) {
+ for (int i = requested_length - 1; i >= 0; --i) {
+ buffer[(*length) + i] = '0' + number % 10;
+ number /= 10;
+ }
+ *length += requested_length;
+}
+
+
+static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
+ int number_length = 0;
+ // We fill the digits in reverse order and exchange them afterwards.
+ while (number != 0) {
+ int digit = number % 10;
+ number /= 10;
+ buffer[(*length) + number_length] = static_cast<char>('0' + digit);
+ number_length++;
+ }
+ // Exchange the digits.
+ int i = *length;
+ int j = *length + number_length - 1;
+ while (i < j) {
+ char tmp = buffer[i];
+ buffer[i] = buffer[j];
+ buffer[j] = tmp;
+ i++;
+ j--;
+ }
+ *length += number_length;
+}
+
+
+static void FillDigits64FixedLength(uint64_t number,
+ Vector<char> buffer, int* length) {
+ const uint32_t kTen7 = 10000000;
+ // For efficiency cut the number into 3 uint32_t parts, and print those.
+ uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+ number /= kTen7;
+ uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+ uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+ FillDigits32FixedLength(part0, 3, buffer, length);
+ FillDigits32FixedLength(part1, 7, buffer, length);
+ FillDigits32FixedLength(part2, 7, buffer, length);
+}
+
+
+static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) {
+ const uint32_t kTen7 = 10000000;
+ // For efficiency cut the number into 3 uint32_t parts, and print those.
+ uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+ number /= kTen7;
+ uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+ uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+ if (part0 != 0) {
+ FillDigits32(part0, buffer, length);
+ FillDigits32FixedLength(part1, 7, buffer, length);
+ FillDigits32FixedLength(part2, 7, buffer, length);
+ } else if (part1 != 0) {
+ FillDigits32(part1, buffer, length);
+ FillDigits32FixedLength(part2, 7, buffer, length);
+ } else {
+ FillDigits32(part2, buffer, length);
+ }
+}
+
+
+static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) {
+ // An empty buffer represents 0.
+ if (*length == 0) {
+ buffer[0] = '1';
+ *decimal_point = 1;
+ *length = 1;
+ return;
+ }
+ // Round the last digit until we either have a digit that is not '9' or we
+ // reach the first digit.
+ buffer[(*length) - 1]++;
+ for (int i = (*length) - 1; i > 0; --i) {
+ if (buffer[i] != '0' + 10) {
+ return;
+ }
+ buffer[i] = '0';
+ buffer[i - 1]++;
+ }
+ // If the first digit is now '0' + 10, we would need to set it to '0' and add
+ // a '1' in front. However we reach the first digit only if all following
+ // digits had been '9' before rounding up. Now all trailing digits are '0' and
+ // we simply switch the first digit to '1' and update the decimal-point
+ // (indicating that the point is now one digit to the right).
+ if (buffer[0] == '0' + 10) {
+ buffer[0] = '1';
+ (*decimal_point)++;
+ }
+}
+
+
+// The given fractionals number represents a fixed-point number with binary
+// point at bit (-exponent).
+// Preconditions:
+// -128 <= exponent <= 0.
+// 0 <= fractionals * 2^exponent < 1
+// The buffer holds the result.
+// The function will round its result. During the rounding-process digits not
+// generated by this function might be updated, and the decimal-point variable
+// might be updated. If this function generates the digits 99 and the buffer
+// already contained "199" (thus yielding a buffer of "19999") then a
+// rounding-up will change the contents of the buffer to "20000".
+static void FillFractionals(uint64_t fractionals, int exponent,
+ int fractional_count, Vector<char> buffer,
+ int* length, int* decimal_point) {
+ ASSERT(-128 <= exponent && exponent <= 0);
+ // 'fractionals' is a fixed-point number, with binary point at bit
+ // (-exponent). Inside the function the non-converted remainder of fractionals
+ // is a fixed-point number, with binary point at bit 'point'.
+ if (-exponent <= 64) {
+ // One 64 bit number is sufficient.
+ ASSERT(fractionals >> 56 == 0);
+ int point = -exponent;
+ for (int i = 0; i < fractional_count; ++i) {
+ if (fractionals == 0) break;
+ // Instead of multiplying by 10 we multiply by 5 and adjust the point
+ // location. This way the fractionals variable will not overflow.
+ // Invariant at the beginning of the loop: fractionals < 2^point.
+ // Initially we have: point <= 64 and fractionals < 2^56
+ // After each iteration the point is decremented by one.
+ // Note that 5^3 = 125 < 128 = 2^7.
+ // Therefore three iterations of this loop will not overflow fractionals
+ // (even without the subtraction at the end of the loop body). At this
+ // time point will satisfy point <= 61 and therefore fractionals < 2^point
+ // and any further multiplication of fractionals by 5 will not overflow.
+ fractionals *= 5;
+ point--;
+ int digit = static_cast<int>(fractionals >> point);
+ ASSERT(digit <= 9);
+ buffer[*length] = static_cast<char>('0' + digit);
+ (*length)++;
+ fractionals -= static_cast<uint64_t>(digit) << point;
+ }
+ // If the first bit after the point is set we have to round up.
+ ASSERT(fractionals == 0 || point - 1 >= 0);
+ if ((fractionals != 0) && ((fractionals >> (point - 1)) & 1) == 1) {
+ RoundUp(buffer, length, decimal_point);
+ }
+ } else { // We need 128 bits.
+ ASSERT(64 < -exponent && -exponent <= 128);
+ UInt128 fractionals128 = UInt128(fractionals, 0);
+ fractionals128.Shift(-exponent - 64);
+ int point = 128;
+ for (int i = 0; i < fractional_count; ++i) {
+ if (fractionals128.IsZero()) break;
+ // As before: instead of multiplying by 10 we multiply by 5 and adjust the
+ // point location.
+ // This multiplication will not overflow for the same reasons as before.
+ fractionals128.Multiply(5);
+ point--;
+ int digit = fractionals128.DivModPowerOf2(point);
+ ASSERT(digit <= 9);
+ buffer[*length] = static_cast<char>('0' + digit);
+ (*length)++;
+ }
+ if (fractionals128.BitAt(point - 1) == 1) {
+ RoundUp(buffer, length, decimal_point);
+ }
+ }
+}
+
+
+// Removes leading and trailing zeros.
+// If leading zeros are removed then the decimal point position is adjusted.
+static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) {
+ while (*length > 0 && buffer[(*length) - 1] == '0') {
+ (*length)--;
+ }
+ int first_non_zero = 0;
+ while (first_non_zero < *length && buffer[first_non_zero] == '0') {
+ first_non_zero++;
+ }
+ if (first_non_zero != 0) {
+ for (int i = first_non_zero; i < *length; ++i) {
+ buffer[i - first_non_zero] = buffer[i];
+ }
+ *length -= first_non_zero;
+ *decimal_point -= first_non_zero;
+ }
+}
+
+
+bool FastFixedDtoa(double v,
+ int fractional_count,
+ Vector<char> buffer,
+ int* length,
+ int* decimal_point) {
+ const uint32_t kMaxUInt32 = 0xFFFFFFFF;
+ uint64_t significand = Double(v).Significand();
+ int exponent = Double(v).Exponent();
+ // v = significand * 2^exponent (with significand a 53bit integer).
+ // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we
+ // don't know how to compute the representation. 2^73 ~= 9.5*10^21.
+ // If necessary this limit could probably be increased, but we don't need
+ // more.
+ if (exponent > 20) return false;
+ if (fractional_count > 20) return false;
+ *length = 0;
+ // At most kDoubleSignificandSize bits of the significand are non-zero.
+ // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero
+ // bits: 0..11*..0xxx..53*..xx
+ if (exponent + kDoubleSignificandSize > 64) {
+ // The exponent must be > 11.
+ //
+ // We know that v = significand * 2^exponent.
+ // And the exponent > 11.
+ // We simplify the task by dividing v by 10^17.
+ // The quotient delivers the first digits, and the remainder fits into a 64
+ // bit number.
+ // Dividing by 10^17 is equivalent to dividing by 5^17*2^17.
+ const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17
+ uint64_t divisor = kFive17;
+ int divisor_power = 17;
+ uint64_t dividend = significand;
+ uint32_t quotient;
+ uint64_t remainder;
+ // Let v = f * 2^e with f == significand and e == exponent.
+ // Then need q (quotient) and r (remainder) as follows:
+ // v = q * 10^17 + r
+ // f * 2^e = q * 10^17 + r
+ // f * 2^e = q * 5^17 * 2^17 + r
+ // If e > 17 then
+ // f * 2^(e-17) = q * 5^17 + r/2^17
+ // else
+ // f = q * 5^17 * 2^(17-e) + r/2^e
+ if (exponent > divisor_power) {
+ // We only allow exponents of up to 20 and therefore (e - 17) <= 3
+ dividend <<= exponent - divisor_power;
+ quotient = static_cast<uint32_t>(dividend / divisor);
+ remainder = (dividend % divisor) << divisor_power;
+ } else {
+ divisor <<= divisor_power - exponent;
+ quotient = static_cast<uint32_t>(dividend / divisor);
+ remainder = (dividend % divisor) << exponent;
+ }
+ FillDigits32(quotient, buffer, length);
+ FillDigits64FixedLength(remainder, buffer, length);
+ *decimal_point = *length;
+ } else if (exponent >= 0) {
+ // 0 <= exponent <= 11
+ significand <<= exponent;
+ FillDigits64(significand, buffer, length);
+ *decimal_point = *length;
+ } else if (exponent > -kDoubleSignificandSize) {
+ // We have to cut the number.
+ uint64_t integrals = significand >> -exponent;
+ uint64_t fractionals = significand - (integrals << -exponent);
+ if (integrals > kMaxUInt32) {
+ FillDigits64(integrals, buffer, length);
+ } else {
+ FillDigits32(static_cast<uint32_t>(integrals), buffer, length);
+ }
+ *decimal_point = *length;
+ FillFractionals(fractionals, exponent, fractional_count,
+ buffer, length, decimal_point);
+ } else if (exponent < -128) {
+ // This configuration (with at most 20 digits) means that all digits must be
+ // 0.
+ ASSERT(fractional_count <= 20);
+ buffer[0] = '\0';
+ *length = 0;
+ *decimal_point = -fractional_count;
+ } else {
+ *decimal_point = 0;
+ FillFractionals(significand, exponent, fractional_count,
+ buffer, length, decimal_point);
+ }
+ TrimZeros(buffer, length, decimal_point);
+ buffer[*length] = '\0';
+ if ((*length) == 0) {
+ // The string is empty and the decimal_point thus has no importance. Mimic
+ // Gay's dtoa and set it to -fractional_count.
+ *decimal_point = -fractional_count;
+ }
+ return true;
+}
+
+} // namespace double_conversion
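
As a standalone illustration of the multiply-by-5 trick documented in FillFractionals above (a sketch under the stated preconditions, not library code): multiplying the fixed-point remainder by 5 and moving the binary point one position to the right is the same as multiplying by 10, but keeps every intermediate value small enough that a plain uint64_t never overflows.

    #include <cstdint>
    #include <cstdio>

    // Sketch: emit decimal digits of a fixed-point fraction 'fractionals' whose
    // binary point sits at bit 'point' (value == fractionals / 2^point), mirroring
    // the 64-bit branch of FillFractionals but without rounding or buffer handling.
    // Preconditions as above: 0 < point <= 64 and fractionals < 2^56.
    int main() {
      uint64_t fractionals = 13;  // 13 / 2^4 == 0.8125
      int point = 4;
      for (int i = 0; i < 20 && fractionals != 0; ++i) {
        // fractionals*5 / 2^(point-1) == fractionals*10 / 2^point: same next digit,
        // but the intermediate product stays within 64 bits.
        fractionals *= 5;
        point--;
        int digit = static_cast<int>(fractionals >> point);
        fractionals -= static_cast<uint64_t>(digit) << point;
        std::printf("%d", digit);  // prints 8, 1, 2, 5 for 0.8125
      }
      std::printf("\n");
      return 0;
    }
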
diff --git a/contrib/libs/double-conversion/fixed-dtoa.h b/contrib/libs/double-conversion/fixed-dtoa.h
index e4eccca371..3bdd08e21f 100644
--- a/contrib/libs/double-conversion/fixed-dtoa.h
+++ b/contrib/libs/double-conversion/fixed-dtoa.h
@@ -1,56 +1,56 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_
-#define DOUBLE_CONVERSION_FIXED_DTOA_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-// Produces digits necessary to print a given number with
-// 'fractional_count' digits after the decimal point.
-// The buffer must be big enough to hold the result plus one terminating null
-// character.
-//
-// The produced digits might be too short in which case the caller has to fill
-// the gaps with '0's.
-// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and
-// decimal_point = -2.
-// Halfway cases are rounded towards +/-Infinity (away from 0). The call
-// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0.
-// The returned buffer may contain digits that would be truncated from the
-// shortest representation of the input.
-//
-// This method only works for some parameters. If it can't handle the input it
-// returns false. The output is null-terminated when the function succeeds.
-bool FastFixedDtoa(double v, int fractional_count,
- Vector<char> buffer, int* length, int* decimal_point);
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_FIXED_DTOA_H_
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_
+#define DOUBLE_CONVERSION_FIXED_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// Produces digits necessary to print a given number with
+// 'fractional_count' digits after the decimal point.
+// The buffer must be big enough to hold the result plus one terminating null
+// character.
+//
+// The produced digits might be too short in which case the caller has to fill
+// the gaps with '0's.
+// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and
+// decimal_point = -2.
+// Halfway cases are rounded towards +/-Infinity (away from 0). The call
+// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0.
+// The returned buffer may contain digits that would be truncated from the
+// shortest representation of the input.
+//
+// This method only works for some parameters. If it can't handle the input it
+// returns false. The output is null-terminated when the function succeeds.
+bool FastFixedDtoa(double v, int fractional_count,
+ Vector<char> buffer, int* length, int* decimal_point);
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_FIXED_DTOA_H_
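
A matching usage sketch for FastFixedDtoa as documented above (again an assumption-laden sketch relying on the same headers and the Vector<char> helper, not part of the diff): when decimal_point comes back negative, the caller supplies the leading zeros.

    #include <cstdio>
    #include "fixed-dtoa.h"

    int main() {
      using namespace double_conversion;
      char digits[64];  // comfortably larger than the produced digits plus '\0'
      int length = 0;
      int decimal_point = 0;
      if (FastFixedDtoa(0.001, 5, Vector<char>(digits, static_cast<int>(sizeof(digits))),
                        &length, &decimal_point)) {
        // Per the header comment, this call may yield digits == "1" and
        // decimal_point == -2; the caller inserts the leading zeros: "0.001".
        std::printf("digits=%s length=%d decimal_point=%d\n",
                    digits, length, decimal_point);
      }
      return 0;
    }
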
diff --git a/contrib/libs/double-conversion/ieee.h b/contrib/libs/double-conversion/ieee.h
index 05da3fcc35..4a5fe8f9c0 100644
--- a/contrib/libs/double-conversion/ieee.h
+++ b/contrib/libs/double-conversion/ieee.h
@@ -1,402 +1,402 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_DOUBLE_H_
-#define DOUBLE_CONVERSION_DOUBLE_H_
-
-#include "diy-fp.h"
-
-namespace double_conversion {
-
-// We assume that doubles and uint64_t have the same endianness.
-static uint64_t double_to_uint64(double d) { return BitCast<uint64_t>(d); }
-static double uint64_to_double(uint64_t d64) { return BitCast<double>(d64); }
-static uint32_t float_to_uint32(float f) { return BitCast<uint32_t>(f); }
-static float uint32_to_float(uint32_t d32) { return BitCast<float>(d32); }
-
-// Helper functions for doubles.
-class Double {
- public:
- static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);
- static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);
- static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
- static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);
- static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit.
- static const int kSignificandSize = 53;
-
- Double() : d64_(0) {}
- explicit Double(double d) : d64_(double_to_uint64(d)) {}
- explicit Double(uint64_t d64) : d64_(d64) {}
- explicit Double(DiyFp diy_fp)
- : d64_(DiyFpToUint64(diy_fp)) {}
-
- // The value encoded by this Double must be greater or equal to +0.0.
- // It must not be special (infinity, or NaN).
- DiyFp AsDiyFp() const {
- ASSERT(Sign() > 0);
- ASSERT(!IsSpecial());
- return DiyFp(Significand(), Exponent());
- }
-
- // The value encoded by this Double must be strictly greater than 0.
- DiyFp AsNormalizedDiyFp() const {
- ASSERT(value() > 0.0);
- uint64_t f = Significand();
- int e = Exponent();
-
- // The current double could be a denormal.
- while ((f & kHiddenBit) == 0) {
- f <<= 1;
- e--;
- }
- // Do the final shifts in one go.
- f <<= DiyFp::kSignificandSize - kSignificandSize;
- e -= DiyFp::kSignificandSize - kSignificandSize;
- return DiyFp(f, e);
- }
-
- // Returns the double's bit as uint64.
- uint64_t AsUint64() const {
- return d64_;
- }
-
- // Returns the next greater double. Returns +infinity on input +infinity.
- double NextDouble() const {
- if (d64_ == kInfinity) return Double(kInfinity).value();
- if (Sign() < 0 && Significand() == 0) {
- // -0.0
- return 0.0;
- }
- if (Sign() < 0) {
- return Double(d64_ - 1).value();
- } else {
- return Double(d64_ + 1).value();
- }
- }
-
- double PreviousDouble() const {
- if (d64_ == (kInfinity | kSignMask)) return -Infinity();
- if (Sign() < 0) {
- return Double(d64_ + 1).value();
- } else {
- if (Significand() == 0) return -0.0;
- return Double(d64_ - 1).value();
- }
- }
-
- int Exponent() const {
- if (IsDenormal()) return kDenormalExponent;
-
- uint64_t d64 = AsUint64();
- int biased_e =
- static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
- return biased_e - kExponentBias;
- }
-
- uint64_t Significand() const {
- uint64_t d64 = AsUint64();
- uint64_t significand = d64 & kSignificandMask;
- if (!IsDenormal()) {
- return significand + kHiddenBit;
- } else {
- return significand;
- }
- }
-
- // Returns true if the double is a denormal.
- bool IsDenormal() const {
- uint64_t d64 = AsUint64();
- return (d64 & kExponentMask) == 0;
- }
-
- // We consider denormals not to be special.
- // Hence only Infinity and NaN are special.
- bool IsSpecial() const {
- uint64_t d64 = AsUint64();
- return (d64 & kExponentMask) == kExponentMask;
- }
-
- bool IsNan() const {
- uint64_t d64 = AsUint64();
- return ((d64 & kExponentMask) == kExponentMask) &&
- ((d64 & kSignificandMask) != 0);
- }
-
- bool IsInfinite() const {
- uint64_t d64 = AsUint64();
- return ((d64 & kExponentMask) == kExponentMask) &&
- ((d64 & kSignificandMask) == 0);
- }
-
- int Sign() const {
- uint64_t d64 = AsUint64();
- return (d64 & kSignMask) == 0? 1: -1;
- }
-
- // Precondition: the value encoded by this Double must be greater than or
- // equal to +0.0.
- DiyFp UpperBoundary() const {
- ASSERT(Sign() > 0);
- return DiyFp(Significand() * 2 + 1, Exponent() - 1);
- }
-
- // Computes the two boundaries of this.
- // The bigger boundary (m_plus) is normalized. The lower boundary has the same
- // exponent as m_plus.
- // Precondition: the value encoded by this Double must be greater than 0.
- void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
- ASSERT(value() > 0.0);
- DiyFp v = this->AsDiyFp();
- DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
- DiyFp m_minus;
- if (LowerBoundaryIsCloser()) {
- m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
- } else {
- m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
- }
- m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
- m_minus.set_e(m_plus.e());
- *out_m_plus = m_plus;
- *out_m_minus = m_minus;
- }
-
- bool LowerBoundaryIsCloser() const {
- // If the significand is of the form f == 2^p-1 then the lower boundary is
- // closer.
- // Think of v = 1000e10 and v- = 9999e9.
- // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
- // at a distance of 1e8.
- // The only exception is for the smallest normal: the largest denormal is
- // at the same distance as its successor.
- // Note: denormals have the same exponent as the smallest normals.
- bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
- return physical_significand_is_zero && (Exponent() != kDenormalExponent);
- }
-
- double value() const { return uint64_to_double(d64_); }
-
- // Returns the significand size for a given order of magnitude.
- // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
- // This function returns the number of significant binary digits v will have
- // once it's encoded into a double. In almost all cases this is equal to
- // kSignificandSize. The only exceptions are denormals. They start with
- // leading zeroes and their effective significand-size is hence smaller.
- static int SignificandSizeForOrderOfMagnitude(int order) {
- if (order >= (kDenormalExponent + kSignificandSize)) {
- return kSignificandSize;
- }
- if (order <= kDenormalExponent) return 0;
- return order - kDenormalExponent;
- }
-
- static double Infinity() {
- return Double(kInfinity).value();
- }
-
- static double NaN() {
- return Double(kNaN).value();
- }
-
- private:
- static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
- static const int kDenormalExponent = -kExponentBias + 1;
- static const int kMaxExponent = 0x7FF - kExponentBias;
- static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
- static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);
-
- const uint64_t d64_;
-
- static uint64_t DiyFpToUint64(DiyFp diy_fp) {
- uint64_t significand = diy_fp.f();
- int exponent = diy_fp.e();
- while (significand > kHiddenBit + kSignificandMask) {
- significand >>= 1;
- exponent++;
- }
- if (exponent >= kMaxExponent) {
- return kInfinity;
- }
- if (exponent < kDenormalExponent) {
- return 0;
- }
- while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
- significand <<= 1;
- exponent--;
- }
- uint64_t biased_exponent;
- if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
- biased_exponent = 0;
- } else {
- biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
- }
- return (significand & kSignificandMask) |
- (biased_exponent << kPhysicalSignificandSize);
- }
-
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DOUBLE_H_
+#define DOUBLE_CONVERSION_DOUBLE_H_
+
+#include "diy-fp.h"
+
+namespace double_conversion {
+
+// We assume that doubles and uint64_t have the same endianness.
+static uint64_t double_to_uint64(double d) { return BitCast<uint64_t>(d); }
+static double uint64_to_double(uint64_t d64) { return BitCast<double>(d64); }
+static uint32_t float_to_uint32(float f) { return BitCast<uint32_t>(f); }
+static float uint32_to_float(uint32_t d32) { return BitCast<float>(d32); }
+
+// Helper functions for doubles.
+class Double {
+ public:
+ static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);
+ static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);
+ static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
+ static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);
+ static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit.
+ static const int kSignificandSize = 53;
+
+ Double() : d64_(0) {}
+ explicit Double(double d) : d64_(double_to_uint64(d)) {}
+ explicit Double(uint64_t d64) : d64_(d64) {}
+ explicit Double(DiyFp diy_fp)
+ : d64_(DiyFpToUint64(diy_fp)) {}
+
+ // The value encoded by this Double must be greater than or equal to +0.0.
+ // It must not be special (infinity, or NaN).
+ DiyFp AsDiyFp() const {
+ ASSERT(Sign() > 0);
+ ASSERT(!IsSpecial());
+ return DiyFp(Significand(), Exponent());
+ }
+
+ // The value encoded by this Double must be strictly greater than 0.
+ DiyFp AsNormalizedDiyFp() const {
+ ASSERT(value() > 0.0);
+ uint64_t f = Significand();
+ int e = Exponent();
+
+ // The current double could be a denormal.
+ while ((f & kHiddenBit) == 0) {
+ f <<= 1;
+ e--;
+ }
+ // Do the final shifts in one go.
+ f <<= DiyFp::kSignificandSize - kSignificandSize;
+ e -= DiyFp::kSignificandSize - kSignificandSize;
+ return DiyFp(f, e);
+ }
+
+ // Returns the double's bit as uint64.
+ uint64_t AsUint64() const {
+ return d64_;
+ }
+
+ // Returns the next greater double. Returns +infinity on input +infinity.
+ double NextDouble() const {
+ if (d64_ == kInfinity) return Double(kInfinity).value();
+ if (Sign() < 0 && Significand() == 0) {
+ // -0.0
+ return 0.0;
+ }
+ if (Sign() < 0) {
+ return Double(d64_ - 1).value();
+ } else {
+ return Double(d64_ + 1).value();
+ }
+ }
+
+ double PreviousDouble() const {
+ if (d64_ == (kInfinity | kSignMask)) return -Infinity();
+ if (Sign() < 0) {
+ return Double(d64_ + 1).value();
+ } else {
+ if (Significand() == 0) return -0.0;
+ return Double(d64_ - 1).value();
+ }
+ }
+
+ int Exponent() const {
+ if (IsDenormal()) return kDenormalExponent;
+
+ uint64_t d64 = AsUint64();
+ int biased_e =
+ static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
+ return biased_e - kExponentBias;
+ }
+
+ uint64_t Significand() const {
+ uint64_t d64 = AsUint64();
+ uint64_t significand = d64 & kSignificandMask;
+ if (!IsDenormal()) {
+ return significand + kHiddenBit;
+ } else {
+ return significand;
+ }
+ }
+
+ // Returns true if the double is a denormal.
+ bool IsDenormal() const {
+ uint64_t d64 = AsUint64();
+ return (d64 & kExponentMask) == 0;
+ }
+
+ // We consider denormals not to be special.
+ // Hence only Infinity and NaN are special.
+ bool IsSpecial() const {
+ uint64_t d64 = AsUint64();
+ return (d64 & kExponentMask) == kExponentMask;
+ }
+
+ bool IsNan() const {
+ uint64_t d64 = AsUint64();
+ return ((d64 & kExponentMask) == kExponentMask) &&
+ ((d64 & kSignificandMask) != 0);
+ }
+
+ bool IsInfinite() const {
+ uint64_t d64 = AsUint64();
+ return ((d64 & kExponentMask) == kExponentMask) &&
+ ((d64 & kSignificandMask) == 0);
+ }
+
+ int Sign() const {
+ uint64_t d64 = AsUint64();
+ return (d64 & kSignMask) == 0? 1: -1;
+ }
+
+ // Precondition: the value encoded by this Double must be greater than or
+ // equal to +0.0.
+ DiyFp UpperBoundary() const {
+ ASSERT(Sign() > 0);
+ return DiyFp(Significand() * 2 + 1, Exponent() - 1);
+ }
+
+ // Computes the two boundaries of this.
+ // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+ // exponent as m_plus.
+ // Precondition: the value encoded by this Double must be greater than 0.
+ void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+ ASSERT(value() > 0.0);
+ DiyFp v = this->AsDiyFp();
+ DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+ DiyFp m_minus;
+ if (LowerBoundaryIsCloser()) {
+ m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+ } else {
+ m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+ }
+ m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
+ m_minus.set_e(m_plus.e());
+ *out_m_plus = m_plus;
+ *out_m_minus = m_minus;
+ }
+
+ bool LowerBoundaryIsCloser() const {
+ // If the significand is of the form f == 2^p-1 then the lower boundary is
+ // closer.
+ // Think of v = 1000e10 and v- = 9999e9.
+ // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+ // at a distance of 1e8.
+ // The only exception is for the smallest normal: the largest denormal is
+ // at the same distance as its successor.
+ // Note: denormals have the same exponent as the smallest normals.
+ bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
+ return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+ }
+
+ double value() const { return uint64_to_double(d64_); }
+
+ // Returns the significand size for a given order of magnitude.
+ // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
+ // This function returns the number of significant binary digits v will have
+ // once it's encoded into a double. In almost all cases this is equal to
+ // kSignificandSize. The only exceptions are denormals. They start with
+ // leading zeroes and their effective significand-size is hence smaller.
+ static int SignificandSizeForOrderOfMagnitude(int order) {
+ if (order >= (kDenormalExponent + kSignificandSize)) {
+ return kSignificandSize;
+ }
+ if (order <= kDenormalExponent) return 0;
+ return order - kDenormalExponent;
+ }
+
+ static double Infinity() {
+ return Double(kInfinity).value();
+ }
+
+ static double NaN() {
+ return Double(kNaN).value();
+ }
+
+ private:
+ static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
+ static const int kDenormalExponent = -kExponentBias + 1;
+ static const int kMaxExponent = 0x7FF - kExponentBias;
+ static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
+ static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);
+
+ const uint64_t d64_;
+
+ static uint64_t DiyFpToUint64(DiyFp diy_fp) {
+ uint64_t significand = diy_fp.f();
+ int exponent = diy_fp.e();
+ while (significand > kHiddenBit + kSignificandMask) {
+ significand >>= 1;
+ exponent++;
+ }
+ if (exponent >= kMaxExponent) {
+ return kInfinity;
+ }
+ if (exponent < kDenormalExponent) {
+ return 0;
+ }
+ while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
+ significand <<= 1;
+ exponent--;
+ }
+ uint64_t biased_exponent;
+ if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
+ biased_exponent = 0;
+ } else {
+ biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
+ }
+ return (significand & kSignificandMask) |
+ (biased_exponent << kPhysicalSignificandSize);
+ }
+
DC_DISALLOW_COPY_AND_ASSIGN(Double);
-};
-
-class Single {
- public:
- static const uint32_t kSignMask = 0x80000000;
- static const uint32_t kExponentMask = 0x7F800000;
- static const uint32_t kSignificandMask = 0x007FFFFF;
- static const uint32_t kHiddenBit = 0x00800000;
- static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit.
- static const int kSignificandSize = 24;
-
- Single() : d32_(0) {}
- explicit Single(float f) : d32_(float_to_uint32(f)) {}
- explicit Single(uint32_t d32) : d32_(d32) {}
-
- // The value encoded by this Single must be greater or equal to +0.0.
- // It must not be special (infinity, or NaN).
- DiyFp AsDiyFp() const {
- ASSERT(Sign() > 0);
- ASSERT(!IsSpecial());
- return DiyFp(Significand(), Exponent());
- }
-
- // Returns the single's bits as a uint32.
- uint32_t AsUint32() const {
- return d32_;
- }
-
- int Exponent() const {
- if (IsDenormal()) return kDenormalExponent;
-
- uint32_t d32 = AsUint32();
- int biased_e =
- static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize);
- return biased_e - kExponentBias;
- }
-
- uint32_t Significand() const {
- uint32_t d32 = AsUint32();
- uint32_t significand = d32 & kSignificandMask;
- if (!IsDenormal()) {
- return significand + kHiddenBit;
- } else {
- return significand;
- }
- }
-
- // Returns true if the single is a denormal.
- bool IsDenormal() const {
- uint32_t d32 = AsUint32();
- return (d32 & kExponentMask) == 0;
- }
-
- // We consider denormals not to be special.
- // Hence only Infinity and NaN are special.
- bool IsSpecial() const {
- uint32_t d32 = AsUint32();
- return (d32 & kExponentMask) == kExponentMask;
- }
-
- bool IsNan() const {
- uint32_t d32 = AsUint32();
- return ((d32 & kExponentMask) == kExponentMask) &&
- ((d32 & kSignificandMask) != 0);
- }
-
- bool IsInfinite() const {
- uint32_t d32 = AsUint32();
- return ((d32 & kExponentMask) == kExponentMask) &&
- ((d32 & kSignificandMask) == 0);
- }
-
- int Sign() const {
- uint32_t d32 = AsUint32();
- return (d32 & kSignMask) == 0? 1: -1;
- }
-
- // Computes the two boundaries of this.
- // The bigger boundary (m_plus) is normalized. The lower boundary has the same
- // exponent as m_plus.
- // Precondition: the value encoded by this Single must be greater than 0.
- void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
- ASSERT(value() > 0.0);
- DiyFp v = this->AsDiyFp();
- DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
- DiyFp m_minus;
- if (LowerBoundaryIsCloser()) {
- m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
- } else {
- m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
- }
- m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
- m_minus.set_e(m_plus.e());
- *out_m_plus = m_plus;
- *out_m_minus = m_minus;
- }
-
- // Precondition: the value encoded by this Single must be greater than or
- // equal to +0.0.
- DiyFp UpperBoundary() const {
- ASSERT(Sign() > 0);
- return DiyFp(Significand() * 2 + 1, Exponent() - 1);
- }
-
- bool LowerBoundaryIsCloser() const {
- // If the significand is of the form f == 2^p-1 then the lower boundary is
- // closer.
- // Think of v = 1000e10 and v- = 9999e9.
- // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
- // at a distance of 1e8.
- // The only exception is for the smallest normal: the largest denormal is
- // at the same distance as its successor.
- // Note: denormals have the same exponent as the smallest normals.
- bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0);
- return physical_significand_is_zero && (Exponent() != kDenormalExponent);
- }
-
- float value() const { return uint32_to_float(d32_); }
-
- static float Infinity() {
- return Single(kInfinity).value();
- }
-
- static float NaN() {
- return Single(kNaN).value();
- }
-
- private:
- static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
- static const int kDenormalExponent = -kExponentBias + 1;
- static const int kMaxExponent = 0xFF - kExponentBias;
- static const uint32_t kInfinity = 0x7F800000;
- static const uint32_t kNaN = 0x7FC00000;
-
- const uint32_t d32_;
-
+};
+
+class Single {
+ public:
+ static const uint32_t kSignMask = 0x80000000;
+ static const uint32_t kExponentMask = 0x7F800000;
+ static const uint32_t kSignificandMask = 0x007FFFFF;
+ static const uint32_t kHiddenBit = 0x00800000;
+ static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit.
+ static const int kSignificandSize = 24;
+
+ Single() : d32_(0) {}
+ explicit Single(float f) : d32_(float_to_uint32(f)) {}
+ explicit Single(uint32_t d32) : d32_(d32) {}
+
+ // The value encoded by this Single must be greater or equal to +0.0.
+ // It must not be special (infinity, or NaN).
+ DiyFp AsDiyFp() const {
+ ASSERT(Sign() > 0);
+ ASSERT(!IsSpecial());
+ return DiyFp(Significand(), Exponent());
+ }
+
+ // Returns the single's bits as a uint32.
+ uint32_t AsUint32() const {
+ return d32_;
+ }
+
+ int Exponent() const {
+ if (IsDenormal()) return kDenormalExponent;
+
+ uint32_t d32 = AsUint32();
+ int biased_e =
+ static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize);
+ return biased_e - kExponentBias;
+ }
+
+ uint32_t Significand() const {
+ uint32_t d32 = AsUint32();
+ uint32_t significand = d32 & kSignificandMask;
+ if (!IsDenormal()) {
+ return significand + kHiddenBit;
+ } else {
+ return significand;
+ }
+ }
+
+ // Returns true if the single is a denormal.
+ bool IsDenormal() const {
+ uint32_t d32 = AsUint32();
+ return (d32 & kExponentMask) == 0;
+ }
+
+ // We consider denormals not to be special.
+ // Hence only Infinity and NaN are special.
+ bool IsSpecial() const {
+ uint32_t d32 = AsUint32();
+ return (d32 & kExponentMask) == kExponentMask;
+ }
+
+ bool IsNan() const {
+ uint32_t d32 = AsUint32();
+ return ((d32 & kExponentMask) == kExponentMask) &&
+ ((d32 & kSignificandMask) != 0);
+ }
+
+ bool IsInfinite() const {
+ uint32_t d32 = AsUint32();
+ return ((d32 & kExponentMask) == kExponentMask) &&
+ ((d32 & kSignificandMask) == 0);
+ }
+
+ int Sign() const {
+ uint32_t d32 = AsUint32();
+ return (d32 & kSignMask) == 0? 1: -1;
+ }
+
+ // Computes the two boundaries of this.
+ // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+ // exponent as m_plus.
+ // Precondition: the value encoded by this Single must be greater than 0.
+ void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+ ASSERT(value() > 0.0);
+ DiyFp v = this->AsDiyFp();
+ DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+ DiyFp m_minus;
+ if (LowerBoundaryIsCloser()) {
+ m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+ } else {
+ m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+ }
+ m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
+ m_minus.set_e(m_plus.e());
+ *out_m_plus = m_plus;
+ *out_m_minus = m_minus;
+ }
+
+ // Precondition: the value encoded by this Single must be greater than or
+ // equal to +0.0.
+ DiyFp UpperBoundary() const {
+ ASSERT(Sign() > 0);
+ return DiyFp(Significand() * 2 + 1, Exponent() - 1);
+ }
+
+ bool LowerBoundaryIsCloser() const {
+ // If the significand is of the form f == 2^p-1 then the lower boundary is
+ // closer.
+ // Think of v = 1000e10 and v- = 9999e9.
+ // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+ // at a distance of 1e8.
+ // The only exception is for the smallest normal: the largest denormal is
+ // at the same distance as its successor.
+ // Note: denormals have the same exponent as the smallest normals.
+ bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0);
+ return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+ }
+
+ float value() const { return uint32_to_float(d32_); }
+
+ static float Infinity() {
+ return Single(kInfinity).value();
+ }
+
+ static float NaN() {
+ return Single(kNaN).value();
+ }
+
+ private:
+ static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
+ static const int kDenormalExponent = -kExponentBias + 1;
+ static const int kMaxExponent = 0xFF - kExponentBias;
+ static const uint32_t kInfinity = 0x7F800000;
+ static const uint32_t kNaN = 0x7FC00000;
+
+ const uint32_t d32_;
+
DC_DISALLOW_COPY_AND_ASSIGN(Single);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_DOUBLE_H_
+};
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_DOUBLE_H_
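
The ieee.h hunk above restores the Single helper, which is a thin view over the IEEE 754 single-precision layout: one sign bit, eight exponent bits, and 23 stored significand bits plus an implicit hidden bit for normal values; the class biases the exponent so that the significand can be read as a plain integer. A minimal self-contained sketch of that decomposition (not library code; only the constants are taken from the class above, everything else is illustrative):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // Constants mirror the Single class above; the rest is ad hoc.
      const uint32_t kSignMask        = 0x80000000;
      const uint32_t kExponentMask    = 0x7F800000;
      const uint32_t kSignificandMask = 0x007FFFFF;
      const uint32_t kHiddenBit       = 0x00800000;
      const int kPhysicalSignificandSize = 23;                     // stored bits, hidden bit excluded
      const int kExponentBias = 0x7F + kPhysicalSignificandSize;   // 150

      float f = 6.5f;                                    // 1.101b * 2^2
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);               // grab the raw bits portably

      int sign = (bits & kSignMask) ? -1 : 1;
      bool denormal = (bits & kExponentMask) == 0;
      int exponent = denormal
          ? 1 - kExponentBias                            // kDenormalExponent
          : static_cast<int>((bits & kExponentMask) >> kPhysicalSignificandSize) - kExponentBias;
      uint32_t significand = (bits & kSignificandMask) | (denormal ? 0u : kHiddenBit);

      // Reassemble significand * 2^exponent to confirm the decomposition round-trips.
      double value = sign * (static_cast<double>(significand) * std::pow(2.0, exponent));
      std::printf("sign=%d significand=0x%06x exponent=%d value=%g\n",
                  sign, static_cast<unsigned>(significand), exponent, value);
      return 0;
    }

Denormals are the only wrinkle: their exponent field is zero, so the hidden bit is not added and the exponent is pinned to kDenormalExponent, exactly as Single::Exponent() and Single::Significand() do above.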
diff --git a/contrib/libs/double-conversion/strtod.cc b/contrib/libs/double-conversion/strtod.cc
index 8dd07c19ab..a75cf5d9f1 100644
--- a/contrib/libs/double-conversion/strtod.cc
+++ b/contrib/libs/double-conversion/strtod.cc
@@ -1,477 +1,477 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#include <climits>
#include <cstdarg>
-
-#include "bignum.h"
-#include "cached-powers.h"
-#include "ieee.h"
+
+#include "bignum.h"
+#include "cached-powers.h"
+#include "ieee.h"
#include "strtod.h"
-
-namespace double_conversion {
-
-// 2^53 = 9007199254740992.
-// Any integer with at most 15 decimal digits will hence fit into a double
-// (which has a 53bit significand) without loss of precision.
-static const int kMaxExactDoubleIntegerDecimalDigits = 15;
-// 2^64 = 18446744073709551616 > 10^19
-static const int kMaxUint64DecimalDigits = 19;
-
-// Max double: 1.7976931348623157 x 10^308
-// Min non-zero double: 4.9406564584124654 x 10^-324
-// Any x >= 10^309 is interpreted as +infinity.
-// Any x <= 10^-324 is interpreted as 0.
-// Note that 2.5e-324 (despite being smaller than the min double) will be read
-// as non-zero (equal to the min non-zero double).
-static const int kMaxDecimalPower = 309;
-static const int kMinDecimalPower = -324;
-
-// 2^64 = 18446744073709551616
-static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
-
-
-static const double exact_powers_of_ten[] = {
- 1.0, // 10^0
- 10.0,
- 100.0,
- 1000.0,
- 10000.0,
- 100000.0,
- 1000000.0,
- 10000000.0,
- 100000000.0,
- 1000000000.0,
- 10000000000.0, // 10^10
- 100000000000.0,
- 1000000000000.0,
- 10000000000000.0,
- 100000000000000.0,
- 1000000000000000.0,
- 10000000000000000.0,
- 100000000000000000.0,
- 1000000000000000000.0,
- 10000000000000000000.0,
- 100000000000000000000.0, // 10^20
- 1000000000000000000000.0,
- // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
- 10000000000000000000000.0
-};
-static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
-
-// Maximum number of significant digits in the decimal representation.
-// In fact the value is 772 (see conversions.cc), but to give us some margin
-// we round up to 780.
-static const int kMaxSignificantDecimalDigits = 780;
-
-static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
- for (int i = 0; i < buffer.length(); i++) {
- if (buffer[i] != '0') {
- return buffer.SubVector(i, buffer.length());
- }
- }
- return Vector<const char>(buffer.start(), 0);
-}
-
-
-static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
- for (int i = buffer.length() - 1; i >= 0; --i) {
- if (buffer[i] != '0') {
- return buffer.SubVector(0, i + 1);
- }
- }
- return Vector<const char>(buffer.start(), 0);
-}
-
-
-static void CutToMaxSignificantDigits(Vector<const char> buffer,
- int exponent,
- char* significant_buffer,
- int* significant_exponent) {
- for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
- significant_buffer[i] = buffer[i];
- }
- // The input buffer has been trimmed. Therefore the last digit must be
- // different from '0'.
- ASSERT(buffer[buffer.length() - 1] != '0');
- // Set the last digit to be non-zero. This is sufficient to guarantee
- // correct rounding.
- significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
- *significant_exponent =
- exponent + (buffer.length() - kMaxSignificantDecimalDigits);
-}
-
-
-// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
-// If possible the input-buffer is reused, but if the buffer needs to be
-// modified (due to cutting), then the input needs to be copied into the
-// buffer_copy_space.
-static void TrimAndCut(Vector<const char> buffer, int exponent,
- char* buffer_copy_space, int space_size,
- Vector<const char>* trimmed, int* updated_exponent) {
- Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
- Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
- exponent += left_trimmed.length() - right_trimmed.length();
- if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
- (void) space_size; // Mark variable as used.
- ASSERT(space_size >= kMaxSignificantDecimalDigits);
- CutToMaxSignificantDigits(right_trimmed, exponent,
- buffer_copy_space, updated_exponent);
- *trimmed = Vector<const char>(buffer_copy_space,
- kMaxSignificantDecimalDigits);
- } else {
- *trimmed = right_trimmed;
- *updated_exponent = exponent;
- }
-}
-
-
-// Reads digits from the buffer and converts them to a uint64.
-// Reads in as many digits as fit into a uint64.
-// When the string starts with "1844674407370955161" no further digit is read.
- // Since 2^64 = 18446744073709551616 it would still be possible to read another
- // digit if it was less than or equal to 6, but this would complicate the code.
-static uint64_t ReadUint64(Vector<const char> buffer,
- int* number_of_read_digits) {
- uint64_t result = 0;
- int i = 0;
- while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
- int digit = buffer[i++] - '0';
- ASSERT(0 <= digit && digit <= 9);
- result = 10 * result + digit;
- }
- *number_of_read_digits = i;
- return result;
-}
-
-
-// Reads a DiyFp from the buffer.
-// The returned DiyFp is not necessarily normalized.
-// If remaining_decimals is zero then the returned DiyFp is accurate.
-// Otherwise it has been rounded and has error of at most 1/2 ulp.
-static void ReadDiyFp(Vector<const char> buffer,
- DiyFp* result,
- int* remaining_decimals) {
- int read_digits;
- uint64_t significand = ReadUint64(buffer, &read_digits);
- if (buffer.length() == read_digits) {
- *result = DiyFp(significand, 0);
- *remaining_decimals = 0;
- } else {
- // Round the significand.
- if (buffer[read_digits] >= '5') {
- significand++;
- }
- // Compute the binary exponent.
- int exponent = 0;
- *result = DiyFp(significand, exponent);
- *remaining_decimals = buffer.length() - read_digits;
- }
-}
-
-
-static bool DoubleStrtod(Vector<const char> trimmed,
- int exponent,
- double* result) {
-#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
- // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
- // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
- // result is not accurate.
- // We know that Windows32 uses 64 bits and is therefore accurate.
- // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
- // the same problem.
- return false;
+
+namespace double_conversion {
+
+// 2^53 = 9007199254740992.
+// Any integer with at most 15 decimal digits will hence fit into a double
+// (which has a 53bit significand) without loss of precision.
+static const int kMaxExactDoubleIntegerDecimalDigits = 15;
+// 2^64 = 18446744073709551616 > 10^19
+static const int kMaxUint64DecimalDigits = 19;
+
+// Max double: 1.7976931348623157 x 10^308
+// Min non-zero double: 4.9406564584124654 x 10^-324
+// Any x >= 10^309 is interpreted as +infinity.
+// Any x <= 10^-324 is interpreted as 0.
+// Note that 2.5e-324 (despite being smaller than the min double) will be read
+// as non-zero (equal to the min non-zero double).
+static const int kMaxDecimalPower = 309;
+static const int kMinDecimalPower = -324;
+
+// 2^64 = 18446744073709551616
+static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
+
+
+static const double exact_powers_of_ten[] = {
+ 1.0, // 10^0
+ 10.0,
+ 100.0,
+ 1000.0,
+ 10000.0,
+ 100000.0,
+ 1000000.0,
+ 10000000.0,
+ 100000000.0,
+ 1000000000.0,
+ 10000000000.0, // 10^10
+ 100000000000.0,
+ 1000000000000.0,
+ 10000000000000.0,
+ 100000000000000.0,
+ 1000000000000000.0,
+ 10000000000000000.0,
+ 100000000000000000.0,
+ 1000000000000000000.0,
+ 10000000000000000000.0,
+ 100000000000000000000.0, // 10^20
+ 1000000000000000000000.0,
+ // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
+ 10000000000000000000000.0
+};
+static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
+
+// Maximum number of significant digits in the decimal representation.
+// In fact the value is 772 (see conversions.cc), but to give us some margin
+// we round up to 780.
+static const int kMaxSignificantDecimalDigits = 780;
+
+static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
+ for (int i = 0; i < buffer.length(); i++) {
+ if (buffer[i] != '0') {
+ return buffer.SubVector(i, buffer.length());
+ }
+ }
+ return Vector<const char>(buffer.start(), 0);
+}
+
+
+static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
+ for (int i = buffer.length() - 1; i >= 0; --i) {
+ if (buffer[i] != '0') {
+ return buffer.SubVector(0, i + 1);
+ }
+ }
+ return Vector<const char>(buffer.start(), 0);
+}
+
+
+static void CutToMaxSignificantDigits(Vector<const char> buffer,
+ int exponent,
+ char* significant_buffer,
+ int* significant_exponent) {
+ for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
+ significant_buffer[i] = buffer[i];
+ }
+ // The input buffer has been trimmed. Therefore the last digit must be
+ // different from '0'.
+ ASSERT(buffer[buffer.length() - 1] != '0');
+ // Set the last digit to be non-zero. This is sufficient to guarantee
+ // correct rounding.
+ significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
+ *significant_exponent =
+ exponent + (buffer.length() - kMaxSignificantDecimalDigits);
+}
+
+
+// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
+// If possible the input-buffer is reused, but if the buffer needs to be
+// modified (due to cutting), then the input needs to be copied into the
+// buffer_copy_space.
+static void TrimAndCut(Vector<const char> buffer, int exponent,
+ char* buffer_copy_space, int space_size,
+ Vector<const char>* trimmed, int* updated_exponent) {
+ Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
+ Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
+ exponent += left_trimmed.length() - right_trimmed.length();
+ if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
+ (void) space_size; // Mark variable as used.
+ ASSERT(space_size >= kMaxSignificantDecimalDigits);
+ CutToMaxSignificantDigits(right_trimmed, exponent,
+ buffer_copy_space, updated_exponent);
+ *trimmed = Vector<const char>(buffer_copy_space,
+ kMaxSignificantDecimalDigits);
+ } else {
+ *trimmed = right_trimmed;
+ *updated_exponent = exponent;
+ }
+}
+
+
+// Reads digits from the buffer and converts them to a uint64.
+// Reads in as many digits as fit into a uint64.
+// When the string starts with "1844674407370955161" no further digit is read.
+ // Since 2^64 = 18446744073709551616 it would still be possible to read another
+ // digit if it was less than or equal to 6, but this would complicate the code.
+static uint64_t ReadUint64(Vector<const char> buffer,
+ int* number_of_read_digits) {
+ uint64_t result = 0;
+ int i = 0;
+ while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
+ int digit = buffer[i++] - '0';
+ ASSERT(0 <= digit && digit <= 9);
+ result = 10 * result + digit;
+ }
+ *number_of_read_digits = i;
+ return result;
+}
+
+
+// Reads a DiyFp from the buffer.
+// The returned DiyFp is not necessarily normalized.
+// If remaining_decimals is zero then the returned DiyFp is accurate.
+// Otherwise it has been rounded and has error of at most 1/2 ulp.
+static void ReadDiyFp(Vector<const char> buffer,
+ DiyFp* result,
+ int* remaining_decimals) {
+ int read_digits;
+ uint64_t significand = ReadUint64(buffer, &read_digits);
+ if (buffer.length() == read_digits) {
+ *result = DiyFp(significand, 0);
+ *remaining_decimals = 0;
+ } else {
+ // Round the significand.
+ if (buffer[read_digits] >= '5') {
+ significand++;
+ }
+ // Compute the binary exponent.
+ int exponent = 0;
+ *result = DiyFp(significand, exponent);
+ *remaining_decimals = buffer.length() - read_digits;
+ }
+}
+
+
+static bool DoubleStrtod(Vector<const char> trimmed,
+ int exponent,
+ double* result) {
+#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
+ // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
+ // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
+ // result is not accurate.
+ // We know that Windows32 uses 64 bits and is therefore accurate.
+ // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
+ // the same problem.
+ return false;
#else
- if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
- int read_digits;
- // The trimmed input fits into a double.
- // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
- // can compute the result-double simply by multiplying (resp. dividing) the
- // two numbers.
- // This is possible because IEEE guarantees that floating-point operations
- // return the best possible approximation.
- if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
- // 10^-exponent fits into a double.
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
- ASSERT(read_digits == trimmed.length());
- *result /= exact_powers_of_ten[-exponent];
- return true;
- }
- if (0 <= exponent && exponent < kExactPowersOfTenSize) {
- // 10^exponent fits into a double.
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
- ASSERT(read_digits == trimmed.length());
- *result *= exact_powers_of_ten[exponent];
- return true;
- }
- int remaining_digits =
- kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
- if ((0 <= exponent) &&
- (exponent - remaining_digits < kExactPowersOfTenSize)) {
- // The trimmed string was short and we can multiply it with
- // 10^remaining_digits. As a result the remaining exponent now fits
- // into a double too.
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
- ASSERT(read_digits == trimmed.length());
- *result *= exact_powers_of_ten[remaining_digits];
- *result *= exact_powers_of_ten[exponent - remaining_digits];
- return true;
- }
- }
- return false;
+ if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
+ int read_digits;
+ // The trimmed input fits into a double.
+ // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
+ // can compute the result-double simply by multiplying (resp. dividing) the
+ // two numbers.
+ // This is possible because IEEE guarantees that floating-point operations
+ // return the best possible approximation.
+ if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
+ // 10^-exponent fits into a double.
+ *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+ ASSERT(read_digits == trimmed.length());
+ *result /= exact_powers_of_ten[-exponent];
+ return true;
+ }
+ if (0 <= exponent && exponent < kExactPowersOfTenSize) {
+ // 10^exponent fits into a double.
+ *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+ ASSERT(read_digits == trimmed.length());
+ *result *= exact_powers_of_ten[exponent];
+ return true;
+ }
+ int remaining_digits =
+ kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
+ if ((0 <= exponent) &&
+ (exponent - remaining_digits < kExactPowersOfTenSize)) {
+ // The trimmed string was short and we can multiply it with
+ // 10^remaining_digits. As a result the remaining exponent now fits
+ // into a double too.
+ *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+ ASSERT(read_digits == trimmed.length());
+ *result *= exact_powers_of_ten[remaining_digits];
+ *result *= exact_powers_of_ten[exponent - remaining_digits];
+ return true;
+ }
+ }
+ return false;
#endif
-}
-
-
-// Returns 10^exponent as an exact DiyFp.
-// The given exponent must be in the range [1; kDecimalExponentDistance[.
-static DiyFp AdjustmentPowerOfTen(int exponent) {
- ASSERT(0 < exponent);
- ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
- // Simply hardcode the remaining powers for the given decimal exponent
- // distance.
- ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
- switch (exponent) {
- case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
- case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
- case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
- case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
- case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
- case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
- case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
- default:
- UNREACHABLE();
- }
-}
-
-
-// If the function returns true then the result is the correct double.
-// Otherwise it is either the correct double or the double that is just below
-// the correct double.
-static bool DiyFpStrtod(Vector<const char> buffer,
- int exponent,
- double* result) {
- DiyFp input;
- int remaining_decimals;
- ReadDiyFp(buffer, &input, &remaining_decimals);
- // Since we may have dropped some digits the input is not accurate.
- // If remaining_decimals is different from 0 then the error is at most
- // .5 ulp (unit in the last place).
- // We don't want to deal with fractions and therefore keep a common
- // denominator.
- const int kDenominatorLog = 3;
- const int kDenominator = 1 << kDenominatorLog;
- // Move the remaining decimals into the exponent.
- exponent += remaining_decimals;
- uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
-
- int old_e = input.e();
- input.Normalize();
- error <<= old_e - input.e();
-
- ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
- if (exponent < PowersOfTenCache::kMinDecimalExponent) {
- *result = 0.0;
- return true;
- }
- DiyFp cached_power;
- int cached_decimal_exponent;
- PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
- &cached_power,
- &cached_decimal_exponent);
-
- if (cached_decimal_exponent != exponent) {
- int adjustment_exponent = exponent - cached_decimal_exponent;
- DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
- input.Multiply(adjustment_power);
- if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
- // The product of input with the adjustment power fits into a 64 bit
- // integer.
- ASSERT(DiyFp::kSignificandSize == 64);
- } else {
- // The adjustment power is exact. There is hence only an error of 0.5.
- error += kDenominator / 2;
- }
- }
-
- input.Multiply(cached_power);
- // The error introduced by a multiplication of a*b equals
- // error_a + error_b + error_a*error_b/2^64 + 0.5
- // Substituting a with 'input' and b with 'cached_power' we have
- // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
- // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
- int error_b = kDenominator / 2;
- int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
- int fixed_error = kDenominator / 2;
- error += error_b + error_ab + fixed_error;
-
- old_e = input.e();
- input.Normalize();
- error <<= old_e - input.e();
-
- // See if the double's significand changes if we add/subtract the error.
- int order_of_magnitude = DiyFp::kSignificandSize + input.e();
- int effective_significand_size =
- Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
- int precision_digits_count =
- DiyFp::kSignificandSize - effective_significand_size;
- if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
- // This can only happen for very small denormals. In this case the
- // half-way multiplied by the denominator exceeds the range of an uint64.
- // Simply shift everything to the right.
- int shift_amount = (precision_digits_count + kDenominatorLog) -
- DiyFp::kSignificandSize + 1;
- input.set_f(input.f() >> shift_amount);
- input.set_e(input.e() + shift_amount);
- // We add 1 for the lost precision of error, and kDenominator for
- // the lost precision of input.f().
- error = (error >> shift_amount) + 1 + kDenominator;
- precision_digits_count -= shift_amount;
- }
- // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
- ASSERT(DiyFp::kSignificandSize == 64);
- ASSERT(precision_digits_count < 64);
- uint64_t one64 = 1;
- uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
- uint64_t precision_bits = input.f() & precision_bits_mask;
- uint64_t half_way = one64 << (precision_digits_count - 1);
- precision_bits *= kDenominator;
- half_way *= kDenominator;
- DiyFp rounded_input(input.f() >> precision_digits_count,
- input.e() + precision_digits_count);
- if (precision_bits >= half_way + error) {
- rounded_input.set_f(rounded_input.f() + 1);
- }
- // If the last_bits are too close to the half-way case then we are too
- // inaccurate and round down. In this case we return false so that we can
- // fall back to a more precise algorithm.
-
- *result = Double(rounded_input).value();
- if (half_way - error < precision_bits && precision_bits < half_way + error) {
- // Too imprecise. The caller will have to fall back to a slower version.
- // However the returned number is guaranteed to be either the correct
- // double, or the next-lower double.
- return false;
- } else {
- return true;
- }
-}
-
-
-// Returns
-// - -1 if buffer*10^exponent < diy_fp.
-// - 0 if buffer*10^exponent == diy_fp.
-// - +1 if buffer*10^exponent > diy_fp.
-// Preconditions:
-// buffer.length() + exponent <= kMaxDecimalPower + 1
-// buffer.length() + exponent > kMinDecimalPower
-// buffer.length() <= kMaxDecimalSignificantDigits
-static int CompareBufferWithDiyFp(Vector<const char> buffer,
- int exponent,
- DiyFp diy_fp) {
- ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
- ASSERT(buffer.length() + exponent > kMinDecimalPower);
- ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
- // Make sure that the Bignum will be able to hold all our numbers.
- // Our Bignum implementation has a separate field for exponents. Shifts will
- // consume at most one bigit (< 64 bits).
- // ln(10) == 3.3219...
- ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
- Bignum buffer_bignum;
- Bignum diy_fp_bignum;
- buffer_bignum.AssignDecimalString(buffer);
- diy_fp_bignum.AssignUInt64(diy_fp.f());
- if (exponent >= 0) {
- buffer_bignum.MultiplyByPowerOfTen(exponent);
- } else {
- diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
- }
- if (diy_fp.e() > 0) {
- diy_fp_bignum.ShiftLeft(diy_fp.e());
- } else {
- buffer_bignum.ShiftLeft(-diy_fp.e());
- }
- return Bignum::Compare(buffer_bignum, diy_fp_bignum);
-}
-
-
-// Returns true if the guess is the correct double.
- // Returns false when the guess is either correct or the next-lower double.
-static bool ComputeGuess(Vector<const char> trimmed, int exponent,
- double* guess) {
- if (trimmed.length() == 0) {
- *guess = 0.0;
- return true;
- }
- if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
- *guess = Double::Infinity();
- return true;
- }
- if (exponent + trimmed.length() <= kMinDecimalPower) {
- *guess = 0.0;
- return true;
- }
-
- if (DoubleStrtod(trimmed, exponent, guess) ||
- DiyFpStrtod(trimmed, exponent, guess)) {
- return true;
- }
- if (*guess == Double::Infinity()) {
- return true;
- }
- return false;
-}
-
-double Strtod(Vector<const char> buffer, int exponent) {
- char copy_buffer[kMaxSignificantDecimalDigits];
- Vector<const char> trimmed;
- int updated_exponent;
- TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
- &trimmed, &updated_exponent);
- exponent = updated_exponent;
-
- double guess;
- bool is_correct = ComputeGuess(trimmed, exponent, &guess);
- if (is_correct) return guess;
-
- DiyFp upper_boundary = Double(guess).UpperBoundary();
- int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
- if (comparison < 0) {
- return guess;
- } else if (comparison > 0) {
- return Double(guess).NextDouble();
- } else if ((Double(guess).Significand() & 1) == 0) {
- // Round towards even.
- return guess;
- } else {
- return Double(guess).NextDouble();
- }
-}
-
+}
+
+
+// Returns 10^exponent as an exact DiyFp.
+// The given exponent must be in the range [1; kDecimalExponentDistance[.
+static DiyFp AdjustmentPowerOfTen(int exponent) {
+ ASSERT(0 < exponent);
+ ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
+ // Simply hardcode the remaining powers for the given decimal exponent
+ // distance.
+ ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
+ switch (exponent) {
+ case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
+ case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
+ case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
+ case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
+ case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
+ case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
+ case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
+ default:
+ UNREACHABLE();
+ }
+}
+
+
+// If the function returns true then the result is the correct double.
+// Otherwise it is either the correct double or the double that is just below
+// the correct double.
+static bool DiyFpStrtod(Vector<const char> buffer,
+ int exponent,
+ double* result) {
+ DiyFp input;
+ int remaining_decimals;
+ ReadDiyFp(buffer, &input, &remaining_decimals);
+ // Since we may have dropped some digits the input is not accurate.
+ // If remaining_decimals is different from 0 then the error is at most
+ // .5 ulp (unit in the last place).
+ // We don't want to deal with fractions and therefore keep a common
+ // denominator.
+ const int kDenominatorLog = 3;
+ const int kDenominator = 1 << kDenominatorLog;
+ // Move the remaining decimals into the exponent.
+ exponent += remaining_decimals;
+ uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
+
+ int old_e = input.e();
+ input.Normalize();
+ error <<= old_e - input.e();
+
+ ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
+ if (exponent < PowersOfTenCache::kMinDecimalExponent) {
+ *result = 0.0;
+ return true;
+ }
+ DiyFp cached_power;
+ int cached_decimal_exponent;
+ PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
+ &cached_power,
+ &cached_decimal_exponent);
+
+ if (cached_decimal_exponent != exponent) {
+ int adjustment_exponent = exponent - cached_decimal_exponent;
+ DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
+ input.Multiply(adjustment_power);
+ if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
+ // The product of input with the adjustment power fits into a 64 bit
+ // integer.
+ ASSERT(DiyFp::kSignificandSize == 64);
+ } else {
+ // The adjustment power is exact. There is hence only an error of 0.5.
+ error += kDenominator / 2;
+ }
+ }
+
+ input.Multiply(cached_power);
+ // The error introduced by a multiplication of a*b equals
+ // error_a + error_b + error_a*error_b/2^64 + 0.5
+ // Substituting a with 'input' and b with 'cached_power' we have
+ // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
+ // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
+ int error_b = kDenominator / 2;
+ int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
+ int fixed_error = kDenominator / 2;
+ error += error_b + error_ab + fixed_error;
+
+ old_e = input.e();
+ input.Normalize();
+ error <<= old_e - input.e();
+
+ // See if the double's significand changes if we add/subtract the error.
+ int order_of_magnitude = DiyFp::kSignificandSize + input.e();
+ int effective_significand_size =
+ Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
+ int precision_digits_count =
+ DiyFp::kSignificandSize - effective_significand_size;
+ if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
+ // This can only happen for very small denormals. In this case the
+ // half-way multiplied by the denominator exceeds the range of an uint64.
+ // Simply shift everything to the right.
+ int shift_amount = (precision_digits_count + kDenominatorLog) -
+ DiyFp::kSignificandSize + 1;
+ input.set_f(input.f() >> shift_amount);
+ input.set_e(input.e() + shift_amount);
+ // We add 1 for the lost precision of error, and kDenominator for
+ // the lost precision of input.f().
+ error = (error >> shift_amount) + 1 + kDenominator;
+ precision_digits_count -= shift_amount;
+ }
+ // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
+ ASSERT(DiyFp::kSignificandSize == 64);
+ ASSERT(precision_digits_count < 64);
+ uint64_t one64 = 1;
+ uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
+ uint64_t precision_bits = input.f() & precision_bits_mask;
+ uint64_t half_way = one64 << (precision_digits_count - 1);
+ precision_bits *= kDenominator;
+ half_way *= kDenominator;
+ DiyFp rounded_input(input.f() >> precision_digits_count,
+ input.e() + precision_digits_count);
+ if (precision_bits >= half_way + error) {
+ rounded_input.set_f(rounded_input.f() + 1);
+ }
+ // If the last_bits are too close to the half-way case then we are too
+ // inaccurate and round down. In this case we return false so that we can
+ // fall back to a more precise algorithm.
+
+ *result = Double(rounded_input).value();
+ if (half_way - error < precision_bits && precision_bits < half_way + error) {
+ // Too imprecise. The caller will have to fall back to a slower version.
+ // However the returned number is guaranteed to be either the correct
+ // double, or the next-lower double.
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// Returns
+// - -1 if buffer*10^exponent < diy_fp.
+// - 0 if buffer*10^exponent == diy_fp.
+// - +1 if buffer*10^exponent > diy_fp.
+// Preconditions:
+// buffer.length() + exponent <= kMaxDecimalPower + 1
+// buffer.length() + exponent > kMinDecimalPower
+// buffer.length() <= kMaxDecimalSignificantDigits
+static int CompareBufferWithDiyFp(Vector<const char> buffer,
+ int exponent,
+ DiyFp diy_fp) {
+ ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
+ ASSERT(buffer.length() + exponent > kMinDecimalPower);
+ ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
+ // Make sure that the Bignum will be able to hold all our numbers.
+ // Our Bignum implementation has a separate field for exponents. Shifts will
+ // consume at most one bigit (< 64 bits).
+ // ln(10) == 3.3219...
+ ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
+ Bignum buffer_bignum;
+ Bignum diy_fp_bignum;
+ buffer_bignum.AssignDecimalString(buffer);
+ diy_fp_bignum.AssignUInt64(diy_fp.f());
+ if (exponent >= 0) {
+ buffer_bignum.MultiplyByPowerOfTen(exponent);
+ } else {
+ diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
+ }
+ if (diy_fp.e() > 0) {
+ diy_fp_bignum.ShiftLeft(diy_fp.e());
+ } else {
+ buffer_bignum.ShiftLeft(-diy_fp.e());
+ }
+ return Bignum::Compare(buffer_bignum, diy_fp_bignum);
+}
+
+
+// Returns true if the guess is the correct double.
+ // Returns false when the guess is either correct or the next-lower double.
+static bool ComputeGuess(Vector<const char> trimmed, int exponent,
+ double* guess) {
+ if (trimmed.length() == 0) {
+ *guess = 0.0;
+ return true;
+ }
+ if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
+ *guess = Double::Infinity();
+ return true;
+ }
+ if (exponent + trimmed.length() <= kMinDecimalPower) {
+ *guess = 0.0;
+ return true;
+ }
+
+ if (DoubleStrtod(trimmed, exponent, guess) ||
+ DiyFpStrtod(trimmed, exponent, guess)) {
+ return true;
+ }
+ if (*guess == Double::Infinity()) {
+ return true;
+ }
+ return false;
+}
+
+double Strtod(Vector<const char> buffer, int exponent) {
+ char copy_buffer[kMaxSignificantDecimalDigits];
+ Vector<const char> trimmed;
+ int updated_exponent;
+ TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+ &trimmed, &updated_exponent);
+ exponent = updated_exponent;
+
+ double guess;
+ bool is_correct = ComputeGuess(trimmed, exponent, &guess);
+ if (is_correct) return guess;
+
+ DiyFp upper_boundary = Double(guess).UpperBoundary();
+ int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+ if (comparison < 0) {
+ return guess;
+ } else if (comparison > 0) {
+ return Double(guess).NextDouble();
+ } else if ((Double(guess).Significand() & 1) == 0) {
+ // Round towards even.
+ return guess;
+ } else {
+ return Double(guess).NextDouble();
+ }
+}
+
static float SanitizedDoubletof(double d) {
ASSERT(d >= 0.0);
// ASAN has a sanitize check that disallows casting doubles to floats if
@@ -496,85 +496,85 @@ static float SanitizedDoubletof(double d) {
}
}
-float Strtof(Vector<const char> buffer, int exponent) {
- char copy_buffer[kMaxSignificantDecimalDigits];
- Vector<const char> trimmed;
- int updated_exponent;
- TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
- &trimmed, &updated_exponent);
- exponent = updated_exponent;
-
- double double_guess;
- bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
-
+float Strtof(Vector<const char> buffer, int exponent) {
+ char copy_buffer[kMaxSignificantDecimalDigits];
+ Vector<const char> trimmed;
+ int updated_exponent;
+ TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+ &trimmed, &updated_exponent);
+ exponent = updated_exponent;
+
+ double double_guess;
+ bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
+
float float_guess = SanitizedDoubletof(double_guess);
- if (float_guess == double_guess) {
- // This shortcut triggers for integer values.
- return float_guess;
- }
-
- // We must catch double-rounding. Say the double has been rounded up, and is
- // now a boundary of a float, and rounds up again. This is why we have to
- // look at previous too.
- // Example (in decimal numbers):
- // input: 12349
- // high-precision (4 digits): 1235
- // low-precision (3 digits):
- // when read from input: 123
- // when rounded from high precision: 124.
- // To do this we simply look at the neighbors of the correct result and see
- // if they would round to the same float. If the guess is not correct we have
- // to look at four values (since two different doubles could be the correct
- // double).
-
- double double_next = Double(double_guess).NextDouble();
- double double_previous = Double(double_guess).PreviousDouble();
-
+ if (float_guess == double_guess) {
+ // This shortcut triggers for integer values.
+ return float_guess;
+ }
+
+ // We must catch double-rounding. Say the double has been rounded up, and is
+ // now a boundary of a float, and rounds up again. This is why we have to
+ // look at previous too.
+ // Example (in decimal numbers):
+ // input: 12349
+ // high-precision (4 digits): 1235
+ // low-precision (3 digits):
+ // when read from input: 123
+ // when rounded from high precision: 124.
+ // To do this we simply look at the neighbors of the correct result and see
+ // if they would round to the same float. If the guess is not correct we have
+ // to look at four values (since two different doubles could be the correct
+ // double).
+
+ double double_next = Double(double_guess).NextDouble();
+ double double_previous = Double(double_guess).PreviousDouble();
+
float f1 = SanitizedDoubletof(double_previous);
- float f2 = float_guess;
+ float f2 = float_guess;
float f3 = SanitizedDoubletof(double_next);
- float f4;
- if (is_correct) {
- f4 = f3;
- } else {
- double double_next2 = Double(double_next).NextDouble();
+ float f4;
+ if (is_correct) {
+ f4 = f3;
+ } else {
+ double double_next2 = Double(double_next).NextDouble();
f4 = SanitizedDoubletof(double_next2);
- }
- (void) f2; // Mark variable as used.
- ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
-
- // If the guess doesn't lie near a single-precision boundary we can simply
- // return its float-value.
- if (f1 == f4) {
- return float_guess;
- }
-
- ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
- (f1 == f2 && f2 != f3 && f3 == f4) ||
- (f1 == f2 && f2 == f3 && f3 != f4));
-
+ }
+ (void) f2; // Mark variable as used.
+ ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
+
+ // If the guess doesn't lie near a single-precision boundary we can simply
+ // return its float-value.
+ if (f1 == f4) {
+ return float_guess;
+ }
+
+ ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
+ (f1 == f2 && f2 != f3 && f3 == f4) ||
+ (f1 == f2 && f2 == f3 && f3 != f4));
+
// guess and next are the two possible candidates (in the same way that
- // double_guess was the lower candidate for a double-precision guess).
- float guess = f1;
- float next = f4;
- DiyFp upper_boundary;
- if (guess == 0.0f) {
- float min_float = 1e-45f;
- upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
- } else {
- upper_boundary = Single(guess).UpperBoundary();
- }
- int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
- if (comparison < 0) {
- return guess;
- } else if (comparison > 0) {
- return next;
- } else if ((Single(guess).Significand() & 1) == 0) {
- // Round towards even.
- return guess;
- } else {
- return next;
- }
-}
-
-} // namespace double_conversion
+ // double_guess was the lower candidate for a double-precision guess).
+ float guess = f1;
+ float next = f4;
+ DiyFp upper_boundary;
+ if (guess == 0.0f) {
+ float min_float = 1e-45f;
+ upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
+ } else {
+ upper_boundary = Single(guess).UpperBoundary();
+ }
+ int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+ if (comparison < 0) {
+ return guess;
+ } else if (comparison > 0) {
+ return next;
+ } else if ((Single(guess).Significand() & 1) == 0) {
+ // Round towards even.
+ return guess;
+ } else {
+ return next;
+ }
+}
+
+} // namespace double_conversion
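
The strtod.cc hunk above keeps the three-stage strategy intact: DoubleStrtod tries an exact shortcut, DiyFpStrtod computes a guess with explicit error tracking, and CompareBufferWithDiyFp settles the remaining half-ulp cases with Bignum arithmetic plus round-to-even. The shortcut rests on two facts stated in the comments: an integer of at most 15 decimal digits is exact in a double, and so is every power of ten up to 10^22, so one correctly rounded IEEE multiplication or division gives the right answer. A simplified stand-in for that fast path (illustrative only, not the library code):

    #include <cstdint>
    #include <cstdio>

    // Exact powers of ten up to 10^22; each one is representable in a double
    // without rounding, which is what makes the fast path below exact.
    static const double kExactPow10[] = {
        1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
        1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};

    // Simplified stand-in for the DoubleStrtod fast path: a significand with at
    // most 15 decimal digits is exact in a double, so a single multiplication or
    // division by an exact power of ten yields the correctly rounded result.
    static bool FastPathStrtod(uint64_t digits, int exponent, double* out) {
      if (digits >= 1000000000000000ULL) return false;   // more than 15 digits
      if (exponent >= 0 && exponent <= 22) {
        *out = static_cast<double>(digits) * kExactPow10[exponent];
        return true;
      }
      if (exponent < 0 && -exponent <= 22) {
        *out = static_cast<double>(digits) / kExactPow10[-exponent];
        return true;
      }
      return false;  // the real code falls back to DiyFpStrtod and then Bignum
    }

    int main() {
      double d;
      if (FastPathStrtod(123456789012345ULL, 3, &d)) {
        std::printf("123456789012345e3 -> %.17g\n", d);  // correctly rounded
      }
      if (FastPathStrtod(625ULL, -4, &d)) {
        std::printf("625e-4 -> %.17g\n", d);             // exactly 0.0625
      }
      return 0;
    }

The real DoubleStrtod has one more exact branch (padding a short digit string with zeros so a larger exponent still fits the table); everything else goes to DiyFpStrtod, and Strtod only resorts to the Bignum comparison when ComputeGuess cannot prove the guess correct.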
diff --git a/contrib/libs/double-conversion/strtod.h b/contrib/libs/double-conversion/strtod.h
index 937e5a3c5b..ed0293b8f5 100644
--- a/contrib/libs/double-conversion/strtod.h
+++ b/contrib/libs/double-conversion/strtod.h
@@ -1,45 +1,45 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_STRTOD_H_
-#define DOUBLE_CONVERSION_STRTOD_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-// The buffer must only contain digits in the range [0-9]. It must not
-// contain a dot or a sign. It must not start with '0', and must not be empty.
-double Strtod(Vector<const char> buffer, int exponent);
-
-// The buffer must only contain digits in the range [0-9]. It must not
-// contain a dot or a sign. It must not start with '0', and must not be empty.
-float Strtof(Vector<const char> buffer, int exponent);
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_STRTOD_H_
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_STRTOD_H_
+#define DOUBLE_CONVERSION_STRTOD_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+double Strtod(Vector<const char> buffer, int exponent);
+
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+float Strtof(Vector<const char> buffer, int exponent);
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_STRTOD_H_
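
For reference, the restored strtod.h interface takes a bare digit buffer and a decimal exponent; the parsed value is the digit string interpreted as an integer times 10^exponent, as the implementation in strtod.cc above expects. A minimal usage sketch, assuming the double-conversion sources are on the include path and using the Vector(pointer, length) constructor from utils.h:

    #include <cstdio>

    #include "strtod.h"   // declares double_conversion::Strtod / Strtof

    int main() {
      using double_conversion::Strtod;
      using double_conversion::Strtof;
      using double_conversion::Vector;

      // Digits only: no sign, no dot, no leading zero, as required by the header.
      const char digits[] = "123456789";
      Vector<const char> buffer(digits, sizeof(digits) - 1);

      double d = Strtod(buffer, -4);   // 123456789 * 10^-4 == 12345.6789
      float  f = Strtof(buffer, -4);

      std::printf("Strtod: %.17g\nStrtof: %.9g\n", d, static_cast<double>(f));
      return 0;
    }

Strtof runs through the same machinery but additionally guards against double rounding at single-precision boundaries, which is what the f1..f4 comparison in strtod.cc above is for.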
diff --git a/contrib/libs/double-conversion/utils.h b/contrib/libs/double-conversion/utils.h
index c99e28f0f4..41c5b02d2c 100644
--- a/contrib/libs/double-conversion/utils.h
+++ b/contrib/libs/double-conversion/utils.h
@@ -1,72 +1,72 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_UTILS_H_
-#define DOUBLE_CONVERSION_UTILS_H_
-
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_UTILS_H_
+#define DOUBLE_CONVERSION_UTILS_H_
+
#include <cstdlib>
#include <cstring>
-
+
#include <cassert>
-#ifndef ASSERT
-#define ASSERT(condition) \
- assert(condition);
-#endif
-#ifndef UNIMPLEMENTED
-#define UNIMPLEMENTED() (abort())
-#endif
-#ifndef DOUBLE_CONVERSION_NO_RETURN
-#ifdef _MSC_VER
-#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn)
-#else
-#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn))
-#endif
-#endif
-#ifndef UNREACHABLE
-#ifdef _MSC_VER
-void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();
-inline void abort_noreturn() { abort(); }
-#define UNREACHABLE() (abort_noreturn())
-#else
-#define UNREACHABLE() (abort())
-#endif
-#endif
-
-
-// Double operations detection based on target architecture.
- // Linux uses an 80-bit wide floating-point stack on x86. This induces double
-// rounding, which in turn leads to wrong results.
-// An easy way to test if the floating-point operations are correct is to
-// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
-// the result is equal to 89255e-22.
- // The best way to test this is to create a division function and to compare
-// the output of the division with the expected result. (Inlining must be
-// disabled.)
-// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
+#ifndef ASSERT
+#define ASSERT(condition) \
+ assert(condition);
+#endif
+#ifndef UNIMPLEMENTED
+#define UNIMPLEMENTED() (abort())
+#endif
+#ifndef DOUBLE_CONVERSION_NO_RETURN
+#ifdef _MSC_VER
+#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn)
+#else
+#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn))
+#endif
+#endif
+#ifndef UNREACHABLE
+#ifdef _MSC_VER
+void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();
+inline void abort_noreturn() { abort(); }
+#define UNREACHABLE() (abort_noreturn())
+#else
+#define UNREACHABLE() (abort())
+#endif
+#endif
+
+
+// Double operations detection based on target architecture.
+ // Linux uses an 80-bit wide floating-point stack on x86. This induces double
+// rounding, which in turn leads to wrong results.
+// An easy way to test if the floating-point operations are correct is to
+// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
+// the result is equal to 89255e-22.
+ // The best way to test this is to create a division function and to compare
+// the output of the division with the expected result. (Inlining must be
+// disabled.)
+// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
//
// For example:
/*
@@ -83,276 +83,276 @@ int main(int argc, char** argv) {
// Run as follows ./main || echo "correct"
//
// If it prints "correct" then the architecture should be here, in the "correct" section.
-#if defined(_M_X64) || defined(__x86_64__) || \
+#if defined(_M_X64) || defined(__x86_64__) || \
defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \
- defined(__hppa__) || defined(__ia64__) || \
- defined(__mips__) || \
- defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
- defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
- defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
- defined(__SH4__) || defined(__alpha__) || \
- defined(_MIPS_ARCH_MIPS32R2) || \
+ defined(__hppa__) || defined(__ia64__) || \
+ defined(__mips__) || \
+ defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
+ defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
+ defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
+ defined(__SH4__) || defined(__alpha__) || \
+ defined(_MIPS_ARCH_MIPS32R2) || \
defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \
defined(__riscv) || \
defined(__or1k__)
-#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#elif defined(__mc68000__) || \
defined(__pnacl__) || defined(__native_client__)
-#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
-#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
-#if defined(_WIN32)
-// Windows uses a 64bit wide floating point stack.
-#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
-#else
-#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
-#endif // _WIN32
-#else
-#error Target architecture was not detected as supported by Double-Conversion.
-#endif
-
-#if defined(_WIN32) && !defined(__MINGW32__)
-
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef short int16_t; // NOLINT
-typedef unsigned short uint16_t; // NOLINT
-typedef int int32_t;
-typedef unsigned int uint32_t;
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-// intptr_t and friends are defined in crtdefs.h through stdio.h.
-
-#else
-
-#include <stdint.h>
-
-#endif
-
-typedef uint16_t uc16;
-
-// The following macro works on both 32 and 64-bit platforms.
-// Usage: instead of writing 0x1234567890123456
-// write UINT64_2PART_C(0x12345678,90123456);
-#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
-
-
-// The expression ARRAY_SIZE(a) is a compile-time constant of type
-// size_t which represents the number of elements of the given
-// array. You should only use ARRAY_SIZE on statically allocated
-// arrays.
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(a) \
- ((sizeof(a) / sizeof(*(a))) / \
- static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-#endif
-
-// A macro to disallow the evil copy constructor and operator= functions
-// This should be used in the private: declarations for a class
+#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
+#if defined(_WIN32)
+// Windows uses a 64-bit wide floating point stack.
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#else
+#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
+#endif // _WIN32
+#else
+#error Target architecture was not detected as supported by Double-Conversion.
+#endif
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t; // NOLINT
+typedef unsigned short uint16_t; // NOLINT
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+// intptr_t and friends are defined in crtdefs.h through stdio.h.
+
+#else
+
+#include <stdint.h>
+
+#endif
+
+typedef uint16_t uc16;
+
+// The following macro works on both 32 and 64-bit platforms.
+// Usage: instead of writing 0x1234567890123456
+// write UINT64_2PART_C(0x12345678,90123456);
+#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
+
+
+// The expression ARRAY_SIZE(a) is a compile-time constant of type
+// size_t which represents the number of elements of the given
+// array. You should only use ARRAY_SIZE on statically allocated
+// arrays.
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) \
+ ((sizeof(a) / sizeof(*(a))) / \
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
+#endif
+
+// A macro to disallow the evil copy constructor and operator= functions
+// This should be used in the private: declarations for a class
#ifndef DC_DISALLOW_COPY_AND_ASSIGN
#define DC_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
-#endif
-
-// A macro to disallow all the implicit constructors, namely the
-// default constructor, copy constructor and operator= functions.
-//
-// This should be used in the private: declarations for a class
-// that wants to prevent anyone from instantiating it. This is
-// especially useful for classes containing only static methods.
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+#endif
+
+// A macro to disallow all the implicit constructors, namely the
+// default constructor, copy constructor and operator= functions.
+//
+// This should be used in the private: declarations for a class
+// that wants to prevent anyone from instantiating it. This is
+// especially useful for classes containing only static methods.
#ifndef DC_DISALLOW_IMPLICIT_CONSTRUCTORS
#define DC_DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
- TypeName(); \
+ TypeName(); \
DC_DISALLOW_COPY_AND_ASSIGN(TypeName)
-#endif
-
-namespace double_conversion {
-
-static const int kCharSize = sizeof(char);
-
-// Returns the maximum of the two parameters.
-template <typename T>
-static T Max(T a, T b) {
- return a < b ? b : a;
-}
-
-
-// Returns the minimum of the two parameters.
-template <typename T>
-static T Min(T a, T b) {
- return a < b ? a : b;
-}
-
-
-inline int StrLength(const char* string) {
- size_t length = strlen(string);
- ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
- return static_cast<int>(length);
-}
-
-// This is a simplified version of V8's Vector class.
-template <typename T>
-class Vector {
- public:
- Vector() : start_(NULL), length_(0) {}
- Vector(T* data, int len) : start_(data), length_(len) {
- ASSERT(len == 0 || (len > 0 && data != NULL));
- }
-
- // Returns a vector using the same backing storage as this one,
- // spanning from and including 'from', to but not including 'to'.
- Vector<T> SubVector(int from, int to) {
- ASSERT(to <= length_);
- ASSERT(from < to);
- ASSERT(0 <= from);
- return Vector<T>(start() + from, to - from);
- }
-
- // Returns the length of the vector.
- int length() const { return length_; }
-
- // Returns whether or not the vector is empty.
- bool is_empty() const { return length_ == 0; }
-
- // Returns the pointer to the start of the data in the vector.
- T* start() const { return start_; }
-
- // Access individual vector elements - checks bounds in debug mode.
- T& operator[](int index) const {
- ASSERT(0 <= index && index < length_);
- return start_[index];
- }
-
- T& first() { return start_[0]; }
-
- T& last() { return start_[length_ - 1]; }
-
- private:
- T* start_;
- int length_;
-};
-
-
-// Helper class for building result strings in a character buffer. The
-// purpose of the class is to use safe operations that checks the
-// buffer bounds on all operations in debug mode.
-class StringBuilder {
- public:
- StringBuilder(char* buffer, int buffer_size)
- : buffer_(buffer, buffer_size), position_(0) { }
-
- ~StringBuilder() { if (!is_finalized()) Finalize(); }
-
- int size() const { return buffer_.length(); }
-
- // Get the current position in the builder.
- int position() const {
- ASSERT(!is_finalized());
- return position_;
- }
-
- // Reset the position.
- void Reset() { position_ = 0; }
-
- // Add a single character to the builder. It is not allowed to add
- // 0-characters; use the Finalize() method to terminate the string
- // instead.
- void AddCharacter(char c) {
- ASSERT(c != '\0');
- ASSERT(!is_finalized() && position_ < buffer_.length());
- buffer_[position_++] = c;
- }
-
- // Add an entire string to the builder. Uses strlen() internally to
- // compute the length of the input string.
- void AddString(const char* s) {
- AddSubstring(s, StrLength(s));
- }
-
- // Add the first 'n' characters of the given string 's' to the
- // builder. The input string must have enough characters.
- void AddSubstring(const char* s, int n) {
- ASSERT(!is_finalized() && position_ + n < buffer_.length());
- ASSERT(static_cast<size_t>(n) <= strlen(s));
- memmove(&buffer_[position_], s, n * kCharSize);
- position_ += n;
- }
-
-
- // Add character padding to the builder. If count is non-positive,
- // nothing is added to the builder.
- void AddPadding(char c, int count) {
- for (int i = 0; i < count; i++) {
- AddCharacter(c);
- }
- }
-
- // Finalize the string by 0-terminating it and returning the buffer.
- char* Finalize() {
- ASSERT(!is_finalized() && position_ < buffer_.length());
- buffer_[position_] = '\0';
- // Make sure nobody managed to add a 0-character to the
- // buffer while building the string.
- ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
- position_ = -1;
- ASSERT(is_finalized());
- return buffer_.start();
- }
-
- private:
- Vector<char> buffer_;
- int position_;
-
- bool is_finalized() const { return position_ < 0; }
-
+#endif
+
+namespace double_conversion {
+
+static const int kCharSize = sizeof(char);
+
+// Returns the maximum of the two parameters.
+template <typename T>
+static T Max(T a, T b) {
+ return a < b ? b : a;
+}
+
+
+// Returns the minimum of the two parameters.
+template <typename T>
+static T Min(T a, T b) {
+ return a < b ? a : b;
+}
+
+
+inline int StrLength(const char* string) {
+ size_t length = strlen(string);
+ ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
+ return static_cast<int>(length);
+}
+
+// This is a simplified version of V8's Vector class.
+template <typename T>
+class Vector {
+ public:
+ Vector() : start_(NULL), length_(0) {}
+ Vector(T* data, int len) : start_(data), length_(len) {
+ ASSERT(len == 0 || (len > 0 && data != NULL));
+ }
+
+ // Returns a vector using the same backing storage as this one,
+ // spanning from and including 'from', to but not including 'to'.
+ Vector<T> SubVector(int from, int to) {
+ ASSERT(to <= length_);
+ ASSERT(from < to);
+ ASSERT(0 <= from);
+ return Vector<T>(start() + from, to - from);
+ }
+
+ // Returns the length of the vector.
+ int length() const { return length_; }
+
+ // Returns whether or not the vector is empty.
+ bool is_empty() const { return length_ == 0; }
+
+ // Returns the pointer to the start of the data in the vector.
+ T* start() const { return start_; }
+
+ // Access individual vector elements - checks bounds in debug mode.
+ T& operator[](int index) const {
+ ASSERT(0 <= index && index < length_);
+ return start_[index];
+ }
+
+ T& first() { return start_[0]; }
+
+ T& last() { return start_[length_ - 1]; }
+
+ private:
+ T* start_;
+ int length_;
+};
+
+
+// Helper class for building result strings in a character buffer. The
+// purpose of the class is to use safe operations that check the
+// buffer bounds on all operations in debug mode.
+class StringBuilder {
+ public:
+ StringBuilder(char* buffer, int buffer_size)
+ : buffer_(buffer, buffer_size), position_(0) { }
+
+ ~StringBuilder() { if (!is_finalized()) Finalize(); }
+
+ int size() const { return buffer_.length(); }
+
+ // Get the current position in the builder.
+ int position() const {
+ ASSERT(!is_finalized());
+ return position_;
+ }
+
+ // Reset the position.
+ void Reset() { position_ = 0; }
+
+ // Add a single character to the builder. It is not allowed to add
+ // 0-characters; use the Finalize() method to terminate the string
+ // instead.
+ void AddCharacter(char c) {
+ ASSERT(c != '\0');
+ ASSERT(!is_finalized() && position_ < buffer_.length());
+ buffer_[position_++] = c;
+ }
+
+ // Add an entire string to the builder. Uses strlen() internally to
+ // compute the length of the input string.
+ void AddString(const char* s) {
+ AddSubstring(s, StrLength(s));
+ }
+
+ // Add the first 'n' characters of the given string 's' to the
+ // builder. The input string must have enough characters.
+ void AddSubstring(const char* s, int n) {
+ ASSERT(!is_finalized() && position_ + n < buffer_.length());
+ ASSERT(static_cast<size_t>(n) <= strlen(s));
+ memmove(&buffer_[position_], s, n * kCharSize);
+ position_ += n;
+ }
+
+
+ // Add character padding to the builder. If count is non-positive,
+ // nothing is added to the builder.
+ void AddPadding(char c, int count) {
+ for (int i = 0; i < count; i++) {
+ AddCharacter(c);
+ }
+ }
+
+ // Finalize the string by 0-terminating it and returning the buffer.
+ char* Finalize() {
+ ASSERT(!is_finalized() && position_ < buffer_.length());
+ buffer_[position_] = '\0';
+ // Make sure nobody managed to add a 0-character to the
+ // buffer while building the string.
+ ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
+ position_ = -1;
+ ASSERT(is_finalized());
+ return buffer_.start();
+ }
+
+ private:
+ Vector<char> buffer_;
+ int position_;
+
+ bool is_finalized() const { return position_ < 0; }
+
DC_DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
-};
-
-// The type-based aliasing rule allows the compiler to assume that pointers of
-// different types (for some definition of different) never alias each other.
-// Thus the following code does not work:
-//
-// float f = foo();
-// int fbits = *(int*)(&f);
-//
-// The compiler 'knows' that the int pointer can't refer to f since the types
-// don't match, so the compiler may cache f in a register, leaving random data
-// in fbits. Using C++ style casts makes no difference, however a pointer to
-// char data is assumed to alias any other pointer. This is the 'memcpy
-// exception'.
-//
-// Bit_cast uses the memcpy exception to move the bits from a variable of one
-// type of a variable of another type. Of course the end result is likely to
-// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005)
-// will completely optimize BitCast away.
-//
-// There is an additional use for BitCast.
-// Recent gccs will warn when they see casts that may result in breakage due to
-// the type-based aliasing rule. If you have checked that there is no breakage
-// you can use BitCast to cast one pointer type to another. This confuses gcc
-// enough that it can no longer see that you have cast one pointer type to
-// another thus avoiding the warning.
-template <class Dest, class Source>
-inline Dest BitCast(const Source& source) {
- // Compile time assertion: sizeof(Dest) == sizeof(Source)
- // A compile error here means your Dest and Source have different sizes.
+};
+
+// The type-based aliasing rule allows the compiler to assume that pointers of
+// different types (for some definition of different) never alias each other.
+// Thus the following code does not work:
+//
+// float f = foo();
+// int fbits = *(int*)(&f);
+//
+// The compiler 'knows' that the int pointer can't refer to f since the types
+// don't match, so the compiler may cache f in a register, leaving random data
+// in fbits. Using C++ style casts makes no difference, however a pointer to
+// char data is assumed to alias any other pointer. This is the 'memcpy
+// exception'.
+//
+// Bit_cast uses the memcpy exception to move the bits from a variable of one
+// type to a variable of another type. Of course the end result is likely to
+// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005)
+// will completely optimize BitCast away.
+//
+// There is an additional use for BitCast.
+// Recent gccs will warn when they see casts that may result in breakage due to
+// the type-based aliasing rule. If you have checked that there is no breakage
+// you can use BitCast to cast one pointer type to another. This confuses gcc
+// enough that it can no longer see that you have cast one pointer type to
+// another thus avoiding the warning.
+template <class Dest, class Source>
+inline Dest BitCast(const Source& source) {
+ // Compile time assertion: sizeof(Dest) == sizeof(Source)
+ // A compile error here means your Dest and Source have different sizes.
#if __cplusplus >= 201103L
static_assert(sizeof(Dest) == sizeof(Source),
"source and destination size mismatch");
#else
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
#endif
-
- Dest dest;
- memmove(&dest, &source, sizeof(dest));
- return dest;
-}
-
-template <class Dest, class Source>
-inline Dest BitCast(Source* source) {
- return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
-}
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_UTILS_H_
+
+ Dest dest;
+ memmove(&dest, &source, sizeof(dest));
+ return dest;
+}
+
+template <class Dest, class Source>
+inline Dest BitCast(Source* source) {
+ return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
+}
+
+} // namespace double_conversion
+
+#endif // DOUBLE_CONVERSION_UTILS_H_
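
The BitCast and StringBuilder pieces restored above are easiest to see in a small driver. A minimal sketch, assuming the header is reachable as "double-conversion/utils.h" (the include path is hypothetical; the namespace, BitCast, and StringBuilder are taken from the hunk above):

    #include <cstdint>
    #include <cstdio>
    #include "double-conversion/utils.h"  // hypothetical path to the header above

    int main() {
      // BitCast moves the bits of a double into a uint64_t via memmove, avoiding
      // the *(uint64_t*)&d cast that the aliasing comment above warns against.
      const double d = 1.0;
      const uint64_t bits = double_conversion::BitCast<uint64_t>(d);

      // StringBuilder writes into a caller-provided buffer and 0-terminates it
      // in Finalize(); bounds are checked by ASSERT in debug builds.
      char raw[32];
      double_conversion::StringBuilder builder(raw, static_cast<int>(sizeof(raw)));
      builder.AddString("bits=");
      char hex[17];
      std::snprintf(hex, sizeof(hex), "%016llx", static_cast<unsigned long long>(bits));
      builder.AddString(hex);
      std::puts(builder.Finalize());  // prints bits=3ff0000000000000
      return 0;
    }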
diff --git a/contrib/libs/double-conversion/ya.make b/contrib/libs/double-conversion/ya.make
index b97a761c80..52e59b0989 100644
--- a/contrib/libs/double-conversion/ya.make
+++ b/contrib/libs/double-conversion/ya.make
@@ -1,8 +1,8 @@
-LIBRARY()
-
+LIBRARY()
+
VERSION(3.1.0)
-LICENSE(BSD-3-Clause)
+LICENSE(BSD-3-Clause)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
@@ -11,22 +11,22 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
NO_COMPILER_WARNINGS()
-NO_UTIL()
-
+NO_UTIL()
+
ADDINCL(GLOBAL contrib/libs/double-conversion/include)
-SRCS(
- cached-powers.cc
- bignum-dtoa.cc
- double-conversion.cc
- diy-fp.cc
- fixed-dtoa.cc
- strtod.cc
- bignum.cc
- fast-dtoa.cc
-)
-
-END()
+SRCS(
+ cached-powers.cc
+ bignum-dtoa.cc
+ double-conversion.cc
+ diy-fp.cc
+ fixed-dtoa.cc
+ strtod.cc
+ bignum.cc
+ fast-dtoa.cc
+)
+
+END()
diff --git a/contrib/libs/expat/ya.make b/contrib/libs/expat/ya.make
index 09c5e26409..8128621e60 100644
--- a/contrib/libs/expat/ya.make
+++ b/contrib/libs/expat/ya.make
@@ -6,7 +6,7 @@ OWNER(
orivej
g:cpp-contrib
)
-
+
VERSION(2.4.4)
ORIGINAL_SOURCE(https://github.com/libexpat/libexpat/releases/download/R_2_4_4/expat-2.4.4.tar.xz)
diff --git a/contrib/libs/farmhash/arch/sse41/ya.make b/contrib/libs/farmhash/arch/sse41/ya.make
index 46e7d808c9..b3b2964c99 100644
--- a/contrib/libs/farmhash/arch/sse41/ya.make
+++ b/contrib/libs/farmhash/arch/sse41/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(somov)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/farmhash/arch/sse42/ya.make b/contrib/libs/farmhash/arch/sse42/ya.make
index f48b958eed..6df471feb7 100644
--- a/contrib/libs/farmhash/arch/sse42/ya.make
+++ b/contrib/libs/farmhash/arch/sse42/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(somov)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/farmhash/arch/sse42_aesni/ya.make b/contrib/libs/farmhash/arch/sse42_aesni/ya.make
index 05596dc277..4d558bc2a2 100644
--- a/contrib/libs/farmhash/arch/sse42_aesni/ya.make
+++ b/contrib/libs/farmhash/arch/sse42_aesni/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(somov)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/fastlz/fastlz.c b/contrib/libs/fastlz/fastlz.c
index deb7266770..e671bda728 100644
--- a/contrib/libs/fastlz/fastlz.c
+++ b/contrib/libs/fastlz/fastlz.c
@@ -1,553 +1,553 @@
-/*
- FastLZ - lightning-fast lossless compression library
-
- Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
- Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
- Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-
-#include "fastlz.h"
-
-#if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
-
-/*
- * Always check for bound when decompressing.
- * Generally it is best to leave it defined.
- */
-#define FASTLZ_SAFE
-
-/*
- * Give hints to the compiler for branch prediction optimization.
- */
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
-#define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
+/*
+ FastLZ - lightning-fast lossless compression library
+
+ Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
+ Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
+ Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include "fastlz.h"
+
+#if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
+
+/*
+ * Always check for bound when decompressing.
+ * Generally it is best to leave it defined.
+ */
+#define FASTLZ_SAFE
+
+/*
+ * Give hints to the compiler for branch prediction optimization.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 2)
+#define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
+#define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
+#else
+#define FASTLZ_EXPECT_CONDITIONAL(c) (c)
+#define FASTLZ_UNEXPECT_CONDITIONAL(c) (c)
+#endif
+
+/*
+ * Use inlined functions for supported systems.
+ */
+#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C)
+#define FASTLZ_INLINE inline
+#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
+#define FASTLZ_INLINE __inline
#else
-#define FASTLZ_EXPECT_CONDITIONAL(c) (c)
-#define FASTLZ_UNEXPECT_CONDITIONAL(c) (c)
-#endif
-
-/*
- * Use inlined functions for supported systems.
- */
-#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C)
-#define FASTLZ_INLINE inline
-#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
-#define FASTLZ_INLINE __inline
-#else
-#define FASTLZ_INLINE
-#endif
-
-/*
- * Prevent accessing more than 8-bit at once, except on x86 architectures.
- */
-#if !defined(FASTLZ_STRICT_ALIGN)
-#define FASTLZ_STRICT_ALIGN
-#if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(_M_IX86) /* Intel, MSVC */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(__386)
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(_X86_) /* MinGW */
-#undef FASTLZ_STRICT_ALIGN
-#elif defined(__I86__) /* Digital Mars */
-#undef FASTLZ_STRICT_ALIGN
-#endif
-#endif
-
-/*
- * FIXME: use preprocessor magic to set this on different platforms!
- */
-typedef unsigned char flzuint8;
-typedef unsigned short flzuint16;
-typedef unsigned int flzuint32;
-
-/* prototypes */
-int fastlz_compress(const void* input, int length, void* output);
-int fastlz_compress_level(int level, const void* input, int length, void* output);
-int fastlz_decompress(const void* input, int length, void* output, int maxout);
-
-#define MAX_COPY 32
-#define MAX_LEN 264 /* 256 + 8 */
-#define MAX_DISTANCE 8192
-
-#if !defined(FASTLZ_STRICT_ALIGN)
-#define FASTLZ_READU16(p) *((const flzuint16*)(p))
-#else
-#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
-#endif
-
-#define HASH_LOG 13
-#define HASH_SIZE (1<< HASH_LOG)
-#define HASH_MASK (HASH_SIZE-1)
-#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; }
-
-#undef FASTLZ_LEVEL
-#define FASTLZ_LEVEL 1
-
-#undef FASTLZ_COMPRESSOR
-#undef FASTLZ_DECOMPRESSOR
-#define FASTLZ_COMPRESSOR fastlz1_compress
-#define FASTLZ_DECOMPRESSOR fastlz1_decompress
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
-#include "fastlz.c"
-
-#undef FASTLZ_LEVEL
-#define FASTLZ_LEVEL 2
-
-#undef MAX_DISTANCE
-#define MAX_DISTANCE 8191
-#define MAX_FARDISTANCE (65535+MAX_DISTANCE-1)
-
-#undef FASTLZ_COMPRESSOR
-#undef FASTLZ_DECOMPRESSOR
-#define FASTLZ_COMPRESSOR fastlz2_compress
-#define FASTLZ_DECOMPRESSOR fastlz2_decompress
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
-#include "fastlz.c"
-
-int fastlz_compress(const void* input, int length, void* output)
-{
- /* for short block, choose fastlz1 */
- if(length < 65536)
- return fastlz1_compress(input, length, output);
-
- /* else... */
- return fastlz2_compress(input, length, output);
-}
-
-int fastlz_decompress(const void* input, int length, void* output, int maxout)
-{
- /* magic identifier for compression level */
- int level = ((*(const flzuint8*)input) >> 5) + 1;
-
- if(level == 1)
- return fastlz1_decompress(input, length, output, maxout);
- if(level == 2)
- return fastlz2_decompress(input, length, output, maxout);
-
- /* unknown level, trigger error */
- return 0;
-}
-
-int fastlz_compress_level(int level, const void* input, int length, void* output)
-{
- if(level == 1)
- return fastlz1_compress(input, length, output);
- if(level == 2)
- return fastlz2_compress(input, length, output);
-
- return 0;
-}
-
-#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
-
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output)
-{
- const flzuint8* ip = (const flzuint8*) input;
- const flzuint8* ip_bound = ip + length - 2;
- const flzuint8* ip_limit = ip + length - 12;
- flzuint8* op = (flzuint8*) output;
-
- const flzuint8* htab[HASH_SIZE];
- const flzuint8** hslot;
- flzuint32 hval;
-
- flzuint32 copy;
-
- /* sanity check */
- if(FASTLZ_UNEXPECT_CONDITIONAL(length < 4))
- {
- if(length)
- {
- /* create literal copy only */
- *op++ = length-1;
- ip_bound++;
- while(ip <= ip_bound)
- *op++ = *ip++;
- return length+1;
- }
- else
- return 0;
- }
-
- /* initializes hash table */
- for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
- *hslot = ip;
-
- /* we start with literal copy */
- copy = 2;
- *op++ = MAX_COPY-1;
- *op++ = *ip++;
- *op++ = *ip++;
-
- /* main loop */
- while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
- {
- const flzuint8* ref;
- flzuint32 distance;
-
- /* minimum match length */
- flzuint32 len = 3;
-
- /* comparison starting-point */
- const flzuint8* anchor = ip;
-
- /* check for a run */
-#if FASTLZ_LEVEL==2
- if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
- {
- distance = 1;
- ip += 3;
- ref = anchor - 1 + 3;
- goto match;
- }
-#endif
-
- /* find potential match */
- HASH_FUNCTION(hval,ip);
- hslot = htab + hval;
- ref = htab[hval];
-
- /* calculate distance to the match */
- distance = anchor - ref;
-
- /* update hash table */
- *hslot = anchor;
-
- /* is this a match? check the first 3 bytes */
- if(distance==0 ||
-#if FASTLZ_LEVEL==1
- (distance >= MAX_DISTANCE) ||
-#else
- (distance >= MAX_FARDISTANCE) ||
-#endif
- *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++)
- goto literal;
-
-#if FASTLZ_LEVEL==2
- /* far, needs at least 5-byte match */
- if(distance >= MAX_DISTANCE)
- {
- if(*ip++ != *ref++ || *ip++!= *ref++)
- goto literal;
- len += 2;
- }
-
- match:
-#endif
-
- /* last matched byte */
- ip = anchor + len;
-
- /* distance is biased */
- distance--;
-
- if(!distance)
- {
- /* zero distance means a run */
- flzuint8 x = ip[-1];
- while(ip < ip_bound)
- if(*ref++ != x) break; else ip++;
- }
- else
- for(;;)
- {
- /* safe because the outer check against ip limit */
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- while(ip < ip_bound)
- if(*ref++ != *ip++) break;
- break;
- }
-
- /* if we have copied something, adjust the copy count */
- if(copy)
- /* copy is biased, '0' means 1 byte copy */
- *(op-copy-1) = copy-1;
- else
- /* back, to overwrite the copy count */
- op--;
-
- /* reset literal counter */
- copy = 0;
-
- /* length is biased, '1' means a match of 3 bytes */
- ip -= 3;
- len = ip - anchor;
-
- /* encode the match */
-#if FASTLZ_LEVEL==2
- if(distance < MAX_DISTANCE)
- {
- if(len < 7)
- {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- }
- else
- {
- *op++ = (7 << 5) + (distance >> 8);
- for(len-=7; len >= 255; len-= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = (distance & 255);
- }
- }
- else
- {
- /* far away, but not yet in the another galaxy... */
- if(len < 7)
- {
- distance -= MAX_DISTANCE;
- *op++ = (len << 5) + 31;
- *op++ = 255;
- *op++ = distance >> 8;
- *op++ = distance & 255;
- }
- else
- {
- distance -= MAX_DISTANCE;
- *op++ = (7 << 5) + 31;
- for(len-=7; len >= 255; len-= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = 255;
- *op++ = distance >> 8;
- *op++ = distance & 255;
- }
- }
-#else
-
- if(FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN-2))
- while(len > MAX_LEN-2)
- {
- *op++ = (7 << 5) + (distance >> 8);
- *op++ = MAX_LEN - 2 - 7 -2;
- *op++ = (distance & 255);
- len -= MAX_LEN-2;
- }
-
- if(len < 7)
- {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- }
- else
- {
- *op++ = (7 << 5) + (distance >> 8);
- *op++ = len - 7;
- *op++ = (distance & 255);
- }
-#endif
-
- /* update the hash at match boundary */
- HASH_FUNCTION(hval,ip);
- htab[hval] = ip++;
- HASH_FUNCTION(hval,ip);
- htab[hval] = ip++;
-
- /* assuming literal copy */
- *op++ = MAX_COPY-1;
-
- continue;
-
- literal:
- *op++ = *anchor++;
- ip = anchor;
- copy++;
- if(FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY))
- {
- copy = 0;
- *op++ = MAX_COPY-1;
- }
- }
-
- /* left-over as literal copy */
- ip_bound++;
- while(ip <= ip_bound)
- {
- *op++ = *ip++;
- copy++;
- if(copy == MAX_COPY)
- {
- copy = 0;
- *op++ = MAX_COPY-1;
- }
- }
-
- /* if we have copied something, adjust the copy length */
- if(copy)
- *(op-copy-1) = copy-1;
- else
- op--;
-
-#if FASTLZ_LEVEL==2
- /* marker for fastlz2 */
- *(flzuint8*)output |= (1 << 5);
-#endif
-
- return op - (flzuint8*)output;
-}
-
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout)
-{
- const flzuint8* ip = (const flzuint8*) input;
- const flzuint8* ip_limit = ip + length;
- flzuint8* op = (flzuint8*) output;
- flzuint8* op_limit = op + maxout;
- flzuint32 ctrl = (*ip++) & 31;
- int loop = 1;
-
- do
- {
- const flzuint8* ref = op;
- flzuint32 len = ctrl >> 5;
- flzuint32 ofs = (ctrl & 31) << 8;
-
- if(ctrl >= 32)
- {
-#if FASTLZ_LEVEL==2
- flzuint8 code;
-#endif
- len--;
- ref -= ofs;
- if (len == 7-1)
-#if FASTLZ_LEVEL==1
- len += *ip++;
- ref -= *ip++;
-#else
- do
- {
- code = *ip++;
- len += code;
- } while (code==255);
- code = *ip++;
- ref -= code;
-
- /* match from 16-bit distance */
- if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
- if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
- {
- ofs = (*ip++) << 8;
- ofs += *ip++;
- ref = op - ofs - MAX_DISTANCE;
- }
-#endif
-
-#ifdef FASTLZ_SAFE
- if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit))
- return 0;
-
- if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
- return 0;
-#endif
-
- if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
- ctrl = *ip++;
- else
- loop = 0;
-
- if(ref == op)
- {
- /* optimize copy for a run */
- flzuint8 b = ref[-1];
- *op++ = b;
- *op++ = b;
- *op++ = b;
- for(; len; --len)
- *op++ = b;
- }
- else
- {
-#if !defined(FASTLZ_STRICT_ALIGN)
- const flzuint16* p;
- flzuint16* q;
-#endif
- /* copy from reference */
- ref--;
- *op++ = *ref++;
- *op++ = *ref++;
- *op++ = *ref++;
-
-#if !defined(FASTLZ_STRICT_ALIGN)
- /* copy a byte, so that now it's word aligned */
- if(len & 1)
- {
- *op++ = *ref++;
- len--;
- }
-
- /* copy 16-bit at once */
- q = (flzuint16*) op;
- op += len;
- p = (const flzuint16*) ref;
- for(len>>=1; len > 4; len-=4)
- {
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- }
- for(; len; --len)
- *q++ = *p++;
-#else
- for(; len; --len)
- *op++ = *ref++;
-#endif
- }
- }
- else
- {
- ctrl++;
-#ifdef FASTLZ_SAFE
- if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
- return 0;
- if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
- return 0;
-#endif
-
- *op++ = *ip++;
- for(--ctrl; ctrl; ctrl--)
- *op++ = *ip++;
-
- loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
- if(loop)
- ctrl = *ip++;
- }
- }
- while(FASTLZ_EXPECT_CONDITIONAL(loop));
-
- return op - (flzuint8*)output;
-}
-
-#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
+#define FASTLZ_INLINE
+#endif
+
+/*
+ * Prevent accessing more than 8-bit at once, except on x86 architectures.
+ */
+#if !defined(FASTLZ_STRICT_ALIGN)
+#define FASTLZ_STRICT_ALIGN
+#if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */
+#undef FASTLZ_STRICT_ALIGN
+#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
+#undef FASTLZ_STRICT_ALIGN
+#elif defined(_M_IX86) /* Intel, MSVC */
+#undef FASTLZ_STRICT_ALIGN
+#elif defined(__386)
+#undef FASTLZ_STRICT_ALIGN
+#elif defined(_X86_) /* MinGW */
+#undef FASTLZ_STRICT_ALIGN
+#elif defined(__I86__) /* Digital Mars */
+#undef FASTLZ_STRICT_ALIGN
+#endif
+#endif
+
+/*
+ * FIXME: use preprocessor magic to set this on different platforms!
+ */
+typedef unsigned char flzuint8;
+typedef unsigned short flzuint16;
+typedef unsigned int flzuint32;
+
+/* prototypes */
+int fastlz_compress(const void* input, int length, void* output);
+int fastlz_compress_level(int level, const void* input, int length, void* output);
+int fastlz_decompress(const void* input, int length, void* output, int maxout);
+
+#define MAX_COPY 32
+#define MAX_LEN 264 /* 256 + 8 */
+#define MAX_DISTANCE 8192
+
+#if !defined(FASTLZ_STRICT_ALIGN)
+#define FASTLZ_READU16(p) *((const flzuint16*)(p))
+#else
+#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
+#endif
+
+#define HASH_LOG 13
+#define HASH_SIZE (1<< HASH_LOG)
+#define HASH_MASK (HASH_SIZE-1)
+#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; }
+
+#undef FASTLZ_LEVEL
+#define FASTLZ_LEVEL 1
+
+#undef FASTLZ_COMPRESSOR
+#undef FASTLZ_DECOMPRESSOR
+#define FASTLZ_COMPRESSOR fastlz1_compress
+#define FASTLZ_DECOMPRESSOR fastlz1_decompress
+static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
+static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
+#include "fastlz.c"
+
+#undef FASTLZ_LEVEL
+#define FASTLZ_LEVEL 2
+
+#undef MAX_DISTANCE
+#define MAX_DISTANCE 8191
+#define MAX_FARDISTANCE (65535+MAX_DISTANCE-1)
+
+#undef FASTLZ_COMPRESSOR
+#undef FASTLZ_DECOMPRESSOR
+#define FASTLZ_COMPRESSOR fastlz2_compress
+#define FASTLZ_DECOMPRESSOR fastlz2_decompress
+static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
+static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
+#include "fastlz.c"
+
+int fastlz_compress(const void* input, int length, void* output)
+{
+ /* for short block, choose fastlz1 */
+ if(length < 65536)
+ return fastlz1_compress(input, length, output);
+
+ /* else... */
+ return fastlz2_compress(input, length, output);
+}
+
+int fastlz_decompress(const void* input, int length, void* output, int maxout)
+{
+ /* magic identifier for compression level */
+ int level = ((*(const flzuint8*)input) >> 5) + 1;
+
+ if(level == 1)
+ return fastlz1_decompress(input, length, output, maxout);
+ if(level == 2)
+ return fastlz2_decompress(input, length, output, maxout);
+
+ /* unknown level, trigger error */
+ return 0;
+}
+
+int fastlz_compress_level(int level, const void* input, int length, void* output)
+{
+ if(level == 1)
+ return fastlz1_compress(input, length, output);
+ if(level == 2)
+ return fastlz2_compress(input, length, output);
+
+ return 0;
+}
+
+#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
+
+static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output)
+{
+ const flzuint8* ip = (const flzuint8*) input;
+ const flzuint8* ip_bound = ip + length - 2;
+ const flzuint8* ip_limit = ip + length - 12;
+ flzuint8* op = (flzuint8*) output;
+
+ const flzuint8* htab[HASH_SIZE];
+ const flzuint8** hslot;
+ flzuint32 hval;
+
+ flzuint32 copy;
+
+ /* sanity check */
+ if(FASTLZ_UNEXPECT_CONDITIONAL(length < 4))
+ {
+ if(length)
+ {
+ /* create literal copy only */
+ *op++ = length-1;
+ ip_bound++;
+ while(ip <= ip_bound)
+ *op++ = *ip++;
+ return length+1;
+ }
+ else
+ return 0;
+ }
+
+ /* initializes hash table */
+ for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
+ *hslot = ip;
+
+ /* we start with literal copy */
+ copy = 2;
+ *op++ = MAX_COPY-1;
+ *op++ = *ip++;
+ *op++ = *ip++;
+
+ /* main loop */
+ while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
+ {
+ const flzuint8* ref;
+ flzuint32 distance;
+
+ /* minimum match length */
+ flzuint32 len = 3;
+
+ /* comparison starting-point */
+ const flzuint8* anchor = ip;
+
+ /* check for a run */
+#if FASTLZ_LEVEL==2
+ if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
+ {
+ distance = 1;
+ ip += 3;
+ ref = anchor - 1 + 3;
+ goto match;
+ }
+#endif
+
+ /* find potential match */
+ HASH_FUNCTION(hval,ip);
+ hslot = htab + hval;
+ ref = htab[hval];
+
+ /* calculate distance to the match */
+ distance = anchor - ref;
+
+ /* update hash table */
+ *hslot = anchor;
+
+ /* is this a match? check the first 3 bytes */
+ if(distance==0 ||
+#if FASTLZ_LEVEL==1
+ (distance >= MAX_DISTANCE) ||
+#else
+ (distance >= MAX_FARDISTANCE) ||
+#endif
+ *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++)
+ goto literal;
+
+#if FASTLZ_LEVEL==2
+ /* far, needs at least 5-byte match */
+ if(distance >= MAX_DISTANCE)
+ {
+ if(*ip++ != *ref++ || *ip++!= *ref++)
+ goto literal;
+ len += 2;
+ }
+
+ match:
+#endif
+
+ /* last matched byte */
+ ip = anchor + len;
+
+ /* distance is biased */
+ distance--;
+
+ if(!distance)
+ {
+ /* zero distance means a run */
+ flzuint8 x = ip[-1];
+ while(ip < ip_bound)
+ if(*ref++ != x) break; else ip++;
+ }
+ else
+ for(;;)
+ {
+ /* safe because the outer check against ip limit */
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ if(*ref++ != *ip++) break;
+ while(ip < ip_bound)
+ if(*ref++ != *ip++) break;
+ break;
+ }
+
+ /* if we have copied something, adjust the copy count */
+ if(copy)
+ /* copy is biased, '0' means 1 byte copy */
+ *(op-copy-1) = copy-1;
+ else
+ /* back, to overwrite the copy count */
+ op--;
+
+ /* reset literal counter */
+ copy = 0;
+
+ /* length is biased, '1' means a match of 3 bytes */
+ ip -= 3;
+ len = ip - anchor;
+
+ /* encode the match */
+#if FASTLZ_LEVEL==2
+ if(distance < MAX_DISTANCE)
+ {
+ if(len < 7)
+ {
+ *op++ = (len << 5) + (distance >> 8);
+ *op++ = (distance & 255);
+ }
+ else
+ {
+ *op++ = (7 << 5) + (distance >> 8);
+ for(len-=7; len >= 255; len-= 255)
+ *op++ = 255;
+ *op++ = len;
+ *op++ = (distance & 255);
+ }
+ }
+ else
+ {
+ /* far away, but not yet in the another galaxy... */
+ if(len < 7)
+ {
+ distance -= MAX_DISTANCE;
+ *op++ = (len << 5) + 31;
+ *op++ = 255;
+ *op++ = distance >> 8;
+ *op++ = distance & 255;
+ }
+ else
+ {
+ distance -= MAX_DISTANCE;
+ *op++ = (7 << 5) + 31;
+ for(len-=7; len >= 255; len-= 255)
+ *op++ = 255;
+ *op++ = len;
+ *op++ = 255;
+ *op++ = distance >> 8;
+ *op++ = distance & 255;
+ }
+ }
+#else
+
+ if(FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN-2))
+ while(len > MAX_LEN-2)
+ {
+ *op++ = (7 << 5) + (distance >> 8);
+ *op++ = MAX_LEN - 2 - 7 -2;
+ *op++ = (distance & 255);
+ len -= MAX_LEN-2;
+ }
+
+ if(len < 7)
+ {
+ *op++ = (len << 5) + (distance >> 8);
+ *op++ = (distance & 255);
+ }
+ else
+ {
+ *op++ = (7 << 5) + (distance >> 8);
+ *op++ = len - 7;
+ *op++ = (distance & 255);
+ }
+#endif
+
+ /* update the hash at match boundary */
+ HASH_FUNCTION(hval,ip);
+ htab[hval] = ip++;
+ HASH_FUNCTION(hval,ip);
+ htab[hval] = ip++;
+
+ /* assuming literal copy */
+ *op++ = MAX_COPY-1;
+
+ continue;
+
+ literal:
+ *op++ = *anchor++;
+ ip = anchor;
+ copy++;
+ if(FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY))
+ {
+ copy = 0;
+ *op++ = MAX_COPY-1;
+ }
+ }
+
+ /* left-over as literal copy */
+ ip_bound++;
+ while(ip <= ip_bound)
+ {
+ *op++ = *ip++;
+ copy++;
+ if(copy == MAX_COPY)
+ {
+ copy = 0;
+ *op++ = MAX_COPY-1;
+ }
+ }
+
+ /* if we have copied something, adjust the copy length */
+ if(copy)
+ *(op-copy-1) = copy-1;
+ else
+ op--;
+
+#if FASTLZ_LEVEL==2
+ /* marker for fastlz2 */
+ *(flzuint8*)output |= (1 << 5);
+#endif
+
+ return op - (flzuint8*)output;
+}
+
+static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout)
+{
+ const flzuint8* ip = (const flzuint8*) input;
+ const flzuint8* ip_limit = ip + length;
+ flzuint8* op = (flzuint8*) output;
+ flzuint8* op_limit = op + maxout;
+ flzuint32 ctrl = (*ip++) & 31;
+ int loop = 1;
+
+ do
+ {
+ const flzuint8* ref = op;
+ flzuint32 len = ctrl >> 5;
+ flzuint32 ofs = (ctrl & 31) << 8;
+
+ if(ctrl >= 32)
+ {
+#if FASTLZ_LEVEL==2
+ flzuint8 code;
+#endif
+ len--;
+ ref -= ofs;
+ if (len == 7-1)
+#if FASTLZ_LEVEL==1
+ len += *ip++;
+ ref -= *ip++;
+#else
+ do
+ {
+ code = *ip++;
+ len += code;
+ } while (code==255);
+ code = *ip++;
+ ref -= code;
+
+ /* match from 16-bit distance */
+ if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
+ if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
+ {
+ ofs = (*ip++) << 8;
+ ofs += *ip++;
+ ref = op - ofs - MAX_DISTANCE;
+ }
+#endif
+
+#ifdef FASTLZ_SAFE
+ if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit))
+ return 0;
+
+ if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
+ return 0;
+#endif
+
+ if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
+ ctrl = *ip++;
+ else
+ loop = 0;
+
+ if(ref == op)
+ {
+ /* optimize copy for a run */
+ flzuint8 b = ref[-1];
+ *op++ = b;
+ *op++ = b;
+ *op++ = b;
+ for(; len; --len)
+ *op++ = b;
+ }
+ else
+ {
+#if !defined(FASTLZ_STRICT_ALIGN)
+ const flzuint16* p;
+ flzuint16* q;
+#endif
+ /* copy from reference */
+ ref--;
+ *op++ = *ref++;
+ *op++ = *ref++;
+ *op++ = *ref++;
+
+#if !defined(FASTLZ_STRICT_ALIGN)
+ /* copy a byte, so that now it's word aligned */
+ if(len & 1)
+ {
+ *op++ = *ref++;
+ len--;
+ }
+
+ /* copy 16-bit at once */
+ q = (flzuint16*) op;
+ op += len;
+ p = (const flzuint16*) ref;
+ for(len>>=1; len > 4; len-=4)
+ {
+ *q++ = *p++;
+ *q++ = *p++;
+ *q++ = *p++;
+ *q++ = *p++;
+ }
+ for(; len; --len)
+ *q++ = *p++;
+#else
+ for(; len; --len)
+ *op++ = *ref++;
+#endif
+ }
+ }
+ else
+ {
+ ctrl++;
+#ifdef FASTLZ_SAFE
+ if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
+ return 0;
+ if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
+ return 0;
+#endif
+
+ *op++ = *ip++;
+ for(--ctrl; ctrl; ctrl--)
+ *op++ = *ip++;
+
+ loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
+ if(loop)
+ ctrl = *ip++;
+ }
+ }
+ while(FASTLZ_EXPECT_CONDITIONAL(loop));
+
+ return op - (flzuint8*)output;
+}
+
+#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
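
fastlz_compress above routes inputs shorter than 64 KiB to fastlz1 and everything else to fastlz2, and fastlz_decompress recovers the level from the top three bits of the first byte of the stream, which fastlz2_compress marks with |= (1 << 5). A standalone sketch of that marker check; the helper name and sample bytes are illustrative, only the shift mirrors the code above:

    #include <cstdio>

    // Same test fastlz_decompress applies to byte 0: top three bits 000 -> level 1,
    // 001 -> level 2 (the marker set by "*(flzuint8*)output |= (1 << 5)" above).
    static int stream_level(const unsigned char* stream) {
      return (stream[0] >> 5) + 1;
    }

    int main() {
      const unsigned char level1[] = { 0x1F };             // literal-run opcode, marker bits clear
      const unsigned char level2[] = { 0x1F | (1 << 5) };  // same opcode with the fastlz2 marker
      std::printf("%d %d\n", stream_level(level1), stream_level(level2));  // prints "1 2"
      return 0;
    }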
diff --git a/contrib/libs/fastlz/fastlz.h b/contrib/libs/fastlz/fastlz.h
index 821626a3ef..5bce060a18 100644
--- a/contrib/libs/fastlz/fastlz.h
+++ b/contrib/libs/fastlz/fastlz.h
@@ -1,102 +1,102 @@
-/*
- FastLZ - lightning-fast lossless compression library
-
- Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
- Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
- Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-
-#ifndef FASTLZ_H
-#define FASTLZ_H
-
-#include "rename.h"
-
-#define FASTLZ_VERSION 0x000100
-
-#define FASTLZ_VERSION_MAJOR 0
-#define FASTLZ_VERSION_MINOR 0
-#define FASTLZ_VERSION_REVISION 0
-
-#define FASTLZ_VERSION_STRING "0.1.0"
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/**
- Compress a block of data in the input buffer and returns the size of
- compressed block. The size of input buffer is specified by length. The
- minimum input buffer size is 16.
-
- The output buffer must be at least 5% larger than the input buffer
- and can not be smaller than 66 bytes.
-
- If the input is not compressible, the return value might be larger than
- length (input buffer size).
-
- The input buffer and the output buffer can not overlap.
-*/
-
-int fastlz_compress(const void* input, int length, void* output);
-
-/**
- Decompress a block of compressed data and returns the size of the
- decompressed block. If error occurs, e.g. the compressed data is
- corrupted or the output buffer is not large enough, then 0 (zero)
- will be returned instead.
-
- The input buffer and the output buffer can not overlap.
-
- Decompression is memory safe and guaranteed not to write the output buffer
- more than what is specified in maxout.
- */
-
-int fastlz_decompress(const void* input, int length, void* output, int maxout);
-
-/**
- Compress a block of data in the input buffer and returns the size of
- compressed block. The size of input buffer is specified by length. The
- minimum input buffer size is 16.
-
- The output buffer must be at least 5% larger than the input buffer
- and can not be smaller than 66 bytes.
-
- If the input is not compressible, the return value might be larger than
- length (input buffer size).
-
- The input buffer and the output buffer can not overlap.
-
- Compression level can be specified in parameter level. At the moment,
- only level 1 and level 2 are supported.
- Level 1 is the fastest compression and generally useful for short data.
- Level 2 is slightly slower but it gives better compression ratio.
-
- Note that the compressed data, regardless of the level, can always be
- decompressed using the function fastlz_decompress above.
-*/
-
-int fastlz_compress_level(int level, const void* input, int length, void* output);
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FASTLZ_H */
+/*
+ FastLZ - lightning-fast lossless compression library
+
+ Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
+ Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
+ Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#ifndef FASTLZ_H
+#define FASTLZ_H
+
+#include "rename.h"
+
+#define FASTLZ_VERSION 0x000100
+
+#define FASTLZ_VERSION_MAJOR 0
+#define FASTLZ_VERSION_MINOR 0
+#define FASTLZ_VERSION_REVISION 0
+
+#define FASTLZ_VERSION_STRING "0.1.0"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**
+  Compresses a block of data in the input buffer and returns the size of the
+  compressed block. The size of the input buffer is specified by length. The
+  minimum input buffer size is 16.
+
+ The output buffer must be at least 5% larger than the input buffer
+ and can not be smaller than 66 bytes.
+
+ If the input is not compressible, the return value might be larger than
+ length (input buffer size).
+
+ The input buffer and the output buffer can not overlap.
+*/
+
+int fastlz_compress(const void* input, int length, void* output);
+
+/**
+  Decompresses a block of compressed data and returns the size of the
+  decompressed block. If an error occurs, e.g. the compressed data is
+  corrupted or the output buffer is not large enough, then 0 (zero)
+  will be returned instead.
+
+  The input buffer and the output buffer can not overlap.
+
+  Decompression is memory safe and guaranteed not to write to the output
+  buffer more than what is specified in maxout.
+ */
+
+int fastlz_decompress(const void* input, int length, void* output, int maxout);
+
+/**
+  Compresses a block of data in the input buffer and returns the size of the
+  compressed block. The size of the input buffer is specified by length. The
+  minimum input buffer size is 16.
+
+ The output buffer must be at least 5% larger than the input buffer
+ and can not be smaller than 66 bytes.
+
+ If the input is not compressible, the return value might be larger than
+ length (input buffer size).
+
+ The input buffer and the output buffer can not overlap.
+
+ Compression level can be specified in parameter level. At the moment,
+ only level 1 and level 2 are supported.
+ Level 1 is the fastest compression and generally useful for short data.
+  Level 2 is slightly slower but it gives a better compression ratio.
+
+ Note that the compressed data, regardless of the level, can always be
+ decompressed using the function fastlz_decompress above.
+*/
+
+int fastlz_compress_level(int level, const void* input, int length, void* output);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FASTLZ_H */
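
The buffer rules documented above (an output buffer at least 5% larger than the input and never smaller than 66 bytes, an input of at least 16 bytes, 0 as the decompression error return) can be exercised with a short round trip. A minimal sketch, assuming the vendored header is reachable as "fastlz.h"; rename.h transparently maps the calls onto the y-prefixed symbols:

    #include <cstring>
    #include <string>
    #include <vector>
    #include "fastlz.h"  // the header above; pulls in rename.h

    int main() {
      const std::string text(1024, 'a');  // >= 16 bytes, highly compressible

      // Worst case per the docs: at least 5% larger than the input, never below 66 bytes.
      std::vector<char> packed(text.size() + text.size() / 20 + 66);
      const int packed_size =
          fastlz_compress(text.data(), static_cast<int>(text.size()), packed.data());

      std::vector<char> restored(text.size());
      const int restored_size = fastlz_decompress(packed.data(), packed_size,
                                                  restored.data(),
                                                  static_cast<int>(restored.size()));

      // fastlz_decompress returns 0 on corrupt input or an undersized output buffer.
      const bool ok = restored_size == static_cast<int>(text.size()) &&
                      std::memcmp(restored.data(), text.data(), text.size()) == 0;
      return ok ? 0 : 1;
    }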
diff --git a/contrib/libs/fastlz/rename.h b/contrib/libs/fastlz/rename.h
index d99342a51c..143a9f08f4 100644
--- a/contrib/libs/fastlz/rename.h
+++ b/contrib/libs/fastlz/rename.h
@@ -1,8 +1,8 @@
-#ifndef rename_h_7d8f6576asdf5
-#define rename_h_7d8f6576asdf5
-
-#define fastlz_compress yfastlz_compress
-#define fastlz_decompress yfastlz_decompress
-#define fastlz_compress_level yfastlz_compress_level
-
-#endif
+#ifndef rename_h_7d8f6576asdf5
+#define rename_h_7d8f6576asdf5
+
+#define fastlz_compress yfastlz_compress
+#define fastlz_decompress yfastlz_decompress
+#define fastlz_compress_level yfastlz_compress_level
+
+#endif
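
rename.h prefixes the three public symbols, presumably so this vendored copy cannot clash with another FastLZ linked into the same binary: both fastlz.c and every caller see the macros through fastlz.h, so sources keep spelling fastlz_compress while the object files only contain yfastlz_compress. A compressed sketch of the pattern with hypothetical names:

    #include <cstdio>

    // A rename.h-style macro: every textual use of the public name below, in the
    // definition and in all callers alike, becomes the prefixed symbol.
    #define my_sum y_my_sum

    static int my_sum(int a, int b) { return a + b; }  // actually defines y_my_sum

    int main() {
      std::printf("%d\n", my_sum(2, 3));  // calls y_my_sum; prints 5
      return 0;
    }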
diff --git a/contrib/libs/fastlz/ya.make b/contrib/libs/fastlz/ya.make
index 476ed9248e..7f31ac0bec 100644
--- a/contrib/libs/fastlz/ya.make
+++ b/contrib/libs/fastlz/ya.make
@@ -1,6 +1,6 @@
-LIBRARY()
+LIBRARY()
-LICENSE(MIT)
+LICENSE(MIT)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
@@ -10,14 +10,14 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
NO_UTIL()
NO_COMPILER_WARNINGS()
-
-SRCS(
- fastlz.c
+
+SRCS(
+ fastlz.c
rename.h
-)
-
-END()
+)
+
+END()
diff --git a/contrib/libs/fmt/test/ya.make b/contrib/libs/fmt/test/ya.make
index 38c2e4ecae..8db82d6c1e 100644
--- a/contrib/libs/fmt/test/ya.make
+++ b/contrib/libs/fmt/test/ya.make
@@ -9,8 +9,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(MIT)
-
+LICENSE(MIT)
+
PEERDIR(
contrib/libs/fmt
contrib/restricted/googletest/googlemock
diff --git a/contrib/libs/grpc/grpc++/ya.make b/contrib/libs/grpc/grpc++/ya.make
index 63f85a6a8f..788da4ce7e 100644
--- a/contrib/libs/grpc/grpc++/ya.make
+++ b/contrib/libs/grpc/grpc++/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/grpc/grpc
contrib/libs/grpc/src/core/lib
diff --git a/contrib/libs/grpc/grpc++_error_details/ya.make b/contrib/libs/grpc/grpc++_error_details/ya.make
index efbae2eb9d..c1b3fad26f 100644
--- a/contrib/libs/grpc/grpc++_error_details/ya.make
+++ b/contrib/libs/grpc/grpc++_error_details/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/grpc/grpc
contrib/libs/grpc/grpc++
diff --git a/contrib/libs/grpc/grpc++_reflection/ya.make b/contrib/libs/grpc/grpc++_reflection/ya.make
index d9083c1fe9..3068b78f9a 100644
--- a/contrib/libs/grpc/grpc++_reflection/ya.make
+++ b/contrib/libs/grpc/grpc++_reflection/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/grpc/grpc
contrib/libs/grpc/grpc++
diff --git a/contrib/libs/grpc/grpc++_unsecure/ya.make b/contrib/libs/grpc/grpc++_unsecure/ya.make
index a0d8820c12..4810d9a037 100644
--- a/contrib/libs/grpc/grpc++_unsecure/ya.make
+++ b/contrib/libs/grpc/grpc++_unsecure/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/grpc/grpc
contrib/libs/grpc/grpc++
diff --git a/contrib/libs/grpc/grpc/ya.make b/contrib/libs/grpc/grpc/ya.make
index 9381f9d313..c29c11c0c4 100644
--- a/contrib/libs/grpc/grpc/ya.make
+++ b/contrib/libs/grpc/grpc/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
certs
contrib/libs/c-ares
diff --git a/contrib/libs/grpc/grpc_unsecure/ya.make b/contrib/libs/grpc/grpc_unsecure/ya.make
index ceef301d91..38ef63eb18 100644
--- a/contrib/libs/grpc/grpc_unsecure/ya.make
+++ b/contrib/libs/grpc/grpc_unsecure/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/c-ares
contrib/libs/grpc/grpc
diff --git a/contrib/libs/grpc/grpcpp_channelz/ya.make b/contrib/libs/grpc/grpcpp_channelz/ya.make
index b99a9fc7f2..31761e3d47 100644
--- a/contrib/libs/grpc/grpcpp_channelz/ya.make
+++ b/contrib/libs/grpc/grpcpp_channelz/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/grpc/grpc
contrib/libs/grpc/grpc++
diff --git a/contrib/libs/grpc/python/ya.make b/contrib/libs/grpc/python/ya.make
index 5668b8fcf3..f22d3393fc 100644
--- a/contrib/libs/grpc/python/ya.make
+++ b/contrib/libs/grpc/python/ya.make
@@ -2,8 +2,8 @@ PY23_LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
OWNER(
akastornov
g:contrib
diff --git a/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make b/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make
index ddd91ed13f..068ae1ead6 100644
--- a/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make
+++ b/contrib/libs/grpc/src/compiler/grpc_plugin_support/ya.make
@@ -6,8 +6,8 @@ WITHOUT_LICENSE_TEXTS()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
PEERDIR(
contrib/libs/protobuf
contrib/libs/protoc
diff --git a/contrib/libs/grpc/src/core/lib/ya.make b/contrib/libs/grpc/src/core/lib/ya.make
index 82d64f2fd3..93f59abe37 100644
--- a/contrib/libs/grpc/src/core/lib/ya.make
+++ b/contrib/libs/grpc/src/core/lib/ya.make
@@ -4,8 +4,8 @@ LIBRARY()
OWNER(g:cpp-contrib)
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/grpc/src/proto/grpc/channelz/ya.make b/contrib/libs/grpc/src/proto/grpc/channelz/ya.make
index 7d62c80e6b..9f1e3cedc9 100644
--- a/contrib/libs/grpc/src/proto/grpc/channelz/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/channelz/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/core/ya.make b/contrib/libs/grpc/src/proto/grpc/core/ya.make
index c0db9a79fa..856c34ee53 100644
--- a/contrib/libs/grpc/src/proto/grpc/core/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/core/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make b/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make
index 469b2f1af4..7bb8b0dff9 100644
--- a/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/health/v1/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make b/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make
index f213f75366..f8e301c937 100644
--- a/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/reflection/v1alpha/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/status/ya.make b/contrib/libs/grpc/src/proto/grpc/status/ya.make
index 588164c661..eec367765f 100644
--- a/contrib/libs/grpc/src/proto/grpc/status/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/status/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make b/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make
index b0bfcc81a1..b94aa415f8 100644
--- a/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/testing/duplicate/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make b/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make
index 419c0ddfbe..994ec6fa82 100644
--- a/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/testing/xds/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/proto/grpc/testing/ya.make b/contrib/libs/grpc/src/proto/grpc/testing/ya.make
index 42db0476aa..2704585a37 100644
--- a/contrib/libs/grpc/src/proto/grpc/testing/ya.make
+++ b/contrib/libs/grpc/src/proto/grpc/testing/ya.make
@@ -1,7 +1,7 @@
PROTO_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/python/grpcio_channelz/ya.make b/contrib/libs/grpc/src/python/grpcio_channelz/ya.make
index 62dc3e7c3a..2703d70ab0 100644
--- a/contrib/libs/grpc/src/python/grpcio_channelz/ya.make
+++ b/contrib/libs/grpc/src/python/grpcio_channelz/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make b/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make
index 0d1611f13d..4bab1758a5 100644
--- a/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make
+++ b/contrib/libs/grpc/src/python/grpcio_health_checking/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/python/grpcio_reflection/ya.make b/contrib/libs/grpc/src/python/grpcio_reflection/ya.make
index ca9ac093f6..c0f7d61dcc 100644
--- a/contrib/libs/grpc/src/python/grpcio_reflection/ya.make
+++ b/contrib/libs/grpc/src/python/grpcio_reflection/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/src/python/grpcio_status/ya.make b/contrib/libs/grpc/src/python/grpcio_status/ya.make
index fb7f71272a..61a3ee4336 100644
--- a/contrib/libs/grpc/src/python/grpcio_status/ya.make
+++ b/contrib/libs/grpc/src/python/grpcio_status/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/grpc/test/core/util/ya.make b/contrib/libs/grpc/test/core/util/ya.make
index 0f15e395d7..fbaad80cad 100644
--- a/contrib/libs/grpc/test/core/util/ya.make
+++ b/contrib/libs/grpc/test/core/util/ya.make
@@ -1,7 +1,7 @@
LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(dvshkurko)
diff --git a/contrib/libs/grpc/test/cpp/end2end/ya.make b/contrib/libs/grpc/test/cpp/end2end/ya.make
index 0ffbd6783b..b9c1dc7fe0 100644
--- a/contrib/libs/grpc/test/cpp/end2end/ya.make
+++ b/contrib/libs/grpc/test/cpp/end2end/ya.make
@@ -1,7 +1,7 @@
LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(dvshkurko)
diff --git a/contrib/libs/grpc/test/cpp/util/ya.make b/contrib/libs/grpc/test/cpp/util/ya.make
index 05a9b0853c..f043cc5b14 100644
--- a/contrib/libs/grpc/test/cpp/util/ya.make
+++ b/contrib/libs/grpc/test/cpp/util/ya.make
@@ -1,7 +1,7 @@
LIBRARY()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(orivej)
diff --git a/contrib/libs/grpc/ya.make b/contrib/libs/grpc/ya.make
index 23dd8c6c6e..29848d23ea 100644
--- a/contrib/libs/grpc/ya.make
+++ b/contrib/libs/grpc/ya.make
@@ -2,7 +2,7 @@
LIBRARY()
LICENSE(Apache-2.0)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(1.33.2)
diff --git a/contrib/libs/hdr_histogram/ya.make b/contrib/libs/hdr_histogram/ya.make
index 968be53c35..b20d977b57 100644
--- a/contrib/libs/hdr_histogram/ya.make
+++ b/contrib/libs/hdr_histogram/ya.make
@@ -4,7 +4,7 @@ LICENSE(
BSD-2-Clause AND
CC0-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(0.9.5)
diff --git a/contrib/libs/highwayhash/arch/avx2/ya.make b/contrib/libs/highwayhash/arch/avx2/ya.make
index 25fafd7e97..3084a352d8 100644
--- a/contrib/libs/highwayhash/arch/avx2/ya.make
+++ b/contrib/libs/highwayhash/arch/avx2/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
OWNER(somov)
ADDINCL(contrib/libs/highwayhash)
diff --git a/contrib/libs/highwayhash/arch/sse41/ya.make b/contrib/libs/highwayhash/arch/sse41/ya.make
index 64924d1271..d94ad97038 100644
--- a/contrib/libs/highwayhash/arch/sse41/ya.make
+++ b/contrib/libs/highwayhash/arch/sse41/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(Apache-2.0)
-
+LICENSE(Apache-2.0)
+
OWNER(somov)
ADDINCL(contrib/libs/highwayhash)
diff --git a/contrib/libs/hyperscan/ya.make b/contrib/libs/hyperscan/ya.make
index a8f671f181..7783969e4a 100644
--- a/contrib/libs/hyperscan/ya.make
+++ b/contrib/libs/hyperscan/ya.make
@@ -1,7 +1,7 @@
# Generated by devtools/yamaker from nixpkgs cc3b147ed182a6cae239348ef094158815da14ae.
LIBRARY()
-
+
OWNER(
galtsev
g:antiinfra
diff --git a/contrib/libs/jemalloc/hack.cpp b/contrib/libs/jemalloc/hack.cpp
index 39fd018eb0..900856d353 100644
--- a/contrib/libs/jemalloc/hack.cpp
+++ b/contrib/libs/jemalloc/hack.cpp
@@ -1,14 +1,14 @@
-#include "hack.h"
-
-#include <util/system/yield.cpp>
-#include <util/system/spinlock.cpp>
-
-#include "spinlock.h"
-
-void SPIN_L(spinlock_t* l) {
- AcquireAdaptiveLock(l);
-}
-
-void SPIN_U(spinlock_t* l) {
- ReleaseAdaptiveLock(l);
-}
+#include "hack.h"
+
+#include <util/system/yield.cpp>
+#include <util/system/spinlock.cpp>
+
+#include "spinlock.h"
+
+void SPIN_L(spinlock_t* l) {
+ AcquireAdaptiveLock(l);
+}
+
+void SPIN_U(spinlock_t* l) {
+ ReleaseAdaptiveLock(l);
+}
diff --git a/contrib/libs/jemalloc/hack.h b/contrib/libs/jemalloc/hack.h
index 160b79bebe..3b172a2da2 100644
--- a/contrib/libs/jemalloc/hack.h
+++ b/contrib/libs/jemalloc/hack.h
@@ -1,21 +1,21 @@
#pragma once
-
-#include <sys/types.h>
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#define AcquireAdaptiveLockSlow AllocAcquireAdaptiveLockSlow
-#define SchedYield AllocSchedYield
-#define ThreadYield AllocThreadYield
-#define NSystemInfo NAllocSystemInfo
-
+
+#include <sys/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define AcquireAdaptiveLockSlow AllocAcquireAdaptiveLockSlow
+#define SchedYield AllocSchedYield
+#define ThreadYield AllocThreadYield
+#define NSystemInfo NAllocSystemInfo
+
#ifdef _MSC_VER
# define __restrict__ __restrict
# define JEMALLOC_EXPORT
#endif
-#if defined(__cplusplus)
-};
-#endif
+#if defined(__cplusplus)
+};
+#endif
diff --git a/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h b/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h
index b7d31ebedf..28cc151f07 100644
--- a/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h
+++ b/contrib/libs/jemalloc/include/jemalloc/internal/jemalloc_internal_defs-linux.h
@@ -38,9 +38,9 @@
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
#else
-#define CPU_SPINWAIT
+#define CPU_SPINWAIT
#define HAVE_CPU_SPINWAIT 0
-#endif
+#endif
/*
* Number of significant bits in virtual addresses. This may be less than the
diff --git a/contrib/libs/jemalloc/reg_zone.cpp b/contrib/libs/jemalloc/reg_zone.cpp
index 7946b87928..6a7b9d69e1 100644
--- a/contrib/libs/jemalloc/reg_zone.cpp
+++ b/contrib/libs/jemalloc/reg_zone.cpp
@@ -1,23 +1,23 @@
#include <util/system/compiler.h>
extern "C" void je_zone_register();
-
+
static volatile bool initialized = false;
-namespace {
+namespace {
struct TInit {
- inline TInit() {
+ inline TInit() {
if (!initialized) {
je_zone_register();
initialized = true;
}
- }
+ }
};
void zone_register() {
static TInit init;
}
-}
+}
extern "C" {
void je_assure_zone_register() {
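The reg_zone.cpp hunk above routes je_zone_register() through a function-local static (TInit) so the allocator zone is registered at most once, however many times zone_register() is reached. A minimal, self-contained sketch of that call-once pattern follows; RegisterOnce() and EnsureRegistered() are hypothetical stand-ins for je_zone_register() and zone_register(), not part of the diff.

#include <cstdio>

// Hypothetical payload standing in for je_zone_register(); should run exactly once.
static void RegisterOnce() {
    std::puts("zone registered");
}

static void EnsureRegistered() {
    // A function-local static is constructed on first use only, and that
    // construction is thread-safe in C++11 and later -- the guarantee the
    // TInit object in reg_zone.cpp leans on.
    struct TInitSketch {
        TInitSketch() { RegisterOnce(); }
    };
    static TInitSketch init;
    (void)init;
}

int main() {
    EnsureRegistered();
    EnsureRegistered(); // the second call does not re-run RegisterOnce()
    return 0;
}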
diff --git a/contrib/libs/jemalloc/spinlock.h b/contrib/libs/jemalloc/spinlock.h
index 1430b9bb8f..93fcf10e12 100644
--- a/contrib/libs/jemalloc/spinlock.h
+++ b/contrib/libs/jemalloc/spinlock.h
@@ -1,21 +1,21 @@
#pragma once
-
+
#include <util/system/defaults.h>
-
+
typedef volatile intptr_t spinlock_t;
-#define SPIN_L AllocAcquireAdaptiveLock
-#define SPIN_U AllocReleaseAdaptiveLock
-
+#define SPIN_L AllocAcquireAdaptiveLock
+#define SPIN_U AllocReleaseAdaptiveLock
+
#define _SPINLOCK_INITIALIZER 0
-#define _SPINUNLOCK(_lck) SPIN_U(_lck)
+#define _SPINUNLOCK(_lck) SPIN_U(_lck)
#define _SPINLOCK(_lck) SPIN_L(_lck)
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
- void SPIN_L(spinlock_t* lock);
- void SPIN_U(spinlock_t* lock);
-#if defined(__cplusplus)
-};
-#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+ void SPIN_L(spinlock_t* lock);
+ void SPIN_U(spinlock_t* lock);
+#if defined(__cplusplus)
+};
+#endif
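spinlock.h above maps the bzip-style SPIN_L/SPIN_U names onto AllocAcquireAdaptiveLock/AllocReleaseAdaptiveLock over a volatile intptr_t word; the real adaptive lock lives under util/system and is not shown in this diff. Below is a minimal sketch of the same acquire/release shape, assuming only standard C++ atomics (a plain busy-wait, with none of the adaptive yielding the real lock may perform); TSpinLockSketch and its members are illustrative names, not part of the diff.

#include <atomic>

// Stand-in for the acquire/release pair referenced by SPIN_L / SPIN_U.
struct TSpinLockSketch {
    std::atomic<bool> Locked{false};

    void Acquire() {                       // SPIN_L equivalent
        while (Locked.exchange(true, std::memory_order_acquire)) {
            // spin until the current holder clears the flag
        }
    }

    void Release() {                       // SPIN_U equivalent
        Locked.store(false, std::memory_order_release);
    }
};

int main() {
    TSpinLockSketch lock;
    lock.Acquire();
    // ... critical section ...
    lock.Release();
    return 0;
}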
diff --git a/contrib/libs/jemalloc/ya.make b/contrib/libs/jemalloc/ya.make
index ffb0e62400..586de30ab0 100644
--- a/contrib/libs/jemalloc/ya.make
+++ b/contrib/libs/jemalloc/ya.make
@@ -1,12 +1,12 @@
# Generated by devtools/yamaker from nixpkgs 21.11.
LIBRARY()
-
+
OWNER(
g:contrib
g:cpp-contrib
)
-
+
VERSION(5.2.1)
ORIGINAL_SOURCE(https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2)
diff --git a/contrib/libs/jwt-cpp/ya.make b/contrib/libs/jwt-cpp/ya.make
index 6c7006a0c0..c7ddd33162 100644
--- a/contrib/libs/jwt-cpp/ya.make
+++ b/contrib/libs/jwt-cpp/ya.make
@@ -6,7 +6,7 @@ OWNER(
pbludov
g:cpp-contrib
)
-
+
VERSION(0.2.0)
ORIGINAL_SOURCE(https://github.com/Thalhammer/jwt-cpp/archive/2b3ddae668f5b0dac92f57207312dc50b5bdb2f8.tar.gz)
diff --git a/contrib/libs/libaio/static/ya.make b/contrib/libs/libaio/static/ya.make
index b92e014caa..c4a7e200bb 100644
--- a/contrib/libs/libaio/static/ya.make
+++ b/contrib/libs/libaio/static/ya.make
@@ -15,7 +15,7 @@ ELSE()
ENDIF()
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+
OWNER(
vskipin
g:contrib
diff --git a/contrib/libs/libaio/ya.make b/contrib/libs/libaio/ya.make
index b16741c7cd..3eb09b549b 100644
--- a/contrib/libs/libaio/ya.make
+++ b/contrib/libs/libaio/ya.make
@@ -4,7 +4,7 @@ LIBRARY()
# revision: 5a546a834c36070648158d19dd564762d59f8eb8
LICENSE(Service-Dll-Harness)
-
+
WITHOUT_LICENSE_TEXTS()
VERSION(2015-07-01-5a546a834c36070648158d19dd564762d59f8eb8)
diff --git a/contrib/libs/libbz2/blocksort.c b/contrib/libs/libbz2/blocksort.c
index f788e88c80..92d81fe287 100644
--- a/contrib/libs/libbz2/blocksort.c
+++ b/contrib/libs/libbz2/blocksort.c
@@ -1,1094 +1,1094 @@
-
-/*-------------------------------------------------------------*/
-/*--- Block sorting machinery ---*/
-/*--- blocksort.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery ---*/
+/*--- blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
- Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#include "bzlib_private.h"
-
-/*---------------------------------------------*/
-/*--- Fallback O(N log(N)^2) sorting ---*/
-/*--- algorithm, for repetitive blocks ---*/
-/*---------------------------------------------*/
-
-/*---------------------------------------------*/
-static
-__inline__
-void fallbackSimpleSort ( UInt32* fmap,
- UInt32* eclass,
- Int32 lo,
- Int32 hi )
-{
- Int32 i, j, tmp;
- UInt32 ec_tmp;
-
- if (lo == hi) return;
-
- if (hi - lo > 3) {
- for ( i = hi-4; i >= lo; i-- ) {
- tmp = fmap[i];
- ec_tmp = eclass[tmp];
- for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
- fmap[j-4] = fmap[j];
- fmap[j-4] = tmp;
- }
- }
-
- for ( i = hi-1; i >= lo; i-- ) {
- tmp = fmap[i];
- ec_tmp = eclass[tmp];
- for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
- fmap[j-1] = fmap[j];
- fmap[j-1] = tmp;
- }
-}
-
-
-/*---------------------------------------------*/
-#define fswap(zz1, zz2) \
- { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
-
-#define fvswap(zzp1, zzp2, zzn) \
-{ \
- Int32 yyp1 = (zzp1); \
- Int32 yyp2 = (zzp2); \
- Int32 yyn = (zzn); \
- while (yyn > 0) { \
- fswap(fmap[yyp1], fmap[yyp2]); \
- yyp1++; yyp2++; yyn--; \
- } \
-}
-
-
-#define fmin(a,b) ((a) < (b)) ? (a) : (b)
-
-#define fpush(lz,hz) { stackLo[sp] = lz; \
- stackHi[sp] = hz; \
- sp++; }
-
-#define fpop(lz,hz) { sp--; \
- lz = stackLo[sp]; \
- hz = stackHi[sp]; }
-
-#define FALLBACK_QSORT_SMALL_THRESH 10
-#define FALLBACK_QSORT_STACK_SIZE 100
-
-
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting ---*/
+/*--- algorithm, for repetitive blocks ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
static
-void fallbackQSort3 ( UInt32* fmap,
- UInt32* eclass,
- Int32 loSt,
- Int32 hiSt )
-{
- Int32 unLo, unHi, ltLo, gtHi, n, m;
- Int32 sp, lo, hi;
- UInt32 med, r, r3;
- Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
- Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
-
- r = 0;
-
- sp = 0;
- fpush ( loSt, hiSt );
-
- while (sp > 0) {
-
- AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
-
- fpop ( lo, hi );
- if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
- fallbackSimpleSort ( fmap, eclass, lo, hi );
- continue;
- }
-
- /* Random partitioning. Median of 3 sometimes fails to
- avoid bad cases. Median of 9 seems to help but
- looks rather expensive. This too seems to work but
- is cheaper. Guidance for the magic constants
- 7621 and 32768 is taken from Sedgewick's algorithms
- book, chapter 35.
- */
- r = ((r * 7621) + 1) % 32768;
- r3 = r % 3;
- if (r3 == 0) med = eclass[fmap[lo]]; else
- if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
- med = eclass[fmap[hi]];
-
- unLo = ltLo = lo;
- unHi = gtHi = hi;
-
- while (1) {
- while (1) {
- if (unLo > unHi) break;
- n = (Int32)eclass[fmap[unLo]] - (Int32)med;
- if (n == 0) {
- fswap(fmap[unLo], fmap[ltLo]);
- ltLo++; unLo++;
- continue;
- };
- if (n > 0) break;
- unLo++;
- }
- while (1) {
- if (unLo > unHi) break;
- n = (Int32)eclass[fmap[unHi]] - (Int32)med;
- if (n == 0) {
- fswap(fmap[unHi], fmap[gtHi]);
- gtHi--; unHi--;
- continue;
- };
- if (n < 0) break;
- unHi--;
- }
- if (unLo > unHi) break;
- fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
- }
-
- AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
-
- if (gtHi < ltLo) continue;
-
- n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
- m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
-
- n = lo + unLo - ltLo - 1;
- m = hi - (gtHi - unHi) + 1;
-
- if (n - lo > hi - m) {
- fpush ( lo, n );
- fpush ( m, hi );
- } else {
- fpush ( m, hi );
- fpush ( lo, n );
- }
- }
-}
-
-#undef fmin
-#undef fpush
-#undef fpop
-#undef fswap
-#undef fvswap
-#undef FALLBACK_QSORT_SMALL_THRESH
-#undef FALLBACK_QSORT_STACK_SIZE
-
-
-/*---------------------------------------------*/
-/* Pre:
- nblock > 0
- eclass exists for [0 .. nblock-1]
- ((UChar*)eclass) [0 .. nblock-1] holds block
- ptr exists for [0 .. nblock-1]
-
- Post:
- ((UChar*)eclass) [0 .. nblock-1] holds block
- All other areas of eclass destroyed
- fmap [0 .. nblock-1] holds sorted order
- bhtab [ 0 .. 2+(nblock/32) ] destroyed
-*/
-
+__inline__
+void fallbackSimpleSort ( UInt32* fmap,
+ UInt32* eclass,
+ Int32 lo,
+ Int32 hi )
+{
+ Int32 i, j, tmp;
+ UInt32 ec_tmp;
+
+ if (lo == hi) return;
+
+ if (hi - lo > 3) {
+ for ( i = hi-4; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
+ fmap[j-4] = fmap[j];
+ fmap[j-4] = tmp;
+ }
+ }
+
+ for ( i = hi-1; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
+ fmap[j-1] = fmap[j];
+ fmap[j-1] = tmp;
+ }
+}
+
+
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define fvswap(zzp1, zzp2, zzn) \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ fswap(fmap[yyp1], fmap[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+
+#define fmin(a,b) ((a) < (b)) ? (a) : (b)
+
+#define fpush(lz,hz) { stackLo[sp] = lz; \
+ stackHi[sp] = hz; \
+ sp++; }
+
+#define fpop(lz,hz) { sp--; \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; }
+
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE 100
+
+
+static
+void fallbackQSort3 ( UInt32* fmap,
+ UInt32* eclass,
+ Int32 loSt,
+ Int32 hiSt )
+{
+ Int32 unLo, unHi, ltLo, gtHi, n, m;
+ Int32 sp, lo, hi;
+ UInt32 med, r, r3;
+ Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+ Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+
+ r = 0;
+
+ sp = 0;
+ fpush ( loSt, hiSt );
+
+ while (sp > 0) {
+
+ AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
+
+ fpop ( lo, hi );
+ if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+ fallbackSimpleSort ( fmap, eclass, lo, hi );
+ continue;
+ }
+
+ /* Random partitioning. Median of 3 sometimes fails to
+ avoid bad cases. Median of 9 seems to help but
+ looks rather expensive. This too seems to work but
+ is cheaper. Guidance for the magic constants
+ 7621 and 32768 is taken from Sedgewick's algorithms
+ book, chapter 35.
+ */
+ r = ((r * 7621) + 1) % 32768;
+ r3 = r % 3;
+ if (r3 == 0) med = eclass[fmap[lo]]; else
+ if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+ med = eclass[fmap[hi]];
+
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (1) {
+ while (1) {
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unLo], fmap[ltLo]);
+ ltLo++; unLo++;
+ continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (1) {
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unHi], fmap[gtHi]);
+ gtHi--; unHi--;
+ continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+ }
+
+ AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+
+ if (gtHi < ltLo) continue;
+
+ n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+ m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+
+ if (n - lo > hi - m) {
+ fpush ( lo, n );
+ fpush ( m, hi );
+ } else {
+ fpush ( m, hi );
+ fpush ( lo, n );
+ }
+ }
+}
+
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > 0
+ eclass exists for [0 .. nblock-1]
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ ptr exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ All other areas of eclass destroyed
+ fmap [0 .. nblock-1] holds sorted order
+ bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
#define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
#define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
-#define WORD_BH(zz) bhtab[(zz) >> 5]
-#define UNALIGNED_BH(zz) ((zz) & 0x01f)
-
-static
-void fallbackSort ( UInt32* fmap,
- UInt32* eclass,
- UInt32* bhtab,
- Int32 nblock,
- Int32 verb )
-{
- Int32 ftab[257];
- Int32 ftabCopy[256];
- Int32 H, i, j, k, l, r, cc, cc1;
- Int32 nNotDone;
- Int32 nBhtab;
- UChar* eclass8 = (UChar*)eclass;
-
- /*--
- Initial 1-char radix sort to generate
- initial fmap and initial BH bits.
- --*/
- if (verb >= 4)
- VPrintf0 ( " bucket sorting ...\n" );
- for (i = 0; i < 257; i++) ftab[i] = 0;
- for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
- for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
- for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
-
- for (i = 0; i < nblock; i++) {
- j = eclass8[i];
- k = ftab[j] - 1;
- ftab[j] = k;
- fmap[k] = i;
- }
-
- nBhtab = 2 + (nblock / 32);
- for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
- for (i = 0; i < 256; i++) SET_BH(ftab[i]);
-
+#define WORD_BH(zz) bhtab[(zz) >> 5]
+#define UNALIGNED_BH(zz) ((zz) & 0x01f)
+
+static
+void fallbackSort ( UInt32* fmap,
+ UInt32* eclass,
+ UInt32* bhtab,
+ Int32 nblock,
+ Int32 verb )
+{
+ Int32 ftab[257];
+ Int32 ftabCopy[256];
+ Int32 H, i, j, k, l, r, cc, cc1;
+ Int32 nNotDone;
+ Int32 nBhtab;
+ UChar* eclass8 = (UChar*)eclass;
+
+ /*--
+ Initial 1-char radix sort to generate
+ initial fmap and initial BH bits.
+ --*/
+ if (verb >= 4)
+ VPrintf0 ( " bucket sorting ...\n" );
+ for (i = 0; i < 257; i++) ftab[i] = 0;
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+ for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
+ for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
+
+ for (i = 0; i < nblock; i++) {
+ j = eclass8[i];
+ k = ftab[j] - 1;
+ ftab[j] = k;
+ fmap[k] = i;
+ }
+
+ nBhtab = 2 + (nblock / 32);
+ for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+ for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+
+ /*--
+ Inductively refine the buckets. Kind-of an
+ "exponential radix sort" (!), inspired by the
+ Manber-Myers suffix array construction algorithm.
+ --*/
+
+ /*-- set sentinel bits for block-end detection --*/
+ for (i = 0; i < 32; i++) {
+ SET_BH(nblock + 2*i);
+ CLEAR_BH(nblock + 2*i + 1);
+ }
+
+ /*-- the log(N) loop --*/
+ H = 1;
+ while (1) {
+
+ if (verb >= 4)
+ VPrintf1 ( " depth %6d has ", H );
+
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ if (ISSET_BH(i)) j = i;
+ k = fmap[i] - H; if (k < 0) k += nblock;
+ eclass[k] = j;
+ }
+
+ nNotDone = 0;
+ r = -1;
+ while (1) {
+
+ /*-- find the next non-singleton bucket --*/
+ k = r + 1;
+ while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+ if (ISSET_BH(k)) {
+ while (WORD_BH(k) == 0xffffffff) k += 32;
+ while (ISSET_BH(k)) k++;
+ }
+ l = k - 1;
+ if (l >= nblock) break;
+ while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+ if (!ISSET_BH(k)) {
+ while (WORD_BH(k) == 0x00000000) k += 32;
+ while (!ISSET_BH(k)) k++;
+ }
+ r = k - 1;
+ if (r >= nblock) break;
+
+ /*-- now [l, r] bracket current bucket --*/
+ if (r > l) {
+ nNotDone += (r - l + 1);
+ fallbackQSort3 ( fmap, eclass, l, r );
+
+ /*-- scan bucket and generate header bits-- */
+ cc = -1;
+ for (i = l; i <= r; i++) {
+ cc1 = eclass[fmap[i]];
+ if (cc != cc1) { SET_BH(i); cc = cc1; };
+ }
+ }
+ }
+
+ if (verb >= 4)
+ VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+
+ H *= 2;
+ if (H > nblock || nNotDone == 0) break;
+ }
+
/*--
- Inductively refine the buckets. Kind-of an
- "exponential radix sort" (!), inspired by the
- Manber-Myers suffix array construction algorithm.
- --*/
-
- /*-- set sentinel bits for block-end detection --*/
- for (i = 0; i < 32; i++) {
- SET_BH(nblock + 2*i);
- CLEAR_BH(nblock + 2*i + 1);
- }
-
- /*-- the log(N) loop --*/
- H = 1;
- while (1) {
-
- if (verb >= 4)
- VPrintf1 ( " depth %6d has ", H );
-
- j = 0;
- for (i = 0; i < nblock; i++) {
- if (ISSET_BH(i)) j = i;
- k = fmap[i] - H; if (k < 0) k += nblock;
- eclass[k] = j;
- }
-
- nNotDone = 0;
- r = -1;
- while (1) {
-
- /*-- find the next non-singleton bucket --*/
- k = r + 1;
- while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
- if (ISSET_BH(k)) {
- while (WORD_BH(k) == 0xffffffff) k += 32;
- while (ISSET_BH(k)) k++;
- }
- l = k - 1;
- if (l >= nblock) break;
- while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
- if (!ISSET_BH(k)) {
- while (WORD_BH(k) == 0x00000000) k += 32;
- while (!ISSET_BH(k)) k++;
- }
- r = k - 1;
- if (r >= nblock) break;
-
- /*-- now [l, r] bracket current bucket --*/
- if (r > l) {
- nNotDone += (r - l + 1);
- fallbackQSort3 ( fmap, eclass, l, r );
-
- /*-- scan bucket and generate header bits-- */
- cc = -1;
- for (i = l; i <= r; i++) {
- cc1 = eclass[fmap[i]];
- if (cc != cc1) { SET_BH(i); cc = cc1; };
- }
- }
- }
-
- if (verb >= 4)
- VPrintf1 ( "%6d unresolved strings\n", nNotDone );
-
- H *= 2;
- if (H > nblock || nNotDone == 0) break;
- }
-
- /*--
- Reconstruct the original block in
- eclass8 [0 .. nblock-1], since the
- previous phase destroyed it.
- --*/
- if (verb >= 4)
- VPrintf0 ( " reconstructing block ...\n" );
- j = 0;
- for (i = 0; i < nblock; i++) {
- while (ftabCopy[j] == 0) j++;
- ftabCopy[j]--;
- eclass8[fmap[i]] = (UChar)j;
- }
- AssertH ( j < 256, 1005 );
-}
-
-#undef SET_BH
-#undef CLEAR_BH
-#undef ISSET_BH
-#undef WORD_BH
-#undef UNALIGNED_BH
-
-
-/*---------------------------------------------*/
-/*--- The main, O(N^2 log(N)) sorting ---*/
-/*--- algorithm. Faster for "normal" ---*/
-/*--- non-repetitive blocks. ---*/
-/*---------------------------------------------*/
-
-/*---------------------------------------------*/
-static
-__inline__
-Bool mainGtU ( UInt32 i1,
- UInt32 i2,
- UChar* block,
- UInt16* quadrant,
- UInt32 nblock,
- Int32* budget )
-{
- Int32 k;
- UChar c1, c2;
- UInt16 s1, s2;
-
- AssertD ( i1 != i2, "mainGtU" );
- /* 1 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 2 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 3 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 4 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 5 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 6 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 7 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 8 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 9 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 10 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 11 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 12 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
-
- k = nblock + 8;
-
- do {
- /* 1 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 2 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 3 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 4 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 5 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 6 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 7 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 8 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
-
- if (i1 >= nblock) i1 -= nblock;
- if (i2 >= nblock) i2 -= nblock;
-
- k -= 8;
- (*budget)--;
- }
- while (k >= 0);
-
- return False;
-}
-
-
-/*---------------------------------------------*/
-/*--
- Knuth's increments seem to work better
- than Incerpi-Sedgewick here. Possibly
- because the number of elems to sort is
- usually small, typically <= 20.
---*/
-static
-Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
- 9841, 29524, 88573, 265720,
- 797161, 2391484 };
-
+ Reconstruct the original block in
+ eclass8 [0 .. nblock-1], since the
+ previous phase destroyed it.
+ --*/
+ if (verb >= 4)
+ VPrintf0 ( " reconstructing block ...\n" );
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ while (ftabCopy[j] == 0) j++;
+ ftabCopy[j]--;
+ eclass8[fmap[i]] = (UChar)j;
+ }
+ AssertH ( j < 256, 1005 );
+}
+
+#undef SET_BH
+#undef CLEAR_BH
+#undef ISSET_BH
+#undef WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting ---*/
+/*--- algorithm. Faster for "normal" ---*/
+/*--- non-repetitive blocks. ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static
+__inline__
+Bool mainGtU ( UInt32 i1,
+ UInt32 i2,
+ UChar* block,
+ UInt16* quadrant,
+ UInt32 nblock,
+ Int32* budget )
+{
+ Int32 k;
+ UChar c1, c2;
+ UInt16 s1, s2;
+
+ AssertD ( i1 != i2, "mainGtU" );
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 9 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 10 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 11 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 12 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+
+ k = nblock + 8;
+
+ do {
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+
+ if (i1 >= nblock) i1 -= nblock;
+ if (i2 >= nblock) i2 -= nblock;
+
+ k -= 8;
+ (*budget)--;
+ }
+ while (k >= 0);
+
+ return False;
+}
+
+
+/*---------------------------------------------*/
+/*--
+ Knuth's increments seem to work better
+ than Incerpi-Sedgewick here. Possibly
+ because the number of elems to sort is
+ usually small, typically <= 20.
+--*/
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
+
+static
+void mainSimpleSort ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ Int32 nblock,
+ Int32 lo,
+ Int32 hi,
+ Int32 d,
+ Int32* budget )
+{
+ Int32 i, j, h, bigN, hp;
+ UInt32 v;
+
+ bigN = hi - lo + 1;
+ if (bigN < 2) return;
+
+ hp = 0;
+ while (incs[hp] < bigN) hp++;
+ hp--;
+
+ for (; hp >= 0; hp--) {
+ h = incs[hp];
+
+ i = lo + h;
+ while (True) {
+
+ /*-- copy 1 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 2 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 3 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ if (*budget < 0) return;
+ }
+ }
+}
+
+
+/*---------------------------------------------*/
+/*--
+ The following is an implementation of
+ an elegant 3-way quicksort for strings,
+ described in a paper "Fast Algorithms for
+ Sorting and Searching Strings", by Robert
+ Sedgewick and Jon L. Bentley.
+--*/
+
+#define mswap(zz1, zz2) \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define mvswap(zzp1, zzp2, zzn) \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ mswap(ptr[yyp1], ptr[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
static
-void mainSimpleSort ( UInt32* ptr,
- UChar* block,
- UInt16* quadrant,
- Int32 nblock,
- Int32 lo,
- Int32 hi,
- Int32 d,
- Int32* budget )
-{
- Int32 i, j, h, bigN, hp;
- UInt32 v;
-
- bigN = hi - lo + 1;
- if (bigN < 2) return;
-
- hp = 0;
- while (incs[hp] < bigN) hp++;
- hp--;
-
- for (; hp >= 0; hp--) {
- h = incs[hp];
-
- i = lo + h;
- while (True) {
-
- /*-- copy 1 --*/
- if (i > hi) break;
- v = ptr[i];
- j = i;
- while ( mainGtU (
- ptr[j-h]+d, v+d, block, quadrant, nblock, budget
- ) ) {
- ptr[j] = ptr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- ptr[j] = v;
- i++;
-
- /*-- copy 2 --*/
- if (i > hi) break;
- v = ptr[i];
- j = i;
- while ( mainGtU (
- ptr[j-h]+d, v+d, block, quadrant, nblock, budget
- ) ) {
- ptr[j] = ptr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- ptr[j] = v;
- i++;
-
- /*-- copy 3 --*/
- if (i > hi) break;
- v = ptr[i];
- j = i;
- while ( mainGtU (
- ptr[j-h]+d, v+d, block, quadrant, nblock, budget
- ) ) {
- ptr[j] = ptr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- ptr[j] = v;
- i++;
-
- if (*budget < 0) return;
- }
- }
-}
-
-
-/*---------------------------------------------*/
-/*--
- The following is an implementation of
- an elegant 3-way quicksort for strings,
- described in a paper "Fast Algorithms for
- Sorting and Searching Strings", by Robert
- Sedgewick and Jon L. Bentley.
---*/
-
-#define mswap(zz1, zz2) \
- { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
-
-#define mvswap(zzp1, zzp2, zzn) \
-{ \
- Int32 yyp1 = (zzp1); \
- Int32 yyp2 = (zzp2); \
- Int32 yyn = (zzn); \
- while (yyn > 0) { \
- mswap(ptr[yyp1], ptr[yyp2]); \
- yyp1++; yyp2++; yyn--; \
- } \
-}
-
-static
-__inline__
-UChar mmed3 ( UChar a, UChar b, UChar c )
-{
- UChar t;
- if (a > b) { t = a; a = b; b = t; };
- if (b > c) {
- b = c;
- if (a > b) b = a;
- }
- return b;
-}
-
-#define mmin(a,b) ((a) < (b)) ? (a) : (b)
-
-#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
- stackHi[sp] = hz; \
- stackD [sp] = dz; \
- sp++; }
-
-#define mpop(lz,hz,dz) { sp--; \
- lz = stackLo[sp]; \
- hz = stackHi[sp]; \
- dz = stackD [sp]; }
-
-
-#define mnextsize(az) (nextHi[az]-nextLo[az])
-
-#define mnextswap(az,bz) \
- { Int32 tz; \
- tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
- tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
- tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
-
-
-#define MAIN_QSORT_SMALL_THRESH 20
-#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
-#define MAIN_QSORT_STACK_SIZE 100
-
-static
-void mainQSort3 ( UInt32* ptr,
- UChar* block,
- UInt16* quadrant,
- Int32 nblock,
- Int32 loSt,
- Int32 hiSt,
- Int32 dSt,
- Int32* budget )
-{
- Int32 unLo, unHi, ltLo, gtHi, n, m, med;
- Int32 sp, lo, hi, d;
-
- Int32 stackLo[MAIN_QSORT_STACK_SIZE];
- Int32 stackHi[MAIN_QSORT_STACK_SIZE];
- Int32 stackD [MAIN_QSORT_STACK_SIZE];
-
- Int32 nextLo[3];
- Int32 nextHi[3];
- Int32 nextD [3];
-
- sp = 0;
- mpush ( loSt, hiSt, dSt );
-
- while (sp > 0) {
-
- AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
-
- mpop ( lo, hi, d );
- if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
- d > MAIN_QSORT_DEPTH_THRESH) {
- mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
- if (*budget < 0) return;
- continue;
- }
-
- med = (Int32)
- mmed3 ( block[ptr[ lo ]+d],
- block[ptr[ hi ]+d],
- block[ptr[ (lo+hi)>>1 ]+d] );
-
- unLo = ltLo = lo;
- unHi = gtHi = hi;
-
- while (True) {
- while (True) {
- if (unLo > unHi) break;
- n = ((Int32)block[ptr[unLo]+d]) - med;
- if (n == 0) {
- mswap(ptr[unLo], ptr[ltLo]);
- ltLo++; unLo++; continue;
- };
- if (n > 0) break;
- unLo++;
- }
- while (True) {
- if (unLo > unHi) break;
- n = ((Int32)block[ptr[unHi]+d]) - med;
- if (n == 0) {
- mswap(ptr[unHi], ptr[gtHi]);
- gtHi--; unHi--; continue;
- };
- if (n < 0) break;
- unHi--;
- }
- if (unLo > unHi) break;
- mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
- }
-
- AssertD ( unHi == unLo-1, "mainQSort3(2)" );
-
- if (gtHi < ltLo) {
- mpush(lo, hi, d+1 );
- continue;
- }
-
- n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
- m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
-
- n = lo + unLo - ltLo - 1;
- m = hi - (gtHi - unHi) + 1;
-
- nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
- nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
- nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
-
- if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
- if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
- if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
-
- AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
- AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
-
- mpush (nextLo[0], nextHi[0], nextD[0]);
- mpush (nextLo[1], nextHi[1], nextD[1]);
- mpush (nextLo[2], nextHi[2], nextD[2]);
- }
-}
-
-#undef mswap
-#undef mvswap
-#undef mpush
-#undef mpop
-#undef mmin
-#undef mnextsize
-#undef mnextswap
-#undef MAIN_QSORT_SMALL_THRESH
-#undef MAIN_QSORT_DEPTH_THRESH
-#undef MAIN_QSORT_STACK_SIZE
-
-
-/*---------------------------------------------*/
-/* Pre:
- nblock > N_OVERSHOOT
- block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UChar*)block32) [0 .. nblock-1] holds block
- ptr exists for [0 .. nblock-1]
-
- Post:
- ((UChar*)block32) [0 .. nblock-1] holds block
- All other areas of block32 destroyed
- ftab [0 .. 65536 ] destroyed
- ptr [0 .. nblock-1] holds sorted order
- if (*budget < 0), sorting was abandoned
-*/
-
-#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
-#define SETMASK (1 << 21)
-#define CLEARMASK (~(SETMASK))
-
-static
-void mainSort ( UInt32* ptr,
- UChar* block,
- UInt16* quadrant,
- UInt32* ftab,
- Int32 nblock,
- Int32 verb,
- Int32* budget )
-{
- Int32 i, j, k, ss, sb;
- Int32 runningOrder[256];
- Bool bigDone[256];
- Int32 copyStart[256];
- Int32 copyEnd [256];
- UChar c1;
- Int32 numQSorted;
- UInt16 s;
- if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
-
- /*-- set up the 2-byte frequency table --*/
- for (i = 65536; i >= 0; i--) ftab[i] = 0;
-
- j = block[0] << 8;
- i = nblock-1;
- for (; i >= 3; i -= 4) {
- quadrant[i] = 0;
- j = (j >> 8) | ( ((UInt16)block[i]) << 8);
- ftab[j]++;
- quadrant[i-1] = 0;
- j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
- ftab[j]++;
- quadrant[i-2] = 0;
- j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
- ftab[j]++;
- quadrant[i-3] = 0;
- j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
- ftab[j]++;
- }
- for (; i >= 0; i--) {
- quadrant[i] = 0;
- j = (j >> 8) | ( ((UInt16)block[i]) << 8);
- ftab[j]++;
- }
-
- /*-- (emphasises close relationship of block & quadrant) --*/
- for (i = 0; i < BZ_N_OVERSHOOT; i++) {
- block [nblock+i] = block[i];
- quadrant[nblock+i] = 0;
- }
-
- if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
-
- /*-- Complete the initial radix sort --*/
- for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
-
- s = block[0] << 8;
- i = nblock-1;
- for (; i >= 3; i -= 4) {
- s = (s >> 8) | (block[i] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i;
- s = (s >> 8) | (block[i-1] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i-1;
- s = (s >> 8) | (block[i-2] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i-2;
- s = (s >> 8) | (block[i-3] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i-3;
- }
- for (; i >= 0; i--) {
- s = (s >> 8) | (block[i] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i;
- }
-
- /*--
- Now ftab contains the first loc of every small bucket.
- Calculate the running order, from smallest to largest
- big bucket.
- --*/
- for (i = 0; i <= 255; i++) {
- bigDone [i] = False;
- runningOrder[i] = i;
- }
-
- {
- Int32 vv;
- Int32 h = 1;
- do h = 3 * h + 1; while (h <= 256);
- do {
- h = h / 3;
- for (i = h; i <= 255; i++) {
- vv = runningOrder[i];
- j = i;
- while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
- runningOrder[j] = runningOrder[j-h];
- j = j - h;
- if (j <= (h - 1)) goto zero;
- }
- zero:
- runningOrder[j] = vv;
- }
- } while (h != 1);
- }
-
- /*--
- The main sorting loop.
- --*/
-
- numQSorted = 0;
-
- for (i = 0; i <= 255; i++) {
-
- /*--
- Process big buckets, starting with the least full.
- Basically this is a 3-step process in which we call
- mainQSort3 to sort the small buckets [ss, j], but
- also make a big effort to avoid the calls if we can.
- --*/
- ss = runningOrder[i];
-
- /*--
- Step 1:
- Complete the big bucket [ss] by quicksorting
- any unsorted small buckets [ss, j], for j != ss.
- Hopefully previous pointer-scanning phases have already
- completed many of the small buckets [ss, j], so
- we don't have to sort them at all.
- --*/
- for (j = 0; j <= 255; j++) {
- if (j != ss) {
- sb = (ss << 8) + j;
- if ( ! (ftab[sb] & SETMASK) ) {
- Int32 lo = ftab[sb] & CLEARMASK;
- Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
- if (hi > lo) {
- if (verb >= 4)
- VPrintf4 ( " qsort [0x%x, 0x%x] "
- "done %d this %d\n",
- ss, j, numQSorted, hi - lo + 1 );
- mainQSort3 (
- ptr, block, quadrant, nblock,
- lo, hi, BZ_N_RADIX, budget
- );
- numQSorted += (hi - lo + 1);
- if (*budget < 0) return;
- }
- }
- ftab[sb] |= SETMASK;
- }
- }
-
- AssertH ( !bigDone[ss], 1006 );
-
- /*--
- Step 2:
- Now scan this big bucket [ss] so as to synthesise the
- sorted order for small buckets [t, ss] for all t,
- including, magically, the bucket [ss,ss] too.
- This will avoid doing Real Work in subsequent Step 1's.
- --*/
- {
- for (j = 0; j <= 255; j++) {
- copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
- copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
- }
- for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
- k = ptr[j]-1; if (k < 0) k += nblock;
- c1 = block[k];
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+ UChar t;
+ if (a > b) { t = a; a = b; b = t; };
+ if (b > c) {
+ b = c;
+ if (a > b) b = a;
+ }
+ return b;
+}
+
+#define mmin(a,b) ((a) < (b)) ? (a) : (b)
+
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+ stackHi[sp] = hz; \
+ stackD [sp] = dz; \
+ sp++; }
+
+#define mpop(lz,hz,dz) { sp--; \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; \
+ dz = stackD [sp]; }
+
+
+#define mnextsize(az) (nextHi[az]-nextLo[az])
+
+#define mnextswap(az,bz) \
+ { Int32 tz; \
+ tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+ tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+ tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+
+
+#define MAIN_QSORT_SMALL_THRESH 20
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
+#define MAIN_QSORT_STACK_SIZE 100
+
+static
+void mainQSort3 ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ Int32 nblock,
+ Int32 loSt,
+ Int32 hiSt,
+ Int32 dSt,
+ Int32* budget )
+{
+ Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+ Int32 sp, lo, hi, d;
+
+ Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+ Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+ Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+ Int32 nextLo[3];
+ Int32 nextHi[3];
+ Int32 nextD [3];
+
+ sp = 0;
+ mpush ( loSt, hiSt, dSt );
+
+ while (sp > 0) {
+
+ AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
+
+ mpop ( lo, hi, d );
+ if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
+ d > MAIN_QSORT_DEPTH_THRESH) {
+ mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+ if (*budget < 0) return;
+ continue;
+ }
+
+ med = (Int32)
+ mmed3 ( block[ptr[ lo ]+d],
+ block[ptr[ hi ]+d],
+ block[ptr[ (lo+hi)>>1 ]+d] );
+
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (True) {
+ while (True) {
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unLo]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unLo], ptr[ltLo]);
+ ltLo++; unLo++; continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (True) {
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unHi]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unHi], ptr[gtHi]);
+ gtHi--; unHi--; continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+ }
+
+ AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+
+ if (gtHi < ltLo) {
+ mpush(lo, hi, d+1 );
+ continue;
+ }
+
+ n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
+ m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+
+ nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
+ nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
+
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+ if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+ AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+ AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+
+ mpush (nextLo[0], nextHi[0], nextD[0]);
+ mpush (nextLo[1], nextHi[1], nextD[1]);
+ mpush (nextLo[2], nextHi[2], nextD[2]);
+ }
+}
+
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > N_OVERSHOOT
+ block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
+ ((UChar*)block32) [0 .. nblock-1] holds block
+ ptr exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)block32) [0 .. nblock-1] holds block
+ All other areas of block32 destroyed
+ ftab [0 .. 65536 ] destroyed
+ ptr [0 .. nblock-1] holds sorted order
+ if (*budget < 0), sorting was abandoned
+*/
+
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
+#define SETMASK (1 << 21)
+#define CLEARMASK (~(SETMASK))
+
+static
+void mainSort ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ UInt32* ftab,
+ Int32 nblock,
+ Int32 verb,
+ Int32* budget )
+{
+ Int32 i, j, k, ss, sb;
+ Int32 runningOrder[256];
+ Bool bigDone[256];
+ Int32 copyStart[256];
+ Int32 copyEnd [256];
+ UChar c1;
+ Int32 numQSorted;
+ UInt16 s;
+ if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
+
+ /*-- set up the 2-byte frequency table --*/
+ for (i = 65536; i >= 0; i--) ftab[i] = 0;
+
+ j = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ quadrant[i-1] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+ ftab[j]++;
+ quadrant[i-2] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+ ftab[j]++;
+ quadrant[i-3] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+ ftab[j]++;
+ }
+ for (; i >= 0; i--) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ }
+
+ /*-- (emphasises close relationship of block & quadrant) --*/
+ for (i = 0; i < BZ_N_OVERSHOOT; i++) {
+ block [nblock+i] = block[i];
+ quadrant[nblock+i] = 0;
+ }
+
+ if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
+
+ /*-- Complete the initial radix sort --*/
+ for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
+
+ s = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ s = (s >> 8) | (block[i-1] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-1;
+ s = (s >> 8) | (block[i-2] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-2;
+ s = (s >> 8) | (block[i-3] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-3;
+ }
+ for (; i >= 0; i--) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ }
+
+ /*--
+ Now ftab contains the first loc of every small bucket.
+ Calculate the running order, from smallest to largest
+ big bucket.
+ --*/
+ for (i = 0; i <= 255; i++) {
+ bigDone [i] = False;
+ runningOrder[i] = i;
+ }
+
+ {
+ Int32 vv;
+ Int32 h = 1;
+ do h = 3 * h + 1; while (h <= 256);
+ do {
+ h = h / 3;
+ for (i = h; i <= 255; i++) {
+ vv = runningOrder[i];
+ j = i;
+ while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
+ runningOrder[j] = runningOrder[j-h];
+ j = j - h;
+ if (j <= (h - 1)) goto zero;
+ }
+ zero:
+ runningOrder[j] = vv;
+ }
+ } while (h != 1);
+ }
+
+ /*--
+ The main sorting loop.
+ --*/
+
+ numQSorted = 0;
+
+ for (i = 0; i <= 255; i++) {
+
+ /*--
+ Process big buckets, starting with the least full.
+ Basically this is a 3-step process in which we call
+ mainQSort3 to sort the small buckets [ss, j], but
+ also make a big effort to avoid the calls if we can.
+ --*/
+ ss = runningOrder[i];
+
+ /*--
+ Step 1:
+ Complete the big bucket [ss] by quicksorting
+ any unsorted small buckets [ss, j], for j != ss.
+ Hopefully previous pointer-scanning phases have already
+ completed many of the small buckets [ss, j], so
+ we don't have to sort them at all.
+ --*/
+ for (j = 0; j <= 255; j++) {
+ if (j != ss) {
+ sb = (ss << 8) + j;
+ if ( ! (ftab[sb] & SETMASK) ) {
+ Int32 lo = ftab[sb] & CLEARMASK;
+ Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
+ if (hi > lo) {
+ if (verb >= 4)
+ VPrintf4 ( " qsort [0x%x, 0x%x] "
+ "done %d this %d\n",
+ ss, j, numQSorted, hi - lo + 1 );
+ mainQSort3 (
+ ptr, block, quadrant, nblock,
+ lo, hi, BZ_N_RADIX, budget
+ );
+ numQSorted += (hi - lo + 1);
+ if (*budget < 0) return;
+ }
+ }
+ ftab[sb] |= SETMASK;
+ }
+ }
+
+ AssertH ( !bigDone[ss], 1006 );
+
+ /*--
+ Step 2:
+ Now scan this big bucket [ss] so as to synthesise the
+ sorted order for small buckets [t, ss] for all t,
+ including, magically, the bucket [ss,ss] too.
+ This will avoid doing Real Work in subsequent Step 1's.
+ --*/
+ {
+ for (j = 0; j <= 255; j++) {
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+ }
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyStart[c1]++ ] = k;
+ }
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
if (!bigDone[c1])
- ptr[ copyStart[c1]++ ] = k;
- }
- for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
- k = ptr[j]-1; if (k < 0) k += nblock;
- c1 = block[k];
- if (!bigDone[c1])
- ptr[ copyEnd[c1]-- ] = k;
- }
- }
-
- AssertH ( (copyStart[ss]-1 == copyEnd[ss])
- ||
- /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
- Necessity for this case is demonstrated by compressing
- a sequence of approximately 48.5 million of character
- 251; 1.0.0/1.0.1 will then die here. */
- (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
- 1007 )
-
- for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
-
- /*--
- Step 3:
- The [ss] big bucket is now done. Record this fact,
- and update the quadrant descriptors. Remember to
- update quadrants in the overshoot area too, if
- necessary. The "if (i < 255)" test merely skips
- this updating for the last bucket processed, since
- updating for the last bucket is pointless.
-
- The quadrant array provides a way to incrementally
- cache sort orderings, as they appear, so as to
- make subsequent comparisons in fullGtU() complete
- faster. For repetitive blocks this makes a big
- difference (but not big enough to be able to avoid
- the fallback sorting mechanism, exponential radix sort).
-
- The precise meaning is: at all times:
-
- for 0 <= i < nblock and 0 <= j <= nblock
-
- if block[i] != block[j],
-
- then the relative values of quadrant[i] and
- quadrant[j] are meaningless.
-
- else {
- if quadrant[i] < quadrant[j]
- then the string starting at i lexicographically
- precedes the string starting at j
-
- else if quadrant[i] > quadrant[j]
- then the string starting at j lexicographically
- precedes the string starting at i
-
- else
- the relative ordering of the strings starting
- at i and j has not yet been determined.
- }
- --*/
- bigDone[ss] = True;
-
- if (i < 255) {
- Int32 bbStart = ftab[ss << 8] & CLEARMASK;
- Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
- Int32 shifts = 0;
-
- while ((bbSize >> shifts) > 65534) shifts++;
-
- for (j = bbSize-1; j >= 0; j--) {
- Int32 a2update = ptr[bbStart + j];
- UInt16 qVal = (UInt16)(j >> shifts);
- quadrant[a2update] = qVal;
- if (a2update < BZ_N_OVERSHOOT)
- quadrant[a2update + nblock] = qVal;
- }
- AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
- }
-
- }
-
- if (verb >= 4)
- VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
- nblock, numQSorted, nblock - numQSorted );
-}
-
-#undef BIGFREQ
-#undef SETMASK
-#undef CLEARMASK
-
-
-/*---------------------------------------------*/
-/* Pre:
- nblock > 0
- arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UChar*)arr2) [0 .. nblock-1] holds block
- arr1 exists for [0 .. nblock-1]
-
- Post:
- ((UChar*)arr2) [0 .. nblock-1] holds block
- All other areas of block destroyed
- ftab [ 0 .. 65536 ] destroyed
- arr1 [0 .. nblock-1] holds sorted order
-*/
-void BZ2_blockSort ( EState* s )
-{
- UInt32* ptr = s->ptr;
- UChar* block = s->block;
- UInt32* ftab = s->ftab;
- Int32 nblock = s->nblock;
- Int32 verb = s->verbosity;
- Int32 wfact = s->workFactor;
- UInt16* quadrant;
- Int32 budget;
- Int32 budgetInit;
- Int32 i;
-
- if (nblock < 10000) {
- fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
- } else {
- /* Calculate the location for quadrant, remembering to get
- the alignment right. Assumes that &(block[0]) is at least
- 2-byte aligned -- this should be ok since block is really
- the first section of arr2.
- */
- i = nblock+BZ_N_OVERSHOOT;
- if (i & 1) i++;
- quadrant = (UInt16*)(&(block[i]));
-
- /* (wfact-1) / 3 puts the default-factor-30
- transition point at very roughly the same place as
- with v0.1 and v0.9.0.
- Not that it particularly matters any more, since the
- resulting compressed stream is now the same regardless
- of whether or not we use the main sort or fallback sort.
- */
- if (wfact < 1 ) wfact = 1;
- if (wfact > 100) wfact = 100;
- budgetInit = nblock * ((wfact-1) / 3);
- budget = budgetInit;
-
- mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
- if (verb >= 3)
- VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
- budgetInit - budget,
- nblock,
- (float)(budgetInit - budget) /
- (float)(nblock==0 ? 1 : nblock) );
- if (budget < 0) {
- if (verb >= 2)
- VPrintf0 ( " too repetitive; using fallback"
- " sorting algorithm\n" );
- fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
- }
- }
-
- s->origPtr = -1;
- for (i = 0; i < s->nblock; i++)
- if (ptr[i] == 0)
- { s->origPtr = i; break; };
-
- AssertH( s->origPtr != -1, 1003 );
-}
-
-
-/*-------------------------------------------------------------*/
-/*--- end blocksort.c ---*/
-/*-------------------------------------------------------------*/
+ ptr[ copyEnd[c1]-- ] = k;
+ }
+ }
+
+ AssertH ( (copyStart[ss]-1 == copyEnd[ss])
+ ||
+ /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
+ Necessity for this case is demonstrated by compressing
+ a sequence of approximately 48.5 million of character
+ 251; 1.0.0/1.0.1 will then die here. */
+ (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
+ 1007 )
+
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+
+ /*--
+ Step 3:
+ The [ss] big bucket is now done. Record this fact,
+ and update the quadrant descriptors. Remember to
+ update quadrants in the overshoot area too, if
+ necessary. The "if (i < 255)" test merely skips
+ this updating for the last bucket processed, since
+ updating for the last bucket is pointless.
+
+ The quadrant array provides a way to incrementally
+ cache sort orderings, as they appear, so as to
+ make subsequent comparisons in fullGtU() complete
+ faster. For repetitive blocks this makes a big
+ difference (but not big enough to be able to avoid
+ the fallback sorting mechanism, exponential radix sort).
+
+ The precise meaning is: at all times:
+
+ for 0 <= i < nblock and 0 <= j <= nblock
+
+ if block[i] != block[j],
+
+ then the relative values of quadrant[i] and
+ quadrant[j] are meaningless.
+
+ else {
+ if quadrant[i] < quadrant[j]
+ then the string starting at i lexicographically
+ precedes the string starting at j
+
+ else if quadrant[i] > quadrant[j]
+ then the string starting at j lexicographically
+ precedes the string starting at i
+
+ else
+ the relative ordering of the strings starting
+ at i and j has not yet been determined.
+ }
+ --*/
+ bigDone[ss] = True;
+
+ if (i < 255) {
+ Int32 bbStart = ftab[ss << 8] & CLEARMASK;
+ Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
+ Int32 shifts = 0;
+
+ while ((bbSize >> shifts) > 65534) shifts++;
+
+ for (j = bbSize-1; j >= 0; j--) {
+ Int32 a2update = ptr[bbStart + j];
+ UInt16 qVal = (UInt16)(j >> shifts);
+ quadrant[a2update] = qVal;
+ if (a2update < BZ_N_OVERSHOOT)
+ quadrant[a2update + nblock] = qVal;
+ }
+ AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
+ }
+
+ }
+
+ if (verb >= 4)
+ VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
+ nblock, numQSorted, nblock - numQSorted );
+}
+
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > 0
+ arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
+ ((UChar*)arr2) [0 .. nblock-1] holds block
+ arr1 exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)arr2) [0 .. nblock-1] holds block
+ All other areas of block destroyed
+ ftab [ 0 .. 65536 ] destroyed
+ arr1 [0 .. nblock-1] holds sorted order
+*/
+void BZ2_blockSort ( EState* s )
+{
+ UInt32* ptr = s->ptr;
+ UChar* block = s->block;
+ UInt32* ftab = s->ftab;
+ Int32 nblock = s->nblock;
+ Int32 verb = s->verbosity;
+ Int32 wfact = s->workFactor;
+ UInt16* quadrant;
+ Int32 budget;
+ Int32 budgetInit;
+ Int32 i;
+
+ if (nblock < 10000) {
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+ } else {
+ /* Calculate the location for quadrant, remembering to get
+ the alignment right. Assumes that &(block[0]) is at least
+ 2-byte aligned -- this should be ok since block is really
+ the first section of arr2.
+ */
+ i = nblock+BZ_N_OVERSHOOT;
+ if (i & 1) i++;
+ quadrant = (UInt16*)(&(block[i]));
+
+ /* (wfact-1) / 3 puts the default-factor-30
+ transition point at very roughly the same place as
+ with v0.1 and v0.9.0.
+ Not that it particularly matters any more, since the
+ resulting compressed stream is now the same regardless
+ of whether or not we use the main sort or fallback sort.
+ */
+ if (wfact < 1 ) wfact = 1;
+ if (wfact > 100) wfact = 100;
+ budgetInit = nblock * ((wfact-1) / 3);
+ budget = budgetInit;
+
+ mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
+ if (verb >= 3)
+ VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
+ budgetInit - budget,
+ nblock,
+ (float)(budgetInit - budget) /
+ (float)(nblock==0 ? 1 : nblock) );
+ if (budget < 0) {
+ if (verb >= 2)
+ VPrintf0 ( " too repetitive; using fallback"
+ " sorting algorithm\n" );
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+ }
+ }
+
+ s->origPtr = -1;
+ for (i = 0; i < s->nblock; i++)
+ if (ptr[i] == 0)
+ { s->origPtr = i; break; };
+
+ AssertH( s->origPtr != -1, 1003 );
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end blocksort.c ---*/
+/*-------------------------------------------------------------*/
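For reference, the Pre:/Post: contract documented above for BZ2_blockSort (on return, arr1/ptr holds the sorted rotation order, with origPtr marking the position i where ptr[i] == 0) is exactly what the rest of the compressor turns into the Burrows-Wheeler "last column". The helper below is an illustrative sketch only, not part of bzip2 or of this diff: it uses plain C types instead of UChar/UInt32 and a hypothetical name, but applies the same rule the real code relies on (emit the byte immediately preceding each sorted rotation, wrapping at the block boundary).

   #include <stddef.h>

   /* Sketch: derive the BWT last column from a sorted order that satisfies
      the contract described above.  For each position i in sorted order,
      ptr[i] is where that rotation starts in block[]; the output byte is
      the character just before that start, wrapping around the block. */
   static void bwt_last_column ( const unsigned char* block,
                                 const unsigned int*  ptr,
                                 size_t               nblock,
                                 unsigned char*       out,
                                 size_t*              origPtr )
   {
      size_t i;
      for (i = 0; i < nblock; i++) {
         unsigned int j = ptr[i];
         if (j == 0) {
            *origPtr = i;                 /* rotation that starts at block[0] */
            j = (unsigned int) nblock;    /* wrap: its last byte is block[nblock-1] */
         }
         out[i] = block[j - 1];
      }
   }

The recorded origPtr is what the decompressor later uses as the starting row when inverting the transform; everything else in blocksort.c exists to compute ptr[] within the work budget derived from workFactor, falling back to fallbackSort when that budget runs out.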
diff --git a/contrib/libs/libbz2/bzlib.c b/contrib/libs/libbz2/bzlib.c
index ee4ec6e8ef..21786551b6 100644
--- a/contrib/libs/libbz2/bzlib.c
+++ b/contrib/libs/libbz2/bzlib.c
@@ -1,1572 +1,1572 @@
-
-/*-------------------------------------------------------------*/
-/*--- Library top-level functions. ---*/
-/*--- bzlib.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Library top-level functions. ---*/
+/*--- bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
- Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-/* CHANGES
- 0.9.0 -- original version.
- 0.9.0a/b -- no changes in this file.
- 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress().
- fixed bzWrite/bzRead to ignore zero-length requests.
- fixed bzread to correctly handle read requests after EOF.
- wrong parameter order in call to bzDecompressInit in
- bzBuffToBuffDecompress. Fixed.
-*/
-
-#include "bzlib_private.h"
-
-
-/*---------------------------------------------------*/
-/*--- Compression stuff ---*/
-/*---------------------------------------------------*/
-
-
-/*---------------------------------------------------*/
-#ifndef BZ_NO_STDIO
-void BZ2_bz__AssertH__fail ( int errcode )
-{
- fprintf(stderr,
- "\n\nbzip2/libbzip2: internal error number %d.\n"
- "This is a bug in bzip2/libbzip2, %s.\n"
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+/* CHANGES
+ 0.9.0 -- original version.
+ 0.9.0a/b -- no changes in this file.
+ 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress().
+ fixed bzWrite/bzRead to ignore zero-length requests.
+ fixed bzread to correctly handle read requests after EOF.
+ wrong parameter order in call to bzDecompressInit in
+ bzBuffToBuffDecompress. Fixed.
+*/
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/*--- Compression stuff ---*/
+/*---------------------------------------------------*/
+
+
+/*---------------------------------------------------*/
+#ifndef BZ_NO_STDIO
+void BZ2_bz__AssertH__fail ( int errcode )
+{
+ fprintf(stderr,
+ "\n\nbzip2/libbzip2: internal error number %d.\n"
+ "This is a bug in bzip2/libbzip2, %s.\n"
"Please report it to: bzip2-devel@sourceware.org. If this happened\n"
- "when you were using some program which uses libbzip2 as a\n"
- "component, you should also report this bug to the author(s)\n"
- "of that program. Please make an effort to report this bug;\n"
- "timely and accurate bug reports eventually lead to higher\n"
+ "when you were using some program which uses libbzip2 as a\n"
+ "component, you should also report this bug to the author(s)\n"
+ "of that program. Please make an effort to report this bug;\n"
+ "timely and accurate bug reports eventually lead to higher\n"
"quality software. Thanks.\n\n",
- errcode,
- BZ2_bzlibVersion()
- );
-
- if (errcode == 1007) {
- fprintf(stderr,
- "\n*** A special note about internal error number 1007 ***\n"
- "\n"
- "Experience suggests that a common cause of i.e. 1007\n"
- "is unreliable memory or other hardware. The 1007 assertion\n"
- "just happens to cross-check the results of huge numbers of\n"
- "memory reads/writes, and so acts (unintendedly) as a stress\n"
- "test of your memory system.\n"
- "\n"
- "I suggest the following: try compressing the file again,\n"
- "possibly monitoring progress in detail with the -vv flag.\n"
- "\n"
- "* If the error cannot be reproduced, and/or happens at different\n"
- " points in compression, you may have a flaky memory system.\n"
- " Try a memory-test program. I have used Memtest86\n"
- " (www.memtest86.com). At the time of writing it is free (GPLd).\n"
- " Memtest86 tests memory much more thorougly than your BIOSs\n"
- " power-on test, and may find failures that the BIOS doesn't.\n"
- "\n"
- "* If the error can be repeatably reproduced, this is a bug in\n"
- " bzip2, and I would very much like to hear about it. Please\n"
- " let me know, and, ideally, save a copy of the file causing the\n"
- " problem -- without which I will be unable to investigate it.\n"
- "\n"
- );
- }
-
- exit(3);
-}
-#endif
-
-
-/*---------------------------------------------------*/
-static
-int bz_config_ok ( void )
-{
- if (sizeof(int) != 4) return 0;
- if (sizeof(short) != 2) return 0;
- if (sizeof(char) != 1) return 0;
- return 1;
-}
-
-
-/*---------------------------------------------------*/
-static
-void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
-{
- void* v = malloc ( items * size );
- return v;
-}
-
-static
-void default_bzfree ( void* opaque, void* addr )
-{
- if (addr != NULL) free ( addr );
-}
-
-
-/*---------------------------------------------------*/
-static
-void prepare_new_block ( EState* s )
-{
- Int32 i;
- s->nblock = 0;
- s->numZ = 0;
- s->state_out_pos = 0;
- BZ_INITIALISE_CRC ( s->blockCRC );
- for (i = 0; i < 256; i++) s->inUse[i] = False;
- s->blockNo++;
-}
-
-
-/*---------------------------------------------------*/
-static
-void init_RL ( EState* s )
-{
- s->state_in_ch = 256;
- s->state_in_len = 0;
-}
-
-
-static
-Bool isempty_RL ( EState* s )
-{
- if (s->state_in_ch < 256 && s->state_in_len > 0)
- return False; else
- return True;
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzCompressInit)
- ( bz_stream* strm,
- int blockSize100k,
- int verbosity,
- int workFactor )
-{
- Int32 n;
- EState* s;
-
- if (!bz_config_ok()) return BZ_CONFIG_ERROR;
-
- if (strm == NULL ||
- blockSize100k < 1 || blockSize100k > 9 ||
- workFactor < 0 || workFactor > 250)
- return BZ_PARAM_ERROR;
-
- if (workFactor == 0) workFactor = 30;
- if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
- if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
-
- s = BZALLOC( sizeof(EState) );
- if (s == NULL) return BZ_MEM_ERROR;
- s->strm = strm;
-
- s->arr1 = NULL;
- s->arr2 = NULL;
- s->ftab = NULL;
-
- n = 100000 * blockSize100k;
- s->arr1 = BZALLOC( n * sizeof(UInt32) );
- s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
- s->ftab = BZALLOC( 65537 * sizeof(UInt32) );
-
- if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
- if (s->arr1 != NULL) BZFREE(s->arr1);
- if (s->arr2 != NULL) BZFREE(s->arr2);
- if (s->ftab != NULL) BZFREE(s->ftab);
- if (s != NULL) BZFREE(s);
- return BZ_MEM_ERROR;
- }
-
- s->blockNo = 0;
- s->state = BZ_S_INPUT;
- s->mode = BZ_M_RUNNING;
- s->combinedCRC = 0;
- s->blockSize100k = blockSize100k;
- s->nblockMAX = 100000 * blockSize100k - 19;
- s->verbosity = verbosity;
- s->workFactor = workFactor;
-
- s->block = (UChar*)s->arr2;
- s->mtfv = (UInt16*)s->arr1;
- s->zbits = NULL;
- s->ptr = (UInt32*)s->arr1;
-
- strm->state = s;
- strm->total_in_lo32 = 0;
- strm->total_in_hi32 = 0;
- strm->total_out_lo32 = 0;
- strm->total_out_hi32 = 0;
- init_RL ( s );
- prepare_new_block ( s );
- return BZ_OK;
-}
-
-
-/*---------------------------------------------------*/
-static
-void add_pair_to_block ( EState* s )
-{
- Int32 i;
- UChar ch = (UChar)(s->state_in_ch);
- for (i = 0; i < s->state_in_len; i++) {
- BZ_UPDATE_CRC( s->blockCRC, ch );
- }
- s->inUse[s->state_in_ch] = True;
- switch (s->state_in_len) {
- case 1:
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- break;
- case 2:
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- break;
- case 3:
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- break;
- default:
- s->inUse[s->state_in_len-4] = True;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = (UChar)ch; s->nblock++;
- s->block[s->nblock] = ((UChar)(s->state_in_len-4));
- s->nblock++;
- break;
- }
-}
-
-
-/*---------------------------------------------------*/
-static
-void flush_RL ( EState* s )
-{
- if (s->state_in_ch < 256) add_pair_to_block ( s );
- init_RL ( s );
-}
-
-
-/*---------------------------------------------------*/
-#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
-{ \
- UInt32 zchh = (UInt32)(zchh0); \
- /*-- fast track the common case --*/ \
- if (zchh != zs->state_in_ch && \
- zs->state_in_len == 1) { \
- UChar ch = (UChar)(zs->state_in_ch); \
- BZ_UPDATE_CRC( zs->blockCRC, ch ); \
- zs->inUse[zs->state_in_ch] = True; \
- zs->block[zs->nblock] = (UChar)ch; \
- zs->nblock++; \
- zs->state_in_ch = zchh; \
- } \
- else \
- /*-- general, uncommon cases --*/ \
- if (zchh != zs->state_in_ch || \
- zs->state_in_len == 255) { \
- if (zs->state_in_ch < 256) \
- add_pair_to_block ( zs ); \
- zs->state_in_ch = zchh; \
- zs->state_in_len = 1; \
- } else { \
- zs->state_in_len++; \
- } \
-}
-
-
-/*---------------------------------------------------*/
-static
-Bool copy_input_until_stop ( EState* s )
-{
- Bool progress_in = False;
-
- if (s->mode == BZ_M_RUNNING) {
-
- /*-- fast track the common case --*/
- while (True) {
- /*-- block full? --*/
- if (s->nblock >= s->nblockMAX) break;
- /*-- no input? --*/
- if (s->strm->avail_in == 0) break;
- progress_in = True;
- ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
- s->strm->next_in++;
- s->strm->avail_in--;
- s->strm->total_in_lo32++;
- if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
- }
-
- } else {
-
- /*-- general, uncommon case --*/
- while (True) {
- /*-- block full? --*/
- if (s->nblock >= s->nblockMAX) break;
- /*-- no input? --*/
- if (s->strm->avail_in == 0) break;
- /*-- flush/finish end? --*/
- if (s->avail_in_expect == 0) break;
- progress_in = True;
- ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
- s->strm->next_in++;
- s->strm->avail_in--;
- s->strm->total_in_lo32++;
- if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
- s->avail_in_expect--;
- }
- }
- return progress_in;
-}
-
-
-/*---------------------------------------------------*/
-static
-Bool copy_output_until_stop ( EState* s )
-{
- Bool progress_out = False;
-
- while (True) {
-
- /*-- no output space? --*/
- if (s->strm->avail_out == 0) break;
-
- /*-- block done? --*/
- if (s->state_out_pos >= s->numZ) break;
-
- progress_out = True;
- *(s->strm->next_out) = s->zbits[s->state_out_pos];
- s->state_out_pos++;
- s->strm->avail_out--;
- s->strm->next_out++;
- s->strm->total_out_lo32++;
- if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
- }
-
- return progress_out;
-}
-
-
-/*---------------------------------------------------*/
-static
-Bool handle_compress ( bz_stream* strm )
-{
- Bool progress_in = False;
- Bool progress_out = False;
- EState* s = strm->state;
-
- while (True) {
-
- if (s->state == BZ_S_OUTPUT) {
- progress_out |= copy_output_until_stop ( s );
- if (s->state_out_pos < s->numZ) break;
- if (s->mode == BZ_M_FINISHING &&
- s->avail_in_expect == 0 &&
- isempty_RL(s)) break;
- prepare_new_block ( s );
- s->state = BZ_S_INPUT;
- if (s->mode == BZ_M_FLUSHING &&
- s->avail_in_expect == 0 &&
- isempty_RL(s)) break;
- }
-
- if (s->state == BZ_S_INPUT) {
- progress_in |= copy_input_until_stop ( s );
- if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
- flush_RL ( s );
- BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
- s->state = BZ_S_OUTPUT;
+ errcode,
+ BZ2_bzlibVersion()
+ );
+
+ if (errcode == 1007) {
+ fprintf(stderr,
+ "\n*** A special note about internal error number 1007 ***\n"
+ "\n"
+ "Experience suggests that a common cause of i.e. 1007\n"
+ "is unreliable memory or other hardware. The 1007 assertion\n"
+ "just happens to cross-check the results of huge numbers of\n"
+ "memory reads/writes, and so acts (unintendedly) as a stress\n"
+ "test of your memory system.\n"
+ "\n"
+ "I suggest the following: try compressing the file again,\n"
+ "possibly monitoring progress in detail with the -vv flag.\n"
+ "\n"
+ "* If the error cannot be reproduced, and/or happens at different\n"
+ " points in compression, you may have a flaky memory system.\n"
+ " Try a memory-test program. I have used Memtest86\n"
+ " (www.memtest86.com). At the time of writing it is free (GPLd).\n"
+ " Memtest86 tests memory much more thorougly than your BIOSs\n"
+ " power-on test, and may find failures that the BIOS doesn't.\n"
+ "\n"
+ "* If the error can be repeatably reproduced, this is a bug in\n"
+ " bzip2, and I would very much like to hear about it. Please\n"
+ " let me know, and, ideally, save a copy of the file causing the\n"
+ " problem -- without which I will be unable to investigate it.\n"
+ "\n"
+ );
+ }
+
+ exit(3);
+}
+#endif
+
+
+/*---------------------------------------------------*/
+static
+int bz_config_ok ( void )
+{
+ if (sizeof(int) != 4) return 0;
+ if (sizeof(short) != 2) return 0;
+ if (sizeof(char) != 1) return 0;
+ return 1;
+}
+
+
+/*---------------------------------------------------*/
+static
+void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
+{
+ void* v = malloc ( items * size );
+ return v;
+}
+
+static
+void default_bzfree ( void* opaque, void* addr )
+{
+ if (addr != NULL) free ( addr );
+}
+
+
+/*---------------------------------------------------*/
+static
+void prepare_new_block ( EState* s )
+{
+ Int32 i;
+ s->nblock = 0;
+ s->numZ = 0;
+ s->state_out_pos = 0;
+ BZ_INITIALISE_CRC ( s->blockCRC );
+ for (i = 0; i < 256; i++) s->inUse[i] = False;
+ s->blockNo++;
+}
+
+
+/*---------------------------------------------------*/
+static
+void init_RL ( EState* s )
+{
+ s->state_in_ch = 256;
+ s->state_in_len = 0;
+}
+
+
+static
+Bool isempty_RL ( EState* s )
+{
+ if (s->state_in_ch < 256 && s->state_in_len > 0)
+ return False; else
+ return True;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzCompressInit)
+ ( bz_stream* strm,
+ int blockSize100k,
+ int verbosity,
+ int workFactor )
+{
+ Int32 n;
+ EState* s;
+
+ if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+ if (strm == NULL ||
+ blockSize100k < 1 || blockSize100k > 9 ||
+ workFactor < 0 || workFactor > 250)
+ return BZ_PARAM_ERROR;
+
+ if (workFactor == 0) workFactor = 30;
+ if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+ if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+ s = BZALLOC( sizeof(EState) );
+ if (s == NULL) return BZ_MEM_ERROR;
+ s->strm = strm;
+
+ s->arr1 = NULL;
+ s->arr2 = NULL;
+ s->ftab = NULL;
+
+ n = 100000 * blockSize100k;
+ s->arr1 = BZALLOC( n * sizeof(UInt32) );
+ s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
+ s->ftab = BZALLOC( 65537 * sizeof(UInt32) );
+
+ if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
+ if (s->arr1 != NULL) BZFREE(s->arr1);
+ if (s->arr2 != NULL) BZFREE(s->arr2);
+ if (s->ftab != NULL) BZFREE(s->ftab);
+ if (s != NULL) BZFREE(s);
+ return BZ_MEM_ERROR;
+ }
+
+ s->blockNo = 0;
+ s->state = BZ_S_INPUT;
+ s->mode = BZ_M_RUNNING;
+ s->combinedCRC = 0;
+ s->blockSize100k = blockSize100k;
+ s->nblockMAX = 100000 * blockSize100k - 19;
+ s->verbosity = verbosity;
+ s->workFactor = workFactor;
+
+ s->block = (UChar*)s->arr2;
+ s->mtfv = (UInt16*)s->arr1;
+ s->zbits = NULL;
+ s->ptr = (UInt32*)s->arr1;
+
+ strm->state = s;
+ strm->total_in_lo32 = 0;
+ strm->total_in_hi32 = 0;
+ strm->total_out_lo32 = 0;
+ strm->total_out_hi32 = 0;
+ init_RL ( s );
+ prepare_new_block ( s );
+ return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+static
+void add_pair_to_block ( EState* s )
+{
+ Int32 i;
+ UChar ch = (UChar)(s->state_in_ch);
+ for (i = 0; i < s->state_in_len; i++) {
+ BZ_UPDATE_CRC( s->blockCRC, ch );
+ }
+ s->inUse[s->state_in_ch] = True;
+ switch (s->state_in_len) {
+ case 1:
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ break;
+ case 2:
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ break;
+ case 3:
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ break;
+ default:
+ s->inUse[s->state_in_len-4] = True;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = ((UChar)(s->state_in_len-4));
+ s->nblock++;
+ break;
+ }
+}
+
+
+/*---------------------------------------------------*/
+static
+void flush_RL ( EState* s )
+{
+ if (s->state_in_ch < 256) add_pair_to_block ( s );
+ init_RL ( s );
+}
+
+
+/*---------------------------------------------------*/
+#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
+{ \
+ UInt32 zchh = (UInt32)(zchh0); \
+ /*-- fast track the common case --*/ \
+ if (zchh != zs->state_in_ch && \
+ zs->state_in_len == 1) { \
+ UChar ch = (UChar)(zs->state_in_ch); \
+ BZ_UPDATE_CRC( zs->blockCRC, ch ); \
+ zs->inUse[zs->state_in_ch] = True; \
+ zs->block[zs->nblock] = (UChar)ch; \
+ zs->nblock++; \
+ zs->state_in_ch = zchh; \
+ } \
+ else \
+ /*-- general, uncommon cases --*/ \
+ if (zchh != zs->state_in_ch || \
+ zs->state_in_len == 255) { \
+ if (zs->state_in_ch < 256) \
+ add_pair_to_block ( zs ); \
+ zs->state_in_ch = zchh; \
+ zs->state_in_len = 1; \
+ } else { \
+ zs->state_in_len++; \
+ } \
+}
+
+
+/*---------------------------------------------------*/
+static
+Bool copy_input_until_stop ( EState* s )
+{
+ Bool progress_in = False;
+
+ if (s->mode == BZ_M_RUNNING) {
+
+ /*-- fast track the common case --*/
+ while (True) {
+ /*-- block full? --*/
+ if (s->nblock >= s->nblockMAX) break;
+ /*-- no input? --*/
+ if (s->strm->avail_in == 0) break;
+ progress_in = True;
+ ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
+ s->strm->next_in++;
+ s->strm->avail_in--;
+ s->strm->total_in_lo32++;
+ if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
+ }
+
+ } else {
+
+ /*-- general, uncommon case --*/
+ while (True) {
+ /*-- block full? --*/
+ if (s->nblock >= s->nblockMAX) break;
+ /*-- no input? --*/
+ if (s->strm->avail_in == 0) break;
+ /*-- flush/finish end? --*/
+ if (s->avail_in_expect == 0) break;
+ progress_in = True;
+ ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
+ s->strm->next_in++;
+ s->strm->avail_in--;
+ s->strm->total_in_lo32++;
+ if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
+ s->avail_in_expect--;
+ }
+ }
+ return progress_in;
+}
+
+
+/*---------------------------------------------------*/
+static
+Bool copy_output_until_stop ( EState* s )
+{
+ Bool progress_out = False;
+
+ while (True) {
+
+ /*-- no output space? --*/
+ if (s->strm->avail_out == 0) break;
+
+ /*-- block done? --*/
+ if (s->state_out_pos >= s->numZ) break;
+
+ progress_out = True;
+ *(s->strm->next_out) = s->zbits[s->state_out_pos];
+ s->state_out_pos++;
+ s->strm->avail_out--;
+ s->strm->next_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ return progress_out;
+}
+
+
+/*---------------------------------------------------*/
+static
+Bool handle_compress ( bz_stream* strm )
+{
+ Bool progress_in = False;
+ Bool progress_out = False;
+ EState* s = strm->state;
+
+ while (True) {
+
+ if (s->state == BZ_S_OUTPUT) {
+ progress_out |= copy_output_until_stop ( s );
+ if (s->state_out_pos < s->numZ) break;
+ if (s->mode == BZ_M_FINISHING &&
+ s->avail_in_expect == 0 &&
+ isempty_RL(s)) break;
+ prepare_new_block ( s );
+ s->state = BZ_S_INPUT;
+ if (s->mode == BZ_M_FLUSHING &&
+ s->avail_in_expect == 0 &&
+ isempty_RL(s)) break;
+ }
+
+ if (s->state == BZ_S_INPUT) {
+ progress_in |= copy_input_until_stop ( s );
+ if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
+ flush_RL ( s );
+ BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
+ s->state = BZ_S_OUTPUT;
+ }
+ else
+ if (s->nblock >= s->nblockMAX) {
+ BZ2_compressBlock ( s, False );
+ s->state = BZ_S_OUTPUT;
+ }
+ else
+ if (s->strm->avail_in == 0) {
+ break;
+ }
+ }
+
+ }
+
+ return progress_in || progress_out;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
+{
+ Bool progress;
+ EState* s;
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ s = strm->state;
+ if (s == NULL) return BZ_PARAM_ERROR;
+ if (s->strm != strm) return BZ_PARAM_ERROR;
+
+ preswitch:
+ switch (s->mode) {
+
+ case BZ_M_IDLE:
+ return BZ_SEQUENCE_ERROR;
+
+ case BZ_M_RUNNING:
+ if (action == BZ_RUN) {
+ progress = handle_compress ( strm );
+ return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
}
+ else
+ if (action == BZ_FLUSH) {
+ s->avail_in_expect = strm->avail_in;
+ s->mode = BZ_M_FLUSHING;
+ goto preswitch;
+ }
+ else
+ if (action == BZ_FINISH) {
+ s->avail_in_expect = strm->avail_in;
+ s->mode = BZ_M_FINISHING;
+ goto preswitch;
+ }
else
- if (s->nblock >= s->nblockMAX) {
- BZ2_compressBlock ( s, False );
- s->state = BZ_S_OUTPUT;
- }
- else
- if (s->strm->avail_in == 0) {
- break;
- }
- }
-
- }
-
- return progress_in || progress_out;
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
-{
- Bool progress;
- EState* s;
- if (strm == NULL) return BZ_PARAM_ERROR;
- s = strm->state;
- if (s == NULL) return BZ_PARAM_ERROR;
- if (s->strm != strm) return BZ_PARAM_ERROR;
-
- preswitch:
- switch (s->mode) {
-
- case BZ_M_IDLE:
- return BZ_SEQUENCE_ERROR;
-
- case BZ_M_RUNNING:
- if (action == BZ_RUN) {
- progress = handle_compress ( strm );
- return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
- }
- else
- if (action == BZ_FLUSH) {
- s->avail_in_expect = strm->avail_in;
- s->mode = BZ_M_FLUSHING;
- goto preswitch;
- }
- else
- if (action == BZ_FINISH) {
- s->avail_in_expect = strm->avail_in;
- s->mode = BZ_M_FINISHING;
- goto preswitch;
- }
- else
- return BZ_PARAM_ERROR;
-
- case BZ_M_FLUSHING:
- if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect != s->strm->avail_in)
- return BZ_SEQUENCE_ERROR;
- progress = handle_compress ( strm );
- if (s->avail_in_expect > 0 || !isempty_RL(s) ||
- s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
- s->mode = BZ_M_RUNNING;
- return BZ_RUN_OK;
-
- case BZ_M_FINISHING:
- if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect != s->strm->avail_in)
- return BZ_SEQUENCE_ERROR;
- progress = handle_compress ( strm );
- if (!progress) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect > 0 || !isempty_RL(s) ||
- s->state_out_pos < s->numZ) return BZ_FINISH_OK;
- s->mode = BZ_M_IDLE;
- return BZ_STREAM_END;
- }
- return BZ_OK; /*--not reached--*/
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
-{
- EState* s;
- if (strm == NULL) return BZ_PARAM_ERROR;
- s = strm->state;
- if (s == NULL) return BZ_PARAM_ERROR;
- if (s->strm != strm) return BZ_PARAM_ERROR;
-
- if (s->arr1 != NULL) BZFREE(s->arr1);
- if (s->arr2 != NULL) BZFREE(s->arr2);
- if (s->ftab != NULL) BZFREE(s->ftab);
- BZFREE(strm->state);
-
- strm->state = NULL;
-
- return BZ_OK;
-}
-
-
-/*---------------------------------------------------*/
-/*--- Decompression stuff ---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzDecompressInit)
- ( bz_stream* strm,
- int verbosity,
- int small )
-{
- DState* s;
-
- if (!bz_config_ok()) return BZ_CONFIG_ERROR;
-
- if (strm == NULL) return BZ_PARAM_ERROR;
- if (small != 0 && small != 1) return BZ_PARAM_ERROR;
- if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
-
- if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
- if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
-
- s = BZALLOC( sizeof(DState) );
- if (s == NULL) return BZ_MEM_ERROR;
- s->strm = strm;
- strm->state = s;
- s->state = BZ_X_MAGIC_1;
- s->bsLive = 0;
- s->bsBuff = 0;
- s->calculatedCombinedCRC = 0;
- strm->total_in_lo32 = 0;
- strm->total_in_hi32 = 0;
- strm->total_out_lo32 = 0;
- strm->total_out_hi32 = 0;
- s->smallDecompress = (Bool)small;
- s->ll4 = NULL;
- s->ll16 = NULL;
- s->tt = NULL;
- s->currBlockNo = 0;
- s->verbosity = verbosity;
-
- return BZ_OK;
-}
-
-
-/*---------------------------------------------------*/
-/* Return True iff data corruption is discovered.
- Returns False if there is no problem.
-*/
-static
-Bool unRLE_obuf_to_output_FAST ( DState* s )
-{
- UChar k1;
-
- if (s->blockRandomised) {
-
- while (True) {
- /* try to finish existing run */
- while (True) {
- if (s->strm->avail_out == 0) return False;
- if (s->state_out_len == 0) break;
- *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
- BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
- s->state_out_len--;
- s->strm->next_out++;
- s->strm->avail_out--;
- s->strm->total_out_lo32++;
- if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
- }
-
- /* can a new run be started? */
- if (s->nblock_used == s->save_nblock+1) return False;
-
- /* Only caused by corrupt data stream? */
- if (s->nblock_used > s->save_nblock+1)
- return True;
-
- s->state_out_len = 1;
- s->state_out_ch = s->k0;
- BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- s->state_out_len = 2;
- BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- s->state_out_len = 3;
- BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- s->state_out_len = ((Int32)k1) + 4;
- BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK;
- s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
- }
-
- } else {
-
- /* restore */
- UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC;
- UChar c_state_out_ch = s->state_out_ch;
- Int32 c_state_out_len = s->state_out_len;
- Int32 c_nblock_used = s->nblock_used;
- Int32 c_k0 = s->k0;
- UInt32* c_tt = s->tt;
- UInt32 c_tPos = s->tPos;
- char* cs_next_out = s->strm->next_out;
- unsigned int cs_avail_out = s->strm->avail_out;
- Int32 ro_blockSize100k = s->blockSize100k;
- /* end restore */
-
- UInt32 avail_out_INIT = cs_avail_out;
- Int32 s_save_nblockPP = s->save_nblock+1;
- unsigned int total_out_lo32_old;
-
- while (True) {
-
- /* try to finish existing run */
- if (c_state_out_len > 0) {
- while (True) {
- if (cs_avail_out == 0) goto return_notr;
- if (c_state_out_len == 1) break;
- *( (UChar*)(cs_next_out) ) = c_state_out_ch;
- BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
- c_state_out_len--;
- cs_next_out++;
- cs_avail_out--;
- }
- s_state_out_len_eq_one:
- {
- if (cs_avail_out == 0) {
- c_state_out_len = 1; goto return_notr;
- };
- *( (UChar*)(cs_next_out) ) = c_state_out_ch;
- BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
- cs_next_out++;
- cs_avail_out--;
- }
- }
- /* Only caused by corrupt data stream? */
- if (c_nblock_used > s_save_nblockPP)
- return True;
-
- /* can a new run be started? */
- if (c_nblock_used == s_save_nblockPP) {
- c_state_out_len = 0; goto return_notr;
- };
- c_state_out_ch = c_k0;
- BZ_GET_FAST_C(k1); c_nblock_used++;
- if (k1 != c_k0) {
- c_k0 = k1; goto s_state_out_len_eq_one;
- };
- if (c_nblock_used == s_save_nblockPP)
- goto s_state_out_len_eq_one;
-
- c_state_out_len = 2;
- BZ_GET_FAST_C(k1); c_nblock_used++;
- if (c_nblock_used == s_save_nblockPP) continue;
- if (k1 != c_k0) { c_k0 = k1; continue; };
-
- c_state_out_len = 3;
- BZ_GET_FAST_C(k1); c_nblock_used++;
- if (c_nblock_used == s_save_nblockPP) continue;
- if (k1 != c_k0) { c_k0 = k1; continue; };
-
- BZ_GET_FAST_C(k1); c_nblock_used++;
- c_state_out_len = ((Int32)k1) + 4;
- BZ_GET_FAST_C(c_k0); c_nblock_used++;
- }
-
- return_notr:
- total_out_lo32_old = s->strm->total_out_lo32;
- s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
- if (s->strm->total_out_lo32 < total_out_lo32_old)
- s->strm->total_out_hi32++;
-
- /* save */
- s->calculatedBlockCRC = c_calculatedBlockCRC;
- s->state_out_ch = c_state_out_ch;
- s->state_out_len = c_state_out_len;
- s->nblock_used = c_nblock_used;
- s->k0 = c_k0;
- s->tt = c_tt;
- s->tPos = c_tPos;
- s->strm->next_out = cs_next_out;
- s->strm->avail_out = cs_avail_out;
- /* end save */
- }
- return False;
-}
-
-
-
-/*---------------------------------------------------*/
-__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
-{
- Int32 nb, na, mid;
- nb = 0;
- na = 256;
- do {
- mid = (nb + na) >> 1;
- if (indx >= cftab[mid]) nb = mid; else na = mid;
- }
- while (na - nb != 1);
- return nb;
-}
-
-
-/*---------------------------------------------------*/
-/* Return True iff data corruption is discovered.
- Returns False if there is no problem.
-*/
-static
-Bool unRLE_obuf_to_output_SMALL ( DState* s )
-{
- UChar k1;
-
- if (s->blockRandomised) {
-
- while (True) {
- /* try to finish existing run */
- while (True) {
- if (s->strm->avail_out == 0) return False;
- if (s->state_out_len == 0) break;
- *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
- BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
- s->state_out_len--;
- s->strm->next_out++;
- s->strm->avail_out--;
- s->strm->total_out_lo32++;
- if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
- }
-
- /* can a new run be started? */
- if (s->nblock_used == s->save_nblock+1) return False;
-
- /* Only caused by corrupt data stream? */
- if (s->nblock_used > s->save_nblock+1)
- return True;
-
- s->state_out_len = 1;
- s->state_out_ch = s->k0;
- BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- s->state_out_len = 2;
- BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- s->state_out_len = 3;
- BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
- k1 ^= BZ_RAND_MASK; s->nblock_used++;
- s->state_out_len = ((Int32)k1) + 4;
- BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK;
- s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
- }
-
- } else {
-
- while (True) {
- /* try to finish existing run */
- while (True) {
- if (s->strm->avail_out == 0) return False;
- if (s->state_out_len == 0) break;
- *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
- BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
- s->state_out_len--;
- s->strm->next_out++;
- s->strm->avail_out--;
- s->strm->total_out_lo32++;
- if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
- }
-
- /* can a new run be started? */
- if (s->nblock_used == s->save_nblock+1) return False;
-
- /* Only caused by corrupt data stream? */
- if (s->nblock_used > s->save_nblock+1)
- return True;
-
- s->state_out_len = 1;
- s->state_out_ch = s->k0;
- BZ_GET_SMALL(k1); s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- s->state_out_len = 2;
- BZ_GET_SMALL(k1); s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- s->state_out_len = 3;
- BZ_GET_SMALL(k1); s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) continue;
- if (k1 != s->k0) { s->k0 = k1; continue; };
-
- BZ_GET_SMALL(k1); s->nblock_used++;
- s->state_out_len = ((Int32)k1) + 4;
- BZ_GET_SMALL(s->k0); s->nblock_used++;
- }
-
- }
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
-{
- Bool corrupt;
- DState* s;
- if (strm == NULL) return BZ_PARAM_ERROR;
- s = strm->state;
- if (s == NULL) return BZ_PARAM_ERROR;
- if (s->strm != strm) return BZ_PARAM_ERROR;
-
- while (True) {
- if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
- if (s->state == BZ_X_OUTPUT) {
- if (s->smallDecompress)
- corrupt = unRLE_obuf_to_output_SMALL ( s ); else
- corrupt = unRLE_obuf_to_output_FAST ( s );
- if (corrupt) return BZ_DATA_ERROR;
- if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
- BZ_FINALISE_CRC ( s->calculatedBlockCRC );
- if (s->verbosity >= 3)
- VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC,
- s->calculatedBlockCRC );
- if (s->verbosity >= 2) VPrintf0 ( "]" );
- if (s->calculatedBlockCRC != s->storedBlockCRC)
- return BZ_DATA_ERROR;
- s->calculatedCombinedCRC
- = (s->calculatedCombinedCRC << 1) |
- (s->calculatedCombinedCRC >> 31);
- s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
- s->state = BZ_X_BLKHDR_1;
- } else {
- return BZ_OK;
- }
- }
- if (s->state >= BZ_X_MAGIC_1) {
- Int32 r = BZ2_decompress ( s );
- if (r == BZ_STREAM_END) {
+ return BZ_PARAM_ERROR;
+
+ case BZ_M_FLUSHING:
+ if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
+ progress = handle_compress ( strm );
+ if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+ s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
+ s->mode = BZ_M_RUNNING;
+ return BZ_RUN_OK;
+
+ case BZ_M_FINISHING:
+ if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
+ progress = handle_compress ( strm );
+ if (!progress) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+ s->state_out_pos < s->numZ) return BZ_FINISH_OK;
+ s->mode = BZ_M_IDLE;
+ return BZ_STREAM_END;
+ }
+ return BZ_OK; /*--not reached--*/
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
+{
+ EState* s;
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ s = strm->state;
+ if (s == NULL) return BZ_PARAM_ERROR;
+ if (s->strm != strm) return BZ_PARAM_ERROR;
+
+ if (s->arr1 != NULL) BZFREE(s->arr1);
+ if (s->arr2 != NULL) BZFREE(s->arr2);
+ if (s->ftab != NULL) BZFREE(s->ftab);
+ BZFREE(strm->state);
+
+ strm->state = NULL;
+
+ return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/*--- Decompression stuff ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzDecompressInit)
+ ( bz_stream* strm,
+ int verbosity,
+ int small )
+{
+ DState* s;
+
+ if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ if (small != 0 && small != 1) return BZ_PARAM_ERROR;
+ if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
+
+ if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+ if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+ s = BZALLOC( sizeof(DState) );
+ if (s == NULL) return BZ_MEM_ERROR;
+ s->strm = strm;
+ strm->state = s;
+ s->state = BZ_X_MAGIC_1;
+ s->bsLive = 0;
+ s->bsBuff = 0;
+ s->calculatedCombinedCRC = 0;
+ strm->total_in_lo32 = 0;
+ strm->total_in_hi32 = 0;
+ strm->total_out_lo32 = 0;
+ strm->total_out_hi32 = 0;
+ s->smallDecompress = (Bool)small;
+ s->ll4 = NULL;
+ s->ll16 = NULL;
+ s->tt = NULL;
+ s->currBlockNo = 0;
+ s->verbosity = verbosity;
+
+ return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/* Return True iff data corruption is discovered.
+ Returns False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_FAST ( DState* s )
+{
+ UChar k1;
+
+ if (s->blockRandomised) {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+ BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK;
+ s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
+ }
+
+ } else {
+
+ /* restore */
+ UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC;
+ UChar c_state_out_ch = s->state_out_ch;
+ Int32 c_state_out_len = s->state_out_len;
+ Int32 c_nblock_used = s->nblock_used;
+ Int32 c_k0 = s->k0;
+ UInt32* c_tt = s->tt;
+ UInt32 c_tPos = s->tPos;
+ char* cs_next_out = s->strm->next_out;
+ unsigned int cs_avail_out = s->strm->avail_out;
+ Int32 ro_blockSize100k = s->blockSize100k;
+ /* end restore */
+
+ UInt32 avail_out_INIT = cs_avail_out;
+ Int32 s_save_nblockPP = s->save_nblock+1;
+ unsigned int total_out_lo32_old;
+
+ while (True) {
+
+ /* try to finish existing run */
+ if (c_state_out_len > 0) {
+ while (True) {
+ if (cs_avail_out == 0) goto return_notr;
+ if (c_state_out_len == 1) break;
+ *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+ BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+ c_state_out_len--;
+ cs_next_out++;
+ cs_avail_out--;
+ }
+ s_state_out_len_eq_one:
+ {
+ if (cs_avail_out == 0) {
+ c_state_out_len = 1; goto return_notr;
+ };
+ *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+ BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+ cs_next_out++;
+ cs_avail_out--;
+ }
+ }
+ /* Only caused by corrupt data stream? */
+ if (c_nblock_used > s_save_nblockPP)
+ return True;
+
+ /* can a new run be started? */
+ if (c_nblock_used == s_save_nblockPP) {
+ c_state_out_len = 0; goto return_notr;
+ };
+ c_state_out_ch = c_k0;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (k1 != c_k0) {
+ c_k0 = k1; goto s_state_out_len_eq_one;
+ };
+ if (c_nblock_used == s_save_nblockPP)
+ goto s_state_out_len_eq_one;
+
+ c_state_out_len = 2;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (c_nblock_used == s_save_nblockPP) continue;
+ if (k1 != c_k0) { c_k0 = k1; continue; };
+
+ c_state_out_len = 3;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (c_nblock_used == s_save_nblockPP) continue;
+ if (k1 != c_k0) { c_k0 = k1; continue; };
+
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ c_state_out_len = ((Int32)k1) + 4;
+ BZ_GET_FAST_C(c_k0); c_nblock_used++;
+ }
+
+ return_notr:
+ total_out_lo32_old = s->strm->total_out_lo32;
+ s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+ if (s->strm->total_out_lo32 < total_out_lo32_old)
+ s->strm->total_out_hi32++;
+
+ /* save */
+ s->calculatedBlockCRC = c_calculatedBlockCRC;
+ s->state_out_ch = c_state_out_ch;
+ s->state_out_len = c_state_out_len;
+ s->nblock_used = c_nblock_used;
+ s->k0 = c_k0;
+ s->tt = c_tt;
+ s->tPos = c_tPos;
+ s->strm->next_out = cs_next_out;
+ s->strm->avail_out = cs_avail_out;
+ /* end save */
+ }
+ return False;
+}
+
+
+
+/*---------------------------------------------------*/
+__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
+{
+ Int32 nb, na, mid;
+ nb = 0;
+ na = 256;
+ do {
+ mid = (nb + na) >> 1;
+ if (indx >= cftab[mid]) nb = mid; else na = mid;
+ }
+ while (na - nb != 1);
+ return nb;
+}
+
+
+/*---------------------------------------------------*/
+/* Return True iff data corruption is discovered.
+ Returns False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_SMALL ( DState* s )
+{
+ UChar k1;
+
+ if (s->blockRandomised) {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+ BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK;
+ s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
+ }
+
+ } else {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+ BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ }
+
+ }
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
+{
+ Bool corrupt;
+ DState* s;
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ s = strm->state;
+ if (s == NULL) return BZ_PARAM_ERROR;
+ if (s->strm != strm) return BZ_PARAM_ERROR;
+
+ while (True) {
+ if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
+ if (s->state == BZ_X_OUTPUT) {
+ if (s->smallDecompress)
+ corrupt = unRLE_obuf_to_output_SMALL ( s ); else
+ corrupt = unRLE_obuf_to_output_FAST ( s );
+ if (corrupt) return BZ_DATA_ERROR;
+ if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
+ BZ_FINALISE_CRC ( s->calculatedBlockCRC );
if (s->verbosity >= 3)
- VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x",
- s->storedCombinedCRC, s->calculatedCombinedCRC );
- if (s->calculatedCombinedCRC != s->storedCombinedCRC)
- return BZ_DATA_ERROR;
- return r;
- }
- if (s->state != BZ_X_OUTPUT) return r;
- }
- }
-
- AssertH ( 0, 6001 );
-
- return 0; /*NOTREACHED*/
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
-{
- DState* s;
- if (strm == NULL) return BZ_PARAM_ERROR;
- s = strm->state;
- if (s == NULL) return BZ_PARAM_ERROR;
- if (s->strm != strm) return BZ_PARAM_ERROR;
-
- if (s->tt != NULL) BZFREE(s->tt);
- if (s->ll16 != NULL) BZFREE(s->ll16);
- if (s->ll4 != NULL) BZFREE(s->ll4);
-
- BZFREE(strm->state);
- strm->state = NULL;
-
- return BZ_OK;
-}
-
-
-#ifndef BZ_NO_STDIO
-/*---------------------------------------------------*/
-/*--- File I/O stuff ---*/
-/*---------------------------------------------------*/
-
-#define BZ_SETERR(eee) \
-{ \
- if (bzerror != NULL) *bzerror = eee; \
- if (bzf != NULL) bzf->lastErr = eee; \
-}
-
-typedef
- struct {
- FILE* handle;
- Char buf[BZ_MAX_UNUSED];
- Int32 bufN;
- Bool writing;
- bz_stream strm;
- Int32 lastErr;
- Bool initialisedOk;
- }
- bzFile;
-
-
-/*---------------------------------------------*/
-static Bool myfeof ( FILE* f )
-{
- Int32 c = fgetc ( f );
- if (c == EOF) return True;
- ungetc ( c, f );
- return False;
-}
-
-
-/*---------------------------------------------------*/
-BZFILE* BZ_API(BZ2_bzWriteOpen)
- ( int* bzerror,
- FILE* f,
- int blockSize100k,
- int verbosity,
- int workFactor )
-{
- Int32 ret;
- bzFile* bzf = NULL;
-
- BZ_SETERR(BZ_OK);
-
+ VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC,
+ s->calculatedBlockCRC );
+ if (s->verbosity >= 2) VPrintf0 ( "]" );
+ if (s->calculatedBlockCRC != s->storedBlockCRC)
+ return BZ_DATA_ERROR;
+ s->calculatedCombinedCRC
+ = (s->calculatedCombinedCRC << 1) |
+ (s->calculatedCombinedCRC >> 31);
+ s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
+ s->state = BZ_X_BLKHDR_1;
+ } else {
+ return BZ_OK;
+ }
+ }
+ if (s->state >= BZ_X_MAGIC_1) {
+ Int32 r = BZ2_decompress ( s );
+ if (r == BZ_STREAM_END) {
+ if (s->verbosity >= 3)
+ VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x",
+ s->storedCombinedCRC, s->calculatedCombinedCRC );
+ if (s->calculatedCombinedCRC != s->storedCombinedCRC)
+ return BZ_DATA_ERROR;
+ return r;
+ }
+ if (s->state != BZ_X_OUTPUT) return r;
+ }
+ }
+
+ AssertH ( 0, 6001 );
+
+ return 0; /*NOTREACHED*/
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
+{
+ DState* s;
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ s = strm->state;
+ if (s == NULL) return BZ_PARAM_ERROR;
+ if (s->strm != strm) return BZ_PARAM_ERROR;
+
+ if (s->tt != NULL) BZFREE(s->tt);
+ if (s->ll16 != NULL) BZFREE(s->ll16);
+ if (s->ll4 != NULL) BZFREE(s->ll4);
+
+ BZFREE(strm->state);
+ strm->state = NULL;
+
+ return BZ_OK;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+/*--- File I/O stuff ---*/
+/*---------------------------------------------------*/
+
+#define BZ_SETERR(eee) \
+{ \
+ if (bzerror != NULL) *bzerror = eee; \
+ if (bzf != NULL) bzf->lastErr = eee; \
+}
+
+typedef
+ struct {
+ FILE* handle;
+ Char buf[BZ_MAX_UNUSED];
+ Int32 bufN;
+ Bool writing;
+ bz_stream strm;
+ Int32 lastErr;
+ Bool initialisedOk;
+ }
+ bzFile;
+
+
+/*---------------------------------------------*/
+static Bool myfeof ( FILE* f )
+{
+ Int32 c = fgetc ( f );
+ if (c == EOF) return True;
+ ungetc ( c, f );
+ return False;
+}
+
+
+/*---------------------------------------------------*/
+BZFILE* BZ_API(BZ2_bzWriteOpen)
+ ( int* bzerror,
+ FILE* f,
+ int blockSize100k,
+ int verbosity,
+ int workFactor )
+{
+ Int32 ret;
+ bzFile* bzf = NULL;
+
+ BZ_SETERR(BZ_OK);
+
+ if (f == NULL ||
+ (blockSize100k < 1 || blockSize100k > 9) ||
+ (workFactor < 0 || workFactor > 250) ||
+ (verbosity < 0 || verbosity > 4))
+ { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+
+ if (ferror(f))
+ { BZ_SETERR(BZ_IO_ERROR); return NULL; };
+
+ bzf = malloc ( sizeof(bzFile) );
+ if (bzf == NULL)
+ { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
+
+ BZ_SETERR(BZ_OK);
+ bzf->initialisedOk = False;
+ bzf->bufN = 0;
+ bzf->handle = f;
+ bzf->writing = True;
+ bzf->strm.bzalloc = NULL;
+ bzf->strm.bzfree = NULL;
+ bzf->strm.opaque = NULL;
+
+ if (workFactor == 0) workFactor = 30;
+ ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
+ verbosity, workFactor );
+ if (ret != BZ_OK)
+ { BZ_SETERR(ret); free(bzf); return NULL; };
+
+ bzf->strm.avail_in = 0;
+ bzf->initialisedOk = True;
+ return bzf;
+}
+
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzWrite)
+ ( int* bzerror,
+ BZFILE* b,
+ void* buf,
+ int len )
+{
+ Int32 n, n2, ret;
+ bzFile* bzf = (bzFile*)b;
+
+ BZ_SETERR(BZ_OK);
+ if (bzf == NULL || buf == NULL || len < 0)
+ { BZ_SETERR(BZ_PARAM_ERROR); return; };
+ if (!(bzf->writing))
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+
+ if (len == 0)
+ { BZ_SETERR(BZ_OK); return; };
+
+ bzf->strm.avail_in = len;
+ bzf->strm.next_in = buf;
+
+ while (True) {
+ bzf->strm.avail_out = BZ_MAX_UNUSED;
+ bzf->strm.next_out = bzf->buf;
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
+ if (ret != BZ_RUN_OK)
+ { BZ_SETERR(ret); return; };
+
+ if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+ n = BZ_MAX_UNUSED - bzf->strm.avail_out;
+ n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
+ n, bzf->handle );
+ if (n != n2 || ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+ }
+
+ if (bzf->strm.avail_in == 0)
+ { BZ_SETERR(BZ_OK); return; };
+ }
+}
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzWriteClose)
+ ( int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in,
+ unsigned int* nbytes_out )
+{
+ BZ2_bzWriteClose64 ( bzerror, b, abandon,
+ nbytes_in, NULL, nbytes_out, NULL );
+}
+
+
+void BZ_API(BZ2_bzWriteClose64)
+ ( int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 )
+{
+ Int32 n, n2, ret;
+ bzFile* bzf = (bzFile*)b;
+
+ if (bzf == NULL)
+ { BZ_SETERR(BZ_OK); return; };
+ if (!(bzf->writing))
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+
+ if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
+ if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
+ if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
+ if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
+
+ if ((!abandon) && bzf->lastErr == BZ_OK) {
+ while (True) {
+ bzf->strm.avail_out = BZ_MAX_UNUSED;
+ bzf->strm.next_out = bzf->buf;
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
+ if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
+ { BZ_SETERR(ret); return; };
+
+ if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+ n = BZ_MAX_UNUSED - bzf->strm.avail_out;
+ n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
+ n, bzf->handle );
+ if (n != n2 || ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+ }
+
+ if (ret == BZ_STREAM_END) break;
+ }
+ }
+
+ if ( !abandon && !ferror ( bzf->handle ) ) {
+ fflush ( bzf->handle );
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+ }
+
+ if (nbytes_in_lo32 != NULL)
+ *nbytes_in_lo32 = bzf->strm.total_in_lo32;
+ if (nbytes_in_hi32 != NULL)
+ *nbytes_in_hi32 = bzf->strm.total_in_hi32;
+ if (nbytes_out_lo32 != NULL)
+ *nbytes_out_lo32 = bzf->strm.total_out_lo32;
+ if (nbytes_out_hi32 != NULL)
+ *nbytes_out_hi32 = bzf->strm.total_out_hi32;
+
+ BZ_SETERR(BZ_OK);
+ BZ2_bzCompressEnd ( &(bzf->strm) );
+ free ( bzf );
+}
+
+
+/*---------------------------------------------------*/
+BZFILE* BZ_API(BZ2_bzReadOpen)
+ ( int* bzerror,
+ FILE* f,
+ int verbosity,
+ int small,
+ void* unused,
+ int nUnused )
+{
+ bzFile* bzf = NULL;
+ int ret;
+
+ BZ_SETERR(BZ_OK);
+
if (f == NULL ||
- (blockSize100k < 1 || blockSize100k > 9) ||
- (workFactor < 0 || workFactor > 250) ||
- (verbosity < 0 || verbosity > 4))
- { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
-
- if (ferror(f))
- { BZ_SETERR(BZ_IO_ERROR); return NULL; };
-
- bzf = malloc ( sizeof(bzFile) );
- if (bzf == NULL)
- { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
-
- BZ_SETERR(BZ_OK);
- bzf->initialisedOk = False;
- bzf->bufN = 0;
- bzf->handle = f;
- bzf->writing = True;
- bzf->strm.bzalloc = NULL;
- bzf->strm.bzfree = NULL;
- bzf->strm.opaque = NULL;
-
- if (workFactor == 0) workFactor = 30;
- ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
- verbosity, workFactor );
- if (ret != BZ_OK)
- { BZ_SETERR(ret); free(bzf); return NULL; };
-
- bzf->strm.avail_in = 0;
- bzf->initialisedOk = True;
- return bzf;
-}
-
-
-
-/*---------------------------------------------------*/
-void BZ_API(BZ2_bzWrite)
- ( int* bzerror,
- BZFILE* b,
- void* buf,
- int len )
-{
- Int32 n, n2, ret;
- bzFile* bzf = (bzFile*)b;
-
- BZ_SETERR(BZ_OK);
- if (bzf == NULL || buf == NULL || len < 0)
- { BZ_SETERR(BZ_PARAM_ERROR); return; };
- if (!(bzf->writing))
- { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
- if (ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return; };
-
- if (len == 0)
- { BZ_SETERR(BZ_OK); return; };
-
- bzf->strm.avail_in = len;
- bzf->strm.next_in = buf;
-
- while (True) {
- bzf->strm.avail_out = BZ_MAX_UNUSED;
- bzf->strm.next_out = bzf->buf;
- ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
- if (ret != BZ_RUN_OK)
- { BZ_SETERR(ret); return; };
-
- if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
- n = BZ_MAX_UNUSED - bzf->strm.avail_out;
- n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
- n, bzf->handle );
- if (n != n2 || ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return; };
- }
-
- if (bzf->strm.avail_in == 0)
- { BZ_SETERR(BZ_OK); return; };
- }
-}
-
-
-/*---------------------------------------------------*/
-void BZ_API(BZ2_bzWriteClose)
- ( int* bzerror,
- BZFILE* b,
- int abandon,
- unsigned int* nbytes_in,
- unsigned int* nbytes_out )
-{
- BZ2_bzWriteClose64 ( bzerror, b, abandon,
- nbytes_in, NULL, nbytes_out, NULL );
-}
-
-
-void BZ_API(BZ2_bzWriteClose64)
- ( int* bzerror,
- BZFILE* b,
- int abandon,
- unsigned int* nbytes_in_lo32,
- unsigned int* nbytes_in_hi32,
- unsigned int* nbytes_out_lo32,
- unsigned int* nbytes_out_hi32 )
-{
- Int32 n, n2, ret;
- bzFile* bzf = (bzFile*)b;
-
+ (small != 0 && small != 1) ||
+ (verbosity < 0 || verbosity > 4) ||
+ (unused == NULL && nUnused != 0) ||
+ (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED)))
+ { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+
+ if (ferror(f))
+ { BZ_SETERR(BZ_IO_ERROR); return NULL; };
+
+ bzf = malloc ( sizeof(bzFile) );
if (bzf == NULL)
- { BZ_SETERR(BZ_OK); return; };
- if (!(bzf->writing))
- { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
- if (ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return; };
-
- if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
- if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
- if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
- if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
-
- if ((!abandon) && bzf->lastErr == BZ_OK) {
- while (True) {
- bzf->strm.avail_out = BZ_MAX_UNUSED;
- bzf->strm.next_out = bzf->buf;
- ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
- if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
- { BZ_SETERR(ret); return; };
-
- if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
- n = BZ_MAX_UNUSED - bzf->strm.avail_out;
- n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
- n, bzf->handle );
- if (n != n2 || ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return; };
- }
-
- if (ret == BZ_STREAM_END) break;
- }
- }
-
- if ( !abandon && !ferror ( bzf->handle ) ) {
- fflush ( bzf->handle );
+ { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
+
+ BZ_SETERR(BZ_OK);
+
+ bzf->initialisedOk = False;
+ bzf->handle = f;
+ bzf->bufN = 0;
+ bzf->writing = False;
+ bzf->strm.bzalloc = NULL;
+ bzf->strm.bzfree = NULL;
+ bzf->strm.opaque = NULL;
+
+ while (nUnused > 0) {
+ bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++;
+ unused = ((void*)( 1 + ((UChar*)(unused)) ));
+ nUnused--;
+ }
+
+ ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
+ if (ret != BZ_OK)
+ { BZ_SETERR(ret); free(bzf); return NULL; };
+
+ bzf->strm.avail_in = bzf->bufN;
+ bzf->strm.next_in = bzf->buf;
+
+ bzf->initialisedOk = True;
+ return bzf;
+}
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
+{
+ bzFile* bzf = (bzFile*)b;
+
+ BZ_SETERR(BZ_OK);
+ if (bzf == NULL)
+ { BZ_SETERR(BZ_OK); return; };
+
+ if (bzf->writing)
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+
+ if (bzf->initialisedOk)
+ (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
+ free ( bzf );
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzRead)
+ ( int* bzerror,
+ BZFILE* b,
+ void* buf,
+ int len )
+{
+ Int32 n, ret;
+ bzFile* bzf = (bzFile*)b;
+
+ BZ_SETERR(BZ_OK);
+
+ if (bzf == NULL || buf == NULL || len < 0)
+ { BZ_SETERR(BZ_PARAM_ERROR); return 0; };
+
+ if (bzf->writing)
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; };
+
+ if (len == 0)
+ { BZ_SETERR(BZ_OK); return 0; };
+
+ bzf->strm.avail_out = len;
+ bzf->strm.next_out = buf;
+
+ while (True) {
+
if (ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return; };
- }
-
- if (nbytes_in_lo32 != NULL)
- *nbytes_in_lo32 = bzf->strm.total_in_lo32;
- if (nbytes_in_hi32 != NULL)
- *nbytes_in_hi32 = bzf->strm.total_in_hi32;
- if (nbytes_out_lo32 != NULL)
- *nbytes_out_lo32 = bzf->strm.total_out_lo32;
- if (nbytes_out_hi32 != NULL)
- *nbytes_out_hi32 = bzf->strm.total_out_hi32;
-
- BZ_SETERR(BZ_OK);
- BZ2_bzCompressEnd ( &(bzf->strm) );
- free ( bzf );
-}
-
-
-/*---------------------------------------------------*/
-BZFILE* BZ_API(BZ2_bzReadOpen)
- ( int* bzerror,
- FILE* f,
- int verbosity,
- int small,
- void* unused,
- int nUnused )
-{
- bzFile* bzf = NULL;
- int ret;
-
- BZ_SETERR(BZ_OK);
-
- if (f == NULL ||
- (small != 0 && small != 1) ||
- (verbosity < 0 || verbosity > 4) ||
- (unused == NULL && nUnused != 0) ||
- (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED)))
- { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
-
- if (ferror(f))
- { BZ_SETERR(BZ_IO_ERROR); return NULL; };
-
- bzf = malloc ( sizeof(bzFile) );
- if (bzf == NULL)
- { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
-
- BZ_SETERR(BZ_OK);
-
- bzf->initialisedOk = False;
- bzf->handle = f;
- bzf->bufN = 0;
- bzf->writing = False;
- bzf->strm.bzalloc = NULL;
- bzf->strm.bzfree = NULL;
- bzf->strm.opaque = NULL;
-
- while (nUnused > 0) {
- bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++;
- unused = ((void*)( 1 + ((UChar*)(unused)) ));
- nUnused--;
- }
-
- ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
- if (ret != BZ_OK)
- { BZ_SETERR(ret); free(bzf); return NULL; };
-
- bzf->strm.avail_in = bzf->bufN;
- bzf->strm.next_in = bzf->buf;
-
- bzf->initialisedOk = True;
- return bzf;
-}
-
-
-/*---------------------------------------------------*/
-void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
-{
- bzFile* bzf = (bzFile*)b;
-
- BZ_SETERR(BZ_OK);
- if (bzf == NULL)
- { BZ_SETERR(BZ_OK); return; };
-
- if (bzf->writing)
- { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
-
- if (bzf->initialisedOk)
- (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
- free ( bzf );
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzRead)
- ( int* bzerror,
- BZFILE* b,
- void* buf,
- int len )
-{
- Int32 n, ret;
- bzFile* bzf = (bzFile*)b;
-
- BZ_SETERR(BZ_OK);
-
- if (bzf == NULL || buf == NULL || len < 0)
- { BZ_SETERR(BZ_PARAM_ERROR); return 0; };
-
- if (bzf->writing)
- { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; };
-
- if (len == 0)
- { BZ_SETERR(BZ_OK); return 0; };
-
- bzf->strm.avail_out = len;
- bzf->strm.next_out = buf;
-
- while (True) {
-
- if (ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return 0; };
-
- if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) {
- n = fread ( bzf->buf, sizeof(UChar),
- BZ_MAX_UNUSED, bzf->handle );
- if (ferror(bzf->handle))
- { BZ_SETERR(BZ_IO_ERROR); return 0; };
- bzf->bufN = n;
- bzf->strm.avail_in = bzf->bufN;
- bzf->strm.next_in = bzf->buf;
- }
-
- ret = BZ2_bzDecompress ( &(bzf->strm) );
-
- if (ret != BZ_OK && ret != BZ_STREAM_END)
- { BZ_SETERR(ret); return 0; };
-
- if (ret == BZ_OK && myfeof(bzf->handle) &&
- bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0)
- { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; };
-
- if (ret == BZ_STREAM_END)
- { BZ_SETERR(BZ_STREAM_END);
- return len - bzf->strm.avail_out; };
- if (bzf->strm.avail_out == 0)
- { BZ_SETERR(BZ_OK); return len; };
-
- }
-
- return 0; /*not reached*/
-}
-
-
-/*---------------------------------------------------*/
-void BZ_API(BZ2_bzReadGetUnused)
- ( int* bzerror,
- BZFILE* b,
- void** unused,
- int* nUnused )
-{
- bzFile* bzf = (bzFile*)b;
- if (bzf == NULL)
- { BZ_SETERR(BZ_PARAM_ERROR); return; };
- if (bzf->lastErr != BZ_STREAM_END)
- { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
- if (unused == NULL || nUnused == NULL)
- { BZ_SETERR(BZ_PARAM_ERROR); return; };
-
- BZ_SETERR(BZ_OK);
- *nUnused = bzf->strm.avail_in;
- *unused = bzf->strm.next_in;
-}
-#endif
-
-
-/*---------------------------------------------------*/
-/*--- Misc convenience stuff ---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzBuffToBuffCompress)
- ( char* dest,
- unsigned int* destLen,
- char* source,
- unsigned int sourceLen,
- int blockSize100k,
- int verbosity,
- int workFactor )
-{
- bz_stream strm;
- int ret;
-
- if (dest == NULL || destLen == NULL ||
- source == NULL ||
- blockSize100k < 1 || blockSize100k > 9 ||
- verbosity < 0 || verbosity > 4 ||
- workFactor < 0 || workFactor > 250)
- return BZ_PARAM_ERROR;
-
- if (workFactor == 0) workFactor = 30;
- strm.bzalloc = NULL;
- strm.bzfree = NULL;
- strm.opaque = NULL;
- ret = BZ2_bzCompressInit ( &strm, blockSize100k,
- verbosity, workFactor );
- if (ret != BZ_OK) return ret;
-
- strm.next_in = source;
- strm.next_out = dest;
- strm.avail_in = sourceLen;
- strm.avail_out = *destLen;
-
- ret = BZ2_bzCompress ( &strm, BZ_FINISH );
- if (ret == BZ_FINISH_OK) goto output_overflow;
- if (ret != BZ_STREAM_END) goto errhandler;
-
- /* normal termination */
- *destLen -= strm.avail_out;
- BZ2_bzCompressEnd ( &strm );
- return BZ_OK;
-
- output_overflow:
- BZ2_bzCompressEnd ( &strm );
- return BZ_OUTBUFF_FULL;
-
- errhandler:
- BZ2_bzCompressEnd ( &strm );
+ { BZ_SETERR(BZ_IO_ERROR); return 0; };
+
+ if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) {
+ n = fread ( bzf->buf, sizeof(UChar),
+ BZ_MAX_UNUSED, bzf->handle );
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return 0; };
+ bzf->bufN = n;
+ bzf->strm.avail_in = bzf->bufN;
+ bzf->strm.next_in = bzf->buf;
+ }
+
+ ret = BZ2_bzDecompress ( &(bzf->strm) );
+
+ if (ret != BZ_OK && ret != BZ_STREAM_END)
+ { BZ_SETERR(ret); return 0; };
+
+ if (ret == BZ_OK && myfeof(bzf->handle) &&
+ bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0)
+ { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; };
+
+ if (ret == BZ_STREAM_END)
+ { BZ_SETERR(BZ_STREAM_END);
+ return len - bzf->strm.avail_out; };
+ if (bzf->strm.avail_out == 0)
+ { BZ_SETERR(BZ_OK); return len; };
+
+ }
+
+ return 0; /*not reached*/
+}
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzReadGetUnused)
+ ( int* bzerror,
+ BZFILE* b,
+ void** unused,
+ int* nUnused )
+{
+ bzFile* bzf = (bzFile*)b;
+ if (bzf == NULL)
+ { BZ_SETERR(BZ_PARAM_ERROR); return; };
+ if (bzf->lastErr != BZ_STREAM_END)
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+ if (unused == NULL || nUnused == NULL)
+ { BZ_SETERR(BZ_PARAM_ERROR); return; };
+
+ BZ_SETERR(BZ_OK);
+ *nUnused = bzf->strm.avail_in;
+ *unused = bzf->strm.next_in;
+}
+#endif
+
+
+/*---------------------------------------------------*/
+/*--- Misc convenience stuff ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzBuffToBuffCompress)
+ ( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int blockSize100k,
+ int verbosity,
+ int workFactor )
+{
+ bz_stream strm;
+ int ret;
+
+ if (dest == NULL || destLen == NULL ||
+ source == NULL ||
+ blockSize100k < 1 || blockSize100k > 9 ||
+ verbosity < 0 || verbosity > 4 ||
+ workFactor < 0 || workFactor > 250)
+ return BZ_PARAM_ERROR;
+
+ if (workFactor == 0) workFactor = 30;
+ strm.bzalloc = NULL;
+ strm.bzfree = NULL;
+ strm.opaque = NULL;
+ ret = BZ2_bzCompressInit ( &strm, blockSize100k,
+ verbosity, workFactor );
+ if (ret != BZ_OK) return ret;
+
+ strm.next_in = source;
+ strm.next_out = dest;
+ strm.avail_in = sourceLen;
+ strm.avail_out = *destLen;
+
+ ret = BZ2_bzCompress ( &strm, BZ_FINISH );
+ if (ret == BZ_FINISH_OK) goto output_overflow;
+ if (ret != BZ_STREAM_END) goto errhandler;
+
+ /* normal termination */
+ *destLen -= strm.avail_out;
+ BZ2_bzCompressEnd ( &strm );
+ return BZ_OK;
+
+ output_overflow:
+ BZ2_bzCompressEnd ( &strm );
+ return BZ_OUTBUFF_FULL;
+
+ errhandler:
+ BZ2_bzCompressEnd ( &strm );
+ return ret;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzBuffToBuffDecompress)
+ ( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int small,
+ int verbosity )
+{
+ bz_stream strm;
+ int ret;
+
+ if (dest == NULL || destLen == NULL ||
+ source == NULL ||
+ (small != 0 && small != 1) ||
+ verbosity < 0 || verbosity > 4)
+ return BZ_PARAM_ERROR;
+
+ strm.bzalloc = NULL;
+ strm.bzfree = NULL;
+ strm.opaque = NULL;
+ ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
+ if (ret != BZ_OK) return ret;
+
+ strm.next_in = source;
+ strm.next_out = dest;
+ strm.avail_in = sourceLen;
+ strm.avail_out = *destLen;
+
+ ret = BZ2_bzDecompress ( &strm );
+ if (ret == BZ_OK) goto output_overflow_or_eof;
+ if (ret != BZ_STREAM_END) goto errhandler;
+
+ /* normal termination */
+ *destLen -= strm.avail_out;
+ BZ2_bzDecompressEnd ( &strm );
+ return BZ_OK;
+
+ output_overflow_or_eof:
+ if (strm.avail_out > 0) {
+ BZ2_bzDecompressEnd ( &strm );
+ return BZ_UNEXPECTED_EOF;
+ } else {
+ BZ2_bzDecompressEnd ( &strm );
+ return BZ_OUTBUFF_FULL;
+ };
+
+ errhandler:
+ BZ2_bzDecompressEnd ( &strm );
return ret;
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzBuffToBuffDecompress)
- ( char* dest,
- unsigned int* destLen,
- char* source,
- unsigned int sourceLen,
- int small,
- int verbosity )
-{
- bz_stream strm;
- int ret;
-
- if (dest == NULL || destLen == NULL ||
- source == NULL ||
- (small != 0 && small != 1) ||
- verbosity < 0 || verbosity > 4)
- return BZ_PARAM_ERROR;
-
- strm.bzalloc = NULL;
- strm.bzfree = NULL;
- strm.opaque = NULL;
- ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
- if (ret != BZ_OK) return ret;
-
- strm.next_in = source;
- strm.next_out = dest;
- strm.avail_in = sourceLen;
- strm.avail_out = *destLen;
-
- ret = BZ2_bzDecompress ( &strm );
- if (ret == BZ_OK) goto output_overflow_or_eof;
- if (ret != BZ_STREAM_END) goto errhandler;
-
- /* normal termination */
- *destLen -= strm.avail_out;
- BZ2_bzDecompressEnd ( &strm );
- return BZ_OK;
-
- output_overflow_or_eof:
- if (strm.avail_out > 0) {
- BZ2_bzDecompressEnd ( &strm );
- return BZ_UNEXPECTED_EOF;
- } else {
- BZ2_bzDecompressEnd ( &strm );
- return BZ_OUTBUFF_FULL;
- };
-
- errhandler:
- BZ2_bzDecompressEnd ( &strm );
- return ret;
-}
-
-
-/*---------------------------------------------------*/
-/*--
- Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
- to support better zlib compatibility.
- This code is not _officially_ part of libbzip2 (yet);
- I haven't tested it, documented it, or considered the
- threading-safeness of it.
- If this code breaks, please contact both Yoshioka and me.
---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-/*--
- return version like "0.9.5d, 4-Sept-1999".
---*/
-const char * BZ_API(BZ2_bzlibVersion)(void)
-{
- return BZ_VERSION;
-}
-
-
-#ifndef BZ_NO_STDIO
-/*---------------------------------------------------*/
-
-#if defined(_WIN32) || defined(OS2) || defined(MSDOS)
-# include <fcntl.h>
-# include <io.h>
-# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY)
-#else
-# define SET_BINARY_MODE(file)
-#endif
-static
-BZFILE * bzopen_or_bzdopen
- ( const char *path, /* no use when bzdopen */
- int fd, /* no use when bzdopen */
- const char *mode,
- int open_mode) /* bzopen: 0, bzdopen:1 */
-{
- int bzerr;
- char unused[BZ_MAX_UNUSED];
- int blockSize100k = 9;
- int writing = 0;
- char mode2[10] = "";
- FILE *fp = NULL;
- BZFILE *bzfp = NULL;
- int verbosity = 0;
- int workFactor = 30;
- int smallMode = 0;
- int nUnused = 0;
-
- if (mode == NULL) return NULL;
- while (*mode) {
- switch (*mode) {
- case 'r':
- writing = 0; break;
- case 'w':
- writing = 1; break;
- case 's':
- smallMode = 1; break;
- default:
- if (isdigit((int)(*mode))) {
- blockSize100k = *mode-BZ_HDR_0;
- }
- }
- mode++;
- }
- strcat(mode2, writing ? "w" : "r" );
- strcat(mode2,"b"); /* binary mode */
-
- if (open_mode==0) {
- if (path==NULL || strcmp(path,"")==0) {
- fp = (writing ? stdout : stdin);
- SET_BINARY_MODE(fp);
- } else {
- fp = fopen(path,mode2);
- }
- } else {
-#ifdef BZ_STRICT_ANSI
- fp = NULL;
-#else
- fp = fdopen(fd,mode2);
-#endif
- }
- if (fp == NULL) return NULL;
-
- if (writing) {
- /* Guard against total chaos and anarchy -- JRS */
- if (blockSize100k < 1) blockSize100k = 1;
- if (blockSize100k > 9) blockSize100k = 9;
- bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
- verbosity,workFactor);
- } else {
- bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
- unused,nUnused);
- }
- if (bzfp == NULL) {
- if (fp != stdin && fp != stdout) fclose(fp);
- return NULL;
- }
- return bzfp;
-}
-
-
-/*---------------------------------------------------*/
-/*--
- open file for read or write.
- ex) bzopen("file","w9")
- case path="" or NULL => use stdin or stdout.
---*/
-BZFILE * BZ_API(BZ2_bzopen)
- ( const char *path,
- const char *mode )
-{
- return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0);
-}
-
-
-/*---------------------------------------------------*/
-BZFILE * BZ_API(BZ2_bzdopen)
- ( int fd,
- const char *mode )
-{
- return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1);
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
-{
- int bzerr, nread;
- if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
- nread = BZ2_bzRead(&bzerr,b,buf,len);
- if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
- return nread;
- } else {
- return -1;
- }
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
-{
- int bzerr;
-
- BZ2_bzWrite(&bzerr,b,buf,len);
- if(bzerr == BZ_OK){
- return len;
- }else{
- return -1;
- }
-}
-
-
-/*---------------------------------------------------*/
-int BZ_API(BZ2_bzflush) (BZFILE *b)
-{
- /* do nothing now... */
- return 0;
-}
-
-
-/*---------------------------------------------------*/
-void BZ_API(BZ2_bzclose) (BZFILE* b)
-{
- int bzerr;
- FILE *fp;
-
- if (b==NULL) {return;}
- fp = ((bzFile *)b)->handle;
- if(((bzFile*)b)->writing){
- BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
- if(bzerr != BZ_OK){
- BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
- }
- }else{
- BZ2_bzReadClose(&bzerr,b);
- }
- if(fp!=stdin && fp!=stdout){
- fclose(fp);
- }
-}
-
-
-/*---------------------------------------------------*/
-/*--
- return last error code
---*/
-static const char *bzerrorstrings[] = {
- "OK"
- ,"SEQUENCE_ERROR"
- ,"PARAM_ERROR"
- ,"MEM_ERROR"
- ,"DATA_ERROR"
- ,"DATA_ERROR_MAGIC"
- ,"IO_ERROR"
- ,"UNEXPECTED_EOF"
- ,"OUTBUFF_FULL"
- ,"CONFIG_ERROR"
- ,"???" /* for future */
- ,"???" /* for future */
- ,"???" /* for future */
- ,"???" /* for future */
- ,"???" /* for future */
- ,"???" /* for future */
-};
-
-
-const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
-{
- int err = ((bzFile *)b)->lastErr;
-
- if(err>0) err = 0;
- *errnum = err;
- return bzerrorstrings[err*-1];
-}
-#endif
-
-
-/*-------------------------------------------------------------*/
-/*--- end bzlib.c ---*/
-/*-------------------------------------------------------------*/
+}
+
+
+/*---------------------------------------------------*/
+/*--
+ Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
+ to support better zlib compatibility.
+ This code is not _officially_ part of libbzip2 (yet);
+ I haven't tested it, documented it, or considered the
+ threading-safeness of it.
+ If this code breaks, please contact both Yoshioka and me.
+--*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*--
+ return version like "0.9.5d, 4-Sept-1999".
+--*/
+const char * BZ_API(BZ2_bzlibVersion)(void)
+{
+ return BZ_VERSION;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+
+#if defined(_WIN32) || defined(OS2) || defined(MSDOS)
+# include <fcntl.h>
+# include <io.h>
+# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
+#endif
+static
+BZFILE * bzopen_or_bzdopen
+ ( const char *path, /* no use when bzdopen */
+ int fd, /* no use when bzdopen */
+ const char *mode,
+ int open_mode) /* bzopen: 0, bzdopen:1 */
+{
+ int bzerr;
+ char unused[BZ_MAX_UNUSED];
+ int blockSize100k = 9;
+ int writing = 0;
+ char mode2[10] = "";
+ FILE *fp = NULL;
+ BZFILE *bzfp = NULL;
+ int verbosity = 0;
+ int workFactor = 30;
+ int smallMode = 0;
+ int nUnused = 0;
+
+ if (mode == NULL) return NULL;
+ while (*mode) {
+ switch (*mode) {
+ case 'r':
+ writing = 0; break;
+ case 'w':
+ writing = 1; break;
+ case 's':
+ smallMode = 1; break;
+ default:
+ if (isdigit((int)(*mode))) {
+ blockSize100k = *mode-BZ_HDR_0;
+ }
+ }
+ mode++;
+ }
+ strcat(mode2, writing ? "w" : "r" );
+ strcat(mode2,"b"); /* binary mode */
+
+ if (open_mode==0) {
+ if (path==NULL || strcmp(path,"")==0) {
+ fp = (writing ? stdout : stdin);
+ SET_BINARY_MODE(fp);
+ } else {
+ fp = fopen(path,mode2);
+ }
+ } else {
+#ifdef BZ_STRICT_ANSI
+ fp = NULL;
+#else
+ fp = fdopen(fd,mode2);
+#endif
+ }
+ if (fp == NULL) return NULL;
+
+ if (writing) {
+ /* Guard against total chaos and anarchy -- JRS */
+ if (blockSize100k < 1) blockSize100k = 1;
+ if (blockSize100k > 9) blockSize100k = 9;
+ bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
+ verbosity,workFactor);
+ } else {
+ bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
+ unused,nUnused);
+ }
+ if (bzfp == NULL) {
+ if (fp != stdin && fp != stdout) fclose(fp);
+ return NULL;
+ }
+ return bzfp;
+}
+
+
+/*---------------------------------------------------*/
+/*--
+ open file for read or write.
+ ex) bzopen("file","w9")
+ case path="" or NULL => use stdin or stdout.
+--*/
+BZFILE * BZ_API(BZ2_bzopen)
+ ( const char *path,
+ const char *mode )
+{
+ return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0);
+}
+
+
+/*---------------------------------------------------*/
+BZFILE * BZ_API(BZ2_bzdopen)
+ ( int fd,
+ const char *mode )
+{
+ return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1);
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
+{
+ int bzerr, nread;
+ if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
+ nread = BZ2_bzRead(&bzerr,b,buf,len);
+ if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
+ return nread;
+ } else {
+ return -1;
+ }
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
+{
+ int bzerr;
+
+ BZ2_bzWrite(&bzerr,b,buf,len);
+ if(bzerr == BZ_OK){
+ return len;
+ }else{
+ return -1;
+ }
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzflush) (BZFILE *b)
+{
+ /* do nothing now... */
+ return 0;
+}
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzclose) (BZFILE* b)
+{
+ int bzerr;
+ FILE *fp;
+
+ if (b==NULL) {return;}
+ fp = ((bzFile *)b)->handle;
+ if(((bzFile*)b)->writing){
+ BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
+ if(bzerr != BZ_OK){
+ BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
+ }
+ }else{
+ BZ2_bzReadClose(&bzerr,b);
+ }
+ if(fp!=stdin && fp!=stdout){
+ fclose(fp);
+ }
+}
+
+
+/*---------------------------------------------------*/
+/*--
+ return last error code
+--*/
+static const char *bzerrorstrings[] = {
+ "OK"
+ ,"SEQUENCE_ERROR"
+ ,"PARAM_ERROR"
+ ,"MEM_ERROR"
+ ,"DATA_ERROR"
+ ,"DATA_ERROR_MAGIC"
+ ,"IO_ERROR"
+ ,"UNEXPECTED_EOF"
+ ,"OUTBUFF_FULL"
+ ,"CONFIG_ERROR"
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+};
+
+
+const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
+{
+ int err = ((bzFile *)b)->lastErr;
+
+ if(err>0) err = 0;
+ *errnum = err;
+ return bzerrorstrings[err*-1];
+}
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib.c ---*/
+/*-------------------------------------------------------------*/
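
For orientation, the stdio-level read path defined above (BZ2_bzReadOpen, BZ2_bzRead, BZ2_bzReadClose) is normally driven as in the minimal sketch below. The sketch is not part of libbzip2 or of this change; the input file name is hypothetical, multi-stream inputs and BZ2_bzReadGetUnused are ignored, and error handling is reduced to bailing out on the first failure.

/* decompress data.bz2 to stdout -- minimal sketch, C89-style declarations */
#include <stdio.h>
#include "bzlib.h"

int main(void)
{
   FILE*   f;
   BZFILE* bzf;
   int     bzerror, n, ok;
   char    buf[4096];

   f = fopen("data.bz2", "rb");                       /* hypothetical input */
   if (f == NULL) return 1;

   bzf = BZ2_bzReadOpen(&bzerror, f, 0, 0, NULL, 0);  /* verbosity 0, small 0 */
   if (bzerror != BZ_OK) { fclose(f); return 1; }

   while (bzerror == BZ_OK) {
      n = BZ2_bzRead(&bzerror, bzf, buf, (int)sizeof(buf));
      if (bzerror == BZ_OK || bzerror == BZ_STREAM_END)
         fwrite(buf, 1, (size_t)n, stdout);           /* n bytes were produced */
   }

   ok = (bzerror == BZ_STREAM_END);                   /* remember the read status */
   BZ2_bzReadClose(&bzerror, bzf);                    /* frees bzf, leaves f open */
   fclose(f);
   return ok ? 0 : 1;
}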
diff --git a/contrib/libs/libbz2/bzlib.h b/contrib/libs/libbz2/bzlib.h
index 1fcc8a1d47..a7d2cd6188 100644
--- a/contrib/libs/libbz2/bzlib.h
+++ b/contrib/libs/libbz2/bzlib.h
@@ -1,183 +1,183 @@
-/*-------------------------------------------------------------*/
-/*--- Public header file for the library. ---*/
-/*--- bzlib.h ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+/*-------------------------------------------------------------*/
+/*--- Public header file for the library. ---*/
+/*--- bzlib.h ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#ifndef _BZLIB_H
-#define _BZLIB_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BZ_RUN 0
-#define BZ_FLUSH 1
-#define BZ_FINISH 2
-
-#define BZ_OK 0
-#define BZ_RUN_OK 1
-#define BZ_FLUSH_OK 2
-#define BZ_FINISH_OK 3
-#define BZ_STREAM_END 4
-#define BZ_SEQUENCE_ERROR (-1)
-#define BZ_PARAM_ERROR (-2)
-#define BZ_MEM_ERROR (-3)
-#define BZ_DATA_ERROR (-4)
-#define BZ_DATA_ERROR_MAGIC (-5)
-#define BZ_IO_ERROR (-6)
-#define BZ_UNEXPECTED_EOF (-7)
-#define BZ_OUTBUFF_FULL (-8)
-#define BZ_CONFIG_ERROR (-9)
-
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#ifndef _BZLIB_H
+#define _BZLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BZ_RUN 0
+#define BZ_FLUSH 1
+#define BZ_FINISH 2
+
+#define BZ_OK 0
+#define BZ_RUN_OK 1
+#define BZ_FLUSH_OK 2
+#define BZ_FINISH_OK 3
+#define BZ_STREAM_END 4
+#define BZ_SEQUENCE_ERROR (-1)
+#define BZ_PARAM_ERROR (-2)
+#define BZ_MEM_ERROR (-3)
+#define BZ_DATA_ERROR (-4)
+#define BZ_DATA_ERROR_MAGIC (-5)
+#define BZ_IO_ERROR (-6)
+#define BZ_UNEXPECTED_EOF (-7)
+#define BZ_OUTBUFF_FULL (-8)
+#define BZ_CONFIG_ERROR (-9)
+
typedef
- struct {
- char *next_in;
- unsigned int avail_in;
- unsigned int total_in_lo32;
- unsigned int total_in_hi32;
-
- char *next_out;
- unsigned int avail_out;
- unsigned int total_out_lo32;
- unsigned int total_out_hi32;
-
- void *state;
-
- void *(*bzalloc)(void *,int,int);
- void (*bzfree)(void *,void *);
- void *opaque;
+ struct {
+ char *next_in;
+ unsigned int avail_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
+
+ char *next_out;
+ unsigned int avail_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
+
+ void *state;
+
+ void *(*bzalloc)(void *,int,int);
+ void (*bzfree)(void *,void *);
+ void *opaque;
}
- bz_stream;
-
-
-#ifndef BZ_IMPORT
-#define BZ_EXPORT
-#endif
-
-#ifndef BZ_NO_STDIO
-/* Need a definition for FILE */
-#include <stdio.h>
-#endif
-
-#ifdef _WIN32
-# include <windows.h>
-# ifdef small
- /* windows.h defines small as char */
-# undef small
-# endif
-#endif
-
-#define BZ_API(func) func
-#define BZ_EXTERN extern
-
-/*-- Core (low-level) library functions --*/
-
+ bz_stream;
+
+
+#ifndef BZ_IMPORT
+#define BZ_EXPORT
+#endif
+
+#ifndef BZ_NO_STDIO
+/* Need a definition for FILE */
+#include <stdio.h>
+#endif
+
+#ifdef _WIN32
+# include <windows.h>
+# ifdef small
+ /* windows.h defines small as char */
+# undef small
+# endif
+#endif
+
+#define BZ_API(func) func
+#define BZ_EXTERN extern
+
+/*-- Core (low-level) library functions --*/
+
BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
bz_stream* strm,
int blockSize100k,
int verbosity,
int workFactor
- );
-
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzCompress) (
bz_stream* strm,
int action
- );
-
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
bz_stream* strm
- );
-
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
bz_stream *strm,
int verbosity,
- int small
- );
-
+ int small
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
bz_stream* strm
- );
-
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
bz_stream *strm
- );
-
-
-
-/*-- High(er) level library functions --*/
-
-#ifndef BZ_NO_STDIO
-#define BZ_MAX_UNUSED 5000
-
-typedef void BZFILE;
-
+ );
+
+
+
+/*-- High(er) level library functions --*/
+
+#ifndef BZ_NO_STDIO
+#define BZ_MAX_UNUSED 5000
+
+typedef void BZFILE;
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
int* bzerror,
FILE* f,
int verbosity,
- int small,
+ int small,
void* unused,
int nUnused
- );
-
+ );
+
BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
int* bzerror,
BZFILE* b
- );
-
+ );
+
BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
int* bzerror,
BZFILE* b,
void** unused,
int* nUnused
- );
-
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzRead) (
int* bzerror,
BZFILE* b,
void* buf,
int len
- );
-
+ );
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
int* bzerror,
FILE* f,
int blockSize100k,
int verbosity,
int workFactor
- );
-
+ );
+
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
int* bzerror,
BZFILE* b,
void* buf,
int len
- );
-
+ );
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in,
unsigned int* nbytes_out
- );
-
+ );
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
int* bzerror,
BZFILE* b,
@@ -185,89 +185,89 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
unsigned int* nbytes_in_lo32,
unsigned int* nbytes_in_hi32,
unsigned int* nbytes_out_lo32,
- unsigned int* nbytes_out_hi32
- );
-#endif
-
-
-/*-- Utility functions --*/
-
+ unsigned int* nbytes_out_hi32
+ );
+#endif
+
+
+/*-- Utility functions --*/
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
char* dest,
- unsigned int* destLen,
+ unsigned int* destLen,
char* source,
- unsigned int sourceLen,
+ unsigned int sourceLen,
int blockSize100k,
int verbosity,
int workFactor
- );
-
+ );
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
char* dest,
- unsigned int* destLen,
+ unsigned int* destLen,
char* source,
- unsigned int sourceLen,
+ unsigned int sourceLen,
int small,
int verbosity
- );
-
-
-/*--
- Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
- to support better zlib compatibility.
- This code is not _officially_ part of libbzip2 (yet);
- I haven't tested it, documented it, or considered the
- threading-safeness of it.
- If this code breaks, please contact both Yoshioka and me.
---*/
-
-BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
- void
- );
-
-#ifndef BZ_NO_STDIO
-BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
- const char *path,
- const char *mode
- );
-
-BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
- int fd,
- const char *mode
- );
+ );
+
+
+/*--
+ Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
+ to support better zlib compatibility.
+ This code is not _officially_ part of libbzip2 (yet);
+ I haven't tested it, documented it, or considered the
+ threading-safeness of it.
+ If this code breaks, please contact both Yoshioka and me.
+--*/
+
+BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
+ void
+ );
+
+#ifndef BZ_NO_STDIO
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
+ const char *path,
+ const char *mode
+ );
+
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
+ int fd,
+ const char *mode
+ );
-BZ_EXTERN int BZ_API(BZ2_bzread) (
+BZ_EXTERN int BZ_API(BZ2_bzread) (
BZFILE* b,
void* buf,
int len
- );
-
-BZ_EXTERN int BZ_API(BZ2_bzwrite) (
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzwrite) (
BZFILE* b,
void* buf,
int len
- );
-
-BZ_EXTERN int BZ_API(BZ2_bzflush) (
- BZFILE* b
- );
-
-BZ_EXTERN void BZ_API(BZ2_bzclose) (
- BZFILE* b
- );
-
-BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzflush) (
+ BZFILE* b
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzclose) (
+ BZFILE* b
+ );
+
+BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
BZFILE *b,
- int *errnum
- );
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-/*-------------------------------------------------------------*/
-/*--- end bzlib.h ---*/
-/*-------------------------------------------------------------*/
+ int *errnum
+ );
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib.h ---*/
+/*-------------------------------------------------------------*/
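
The utility entry points declared just above, BZ2_bzBuffToBuffCompress and BZ2_bzBuffToBuffDecompress, cover the common one-shot case without any bz_stream bookkeeping. A minimal round-trip sketch follows; it is not part of this change, the payload is made up, and the compressed-side buffer is simply sized generously (the library's guidance is roughly the source length plus 1% plus 600 bytes).

#include <stdio.h>
#include <string.h>
#include "bzlib.h"

int main(void)
{
   char         src[]  = "an example payload, assumed to be compressible";
   unsigned int srcLen = (unsigned int)strlen(src) + 1;
   char         comp[1024];                 /* >= srcLen + srcLen/100 + 600 */
   char         back[256];
   unsigned int compLen = (unsigned int)sizeof(comp);
   unsigned int backLen = (unsigned int)sizeof(back);
   int          rc;

   /* blockSize100k=9, verbosity=0, workFactor=0 (0 selects the default, 30) */
   rc = BZ2_bzBuffToBuffCompress(comp, &compLen, src, srcLen, 9, 0, 0);
   if (rc != BZ_OK) return 1;

   /* small=0, verbosity=0; on success backLen is updated to the output size */
   rc = BZ2_bzBuffToBuffDecompress(back, &backLen, comp, compLen, 0, 0);
   if (rc != BZ_OK || backLen != srcLen) return 1;

   printf("%u -> %u -> %u bytes\n", srcLen, compLen, backLen);
   return 0;
}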
diff --git a/contrib/libs/libbz2/bzlib_private.h b/contrib/libs/libbz2/bzlib_private.h
index f20d7f06b1..2578c2dcf2 100644
--- a/contrib/libs/libbz2/bzlib_private.h
+++ b/contrib/libs/libbz2/bzlib_private.h
@@ -1,509 +1,509 @@
-/*-------------------------------------------------------------*/
-/*--- Private header file for the library. ---*/
-/*--- bzlib_private.h ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+/*-------------------------------------------------------------*/
+/*--- Private header file for the library. ---*/
+/*--- bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#ifndef _BZLIB_PRIVATE_H
-#define _BZLIB_PRIVATE_H
-
-#include <stdlib.h>
-
-#ifndef BZ_NO_STDIO
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#endif
-
-#include "bzlib.h"
-
-
-
-/*-- General stuff. --*/
-
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#ifndef _BZLIB_PRIVATE_H
+#define _BZLIB_PRIVATE_H
+
+#include <stdlib.h>
+
+#ifndef BZ_NO_STDIO
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#endif
+
+#include "bzlib.h"
+
+
+
+/*-- General stuff. --*/
+
#define BZ_VERSION "1.0.8, 13-Jul-2019"
-
-typedef char Char;
-typedef unsigned char Bool;
-typedef unsigned char UChar;
-typedef int Int32;
-typedef unsigned int UInt32;
-typedef short Int16;
-typedef unsigned short UInt16;
-
-#define True ((Bool)1)
-#define False ((Bool)0)
-
-#ifndef __GNUC__
-#define __inline__ /* */
-#endif
-
-#ifndef BZ_NO_STDIO
-
-extern void BZ2_bz__AssertH__fail ( int errcode );
-#define AssertH(cond,errcode) \
- { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
-
-#if BZ_DEBUG
-#define AssertD(cond,msg) \
- { if (!(cond)) { \
- fprintf ( stderr, \
- "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
- exit(1); \
- }}
-#else
-#define AssertD(cond,msg) /* */
-#endif
-
-#define VPrintf0(zf) \
- fprintf(stderr,zf)
-#define VPrintf1(zf,za1) \
- fprintf(stderr,zf,za1)
-#define VPrintf2(zf,za1,za2) \
- fprintf(stderr,zf,za1,za2)
-#define VPrintf3(zf,za1,za2,za3) \
- fprintf(stderr,zf,za1,za2,za3)
-#define VPrintf4(zf,za1,za2,za3,za4) \
- fprintf(stderr,zf,za1,za2,za3,za4)
-#define VPrintf5(zf,za1,za2,za3,za4,za5) \
- fprintf(stderr,zf,za1,za2,za3,za4,za5)
-
-#else
-
-extern void bz_internal_error ( int errcode );
-#define AssertH(cond,errcode) \
- { if (!(cond)) bz_internal_error ( errcode ); }
-#define AssertD(cond,msg) do { } while (0)
-#define VPrintf0(zf) do { } while (0)
-#define VPrintf1(zf,za1) do { } while (0)
-#define VPrintf2(zf,za1,za2) do { } while (0)
-#define VPrintf3(zf,za1,za2,za3) do { } while (0)
-#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0)
-#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0)
-
+
+typedef char Char;
+typedef unsigned char Bool;
+typedef unsigned char UChar;
+typedef int Int32;
+typedef unsigned int UInt32;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#define True ((Bool)1)
+#define False ((Bool)0)
+
+#ifndef __GNUC__
+#define __inline__ /* */
#endif
-
-
-#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
-#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp))
-
-
-/*-- Header bytes. --*/
-
-#define BZ_HDR_B 0x42 /* 'B' */
-#define BZ_HDR_Z 0x5a /* 'Z' */
-#define BZ_HDR_h 0x68 /* 'h' */
-#define BZ_HDR_0 0x30 /* '0' */
+
+#ifndef BZ_NO_STDIO
+
+extern void BZ2_bz__AssertH__fail ( int errcode );
+#define AssertH(cond,errcode) \
+ { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
+
+#if BZ_DEBUG
+#define AssertD(cond,msg) \
+ { if (!(cond)) { \
+ fprintf ( stderr, \
+ "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
+ exit(1); \
+ }}
+#else
+#define AssertD(cond,msg) /* */
+#endif
+
+#define VPrintf0(zf) \
+ fprintf(stderr,zf)
+#define VPrintf1(zf,za1) \
+ fprintf(stderr,zf,za1)
+#define VPrintf2(zf,za1,za2) \
+ fprintf(stderr,zf,za1,za2)
+#define VPrintf3(zf,za1,za2,za3) \
+ fprintf(stderr,zf,za1,za2,za3)
+#define VPrintf4(zf,za1,za2,za3,za4) \
+ fprintf(stderr,zf,za1,za2,za3,za4)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) \
+ fprintf(stderr,zf,za1,za2,za3,za4,za5)
+
+#else
+
+extern void bz_internal_error ( int errcode );
+#define AssertH(cond,errcode) \
+ { if (!(cond)) bz_internal_error ( errcode ); }
+#define AssertD(cond,msg) do { } while (0)
+#define VPrintf0(zf) do { } while (0)
+#define VPrintf1(zf,za1) do { } while (0)
+#define VPrintf2(zf,za1,za2) do { } while (0)
+#define VPrintf3(zf,za1,za2,za3) do { } while (0)
+#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0)
+
+#endif
+
+
+#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
+#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp))
+
+
+/*-- Header bytes. --*/
+
+#define BZ_HDR_B 0x42 /* 'B' */
+#define BZ_HDR_Z 0x5a /* 'Z' */
+#define BZ_HDR_h 0x68 /* 'h' */
+#define BZ_HDR_0 0x30 /* '0' */
-/*-- Constants for the back end. --*/
-
-#define BZ_MAX_ALPHA_SIZE 258
-#define BZ_MAX_CODE_LEN 23
-
-#define BZ_RUNA 0
-#define BZ_RUNB 1
-
-#define BZ_N_GROUPS 6
-#define BZ_G_SIZE 50
-#define BZ_N_ITERS 4
-
-#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
-
-
-
-/*-- Stuff for randomising repetitive blocks. --*/
-
-extern const Int32 BZ2_rNums[512];
-
-#define BZ_RAND_DECLS \
- Int32 rNToGo; \
- Int32 rTPos \
-
-#define BZ_RAND_INIT_MASK \
- s->rNToGo = 0; \
- s->rTPos = 0 \
-
-#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
-
-#define BZ_RAND_UPD_MASK \
- if (s->rNToGo == 0) { \
- s->rNToGo = BZ2_rNums[s->rTPos]; \
- s->rTPos++; \
- if (s->rTPos == 512) s->rTPos = 0; \
- } \
- s->rNToGo--;
-
-
-
-/*-- Stuff for doing CRCs. --*/
-
-extern const UInt32 BZ2_crc32Table[256];
-
-#define BZ_INITIALISE_CRC(crcVar) \
-{ \
- crcVar = 0xffffffffL; \
-}
-
-#define BZ_FINALISE_CRC(crcVar) \
-{ \
- crcVar = ~(crcVar); \
-}
-
-#define BZ_UPDATE_CRC(crcVar,cha) \
-{ \
- crcVar = (crcVar << 8) ^ \
- BZ2_crc32Table[(crcVar >> 24) ^ \
- ((UChar)cha)]; \
-}
-
-
-
-/*-- States and modes for compression. --*/
-
-#define BZ_M_IDLE 1
-#define BZ_M_RUNNING 2
-#define BZ_M_FLUSHING 3
-#define BZ_M_FINISHING 4
-
-#define BZ_S_OUTPUT 1
-#define BZ_S_INPUT 2
-
-#define BZ_N_RADIX 2
-#define BZ_N_QSORT 12
-#define BZ_N_SHELL 18
-#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
-
-
-
-
-/*-- Structure holding all the compression-side stuff. --*/
-
-typedef
- struct {
- /* pointer back to the struct bz_stream */
- bz_stream* strm;
-
- /* mode this stream is in, and whether inputting */
- /* or outputting data */
- Int32 mode;
- Int32 state;
-
- /* remembers avail_in when flush/finish requested */
- UInt32 avail_in_expect;
-
- /* for doing the block sorting */
- UInt32* arr1;
- UInt32* arr2;
- UInt32* ftab;
- Int32 origPtr;
-
- /* aliases for arr1 and arr2 */
- UInt32* ptr;
- UChar* block;
- UInt16* mtfv;
- UChar* zbits;
-
- /* for deciding when to use the fallback sorting algorithm */
- Int32 workFactor;
-
- /* run-length-encoding of the input */
- UInt32 state_in_ch;
- Int32 state_in_len;
- BZ_RAND_DECLS;
-
- /* input and output limits and current posns */
- Int32 nblock;
- Int32 nblockMAX;
- Int32 numZ;
- Int32 state_out_pos;
-
- /* map of bytes used in block */
- Int32 nInUse;
- Bool inUse[256];
- UChar unseqToSeq[256];
-
- /* the buffer for bit stream creation */
- UInt32 bsBuff;
- Int32 bsLive;
-
- /* block and combined CRCs */
- UInt32 blockCRC;
- UInt32 combinedCRC;
-
- /* misc administratium */
- Int32 verbosity;
- Int32 blockNo;
- Int32 blockSize100k;
-
- /* stuff for coding the MTF values */
- Int32 nMTF;
- Int32 mtfFreq [BZ_MAX_ALPHA_SIZE];
- UChar selector [BZ_MAX_SELECTORS];
- UChar selectorMtf[BZ_MAX_SELECTORS];
-
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- /* second dimension: only 3 needed; 4 makes index calculations faster */
- UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
-
- }
- EState;
-
-
-
-/*-- externs for compression. --*/
-
+/*-- Constants for the back end. --*/
+
+#define BZ_MAX_ALPHA_SIZE 258
+#define BZ_MAX_CODE_LEN 23
+
+#define BZ_RUNA 0
+#define BZ_RUNB 1
+
+#define BZ_N_GROUPS 6
+#define BZ_G_SIZE 50
+#define BZ_N_ITERS 4
+
+#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
+
+
+
+/*-- Stuff for randomising repetitive blocks. --*/
+
+extern const Int32 BZ2_rNums[512];
+
+#define BZ_RAND_DECLS \
+ Int32 rNToGo; \
+ Int32 rTPos \
+
+#define BZ_RAND_INIT_MASK \
+ s->rNToGo = 0; \
+ s->rTPos = 0 \
+
+#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
+
+#define BZ_RAND_UPD_MASK \
+ if (s->rNToGo == 0) { \
+ s->rNToGo = BZ2_rNums[s->rTPos]; \
+ s->rTPos++; \
+ if (s->rTPos == 512) s->rTPos = 0; \
+ } \
+ s->rNToGo--;
+
+
+
+/*-- Stuff for doing CRCs. --*/
+
+extern const UInt32 BZ2_crc32Table[256];
+
+#define BZ_INITIALISE_CRC(crcVar) \
+{ \
+ crcVar = 0xffffffffL; \
+}
+
+#define BZ_FINALISE_CRC(crcVar) \
+{ \
+ crcVar = ~(crcVar); \
+}
+
+#define BZ_UPDATE_CRC(crcVar,cha) \
+{ \
+ crcVar = (crcVar << 8) ^ \
+ BZ2_crc32Table[(crcVar >> 24) ^ \
+ ((UChar)cha)]; \
+}
+
+
+
+/*-- States and modes for compression. --*/
+
+#define BZ_M_IDLE 1
+#define BZ_M_RUNNING 2
+#define BZ_M_FLUSHING 3
+#define BZ_M_FINISHING 4
+
+#define BZ_S_OUTPUT 1
+#define BZ_S_INPUT 2
+
+#define BZ_N_RADIX 2
+#define BZ_N_QSORT 12
+#define BZ_N_SHELL 18
+#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
+
+
+
+
+/*-- Structure holding all the compression-side stuff. --*/
+
+typedef
+ struct {
+ /* pointer back to the struct bz_stream */
+ bz_stream* strm;
+
+ /* mode this stream is in, and whether inputting */
+ /* or outputting data */
+ Int32 mode;
+ Int32 state;
+
+ /* remembers avail_in when flush/finish requested */
+ UInt32 avail_in_expect;
+
+ /* for doing the block sorting */
+ UInt32* arr1;
+ UInt32* arr2;
+ UInt32* ftab;
+ Int32 origPtr;
+
+ /* aliases for arr1 and arr2 */
+ UInt32* ptr;
+ UChar* block;
+ UInt16* mtfv;
+ UChar* zbits;
+
+ /* for deciding when to use the fallback sorting algorithm */
+ Int32 workFactor;
+
+ /* run-length-encoding of the input */
+ UInt32 state_in_ch;
+ Int32 state_in_len;
+ BZ_RAND_DECLS;
+
+ /* input and output limits and current posns */
+ Int32 nblock;
+ Int32 nblockMAX;
+ Int32 numZ;
+ Int32 state_out_pos;
+
+ /* map of bytes used in block */
+ Int32 nInUse;
+ Bool inUse[256];
+ UChar unseqToSeq[256];
+
+ /* the buffer for bit stream creation */
+ UInt32 bsBuff;
+ Int32 bsLive;
+
+ /* block and combined CRCs */
+ UInt32 blockCRC;
+ UInt32 combinedCRC;
+
+ /* misc administratium */
+ Int32 verbosity;
+ Int32 blockNo;
+ Int32 blockSize100k;
+
+ /* stuff for coding the MTF values */
+ Int32 nMTF;
+ Int32 mtfFreq [BZ_MAX_ALPHA_SIZE];
+ UChar selector [BZ_MAX_SELECTORS];
+ UChar selectorMtf[BZ_MAX_SELECTORS];
+
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ /* second dimension: only 3 needed; 4 makes index calculations faster */
+ UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
+
+ }
+ EState;
+
+
+
+/*-- externs for compression. --*/
+
extern void
-BZ2_blockSort ( EState* );
-
+BZ2_blockSort ( EState* );
+
extern void
-BZ2_compressBlock ( EState*, Bool );
-
+BZ2_compressBlock ( EState*, Bool );
+
extern void
-BZ2_bsInitWrite ( EState* );
-
+BZ2_bsInitWrite ( EState* );
+
extern void
-BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
-
+BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
+
extern void
-BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
-
-
-
-/*-- states for decompression. --*/
-
-#define BZ_X_IDLE 1
-#define BZ_X_OUTPUT 2
-
-#define BZ_X_MAGIC_1 10
-#define BZ_X_MAGIC_2 11
-#define BZ_X_MAGIC_3 12
-#define BZ_X_MAGIC_4 13
-#define BZ_X_BLKHDR_1 14
-#define BZ_X_BLKHDR_2 15
-#define BZ_X_BLKHDR_3 16
-#define BZ_X_BLKHDR_4 17
-#define BZ_X_BLKHDR_5 18
-#define BZ_X_BLKHDR_6 19
-#define BZ_X_BCRC_1 20
-#define BZ_X_BCRC_2 21
-#define BZ_X_BCRC_3 22
-#define BZ_X_BCRC_4 23
-#define BZ_X_RANDBIT 24
-#define BZ_X_ORIGPTR_1 25
-#define BZ_X_ORIGPTR_2 26
-#define BZ_X_ORIGPTR_3 27
-#define BZ_X_MAPPING_1 28
-#define BZ_X_MAPPING_2 29
-#define BZ_X_SELECTOR_1 30
-#define BZ_X_SELECTOR_2 31
-#define BZ_X_SELECTOR_3 32
-#define BZ_X_CODING_1 33
-#define BZ_X_CODING_2 34
-#define BZ_X_CODING_3 35
-#define BZ_X_MTF_1 36
-#define BZ_X_MTF_2 37
-#define BZ_X_MTF_3 38
-#define BZ_X_MTF_4 39
-#define BZ_X_MTF_5 40
-#define BZ_X_MTF_6 41
-#define BZ_X_ENDHDR_2 42
-#define BZ_X_ENDHDR_3 43
-#define BZ_X_ENDHDR_4 44
-#define BZ_X_ENDHDR_5 45
-#define BZ_X_ENDHDR_6 46
-#define BZ_X_CCRC_1 47
-#define BZ_X_CCRC_2 48
-#define BZ_X_CCRC_3 49
-#define BZ_X_CCRC_4 50
-
-
-
-/*-- Constants for the fast MTF decoder. --*/
-
-#define MTFA_SIZE 4096
-#define MTFL_SIZE 16
-
-
-
-/*-- Structure holding all the decompression-side stuff. --*/
-
-typedef
- struct {
- /* pointer back to the struct bz_stream */
- bz_stream* strm;
-
- /* state indicator for this stream */
- Int32 state;
-
- /* for doing the final run-length decoding */
- UChar state_out_ch;
- Int32 state_out_len;
- Bool blockRandomised;
- BZ_RAND_DECLS;
-
- /* the buffer for bit stream reading */
- UInt32 bsBuff;
- Int32 bsLive;
-
- /* misc administratium */
- Int32 blockSize100k;
- Bool smallDecompress;
- Int32 currBlockNo;
- Int32 verbosity;
-
- /* for undoing the Burrows-Wheeler transform */
- Int32 origPtr;
- UInt32 tPos;
- Int32 k0;
- Int32 unzftab[256];
- Int32 nblock_used;
- Int32 cftab[257];
- Int32 cftabCopy[257];
-
- /* for undoing the Burrows-Wheeler transform (FAST) */
- UInt32 *tt;
-
- /* for undoing the Burrows-Wheeler transform (SMALL) */
- UInt16 *ll16;
- UChar *ll4;
-
- /* stored and calculated CRCs */
- UInt32 storedBlockCRC;
- UInt32 storedCombinedCRC;
- UInt32 calculatedBlockCRC;
- UInt32 calculatedCombinedCRC;
-
- /* map of bytes used in block */
- Int32 nInUse;
- Bool inUse[256];
- Bool inUse16[16];
- UChar seqToUnseq[256];
-
- /* for decoding the MTF values */
- UChar mtfa [MTFA_SIZE];
- Int32 mtfbase[256 / MTFL_SIZE];
- UChar selector [BZ_MAX_SELECTORS];
- UChar selectorMtf[BZ_MAX_SELECTORS];
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
-
- Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 minLens[BZ_N_GROUPS];
-
- /* save area for scalars in the main decompress code */
- Int32 save_i;
- Int32 save_j;
- Int32 save_t;
- Int32 save_alphaSize;
- Int32 save_nGroups;
- Int32 save_nSelectors;
- Int32 save_EOB;
- Int32 save_groupNo;
- Int32 save_groupPos;
- Int32 save_nextSym;
- Int32 save_nblockMAX;
- Int32 save_nblock;
- Int32 save_es;
- Int32 save_N;
- Int32 save_curr;
- Int32 save_zt;
+BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
+
+
+
+/*-- states for decompression. --*/
+
+#define BZ_X_IDLE 1
+#define BZ_X_OUTPUT 2
+
+#define BZ_X_MAGIC_1 10
+#define BZ_X_MAGIC_2 11
+#define BZ_X_MAGIC_3 12
+#define BZ_X_MAGIC_4 13
+#define BZ_X_BLKHDR_1 14
+#define BZ_X_BLKHDR_2 15
+#define BZ_X_BLKHDR_3 16
+#define BZ_X_BLKHDR_4 17
+#define BZ_X_BLKHDR_5 18
+#define BZ_X_BLKHDR_6 19
+#define BZ_X_BCRC_1 20
+#define BZ_X_BCRC_2 21
+#define BZ_X_BCRC_3 22
+#define BZ_X_BCRC_4 23
+#define BZ_X_RANDBIT 24
+#define BZ_X_ORIGPTR_1 25
+#define BZ_X_ORIGPTR_2 26
+#define BZ_X_ORIGPTR_3 27
+#define BZ_X_MAPPING_1 28
+#define BZ_X_MAPPING_2 29
+#define BZ_X_SELECTOR_1 30
+#define BZ_X_SELECTOR_2 31
+#define BZ_X_SELECTOR_3 32
+#define BZ_X_CODING_1 33
+#define BZ_X_CODING_2 34
+#define BZ_X_CODING_3 35
+#define BZ_X_MTF_1 36
+#define BZ_X_MTF_2 37
+#define BZ_X_MTF_3 38
+#define BZ_X_MTF_4 39
+#define BZ_X_MTF_5 40
+#define BZ_X_MTF_6 41
+#define BZ_X_ENDHDR_2 42
+#define BZ_X_ENDHDR_3 43
+#define BZ_X_ENDHDR_4 44
+#define BZ_X_ENDHDR_5 45
+#define BZ_X_ENDHDR_6 46
+#define BZ_X_CCRC_1 47
+#define BZ_X_CCRC_2 48
+#define BZ_X_CCRC_3 49
+#define BZ_X_CCRC_4 50
+
+
+
+/*-- Constants for the fast MTF decoder. --*/
+
+#define MTFA_SIZE 4096
+#define MTFL_SIZE 16
+
+
+
+/*-- Structure holding all the decompression-side stuff. --*/
+
+typedef
+ struct {
+ /* pointer back to the struct bz_stream */
+ bz_stream* strm;
+
+ /* state indicator for this stream */
+ Int32 state;
+
+ /* for doing the final run-length decoding */
+ UChar state_out_ch;
+ Int32 state_out_len;
+ Bool blockRandomised;
+ BZ_RAND_DECLS;
+
+ /* the buffer for bit stream reading */
+ UInt32 bsBuff;
+ Int32 bsLive;
+
+ /* misc administratium */
+ Int32 blockSize100k;
+ Bool smallDecompress;
+ Int32 currBlockNo;
+ Int32 verbosity;
+
+ /* for undoing the Burrows-Wheeler transform */
+ Int32 origPtr;
+ UInt32 tPos;
+ Int32 k0;
+ Int32 unzftab[256];
+ Int32 nblock_used;
+ Int32 cftab[257];
+ Int32 cftabCopy[257];
+
+ /* for undoing the Burrows-Wheeler transform (FAST) */
+ UInt32 *tt;
+
+ /* for undoing the Burrows-Wheeler transform (SMALL) */
+ UInt16 *ll16;
+ UChar *ll4;
+
+ /* stored and calculated CRCs */
+ UInt32 storedBlockCRC;
+ UInt32 storedCombinedCRC;
+ UInt32 calculatedBlockCRC;
+ UInt32 calculatedCombinedCRC;
+
+ /* map of bytes used in block */
+ Int32 nInUse;
+ Bool inUse[256];
+ Bool inUse16[16];
+ UChar seqToUnseq[256];
+
+ /* for decoding the MTF values */
+ UChar mtfa [MTFA_SIZE];
+ Int32 mtfbase[256 / MTFL_SIZE];
+ UChar selector [BZ_MAX_SELECTORS];
+ UChar selectorMtf[BZ_MAX_SELECTORS];
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+
+ Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 minLens[BZ_N_GROUPS];
+
+ /* save area for scalars in the main decompress code */
+ Int32 save_i;
+ Int32 save_j;
+ Int32 save_t;
+ Int32 save_alphaSize;
+ Int32 save_nGroups;
+ Int32 save_nSelectors;
+ Int32 save_EOB;
+ Int32 save_groupNo;
+ Int32 save_groupPos;
+ Int32 save_nextSym;
+ Int32 save_nblockMAX;
+ Int32 save_nblock;
+ Int32 save_es;
+ Int32 save_N;
+ Int32 save_curr;
+ Int32 save_zt;
Int32 save_zn;
- Int32 save_zvec;
- Int32 save_zj;
- Int32 save_gSel;
- Int32 save_gMinlen;
- Int32* save_gLimit;
- Int32* save_gBase;
- Int32* save_gPerm;
-
- }
- DState;
-
-
-
-/*-- Macros for decompression. --*/
-
-#define BZ_GET_FAST(cccc) \
- /* c_tPos is unsigned, hence test < 0 is pointless. */ \
- if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
- s->tPos = s->tt[s->tPos]; \
- cccc = (UChar)(s->tPos & 0xff); \
- s->tPos >>= 8;
-
-#define BZ_GET_FAST_C(cccc) \
- /* c_tPos is unsigned, hence test < 0 is pointless. */ \
- if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \
- c_tPos = c_tt[c_tPos]; \
- cccc = (UChar)(c_tPos & 0xff); \
- c_tPos >>= 8;
-
-#define SET_LL4(i,n) \
- { if (((i) & 0x1) == 0) \
- s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \
- s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
- }
-
-#define GET_LL4(i) \
- ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
-
-#define SET_LL(i,n) \
- { s->ll16[i] = (UInt16)(n & 0x0000ffff); \
- SET_LL4(i, n >> 16); \
- }
-
-#define GET_LL(i) \
- (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
-
-#define BZ_GET_SMALL(cccc) \
- /* c_tPos is unsigned, hence test < 0 is pointless. */ \
- if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
- cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
- s->tPos = GET_LL(s->tPos);
-
-
-/*-- externs for decompression. --*/
-
+ Int32 save_zvec;
+ Int32 save_zj;
+ Int32 save_gSel;
+ Int32 save_gMinlen;
+ Int32* save_gLimit;
+ Int32* save_gBase;
+ Int32* save_gPerm;
+
+ }
+ DState;
+
+
+
+/*-- Macros for decompression. --*/
+
+#define BZ_GET_FAST(cccc) \
+ /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+ if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
+ s->tPos = s->tt[s->tPos]; \
+ cccc = (UChar)(s->tPos & 0xff); \
+ s->tPos >>= 8;
+
+#define BZ_GET_FAST_C(cccc) \
+ /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+ if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \
+ c_tPos = c_tt[c_tPos]; \
+ cccc = (UChar)(c_tPos & 0xff); \
+ c_tPos >>= 8;
+
+#define SET_LL4(i,n) \
+ { if (((i) & 0x1) == 0) \
+ s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \
+ s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
+ }
+
+#define GET_LL4(i) \
+ ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
+
+#define SET_LL(i,n) \
+ { s->ll16[i] = (UInt16)(n & 0x0000ffff); \
+ SET_LL4(i, n >> 16); \
+ }
+
+#define GET_LL(i) \
+ (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
+
+#define BZ_GET_SMALL(cccc) \
+ /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+ if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
+ cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
+ s->tPos = GET_LL(s->tPos);
+
+
+/*-- externs for decompression. --*/
+
extern Int32
-BZ2_indexIntoF ( Int32, Int32* );
-
+BZ2_indexIntoF ( Int32, Int32* );
+
extern Int32
-BZ2_decompress ( DState* );
-
+BZ2_decompress ( DState* );
+
extern void
-BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
- Int32, Int32, Int32 );
-
-
-#endif
-
-
-/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
-
-#ifdef BZ_NO_STDIO
-#ifndef NULL
-#define NULL 0
-#endif
-#endif
-
-
-/*-------------------------------------------------------------*/
-/*--- end bzlib_private.h ---*/
-/*-------------------------------------------------------------*/
+BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
+ Int32, Int32, Int32 );
+
+
+#endif
+
+
+/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
+
+#ifdef BZ_NO_STDIO
+#ifndef NULL
+#define NULL 0
+#endif
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
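
The SET_LL/GET_LL macros in the header above implement the small-memory decompressor's packed index array: each entry needs up to about 20 bits, so the low 16 bits go into the UInt16 array ll16 and the top 4 bits into one nibble of ll4, roughly halving memory versus plain 32-bit words. A minimal standalone sketch of the same packing follows; the array names and sizes here are illustrative, not the library's.

#include <stdint.h>
#include <stdio.h>

static uint16_t ll16[8];  /* low 16 bits of each entry */
static uint8_t  ll4[4];   /* two 4-bit high parts packed per byte */

/* Store a value of up to 20 bits at index i, mirroring SET_LL/SET_LL4. */
static void set_ll(int i, uint32_t n)
{
    ll16[i] = (uint16_t)(n & 0xffff);
    if ((i & 1) == 0)
        ll4[i >> 1] = (uint8_t)((ll4[i >> 1] & 0xf0) | ((n >> 16) & 0x0f));
    else
        ll4[i >> 1] = (uint8_t)((ll4[i >> 1] & 0x0f) | (((n >> 16) & 0x0f) << 4));
}

/* Reassemble the 20-bit value, mirroring GET_LL/GET_LL4. */
static uint32_t get_ll(int i)
{
    uint32_t hi = (uint32_t)(ll4[i >> 1] >> ((i & 1) ? 4 : 0)) & 0x0f;
    return (uint32_t)ll16[i] | (hi << 16);
}

int main(void)
{
    set_ll(3, 0x9abcd);            /* a 20-bit index */
    printf("0x%x\n", get_ll(3));   /* prints 0x9abcd */
    return 0;
}

The even/odd branch in set_ll corresponds to the `((i) & 0x1) == 0` test in SET_LL4: two neighbouring entries share one byte of ll4, the even index in the low nibble and the odd index in the high nibble.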
diff --git a/contrib/libs/libbz2/compress.c b/contrib/libs/libbz2/compress.c
index 2880fdf8ec..5dfa00231b 100644
--- a/contrib/libs/libbz2/compress.c
+++ b/contrib/libs/libbz2/compress.c
@@ -1,672 +1,672 @@
-
-/*-------------------------------------------------------------*/
-/*--- Compression machinery (not incl block sorting) ---*/
-/*--- compress.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Compression machinery (not incl block sorting) ---*/
+/*--- compress.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
- Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-/* CHANGES
- 0.9.0 -- original version.
- 0.9.0a/b -- no changes in this file.
- 0.9.0c -- changed setting of nGroups in sendMTFValues()
- so as to do a bit better on small files
-*/
-
-#include "bzlib_private.h"
-
-
-/*---------------------------------------------------*/
-/*--- Bit stream I/O ---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-void BZ2_bsInitWrite ( EState* s )
-{
- s->bsLive = 0;
- s->bsBuff = 0;
-}
-
-
-/*---------------------------------------------------*/
-static
-void bsFinishWrite ( EState* s )
-{
- while (s->bsLive > 0) {
- s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
- s->numZ++;
- s->bsBuff <<= 8;
- s->bsLive -= 8;
- }
-}
-
-
-/*---------------------------------------------------*/
-#define bsNEEDW(nz) \
-{ \
- while (s->bsLive >= 8) { \
- s->zbits[s->numZ] \
- = (UChar)(s->bsBuff >> 24); \
- s->numZ++; \
- s->bsBuff <<= 8; \
- s->bsLive -= 8; \
- } \
-}
-
-
-/*---------------------------------------------------*/
-static
-__inline__
-void bsW ( EState* s, Int32 n, UInt32 v )
-{
- bsNEEDW ( n );
- s->bsBuff |= (v << (32 - s->bsLive - n));
- s->bsLive += n;
-}
-
-
-/*---------------------------------------------------*/
-static
-void bsPutUInt32 ( EState* s, UInt32 u )
-{
- bsW ( s, 8, (u >> 24) & 0xffL );
- bsW ( s, 8, (u >> 16) & 0xffL );
- bsW ( s, 8, (u >> 8) & 0xffL );
- bsW ( s, 8, u & 0xffL );
-}
-
-
-/*---------------------------------------------------*/
-static
-void bsPutUChar ( EState* s, UChar c )
-{
- bsW( s, 8, (UInt32)c );
-}
-
-
-/*---------------------------------------------------*/
-/*--- The back end proper ---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-static
-void makeMaps_e ( EState* s )
-{
- Int32 i;
- s->nInUse = 0;
- for (i = 0; i < 256; i++)
- if (s->inUse[i]) {
- s->unseqToSeq[i] = s->nInUse;
- s->nInUse++;
- }
-}
-
-
-/*---------------------------------------------------*/
-static
-void generateMTFValues ( EState* s )
-{
- UChar yy[256];
- Int32 i, j;
- Int32 zPend;
- Int32 wr;
- Int32 EOB;
-
- /*
- After sorting (eg, here),
- s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
- and
- ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
- holds the original block data.
-
- The first thing to do is generate the MTF values,
- and put them in
- ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
- Because there are strictly fewer or equal MTF values
- than block values, ptr values in this area are overwritten
- with MTF values only when they are no longer needed.
-
- The final compressed bitstream is generated into the
- area starting at
- (UChar*) (&((UChar*)s->arr2)[s->nblock])
-
- These storage aliases are set up in bzCompressInit(),
- except for the last one, which is arranged in
- compressBlock().
- */
- UInt32* ptr = s->ptr;
- UChar* block = s->block;
- UInt16* mtfv = s->mtfv;
-
- makeMaps_e ( s );
- EOB = s->nInUse+1;
-
- for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
-
- wr = 0;
- zPend = 0;
- for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
-
- for (i = 0; i < s->nblock; i++) {
- UChar ll_i;
- AssertD ( wr <= i, "generateMTFValues(1)" );
- j = ptr[i]-1; if (j < 0) j += s->nblock;
- ll_i = s->unseqToSeq[block[j]];
- AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
-
- if (yy[0] == ll_i) {
- zPend++;
- } else {
-
- if (zPend > 0) {
- zPend--;
- while (True) {
- if (zPend & 1) {
- mtfv[wr] = BZ_RUNB; wr++;
- s->mtfFreq[BZ_RUNB]++;
- } else {
- mtfv[wr] = BZ_RUNA; wr++;
- s->mtfFreq[BZ_RUNA]++;
- }
- if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
- };
- zPend = 0;
- }
- {
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+/* CHANGES
+ 0.9.0 -- original version.
+ 0.9.0a/b -- no changes in this file.
+ 0.9.0c -- changed setting of nGroups in sendMTFValues()
+ so as to do a bit better on small files
+*/
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/*--- Bit stream I/O ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+void BZ2_bsInitWrite ( EState* s )
+{
+ s->bsLive = 0;
+ s->bsBuff = 0;
+}
+
+
+/*---------------------------------------------------*/
+static
+void bsFinishWrite ( EState* s )
+{
+ while (s->bsLive > 0) {
+ s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
+ s->numZ++;
+ s->bsBuff <<= 8;
+ s->bsLive -= 8;
+ }
+}
+
+
+/*---------------------------------------------------*/
+#define bsNEEDW(nz) \
+{ \
+ while (s->bsLive >= 8) { \
+ s->zbits[s->numZ] \
+ = (UChar)(s->bsBuff >> 24); \
+ s->numZ++; \
+ s->bsBuff <<= 8; \
+ s->bsLive -= 8; \
+ } \
+}
+
+
+/*---------------------------------------------------*/
+static
+__inline__
+void bsW ( EState* s, Int32 n, UInt32 v )
+{
+ bsNEEDW ( n );
+ s->bsBuff |= (v << (32 - s->bsLive - n));
+ s->bsLive += n;
+}
+
+
+/*---------------------------------------------------*/
+static
+void bsPutUInt32 ( EState* s, UInt32 u )
+{
+ bsW ( s, 8, (u >> 24) & 0xffL );
+ bsW ( s, 8, (u >> 16) & 0xffL );
+ bsW ( s, 8, (u >> 8) & 0xffL );
+ bsW ( s, 8, u & 0xffL );
+}
+
+
+/*---------------------------------------------------*/
+static
+void bsPutUChar ( EState* s, UChar c )
+{
+ bsW( s, 8, (UInt32)c );
+}
+
+
+/*---------------------------------------------------*/
+/*--- The back end proper ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+static
+void makeMaps_e ( EState* s )
+{
+ Int32 i;
+ s->nInUse = 0;
+ for (i = 0; i < 256; i++)
+ if (s->inUse[i]) {
+ s->unseqToSeq[i] = s->nInUse;
+ s->nInUse++;
+ }
+}
+
+
+/*---------------------------------------------------*/
+static
+void generateMTFValues ( EState* s )
+{
+ UChar yy[256];
+ Int32 i, j;
+ Int32 zPend;
+ Int32 wr;
+ Int32 EOB;
+
+ /*
+ After sorting (eg, here),
+ s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
+ and
+ ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
+ holds the original block data.
+
+ The first thing to do is generate the MTF values,
+ and put them in
+ ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
+ Because there are strictly fewer or equal MTF values
+ than block values, ptr values in this area are overwritten
+ with MTF values only when they are no longer needed.
+
+ The final compressed bitstream is generated into the
+ area starting at
+ (UChar*) (&((UChar*)s->arr2)[s->nblock])
+
+ These storage aliases are set up in bzCompressInit(),
+ except for the last one, which is arranged in
+ compressBlock().
+ */
+ UInt32* ptr = s->ptr;
+ UChar* block = s->block;
+ UInt16* mtfv = s->mtfv;
+
+ makeMaps_e ( s );
+ EOB = s->nInUse+1;
+
+ for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
+
+ wr = 0;
+ zPend = 0;
+ for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
+
+ for (i = 0; i < s->nblock; i++) {
+ UChar ll_i;
+ AssertD ( wr <= i, "generateMTFValues(1)" );
+ j = ptr[i]-1; if (j < 0) j += s->nblock;
+ ll_i = s->unseqToSeq[block[j]];
+ AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
+
+ if (yy[0] == ll_i) {
+ zPend++;
+ } else {
+
+ if (zPend > 0) {
+ zPend--;
+ while (True) {
+ if (zPend & 1) {
+ mtfv[wr] = BZ_RUNB; wr++;
+ s->mtfFreq[BZ_RUNB]++;
+ } else {
+ mtfv[wr] = BZ_RUNA; wr++;
+ s->mtfFreq[BZ_RUNA]++;
+ }
+ if (zPend < 2) break;
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
+ {
register UChar rtmp;
register UChar* ryy_j;
register UChar rll_i;
- rtmp = yy[1];
- yy[1] = yy[0];
- ryy_j = &(yy[1]);
- rll_i = ll_i;
- while ( rll_i != rtmp ) {
+ rtmp = yy[1];
+ yy[1] = yy[0];
+ ryy_j = &(yy[1]);
+ rll_i = ll_i;
+ while ( rll_i != rtmp ) {
register UChar rtmp2;
- ryy_j++;
- rtmp2 = rtmp;
- rtmp = *ryy_j;
- *ryy_j = rtmp2;
- };
- yy[0] = rtmp;
- j = ryy_j - &(yy[0]);
- mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
- }
-
- }
- }
-
- if (zPend > 0) {
- zPend--;
- while (True) {
- if (zPend & 1) {
- mtfv[wr] = BZ_RUNB; wr++;
- s->mtfFreq[BZ_RUNB]++;
- } else {
- mtfv[wr] = BZ_RUNA; wr++;
- s->mtfFreq[BZ_RUNA]++;
- }
- if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
- };
- zPend = 0;
- }
-
- mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
-
- s->nMTF = wr;
-}
-
-
-/*---------------------------------------------------*/
-#define BZ_LESSER_ICOST 0
-#define BZ_GREATER_ICOST 15
-
-static
-void sendMTFValues ( EState* s )
-{
- Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
- Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
- Int32 nGroups, nBytes;
-
- /*--
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- is a global since the decoder also needs it.
-
- Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- are also globals only used in this proc.
- Made global to keep stack frame size small.
- --*/
-
-
- UInt16 cost[BZ_N_GROUPS];
- Int32 fave[BZ_N_GROUPS];
-
- UInt16* mtfv = s->mtfv;
-
- if (s->verbosity >= 3)
- VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
- "%d+2 syms in use\n",
- s->nblock, s->nMTF, s->nInUse );
-
- alphaSize = s->nInUse+2;
- for (t = 0; t < BZ_N_GROUPS; t++)
- for (v = 0; v < alphaSize; v++)
- s->len[t][v] = BZ_GREATER_ICOST;
-
- /*--- Decide how many coding tables to use ---*/
- AssertH ( s->nMTF > 0, 3001 );
- if (s->nMTF < 200) nGroups = 2; else
- if (s->nMTF < 600) nGroups = 3; else
- if (s->nMTF < 1200) nGroups = 4; else
- if (s->nMTF < 2400) nGroups = 5; else
- nGroups = 6;
-
- /*--- Generate an initial set of coding tables ---*/
- {
- Int32 nPart, remF, tFreq, aFreq;
-
- nPart = nGroups;
- remF = s->nMTF;
- gs = 0;
- while (nPart > 0) {
- tFreq = remF / nPart;
- ge = gs-1;
- aFreq = 0;
- while (aFreq < tFreq && ge < alphaSize-1) {
- ge++;
- aFreq += s->mtfFreq[ge];
- }
-
- if (ge > gs
- && nPart != nGroups && nPart != 1
- && ((nGroups-nPart) % 2 == 1)) {
- aFreq -= s->mtfFreq[ge];
- ge--;
- }
-
- if (s->verbosity >= 3)
- VPrintf5( " initial group %d, [%d .. %d], "
- "has %d syms (%4.1f%%)\n",
- nPart, gs, ge, aFreq,
- (100.0 * (float)aFreq) / (float)(s->nMTF) );
-
- for (v = 0; v < alphaSize; v++)
- if (v >= gs && v <= ge)
- s->len[nPart-1][v] = BZ_LESSER_ICOST; else
- s->len[nPart-1][v] = BZ_GREATER_ICOST;
-
- nPart--;
- gs = ge+1;
- remF -= aFreq;
- }
- }
-
- /*---
- Iterate up to BZ_N_ITERS times to improve the tables.
- ---*/
- for (iter = 0; iter < BZ_N_ITERS; iter++) {
-
- for (t = 0; t < nGroups; t++) fave[t] = 0;
-
- for (t = 0; t < nGroups; t++)
- for (v = 0; v < alphaSize; v++)
- s->rfreq[t][v] = 0;
-
- /*---
- Set up an auxiliary length table which is used to fast-track
- the common case (nGroups == 6).
- ---*/
- if (nGroups == 6) {
- for (v = 0; v < alphaSize; v++) {
- s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
- s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
- s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
- }
- }
-
- nSelectors = 0;
- totc = 0;
- gs = 0;
- while (True) {
-
- /*--- Set group start & end marks. --*/
- if (gs >= s->nMTF) break;
- ge = gs + BZ_G_SIZE - 1;
- if (ge >= s->nMTF) ge = s->nMTF-1;
-
- /*--
- Calculate the cost of this group as coded
- by each of the coding tables.
- --*/
- for (t = 0; t < nGroups; t++) cost[t] = 0;
-
- if (nGroups == 6 && 50 == ge-gs+1) {
- /*--- fast track the common case ---*/
+ ryy_j++;
+ rtmp2 = rtmp;
+ rtmp = *ryy_j;
+ *ryy_j = rtmp2;
+ };
+ yy[0] = rtmp;
+ j = ryy_j - &(yy[0]);
+ mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ }
+
+ }
+ }
+
+ if (zPend > 0) {
+ zPend--;
+ while (True) {
+ if (zPend & 1) {
+ mtfv[wr] = BZ_RUNB; wr++;
+ s->mtfFreq[BZ_RUNB]++;
+ } else {
+ mtfv[wr] = BZ_RUNA; wr++;
+ s->mtfFreq[BZ_RUNA]++;
+ }
+ if (zPend < 2) break;
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
+
+ mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
+
+ s->nMTF = wr;
+}
+
+
+/*---------------------------------------------------*/
+#define BZ_LESSER_ICOST 0
+#define BZ_GREATER_ICOST 15
+
+static
+void sendMTFValues ( EState* s )
+{
+ Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
+ Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
+ Int32 nGroups, nBytes;
+
+ /*--
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ is a global since the decoder also needs it.
+
+ Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ are also globals only used in this proc.
+ Made global to keep stack frame size small.
+ --*/
+
+
+ UInt16 cost[BZ_N_GROUPS];
+ Int32 fave[BZ_N_GROUPS];
+
+ UInt16* mtfv = s->mtfv;
+
+ if (s->verbosity >= 3)
+ VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
+ "%d+2 syms in use\n",
+ s->nblock, s->nMTF, s->nInUse );
+
+ alphaSize = s->nInUse+2;
+ for (t = 0; t < BZ_N_GROUPS; t++)
+ for (v = 0; v < alphaSize; v++)
+ s->len[t][v] = BZ_GREATER_ICOST;
+
+ /*--- Decide how many coding tables to use ---*/
+ AssertH ( s->nMTF > 0, 3001 );
+ if (s->nMTF < 200) nGroups = 2; else
+ if (s->nMTF < 600) nGroups = 3; else
+ if (s->nMTF < 1200) nGroups = 4; else
+ if (s->nMTF < 2400) nGroups = 5; else
+ nGroups = 6;
+
+ /*--- Generate an initial set of coding tables ---*/
+ {
+ Int32 nPart, remF, tFreq, aFreq;
+
+ nPart = nGroups;
+ remF = s->nMTF;
+ gs = 0;
+ while (nPart > 0) {
+ tFreq = remF / nPart;
+ ge = gs-1;
+ aFreq = 0;
+ while (aFreq < tFreq && ge < alphaSize-1) {
+ ge++;
+ aFreq += s->mtfFreq[ge];
+ }
+
+ if (ge > gs
+ && nPart != nGroups && nPart != 1
+ && ((nGroups-nPart) % 2 == 1)) {
+ aFreq -= s->mtfFreq[ge];
+ ge--;
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf5( " initial group %d, [%d .. %d], "
+ "has %d syms (%4.1f%%)\n",
+ nPart, gs, ge, aFreq,
+ (100.0 * (float)aFreq) / (float)(s->nMTF) );
+
+ for (v = 0; v < alphaSize; v++)
+ if (v >= gs && v <= ge)
+ s->len[nPart-1][v] = BZ_LESSER_ICOST; else
+ s->len[nPart-1][v] = BZ_GREATER_ICOST;
+
+ nPart--;
+ gs = ge+1;
+ remF -= aFreq;
+ }
+ }
+
+ /*---
+ Iterate up to BZ_N_ITERS times to improve the tables.
+ ---*/
+ for (iter = 0; iter < BZ_N_ITERS; iter++) {
+
+ for (t = 0; t < nGroups; t++) fave[t] = 0;
+
+ for (t = 0; t < nGroups; t++)
+ for (v = 0; v < alphaSize; v++)
+ s->rfreq[t][v] = 0;
+
+ /*---
+ Set up an auxiliary length table which is used to fast-track
+ the common case (nGroups == 6).
+ ---*/
+ if (nGroups == 6) {
+ for (v = 0; v < alphaSize; v++) {
+ s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+ s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+ s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+ }
+ }
+
+ nSelectors = 0;
+ totc = 0;
+ gs = 0;
+ while (True) {
+
+ /*--- Set group start & end marks. --*/
+ if (gs >= s->nMTF) break;
+ ge = gs + BZ_G_SIZE - 1;
+ if (ge >= s->nMTF) ge = s->nMTF-1;
+
+ /*--
+ Calculate the cost of this group as coded
+ by each of the coding tables.
+ --*/
+ for (t = 0; t < nGroups; t++) cost[t] = 0;
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
register UInt32 cost01, cost23, cost45;
register UInt16 icv;
- cost01 = cost23 = cost45 = 0;
-
-# define BZ_ITER(nn) \
- icv = mtfv[gs+(nn)]; \
- cost01 += s->len_pack[icv][0]; \
- cost23 += s->len_pack[icv][1]; \
- cost45 += s->len_pack[icv][2]; \
-
- BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
- BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
- BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
- BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
- BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
- BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
- BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
- BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
- BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
- BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
-
-# undef BZ_ITER
-
- cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
- cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
- cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
-
- } else {
- /*--- slow version which correctly handles all situations ---*/
- for (i = gs; i <= ge; i++) {
- UInt16 icv = mtfv[i];
- for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
- }
- }
-
- /*--
- Find the coding table which is best for this group,
- and record its identity in the selector table.
- --*/
- bc = 999999999; bt = -1;
- for (t = 0; t < nGroups; t++)
- if (cost[t] < bc) { bc = cost[t]; bt = t; };
- totc += bc;
- fave[bt]++;
- s->selector[nSelectors] = bt;
- nSelectors++;
-
- /*--
- Increment the symbol frequencies for the selected table.
- --*/
- if (nGroups == 6 && 50 == ge-gs+1) {
- /*--- fast track the common case ---*/
-
-# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
-
- BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
- BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
- BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
- BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
- BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
- BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
- BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
- BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
- BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
- BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
-
-# undef BZ_ITUR
-
- } else {
- /*--- slow version which correctly handles all situations ---*/
- for (i = gs; i <= ge; i++)
- s->rfreq[bt][ mtfv[i] ]++;
- }
-
- gs = ge+1;
- }
- if (s->verbosity >= 3) {
- VPrintf2 ( " pass %d: size is %d, grp uses are ",
- iter+1, totc/8 );
- for (t = 0; t < nGroups; t++)
- VPrintf1 ( "%d ", fave[t] );
- VPrintf0 ( "\n" );
- }
-
- /*--
- Recompute the tables based on the accumulated frequencies.
- --*/
- /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
- comment in huffman.c for details. */
- for (t = 0; t < nGroups; t++)
- BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
- alphaSize, 17 /*20*/ );
- }
-
-
- AssertH( nGroups < 8, 3002 );
- AssertH( nSelectors < 32768 &&
+ cost01 = cost23 = cost45 = 0;
+
+# define BZ_ITER(nn) \
+ icv = mtfv[gs+(nn)]; \
+ cost01 += s->len_pack[icv][0]; \
+ cost23 += s->len_pack[icv][1]; \
+ cost45 += s->len_pack[icv][2]; \
+
+ BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
+ BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
+ BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+ BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+ BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+ BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+ BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+ BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+ BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+ BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+# undef BZ_ITER
+
+ cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+ cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+ cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ UInt16 icv = mtfv[i];
+ for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
+ }
+ }
+
+ /*--
+ Find the coding table which is best for this group,
+ and record its identity in the selector table.
+ --*/
+ bc = 999999999; bt = -1;
+ for (t = 0; t < nGroups; t++)
+ if (cost[t] < bc) { bc = cost[t]; bt = t; };
+ totc += bc;
+ fave[bt]++;
+ s->selector[nSelectors] = bt;
+ nSelectors++;
+
+ /*--
+ Increment the symbol frequencies for the selected table.
+ --*/
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+
+# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+ BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
+ BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
+ BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+ BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+ BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+ BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+ BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+ BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+ BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+ BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+# undef BZ_ITUR
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++)
+ s->rfreq[bt][ mtfv[i] ]++;
+ }
+
+ gs = ge+1;
+ }
+ if (s->verbosity >= 3) {
+ VPrintf2 ( " pass %d: size is %d, grp uses are ",
+ iter+1, totc/8 );
+ for (t = 0; t < nGroups; t++)
+ VPrintf1 ( "%d ", fave[t] );
+ VPrintf0 ( "\n" );
+ }
+
+ /*--
+ Recompute the tables based on the accumulated frequencies.
+ --*/
+ /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
+ comment in huffman.c for details. */
+ for (t = 0; t < nGroups; t++)
+ BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
+ alphaSize, 17 /*20*/ );
+ }
+
+
+ AssertH( nGroups < 8, 3002 );
+ AssertH( nSelectors < 32768 &&
nSelectors <= BZ_MAX_SELECTORS,
- 3003 );
-
-
- /*--- Compute MTF values for the selectors. ---*/
+ 3003 );
+
+
+ /*--- Compute MTF values for the selectors. ---*/
+ {
+ UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
+ for (i = 0; i < nGroups; i++) pos[i] = i;
+ for (i = 0; i < nSelectors; i++) {
+ ll_i = s->selector[i];
+ j = 0;
+ tmp = pos[j];
+ while ( ll_i != tmp ) {
+ j++;
+ tmp2 = tmp;
+ tmp = pos[j];
+ pos[j] = tmp2;
+ };
+ pos[0] = tmp;
+ s->selectorMtf[i] = j;
+ }
+ };
+
+ /*--- Assign actual codes for the tables. --*/
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
+ }
+ AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
+ AssertH ( !(minLen < 1), 3005 );
+ BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
+ minLen, maxLen, alphaSize );
+ }
+
+ /*--- Transmit the mapping table. ---*/
{
- UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
- for (i = 0; i < nGroups; i++) pos[i] = i;
- for (i = 0; i < nSelectors; i++) {
- ll_i = s->selector[i];
- j = 0;
- tmp = pos[j];
- while ( ll_i != tmp ) {
- j++;
- tmp2 = tmp;
- tmp = pos[j];
- pos[j] = tmp2;
- };
- pos[0] = tmp;
- s->selectorMtf[i] = j;
- }
- };
-
- /*--- Assign actual codes for the tables. --*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
- if (s->len[t][i] < minLen) minLen = s->len[t][i];
- }
- AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
- AssertH ( !(minLen < 1), 3005 );
- BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
- minLen, maxLen, alphaSize );
- }
-
- /*--- Transmit the mapping table. ---*/
- {
- Bool inUse16[16];
- for (i = 0; i < 16; i++) {
- inUse16[i] = False;
- for (j = 0; j < 16; j++)
- if (s->inUse[i * 16 + j]) inUse16[i] = True;
- }
-
- nBytes = s->numZ;
- for (i = 0; i < 16; i++)
- if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
-
- for (i = 0; i < 16; i++)
- if (inUse16[i])
- for (j = 0; j < 16; j++) {
- if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
- }
-
- if (s->verbosity >= 3)
- VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
- }
-
- /*--- Now the selectors. ---*/
- nBytes = s->numZ;
- bsW ( s, 3, nGroups );
- bsW ( s, 15, nSelectors );
- for (i = 0; i < nSelectors; i++) {
- for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
- bsW(s,1,0);
- }
- if (s->verbosity >= 3)
- VPrintf1( "selectors %d, ", s->numZ-nBytes );
-
- /*--- Now the coding tables. ---*/
- nBytes = s->numZ;
-
- for (t = 0; t < nGroups; t++) {
- Int32 curr = s->len[t][0];
- bsW ( s, 5, curr );
- for (i = 0; i < alphaSize; i++) {
- while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
- while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
- bsW ( s, 1, 0 );
- }
- }
-
- if (s->verbosity >= 3)
- VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
-
- /*--- And finally, the block data proper ---*/
- nBytes = s->numZ;
- selCtr = 0;
- gs = 0;
- while (True) {
- if (gs >= s->nMTF) break;
- ge = gs + BZ_G_SIZE - 1;
- if (ge >= s->nMTF) ge = s->nMTF-1;
- AssertH ( s->selector[selCtr] < nGroups, 3006 );
-
- if (nGroups == 6 && 50 == ge-gs+1) {
- /*--- fast track the common case ---*/
- UInt16 mtfv_i;
- UChar* s_len_sel_selCtr
- = &(s->len[s->selector[selCtr]][0]);
- Int32* s_code_sel_selCtr
- = &(s->code[s->selector[selCtr]][0]);
-
-# define BZ_ITAH(nn) \
- mtfv_i = mtfv[gs+(nn)]; \
- bsW ( s, \
- s_len_sel_selCtr[mtfv_i], \
- s_code_sel_selCtr[mtfv_i] )
-
- BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
- BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
- BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
- BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
- BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
- BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
- BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
- BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
- BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
- BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
-
-# undef BZ_ITAH
-
- } else {
- /*--- slow version which correctly handles all situations ---*/
- for (i = gs; i <= ge; i++) {
- bsW ( s,
- s->len [s->selector[selCtr]] [mtfv[i]],
- s->code [s->selector[selCtr]] [mtfv[i]] );
- }
- }
-
-
- gs = ge+1;
- selCtr++;
- }
- AssertH( selCtr == nSelectors, 3007 );
-
- if (s->verbosity >= 3)
- VPrintf1( "codes %d\n", s->numZ-nBytes );
-}
-
-
-/*---------------------------------------------------*/
-void BZ2_compressBlock ( EState* s, Bool is_last_block )
-{
- if (s->nblock > 0) {
-
- BZ_FINALISE_CRC ( s->blockCRC );
- s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
- s->combinedCRC ^= s->blockCRC;
- if (s->blockNo > 1) s->numZ = 0;
-
- if (s->verbosity >= 2)
- VPrintf4( " block %d: crc = 0x%08x, "
- "combined CRC = 0x%08x, size = %d\n",
- s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
-
- BZ2_blockSort ( s );
- }
-
- s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
-
- /*-- If this is the first block, create the stream header. --*/
- if (s->blockNo == 1) {
- BZ2_bsInitWrite ( s );
- bsPutUChar ( s, BZ_HDR_B );
- bsPutUChar ( s, BZ_HDR_Z );
- bsPutUChar ( s, BZ_HDR_h );
- bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
- }
-
- if (s->nblock > 0) {
-
- bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
- bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
- bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
-
- /*-- Now the block's CRC, so it is in a known place. --*/
- bsPutUInt32 ( s, s->blockCRC );
-
- /*--
- Now a single bit indicating (non-)randomisation.
- As of version 0.9.5, we use a better sorting algorithm
- which makes randomisation unnecessary. So always set
- the randomised bit to 'no'. Of course, the decoder
- still needs to be able to handle randomised blocks
- so as to maintain backwards compatibility with
- older versions of bzip2.
- --*/
- bsW(s,1,0);
-
- bsW ( s, 24, s->origPtr );
- generateMTFValues ( s );
- sendMTFValues ( s );
- }
-
-
- /*-- If this is the last block, add the stream trailer. --*/
- if (is_last_block) {
-
- bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
- bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
- bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
- bsPutUInt32 ( s, s->combinedCRC );
- if (s->verbosity >= 2)
- VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
- bsFinishWrite ( s );
- }
-}
-
-
-/*-------------------------------------------------------------*/
-/*--- end compress.c ---*/
-/*-------------------------------------------------------------*/
+ Bool inUse16[16];
+ for (i = 0; i < 16; i++) {
+ inUse16[i] = False;
+ for (j = 0; j < 16; j++)
+ if (s->inUse[i * 16 + j]) inUse16[i] = True;
+ }
+
+ nBytes = s->numZ;
+ for (i = 0; i < 16; i++)
+ if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
+
+ for (i = 0; i < 16; i++)
+ if (inUse16[i])
+ for (j = 0; j < 16; j++) {
+ if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
+ }
+
+ /*--- Now the selectors. ---*/
+ nBytes = s->numZ;
+ bsW ( s, 3, nGroups );
+ bsW ( s, 15, nSelectors );
+ for (i = 0; i < nSelectors; i++) {
+ for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
+ bsW(s,1,0);
+ }
+ if (s->verbosity >= 3)
+ VPrintf1( "selectors %d, ", s->numZ-nBytes );
+
+ /*--- Now the coding tables. ---*/
+ nBytes = s->numZ;
+
+ for (t = 0; t < nGroups; t++) {
+ Int32 curr = s->len[t][0];
+ bsW ( s, 5, curr );
+ for (i = 0; i < alphaSize; i++) {
+ while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
+ while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
+ bsW ( s, 1, 0 );
+ }
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
+
+ /*--- And finally, the block data proper ---*/
+ nBytes = s->numZ;
+ selCtr = 0;
+ gs = 0;
+ while (True) {
+ if (gs >= s->nMTF) break;
+ ge = gs + BZ_G_SIZE - 1;
+ if (ge >= s->nMTF) ge = s->nMTF-1;
+ AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ UInt16 mtfv_i;
+ UChar* s_len_sel_selCtr
+ = &(s->len[s->selector[selCtr]][0]);
+ Int32* s_code_sel_selCtr
+ = &(s->code[s->selector[selCtr]][0]);
+
+# define BZ_ITAH(nn) \
+ mtfv_i = mtfv[gs+(nn)]; \
+ bsW ( s, \
+ s_len_sel_selCtr[mtfv_i], \
+ s_code_sel_selCtr[mtfv_i] )
+
+ BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
+ BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
+ BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+ BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+ BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+ BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+ BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+ BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+ BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+ BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+# undef BZ_ITAH
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ bsW ( s,
+ s->len [s->selector[selCtr]] [mtfv[i]],
+ s->code [s->selector[selCtr]] [mtfv[i]] );
+ }
+ }
+
+
+ gs = ge+1;
+ selCtr++;
+ }
+ AssertH( selCtr == nSelectors, 3007 );
+
+ if (s->verbosity >= 3)
+ VPrintf1( "codes %d\n", s->numZ-nBytes );
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_compressBlock ( EState* s, Bool is_last_block )
+{
+ if (s->nblock > 0) {
+
+ BZ_FINALISE_CRC ( s->blockCRC );
+ s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
+ s->combinedCRC ^= s->blockCRC;
+ if (s->blockNo > 1) s->numZ = 0;
+
+ if (s->verbosity >= 2)
+ VPrintf4( " block %d: crc = 0x%08x, "
+ "combined CRC = 0x%08x, size = %d\n",
+ s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
+
+ BZ2_blockSort ( s );
+ }
+
+ s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
+
+ /*-- If this is the first block, create the stream header. --*/
+ if (s->blockNo == 1) {
+ BZ2_bsInitWrite ( s );
+ bsPutUChar ( s, BZ_HDR_B );
+ bsPutUChar ( s, BZ_HDR_Z );
+ bsPutUChar ( s, BZ_HDR_h );
+ bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
+ }
+
+ if (s->nblock > 0) {
+
+ bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
+ bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
+ bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
+
+ /*-- Now the block's CRC, so it is in a known place. --*/
+ bsPutUInt32 ( s, s->blockCRC );
+
+ /*--
+ Now a single bit indicating (non-)randomisation.
+ As of version 0.9.5, we use a better sorting algorithm
+ which makes randomisation unnecessary. So always set
+ the randomised bit to 'no'. Of course, the decoder
+ still needs to be able to handle randomised blocks
+ so as to maintain backwards compatibility with
+ older versions of bzip2.
+ --*/
+ bsW(s,1,0);
+
+ bsW ( s, 24, s->origPtr );
+ generateMTFValues ( s );
+ sendMTFValues ( s );
+ }
+
+
+ /*-- If this is the last block, add the stream trailer. --*/
+ if (is_last_block) {
+
+ bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
+ bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
+ bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
+ bsPutUInt32 ( s, s->combinedCRC );
+ if (s->verbosity >= 2)
+ VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
+ bsFinishWrite ( s );
+ }
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end compress.c ---*/
+/*-------------------------------------------------------------*/
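
generateMTFValues() above folds runs of zero MTF symbols into RUNA/RUNB digits: a run of length n is emitted as n written in bijective base 2, with RUNA standing for digit 1 and RUNB for digit 2. A small standalone sketch of that zPend loop follows; the symbol names and printing are placeholders for illustration only.

#include <stdio.h>

/* Emit the RUNA/RUNB digits for a run of n zero symbols (n >= 1),
   mirroring the zPend loop in generateMTFValues(). */
static void emit_zero_run(int n)
{
    n--;
    for (;;) {
        if (n & 1) printf("RUNB ");
        else       printf("RUNA ");
        if (n < 2) break;
        n = (n - 2) / 2;
    }
}

int main(void)
{
    emit_zero_run(5);   /* 5 = 1*1 + 2*2        ->  RUNA RUNB      */
    printf("\n");
    emit_zero_run(7);   /* 7 = 1*1 + 1*2 + 1*4  ->  RUNA RUNA RUNA */
    printf("\n");
    return 0;
}

Because the digits are 1 and 2 rather than 0 and 1, every run length from 1 upward has exactly one encoding and no terminator symbol is needed; the run ends as soon as a non-RUNA/RUNB symbol appears in the stream.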
diff --git a/contrib/libs/libbz2/crctable.c b/contrib/libs/libbz2/crctable.c
index c961b19057..a9212dbf2c 100644
--- a/contrib/libs/libbz2/crctable.c
+++ b/contrib/libs/libbz2/crctable.c
@@ -1,104 +1,104 @@
-
-/*-------------------------------------------------------------*/
-/*--- Table for doing CRCs ---*/
-/*--- crctable.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Table for doing CRCs ---*/
+/*--- crctable.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#include "bzlib_private.h"
-
-/*--
- I think this is an implementation of the AUTODIN-II,
- Ethernet & FDDI 32-bit CRC standard. Vaguely derived
- from code by Rob Warnock, in Section 51 of the
- comp.compression FAQ.
---*/
-
-const UInt32 BZ2_crc32Table[256] = {
-
- /*-- Ugly, innit? --*/
-
- 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
- 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
- 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
- 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
- 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
- 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
- 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
- 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
- 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
- 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
- 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
- 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
- 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
- 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
- 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
- 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
- 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
- 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
- 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
- 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
- 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
- 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
- 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
- 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
- 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
- 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
- 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
- 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
- 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
- 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
- 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
- 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
- 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
- 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
- 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
- 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
- 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
- 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
- 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
- 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
- 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
- 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
- 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
- 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
- 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
- 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
- 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
- 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
- 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
- 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
- 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
- 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
- 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
- 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
- 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
- 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
- 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
- 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
- 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
- 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
- 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
- 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
- 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
- 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
-};
-
-
-/*-------------------------------------------------------------*/
-/*--- end crctable.c ---*/
-/*-------------------------------------------------------------*/
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*--
+ I think this is an implementation of the AUTODIN-II,
+ Ethernet & FDDI 32-bit CRC standard. Vaguely derived
+ from code by Rob Warnock, in Section 51 of the
+ comp.compression FAQ.
+--*/
+
+const UInt32 BZ2_crc32Table[256] = {
+
+ /*-- Ugly, innit? --*/
+
+ 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
+ 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
+ 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
+ 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
+ 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
+ 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
+ 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
+ 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
+ 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
+ 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
+ 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
+ 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
+ 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
+ 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
+ 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
+ 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
+ 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
+ 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
+ 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
+ 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
+ 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
+ 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
+ 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
+ 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
+ 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
+ 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
+ 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
+ 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
+ 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
+ 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
+ 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
+ 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
+ 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
+ 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
+ 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
+ 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
+ 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
+ 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
+ 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
+ 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
+ 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
+ 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
+ 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
+ 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
+ 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
+ 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
+ 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
+ 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
+ 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
+ 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
+ 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
+ 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
+ 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
+ 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
+ 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
+ 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
+ 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
+ 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
+ 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
+ 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
+ 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
+ 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
+ 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
+ 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
+};
+
+
+/*-------------------------------------------------------------*/
+/*--- end crctable.c ---*/
+/*-------------------------------------------------------------*/
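
The table above is the MSB-first CRC-32 table for the polynomial 0x04c11db7; the update and init/finalise macros that consume it live in bzlib_private.h rather than in this file. A self-contained sketch of how such a table can be derived from the polynomial and applied per byte follows; the helper names are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t table[256];

/* Build the MSB-first CRC-32 table for polynomial 0x04c11db7. */
static void build_table(void)
{
    for (int i = 0; i < 256; i++) {
        uint32_t c = (uint32_t)i << 24;
        for (int k = 0; k < 8; k++)
            c = (c & 0x80000000u) ? (c << 1) ^ 0x04c11db7u : (c << 1);
        table[i] = c;   /* table[1] == 0x04c11db7, table[2] == 0x09823b6e, ... */
    }
}

/* MSB-first, non-reflected CRC-32 over a byte buffer. */
static uint32_t crc32_msb(const unsigned char *p, size_t n)
{
    uint32_t crc = 0xffffffffu;                          /* initialise */
    for (size_t i = 0; i < n; i++)
        crc = (crc << 8) ^ table[(crc >> 24) ^ p[i]];    /* per-byte update */
    return ~crc;                                         /* finalise */
}

int main(void)
{
    build_table();
    const char *msg = "hello";
    printf("%08x\n", crc32_msb((const unsigned char *)msg, strlen(msg)));
    return 0;
}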
diff --git a/contrib/libs/libbz2/decompress.c b/contrib/libs/libbz2/decompress.c
index 5f37c27e2f..a1a0bac892 100644
--- a/contrib/libs/libbz2/decompress.c
+++ b/contrib/libs/libbz2/decompress.c
@@ -1,392 +1,392 @@
-
-/*-------------------------------------------------------------*/
-/*--- Decompression machinery ---*/
-/*--- decompress.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Decompression machinery ---*/
+/*--- decompress.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
- Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#include "bzlib_private.h"
-
-
-/*---------------------------------------------------*/
-static
-void makeMaps_d ( DState* s )
-{
- Int32 i;
- s->nInUse = 0;
- for (i = 0; i < 256; i++)
- if (s->inUse[i]) {
- s->seqToUnseq[s->nInUse] = i;
- s->nInUse++;
- }
-}
-
-
-/*---------------------------------------------------*/
-#define RETURN(rrr) \
- { retVal = rrr; goto save_state_and_return; };
-
-#define GET_BITS(lll,vvv,nnn) \
- case lll: s->state = lll; \
- while (True) { \
- if (s->bsLive >= nnn) { \
- UInt32 v; \
- v = (s->bsBuff >> \
- (s->bsLive-nnn)) & ((1 << nnn)-1); \
- s->bsLive -= nnn; \
- vvv = v; \
- break; \
- } \
- if (s->strm->avail_in == 0) RETURN(BZ_OK); \
- s->bsBuff \
- = (s->bsBuff << 8) | \
- ((UInt32) \
- (*((UChar*)(s->strm->next_in)))); \
- s->bsLive += 8; \
- s->strm->next_in++; \
- s->strm->avail_in--; \
- s->strm->total_in_lo32++; \
- if (s->strm->total_in_lo32 == 0) \
- s->strm->total_in_hi32++; \
- }
-
-#define GET_UCHAR(lll,uuu) \
- GET_BITS(lll,uuu,8)
-
-#define GET_BIT(lll,uuu) \
- GET_BITS(lll,uuu,1)
-
-/*---------------------------------------------------*/
-#define GET_MTF_VAL(label1,label2,lval) \
-{ \
- if (groupPos == 0) { \
- groupNo++; \
- if (groupNo >= nSelectors) \
- RETURN(BZ_DATA_ERROR); \
- groupPos = BZ_G_SIZE; \
- gSel = s->selector[groupNo]; \
- gMinlen = s->minLens[gSel]; \
- gLimit = &(s->limit[gSel][0]); \
- gPerm = &(s->perm[gSel][0]); \
- gBase = &(s->base[gSel][0]); \
- } \
- groupPos--; \
- zn = gMinlen; \
- GET_BITS(label1, zvec, zn); \
- while (1) { \
- if (zn > 20 /* the longest code */) \
- RETURN(BZ_DATA_ERROR); \
- if (zvec <= gLimit[zn]) break; \
- zn++; \
- GET_BIT(label2, zj); \
- zvec = (zvec << 1) | zj; \
- }; \
- if (zvec - gBase[zn] < 0 \
- || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
- RETURN(BZ_DATA_ERROR); \
- lval = gPerm[zvec - gBase[zn]]; \
-}
-
-
-/*---------------------------------------------------*/
-Int32 BZ2_decompress ( DState* s )
-{
- UChar uc;
- Int32 retVal;
- Int32 minLen, maxLen;
- bz_stream* strm = s->strm;
-
- /* stuff that needs to be saved/restored */
- Int32 i;
- Int32 j;
- Int32 t;
- Int32 alphaSize;
- Int32 nGroups;
- Int32 nSelectors;
- Int32 EOB;
- Int32 groupNo;
- Int32 groupPos;
- Int32 nextSym;
- Int32 nblockMAX;
- Int32 nblock;
- Int32 es;
- Int32 N;
- Int32 curr;
- Int32 zt;
- Int32 zn;
- Int32 zvec;
- Int32 zj;
- Int32 gSel;
- Int32 gMinlen;
- Int32* gLimit;
- Int32* gBase;
- Int32* gPerm;
-
- if (s->state == BZ_X_MAGIC_1) {
- /*initialise the save area*/
- s->save_i = 0;
- s->save_j = 0;
- s->save_t = 0;
- s->save_alphaSize = 0;
- s->save_nGroups = 0;
- s->save_nSelectors = 0;
- s->save_EOB = 0;
- s->save_groupNo = 0;
- s->save_groupPos = 0;
- s->save_nextSym = 0;
- s->save_nblockMAX = 0;
- s->save_nblock = 0;
- s->save_es = 0;
- s->save_N = 0;
- s->save_curr = 0;
- s->save_zt = 0;
- s->save_zn = 0;
- s->save_zvec = 0;
- s->save_zj = 0;
- s->save_gSel = 0;
- s->save_gMinlen = 0;
- s->save_gLimit = NULL;
- s->save_gBase = NULL;
- s->save_gPerm = NULL;
- }
-
- /*restore from the save area*/
- i = s->save_i;
- j = s->save_j;
- t = s->save_t;
- alphaSize = s->save_alphaSize;
- nGroups = s->save_nGroups;
- nSelectors = s->save_nSelectors;
- EOB = s->save_EOB;
- groupNo = s->save_groupNo;
- groupPos = s->save_groupPos;
- nextSym = s->save_nextSym;
- nblockMAX = s->save_nblockMAX;
- nblock = s->save_nblock;
- es = s->save_es;
- N = s->save_N;
- curr = s->save_curr;
- zt = s->save_zt;
- zn = s->save_zn;
- zvec = s->save_zvec;
- zj = s->save_zj;
- gSel = s->save_gSel;
- gMinlen = s->save_gMinlen;
- gLimit = s->save_gLimit;
- gBase = s->save_gBase;
- gPerm = s->save_gPerm;
-
- retVal = BZ_OK;
-
- switch (s->state) {
-
- GET_UCHAR(BZ_X_MAGIC_1, uc);
- if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
-
- GET_UCHAR(BZ_X_MAGIC_2, uc);
- if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
-
- GET_UCHAR(BZ_X_MAGIC_3, uc)
- if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
-
- GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
- if (s->blockSize100k < (BZ_HDR_0 + 1) ||
- s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
- s->blockSize100k -= BZ_HDR_0;
-
- if (s->smallDecompress) {
- s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
- s->ll4 = BZALLOC(
- ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
- );
- if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
- } else {
- s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
- if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
- }
-
- GET_UCHAR(BZ_X_BLKHDR_1, uc);
-
- if (uc == 0x17) goto endhdr_2;
- if (uc != 0x31) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_BLKHDR_2, uc);
- if (uc != 0x41) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_BLKHDR_3, uc);
- if (uc != 0x59) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_BLKHDR_4, uc);
- if (uc != 0x26) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_BLKHDR_5, uc);
- if (uc != 0x53) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_BLKHDR_6, uc);
- if (uc != 0x59) RETURN(BZ_DATA_ERROR);
-
- s->currBlockNo++;
- if (s->verbosity >= 2)
- VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
-
- s->storedBlockCRC = 0;
- GET_UCHAR(BZ_X_BCRC_1, uc);
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
- GET_UCHAR(BZ_X_BCRC_2, uc);
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
- GET_UCHAR(BZ_X_BCRC_3, uc);
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
- GET_UCHAR(BZ_X_BCRC_4, uc);
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
-
- GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
-
- s->origPtr = 0;
- GET_UCHAR(BZ_X_ORIGPTR_1, uc);
- s->origPtr = (s->origPtr << 8) | ((Int32)uc);
- GET_UCHAR(BZ_X_ORIGPTR_2, uc);
- s->origPtr = (s->origPtr << 8) | ((Int32)uc);
- GET_UCHAR(BZ_X_ORIGPTR_3, uc);
- s->origPtr = (s->origPtr << 8) | ((Int32)uc);
-
- if (s->origPtr < 0)
- RETURN(BZ_DATA_ERROR);
- if (s->origPtr > 10 + 100000*s->blockSize100k)
- RETURN(BZ_DATA_ERROR);
-
- /*--- Receive the mapping table ---*/
- for (i = 0; i < 16; i++) {
- GET_BIT(BZ_X_MAPPING_1, uc);
- if (uc == 1)
- s->inUse16[i] = True; else
- s->inUse16[i] = False;
- }
-
- for (i = 0; i < 256; i++) s->inUse[i] = False;
-
- for (i = 0; i < 16; i++)
- if (s->inUse16[i])
- for (j = 0; j < 16; j++) {
- GET_BIT(BZ_X_MAPPING_2, uc);
- if (uc == 1) s->inUse[i * 16 + j] = True;
- }
- makeMaps_d ( s );
- if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
- alphaSize = s->nInUse+2;
-
- /*--- Now the selectors ---*/
- GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+static
+void makeMaps_d ( DState* s )
+{
+ Int32 i;
+ s->nInUse = 0;
+ for (i = 0; i < 256; i++)
+ if (s->inUse[i]) {
+ s->seqToUnseq[s->nInUse] = i;
+ s->nInUse++;
+ }
+}
+
+
+/*---------------------------------------------------*/
+#define RETURN(rrr) \
+ { retVal = rrr; goto save_state_and_return; };
+
+#define GET_BITS(lll,vvv,nnn) \
+ case lll: s->state = lll; \
+ while (True) { \
+ if (s->bsLive >= nnn) { \
+ UInt32 v; \
+ v = (s->bsBuff >> \
+ (s->bsLive-nnn)) & ((1 << nnn)-1); \
+ s->bsLive -= nnn; \
+ vvv = v; \
+ break; \
+ } \
+ if (s->strm->avail_in == 0) RETURN(BZ_OK); \
+ s->bsBuff \
+ = (s->bsBuff << 8) | \
+ ((UInt32) \
+ (*((UChar*)(s->strm->next_in)))); \
+ s->bsLive += 8; \
+ s->strm->next_in++; \
+ s->strm->avail_in--; \
+ s->strm->total_in_lo32++; \
+ if (s->strm->total_in_lo32 == 0) \
+ s->strm->total_in_hi32++; \
+ }
+
+#define GET_UCHAR(lll,uuu) \
+ GET_BITS(lll,uuu,8)
+
+#define GET_BIT(lll,uuu) \
+ GET_BITS(lll,uuu,1)
+
+/*---------------------------------------------------*/
+#define GET_MTF_VAL(label1,label2,lval) \
+{ \
+ if (groupPos == 0) { \
+ groupNo++; \
+ if (groupNo >= nSelectors) \
+ RETURN(BZ_DATA_ERROR); \
+ groupPos = BZ_G_SIZE; \
+ gSel = s->selector[groupNo]; \
+ gMinlen = s->minLens[gSel]; \
+ gLimit = &(s->limit[gSel][0]); \
+ gPerm = &(s->perm[gSel][0]); \
+ gBase = &(s->base[gSel][0]); \
+ } \
+ groupPos--; \
+ zn = gMinlen; \
+ GET_BITS(label1, zvec, zn); \
+ while (1) { \
+ if (zn > 20 /* the longest code */) \
+ RETURN(BZ_DATA_ERROR); \
+ if (zvec <= gLimit[zn]) break; \
+ zn++; \
+ GET_BIT(label2, zj); \
+ zvec = (zvec << 1) | zj; \
+ }; \
+ if (zvec - gBase[zn] < 0 \
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
+ RETURN(BZ_DATA_ERROR); \
+ lval = gPerm[zvec - gBase[zn]]; \
+}
+
+
+/*---------------------------------------------------*/
+Int32 BZ2_decompress ( DState* s )
+{
+ UChar uc;
+ Int32 retVal;
+ Int32 minLen, maxLen;
+ bz_stream* strm = s->strm;
+
+ /* stuff that needs to be saved/restored */
+ Int32 i;
+ Int32 j;
+ Int32 t;
+ Int32 alphaSize;
+ Int32 nGroups;
+ Int32 nSelectors;
+ Int32 EOB;
+ Int32 groupNo;
+ Int32 groupPos;
+ Int32 nextSym;
+ Int32 nblockMAX;
+ Int32 nblock;
+ Int32 es;
+ Int32 N;
+ Int32 curr;
+ Int32 zt;
+ Int32 zn;
+ Int32 zvec;
+ Int32 zj;
+ Int32 gSel;
+ Int32 gMinlen;
+ Int32* gLimit;
+ Int32* gBase;
+ Int32* gPerm;
+
+ if (s->state == BZ_X_MAGIC_1) {
+ /*initialise the save area*/
+ s->save_i = 0;
+ s->save_j = 0;
+ s->save_t = 0;
+ s->save_alphaSize = 0;
+ s->save_nGroups = 0;
+ s->save_nSelectors = 0;
+ s->save_EOB = 0;
+ s->save_groupNo = 0;
+ s->save_groupPos = 0;
+ s->save_nextSym = 0;
+ s->save_nblockMAX = 0;
+ s->save_nblock = 0;
+ s->save_es = 0;
+ s->save_N = 0;
+ s->save_curr = 0;
+ s->save_zt = 0;
+ s->save_zn = 0;
+ s->save_zvec = 0;
+ s->save_zj = 0;
+ s->save_gSel = 0;
+ s->save_gMinlen = 0;
+ s->save_gLimit = NULL;
+ s->save_gBase = NULL;
+ s->save_gPerm = NULL;
+ }
+
+ /*restore from the save area*/
+ i = s->save_i;
+ j = s->save_j;
+ t = s->save_t;
+ alphaSize = s->save_alphaSize;
+ nGroups = s->save_nGroups;
+ nSelectors = s->save_nSelectors;
+ EOB = s->save_EOB;
+ groupNo = s->save_groupNo;
+ groupPos = s->save_groupPos;
+ nextSym = s->save_nextSym;
+ nblockMAX = s->save_nblockMAX;
+ nblock = s->save_nblock;
+ es = s->save_es;
+ N = s->save_N;
+ curr = s->save_curr;
+ zt = s->save_zt;
+ zn = s->save_zn;
+ zvec = s->save_zvec;
+ zj = s->save_zj;
+ gSel = s->save_gSel;
+ gMinlen = s->save_gMinlen;
+ gLimit = s->save_gLimit;
+ gBase = s->save_gBase;
+ gPerm = s->save_gPerm;
+
+ retVal = BZ_OK;
+
+ switch (s->state) {
+
+ GET_UCHAR(BZ_X_MAGIC_1, uc);
+ if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
+
+ GET_UCHAR(BZ_X_MAGIC_2, uc);
+ if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
+
+ GET_UCHAR(BZ_X_MAGIC_3, uc)
+ if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
+
+ GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
+ if (s->blockSize100k < (BZ_HDR_0 + 1) ||
+ s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
+ s->blockSize100k -= BZ_HDR_0;
+
+ if (s->smallDecompress) {
+ s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
+ s->ll4 = BZALLOC(
+ ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
+ );
+ if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
+ } else {
+ s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
+ if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
+ }
+
+ GET_UCHAR(BZ_X_BLKHDR_1, uc);
+
+ if (uc == 0x17) goto endhdr_2;
+ if (uc != 0x31) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_2, uc);
+ if (uc != 0x41) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_3, uc);
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_4, uc);
+ if (uc != 0x26) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_5, uc);
+ if (uc != 0x53) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_6, uc);
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+
+ s->currBlockNo++;
+ if (s->verbosity >= 2)
+ VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
+
+ s->storedBlockCRC = 0;
+ GET_UCHAR(BZ_X_BCRC_1, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_BCRC_2, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_BCRC_3, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_BCRC_4, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+
+ GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
+
+ s->origPtr = 0;
+ GET_UCHAR(BZ_X_ORIGPTR_1, uc);
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+ GET_UCHAR(BZ_X_ORIGPTR_2, uc);
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+ GET_UCHAR(BZ_X_ORIGPTR_3, uc);
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+
+ if (s->origPtr < 0)
+ RETURN(BZ_DATA_ERROR);
+ if (s->origPtr > 10 + 100000*s->blockSize100k)
+ RETURN(BZ_DATA_ERROR);
+
+ /*--- Receive the mapping table ---*/
+ for (i = 0; i < 16; i++) {
+ GET_BIT(BZ_X_MAPPING_1, uc);
+ if (uc == 1)
+ s->inUse16[i] = True; else
+ s->inUse16[i] = False;
+ }
+
+ for (i = 0; i < 256; i++) s->inUse[i] = False;
+
+ for (i = 0; i < 16; i++)
+ if (s->inUse16[i])
+ for (j = 0; j < 16; j++) {
+ GET_BIT(BZ_X_MAPPING_2, uc);
+ if (uc == 1) s->inUse[i * 16 + j] = True;
+ }
+ makeMaps_d ( s );
+ if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
+ alphaSize = s->nInUse+2;
+
+ /*--- Now the selectors ---*/
+ GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
if (nGroups < 2 || nGroups > BZ_N_GROUPS) RETURN(BZ_DATA_ERROR);
- GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+ GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
- for (i = 0; i < nSelectors; i++) {
- j = 0;
- while (True) {
- GET_BIT(BZ_X_SELECTOR_3, uc);
- if (uc == 0) break;
- j++;
- if (j >= nGroups) RETURN(BZ_DATA_ERROR);
- }
+ for (i = 0; i < nSelectors; i++) {
+ j = 0;
+ while (True) {
+ GET_BIT(BZ_X_SELECTOR_3, uc);
+ if (uc == 0) break;
+ j++;
+ if (j >= nGroups) RETURN(BZ_DATA_ERROR);
+ }
/* Having more than BZ_MAX_SELECTORS doesn't make much sense
since they will never be used, but some implementations might
"round up" the number of selectors, so just ignore those. */
if (i < BZ_MAX_SELECTORS)
s->selectorMtf[i] = j;
- }
+ }
if (nSelectors > BZ_MAX_SELECTORS)
nSelectors = BZ_MAX_SELECTORS;
-
- /*--- Undo the MTF values for the selectors. ---*/
- {
- UChar pos[BZ_N_GROUPS], tmp, v;
- for (v = 0; v < nGroups; v++) pos[v] = v;
-
- for (i = 0; i < nSelectors; i++) {
- v = s->selectorMtf[i];
- tmp = pos[v];
- while (v > 0) { pos[v] = pos[v-1]; v--; }
- pos[0] = tmp;
- s->selector[i] = tmp;
- }
- }
-
- /*--- Now the coding tables ---*/
- for (t = 0; t < nGroups; t++) {
- GET_BITS(BZ_X_CODING_1, curr, 5);
- for (i = 0; i < alphaSize; i++) {
- while (True) {
- if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
- GET_BIT(BZ_X_CODING_2, uc);
- if (uc == 0) break;
- GET_BIT(BZ_X_CODING_3, uc);
- if (uc == 0) curr++; else curr--;
- }
- s->len[t][i] = curr;
- }
- }
-
- /*--- Create the Huffman decoding tables ---*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
- if (s->len[t][i] < minLen) minLen = s->len[t][i];
- }
- BZ2_hbCreateDecodeTables (
- &(s->limit[t][0]),
- &(s->base[t][0]),
- &(s->perm[t][0]),
- &(s->len[t][0]),
- minLen, maxLen, alphaSize
- );
- s->minLens[t] = minLen;
- }
-
- /*--- Now the MTF values ---*/
-
- EOB = s->nInUse+1;
- nblockMAX = 100000 * s->blockSize100k;
- groupNo = -1;
- groupPos = 0;
-
- for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
-
- /*-- MTF init --*/
- {
- Int32 ii, jj, kk;
- kk = MTFA_SIZE-1;
- for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
- for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
- s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
- kk--;
- }
- s->mtfbase[ii] = kk + 1;
- }
- }
- /*-- end MTF init --*/
-
- nblock = 0;
- GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
-
- while (True) {
-
- if (nextSym == EOB) break;
-
- if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
-
- es = -1;
- N = 1;
- do {
+
+ /*--- Undo the MTF values for the selectors. ---*/
+ {
+ UChar pos[BZ_N_GROUPS], tmp, v;
+ for (v = 0; v < nGroups; v++) pos[v] = v;
+
+ for (i = 0; i < nSelectors; i++) {
+ v = s->selectorMtf[i];
+ tmp = pos[v];
+ while (v > 0) { pos[v] = pos[v-1]; v--; }
+ pos[0] = tmp;
+ s->selector[i] = tmp;
+ }
+ }
+
+ /*--- Now the coding tables ---*/
+ for (t = 0; t < nGroups; t++) {
+ GET_BITS(BZ_X_CODING_1, curr, 5);
+ for (i = 0; i < alphaSize; i++) {
+ while (True) {
+ if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
+ GET_BIT(BZ_X_CODING_2, uc);
+ if (uc == 0) break;
+ GET_BIT(BZ_X_CODING_3, uc);
+ if (uc == 0) curr++; else curr--;
+ }
+ s->len[t][i] = curr;
+ }
+ }
+
+ /*--- Create the Huffman decoding tables ---*/
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
+ }
+ BZ2_hbCreateDecodeTables (
+ &(s->limit[t][0]),
+ &(s->base[t][0]),
+ &(s->perm[t][0]),
+ &(s->len[t][0]),
+ minLen, maxLen, alphaSize
+ );
+ s->minLens[t] = minLen;
+ }
+
+ /*--- Now the MTF values ---*/
+
+ EOB = s->nInUse+1;
+ nblockMAX = 100000 * s->blockSize100k;
+ groupNo = -1;
+ groupPos = 0;
+
+ for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
+
+ /*-- MTF init --*/
+ {
+ Int32 ii, jj, kk;
+ kk = MTFA_SIZE-1;
+ for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+ s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
+ kk--;
+ }
+ s->mtfbase[ii] = kk + 1;
+ }
+ }
+ /*-- end MTF init --*/
+
+ nblock = 0;
+ GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
+
+ while (True) {
+
+ if (nextSym == EOB) break;
+
+ if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
+
+ es = -1;
+ N = 1;
+ do {
/* Check that N doesn't get too big, so that es doesn't
go negative. The maximum value that can be
RUNA/RUNB encoded is equal to the block size (post
@@ -394,259 +394,259 @@ Int32 BZ2_decompress ( DState* s )
million should guard against overflow without
rejecting any legitimate inputs. */
if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
- if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
- if (nextSym == BZ_RUNB) es = es + (1+1) * N;
- N = N * 2;
- GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
- }
- while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
-
- es++;
- uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
- s->unzftab[uc] += es;
-
- if (s->smallDecompress)
- while (es > 0) {
- if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
- s->ll16[nblock] = (UInt16)uc;
- nblock++;
- es--;
- }
- else
- while (es > 0) {
- if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
- s->tt[nblock] = (UInt32)uc;
- nblock++;
- es--;
- };
-
- continue;
-
- } else {
-
- if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
-
- /*-- uc = MTF ( nextSym-1 ) --*/
- {
- Int32 ii, jj, kk, pp, lno, off;
- UInt32 nn;
- nn = (UInt32)(nextSym - 1);
-
- if (nn < MTFL_SIZE) {
- /* avoid general-case expense */
- pp = s->mtfbase[0];
- uc = s->mtfa[pp+nn];
- while (nn > 3) {
- Int32 z = pp+nn;
- s->mtfa[(z) ] = s->mtfa[(z)-1];
- s->mtfa[(z)-1] = s->mtfa[(z)-2];
- s->mtfa[(z)-2] = s->mtfa[(z)-3];
- s->mtfa[(z)-3] = s->mtfa[(z)-4];
- nn -= 4;
- }
- while (nn > 0) {
- s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
- };
- s->mtfa[pp] = uc;
- } else {
- /* general case */
- lno = nn / MTFL_SIZE;
- off = nn % MTFL_SIZE;
- pp = s->mtfbase[lno] + off;
- uc = s->mtfa[pp];
- while (pp > s->mtfbase[lno]) {
- s->mtfa[pp] = s->mtfa[pp-1]; pp--;
- };
- s->mtfbase[lno]++;
- while (lno > 0) {
- s->mtfbase[lno]--;
- s->mtfa[s->mtfbase[lno]]
- = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
- lno--;
- }
- s->mtfbase[0]--;
- s->mtfa[s->mtfbase[0]] = uc;
- if (s->mtfbase[0] == 0) {
- kk = MTFA_SIZE-1;
- for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
- for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
- s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
- kk--;
- }
- s->mtfbase[ii] = kk + 1;
- }
- }
- }
- }
- /*-- end uc = MTF ( nextSym-1 ) --*/
-
- s->unzftab[s->seqToUnseq[uc]]++;
- if (s->smallDecompress)
- s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
- s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
- nblock++;
-
- GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
- continue;
- }
- }
-
- /* Now we know what nblock is, we can do a better sanity
- check on s->origPtr.
- */
- if (s->origPtr < 0 || s->origPtr >= nblock)
- RETURN(BZ_DATA_ERROR);
-
- /*-- Set up cftab to facilitate generation of T^(-1) --*/
+ if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
+ if (nextSym == BZ_RUNB) es = es + (1+1) * N;
+ N = N * 2;
+ GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
+ }
+ while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+
+ es++;
+ uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
+ s->unzftab[uc] += es;
+
+ if (s->smallDecompress)
+ while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+ s->ll16[nblock] = (UInt16)uc;
+ nblock++;
+ es--;
+ }
+ else
+ while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+ s->tt[nblock] = (UInt32)uc;
+ nblock++;
+ es--;
+ };
+
+ continue;
+
+ } else {
+
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+
+ /*-- uc = MTF ( nextSym-1 ) --*/
+ {
+ Int32 ii, jj, kk, pp, lno, off;
+ UInt32 nn;
+ nn = (UInt32)(nextSym - 1);
+
+ if (nn < MTFL_SIZE) {
+ /* avoid general-case expense */
+ pp = s->mtfbase[0];
+ uc = s->mtfa[pp+nn];
+ while (nn > 3) {
+ Int32 z = pp+nn;
+ s->mtfa[(z) ] = s->mtfa[(z)-1];
+ s->mtfa[(z)-1] = s->mtfa[(z)-2];
+ s->mtfa[(z)-2] = s->mtfa[(z)-3];
+ s->mtfa[(z)-3] = s->mtfa[(z)-4];
+ nn -= 4;
+ }
+ while (nn > 0) {
+ s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
+ };
+ s->mtfa[pp] = uc;
+ } else {
+ /* general case */
+ lno = nn / MTFL_SIZE;
+ off = nn % MTFL_SIZE;
+ pp = s->mtfbase[lno] + off;
+ uc = s->mtfa[pp];
+ while (pp > s->mtfbase[lno]) {
+ s->mtfa[pp] = s->mtfa[pp-1]; pp--;
+ };
+ s->mtfbase[lno]++;
+ while (lno > 0) {
+ s->mtfbase[lno]--;
+ s->mtfa[s->mtfbase[lno]]
+ = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+ lno--;
+ }
+ s->mtfbase[0]--;
+ s->mtfa[s->mtfbase[0]] = uc;
+ if (s->mtfbase[0] == 0) {
+ kk = MTFA_SIZE-1;
+ for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+ s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+ kk--;
+ }
+ s->mtfbase[ii] = kk + 1;
+ }
+ }
+ }
+ }
+ /*-- end uc = MTF ( nextSym-1 ) --*/
+
+ s->unzftab[s->seqToUnseq[uc]]++;
+ if (s->smallDecompress)
+ s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
+ s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
+ nblock++;
+
+ GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
+ continue;
+ }
+ }
+
+ /* Now we know what nblock is, we can do a better sanity
+ check on s->origPtr.
+ */
+ if (s->origPtr < 0 || s->origPtr >= nblock)
+ RETURN(BZ_DATA_ERROR);
+
+ /*-- Set up cftab to facilitate generation of T^(-1) --*/
/* Check: unzftab entries in range. */
for (i = 0; i <= 255; i++) {
if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
RETURN(BZ_DATA_ERROR);
}
/* Actually generate cftab. */
- s->cftab[0] = 0;
- for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
- for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
+ s->cftab[0] = 0;
+ for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
+ for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
/* Check: cftab entries in range. */
- for (i = 0; i <= 256; i++) {
- if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
- /* s->cftab[i] can legitimately be == nblock */
- RETURN(BZ_DATA_ERROR);
- }
- }
+ for (i = 0; i <= 256; i++) {
+ if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
+ /* s->cftab[i] can legitimately be == nblock */
+ RETURN(BZ_DATA_ERROR);
+ }
+ }
/* Check: cftab entries non-descending. */
for (i = 1; i <= 256; i++) {
if (s->cftab[i-1] > s->cftab[i]) {
RETURN(BZ_DATA_ERROR);
}
}
-
- s->state_out_len = 0;
- s->state_out_ch = 0;
- BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
- s->state = BZ_X_OUTPUT;
- if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
-
- if (s->smallDecompress) {
-
- /*-- Make a copy of cftab, used in generation of T --*/
- for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
-
- /*-- compute the T vector --*/
- for (i = 0; i < nblock; i++) {
- uc = (UChar)(s->ll16[i]);
- SET_LL(i, s->cftabCopy[uc]);
- s->cftabCopy[uc]++;
- }
-
- /*-- Compute T^(-1) by pointer reversal on T --*/
- i = s->origPtr;
- j = GET_LL(i);
- do {
- Int32 tmp = GET_LL(j);
- SET_LL(j, i);
- i = j;
- j = tmp;
- }
- while (i != s->origPtr);
-
- s->tPos = s->origPtr;
- s->nblock_used = 0;
- if (s->blockRandomised) {
- BZ_RAND_INIT_MASK;
- BZ_GET_SMALL(s->k0); s->nblock_used++;
- BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
- } else {
- BZ_GET_SMALL(s->k0); s->nblock_used++;
- }
-
- } else {
-
- /*-- compute the T^(-1) vector --*/
- for (i = 0; i < nblock; i++) {
- uc = (UChar)(s->tt[i] & 0xff);
- s->tt[s->cftab[uc]] |= (i << 8);
- s->cftab[uc]++;
- }
-
- s->tPos = s->tt[s->origPtr] >> 8;
- s->nblock_used = 0;
- if (s->blockRandomised) {
- BZ_RAND_INIT_MASK;
- BZ_GET_FAST(s->k0); s->nblock_used++;
- BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
- } else {
- BZ_GET_FAST(s->k0); s->nblock_used++;
- }
-
- }
-
- RETURN(BZ_OK);
-
-
-
- endhdr_2:
-
- GET_UCHAR(BZ_X_ENDHDR_2, uc);
- if (uc != 0x72) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_ENDHDR_3, uc);
- if (uc != 0x45) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_ENDHDR_4, uc);
- if (uc != 0x38) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_ENDHDR_5, uc);
- if (uc != 0x50) RETURN(BZ_DATA_ERROR);
- GET_UCHAR(BZ_X_ENDHDR_6, uc);
- if (uc != 0x90) RETURN(BZ_DATA_ERROR);
-
- s->storedCombinedCRC = 0;
- GET_UCHAR(BZ_X_CCRC_1, uc);
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
- GET_UCHAR(BZ_X_CCRC_2, uc);
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
- GET_UCHAR(BZ_X_CCRC_3, uc);
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
- GET_UCHAR(BZ_X_CCRC_4, uc);
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
-
- s->state = BZ_X_IDLE;
- RETURN(BZ_STREAM_END);
-
- default: AssertH ( False, 4001 );
- }
-
- AssertH ( False, 4002 );
-
- save_state_and_return:
-
- s->save_i = i;
- s->save_j = j;
- s->save_t = t;
- s->save_alphaSize = alphaSize;
- s->save_nGroups = nGroups;
- s->save_nSelectors = nSelectors;
- s->save_EOB = EOB;
- s->save_groupNo = groupNo;
- s->save_groupPos = groupPos;
- s->save_nextSym = nextSym;
- s->save_nblockMAX = nblockMAX;
- s->save_nblock = nblock;
- s->save_es = es;
- s->save_N = N;
- s->save_curr = curr;
- s->save_zt = zt;
- s->save_zn = zn;
- s->save_zvec = zvec;
- s->save_zj = zj;
- s->save_gSel = gSel;
- s->save_gMinlen = gMinlen;
- s->save_gLimit = gLimit;
- s->save_gBase = gBase;
- s->save_gPerm = gPerm;
-
- return retVal;
-}
-
-
-/*-------------------------------------------------------------*/
-/*--- end decompress.c ---*/
-/*-------------------------------------------------------------*/
+
+ s->state_out_len = 0;
+ s->state_out_ch = 0;
+ BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
+ s->state = BZ_X_OUTPUT;
+ if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
+
+ if (s->smallDecompress) {
+
+ /*-- Make a copy of cftab, used in generation of T --*/
+ for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
+
+ /*-- compute the T vector --*/
+ for (i = 0; i < nblock; i++) {
+ uc = (UChar)(s->ll16[i]);
+ SET_LL(i, s->cftabCopy[uc]);
+ s->cftabCopy[uc]++;
+ }
+
+ /*-- Compute T^(-1) by pointer reversal on T --*/
+ i = s->origPtr;
+ j = GET_LL(i);
+ do {
+ Int32 tmp = GET_LL(j);
+ SET_LL(j, i);
+ i = j;
+ j = tmp;
+ }
+ while (i != s->origPtr);
+
+ s->tPos = s->origPtr;
+ s->nblock_used = 0;
+ if (s->blockRandomised) {
+ BZ_RAND_INIT_MASK;
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
+ } else {
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ }
+
+ } else {
+
+ /*-- compute the T^(-1) vector --*/
+ for (i = 0; i < nblock; i++) {
+ uc = (UChar)(s->tt[i] & 0xff);
+ s->tt[s->cftab[uc]] |= (i << 8);
+ s->cftab[uc]++;
+ }
+
+ s->tPos = s->tt[s->origPtr] >> 8;
+ s->nblock_used = 0;
+ if (s->blockRandomised) {
+ BZ_RAND_INIT_MASK;
+ BZ_GET_FAST(s->k0); s->nblock_used++;
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
+ } else {
+ BZ_GET_FAST(s->k0); s->nblock_used++;
+ }
+
+ }
+
+ RETURN(BZ_OK);
+
+
+
+ endhdr_2:
+
+ GET_UCHAR(BZ_X_ENDHDR_2, uc);
+ if (uc != 0x72) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_3, uc);
+ if (uc != 0x45) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_4, uc);
+ if (uc != 0x38) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_5, uc);
+ if (uc != 0x50) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_6, uc);
+ if (uc != 0x90) RETURN(BZ_DATA_ERROR);
+
+ s->storedCombinedCRC = 0;
+ GET_UCHAR(BZ_X_CCRC_1, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_CCRC_2, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_CCRC_3, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_CCRC_4, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+
+ s->state = BZ_X_IDLE;
+ RETURN(BZ_STREAM_END);
+
+ default: AssertH ( False, 4001 );
+ }
+
+ AssertH ( False, 4002 );
+
+ save_state_and_return:
+
+ s->save_i = i;
+ s->save_j = j;
+ s->save_t = t;
+ s->save_alphaSize = alphaSize;
+ s->save_nGroups = nGroups;
+ s->save_nSelectors = nSelectors;
+ s->save_EOB = EOB;
+ s->save_groupNo = groupNo;
+ s->save_groupPos = groupPos;
+ s->save_nextSym = nextSym;
+ s->save_nblockMAX = nblockMAX;
+ s->save_nblock = nblock;
+ s->save_es = es;
+ s->save_N = N;
+ s->save_curr = curr;
+ s->save_zt = zt;
+ s->save_zn = zn;
+ s->save_zvec = zvec;
+ s->save_zj = zj;
+ s->save_gSel = gSel;
+ s->save_gMinlen = gMinlen;
+ s->save_gLimit = gLimit;
+ s->save_gBase = gBase;
+ s->save_gPerm = gPerm;
+
+ return retVal;
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end decompress.c ---*/
+/*-------------------------------------------------------------*/
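
The GET_BITS/GET_UCHAR machinery above turns BZ2_decompress into a resumable state machine: the case labels inside the big switch let the function RETURN(BZ_OK) whenever avail_in runs dry and re-enter at exactly the same point once the caller refills the input buffer. The following standalone sketch shows the same suspend-and-resume bit-reading pattern in isolation; it is illustrative only, and the names (BitReader, get_bits, buff, live) are invented rather than part of libbz2.

#include <stdint.h>
#include <stdio.h>

/* Simplified bit reader: bits are taken from the top of a 32-bit
   accumulator, most-significant bit first, matching the way GET_BITS
   extracts (bsBuff >> (bsLive-n)) & ((1<<n)-1). */
typedef struct {
    uint32_t buff;              /* bit accumulator                 */
    int live;                   /* number of valid bits in 'buff'  */
    const unsigned char *in;    /* current input pointer           */
    size_t avail;               /* bytes still available           */
} BitReader;

/* Try to read n bits (n <= 24). Returns 1 on success; returns 0 if the
   caller must supply more input and call again -- the partially filled
   accumulator is preserved, which is the "save state and return" idea
   of the real decoder. */
static int get_bits(BitReader *br, int n, uint32_t *out)
{
    while (br->live < n) {
        if (br->avail == 0) return 0;            /* suspend: need more input */
        br->buff = (br->buff << 8) | *br->in++;  /* pull in one more byte    */
        br->avail--;
        br->live += 8;
    }
    *out = (br->buff >> (br->live - n)) & ((1u << n) - 1u);
    br->live -= n;
    return 1;
}

int main(void)
{
    /* Feed the stream in two pieces to show that reading resumes cleanly. */
    const unsigned char part1[] = { 0x42 };        /* 'B'      */
    const unsigned char part2[] = { 0x5A, 0x68 };  /* 'Z', 'h' */
    BitReader br = { 0, 0, part1, sizeof part1 };
    uint32_t v;

    get_bits(&br, 8, &v);  printf("byte 1: 0x%02X\n", (unsigned)v);
    if (!get_bits(&br, 8, &v)) {                   /* ran out of input  */
        br.in = part2; br.avail = sizeof part2;    /* resume with more  */
        get_bits(&br, 8, &v);
    }
    printf("byte 2: 0x%02X\n", (unsigned)v);
    get_bits(&br, 8, &v);  printf("byte 3: 0x%02X\n", (unsigned)v);
    return 0;
}

As in the macro, the key property is that a failed read leaves buff/live untouched, so the caller can simply supply more bytes and retry.
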
diff --git a/contrib/libs/libbz2/huffman.c b/contrib/libs/libbz2/huffman.c
index b4a0d0e4cb..43a1899e46 100644
--- a/contrib/libs/libbz2/huffman.c
+++ b/contrib/libs/libbz2/huffman.c
@@ -1,205 +1,205 @@
-
-/*-------------------------------------------------------------*/
-/*--- Huffman coding low-level stuff ---*/
-/*--- huffman.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Huffman coding low-level stuff ---*/
+/*--- huffman.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
- Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#include "bzlib_private.h"
-
-/*---------------------------------------------------*/
-#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
-#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
-#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
-
-#define ADDWEIGHTS(zw1,zw2) \
- (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
- (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
-
-#define UPHEAP(z) \
-{ \
- Int32 zz, tmp; \
- zz = z; tmp = heap[zz]; \
- while (weight[tmp] < weight[heap[zz >> 1]]) { \
- heap[zz] = heap[zz >> 1]; \
- zz >>= 1; \
- } \
- heap[zz] = tmp; \
-}
-
-#define DOWNHEAP(z) \
-{ \
- Int32 zz, yy, tmp; \
- zz = z; tmp = heap[zz]; \
- while (True) { \
- yy = zz << 1; \
- if (yy > nHeap) break; \
- if (yy < nHeap && \
- weight[heap[yy+1]] < weight[heap[yy]]) \
- yy++; \
- if (weight[tmp] < weight[heap[yy]]) break; \
- heap[zz] = heap[yy]; \
- zz = yy; \
- } \
- heap[zz] = tmp; \
-}
-
-
-/*---------------------------------------------------*/
-void BZ2_hbMakeCodeLengths ( UChar *len,
- Int32 *freq,
- Int32 alphaSize,
- Int32 maxLen )
-{
- /*--
- Nodes and heap entries run from 1. Entry 0
- for both the heap and nodes is a sentinel.
- --*/
- Int32 nNodes, nHeap, n1, n2, i, j, k;
- Bool tooLong;
-
- Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
- Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
- Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
-
- for (i = 0; i < alphaSize; i++)
- weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
-
- while (True) {
-
- nNodes = alphaSize;
- nHeap = 0;
-
- heap[0] = 0;
- weight[0] = 0;
- parent[0] = -2;
-
- for (i = 1; i <= alphaSize; i++) {
- parent[i] = -1;
- nHeap++;
- heap[nHeap] = i;
- UPHEAP(nHeap);
- }
-
- AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
-
- while (nHeap > 1) {
- n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
- n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
- nNodes++;
- parent[n1] = parent[n2] = nNodes;
- weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
- parent[nNodes] = -1;
- nHeap++;
- heap[nHeap] = nNodes;
- UPHEAP(nHeap);
- }
-
- AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
-
- tooLong = False;
- for (i = 1; i <= alphaSize; i++) {
- j = 0;
- k = i;
- while (parent[k] >= 0) { k = parent[k]; j++; }
- len[i-1] = j;
- if (j > maxLen) tooLong = True;
- }
-
- if (! tooLong) break;
-
- /* 17 Oct 04: keep-going condition for the following loop used
- to be 'i < alphaSize', which missed the last element,
- theoretically leading to the possibility of the compressor
- looping. However, this count-scaling step is only needed if
- one of the generated Huffman code words is longer than
- maxLen, which up to and including version 1.0.2 was 20 bits,
- which is extremely unlikely. In version 1.0.3 maxLen was
- changed to 17 bits, which has minimal effect on compression
- ratio, but does mean this scaling step is used from time to
- time, enough to verify that it works.
-
- This means that bzip2-1.0.3 and later will only produce
- Huffman codes with a maximum length of 17 bits. However, in
- order to preserve backwards compatibility with bitstreams
- produced by versions pre-1.0.3, the decompressor must still
- handle lengths of up to 20. */
-
- for (i = 1; i <= alphaSize; i++) {
- j = weight[i] >> 8;
- j = 1 + (j / 2);
- weight[i] = j << 8;
- }
- }
-}
-
-
-/*---------------------------------------------------*/
-void BZ2_hbAssignCodes ( Int32 *code,
- UChar *length,
- Int32 minLen,
- Int32 maxLen,
- Int32 alphaSize )
-{
- Int32 n, vec, i;
-
- vec = 0;
- for (n = minLen; n <= maxLen; n++) {
- for (i = 0; i < alphaSize; i++)
- if (length[i] == n) { code[i] = vec; vec++; };
- vec <<= 1;
- }
-}
-
-
-/*---------------------------------------------------*/
-void BZ2_hbCreateDecodeTables ( Int32 *limit,
- Int32 *base,
- Int32 *perm,
- UChar *length,
- Int32 minLen,
- Int32 maxLen,
- Int32 alphaSize )
-{
- Int32 pp, i, j, vec;
-
- pp = 0;
- for (i = minLen; i <= maxLen; i++)
- for (j = 0; j < alphaSize; j++)
- if (length[j] == i) { perm[pp] = j; pp++; };
-
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
- for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
-
- for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
-
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
- vec = 0;
-
- for (i = minLen; i <= maxLen; i++) {
- vec += (base[i+1] - base[i]);
- limit[i] = vec-1;
- vec <<= 1;
- }
- for (i = minLen + 1; i <= maxLen; i++)
- base[i] = ((limit[i-1] + 1) << 1) - base[i];
-}
-
-
-/*-------------------------------------------------------------*/
-/*--- end huffman.c ---*/
-/*-------------------------------------------------------------*/
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------------*/
+#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
+#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
+#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
+
+#define ADDWEIGHTS(zw1,zw2) \
+ (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
+ (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
+
+#define UPHEAP(z) \
+{ \
+ Int32 zz, tmp; \
+ zz = z; tmp = heap[zz]; \
+ while (weight[tmp] < weight[heap[zz >> 1]]) { \
+ heap[zz] = heap[zz >> 1]; \
+ zz >>= 1; \
+ } \
+ heap[zz] = tmp; \
+}
+
+#define DOWNHEAP(z) \
+{ \
+ Int32 zz, yy, tmp; \
+ zz = z; tmp = heap[zz]; \
+ while (True) { \
+ yy = zz << 1; \
+ if (yy > nHeap) break; \
+ if (yy < nHeap && \
+ weight[heap[yy+1]] < weight[heap[yy]]) \
+ yy++; \
+ if (weight[tmp] < weight[heap[yy]]) break; \
+ heap[zz] = heap[yy]; \
+ zz = yy; \
+ } \
+ heap[zz] = tmp; \
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_hbMakeCodeLengths ( UChar *len,
+ Int32 *freq,
+ Int32 alphaSize,
+ Int32 maxLen )
+{
+ /*--
+ Nodes and heap entries run from 1. Entry 0
+ for both the heap and nodes is a sentinel.
+ --*/
+ Int32 nNodes, nHeap, n1, n2, i, j, k;
+ Bool tooLong;
+
+ Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
+ Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
+ Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
+
+ for (i = 0; i < alphaSize; i++)
+ weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
+
+ while (True) {
+
+ nNodes = alphaSize;
+ nHeap = 0;
+
+ heap[0] = 0;
+ weight[0] = 0;
+ parent[0] = -2;
+
+ for (i = 1; i <= alphaSize; i++) {
+ parent[i] = -1;
+ nHeap++;
+ heap[nHeap] = i;
+ UPHEAP(nHeap);
+ }
+
+ AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
+
+ while (nHeap > 1) {
+ n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
+ n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
+ nNodes++;
+ parent[n1] = parent[n2] = nNodes;
+ weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
+ parent[nNodes] = -1;
+ nHeap++;
+ heap[nHeap] = nNodes;
+ UPHEAP(nHeap);
+ }
+
+ AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
+
+ tooLong = False;
+ for (i = 1; i <= alphaSize; i++) {
+ j = 0;
+ k = i;
+ while (parent[k] >= 0) { k = parent[k]; j++; }
+ len[i-1] = j;
+ if (j > maxLen) tooLong = True;
+ }
+
+ if (! tooLong) break;
+
+ /* 17 Oct 04: keep-going condition for the following loop used
+ to be 'i < alphaSize', which missed the last element,
+ theoretically leading to the possibility of the compressor
+ looping. However, this count-scaling step is only needed if
+ one of the generated Huffman code words is longer than
+ maxLen, which up to and including version 1.0.2 was 20 bits,
+ which is extremely unlikely. In version 1.0.3 maxLen was
+ changed to 17 bits, which has minimal effect on compression
+ ratio, but does mean this scaling step is used from time to
+ time, enough to verify that it works.
+
+ This means that bzip2-1.0.3 and later will only produce
+ Huffman codes with a maximum length of 17 bits. However, in
+ order to preserve backwards compatibility with bitstreams
+ produced by versions pre-1.0.3, the decompressor must still
+ handle lengths of up to 20. */
+
+ for (i = 1; i <= alphaSize; i++) {
+ j = weight[i] >> 8;
+ j = 1 + (j / 2);
+ weight[i] = j << 8;
+ }
+ }
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_hbAssignCodes ( Int32 *code,
+ UChar *length,
+ Int32 minLen,
+ Int32 maxLen,
+ Int32 alphaSize )
+{
+ Int32 n, vec, i;
+
+ vec = 0;
+ for (n = minLen; n <= maxLen; n++) {
+ for (i = 0; i < alphaSize; i++)
+ if (length[i] == n) { code[i] = vec; vec++; };
+ vec <<= 1;
+ }
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_hbCreateDecodeTables ( Int32 *limit,
+ Int32 *base,
+ Int32 *perm,
+ UChar *length,
+ Int32 minLen,
+ Int32 maxLen,
+ Int32 alphaSize )
+{
+ Int32 pp, i, j, vec;
+
+ pp = 0;
+ for (i = minLen; i <= maxLen; i++)
+ for (j = 0; j < alphaSize; j++)
+ if (length[j] == i) { perm[pp] = j; pp++; };
+
+ for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
+ for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
+
+ for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
+
+ for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
+ vec = 0;
+
+ for (i = minLen; i <= maxLen; i++) {
+ vec += (base[i+1] - base[i]);
+ limit[i] = vec-1;
+ vec <<= 1;
+ }
+ for (i = minLen + 1; i <= maxLen; i++)
+ base[i] = ((limit[i-1] + 1) << 1) - base[i];
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end huffman.c ---*/
+/*-------------------------------------------------------------*/
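
BZ2_hbCreateDecodeTables produces the limit/base/perm triple that GET_MTF_VAL in decompress.c consumes: a code is lengthened one bit at a time until its value falls at or below limit[length], and the symbol is then perm[value - base[length]]. Below is a minimal, self-contained sketch of that canonical-Huffman decode on a four-symbol alphabet. The table construction restates the routine above; make_decode_tables, decode_symbol and MAX_CODE_LEN are illustrative names only.

#include <stdio.h>

#define MAX_CODE_LEN 23   /* stand-in for BZ_MAX_CODE_LEN */

/* Same table construction as BZ2_hbCreateDecodeTables, restated for a
   standalone demo. */
static void make_decode_tables(int *limit, int *base, int *perm,
                               const unsigned char *length,
                               int minLen, int maxLen, int alphaSize)
{
    int pp = 0, i, j, vec;

    for (i = minLen; i <= maxLen; i++)
        for (j = 0; j < alphaSize; j++)
            if (length[j] == i) { perm[pp] = j; pp++; }

    for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0;
    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
    for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1];

    for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0;
    vec = 0;
    for (i = minLen; i <= maxLen; i++) {
        vec += (base[i+1] - base[i]);
        limit[i] = vec-1;
        vec <<= 1;
    }
    for (i = minLen + 1; i <= maxLen; i++)
        base[i] = ((limit[i-1] + 1) << 1) - base[i];
}

/* Decode one symbol from a stream of '0'/'1' characters, using the same
   "lengthen the code until zvec <= limit[zn]" loop as GET_MTF_VAL. */
static int decode_symbol(const char **bits, const int *limit,
                         const int *base, const int *perm, int minLen)
{
    int zn = minLen, zvec = 0, i;
    for (i = 0; i < minLen; i++)
        zvec = (zvec << 1) | (*(*bits)++ - '0');
    while (zvec > limit[zn]) {
        zn++;
        zvec = (zvec << 1) | (*(*bits)++ - '0');
    }
    return perm[zvec - base[zn]];
}

int main(void)
{
    /* Alphabet {A,B,C,D} with code lengths 1,2,3,3 -> canonical codes
       A=0, B=10, C=110, D=111. */
    const unsigned char length[4] = { 1, 2, 3, 3 };
    int limit[MAX_CODE_LEN], base[MAX_CODE_LEN], perm[4];
    const char *stream = "010110111";   /* A B C D */

    make_decode_tables(limit, base, perm, length, 1, 3, 4);
    while (*stream)
        printf("%c", 'A' + decode_symbol(&stream, limit, base, perm, 1));
    printf("\n");                       /* prints ABCD */
    return 0;
}

Running it decodes the bit string 0 10 110 111 back to ABCD, matching the codes implied by lengths 1, 2, 3 and 3.
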
diff --git a/contrib/libs/libbz2/randtable.c b/contrib/libs/libbz2/randtable.c
index 87146eb7ba..43de39ff68 100644
--- a/contrib/libs/libbz2/randtable.c
+++ b/contrib/libs/libbz2/randtable.c
@@ -1,29 +1,29 @@
-
-/*-------------------------------------------------------------*/
-/*--- Table for randomising repetitive blocks ---*/
-/*--- randtable.c ---*/
-/*-------------------------------------------------------------*/
-
-/* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
-
+
+/*-------------------------------------------------------------*/
+/*--- Table for randomising repetitive blocks ---*/
+/*--- randtable.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
bzip2/libbzip2 version 1.0.8 of 13 July 2019
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
-
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
-
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
-
-
-#include "bzlib_private.h"
-
-
-/*---------------------------------------------*/
-const Int32 BZ2_rNums[512] = {
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------*/
+const Int32 BZ2_rNums[512] = {
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
@@ -75,10 +75,10 @@ const Int32 BZ2_rNums[512] = {
780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
- 936, 638
-};
-
-
-/*-------------------------------------------------------------*/
-/*--- end randtable.c ---*/
-/*-------------------------------------------------------------*/
+ 936, 638
+};
+
+
+/*-------------------------------------------------------------*/
+/*--- end randtable.c ---*/
+/*-------------------------------------------------------------*/
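
BZ2_rNums feeds the BZ_RAND_INIT_MASK / BZ_RAND_UPD_MASK / BZ_RAND_MASK logic used in decompress.c when a block is flagged as randomised: each table entry N means that exactly one of the next N output bytes has its low bit flipped. A rough sketch of that counter follows, using a four-entry stand-in table instead of the real 512 entries; it only prints where the flips would land and is not part of libbz2.

#include <stdio.h>

/* Tiny stand-in for BZ2_rNums; the real table has 512 entries. */
static const int rNums[4] = { 619, 720, 127, 481 };

int main(void)
{
    int rNToGo = 0, rTPos = 0;
    for (int i = 0; i < 2000; i++) {
        if (rNToGo == 0) {                /* start the next run       */
            rNToGo = rNums[rTPos];
            rTPos = (rTPos + 1) % 4;      /* real code wraps at 512   */
        }
        rNToGo--;
        int mask = (rNToGo == 1) ? 1 : 0; /* XORed into the output byte */
        if (mask) printf("flip low bit of byte %d\n", i);
    }
    return 0;
}
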
diff --git a/contrib/libs/libbz2/ya.make b/contrib/libs/libbz2/ya.make
index 3c0c17b863..3d5be08322 100644
--- a/contrib/libs/libbz2/ya.make
+++ b/contrib/libs/libbz2/ya.make
@@ -1,12 +1,12 @@
# Generated by devtools/yamaker from nixpkgs 5852a21819542e6809f68ba5a798600e69874e76.
-LIBRARY()
+LIBRARY()
OWNER(
orivej
g:cpp-contrib
)
-
+
VERSION(1.0.8)
ORIGINAL_SOURCE(https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz)
@@ -19,21 +19,21 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
NO_COMPILER_WARNINGS()
-
+
NO_RUNTIME()
-
+
IF (SANITIZER_TYPE == undefined)
- NO_SANITIZE()
+ NO_SANITIZE()
ENDIF()
-
-SRCS(
+
+SRCS(
blocksort.c
bzlib.c
compress.c
- crctable.c
- decompress.c
- huffman.c
+ crctable.c
+ decompress.c
+ huffman.c
randtable.c
-)
-
-END()
+)
+
+END()
diff --git a/contrib/libs/libc_compat/ifaddrs.c b/contrib/libs/libc_compat/ifaddrs.c
index a09db789ba..c59d8bc745 100644
--- a/contrib/libs/libc_compat/ifaddrs.c
+++ b/contrib/libs/libc_compat/ifaddrs.c
@@ -1,663 +1,663 @@
-/*
-Copyright (c) 2013, Kenneth MacKay
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "ifaddrs.h"
-
-#include <string.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/socket.h>
-#include <netpacket/packet.h>
-#include <net/if_arp.h>
-#include <netinet/in.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-
-typedef struct NetlinkList
-{
- struct NetlinkList *m_next;
- struct nlmsghdr *m_data;
- unsigned int m_size;
-} NetlinkList;
-
-static int netlink_socket(void)
-{
- int l_socket = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if(l_socket < 0)
- {
- return -1;
- }
-
- struct sockaddr_nl l_addr;
- memset(&l_addr, 0, sizeof(l_addr));
- l_addr.nl_family = AF_NETLINK;
- if(bind(l_socket, (struct sockaddr *)&l_addr, sizeof(l_addr)) < 0)
- {
- close(l_socket);
- return -1;
- }
-
- return l_socket;
-}
-
-static int netlink_send(int p_socket, int p_request)
-{
- struct
- {
- struct nlmsghdr m_hdr;
- struct rtgenmsg m_msg;
- } l_data;
-
- memset(&l_data, 0, sizeof(l_data));
-
- l_data.m_hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
- l_data.m_hdr.nlmsg_type = p_request;
- l_data.m_hdr.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
- l_data.m_hdr.nlmsg_pid = 0;
- l_data.m_hdr.nlmsg_seq = p_socket;
- l_data.m_msg.rtgen_family = AF_UNSPEC;
-
- struct sockaddr_nl l_addr;
- memset(&l_addr, 0, sizeof(l_addr));
- l_addr.nl_family = AF_NETLINK;
- return (sendto(p_socket, &l_data.m_hdr, l_data.m_hdr.nlmsg_len, 0, (struct sockaddr *)&l_addr, sizeof(l_addr)));
-}
-
-static int netlink_recv(int p_socket, void *p_buffer, size_t p_len)
-{
- struct msghdr l_msg;
- struct iovec l_iov = { p_buffer, p_len };
- struct sockaddr_nl l_addr;
-
- for(;;)
- {
- l_msg.msg_name = (void *)&l_addr;
- l_msg.msg_namelen = sizeof(l_addr);
- l_msg.msg_iov = &l_iov;
- l_msg.msg_iovlen = 1;
- l_msg.msg_control = NULL;
- l_msg.msg_controllen = 0;
- l_msg.msg_flags = 0;
- int l_result = recvmsg(p_socket, &l_msg, 0);
-
- if(l_result < 0)
- {
- if(errno == EINTR)
- {
- continue;
- }
- return -2;
- }
-
- if(l_msg.msg_flags & MSG_TRUNC)
- { // buffer was too small
- return -1;
- }
- return l_result;
- }
-}
-
-static struct nlmsghdr *getNetlinkResponse(int p_socket, int *p_size, int *p_done)
-{
- size_t l_size = 4096;
- void *l_buffer = NULL;
-
- for(;;)
- {
- free(l_buffer);
- l_buffer = malloc(l_size);
- if (l_buffer == NULL)
- {
- return NULL;
- }
-
- int l_read = netlink_recv(p_socket, l_buffer, l_size);
- *p_size = l_read;
- if(l_read == -2)
- {
- free(l_buffer);
- return NULL;
- }
- if(l_read >= 0)
- {
- pid_t l_pid = getpid();
- struct nlmsghdr *l_hdr;
- for(l_hdr = (struct nlmsghdr *)l_buffer; NLMSG_OK(l_hdr, (unsigned int)l_read); l_hdr = (struct nlmsghdr *)NLMSG_NEXT(l_hdr, l_read))
- {
- if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket)
- {
- continue;
- }
-
- if(l_hdr->nlmsg_type == NLMSG_DONE)
- {
- *p_done = 1;
- break;
- }
-
- if(l_hdr->nlmsg_type == NLMSG_ERROR)
- {
- free(l_buffer);
- return NULL;
- }
- }
- return l_buffer;
- }
-
- l_size *= 2;
- }
-}
-
-static NetlinkList *newListItem(struct nlmsghdr *p_data, unsigned int p_size)
-{
- NetlinkList *l_item = malloc(sizeof(NetlinkList));
- if (l_item == NULL)
- {
- return NULL;
- }
-
- l_item->m_next = NULL;
- l_item->m_data = p_data;
- l_item->m_size = p_size;
- return l_item;
-}
-
-static void freeResultList(NetlinkList *p_list)
-{
- NetlinkList *l_cur;
- while(p_list)
- {
- l_cur = p_list;
- p_list = p_list->m_next;
- free(l_cur->m_data);
- free(l_cur);
- }
-}
-
-static NetlinkList *getResultList(int p_socket, int p_request)
-{
- if(netlink_send(p_socket, p_request) < 0)
- {
- return NULL;
- }
-
- NetlinkList *l_list = NULL;
- NetlinkList *l_end = NULL;
- int l_size;
- int l_done = 0;
- while(!l_done)
- {
- struct nlmsghdr *l_hdr = getNetlinkResponse(p_socket, &l_size, &l_done);
- if(!l_hdr)
- { // error
- freeResultList(l_list);
- return NULL;
- }
-
- NetlinkList *l_item = newListItem(l_hdr, l_size);
- if (!l_item)
- {
- freeResultList(l_list);
- return NULL;
- }
- if(!l_list)
- {
- l_list = l_item;
- }
- else
- {
- l_end->m_next = l_item;
- }
- l_end = l_item;
- }
- return l_list;
-}
-
-static size_t maxSize(size_t a, size_t b)
-{
- return (a > b ? a : b);
-}
-
-static size_t calcAddrLen(sa_family_t p_family, int p_dataSize)
-{
- switch(p_family)
- {
- case AF_INET:
- return sizeof(struct sockaddr_in);
- case AF_INET6:
- return sizeof(struct sockaddr_in6);
- case AF_PACKET:
- return maxSize(sizeof(struct sockaddr_ll), offsetof(struct sockaddr_ll, sll_addr) + p_dataSize);
- default:
- return maxSize(sizeof(struct sockaddr), offsetof(struct sockaddr, sa_data) + p_dataSize);
- }
-}
-
-static void makeSockaddr(sa_family_t p_family, struct sockaddr *p_dest, void *p_data, size_t p_size)
-{
- switch(p_family)
- {
- case AF_INET:
- memcpy(&((struct sockaddr_in*)p_dest)->sin_addr, p_data, p_size);
- break;
- case AF_INET6:
- memcpy(&((struct sockaddr_in6*)p_dest)->sin6_addr, p_data, p_size);
- break;
- case AF_PACKET:
- memcpy(((struct sockaddr_ll*)p_dest)->sll_addr, p_data, p_size);
- ((struct sockaddr_ll*)p_dest)->sll_halen = p_size;
- break;
- default:
- memcpy(p_dest->sa_data, p_data, p_size);
- break;
- }
- p_dest->sa_family = p_family;
-}
-
-static void addToEnd(struct ifaddrs **p_resultList, struct ifaddrs *p_entry)
-{
- if(!*p_resultList)
- {
- *p_resultList = p_entry;
- }
- else
- {
- struct ifaddrs *l_cur = *p_resultList;
- while(l_cur->ifa_next)
- {
- l_cur = l_cur->ifa_next;
- }
- l_cur->ifa_next = p_entry;
- }
-}
-
-static int interpretLink(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList)
-{
- struct ifinfomsg *l_info = (struct ifinfomsg *)NLMSG_DATA(p_hdr);
-
- size_t l_nameSize = 0;
- size_t l_addrSize = 0;
- size_t l_dataSize = 0;
-
- size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg));
- struct rtattr *l_rta;
- for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
- {
- void *l_rtaData = RTA_DATA(l_rta);
- size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
- switch(l_rta->rta_type)
- {
- case IFLA_ADDRESS:
- case IFLA_BROADCAST:
- l_addrSize += NLMSG_ALIGN(calcAddrLen(AF_PACKET, l_rtaDataSize));
- break;
- case IFLA_IFNAME:
- l_nameSize += NLMSG_ALIGN(l_rtaSize + 1);
- break;
- case IFLA_STATS:
- l_dataSize += NLMSG_ALIGN(l_rtaSize);
- break;
- default:
- break;
- }
- }
-
- struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + sizeof(int) + l_nameSize + l_addrSize + l_dataSize);
- if (l_entry == NULL)
- {
- return -1;
- }
- memset(l_entry, 0, sizeof(struct ifaddrs));
- l_entry->ifa_name = "";
-
- char *l_index = ((char *)l_entry) + sizeof(struct ifaddrs);
- char *l_name = l_index + sizeof(int);
- char *l_addr = l_name + l_nameSize;
- char *l_data = l_addr + l_addrSize;
-
- // save the interface index so we can look it up when handling the addresses.
- memcpy(l_index, &l_info->ifi_index, sizeof(int));
-
- l_entry->ifa_flags = l_info->ifi_flags;
-
- l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg));
- for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
- {
- void *l_rtaData = RTA_DATA(l_rta);
- size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
- switch(l_rta->rta_type)
- {
- case IFLA_ADDRESS:
- case IFLA_BROADCAST:
- {
- size_t l_addrLen = calcAddrLen(AF_PACKET, l_rtaDataSize);
- makeSockaddr(AF_PACKET, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize);
- ((struct sockaddr_ll *)l_addr)->sll_ifindex = l_info->ifi_index;
- ((struct sockaddr_ll *)l_addr)->sll_hatype = l_info->ifi_type;
- if(l_rta->rta_type == IFLA_ADDRESS)
- {
- l_entry->ifa_addr = (struct sockaddr *)l_addr;
- }
- else
- {
- l_entry->ifa_broadaddr = (struct sockaddr *)l_addr;
- }
- l_addr += NLMSG_ALIGN(l_addrLen);
- break;
- }
- case IFLA_IFNAME:
- strncpy(l_name, l_rtaData, l_rtaDataSize);
- l_name[l_rtaDataSize] = '\0';
- l_entry->ifa_name = l_name;
- break;
- case IFLA_STATS:
- memcpy(l_data, l_rtaData, l_rtaDataSize);
- l_entry->ifa_data = l_data;
- break;
- default:
- break;
- }
- }
-
- addToEnd(p_resultList, l_entry);
- return 0;
-}
-
-static struct ifaddrs *findInterface(int p_index, struct ifaddrs **p_links, int p_numLinks)
-{
- int l_num = 0;
- struct ifaddrs *l_cur = *p_links;
- while(l_cur && l_num < p_numLinks)
- {
- char *l_indexPtr = ((char *)l_cur) + sizeof(struct ifaddrs);
- int l_index;
- memcpy(&l_index, l_indexPtr, sizeof(int));
- if(l_index == p_index)
- {
- return l_cur;
- }
-
- l_cur = l_cur->ifa_next;
- ++l_num;
- }
- return NULL;
-}
-
-static int interpretAddr(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList, int p_numLinks)
-{
- struct ifaddrmsg *l_info = (struct ifaddrmsg *)NLMSG_DATA(p_hdr);
- struct ifaddrs *l_interface = findInterface(l_info->ifa_index, p_resultList, p_numLinks);
-
- if(l_info->ifa_family == AF_PACKET)
- {
- return 0;
- }
-
- size_t l_nameSize = 0;
- size_t l_addrSize = 0;
-
- int l_addedNetmask = 0;
-
- size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg));
- struct rtattr *l_rta;
- for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
- {
- void *l_rtaData = RTA_DATA(l_rta);
- size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
-
- switch(l_rta->rta_type)
- {
- case IFA_ADDRESS:
- case IFA_LOCAL:
- if((l_info->ifa_family == AF_INET || l_info->ifa_family == AF_INET6) && !l_addedNetmask)
- { // make room for netmask
- l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize));
- l_addedNetmask = 1;
- }
- case IFA_BROADCAST:
- l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize));
- break;
- case IFA_LABEL:
- l_nameSize += NLMSG_ALIGN(l_rtaSize + 1);
- break;
- default:
- break;
- }
- }
-
- struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + l_nameSize + l_addrSize);
- if (l_entry == NULL)
- {
- return -1;
- }
- memset(l_entry, 0, sizeof(struct ifaddrs));
- l_entry->ifa_name = (l_interface ? l_interface->ifa_name : "");
-
- char *l_name = ((char *)l_entry) + sizeof(struct ifaddrs);
- char *l_addr = l_name + l_nameSize;
-
- l_entry->ifa_flags = l_info->ifa_flags;
- if(l_interface)
- {
- l_entry->ifa_flags |= l_interface->ifa_flags;
- }
-
- l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg));
- for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
- {
- void *l_rtaData = RTA_DATA(l_rta);
- size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
- switch(l_rta->rta_type)
- {
- case IFA_ADDRESS:
- case IFA_BROADCAST:
- case IFA_LOCAL:
- {
- size_t l_addrLen = calcAddrLen(l_info->ifa_family, l_rtaDataSize);
- makeSockaddr(l_info->ifa_family, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize);
- if(l_info->ifa_family == AF_INET6)
- {
- if(IN6_IS_ADDR_LINKLOCAL((struct in6_addr *)l_rtaData) || IN6_IS_ADDR_MC_LINKLOCAL((struct in6_addr *)l_rtaData))
- {
- ((struct sockaddr_in6 *)l_addr)->sin6_scope_id = l_info->ifa_index;
- }
- }
-
- if(l_rta->rta_type == IFA_ADDRESS)
- { // apparently in a point-to-point network IFA_ADDRESS contains the dest address and IFA_LOCAL contains the local address
- if(l_entry->ifa_addr)
- {
- l_entry->ifa_dstaddr = (struct sockaddr *)l_addr;
- }
- else
- {
- l_entry->ifa_addr = (struct sockaddr *)l_addr;
- }
- }
- else if(l_rta->rta_type == IFA_LOCAL)
- {
- if(l_entry->ifa_addr)
- {
- l_entry->ifa_dstaddr = l_entry->ifa_addr;
- }
- l_entry->ifa_addr = (struct sockaddr *)l_addr;
- }
- else
- {
- l_entry->ifa_broadaddr = (struct sockaddr *)l_addr;
- }
- l_addr += NLMSG_ALIGN(l_addrLen);
- break;
- }
- case IFA_LABEL:
- strncpy(l_name, l_rtaData, l_rtaDataSize);
- l_name[l_rtaDataSize] = '\0';
- l_entry->ifa_name = l_name;
- break;
- default:
- break;
- }
- }
-
- if(l_entry->ifa_addr && (l_entry->ifa_addr->sa_family == AF_INET || l_entry->ifa_addr->sa_family == AF_INET6))
- {
- unsigned l_maxPrefix = (l_entry->ifa_addr->sa_family == AF_INET ? 32 : 128);
- unsigned l_prefix = (l_info->ifa_prefixlen > l_maxPrefix ? l_maxPrefix : l_info->ifa_prefixlen);
- char l_mask[16] = {0};
- unsigned i;
- for(i=0; i<(l_prefix/8); ++i)
- {
- l_mask[i] = 0xff;
- }
- if(l_prefix % 8)
- {
- l_mask[i] = 0xff << (8 - (l_prefix % 8));
- }
-
- makeSockaddr(l_entry->ifa_addr->sa_family, (struct sockaddr *)l_addr, l_mask, l_maxPrefix / 8);
- l_entry->ifa_netmask = (struct sockaddr *)l_addr;
- }
-
- addToEnd(p_resultList, l_entry);
- return 0;
-}
-
-static int interpretLinks(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList)
-{
- int l_numLinks = 0;
- pid_t l_pid = getpid();
- for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next)
- {
- unsigned int l_nlsize = p_netlinkList->m_size;
- struct nlmsghdr *l_hdr;
- for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize))
- {
- if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket)
- {
- continue;
- }
-
- if(l_hdr->nlmsg_type == NLMSG_DONE)
- {
- break;
- }
-
- if(l_hdr->nlmsg_type == RTM_NEWLINK)
- {
- if(interpretLink(l_hdr, p_resultList) == -1)
- {
- return -1;
- }
- ++l_numLinks;
- }
- }
- }
- return l_numLinks;
-}
-
-static int interpretAddrs(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList, int p_numLinks)
-{
- pid_t l_pid = getpid();
- for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next)
- {
- unsigned int l_nlsize = p_netlinkList->m_size;
- struct nlmsghdr *l_hdr;
- for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize))
- {
- if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket)
- {
- continue;
- }
-
- if(l_hdr->nlmsg_type == NLMSG_DONE)
- {
- break;
- }
-
- if(l_hdr->nlmsg_type == RTM_NEWADDR)
- {
- if (interpretAddr(l_hdr, p_resultList, p_numLinks) == -1)
- {
- return -1;
- }
- }
- }
- }
- return 0;
-}
-
-int getifaddrs(struct ifaddrs **ifap)
-{
- if(!ifap)
- {
- return -1;
- }
- *ifap = NULL;
-
- int l_socket = netlink_socket();
- if(l_socket < 0)
- {
- return -1;
- }
-
- NetlinkList *l_linkResults = getResultList(l_socket, RTM_GETLINK);
- if(!l_linkResults)
- {
- close(l_socket);
- return -1;
- }
-
- NetlinkList *l_addrResults = getResultList(l_socket, RTM_GETADDR);
- if(!l_addrResults)
- {
- close(l_socket);
- freeResultList(l_linkResults);
- return -1;
- }
-
- int l_result = 0;
- int l_numLinks = interpretLinks(l_socket, l_linkResults, ifap);
- if(l_numLinks == -1 || interpretAddrs(l_socket, l_addrResults, ifap, l_numLinks) == -1)
- {
- l_result = -1;
- }
-
- freeResultList(l_linkResults);
- freeResultList(l_addrResults);
- close(l_socket);
- return l_result;
-}
-
-void freeifaddrs(struct ifaddrs *ifa)
-{
- struct ifaddrs *l_cur;
- while(ifa)
- {
- l_cur = ifa;
- ifa = ifa->ifa_next;
- free(l_cur);
- }
-}
+/*
+Copyright (c) 2013, Kenneth MacKay
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "ifaddrs.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <netpacket/packet.h>
+#include <net/if_arp.h>
+#include <netinet/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+typedef struct NetlinkList
+{
+ struct NetlinkList *m_next;
+ struct nlmsghdr *m_data;
+ unsigned int m_size;
+} NetlinkList;
+
+static int netlink_socket(void)
+{
+ int l_socket = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if(l_socket < 0)
+ {
+ return -1;
+ }
+
+ struct sockaddr_nl l_addr;
+ memset(&l_addr, 0, sizeof(l_addr));
+ l_addr.nl_family = AF_NETLINK;
+ if(bind(l_socket, (struct sockaddr *)&l_addr, sizeof(l_addr)) < 0)
+ {
+ close(l_socket);
+ return -1;
+ }
+
+ return l_socket;
+}
+
+static int netlink_send(int p_socket, int p_request)
+{
+ struct
+ {
+ struct nlmsghdr m_hdr;
+ struct rtgenmsg m_msg;
+ } l_data;
+
+ memset(&l_data, 0, sizeof(l_data));
+
+ l_data.m_hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+ l_data.m_hdr.nlmsg_type = p_request;
+ l_data.m_hdr.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
+ l_data.m_hdr.nlmsg_pid = 0;
+ l_data.m_hdr.nlmsg_seq = p_socket;
+ l_data.m_msg.rtgen_family = AF_UNSPEC;
+
+ struct sockaddr_nl l_addr;
+ memset(&l_addr, 0, sizeof(l_addr));
+ l_addr.nl_family = AF_NETLINK;
+ return (sendto(p_socket, &l_data.m_hdr, l_data.m_hdr.nlmsg_len, 0, (struct sockaddr *)&l_addr, sizeof(l_addr)));
+}
+
+static int netlink_recv(int p_socket, void *p_buffer, size_t p_len)
+{
+ struct msghdr l_msg;
+ struct iovec l_iov = { p_buffer, p_len };
+ struct sockaddr_nl l_addr;
+
+ for(;;)
+ {
+ l_msg.msg_name = (void *)&l_addr;
+ l_msg.msg_namelen = sizeof(l_addr);
+ l_msg.msg_iov = &l_iov;
+ l_msg.msg_iovlen = 1;
+ l_msg.msg_control = NULL;
+ l_msg.msg_controllen = 0;
+ l_msg.msg_flags = 0;
+ int l_result = recvmsg(p_socket, &l_msg, 0);
+
+ if(l_result < 0)
+ {
+ if(errno == EINTR)
+ {
+ continue;
+ }
+ return -2;
+ }
+
+ if(l_msg.msg_flags & MSG_TRUNC)
+ { // buffer was too small
+ return -1;
+ }
+ return l_result;
+ }
+}
+
+static struct nlmsghdr *getNetlinkResponse(int p_socket, int *p_size, int *p_done)
+{
+ size_t l_size = 4096;
+ void *l_buffer = NULL;
+
+ for(;;)
+ {
+ free(l_buffer);
+ l_buffer = malloc(l_size);
+ if (l_buffer == NULL)
+ {
+ return NULL;
+ }
+
+ int l_read = netlink_recv(p_socket, l_buffer, l_size);
+ *p_size = l_read;
+ if(l_read == -2)
+ {
+ free(l_buffer);
+ return NULL;
+ }
+ if(l_read >= 0)
+ {
+ pid_t l_pid = getpid();
+ struct nlmsghdr *l_hdr;
+ for(l_hdr = (struct nlmsghdr *)l_buffer; NLMSG_OK(l_hdr, (unsigned int)l_read); l_hdr = (struct nlmsghdr *)NLMSG_NEXT(l_hdr, l_read))
+ {
+ if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket)
+ {
+ continue;
+ }
+
+ if(l_hdr->nlmsg_type == NLMSG_DONE)
+ {
+ *p_done = 1;
+ break;
+ }
+
+ if(l_hdr->nlmsg_type == NLMSG_ERROR)
+ {
+ free(l_buffer);
+ return NULL;
+ }
+ }
+ return l_buffer;
+ }
+
+ l_size *= 2;
+ }
+}
+
+static NetlinkList *newListItem(struct nlmsghdr *p_data, unsigned int p_size)
+{
+ NetlinkList *l_item = malloc(sizeof(NetlinkList));
+ if (l_item == NULL)
+ {
+ return NULL;
+ }
+
+ l_item->m_next = NULL;
+ l_item->m_data = p_data;
+ l_item->m_size = p_size;
+ return l_item;
+}
+
+static void freeResultList(NetlinkList *p_list)
+{
+ NetlinkList *l_cur;
+ while(p_list)
+ {
+ l_cur = p_list;
+ p_list = p_list->m_next;
+ free(l_cur->m_data);
+ free(l_cur);
+ }
+}
+
+static NetlinkList *getResultList(int p_socket, int p_request)
+{
+ if(netlink_send(p_socket, p_request) < 0)
+ {
+ return NULL;
+ }
+
+ NetlinkList *l_list = NULL;
+ NetlinkList *l_end = NULL;
+ int l_size;
+ int l_done = 0;
+ while(!l_done)
+ {
+ struct nlmsghdr *l_hdr = getNetlinkResponse(p_socket, &l_size, &l_done);
+ if(!l_hdr)
+ { // error
+ freeResultList(l_list);
+ return NULL;
+ }
+
+ NetlinkList *l_item = newListItem(l_hdr, l_size);
+ if (!l_item)
+ {
+ freeResultList(l_list);
+ return NULL;
+ }
+ if(!l_list)
+ {
+ l_list = l_item;
+ }
+ else
+ {
+ l_end->m_next = l_item;
+ }
+ l_end = l_item;
+ }
+ return l_list;
+}
+
+static size_t maxSize(size_t a, size_t b)
+{
+ return (a > b ? a : b);
+}
+
+static size_t calcAddrLen(sa_family_t p_family, int p_dataSize)
+{
+ switch(p_family)
+ {
+ case AF_INET:
+ return sizeof(struct sockaddr_in);
+ case AF_INET6:
+ return sizeof(struct sockaddr_in6);
+ case AF_PACKET:
+ return maxSize(sizeof(struct sockaddr_ll), offsetof(struct sockaddr_ll, sll_addr) + p_dataSize);
+ default:
+ return maxSize(sizeof(struct sockaddr), offsetof(struct sockaddr, sa_data) + p_dataSize);
+ }
+}
+
+static void makeSockaddr(sa_family_t p_family, struct sockaddr *p_dest, void *p_data, size_t p_size)
+{
+ switch(p_family)
+ {
+ case AF_INET:
+ memcpy(&((struct sockaddr_in*)p_dest)->sin_addr, p_data, p_size);
+ break;
+ case AF_INET6:
+ memcpy(&((struct sockaddr_in6*)p_dest)->sin6_addr, p_data, p_size);
+ break;
+ case AF_PACKET:
+ memcpy(((struct sockaddr_ll*)p_dest)->sll_addr, p_data, p_size);
+ ((struct sockaddr_ll*)p_dest)->sll_halen = p_size;
+ break;
+ default:
+ memcpy(p_dest->sa_data, p_data, p_size);
+ break;
+ }
+ p_dest->sa_family = p_family;
+}
+
+static void addToEnd(struct ifaddrs **p_resultList, struct ifaddrs *p_entry)
+{
+ if(!*p_resultList)
+ {
+ *p_resultList = p_entry;
+ }
+ else
+ {
+ struct ifaddrs *l_cur = *p_resultList;
+ while(l_cur->ifa_next)
+ {
+ l_cur = l_cur->ifa_next;
+ }
+ l_cur->ifa_next = p_entry;
+ }
+}
+
+static int interpretLink(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList)
+{
+ struct ifinfomsg *l_info = (struct ifinfomsg *)NLMSG_DATA(p_hdr);
+
+ size_t l_nameSize = 0;
+ size_t l_addrSize = 0;
+ size_t l_dataSize = 0;
+
+ size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg));
+ struct rtattr *l_rta;
+ for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
+ {
+ void *l_rtaData = RTA_DATA(l_rta);
+ size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
+ switch(l_rta->rta_type)
+ {
+ case IFLA_ADDRESS:
+ case IFLA_BROADCAST:
+ l_addrSize += NLMSG_ALIGN(calcAddrLen(AF_PACKET, l_rtaDataSize));
+ break;
+ case IFLA_IFNAME:
+ l_nameSize += NLMSG_ALIGN(l_rtaSize + 1);
+ break;
+ case IFLA_STATS:
+ l_dataSize += NLMSG_ALIGN(l_rtaSize);
+ break;
+ default:
+ break;
+ }
+ }
+
+ struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + sizeof(int) + l_nameSize + l_addrSize + l_dataSize);
+ if (l_entry == NULL)
+ {
+ return -1;
+ }
+ memset(l_entry, 0, sizeof(struct ifaddrs));
+ l_entry->ifa_name = "";
+
+ char *l_index = ((char *)l_entry) + sizeof(struct ifaddrs);
+ char *l_name = l_index + sizeof(int);
+ char *l_addr = l_name + l_nameSize;
+ char *l_data = l_addr + l_addrSize;
+
+ // save the interface index so we can look it up when handling the addresses.
+ memcpy(l_index, &l_info->ifi_index, sizeof(int));
+
+ l_entry->ifa_flags = l_info->ifi_flags;
+
+ l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg));
+ for(l_rta = IFLA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
+ {
+ void *l_rtaData = RTA_DATA(l_rta);
+ size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
+ switch(l_rta->rta_type)
+ {
+ case IFLA_ADDRESS:
+ case IFLA_BROADCAST:
+ {
+ size_t l_addrLen = calcAddrLen(AF_PACKET, l_rtaDataSize);
+ makeSockaddr(AF_PACKET, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize);
+ ((struct sockaddr_ll *)l_addr)->sll_ifindex = l_info->ifi_index;
+ ((struct sockaddr_ll *)l_addr)->sll_hatype = l_info->ifi_type;
+ if(l_rta->rta_type == IFLA_ADDRESS)
+ {
+ l_entry->ifa_addr = (struct sockaddr *)l_addr;
+ }
+ else
+ {
+ l_entry->ifa_broadaddr = (struct sockaddr *)l_addr;
+ }
+ l_addr += NLMSG_ALIGN(l_addrLen);
+ break;
+ }
+ case IFLA_IFNAME:
+ strncpy(l_name, l_rtaData, l_rtaDataSize);
+ l_name[l_rtaDataSize] = '\0';
+ l_entry->ifa_name = l_name;
+ break;
+ case IFLA_STATS:
+ memcpy(l_data, l_rtaData, l_rtaDataSize);
+ l_entry->ifa_data = l_data;
+ break;
+ default:
+ break;
+ }
+ }
+
+ addToEnd(p_resultList, l_entry);
+ return 0;
+}
+
+static struct ifaddrs *findInterface(int p_index, struct ifaddrs **p_links, int p_numLinks)
+{
+ int l_num = 0;
+ struct ifaddrs *l_cur = *p_links;
+ while(l_cur && l_num < p_numLinks)
+ {
+ char *l_indexPtr = ((char *)l_cur) + sizeof(struct ifaddrs);
+ int l_index;
+ memcpy(&l_index, l_indexPtr, sizeof(int));
+ if(l_index == p_index)
+ {
+ return l_cur;
+ }
+
+ l_cur = l_cur->ifa_next;
+ ++l_num;
+ }
+ return NULL;
+}
+
+static int interpretAddr(struct nlmsghdr *p_hdr, struct ifaddrs **p_resultList, int p_numLinks)
+{
+ struct ifaddrmsg *l_info = (struct ifaddrmsg *)NLMSG_DATA(p_hdr);
+ struct ifaddrs *l_interface = findInterface(l_info->ifa_index, p_resultList, p_numLinks);
+
+ if(l_info->ifa_family == AF_PACKET)
+ {
+ return 0;
+ }
+
+ size_t l_nameSize = 0;
+ size_t l_addrSize = 0;
+
+ int l_addedNetmask = 0;
+
+ size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg));
+ struct rtattr *l_rta;
+ for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
+ {
+ void *l_rtaData = RTA_DATA(l_rta);
+ size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
+
+ switch(l_rta->rta_type)
+ {
+ case IFA_ADDRESS:
+ case IFA_LOCAL:
+ if((l_info->ifa_family == AF_INET || l_info->ifa_family == AF_INET6) && !l_addedNetmask)
+ { // make room for netmask
+ l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize));
+ l_addedNetmask = 1;
+ }
+ case IFA_BROADCAST:
+ l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize));
+ break;
+ case IFA_LABEL:
+ l_nameSize += NLMSG_ALIGN(l_rtaSize + 1);
+ break;
+ default:
+ break;
+ }
+ }
+
+ struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + l_nameSize + l_addrSize);
+ if (l_entry == NULL)
+ {
+ return -1;
+ }
+ memset(l_entry, 0, sizeof(struct ifaddrs));
+ l_entry->ifa_name = (l_interface ? l_interface->ifa_name : "");
+
+ char *l_name = ((char *)l_entry) + sizeof(struct ifaddrs);
+ char *l_addr = l_name + l_nameSize;
+
+ l_entry->ifa_flags = l_info->ifa_flags;
+ if(l_interface)
+ {
+ l_entry->ifa_flags |= l_interface->ifa_flags;
+ }
+
+ l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg));
+ for(l_rta = IFA_RTA(l_info); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize))
+ {
+ void *l_rtaData = RTA_DATA(l_rta);
+ size_t l_rtaDataSize = RTA_PAYLOAD(l_rta);
+ switch(l_rta->rta_type)
+ {
+ case IFA_ADDRESS:
+ case IFA_BROADCAST:
+ case IFA_LOCAL:
+ {
+ size_t l_addrLen = calcAddrLen(l_info->ifa_family, l_rtaDataSize);
+ makeSockaddr(l_info->ifa_family, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize);
+ if(l_info->ifa_family == AF_INET6)
+ {
+ if(IN6_IS_ADDR_LINKLOCAL((struct in6_addr *)l_rtaData) || IN6_IS_ADDR_MC_LINKLOCAL((struct in6_addr *)l_rtaData))
+ {
+ ((struct sockaddr_in6 *)l_addr)->sin6_scope_id = l_info->ifa_index;
+ }
+ }
+
+ if(l_rta->rta_type == IFA_ADDRESS)
+ { // apparently in a point-to-point network IFA_ADDRESS contains the dest address and IFA_LOCAL contains the local address
+ if(l_entry->ifa_addr)
+ {
+ l_entry->ifa_dstaddr = (struct sockaddr *)l_addr;
+ }
+ else
+ {
+ l_entry->ifa_addr = (struct sockaddr *)l_addr;
+ }
+ }
+ else if(l_rta->rta_type == IFA_LOCAL)
+ {
+ if(l_entry->ifa_addr)
+ {
+ l_entry->ifa_dstaddr = l_entry->ifa_addr;
+ }
+ l_entry->ifa_addr = (struct sockaddr *)l_addr;
+ }
+ else
+ {
+ l_entry->ifa_broadaddr = (struct sockaddr *)l_addr;
+ }
+ l_addr += NLMSG_ALIGN(l_addrLen);
+ break;
+ }
+ case IFA_LABEL:
+ strncpy(l_name, l_rtaData, l_rtaDataSize);
+ l_name[l_rtaDataSize] = '\0';
+ l_entry->ifa_name = l_name;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if(l_entry->ifa_addr && (l_entry->ifa_addr->sa_family == AF_INET || l_entry->ifa_addr->sa_family == AF_INET6))
+ {
+ unsigned l_maxPrefix = (l_entry->ifa_addr->sa_family == AF_INET ? 32 : 128);
+ unsigned l_prefix = (l_info->ifa_prefixlen > l_maxPrefix ? l_maxPrefix : l_info->ifa_prefixlen);
+ char l_mask[16] = {0};
+ unsigned i;
+ for(i=0; i<(l_prefix/8); ++i)
+ {
+ l_mask[i] = 0xff;
+ }
+ if(l_prefix % 8)
+ {
+ l_mask[i] = 0xff << (8 - (l_prefix % 8));
+ }
+
+ makeSockaddr(l_entry->ifa_addr->sa_family, (struct sockaddr *)l_addr, l_mask, l_maxPrefix / 8);
+ l_entry->ifa_netmask = (struct sockaddr *)l_addr;
+ }
+
+ addToEnd(p_resultList, l_entry);
+ return 0;
+}
+
+static int interpretLinks(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList)
+{
+ int l_numLinks = 0;
+ pid_t l_pid = getpid();
+ for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next)
+ {
+ unsigned int l_nlsize = p_netlinkList->m_size;
+ struct nlmsghdr *l_hdr;
+ for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize))
+ {
+ if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket)
+ {
+ continue;
+ }
+
+ if(l_hdr->nlmsg_type == NLMSG_DONE)
+ {
+ break;
+ }
+
+ if(l_hdr->nlmsg_type == RTM_NEWLINK)
+ {
+ if(interpretLink(l_hdr, p_resultList) == -1)
+ {
+ return -1;
+ }
+ ++l_numLinks;
+ }
+ }
+ }
+ return l_numLinks;
+}
+
+static int interpretAddrs(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_resultList, int p_numLinks)
+{
+ pid_t l_pid = getpid();
+ for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next)
+ {
+ unsigned int l_nlsize = p_netlinkList->m_size;
+ struct nlmsghdr *l_hdr;
+ for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize))
+ {
+ if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket)
+ {
+ continue;
+ }
+
+ if(l_hdr->nlmsg_type == NLMSG_DONE)
+ {
+ break;
+ }
+
+ if(l_hdr->nlmsg_type == RTM_NEWADDR)
+ {
+ if (interpretAddr(l_hdr, p_resultList, p_numLinks) == -1)
+ {
+ return -1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int getifaddrs(struct ifaddrs **ifap)
+{
+ if(!ifap)
+ {
+ return -1;
+ }
+ *ifap = NULL;
+
+ int l_socket = netlink_socket();
+ if(l_socket < 0)
+ {
+ return -1;
+ }
+
+ NetlinkList *l_linkResults = getResultList(l_socket, RTM_GETLINK);
+ if(!l_linkResults)
+ {
+ close(l_socket);
+ return -1;
+ }
+
+ NetlinkList *l_addrResults = getResultList(l_socket, RTM_GETADDR);
+ if(!l_addrResults)
+ {
+ close(l_socket);
+ freeResultList(l_linkResults);
+ return -1;
+ }
+
+ int l_result = 0;
+ int l_numLinks = interpretLinks(l_socket, l_linkResults, ifap);
+ if(l_numLinks == -1 || interpretAddrs(l_socket, l_addrResults, ifap, l_numLinks) == -1)
+ {
+ l_result = -1;
+ }
+
+ freeResultList(l_linkResults);
+ freeResultList(l_addrResults);
+ close(l_socket);
+ return l_result;
+}
+
+void freeifaddrs(struct ifaddrs *ifa)
+{
+ struct ifaddrs *l_cur;
+ while(ifa)
+ {
+ l_cur = ifa;
+ ifa = ifa->ifa_next;
+ free(l_cur);
+ }
+}
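The hunk above adds a self-contained netlink-based implementation of getifaddrs()/freeifaddrs(): it dumps RTM_GETLINK and RTM_GETADDR over a netlink socket, turns the replies into a singly linked list of struct ifaddrs, and releases that list node by node. A minimal caller-side sketch is shown below; it is illustrative only and assumes nothing beyond the standard getifaddrs() contract that the patch implements (printing only AF_INET/AF_INET6 entries with inet_ntop() is the example's choice, not the patch's).

#include <arpa/inet.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

/* Walk the list built by getifaddrs() above and print one line per
 * IPv4/IPv6 address; the whole list must eventually be released with
 * freeifaddrs(), which frees each node allocated by interpretLink()
 * and interpretAddr(). */
int main(void)
{
    struct ifaddrs *l_list = NULL;
    if (getifaddrs(&l_list) == -1) {
        perror("getifaddrs");
        return 1;
    }

    for (struct ifaddrs *l_cur = l_list; l_cur; l_cur = l_cur->ifa_next) {
        if (!l_cur->ifa_addr)
            continue;
        char l_buf[INET6_ADDRSTRLEN] = {0};
        if (l_cur->ifa_addr->sa_family == AF_INET) {
            inet_ntop(AF_INET, &((struct sockaddr_in *)l_cur->ifa_addr)->sin_addr,
                      l_buf, sizeof(l_buf));
        } else if (l_cur->ifa_addr->sa_family == AF_INET6) {
            inet_ntop(AF_INET6, &((struct sockaddr_in6 *)l_cur->ifa_addr)->sin6_addr,
                      l_buf, sizeof(l_buf));
        } else {
            continue; /* skip AF_PACKET link-level entries in this sketch */
        }
        printf("%-8s %s\n", l_cur->ifa_name, l_buf);
    }

    freeifaddrs(l_list);
    return 0;
}

Note that the list returned by this implementation contains one entry per link (from interpretLink) plus one per address (from interpretAddr), so the same interface name can appear several times; callers are expected to tolerate that.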
diff --git a/contrib/libs/libc_compat/ubuntu_14/ya.make b/contrib/libs/libc_compat/ubuntu_14/ya.make
index 69f50fedd4..7355c4ad9d 100644
--- a/contrib/libs/libc_compat/ubuntu_14/ya.make
+++ b/contrib/libs/libc_compat/ubuntu_14/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
OWNER(
somov
g:contrib
diff --git a/contrib/libs/libevent/event_core/ya.make b/contrib/libs/libevent/event_core/ya.make
index bff1de464f..3988de817a 100644
--- a/contrib/libs/libevent/event_core/ya.make
+++ b/contrib/libs/libevent/event_core/ya.make
@@ -11,8 +11,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
PEERDIR(
contrib/libs/libc_compat
)
diff --git a/contrib/libs/libevent/event_extra/ya.make b/contrib/libs/libevent/event_extra/ya.make
index f73e9b2bd9..9f4c289e62 100644
--- a/contrib/libs/libevent/event_extra/ya.make
+++ b/contrib/libs/libevent/event_extra/ya.make
@@ -11,8 +11,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
PEERDIR(
contrib/libs/libc_compat
)
diff --git a/contrib/libs/libevent/event_openssl/ya.make b/contrib/libs/libevent/event_openssl/ya.make
index c6d86475fe..fe042893c0 100644
--- a/contrib/libs/libevent/event_openssl/ya.make
+++ b/contrib/libs/libevent/event_openssl/ya.make
@@ -11,8 +11,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
PEERDIR(
contrib/libs/libc_compat
contrib/libs/openssl
diff --git a/contrib/libs/libevent/event_thread/ya.make b/contrib/libs/libevent/event_thread/ya.make
index 548b7f5312..63579b456a 100644
--- a/contrib/libs/libevent/event_thread/ya.make
+++ b/contrib/libs/libevent/event_thread/ya.make
@@ -11,8 +11,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
PEERDIR(
contrib/libs/libc_compat
)
diff --git a/contrib/libs/libidn/static/ya.make b/contrib/libs/libidn/static/ya.make
index c4fd089976..a927d7918e 100644
--- a/contrib/libs/libidn/static/ya.make
+++ b/contrib/libs/libidn/static/ya.make
@@ -3,8 +3,8 @@ OWNER(
g:cpp-contrib
)
-LIBRARY()
-
+LIBRARY()
+
LICENSE(
Custom-Punycode AND
Ietf AND
@@ -12,15 +12,15 @@ LICENSE(
LGPL-2.1-only AND
LGPL-2.1-or-later
)
-
+
LICENSE_TEXTS(../.yandex_meta/licenses.list.txt)
-VERSION(1.9)
-
+VERSION(1.9)
+
PROVIDES(libidn)
-NO_RUNTIME()
-
+NO_RUNTIME()
+
NO_COMPILER_WARNINGS()
ADDINCL(
@@ -34,14 +34,14 @@ CFLAGS(
IF (OS_WINDOWS)
CFLAGS(
-DLIBIDN_EXPORTS
- )
+ )
ENDIF()
IF (OS_ANDROID)
CFLAGS(
-DHAVE_LOCALE_H=1
)
-ENDIF()
+ENDIF()
SRCDIR(contrib/libs/libidn)
@@ -68,5 +68,5 @@ SRCS(
c-strcasecmp.c
c-ctype.c
)
-
-END()
+
+END()
diff --git a/contrib/libs/libidn/unix/config.h b/contrib/libs/libidn/unix/config.h
index af4b48f607..0b4dad46a2 100644
--- a/contrib/libs/libidn/unix/config.h
+++ b/contrib/libs/libidn/unix/config.h
@@ -9,7 +9,7 @@
/* Define to 1 if translation of program messages to the user's native
language is requested. */
-//#define ENABLE_NLS 1
+//#define ENABLE_NLS 1
/* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the
CoreFoundation framework. */
@@ -21,7 +21,7 @@
/* Define if the GNU dcgettext() function is already present or preinstalled.
*/
-//#define HAVE_DCGETTEXT 1
+//#define HAVE_DCGETTEXT 1
/* Define to 1 if you have the declaration of `getenv', and to 0 if you don't.
*/
@@ -192,11 +192,11 @@
/* Define to l, ll, u, ul, ull, etc., as suitable for constants of type
'wchar_t'. */
-#define WCHAR_T_SUFFIX
+#define WCHAR_T_SUFFIX
/* Define to l, ll, u, ul, ull, etc., as suitable for constants of type
'wint_t'. */
-#define WINT_T_SUFFIX
+#define WINT_T_SUFFIX
/* Define to 1 if you want TLD code. */
#define WITH_TLD 1
diff --git a/contrib/libs/libidn/win/ac-stdint.h b/contrib/libs/libidn/win/ac-stdint.h
index 67b3518bf0..a3560eb49a 100644
--- a/contrib/libs/libidn/win/ac-stdint.h
+++ b/contrib/libs/libidn/win/ac-stdint.h
@@ -21,7 +21,7 @@
#ifndef _GENERATED_STDINT_H
#define _GENERATED_STDINT_H
-#include <stdint.h>
+#include <stdint.h>
#endif
#endif
diff --git a/contrib/libs/libidn/ya.make b/contrib/libs/libidn/ya.make
index 5be09da161..678a813caa 100644
--- a/contrib/libs/libidn/ya.make
+++ b/contrib/libs/libidn/ya.make
@@ -3,32 +3,32 @@ OWNER(
g:cpp-contrib
)
-LIBRARY()
-
+LIBRARY()
+
LICENSE(Service-Dll-Harness)
WITHOUT_LICENSE_TEXTS()
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(1.9)
-
-NO_RUNTIME()
-
+VERSION(1.9)
+
+NO_RUNTIME()
+
NO_COMPILER_WARNINGS()
IF (USE_DYNAMIC_IDN)
- PEERDIR(
- contrib/libs/libidn/dynamic
- )
-ELSE()
+ PEERDIR(
+ contrib/libs/libidn/dynamic
+ )
+ELSE()
PEERDIR(
contrib/libs/libidn/static
)
-ENDIF()
-
-END()
-
+ENDIF()
+
+END()
+
RECURSE(
dynamic
static
diff --git a/contrib/libs/libunwind/include/__libunwind_config.h b/contrib/libs/libunwind/include/__libunwind_config.h
index 8cb3981f60..e87bcf4003 100644
--- a/contrib/libs/libunwind/include/__libunwind_config.h
+++ b/contrib/libs/libunwind/include/__libunwind_config.h
@@ -1,19 +1,19 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ____LIBUNWIND_CONFIG_H__
-#define ____LIBUNWIND_CONFIG_H__
-
-#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
- !defined(__ARM_DWARF_EH__)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ____LIBUNWIND_CONFIG_H__
+#define ____LIBUNWIND_CONFIG_H__
+
+#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
+ !defined(__ARM_DWARF_EH__)
#define _LIBUNWIND_ARM_EHABI
-#endif
-
+#endif
+
#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86 8
#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 32
#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC 112
@@ -181,4 +181,4 @@
# define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287
#endif // _LIBUNWIND_IS_NATIVE_ONLY
-#endif // ____LIBUNWIND_CONFIG_H__
+#endif // ____LIBUNWIND_CONFIG_H__
diff --git a/contrib/libs/libunwind/include/libunwind.h b/contrib/libs/libunwind/include/libunwind.h
index 8e1a4266cf..8303c1a04c 100644
--- a/contrib/libs/libunwind/include/libunwind.h
+++ b/contrib/libs/libunwind/include/libunwind.h
@@ -1,24 +1,24 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
+//
+//
// Compatible with libunwind API documented at:
-// http://www.nongnu.org/libunwind/man/libunwind(3).html
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __LIBUNWIND__
-#define __LIBUNWIND__
-
-#include "__libunwind_config.h"
-
-#include <stdint.h>
-#include <stddef.h>
-
-#ifdef __APPLE__
+// http://www.nongnu.org/libunwind/man/libunwind(3).html
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LIBUNWIND__
+#define __LIBUNWIND__
+
+#include "__libunwind_config.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __APPLE__
#if __clang__
#if __has_include(<Availability.h>)
#include <Availability.h>
@@ -35,140 +35,140 @@
#include <AvailabilityMacros.h>
#ifdef AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
#define LIBUNWIND_AVAIL AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
- #else
+ #else
#define LIBUNWIND_AVAIL __attribute__((unavailable))
- #endif
+ #endif
#endif
-#else
- #define LIBUNWIND_AVAIL
-#endif
-
+#else
+ #define LIBUNWIND_AVAIL
+#endif
+
#if defined(_WIN32) && defined(__SEH__)
#define LIBUNWIND_CURSOR_ALIGNMENT_ATTR __attribute__((__aligned__(16)))
#else
#define LIBUNWIND_CURSOR_ALIGNMENT_ATTR
#endif
-/* error codes */
-enum {
- UNW_ESUCCESS = 0, /* no error */
- UNW_EUNSPEC = -6540, /* unspecified (general) error */
- UNW_ENOMEM = -6541, /* out of memory */
- UNW_EBADREG = -6542, /* bad register number */
- UNW_EREADONLYREG = -6543, /* attempt to write read-only register */
- UNW_ESTOPUNWIND = -6544, /* stop unwinding */
- UNW_EINVALIDIP = -6545, /* invalid IP */
- UNW_EBADFRAME = -6546, /* bad frame */
- UNW_EINVAL = -6547, /* unsupported operation or bad value */
- UNW_EBADVERSION = -6548, /* unwind info has unsupported version */
- UNW_ENOINFO = -6549 /* no unwind info found */
+/* error codes */
+enum {
+ UNW_ESUCCESS = 0, /* no error */
+ UNW_EUNSPEC = -6540, /* unspecified (general) error */
+ UNW_ENOMEM = -6541, /* out of memory */
+ UNW_EBADREG = -6542, /* bad register number */
+ UNW_EREADONLYREG = -6543, /* attempt to write read-only register */
+ UNW_ESTOPUNWIND = -6544, /* stop unwinding */
+ UNW_EINVALIDIP = -6545, /* invalid IP */
+ UNW_EBADFRAME = -6546, /* bad frame */
+ UNW_EINVAL = -6547, /* unsupported operation or bad value */
+ UNW_EBADVERSION = -6548, /* unwind info has unsupported version */
+ UNW_ENOINFO = -6549 /* no unwind info found */
#if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY)
, UNW_ECROSSRASIGNING = -6550 /* cross unwind with return address signing */
#endif
-};
-
-struct unw_context_t {
+};
+
+struct unw_context_t {
uint64_t data[_LIBUNWIND_CONTEXT_SIZE];
-};
-typedef struct unw_context_t unw_context_t;
-
-struct unw_cursor_t {
+};
+typedef struct unw_context_t unw_context_t;
+
+struct unw_cursor_t {
uint64_t data[_LIBUNWIND_CURSOR_SIZE];
} LIBUNWIND_CURSOR_ALIGNMENT_ATTR;
-typedef struct unw_cursor_t unw_cursor_t;
-
-typedef struct unw_addr_space *unw_addr_space_t;
-
-typedef int unw_regnum_t;
+typedef struct unw_cursor_t unw_cursor_t;
+
+typedef struct unw_addr_space *unw_addr_space_t;
+
+typedef int unw_regnum_t;
typedef uintptr_t unw_word_t;
#if defined(__arm__) && !defined(__ARM_DWARF_EH__)
-typedef uint64_t unw_fpreg_t;
-#else
-typedef double unw_fpreg_t;
-#endif
-
-struct unw_proc_info_t {
- unw_word_t start_ip; /* start address of function */
- unw_word_t end_ip; /* address after end of function */
- unw_word_t lsda; /* address of language specific data area, */
- /* or zero if not used */
- unw_word_t handler; /* personality routine, or zero if not used */
- unw_word_t gp; /* not used */
- unw_word_t flags; /* not used */
- uint32_t format; /* compact unwind encoding, or zero if none */
+typedef uint64_t unw_fpreg_t;
+#else
+typedef double unw_fpreg_t;
+#endif
+
+struct unw_proc_info_t {
+ unw_word_t start_ip; /* start address of function */
+ unw_word_t end_ip; /* address after end of function */
+ unw_word_t lsda; /* address of language specific data area, */
+ /* or zero if not used */
+ unw_word_t handler; /* personality routine, or zero if not used */
+ unw_word_t gp; /* not used */
+ unw_word_t flags; /* not used */
+ uint32_t format; /* compact unwind encoding, or zero if none */
uint32_t unwind_info_size; /* size of DWARF unwind info, or zero if none */
unw_word_t unwind_info; /* address of DWARF unwind info, or zero */
- unw_word_t extra; /* mach_header of mach-o image containing func */
-};
-typedef struct unw_proc_info_t unw_proc_info_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL;
-extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL;
-extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL;
-extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL;
-extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL;
-extern int unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t) LIBUNWIND_AVAIL;
-extern int unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t) LIBUNWIND_AVAIL;
-extern int unw_resume(unw_cursor_t *) LIBUNWIND_AVAIL;
-
-#ifdef __arm__
-/* Save VFP registers in FSTMX format (instead of FSTMD). */
-extern void unw_save_vfp_as_X(unw_cursor_t *) LIBUNWIND_AVAIL;
-#endif
-
-
-extern const char *unw_regname(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL;
-extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *) LIBUNWIND_AVAIL;
-extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL;
-extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL;
-extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL;
-//extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*);
-
-extern unw_addr_space_t unw_local_addr_space;
-
-#ifdef __cplusplus
-}
-#endif
-
-// architecture independent register numbers
-enum {
- UNW_REG_IP = -1, // instruction pointer
- UNW_REG_SP = -2, // stack pointer
-};
-
-// 32-bit x86 registers
-enum {
- UNW_X86_EAX = 0,
- UNW_X86_ECX = 1,
- UNW_X86_EDX = 2,
- UNW_X86_EBX = 3,
- UNW_X86_EBP = 4,
- UNW_X86_ESP = 5,
- UNW_X86_ESI = 6,
- UNW_X86_EDI = 7
-};
-
-// 64-bit x86_64 registers
-enum {
- UNW_X86_64_RAX = 0,
- UNW_X86_64_RDX = 1,
- UNW_X86_64_RCX = 2,
- UNW_X86_64_RBX = 3,
- UNW_X86_64_RSI = 4,
- UNW_X86_64_RDI = 5,
- UNW_X86_64_RBP = 6,
- UNW_X86_64_RSP = 7,
- UNW_X86_64_R8 = 8,
- UNW_X86_64_R9 = 9,
- UNW_X86_64_R10 = 10,
- UNW_X86_64_R11 = 11,
- UNW_X86_64_R12 = 12,
- UNW_X86_64_R13 = 13,
- UNW_X86_64_R14 = 14,
+ unw_word_t extra; /* mach_header of mach-o image containing func */
+};
+typedef struct unw_proc_info_t unw_proc_info_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL;
+extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL;
+extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL;
+extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL;
+extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL;
+extern int unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t) LIBUNWIND_AVAIL;
+extern int unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t) LIBUNWIND_AVAIL;
+extern int unw_resume(unw_cursor_t *) LIBUNWIND_AVAIL;
+
+#ifdef __arm__
+/* Save VFP registers in FSTMX format (instead of FSTMD). */
+extern void unw_save_vfp_as_X(unw_cursor_t *) LIBUNWIND_AVAIL;
+#endif
+
+
+extern const char *unw_regname(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL;
+extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *) LIBUNWIND_AVAIL;
+extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL;
+extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL;
+extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL;
+//extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*);
+
+extern unw_addr_space_t unw_local_addr_space;
+
+#ifdef __cplusplus
+}
+#endif
+
+// architecture independent register numbers
+enum {
+ UNW_REG_IP = -1, // instruction pointer
+ UNW_REG_SP = -2, // stack pointer
+};
+
+// 32-bit x86 registers
+enum {
+ UNW_X86_EAX = 0,
+ UNW_X86_ECX = 1,
+ UNW_X86_EDX = 2,
+ UNW_X86_EBX = 3,
+ UNW_X86_EBP = 4,
+ UNW_X86_ESP = 5,
+ UNW_X86_ESI = 6,
+ UNW_X86_EDI = 7
+};
+
+// 64-bit x86_64 registers
+enum {
+ UNW_X86_64_RAX = 0,
+ UNW_X86_64_RDX = 1,
+ UNW_X86_64_RCX = 2,
+ UNW_X86_64_RBX = 3,
+ UNW_X86_64_RSI = 4,
+ UNW_X86_64_RDI = 5,
+ UNW_X86_64_RBP = 6,
+ UNW_X86_64_RSP = 7,
+ UNW_X86_64_R8 = 8,
+ UNW_X86_64_R9 = 9,
+ UNW_X86_64_R10 = 10,
+ UNW_X86_64_R11 = 11,
+ UNW_X86_64_R12 = 12,
+ UNW_X86_64_R13 = 13,
+ UNW_X86_64_R14 = 14,
UNW_X86_64_R15 = 15,
UNW_X86_64_RIP = 16,
UNW_X86_64_XMM0 = 17,
@@ -187,126 +187,126 @@ enum {
UNW_X86_64_XMM13 = 30,
UNW_X86_64_XMM14 = 31,
UNW_X86_64_XMM15 = 32,
-};
-
-
-// 32-bit ppc register numbers
-enum {
- UNW_PPC_R0 = 0,
- UNW_PPC_R1 = 1,
- UNW_PPC_R2 = 2,
- UNW_PPC_R3 = 3,
- UNW_PPC_R4 = 4,
- UNW_PPC_R5 = 5,
- UNW_PPC_R6 = 6,
- UNW_PPC_R7 = 7,
- UNW_PPC_R8 = 8,
- UNW_PPC_R9 = 9,
- UNW_PPC_R10 = 10,
- UNW_PPC_R11 = 11,
- UNW_PPC_R12 = 12,
- UNW_PPC_R13 = 13,
- UNW_PPC_R14 = 14,
- UNW_PPC_R15 = 15,
- UNW_PPC_R16 = 16,
- UNW_PPC_R17 = 17,
- UNW_PPC_R18 = 18,
- UNW_PPC_R19 = 19,
- UNW_PPC_R20 = 20,
- UNW_PPC_R21 = 21,
- UNW_PPC_R22 = 22,
- UNW_PPC_R23 = 23,
- UNW_PPC_R24 = 24,
- UNW_PPC_R25 = 25,
- UNW_PPC_R26 = 26,
- UNW_PPC_R27 = 27,
- UNW_PPC_R28 = 28,
- UNW_PPC_R29 = 29,
- UNW_PPC_R30 = 30,
- UNW_PPC_R31 = 31,
- UNW_PPC_F0 = 32,
- UNW_PPC_F1 = 33,
- UNW_PPC_F2 = 34,
- UNW_PPC_F3 = 35,
- UNW_PPC_F4 = 36,
- UNW_PPC_F5 = 37,
- UNW_PPC_F6 = 38,
- UNW_PPC_F7 = 39,
- UNW_PPC_F8 = 40,
- UNW_PPC_F9 = 41,
- UNW_PPC_F10 = 42,
- UNW_PPC_F11 = 43,
- UNW_PPC_F12 = 44,
- UNW_PPC_F13 = 45,
- UNW_PPC_F14 = 46,
- UNW_PPC_F15 = 47,
- UNW_PPC_F16 = 48,
- UNW_PPC_F17 = 49,
- UNW_PPC_F18 = 50,
- UNW_PPC_F19 = 51,
- UNW_PPC_F20 = 52,
- UNW_PPC_F21 = 53,
- UNW_PPC_F22 = 54,
- UNW_PPC_F23 = 55,
- UNW_PPC_F24 = 56,
- UNW_PPC_F25 = 57,
- UNW_PPC_F26 = 58,
- UNW_PPC_F27 = 59,
- UNW_PPC_F28 = 60,
- UNW_PPC_F29 = 61,
- UNW_PPC_F30 = 62,
- UNW_PPC_F31 = 63,
- UNW_PPC_MQ = 64,
- UNW_PPC_LR = 65,
- UNW_PPC_CTR = 66,
- UNW_PPC_AP = 67,
- UNW_PPC_CR0 = 68,
- UNW_PPC_CR1 = 69,
- UNW_PPC_CR2 = 70,
- UNW_PPC_CR3 = 71,
- UNW_PPC_CR4 = 72,
- UNW_PPC_CR5 = 73,
- UNW_PPC_CR6 = 74,
- UNW_PPC_CR7 = 75,
- UNW_PPC_XER = 76,
- UNW_PPC_V0 = 77,
- UNW_PPC_V1 = 78,
- UNW_PPC_V2 = 79,
- UNW_PPC_V3 = 80,
- UNW_PPC_V4 = 81,
- UNW_PPC_V5 = 82,
- UNW_PPC_V6 = 83,
- UNW_PPC_V7 = 84,
- UNW_PPC_V8 = 85,
- UNW_PPC_V9 = 86,
- UNW_PPC_V10 = 87,
- UNW_PPC_V11 = 88,
- UNW_PPC_V12 = 89,
- UNW_PPC_V13 = 90,
- UNW_PPC_V14 = 91,
- UNW_PPC_V15 = 92,
- UNW_PPC_V16 = 93,
- UNW_PPC_V17 = 94,
- UNW_PPC_V18 = 95,
- UNW_PPC_V19 = 96,
- UNW_PPC_V20 = 97,
- UNW_PPC_V21 = 98,
- UNW_PPC_V22 = 99,
- UNW_PPC_V23 = 100,
- UNW_PPC_V24 = 101,
- UNW_PPC_V25 = 102,
- UNW_PPC_V26 = 103,
- UNW_PPC_V27 = 104,
- UNW_PPC_V28 = 105,
- UNW_PPC_V29 = 106,
- UNW_PPC_V30 = 107,
- UNW_PPC_V31 = 108,
- UNW_PPC_VRSAVE = 109,
- UNW_PPC_VSCR = 110,
- UNW_PPC_SPE_ACC = 111,
- UNW_PPC_SPEFSCR = 112
-};
-
+};
+
+
+// 32-bit ppc register numbers
+enum {
+ UNW_PPC_R0 = 0,
+ UNW_PPC_R1 = 1,
+ UNW_PPC_R2 = 2,
+ UNW_PPC_R3 = 3,
+ UNW_PPC_R4 = 4,
+ UNW_PPC_R5 = 5,
+ UNW_PPC_R6 = 6,
+ UNW_PPC_R7 = 7,
+ UNW_PPC_R8 = 8,
+ UNW_PPC_R9 = 9,
+ UNW_PPC_R10 = 10,
+ UNW_PPC_R11 = 11,
+ UNW_PPC_R12 = 12,
+ UNW_PPC_R13 = 13,
+ UNW_PPC_R14 = 14,
+ UNW_PPC_R15 = 15,
+ UNW_PPC_R16 = 16,
+ UNW_PPC_R17 = 17,
+ UNW_PPC_R18 = 18,
+ UNW_PPC_R19 = 19,
+ UNW_PPC_R20 = 20,
+ UNW_PPC_R21 = 21,
+ UNW_PPC_R22 = 22,
+ UNW_PPC_R23 = 23,
+ UNW_PPC_R24 = 24,
+ UNW_PPC_R25 = 25,
+ UNW_PPC_R26 = 26,
+ UNW_PPC_R27 = 27,
+ UNW_PPC_R28 = 28,
+ UNW_PPC_R29 = 29,
+ UNW_PPC_R30 = 30,
+ UNW_PPC_R31 = 31,
+ UNW_PPC_F0 = 32,
+ UNW_PPC_F1 = 33,
+ UNW_PPC_F2 = 34,
+ UNW_PPC_F3 = 35,
+ UNW_PPC_F4 = 36,
+ UNW_PPC_F5 = 37,
+ UNW_PPC_F6 = 38,
+ UNW_PPC_F7 = 39,
+ UNW_PPC_F8 = 40,
+ UNW_PPC_F9 = 41,
+ UNW_PPC_F10 = 42,
+ UNW_PPC_F11 = 43,
+ UNW_PPC_F12 = 44,
+ UNW_PPC_F13 = 45,
+ UNW_PPC_F14 = 46,
+ UNW_PPC_F15 = 47,
+ UNW_PPC_F16 = 48,
+ UNW_PPC_F17 = 49,
+ UNW_PPC_F18 = 50,
+ UNW_PPC_F19 = 51,
+ UNW_PPC_F20 = 52,
+ UNW_PPC_F21 = 53,
+ UNW_PPC_F22 = 54,
+ UNW_PPC_F23 = 55,
+ UNW_PPC_F24 = 56,
+ UNW_PPC_F25 = 57,
+ UNW_PPC_F26 = 58,
+ UNW_PPC_F27 = 59,
+ UNW_PPC_F28 = 60,
+ UNW_PPC_F29 = 61,
+ UNW_PPC_F30 = 62,
+ UNW_PPC_F31 = 63,
+ UNW_PPC_MQ = 64,
+ UNW_PPC_LR = 65,
+ UNW_PPC_CTR = 66,
+ UNW_PPC_AP = 67,
+ UNW_PPC_CR0 = 68,
+ UNW_PPC_CR1 = 69,
+ UNW_PPC_CR2 = 70,
+ UNW_PPC_CR3 = 71,
+ UNW_PPC_CR4 = 72,
+ UNW_PPC_CR5 = 73,
+ UNW_PPC_CR6 = 74,
+ UNW_PPC_CR7 = 75,
+ UNW_PPC_XER = 76,
+ UNW_PPC_V0 = 77,
+ UNW_PPC_V1 = 78,
+ UNW_PPC_V2 = 79,
+ UNW_PPC_V3 = 80,
+ UNW_PPC_V4 = 81,
+ UNW_PPC_V5 = 82,
+ UNW_PPC_V6 = 83,
+ UNW_PPC_V7 = 84,
+ UNW_PPC_V8 = 85,
+ UNW_PPC_V9 = 86,
+ UNW_PPC_V10 = 87,
+ UNW_PPC_V11 = 88,
+ UNW_PPC_V12 = 89,
+ UNW_PPC_V13 = 90,
+ UNW_PPC_V14 = 91,
+ UNW_PPC_V15 = 92,
+ UNW_PPC_V16 = 93,
+ UNW_PPC_V17 = 94,
+ UNW_PPC_V18 = 95,
+ UNW_PPC_V19 = 96,
+ UNW_PPC_V20 = 97,
+ UNW_PPC_V21 = 98,
+ UNW_PPC_V22 = 99,
+ UNW_PPC_V23 = 100,
+ UNW_PPC_V24 = 101,
+ UNW_PPC_V25 = 102,
+ UNW_PPC_V26 = 103,
+ UNW_PPC_V27 = 104,
+ UNW_PPC_V28 = 105,
+ UNW_PPC_V29 = 106,
+ UNW_PPC_V30 = 107,
+ UNW_PPC_V31 = 108,
+ UNW_PPC_VRSAVE = 109,
+ UNW_PPC_VSCR = 110,
+ UNW_PPC_SPE_ACC = 111,
+ UNW_PPC_SPEFSCR = 112
+};
+
// 64-bit ppc register numbers
enum {
UNW_PPC64_R0 = 0,
@@ -491,8 +491,8 @@ enum {
UNW_PPC64_VS63 = UNW_PPC64_V31
};
-// 64-bit ARM64 registers
-enum {
+// 64-bit ARM64 registers
+enum {
UNW_AARCH64_X0 = 0,
UNW_AARCH64_X1 = 1,
UNW_AARCH64_X2 = 2,
@@ -530,7 +530,7 @@ enum {
UNW_AARCH64_SP = 31,
UNW_AARCH64_PC = 32,
- // reserved block
+ // reserved block
UNW_AARCH64_RA_SIGN_STATE = 34,
// FP/vector registers
@@ -637,176 +637,176 @@ enum {
UNW_ARM64_D29 = UNW_AARCH64_V29,
UNW_ARM64_D30 = UNW_AARCH64_V30,
UNW_ARM64_D31 = UNW_AARCH64_V31,
-};
-
-// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1.
-// Naming scheme uses recommendations given in Note 4 for VFP-v2 and VFP-v3.
-// In this scheme, even though the 64-bit floating point registers D0-D31
-// overlap physically with the 32-bit floating pointer registers S0-S31,
-// they are given a non-overlapping range of register numbers.
-//
-// Commented out ranges are not preserved during unwinding.
-enum {
- UNW_ARM_R0 = 0,
- UNW_ARM_R1 = 1,
- UNW_ARM_R2 = 2,
- UNW_ARM_R3 = 3,
- UNW_ARM_R4 = 4,
- UNW_ARM_R5 = 5,
- UNW_ARM_R6 = 6,
- UNW_ARM_R7 = 7,
- UNW_ARM_R8 = 8,
- UNW_ARM_R9 = 9,
- UNW_ARM_R10 = 10,
- UNW_ARM_R11 = 11,
- UNW_ARM_R12 = 12,
- UNW_ARM_SP = 13, // Logical alias for UNW_REG_SP
- UNW_ARM_R13 = 13,
- UNW_ARM_LR = 14,
- UNW_ARM_R14 = 14,
- UNW_ARM_IP = 15, // Logical alias for UNW_REG_IP
- UNW_ARM_R15 = 15,
- // 16-63 -- OBSOLETE. Used in VFP1 to represent both S0-S31 and D0-D31.
- UNW_ARM_S0 = 64,
- UNW_ARM_S1 = 65,
- UNW_ARM_S2 = 66,
- UNW_ARM_S3 = 67,
- UNW_ARM_S4 = 68,
- UNW_ARM_S5 = 69,
- UNW_ARM_S6 = 70,
- UNW_ARM_S7 = 71,
- UNW_ARM_S8 = 72,
- UNW_ARM_S9 = 73,
- UNW_ARM_S10 = 74,
- UNW_ARM_S11 = 75,
- UNW_ARM_S12 = 76,
- UNW_ARM_S13 = 77,
- UNW_ARM_S14 = 78,
- UNW_ARM_S15 = 79,
- UNW_ARM_S16 = 80,
- UNW_ARM_S17 = 81,
- UNW_ARM_S18 = 82,
- UNW_ARM_S19 = 83,
- UNW_ARM_S20 = 84,
- UNW_ARM_S21 = 85,
- UNW_ARM_S22 = 86,
- UNW_ARM_S23 = 87,
- UNW_ARM_S24 = 88,
- UNW_ARM_S25 = 89,
- UNW_ARM_S26 = 90,
- UNW_ARM_S27 = 91,
- UNW_ARM_S28 = 92,
- UNW_ARM_S29 = 93,
- UNW_ARM_S30 = 94,
- UNW_ARM_S31 = 95,
- // 96-103 -- OBSOLETE. F0-F7. Used by the FPA system. Superseded by VFP.
- // 104-111 -- wCGR0-wCGR7, ACC0-ACC7 (Intel wireless MMX)
- UNW_ARM_WR0 = 112,
- UNW_ARM_WR1 = 113,
- UNW_ARM_WR2 = 114,
- UNW_ARM_WR3 = 115,
- UNW_ARM_WR4 = 116,
- UNW_ARM_WR5 = 117,
- UNW_ARM_WR6 = 118,
- UNW_ARM_WR7 = 119,
- UNW_ARM_WR8 = 120,
- UNW_ARM_WR9 = 121,
- UNW_ARM_WR10 = 122,
- UNW_ARM_WR11 = 123,
- UNW_ARM_WR12 = 124,
- UNW_ARM_WR13 = 125,
- UNW_ARM_WR14 = 126,
- UNW_ARM_WR15 = 127,
- // 128-133 -- SPSR, SPSR_{FIQ|IRQ|ABT|UND|SVC}
+};
+
+// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1.
+// Naming scheme uses recommendations given in Note 4 for VFP-v2 and VFP-v3.
+// In this scheme, even though the 64-bit floating point registers D0-D31
+// overlap physically with the 32-bit floating point registers S0-S31,
+// they are given a non-overlapping range of register numbers.
+//
+// Commented out ranges are not preserved during unwinding.
+enum {
+ UNW_ARM_R0 = 0,
+ UNW_ARM_R1 = 1,
+ UNW_ARM_R2 = 2,
+ UNW_ARM_R3 = 3,
+ UNW_ARM_R4 = 4,
+ UNW_ARM_R5 = 5,
+ UNW_ARM_R6 = 6,
+ UNW_ARM_R7 = 7,
+ UNW_ARM_R8 = 8,
+ UNW_ARM_R9 = 9,
+ UNW_ARM_R10 = 10,
+ UNW_ARM_R11 = 11,
+ UNW_ARM_R12 = 12,
+ UNW_ARM_SP = 13, // Logical alias for UNW_REG_SP
+ UNW_ARM_R13 = 13,
+ UNW_ARM_LR = 14,
+ UNW_ARM_R14 = 14,
+ UNW_ARM_IP = 15, // Logical alias for UNW_REG_IP
+ UNW_ARM_R15 = 15,
+ // 16-63 -- OBSOLETE. Used in VFP1 to represent both S0-S31 and D0-D31.
+ UNW_ARM_S0 = 64,
+ UNW_ARM_S1 = 65,
+ UNW_ARM_S2 = 66,
+ UNW_ARM_S3 = 67,
+ UNW_ARM_S4 = 68,
+ UNW_ARM_S5 = 69,
+ UNW_ARM_S6 = 70,
+ UNW_ARM_S7 = 71,
+ UNW_ARM_S8 = 72,
+ UNW_ARM_S9 = 73,
+ UNW_ARM_S10 = 74,
+ UNW_ARM_S11 = 75,
+ UNW_ARM_S12 = 76,
+ UNW_ARM_S13 = 77,
+ UNW_ARM_S14 = 78,
+ UNW_ARM_S15 = 79,
+ UNW_ARM_S16 = 80,
+ UNW_ARM_S17 = 81,
+ UNW_ARM_S18 = 82,
+ UNW_ARM_S19 = 83,
+ UNW_ARM_S20 = 84,
+ UNW_ARM_S21 = 85,
+ UNW_ARM_S22 = 86,
+ UNW_ARM_S23 = 87,
+ UNW_ARM_S24 = 88,
+ UNW_ARM_S25 = 89,
+ UNW_ARM_S26 = 90,
+ UNW_ARM_S27 = 91,
+ UNW_ARM_S28 = 92,
+ UNW_ARM_S29 = 93,
+ UNW_ARM_S30 = 94,
+ UNW_ARM_S31 = 95,
+ // 96-103 -- OBSOLETE. F0-F7. Used by the FPA system. Superseded by VFP.
+ // 104-111 -- wCGR0-wCGR7, ACC0-ACC7 (Intel wireless MMX)
+ UNW_ARM_WR0 = 112,
+ UNW_ARM_WR1 = 113,
+ UNW_ARM_WR2 = 114,
+ UNW_ARM_WR3 = 115,
+ UNW_ARM_WR4 = 116,
+ UNW_ARM_WR5 = 117,
+ UNW_ARM_WR6 = 118,
+ UNW_ARM_WR7 = 119,
+ UNW_ARM_WR8 = 120,
+ UNW_ARM_WR9 = 121,
+ UNW_ARM_WR10 = 122,
+ UNW_ARM_WR11 = 123,
+ UNW_ARM_WR12 = 124,
+ UNW_ARM_WR13 = 125,
+ UNW_ARM_WR14 = 126,
+ UNW_ARM_WR15 = 127,
+ // 128-133 -- SPSR, SPSR_{FIQ|IRQ|ABT|UND|SVC}
// 134-142 -- Reserved
UNW_ARM_RA_AUTH_CODE = 143,
- // 144-150 -- R8_USR-R14_USR
- // 151-157 -- R8_FIQ-R14_FIQ
- // 158-159 -- R13_IRQ-R14_IRQ
- // 160-161 -- R13_ABT-R14_ABT
- // 162-163 -- R13_UND-R14_UND
- // 164-165 -- R13_SVC-R14_SVC
- // 166-191 -- Reserved
- UNW_ARM_WC0 = 192,
- UNW_ARM_WC1 = 193,
- UNW_ARM_WC2 = 194,
- UNW_ARM_WC3 = 195,
- // 196-199 -- wC4-wC7 (Intel wireless MMX control)
- // 200-255 -- Reserved
- UNW_ARM_D0 = 256,
- UNW_ARM_D1 = 257,
- UNW_ARM_D2 = 258,
- UNW_ARM_D3 = 259,
- UNW_ARM_D4 = 260,
- UNW_ARM_D5 = 261,
- UNW_ARM_D6 = 262,
- UNW_ARM_D7 = 263,
- UNW_ARM_D8 = 264,
- UNW_ARM_D9 = 265,
- UNW_ARM_D10 = 266,
- UNW_ARM_D11 = 267,
- UNW_ARM_D12 = 268,
- UNW_ARM_D13 = 269,
- UNW_ARM_D14 = 270,
- UNW_ARM_D15 = 271,
- UNW_ARM_D16 = 272,
- UNW_ARM_D17 = 273,
- UNW_ARM_D18 = 274,
- UNW_ARM_D19 = 275,
- UNW_ARM_D20 = 276,
- UNW_ARM_D21 = 277,
- UNW_ARM_D22 = 278,
- UNW_ARM_D23 = 279,
- UNW_ARM_D24 = 280,
- UNW_ARM_D25 = 281,
- UNW_ARM_D26 = 282,
- UNW_ARM_D27 = 283,
- UNW_ARM_D28 = 284,
- UNW_ARM_D29 = 285,
- UNW_ARM_D30 = 286,
- UNW_ARM_D31 = 287,
- // 288-319 -- Reserved for VFP/Neon
- // 320-8191 -- Reserved
- // 8192-16383 -- Unspecified vendor co-processor register.
-};
-
-// OpenRISC1000 register numbers
-enum {
- UNW_OR1K_R0 = 0,
- UNW_OR1K_R1 = 1,
- UNW_OR1K_R2 = 2,
- UNW_OR1K_R3 = 3,
- UNW_OR1K_R4 = 4,
- UNW_OR1K_R5 = 5,
- UNW_OR1K_R6 = 6,
- UNW_OR1K_R7 = 7,
- UNW_OR1K_R8 = 8,
- UNW_OR1K_R9 = 9,
- UNW_OR1K_R10 = 10,
- UNW_OR1K_R11 = 11,
- UNW_OR1K_R12 = 12,
- UNW_OR1K_R13 = 13,
- UNW_OR1K_R14 = 14,
- UNW_OR1K_R15 = 15,
- UNW_OR1K_R16 = 16,
- UNW_OR1K_R17 = 17,
- UNW_OR1K_R18 = 18,
- UNW_OR1K_R19 = 19,
- UNW_OR1K_R20 = 20,
- UNW_OR1K_R21 = 21,
- UNW_OR1K_R22 = 22,
- UNW_OR1K_R23 = 23,
- UNW_OR1K_R24 = 24,
- UNW_OR1K_R25 = 25,
- UNW_OR1K_R26 = 26,
- UNW_OR1K_R27 = 27,
- UNW_OR1K_R28 = 28,
- UNW_OR1K_R29 = 29,
- UNW_OR1K_R30 = 30,
- UNW_OR1K_R31 = 31,
+ // 144-150 -- R8_USR-R14_USR
+ // 151-157 -- R8_FIQ-R14_FIQ
+ // 158-159 -- R13_IRQ-R14_IRQ
+ // 160-161 -- R13_ABT-R14_ABT
+ // 162-163 -- R13_UND-R14_UND
+ // 164-165 -- R13_SVC-R14_SVC
+ // 166-191 -- Reserved
+ UNW_ARM_WC0 = 192,
+ UNW_ARM_WC1 = 193,
+ UNW_ARM_WC2 = 194,
+ UNW_ARM_WC3 = 195,
+ // 196-199 -- wC4-wC7 (Intel wireless MMX control)
+ // 200-255 -- Reserved
+ UNW_ARM_D0 = 256,
+ UNW_ARM_D1 = 257,
+ UNW_ARM_D2 = 258,
+ UNW_ARM_D3 = 259,
+ UNW_ARM_D4 = 260,
+ UNW_ARM_D5 = 261,
+ UNW_ARM_D6 = 262,
+ UNW_ARM_D7 = 263,
+ UNW_ARM_D8 = 264,
+ UNW_ARM_D9 = 265,
+ UNW_ARM_D10 = 266,
+ UNW_ARM_D11 = 267,
+ UNW_ARM_D12 = 268,
+ UNW_ARM_D13 = 269,
+ UNW_ARM_D14 = 270,
+ UNW_ARM_D15 = 271,
+ UNW_ARM_D16 = 272,
+ UNW_ARM_D17 = 273,
+ UNW_ARM_D18 = 274,
+ UNW_ARM_D19 = 275,
+ UNW_ARM_D20 = 276,
+ UNW_ARM_D21 = 277,
+ UNW_ARM_D22 = 278,
+ UNW_ARM_D23 = 279,
+ UNW_ARM_D24 = 280,
+ UNW_ARM_D25 = 281,
+ UNW_ARM_D26 = 282,
+ UNW_ARM_D27 = 283,
+ UNW_ARM_D28 = 284,
+ UNW_ARM_D29 = 285,
+ UNW_ARM_D30 = 286,
+ UNW_ARM_D31 = 287,
+ // 288-319 -- Reserved for VFP/Neon
+ // 320-8191 -- Reserved
+ // 8192-16383 -- Unspecified vendor co-processor register.
+};
+
+// OpenRISC1000 register numbers
+enum {
+ UNW_OR1K_R0 = 0,
+ UNW_OR1K_R1 = 1,
+ UNW_OR1K_R2 = 2,
+ UNW_OR1K_R3 = 3,
+ UNW_OR1K_R4 = 4,
+ UNW_OR1K_R5 = 5,
+ UNW_OR1K_R6 = 6,
+ UNW_OR1K_R7 = 7,
+ UNW_OR1K_R8 = 8,
+ UNW_OR1K_R9 = 9,
+ UNW_OR1K_R10 = 10,
+ UNW_OR1K_R11 = 11,
+ UNW_OR1K_R12 = 12,
+ UNW_OR1K_R13 = 13,
+ UNW_OR1K_R14 = 14,
+ UNW_OR1K_R15 = 15,
+ UNW_OR1K_R16 = 16,
+ UNW_OR1K_R17 = 17,
+ UNW_OR1K_R18 = 18,
+ UNW_OR1K_R19 = 19,
+ UNW_OR1K_R20 = 20,
+ UNW_OR1K_R21 = 21,
+ UNW_OR1K_R22 = 22,
+ UNW_OR1K_R23 = 23,
+ UNW_OR1K_R24 = 24,
+ UNW_OR1K_R25 = 25,
+ UNW_OR1K_R26 = 26,
+ UNW_OR1K_R27 = 27,
+ UNW_OR1K_R28 = 28,
+ UNW_OR1K_R29 = 29,
+ UNW_OR1K_R30 = 30,
+ UNW_OR1K_R31 = 31,
UNW_OR1K_EPCR = 32,
-};
-
+};
+
// MIPS registers
enum {
UNW_MIPS_R0 = 0,
@@ -1174,4 +1174,4 @@ enum {
UNW_VE_VL = 145,
};
-#endif
+#endif
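The header restored above declares the local unwinding entry points (unw_getcontext, unw_init_local, unw_step, unw_get_reg, unw_get_proc_name) together with the per-architecture register constants. A minimal in-process backtrace sketch follows, using only those declarations; the 256-byte name buffer, the output format, and linking against -lunwind are illustrative assumptions, not part of the header.

#include <libunwind.h>
#include <stdio.h>

/* Minimal backtrace of the calling thread using the API declared in
 * libunwind.h: capture the current context, bind a cursor to it, then
 * step frame by frame reading UNW_REG_IP and the enclosing symbol name. */
static void print_backtrace(void)
{
    unw_context_t l_context;
    unw_cursor_t l_cursor;

    if (unw_getcontext(&l_context) != UNW_ESUCCESS ||
        unw_init_local(&l_cursor, &l_context) != UNW_ESUCCESS)
        return;

    do {
        unw_word_t l_ip = 0;
        unw_word_t l_offset = 0;
        char l_name[256] = "?";

        unw_get_reg(&l_cursor, UNW_REG_IP, &l_ip);
        unw_get_proc_name(&l_cursor, l_name, sizeof(l_name), &l_offset);
        printf("ip=%p  %s+0x%lx\n", (void *)l_ip, l_name, (unsigned long)l_offset);
    } while (unw_step(&l_cursor) > 0);
}

int main(void)
{
    print_backtrace();
    return 0;
}

unw_step() returns a positive value while frames remain, zero at the outermost frame, and a negative UNW_E* code on error, which is why the loop condition checks for a value greater than zero.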
diff --git a/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h b/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h
index 45d873f75c..68d562eec4 100644
--- a/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h
+++ b/contrib/libs/libunwind/include/mach-o/compact_unwind_encoding.h
@@ -1,477 +1,477 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
+//
+//
// Darwin's alternative to DWARF based unwind encodings.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef __COMPACT_UNWIND_ENCODING__
-#define __COMPACT_UNWIND_ENCODING__
-
-#include <stdint.h>
-
-//
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef __COMPACT_UNWIND_ENCODING__
+#define __COMPACT_UNWIND_ENCODING__
+
+#include <stdint.h>
+
+//
// Compilers can emit standard DWARF FDEs in the __TEXT,__eh_frame section
-// of object files. Or compilers can emit compact unwind information in
-// the __LD,__compact_unwind section.
-//
-// When the linker creates a final linked image, it will create a
-// __TEXT,__unwind_info section. This section is a small and fast way for the
-// runtime to access unwind info for any given function. If the compiler
-// emitted compact unwind info for the function, that compact unwind info will
-// be encoded in the __TEXT,__unwind_info section. If the compiler emitted
+// of object files. Or compilers can emit compact unwind information in
+// the __LD,__compact_unwind section.
+//
+// When the linker creates a final linked image, it will create a
+// __TEXT,__unwind_info section. This section is a small and fast way for the
+// runtime to access unwind info for any given function. If the compiler
+// emitted compact unwind info for the function, that compact unwind info will
+// be encoded in the __TEXT,__unwind_info section. If the compiler emitted
// DWARF unwind info, the __TEXT,__unwind_info section will contain the offset
-// of the FDE in the __TEXT,__eh_frame section in the final linked image.
-//
+// of the FDE in the __TEXT,__eh_frame section in the final linked image.
+//
// Note: Previously, the linker would transform some DWARF unwind infos into
-// compact unwind info. But that is fragile and no longer done.
-
-
-//
-// The compact unwind endoding is a 32-bit value which encoded in an
-// architecture specific way, which registers to restore from where, and how
-// to unwind out of the function.
-//
-typedef uint32_t compact_unwind_encoding_t;
-
-
-// architecture independent bits
-enum {
- UNWIND_IS_NOT_FUNCTION_START = 0x80000000,
- UNWIND_HAS_LSDA = 0x40000000,
- UNWIND_PERSONALITY_MASK = 0x30000000,
-};
-
-
-
-
-//
-// x86
-//
-// 1-bit: start
-// 1-bit: has lsda
-// 2-bit: personality index
-//
+// compact unwind info. But that is fragile and no longer done.
+
+
+//
+// The compact unwind encoding is a 32-bit value which encodes, in an
+// architecture specific way, which registers to restore from where, and how
+// to unwind out of the function.
+//
+typedef uint32_t compact_unwind_encoding_t;
+
+
+// architecture independent bits
+enum {
+ UNWIND_IS_NOT_FUNCTION_START = 0x80000000,
+ UNWIND_HAS_LSDA = 0x40000000,
+ UNWIND_PERSONALITY_MASK = 0x30000000,
+};
+
+
+
+
+//
+// x86
+//
+// 1-bit: start
+// 1-bit: has lsda
+// 2-bit: personality index
+//
// 4-bits: 0=old, 1=ebp based, 2=stack-imm, 3=stack-ind, 4=DWARF
-// ebp based:
-// 15-bits (5*3-bits per reg) register permutation
-// 8-bits for stack offset
-// frameless:
-// 8-bits stack size
-// 3-bits stack adjust
-// 3-bits register count
-// 10-bits register permutation
-//
-enum {
- UNWIND_X86_MODE_MASK = 0x0F000000,
- UNWIND_X86_MODE_EBP_FRAME = 0x01000000,
- UNWIND_X86_MODE_STACK_IMMD = 0x02000000,
- UNWIND_X86_MODE_STACK_IND = 0x03000000,
- UNWIND_X86_MODE_DWARF = 0x04000000,
-
- UNWIND_X86_EBP_FRAME_REGISTERS = 0x00007FFF,
- UNWIND_X86_EBP_FRAME_OFFSET = 0x00FF0000,
-
- UNWIND_X86_FRAMELESS_STACK_SIZE = 0x00FF0000,
- UNWIND_X86_FRAMELESS_STACK_ADJUST = 0x0000E000,
- UNWIND_X86_FRAMELESS_STACK_REG_COUNT = 0x00001C00,
- UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF,
-
- UNWIND_X86_DWARF_SECTION_OFFSET = 0x00FFFFFF,
-};
-
-enum {
- UNWIND_X86_REG_NONE = 0,
- UNWIND_X86_REG_EBX = 1,
- UNWIND_X86_REG_ECX = 2,
- UNWIND_X86_REG_EDX = 3,
- UNWIND_X86_REG_EDI = 4,
- UNWIND_X86_REG_ESI = 5,
- UNWIND_X86_REG_EBP = 6,
-};
-
-//
-// For x86 there are four modes for the compact unwind encoding:
-// UNWIND_X86_MODE_EBP_FRAME:
-// EBP based frame where EBP is push on stack immediately after return address,
-// then ESP is moved to EBP. Thus, to unwind ESP is restored with the current
-// EPB value, then EBP is restored by popping off the stack, and the return
-// is done by popping the stack once more into the pc.
-// All non-volatile registers that need to be restored must have been saved
-// in a small range in the stack that starts EBP-4 to EBP-1020. The offset/4
-// is encoded in the UNWIND_X86_EBP_FRAME_OFFSET bits. The registers saved
-// are encoded in the UNWIND_X86_EBP_FRAME_REGISTERS bits as five 3-bit entries.
-// Each entry contains which register to restore.
-// UNWIND_X86_MODE_STACK_IMMD:
-// A "frameless" (EBP not used as frame pointer) function with a small
-// constant stack size. To return, a constant (encoded in the compact
-// unwind encoding) is added to the ESP. Then the return is done by
-// popping the stack into the pc.
-// All non-volatile registers that need to be restored must have been saved
-// on the stack immediately after the return address. The stack_size/4 is
-// encoded in the UNWIND_X86_FRAMELESS_STACK_SIZE (max stack size is 1024).
-// The number of registers saved is encoded in UNWIND_X86_FRAMELESS_STACK_REG_COUNT.
-// UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION constains which registers were
-// saved and their order.
-// UNWIND_X86_MODE_STACK_IND:
-// A "frameless" (EBP not used as frame pointer) function large constant
-// stack size. This case is like the previous, except the stack size is too
-// large to encode in the compact unwind encoding. Instead it requires that
-// the function contains "subl $nnnnnnnn,ESP" in its prolog. The compact
-// encoding contains the offset to the nnnnnnnn value in the function in
-// UNWIND_X86_FRAMELESS_STACK_SIZE.
-// UNWIND_X86_MODE_DWARF:
-// No compact unwind encoding is available. Instead the low 24-bits of the
+// ebp based:
+// 15-bits (5*3-bits per reg) register permutation
+// 8-bits for stack offset
+// frameless:
+// 8-bits stack size
+// 3-bits stack adjust
+// 3-bits register count
+// 10-bits register permutation
+//
+enum {
+ UNWIND_X86_MODE_MASK = 0x0F000000,
+ UNWIND_X86_MODE_EBP_FRAME = 0x01000000,
+ UNWIND_X86_MODE_STACK_IMMD = 0x02000000,
+ UNWIND_X86_MODE_STACK_IND = 0x03000000,
+ UNWIND_X86_MODE_DWARF = 0x04000000,
+
+ UNWIND_X86_EBP_FRAME_REGISTERS = 0x00007FFF,
+ UNWIND_X86_EBP_FRAME_OFFSET = 0x00FF0000,
+
+ UNWIND_X86_FRAMELESS_STACK_SIZE = 0x00FF0000,
+ UNWIND_X86_FRAMELESS_STACK_ADJUST = 0x0000E000,
+ UNWIND_X86_FRAMELESS_STACK_REG_COUNT = 0x00001C00,
+ UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF,
+
+ UNWIND_X86_DWARF_SECTION_OFFSET = 0x00FFFFFF,
+};
+
+enum {
+ UNWIND_X86_REG_NONE = 0,
+ UNWIND_X86_REG_EBX = 1,
+ UNWIND_X86_REG_ECX = 2,
+ UNWIND_X86_REG_EDX = 3,
+ UNWIND_X86_REG_EDI = 4,
+ UNWIND_X86_REG_ESI = 5,
+ UNWIND_X86_REG_EBP = 6,
+};
+
+//
+// For x86 there are four modes for the compact unwind encoding:
+// UNWIND_X86_MODE_EBP_FRAME:
+// EBP based frame where EBP is pushed on the stack immediately after the return
+// address, then ESP is moved to EBP. Thus, to unwind, ESP is restored with the
+// current EBP value, then EBP is restored by popping off the stack, and the return
+// is done by popping the stack once more into the pc.
+// All non-volatile registers that need to be restored must have been saved
+// in a small range in the stack that starts EBP-4 to EBP-1020. The offset/4
+// is encoded in the UNWIND_X86_EBP_FRAME_OFFSET bits. The registers saved
+// are encoded in the UNWIND_X86_EBP_FRAME_REGISTERS bits as five 3-bit entries.
+// Each entry contains which register to restore.
+// UNWIND_X86_MODE_STACK_IMMD:
+// A "frameless" (EBP not used as frame pointer) function with a small
+// constant stack size. To return, a constant (encoded in the compact
+// unwind encoding) is added to the ESP. Then the return is done by
+// popping the stack into the pc.
+// All non-volatile registers that need to be restored must have been saved
+// on the stack immediately after the return address. The stack_size/4 is
+// encoded in the UNWIND_X86_FRAMELESS_STACK_SIZE (max stack size is 1024).
+// The number of registers saved is encoded in UNWIND_X86_FRAMELESS_STACK_REG_COUNT.
+// UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION contains which registers were
+// saved and their order.
+// UNWIND_X86_MODE_STACK_IND:
+// A "frameless" (EBP not used as frame pointer) function large constant
+// stack size. This case is like the previous, except the stack size is too
+// large to encode in the compact unwind encoding. Instead it requires that
+// the function contains "subl $nnnnnnnn,ESP" in its prolog. The compact
+// encoding contains the offset to the nnnnnnnn value in the function in
+// UNWIND_X86_FRAMELESS_STACK_SIZE.
+// UNWIND_X86_MODE_DWARF:
+// No compact unwind encoding is available. Instead the low 24-bits of the
// compact encoding is the offset of the DWARF FDE in the __eh_frame section.
-// This mode is never used in object files. It is only generated by the
+// This mode is never used in object files. It is only generated by the
// linker in final linked images which have only DWARF unwind info for a
-// function.
-//
-// The permutation encoding is a Lehmer code sequence encoded into a
-// single variable-base number so we can encode the ordering of up to
-// six registers in a 10-bit space.
-//
-// The following is the algorithm used to create the permutation encoding used
-// with frameless stacks. It is passed the number of registers to be saved and
-// an array of the register numbers saved.
-//
-//uint32_t permute_encode(uint32_t registerCount, const uint32_t registers[6])
-//{
-// uint32_t renumregs[6];
-// for (int i=6-registerCount; i < 6; ++i) {
-// int countless = 0;
-// for (int j=6-registerCount; j < i; ++j) {
-// if ( registers[j] < registers[i] )
-// ++countless;
-// }
-// renumregs[i] = registers[i] - countless -1;
-// }
-// uint32_t permutationEncoding = 0;
-// switch ( registerCount ) {
-// case 6:
-// permutationEncoding |= (120*renumregs[0] + 24*renumregs[1]
-// + 6*renumregs[2] + 2*renumregs[3]
-// + renumregs[4]);
-// break;
-// case 5:
-// permutationEncoding |= (120*renumregs[1] + 24*renumregs[2]
-// + 6*renumregs[3] + 2*renumregs[4]
-// + renumregs[5]);
-// break;
-// case 4:
-// permutationEncoding |= (60*renumregs[2] + 12*renumregs[3]
-// + 3*renumregs[4] + renumregs[5]);
-// break;
-// case 3:
-// permutationEncoding |= (20*renumregs[3] + 4*renumregs[4]
-// + renumregs[5]);
-// break;
-// case 2:
-// permutationEncoding |= (5*renumregs[4] + renumregs[5]);
-// break;
-// case 1:
-// permutationEncoding |= (renumregs[5]);
-// break;
-// }
-// return permutationEncoding;
-//}
-//
-
-
-
-
-//
-// x86_64
-//
-// 1-bit: start
-// 1-bit: has lsda
-// 2-bit: personality index
-//
+// function.
+//
+// The permutation encoding is a Lehmer code sequence encoded into a
+// single variable-base number so we can encode the ordering of up to
+// six registers in a 10-bit space.
+//
+// The following is the algorithm used to create the permutation encoding used
+// with frameless stacks. It is passed the number of registers to be saved and
+// an array of the register numbers saved.
+//
+//uint32_t permute_encode(uint32_t registerCount, const uint32_t registers[6])
+//{
+// uint32_t renumregs[6];
+// for (int i=6-registerCount; i < 6; ++i) {
+// int countless = 0;
+// for (int j=6-registerCount; j < i; ++j) {
+// if ( registers[j] < registers[i] )
+// ++countless;
+// }
+// renumregs[i] = registers[i] - countless -1;
+// }
+// uint32_t permutationEncoding = 0;
+// switch ( registerCount ) {
+// case 6:
+// permutationEncoding |= (120*renumregs[0] + 24*renumregs[1]
+// + 6*renumregs[2] + 2*renumregs[3]
+// + renumregs[4]);
+// break;
+// case 5:
+// permutationEncoding |= (120*renumregs[1] + 24*renumregs[2]
+// + 6*renumregs[3] + 2*renumregs[4]
+// + renumregs[5]);
+// break;
+// case 4:
+// permutationEncoding |= (60*renumregs[2] + 12*renumregs[3]
+// + 3*renumregs[4] + renumregs[5]);
+// break;
+// case 3:
+// permutationEncoding |= (20*renumregs[3] + 4*renumregs[4]
+// + renumregs[5]);
+// break;
+// case 2:
+// permutationEncoding |= (5*renumregs[4] + renumregs[5]);
+// break;
+// case 1:
+// permutationEncoding |= (renumregs[5]);
+// break;
+// }
+// return permutationEncoding;
+//}
+//
+
+
+
+
+//
+// x86_64
+//
+// 1-bit: start
+// 1-bit: has lsda
+// 2-bit: personality index
+//
// 4-bits: 0=old, 1=rbp based, 2=stack-imm, 3=stack-ind, 4=DWARF
-// rbp based:
-// 15-bits (5*3-bits per reg) register permutation
-// 8-bits for stack offset
-// frameless:
-// 8-bits stack size
-// 3-bits stack adjust
-// 3-bits register count
-// 10-bits register permutation
-//
-enum {
- UNWIND_X86_64_MODE_MASK = 0x0F000000,
- UNWIND_X86_64_MODE_RBP_FRAME = 0x01000000,
- UNWIND_X86_64_MODE_STACK_IMMD = 0x02000000,
- UNWIND_X86_64_MODE_STACK_IND = 0x03000000,
- UNWIND_X86_64_MODE_DWARF = 0x04000000,
-
- UNWIND_X86_64_RBP_FRAME_REGISTERS = 0x00007FFF,
- UNWIND_X86_64_RBP_FRAME_OFFSET = 0x00FF0000,
-
- UNWIND_X86_64_FRAMELESS_STACK_SIZE = 0x00FF0000,
- UNWIND_X86_64_FRAMELESS_STACK_ADJUST = 0x0000E000,
- UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT = 0x00001C00,
- UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF,
-
- UNWIND_X86_64_DWARF_SECTION_OFFSET = 0x00FFFFFF,
-};
-
-enum {
- UNWIND_X86_64_REG_NONE = 0,
- UNWIND_X86_64_REG_RBX = 1,
- UNWIND_X86_64_REG_R12 = 2,
- UNWIND_X86_64_REG_R13 = 3,
- UNWIND_X86_64_REG_R14 = 4,
- UNWIND_X86_64_REG_R15 = 5,
- UNWIND_X86_64_REG_RBP = 6,
-};
-//
-// For x86_64 there are four modes for the compact unwind encoding:
-// UNWIND_X86_64_MODE_RBP_FRAME:
-// RBP based frame where RBP is push on stack immediately after return address,
-// then RSP is moved to RBP. Thus, to unwind RSP is restored with the current
-// EPB value, then RBP is restored by popping off the stack, and the return
-// is done by popping the stack once more into the pc.
-// All non-volatile registers that need to be restored must have been saved
-// in a small range in the stack that starts RBP-8 to RBP-2040. The offset/8
-// is encoded in the UNWIND_X86_64_RBP_FRAME_OFFSET bits. The registers saved
-// are encoded in the UNWIND_X86_64_RBP_FRAME_REGISTERS bits as five 3-bit entries.
-// Each entry contains which register to restore.
-// UNWIND_X86_64_MODE_STACK_IMMD:
-// A "frameless" (RBP not used as frame pointer) function with a small
-// constant stack size. To return, a constant (encoded in the compact
-// unwind encoding) is added to the RSP. Then the return is done by
-// popping the stack into the pc.
-// All non-volatile registers that need to be restored must have been saved
-// on the stack immediately after the return address. The stack_size/8 is
-// encoded in the UNWIND_X86_64_FRAMELESS_STACK_SIZE (max stack size is 2048).
-// The number of registers saved is encoded in UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT.
-// UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION constains which registers were
-// saved and their order.
-// UNWIND_X86_64_MODE_STACK_IND:
-// A "frameless" (RBP not used as frame pointer) function large constant
-// stack size. This case is like the previous, except the stack size is too
-// large to encode in the compact unwind encoding. Instead it requires that
-// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact
-// encoding contains the offset to the nnnnnnnn value in the function in
-// UNWIND_X86_64_FRAMELESS_STACK_SIZE.
-// UNWIND_X86_64_MODE_DWARF:
-// No compact unwind encoding is available. Instead the low 24-bits of the
+// rbp based:
+// 15-bits (5*3-bits per reg) register permutation
+// 8-bits for stack offset
+// frameless:
+// 8-bits stack size
+// 3-bits stack adjust
+// 3-bits register count
+// 10-bits register permutation
+//
+enum {
+ UNWIND_X86_64_MODE_MASK = 0x0F000000,
+ UNWIND_X86_64_MODE_RBP_FRAME = 0x01000000,
+ UNWIND_X86_64_MODE_STACK_IMMD = 0x02000000,
+ UNWIND_X86_64_MODE_STACK_IND = 0x03000000,
+ UNWIND_X86_64_MODE_DWARF = 0x04000000,
+
+ UNWIND_X86_64_RBP_FRAME_REGISTERS = 0x00007FFF,
+ UNWIND_X86_64_RBP_FRAME_OFFSET = 0x00FF0000,
+
+ UNWIND_X86_64_FRAMELESS_STACK_SIZE = 0x00FF0000,
+ UNWIND_X86_64_FRAMELESS_STACK_ADJUST = 0x0000E000,
+ UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT = 0x00001C00,
+ UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF,
+
+ UNWIND_X86_64_DWARF_SECTION_OFFSET = 0x00FFFFFF,
+};
+
+enum {
+ UNWIND_X86_64_REG_NONE = 0,
+ UNWIND_X86_64_REG_RBX = 1,
+ UNWIND_X86_64_REG_R12 = 2,
+ UNWIND_X86_64_REG_R13 = 3,
+ UNWIND_X86_64_REG_R14 = 4,
+ UNWIND_X86_64_REG_R15 = 5,
+ UNWIND_X86_64_REG_RBP = 6,
+};
+//
+// For x86_64 there are four modes for the compact unwind encoding:
+// UNWIND_X86_64_MODE_RBP_FRAME:
+// RBP based frame where RBP is pushed on the stack immediately after the return
+// address, then RSP is moved to RBP. Thus, to unwind, RSP is restored with the
+// current RBP value, then RBP is restored by popping off the stack, and the return
+// is done by popping the stack once more into the pc.
+// All non-volatile registers that need to be restored must have been saved
+// in a small range in the stack that starts RBP-8 to RBP-2040. The offset/8
+// is encoded in the UNWIND_X86_64_RBP_FRAME_OFFSET bits. The registers saved
+// are encoded in the UNWIND_X86_64_RBP_FRAME_REGISTERS bits as five 3-bit entries.
+// Each entry contains which register to restore.
+// UNWIND_X86_64_MODE_STACK_IMMD:
+// A "frameless" (RBP not used as frame pointer) function with a small
+// constant stack size. To return, a constant (encoded in the compact
+// unwind encoding) is added to the RSP. Then the return is done by
+// popping the stack into the pc.
+// All non-volatile registers that need to be restored must have been saved
+// on the stack immediately after the return address. The stack_size/8 is
+// encoded in the UNWIND_X86_64_FRAMELESS_STACK_SIZE (max stack size is 2048).
+// The number of registers saved is encoded in UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT.
+// UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION contains which registers were
+// saved and their order.
+// UNWIND_X86_64_MODE_STACK_IND:
+// A "frameless" (RBP not used as frame pointer) function large constant
+// stack size. This case is like the previous, except the stack size is too
+// large to encode in the compact unwind encoding. Instead it requires that
+// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact
+// encoding contains the offset to the nnnnnnnn value in the function in
+// UNWIND_X86_64_FRAMELESS_STACK_SIZE.
+// UNWIND_X86_64_MODE_DWARF:
+// No compact unwind encoding is available. Instead the low 24-bits of the
// compact encoding is the offset of the DWARF FDE in the __eh_frame section.
-// This mode is never used in object files. It is only generated by the
+// This mode is never used in object files. It is only generated by the
// linker in final linked images which have only DWARF unwind info for a
-// function.
-//
-
-
-// ARM64
-//
-// 1-bit: start
-// 1-bit: has lsda
-// 2-bit: personality index
-//
+// function.
+//
+
+
+// ARM64
+//
+// 1-bit: start
+// 1-bit: has lsda
+// 2-bit: personality index
+//
// 4-bits: 4=frame-based, 3=DWARF, 2=frameless
-// frameless:
-// 12-bits of stack size
-// frame-based:
-// 4-bits D reg pairs saved
-// 5-bits X reg pairs saved
+// frameless:
+// 12-bits of stack size
+// frame-based:
+// 4-bits D reg pairs saved
+// 5-bits X reg pairs saved
// DWARF:
// 24-bits offset of DWARF FDE in __eh_frame section
-//
-enum {
- UNWIND_ARM64_MODE_MASK = 0x0F000000,
- UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
- UNWIND_ARM64_MODE_DWARF = 0x03000000,
- UNWIND_ARM64_MODE_FRAME = 0x04000000,
-
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
- UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
- UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
- UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
- UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
- UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
- UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
- UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
- UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800,
-
- UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000,
- UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF,
-};
-// For arm64 there are three modes for the compact unwind encoding:
-// UNWIND_ARM64_MODE_FRAME:
-// This is a standard arm64 prolog where FP/LR are immediately pushed on the
-// stack, then SP is copied to FP. If there are any non-volatile registers
-// saved, then are copied into the stack frame in pairs in a contiguous
-// range right below the saved FP/LR pair. Any subset of the five X pairs
-// and four D pairs can be saved, but the memory layout must be in register
-// number order.
-// UNWIND_ARM64_MODE_FRAMELESS:
-// A "frameless" leaf function, where FP/LR are not saved. The return address
-// remains in LR throughout the function. If any non-volatile registers
-// are saved, they must be pushed onto the stack before any stack space is
-// allocated for local variables. The stack sized (including any saved
-// non-volatile registers) divided by 16 is encoded in the bits
-// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK.
-// UNWIND_ARM64_MODE_DWARF:
-// No compact unwind encoding is available. Instead the low 24-bits of the
+//
+enum {
+ UNWIND_ARM64_MODE_MASK = 0x0F000000,
+ UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
+ UNWIND_ARM64_MODE_DWARF = 0x03000000,
+ UNWIND_ARM64_MODE_FRAME = 0x04000000,
+
+ UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
+ UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
+ UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
+ UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
+ UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
+ UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
+ UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
+ UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
+ UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800,
+
+ UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000,
+ UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF,
+};
+// For arm64 there are three modes for the compact unwind encoding:
+// UNWIND_ARM64_MODE_FRAME:
+// This is a standard arm64 prolog where FP/LR are immediately pushed on the
+// stack, then SP is copied to FP. If there are any non-volatile registers
+// saved, they are copied into the stack frame in pairs in a contiguous
+// range right below the saved FP/LR pair. Any subset of the five X pairs
+// and four D pairs can be saved, but the memory layout must be in register
+// number order.
+// UNWIND_ARM64_MODE_FRAMELESS:
+// A "frameless" leaf function, where FP/LR are not saved. The return address
+// remains in LR throughout the function. If any non-volatile registers
+// are saved, they must be pushed onto the stack before any stack space is
+// allocated for local variables. The stack size (including any saved
+// non-volatile registers) divided by 16 is encoded in the bits
+// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK.
+// UNWIND_ARM64_MODE_DWARF:
+// No compact unwind encoding is available. Instead the low 24-bits of the
// compact encoding is the offset of the DWARF FDE in the __eh_frame section.
-// This mode is never used in object files. It is only generated by the
+// This mode is never used in object files. It is only generated by the
// linker in final linked images which have only DWARF unwind info for a
-// function.
-//
-
-
-
-
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// Relocatable Object Files: __LD,__compact_unwind
-//
-////////////////////////////////////////////////////////////////////////////////
-
-//
-// A compiler can generated compact unwind information for a function by adding
-// a "row" to the __LD,__compact_unwind section. This section has the
-// S_ATTR_DEBUG bit set, so the section will be ignored by older linkers.
-// It is removed by the new linker, so never ends up in final executables.
-// This section is a table, initially with one row per function (that needs
-// unwind info). The table columns and some conceptual entries are:
-//
-// range-start pointer to start of function/range
-// range-length
-// compact-unwind-encoding 32-bit encoding
-// personality-function or zero if no personality function
-// lsda or zero if no LSDA data
-//
-// The length and encoding fields are 32-bits. The other are all pointer sized.
-//
-// In x86_64 assembly, these entry would look like:
-//
-// .section __LD,__compact_unwind,regular,debug
-//
-// #compact unwind for _foo
-// .quad _foo
-// .set L1,LfooEnd-_foo
-// .long L1
-// .long 0x01010001
-// .quad 0
-// .quad 0
-//
-// #compact unwind for _bar
-// .quad _bar
-// .set L2,LbarEnd-_bar
-// .long L2
-// .long 0x01020011
-// .quad __gxx_personality
-// .quad except_tab1
-//
-//
-// Notes: There is no need for any labels in the the __compact_unwind section.
-// The use of the .set directive is to force the evaluation of the
-// range-length at assembly time, instead of generating relocations.
-//
-// To support future compiler optimizations where which non-volatile registers
-// are saved changes within a function (e.g. delay saving non-volatiles until
-// necessary), there can by multiple lines in the __compact_unwind table for one
-// function, each with a different (non-overlapping) range and each with
-// different compact unwind encodings that correspond to the non-volatiles
-// saved at that range of the function.
-//
-// If a particular function is so wacky that there is no compact unwind way
+// function.
+//
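A similarly hedged sketch for the arm64 frameless case: the masked bits hold stack_size/16, as described above (the helper name is hypothetical):

    static uint32_t arm64_frameless_stack_size(uint32_t encoding) {
      if ((encoding & UNWIND_ARM64_MODE_MASK) != UNWIND_ARM64_MODE_FRAMELESS)
        return 0;  // not a frameless encoding
      // bits 12..23 store stack_size/16, so shift down and scale back up
      return ((encoding & UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK) >> 12) * 16;
    }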
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Relocatable Object Files: __LD,__compact_unwind
+//
+////////////////////////////////////////////////////////////////////////////////
+
+//
+// A compiler can generate compact unwind information for a function by adding
+// a "row" to the __LD,__compact_unwind section. This section has the
+// S_ATTR_DEBUG bit set, so the section will be ignored by older linkers.
+// It is removed by the new linker, so never ends up in final executables.
+// This section is a table, initially with one row per function (that needs
+// unwind info). The table columns and some conceptual entries are:
+//
+// range-start pointer to start of function/range
+// range-length
+// compact-unwind-encoding 32-bit encoding
+// personality-function or zero if no personality function
+// lsda or zero if no LSDA data
+//
+// The length and encoding fields are 32 bits. The others are all pointer sized.
+//
+// In x86_64 assembly, these entries would look like:
+//
+// .section __LD,__compact_unwind,regular,debug
+//
+// #compact unwind for _foo
+// .quad _foo
+// .set L1,LfooEnd-_foo
+// .long L1
+// .long 0x01010001
+// .quad 0
+// .quad 0
+//
+// #compact unwind for _bar
+// .quad _bar
+// .set L2,LbarEnd-_bar
+// .long L2
+// .long 0x01020011
+// .quad __gxx_personality
+// .quad except_tab1
+//
+//
+// Notes: There is no need for any labels in the __compact_unwind section.
+// The use of the .set directive is to force the evaluation of the
+// range-length at assembly time, instead of generating relocations.
+//
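Expressed as a hypothetical C struct (the struct and field names are illustrative, not defined by this header), one row of that table on a 64-bit target mirrors the .quad/.long layout shown in the assembly above:

    struct compact_unwind_row_64 {
      uint64_t                  range_start;   // pointer to start of function/range
      uint32_t                  range_length;
      compact_unwind_encoding_t encoding;      // 32-bit compact unwind encoding
      uint64_t                  personality;   // or zero if no personality function
      uint64_t                  lsda;          // or zero if no LSDA data
    };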
+// To support future compiler optimizations where the set of saved non-volatile
+// registers changes within a function (e.g. delaying saves until they are
+// needed), there can be multiple rows in the __compact_unwind table for one
+// function, each with a different (non-overlapping) range and each with
+// different compact unwind encodings that correspond to the non-volatiles
+// saved at that range of the function.
+//
+// If a particular function is so wacky that there is no compact unwind way
// to encode it, then the compiler can emit traditional DWARF unwind info.
-// The runtime will use which ever is available.
-//
-// Runtime support for compact unwind encodings are only available on 10.6
-// and later. So, the compiler should not generate it when targeting pre-10.6.
-
-
-
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// Final Linked Images: __TEXT,__unwind_info
-//
-////////////////////////////////////////////////////////////////////////////////
-
-//
-// The __TEXT,__unwind_info section is laid out for an efficient two level lookup.
-// The header of the section contains a coarse index that maps function address
-// to the page (4096 byte block) containing the unwind info for that function.
-//
-
-#define UNWIND_SECTION_VERSION 1
-struct unwind_info_section_header
-{
- uint32_t version; // UNWIND_SECTION_VERSION
- uint32_t commonEncodingsArraySectionOffset;
- uint32_t commonEncodingsArrayCount;
- uint32_t personalityArraySectionOffset;
- uint32_t personalityArrayCount;
- uint32_t indexSectionOffset;
- uint32_t indexCount;
- // compact_unwind_encoding_t[]
- // uint32_t personalities[]
- // unwind_info_section_header_index_entry[]
- // unwind_info_section_header_lsda_index_entry[]
-};
-
-struct unwind_info_section_header_index_entry
-{
- uint32_t functionOffset;
- uint32_t secondLevelPagesSectionOffset; // section offset to start of regular or compress page
- uint32_t lsdaIndexArraySectionOffset; // section offset to start of lsda_index array for this range
-};
-
-struct unwind_info_section_header_lsda_index_entry
-{
- uint32_t functionOffset;
- uint32_t lsdaOffset;
-};
-
-//
-// There are two kinds of second level index pages: regular and compressed.
-// A compressed page can hold up to 1021 entries, but it cannot be used
-// if too many different encoding types are used. The regular page holds
-// 511 entries.
-//
-
-struct unwind_info_regular_second_level_entry
-{
- uint32_t functionOffset;
- compact_unwind_encoding_t encoding;
-};
-
-#define UNWIND_SECOND_LEVEL_REGULAR 2
-struct unwind_info_regular_second_level_page_header
-{
- uint32_t kind; // UNWIND_SECOND_LEVEL_REGULAR
- uint16_t entryPageOffset;
- uint16_t entryCount;
- // entry array
-};
-
-#define UNWIND_SECOND_LEVEL_COMPRESSED 3
-struct unwind_info_compressed_second_level_page_header
-{
- uint32_t kind; // UNWIND_SECOND_LEVEL_COMPRESSED
- uint16_t entryPageOffset;
- uint16_t entryCount;
- uint16_t encodingsPageOffset;
- uint16_t encodingsCount;
- // 32-bit entry array
- // encodings array
-};
-
-#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry) (entry & 0x00FFFFFF)
-#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry) ((entry >> 24) & 0xFF)
-
-
-
-#endif
-
+// The runtime will use whichever is available.
+//
+// Runtime support for compact unwind encodings is only available on 10.6
+// and later. So, the compiler should not generate it when targeting pre-10.6.
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Final Linked Images: __TEXT,__unwind_info
+//
+////////////////////////////////////////////////////////////////////////////////
+
+//
+// The __TEXT,__unwind_info section is laid out for an efficient two-level lookup.
+// The header of the section contains a coarse index that maps a function address
+// to the page (4096-byte block) containing the unwind info for that function.
+//
+
+#define UNWIND_SECTION_VERSION 1
+struct unwind_info_section_header
+{
+ uint32_t version; // UNWIND_SECTION_VERSION
+ uint32_t commonEncodingsArraySectionOffset;
+ uint32_t commonEncodingsArrayCount;
+ uint32_t personalityArraySectionOffset;
+ uint32_t personalityArrayCount;
+ uint32_t indexSectionOffset;
+ uint32_t indexCount;
+ // compact_unwind_encoding_t[]
+ // uint32_t personalities[]
+ // unwind_info_section_header_index_entry[]
+ // unwind_info_section_header_lsda_index_entry[]
+};
+
+struct unwind_info_section_header_index_entry
+{
+ uint32_t functionOffset;
+ uint32_t secondLevelPagesSectionOffset; // section offset to start of regular or compressed page
+ uint32_t lsdaIndexArraySectionOffset; // section offset to start of lsda_index array for this range
+};
+
+struct unwind_info_section_header_lsda_index_entry
+{
+ uint32_t functionOffset;
+ uint32_t lsdaOffset;
+};
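A rough sketch of the first-level lookup using the structs above; the function name and the linear scan are illustrative only (a real implementation could binary-search, assuming the index entries are sorted by functionOffset):

    static const struct unwind_info_section_header_index_entry *
    find_first_level_entry(const struct unwind_info_section_header *hdr,
                           const uint8_t *sectionBase, uint32_t funcOffset) {
      const struct unwind_info_section_header_index_entry *entries =
          (const struct unwind_info_section_header_index_entry *)
              (sectionBase + hdr->indexSectionOffset);
      const struct unwind_info_section_header_index_entry *best = 0;
      for (uint32_t i = 0; i < hdr->indexCount; ++i) {
        if (entries[i].functionOffset <= funcOffset)
          best = &entries[i];   // last entry at or below the target wins
        else
          break;                // assumes entries sorted by functionOffset
      }
      return best;              // points at the second-level page to search next
    }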
+
+//
+// There are two kinds of second level index pages: regular and compressed.
+// A compressed page can hold up to 1021 entries, but it cannot be used
+// if too many different encoding types are used. The regular page holds
+// 511 entries.
+//
+
+struct unwind_info_regular_second_level_entry
+{
+ uint32_t functionOffset;
+ compact_unwind_encoding_t encoding;
+};
+
+#define UNWIND_SECOND_LEVEL_REGULAR 2
+struct unwind_info_regular_second_level_page_header
+{
+ uint32_t kind; // UNWIND_SECOND_LEVEL_REGULAR
+ uint16_t entryPageOffset;
+ uint16_t entryCount;
+ // entry array
+};
+
+#define UNWIND_SECOND_LEVEL_COMPRESSED 3
+struct unwind_info_compressed_second_level_page_header
+{
+ uint32_t kind; // UNWIND_SECOND_LEVEL_COMPRESSED
+ uint16_t entryPageOffset;
+ uint16_t entryCount;
+ uint16_t encodingsPageOffset;
+ uint16_t encodingsCount;
+ // 32-bit entry array
+ // encodings array
+};
+
+#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry) (entry & 0x00FFFFFF)
+#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry) ((entry >> 24) & 0xFF)
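A minimal usage sketch for the two macros above, assuming `entry` is one 32-bit value read from a compressed page's entry array (the helper name is hypothetical):

    static void decode_compressed_entry(uint32_t entry) {
      uint32_t funcOffset    = UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry);    // low 24 bits
      uint32_t encodingIndex = UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry); // high 8 bits
      (void)funcOffset;     // offset of the function within the page's range
      (void)encodingIndex;  // selects an encoding from the encodings arrays
    }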
+
+
+
+#endif
+
diff --git a/contrib/libs/libunwind/include/unwind.h b/contrib/libs/libunwind/include/unwind.h
index 13ff2b6bf8..6949e063dd 100644
--- a/contrib/libs/libunwind/include/unwind.h
+++ b/contrib/libs/libunwind/include/unwind.h
@@ -1,190 +1,190 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// C++ ABI Level 1 ABI documented at:
+//
+//
+// C++ ABI Level 1 ABI documented at:
// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __UNWIND_H__
-#define __UNWIND_H__
-
-#include "__libunwind_config.h"
-
-#include <stdint.h>
-#include <stddef.h>
-
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __UNWIND_H__
+#define __UNWIND_H__
+
+#include "__libunwind_config.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) && defined(_WIN32)
#include <windows.h>
#include <ntverp.h>
#endif
-#if defined(__APPLE__)
-#define LIBUNWIND_UNAVAIL __attribute__ (( deprecated ))
-#else
-#define LIBUNWIND_UNAVAIL
-#endif
-
-typedef enum {
- _URC_NO_REASON = 0,
- _URC_OK = 0,
- _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
- _URC_FATAL_PHASE2_ERROR = 2,
- _URC_FATAL_PHASE1_ERROR = 3,
- _URC_NORMAL_STOP = 4,
- _URC_END_OF_STACK = 5,
- _URC_HANDLER_FOUND = 6,
- _URC_INSTALL_CONTEXT = 7,
- _URC_CONTINUE_UNWIND = 8,
+#if defined(__APPLE__)
+#define LIBUNWIND_UNAVAIL __attribute__ (( deprecated ))
+#else
+#define LIBUNWIND_UNAVAIL
+#endif
+
+typedef enum {
+ _URC_NO_REASON = 0,
+ _URC_OK = 0,
+ _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+ _URC_FATAL_PHASE2_ERROR = 2,
+ _URC_FATAL_PHASE1_ERROR = 3,
+ _URC_NORMAL_STOP = 4,
+ _URC_END_OF_STACK = 5,
+ _URC_HANDLER_FOUND = 6,
+ _URC_INSTALL_CONTEXT = 7,
+ _URC_CONTINUE_UNWIND = 8,
#if defined(_LIBUNWIND_ARM_EHABI)
- _URC_FAILURE = 9
-#endif
-} _Unwind_Reason_Code;
-
-typedef enum {
- _UA_SEARCH_PHASE = 1,
- _UA_CLEANUP_PHASE = 2,
- _UA_HANDLER_FRAME = 4,
- _UA_FORCE_UNWIND = 8,
- _UA_END_OF_STACK = 16 // gcc extension to C++ ABI
-} _Unwind_Action;
-
-typedef struct _Unwind_Context _Unwind_Context; // opaque
-
+ _URC_FAILURE = 9
+#endif
+} _Unwind_Reason_Code;
+
+typedef enum {
+ _UA_SEARCH_PHASE = 1,
+ _UA_CLEANUP_PHASE = 2,
+ _UA_HANDLER_FRAME = 4,
+ _UA_FORCE_UNWIND = 8,
+ _UA_END_OF_STACK = 16 // gcc extension to C++ ABI
+} _Unwind_Action;
+
+typedef struct _Unwind_Context _Unwind_Context; // opaque
+
#if defined(_LIBUNWIND_ARM_EHABI)
#include "unwind_arm_ehabi.h"
#else
#include "unwind_itanium.h"
#endif
-
-typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
- (int version,
- _Unwind_Action actions,
+
+typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
+ (int version,
+ _Unwind_Action actions,
_Unwind_Exception_Class exceptionClass,
- _Unwind_Exception* exceptionObject,
- struct _Unwind_Context* context,
+ _Unwind_Exception* exceptionObject,
+ struct _Unwind_Context* context,
void* stop_parameter);
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context);
-extern uintptr_t
- _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context);
-#ifdef __USING_SJLJ_EXCEPTIONS__
-extern _Unwind_Reason_Code
- _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *exception_object,
- _Unwind_Stop_Fn stop, void *stop_parameter);
-#else
-extern _Unwind_Reason_Code
- _Unwind_ForcedUnwind(_Unwind_Exception *exception_object,
- _Unwind_Stop_Fn stop, void *stop_parameter);
-#endif
-
-#ifdef __USING_SJLJ_EXCEPTIONS__
-typedef struct _Unwind_FunctionContext *_Unwind_FunctionContext_t;
-extern void _Unwind_SjLj_Register(_Unwind_FunctionContext_t fc);
-extern void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t fc);
-#endif
-
-//
-// The following are semi-suppoted extensions to the C++ ABI
-//
-
-//
-// called by __cxa_rethrow().
-//
-#ifdef __USING_SJLJ_EXCEPTIONS__
-extern _Unwind_Reason_Code
- _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *exception_object);
-#else
-extern _Unwind_Reason_Code
- _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object);
-#endif
-
-// _Unwind_Backtrace() is a gcc extension that walks the stack and calls the
-// _Unwind_Trace_Fn once per frame until it reaches the bottom of the stack
-// or the _Unwind_Trace_Fn function returns something other than _URC_NO_REASON.
-typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
- void *);
-extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
-
-// _Unwind_GetCFA is a gcc extension that can be called from within a
-// personality handler to get the CFA (stack pointer before call) of
-// current frame.
-extern uintptr_t _Unwind_GetCFA(struct _Unwind_Context *);
-
-
-// _Unwind_GetIPInfo is a gcc extension that can be called from within a
-// personality handler. Similar to _Unwind_GetIP() but also returns in
-// *ipBefore a non-zero value if the instruction pointer is at or before the
-// instruction causing the unwind. Normally, in a function call, the IP returned
-// is the return address which is after the call instruction and may be past the
-// end of the function containing the call instruction.
-extern uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
- int *ipBefore);
-
-
-// __register_frame() is used with dynamically generated code to register the
-// FDE for a generated (JIT) code. The FDE must use pc-rel addressing to point
-// to its function and optional LSDA.
-// __register_frame() has existed in all versions of Mac OS X, but in 10.4 and
-// 10.5 it was buggy and did not actually register the FDE with the unwinder.
-// In 10.6 and later it does register properly.
-extern void __register_frame(const void *fde);
-extern void __deregister_frame(const void *fde);
-
-// _Unwind_Find_FDE() will locate the FDE if the pc is in some function that has
-// an associated FDE. Note, Mac OS X 10.6 and later, introduces "compact unwind
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context);
+extern uintptr_t
+ _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context);
+#ifdef __USING_SJLJ_EXCEPTIONS__
+extern _Unwind_Reason_Code
+ _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *exception_object,
+ _Unwind_Stop_Fn stop, void *stop_parameter);
+#else
+extern _Unwind_Reason_Code
+ _Unwind_ForcedUnwind(_Unwind_Exception *exception_object,
+ _Unwind_Stop_Fn stop, void *stop_parameter);
+#endif
+
+#ifdef __USING_SJLJ_EXCEPTIONS__
+typedef struct _Unwind_FunctionContext *_Unwind_FunctionContext_t;
+extern void _Unwind_SjLj_Register(_Unwind_FunctionContext_t fc);
+extern void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t fc);
+#endif
+
+//
+// The following are semi-supported extensions to the C++ ABI
+//
+
+//
+// called by __cxa_rethrow().
+//
+#ifdef __USING_SJLJ_EXCEPTIONS__
+extern _Unwind_Reason_Code
+ _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *exception_object);
+#else
+extern _Unwind_Reason_Code
+ _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object);
+#endif
+
+// _Unwind_Backtrace() is a gcc extension that walks the stack and calls the
+// _Unwind_Trace_Fn once per frame until it reaches the bottom of the stack
+// or the _Unwind_Trace_Fn function returns something other than _URC_NO_REASON.
+typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
+ void *);
+extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
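A minimal usage sketch (assuming a hosted environment where _Unwind_GetIP() is available); the callback runs once per frame until the walk reaches the bottom of the stack:

    static _Unwind_Reason_Code count_frames(struct _Unwind_Context *ctx, void *arg) {
      uintptr_t ip = _Unwind_GetIP(ctx);   // return address of this frame
      (void)ip;                            // a real backtrace would record it
      ++*(size_t *)arg;
      return _URC_NO_REASON;               // keep walking to the next frame
    }

    // size_t frames = 0;
    // _Unwind_Backtrace(count_frames, &frames);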
+
+// _Unwind_GetCFA is a gcc extension that can be called from within a
+// personality handler to get the CFA (stack pointer before call) of
+// current frame.
+extern uintptr_t _Unwind_GetCFA(struct _Unwind_Context *);
+
+
+// _Unwind_GetIPInfo is a gcc extension that can be called from within a
+// personality handler. Similar to _Unwind_GetIP() but also returns in
+// *ipBefore a non-zero value if the instruction pointer is at or before the
+// instruction causing the unwind. Normally, in a function call, the IP returned
+// is the return address which is after the call instruction and may be past the
+// end of the function containing the call instruction.
+extern uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
+ int *ipBefore);
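A short sketch of the common idiom, assuming it runs inside a personality routine with `context` in scope: when *ipBefore comes back zero, back the address up so table lookups land inside the call rather than after it.

    static uintptr_t call_site_pc(struct _Unwind_Context *context) {
      int ipBefore = 0;
      uintptr_t ip = _Unwind_GetIPInfo(context, &ipBefore);
      if (!ipBefore)
        --ip;     // use an address inside the call, not the return address
      return ip;  // suitable for searching the LSDA call-site table
    }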
+
+
+// __register_frame() is used with dynamically generated code to register the
+// FDE for generated (JIT) code. The FDE must use pc-rel addressing to point
+// to its function and optional LSDA.
+// __register_frame() has existed in all versions of Mac OS X, but in 10.4 and
+// 10.5 it was buggy and did not actually register the FDE with the unwinder.
+// In 10.6 and later it does register properly.
+extern void __register_frame(const void *fde);
+extern void __deregister_frame(const void *fde);
+
+// _Unwind_Find_FDE() will locate the FDE if the pc is in some function that has
+// an associated FDE. Note: Mac OS X 10.6 and later introduces "compact unwind
// info" which the runtime uses in preference to DWARF unwind info. This
-// function will only work if the target function has an FDE but no compact
-// unwind info.
-struct dwarf_eh_bases {
- uintptr_t tbase;
- uintptr_t dbase;
- uintptr_t func;
-};
-extern const void *_Unwind_Find_FDE(const void *pc, struct dwarf_eh_bases *);
-
-
-// This function attempts to find the start (address of first instruction) of
-// a function given an address inside the function. It only works if the
+// function will only work if the target function has an FDE but no compact
+// unwind info.
+struct dwarf_eh_bases {
+ uintptr_t tbase;
+ uintptr_t dbase;
+ uintptr_t func;
+};
+extern const void *_Unwind_Find_FDE(const void *pc, struct dwarf_eh_bases *);
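A hedged usage sketch of the declaration above: look up the FDE covering a code address and, on success, recover the enclosing function's start from dwarf_eh_bases.func (the wrapper name is hypothetical):

    static const void *fde_for_pc(const void *pc) {
      struct dwarf_eh_bases bases;
      const void *fde = _Unwind_Find_FDE(pc, &bases);
      // if fde is non-null, bases.func is the start of the function containing pc
      return fde;
    }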
+
+
+// This function attempts to find the start (address of first instruction) of
+// a function given an address inside the function. It only works if the
// function has an FDE (DWARF unwind info).
-// This function is unimplemented on Mac OS X 10.6 and later. Instead, use
-// _Unwind_Find_FDE() and look at the dwarf_eh_bases.func result.
-extern void *_Unwind_FindEnclosingFunction(void *pc);
-
-// Mac OS X does not support text-rel and data-rel addressing so these functions
-// are unimplemented
-extern uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context)
- LIBUNWIND_UNAVAIL;
-extern uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context)
- LIBUNWIND_UNAVAIL;
-
-// Mac OS X 10.4 and 10.5 had implementations of these functions in
-// libgcc_s.dylib, but they never worked.
-/// These functions are no longer available on Mac OS X.
-extern void __register_frame_info_bases(const void *fde, void *ob, void *tb,
- void *db) LIBUNWIND_UNAVAIL;
-extern void __register_frame_info(const void *fde, void *ob)
- LIBUNWIND_UNAVAIL;
-extern void __register_frame_info_table_bases(const void *fde, void *ob,
- void *tb, void *db)
- LIBUNWIND_UNAVAIL;
-extern void __register_frame_info_table(const void *fde, void *ob)
- LIBUNWIND_UNAVAIL;
-extern void __register_frame_table(const void *fde)
- LIBUNWIND_UNAVAIL;
-extern void *__deregister_frame_info(const void *fde)
- LIBUNWIND_UNAVAIL;
-extern void *__deregister_frame_info_bases(const void *fde)
- LIBUNWIND_UNAVAIL;
-
+// This function is unimplemented on Mac OS X 10.6 and later. Instead, use
+// _Unwind_Find_FDE() and look at the dwarf_eh_bases.func result.
+extern void *_Unwind_FindEnclosingFunction(void *pc);
+
+// Mac OS X does not support text-rel and data-rel addressing, so these functions
+// are unimplemented.
+extern uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context)
+ LIBUNWIND_UNAVAIL;
+extern uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context)
+ LIBUNWIND_UNAVAIL;
+
+// Mac OS X 10.4 and 10.5 had implementations of these functions in
+// libgcc_s.dylib, but they never worked.
+/// These functions are no longer available on Mac OS X.
+extern void __register_frame_info_bases(const void *fde, void *ob, void *tb,
+ void *db) LIBUNWIND_UNAVAIL;
+extern void __register_frame_info(const void *fde, void *ob)
+ LIBUNWIND_UNAVAIL;
+extern void __register_frame_info_table_bases(const void *fde, void *ob,
+ void *tb, void *db)
+ LIBUNWIND_UNAVAIL;
+extern void __register_frame_info_table(const void *fde, void *ob)
+ LIBUNWIND_UNAVAIL;
+extern void __register_frame_table(const void *fde)
+ LIBUNWIND_UNAVAIL;
+extern void *__deregister_frame_info(const void *fde)
+ LIBUNWIND_UNAVAIL;
+extern void *__deregister_frame_info_bases(const void *fde)
+ LIBUNWIND_UNAVAIL;
+
#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
#ifndef _WIN32
typedef struct _EXCEPTION_RECORD EXCEPTION_RECORD;
@@ -215,8 +215,8 @@ typedef struct _Unwind_Backtrace_Buffer {
void* backtrace[_YNDX_LIBUNWIND_EXCEPTION_BACKTRACE_SIZE];
} _Unwind_Backtrace_Buffer;
#endif
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __UNWIND_H__
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __UNWIND_H__
diff --git a/contrib/libs/libunwind/include/unwind_arm_ehabi.h b/contrib/libs/libunwind/include/unwind_arm_ehabi.h
index 178834a241..6277a1457f 100644
--- a/contrib/libs/libunwind/include/unwind_arm_ehabi.h
+++ b/contrib/libs/libunwind/include/unwind_arm_ehabi.h
@@ -1,170 +1,170 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// C++ ABI Level 1 ABI documented at:
+//
+//
+// C++ ABI Level 1 ABI documented at:
// https://github.com/ARM-software/abi-aa/blob/main/ehabi32/ehabi32.rst
-//
-//===----------------------------------------------------------------------===//
-
+//
+//===----------------------------------------------------------------------===//
+
#ifndef __ARM_EHABI_UNWIND_H__
#define __ARM_EHABI_UNWIND_H__
-
-typedef uint32_t _Unwind_State;
-
-static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0;
-static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1;
-static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2;
+
+typedef uint32_t _Unwind_State;
+
+static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0;
+static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1;
+static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2;
static const _Unwind_State _US_ACTION_MASK = 3;
-/* Undocumented flag for force unwinding. */
-static const _Unwind_State _US_FORCE_UNWIND = 8;
-
-typedef uint32_t _Unwind_EHT_Header;
-
-struct _Unwind_Control_Block;
-typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+/* Undocumented flag for force unwinding. */
+static const _Unwind_State _US_FORCE_UNWIND = 8;
+
+typedef uint32_t _Unwind_EHT_Header;
+
+struct _Unwind_Control_Block;
+typedef struct _Unwind_Control_Block _Unwind_Control_Block;
#define _Unwind_Exception _Unwind_Control_Block /* Alias */
typedef uint8_t _Unwind_Exception_Class[8];
-
-struct _Unwind_Control_Block {
+
+struct _Unwind_Control_Block {
_Unwind_Exception_Class exception_class;
- void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*);
-
- /* Unwinder cache, private fields for the unwinder's use */
- struct {
- uint32_t reserved1; /* init reserved1 to 0, then don't touch */
- uint32_t reserved2;
- uint32_t reserved3;
- uint32_t reserved4;
- uint32_t reserved5;
- } unwinder_cache;
-
- /* Propagation barrier cache (valid after phase 1): */
- struct {
- uint32_t sp;
- uint32_t bitpattern[5];
- } barrier_cache;
-
- /* Cleanup cache (preserved over cleanup): */
- struct {
- uint32_t bitpattern[4];
- } cleanup_cache;
-
- /* Pr cache (for pr's benefit): */
- struct {
- uint32_t fnstart; /* function start address */
- _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */
- uint32_t additional;
- uint32_t reserved1;
- } pr_cache;
-
- long long int :0; /* Enforce the 8-byte alignment */
+ void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*);
+
+ /* Unwinder cache, private fields for the unwinder's use */
+ struct {
+ uint32_t reserved1; /* init reserved1 to 0, then don't touch */
+ uint32_t reserved2;
+ uint32_t reserved3;
+ uint32_t reserved4;
+ uint32_t reserved5;
+ } unwinder_cache;
+
+ /* Propagation barrier cache (valid after phase 1): */
+ struct {
+ uint32_t sp;
+ uint32_t bitpattern[5];
+ } barrier_cache;
+
+ /* Cleanup cache (preserved over cleanup): */
+ struct {
+ uint32_t bitpattern[4];
+ } cleanup_cache;
+
+ /* Pr cache (for pr's benefit): */
+ struct {
+ uint32_t fnstart; /* function start address */
+ _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */
+ uint32_t additional;
+ uint32_t reserved1;
+ } pr_cache;
+
+ long long int :0; /* Enforce the 8-byte alignment */
} __attribute__((__aligned__(8)));
-
+
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
_Unwind_State state, _Unwind_Exception *exceptionObject,
struct _Unwind_Context *context);
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//
-// The following are the base functions documented by the C++ ABI
-//
-#ifdef __USING_SJLJ_EXCEPTIONS__
-extern _Unwind_Reason_Code
- _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
-extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
-#else
-extern _Unwind_Reason_Code
- _Unwind_RaiseException(_Unwind_Exception *exception_object);
-extern void _Unwind_Resume(_Unwind_Exception *exception_object);
-#endif
-extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
-
-typedef enum {
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// The following are the base functions documented by the C++ ABI
+//
+#ifdef __USING_SJLJ_EXCEPTIONS__
+extern _Unwind_Reason_Code
+ _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
+#else
+extern _Unwind_Reason_Code
+ _Unwind_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_Resume(_Unwind_Exception *exception_object);
+#endif
+extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
+
+typedef enum {
_UVRSC_CORE = 0, /* integer register */
_UVRSC_VFP = 1, /* vfp */
- _UVRSC_WMMXD = 3, /* Intel WMMX data register */
+ _UVRSC_WMMXD = 3, /* Intel WMMX data register */
_UVRSC_WMMXC = 4, /* Intel WMMX control register */
_UVRSC_PSEUDO = 5 /* Special purpose pseudo register */
-} _Unwind_VRS_RegClass;
-
-typedef enum {
- _UVRSD_UINT32 = 0,
- _UVRSD_VFPX = 1,
- _UVRSD_UINT64 = 3,
- _UVRSD_FLOAT = 4,
- _UVRSD_DOUBLE = 5
-} _Unwind_VRS_DataRepresentation;
-
-typedef enum {
- _UVRSR_OK = 0,
- _UVRSR_NOT_IMPLEMENTED = 1,
- _UVRSR_FAILED = 2
-} _Unwind_VRS_Result;
-
-extern void _Unwind_Complete(_Unwind_Exception* exception_object);
-
-extern _Unwind_VRS_Result
-_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t regno, _Unwind_VRS_DataRepresentation representation,
- void *valuep);
-
-extern _Unwind_VRS_Result
-_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t regno, _Unwind_VRS_DataRepresentation representation,
- void *valuep);
-
-extern _Unwind_VRS_Result
-_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t discriminator,
- _Unwind_VRS_DataRepresentation representation);
-
-#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE)
-#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern
-#else
-#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__
-#endif
-
-// These are de facto helper functions for ARM, which delegate the function
-// calls to _Unwind_VRS_Get/Set(). These are not a part of ARM EHABI
-// specification, thus these function MUST be inlined. Please don't replace
-// these with the "extern" function declaration; otherwise, the program
-// including this <unwind.h> header won't be ABI compatible and will result in
-// link error when we are linking the program with libgcc.
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) {
- uintptr_t value = 0;
- _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
- return value;
-}
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-void _Unwind_SetGR(struct _Unwind_Context *context, int index,
- uintptr_t value) {
- _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
-}
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
- // remove the thumb-bit before returning
- return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1);
-}
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) {
- uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1);
- _Unwind_SetGR(context, 15, value | thumb_bit);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
+} _Unwind_VRS_RegClass;
+
+typedef enum {
+ _UVRSD_UINT32 = 0,
+ _UVRSD_VFPX = 1,
+ _UVRSD_UINT64 = 3,
+ _UVRSD_FLOAT = 4,
+ _UVRSD_DOUBLE = 5
+} _Unwind_VRS_DataRepresentation;
+
+typedef enum {
+ _UVRSR_OK = 0,
+ _UVRSR_NOT_IMPLEMENTED = 1,
+ _UVRSR_FAILED = 2
+} _Unwind_VRS_Result;
+
+extern void _Unwind_Complete(_Unwind_Exception* exception_object);
+
+extern _Unwind_VRS_Result
+_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t regno, _Unwind_VRS_DataRepresentation representation,
+ void *valuep);
+
+extern _Unwind_VRS_Result
+_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t regno, _Unwind_VRS_DataRepresentation representation,
+ void *valuep);
+
+extern _Unwind_VRS_Result
+_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t discriminator,
+ _Unwind_VRS_DataRepresentation representation);
+
+#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE)
+#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern
+#else
+#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__
+#endif
+
+// These are de facto helper functions for ARM, which delegate the function
+// calls to _Unwind_VRS_Get/Set(). These are not a part of the ARM EHABI
+// specification, thus these functions MUST be inlined. Please don't replace
+// these with "extern" function declarations; otherwise, the program
+// including this <unwind.h> header won't be ABI compatible and will result in
+// a link error when linking the program with libgcc.
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) {
+ uintptr_t value = 0;
+ _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
+ return value;
+}
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+void _Unwind_SetGR(struct _Unwind_Context *context, int index,
+ uintptr_t value) {
+ _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
+}
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
+ // remove the thumb-bit before returning
+ return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1);
+}
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) {
+ uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1);
+ _Unwind_SetGR(context, 15, value | thumb_bit);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
#endif // __ARM_EHABI_UNWIND_H__
diff --git a/contrib/libs/libunwind/include/unwind_itanium.h b/contrib/libs/libunwind/include/unwind_itanium.h
index 9a415d243b..d94a6183be 100644
--- a/contrib/libs/libunwind/include/unwind_itanium.h
+++ b/contrib/libs/libunwind/include/unwind_itanium.h
@@ -1,76 +1,76 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// C++ ABI Level 1 ABI documented at:
+//
+//
+// C++ ABI Level 1 ABI documented at:
// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html
-//
-//===----------------------------------------------------------------------===//
-
+//
+//===----------------------------------------------------------------------===//
+
#ifndef __ITANIUM_UNWIND_H__
#define __ITANIUM_UNWIND_H__
-
-struct _Unwind_Context; // opaque
-struct _Unwind_Exception; // forward declaration
-typedef struct _Unwind_Exception _Unwind_Exception;
+
+struct _Unwind_Context; // opaque
+struct _Unwind_Exception; // forward declaration
+typedef struct _Unwind_Exception _Unwind_Exception;
typedef uint64_t _Unwind_Exception_Class;
-
-struct _Unwind_Exception {
+
+struct _Unwind_Exception {
_Unwind_Exception_Class exception_class;
- void (*exception_cleanup)(_Unwind_Reason_Code reason,
- _Unwind_Exception *exc);
+ void (*exception_cleanup)(_Unwind_Reason_Code reason,
+ _Unwind_Exception *exc);
#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
uintptr_t private_[6];
#else
- uintptr_t private_1; // non-zero means forced unwind
- uintptr_t private_2; // holds sp that phase1 found for phase2 to use
+ uintptr_t private_1; // non-zero means forced unwind
+ uintptr_t private_2; // holds sp that phase1 found for phase2 to use
#endif
#if __SIZEOF_POINTER__ == 4
// The implementation of _Unwind_Exception uses an attribute mode on the
// above fields which has the side effect of causing this whole struct to
 // round up to 32 bytes in size (48 with SEH). To be more explicit, we add
 // pad fields for binary compatibility.
- uint32_t reserved[3];
-#endif
+ uint32_t reserved[3];
+#endif
// The Itanium ABI requires that _Unwind_Exception objects are "double-word
// aligned". GCC has interpreted this to mean "use the maximum useful
// alignment for the target"; so do we.
} __attribute__((__aligned__));
-
+
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
int version, _Unwind_Action actions, uint64_t exceptionClass,
_Unwind_Exception *exceptionObject, struct _Unwind_Context *context);
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//
-// The following are the base functions documented by the C++ ABI
-//
-#ifdef __USING_SJLJ_EXCEPTIONS__
-extern _Unwind_Reason_Code
- _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
-extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
-#else
-extern _Unwind_Reason_Code
- _Unwind_RaiseException(_Unwind_Exception *exception_object);
-extern void _Unwind_Resume(_Unwind_Exception *exception_object);
-#endif
-extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
-
-
-extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index);
-extern void _Unwind_SetGR(struct _Unwind_Context *context, int index,
- uintptr_t new_value);
-extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context);
-extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value);
-
-#ifdef __cplusplus
-}
-#endif
-
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// The following are the base functions documented by the C++ ABI
+//
+#ifdef __USING_SJLJ_EXCEPTIONS__
+extern _Unwind_Reason_Code
+ _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
+#else
+extern _Unwind_Reason_Code
+ _Unwind_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_Resume(_Unwind_Exception *exception_object);
+#endif
+extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
+
+
+extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index);
+extern void _Unwind_SetGR(struct _Unwind_Context *context, int index,
+ uintptr_t new_value);
+extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context);
+extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif // __ITANIUM_UNWIND_H__
diff --git a/contrib/libs/libunwind/src/AddressSpace.hpp b/contrib/libs/libunwind/src/AddressSpace.hpp
index 6eff522593..0c4dfeb4e6 100644
--- a/contrib/libs/libunwind/src/AddressSpace.hpp
+++ b/contrib/libs/libunwind/src/AddressSpace.hpp
@@ -1,22 +1,22 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Abstracts accessing local vs remote address spaces.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __ADDRESSSPACE_HPP__
-#define __ADDRESSSPACE_HPP__
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
+//
+//
+// Abstracts accessing local vs remote address spaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __ADDRESSSPACE_HPP__
+#define __ADDRESSSPACE_HPP__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
#include "libunwind.h"
#include "config.h"
#include "dwarf2.h"
@@ -32,12 +32,12 @@
#endif
#if _LIBUNWIND_USE_DLADDR
-#include <dlfcn.h>
+#include <dlfcn.h>
#if defined(__ELF__) && defined(_LIBUNWIND_LINK_DL_LIB)
#pragma comment(lib, "dl")
-#endif
#endif
-
+#endif
+
#if defined(_LIBUNWIND_ARM_EHABI)
struct EHABIIndexEntry {
uint32_t functionOffset;
@@ -45,8 +45,8 @@ struct EHABIIndexEntry {
};
#endif
-#ifdef __APPLE__
-
+#ifdef __APPLE__
+
struct dyld_unwind_sections
{
const struct mach_header* mh;
@@ -55,15 +55,15 @@ struct EHABIIndexEntry {
const void* compact_unwind_section;
uintptr_t compact_unwind_section_length;
};
-
+
// In 10.7.0 or later, libSystem.dylib implements this function.
extern "C" bool _dyld_find_unwind_sections(void *, dyld_unwind_sections *);
-
+
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
-// When statically linked on bare-metal, the symbols for the EH table are looked
-// up without going through the dynamic loader.
-
+// When statically linked on bare-metal, the symbols for the EH table are looked
+// up without going through the dynamic loader.
+
// The following linker script may be used to produce the necessary sections and symbols.
// Unless the --eh-frame-hdr linker option is provided, the section is not generated
// and does not take space in the output file.
@@ -108,100 +108,100 @@ extern char __exidx_end;
#include <link.h>
-#endif
-
-namespace libunwind {
-
-/// Used by findUnwindSections() to return info about needed sections.
-struct UnwindInfoSections {
+#endif
+
+namespace libunwind {
+
+/// Used by findUnwindSections() to return info about needed sections.
+struct UnwindInfoSections {
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) || \
defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) || \
defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
// No dso_base for SEH.
- uintptr_t dso_base;
-#endif
+ uintptr_t dso_base;
+#endif
#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
size_t text_segment_length;
#endif
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- uintptr_t dwarf_section;
+ uintptr_t dwarf_section;
size_t dwarf_section_length;
-#endif
+#endif
#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
- uintptr_t dwarf_index_section;
+ uintptr_t dwarf_index_section;
size_t dwarf_index_section_length;
-#endif
+#endif
#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
- uintptr_t compact_unwind_section;
+ uintptr_t compact_unwind_section;
size_t compact_unwind_section_length;
-#endif
+#endif
#if defined(_LIBUNWIND_ARM_EHABI)
- uintptr_t arm_section;
+ uintptr_t arm_section;
size_t arm_section_length;
-#endif
-};
-
-
-/// LocalAddressSpace is used as a template parameter to UnwindCursor when
-/// unwinding a thread in the same process. The wrappers compile away,
-/// making local unwinds fast.
+#endif
+};
+
+
+/// LocalAddressSpace is used as a template parameter to UnwindCursor when
+/// unwinding a thread in the same process. The wrappers compile away,
+/// making local unwinds fast.
class _LIBUNWIND_HIDDEN LocalAddressSpace {
-public:
+public:
typedef uintptr_t pint_t;
typedef intptr_t sint_t;
- uint8_t get8(pint_t addr) {
- uint8_t val;
- memcpy(&val, (void *)addr, sizeof(val));
- return val;
- }
- uint16_t get16(pint_t addr) {
- uint16_t val;
- memcpy(&val, (void *)addr, sizeof(val));
- return val;
- }
- uint32_t get32(pint_t addr) {
- uint32_t val;
- memcpy(&val, (void *)addr, sizeof(val));
- return val;
- }
- uint64_t get64(pint_t addr) {
- uint64_t val;
- memcpy(&val, (void *)addr, sizeof(val));
- return val;
- }
- double getDouble(pint_t addr) {
- double val;
- memcpy(&val, (void *)addr, sizeof(val));
- return val;
- }
- v128 getVector(pint_t addr) {
- v128 val;
- memcpy(&val, (void *)addr, sizeof(val));
- return val;
- }
- uintptr_t getP(pint_t addr);
+ uint8_t get8(pint_t addr) {
+ uint8_t val;
+ memcpy(&val, (void *)addr, sizeof(val));
+ return val;
+ }
+ uint16_t get16(pint_t addr) {
+ uint16_t val;
+ memcpy(&val, (void *)addr, sizeof(val));
+ return val;
+ }
+ uint32_t get32(pint_t addr) {
+ uint32_t val;
+ memcpy(&val, (void *)addr, sizeof(val));
+ return val;
+ }
+ uint64_t get64(pint_t addr) {
+ uint64_t val;
+ memcpy(&val, (void *)addr, sizeof(val));
+ return val;
+ }
+ double getDouble(pint_t addr) {
+ double val;
+ memcpy(&val, (void *)addr, sizeof(val));
+ return val;
+ }
+ v128 getVector(pint_t addr) {
+ v128 val;
+ memcpy(&val, (void *)addr, sizeof(val));
+ return val;
+ }
+ uintptr_t getP(pint_t addr);
uint64_t getRegister(pint_t addr);
- static uint64_t getULEB128(pint_t &addr, pint_t end);
- static int64_t getSLEB128(pint_t &addr, pint_t end);
-
- pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
- pint_t datarelBase = 0);
- bool findFunctionName(pint_t addr, char *buf, size_t bufLen,
- unw_word_t *offset);
- bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info);
- bool findOtherFDE(pint_t targetAddr, pint_t &fde);
-
- static LocalAddressSpace sThisAddressSpace;
-};
-
-inline uintptr_t LocalAddressSpace::getP(pint_t addr) {
+ static uint64_t getULEB128(pint_t &addr, pint_t end);
+ static int64_t getSLEB128(pint_t &addr, pint_t end);
+
+ pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
+ pint_t datarelBase = 0);
+ bool findFunctionName(pint_t addr, char *buf, size_t bufLen,
+ unw_word_t *offset);
+ bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info);
+ bool findOtherFDE(pint_t targetAddr, pint_t &fde);
+
+ static LocalAddressSpace sThisAddressSpace;
+};
+
+inline uintptr_t LocalAddressSpace::getP(pint_t addr) {
#if __SIZEOF_POINTER__ == 8
- return get64(addr);
-#else
- return get32(addr);
-#endif
-}
-
+ return get64(addr);
+#else
+ return get32(addr);
+#endif
+}
+
inline uint64_t LocalAddressSpace::getRegister(pint_t addr) {
#if __SIZEOF_POINTER__ == 8 || defined(__mips64)
return get64(addr);
@@ -210,144 +210,144 @@ inline uint64_t LocalAddressSpace::getRegister(pint_t addr) {
#endif
}
-/// Read a ULEB128 into a 64-bit word.
-inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) {
- const uint8_t *p = (uint8_t *)addr;
- const uint8_t *pend = (uint8_t *)end;
- uint64_t result = 0;
- int bit = 0;
- do {
- uint64_t b;
-
- if (p == pend)
- _LIBUNWIND_ABORT("truncated uleb128 expression");
-
- b = *p & 0x7f;
-
- if (bit >= 64 || b << bit >> bit != b) {
- _LIBUNWIND_ABORT("malformed uleb128 expression");
- } else {
- result |= b << bit;
- bit += 7;
- }
- } while (*p++ >= 0x80);
- addr = (pint_t) p;
- return result;
-}
-
-/// Read a SLEB128 into a 64-bit word.
-inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) {
- const uint8_t *p = (uint8_t *)addr;
- const uint8_t *pend = (uint8_t *)end;
- int64_t result = 0;
- int bit = 0;
- uint8_t byte;
- do {
- if (p == pend)
- _LIBUNWIND_ABORT("truncated sleb128 expression");
- byte = *p++;
+/// Read a ULEB128 into a 64-bit word.
+inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) {
+ const uint8_t *p = (uint8_t *)addr;
+ const uint8_t *pend = (uint8_t *)end;
+ uint64_t result = 0;
+ int bit = 0;
+ do {
+ uint64_t b;
+
+ if (p == pend)
+ _LIBUNWIND_ABORT("truncated uleb128 expression");
+
+ b = *p & 0x7f;
+
+ if (bit >= 64 || b << bit >> bit != b) {
+ _LIBUNWIND_ABORT("malformed uleb128 expression");
+ } else {
+ result |= b << bit;
+ bit += 7;
+ }
+ } while (*p++ >= 0x80);
+ addr = (pint_t) p;
+ return result;
+}
+
+/// Read a SLEB128 into a 64-bit word.
+inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) {
+ const uint8_t *p = (uint8_t *)addr;
+ const uint8_t *pend = (uint8_t *)end;
+ int64_t result = 0;
+ int bit = 0;
+ uint8_t byte;
+ do {
+ if (p == pend)
+ _LIBUNWIND_ABORT("truncated sleb128 expression");
+ byte = *p++;
result |= (uint64_t)(byte & 0x7f) << bit;
- bit += 7;
- } while (byte & 0x80);
- // sign extend negative numbers
+ bit += 7;
+ } while (byte & 0x80);
+ // sign extend negative numbers
if ((byte & 0x40) != 0 && bit < 64)
- result |= (-1ULL) << bit;
- addr = (pint_t) p;
- return result;
-}
-
-inline LocalAddressSpace::pint_t
-LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
- pint_t datarelBase) {
- pint_t startAddr = addr;
- const uint8_t *p = (uint8_t *)addr;
- pint_t result;
-
- // first get value
- switch (encoding & 0x0F) {
- case DW_EH_PE_ptr:
- result = getP(addr);
- p += sizeof(pint_t);
- addr = (pint_t) p;
- break;
- case DW_EH_PE_uleb128:
- result = (pint_t)getULEB128(addr, end);
- break;
- case DW_EH_PE_udata2:
- result = get16(addr);
- p += 2;
- addr = (pint_t) p;
- break;
- case DW_EH_PE_udata4:
- result = get32(addr);
- p += 4;
- addr = (pint_t) p;
- break;
- case DW_EH_PE_udata8:
- result = (pint_t)get64(addr);
- p += 8;
- addr = (pint_t) p;
- break;
- case DW_EH_PE_sleb128:
- result = (pint_t)getSLEB128(addr, end);
- break;
- case DW_EH_PE_sdata2:
- // Sign extend from signed 16-bit value.
- result = (pint_t)(int16_t)get16(addr);
- p += 2;
- addr = (pint_t) p;
- break;
- case DW_EH_PE_sdata4:
- // Sign extend from signed 32-bit value.
- result = (pint_t)(int32_t)get32(addr);
- p += 4;
- addr = (pint_t) p;
- break;
- case DW_EH_PE_sdata8:
- result = (pint_t)get64(addr);
- p += 8;
- addr = (pint_t) p;
- break;
- default:
- _LIBUNWIND_ABORT("unknown pointer encoding");
- }
-
- // then add relative offset
- switch (encoding & 0x70) {
- case DW_EH_PE_absptr:
- // do nothing
- break;
- case DW_EH_PE_pcrel:
- result += startAddr;
- break;
- case DW_EH_PE_textrel:
- _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported");
- break;
- case DW_EH_PE_datarel:
- // DW_EH_PE_datarel is only valid in a few places, so the parameter has a
- // default value of 0, and we abort in the event that someone calls this
- // function with a datarelBase of 0 and DW_EH_PE_datarel encoding.
- if (datarelBase == 0)
- _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0");
- result += datarelBase;
- break;
- case DW_EH_PE_funcrel:
- _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported");
- break;
- case DW_EH_PE_aligned:
- _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported");
- break;
- default:
- _LIBUNWIND_ABORT("unknown pointer encoding");
- break;
- }
-
- if (encoding & DW_EH_PE_indirect)
- result = getP(result);
-
- return result;
-}
-
+ result |= (-1ULL) << bit;
+ addr = (pint_t) p;
+ return result;
+}
+
+inline LocalAddressSpace::pint_t
+LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
+ pint_t datarelBase) {
+ pint_t startAddr = addr;
+ const uint8_t *p = (uint8_t *)addr;
+ pint_t result;
+
+ // first get value
+ switch (encoding & 0x0F) {
+ case DW_EH_PE_ptr:
+ result = getP(addr);
+ p += sizeof(pint_t);
+ addr = (pint_t) p;
+ break;
+ case DW_EH_PE_uleb128:
+ result = (pint_t)getULEB128(addr, end);
+ break;
+ case DW_EH_PE_udata2:
+ result = get16(addr);
+ p += 2;
+ addr = (pint_t) p;
+ break;
+ case DW_EH_PE_udata4:
+ result = get32(addr);
+ p += 4;
+ addr = (pint_t) p;
+ break;
+ case DW_EH_PE_udata8:
+ result = (pint_t)get64(addr);
+ p += 8;
+ addr = (pint_t) p;
+ break;
+ case DW_EH_PE_sleb128:
+ result = (pint_t)getSLEB128(addr, end);
+ break;
+ case DW_EH_PE_sdata2:
+ // Sign extend from signed 16-bit value.
+ result = (pint_t)(int16_t)get16(addr);
+ p += 2;
+ addr = (pint_t) p;
+ break;
+ case DW_EH_PE_sdata4:
+ // Sign extend from signed 32-bit value.
+ result = (pint_t)(int32_t)get32(addr);
+ p += 4;
+ addr = (pint_t) p;
+ break;
+ case DW_EH_PE_sdata8:
+ result = (pint_t)get64(addr);
+ p += 8;
+ addr = (pint_t) p;
+ break;
+ default:
+ _LIBUNWIND_ABORT("unknown pointer encoding");
+ }
+
+ // then add relative offset
+ switch (encoding & 0x70) {
+ case DW_EH_PE_absptr:
+ // do nothing
+ break;
+ case DW_EH_PE_pcrel:
+ result += startAddr;
+ break;
+ case DW_EH_PE_textrel:
+ _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported");
+ break;
+ case DW_EH_PE_datarel:
+ // DW_EH_PE_datarel is only valid in a few places, so the parameter has a
+ // default value of 0, and we abort in the event that someone calls this
+ // function with a datarelBase of 0 and DW_EH_PE_datarel encoding.
+ if (datarelBase == 0)
+ _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0");
+ result += datarelBase;
+ break;
+ case DW_EH_PE_funcrel:
+ _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported");
+ break;
+ case DW_EH_PE_aligned:
+ _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported");
+ break;
+ default:
+ _LIBUNWIND_ABORT("unknown pointer encoding");
+ break;
+ }
+
+ if (encoding & DW_EH_PE_indirect)
+ result = getP(result);
+
+ return result;
+}
+
#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
// The ElfW() macro for pointer-size independent ELF header traversal is not
@@ -498,23 +498,23 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo,
#endif // defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
-inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
- UnwindInfoSections &info) {
-#ifdef __APPLE__
- dyld_unwind_sections dyldInfo;
- if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) {
- info.dso_base = (uintptr_t)dyldInfo.mh;
+inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
+ UnwindInfoSections &info) {
+#ifdef __APPLE__
+ dyld_unwind_sections dyldInfo;
+ if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) {
+ info.dso_base = (uintptr_t)dyldInfo.mh;
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- info.dwarf_section = (uintptr_t)dyldInfo.dwarf_section;
+ info.dwarf_section = (uintptr_t)dyldInfo.dwarf_section;
info.dwarf_section_length = (size_t)dyldInfo.dwarf_section_length;
- #endif
- info.compact_unwind_section = (uintptr_t)dyldInfo.compact_unwind_section;
+ #endif
+ info.compact_unwind_section = (uintptr_t)dyldInfo.compact_unwind_section;
info.compact_unwind_section_length = (size_t)dyldInfo.compact_unwind_section_length;
- return true;
- }
+ return true;
+ }
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
info.dso_base = 0;
- // Bare metal is statically linked, so no need to ask the dynamic loader
+ // Bare metal is statically linked, so no need to ask the dynamic loader
info.dwarf_section_length = (size_t)(&__eh_frame_end - &__eh_frame_start);
info.dwarf_section = (uintptr_t)(&__eh_frame_start);
_LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p",
@@ -529,7 +529,7 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
return true;
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
// Bare metal is statically linked, so no need to ask the dynamic loader
- info.arm_section = (uintptr_t)(&__exidx_start);
+ info.arm_section = (uintptr_t)(&__exidx_start);
info.arm_section_length = (size_t)(&__exidx_end - &__exidx_start);
_LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p",
(void *)info.arm_section, (void *)info.arm_section_length);
@@ -581,50 +581,50 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
(void)info;
return true;
#elif defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX)
- int length = 0;
+ int length = 0;
info.arm_section =
(uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length);
info.arm_section_length = (size_t)length * sizeof(EHABIIndexEntry);
- if (info.arm_section && info.arm_section_length)
- return true;
+ if (info.arm_section && info.arm_section_length)
+ return true;
#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
- dl_iterate_cb_data cb_data = {this, &info, targetAddr};
+ dl_iterate_cb_data cb_data = {this, &info, targetAddr};
int found = dl_iterate_phdr(findUnwindSectionsByPhdr, &cb_data);
- return static_cast<bool>(found);
-#endif
-
- return false;
-}
-
-
-inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) {
- // TO DO: if OS has way to dynamically register FDEs, check that.
- (void)targetAddr;
- (void)fde;
- return false;
-}
-
-inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf,
- size_t bufLen,
- unw_word_t *offset) {
+ return static_cast<bool>(found);
+#endif
+
+ return false;
+}
+
+
+inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) {
+ // TO DO: if OS has way to dynamically register FDEs, check that.
+ (void)targetAddr;
+ (void)fde;
+ return false;
+}
+
+inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf,
+ size_t bufLen,
+ unw_word_t *offset) {
#if _LIBUNWIND_USE_DLADDR
- Dl_info dyldInfo;
- if (dladdr((void *)addr, &dyldInfo)) {
- if (dyldInfo.dli_sname != NULL) {
- snprintf(buf, bufLen, "%s", dyldInfo.dli_sname);
- *offset = (addr - (pint_t) dyldInfo.dli_saddr);
- return true;
- }
- }
+ Dl_info dyldInfo;
+ if (dladdr((void *)addr, &dyldInfo)) {
+ if (dyldInfo.dli_sname != NULL) {
+ snprintf(buf, bufLen, "%s", dyldInfo.dli_sname);
+ *offset = (addr - (pint_t) dyldInfo.dli_saddr);
+ return true;
+ }
+ }
#else
(void)addr;
(void)buf;
(void)bufLen;
(void)offset;
-#endif
- return false;
-}
-
-} // namespace libunwind
-
-#endif // __ADDRESSSPACE_HPP__
+#endif
+ return false;
+}
+
+} // namespace libunwind
+
+#endif // __ADDRESSSPACE_HPP__
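
The findFunctionName path above resolves an address to the nearest exported symbol with dladdr. A standalone sketch of that pattern follows, assuming a POSIX <dlfcn.h>; the symbolize helper and its caller are illustrative only (older glibc needs -ldl at link time).

    // Standalone sketch of dladdr-based symbolization; names are illustrative,
    // not libunwind's interface.
    #include <dlfcn.h>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static bool symbolize(const void *addr, char *buf, size_t bufLen, uintptr_t *offset) {
      Dl_info info;
      if (dladdr(addr, &info) && info.dli_sname != nullptr) {
        std::snprintf(buf, bufLen, "%s", info.dli_sname);       // nearest symbol name
        *offset = (uintptr_t)addr - (uintptr_t)info.dli_saddr;  // distance into that symbol
        return true;
      }
      return false;
    }

    int main() {
      char name[256];
      uintptr_t off = 0;
      // dladdr itself lives in a shared library, so it has a dynamic symbol entry.
      if (symbolize((const void *)&dladdr, name, sizeof(name), &off))
        std::printf("%s + %zu\n", name, (size_t)off);
      return 0;
    }
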
diff --git a/contrib/libs/libunwind/src/CompactUnwinder.hpp b/contrib/libs/libunwind/src/CompactUnwinder.hpp
index daa7b0cd15..0b2b5e111b 100644
--- a/contrib/libs/libunwind/src/CompactUnwinder.hpp
+++ b/contrib/libs/libunwind/src/CompactUnwinder.hpp
@@ -1,697 +1,697 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Does runtime stack unwinding using compact unwind encodings.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __COMPACT_UNWINDER_HPP__
-#define __COMPACT_UNWINDER_HPP__
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include <libunwind.h>
-#include <mach-o/compact_unwind_encoding.h>
-
-#include "Registers.hpp"
-
-#define EXTRACT_BITS(value, mask) \
- ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1))
-
-namespace libunwind {
-
+//
+//
+// Does runtime stack unwinding using compact unwind encodings.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __COMPACT_UNWINDER_HPP__
+#define __COMPACT_UNWINDER_HPP__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <libunwind.h>
+#include <mach-o/compact_unwind_encoding.h>
+
+#include "Registers.hpp"
+
+#define EXTRACT_BITS(value, mask) \
+ ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1))
+
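
EXTRACT_BITS pulls a bit field out of the encoding word by shifting it down by the mask's trailing zero count and keeping popcount(mask) bits. A small sketch of the same expression, assuming a GCC/Clang-style compiler for the builtins:

    #include <cassert>
    #include <cstdint>

    static uint32_t extractBits(uint32_t value, uint32_t mask) {
      // shift the field down to bit 0, then keep as many bits as the mask has set
      return (value >> __builtin_ctz(mask)) & ((1u << __builtin_popcount(mask)) - 1u);
    }

    int main() {
      const uint32_t mask = 0x00070000;   // a 3-bit field stored in bits 16..18
      uint32_t word = 0;
      word |= (5u << 16);                 // pack the value 5 into the field
      assert(extractBits(word, mask) == 5);
      return 0;
    }
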
+namespace libunwind {
+
#if defined(_LIBUNWIND_TARGET_I386)
-/// CompactUnwinder_x86 uses a compact unwind info to virtually "step" (aka
-/// unwind) by modifying a Registers_x86 register set
-template <typename A>
-class CompactUnwinder_x86 {
-public:
-
- static int stepWithCompactEncoding(compact_unwind_encoding_t info,
- uint32_t functionStart, A &addressSpace,
- Registers_x86 &registers);
-
-private:
- typename A::pint_t pint_t;
-
- static void frameUnwind(A &addressSpace, Registers_x86 &registers);
- static void framelessUnwind(A &addressSpace,
- typename A::pint_t returnAddressLocation,
- Registers_x86 &registers);
- static int
- stepWithCompactEncodingEBPFrame(compact_unwind_encoding_t compactEncoding,
- uint32_t functionStart, A &addressSpace,
- Registers_x86 &registers);
- static int stepWithCompactEncodingFrameless(
- compact_unwind_encoding_t compactEncoding, uint32_t functionStart,
- A &addressSpace, Registers_x86 &registers, bool indirectStackSize);
-};
-
-template <typename A>
-int CompactUnwinder_x86<A>::stepWithCompactEncoding(
- compact_unwind_encoding_t compactEncoding, uint32_t functionStart,
- A &addressSpace, Registers_x86 &registers) {
- switch (compactEncoding & UNWIND_X86_MODE_MASK) {
- case UNWIND_X86_MODE_EBP_FRAME:
- return stepWithCompactEncodingEBPFrame(compactEncoding, functionStart,
- addressSpace, registers);
- case UNWIND_X86_MODE_STACK_IMMD:
- return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
- addressSpace, registers, false);
- case UNWIND_X86_MODE_STACK_IND:
- return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
- addressSpace, registers, true);
- }
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
-}
-
-template <typename A>
-int CompactUnwinder_x86<A>::stepWithCompactEncodingEBPFrame(
- compact_unwind_encoding_t compactEncoding, uint32_t functionStart,
- A &addressSpace, Registers_x86 &registers) {
- uint32_t savedRegistersOffset =
- EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_OFFSET);
- uint32_t savedRegistersLocations =
- EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_REGISTERS);
-
- uint32_t savedRegisters = registers.getEBP() - 4 * savedRegistersOffset;
- for (int i = 0; i < 5; ++i) {
- switch (savedRegistersLocations & 0x7) {
- case UNWIND_X86_REG_NONE:
- // no register saved in this slot
- break;
- case UNWIND_X86_REG_EBX:
- registers.setEBX(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_ECX:
- registers.setECX(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_EDX:
- registers.setEDX(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_EDI:
- registers.setEDI(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_ESI:
- registers.setESI(addressSpace.get32(savedRegisters));
- break;
- default:
- (void)functionStart;
- _LIBUNWIND_DEBUG_LOG("bad register for EBP frame, encoding=%08X for "
+/// CompactUnwinder_x86 uses a compact unwind info to virtually "step" (aka
+/// unwind) by modifying a Registers_x86 register set
+template <typename A>
+class CompactUnwinder_x86 {
+public:
+
+ static int stepWithCompactEncoding(compact_unwind_encoding_t info,
+ uint32_t functionStart, A &addressSpace,
+ Registers_x86 &registers);
+
+private:
+ typename A::pint_t pint_t;
+
+ static void frameUnwind(A &addressSpace, Registers_x86 &registers);
+ static void framelessUnwind(A &addressSpace,
+ typename A::pint_t returnAddressLocation,
+ Registers_x86 &registers);
+ static int
+ stepWithCompactEncodingEBPFrame(compact_unwind_encoding_t compactEncoding,
+ uint32_t functionStart, A &addressSpace,
+ Registers_x86 &registers);
+ static int stepWithCompactEncodingFrameless(
+ compact_unwind_encoding_t compactEncoding, uint32_t functionStart,
+ A &addressSpace, Registers_x86 &registers, bool indirectStackSize);
+};
+
+template <typename A>
+int CompactUnwinder_x86<A>::stepWithCompactEncoding(
+ compact_unwind_encoding_t compactEncoding, uint32_t functionStart,
+ A &addressSpace, Registers_x86 &registers) {
+ switch (compactEncoding & UNWIND_X86_MODE_MASK) {
+ case UNWIND_X86_MODE_EBP_FRAME:
+ return stepWithCompactEncodingEBPFrame(compactEncoding, functionStart,
+ addressSpace, registers);
+ case UNWIND_X86_MODE_STACK_IMMD:
+ return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
+ addressSpace, registers, false);
+ case UNWIND_X86_MODE_STACK_IND:
+ return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
+ addressSpace, registers, true);
+ }
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+}
+
+template <typename A>
+int CompactUnwinder_x86<A>::stepWithCompactEncodingEBPFrame(
+ compact_unwind_encoding_t compactEncoding, uint32_t functionStart,
+ A &addressSpace, Registers_x86 &registers) {
+ uint32_t savedRegistersOffset =
+ EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_OFFSET);
+ uint32_t savedRegistersLocations =
+ EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_REGISTERS);
+
+ uint32_t savedRegisters = registers.getEBP() - 4 * savedRegistersOffset;
+ for (int i = 0; i < 5; ++i) {
+ switch (savedRegistersLocations & 0x7) {
+ case UNWIND_X86_REG_NONE:
+ // no register saved in this slot
+ break;
+ case UNWIND_X86_REG_EBX:
+ registers.setEBX(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_ECX:
+ registers.setECX(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_EDX:
+ registers.setEDX(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_EDI:
+ registers.setEDI(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_ESI:
+ registers.setESI(addressSpace.get32(savedRegisters));
+ break;
+ default:
+ (void)functionStart;
+ _LIBUNWIND_DEBUG_LOG("bad register for EBP frame, encoding=%08X for "
"function starting at 0x%X",
- compactEncoding, functionStart);
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
- }
- savedRegisters += 4;
- savedRegistersLocations = (savedRegistersLocations >> 3);
- }
- frameUnwind(addressSpace, registers);
- return UNW_STEP_SUCCESS;
-}
-
-template <typename A>
-int CompactUnwinder_x86<A>::stepWithCompactEncodingFrameless(
- compact_unwind_encoding_t encoding, uint32_t functionStart,
- A &addressSpace, Registers_x86 &registers, bool indirectStackSize) {
- uint32_t stackSizeEncoded =
- EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
- uint32_t stackAdjust =
- EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST);
- uint32_t regCount =
- EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT);
- uint32_t permutation =
- EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION);
- uint32_t stackSize = stackSizeEncoded * 4;
- if (indirectStackSize) {
- // stack size is encoded in subl $xxx,%esp instruction
- uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded);
- stackSize = subl + 4 * stackAdjust;
- }
- // decompress permutation
- uint32_t permunreg[6];
- switch (regCount) {
- case 6:
- permunreg[0] = permutation / 120;
- permutation -= (permunreg[0] * 120);
- permunreg[1] = permutation / 24;
- permutation -= (permunreg[1] * 24);
- permunreg[2] = permutation / 6;
- permutation -= (permunreg[2] * 6);
- permunreg[3] = permutation / 2;
- permutation -= (permunreg[3] * 2);
- permunreg[4] = permutation;
- permunreg[5] = 0;
- break;
- case 5:
- permunreg[0] = permutation / 120;
- permutation -= (permunreg[0] * 120);
- permunreg[1] = permutation / 24;
- permutation -= (permunreg[1] * 24);
- permunreg[2] = permutation / 6;
- permutation -= (permunreg[2] * 6);
- permunreg[3] = permutation / 2;
- permutation -= (permunreg[3] * 2);
- permunreg[4] = permutation;
- break;
- case 4:
- permunreg[0] = permutation / 60;
- permutation -= (permunreg[0] * 60);
- permunreg[1] = permutation / 12;
- permutation -= (permunreg[1] * 12);
- permunreg[2] = permutation / 3;
- permutation -= (permunreg[2] * 3);
- permunreg[3] = permutation;
- break;
- case 3:
- permunreg[0] = permutation / 20;
- permutation -= (permunreg[0] * 20);
- permunreg[1] = permutation / 4;
- permutation -= (permunreg[1] * 4);
- permunreg[2] = permutation;
- break;
- case 2:
- permunreg[0] = permutation / 5;
- permutation -= (permunreg[0] * 5);
- permunreg[1] = permutation;
- break;
- case 1:
- permunreg[0] = permutation;
- break;
- }
- // re-number registers back to standard numbers
- int registersSaved[6];
- bool used[7] = { false, false, false, false, false, false, false };
- for (uint32_t i = 0; i < regCount; ++i) {
- uint32_t renum = 0;
- for (int u = 1; u < 7; ++u) {
- if (!used[u]) {
- if (renum == permunreg[i]) {
- registersSaved[i] = u;
- used[u] = true;
- break;
- }
- ++renum;
- }
- }
- }
- uint32_t savedRegisters = registers.getSP() + stackSize - 4 - 4 * regCount;
- for (uint32_t i = 0; i < regCount; ++i) {
- switch (registersSaved[i]) {
- case UNWIND_X86_REG_EBX:
- registers.setEBX(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_ECX:
- registers.setECX(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_EDX:
- registers.setEDX(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_EDI:
- registers.setEDI(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_ESI:
- registers.setESI(addressSpace.get32(savedRegisters));
- break;
- case UNWIND_X86_REG_EBP:
- registers.setEBP(addressSpace.get32(savedRegisters));
- break;
- default:
- _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for "
+ compactEncoding, functionStart);
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+ }
+ savedRegisters += 4;
+ savedRegistersLocations = (savedRegistersLocations >> 3);
+ }
+ frameUnwind(addressSpace, registers);
+ return UNW_STEP_SUCCESS;
+}
+
+template <typename A>
+int CompactUnwinder_x86<A>::stepWithCompactEncodingFrameless(
+ compact_unwind_encoding_t encoding, uint32_t functionStart,
+ A &addressSpace, Registers_x86 &registers, bool indirectStackSize) {
+ uint32_t stackSizeEncoded =
+ EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
+ uint32_t stackAdjust =
+ EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST);
+ uint32_t regCount =
+ EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT);
+ uint32_t permutation =
+ EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION);
+ uint32_t stackSize = stackSizeEncoded * 4;
+ if (indirectStackSize) {
+ // stack size is encoded in subl $xxx,%esp instruction
+ uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded);
+ stackSize = subl + 4 * stackAdjust;
+ }
+ // decompress permutation
+ uint32_t permunreg[6];
+ switch (regCount) {
+ case 6:
+ permunreg[0] = permutation / 120;
+ permutation -= (permunreg[0] * 120);
+ permunreg[1] = permutation / 24;
+ permutation -= (permunreg[1] * 24);
+ permunreg[2] = permutation / 6;
+ permutation -= (permunreg[2] * 6);
+ permunreg[3] = permutation / 2;
+ permutation -= (permunreg[3] * 2);
+ permunreg[4] = permutation;
+ permunreg[5] = 0;
+ break;
+ case 5:
+ permunreg[0] = permutation / 120;
+ permutation -= (permunreg[0] * 120);
+ permunreg[1] = permutation / 24;
+ permutation -= (permunreg[1] * 24);
+ permunreg[2] = permutation / 6;
+ permutation -= (permunreg[2] * 6);
+ permunreg[3] = permutation / 2;
+ permutation -= (permunreg[3] * 2);
+ permunreg[4] = permutation;
+ break;
+ case 4:
+ permunreg[0] = permutation / 60;
+ permutation -= (permunreg[0] * 60);
+ permunreg[1] = permutation / 12;
+ permutation -= (permunreg[1] * 12);
+ permunreg[2] = permutation / 3;
+ permutation -= (permunreg[2] * 3);
+ permunreg[3] = permutation;
+ break;
+ case 3:
+ permunreg[0] = permutation / 20;
+ permutation -= (permunreg[0] * 20);
+ permunreg[1] = permutation / 4;
+ permutation -= (permunreg[1] * 4);
+ permunreg[2] = permutation;
+ break;
+ case 2:
+ permunreg[0] = permutation / 5;
+ permutation -= (permunreg[0] * 5);
+ permunreg[1] = permutation;
+ break;
+ case 1:
+ permunreg[0] = permutation;
+ break;
+ }
+ // re-number registers back to standard numbers
+ int registersSaved[6];
+ bool used[7] = { false, false, false, false, false, false, false };
+ for (uint32_t i = 0; i < regCount; ++i) {
+ uint32_t renum = 0;
+ for (int u = 1; u < 7; ++u) {
+ if (!used[u]) {
+ if (renum == permunreg[i]) {
+ registersSaved[i] = u;
+ used[u] = true;
+ break;
+ }
+ ++renum;
+ }
+ }
+ }
+ uint32_t savedRegisters = registers.getSP() + stackSize - 4 - 4 * regCount;
+ for (uint32_t i = 0; i < regCount; ++i) {
+ switch (registersSaved[i]) {
+ case UNWIND_X86_REG_EBX:
+ registers.setEBX(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_ECX:
+ registers.setECX(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_EDX:
+ registers.setEDX(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_EDI:
+ registers.setEDI(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_ESI:
+ registers.setESI(addressSpace.get32(savedRegisters));
+ break;
+ case UNWIND_X86_REG_EBP:
+ registers.setEBP(addressSpace.get32(savedRegisters));
+ break;
+ default:
+ _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for "
"function starting at 0x%X",
- encoding, functionStart);
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
- }
- savedRegisters += 4;
- }
- framelessUnwind(addressSpace, savedRegisters, registers);
- return UNW_STEP_SUCCESS;
-}
-
-
-template <typename A>
-void CompactUnwinder_x86<A>::frameUnwind(A &addressSpace,
- Registers_x86 &registers) {
- typename A::pint_t bp = registers.getEBP();
- // ebp points to old ebp
- registers.setEBP(addressSpace.get32(bp));
- // old esp is ebp less saved ebp and return address
- registers.setSP((uint32_t)bp + 8);
- // pop return address into eip
- registers.setIP(addressSpace.get32(bp + 4));
-}
-
-template <typename A>
-void CompactUnwinder_x86<A>::framelessUnwind(
- A &addressSpace, typename A::pint_t returnAddressLocation,
- Registers_x86 &registers) {
- // return address is on stack after last saved register
- registers.setIP(addressSpace.get32(returnAddressLocation));
- // old esp is before return address
- registers.setSP((uint32_t)returnAddressLocation + 4);
-}
+ encoding, functionStart);
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+ }
+ savedRegisters += 4;
+ }
+ framelessUnwind(addressSpace, savedRegisters, registers);
+ return UNW_STEP_SUCCESS;
+}
+
+
+template <typename A>
+void CompactUnwinder_x86<A>::frameUnwind(A &addressSpace,
+ Registers_x86 &registers) {
+ typename A::pint_t bp = registers.getEBP();
+ // ebp points to old ebp
+ registers.setEBP(addressSpace.get32(bp));
+ // old esp is ebp less saved ebp and return address
+ registers.setSP((uint32_t)bp + 8);
+ // pop return address into eip
+ registers.setIP(addressSpace.get32(bp + 4));
+}
+
+template <typename A>
+void CompactUnwinder_x86<A>::framelessUnwind(
+ A &addressSpace, typename A::pint_t returnAddressLocation,
+ Registers_x86 &registers) {
+ // return address is on stack after last saved register
+ registers.setIP(addressSpace.get32(returnAddressLocation));
+ // old esp is before return address
+ registers.setSP((uint32_t)returnAddressLocation + 4);
+}
#endif // _LIBUNWIND_TARGET_I386
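
The frameless decoding above unpacks the saved-register list from a single permutation index stored in factorial base: divide by 120, 24, 6, 2 to get each register's rank among the registers not yet used, then renumber against a used[] mask. The sketch below round-trips the regCount == 3 case to make the scheme concrete; the encoder and the example register choice are illustrative, not Apple's tooling.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Encode: for each saved register (numbered 1..6), store its rank among the
    // registers that have not been used yet.
    static uint32_t encode3(const int reg[3]) {
      bool used[7] = {false};
      const uint32_t base[3] = {20, 4, 1};   // 6*5*4 = 120 permutations in total
      uint32_t permutation = 0;
      for (int i = 0; i < 3; ++i) {
        uint32_t rank = 0;
        for (int u = 1; u < reg[i]; ++u)
          if (!used[u]) ++rank;
        permutation += rank * base[i];
        used[reg[i]] = true;
      }
      return permutation;
    }

    // Decode: same divide-and-renumber steps as stepWithCompactEncodingFrameless.
    static void decode3(uint32_t permutation, int out[3]) {
      uint32_t permunreg[3];
      permunreg[0] = permutation / 20; permutation -= permunreg[0] * 20;
      permunreg[1] = permutation / 4;  permutation -= permunreg[1] * 4;
      permunreg[2] = permutation;
      bool used[7] = {false};
      for (int i = 0; i < 3; ++i) {
        uint32_t renum = 0;
        for (int u = 1; u < 7; ++u) {
          if (used[u]) continue;
          if (renum == permunreg[i]) { out[i] = u; used[u] = true; break; }
          ++renum;
        }
      }
    }

    int main() {
      const int saved[3] = {2, 6, 3};   // e.g. ECX, EBP, EDX in compact-unwind numbering
      int roundTrip[3];
      decode3(encode3(saved), roundTrip);
      for (int i = 0; i < 3; ++i) assert(roundTrip[i] == saved[i]);
      std::printf("permutation index = %u\n", encode3(saved));
      return 0;
    }
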
-
-
+
+
#if defined(_LIBUNWIND_TARGET_X86_64)
-/// CompactUnwinder_x86_64 uses a compact unwind info to virtually "step" (aka
-/// unwind) by modifying a Registers_x86_64 register set
-template <typename A>
-class CompactUnwinder_x86_64 {
-public:
-
- static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding,
- uint64_t functionStart, A &addressSpace,
- Registers_x86_64 &registers);
-
-private:
- typename A::pint_t pint_t;
-
- static void frameUnwind(A &addressSpace, Registers_x86_64 &registers);
- static void framelessUnwind(A &addressSpace, uint64_t returnAddressLocation,
- Registers_x86_64 &registers);
- static int
- stepWithCompactEncodingRBPFrame(compact_unwind_encoding_t compactEncoding,
- uint64_t functionStart, A &addressSpace,
- Registers_x86_64 &registers);
- static int stepWithCompactEncodingFrameless(
- compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
- A &addressSpace, Registers_x86_64 &registers, bool indirectStackSize);
-};
-
-template <typename A>
-int CompactUnwinder_x86_64<A>::stepWithCompactEncoding(
- compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
- A &addressSpace, Registers_x86_64 &registers) {
- switch (compactEncoding & UNWIND_X86_64_MODE_MASK) {
- case UNWIND_X86_64_MODE_RBP_FRAME:
- return stepWithCompactEncodingRBPFrame(compactEncoding, functionStart,
- addressSpace, registers);
- case UNWIND_X86_64_MODE_STACK_IMMD:
- return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
- addressSpace, registers, false);
- case UNWIND_X86_64_MODE_STACK_IND:
- return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
- addressSpace, registers, true);
- }
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
-}
-
-template <typename A>
-int CompactUnwinder_x86_64<A>::stepWithCompactEncodingRBPFrame(
- compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
- A &addressSpace, Registers_x86_64 &registers) {
- uint32_t savedRegistersOffset =
- EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_OFFSET);
- uint32_t savedRegistersLocations =
- EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
-
- uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset;
- for (int i = 0; i < 5; ++i) {
- switch (savedRegistersLocations & 0x7) {
- case UNWIND_X86_64_REG_NONE:
- // no register saved in this slot
- break;
- case UNWIND_X86_64_REG_RBX:
- registers.setRBX(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R12:
- registers.setR12(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R13:
- registers.setR13(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R14:
- registers.setR14(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R15:
- registers.setR15(addressSpace.get64(savedRegisters));
- break;
- default:
- (void)functionStart;
- _LIBUNWIND_DEBUG_LOG("bad register for RBP frame, encoding=%08X for "
+/// CompactUnwinder_x86_64 uses a compact unwind info to virtually "step" (aka
+/// unwind) by modifying a Registers_x86_64 register set
+template <typename A>
+class CompactUnwinder_x86_64 {
+public:
+
+ static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding,
+ uint64_t functionStart, A &addressSpace,
+ Registers_x86_64 &registers);
+
+private:
+ typename A::pint_t pint_t;
+
+ static void frameUnwind(A &addressSpace, Registers_x86_64 &registers);
+ static void framelessUnwind(A &addressSpace, uint64_t returnAddressLocation,
+ Registers_x86_64 &registers);
+ static int
+ stepWithCompactEncodingRBPFrame(compact_unwind_encoding_t compactEncoding,
+ uint64_t functionStart, A &addressSpace,
+ Registers_x86_64 &registers);
+ static int stepWithCompactEncodingFrameless(
+ compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
+ A &addressSpace, Registers_x86_64 &registers, bool indirectStackSize);
+};
+
+template <typename A>
+int CompactUnwinder_x86_64<A>::stepWithCompactEncoding(
+ compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
+ A &addressSpace, Registers_x86_64 &registers) {
+ switch (compactEncoding & UNWIND_X86_64_MODE_MASK) {
+ case UNWIND_X86_64_MODE_RBP_FRAME:
+ return stepWithCompactEncodingRBPFrame(compactEncoding, functionStart,
+ addressSpace, registers);
+ case UNWIND_X86_64_MODE_STACK_IMMD:
+ return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
+ addressSpace, registers, false);
+ case UNWIND_X86_64_MODE_STACK_IND:
+ return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
+ addressSpace, registers, true);
+ }
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+}
+
+template <typename A>
+int CompactUnwinder_x86_64<A>::stepWithCompactEncodingRBPFrame(
+ compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
+ A &addressSpace, Registers_x86_64 &registers) {
+ uint32_t savedRegistersOffset =
+ EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_OFFSET);
+ uint32_t savedRegistersLocations =
+ EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
+
+ uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset;
+ for (int i = 0; i < 5; ++i) {
+ switch (savedRegistersLocations & 0x7) {
+ case UNWIND_X86_64_REG_NONE:
+ // no register saved in this slot
+ break;
+ case UNWIND_X86_64_REG_RBX:
+ registers.setRBX(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R12:
+ registers.setR12(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R13:
+ registers.setR13(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R14:
+ registers.setR14(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R15:
+ registers.setR15(addressSpace.get64(savedRegisters));
+ break;
+ default:
+ (void)functionStart;
+ _LIBUNWIND_DEBUG_LOG("bad register for RBP frame, encoding=%08X for "
"function starting at 0x%llX",
- compactEncoding, functionStart);
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
- }
- savedRegisters += 8;
- savedRegistersLocations = (savedRegistersLocations >> 3);
- }
- frameUnwind(addressSpace, registers);
- return UNW_STEP_SUCCESS;
-}
-
-template <typename A>
-int CompactUnwinder_x86_64<A>::stepWithCompactEncodingFrameless(
- compact_unwind_encoding_t encoding, uint64_t functionStart, A &addressSpace,
- Registers_x86_64 &registers, bool indirectStackSize) {
- uint32_t stackSizeEncoded =
- EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
- uint32_t stackAdjust =
- EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST);
- uint32_t regCount =
- EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT);
- uint32_t permutation =
- EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION);
- uint32_t stackSize = stackSizeEncoded * 8;
- if (indirectStackSize) {
- // stack size is encoded in subl $xxx,%esp instruction
- uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded);
- stackSize = subl + 8 * stackAdjust;
- }
- // decompress permutation
- uint32_t permunreg[6];
- switch (regCount) {
- case 6:
- permunreg[0] = permutation / 120;
- permutation -= (permunreg[0] * 120);
- permunreg[1] = permutation / 24;
- permutation -= (permunreg[1] * 24);
- permunreg[2] = permutation / 6;
- permutation -= (permunreg[2] * 6);
- permunreg[3] = permutation / 2;
- permutation -= (permunreg[3] * 2);
- permunreg[4] = permutation;
- permunreg[5] = 0;
- break;
- case 5:
- permunreg[0] = permutation / 120;
- permutation -= (permunreg[0] * 120);
- permunreg[1] = permutation / 24;
- permutation -= (permunreg[1] * 24);
- permunreg[2] = permutation / 6;
- permutation -= (permunreg[2] * 6);
- permunreg[3] = permutation / 2;
- permutation -= (permunreg[3] * 2);
- permunreg[4] = permutation;
- break;
- case 4:
- permunreg[0] = permutation / 60;
- permutation -= (permunreg[0] * 60);
- permunreg[1] = permutation / 12;
- permutation -= (permunreg[1] * 12);
- permunreg[2] = permutation / 3;
- permutation -= (permunreg[2] * 3);
- permunreg[3] = permutation;
- break;
- case 3:
- permunreg[0] = permutation / 20;
- permutation -= (permunreg[0] * 20);
- permunreg[1] = permutation / 4;
- permutation -= (permunreg[1] * 4);
- permunreg[2] = permutation;
- break;
- case 2:
- permunreg[0] = permutation / 5;
- permutation -= (permunreg[0] * 5);
- permunreg[1] = permutation;
- break;
- case 1:
- permunreg[0] = permutation;
- break;
- }
- // re-number registers back to standard numbers
- int registersSaved[6];
- bool used[7] = { false, false, false, false, false, false, false };
- for (uint32_t i = 0; i < regCount; ++i) {
- uint32_t renum = 0;
- for (int u = 1; u < 7; ++u) {
- if (!used[u]) {
- if (renum == permunreg[i]) {
- registersSaved[i] = u;
- used[u] = true;
- break;
- }
- ++renum;
- }
- }
- }
- uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount;
- for (uint32_t i = 0; i < regCount; ++i) {
- switch (registersSaved[i]) {
- case UNWIND_X86_64_REG_RBX:
- registers.setRBX(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R12:
- registers.setR12(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R13:
- registers.setR13(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R14:
- registers.setR14(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_R15:
- registers.setR15(addressSpace.get64(savedRegisters));
- break;
- case UNWIND_X86_64_REG_RBP:
- registers.setRBP(addressSpace.get64(savedRegisters));
- break;
- default:
- _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for "
+ compactEncoding, functionStart);
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+ }
+ savedRegisters += 8;
+ savedRegistersLocations = (savedRegistersLocations >> 3);
+ }
+ frameUnwind(addressSpace, registers);
+ return UNW_STEP_SUCCESS;
+}
+
+template <typename A>
+int CompactUnwinder_x86_64<A>::stepWithCompactEncodingFrameless(
+ compact_unwind_encoding_t encoding, uint64_t functionStart, A &addressSpace,
+ Registers_x86_64 &registers, bool indirectStackSize) {
+ uint32_t stackSizeEncoded =
+ EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
+ uint32_t stackAdjust =
+ EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST);
+ uint32_t regCount =
+ EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT);
+ uint32_t permutation =
+ EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION);
+ uint32_t stackSize = stackSizeEncoded * 8;
+ if (indirectStackSize) {
+ // stack size is encoded in subl $xxx,%esp instruction
+ uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded);
+ stackSize = subl + 8 * stackAdjust;
+ }
+ // decompress permutation
+ uint32_t permunreg[6];
+ switch (regCount) {
+ case 6:
+ permunreg[0] = permutation / 120;
+ permutation -= (permunreg[0] * 120);
+ permunreg[1] = permutation / 24;
+ permutation -= (permunreg[1] * 24);
+ permunreg[2] = permutation / 6;
+ permutation -= (permunreg[2] * 6);
+ permunreg[3] = permutation / 2;
+ permutation -= (permunreg[3] * 2);
+ permunreg[4] = permutation;
+ permunreg[5] = 0;
+ break;
+ case 5:
+ permunreg[0] = permutation / 120;
+ permutation -= (permunreg[0] * 120);
+ permunreg[1] = permutation / 24;
+ permutation -= (permunreg[1] * 24);
+ permunreg[2] = permutation / 6;
+ permutation -= (permunreg[2] * 6);
+ permunreg[3] = permutation / 2;
+ permutation -= (permunreg[3] * 2);
+ permunreg[4] = permutation;
+ break;
+ case 4:
+ permunreg[0] = permutation / 60;
+ permutation -= (permunreg[0] * 60);
+ permunreg[1] = permutation / 12;
+ permutation -= (permunreg[1] * 12);
+ permunreg[2] = permutation / 3;
+ permutation -= (permunreg[2] * 3);
+ permunreg[3] = permutation;
+ break;
+ case 3:
+ permunreg[0] = permutation / 20;
+ permutation -= (permunreg[0] * 20);
+ permunreg[1] = permutation / 4;
+ permutation -= (permunreg[1] * 4);
+ permunreg[2] = permutation;
+ break;
+ case 2:
+ permunreg[0] = permutation / 5;
+ permutation -= (permunreg[0] * 5);
+ permunreg[1] = permutation;
+ break;
+ case 1:
+ permunreg[0] = permutation;
+ break;
+ }
+ // re-number registers back to standard numbers
+ int registersSaved[6];
+ bool used[7] = { false, false, false, false, false, false, false };
+ for (uint32_t i = 0; i < regCount; ++i) {
+ uint32_t renum = 0;
+ for (int u = 1; u < 7; ++u) {
+ if (!used[u]) {
+ if (renum == permunreg[i]) {
+ registersSaved[i] = u;
+ used[u] = true;
+ break;
+ }
+ ++renum;
+ }
+ }
+ }
+ uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount;
+ for (uint32_t i = 0; i < regCount; ++i) {
+ switch (registersSaved[i]) {
+ case UNWIND_X86_64_REG_RBX:
+ registers.setRBX(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R12:
+ registers.setR12(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R13:
+ registers.setR13(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R14:
+ registers.setR14(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_R15:
+ registers.setR15(addressSpace.get64(savedRegisters));
+ break;
+ case UNWIND_X86_64_REG_RBP:
+ registers.setRBP(addressSpace.get64(savedRegisters));
+ break;
+ default:
+ _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for "
"function starting at 0x%llX",
- encoding, functionStart);
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
- }
- savedRegisters += 8;
- }
- framelessUnwind(addressSpace, savedRegisters, registers);
- return UNW_STEP_SUCCESS;
-}
-
-
-template <typename A>
-void CompactUnwinder_x86_64<A>::frameUnwind(A &addressSpace,
- Registers_x86_64 &registers) {
- uint64_t rbp = registers.getRBP();
- // ebp points to old ebp
- registers.setRBP(addressSpace.get64(rbp));
- // old esp is ebp less saved ebp and return address
- registers.setSP(rbp + 16);
- // pop return address into eip
- registers.setIP(addressSpace.get64(rbp + 8));
-}
-
-template <typename A>
-void CompactUnwinder_x86_64<A>::framelessUnwind(A &addressSpace,
- uint64_t returnAddressLocation,
- Registers_x86_64 &registers) {
- // return address is on stack after last saved register
- registers.setIP(addressSpace.get64(returnAddressLocation));
- // old esp is before return address
- registers.setSP(returnAddressLocation + 8);
-}
+ encoding, functionStart);
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+ }
+ savedRegisters += 8;
+ }
+ framelessUnwind(addressSpace, savedRegisters, registers);
+ return UNW_STEP_SUCCESS;
+}
+
+
+template <typename A>
+void CompactUnwinder_x86_64<A>::frameUnwind(A &addressSpace,
+ Registers_x86_64 &registers) {
+ uint64_t rbp = registers.getRBP();
+ // ebp points to old ebp
+ registers.setRBP(addressSpace.get64(rbp));
+ // old esp is ebp less saved ebp and return address
+ registers.setSP(rbp + 16);
+ // pop return address into eip
+ registers.setIP(addressSpace.get64(rbp + 8));
+}
+
+template <typename A>
+void CompactUnwinder_x86_64<A>::framelessUnwind(A &addressSpace,
+ uint64_t returnAddressLocation,
+ Registers_x86_64 &registers) {
+ // return address is on stack after last saved register
+ registers.setIP(addressSpace.get64(returnAddressLocation));
+ // old esp is before return address
+ registers.setSP(returnAddressLocation + 8);
+}
#endif // _LIBUNWIND_TARGET_X86_64
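
frameUnwind for both x86 variants leans on the standard frame-pointer prologue: after push %rbp; mov %rsp,%rbp, the word at [rbp] holds the caller's rbp, [rbp+8] the return address, and the caller's rsp is rbp + 16. Here is a sketch of that arithmetic walking a synthetic frame chain, with an array standing in for the stack; it only illustrates the pointer math, not a real unwind.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t stack[8] = {};
      // Fake inner frame record at slots 0/1, outer frame record at slots 4/5.
      uint64_t base = (uint64_t)(uintptr_t)stack;
      stack[0] = base + 4 * 8;   // inner frame: saved rbp -> outer frame record
      stack[1] = 0x1111;         // inner frame: return address into the caller
      stack[4] = 0;              // outer frame: saved rbp (end of chain)
      stack[5] = 0x2222;         // outer frame: return address

      uint64_t rbp = base;       // unwinder state: rbp of the innermost frame
      while (rbp != 0) {
        uint64_t savedRbp = *(uint64_t *)(uintptr_t)rbp;        // [rbp]
        uint64_t retAddr  = *(uint64_t *)(uintptr_t)(rbp + 8);  // [rbp + 8]
        uint64_t callerSp = rbp + 16;                           // rsp after the ret
        std::printf("ret=0x%llx callerSp=+%llu\n",
                    (unsigned long long)retAddr,
                    (unsigned long long)(callerSp - base));
        rbp = savedRbp;                                         // step to the caller's frame
      }
      return 0;
    }
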
-
-
-
+
+
+
#if defined(_LIBUNWIND_TARGET_AARCH64)
-/// CompactUnwinder_arm64 uses a compact unwind info to virtually "step" (aka
-/// unwind) by modifying a Registers_arm64 register set
-template <typename A>
-class CompactUnwinder_arm64 {
-public:
-
- static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding,
- uint64_t functionStart, A &addressSpace,
- Registers_arm64 &registers);
-
-private:
- typename A::pint_t pint_t;
-
- static int
- stepWithCompactEncodingFrame(compact_unwind_encoding_t compactEncoding,
- uint64_t functionStart, A &addressSpace,
- Registers_arm64 &registers);
- static int stepWithCompactEncodingFrameless(
- compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
- A &addressSpace, Registers_arm64 &registers);
-};
-
-template <typename A>
-int CompactUnwinder_arm64<A>::stepWithCompactEncoding(
- compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
- A &addressSpace, Registers_arm64 &registers) {
- switch (compactEncoding & UNWIND_ARM64_MODE_MASK) {
- case UNWIND_ARM64_MODE_FRAME:
- return stepWithCompactEncodingFrame(compactEncoding, functionStart,
- addressSpace, registers);
- case UNWIND_ARM64_MODE_FRAMELESS:
- return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
- addressSpace, registers);
- }
- _LIBUNWIND_ABORT("invalid compact unwind encoding");
-}
-
-template <typename A>
-int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrameless(
- compact_unwind_encoding_t encoding, uint64_t, A &addressSpace,
- Registers_arm64 &registers) {
- uint32_t stackSize =
- 16 * EXTRACT_BITS(encoding, UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK);
-
- uint64_t savedRegisterLoc = registers.getSP() + stackSize;
-
- if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
+/// CompactUnwinder_arm64 uses a compact unwind info to virtually "step" (aka
+/// unwind) by modifying a Registers_arm64 register set
+template <typename A>
+class CompactUnwinder_arm64 {
+public:
+
+ static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding,
+ uint64_t functionStart, A &addressSpace,
+ Registers_arm64 &registers);
+
+private:
+ typename A::pint_t pint_t;
+
+ static int
+ stepWithCompactEncodingFrame(compact_unwind_encoding_t compactEncoding,
+ uint64_t functionStart, A &addressSpace,
+ Registers_arm64 &registers);
+ static int stepWithCompactEncodingFrameless(
+ compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
+ A &addressSpace, Registers_arm64 &registers);
+};
+
+template <typename A>
+int CompactUnwinder_arm64<A>::stepWithCompactEncoding(
+ compact_unwind_encoding_t compactEncoding, uint64_t functionStart,
+ A &addressSpace, Registers_arm64 &registers) {
+ switch (compactEncoding & UNWIND_ARM64_MODE_MASK) {
+ case UNWIND_ARM64_MODE_FRAME:
+ return stepWithCompactEncodingFrame(compactEncoding, functionStart,
+ addressSpace, registers);
+ case UNWIND_ARM64_MODE_FRAMELESS:
+ return stepWithCompactEncodingFrameless(compactEncoding, functionStart,
+ addressSpace, registers);
+ }
+ _LIBUNWIND_ABORT("invalid compact unwind encoding");
+}
+
+template <typename A>
+int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrameless(
+ compact_unwind_encoding_t encoding, uint64_t, A &addressSpace,
+ Registers_arm64 &registers) {
+ uint32_t stackSize =
+ 16 * EXTRACT_BITS(encoding, UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK);
+
+ uint64_t savedRegisterLoc = registers.getSP() + stackSize;
+
+ if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X20, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
registers.setRegister(UNW_AARCH64_X21, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X22, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
registers.setRegister(UNW_AARCH64_X23, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X24, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
registers.setRegister(UNW_AARCH64_X25, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X26, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
registers.setRegister(UNW_AARCH64_X27, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X28, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
-
- if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+
+ if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V8,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V9,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V10,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V11,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V12,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V13,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V14,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V15,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
-
- // subtract stack size off of sp
- registers.setSP(savedRegisterLoc);
-
- // set pc to be value in lr
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+
+ // subtract stack size off of sp
+ registers.setSP(savedRegisterLoc);
+
+ // set pc to be value in lr
registers.setIP(registers.getRegister(UNW_AARCH64_LR));
-
- return UNW_STEP_SUCCESS;
-}
-
-template <typename A>
-int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame(
- compact_unwind_encoding_t encoding, uint64_t, A &addressSpace,
- Registers_arm64 &registers) {
- uint64_t savedRegisterLoc = registers.getFP() - 8;
-
- if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
+
+ return UNW_STEP_SUCCESS;
+}
+
+template <typename A>
+int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame(
+ compact_unwind_encoding_t encoding, uint64_t, A &addressSpace,
+ Registers_arm64 &registers) {
+ uint64_t savedRegisterLoc = registers.getFP() - 8;
+
+ if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X20, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
registers.setRegister(UNW_AARCH64_X21, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X22, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
registers.setRegister(UNW_AARCH64_X23, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X24, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
registers.setRegister(UNW_AARCH64_X25, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X26, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
registers.setRegister(UNW_AARCH64_X27, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ savedRegisterLoc -= 8;
registers.setRegister(UNW_AARCH64_X28, addressSpace.get64(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
-
- if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
+ savedRegisterLoc -= 8;
+ }
+
+ if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V8,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V9,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V10,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V11,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V12,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V13,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
- if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+ if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
registers.setFloatRegister(UNW_AARCH64_V14,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
registers.setFloatRegister(UNW_AARCH64_V15,
- addressSpace.getDouble(savedRegisterLoc));
- savedRegisterLoc -= 8;
- }
-
- uint64_t fp = registers.getFP();
- // fp points to old fp
- registers.setFP(addressSpace.get64(fp));
- // old sp is fp less saved fp and lr
- registers.setSP(fp + 16);
- // pop return address into pc
- registers.setIP(addressSpace.get64(fp + 8));
-
- return UNW_STEP_SUCCESS;
-}
+ addressSpace.getDouble(savedRegisterLoc));
+ savedRegisterLoc -= 8;
+ }
+
+ uint64_t fp = registers.getFP();
+ // fp points to old fp
+ registers.setFP(addressSpace.get64(fp));
+ // old sp is fp less saved fp and lr
+ registers.setSP(fp + 16);
+ // pop return address into pc
+ registers.setIP(addressSpace.get64(fp + 8));
+
+ return UNW_STEP_SUCCESS;
+}
#endif // _LIBUNWIND_TARGET_AARCH64
-
-
-} // namespace libunwind
-
-#endif // __COMPACT_UNWINDER_HPP__
+
+
+} // namespace libunwind
+
+#endif // __COMPACT_UNWINDER_HPP__
diff --git a/contrib/libs/libunwind/src/DwarfInstructions.hpp b/contrib/libs/libunwind/src/DwarfInstructions.hpp
index 4f61bf739c..c1a241c55c 100644
--- a/contrib/libs/libunwind/src/DwarfInstructions.hpp
+++ b/contrib/libs/libunwind/src/DwarfInstructions.hpp
@@ -1,79 +1,79 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
+//
+//
// Processor specific interpretation of DWARF unwind info.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __DWARF_INSTRUCTIONS_HPP__
-#define __DWARF_INSTRUCTIONS_HPP__
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "dwarf2.h"
-#include "Registers.hpp"
-#include "DwarfParser.hpp"
-#include "config.h"
-
-
-namespace libunwind {
-
-
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __DWARF_INSTRUCTIONS_HPP__
+#define __DWARF_INSTRUCTIONS_HPP__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "dwarf2.h"
+#include "Registers.hpp"
+#include "DwarfParser.hpp"
+#include "config.h"
+
+
+namespace libunwind {
+
+
 /// DwarfInstructions maps abstract DWARF unwind instructions to a particular
-/// architecture
-template <typename A, typename R>
-class DwarfInstructions {
-public:
- typedef typename A::pint_t pint_t;
- typedef typename A::sint_t sint_t;
-
- static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart,
+/// architecture
+template <typename A, typename R>
+class DwarfInstructions {
+public:
+ typedef typename A::pint_t pint_t;
+ typedef typename A::sint_t sint_t;
+
+ static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart,
R &registers, bool &isSignalFrame);
-
-private:
-
- enum {
- DW_X86_64_RET_ADDR = 16
- };
-
- enum {
- DW_X86_RET_ADDR = 8
- };
-
- typedef typename CFI_Parser<A>::RegisterLocation RegisterLocation;
- typedef typename CFI_Parser<A>::PrologInfo PrologInfo;
- typedef typename CFI_Parser<A>::FDE_Info FDE_Info;
- typedef typename CFI_Parser<A>::CIE_Info CIE_Info;
-
- static pint_t evaluateExpression(pint_t expression, A &addressSpace,
- const R &registers,
- pint_t initialStackValue);
- static pint_t getSavedRegister(A &addressSpace, const R &registers,
- pint_t cfa, const RegisterLocation &savedReg);
- static double getSavedFloatRegister(A &addressSpace, const R &registers,
- pint_t cfa, const RegisterLocation &savedReg);
- static v128 getSavedVectorRegister(A &addressSpace, const R &registers,
- pint_t cfa, const RegisterLocation &savedReg);
-
- static pint_t getCFA(A &addressSpace, const PrologInfo &prolog,
- const R &registers) {
+
+private:
+
+ enum {
+ DW_X86_64_RET_ADDR = 16
+ };
+
+ enum {
+ DW_X86_RET_ADDR = 8
+ };
+
+ typedef typename CFI_Parser<A>::RegisterLocation RegisterLocation;
+ typedef typename CFI_Parser<A>::PrologInfo PrologInfo;
+ typedef typename CFI_Parser<A>::FDE_Info FDE_Info;
+ typedef typename CFI_Parser<A>::CIE_Info CIE_Info;
+
+ static pint_t evaluateExpression(pint_t expression, A &addressSpace,
+ const R &registers,
+ pint_t initialStackValue);
+ static pint_t getSavedRegister(A &addressSpace, const R &registers,
+ pint_t cfa, const RegisterLocation &savedReg);
+ static double getSavedFloatRegister(A &addressSpace, const R &registers,
+ pint_t cfa, const RegisterLocation &savedReg);
+ static v128 getSavedVectorRegister(A &addressSpace, const R &registers,
+ pint_t cfa, const RegisterLocation &savedReg);
+
+ static pint_t getCFA(A &addressSpace, const PrologInfo &prolog,
+ const R &registers) {
if (prolog.cfaRegister != 0)
return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) +
- prolog.cfaRegisterOffset);
- if (prolog.cfaExpression != 0)
- return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace,
- registers, 0);
- assert(0 && "getCFA(): unknown location");
- __builtin_unreachable();
- }
-};
-
+ prolog.cfaRegisterOffset);
+ if (prolog.cfaExpression != 0)
+ return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace,
+ registers, 0);
+ assert(0 && "getCFA(): unknown location");
+ __builtin_unreachable();
+ }
+};
+
template <typename R>
auto getSparcWCookie(const R &r, int) -> decltype(r.getWCookie()) {
return r.getWCookie();
@@ -81,108 +81,108 @@ auto getSparcWCookie(const R &r, int) -> decltype(r.getWCookie()) {
template <typename R> uint64_t getSparcWCookie(const R &, long) {
return 0;
}
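
getSparcWCookie above is picked by expression SFINAE: the first overload only exists when r.getWCookie() is well-formed, and passing the int literal 0 makes it win over the long fallback whenever both are viable. A self-contained sketch of the same dispatch, with hypothetical HasCookie/NoCookie types used purely for illustration:

    #include <cstdint>
    #include <cstdio>

    struct HasCookie { uint64_t getWCookie() const { return 0xABCD; } };
    struct NoCookie  {};

    // Drops out of overload resolution unless T has getWCookie().
    template <typename T>
    auto wcookie(const T &t, int) -> decltype(t.getWCookie()) { return t.getWCookie(); }

    // Fallback: always viable, but a worse match for the literal 0.
    template <typename T>
    uint64_t wcookie(const T &, long) { return 0; }

    int main() {
      HasCookie a;
      NoCookie b;
      std::printf("0x%llx %llu\n",
                  (unsigned long long)wcookie(a, 0),   // picks the decltype overload
                  (unsigned long long)wcookie(b, 0));  // SFINAE removes it; falls back
      return 0;
    }
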
-
-template <typename A, typename R>
-typename A::pint_t DwarfInstructions<A, R>::getSavedRegister(
- A &addressSpace, const R &registers, pint_t cfa,
- const RegisterLocation &savedReg) {
- switch (savedReg.location) {
- case CFI_Parser<A>::kRegisterInCFA:
+
+template <typename A, typename R>
+typename A::pint_t DwarfInstructions<A, R>::getSavedRegister(
+ A &addressSpace, const R &registers, pint_t cfa,
+ const RegisterLocation &savedReg) {
+ switch (savedReg.location) {
+ case CFI_Parser<A>::kRegisterInCFA:
return (pint_t)addressSpace.getRegister(cfa + (pint_t)savedReg.value);
-
+
case CFI_Parser<A>::kRegisterInCFADecrypt: // sparc64 specific
return addressSpace.getP(cfa + (pint_t)savedReg.value) ^
getSparcWCookie(registers, 0);
- case CFI_Parser<A>::kRegisterAtExpression:
+ case CFI_Parser<A>::kRegisterAtExpression:
return (pint_t)addressSpace.getRegister(evaluateExpression(
(pint_t)savedReg.value, addressSpace, registers, cfa));
-
- case CFI_Parser<A>::kRegisterIsExpression:
- return evaluateExpression((pint_t)savedReg.value, addressSpace,
- registers, cfa);
-
- case CFI_Parser<A>::kRegisterInRegister:
- return registers.getRegister((int)savedReg.value);
+
+ case CFI_Parser<A>::kRegisterIsExpression:
+ return evaluateExpression((pint_t)savedReg.value, addressSpace,
+ registers, cfa);
+
+ case CFI_Parser<A>::kRegisterInRegister:
+ return registers.getRegister((int)savedReg.value);
case CFI_Parser<A>::kRegisterUndefined:
return 0;
- case CFI_Parser<A>::kRegisterUnused:
- case CFI_Parser<A>::kRegisterOffsetFromCFA:
- // FIX ME
- break;
- }
- _LIBUNWIND_ABORT("unsupported restore location for register");
-}
-
-template <typename A, typename R>
-double DwarfInstructions<A, R>::getSavedFloatRegister(
- A &addressSpace, const R &registers, pint_t cfa,
- const RegisterLocation &savedReg) {
- switch (savedReg.location) {
- case CFI_Parser<A>::kRegisterInCFA:
- return addressSpace.getDouble(cfa + (pint_t)savedReg.value);
-
- case CFI_Parser<A>::kRegisterAtExpression:
- return addressSpace.getDouble(
- evaluateExpression((pint_t)savedReg.value, addressSpace,
- registers, cfa));
+ case CFI_Parser<A>::kRegisterUnused:
+ case CFI_Parser<A>::kRegisterOffsetFromCFA:
+ // FIX ME
+ break;
+ }
+ _LIBUNWIND_ABORT("unsupported restore location for register");
+}
+
+template <typename A, typename R>
+double DwarfInstructions<A, R>::getSavedFloatRegister(
+ A &addressSpace, const R &registers, pint_t cfa,
+ const RegisterLocation &savedReg) {
+ switch (savedReg.location) {
+ case CFI_Parser<A>::kRegisterInCFA:
+ return addressSpace.getDouble(cfa + (pint_t)savedReg.value);
+
+ case CFI_Parser<A>::kRegisterAtExpression:
+ return addressSpace.getDouble(
+ evaluateExpression((pint_t)savedReg.value, addressSpace,
+ registers, cfa));
case CFI_Parser<A>::kRegisterUndefined:
return 0.0;
case CFI_Parser<A>::kRegisterInRegister:
#ifndef _LIBUNWIND_TARGET_ARM
return registers.getFloatRegister((int)savedReg.value);
#endif
- case CFI_Parser<A>::kRegisterIsExpression:
- case CFI_Parser<A>::kRegisterUnused:
- case CFI_Parser<A>::kRegisterOffsetFromCFA:
+ case CFI_Parser<A>::kRegisterIsExpression:
+ case CFI_Parser<A>::kRegisterUnused:
+ case CFI_Parser<A>::kRegisterOffsetFromCFA:
case CFI_Parser<A>::kRegisterInCFADecrypt:
- // FIX ME
- break;
- }
- _LIBUNWIND_ABORT("unsupported restore location for float register");
-}
-
-template <typename A, typename R>
-v128 DwarfInstructions<A, R>::getSavedVectorRegister(
- A &addressSpace, const R &registers, pint_t cfa,
- const RegisterLocation &savedReg) {
- switch (savedReg.location) {
- case CFI_Parser<A>::kRegisterInCFA:
- return addressSpace.getVector(cfa + (pint_t)savedReg.value);
-
- case CFI_Parser<A>::kRegisterAtExpression:
- return addressSpace.getVector(
- evaluateExpression((pint_t)savedReg.value, addressSpace,
- registers, cfa));
-
- case CFI_Parser<A>::kRegisterIsExpression:
- case CFI_Parser<A>::kRegisterUnused:
+ // FIX ME
+ break;
+ }
+ _LIBUNWIND_ABORT("unsupported restore location for float register");
+}
+
+template <typename A, typename R>
+v128 DwarfInstructions<A, R>::getSavedVectorRegister(
+ A &addressSpace, const R &registers, pint_t cfa,
+ const RegisterLocation &savedReg) {
+ switch (savedReg.location) {
+ case CFI_Parser<A>::kRegisterInCFA:
+ return addressSpace.getVector(cfa + (pint_t)savedReg.value);
+
+ case CFI_Parser<A>::kRegisterAtExpression:
+ return addressSpace.getVector(
+ evaluateExpression((pint_t)savedReg.value, addressSpace,
+ registers, cfa));
+
+ case CFI_Parser<A>::kRegisterIsExpression:
+ case CFI_Parser<A>::kRegisterUnused:
case CFI_Parser<A>::kRegisterUndefined:
- case CFI_Parser<A>::kRegisterOffsetFromCFA:
- case CFI_Parser<A>::kRegisterInRegister:
+ case CFI_Parser<A>::kRegisterOffsetFromCFA:
+ case CFI_Parser<A>::kRegisterInRegister:
case CFI_Parser<A>::kRegisterInCFADecrypt:
- // FIX ME
- break;
- }
- _LIBUNWIND_ABORT("unsupported restore location for vector register");
-}
-
-template <typename A, typename R>
-int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
+ // FIX ME
+ break;
+ }
+ _LIBUNWIND_ABORT("unsupported restore location for vector register");
+}
+
+template <typename A, typename R>
+int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
pint_t fdeStart, R &registers,
bool &isSignalFrame) {
- FDE_Info fdeInfo;
- CIE_Info cieInfo;
- if (CFI_Parser<A>::decodeFDE(addressSpace, fdeStart, &fdeInfo,
- &cieInfo) == NULL) {
- PrologInfo prolog;
- if (CFI_Parser<A>::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc,
+ FDE_Info fdeInfo;
+ CIE_Info cieInfo;
+ if (CFI_Parser<A>::decodeFDE(addressSpace, fdeStart, &fdeInfo,
+ &cieInfo) == NULL) {
+ PrologInfo prolog;
+ if (CFI_Parser<A>::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc,
R::getArch(), &prolog)) {
- // get pointer to cfa (architecture specific)
- pint_t cfa = getCFA(addressSpace, prolog, registers);
-
+ // get pointer to cfa (architecture specific)
+ pint_t cfa = getCFA(addressSpace, prolog, registers);
+
// restore registers that DWARF says were saved
- R newRegisters = registers;
+ R newRegisters = registers;
// Typically, the CFA is the stack pointer at the call site in
// the previous frame. However, there are scenarios in which this is not
@@ -193,39 +193,39 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
// by a CFI directive later on.
newRegisters.setSP(cfa);
- pint_t returnAddress = 0;
- const int lastReg = R::lastDwarfRegNum();
+ pint_t returnAddress = 0;
+ const int lastReg = R::lastDwarfRegNum();
assert(static_cast<int>(CFI_Parser<A>::kMaxRegisterNumber) >= lastReg &&
- "register range too large");
- assert(lastReg >= (int)cieInfo.returnAddressRegister &&
- "register range does not contain return address register");
- for (int i = 0; i <= lastReg; ++i) {
- if (prolog.savedRegisters[i].location !=
- CFI_Parser<A>::kRegisterUnused) {
- if (registers.validFloatRegister(i))
- newRegisters.setFloatRegister(
- i, getSavedFloatRegister(addressSpace, registers, cfa,
- prolog.savedRegisters[i]));
- else if (registers.validVectorRegister(i))
- newRegisters.setVectorRegister(
- i, getSavedVectorRegister(addressSpace, registers, cfa,
- prolog.savedRegisters[i]));
- else if (i == (int)cieInfo.returnAddressRegister)
- returnAddress = getSavedRegister(addressSpace, registers, cfa,
- prolog.savedRegisters[i]);
- else if (registers.validRegister(i))
- newRegisters.setRegister(
- i, getSavedRegister(addressSpace, registers, cfa,
- prolog.savedRegisters[i]));
- else
- return UNW_EBADREG;
+ "register range too large");
+ assert(lastReg >= (int)cieInfo.returnAddressRegister &&
+ "register range does not contain return address register");
+ for (int i = 0; i <= lastReg; ++i) {
+ if (prolog.savedRegisters[i].location !=
+ CFI_Parser<A>::kRegisterUnused) {
+ if (registers.validFloatRegister(i))
+ newRegisters.setFloatRegister(
+ i, getSavedFloatRegister(addressSpace, registers, cfa,
+ prolog.savedRegisters[i]));
+ else if (registers.validVectorRegister(i))
+ newRegisters.setVectorRegister(
+ i, getSavedVectorRegister(addressSpace, registers, cfa,
+ prolog.savedRegisters[i]));
+ else if (i == (int)cieInfo.returnAddressRegister)
+ returnAddress = getSavedRegister(addressSpace, registers, cfa,
+ prolog.savedRegisters[i]);
+ else if (registers.validRegister(i))
+ newRegisters.setRegister(
+ i, getSavedRegister(addressSpace, registers, cfa,
+ prolog.savedRegisters[i]));
+ else
+ return UNW_EBADREG;
} else if (i == (int)cieInfo.returnAddressRegister) {
        // Leaf function keeps the return address in a register and there are
        // no explicit instructions on how to restore it.
returnAddress = registers.getRegister(cieInfo.returnAddressRegister);
- }
- }
-
+ }
+ }
+
isSignalFrame = cieInfo.isSignalFrame;
#if defined(_LIBUNWIND_TARGET_AARCH64)
@@ -310,562 +310,562 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
}
#endif
    - // Return address is the address after the call site instruction, so
    - // setting IP to that simulates a return.
- newRegisters.setIP(returnAddress);
-
- // Simulate the step by replacing the register set with the new ones.
- registers = newRegisters;
-
- return UNW_STEP_SUCCESS;
- }
- }
- return UNW_EBADFRAME;
-}
-
-template <typename A, typename R>
-typename A::pint_t
-DwarfInstructions<A, R>::evaluateExpression(pint_t expression, A &addressSpace,
- const R &registers,
- pint_t initialStackValue) {
- const bool log = false;
- pint_t p = expression;
- pint_t expressionEnd = expression + 20; // temp, until len read
- pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd);
- expressionEnd = p + length;
- if (log)
- fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n",
- (uint64_t)length);
- pint_t stack[100];
- pint_t *sp = stack;
- *(++sp) = initialStackValue;
-
- while (p < expressionEnd) {
- if (log) {
- for (pint_t *t = sp; t > stack; --t) {
- fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t));
- }
- }
- uint8_t opcode = addressSpace.get8(p++);
- sint_t svalue, svalue2;
- pint_t value;
- uint32_t reg;
- switch (opcode) {
- case DW_OP_addr:
- // push immediate address sized value
- value = addressSpace.getP(p);
- p += sizeof(pint_t);
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_deref:
- // pop stack, dereference, push result
- value = *sp--;
- *(++sp) = addressSpace.getP(value);
- if (log)
- fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_const1u:
- // push immediate 1 byte value
- value = addressSpace.get8(p);
- p += 1;
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_const1s:
- // push immediate 1 byte signed value
- svalue = (int8_t) addressSpace.get8(p);
- p += 1;
- *(++sp) = (pint_t)svalue;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
- break;
-
- case DW_OP_const2u:
- // push immediate 2 byte value
- value = addressSpace.get16(p);
- p += 2;
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_const2s:
- // push immediate 2 byte signed value
- svalue = (int16_t) addressSpace.get16(p);
- p += 2;
- *(++sp) = (pint_t)svalue;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
- break;
-
- case DW_OP_const4u:
- // push immediate 4 byte value
- value = addressSpace.get32(p);
- p += 4;
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_const4s:
- // push immediate 4 byte signed value
- svalue = (int32_t)addressSpace.get32(p);
- p += 4;
- *(++sp) = (pint_t)svalue;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
- break;
-
- case DW_OP_const8u:
- // push immediate 8 byte value
- value = (pint_t)addressSpace.get64(p);
- p += 8;
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_const8s:
- // push immediate 8 byte signed value
- value = (pint_t)addressSpace.get64(p);
- p += 8;
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_constu:
- // push immediate ULEB128 value
- value = (pint_t)addressSpace.getULEB128(p, expressionEnd);
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_consts:
- // push immediate SLEB128 value
- svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
- *(++sp) = (pint_t)svalue;
- if (log)
- fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
- break;
-
- case DW_OP_dup:
- // push top of stack
- value = *sp;
- *(++sp) = value;
- if (log)
- fprintf(stderr, "duplicate top of stack\n");
- break;
-
- case DW_OP_drop:
- // pop
- --sp;
- if (log)
- fprintf(stderr, "pop top of stack\n");
- break;
-
- case DW_OP_over:
- // dup second
- value = sp[-1];
- *(++sp) = value;
- if (log)
- fprintf(stderr, "duplicate second in stack\n");
- break;
-
- case DW_OP_pick:
- // pick from
- reg = addressSpace.get8(p);
- p += 1;
    + // Return address is the address after the call site instruction, so
    + // setting IP to that simulates a return.
+ newRegisters.setIP(returnAddress);
+
+ // Simulate the step by replacing the register set with the new ones.
+ registers = newRegisters;
+
+ return UNW_STEP_SUCCESS;
+ }
+ }
+ return UNW_EBADFRAME;
+}
+
+template <typename A, typename R>
+typename A::pint_t
+DwarfInstructions<A, R>::evaluateExpression(pint_t expression, A &addressSpace,
+ const R &registers,
+ pint_t initialStackValue) {
+ const bool log = false;
+ pint_t p = expression;
+ pint_t expressionEnd = expression + 20; // temp, until len read
+ pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd);
+ expressionEnd = p + length;
+ if (log)
+ fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n",
+ (uint64_t)length);
+ pint_t stack[100];
+ pint_t *sp = stack;
+ *(++sp) = initialStackValue;
+
+ while (p < expressionEnd) {
+ if (log) {
+ for (pint_t *t = sp; t > stack; --t) {
+ fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t));
+ }
+ }
+ uint8_t opcode = addressSpace.get8(p++);
+ sint_t svalue, svalue2;
+ pint_t value;
+ uint32_t reg;
+ switch (opcode) {
+ case DW_OP_addr:
+ // push immediate address sized value
+ value = addressSpace.getP(p);
+ p += sizeof(pint_t);
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_deref:
+ // pop stack, dereference, push result
+ value = *sp--;
+ *(++sp) = addressSpace.getP(value);
+ if (log)
+ fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_const1u:
+ // push immediate 1 byte value
+ value = addressSpace.get8(p);
+ p += 1;
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_const1s:
+ // push immediate 1 byte signed value
+ svalue = (int8_t) addressSpace.get8(p);
+ p += 1;
+ *(++sp) = (pint_t)svalue;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
+ break;
+
+ case DW_OP_const2u:
+ // push immediate 2 byte value
+ value = addressSpace.get16(p);
+ p += 2;
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_const2s:
+ // push immediate 2 byte signed value
+ svalue = (int16_t) addressSpace.get16(p);
+ p += 2;
+ *(++sp) = (pint_t)svalue;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
+ break;
+
+ case DW_OP_const4u:
+ // push immediate 4 byte value
+ value = addressSpace.get32(p);
+ p += 4;
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_const4s:
+ // push immediate 4 byte signed value
+ svalue = (int32_t)addressSpace.get32(p);
+ p += 4;
+ *(++sp) = (pint_t)svalue;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
+ break;
+
+ case DW_OP_const8u:
+ // push immediate 8 byte value
+ value = (pint_t)addressSpace.get64(p);
+ p += 8;
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_const8s:
+ // push immediate 8 byte signed value
+ value = (pint_t)addressSpace.get64(p);
+ p += 8;
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_constu:
+ // push immediate ULEB128 value
+ value = (pint_t)addressSpace.getULEB128(p, expressionEnd);
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_consts:
+ // push immediate SLEB128 value
+ svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
+ *(++sp) = (pint_t)svalue;
+ if (log)
+ fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue);
+ break;
+
+ case DW_OP_dup:
+ // push top of stack
+ value = *sp;
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "duplicate top of stack\n");
+ break;
+
+ case DW_OP_drop:
+ // pop
+ --sp;
+ if (log)
+ fprintf(stderr, "pop top of stack\n");
+ break;
+
+ case DW_OP_over:
+ // dup second
+ value = sp[-1];
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "duplicate second in stack\n");
+ break;
+
+ case DW_OP_pick:
+ // pick from
+ reg = addressSpace.get8(p);
+ p += 1;
value = sp[-(int)reg];
- *(++sp) = value;
- if (log)
- fprintf(stderr, "duplicate %d in stack\n", reg);
- break;
-
- case DW_OP_swap:
- // swap top two
- value = sp[0];
- sp[0] = sp[-1];
- sp[-1] = value;
- if (log)
- fprintf(stderr, "swap top of stack\n");
- break;
-
- case DW_OP_rot:
- // rotate top three
- value = sp[0];
- sp[0] = sp[-1];
- sp[-1] = sp[-2];
- sp[-2] = value;
- if (log)
- fprintf(stderr, "rotate top three of stack\n");
- break;
-
- case DW_OP_xderef:
- // pop stack, dereference, push result
- value = *sp--;
- *sp = *((pint_t*)value);
- if (log)
- fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_abs:
- svalue = (sint_t)*sp;
- if (svalue < 0)
- *sp = (pint_t)(-svalue);
- if (log)
- fprintf(stderr, "abs\n");
- break;
-
- case DW_OP_and:
- value = *sp--;
- *sp &= value;
- if (log)
- fprintf(stderr, "and\n");
- break;
-
- case DW_OP_div:
- svalue = (sint_t)(*sp--);
- svalue2 = (sint_t)*sp;
- *sp = (pint_t)(svalue2 / svalue);
- if (log)
- fprintf(stderr, "div\n");
- break;
-
- case DW_OP_minus:
- value = *sp--;
- *sp = *sp - value;
- if (log)
- fprintf(stderr, "minus\n");
- break;
-
- case DW_OP_mod:
- svalue = (sint_t)(*sp--);
- svalue2 = (sint_t)*sp;
- *sp = (pint_t)(svalue2 % svalue);
- if (log)
-      fprintf(stderr, "mod\n");
- break;
-
- case DW_OP_mul:
- svalue = (sint_t)(*sp--);
- svalue2 = (sint_t)*sp;
- *sp = (pint_t)(svalue2 * svalue);
- if (log)
- fprintf(stderr, "mul\n");
- break;
-
- case DW_OP_neg:
- *sp = 0 - *sp;
- if (log)
- fprintf(stderr, "neg\n");
- break;
-
- case DW_OP_not:
- svalue = (sint_t)(*sp);
- *sp = (pint_t)(~svalue);
- if (log)
- fprintf(stderr, "not\n");
- break;
-
- case DW_OP_or:
- value = *sp--;
- *sp |= value;
- if (log)
- fprintf(stderr, "or\n");
- break;
-
- case DW_OP_plus:
- value = *sp--;
- *sp += value;
- if (log)
- fprintf(stderr, "plus\n");
- break;
-
- case DW_OP_plus_uconst:
-      // pop stack, add ULEB128 constant, push result
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "duplicate %d in stack\n", reg);
+ break;
+
+ case DW_OP_swap:
+ // swap top two
+ value = sp[0];
+ sp[0] = sp[-1];
+ sp[-1] = value;
+ if (log)
+ fprintf(stderr, "swap top of stack\n");
+ break;
+
+ case DW_OP_rot:
+ // rotate top three
+ value = sp[0];
+ sp[0] = sp[-1];
+ sp[-1] = sp[-2];
+ sp[-2] = value;
+ if (log)
+ fprintf(stderr, "rotate top three of stack\n");
+ break;
+
+ case DW_OP_xderef:
+ // pop stack, dereference, push result
+ value = *sp--;
+ *sp = *((pint_t*)value);
+ if (log)
+ fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_abs:
+ svalue = (sint_t)*sp;
+ if (svalue < 0)
+ *sp = (pint_t)(-svalue);
+ if (log)
+ fprintf(stderr, "abs\n");
+ break;
+
+ case DW_OP_and:
+ value = *sp--;
+ *sp &= value;
+ if (log)
+ fprintf(stderr, "and\n");
+ break;
+
+ case DW_OP_div:
+ svalue = (sint_t)(*sp--);
+ svalue2 = (sint_t)*sp;
+ *sp = (pint_t)(svalue2 / svalue);
+ if (log)
+ fprintf(stderr, "div\n");
+ break;
+
+ case DW_OP_minus:
+ value = *sp--;
+ *sp = *sp - value;
+ if (log)
+ fprintf(stderr, "minus\n");
+ break;
+
+ case DW_OP_mod:
+ svalue = (sint_t)(*sp--);
+ svalue2 = (sint_t)*sp;
+ *sp = (pint_t)(svalue2 % svalue);
+ if (log)
+      fprintf(stderr, "mod\n");
+ break;
+
+ case DW_OP_mul:
+ svalue = (sint_t)(*sp--);
+ svalue2 = (sint_t)*sp;
+ *sp = (pint_t)(svalue2 * svalue);
+ if (log)
+ fprintf(stderr, "mul\n");
+ break;
+
+ case DW_OP_neg:
+ *sp = 0 - *sp;
+ if (log)
+ fprintf(stderr, "neg\n");
+ break;
+
+ case DW_OP_not:
+ svalue = (sint_t)(*sp);
+ *sp = (pint_t)(~svalue);
+ if (log)
+ fprintf(stderr, "not\n");
+ break;
+
+ case DW_OP_or:
+ value = *sp--;
+ *sp |= value;
+ if (log)
+ fprintf(stderr, "or\n");
+ break;
+
+ case DW_OP_plus:
+ value = *sp--;
+ *sp += value;
+ if (log)
+ fprintf(stderr, "plus\n");
+ break;
+
+ case DW_OP_plus_uconst:
+      // pop stack, add ULEB128 constant, push result
*sp += static_cast<pint_t>(addressSpace.getULEB128(p, expressionEnd));
- if (log)
- fprintf(stderr, "add constant\n");
- break;
-
- case DW_OP_shl:
- value = *sp--;
- *sp = *sp << value;
- if (log)
- fprintf(stderr, "shift left\n");
- break;
-
- case DW_OP_shr:
- value = *sp--;
- *sp = *sp >> value;
- if (log)
-      fprintf(stderr, "shift right\n");
- break;
-
- case DW_OP_shra:
- value = *sp--;
- svalue = (sint_t)*sp;
- *sp = (pint_t)(svalue >> value);
- if (log)
-      fprintf(stderr, "shift right arithmetic\n");
- break;
-
- case DW_OP_xor:
- value = *sp--;
- *sp ^= value;
- if (log)
- fprintf(stderr, "xor\n");
- break;
-
- case DW_OP_skip:
- svalue = (int16_t) addressSpace.get16(p);
- p += 2;
- p = (pint_t)((sint_t)p + svalue);
- if (log)
- fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue);
- break;
-
- case DW_OP_bra:
- svalue = (int16_t) addressSpace.get16(p);
- p += 2;
- if (*sp--)
- p = (pint_t)((sint_t)p + svalue);
- if (log)
- fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue);
- break;
-
- case DW_OP_eq:
- value = *sp--;
- *sp = (*sp == value);
- if (log)
- fprintf(stderr, "eq\n");
- break;
-
- case DW_OP_ge:
- value = *sp--;
- *sp = (*sp >= value);
- if (log)
- fprintf(stderr, "ge\n");
- break;
-
- case DW_OP_gt:
- value = *sp--;
- *sp = (*sp > value);
- if (log)
- fprintf(stderr, "gt\n");
- break;
-
- case DW_OP_le:
- value = *sp--;
- *sp = (*sp <= value);
- if (log)
- fprintf(stderr, "le\n");
- break;
-
- case DW_OP_lt:
- value = *sp--;
- *sp = (*sp < value);
- if (log)
- fprintf(stderr, "lt\n");
- break;
-
- case DW_OP_ne:
- value = *sp--;
- *sp = (*sp != value);
- if (log)
- fprintf(stderr, "ne\n");
- break;
-
- case DW_OP_lit0:
- case DW_OP_lit1:
- case DW_OP_lit2:
- case DW_OP_lit3:
- case DW_OP_lit4:
- case DW_OP_lit5:
- case DW_OP_lit6:
- case DW_OP_lit7:
- case DW_OP_lit8:
- case DW_OP_lit9:
- case DW_OP_lit10:
- case DW_OP_lit11:
- case DW_OP_lit12:
- case DW_OP_lit13:
- case DW_OP_lit14:
- case DW_OP_lit15:
- case DW_OP_lit16:
- case DW_OP_lit17:
- case DW_OP_lit18:
- case DW_OP_lit19:
- case DW_OP_lit20:
- case DW_OP_lit21:
- case DW_OP_lit22:
- case DW_OP_lit23:
- case DW_OP_lit24:
- case DW_OP_lit25:
- case DW_OP_lit26:
- case DW_OP_lit27:
- case DW_OP_lit28:
- case DW_OP_lit29:
- case DW_OP_lit30:
- case DW_OP_lit31:
- value = static_cast<pint_t>(opcode - DW_OP_lit0);
- *(++sp) = value;
- if (log)
- fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_reg0:
- case DW_OP_reg1:
- case DW_OP_reg2:
- case DW_OP_reg3:
- case DW_OP_reg4:
- case DW_OP_reg5:
- case DW_OP_reg6:
- case DW_OP_reg7:
- case DW_OP_reg8:
- case DW_OP_reg9:
- case DW_OP_reg10:
- case DW_OP_reg11:
- case DW_OP_reg12:
- case DW_OP_reg13:
- case DW_OP_reg14:
- case DW_OP_reg15:
- case DW_OP_reg16:
- case DW_OP_reg17:
- case DW_OP_reg18:
- case DW_OP_reg19:
- case DW_OP_reg20:
- case DW_OP_reg21:
- case DW_OP_reg22:
- case DW_OP_reg23:
- case DW_OP_reg24:
- case DW_OP_reg25:
- case DW_OP_reg26:
- case DW_OP_reg27:
- case DW_OP_reg28:
- case DW_OP_reg29:
- case DW_OP_reg30:
- case DW_OP_reg31:
- reg = static_cast<uint32_t>(opcode - DW_OP_reg0);
- *(++sp) = registers.getRegister((int)reg);
- if (log)
- fprintf(stderr, "push reg %d\n", reg);
- break;
-
- case DW_OP_regx:
- reg = static_cast<uint32_t>(addressSpace.getULEB128(p, expressionEnd));
- *(++sp) = registers.getRegister((int)reg);
- if (log)
-        fprintf(stderr, "push reg %d\n", reg);
- break;
-
- case DW_OP_breg0:
- case DW_OP_breg1:
- case DW_OP_breg2:
- case DW_OP_breg3:
- case DW_OP_breg4:
- case DW_OP_breg5:
- case DW_OP_breg6:
- case DW_OP_breg7:
- case DW_OP_breg8:
- case DW_OP_breg9:
- case DW_OP_breg10:
- case DW_OP_breg11:
- case DW_OP_breg12:
- case DW_OP_breg13:
- case DW_OP_breg14:
- case DW_OP_breg15:
- case DW_OP_breg16:
- case DW_OP_breg17:
- case DW_OP_breg18:
- case DW_OP_breg19:
- case DW_OP_breg20:
- case DW_OP_breg21:
- case DW_OP_breg22:
- case DW_OP_breg23:
- case DW_OP_breg24:
- case DW_OP_breg25:
- case DW_OP_breg26:
- case DW_OP_breg27:
- case DW_OP_breg28:
- case DW_OP_breg29:
- case DW_OP_breg30:
- case DW_OP_breg31:
- reg = static_cast<uint32_t>(opcode - DW_OP_breg0);
- svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
- svalue += static_cast<sint_t>(registers.getRegister((int)reg));
- *(++sp) = (pint_t)(svalue);
- if (log)
- fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
- break;
-
- case DW_OP_bregx:
- reg = static_cast<uint32_t>(addressSpace.getULEB128(p, expressionEnd));
- svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
- svalue += static_cast<sint_t>(registers.getRegister((int)reg));
- *(++sp) = (pint_t)(svalue);
- if (log)
- fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
- break;
-
- case DW_OP_fbreg:
- _LIBUNWIND_ABORT("DW_OP_fbreg not implemented");
- break;
-
- case DW_OP_piece:
- _LIBUNWIND_ABORT("DW_OP_piece not implemented");
- break;
-
- case DW_OP_deref_size:
- // pop stack, dereference, push result
- value = *sp--;
- switch (addressSpace.get8(p++)) {
- case 1:
- value = addressSpace.get8(value);
- break;
- case 2:
- value = addressSpace.get16(value);
- break;
- case 4:
- value = addressSpace.get32(value);
- break;
- case 8:
- value = (pint_t)addressSpace.get64(value);
- break;
- default:
- _LIBUNWIND_ABORT("DW_OP_deref_size with bad size");
- }
- *(++sp) = value;
- if (log)
- fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value);
- break;
-
- case DW_OP_xderef_size:
- case DW_OP_nop:
- case DW_OP_push_object_addres:
- case DW_OP_call2:
- case DW_OP_call4:
- case DW_OP_call_ref:
- default:
+ if (log)
+ fprintf(stderr, "add constant\n");
+ break;
+
+ case DW_OP_shl:
+ value = *sp--;
+ *sp = *sp << value;
+ if (log)
+ fprintf(stderr, "shift left\n");
+ break;
+
+ case DW_OP_shr:
+ value = *sp--;
+ *sp = *sp >> value;
+ if (log)
+      fprintf(stderr, "shift right\n");
+ break;
+
+ case DW_OP_shra:
+ value = *sp--;
+ svalue = (sint_t)*sp;
+ *sp = (pint_t)(svalue >> value);
+ if (log)
+      fprintf(stderr, "shift right arithmetic\n");
+ break;
+
+ case DW_OP_xor:
+ value = *sp--;
+ *sp ^= value;
+ if (log)
+ fprintf(stderr, "xor\n");
+ break;
+
+ case DW_OP_skip:
+ svalue = (int16_t) addressSpace.get16(p);
+ p += 2;
+ p = (pint_t)((sint_t)p + svalue);
+ if (log)
+ fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue);
+ break;
+
+ case DW_OP_bra:
+ svalue = (int16_t) addressSpace.get16(p);
+ p += 2;
+ if (*sp--)
+ p = (pint_t)((sint_t)p + svalue);
+ if (log)
+ fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue);
+ break;
+
+ case DW_OP_eq:
+ value = *sp--;
+ *sp = (*sp == value);
+ if (log)
+ fprintf(stderr, "eq\n");
+ break;
+
+ case DW_OP_ge:
+ value = *sp--;
+ *sp = (*sp >= value);
+ if (log)
+ fprintf(stderr, "ge\n");
+ break;
+
+ case DW_OP_gt:
+ value = *sp--;
+ *sp = (*sp > value);
+ if (log)
+ fprintf(stderr, "gt\n");
+ break;
+
+ case DW_OP_le:
+ value = *sp--;
+ *sp = (*sp <= value);
+ if (log)
+ fprintf(stderr, "le\n");
+ break;
+
+ case DW_OP_lt:
+ value = *sp--;
+ *sp = (*sp < value);
+ if (log)
+ fprintf(stderr, "lt\n");
+ break;
+
+ case DW_OP_ne:
+ value = *sp--;
+ *sp = (*sp != value);
+ if (log)
+ fprintf(stderr, "ne\n");
+ break;
+
+ case DW_OP_lit0:
+ case DW_OP_lit1:
+ case DW_OP_lit2:
+ case DW_OP_lit3:
+ case DW_OP_lit4:
+ case DW_OP_lit5:
+ case DW_OP_lit6:
+ case DW_OP_lit7:
+ case DW_OP_lit8:
+ case DW_OP_lit9:
+ case DW_OP_lit10:
+ case DW_OP_lit11:
+ case DW_OP_lit12:
+ case DW_OP_lit13:
+ case DW_OP_lit14:
+ case DW_OP_lit15:
+ case DW_OP_lit16:
+ case DW_OP_lit17:
+ case DW_OP_lit18:
+ case DW_OP_lit19:
+ case DW_OP_lit20:
+ case DW_OP_lit21:
+ case DW_OP_lit22:
+ case DW_OP_lit23:
+ case DW_OP_lit24:
+ case DW_OP_lit25:
+ case DW_OP_lit26:
+ case DW_OP_lit27:
+ case DW_OP_lit28:
+ case DW_OP_lit29:
+ case DW_OP_lit30:
+ case DW_OP_lit31:
+ value = static_cast<pint_t>(opcode - DW_OP_lit0);
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_reg0:
+ case DW_OP_reg1:
+ case DW_OP_reg2:
+ case DW_OP_reg3:
+ case DW_OP_reg4:
+ case DW_OP_reg5:
+ case DW_OP_reg6:
+ case DW_OP_reg7:
+ case DW_OP_reg8:
+ case DW_OP_reg9:
+ case DW_OP_reg10:
+ case DW_OP_reg11:
+ case DW_OP_reg12:
+ case DW_OP_reg13:
+ case DW_OP_reg14:
+ case DW_OP_reg15:
+ case DW_OP_reg16:
+ case DW_OP_reg17:
+ case DW_OP_reg18:
+ case DW_OP_reg19:
+ case DW_OP_reg20:
+ case DW_OP_reg21:
+ case DW_OP_reg22:
+ case DW_OP_reg23:
+ case DW_OP_reg24:
+ case DW_OP_reg25:
+ case DW_OP_reg26:
+ case DW_OP_reg27:
+ case DW_OP_reg28:
+ case DW_OP_reg29:
+ case DW_OP_reg30:
+ case DW_OP_reg31:
+ reg = static_cast<uint32_t>(opcode - DW_OP_reg0);
+ *(++sp) = registers.getRegister((int)reg);
+ if (log)
+ fprintf(stderr, "push reg %d\n", reg);
+ break;
+
+ case DW_OP_regx:
+ reg = static_cast<uint32_t>(addressSpace.getULEB128(p, expressionEnd));
+ *(++sp) = registers.getRegister((int)reg);
+ if (log)
+        fprintf(stderr, "push reg %d\n", reg);
+ break;
+
+ case DW_OP_breg0:
+ case DW_OP_breg1:
+ case DW_OP_breg2:
+ case DW_OP_breg3:
+ case DW_OP_breg4:
+ case DW_OP_breg5:
+ case DW_OP_breg6:
+ case DW_OP_breg7:
+ case DW_OP_breg8:
+ case DW_OP_breg9:
+ case DW_OP_breg10:
+ case DW_OP_breg11:
+ case DW_OP_breg12:
+ case DW_OP_breg13:
+ case DW_OP_breg14:
+ case DW_OP_breg15:
+ case DW_OP_breg16:
+ case DW_OP_breg17:
+ case DW_OP_breg18:
+ case DW_OP_breg19:
+ case DW_OP_breg20:
+ case DW_OP_breg21:
+ case DW_OP_breg22:
+ case DW_OP_breg23:
+ case DW_OP_breg24:
+ case DW_OP_breg25:
+ case DW_OP_breg26:
+ case DW_OP_breg27:
+ case DW_OP_breg28:
+ case DW_OP_breg29:
+ case DW_OP_breg30:
+ case DW_OP_breg31:
+ reg = static_cast<uint32_t>(opcode - DW_OP_breg0);
+ svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
+ svalue += static_cast<sint_t>(registers.getRegister((int)reg));
+ *(++sp) = (pint_t)(svalue);
+ if (log)
+ fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
+ break;
+
+ case DW_OP_bregx:
+ reg = static_cast<uint32_t>(addressSpace.getULEB128(p, expressionEnd));
+ svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd);
+ svalue += static_cast<sint_t>(registers.getRegister((int)reg));
+ *(++sp) = (pint_t)(svalue);
+ if (log)
+ fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue);
+ break;
+
+ case DW_OP_fbreg:
+ _LIBUNWIND_ABORT("DW_OP_fbreg not implemented");
+ break;
+
+ case DW_OP_piece:
+ _LIBUNWIND_ABORT("DW_OP_piece not implemented");
+ break;
+
+ case DW_OP_deref_size:
+ // pop stack, dereference, push result
+ value = *sp--;
+ switch (addressSpace.get8(p++)) {
+ case 1:
+ value = addressSpace.get8(value);
+ break;
+ case 2:
+ value = addressSpace.get16(value);
+ break;
+ case 4:
+ value = addressSpace.get32(value);
+ break;
+ case 8:
+ value = (pint_t)addressSpace.get64(value);
+ break;
+ default:
+ _LIBUNWIND_ABORT("DW_OP_deref_size with bad size");
+ }
+ *(++sp) = value;
+ if (log)
+ fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value);
+ break;
+
+ case DW_OP_xderef_size:
+ case DW_OP_nop:
+ case DW_OP_push_object_addres:
+ case DW_OP_call2:
+ case DW_OP_call4:
+ case DW_OP_call_ref:
+ default:
_LIBUNWIND_ABORT("DWARF opcode not implemented");
- }
-
- }
- if (log)
- fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp);
- return *sp;
-}
-
-
-
-} // namespace libunwind
-
-#endif // __DWARF_INSTRUCTIONS_HPP__
+ }
+
+ }
+ if (log)
+ fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp);
+ return *sp;
+}
+
+
+
+} // namespace libunwind
+
+#endif // __DWARF_INSTRUCTIONS_HPP__
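As a rough illustration of the stack-machine pattern that DwarfInstructions<A, R>::evaluateExpression above implements, the following standalone C++ sketch evaluates a tiny DWARF-style expression. The DW_OP_* opcode values match the DWARF specification, but the helper name, simplified types, and flat byte buffer are assumptions made for illustration, not libunwind API.

// Illustrative sketch only: handles DW_OP_const1u / DW_OP_dup / DW_OP_plus
// over a flat byte buffer, mirroring the push/pop structure of
// evaluateExpression without address-space indirection or register access.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

namespace {
enum : uint8_t { kOpConst1u = 0x08, kOpDup = 0x12, kOpPlus = 0x22 };

uint64_t evalTinyExpression(const uint8_t *expr, size_t len) {
  std::vector<uint64_t> stack;
  for (size_t i = 0; i < len;) {
    switch (expr[i++]) {
    case kOpConst1u:            // push the next byte as an unsigned constant
      stack.push_back(expr[i++]);
      break;
    case kOpDup:                // duplicate the top of the stack
      stack.push_back(stack.back());
      break;
    case kOpPlus: {             // pop two values, push their sum
      uint64_t top = stack.back();
      stack.pop_back();
      stack.back() += top;
      break;
    }
    default:                    // anything else is out of scope for this sketch
      std::fprintf(stderr, "unhandled opcode\n");
      return 0;
    }
  }
  return stack.back();          // the result is whatever is left on top
}
} // namespace

int main() {
  // DW_OP_const1u 7, DW_OP_dup, DW_OP_plus  ->  7 + 7 = 14
  const uint8_t expr[] = {kOpConst1u, 7, kOpDup, kOpPlus};
  std::printf("%llu\n",
              (unsigned long long)evalTinyExpression(expr, sizeof(expr)));
  return 0;
}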
diff --git a/contrib/libs/libunwind/src/DwarfParser.hpp b/contrib/libs/libunwind/src/DwarfParser.hpp
index 8f764923e7..b5a53166fc 100644
--- a/contrib/libs/libunwind/src/DwarfParser.hpp
+++ b/contrib/libs/libunwind/src/DwarfParser.hpp
@@ -1,94 +1,94 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Parses DWARF CFIs (FDEs and CIEs).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __DWARF_PARSER_HPP__
-#define __DWARF_PARSER_HPP__
-
-#include <inttypes.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "libunwind.h"
-#include "dwarf2.h"
+//
+//
+// Parses DWARF CFIs (FDEs and CIEs).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __DWARF_PARSER_HPP__
+#define __DWARF_PARSER_HPP__
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libunwind.h"
+#include "dwarf2.h"
#include "Registers.hpp"
-
+
#include "config.h"
-
-namespace libunwind {
-/// CFI_Parser does basic parsing of CFI (Call Frame Information) records.
+namespace libunwind {
+
+/// CFI_Parser does basic parsing of CFI (Call Frame Information) records.
/// See DWARF Spec for details:
-/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
-///
-template <typename A>
-class CFI_Parser {
-public:
- typedef typename A::pint_t pint_t;
-
- /// Information encoded in a CIE (Common Information Entry)
- struct CIE_Info {
- pint_t cieStart;
- pint_t cieLength;
- pint_t cieInstructions;
- uint8_t pointerEncoding;
- uint8_t lsdaEncoding;
- uint8_t personalityEncoding;
- uint8_t personalityOffsetInCIE;
- pint_t personality;
- uint32_t codeAlignFactor;
- int dataAlignFactor;
- bool isSignalFrame;
- bool fdesHaveAugmentationData;
- uint8_t returnAddressRegister;
+/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+///
+template <typename A>
+class CFI_Parser {
+public:
+ typedef typename A::pint_t pint_t;
+
+ /// Information encoded in a CIE (Common Information Entry)
+ struct CIE_Info {
+ pint_t cieStart;
+ pint_t cieLength;
+ pint_t cieInstructions;
+ uint8_t pointerEncoding;
+ uint8_t lsdaEncoding;
+ uint8_t personalityEncoding;
+ uint8_t personalityOffsetInCIE;
+ pint_t personality;
+ uint32_t codeAlignFactor;
+ int dataAlignFactor;
+ bool isSignalFrame;
+ bool fdesHaveAugmentationData;
+ uint8_t returnAddressRegister;
#if defined(_LIBUNWIND_TARGET_AARCH64)
bool addressesSignedWithBKey;
#endif
- };
-
- /// Information about an FDE (Frame Description Entry)
- struct FDE_Info {
- pint_t fdeStart;
- pint_t fdeLength;
- pint_t fdeInstructions;
- pint_t pcStart;
- pint_t pcEnd;
- pint_t lsda;
- };
-
- enum {
+ };
+
+ /// Information about an FDE (Frame Description Entry)
+ struct FDE_Info {
+ pint_t fdeStart;
+ pint_t fdeLength;
+ pint_t fdeInstructions;
+ pint_t pcStart;
+ pint_t pcEnd;
+ pint_t lsda;
+ };
+
+ enum {
kMaxRegisterNumber = _LIBUNWIND_HIGHEST_DWARF_REGISTER
- };
- enum RegisterSavedWhere {
- kRegisterUnused,
+ };
+ enum RegisterSavedWhere {
+ kRegisterUnused,
kRegisterUndefined,
- kRegisterInCFA,
+ kRegisterInCFA,
kRegisterInCFADecrypt, // sparc64 specific
- kRegisterOffsetFromCFA,
- kRegisterInRegister,
- kRegisterAtExpression,
- kRegisterIsExpression
- };
- struct RegisterLocation {
- RegisterSavedWhere location;
+ kRegisterOffsetFromCFA,
+ kRegisterInRegister,
+ kRegisterAtExpression,
+ kRegisterIsExpression
+ };
+ struct RegisterLocation {
+ RegisterSavedWhere location;
bool initialStateSaved;
- int64_t value;
- };
- /// Information about a frame layout and registers saved determined
+ int64_t value;
+ };
+ /// Information about a frame layout and registers saved determined
/// by "running" the DWARF FDE "instructions"
- struct PrologInfo {
- uint32_t cfaRegister;
- int32_t cfaRegisterOffset; // CFA = (cfaRegister)+cfaRegisterOffset
- int64_t cfaExpression; // CFA = expression
- uint32_t spExtraArgSize;
+ struct PrologInfo {
+ uint32_t cfaRegister;
+ int32_t cfaRegisterOffset; // CFA = (cfaRegister)+cfaRegisterOffset
+ int64_t cfaExpression; // CFA = expression
+ uint32_t spExtraArgSize;
RegisterLocation savedRegisters[kMaxRegisterNumber + 1];
enum class InitializeTime { kLazy, kNormal };
@@ -124,15 +124,15 @@ public:
savedRegisters[reg] = initialState.savedRegisters[reg];
// else the register still holds its initial state
}
- };
-
- struct PrologInfoStackEntry {
- PrologInfoStackEntry(PrologInfoStackEntry *n, const PrologInfo &i)
- : next(n), info(i) {}
- PrologInfoStackEntry *next;
- PrologInfo info;
- };
-
+ };
+
+ struct PrologInfoStackEntry {
+ PrologInfoStackEntry(PrologInfoStackEntry *n, const PrologInfo &i)
+ : next(n), info(i) {}
+ PrologInfoStackEntry *next;
+ PrologInfo info;
+ };
+
struct RememberStack {
PrologInfoStackEntry *entry;
RememberStack() : entry(nullptr) {}
@@ -151,42 +151,42 @@ public:
}
};
- static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
+ static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
size_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo,
- CIE_Info *cieInfo);
- static const char *decodeFDE(A &addressSpace, pint_t fdeStart,
+ CIE_Info *cieInfo);
+ static const char *decodeFDE(A &addressSpace, pint_t fdeStart,
FDE_Info *fdeInfo, CIE_Info *cieInfo,
bool useCIEInfo = false);
- static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo,
- const CIE_Info &cieInfo, pint_t upToPC,
+ static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo,
+ const CIE_Info &cieInfo, pint_t upToPC,
int arch, PrologInfo *results);
-
- static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo);
-};
-
+
+ static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo);
+};
+
/// Parse an FDE into a CIE_Info and an FDE_Info. If useCIEInfo is
/// true, treat cieInfo as already-parsed CIE_Info (whose start offset
/// must match the one specified by the FDE) rather than parsing the
/// one indicated within the FDE.
-template <typename A>
-const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
+template <typename A>
+const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
FDE_Info *fdeInfo, CIE_Info *cieInfo,
bool useCIEInfo) {
- pint_t p = fdeStart;
- pint_t cfiLength = (pint_t)addressSpace.get32(p);
- p += 4;
- if (cfiLength == 0xffffffff) {
- // 0xffffffff means length is really next 8 bytes
- cfiLength = (pint_t)addressSpace.get64(p);
- p += 8;
- }
- if (cfiLength == 0)
+ pint_t p = fdeStart;
+ pint_t cfiLength = (pint_t)addressSpace.get32(p);
+ p += 4;
+ if (cfiLength == 0xffffffff) {
+ // 0xffffffff means length is really next 8 bytes
+ cfiLength = (pint_t)addressSpace.get64(p);
+ p += 8;
+ }
+ if (cfiLength == 0)
return "FDE has zero length"; // zero terminator
- uint32_t ciePointer = addressSpace.get32(p);
+ uint32_t ciePointer = addressSpace.get32(p);
if (ciePointer == 0)
return "FDE is really a CIE"; // this is a CIE not an FDE
- pint_t nextCFI = p + cfiLength;
- pint_t cieStart = p - ciePointer;
+ pint_t nextCFI = p + cfiLength;
+ pint_t cieStart = p - ciePointer;
if (useCIEInfo) {
if (cieInfo->cieStart != cieStart)
return "CIE start does not match";
@@ -195,242 +195,242 @@ const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
if (err != NULL)
return err;
}
- p += 4;
+ p += 4;
// Parse pc begin and range.
- pint_t pcStart =
- addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding);
- pint_t pcRange =
- addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding & 0x0F);
+ pint_t pcStart =
+ addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding);
+ pint_t pcRange =
+ addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding & 0x0F);
// Parse rest of info.
- fdeInfo->lsda = 0;
+ fdeInfo->lsda = 0;
// Check for augmentation length.
- if (cieInfo->fdesHaveAugmentationData) {
- pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI);
- pint_t endOfAug = p + augLen;
- if (cieInfo->lsdaEncoding != DW_EH_PE_omit) {
+ if (cieInfo->fdesHaveAugmentationData) {
+ pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI);
+ pint_t endOfAug = p + augLen;
+ if (cieInfo->lsdaEncoding != DW_EH_PE_omit) {
// Peek at value (without indirection). Zero means no LSDA.
- pint_t lsdaStart = p;
- if (addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding & 0x0F) !=
- 0) {
+ pint_t lsdaStart = p;
+ if (addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding & 0x0F) !=
+ 0) {
// Reset pointer and re-parse LSDA address.
- p = lsdaStart;
- fdeInfo->lsda =
- addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding);
- }
- }
- p = endOfAug;
- }
- fdeInfo->fdeStart = fdeStart;
- fdeInfo->fdeLength = nextCFI - fdeStart;
- fdeInfo->fdeInstructions = p;
- fdeInfo->pcStart = pcStart;
- fdeInfo->pcEnd = pcStart + pcRange;
- return NULL; // success
-}
-
-/// Scan an eh_frame section to find an FDE for a pc
-template <typename A>
-bool CFI_Parser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
+ p = lsdaStart;
+ fdeInfo->lsda =
+ addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding);
+ }
+ }
+ p = endOfAug;
+ }
+ fdeInfo->fdeStart = fdeStart;
+ fdeInfo->fdeLength = nextCFI - fdeStart;
+ fdeInfo->fdeInstructions = p;
+ fdeInfo->pcStart = pcStart;
+ fdeInfo->pcEnd = pcStart + pcRange;
+ return NULL; // success
+}
+
+/// Scan an eh_frame section to find an FDE for a pc
+template <typename A>
+bool CFI_Parser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
size_t sectionLength, pint_t fdeHint,
- FDE_Info *fdeInfo, CIE_Info *cieInfo) {
- //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc);
- pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart;
+ FDE_Info *fdeInfo, CIE_Info *cieInfo) {
+ //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc);
+ pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart;
const pint_t ehSectionEnd = (sectionLength == SIZE_MAX)
? static_cast<pint_t>(-1)
: (ehSectionStart + sectionLength);
- while (p < ehSectionEnd) {
- pint_t currentCFI = p;
- //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p);
- pint_t cfiLength = addressSpace.get32(p);
- p += 4;
- if (cfiLength == 0xffffffff) {
- // 0xffffffff means length is really next 8 bytes
- cfiLength = (pint_t)addressSpace.get64(p);
- p += 8;
- }
- if (cfiLength == 0)
+ while (p < ehSectionEnd) {
+ pint_t currentCFI = p;
+ //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p);
+ pint_t cfiLength = addressSpace.get32(p);
+ p += 4;
+ if (cfiLength == 0xffffffff) {
+ // 0xffffffff means length is really next 8 bytes
+ cfiLength = (pint_t)addressSpace.get64(p);
+ p += 8;
+ }
+ if (cfiLength == 0)
return false; // zero terminator
- uint32_t id = addressSpace.get32(p);
- if (id == 0) {
+ uint32_t id = addressSpace.get32(p);
+ if (id == 0) {
// Skip over CIEs.
- p += cfiLength;
- } else {
+ p += cfiLength;
+ } else {
// Process FDE to see if it covers pc.
- pint_t nextCFI = p + cfiLength;
- uint32_t ciePointer = addressSpace.get32(p);
- pint_t cieStart = p - ciePointer;
+ pint_t nextCFI = p + cfiLength;
+ uint32_t ciePointer = addressSpace.get32(p);
+ pint_t cieStart = p - ciePointer;
// Validate pointer to CIE is within section.
- if ((ehSectionStart <= cieStart) && (cieStart < ehSectionEnd)) {
- if (parseCIE(addressSpace, cieStart, cieInfo) == NULL) {
- p += 4;
+ if ((ehSectionStart <= cieStart) && (cieStart < ehSectionEnd)) {
+ if (parseCIE(addressSpace, cieStart, cieInfo) == NULL) {
+ p += 4;
// Parse pc begin and range.
- pint_t pcStart =
- addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding);
- pint_t pcRange = addressSpace.getEncodedP(
- p, nextCFI, cieInfo->pointerEncoding & 0x0F);
+ pint_t pcStart =
+ addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding);
+ pint_t pcRange = addressSpace.getEncodedP(
+ p, nextCFI, cieInfo->pointerEncoding & 0x0F);
// Test if pc is within the function this FDE covers.
- if ((pcStart < pc) && (pc <= pcStart + pcRange)) {
- // parse rest of info
- fdeInfo->lsda = 0;
- // check for augmentation length
- if (cieInfo->fdesHaveAugmentationData) {
- pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI);
- pint_t endOfAug = p + augLen;
- if (cieInfo->lsdaEncoding != DW_EH_PE_omit) {
+ if ((pcStart < pc) && (pc <= pcStart + pcRange)) {
+ // parse rest of info
+ fdeInfo->lsda = 0;
+ // check for augmentation length
+ if (cieInfo->fdesHaveAugmentationData) {
+ pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI);
+ pint_t endOfAug = p + augLen;
+ if (cieInfo->lsdaEncoding != DW_EH_PE_omit) {
// Peek at value (without indirection). Zero means no LSDA.
- pint_t lsdaStart = p;
- if (addressSpace.getEncodedP(
- p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != 0) {
+ pint_t lsdaStart = p;
+ if (addressSpace.getEncodedP(
+ p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != 0) {
// Reset pointer and re-parse LSDA address.
- p = lsdaStart;
- fdeInfo->lsda = addressSpace
- .getEncodedP(p, nextCFI, cieInfo->lsdaEncoding);
- }
- }
- p = endOfAug;
- }
- fdeInfo->fdeStart = currentCFI;
- fdeInfo->fdeLength = nextCFI - currentCFI;
- fdeInfo->fdeInstructions = p;
- fdeInfo->pcStart = pcStart;
- fdeInfo->pcEnd = pcStart + pcRange;
- return true;
- } else {
- // pc is not in begin/range, skip this FDE
- }
- } else {
+ p = lsdaStart;
+ fdeInfo->lsda = addressSpace
+ .getEncodedP(p, nextCFI, cieInfo->lsdaEncoding);
+ }
+ }
+ p = endOfAug;
+ }
+ fdeInfo->fdeStart = currentCFI;
+ fdeInfo->fdeLength = nextCFI - currentCFI;
+ fdeInfo->fdeInstructions = p;
+ fdeInfo->pcStart = pcStart;
+ fdeInfo->pcEnd = pcStart + pcRange;
+ return true;
+ } else {
+ // pc is not in begin/range, skip this FDE
+ }
+ } else {
        // Malformed CIE, no augmentation describing pc range encoding.
- }
- } else {
- // malformed FDE. CIE is bad
- }
- p = nextCFI;
- }
- }
- return false;
-}
-
-/// Extract info from a CIE
-template <typename A>
-const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie,
- CIE_Info *cieInfo) {
- cieInfo->pointerEncoding = 0;
- cieInfo->lsdaEncoding = DW_EH_PE_omit;
- cieInfo->personalityEncoding = 0;
- cieInfo->personalityOffsetInCIE = 0;
- cieInfo->personality = 0;
- cieInfo->codeAlignFactor = 0;
- cieInfo->dataAlignFactor = 0;
- cieInfo->isSignalFrame = false;
- cieInfo->fdesHaveAugmentationData = false;
+ }
+ } else {
+ // malformed FDE. CIE is bad
+ }
+ p = nextCFI;
+ }
+ }
+ return false;
+}
+
+/// Extract info from a CIE
+template <typename A>
+const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie,
+ CIE_Info *cieInfo) {
+ cieInfo->pointerEncoding = 0;
+ cieInfo->lsdaEncoding = DW_EH_PE_omit;
+ cieInfo->personalityEncoding = 0;
+ cieInfo->personalityOffsetInCIE = 0;
+ cieInfo->personality = 0;
+ cieInfo->codeAlignFactor = 0;
+ cieInfo->dataAlignFactor = 0;
+ cieInfo->isSignalFrame = false;
+ cieInfo->fdesHaveAugmentationData = false;
#if defined(_LIBUNWIND_TARGET_AARCH64)
cieInfo->addressesSignedWithBKey = false;
#endif
- cieInfo->cieStart = cie;
- pint_t p = cie;
- pint_t cieLength = (pint_t)addressSpace.get32(p);
- p += 4;
- pint_t cieContentEnd = p + cieLength;
- if (cieLength == 0xffffffff) {
- // 0xffffffff means length is really next 8 bytes
- cieLength = (pint_t)addressSpace.get64(p);
- p += 8;
- cieContentEnd = p + cieLength;
- }
- if (cieLength == 0)
- return NULL;
- // CIE ID is always 0
- if (addressSpace.get32(p) != 0)
- return "CIE ID is not zero";
- p += 4;
- // Version is always 1 or 3
- uint8_t version = addressSpace.get8(p);
- if ((version != 1) && (version != 3))
- return "CIE version is not 1 or 3";
- ++p;
- // save start of augmentation string and find end
- pint_t strStart = p;
- while (addressSpace.get8(p) != 0)
- ++p;
- ++p;
-  // parse code alignment factor
- cieInfo->codeAlignFactor = (uint32_t)addressSpace.getULEB128(p, cieContentEnd);
- // parse data alignment factor
- cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd);
- // parse return address register
+ cieInfo->cieStart = cie;
+ pint_t p = cie;
+ pint_t cieLength = (pint_t)addressSpace.get32(p);
+ p += 4;
+ pint_t cieContentEnd = p + cieLength;
+ if (cieLength == 0xffffffff) {
+ // 0xffffffff means length is really next 8 bytes
+ cieLength = (pint_t)addressSpace.get64(p);
+ p += 8;
+ cieContentEnd = p + cieLength;
+ }
+ if (cieLength == 0)
+ return NULL;
+ // CIE ID is always 0
+ if (addressSpace.get32(p) != 0)
+ return "CIE ID is not zero";
+ p += 4;
+ // Version is always 1 or 3
+ uint8_t version = addressSpace.get8(p);
+ if ((version != 1) && (version != 3))
+ return "CIE version is not 1 or 3";
+ ++p;
+ // save start of augmentation string and find end
+ pint_t strStart = p;
+ while (addressSpace.get8(p) != 0)
+ ++p;
+ ++p;
+  // parse code alignment factor
+ cieInfo->codeAlignFactor = (uint32_t)addressSpace.getULEB128(p, cieContentEnd);
+ // parse data alignment factor
+ cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd);
+ // parse return address register
uint64_t raReg = (version == 1) ? addressSpace.get8(p++)
: addressSpace.getULEB128(p, cieContentEnd);
- assert(raReg < 255 && "return address register too large");
- cieInfo->returnAddressRegister = (uint8_t)raReg;
- // parse augmentation data based on augmentation string
- const char *result = NULL;
- if (addressSpace.get8(strStart) == 'z') {
- // parse augmentation data length
- addressSpace.getULEB128(p, cieContentEnd);
- for (pint_t s = strStart; addressSpace.get8(s) != '\0'; ++s) {
- switch (addressSpace.get8(s)) {
- case 'z':
- cieInfo->fdesHaveAugmentationData = true;
- break;
- case 'P':
- cieInfo->personalityEncoding = addressSpace.get8(p);
- ++p;
- cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie);
- cieInfo->personality = addressSpace
- .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding);
- break;
- case 'L':
- cieInfo->lsdaEncoding = addressSpace.get8(p);
- ++p;
- break;
- case 'R':
- cieInfo->pointerEncoding = addressSpace.get8(p);
- ++p;
- break;
- case 'S':
- cieInfo->isSignalFrame = true;
- break;
+ assert(raReg < 255 && "return address register too large");
+ cieInfo->returnAddressRegister = (uint8_t)raReg;
+ // parse augmentation data based on augmentation string
+ const char *result = NULL;
+ if (addressSpace.get8(strStart) == 'z') {
+ // parse augmentation data length
+ addressSpace.getULEB128(p, cieContentEnd);
+ for (pint_t s = strStart; addressSpace.get8(s) != '\0'; ++s) {
+ switch (addressSpace.get8(s)) {
+ case 'z':
+ cieInfo->fdesHaveAugmentationData = true;
+ break;
+ case 'P':
+ cieInfo->personalityEncoding = addressSpace.get8(p);
+ ++p;
+ cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie);
+ cieInfo->personality = addressSpace
+ .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding);
+ break;
+ case 'L':
+ cieInfo->lsdaEncoding = addressSpace.get8(p);
+ ++p;
+ break;
+ case 'R':
+ cieInfo->pointerEncoding = addressSpace.get8(p);
+ ++p;
+ break;
+ case 'S':
+ cieInfo->isSignalFrame = true;
+ break;
#if defined(_LIBUNWIND_TARGET_AARCH64)
case 'B':
cieInfo->addressesSignedWithBKey = true;
break;
#endif
- default:
- // ignore unknown letters
- break;
- }
- }
- }
- cieInfo->cieLength = cieContentEnd - cieInfo->cieStart;
- cieInfo->cieInstructions = p;
- return result;
-}
-
-
+ default:
+ // ignore unknown letters
+ break;
+ }
+ }
+ }
+ cieInfo->cieLength = cieContentEnd - cieInfo->cieStart;
+ cieInfo->cieInstructions = p;
+ return result;
+}
+
+
/// "run" the DWARF instructions and create the abstact PrologInfo for an FDE
-template <typename A>
-bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
- const FDE_Info &fdeInfo,
- const CIE_Info &cieInfo, pint_t upToPC,
+template <typename A>
+bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
+ const FDE_Info &fdeInfo,
+ const CIE_Info &cieInfo, pint_t upToPC,
int arch, PrologInfo *results) {
  // Alloca is used to allocate the rememberStack entries. It removes the
  // dependency on new/malloc, but the for loop below cannot be refactored
  // into functions. An entry could be saved while processing a CIE and
  // restored by an FDE.
RememberStack rememberStack;
-
+
struct ParseInfo {
pint_t instructions;
pint_t instructionsEnd;
pint_t pcoffset;
};
-
+
ParseInfo parseInfoArray[] = {
{cieInfo.cieInstructions, cieInfo.cieStart + cieInfo.cieLength,
(pint_t)(-1)},
{fdeInfo.fdeInstructions, fdeInfo.fdeStart + fdeInfo.fdeLength,
upToPC - fdeInfo.pcStart}};
-
+
for (const auto &info : parseInfoArray) {
pint_t p = info.instructions;
pint_t instructionsEnd = info.instructionsEnd;
@@ -588,7 +588,7 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
_LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa(reg=%" PRIu64 ", offset=%" PRIu64
")\n",
reg, offset);
- break;
+ break;
case DW_CFA_def_cfa_register:
reg = addressSpace.getULEB128(p, instructionsEnd);
if (reg > kMaxRegisterNumber) {
@@ -598,7 +598,7 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
}
results->cfaRegister = (uint32_t)reg;
_LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_register(%" PRIu64 ")\n", reg);
- break;
+ break;
case DW_CFA_def_cfa_offset:
results->cfaRegisterOffset =
(int32_t)addressSpace.getULEB128(p, instructionsEnd);
@@ -631,7 +631,7 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
"expression=0x%" PRIx64 ", "
"length=%" PRIu64 ")\n",
reg, results->savedRegisters[reg].value, length);
- break;
+ break;
case DW_CFA_offset_extended_sf:
reg = addressSpace.getULEB128(p, instructionsEnd);
if (reg > kMaxRegisterNumber) {
@@ -798,7 +798,7 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
(void)arch;
#endif
- default:
+ default:
operand = opcode & 0x3F;
switch (opcode & 0xC0) {
case DW_CFA_offset:
@@ -837,12 +837,12 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
_LIBUNWIND_TRACE_DWARF("unknown CFA opcode 0x%02X\n", opcode);
return false;
}
- }
- }
- }
- return true;
-}
-
-} // namespace libunwind
-
-#endif // __DWARF_PARSER_HPP__
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace libunwind
+
+#endif // __DWARF_PARSER_HPP__
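decodeFDE, findFDE and parseCIE above all apply the same DWARF initial-length rule: a 32-bit length field whose sentinel value 0xffffffff means the real length follows in the next 8 bytes. A minimal sketch of just that rule, assuming a plain in-memory byte buffer (the function name and out-parameter are illustrative, not libunwind interfaces):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Reads a DWARF "initial length" field and reports how many bytes the field
// itself occupied. Sketch only: real parsers must also bounds-check the buffer.
static uint64_t readInitialLength(const uint8_t *p, size_t *fieldSize) {
  uint32_t len32;
  std::memcpy(&len32, p, sizeof(len32));
  if (len32 != 0xffffffffu) {
    *fieldSize = 4;                            // common 32-bit DWARF case
    return len32;
  }
  uint64_t len64;
  std::memcpy(&len64, p + 4, sizeof(len64));   // escape: 64-bit length follows
  *fieldSize = 12;
  return len64;
}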
diff --git a/contrib/libs/libunwind/src/EHHeaderParser.hpp b/contrib/libs/libunwind/src/EHHeaderParser.hpp
index 188cb93269..9a38070fab 100644
--- a/contrib/libs/libunwind/src/EHHeaderParser.hpp
+++ b/contrib/libs/libunwind/src/EHHeaderParser.hpp
@@ -1,169 +1,169 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Parses ELF .eh_frame_hdr sections.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __EHHEADERPARSER_HPP__
-#define __EHHEADERPARSER_HPP__
-
-#include "libunwind.h"
-
-#include "DwarfParser.hpp"
-
-namespace libunwind {
-
-/// \brief EHHeaderParser does basic parsing of an ELF .eh_frame_hdr section.
-///
-/// See DWARF spec for details:
-/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
-///
-template <typename A> class EHHeaderParser {
-public:
- typedef typename A::pint_t pint_t;
-
- /// Information encoded in the EH frame header.
- struct EHHeaderInfo {
- pint_t eh_frame_ptr;
- size_t fde_count;
- pint_t table;
- uint8_t table_enc;
- };
-
+//
+//
+// Parses ELF .eh_frame_hdr sections.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __EHHEADERPARSER_HPP__
+#define __EHHEADERPARSER_HPP__
+
+#include "libunwind.h"
+
+#include "DwarfParser.hpp"
+
+namespace libunwind {
+
+/// \brief EHHeaderParser does basic parsing of an ELF .eh_frame_hdr section.
+///
+/// See DWARF spec for details:
+/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+///
+template <typename A> class EHHeaderParser {
+public:
+ typedef typename A::pint_t pint_t;
+
+ /// Information encoded in the EH frame header.
+ struct EHHeaderInfo {
+ pint_t eh_frame_ptr;
+ size_t fde_count;
+ pint_t table;
+ uint8_t table_enc;
+ };
+
static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd,
- EHHeaderInfo &ehHdrInfo);
- static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
- uint32_t sectionLength,
- typename CFI_Parser<A>::FDE_Info *fdeInfo,
- typename CFI_Parser<A>::CIE_Info *cieInfo);
-
-private:
- static bool decodeTableEntry(A &addressSpace, pint_t &tableEntry,
- pint_t ehHdrStart, pint_t ehHdrEnd,
- uint8_t tableEnc,
- typename CFI_Parser<A>::FDE_Info *fdeInfo,
- typename CFI_Parser<A>::CIE_Info *cieInfo);
- static size_t getTableEntrySize(uint8_t tableEnc);
-};
-
-template <typename A>
+ EHHeaderInfo &ehHdrInfo);
+ static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
+ uint32_t sectionLength,
+ typename CFI_Parser<A>::FDE_Info *fdeInfo,
+ typename CFI_Parser<A>::CIE_Info *cieInfo);
+
+private:
+ static bool decodeTableEntry(A &addressSpace, pint_t &tableEntry,
+ pint_t ehHdrStart, pint_t ehHdrEnd,
+ uint8_t tableEnc,
+ typename CFI_Parser<A>::FDE_Info *fdeInfo,
+ typename CFI_Parser<A>::CIE_Info *cieInfo);
+ static size_t getTableEntrySize(uint8_t tableEnc);
+};
+
+template <typename A>
bool EHHeaderParser<A>::decodeEHHdr(A &addressSpace, pint_t ehHdrStart,
- pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) {
- pint_t p = ehHdrStart;
- uint8_t version = addressSpace.get8(p++);
+ pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) {
+ pint_t p = ehHdrStart;
+ uint8_t version = addressSpace.get8(p++);
if (version != 1) {
_LIBUNWIND_LOG0("Unsupported .eh_frame_hdr version");
return false;
}
-
- uint8_t eh_frame_ptr_enc = addressSpace.get8(p++);
- uint8_t fde_count_enc = addressSpace.get8(p++);
- ehHdrInfo.table_enc = addressSpace.get8(p++);
-
- ehHdrInfo.eh_frame_ptr =
- addressSpace.getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart);
- ehHdrInfo.fde_count =
+
+ uint8_t eh_frame_ptr_enc = addressSpace.get8(p++);
+ uint8_t fde_count_enc = addressSpace.get8(p++);
+ ehHdrInfo.table_enc = addressSpace.get8(p++);
+
+ ehHdrInfo.eh_frame_ptr =
+ addressSpace.getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart);
+ ehHdrInfo.fde_count =
fde_count_enc == DW_EH_PE_omit
? 0
: addressSpace.getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart);
- ehHdrInfo.table = p;
+ ehHdrInfo.table = p;
return true;
-}
-
-template <typename A>
-bool EHHeaderParser<A>::decodeTableEntry(
- A &addressSpace, pint_t &tableEntry, pint_t ehHdrStart, pint_t ehHdrEnd,
- uint8_t tableEnc, typename CFI_Parser<A>::FDE_Info *fdeInfo,
- typename CFI_Parser<A>::CIE_Info *cieInfo) {
- // Have to decode the whole FDE for the PC range anyway, so just throw away
- // the PC start.
- addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart);
- pint_t fde =
- addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart);
- const char *message =
- CFI_Parser<A>::decodeFDE(addressSpace, fde, fdeInfo, cieInfo);
- if (message != NULL) {
+}
+
+template <typename A>
+bool EHHeaderParser<A>::decodeTableEntry(
+ A &addressSpace, pint_t &tableEntry, pint_t ehHdrStart, pint_t ehHdrEnd,
+ uint8_t tableEnc, typename CFI_Parser<A>::FDE_Info *fdeInfo,
+ typename CFI_Parser<A>::CIE_Info *cieInfo) {
+ // Have to decode the whole FDE for the PC range anyway, so just throw away
+ // the PC start.
+ addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart);
+ pint_t fde =
+ addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart);
+ const char *message =
+ CFI_Parser<A>::decodeFDE(addressSpace, fde, fdeInfo, cieInfo);
+ if (message != NULL) {
_LIBUNWIND_DEBUG_LOG("EHHeaderParser::decodeTableEntry: bad fde: %s",
- message);
- return false;
- }
-
- return true;
-}
-
-template <typename A>
-bool EHHeaderParser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
- uint32_t sectionLength,
- typename CFI_Parser<A>::FDE_Info *fdeInfo,
- typename CFI_Parser<A>::CIE_Info *cieInfo) {
- pint_t ehHdrEnd = ehHdrStart + sectionLength;
-
- EHHeaderParser<A>::EHHeaderInfo hdrInfo;
+ message);
+ return false;
+ }
+
+ return true;
+}
+
+template <typename A>
+bool EHHeaderParser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
+ uint32_t sectionLength,
+ typename CFI_Parser<A>::FDE_Info *fdeInfo,
+ typename CFI_Parser<A>::CIE_Info *cieInfo) {
+ pint_t ehHdrEnd = ehHdrStart + sectionLength;
+
+ EHHeaderParser<A>::EHHeaderInfo hdrInfo;
if (!EHHeaderParser<A>::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd,
hdrInfo))
return false;
-
+
if (hdrInfo.fde_count == 0) return false;
- size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc);
- pint_t tableEntry;
-
- size_t low = 0;
- for (size_t len = hdrInfo.fde_count; len > 1;) {
- size_t mid = low + (len / 2);
- tableEntry = hdrInfo.table + mid * tableEntrySize;
- pint_t start = addressSpace.getEncodedP(tableEntry, ehHdrEnd,
- hdrInfo.table_enc, ehHdrStart);
-
- if (start == pc) {
- low = mid;
- break;
- } else if (start < pc) {
- low = mid;
- len -= (len / 2);
- } else {
- len /= 2;
- }
- }
-
- tableEntry = hdrInfo.table + low * tableEntrySize;
- if (decodeTableEntry(addressSpace, tableEntry, ehHdrStart, ehHdrEnd,
- hdrInfo.table_enc, fdeInfo, cieInfo)) {
- if (pc >= fdeInfo->pcStart && pc < fdeInfo->pcEnd)
- return true;
- }
-
- return false;
-}
-
-template <typename A>
-size_t EHHeaderParser<A>::getTableEntrySize(uint8_t tableEnc) {
- switch (tableEnc & 0x0f) {
- case DW_EH_PE_sdata2:
- case DW_EH_PE_udata2:
- return 4;
- case DW_EH_PE_sdata4:
- case DW_EH_PE_udata4:
- return 8;
- case DW_EH_PE_sdata8:
- case DW_EH_PE_udata8:
- return 16;
- case DW_EH_PE_sleb128:
- case DW_EH_PE_uleb128:
- _LIBUNWIND_ABORT("Can't binary search on variable length encoded data.");
- case DW_EH_PE_omit:
- return 0;
- default:
- _LIBUNWIND_ABORT("Unknown DWARF encoding for search table.");
- }
-}
-
-}
-
-#endif
+ size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc);
+ pint_t tableEntry;
+
+ size_t low = 0;
+ for (size_t len = hdrInfo.fde_count; len > 1;) {
+ size_t mid = low + (len / 2);
+ tableEntry = hdrInfo.table + mid * tableEntrySize;
+ pint_t start = addressSpace.getEncodedP(tableEntry, ehHdrEnd,
+ hdrInfo.table_enc, ehHdrStart);
+
+ if (start == pc) {
+ low = mid;
+ break;
+ } else if (start < pc) {
+ low = mid;
+ len -= (len / 2);
+ } else {
+ len /= 2;
+ }
+ }
+
+ tableEntry = hdrInfo.table + low * tableEntrySize;
+ if (decodeTableEntry(addressSpace, tableEntry, ehHdrStart, ehHdrEnd,
+ hdrInfo.table_enc, fdeInfo, cieInfo)) {
+ if (pc >= fdeInfo->pcStart && pc < fdeInfo->pcEnd)
+ return true;
+ }
+
+ return false;
+}
+
+template <typename A>
+size_t EHHeaderParser<A>::getTableEntrySize(uint8_t tableEnc) {
+ switch (tableEnc & 0x0f) {
+ case DW_EH_PE_sdata2:
+ case DW_EH_PE_udata2:
+ return 4;
+ case DW_EH_PE_sdata4:
+ case DW_EH_PE_udata4:
+ return 8;
+ case DW_EH_PE_sdata8:
+ case DW_EH_PE_udata8:
+ return 16;
+ case DW_EH_PE_sleb128:
+ case DW_EH_PE_uleb128:
+ _LIBUNWIND_ABORT("Can't binary search on variable length encoded data.");
+ case DW_EH_PE_omit:
+ return 0;
+ default:
+ _LIBUNWIND_ABORT("Unknown DWARF encoding for search table.");
+ }
+}
+
+}
+
+#endif
diff --git a/contrib/libs/libunwind/src/Registers.hpp b/contrib/libs/libunwind/src/Registers.hpp
index 3d03b815cf..cbc3876d67 100644
--- a/contrib/libs/libunwind/src/Registers.hpp
+++ b/contrib/libs/libunwind/src/Registers.hpp
@@ -1,29 +1,29 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Models register sets for supported processors.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __REGISTERS_HPP__
-#define __REGISTERS_HPP__
-
-#include <stdint.h>
-#include <string.h>
-
+//
+//
+// Models register sets for supported processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __REGISTERS_HPP__
+#define __REGISTERS_HPP__
+
+#include <stdint.h>
+#include <string.h>
+
#include "cet_unwind.h"
#include "config.h"
-#include "libunwind.h"
-
-namespace libunwind {
-
-// For emulating 128-bit registers
-struct v128 { uint32_t vec[4]; };
-
+#include "libunwind.h"
+
+namespace libunwind {
+
+// For emulating 128-bit registers
+struct v128 { uint32_t vec[4]; };
+
enum {
REGISTERS_X86,
REGISTERS_X86_64,
@@ -40,7 +40,7 @@ enum {
REGISTERS_RISCV,
REGISTERS_VE,
};
-
+
#if defined(_LIBUNWIND_TARGET_I386)
class _LIBUNWIND_HIDDEN Registers_x86;
extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
@@ -51,215 +51,215 @@ extern "C" void *__libunwind_cet_get_jump_target() {
}
#endif
-/// Registers_x86 holds the register state of a thread in a 32-bit intel
-/// process.
-class _LIBUNWIND_HIDDEN Registers_x86 {
-public:
- Registers_x86();
- Registers_x86(const void *registers);
-
- bool validRegister(int num) const;
- uint32_t getRegister(int num) const;
- void setRegister(int num, uint32_t value);
- bool validFloatRegister(int) const { return false; }
- double getFloatRegister(int num) const;
- void setFloatRegister(int num, double value);
- bool validVectorRegister(int) const { return false; }
- v128 getVectorRegister(int num) const;
- void setVectorRegister(int num, v128 value);
+/// Registers_x86 holds the register state of a thread in a 32-bit intel
+/// process.
+class _LIBUNWIND_HIDDEN Registers_x86 {
+public:
+ Registers_x86();
+ Registers_x86(const void *registers);
+
+ bool validRegister(int num) const;
+ uint32_t getRegister(int num) const;
+ void setRegister(int num, uint32_t value);
+ bool validFloatRegister(int) const { return false; }
+ double getFloatRegister(int num) const;
+ void setFloatRegister(int num, double value);
+ bool validVectorRegister(int) const { return false; }
+ v128 getVectorRegister(int num) const;
+ void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
void jumpto() { __libunwind_Registers_x86_jumpto(this); }
static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86; }
static int getArch() { return REGISTERS_X86; }
-
- uint32_t getSP() const { return _registers.__esp; }
- void setSP(uint32_t value) { _registers.__esp = value; }
- uint32_t getIP() const { return _registers.__eip; }
- void setIP(uint32_t value) { _registers.__eip = value; }
- uint32_t getEBP() const { return _registers.__ebp; }
- void setEBP(uint32_t value) { _registers.__ebp = value; }
- uint32_t getEBX() const { return _registers.__ebx; }
- void setEBX(uint32_t value) { _registers.__ebx = value; }
- uint32_t getECX() const { return _registers.__ecx; }
- void setECX(uint32_t value) { _registers.__ecx = value; }
- uint32_t getEDX() const { return _registers.__edx; }
- void setEDX(uint32_t value) { _registers.__edx = value; }
- uint32_t getESI() const { return _registers.__esi; }
- void setESI(uint32_t value) { _registers.__esi = value; }
- uint32_t getEDI() const { return _registers.__edi; }
- void setEDI(uint32_t value) { _registers.__edi = value; }
-
-private:
- struct GPRs {
- unsigned int __eax;
- unsigned int __ebx;
- unsigned int __ecx;
- unsigned int __edx;
- unsigned int __edi;
- unsigned int __esi;
- unsigned int __ebp;
- unsigned int __esp;
- unsigned int __ss;
- unsigned int __eflags;
- unsigned int __eip;
- unsigned int __cs;
- unsigned int __ds;
- unsigned int __es;
- unsigned int __fs;
- unsigned int __gs;
- };
-
- GPRs _registers;
-};
-
-inline Registers_x86::Registers_x86(const void *registers) {
+
+ uint32_t getSP() const { return _registers.__esp; }
+ void setSP(uint32_t value) { _registers.__esp = value; }
+ uint32_t getIP() const { return _registers.__eip; }
+ void setIP(uint32_t value) { _registers.__eip = value; }
+ uint32_t getEBP() const { return _registers.__ebp; }
+ void setEBP(uint32_t value) { _registers.__ebp = value; }
+ uint32_t getEBX() const { return _registers.__ebx; }
+ void setEBX(uint32_t value) { _registers.__ebx = value; }
+ uint32_t getECX() const { return _registers.__ecx; }
+ void setECX(uint32_t value) { _registers.__ecx = value; }
+ uint32_t getEDX() const { return _registers.__edx; }
+ void setEDX(uint32_t value) { _registers.__edx = value; }
+ uint32_t getESI() const { return _registers.__esi; }
+ void setESI(uint32_t value) { _registers.__esi = value; }
+ uint32_t getEDI() const { return _registers.__edi; }
+ void setEDI(uint32_t value) { _registers.__edi = value; }
+
+private:
+ struct GPRs {
+ unsigned int __eax;
+ unsigned int __ebx;
+ unsigned int __ecx;
+ unsigned int __edx;
+ unsigned int __edi;
+ unsigned int __esi;
+ unsigned int __ebp;
+ unsigned int __esp;
+ unsigned int __ss;
+ unsigned int __eflags;
+ unsigned int __eip;
+ unsigned int __cs;
+ unsigned int __ds;
+ unsigned int __es;
+ unsigned int __fs;
+ unsigned int __gs;
+ };
+
+ GPRs _registers;
+};
+
+inline Registers_x86::Registers_x86(const void *registers) {
static_assert((check_fit<Registers_x86, unw_context_t>::does_fit),
"x86 registers do not fit into unw_context_t");
- memcpy(&_registers, registers, sizeof(_registers));
-}
-
-inline Registers_x86::Registers_x86() {
- memset(&_registers, 0, sizeof(_registers));
-}
-
-inline bool Registers_x86::validRegister(int regNum) const {
- if (regNum == UNW_REG_IP)
- return true;
- if (regNum == UNW_REG_SP)
- return true;
- if (regNum < 0)
- return false;
- if (regNum > 7)
- return false;
- return true;
-}
-
-inline uint32_t Registers_x86::getRegister(int regNum) const {
- switch (regNum) {
- case UNW_REG_IP:
- return _registers.__eip;
- case UNW_REG_SP:
- return _registers.__esp;
- case UNW_X86_EAX:
- return _registers.__eax;
- case UNW_X86_ECX:
- return _registers.__ecx;
- case UNW_X86_EDX:
- return _registers.__edx;
- case UNW_X86_EBX:
- return _registers.__ebx;
+ memcpy(&_registers, registers, sizeof(_registers));
+}
+
+inline Registers_x86::Registers_x86() {
+ memset(&_registers, 0, sizeof(_registers));
+}
+
+inline bool Registers_x86::validRegister(int regNum) const {
+ if (regNum == UNW_REG_IP)
+ return true;
+ if (regNum == UNW_REG_SP)
+ return true;
+ if (regNum < 0)
+ return false;
+ if (regNum > 7)
+ return false;
+ return true;
+}
+
+inline uint32_t Registers_x86::getRegister(int regNum) const {
+ switch (regNum) {
+ case UNW_REG_IP:
+ return _registers.__eip;
+ case UNW_REG_SP:
+ return _registers.__esp;
+ case UNW_X86_EAX:
+ return _registers.__eax;
+ case UNW_X86_ECX:
+ return _registers.__ecx;
+ case UNW_X86_EDX:
+ return _registers.__edx;
+ case UNW_X86_EBX:
+ return _registers.__ebx;
#if !defined(__APPLE__)
case UNW_X86_ESP:
#else
- case UNW_X86_EBP:
+ case UNW_X86_EBP:
#endif
- return _registers.__ebp;
+ return _registers.__ebp;
#if !defined(__APPLE__)
case UNW_X86_EBP:
#else
- case UNW_X86_ESP:
+ case UNW_X86_ESP:
#endif
- return _registers.__esp;
- case UNW_X86_ESI:
- return _registers.__esi;
- case UNW_X86_EDI:
- return _registers.__edi;
- }
- _LIBUNWIND_ABORT("unsupported x86 register");
-}
-
-inline void Registers_x86::setRegister(int regNum, uint32_t value) {
- switch (regNum) {
- case UNW_REG_IP:
- _registers.__eip = value;
- return;
- case UNW_REG_SP:
- _registers.__esp = value;
- return;
- case UNW_X86_EAX:
- _registers.__eax = value;
- return;
- case UNW_X86_ECX:
- _registers.__ecx = value;
- return;
- case UNW_X86_EDX:
- _registers.__edx = value;
- return;
- case UNW_X86_EBX:
- _registers.__ebx = value;
- return;
+ return _registers.__esp;
+ case UNW_X86_ESI:
+ return _registers.__esi;
+ case UNW_X86_EDI:
+ return _registers.__edi;
+ }
+ _LIBUNWIND_ABORT("unsupported x86 register");
+}
+
+inline void Registers_x86::setRegister(int regNum, uint32_t value) {
+ switch (regNum) {
+ case UNW_REG_IP:
+ _registers.__eip = value;
+ return;
+ case UNW_REG_SP:
+ _registers.__esp = value;
+ return;
+ case UNW_X86_EAX:
+ _registers.__eax = value;
+ return;
+ case UNW_X86_ECX:
+ _registers.__ecx = value;
+ return;
+ case UNW_X86_EDX:
+ _registers.__edx = value;
+ return;
+ case UNW_X86_EBX:
+ _registers.__ebx = value;
+ return;
#if !defined(__APPLE__)
case UNW_X86_ESP:
#else
- case UNW_X86_EBP:
+ case UNW_X86_EBP:
#endif
- _registers.__ebp = value;
- return;
+ _registers.__ebp = value;
+ return;
#if !defined(__APPLE__)
case UNW_X86_EBP:
#else
- case UNW_X86_ESP:
+ case UNW_X86_ESP:
#endif
- _registers.__esp = value;
- return;
- case UNW_X86_ESI:
- _registers.__esi = value;
- return;
- case UNW_X86_EDI:
- _registers.__edi = value;
- return;
- }
- _LIBUNWIND_ABORT("unsupported x86 register");
-}
-
-inline const char *Registers_x86::getRegisterName(int regNum) {
- switch (regNum) {
- case UNW_REG_IP:
- return "ip";
- case UNW_REG_SP:
- return "esp";
- case UNW_X86_EAX:
- return "eax";
- case UNW_X86_ECX:
- return "ecx";
- case UNW_X86_EDX:
- return "edx";
- case UNW_X86_EBX:
- return "ebx";
- case UNW_X86_EBP:
- return "ebp";
- case UNW_X86_ESP:
- return "esp";
- case UNW_X86_ESI:
- return "esi";
- case UNW_X86_EDI:
- return "edi";
- default:
- return "unknown register";
- }
-}
-
-inline double Registers_x86::getFloatRegister(int) const {
- _LIBUNWIND_ABORT("no x86 float registers");
-}
-
-inline void Registers_x86::setFloatRegister(int, double) {
- _LIBUNWIND_ABORT("no x86 float registers");
-}
-
-inline v128 Registers_x86::getVectorRegister(int) const {
- _LIBUNWIND_ABORT("no x86 vector registers");
-}
-
-inline void Registers_x86::setVectorRegister(int, v128) {
- _LIBUNWIND_ABORT("no x86 vector registers");
-}
+ _registers.__esp = value;
+ return;
+ case UNW_X86_ESI:
+ _registers.__esi = value;
+ return;
+ case UNW_X86_EDI:
+ _registers.__edi = value;
+ return;
+ }
+ _LIBUNWIND_ABORT("unsupported x86 register");
+}
+
+inline const char *Registers_x86::getRegisterName(int regNum) {
+ switch (regNum) {
+ case UNW_REG_IP:
+ return "ip";
+ case UNW_REG_SP:
+ return "esp";
+ case UNW_X86_EAX:
+ return "eax";
+ case UNW_X86_ECX:
+ return "ecx";
+ case UNW_X86_EDX:
+ return "edx";
+ case UNW_X86_EBX:
+ return "ebx";
+ case UNW_X86_EBP:
+ return "ebp";
+ case UNW_X86_ESP:
+ return "esp";
+ case UNW_X86_ESI:
+ return "esi";
+ case UNW_X86_EDI:
+ return "edi";
+ default:
+ return "unknown register";
+ }
+}
+
+inline double Registers_x86::getFloatRegister(int) const {
+ _LIBUNWIND_ABORT("no x86 float registers");
+}
+
+inline void Registers_x86::setFloatRegister(int, double) {
+ _LIBUNWIND_ABORT("no x86 float registers");
+}
+
+inline v128 Registers_x86::getVectorRegister(int) const {
+ _LIBUNWIND_ABORT("no x86 vector registers");
+}
+
+inline void Registers_x86::setVectorRegister(int, v128) {
+ _LIBUNWIND_ABORT("no x86 vector registers");
+}
#endif // _LIBUNWIND_TARGET_I386
-
-
+
+
#if defined(_LIBUNWIND_TARGET_X86_64)
-/// Registers_x86_64 holds the register state of a thread in a 64-bit intel
-/// process.
+/// Registers_x86_64 holds the register state of a thread in a 64-bit intel
+/// process.
class _LIBUNWIND_HIDDEN Registers_x86_64;
extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *);
@@ -269,240 +269,240 @@ extern "C" void *__libunwind_cet_get_jump_target() {
}
#endif
-class _LIBUNWIND_HIDDEN Registers_x86_64 {
-public:
- Registers_x86_64();
- Registers_x86_64(const void *registers);
-
- bool validRegister(int num) const;
- uint64_t getRegister(int num) const;
- void setRegister(int num, uint64_t value);
- bool validFloatRegister(int) const { return false; }
- double getFloatRegister(int num) const;
- void setFloatRegister(int num, double value);
+class _LIBUNWIND_HIDDEN Registers_x86_64 {
+public:
+ Registers_x86_64();
+ Registers_x86_64(const void *registers);
+
+ bool validRegister(int num) const;
+ uint64_t getRegister(int num) const;
+ void setRegister(int num, uint64_t value);
+ bool validFloatRegister(int) const { return false; }
+ double getFloatRegister(int num) const;
+ void setFloatRegister(int num, double value);
bool validVectorRegister(int) const;
- v128 getVectorRegister(int num) const;
- void setVectorRegister(int num, v128 value);
+ v128 getVectorRegister(int num) const;
+ void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
void jumpto() { __libunwind_Registers_x86_64_jumpto(this); }
static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64; }
static int getArch() { return REGISTERS_X86_64; }
-
- uint64_t getSP() const { return _registers.__rsp; }
- void setSP(uint64_t value) { _registers.__rsp = value; }
- uint64_t getIP() const { return _registers.__rip; }
- void setIP(uint64_t value) { _registers.__rip = value; }
- uint64_t getRBP() const { return _registers.__rbp; }
- void setRBP(uint64_t value) { _registers.__rbp = value; }
- uint64_t getRBX() const { return _registers.__rbx; }
- void setRBX(uint64_t value) { _registers.__rbx = value; }
- uint64_t getR12() const { return _registers.__r12; }
- void setR12(uint64_t value) { _registers.__r12 = value; }
- uint64_t getR13() const { return _registers.__r13; }
- void setR13(uint64_t value) { _registers.__r13 = value; }
- uint64_t getR14() const { return _registers.__r14; }
- void setR14(uint64_t value) { _registers.__r14 = value; }
- uint64_t getR15() const { return _registers.__r15; }
- void setR15(uint64_t value) { _registers.__r15 = value; }
-
-private:
- struct GPRs {
- uint64_t __rax;
- uint64_t __rbx;
- uint64_t __rcx;
- uint64_t __rdx;
- uint64_t __rdi;
- uint64_t __rsi;
- uint64_t __rbp;
- uint64_t __rsp;
- uint64_t __r8;
- uint64_t __r9;
- uint64_t __r10;
- uint64_t __r11;
- uint64_t __r12;
- uint64_t __r13;
- uint64_t __r14;
- uint64_t __r15;
- uint64_t __rip;
- uint64_t __rflags;
- uint64_t __cs;
- uint64_t __fs;
- uint64_t __gs;
+
+ uint64_t getSP() const { return _registers.__rsp; }
+ void setSP(uint64_t value) { _registers.__rsp = value; }
+ uint64_t getIP() const { return _registers.__rip; }
+ void setIP(uint64_t value) { _registers.__rip = value; }
+ uint64_t getRBP() const { return _registers.__rbp; }
+ void setRBP(uint64_t value) { _registers.__rbp = value; }
+ uint64_t getRBX() const { return _registers.__rbx; }
+ void setRBX(uint64_t value) { _registers.__rbx = value; }
+ uint64_t getR12() const { return _registers.__r12; }
+ void setR12(uint64_t value) { _registers.__r12 = value; }
+ uint64_t getR13() const { return _registers.__r13; }
+ void setR13(uint64_t value) { _registers.__r13 = value; }
+ uint64_t getR14() const { return _registers.__r14; }
+ void setR14(uint64_t value) { _registers.__r14 = value; }
+ uint64_t getR15() const { return _registers.__r15; }
+ void setR15(uint64_t value) { _registers.__r15 = value; }
+
+private:
+ struct GPRs {
+ uint64_t __rax;
+ uint64_t __rbx;
+ uint64_t __rcx;
+ uint64_t __rdx;
+ uint64_t __rdi;
+ uint64_t __rsi;
+ uint64_t __rbp;
+ uint64_t __rsp;
+ uint64_t __r8;
+ uint64_t __r9;
+ uint64_t __r10;
+ uint64_t __r11;
+ uint64_t __r12;
+ uint64_t __r13;
+ uint64_t __r14;
+ uint64_t __r15;
+ uint64_t __rip;
+ uint64_t __rflags;
+ uint64_t __cs;
+ uint64_t __fs;
+ uint64_t __gs;
#if defined(_WIN64)
uint64_t __padding; // 16-byte align
#endif
- };
- GPRs _registers;
+ };
+ GPRs _registers;
#if defined(_WIN64)
v128 _xmm[16];
#endif
-};
-
-inline Registers_x86_64::Registers_x86_64(const void *registers) {
+};
+
+inline Registers_x86_64::Registers_x86_64(const void *registers) {
static_assert((check_fit<Registers_x86_64, unw_context_t>::does_fit),
"x86_64 registers do not fit into unw_context_t");
- memcpy(&_registers, registers, sizeof(_registers));
-}
-
-inline Registers_x86_64::Registers_x86_64() {
- memset(&_registers, 0, sizeof(_registers));
-}
-
-inline bool Registers_x86_64::validRegister(int regNum) const {
- if (regNum == UNW_REG_IP)
- return true;
- if (regNum == UNW_REG_SP)
- return true;
- if (regNum < 0)
- return false;
+ memcpy(&_registers, registers, sizeof(_registers));
+}
+
+inline Registers_x86_64::Registers_x86_64() {
+ memset(&_registers, 0, sizeof(_registers));
+}
+
+inline bool Registers_x86_64::validRegister(int regNum) const {
+ if (regNum == UNW_REG_IP)
+ return true;
+ if (regNum == UNW_REG_SP)
+ return true;
+ if (regNum < 0)
+ return false;
if (regNum > 16)
- return false;
- return true;
-}
-
-inline uint64_t Registers_x86_64::getRegister(int regNum) const {
- switch (regNum) {
- case UNW_REG_IP:
+ return false;
+ return true;
+}
+
+inline uint64_t Registers_x86_64::getRegister(int regNum) const {
+ switch (regNum) {
+ case UNW_REG_IP:
case UNW_X86_64_RIP:
- return _registers.__rip;
- case UNW_REG_SP:
- return _registers.__rsp;
- case UNW_X86_64_RAX:
- return _registers.__rax;
- case UNW_X86_64_RDX:
- return _registers.__rdx;
- case UNW_X86_64_RCX:
- return _registers.__rcx;
- case UNW_X86_64_RBX:
- return _registers.__rbx;
- case UNW_X86_64_RSI:
- return _registers.__rsi;
- case UNW_X86_64_RDI:
- return _registers.__rdi;
- case UNW_X86_64_RBP:
- return _registers.__rbp;
- case UNW_X86_64_RSP:
- return _registers.__rsp;
- case UNW_X86_64_R8:
- return _registers.__r8;
- case UNW_X86_64_R9:
- return _registers.__r9;
- case UNW_X86_64_R10:
- return _registers.__r10;
- case UNW_X86_64_R11:
- return _registers.__r11;
- case UNW_X86_64_R12:
- return _registers.__r12;
- case UNW_X86_64_R13:
- return _registers.__r13;
- case UNW_X86_64_R14:
- return _registers.__r14;
- case UNW_X86_64_R15:
- return _registers.__r15;
- }
- _LIBUNWIND_ABORT("unsupported x86_64 register");
-}
-
-inline void Registers_x86_64::setRegister(int regNum, uint64_t value) {
- switch (regNum) {
- case UNW_REG_IP:
+ return _registers.__rip;
+ case UNW_REG_SP:
+ return _registers.__rsp;
+ case UNW_X86_64_RAX:
+ return _registers.__rax;
+ case UNW_X86_64_RDX:
+ return _registers.__rdx;
+ case UNW_X86_64_RCX:
+ return _registers.__rcx;
+ case UNW_X86_64_RBX:
+ return _registers.__rbx;
+ case UNW_X86_64_RSI:
+ return _registers.__rsi;
+ case UNW_X86_64_RDI:
+ return _registers.__rdi;
+ case UNW_X86_64_RBP:
+ return _registers.__rbp;
+ case UNW_X86_64_RSP:
+ return _registers.__rsp;
+ case UNW_X86_64_R8:
+ return _registers.__r8;
+ case UNW_X86_64_R9:
+ return _registers.__r9;
+ case UNW_X86_64_R10:
+ return _registers.__r10;
+ case UNW_X86_64_R11:
+ return _registers.__r11;
+ case UNW_X86_64_R12:
+ return _registers.__r12;
+ case UNW_X86_64_R13:
+ return _registers.__r13;
+ case UNW_X86_64_R14:
+ return _registers.__r14;
+ case UNW_X86_64_R15:
+ return _registers.__r15;
+ }
+ _LIBUNWIND_ABORT("unsupported x86_64 register");
+}
+
+inline void Registers_x86_64::setRegister(int regNum, uint64_t value) {
+ switch (regNum) {
+ case UNW_REG_IP:
case UNW_X86_64_RIP:
- _registers.__rip = value;
- return;
- case UNW_REG_SP:
- _registers.__rsp = value;
- return;
- case UNW_X86_64_RAX:
- _registers.__rax = value;
- return;
- case UNW_X86_64_RDX:
- _registers.__rdx = value;
- return;
- case UNW_X86_64_RCX:
- _registers.__rcx = value;
- return;
- case UNW_X86_64_RBX:
- _registers.__rbx = value;
- return;
- case UNW_X86_64_RSI:
- _registers.__rsi = value;
- return;
- case UNW_X86_64_RDI:
- _registers.__rdi = value;
- return;
- case UNW_X86_64_RBP:
- _registers.__rbp = value;
- return;
- case UNW_X86_64_RSP:
- _registers.__rsp = value;
- return;
- case UNW_X86_64_R8:
- _registers.__r8 = value;
- return;
- case UNW_X86_64_R9:
- _registers.__r9 = value;
- return;
- case UNW_X86_64_R10:
- _registers.__r10 = value;
- return;
- case UNW_X86_64_R11:
- _registers.__r11 = value;
- return;
- case UNW_X86_64_R12:
- _registers.__r12 = value;
- return;
- case UNW_X86_64_R13:
- _registers.__r13 = value;
- return;
- case UNW_X86_64_R14:
- _registers.__r14 = value;
- return;
- case UNW_X86_64_R15:
- _registers.__r15 = value;
- return;
- }
- _LIBUNWIND_ABORT("unsupported x86_64 register");
-}
-
-inline const char *Registers_x86_64::getRegisterName(int regNum) {
- switch (regNum) {
- case UNW_REG_IP:
+ _registers.__rip = value;
+ return;
+ case UNW_REG_SP:
+ _registers.__rsp = value;
+ return;
+ case UNW_X86_64_RAX:
+ _registers.__rax = value;
+ return;
+ case UNW_X86_64_RDX:
+ _registers.__rdx = value;
+ return;
+ case UNW_X86_64_RCX:
+ _registers.__rcx = value;
+ return;
+ case UNW_X86_64_RBX:
+ _registers.__rbx = value;
+ return;
+ case UNW_X86_64_RSI:
+ _registers.__rsi = value;
+ return;
+ case UNW_X86_64_RDI:
+ _registers.__rdi = value;
+ return;
+ case UNW_X86_64_RBP:
+ _registers.__rbp = value;
+ return;
+ case UNW_X86_64_RSP:
+ _registers.__rsp = value;
+ return;
+ case UNW_X86_64_R8:
+ _registers.__r8 = value;
+ return;
+ case UNW_X86_64_R9:
+ _registers.__r9 = value;
+ return;
+ case UNW_X86_64_R10:
+ _registers.__r10 = value;
+ return;
+ case UNW_X86_64_R11:
+ _registers.__r11 = value;
+ return;
+ case UNW_X86_64_R12:
+ _registers.__r12 = value;
+ return;
+ case UNW_X86_64_R13:
+ _registers.__r13 = value;
+ return;
+ case UNW_X86_64_R14:
+ _registers.__r14 = value;
+ return;
+ case UNW_X86_64_R15:
+ _registers.__r15 = value;
+ return;
+ }
+ _LIBUNWIND_ABORT("unsupported x86_64 register");
+}
+
+inline const char *Registers_x86_64::getRegisterName(int regNum) {
+ switch (regNum) {
+ case UNW_REG_IP:
case UNW_X86_64_RIP:
- return "rip";
- case UNW_REG_SP:
- return "rsp";
- case UNW_X86_64_RAX:
- return "rax";
- case UNW_X86_64_RDX:
- return "rdx";
- case UNW_X86_64_RCX:
- return "rcx";
- case UNW_X86_64_RBX:
- return "rbx";
- case UNW_X86_64_RSI:
- return "rsi";
- case UNW_X86_64_RDI:
- return "rdi";
- case UNW_X86_64_RBP:
- return "rbp";
- case UNW_X86_64_RSP:
- return "rsp";
- case UNW_X86_64_R8:
- return "r8";
- case UNW_X86_64_R9:
- return "r9";
- case UNW_X86_64_R10:
- return "r10";
- case UNW_X86_64_R11:
- return "r11";
- case UNW_X86_64_R12:
- return "r12";
- case UNW_X86_64_R13:
- return "r13";
- case UNW_X86_64_R14:
- return "r14";
- case UNW_X86_64_R15:
- return "r15";
+ return "rip";
+ case UNW_REG_SP:
+ return "rsp";
+ case UNW_X86_64_RAX:
+ return "rax";
+ case UNW_X86_64_RDX:
+ return "rdx";
+ case UNW_X86_64_RCX:
+ return "rcx";
+ case UNW_X86_64_RBX:
+ return "rbx";
+ case UNW_X86_64_RSI:
+ return "rsi";
+ case UNW_X86_64_RDI:
+ return "rdi";
+ case UNW_X86_64_RBP:
+ return "rbp";
+ case UNW_X86_64_RSP:
+ return "rsp";
+ case UNW_X86_64_R8:
+ return "r8";
+ case UNW_X86_64_R9:
+ return "r9";
+ case UNW_X86_64_R10:
+ return "r10";
+ case UNW_X86_64_R11:
+ return "r11";
+ case UNW_X86_64_R12:
+ return "r12";
+ case UNW_X86_64_R13:
+ return "r13";
+ case UNW_X86_64_R14:
+ return "r14";
+ case UNW_X86_64_R15:
+ return "r15";
case UNW_X86_64_XMM0:
return "xmm0";
case UNW_X86_64_XMM1:
@@ -535,19 +535,19 @@ inline const char *Registers_x86_64::getRegisterName(int regNum) {
return "xmm14";
case UNW_X86_64_XMM15:
return "xmm15";
- default:
- return "unknown register";
- }
-}
-
-inline double Registers_x86_64::getFloatRegister(int) const {
- _LIBUNWIND_ABORT("no x86_64 float registers");
-}
-
-inline void Registers_x86_64::setFloatRegister(int, double) {
- _LIBUNWIND_ABORT("no x86_64 float registers");
-}
-
+ default:
+ return "unknown register";
+ }
+}
+
+inline double Registers_x86_64::getFloatRegister(int) const {
+ _LIBUNWIND_ABORT("no x86_64 float registers");
+}
+
+inline void Registers_x86_64::setFloatRegister(int, double) {
+ _LIBUNWIND_ABORT("no x86_64 float registers");
+}
+
inline bool Registers_x86_64::validVectorRegister(int regNum) const {
#if defined(_WIN64)
if (regNum < UNW_X86_64_XMM0)
@@ -567,588 +567,588 @@ inline v128 Registers_x86_64::getVectorRegister(int regNum) const {
return _xmm[regNum - UNW_X86_64_XMM0];
#else
(void)regNum; // suppress unused parameter warning
- _LIBUNWIND_ABORT("no x86_64 vector registers");
+ _LIBUNWIND_ABORT("no x86_64 vector registers");
#endif
-}
-
+}
+
inline void Registers_x86_64::setVectorRegister(int regNum, v128 value) {
#if defined(_WIN64)
assert(validVectorRegister(regNum));
_xmm[regNum - UNW_X86_64_XMM0] = value;
#else
(void)regNum; (void)value; // suppress unused parameter warnings
- _LIBUNWIND_ABORT("no x86_64 vector registers");
+ _LIBUNWIND_ABORT("no x86_64 vector registers");
#endif
-}
+}
#endif // _LIBUNWIND_TARGET_X86_64
-
-
+
+
#if defined(_LIBUNWIND_TARGET_PPC)
-/// Registers_ppc holds the register state of a thread in a 32-bit PowerPC
-/// process.
-class _LIBUNWIND_HIDDEN Registers_ppc {
-public:
- Registers_ppc();
- Registers_ppc(const void *registers);
-
- bool validRegister(int num) const;
- uint32_t getRegister(int num) const;
- void setRegister(int num, uint32_t value);
- bool validFloatRegister(int num) const;
- double getFloatRegister(int num) const;
- void setFloatRegister(int num, double value);
- bool validVectorRegister(int num) const;
- v128 getVectorRegister(int num) const;
- void setVectorRegister(int num, v128 value);
+/// Registers_ppc holds the register state of a thread in a 32-bit PowerPC
+/// process.
+class _LIBUNWIND_HIDDEN Registers_ppc {
+public:
+ Registers_ppc();
+ Registers_ppc(const void *registers);
+
+ bool validRegister(int num) const;
+ uint32_t getRegister(int num) const;
+ void setRegister(int num, uint32_t value);
+ bool validFloatRegister(int num) const;
+ double getFloatRegister(int num) const;
+ void setFloatRegister(int num, double value);
+ bool validVectorRegister(int num) const;
+ v128 getVectorRegister(int num) const;
+ void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
- void jumpto();
+ void jumpto();
static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC; }
static int getArch() { return REGISTERS_PPC; }
-
- uint64_t getSP() const { return _registers.__r1; }
- void setSP(uint32_t value) { _registers.__r1 = value; }
- uint64_t getIP() const { return _registers.__srr0; }
- void setIP(uint32_t value) { _registers.__srr0 = value; }
-
-private:
- struct ppc_thread_state_t {
- unsigned int __srr0; /* Instruction address register (PC) */
- unsigned int __srr1; /* Machine state register (supervisor) */
- unsigned int __r0;
- unsigned int __r1;
- unsigned int __r2;
- unsigned int __r3;
- unsigned int __r4;
- unsigned int __r5;
- unsigned int __r6;
- unsigned int __r7;
- unsigned int __r8;
- unsigned int __r9;
- unsigned int __r10;
- unsigned int __r11;
- unsigned int __r12;
- unsigned int __r13;
- unsigned int __r14;
- unsigned int __r15;
- unsigned int __r16;
- unsigned int __r17;
- unsigned int __r18;
- unsigned int __r19;
- unsigned int __r20;
- unsigned int __r21;
- unsigned int __r22;
- unsigned int __r23;
- unsigned int __r24;
- unsigned int __r25;
- unsigned int __r26;
- unsigned int __r27;
- unsigned int __r28;
- unsigned int __r29;
- unsigned int __r30;
- unsigned int __r31;
- unsigned int __cr; /* Condition register */
- unsigned int __xer; /* User's integer exception register */
- unsigned int __lr; /* Link register */
- unsigned int __ctr; /* Count register */
- unsigned int __mq; /* MQ register (601 only) */
- unsigned int __vrsave; /* Vector Save Register */
- };
-
- struct ppc_float_state_t {
- double __fpregs[32];
-
- unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */
- unsigned int __fpscr; /* floating point status register */
- };
-
- ppc_thread_state_t _registers;
- ppc_float_state_t _floatRegisters;
- v128 _vectorRegisters[32]; // offset 424
-};
-
-inline Registers_ppc::Registers_ppc(const void *registers) {
+
+ uint64_t getSP() const { return _registers.__r1; }
+ void setSP(uint32_t value) { _registers.__r1 = value; }
+ uint64_t getIP() const { return _registers.__srr0; }
+ void setIP(uint32_t value) { _registers.__srr0 = value; }
+
+private:
+ struct ppc_thread_state_t {
+ unsigned int __srr0; /* Instruction address register (PC) */
+ unsigned int __srr1; /* Machine state register (supervisor) */
+ unsigned int __r0;
+ unsigned int __r1;
+ unsigned int __r2;
+ unsigned int __r3;
+ unsigned int __r4;
+ unsigned int __r5;
+ unsigned int __r6;
+ unsigned int __r7;
+ unsigned int __r8;
+ unsigned int __r9;
+ unsigned int __r10;
+ unsigned int __r11;
+ unsigned int __r12;
+ unsigned int __r13;
+ unsigned int __r14;
+ unsigned int __r15;
+ unsigned int __r16;
+ unsigned int __r17;
+ unsigned int __r18;
+ unsigned int __r19;
+ unsigned int __r20;
+ unsigned int __r21;
+ unsigned int __r22;
+ unsigned int __r23;
+ unsigned int __r24;
+ unsigned int __r25;
+ unsigned int __r26;
+ unsigned int __r27;
+ unsigned int __r28;
+ unsigned int __r29;
+ unsigned int __r30;
+ unsigned int __r31;
+ unsigned int __cr; /* Condition register */
+ unsigned int __xer; /* User's integer exception register */
+ unsigned int __lr; /* Link register */
+ unsigned int __ctr; /* Count register */
+ unsigned int __mq; /* MQ register (601 only) */
+ unsigned int __vrsave; /* Vector Save Register */
+ };
+
+ struct ppc_float_state_t {
+ double __fpregs[32];
+
+ unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */
+ unsigned int __fpscr; /* floating point status register */
+ };
+
+ ppc_thread_state_t _registers;
+ ppc_float_state_t _floatRegisters;
+ v128 _vectorRegisters[32]; // offset 424
+};
+
+inline Registers_ppc::Registers_ppc(const void *registers) {
static_assert((check_fit<Registers_ppc, unw_context_t>::does_fit),
"ppc registers do not fit into unw_context_t");
- memcpy(&_registers, static_cast<const uint8_t *>(registers),
- sizeof(_registers));
- static_assert(sizeof(ppc_thread_state_t) == 160,
- "expected float register offset to be 160");
- memcpy(&_floatRegisters,
- static_cast<const uint8_t *>(registers) + sizeof(ppc_thread_state_t),
- sizeof(_floatRegisters));
- static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424,
- "expected vector register offset to be 424 bytes");
- memcpy(_vectorRegisters,
- static_cast<const uint8_t *>(registers) + sizeof(ppc_thread_state_t) +
- sizeof(ppc_float_state_t),
- sizeof(_vectorRegisters));
-}
-
-inline Registers_ppc::Registers_ppc() {
- memset(&_registers, 0, sizeof(_registers));
- memset(&_floatRegisters, 0, sizeof(_floatRegisters));
- memset(&_vectorRegisters, 0, sizeof(_vectorRegisters));
-}
-
-inline bool Registers_ppc::validRegister(int regNum) const {
- if (regNum == UNW_REG_IP)
- return true;
- if (regNum == UNW_REG_SP)
- return true;
- if (regNum == UNW_PPC_VRSAVE)
- return true;
- if (regNum < 0)
- return false;
- if (regNum <= UNW_PPC_R31)
- return true;
- if (regNum == UNW_PPC_MQ)
- return true;
- if (regNum == UNW_PPC_LR)
- return true;
- if (regNum == UNW_PPC_CTR)
- return true;
- if ((UNW_PPC_CR0 <= regNum) && (regNum <= UNW_PPC_CR7))
- return true;
- return false;
-}
-
-inline uint32_t Registers_ppc::getRegister(int regNum) const {
- switch (regNum) {
- case UNW_REG_IP:
- return _registers.__srr0;
- case UNW_REG_SP:
- return _registers.__r1;
- case UNW_PPC_R0:
- return _registers.__r0;
- case UNW_PPC_R1:
- return _registers.__r1;
- case UNW_PPC_R2:
- return _registers.__r2;
- case UNW_PPC_R3:
- return _registers.__r3;
- case UNW_PPC_R4:
- return _registers.__r4;
- case UNW_PPC_R5:
- return _registers.__r5;
- case UNW_PPC_R6:
- return _registers.__r6;
- case UNW_PPC_R7:
- return _registers.__r7;
- case UNW_PPC_R8:
- return _registers.__r8;
- case UNW_PPC_R9:
- return _registers.__r9;
- case UNW_PPC_R10:
- return _registers.__r10;
- case UNW_PPC_R11:
- return _registers.__r11;
- case UNW_PPC_R12:
- return _registers.__r12;
- case UNW_PPC_R13:
- return _registers.__r13;
- case UNW_PPC_R14:
- return _registers.__r14;
- case UNW_PPC_R15:
- return _registers.__r15;
- case UNW_PPC_R16:
- return _registers.__r16;
- case UNW_PPC_R17:
- return _registers.__r17;
- case UNW_PPC_R18:
- return _registers.__r18;
- case UNW_PPC_R19:
- return _registers.__r19;
- case UNW_PPC_R20:
- return _registers.__r20;
- case UNW_PPC_R21:
- return _registers.__r21;
- case UNW_PPC_R22:
- return _registers.__r22;
- case UNW_PPC_R23:
- return _registers.__r23;
- case UNW_PPC_R24:
- return _registers.__r24;
- case UNW_PPC_R25:
- return _registers.__r25;
- case UNW_PPC_R26:
- return _registers.__r26;
- case UNW_PPC_R27:
- return _registers.__r27;
- case UNW_PPC_R28:
- return _registers.__r28;
- case UNW_PPC_R29:
- return _registers.__r29;
- case UNW_PPC_R30:
- return _registers.__r30;
- case UNW_PPC_R31:
- return _registers.__r31;
- case UNW_PPC_LR:
- return _registers.__lr;
- case UNW_PPC_CR0:
- return (_registers.__cr & 0xF0000000);
- case UNW_PPC_CR1:
- return (_registers.__cr & 0x0F000000);
- case UNW_PPC_CR2:
- return (_registers.__cr & 0x00F00000);
- case UNW_PPC_CR3:
- return (_registers.__cr & 0x000F0000);
- case UNW_PPC_CR4:
- return (_registers.__cr & 0x0000F000);
- case UNW_PPC_CR5:
- return (_registers.__cr & 0x00000F00);
- case UNW_PPC_CR6:
- return (_registers.__cr & 0x000000F0);
- case UNW_PPC_CR7:
- return (_registers.__cr & 0x0000000F);
- case UNW_PPC_VRSAVE:
- return _registers.__vrsave;
- }
- _LIBUNWIND_ABORT("unsupported ppc register");
-}
-
-inline void Registers_ppc::setRegister(int regNum, uint32_t value) {
- //fprintf(stderr, "Registers_ppc::setRegister(%d, 0x%08X)\n", regNum, value);
- switch (regNum) {
- case UNW_REG_IP:
- _registers.__srr0 = value;
- return;
- case UNW_REG_SP:
- _registers.__r1 = value;
- return;
- case UNW_PPC_R0:
- _registers.__r0 = value;
- return;
- case UNW_PPC_R1:
- _registers.__r1 = value;
- return;
- case UNW_PPC_R2:
- _registers.__r2 = value;
- return;
- case UNW_PPC_R3:
- _registers.__r3 = value;
- return;
- case UNW_PPC_R4:
- _registers.__r4 = value;
- return;
- case UNW_PPC_R5:
- _registers.__r5 = value;
- return;
- case UNW_PPC_R6:
- _registers.__r6 = value;
- return;
- case UNW_PPC_R7:
- _registers.__r7 = value;
- return;
- case UNW_PPC_R8:
- _registers.__r8 = value;
- return;
- case UNW_PPC_R9:
- _registers.__r9 = value;
- return;
- case UNW_PPC_R10:
- _registers.__r10 = value;
- return;
- case UNW_PPC_R11:
- _registers.__r11 = value;
- return;
- case UNW_PPC_R12:
- _registers.__r12 = value;
- return;
- case UNW_PPC_R13:
- _registers.__r13 = value;
- return;
- case UNW_PPC_R14:
- _registers.__r14 = value;
- return;
- case UNW_PPC_R15:
- _registers.__r15 = value;
- return;
- case UNW_PPC_R16:
- _registers.__r16 = value;
- return;
- case UNW_PPC_R17:
- _registers.__r17 = value;
- return;
- case UNW_PPC_R18:
- _registers.__r18 = value;
- return;
- case UNW_PPC_R19:
- _registers.__r19 = value;
- return;
- case UNW_PPC_R20:
- _registers.__r20 = value;
- return;
- case UNW_PPC_R21:
- _registers.__r21 = value;
- return;
- case UNW_PPC_R22:
- _registers.__r22 = value;
- return;
- case UNW_PPC_R23:
- _registers.__r23 = value;
- return;
- case UNW_PPC_R24:
- _registers.__r24 = value;
- return;
- case UNW_PPC_R25:
- _registers.__r25 = value;
- return;
- case UNW_PPC_R26:
- _registers.__r26 = value;
- return;
- case UNW_PPC_R27:
- _registers.__r27 = value;
- return;
- case UNW_PPC_R28:
- _registers.__r28 = value;
- return;
- case UNW_PPC_R29:
- _registers.__r29 = value;
- return;
- case UNW_PPC_R30:
- _registers.__r30 = value;
- return;
- case UNW_PPC_R31:
- _registers.__r31 = value;
- return;
- case UNW_PPC_MQ:
- _registers.__mq = value;
- return;
- case UNW_PPC_LR:
- _registers.__lr = value;
- return;
- case UNW_PPC_CTR:
- _registers.__ctr = value;
- return;
- case UNW_PPC_CR0:
- _registers.__cr &= 0x0FFFFFFF;
- _registers.__cr |= (value & 0xF0000000);
- return;
- case UNW_PPC_CR1:
- _registers.__cr &= 0xF0FFFFFF;
- _registers.__cr |= (value & 0x0F000000);
- return;
- case UNW_PPC_CR2:
- _registers.__cr &= 0xFF0FFFFF;
- _registers.__cr |= (value & 0x00F00000);
- return;
- case UNW_PPC_CR3:
- _registers.__cr &= 0xFFF0FFFF;
- _registers.__cr |= (value & 0x000F0000);
- return;
- case UNW_PPC_CR4:
- _registers.__cr &= 0xFFFF0FFF;
- _registers.__cr |= (value & 0x0000F000);
- return;
- case UNW_PPC_CR5:
- _registers.__cr &= 0xFFFFF0FF;
- _registers.__cr |= (value & 0x00000F00);
- return;
- case UNW_PPC_CR6:
- _registers.__cr &= 0xFFFFFF0F;
- _registers.__cr |= (value & 0x000000F0);
- return;
- case UNW_PPC_CR7:
- _registers.__cr &= 0xFFFFFFF0;
- _registers.__cr |= (value & 0x0000000F);
- return;
- case UNW_PPC_VRSAVE:
- _registers.__vrsave = value;
- return;
- // not saved
- return;
- case UNW_PPC_XER:
- _registers.__xer = value;
- return;
- case UNW_PPC_AP:
- case UNW_PPC_VSCR:
- case UNW_PPC_SPEFSCR:
- // not saved
- return;
- }
- _LIBUNWIND_ABORT("unsupported ppc register");
-}
-
-inline bool Registers_ppc::validFloatRegister(int regNum) const {
- if (regNum < UNW_PPC_F0)
- return false;
- if (regNum > UNW_PPC_F31)
- return false;
- return true;
-}
-
-inline double Registers_ppc::getFloatRegister(int regNum) const {
- assert(validFloatRegister(regNum));
- return _floatRegisters.__fpregs[regNum - UNW_PPC_F0];
-}
-
-inline void Registers_ppc::setFloatRegister(int regNum, double value) {
- assert(validFloatRegister(regNum));
- _floatRegisters.__fpregs[regNum - UNW_PPC_F0] = value;
-}
-
-inline bool Registers_ppc::validVectorRegister(int regNum) const {
- if (regNum < UNW_PPC_V0)
- return false;
- if (regNum > UNW_PPC_V31)
- return false;
- return true;
-}
-
-inline v128 Registers_ppc::getVectorRegister(int regNum) const {
- assert(validVectorRegister(regNum));
- v128 result = _vectorRegisters[regNum - UNW_PPC_V0];
- return result;
-}
-
-inline void Registers_ppc::setVectorRegister(int regNum, v128 value) {
- assert(validVectorRegister(regNum));
- _vectorRegisters[regNum - UNW_PPC_V0] = value;
-}
-
-inline const char *Registers_ppc::getRegisterName(int regNum) {
- switch (regNum) {
- case UNW_REG_IP:
- return "ip";
- case UNW_REG_SP:
- return "sp";
- case UNW_PPC_R0:
- return "r0";
- case UNW_PPC_R1:
- return "r1";
- case UNW_PPC_R2:
- return "r2";
- case UNW_PPC_R3:
- return "r3";
- case UNW_PPC_R4:
- return "r4";
- case UNW_PPC_R5:
- return "r5";
- case UNW_PPC_R6:
- return "r6";
- case UNW_PPC_R7:
- return "r7";
- case UNW_PPC_R8:
- return "r8";
- case UNW_PPC_R9:
- return "r9";
- case UNW_PPC_R10:
- return "r10";
- case UNW_PPC_R11:
- return "r11";
- case UNW_PPC_R12:
- return "r12";
- case UNW_PPC_R13:
- return "r13";
- case UNW_PPC_R14:
- return "r14";
- case UNW_PPC_R15:
- return "r15";
- case UNW_PPC_R16:
- return "r16";
- case UNW_PPC_R17:
- return "r17";
- case UNW_PPC_R18:
- return "r18";
- case UNW_PPC_R19:
- return "r19";
- case UNW_PPC_R20:
- return "r20";
- case UNW_PPC_R21:
- return "r21";
- case UNW_PPC_R22:
- return "r22";
- case UNW_PPC_R23:
- return "r23";
- case UNW_PPC_R24:
- return "r24";
- case UNW_PPC_R25:
- return "r25";
- case UNW_PPC_R26:
- return "r26";
- case UNW_PPC_R27:
- return "r27";
- case UNW_PPC_R28:
- return "r28";
- case UNW_PPC_R29:
- return "r29";
- case UNW_PPC_R30:
- return "r30";
- case UNW_PPC_R31:
- return "r31";
- case UNW_PPC_F0:
- return "fp0";
- case UNW_PPC_F1:
- return "fp1";
- case UNW_PPC_F2:
- return "fp2";
- case UNW_PPC_F3:
- return "fp3";
- case UNW_PPC_F4:
- return "fp4";
- case UNW_PPC_F5:
- return "fp5";
- case UNW_PPC_F6:
- return "fp6";
- case UNW_PPC_F7:
- return "fp7";
- case UNW_PPC_F8:
- return "fp8";
- case UNW_PPC_F9:
- return "fp9";
- case UNW_PPC_F10:
- return "fp10";
- case UNW_PPC_F11:
- return "fp11";
- case UNW_PPC_F12:
- return "fp12";
- case UNW_PPC_F13:
- return "fp13";
- case UNW_PPC_F14:
- return "fp14";
- case UNW_PPC_F15:
- return "fp15";
- case UNW_PPC_F16:
- return "fp16";
- case UNW_PPC_F17:
- return "fp17";
- case UNW_PPC_F18:
- return "fp18";
- case UNW_PPC_F19:
- return "fp19";
- case UNW_PPC_F20:
- return "fp20";
- case UNW_PPC_F21:
- return "fp21";
- case UNW_PPC_F22:
- return "fp22";
- case UNW_PPC_F23:
- return "fp23";
- case UNW_PPC_F24:
- return "fp24";
- case UNW_PPC_F25:
- return "fp25";
- case UNW_PPC_F26:
- return "fp26";
- case UNW_PPC_F27:
- return "fp27";
- case UNW_PPC_F28:
- return "fp28";
- case UNW_PPC_F29:
- return "fp29";
- case UNW_PPC_F30:
- return "fp30";
- case UNW_PPC_F31:
- return "fp31";
- case UNW_PPC_LR:
- return "lr";
- default:
- return "unknown register";
- }
-
-}
+ memcpy(&_registers, static_cast<const uint8_t *>(registers),
+ sizeof(_registers));
+ static_assert(sizeof(ppc_thread_state_t) == 160,
+ "expected float register offset to be 160");
+ memcpy(&_floatRegisters,
+ static_cast<const uint8_t *>(registers) + sizeof(ppc_thread_state_t),
+ sizeof(_floatRegisters));
+ static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424,
+ "expected vector register offset to be 424 bytes");
+ memcpy(_vectorRegisters,
+ static_cast<const uint8_t *>(registers) + sizeof(ppc_thread_state_t) +
+ sizeof(ppc_float_state_t),
+ sizeof(_vectorRegisters));
+}
+
+inline Registers_ppc::Registers_ppc() {
+ memset(&_registers, 0, sizeof(_registers));
+ memset(&_floatRegisters, 0, sizeof(_floatRegisters));
+ memset(&_vectorRegisters, 0, sizeof(_vectorRegisters));
+}
+
+inline bool Registers_ppc::validRegister(int regNum) const {
+ if (regNum == UNW_REG_IP)
+ return true;
+ if (regNum == UNW_REG_SP)
+ return true;
+ if (regNum == UNW_PPC_VRSAVE)
+ return true;
+ if (regNum < 0)
+ return false;
+ if (regNum <= UNW_PPC_R31)
+ return true;
+ if (regNum == UNW_PPC_MQ)
+ return true;
+ if (regNum == UNW_PPC_LR)
+ return true;
+ if (regNum == UNW_PPC_CTR)
+ return true;
+ if ((UNW_PPC_CR0 <= regNum) && (regNum <= UNW_PPC_CR7))
+ return true;
+ return false;
+}
+
+inline uint32_t Registers_ppc::getRegister(int regNum) const {
+ switch (regNum) {
+ case UNW_REG_IP:
+ return _registers.__srr0;
+ case UNW_REG_SP:
+ return _registers.__r1;
+ case UNW_PPC_R0:
+ return _registers.__r0;
+ case UNW_PPC_R1:
+ return _registers.__r1;
+ case UNW_PPC_R2:
+ return _registers.__r2;
+ case UNW_PPC_R3:
+ return _registers.__r3;
+ case UNW_PPC_R4:
+ return _registers.__r4;
+ case UNW_PPC_R5:
+ return _registers.__r5;
+ case UNW_PPC_R6:
+ return _registers.__r6;
+ case UNW_PPC_R7:
+ return _registers.__r7;
+ case UNW_PPC_R8:
+ return _registers.__r8;
+ case UNW_PPC_R9:
+ return _registers.__r9;
+ case UNW_PPC_R10:
+ return _registers.__r10;
+ case UNW_PPC_R11:
+ return _registers.__r11;
+ case UNW_PPC_R12:
+ return _registers.__r12;
+ case UNW_PPC_R13:
+ return _registers.__r13;
+ case UNW_PPC_R14:
+ return _registers.__r14;
+ case UNW_PPC_R15:
+ return _registers.__r15;
+ case UNW_PPC_R16:
+ return _registers.__r16;
+ case UNW_PPC_R17:
+ return _registers.__r17;
+ case UNW_PPC_R18:
+ return _registers.__r18;
+ case UNW_PPC_R19:
+ return _registers.__r19;
+ case UNW_PPC_R20:
+ return _registers.__r20;
+ case UNW_PPC_R21:
+ return _registers.__r21;
+ case UNW_PPC_R22:
+ return _registers.__r22;
+ case UNW_PPC_R23:
+ return _registers.__r23;
+ case UNW_PPC_R24:
+ return _registers.__r24;
+ case UNW_PPC_R25:
+ return _registers.__r25;
+ case UNW_PPC_R26:
+ return _registers.__r26;
+ case UNW_PPC_R27:
+ return _registers.__r27;
+ case UNW_PPC_R28:
+ return _registers.__r28;
+ case UNW_PPC_R29:
+ return _registers.__r29;
+ case UNW_PPC_R30:
+ return _registers.__r30;
+ case UNW_PPC_R31:
+ return _registers.__r31;
+ case UNW_PPC_LR:
+ return _registers.__lr;
+ case UNW_PPC_CR0:
+ return (_registers.__cr & 0xF0000000);
+ case UNW_PPC_CR1:
+ return (_registers.__cr & 0x0F000000);
+ case UNW_PPC_CR2:
+ return (_registers.__cr & 0x00F00000);
+ case UNW_PPC_CR3:
+ return (_registers.__cr & 0x000F0000);
+ case UNW_PPC_CR4:
+ return (_registers.__cr & 0x0000F000);
+ case UNW_PPC_CR5:
+ return (_registers.__cr & 0x00000F00);
+ case UNW_PPC_CR6:
+ return (_registers.__cr & 0x000000F0);
+ case UNW_PPC_CR7:
+ return (_registers.__cr & 0x0000000F);
+ case UNW_PPC_VRSAVE:
+ return _registers.__vrsave;
+ }
+ _LIBUNWIND_ABORT("unsupported ppc register");
+}
+
+inline void Registers_ppc::setRegister(int regNum, uint32_t value) {
+ //fprintf(stderr, "Registers_ppc::setRegister(%d, 0x%08X)\n", regNum, value);
+ switch (regNum) {
+ case UNW_REG_IP:
+ _registers.__srr0 = value;
+ return;
+ case UNW_REG_SP:
+ _registers.__r1 = value;
+ return;
+ case UNW_PPC_R0:
+ _registers.__r0 = value;
+ return;
+ case UNW_PPC_R1:
+ _registers.__r1 = value;
+ return;
+ case UNW_PPC_R2:
+ _registers.__r2 = value;
+ return;
+ case UNW_PPC_R3:
+ _registers.__r3 = value;
+ return;
+ case UNW_PPC_R4:
+ _registers.__r4 = value;
+ return;
+ case UNW_PPC_R5:
+ _registers.__r5 = value;
+ return;
+ case UNW_PPC_R6:
+ _registers.__r6 = value;
+ return;
+ case UNW_PPC_R7:
+ _registers.__r7 = value;
+ return;
+ case UNW_PPC_R8:
+ _registers.__r8 = value;
+ return;
+ case UNW_PPC_R9:
+ _registers.__r9 = value;
+ return;
+ case UNW_PPC_R10:
+ _registers.__r10 = value;
+ return;
+ case UNW_PPC_R11:
+ _registers.__r11 = value;
+ return;
+ case UNW_PPC_R12:
+ _registers.__r12 = value;
+ return;
+ case UNW_PPC_R13:
+ _registers.__r13 = value;
+ return;
+ case UNW_PPC_R14:
+ _registers.__r14 = value;
+ return;
+ case UNW_PPC_R15:
+ _registers.__r15 = value;
+ return;
+ case UNW_PPC_R16:
+ _registers.__r16 = value;
+ return;
+ case UNW_PPC_R17:
+ _registers.__r17 = value;
+ return;
+ case UNW_PPC_R18:
+ _registers.__r18 = value;
+ return;
+ case UNW_PPC_R19:
+ _registers.__r19 = value;
+ return;
+ case UNW_PPC_R20:
+ _registers.__r20 = value;
+ return;
+ case UNW_PPC_R21:
+ _registers.__r21 = value;
+ return;
+ case UNW_PPC_R22:
+ _registers.__r22 = value;
+ return;
+ case UNW_PPC_R23:
+ _registers.__r23 = value;
+ return;
+ case UNW_PPC_R24:
+ _registers.__r24 = value;
+ return;
+ case UNW_PPC_R25:
+ _registers.__r25 = value;
+ return;
+ case UNW_PPC_R26:
+ _registers.__r26 = value;
+ return;
+ case UNW_PPC_R27:
+ _registers.__r27 = value;
+ return;
+ case UNW_PPC_R28:
+ _registers.__r28 = value;
+ return;
+ case UNW_PPC_R29:
+ _registers.__r29 = value;
+ return;
+ case UNW_PPC_R30:
+ _registers.__r30 = value;
+ return;
+ case UNW_PPC_R31:
+ _registers.__r31 = value;
+ return;
+ case UNW_PPC_MQ:
+ _registers.__mq = value;
+ return;
+ case UNW_PPC_LR:
+ _registers.__lr = value;
+ return;
+ case UNW_PPC_CTR:
+ _registers.__ctr = value;
+ return;
+ case UNW_PPC_CR0:
+ _registers.__cr &= 0x0FFFFFFF;
+ _registers.__cr |= (value & 0xF0000000);
+ return;
+ case UNW_PPC_CR1:
+ _registers.__cr &= 0xF0FFFFFF;
+ _registers.__cr |= (value & 0x0F000000);
+ return;
+ case UNW_PPC_CR2:
+ _registers.__cr &= 0xFF0FFFFF;
+ _registers.__cr |= (value & 0x00F00000);
+ return;
+ case UNW_PPC_CR3:
+ _registers.__cr &= 0xFFF0FFFF;
+ _registers.__cr |= (value & 0x000F0000);
+ return;
+ case UNW_PPC_CR4:
+ _registers.__cr &= 0xFFFF0FFF;
+ _registers.__cr |= (value & 0x0000F000);
+ return;
+ case UNW_PPC_CR5:
+ _registers.__cr &= 0xFFFFF0FF;
+ _registers.__cr |= (value & 0x00000F00);
+ return;
+ case UNW_PPC_CR6:
+ _registers.__cr &= 0xFFFFFF0F;
+ _registers.__cr |= (value & 0x000000F0);
+ return;
+ case UNW_PPC_CR7:
+ _registers.__cr &= 0xFFFFFFF0;
+ _registers.__cr |= (value & 0x0000000F);
+ return;
+ case UNW_PPC_VRSAVE:
+ _registers.__vrsave = value;
+ return;
+ // not saved
+ return;
+ case UNW_PPC_XER:
+ _registers.__xer = value;
+ return;
+ case UNW_PPC_AP:
+ case UNW_PPC_VSCR:
+ case UNW_PPC_SPEFSCR:
+ // not saved
+ return;
+ }
+ _LIBUNWIND_ABORT("unsupported ppc register");
+}
+
+inline bool Registers_ppc::validFloatRegister(int regNum) const {
+ if (regNum < UNW_PPC_F0)
+ return false;
+ if (regNum > UNW_PPC_F31)
+ return false;
+ return true;
+}
+
+inline double Registers_ppc::getFloatRegister(int regNum) const {
+ assert(validFloatRegister(regNum));
+ return _floatRegisters.__fpregs[regNum - UNW_PPC_F0];
+}
+
+inline void Registers_ppc::setFloatRegister(int regNum, double value) {
+ assert(validFloatRegister(regNum));
+ _floatRegisters.__fpregs[regNum - UNW_PPC_F0] = value;
+}
+
+inline bool Registers_ppc::validVectorRegister(int regNum) const {
+ if (regNum < UNW_PPC_V0)
+ return false;
+ if (regNum > UNW_PPC_V31)
+ return false;
+ return true;
+}
+
+inline v128 Registers_ppc::getVectorRegister(int regNum) const {
+ assert(validVectorRegister(regNum));
+ v128 result = _vectorRegisters[regNum - UNW_PPC_V0];
+ return result;
+}
+
+inline void Registers_ppc::setVectorRegister(int regNum, v128 value) {
+ assert(validVectorRegister(regNum));
+ _vectorRegisters[regNum - UNW_PPC_V0] = value;
+}
+
+inline const char *Registers_ppc::getRegisterName(int regNum) {
+ switch (regNum) {
+ case UNW_REG_IP:
+ return "ip";
+ case UNW_REG_SP:
+ return "sp";
+ case UNW_PPC_R0:
+ return "r0";
+ case UNW_PPC_R1:
+ return "r1";
+ case UNW_PPC_R2:
+ return "r2";
+ case UNW_PPC_R3:
+ return "r3";
+ case UNW_PPC_R4:
+ return "r4";
+ case UNW_PPC_R5:
+ return "r5";
+ case UNW_PPC_R6:
+ return "r6";
+ case UNW_PPC_R7:
+ return "r7";
+ case UNW_PPC_R8:
+ return "r8";
+ case UNW_PPC_R9:
+ return "r9";
+ case UNW_PPC_R10:
+ return "r10";
+ case UNW_PPC_R11:
+ return "r11";
+ case UNW_PPC_R12:
+ return "r12";
+ case UNW_PPC_R13:
+ return "r13";
+ case UNW_PPC_R14:
+ return "r14";
+ case UNW_PPC_R15:
+ return "r15";
+ case UNW_PPC_R16:
+ return "r16";
+ case UNW_PPC_R17:
+ return "r17";
+ case UNW_PPC_R18:
+ return "r18";
+ case UNW_PPC_R19:
+ return "r19";
+ case UNW_PPC_R20:
+ return "r20";
+ case UNW_PPC_R21:
+ return "r21";
+ case UNW_PPC_R22:
+ return "r22";
+ case UNW_PPC_R23:
+ return "r23";
+ case UNW_PPC_R24:
+ return "r24";
+ case UNW_PPC_R25:
+ return "r25";
+ case UNW_PPC_R26:
+ return "r26";
+ case UNW_PPC_R27:
+ return "r27";
+ case UNW_PPC_R28:
+ return "r28";
+ case UNW_PPC_R29:
+ return "r29";
+ case UNW_PPC_R30:
+ return "r30";
+ case UNW_PPC_R31:
+ return "r31";
+ case UNW_PPC_F0:
+ return "fp0";
+ case UNW_PPC_F1:
+ return "fp1";
+ case UNW_PPC_F2:
+ return "fp2";
+ case UNW_PPC_F3:
+ return "fp3";
+ case UNW_PPC_F4:
+ return "fp4";
+ case UNW_PPC_F5:
+ return "fp5";
+ case UNW_PPC_F6:
+ return "fp6";
+ case UNW_PPC_F7:
+ return "fp7";
+ case UNW_PPC_F8:
+ return "fp8";
+ case UNW_PPC_F9:
+ return "fp9";
+ case UNW_PPC_F10:
+ return "fp10";
+ case UNW_PPC_F11:
+ return "fp11";
+ case UNW_PPC_F12:
+ return "fp12";
+ case UNW_PPC_F13:
+ return "fp13";
+ case UNW_PPC_F14:
+ return "fp14";
+ case UNW_PPC_F15:
+ return "fp15";
+ case UNW_PPC_F16:
+ return "fp16";
+ case UNW_PPC_F17:
+ return "fp17";
+ case UNW_PPC_F18:
+ return "fp18";
+ case UNW_PPC_F19:
+ return "fp19";
+ case UNW_PPC_F20:
+ return "fp20";
+ case UNW_PPC_F21:
+ return "fp21";
+ case UNW_PPC_F22:
+ return "fp22";
+ case UNW_PPC_F23:
+ return "fp23";
+ case UNW_PPC_F24:
+ return "fp24";
+ case UNW_PPC_F25:
+ return "fp25";
+ case UNW_PPC_F26:
+ return "fp26";
+ case UNW_PPC_F27:
+ return "fp27";
+ case UNW_PPC_F28:
+ return "fp28";
+ case UNW_PPC_F29:
+ return "fp29";
+ case UNW_PPC_F30:
+ return "fp30";
+ case UNW_PPC_F31:
+ return "fp31";
+ case UNW_PPC_LR:
+ return "lr";
+ default:
+ return "unknown register";
+ }
+
+}
#endif // _LIBUNWIND_TARGET_PPC
-
+
#if defined(_LIBUNWIND_TARGET_PPC64)
/// Registers_ppc64 holds the register state of a thread in a 64-bit PowerPC
/// process.
@@ -1156,7 +1156,7 @@ class _LIBUNWIND_HIDDEN Registers_ppc64 {
public:
Registers_ppc64();
Registers_ppc64(const void *registers);
-
+
bool validRegister(int num) const;
uint64_t getRegister(int num) const;
void setRegister(int num, uint64_t value);
@@ -1793,91 +1793,91 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) {
#if defined(_LIBUNWIND_TARGET_AARCH64)
-/// Registers_arm64 holds the register state of a thread in a 64-bit arm
-/// process.
+/// Registers_arm64 holds the register state of a thread in a 64-bit arm
+/// process.
class _LIBUNWIND_HIDDEN Registers_arm64;
extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
-class _LIBUNWIND_HIDDEN Registers_arm64 {
-public:
- Registers_arm64();
- Registers_arm64(const void *registers);
-
- bool validRegister(int num) const;
- uint64_t getRegister(int num) const;
- void setRegister(int num, uint64_t value);
- bool validFloatRegister(int num) const;
- double getFloatRegister(int num) const;
- void setFloatRegister(int num, double value);
- bool validVectorRegister(int num) const;
- v128 getVectorRegister(int num) const;
- void setVectorRegister(int num, v128 value);
+class _LIBUNWIND_HIDDEN Registers_arm64 {
+public:
+ Registers_arm64();
+ Registers_arm64(const void *registers);
+
+ bool validRegister(int num) const;
+ uint64_t getRegister(int num) const;
+ void setRegister(int num, uint64_t value);
+ bool validFloatRegister(int num) const;
+ double getFloatRegister(int num) const;
+ void setFloatRegister(int num, double value);
+ bool validVectorRegister(int num) const;
+ v128 getVectorRegister(int num) const;
+ void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
void jumpto() { __libunwind_Registers_arm64_jumpto(this); }
static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; }
static int getArch() { return REGISTERS_ARM64; }
-
- uint64_t getSP() const { return _registers.__sp; }
- void setSP(uint64_t value) { _registers.__sp = value; }
- uint64_t getIP() const { return _registers.__pc; }
- void setIP(uint64_t value) { _registers.__pc = value; }
- uint64_t getFP() const { return _registers.__fp; }
- void setFP(uint64_t value) { _registers.__fp = value; }
-
-private:
- struct GPRs {
- uint64_t __x[29]; // x0-x28
- uint64_t __fp; // Frame pointer x29
- uint64_t __lr; // Link register x30
- uint64_t __sp; // Stack pointer x31
- uint64_t __pc; // Program counter
+
+ uint64_t getSP() const { return _registers.__sp; }
+ void setSP(uint64_t value) { _registers.__sp = value; }
+ uint64_t getIP() const { return _registers.__pc; }
+ void setIP(uint64_t value) { _registers.__pc = value; }
+ uint64_t getFP() const { return _registers.__fp; }
+ void setFP(uint64_t value) { _registers.__fp = value; }
+
+private:
+ struct GPRs {
+ uint64_t __x[29]; // x0-x28
+ uint64_t __fp; // Frame pointer x29
+ uint64_t __lr; // Link register x30
+ uint64_t __sp; // Stack pointer x31
+ uint64_t __pc; // Program counter
uint64_t __ra_sign_state; // RA sign state register
- };
-
- GPRs _registers;
- double _vectorHalfRegisters[32];
-  // Currently only the lower double in 128-bit vector registers
-  // is preserved during unwinding. We could define new register
- // numbers (> 96) which mean whole vector registers, then this
- // struct would need to change to contain whole vector registers.
-};
-
-inline Registers_arm64::Registers_arm64(const void *registers) {
+ };
+
+ GPRs _registers;
+ double _vectorHalfRegisters[32];
+  // Currently only the lower double in 128-bit vector registers
+  // is preserved during unwinding. We could define new register
+ // numbers (> 96) which mean whole vector registers, then this
+ // struct would need to change to contain whole vector registers.
+};
+
+inline Registers_arm64::Registers_arm64(const void *registers) {
static_assert((check_fit<Registers_arm64, unw_context_t>::does_fit),
"arm64 registers do not fit into unw_context_t");
- memcpy(&_registers, registers, sizeof(_registers));
- static_assert(sizeof(GPRs) == 0x110,
- "expected VFP registers to be at offset 272");
- memcpy(_vectorHalfRegisters,
- static_cast<const uint8_t *>(registers) + sizeof(GPRs),
- sizeof(_vectorHalfRegisters));
-}
-
-inline Registers_arm64::Registers_arm64() {
- memset(&_registers, 0, sizeof(_registers));
- memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters));
-}
-
-inline bool Registers_arm64::validRegister(int regNum) const {
- if (regNum == UNW_REG_IP)
- return true;
- if (regNum == UNW_REG_SP)
- return true;
- if (regNum < 0)
- return false;
- if (regNum > 95)
- return false;
+ memcpy(&_registers, registers, sizeof(_registers));
+ static_assert(sizeof(GPRs) == 0x110,
+ "expected VFP registers to be at offset 272");
+ memcpy(_vectorHalfRegisters,
+ static_cast<const uint8_t *>(registers) + sizeof(GPRs),
+ sizeof(_vectorHalfRegisters));
+}
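// Worked check of the static_assert above (illustrative): sizeof(GPRs) is
//   29 * 8 (x0-x28) + 8 (fp) + 8 (lr) + 8 (sp) + 8 (pc) + 8 (ra_sign_state)
//   = 272 bytes = 0x110,
// so the 32 vector-half doubles copied just above begin at byte offset 272
// of the incoming unw_context_t buffer.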
+
+inline Registers_arm64::Registers_arm64() {
+ memset(&_registers, 0, sizeof(_registers));
+ memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters));
+}
+
+inline bool Registers_arm64::validRegister(int regNum) const {
+ if (regNum == UNW_REG_IP)
+ return true;
+ if (regNum == UNW_REG_SP)
+ return true;
+ if (regNum < 0)
+ return false;
+ if (regNum > 95)
+ return false;
if (regNum == UNW_AARCH64_RA_SIGN_STATE)
return true;
if ((regNum > 32) && (regNum < 64))
- return false;
- return true;
-}
-
-inline uint64_t Registers_arm64::getRegister(int regNum) const {
+ return false;
+ return true;
+}
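// Illustrative reading of the checks above, using the numbering in this file:
// x0-x28 are 0-28, fp/lr/sp/pc take 29-32, and v0-v31 take 64-95, so the
// rejected 33-63 band is simply unallocated here. UNW_AARCH64_RA_SIGN_STATE
// appears to fall inside that band, which is why it is accepted explicitly
// before the range check.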
+
+inline uint64_t Registers_arm64::getRegister(int regNum) const {
if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC)
- return _registers.__pc;
+ return _registers.__pc;
if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP)
- return _registers.__sp;
+ return _registers.__sp;
if (regNum == UNW_AARCH64_RA_SIGN_STATE)
return _registers.__ra_sign_state;
if (regNum == UNW_AARCH64_FP)
@@ -1885,15 +1885,15 @@ inline uint64_t Registers_arm64::getRegister(int regNum) const {
if (regNum == UNW_AARCH64_LR)
return _registers.__lr;
if ((regNum >= 0) && (regNum < 29))
- return _registers.__x[regNum];
- _LIBUNWIND_ABORT("unsupported arm64 register");
-}
-
-inline void Registers_arm64::setRegister(int regNum, uint64_t value) {
+ return _registers.__x[regNum];
+ _LIBUNWIND_ABORT("unsupported arm64 register");
+}
+
+inline void Registers_arm64::setRegister(int regNum, uint64_t value) {
if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC)
- _registers.__pc = value;
+ _registers.__pc = value;
else if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP)
- _registers.__sp = value;
+ _registers.__sp = value;
else if (regNum == UNW_AARCH64_RA_SIGN_STATE)
_registers.__ra_sign_state = value;
else if (regNum == UNW_AARCH64_FP)
@@ -1901,246 +1901,246 @@ inline void Registers_arm64::setRegister(int regNum, uint64_t value) {
else if (regNum == UNW_AARCH64_LR)
_registers.__lr = value;
else if ((regNum >= 0) && (regNum < 29))
- _registers.__x[regNum] = value;
- else
- _LIBUNWIND_ABORT("unsupported arm64 register");
-}
-
-inline const char *Registers_arm64::getRegisterName(int regNum) {
- switch (regNum) {
- case UNW_REG_IP:
- return "pc";
- case UNW_REG_SP:
- return "sp";
+ _registers.__x[regNum] = value;
+ else
+ _LIBUNWIND_ABORT("unsupported arm64 register");
+}
+
+inline const char *Registers_arm64::getRegisterName(int regNum) {
+ switch (regNum) {
+ case UNW_REG_IP:
+ return "pc";
+ case UNW_REG_SP:
+ return "sp";
case UNW_AARCH64_X0:
- return "x0";
+ return "x0";
case UNW_AARCH64_X1:
- return "x1";
+ return "x1";
case UNW_AARCH64_X2:
- return "x2";
+ return "x2";
case UNW_AARCH64_X3:
- return "x3";
+ return "x3";
case UNW_AARCH64_X4:
- return "x4";
+ return "x4";
case UNW_AARCH64_X5:
- return "x5";
+ return "x5";
case UNW_AARCH64_X6:
- return "x6";
+ return "x6";
case UNW_AARCH64_X7:
- return "x7";
+ return "x7";
case UNW_AARCH64_X8:
- return "x8";
+ return "x8";
case UNW_AARCH64_X9:
- return "x9";
+ return "x9";
case UNW_AARCH64_X10:
- return "x10";
+ return "x10";
case UNW_AARCH64_X11:
- return "x11";
+ return "x11";
case UNW_AARCH64_X12:
- return "x12";
+ return "x12";
case UNW_AARCH64_X13:
- return "x13";
+ return "x13";
case UNW_AARCH64_X14:
- return "x14";
+ return "x14";
case UNW_AARCH64_X15:
- return "x15";
+ return "x15";
case UNW_AARCH64_X16:
- return "x16";
+ return "x16";
case UNW_AARCH64_X17:
- return "x17";
+ return "x17";
case UNW_AARCH64_X18:
- return "x18";
+ return "x18";
case UNW_AARCH64_X19:
- return "x19";
+ return "x19";
case UNW_AARCH64_X20:
- return "x20";
+ return "x20";
case UNW_AARCH64_X21:
- return "x21";
+ return "x21";
case UNW_AARCH64_X22:
- return "x22";
+ return "x22";
case UNW_AARCH64_X23:
- return "x23";
+ return "x23";
case UNW_AARCH64_X24:
- return "x24";
+ return "x24";
case UNW_AARCH64_X25:
- return "x25";
+ return "x25";
case UNW_AARCH64_X26:
- return "x26";
+ return "x26";
case UNW_AARCH64_X27:
- return "x27";
+ return "x27";
case UNW_AARCH64_X28:
- return "x28";
+ return "x28";
case UNW_AARCH64_FP:
- return "fp";
+ return "fp";
case UNW_AARCH64_LR:
- return "lr";
+ return "lr";
case UNW_AARCH64_SP:
- return "sp";
+ return "sp";
case UNW_AARCH64_PC:
return "pc";
case UNW_AARCH64_V0:
- return "d0";
+ return "d0";
case UNW_AARCH64_V1:
- return "d1";
+ return "d1";
case UNW_AARCH64_V2:
- return "d2";
+ return "d2";
case UNW_AARCH64_V3:
- return "d3";
+ return "d3";
case UNW_AARCH64_V4:
- return "d4";
+ return "d4";
case UNW_AARCH64_V5:
- return "d5";
+ return "d5";
case UNW_AARCH64_V6:
- return "d6";
+ return "d6";
case UNW_AARCH64_V7:
- return "d7";
+ return "d7";
case UNW_AARCH64_V8:
- return "d8";
+ return "d8";
case UNW_AARCH64_V9:
- return "d9";
+ return "d9";
case UNW_AARCH64_V10:
- return "d10";
+ return "d10";
case UNW_AARCH64_V11:
- return "d11";
+ return "d11";
case UNW_AARCH64_V12:
- return "d12";
+ return "d12";
case UNW_AARCH64_V13:
- return "d13";
+ return "d13";
case UNW_AARCH64_V14:
- return "d14";
+ return "d14";
case UNW_AARCH64_V15:
- return "d15";
+ return "d15";
case UNW_AARCH64_V16:
- return "d16";
+ return "d16";
case UNW_AARCH64_V17:
- return "d17";
+ return "d17";
case UNW_AARCH64_V18:
- return "d18";
+ return "d18";
case UNW_AARCH64_V19:
- return "d19";
+ return "d19";
case UNW_AARCH64_V20:
- return "d20";
+ return "d20";
case UNW_AARCH64_V21:
- return "d21";
+ return "d21";
case UNW_AARCH64_V22:
- return "d22";
+ return "d22";
case UNW_AARCH64_V23:
- return "d23";
+ return "d23";
case UNW_AARCH64_V24:
- return "d24";
+ return "d24";
case UNW_AARCH64_V25:
- return "d25";
+ return "d25";
case UNW_AARCH64_V26:
- return "d26";
+ return "d26";
case UNW_AARCH64_V27:
- return "d27";
+ return "d27";
case UNW_AARCH64_V28:
- return "d28";
+ return "d28";
case UNW_AARCH64_V29:
- return "d29";
+ return "d29";
case UNW_AARCH64_V30:
- return "d30";
+ return "d30";
case UNW_AARCH64_V31:
- return "d31";
- default:
- return "unknown register";
- }
-}
-
-inline bool Registers_arm64::validFloatRegister(int regNum) const {
+ return "d31";
+ default:
+ return "unknown register";
+ }
+}
+
+inline bool Registers_arm64::validFloatRegister(int regNum) const {
if (regNum < UNW_AARCH64_V0)
- return false;
+ return false;
if (regNum > UNW_AARCH64_V31)
- return false;
- return true;
-}
-
-inline double Registers_arm64::getFloatRegister(int regNum) const {
- assert(validFloatRegister(regNum));
+ return false;
+ return true;
+}
+
+inline double Registers_arm64::getFloatRegister(int regNum) const {
+ assert(validFloatRegister(regNum));
return _vectorHalfRegisters[regNum - UNW_AARCH64_V0];
-}
-
-inline void Registers_arm64::setFloatRegister(int regNum, double value) {
- assert(validFloatRegister(regNum));
+}
+
+inline void Registers_arm64::setFloatRegister(int regNum, double value) {
+ assert(validFloatRegister(regNum));
_vectorHalfRegisters[regNum - UNW_AARCH64_V0] = value;
-}
-
-inline bool Registers_arm64::validVectorRegister(int) const {
- return false;
-}
-
-inline v128 Registers_arm64::getVectorRegister(int) const {
- _LIBUNWIND_ABORT("no arm64 vector register support yet");
-}
-
-inline void Registers_arm64::setVectorRegister(int, v128) {
- _LIBUNWIND_ABORT("no arm64 vector register support yet");
-}
+}
+
+inline bool Registers_arm64::validVectorRegister(int) const {
+ return false;
+}
+
+inline v128 Registers_arm64::getVectorRegister(int) const {
+ _LIBUNWIND_ABORT("no arm64 vector register support yet");
+}
+
+inline void Registers_arm64::setVectorRegister(int, v128) {
+ _LIBUNWIND_ABORT("no arm64 vector register support yet");
+}
#endif // _LIBUNWIND_TARGET_AARCH64
-
+
#if defined(_LIBUNWIND_TARGET_ARM)
-/// Registers_arm holds the register state of a thread in a 32-bit arm
-/// process.
-///
-/// NOTE: Assumes VFPv3. On ARM processors without a floating point unit,
-/// this uses more memory than required.
-class _LIBUNWIND_HIDDEN Registers_arm {
-public:
- Registers_arm();
- Registers_arm(const void *registers);
-
- bool validRegister(int num) const;
+/// Registers_arm holds the register state of a thread in a 32-bit arm
+/// process.
+///
+/// NOTE: Assumes VFPv3. On ARM processors without a floating point unit,
+/// this uses more memory than required.
+class _LIBUNWIND_HIDDEN Registers_arm {
+public:
+ Registers_arm();
+ Registers_arm(const void *registers);
+
+ bool validRegister(int num) const;
uint32_t getRegister(int num) const;
- void setRegister(int num, uint32_t value);
- bool validFloatRegister(int num) const;
- unw_fpreg_t getFloatRegister(int num);
- void setFloatRegister(int num, unw_fpreg_t value);
- bool validVectorRegister(int num) const;
- v128 getVectorRegister(int num) const;
- void setVectorRegister(int num, v128 value);
+ void setRegister(int num, uint32_t value);
+ bool validFloatRegister(int num) const;
+ unw_fpreg_t getFloatRegister(int num);
+ void setFloatRegister(int num, unw_fpreg_t value);
+ bool validVectorRegister(int num) const;
+ v128 getVectorRegister(int num) const;
+ void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
- void jumpto() {
- restoreSavedFloatRegisters();
- restoreCoreAndJumpTo();
- }
+ void jumpto() {
+ restoreSavedFloatRegisters();
+ restoreCoreAndJumpTo();
+ }
static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; }
static int getArch() { return REGISTERS_ARM; }
-
- uint32_t getSP() const { return _registers.__sp; }
- void setSP(uint32_t value) { _registers.__sp = value; }
- uint32_t getIP() const { return _registers.__pc; }
- void setIP(uint32_t value) { _registers.__pc = value; }
-
- void saveVFPAsX() {
- assert(_use_X_for_vfp_save || !_saved_vfp_d0_d15);
- _use_X_for_vfp_save = true;
- }
-
- void restoreSavedFloatRegisters() {
- if (_saved_vfp_d0_d15) {
- if (_use_X_for_vfp_save)
- restoreVFPWithFLDMX(_vfp_d0_d15_pad);
- else
- restoreVFPWithFLDMD(_vfp_d0_d15_pad);
- }
- if (_saved_vfp_d16_d31)
- restoreVFPv3(_vfp_d16_d31);
+
+ uint32_t getSP() const { return _registers.__sp; }
+ void setSP(uint32_t value) { _registers.__sp = value; }
+ uint32_t getIP() const { return _registers.__pc; }
+ void setIP(uint32_t value) { _registers.__pc = value; }
+
+ void saveVFPAsX() {
+ assert(_use_X_for_vfp_save || !_saved_vfp_d0_d15);
+ _use_X_for_vfp_save = true;
+ }
+
+ void restoreSavedFloatRegisters() {
+ if (_saved_vfp_d0_d15) {
+ if (_use_X_for_vfp_save)
+ restoreVFPWithFLDMX(_vfp_d0_d15_pad);
+ else
+ restoreVFPWithFLDMD(_vfp_d0_d15_pad);
+ }
+ if (_saved_vfp_d16_d31)
+ restoreVFPv3(_vfp_d16_d31);
#if defined(__ARM_WMMX)
- if (_saved_iwmmx)
- restoreiWMMX(_iwmmx);
- if (_saved_iwmmx_control)
- restoreiWMMXControl(_iwmmx_control);
+ if (_saved_iwmmx)
+ restoreiWMMX(_iwmmx);
+ if (_saved_iwmmx_control)
+ restoreiWMMXControl(_iwmmx_control);
#endif
- }
-
-private:
- struct GPRs {
- uint32_t __r[13]; // r0-r12
- uint32_t __sp; // Stack pointer r13
- uint32_t __lr; // Link register r14
- uint32_t __pc; // Program counter r15
- };
-
+ }
+
+private:
+ struct GPRs {
+ uint32_t __r[13]; // r0-r12
+ uint32_t __sp; // Stack pointer r13
+ uint32_t __lr; // Link register r14
+ uint32_t __pc; // Program counter r15
+ };
+
struct PseudoRegisters {
uint32_t __pac; // Return Authentication Code (PAC)
};
@@ -2153,94 +2153,94 @@ private:
static void restoreVFPv3(void*);
#if defined(__ARM_WMMX)
static void saveiWMMX(void*);
- static void saveiWMMXControl(uint32_t*);
+ static void saveiWMMXControl(uint32_t*);
static void restoreiWMMX(void*);
- static void restoreiWMMXControl(uint32_t*);
+ static void restoreiWMMXControl(uint32_t*);
#endif
- void restoreCoreAndJumpTo();
-
- // ARM registers
- GPRs _registers;
+ void restoreCoreAndJumpTo();
+
+ // ARM registers
+ GPRs _registers;
PseudoRegisters _pseudo_registers;
-
- // We save floating point registers lazily because we can't know ahead of
- // time which ones are used. See EHABI #4.7.
-
- // Whether D0-D15 are saved in the FSTMX instead of FSTMD format.
- //
- // See EHABI #7.5 that explains how matching instruction sequences for load
- // and store need to be used to correctly restore the exact register bits.
- bool _use_X_for_vfp_save;
- // Whether VFP D0-D15 are saved.
- bool _saved_vfp_d0_d15;
- // Whether VFPv3 D16-D31 are saved.
- bool _saved_vfp_d16_d31;
- // VFP registers D0-D15, + padding if saved using FSTMX
- unw_fpreg_t _vfp_d0_d15_pad[17];
- // VFPv3 registers D16-D31, always saved using FSTMD
- unw_fpreg_t _vfp_d16_d31[16];
+
+ // We save floating point registers lazily because we can't know ahead of
+ // time which ones are used. See EHABI #4.7.
+
+ // Whether D0-D15 are saved in the FSTMX instead of FSTMD format.
+ //
+ // See EHABI #7.5 that explains how matching instruction sequences for load
+ // and store need to be used to correctly restore the exact register bits.
+ bool _use_X_for_vfp_save;
+ // Whether VFP D0-D15 are saved.
+ bool _saved_vfp_d0_d15;
+ // Whether VFPv3 D16-D31 are saved.
+ bool _saved_vfp_d16_d31;
+ // VFP registers D0-D15, + padding if saved using FSTMX
+ unw_fpreg_t _vfp_d0_d15_pad[17];
+ // VFPv3 registers D16-D31, always saved using FSTMD
+ unw_fpreg_t _vfp_d16_d31[16];
#if defined(__ARM_WMMX)
// Whether iWMMX data registers are saved.
bool _saved_iwmmx;
// Whether iWMMX control registers are saved.
mutable bool _saved_iwmmx_control;
- // iWMMX registers
- unw_fpreg_t _iwmmx[16];
- // iWMMX control registers
+ // iWMMX registers
+ unw_fpreg_t _iwmmx[16];
+ // iWMMX control registers
mutable uint32_t _iwmmx_control[4];
#endif
-};
-
-inline Registers_arm::Registers_arm(const void *registers)
- : _use_X_for_vfp_save(false),
- _saved_vfp_d0_d15(false),
+};
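// Note on the buffer sizes above (illustrative): an FSTMX store of D0-D15
// writes one extra word of format information in addition to the 16 doubles,
// which is why _vfp_d0_d15_pad reserves 17 slots ("+ padding"); D16-D31 are
// always saved with FSTMD, so _vfp_d16_d31 needs exactly 16.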
+
+inline Registers_arm::Registers_arm(const void *registers)
+ : _use_X_for_vfp_save(false),
+ _saved_vfp_d0_d15(false),
_saved_vfp_d16_d31(false) {
static_assert((check_fit<Registers_arm, unw_context_t>::does_fit),
"arm registers do not fit into unw_context_t");
// See __unw_getcontext() note about data.
- memcpy(&_registers, registers, sizeof(_registers));
+ memcpy(&_registers, registers, sizeof(_registers));
memset(&_pseudo_registers, 0, sizeof(_pseudo_registers));
- memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad));
- memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31));
+ memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad));
+ memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31));
#if defined(__ARM_WMMX)
_saved_iwmmx = false;
_saved_iwmmx_control = false;
- memset(&_iwmmx, 0, sizeof(_iwmmx));
- memset(&_iwmmx_control, 0, sizeof(_iwmmx_control));
+ memset(&_iwmmx, 0, sizeof(_iwmmx));
+ memset(&_iwmmx_control, 0, sizeof(_iwmmx_control));
#endif
-}
-
-inline Registers_arm::Registers_arm()
- : _use_X_for_vfp_save(false),
- _saved_vfp_d0_d15(false),
+}
+
+inline Registers_arm::Registers_arm()
+ : _use_X_for_vfp_save(false),
+ _saved_vfp_d0_d15(false),
_saved_vfp_d16_d31(false) {
- memset(&_registers, 0, sizeof(_registers));
+ memset(&_registers, 0, sizeof(_registers));
memset(&_pseudo_registers, 0, sizeof(_pseudo_registers));
- memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad));
- memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31));
+ memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad));
+ memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31));
#if defined(__ARM_WMMX)
_saved_iwmmx = false;
_saved_iwmmx_control = false;
- memset(&_iwmmx, 0, sizeof(_iwmmx));
- memset(&_iwmmx_control, 0, sizeof(_iwmmx_control));
+ memset(&_iwmmx, 0, sizeof(_iwmmx));
+ memset(&_iwmmx_control, 0, sizeof(_iwmmx_control));
#endif
-}
-
-inline bool Registers_arm::validRegister(int regNum) const {
- // Returns true for all non-VFP registers supported by the EHABI
- // virtual register set (VRS).
- if (regNum == UNW_REG_IP)
- return true;
+}
- if (regNum == UNW_REG_SP)
- return true;
+inline bool Registers_arm::validRegister(int regNum) const {
+ // Returns true for all non-VFP registers supported by the EHABI
+ // virtual register set (VRS).
+ if (regNum == UNW_REG_IP)
+ return true;
- if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15)
- return true;
+ if (regNum == UNW_REG_SP)
+ return true;
+
+ if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15)
+ return true;
#if defined(__ARM_WMMX)
- if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3)
- return true;
+ if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3)
+ return true;
#endif
#ifdef __ARM_FEATURE_PAUTH
@@ -2248,30 +2248,30 @@ inline bool Registers_arm::validRegister(int regNum) const {
return true;
#endif
- return false;
-}
-
+ return false;
+}
+
inline uint32_t Registers_arm::getRegister(int regNum) const {
- if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP)
- return _registers.__sp;
+ if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP)
+ return _registers.__sp;
- if (regNum == UNW_ARM_LR)
- return _registers.__lr;
+ if (regNum == UNW_ARM_LR)
+ return _registers.__lr;
- if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP)
- return _registers.__pc;
+ if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP)
+ return _registers.__pc;
- if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12)
- return _registers.__r[regNum];
+ if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12)
+ return _registers.__r[regNum];
#if defined(__ARM_WMMX)
- if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) {
- if (!_saved_iwmmx_control) {
- _saved_iwmmx_control = true;
- saveiWMMXControl(_iwmmx_control);
- }
- return _iwmmx_control[regNum - UNW_ARM_WC0];
- }
+ if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) {
+ if (!_saved_iwmmx_control) {
+ _saved_iwmmx_control = true;
+ saveiWMMXControl(_iwmmx_control);
+ }
+ return _iwmmx_control[regNum - UNW_ARM_WC0];
+ }
#endif
#ifdef __ARM_FEATURE_PAUTH
@@ -2279,37 +2279,37 @@ inline uint32_t Registers_arm::getRegister(int regNum) const {
return _pseudo_registers.__pac;
#endif
- _LIBUNWIND_ABORT("unsupported arm register");
-}
-
-inline void Registers_arm::setRegister(int regNum, uint32_t value) {
+ _LIBUNWIND_ABORT("unsupported arm register");
+}
+
+inline void Registers_arm::setRegister(int regNum, uint32_t value) {
if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) {
- _registers.__sp = value;
+ _registers.__sp = value;
return;
}
if (regNum == UNW_ARM_LR) {
- _registers.__lr = value;
+ _registers.__lr = value;
return;
}
if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) {
- _registers.__pc = value;
+ _registers.__pc = value;
return;
}
if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) {
- _registers.__r[regNum] = value;
+ _registers.__r[regNum] = value;
return;
}
#if defined(__ARM_WMMX)
if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) {
- if (!_saved_iwmmx_control) {
- _saved_iwmmx_control = true;
- saveiWMMXControl(_iwmmx_control);
- }
- _iwmmx_control[regNum - UNW_ARM_WC0] = value;
+ if (!_saved_iwmmx_control) {
+ _saved_iwmmx_control = true;
+ saveiWMMXControl(_iwmmx_control);
+ }
+ _iwmmx_control[regNum - UNW_ARM_WC0] = value;
return;
}
#endif
@@ -2320,465 +2320,465 @@ inline void Registers_arm::setRegister(int regNum, uint32_t value) {
}
_LIBUNWIND_ABORT("unsupported arm register");
-}
-
-inline const char *Registers_arm::getRegisterName(int regNum) {
- switch (regNum) {
- case UNW_REG_IP:
- case UNW_ARM_IP: // UNW_ARM_R15 is alias
- return "pc";
- case UNW_ARM_LR: // UNW_ARM_R14 is alias
- return "lr";
- case UNW_REG_SP:
- case UNW_ARM_SP: // UNW_ARM_R13 is alias
- return "sp";
- case UNW_ARM_R0:
- return "r0";
- case UNW_ARM_R1:
- return "r1";
- case UNW_ARM_R2:
- return "r2";
- case UNW_ARM_R3:
- return "r3";
- case UNW_ARM_R4:
- return "r4";
- case UNW_ARM_R5:
- return "r5";
- case UNW_ARM_R6:
- return "r6";
- case UNW_ARM_R7:
- return "r7";
- case UNW_ARM_R8:
- return "r8";
- case UNW_ARM_R9:
- return "r9";
- case UNW_ARM_R10:
- return "r10";
- case UNW_ARM_R11:
- return "r11";
- case UNW_ARM_R12:
- return "r12";
- case UNW_ARM_S0:
- return "s0";
- case UNW_ARM_S1:
- return "s1";
- case UNW_ARM_S2:
- return "s2";
- case UNW_ARM_S3:
- return "s3";
- case UNW_ARM_S4:
- return "s4";
- case UNW_ARM_S5:
- return "s5";
- case UNW_ARM_S6:
- return "s6";
- case UNW_ARM_S7:
- return "s7";
- case UNW_ARM_S8:
- return "s8";
- case UNW_ARM_S9:
- return "s9";
- case UNW_ARM_S10:
- return "s10";
- case UNW_ARM_S11:
- return "s11";
- case UNW_ARM_S12:
- return "s12";
- case UNW_ARM_S13:
- return "s13";
- case UNW_ARM_S14:
- return "s14";
- case UNW_ARM_S15:
- return "s15";
- case UNW_ARM_S16:
- return "s16";
- case UNW_ARM_S17:
- return "s17";
- case UNW_ARM_S18:
- return "s18";
- case UNW_ARM_S19:
- return "s19";
- case UNW_ARM_S20:
- return "s20";
- case UNW_ARM_S21:
- return "s21";
- case UNW_ARM_S22:
- return "s22";
- case UNW_ARM_S23:
- return "s23";
- case UNW_ARM_S24:
- return "s24";
- case UNW_ARM_S25:
- return "s25";
- case UNW_ARM_S26:
- return "s26";
- case UNW_ARM_S27:
- return "s27";
- case UNW_ARM_S28:
- return "s28";
- case UNW_ARM_S29:
- return "s29";
- case UNW_ARM_S30:
- return "s30";
- case UNW_ARM_S31:
- return "s31";
- case UNW_ARM_D0:
- return "d0";
- case UNW_ARM_D1:
- return "d1";
- case UNW_ARM_D2:
- return "d2";
- case UNW_ARM_D3:
- return "d3";
- case UNW_ARM_D4:
- return "d4";
- case UNW_ARM_D5:
- return "d5";
- case UNW_ARM_D6:
- return "d6";
- case UNW_ARM_D7:
- return "d7";
- case UNW_ARM_D8:
- return "d8";
- case UNW_ARM_D9:
- return "d9";
- case UNW_ARM_D10:
- return "d10";
- case UNW_ARM_D11:
- return "d11";
- case UNW_ARM_D12:
- return "d12";
- case UNW_ARM_D13:
- return "d13";
- case UNW_ARM_D14:
- return "d14";
- case UNW_ARM_D15:
- return "d15";
- case UNW_ARM_D16:
- return "d16";
- case UNW_ARM_D17:
- return "d17";
- case UNW_ARM_D18:
- return "d18";
- case UNW_ARM_D19:
- return "d19";
- case UNW_ARM_D20:
- return "d20";
- case UNW_ARM_D21:
- return "d21";
- case UNW_ARM_D22:
- return "d22";
- case UNW_ARM_D23:
- return "d23";
- case UNW_ARM_D24:
- return "d24";
- case UNW_ARM_D25:
- return "d25";
- case UNW_ARM_D26:
- return "d26";
- case UNW_ARM_D27:
- return "d27";
- case UNW_ARM_D28:
- return "d28";
- case UNW_ARM_D29:
- return "d29";
- case UNW_ARM_D30:
- return "d30";
- case UNW_ARM_D31:
- return "d31";
- default:
- return "unknown register";
- }
-}
-
-inline bool Registers_arm::validFloatRegister(int regNum) const {
- // NOTE: Consider the intel MMX registers floating point so that
+}
+
+inline const char *Registers_arm::getRegisterName(int regNum) {
+ switch (regNum) {
+ case UNW_REG_IP:
+ case UNW_ARM_IP: // UNW_ARM_R15 is alias
+ return "pc";
+ case UNW_ARM_LR: // UNW_ARM_R14 is alias
+ return "lr";
+ case UNW_REG_SP:
+ case UNW_ARM_SP: // UNW_ARM_R13 is alias
+ return "sp";
+ case UNW_ARM_R0:
+ return "r0";
+ case UNW_ARM_R1:
+ return "r1";
+ case UNW_ARM_R2:
+ return "r2";
+ case UNW_ARM_R3:
+ return "r3";
+ case UNW_ARM_R4:
+ return "r4";
+ case UNW_ARM_R5:
+ return "r5";
+ case UNW_ARM_R6:
+ return "r6";
+ case UNW_ARM_R7:
+ return "r7";
+ case UNW_ARM_R8:
+ return "r8";
+ case UNW_ARM_R9:
+ return "r9";
+ case UNW_ARM_R10:
+ return "r10";
+ case UNW_ARM_R11:
+ return "r11";
+ case UNW_ARM_R12:
+ return "r12";
+ case UNW_ARM_S0:
+ return "s0";
+ case UNW_ARM_S1:
+ return "s1";
+ case UNW_ARM_S2:
+ return "s2";
+ case UNW_ARM_S3:
+ return "s3";
+ case UNW_ARM_S4:
+ return "s4";
+ case UNW_ARM_S5:
+ return "s5";
+ case UNW_ARM_S6:
+ return "s6";
+ case UNW_ARM_S7:
+ return "s7";
+ case UNW_ARM_S8:
+ return "s8";
+ case UNW_ARM_S9:
+ return "s9";
+ case UNW_ARM_S10:
+ return "s10";
+ case UNW_ARM_S11:
+ return "s11";
+ case UNW_ARM_S12:
+ return "s12";
+ case UNW_ARM_S13:
+ return "s13";
+ case UNW_ARM_S14:
+ return "s14";
+ case UNW_ARM_S15:
+ return "s15";
+ case UNW_ARM_S16:
+ return "s16";
+ case UNW_ARM_S17:
+ return "s17";
+ case UNW_ARM_S18:
+ return "s18";
+ case UNW_ARM_S19:
+ return "s19";
+ case UNW_ARM_S20:
+ return "s20";
+ case UNW_ARM_S21:
+ return "s21";
+ case UNW_ARM_S22:
+ return "s22";
+ case UNW_ARM_S23:
+ return "s23";
+ case UNW_ARM_S24:
+ return "s24";
+ case UNW_ARM_S25:
+ return "s25";
+ case UNW_ARM_S26:
+ return "s26";
+ case UNW_ARM_S27:
+ return "s27";
+ case UNW_ARM_S28:
+ return "s28";
+ case UNW_ARM_S29:
+ return "s29";
+ case UNW_ARM_S30:
+ return "s30";
+ case UNW_ARM_S31:
+ return "s31";
+ case UNW_ARM_D0:
+ return "d0";
+ case UNW_ARM_D1:
+ return "d1";
+ case UNW_ARM_D2:
+ return "d2";
+ case UNW_ARM_D3:
+ return "d3";
+ case UNW_ARM_D4:
+ return "d4";
+ case UNW_ARM_D5:
+ return "d5";
+ case UNW_ARM_D6:
+ return "d6";
+ case UNW_ARM_D7:
+ return "d7";
+ case UNW_ARM_D8:
+ return "d8";
+ case UNW_ARM_D9:
+ return "d9";
+ case UNW_ARM_D10:
+ return "d10";
+ case UNW_ARM_D11:
+ return "d11";
+ case UNW_ARM_D12:
+ return "d12";
+ case UNW_ARM_D13:
+ return "d13";
+ case UNW_ARM_D14:
+ return "d14";
+ case UNW_ARM_D15:
+ return "d15";
+ case UNW_ARM_D16:
+ return "d16";
+ case UNW_ARM_D17:
+ return "d17";
+ case UNW_ARM_D18:
+ return "d18";
+ case UNW_ARM_D19:
+ return "d19";
+ case UNW_ARM_D20:
+ return "d20";
+ case UNW_ARM_D21:
+ return "d21";
+ case UNW_ARM_D22:
+ return "d22";
+ case UNW_ARM_D23:
+ return "d23";
+ case UNW_ARM_D24:
+ return "d24";
+ case UNW_ARM_D25:
+ return "d25";
+ case UNW_ARM_D26:
+ return "d26";
+ case UNW_ARM_D27:
+ return "d27";
+ case UNW_ARM_D28:
+ return "d28";
+ case UNW_ARM_D29:
+ return "d29";
+ case UNW_ARM_D30:
+ return "d30";
+ case UNW_ARM_D31:
+ return "d31";
+ default:
+ return "unknown register";
+ }
+}
+
+inline bool Registers_arm::validFloatRegister(int regNum) const {
+ // NOTE: Consider the intel MMX registers floating point so that
// __unw_get_fpreg can be used to transmit the 64-bit data back.
- return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31))
+ return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31))
#if defined(__ARM_WMMX)
|| ((regNum >= UNW_ARM_WR0) && (regNum <= UNW_ARM_WR15))
#endif
;
-}
-
-inline unw_fpreg_t Registers_arm::getFloatRegister(int regNum) {
- if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) {
- if (!_saved_vfp_d0_d15) {
- _saved_vfp_d0_d15 = true;
- if (_use_X_for_vfp_save)
- saveVFPWithFSTMX(_vfp_d0_d15_pad);
- else
- saveVFPWithFSTMD(_vfp_d0_d15_pad);
- }
- return _vfp_d0_d15_pad[regNum - UNW_ARM_D0];
+}
+
+inline unw_fpreg_t Registers_arm::getFloatRegister(int regNum) {
+ if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) {
+ if (!_saved_vfp_d0_d15) {
+ _saved_vfp_d0_d15 = true;
+ if (_use_X_for_vfp_save)
+ saveVFPWithFSTMX(_vfp_d0_d15_pad);
+ else
+ saveVFPWithFSTMD(_vfp_d0_d15_pad);
+ }
+ return _vfp_d0_d15_pad[regNum - UNW_ARM_D0];
}
if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) {
- if (!_saved_vfp_d16_d31) {
- _saved_vfp_d16_d31 = true;
- saveVFPv3(_vfp_d16_d31);
- }
- return _vfp_d16_d31[regNum - UNW_ARM_D16];
+ if (!_saved_vfp_d16_d31) {
+ _saved_vfp_d16_d31 = true;
+ saveVFPv3(_vfp_d16_d31);
+ }
+ return _vfp_d16_d31[regNum - UNW_ARM_D16];
}
#if defined(__ARM_WMMX)
if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) {
- if (!_saved_iwmmx) {
- _saved_iwmmx = true;
- saveiWMMX(_iwmmx);
- }
- return _iwmmx[regNum - UNW_ARM_WR0];
- }
+ if (!_saved_iwmmx) {
+ _saved_iwmmx = true;
+ saveiWMMX(_iwmmx);
+ }
+ return _iwmmx[regNum - UNW_ARM_WR0];
+ }
#endif
_LIBUNWIND_ABORT("Unknown ARM float register");
-}
-
-inline void Registers_arm::setFloatRegister(int regNum, unw_fpreg_t value) {
- if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) {
- if (!_saved_vfp_d0_d15) {
- _saved_vfp_d0_d15 = true;
- if (_use_X_for_vfp_save)
- saveVFPWithFSTMX(_vfp_d0_d15_pad);
- else
- saveVFPWithFSTMD(_vfp_d0_d15_pad);
- }
- _vfp_d0_d15_pad[regNum - UNW_ARM_D0] = value;
+}
+
+inline void Registers_arm::setFloatRegister(int regNum, unw_fpreg_t value) {
+ if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) {
+ if (!_saved_vfp_d0_d15) {
+ _saved_vfp_d0_d15 = true;
+ if (_use_X_for_vfp_save)
+ saveVFPWithFSTMX(_vfp_d0_d15_pad);
+ else
+ saveVFPWithFSTMD(_vfp_d0_d15_pad);
+ }
+ _vfp_d0_d15_pad[regNum - UNW_ARM_D0] = value;
return;
}
if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) {
- if (!_saved_vfp_d16_d31) {
- _saved_vfp_d16_d31 = true;
- saveVFPv3(_vfp_d16_d31);
- }
- _vfp_d16_d31[regNum - UNW_ARM_D16] = value;
+ if (!_saved_vfp_d16_d31) {
+ _saved_vfp_d16_d31 = true;
+ saveVFPv3(_vfp_d16_d31);
+ }
+ _vfp_d16_d31[regNum - UNW_ARM_D16] = value;
return;
}
#if defined(__ARM_WMMX)
if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) {
- if (!_saved_iwmmx) {
- _saved_iwmmx = true;
- saveiWMMX(_iwmmx);
- }
- _iwmmx[regNum - UNW_ARM_WR0] = value;
+ if (!_saved_iwmmx) {
+ _saved_iwmmx = true;
+ saveiWMMX(_iwmmx);
+ }
+ _iwmmx[regNum - UNW_ARM_WR0] = value;
return;
- }
+ }
#endif
_LIBUNWIND_ABORT("Unknown ARM float register");
-}
-
-inline bool Registers_arm::validVectorRegister(int) const {
- return false;
-}
-
-inline v128 Registers_arm::getVectorRegister(int) const {
- _LIBUNWIND_ABORT("ARM vector support not implemented");
-}
-
-inline void Registers_arm::setVectorRegister(int, v128) {
- _LIBUNWIND_ABORT("ARM vector support not implemented");
-}
+}
+
+inline bool Registers_arm::validVectorRegister(int) const {
+ return false;
+}
+
+inline v128 Registers_arm::getVectorRegister(int) const {
+ _LIBUNWIND_ABORT("ARM vector support not implemented");
+}
+
+inline void Registers_arm::setVectorRegister(int, v128) {
+ _LIBUNWIND_ABORT("ARM vector support not implemented");
+}
#endif // _LIBUNWIND_TARGET_ARM
#if defined(_LIBUNWIND_TARGET_OR1K)
-/// Registers_or1k holds the register state of a thread in an OpenRISC1000
-/// process.
-class _LIBUNWIND_HIDDEN Registers_or1k {
-public:
- Registers_or1k();
- Registers_or1k(const void *registers);
-
- bool validRegister(int num) const;
- uint32_t getRegister(int num) const;
- void setRegister(int num, uint32_t value);
- bool validFloatRegister(int num) const;
- double getFloatRegister(int num) const;
- void setFloatRegister(int num, double value);
- bool validVectorRegister(int num) const;
- v128 getVectorRegister(int num) const;
- void setVectorRegister(int num, v128 value);
+/// Registers_or1k holds the register state of a thread in an OpenRISC1000
+/// process.
+class _LIBUNWIND_HIDDEN Registers_or1k {
+public:
+ Registers_or1k();
+ Registers_or1k(const void *registers);
+
+ bool validRegister(int num) const;
+ uint32_t getRegister(int num) const;
+ void setRegister(int num, uint32_t value);
+ bool validFloatRegister(int num) const;
+ double getFloatRegister(int num) const;
+ void setFloatRegister(int num, double value);
+ bool validVectorRegister(int num) const;
+ v128 getVectorRegister(int num) const;
+ void setVectorRegister(int num, v128 value);
static const char *getRegisterName(int num);
- void jumpto();
+ void jumpto();
static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K; }
static int getArch() { return REGISTERS_OR1K; }
-
- uint64_t getSP() const { return _registers.__r[1]; }
- void setSP(uint32_t value) { _registers.__r[1] = value; }
+
+ uint64_t getSP() const { return _registers.__r[1]; }
+ void setSP(uint32_t value) { _registers.__r[1] = value; }
uint64_t getIP() const { return _registers.__pc; }
void setIP(uint32_t value) { _registers.__pc = value; }
-
-private:
- struct or1k_thread_state_t {
+
+private:
+ struct or1k_thread_state_t {
unsigned int __r[32]; // r0-r31
unsigned int __pc; // Program counter
unsigned int __epcr; // Program counter at exception
- };
-
- or1k_thread_state_t _registers;
-};
-
-inline Registers_or1k::Registers_or1k(const void *registers) {
+ };
+
+ or1k_thread_state_t _registers;
+};
+
+inline Registers_or1k::Registers_or1k(const void *registers) {
static_assert((check_fit<Registers_or1k, unw_context_t>::does_fit),
"or1k registers do not fit into unw_context_t");
- memcpy(&_registers, static_cast<const uint8_t *>(registers),
- sizeof(_registers));
-}
-
-inline Registers_or1k::Registers_or1k() {
- memset(&_registers, 0, sizeof(_registers));
-}
-
-inline bool Registers_or1k::validRegister(int regNum) const {
- if (regNum == UNW_REG_IP)
- return true;
- if (regNum == UNW_REG_SP)
- return true;
- if (regNum < 0)
- return false;
- if (regNum <= UNW_OR1K_R31)
- return true;
+ memcpy(&_registers, static_cast<const uint8_t *>(registers),
+ sizeof(_registers));
+}
+
+inline Registers_or1k::Registers_or1k() {
+ memset(&_registers, 0, sizeof(_registers));
+}
+
+inline bool Registers_or1k::validRegister(int regNum) const {
+ if (regNum == UNW_REG_IP)
+ return true;
+ if (regNum == UNW_REG_SP)
+ return true;
+ if (regNum < 0)
+ return false;
+ if (regNum <= UNW_OR1K_R31)
+ return true;
if (regNum == UNW_OR1K_EPCR)
return true;
- return false;
-}
-
-inline uint32_t Registers_or1k::getRegister(int regNum) const {
- if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31)
- return _registers.__r[regNum - UNW_OR1K_R0];
-
- switch (regNum) {
- case UNW_REG_IP:
+ return false;
+}
+
+inline uint32_t Registers_or1k::getRegister(int regNum) const {
+ if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31)
+ return _registers.__r[regNum - UNW_OR1K_R0];
+
+ switch (regNum) {
+ case UNW_REG_IP:
return _registers.__pc;
- case UNW_REG_SP:
- return _registers.__r[1];
+ case UNW_REG_SP:
+ return _registers.__r[1];
case UNW_OR1K_EPCR:
return _registers.__epcr;
- }
- _LIBUNWIND_ABORT("unsupported or1k register");
-}
-
-inline void Registers_or1k::setRegister(int regNum, uint32_t value) {
- if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) {
- _registers.__r[regNum - UNW_OR1K_R0] = value;
- return;
- }
-
- switch (regNum) {
- case UNW_REG_IP:
+ }
+ _LIBUNWIND_ABORT("unsupported or1k register");
+}
+
+inline void Registers_or1k::setRegister(int regNum, uint32_t value) {
+ if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) {
+ _registers.__r[regNum - UNW_OR1K_R0] = value;
+ return;
+ }
+
+ switch (regNum) {
+ case UNW_REG_IP:
_registers.__pc = value;
- return;
- case UNW_REG_SP:
- _registers.__r[1] = value;
- return;
+ return;
+ case UNW_REG_SP:
+ _registers.__r[1] = value;
+ return;
case UNW_OR1K_EPCR:
_registers.__epcr = value;
return;
- }
- _LIBUNWIND_ABORT("unsupported or1k register");
-}
-
-inline bool Registers_or1k::validFloatRegister(int /* regNum */) const {
- return false;
-}
-
-inline double Registers_or1k::getFloatRegister(int /* regNum */) const {
- _LIBUNWIND_ABORT("or1k float support not implemented");
-}
-
-inline void Registers_or1k::setFloatRegister(int /* regNum */,
- double /* value */) {
- _LIBUNWIND_ABORT("or1k float support not implemented");
-}
-
-inline bool Registers_or1k::validVectorRegister(int /* regNum */) const {
- return false;
-}
-
-inline v128 Registers_or1k::getVectorRegister(int /* regNum */) const {
- _LIBUNWIND_ABORT("or1k vector support not implemented");
-}
-
-inline void Registers_or1k::setVectorRegister(int /* regNum */, v128 /* value */) {
- _LIBUNWIND_ABORT("or1k vector support not implemented");
-}
-
-inline const char *Registers_or1k::getRegisterName(int regNum) {
- switch (regNum) {
- case UNW_OR1K_R0:
- return "r0";
- case UNW_OR1K_R1:
- return "r1";
- case UNW_OR1K_R2:
- return "r2";
- case UNW_OR1K_R3:
- return "r3";
- case UNW_OR1K_R4:
- return "r4";
- case UNW_OR1K_R5:
- return "r5";
- case UNW_OR1K_R6:
- return "r6";
- case UNW_OR1K_R7:
- return "r7";
- case UNW_OR1K_R8:
- return "r8";
- case UNW_OR1K_R9:
- return "r9";
- case UNW_OR1K_R10:
- return "r10";
- case UNW_OR1K_R11:
- return "r11";
- case UNW_OR1K_R12:
- return "r12";
- case UNW_OR1K_R13:
- return "r13";
- case UNW_OR1K_R14:
- return "r14";
- case UNW_OR1K_R15:
- return "r15";
- case UNW_OR1K_R16:
- return "r16";
- case UNW_OR1K_R17:
- return "r17";
- case UNW_OR1K_R18:
- return "r18";
- case UNW_OR1K_R19:
- return "r19";
- case UNW_OR1K_R20:
- return "r20";
- case UNW_OR1K_R21:
- return "r21";
- case UNW_OR1K_R22:
- return "r22";
- case UNW_OR1K_R23:
- return "r23";
- case UNW_OR1K_R24:
- return "r24";
- case UNW_OR1K_R25:
- return "r25";
- case UNW_OR1K_R26:
- return "r26";
- case UNW_OR1K_R27:
- return "r27";
- case UNW_OR1K_R28:
- return "r28";
- case UNW_OR1K_R29:
- return "r29";
- case UNW_OR1K_R30:
- return "r30";
- case UNW_OR1K_R31:
- return "r31";
+ }
+ _LIBUNWIND_ABORT("unsupported or1k register");
+}
+
+inline bool Registers_or1k::validFloatRegister(int /* regNum */) const {
+ return false;
+}
+
+inline double Registers_or1k::getFloatRegister(int /* regNum */) const {
+ _LIBUNWIND_ABORT("or1k float support not implemented");
+}
+
+inline void Registers_or1k::setFloatRegister(int /* regNum */,
+ double /* value */) {
+ _LIBUNWIND_ABORT("or1k float support not implemented");
+}
+
+inline bool Registers_or1k::validVectorRegister(int /* regNum */) const {
+ return false;
+}
+
+inline v128 Registers_or1k::getVectorRegister(int /* regNum */) const {
+ _LIBUNWIND_ABORT("or1k vector support not implemented");
+}
+
+inline void Registers_or1k::setVectorRegister(int /* regNum */, v128 /* value */) {
+ _LIBUNWIND_ABORT("or1k vector support not implemented");
+}
+
+inline const char *Registers_or1k::getRegisterName(int regNum) {
+ switch (regNum) {
+ case UNW_OR1K_R0:
+ return "r0";
+ case UNW_OR1K_R1:
+ return "r1";
+ case UNW_OR1K_R2:
+ return "r2";
+ case UNW_OR1K_R3:
+ return "r3";
+ case UNW_OR1K_R4:
+ return "r4";
+ case UNW_OR1K_R5:
+ return "r5";
+ case UNW_OR1K_R6:
+ return "r6";
+ case UNW_OR1K_R7:
+ return "r7";
+ case UNW_OR1K_R8:
+ return "r8";
+ case UNW_OR1K_R9:
+ return "r9";
+ case UNW_OR1K_R10:
+ return "r10";
+ case UNW_OR1K_R11:
+ return "r11";
+ case UNW_OR1K_R12:
+ return "r12";
+ case UNW_OR1K_R13:
+ return "r13";
+ case UNW_OR1K_R14:
+ return "r14";
+ case UNW_OR1K_R15:
+ return "r15";
+ case UNW_OR1K_R16:
+ return "r16";
+ case UNW_OR1K_R17:
+ return "r17";
+ case UNW_OR1K_R18:
+ return "r18";
+ case UNW_OR1K_R19:
+ return "r19";
+ case UNW_OR1K_R20:
+ return "r20";
+ case UNW_OR1K_R21:
+ return "r21";
+ case UNW_OR1K_R22:
+ return "r22";
+ case UNW_OR1K_R23:
+ return "r23";
+ case UNW_OR1K_R24:
+ return "r24";
+ case UNW_OR1K_R25:
+ return "r25";
+ case UNW_OR1K_R26:
+ return "r26";
+ case UNW_OR1K_R27:
+ return "r27";
+ case UNW_OR1K_R28:
+ return "r28";
+ case UNW_OR1K_R29:
+ return "r29";
+ case UNW_OR1K_R30:
+ return "r30";
+ case UNW_OR1K_R31:
+ return "r31";
case UNW_OR1K_EPCR:
return "EPCR";
- default:
- return "unknown register";
- }
-
-}
+ default:
+ return "unknown register";
+ }
+
+}
#endif // _LIBUNWIND_TARGET_OR1K
#if defined(_LIBUNWIND_TARGET_MIPS_O32)
@@ -4712,6 +4712,6 @@ inline const char *Registers_ve::getRegisterName(int regNum) {
}
#endif // _LIBUNWIND_TARGET_VE
-} // namespace libunwind
-
-#endif // __REGISTERS_HPP__
+} // namespace libunwind
+
+#endif // __REGISTERS_HPP__
diff --git a/contrib/libs/libunwind/src/Unwind-EHABI.cpp b/contrib/libs/libunwind/src/Unwind-EHABI.cpp
index 46b26f588f..21c8b2777b 100644
--- a/contrib/libs/libunwind/src/Unwind-EHABI.cpp
+++ b/contrib/libs/libunwind/src/Unwind-EHABI.cpp
@@ -1,432 +1,432 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Implements ARM zero-cost C++ exceptions
-//
-//===----------------------------------------------------------------------===//
-
-#include "Unwind-EHABI.h"
-
+//
+//
+// Implements ARM zero-cost C++ exceptions
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unwind-EHABI.h"
+
#if defined(_LIBUNWIND_ARM_EHABI)
-
+
#include <inttypes.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config.h"
-#include "libunwind.h"
-#include "libunwind_ext.h"
-#include "unwind.h"
-
-namespace {
-
-// Strange order: take words in order, but inside word, take from most to least
-// significant byte.
-uint8_t getByte(const uint32_t* data, size_t offset) {
- const uint8_t* byteData = reinterpret_cast<const uint8_t*>(data);
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "libunwind.h"
+#include "libunwind_ext.h"
+#include "unwind.h"
+
+namespace {
+
+// Strange order: take words in order, but inside word, take from most to least
+// significant byte.
+uint8_t getByte(const uint32_t* data, size_t offset) {
+ const uint8_t* byteData = reinterpret_cast<const uint8_t*>(data);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))];
+ return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))];
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return byteData[offset];
#else
#error "Unable to determine endianess"
#endif
-}
-
-const char* getNextWord(const char* data, uint32_t* out) {
- *out = *reinterpret_cast<const uint32_t*>(data);
- return data + 4;
-}
-
-const char* getNextNibble(const char* data, uint32_t* out) {
- *out = *reinterpret_cast<const uint16_t*>(data);
- return data + 2;
-}
-
-struct Descriptor {
- // See # 9.2
- typedef enum {
- SU16 = 0, // Short descriptor, 16-bit entries
- LU16 = 1, // Long descriptor, 16-bit entries
- LU32 = 3, // Long descriptor, 32-bit entries
- RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7,
- RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11,
- RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15
- } Format;
-
- // See # 9.2
- typedef enum {
- CLEANUP = 0x0,
- FUNC = 0x1,
- CATCH = 0x2,
- INVALID = 0x4
- } Kind;
-};
-
-_Unwind_Reason_Code ProcessDescriptors(
- _Unwind_State state,
- _Unwind_Control_Block* ucbp,
- struct _Unwind_Context* context,
- Descriptor::Format format,
- const char* descriptorStart,
- uint32_t flags) {
-
- // EHT is inlined in the index using compact form. No descriptors. #5
- if (flags & 0x1)
- return _URC_CONTINUE_UNWIND;
-
- // TODO: We should check the state here, and determine whether we need to
- // perform phase1 or phase2 unwinding.
- (void)state;
-
- const char* descriptor = descriptorStart;
- uint32_t descriptorWord;
- getNextWord(descriptor, &descriptorWord);
- while (descriptorWord) {
- // Read descriptor based on # 9.2.
- uint32_t length;
- uint32_t offset;
- switch (format) {
- case Descriptor::LU32:
- descriptor = getNextWord(descriptor, &length);
- descriptor = getNextWord(descriptor, &offset);
+}
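// Worked example of the access order above (little-endian case, illustrative):
// for data[] = { 0x04030201, 0x08070605 },
//   getByte(data, 0..3) returns 0x04, 0x03, 0x02, 0x01
//   getByte(data, 4..7) returns 0x08, 0x07, 0x06, 0x05
// i.e. words are consumed in order, bytes within each word from most to least
// significant, matching how the unwind opcodes are packed.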
+
+const char* getNextWord(const char* data, uint32_t* out) {
+ *out = *reinterpret_cast<const uint32_t*>(data);
+ return data + 4;
+}
+
+const char* getNextNibble(const char* data, uint32_t* out) {
+ *out = *reinterpret_cast<const uint16_t*>(data);
+ return data + 2;
+}
+
+struct Descriptor {
+ // See # 9.2
+ typedef enum {
+ SU16 = 0, // Short descriptor, 16-bit entries
+ LU16 = 1, // Long descriptor, 16-bit entries
+ LU32 = 3, // Long descriptor, 32-bit entries
+ RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7,
+ RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11,
+ RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15
+ } Format;
+
+ // See # 9.2
+ typedef enum {
+ CLEANUP = 0x0,
+ FUNC = 0x1,
+ CATCH = 0x2,
+ INVALID = 0x4
+ } Kind;
+};
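// The Kind value is not stored as a separate field; ProcessDescriptors below
// reconstructs it from the low bits of the length and offset words:
//   kind = (length & 1) | ((offset & 1) << 1)
// so CLEANUP/FUNC/CATCH decode to 0/1/2 before the flag bits are cleared with
// length &= ~1u and offset &= ~1u.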
+
+_Unwind_Reason_Code ProcessDescriptors(
+ _Unwind_State state,
+ _Unwind_Control_Block* ucbp,
+ struct _Unwind_Context* context,
+ Descriptor::Format format,
+ const char* descriptorStart,
+ uint32_t flags) {
+
+ // EHT is inlined in the index using compact form. No descriptors. #5
+ if (flags & 0x1)
+ return _URC_CONTINUE_UNWIND;
+
+ // TODO: We should check the state here, and determine whether we need to
+ // perform phase1 or phase2 unwinding.
+ (void)state;
+
+ const char* descriptor = descriptorStart;
+ uint32_t descriptorWord;
+ getNextWord(descriptor, &descriptorWord);
+ while (descriptorWord) {
+ // Read descriptor based on # 9.2.
+ uint32_t length;
+ uint32_t offset;
+ switch (format) {
+ case Descriptor::LU32:
+ descriptor = getNextWord(descriptor, &length);
+ descriptor = getNextWord(descriptor, &offset);
break;
- case Descriptor::LU16:
- descriptor = getNextNibble(descriptor, &length);
- descriptor = getNextNibble(descriptor, &offset);
+ case Descriptor::LU16:
+ descriptor = getNextNibble(descriptor, &length);
+ descriptor = getNextNibble(descriptor, &offset);
+ break;
+ default:
+ assert(false);
+ return _URC_FAILURE;
+ }
+
+ // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value.
+ Descriptor::Kind kind =
+ static_cast<Descriptor::Kind>((length & 0x1) | ((offset & 0x1) << 1));
+
+ // Clear off flag from last bit.
+ length &= ~1u;
+ offset &= ~1u;
+ uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset;
+ uintptr_t scopeEnd = scopeStart + length;
+ uintptr_t pc = _Unwind_GetIP(context);
+ bool isInScope = (scopeStart <= pc) && (pc < scopeEnd);
+
+ switch (kind) {
+ case Descriptor::CLEANUP: {
+ // TODO(ajwong): Handle cleanup descriptors.
+ break;
+ }
+ case Descriptor::FUNC: {
+ // TODO(ajwong): Handle function descriptors.
+ break;
+ }
+ case Descriptor::CATCH: {
+ // Catch descriptors require gobbling one more word.
+ uint32_t landing_pad;
+ descriptor = getNextWord(descriptor, &landing_pad);
+
+ if (isInScope) {
+ // TODO(ajwong): This is only phase1 compatible logic. Implement
+ // phase2.
+ landing_pad = signExtendPrel31(landing_pad & ~0x80000000);
+ if (landing_pad == 0xffffffff) {
+ return _URC_HANDLER_FOUND;
+ } else if (landing_pad == 0xfffffffe) {
+ return _URC_FAILURE;
+ } else {
+ /*
+ bool is_reference_type = landing_pad & 0x80000000;
+ void* matched_object;
+ if (__cxxabiv1::__cxa_type_match(
+ ucbp, reinterpret_cast<const std::type_info *>(landing_pad),
+ is_reference_type,
+ &matched_object) != __cxxabiv1::ctm_failed)
+ return _URC_HANDLER_FOUND;
+ */
+ _LIBUNWIND_ABORT("Type matching not implemented");
+ }
+ }
break;
- default:
- assert(false);
- return _URC_FAILURE;
- }
-
- // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value.
- Descriptor::Kind kind =
- static_cast<Descriptor::Kind>((length & 0x1) | ((offset & 0x1) << 1));
-
- // Clear off flag from last bit.
- length &= ~1u;
- offset &= ~1u;
- uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset;
- uintptr_t scopeEnd = scopeStart + length;
- uintptr_t pc = _Unwind_GetIP(context);
- bool isInScope = (scopeStart <= pc) && (pc < scopeEnd);
-
- switch (kind) {
- case Descriptor::CLEANUP: {
- // TODO(ajwong): Handle cleanup descriptors.
- break;
- }
- case Descriptor::FUNC: {
- // TODO(ajwong): Handle function descriptors.
- break;
- }
- case Descriptor::CATCH: {
- // Catch descriptors require gobbling one more word.
- uint32_t landing_pad;
- descriptor = getNextWord(descriptor, &landing_pad);
-
- if (isInScope) {
- // TODO(ajwong): This is only phase1 compatible logic. Implement
- // phase2.
- landing_pad = signExtendPrel31(landing_pad & ~0x80000000);
- if (landing_pad == 0xffffffff) {
- return _URC_HANDLER_FOUND;
- } else if (landing_pad == 0xfffffffe) {
- return _URC_FAILURE;
- } else {
- /*
- bool is_reference_type = landing_pad & 0x80000000;
- void* matched_object;
- if (__cxxabiv1::__cxa_type_match(
- ucbp, reinterpret_cast<const std::type_info *>(landing_pad),
- is_reference_type,
- &matched_object) != __cxxabiv1::ctm_failed)
- return _URC_HANDLER_FOUND;
- */
- _LIBUNWIND_ABORT("Type matching not implemented");
- }
- }
- break;
- }
- default:
- _LIBUNWIND_ABORT("Invalid descriptor kind found.");
- }
-
- getNextWord(descriptor, &descriptorWord);
- }
-
- return _URC_CONTINUE_UNWIND;
-}
-
-static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state,
- _Unwind_Control_Block* ucbp,
- struct _Unwind_Context* context) {
- // Read the compact model EHT entry's header # 6.3
- const uint32_t* unwindingData = ucbp->pr_cache.ehtp;
- assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry");
- Descriptor::Format format =
- static_cast<Descriptor::Format>((*unwindingData & 0x0f000000) >> 24);
-
- const char *lsda =
- reinterpret_cast<const char *>(_Unwind_GetLanguageSpecificData(context));
-
- // Handle descriptors before unwinding so they are processed in the context
- // of the correct stack frame.
- _Unwind_Reason_Code result =
- ProcessDescriptors(state, ucbp, context, format, lsda,
- ucbp->pr_cache.additional);
-
- if (result != _URC_CONTINUE_UNWIND)
- return result;
-
+ }
+ default:
+ _LIBUNWIND_ABORT("Invalid descriptor kind found.");
+ }
+
+ getNextWord(descriptor, &descriptorWord);
+ }
+
+ return _URC_CONTINUE_UNWIND;
+}
+
+static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state,
+ _Unwind_Control_Block* ucbp,
+ struct _Unwind_Context* context) {
+ // Read the compact model EHT entry's header # 6.3
+ const uint32_t* unwindingData = ucbp->pr_cache.ehtp;
+ assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry");
+ Descriptor::Format format =
+ static_cast<Descriptor::Format>((*unwindingData & 0x0f000000) >> 24);
+
+ const char *lsda =
+ reinterpret_cast<const char *>(_Unwind_GetLanguageSpecificData(context));
+
+ // Handle descriptors before unwinding so they are processed in the context
+ // of the correct stack frame.
+ _Unwind_Reason_Code result =
+ ProcessDescriptors(state, ucbp, context, format, lsda,
+ ucbp->pr_cache.additional);
+
+ if (result != _URC_CONTINUE_UNWIND)
+ return result;
+
switch (__unw_step(reinterpret_cast<unw_cursor_t *>(context))) {
case UNW_STEP_SUCCESS:
return _URC_CONTINUE_UNWIND;
case UNW_STEP_END:
return _URC_END_OF_STACK;
default:
- return _URC_FAILURE;
+ return _URC_FAILURE;
+ }
+}
+
+// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE /
+// _UVRSD_UINT32.
+uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) {
+ return ((1U << (count_minus_one + 1)) - 1) << start;
+}
+
+// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP /
+// _UVRSD_DOUBLE.
+uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) {
+ return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1);
+}
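// Illustrative values for the two helpers above:
//   RegisterMask(4, 3)  == 0x00f0      (pop r4-r7 as a _UVRSD_UINT32 mask)
//   RegisterMask(0, 3)  == 0x000f      (pop r0-r3)
//   RegisterRange(8, 7) == 0x00080008  (start register 8, count 8: D8-D15)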
+
+} // end anonymous namespace
+
+/**
+ * Decodes an EHT entry.
+ *
+ * @param data Pointer to EHT.
+ * @param[out] off Offset from return value (in bytes) to begin interpretation.
+ * @param[out] len Number of bytes in unwind code.
+ * @return Pointer to beginning of unwind code.
+ */
+extern "C" const uint32_t*
+decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) {
+ if ((*data & 0x80000000) == 0) {
+ // 6.2: Generic Model
+ //
+ // EHT entry is a prel31 pointing to the PR, followed by data understood
+ // only by the personality routine. Fortunately, all existing assembler
+ // implementations, including GNU assembler, LLVM integrated assembler,
+ // and ARM assembler, assume that the unwind opcodes come after the
+ // personality routine address.
+ *off = 1; // First byte is size data.
+ *len = (((data[1] >> 24) & 0xff) + 1) * 4;
+ data++; // Skip the first word, which is the prel31 offset.
+ } else {
+ // 6.3: ARM Compact Model
+ //
+ // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indexed
+ // by format:
+ Descriptor::Format format =
+ static_cast<Descriptor::Format>((*data & 0x0f000000) >> 24);
+ switch (format) {
+ case Descriptor::SU16:
+ *len = 4;
+ *off = 1;
+ break;
+ case Descriptor::LU16:
+ case Descriptor::LU32:
+ *len = 4 + 4 * ((*data & 0x00ff0000) >> 16);
+ *off = 2;
+ break;
+ default:
+ return nullptr;
+ }
}
-}
-
-// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE /
-// _UVRSD_UINT32.
-uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) {
- return ((1U << (count_minus_one + 1)) - 1) << start;
-}
-
-// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP /
-// _UVRSD_DOUBLE.
-uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) {
- return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1);
-}
-
-} // end anonymous namespace
-
-/**
- * Decodes an EHT entry.
- *
- * @param data Pointer to EHT.
- * @param[out] off Offset from return value (in bytes) to begin interpretation.
- * @param[out] len Number of bytes in unwind code.
- * @return Pointer to beginning of unwind code.
- */
-extern "C" const uint32_t*
-decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) {
- if ((*data & 0x80000000) == 0) {
- // 6.2: Generic Model
- //
- // EHT entry is a prel31 pointing to the PR, followed by data understood
- // only by the personality routine. Fortunately, all existing assembler
- // implementations, including GNU assembler, LLVM integrated assembler,
- // and ARM assembler, assume that the unwind opcodes come after the
- // personality routine address.
- *off = 1; // First byte is size data.
- *len = (((data[1] >> 24) & 0xff) + 1) * 4;
- data++; // Skip the first word, which is the prel31 offset.
- } else {
- // 6.3: ARM Compact Model
- //
- // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indexed
- // by format:
- Descriptor::Format format =
- static_cast<Descriptor::Format>((*data & 0x0f000000) >> 24);
- switch (format) {
- case Descriptor::SU16:
- *len = 4;
- *off = 1;
- break;
- case Descriptor::LU16:
- case Descriptor::LU32:
- *len = 4 + 4 * ((*data & 0x00ff0000) >> 16);
- *off = 2;
- break;
- default:
- return nullptr;
- }
- }
- return data;
-}
-
+ return data;
+}
+
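// Example of the compact-model path (illustrative, assuming a pr0 entry word
// of 0x80b0b0b0): bit 31 is set and format = (word & 0x0f000000) >> 24 = 0
// (SU16), so *off = 1 and *len = 4; the interpreter then reads bytes 1..3 via
// getByte(), i.e. the three 0xb0 "finish" opcodes packed after the header byte
// in the same word.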
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data,
size_t offset, size_t len) {
- bool wrotePC = false;
- bool finish = false;
+ bool wrotePC = false;
+ bool finish = false;
bool hasReturnAddrAuthCode = false;
- while (offset < len && !finish) {
- uint8_t byte = getByte(data, offset++);
- if ((byte & 0x80) == 0) {
- uint32_t sp;
- _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
- if (byte & 0x40)
- sp -= (((uint32_t)byte & 0x3f) << 2) + 4;
- else
- sp += ((uint32_t)byte << 2) + 4;
- _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
- } else {
- switch (byte & 0xf0) {
- case 0x80: {
- if (offset >= len)
- return _URC_FAILURE;
- uint32_t registers =
- (((uint32_t)byte & 0x0f) << 12) |
- (((uint32_t)getByte(data, offset++)) << 4);
- if (!registers)
- return _URC_FAILURE;
- if (registers & (1 << 15))
- wrotePC = true;
- _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
- break;
- }
- case 0x90: {
- uint8_t reg = byte & 0x0f;
- if (reg == 13 || reg == 15)
- return _URC_FAILURE;
- uint32_t sp;
- _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg,
- _UVRSD_UINT32, &sp);
- _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
- &sp);
- break;
- }
- case 0xa0: {
- uint32_t registers = RegisterMask(4, byte & 0x07);
- if (byte & 0x08)
- registers |= 1 << 14;
- _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
- break;
- }
- case 0xb0: {
- switch (byte) {
- case 0xb0:
- finish = true;
- break;
- case 0xb1: {
- if (offset >= len)
- return _URC_FAILURE;
- uint8_t registers = getByte(data, offset++);
- if (registers & 0xf0 || !registers)
- return _URC_FAILURE;
- _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
- break;
- }
- case 0xb2: {
- uint32_t addend = 0;
- uint32_t shift = 0;
- // This decodes a uleb128 value.
- while (true) {
- if (offset >= len)
- return _URC_FAILURE;
- uint32_t v = getByte(data, offset++);
- addend |= (v & 0x7f) << shift;
- if ((v & 0x80) == 0)
- break;
- shift += 7;
- }
- uint32_t sp;
- _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
- &sp);
- sp += 0x204 + (addend << 2);
- _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
- &sp);
- break;
- }
- case 0xb3: {
- uint8_t v = getByte(data, offset++);
- _Unwind_VRS_Pop(context, _UVRSC_VFP,
- RegisterRange(static_cast<uint8_t>(v >> 4),
- v & 0x0f), _UVRSD_VFPX);
- break;
- }
- case 0xb4:
+ while (offset < len && !finish) {
+ uint8_t byte = getByte(data, offset++);
+ if ((byte & 0x80) == 0) {
+ uint32_t sp;
+ _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
+ if (byte & 0x40)
+ sp -= (((uint32_t)byte & 0x3f) << 2) + 4;
+ else
+ sp += ((uint32_t)byte << 2) + 4;
+ _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
+ } else {
+ switch (byte & 0xf0) {
+ case 0x80: {
+ if (offset >= len)
+ return _URC_FAILURE;
+ uint32_t registers =
+ (((uint32_t)byte & 0x0f) << 12) |
+ (((uint32_t)getByte(data, offset++)) << 4);
+ if (!registers)
+ return _URC_FAILURE;
+ if (registers & (1 << 15))
+ wrotePC = true;
+ _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
+ break;
+ }
+ case 0x90: {
+ uint8_t reg = byte & 0x0f;
+ if (reg == 13 || reg == 15)
+ return _URC_FAILURE;
+ uint32_t sp;
+ _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg,
+ _UVRSD_UINT32, &sp);
+ _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
+ &sp);
+ break;
+ }
+ case 0xa0: {
+ uint32_t registers = RegisterMask(4, byte & 0x07);
+ if (byte & 0x08)
+ registers |= 1 << 14;
+ _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
+ break;
+ }
+ case 0xb0: {
+ switch (byte) {
+ case 0xb0:
+ finish = true;
+ break;
+ case 0xb1: {
+ if (offset >= len)
+ return _URC_FAILURE;
+ uint8_t registers = getByte(data, offset++);
+ if (registers & 0xf0 || !registers)
+ return _URC_FAILURE;
+ _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
+ break;
+ }
+ case 0xb2: {
+ uint32_t addend = 0;
+ uint32_t shift = 0;
+ // This decodes a uleb128 value.
+ while (true) {
+ if (offset >= len)
+ return _URC_FAILURE;
+ uint32_t v = getByte(data, offset++);
+ addend |= (v & 0x7f) << shift;
+ if ((v & 0x80) == 0)
+ break;
+ shift += 7;
+ }
+ uint32_t sp;
+ _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
+ &sp);
+ sp += 0x204 + (addend << 2);
+ _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
+ &sp);
+ break;
+ }
+ case 0xb3: {
+ uint8_t v = getByte(data, offset++);
+ _Unwind_VRS_Pop(context, _UVRSC_VFP,
+ RegisterRange(static_cast<uint8_t>(v >> 4),
+ v & 0x0f), _UVRSD_VFPX);
+ break;
+ }
+ case 0xb4:
hasReturnAddrAuthCode = true;
_Unwind_VRS_Pop(context, _UVRSC_PSEUDO,
0 /* Return Address Auth Code */, _UVRSD_UINT32);
break;
- case 0xb5:
- case 0xb6:
- case 0xb7:
- return _URC_FAILURE;
- default:
- _Unwind_VRS_Pop(context, _UVRSC_VFP,
- RegisterRange(8, byte & 0x07), _UVRSD_VFPX);
- break;
- }
- break;
- }
- case 0xc0: {
- switch (byte) {
+ case 0xb5:
+ case 0xb6:
+ case 0xb7:
+ return _URC_FAILURE;
+ default:
+ _Unwind_VRS_Pop(context, _UVRSC_VFP,
+ RegisterRange(8, byte & 0x07), _UVRSD_VFPX);
+ break;
+ }
+ break;
+ }
+ case 0xc0: {
+ switch (byte) {
#if defined(__ARM_WMMX)
- case 0xc0:
- case 0xc1:
- case 0xc2:
- case 0xc3:
- case 0xc4:
- case 0xc5:
- _Unwind_VRS_Pop(context, _UVRSC_WMMXD,
- RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE);
- break;
- case 0xc6: {
- uint8_t v = getByte(data, offset++);
- uint8_t start = static_cast<uint8_t>(v >> 4);
- uint8_t count_minus_one = v & 0xf;
- if (start + count_minus_one >= 16)
- return _URC_FAILURE;
- _Unwind_VRS_Pop(context, _UVRSC_WMMXD,
- RegisterRange(start, count_minus_one),
- _UVRSD_DOUBLE);
- break;
- }
- case 0xc7: {
- uint8_t v = getByte(data, offset++);
- if (!v || v & 0xf0)
- return _URC_FAILURE;
- _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE);
- break;
- }
+ case 0xc0:
+ case 0xc1:
+ case 0xc2:
+ case 0xc3:
+ case 0xc4:
+ case 0xc5:
+ _Unwind_VRS_Pop(context, _UVRSC_WMMXD,
+ RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE);
+ break;
+ case 0xc6: {
+ uint8_t v = getByte(data, offset++);
+ uint8_t start = static_cast<uint8_t>(v >> 4);
+ uint8_t count_minus_one = v & 0xf;
+ if (start + count_minus_one >= 16)
+ return _URC_FAILURE;
+ _Unwind_VRS_Pop(context, _UVRSC_WMMXD,
+ RegisterRange(start, count_minus_one),
+ _UVRSD_DOUBLE);
+ break;
+ }
+ case 0xc7: {
+ uint8_t v = getByte(data, offset++);
+ if (!v || v & 0xf0)
+ return _URC_FAILURE;
+ _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE);
+ break;
+ }
#endif
- case 0xc8:
- case 0xc9: {
- uint8_t v = getByte(data, offset++);
- uint8_t start =
- static_cast<uint8_t>(((byte == 0xc8) ? 16 : 0) + (v >> 4));
- uint8_t count_minus_one = v & 0xf;
- if (start + count_minus_one >= 32)
- return _URC_FAILURE;
- _Unwind_VRS_Pop(context, _UVRSC_VFP,
- RegisterRange(start, count_minus_one),
- _UVRSD_DOUBLE);
- break;
- }
- default:
- return _URC_FAILURE;
- }
- break;
- }
- case 0xd0: {
- if (byte & 0x08)
- return _URC_FAILURE;
- _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7),
- _UVRSD_DOUBLE);
- break;
- }
- default:
- return _URC_FAILURE;
- }
- }
- }
- if (!wrotePC) {
- uint32_t lr;
- _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr);
+ case 0xc8:
+ case 0xc9: {
+ uint8_t v = getByte(data, offset++);
+ uint8_t start =
+ static_cast<uint8_t>(((byte == 0xc8) ? 16 : 0) + (v >> 4));
+ uint8_t count_minus_one = v & 0xf;
+ if (start + count_minus_one >= 32)
+ return _URC_FAILURE;
+ _Unwind_VRS_Pop(context, _UVRSC_VFP,
+ RegisterRange(start, count_minus_one),
+ _UVRSD_DOUBLE);
+ break;
+ }
+ default:
+ return _URC_FAILURE;
+ }
+ break;
+ }
+ case 0xd0: {
+ if (byte & 0x08)
+ return _URC_FAILURE;
+ _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7),
+ _UVRSD_DOUBLE);
+ break;
+ }
+ default:
+ return _URC_FAILURE;
+ }
+ }
+ }
+ if (!wrotePC) {
+ uint32_t lr;
+ _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr);
#ifdef __ARM_FEATURE_PAUTH
if (hasReturnAddrAuthCode) {
uint32_t sp;
@@ -437,263 +437,263 @@ _Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data,
__asm__ __volatile__("autg %0, %1, %2" : : "r"(pac), "r"(lr), "r"(sp) :);
}
#endif
- _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr);
- }
- return _URC_CONTINUE_UNWIND;
-}
-
+ _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr);
+ }
+ return _URC_CONTINUE_UNWIND;
+}
+
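// Illustrative sketch: a standalone form of the ULEB128 decode that the 0xb2
// opcode ("vsp = vsp + 0x204 + (uleb128 << 2)") performs inline above. The
// name decodeULEB128 is hypothetical; the in-tree loop reads bytes through
// getByte() rather than a raw pointer.
static bool decodeULEB128(const uint8_t *p, size_t len, size_t *offset,
                          uint32_t *result) {
  uint32_t value = 0;
  uint32_t shift = 0;
  while (*offset < len) {
    uint8_t byte = p[(*offset)++];
    value |= (uint32_t)(byte & 0x7f) << shift;
    if ((byte & 0x80) == 0) {
      *result = value;          // terminator byte (top bit clear) reached
      return true;
    }
    shift += 7;
  }
  return false;                 // ran off the end of the unwind data
}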
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr0(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
- return unwindOneFrame(state, ucbp, context);
-}
-
+ return unwindOneFrame(state, ucbp, context);
+}
+
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr1(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
- return unwindOneFrame(state, ucbp, context);
-}
-
+ return unwindOneFrame(state, ucbp, context);
+}
+
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr2(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
- return unwindOneFrame(state, ucbp, context);
-}
-
-static _Unwind_Reason_Code
+ return unwindOneFrame(state, ucbp, context);
+}
+
+static _Unwind_Reason_Code
unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
- // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during
- // phase 1 and then restoring it to the "primary VRS" for phase 2. The
- // effect is phase 2 doesn't see any of the VRS manipulations from phase 1.
- // In this implementation, the phases don't share the VRS backing store.
- // Instead, they are passed the original |uc| and they create a new VRS
- // from scratch thus achieving the same effect.
+ // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during
+ // phase 1 and then restoring it to the "primary VRS" for phase 2. The
+ // effect is phase 2 doesn't see any of the VRS manipulations from phase 1.
+ // In this implementation, the phases don't share the VRS backing store.
+ // Instead, they are passed the original |uc| and they create a new VRS
+ // from scratch thus achieving the same effect.
__unw_init_local(cursor, uc);
-
- // Walk each frame looking for a place to stop.
- for (bool handlerNotFound = true; handlerNotFound;) {
-
- // See if frame has code to run (has personality routine).
- unw_proc_info_t frameInfo;
+
+ // Walk each frame looking for a place to stop.
+ for (bool handlerNotFound = true; handlerNotFound;) {
+
+ // See if frame has code to run (has personality routine).
+ unw_proc_info_t frameInfo;
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE1_ERROR",
static_cast<void *>(exception_object));
- return _URC_FATAL_PHASE1_ERROR;
- }
-
+ return _URC_FATAL_PHASE1_ERROR;
+ }
+
#ifndef NDEBUG
- // When tracing, print state information.
- if (_LIBUNWIND_TRACING_UNWINDING) {
- char functionBuf[512];
- const char *functionName = functionBuf;
- unw_word_t offset;
+ // When tracing, print state information.
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionBuf[512];
+ const char *functionName = functionBuf;
+ unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
- (frameInfo.start_ip + offset > frameInfo.end_ip))
- functionName = ".anonymous.";
- unw_word_t pc;
+ (frameInfo.start_ip + offset > frameInfo.end_ip))
+ functionName = ".anonymous.";
+ unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, "
"lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR,
static_cast<void *>(exception_object), pc,
frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
- }
+ }
#endif
-
- // If there is a personality routine, ask it if it will want to stop at
- // this frame.
- if (frameInfo.handler != 0) {
+
+ // If there is a personality routine, ask it if it will want to stop at
+ // this frame.
+ if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(long)(frameInfo.handler);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): calling personality function %p",
- static_cast<void *>(exception_object),
- reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p)));
+ static_cast<void *>(exception_object),
+ reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p)));
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
- exception_object->pr_cache.fnstart = frameInfo.start_ip;
- exception_object->pr_cache.ehtp =
- (_Unwind_EHT_Header *)frameInfo.unwind_info;
- exception_object->pr_cache.additional = frameInfo.flags;
- _Unwind_Reason_Code personalityResult =
- (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context);
- _LIBUNWIND_TRACE_UNWINDING(
- "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p "
+ exception_object->pr_cache.fnstart = frameInfo.start_ip;
+ exception_object->pr_cache.ehtp =
+ (_Unwind_EHT_Header *)frameInfo.unwind_info;
+ exception_object->pr_cache.additional = frameInfo.flags;
+ _Unwind_Reason_Code personalityResult =
+ (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context);
+ _LIBUNWIND_TRACE_UNWINDING(
+ "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p "
"additional %x",
- static_cast<void *>(exception_object), personalityResult,
- exception_object->pr_cache.fnstart,
- static_cast<void *>(exception_object->pr_cache.ehtp),
- exception_object->pr_cache.additional);
- switch (personalityResult) {
- case _URC_HANDLER_FOUND:
- // found a catch clause or locals that need destructing in this frame
- // stop search and remember stack pointer at the frame
- handlerNotFound = false;
- // p should have initialized barrier_cache. EHABI #7.3.5
- _LIBUNWIND_TRACE_UNWINDING(
+ static_cast<void *>(exception_object), personalityResult,
+ exception_object->pr_cache.fnstart,
+ static_cast<void *>(exception_object->pr_cache.ehtp),
+ exception_object->pr_cache.additional);
+ switch (personalityResult) {
+ case _URC_HANDLER_FOUND:
+ // found a catch clause or locals that need destructing in this frame
+ // stop search and remember stack pointer at the frame
+ handlerNotFound = false;
+ // p should have initialized barrier_cache. EHABI #7.3.5
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND",
- static_cast<void *>(exception_object));
- return _URC_NO_REASON;
-
- case _URC_CONTINUE_UNWIND:
- _LIBUNWIND_TRACE_UNWINDING(
+ static_cast<void *>(exception_object));
+ return _URC_NO_REASON;
+
+ case _URC_CONTINUE_UNWIND:
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND",
- static_cast<void *>(exception_object));
- // continue unwinding
- break;
-
- // EHABI #7.3.3
- case _URC_FAILURE:
- return _URC_FAILURE;
-
- default:
- // something went wrong
- _LIBUNWIND_TRACE_UNWINDING(
+ static_cast<void *>(exception_object));
+ // continue unwinding
+ break;
+
+ // EHABI #7.3.3
+ case _URC_FAILURE:
+ return _URC_FAILURE;
+
+ default:
+ // something went wrong
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR",
- static_cast<void *>(exception_object));
- return _URC_FATAL_PHASE1_ERROR;
- }
- }
- }
- return _URC_NO_REASON;
-}
-
+ static_cast<void *>(exception_object));
+ return _URC_FATAL_PHASE1_ERROR;
+ }
+ }
+ }
+ return _URC_NO_REASON;
+}
+
static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
- _Unwind_Exception *exception_object,
- bool resume) {
- // See comment at the start of unwind_phase1 regarding VRS integrity.
+ _Unwind_Exception *exception_object,
+ bool resume) {
+ // See comment at the start of unwind_phase1 regarding VRS integrity.
__unw_init_local(cursor, uc);
-
+
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
- static_cast<void *>(exception_object));
- int frame_count = 0;
-
- // Walk each frame until we reach where search phase said to stop.
- while (true) {
+ static_cast<void *>(exception_object));
+ int frame_count = 0;
+
+ // Walk each frame until we reach where search phase said to stop.
+ while (true) {
// Ask libunwind to get next frame (skip over first which is
- // _Unwind_RaiseException or _Unwind_Resume).
- //
- // Resume only ever makes sense for 1 frame.
- _Unwind_State state =
- resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING;
- if (resume && frame_count == 1) {
- // On a resume, first unwind the _Unwind_Resume() frame. The next frame
- // is now the landing pad for the cleanup from a previous execution of
- // phase2. To continue unwinding correctly, replace VRS[15] with the
- // IP of the frame that the previous run of phase2 installed the context
- // for. After this, continue unwinding as if normal.
- //
- // See #7.4.6 for details.
+ // _Unwind_RaiseException or _Unwind_Resume).
+ //
+ // Resume only ever makes sense for 1 frame.
+ _Unwind_State state =
+ resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING;
+ if (resume && frame_count == 1) {
+ // On a resume, first unwind the _Unwind_Resume() frame. The next frame
+ // is now the landing pad for the cleanup from a previous execution of
+ // phase2. To continue unwinding correctly, replace VRS[15] with the
+ // IP of the frame that the previous run of phase2 installed the context
+ // for. After this, continue unwinding as if normal.
+ //
+ // See #7.4.6 for details.
__unw_set_reg(cursor, UNW_REG_IP,
exception_object->unwinder_cache.reserved2);
- resume = false;
- }
-
- // Get info about this frame.
- unw_word_t sp;
- unw_proc_info_t frameInfo;
+ resume = false;
+ }
+
+ // Get info about this frame.
+ unw_word_t sp;
+ unw_proc_info_t frameInfo;
__unw_get_reg(cursor, UNW_REG_SP, &sp);
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE2_ERROR",
static_cast<void *>(exception_object));
- return _URC_FATAL_PHASE2_ERROR;
- }
-
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
#ifndef NDEBUG
- // When tracing, print state information.
- if (_LIBUNWIND_TRACING_UNWINDING) {
- char functionBuf[512];
- const char *functionName = functionBuf;
- unw_word_t offset;
+ // When tracing, print state information.
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionBuf[512];
+ const char *functionName = functionBuf;
+ unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
- (frameInfo.start_ip + offset > frameInfo.end_ip))
- functionName = ".anonymous.";
- _LIBUNWIND_TRACE_UNWINDING(
+ (frameInfo.start_ip + offset > frameInfo.end_ip))
+ functionName = ".anonymous.";
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", "
"lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "",
static_cast<void *>(exception_object), frameInfo.start_ip,
functionName, sp, frameInfo.lsda,
frameInfo.handler);
- }
+ }
#endif
-
- // If there is a personality routine, tell it we are unwinding.
- if (frameInfo.handler != 0) {
+
+ // If there is a personality routine, tell it we are unwinding.
+ if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler);
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
- // EHABI #7.2
- exception_object->pr_cache.fnstart = frameInfo.start_ip;
- exception_object->pr_cache.ehtp =
- (_Unwind_EHT_Header *)frameInfo.unwind_info;
- exception_object->pr_cache.additional = frameInfo.flags;
- _Unwind_Reason_Code personalityResult =
- (*p)(state, exception_object, context);
- switch (personalityResult) {
- case _URC_CONTINUE_UNWIND:
- // Continue unwinding
- _LIBUNWIND_TRACE_UNWINDING(
+ // EHABI #7.2
+ exception_object->pr_cache.fnstart = frameInfo.start_ip;
+ exception_object->pr_cache.ehtp =
+ (_Unwind_EHT_Header *)frameInfo.unwind_info;
+ exception_object->pr_cache.additional = frameInfo.flags;
+ _Unwind_Reason_Code personalityResult =
+ (*p)(state, exception_object, context);
+ switch (personalityResult) {
+ case _URC_CONTINUE_UNWIND:
+ // Continue unwinding
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND",
- static_cast<void *>(exception_object));
- // EHABI #7.2
- if (sp == exception_object->barrier_cache.sp) {
- // Phase 1 said we would stop at this frame, but we did not...
- _LIBUNWIND_ABORT("during phase1 personality function said it would "
- "stop here, but now in phase2 it did not stop here");
- }
- break;
- case _URC_INSTALL_CONTEXT:
- _LIBUNWIND_TRACE_UNWINDING(
+ static_cast<void *>(exception_object));
+ // EHABI #7.2
+ if (sp == exception_object->barrier_cache.sp) {
+ // Phase 1 said we would stop at this frame, but we did not...
+ _LIBUNWIND_ABORT("during phase1 personality function said it would "
+ "stop here, but now in phase2 it did not stop here");
+ }
+ break;
+ case _URC_INSTALL_CONTEXT:
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT",
- static_cast<void *>(exception_object));
- // Personality routine says to transfer control to landing pad.
- // We may get control back if landing pad calls _Unwind_Resume().
- if (_LIBUNWIND_TRACING_UNWINDING) {
- unw_word_t pc;
+ static_cast<void *>(exception_object));
+ // Personality routine says to transfer control to landing pad.
+ // We may get control back if landing pad calls _Unwind_Resume().
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
__unw_get_reg(cursor, UNW_REG_SP, &sp);
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering "
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering "
"user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR,
- static_cast<void *>(exception_object),
+ static_cast<void *>(exception_object),
pc, sp);
- }
-
- {
- // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume
- // is called back, to find this same frame.
- unw_word_t pc;
+ }
+
+ {
+ // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume
+ // is called back, to find this same frame.
+ unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
- exception_object->unwinder_cache.reserved2 = (uint32_t)pc;
- }
+ exception_object->unwinder_cache.reserved2 = (uint32_t)pc;
+ }
__unw_resume(cursor);
// __unw_resume() only returns if there was an error.
- return _URC_FATAL_PHASE2_ERROR;
-
- // # EHABI #7.4.3
- case _URC_FAILURE:
- abort();
-
- default:
- // Personality routine returned an unknown result code.
- _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
- personalityResult);
- return _URC_FATAL_PHASE2_ERROR;
- }
- }
- frame_count++;
- }
-
- // Clean up phase did not resume at the frame that the search phase
- // said it would...
- return _URC_FATAL_PHASE2_ERROR;
-}
-
+ return _URC_FATAL_PHASE2_ERROR;
+
+ // # EHABI #7.4.3
+ case _URC_FAILURE:
+ abort();
+
+ default:
+ // Personality routine returned an unknown result code.
+ _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
+ personalityResult);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ }
+ frame_count++;
+ }
+
+ // Clean up phase did not resume at the frame that the search phase
+ // said it would...
+ return _URC_FATAL_PHASE2_ERROR;
+}
+
static _Unwind_Reason_Code
unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
_Unwind_Exception *exception_object, _Unwind_Stop_Fn stop,
@@ -811,53 +811,53 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
return _URC_FATAL_PHASE2_ERROR;
}
-/// Called by __cxa_throw. Only returns if there is a fatal error.
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_RaiseException(_Unwind_Exception *exception_object) {
+/// Called by __cxa_throw. Only returns if there is a fatal error.
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_RaiseException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)",
- static_cast<void *>(exception_object));
- unw_context_t uc;
+ static_cast<void *>(exception_object));
+ unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
-
- // This field is for compatibility with GCC to say this isn't a forced
- // unwind. EHABI #7.2
- exception_object->unwinder_cache.reserved1 = 0;
-
- // phase 1: the search phase
+
+ // This field is for compatibility with GCC to say this isn't a forced
+ // unwind. EHABI #7.2
+ exception_object->unwinder_cache.reserved1 = 0;
+
+ // phase 1: the search phase
_Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object);
- if (phase1 != _URC_NO_REASON)
- return phase1;
-
- // phase 2: the clean up phase
+ if (phase1 != _URC_NO_REASON)
+ return phase1;
+
+ // phase 2: the clean up phase
return unwind_phase2(&uc, &cursor, exception_object, false);
-}
-
-_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) {
- // This is to be called when exception handling completes to give us a chance
- // to perform any housekeeping. EHABI #7.2. But we have nothing to do here.
- (void)exception_object;
-}
-
-/// When _Unwind_RaiseException() is in phase2, it hands control
-/// to the personality function at each frame. The personality
-/// may force a jump to a landing pad in that function, the landing
-/// pad code may then call _Unwind_Resume() to continue with the
-/// unwinding. Note: the call to _Unwind_Resume() is from compiler
-/// generated user code. All other _Unwind_* routines are called
-/// by the C++ runtime __cxa_* routines.
-///
-/// Note: re-throwing an exception (as opposed to continuing the unwind)
-/// is implemented by having the code call __cxa_rethrow() which
-/// in turn calls _Unwind_Resume_or_Rethrow().
-_LIBUNWIND_EXPORT void
-_Unwind_Resume(_Unwind_Exception *exception_object) {
+}
+
+_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) {
+ // This is to be called when exception handling completes to give us a chance
+ // to perform any housekeeping. EHABI #7.2. But we have nothing to do here.
+ (void)exception_object;
+}
+
+/// When _Unwind_RaiseException() is in phase2, it hands control
+/// to the personality function at each frame. The personality
+/// may force a jump to a landing pad in that function, the landing
+/// pad code may then call _Unwind_Resume() to continue with the
+/// unwinding. Note: the call to _Unwind_Resume() is from compiler
+/// generated user code. All other _Unwind_* routines are called
+/// by the C++ runtime __cxa_* routines.
+///
+/// Note: re-throwing an exception (as opposed to continuing the unwind)
+/// is implemented by having the code call __cxa_rethrow() which
+/// in turn calls _Unwind_Resume_or_Rethrow().
+_LIBUNWIND_EXPORT void
+_Unwind_Resume(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)",
- static_cast<void *>(exception_object));
- unw_context_t uc;
+ static_cast<void *>(exception_object));
+ unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
-
+
if (exception_object->unwinder_cache.reserved1)
unwind_phase2_forced(
&uc, &cursor, exception_object,
@@ -865,77 +865,77 @@ _Unwind_Resume(_Unwind_Exception *exception_object) {
(void *)exception_object->unwinder_cache.reserved3);
else
unwind_phase2(&uc, &cursor, exception_object, true);
-
- // Clients assume _Unwind_Resume() does not return, so all we can do is abort.
- _LIBUNWIND_ABORT("_Unwind_Resume() can't return");
-}
-
-/// Called by personality handler during phase 2 to get LSDA for current frame.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_proc_info_t frameInfo;
- uintptr_t result = 0;
+
+ // Clients assume _Unwind_Resume() does not return, so all we can do is abort.
+ _LIBUNWIND_ABORT("_Unwind_Resume() can't return");
+}
+
+/// Called by personality handler during phase 2 to get LSDA for current frame.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_proc_info_t frameInfo;
+ uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
- result = (uintptr_t)frameInfo.lsda;
- _LIBUNWIND_TRACE_API(
+ result = (uintptr_t)frameInfo.lsda;
+ _LIBUNWIND_TRACE_API(
"_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx",
- static_cast<void *>(context), (long long)result);
- return result;
-}
-
+ static_cast<void *>(context), (long long)result);
+ return result;
+}
+
[[maybe_unused]] static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation,
- void* valuep) {
- uint64_t value = 0;
- switch (representation) {
- case _UVRSD_UINT32:
- case _UVRSD_FLOAT:
- memcpy(&value, valuep, sizeof(uint32_t));
- break;
-
- case _UVRSD_VFPX:
- case _UVRSD_UINT64:
- case _UVRSD_DOUBLE:
- memcpy(&value, valuep, sizeof(uint64_t));
- break;
- }
- return value;
-}
-
+ void* valuep) {
+ uint64_t value = 0;
+ switch (representation) {
+ case _UVRSD_UINT32:
+ case _UVRSD_FLOAT:
+ memcpy(&value, valuep, sizeof(uint32_t));
+ break;
+
+ case _UVRSD_VFPX:
+ case _UVRSD_UINT64:
+ case _UVRSD_DOUBLE:
+ memcpy(&value, valuep, sizeof(uint64_t));
+ break;
+ }
+ return value;
+}
+
_LIBUNWIND_EXPORT _Unwind_VRS_Result
-_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t regno, _Unwind_VRS_DataRepresentation representation,
- void *valuep) {
- _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, "
+_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t regno, _Unwind_VRS_DataRepresentation representation,
+ void *valuep) {
+ _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, "
"rep=%d, value=0x%llX)",
- static_cast<void *>(context), regclass, regno,
- representation,
- ValueAsBitPattern(representation, valuep));
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- switch (regclass) {
- case _UVRSC_CORE:
- if (representation != _UVRSD_UINT32 || regno > 15)
- return _UVRSR_FAILED;
+ static_cast<void *>(context), regclass, regno,
+ representation,
+ ValueAsBitPattern(representation, valuep));
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ switch (regclass) {
+ case _UVRSC_CORE:
+ if (representation != _UVRSD_UINT32 || regno > 15)
+ return _UVRSR_FAILED;
return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno),
*(unw_word_t *)valuep) == UNW_ESUCCESS
- ? _UVRSR_OK
- : _UVRSR_FAILED;
- case _UVRSC_VFP:
- if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
- return _UVRSR_FAILED;
- if (representation == _UVRSD_VFPX) {
- // Can only touch d0-15 with FSTMFDX.
- if (regno > 15)
- return _UVRSR_FAILED;
+ ? _UVRSR_OK
+ : _UVRSR_FAILED;
+ case _UVRSC_VFP:
+ if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
+ return _UVRSR_FAILED;
+ if (representation == _UVRSD_VFPX) {
+ // Can only touch d0-15 with FSTMFDX.
+ if (regno > 15)
+ return _UVRSR_FAILED;
__unw_save_vfp_as_X(cursor);
- } else {
- if (regno > 31)
- return _UVRSR_FAILED;
- }
+ } else {
+ if (regno > 31)
+ return _UVRSR_FAILED;
+ }
return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno),
*(unw_fpreg_t *)valuep) == UNW_ESUCCESS
- ? _UVRSR_OK
- : _UVRSR_FAILED;
+ ? _UVRSR_OK
+ : _UVRSR_FAILED;
#if defined(__ARM_WMMX)
case _UVRSC_WMMXC:
if (representation != _UVRSD_UINT32 || regno > 3)
@@ -944,13 +944,13 @@ _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
*(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
- case _UVRSC_WMMXD:
- if (representation != _UVRSD_DOUBLE || regno > 31)
- return _UVRSR_FAILED;
+ case _UVRSC_WMMXD:
+ if (representation != _UVRSD_DOUBLE || regno > 31)
+ return _UVRSR_FAILED;
return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno),
*(unw_fpreg_t *)valuep) == UNW_ESUCCESS
- ? _UVRSR_OK
- : _UVRSR_FAILED;
+ ? _UVRSR_OK
+ : _UVRSR_FAILED;
#else
case _UVRSC_WMMXC:
case _UVRSC_WMMXD:
@@ -965,40 +965,40 @@ _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
? _UVRSR_OK
: _UVRSR_FAILED;
break;
- }
- _LIBUNWIND_ABORT("unsupported register class");
-}
-
-static _Unwind_VRS_Result
-_Unwind_VRS_Get_Internal(_Unwind_Context *context,
- _Unwind_VRS_RegClass regclass, uint32_t regno,
- _Unwind_VRS_DataRepresentation representation,
- void *valuep) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- switch (regclass) {
- case _UVRSC_CORE:
- if (representation != _UVRSD_UINT32 || regno > 15)
- return _UVRSR_FAILED;
+ }
+ _LIBUNWIND_ABORT("unsupported register class");
+}
+
+static _Unwind_VRS_Result
+_Unwind_VRS_Get_Internal(_Unwind_Context *context,
+ _Unwind_VRS_RegClass regclass, uint32_t regno,
+ _Unwind_VRS_DataRepresentation representation,
+ void *valuep) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ switch (regclass) {
+ case _UVRSC_CORE:
+ if (representation != _UVRSD_UINT32 || regno > 15)
+ return _UVRSR_FAILED;
return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno),
(unw_word_t *)valuep) == UNW_ESUCCESS
- ? _UVRSR_OK
- : _UVRSR_FAILED;
- case _UVRSC_VFP:
- if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
- return _UVRSR_FAILED;
- if (representation == _UVRSD_VFPX) {
- // Can only touch d0-15 with FSTMFDX.
- if (regno > 15)
- return _UVRSR_FAILED;
+ ? _UVRSR_OK
+ : _UVRSR_FAILED;
+ case _UVRSC_VFP:
+ if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
+ return _UVRSR_FAILED;
+ if (representation == _UVRSD_VFPX) {
+ // Can only touch d0-15 with FSTMFDX.
+ if (regno > 15)
+ return _UVRSR_FAILED;
__unw_save_vfp_as_X(cursor);
- } else {
- if (regno > 31)
- return _UVRSR_FAILED;
- }
+ } else {
+ if (regno > 31)
+ return _UVRSR_FAILED;
+ }
return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno),
(unw_fpreg_t *)valuep) == UNW_ESUCCESS
- ? _UVRSR_OK
- : _UVRSR_FAILED;
+ ? _UVRSR_OK
+ : _UVRSR_FAILED;
#if defined(__ARM_WMMX)
case _UVRSC_WMMXC:
if (representation != _UVRSD_UINT32 || regno > 3)
@@ -1007,13 +1007,13 @@ _Unwind_VRS_Get_Internal(_Unwind_Context *context,
(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
- case _UVRSC_WMMXD:
- if (representation != _UVRSD_DOUBLE || regno > 31)
- return _UVRSR_FAILED;
+ case _UVRSC_WMMXD:
+ if (representation != _UVRSD_DOUBLE || regno > 31)
+ return _UVRSR_FAILED;
return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno),
(unw_fpreg_t *)valuep) == UNW_ESUCCESS
- ? _UVRSR_OK
- : _UVRSR_FAILED;
+ ? _UVRSR_OK
+ : _UVRSR_FAILED;
#else
case _UVRSC_WMMXC:
case _UVRSC_WMMXD:
@@ -1028,85 +1028,85 @@ _Unwind_VRS_Get_Internal(_Unwind_Context *context,
? _UVRSR_OK
: _UVRSR_FAILED;
break;
- }
- _LIBUNWIND_ABORT("unsupported register class");
-}
-
+ }
+ _LIBUNWIND_ABORT("unsupported register class");
+}
+
_LIBUNWIND_EXPORT _Unwind_VRS_Result
_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t regno, _Unwind_VRS_DataRepresentation representation,
void *valuep) {
- _Unwind_VRS_Result result =
- _Unwind_VRS_Get_Internal(context, regclass, regno, representation,
- valuep);
- _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, "
+ _Unwind_VRS_Result result =
+ _Unwind_VRS_Get_Internal(context, regclass, regno, representation,
+ valuep);
+ _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, "
"rep=%d, value=0x%llX, result = %d)",
- static_cast<void *>(context), regclass, regno,
- representation,
- ValueAsBitPattern(representation, valuep), result);
- return result;
-}
-
-_Unwind_VRS_Result
-_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t discriminator,
- _Unwind_VRS_DataRepresentation representation) {
- _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, "
+ static_cast<void *>(context), regclass, regno,
+ representation,
+ ValueAsBitPattern(representation, valuep), result);
+ return result;
+}
+
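// Illustrative sketch: how a personality routine might read and adjust SP
// through the _Unwind_VRS_Get/_Unwind_VRS_Set interface defined in this file;
// the 16-byte adjustment and the function name are arbitrary examples, not
// taken from the sources.
static void exampleAdjustSP(_Unwind_Context *context) {
  uint32_t sp;
  if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp) ==
      _UVRSR_OK) {
    sp += 16;   // e.g. discard a 16-byte stack allocation
    _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
  }
}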
+_Unwind_VRS_Result
+_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t discriminator,
+ _Unwind_VRS_DataRepresentation representation) {
+ _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, "
"discriminator=%d, representation=%d)",
- static_cast<void *>(context), regclass, discriminator,
- representation);
- switch (regclass) {
+ static_cast<void *>(context), regclass, discriminator,
+ representation);
+ switch (regclass) {
case _UVRSC_WMMXC:
#if !defined(__ARM_WMMX)
break;
#endif
case _UVRSC_CORE: {
- if (representation != _UVRSD_UINT32)
- return _UVRSR_FAILED;
- // When popping SP from the stack, we don't want to override it from the
- // computed new stack location. See EHABI #7.5.4 table 3.
- bool poppedSP = false;
- uint32_t* sp;
- if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
- _UVRSD_UINT32, &sp) != _UVRSR_OK) {
- return _UVRSR_FAILED;
- }
- for (uint32_t i = 0; i < 16; ++i) {
- if (!(discriminator & static_cast<uint32_t>(1 << i)))
- continue;
- uint32_t value = *sp++;
- if (regclass == _UVRSC_CORE && i == 13)
- poppedSP = true;
- if (_Unwind_VRS_Set(context, regclass, i,
- _UVRSD_UINT32, &value) != _UVRSR_OK) {
- return _UVRSR_FAILED;
- }
- }
- if (!poppedSP) {
- return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP,
- _UVRSD_UINT32, &sp);
- }
- return _UVRSR_OK;
- }
+ if (representation != _UVRSD_UINT32)
+ return _UVRSR_FAILED;
+ // When popping SP from the stack, we don't want to override it from the
+ // computed new stack location. See EHABI #7.5.4 table 3.
+ bool poppedSP = false;
+ uint32_t* sp;
+ if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
+ _UVRSD_UINT32, &sp) != _UVRSR_OK) {
+ return _UVRSR_FAILED;
+ }
+ for (uint32_t i = 0; i < 16; ++i) {
+ if (!(discriminator & static_cast<uint32_t>(1 << i)))
+ continue;
+ uint32_t value = *sp++;
+ if (regclass == _UVRSC_CORE && i == 13)
+ poppedSP = true;
+ if (_Unwind_VRS_Set(context, regclass, i,
+ _UVRSD_UINT32, &value) != _UVRSR_OK) {
+ return _UVRSR_FAILED;
+ }
+ }
+ if (!poppedSP) {
+ return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP,
+ _UVRSD_UINT32, &sp);
+ }
+ return _UVRSR_OK;
+ }
case _UVRSC_WMMXD:
#if !defined(__ARM_WMMX)
break;
#endif
case _UVRSC_VFP: {
- if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
- return _UVRSR_FAILED;
- uint32_t first = discriminator >> 16;
- uint32_t count = discriminator & 0xffff;
- uint32_t end = first+count;
- uint32_t* sp;
- if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
- _UVRSD_UINT32, &sp) != _UVRSR_OK) {
- return _UVRSR_FAILED;
- }
- // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard
- // format 1", which is equivalent to FSTMD + a padding word.
- for (uint32_t i = first; i < end; ++i) {
- // SP is only 32-bit aligned so don't copy 64-bit at a time.
+ if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
+ return _UVRSR_FAILED;
+ uint32_t first = discriminator >> 16;
+ uint32_t count = discriminator & 0xffff;
+ uint32_t end = first+count;
+ uint32_t* sp;
+ if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
+ _UVRSD_UINT32, &sp) != _UVRSR_OK) {
+ return _UVRSR_FAILED;
+ }
+ // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard
+ // format 1", which is equivalent to FSTMD + a padding word.
+ for (uint32_t i = first; i < end; ++i) {
+ // SP is only 32-bit aligned so don't copy 64-bit at a time.
uint64_t w0 = *sp++;
uint64_t w1 = *sp++;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -1116,15 +1116,15 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
#else
#error "Unable to determine endianess"
#endif
- if (_Unwind_VRS_Set(context, regclass, i, representation, &value) !=
- _UVRSR_OK)
- return _UVRSR_FAILED;
- }
- if (representation == _UVRSD_VFPX)
- ++sp;
- return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
- &sp);
- }
+ if (_Unwind_VRS_Set(context, regclass, i, representation, &value) !=
+ _UVRSR_OK)
+ return _UVRSR_FAILED;
+ }
+ if (representation == _UVRSD_VFPX)
+ ++sp;
+ return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
+ &sp);
+ }
case _UVRSC_PSEUDO: {
if (representation != _UVRSD_UINT32 || discriminator != 0)
return _UVRSR_FAILED;
@@ -1139,10 +1139,10 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_RA_AUTH_CODE,
_UVRSD_UINT32, &pac);
}
- }
- _LIBUNWIND_ABORT("unsupported register class");
-}
-
+ }
+ _LIBUNWIND_ABORT("unsupported register class");
+}
+
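// Illustrative sketch: the recombination of two 32-bit stack words into one
// 64-bit VFP value that the _UVRSC_VFP pop loop above performs; SP is only
// 4-byte aligned, so the value cannot be read with a single 64-bit load. The
// helper name combineVFPWords is hypothetical.
static inline uint64_t combineVFPWords(uint32_t w0, uint32_t w1) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return ((uint64_t)w1 << 32) | (uint64_t)w0;   // first word popped is the low half
#else
  return ((uint64_t)w0 << 32) | (uint64_t)w1;   // big-endian: first word is the high half
#endif
}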
/// Not used by C++.
/// Unwinds stack, calling "stop" function at each frame.
/// Could be used to implement longjmp().
@@ -1164,44 +1164,44 @@ _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, _Unwind_Stop_Fn stop,
stop_parameter);
}
-/// Called by personality handler during phase 2 to find the start of the
-/// function.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetRegionStart(struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_proc_info_t frameInfo;
- uintptr_t result = 0;
+/// Called by personality handler during phase 2 to find the start of the
+/// function.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetRegionStart(struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_proc_info_t frameInfo;
+ uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
- result = (uintptr_t)frameInfo.start_ip;
+ result = (uintptr_t)frameInfo.start_ip;
_LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX",
- static_cast<void *>(context), (long long)result);
- return result;
-}
-
-
-/// Called by personality handler during phase 2 if a foreign exception
-/// is caught.
-_LIBUNWIND_EXPORT void
-_Unwind_DeleteException(_Unwind_Exception *exception_object) {
+ static_cast<void *>(context), (long long)result);
+ return result;
+}
+
+
+/// Called by personality handler during phase 2 if a foreign exception
+/// is caught.
+_LIBUNWIND_EXPORT void
+_Unwind_DeleteException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)",
- static_cast<void *>(exception_object));
- if (exception_object->exception_cleanup != NULL)
- (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
- exception_object);
-}
-
-extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
+ static_cast<void *>(exception_object));
+ if (exception_object->exception_cleanup != NULL)
+ (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
+ exception_object);
+}
+
+extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__gnu_unwind_frame(_Unwind_Exception * /* exception_object */,
- struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
+ struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
switch (__unw_step(cursor)) {
case UNW_STEP_SUCCESS:
return _URC_OK;
case UNW_STEP_END:
return _URC_END_OF_STACK;
default:
- return _URC_FAILURE;
+ return _URC_FAILURE;
}
-}
-
+}
+
#endif // defined(_LIBUNWIND_ARM_EHABI)
diff --git a/contrib/libs/libunwind/src/Unwind-EHABI.h b/contrib/libs/libunwind/src/Unwind-EHABI.h
index f24def91ed..ff3b5fc6fe 100644
--- a/contrib/libs/libunwind/src/Unwind-EHABI.h
+++ b/contrib/libs/libunwind/src/Unwind-EHABI.h
@@ -1,50 +1,50 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __UNWIND_EHABI_H__
-#define __UNWIND_EHABI_H__
-
-#include <__libunwind_config.h>
-
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __UNWIND_EHABI_H__
+#define __UNWIND_EHABI_H__
+
+#include <__libunwind_config.h>
+
#if defined(_LIBUNWIND_ARM_EHABI)
-
-#include <stdint.h>
-#include <unwind.h>
-
-// Unable to unwind in the ARM index table (section 5 EHABI).
-#define UNW_EXIDX_CANTUNWIND 0x1
-
-static inline uint32_t signExtendPrel31(uint32_t data) {
- return data | ((data & 0x40000000u) << 1);
-}
-
-static inline uint32_t readPrel31(const uint32_t *data) {
- return (((uint32_t)(uintptr_t)data) + signExtendPrel31(*data));
-}
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0(
- _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context);
-
-extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1(
- _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context);
-
-extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2(
- _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context);
-
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
+
+#include <stdint.h>
+#include <unwind.h>
+
+// Unable to unwind in the ARM index table (section 5 EHABI).
+#define UNW_EXIDX_CANTUNWIND 0x1
+
+static inline uint32_t signExtendPrel31(uint32_t data) {
+ return data | ((data & 0x40000000u) << 1);
+}
+
+static inline uint32_t readPrel31(const uint32_t *data) {
+ return (((uint32_t)(uintptr_t)data) + signExtendPrel31(*data));
+}
+
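// Illustrative worked example of the prel31 helpers above: a prel31 field
// holds a signed 31-bit offset relative to the field's own address, so the
// sign bit (bit 30) is replicated into bit 31 before the base is added.
//   signExtendPrel31(0x7ffffffc) == 0xfffffffc        // i.e. -4
//   readPrel31(&word)            == (uint32_t)&word + 0xfffffffc
// This is how EHABI index-table entries are turned back into absolute
// addresses by the unwinder.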
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0(
+ _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context);
+
+extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1(
+ _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context);
+
+extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2(
+ _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context);
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
#endif // defined(_LIBUNWIND_ARM_EHABI)
-
-#endif // __UNWIND_EHABI_H__
+
+#endif // __UNWIND_EHABI_H__
diff --git a/contrib/libs/libunwind/src/Unwind-sjlj.c b/contrib/libs/libunwind/src/Unwind-sjlj.c
index 18ece59862..d487995bb7 100644
--- a/contrib/libs/libunwind/src/Unwind-sjlj.c
+++ b/contrib/libs/libunwind/src/Unwind-sjlj.c
@@ -1,23 +1,23 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Implements setjmp-longjmp based C++ exceptions
-//
-//===----------------------------------------------------------------------===//
-
-#include <unwind.h>
-
+//
+//
+// Implements setjmp-longjmp based C++ exceptions
+//
+//===----------------------------------------------------------------------===//
+
+#include <unwind.h>
+
#include <inttypes.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-
-#include "config.h"
-
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "config.h"
+
/// With SJLJ based exceptions, any function that has a catch clause or needs to
/// do any clean up when an exception propagates through it, needs to call
/// \c _Unwind_SjLj_Register at the start of the function and
@@ -25,19 +25,19 @@
/// the address of a block of memory in the function's stack frame. The runtime
/// keeps a linked list (stack) of these blocks - one per thread. The calling
/// function also sets the personality and lsda fields of the block.
-
+
#if defined(_LIBUNWIND_BUILD_SJLJ_APIS)
-
-struct _Unwind_FunctionContext {
- // next function in stack of handlers
- struct _Unwind_FunctionContext *prev;
-
+
+struct _Unwind_FunctionContext {
+ // next function in stack of handlers
+ struct _Unwind_FunctionContext *prev;
+
#if defined(__ve__)
// VE requires storing 64-bit pointers in the buffer for SjLj exceptions.
// We expand the size of values defined here. This size must be matched
// to the size returned by TargetMachine::getSjLjDataSize().
- // set by calling function before registering to be the landing pad
+ // set by calling function before registering to be the landing pad
uint64_t resumeLocation;
// set by personality handler to be parameters passed to landing pad function
@@ -45,20 +45,20 @@ struct _Unwind_FunctionContext {
#else
// set by calling function before registering to be the landing pad
uint32_t resumeLocation;
-
- // set by personality handler to be parameters passed to landing pad function
+
+ // set by personality handler to be parameters passed to landing pad function
uint32_t resumeParameters[4];
#endif
-
- // set by calling function before registering
+
+ // set by calling function before registering
_Unwind_Personality_Fn personality; // arm offset=24
- uintptr_t lsda; // arm offset=28
-
- // variable length array, contains registers to restore
- // 0 = r7, 1 = pc, 2 = sp
- void *jbuf[];
-};
-
+ uintptr_t lsda; // arm offset=28
+
+ // variable length array, contains registers to restore
+ // 0 = r7, 1 = pc, 2 = sp
+ void *jbuf[];
+};
+
#if defined(_LIBUNWIND_HAS_NO_THREADS)
# define _LIBUNWIND_THREAD_LOCAL
#else
@@ -72,7 +72,7 @@ struct _Unwind_FunctionContext {
# error Unable to create thread local storage
# endif
#endif
-
+
#if !defined(FOR_DYLD)
@@ -102,427 +102,427 @@ __Unwind_SjLj_SetTopOfFunctionStack(struct _Unwind_FunctionContext *fc) {
#endif
-/// Called at start of each function that catches exceptions
-_LIBUNWIND_EXPORT void
-_Unwind_SjLj_Register(struct _Unwind_FunctionContext *fc) {
- fc->prev = __Unwind_SjLj_GetTopOfFunctionStack();
- __Unwind_SjLj_SetTopOfFunctionStack(fc);
-}
-
-
-/// Called at end of each function that catches exceptions
-_LIBUNWIND_EXPORT void
-_Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) {
- __Unwind_SjLj_SetTopOfFunctionStack(fc->prev);
-}
-
-
-static _Unwind_Reason_Code
-unwind_phase1(struct _Unwind_Exception *exception_object) {
- _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack();
+/// Called at start of each function that catches exceptions
+_LIBUNWIND_EXPORT void
+_Unwind_SjLj_Register(struct _Unwind_FunctionContext *fc) {
+ fc->prev = __Unwind_SjLj_GetTopOfFunctionStack();
+ __Unwind_SjLj_SetTopOfFunctionStack(fc);
+}
+
+
+/// Called at end of each function that catches exceptions
+_LIBUNWIND_EXPORT void
+_Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) {
+ __Unwind_SjLj_SetTopOfFunctionStack(fc->prev);
+}
+
+
+static _Unwind_Reason_Code
+unwind_phase1(struct _Unwind_Exception *exception_object) {
+ _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack();
_LIBUNWIND_TRACE_UNWINDING("unwind_phase1: initial function-context=%p",
(void *)c);
-
- // walk each frame looking for a place to stop
- for (bool handlerNotFound = true; handlerNotFound; c = c->prev) {
-
- // check for no more frames
- if (c == NULL) {
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached "
+
+ // walk each frame looking for a place to stop
+ for (bool handlerNotFound = true; handlerNotFound; c = c->prev) {
+
+ // check for no more frames
+ if (c == NULL) {
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached "
"bottom => _URC_END_OF_STACK",
(void *)exception_object);
- return _URC_END_OF_STACK;
- }
-
+ return _URC_END_OF_STACK;
+ }
+
_LIBUNWIND_TRACE_UNWINDING("unwind_phase1: function-context=%p", (void *)c);
- // if there is a personality routine, ask it if it will want to stop at this
- // frame
- if (c->personality != NULL) {
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling "
+ // if there is a personality routine, ask it if it will want to stop at this
+ // frame
+ if (c->personality != NULL) {
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling "
"personality function %p",
(void *)exception_object,
(void *)c->personality);
- _Unwind_Reason_Code personalityResult = (*c->personality)(
- 1, _UA_SEARCH_PHASE, exception_object->exception_class,
- exception_object, (struct _Unwind_Context *)c);
- switch (personalityResult) {
- case _URC_HANDLER_FOUND:
- // found a catch clause or locals that need destructing in this frame
- // stop search and remember function context
- handlerNotFound = false;
- exception_object->private_2 = (uintptr_t) c;
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): "
+ _Unwind_Reason_Code personalityResult = (*c->personality)(
+ 1, _UA_SEARCH_PHASE, exception_object->exception_class,
+ exception_object, (struct _Unwind_Context *)c);
+ switch (personalityResult) {
+ case _URC_HANDLER_FOUND:
+ // found a catch clause or locals that need destructing in this frame
+ // stop search and remember function context
+ handlerNotFound = false;
+ exception_object->private_2 = (uintptr_t) c;
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): "
"_URC_HANDLER_FOUND",
(void *)exception_object);
- return _URC_NO_REASON;
-
- case _URC_CONTINUE_UNWIND:
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): "
+ return _URC_NO_REASON;
+
+ case _URC_CONTINUE_UNWIND:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): "
"_URC_CONTINUE_UNWIND",
(void *)exception_object);
- // continue unwinding
- break;
-
- default:
- // something went wrong
- _LIBUNWIND_TRACE_UNWINDING(
+ // continue unwinding
+ break;
+
+ default:
+ // something went wrong
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR",
(void *)exception_object);
- return _URC_FATAL_PHASE1_ERROR;
- }
- }
- }
- return _URC_NO_REASON;
-}
-
-
-static _Unwind_Reason_Code
-unwind_phase2(struct _Unwind_Exception *exception_object) {
+ return _URC_FATAL_PHASE1_ERROR;
+ }
+ }
+ }
+ return _URC_NO_REASON;
+}
+
+
+static _Unwind_Reason_Code
+unwind_phase2(struct _Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
(void *)exception_object);
-
- // walk each frame until we reach where search phase said to stop
- _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack();
- while (true) {
+
+ // walk each frame until we reach where search phase said to stop
+ _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack();
+ while (true) {
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2s(ex_ojb=%p): context=%p",
(void *)exception_object, (void *)c);
-
- // check for no more frames
- if (c == NULL) {
+
+ // check for no more frames
+ if (c == NULL) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_step() reached "
"bottom => _URC_END_OF_STACK",
(void *)exception_object);
- return _URC_END_OF_STACK;
- }
-
- // if there is a personality routine, tell it we are unwinding
- if (c->personality != NULL) {
- _Unwind_Action action = _UA_CLEANUP_PHASE;
- if ((uintptr_t) c == exception_object->private_2)
- action = (_Unwind_Action)(
- _UA_CLEANUP_PHASE |
- _UA_HANDLER_FRAME); // tell personality this was the frame it marked
- // in phase 1
- _Unwind_Reason_Code personalityResult =
- (*c->personality)(1, action, exception_object->exception_class,
- exception_object, (struct _Unwind_Context *)c);
- switch (personalityResult) {
- case _URC_CONTINUE_UNWIND:
- // continue unwinding
- _LIBUNWIND_TRACE_UNWINDING(
+ return _URC_END_OF_STACK;
+ }
+
+ // if there is a personality routine, tell it we are unwinding
+ if (c->personality != NULL) {
+ _Unwind_Action action = _UA_CLEANUP_PHASE;
+ if ((uintptr_t) c == exception_object->private_2)
+ action = (_Unwind_Action)(
+ _UA_CLEANUP_PHASE |
+ _UA_HANDLER_FRAME); // tell personality this was the frame it marked
+ // in phase 1
+ _Unwind_Reason_Code personalityResult =
+ (*c->personality)(1, action, exception_object->exception_class,
+ exception_object, (struct _Unwind_Context *)c);
+ switch (personalityResult) {
+ case _URC_CONTINUE_UNWIND:
+ // continue unwinding
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND",
(void *)exception_object);
- if ((uintptr_t) c == exception_object->private_2) {
- // phase 1 said we would stop at this frame, but we did not...
- _LIBUNWIND_ABORT("during phase1 personality function said it would "
- "stop here, but now if phase2 it did not stop here");
- }
- break;
- case _URC_INSTALL_CONTEXT:
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): "
+ if ((uintptr_t) c == exception_object->private_2) {
+ // phase 1 said we would stop at this frame, but we did not...
+ _LIBUNWIND_ABORT("during phase1 personality function said it would "
+ "stop here, but now if phase2 it did not stop here");
+ }
+ break;
+ case _URC_INSTALL_CONTEXT:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): "
"_URC_INSTALL_CONTEXT, will resume at "
"landing pad %p",
(void *)exception_object, c->jbuf[1]);
- // personality routine says to transfer control to landing pad
- // we may get control back if landing pad calls _Unwind_Resume()
- __Unwind_SjLj_SetTopOfFunctionStack(c);
- __builtin_longjmp(c->jbuf, 1);
+ // personality routine says to transfer control to landing pad
+ // we may get control back if landing pad calls _Unwind_Resume()
+ __Unwind_SjLj_SetTopOfFunctionStack(c);
+ __builtin_longjmp(c->jbuf, 1);
// __unw_resume() only returns if there was an error
- return _URC_FATAL_PHASE2_ERROR;
- default:
- // something went wrong
- _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
- personalityResult);
- return _URC_FATAL_PHASE2_ERROR;
- }
- }
- c = c->prev;
- }
-
- // clean up phase did not resume at the frame that the search phase said it
- // would
- return _URC_FATAL_PHASE2_ERROR;
-}
-
-
-static _Unwind_Reason_Code
-unwind_phase2_forced(struct _Unwind_Exception *exception_object,
- _Unwind_Stop_Fn stop, void *stop_parameter) {
- // walk each frame until we reach where search phase said to stop
- _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack();
- while (true) {
-
- // get next frame (skip over first which is _Unwind_RaiseException)
- if (c == NULL) {
+ return _URC_FATAL_PHASE2_ERROR;
+ default:
+ // something went wrong
+ _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
+ personalityResult);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ }
+ c = c->prev;
+ }
+
+ // clean up phase did not resume at the frame that the search phase said it
+ // would
+ return _URC_FATAL_PHASE2_ERROR;
+}
+
+
+static _Unwind_Reason_Code
+unwind_phase2_forced(struct _Unwind_Exception *exception_object,
+ _Unwind_Stop_Fn stop, void *stop_parameter) {
+ // walk each frame until we reach where search phase said to stop
+ _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack();
+ while (true) {
+
+ // get next frame (skip over first which is _Unwind_RaiseException)
+ if (c == NULL) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_step() reached "
"bottom => _URC_END_OF_STACK",
(void *)exception_object);
- return _URC_END_OF_STACK;
- }
-
- // call stop function at each frame
- _Unwind_Action action =
- (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE);
- _Unwind_Reason_Code stopResult =
- (*stop)(1, action, exception_object->exception_class, exception_object,
- (struct _Unwind_Context *)c, stop_parameter);
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ return _URC_END_OF_STACK;
+ }
+
+ // call stop function at each frame
+ _Unwind_Action action =
+ (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE);
+ _Unwind_Reason_Code stopResult =
+ (*stop)(1, action, exception_object->exception_class, exception_object,
+ (struct _Unwind_Context *)c, stop_parameter);
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
"stop function returned %d",
(void *)exception_object, stopResult);
- if (stopResult != _URC_NO_REASON) {
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ if (stopResult != _URC_NO_REASON) {
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
"stopped by stop function",
(void *)exception_object);
- return _URC_FATAL_PHASE2_ERROR;
- }
-
- // if there is a personality routine, tell it we are unwinding
- if (c->personality != NULL) {
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
+ // if there is a personality routine, tell it we are unwinding
+ if (c->personality != NULL) {
_Unwind_Personality_Fn p = (_Unwind_Personality_Fn)c->personality;
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
"calling personality function %p",
(void *)exception_object, (void *)p);
- _Unwind_Reason_Code personalityResult =
- (*p)(1, action, exception_object->exception_class, exception_object,
- (struct _Unwind_Context *)c);
- switch (personalityResult) {
- case _URC_CONTINUE_UNWIND:
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ _Unwind_Reason_Code personalityResult =
+ (*p)(1, action, exception_object->exception_class, exception_object,
+ (struct _Unwind_Context *)c);
+ switch (personalityResult) {
+ case _URC_CONTINUE_UNWIND:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
"personality returned _URC_CONTINUE_UNWIND",
(void *)exception_object);
- // destructors called, continue unwinding
- break;
- case _URC_INSTALL_CONTEXT:
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ // destructors called, continue unwinding
+ break;
+ case _URC_INSTALL_CONTEXT:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
"personality returned _URC_INSTALL_CONTEXT",
(void *)exception_object);
- // we may get control back if landing pad calls _Unwind_Resume()
- __Unwind_SjLj_SetTopOfFunctionStack(c);
- __builtin_longjmp(c->jbuf, 1);
- break;
- default:
- // something went wrong
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
- "personality returned %d, "
+ // we may get control back if landing pad calls _Unwind_Resume()
+ __Unwind_SjLj_SetTopOfFunctionStack(c);
+ __builtin_longjmp(c->jbuf, 1);
+ break;
+ default:
+ // something went wrong
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned %d, "
"_URC_FATAL_PHASE2_ERROR",
(void *)exception_object, personalityResult);
- return _URC_FATAL_PHASE2_ERROR;
- }
- }
- c = c->prev;
- }
-
- // call stop function one last time and tell it we've reached the end of the
- // stack
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop "
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ }
+ c = c->prev;
+ }
+
+ // call stop function one last time and tell it we've reached the end of the
+ // stack
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop "
"function with _UA_END_OF_STACK",
(void *)exception_object);
- _Unwind_Action lastAction =
- (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK);
- (*stop)(1, lastAction, exception_object->exception_class, exception_object,
- (struct _Unwind_Context *)c, stop_parameter);
-
- // clean up phase did not resume at the frame that the search phase said it
- // would
- return _URC_FATAL_PHASE2_ERROR;
-}
-
-
-/// Called by __cxa_throw. Only returns if there is a fatal error
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) {
+ _Unwind_Action lastAction =
+ (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK);
+ (*stop)(1, lastAction, exception_object->exception_class, exception_object,
+ (struct _Unwind_Context *)c, stop_parameter);
+
+ // clean up phase did not resume at the frame that the search phase said it
+ // would
+ return _URC_FATAL_PHASE2_ERROR;
+}
+
+
+/// Called by __cxa_throw. Only returns if there is a fatal error
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_SjLj_RaiseException(ex_obj=%p)",
(void *)exception_object);
-
- // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right
- // thing
- exception_object->private_1 = 0;
- exception_object->private_2 = 0;
-
- // phase 1: the search phase
- _Unwind_Reason_Code phase1 = unwind_phase1(exception_object);
- if (phase1 != _URC_NO_REASON)
- return phase1;
-
- // phase 2: the clean up phase
- return unwind_phase2(exception_object);
-}
-
-
-
-/// When _Unwind_RaiseException() is in phase2, it hands control
-/// to the personality function at each frame. The personality
-/// may force a jump to a landing pad in that function, the landing
-/// pad code may then call _Unwind_Resume() to continue with the
-/// unwinding. Note: the call to _Unwind_Resume() is from compiler
-/// generated user code. All other _Unwind_* routines are called
-/// by the C++ runtime __cxa_* routines.
-///
-/// Re-throwing an exception is implemented by having the code call
-/// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow()
-_LIBUNWIND_EXPORT void
-_Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) {
+
+ // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right
+ // thing
+ exception_object->private_1 = 0;
+ exception_object->private_2 = 0;
+
+ // phase 1: the search phase
+ _Unwind_Reason_Code phase1 = unwind_phase1(exception_object);
+ if (phase1 != _URC_NO_REASON)
+ return phase1;
+
+ // phase 2: the clean up phase
+ return unwind_phase2(exception_object);
+}
+
+
+
+/// When _Unwind_RaiseException() is in phase2, it hands control
+/// to the personality function at each frame. The personality
+/// may force a jump to a landing pad in that function, the landing
+/// pad code may then call _Unwind_Resume() to continue with the
+/// unwinding. Note: the call to _Unwind_Resume() is from compiler
+/// generated user code. All other _Unwind_* routines are called
+/// by the C++ runtime __cxa_* routines.
+///
+/// Re-throwing an exception is implemented by having the code call
+/// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow()
+_LIBUNWIND_EXPORT void
+_Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_SjLj_Resume(ex_obj=%p)",
(void *)exception_object);
-
- if (exception_object->private_1 != 0)
- unwind_phase2_forced(exception_object,
- (_Unwind_Stop_Fn) exception_object->private_1,
- (void *)exception_object->private_2);
- else
- unwind_phase2(exception_object);
-
- // clients assume _Unwind_Resume() does not return, so all we can do is abort.
- _LIBUNWIND_ABORT("_Unwind_SjLj_Resume() can't return");
-}
-
-
-/// Called by __cxa_rethrow().
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) {
- _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), "
+
+ if (exception_object->private_1 != 0)
+ unwind_phase2_forced(exception_object,
+ (_Unwind_Stop_Fn) exception_object->private_1,
+ (void *)exception_object->private_2);
+ else
+ unwind_phase2(exception_object);
+
+ // clients assume _Unwind_Resume() does not return, so all we can do is abort.
+ _LIBUNWIND_ABORT("_Unwind_SjLj_Resume() can't return");
+}
+
+
+/// Called by __cxa_rethrow().
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) {
+ _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), "
"private_1=%" PRIuPTR,
(void *)exception_object, exception_object->private_1);
- // If this is non-forced and a stopping place was found, then this is a
- // re-throw.
- // Call _Unwind_RaiseException() as if this was a new exception.
- if (exception_object->private_1 == 0) {
- return _Unwind_SjLj_RaiseException(exception_object);
- // should return if there is no catch clause, so that __cxa_rethrow can call
- // std::terminate()
- }
-
- // Call through to _Unwind_Resume() which distinguishes between forced and
- // regular exceptions.
- _Unwind_SjLj_Resume(exception_object);
- _LIBUNWIND_ABORT("__Unwind_SjLj_Resume_or_Rethrow() called "
- "_Unwind_SjLj_Resume() which unexpectedly returned");
-}
-
-
-/// Called by personality handler during phase 2 to get LSDA for current frame.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
- _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) "
+ // If this is non-forced and a stopping place was found, then this is a
+ // re-throw.
+ // Call _Unwind_RaiseException() as if this was a new exception.
+ if (exception_object->private_1 == 0) {
+ return _Unwind_SjLj_RaiseException(exception_object);
+ // should return if there is no catch clause, so that __cxa_rethrow can call
+ // std::terminate()
+ }
+
+ // Call through to _Unwind_Resume() which distinguishes between forced and
+ // regular exceptions.
+ _Unwind_SjLj_Resume(exception_object);
+ _LIBUNWIND_ABORT("__Unwind_SjLj_Resume_or_Rethrow() called "
+ "_Unwind_SjLj_Resume() which unexpectedly returned");
+}
+
+
+/// Called by personality handler during phase 2 to get LSDA for current frame.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) "
"=> 0x%" PRIuPTR,
(void *)context, ufc->lsda);
- return ufc->lsda;
-}
-
-
-/// Called by personality handler during phase 2 to get register values.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context,
- int index) {
+ return ufc->lsda;
+}
+
+
+/// Called by personality handler during phase 2 to get register values.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context,
+ int index) {
_LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d)", (void *)context,
index);
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
- return ufc->resumeParameters[index];
-}
-
-
-/// Called by personality handler during phase 2 to alter register values.
-_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index,
- uintptr_t new_value) {
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ return ufc->resumeParameters[index];
+}
+
+
+/// Called by personality handler during phase 2 to alter register values.
+_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index,
+ uintptr_t new_value) {
_LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%" PRIuPTR
")",
(void *)context, index, new_value);
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
- ufc->resumeParameters[index] = new_value;
-}
-
-
-/// Called by personality handler during phase 2 to get instruction pointer.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ ufc->resumeParameters[index] = new_value;
+}
+
+
+/// Called by personality handler during phase 2 to get instruction pointer.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
_LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIu32,
(void *)context, ufc->resumeLocation + 1);
- return ufc->resumeLocation + 1;
-}
-
-
-/// Called by personality handler during phase 2 to get instruction pointer.
-/// ipBefore is a boolean that says if IP is already adjusted to be the call
-/// site address. Normally IP is the return address.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
- int *ipBefore) {
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
- *ipBefore = 0;
+ return ufc->resumeLocation + 1;
+}
+
+
+/// Called by personality handler during phase 2 to get instruction pointer.
+/// ipBefore is a boolean that says if IP is already adjusted to be the call
+/// site address. Normally IP is the return address.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
+ int *ipBefore) {
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ *ipBefore = 0;
_LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p, %p) => 0x%" PRIu32,
(void *)context, (void *)ipBefore,
ufc->resumeLocation + 1);
- return ufc->resumeLocation + 1;
-}
-
-
-/// Called by personality handler during phase 2 to alter instruction pointer.
-_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context,
- uintptr_t new_value) {
+ return ufc->resumeLocation + 1;
+}
+
+
+/// Called by personality handler during phase 2 to alter instruction pointer.
+_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context,
+ uintptr_t new_value) {
_LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%" PRIuPTR ")",
(void *)context, new_value);
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
- ufc->resumeLocation = new_value - 1;
-}
-
-
-/// Called by personality handler during phase 2 to find the start of the
-/// function.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetRegionStart(struct _Unwind_Context *context) {
- // Not supported or needed for sjlj based unwinding
- (void)context;
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ ufc->resumeLocation = new_value - 1;
+}
+
+
+/// Called by personality handler during phase 2 to find the start of the
+/// function.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetRegionStart(struct _Unwind_Context *context) {
+ // Not supported or needed for sjlj based unwinding
+ (void)context;
_LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p)", (void *)context);
- return 0;
-}
-
-
-/// Called by personality handler during phase 2 if a foreign exception
-/// is caught.
-_LIBUNWIND_EXPORT void
-_Unwind_DeleteException(struct _Unwind_Exception *exception_object) {
+ return 0;
+}
+
+
+/// Called by personality handler during phase 2 if a foreign exception
+/// is caught.
+_LIBUNWIND_EXPORT void
+_Unwind_DeleteException(struct _Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)",
(void *)exception_object);
- if (exception_object->exception_cleanup != NULL)
- (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
- exception_object);
-}
-
-
-
-/// Called by personality handler during phase 2 to get base address for data
-/// relative encodings.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetDataRelBase(struct _Unwind_Context *context) {
- // Not supported or needed for sjlj based unwinding
- (void)context;
+ if (exception_object->exception_cleanup != NULL)
+ (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
+ exception_object);
+}
+
+
+
+/// Called by personality handler during phase 2 to get base address for data
+/// relative encodings.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetDataRelBase(struct _Unwind_Context *context) {
+ // Not supported or needed for sjlj based unwinding
+ (void)context;
_LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context);
- _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented");
-}
-
-
-/// Called by personality handler during phase 2 to get base address for text
-/// relative encodings.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetTextRelBase(struct _Unwind_Context *context) {
- // Not supported or needed for sjlj based unwinding
- (void)context;
+ _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented");
+}
+
+
+/// Called by personality handler during phase 2 to get base address for text
+/// relative encodings.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetTextRelBase(struct _Unwind_Context *context) {
+ // Not supported or needed for sjlj based unwinding
+ (void)context;
_LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context);
- _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented");
-}
-
-
-/// Called by personality handler to get "Call Frame Area" for current frame.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) {
+ _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented");
+}
+
+
+/// Called by personality handler to get "Call Frame Area" for current frame.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) {
_LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p)", (void *)context);
- if (context != NULL) {
- _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
- // Setjmp/longjmp based exceptions don't have a true CFA.
- // Instead, the SP in the jmpbuf is the closest approximation.
- return (uintptr_t) ufc->jbuf[2];
- }
- return 0;
-}
-
+ if (context != NULL) {
+ _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context;
+ // Setjmp/longjmp based exceptions don't have a true CFA.
+ // Instead, the SP in the jmpbuf is the closest approximation.
+ return (uintptr_t) ufc->jbuf[2];
+ }
+ return 0;
+}
+
#endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS)
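For orientation, the SjLj entry points above (_Unwind_SjLj_RaiseException, unwind_phase2, unwind_phase2_forced) all drive the same two-pass walk over the thread's chain of setjmp/longjmp frame records: phase 1 searches for a frame whose personality routine will handle the exception, and phase 2 walks the chain again, running cleanups until it reaches that frame and longjmps into its landing pad. The following is a minimal, self-contained sketch of that control flow only; Frame, wantsToHandle and searchPhase/cleanupPhase are hypothetical names for illustration, not the library's API.

#include <cstdio>

struct Frame {
  Frame *prev;         // next-outer frame, like _Unwind_FunctionContext::prev
  bool wantsToHandle;  // what the personality routine would answer in phase 1
  bool hasCleanup;     // whether phase 2 must run destructors in this frame
};

// Phase 1 ("search"): find the frame that will catch, changing no state.
static Frame *searchPhase(Frame *top) {
  for (Frame *f = top; f != nullptr; f = f->prev)
    if (f->wantsToHandle)
      return f;
  return nullptr;  // corresponds to _URC_END_OF_STACK
}

// Phase 2 ("cleanup"): walk again, running cleanups until the handler frame;
// the real code longjmps into the landing pad instead of returning.
static bool cleanupPhase(Frame *top, Frame *handler) {
  for (Frame *f = top; f != nullptr; f = f->prev) {
    if (f->hasCleanup)
      std::printf("cleanup in frame %p\n", static_cast<void *>(f));
    if (f == handler)
      return true;
  }
  return false;  // handler frame vanished: _URC_FATAL_PHASE2_ERROR
}

int main() {
  Frame outer{nullptr, true, false};  // outermost frame pretends to catch
  Frame inner{&outer, false, true};   // inner frame only has a destructor
  Frame *handler = searchPhase(&inner);
  return (handler != nullptr && cleanupPhase(&inner, handler)) ? 0 : 1;
}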
diff --git a/contrib/libs/libunwind/src/UnwindCursor.hpp b/contrib/libs/libunwind/src/UnwindCursor.hpp
index d751111dd2..1ca842f33a 100644
--- a/contrib/libs/libunwind/src/UnwindCursor.hpp
+++ b/contrib/libs/libunwind/src/UnwindCursor.hpp
@@ -1,30 +1,30 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
+//
+//
// C++ interface to lower levels of libunwind
-//===----------------------------------------------------------------------===//
-
-#ifndef __UNWINDCURSOR_HPP__
-#define __UNWINDCURSOR_HPP__
-
+//===----------------------------------------------------------------------===//
+
+#ifndef __UNWINDCURSOR_HPP__
+#define __UNWINDCURSOR_HPP__
+
#include "cet_unwind.h"
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unwind.h>
-
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unwind.h>
+
#ifdef _WIN32
#include <windows.h>
#include <ntverp.h>
#endif
-#ifdef __APPLE__
- #include <mach-o/dyld.h>
-#endif
-
+#ifdef __APPLE__
+ #include <mach-o/dyld.h>
+#endif
+
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
// Provide a definition for the DISPATCHER_CONTEXT struct for old (Win7 and
// earlier) SDKs.
@@ -62,402 +62,402 @@ extern "C" _Unwind_Reason_Code __libunwind_seh_personality(
#endif
-#include "config.h"
-
-#include "AddressSpace.hpp"
-#include "CompactUnwinder.hpp"
-#include "config.h"
-#include "DwarfInstructions.hpp"
-#include "EHHeaderParser.hpp"
-#include "libunwind.h"
-#include "Registers.hpp"
+#include "config.h"
+
+#include "AddressSpace.hpp"
+#include "CompactUnwinder.hpp"
+#include "config.h"
+#include "DwarfInstructions.hpp"
+#include "EHHeaderParser.hpp"
+#include "libunwind.h"
+#include "Registers.hpp"
#include "RWMutex.hpp"
-#include "Unwind-EHABI.h"
-
-namespace libunwind {
-
+#include "Unwind-EHABI.h"
+
+namespace libunwind {
+
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-/// Cache of recently found FDEs.
-template <typename A>
-class _LIBUNWIND_HIDDEN DwarfFDECache {
- typedef typename A::pint_t pint_t;
-public:
+/// Cache of recently found FDEs.
+template <typename A>
+class _LIBUNWIND_HIDDEN DwarfFDECache {
+ typedef typename A::pint_t pint_t;
+public:
static constexpr pint_t kSearchAll = static_cast<pint_t>(-1);
- static pint_t findFDE(pint_t mh, pint_t pc);
- static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde);
- static void removeAllIn(pint_t mh);
- static void iterateCacheEntries(void (*func)(unw_word_t ip_start,
- unw_word_t ip_end,
- unw_word_t fde, unw_word_t mh));
-
-private:
-
- struct entry {
- pint_t mh;
- pint_t ip_start;
- pint_t ip_end;
- pint_t fde;
- };
-
- // These fields are all static to avoid needing an initializer.
- // There is only one instance of this class per process.
+ static pint_t findFDE(pint_t mh, pint_t pc);
+ static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde);
+ static void removeAllIn(pint_t mh);
+ static void iterateCacheEntries(void (*func)(unw_word_t ip_start,
+ unw_word_t ip_end,
+ unw_word_t fde, unw_word_t mh));
+
+private:
+
+ struct entry {
+ pint_t mh;
+ pint_t ip_start;
+ pint_t ip_end;
+ pint_t fde;
+ };
+
+ // These fields are all static to avoid needing an initializer.
+ // There is only one instance of this class per process.
static RWMutex _lock;
-#ifdef __APPLE__
- static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide);
- static bool _registeredForDyldUnloads;
-#endif
- static entry *_buffer;
- static entry *_bufferUsed;
- static entry *_bufferEnd;
- static entry _initialBuffer[64];
-};
-
-template <typename A>
-typename DwarfFDECache<A>::entry *
-DwarfFDECache<A>::_buffer = _initialBuffer;
-
-template <typename A>
-typename DwarfFDECache<A>::entry *
-DwarfFDECache<A>::_bufferUsed = _initialBuffer;
-
-template <typename A>
-typename DwarfFDECache<A>::entry *
-DwarfFDECache<A>::_bufferEnd = &_initialBuffer[64];
-
-template <typename A>
-typename DwarfFDECache<A>::entry DwarfFDECache<A>::_initialBuffer[64];
-
-template <typename A>
+#ifdef __APPLE__
+ static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide);
+ static bool _registeredForDyldUnloads;
+#endif
+ static entry *_buffer;
+ static entry *_bufferUsed;
+ static entry *_bufferEnd;
+ static entry _initialBuffer[64];
+};
+
+template <typename A>
+typename DwarfFDECache<A>::entry *
+DwarfFDECache<A>::_buffer = _initialBuffer;
+
+template <typename A>
+typename DwarfFDECache<A>::entry *
+DwarfFDECache<A>::_bufferUsed = _initialBuffer;
+
+template <typename A>
+typename DwarfFDECache<A>::entry *
+DwarfFDECache<A>::_bufferEnd = &_initialBuffer[64];
+
+template <typename A>
+typename DwarfFDECache<A>::entry DwarfFDECache<A>::_initialBuffer[64];
+
+template <typename A>
RWMutex DwarfFDECache<A>::_lock;
-
-#ifdef __APPLE__
-template <typename A>
-bool DwarfFDECache<A>::_registeredForDyldUnloads = false;
-#endif
-
-template <typename A>
-typename A::pint_t DwarfFDECache<A>::findFDE(pint_t mh, pint_t pc) {
- pint_t result = 0;
+
+#ifdef __APPLE__
+template <typename A>
+bool DwarfFDECache<A>::_registeredForDyldUnloads = false;
+#endif
+
+template <typename A>
+typename A::pint_t DwarfFDECache<A>::findFDE(pint_t mh, pint_t pc) {
+ pint_t result = 0;
_LIBUNWIND_LOG_IF_FALSE(_lock.lock_shared());
- for (entry *p = _buffer; p < _bufferUsed; ++p) {
+ for (entry *p = _buffer; p < _bufferUsed; ++p) {
if ((mh == p->mh) || (mh == kSearchAll)) {
- if ((p->ip_start <= pc) && (pc < p->ip_end)) {
- result = p->fde;
- break;
- }
- }
- }
+ if ((p->ip_start <= pc) && (pc < p->ip_end)) {
+ result = p->fde;
+ break;
+ }
+ }
+ }
_LIBUNWIND_LOG_IF_FALSE(_lock.unlock_shared());
- return result;
-}
-
-template <typename A>
-void DwarfFDECache<A>::add(pint_t mh, pint_t ip_start, pint_t ip_end,
- pint_t fde) {
-#if !defined(_LIBUNWIND_NO_HEAP)
+ return result;
+}
+
+template <typename A>
+void DwarfFDECache<A>::add(pint_t mh, pint_t ip_start, pint_t ip_end,
+ pint_t fde) {
+#if !defined(_LIBUNWIND_NO_HEAP)
_LIBUNWIND_LOG_IF_FALSE(_lock.lock());
- if (_bufferUsed >= _bufferEnd) {
- size_t oldSize = (size_t)(_bufferEnd - _buffer);
- size_t newSize = oldSize * 4;
- // Can't use operator new (we are below it).
- entry *newBuffer = (entry *)malloc(newSize * sizeof(entry));
- memcpy(newBuffer, _buffer, oldSize * sizeof(entry));
- if (_buffer != _initialBuffer)
- free(_buffer);
- _buffer = newBuffer;
- _bufferUsed = &newBuffer[oldSize];
- _bufferEnd = &newBuffer[newSize];
- }
- _bufferUsed->mh = mh;
- _bufferUsed->ip_start = ip_start;
- _bufferUsed->ip_end = ip_end;
- _bufferUsed->fde = fde;
- ++_bufferUsed;
-#ifdef __APPLE__
- if (!_registeredForDyldUnloads) {
- _dyld_register_func_for_remove_image(&dyldUnloadHook);
- _registeredForDyldUnloads = true;
- }
-#endif
+ if (_bufferUsed >= _bufferEnd) {
+ size_t oldSize = (size_t)(_bufferEnd - _buffer);
+ size_t newSize = oldSize * 4;
+ // Can't use operator new (we are below it).
+ entry *newBuffer = (entry *)malloc(newSize * sizeof(entry));
+ memcpy(newBuffer, _buffer, oldSize * sizeof(entry));
+ if (_buffer != _initialBuffer)
+ free(_buffer);
+ _buffer = newBuffer;
+ _bufferUsed = &newBuffer[oldSize];
+ _bufferEnd = &newBuffer[newSize];
+ }
+ _bufferUsed->mh = mh;
+ _bufferUsed->ip_start = ip_start;
+ _bufferUsed->ip_end = ip_end;
+ _bufferUsed->fde = fde;
+ ++_bufferUsed;
+#ifdef __APPLE__
+ if (!_registeredForDyldUnloads) {
+ _dyld_register_func_for_remove_image(&dyldUnloadHook);
+ _registeredForDyldUnloads = true;
+ }
+#endif
_LIBUNWIND_LOG_IF_FALSE(_lock.unlock());
-#endif
-}
-
-template <typename A>
-void DwarfFDECache<A>::removeAllIn(pint_t mh) {
+#endif
+}
+
+template <typename A>
+void DwarfFDECache<A>::removeAllIn(pint_t mh) {
_LIBUNWIND_LOG_IF_FALSE(_lock.lock());
- entry *d = _buffer;
- for (const entry *s = _buffer; s < _bufferUsed; ++s) {
- if (s->mh != mh) {
- if (d != s)
- *d = *s;
- ++d;
- }
- }
- _bufferUsed = d;
+ entry *d = _buffer;
+ for (const entry *s = _buffer; s < _bufferUsed; ++s) {
+ if (s->mh != mh) {
+ if (d != s)
+ *d = *s;
+ ++d;
+ }
+ }
+ _bufferUsed = d;
_LIBUNWIND_LOG_IF_FALSE(_lock.unlock());
-}
-
-#ifdef __APPLE__
-template <typename A>
-void DwarfFDECache<A>::dyldUnloadHook(const struct mach_header *mh, intptr_t ) {
- removeAllIn((pint_t) mh);
-}
-#endif
-
-template <typename A>
-void DwarfFDECache<A>::iterateCacheEntries(void (*func)(
- unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) {
+}
+
+#ifdef __APPLE__
+template <typename A>
+void DwarfFDECache<A>::dyldUnloadHook(const struct mach_header *mh, intptr_t ) {
+ removeAllIn((pint_t) mh);
+}
+#endif
+
+template <typename A>
+void DwarfFDECache<A>::iterateCacheEntries(void (*func)(
+ unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) {
_LIBUNWIND_LOG_IF_FALSE(_lock.lock());
- for (entry *p = _buffer; p < _bufferUsed; ++p) {
- (*func)(p->ip_start, p->ip_end, p->fde, p->mh);
- }
+ for (entry *p = _buffer; p < _bufferUsed; ++p) {
+ (*func)(p->ip_start, p->ip_end, p->fde, p->mh);
+ }
_LIBUNWIND_LOG_IF_FALSE(_lock.unlock());
-}
+}
#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
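The DwarfFDECache above is, at its core, a process-wide array of {mh, ip_start, ip_end, fde} records that is searched linearly and grown with malloc/memcpy because it sits below operator new in the runtime. A reduced sketch of that idea follows, with hypothetical names and with the read/write lock and the mach-o unload hook omitted.

#include <cstdint>
#include <cstdlib>
#include <cstring>

struct FdeEntry { uintptr_t ip_start, ip_end, fde; };

struct FdeCache {
  FdeEntry initial[8];                    // static storage, no initializer needed
  FdeEntry *begin = initial, *used = initial, *end = initial + 8;

  uintptr_t find(uintptr_t pc) const {
    for (const FdeEntry *p = begin; p < used; ++p)
      if (p->ip_start <= pc && pc < p->ip_end)
        return p->fde;
    return 0;
  }

  void add(uintptr_t ip_start, uintptr_t ip_end, uintptr_t fde) {
    if (used == end) {                    // grow 4x, as the real cache does
      size_t oldSize = (size_t)(end - begin);
      FdeEntry *bigger = (FdeEntry *)std::malloc(oldSize * 4 * sizeof(FdeEntry));
      std::memcpy(bigger, begin, oldSize * sizeof(FdeEntry));
      if (begin != initial)
        std::free(begin);
      begin = bigger;
      used = bigger + oldSize;
      end = bigger + oldSize * 4;
    }
    *used++ = FdeEntry{ip_start, ip_end, fde};
  }
};

int main() {
  FdeCache cache;
  cache.add(0x1000, 0x2000, 0xdead);
  return cache.find(0x1800) == 0xdead ? 0 : 1;
}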
-
-
-#define arrayoffsetof(type, index, field) ((size_t)(&((type *)0)[index].field))
-
+
+
+#define arrayoffsetof(type, index, field) ((size_t)(&((type *)0)[index].field))
+
#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-template <typename A> class UnwindSectionHeader {
-public:
- UnwindSectionHeader(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t version() const {
- return _addressSpace.get32(_addr +
- offsetof(unwind_info_section_header, version));
- }
- uint32_t commonEncodingsArraySectionOffset() const {
- return _addressSpace.get32(_addr +
- offsetof(unwind_info_section_header,
- commonEncodingsArraySectionOffset));
- }
- uint32_t commonEncodingsArrayCount() const {
- return _addressSpace.get32(_addr + offsetof(unwind_info_section_header,
- commonEncodingsArrayCount));
- }
- uint32_t personalityArraySectionOffset() const {
- return _addressSpace.get32(_addr + offsetof(unwind_info_section_header,
- personalityArraySectionOffset));
- }
- uint32_t personalityArrayCount() const {
- return _addressSpace.get32(
- _addr + offsetof(unwind_info_section_header, personalityArrayCount));
- }
- uint32_t indexSectionOffset() const {
- return _addressSpace.get32(
- _addr + offsetof(unwind_info_section_header, indexSectionOffset));
- }
- uint32_t indexCount() const {
- return _addressSpace.get32(
- _addr + offsetof(unwind_info_section_header, indexCount));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
-
-template <typename A> class UnwindSectionIndexArray {
-public:
- UnwindSectionIndexArray(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t functionOffset(uint32_t index) const {
- return _addressSpace.get32(
- _addr + arrayoffsetof(unwind_info_section_header_index_entry, index,
- functionOffset));
- }
- uint32_t secondLevelPagesSectionOffset(uint32_t index) const {
- return _addressSpace.get32(
- _addr + arrayoffsetof(unwind_info_section_header_index_entry, index,
- secondLevelPagesSectionOffset));
- }
- uint32_t lsdaIndexArraySectionOffset(uint32_t index) const {
- return _addressSpace.get32(
- _addr + arrayoffsetof(unwind_info_section_header_index_entry, index,
- lsdaIndexArraySectionOffset));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
-
-template <typename A> class UnwindSectionRegularPageHeader {
-public:
- UnwindSectionRegularPageHeader(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t kind() const {
- return _addressSpace.get32(
- _addr + offsetof(unwind_info_regular_second_level_page_header, kind));
- }
- uint16_t entryPageOffset() const {
- return _addressSpace.get16(
- _addr + offsetof(unwind_info_regular_second_level_page_header,
- entryPageOffset));
- }
- uint16_t entryCount() const {
- return _addressSpace.get16(
- _addr +
- offsetof(unwind_info_regular_second_level_page_header, entryCount));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
-
-template <typename A> class UnwindSectionRegularArray {
-public:
- UnwindSectionRegularArray(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t functionOffset(uint32_t index) const {
- return _addressSpace.get32(
- _addr + arrayoffsetof(unwind_info_regular_second_level_entry, index,
- functionOffset));
- }
- uint32_t encoding(uint32_t index) const {
- return _addressSpace.get32(
- _addr +
- arrayoffsetof(unwind_info_regular_second_level_entry, index, encoding));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
-
-template <typename A> class UnwindSectionCompressedPageHeader {
-public:
- UnwindSectionCompressedPageHeader(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t kind() const {
- return _addressSpace.get32(
- _addr +
- offsetof(unwind_info_compressed_second_level_page_header, kind));
- }
- uint16_t entryPageOffset() const {
- return _addressSpace.get16(
- _addr + offsetof(unwind_info_compressed_second_level_page_header,
- entryPageOffset));
- }
- uint16_t entryCount() const {
- return _addressSpace.get16(
- _addr +
- offsetof(unwind_info_compressed_second_level_page_header, entryCount));
- }
- uint16_t encodingsPageOffset() const {
- return _addressSpace.get16(
- _addr + offsetof(unwind_info_compressed_second_level_page_header,
- encodingsPageOffset));
- }
- uint16_t encodingsCount() const {
- return _addressSpace.get16(
- _addr + offsetof(unwind_info_compressed_second_level_page_header,
- encodingsCount));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
-
-template <typename A> class UnwindSectionCompressedArray {
-public:
- UnwindSectionCompressedArray(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t functionOffset(uint32_t index) const {
- return UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(
- _addressSpace.get32(_addr + index * sizeof(uint32_t)));
- }
- uint16_t encodingIndex(uint32_t index) const {
- return UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(
- _addressSpace.get32(_addr + index * sizeof(uint32_t)));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
-
-template <typename A> class UnwindSectionLsdaArray {
-public:
- UnwindSectionLsdaArray(A &addressSpace, typename A::pint_t addr)
- : _addressSpace(addressSpace), _addr(addr) {}
-
- uint32_t functionOffset(uint32_t index) const {
- return _addressSpace.get32(
- _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry,
- index, functionOffset));
- }
- uint32_t lsdaOffset(uint32_t index) const {
- return _addressSpace.get32(
- _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry,
- index, lsdaOffset));
- }
-
-private:
- A &_addressSpace;
- typename A::pint_t _addr;
-};
+template <typename A> class UnwindSectionHeader {
+public:
+ UnwindSectionHeader(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t version() const {
+ return _addressSpace.get32(_addr +
+ offsetof(unwind_info_section_header, version));
+ }
+ uint32_t commonEncodingsArraySectionOffset() const {
+ return _addressSpace.get32(_addr +
+ offsetof(unwind_info_section_header,
+ commonEncodingsArraySectionOffset));
+ }
+ uint32_t commonEncodingsArrayCount() const {
+ return _addressSpace.get32(_addr + offsetof(unwind_info_section_header,
+ commonEncodingsArrayCount));
+ }
+ uint32_t personalityArraySectionOffset() const {
+ return _addressSpace.get32(_addr + offsetof(unwind_info_section_header,
+ personalityArraySectionOffset));
+ }
+ uint32_t personalityArrayCount() const {
+ return _addressSpace.get32(
+ _addr + offsetof(unwind_info_section_header, personalityArrayCount));
+ }
+ uint32_t indexSectionOffset() const {
+ return _addressSpace.get32(
+ _addr + offsetof(unwind_info_section_header, indexSectionOffset));
+ }
+ uint32_t indexCount() const {
+ return _addressSpace.get32(
+ _addr + offsetof(unwind_info_section_header, indexCount));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
+
+template <typename A> class UnwindSectionIndexArray {
+public:
+ UnwindSectionIndexArray(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t functionOffset(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr + arrayoffsetof(unwind_info_section_header_index_entry, index,
+ functionOffset));
+ }
+ uint32_t secondLevelPagesSectionOffset(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr + arrayoffsetof(unwind_info_section_header_index_entry, index,
+ secondLevelPagesSectionOffset));
+ }
+ uint32_t lsdaIndexArraySectionOffset(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr + arrayoffsetof(unwind_info_section_header_index_entry, index,
+ lsdaIndexArraySectionOffset));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
+
+template <typename A> class UnwindSectionRegularPageHeader {
+public:
+ UnwindSectionRegularPageHeader(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t kind() const {
+ return _addressSpace.get32(
+ _addr + offsetof(unwind_info_regular_second_level_page_header, kind));
+ }
+ uint16_t entryPageOffset() const {
+ return _addressSpace.get16(
+ _addr + offsetof(unwind_info_regular_second_level_page_header,
+ entryPageOffset));
+ }
+ uint16_t entryCount() const {
+ return _addressSpace.get16(
+ _addr +
+ offsetof(unwind_info_regular_second_level_page_header, entryCount));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
+
+template <typename A> class UnwindSectionRegularArray {
+public:
+ UnwindSectionRegularArray(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t functionOffset(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr + arrayoffsetof(unwind_info_regular_second_level_entry, index,
+ functionOffset));
+ }
+ uint32_t encoding(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr +
+ arrayoffsetof(unwind_info_regular_second_level_entry, index, encoding));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
+
+template <typename A> class UnwindSectionCompressedPageHeader {
+public:
+ UnwindSectionCompressedPageHeader(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t kind() const {
+ return _addressSpace.get32(
+ _addr +
+ offsetof(unwind_info_compressed_second_level_page_header, kind));
+ }
+ uint16_t entryPageOffset() const {
+ return _addressSpace.get16(
+ _addr + offsetof(unwind_info_compressed_second_level_page_header,
+ entryPageOffset));
+ }
+ uint16_t entryCount() const {
+ return _addressSpace.get16(
+ _addr +
+ offsetof(unwind_info_compressed_second_level_page_header, entryCount));
+ }
+ uint16_t encodingsPageOffset() const {
+ return _addressSpace.get16(
+ _addr + offsetof(unwind_info_compressed_second_level_page_header,
+ encodingsPageOffset));
+ }
+ uint16_t encodingsCount() const {
+ return _addressSpace.get16(
+ _addr + offsetof(unwind_info_compressed_second_level_page_header,
+ encodingsCount));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
+
+template <typename A> class UnwindSectionCompressedArray {
+public:
+ UnwindSectionCompressedArray(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t functionOffset(uint32_t index) const {
+ return UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(
+ _addressSpace.get32(_addr + index * sizeof(uint32_t)));
+ }
+ uint16_t encodingIndex(uint32_t index) const {
+ return UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(
+ _addressSpace.get32(_addr + index * sizeof(uint32_t)));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
+
+template <typename A> class UnwindSectionLsdaArray {
+public:
+ UnwindSectionLsdaArray(A &addressSpace, typename A::pint_t addr)
+ : _addressSpace(addressSpace), _addr(addr) {}
+
+ uint32_t functionOffset(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry,
+ index, functionOffset));
+ }
+ uint32_t lsdaOffset(uint32_t index) const {
+ return _addressSpace.get32(
+ _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry,
+ index, lsdaOffset));
+ }
+
+private:
+ A &_addressSpace;
+ typename A::pint_t _addr;
+};
#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
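The UnwindSectionHeader / UnwindSection*Array helpers above all follow one pattern: each getter is a get16/get32 read at section_base + offsetof(header, field) (or arrayoffsetof for indexed entries) through the address-space abstraction A, which is what lets the same parsing code work on a local or a remote __unwind_info section. Below is a minimal sketch of that pattern under simplified assumptions; LocalAddressSpace, toy_section_header and ToySectionHeader are made-up stand-ins, not the real unwind_info structures.

#include <cstddef>
#include <cstdint>
#include <cstring>

struct LocalAddressSpace {                 // stand-in for the template parameter A
  uint32_t get32(uintptr_t addr) const {
    uint32_t v;
    std::memcpy(&v, reinterpret_cast<const void *>(addr), sizeof(v));
    return v;
  }
};

struct toy_section_header {                // not the real unwind_info_section_header
  uint32_t version;
  uint32_t indexSectionOffset;
  uint32_t indexCount;
};

template <typename A> class ToySectionHeader {
public:
  ToySectionHeader(A &as, uintptr_t addr) : _as(as), _addr(addr) {}
  uint32_t version() const {
    return _as.get32(_addr + offsetof(toy_section_header, version));
  }
  uint32_t indexCount() const {
    return _as.get32(_addr + offsetof(toy_section_header, indexCount));
  }
private:
  A &_as;
  uintptr_t _addr;
};

int main() {
  toy_section_header hdr{1, 0, 42};
  LocalAddressSpace as;
  ToySectionHeader<LocalAddressSpace> view(as, reinterpret_cast<uintptr_t>(&hdr));
  return (view.version() == 1 && view.indexCount() == 42) ? 0 : 1;
}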
-
-class _LIBUNWIND_HIDDEN AbstractUnwindCursor {
-public:
- // NOTE: provide a class specific placement deallocation function (S5.3.4 p20)
- // This avoids an unnecessary dependency to libc++abi.
- void operator delete(void *, size_t) {}
-
- virtual ~AbstractUnwindCursor() {}
- virtual bool validReg(int) { _LIBUNWIND_ABORT("validReg not implemented"); }
- virtual unw_word_t getReg(int) { _LIBUNWIND_ABORT("getReg not implemented"); }
- virtual void setReg(int, unw_word_t) {
- _LIBUNWIND_ABORT("setReg not implemented");
- }
- virtual bool validFloatReg(int) {
- _LIBUNWIND_ABORT("validFloatReg not implemented");
- }
- virtual unw_fpreg_t getFloatReg(int) {
- _LIBUNWIND_ABORT("getFloatReg not implemented");
- }
- virtual void setFloatReg(int, unw_fpreg_t) {
- _LIBUNWIND_ABORT("setFloatReg not implemented");
- }
- virtual int step() { _LIBUNWIND_ABORT("step not implemented"); }
- virtual void getInfo(unw_proc_info_t *) {
- _LIBUNWIND_ABORT("getInfo not implemented");
- }
- virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); }
- virtual bool isSignalFrame() {
- _LIBUNWIND_ABORT("isSignalFrame not implemented");
- }
- virtual bool getFunctionName(char *, size_t, unw_word_t *) {
- _LIBUNWIND_ABORT("getFunctionName not implemented");
- }
- virtual void setInfoBasedOnIPRegister(bool = false) {
- _LIBUNWIND_ABORT("setInfoBasedOnIPRegister not implemented");
- }
- virtual const char *getRegisterName(int) {
- _LIBUNWIND_ABORT("getRegisterName not implemented");
- }
-#ifdef __arm__
- virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); }
-#endif
+
+class _LIBUNWIND_HIDDEN AbstractUnwindCursor {
+public:
+ // NOTE: provide a class specific placement deallocation function (S5.3.4 p20)
+ // This avoids an unnecessary dependency to libc++abi.
+ void operator delete(void *, size_t) {}
+
+ virtual ~AbstractUnwindCursor() {}
+ virtual bool validReg(int) { _LIBUNWIND_ABORT("validReg not implemented"); }
+ virtual unw_word_t getReg(int) { _LIBUNWIND_ABORT("getReg not implemented"); }
+ virtual void setReg(int, unw_word_t) {
+ _LIBUNWIND_ABORT("setReg not implemented");
+ }
+ virtual bool validFloatReg(int) {
+ _LIBUNWIND_ABORT("validFloatReg not implemented");
+ }
+ virtual unw_fpreg_t getFloatReg(int) {
+ _LIBUNWIND_ABORT("getFloatReg not implemented");
+ }
+ virtual void setFloatReg(int, unw_fpreg_t) {
+ _LIBUNWIND_ABORT("setFloatReg not implemented");
+ }
+ virtual int step() { _LIBUNWIND_ABORT("step not implemented"); }
+ virtual void getInfo(unw_proc_info_t *) {
+ _LIBUNWIND_ABORT("getInfo not implemented");
+ }
+ virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); }
+ virtual bool isSignalFrame() {
+ _LIBUNWIND_ABORT("isSignalFrame not implemented");
+ }
+ virtual bool getFunctionName(char *, size_t, unw_word_t *) {
+ _LIBUNWIND_ABORT("getFunctionName not implemented");
+ }
+ virtual void setInfoBasedOnIPRegister(bool = false) {
+ _LIBUNWIND_ABORT("setInfoBasedOnIPRegister not implemented");
+ }
+ virtual const char *getRegisterName(int) {
+ _LIBUNWIND_ABORT("getRegisterName not implemented");
+ }
+#ifdef __arm__
+ virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); }
+#endif
#if defined(_LIBUNWIND_USE_CET)
virtual void *get_registers() {
_LIBUNWIND_ABORT("get_registers not implemented");
}
#endif
-};
-
+};
+
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
/// \c UnwindCursor contains all state (including all register values) during
@@ -884,32 +884,32 @@ template <typename A, typename R> bool UnwindCursor<A, R>::isSignalFrame() {
#else // !defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) || !defined(_WIN32)
-/// UnwindCursor contains all state (including all register values) during
-/// an unwind. This is normally stack allocated inside a unw_cursor_t.
-template <typename A, typename R>
-class UnwindCursor : public AbstractUnwindCursor{
- typedef typename A::pint_t pint_t;
-public:
- UnwindCursor(unw_context_t *context, A &as);
- UnwindCursor(A &as, void *threadArg);
- virtual ~UnwindCursor() {}
- virtual bool validReg(int);
- virtual unw_word_t getReg(int);
- virtual void setReg(int, unw_word_t);
- virtual bool validFloatReg(int);
- virtual unw_fpreg_t getFloatReg(int);
- virtual void setFloatReg(int, unw_fpreg_t);
- virtual int step();
- virtual void getInfo(unw_proc_info_t *);
- virtual void jumpto();
- virtual bool isSignalFrame();
- virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off);
- virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false);
- virtual const char *getRegisterName(int num);
-#ifdef __arm__
- virtual void saveVFPAsX();
-#endif
-
+/// UnwindCursor contains all state (including all register values) during
+/// an unwind. This is normally stack allocated inside a unw_cursor_t.
+template <typename A, typename R>
+class UnwindCursor : public AbstractUnwindCursor{
+ typedef typename A::pint_t pint_t;
+public:
+ UnwindCursor(unw_context_t *context, A &as);
+ UnwindCursor(A &as, void *threadArg);
+ virtual ~UnwindCursor() {}
+ virtual bool validReg(int);
+ virtual unw_word_t getReg(int);
+ virtual void setReg(int, unw_word_t);
+ virtual bool validFloatReg(int);
+ virtual unw_fpreg_t getFloatReg(int);
+ virtual void setFloatReg(int, unw_fpreg_t);
+ virtual int step();
+ virtual void getInfo(unw_proc_info_t *);
+ virtual void jumpto();
+ virtual bool isSignalFrame();
+ virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off);
+ virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false);
+ virtual const char *getRegisterName(int num);
+#ifdef __arm__
+ virtual void saveVFPAsX();
+#endif
+
#if defined(_LIBUNWIND_USE_CET)
virtual void *get_registers() { return &_registers; }
#endif
@@ -917,26 +917,26 @@ public:
  // need our own definition of inline placement new.
static void *operator new(size_t, UnwindCursor<A, R> *p) { return p; }
-private:
-
+private:
+
#if defined(_LIBUNWIND_ARM_EHABI)
- bool getInfoFromEHABISection(pint_t pc, const UnwindInfoSections &sects);
-
- int stepWithEHABI() {
- size_t len = 0;
- size_t off = 0;
- // FIXME: Calling decode_eht_entry() here is violating the libunwind
- // abstraction layer.
- const uint32_t *ehtp =
- decode_eht_entry(reinterpret_cast<const uint32_t *>(_info.unwind_info),
- &off, &len);
- if (_Unwind_VRS_Interpret((_Unwind_Context *)this, ehtp, off, len) !=
- _URC_CONTINUE_UNWIND)
- return UNW_STEP_END;
- return UNW_STEP_SUCCESS;
- }
-#endif
-
+ bool getInfoFromEHABISection(pint_t pc, const UnwindInfoSections &sects);
+
+ int stepWithEHABI() {
+ size_t len = 0;
+ size_t off = 0;
+ // FIXME: Calling decode_eht_entry() here is violating the libunwind
+ // abstraction layer.
+ const uint32_t *ehtp =
+ decode_eht_entry(reinterpret_cast<const uint32_t *>(_info.unwind_info),
+ &off, &len);
+ if (_Unwind_VRS_Interpret((_Unwind_Context *)this, ehtp, off, len) !=
+ _URC_CONTINUE_UNWIND)
+ return UNW_STEP_END;
+ return UNW_STEP_SUCCESS;
+ }
+#endif
+
#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
bool setInfoForSigReturn() {
R dummy;
@@ -960,48 +960,48 @@ private:
bool getInfoFromFdeCie(const typename CFI_Parser<A>::FDE_Info &fdeInfo,
const typename CFI_Parser<A>::CIE_Info &cieInfo,
pint_t pc, uintptr_t dso_base);
- bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections &sects,
- uint32_t fdeSectionOffsetHint=0);
- int stepWithDwarfFDE() {
- return DwarfInstructions<A, R>::stepWithDwarf(_addressSpace,
- (pint_t)this->getReg(UNW_REG_IP),
- (pint_t)_info.unwind_info,
+ bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections &sects,
+ uint32_t fdeSectionOffsetHint=0);
+ int stepWithDwarfFDE() {
+ return DwarfInstructions<A, R>::stepWithDwarf(_addressSpace,
+ (pint_t)this->getReg(UNW_REG_IP),
+ (pint_t)_info.unwind_info,
_registers, _isSignalFrame);
- }
-#endif
-
+ }
+#endif
+
#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
- bool getInfoFromCompactEncodingSection(pint_t pc,
- const UnwindInfoSections &sects);
- int stepWithCompactEncoding() {
+ bool getInfoFromCompactEncodingSection(pint_t pc,
+ const UnwindInfoSections &sects);
+ int stepWithCompactEncoding() {
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- if ( compactSaysUseDwarf() )
- return stepWithDwarfFDE();
- #endif
- R dummy;
- return stepWithCompactEncoding(dummy);
- }
-
+ if ( compactSaysUseDwarf() )
+ return stepWithDwarfFDE();
+ #endif
+ R dummy;
+ return stepWithCompactEncoding(dummy);
+ }
+
#if defined(_LIBUNWIND_TARGET_X86_64)
- int stepWithCompactEncoding(Registers_x86_64 &) {
- return CompactUnwinder_x86_64<A>::stepWithCompactEncoding(
- _info.format, _info.start_ip, _addressSpace, _registers);
- }
+ int stepWithCompactEncoding(Registers_x86_64 &) {
+ return CompactUnwinder_x86_64<A>::stepWithCompactEncoding(
+ _info.format, _info.start_ip, _addressSpace, _registers);
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_I386)
- int stepWithCompactEncoding(Registers_x86 &) {
- return CompactUnwinder_x86<A>::stepWithCompactEncoding(
- _info.format, (uint32_t)_info.start_ip, _addressSpace, _registers);
- }
+ int stepWithCompactEncoding(Registers_x86 &) {
+ return CompactUnwinder_x86<A>::stepWithCompactEncoding(
+ _info.format, (uint32_t)_info.start_ip, _addressSpace, _registers);
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_PPC)
- int stepWithCompactEncoding(Registers_ppc &) {
- return UNW_EINVAL;
- }
+ int stepWithCompactEncoding(Registers_ppc &) {
+ return UNW_EINVAL;
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_PPC64)
int stepWithCompactEncoding(Registers_ppc64 &) {
return UNW_EINVAL;
@@ -1010,12 +1010,12 @@ private:
#if defined(_LIBUNWIND_TARGET_AARCH64)
- int stepWithCompactEncoding(Registers_arm64 &) {
- return CompactUnwinder_arm64<A>::stepWithCompactEncoding(
- _info.format, _info.start_ip, _addressSpace, _registers);
- }
+ int stepWithCompactEncoding(Registers_arm64 &) {
+ return CompactUnwinder_arm64<A>::stepWithCompactEncoding(
+ _info.format, _info.start_ip, _addressSpace, _registers);
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_MIPS_O32)
int stepWithCompactEncoding(Registers_mips_o32 &) {
return UNW_EINVAL;
@@ -1042,39 +1042,39 @@ private:
}
#endif
- bool compactSaysUseDwarf(uint32_t *offset=NULL) const {
- R dummy;
- return compactSaysUseDwarf(dummy, offset);
- }
-
+ bool compactSaysUseDwarf(uint32_t *offset=NULL) const {
+ R dummy;
+ return compactSaysUseDwarf(dummy, offset);
+ }
+
#if defined(_LIBUNWIND_TARGET_X86_64)
- bool compactSaysUseDwarf(Registers_x86_64 &, uint32_t *offset) const {
- if ((_info.format & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF) {
- if (offset)
- *offset = (_info.format & UNWIND_X86_64_DWARF_SECTION_OFFSET);
- return true;
- }
- return false;
- }
-#endif
-
+ bool compactSaysUseDwarf(Registers_x86_64 &, uint32_t *offset) const {
+ if ((_info.format & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF) {
+ if (offset)
+ *offset = (_info.format & UNWIND_X86_64_DWARF_SECTION_OFFSET);
+ return true;
+ }
+ return false;
+ }
+#endif
+
#if defined(_LIBUNWIND_TARGET_I386)
- bool compactSaysUseDwarf(Registers_x86 &, uint32_t *offset) const {
- if ((_info.format & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF) {
- if (offset)
- *offset = (_info.format & UNWIND_X86_DWARF_SECTION_OFFSET);
- return true;
- }
- return false;
- }
-#endif
-
+ bool compactSaysUseDwarf(Registers_x86 &, uint32_t *offset) const {
+ if ((_info.format & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF) {
+ if (offset)
+ *offset = (_info.format & UNWIND_X86_DWARF_SECTION_OFFSET);
+ return true;
+ }
+ return false;
+ }
+#endif
+
#if defined(_LIBUNWIND_TARGET_PPC)
- bool compactSaysUseDwarf(Registers_ppc &, uint32_t *) const {
- return true;
- }
+ bool compactSaysUseDwarf(Registers_ppc &, uint32_t *) const {
+ return true;
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_PPC64)
bool compactSaysUseDwarf(Registers_ppc64 &, uint32_t *) const {
return true;
@@ -1082,16 +1082,16 @@ private:
#endif
#if defined(_LIBUNWIND_TARGET_AARCH64)
- bool compactSaysUseDwarf(Registers_arm64 &, uint32_t *offset) const {
- if ((_info.format & UNWIND_ARM64_MODE_MASK) == UNWIND_ARM64_MODE_DWARF) {
- if (offset)
- *offset = (_info.format & UNWIND_ARM64_DWARF_SECTION_OFFSET);
- return true;
- }
- return false;
- }
-#endif
-
+ bool compactSaysUseDwarf(Registers_arm64 &, uint32_t *offset) const {
+ if ((_info.format & UNWIND_ARM64_MODE_MASK) == UNWIND_ARM64_MODE_DWARF) {
+ if (offset)
+ *offset = (_info.format & UNWIND_ARM64_DWARF_SECTION_OFFSET);
+ return true;
+ }
+ return false;
+ }
+#endif
+
#if defined(_LIBUNWIND_TARGET_MIPS_O32)
bool compactSaysUseDwarf(Registers_mips_o32 &, uint32_t *) const {
return true;
@@ -1123,29 +1123,29 @@ private:
#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- compact_unwind_encoding_t dwarfEncoding() const {
- R dummy;
- return dwarfEncoding(dummy);
- }
-
+ compact_unwind_encoding_t dwarfEncoding() const {
+ R dummy;
+ return dwarfEncoding(dummy);
+ }
+
#if defined(_LIBUNWIND_TARGET_X86_64)
- compact_unwind_encoding_t dwarfEncoding(Registers_x86_64 &) const {
- return UNWIND_X86_64_MODE_DWARF;
- }
+ compact_unwind_encoding_t dwarfEncoding(Registers_x86_64 &) const {
+ return UNWIND_X86_64_MODE_DWARF;
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_I386)
- compact_unwind_encoding_t dwarfEncoding(Registers_x86 &) const {
- return UNWIND_X86_MODE_DWARF;
- }
+ compact_unwind_encoding_t dwarfEncoding(Registers_x86 &) const {
+ return UNWIND_X86_MODE_DWARF;
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_PPC)
- compact_unwind_encoding_t dwarfEncoding(Registers_ppc &) const {
- return 0;
- }
+ compact_unwind_encoding_t dwarfEncoding(Registers_ppc &) const {
+ return 0;
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_PPC64)
compact_unwind_encoding_t dwarfEncoding(Registers_ppc64 &) const {
return 0;
@@ -1153,11 +1153,11 @@ private:
#endif
#if defined(_LIBUNWIND_TARGET_AARCH64)
- compact_unwind_encoding_t dwarfEncoding(Registers_arm64 &) const {
- return UNWIND_ARM64_MODE_DWARF;
- }
+ compact_unwind_encoding_t dwarfEncoding(Registers_arm64 &) const {
+ return UNWIND_ARM64_MODE_DWARF;
+ }
#endif
-
+
#if defined(_LIBUNWIND_TARGET_ARM)
compact_unwind_encoding_t dwarfEncoding(Registers_arm &) const {
return 0;
@@ -1165,11 +1165,11 @@ private:
#endif
#if defined (_LIBUNWIND_TARGET_OR1K)
- compact_unwind_encoding_t dwarfEncoding(Registers_or1k &) const {
- return 0;
- }
+ compact_unwind_encoding_t dwarfEncoding(Registers_or1k &) const {
+ return 0;
+ }
#endif
-
+
#if defined (_LIBUNWIND_TARGET_HEXAGON)
compact_unwind_encoding_t dwarfEncoding(Registers_hexagon &) const {
return 0;
@@ -1181,7 +1181,7 @@ private:
return 0;
}
#endif
-
+
#if defined (_LIBUNWIND_TARGET_MIPS_NEWABI)
compact_unwind_encoding_t dwarfEncoding(Registers_mips_newabi &) const {
return 0;
@@ -1221,152 +1221,152 @@ private:
#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
- A &_addressSpace;
- R _registers;
- unw_proc_info_t _info;
- bool _unwindInfoMissing;
- bool _isSignalFrame;
+ A &_addressSpace;
+ R _registers;
+ unw_proc_info_t _info;
+ bool _unwindInfoMissing;
+ bool _isSignalFrame;
#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
bool _isSigReturn = false;
#endif
-};
-
-
-template <typename A, typename R>
-UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
- : _addressSpace(as), _registers(context), _unwindInfoMissing(false),
- _isSignalFrame(false) {
+};
+
+
+template <typename A, typename R>
+UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
+ : _addressSpace(as), _registers(context), _unwindInfoMissing(false),
+ _isSignalFrame(false) {
static_assert((check_fit<UnwindCursor<A, R>, unw_cursor_t>::does_fit),
- "UnwindCursor<> does not fit in unw_cursor_t");
+ "UnwindCursor<> does not fit in unw_cursor_t");
static_assert((alignof(UnwindCursor<A, R>) <= alignof(unw_cursor_t)),
"UnwindCursor<> requires more alignment than unw_cursor_t");
- memset(&_info, 0, sizeof(_info));
-}
-
-template <typename A, typename R>
-UnwindCursor<A, R>::UnwindCursor(A &as, void *)
- : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) {
- memset(&_info, 0, sizeof(_info));
- // FIXME
- // fill in _registers from thread arg
-}
-
-
-template <typename A, typename R>
-bool UnwindCursor<A, R>::validReg(int regNum) {
- return _registers.validRegister(regNum);
-}
-
-template <typename A, typename R>
-unw_word_t UnwindCursor<A, R>::getReg(int regNum) {
- return _registers.getRegister(regNum);
-}
-
-template <typename A, typename R>
-void UnwindCursor<A, R>::setReg(int regNum, unw_word_t value) {
- _registers.setRegister(regNum, (typename A::pint_t)value);
-}
-
-template <typename A, typename R>
-bool UnwindCursor<A, R>::validFloatReg(int regNum) {
- return _registers.validFloatRegister(regNum);
-}
-
-template <typename A, typename R>
-unw_fpreg_t UnwindCursor<A, R>::getFloatReg(int regNum) {
- return _registers.getFloatRegister(regNum);
-}
-
-template <typename A, typename R>
-void UnwindCursor<A, R>::setFloatReg(int regNum, unw_fpreg_t value) {
- _registers.setFloatRegister(regNum, value);
-}
-
-template <typename A, typename R> void UnwindCursor<A, R>::jumpto() {
- _registers.jumpto();
-}
-
-#ifdef __arm__
-template <typename A, typename R> void UnwindCursor<A, R>::saveVFPAsX() {
- _registers.saveVFPAsX();
-}
-#endif
-
-template <typename A, typename R>
-const char *UnwindCursor<A, R>::getRegisterName(int regNum) {
- return _registers.getRegisterName(regNum);
-}
-
-template <typename A, typename R> bool UnwindCursor<A, R>::isSignalFrame() {
- return _isSignalFrame;
-}
-
+ memset(&_info, 0, sizeof(_info));
+}
+
+template <typename A, typename R>
+UnwindCursor<A, R>::UnwindCursor(A &as, void *)
+ : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) {
+ memset(&_info, 0, sizeof(_info));
+ // FIXME
+ // fill in _registers from thread arg
+}
+
+
+template <typename A, typename R>
+bool UnwindCursor<A, R>::validReg(int regNum) {
+ return _registers.validRegister(regNum);
+}
+
+template <typename A, typename R>
+unw_word_t UnwindCursor<A, R>::getReg(int regNum) {
+ return _registers.getRegister(regNum);
+}
+
+template <typename A, typename R>
+void UnwindCursor<A, R>::setReg(int regNum, unw_word_t value) {
+ _registers.setRegister(regNum, (typename A::pint_t)value);
+}
+
+template <typename A, typename R>
+bool UnwindCursor<A, R>::validFloatReg(int regNum) {
+ return _registers.validFloatRegister(regNum);
+}
+
+template <typename A, typename R>
+unw_fpreg_t UnwindCursor<A, R>::getFloatReg(int regNum) {
+ return _registers.getFloatRegister(regNum);
+}
+
+template <typename A, typename R>
+void UnwindCursor<A, R>::setFloatReg(int regNum, unw_fpreg_t value) {
+ _registers.setFloatRegister(regNum, value);
+}
+
+template <typename A, typename R> void UnwindCursor<A, R>::jumpto() {
+ _registers.jumpto();
+}
+
+#ifdef __arm__
+template <typename A, typename R> void UnwindCursor<A, R>::saveVFPAsX() {
+ _registers.saveVFPAsX();
+}
+#endif
+
+template <typename A, typename R>
+const char *UnwindCursor<A, R>::getRegisterName(int regNum) {
+ return _registers.getRegisterName(regNum);
+}
+
+template <typename A, typename R> bool UnwindCursor<A, R>::isSignalFrame() {
+ return _isSignalFrame;
+}
+
#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
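A recurring idiom in the UnwindCursor code above is that the C API's opaque unw_cursor_t is just a suitably sized and aligned byte buffer: the C++ cursor object is placement-new'ed into it, a class-specific operator new/delete keeps libunwind from pulling in libc++abi, and static_asserts verify that the object actually fits and is aligned. The following self-contained sketch shows that idiom with made-up ToyCursor/opaque_cursor types.

#include <cstddef>
#include <new>

struct opaque_cursor {                       // stand-in for unw_cursor_t
  alignas(8) unsigned char space[128];
};

class ToyCursor {
public:
  explicit ToyCursor(int start) : _ip(start) {}
  int step() { return --_ip; }               // pretend to unwind one frame
  // class-specific placement new/delete: no dependency on the C++ runtime
  static void *operator new(std::size_t, ToyCursor *p) { return p; }
  void operator delete(void *, std::size_t) {}
private:
  int _ip;
};

static_assert(sizeof(ToyCursor) <= sizeof(opaque_cursor),
              "ToyCursor does not fit in opaque_cursor");
static_assert(alignof(ToyCursor) <= alignof(opaque_cursor),
              "ToyCursor requires more alignment than opaque_cursor");

int main() {
  opaque_cursor buf;
  ToyCursor *c = new (reinterpret_cast<ToyCursor *>(&buf)) ToyCursor(3);
  while (c->step() > 0) {
  }
  return 0;
}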
#if defined(_LIBUNWIND_ARM_EHABI)
-template<typename A>
-struct EHABISectionIterator {
- typedef EHABISectionIterator _Self;
-
- typedef typename A::pint_t value_type;
- typedef typename A::pint_t* pointer;
- typedef typename A::pint_t& reference;
- typedef size_t size_type;
- typedef size_t difference_type;
-
- static _Self begin(A& addressSpace, const UnwindInfoSections& sects) {
- return _Self(addressSpace, sects, 0);
- }
- static _Self end(A& addressSpace, const UnwindInfoSections& sects) {
+template<typename A>
+struct EHABISectionIterator {
+ typedef EHABISectionIterator _Self;
+
+ typedef typename A::pint_t value_type;
+ typedef typename A::pint_t* pointer;
+ typedef typename A::pint_t& reference;
+ typedef size_t size_type;
+ typedef size_t difference_type;
+
+ static _Self begin(A& addressSpace, const UnwindInfoSections& sects) {
+ return _Self(addressSpace, sects, 0);
+ }
+ static _Self end(A& addressSpace, const UnwindInfoSections& sects) {
return _Self(addressSpace, sects,
sects.arm_section_length / sizeof(EHABIIndexEntry));
- }
-
- EHABISectionIterator(A& addressSpace, const UnwindInfoSections& sects, size_t i)
- : _i(i), _addressSpace(&addressSpace), _sects(&sects) {}
-
- _Self& operator++() { ++_i; return *this; }
- _Self& operator+=(size_t a) { _i += a; return *this; }
- _Self& operator--() { assert(_i > 0); --_i; return *this; }
- _Self& operator-=(size_t a) { assert(_i >= a); _i -= a; return *this; }
-
- _Self operator+(size_t a) { _Self out = *this; out._i += a; return out; }
- _Self operator-(size_t a) { assert(_i >= a); _Self out = *this; out._i -= a; return out; }
-
+ }
+
+ EHABISectionIterator(A& addressSpace, const UnwindInfoSections& sects, size_t i)
+ : _i(i), _addressSpace(&addressSpace), _sects(&sects) {}
+
+ _Self& operator++() { ++_i; return *this; }
+ _Self& operator+=(size_t a) { _i += a; return *this; }
+ _Self& operator--() { assert(_i > 0); --_i; return *this; }
+ _Self& operator-=(size_t a) { assert(_i >= a); _i -= a; return *this; }
+
+ _Self operator+(size_t a) { _Self out = *this; out._i += a; return out; }
+ _Self operator-(size_t a) { assert(_i >= a); _Self out = *this; out._i -= a; return out; }
+
size_t operator-(const _Self& other) const { return _i - other._i; }
-
- bool operator==(const _Self& other) const {
- assert(_addressSpace == other._addressSpace);
- assert(_sects == other._sects);
- return _i == other._i;
- }
-
+
+ bool operator==(const _Self& other) const {
+ assert(_addressSpace == other._addressSpace);
+ assert(_sects == other._sects);
+ return _i == other._i;
+ }
+
bool operator!=(const _Self& other) const {
assert(_addressSpace == other._addressSpace);
assert(_sects == other._sects);
return _i != other._i;
}
- typename A::pint_t operator*() const { return functionAddress(); }
-
- typename A::pint_t functionAddress() const {
- typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof(
- EHABIIndexEntry, _i, functionOffset);
- return indexAddr + signExtendPrel31(_addressSpace->get32(indexAddr));
- }
-
- typename A::pint_t dataAddress() {
- typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof(
- EHABIIndexEntry, _i, data);
- return indexAddr;
- }
-
- private:
- size_t _i;
- A* _addressSpace;
- const UnwindInfoSections* _sects;
-};
-
+ typename A::pint_t operator*() const { return functionAddress(); }
+
+ typename A::pint_t functionAddress() const {
+ typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof(
+ EHABIIndexEntry, _i, functionOffset);
+ return indexAddr + signExtendPrel31(_addressSpace->get32(indexAddr));
+ }
+
+ typename A::pint_t dataAddress() {
+ typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof(
+ EHABIIndexEntry, _i, data);
+ return indexAddr;
+ }
+
+ private:
+ size_t _i;
+ A* _addressSpace;
+ const UnwindInfoSections* _sects;
+};
+
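The iterator above exposes the ARM EHABI index as a random-access sequence, so an upper-bound style binary search can pick the entry whose function start is the largest one not past a given pc. A minimal standalone sketch of that search over a plain array (IndexEntry and findEntryForPC are hypothetical stand-ins for illustration, not part of this file):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for one EHABI index record: function offset only.
struct IndexEntry { uint32_t functionOffset; };

// Index of the last entry with functionOffset <= pc, or -1 if pc precedes
// the first entry (the same answer "upper_bound minus one" would give).
static ptrdiff_t findEntryForPC(const IndexEntry *table, size_t count, uint32_t pc) {
  size_t low = 0, high = count;            // search window is [low, high)
  while (low < high) {
    size_t mid = (low + high) / 2;
    if (table[mid].functionOffset <= pc)
      low = mid + 1;                       // candidate found; look further right
    else
      high = mid;
  }
  return static_cast<ptrdiff_t>(low) - 1;
}

int main() {
  const IndexEntry table[] = {{0x1000}, {0x2000}, {0x3000}};
  assert(findEntryForPC(table, 3, 0x2fff) == 1);   // falls inside the second entry
  assert(findEntryForPC(table, 3, 0x0fff) == -1);  // before the first function
  return 0;
}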
namespace {
template <typename A>
@@ -1390,147 +1390,147 @@ EHABISectionIterator<A> EHABISectionUpperBound(
}
-template <typename A, typename R>
-bool UnwindCursor<A, R>::getInfoFromEHABISection(
- pint_t pc,
- const UnwindInfoSections &sects) {
- EHABISectionIterator<A> begin =
- EHABISectionIterator<A>::begin(_addressSpace, sects);
- EHABISectionIterator<A> end =
- EHABISectionIterator<A>::end(_addressSpace, sects);
+template <typename A, typename R>
+bool UnwindCursor<A, R>::getInfoFromEHABISection(
+ pint_t pc,
+ const UnwindInfoSections &sects) {
+ EHABISectionIterator<A> begin =
+ EHABISectionIterator<A>::begin(_addressSpace, sects);
+ EHABISectionIterator<A> end =
+ EHABISectionIterator<A>::end(_addressSpace, sects);
if (begin == end)
return false;
-
+
EHABISectionIterator<A> itNextPC = EHABISectionUpperBound(begin, end, pc);
if (itNextPC == begin)
- return false;
- EHABISectionIterator<A> itThisPC = itNextPC - 1;
-
- pint_t thisPC = itThisPC.functionAddress();
+ return false;
+ EHABISectionIterator<A> itThisPC = itNextPC - 1;
+
+ pint_t thisPC = itThisPC.functionAddress();
// If an exception is thrown from a function, corresponding to the last entry
// in the table, we don't really know the function extent and have to choose a
// value for nextPC. Choosing max() will allow the range check during trace to
// succeed.
pint_t nextPC = (itNextPC == end) ? UINTPTR_MAX : itNextPC.functionAddress();
- pint_t indexDataAddr = itThisPC.dataAddress();
-
- if (indexDataAddr == 0)
- return false;
-
- uint32_t indexData = _addressSpace.get32(indexDataAddr);
- if (indexData == UNW_EXIDX_CANTUNWIND)
- return false;
-
- // If the high bit is set, the exception handling table entry is inline inside
- // the index table entry on the second word (aka |indexDataAddr|). Otherwise,
+ pint_t indexDataAddr = itThisPC.dataAddress();
+
+ if (indexDataAddr == 0)
+ return false;
+
+ uint32_t indexData = _addressSpace.get32(indexDataAddr);
+ if (indexData == UNW_EXIDX_CANTUNWIND)
+ return false;
+
+ // If the high bit is set, the exception handling table entry is inline inside
+ // the index table entry on the second word (aka |indexDataAddr|). Otherwise,
// the table points at an offset in the exception handling table (section 5
// EHABI).
- pint_t exceptionTableAddr;
- uint32_t exceptionTableData;
- bool isSingleWordEHT;
- if (indexData & 0x80000000) {
- exceptionTableAddr = indexDataAddr;
- // TODO(ajwong): Should this data be 0?
- exceptionTableData = indexData;
- isSingleWordEHT = true;
- } else {
- exceptionTableAddr = indexDataAddr + signExtendPrel31(indexData);
- exceptionTableData = _addressSpace.get32(exceptionTableAddr);
- isSingleWordEHT = false;
- }
-
-  // Now we know three things:
- // exceptionTableAddr -- exception handler table entry.
- // exceptionTableData -- the data inside the first word of the eht entry.
- // isSingleWordEHT -- whether the entry is in the index.
- unw_word_t personalityRoutine = 0xbadf00d;
- bool scope32 = false;
- uintptr_t lsda;
-
- // If the high bit in the exception handling table entry is set, the entry is
- // in compact form (section 6.3 EHABI).
- if (exceptionTableData & 0x80000000) {
- // Grab the index of the personality routine from the compact form.
- uint32_t choice = (exceptionTableData & 0x0f000000) >> 24;
- uint32_t extraWords = 0;
- switch (choice) {
- case 0:
- personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr0;
- extraWords = 0;
- scope32 = false;
- lsda = isSingleWordEHT ? 0 : (exceptionTableAddr + 4);
- break;
- case 1:
- personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr1;
- extraWords = (exceptionTableData & 0x00ff0000) >> 16;
- scope32 = false;
- lsda = exceptionTableAddr + (extraWords + 1) * 4;
- break;
- case 2:
- personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr2;
- extraWords = (exceptionTableData & 0x00ff0000) >> 16;
- scope32 = true;
- lsda = exceptionTableAddr + (extraWords + 1) * 4;
- break;
- default:
- _LIBUNWIND_ABORT("unknown personality routine");
- return false;
- }
-
- if (isSingleWordEHT) {
- if (extraWords != 0) {
- _LIBUNWIND_ABORT("index inlined table detected but pr function "
- "requires extra words");
- return false;
- }
- }
- } else {
- pint_t personalityAddr =
- exceptionTableAddr + signExtendPrel31(exceptionTableData);
- personalityRoutine = personalityAddr;
-
- // ARM EHABI # 6.2, # 9.2
- //
- // +---- ehtp
- // v
- // +--------------------------------------+
- // | +--------+--------+--------+-------+ |
- // | |0| prel31 to personalityRoutine | |
- // | +--------+--------+--------+-------+ |
- // | | N | unwind opcodes | | <-- UnwindData
- // | +--------+--------+--------+-------+ |
- // | | Word 2 unwind opcodes | |
- // | +--------+--------+--------+-------+ |
- // | ... |
- // | +--------+--------+--------+-------+ |
- // | | Word N unwind opcodes | |
- // | +--------+--------+--------+-------+ |
- // | | LSDA | | <-- lsda
- // | | ... | |
- // | +--------+--------+--------+-------+ |
- // +--------------------------------------+
-
- uint32_t *UnwindData = reinterpret_cast<uint32_t*>(exceptionTableAddr) + 1;
- uint32_t FirstDataWord = *UnwindData;
- size_t N = ((FirstDataWord >> 24) & 0xff);
- size_t NDataWords = N + 1;
- lsda = reinterpret_cast<uintptr_t>(UnwindData + NDataWords);
- }
-
- _info.start_ip = thisPC;
- _info.end_ip = nextPC;
- _info.handler = personalityRoutine;
- _info.unwind_info = exceptionTableAddr;
- _info.lsda = lsda;
- // flags is pr_cache.additional. See EHABI #7.2 for definition of bit 0.
+ pint_t exceptionTableAddr;
+ uint32_t exceptionTableData;
+ bool isSingleWordEHT;
+ if (indexData & 0x80000000) {
+ exceptionTableAddr = indexDataAddr;
+ // TODO(ajwong): Should this data be 0?
+ exceptionTableData = indexData;
+ isSingleWordEHT = true;
+ } else {
+ exceptionTableAddr = indexDataAddr + signExtendPrel31(indexData);
+ exceptionTableData = _addressSpace.get32(exceptionTableAddr);
+ isSingleWordEHT = false;
+ }
+
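Both the index entry and the out-of-line table pointer above are stored as prel31 values: a 31-bit signed offset relative to the address of the word holding it, with bit 31 left free as a flag. A self-contained sketch of that decoding, assuming the usual prel31 layout (sign in bit 30); the helper mirrors the name used here but is reimplemented only for illustration:

#include <cassert>
#include <cstdint>

// Sign-extend a 31-bit PC-relative field to a full signed offset.
static int64_t signExtendPrel31(uint32_t data) {
  int64_t value = data & 0x7fffffffu;      // drop bit 31 (used as a flag)
  if (value & 0x40000000)                  // bit 30 carries the sign
    value |= ~INT64_C(0x7fffffff);         // extend it
  return value;
}

int main() {
  assert(signExtendPrel31(0x00000010u) == 16);    // small forward offset
  assert(signExtendPrel31(0x7ffffff0u) == -16);   // small backward offset
  // Resolving a table word: target = address of the word + decoded offset.
  uint64_t indexAddr = 0x10000;
  assert(indexAddr + signExtendPrel31(0x7ffffff0u) == 0xfff0);
  return 0;
}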
+  // Now we know three things:
+ // exceptionTableAddr -- exception handler table entry.
+ // exceptionTableData -- the data inside the first word of the eht entry.
+ // isSingleWordEHT -- whether the entry is in the index.
+ unw_word_t personalityRoutine = 0xbadf00d;
+ bool scope32 = false;
+ uintptr_t lsda;
+
+ // If the high bit in the exception handling table entry is set, the entry is
+ // in compact form (section 6.3 EHABI).
+ if (exceptionTableData & 0x80000000) {
+ // Grab the index of the personality routine from the compact form.
+ uint32_t choice = (exceptionTableData & 0x0f000000) >> 24;
+ uint32_t extraWords = 0;
+ switch (choice) {
+ case 0:
+ personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr0;
+ extraWords = 0;
+ scope32 = false;
+ lsda = isSingleWordEHT ? 0 : (exceptionTableAddr + 4);
+ break;
+ case 1:
+ personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr1;
+ extraWords = (exceptionTableData & 0x00ff0000) >> 16;
+ scope32 = false;
+ lsda = exceptionTableAddr + (extraWords + 1) * 4;
+ break;
+ case 2:
+ personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr2;
+ extraWords = (exceptionTableData & 0x00ff0000) >> 16;
+ scope32 = true;
+ lsda = exceptionTableAddr + (extraWords + 1) * 4;
+ break;
+ default:
+ _LIBUNWIND_ABORT("unknown personality routine");
+ return false;
+ }
+
+ if (isSingleWordEHT) {
+ if (extraWords != 0) {
+ _LIBUNWIND_ABORT("index inlined table detected but pr function "
+ "requires extra words");
+ return false;
+ }
+ }
+ } else {
+ pint_t personalityAddr =
+ exceptionTableAddr + signExtendPrel31(exceptionTableData);
+ personalityRoutine = personalityAddr;
+
+ // ARM EHABI # 6.2, # 9.2
+ //
+ // +---- ehtp
+ // v
+ // +--------------------------------------+
+ // | +--------+--------+--------+-------+ |
+ // | |0| prel31 to personalityRoutine | |
+ // | +--------+--------+--------+-------+ |
+ // | | N | unwind opcodes | | <-- UnwindData
+ // | +--------+--------+--------+-------+ |
+ // | | Word 2 unwind opcodes | |
+ // | +--------+--------+--------+-------+ |
+ // | ... |
+ // | +--------+--------+--------+-------+ |
+ // | | Word N unwind opcodes | |
+ // | +--------+--------+--------+-------+ |
+ // | | LSDA | | <-- lsda
+ // | | ... | |
+ // | +--------+--------+--------+-------+ |
+ // +--------------------------------------+
+
+ uint32_t *UnwindData = reinterpret_cast<uint32_t*>(exceptionTableAddr) + 1;
+ uint32_t FirstDataWord = *UnwindData;
+ size_t N = ((FirstDataWord >> 24) & 0xff);
+ size_t NDataWords = N + 1;
+ lsda = reinterpret_cast<uintptr_t>(UnwindData + NDataWords);
+ }
+
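For the generic personality model, the word after the prel31 personality pointer carries the count N of additional opcode words in its top byte, and the LSDA begins immediately after those N + 1 words. A small sketch of that layout arithmetic over a hypothetical in-memory entry (lsdaOfGenericEntry is illustrative, not this file's code):

#include <cassert>
#include <cstdint>

// Generic-model table entry, viewed as an array of 32-bit words:
//   word 0        : prel31 to the personality routine
//   word 1        : N in bits 24..31, first unwind opcodes below
//   words 2..N+1  : remaining unwind opcode words
//   words N+2 ... : language-specific data area (LSDA)
static const uint32_t *lsdaOfGenericEntry(const uint32_t *entry) {
  const uint32_t *unwindData = entry + 1;      // skip the personality word
  uint32_t n = (*unwindData >> 24) & 0xffu;    // count of extra opcode words
  return unwindData + n + 1;                   // LSDA starts right after them
}

int main() {
  // Hypothetical entry with N = 2 extra opcode words, then an LSDA word.
  uint32_t entry[] = {0x0, 0x02b0b0b0, 0xb0b0b0b0, 0xb0b0b0b0, 0x12345678};
  assert(*lsdaOfGenericEntry(entry) == 0x12345678);
  return 0;
}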
+ _info.start_ip = thisPC;
+ _info.end_ip = nextPC;
+ _info.handler = personalityRoutine;
+ _info.unwind_info = exceptionTableAddr;
+ _info.lsda = lsda;
+ // flags is pr_cache.additional. See EHABI #7.2 for definition of bit 0.
_info.flags = (isSingleWordEHT ? 1 : 0) | (scope32 ? 0x2 : 0); // Use enum?
-
- return true;
-}
-#endif
-
+
+ return true;
+}
+#endif
+
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-template <typename A, typename R>
+template <typename A, typename R>
bool UnwindCursor<A, R>::getInfoFromFdeCie(
const typename CFI_Parser<A>::FDE_Info &fdeInfo,
const typename CFI_Parser<A>::CIE_Info &cieInfo, pint_t pc,
@@ -1557,325 +1557,325 @@ bool UnwindCursor<A, R>::getInfoFromFdeCie(
}
template <typename A, typename R>
-bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
- const UnwindInfoSections &sects,
- uint32_t fdeSectionOffsetHint) {
- typename CFI_Parser<A>::FDE_Info fdeInfo;
- typename CFI_Parser<A>::CIE_Info cieInfo;
- bool foundFDE = false;
- bool foundInCache = false;
- // If compact encoding table gave offset into dwarf section, go directly there
- if (fdeSectionOffsetHint != 0) {
- foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
+bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
+ const UnwindInfoSections &sects,
+ uint32_t fdeSectionOffsetHint) {
+ typename CFI_Parser<A>::FDE_Info fdeInfo;
+ typename CFI_Parser<A>::CIE_Info cieInfo;
+ bool foundFDE = false;
+ bool foundInCache = false;
+ // If compact encoding table gave offset into dwarf section, go directly there
+ if (fdeSectionOffsetHint != 0) {
+ foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
sects.dwarf_section_length,
- sects.dwarf_section + fdeSectionOffsetHint,
- &fdeInfo, &cieInfo);
- }
+ sects.dwarf_section + fdeSectionOffsetHint,
+ &fdeInfo, &cieInfo);
+ }
#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
- if (!foundFDE && (sects.dwarf_index_section != 0)) {
- foundFDE = EHHeaderParser<A>::findFDE(
- _addressSpace, pc, sects.dwarf_index_section,
- (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo);
- }
-#endif
- if (!foundFDE) {
- // otherwise, search cache of previously found FDEs.
- pint_t cachedFDE = DwarfFDECache<A>::findFDE(sects.dso_base, pc);
- if (cachedFDE != 0) {
- foundFDE =
- CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
+ if (!foundFDE && (sects.dwarf_index_section != 0)) {
+ foundFDE = EHHeaderParser<A>::findFDE(
+ _addressSpace, pc, sects.dwarf_index_section,
+ (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo);
+ }
+#endif
+ if (!foundFDE) {
+ // otherwise, search cache of previously found FDEs.
+ pint_t cachedFDE = DwarfFDECache<A>::findFDE(sects.dso_base, pc);
+ if (cachedFDE != 0) {
+ foundFDE =
+ CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
sects.dwarf_section_length,
- cachedFDE, &fdeInfo, &cieInfo);
- foundInCache = foundFDE;
- }
- }
- if (!foundFDE) {
- // Still not found, do full scan of __eh_frame section.
- foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
+ cachedFDE, &fdeInfo, &cieInfo);
+ foundInCache = foundFDE;
+ }
+ }
+ if (!foundFDE) {
+ // Still not found, do full scan of __eh_frame section.
+ foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
sects.dwarf_section_length, 0,
- &fdeInfo, &cieInfo);
- }
- if (foundFDE) {
+ &fdeInfo, &cieInfo);
+ }
+ if (foundFDE) {
if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, sects.dso_base)) {
- // Add to cache (to make next lookup faster) if we had no hint
- // and there was no index.
- if (!foundInCache && (fdeSectionOffsetHint == 0)) {
+ // Add to cache (to make next lookup faster) if we had no hint
+ // and there was no index.
+ if (!foundInCache && (fdeSectionOffsetHint == 0)) {
#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
- if (sects.dwarf_index_section == 0)
- #endif
- DwarfFDECache<A>::add(sects.dso_base, fdeInfo.pcStart, fdeInfo.pcEnd,
- fdeInfo.fdeStart);
- }
- return true;
- }
- }
+ if (sects.dwarf_index_section == 0)
+ #endif
+ DwarfFDECache<A>::add(sects.dso_base, fdeInfo.pcStart, fdeInfo.pcEnd,
+ fdeInfo.fdeStart);
+ }
+ return true;
+ }
+ }
//_LIBUNWIND_DEBUG_LOG("can't find/use FDE for pc=0x%llX", (uint64_t)pc);
- return false;
-}
+ return false;
+}
#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
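The lookup above tries progressively cheaper sources first (the compact-encoding hint, the .eh_frame_hdr index, the per-image FDE cache) and only then falls back to a linear scan of the DWARF section, remembering a successful scan for the next query. A compact sketch of that check-the-cache-then-scan-and-remember shape, with a hypothetical FdeLookup type rather than DwarfFDECache's real interface:

#include <cassert>
#include <cstdint>
#include <functional>
#include <unordered_map>

// Hypothetical cache keyed by pc: fast path first, slow scan as fallback,
// and the scan result is stored so the next lookup for that pc is cheap.
struct FdeLookup {
  std::unordered_map<uint64_t, uint64_t> cache;   // pc -> FDE address (0 = none)
  std::function<uint64_t(uint64_t)> fullScan;     // slow path over the section

  uint64_t find(uint64_t pc) {
    auto it = cache.find(pc);
    if (it != cache.end())
      return it->second;                          // served from the cache
    uint64_t fde = fullScan(pc);                  // expensive linear scan
    if (fde != 0)
      cache.emplace(pc, fde);                     // remember for next time
    return fde;
  }
};

int main() {
  int scans = 0;
  FdeLookup lookup;
  lookup.fullScan = [&scans](uint64_t) { ++scans; return UINT64_C(0x1234); };
  assert(lookup.find(0x400000) == 0x1234 && scans == 1);  // first call scans
  assert(lookup.find(0x400000) == 0x1234 && scans == 1);  // second call does not
  return 0;
}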
-
-
+
+
#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-template <typename A, typename R>
-bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc,
- const UnwindInfoSections &sects) {
- const bool log = false;
- if (log)
- fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n",
- (uint64_t)pc, (uint64_t)sects.dso_base);
-
- const UnwindSectionHeader<A> sectionHeader(_addressSpace,
- sects.compact_unwind_section);
- if (sectionHeader.version() != UNWIND_SECTION_VERSION)
- return false;
-
- // do a binary search of top level index to find page with unwind info
- pint_t targetFunctionOffset = pc - sects.dso_base;
- const UnwindSectionIndexArray<A> topIndex(_addressSpace,
- sects.compact_unwind_section
- + sectionHeader.indexSectionOffset());
- uint32_t low = 0;
- uint32_t high = sectionHeader.indexCount();
- uint32_t last = high - 1;
- while (low < high) {
- uint32_t mid = (low + high) / 2;
- //if ( log ) fprintf(stderr, "\tmid=%d, low=%d, high=%d, *mid=0x%08X\n",
- //mid, low, high, topIndex.functionOffset(mid));
- if (topIndex.functionOffset(mid) <= targetFunctionOffset) {
- if ((mid == last) ||
- (topIndex.functionOffset(mid + 1) > targetFunctionOffset)) {
- low = mid;
- break;
- } else {
- low = mid + 1;
- }
- } else {
- high = mid;
- }
- }
- const uint32_t firstLevelFunctionOffset = topIndex.functionOffset(low);
- const uint32_t firstLevelNextPageFunctionOffset =
- topIndex.functionOffset(low + 1);
- const pint_t secondLevelAddr =
- sects.compact_unwind_section + topIndex.secondLevelPagesSectionOffset(low);
- const pint_t lsdaArrayStartAddr =
- sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low);
- const pint_t lsdaArrayEndAddr =
- sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low+1);
- if (log)
- fprintf(stderr, "\tfirst level search for result index=%d "
- "to secondLevelAddr=0x%llX\n",
- low, (uint64_t) secondLevelAddr);
- // do a binary search of second level page index
- uint32_t encoding = 0;
- pint_t funcStart = 0;
- pint_t funcEnd = 0;
- pint_t lsda = 0;
- pint_t personality = 0;
- uint32_t pageKind = _addressSpace.get32(secondLevelAddr);
- if (pageKind == UNWIND_SECOND_LEVEL_REGULAR) {
- // regular page
- UnwindSectionRegularPageHeader<A> pageHeader(_addressSpace,
- secondLevelAddr);
- UnwindSectionRegularArray<A> pageIndex(
- _addressSpace, secondLevelAddr + pageHeader.entryPageOffset());
- // binary search looks for entry with e where index[e].offset <= pc <
- // index[e+1].offset
- if (log)
- fprintf(stderr, "\tbinary search for targetFunctionOffset=0x%08llX in "
- "regular page starting at secondLevelAddr=0x%llX\n",
- (uint64_t) targetFunctionOffset, (uint64_t) secondLevelAddr);
- low = 0;
- high = pageHeader.entryCount();
- while (low < high) {
- uint32_t mid = (low + high) / 2;
- if (pageIndex.functionOffset(mid) <= targetFunctionOffset) {
- if (mid == (uint32_t)(pageHeader.entryCount() - 1)) {
- // at end of table
- low = mid;
- funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base;
- break;
- } else if (pageIndex.functionOffset(mid + 1) > targetFunctionOffset) {
- // next is too big, so we found it
- low = mid;
- funcEnd = pageIndex.functionOffset(low + 1) + sects.dso_base;
- break;
- } else {
- low = mid + 1;
- }
- } else {
- high = mid;
- }
- }
- encoding = pageIndex.encoding(low);
- funcStart = pageIndex.functionOffset(low) + sects.dso_base;
- if (pc < funcStart) {
- if (log)
- fprintf(
- stderr,
- "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n",
- (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd);
- return false;
- }
- if (pc > funcEnd) {
- if (log)
- fprintf(
- stderr,
- "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n",
- (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd);
- return false;
- }
- } else if (pageKind == UNWIND_SECOND_LEVEL_COMPRESSED) {
- // compressed page
- UnwindSectionCompressedPageHeader<A> pageHeader(_addressSpace,
- secondLevelAddr);
- UnwindSectionCompressedArray<A> pageIndex(
- _addressSpace, secondLevelAddr + pageHeader.entryPageOffset());
- const uint32_t targetFunctionPageOffset =
- (uint32_t)(targetFunctionOffset - firstLevelFunctionOffset);
- // binary search looks for entry with e where index[e].offset <= pc <
- // index[e+1].offset
- if (log)
- fprintf(stderr, "\tbinary search of compressed page starting at "
- "secondLevelAddr=0x%llX\n",
- (uint64_t) secondLevelAddr);
- low = 0;
- last = pageHeader.entryCount() - 1;
- high = pageHeader.entryCount();
- while (low < high) {
- uint32_t mid = (low + high) / 2;
- if (pageIndex.functionOffset(mid) <= targetFunctionPageOffset) {
- if ((mid == last) ||
- (pageIndex.functionOffset(mid + 1) > targetFunctionPageOffset)) {
- low = mid;
- break;
- } else {
- low = mid + 1;
- }
- } else {
- high = mid;
- }
- }
- funcStart = pageIndex.functionOffset(low) + firstLevelFunctionOffset
- + sects.dso_base;
- if (low < last)
- funcEnd =
- pageIndex.functionOffset(low + 1) + firstLevelFunctionOffset
- + sects.dso_base;
- else
- funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base;
- if (pc < funcStart) {
+template <typename A, typename R>
+bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc,
+ const UnwindInfoSections &sects) {
+ const bool log = false;
+ if (log)
+ fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n",
+ (uint64_t)pc, (uint64_t)sects.dso_base);
+
+ const UnwindSectionHeader<A> sectionHeader(_addressSpace,
+ sects.compact_unwind_section);
+ if (sectionHeader.version() != UNWIND_SECTION_VERSION)
+ return false;
+
+ // do a binary search of top level index to find page with unwind info
+ pint_t targetFunctionOffset = pc - sects.dso_base;
+ const UnwindSectionIndexArray<A> topIndex(_addressSpace,
+ sects.compact_unwind_section
+ + sectionHeader.indexSectionOffset());
+ uint32_t low = 0;
+ uint32_t high = sectionHeader.indexCount();
+ uint32_t last = high - 1;
+ while (low < high) {
+ uint32_t mid = (low + high) / 2;
+ //if ( log ) fprintf(stderr, "\tmid=%d, low=%d, high=%d, *mid=0x%08X\n",
+ //mid, low, high, topIndex.functionOffset(mid));
+ if (topIndex.functionOffset(mid) <= targetFunctionOffset) {
+ if ((mid == last) ||
+ (topIndex.functionOffset(mid + 1) > targetFunctionOffset)) {
+ low = mid;
+ break;
+ } else {
+ low = mid + 1;
+ }
+ } else {
+ high = mid;
+ }
+ }
+ const uint32_t firstLevelFunctionOffset = topIndex.functionOffset(low);
+ const uint32_t firstLevelNextPageFunctionOffset =
+ topIndex.functionOffset(low + 1);
+ const pint_t secondLevelAddr =
+ sects.compact_unwind_section + topIndex.secondLevelPagesSectionOffset(low);
+ const pint_t lsdaArrayStartAddr =
+ sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low);
+ const pint_t lsdaArrayEndAddr =
+ sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low+1);
+ if (log)
+ fprintf(stderr, "\tfirst level search for result index=%d "
+ "to secondLevelAddr=0x%llX\n",
+ low, (uint64_t) secondLevelAddr);
+ // do a binary search of second level page index
+ uint32_t encoding = 0;
+ pint_t funcStart = 0;
+ pint_t funcEnd = 0;
+ pint_t lsda = 0;
+ pint_t personality = 0;
+ uint32_t pageKind = _addressSpace.get32(secondLevelAddr);
+ if (pageKind == UNWIND_SECOND_LEVEL_REGULAR) {
+ // regular page
+ UnwindSectionRegularPageHeader<A> pageHeader(_addressSpace,
+ secondLevelAddr);
+ UnwindSectionRegularArray<A> pageIndex(
+ _addressSpace, secondLevelAddr + pageHeader.entryPageOffset());
+ // binary search looks for entry with e where index[e].offset <= pc <
+ // index[e+1].offset
+ if (log)
+ fprintf(stderr, "\tbinary search for targetFunctionOffset=0x%08llX in "
+ "regular page starting at secondLevelAddr=0x%llX\n",
+ (uint64_t) targetFunctionOffset, (uint64_t) secondLevelAddr);
+ low = 0;
+ high = pageHeader.entryCount();
+ while (low < high) {
+ uint32_t mid = (low + high) / 2;
+ if (pageIndex.functionOffset(mid) <= targetFunctionOffset) {
+ if (mid == (uint32_t)(pageHeader.entryCount() - 1)) {
+ // at end of table
+ low = mid;
+ funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base;
+ break;
+ } else if (pageIndex.functionOffset(mid + 1) > targetFunctionOffset) {
+ // next is too big, so we found it
+ low = mid;
+ funcEnd = pageIndex.functionOffset(low + 1) + sects.dso_base;
+ break;
+ } else {
+ low = mid + 1;
+ }
+ } else {
+ high = mid;
+ }
+ }
+ encoding = pageIndex.encoding(low);
+ funcStart = pageIndex.functionOffset(low) + sects.dso_base;
+ if (pc < funcStart) {
+ if (log)
+ fprintf(
+ stderr,
+ "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n",
+ (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd);
+ return false;
+ }
+ if (pc > funcEnd) {
+ if (log)
+ fprintf(
+ stderr,
+ "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n",
+ (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd);
+ return false;
+ }
+ } else if (pageKind == UNWIND_SECOND_LEVEL_COMPRESSED) {
+ // compressed page
+ UnwindSectionCompressedPageHeader<A> pageHeader(_addressSpace,
+ secondLevelAddr);
+ UnwindSectionCompressedArray<A> pageIndex(
+ _addressSpace, secondLevelAddr + pageHeader.entryPageOffset());
+ const uint32_t targetFunctionPageOffset =
+ (uint32_t)(targetFunctionOffset - firstLevelFunctionOffset);
+ // binary search looks for entry with e where index[e].offset <= pc <
+ // index[e+1].offset
+ if (log)
+ fprintf(stderr, "\tbinary search of compressed page starting at "
+ "secondLevelAddr=0x%llX\n",
+ (uint64_t) secondLevelAddr);
+ low = 0;
+ last = pageHeader.entryCount() - 1;
+ high = pageHeader.entryCount();
+ while (low < high) {
+ uint32_t mid = (low + high) / 2;
+ if (pageIndex.functionOffset(mid) <= targetFunctionPageOffset) {
+ if ((mid == last) ||
+ (pageIndex.functionOffset(mid + 1) > targetFunctionPageOffset)) {
+ low = mid;
+ break;
+ } else {
+ low = mid + 1;
+ }
+ } else {
+ high = mid;
+ }
+ }
+ funcStart = pageIndex.functionOffset(low) + firstLevelFunctionOffset
+ + sects.dso_base;
+ if (low < last)
+ funcEnd =
+ pageIndex.functionOffset(low + 1) + firstLevelFunctionOffset
+ + sects.dso_base;
+ else
+ funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base;
+ if (pc < funcStart) {
_LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX "
"not in second level compressed unwind table. "
"funcStart=0x%llX",
- (uint64_t) pc, (uint64_t) funcStart);
- return false;
- }
- if (pc > funcEnd) {
+ (uint64_t) pc, (uint64_t) funcStart);
+ return false;
+ }
+ if (pc > funcEnd) {
_LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX "
"not in second level compressed unwind table. "
"funcEnd=0x%llX",
- (uint64_t) pc, (uint64_t) funcEnd);
- return false;
- }
- uint16_t encodingIndex = pageIndex.encodingIndex(low);
- if (encodingIndex < sectionHeader.commonEncodingsArrayCount()) {
- // encoding is in common table in section header
- encoding = _addressSpace.get32(
- sects.compact_unwind_section +
- sectionHeader.commonEncodingsArraySectionOffset() +
- encodingIndex * sizeof(uint32_t));
- } else {
- // encoding is in page specific table
- uint16_t pageEncodingIndex =
- encodingIndex - (uint16_t)sectionHeader.commonEncodingsArrayCount();
- encoding = _addressSpace.get32(secondLevelAddr +
- pageHeader.encodingsPageOffset() +
- pageEncodingIndex * sizeof(uint32_t));
- }
- } else {
+ (uint64_t) pc, (uint64_t) funcEnd);
+ return false;
+ }
+ uint16_t encodingIndex = pageIndex.encodingIndex(low);
+ if (encodingIndex < sectionHeader.commonEncodingsArrayCount()) {
+ // encoding is in common table in section header
+ encoding = _addressSpace.get32(
+ sects.compact_unwind_section +
+ sectionHeader.commonEncodingsArraySectionOffset() +
+ encodingIndex * sizeof(uint32_t));
+ } else {
+ // encoding is in page specific table
+ uint16_t pageEncodingIndex =
+ encodingIndex - (uint16_t)sectionHeader.commonEncodingsArrayCount();
+ encoding = _addressSpace.get32(secondLevelAddr +
+ pageHeader.encodingsPageOffset() +
+ pageEncodingIndex * sizeof(uint32_t));
+ }
+ } else {
_LIBUNWIND_DEBUG_LOG(
"malformed __unwind_info at 0x%0llX bad second level page",
(uint64_t)sects.compact_unwind_section);
- return false;
- }
-
- // look up LSDA, if encoding says function has one
- if (encoding & UNWIND_HAS_LSDA) {
- UnwindSectionLsdaArray<A> lsdaIndex(_addressSpace, lsdaArrayStartAddr);
- uint32_t funcStartOffset = (uint32_t)(funcStart - sects.dso_base);
- low = 0;
- high = (uint32_t)(lsdaArrayEndAddr - lsdaArrayStartAddr) /
- sizeof(unwind_info_section_header_lsda_index_entry);
- // binary search looks for entry with exact match for functionOffset
- if (log)
- fprintf(stderr,
- "\tbinary search of lsda table for targetFunctionOffset=0x%08X\n",
- funcStartOffset);
- while (low < high) {
- uint32_t mid = (low + high) / 2;
- if (lsdaIndex.functionOffset(mid) == funcStartOffset) {
- lsda = lsdaIndex.lsdaOffset(mid) + sects.dso_base;
- break;
- } else if (lsdaIndex.functionOffset(mid) < funcStartOffset) {
- low = mid + 1;
- } else {
- high = mid;
- }
- }
- if (lsda == 0) {
- _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with HAS_LSDA bit set for "
+ return false;
+ }
+
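Each compressed second-level entry used above packs a 24-bit function offset (relative to the page's first-level offset) together with an 8-bit encoding index; indices below the common-encodings count resolve against the section-wide table and the rest against the page-local one. A hedged sketch of that decoding, with plain vectors standing in for the mapped tables and the bit layout taken as an assumption:

#include <cassert>
#include <cstdint>
#include <vector>

// Assumed layout of one compressed second-level entry:
//   bits 0..23  : function offset within the page
//   bits 24..31 : encoding index
static uint32_t entryFuncOffset(uint32_t e) { return e & 0x00ffffffu; }
static uint32_t entryEncodingIndex(uint32_t e) { return e >> 24; }

// Indices below the common count refer to the section-wide table; the rest
// index into the page-local table, mirroring the lookup above.
static uint32_t resolveEncoding(uint32_t index,
                                const std::vector<uint32_t> &commonTable,
                                const std::vector<uint32_t> &pageTable) {
  if (index < commonTable.size())
    return commonTable[index];
  return pageTable[index - commonTable.size()];
}

int main() {
  uint32_t entry = (3u << 24) | 0x001234u;        // encoding index 3, offset 0x1234
  assert(entryFuncOffset(entry) == 0x1234);
  std::vector<uint32_t> common = {0x01000000u, 0x02000000u};
  std::vector<uint32_t> page = {0x03000000u, 0x04000000u};
  assert(resolveEncoding(entryEncodingIndex(entry), common, page) == 0x04000000u);
  return 0;
}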
+ // look up LSDA, if encoding says function has one
+ if (encoding & UNWIND_HAS_LSDA) {
+ UnwindSectionLsdaArray<A> lsdaIndex(_addressSpace, lsdaArrayStartAddr);
+ uint32_t funcStartOffset = (uint32_t)(funcStart - sects.dso_base);
+ low = 0;
+ high = (uint32_t)(lsdaArrayEndAddr - lsdaArrayStartAddr) /
+ sizeof(unwind_info_section_header_lsda_index_entry);
+ // binary search looks for entry with exact match for functionOffset
+ if (log)
+ fprintf(stderr,
+ "\tbinary search of lsda table for targetFunctionOffset=0x%08X\n",
+ funcStartOffset);
+ while (low < high) {
+ uint32_t mid = (low + high) / 2;
+ if (lsdaIndex.functionOffset(mid) == funcStartOffset) {
+ lsda = lsdaIndex.lsdaOffset(mid) + sects.dso_base;
+ break;
+ } else if (lsdaIndex.functionOffset(mid) < funcStartOffset) {
+ low = mid + 1;
+ } else {
+ high = mid;
+ }
+ }
+ if (lsda == 0) {
+ _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with HAS_LSDA bit set for "
"pc=0x%0llX, but lsda table has no entry",
- encoding, (uint64_t) pc);
- return false;
- }
- }
-
+ encoding, (uint64_t) pc);
+ return false;
+ }
+ }
+
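The LSDA index consulted above is an array of (functionOffset, lsdaOffset) pairs sorted by function offset and searched for an exact match; a miss while UNWIND_HAS_LSDA is set is treated as malformed data. A standalone sketch of that exact-match search (LsdaIndexEntry and findLsda are hypothetical, not the real accessor classes):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical LSDA index record: sorted by functionOffset, matched exactly.
struct LsdaIndexEntry { uint32_t functionOffset; uint32_t lsdaOffset; };

// Returns the lsdaOffset for funcOffset, or 0 when the table has no entry.
static uint32_t findLsda(const LsdaIndexEntry *table, size_t count,
                         uint32_t funcOffset) {
  size_t low = 0, high = count;
  while (low < high) {
    size_t mid = (low + high) / 2;
    if (table[mid].functionOffset == funcOffset)
      return table[mid].lsdaOffset;                // exact hit
    if (table[mid].functionOffset < funcOffset)
      low = mid + 1;
    else
      high = mid;
  }
  return 0;   // encoding promised an LSDA but the table has no entry for it
}

int main() {
  const LsdaIndexEntry table[] = {{0x100, 0x9000}, {0x200, 0x9040}, {0x300, 0x9080}};
  assert(findLsda(table, 3, 0x200) == 0x9040);
  assert(findLsda(table, 3, 0x250) == 0);
  return 0;
}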
// extract personality routine, if encoding says function has one
- uint32_t personalityIndex = (encoding & UNWIND_PERSONALITY_MASK) >>
- (__builtin_ctz(UNWIND_PERSONALITY_MASK));
- if (personalityIndex != 0) {
- --personalityIndex; // change 1-based to zero-based index
+ uint32_t personalityIndex = (encoding & UNWIND_PERSONALITY_MASK) >>
+ (__builtin_ctz(UNWIND_PERSONALITY_MASK));
+ if (personalityIndex != 0) {
+ --personalityIndex; // change 1-based to zero-based index
if (personalityIndex >= sectionHeader.personalityArrayCount()) {
- _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, "
+ _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, "
"but personality table has only %d entries",
- encoding, personalityIndex,
- sectionHeader.personalityArrayCount());
- return false;
- }
- int32_t personalityDelta = (int32_t)_addressSpace.get32(
- sects.compact_unwind_section +
- sectionHeader.personalityArraySectionOffset() +
- personalityIndex * sizeof(uint32_t));
- pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta;
- personality = _addressSpace.getP(personalityPointer);
- if (log)
- fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), "
- "personalityDelta=0x%08X, personality=0x%08llX\n",
- (uint64_t) pc, personalityDelta, (uint64_t) personality);
- }
-
- if (log)
- fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), "
- "encoding=0x%08X, lsda=0x%08llX for funcStart=0x%llX\n",
- (uint64_t) pc, encoding, (uint64_t) lsda, (uint64_t) funcStart);
- _info.start_ip = funcStart;
- _info.end_ip = funcEnd;
- _info.lsda = lsda;
- _info.handler = personality;
- _info.gp = 0;
- _info.flags = 0;
- _info.format = encoding;
- _info.unwind_info = 0;
- _info.unwind_info_size = 0;
- _info.extra = sects.dso_base;
- return true;
-}
+ encoding, personalityIndex,
+ sectionHeader.personalityArrayCount());
+ return false;
+ }
+ int32_t personalityDelta = (int32_t)_addressSpace.get32(
+ sects.compact_unwind_section +
+ sectionHeader.personalityArraySectionOffset() +
+ personalityIndex * sizeof(uint32_t));
+ pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta;
+ personality = _addressSpace.getP(personalityPointer);
+ if (log)
+ fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), "
+ "personalityDelta=0x%08X, personality=0x%08llX\n",
+ (uint64_t) pc, personalityDelta, (uint64_t) personality);
+ }
+
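The personality field of a compact encoding is a 1-based index (zero meaning the function has no personality routine), and the table itself stores 32-bit deltas from the image base that are then dereferenced. A small sketch of turning the encoding bits into a zero-based table index; the mask and shift mirror the usual UNWIND_PERSONALITY_MASK layout but should be read as illustrative constants:

#include <cassert>
#include <cstdint>

constexpr uint32_t kPersonalityMask = 0x30000000u;   // assumed field position
constexpr uint32_t kPersonalityShift = 28;

// Zero-based personality table index, or -1 when the encoding names none.
static int personalityIndex(uint32_t encoding) {
  uint32_t field = (encoding & kPersonalityMask) >> kPersonalityShift;
  return field == 0 ? -1 : static_cast<int>(field) - 1;  // 1-based to 0-based
}

int main() {
  assert(personalityIndex(0x00000000u) == -1);  // no personality routine
  assert(personalityIndex(0x10000000u) == 0);   // first table entry
  assert(personalityIndex(0x30000000u) == 2);   // third table entry
  return 0;
}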
+ if (log)
+ fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), "
+ "encoding=0x%08X, lsda=0x%08llX for funcStart=0x%llX\n",
+ (uint64_t) pc, encoding, (uint64_t) lsda, (uint64_t) funcStart);
+ _info.start_ip = funcStart;
+ _info.end_ip = funcEnd;
+ _info.lsda = lsda;
+ _info.handler = personality;
+ _info.gp = 0;
+ _info.flags = 0;
+ _info.format = encoding;
+ _info.unwind_info = 0;
+ _info.unwind_info_size = 0;
+ _info.extra = sects.dso_base;
+ return true;
+}
#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-
-
+
+
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
-template <typename A, typename R>
+template <typename A, typename R>
bool UnwindCursor<A, R>::getInfoFromSEH(pint_t pc) {
pint_t base;
RUNTIME_FUNCTION *unwindEntry = lookUpSEHUnwindInfo(pc, &base);
@@ -1924,58 +1924,58 @@ bool UnwindCursor<A, R>::getInfoFromSEH(pint_t pc) {
template <typename A, typename R>
-void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
+void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
_isSigReturn = false;
#endif
pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
#if defined(_LIBUNWIND_ARM_EHABI)
- // Remove the thumb bit so the IP represents the actual instruction address.
- // This matches the behaviour of _Unwind_GetIP on arm.
- pc &= (pint_t)~0x1;
-#endif
-
+ // Remove the thumb bit so the IP represents the actual instruction address.
+ // This matches the behaviour of _Unwind_GetIP on arm.
+ pc &= (pint_t)~0x1;
+#endif
+
// Exit early if at the top of the stack.
if (pc == 0) {
_unwindInfoMissing = true;
return;
}
- // If the last line of a function is a "throw" the compiler sometimes
- // emits no instructions after the call to __cxa_throw. This means
- // the return address is actually the start of the next function.
- // To disambiguate this, back up the pc when we know it is a return
- // address.
- if (isReturnAddress)
- --pc;
-
- // Ask address space object to find unwind sections for this pc.
- UnwindInfoSections sects;
- if (_addressSpace.findUnwindSections(pc, sects)) {
+ // If the last line of a function is a "throw" the compiler sometimes
+ // emits no instructions after the call to __cxa_throw. This means
+ // the return address is actually the start of the next function.
+ // To disambiguate this, back up the pc when we know it is a return
+ // address.
+ if (isReturnAddress)
+ --pc;
+
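The two pc adjustments above can be read as one small normalisation step: clear the Thumb interworking bit so the address names the instruction itself, then back a return address up by one byte so the lookup lands inside the calling function rather than at the start of the next one. A sketch with hypothetical names, not code from this file:

#include <cassert>
#include <cstdint>

static uint64_t normalizePC(uint64_t pc, bool isReturnAddress, bool armEhabi) {
  if (armEhabi)
    pc &= ~UINT64_C(1);   // drop the Thumb bit; pc now names the instruction
  if (isReturnAddress)
    --pc;                 // step back into the call site
  return pc;
}

int main() {
  assert(normalizePC(0x8001, /*isReturnAddress=*/false, /*armEhabi=*/true) == 0x8000);
  assert(normalizePC(0x8000, /*isReturnAddress=*/true, /*armEhabi=*/false) == 0x7fff);
  return 0;
}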
+ // Ask address space object to find unwind sections for this pc.
+ UnwindInfoSections sects;
+ if (_addressSpace.findUnwindSections(pc, sects)) {
#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
- // If there is a compact unwind encoding table, look there first.
- if (sects.compact_unwind_section != 0) {
- if (this->getInfoFromCompactEncodingSection(pc, sects)) {
+ // If there is a compact unwind encoding table, look there first.
+ if (sects.compact_unwind_section != 0) {
+ if (this->getInfoFromCompactEncodingSection(pc, sects)) {
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- // Found info in table, done unless encoding says to use dwarf.
- uint32_t dwarfOffset;
- if ((sects.dwarf_section != 0) && compactSaysUseDwarf(&dwarfOffset)) {
- if (this->getInfoFromDwarfSection(pc, sects, dwarfOffset)) {
- // found info in dwarf, done
- return;
- }
- }
- #endif
- // If unwind table has entry, but entry says there is no unwind info,
- // record that we have no unwind info.
- if (_info.format == 0)
- _unwindInfoMissing = true;
- return;
- }
- }
+ // Found info in table, done unless encoding says to use dwarf.
+ uint32_t dwarfOffset;
+ if ((sects.dwarf_section != 0) && compactSaysUseDwarf(&dwarfOffset)) {
+ if (this->getInfoFromDwarfSection(pc, sects, dwarfOffset)) {
+ // found info in dwarf, done
+ return;
+ }
+ }
+ #endif
+ // If unwind table has entry, but entry says there is no unwind info,
+ // record that we have no unwind info.
+ if (_info.format == 0)
+ _unwindInfoMissing = true;
+ return;
+ }
+ }
#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-
+
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
// If there is SEH unwind info, look there next.
if (this->getInfoFromSEH(pc))
@@ -1983,61 +1983,61 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
#endif
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- // If there is dwarf unwind info, look there next.
- if (sects.dwarf_section != 0) {
- if (this->getInfoFromDwarfSection(pc, sects)) {
- // found info in dwarf, done
- return;
- }
- }
-#endif
-
+ // If there is dwarf unwind info, look there next.
+ if (sects.dwarf_section != 0) {
+ if (this->getInfoFromDwarfSection(pc, sects)) {
+ // found info in dwarf, done
+ return;
+ }
+ }
+#endif
+
#if defined(_LIBUNWIND_ARM_EHABI)
- // If there is ARM EHABI unwind info, look there next.
- if (sects.arm_section != 0 && this->getInfoFromEHABISection(pc, sects))
- return;
-#endif
- }
-
+ // If there is ARM EHABI unwind info, look there next.
+ if (sects.arm_section != 0 && this->getInfoFromEHABISection(pc, sects))
+ return;
+#endif
+ }
+
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
- // There is no static unwind info for this pc. Look to see if an FDE was
- // dynamically registered for it.
+ // There is no static unwind info for this pc. Look to see if an FDE was
+ // dynamically registered for it.
pint_t cachedFDE = DwarfFDECache<A>::findFDE(DwarfFDECache<A>::kSearchAll,
pc);
- if (cachedFDE != 0) {
+ if (cachedFDE != 0) {
typename CFI_Parser<A>::FDE_Info fdeInfo;
typename CFI_Parser<A>::CIE_Info cieInfo;
if (!CFI_Parser<A>::decodeFDE(_addressSpace, cachedFDE, &fdeInfo, &cieInfo))
if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, 0))
- return;
- }
-
- // Lastly, ask AddressSpace object about platform specific ways to locate
- // other FDEs.
- pint_t fde;
- if (_addressSpace.findOtherFDE(pc, fde)) {
+ return;
+ }
+
+ // Lastly, ask AddressSpace object about platform specific ways to locate
+ // other FDEs.
+ pint_t fde;
+ if (_addressSpace.findOtherFDE(pc, fde)) {
typename CFI_Parser<A>::FDE_Info fdeInfo;
typename CFI_Parser<A>::CIE_Info cieInfo;
- if (!CFI_Parser<A>::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) {
- // Double check this FDE is for a function that includes the pc.
+ if (!CFI_Parser<A>::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) {
+ // Double check this FDE is for a function that includes the pc.
if ((fdeInfo.pcStart <= pc) && (pc < fdeInfo.pcEnd))
if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, 0))
- return;
- }
- }
+ return;
+ }
+ }
#endif // #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-
+
#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
if (setInfoForSigReturn())
return;
#endif
- // no unwind info, flag that we can't reliably unwind
- _unwindInfoMissing = true;
-}
-
+ // no unwind info, flag that we can't reliably unwind
+ _unwindInfoMissing = true;
+}
+
#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
-template <typename A, typename R>
+template <typename A, typename R>
bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
// Look for the sigreturn trampoline. The trampoline's body is two
// specific instructions (see below). Typically the trampoline comes from the
@@ -2099,13 +2099,13 @@ int UnwindCursor<A, R>::stepThroughSigReturn(Registers_arm64 &) {
#endif // defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
template <typename A, typename R>
-int UnwindCursor<A, R>::step() {
-  // Bottom of stack is defined as when unwind info cannot be found.
- if (_unwindInfoMissing)
- return UNW_STEP_END;
-
- // Use unwinding info to modify register set as if function returned.
- int result;
+int UnwindCursor<A, R>::step() {
+  // Bottom of stack is defined as when unwind info cannot be found.
+ if (_unwindInfoMissing)
+ return UNW_STEP_END;
+
+ // Use unwinding info to modify register set as if function returned.
+ int result;
#if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
if (_isSigReturn) {
result = this->stepThroughSigReturn();
@@ -2120,45 +2120,45 @@ int UnwindCursor<A, R>::step() {
result = this->stepWithDwarfFDE();
#elif defined(_LIBUNWIND_ARM_EHABI)
result = this->stepWithEHABI();
-#else
- #error Need _LIBUNWIND_SUPPORT_COMPACT_UNWIND or \
+#else
+ #error Need _LIBUNWIND_SUPPORT_COMPACT_UNWIND or \
_LIBUNWIND_SUPPORT_SEH_UNWIND or \
- _LIBUNWIND_SUPPORT_DWARF_UNWIND or \
- _LIBUNWIND_ARM_EHABI
-#endif
- }
-
- // update info based on new PC
- if (result == UNW_STEP_SUCCESS) {
- this->setInfoBasedOnIPRegister(true);
- if (_unwindInfoMissing)
- return UNW_STEP_END;
- }
-
- return result;
-}
-
-template <typename A, typename R>
-void UnwindCursor<A, R>::getInfo(unw_proc_info_t *info) {
+ _LIBUNWIND_SUPPORT_DWARF_UNWIND or \
+ _LIBUNWIND_ARM_EHABI
+#endif
+ }
+
+ // update info based on new PC
+ if (result == UNW_STEP_SUCCESS) {
+ this->setInfoBasedOnIPRegister(true);
+ if (_unwindInfoMissing)
+ return UNW_STEP_END;
+ }
+
+ return result;
+}
+
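Callers never invoke step() directly; they drive it through the public cursor API, where each unw_step() call applies the current frame's unwind info to the register set and refreshes the procedure info exactly as above, returning a positive value while frames remain. A minimal local backtrace using that public API (assumes <libunwind.h> is available and the program links against libunwind):

#define UNW_LOCAL_ONLY
#include <libunwind.h>
#include <cstdio>

static void printBacktrace() {
  unw_context_t context;
  unw_cursor_t cursor;
  unw_getcontext(&context);            // capture the current register state
  unw_init_local(&cursor, &context);   // cursor over this thread's stack
  while (unw_step(&cursor) > 0) {      // positive while another frame exists
    unw_word_t ip = 0, sp = 0;
    unw_get_reg(&cursor, UNW_REG_IP, &ip);
    unw_get_reg(&cursor, UNW_REG_SP, &sp);
    std::printf("ip=0x%llx sp=0x%llx\n",
                (unsigned long long)ip, (unsigned long long)sp);
  }
}

int main() {
  printBacktrace();
  return 0;
}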
+template <typename A, typename R>
+void UnwindCursor<A, R>::getInfo(unw_proc_info_t *info) {
if (_unwindInfoMissing)
memset(info, 0, sizeof(*info));
else
*info = _info;
-}
-
-template <typename A, typename R>
-bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen,
- unw_word_t *offset) {
- return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP),
- buf, bufLen, offset);
-}
-
+}
+
+template <typename A, typename R>
+bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen,
+ unw_word_t *offset) {
+ return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP),
+ buf, bufLen, offset);
+}
+
#if defined(_LIBUNWIND_USE_CET)
extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) {
AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
return co->get_registers();
}
#endif
-} // namespace libunwind
-
-#endif // __UNWINDCURSOR_HPP__
+} // namespace libunwind
+
+#endif // __UNWINDCURSOR_HPP__
diff --git a/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c b/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c
index 1032fbf688..951d5d219a 100644
--- a/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c
+++ b/contrib/libs/libunwind/src/UnwindLevel1-gcc-ext.c
@@ -1,29 +1,29 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Implements gcc extensions to the C++ ABI Exception Handling Level 1.
-//
-//===----------------------------------------------------------------------===//
-
-#include <inttypes.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config.h"
-#include "libunwind_ext.h"
-#include "libunwind.h"
-#include "Unwind-EHABI.h"
-#include "unwind.h"
-
+//
+//
+// Implements gcc extensions to the C++ ABI Exception Handling Level 1.
+//
+//===----------------------------------------------------------------------===//
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "libunwind_ext.h"
+#include "libunwind.h"
+#include "Unwind-EHABI.h"
+#include "unwind.h"
+
#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS)
-
+
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
#define PRIVATE_1 private_[0]
#elif defined(_LIBUNWIND_ARM_EHABI)
@@ -32,185 +32,185 @@
#define PRIVATE_1 private_1
#endif
-/// Called by __cxa_rethrow().
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) {
+/// Called by __cxa_rethrow().
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API(
"_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%" PRIdPTR,
(void *)exception_object, (intptr_t)exception_object->PRIVATE_1);
-
- // If this is non-forced and a stopping place was found, then this is a
- // re-throw.
- // Call _Unwind_RaiseException() as if this was a new exception
+
+ // If this is non-forced and a stopping place was found, then this is a
+ // re-throw.
+ // Call _Unwind_RaiseException() as if this was a new exception
if (exception_object->PRIVATE_1 == 0) {
- return _Unwind_RaiseException(exception_object);
- // Will return if there is no catch clause, so that __cxa_rethrow can call
- // std::terminate().
- }
-
-  // Call through to _Unwind_Resume() which distinguishes between forced and
- // regular exceptions.
- _Unwind_Resume(exception_object);
- _LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()"
- " which unexpectedly returned");
-}
-
-/// Called by personality handler during phase 2 to get base address for data
-/// relative encodings.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetDataRelBase(struct _Unwind_Context *context) {
- (void)context;
+ return _Unwind_RaiseException(exception_object);
+ // Will return if there is no catch clause, so that __cxa_rethrow can call
+ // std::terminate().
+ }
+
+  // Call through to _Unwind_Resume() which distinguishes between forced and
+ // regular exceptions.
+ _Unwind_Resume(exception_object);
+ _LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()"
+ " which unexpectedly returned");
+}
+
+/// Called by personality handler during phase 2 to get base address for data
+/// relative encodings.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetDataRelBase(struct _Unwind_Context *context) {
+ (void)context;
_LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context);
- _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented");
-}
-
-
-/// Called by personality handler during phase 2 to get base address for text
-/// relative encodings.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetTextRelBase(struct _Unwind_Context *context) {
- (void)context;
+ _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented");
+}
+
+
+/// Called by personality handler during phase 2 to get base address for text
+/// relative encodings.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetTextRelBase(struct _Unwind_Context *context) {
+ (void)context;
_LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context);
- _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented");
-}
-
-
-/// Scans unwind information to find the function that contains the
-/// specified code address "pc".
-_LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) {
+ _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented");
+}
+
+
+/// Scans unwind information to find the function that contains the
+/// specified code address "pc".
+_LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) {
_LIBUNWIND_TRACE_API("_Unwind_FindEnclosingFunction(pc=%p)", pc);
- // This is slow, but works.
- // We create an unwind cursor then alter the IP to be pc
- unw_cursor_t cursor;
- unw_context_t uc;
- unw_proc_info_t info;
+ // This is slow, but works.
+ // We create an unwind cursor then alter the IP to be pc
+ unw_cursor_t cursor;
+ unw_context_t uc;
+ unw_proc_info_t info;
__unw_getcontext(&uc);
__unw_init_local(&cursor, &uc);
__unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc);
if (__unw_get_proc_info(&cursor, &info) == UNW_ESUCCESS)
return (void *)(intptr_t) info.start_ip;
- else
- return NULL;
-}
-
-/// Walk every frame and call trace function at each one. If trace function
-/// returns anything other than _URC_NO_REASON, then walk is terminated.
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) {
- unw_cursor_t cursor;
- unw_context_t uc;
+ else
+ return NULL;
+}
+
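A hedged usage sketch of the extension implemented above: hand it any code address and it reports the start of the enclosing function, or NULL when no unwind info covers that address. Whether <unwind.h> declares this GCC extension varies by platform, so treat the include and the function-pointer cast as assumptions:

#include <unwind.h>
#include <cstdio>

static void probeTarget() {}    // any function whose address we can take

int main() {
  void *pc = reinterpret_cast<void *>(&probeTarget);
  void *start = _Unwind_FindEnclosingFunction(pc);
  std::printf("pc=%p lies in a function starting at %p\n", pc, start);
  return 0;
}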
+/// Walk every frame and call trace function at each one. If trace function
+/// returns anything other than _URC_NO_REASON, then walk is terminated.
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) {
+ unw_cursor_t cursor;
+ unw_context_t uc;
__unw_getcontext(&uc);
__unw_init_local(&cursor, &uc);
-
+
_LIBUNWIND_TRACE_API("_Unwind_Backtrace(callback=%p)",
- (void *)(uintptr_t)callback);
-
+ (void *)(uintptr_t)callback);
+
#if defined(_LIBUNWIND_ARM_EHABI)
- // Create a mock exception object for force unwinding.
- _Unwind_Exception ex;
- memset(&ex, '\0', sizeof(ex));
+ // Create a mock exception object for force unwinding.
+ _Unwind_Exception ex;
+ memset(&ex, '\0', sizeof(ex));
strcpy((char *)&ex.exception_class, "CLNGUNW");
-#endif
-
- // walk each frame
- while (true) {
- _Unwind_Reason_Code result;
-
+#endif
+
+ // walk each frame
+ while (true) {
+ _Unwind_Reason_Code result;
+
#if !defined(_LIBUNWIND_ARM_EHABI)
// ask libunwind to get next frame (skip over first frame which is
- // _Unwind_Backtrace())
+ // _Unwind_Backtrace())
if (__unw_step(&cursor) <= 0) {
- _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached "
+ _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached "
"bottom of stack, returning %d",
- _URC_END_OF_STACK);
- return _URC_END_OF_STACK;
- }
-#else
- // Get the information for this frame.
- unw_proc_info_t frameInfo;
+ _URC_END_OF_STACK);
+ return _URC_END_OF_STACK;
+ }
+#else
+ // Get the information for this frame.
+ unw_proc_info_t frameInfo;
if (__unw_get_proc_info(&cursor, &frameInfo) != UNW_ESUCCESS) {
- return _URC_END_OF_STACK;
- }
-
- // Update the pr_cache in the mock exception object.
- const uint32_t* unwindInfo = (uint32_t *) frameInfo.unwind_info;
- ex.pr_cache.fnstart = frameInfo.start_ip;
- ex.pr_cache.ehtp = (_Unwind_EHT_Header *) unwindInfo;
- ex.pr_cache.additional= frameInfo.flags;
-
- struct _Unwind_Context *context = (struct _Unwind_Context *)&cursor;
- // Get and call the personality function to unwind the frame.
+ return _URC_END_OF_STACK;
+ }
+
+ // Update the pr_cache in the mock exception object.
+ const uint32_t* unwindInfo = (uint32_t *) frameInfo.unwind_info;
+ ex.pr_cache.fnstart = frameInfo.start_ip;
+ ex.pr_cache.ehtp = (_Unwind_EHT_Header *) unwindInfo;
+ ex.pr_cache.additional= frameInfo.flags;
+
+ struct _Unwind_Context *context = (struct _Unwind_Context *)&cursor;
+ // Get and call the personality function to unwind the frame.
_Unwind_Personality_Fn handler = (_Unwind_Personality_Fn)frameInfo.handler;
- if (handler == NULL) {
- return _URC_END_OF_STACK;
- }
- if (handler(_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, &ex, context) !=
- _URC_CONTINUE_UNWIND) {
- return _URC_END_OF_STACK;
- }
+ if (handler == NULL) {
+ return _URC_END_OF_STACK;
+ }
+ if (handler(_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, &ex, context) !=
+ _URC_CONTINUE_UNWIND) {
+ return _URC_END_OF_STACK;
+ }
#endif // defined(_LIBUNWIND_ARM_EHABI)
-
- // debugging
- if (_LIBUNWIND_TRACING_UNWINDING) {
- char functionName[512];
- unw_proc_info_t frame;
- unw_word_t offset;
+
+ // debugging
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionName[512];
+ unw_proc_info_t frame;
+ unw_word_t offset;
__unw_get_proc_name(&cursor, functionName, 512, &offset);
__unw_get_proc_info(&cursor, &frame);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
" _backtrace: start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", context=%p",
frame.start_ip, functionName, frame.lsda,
- (void *)&cursor);
- }
-
- // call trace function with this frame
- result = (*callback)((struct _Unwind_Context *)(&cursor), ref);
- if (result != _URC_NO_REASON) {
- _LIBUNWIND_TRACE_UNWINDING(
+ (void *)&cursor);
+ }
+
+ // call trace function with this frame
+ result = (*callback)((struct _Unwind_Context *)(&cursor), ref);
+ if (result != _URC_NO_REASON) {
+ _LIBUNWIND_TRACE_UNWINDING(
" _backtrace: ended because callback returned %d", result);
- return result;
- }
- }
-}
-
-
+ return result;
+ }
+ }
+}
+
+
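From the caller's side, the walk above is reached through the standard <unwind.h> interface: _Unwind_Backtrace() invokes the supplied callback once per frame until it returns something other than _URC_NO_REASON. A minimal, self-contained example that prints a few instruction pointers and then stops the walk:

#include <unwind.h>
#include <cstdio>

static _Unwind_Reason_Code frameCallback(struct _Unwind_Context *ctx, void *arg) {
  int *count = static_cast<int *>(arg);
  std::printf("frame %d: ip=0x%lx\n", *count,
              static_cast<unsigned long>(_Unwind_GetIP(ctx)));
  if (++*count >= 16)
    return _URC_NORMAL_STOP;    // any value except _URC_NO_REASON ends the walk
  return _URC_NO_REASON;        // keep walking toward the bottom of the stack
}

int main() {
  int frames = 0;
  _Unwind_Backtrace(frameCallback, &frames);
  return 0;
}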
/// Find DWARF unwind info for an address 'pc' in some function.
-_LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc,
- struct dwarf_eh_bases *bases) {
- // This is slow, but works.
- // We create an unwind cursor then alter the IP to be pc
- unw_cursor_t cursor;
- unw_context_t uc;
- unw_proc_info_t info;
+_LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc,
+ struct dwarf_eh_bases *bases) {
+ // This is slow, but works.
+ // We create an unwind cursor then alter the IP to be pc
+ unw_cursor_t cursor;
+ unw_context_t uc;
+ unw_proc_info_t info;
__unw_getcontext(&uc);
__unw_init_local(&cursor, &uc);
__unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc);
__unw_get_proc_info(&cursor, &info);
- bases->tbase = (uintptr_t)info.extra;
- bases->dbase = 0; // dbase not used on Mac OS X
- bases->func = (uintptr_t)info.start_ip;
+ bases->tbase = (uintptr_t)info.extra;
+ bases->dbase = 0; // dbase not used on Mac OS X
+ bases->func = (uintptr_t)info.start_ip;
_LIBUNWIND_TRACE_API("_Unwind_Find_FDE(pc=%p) => %p", pc,
(void *)(intptr_t) info.unwind_info);
return (void *)(intptr_t) info.unwind_info;
-}
-
-/// Returns the CFA (call frame area, or stack pointer at start of function)
-/// for the current context.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_word_t result;
+}
+
+/// Returns the CFA (call frame area, or stack pointer at start of function)
+/// for the current context.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_word_t result;
__unw_get_reg(cursor, UNW_REG_SP, &result);
_LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p) => 0x%" PRIxPTR,
(void *)context, result);
- return (uintptr_t)result;
-}
-
-
-/// Called by personality handler during phase 2 to get instruction pointer.
-/// ipBefore is a boolean that says if IP is already adjusted to be the call
-/// site address. Normally IP is the return address.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
- int *ipBefore) {
+ return (uintptr_t)result;
+}
+
+
+/// Called by personality handler during phase 2 to get instruction pointer.
+/// ipBefore is a boolean that says if IP is already adjusted to be the call
+/// site address. Normally IP is the return address.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
+ int *ipBefore) {
_LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p)", (void *)context);
int isSignalFrame = __unw_is_signal_frame((unw_cursor_t *)context);
// Negative means some kind of error (probably UNW_ENOINFO), but we have no
@@ -220,98 +220,98 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context,
*ipBefore = 0;
else
*ipBefore = 1;
- return _Unwind_GetIP(context);
-}
-
+ return _Unwind_GetIP(context);
+}
+
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-
-/// Called by programs with dynamic code generators that want
-/// to register a dynamically generated FDE.
-/// This function has existed on Mac OS X since 10.4, but
-/// was broken until 10.6.
-_LIBUNWIND_EXPORT void __register_frame(const void *fde) {
+
+/// Called by programs with dynamic code generators that want
+/// to register a dynamically generated FDE.
+/// This function has existed on Mac OS X since 10.4, but
+/// was broken until 10.6.
+_LIBUNWIND_EXPORT void __register_frame(const void *fde) {
_LIBUNWIND_TRACE_API("__register_frame(%p)", fde);
__unw_add_dynamic_fde((unw_word_t)(uintptr_t)fde);
-}
-
-
-/// Called by programs with dynamic code generators that want
-/// to unregister a dynamically generated FDE.
-/// This function has existed on Mac OS X since 10.4, but
-/// was broken until 10.6.
-_LIBUNWIND_EXPORT void __deregister_frame(const void *fde) {
+}
+
+
+/// Called by programs with dynamic code generators that want
+/// to unregister a dynamically generated FDE.
+/// This function has existed on Mac OS X since 10.4, but
+/// was broken until 10.6.
+_LIBUNWIND_EXPORT void __deregister_frame(const void *fde) {
_LIBUNWIND_TRACE_API("__deregister_frame(%p)", fde);
__unw_remove_dynamic_fde((unw_word_t)(uintptr_t)fde);
-}
-
-
-// The following register/deregister functions are gcc extensions.
-// They have existed on Mac OS X, but have never worked because Mac OS X
-// before 10.6 used keymgr to track known FDEs, but these functions
-// never got updated to use keymgr.
-// For now, we implement these as do-nothing functions to keep any existing
-// applications working. We also add the not in 10.6 symbol so that nwe
-// application won't be able to use them.
-
+}
+
+
+// The following register/deregister functions are gcc extensions.
+// They have existed on Mac OS X, but have never worked because Mac OS X
+// before 10.6 used keymgr to track known FDEs, but these functions
+// never got updated to use keymgr.
+// For now, we implement these as do-nothing functions to keep any existing
+// applications working. We also add the not-in-10.6 symbol so that new
+// applications won't be able to use them.
+
#if defined(_LIBUNWIND_SUPPORT_FRAME_APIS)
-_LIBUNWIND_EXPORT void __register_frame_info_bases(const void *fde, void *ob,
- void *tb, void *db) {
- (void)fde;
- (void)ob;
- (void)tb;
- (void)db;
+_LIBUNWIND_EXPORT void __register_frame_info_bases(const void *fde, void *ob,
+ void *tb, void *db) {
+ (void)fde;
+ (void)ob;
+ (void)tb;
+ (void)db;
_LIBUNWIND_TRACE_API("__register_frame_info_bases(%p,%p, %p, %p)",
- fde, ob, tb, db);
- // do nothing, this function never worked in Mac OS X
-}
-
-_LIBUNWIND_EXPORT void __register_frame_info(const void *fde, void *ob) {
- (void)fde;
- (void)ob;
+ fde, ob, tb, db);
+ // do nothing, this function never worked in Mac OS X
+}
+
+_LIBUNWIND_EXPORT void __register_frame_info(const void *fde, void *ob) {
+ (void)fde;
+ (void)ob;
_LIBUNWIND_TRACE_API("__register_frame_info(%p, %p)", fde, ob);
- // do nothing, this function never worked in Mac OS X
-}
-
-_LIBUNWIND_EXPORT void __register_frame_info_table_bases(const void *fde,
- void *ob, void *tb,
- void *db) {
- (void)fde;
- (void)ob;
- (void)tb;
- (void)db;
- _LIBUNWIND_TRACE_API("__register_frame_info_table_bases"
+ // do nothing, this function never worked in Mac OS X
+}
+
+_LIBUNWIND_EXPORT void __register_frame_info_table_bases(const void *fde,
+ void *ob, void *tb,
+ void *db) {
+ (void)fde;
+ (void)ob;
+ (void)tb;
+ (void)db;
+ _LIBUNWIND_TRACE_API("__register_frame_info_table_bases"
"(%p,%p, %p, %p)", fde, ob, tb, db);
- // do nothing, this function never worked in Mac OS X
-}
-
-_LIBUNWIND_EXPORT void __register_frame_info_table(const void *fde, void *ob) {
- (void)fde;
- (void)ob;
+ // do nothing, this function never worked in Mac OS X
+}
+
+_LIBUNWIND_EXPORT void __register_frame_info_table(const void *fde, void *ob) {
+ (void)fde;
+ (void)ob;
_LIBUNWIND_TRACE_API("__register_frame_info_table(%p, %p)", fde, ob);
- // do nothing, this function never worked in Mac OS X
-}
-
-_LIBUNWIND_EXPORT void __register_frame_table(const void *fde) {
- (void)fde;
+ // do nothing, this function never worked in Mac OS X
+}
+
+_LIBUNWIND_EXPORT void __register_frame_table(const void *fde) {
+ (void)fde;
_LIBUNWIND_TRACE_API("__register_frame_table(%p)", fde);
- // do nothing, this function never worked in Mac OS X
-}
-
-_LIBUNWIND_EXPORT void *__deregister_frame_info(const void *fde) {
- (void)fde;
+ // do nothing, this function never worked in Mac OS X
+}
+
+_LIBUNWIND_EXPORT void *__deregister_frame_info(const void *fde) {
+ (void)fde;
_LIBUNWIND_TRACE_API("__deregister_frame_info(%p)", fde);
- // do nothing, this function never worked in Mac OS X
- return NULL;
-}
-
-_LIBUNWIND_EXPORT void *__deregister_frame_info_bases(const void *fde) {
- (void)fde;
+ // do nothing, this function never worked in Mac OS X
+ return NULL;
+}
+
+_LIBUNWIND_EXPORT void *__deregister_frame_info_bases(const void *fde) {
+ (void)fde;
_LIBUNWIND_TRACE_API("__deregister_frame_info_bases(%p)", fde);
- // do nothing, this function never worked in Mac OS X
- return NULL;
-}
+ // do nothing, this function never worked in Mac OS X
+ return NULL;
+}
#endif // defined(_LIBUNWIND_SUPPORT_FRAME_APIS)
-
+
#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-
+
#endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS)
diff --git a/contrib/libs/libunwind/src/UnwindLevel1.c b/contrib/libs/libunwind/src/UnwindLevel1.c
index 82338e7d36..13ca17cc6a 100644
--- a/contrib/libs/libunwind/src/UnwindLevel1.c
+++ b/contrib/libs/libunwind/src/UnwindLevel1.c
@@ -1,38 +1,38 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Implements C++ ABI Exception Handling Level 1 as documented at:
+//
+//
+// Implements C++ ABI Exception Handling Level 1 as documented at:
// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html
-// using libunwind
-//
-//===----------------------------------------------------------------------===//
-
-// ARM EHABI does not specify _Unwind_{Get,Set}{GR,IP}(). Thus, we are
-// defining inline functions to delegate the function calls to
-// _Unwind_VRS_{Get,Set}(). However, some applications might declare the
-// function protetype directly (instead of including <unwind.h>), thus we need
-// to export these functions from libunwind.so as well.
-#define _LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE 1
-
-#include <inttypes.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
+// using libunwind
+//
+//===----------------------------------------------------------------------===//
+
+// ARM EHABI does not specify _Unwind_{Get,Set}{GR,IP}(). Thus, we are
+// defining inline functions to delegate the function calls to
+// _Unwind_VRS_{Get,Set}(). However, some applications might declare the
+// function prototype directly (instead of including <unwind.h>), thus we need
+// to export these functions from libunwind.so as well.
+#define _LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE 1
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
#include "cet_unwind.h"
#include "config.h"
-#include "libunwind.h"
+#include "libunwind.h"
#include "libunwind_ext.h"
-#include "unwind.h"
-
+#include "unwind.h"
+
#if !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__)
-
+
#ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND
// When CET is enabled, each "call" instruction will push return address to
@@ -67,10 +67,10 @@
} while (0)
#endif
-static _Unwind_Reason_Code
+static _Unwind_Reason_Code
unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
__unw_init_local(cursor, uc);
-
+
#ifdef _YNDX_LIBUNWIND_ENABLE_EXCEPTION_BACKTRACE
_Unwind_Backtrace_Buffer* backtrace_buffer =
exception_object->exception_class == _YNDX_LIBUNWIND_EXCEPTION_BACKTRACE_PRIMARY_CLASS ||
@@ -80,25 +80,25 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
: NULL;
#endif
- // Walk each frame looking for a place to stop.
+ // Walk each frame looking for a place to stop.
while (true) {
// Ask libunwind to get next frame (skip over first which is
- // _Unwind_RaiseException).
+ // _Unwind_RaiseException).
int stepResult = __unw_step(cursor);
- if (stepResult == 0) {
+ if (stepResult == 0) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): __unw_step() reached "
"bottom => _URC_END_OF_STACK",
(void *)exception_object);
- return _URC_END_OF_STACK;
- } else if (stepResult < 0) {
+ return _URC_END_OF_STACK;
+ } else if (stepResult < 0) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): __unw_step failed => "
"_URC_FATAL_PHASE1_ERROR",
(void *)exception_object);
- return _URC_FATAL_PHASE1_ERROR;
- }
-
+ return _URC_FATAL_PHASE1_ERROR;
+ }
+
#ifdef _YNDX_LIBUNWIND_ENABLE_EXCEPTION_BACKTRACE
if (backtrace_buffer && backtrace_buffer->size < _YNDX_LIBUNWIND_EXCEPTION_BACKTRACE_SIZE) {
unw_word_t pc;
@@ -107,472 +107,472 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
}
#endif
- // See if frame has code to run (has personality routine).
- unw_proc_info_t frameInfo;
- unw_word_t sp;
+ // See if frame has code to run (has personality routine).
+ unw_proc_info_t frameInfo;
+ unw_word_t sp;
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE1_ERROR",
(void *)exception_object);
- return _URC_FATAL_PHASE1_ERROR;
- }
-
+ return _URC_FATAL_PHASE1_ERROR;
+ }
+
#ifndef NDEBUG
- // When tracing, print state information.
- if (_LIBUNWIND_TRACING_UNWINDING) {
- char functionBuf[512];
- const char *functionName = functionBuf;
- unw_word_t offset;
+ // When tracing, print state information.
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionBuf[512];
+ const char *functionName = functionBuf;
+ unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
- (frameInfo.start_ip + offset > frameInfo.end_ip))
- functionName = ".anonymous.";
- unw_word_t pc;
+ (frameInfo.start_ip + offset > frameInfo.end_ip))
+ functionName = ".anonymous.";
+ unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR
", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "",
- (void *)exception_object, pc, frameInfo.start_ip, functionName,
- frameInfo.lsda, frameInfo.handler);
- }
+ (void *)exception_object, pc, frameInfo.start_ip, functionName,
+ frameInfo.lsda, frameInfo.handler);
+ }
#endif
-
- // If there is a personality routine, ask it if it will want to stop at
- // this frame.
- if (frameInfo.handler != 0) {
+
+ // If there is a personality routine, ask it if it will want to stop at
+ // this frame.
+ if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): calling personality function %p",
- (void *)exception_object, (void *)(uintptr_t)p);
- _Unwind_Reason_Code personalityResult =
- (*p)(1, _UA_SEARCH_PHASE, exception_object->exception_class,
+ (void *)exception_object, (void *)(uintptr_t)p);
+ _Unwind_Reason_Code personalityResult =
+ (*p)(1, _UA_SEARCH_PHASE, exception_object->exception_class,
exception_object, (struct _Unwind_Context *)(cursor));
- switch (personalityResult) {
- case _URC_HANDLER_FOUND:
- // found a catch clause or locals that need destructing in this frame
- // stop search and remember stack pointer at the frame
+ switch (personalityResult) {
+ case _URC_HANDLER_FOUND:
+ // found a catch clause or locals that need destructing in this frame
+ // stop search and remember stack pointer at the frame
__unw_get_reg(cursor, UNW_REG_SP, &sp);
- exception_object->private_2 = (uintptr_t)sp;
- _LIBUNWIND_TRACE_UNWINDING(
+ exception_object->private_2 = (uintptr_t)sp;
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND",
- (void *)exception_object);
- return _URC_NO_REASON;
-
- case _URC_CONTINUE_UNWIND:
- _LIBUNWIND_TRACE_UNWINDING(
+ (void *)exception_object);
+ return _URC_NO_REASON;
+
+ case _URC_CONTINUE_UNWIND:
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND",
- (void *)exception_object);
- // continue unwinding
- break;
-
- default:
- // something went wrong
- _LIBUNWIND_TRACE_UNWINDING(
+ (void *)exception_object);
+ // continue unwinding
+ break;
+
+ default:
+ // something went wrong
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR",
- (void *)exception_object);
- return _URC_FATAL_PHASE1_ERROR;
- }
- }
- }
- return _URC_NO_REASON;
-}
-
-
-static _Unwind_Reason_Code
+ (void *)exception_object);
+ return _URC_FATAL_PHASE1_ERROR;
+ }
+ }
+ }
+ return _URC_NO_REASON;
+}
+
+
+static _Unwind_Reason_Code
unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
__unw_init_local(cursor, uc);
-
+
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
- (void *)exception_object);
-
+ (void *)exception_object);
+
// uc is initialized by __unw_getcontext in the parent frame. The first stack
// frame walked is unwind_phase2.
unsigned framesWalked = 1;
- // Walk each frame until we reach where search phase said to stop.
- while (true) {
-
+ // Walk each frame until we reach where search phase said to stop.
+ while (true) {
+
// Ask libunwind to get next frame (skip over first which is
- // _Unwind_RaiseException).
+ // _Unwind_RaiseException).
int stepResult = __unw_step(cursor);
- if (stepResult == 0) {
+ if (stepResult == 0) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_step() reached "
"bottom => _URC_END_OF_STACK",
(void *)exception_object);
- return _URC_END_OF_STACK;
- } else if (stepResult < 0) {
+ return _URC_END_OF_STACK;
+ } else if (stepResult < 0) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_step failed => "
"_URC_FATAL_PHASE1_ERROR",
(void *)exception_object);
- return _URC_FATAL_PHASE2_ERROR;
- }
-
- // Get info about this frame.
- unw_word_t sp;
- unw_proc_info_t frameInfo;
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
+ // Get info about this frame.
+ unw_word_t sp;
+ unw_proc_info_t frameInfo;
__unw_get_reg(cursor, UNW_REG_SP, &sp);
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE1_ERROR",
(void *)exception_object);
- return _URC_FATAL_PHASE2_ERROR;
- }
-
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
#ifndef NDEBUG
- // When tracing, print state information.
- if (_LIBUNWIND_TRACING_UNWINDING) {
- char functionBuf[512];
- const char *functionName = functionBuf;
- unw_word_t offset;
+ // When tracing, print state information.
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionBuf[512];
+ const char *functionName = functionBuf;
+ unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
- (frameInfo.start_ip + offset > frameInfo.end_ip))
- functionName = ".anonymous.";
+ (frameInfo.start_ip + offset > frameInfo.end_ip))
+ functionName = ".anonymous.";
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR
", func=%s, sp=0x%" PRIxPTR ", lsda=0x%" PRIxPTR
", personality=0x%" PRIxPTR,
- (void *)exception_object, frameInfo.start_ip,
- functionName, sp, frameInfo.lsda,
- frameInfo.handler);
- }
+ (void *)exception_object, frameInfo.start_ip,
+ functionName, sp, frameInfo.lsda,
+ frameInfo.handler);
+ }
#endif
-
+
++framesWalked;
- // If there is a personality routine, tell it we are unwinding.
- if (frameInfo.handler != 0) {
+ // If there is a personality routine, tell it we are unwinding.
+ if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler);
- _Unwind_Action action = _UA_CLEANUP_PHASE;
- if (sp == exception_object->private_2) {
- // Tell personality this was the frame it marked in phase 1.
- action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME);
- }
- _Unwind_Reason_Code personalityResult =
- (*p)(1, action, exception_object->exception_class, exception_object,
+ _Unwind_Action action = _UA_CLEANUP_PHASE;
+ if (sp == exception_object->private_2) {
+ // Tell personality this was the frame it marked in phase 1.
+ action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME);
+ }
+ _Unwind_Reason_Code personalityResult =
+ (*p)(1, action, exception_object->exception_class, exception_object,
(struct _Unwind_Context *)(cursor));
- switch (personalityResult) {
- case _URC_CONTINUE_UNWIND:
- // Continue unwinding
- _LIBUNWIND_TRACE_UNWINDING(
+ switch (personalityResult) {
+ case _URC_CONTINUE_UNWIND:
+ // Continue unwinding
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND",
- (void *)exception_object);
- if (sp == exception_object->private_2) {
- // Phase 1 said we would stop at this frame, but we did not...
- _LIBUNWIND_ABORT("during phase1 personality function said it would "
- "stop here, but now in phase2 it did not stop here");
- }
- break;
- case _URC_INSTALL_CONTEXT:
- _LIBUNWIND_TRACE_UNWINDING(
+ (void *)exception_object);
+ if (sp == exception_object->private_2) {
+ // Phase 1 said we would stop at this frame, but we did not...
+ _LIBUNWIND_ABORT("during phase1 personality function said it would "
+ "stop here, but now in phase2 it did not stop here");
+ }
+ break;
+ case _URC_INSTALL_CONTEXT:
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT",
- (void *)exception_object);
- // Personality routine says to transfer control to landing pad.
- // We may get control back if landing pad calls _Unwind_Resume().
- if (_LIBUNWIND_TRACING_UNWINDING) {
- unw_word_t pc;
+ (void *)exception_object);
+ // Personality routine says to transfer control to landing pad.
+ // We may get control back if landing pad calls _Unwind_Resume().
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
__unw_get_reg(cursor, UNW_REG_SP, &sp);
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering "
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering "
"user code with ip=0x%" PRIxPTR
", sp=0x%" PRIxPTR,
- (void *)exception_object, pc, sp);
- }
+ (void *)exception_object, pc, sp);
+ }
__unw_phase2_resume(cursor, framesWalked);
// __unw_phase2_resume() only returns if there was an error.
- return _URC_FATAL_PHASE2_ERROR;
- default:
- // Personality routine returned an unknown result code.
- _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
- personalityResult);
- return _URC_FATAL_PHASE2_ERROR;
- }
- }
- }
-
- // Clean up phase did not resume at the frame that the search phase
- // said it would...
- return _URC_FATAL_PHASE2_ERROR;
-}
-
-static _Unwind_Reason_Code
+ return _URC_FATAL_PHASE2_ERROR;
+ default:
+ // Personality routine returned an unknown result code.
+ _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
+ personalityResult);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ }
+ }
+
+ // Clean up phase did not resume at the frame that the search phase
+ // said it would...
+ return _URC_FATAL_PHASE2_ERROR;
+}
+
+static _Unwind_Reason_Code
unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
- _Unwind_Exception *exception_object,
- _Unwind_Stop_Fn stop, void *stop_parameter) {
+ _Unwind_Exception *exception_object,
+ _Unwind_Stop_Fn stop, void *stop_parameter) {
__unw_init_local(cursor, uc);
-
+
// uc is initialized by __unw_getcontext in the parent frame. The first stack
// frame walked is unwind_phase2_forced.
unsigned framesWalked = 1;
- // Walk each frame until we reach where search phase said to stop
+ // Walk each frame until we reach where search phase said to stop
while (__unw_step(cursor) > 0) {
-
- // Update info about this frame.
- unw_proc_info_t frameInfo;
+
+ // Update info about this frame.
+ unw_proc_info_t frameInfo;
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step "
"failed => _URC_END_OF_STACK",
- (void *)exception_object);
- return _URC_FATAL_PHASE2_ERROR;
- }
-
+ (void *)exception_object);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
#ifndef NDEBUG
- // When tracing, print state information.
- if (_LIBUNWIND_TRACING_UNWINDING) {
- char functionBuf[512];
- const char *functionName = functionBuf;
- unw_word_t offset;
+ // When tracing, print state information.
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionBuf[512];
+ const char *functionName = functionBuf;
+ unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
- (frameInfo.start_ip + offset > frameInfo.end_ip))
- functionName = ".anonymous.";
- _LIBUNWIND_TRACE_UNWINDING(
+ (frameInfo.start_ip + offset > frameInfo.end_ip))
+ functionName = ".anonymous.";
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIxPTR
", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR,
- (void *)exception_object, frameInfo.start_ip, functionName,
- frameInfo.lsda, frameInfo.handler);
- }
+ (void *)exception_object, frameInfo.start_ip, functionName,
+ frameInfo.lsda, frameInfo.handler);
+ }
#endif
-
- // Call stop function at each frame.
- _Unwind_Action action =
- (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE);
- _Unwind_Reason_Code stopResult =
- (*stop)(1, action, exception_object->exception_class, exception_object,
+
+ // Call stop function at each frame.
+ _Unwind_Action action =
+ (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE);
+ _Unwind_Reason_Code stopResult =
+ (*stop)(1, action, exception_object->exception_class, exception_object,
(struct _Unwind_Context *)(cursor), stop_parameter);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2_forced(ex_ojb=%p): stop function returned %d",
- (void *)exception_object, stopResult);
- if (stopResult != _URC_NO_REASON) {
- _LIBUNWIND_TRACE_UNWINDING(
+ (void *)exception_object, stopResult);
+ if (stopResult != _URC_NO_REASON) {
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2_forced(ex_ojb=%p): stopped by stop function",
- (void *)exception_object);
- return _URC_FATAL_PHASE2_ERROR;
- }
-
+ (void *)exception_object);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
++framesWalked;
- // If there is a personality routine, tell it we are unwinding.
- if (frameInfo.handler != 0) {
+ // If there is a personality routine, tell it we are unwinding.
+ if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler);
- _LIBUNWIND_TRACE_UNWINDING(
+ _LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2_forced(ex_ojb=%p): calling personality function %p",
- (void *)exception_object, (void *)(uintptr_t)p);
- _Unwind_Reason_Code personalityResult =
- (*p)(1, action, exception_object->exception_class, exception_object,
+ (void *)exception_object, (void *)(uintptr_t)p);
+ _Unwind_Reason_Code personalityResult =
+ (*p)(1, action, exception_object->exception_class, exception_object,
(struct _Unwind_Context *)(cursor));
- switch (personalityResult) {
- case _URC_CONTINUE_UNWIND:
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
- "personality returned "
+ switch (personalityResult) {
+ case _URC_CONTINUE_UNWIND:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned "
"_URC_CONTINUE_UNWIND",
- (void *)exception_object);
- // Destructors called, continue unwinding
- break;
- case _URC_INSTALL_CONTEXT:
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
- "personality returned "
+ (void *)exception_object);
+ // Destructors called, continue unwinding
+ break;
+ case _URC_INSTALL_CONTEXT:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned "
"_URC_INSTALL_CONTEXT",
- (void *)exception_object);
- // We may get control back if landing pad calls _Unwind_Resume().
+ (void *)exception_object);
+ // We may get control back if landing pad calls _Unwind_Resume().
__unw_phase2_resume(cursor, framesWalked);
- break;
- default:
- // Personality routine returned an unknown result code.
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
- "personality returned %d, "
+ break;
+ default:
+ // Personality routine returned an unknown result code.
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned %d, "
"_URC_FATAL_PHASE2_ERROR",
- (void *)exception_object, personalityResult);
- return _URC_FATAL_PHASE2_ERROR;
- }
- }
- }
-
- // Call stop function one last time and tell it we've reached the end
- // of the stack.
- _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop "
+ (void *)exception_object, personalityResult);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ }
+ }
+
+ // Call stop function one last time and tell it we've reached the end
+ // of the stack.
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop "
"function with _UA_END_OF_STACK",
- (void *)exception_object);
- _Unwind_Action lastAction =
- (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK);
- (*stop)(1, lastAction, exception_object->exception_class, exception_object,
+ (void *)exception_object);
+ _Unwind_Action lastAction =
+ (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK);
+ (*stop)(1, lastAction, exception_object->exception_class, exception_object,
(struct _Unwind_Context *)(cursor), stop_parameter);
-
- // Clean up phase did not resume at the frame that the search phase said it
- // would.
- return _URC_FATAL_PHASE2_ERROR;
-}
-
-
-/// Called by __cxa_throw. Only returns if there is a fatal error.
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_RaiseException(_Unwind_Exception *exception_object) {
+
+ // Clean up phase did not resume at the frame that the search phase said it
+ // would.
+ return _URC_FATAL_PHASE2_ERROR;
+}
+
+
+/// Called by __cxa_throw. Only returns if there is a fatal error.
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_RaiseException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)",
- (void *)exception_object);
- unw_context_t uc;
+ (void *)exception_object);
+ unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
-
- // Mark that this is a non-forced unwind, so _Unwind_Resume()
- // can do the right thing.
- exception_object->private_1 = 0;
- exception_object->private_2 = 0;
-
- // phase 1: the search phase
+
+ // Mark that this is a non-forced unwind, so _Unwind_Resume()
+ // can do the right thing.
+ exception_object->private_1 = 0;
+ exception_object->private_2 = 0;
+
+ // phase 1: the search phase
_Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object);
- if (phase1 != _URC_NO_REASON)
- return phase1;
-
- // phase 2: the clean up phase
+ if (phase1 != _URC_NO_REASON)
+ return phase1;
+
+ // phase 2: the clean up phase
return unwind_phase2(&uc, &cursor, exception_object);
-}
-
-
-
-/// When _Unwind_RaiseException() is in phase2, it hands control
-/// to the personality function at each frame. The personality
-/// may force a jump to a landing pad in that function, the landing
-/// pad code may then call _Unwind_Resume() to continue with the
-/// unwinding. Note: the call to _Unwind_Resume() is from compiler
-/// geneated user code. All other _Unwind_* routines are called
-/// by the C++ runtime __cxa_* routines.
-///
-/// Note: re-throwing an exception (as opposed to continuing the unwind)
-/// is implemented by having the code call __cxa_rethrow() which
-/// in turn calls _Unwind_Resume_or_Rethrow().
-_LIBUNWIND_EXPORT void
-_Unwind_Resume(_Unwind_Exception *exception_object) {
+}
+
+
+
+/// When _Unwind_RaiseException() is in phase2, it hands control
+/// to the personality function at each frame. The personality
+/// may force a jump to a landing pad in that function; the landing
+/// pad code may then call _Unwind_Resume() to continue with the
+/// unwinding. Note: the call to _Unwind_Resume() is from compiler
+/// generated user code. All other _Unwind_* routines are called
+/// by the C++ runtime __cxa_* routines.
+///
+/// Note: re-throwing an exception (as opposed to continuing the unwind)
+/// is implemented by having the code call __cxa_rethrow() which
+/// in turn calls _Unwind_Resume_or_Rethrow().
+_LIBUNWIND_EXPORT void
+_Unwind_Resume(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object);
- unw_context_t uc;
+ unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
-
- if (exception_object->private_1 != 0)
+
+ if (exception_object->private_1 != 0)
unwind_phase2_forced(&uc, &cursor, exception_object,
- (_Unwind_Stop_Fn) exception_object->private_1,
- (void *)exception_object->private_2);
- else
+ (_Unwind_Stop_Fn) exception_object->private_1,
+ (void *)exception_object->private_2);
+ else
unwind_phase2(&uc, &cursor, exception_object);
-
- // Clients assume _Unwind_Resume() does not return, so all we can do is abort.
- _LIBUNWIND_ABORT("_Unwind_Resume() can't return");
-}
-
-
-
-/// Not used by C++.
-/// Unwinds stack, calling "stop" function at each frame.
-/// Could be used to implement longjmp().
-_LIBUNWIND_EXPORT _Unwind_Reason_Code
-_Unwind_ForcedUnwind(_Unwind_Exception *exception_object,
- _Unwind_Stop_Fn stop, void *stop_parameter) {
+
+ // Clients assume _Unwind_Resume() does not return, so all we can do is abort.
+ _LIBUNWIND_ABORT("_Unwind_Resume() can't return");
+}
+
+
+
+/// Not used by C++.
+/// Unwinds stack, calling "stop" function at each frame.
+/// Could be used to implement longjmp().
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_ForcedUnwind(_Unwind_Exception *exception_object,
+ _Unwind_Stop_Fn stop, void *stop_parameter) {
_LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)",
- (void *)exception_object, (void *)(uintptr_t)stop);
- unw_context_t uc;
+ (void *)exception_object, (void *)(uintptr_t)stop);
+ unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
-
- // Mark that this is a forced unwind, so _Unwind_Resume() can do
- // the right thing.
- exception_object->private_1 = (uintptr_t) stop;
- exception_object->private_2 = (uintptr_t) stop_parameter;
-
- // do it
+
+ // Mark that this is a forced unwind, so _Unwind_Resume() can do
+ // the right thing.
+ exception_object->private_1 = (uintptr_t) stop;
+ exception_object->private_2 = (uintptr_t) stop_parameter;
+
+ // do it
return unwind_phase2_forced(&uc, &cursor, exception_object, stop, stop_parameter);
-}
-
-
-/// Called by personality handler during phase 2 to get LSDA for current frame.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_proc_info_t frameInfo;
- uintptr_t result = 0;
+}
+
+
+/// Called by personality handler during phase 2 to get LSDA for current frame.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_proc_info_t frameInfo;
+ uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
- result = (uintptr_t)frameInfo.lsda;
- _LIBUNWIND_TRACE_API(
+ result = (uintptr_t)frameInfo.lsda;
+ _LIBUNWIND_TRACE_API(
"_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR,
- (void *)context, result);
- if (result != 0) {
- if (*((uint8_t *)result) != 0xFF)
+ (void *)context, result);
+ if (result != 0) {
+ if (*((uint8_t *)result) != 0xFF)
_LIBUNWIND_DEBUG_LOG("lsda at 0x%" PRIxPTR " does not start with 0xFF",
- result);
- }
- return result;
-}
-
-
-/// Called by personality handler during phase 2 to find the start of the
-/// function.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetRegionStart(struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_proc_info_t frameInfo;
- uintptr_t result = 0;
+ result);
+ }
+ return result;
+}
+
+
+/// Called by personality handler during phase 2 to find the start of the
+/// function.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetRegionStart(struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_proc_info_t frameInfo;
+ uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
- result = (uintptr_t)frameInfo.start_ip;
+ result = (uintptr_t)frameInfo.start_ip;
_LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR,
- (void *)context, result);
- return result;
-}
-
+ (void *)context, result);
+ return result;
+}
+
#endif // !_LIBUNWIND_SUPPORT_SEH_UNWIND
-
-/// Called by personality handler during phase 2 if a foreign exception
-// is caught.
-_LIBUNWIND_EXPORT void
-_Unwind_DeleteException(_Unwind_Exception *exception_object) {
+
+/// Called by personality handler during phase 2 if a foreign exception
+/// is caught.
+_LIBUNWIND_EXPORT void
+_Unwind_DeleteException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)",
- (void *)exception_object);
- if (exception_object->exception_cleanup != NULL)
- (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
- exception_object);
-}
-
-/// Called by personality handler during phase 2 to get register values.
-_LIBUNWIND_EXPORT uintptr_t
-_Unwind_GetGR(struct _Unwind_Context *context, int index) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_word_t result;
+ (void *)exception_object);
+ if (exception_object->exception_cleanup != NULL)
+ (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
+ exception_object);
+}
+
+/// Called by personality handler during phase 2 to get register values.
+_LIBUNWIND_EXPORT uintptr_t
+_Unwind_GetGR(struct _Unwind_Context *context, int index) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_word_t result;
__unw_get_reg(cursor, index, &result);
_LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d) => 0x%" PRIxPTR,
(void *)context, index, result);
- return (uintptr_t)result;
-}
-
-/// Called by personality handler during phase 2 to alter register values.
-_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index,
- uintptr_t value) {
+ return (uintptr_t)result;
+}
+
+/// Called by personality handler during phase 2 to alter register values.
+_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index,
+ uintptr_t value) {
_LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%0" PRIxPTR
")",
(void *)context, index, value);
- unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
__unw_set_reg(cursor, index, value);
-}
-
-/// Called by personality handler during phase 2 to get instruction pointer.
-_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
- unw_cursor_t *cursor = (unw_cursor_t *)context;
- unw_word_t result;
+}
+
+/// Called by personality handler during phase 2 to get instruction pointer.
+_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_word_t result;
__unw_get_reg(cursor, UNW_REG_IP, &result);
_LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR,
(void *)context, result);
- return (uintptr_t)result;
-}
-
-/// Called by personality handler during phase 2 to alter instruction pointer,
-/// such as setting where the landing pad is, so _Unwind_Resume() will
-/// start executing in the landing pad.
-_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context,
- uintptr_t value) {
+ return (uintptr_t)result;
+}
+
+/// Called by personality handler during phase 2 to alter instruction pointer,
+/// such as setting where the landing pad is, so _Unwind_Resume() will
+/// start executing in the landing pad.
+_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context,
+ uintptr_t value) {
_LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%0" PRIxPTR ")",
(void *)context, value);
- unw_cursor_t *cursor = (unw_cursor_t *)context;
+ unw_cursor_t *cursor = (unw_cursor_t *)context;
__unw_set_reg(cursor, UNW_REG_IP, value);
-}
-
+}
+
#endif // !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__)
diff --git a/contrib/libs/libunwind/src/UnwindRegistersRestore.S b/contrib/libs/libunwind/src/UnwindRegistersRestore.S
index 0c10e2b0e4..1df97f5fc4 100644
--- a/contrib/libs/libunwind/src/UnwindRegistersRestore.S
+++ b/contrib/libs/libunwind/src/UnwindRegistersRestore.S
@@ -1,67 +1,67 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
- .text
-
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+ .text
+
#if !defined(__USING_SJLJ_EXCEPTIONS__)
-#if defined(__i386__)
+#if defined(__i386__)
DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
-#
+#
# extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
-#
-# On entry:
-# + +
-# +-----------------------+
-# + thread_state pointer +
-# +-----------------------+
-# + return address +
-# +-----------------------+ <-- SP
-# + +
+#
+# On entry:
+# + +
+# +-----------------------+
+# + thread_state pointer +
+# +-----------------------+
+# + return address +
+# +-----------------------+ <-- SP
+# + +
_LIBUNWIND_CET_ENDBR
- movl 4(%esp), %eax
- # set up eax and ret on new stack location
- movl 28(%eax), %edx # edx holds new stack pointer
- subl $8,%edx
- movl %edx, 28(%eax)
- movl 0(%eax), %ebx
- movl %ebx, 0(%edx)
- movl 40(%eax), %ebx
- movl %ebx, 4(%edx)
- # we now have ret and eax pushed onto where new stack will be
- # restore all registers
- movl 4(%eax), %ebx
- movl 8(%eax), %ecx
- movl 12(%eax), %edx
- movl 16(%eax), %edi
- movl 20(%eax), %esi
- movl 24(%eax), %ebp
- movl 28(%eax), %esp
- # skip ss
- # skip eflags
- pop %eax # eax was already pushed on new stack
+ movl 4(%esp), %eax
+ # set up eax and ret on new stack location
+ movl 28(%eax), %edx # edx holds new stack pointer
+ subl $8,%edx
+ movl %edx, 28(%eax)
+ movl 0(%eax), %ebx
+ movl %ebx, 0(%edx)
+ movl 40(%eax), %ebx
+ movl %ebx, 4(%edx)
+ # we now have ret and eax pushed onto where new stack will be
+ # restore all registers
+ movl 4(%eax), %ebx
+ movl 8(%eax), %ecx
+ movl 12(%eax), %edx
+ movl 16(%eax), %edi
+ movl 20(%eax), %esi
+ movl 24(%eax), %ebp
+ movl 28(%eax), %esp
+ # skip ss
+ # skip eflags
+ pop %eax # eax was already pushed on new stack
pop %ecx
jmp *%ecx
- # skip cs
- # skip ds
- # skip es
- # skip fs
- # skip gs
-
-#elif defined(__x86_64__)
-
+ # skip cs
+ # skip ds
+ # skip es
+ # skip fs
+ # skip gs
+
+#elif defined(__x86_64__)
+
DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
-#
+#
# extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *);
-#
+#
#if defined(_WIN64)
# On entry, thread_state pointer is in rcx; move it into rdi
# to share restore code below. Since this routine restores and
@@ -70,38 +70,38 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
# mustn't clobber some of them.
movq %rcx, %rdi
#else
-# On entry, thread_state pointer is in rdi
+# On entry, thread_state pointer is in rdi
#endif
-
+
_LIBUNWIND_CET_ENDBR
- movq 56(%rdi), %rax # rax holds new stack pointer
- subq $16, %rax
- movq %rax, 56(%rdi)
- movq 32(%rdi), %rbx # store new rdi on new stack
- movq %rbx, 0(%rax)
- movq 128(%rdi), %rbx # store new rip on new stack
- movq %rbx, 8(%rax)
- # restore all registers
- movq 0(%rdi), %rax
- movq 8(%rdi), %rbx
- movq 16(%rdi), %rcx
- movq 24(%rdi), %rdx
- # restore rdi later
- movq 40(%rdi), %rsi
- movq 48(%rdi), %rbp
- # restore rsp later
- movq 64(%rdi), %r8
- movq 72(%rdi), %r9
- movq 80(%rdi), %r10
- movq 88(%rdi), %r11
- movq 96(%rdi), %r12
- movq 104(%rdi), %r13
- movq 112(%rdi), %r14
- movq 120(%rdi), %r15
- # skip rflags
- # skip cs
- # skip fs
- # skip gs
+ movq 56(%rdi), %rax # rax holds new stack pointer
+ subq $16, %rax
+ movq %rax, 56(%rdi)
+ movq 32(%rdi), %rbx # store new rdi on new stack
+ movq %rbx, 0(%rax)
+ movq 128(%rdi), %rbx # store new rip on new stack
+ movq %rbx, 8(%rax)
+ # restore all registers
+ movq 0(%rdi), %rax
+ movq 8(%rdi), %rbx
+ movq 16(%rdi), %rcx
+ movq 24(%rdi), %rdx
+ # restore rdi later
+ movq 40(%rdi), %rsi
+ movq 48(%rdi), %rbp
+ # restore rsp later
+ movq 64(%rdi), %r8
+ movq 72(%rdi), %r9
+ movq 80(%rdi), %r10
+ movq 88(%rdi), %r11
+ movq 96(%rdi), %r12
+ movq 104(%rdi), %r13
+ movq 112(%rdi), %r14
+ movq 120(%rdi), %r15
+ # skip rflags
+ # skip cs
+ # skip fs
+ # skip gs
#if defined(_WIN64)
movdqu 176(%rdi),%xmm0
@@ -121,12 +121,12 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
movdqu 400(%rdi),%xmm14
movdqu 416(%rdi),%xmm15
#endif
- movq 56(%rdi), %rsp # cut back rsp to new location
- pop %rdi # rdi was saved here earlier
+ movq 56(%rdi), %rsp # cut back rsp to new location
+ pop %rdi # rdi was saved here earlier
pop %rcx
jmpq *%rcx
-
-
+
+
#elif defined(__powerpc64__)
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_ppc646jumptoEv)
@@ -395,7 +395,7 @@ Lnovec:
bctr
#elif defined(__powerpc__)
-
+
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
//
// void libunwind::Registers_ppc::jumpto()
@@ -403,7 +403,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
// On entry:
// thread_state pointer is in r3
//
-
+
    // restore integral registers
// skip r0 for now
// skip r1 for now
@@ -437,7 +437,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
lwz 29,124(3)
lwz 30,128(3)
lwz 31,132(3)
-
+
#ifndef __NO_FPRS__
// restore float registers
lfd 0, 160(3)
@@ -473,18 +473,18 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
lfd 30,400(3)
lfd 31,408(3)
#endif
-
+
#if defined(__ALTIVEC__)
// restore vector registers if any are in use
lwz 5, 156(3) // test VRsave
cmpwi 5, 0
beq Lnovec
-
+
subi 4, 1, 16
rlwinm 4, 4, 0, 0, 27 // mask low 4-bits
// r4 is now a 16-byte aligned pointer into the red zone
// the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer
-
+
#define LOAD_VECTOR_UNALIGNEDl(_index) \
andis. 0, 5, (1 PPC_LEFT_SHIFT(15-_index)) SEPARATOR \
@@ -499,7 +499,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
stw 0, 12(%r4) SEPARATOR \
lvx _index, 0, 4 SEPARATOR \
Ldone ## _index:
-
+
#define LOAD_VECTOR_UNALIGNEDh(_index) \
andi. 0, 5, (1 PPC_LEFT_SHIFT(31-_index)) SEPARATOR \
beq Ldone ## _index SEPARATOR \
@@ -513,43 +513,43 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
stw 0, 12(4) SEPARATOR \
lvx _index, 0, 4 SEPARATOR \
Ldone ## _index:
-
-
- LOAD_VECTOR_UNALIGNEDl(0)
- LOAD_VECTOR_UNALIGNEDl(1)
- LOAD_VECTOR_UNALIGNEDl(2)
- LOAD_VECTOR_UNALIGNEDl(3)
- LOAD_VECTOR_UNALIGNEDl(4)
- LOAD_VECTOR_UNALIGNEDl(5)
- LOAD_VECTOR_UNALIGNEDl(6)
- LOAD_VECTOR_UNALIGNEDl(7)
- LOAD_VECTOR_UNALIGNEDl(8)
- LOAD_VECTOR_UNALIGNEDl(9)
- LOAD_VECTOR_UNALIGNEDl(10)
- LOAD_VECTOR_UNALIGNEDl(11)
- LOAD_VECTOR_UNALIGNEDl(12)
- LOAD_VECTOR_UNALIGNEDl(13)
- LOAD_VECTOR_UNALIGNEDl(14)
- LOAD_VECTOR_UNALIGNEDl(15)
- LOAD_VECTOR_UNALIGNEDh(16)
- LOAD_VECTOR_UNALIGNEDh(17)
- LOAD_VECTOR_UNALIGNEDh(18)
- LOAD_VECTOR_UNALIGNEDh(19)
- LOAD_VECTOR_UNALIGNEDh(20)
- LOAD_VECTOR_UNALIGNEDh(21)
- LOAD_VECTOR_UNALIGNEDh(22)
- LOAD_VECTOR_UNALIGNEDh(23)
- LOAD_VECTOR_UNALIGNEDh(24)
- LOAD_VECTOR_UNALIGNEDh(25)
- LOAD_VECTOR_UNALIGNEDh(26)
- LOAD_VECTOR_UNALIGNEDh(27)
- LOAD_VECTOR_UNALIGNEDh(28)
- LOAD_VECTOR_UNALIGNEDh(29)
- LOAD_VECTOR_UNALIGNEDh(30)
- LOAD_VECTOR_UNALIGNEDh(31)
+
+
+ LOAD_VECTOR_UNALIGNEDl(0)
+ LOAD_VECTOR_UNALIGNEDl(1)
+ LOAD_VECTOR_UNALIGNEDl(2)
+ LOAD_VECTOR_UNALIGNEDl(3)
+ LOAD_VECTOR_UNALIGNEDl(4)
+ LOAD_VECTOR_UNALIGNEDl(5)
+ LOAD_VECTOR_UNALIGNEDl(6)
+ LOAD_VECTOR_UNALIGNEDl(7)
+ LOAD_VECTOR_UNALIGNEDl(8)
+ LOAD_VECTOR_UNALIGNEDl(9)
+ LOAD_VECTOR_UNALIGNEDl(10)
+ LOAD_VECTOR_UNALIGNEDl(11)
+ LOAD_VECTOR_UNALIGNEDl(12)
+ LOAD_VECTOR_UNALIGNEDl(13)
+ LOAD_VECTOR_UNALIGNEDl(14)
+ LOAD_VECTOR_UNALIGNEDl(15)
+ LOAD_VECTOR_UNALIGNEDh(16)
+ LOAD_VECTOR_UNALIGNEDh(17)
+ LOAD_VECTOR_UNALIGNEDh(18)
+ LOAD_VECTOR_UNALIGNEDh(19)
+ LOAD_VECTOR_UNALIGNEDh(20)
+ LOAD_VECTOR_UNALIGNEDh(21)
+ LOAD_VECTOR_UNALIGNEDh(22)
+ LOAD_VECTOR_UNALIGNEDh(23)
+ LOAD_VECTOR_UNALIGNEDh(24)
+ LOAD_VECTOR_UNALIGNEDh(25)
+ LOAD_VECTOR_UNALIGNEDh(26)
+ LOAD_VECTOR_UNALIGNEDh(27)
+ LOAD_VECTOR_UNALIGNEDh(28)
+ LOAD_VECTOR_UNALIGNEDh(29)
+ LOAD_VECTOR_UNALIGNEDh(30)
+ LOAD_VECTOR_UNALIGNEDh(31)
#endif
-
-Lnovec:
+
+Lnovec:
lwz 0, 136(3) // __cr
mtcr 0
lwz 0, 148(3) // __ctr
@@ -561,79 +561,79 @@ Lnovec:
lwz 4, 24(3) // do r4 now
lwz 1, 12(3) // do sp now
lwz 3, 20(3) // do r3 last
- bctr
-
+ bctr
+
#elif defined(__aarch64__)
-
-//
+
+//
// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
-//
-// On entry:
-// thread_state pointer is in x0
-//
- .p2align 2
+//
+// On entry:
+// thread_state pointer is in x0
+//
+ .p2align 2
DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
- // skip restore of x0,x1 for now
- ldp x2, x3, [x0, #0x010]
- ldp x4, x5, [x0, #0x020]
- ldp x6, x7, [x0, #0x030]
- ldp x8, x9, [x0, #0x040]
- ldp x10,x11, [x0, #0x050]
- ldp x12,x13, [x0, #0x060]
- ldp x14,x15, [x0, #0x070]
+ // skip restore of x0,x1 for now
+ ldp x2, x3, [x0, #0x010]
+ ldp x4, x5, [x0, #0x020]
+ ldp x6, x7, [x0, #0x030]
+ ldp x8, x9, [x0, #0x040]
+ ldp x10,x11, [x0, #0x050]
+ ldp x12,x13, [x0, #0x060]
+ ldp x14,x15, [x0, #0x070]
// x16 and x17 were clobbered by the call into the unwinder, so no point in
// restoring them.
- ldp x18,x19, [x0, #0x090]
- ldp x20,x21, [x0, #0x0A0]
- ldp x22,x23, [x0, #0x0B0]
- ldp x24,x25, [x0, #0x0C0]
- ldp x26,x27, [x0, #0x0D0]
+ ldp x18,x19, [x0, #0x090]
+ ldp x20,x21, [x0, #0x0A0]
+ ldp x22,x23, [x0, #0x0B0]
+ ldp x24,x25, [x0, #0x0C0]
+ ldp x26,x27, [x0, #0x0D0]
ldp x28,x29, [x0, #0x0E0]
ldr x30, [x0, #0x100] // restore pc into lr
-
- ldp d0, d1, [x0, #0x110]
- ldp d2, d3, [x0, #0x120]
- ldp d4, d5, [x0, #0x130]
- ldp d6, d7, [x0, #0x140]
- ldp d8, d9, [x0, #0x150]
- ldp d10,d11, [x0, #0x160]
- ldp d12,d13, [x0, #0x170]
- ldp d14,d15, [x0, #0x180]
- ldp d16,d17, [x0, #0x190]
- ldp d18,d19, [x0, #0x1A0]
- ldp d20,d21, [x0, #0x1B0]
- ldp d22,d23, [x0, #0x1C0]
- ldp d24,d25, [x0, #0x1D0]
- ldp d26,d27, [x0, #0x1E0]
- ldp d28,d29, [x0, #0x1F0]
- ldr d30, [x0, #0x200]
- ldr d31, [x0, #0x208]
-
+
+ ldp d0, d1, [x0, #0x110]
+ ldp d2, d3, [x0, #0x120]
+ ldp d4, d5, [x0, #0x130]
+ ldp d6, d7, [x0, #0x140]
+ ldp d8, d9, [x0, #0x150]
+ ldp d10,d11, [x0, #0x160]
+ ldp d12,d13, [x0, #0x170]
+ ldp d14,d15, [x0, #0x180]
+ ldp d16,d17, [x0, #0x190]
+ ldp d18,d19, [x0, #0x1A0]
+ ldp d20,d21, [x0, #0x1B0]
+ ldp d22,d23, [x0, #0x1C0]
+ ldp d24,d25, [x0, #0x1D0]
+ ldp d26,d27, [x0, #0x1E0]
+ ldp d28,d29, [x0, #0x1F0]
+ ldr d30, [x0, #0x200]
+ ldr d31, [x0, #0x208]
+
    // Finally, restore sp. This must be done after the last read from the
// context struct, because it is allocated on the stack, and an exception
// could clobber the de-allocated portion of the stack after sp has been
// restored.
ldr x16, [x0, #0x0F8]
- ldp x0, x1, [x0, #0x000] // restore x0,x1
+ ldp x0, x1, [x0, #0x000] // restore x0,x1
mov sp,x16 // restore sp
ret x30 // jump to pc
-
-#elif defined(__arm__) && !defined(__APPLE__)
-
-#if !defined(__ARM_ARCH_ISA_ARM)
+
+#elif defined(__arm__) && !defined(__APPLE__)
+
+#if !defined(__ARM_ARCH_ISA_ARM)
#if (__ARM_ARCH_ISA_THUMB == 2)
.syntax unified
#endif
- .thumb
-#endif
-
-@
-@ void libunwind::Registers_arm::restoreCoreAndJumpTo()
-@
-@ On entry:
-@ thread_state pointer is in r0
-@
- .p2align 2
+ .thumb
+#endif
+
+@
+@ void libunwind::Registers_arm::restoreCoreAndJumpTo()
+@
+@ On entry:
+@ thread_state pointer is in r0
+@
+ .p2align 2
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv)
#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1
@ r8-r11: ldm into r1-r4, then mov to r8-r11
@@ -647,180 +647,180 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv)
  @ r12 does not need loading, it is the intra-procedure-call scratch register
ldr r2, [r0, #0x34]
ldr r3, [r0, #0x3c]
- mov sp, r2
- mov lr, r3 @ restore pc into lr
- ldm r0, {r0-r7}
-#else
- @ Use lr as base so that r0 can be restored.
- mov lr, r0
- @ 32bit thumb-2 restrictions for ldm:
- @ . the sp (r13) cannot be in the list
- @ . the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction
- ldm lr, {r0-r12}
- ldr sp, [lr, #52]
- ldr lr, [lr, #60] @ restore pc into lr
-#endif
+ mov sp, r2
+ mov lr, r3 @ restore pc into lr
+ ldm r0, {r0-r7}
+#else
+ @ Use lr as base so that r0 can be restored.
+ mov lr, r0
+ @ 32bit thumb-2 restrictions for ldm:
+ @ . the sp (r13) cannot be in the list
+ @ . the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction
+ ldm lr, {r0-r12}
+ ldr sp, [lr, #52]
+ ldr lr, [lr, #60] @ restore pc into lr
+#endif
#if defined(__ARM_FEATURE_BTI_DEFAULT) && !defined(__ARM_ARCH_ISA_ARM)
// 'bx' is not BTI setting when used with lr, therefore r12 is used instead
mov r12, lr
JMP(r12)
#else
- JMP(lr)
+ JMP(lr)
#endif
-
-@
-@ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+
+@
+@ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
- .fpu vfpv3-d16
+ .fpu vfpv3-d16
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPv)
- @ VFP and iwMMX instructions are only available when compiling with the flags
- @ that enable them. We do not want to do that in the library (because we do not
- @ want the compiler to generate instructions that access those) but this is
- @ only accessed if the personality routine needs these registers. Use of
- @ these registers implies they are, actually, available on the target, so
- @ it's ok to execute.
- @ So, generate the instruction using the corresponding coprocessor mnemonic.
- vldmia r0, {d0-d15}
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+ @ VFP and iwMMX instructions are only available when compiling with the flags
+ @ that enable them. We do not want to do that in the library (because we do not
+ @ want the compiler to generate instructions that access those) but this is
+ @ only accessed if the personality routine needs these registers. Use of
+ @ these registers implies they are, actually, available on the target, so
+ @ it's ok to execute.
+ @ So, generate the instruction using the corresponding coprocessor mnemonic.
+ vldmia r0, {d0-d15}
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
- .fpu vfpv3-d16
+ .fpu vfpv3-d16
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPv)
- vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+ vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
- .fpu vfpv3
+ .fpu vfpv3
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPv)
- vldmia r0, {d16-d31}
- JMP(lr)
-
+ vldmia r0, {d16-d31}
+ JMP(lr)
+
#if defined(__ARM_WMMX)
-@
-@ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+@
+@ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
.arch armv5te
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPv)
- ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8
- ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8
- ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8
- ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8
- ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8
- ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8
- ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8
- ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8
- ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8
- ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8
- ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8
- ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8
- ldcl p1, cr12, [r0], #8 @ wldrd wR12, [r0], #8
- ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8
- ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8
- ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+ ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8
+ ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8
+ ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8
+ ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8
+ ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8
+ ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8
+ ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8
+ ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8
+ ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8
+ ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8
+ ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8
+ ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8
+ ldcl p1, cr12, [r0], #8 @ wldrd wR12, [r0], #8
+ ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8
+ ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8
+ ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
.arch armv5te
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj)
- ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4
- ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4
- ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4
- ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4
- JMP(lr)
-
+ ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4
+ ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4
+ ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4
+ ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4
+ JMP(lr)
+
#endif
-#elif defined(__or1k__)
-
+#elif defined(__or1k__)
+
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv)
-#
-# void libunwind::Registers_or1k::jumpto()
-#
-# On entry:
-# thread_state pointer is in r3
-#
-
+#
+# void libunwind::Registers_or1k::jumpto()
+#
+# On entry:
+# thread_state pointer is in r3
+#
+
# restore integral registers
- l.lwz r0, 0(r3)
- l.lwz r1, 4(r3)
- l.lwz r2, 8(r3)
- # skip r3 for now
- l.lwz r4, 16(r3)
- l.lwz r5, 20(r3)
- l.lwz r6, 24(r3)
- l.lwz r7, 28(r3)
- l.lwz r8, 32(r3)
+ l.lwz r0, 0(r3)
+ l.lwz r1, 4(r3)
+ l.lwz r2, 8(r3)
+ # skip r3 for now
+ l.lwz r4, 16(r3)
+ l.lwz r5, 20(r3)
+ l.lwz r6, 24(r3)
+ l.lwz r7, 28(r3)
+ l.lwz r8, 32(r3)
# skip r9
- l.lwz r10, 40(r3)
- l.lwz r11, 44(r3)
- l.lwz r12, 48(r3)
- l.lwz r13, 52(r3)
- l.lwz r14, 56(r3)
- l.lwz r15, 60(r3)
- l.lwz r16, 64(r3)
- l.lwz r17, 68(r3)
- l.lwz r18, 72(r3)
- l.lwz r19, 76(r3)
- l.lwz r20, 80(r3)
- l.lwz r21, 84(r3)
- l.lwz r22, 88(r3)
- l.lwz r23, 92(r3)
- l.lwz r24, 96(r3)
- l.lwz r25,100(r3)
- l.lwz r26,104(r3)
- l.lwz r27,108(r3)
- l.lwz r28,112(r3)
- l.lwz r29,116(r3)
- l.lwz r30,120(r3)
- l.lwz r31,124(r3)
-
+ l.lwz r10, 40(r3)
+ l.lwz r11, 44(r3)
+ l.lwz r12, 48(r3)
+ l.lwz r13, 52(r3)
+ l.lwz r14, 56(r3)
+ l.lwz r15, 60(r3)
+ l.lwz r16, 64(r3)
+ l.lwz r17, 68(r3)
+ l.lwz r18, 72(r3)
+ l.lwz r19, 76(r3)
+ l.lwz r20, 80(r3)
+ l.lwz r21, 84(r3)
+ l.lwz r22, 88(r3)
+ l.lwz r23, 92(r3)
+ l.lwz r24, 96(r3)
+ l.lwz r25,100(r3)
+ l.lwz r26,104(r3)
+ l.lwz r27,108(r3)
+ l.lwz r28,112(r3)
+ l.lwz r29,116(r3)
+ l.lwz r30,120(r3)
+ l.lwz r31,124(r3)
+
# load new pc into ra
l.lwz r9, 128(r3)
- # at last, restore r3
- l.lwz r3, 12(r3)
-
- # jump to pc
- l.jr r9
- l.nop
-
+ # at last, restore r3
+ l.lwz r3, 12(r3)
+
+ # jump to pc
+ l.jr r9
+ l.nop
+
#elif defined(__hexagon__)
# On entry:
# thread_state pointer is in r2
@@ -927,7 +927,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv)
ldc1 $f29, (4 * 36 + 8 * 29)($4)
ldc1 $f30, (4 * 36 + 8 * 30)($4)
ldc1 $f31, (4 * 36 + 8 * 31)($4)
-#endif
+#endif
#endif
// restore hi and lo
lw $8, (4 * 33)($4)
diff --git a/contrib/libs/libunwind/src/UnwindRegistersSave.S b/contrib/libs/libunwind/src/UnwindRegistersSave.S
index b6170bcdc6..9566bb0335 100644
--- a/contrib/libs/libunwind/src/UnwindRegistersSave.S
+++ b/contrib/libs/libunwind/src/UnwindRegistersSave.S
@@ -1,68 +1,68 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
- .text
-
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+ .text
+
#if !defined(__USING_SJLJ_EXCEPTIONS__)
-#if defined(__i386__)
-
-#
+#if defined(__i386__)
+
+#
# extern int __unw_getcontext(unw_context_t* thread_state)
-#
-# On entry:
-# + +
-# +-----------------------+
-# + thread_state pointer +
-# +-----------------------+
-# + return address +
-# +-----------------------+ <-- SP
-# + +
-#
+#
+# On entry:
+# + +
+# +-----------------------+
+# + thread_state pointer +
+# +-----------------------+
+# + return address +
+# +-----------------------+ <-- SP
+# + +
+#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
_LIBUNWIND_CET_ENDBR
- push %eax
- movl 8(%esp), %eax
- movl %ebx, 4(%eax)
- movl %ecx, 8(%eax)
- movl %edx, 12(%eax)
- movl %edi, 16(%eax)
- movl %esi, 20(%eax)
- movl %ebp, 24(%eax)
- movl %esp, %edx
- addl $8, %edx
- movl %edx, 28(%eax) # store what sp was at call site as esp
- # skip ss
- # skip eflags
- movl 4(%esp), %edx
- movl %edx, 40(%eax) # store return address as eip
- # skip cs
- # skip ds
- # skip es
- # skip fs
- # skip gs
- movl (%esp), %edx
- movl %edx, (%eax) # store original eax
- popl %eax
- xorl %eax, %eax # return UNW_ESUCCESS
- ret
-
-#elif defined(__x86_64__)
-
-#
+ push %eax
+ movl 8(%esp), %eax
+ movl %ebx, 4(%eax)
+ movl %ecx, 8(%eax)
+ movl %edx, 12(%eax)
+ movl %edi, 16(%eax)
+ movl %esi, 20(%eax)
+ movl %ebp, 24(%eax)
+ movl %esp, %edx
+ addl $8, %edx
+ movl %edx, 28(%eax) # store what sp was at call site as esp
+ # skip ss
+ # skip eflags
+ movl 4(%esp), %edx
+ movl %edx, 40(%eax) # store return address as eip
+ # skip cs
+ # skip ds
+ # skip es
+ # skip fs
+ # skip gs
+ movl (%esp), %edx
+ movl %edx, (%eax) # store original eax
+ popl %eax
+ xorl %eax, %eax # return UNW_ESUCCESS
+ ret
+
+#elif defined(__x86_64__)
+
+#
# extern int __unw_getcontext(unw_context_t* thread_state)
-#
-# On entry:
-# thread_state pointer is in rdi
-#
+#
+# On entry:
+# thread_state pointer is in rdi
+#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
#if defined(_WIN64)
#define PTR %rcx
@@ -92,10 +92,10 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
movq %r15,120(PTR)
movq (%rsp),TMP
movq TMP,128(PTR) # store return address as rip
- # skip rflags
- # skip cs
- # skip fs
- # skip gs
+ # skip rflags
+ # skip cs
+ # skip fs
+ # skip gs
#if defined(_WIN64)
movdqu %xmm0,176(PTR)
@@ -115,9 +115,9 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
movdqu %xmm14,400(PTR)
movdqu %xmm15,416(PTR)
#endif
- xorl %eax, %eax # return UNW_ESUCCESS
- ret
-
+ xorl %eax, %eax # return UNW_ESUCCESS
+ ret
+
#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32
#
@@ -317,15 +317,15 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
or $2, $0, $0
.set pop
-# elif defined(__mips__)
-
-#
+# elif defined(__mips__)
+
+#
# extern int __unw_getcontext(unw_context_t* thread_state)
-#
-# Just trap for the time being.
+#
+# Just trap for the time being.
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
- teq $0, $0
-
+ teq $0, $0
+
#elif defined(__powerpc64__)
//
@@ -560,7 +560,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
#elif defined(__powerpc__)
-
+
//
// extern int unw_getcontext(unw_context_t* thread_state)
//
@@ -602,7 +602,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
stw 29,124(3)
stw 30,128(3)
stw 31,132(3)
-
+
// save VRSave register
mfspr 0, 256
stw 0, 156(3)
@@ -612,7 +612,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
// save CTR register
mfctr 0
stw 0, 148(3)
-
+
#if !defined(__NO_FPRS__)
// save float registers
stfd 0, 160(3)
@@ -648,15 +648,15 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
stfd 30,400(3)
stfd 31,408(3)
#endif
-
+
#if defined(__ALTIVEC__)
// save vector registers
-
+
subi 4, 1, 16
rlwinm 4, 4, 0, 0, 27 // mask low 4-bits
// r4 is now a 16-byte aligned pointer into the red zone
-
-#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \
+
+#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \
stvx _vec, 0, 4 SEPARATOR \
lwz 5, 0(4) SEPARATOR \
stw 5, _offset(3) SEPARATOR \
@@ -666,7 +666,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
stw 5, _offset+8(3) SEPARATOR \
lwz 5, 12(4) SEPARATOR \
stw 5, _offset+12(3)
-
+
SAVE_VECTOR_UNALIGNED( 0, 424+0x000)
SAVE_VECTOR_UNALIGNED( 1, 424+0x010)
SAVE_VECTOR_UNALIGNED( 2, 424+0x020)
@@ -700,83 +700,83 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
SAVE_VECTOR_UNALIGNED(30, 424+0x1E0)
SAVE_VECTOR_UNALIGNED(31, 424+0x1F0)
#endif
-
+
li 3, 0 // return UNW_ESUCCESS
- blr
-
-
+ blr
+
+
#elif defined(__aarch64__)
-
-//
+
+//
// extern int __unw_getcontext(unw_context_t* thread_state)
-//
-// On entry:
-// thread_state pointer is in x0
-//
- .p2align 2
+//
+// On entry:
+// thread_state pointer is in x0
+//
+ .p2align 2
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
- stp x0, x1, [x0, #0x000]
- stp x2, x3, [x0, #0x010]
- stp x4, x5, [x0, #0x020]
- stp x6, x7, [x0, #0x030]
- stp x8, x9, [x0, #0x040]
- stp x10,x11, [x0, #0x050]
- stp x12,x13, [x0, #0x060]
- stp x14,x15, [x0, #0x070]
- stp x16,x17, [x0, #0x080]
- stp x18,x19, [x0, #0x090]
- stp x20,x21, [x0, #0x0A0]
- stp x22,x23, [x0, #0x0B0]
- stp x24,x25, [x0, #0x0C0]
- stp x26,x27, [x0, #0x0D0]
+ stp x0, x1, [x0, #0x000]
+ stp x2, x3, [x0, #0x010]
+ stp x4, x5, [x0, #0x020]
+ stp x6, x7, [x0, #0x030]
+ stp x8, x9, [x0, #0x040]
+ stp x10,x11, [x0, #0x050]
+ stp x12,x13, [x0, #0x060]
+ stp x14,x15, [x0, #0x070]
+ stp x16,x17, [x0, #0x080]
+ stp x18,x19, [x0, #0x090]
+ stp x20,x21, [x0, #0x0A0]
+ stp x22,x23, [x0, #0x0B0]
+ stp x24,x25, [x0, #0x0C0]
+ stp x26,x27, [x0, #0x0D0]
stp x28,x29, [x0, #0x0E0]
str x30, [x0, #0x0F0]
- mov x1,sp
- str x1, [x0, #0x0F8]
+ mov x1,sp
+ str x1, [x0, #0x0F8]
str x30, [x0, #0x100] // store return address as pc
- // skip cpsr
- stp d0, d1, [x0, #0x110]
- stp d2, d3, [x0, #0x120]
- stp d4, d5, [x0, #0x130]
- stp d6, d7, [x0, #0x140]
- stp d8, d9, [x0, #0x150]
- stp d10,d11, [x0, #0x160]
- stp d12,d13, [x0, #0x170]
- stp d14,d15, [x0, #0x180]
- stp d16,d17, [x0, #0x190]
- stp d18,d19, [x0, #0x1A0]
- stp d20,d21, [x0, #0x1B0]
- stp d22,d23, [x0, #0x1C0]
- stp d24,d25, [x0, #0x1D0]
- stp d26,d27, [x0, #0x1E0]
- stp d28,d29, [x0, #0x1F0]
- str d30, [x0, #0x200]
- str d31, [x0, #0x208]
- mov x0, #0 // return UNW_ESUCCESS
- ret
-
-#elif defined(__arm__) && !defined(__APPLE__)
-
-#if !defined(__ARM_ARCH_ISA_ARM)
+ // skip cpsr
+ stp d0, d1, [x0, #0x110]
+ stp d2, d3, [x0, #0x120]
+ stp d4, d5, [x0, #0x130]
+ stp d6, d7, [x0, #0x140]
+ stp d8, d9, [x0, #0x150]
+ stp d10,d11, [x0, #0x160]
+ stp d12,d13, [x0, #0x170]
+ stp d14,d15, [x0, #0x180]
+ stp d16,d17, [x0, #0x190]
+ stp d18,d19, [x0, #0x1A0]
+ stp d20,d21, [x0, #0x1B0]
+ stp d22,d23, [x0, #0x1C0]
+ stp d24,d25, [x0, #0x1D0]
+ stp d26,d27, [x0, #0x1E0]
+ stp d28,d29, [x0, #0x1F0]
+ str d30, [x0, #0x200]
+ str d31, [x0, #0x208]
+ mov x0, #0 // return UNW_ESUCCESS
+ ret
+
+#elif defined(__arm__) && !defined(__APPLE__)
+
+#if !defined(__ARM_ARCH_ISA_ARM)
#if (__ARM_ARCH_ISA_THUMB == 2)
.syntax unified
#endif
- .thumb
-#endif
-
-@
+ .thumb
+#endif
+
+@
@ extern int __unw_getcontext(unw_context_t* thread_state)
+@
+@ On entry:
+@ thread_state pointer is in r0
@
-@ On entry:
-@ thread_state pointer is in r0
-@
-@ Per EHABI #4.7 this only saves the core integer registers.
-@ EHABI #7.4.5 notes that in general all VRS registers should be restored
-@ however this is very hard to do for VFP registers because it is unknown
-@ to the library how many registers are implemented by the architecture.
+@ Per EHABI #4.7 this only saves the core integer registers.
+@ EHABI #7.4.5 notes that in general all VRS registers should be restored
+@ however this is very hard to do for VFP registers because it is unknown
+@ to the library how many registers are implemented by the architecture.
@ Instead, VFP registers are demand saved by logic external to __unw_getcontext.
-@
- .p2align 2
+@
+ .p2align 2
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1
stm r0!, {r0-r7}
@@ -785,8 +785,8 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
mov r3, r10
stm r0!, {r1-r3}
mov r1, r11
- mov r2, sp
- mov r3, lr
+ mov r2, sp
+ mov r3, lr
str r1, [r0, #0] @ r11
 @ r12 does not need storing, it is the intra-procedure-call scratch register
str r2, [r0, #8] @ sp
@@ -796,158 +796,158 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
@ It is safe to use here though because we are about to return, and cpsr is
@ not expected to be preserved.
movs r0, #0 @ return UNW_ESUCCESS
-#else
- @ 32bit thumb-2 restrictions for stm:
- @ . the sp (r13) cannot be in the list
- @ . the pc (r15) cannot be in the list in an STM instruction
- stm r0, {r0-r12}
- str sp, [r0, #52]
- str lr, [r0, #56]
- str lr, [r0, #60] @ store return address as pc
- mov r0, #0 @ return UNW_ESUCCESS
-#endif
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+#else
+ @ 32bit thumb-2 restrictions for stm:
+ @ . the sp (r13) cannot be in the list
+ @ . the pc (r15) cannot be in the list in an STM instruction
+ stm r0, {r0-r12}
+ str sp, [r0, #52]
+ str lr, [r0, #56]
+ str lr, [r0, #60] @ store return address as pc
+ mov r0, #0 @ return UNW_ESUCCESS
+#endif
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
- .fpu vfpv3-d16
+ .fpu vfpv3-d16
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPv)
- vstmia r0, {d0-d15}
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+ vstmia r0, {d0-d15}
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
- .fpu vfpv3-d16
+ .fpu vfpv3-d16
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPv)
- vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+ vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
- .fpu vfpv3
+ .fpu vfpv3
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPv)
- @ VFP and iwMMX instructions are only available when compiling with the flags
- @ that enable them. We do not want to do that in the library (because we do not
- @ want the compiler to generate instructions that access those) but this is
- @ only accessed if the personality routine needs these registers. Use of
- @ these registers implies they are, actually, available on the target, so
- @ it's ok to execute.
- @ So, generate the instructions using the corresponding coprocessor mnemonic.
- vstmia r0, {d16-d31}
- JMP(lr)
-
+ @ VFP and iwMMX instructions are only available when compiling with the flags
+ @ that enable them. We do not want to do that in the library (because we do not
+ @ want the compiler to generate instructions that access those) but this is
+ @ only accessed if the personality routine needs these registers. Use of
+ @ these registers implies they are, actually, available on the target, so
+ @ it's ok to execute.
+ @ So, generate the instructions using the corresponding coprocessor mnemonic.
+ vstmia r0, {d16-d31}
+ JMP(lr)
+
#if defined(_LIBUNWIND_ARM_WMMX)
-@
-@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+@
+@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
.arch armv5te
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPv)
- stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8
- stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8
- stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8
- stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8
- stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8
- stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8
- stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8
- stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8
- stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8
- stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8
- stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8
- stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8
- stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8
- stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8
- stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8
- stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8
- JMP(lr)
-
-@
-@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values)
-@
-@ On entry:
-@ values pointer is in r0
-@
- .p2align 2
+ stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8
+ stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8
+ stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8
+ stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8
+ stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8
+ stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8
+ stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8
+ stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8
+ stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8
+ stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8
+ stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8
+ stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8
+ stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8
+ stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8
+ stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8
+ stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8
+ JMP(lr)
+
+@
+@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values)
+@
+@ On entry:
+@ values pointer is in r0
+@
+ .p2align 2
#if defined(__ELF__)
.arch armv5te
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj)
- stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4
- stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4
- stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4
- stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4
- JMP(lr)
-
+ stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4
+ stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4
+ stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4
+ stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4
+ JMP(lr)
+
#endif
-#elif defined(__or1k__)
-
-#
+#elif defined(__or1k__)
+
+#
# extern int __unw_getcontext(unw_context_t* thread_state)
-#
-# On entry:
-# thread_state pointer is in r3
-#
+#
+# On entry:
+# thread_state pointer is in r3
+#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
- l.sw 0(r3), r0
- l.sw 4(r3), r1
- l.sw 8(r3), r2
- l.sw 12(r3), r3
- l.sw 16(r3), r4
- l.sw 20(r3), r5
- l.sw 24(r3), r6
- l.sw 28(r3), r7
- l.sw 32(r3), r8
- l.sw 36(r3), r9
- l.sw 40(r3), r10
- l.sw 44(r3), r11
- l.sw 48(r3), r12
- l.sw 52(r3), r13
- l.sw 56(r3), r14
- l.sw 60(r3), r15
- l.sw 64(r3), r16
- l.sw 68(r3), r17
- l.sw 72(r3), r18
- l.sw 76(r3), r19
- l.sw 80(r3), r20
- l.sw 84(r3), r21
- l.sw 88(r3), r22
- l.sw 92(r3), r23
- l.sw 96(r3), r24
- l.sw 100(r3), r25
- l.sw 104(r3), r26
- l.sw 108(r3), r27
- l.sw 112(r3), r28
- l.sw 116(r3), r29
- l.sw 120(r3), r30
- l.sw 124(r3), r31
+ l.sw 0(r3), r0
+ l.sw 4(r3), r1
+ l.sw 8(r3), r2
+ l.sw 12(r3), r3
+ l.sw 16(r3), r4
+ l.sw 20(r3), r5
+ l.sw 24(r3), r6
+ l.sw 28(r3), r7
+ l.sw 32(r3), r8
+ l.sw 36(r3), r9
+ l.sw 40(r3), r10
+ l.sw 44(r3), r11
+ l.sw 48(r3), r12
+ l.sw 52(r3), r13
+ l.sw 56(r3), r14
+ l.sw 60(r3), r15
+ l.sw 64(r3), r16
+ l.sw 68(r3), r17
+ l.sw 72(r3), r18
+ l.sw 76(r3), r19
+ l.sw 80(r3), r20
+ l.sw 84(r3), r21
+ l.sw 88(r3), r22
+ l.sw 92(r3), r23
+ l.sw 96(r3), r24
+ l.sw 100(r3), r25
+ l.sw 104(r3), r26
+ l.sw 108(r3), r27
+ l.sw 112(r3), r28
+ l.sw 116(r3), r29
+ l.sw 120(r3), r30
+ l.sw 124(r3), r31
# store ra to pc
l.sw 128(r3), r9
# zero epcr
diff --git a/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp b/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp
index 1a0b61f6cb..ffb49a89e5 100644
--- a/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp
+++ b/contrib/libs/libunwind/src/Unwind_AppleExtras.cpp
@@ -1,113 +1,113 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-//===----------------------------------------------------------------------===//
-
-#include "config.h"
-
-
-// static linker symbols to prevent wrong two level namespace for _Unwind symbols
-#if defined(__arm__)
- #define NOT_HERE_BEFORE_5_0(sym) \
- extern const char sym##_tmp30 __asm("$ld$hide$os3.0$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp30 = 0; \
- extern const char sym##_tmp31 __asm("$ld$hide$os3.1$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp31 = 0; \
- extern const char sym##_tmp32 __asm("$ld$hide$os3.2$_" #sym );\
- __attribute__((visibility("default"))) const char sym##_tmp32 = 0; \
- extern const char sym##_tmp40 __asm("$ld$hide$os4.0$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp40 = 0; \
- extern const char sym##_tmp41 __asm("$ld$hide$os4.1$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp41 = 0; \
- extern const char sym##_tmp42 __asm("$ld$hide$os4.2$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp42 = 0; \
- extern const char sym##_tmp43 __asm("$ld$hide$os4.3$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp43 = 0;
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "config.h"
+
+
+// static linker symbols to prevent wrong two level namespace for _Unwind symbols
+#if defined(__arm__)
+ #define NOT_HERE_BEFORE_5_0(sym) \
+ extern const char sym##_tmp30 __asm("$ld$hide$os3.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp30 = 0; \
+ extern const char sym##_tmp31 __asm("$ld$hide$os3.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp31 = 0; \
+ extern const char sym##_tmp32 __asm("$ld$hide$os3.2$_" #sym );\
+ __attribute__((visibility("default"))) const char sym##_tmp32 = 0; \
+ extern const char sym##_tmp40 __asm("$ld$hide$os4.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp40 = 0; \
+ extern const char sym##_tmp41 __asm("$ld$hide$os4.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp41 = 0; \
+ extern const char sym##_tmp42 __asm("$ld$hide$os4.2$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp42 = 0; \
+ extern const char sym##_tmp43 __asm("$ld$hide$os4.3$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp43 = 0;
#elif defined(__aarch64__)
- #define NOT_HERE_BEFORE_10_6(sym)
- #define NEVER_HERE(sym)
-#else
- #define NOT_HERE_BEFORE_10_6(sym) \
- extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
- extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
- #define NEVER_HERE(sym) \
- extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
- extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \
- extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \
- __attribute__((visibility("default"))) const char sym##_tmp6 = 0;
-#endif
-
-
+ #define NOT_HERE_BEFORE_10_6(sym)
+ #define NEVER_HERE(sym)
+#else
+ #define NOT_HERE_BEFORE_10_6(sym) \
+ extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+ extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
+ #define NEVER_HERE(sym) \
+ extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+ extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \
+ extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp6 = 0;
+#endif
+
+
#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS)
-
-//
-// symbols in libSystem.dylib in 10.6 and later, but are in libgcc_s.dylib in
-// earlier versions
-//
-NOT_HERE_BEFORE_10_6(_Unwind_DeleteException)
-NOT_HERE_BEFORE_10_6(_Unwind_Find_FDE)
-NOT_HERE_BEFORE_10_6(_Unwind_ForcedUnwind)
-NOT_HERE_BEFORE_10_6(_Unwind_GetGR)
-NOT_HERE_BEFORE_10_6(_Unwind_GetIP)
-NOT_HERE_BEFORE_10_6(_Unwind_GetLanguageSpecificData)
-NOT_HERE_BEFORE_10_6(_Unwind_GetRegionStart)
-NOT_HERE_BEFORE_10_6(_Unwind_RaiseException)
-NOT_HERE_BEFORE_10_6(_Unwind_Resume)
-NOT_HERE_BEFORE_10_6(_Unwind_SetGR)
-NOT_HERE_BEFORE_10_6(_Unwind_SetIP)
-NOT_HERE_BEFORE_10_6(_Unwind_Backtrace)
-NOT_HERE_BEFORE_10_6(_Unwind_FindEnclosingFunction)
-NOT_HERE_BEFORE_10_6(_Unwind_GetCFA)
-NOT_HERE_BEFORE_10_6(_Unwind_GetDataRelBase)
-NOT_HERE_BEFORE_10_6(_Unwind_GetTextRelBase)
-NOT_HERE_BEFORE_10_6(_Unwind_Resume_or_Rethrow)
-NOT_HERE_BEFORE_10_6(_Unwind_GetIPInfo)
-NOT_HERE_BEFORE_10_6(__register_frame)
-NOT_HERE_BEFORE_10_6(__deregister_frame)
-
-//
-// symbols in libSystem.dylib for compatibility, but we don't want any new code
-// using them
-//
-NEVER_HERE(__register_frame_info_bases)
-NEVER_HERE(__register_frame_info)
-NEVER_HERE(__register_frame_info_table_bases)
-NEVER_HERE(__register_frame_info_table)
-NEVER_HERE(__register_frame_table)
-NEVER_HERE(__deregister_frame_info)
-NEVER_HERE(__deregister_frame_info_bases)
-
+
+//
+// symbols in libSystem.dylib in 10.6 and later, but are in libgcc_s.dylib in
+// earlier versions
+//
+NOT_HERE_BEFORE_10_6(_Unwind_DeleteException)
+NOT_HERE_BEFORE_10_6(_Unwind_Find_FDE)
+NOT_HERE_BEFORE_10_6(_Unwind_ForcedUnwind)
+NOT_HERE_BEFORE_10_6(_Unwind_GetGR)
+NOT_HERE_BEFORE_10_6(_Unwind_GetIP)
+NOT_HERE_BEFORE_10_6(_Unwind_GetLanguageSpecificData)
+NOT_HERE_BEFORE_10_6(_Unwind_GetRegionStart)
+NOT_HERE_BEFORE_10_6(_Unwind_RaiseException)
+NOT_HERE_BEFORE_10_6(_Unwind_Resume)
+NOT_HERE_BEFORE_10_6(_Unwind_SetGR)
+NOT_HERE_BEFORE_10_6(_Unwind_SetIP)
+NOT_HERE_BEFORE_10_6(_Unwind_Backtrace)
+NOT_HERE_BEFORE_10_6(_Unwind_FindEnclosingFunction)
+NOT_HERE_BEFORE_10_6(_Unwind_GetCFA)
+NOT_HERE_BEFORE_10_6(_Unwind_GetDataRelBase)
+NOT_HERE_BEFORE_10_6(_Unwind_GetTextRelBase)
+NOT_HERE_BEFORE_10_6(_Unwind_Resume_or_Rethrow)
+NOT_HERE_BEFORE_10_6(_Unwind_GetIPInfo)
+NOT_HERE_BEFORE_10_6(__register_frame)
+NOT_HERE_BEFORE_10_6(__deregister_frame)
+
+//
+// symbols in libSystem.dylib for compatibility, but we don't want any new code
+// using them
+//
+NEVER_HERE(__register_frame_info_bases)
+NEVER_HERE(__register_frame_info)
+NEVER_HERE(__register_frame_info_table_bases)
+NEVER_HERE(__register_frame_info_table)
+NEVER_HERE(__register_frame_table)
+NEVER_HERE(__deregister_frame_info)
+NEVER_HERE(__deregister_frame_info_bases)
+
#endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS)
-
-
-
-
+
+
+
+
#if defined(_LIBUNWIND_BUILD_SJLJ_APIS)
-//
-// symbols in libSystem.dylib in iOS 5.0 and later, but are in libgcc_s.dylib in
-// earlier versions
-//
-NOT_HERE_BEFORE_5_0(_Unwind_GetLanguageSpecificData)
-NOT_HERE_BEFORE_5_0(_Unwind_GetRegionStart)
-NOT_HERE_BEFORE_5_0(_Unwind_GetIP)
-NOT_HERE_BEFORE_5_0(_Unwind_SetGR)
-NOT_HERE_BEFORE_5_0(_Unwind_SetIP)
-NOT_HERE_BEFORE_5_0(_Unwind_DeleteException)
-NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Register)
-NOT_HERE_BEFORE_5_0(_Unwind_GetGR)
-NOT_HERE_BEFORE_5_0(_Unwind_GetIPInfo)
-NOT_HERE_BEFORE_5_0(_Unwind_GetCFA)
-NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume)
-NOT_HERE_BEFORE_5_0(_Unwind_SjLj_RaiseException)
-NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume_or_Rethrow)
-NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Unregister)
-
+//
+// symbols in libSystem.dylib in iOS 5.0 and later, but are in libgcc_s.dylib in
+// earlier versions
+//
+NOT_HERE_BEFORE_5_0(_Unwind_GetLanguageSpecificData)
+NOT_HERE_BEFORE_5_0(_Unwind_GetRegionStart)
+NOT_HERE_BEFORE_5_0(_Unwind_GetIP)
+NOT_HERE_BEFORE_5_0(_Unwind_SetGR)
+NOT_HERE_BEFORE_5_0(_Unwind_SetIP)
+NOT_HERE_BEFORE_5_0(_Unwind_DeleteException)
+NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Register)
+NOT_HERE_BEFORE_5_0(_Unwind_GetGR)
+NOT_HERE_BEFORE_5_0(_Unwind_GetIPInfo)
+NOT_HERE_BEFORE_5_0(_Unwind_GetCFA)
+NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume)
+NOT_HERE_BEFORE_5_0(_Unwind_SjLj_RaiseException)
+NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume_or_Rethrow)
+NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Unregister)
+
#endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS)
diff --git a/contrib/libs/libunwind/src/assembly.h b/contrib/libs/libunwind/src/assembly.h
index ab0b191b9d..978f6bd619 100644
--- a/contrib/libs/libunwind/src/assembly.h
+++ b/contrib/libs/libunwind/src/assembly.h
@@ -1,20 +1,20 @@
-/* ===-- assembly.h - libUnwind assembler support macros -------------------===
- *
+/* ===-- assembly.h - libUnwind assembler support macros -------------------===
+ *
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===----------------------------------------------------------------------===
- *
- * This file defines macros for use in libUnwind assembler source.
- * This file is not part of the interface of this library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#ifndef UNWIND_ASSEMBLY_H
-#define UNWIND_ASSEMBLY_H
-
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file defines macros for use in libUnwind assembler source.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef UNWIND_ASSEMBLY_H
+#define UNWIND_ASSEMBLY_H
+
#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux__)
#include <cet.h>
#define _LIBUNWIND_CET_ENDBR _CET_ENDBR
@@ -33,7 +33,7 @@
#define PPC64_OFFS_FP 312
#define PPC64_OFFS_V 824
#elif defined(__APPLE__) && defined(__aarch64__)
-#define SEPARATOR %%
+#define SEPARATOR %%
#elif defined(__riscv)
# define RISCV_ISIZE (__riscv_xlen / 8)
# define RISCV_FOFFSET (RISCV_ISIZE * 32)
@@ -63,10 +63,10 @@
# endif
# endif
# define SEPARATOR ;
-#else
-#define SEPARATOR ;
-#endif
-
+#else
+#define SEPARATOR ;
+#endif
+
#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1)
#define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR
#define PPC64_OPD2 SEPARATOR \
@@ -76,11 +76,11 @@
.quad 0 SEPARATOR \
.text SEPARATOR \
.Lfunc_begin0:
-#else
+#else
#define PPC64_OPD1
#define PPC64_OPD2
-#endif
-
+#endif
+
#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT)
.pushsection ".note.gnu.property", "a" SEPARATOR \
.balign 8 SEPARATOR \
@@ -110,13 +110,13 @@
#endif
#endif
-#define GLUE2(a, b) a ## b
-#define GLUE(a, b) GLUE2(a, b)
-#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
-
-#if defined(__APPLE__)
+#define GLUE2(a, b) a ## b
+#define GLUE(a, b) GLUE2(a, b)
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#if defined(__APPLE__)
-#define SYMBOL_IS_FUNC(name)
+#define SYMBOL_IS_FUNC(name)
#define HIDDEN_SYMBOL(name) .private_extern name
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
#define EXPORT_SYMBOL(name) HIDDEN_SYMBOL(name)
@@ -130,13 +130,13 @@
#define NO_EXEC_STACK_DIRECTIVE
-#elif defined(__ELF__)
+#elif defined(__ELF__)
-#if defined(__arm__)
-#define SYMBOL_IS_FUNC(name) .type name,%function
-#else
-#define SYMBOL_IS_FUNC(name) .type name,@function
-#endif
+#if defined(__arm__)
+#define SYMBOL_IS_FUNC(name) .type name,%function
+#else
+#define SYMBOL_IS_FUNC(name) .type name,@function
+#endif
#define HIDDEN_SYMBOL(name) .hidden name
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
#define EXPORT_SYMBOL(name) HIDDEN_SYMBOL(name)
@@ -160,17 +160,17 @@
#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
defined(__linux__)
#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
-#else
+#else
#define NO_EXEC_STACK_DIRECTIVE
#endif
#elif defined(_WIN32)
-#define SYMBOL_IS_FUNC(name) \
- .def name SEPARATOR \
- .scl 2 SEPARATOR \
- .type 32 SEPARATOR \
- .endef
+#define SYMBOL_IS_FUNC(name) \
+ .def name SEPARATOR \
+ .scl 2 SEPARATOR \
+ .type 32 SEPARATOR \
+ .endef
#define EXPORT_SYMBOL2(name) \
.section .drectve,"yn" SEPARATOR \
.ascii "-export:", #name, "\0" SEPARATOR \
@@ -179,9 +179,9 @@
#define EXPORT_SYMBOL(name)
#else
#define EXPORT_SYMBOL(name) EXPORT_SYMBOL2(name)
-#endif
+#endif
#define HIDDEN_SYMBOL(name)
-
+
#if defined(__MINGW32__)
#define WEAK_ALIAS(name, aliasname) \
.globl SYMBOL_NAME(aliasname) SEPARATOR \
@@ -198,9 +198,9 @@
EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
WEAK_ALIAS2(SYMBOL_NAME(name), SYMBOL_NAME(aliasname))
#endif
-
+
#define NO_EXEC_STACK_DIRECTIVE
-
+
#elif defined(__sparc__)
#else
@@ -218,24 +218,24 @@
PPC64_OPD2 \
AARCH64_BTI
-#if defined(__arm__)
-#if !defined(__ARM_ARCH)
-#define __ARM_ARCH 4
-#endif
-
-#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
-#define ARM_HAS_BX
-#endif
-
-#ifdef ARM_HAS_BX
-#define JMP(r) bx r
-#else
-#define JMP(r) mov pc, r
-#endif
-#endif /* __arm__ */
-
+#if defined(__arm__)
+#if !defined(__ARM_ARCH)
+#define __ARM_ARCH 4
+#endif
+
+#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
+#define ARM_HAS_BX
+#endif
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#else
+#define JMP(r) mov pc, r
+#endif
+#endif /* __arm__ */
+
#if defined(__powerpc__)
#define PPC_LEFT_SHIFT(index) << (index)
#endif
-#endif /* UNWIND_ASSEMBLY_H */
+#endif /* UNWIND_ASSEMBLY_H */
diff --git a/contrib/libs/libunwind/src/config.h b/contrib/libs/libunwind/src/config.h
index 850a160657..560edda04e 100644
--- a/contrib/libs/libunwind/src/config.h
+++ b/contrib/libs/libunwind/src/config.h
@@ -1,39 +1,39 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
+//
+//
// Defines macros used within libunwind project.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LIBUNWIND_CONFIG_H
-#define LIBUNWIND_CONFIG_H
-
-#include <assert.h>
-#include <stdio.h>
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LIBUNWIND_CONFIG_H
+#define LIBUNWIND_CONFIG_H
+
+#include <assert.h>
+#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
-
+
#include <__libunwind_config.h>
-
-// Platform specific configuration defines.
-#ifdef __APPLE__
+
+// Platform specific configuration defines.
+#ifdef __APPLE__
#if defined(FOR_DYLD)
#define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1
#else
#define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1
#define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
- #endif
+ #endif
#elif defined(_WIN32)
#ifdef __SEH__
#define _LIBUNWIND_SUPPORT_SEH_UNWIND 1
#else
#define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
- #endif
+ #endif
#elif defined(_LIBUNWIND_IS_BAREMETAL)
#if !defined(_LIBUNWIND_ARM_EHABI)
#define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
@@ -51,7 +51,7 @@
#define _LIBUNWIND_SUPPORT_DWARF_INDEX 1
#endif
#endif
-
+
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
// The CMake file passes -fvisibility=hidden to control ELF/Mach-O visibility.
#define _LIBUNWIND_EXPORT
@@ -60,12 +60,12 @@
#if !defined(__ELF__) && !defined(__MACH__)
#define _LIBUNWIND_EXPORT __declspec(dllexport)
#define _LIBUNWIND_HIDDEN
- #else
+ #else
#define _LIBUNWIND_EXPORT __attribute__((visibility("default")))
#define _LIBUNWIND_HIDDEN __attribute__((visibility("hidden")))
- #endif
+ #endif
#endif
-
+
#define STR(a) #a
#define XSTR(a) STR(a)
#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name
@@ -89,7 +89,7 @@
#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \
extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \
__attribute__((alias(#name)));
-#else
+#else
#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \
__pragma(comment(linker, "/alternatename:" SYMBOL_NAME(aliasname) "=" \
SYMBOL_NAME(name))) \
@@ -98,17 +98,17 @@
#else
#error Unsupported target
#endif
-
+
// Apple/armv7k defaults to DWARF/Compact unwinding, but its libunwind also
// needs to include the SJLJ APIs.
#if (defined(__APPLE__) && defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__)
#define _LIBUNWIND_BUILD_SJLJ_APIS
#endif
-
+
#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
#define _LIBUNWIND_SUPPORT_FRAME_APIS
#endif
-
+
#if defined(__i386__) || defined(__x86_64__) || \
defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) || \
(!defined(__APPLE__) && defined(__arm__)) || \
@@ -118,9 +118,9 @@
defined(__hexagon__)
#if !defined(_LIBUNWIND_BUILD_SJLJ_APIS)
#define _LIBUNWIND_BUILD_ZERO_COST_APIS
-#endif
#endif
-
+#endif
+
#ifndef _LIBUNWIND_REMEMBER_HEAP_ALLOC
#if defined(_LIBUNWIND_REMEMBER_STACK_ALLOC) || defined(__APPLE__) || \
defined(__linux__) || defined(__ANDROID__) || defined(__MINGW32__) || \
@@ -143,7 +143,7 @@
#define _LIBUNWIND_REMEMBER_FREE(_ptr) free(_ptr)
#define _LIBUNWIND_REMEMBER_CLEANUP_NEEDED
#endif
-
+
#if defined(NDEBUG) && defined(_LIBUNWIND_IS_BAREMETAL)
#define _LIBUNWIND_ABORT(msg) \
do { \
@@ -179,31 +179,31 @@
} while (0)
#endif
-// Macros that define away in non-Debug builds
-#ifdef NDEBUG
- #define _LIBUNWIND_DEBUG_LOG(msg, ...)
- #define _LIBUNWIND_TRACE_API(msg, ...)
+// Macros that define away in non-Debug builds
+#ifdef NDEBUG
+ #define _LIBUNWIND_DEBUG_LOG(msg, ...)
+ #define _LIBUNWIND_TRACE_API(msg, ...)
#define _LIBUNWIND_TRACING_UNWINDING (0)
#define _LIBUNWIND_TRACING_DWARF (0)
- #define _LIBUNWIND_TRACE_UNWINDING(msg, ...)
+ #define _LIBUNWIND_TRACE_UNWINDING(msg, ...)
#define _LIBUNWIND_TRACE_DWARF(...)
-#else
- #ifdef __cplusplus
- extern "C" {
- #endif
- extern bool logAPIs();
- extern bool logUnwinding();
+#else
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+ extern bool logAPIs();
+ extern bool logUnwinding();
extern bool logDWARF();
- #ifdef __cplusplus
- }
- #endif
- #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__)
+ #ifdef __cplusplus
+ }
+ #endif
+ #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__)
#define _LIBUNWIND_TRACE_API(msg, ...) \
do { \
if (logAPIs()) \
_LIBUNWIND_LOG(msg, __VA_ARGS__); \
} while (0)
- #define _LIBUNWIND_TRACING_UNWINDING logUnwinding()
+ #define _LIBUNWIND_TRACING_UNWINDING logUnwinding()
#define _LIBUNWIND_TRACING_DWARF logDWARF()
#define _LIBUNWIND_TRACE_UNWINDING(msg, ...) \
do { \
@@ -215,8 +215,8 @@
if (logDWARF()) \
fprintf(stderr, __VA_ARGS__); \
} while (0)
-#endif
-
+#endif
+
#ifdef __cplusplus
// Used to fit UnwindCursor and Registers_xxx types against unw_context_t /
// unw_cursor_t sized memory blocks.
@@ -237,5 +237,5 @@ struct check_fit {
};
#undef COMP_OP
#endif // __cplusplus
-
-#endif // LIBUNWIND_CONFIG_H
+
+#endif // LIBUNWIND_CONFIG_H
diff --git a/contrib/libs/libunwind/src/dwarf2.h b/contrib/libs/libunwind/src/dwarf2.h
index ec099388b2..174277d5a7 100644
--- a/contrib/libs/libunwind/src/dwarf2.h
+++ b/contrib/libs/libunwind/src/dwarf2.h
@@ -1,239 +1,239 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-
-/*
- These constants were taken from version 3 of the DWARF standard,
- which is Copyright (c) 2005 Free Standards Group, and
- Copyright (c) 1992, 1993 UNIX International, Inc.
-*/
-
-#ifndef __DWARF2__
-#define __DWARF2__
-
-// DWARF unwind instructions
-enum {
- DW_CFA_nop = 0x0,
- DW_CFA_set_loc = 0x1,
- DW_CFA_advance_loc1 = 0x2,
- DW_CFA_advance_loc2 = 0x3,
- DW_CFA_advance_loc4 = 0x4,
- DW_CFA_offset_extended = 0x5,
- DW_CFA_restore_extended = 0x6,
- DW_CFA_undefined = 0x7,
- DW_CFA_same_value = 0x8,
- DW_CFA_register = 0x9,
- DW_CFA_remember_state = 0xA,
- DW_CFA_restore_state = 0xB,
- DW_CFA_def_cfa = 0xC,
- DW_CFA_def_cfa_register = 0xD,
- DW_CFA_def_cfa_offset = 0xE,
- DW_CFA_def_cfa_expression = 0xF,
- DW_CFA_expression = 0x10,
- DW_CFA_offset_extended_sf = 0x11,
- DW_CFA_def_cfa_sf = 0x12,
- DW_CFA_def_cfa_offset_sf = 0x13,
- DW_CFA_val_offset = 0x14,
- DW_CFA_val_offset_sf = 0x15,
- DW_CFA_val_expression = 0x16,
- DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta
- DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register
- DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register
-
- // GNU extensions
- DW_CFA_GNU_window_save = 0x2D,
- DW_CFA_GNU_args_size = 0x2E,
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ These constants were taken from version 3 of the DWARF standard,
+ which is Copyright (c) 2005 Free Standards Group, and
+ Copyright (c) 1992, 1993 UNIX International, Inc.
+*/
+
+#ifndef __DWARF2__
+#define __DWARF2__
+
+// DWARF unwind instructions
+enum {
+ DW_CFA_nop = 0x0,
+ DW_CFA_set_loc = 0x1,
+ DW_CFA_advance_loc1 = 0x2,
+ DW_CFA_advance_loc2 = 0x3,
+ DW_CFA_advance_loc4 = 0x4,
+ DW_CFA_offset_extended = 0x5,
+ DW_CFA_restore_extended = 0x6,
+ DW_CFA_undefined = 0x7,
+ DW_CFA_same_value = 0x8,
+ DW_CFA_register = 0x9,
+ DW_CFA_remember_state = 0xA,
+ DW_CFA_restore_state = 0xB,
+ DW_CFA_def_cfa = 0xC,
+ DW_CFA_def_cfa_register = 0xD,
+ DW_CFA_def_cfa_offset = 0xE,
+ DW_CFA_def_cfa_expression = 0xF,
+ DW_CFA_expression = 0x10,
+ DW_CFA_offset_extended_sf = 0x11,
+ DW_CFA_def_cfa_sf = 0x12,
+ DW_CFA_def_cfa_offset_sf = 0x13,
+ DW_CFA_val_offset = 0x14,
+ DW_CFA_val_offset_sf = 0x15,
+ DW_CFA_val_expression = 0x16,
+ DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta
+ DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register
+ DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register
+
+ // GNU extensions
+ DW_CFA_GNU_window_save = 0x2D,
+ DW_CFA_GNU_args_size = 0x2E,
DW_CFA_GNU_negative_offset_extended = 0x2F,
// AARCH64 extensions
DW_CFA_AARCH64_negate_ra_state = 0x2D
-};
-
-
-// FSF exception handling Pointer-Encoding constants
-// Used in CFI augmentation by GCC
-enum {
- DW_EH_PE_ptr = 0x00,
- DW_EH_PE_uleb128 = 0x01,
- DW_EH_PE_udata2 = 0x02,
- DW_EH_PE_udata4 = 0x03,
- DW_EH_PE_udata8 = 0x04,
- DW_EH_PE_signed = 0x08,
- DW_EH_PE_sleb128 = 0x09,
- DW_EH_PE_sdata2 = 0x0A,
- DW_EH_PE_sdata4 = 0x0B,
- DW_EH_PE_sdata8 = 0x0C,
- DW_EH_PE_absptr = 0x00,
- DW_EH_PE_pcrel = 0x10,
- DW_EH_PE_textrel = 0x20,
- DW_EH_PE_datarel = 0x30,
- DW_EH_PE_funcrel = 0x40,
- DW_EH_PE_aligned = 0x50,
- DW_EH_PE_indirect = 0x80,
- DW_EH_PE_omit = 0xFF
-};
-
-
-// DWARF expressions
-enum {
- DW_OP_addr = 0x03, // constant address (size target specific)
- DW_OP_deref = 0x06,
- DW_OP_const1u = 0x08, // 1-byte constant
- DW_OP_const1s = 0x09, // 1-byte constant
- DW_OP_const2u = 0x0A, // 2-byte constant
- DW_OP_const2s = 0x0B, // 2-byte constant
- DW_OP_const4u = 0x0C, // 4-byte constant
- DW_OP_const4s = 0x0D, // 4-byte constant
- DW_OP_const8u = 0x0E, // 8-byte constant
- DW_OP_const8s = 0x0F, // 8-byte constant
- DW_OP_constu = 0x10, // ULEB128 constant
- DW_OP_consts = 0x11, // SLEB128 constant
- DW_OP_dup = 0x12,
- DW_OP_drop = 0x13,
- DW_OP_over = 0x14,
- DW_OP_pick = 0x15, // 1-byte stack index
- DW_OP_swap = 0x16,
- DW_OP_rot = 0x17,
- DW_OP_xderef = 0x18,
- DW_OP_abs = 0x19,
- DW_OP_and = 0x1A,
- DW_OP_div = 0x1B,
- DW_OP_minus = 0x1C,
- DW_OP_mod = 0x1D,
- DW_OP_mul = 0x1E,
- DW_OP_neg = 0x1F,
- DW_OP_not = 0x20,
- DW_OP_or = 0x21,
- DW_OP_plus = 0x22,
- DW_OP_plus_uconst = 0x23, // ULEB128 addend
- DW_OP_shl = 0x24,
- DW_OP_shr = 0x25,
- DW_OP_shra = 0x26,
- DW_OP_xor = 0x27,
- DW_OP_skip = 0x2F, // signed 2-byte constant
- DW_OP_bra = 0x28, // signed 2-byte constant
- DW_OP_eq = 0x29,
- DW_OP_ge = 0x2A,
- DW_OP_gt = 0x2B,
- DW_OP_le = 0x2C,
- DW_OP_lt = 0x2D,
- DW_OP_ne = 0x2E,
- DW_OP_lit0 = 0x30, // Literal 0
- DW_OP_lit1 = 0x31, // Literal 1
- DW_OP_lit2 = 0x32, // Literal 2
- DW_OP_lit3 = 0x33, // Literal 3
- DW_OP_lit4 = 0x34, // Literal 4
- DW_OP_lit5 = 0x35, // Literal 5
- DW_OP_lit6 = 0x36, // Literal 6
- DW_OP_lit7 = 0x37, // Literal 7
- DW_OP_lit8 = 0x38, // Literal 8
- DW_OP_lit9 = 0x39, // Literal 9
- DW_OP_lit10 = 0x3A, // Literal 10
- DW_OP_lit11 = 0x3B, // Literal 11
- DW_OP_lit12 = 0x3C, // Literal 12
- DW_OP_lit13 = 0x3D, // Literal 13
- DW_OP_lit14 = 0x3E, // Literal 14
- DW_OP_lit15 = 0x3F, // Literal 15
- DW_OP_lit16 = 0x40, // Literal 16
- DW_OP_lit17 = 0x41, // Literal 17
- DW_OP_lit18 = 0x42, // Literal 18
- DW_OP_lit19 = 0x43, // Literal 19
- DW_OP_lit20 = 0x44, // Literal 20
- DW_OP_lit21 = 0x45, // Literal 21
- DW_OP_lit22 = 0x46, // Literal 22
- DW_OP_lit23 = 0x47, // Literal 23
- DW_OP_lit24 = 0x48, // Literal 24
- DW_OP_lit25 = 0x49, // Literal 25
- DW_OP_lit26 = 0x4A, // Literal 26
- DW_OP_lit27 = 0x4B, // Literal 27
- DW_OP_lit28 = 0x4C, // Literal 28
- DW_OP_lit29 = 0x4D, // Literal 29
- DW_OP_lit30 = 0x4E, // Literal 30
- DW_OP_lit31 = 0x4F, // Literal 31
- DW_OP_reg0 = 0x50, // Contents of reg0
- DW_OP_reg1 = 0x51, // Contents of reg1
- DW_OP_reg2 = 0x52, // Contents of reg2
- DW_OP_reg3 = 0x53, // Contents of reg3
- DW_OP_reg4 = 0x54, // Contents of reg4
- DW_OP_reg5 = 0x55, // Contents of reg5
- DW_OP_reg6 = 0x56, // Contents of reg6
- DW_OP_reg7 = 0x57, // Contents of reg7
- DW_OP_reg8 = 0x58, // Contents of reg8
- DW_OP_reg9 = 0x59, // Contents of reg9
- DW_OP_reg10 = 0x5A, // Contents of reg10
- DW_OP_reg11 = 0x5B, // Contents of reg11
- DW_OP_reg12 = 0x5C, // Contents of reg12
- DW_OP_reg13 = 0x5D, // Contents of reg13
- DW_OP_reg14 = 0x5E, // Contents of reg14
- DW_OP_reg15 = 0x5F, // Contents of reg15
- DW_OP_reg16 = 0x60, // Contents of reg16
- DW_OP_reg17 = 0x61, // Contents of reg17
- DW_OP_reg18 = 0x62, // Contents of reg18
- DW_OP_reg19 = 0x63, // Contents of reg19
- DW_OP_reg20 = 0x64, // Contents of reg20
- DW_OP_reg21 = 0x65, // Contents of reg21
- DW_OP_reg22 = 0x66, // Contents of reg22
- DW_OP_reg23 = 0x67, // Contents of reg23
- DW_OP_reg24 = 0x68, // Contents of reg24
- DW_OP_reg25 = 0x69, // Contents of reg25
- DW_OP_reg26 = 0x6A, // Contents of reg26
- DW_OP_reg27 = 0x6B, // Contents of reg27
- DW_OP_reg28 = 0x6C, // Contents of reg28
- DW_OP_reg29 = 0x6D, // Contents of reg29
- DW_OP_reg30 = 0x6E, // Contents of reg30
- DW_OP_reg31 = 0x6F, // Contents of reg31
- DW_OP_breg0 = 0x70, // base register 0 + SLEB128 offset
- DW_OP_breg1 = 0x71, // base register 1 + SLEB128 offset
- DW_OP_breg2 = 0x72, // base register 2 + SLEB128 offset
- DW_OP_breg3 = 0x73, // base register 3 + SLEB128 offset
- DW_OP_breg4 = 0x74, // base register 4 + SLEB128 offset
- DW_OP_breg5 = 0x75, // base register 5 + SLEB128 offset
- DW_OP_breg6 = 0x76, // base register 6 + SLEB128 offset
- DW_OP_breg7 = 0x77, // base register 7 + SLEB128 offset
- DW_OP_breg8 = 0x78, // base register 8 + SLEB128 offset
- DW_OP_breg9 = 0x79, // base register 9 + SLEB128 offset
- DW_OP_breg10 = 0x7A, // base register 10 + SLEB128 offset
- DW_OP_breg11 = 0x7B, // base register 11 + SLEB128 offset
- DW_OP_breg12 = 0x7C, // base register 12 + SLEB128 offset
- DW_OP_breg13 = 0x7D, // base register 13 + SLEB128 offset
- DW_OP_breg14 = 0x7E, // base register 14 + SLEB128 offset
- DW_OP_breg15 = 0x7F, // base register 15 + SLEB128 offset
- DW_OP_breg16 = 0x80, // base register 16 + SLEB128 offset
- DW_OP_breg17 = 0x81, // base register 17 + SLEB128 offset
- DW_OP_breg18 = 0x82, // base register 18 + SLEB128 offset
- DW_OP_breg19 = 0x83, // base register 19 + SLEB128 offset
- DW_OP_breg20 = 0x84, // base register 20 + SLEB128 offset
- DW_OP_breg21 = 0x85, // base register 21 + SLEB128 offset
- DW_OP_breg22 = 0x86, // base register 22 + SLEB128 offset
- DW_OP_breg23 = 0x87, // base register 23 + SLEB128 offset
- DW_OP_breg24 = 0x88, // base register 24 + SLEB128 offset
- DW_OP_breg25 = 0x89, // base register 25 + SLEB128 offset
- DW_OP_breg26 = 0x8A, // base register 26 + SLEB128 offset
- DW_OP_breg27 = 0x8B, // base register 27 + SLEB128 offset
- DW_OP_breg28 = 0x8C, // base register 28 + SLEB128 offset
- DW_OP_breg29 = 0x8D, // base register 29 + SLEB128 offset
- DW_OP_breg30 = 0x8E, // base register 30 + SLEB128 offset
- DW_OP_breg31 = 0x8F, // base register 31 + SLEB128 offset
- DW_OP_regx = 0x90, // ULEB128 register
- DW_OP_fbreg = 0x91, // SLEB128 offset
- DW_OP_bregx = 0x92, // ULEB128 register followed by SLEB128 offset
- DW_OP_piece = 0x93, // ULEB128 size of piece addressed
- DW_OP_deref_size = 0x94, // 1-byte size of data retrieved
- DW_OP_xderef_size = 0x95, // 1-byte size of data retrieved
- DW_OP_nop = 0x96,
- DW_OP_push_object_addres = 0x97,
- DW_OP_call2 = 0x98, // 2-byte offset of DIE
- DW_OP_call4 = 0x99, // 4-byte offset of DIE
- DW_OP_call_ref = 0x9A, // 4- or 8-byte offset of DIE
- DW_OP_lo_user = 0xE0,
- DW_OP_APPLE_uninit = 0xF0,
- DW_OP_hi_user = 0xFF
-};
-
-
-#endif
+};
+
+
+// FSF exception handling Pointer-Encoding constants
+// Used in CFI augmentation by GCC
+enum {
+ DW_EH_PE_ptr = 0x00,
+ DW_EH_PE_uleb128 = 0x01,
+ DW_EH_PE_udata2 = 0x02,
+ DW_EH_PE_udata4 = 0x03,
+ DW_EH_PE_udata8 = 0x04,
+ DW_EH_PE_signed = 0x08,
+ DW_EH_PE_sleb128 = 0x09,
+ DW_EH_PE_sdata2 = 0x0A,
+ DW_EH_PE_sdata4 = 0x0B,
+ DW_EH_PE_sdata8 = 0x0C,
+ DW_EH_PE_absptr = 0x00,
+ DW_EH_PE_pcrel = 0x10,
+ DW_EH_PE_textrel = 0x20,
+ DW_EH_PE_datarel = 0x30,
+ DW_EH_PE_funcrel = 0x40,
+ DW_EH_PE_aligned = 0x50,
+ DW_EH_PE_indirect = 0x80,
+ DW_EH_PE_omit = 0xFF
+};
+
+
+// DWARF expressions
+enum {
+ DW_OP_addr = 0x03, // constant address (size target specific)
+ DW_OP_deref = 0x06,
+ DW_OP_const1u = 0x08, // 1-byte constant
+ DW_OP_const1s = 0x09, // 1-byte constant
+ DW_OP_const2u = 0x0A, // 2-byte constant
+ DW_OP_const2s = 0x0B, // 2-byte constant
+ DW_OP_const4u = 0x0C, // 4-byte constant
+ DW_OP_const4s = 0x0D, // 4-byte constant
+ DW_OP_const8u = 0x0E, // 8-byte constant
+ DW_OP_const8s = 0x0F, // 8-byte constant
+ DW_OP_constu = 0x10, // ULEB128 constant
+ DW_OP_consts = 0x11, // SLEB128 constant
+ DW_OP_dup = 0x12,
+ DW_OP_drop = 0x13,
+ DW_OP_over = 0x14,
+ DW_OP_pick = 0x15, // 1-byte stack index
+ DW_OP_swap = 0x16,
+ DW_OP_rot = 0x17,
+ DW_OP_xderef = 0x18,
+ DW_OP_abs = 0x19,
+ DW_OP_and = 0x1A,
+ DW_OP_div = 0x1B,
+ DW_OP_minus = 0x1C,
+ DW_OP_mod = 0x1D,
+ DW_OP_mul = 0x1E,
+ DW_OP_neg = 0x1F,
+ DW_OP_not = 0x20,
+ DW_OP_or = 0x21,
+ DW_OP_plus = 0x22,
+ DW_OP_plus_uconst = 0x23, // ULEB128 addend
+ DW_OP_shl = 0x24,
+ DW_OP_shr = 0x25,
+ DW_OP_shra = 0x26,
+ DW_OP_xor = 0x27,
+ DW_OP_skip = 0x2F, // signed 2-byte constant
+ DW_OP_bra = 0x28, // signed 2-byte constant
+ DW_OP_eq = 0x29,
+ DW_OP_ge = 0x2A,
+ DW_OP_gt = 0x2B,
+ DW_OP_le = 0x2C,
+ DW_OP_lt = 0x2D,
+ DW_OP_ne = 0x2E,
+ DW_OP_lit0 = 0x30, // Literal 0
+ DW_OP_lit1 = 0x31, // Literal 1
+ DW_OP_lit2 = 0x32, // Literal 2
+ DW_OP_lit3 = 0x33, // Literal 3
+ DW_OP_lit4 = 0x34, // Literal 4
+ DW_OP_lit5 = 0x35, // Literal 5
+ DW_OP_lit6 = 0x36, // Literal 6
+ DW_OP_lit7 = 0x37, // Literal 7
+ DW_OP_lit8 = 0x38, // Literal 8
+ DW_OP_lit9 = 0x39, // Literal 9
+ DW_OP_lit10 = 0x3A, // Literal 10
+ DW_OP_lit11 = 0x3B, // Literal 11
+ DW_OP_lit12 = 0x3C, // Literal 12
+ DW_OP_lit13 = 0x3D, // Literal 13
+ DW_OP_lit14 = 0x3E, // Literal 14
+ DW_OP_lit15 = 0x3F, // Literal 15
+ DW_OP_lit16 = 0x40, // Literal 16
+ DW_OP_lit17 = 0x41, // Literal 17
+ DW_OP_lit18 = 0x42, // Literal 18
+ DW_OP_lit19 = 0x43, // Literal 19
+ DW_OP_lit20 = 0x44, // Literal 20
+ DW_OP_lit21 = 0x45, // Literal 21
+ DW_OP_lit22 = 0x46, // Literal 22
+ DW_OP_lit23 = 0x47, // Literal 23
+ DW_OP_lit24 = 0x48, // Literal 24
+ DW_OP_lit25 = 0x49, // Literal 25
+ DW_OP_lit26 = 0x4A, // Literal 26
+ DW_OP_lit27 = 0x4B, // Literal 27
+ DW_OP_lit28 = 0x4C, // Literal 28
+ DW_OP_lit29 = 0x4D, // Literal 29
+ DW_OP_lit30 = 0x4E, // Literal 30
+ DW_OP_lit31 = 0x4F, // Literal 31
+ DW_OP_reg0 = 0x50, // Contents of reg0
+ DW_OP_reg1 = 0x51, // Contents of reg1
+ DW_OP_reg2 = 0x52, // Contents of reg2
+ DW_OP_reg3 = 0x53, // Contents of reg3
+ DW_OP_reg4 = 0x54, // Contents of reg4
+ DW_OP_reg5 = 0x55, // Contents of reg5
+ DW_OP_reg6 = 0x56, // Contents of reg6
+ DW_OP_reg7 = 0x57, // Contents of reg7
+ DW_OP_reg8 = 0x58, // Contents of reg8
+ DW_OP_reg9 = 0x59, // Contents of reg9
+ DW_OP_reg10 = 0x5A, // Contents of reg10
+ DW_OP_reg11 = 0x5B, // Contents of reg11
+ DW_OP_reg12 = 0x5C, // Contents of reg12
+ DW_OP_reg13 = 0x5D, // Contents of reg13
+ DW_OP_reg14 = 0x5E, // Contents of reg14
+ DW_OP_reg15 = 0x5F, // Contents of reg15
+ DW_OP_reg16 = 0x60, // Contents of reg16
+ DW_OP_reg17 = 0x61, // Contents of reg17
+ DW_OP_reg18 = 0x62, // Contents of reg18
+ DW_OP_reg19 = 0x63, // Contents of reg19
+ DW_OP_reg20 = 0x64, // Contents of reg20
+ DW_OP_reg21 = 0x65, // Contents of reg21
+ DW_OP_reg22 = 0x66, // Contents of reg22
+ DW_OP_reg23 = 0x67, // Contents of reg23
+ DW_OP_reg24 = 0x68, // Contents of reg24
+ DW_OP_reg25 = 0x69, // Contents of reg25
+ DW_OP_reg26 = 0x6A, // Contents of reg26
+ DW_OP_reg27 = 0x6B, // Contents of reg27
+ DW_OP_reg28 = 0x6C, // Contents of reg28
+ DW_OP_reg29 = 0x6D, // Contents of reg29
+ DW_OP_reg30 = 0x6E, // Contents of reg30
+ DW_OP_reg31 = 0x6F, // Contents of reg31
+ DW_OP_breg0 = 0x70, // base register 0 + SLEB128 offset
+ DW_OP_breg1 = 0x71, // base register 1 + SLEB128 offset
+ DW_OP_breg2 = 0x72, // base register 2 + SLEB128 offset
+ DW_OP_breg3 = 0x73, // base register 3 + SLEB128 offset
+ DW_OP_breg4 = 0x74, // base register 4 + SLEB128 offset
+ DW_OP_breg5 = 0x75, // base register 5 + SLEB128 offset
+ DW_OP_breg6 = 0x76, // base register 6 + SLEB128 offset
+ DW_OP_breg7 = 0x77, // base register 7 + SLEB128 offset
+ DW_OP_breg8 = 0x78, // base register 8 + SLEB128 offset
+ DW_OP_breg9 = 0x79, // base register 9 + SLEB128 offset
+ DW_OP_breg10 = 0x7A, // base register 10 + SLEB128 offset
+ DW_OP_breg11 = 0x7B, // base register 11 + SLEB128 offset
+ DW_OP_breg12 = 0x7C, // base register 12 + SLEB128 offset
+ DW_OP_breg13 = 0x7D, // base register 13 + SLEB128 offset
+ DW_OP_breg14 = 0x7E, // base register 14 + SLEB128 offset
+ DW_OP_breg15 = 0x7F, // base register 15 + SLEB128 offset
+ DW_OP_breg16 = 0x80, // base register 16 + SLEB128 offset
+ DW_OP_breg17 = 0x81, // base register 17 + SLEB128 offset
+ DW_OP_breg18 = 0x82, // base register 18 + SLEB128 offset
+ DW_OP_breg19 = 0x83, // base register 19 + SLEB128 offset
+ DW_OP_breg20 = 0x84, // base register 20 + SLEB128 offset
+ DW_OP_breg21 = 0x85, // base register 21 + SLEB128 offset
+ DW_OP_breg22 = 0x86, // base register 22 + SLEB128 offset
+ DW_OP_breg23 = 0x87, // base register 23 + SLEB128 offset
+ DW_OP_breg24 = 0x88, // base register 24 + SLEB128 offset
+ DW_OP_breg25 = 0x89, // base register 25 + SLEB128 offset
+ DW_OP_breg26 = 0x8A, // base register 26 + SLEB128 offset
+ DW_OP_breg27 = 0x8B, // base register 27 + SLEB128 offset
+ DW_OP_breg28 = 0x8C, // base register 28 + SLEB128 offset
+ DW_OP_breg29 = 0x8D, // base register 29 + SLEB128 offset
+ DW_OP_breg30 = 0x8E, // base register 30 + SLEB128 offset
+ DW_OP_breg31 = 0x8F, // base register 31 + SLEB128 offset
+ DW_OP_regx = 0x90, // ULEB128 register
+ DW_OP_fbreg = 0x91, // SLEB128 offset
+ DW_OP_bregx = 0x92, // ULEB128 register followed by SLEB128 offset
+ DW_OP_piece = 0x93, // ULEB128 size of piece addressed
+ DW_OP_deref_size = 0x94, // 1-byte size of data retrieved
+ DW_OP_xderef_size = 0x95, // 1-byte size of data retrieved
+ DW_OP_nop = 0x96,
+ DW_OP_push_object_addres = 0x97,
+ DW_OP_call2 = 0x98, // 2-byte offset of DIE
+ DW_OP_call4 = 0x99, // 4-byte offset of DIE
+ DW_OP_call_ref = 0x9A, // 4- or 8-byte offset of DIE
+ DW_OP_lo_user = 0xE0,
+ DW_OP_APPLE_uninit = 0xF0,
+ DW_OP_hi_user = 0xFF
+};
+
+
+#endif
diff --git a/contrib/libs/libunwind/src/libunwind.cpp b/contrib/libs/libunwind/src/libunwind.cpp
index 7c47a76799..03f8b75b5b 100644
--- a/contrib/libs/libunwind/src/libunwind.cpp
+++ b/contrib/libs/libunwind/src/libunwind.cpp
@@ -1,21 +1,21 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Implements unw_* functions from <libunwind.h>
-//
-//===----------------------------------------------------------------------===//
-
-#include <libunwind.h>
-
+//
+//
+// Implements unw_* functions from <libunwind.h>
+//
+//===----------------------------------------------------------------------===//
+
+#include <libunwind.h>
+
#include "config.h"
-#include "libunwind_ext.h"
-
-#include <stdlib.h>
-
+#include "libunwind_ext.h"
+
+#include <stdlib.h>
+
// Define the __has_feature extension for compilers that do not support it so
// that we can later check for the presence of ASan in a compiler-neutral way.
#if !defined(__has_feature)
@@ -28,26 +28,26 @@
#if !defined(__USING_SJLJ_EXCEPTIONS__)
#include "AddressSpace.hpp"
-#include "UnwindCursor.hpp"
-
-using namespace libunwind;
-
-/// internal object to represent this process's address space
-LocalAddressSpace LocalAddressSpace::sThisAddressSpace;
-
-_LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space =
- (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace;
-
-/// Create a cursor of a thread in this process given 'context' recorded by
+#include "UnwindCursor.hpp"
+
+using namespace libunwind;
+
+/// internal object to represent this process's address space
+LocalAddressSpace LocalAddressSpace::sThisAddressSpace;
+
+_LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space =
+ (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace;
+
+/// Create a cursor of a thread in this process given 'context' recorded by
/// __unw_getcontext().
_LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
unw_context_t *context) {
_LIBUNWIND_TRACE_API("__unw_init_local(cursor=%p, context=%p)",
- static_cast<void *>(cursor),
- static_cast<void *>(context));
-#if defined(__i386__)
+ static_cast<void *>(cursor),
+ static_cast<void *>(context));
+#if defined(__i386__)
# define REGISTER_KIND Registers_x86
-#elif defined(__x86_64__)
+#elif defined(__x86_64__)
# define REGISTER_KIND Registers_x86_64
#elif defined(__powerpc64__)
# define REGISTER_KIND Registers_ppc64
@@ -57,7 +57,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
# define REGISTER_KIND Registers_arm64
#elif defined(__arm__)
# define REGISTER_KIND Registers_arm
-#elif defined(__or1k__)
+#elif defined(__or1k__)
# define REGISTER_KIND Registers_or1k
#elif defined(__hexagon__)
# define REGISTER_KIND Registers_hexagon
@@ -65,7 +65,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
# define REGISTER_KIND Registers_mips_o32
#elif defined(__mips64)
# define REGISTER_KIND Registers_mips_newabi
-#elif defined(__mips__)
+#elif defined(__mips__)
# warning The MIPS architecture is not supported with this ABI and environment!
#elif defined(__sparc__) && defined(__arch64__)
#define REGISTER_KIND Registers_sparc64
@@ -75,53 +75,53 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
# define REGISTER_KIND Registers_riscv
#elif defined(__ve__)
# define REGISTER_KIND Registers_ve
-#else
+#else
# error Architecture not supported
-#endif
+#endif
// Use "placement new" to allocate UnwindCursor in the cursor buffer.
new (reinterpret_cast<UnwindCursor<LocalAddressSpace, REGISTER_KIND> *>(cursor))
UnwindCursor<LocalAddressSpace, REGISTER_KIND>(
context, LocalAddressSpace::sThisAddressSpace);
#undef REGISTER_KIND
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- co->setInfoBasedOnIPRegister();
-
- return UNW_ESUCCESS;
-}
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ co->setInfoBasedOnIPRegister();
+
+ return UNW_ESUCCESS;
+}
_LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local)
-
-/// Get value of specified register at cursor position in stack frame.
+
+/// Get value of specified register at cursor position in stack frame.
_LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
unw_word_t *value) {
_LIBUNWIND_TRACE_API("__unw_get_reg(cursor=%p, regNum=%d, &value=%p)",
- static_cast<void *>(cursor), regNum,
- static_cast<void *>(value));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- if (co->validReg(regNum)) {
- *value = co->getReg(regNum);
- return UNW_ESUCCESS;
- }
- return UNW_EBADREG;
-}
+ static_cast<void *>(cursor), regNum,
+ static_cast<void *>(value));
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ if (co->validReg(regNum)) {
+ *value = co->getReg(regNum);
+ return UNW_ESUCCESS;
+ }
+ return UNW_EBADREG;
+}
_LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg)
-
-/// Set value of specified register at cursor position in stack frame.
+
+/// Set value of specified register at cursor position in stack frame.
_LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
unw_word_t value) {
_LIBUNWIND_TRACE_API("__unw_set_reg(cursor=%p, regNum=%d, value=0x%" PRIxPTR
")",
static_cast<void *>(cursor), regNum, value);
- typedef LocalAddressSpace::pint_t pint_t;
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- if (co->validReg(regNum)) {
- co->setReg(regNum, (pint_t)value);
- // specical case altering IP to re-find info (being called by personality
- // function)
+ typedef LocalAddressSpace::pint_t pint_t;
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ if (co->validReg(regNum)) {
+ co->setReg(regNum, (pint_t)value);
+ // special case altering IP to re-find info (being called by personality
+ // function)
if (regNum == UNW_REG_IP) {
unw_proc_info_t info;
// First, get the FDE for the old location and then update it.
co->getInfo(&info);
- co->setInfoBasedOnIPRegister(false);
+ co->setInfoBasedOnIPRegister(false);
// If the original call expects stack adjustment, perform this now.
// Normal frame unwinding would have included the offset already in the
// CFA computation.
@@ -131,169 +131,169 @@ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
if (info.gp)
co->setReg(UNW_REG_SP, co->getReg(UNW_REG_SP) + info.gp);
}
- return UNW_ESUCCESS;
- }
- return UNW_EBADREG;
-}
+ return UNW_ESUCCESS;
+ }
+ return UNW_EBADREG;
+}
_LIBUNWIND_WEAK_ALIAS(__unw_set_reg, unw_set_reg)
-
-/// Get value of specified float register at cursor position in stack frame.
+
+/// Get value of specified float register at cursor position in stack frame.
_LIBUNWIND_HIDDEN int __unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum,
unw_fpreg_t *value) {
_LIBUNWIND_TRACE_API("__unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)",
- static_cast<void *>(cursor), regNum,
- static_cast<void *>(value));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- if (co->validFloatReg(regNum)) {
- *value = co->getFloatReg(regNum);
- return UNW_ESUCCESS;
- }
- return UNW_EBADREG;
-}
+ static_cast<void *>(cursor), regNum,
+ static_cast<void *>(value));
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ if (co->validFloatReg(regNum)) {
+ *value = co->getFloatReg(regNum);
+ return UNW_ESUCCESS;
+ }
+ return UNW_EBADREG;
+}
_LIBUNWIND_WEAK_ALIAS(__unw_get_fpreg, unw_get_fpreg)
-
-/// Set value of specified float register at cursor position in stack frame.
+
+/// Set value of specified float register at cursor position in stack frame.
_LIBUNWIND_HIDDEN int __unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum,
unw_fpreg_t value) {
#if defined(_LIBUNWIND_ARM_EHABI)
_LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)",
- static_cast<void *>(cursor), regNum, value);
-#else
+ static_cast<void *>(cursor), regNum, value);
+#else
_LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%g)",
- static_cast<void *>(cursor), regNum, value);
-#endif
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- if (co->validFloatReg(regNum)) {
- co->setFloatReg(regNum, value);
- return UNW_ESUCCESS;
- }
- return UNW_EBADREG;
-}
+ static_cast<void *>(cursor), regNum, value);
+#endif
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ if (co->validFloatReg(regNum)) {
+ co->setFloatReg(regNum, value);
+ return UNW_ESUCCESS;
+ }
+ return UNW_EBADREG;
+}
_LIBUNWIND_WEAK_ALIAS(__unw_set_fpreg, unw_set_fpreg)
-
-/// Move cursor to next frame.
+
+/// Move cursor to next frame.
_LIBUNWIND_HIDDEN int __unw_step(unw_cursor_t *cursor) {
_LIBUNWIND_TRACE_API("__unw_step(cursor=%p)", static_cast<void *>(cursor));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- return co->step();
-}
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ return co->step();
+}
_LIBUNWIND_WEAK_ALIAS(__unw_step, unw_step)
-
-/// Get unwind info at cursor position in stack frame.
+
+/// Get unwind info at cursor position in stack frame.
_LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor,
unw_proc_info_t *info) {
_LIBUNWIND_TRACE_API("__unw_get_proc_info(cursor=%p, &info=%p)",
- static_cast<void *>(cursor), static_cast<void *>(info));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- co->getInfo(info);
- if (info->end_ip == 0)
- return UNW_ENOINFO;
+ static_cast<void *>(cursor), static_cast<void *>(info));
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ co->getInfo(info);
+ if (info->end_ip == 0)
+ return UNW_ENOINFO;
return UNW_ESUCCESS;
-}
+}
_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info)
-
-/// Resume execution at cursor position (aka longjump).
+
+/// Resume execution at cursor position (aka longjump).
_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) {
_LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast<void *>(cursor));
#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
// Inform the ASan runtime that now might be a good time to clean stuff up.
__asan_handle_no_return();
#endif
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- co->jumpto();
- return UNW_EUNSPEC;
-}
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ co->jumpto();
+ return UNW_EUNSPEC;
+}
_LIBUNWIND_WEAK_ALIAS(__unw_resume, unw_resume)
-
-/// Get name of function at cursor position in stack frame.
+
+/// Get name of function at cursor position in stack frame.
_LIBUNWIND_HIDDEN int __unw_get_proc_name(unw_cursor_t *cursor, char *buf,
size_t bufLen, unw_word_t *offset) {
_LIBUNWIND_TRACE_API("__unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)",
- static_cast<void *>(cursor), static_cast<void *>(buf),
- static_cast<unsigned long>(bufLen));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- if (co->getFunctionName(buf, bufLen, offset))
- return UNW_ESUCCESS;
+ static_cast<void *>(cursor), static_cast<void *>(buf),
+ static_cast<unsigned long>(bufLen));
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ if (co->getFunctionName(buf, bufLen, offset))
+ return UNW_ESUCCESS;
return UNW_EUNSPEC;
-}
+}
_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_name, unw_get_proc_name)
-
-/// Checks if a register is a floating-point register.
+
+/// Checks if a register is a floating-point register.
_LIBUNWIND_HIDDEN int __unw_is_fpreg(unw_cursor_t *cursor,
unw_regnum_t regNum) {
_LIBUNWIND_TRACE_API("__unw_is_fpreg(cursor=%p, regNum=%d)",
- static_cast<void *>(cursor), regNum);
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- return co->validFloatReg(regNum);
-}
+ static_cast<void *>(cursor), regNum);
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ return co->validFloatReg(regNum);
+}
_LIBUNWIND_WEAK_ALIAS(__unw_is_fpreg, unw_is_fpreg)
-
-/// Checks if a register is a floating-point register.
+
+/// Gets the name of the specified register.
_LIBUNWIND_HIDDEN const char *__unw_regname(unw_cursor_t *cursor,
unw_regnum_t regNum) {
_LIBUNWIND_TRACE_API("__unw_regname(cursor=%p, regNum=%d)",
- static_cast<void *>(cursor), regNum);
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- return co->getRegisterName(regNum);
-}
+ static_cast<void *>(cursor), regNum);
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ return co->getRegisterName(regNum);
+}
_LIBUNWIND_WEAK_ALIAS(__unw_regname, unw_regname)
-
-/// Checks if current frame is signal trampoline.
+
+/// Checks if current frame is signal trampoline.
_LIBUNWIND_HIDDEN int __unw_is_signal_frame(unw_cursor_t *cursor) {
_LIBUNWIND_TRACE_API("__unw_is_signal_frame(cursor=%p)",
- static_cast<void *>(cursor));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- return co->isSignalFrame();
-}
+ static_cast<void *>(cursor));
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ return co->isSignalFrame();
+}
_LIBUNWIND_WEAK_ALIAS(__unw_is_signal_frame, unw_is_signal_frame)
-
-#ifdef __arm__
-// Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD
+
+#ifdef __arm__
+// Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD
_LIBUNWIND_HIDDEN void __unw_save_vfp_as_X(unw_cursor_t *cursor) {
_LIBUNWIND_TRACE_API("__unw_get_fpreg_save_vfp_as_X(cursor=%p)",
- static_cast<void *>(cursor));
- AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
- return co->saveVFPAsX();
-}
+ static_cast<void *>(cursor));
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ return co->saveVFPAsX();
+}
_LIBUNWIND_WEAK_ALIAS(__unw_save_vfp_as_X, unw_save_vfp_as_X)
-#endif
-
-
+#endif
+
+
#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
/// SPI: walks cached DWARF entries
_LIBUNWIND_HIDDEN void __unw_iterate_dwarf_unwind_cache(void (*func)(
- unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) {
+ unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) {
_LIBUNWIND_TRACE_API("__unw_iterate_dwarf_unwind_cache(func=%p)",
- reinterpret_cast<void *>(func));
- DwarfFDECache<LocalAddressSpace>::iterateCacheEntries(func);
-}
+ reinterpret_cast<void *>(func));
+ DwarfFDECache<LocalAddressSpace>::iterateCacheEntries(func);
+}
_LIBUNWIND_WEAK_ALIAS(__unw_iterate_dwarf_unwind_cache,
unw_iterate_dwarf_unwind_cache)
-
-/// IPI: for __register_frame()
+
+/// IPI: for __register_frame()
void __unw_add_dynamic_fde(unw_word_t fde) {
- CFI_Parser<LocalAddressSpace>::FDE_Info fdeInfo;
- CFI_Parser<LocalAddressSpace>::CIE_Info cieInfo;
- const char *message = CFI_Parser<LocalAddressSpace>::decodeFDE(
- LocalAddressSpace::sThisAddressSpace,
- (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo);
- if (message == NULL) {
- // dynamically registered FDEs don't have a mach_header group they are in.
- // Use fde as mh_group
- unw_word_t mh_group = fdeInfo.fdeStart;
- DwarfFDECache<LocalAddressSpace>::add((LocalAddressSpace::pint_t)mh_group,
- fdeInfo.pcStart, fdeInfo.pcEnd,
- fdeInfo.fdeStart);
- } else {
+ CFI_Parser<LocalAddressSpace>::FDE_Info fdeInfo;
+ CFI_Parser<LocalAddressSpace>::CIE_Info cieInfo;
+ const char *message = CFI_Parser<LocalAddressSpace>::decodeFDE(
+ LocalAddressSpace::sThisAddressSpace,
+ (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo);
+ if (message == NULL) {
+ // Dynamically registered FDEs are not part of any mach_header group,
+ // so use the fde itself as the mh_group.
+ unw_word_t mh_group = fdeInfo.fdeStart;
+ DwarfFDECache<LocalAddressSpace>::add((LocalAddressSpace::pint_t)mh_group,
+ fdeInfo.pcStart, fdeInfo.pcEnd,
+ fdeInfo.fdeStart);
+ } else {
_LIBUNWIND_DEBUG_LOG("__unw_add_dynamic_fde: bad fde: %s", message);
- }
-}
-
-/// IPI: for __deregister_frame()
+ }
+}
+
+/// IPI: for __deregister_frame()
void __unw_remove_dynamic_fde(unw_word_t fde) {
- // fde is own mh_group
+ // fde is own mh_group
DwarfFDECache<LocalAddressSpace>::removeAllIn((LocalAddressSpace::pint_t)fde);
-}
+}
void __unw_add_dynamic_eh_frame_section(unw_word_t eh_frame_start) {
// The eh_frame section start serves as the mh_group
@@ -325,37 +325,37 @@ void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start) {
#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
#endif // !defined(__USING_SJLJ_EXCEPTIONS__)
-
-
-
-// Add logging hooks in Debug builds only
-#ifndef NDEBUG
-#include <stdlib.h>
-
-_LIBUNWIND_HIDDEN
-bool logAPIs() {
- // do manual lock to avoid use of _cxa_guard_acquire or initializers
- static bool checked = false;
- static bool log = false;
- if (!checked) {
- log = (getenv("LIBUNWIND_PRINT_APIS") != NULL);
- checked = true;
- }
- return log;
-}
-
-_LIBUNWIND_HIDDEN
-bool logUnwinding() {
- // do manual lock to avoid use of _cxa_guard_acquire or initializers
- static bool checked = false;
- static bool log = false;
- if (!checked) {
- log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL);
- checked = true;
- }
- return log;
-}
-
+
+
+
+// Add logging hooks in Debug builds only
+#ifndef NDEBUG
+#include <stdlib.h>
+
+_LIBUNWIND_HIDDEN
+bool logAPIs() {
+ // do manual lock to avoid use of _cxa_guard_acquire or initializers
+ static bool checked = false;
+ static bool log = false;
+ if (!checked) {
+ log = (getenv("LIBUNWIND_PRINT_APIS") != NULL);
+ checked = true;
+ }
+ return log;
+}
+
+_LIBUNWIND_HIDDEN
+bool logUnwinding() {
+ // do manual lock to avoid use of _cxa_guard_acquire or initializers
+ static bool checked = false;
+ static bool log = false;
+ if (!checked) {
+ log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL);
+ checked = true;
+ }
+ return log;
+}
+
_LIBUNWIND_HIDDEN
bool logDWARF() {
// do manual lock to avoid use of _cxa_guard_acquire or initializers
@@ -368,5 +368,5 @@ bool logDWARF() {
return log;
}
-#endif // NDEBUG
+#endif // NDEBUG
diff --git a/contrib/libs/libunwind/src/libunwind_ext.h b/contrib/libs/libunwind/src/libunwind_ext.h
index d543ce129e..7065ffcdae 100644
--- a/contrib/libs/libunwind/src/libunwind_ext.h
+++ b/contrib/libs/libunwind/src/libunwind_ext.h
@@ -1,27 +1,27 @@
//===----------------------------------------------------------------------===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//
-// Extensions to libunwind API.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __LIBUNWIND_EXT__
-#define __LIBUNWIND_EXT__
-
-#include "config.h"
-#include <libunwind.h>
-#include <unwind.h>
-
-#define UNW_STEP_SUCCESS 1
-#define UNW_STEP_END 0
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+//
+//
+// Extensions to libunwind API.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LIBUNWIND_EXT__
+#define __LIBUNWIND_EXT__
+
+#include "config.h"
+#include <libunwind.h>
+#include <unwind.h>
+
+#define UNW_STEP_SUCCESS 1
+#define UNW_STEP_END 0
+
+#ifdef __cplusplus
+extern "C" {
+#endif
extern int __unw_getcontext(unw_context_t *);
extern int __unw_init_local(unw_cursor_t *, unw_context_t *);
@@ -43,26 +43,26 @@ extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t);
extern int __unw_is_signal_frame(unw_cursor_t *);
extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *);
-// SPI
+// SPI
extern void __unw_iterate_dwarf_unwind_cache(void (*func)(
unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh));
-
-// IPI
+
+// IPI
extern void __unw_add_dynamic_fde(unw_word_t fde);
extern void __unw_remove_dynamic_fde(unw_word_t fde);
-
+
extern void __unw_add_dynamic_eh_frame_section(unw_word_t eh_frame_start);
extern void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start);
#if defined(_LIBUNWIND_ARM_EHABI)
-extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*);
-extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context,
- const uint32_t *data,
- size_t offset, size_t len);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __LIBUNWIND_EXT__
+extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*);
+extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context,
+ const uint32_t *data,
+ size_t offset, size_t len);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __LIBUNWIND_EXT__
diff --git a/contrib/libs/libunwind/ya.make b/contrib/libs/libunwind/ya.make
index 39141abcdb..2d1aa635d4 100644
--- a/contrib/libs/libunwind/ya.make
+++ b/contrib/libs/libunwind/ya.make
@@ -1,15 +1,15 @@
# Generated by devtools/yamaker from nixpkgs 21.11.
-LIBRARY()
-
+LIBRARY()
+
OWNER(
pg
somov
g:cpp-contrib
)
-
+
VERSION(2022-02-05)
-
+
ORIGINAL_SOURCE(https://github.com/llvm/llvm-project/archive/2b9554b8850192bdd86c02eb671de1d866df8d87.tar.gz)
LICENSE(
@@ -22,11 +22,11 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
DISABLE(USE_LTO)
-
+
ADDINCL(
contrib/libs/libunwind/include
)
-
+
NO_RUNTIME()
NO_UTIL()
@@ -39,17 +39,17 @@ CFLAGS(
GLOBAL -D_libunwind_
-D_LIBUNWIND_IS_NATIVE_ONLY
-fno-exceptions
- -fno-rtti
- -funwind-tables
-)
-
+ -fno-rtti
+ -funwind-tables
+)
+
IF (SANITIZER_TYPE == memory)
CFLAGS(
-fPIC
)
ENDIF()
-
-SRCS(
+
+SRCS(
src/Unwind-EHABI.cpp
src/Unwind-seh.cpp
src/Unwind-sjlj.c
@@ -58,16 +58,16 @@ SRCS(
src/UnwindRegistersRestore.S
src/UnwindRegistersSave.S
src/libunwind.cpp
-)
-
+)
+
IF (OS_DARWIN OR OS_IOS)
- SRCS(
+ SRCS(
src/Unwind_AppleExtras.cpp
- )
+ )
ENDIF()
-
+
END()
-
+
RECURSE_FOR_TESTS(
ut
)
diff --git a/contrib/libs/linuxvdso/fake.cpp b/contrib/libs/linuxvdso/fake.cpp
index 395c4c9cb6..f75627feaa 100644
--- a/contrib/libs/linuxvdso/fake.cpp
+++ b/contrib/libs/linuxvdso/fake.cpp
@@ -1,8 +1,8 @@
-#include "interface.h"
-
-size_t NVdso::Enumerate(TSymbol*, size_t) {
- return 0;
-}
+#include "interface.h"
+
+size_t NVdso::Enumerate(TSymbol*, size_t) {
+ return 0;
+}
void* NVdso::Function(const char*, const char*) {
return nullptr;
diff --git a/contrib/libs/linuxvdso/interface.cpp b/contrib/libs/linuxvdso/interface.cpp
index 49bf3b6707..1c8b92ad25 100644
--- a/contrib/libs/linuxvdso/interface.cpp
+++ b/contrib/libs/linuxvdso/interface.cpp
@@ -1,32 +1,32 @@
-#include "interface.h"
-#include "original/vdso_support.h"
-
+#include "interface.h"
+#include "original/vdso_support.h"
+
#ifdef HAVE_VDSO_SUPPORT
-
-size_t NVdso::Enumerate(TSymbol* s, size_t len) {
- if (!len) {
- return 0;
- }
-
- base::VDSOSupport vdso;
-
- if (!vdso.IsPresent()) {
- return 0;
- }
-
- size_t n = 0;
-
- for (base::VDSOSupport::SymbolIterator it = vdso.begin(); it != vdso.end(); ++it) {
- *s++ = TSymbol(it->name, (void*)it->address);
- ++n;
-
- if (!--len) {
- break;
- }
- }
-
- return n;
-}
+
+size_t NVdso::Enumerate(TSymbol* s, size_t len) {
+ if (!len) {
+ return 0;
+ }
+
+ base::VDSOSupport vdso;
+
+ if (!vdso.IsPresent()) {
+ return 0;
+ }
+
+ size_t n = 0;
+
+ for (base::VDSOSupport::SymbolIterator it = vdso.begin(); it != vdso.end(); ++it) {
+ *s++ = TSymbol(it->name, (void*)it->address);
+ ++n;
+
+ if (!--len) {
+ break;
+ }
+ }
+
+ return n;
+}
void* NVdso::Function(const char* name, const char* version) {
base::VDSOSupport::SymbolInfo info;
diff --git a/contrib/libs/linuxvdso/interface.h b/contrib/libs/linuxvdso/interface.h
index 2435ae1e7d..97d7f113c1 100644
--- a/contrib/libs/linuxvdso/interface.h
+++ b/contrib/libs/linuxvdso/interface.h
@@ -1,26 +1,26 @@
-#pragma once
-
-#include <stddef.h>
-
-namespace NVdso {
- struct TSymbol {
- inline TSymbol()
- : Name(0)
- , Address(0)
- {
- }
-
- inline TSymbol(const char* name, void* addr)
- : Name(name)
- , Address(addr)
- {
- }
-
- const char* Name;
- void* Address;
- };
-
- size_t Enumerate(TSymbol* s, size_t len);
+#pragma once
+
+#include <stddef.h>
+
+namespace NVdso {
+ struct TSymbol {
+ inline TSymbol()
+ : Name(0)
+ , Address(0)
+ {
+ }
+
+ inline TSymbol(const char* name, void* addr)
+ : Name(name)
+ , Address(addr)
+ {
+ }
+
+ const char* Name;
+ void* Address;
+ };
+
+ size_t Enumerate(TSymbol* s, size_t len);
void* Function(const char* name, const char* version);
-}
+}
diff --git a/contrib/libs/linuxvdso/original/config.h b/contrib/libs/linuxvdso/original/config.h
index 9ac2e91d8b..06598e84c0 100644
--- a/contrib/libs/linuxvdso/original/config.h
+++ b/contrib/libs/linuxvdso/original/config.h
@@ -1,11 +1,11 @@
-#pragma once
-
-#include <inttypes.h>
-
-#if !defined(__WORDSIZE)
- #define __WORDSIZE (sizeof(unsigned long) * 8)
-#endif
-
-#define DISALLOW_COPY_AND_ASSIGN(x)
-#define RunningOnValgrind() false
-#define COMPILE_ASSERT(x, y)
+#pragma once
+
+#include <inttypes.h>
+
+#if !defined(__WORDSIZE)
+ #define __WORDSIZE (sizeof(unsigned long) * 8)
+#endif
+
+#define DISALLOW_COPY_AND_ASSIGN(x)
+#define RunningOnValgrind() false
+#define COMPILE_ASSERT(x, y)
diff --git a/contrib/libs/linuxvdso/original/elf_mem_image.cc b/contrib/libs/linuxvdso/original/elf_mem_image.cc
index d18ef43f10..066fcba4e3 100644
--- a/contrib/libs/linuxvdso/original/elf_mem_image.cc
+++ b/contrib/libs/linuxvdso/original/elf_mem_image.cc
@@ -1,436 +1,436 @@
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in an in-memory Elf image.
-//
-
-#include "elf_mem_image.h"
-#include "logging.h"
-
-#ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
-
-#if defined(_musl_)
- #include <endian.h>
-#endif
-
-#include <stddef.h> // for size_t, ptrdiff_t
-
-// From binutils/include/elf/common.h (this doesn't appear to be documented
-// anywhere else).
-//
-// /* This flag appears in a Versym structure. It means that the symbol
-// is hidden, and is only visible with an explicit version number.
-// This is a GNU extension. */
-// #define VERSYM_HIDDEN 0x8000
-//
-// /* This is the mask for the rest of the Versym information. */
-// #define VERSYM_VERSION 0x7fff
-
-#define VERSYM_VERSION 0x7fff
-
-namespace base {
-
-namespace {
-template <int N> class ElfClass {
- public:
- static const int kElfClass = -1;
- static int ElfBind(const ElfW(Sym) *) {
- CHECK(false); // << "Unexpected word size";
- return 0;
- }
- static int ElfType(const ElfW(Sym) *) {
- CHECK(false); // << "Unexpected word size";
- return 0;
- }
-};
-
-template <> class ElfClass<32> {
- public:
- static const int kElfClass = ELFCLASS32;
- static int ElfBind(const ElfW(Sym) *symbol) {
- return ELF32_ST_BIND(symbol->st_info);
- }
- static int ElfType(const ElfW(Sym) *symbol) {
- return ELF32_ST_TYPE(symbol->st_info);
- }
-};
-
-template <> class ElfClass<64> {
- public:
- static const int kElfClass = ELFCLASS64;
- static int ElfBind(const ElfW(Sym) *symbol) {
- return ELF64_ST_BIND(symbol->st_info);
- }
- static int ElfType(const ElfW(Sym) *symbol) {
- return ELF64_ST_TYPE(symbol->st_info);
- }
-};
-
-typedef ElfClass<__WORDSIZE> CurrentElfClass;
-
-// Extract an element from one of the ELF tables, cast it to desired type.
-// This is just a simple arithmetic and a glorified cast.
-// Callers are responsible for bounds checking.
-template <class T>
-const T* GetTableElement(const ElfW(Ehdr) *ehdr,
- ElfW(Off) table_offset,
- ElfW(Word) element_size,
- size_t index) {
- return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
- + table_offset
- + index * element_size);
-}
-} // namespace
-
-const void *const ElfMemImage::kInvalidBase =
- reinterpret_cast<const void *>(~0L);
-
-ElfMemImage::ElfMemImage(const void *base) {
- Init(base);
-}
-
-int ElfMemImage::GetNumSymbols() const {
- if (!hash_) {
- return 0;
- }
- // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
- return hash_[1];
-}
-
-const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
- CHECK_LT(index, GetNumSymbols());
- return dynsym_ + index;
-}
-
-const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
- CHECK_LT(index, GetNumSymbols());
- return versym_ + index;
-}
-
-const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
- CHECK_LT(index, ehdr_->e_phnum);
- return GetTableElement<ElfW(Phdr)>(ehdr_,
- ehdr_->e_phoff,
- ehdr_->e_phentsize,
- index);
-}
-
-const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
- CHECK_LT(offset, strsize_);
- return dynstr_ + offset;
-}
-
-const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
- if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
- // Symbol corresponds to "special" (e.g. SHN_ABS) section.
- return reinterpret_cast<const void *>(sym->st_value);
- }
- CHECK_LT(link_base_, sym->st_value);
- return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_;
-}
-
-const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
- CHECK_LE(index, verdefnum_);
- const ElfW(Verdef) *version_definition = verdef_;
- while (version_definition->vd_ndx < index && version_definition->vd_next) {
- const char *const version_definition_as_char =
- reinterpret_cast<const char *>(version_definition);
- version_definition =
- reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
- version_definition->vd_next);
- }
- return version_definition->vd_ndx == index ? version_definition : NULL;
-}
-
-const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
- const ElfW(Verdef) *verdef) const {
- return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
-}
-
-const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
- CHECK_LT(offset, strsize_);
- return dynstr_ + offset;
-}
-
-void ElfMemImage::Init(const void *base) {
- ehdr_ = NULL;
- dynsym_ = NULL;
- dynstr_ = NULL;
- versym_ = NULL;
- verdef_ = NULL;
- hash_ = NULL;
- strsize_ = 0;
- verdefnum_ = 0;
- link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup in an in-memory Elf image.
+//
+
+#include "elf_mem_image.h"
+#include "logging.h"
+
+#ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
+
+#if defined(_musl_)
+ #include <endian.h>
+#endif
+
+#include <stddef.h> // for size_t, ptrdiff_t
+
+// From binutils/include/elf/common.h (this doesn't appear to be documented
+// anywhere else).
+//
+// /* This flag appears in a Versym structure. It means that the symbol
+// is hidden, and is only visible with an explicit version number.
+// This is a GNU extension. */
+// #define VERSYM_HIDDEN 0x8000
+//
+// /* This is the mask for the rest of the Versym information. */
+// #define VERSYM_VERSION 0x7fff
+
+#define VERSYM_VERSION 0x7fff
+
+namespace base {
+
+namespace {
+template <int N> class ElfClass {
+ public:
+ static const int kElfClass = -1;
+ static int ElfBind(const ElfW(Sym) *) {
+ CHECK(false); // << "Unexpected word size";
+ return 0;
+ }
+ static int ElfType(const ElfW(Sym) *) {
+ CHECK(false); // << "Unexpected word size";
+ return 0;
+ }
+};
+
+template <> class ElfClass<32> {
+ public:
+ static const int kElfClass = ELFCLASS32;
+ static int ElfBind(const ElfW(Sym) *symbol) {
+ return ELF32_ST_BIND(symbol->st_info);
+ }
+ static int ElfType(const ElfW(Sym) *symbol) {
+ return ELF32_ST_TYPE(symbol->st_info);
+ }
+};
+
+template <> class ElfClass<64> {
+ public:
+ static const int kElfClass = ELFCLASS64;
+ static int ElfBind(const ElfW(Sym) *symbol) {
+ return ELF64_ST_BIND(symbol->st_info);
+ }
+ static int ElfType(const ElfW(Sym) *symbol) {
+ return ELF64_ST_TYPE(symbol->st_info);
+ }
+};
+
+typedef ElfClass<__WORDSIZE> CurrentElfClass;
+
+// Extract an element from one of the ELF tables, cast it to desired type.
+// This is just simple arithmetic and a glorified cast.
+// Callers are responsible for bounds checking.
+template <class T>
+const T* GetTableElement(const ElfW(Ehdr) *ehdr,
+ ElfW(Off) table_offset,
+ ElfW(Word) element_size,
+ size_t index) {
+ return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
+ + table_offset
+ + index * element_size);
+}
+} // namespace
+
+const void *const ElfMemImage::kInvalidBase =
+ reinterpret_cast<const void *>(~0L);
+
+ElfMemImage::ElfMemImage(const void *base) {
+ Init(base);
+}
+
+int ElfMemImage::GetNumSymbols() const {
+ if (!hash_) {
+ return 0;
+ }
+ // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
+ return hash_[1];
+}
+
+const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
+ CHECK_LT(index, GetNumSymbols());
+ return dynsym_ + index;
+}
+
+const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
+ CHECK_LT(index, GetNumSymbols());
+ return versym_ + index;
+}
+
+const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
+ CHECK_LT(index, ehdr_->e_phnum);
+ return GetTableElement<ElfW(Phdr)>(ehdr_,
+ ehdr_->e_phoff,
+ ehdr_->e_phentsize,
+ index);
+}
+
+const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
+ CHECK_LT(offset, strsize_);
+ return dynstr_ + offset;
+}
+
+const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
+ if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
+ // Symbol corresponds to "special" (e.g. SHN_ABS) section.
+ return reinterpret_cast<const void *>(sym->st_value);
+ }
+ CHECK_LT(link_base_, sym->st_value);
+ return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_;
+}
+
+const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
+ CHECK_LE(index, verdefnum_);
+ const ElfW(Verdef) *version_definition = verdef_;
+ while (version_definition->vd_ndx < index && version_definition->vd_next) {
+ const char *const version_definition_as_char =
+ reinterpret_cast<const char *>(version_definition);
+ version_definition =
+ reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
+ version_definition->vd_next);
+ }
+ return version_definition->vd_ndx == index ? version_definition : NULL;
+}
+
+const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
+ const ElfW(Verdef) *verdef) const {
+ return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
+}
+
+const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
+ CHECK_LT(offset, strsize_);
+ return dynstr_ + offset;
+}
+
+void ElfMemImage::Init(const void *base) {
+ ehdr_ = NULL;
+ dynsym_ = NULL;
+ dynstr_ = NULL;
+ versym_ = NULL;
+ verdef_ = NULL;
+ hash_ = NULL;
+ strsize_ = 0;
+ verdefnum_ = 0;
+ link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
if (!base || base == kInvalidBase) {
- return;
- }
- const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base);
- // Fake VDSO has low bit set.
- const bool fake_vdso = ((base_as_uintptr_t & 1) != 0);
- base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1);
- const char *const base_as_char = reinterpret_cast<const char *>(base);
- if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
- base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
- RAW_DCHECK(false, "no ELF magic"); // at %p", base);
- return;
- }
- int elf_class = base_as_char[EI_CLASS];
- if (elf_class != CurrentElfClass::kElfClass) {
- DCHECK_EQ(elf_class, CurrentElfClass::kElfClass);
- return;
- }
- switch (base_as_char[EI_DATA]) {
- case ELFDATA2LSB: {
- if (__LITTLE_ENDIAN != __BYTE_ORDER) {
- DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
- return;
- }
- break;
- }
- case ELFDATA2MSB: {
- if (__BIG_ENDIAN != __BYTE_ORDER) {
- DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
- return;
- }
- break;
- }
- default: {
- RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA];
- return;
- }
- }
-
- ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
- const ElfW(Phdr) *dynamic_program_header = NULL;
- for (int i = 0; i < ehdr_->e_phnum; ++i) {
- const ElfW(Phdr) *const program_header = GetPhdr(i);
- switch (program_header->p_type) {
- case PT_LOAD:
- if (link_base_ == ~0L) {
- link_base_ = program_header->p_vaddr;
- }
- break;
- case PT_DYNAMIC:
- dynamic_program_header = program_header;
- break;
- }
- }
- if (link_base_ == ~0L || !dynamic_program_header) {
- RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO");
- RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO");
- // Mark this image as not present. Can not recur infinitely.
- Init(0);
- return;
- }
- ptrdiff_t relocation =
- base_as_char - reinterpret_cast<const char *>(link_base_);
- ElfW(Dyn) *dynamic_entry =
- reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
- relocation);
- for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
- ElfW(Xword) value = dynamic_entry->d_un.d_val;
- if (fake_vdso) {
- // A complication: in the real VDSO, dynamic entries are not relocated
- // (it wasn't loaded by a dynamic loader). But when testing with a
- // "fake" dlopen()ed vdso library, the loader relocates some (but
- // not all!) of them before we get here.
- if (dynamic_entry->d_tag == DT_VERDEF) {
- // The only dynamic entry (of the ones we care about) libc-2.3.6
- // loader doesn't relocate.
- value += relocation;
- }
- } else {
- // Real VDSO. Everything needs to be relocated.
- value += relocation;
- }
- switch (dynamic_entry->d_tag) {
- case DT_HASH:
- hash_ = reinterpret_cast<ElfW(Word) *>(value);
- break;
- case DT_SYMTAB:
- dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
- break;
- case DT_STRTAB:
- dynstr_ = reinterpret_cast<const char *>(value);
- break;
- case DT_VERSYM:
- versym_ = reinterpret_cast<ElfW(Versym) *>(value);
- break;
- case DT_VERDEF:
- verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
- break;
- case DT_VERDEFNUM:
- verdefnum_ = dynamic_entry->d_un.d_val;
- break;
- case DT_STRSZ:
- strsize_ = dynamic_entry->d_un.d_val;
- break;
- default:
- // Unrecognized entries explicitly ignored.
- break;
- }
- }
- if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
- !verdef_ || !verdefnum_ || !strsize_) {
- RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)");
- RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)");
- RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)");
- RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)");
- RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)");
- RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)");
- RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)");
- // Mark this image as not present. Can not recur infinitely.
- Init(0);
- return;
- }
-}
-
-bool ElfMemImage::LookupSymbol(const char *name,
- const char *version,
- int type,
- SymbolInfo *info) const {
- for (SymbolIterator it = begin(); it != end(); ++it) {
- if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 &&
- CurrentElfClass::ElfType(it->symbol) == type) {
- if (info) {
- *info = *it;
- }
- return true;
- }
- }
- return false;
-}
-
-bool ElfMemImage::LookupSymbolByAddress(const void *address,
- SymbolInfo *info_out) const {
- for (SymbolIterator it = begin(); it != end(); ++it) {
- const char *const symbol_start =
- reinterpret_cast<const char *>(it->address);
- const char *const symbol_end = symbol_start + it->symbol->st_size;
- if (symbol_start <= address && address < symbol_end) {
- if (info_out) {
- // Client wants to know details for that symbol (the usual case).
- if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) {
- // Strong symbol; just return it.
- *info_out = *it;
- return true;
- } else {
- // Weak or local. Record it, but keep looking for a strong one.
- *info_out = *it;
- }
- } else {
- // Client only cares if there is an overlapping symbol.
- return true;
- }
- }
- }
- return false;
-}
-
-ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
- : index_(index), image_(image) {
-}
-
-const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
- return &info_;
-}
-
-const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
- return info_;
-}
-
-bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
- return this->image_ == rhs.image_ && this->index_ == rhs.index_;
-}
-
-bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
- return !(*this == rhs);
-}
-
-ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
- this->Update(1);
- return *this;
-}
-
-ElfMemImage::SymbolIterator ElfMemImage::begin() const {
- SymbolIterator it(this, 0);
- it.Update(0);
- return it;
-}
-
-ElfMemImage::SymbolIterator ElfMemImage::end() const {
- return SymbolIterator(this, GetNumSymbols());
-}
-
-void ElfMemImage::SymbolIterator::Update(int increment) {
- const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
- CHECK(image->IsPresent() || increment == 0);
- if (!image->IsPresent()) {
- return;
- }
- index_ += increment;
- if (index_ >= image->GetNumSymbols()) {
- index_ = image->GetNumSymbols();
- return;
- }
- const ElfW(Sym) *symbol = image->GetDynsym(index_);
- const ElfW(Versym) *version_symbol = image->GetVersym(index_);
- CHECK(symbol && version_symbol);
- const char *const symbol_name = image->GetDynstr(symbol->st_name);
- const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
- const ElfW(Verdef) *version_definition = NULL;
- const char *version_name = "";
- if (symbol->st_shndx == SHN_UNDEF) {
- // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
- // version_index could well be greater than verdefnum_, so calling
- // GetVerdef(version_index) may trigger assertion.
- } else {
- version_definition = image->GetVerdef(version_index);
- }
- if (version_definition) {
- // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
- // optional 2nd if the version has a parent.
- CHECK_LE(1, version_definition->vd_cnt);
- CHECK_LE(version_definition->vd_cnt, 2);
- const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
- version_name = image->GetVerstr(version_aux->vda_name);
- }
- info_.name = symbol_name;
- info_.version = version_name;
- info_.address = image->GetSymAddr(symbol);
- info_.symbol = symbol;
-}
-
-} // namespace base
-
-#endif // HAVE_ELF_MEM_IMAGE
+ return;
+ }
+ const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base);
+ // Fake VDSO has low bit set.
+ const bool fake_vdso = ((base_as_uintptr_t & 1) != 0);
+ base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1);
+ const char *const base_as_char = reinterpret_cast<const char *>(base);
+ if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
+ base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
+ RAW_DCHECK(false, "no ELF magic"); // at %p", base);
+ return;
+ }
+ int elf_class = base_as_char[EI_CLASS];
+ if (elf_class != CurrentElfClass::kElfClass) {
+ DCHECK_EQ(elf_class, CurrentElfClass::kElfClass);
+ return;
+ }
+ switch (base_as_char[EI_DATA]) {
+ case ELFDATA2LSB: {
+ if (__LITTLE_ENDIAN != __BYTE_ORDER) {
+ DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
+ return;
+ }
+ break;
+ }
+ case ELFDATA2MSB: {
+ if (__BIG_ENDIAN != __BYTE_ORDER) {
+ DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
+ return;
+ }
+ break;
+ }
+ default: {
+ RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA];
+ return;
+ }
+ }
+
+ ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
+ const ElfW(Phdr) *dynamic_program_header = NULL;
+ for (int i = 0; i < ehdr_->e_phnum; ++i) {
+ const ElfW(Phdr) *const program_header = GetPhdr(i);
+ switch (program_header->p_type) {
+ case PT_LOAD:
+ if (link_base_ == ~0L) {
+ link_base_ = program_header->p_vaddr;
+ }
+ break;
+ case PT_DYNAMIC:
+ dynamic_program_header = program_header;
+ break;
+ }
+ }
+ if (link_base_ == ~0L || !dynamic_program_header) {
+ RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO");
+ RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO");
+ // Mark this image as not present. Cannot recurse infinitely.
+ Init(0);
+ return;
+ }
+ ptrdiff_t relocation =
+ base_as_char - reinterpret_cast<const char *>(link_base_);
+ ElfW(Dyn) *dynamic_entry =
+ reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
+ relocation);
+ for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
+ ElfW(Xword) value = dynamic_entry->d_un.d_val;
+ if (fake_vdso) {
+ // A complication: in the real VDSO, dynamic entries are not relocated
+ // (it wasn't loaded by a dynamic loader). But when testing with a
+ // "fake" dlopen()ed vdso library, the loader relocates some (but
+ // not all!) of them before we get here.
+ if (dynamic_entry->d_tag == DT_VERDEF) {
+ // The only dynamic entry (of the ones we care about) libc-2.3.6
+ // loader doesn't relocate.
+ value += relocation;
+ }
+ } else {
+ // Real VDSO. Everything needs to be relocated.
+ value += relocation;
+ }
+ switch (dynamic_entry->d_tag) {
+ case DT_HASH:
+ hash_ = reinterpret_cast<ElfW(Word) *>(value);
+ break;
+ case DT_SYMTAB:
+ dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
+ break;
+ case DT_STRTAB:
+ dynstr_ = reinterpret_cast<const char *>(value);
+ break;
+ case DT_VERSYM:
+ versym_ = reinterpret_cast<ElfW(Versym) *>(value);
+ break;
+ case DT_VERDEF:
+ verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
+ break;
+ case DT_VERDEFNUM:
+ verdefnum_ = dynamic_entry->d_un.d_val;
+ break;
+ case DT_STRSZ:
+ strsize_ = dynamic_entry->d_un.d_val;
+ break;
+ default:
+ // Unrecognized entries explicitly ignored.
+ break;
+ }
+ }
+ if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
+ !verdef_ || !verdefnum_ || !strsize_) {
+ RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)");
+ RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)");
+ RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)");
+ RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)");
+ RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)");
+ RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)");
+ RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)");
+ // Mark this image as not present. Cannot recurse infinitely.
+ Init(0);
+ return;
+ }
+}
+
+bool ElfMemImage::LookupSymbol(const char *name,
+ const char *version,
+ int type,
+ SymbolInfo *info) const {
+ for (SymbolIterator it = begin(); it != end(); ++it) {
+ if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 &&
+ CurrentElfClass::ElfType(it->symbol) == type) {
+ if (info) {
+ *info = *it;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ElfMemImage::LookupSymbolByAddress(const void *address,
+ SymbolInfo *info_out) const {
+ for (SymbolIterator it = begin(); it != end(); ++it) {
+ const char *const symbol_start =
+ reinterpret_cast<const char *>(it->address);
+ const char *const symbol_end = symbol_start + it->symbol->st_size;
+ if (symbol_start <= address && address < symbol_end) {
+ if (info_out) {
+ // Client wants to know details for that symbol (the usual case).
+ if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) {
+ // Strong symbol; just return it.
+ *info_out = *it;
+ return true;
+ } else {
+ // Weak or local. Record it, but keep looking for a strong one.
+ *info_out = *it;
+ }
+ } else {
+ // Client only cares if there is an overlapping symbol.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
+ : index_(index), image_(image) {
+}
+
+const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
+ return &info_;
+}
+
+const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
+ return info_;
+}
+
+bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
+ return this->image_ == rhs.image_ && this->index_ == rhs.index_;
+}
+
+bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
+ return !(*this == rhs);
+}
+
+ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
+ this->Update(1);
+ return *this;
+}
+
+ElfMemImage::SymbolIterator ElfMemImage::begin() const {
+ SymbolIterator it(this, 0);
+ it.Update(0);
+ return it;
+}
+
+ElfMemImage::SymbolIterator ElfMemImage::end() const {
+ return SymbolIterator(this, GetNumSymbols());
+}
+
+void ElfMemImage::SymbolIterator::Update(int increment) {
+ const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
+ CHECK(image->IsPresent() || increment == 0);
+ if (!image->IsPresent()) {
+ return;
+ }
+ index_ += increment;
+ if (index_ >= image->GetNumSymbols()) {
+ index_ = image->GetNumSymbols();
+ return;
+ }
+ const ElfW(Sym) *symbol = image->GetDynsym(index_);
+ const ElfW(Versym) *version_symbol = image->GetVersym(index_);
+ CHECK(symbol && version_symbol);
+ const char *const symbol_name = image->GetDynstr(symbol->st_name);
+ const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
+ const ElfW(Verdef) *version_definition = NULL;
+ const char *version_name = "";
+ if (symbol->st_shndx == SHN_UNDEF) {
+ // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
+ // version_index could well be greater than verdefnum_, so calling
+ // GetVerdef(version_index) may trigger assertion.
+ } else {
+ version_definition = image->GetVerdef(version_index);
+ }
+ if (version_definition) {
+ // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
+ // optional 2nd if the version has a parent.
+ CHECK_LE(1, version_definition->vd_cnt);
+ CHECK_LE(version_definition->vd_cnt, 2);
+ const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
+ version_name = image->GetVerstr(version_aux->vda_name);
+ }
+ info_.name = symbol_name;
+ info_.version = version_name;
+ info_.address = image->GetSymAddr(symbol);
+ info_.symbol = symbol;
+}
+
+} // namespace base
+
+#endif // HAVE_ELF_MEM_IMAGE
diff --git a/contrib/libs/linuxvdso/original/elf_mem_image.h b/contrib/libs/linuxvdso/original/elf_mem_image.h
index b6619bcab9..580184cb37 100644
--- a/contrib/libs/linuxvdso/original/elf_mem_image.h
+++ b/contrib/libs/linuxvdso/original/elf_mem_image.h
@@ -1,135 +1,135 @@
-#pragma once
-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup for in-memory Elf images.
-
-#ifndef BASE_ELF_MEM_IMAGE_H_
-#define BASE_ELF_MEM_IMAGE_H_
-
-#include "config.h"
-
-#include <features.h> // for __GLIBC__
-
-// Maybe one day we can rewrite this file not to require the elf
-// symbol extensions in glibc, but for right now we need them.
-#if (defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)) || defined(_musl_)
-
-#define HAVE_ELF_MEM_IMAGE 1
-
-#include <stdlib.h>
-#include <link.h> // for ElfW
-
-namespace base {
-
-// An in-memory ELF image (may not exist on disk).
-class ElfMemImage {
- public:
- // Sentinel: there could never be an elf image at this address.
- static const void *const kInvalidBase;
-
- // Information about a single vdso symbol.
- // All pointers are into .dynsym, .dynstr, or .text of the VDSO.
- // Do not free() them or modify through them.
- struct SymbolInfo {
- const char *name; // E.g. "__vdso_getcpu"
- const char *version; // E.g. "LINUX_2.6", could be ""
- // for unversioned symbol.
- const void *address; // Relocated symbol address.
- const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table.
- };
-
- // Supports iteration over all dynamic symbols.
- class SymbolIterator {
- public:
- friend class ElfMemImage;
- const SymbolInfo *operator->() const;
- const SymbolInfo &operator*() const;
- SymbolIterator& operator++();
- bool operator!=(const SymbolIterator &rhs) const;
- bool operator==(const SymbolIterator &rhs) const;
- private:
- SymbolIterator(const void *const image, int index);
- void Update(int incr);
- SymbolInfo info_;
- int index_;
- const void *const image_;
- };
-
-
- explicit ElfMemImage(const void *base);
- void Init(const void *base);
- bool IsPresent() const { return ehdr_ != NULL; }
- const ElfW(Phdr)* GetPhdr(int index) const;
- const ElfW(Sym)* GetDynsym(int index) const;
- const ElfW(Versym)* GetVersym(int index) const;
- const ElfW(Verdef)* GetVerdef(int index) const;
- const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
- const char* GetDynstr(ElfW(Word) offset) const;
- const void* GetSymAddr(const ElfW(Sym) *sym) const;
- const char* GetVerstr(ElfW(Word) offset) const;
- int GetNumSymbols() const;
-
- SymbolIterator begin() const;
- SymbolIterator end() const;
-
- // Look up versioned dynamic symbol in the image.
- // Returns false if image is not present, or doesn't contain given
- // symbol/version/type combination.
- // If info_out != NULL, additional details are filled in.
- bool LookupSymbol(const char *name, const char *version,
- int symbol_type, SymbolInfo *info_out) const;
-
- // Find info about symbol (if any) which overlaps given address.
- // Returns true if symbol was found; false if image isn't present
- // or doesn't have a symbol overlapping given address.
- // If info_out != NULL, additional details are filled in.
- bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
-
- private:
- const ElfW(Ehdr) *ehdr_;
- const ElfW(Sym) *dynsym_;
- const ElfW(Versym) *versym_;
- const ElfW(Verdef) *verdef_;
- const ElfW(Word) *hash_;
- const char *dynstr_;
- size_t strsize_;
- size_t verdefnum_;
- ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD).
-};
-
-} // namespace base
-
-#endif // __ELF__ and __GLIBC__ and !__native_client__
-
-#endif // BASE_ELF_MEM_IMAGE_H_
+#pragma once
+
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup for in-memory Elf images.
+
+#ifndef BASE_ELF_MEM_IMAGE_H_
+#define BASE_ELF_MEM_IMAGE_H_
+
+#include "config.h"
+
+#include <features.h> // for __GLIBC__
+
+// Maybe one day we can rewrite this file not to require the elf
+// symbol extensions in glibc, but for right now we need them.
+#if (defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__)) || defined(_musl_)
+
+#define HAVE_ELF_MEM_IMAGE 1
+
+#include <stdlib.h>
+#include <link.h> // for ElfW
+
+namespace base {
+
+// An in-memory ELF image (may not exist on disk).
+class ElfMemImage {
+ public:
+ // Sentinel: there could never be an elf image at this address.
+ static const void *const kInvalidBase;
+
+ // Information about a single vdso symbol.
+ // All pointers are into .dynsym, .dynstr, or .text of the VDSO.
+ // Do not free() them or modify through them.
+ struct SymbolInfo {
+ const char *name; // E.g. "__vdso_getcpu"
+ const char *version; // E.g. "LINUX_2.6", could be ""
+ // for unversioned symbol.
+ const void *address; // Relocated symbol address.
+ const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table.
+ };
+
+ // Supports iteration over all dynamic symbols.
+ class SymbolIterator {
+ public:
+ friend class ElfMemImage;
+ const SymbolInfo *operator->() const;
+ const SymbolInfo &operator*() const;
+ SymbolIterator& operator++();
+ bool operator!=(const SymbolIterator &rhs) const;
+ bool operator==(const SymbolIterator &rhs) const;
+ private:
+ SymbolIterator(const void *const image, int index);
+ void Update(int incr);
+ SymbolInfo info_;
+ int index_;
+ const void *const image_;
+ };
+
+
+ explicit ElfMemImage(const void *base);
+ void Init(const void *base);
+ bool IsPresent() const { return ehdr_ != NULL; }
+ const ElfW(Phdr)* GetPhdr(int index) const;
+ const ElfW(Sym)* GetDynsym(int index) const;
+ const ElfW(Versym)* GetVersym(int index) const;
+ const ElfW(Verdef)* GetVerdef(int index) const;
+ const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
+ const char* GetDynstr(ElfW(Word) offset) const;
+ const void* GetSymAddr(const ElfW(Sym) *sym) const;
+ const char* GetVerstr(ElfW(Word) offset) const;
+ int GetNumSymbols() const;
+
+ SymbolIterator begin() const;
+ SymbolIterator end() const;
+
+ // Look up versioned dynamic symbol in the image.
+ // Returns false if image is not present, or doesn't contain given
+ // symbol/version/type combination.
+ // If info_out != NULL, additional details are filled in.
+ bool LookupSymbol(const char *name, const char *version,
+ int symbol_type, SymbolInfo *info_out) const;
+
+ // Find info about symbol (if any) which overlaps given address.
+ // Returns true if symbol was found; false if image isn't present
+ // or doesn't have a symbol overlapping given address.
+ // If info_out != NULL, additional details are filled in.
+ bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
+
+ private:
+ const ElfW(Ehdr) *ehdr_;
+ const ElfW(Sym) *dynsym_;
+ const ElfW(Versym) *versym_;
+ const ElfW(Verdef) *verdef_;
+ const ElfW(Word) *hash_;
+ const char *dynstr_;
+ size_t strsize_;
+ size_t verdefnum_;
+ ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD).
+};
+
+} // namespace base
+
+#endif // (__ELF__ && __GLIBC__ && !__native_client__) || _musl_
+
+#endif // BASE_ELF_MEM_IMAGE_H_
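Editorial aside (not part of the vendored header above): a typical consumer points ElfMemImage at the address the kernel publishes through the auxiliary vector and then queries it with LookupSymbol. The sketch below is a hedged illustration only; it assumes glibc's getauxval() is available and uses a hypothetical x86-64 style symbol name and version.

// Hedged usage sketch; the symbol name and version are illustrative examples.
#include <sys/auxv.h>   // getauxval, AT_SYSINFO_EHDR (glibc >= 2.16 assumed)
#include <elf.h>        // STT_FUNC
#include <cstdio>

static void DumpVdsoGetcpu() {
  const void *vdso = reinterpret_cast<const void *>(getauxval(AT_SYSINFO_EHDR));
  if (vdso == nullptr) return;               // kernel did not map a vDSO
  base::ElfMemImage image(vdso);
  if (!image.IsPresent()) return;            // not a usable ELF image
  base::ElfMemImage::SymbolInfo info;
  if (image.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
    std::printf("%s@%s -> %p\n", info.name, info.version, info.address);
  }
}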
diff --git a/contrib/libs/linuxvdso/original/linux_syscall_support.h b/contrib/libs/linuxvdso/original/linux_syscall_support.h
index 37d182ad3c..0edb951c54 100644
--- a/contrib/libs/linuxvdso/original/linux_syscall_support.h
+++ b/contrib/libs/linuxvdso/original/linux_syscall_support.h
@@ -1,1644 +1,1644 @@
-#pragma once
-
-/* Copyright (c) 2005-2008, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-/* This file includes Linux-specific support functions common to the
- * coredumper and the thread lister; primarily, this is a collection
- * of direct system calls, and a couple of symbols missing from
- * standard header files.
- * There are a few options that the including file can set to control
- * the behavior of this file:
- *
- * SYS_CPLUSPLUS:
- * The entire header file will normally be wrapped in 'extern "C" { }',
- * making it suitable for compilation as both C and C++ source. If you
- * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit
- * the wrapping. N.B. doing so will suppress inclusion of all prerequisite
- * system header files, too. It is the caller's responsibility to provide
- * the necessary definitions.
- *
- * SYS_ERRNO:
- * All system calls will update "errno" unless overridden by setting the
- * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be
- * an l-value.
- *
- * SYS_INLINE:
- * New symbols will be defined "static inline", unless overridden by
- * the SYS_INLINE macro.
- *
- * SYS_LINUX_SYSCALL_SUPPORT_H
- * This macro is used to avoid multiple inclusions of this header file.
- * If you need to include this file more than once, make sure to
- * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion.
- *
- * SYS_PREFIX:
- * New system calls will have a prefix of "sys_" unless overridden by
- * the SYS_PREFIX macro. Valid values for this macro are [0..9] which
- * results in prefixes "sys[0..9]_". It is also possible to set this
- * macro to -1, which avoids all prefixes.
- *
- * This file defines a few internal symbols that all start with "LSS_".
- * Do not access these symbols from outside this file. They are not part
- * of the supported API.
- *
- * NOTE: This is a stripped down version of the official opensource
- * version of linux_syscall_support.h, which lives at
- * http://code.google.com/p/linux-syscall-support/
- * It includes only the syscalls that are used in perftools, plus a
- * few extra. Here's the breakdown:
- * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u
- * sys__exit(
- * sys_clone(
- * sys_close(
- * sys_fcntl(
- * sys_fstat(
- * sys_futex(
- * sys_futex1(
- * sys_getcpu(
- * sys_getdents(
- * sys_getppid(
- * sys_gettid(
- * sys_lseek(
- * sys_mmap(
- * sys_mremap(
- * sys_munmap(
- * sys_open(
- * sys_pipe(
- * sys_prctl(
- * sys_ptrace(
- * sys_ptrace_detach(
- * sys_read(
- * sys_sched_yield(
- * sys_sigaction(
- * sys_sigaltstack(
- * sys_sigdelset(
- * sys_sigfillset(
- * sys_sigprocmask(
- * sys_socket(
- * sys_stat(
- * sys_waitpid(
- * 2) These are used as subroutines of the above:
- * sys_getpid -- gettid
- * sys_kill -- ptrace_detach
- * sys_restore -- sigaction
- * sys_restore_rt -- sigaction
- * sys_socketcall -- socket
- * sys_wait4 -- waitpid
- * 3) I left these in even though they're not used. They either
- * complement the above (write vs read) or are variants (rt_sigaction):
- * sys_fstat64
- * sys_getdents64
- * sys_llseek
- * sys_mmap2
- * sys_openat
- * sys_rt_sigaction
- * sys_rt_sigprocmask
- * sys_sigaddset
- * sys_sigemptyset
- * sys_stat64
- * sys_write
- */
-#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
-#define SYS_LINUX_SYSCALL_SUPPORT_H
-
-/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
- * Porting to other related platforms should not be difficult.
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
- defined(__mips__) || defined(__PPC__)) && defined(__linux)
-
-#ifndef SYS_CPLUSPLUS
-#ifdef __cplusplus
-/* Some system header files in older versions of gcc neglect to properly
- * handle being included from C++. As it appears to be harmless to have
- * multiple nested 'extern "C"' blocks, just add another one here.
- */
-extern "C" {
-#endif
-
-#include <errno.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <string.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <syscall.h>
-#include <unistd.h>
-#include <linux/unistd.h>
-#include <endian.h>
-
-#ifdef __mips__
-/* Include definitions of the ABI currently in use. */
-#include <sgidefs.h>
-#endif
-
-#endif
-
-/* As glibc often provides subtly incompatible data structures (and implicit
- * wrapper functions that convert them), we provide our own kernel data
- * structures for use by the system calls.
- * These structures have been developed by using Linux 2.6.23 headers for
- * reference. Note though, we do not care about exact API compatibility
- * with the kernel, and in fact the kernel often does not have a single
- * API that works across architectures. Instead, we try to mimic the glibc
- * API where reasonable, and only guarantee ABI compatibility with the
- * kernel headers.
- * Most notably, here are a few changes that were made to the structures
- * defined by kernel headers:
- *
- * - we only define structures, but not symbolic names for kernel data
- * types. For the latter, we directly use the native C datatype
- * (i.e. "unsigned" instead of "mode_t").
- * - in a few cases, it is possible to define identical structures for
- * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
- * standardizing on the 64bit version of the data types. In particular,
- * this means that we use "unsigned" where the 32bit headers say
- * "unsigned long".
- * - overall, we try to minimize the number of cases where we need to
- * conditionally define different structures.
- * - the "struct kernel_sigaction" class of structures have been
- * modified to more closely mimic glibc's API by introducing an
- * anonymous union for the function pointer.
- * - a small number of field names had to have an underscore appended to
- * them, because glibc defines a global macro by the same name.
- */
-
-/* include/linux/dirent.h */
-struct kernel_dirent64 {
- unsigned long long d_ino;
- long long d_off;
- unsigned short d_reclen;
- unsigned char d_type;
- char d_name[256];
-};
-
-/* include/linux/dirent.h */
-struct kernel_dirent {
- long d_ino;
- long d_off;
- unsigned short d_reclen;
- char d_name[256];
-};
-
-/* include/linux/time.h */
-struct kernel_timespec {
- long tv_sec;
- long tv_nsec;
-};
-
-/* include/linux/time.h */
-struct kernel_timeval {
- long tv_sec;
- long tv_usec;
-};
-
-/* include/linux/resource.h */
-struct kernel_rusage {
- struct kernel_timeval ru_utime;
- struct kernel_timeval ru_stime;
- long ru_maxrss;
- long ru_ixrss;
- long ru_idrss;
- long ru_isrss;
- long ru_minflt;
- long ru_majflt;
- long ru_nswap;
- long ru_inblock;
- long ru_oublock;
- long ru_msgsnd;
- long ru_msgrcv;
- long ru_nsignals;
- long ru_nvcsw;
- long ru_nivcsw;
-};
-
-struct siginfo;
-#if defined(__i386__) || defined(__arm__) || defined(__PPC__)
-
-/* include/asm-{arm,i386,mips,ppc}/signal.h */
-struct kernel_old_sigaction {
- union {
- void (*sa_handler_)(int);
- void (*sa_sigaction_)(int, struct siginfo *, void *);
- };
- unsigned long sa_mask;
- unsigned long sa_flags;
- void (*sa_restorer)(void);
-} __attribute__((packed,aligned(4)));
-#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
- #define kernel_old_sigaction kernel_sigaction
-#endif
-
-/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
- * size of the sigset_t argument exactly match the size of the signal set, even though the API was
- * intended to be extensible. We define our own KERNEL_NSIG to deal with
- * this.
- * Please note that glibc provides signals [1.._NSIG-1], whereas the
- * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
- * actual number of signals is obviously the same, but the constants
- * differ by one.
- */
-#ifdef __mips__
-#define KERNEL_NSIG 128
-#else
-#define KERNEL_NSIG 64
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64}/signal.h */
-struct kernel_sigset_t {
- unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
- (8*sizeof(unsigned long))];
-};
-
-/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */
-struct kernel_sigaction {
-#ifdef __mips__
- unsigned long sa_flags;
- union {
- void (*sa_handler_)(int);
- void (*sa_sigaction_)(int, struct siginfo *, void *);
- };
- struct kernel_sigset_t sa_mask;
-#else
- union {
- void (*sa_handler_)(int);
- void (*sa_sigaction_)(int, struct siginfo *, void *);
- };
- unsigned long sa_flags;
- void (*sa_restorer)(void);
- struct kernel_sigset_t sa_mask;
-#endif
-};
-
-/* include/asm-{arm,i386,mips,ppc}/stat.h */
-#ifdef __mips__
-#if _MIPS_SIM == _MIPS_SIM_ABI64
-struct kernel_stat {
-#else
-struct kernel_stat64 {
-#endif
- unsigned st_dev;
- unsigned __pad0[3];
- unsigned long long st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned st_rdev;
- unsigned __pad1[3];
- long long st_size;
- unsigned st_atime_;
- unsigned st_atime_nsec_;
- unsigned st_mtime_;
- unsigned st_mtime_nsec_;
- unsigned st_ctime_;
- unsigned st_ctime_nsec_;
- unsigned st_blksize;
- unsigned __pad2;
- unsigned long long st_blocks;
-};
-#elif defined __PPC__
-struct kernel_stat64 {
- unsigned long long st_dev;
- unsigned long long st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned long long st_rdev;
- unsigned short int __pad2;
- long long st_size;
- long st_blksize;
- long long st_blocks;
- long st_atime_;
- unsigned long st_atime_nsec_;
- long st_mtime_;
- unsigned long st_mtime_nsec_;
- long st_ctime_;
- unsigned long st_ctime_nsec_;
- unsigned long __unused4;
- unsigned long __unused5;
-};
-#else
-struct kernel_stat64 {
- unsigned long long st_dev;
- unsigned char __pad0[4];
- unsigned __st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned long long st_rdev;
- unsigned char __pad3[4];
- long long st_size;
- unsigned st_blksize;
- unsigned long long st_blocks;
- unsigned st_atime_;
- unsigned st_atime_nsec_;
- unsigned st_mtime_;
- unsigned st_mtime_nsec_;
- unsigned st_ctime_;
- unsigned st_ctime_nsec_;
- unsigned long long st_ino;
-};
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */
-#if defined(__i386__) || defined(__arm__)
-struct kernel_stat {
- /* The kernel headers suggest that st_dev and st_rdev should be 32bit
- * quantities encoding 12bit major and 20bit minor numbers in an interleaved
- * format. In reality, we do not see useful data in the top bits. So,
- * we'll leave the padding in here, until we find a better solution.
- */
- unsigned short st_dev;
- short pad1;
- unsigned st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
- unsigned short st_rdev;
- short pad2;
- unsigned st_size;
- unsigned st_blksize;
- unsigned st_blocks;
- unsigned st_atime_;
- unsigned st_atime_nsec_;
- unsigned st_mtime_;
- unsigned st_mtime_nsec_;
- unsigned st_ctime_;
- unsigned st_ctime_nsec_;
- unsigned __unused4;
- unsigned __unused5;
-};
-#elif defined(__x86_64__)
-struct kernel_stat {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
- unsigned st_mode;
- unsigned st_uid;
- unsigned st_gid;
- unsigned __pad0;
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
- long st_blocks;
- unsigned long st_atime_;
- unsigned long st_atime_nsec_;
- unsigned long st_mtime_;
- unsigned long st_mtime_nsec_;
- unsigned long st_ctime_;
- unsigned long st_ctime_nsec_;
- long __unused[3];
-};
-#elif defined(__PPC__)
-struct kernel_stat {
- unsigned st_dev;
- unsigned long st_ino; // ino_t
- unsigned long st_mode; // mode_t
- unsigned short st_nlink; // nlink_t
- unsigned st_uid; // uid_t
- unsigned st_gid; // gid_t
- unsigned st_rdev;
- long st_size; // off_t
- unsigned long st_blksize;
- unsigned long st_blocks;
- unsigned long st_atime_;
- unsigned long st_atime_nsec_;
- unsigned long st_mtime_;
- unsigned long st_mtime_nsec_;
- unsigned long st_ctime_;
- unsigned long st_ctime_nsec_;
- unsigned long __unused4;
- unsigned long __unused5;
-};
-#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
-struct kernel_stat {
- unsigned st_dev;
- int st_pad1[3];
- unsigned st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned st_rdev;
- int st_pad2[2];
- long st_size;
- int st_pad3;
- long st_atime_;
- long st_atime_nsec_;
- long st_mtime_;
- long st_mtime_nsec_;
- long st_ctime_;
- long st_ctime_nsec_;
- int st_blksize;
- int st_blocks;
- int st_pad4[14];
-};
-#endif
-
-
-/* Definitions missing from the standard header files */
-#ifndef O_DIRECTORY
-#if defined(__arm__)
-#define O_DIRECTORY 0040000
-#else
-#define O_DIRECTORY 0200000
-#endif
-#endif
-#ifndef PR_GET_DUMPABLE
-#define PR_GET_DUMPABLE 3
-#endif
-#ifndef PR_SET_DUMPABLE
-#define PR_SET_DUMPABLE 4
-#endif
-#ifndef AT_FDCWD
-#define AT_FDCWD (-100)
-#endif
-#ifndef AT_SYMLINK_NOFOLLOW
-#define AT_SYMLINK_NOFOLLOW 0x100
-#endif
-#ifndef AT_REMOVEDIR
-#define AT_REMOVEDIR 0x200
-#endif
-#ifndef MREMAP_FIXED
-#define MREMAP_FIXED 2
-#endif
-#ifndef SA_RESTORER
-#define SA_RESTORER 0x04000000
-#endif
-
-#if defined(__i386__)
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction 174
-#define __NR_rt_sigprocmask 175
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 195
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 197
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 220
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid 224
-#endif
-#ifndef __NR_futex
-#define __NR_futex 240
-#endif
-#ifndef __NR_openat
-#define __NR_openat 295
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu 318
-#endif
-/* End of i386 definitions */
-#elif defined(__arm__)
-#ifndef __syscall
-#if defined(__thumb__) || defined(__ARM_EABI__)
-#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
-#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
-#define __syscall(name) "swi\t0"
-#define __syscall_safe(name) \
- "push {r7}\n" \
- "mov r7,%[sysreg]\n" \
- __syscall(name)"\n" \
- "pop {r7}"
-#else
-#define __SYS_REG(name)
-#define __SYS_REG_LIST(regs...) regs
-#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
-#define __syscall_safe(name) __syscall(name)
-#endif
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
-#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_SYSCALL_BASE + 224)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_SYSCALL_BASE + 240)
-#endif
-/* End of ARM definitions */
-#elif defined(__x86_64__)
-#ifndef __NR_gettid
-#define __NR_gettid 186
-#endif
-#ifndef __NR_futex
-#define __NR_futex 202
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 217
-#endif
-#ifndef __NR_openat
-#define __NR_openat 257
-#endif
-/* End of x86-64 definitions */
-#elif defined(__mips__)
-#if _MIPS_SIM == _MIPS_SIM_ABI32
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction (__NR_Linux + 194)
-#define __NR_rt_sigprocmask (__NR_Linux + 195)
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 (__NR_Linux + 213)
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 (__NR_Linux + 215)
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 (__NR_Linux + 219)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_Linux + 222)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_Linux + 238)
-#endif
-#ifndef __NR_openat
-#define __NR_openat (__NR_Linux + 288)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat (__NR_Linux + 293)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_Linux + 312)
-#endif
-/* End of MIPS (old 32bit API) definitions */
-#elif _MIPS_SIM == _MIPS_SIM_ABI64
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_Linux + 178)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_Linux + 194)
-#endif
-#ifndef __NR_openat
-#define __NR_openat (__NR_Linux + 247)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat (__NR_Linux + 252)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_Linux + 271)
-#endif
-/* End of MIPS (64bit API) definitions */
-#else
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_Linux + 178)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_Linux + 194)
-#endif
-#ifndef __NR_openat
-#define __NR_openat (__NR_Linux + 251)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat (__NR_Linux + 256)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_Linux + 275)
-#endif
-/* End of MIPS (new 32bit API) definitions */
-#endif
-/* End of MIPS definitions */
-#elif defined(__PPC__)
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigaction 173
-#define __NR_rt_sigprocmask 174
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 195
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 197
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 202
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid 207
-#endif
-#ifndef __NR_futex
-#define __NR_futex 221
-#endif
-#ifndef __NR_openat
-#define __NR_openat 286
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu 302
-#endif
-/* End of powerpc definitions */
-#endif
-
-
-/* After forking, we must make sure to only call system calls. */
-#if __BOUNDED_POINTERS__
- #error "Need to port invocations of syscalls for bounded ptrs"
-#else
- /* The core dumper and the thread lister get executed after threads
- * have been suspended. As a consequence, we cannot call any functions
- * that acquire locks. Unfortunately, libc wraps most system calls
- * (e.g. in order to implement pthread_atfork, and to make calls
- * cancellable), which means we cannot call these functions. Instead,
- * we have to call syscall() directly.
- */
- #undef LSS_ERRNO
- #ifdef SYS_ERRNO
- /* Allow the including file to override the location of errno. This can
- * be useful when using clone() with the CLONE_VM option.
- */
- #define LSS_ERRNO SYS_ERRNO
- #else
- #define LSS_ERRNO errno
- #endif
-
- #undef LSS_INLINE
- #ifdef SYS_INLINE
- #define LSS_INLINE SYS_INLINE
- #else
- #define LSS_INLINE static inline
- #endif
-
- /* Allow the including file to override the prefix used for all new
- * system calls. By default, it will be set to "sys_".
- */
- #undef LSS_NAME
- #ifndef SYS_PREFIX
- #define LSS_NAME(name) sys_##name
- #elif SYS_PREFIX < 0
- #define LSS_NAME(name) name
- #elif SYS_PREFIX == 0
- #define LSS_NAME(name) sys0_##name
- #elif SYS_PREFIX == 1
- #define LSS_NAME(name) sys1_##name
- #elif SYS_PREFIX == 2
- #define LSS_NAME(name) sys2_##name
- #elif SYS_PREFIX == 3
- #define LSS_NAME(name) sys3_##name
- #elif SYS_PREFIX == 4
- #define LSS_NAME(name) sys4_##name
- #elif SYS_PREFIX == 5
- #define LSS_NAME(name) sys5_##name
- #elif SYS_PREFIX == 6
- #define LSS_NAME(name) sys6_##name
- #elif SYS_PREFIX == 7
- #define LSS_NAME(name) sys7_##name
- #elif SYS_PREFIX == 8
- #define LSS_NAME(name) sys8_##name
- #elif SYS_PREFIX == 9
- #define LSS_NAME(name) sys9_##name
- #endif
-
- #undef LSS_RETURN
- #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__))
- /* Failing system calls return a negative result in the range of
- * -1..-4095. These are "errno" values with the sign inverted.
- */
- #define LSS_RETURN(type, res) \
- do { \
- if ((unsigned long)(res) >= (unsigned long)(-4095)) { \
- LSS_ERRNO = -(res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
- #elif defined(__mips__)
- /* On MIPS, failing system calls return -1, and set errno in a
- * separate CPU register.
- */
- #define LSS_RETURN(type, res, err) \
- do { \
- if (err) { \
- LSS_ERRNO = (res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
- #elif defined(__PPC__)
- /* On PPC, failing system calls return -1, and set errno in a
- * separate CPU register. See linux/unistd.h.
- */
- #define LSS_RETURN(type, res, err) \
- do { \
- if (err & 0x10000000 ) { \
- LSS_ERRNO = (res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
- #endif
- #if defined(__i386__)
- #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
- /* This only works for GCC-4.4 and above -- the first version to use
- .cfi directives for dwarf unwind info. */
- #define CFI_ADJUST_CFA_OFFSET(adjust) \
- ".cfi_adjust_cfa_offset " #adjust "\n"
- #else
- #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
- #endif
-
- /* In PIC mode (e.g. when building shared libraries), gcc for i386
- * reserves ebx. Unfortunately, most distributions ship with implementations
- * of _syscallX() which clobber ebx.
- * Also, most definitions of _syscallX() neglect to mark "memory" as being
- * clobbered. This causes problems with compilers that do a better job
- * at optimizing across __asm__ calls.
- * So, we just have to redefine all of the _syscallX() macros.
- */
- #undef LSS_BODY
- #define LSS_BODY(type,args...) \
- long __res; \
- __asm__ __volatile__("push %%ebx\n" \
- CFI_ADJUST_CFA_OFFSET(4) \
- "movl %2,%%ebx\n" \
- "int $0x80\n" \
- "pop %%ebx\n" \
- CFI_ADJUST_CFA_OFFSET(-4) \
- args \
- : "esp", "memory"); \
- LSS_RETURN(type,__res)
- #undef _syscall0
- #define _syscall0(type,name) \
- type LSS_NAME(name)(void) { \
- long __res; \
- __asm__ volatile("int $0x80" \
- : "=a" (__res) \
- : "0" (__NR_##name) \
- : "memory"); \
- LSS_RETURN(type,__res); \
- }
- #undef _syscall1
- #define _syscall1(type,name,type1,arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name), "ri" ((long)(arg1))); \
- }
- #undef _syscall2
- #define _syscall2(type,name,type1,arg1,type2,arg2) \
- type LSS_NAME(name)(type1 arg1,type2 arg2) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \
- }
- #undef _syscall3
- #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
- type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \
- "d" ((long)(arg3))); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \
- "d" ((long)(arg3)),"S" ((long)(arg4))); \
- }
- #undef _syscall5
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- long __res; \
- __asm__ __volatile__("push %%ebx\n" \
- "movl %2,%%ebx\n" \
- "movl %1,%%eax\n" \
- "int $0x80\n" \
- "pop %%ebx" \
- : "=a" (__res) \
- : "i" (__NR_##name), "ri" ((long)(arg1)), \
- "c" ((long)(arg2)), "d" ((long)(arg3)), \
- "S" ((long)(arg4)), "D" ((long)(arg5)) \
- : "esp", "memory"); \
- LSS_RETURN(type,__res); \
- }
- #undef _syscall6
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- long __res; \
- struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \
- __asm__ __volatile__("push %%ebp\n" \
- "push %%ebx\n" \
- "movl 4(%2),%%ebp\n" \
- "movl 0(%2), %%ebx\n" \
- "movl %1,%%eax\n" \
- "int $0x80\n" \
- "pop %%ebx\n" \
- "pop %%ebp" \
- : "=a" (__res) \
- : "i" (__NR_##name), "0" ((long)(&__s)), \
- "c" ((long)(arg2)), "d" ((long)(arg3)), \
- "S" ((long)(arg4)), "D" ((long)(arg5)) \
- : "esp", "memory"); \
- LSS_RETURN(type,__res); \
- }
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __res;
- __asm__ __volatile__(/* if (fn == NULL)
- * return -EINVAL;
- */
- "movl %3,%%ecx\n"
- "jecxz 1f\n"
-
- /* if (child_stack == NULL)
- * return -EINVAL;
- */
- "movl %4,%%ecx\n"
- "jecxz 1f\n"
-
- /* Set up alignment of the child stack:
- * child_stack = (child_stack & ~0xF) - 20;
- */
- "andl $-16,%%ecx\n"
- "subl $20,%%ecx\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- "movl %6,%%eax\n"
- "movl %%eax,4(%%ecx)\n"
- "movl %3,%%eax\n"
- "movl %%eax,(%%ecx)\n"
-
- /* %eax = syscall(%eax = __NR_clone,
- * %ebx = flags,
- * %ecx = child_stack,
- * %edx = parent_tidptr,
- * %esi = newtls,
- * %edi = child_tidptr)
- * Also, make sure that %ebx gets preserved as it is
- * used in PIC mode.
- */
- "movl %8,%%esi\n"
- "movl %7,%%edx\n"
- "movl %5,%%eax\n"
- "movl %9,%%edi\n"
- "pushl %%ebx\n"
- "movl %%eax,%%ebx\n"
- "movl %2,%%eax\n"
- "int $0x80\n"
-
- /* In the parent: restore %ebx
- * In the child: move "fn" into %ebx
- */
- "popl %%ebx\n"
-
- /* if (%eax != 0)
- * return %eax;
- */
- "test %%eax,%%eax\n"
- "jnz 1f\n"
-
- /* In the child, now. Terminate frame pointer chain.
- */
- "movl $0,%%ebp\n"
-
- /* Call "fn". "arg" is already on the stack.
- */
- "call *%%ebx\n"
-
- /* Call _exit(%ebx). Unfortunately older versions
- * of gcc restrict the number of arguments that can
- * be passed to asm(). So, we need to hard-code the
- * system call number.
- */
- "movl %%eax,%%ebx\n"
- "movl $1,%%eax\n"
- "int $0x80\n"
-
- /* Return to parent.
- */
- "1:\n"
- : "=a" (__res)
- : "0"(-EINVAL), "i"(__NR_clone),
- "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
- "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
- : "esp", "memory", "ecx", "edx", "esi", "edi");
- LSS_RETURN(int, __res);
- }
-
- LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
- /* On i386, the kernel does not know how to return from a signal
- * handler. Instead, it relies on user space to provide a
- * restorer function that calls the {rt_,}sigreturn() system call.
- * Unfortunately, we cannot just reference the glibc version of this
- * function, as glibc goes out of its way to make it inaccessible.
- */
- void (*res)(void);
- __asm__ __volatile__("call 2f\n"
- "0:.align 16\n"
- "1:movl %1,%%eax\n"
- "int $0x80\n"
- "2:popl %0\n"
- "addl $(1b-0b),%0\n"
- : "=a" (res)
- : "i" (__NR_rt_sigreturn));
- return res;
- }
- LSS_INLINE void (*LSS_NAME(restore)(void))(void) {
- /* On i386, the kernel does not know how to return from a signal
- * handler. Instead, it relies on user space to provide a
- * restorer function that calls the {rt_,}sigreturn() system call.
- * Unfortunately, we cannot just reference the glibc version of this
- * function, as glibc goes out of its way to make it inaccessible.
- */
- void (*res)(void);
- __asm__ __volatile__("call 2f\n"
- "0:.align 16\n"
- "1:pop %%eax\n"
- "movl %1,%%eax\n"
- "int $0x80\n"
- "2:popl %0\n"
- "addl $(1b-0b),%0\n"
- : "=a" (res)
- : "i" (__NR_sigreturn));
- return res;
- }
- #elif defined(__x86_64__)
- /* There are no known problems with any of the _syscallX() macros
- * currently shipping for x86_64, but we still need to be able to define
- * our own version so that we can override the location of the errno
- * location (e.g. when using the clone() system call with the CLONE_VM
- * option).
- */
- #undef LSS_BODY
- #define LSS_BODY(type,name, ...) \
- long __res; \
- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
- ##__VA_ARGS__ : "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res)
- #undef _syscall0
- #define _syscall0(type,name) \
- type LSS_NAME(name)() { \
- LSS_BODY(type, name); \
- }
- #undef _syscall1
- #define _syscall1(type,name,type1,arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(type, name, "D" ((long)(arg1))); \
- }
- #undef _syscall2
- #define _syscall2(type,name,type1,arg1,type2,arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
- }
- #undef _syscall3
- #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
- "d" ((long)(arg3))); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
- }
- #undef _syscall5
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)), "r" ((long)(arg5)) : \
- "r8", "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
- }
- #undef _syscall6
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
- "syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
- "r8", "r9", "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
- }
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __res;
- {
- __asm__ __volatile__(/* if (fn == NULL)
- * return -EINVAL;
- */
- "testq %4,%4\n"
- "jz 1f\n"
-
- /* if (child_stack == NULL)
- * return -EINVAL;
- */
- "testq %5,%5\n"
- "jz 1f\n"
-
- /* Set up alignment of the child stack:
- * child_stack = (child_stack & ~0xF) - 16;
- */
- "andq $-16,%5\n"
- "subq $16,%5\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- "movq %7,8(%5)\n"
- "movq %4,0(%5)\n"
-
- /* %rax = syscall(%rax = __NR_clone,
- * %rdi = flags,
- * %rsi = child_stack,
- * %rdx = parent_tidptr,
- * %r8 = new_tls,
- * %r10 = child_tidptr)
- */
- "movq %2,%%rax\n"
- "movq %9,%%r8\n"
- "movq %10,%%r10\n"
- "syscall\n"
-
- /* if (%rax != 0)
- * return;
- */
- "testq %%rax,%%rax\n"
- "jnz 1f\n"
-
- /* In the child. Terminate frame pointer chain.
- */
- "xorq %%rbp,%%rbp\n"
-
- /* Call "fn(arg)".
- */
- "popq %%rax\n"
- "popq %%rdi\n"
- "call *%%rax\n"
-
- /* Call _exit(%ebx).
- */
- "movq %%rax,%%rdi\n"
- "movq %3,%%rax\n"
- "syscall\n"
-
- /* Return to parent.
- */
- "1:\n"
- : "=a" (__res)
- : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
- "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
- : "rsp", "memory", "r8", "r10", "r11", "rcx");
- }
- LSS_RETURN(int, __res);
- }
-
- LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
- /* On x86-64, the kernel does not know how to return from
- * a signal handler. Instead, it relies on user space to provide a
- * restorer function that calls the rt_sigreturn() system call.
- * Unfortunately, we cannot just reference the glibc version of this
- * function, as glibc goes out of its way to make it inaccessible.
- */
- void (*res)(void);
- __asm__ __volatile__("call 2f\n"
- "0:.align 16\n"
- "1:movq %1,%%rax\n"
- "syscall\n"
- "2:popq %0\n"
- "addq $(1b-0b),%0\n"
- : "=a" (res)
- : "i" (__NR_rt_sigreturn));
- return res;
- }
- #elif defined(__arm__)
- /* Most definitions of _syscallX() neglect to mark "memory" as being
- * clobbered. This causes problems with compilers that do a better job
- * at optimizing across __asm__ calls.
- * So, we just have to redefine all of the _syscallX() macros.
- */
- #undef LSS_REG
- #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
-
- /* r0..r3 are scratch registers and not preserved across function
- * calls. We need to first evaluate the first 4 syscall arguments
- * and store them on stack. They must be loaded into r0..r3 after
- * all function calls to avoid r0..r3 being clobbered.
- */
- #undef LSS_SAVE_ARG
- #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
- #undef LSS_LOAD_ARG
- #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
-
- #undef LSS_BODY
- #define LSS_BODY(type, name, args...) \
+#pragma once
+
+/* Copyright (c) 2005-2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+/* This file includes Linux-specific support functions common to the
+ * coredumper and the thread lister; primarily, this is a collection
+ * of direct system calls, and a couple of symbols missing from
+ * standard header files.
+ * There are a few options that the including file can set to control
+ * the behavior of this file:
+ *
+ * SYS_CPLUSPLUS:
+ * The entire header file will normally be wrapped in 'extern "C" { }',
+ * making it suitable for compilation as both C and C++ source. If you
+ * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit
+ * the wrapping. N.B. doing so will suppress inclusion of all prerequisite
+ * system header files, too. It is the caller's responsibility to provide
+ * the necessary definitions.
+ *
+ * SYS_ERRNO:
+ * All system calls will update "errno" unless overridden by setting the
+ * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be
+ * an l-value.
+ *
+ * SYS_INLINE:
+ * New symbols will be defined "static inline", unless overridden by
+ * the SYS_INLINE macro.
+ *
+ * SYS_LINUX_SYSCALL_SUPPORT_H
+ * This macro is used to avoid multiple inclusions of this header file.
+ * If you need to include this file more than once, make sure to
+ * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion.
+ *
+ * SYS_PREFIX:
+ * New system calls will have a prefix of "sys_" unless overridden by
+ * the SYS_PREFIX macro. Valid values for this macro are [0..9] which
+ * results in prefixes "sys[0..9]_". It is also possible to set this
+ * macro to -1, which avoids all prefixes.
+ *
+ * This file defines a few internal symbols that all start with "LSS_".
+ * Do not access these symbols from outside this file. They are not part
+ * of the supported API.
+ *
+ * NOTE: This is a stripped down version of the official opensource
+ * version of linux_syscall_support.h, which lives at
+ * http://code.google.com/p/linux-syscall-support/
+ * It includes only the syscalls that are used in perftools, plus a
+ * few extra. Here's the breakdown:
+ * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u
+ * sys__exit(
+ * sys_clone(
+ * sys_close(
+ * sys_fcntl(
+ * sys_fstat(
+ * sys_futex(
+ * sys_futex1(
+ * sys_getcpu(
+ * sys_getdents(
+ * sys_getppid(
+ * sys_gettid(
+ * sys_lseek(
+ * sys_mmap(
+ * sys_mremap(
+ * sys_munmap(
+ * sys_open(
+ * sys_pipe(
+ * sys_prctl(
+ * sys_ptrace(
+ * sys_ptrace_detach(
+ * sys_read(
+ * sys_sched_yield(
+ * sys_sigaction(
+ * sys_sigaltstack(
+ * sys_sigdelset(
+ * sys_sigfillset(
+ * sys_sigprocmask(
+ * sys_socket(
+ * sys_stat(
+ * sys_waitpid(
+ * 2) These are used as subroutines of the above:
+ * sys_getpid -- gettid
+ * sys_kill -- ptrace_detach
+ * sys_restore -- sigaction
+ * sys_restore_rt -- sigaction
+ * sys_socketcall -- socket
+ * sys_wait4 -- waitpid
+ * 3) I left these in even though they're not used. They either
+ * complement the above (write vs read) or are variants (rt_sigaction):
+ * sys_fstat64
+ * sys_getdents64
+ * sys_llseek
+ * sys_mmap2
+ * sys_openat
+ * sys_rt_sigaction
+ * sys_rt_sigprocmask
+ * sys_sigaddset
+ * sys_sigemptyset
+ * sys_stat64
+ * sys_write
+ */
+#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
+#define SYS_LINUX_SYSCALL_SUPPORT_H
+
+/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
+ * Porting to other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
+ defined(__mips__) || defined(__PPC__)) && defined(__linux)
+
+#ifndef SYS_CPLUSPLUS
+#ifdef __cplusplus
+/* Some system header files in older versions of gcc neglect to properly
+ * handle being included from C++. As it appears to be harmless to have
+ * multiple nested 'extern "C"' blocks, just add another one here.
+ */
+extern "C" {
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <endian.h>
+
+#ifdef __mips__
+/* Include definitions of the ABI currently in use. */
+#include <sgidefs.h>
+#endif
+
+#endif
+
+/* As glibc often provides subtly incompatible data structures (and implicit
+ * wrapper functions that convert them), we provide our own kernel data
+ * structures for use by the system calls.
+ * These structures have been developed by using Linux 2.6.23 headers for
+ * reference. Note though, we do not care about exact API compatibility
+ * with the kernel, and in fact the kernel often does not have a single
+ * API that works across architectures. Instead, we try to mimic the glibc
+ * API where reasonable, and only guarantee ABI compatibility with the
+ * kernel headers.
+ * Most notably, here are a few changes that were made to the structures
+ * defined by kernel headers:
+ *
+ * - we only define structures, but not symbolic names for kernel data
+ * types. For the latter, we directly use the native C datatype
+ * (i.e. "unsigned" instead of "mode_t").
+ * - in a few cases, it is possible to define identical structures for
+ * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
+ * standardizing on the 64bit version of the data types. In particular,
+ * this means that we use "unsigned" where the 32bit headers say
+ * "unsigned long".
+ * - overall, we try to minimize the number of cases where we need to
+ * conditionally define different structures.
+ * - the "struct kernel_sigaction" class of structures have been
+ * modified to more closely mimic glibc's API by introducing an
+ * anonymous union for the function pointer.
+ * - a small number of field names had to have an underscore appended to
+ * them, because glibc defines a global macro by the same name.
+ */
+
+/* include/linux/dirent.h */
+struct kernel_dirent64 {
+ unsigned long long d_ino;
+ long long d_off;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ char d_name[256];
+};
+
+/* include/linux/dirent.h */
+struct kernel_dirent {
+ long d_ino;
+ long d_off;
+ unsigned short d_reclen;
+ char d_name[256];
+};
+
+/* include/linux/time.h */
+struct kernel_timespec {
+ long tv_sec;
+ long tv_nsec;
+};
+
+/* include/linux/time.h */
+struct kernel_timeval {
+ long tv_sec;
+ long tv_usec;
+};
+
+/* include/linux/resource.h */
+struct kernel_rusage {
+ struct kernel_timeval ru_utime;
+ struct kernel_timeval ru_stime;
+ long ru_maxrss;
+ long ru_ixrss;
+ long ru_idrss;
+ long ru_isrss;
+ long ru_minflt;
+ long ru_majflt;
+ long ru_nswap;
+ long ru_inblock;
+ long ru_oublock;
+ long ru_msgsnd;
+ long ru_msgrcv;
+ long ru_nsignals;
+ long ru_nvcsw;
+ long ru_nivcsw;
+};
+
+struct siginfo;
+#if defined(__i386__) || defined(__arm__) || defined(__PPC__)
+
+/* include/asm-{arm,i386,mips,ppc}/signal.h */
+struct kernel_old_sigaction {
+ union {
+ void (*sa_handler_)(int);
+ void (*sa_sigaction_)(int, struct siginfo *, void *);
+ };
+ unsigned long sa_mask;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+} __attribute__((packed,aligned(4)));
+#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+ #define kernel_old_sigaction kernel_sigaction
+#endif
+
+/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
+ * size of the sigset_t argument exactly match the size of the signal set, even though the API was
+ * intended to be extensible. We define our own KERNEL_NSIG to deal with
+ * this.
+ * Please note that glibc provides signals [1.._NSIG-1], whereas the
+ * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
+ * actual number of signals is obviously the same, but the constants
+ * differ by one.
+ */
+#ifdef __mips__
+#define KERNEL_NSIG 128
+#else
+#define KERNEL_NSIG 64
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64}/signal.h */
+struct kernel_sigset_t {
+ unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
+ (8*sizeof(unsigned long))];
+};
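Editorial aside (not from the original file): the array sizing just above is a ceiling division of KERNEL_NSIG bits into unsigned long words, e.g. 64 signals with 64-bit longs gives 1 word, with 32-bit longs 2 words, and MIPS's 128 signals with 32-bit longs gives 4. A compile-time check along these lines would be:

// Illustrative C++11 sanity check only; not part of the vendored header.
#include <limits.h>
static_assert(sizeof(kernel_sigset_t().sig) * CHAR_BIT >= KERNEL_NSIG,
              "kernel_sigset_t must be able to hold KERNEL_NSIG signal bits");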
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */
+struct kernel_sigaction {
+#ifdef __mips__
+ unsigned long sa_flags;
+ union {
+ void (*sa_handler_)(int);
+ void (*sa_sigaction_)(int, struct siginfo *, void *);
+ };
+ struct kernel_sigset_t sa_mask;
+#else
+ union {
+ void (*sa_handler_)(int);
+ void (*sa_sigaction_)(int, struct siginfo *, void *);
+ };
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ struct kernel_sigset_t sa_mask;
+#endif
+};
+
+/* include/asm-{arm,i386,mips,ppc}/stat.h */
+#ifdef __mips__
+#if _MIPS_SIM == _MIPS_SIM_ABI64
+struct kernel_stat {
+#else
+struct kernel_stat64 {
+#endif
+ unsigned st_dev;
+ unsigned __pad0[3];
+ unsigned long long st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned st_rdev;
+ unsigned __pad1[3];
+ long long st_size;
+ unsigned st_atime_;
+ unsigned st_atime_nsec_;
+ unsigned st_mtime_;
+ unsigned st_mtime_nsec_;
+ unsigned st_ctime_;
+ unsigned st_ctime_nsec_;
+ unsigned st_blksize;
+ unsigned __pad2;
+ unsigned long long st_blocks;
+};
+#elif defined __PPC__
+struct kernel_stat64 {
+ unsigned long long st_dev;
+ unsigned long long st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned long long st_rdev;
+ unsigned short int __pad2;
+ long long st_size;
+ long st_blksize;
+ long long st_blocks;
+ long st_atime_;
+ unsigned long st_atime_nsec_;
+ long st_mtime_;
+ unsigned long st_mtime_nsec_;
+ long st_ctime_;
+ unsigned long st_ctime_nsec_;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+#else
+struct kernel_stat64 {
+ unsigned long long st_dev;
+ unsigned char __pad0[4];
+ unsigned __st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned long long st_rdev;
+ unsigned char __pad3[4];
+ long long st_size;
+ unsigned st_blksize;
+ unsigned long long st_blocks;
+ unsigned st_atime_;
+ unsigned st_atime_nsec_;
+ unsigned st_mtime_;
+ unsigned st_mtime_nsec_;
+ unsigned st_ctime_;
+ unsigned st_ctime_nsec_;
+ unsigned long long st_ino;
+};
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */
+#if defined(__i386__) || defined(__arm__)
+struct kernel_stat {
+ /* The kernel headers suggest that st_dev and st_rdev should be 32bit
+ * quantities encoding 12bit major and 20bit minor numbers in an interleaved
+ * format. In reality, we do not see useful data in the top bits. So,
+ * we'll leave the padding in here, until we find a better solution.
+ */
+ unsigned short st_dev;
+ short pad1;
+ unsigned st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ short pad2;
+ unsigned st_size;
+ unsigned st_blksize;
+ unsigned st_blocks;
+ unsigned st_atime_;
+ unsigned st_atime_nsec_;
+ unsigned st_mtime_;
+ unsigned st_mtime_nsec_;
+ unsigned st_ctime_;
+ unsigned st_ctime_nsec_;
+ unsigned __unused4;
+ unsigned __unused5;
+};
+#elif defined(__x86_64__)
+struct kernel_stat {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned st_mode;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned __pad0;
+ unsigned long st_rdev;
+ long st_size;
+ long st_blksize;
+ long st_blocks;
+ unsigned long st_atime_;
+ unsigned long st_atime_nsec_;
+ unsigned long st_mtime_;
+ unsigned long st_mtime_nsec_;
+ unsigned long st_ctime_;
+ unsigned long st_ctime_nsec_;
+ long __unused[3];
+};
+#elif defined(__PPC__)
+struct kernel_stat {
+ unsigned st_dev;
+ unsigned long st_ino; // ino_t
+ unsigned long st_mode; // mode_t
+ unsigned short st_nlink; // nlink_t
+ unsigned st_uid; // uid_t
+ unsigned st_gid; // gid_t
+ unsigned st_rdev;
+ long st_size; // off_t
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+ unsigned long st_atime_;
+ unsigned long st_atime_nsec_;
+ unsigned long st_mtime_;
+ unsigned long st_mtime_nsec_;
+ unsigned long st_ctime_;
+ unsigned long st_ctime_nsec_;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+struct kernel_stat {
+ unsigned st_dev;
+ int st_pad1[3];
+ unsigned st_ino;
+ unsigned st_mode;
+ unsigned st_nlink;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned st_rdev;
+ int st_pad2[2];
+ long st_size;
+ int st_pad3;
+ long st_atime_;
+ long st_atime_nsec_;
+ long st_mtime_;
+ long st_mtime_nsec_;
+ long st_ctime_;
+ long st_ctime_nsec_;
+ int st_blksize;
+ int st_blocks;
+ int st_pad4[14];
+};
+#endif
+
+
+/* Definitions missing from the standard header files */
+#ifndef O_DIRECTORY
+#if defined(__arm__)
+#define O_DIRECTORY 0040000
+#else
+#define O_DIRECTORY 0200000
+#endif
+#endif
+#ifndef PR_GET_DUMPABLE
+#define PR_GET_DUMPABLE 3
+#endif
+#ifndef PR_SET_DUMPABLE
+#define PR_SET_DUMPABLE 4
+#endif
+#ifndef AT_FDCWD
+#define AT_FDCWD (-100)
+#endif
+#ifndef AT_SYMLINK_NOFOLLOW
+#define AT_SYMLINK_NOFOLLOW 0x100
+#endif
+#ifndef AT_REMOVEDIR
+#define AT_REMOVEDIR 0x200
+#endif
+#ifndef MREMAP_FIXED
+#define MREMAP_FIXED 2
+#endif
+#ifndef SA_RESTORER
+#define SA_RESTORER 0x04000000
+#endif
+
+#if defined(__i386__)
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction 174
+#define __NR_rt_sigprocmask 175
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 197
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 220
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid 224
+#endif
+#ifndef __NR_futex
+#define __NR_futex 240
+#endif
+#ifndef __NR_openat
+#define __NR_openat 295
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu 318
+#endif
+/* End of i386 definitions */
+#elif defined(__arm__)
+#ifndef __syscall
+#if defined(__thumb__) || defined(__ARM_EABI__)
+#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
+#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
+#define __syscall(name) "swi\t0"
+#define __syscall_safe(name) \
+ "push {r7}\n" \
+ "mov r7,%[sysreg]\n" \
+ __syscall(name)"\n" \
+ "pop {r7}"
+#else
+#define __SYS_REG(name)
+#define __SYS_REG_LIST(regs...) regs
+#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
+#define __syscall_safe(name) __syscall(name)
+#endif
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid (__NR_SYSCALL_BASE + 224)
+#endif
+#ifndef __NR_futex
+#define __NR_futex (__NR_SYSCALL_BASE + 240)
+#endif
+/* End of ARM definitions */
+#elif defined(__x86_64__)
+#ifndef __NR_gettid
+#define __NR_gettid 186
+#endif
+#ifndef __NR_futex
+#define __NR_futex 202
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 217
+#endif
+#ifndef __NR_openat
+#define __NR_openat 257
+#endif
+/* End of x86-64 definitions */
+#elif defined(__mips__)
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction (__NR_Linux + 194)
+#define __NR_rt_sigprocmask (__NR_Linux + 195)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 (__NR_Linux + 213)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 (__NR_Linux + 215)
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 (__NR_Linux + 219)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid (__NR_Linux + 222)
+#endif
+#ifndef __NR_futex
+#define __NR_futex (__NR_Linux + 238)
+#endif
+#ifndef __NR_openat
+#define __NR_openat (__NR_Linux + 288)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat (__NR_Linux + 293)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu (__NR_Linux + 312)
+#endif
+/* End of MIPS (old 32bit API) definitions */
+#elif _MIPS_SIM == _MIPS_SIM_ABI64
+#ifndef __NR_gettid
+#define __NR_gettid (__NR_Linux + 178)
+#endif
+#ifndef __NR_futex
+#define __NR_futex (__NR_Linux + 194)
+#endif
+#ifndef __NR_openat
+#define __NR_openat (__NR_Linux + 247)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat (__NR_Linux + 252)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu (__NR_Linux + 271)
+#endif
+/* End of MIPS (64bit API) definitions */
+#else
+#ifndef __NR_gettid
+#define __NR_gettid (__NR_Linux + 178)
+#endif
+#ifndef __NR_futex
+#define __NR_futex (__NR_Linux + 194)
+#endif
+#ifndef __NR_openat
+#define __NR_openat (__NR_Linux + 251)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat (__NR_Linux + 256)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu (__NR_Linux + 275)
+#endif
+/* End of MIPS (new 32bit API) definitions */
+#endif
+/* End of MIPS definitions */
+#elif defined(__PPC__)
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction 173
+#define __NR_rt_sigprocmask 174
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 197
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 202
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid 207
+#endif
+#ifndef __NR_futex
+#define __NR_futex 221
+#endif
+#ifndef __NR_openat
+#define __NR_openat 286
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu 302
+#endif
+/* End of powerpc definitions */
+#endif
+
+
+/* After forking, we must make sure to only call system calls. */
+#if __BOUNDED_POINTERS__
+ #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+ /* The core dumper and the thread lister get executed after threads
+ * have been suspended. As a consequence, we cannot call any functions
+ * that acquire locks. Unfortunately, libc wraps most system calls
+ * (e.g. in order to implement pthread_atfork, and to make calls
+ * cancellable), which means we cannot call these functions. Instead,
+ * we have to call syscall() directly.
+ */
+ #undef LSS_ERRNO
+ #ifdef SYS_ERRNO
+ /* Allow the including file to override the location of errno. This can
+ * be useful when using clone() with the CLONE_VM option.
+ */
+ #define LSS_ERRNO SYS_ERRNO
+ #else
+ #define LSS_ERRNO errno
+ #endif
+
+ #undef LSS_INLINE
+ #ifdef SYS_INLINE
+ #define LSS_INLINE SYS_INLINE
+ #else
+ #define LSS_INLINE static inline
+ #endif
+
+ /* Allow the including file to override the prefix used for all new
+ * system calls. By default, it will be set to "sys_".
+ */
+ #undef LSS_NAME
+ #ifndef SYS_PREFIX
+ #define LSS_NAME(name) sys_##name
+ #elif SYS_PREFIX < 0
+ #define LSS_NAME(name) name
+ #elif SYS_PREFIX == 0
+ #define LSS_NAME(name) sys0_##name
+ #elif SYS_PREFIX == 1
+ #define LSS_NAME(name) sys1_##name
+ #elif SYS_PREFIX == 2
+ #define LSS_NAME(name) sys2_##name
+ #elif SYS_PREFIX == 3
+ #define LSS_NAME(name) sys3_##name
+ #elif SYS_PREFIX == 4
+ #define LSS_NAME(name) sys4_##name
+ #elif SYS_PREFIX == 5
+ #define LSS_NAME(name) sys5_##name
+ #elif SYS_PREFIX == 6
+ #define LSS_NAME(name) sys6_##name
+ #elif SYS_PREFIX == 7
+ #define LSS_NAME(name) sys7_##name
+ #elif SYS_PREFIX == 8
+ #define LSS_NAME(name) sys8_##name
+ #elif SYS_PREFIX == 9
+ #define LSS_NAME(name) sys9_##name
+ #endif
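Editorial note (not part of the header): the LSS_NAME chain above only selects the prefix that the _syscallX() macros later attach to each generated wrapper. As an illustration with a hypothetical wrapper name:

// Illustrative expansions only; SYS_PREFIX must be defined before inclusion.
//   SYS_PREFIX undefined  : LSS_NAME(gettid) -> sys_gettid   (the default)
//   #define SYS_PREFIX 3  : LSS_NAME(gettid) -> sys3_gettid
//   #define SYS_PREFIX -1 : LSS_NAME(gettid) -> gettid        (no prefix)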
+
+ #undef LSS_RETURN
+ #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__))
+ /* Failing system calls return a negative result in the range of
+ * -1..-4095. These are "errno" values with the sign inverted.
+ */
+ #define LSS_RETURN(type, res) \
+ do { \
+ if ((unsigned long)(res) >= (unsigned long)(-4095)) { \
+ LSS_ERRNO = -(res); \
+ res = -1; \
+ } \
+ return (type) (res); \
+ } while (0)
+ #elif defined(__mips__)
+ /* On MIPS, failing system calls return -1, and set errno in a
+ * separate CPU register.
+ */
+ #define LSS_RETURN(type, res, err) \
+ do { \
+ if (err) { \
+ LSS_ERRNO = (res); \
+ res = -1; \
+ } \
+ return (type) (res); \
+ } while (0)
+ #elif defined(__PPC__)
+ /* On PPC, failing system calls return -1, and set errno in a
+ * separate CPU register. See linux/unistd.h.
+ */
+ #define LSS_RETURN(type, res, err) \
+ do { \
+ if (err & 0x10000000 ) { \
+ LSS_ERRNO = (res); \
+ res = -1; \
+ } \
+ return (type) (res); \
+ } while (0)
+ #endif
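/* Worked example of the i386/x86_64/ARM convention above (a sketch only):
 * a raw kernel result of -ENOENT, i.e. (unsigned long)(-2), compares
 * >= (unsigned long)(-4095), so it is folded into the familiar libc shape:
 *
 *   long res = -ENOENT;                          // raw value out of the kernel
 *   if ((unsigned long)(res) >= (unsigned long)(-4095)) {
 *     LSS_ERRNO = -(res);                        // errno = ENOENT
 *     res = -1;
 *   }
 *   // the caller sees -1 with LSS_ERRNO set, exactly what LSS_RETURN() does
 */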
+ #if defined(__i386__)
+ #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
+ /* This only works for GCC-4.4 and above -- the first version to use
+ .cfi directives for dwarf unwind info. */
+ #define CFI_ADJUST_CFA_OFFSET(adjust) \
+ ".cfi_adjust_cfa_offset " #adjust "\n"
+ #else
+ #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
+ #endif
+
+ /* In PIC mode (e.g. when building shared libraries), gcc for i386
+     * reserves ebx. Unfortunately, most distributions ship with implementations
+ * of _syscallX() which clobber ebx.
+ * Also, most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers that do a better job
+ * at optimizing across __asm__ calls.
+ * So, we just have to redefine all of the _syscallX() macros.
+ */
+ #undef LSS_BODY
+ #define LSS_BODY(type,args...) \
+ long __res; \
+ __asm__ __volatile__("push %%ebx\n" \
+ CFI_ADJUST_CFA_OFFSET(4) \
+ "movl %2,%%ebx\n" \
+ "int $0x80\n" \
+ "pop %%ebx\n" \
+ CFI_ADJUST_CFA_OFFSET(-4) \
+ args \
+ : "esp", "memory"); \
+ LSS_RETURN(type,__res)
+ #undef _syscall0
+ #define _syscall0(type,name) \
+ type LSS_NAME(name)(void) { \
+ long __res; \
+ __asm__ volatile("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name) \
+ : "memory"); \
+ LSS_RETURN(type,__res); \
+ }
+ #undef _syscall1
+ #define _syscall1(type,name,type1,arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+ LSS_BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name), "ri" ((long)(arg1))); \
+ }
+ #undef _syscall2
+ #define _syscall2(type,name,type1,arg1,type2,arg2) \
+ type LSS_NAME(name)(type1 arg1,type2 arg2) { \
+ LSS_BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \
+ }
+ #undef _syscall3
+ #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+ type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \
+ LSS_BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \
+ "d" ((long)(arg3))); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ LSS_BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4))); \
+ }
+ #undef _syscall5
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ long __res; \
+ __asm__ __volatile__("push %%ebx\n" \
+ "movl %2,%%ebx\n" \
+ "movl %1,%%eax\n" \
+ "int $0x80\n" \
+ "pop %%ebx" \
+ : "=a" (__res) \
+ : "i" (__NR_##name), "ri" ((long)(arg1)), \
+ "c" ((long)(arg2)), "d" ((long)(arg3)), \
+ "S" ((long)(arg4)), "D" ((long)(arg5)) \
+ : "esp", "memory"); \
+ LSS_RETURN(type,__res); \
+ }
+ #undef _syscall6
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ long __res; \
+ struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \
+ __asm__ __volatile__("push %%ebp\n" \
+ "push %%ebx\n" \
+ "movl 4(%2),%%ebp\n" \
+ "movl 0(%2), %%ebx\n" \
+ "movl %1,%%eax\n" \
+ "int $0x80\n" \
+ "pop %%ebx\n" \
+ "pop %%ebp" \
+ : "=a" (__res) \
+ : "i" (__NR_##name), "0" ((long)(&__s)), \
+ "c" ((long)(arg2)), "d" ((long)(arg3)), \
+ "S" ((long)(arg4)), "D" ((long)(arg5)) \
+ : "esp", "memory"); \
+ LSS_RETURN(type,__res); \
+ }
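/* Sketch of what one instantiation of the macros above expands to (default
 * "sys_" prefix assumed; expansion shown loosely, not verbatim):
 *
 *   LSS_INLINE _syscall1(int, close, int, f)
 *
 * becomes roughly:
 *
 *   static inline int sys_close(int f) {
 *     long __res;
 *     // "int $0x80" with %eax = __NR_close and %ebx = (long)f,
 *     // %ebx saved and restored around the call so PIC builds stay happy
 *     LSS_RETURN(int, __res);
 *   }
 */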
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long __res;
+ __asm__ __volatile__(/* if (fn == NULL)
+ * return -EINVAL;
+ */
+ "movl %3,%%ecx\n"
+ "jecxz 1f\n"
+
+ /* if (child_stack == NULL)
+ * return -EINVAL;
+ */
+ "movl %4,%%ecx\n"
+ "jecxz 1f\n"
+
+ /* Set up alignment of the child stack:
+ * child_stack = (child_stack & ~0xF) - 20;
+ */
+ "andl $-16,%%ecx\n"
+ "subl $20,%%ecx\n"
+
+ /* Push "arg" and "fn" onto the stack that will be
+ * used by the child.
+ */
+ "movl %6,%%eax\n"
+ "movl %%eax,4(%%ecx)\n"
+ "movl %3,%%eax\n"
+ "movl %%eax,(%%ecx)\n"
+
+ /* %eax = syscall(%eax = __NR_clone,
+ * %ebx = flags,
+ * %ecx = child_stack,
+ * %edx = parent_tidptr,
+ * %esi = newtls,
+ * %edi = child_tidptr)
+ * Also, make sure that %ebx gets preserved as it is
+ * used in PIC mode.
+ */
+ "movl %8,%%esi\n"
+ "movl %7,%%edx\n"
+ "movl %5,%%eax\n"
+ "movl %9,%%edi\n"
+ "pushl %%ebx\n"
+ "movl %%eax,%%ebx\n"
+ "movl %2,%%eax\n"
+ "int $0x80\n"
+
+ /* In the parent: restore %ebx
+ * In the child: move "fn" into %ebx
+ */
+ "popl %%ebx\n"
+
+ /* if (%eax != 0)
+ * return %eax;
+ */
+ "test %%eax,%%eax\n"
+ "jnz 1f\n"
+
+ /* In the child, now. Terminate frame pointer chain.
+ */
+ "movl $0,%%ebp\n"
+
+ /* Call "fn". "arg" is already on the stack.
+ */
+ "call *%%ebx\n"
+
+ /* Call _exit(%ebx). Unfortunately older versions
+ * of gcc restrict the number of arguments that can
+ * be passed to asm(). So, we need to hard-code the
+ * system call number.
+ */
+ "movl %%eax,%%ebx\n"
+ "movl $1,%%eax\n"
+ "int $0x80\n"
+
+ /* Return to parent.
+ */
+ "1:\n"
+ : "=a" (__res)
+ : "0"(-EINVAL), "i"(__NR_clone),
+ "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
+ "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
+ : "esp", "memory", "ecx", "edx", "esi", "edi");
+ LSS_RETURN(int, __res);
+ }
+
+ LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+ /* On i386, the kernel does not know how to return from a signal
+ * handler. Instead, it relies on user space to provide a
+ * restorer function that calls the {rt_,}sigreturn() system call.
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+ void (*res)(void);
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movl %1,%%eax\n"
+ "int $0x80\n"
+ "2:popl %0\n"
+ "addl $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+ return res;
+ }
+ LSS_INLINE void (*LSS_NAME(restore)(void))(void) {
+ /* On i386, the kernel does not know how to return from a signal
+ * handler. Instead, it relies on user space to provide a
+ * restorer function that calls the {rt_,}sigreturn() system call.
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+ void (*res)(void);
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:pop %%eax\n"
+ "movl %1,%%eax\n"
+ "int $0x80\n"
+ "2:popl %0\n"
+ "addl $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_sigreturn));
+ return res;
+ }
+ #elif defined(__x86_64__)
+ /* There are no known problems with any of the _syscallX() macros
+ * currently shipping for x86_64, but we still need to be able to define
+ * our own version so that we can override the location of the errno
+ * location (e.g. when using the clone() system call with the CLONE_VM
+ * option).
+ */
+ #undef LSS_BODY
+ #define LSS_BODY(type,name, ...) \
+ long __res; \
+ __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
+ ##__VA_ARGS__ : "r11", "rcx", "memory"); \
+ LSS_RETURN(type, __res)
+ #undef _syscall0
+ #define _syscall0(type,name) \
+ type LSS_NAME(name)() { \
+ LSS_BODY(type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type,name,type1,arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+ LSS_BODY(type, name, "D" ((long)(arg1))); \
+ }
+ #undef _syscall2
+ #define _syscall2(type,name,type1,arg1,type2,arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+ LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
+ }
+ #undef _syscall3
+ #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+ LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
+ "d" ((long)(arg3))); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ long __res; \
+ __asm__ __volatile__("movq %5,%%r10; syscall" : \
+ "=a" (__res) : "0" (__NR_##name), \
+ "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+ "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
+ LSS_RETURN(type, __res); \
+ }
+ #undef _syscall5
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ long __res; \
+ __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
+ "=a" (__res) : "0" (__NR_##name), \
+ "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+ "r" ((long)(arg4)), "r" ((long)(arg5)) : \
+ "r8", "r10", "r11", "rcx", "memory"); \
+ LSS_RETURN(type, __res); \
+ }
+ #undef _syscall6
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ long __res; \
+ __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
+ "syscall" : \
+ "=a" (__res) : "0" (__NR_##name), \
+ "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+ "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
+ "r8", "r9", "r10", "r11", "rcx", "memory"); \
+ LSS_RETURN(type, __res); \
+ }
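/* Background note for the constraints above (a summary, not from the header):
 * the x86-64 kernel ABI takes arguments in %rdi, %rsi, %rdx, %r10, %r8, %r9,
 * and the "syscall" instruction itself clobbers %rcx and %r11, hence their
 * presence in every clobber list. For example,
 *
 *   LSS_INLINE _syscall3(ssize_t, write, int, f, const void *, b, size_t, c)
 *
 * yields a sys_write(f, b, c) that places f in %rdi, b in %rsi, c in %rdx and
 * __NR_write in %rax before executing "syscall".
 */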
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long __res;
+ {
+ __asm__ __volatile__(/* if (fn == NULL)
+ * return -EINVAL;
+ */
+ "testq %4,%4\n"
+ "jz 1f\n"
+
+ /* if (child_stack == NULL)
+ * return -EINVAL;
+ */
+ "testq %5,%5\n"
+ "jz 1f\n"
+
+ /* Set up alignment of the child stack:
+ * child_stack = (child_stack & ~0xF) - 16;
+ */
+ "andq $-16,%5\n"
+ "subq $16,%5\n"
+
+ /* Push "arg" and "fn" onto the stack that will be
+ * used by the child.
+ */
+ "movq %7,8(%5)\n"
+ "movq %4,0(%5)\n"
+
+ /* %rax = syscall(%rax = __NR_clone,
+ * %rdi = flags,
+ * %rsi = child_stack,
+ * %rdx = parent_tidptr,
+ * %r8 = new_tls,
+ * %r10 = child_tidptr)
+ */
+ "movq %2,%%rax\n"
+ "movq %9,%%r8\n"
+ "movq %10,%%r10\n"
+ "syscall\n"
+
+ /* if (%rax != 0)
+ * return;
+ */
+ "testq %%rax,%%rax\n"
+ "jnz 1f\n"
+
+ /* In the child. Terminate frame pointer chain.
+ */
+ "xorq %%rbp,%%rbp\n"
+
+ /* Call "fn(arg)".
+ */
+ "popq %%rax\n"
+ "popq %%rdi\n"
+ "call *%%rax\n"
+
+ /* Call _exit(%ebx).
+ */
+ "movq %%rax,%%rdi\n"
+ "movq %3,%%rax\n"
+ "syscall\n"
+
+ /* Return to parent.
+ */
+ "1:\n"
+ : "=a" (__res)
+ : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+ "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
+ "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
+ : "rsp", "memory", "r8", "r10", "r11", "rcx");
+ }
+ LSS_RETURN(int, __res);
+ }
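/* A hedged usage sketch for the clone wrapper above (the stack buffer name
 * and size are hypothetical; the child must restrict itself to direct system
 * calls):
 *
 *   static int child_fn(void *arg) { sys__exit(0); return 0; }
 *   ...
 *   char *stack_top = child_stack_buf + CHILD_STACK_SIZE;   // stack grows down
 *   int tid = sys_clone(child_fn, stack_top,
 *                       CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD,
 *                       NULL, NULL, NULL, NULL);
 */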
+
+ LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+ /* On x86-64, the kernel does not know how to return from
+ * a signal handler. Instead, it relies on user space to provide a
+ * restorer function that calls the rt_sigreturn() system call.
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+ void (*res)(void);
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movq %1,%%rax\n"
+ "syscall\n"
+ "2:popq %0\n"
+ "addq $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+ return res;
+ }
+ #elif defined(__arm__)
+ /* Most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers that do a better job
+ * at optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+ */
+ #undef LSS_REG
+ #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+
+ /* r0..r3 are scratch registers and not preserved across function
+ * calls. We need to first evaluate the first 4 syscall arguments
+ * and store them on stack. They must be loaded into r0..r3 after
+ * all function calls to avoid r0..r3 being clobbered.
+ */
+ #undef LSS_SAVE_ARG
+ #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
+ #undef LSS_LOAD_ARG
+ #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
+
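/* How the save/load pair above is meant to be used (illustration only):
 *
 *   LSS_SAVE_ARG(0, arg1);   // long __tmp0 = (long)arg1;  evaluated early
 *   LSS_SAVE_ARG(1, arg2);   // long __tmp1 = (long)arg2;
 *   LSS_LOAD_ARG(0);         // register long __r0 asm("r0") = __tmp0;
 *   LSS_LOAD_ARG(1);         // register long __r1 asm("r1") = __tmp1;
 *
 * so any function calls hidden inside the argument expressions run before
 * r0..r3 are pinned to their registers.
 */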
+ #undef LSS_BODY
+ #define LSS_BODY(type, name, args...) \
long __res_r0 __asm__("r0"); \
- long __res; \
- __SYS_REG(name) \
- __asm__ __volatile__ (__syscall_safe(name) \
- : "=r"(__res_r0) \
- : __SYS_REG_LIST(args) \
- : "lr", "memory"); \
- __res = __res_r0; \
- LSS_RETURN(type, __res)
- #undef _syscall0
- #define _syscall0(type, name) \
- type LSS_NAME(name)() { \
- LSS_BODY(type, name); \
- }
- #undef _syscall1
- #define _syscall1(type, name, type1, arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- /* There is no need for using a volatile temp. */ \
- LSS_REG(0, arg1); \
- LSS_BODY(type, name, "r"(__r0)); \
- }
- #undef _syscall2
- #define _syscall2(type, name, type1, arg1, type2, arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_SAVE_ARG(0, arg1); \
- LSS_SAVE_ARG(1, arg2); \
- LSS_LOAD_ARG(0); \
- LSS_LOAD_ARG(1); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \
- }
- #undef _syscall3
- #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_SAVE_ARG(0, arg1); \
- LSS_SAVE_ARG(1, arg2); \
- LSS_SAVE_ARG(2, arg3); \
- LSS_LOAD_ARG(0); \
- LSS_LOAD_ARG(1); \
- LSS_LOAD_ARG(2); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \
- }
- #undef _syscall4
- #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_SAVE_ARG(0, arg1); \
- LSS_SAVE_ARG(1, arg2); \
- LSS_SAVE_ARG(2, arg3); \
- LSS_SAVE_ARG(3, arg4); \
- LSS_LOAD_ARG(0); \
- LSS_LOAD_ARG(1); \
- LSS_LOAD_ARG(2); \
- LSS_LOAD_ARG(3); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \
- }
- #undef _syscall5
- #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4, type5, arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_SAVE_ARG(0, arg1); \
- LSS_SAVE_ARG(1, arg2); \
- LSS_SAVE_ARG(2, arg3); \
- LSS_SAVE_ARG(3, arg4); \
- LSS_REG(4, arg5); \
- LSS_LOAD_ARG(0); \
- LSS_LOAD_ARG(1); \
- LSS_LOAD_ARG(2); \
- LSS_LOAD_ARG(3); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
- "r"(__r4)); \
- }
- #undef _syscall6
- #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4, type5, arg5, type6, arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_SAVE_ARG(0, arg1); \
- LSS_SAVE_ARG(1, arg2); \
- LSS_SAVE_ARG(2, arg3); \
- LSS_SAVE_ARG(3, arg4); \
- LSS_REG(4, arg5); \
- LSS_REG(5, arg6); \
- LSS_LOAD_ARG(0); \
- LSS_LOAD_ARG(1); \
- LSS_LOAD_ARG(2); \
- LSS_LOAD_ARG(3); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
- "r"(__r4), "r"(__r5)); \
- }
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
+ long __res; \
+ __SYS_REG(name) \
+ __asm__ __volatile__ (__syscall_safe(name) \
+ : "=r"(__res_r0) \
+ : __SYS_REG_LIST(args) \
+ : "lr", "memory"); \
+ __res = __res_r0; \
+ LSS_RETURN(type, __res)
+ #undef _syscall0
+ #define _syscall0(type, name) \
+ type LSS_NAME(name)() { \
+ LSS_BODY(type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type, name, type1, arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+ /* There is no need for using a volatile temp. */ \
+ LSS_REG(0, arg1); \
+ LSS_BODY(type, name, "r"(__r0)); \
+ }
+ #undef _syscall2
+ #define _syscall2(type, name, type1, arg1, type2, arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \
+ }
+ #undef _syscall3
+ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \
+ }
+ #undef _syscall4
+ #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_SAVE_ARG(3, arg4); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_LOAD_ARG(3); \
+ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \
+ }
+ #undef _syscall5
+ #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_SAVE_ARG(3, arg4); \
+ LSS_REG(4, arg5); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_LOAD_ARG(3); \
+ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
+ "r"(__r4)); \
+ }
+ #undef _syscall6
+ #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5, type6, arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_SAVE_ARG(3, arg4); \
+ LSS_REG(4, arg5); \
+ LSS_REG(5, arg6); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_LOAD_ARG(3); \
+ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
+ "r"(__r4), "r"(__r5)); \
+ }
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
long __res __asm__("r5");
- {
- if (fn == NULL || child_stack == NULL) {
- __res = -EINVAL;
- goto clone_exit;
- }
-
- /* stash first 4 arguments on stack first because we can only load
- * them after all function calls.
- */
- int tmp_flags = flags;
- int * tmp_stack = (int*) child_stack;
- void * tmp_ptid = parent_tidptr;
- void * tmp_tls = newtls;
-
+ {
+ if (fn == NULL || child_stack == NULL) {
+ __res = -EINVAL;
+ goto clone_exit;
+ }
+
+ /* stash first 4 arguments on stack first because we can only load
+ * them after all function calls.
+ */
+ int tmp_flags = flags;
+ int * tmp_stack = (int*) child_stack;
+ void * tmp_ptid = parent_tidptr;
+ void * tmp_tls = newtls;
+
int *__ctid __asm__("r4") = child_tidptr;
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- *(--tmp_stack) = (int) arg;
- *(--tmp_stack) = (int) fn;
-
- /* We must load r0..r3 last after all possible function calls. */
+
+ /* Push "arg" and "fn" onto the stack that will be
+ * used by the child.
+ */
+ *(--tmp_stack) = (int) arg;
+ *(--tmp_stack) = (int) fn;
+
+ /* We must load r0..r3 last after all possible function calls. */
int __flags __asm__("r0") = tmp_flags;
void *__stack __asm__("r1") = tmp_stack;
void *__ptid __asm__("r2") = tmp_ptid;
void *__tls __asm__("r3") = tmp_tls;
-
- /* %r0 = syscall(%r0 = flags,
- * %r1 = child_stack,
- * %r2 = parent_tidptr,
- * %r3 = newtls,
- * %r4 = child_tidptr)
- */
- __SYS_REG(clone)
- __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
- * %r1 = child_stack,
- * %r2 = parent_tidptr,
- * %r3 = newtls,
- * %r4 = child_tidptr)
- */
- "push {r7}\n"
- "mov r7,%1\n"
- __syscall(clone)"\n"
-
- /* if (%r0 != 0)
- * return %r0;
- */
- "movs %0,r0\n"
- "bne 1f\n"
-
- /* In the child, now. Call "fn(arg)".
- */
- "ldr r0,[sp, #4]\n"
- "mov lr,pc\n"
- "ldr pc,[sp]\n"
-
- /* Call _exit(%r0), which never returns. We only
- * need to set r7 for EABI syscall ABI but we do
- * this always to simplify code sharing between
- * old and new syscall ABIs.
- */
- "mov r7,%2\n"
- __syscall(exit)"\n"
-
- /* Pop r7 from the stack only in the parent.
- */
- "1: pop {r7}\n"
- : "=r" (__res)
- : "r"(__sysreg),
- "i"(__NR_exit), "r"(__stack), "r"(__flags),
- "r"(__ptid), "r"(__tls), "r"(__ctid)
- : "cc", "lr", "memory");
- }
- clone_exit:
- LSS_RETURN(int, __res);
- }
- #elif defined(__mips__)
- #undef LSS_REG
- #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \
- (unsigned long)(a)
-
- #if _MIPS_SIM == _MIPS_SIM_ABI32
- // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html
- // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html
- #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\
- "$13", "$14", "$15", "$24", "$25", "memory"
- #else
- #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \
- "$14", "$15", "$24", "$25", "memory"
- #endif
-
- #undef LSS_BODY
- #define LSS_BODY(type,name,r7,...) \
+
+ /* %r0 = syscall(%r0 = flags,
+ * %r1 = child_stack,
+ * %r2 = parent_tidptr,
+ * %r3 = newtls,
+ * %r4 = child_tidptr)
+ */
+ __SYS_REG(clone)
+ __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
+ * %r1 = child_stack,
+ * %r2 = parent_tidptr,
+ * %r3 = newtls,
+ * %r4 = child_tidptr)
+ */
+ "push {r7}\n"
+ "mov r7,%1\n"
+ __syscall(clone)"\n"
+
+ /* if (%r0 != 0)
+ * return %r0;
+ */
+ "movs %0,r0\n"
+ "bne 1f\n"
+
+ /* In the child, now. Call "fn(arg)".
+ */
+ "ldr r0,[sp, #4]\n"
+ "mov lr,pc\n"
+ "ldr pc,[sp]\n"
+
+ /* Call _exit(%r0), which never returns. We only
+ * need to set r7 for EABI syscall ABI but we do
+ * this always to simplify code sharing between
+ * old and new syscall ABIs.
+ */
+ "mov r7,%2\n"
+ __syscall(exit)"\n"
+
+ /* Pop r7 from the stack only in the parent.
+ */
+ "1: pop {r7}\n"
+ : "=r" (__res)
+ : "r"(__sysreg),
+ "i"(__NR_exit), "r"(__stack), "r"(__flags),
+ "r"(__ptid), "r"(__tls), "r"(__ctid)
+ : "cc", "lr", "memory");
+ }
+ clone_exit:
+ LSS_RETURN(int, __res);
+ }
+ #elif defined(__mips__)
+ #undef LSS_REG
+ #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \
+ (unsigned long)(a)
+
+ #if _MIPS_SIM == _MIPS_SIM_ABI32
+ // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html
+ // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html
+ #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\
+ "$13", "$14", "$15", "$24", "$25", "memory"
+ #else
+ #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \
+ "$14", "$15", "$24", "$25", "memory"
+ #endif
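/* Note on the MIPS convention used below (summary only): the syscall result
 * comes back in $2 (v0) and the error flag in $7 (a3), which is why every
 * wrapper declares
 *
 *   unsigned long __v0 __asm__("$2");   // result
 *   unsigned long __r7 __asm__("$7");   // non-zero on failure
 *
 * and finishes with LSS_RETURN(type, __v0, __r7).
 */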
+
+ #undef LSS_BODY
+ #define LSS_BODY(type,name,r7,...) \
unsigned long __v0 __asm__("$2") = __NR_##name; \
- __asm__ __volatile__ ("syscall\n" \
- : "=&r"(__v0), r7 (__r7) \
- : "0"(__v0), ##__VA_ARGS__ \
- : MIPS_SYSCALL_CLOBBERS); \
- LSS_RETURN(type, __v0, __r7)
- #undef _syscall0
- #define _syscall0(type, name) \
- type LSS_NAME(name)() { \
+ __asm__ __volatile__ ("syscall\n" \
+ : "=&r"(__v0), r7 (__r7) \
+ : "0"(__v0), ##__VA_ARGS__ \
+ : MIPS_SYSCALL_CLOBBERS); \
+ LSS_RETURN(type, __v0, __r7)
+ #undef _syscall0
+ #define _syscall0(type, name) \
+ type LSS_NAME(name)() { \
unsigned long __r7 __asm__("$7"); \
- LSS_BODY(type, name, "=r"); \
- }
- #undef _syscall1
- #define _syscall1(type, name, type1, arg1) \
- type LSS_NAME(name)(type1 arg1) { \
+ LSS_BODY(type, name, "=r"); \
+ }
+ #undef _syscall1
+ #define _syscall1(type, name, type1, arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
unsigned long __r7 __asm__("$7"); \
- LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \
- }
- #undef _syscall2
- #define _syscall2(type, name, type1, arg1, type2, arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+ LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \
+ }
+ #undef _syscall2
+ #define _syscall2(type, name, type1, arg1, type2, arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
unsigned long __r7 __asm__("$7"); \
- LSS_REG(4, arg1); LSS_REG(5, arg2); \
- LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \
- }
- #undef _syscall3
- #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); \
+ LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \
+ }
+ #undef _syscall3
+ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
unsigned long __r7 __asm__("$7"); \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); \
- LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \
- }
- #undef _syscall5
- #if _MIPS_SIM == _MIPS_SIM_ABI32
- /* The old 32bit MIPS system call API passes the fifth and sixth argument
- * on the stack, whereas the new APIs use registers "r8" and "r9".
- */
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
+ LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
+ LSS_REG(7, arg4); \
+ LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \
+ }
+ #undef _syscall5
+ #if _MIPS_SIM == _MIPS_SIM_ABI32
+ /* The old 32bit MIPS system call API passes the fifth and sixth argument
+ * on the stack, whereas the new APIs use registers "r8" and "r9".
+ */
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
+ LSS_REG(7, arg4); \
unsigned long __v0 __asm__("$2"); \
- __asm__ __volatile__ (".set noreorder\n" \
- "lw $2, %6\n" \
- "subu $29, 32\n" \
- "sw $2, 16($29)\n" \
- "li $2, %2\n" \
- "syscall\n" \
- "addiu $29, 32\n" \
- ".set reorder\n" \
- : "=&r"(__v0), "+r" (__r7) \
- : "i" (__NR_##name), "r"(__r4), "r"(__r5), \
- "r"(__r6), "m" ((unsigned long)arg5) \
- : MIPS_SYSCALL_CLOBBERS); \
- LSS_RETURN(type, __v0, __r7); \
- }
- #else
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); LSS_REG(8, arg5); \
- LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \
- "r"(__r8)); \
- }
- #endif
- #undef _syscall6
- #if _MIPS_SIM == _MIPS_SIM_ABI32
- /* The old 32bit MIPS system call API passes the fifth and sixth argument
- * on the stack, whereas the new APIs use registers "r8" and "r9".
- */
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); \
+ __asm__ __volatile__ (".set noreorder\n" \
+ "lw $2, %6\n" \
+ "subu $29, 32\n" \
+ "sw $2, 16($29)\n" \
+ "li $2, %2\n" \
+ "syscall\n" \
+ "addiu $29, 32\n" \
+ ".set reorder\n" \
+ : "=&r"(__v0), "+r" (__r7) \
+ : "i" (__NR_##name), "r"(__r4), "r"(__r5), \
+ "r"(__r6), "m" ((unsigned long)arg5) \
+ : MIPS_SYSCALL_CLOBBERS); \
+ LSS_RETURN(type, __v0, __r7); \
+ }
+ #else
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
+ LSS_REG(7, arg4); LSS_REG(8, arg5); \
+ LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \
+ "r"(__r8)); \
+ }
+ #endif
+ #undef _syscall6
+ #if _MIPS_SIM == _MIPS_SIM_ABI32
+ /* The old 32bit MIPS system call API passes the fifth and sixth argument
+ * on the stack, whereas the new APIs use registers "r8" and "r9".
+ */
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
+ LSS_REG(7, arg4); \
unsigned long __v0 __asm__("$2"); \
- __asm__ __volatile__ (".set noreorder\n" \
- "lw $2, %6\n" \
- "lw $8, %7\n" \
- "subu $29, 32\n" \
- "sw $2, 16($29)\n" \
- "sw $8, 20($29)\n" \
- "li $2, %2\n" \
- "syscall\n" \
- "addiu $29, 32\n" \
- ".set reorder\n" \
- : "=&r"(__v0), "+r" (__r7) \
- : "i" (__NR_##name), "r"(__r4), "r"(__r5), \
- "r"(__r6), "r" ((unsigned long)arg5), \
- "r" ((unsigned long)arg6) \
- : MIPS_SYSCALL_CLOBBERS); \
- LSS_RETURN(type, __v0, __r7); \
- }
- #else
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5,type6 arg6) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \
- LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \
- "r"(__r8), "r"(__r9)); \
- }
- #endif
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
+ __asm__ __volatile__ (".set noreorder\n" \
+ "lw $2, %6\n" \
+ "lw $8, %7\n" \
+ "subu $29, 32\n" \
+ "sw $2, 16($29)\n" \
+ "sw $8, 20($29)\n" \
+ "li $2, %2\n" \
+ "syscall\n" \
+ "addiu $29, 32\n" \
+ ".set reorder\n" \
+ : "=&r"(__v0), "+r" (__r7) \
+ : "i" (__NR_##name), "r"(__r4), "r"(__r5), \
+ "r"(__r6), "r" ((unsigned long)arg5), \
+ "r" ((unsigned long)arg6) \
+ : MIPS_SYSCALL_CLOBBERS); \
+ LSS_RETURN(type, __v0, __r7); \
+ }
+ #else
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5,type6 arg6) { \
+ LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
+ LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \
+ LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \
+ "r"(__r8), "r"(__r9)); \
+ }
+ #endif
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
unsigned long __v0 __asm__("$2");
unsigned long __r7 __asm__("$7") = (unsigned long)newtls;
- {
+ {
int __flags __asm__("$4") = flags;
void *__stack __asm__("$5") = child_stack;
void *__ptid __asm__("$6") = parent_tidptr;
int *__ctid __asm__("$8") = child_tidptr;
- __asm__ __volatile__(
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "subu $29,24\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "sub $29,16\n"
- #else
- "dsubu $29,16\n"
- #endif
-
- /* if (fn == NULL || child_stack == NULL)
- * return -EINVAL;
- */
- "li %0,%2\n"
- "beqz %5,1f\n"
- "beqz %6,1f\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "subu %6,32\n"
- "sw %5,0(%6)\n"
- "sw %8,4(%6)\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "sub %6,32\n"
- "sw %5,0(%6)\n"
- "sw %8,8(%6)\n"
- #else
- "dsubu %6,32\n"
- "sd %5,0(%6)\n"
- "sd %8,8(%6)\n"
- #endif
-
- /* $7 = syscall($4 = flags,
- * $5 = child_stack,
- * $6 = parent_tidptr,
- * $7 = newtls,
- * $8 = child_tidptr)
- */
- "li $2,%3\n"
- "syscall\n"
-
- /* if ($7 != 0)
- * return $2;
- */
- "bnez $7,1f\n"
- "bnez $2,1f\n"
-
- /* In the child, now. Call "fn(arg)".
- */
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "lw $25,0($29)\n"
- "lw $4,4($29)\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "lw $25,0($29)\n"
- "lw $4,8($29)\n"
- #else
- "ld $25,0($29)\n"
- "ld $4,8($29)\n"
- #endif
- "jalr $25\n"
-
- /* Call _exit($2)
- */
- "move $4,$2\n"
- "li $2,%4\n"
- "syscall\n"
-
- "1:\n"
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "addu $29, 24\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "add $29, 16\n"
- #else
- "daddu $29,16\n"
- #endif
- : "=&r" (__v0), "=r" (__r7)
- : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
- "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
- "r"(__ptid), "r"(__r7), "r"(__ctid)
- : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
- "$24", "memory");
- }
- LSS_RETURN(int, __v0, __r7);
- }
- #elif defined (__PPC__)
- #undef LSS_LOADARGS_0
- #define LSS_LOADARGS_0(name, dummy...) \
- __sc_0 = __NR_##name
- #undef LSS_LOADARGS_1
- #define LSS_LOADARGS_1(name, arg1) \
- LSS_LOADARGS_0(name); \
- __sc_3 = (unsigned long) (arg1)
- #undef LSS_LOADARGS_2
- #define LSS_LOADARGS_2(name, arg1, arg2) \
- LSS_LOADARGS_1(name, arg1); \
- __sc_4 = (unsigned long) (arg2)
- #undef LSS_LOADARGS_3
- #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \
- LSS_LOADARGS_2(name, arg1, arg2); \
- __sc_5 = (unsigned long) (arg3)
- #undef LSS_LOADARGS_4
- #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \
- LSS_LOADARGS_3(name, arg1, arg2, arg3); \
- __sc_6 = (unsigned long) (arg4)
- #undef LSS_LOADARGS_5
- #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \
- LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \
- __sc_7 = (unsigned long) (arg5)
- #undef LSS_LOADARGS_6
- #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \
- LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \
- __sc_8 = (unsigned long) (arg6)
- #undef LSS_ASMINPUT_0
- #define LSS_ASMINPUT_0 "0" (__sc_0)
- #undef LSS_ASMINPUT_1
- #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
- #undef LSS_ASMINPUT_2
- #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
- #undef LSS_ASMINPUT_3
- #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
- #undef LSS_ASMINPUT_4
- #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
- #undef LSS_ASMINPUT_5
- #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
- #undef LSS_ASMINPUT_6
- #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
- #undef LSS_BODY
- #define LSS_BODY(nr, type, name, args...) \
- long __sc_ret, __sc_err; \
- { \
+ __asm__ __volatile__(
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+ "subu $29,24\n"
+ #elif _MIPS_SIM == _MIPS_SIM_NABI32
+ "sub $29,16\n"
+ #else
+ "dsubu $29,16\n"
+ #endif
+
+ /* if (fn == NULL || child_stack == NULL)
+ * return -EINVAL;
+ */
+ "li %0,%2\n"
+ "beqz %5,1f\n"
+ "beqz %6,1f\n"
+
+ /* Push "arg" and "fn" onto the stack that will be
+ * used by the child.
+ */
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+ "subu %6,32\n"
+ "sw %5,0(%6)\n"
+ "sw %8,4(%6)\n"
+ #elif _MIPS_SIM == _MIPS_SIM_NABI32
+ "sub %6,32\n"
+ "sw %5,0(%6)\n"
+ "sw %8,8(%6)\n"
+ #else
+ "dsubu %6,32\n"
+ "sd %5,0(%6)\n"
+ "sd %8,8(%6)\n"
+ #endif
+
+ /* $7 = syscall($4 = flags,
+ * $5 = child_stack,
+ * $6 = parent_tidptr,
+ * $7 = newtls,
+ * $8 = child_tidptr)
+ */
+ "li $2,%3\n"
+ "syscall\n"
+
+ /* if ($7 != 0)
+ * return $2;
+ */
+ "bnez $7,1f\n"
+ "bnez $2,1f\n"
+
+ /* In the child, now. Call "fn(arg)".
+ */
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+ "lw $25,0($29)\n"
+ "lw $4,4($29)\n"
+ #elif _MIPS_SIM == _MIPS_SIM_NABI32
+ "lw $25,0($29)\n"
+ "lw $4,8($29)\n"
+ #else
+ "ld $25,0($29)\n"
+ "ld $4,8($29)\n"
+ #endif
+ "jalr $25\n"
+
+ /* Call _exit($2)
+ */
+ "move $4,$2\n"
+ "li $2,%4\n"
+ "syscall\n"
+
+ "1:\n"
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+ "addu $29, 24\n"
+ #elif _MIPS_SIM == _MIPS_SIM_NABI32
+ "add $29, 16\n"
+ #else
+ "daddu $29,16\n"
+ #endif
+ : "=&r" (__v0), "=r" (__r7)
+ : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+ "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+ "r"(__ptid), "r"(__r7), "r"(__ctid)
+ : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+ "$24", "memory");
+ }
+ LSS_RETURN(int, __v0, __r7);
+ }
+ #elif defined (__PPC__)
+ #undef LSS_LOADARGS_0
+ #define LSS_LOADARGS_0(name, dummy...) \
+ __sc_0 = __NR_##name
+ #undef LSS_LOADARGS_1
+ #define LSS_LOADARGS_1(name, arg1) \
+ LSS_LOADARGS_0(name); \
+ __sc_3 = (unsigned long) (arg1)
+ #undef LSS_LOADARGS_2
+ #define LSS_LOADARGS_2(name, arg1, arg2) \
+ LSS_LOADARGS_1(name, arg1); \
+ __sc_4 = (unsigned long) (arg2)
+ #undef LSS_LOADARGS_3
+ #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \
+ LSS_LOADARGS_2(name, arg1, arg2); \
+ __sc_5 = (unsigned long) (arg3)
+ #undef LSS_LOADARGS_4
+ #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \
+ LSS_LOADARGS_3(name, arg1, arg2, arg3); \
+ __sc_6 = (unsigned long) (arg4)
+ #undef LSS_LOADARGS_5
+ #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \
+ LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \
+ __sc_7 = (unsigned long) (arg5)
+ #undef LSS_LOADARGS_6
+ #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \
+ __sc_8 = (unsigned long) (arg6)
+ #undef LSS_ASMINPUT_0
+ #define LSS_ASMINPUT_0 "0" (__sc_0)
+ #undef LSS_ASMINPUT_1
+ #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
+ #undef LSS_ASMINPUT_2
+ #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
+ #undef LSS_ASMINPUT_3
+ #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
+ #undef LSS_ASMINPUT_4
+ #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
+ #undef LSS_ASMINPUT_5
+ #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
+ #undef LSS_ASMINPUT_6
+ #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
+ #undef LSS_BODY
+ #define LSS_BODY(nr, type, name, args...) \
+ long __sc_ret, __sc_err; \
+ { \
unsigned long __sc_0 __asm__ ("r0"); \
unsigned long __sc_3 __asm__ ("r3"); \
unsigned long __sc_4 __asm__ ("r4"); \
@@ -1646,71 +1646,71 @@ struct kernel_stat {
unsigned long __sc_6 __asm__ ("r6"); \
unsigned long __sc_7 __asm__ ("r7"); \
unsigned long __sc_8 __asm__ ("r8"); \
- \
- LSS_LOADARGS_##nr(name, args); \
- __asm__ __volatile__ \
- ("sc\n\t" \
- "mfcr %0" \
- : "=&r" (__sc_0), \
- "=&r" (__sc_3), "=&r" (__sc_4), \
- "=&r" (__sc_5), "=&r" (__sc_6), \
- "=&r" (__sc_7), "=&r" (__sc_8) \
- : LSS_ASMINPUT_##nr \
- : "cr0", "ctr", "memory", \
- "r9", "r10", "r11", "r12"); \
- __sc_ret = __sc_3; \
- __sc_err = __sc_0; \
- } \
- LSS_RETURN(type, __sc_ret, __sc_err)
- #undef _syscall0
- #define _syscall0(type, name) \
- type LSS_NAME(name)(void) { \
- LSS_BODY(0, type, name); \
- }
- #undef _syscall1
- #define _syscall1(type, name, type1, arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(1, type, name, arg1); \
- }
- #undef _syscall2
- #define _syscall2(type, name, type1, arg1, type2, arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_BODY(2, type, name, arg1, arg2); \
- }
- #undef _syscall3
- #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_BODY(3, type, name, arg1, arg2, arg3); \
- }
- #undef _syscall4
- #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \
- }
- #undef _syscall5
- #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4, type5, arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \
- }
- #undef _syscall6
- #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4, type5, arg5, type6, arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \
- }
- /* clone function adapted from glibc 2.3.6 clone.S */
- /* TODO(csilvers): consider wrapping some args up in a struct, like we
- * do for i386's _syscall6, so we can compile successfully on gcc 2.95
- */
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __ret, __err;
- {
+ \
+ LSS_LOADARGS_##nr(name, args); \
+ __asm__ __volatile__ \
+ ("sc\n\t" \
+ "mfcr %0" \
+ : "=&r" (__sc_0), \
+ "=&r" (__sc_3), "=&r" (__sc_4), \
+ "=&r" (__sc_5), "=&r" (__sc_6), \
+ "=&r" (__sc_7), "=&r" (__sc_8) \
+ : LSS_ASMINPUT_##nr \
+ : "cr0", "ctr", "memory", \
+ "r9", "r10", "r11", "r12"); \
+ __sc_ret = __sc_3; \
+ __sc_err = __sc_0; \
+ } \
+ LSS_RETURN(type, __sc_ret, __sc_err)
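/* Note on the PPC convention used above (summary only): the "sc" instruction
 * returns the result in r3 and signals failure through the CR0.SO bit;
 * "mfcr %0" copies the condition register into __sc_err so that LSS_RETURN()
 * can test (err & 0x10000000), matching the definition given earlier for PPC.
 */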
+ #undef _syscall0
+ #define _syscall0(type, name) \
+ type LSS_NAME(name)(void) { \
+ LSS_BODY(0, type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type, name, type1, arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+ LSS_BODY(1, type, name, arg1); \
+ }
+ #undef _syscall2
+ #define _syscall2(type, name, type1, arg1, type2, arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+ LSS_BODY(2, type, name, arg1, arg2); \
+ }
+ #undef _syscall3
+ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+ LSS_BODY(3, type, name, arg1, arg2, arg3); \
+ }
+ #undef _syscall4
+ #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \
+ }
+ #undef _syscall5
+ #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \
+ }
+ #undef _syscall6
+ #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5, type6, arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \
+ }
+ /* clone function adapted from glibc 2.3.6 clone.S */
+ /* TODO(csilvers): consider wrapping some args up in a struct, like we
+ * do for i386's _syscall6, so we can compile successfully on gcc 2.95
+ */
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long __ret, __err;
+ {
int (*__fn)(void *) __asm__ ("r8") = fn;
void *__cstack __asm__ ("r4") = child_stack;
int __flags __asm__ ("r3") = flags;
@@ -1718,342 +1718,342 @@ struct kernel_stat {
int * __ptidptr __asm__ ("r5") = parent_tidptr;
void * __newtls __asm__ ("r6") = newtls;
int * __ctidptr __asm__ ("r7") = child_tidptr;
- __asm__ __volatile__(
- /* check for fn == NULL
- * and child_stack == NULL
- */
- "cmpwi cr0, %6, 0\n\t"
- "cmpwi cr1, %7, 0\n\t"
- "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
- "beq- cr0, 1f\n\t"
-
- /* set up stack frame for child */
- "clrrwi %7, %7, 4\n\t"
- "li 0, 0\n\t"
- "stwu 0, -16(%7)\n\t"
-
- /* fn, arg, child_stack are saved across the syscall: r28-30 */
- "mr 28, %6\n\t"
- "mr 29, %7\n\t"
- "mr 27, %9\n\t"
-
- /* syscall */
- "li 0, %4\n\t"
- /* flags already in r3
- * child_stack already in r4
- * ptidptr already in r5
- * newtls already in r6
- * ctidptr already in r7
- */
- "sc\n\t"
-
- /* Test if syscall was successful */
- "cmpwi cr1, 3, 0\n\t"
- "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
- "bne- cr1, 1f\n\t"
-
- /* Do the function call */
- "mtctr 28\n\t"
- "mr 3, 27\n\t"
- "bctrl\n\t"
-
- /* Call _exit(r3) */
- "li 0, %5\n\t"
- "sc\n\t"
-
- /* Return to parent */
- "1:\n"
- "mfcr %1\n\t"
- "mr %0, 3\n\t"
- : "=r" (__ret), "=r" (__err)
- : "0" (-1), "1" (EINVAL),
- "i" (__NR_clone), "i" (__NR_exit),
- "r" (__fn), "r" (__cstack), "r" (__flags),
- "r" (__arg), "r" (__ptidptr), "r" (__newtls),
- "r" (__ctidptr)
- : "cr0", "cr1", "memory", "ctr",
- "r0", "r29", "r27", "r28");
- }
- LSS_RETURN(int, __ret, __err);
- }
- #endif
- #define __NR__exit __NR_exit
- #define __NR__gettid __NR_gettid
- #define __NR__mremap __NR_mremap
- LSS_INLINE _syscall1(int, close, int, f)
- LSS_INLINE _syscall1(int, _exit, int, e)
- LSS_INLINE _syscall3(int, fcntl, int, f,
- int, c, long, a)
- LSS_INLINE _syscall2(int, fstat, int, f,
- struct kernel_stat*, b)
- LSS_INLINE _syscall4(int, futex, int*, a,
- int, o, int, v,
- struct kernel_timespec*, t)
- LSS_INLINE _syscall3(int, getdents, int, f,
- struct kernel_dirent*, d, int, c)
-#ifdef __NR_getdents64
- LSS_INLINE _syscall3(int, getdents64, int, f,
- struct kernel_dirent64*, d, int, c)
-#endif
- LSS_INLINE _syscall0(pid_t, getpid)
- LSS_INLINE _syscall0(pid_t, getppid)
- LSS_INLINE _syscall0(pid_t, _gettid)
- LSS_INLINE _syscall2(int, kill, pid_t, p,
- int, s)
- LSS_INLINE _syscall3(off_t, lseek, int, f,
- off_t, o, int, w)
- LSS_INLINE _syscall2(int, munmap, void*, s,
- size_t, l)
- LSS_INLINE _syscall5(void*, _mremap, void*, o,
- size_t, os, size_t, ns,
- unsigned long, f, void *, a)
- LSS_INLINE _syscall3(int, open, const char*, p,
- int, f, int, m)
- LSS_INLINE _syscall2(int, prctl, int, o,
- long, a)
- LSS_INLINE _syscall4(long, ptrace, int, r,
- pid_t, p, void *, a, void *, d)
- LSS_INLINE _syscall3(ssize_t, read, int, f,
- void *, b, size_t, c)
- LSS_INLINE _syscall4(int, rt_sigaction, int, s,
- const struct kernel_sigaction*, a,
- struct kernel_sigaction*, o, size_t, c)
- LSS_INLINE _syscall4(int, rt_sigprocmask, int, h,
- const struct kernel_sigset_t*, s,
- struct kernel_sigset_t*, o, size_t, c);
- LSS_INLINE _syscall0(int, sched_yield)
- LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s,
- const stack_t*, o)
- LSS_INLINE _syscall2(int, stat, const char*, f,
- struct kernel_stat*, b)
- LSS_INLINE _syscall3(ssize_t, write, int, f,
- const void *, b, size_t, c)
- #if defined(__NR_getcpu)
- LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
- unsigned *, node, void *, unused);
- #endif
- #if defined(__x86_64__) || \
- (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
- LSS_INLINE _syscall3(int, socket, int, d,
- int, t, int, p)
- #endif
- #if defined(__x86_64__)
- LSS_INLINE _syscall6(void*, mmap, void*, s,
- size_t, l, int, p,
- int, f, int, d,
- off64_t, o)
-
- LSS_INLINE int LSS_NAME(sigaction)(int signum,
- const struct kernel_sigaction *act,
- struct kernel_sigaction *oldact) {
- /* On x86_64, the kernel requires us to always set our own
- * SA_RESTORER in order to be able to return from a signal handler.
- * This function must have a "magic" signature that the "gdb"
- * (and maybe the kernel?) can recognize.
- */
- if (act != NULL && !(act->sa_flags & SA_RESTORER)) {
- struct kernel_sigaction a = *act;
- a.sa_flags |= SA_RESTORER;
- a.sa_restorer = LSS_NAME(restore_rt)();
- return LSS_NAME(rt_sigaction)(signum, &a, oldact,
- (KERNEL_NSIG+7)/8);
- } else {
- return LSS_NAME(rt_sigaction)(signum, act, oldact,
- (KERNEL_NSIG+7)/8);
- }
- }
-
- LSS_INLINE int LSS_NAME(sigprocmask)(int how,
- const struct kernel_sigset_t *set,
- struct kernel_sigset_t *oldset) {
- return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
- }
- #endif
- #if defined(__x86_64__) || \
- defined(__arm__) || \
- (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
- LSS_INLINE _syscall4(pid_t, wait4, pid_t, p,
- int*, s, int, o,
- struct kernel_rusage*, r)
- LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
- return LSS_NAME(wait4)(pid, status, options, 0);
- }
- #endif
- #if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
- LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
- #endif
- LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
- memset(&set->sig, 0, sizeof(set->sig));
- return 0;
- }
-
- LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
- memset(&set->sig, -1, sizeof(set->sig));
- return 0;
- }
-
- LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
- int signum) {
- if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
- LSS_ERRNO = EINVAL;
- return -1;
- } else {
- set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
- |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0])));
- return 0;
- }
- }
-
- LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
- int signum) {
- if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
- LSS_ERRNO = EINVAL;
- return -1;
- } else {
- set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
- &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0]))));
- return 0;
- }
- }
-
- #if defined(__i386__) || \
- defined(__arm__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
- #define __NR__sigaction __NR_sigaction
- #define __NR__sigprocmask __NR_sigprocmask
- LSS_INLINE _syscall2(int, fstat64, int, f,
- struct kernel_stat64 *, b)
- LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo,
- loff_t *, res, uint, wh)
-#ifdef __PPC64__
- LSS_INLINE _syscall6(void*, mmap, void*, s,
- size_t, l, int, p,
- int, f, int, d,
- off_t, o)
-#else
- #ifndef __ARM_EABI__
- /* Not available on ARM EABI Linux. */
- LSS_INLINE _syscall1(void*, mmap, void*, a)
- #endif
- LSS_INLINE _syscall6(void*, mmap2, void*, s,
- size_t, l, int, p,
- int, f, int, d,
- off_t, o)
-#endif
- LSS_INLINE _syscall3(int, _sigaction, int, s,
- const struct kernel_old_sigaction*, a,
- struct kernel_old_sigaction*, o)
- LSS_INLINE _syscall3(int, _sigprocmask, int, h,
- const unsigned long*, s,
- unsigned long*, o)
- LSS_INLINE _syscall2(int, stat64, const char *, p,
- struct kernel_stat64 *, b)
-
- LSS_INLINE int LSS_NAME(sigaction)(int signum,
- const struct kernel_sigaction *act,
- struct kernel_sigaction *oldact) {
- int old_errno = LSS_ERRNO;
- int rc;
- struct kernel_sigaction a;
- if (act != NULL) {
- a = *act;
- #ifdef __i386__
- /* On i386, the kernel requires us to always set our own
- * SA_RESTORER when using realtime signals. Otherwise, it does not
- * know how to return from a signal handler. This function must have
- * a "magic" signature that the "gdb" (and maybe the kernel?) can
- * recognize.
- * Apparently, a SA_RESTORER is implicitly set by the kernel, when
- * using non-realtime signals.
- *
- * TODO: Test whether ARM needs a restorer
- */
- if (!(a.sa_flags & SA_RESTORER)) {
- a.sa_flags |= SA_RESTORER;
- a.sa_restorer = (a.sa_flags & SA_SIGINFO)
- ? LSS_NAME(restore_rt)() : LSS_NAME(restore)();
- }
- #endif
- }
- rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
- (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
- if (!act) {
- ptr_a = NULL;
- } else {
- oa.sa_handler_ = act->sa_handler_;
- memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
- #ifndef __mips__
- oa.sa_restorer = act->sa_restorer;
- #endif
- oa.sa_flags = act->sa_flags;
- }
- if (!oldact) {
- ptr_oa = NULL;
- }
- LSS_ERRNO = old_errno;
- rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
- if (rc == 0 && oldact) {
- if (act) {
- memcpy(oldact, act, sizeof(*act));
- } else {
- memset(oldact, 0, sizeof(*oldact));
- }
- oldact->sa_handler_ = ptr_oa->sa_handler_;
- oldact->sa_flags = ptr_oa->sa_flags;
- memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
- #ifndef __mips__
- oldact->sa_restorer = ptr_oa->sa_restorer;
- #endif
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(sigprocmask)(int how,
- const struct kernel_sigset_t *set,
- struct kernel_sigset_t *oldset) {
- int olderrno = LSS_ERRNO;
- int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- LSS_ERRNO = olderrno;
- if (oldset) {
- LSS_NAME(sigemptyset)(oldset);
- }
- rc = LSS_NAME(_sigprocmask)(how,
- set ? &set->sig[0] : NULL,
- oldset ? &oldset->sig[0] : NULL);
- }
- return rc;
- }
- #endif
- #if defined(__PPC__)
- #undef LSS_SC_LOADARGS_0
- #define LSS_SC_LOADARGS_0(dummy...)
- #undef LSS_SC_LOADARGS_1
- #define LSS_SC_LOADARGS_1(arg1) \
- __sc_4 = (unsigned long) (arg1)
- #undef LSS_SC_LOADARGS_2
- #define LSS_SC_LOADARGS_2(arg1, arg2) \
- LSS_SC_LOADARGS_1(arg1); \
- __sc_5 = (unsigned long) (arg2)
- #undef LSS_SC_LOADARGS_3
- #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \
- LSS_SC_LOADARGS_2(arg1, arg2); \
- __sc_6 = (unsigned long) (arg3)
- #undef LSS_SC_LOADARGS_4
- #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \
- LSS_SC_LOADARGS_3(arg1, arg2, arg3); \
- __sc_7 = (unsigned long) (arg4)
- #undef LSS_SC_LOADARGS_5
- #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \
- LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \
- __sc_8 = (unsigned long) (arg5)
- #undef LSS_SC_BODY
- #define LSS_SC_BODY(nr, type, opt, args...) \
- long __sc_ret, __sc_err; \
- { \
+ __asm__ __volatile__(
+ /* check for fn == NULL
+ * and child_stack == NULL
+ */
+ "cmpwi cr0, %6, 0\n\t"
+ "cmpwi cr1, %7, 0\n\t"
+ "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+ "beq- cr0, 1f\n\t"
+
+ /* set up stack frame for child */
+ "clrrwi %7, %7, 4\n\t"
+ "li 0, 0\n\t"
+ "stwu 0, -16(%7)\n\t"
+
+ /* fn, arg, child_stack are saved across the syscall: r28-30 */
+ "mr 28, %6\n\t"
+ "mr 29, %7\n\t"
+ "mr 27, %9\n\t"
+
+ /* syscall */
+ "li 0, %4\n\t"
+ /* flags already in r3
+ * child_stack already in r4
+ * ptidptr already in r5
+ * newtls already in r6
+ * ctidptr already in r7
+ */
+ "sc\n\t"
+
+ /* Test if syscall was successful */
+ "cmpwi cr1, 3, 0\n\t"
+ "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+ "bne- cr1, 1f\n\t"
+
+ /* Do the function call */
+ "mtctr 28\n\t"
+ "mr 3, 27\n\t"
+ "bctrl\n\t"
+
+ /* Call _exit(r3) */
+ "li 0, %5\n\t"
+ "sc\n\t"
+
+ /* Return to parent */
+ "1:\n"
+ "mfcr %1\n\t"
+ "mr %0, 3\n\t"
+ : "=r" (__ret), "=r" (__err)
+ : "0" (-1), "1" (EINVAL),
+ "i" (__NR_clone), "i" (__NR_exit),
+ "r" (__fn), "r" (__cstack), "r" (__flags),
+ "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+ "r" (__ctidptr)
+ : "cr0", "cr1", "memory", "ctr",
+ "r0", "r29", "r27", "r28");
+ }
+ LSS_RETURN(int, __ret, __err);
+ }
+ #endif
+ #define __NR__exit __NR_exit
+ #define __NR__gettid __NR_gettid
+ #define __NR__mremap __NR_mremap
+ LSS_INLINE _syscall1(int, close, int, f)
+ LSS_INLINE _syscall1(int, _exit, int, e)
+ LSS_INLINE _syscall3(int, fcntl, int, f,
+ int, c, long, a)
+ LSS_INLINE _syscall2(int, fstat, int, f,
+ struct kernel_stat*, b)
+ LSS_INLINE _syscall4(int, futex, int*, a,
+ int, o, int, v,
+ struct kernel_timespec*, t)
+ LSS_INLINE _syscall3(int, getdents, int, f,
+ struct kernel_dirent*, d, int, c)
+#ifdef __NR_getdents64
+ LSS_INLINE _syscall3(int, getdents64, int, f,
+ struct kernel_dirent64*, d, int, c)
+#endif
+ LSS_INLINE _syscall0(pid_t, getpid)
+ LSS_INLINE _syscall0(pid_t, getppid)
+ LSS_INLINE _syscall0(pid_t, _gettid)
+ LSS_INLINE _syscall2(int, kill, pid_t, p,
+ int, s)
+ LSS_INLINE _syscall3(off_t, lseek, int, f,
+ off_t, o, int, w)
+ LSS_INLINE _syscall2(int, munmap, void*, s,
+ size_t, l)
+ LSS_INLINE _syscall5(void*, _mremap, void*, o,
+ size_t, os, size_t, ns,
+ unsigned long, f, void *, a)
+ LSS_INLINE _syscall3(int, open, const char*, p,
+ int, f, int, m)
+ LSS_INLINE _syscall2(int, prctl, int, o,
+ long, a)
+ LSS_INLINE _syscall4(long, ptrace, int, r,
+ pid_t, p, void *, a, void *, d)
+ LSS_INLINE _syscall3(ssize_t, read, int, f,
+ void *, b, size_t, c)
+ LSS_INLINE _syscall4(int, rt_sigaction, int, s,
+ const struct kernel_sigaction*, a,
+ struct kernel_sigaction*, o, size_t, c)
+ LSS_INLINE _syscall4(int, rt_sigprocmask, int, h,
+ const struct kernel_sigset_t*, s,
+ struct kernel_sigset_t*, o, size_t, c);
+ LSS_INLINE _syscall0(int, sched_yield)
+ LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s,
+ const stack_t*, o)
+ LSS_INLINE _syscall2(int, stat, const char*, f,
+ struct kernel_stat*, b)
+ LSS_INLINE _syscall3(ssize_t, write, int, f,
+ const void *, b, size_t, c)
+ #if defined(__NR_getcpu)
+ LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
+ unsigned *, node, void *, unused);
+ #endif
+ #if defined(__x86_64__) || \
+ (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
+ LSS_INLINE _syscall3(int, socket, int, d,
+ int, t, int, p)
+ #endif
+ #if defined(__x86_64__)
+ LSS_INLINE _syscall6(void*, mmap, void*, s,
+ size_t, l, int, p,
+ int, f, int, d,
+ off64_t, o)
+
+ LSS_INLINE int LSS_NAME(sigaction)(int signum,
+ const struct kernel_sigaction *act,
+ struct kernel_sigaction *oldact) {
+ /* On x86_64, the kernel requires us to always set our own
+ * SA_RESTORER in order to be able to return from a signal handler.
+ * This function must have a "magic" signature that the "gdb"
+ * (and maybe the kernel?) can recognize.
+ */
+ if (act != NULL && !(act->sa_flags & SA_RESTORER)) {
+ struct kernel_sigaction a = *act;
+ a.sa_flags |= SA_RESTORER;
+ a.sa_restorer = LSS_NAME(restore_rt)();
+ return LSS_NAME(rt_sigaction)(signum, &a, oldact,
+ (KERNEL_NSIG+7)/8);
+ } else {
+ return LSS_NAME(rt_sigaction)(signum, act, oldact,
+ (KERNEL_NSIG+7)/8);
+ }
+ }
+
+ LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+ const struct kernel_sigset_t *set,
+ struct kernel_sigset_t *oldset) {
+ return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+ }
+ #endif
+ #if defined(__x86_64__) || \
+ defined(__arm__) || \
+ (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
+ LSS_INLINE _syscall4(pid_t, wait4, pid_t, p,
+ int*, s, int, o,
+ struct kernel_rusage*, r)
+    LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options) {
+ return LSS_NAME(wait4)(pid, status, options, 0);
+ }
+ #endif
+ #if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+ LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
+ #endif
+ LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
+ memset(&set->sig, 0, sizeof(set->sig));
+ return 0;
+ }
+
+ LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
+ memset(&set->sig, -1, sizeof(set->sig));
+ return 0;
+ }
+
+ LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
+ int signum) {
+ if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+ LSS_ERRNO = EINVAL;
+ return -1;
+ } else {
+ set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
+ |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0])));
+ return 0;
+ }
+ }
+
+ LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
+ int signum) {
+ if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+ LSS_ERRNO = EINVAL;
+ return -1;
+ } else {
+ set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
+ &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0]))));
+ return 0;
+ }
+ }
+
+ #if defined(__i386__) || \
+ defined(__arm__) || \
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
+ #define __NR__sigaction __NR_sigaction
+ #define __NR__sigprocmask __NR_sigprocmask
+ LSS_INLINE _syscall2(int, fstat64, int, f,
+ struct kernel_stat64 *, b)
+ LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo,
+ loff_t *, res, uint, wh)
+#ifdef __PPC64__
+ LSS_INLINE _syscall6(void*, mmap, void*, s,
+ size_t, l, int, p,
+ int, f, int, d,
+ off_t, o)
+#else
+ #ifndef __ARM_EABI__
+ /* Not available on ARM EABI Linux. */
+ LSS_INLINE _syscall1(void*, mmap, void*, a)
+ #endif
+ LSS_INLINE _syscall6(void*, mmap2, void*, s,
+ size_t, l, int, p,
+ int, f, int, d,
+ off_t, o)
+#endif
+ LSS_INLINE _syscall3(int, _sigaction, int, s,
+ const struct kernel_old_sigaction*, a,
+ struct kernel_old_sigaction*, o)
+ LSS_INLINE _syscall3(int, _sigprocmask, int, h,
+ const unsigned long*, s,
+ unsigned long*, o)
+ LSS_INLINE _syscall2(int, stat64, const char *, p,
+ struct kernel_stat64 *, b)
+
+ LSS_INLINE int LSS_NAME(sigaction)(int signum,
+ const struct kernel_sigaction *act,
+ struct kernel_sigaction *oldact) {
+ int old_errno = LSS_ERRNO;
+ int rc;
+ struct kernel_sigaction a;
+ if (act != NULL) {
+ a = *act;
+ #ifdef __i386__
+ /* On i386, the kernel requires us to always set our own
+ * SA_RESTORER when using realtime signals. Otherwise, it does not
+ * know how to return from a signal handler. This function must have
+ * a "magic" signature that the "gdb" (and maybe the kernel?) can
+ * recognize.
+ * Apparently, a SA_RESTORER is implicitly set by the kernel, when
+ * using non-realtime signals.
+ *
+ * TODO: Test whether ARM needs a restorer
+ */
+ if (!(a.sa_flags & SA_RESTORER)) {
+ a.sa_flags |= SA_RESTORER;
+ a.sa_restorer = (a.sa_flags & SA_SIGINFO)
+ ? LSS_NAME(restore_rt)() : LSS_NAME(restore)();
+ }
+ #endif
+ }
+ rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
+ (KERNEL_NSIG+7)/8);
+ if (rc < 0 && LSS_ERRNO == ENOSYS) {
+ struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
+ if (!act) {
+ ptr_a = NULL;
+ } else {
+ oa.sa_handler_ = act->sa_handler_;
+ memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
+ #ifndef __mips__
+ oa.sa_restorer = act->sa_restorer;
+ #endif
+ oa.sa_flags = act->sa_flags;
+ }
+ if (!oldact) {
+ ptr_oa = NULL;
+ }
+ LSS_ERRNO = old_errno;
+ rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
+ if (rc == 0 && oldact) {
+ if (act) {
+ memcpy(oldact, act, sizeof(*act));
+ } else {
+ memset(oldact, 0, sizeof(*oldact));
+ }
+ oldact->sa_handler_ = ptr_oa->sa_handler_;
+ oldact->sa_flags = ptr_oa->sa_flags;
+ memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
+ #ifndef __mips__
+ oldact->sa_restorer = ptr_oa->sa_restorer;
+ #endif
+ }
+ }
+ return rc;
+ }
+
+ LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+ const struct kernel_sigset_t *set,
+ struct kernel_sigset_t *oldset) {
+ int olderrno = LSS_ERRNO;
+ int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+ if (rc < 0 && LSS_ERRNO == ENOSYS) {
+ LSS_ERRNO = olderrno;
+ if (oldset) {
+ LSS_NAME(sigemptyset)(oldset);
+ }
+ rc = LSS_NAME(_sigprocmask)(how,
+ set ? &set->sig[0] : NULL,
+ oldset ? &oldset->sig[0] : NULL);
+ }
+ return rc;
+ }
+ #endif
+ #if defined(__PPC__)
+ #undef LSS_SC_LOADARGS_0
+ #define LSS_SC_LOADARGS_0(dummy...)
+ #undef LSS_SC_LOADARGS_1
+ #define LSS_SC_LOADARGS_1(arg1) \
+ __sc_4 = (unsigned long) (arg1)
+ #undef LSS_SC_LOADARGS_2
+ #define LSS_SC_LOADARGS_2(arg1, arg2) \
+ LSS_SC_LOADARGS_1(arg1); \
+ __sc_5 = (unsigned long) (arg2)
+ #undef LSS_SC_LOADARGS_3
+ #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \
+ LSS_SC_LOADARGS_2(arg1, arg2); \
+ __sc_6 = (unsigned long) (arg3)
+ #undef LSS_SC_LOADARGS_4
+ #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \
+ LSS_SC_LOADARGS_3(arg1, arg2, arg3); \
+ __sc_7 = (unsigned long) (arg4)
+ #undef LSS_SC_LOADARGS_5
+ #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \
+ LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \
+ __sc_8 = (unsigned long) (arg5)
+ #undef LSS_SC_BODY
+ #define LSS_SC_BODY(nr, type, opt, args...) \
+ long __sc_ret, __sc_err; \
+ { \
unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \
unsigned long __sc_3 __asm__ ("r3") = opt; \
unsigned long __sc_4 __asm__ ("r4"); \
@@ -2061,125 +2061,125 @@ struct kernel_stat {
unsigned long __sc_6 __asm__ ("r6"); \
unsigned long __sc_7 __asm__ ("r7"); \
unsigned long __sc_8 __asm__ ("r8"); \
- LSS_SC_LOADARGS_##nr(args); \
- __asm__ __volatile__ \
- ("stwu 1, -48(1)\n\t" \
- "stw 4, 20(1)\n\t" \
- "stw 5, 24(1)\n\t" \
- "stw 6, 28(1)\n\t" \
- "stw 7, 32(1)\n\t" \
- "stw 8, 36(1)\n\t" \
- "addi 4, 1, 20\n\t" \
- "sc\n\t" \
- "mfcr %0" \
- : "=&r" (__sc_0), \
- "=&r" (__sc_3), "=&r" (__sc_4), \
- "=&r" (__sc_5), "=&r" (__sc_6), \
- "=&r" (__sc_7), "=&r" (__sc_8) \
- : LSS_ASMINPUT_##nr \
- : "cr0", "ctr", "memory"); \
- __sc_ret = __sc_3; \
- __sc_err = __sc_0; \
- } \
- LSS_RETURN(type, __sc_ret, __sc_err)
-
- LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
- LSS_SC_BODY(3, int, 1, domain, type, protocol);
- }
- #endif
- #if defined(__i386__) || \
- (defined(__arm__) && !defined(__ARM_EABI__)) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
-
- /* See sys_socketcall in net/socket.c in kernel source.
- * It de-multiplexes on its first arg and unpacks the arglist
- * array in its second arg.
- */
- LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a)
-
- LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
- unsigned long args[3] = {
- (unsigned long) domain,
- (unsigned long) type,
- (unsigned long) protocol
- };
- return LSS_NAME(socketcall)(1, args);
- }
- #elif defined(__ARM_EABI__)
- LSS_INLINE _syscall3(int, socket, int, d,
- int, t, int, p)
- #endif
- #if defined(__i386__) || defined(__PPC__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
- LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p,
- int*, s, int, o)
- #endif
- #if defined(__mips__)
- /* sys_pipe() on MIPS has non-standard calling conventions, as it returns
- * both file handles through CPU registers.
- */
- LSS_INLINE int LSS_NAME(pipe)(int *p) {
+ LSS_SC_LOADARGS_##nr(args); \
+ __asm__ __volatile__ \
+ ("stwu 1, -48(1)\n\t" \
+ "stw 4, 20(1)\n\t" \
+ "stw 5, 24(1)\n\t" \
+ "stw 6, 28(1)\n\t" \
+ "stw 7, 32(1)\n\t" \
+ "stw 8, 36(1)\n\t" \
+ "addi 4, 1, 20\n\t" \
+ "sc\n\t" \
+ "mfcr %0" \
+ : "=&r" (__sc_0), \
+ "=&r" (__sc_3), "=&r" (__sc_4), \
+ "=&r" (__sc_5), "=&r" (__sc_6), \
+ "=&r" (__sc_7), "=&r" (__sc_8) \
+ : LSS_ASMINPUT_##nr \
+ : "cr0", "ctr", "memory"); \
+ __sc_ret = __sc_3; \
+ __sc_err = __sc_0; \
+ } \
+ LSS_RETURN(type, __sc_ret, __sc_err)
+
+ LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
+ LSS_SC_BODY(3, int, 1, domain, type, protocol);
+ }
+ #endif
+ #if defined(__i386__) || \
+ (defined(__arm__) && !defined(__ARM_EABI__)) || \
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+
+ /* See sys_socketcall in net/socket.c in kernel source.
+ * It de-multiplexes on its first arg and unpacks the arglist
+ * array in its second arg.
+ */
+ LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a)
+
+ LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
+ unsigned long args[3] = {
+ (unsigned long) domain,
+ (unsigned long) type,
+ (unsigned long) protocol
+ };
+ return LSS_NAME(socketcall)(1, args);
+ }
+ #elif defined(__ARM_EABI__)
+ LSS_INLINE _syscall3(int, socket, int, d,
+ int, t, int, p)
+ #endif
+ #if defined(__i386__) || defined(__PPC__) || \
+ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+ LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p,
+ int*, s, int, o)
+ #endif
+ #if defined(__mips__)
+ /* sys_pipe() on MIPS has non-standard calling conventions, as it returns
+ * both file handles through CPU registers.
+ */
+ LSS_INLINE int LSS_NAME(pipe)(int *p) {
unsigned long __v0 __asm__("$2") = __NR_pipe;
unsigned long __v1 __asm__("$3");
unsigned long __r7 __asm__("$7");
- __asm__ __volatile__ ("syscall\n"
- : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
- : "0"(__v0)
- : "$8", "$9", "$10", "$11", "$12",
- "$13", "$14", "$15", "$24", "memory");
- if (__r7) {
- LSS_ERRNO = __v0;
- return -1;
- } else {
- p[0] = __v0;
- p[1] = __v1;
- return 0;
- }
- }
- #else
- LSS_INLINE _syscall1(int, pipe, int *, p)
- #endif
-
- LSS_INLINE pid_t LSS_NAME(gettid)() {
- pid_t tid = LSS_NAME(_gettid)();
- if (tid != -1) {
- return tid;
- }
- return LSS_NAME(getpid)();
- }
-
- LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size,
- size_t new_size, int flags, ...) {
- va_list ap;
- void *new_address, *rc;
- va_start(ap, flags);
- new_address = va_arg(ap, void *);
- rc = LSS_NAME(_mremap)(old_address, old_size, new_size,
- flags, new_address);
- va_end(ap);
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
- /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
- * then sends job control signals to the real parent, rather than to
- * the tracer. We reduce the risk of this happening by starting a
- * whole new time slice, and then quickly sending a SIGCONT signal
- * right after detaching from the tracee.
- */
- int rc, err;
- LSS_NAME(sched_yield)();
- rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
- err = LSS_ERRNO;
- LSS_NAME(kill)(pid, SIGCONT);
- LSS_ERRNO = err;
- return rc;
- }
-#endif
-
-#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
-}
-#endif
-
-#endif
-#endif
+ __asm__ __volatile__ ("syscall\n"
+ : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
+ : "0"(__v0)
+ : "$8", "$9", "$10", "$11", "$12",
+ "$13", "$14", "$15", "$24", "memory");
+ if (__r7) {
+ LSS_ERRNO = __v0;
+ return -1;
+ } else {
+ p[0] = __v0;
+ p[1] = __v1;
+ return 0;
+ }
+ }
+ #else
+ LSS_INLINE _syscall1(int, pipe, int *, p)
+ #endif
+
+ LSS_INLINE pid_t LSS_NAME(gettid)() {
+ pid_t tid = LSS_NAME(_gettid)();
+ if (tid != -1) {
+ return tid;
+ }
+ return LSS_NAME(getpid)();
+ }
+
+ LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size,
+ size_t new_size, int flags, ...) {
+ va_list ap;
+ void *new_address, *rc;
+ va_start(ap, flags);
+ new_address = va_arg(ap, void *);
+ rc = LSS_NAME(_mremap)(old_address, old_size, new_size,
+ flags, new_address);
+ va_end(ap);
+ return rc;
+ }
+
+ LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
+ /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
+ * then sends job control signals to the real parent, rather than to
+ * the tracer. We reduce the risk of this happening by starting a
+ * whole new time slice, and then quickly sending a SIGCONT signal
+ * right after detaching from the tracee.
+ */
+ int rc, err;
+ LSS_NAME(sched_yield)();
+ rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
+ err = LSS_ERRNO;
+ LSS_NAME(kill)(pid, SIGCONT);
+ LSS_ERRNO = err;
+ return rc;
+ }
+#endif
+
+#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
+}
+#endif
+
+#endif
+#endif
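
Note: the gettid wrapper above falls back to getpid() on kernels that lack the gettid syscall. A minimal standalone sketch of the same fallback, written against plain glibc syscall(2) rather than the LSS macros (illustrative only, not part of the patched header):

/* Hedged sketch: gettid with getpid fallback, mirroring LSS_NAME(gettid). */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static pid_t my_gettid(void) {
  long tid = syscall(SYS_gettid);  /* kernel thread id; -1 with errno on error */
  if (tid != -1)
    return (pid_t)tid;
  return getpid();                 /* very old kernels without gettid: use the pid */
}

int main(void) {
  printf("tid=%d pid=%d\n", (int)my_gettid(), (int)getpid());
  return 0;
}
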
diff --git a/contrib/libs/linuxvdso/original/logging.h b/contrib/libs/linuxvdso/original/logging.h
index c61eab18e3..209714ccd0 100644
--- a/contrib/libs/linuxvdso/original/logging.h
+++ b/contrib/libs/linuxvdso/original/logging.h
@@ -1,154 +1,154 @@
-#pragma once
-
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// This file contains #include information about logging-related stuff.
-// Pretty much everybody needs to #include this file so that they can
-// log various happenings.
-//
-#ifndef _LOGGING_H_
-#define _LOGGING_H_
-
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len)
-
-#define CHECK(condition) \
- do { \
- if (!(condition)) { \
- WRITE_TO_STDERR("Check failed: " #condition "\n", \
- sizeof("Check failed: " #condition "\n")-1); \
- abort(); \
- } \
- } while (0)
-
-// This takes a message to print. The name is historical.
-#define RAW_CHECK(condition, message) \
- do { \
- if (!(condition)) { \
- WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \
- sizeof("Check failed: " #condition ": " message "\n")-1);\
- abort(); \
- } \
- } while (0)
-
-// This is like RAW_CHECK, but only in debug-mode
-#ifdef NDEBUG
-enum { DEBUG_MODE = 0 };
-#define RAW_DCHECK(condition, message)
-#else
-enum { DEBUG_MODE = 1 };
-#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message)
-#endif
-
-// This prints errno as well. Note we use write instead of printf/puts to
-// avoid the risk we'll call malloc().
-#define PCHECK(condition) \
- do { \
- if (!(condition)) { \
- const int err_no = errno; \
- WRITE_TO_STDERR("Check failed: " #condition ": ", \
- sizeof("Check failed: " #condition ": ")-1); \
- WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \
- WRITE_TO_STDERR("\n", sizeof("\n")-1); \
- abort(); \
- } \
- } while (0)
-
-// Helper macro for binary operators; prints the two values on error
-// Don't use this macro directly in your code, use CHECK_EQ et al below
-
-// WARNING: These don't compile correctly if one of the arguments is a pointer
-// and the other is NULL. To work around this, simply static_cast NULL to the
-// type of the desired pointer.
-
-// TODO(jandrews): Also print the values in case of failure. Requires some
-// sort of type-sensitive ToString() function.
-#define CHECK_OP(op, val1, val2) \
- do { \
- if (!((val1) op (val2))) { \
- fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \
- abort(); \
- } \
- } while (0)
-
-#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2)
-#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2)
-#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2)
-#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2)
-#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2)
-#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2)
-
-// Synonyms for CHECK_* that are used in some unittests.
-#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
-#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
-#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
-#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
-#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
-#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
-#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2)
-#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2)
-#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2)
-#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2)
-#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2)
-#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2)
-// As are these variants.
-#define EXPECT_TRUE(cond) CHECK(cond)
-#define EXPECT_FALSE(cond) CHECK(!(cond))
-#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0)
-#define ASSERT_TRUE(cond) EXPECT_TRUE(cond)
-#define ASSERT_FALSE(cond) EXPECT_FALSE(cond)
-#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b)
-
-// Used for (libc) functions that return -1 and set errno
-#define CHECK_ERR(invocation) PCHECK((invocation) != -1)
-
-// A few more checks that only happen in debug mode
-#ifdef NDEBUG
-#define DCHECK_EQ(val1, val2)
-#define DCHECK_NE(val1, val2)
-#define DCHECK_LE(val1, val2)
-#define DCHECK_LT(val1, val2)
-#define DCHECK_GE(val1, val2)
-#define DCHECK_GT(val1, val2)
-#else
-#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
-#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
-#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
-#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
-#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
-#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
-#endif
-
-
-#endif // _LOGGING_H_
+#pragma once
+
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// This file contains #include information about logging-related stuff.
+// Pretty much everybody needs to #include this file so that they can
+// log various happenings.
+//
+#ifndef _LOGGING_H_
+#define _LOGGING_H_
+
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len)
+
+#define CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ WRITE_TO_STDERR("Check failed: " #condition "\n", \
+ sizeof("Check failed: " #condition "\n")-1); \
+ abort(); \
+ } \
+ } while (0)
+
+// This takes a message to print. The name is historical.
+#define RAW_CHECK(condition, message) \
+ do { \
+ if (!(condition)) { \
+ WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \
+ sizeof("Check failed: " #condition ": " message "\n")-1);\
+ abort(); \
+ } \
+ } while (0)
+
+// This is like RAW_CHECK, but only in debug-mode
+#ifdef NDEBUG
+enum { DEBUG_MODE = 0 };
+#define RAW_DCHECK(condition, message)
+#else
+enum { DEBUG_MODE = 1 };
+#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message)
+#endif
+
+// This prints errno as well. Note we use write instead of printf/puts to
+// avoid the risk we'll call malloc().
+#define PCHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ const int err_no = errno; \
+ WRITE_TO_STDERR("Check failed: " #condition ": ", \
+ sizeof("Check failed: " #condition ": ")-1); \
+ WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \
+ WRITE_TO_STDERR("\n", sizeof("\n")-1); \
+ abort(); \
+ } \
+ } while (0)
+
+// Helper macro for binary operators; prints the two values on error
+// Don't use this macro directly in your code, use CHECK_EQ et al below
+
+// WARNING: These don't compile correctly if one of the arguments is a pointer
+// and the other is NULL. To work around this, simply static_cast NULL to the
+// type of the desired pointer.
+
+// TODO(jandrews): Also print the values in case of failure. Requires some
+// sort of type-sensitive ToString() function.
+#define CHECK_OP(op, val1, val2) \
+ do { \
+ if (!((val1) op (val2))) { \
+ fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \
+ abort(); \
+ } \
+ } while (0)
+
+#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2)
+#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2)
+#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2)
+#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2)
+#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2)
+#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2)
+
+// Synonyms for CHECK_* that are used in some unittests.
+#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
+#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
+#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
+#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
+#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
+#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2)
+#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2)
+#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2)
+#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2)
+#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2)
+#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2)
+// As are these variants.
+#define EXPECT_TRUE(cond) CHECK(cond)
+#define EXPECT_FALSE(cond) CHECK(!(cond))
+#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0)
+#define ASSERT_TRUE(cond) EXPECT_TRUE(cond)
+#define ASSERT_FALSE(cond) EXPECT_FALSE(cond)
+#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b)
+
+// Used for (libc) functions that return -1 and set errno
+#define CHECK_ERR(invocation) PCHECK((invocation) != -1)
+
+// A few more checks that only happen in debug mode
+#ifdef NDEBUG
+#define DCHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2)
+#else
+#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
+#endif
+
+
+#endif // _LOGGING_H_
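
Note: the CHECK family above writes its message with write(2) and then calls abort(). A short hedged usage sketch (file name and values are illustrative; <stdlib.h> and <errno.h> are included here because the macros expand to abort() and errno in the caller's translation unit):

/* Hedged usage sketch for the macros above; not part of logging.h. */
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include "logging.h"

int main(void) {
  CHECK(1 + 1 == 2);                        /* aborts via WRITE_TO_STDERR on failure */
  CHECK_EQ(2 + 2, 4);                       /* binary-operator variant */
  int fd = open("/dev/null", O_RDONLY);
  CHECK_ERR(fd);                            /* PCHECK(fd != -1): also prints errno */
  RAW_DCHECK(fd >= 0, "fd must be valid");  /* compiled out when NDEBUG is set */
  close(fd);
  return 0;
}
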
diff --git a/contrib/libs/linuxvdso/original/vdso_support.cc b/contrib/libs/linuxvdso/original/vdso_support.cc
index d1763f38db..2977477398 100644
--- a/contrib/libs/linuxvdso/original/vdso_support.cc
+++ b/contrib/libs/linuxvdso/original/vdso_support.cc
@@ -1,139 +1,139 @@
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in the kernel VDSO page.
-//
-// VDSOSupport -- a class representing kernel VDSO (if present).
-//
-
-#include "vdso_support.h"
-
-#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h
-
-#include <fcntl.h>
-#include <stddef.h> // for ptrdiff_t
-
-#include "linux_syscall_support.h"
-#include "logging.h"
-
-#ifndef AT_SYSINFO_EHDR
-#define AT_SYSINFO_EHDR 33
-#endif
-
-namespace base {
-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup in the kernel VDSO page.
+//
+// VDSOSupport -- a class representing kernel VDSO (if present).
+//
+
+#include "vdso_support.h"
+
+#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h
+
+#include <fcntl.h>
+#include <stddef.h> // for ptrdiff_t
+
+#include "linux_syscall_support.h"
+#include "logging.h"
+
+#ifndef AT_SYSINFO_EHDR
+#define AT_SYSINFO_EHDR 33
+#endif
+
+namespace base {
+
const void *VDSOSupport::vdso_base_ = NULL;
-
-VDSOSupport::VDSOSupport()
+
+VDSOSupport::VDSOSupport()
// If vdso_base_ is still set to NULL, we got here
- // before VDSOSupport::Init has been called. Call it now.
+ // before VDSOSupport::Init has been called. Call it now.
: image_(Init()) {
-}
-
-// NOTE: we can't use GoogleOnceInit() below, because we can be
-// called by tcmalloc, and none of the *once* stuff may be functional yet.
-//
-// In addition, we hope that the VDSOSupportHelper constructor
-// causes this code to run before there are any threads, and before
-// InitGoogle() has executed any chroot or setuid calls.
-//
-// Finally, even if there is a race here, it is harmless, because
-// the operation should be idempotent.
-const void *VDSOSupport::Init() {
+}
+
+// NOTE: we can't use GoogleOnceInit() below, because we can be
+// called by tcmalloc, and none of the *once* stuff may be functional yet.
+//
+// In addition, we hope that the VDSOSupportHelper constructor
+// causes this code to run before there are any threads, and before
+// InitGoogle() has executed any chroot or setuid calls.
+//
+// Finally, even if there is a race here, it is harmless, because
+// the operation should be idempotent.
+const void *VDSOSupport::Init() {
if (vdso_base_ == NULL) {
- // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[]
- // on stack, and so glibc works as if VDSO was not present.
- // But going directly to kernel via /proc/self/auxv below bypasses
- // Valgrind zapping. So we check for Valgrind separately.
- if (RunningOnValgrind()) {
+ // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[]
+ // on stack, and so glibc works as if VDSO was not present.
+ // But going directly to kernel via /proc/self/auxv below bypasses
+ // Valgrind zapping. So we check for Valgrind separately.
+ if (RunningOnValgrind()) {
vdso_base_ = ElfMemImage::kInvalidBase;
return vdso_base_;
- }
- int fd = open("/proc/self/auxv", O_RDONLY);
- if (fd == -1) {
- // Kernel too old to have a VDSO.
+ }
+ int fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd == -1) {
+ // Kernel too old to have a VDSO.
vdso_base_ = ElfMemImage::kInvalidBase;
return vdso_base_;
- }
- ElfW(auxv_t) aux;
- while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
- if (aux.a_type == AT_SYSINFO_EHDR) {
- COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val),
- unexpected_sizeof_pointer_NE_sizeof_a_val);
- vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val);
- break;
- }
- }
- close(fd);
+ }
+ ElfW(auxv_t) aux;
+ while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
+ if (aux.a_type == AT_SYSINFO_EHDR) {
+ COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val),
+ unexpected_sizeof_pointer_NE_sizeof_a_val);
+ vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val);
+ break;
+ }
+ }
+ close(fd);
if (vdso_base_ == NULL) {
- // Didn't find AT_SYSINFO_EHDR in auxv[].
+ // Didn't find AT_SYSINFO_EHDR in auxv[].
vdso_base_ = ElfMemImage::kInvalidBase;
- }
- }
- return vdso_base_;
-}
-
-const void *VDSOSupport::SetBase(const void *base) {
+ }
+ }
+ return vdso_base_;
+}
+
+const void *VDSOSupport::SetBase(const void *base) {
CHECK(base != NULL);
- const void *old_base = vdso_base_;
- vdso_base_ = base;
- image_.Init(base);
- return old_base;
-}
-
-bool VDSOSupport::LookupSymbol(const char *name,
- const char *version,
- int type,
- SymbolInfo *info) const {
- return image_.LookupSymbol(name, version, type, info);
-}
-
-bool VDSOSupport::LookupSymbolByAddress(const void *address,
- SymbolInfo *info_out) const {
- return image_.LookupSymbolByAddress(address, info_out);
-}
-
-// We need to make sure VDSOSupport::Init() is called before
-// the main() runs, since it might do something like setuid or
-// chroot. If VDSOSupport
-// is used in any global constructor, this will happen, since
-// VDSOSupport's constructor calls Init. But if not, we need to
-// ensure it here, with a global constructor of our own. This
-// is an allowed exception to the normal rule against non-trivial
-// global constructors.
-static class VDSOInitHelper {
- public:
- VDSOInitHelper() { VDSOSupport::Init(); }
-} vdso_init_helper;
-}
-
-#endif // HAVE_VDSO_SUPPORT
+ const void *old_base = vdso_base_;
+ vdso_base_ = base;
+ image_.Init(base);
+ return old_base;
+}
+
+bool VDSOSupport::LookupSymbol(const char *name,
+ const char *version,
+ int type,
+ SymbolInfo *info) const {
+ return image_.LookupSymbol(name, version, type, info);
+}
+
+bool VDSOSupport::LookupSymbolByAddress(const void *address,
+ SymbolInfo *info_out) const {
+ return image_.LookupSymbolByAddress(address, info_out);
+}
+
+// We need to make sure VDSOSupport::Init() is called before
+// the main() runs, since it might do something like setuid or
+// chroot. If VDSOSupport
+// is used in any global constructor, this will happen, since
+// VDSOSupport's constructor calls Init. But if not, we need to
+// ensure it here, with a global constructor of our own. This
+// is an allowed exception to the normal rule against non-trivial
+// global constructors.
+static class VDSOInitHelper {
+ public:
+ VDSOInitHelper() { VDSOSupport::Init(); }
+} vdso_init_helper;
+}
+
+#endif // HAVE_VDSO_SUPPORT
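
Note: VDSOSupport::Init() above walks /proc/self/auxv by hand so that it stays usable from inside tcmalloc, before most of the runtime is up. Outside that constraint, the same AT_SYSINFO_EHDR lookup is a one-liner with glibc's getauxval(3) (glibc 2.16+). A hedged standalone sketch, not part of the patched file:

/* Hedged sketch: locate the VDSO ELF header via getauxval(3).
 * The vendored code avoids this because it must stay usable very early
 * and without allocating; shown for illustration only. */
#include <sys/auxv.h>
#include <stdio.h>

int main(void) {
  unsigned long ehdr = getauxval(AT_SYSINFO_EHDR);  /* 0 if no VDSO is mapped */
  if (ehdr == 0)
    printf("no VDSO mapped\n");
  else
    printf("VDSO Elf header at %#lx\n", ehdr);
  return 0;
}
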
diff --git a/contrib/libs/linuxvdso/original/vdso_support.h b/contrib/libs/linuxvdso/original/vdso_support.h
index 99623338d2..1ccf32c23b 100644
--- a/contrib/libs/linuxvdso/original/vdso_support.h
+++ b/contrib/libs/linuxvdso/original/vdso_support.h
@@ -1,132 +1,132 @@
-#pragma once
-
-// Copyright (c) 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Paul Pluzhnikov
-//
-// Allow dynamic symbol lookup in the kernel VDSO page.
-//
-// VDSO stands for "Virtual Dynamic Shared Object" -- a page of
-// executable code, which looks like a shared library, but doesn't
-// necessarily exist anywhere on disk, and which gets mmap()ed into
-// every process by kernels which support VDSO, such as 2.6.x for 32-bit
-// executables, and 2.6.24 and above for 64-bit executables.
-//
-// More details could be found here:
-// http://www.trilithium.com/johan/2005/08/linux-gate/
-//
-// VDSOSupport -- a class representing kernel VDSO (if present).
-//
-// Example usage:
-// VDSOSupport vdso;
-// VDSOSupport::SymbolInfo info;
-// typedef (*FN)(unsigned *, void *, void *);
-// FN fn = NULL;
-// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
-// fn = reinterpret_cast<FN>(info.address);
-// }
-
-#ifndef BASE_VDSO_SUPPORT_H_
-#define BASE_VDSO_SUPPORT_H_
-
-#include "config.h"
-#include "elf_mem_image.h"
-
-#ifdef HAVE_ELF_MEM_IMAGE
-
-#define HAVE_VDSO_SUPPORT 1
-
-#include <stdlib.h> // for NULL
-
-namespace base {
-
-// NOTE: this class may be used from within tcmalloc, and can not
-// use any memory allocation routines.
-class VDSOSupport {
- public:
- VDSOSupport();
-
- typedef ElfMemImage::SymbolInfo SymbolInfo;
- typedef ElfMemImage::SymbolIterator SymbolIterator;
-
- // Answers whether we have a vdso at all.
- bool IsPresent() const { return image_.IsPresent(); }
-
- // Allow to iterate over all VDSO symbols.
- SymbolIterator begin() const { return image_.begin(); }
- SymbolIterator end() const { return image_.end(); }
-
- // Look up versioned dynamic symbol in the kernel VDSO.
- // Returns false if VDSO is not present, or doesn't contain given
- // symbol/version/type combination.
- // If info_out != NULL, additional details are filled in.
- bool LookupSymbol(const char *name, const char *version,
- int symbol_type, SymbolInfo *info_out) const;
-
- // Find info about symbol (if any) which overlaps given address.
- // Returns true if symbol was found; false if VDSO isn't present
- // or doesn't have a symbol overlapping given address.
- // If info_out != NULL, additional details are filled in.
- bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
-
- // Used only for testing. Replace real VDSO base with a mock.
- // Returns previous value of vdso_base_. After you are done testing,
- // you are expected to call SetBase() with previous value, in order to
- // reset state to the way it was.
- const void *SetBase(const void *s);
-
- // Computes vdso_base_ and returns it. Should be called as early as
- // possible; before any thread creation, chroot or setuid.
- static const void *Init();
-
- private:
- // image_ represents VDSO ELF image in memory.
- // image_.ehdr_ == NULL implies there is no VDSO.
- ElfMemImage image_;
-
- // Cached value of auxv AT_SYSINFO_EHDR, computed once.
- // This is a tri-state:
+#pragma once
+
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Paul Pluzhnikov
+//
+// Allow dynamic symbol lookup in the kernel VDSO page.
+//
+// VDSO stands for "Virtual Dynamic Shared Object" -- a page of
+// executable code, which looks like a shared library, but doesn't
+// necessarily exist anywhere on disk, and which gets mmap()ed into
+// every process by kernels which support VDSO, such as 2.6.x for 32-bit
+// executables, and 2.6.24 and above for 64-bit executables.
+//
+// More details could be found here:
+// http://www.trilithium.com/johan/2005/08/linux-gate/
+//
+// VDSOSupport -- a class representing kernel VDSO (if present).
+//
+// Example usage:
+// VDSOSupport vdso;
+// VDSOSupport::SymbolInfo info;
+// typedef (*FN)(unsigned *, void *, void *);
+// FN fn = NULL;
+// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
+// fn = reinterpret_cast<FN>(info.address);
+// }
+
+#ifndef BASE_VDSO_SUPPORT_H_
+#define BASE_VDSO_SUPPORT_H_
+
+#include "config.h"
+#include "elf_mem_image.h"
+
+#ifdef HAVE_ELF_MEM_IMAGE
+
+#define HAVE_VDSO_SUPPORT 1
+
+#include <stdlib.h> // for NULL
+
+namespace base {
+
+// NOTE: this class may be used from within tcmalloc, and can not
+// use any memory allocation routines.
+class VDSOSupport {
+ public:
+ VDSOSupport();
+
+ typedef ElfMemImage::SymbolInfo SymbolInfo;
+ typedef ElfMemImage::SymbolIterator SymbolIterator;
+
+ // Answers whether we have a vdso at all.
+ bool IsPresent() const { return image_.IsPresent(); }
+
+ // Allow to iterate over all VDSO symbols.
+ SymbolIterator begin() const { return image_.begin(); }
+ SymbolIterator end() const { return image_.end(); }
+
+ // Look up versioned dynamic symbol in the kernel VDSO.
+ // Returns false if VDSO is not present, or doesn't contain given
+ // symbol/version/type combination.
+ // If info_out != NULL, additional details are filled in.
+ bool LookupSymbol(const char *name, const char *version,
+ int symbol_type, SymbolInfo *info_out) const;
+
+ // Find info about symbol (if any) which overlaps given address.
+ // Returns true if symbol was found; false if VDSO isn't present
+ // or doesn't have a symbol overlapping given address.
+ // If info_out != NULL, additional details are filled in.
+ bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
+
+ // Used only for testing. Replace real VDSO base with a mock.
+ // Returns previous value of vdso_base_. After you are done testing,
+ // you are expected to call SetBase() with previous value, in order to
+ // reset state to the way it was.
+ const void *SetBase(const void *s);
+
+ // Computes vdso_base_ and returns it. Should be called as early as
+ // possible; before any thread creation, chroot or setuid.
+ static const void *Init();
+
+ private:
+ // image_ represents VDSO ELF image in memory.
+ // image_.ehdr_ == NULL implies there is no VDSO.
+ ElfMemImage image_;
+
+ // Cached value of auxv AT_SYSINFO_EHDR, computed once.
+ // This is a tri-state:
// 0 => value hasn't been determined yet.
// kInvalidBase => there is no VDSO.
- // else => vma of VDSO Elf{32,64}_Ehdr.
- //
- // When testing with mock VDSO, low bit is set.
- // The low bit is always available because vdso_base_ is
- // page-aligned.
- static const void *vdso_base_;
-
- DISALLOW_COPY_AND_ASSIGN(VDSOSupport);
-};
-} // namespace base
-
-#endif // HAVE_ELF_MEM_IMAGE
-
-#endif // BASE_VDSO_SUPPORT_H_
+ // else => vma of VDSO Elf{32,64}_Ehdr.
+ //
+ // When testing with mock VDSO, low bit is set.
+ // The low bit is always available because vdso_base_ is
+ // page-aligned.
+ static const void *vdso_base_;
+
+ DISALLOW_COPY_AND_ASSIGN(VDSOSupport);
+};
+} // namespace base
+
+#endif // HAVE_ELF_MEM_IMAGE
+
+#endif // BASE_VDSO_SUPPORT_H_
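
Note: the usage comment in the header above omits the function pointer's return type. A complete hedged C++ sketch follows; the long return type for __vdso_getcpu is an assumption based on the kernel's getcpu(2) signature and is not stated in the header itself:

// Hedged C++ usage sketch; not part of the patched header.
#include "vdso_support.h"
#include <elf.h>      // STT_FUNC
#include <stdint.h>   // uintptr_t
#include <stdio.h>

#ifdef HAVE_VDSO_SUPPORT
int main() {
  base::VDSOSupport vdso;
  if (!vdso.IsPresent()) return 0;
  base::VDSOSupport::SymbolInfo info;
  typedef long (*FN)(unsigned *, void *, void *);  // return type assumed
  if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
    FN fn = reinterpret_cast<FN>(reinterpret_cast<uintptr_t>(info.address));
    unsigned cpu = 0;
    if (fn != NULL && fn(&cpu, NULL, NULL) == 0)
      printf("running on cpu %u\n", cpu);
  }
  return 0;
}
#else
int main() { return 0; }
#endif
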
diff --git a/contrib/libs/linuxvdso/original/ya.make b/contrib/libs/linuxvdso/original/ya.make
index e545457d87..c5b41c3586 100644
--- a/contrib/libs/linuxvdso/original/ya.make
+++ b/contrib/libs/linuxvdso/original/ya.make
@@ -1,6 +1,6 @@
-LIBRARY()
+LIBRARY()
-LICENSE(BSD-3-Clause)
+LICENSE(BSD-3-Clause)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
@@ -8,16 +8,16 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
NO_UTIL()
NO_RUNTIME()
NO_COMPILER_WARNINGS()
-
-SRCS(
- vdso_support.cc
- elf_mem_image.cc
-)
-
-END()
+
+SRCS(
+ vdso_support.cc
+ elf_mem_image.cc
+)
+
+END()
diff --git a/contrib/libs/linuxvdso/ya.make b/contrib/libs/linuxvdso/ya.make
index d26073c6cc..4da8d3d076 100644
--- a/contrib/libs/linuxvdso/ya.make
+++ b/contrib/libs/linuxvdso/ya.make
@@ -1,8 +1,8 @@
-LIBRARY()
+LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(BSD-3-Clause)
+LICENSE(BSD-3-Clause)
VERSION(2.0)
@@ -12,22 +12,22 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
+
NO_UTIL()
NO_RUNTIME()
-
+
IF (OS_LINUX)
- PEERDIR(
- contrib/libs/linuxvdso/original
- )
- SRCS(
- interface.cpp
- )
+ PEERDIR(
+ contrib/libs/linuxvdso/original
+ )
+ SRCS(
+ interface.cpp
+ )
ELSE()
- SRCS(
- fake.cpp
- )
+ SRCS(
+ fake.cpp
+ )
ENDIF()
-
-END()
+
+END()
diff --git a/contrib/libs/lz4/generated/gen.py b/contrib/libs/lz4/generated/gen.py
index 6bee186e93..24dec0555c 100644
--- a/contrib/libs/lz4/generated/gen.py
+++ b/contrib/libs/lz4/generated/gen.py
@@ -1,5 +1,5 @@
import os
-
+
lz4 = '''
#define LZ4_MEMORY_USAGE {i}
#define LZ4_NAMESPACE lz4_{i}
@@ -32,14 +32,14 @@ cases = []
os.chdir(os.path.dirname(__file__))
-for i in range(10, 21):
+for i in range(10, 21):
name = 'lz4_{}.cpp'.format(i)
namespaces.append(lz4_namespace.format(i=i))
cases.append(lz4_case.format(i=i))
print ' ' + name
-
+
with open(name, 'w') as f:
f.write(lz4.format(i=i))
-
+
with open('lz4methods.cpp', 'w') as f:
f.write(lz4methods % ('\n'.join(namespaces), '\n'.join(cases)))
diff --git a/contrib/libs/lz4/generated/iface.h b/contrib/libs/lz4/generated/iface.h
index c30f2da853..e299d44014 100644
--- a/contrib/libs/lz4/generated/iface.h
+++ b/contrib/libs/lz4/generated/iface.h
@@ -1,15 +1,15 @@
-#pragma once
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-struct TLZ4Methods {
- int (*LZ4CompressLimited)(const char* source, char* dest, int isize, int maxOut);
-};
-
-struct TLZ4Methods* LZ4Methods(int memory);
-
-#if defined(__cplusplus)
-}
-#endif
+#pragma once
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct TLZ4Methods {
+ int (*LZ4CompressLimited)(const char* source, char* dest, int isize, int maxOut);
+};
+
+struct TLZ4Methods* LZ4Methods(int memory);
+
+#if defined(__cplusplus)
+}
+#endif
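
Note: LZ4Methods(memory) selects the table generated for one LZ4_MEMORY_USAGE value (gen.py above emits the range 10..20). A hedged usage sketch; buffer sizes are illustrative and the NULL return for an ungenerated size is an assumption:

/* Hedged sketch of the generated interface; not part of the library. */
#include "iface.h"
#include <stdio.h>

int main(void) {
  const char src[] = "example payload, example payload, example payload";
  char dst[256];
  struct TLZ4Methods* m = LZ4Methods(17);   /* assumed NULL if 17 was not generated */
  if (m == NULL) return 1;
  int written = m->LZ4CompressLimited(src, dst, (int)sizeof(src), (int)sizeof(dst));
  printf("compressed %zu bytes into %d\n", sizeof(src), written);
  return written > 0 ? 0 : 1;
}
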
diff --git a/contrib/libs/lz4/generated/lz4_ns.h b/contrib/libs/lz4/generated/lz4_ns.h
index 8da45bceb5..2dfbbf8ccc 100644
--- a/contrib/libs/lz4/generated/lz4_ns.h
+++ b/contrib/libs/lz4/generated/lz4_ns.h
@@ -1,7 +1,7 @@
#pragma once
-
+
#include "iface.h"
-
+
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
@@ -14,6 +14,6 @@ namespace LZ4_NAMESPACE {
struct TLZ4Methods ytbl = {
LZ4_compress_default,
-};
-
+};
+
}
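
Note: lz4_ns.h is meant to be included once per generated memory size, with LZ4_MEMORY_USAGE and LZ4_NAMESPACE predefined by gen.py's template. A hedged sketch of what one generated translation unit (for instance lz4_17.cpp) would look like; the exact contents are an assumption reconstructed from the template fragment above:

// Hedged sketch of a generated wrapper TU; illustrative only.
#define LZ4_MEMORY_USAGE 17
#define LZ4_NAMESPACE lz4_17
#include "lz4_ns.h"
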
diff --git a/contrib/libs/lz4/generated/ya.make b/contrib/libs/lz4/generated/ya.make
index ed4cb874d3..f37d13bddb 100644
--- a/contrib/libs/lz4/generated/ya.make
+++ b/contrib/libs/lz4/generated/ya.make
@@ -2,8 +2,8 @@ LIBRARY()
WITHOUT_LICENSE_TEXTS()
-LICENSE(BSD-2-Clause)
-
+LICENSE(BSD-2-Clause)
+
OWNER(
orivej
g:contrib
diff --git a/contrib/libs/lz4/lz4.c b/contrib/libs/lz4/lz4.c
index 657b8f1baf..c864ba73ba 100644
--- a/contrib/libs/lz4/lz4.c
+++ b/contrib/libs/lz4/lz4.c
@@ -42,7 +42,7 @@
*/
#ifndef LZ4_HEAPMODE
# define LZ4_HEAPMODE 0
-#endif
+#endif
/*
* LZ4_ACCELERATION_DEFAULT :
diff --git a/contrib/libs/lzmasdk/7zStream.c b/contrib/libs/lzmasdk/7zStream.c
index 61b8ad60d8..6b5aa1621d 100644
--- a/contrib/libs/lzmasdk/7zStream.c
+++ b/contrib/libs/lzmasdk/7zStream.c
@@ -1,176 +1,176 @@
-/* 7zStream.c -- 7z Stream functions
-2017-04-03 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <string.h>
-
-#include "7zTypes.h"
-
-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
-{
- while (size != 0)
- {
- size_t processed = size;
- RINOK(ISeqInStream_Read(stream, buf, &processed));
- if (processed == 0)
- return errorType;
- buf = (void *)((Byte *)buf + processed);
- size -= processed;
- }
- return SZ_OK;
-}
-
-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
-{
- return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
-}
-
-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
-{
- size_t processed = 1;
- RINOK(ISeqInStream_Read(stream, buf, &processed));
- return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
-}
-
-
-
-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
-{
- Int64 t = offset;
- return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
-}
-
-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
-{
- const void *lookBuf;
- if (*size == 0)
- return SZ_OK;
- RINOK(ILookInStream_Look(stream, &lookBuf, size));
- memcpy(buf, lookBuf, *size);
- return ILookInStream_Skip(stream, *size);
-}
-
-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
-{
- while (size != 0)
- {
- size_t processed = size;
- RINOK(ILookInStream_Read(stream, buf, &processed));
- if (processed == 0)
- return errorType;
- buf = (void *)((Byte *)buf + processed);
- size -= processed;
- }
- return SZ_OK;
-}
-
-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
-{
- return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
-}
-
-
-
-#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
-
-static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
-{
- SRes res = SZ_OK;
- GET_LookToRead2
- size_t size2 = p->size - p->pos;
- if (size2 == 0 && *size != 0)
- {
- p->pos = 0;
- p->size = 0;
- size2 = p->bufSize;
- res = ISeekInStream_Read(p->realStream, p->buf, &size2);
- p->size = size2;
- }
- if (*size > size2)
- *size = size2;
- *buf = p->buf + p->pos;
- return res;
-}
-
-static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
-{
- SRes res = SZ_OK;
- GET_LookToRead2
- size_t size2 = p->size - p->pos;
- if (size2 == 0 && *size != 0)
- {
- p->pos = 0;
- p->size = 0;
- if (*size > p->bufSize)
- *size = p->bufSize;
- res = ISeekInStream_Read(p->realStream, p->buf, size);
- size2 = p->size = *size;
- }
- if (*size > size2)
- *size = size2;
- *buf = p->buf + p->pos;
- return res;
-}
-
-static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
-{
- GET_LookToRead2
- p->pos += offset;
- return SZ_OK;
-}
-
-static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
-{
- GET_LookToRead2
- size_t rem = p->size - p->pos;
- if (rem == 0)
- return ISeekInStream_Read(p->realStream, buf, size);
- if (rem > *size)
- rem = *size;
- memcpy(buf, p->buf + p->pos, rem);
- p->pos += rem;
- *size = rem;
- return SZ_OK;
-}
-
-static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
-{
- GET_LookToRead2
- p->pos = p->size = 0;
- return ISeekInStream_Seek(p->realStream, pos, origin);
-}
-
-void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
-{
- p->vt.Look = lookahead ?
- LookToRead2_Look_Lookahead :
- LookToRead2_Look_Exact;
- p->vt.Skip = LookToRead2_Skip;
- p->vt.Read = LookToRead2_Read;
- p->vt.Seek = LookToRead2_Seek;
-}
-
-
-
-static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
-{
- CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
- return LookInStream_LookRead(p->realStream, buf, size);
-}
-
-void SecToLook_CreateVTable(CSecToLook *p)
-{
- p->vt.Read = SecToLook_Read;
-}
-
-static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
-{
- CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
- return ILookInStream_Read(p->realStream, buf, size);
-}
-
-void SecToRead_CreateVTable(CSecToRead *p)
-{
- p->vt.Read = SecToRead_Read;
-}
+/* 7zStream.c -- 7z Stream functions
+2017-04-03 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "7zTypes.h"
+
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
+{
+ while (size != 0)
+ {
+ size_t processed = size;
+ RINOK(ISeqInStream_Read(stream, buf, &processed));
+ if (processed == 0)
+ return errorType;
+ buf = (void *)((Byte *)buf + processed);
+ size -= processed;
+ }
+ return SZ_OK;
+}
+
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
+{
+ return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+}
+
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
+{
+ size_t processed = 1;
+ RINOK(ISeqInStream_Read(stream, buf, &processed));
+ return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
+}
+
+
+
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
+{
+ Int64 t = offset;
+ return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
+}
+
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
+{
+ const void *lookBuf;
+ if (*size == 0)
+ return SZ_OK;
+ RINOK(ILookInStream_Look(stream, &lookBuf, size));
+ memcpy(buf, lookBuf, *size);
+ return ILookInStream_Skip(stream, *size);
+}
+
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
+{
+ while (size != 0)
+ {
+ size_t processed = size;
+ RINOK(ILookInStream_Read(stream, buf, &processed));
+ if (processed == 0)
+ return errorType;
+ buf = (void *)((Byte *)buf + processed);
+ size -= processed;
+ }
+ return SZ_OK;
+}
+
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
+{
+ return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+}
+
+
+
+#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+
+static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
+{
+ SRes res = SZ_OK;
+ GET_LookToRead2
+ size_t size2 = p->size - p->pos;
+ if (size2 == 0 && *size != 0)
+ {
+ p->pos = 0;
+ p->size = 0;
+ size2 = p->bufSize;
+ res = ISeekInStream_Read(p->realStream, p->buf, &size2);
+ p->size = size2;
+ }
+ if (*size > size2)
+ *size = size2;
+ *buf = p->buf + p->pos;
+ return res;
+}
+
+static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
+{
+ SRes res = SZ_OK;
+ GET_LookToRead2
+ size_t size2 = p->size - p->pos;
+ if (size2 == 0 && *size != 0)
+ {
+ p->pos = 0;
+ p->size = 0;
+ if (*size > p->bufSize)
+ *size = p->bufSize;
+ res = ISeekInStream_Read(p->realStream, p->buf, size);
+ size2 = p->size = *size;
+ }
+ if (*size > size2)
+ *size = size2;
+ *buf = p->buf + p->pos;
+ return res;
+}
+
+static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
+{
+ GET_LookToRead2
+ p->pos += offset;
+ return SZ_OK;
+}
+
+static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
+{
+ GET_LookToRead2
+ size_t rem = p->size - p->pos;
+ if (rem == 0)
+ return ISeekInStream_Read(p->realStream, buf, size);
+ if (rem > *size)
+ rem = *size;
+ memcpy(buf, p->buf + p->pos, rem);
+ p->pos += rem;
+ *size = rem;
+ return SZ_OK;
+}
+
+static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
+{
+ GET_LookToRead2
+ p->pos = p->size = 0;
+ return ISeekInStream_Seek(p->realStream, pos, origin);
+}
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
+{
+ p->vt.Look = lookahead ?
+ LookToRead2_Look_Lookahead :
+ LookToRead2_Look_Exact;
+ p->vt.Skip = LookToRead2_Skip;
+ p->vt.Read = LookToRead2_Read;
+ p->vt.Seek = LookToRead2_Seek;
+}
+
+
+
+static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
+{
+ CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
+ return LookInStream_LookRead(p->realStream, buf, size);
+}
+
+void SecToLook_CreateVTable(CSecToLook *p)
+{
+ p->vt.Read = SecToLook_Read;
+}
+
+static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
+{
+ CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
+ return ILookInStream_Read(p->realStream, buf, size);
+}
+
+void SecToRead_CreateVTable(CSecToRead *p)
+{
+ p->vt.Read = SecToRead_Read;
+}
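
The adapters above are normally composed in one direction: a caller implements ISeekInStream, wraps it in a CLookToRead2 whose buffer it owns, and then reads through the ILookInStream helpers. A minimal sketch of that wiring, assuming a caller-supplied ISeekInStream; "fileStream", "ReadHeader", and "g_lookBuf" are illustrative names, not part of this file:

/* Sketch under the assumptions above. */
#include "7zTypes.h"

static Byte g_lookBuf[1 << 14];

SRes ReadHeader(const ISeekInStream *fileStream, Byte *header, size_t headerSize)
{
  CLookToRead2 look;
  LookToRead2_CreateVTable(&look, False);   /* False selects the exact-read Look variant */
  look.realStream = fileStream;
  look.buf = g_lookBuf;                     /* buf/bufSize must be set by the caller */
  look.bufSize = sizeof(g_lookBuf);
  LookToRead2_Init(&look);                  /* pos = size = 0 */
  RINOK(LookInStream_SeekTo(&look.vt, 0));
  return LookInStream_Read(&look.vt, header, headerSize);
}
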
diff --git a/contrib/libs/lzmasdk/7zTypes.h b/contrib/libs/lzmasdk/7zTypes.h
index adf08fb279..65b3af63c7 100644
--- a/contrib/libs/lzmasdk/7zTypes.h
+++ b/contrib/libs/lzmasdk/7zTypes.h
@@ -1,375 +1,375 @@
-/* 7zTypes.h -- Basic types
+/* 7zTypes.h -- Basic types
2018-08-04 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_TYPES_H
-#define __7Z_TYPES_H
-
-#ifdef _WIN32
-/* #include <windows.h> */
-#endif
-
-#include <stddef.h>
-
-#ifndef EXTERN_C_BEGIN
-#ifdef __cplusplus
-#define EXTERN_C_BEGIN extern "C" {
-#define EXTERN_C_END }
-#else
-#define EXTERN_C_BEGIN
-#define EXTERN_C_END
-#endif
-#endif
-
-EXTERN_C_BEGIN
-
-#define SZ_OK 0
-
-#define SZ_ERROR_DATA 1
-#define SZ_ERROR_MEM 2
-#define SZ_ERROR_CRC 3
-#define SZ_ERROR_UNSUPPORTED 4
-#define SZ_ERROR_PARAM 5
-#define SZ_ERROR_INPUT_EOF 6
-#define SZ_ERROR_OUTPUT_EOF 7
-#define SZ_ERROR_READ 8
-#define SZ_ERROR_WRITE 9
-#define SZ_ERROR_PROGRESS 10
-#define SZ_ERROR_FAIL 11
-#define SZ_ERROR_THREAD 12
-
-#define SZ_ERROR_ARCHIVE 16
-#define SZ_ERROR_NO_ARCHIVE 17
-
-typedef int SRes;
-
-
-#ifdef _WIN32
-
-/* typedef DWORD WRes; */
-typedef unsigned WRes;
-#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
-
-#else
-
-typedef int WRes;
-#define MY__FACILITY_WIN32 7
-#define MY__FACILITY__WRes MY__FACILITY_WIN32
-#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
-
-#endif
-
-
-#ifndef RINOK
-#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
-#endif
-
-typedef unsigned char Byte;
-typedef short Int16;
-typedef unsigned short UInt16;
-
-#ifdef _LZMA_UINT32_IS_ULONG
-typedef long Int32;
-typedef unsigned long UInt32;
-#else
-typedef int Int32;
-typedef unsigned int UInt32;
-#endif
-
-#ifdef _SZ_NO_INT_64
-
-/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
- NOTES: Some code will work incorrectly in that case! */
-
-typedef long Int64;
-typedef unsigned long UInt64;
-
-#else
-
-#if defined(_MSC_VER) || defined(__BORLANDC__)
-typedef __int64 Int64;
-typedef unsigned __int64 UInt64;
-#define UINT64_CONST(n) n
-#else
-typedef long long int Int64;
-typedef unsigned long long int UInt64;
-#define UINT64_CONST(n) n ## ULL
-#endif
-
-#endif
-
-#ifdef _LZMA_NO_SYSTEM_SIZE_T
-typedef UInt32 SizeT;
-#else
-typedef size_t SizeT;
-#endif
-
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+#else
+
+typedef int WRes;
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_WIN32
+#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+#endif
+
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+ NOTES: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
typedef int BoolInt;
/* typedef BoolInt Bool; */
-#define True 1
-#define False 0
-
-
-#ifdef _WIN32
-#define MY_STD_CALL __stdcall
-#else
-#define MY_STD_CALL
-#endif
-
-#ifdef _MSC_VER
-
-#if _MSC_VER >= 1300
-#define MY_NO_INLINE __declspec(noinline)
-#else
-#define MY_NO_INLINE
-#endif
-
-#define MY_FORCE_INLINE __forceinline
-
-#define MY_CDECL __cdecl
-#define MY_FAST_CALL __fastcall
-
-#else
-
-#define MY_NO_INLINE
-#define MY_FORCE_INLINE
-#define MY_CDECL
-#define MY_FAST_CALL
-
-/* inline keyword : for C++ / C99 */
-
-/* GCC, clang: */
-/*
-#if defined (__GNUC__) && (__GNUC__ >= 4)
-#define MY_FORCE_INLINE __attribute__((always_inline))
-#define MY_NO_INLINE __attribute__((noinline))
-#endif
-*/
-
-#endif
-
-
-/* The following interfaces use first parameter as pointer to structure */
-
-typedef struct IByteIn IByteIn;
-struct IByteIn
-{
- Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
-};
-#define IByteIn_Read(p) (p)->Read(p)
-
-
-typedef struct IByteOut IByteOut;
-struct IByteOut
-{
- void (*Write)(const IByteOut *p, Byte b);
-};
-#define IByteOut_Write(p, b) (p)->Write(p, b)
-
-
-typedef struct ISeqInStream ISeqInStream;
-struct ISeqInStream
-{
- SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
- /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
- (output(*size) < input(*size)) is allowed */
-};
-#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
-
-/* it can return SZ_ERROR_INPUT_EOF */
-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
-
-
-typedef struct ISeqOutStream ISeqOutStream;
-struct ISeqOutStream
-{
- size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
- /* Returns: result - the number of actually written bytes.
- (result < size) means error */
-};
-#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
-
-typedef enum
-{
- SZ_SEEK_SET = 0,
- SZ_SEEK_CUR = 1,
- SZ_SEEK_END = 2
-} ESzSeek;
-
-
-typedef struct ISeekInStream ISeekInStream;
-struct ISeekInStream
-{
- SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
- SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
-};
-#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
-#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
-
-
-typedef struct ILookInStream ILookInStream;
-struct ILookInStream
-{
- SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
- /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
- (output(*size) > input(*size)) is not allowed
- (output(*size) < input(*size)) is allowed */
- SRes (*Skip)(const ILookInStream *p, size_t offset);
- /* offset must be <= output(*size) of Look */
-
- SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
- /* reads directly (without buffer). It's same as ISeqInStream::Read */
- SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
-};
-
-#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
-#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
-#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
-#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
-
-
-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
-
-/* reads via ILookInStream::Read */
-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
-
-
-
-typedef struct
-{
- ILookInStream vt;
- const ISeekInStream *realStream;
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_FORCE_INLINE __forceinline
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+#define MY_NO_INLINE
+#define MY_FORCE_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+/* inline keyword : for C++ / C99 */
+
+/* GCC, clang: */
+/*
+#if defined (__GNUC__) && (__GNUC__ >= 4)
+#define MY_FORCE_INLINE __attribute__((always_inline))
+#define MY_NO_INLINE __attribute__((noinline))
+#endif
+*/
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct IByteIn IByteIn;
+struct IByteIn
+{
+ Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+
+typedef struct IByteOut IByteOut;
+struct IByteOut
+{
+ void (*Write)(const IByteOut *p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+
+typedef struct ISeqInStream ISeqInStream;
+struct ISeqInStream
+{
+ SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
+
+
+typedef struct ISeqOutStream ISeqOutStream;
+struct ISeqOutStream
+{
+ size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
+ /* Returns: result - the number of actually written bytes.
+ (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+
+typedef enum
+{
+ SZ_SEEK_SET = 0,
+ SZ_SEEK_CUR = 1,
+ SZ_SEEK_END = 2
+} ESzSeek;
+
+
+typedef struct ISeekInStream ISeekInStream;
+struct ISeekInStream
+{
+ SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
+ SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+typedef struct ILookInStream ILookInStream;
+struct ILookInStream
+{
+ SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
+ /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+ (output(*size) > input(*size)) is not allowed
+ (output(*size) < input(*size)) is allowed */
+ SRes (*Skip)(const ILookInStream *p, size_t offset);
+ /* offset must be <= output(*size) of Look */
+
+ SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
+ /* reads directly (without buffer). It's same as ISeqInStream::Read */
+ SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
+};
+
+#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
+
+
+
+typedef struct
+{
+ ILookInStream vt;
+ const ISeekInStream *realStream;
+
+ size_t pos;
+ size_t size; /* it's data size */
- size_t pos;
- size_t size; /* it's data size */
-
- /* the following variables must be set outside */
- Byte *buf;
- size_t bufSize;
-} CLookToRead2;
-
-void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
-
-#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
-
-
-typedef struct
-{
- ISeqInStream vt;
- const ILookInStream *realStream;
-} CSecToLook;
-
-void SecToLook_CreateVTable(CSecToLook *p);
-
-
-
-typedef struct
-{
- ISeqInStream vt;
- const ILookInStream *realStream;
-} CSecToRead;
-
-void SecToRead_CreateVTable(CSecToRead *p);
-
-
-typedef struct ICompressProgress ICompressProgress;
-
-struct ICompressProgress
-{
- SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
- /* Returns: result. (result != SZ_OK) means break.
- Value (UInt64)(Int64)-1 for size means unknown value. */
-};
-#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
-
-
-
-typedef struct ISzAlloc ISzAlloc;
-typedef const ISzAlloc * ISzAllocPtr;
-
-struct ISzAlloc
-{
- void *(*Alloc)(ISzAllocPtr p, size_t size);
- void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
-};
-
-#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
-#define ISzAlloc_Free(p, a) (p)->Free(p, a)
-
-/* deprecated */
-#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
-#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
-
-
-
-
-
-#ifndef MY_offsetof
- #ifdef offsetof
- #define MY_offsetof(type, m) offsetof(type, m)
- /*
- #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
- */
- #else
- #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
- #endif
-#endif
-
-
-
-#ifndef MY_container_of
-
-/*
-#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
-#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
-#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
-#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
-*/
-
-/*
- GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
- GCC 3.4.4 : classes with constructor
- GCC 4.8.1 : classes with non-public variable members"
-*/
-
-#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
-
-
-#endif
-
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
-
-/*
-#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
-*/
-#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
-
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
-/*
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
-*/
-
-
-
-#ifdef _WIN32
-
-#define CHAR_PATH_SEPARATOR '\\'
-#define WCHAR_PATH_SEPARATOR L'\\'
-#define STRING_PATH_SEPARATOR "\\"
-#define WSTRING_PATH_SEPARATOR L"\\"
-
-#else
-
-#define CHAR_PATH_SEPARATOR '/'
-#define WCHAR_PATH_SEPARATOR L'/'
-#define STRING_PATH_SEPARATOR "/"
-#define WSTRING_PATH_SEPARATOR L"/"
-
-#endif
-
-EXTERN_C_END
-
-#endif
+ /* the following variables must be set outside */
+ Byte *buf;
+ size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ const ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+
+typedef struct
+{
+ ISeqInStream vt;
+ const ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+
+typedef struct ICompressProgress ICompressProgress;
+
+struct ICompressProgress
+{
+ SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
+ /* Returns: result. (result != SZ_OK) means break.
+ Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+ void *(*Alloc)(ISzAllocPtr p, size_t size);
+ void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+#ifndef MY_offsetof
+ #ifdef offsetof
+ #define MY_offsetof(type, m) offsetof(type, m)
+ /*
+ #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+ */
+ #else
+ #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+ #endif
+#endif
+
+
+
+#ifndef MY_container_of
+
+/*
+#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
+#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+ GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+ GCC 3.4.4 : classes with constructor
+ GCC 4.8.1 : classes with non-public variable members"
+*/
+
+#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
+
+
+#endif
+
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+
+/*
+#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+*/
+#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+
+
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
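
All of the interfaces declared above follow the same pattern: a vtable struct embedded in a larger object, recovered inside the callback via CONTAINER_FROM_VTBL. A minimal sketch of a memory-backed ISeqInStream written in that style; CBufInStream and its functions are illustrative names:

/* Sketch only: a memory buffer exposed through the ISeqInStream contract. */
#include <string.h>
#include "7zTypes.h"

typedef struct
{
  ISeqInStream vt;     /* vtable embedded per the SDK convention */
  const Byte *data;
  size_t rem;
} CBufInStream;

static SRes BufInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
  CBufInStream *p = CONTAINER_FROM_VTBL(pp, CBufInStream, vt);
  size_t cur = *size;
  if (cur > p->rem)
    cur = p->rem;      /* (output(*size) < input(*size)) is allowed */
  memcpy(buf, p->data, cur);
  p->data += cur;
  p->rem -= cur;
  *size = cur;         /* 0 with a nonzero request signals end_of_stream */
  return SZ_OK;
}

void BufInStream_Init(CBufInStream *p, const Byte *data, size_t size)
{
  p->vt.Read = BufInStream_Read;
  p->data = data;
  p->rem = size;
}
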
diff --git a/contrib/libs/lzmasdk/Alloc.c b/contrib/libs/lzmasdk/Alloc.c
index f2cd4c546b..bcede4b856 100644
--- a/contrib/libs/lzmasdk/Alloc.c
+++ b/contrib/libs/lzmasdk/Alloc.c
@@ -1,26 +1,26 @@
-/* Alloc.c -- Memory allocation functions
+/* Alloc.c -- Memory allocation functions
2018-04-27 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
+
+#include "Precomp.h"
+
#include <stdio.h>
-#ifdef _WIN32
-#include <windows.h>
-#endif
-#include <stdlib.h>
-
-#include "Alloc.h"
-
-/* #define _SZ_ALLOC_DEBUG */
-
-/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
-#ifdef _SZ_ALLOC_DEBUG
+#ifdef _WIN32
+#include <windows.h>
+#endif
+#include <stdlib.h>
+
+#include "Alloc.h"
-#include <stdio.h>
-int g_allocCount = 0;
-int g_allocCountMid = 0;
-int g_allocCountBig = 0;
+/* #define _SZ_ALLOC_DEBUG */
+
+/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
+#ifdef _SZ_ALLOC_DEBUG
+
+#include <stdio.h>
+int g_allocCount = 0;
+int g_allocCountMid = 0;
+int g_allocCountBig = 0;
#define CONVERT_INT_TO_STR(charType, tempSize) \
@@ -125,128 +125,128 @@ static void PrintAddr(void *p)
#define PrintDec(v, align)
#define PrintAddr(p)
-#endif
-
+#endif
-void *MyAlloc(size_t size)
-{
- if (size == 0)
- return NULL;
- #ifdef _SZ_ALLOC_DEBUG
- {
- void *p = malloc(size);
+
+void *MyAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
+ #ifdef _SZ_ALLOC_DEBUG
+ {
+ void *p = malloc(size);
PRINT_ALLOC("Alloc ", g_allocCount, size, p);
- return p;
- }
- #else
- return malloc(size);
- #endif
-}
-
-void MyFree(void *address)
-{
+ return p;
+ }
+ #else
+ return malloc(size);
+ #endif
+}
+
+void MyFree(void *address)
+{
PRINT_FREE("Free ", g_allocCount, address);
- free(address);
-}
-
-#ifdef _WIN32
-
-void *MidAlloc(size_t size)
-{
- if (size == 0)
- return NULL;
+ free(address);
+}
+
+#ifdef _WIN32
+
+void *MidAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
- return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
-}
-
-void MidFree(void *address)
-{
+ return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void MidFree(void *address)
+{
PRINT_FREE("Free-Mid", g_allocCountMid, address);
- if (!address)
- return;
- VirtualFree(address, 0, MEM_RELEASE);
-}
-
-#ifndef MEM_LARGE_PAGES
-#undef _7ZIP_LARGE_PAGES
-#endif
-
-#ifdef _7ZIP_LARGE_PAGES
-SIZE_T g_LargePageSize = 0;
-typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
-#endif
-
-void SetLargePageSize()
-{
- #ifdef _7ZIP_LARGE_PAGES
- SIZE_T size;
- GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
- if (!largePageMinimum)
- return;
- size = largePageMinimum();
- if (size == 0 || (size & (size - 1)) != 0)
- return;
- g_LargePageSize = size;
- #endif
-}
-
-
-void *BigAlloc(size_t size)
-{
- if (size == 0)
- return NULL;
+ if (!address)
+ return;
+ VirtualFree(address, 0, MEM_RELEASE);
+}
+
+#ifndef MEM_LARGE_PAGES
+#undef _7ZIP_LARGE_PAGES
+#endif
+
+#ifdef _7ZIP_LARGE_PAGES
+SIZE_T g_LargePageSize = 0;
+typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
+#endif
+
+void SetLargePageSize()
+{
+ #ifdef _7ZIP_LARGE_PAGES
+ SIZE_T size;
+ GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
+ GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
+ if (!largePageMinimum)
+ return;
+ size = largePageMinimum();
+ if (size == 0 || (size & (size - 1)) != 0)
+ return;
+ g_LargePageSize = size;
+ #endif
+}
+
+
+void *BigAlloc(size_t size)
+{
+ if (size == 0)
+ return NULL;
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);
-
- #ifdef _7ZIP_LARGE_PAGES
- {
- SIZE_T ps = g_LargePageSize;
- if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
- {
- size_t size2;
- ps--;
- size2 = (size + ps) & ~ps;
- if (size2 >= size)
- {
- void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
- if (res)
- return res;
- }
- }
- }
- #endif
-
- return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
-}
-
-void BigFree(void *address)
-{
+
+ #ifdef _7ZIP_LARGE_PAGES
+ {
+ SIZE_T ps = g_LargePageSize;
+ if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
+ {
+ size_t size2;
+ ps--;
+ size2 = (size + ps) & ~ps;
+ if (size2 >= size)
+ {
+ void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+ if (res)
+ return res;
+ }
+ }
+ }
+ #endif
+
+ return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void BigFree(void *address)
+{
PRINT_FREE("Free-Big", g_allocCountBig, address);
-
- if (!address)
- return;
- VirtualFree(address, 0, MEM_RELEASE);
-}
-
-#endif
-
-
-static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
-static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
+
+ if (!address)
+ return;
+ VirtualFree(address, 0, MEM_RELEASE);
+}
+
+#endif
+
+
+static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
+static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
-
+
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
-static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
-static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
+static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
+static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
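
Callers consume these allocators through the ISzAlloc macros rather than calling malloc directly, so the backing strategy (plain heap, VirtualAlloc, large pages) stays swappable. A minimal usage sketch; the function name is illustrative:

/* Sketch only: allocate and free through the ISzAlloc vtable. */
#include "Alloc.h"

int AllocExample(void)
{
  ISzAllocPtr alloc = &g_Alloc;             /* malloc/free-backed allocator */
  void *block = ISzAlloc_Alloc(alloc, 1 << 20);
  if (!block)
    return SZ_ERROR_MEM;
  /* ... use the block ... */
  ISzAlloc_Free(alloc, block);              /* address may also be NULL */
  return SZ_OK;
}
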
diff --git a/contrib/libs/lzmasdk/Alloc.h b/contrib/libs/lzmasdk/Alloc.h
index fa1e873e07..648237646f 100644
--- a/contrib/libs/lzmasdk/Alloc.h
+++ b/contrib/libs/lzmasdk/Alloc.h
@@ -1,39 +1,39 @@
-/* Alloc.h -- Memory allocation functions
+/* Alloc.h -- Memory allocation functions
2018-02-19 : Igor Pavlov : Public domain */
-
-#ifndef __COMMON_ALLOC_H
-#define __COMMON_ALLOC_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-void *MyAlloc(size_t size);
-void MyFree(void *address);
-
-#ifdef _WIN32
-
-void SetLargePageSize();
-
-void *MidAlloc(size_t size);
-void MidFree(void *address);
-void *BigAlloc(size_t size);
-void BigFree(void *address);
-
-#else
-
-#define MidAlloc(size) MyAlloc(size)
-#define MidFree(address) MyFree(address)
-#define BigAlloc(size) MyAlloc(size)
-#define BigFree(address) MyFree(address)
-
-#endif
-
-extern const ISzAlloc g_Alloc;
-extern const ISzAlloc g_BigAlloc;
+
+#ifndef __COMMON_ALLOC_H
+#define __COMMON_ALLOC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+void *MyAlloc(size_t size);
+void MyFree(void *address);
+
+#ifdef _WIN32
+
+void SetLargePageSize();
+
+void *MidAlloc(size_t size);
+void MidFree(void *address);
+void *BigAlloc(size_t size);
+void BigFree(void *address);
+
+#else
+
+#define MidAlloc(size) MyAlloc(size)
+#define MidFree(address) MyFree(address)
+#define BigAlloc(size) MyAlloc(size)
+#define BigFree(address) MyFree(address)
+
+#endif
+
+extern const ISzAlloc g_Alloc;
+extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
extern const ISzAlloc g_AlignedAlloc;
-
+
typedef struct
{
@@ -46,6 +46,6 @@ typedef struct
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
-EXTERN_C_END
-
-#endif
+EXTERN_C_END
+
+#endif
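
Because every SDK entry point takes an ISzAllocPtr, a caller can also substitute its own allocator built on MyAlloc/MyFree. A sketch of a simple counting wrapper; "liveCount" and the wrapper names are illustrative:

/* Sketch only: a caller-defined ISzAlloc that tracks live allocations. */
#include "Alloc.h"

static int liveCount = 0;

static void *CountingAlloc(ISzAllocPtr p, size_t size)
{
  (void)p;
  liveCount++;
  return MyAlloc(size);
}

static void CountingFree(ISzAllocPtr p, void *address)
{
  (void)p;
  if (address)
    liveCount--;
  MyFree(address);
}

const ISzAlloc g_CountingAlloc = { CountingAlloc, CountingFree };
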
diff --git a/contrib/libs/lzmasdk/Compiler.h b/contrib/libs/lzmasdk/Compiler.h
index 625d96a6c8..0cc409d8a8 100644
--- a/contrib/libs/lzmasdk/Compiler.h
+++ b/contrib/libs/lzmasdk/Compiler.h
@@ -1,33 +1,33 @@
-/* Compiler.h
-2017-04-03 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_COMPILER_H
-#define __7Z_COMPILER_H
-
-#ifdef _MSC_VER
-
- #ifdef UNDER_CE
- #define RPC_NO_WINDOWS_H
- /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
- #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
- #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
- #endif
-
- #if _MSC_VER >= 1300
- #pragma warning(disable : 4996) // This function or variable may be unsafe
- #else
- #pragma warning(disable : 4511) // copy constructor could not be generated
- #pragma warning(disable : 4512) // assignment operator could not be generated
- #pragma warning(disable : 4514) // unreferenced inline function has been removed
- #pragma warning(disable : 4702) // unreachable code
- #pragma warning(disable : 4710) // not inlined
- #pragma warning(disable : 4714) // function marked as __forceinline not inlined
- #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
- #endif
-
-#endif
-
-#define UNUSED_VAR(x) (void)x;
-/* #define UNUSED_VAR(x) x=x; */
-
-#endif
+/* Compiler.h
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+#ifdef _MSC_VER
+
+ #ifdef UNDER_CE
+ #define RPC_NO_WINDOWS_H
+ /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+ #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+ #endif
+
+ #if _MSC_VER >= 1300
+ #pragma warning(disable : 4996) // This function or variable may be unsafe
+ #else
+ #pragma warning(disable : 4511) // copy constructor could not be generated
+ #pragma warning(disable : 4512) // assignment operator could not be generated
+ #pragma warning(disable : 4514) // unreferenced inline function has been removed
+ #pragma warning(disable : 4702) // unreachable code
+ #pragma warning(disable : 4710) // not inlined
+ #pragma warning(disable : 4714) // function marked as __forceinline not inlined
+ #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+ #endif
+
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/contrib/libs/lzmasdk/LzFind.c b/contrib/libs/lzmasdk/LzFind.c
index 6758a4478b..df55e86c14 100644
--- a/contrib/libs/lzmasdk/LzFind.c
+++ b/contrib/libs/lzmasdk/LzFind.c
@@ -1,422 +1,422 @@
-/* LzFind.c -- Match finder for LZ algorithms
+/* LzFind.c -- Match finder for LZ algorithms
2018-07-08 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <string.h>
-
-#include "LzFind.h"
-#include "LzHash.h"
-
-#define kEmptyHashValue 0
-#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
-#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
-#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
-#define kMaxHistorySize ((UInt32)7 << 29)
-
-#define kStartMaxLen 3
-
-static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
-{
- if (!p->directInput)
- {
- ISzAlloc_Free(alloc, p->bufferBase);
- p->bufferBase = NULL;
- }
-}
-
-/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
-
-static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
-{
- UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
- if (p->directInput)
- {
- p->blockSize = blockSize;
- return 1;
- }
- if (!p->bufferBase || p->blockSize != blockSize)
- {
- LzInWindow_Free(p, alloc);
- p->blockSize = blockSize;
- p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
- }
- return (p->bufferBase != NULL);
-}
-
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
-
-UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
-
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
-{
- p->posLimit -= subValue;
- p->pos -= subValue;
- p->streamPos -= subValue;
-}
-
-static void MatchFinder_ReadBlock(CMatchFinder *p)
-{
- if (p->streamEndWasReached || p->result != SZ_OK)
- return;
-
- /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
-
- if (p->directInput)
- {
- UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
- if (curSize > p->directInputRem)
- curSize = (UInt32)p->directInputRem;
- p->directInputRem -= curSize;
- p->streamPos += curSize;
- if (p->directInputRem == 0)
- p->streamEndWasReached = 1;
- return;
- }
-
- for (;;)
- {
- Byte *dest = p->buffer + (p->streamPos - p->pos);
- size_t size = (p->bufferBase + p->blockSize - dest);
- if (size == 0)
- return;
-
- p->result = ISeqInStream_Read(p->stream, dest, &size);
- if (p->result != SZ_OK)
- return;
- if (size == 0)
- {
- p->streamEndWasReached = 1;
- return;
- }
- p->streamPos += (UInt32)size;
- if (p->streamPos - p->pos > p->keepSizeAfter)
- return;
- }
-}
-
-void MatchFinder_MoveBlock(CMatchFinder *p)
-{
- memmove(p->bufferBase,
- p->buffer - p->keepSizeBefore,
- (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
- p->buffer = p->bufferBase + p->keepSizeBefore;
-}
-
-int MatchFinder_NeedMove(CMatchFinder *p)
-{
- if (p->directInput)
- return 0;
- /* if (p->streamEndWasReached) return 0; */
- return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
-}
-
-void MatchFinder_ReadIfRequired(CMatchFinder *p)
-{
- if (p->streamEndWasReached)
- return;
- if (p->keepSizeAfter >= p->streamPos - p->pos)
- MatchFinder_ReadBlock(p);
-}
-
-static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
-{
- if (MatchFinder_NeedMove(p))
- MatchFinder_MoveBlock(p);
- MatchFinder_ReadBlock(p);
-}
-
-static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
-{
- p->cutValue = 32;
- p->btMode = 1;
- p->numHashBytes = 4;
- p->bigHash = 0;
-}
-
-#define kCrcPoly 0xEDB88320
-
-void MatchFinder_Construct(CMatchFinder *p)
-{
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "LzFind.h"
+#include "LzHash.h"
+
+#define kEmptyHashValue 0
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
+#define kMaxHistorySize ((UInt32)7 << 29)
+
+#define kStartMaxLen 3
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ if (!p->directInput)
+ {
+ ISzAlloc_Free(alloc, p->bufferBase);
+ p->bufferBase = NULL;
+ }
+}
+
+/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
+
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+{
+ UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
+ if (p->directInput)
+ {
+ p->blockSize = blockSize;
+ return 1;
+ }
+ if (!p->bufferBase || p->blockSize != blockSize)
+ {
+ LzInWindow_Free(p, alloc);
+ p->blockSize = blockSize;
+ p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
+ }
+ return (p->bufferBase != NULL);
+}
+
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+
+UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{
+ p->posLimit -= subValue;
+ p->pos -= subValue;
+ p->streamPos -= subValue;
+}
+
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return;
+
+ /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+
+ if (p->directInput)
+ {
+ UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
+ if (curSize > p->directInputRem)
+ curSize = (UInt32)p->directInputRem;
+ p->directInputRem -= curSize;
+ p->streamPos += curSize;
+ if (p->directInputRem == 0)
+ p->streamEndWasReached = 1;
+ return;
+ }
+
+ for (;;)
+ {
+ Byte *dest = p->buffer + (p->streamPos - p->pos);
+ size_t size = (p->bufferBase + p->blockSize - dest);
+ if (size == 0)
+ return;
+
+ p->result = ISeqInStream_Read(p->stream, dest, &size);
+ if (p->result != SZ_OK)
+ return;
+ if (size == 0)
+ {
+ p->streamEndWasReached = 1;
+ return;
+ }
+ p->streamPos += (UInt32)size;
+ if (p->streamPos - p->pos > p->keepSizeAfter)
+ return;
+ }
+}
+
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+ memmove(p->bufferBase,
+ p->buffer - p->keepSizeBefore,
+ (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
+ p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+ if (p->directInput)
+ return 0;
+ /* if (p->streamEndWasReached) return 0; */
+ return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+ if (p->streamEndWasReached)
+ return;
+ if (p->keepSizeAfter >= p->streamPos - p->pos)
+ MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{
+ if (MatchFinder_NeedMove(p))
+ MatchFinder_MoveBlock(p);
+ MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+ p->cutValue = 32;
+ p->btMode = 1;
+ p->numHashBytes = 4;
+ p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
unsigned i;
- p->bufferBase = NULL;
- p->directInput = 0;
- p->hash = NULL;
- p->expectedDataSize = (UInt64)(Int64)-1;
- MatchFinder_SetDefaultSettings(p);
-
- for (i = 0; i < 256; i++)
- {
+ p->bufferBase = NULL;
+ p->directInput = 0;
+ p->hash = NULL;
+ p->expectedDataSize = (UInt64)(Int64)-1;
+ MatchFinder_SetDefaultSettings(p);
+
+ for (i = 0; i < 256; i++)
+ {
UInt32 r = (UInt32)i;
- unsigned j;
- for (j = 0; j < 8; j++)
- r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
- p->crc[i] = r;
- }
-}
-
-static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->hash);
- p->hash = NULL;
-}
-
-void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
-{
- MatchFinder_FreeThisClassMemory(p, alloc);
- LzInWindow_Free(p, alloc);
-}
-
-static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
-{
- size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
- if (sizeInBytes / sizeof(CLzRef) != num)
- return NULL;
- return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
-}
-
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc)
-{
- UInt32 sizeReserv;
-
- if (historySize > kMaxHistorySize)
- {
- MatchFinder_Free(p, alloc);
- return 0;
- }
-
- sizeReserv = historySize >> 1;
- if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
- else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
-
- sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
-
- p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
- p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
-
- /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
-
- if (LzInWindow_Create(p, sizeReserv, alloc))
- {
- UInt32 newCyclicBufferSize = historySize + 1;
- UInt32 hs;
- p->matchMaxLen = matchMaxLen;
- {
- p->fixedHashSize = 0;
- if (p->numHashBytes == 2)
- hs = (1 << 16) - 1;
- else
- {
- hs = historySize;
- if (hs > p->expectedDataSize)
- hs = (UInt32)p->expectedDataSize;
- if (hs != 0)
- hs--;
- hs |= (hs >> 1);
- hs |= (hs >> 2);
- hs |= (hs >> 4);
- hs |= (hs >> 8);
- hs >>= 1;
- hs |= 0xFFFF; /* don't change it! It's required for Deflate */
- if (hs > (1 << 24))
- {
- if (p->numHashBytes == 3)
- hs = (1 << 24) - 1;
- else
- hs >>= 1;
- /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
- }
- }
- p->hashMask = hs;
- hs++;
- if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
- if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
- if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
- hs += p->fixedHashSize;
- }
-
- {
- size_t newSize;
- size_t numSons;
- p->historySize = historySize;
- p->hashSizeSum = hs;
- p->cyclicBufferSize = newCyclicBufferSize;
-
- numSons = newCyclicBufferSize;
- if (p->btMode)
- numSons <<= 1;
- newSize = hs + numSons;
-
- if (p->hash && p->numRefs == newSize)
- return 1;
-
- MatchFinder_FreeThisClassMemory(p, alloc);
- p->numRefs = newSize;
- p->hash = AllocRefs(newSize, alloc);
-
- if (p->hash)
- {
- p->son = p->hash + p->hashSizeSum;
- return 1;
- }
- }
- }
-
- MatchFinder_Free(p, alloc);
- return 0;
-}
-
-static void MatchFinder_SetLimits(CMatchFinder *p)
-{
- UInt32 limit = kMaxValForNormalize - p->pos;
- UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
-
- if (limit2 < limit)
- limit = limit2;
- limit2 = p->streamPos - p->pos;
-
- if (limit2 <= p->keepSizeAfter)
- {
- if (limit2 > 0)
- limit2 = 1;
- }
- else
- limit2 -= p->keepSizeAfter;
-
- if (limit2 < limit)
- limit = limit2;
-
- {
- UInt32 lenLimit = p->streamPos - p->pos;
- if (lenLimit > p->matchMaxLen)
- lenLimit = p->matchMaxLen;
- p->lenLimit = lenLimit;
- }
- p->posLimit = p->pos + limit;
-}
-
-
-void MatchFinder_Init_LowHash(CMatchFinder *p)
-{
- size_t i;
- CLzRef *items = p->hash;
- size_t numItems = p->fixedHashSize;
- for (i = 0; i < numItems; i++)
- items[i] = kEmptyHashValue;
-}
-
-
-void MatchFinder_Init_HighHash(CMatchFinder *p)
-{
- size_t i;
- CLzRef *items = p->hash + p->fixedHashSize;
- size_t numItems = (size_t)p->hashMask + 1;
- for (i = 0; i < numItems; i++)
- items[i] = kEmptyHashValue;
-}
-
-
-void MatchFinder_Init_3(CMatchFinder *p, int readData)
-{
- p->cyclicBufferPos = 0;
- p->buffer = p->bufferBase;
- p->pos =
- p->streamPos = p->cyclicBufferSize;
- p->result = SZ_OK;
- p->streamEndWasReached = 0;
-
- if (readData)
- MatchFinder_ReadBlock(p);
-
- MatchFinder_SetLimits(p);
-}
-
-
-void MatchFinder_Init(CMatchFinder *p)
-{
- MatchFinder_Init_HighHash(p);
- MatchFinder_Init_LowHash(p);
- MatchFinder_Init_3(p, True);
-}
-
-
-static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
-{
- return (p->pos - p->historySize - 1) & kNormalizeMask;
-}
-
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
-{
- size_t i;
- for (i = 0; i < numItems; i++)
- {
- UInt32 value = items[i];
- if (value <= subValue)
- value = kEmptyHashValue;
- else
- value -= subValue;
- items[i] = value;
- }
-}
-
-static void MatchFinder_Normalize(CMatchFinder *p)
-{
- UInt32 subValue = MatchFinder_GetSubValue(p);
- MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
- MatchFinder_ReduceOffsets(p, subValue);
-}
-
+ unsigned j;
+ for (j = 0; j < 8; j++)
+ r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+ p->crc[i] = r;
+ }
+}
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->hash);
+ p->hash = NULL;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+ size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+ if (sizeInBytes / sizeof(CLzRef) != num)
+ return NULL;
+ return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ UInt32 sizeReserv;
+
+ if (historySize > kMaxHistorySize)
+ {
+ MatchFinder_Free(p, alloc);
+ return 0;
+ }
+
+ sizeReserv = historySize >> 1;
+ if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
+ else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+
+ sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+ p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+ p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+
+ /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+
+ if (LzInWindow_Create(p, sizeReserv, alloc))
+ {
+ UInt32 newCyclicBufferSize = historySize + 1;
+ UInt32 hs;
+ p->matchMaxLen = matchMaxLen;
+ {
+ p->fixedHashSize = 0;
+ if (p->numHashBytes == 2)
+ hs = (1 << 16) - 1;
+ else
+ {
+ hs = historySize;
+ if (hs > p->expectedDataSize)
+ hs = (UInt32)p->expectedDataSize;
+ if (hs != 0)
+ hs--;
+ hs |= (hs >> 1);
+ hs |= (hs >> 2);
+ hs |= (hs >> 4);
+ hs |= (hs >> 8);
+ hs >>= 1;
+ hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+ if (hs > (1 << 24))
+ {
+ if (p->numHashBytes == 3)
+ hs = (1 << 24) - 1;
+ else
+ hs >>= 1;
+ /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+ }
+ }
+ p->hashMask = hs;
+ hs++;
+ if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+ if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+ if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+ hs += p->fixedHashSize;
+ }
+
+ {
+ size_t newSize;
+ size_t numSons;
+ p->historySize = historySize;
+ p->hashSizeSum = hs;
+ p->cyclicBufferSize = newCyclicBufferSize;
+
+ numSons = newCyclicBufferSize;
+ if (p->btMode)
+ numSons <<= 1;
+ newSize = hs + numSons;
+
+ if (p->hash && p->numRefs == newSize)
+ return 1;
+
+ MatchFinder_FreeThisClassMemory(p, alloc);
+ p->numRefs = newSize;
+ p->hash = AllocRefs(newSize, alloc);
+
+ if (p->hash)
+ {
+ p->son = p->hash + p->hashSizeSum;
+ return 1;
+ }
+ }
+ }
+
+ MatchFinder_Free(p, alloc);
+ return 0;
+}
+
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+ UInt32 limit = kMaxValForNormalize - p->pos;
+ UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
+
+ if (limit2 < limit)
+ limit = limit2;
+ limit2 = p->streamPos - p->pos;
+
+ if (limit2 <= p->keepSizeAfter)
+ {
+ if (limit2 > 0)
+ limit2 = 1;
+ }
+ else
+ limit2 -= p->keepSizeAfter;
+
+ if (limit2 < limit)
+ limit = limit2;
+
+ {
+ UInt32 lenLimit = p->streamPos - p->pos;
+ if (lenLimit > p->matchMaxLen)
+ lenLimit = p->matchMaxLen;
+ p->lenLimit = lenLimit;
+ }
+ p->posLimit = p->pos + limit;
+}
+
+
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+ size_t i;
+ CLzRef *items = p->hash;
+ size_t numItems = p->fixedHashSize;
+ for (i = 0; i < numItems; i++)
+ items[i] = kEmptyHashValue;
+}
+
+
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+ size_t i;
+ CLzRef *items = p->hash + p->fixedHashSize;
+ size_t numItems = (size_t)p->hashMask + 1;
+ for (i = 0; i < numItems; i++)
+ items[i] = kEmptyHashValue;
+}
+
+
+void MatchFinder_Init_3(CMatchFinder *p, int readData)
+{
+ p->cyclicBufferPos = 0;
+ p->buffer = p->bufferBase;
+ p->pos =
+ p->streamPos = p->cyclicBufferSize;
+ p->result = SZ_OK;
+ p->streamEndWasReached = 0;
+
+ if (readData)
+ MatchFinder_ReadBlock(p);
+
+ MatchFinder_SetLimits(p);
+}
+
+
+void MatchFinder_Init(CMatchFinder *p)
+{
+ MatchFinder_Init_HighHash(p);
+ MatchFinder_Init_LowHash(p);
+ MatchFinder_Init_3(p, True);
+}
+
+
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{
+ return (p->pos - p->historySize - 1) & kNormalizeMask;
+}
+
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+ size_t i;
+ for (i = 0; i < numItems; i++)
+ {
+ UInt32 value = items[i];
+ if (value <= subValue)
+ value = kEmptyHashValue;
+ else
+ value -= subValue;
+ items[i] = value;
+ }
+}
+
+static void MatchFinder_Normalize(CMatchFinder *p)
+{
+ UInt32 subValue = MatchFinder_GetSubValue(p);
+ MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
+ MatchFinder_ReduceOffsets(p, subValue);
+}
+
MY_NO_INLINE
-static void MatchFinder_CheckLimits(CMatchFinder *p)
-{
- if (p->pos == kMaxValForNormalize)
- MatchFinder_Normalize(p);
- if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
- MatchFinder_CheckAndMoveAndRead(p);
- if (p->cyclicBufferPos == p->cyclicBufferSize)
- p->cyclicBufferPos = 0;
- MatchFinder_SetLimits(p);
-}
-
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+ if (p->pos == kMaxValForNormalize)
+ MatchFinder_Normalize(p);
+ if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
+ MatchFinder_CheckAndMoveAndRead(p);
+ if (p->cyclicBufferPos == p->cyclicBufferSize)
+ p->cyclicBufferPos = 0;
+ MatchFinder_SetLimits(p);
+}
+
/*
(lenLimit > maxLen)
*/
MY_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *distances, unsigned maxLen)
-{
+{
/*
- son[_cyclicBufferPos] = curMatch;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- return distances;
- {
- const Byte *pb = cur - delta;
- curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
- if (pb[maxLen] == cur[maxLen] && *pb == *cur)
- {
- UInt32 len = 0;
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
+ son[_cyclicBufferPos] = curMatch;
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ return distances;
+ {
+ const Byte *pb = cur - delta;
+ curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+ if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+ {
+ UInt32 len = 0;
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
maxLen = len;
*distances++ = len;
- *distances++ = delta - 1;
- if (len == lenLimit)
- return distances;
- }
- }
- }
- }
+ *distances++ = delta - 1;
+ if (len == lenLimit)
+ return distances;
+ }
+ }
+ }
+ }
*/
const Byte *lim = cur + lenLimit;
@@ -458,670 +458,670 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
while (--cutValue);
return distances;
-}
-
+}
+
MY_FORCE_INLINE
-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, UInt32 maxLen)
-{
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *distances, UInt32 maxLen)
+{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return distances;
- }
- {
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return distances;
+ }
+ {
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
+ const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
UInt32 pair0 = pair[0];
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
maxLen = (UInt32)len;
*distances++ = (UInt32)len;
- *distances++ = delta - 1;
- if (len == lenLimit)
- {
+ *distances++ = delta - 1;
+ if (len == lenLimit)
+ {
*ptr1 = pair0;
- *ptr0 = pair[1];
- return distances;
- }
- }
- }
- if (pb[len] < cur[len])
- {
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- curMatch = *ptr1;
- len1 = len;
- }
- else
- {
- *ptr0 = curMatch;
- ptr0 = pair;
- curMatch = *ptr0;
- len0 = len;
- }
- }
- }
-}
-
-static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
-{
+ *ptr0 = pair[1];
+ return distances;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ curMatch = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ curMatch = *ptr0;
+ len0 = len;
+ }
+ }
+ }
+}
+
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
- {
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return;
- }
- {
+ for (;;)
+ {
+ UInt32 delta = pos - curMatch;
+ if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
+ }
+ {
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
+ const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
- if (pb[len] == cur[len])
- {
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- {
- if (len == lenLimit)
- {
- *ptr1 = pair[0];
- *ptr0 = pair[1];
- return;
- }
- }
- }
- if (pb[len] < cur[len])
- {
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- curMatch = *ptr1;
- len1 = len;
- }
- else
- {
- *ptr0 = curMatch;
- ptr0 = pair;
- curMatch = *ptr0;
- len0 = len;
- }
- }
- }
-}
-
-#define MOVE_POS \
- ++p->cyclicBufferPos; \
- p->buffer++; \
- if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
-
+ if (pb[len] == cur[len])
+ {
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ {
+ if (len == lenLimit)
+ {
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+ }
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ curMatch = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ curMatch = *ptr0;
+ len0 = len;
+ }
+ }
+ }
+}
+
+#define MOVE_POS \
+ ++p->cyclicBufferPos; \
+ p->buffer++; \
+ if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+
#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
-
-static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
-
-#define GET_MATCHES_HEADER2(minLen, ret_op) \
+
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
- cur = p->buffer;
-
-#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
-#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
-
-#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
-
-#define GET_MATCHES_FOOTER(offset, maxLen) \
+ cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define GET_MATCHES_FOOTER(offset, maxLen) \
offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
-
-#define SKIP_FOOTER \
+
+#define SKIP_FOOTER \
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
-
-#define UPDATE_maxLen { \
- ptrdiff_t diff = (ptrdiff_t)0 - d2; \
- const Byte *c = cur + maxLen; \
- const Byte *lim = cur + lenLimit; \
- for (; c != lim; c++) if (*(c + diff) != *c) break; \
+
+#define UPDATE_maxLen { \
+ ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+ const Byte *c = cur + maxLen; \
+ const Byte *lim = cur + lenLimit; \
+ for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); }
-
-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
+
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
unsigned offset;
- GET_MATCHES_HEADER(2)
- HASH2_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 1)
-}
-
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
+ GET_MATCHES_HEADER(2)
+ HASH2_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ offset = 0;
+ GET_MATCHES_FOOTER(offset, 1)
+}
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
unsigned offset;
- GET_MATCHES_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 2)
-}
-
-static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ offset = 0;
+ GET_MATCHES_FOOTER(offset, 2)
+}
+
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
UInt32 h2, d2, pos;
unsigned maxLen, offset;
- UInt32 *hash;
- GET_MATCHES_HEADER(3)
-
- HASH3_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
- d2 = pos - hash[h2];
-
- curMatch = (hash + kFix3HashSize)[hv];
-
- hash[h2] = pos;
- (hash + kFix3HashSize)[hv] = pos;
-
- maxLen = 2;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- UPDATE_maxLen
+ UInt32 *hash;
+ GET_MATCHES_HEADER(3)
+
+ HASH3_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
+ d2 = pos - hash[h2];
+
+ curMatch = (hash + kFix3HashSize)[hv];
+
+ hash[h2] = pos;
+ (hash + kFix3HashSize)[hv] = pos;
+
+ maxLen = 2;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ UPDATE_maxLen
distances[0] = (UInt32)maxLen;
- distances[1] = d2 - 1;
- offset = 2;
- if (maxLen == lenLimit)
- {
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (maxLen == lenLimit)
+ {
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
- }
- }
-
- GET_MATCHES_FOOTER(offset, maxLen)
-}
-
-static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
+ MOVE_POS_RET;
+ }
+ }
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen, offset;
- UInt32 *hash;
- GET_MATCHES_HEADER(4)
-
- HASH4_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
-
- curMatch = (hash + kFix4HashSize)[hv];
-
+ d3 = pos - (hash + kFix3HashSize)[h3];
+
+ curMatch = (hash + kFix4HashSize)[hv];
+
hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
maxLen = 2;
distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
-
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
+ distances[1] = d2 - 1;
+ offset = 2;
+ }
+
+ if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ maxLen = 3;
+ distances[(size_t)offset + 1] = d3 - 1;
+ offset += 2;
+ d2 = d3;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
distances[(size_t)offset - 2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
+ if (maxLen == lenLimit)
+ {
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 3)
- maxLen = 3;
-
- GET_MATCHES_FOOTER(offset, maxLen)
-}
-
-/*
-static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
- UInt32 *hash;
- GET_MATCHES_HEADER(5)
-
- HASH5_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 3)
+ maxLen = 3;
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+/*
+static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
-
- curMatch = (hash + kFix5HashSize)[hv];
-
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
- (hash + kFix5HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
- d2 = d3;
- }
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
- if (maxLen == lenLimit)
- {
- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 4)
- maxLen = 4;
-
- GET_MATCHES_FOOTER(offset, maxLen)
-}
-*/
-
-static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ distances[0] = maxLen = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (*(cur - d2 + 2) == cur[2])
+ distances[0] = maxLen = 3;
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[2] = maxLen = 3;
+ distances[3] = d3 - 1;
+ offset = 4;
+ d2 = d3;
+ }
+ }
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[0] = maxLen = 3;
+ distances[1] = d3 - 1;
+ offset = 2;
+ d2 = d3;
+ }
+
+ if (d2 != d4 && d4 < p->cyclicBufferSize
+ && *(cur - d4) == *cur
+ && *(cur - d4 + 3) == *(cur + 3))
+ {
+ maxLen = 4;
+ distances[(size_t)offset + 1] = d4 - 1;
+ offset += 2;
+ d2 = d4;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 4)
+ maxLen = 4;
+
+ GET_MATCHES_FOOTER(offset, maxLen)
+}
+*/
+
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen, offset;
- UInt32 *hash;
- GET_MATCHES_HEADER(4)
-
- HASH4_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
+ UInt32 *hash;
+ GET_MATCHES_HEADER(4)
+
+ HASH4_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- curMatch = (hash + kFix4HashSize)[hv];
-
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ curMatch = (hash + kFix4HashSize)[hv];
+
hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
maxLen = 2;
distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
-
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
+ distances[1] = d2 - 1;
+ offset = 2;
+ }
+
+ if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ maxLen = 3;
+ distances[(size_t)offset + 1] = d3 - 1;
+ offset += 2;
+ d2 = d3;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
distances[(size_t)offset - 2] = (UInt32)maxLen;
- if (maxLen == lenLimit)
- {
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 3)
- maxLen = 3;
-
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 3)
+ maxLen = 3;
+
offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
-}
-
-/*
-static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
- UInt32 *hash;
- GET_MATCHES_HEADER(5)
-
- HASH5_CALC;
-
- hash = p->hash;
- pos = p->pos;
-
+ distances + offset, maxLen) - (distances));
+ MOVE_POS_RET
+}
+
+/*
+static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+ UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
+ UInt32 *hash;
+ GET_MATCHES_HEADER(5)
+
+ HASH5_CALC;
+
+ hash = p->hash;
+ pos = p->pos;
+
d2 = pos - hash [h2];
- d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
-
- curMatch = (hash + kFix5HashSize)[hv];
-
+ d3 = pos - (hash + kFix3HashSize)[h3];
+ d4 = pos - (hash + kFix4HashSize)[h4];
+
+ curMatch = (hash + kFix5HashSize)[hv];
+
hash [h2] = pos;
- (hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
- (hash + kFix5HashSize)[hv] = pos;
-
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
- d2 = d3;
- }
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
- UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
- if (maxLen == lenLimit)
- {
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS_RET;
- }
- }
-
- if (maxLen < 4)
- maxLen = 4;
-
- offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
-}
-*/
-
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
-{
+ (hash + kFix3HashSize)[h3] = pos;
+ (hash + kFix4HashSize)[h4] = pos;
+ (hash + kFix5HashSize)[hv] = pos;
+
+ maxLen = 0;
+ offset = 0;
+
+ if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ {
+ distances[0] = maxLen = 2;
+ distances[1] = d2 - 1;
+ offset = 2;
+ if (*(cur - d2 + 2) == cur[2])
+ distances[0] = maxLen = 3;
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[2] = maxLen = 3;
+ distances[3] = d3 - 1;
+ offset = 4;
+ d2 = d3;
+ }
+ }
+ else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ {
+ distances[0] = maxLen = 3;
+ distances[1] = d3 - 1;
+ offset = 2;
+ d2 = d3;
+ }
+
+ if (d2 != d4 && d4 < p->cyclicBufferSize
+ && *(cur - d4) == *cur
+ && *(cur - d4 + 3) == *(cur + 3))
+ {
+ maxLen = 4;
+ distances[(size_t)offset + 1] = d4 - 1;
+ offset += 2;
+ d2 = d4;
+ }
+
+ if (offset != 0)
+ {
+ UPDATE_maxLen
+ distances[(size_t)offset - 2] = maxLen;
+ if (maxLen == lenLimit)
+ {
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS_RET;
+ }
+ }
+
+ if (maxLen < 4)
+ maxLen = 4;
+
+ offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+ distances + offset, maxLen) - (distances));
+ MOVE_POS_RET
+}
+*/
+
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
unsigned offset;
- GET_MATCHES_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
+ GET_MATCHES_HEADER(3)
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances, 2) - (distances));
- MOVE_POS_RET
-}
-
-static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- SKIP_HEADER(2)
- HASH2_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- SKIP_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2;
- UInt32 *hash;
- SKIP_HEADER(3)
- HASH3_CALC;
- hash = p->hash;
- curMatch = (hash + kFix3HashSize)[hv];
- hash[h2] =
- (hash + kFix3HashSize)[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
- HASH4_CALC;
- hash = p->hash;
- curMatch = (hash + kFix4HashSize)[hv];
+ distances, 2) - (distances));
+ MOVE_POS_RET
+}
+
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(2)
+ HASH2_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(3)
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2;
+ UInt32 *hash;
+ SKIP_HEADER(3)
+ HASH3_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix3HashSize)[hv];
+ hash[h2] =
+ (hash + kFix3HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ SKIP_HEADER(4)
+ HASH4_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-
-/*
-static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3, h4;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
- curMatch = (hash + kFix5HashSize)[hv];
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+
+/*
+static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3, h4;
+ UInt32 *hash;
+ SKIP_HEADER(5)
+ HASH5_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- SKIP_FOOTER
- }
- while (--num != 0);
-}
-*/
-
-static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
- HASH4_CALC;
- hash = p->hash;
- curMatch = (hash + kFix4HashSize)[hv];
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ SKIP_FOOTER
+ }
+ while (--num != 0);
+}
+*/
+
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3;
+ UInt32 *hash;
+ SKIP_HEADER(4)
+ HASH4_CALC;
+ hash = p->hash;
+ curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
-}
-
-/*
-static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- UInt32 h2, h3, h4;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
- curMatch = hash + kFix5HashSize)[hv];
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+
+/*
+static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ UInt32 h2, h3, h4;
+ UInt32 *hash;
+ SKIP_HEADER(5)
+ HASH5_CALC;
+ hash = p->hash;
+ curMatch = hash + kFix5HashSize)[hv];
hash [h2] =
- (hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
-}
-*/
-
-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
-{
- do
- {
- SKIP_HEADER(3)
- HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
-}
-
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
-{
- vTable->Init = (Mf_Init_Func)MatchFinder_Init;
- vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
- vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
- if (!p->btMode)
- {
- /* if (p->numHashBytes <= 4) */
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
- }
- /*
- else
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
- }
- */
- }
- else if (p->numHashBytes == 2)
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
- }
- else if (p->numHashBytes == 3)
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
- }
- else /* if (p->numHashBytes == 4) */
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
- }
- /*
- else
- {
- vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
- vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
- }
- */
-}
+ (hash + kFix3HashSize)[h3] =
+ (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+*/
+
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+ do
+ {
+ SKIP_HEADER(3)
+ HASH_ZIP_CALC;
+ curMatch = p->hash[hv];
+ p->hash[hv] = p->pos;
+ p->son[p->cyclicBufferPos] = curMatch;
+ MOVE_POS
+ }
+ while (--num != 0);
+}
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+ vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+ vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+ vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+ if (!p->btMode)
+ {
+ /* if (p->numHashBytes <= 4) */
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+ }
+ /*
+ else
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
+ }
+ */
+ }
+ else if (p->numHashBytes == 2)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+ }
+ else if (p->numHashBytes == 3)
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+ }
+ else /* if (p->numHashBytes == 4) */
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+ }
+ /*
+ else
+ {
+ vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
+ vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
+ }
+ */
+}
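The *_GetMatches functions above all report matches the same way: distances[] is filled with (length, distance - 1) pairs in order of strictly increasing length, and the return value is the number of UInt32 slots written. The following sketch walks such an array; the sample values are made up for illustration and are not output of the real match finder.

/* Hypothetical walk over the distances[] pairs produced by the
 * *_GetMatches functions above: distances[i] is a match length and
 * distances[i + 1] is that match's distance minus one. */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t UInt32;

static void PrintMatches(const UInt32 *distances, UInt32 numValues)
{
    UInt32 i;
    for (i = 0; i + 1 < numValues; i += 2)
    {
        UInt32 len  = distances[i];
        UInt32 dist = distances[i + 1] + 1;   /* stored value is distance - 1 */
        printf("match: len=%u dist=%u\n", (unsigned)len, (unsigned)dist);
    }
}

int main(void)
{
    /* Made-up example: a 2-byte match at distance 7, then a 5-byte match
     * at distance 260, in the increasing-length order used above. */
    UInt32 distances[4] = { 2, 7 - 1, 5, 260 - 1 };
    PrintMatches(distances, 4);
    return 0;
}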
diff --git a/contrib/libs/lzmasdk/LzFind.h b/contrib/libs/lzmasdk/LzFind.h
index d77158bd85..42c13be157 100644
--- a/contrib/libs/lzmasdk/LzFind.h
+++ b/contrib/libs/lzmasdk/LzFind.h
@@ -1,121 +1,121 @@
-/* LzFind.h -- Match finder for LZ algorithms
-2017-06-10 : Igor Pavlov : Public domain */
-
-#ifndef __LZ_FIND_H
-#define __LZ_FIND_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-typedef UInt32 CLzRef;
-
-typedef struct _CMatchFinder
-{
- Byte *buffer;
- UInt32 pos;
- UInt32 posLimit;
- UInt32 streamPos;
- UInt32 lenLimit;
-
- UInt32 cyclicBufferPos;
- UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
-
- Byte streamEndWasReached;
- Byte btMode;
- Byte bigHash;
- Byte directInput;
-
- UInt32 matchMaxLen;
- CLzRef *hash;
- CLzRef *son;
- UInt32 hashMask;
- UInt32 cutValue;
-
- Byte *bufferBase;
- ISeqInStream *stream;
-
- UInt32 blockSize;
- UInt32 keepSizeBefore;
- UInt32 keepSizeAfter;
-
- UInt32 numHashBytes;
- size_t directInputRem;
- UInt32 historySize;
- UInt32 fixedHashSize;
- UInt32 hashSizeSum;
- SRes result;
- UInt32 crc[256];
- size_t numRefs;
-
- UInt64 expectedDataSize;
-} CMatchFinder;
-
-#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
-
-#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
-
-#define Inline_MatchFinder_IsFinishedOK(p) \
- ((p)->streamEndWasReached \
- && (p)->streamPos == (p)->pos \
- && (!(p)->directInput || (p)->directInputRem == 0))
-
-int MatchFinder_NeedMove(CMatchFinder *p);
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
-void MatchFinder_MoveBlock(CMatchFinder *p);
-void MatchFinder_ReadIfRequired(CMatchFinder *p);
-
-void MatchFinder_Construct(CMatchFinder *p);
-
-/* Conditions:
- historySize <= 3 GB
- keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
-*/
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc);
-void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
-
-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
- UInt32 *distances, UInt32 maxLen);
-
-/*
-Conditions:
- Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
- Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
-*/
-
-typedef void (*Mf_Init_Func)(void *object);
-typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
-typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
-typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
-typedef void (*Mf_Skip_Func)(void *object, UInt32);
-
-typedef struct _IMatchFinder
-{
- Mf_Init_Func Init;
- Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
- Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
- Mf_GetMatches_Func GetMatches;
- Mf_Skip_Func Skip;
-} IMatchFinder;
-
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
-
-void MatchFinder_Init_LowHash(CMatchFinder *p);
-void MatchFinder_Init_HighHash(CMatchFinder *p);
-void MatchFinder_Init_3(CMatchFinder *p, int readData);
-void MatchFinder_Init(CMatchFinder *p);
-
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-
-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
-
-EXTERN_C_END
-
-#endif
+/* LzFind.h -- Match finder for LZ algorithms
+2017-06-10 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_FIND_H
+#define __LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef;
+
+typedef struct _CMatchFinder
+{
+ Byte *buffer;
+ UInt32 pos;
+ UInt32 posLimit;
+ UInt32 streamPos;
+ UInt32 lenLimit;
+
+ UInt32 cyclicBufferPos;
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+ Byte streamEndWasReached;
+ Byte btMode;
+ Byte bigHash;
+ Byte directInput;
+
+ UInt32 matchMaxLen;
+ CLzRef *hash;
+ CLzRef *son;
+ UInt32 hashMask;
+ UInt32 cutValue;
+
+ Byte *bufferBase;
+ ISeqInStream *stream;
+
+ UInt32 blockSize;
+ UInt32 keepSizeBefore;
+ UInt32 keepSizeAfter;
+
+ UInt32 numHashBytes;
+ size_t directInputRem;
+ UInt32 historySize;
+ UInt32 fixedHashSize;
+ UInt32 hashSizeSum;
+ SRes result;
+ UInt32 crc[256];
+ size_t numRefs;
+
+ UInt64 expectedDataSize;
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+
+#define Inline_MatchFinder_IsFinishedOK(p) \
+ ((p)->streamEndWasReached \
+ && (p)->streamPos == (p)->pos \
+ && (!(p)->directInput || (p)->directInputRem == 0))
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+ historySize <= 3 GB
+ keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+ UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+ Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+
+typedef struct _IMatchFinder
+{
+ Mf_Init_Func Init;
+ Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+ Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+ Mf_GetMatches_Func GetMatches;
+ Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init(CMatchFinder *p);
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+EXTERN_C_END
+
+#endif
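LzFind.h above exposes the match finder only through the IMatchFinder table of function pointers that MatchFinder_CreateVTable fills in, so the encoder can drive the hash-chain and binary-tree variants through one interface. The sketch below mirrors that dispatch shape with purely hypothetical stub types and functions; it omits GetPointerToCurrentPos and does not link against the real LzFind.

/* Hypothetical stand-in for the IMatchFinder dispatch pattern declared
 * above.  MockFinder and the Mock_* functions only mimic the call shape
 * (Init, GetNumAvailableBytes, GetMatches, Skip), not the behaviour of
 * the real CMatchFinder. */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t UInt32;

typedef void   (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void   (*Mf_Skip_Func)(void *object, UInt32 num);

typedef struct
{
    Mf_Init_Func Init;
    Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
    Mf_GetMatches_Func GetMatches;
    Mf_Skip_Func Skip;
} MockVTable;

typedef struct { UInt32 pos, streamPos; } MockFinder;

static void   Mock_Init(void *object) { ((MockFinder *)object)->pos = 0; }
static UInt32 Mock_Avail(void *object) { const MockFinder *p = (const MockFinder *)object; return p->streamPos - p->pos; }
static UInt32 Mock_GetMatches(void *object, UInt32 *distances)
{
    MockFinder *p = (MockFinder *)object;
    distances[0] = 3;      /* pretend one 3-byte match ...         */
    distances[1] = 5 - 1;  /* ... at distance 5 (stored as dist-1) */
    p->pos++;
    return 2;
}
static void Mock_Skip(void *object, UInt32 num) { ((MockFinder *)object)->pos += num; }

int main(void)
{
    MockFinder mf = { 0, 16 };
    MockVTable vt = { Mock_Init, Mock_Avail, Mock_GetMatches, Mock_Skip };
    UInt32 distances[2];

    vt.Init(&mf);
    while (vt.GetNumAvailableBytes(&mf) > 8)
    {
        UInt32 n = vt.GetMatches(&mf, distances);
        if (n == 0)
            vt.Skip(&mf, 1);   /* no match at this position: advance one byte */
    }
    printf("stopped at pos %u\n", (unsigned)mf.pos);
    return 0;
}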
diff --git a/contrib/libs/lzmasdk/LzHash.h b/contrib/libs/lzmasdk/LzHash.h
index 61d9a5d80e..e7c942303d 100644
--- a/contrib/libs/lzmasdk/LzHash.h
+++ b/contrib/libs/lzmasdk/LzHash.h
@@ -1,57 +1,57 @@
-/* LzHash.h -- HASH functions for LZ algorithms
-2015-04-12 : Igor Pavlov : Public domain */
-
-#ifndef __LZ_HASH_H
-#define __LZ_HASH_H
-
-#define kHash2Size (1 << 10)
-#define kHash3Size (1 << 16)
-#define kHash4Size (1 << 20)
-
-#define kFix3HashSize (kHash2Size)
-#define kFix4HashSize (kHash2Size + kHash3Size)
-#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
-
-#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
-
-#define HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
-
-#define HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
-
-#define HASH5_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- temp ^= (p->crc[cur[3]] << 5); \
- h4 = temp & (kHash4Size - 1); \
- hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
-
-/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
-#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
-
-
-#define MT_HASH2_CALC \
- h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
-
-#define MT_HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
-
-#define MT_HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
-
-#endif
+/* LzHash.h -- HASH functions for LZ algorithms
+2015-04-12 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_HASH_H
+#define __LZ_HASH_H
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
+
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << 5); \
+ h4 = temp & (kHash4Size - 1); \
+ hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
+
+/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+#define MT_HASH2_CALC \
+ h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
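The HASH*_CALC macros above derive several table indices from one pass over the leading input bytes. The standalone sketch below reproduces HASH4_CALC; the crc[] table is built with the reflected CRC-32 polynomial 0xEDB88320, matching the table LzFind's constructor precomputes, while the hashMask value here is an arbitrary assumption for the demo (the real mask is derived in MatchFinder_Create from the history size).

/* Standalone illustration of HASH4_CALC.  The polynomial matches the
 * table LzFind precomputes; hashMask and the input bytes are assumed
 * values, chosen only for this demo. */
#include <stdio.h>
#include <stdint.h>

#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)

int main(void)
{
    uint32_t crc[256];
    const uint8_t cur[4] = { 'a', 'b', 'c', 'd' };   /* current input bytes  */
    const uint32_t hashMask = (1u << 16) - 1;        /* assumed for the demo */
    uint32_t temp, h2, h3, hv;
    unsigned i, j;

    for (i = 0; i < 256; i++)                 /* same table shape as p->crc[] */
    {
        uint32_t r = i;
        for (j = 0; j < 8; j++)
            r = (r >> 1) ^ (0xEDB88320u & (0u - (r & 1)));
        crc[i] = r;
    }

    /* Body of HASH4_CALC: one pass over cur[0..3] yields all three hashes. */
    temp = crc[cur[0]] ^ cur[1];
    h2 = temp & (kHash2Size - 1);                 /* 2-byte hash -> hash[h2]                   */
    temp ^= (uint32_t)cur[2] << 8;
    h3 = temp & (kHash3Size - 1);                 /* 3-byte hash -> (hash + kFix3HashSize)[h3] */
    hv = (temp ^ (crc[cur[3]] << 5)) & hashMask;  /* main hash   -> (hash + kFix4HashSize)[hv] */

    printf("h2=%u h3=%u hv=%u\n", (unsigned)h2, (unsigned)h3, (unsigned)hv);
    return 0;
}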
diff --git a/contrib/libs/lzmasdk/LzmaDec.c b/contrib/libs/lzmasdk/LzmaDec.c
index 100e0a691f..ba3e1dd50e 100644
--- a/contrib/libs/lzmasdk/LzmaDec.c
+++ b/contrib/libs/lzmasdk/LzmaDec.c
@@ -1,31 +1,31 @@
-/* LzmaDec.c -- LZMA Decoder
+/* LzmaDec.c -- LZMA Decoder
2018-07-04 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
+
+#include "Precomp.h"
+
#include <string.h>
/* #include "CpuArch.h" */
-#include "LzmaDec.h"
-
-#define kNumTopBits 24
-#define kTopValue ((UInt32)1 << kNumTopBits)
-
-#define kNumBitModelTotalBits 11
-#define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
-
-#define RC_INIT_SIZE 5
-
-#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
-
+#include "LzmaDec.h"
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+#define RC_INIT_SIZE 5
+
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
-#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
-#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
-#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
- { UPDATE_0(p); i = (i + i); A0; } else \
- { UPDATE_1(p); i = (i + i) + 1; A1; }
-
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+ { UPDATE_0(p); i = (i + i); A0; } else \
+ { UPDATE_1(p); i = (i + i) + 1; A1; }
+
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
@@ -35,85 +35,85 @@
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
-#define TREE_DECODE(probs, limit, i) \
- { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
-
-/* #define _LZMA_SIZE_OPT */
-
-#ifdef _LZMA_SIZE_OPT
-#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
-#else
-#define TREE_6_DECODE(probs, i) \
- { i = 1; \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- TREE_GET_BIT(probs, i); \
- i -= 0x40; }
-#endif
-
+#define TREE_DECODE(probs, limit, i) \
+ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+ { i = 1; \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ TREE_GET_BIT(probs, i); \
+ i -= 0x40; }
+#endif
+
#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
-#define MATCHED_LITER_DEC \
+#define MATCHED_LITER_DEC \
matchByte += matchByte; \
bit = offs; \
offs &= matchByte; \
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
-
-#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
-
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
-#define UPDATE_0_CHECK range = bound;
-#define UPDATE_1_CHECK range -= bound; code -= bound;
-#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
- { UPDATE_0_CHECK; i = (i + i); A0; } else \
- { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
-#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
-#define TREE_DECODE_CHECK(probs, limit, i) \
- { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
-
-
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+ { UPDATE_0_CHECK; i = (i + i); A0; } else \
+ { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
{ UPDATE_0_CHECK; i += m; m += m; } else \
{ UPDATE_1_CHECK; m += m; i += m; }
-#define kNumPosBitsMax 4
-#define kNumPosStatesMax (1 << kNumPosBitsMax)
-
-#define kLenNumLowBits 3
-#define kLenNumLowSymbols (1 << kLenNumLowBits)
-#define kLenNumHighBits 8
-#define kLenNumHighSymbols (1 << kLenNumHighBits)
-
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
#define LenLow 0
#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
-#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
-
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
#define LenChoice LenLow
#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
-
-#define kNumStates 12
+
+#define kNumStates 12
#define kNumStates2 16
-#define kNumLitStates 7
-
-#define kStartPosModelIndex 4
-#define kEndPosModelIndex 14
-#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
-
-#define kNumPosSlotBits 6
-#define kNumLenToPosStates 4
-
-#define kNumAlignBits 4
-#define kAlignTableSize (1 << kNumAlignBits)
-
-#define kMatchMinLen 2
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
-
+
/* External ASM code needs same CLzmaProb array layout. So don't change it. */
/* (probs_1664) is faster and better for code size at some platforms */
@@ -137,22 +137,22 @@
#define IsMatch (LenCoder + kNumLenProbs)
#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
#define IsRep (Align + kAlignTableSize)
-#define IsRepG0 (IsRep + kNumStates)
-#define IsRepG1 (IsRepG0 + kNumStates)
-#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
#define PosSlot (IsRepG2 + kNumStates)
#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define NUM_BASE_PROBS (Literal + kStartOffset)
-
+
#if Align != 0 && kStartOffset != 0
#error Stop_Compiling_Bad_LZMA_kAlign
#endif
-
+
#if NUM_BASE_PROBS != 1984
#error Stop_Compiling_Bad_LZMA_PROBS
-#endif
-
-
+#endif
+
+
#define LZMA_LIT_SIZE 0x300
#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
@@ -162,8 +162,8 @@
#define COMBINED_PS_STATE (posState + state)
#define GET_LEN_STATE (posState)
-#define LZMA_DIC_MIN (1 << 12)
-
+#define LZMA_DIC_MIN (1 << 12)
+
/*
p->remainLen : shows status of LZMA decoder:
< kMatchSpecLenStart : normal remain
@@ -197,16 +197,16 @@ Processing:
It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
-Out:
+Out:
RangeCoder is normalized
- Result:
- SZ_OK - OK
- SZ_ERROR_DATA - Error
- p->remainLen:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
-*/
-
+ Result:
+ SZ_OK - OK
+ SZ_ERROR_DATA - Error
+ p->remainLen:
+ < kMatchSpecLenStart : normal remain
+ = kMatchSpecLenStart : finished
+*/
+
#ifdef _LZMA_DEC_OPT
@@ -216,403 +216,403 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
static
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
-{
+{
CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
- UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
- unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
- unsigned lc = p->prop.lc;
+ UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+ unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+ unsigned lc = p->prop.lc;
unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
-
- Byte *dic = p->dic;
- SizeT dicBufSize = p->dicBufSize;
- SizeT dicPos = p->dicPos;
-
- UInt32 processedPos = p->processedPos;
- UInt32 checkDicSize = p->checkDicSize;
- unsigned len = 0;
-
- const Byte *buf = p->buf;
- UInt32 range = p->range;
- UInt32 code = p->code;
-
- do
- {
- CLzmaProb *prob;
- UInt32 bound;
- unsigned ttt;
+
+ Byte *dic = p->dic;
+ SizeT dicBufSize = p->dicBufSize;
+ SizeT dicPos = p->dicPos;
+
+ UInt32 processedPos = p->processedPos;
+ UInt32 checkDicSize = p->checkDicSize;
+ unsigned len = 0;
+
+ const Byte *buf = p->buf;
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+
+ do
+ {
+ CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
unsigned posState = CALC_POS_STATE(processedPos, pbMask);
-
+
prob = probs + IsMatch + COMBINED_PS_STATE;
- IF_BIT_0(prob)
- {
- unsigned symbol;
- UPDATE_0(prob);
- prob = probs + Literal;
- if (processedPos != 0 || checkDicSize != 0)
+ IF_BIT_0(prob)
+ {
+ unsigned symbol;
+ UPDATE_0(prob);
+ prob = probs + Literal;
+ if (processedPos != 0 || checkDicSize != 0)
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
- processedPos++;
-
- if (state < kNumLitStates)
- {
- state -= (state < 4) ? state : 3;
- symbol = 1;
- #ifdef _LZMA_SIZE_OPT
- do { NORMAL_LITER_DEC } while (symbol < 0x100);
- #else
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- NORMAL_LITER_DEC
- #endif
- }
- else
- {
- unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
- unsigned offs = 0x100;
- state -= (state < 10) ? 3 : 6;
- symbol = 1;
- #ifdef _LZMA_SIZE_OPT
- do
- {
- unsigned bit;
- CLzmaProb *probLit;
- MATCHED_LITER_DEC
- }
- while (symbol < 0x100);
- #else
- {
- unsigned bit;
- CLzmaProb *probLit;
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- MATCHED_LITER_DEC
- }
- #endif
- }
-
- dic[dicPos++] = (Byte)symbol;
- continue;
- }
-
- {
- UPDATE_1(prob);
- prob = probs + IsRep + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- state += kNumStates;
- prob = probs + LenCoder;
- }
- else
- {
- UPDATE_1(prob);
+ processedPos++;
+
+ if (state < kNumLitStates)
+ {
+ state -= (state < 4) ? state : 3;
+ symbol = 1;
+ #ifdef _LZMA_SIZE_OPT
+ do { NORMAL_LITER_DEC } while (symbol < 0x100);
+ #else
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ NORMAL_LITER_DEC
+ #endif
+ }
+ else
+ {
+ unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ unsigned offs = 0x100;
+ state -= (state < 10) ? 3 : 6;
+ symbol = 1;
+ #ifdef _LZMA_SIZE_OPT
+ do
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ }
+ while (symbol < 0x100);
+ #else
+ {
+ unsigned bit;
+ CLzmaProb *probLit;
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ MATCHED_LITER_DEC
+ }
+ #endif
+ }
+
+ dic[dicPos++] = (Byte)symbol;
+ continue;
+ }
+
+ {
+ UPDATE_1(prob);
+ prob = probs + IsRep + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ state += kNumStates;
+ prob = probs + LenCoder;
+ }
+ else
+ {
+ UPDATE_1(prob);
/*
// that case was checked before with kBadRepCode
- if (checkDicSize == 0 && processedPos == 0)
- return SZ_ERROR_DATA;
+ if (checkDicSize == 0 && processedPos == 0)
+ return SZ_ERROR_DATA;
*/
- prob = probs + IsRepG0 + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
prob = probs + IsRep0Long + COMBINED_PS_STATE;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
- dicPos++;
- processedPos++;
- state = state < kNumLitStates ? 9 : 11;
- continue;
- }
- UPDATE_1(prob);
- }
- else
- {
- UInt32 distance;
- UPDATE_1(prob);
- prob = probs + IsRepG1 + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- distance = rep1;
- }
- else
- {
- UPDATE_1(prob);
- prob = probs + IsRepG2 + state;
- IF_BIT_0(prob)
- {
- UPDATE_0(prob);
- distance = rep2;
- }
- else
- {
- UPDATE_1(prob);
- distance = rep3;
- rep3 = rep2;
- }
- rep2 = rep1;
- }
- rep1 = rep0;
- rep0 = distance;
- }
- state = state < kNumLitStates ? 8 : 11;
- prob = probs + RepLenCoder;
- }
-
- #ifdef _LZMA_SIZE_OPT
- {
- unsigned lim, offset;
- CLzmaProb *probLen = prob + LenChoice;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ processedPos++;
+ state = state < kNumLitStates ? 9 : 11;
+ continue;
+ }
+ UPDATE_1(prob);
+ }
+ else
+ {
+ UInt32 distance;
+ UPDATE_1(prob);
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ distance = rep1;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0(prob)
+ {
+ UPDATE_0(prob);
+ distance = rep2;
+ }
+ else
+ {
+ UPDATE_1(prob);
+ distance = rep3;
+ rep3 = rep2;
+ }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state = state < kNumLitStates ? 8 : 11;
+ prob = probs + RepLenCoder;
+ }
+
+ #ifdef _LZMA_SIZE_OPT
+ {
+ unsigned lim, offset;
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
probLen = prob + LenLow + GET_LEN_STATE;
- offset = 0;
- lim = (1 << kLenNumLowBits);
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenChoice2;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
+ offset = 0;
+ lim = (1 << kLenNumLowBits);
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
- offset = kLenNumLowSymbols;
+ offset = kLenNumLowSymbols;
lim = (1 << kLenNumLowBits);
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenHigh;
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2;
- lim = (1 << kLenNumHighBits);
- }
- }
- TREE_DECODE(probLen, lim, len);
- len += offset;
- }
- #else
- {
- CLzmaProb *probLen = prob + LenChoice;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
+ lim = (1 << kLenNumHighBits);
+ }
+ }
+ TREE_DECODE(probLen, lim, len);
+ len += offset;
+ }
+ #else
+ {
+ CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
probLen = prob + LenLow + GET_LEN_STATE;
- len = 1;
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- len -= 8;
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenChoice2;
- IF_BIT_0(probLen)
- {
- UPDATE_0(probLen);
+ len = 1;
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ len -= 8;
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenChoice2;
+ IF_BIT_0(probLen)
+ {
+ UPDATE_0(probLen);
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
- len = 1;
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- TREE_GET_BIT(probLen, len);
- }
- else
- {
- UPDATE_1(probLen);
- probLen = prob + LenHigh;
- TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+ len = 1;
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ TREE_GET_BIT(probLen, len);
+ }
+ else
+ {
+ UPDATE_1(probLen);
+ probLen = prob + LenHigh;
+ TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
len += kLenNumLowSymbols * 2;
- }
- }
- }
- #endif
-
- if (state >= kNumStates)
- {
- UInt32 distance;
- prob = probs + PosSlot +
- ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
- TREE_6_DECODE(prob, distance);
- if (distance >= kStartPosModelIndex)
- {
- unsigned posSlot = (unsigned)distance;
- unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
- distance = (2 | (distance & 1));
- if (posSlot < kEndPosModelIndex)
- {
- distance <<= numDirectBits;
+ }
+ }
+ }
+ #endif
+
+ if (state >= kNumStates)
+ {
+ UInt32 distance;
+ prob = probs + PosSlot +
+ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+ TREE_6_DECODE(prob, distance);
+ if (distance >= kStartPosModelIndex)
+ {
+ unsigned posSlot = (unsigned)distance;
+ unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ distance = (2 | (distance & 1));
+ if (posSlot < kEndPosModelIndex)
+ {
+ distance <<= numDirectBits;
prob = probs + SpecPos;
- {
+ {
UInt32 m = 1;
distance++;
- do
- {
+ do
+ {
REV_BIT_VAR(prob, distance, m);
- }
+ }
while (--numDirectBits);
distance -= m;
- }
- }
- else
- {
- numDirectBits -= kNumAlignBits;
- do
- {
- NORMALIZE
- range >>= 1;
-
- {
- UInt32 t;
- code -= range;
- t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
- distance = (distance << 1) + (t + 1);
- code += range & t;
- }
- /*
- distance <<= 1;
- if (code >= range)
- {
- code -= range;
- distance |= 1;
- }
- */
- }
+ }
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE
+ range >>= 1;
+
+ {
+ UInt32 t;
+ code -= range;
+ t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+ distance = (distance << 1) + (t + 1);
+ code += range & t;
+ }
+ /*
+ distance <<= 1;
+ if (code >= range)
+ {
+ code -= range;
+ distance |= 1;
+ }
+ */
+ }
while (--numDirectBits);
- prob = probs + Align;
- distance <<= kNumAlignBits;
- {
- unsigned i = 1;
+ prob = probs + Align;
+ distance <<= kNumAlignBits;
+ {
+ unsigned i = 1;
REV_BIT_CONST(prob, i, 1);
REV_BIT_CONST(prob, i, 2);
REV_BIT_CONST(prob, i, 4);
REV_BIT_LAST (prob, i, 8);
distance |= i;
- }
- if (distance == (UInt32)0xFFFFFFFF)
- {
+ }
+ if (distance == (UInt32)0xFFFFFFFF)
+ {
len = kMatchSpecLenStart;
- state -= kNumStates;
- break;
- }
- }
- }
-
- rep3 = rep2;
- rep2 = rep1;
- rep1 = rep0;
- rep0 = distance + 1;
+ state -= kNumStates;
+ break;
+ }
+ }
+ }
+
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance + 1;
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
- {
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
- }
- }
-
- len += kMatchMinLen;
-
- {
- SizeT rem;
- unsigned curLen;
- SizeT pos;
-
- if ((rem = limit - dicPos) == 0)
- {
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
- }
-
- curLen = ((rem < len) ? (unsigned)rem : len);
- pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
-
+ {
+ p->dicPos = dicPos;
+ return SZ_ERROR_DATA;
+ }
+ }
+
+ len += kMatchMinLen;
+
+ {
+ SizeT rem;
+ unsigned curLen;
+ SizeT pos;
+
+ if ((rem = limit - dicPos) == 0)
+ {
+ p->dicPos = dicPos;
+ return SZ_ERROR_DATA;
+ }
+
+ curLen = ((rem < len) ? (unsigned)rem : len);
+ pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
processedPos += (UInt32)curLen;
-
- len -= curLen;
- if (curLen <= dicBufSize - pos)
- {
- Byte *dest = dic + dicPos;
- ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
- const Byte *lim = dest + curLen;
+
+ len -= curLen;
+ if (curLen <= dicBufSize - pos)
+ {
+ Byte *dest = dic + dicPos;
+ ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ const Byte *lim = dest + curLen;
dicPos += (SizeT)curLen;
- do
- *(dest) = (Byte)*(dest + src);
- while (++dest != lim);
- }
- else
- {
- do
- {
- dic[dicPos++] = dic[pos];
- if (++pos == dicBufSize)
- pos = 0;
- }
- while (--curLen != 0);
- }
- }
- }
- }
- while (dicPos < limit && buf < bufLimit);
-
- NORMALIZE;
-
- p->buf = buf;
- p->range = range;
- p->code = code;
+ do
+ *(dest) = (Byte)*(dest + src);
+ while (++dest != lim);
+ }
+ else
+ {
+ do
+ {
+ dic[dicPos++] = dic[pos];
+ if (++pos == dicBufSize)
+ pos = 0;
+ }
+ while (--curLen != 0);
+ }
+ }
+ }
+ }
+ while (dicPos < limit && buf < bufLimit);
+
+ NORMALIZE;
+
+ p->buf = buf;
+ p->range = range;
+ p->code = code;
p->remainLen = (UInt32)len;
- p->dicPos = dicPos;
- p->processedPos = processedPos;
- p->reps[0] = rep0;
- p->reps[1] = rep1;
- p->reps[2] = rep2;
- p->reps[3] = rep3;
+ p->dicPos = dicPos;
+ p->processedPos = processedPos;
+ p->reps[0] = rep0;
+ p->reps[1] = rep1;
+ p->reps[2] = rep2;
+ p->reps[3] = rep3;
p->state = (UInt32)state;
-
- return SZ_OK;
-}
+
+ return SZ_OK;
+}
#endif
-
-static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
-{
- if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
- {
- Byte *dic = p->dic;
- SizeT dicPos = p->dicPos;
- SizeT dicBufSize = p->dicBufSize;
+
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+ if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+ {
+ Byte *dic = p->dic;
+ SizeT dicPos = p->dicPos;
+ SizeT dicBufSize = p->dicBufSize;
unsigned len = (unsigned)p->remainLen;
- SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
- SizeT rem = limit - dicPos;
- if (rem < len)
- len = (unsigned)(rem);
-
- if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
- p->checkDicSize = p->prop.dicSize;
-
+ SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+ SizeT rem = limit - dicPos;
+ if (rem < len)
+ len = (unsigned)(rem);
+
+ if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+ p->checkDicSize = p->prop.dicSize;
+
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
- while (len != 0)
- {
- len--;
- dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
- dicPos++;
- }
- p->dicPos = dicPos;
- }
-}
-
+ while (len != 0)
+ {
+ len--;
+ dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ dicPos++;
+ }
+ p->dicPos = dicPos;
+ }
+}
+
#define kRange0 0xFFFFFFFF
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
@@ -621,265 +621,265 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
#error Stop_Compiling_Bad_LZMA_Check
#endif
-static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
-{
- do
- {
- SizeT limit2 = limit;
- if (p->checkDicSize == 0)
- {
- UInt32 rem = p->prop.dicSize - p->processedPos;
- if (limit - p->dicPos > rem)
- limit2 = p->dicPos + rem;
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+ do
+ {
+ SizeT limit2 = limit;
+ if (p->checkDicSize == 0)
+ {
+ UInt32 rem = p->prop.dicSize - p->processedPos;
+ if (limit - p->dicPos > rem)
+ limit2 = p->dicPos + rem;
if (p->processedPos == 0)
if (p->code >= kBadRepCode)
return SZ_ERROR_DATA;
- }
+ }
RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
-
- if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
- p->checkDicSize = p->prop.dicSize;
-
- LzmaDec_WriteRem(p, limit);
- }
- while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
-
- return 0;
-}
-
-typedef enum
-{
- DUMMY_ERROR, /* unexpected end of input stream */
- DUMMY_LIT,
- DUMMY_MATCH,
- DUMMY_REP
-} ELzmaDummy;
-
-static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
-{
- UInt32 range = p->range;
- UInt32 code = p->code;
- const Byte *bufLimit = buf + inSize;
+
+ if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+ p->checkDicSize = p->prop.dicSize;
+
+ LzmaDec_WriteRem(p, limit);
+ }
+ while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+ return 0;
+}
+
+typedef enum
+{
+ DUMMY_ERROR, /* unexpected end of input stream */
+ DUMMY_LIT,
+ DUMMY_MATCH,
+ DUMMY_REP
+} ELzmaDummy;
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{
+ UInt32 range = p->range;
+ UInt32 code = p->code;
+ const Byte *bufLimit = buf + inSize;
const CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
- ELzmaDummy res;
-
- {
- const CLzmaProb *prob;
- UInt32 bound;
- unsigned ttt;
+ ELzmaDummy res;
+
+ {
+ const CLzmaProb *prob;
+ UInt32 bound;
+ unsigned ttt;
unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
-
+
prob = probs + IsMatch + COMBINED_PS_STATE;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK
-
- /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
-
- prob = probs + Literal;
- if (p->checkDicSize != 0 || p->processedPos != 0)
- prob += ((UInt32)LZMA_LIT_SIZE *
- ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
- (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
-
- if (state < kNumLitStates)
- {
- unsigned symbol = 1;
- do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
- }
- else
- {
- unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
- (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
- unsigned offs = 0x100;
- unsigned symbol = 1;
- do
- {
- unsigned bit;
- const CLzmaProb *probLit;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK
+
+ /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+ prob = probs + Literal;
+ if (p->checkDicSize != 0 || p->processedPos != 0)
+ prob += ((UInt32)LZMA_LIT_SIZE *
+ ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+ if (state < kNumLitStates)
+ {
+ unsigned symbol = 1;
+ do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+ }
+ else
+ {
+ unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+ (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
+ unsigned offs = 0x100;
+ unsigned symbol = 1;
+ do
+ {
+ unsigned bit;
+ const CLzmaProb *probLit;
matchByte += matchByte;
bit = offs;
offs &= matchByte;
probLit = prob + (offs + bit + symbol);
GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
- }
- while (symbol < 0x100);
- }
- res = DUMMY_LIT;
- }
- else
- {
- unsigned len;
- UPDATE_1_CHECK;
-
- prob = probs + IsRep + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- state = 0;
- prob = probs + LenCoder;
- res = DUMMY_MATCH;
- }
- else
- {
- UPDATE_1_CHECK;
- res = DUMMY_REP;
- prob = probs + IsRepG0 + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
+ }
+ while (symbol < 0x100);
+ }
+ res = DUMMY_LIT;
+ }
+ else
+ {
+ unsigned len;
+ UPDATE_1_CHECK;
+
+ prob = probs + IsRep + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ state = 0;
+ prob = probs + LenCoder;
+ res = DUMMY_MATCH;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ res = DUMMY_REP;
+ prob = probs + IsRepG0 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
prob = probs + IsRep0Long + COMBINED_PS_STATE;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- NORMALIZE_CHECK;
- return DUMMY_REP;
- }
- else
- {
- UPDATE_1_CHECK;
- }
- }
- else
- {
- UPDATE_1_CHECK;
- prob = probs + IsRepG1 + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- }
- else
- {
- UPDATE_1_CHECK;
- prob = probs + IsRepG2 + state;
- IF_BIT_0_CHECK(prob)
- {
- UPDATE_0_CHECK;
- }
- else
- {
- UPDATE_1_CHECK;
- }
- }
- }
- state = kNumStates;
- prob = probs + RepLenCoder;
- }
- {
- unsigned limit, offset;
- const CLzmaProb *probLen = prob + LenChoice;
- IF_BIT_0_CHECK(probLen)
- {
- UPDATE_0_CHECK;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ NORMALIZE_CHECK;
+ return DUMMY_REP;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ }
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ prob = probs + IsRepG1 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ prob = probs + IsRepG2 + state;
+ IF_BIT_0_CHECK(prob)
+ {
+ UPDATE_0_CHECK;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ }
+ }
+ }
+ state = kNumStates;
+ prob = probs + RepLenCoder;
+ }
+ {
+ unsigned limit, offset;
+ const CLzmaProb *probLen = prob + LenChoice;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK;
probLen = prob + LenLow + GET_LEN_STATE;
- offset = 0;
- limit = 1 << kLenNumLowBits;
- }
- else
- {
- UPDATE_1_CHECK;
- probLen = prob + LenChoice2;
- IF_BIT_0_CHECK(probLen)
- {
- UPDATE_0_CHECK;
+ offset = 0;
+ limit = 1 << kLenNumLowBits;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ probLen = prob + LenChoice2;
+ IF_BIT_0_CHECK(probLen)
+ {
+ UPDATE_0_CHECK;
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
- offset = kLenNumLowSymbols;
+ offset = kLenNumLowSymbols;
limit = 1 << kLenNumLowBits;
- }
- else
- {
- UPDATE_1_CHECK;
- probLen = prob + LenHigh;
+ }
+ else
+ {
+ UPDATE_1_CHECK;
+ probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2;
- limit = 1 << kLenNumHighBits;
- }
- }
- TREE_DECODE_CHECK(probLen, limit, len);
- len += offset;
- }
-
- if (state < 4)
- {
- unsigned posSlot;
- prob = probs + PosSlot +
+ limit = 1 << kLenNumHighBits;
+ }
+ }
+ TREE_DECODE_CHECK(probLen, limit, len);
+ len += offset;
+ }
+
+ if (state < 4)
+ {
+ unsigned posSlot;
+ prob = probs + PosSlot +
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
- kNumPosSlotBits);
- TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
- if (posSlot >= kStartPosModelIndex)
- {
- unsigned numDirectBits = ((posSlot >> 1) - 1);
-
- /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
-
- if (posSlot < kEndPosModelIndex)
- {
+ kNumPosSlotBits);
+ TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+ if (posSlot >= kStartPosModelIndex)
+ {
+ unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+ /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+ if (posSlot < kEndPosModelIndex)
+ {
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
- }
- else
- {
- numDirectBits -= kNumAlignBits;
- do
- {
- NORMALIZE_CHECK
- range >>= 1;
- code -= range & (((code - range) >> 31) - 1);
- /* if (code >= range) code -= range; */
- }
+ }
+ else
+ {
+ numDirectBits -= kNumAlignBits;
+ do
+ {
+ NORMALIZE_CHECK
+ range >>= 1;
+ code -= range & (((code - range) >> 31) - 1);
+ /* if (code >= range) code -= range; */
+ }
while (--numDirectBits);
- prob = probs + Align;
- numDirectBits = kNumAlignBits;
- }
- {
- unsigned i = 1;
+ prob = probs + Align;
+ numDirectBits = kNumAlignBits;
+ }
+ {
+ unsigned i = 1;
unsigned m = 1;
- do
- {
+ do
+ {
REV_BIT_CHECK(prob, i, m);
- }
+ }
while (--numDirectBits);
- }
- }
- }
- }
- }
- NORMALIZE_CHECK;
- return res;
-}
-
-
+ }
+ }
+ }
+ }
+ }
+ NORMALIZE_CHECK;
+ return res;
+}
+
+
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
-{
+{
p->remainLen = kMatchSpecLenStart + 1;
- p->tempBufSize = 0;
-
- if (initDic)
- {
- p->processedPos = 0;
- p->checkDicSize = 0;
+ p->tempBufSize = 0;
+
+ if (initDic)
+ {
+ p->processedPos = 0;
+ p->checkDicSize = 0;
p->remainLen = kMatchSpecLenStart + 2;
- }
- if (initState)
+ }
+ if (initState)
p->remainLen = kMatchSpecLenStart + 2;
-}
-
-void LzmaDec_Init(CLzmaDec *p)
-{
- p->dicPos = 0;
- LzmaDec_InitDicAndState(p, True, True);
-}
-
-
-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
- ELzmaFinishMode finishMode, ELzmaStatus *status)
-{
- SizeT inSize = *srcLen;
- (*srcLen) = 0;
-
- *status = LZMA_STATUS_NOT_SPECIFIED;
-
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{
+ p->dicPos = 0;
+ LzmaDec_InitDicAndState(p, True, True);
+}
+
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+ ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT inSize = *srcLen;
+ (*srcLen) = 0;
+
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+
if (p->remainLen > kMatchSpecLenStart)
{
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
@@ -915,271 +915,271 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
LzmaDec_WriteRem(p, dicLimit);
- while (p->remainLen != kMatchSpecLenStart)
- {
+ while (p->remainLen != kMatchSpecLenStart)
+ {
int checkEndMarkNow = 0;
-
- if (p->dicPos >= dicLimit)
- {
- if (p->remainLen == 0 && p->code == 0)
- {
- *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
- return SZ_OK;
- }
- if (finishMode == LZMA_FINISH_ANY)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_OK;
- }
- if (p->remainLen != 0)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
- }
- checkEndMarkNow = 1;
- }
-
- if (p->tempBufSize == 0)
- {
- SizeT processed;
- const Byte *bufLimit;
- if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
- {
- int dummyRes = LzmaDec_TryDummy(p, src, inSize);
- if (dummyRes == DUMMY_ERROR)
- {
- memcpy(p->tempBuf, src, inSize);
- p->tempBufSize = (unsigned)inSize;
- (*srcLen) += inSize;
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;
- return SZ_OK;
- }
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
- }
- bufLimit = src;
- }
- else
- bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
- p->buf = src;
- if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
- return SZ_ERROR_DATA;
- processed = (SizeT)(p->buf - src);
- (*srcLen) += processed;
- src += processed;
- inSize -= processed;
- }
- else
- {
- unsigned rem = p->tempBufSize, lookAhead = 0;
- while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
- p->tempBuf[rem++] = src[lookAhead++];
- p->tempBufSize = rem;
- if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
- {
+
+ if (p->dicPos >= dicLimit)
+ {
+ if (p->remainLen == 0 && p->code == 0)
+ {
+ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+ return SZ_OK;
+ }
+ if (finishMode == LZMA_FINISH_ANY)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_OK;
+ }
+ if (p->remainLen != 0)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ checkEndMarkNow = 1;
+ }
+
+ if (p->tempBufSize == 0)
+ {
+ SizeT processed;
+ const Byte *bufLimit;
+ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+ if (dummyRes == DUMMY_ERROR)
+ {
+ memcpy(p->tempBuf, src, inSize);
+ p->tempBufSize = (unsigned)inSize;
+ (*srcLen) += inSize;
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ bufLimit = src;
+ }
+ else
+ bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+ p->buf = src;
+ if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
+ return SZ_ERROR_DATA;
+ processed = (SizeT)(p->buf - src);
+ (*srcLen) += processed;
+ src += processed;
+ inSize -= processed;
+ }
+ else
+ {
+ unsigned rem = p->tempBufSize, lookAhead = 0;
+ while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
+ p->tempBuf[rem++] = src[lookAhead++];
+ p->tempBufSize = rem;
+ if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
- if (dummyRes == DUMMY_ERROR)
- {
+ if (dummyRes == DUMMY_ERROR)
+ {
(*srcLen) += (SizeT)lookAhead;
- *status = LZMA_STATUS_NEEDS_MORE_INPUT;
- return SZ_OK;
- }
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
- {
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
- }
- }
- p->buf = p->tempBuf;
- if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
- return SZ_ERROR_DATA;
-
- {
- unsigned kkk = (unsigned)(p->buf - p->tempBuf);
- if (rem < kkk)
- return SZ_ERROR_FAIL; /* some internal error */
- rem -= kkk;
- if (lookAhead < rem)
- return SZ_ERROR_FAIL; /* some internal error */
- lookAhead -= rem;
- }
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_ERROR_DATA;
+ }
+ }
+ p->buf = p->tempBuf;
+ if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
+ return SZ_ERROR_DATA;
+
+ {
+ unsigned kkk = (unsigned)(p->buf - p->tempBuf);
+ if (rem < kkk)
+ return SZ_ERROR_FAIL; /* some internal error */
+ rem -= kkk;
+ if (lookAhead < rem)
+ return SZ_ERROR_FAIL; /* some internal error */
+ lookAhead -= rem;
+ }
(*srcLen) += (SizeT)lookAhead;
- src += lookAhead;
+ src += lookAhead;
inSize -= (SizeT)lookAhead;
- p->tempBufSize = 0;
- }
- }
+ p->tempBufSize = 0;
+ }
+ }
if (p->code != 0)
return SZ_ERROR_DATA;
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
-}
-
+}
-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
-{
- SizeT outSize = *destLen;
- SizeT inSize = *srcLen;
- *srcLen = *destLen = 0;
- for (;;)
- {
- SizeT inSizeCur = inSize, outSizeCur, dicPos;
- ELzmaFinishMode curFinishMode;
- SRes res;
- if (p->dicPos == p->dicBufSize)
- p->dicPos = 0;
- dicPos = p->dicPos;
- if (outSize > p->dicBufSize - dicPos)
- {
- outSizeCur = p->dicBufSize;
- curFinishMode = LZMA_FINISH_ANY;
- }
- else
- {
- outSizeCur = dicPos + outSize;
- curFinishMode = finishMode;
- }
-
- res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
- src += inSizeCur;
- inSize -= inSizeCur;
- *srcLen += inSizeCur;
- outSizeCur = p->dicPos - dicPos;
- memcpy(dest, p->dic + dicPos, outSizeCur);
- dest += outSizeCur;
- outSize -= outSizeCur;
- *destLen += outSizeCur;
- if (res != 0)
- return res;
- if (outSizeCur == 0 || outSize == 0)
- return SZ_OK;
- }
-}
-
-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->probs);
- p->probs = NULL;
-}
-
-static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->dic);
- p->dic = NULL;
-}
-
-void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
-{
- LzmaDec_FreeProbs(p, alloc);
- LzmaDec_FreeDict(p, alloc);
-}
-
-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
-{
- UInt32 dicSize;
- Byte d;
-
- if (size < LZMA_PROPS_SIZE)
- return SZ_ERROR_UNSUPPORTED;
- else
- dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT outSize = *destLen;
+ SizeT inSize = *srcLen;
+ *srcLen = *destLen = 0;
+ for (;;)
+ {
+ SizeT inSizeCur = inSize, outSizeCur, dicPos;
+ ELzmaFinishMode curFinishMode;
+ SRes res;
+ if (p->dicPos == p->dicBufSize)
+ p->dicPos = 0;
+ dicPos = p->dicPos;
+ if (outSize > p->dicBufSize - dicPos)
+ {
+ outSizeCur = p->dicBufSize;
+ curFinishMode = LZMA_FINISH_ANY;
+ }
+ else
+ {
+ outSizeCur = dicPos + outSize;
+ curFinishMode = finishMode;
+ }
+
+ res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+ src += inSizeCur;
+ inSize -= inSizeCur;
+ *srcLen += inSizeCur;
+ outSizeCur = p->dicPos - dicPos;
+ memcpy(dest, p->dic + dicPos, outSizeCur);
+ dest += outSizeCur;
+ outSize -= outSizeCur;
+ *destLen += outSizeCur;
+ if (res != 0)
+ return res;
+ if (outSizeCur == 0 || outSize == 0)
+ return SZ_OK;
+ }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->probs);
+ p->probs = NULL;
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->dic);
+ p->dic = NULL;
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{
+ LzmaDec_FreeProbs(p, alloc);
+ LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+ UInt32 dicSize;
+ Byte d;
- if (dicSize < LZMA_DIC_MIN)
- dicSize = LZMA_DIC_MIN;
- p->dicSize = dicSize;
-
- d = data[0];
- if (d >= (9 * 5 * 5))
- return SZ_ERROR_UNSUPPORTED;
-
+ if (size < LZMA_PROPS_SIZE)
+ return SZ_ERROR_UNSUPPORTED;
+ else
+ dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+
+ if (dicSize < LZMA_DIC_MIN)
+ dicSize = LZMA_DIC_MIN;
+ p->dicSize = dicSize;
+
+ d = data[0];
+ if (d >= (9 * 5 * 5))
+ return SZ_ERROR_UNSUPPORTED;
+
p->lc = (Byte)(d % 9);
- d /= 9;
+ d /= 9;
p->pb = (Byte)(d / 5);
p->lp = (Byte)(d % 5);
-
- return SZ_OK;
-}
-
-static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
-{
- UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
- if (!p->probs || numProbs != p->numProbs)
- {
- LzmaDec_FreeProbs(p, alloc);
- p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
- if (!p->probs)
- return SZ_ERROR_MEM;
+
+ return SZ_OK;
+}
+
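A worked example of the property decoding above, using the common default header bytes 5D 00 00 10 00 (illustrative arithmetic only):

/* d = 0x5D = 93:
     lc = 93 % 9 = 3;   d = 93 / 9 = 10;   lp = 10 % 5 = 0;   pb = 10 / 5 = 2
   dicSize = 0x00 | (0x00 << 8) | (0x10 << 16) | (0x00 << 24) = 0x00100000 (1 MiB)
   (an encoder packs the first byte as d = (pb * 5 + lp) * 9 + lc) */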
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+ UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
+ if (!p->probs || numProbs != p->numProbs)
+ {
+ LzmaDec_FreeProbs(p, alloc);
+ p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
+ if (!p->probs)
+ return SZ_ERROR_MEM;
p->probs_1664 = p->probs + 1664;
p->numProbs = numProbs;
- }
- return SZ_OK;
-}
-
-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
-{
- CLzmaProps propNew;
- RINOK(LzmaProps_Decode(&propNew, props, propsSize));
- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
- p->prop = propNew;
- return SZ_OK;
-}
-
-SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
-{
- CLzmaProps propNew;
- SizeT dicBufSize;
- RINOK(LzmaProps_Decode(&propNew, props, propsSize));
- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
-
- {
- UInt32 dictSize = propNew.dicSize;
- SizeT mask = ((UInt32)1 << 12) - 1;
- if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
- else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
- dicBufSize = ((SizeT)dictSize + mask) & ~mask;
- if (dicBufSize < dictSize)
- dicBufSize = dictSize;
- }
-
- if (!p->dic || dicBufSize != p->dicBufSize)
- {
- LzmaDec_FreeDict(p, alloc);
- p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
- if (!p->dic)
- {
- LzmaDec_FreeProbs(p, alloc);
- return SZ_ERROR_MEM;
- }
- }
- p->dicBufSize = dicBufSize;
- p->prop = propNew;
- return SZ_OK;
-}
-
-SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
- ELzmaStatus *status, ISzAllocPtr alloc)
-{
- CLzmaDec p;
- SRes res;
- SizeT outSize = *destLen, inSize = *srcLen;
- *destLen = *srcLen = 0;
- *status = LZMA_STATUS_NOT_SPECIFIED;
- if (inSize < RC_INIT_SIZE)
- return SZ_ERROR_INPUT_EOF;
- LzmaDec_Construct(&p);
- RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
- p.dic = dest;
- p.dicBufSize = outSize;
- LzmaDec_Init(&p);
- *srcLen = inSize;
- res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
- *destLen = p.dicPos;
- if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
- res = SZ_ERROR_INPUT_EOF;
- LzmaDec_FreeProbs(&p, alloc);
- return res;
-}
+ }
+ return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+ p->prop = propNew;
+ return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+ CLzmaProps propNew;
+ SizeT dicBufSize;
+ RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+ RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+
+ {
+ UInt32 dictSize = propNew.dicSize;
+ SizeT mask = ((UInt32)1 << 12) - 1;
+ if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
+ else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
+ dicBufSize = ((SizeT)dictSize + mask) & ~mask;
+ if (dicBufSize < dictSize)
+ dicBufSize = dictSize;
+ }
+
+ if (!p->dic || dicBufSize != p->dicBufSize)
+ {
+ LzmaDec_FreeDict(p, alloc);
+ p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
+ if (!p->dic)
+ {
+ LzmaDec_FreeProbs(p, alloc);
+ return SZ_ERROR_MEM;
+ }
+ }
+ p->dicBufSize = dicBufSize;
+ p->prop = propNew;
+ return SZ_OK;
+}
+
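The rounding above aligns the dictionary buffer to a granularity that grows with the dictionary size; a sketch of what the mask selection amounts to:

/* dictSize <  4 MiB          -> mask = (1 << 12) - 1 : round up to a 4 KiB multiple
   4 MiB <= dictSize < 1 GiB  -> mask = (1 << 20) - 1 : round up to a 1 MiB multiple
   dictSize >= 1 GiB          -> mask = (1 << 22) - 1 : round up to a 4 MiB multiple
   e.g. dictSize = (1 << 25) + 1  ->  dicBufSize = (1 << 25) + (1 << 20) */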
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc)
+{
+ CLzmaDec p;
+ SRes res;
+ SizeT outSize = *destLen, inSize = *srcLen;
+ *destLen = *srcLen = 0;
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+ if (inSize < RC_INIT_SIZE)
+ return SZ_ERROR_INPUT_EOF;
+ LzmaDec_Construct(&p);
+ RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
+ p.dic = dest;
+ p.dicBufSize = outSize;
+ LzmaDec_Init(&p);
+ *srcLen = inSize;
+ res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+ *destLen = p.dicPos;
+ if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+ res = SZ_ERROR_INPUT_EOF;
+ LzmaDec_FreeProbs(&p, alloc);
+ return res;
+}
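A minimal sketch of calling the one-call decoder above, assuming the caller already knows both the compressed and uncompressed sizes; the allocator and the buffer names are assumptions of the example, not identifiers from this source:

/* hypothetical malloc/free-backed allocator using the ISzAlloc layout from 7zTypes.h */
static void *ExAlloc(ISzAllocPtr p, size_t size) { (void)p; return malloc(size); }
static void ExFree(ISzAllocPtr p, void *addr) { (void)p; free(addr); }
static const ISzAlloc exAlloc = { ExAlloc, ExFree };

/* props: the 5-byte header; src/srcLen: compressed data; dest/destLen: expected output size */
SizeT srcLen = compressedSize, destLen = uncompressedSize;
ELzmaStatus status;
SRes res = LzmaDecode(dest, &destLen, src, &srcLen,
    props, LZMA_PROPS_SIZE, LZMA_FINISH_END, &status, &exAlloc);
/* success: res == SZ_OK and status is FINISHED_WITH_MARK or MAYBE_FINISHED_WITHOUT_MARK */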
diff --git a/contrib/libs/lzmasdk/LzmaDec.h b/contrib/libs/lzmasdk/LzmaDec.h
index 4d922f23dc..1f0927ab13 100644
--- a/contrib/libs/lzmasdk/LzmaDec.h
+++ b/contrib/libs/lzmasdk/LzmaDec.h
@@ -1,234 +1,234 @@
-/* LzmaDec.h -- LZMA Decoder
+/* LzmaDec.h -- LZMA Decoder
2018-04-21 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA_DEC_H
-#define __LZMA_DEC_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-/* #define _LZMA_PROB32 */
-/* _LZMA_PROB32 can increase the speed on some CPUs,
- but memory usage for CLzmaDec::probs will be doubled in that case */
-
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+ but memory usage for CLzmaDec::probs will be doubled in that case */
+
typedef
-#ifdef _LZMA_PROB32
+#ifdef _LZMA_PROB32
UInt32
-#else
+#else
UInt16
-#endif
+#endif
CLzmaProb;
-
-
-/* ---------- LZMA Properties ---------- */
-
-#define LZMA_PROPS_SIZE 5
-
-typedef struct _CLzmaProps
-{
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
Byte lc;
Byte lp;
Byte pb;
Byte _pad_;
- UInt32 dicSize;
-} CLzmaProps;
-
-/* LzmaProps_Decode - decodes properties
-Returns:
- SZ_OK
- SZ_ERROR_UNSUPPORTED - Unsupported properties
-*/
-
-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
-
-
-/* ---------- LZMA Decoder state ---------- */
-
-/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
- Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
-
-#define LZMA_REQUIRED_INPUT_MAX 20
-
-typedef struct
-{
+ UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+ SZ_OK
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
/* Don't change this structure. ASM code can use it. */
- CLzmaProps prop;
- CLzmaProb *probs;
+ CLzmaProps prop;
+ CLzmaProb *probs;
CLzmaProb *probs_1664;
- Byte *dic;
+ Byte *dic;
SizeT dicBufSize;
SizeT dicPos;
- const Byte *buf;
+ const Byte *buf;
UInt32 range;
UInt32 code;
- UInt32 processedPos;
- UInt32 checkDicSize;
- UInt32 reps[4];
+ UInt32 processedPos;
+ UInt32 checkDicSize;
+ UInt32 reps[4];
UInt32 state;
UInt32 remainLen;
- UInt32 numProbs;
- unsigned tempBufSize;
- Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
-} CLzmaDec;
-
+ UInt32 numProbs;
+ unsigned tempBufSize;
+ Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
-
-void LzmaDec_Init(CLzmaDec *p);
-
-/* There are two types of LZMA streams:
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
- Stream with end mark. That end mark adds about 6 bytes to compressed size.
- Stream without end mark. You must know exact uncompressed size to decompress such stream. */
-
-typedef enum
-{
- LZMA_FINISH_ANY, /* finish at any point */
- LZMA_FINISH_END /* block must be finished at the end */
-} ELzmaFinishMode;
-
-/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
-
- You must use LZMA_FINISH_END, when you know that current output buffer
- covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
-
- If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
- and output value of destLen will be less than output buffer size limit.
- You can check status result also.
-
- You can use multiple checks to test data integrity after full decompression:
- 1) Check Result and "status" variable.
- 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
- 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
- You must use correct finish mode in that case. */
-
-typedef enum
-{
- LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
- LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
- LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
- LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
-} ELzmaStatus;
-
-/* ELzmaStatus is used only as output value for function call */
-
-
-/* ---------- Interfaces ---------- */
-
-/* There are 3 levels of interfaces:
- 1) Dictionary Interface
- 2) Buffer Interface
- 3) One Call Interface
- You can select any of these interfaces, but don't mix functions from different
- groups for same object. */
-
-
-/* There are two variants to allocate state for Dictionary Interface:
- 1) LzmaDec_Allocate / LzmaDec_Free
- 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
- You can use variant 2, if you set dictionary buffer manually.
- For Buffer Interface you must always use variant 1.
-
-LzmaDec_Allocate* can return:
- SZ_OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_UNSUPPORTED - Unsupported properties
-*/
-
-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
-
+
+typedef enum
+{
+ LZMA_FINISH_ANY, /* finish at any point */
+ LZMA_FINISH_END /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+ You must use LZMA_FINISH_END, when you know that current output buffer
+ covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+ If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+ and output value of destLen will be less than output buffer size limit.
+ You can check status result also.
+
+ You can use multiple checks to test data integrity after full decompression:
+ 1) Check Result and "status" variable.
+ 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+ 3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+ You must use correct finish mode in that case. */
+
+typedef enum
+{
+ LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
+ LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
+ LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
+ LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+ 1) Dictionary Interface
+ 2) Buffer Interface
+ 3) One Call Interface
+ You can select any of these interfaces, but don't mix functions from different
+ groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+ 1) LzmaDec_Allocate / LzmaDec_Free
+ 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+ You can use variant 2, if you set dictionary buffer manually.
+ For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+ SZ_OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
-
-/* ---------- Dictionary Interface ---------- */
-
-/* You can use it, if you want to eliminate the overhead for data copying from
- dictionary to some other external buffer.
- You must work with CLzmaDec variables directly in this interface.
-
- STEPS:
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+ dictionary to some other external buffer.
+ You must work with CLzmaDec variables directly in this interface.
+
+ STEPS:
LzmaDec_Construct()
- LzmaDec_Allocate()
- for (each new stream)
- {
- LzmaDec_Init()
- while (it needs more decompression)
- {
- LzmaDec_DecodeToDic()
- use data from CLzmaDec::dic and update CLzmaDec::dicPos
- }
- }
- LzmaDec_Free()
-*/
-
-/* LzmaDec_DecodeToDic
-
- The decoding to internal dictionary buffer (CLzmaDec::dic).
- You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
-
-finishMode:
- It has meaning only if the decoding reaches output limit (dicLimit).
- LZMA_FINISH_ANY - Decode just dicLimit bytes.
- LZMA_FINISH_END - Stream must be finished after dicLimit.
-
-Returns:
- SZ_OK
- status:
- LZMA_STATUS_FINISHED_WITH_MARK
- LZMA_STATUS_NOT_FINISHED
- LZMA_STATUS_NEEDS_MORE_INPUT
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
- SZ_ERROR_DATA - Data error
-*/
-
-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
-
-
-/* ---------- Buffer Interface ---------- */
-
-/* It's zlib-like interface.
- See LzmaDec_DecodeToDic description for information about STEPS and return results,
- but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
- to work with CLzmaDec variables manually.
-
-finishMode:
- It has meaning only if the decoding reaches output limit (*destLen).
- LZMA_FINISH_ANY - Decode just destLen bytes.
- LZMA_FINISH_END - Stream must be finished after (*destLen).
-*/
-
-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
-
-
-/* ---------- One Call Interface ---------- */
-
-/* LzmaDecode
-
-finishMode:
- It has meaning only if the decoding reaches output limit (*destLen).
- LZMA_FINISH_ANY - Decode just destLen bytes.
- LZMA_FINISH_END - Stream must be finished after (*destLen).
-
-Returns:
- SZ_OK
- status:
- LZMA_STATUS_FINISHED_WITH_MARK
- LZMA_STATUS_NOT_FINISHED
- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
- SZ_ERROR_DATA - Data error
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_UNSUPPORTED - Unsupported properties
- SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
-*/
-
-SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
- const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
- ELzmaStatus *status, ISzAllocPtr alloc);
-
-EXTERN_C_END
-
-#endif
+ LzmaDec_Allocate()
+ for (each new stream)
+ {
+ LzmaDec_Init()
+ while (it needs more decompression)
+ {
+ LzmaDec_DecodeToDic()
+ use data from CLzmaDec::dic and update CLzmaDec::dicPos
+ }
+ }
+ LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+
+ The decoding to internal dictionary buffer (CLzmaDec::dic).
+ You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (dicLimit).
+ LZMA_FINISH_ANY - Decode just dicLimit bytes.
+ LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_NEEDS_MORE_INPUT
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+ See LzmaDec_DecodeToDic description for information about STEPS and return results,
+ but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+ to work with CLzmaDec variables manually.
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+ const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+ It has meaning only if the decoding reaches output limit (*destLen).
+ LZMA_FINISH_ANY - Decode just destLen bytes.
+ LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+ SZ_OK
+ status:
+ LZMA_STATUS_FINISHED_WITH_MARK
+ LZMA_STATUS_NOT_FINISHED
+ LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+ const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+ ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
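A minimal sketch of the zlib-like buffer interface declared above, decoding chunk by chunk; inBuf/outBuf, inAvail and exAlloc (the hypothetical malloc/free allocator from the earlier sketch) are assumptions of the example:

CLzmaDec dec;
LzmaDec_Construct(&dec);
if (LzmaDec_Allocate(&dec, props, LZMA_PROPS_SIZE, &exAlloc) != SZ_OK)
  return SZ_ERROR_MEM;
LzmaDec_Init(&dec);
for (;;)
{
  SizeT inLen = inAvail;                 /* bytes currently available in inBuf */
  SizeT outLen = sizeof(outBuf);
  ELzmaStatus status;
  SRes res = LzmaDec_DecodeToBuf(&dec, outBuf, &outLen, inBuf, &inLen,
      LZMA_FINISH_ANY, &status);
  /* use outLen bytes of outBuf; discard the inLen consumed input bytes */
  if (res != SZ_OK || status == LZMA_STATUS_FINISHED_WITH_MARK)
    break;
  if (status == LZMA_STATUS_NEEDS_MORE_INPUT)
    { /* refill inBuf from the source stream */ }
}
LzmaDec_Free(&dec, &exAlloc);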
diff --git a/contrib/libs/lzmasdk/LzmaEnc.c b/contrib/libs/lzmasdk/LzmaEnc.c
index f6a6795d30..0ee2922390 100644
--- a/contrib/libs/lzmasdk/LzmaEnc.c
+++ b/contrib/libs/lzmasdk/LzmaEnc.c
@@ -1,180 +1,180 @@
-/* LzmaEnc.c -- LZMA Encoder
+/* LzmaEnc.c -- LZMA Encoder
2019-01-10: Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include <string.h>
-
-/* #define SHOW_STAT */
-/* #define SHOW_STAT2 */
-
-#if defined(SHOW_STAT) || defined(SHOW_STAT2)
-#include <stdio.h>
-#endif
-
-#include "LzmaEnc.h"
-
-#include "LzFind.h"
-#ifndef _7ZIP_ST
-#include "LzFindMt.h"
-#endif
-
-#ifdef SHOW_STAT
-static unsigned g_STAT_OFFSET = 0;
-#endif
-
-#define kLzmaMaxHistorySize ((UInt32)3 << 29)
-/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
-
-#define kNumTopBits 24
-#define kTopValue ((UInt32)1 << kNumTopBits)
-
-#define kNumBitModelTotalBits 11
-#define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
-#define kProbInitValue (kBitModelTotal >> 1)
-
-#define kNumMoveReducingBits 4
-#define kNumBitPriceShiftBits 4
-#define kBitPrice (1 << kNumBitPriceShiftBits)
-
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#if defined(SHOW_STAT) || defined(SHOW_STAT2)
+#include <stdio.h>
+#endif
+
+#include "LzmaEnc.h"
+
+#include "LzFind.h"
+#ifndef _7ZIP_ST
+#include "LzFindMt.h"
+#endif
+
+#ifdef SHOW_STAT
+static unsigned g_STAT_OFFSET = 0;
+#endif
+
+#define kLzmaMaxHistorySize ((UInt32)3 << 29)
+/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+#define kBitPrice (1 << kNumBitPriceShiftBits)
+
#define REP_LEN_COUNT 64
-void LzmaEncProps_Init(CLzmaEncProps *p)
-{
- p->level = 5;
- p->dictSize = p->mc = 0;
- p->reduceSize = (UInt64)(Int64)-1;
- p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
- p->writeEndMark = 0;
-}
-
-void LzmaEncProps_Normalize(CLzmaEncProps *p)
-{
- int level = p->level;
- if (level < 0) level = 5;
- p->level = level;
-
- if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
- if (p->dictSize > p->reduceSize)
- {
- unsigned i;
- UInt32 reduceSize = (UInt32)p->reduceSize;
- for (i = 11; i <= 30; i++)
- {
- if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
- if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
- }
- }
-
- if (p->lc < 0) p->lc = 3;
- if (p->lp < 0) p->lp = 0;
- if (p->pb < 0) p->pb = 2;
-
- if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
- if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
- if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
- if (p->numHashBytes < 0) p->numHashBytes = 4;
- if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
-
- if (p->numThreads < 0)
- p->numThreads =
- #ifndef _7ZIP_ST
- ((p->btMode && p->algo) ? 2 : 1);
- #else
- 1;
- #endif
-}
-
-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
-{
- CLzmaEncProps props = *props2;
- LzmaEncProps_Normalize(&props);
- return props.dictSize;
-}
-
-#if (_MSC_VER >= 1400)
-/* BSR code is fast for some new CPUs */
-/* #define LZMA_LOG_BSR */
-#endif
-
-#ifdef LZMA_LOG_BSR
-
-#define kDicLogSizeMaxCompress 32
-
-#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
-
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{
+ p->level = 5;
+ p->dictSize = p->mc = 0;
+ p->reduceSize = (UInt64)(Int64)-1;
+ p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+ p->writeEndMark = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{
+ int level = p->level;
+ if (level < 0) level = 5;
+ p->level = level;
+
+ if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
+ if (p->dictSize > p->reduceSize)
+ {
+ unsigned i;
+ UInt32 reduceSize = (UInt32)p->reduceSize;
+ for (i = 11; i <= 30; i++)
+ {
+ if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
+ if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
+ }
+ }
+
+ if (p->lc < 0) p->lc = 3;
+ if (p->lp < 0) p->lp = 0;
+ if (p->pb < 0) p->pb = 2;
+
+ if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+ if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+ if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+ if (p->numHashBytes < 0) p->numHashBytes = 4;
+ if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+
+ if (p->numThreads < 0)
+ p->numThreads =
+ #ifndef _7ZIP_ST
+ ((p->btMode && p->algo) ? 2 : 1);
+ #else
+ 1;
+ #endif
+}
+
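For orientation, the defaults picked above work out roughly as follows (a summary of the code path, not new behaviour):

/* dictSize: level 1 -> 1 << 16 (64 KiB), level 3 -> 1 << 20 (1 MiB), level 5 -> 1 << 24 (16 MiB),
             levels 6..7 -> 1 << 25 (32 MiB), levels 8..9 -> 1 << 26 (64 MiB)
   lc/lp/pb default to 3/0/2; algo 0 (fast mode) below level 5, otherwise 1;
   fb defaults to 32 below level 7, otherwise 64 */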
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+ CLzmaEncProps props = *props2;
+ LzmaEncProps_Normalize(&props);
+ return props.dictSize;
+}
+
+#if (_MSC_VER >= 1400)
+/* BSR code is fast for some new CPUs */
+/* #define LZMA_LOG_BSR */
+#endif
+
+#ifdef LZMA_LOG_BSR
+
+#define kDicLogSizeMaxCompress 32
+
+#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
+
static unsigned GetPosSlot1(UInt32 pos)
-{
+{
unsigned res;
- BSR2_RET(pos, res);
- return res;
-}
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
-#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
-
-#else
-
-#define kNumLogBits (9 + sizeof(size_t) / 2)
-/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
-
-#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
-
-static void LzmaEnc_FastPosInit(Byte *g_FastPos)
-{
- unsigned slot;
- g_FastPos[0] = 0;
- g_FastPos[1] = 1;
- g_FastPos += 2;
-
- for (slot = 2; slot < kNumLogBits * 2; slot++)
- {
- size_t k = ((size_t)1 << ((slot >> 1) - 1));
- size_t j;
- for (j = 0; j < k; j++)
- g_FastPos[j] = (Byte)slot;
- g_FastPos += k;
- }
-}
-
-/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
-/*
+ BSR2_RET(pos, res);
+ return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+
+#else
+
+#define kNumLogBits (9 + sizeof(size_t) / 2)
+/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
+
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+static void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+ unsigned slot;
+ g_FastPos[0] = 0;
+ g_FastPos[1] = 1;
+ g_FastPos += 2;
+
+ for (slot = 2; slot < kNumLogBits * 2; slot++)
+ {
+ size_t k = ((size_t)1 << ((slot >> 1) - 1));
+ size_t j;
+ for (j = 0; j < k; j++)
+ g_FastPos[j] = (Byte)slot;
+ g_FastPos += k;
+ }
+}
+
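The table initialised above maps a small distance to its position slot; a sketch of the mapping:

/* for pos >= 2: g_FastPos[pos] = 2 * floor(log2(pos)) + (bit just below the most significant bit)
     pos 0,1,2,3 -> slots 0,1,2,3
     pos 4..5 -> 4,  6..7 -> 5,  8..11 -> 6,  12..15 -> 7, ...
   each slot beyond 3 covers half an octave of distances */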
+/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
+/*
#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
- (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
- res = p->g_FastPos[pos >> zz] + (zz * 2); }
-*/
-
-/*
+ (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+/*
#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
- (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
- res = p->g_FastPos[pos >> zz] + (zz * 2); }
-*/
-
+ (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
- res = p->g_FastPos[pos >> zz] + (zz * 2); }
-
-/*
-#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
- p->g_FastPos[pos >> 6] + 12 : \
- p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
-*/
-
-#define GetPosSlot1(pos) p->g_FastPos[pos]
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+ res = p->g_FastPos[pos >> zz] + (zz * 2); }
+
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+ p->g_FastPos[pos >> 6] + 12 : \
+ p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
-
-#endif
-
-
-#define LZMA_NUM_REPS 4
-
+
+#endif
+
+
+#define LZMA_NUM_REPS 4
+
typedef UInt16 CState;
typedef UInt16 CExtra;
-
-typedef struct
-{
- UInt32 price;
- CState state;
+
+typedef struct
+{
+ UInt32 price;
+ CState state;
CExtra extra;
// 0 : normal
// 1 : LIT : MATCH
@@ -183,194 +183,194 @@ typedef struct
UInt32 dist;
UInt32 reps[LZMA_NUM_REPS];
} COptimal;
-
-
+
+
// 18.06
#define kNumOpts (1 << 11)
#define kPackReserve (kNumOpts * 8)
// #define kNumOpts (1 << 12)
// #define kPackReserve (1 + kNumOpts * 2)
-
-#define kNumLenToPosStates 4
-#define kNumPosSlotBits 6
-#define kDicLogSizeMin 0
-#define kDicLogSizeMax 32
-#define kDistTableSizeMax (kDicLogSizeMax * 2)
-
-#define kNumAlignBits 4
-#define kAlignTableSize (1 << kNumAlignBits)
-#define kAlignMask (kAlignTableSize - 1)
-
-#define kStartPosModelIndex 4
-#define kEndPosModelIndex 14
-#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
-
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+#define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
typedef
-#ifdef _LZMA_PROB32
+#ifdef _LZMA_PROB32
UInt32
-#else
+#else
UInt16
-#endif
+#endif
CLzmaProb;
-
-#define LZMA_PB_MAX 4
-#define LZMA_LC_MAX 8
-#define LZMA_LP_MAX 4
-
-#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
-
-#define kLenNumLowBits 3
-#define kLenNumLowSymbols (1 << kLenNumLowBits)
-#define kLenNumHighBits 8
-#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
-
-#define LZMA_MATCH_LEN_MIN 2
-#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
-
-#define kNumStates 12
-
-
-typedef struct
-{
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
+
+#define kNumStates 12
+
+
+typedef struct
+{
CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
- CLzmaProb high[kLenNumHighSymbols];
-} CLenEnc;
-
-
-typedef struct
-{
+ CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+
+typedef struct
+{
unsigned tableSize;
- UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+ UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
// UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
// UInt32 prices2[kLenNumSymbolsTotal];
-} CLenPriceEnc;
-
+} CLenPriceEnc;
+
#define GET_PRICE_LEN(p, posState, len) \
((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
-
+
/*
#define GET_PRICE_LEN(p, posState, len) \
((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
*/
-typedef struct
-{
- UInt32 range;
+typedef struct
+{
+ UInt32 range;
unsigned cache;
- UInt64 low;
- UInt64 cacheSize;
- Byte *buf;
- Byte *bufLim;
- Byte *bufBase;
- ISeqOutStream *outStream;
- UInt64 processed;
- SRes res;
-} CRangeEnc;
-
-
-typedef struct
-{
- CLzmaProb *litProbs;
-
+ UInt64 low;
+ UInt64 cacheSize;
+ Byte *buf;
+ Byte *bufLim;
+ Byte *bufBase;
+ ISeqOutStream *outStream;
+ UInt64 processed;
+ SRes res;
+} CRangeEnc;
+
+
+typedef struct
+{
+ CLzmaProb *litProbs;
+
unsigned state;
- UInt32 reps[LZMA_NUM_REPS];
-
+ UInt32 reps[LZMA_NUM_REPS];
+
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
- CLzmaProb isRep[kNumStates];
- CLzmaProb isRepG0[kNumStates];
- CLzmaProb isRepG1[kNumStates];
- CLzmaProb isRepG2[kNumStates];
+ CLzmaProb isRep[kNumStates];
+ CLzmaProb isRepG0[kNumStates];
+ CLzmaProb isRepG1[kNumStates];
+ CLzmaProb isRepG2[kNumStates];
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
- CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
-
- CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
CLzmaProb posEncoders[kNumFullDistances];
-
+
CLenEnc lenProbs;
CLenEnc repLenProbs;
-} CSaveState;
-
-
+} CSaveState;
+
+
typedef UInt32 CProbPrice;
-typedef struct
-{
- void *matchFinderObj;
- IMatchFinder matchFinder;
-
+typedef struct
+{
+ void *matchFinderObj;
+ IMatchFinder matchFinder;
+
unsigned optCur;
unsigned optEnd;
-
+
unsigned longestMatchLen;
unsigned numPairs;
- UInt32 numAvail;
-
+ UInt32 numAvail;
+
unsigned state;
unsigned numFastBytes;
unsigned additionalOffset;
- UInt32 reps[LZMA_NUM_REPS];
+ UInt32 reps[LZMA_NUM_REPS];
unsigned lpMask, pbMask;
CLzmaProb *litProbs;
CRangeEnc rc;
-
+
UInt32 backRes;
- unsigned lc, lp, pb;
- unsigned lclp;
-
+ unsigned lc, lp, pb;
+ unsigned lclp;
+
BoolInt fastMode;
BoolInt writeEndMark;
BoolInt finished;
BoolInt multiThread;
BoolInt needInit;
// BoolInt _maxMode;
-
- UInt64 nowPos64;
-
+
+ UInt64 nowPos64;
+
unsigned matchPriceCount;
// unsigned alignPriceCount;
int repLenEncCounter;
-
+
unsigned distTableSize;
-
- UInt32 dictSize;
- SRes result;
-
- #ifndef _7ZIP_ST
+
+ UInt32 dictSize;
+ SRes result;
+
+ #ifndef _7ZIP_ST
BoolInt mtMode;
// begin of CMatchFinderMt is used in LZ thread
- CMatchFinderMt matchFinderMt;
+ CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
- #endif
-
- CMatchFinder matchFinderBase;
-
- #ifndef _7ZIP_ST
- Byte pad[128];
- #endif
-
+ #endif
+
+ CMatchFinder matchFinderBase;
+
+ #ifndef _7ZIP_ST
+ Byte pad[128];
+ #endif
+
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
-
- UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
-
+
+ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+
UInt32 alignPrices[kAlignTableSize];
- UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
- UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
-
+ UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+ UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+
CLzmaProb posAlignEncoder[1 << kNumAlignBits];
- CLzmaProb isRep[kNumStates];
- CLzmaProb isRepG0[kNumStates];
- CLzmaProb isRepG1[kNumStates];
- CLzmaProb isRepG2[kNumStates];
+ CLzmaProb isRep[kNumStates];
+ CLzmaProb isRepG0[kNumStates];
+ CLzmaProb isRepG1[kNumStates];
+ CLzmaProb isRepG2[kNumStates];
CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
- CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
- CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
CLzmaProb posEncoders[kNumFullDistances];
-
+
CLenEnc lenProbs;
CLenEnc repLenProbs;
@@ -378,32 +378,32 @@ typedef struct
Byte g_FastPos[1 << kNumLogBits];
#endif
- CLenPriceEnc lenEnc;
- CLenPriceEnc repLenEnc;
-
+ CLenPriceEnc lenEnc;
+ CLenPriceEnc repLenEnc;
+
COptimal opt[kNumOpts];
- CSaveState saveState;
-
- #ifndef _7ZIP_ST
- Byte pad2[128];
- #endif
-} CLzmaEnc;
-
-
+ CSaveState saveState;
+
+ #ifndef _7ZIP_ST
+ Byte pad2[128];
+ #endif
+} CLzmaEnc;
+
+
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
-void LzmaEnc_SaveState(CLzmaEncHandle pp)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- CSaveState *dest = &p->saveState;
+void LzmaEnc_SaveState(CLzmaEncHandle pp)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ CSaveState *dest = &p->saveState;
- dest->state = p->state;
+ dest->state = p->state;
dest->lenProbs = p->lenProbs;
dest->repLenProbs = p->repLenProbs;
-
+
COPY_ARR(dest, p, reps);
COPY_ARR(dest, p, posAlignEncoder);
@@ -416,17 +416,17 @@ void LzmaEnc_SaveState(CLzmaEncHandle pp)
COPY_ARR(dest, p, posSlotEncoder);
COPY_ARR(dest, p, posEncoders);
- memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));
-}
-
+ memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));
+}
-void LzmaEnc_RestoreState(CLzmaEncHandle pp)
-{
- CLzmaEnc *dest = (CLzmaEnc *)pp;
- const CSaveState *p = &dest->saveState;
- dest->state = p->state;
-
+void LzmaEnc_RestoreState(CLzmaEncHandle pp)
+{
+ CLzmaEnc *dest = (CLzmaEnc *)pp;
+ const CSaveState *p = &dest->saveState;
+
+ dest->state = p->state;
+
dest->lenProbs = p->lenProbs;
dest->repLenProbs = p->repLenProbs;
@@ -442,166 +442,166 @@ void LzmaEnc_RestoreState(CLzmaEncHandle pp)
COPY_ARR(dest, p, posSlotEncoder);
COPY_ARR(dest, p, posEncoders);
- memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));
-}
-
+ memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));
+}
-SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- CLzmaEncProps props = *props2;
- LzmaEncProps_Normalize(&props);
-
- if (props.lc > LZMA_LC_MAX
- || props.lp > LZMA_LP_MAX
- || props.pb > LZMA_PB_MAX
- || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
- || props.dictSize > kLzmaMaxHistorySize)
- return SZ_ERROR_PARAM;
-
- p->dictSize = props.dictSize;
- {
- unsigned fb = props.fb;
- if (fb < 5)
- fb = 5;
- if (fb > LZMA_MATCH_LEN_MAX)
- fb = LZMA_MATCH_LEN_MAX;
- p->numFastBytes = fb;
- }
- p->lc = props.lc;
- p->lp = props.lp;
- p->pb = props.pb;
- p->fastMode = (props.algo == 0);
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ CLzmaEncProps props = *props2;
+ LzmaEncProps_Normalize(&props);
+
+ if (props.lc > LZMA_LC_MAX
+ || props.lp > LZMA_LP_MAX
+ || props.pb > LZMA_PB_MAX
+ || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
+ || props.dictSize > kLzmaMaxHistorySize)
+ return SZ_ERROR_PARAM;
+
+ p->dictSize = props.dictSize;
+ {
+ unsigned fb = props.fb;
+ if (fb < 5)
+ fb = 5;
+ if (fb > LZMA_MATCH_LEN_MAX)
+ fb = LZMA_MATCH_LEN_MAX;
+ p->numFastBytes = fb;
+ }
+ p->lc = props.lc;
+ p->lp = props.lp;
+ p->pb = props.pb;
+ p->fastMode = (props.algo == 0);
// p->_maxMode = True;
- p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
- {
+ p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
+ {
unsigned numHashBytes = 4;
- if (props.btMode)
- {
- if (props.numHashBytes < 2)
- numHashBytes = 2;
- else if (props.numHashBytes < 4)
- numHashBytes = props.numHashBytes;
- }
- p->matchFinderBase.numHashBytes = numHashBytes;
- }
-
- p->matchFinderBase.cutValue = props.mc;
-
- p->writeEndMark = props.writeEndMark;
-
- #ifndef _7ZIP_ST
- /*
- if (newMultiThread != _multiThread)
- {
- ReleaseMatchFinder();
- _multiThread = newMultiThread;
- }
- */
- p->multiThread = (props.numThreads > 1);
- #endif
-
- return SZ_OK;
-}
-
-
-void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.expectedDataSize = expectedDataSiize;
-}
-
-
+ if (props.btMode)
+ {
+ if (props.numHashBytes < 2)
+ numHashBytes = 2;
+ else if (props.numHashBytes < 4)
+ numHashBytes = props.numHashBytes;
+ }
+ p->matchFinderBase.numHashBytes = numHashBytes;
+ }
+
+ p->matchFinderBase.cutValue = props.mc;
+
+ p->writeEndMark = props.writeEndMark;
+
+ #ifndef _7ZIP_ST
+ /*
+ if (newMultiThread != _multiThread)
+ {
+ ReleaseMatchFinder();
+ _multiThread = newMultiThread;
+ }
+ */
+ p->multiThread = (props.numThreads > 1);
+ #endif
+
+ return SZ_OK;
+}
+
+
+void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ p->matchFinderBase.expectedDataSize = expectedDataSiize;
+}
+
+
#define kState_Start 0
#define kState_LitAfterMatch 4
#define kState_LitAfterRep 5
#define kState_MatchAfterLit 7
#define kState_RepAfterLit 8
-
+
static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
-
+
#define IsLitState(s) ((s) < 7)
#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
-#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
-
-#define kInfinityPrice (1 << 30)
-
-static void RangeEnc_Construct(CRangeEnc *p)
-{
- p->outStream = NULL;
- p->bufBase = NULL;
-}
-
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+ p->outStream = NULL;
+ p->bufBase = NULL;
+}
+
#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
-
-#define RC_BUF_SIZE (1 << 16)
-
-static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
-{
- if (!p->bufBase)
- {
- p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
- if (!p->bufBase)
- return 0;
- p->bufLim = p->bufBase + RC_BUF_SIZE;
- }
- return 1;
-}
-
-static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->bufBase);
- p->bufBase = 0;
-}
-
-static void RangeEnc_Init(CRangeEnc *p)
-{
- /* Stream.Init(); */
- p->range = 0xFFFFFFFF;
- p->cache = 0;
+
+#define RC_BUF_SIZE (1 << 16)
+
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
+{
+ if (!p->bufBase)
+ {
+ p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
+ if (!p->bufBase)
+ return 0;
+ p->bufLim = p->bufBase + RC_BUF_SIZE;
+ }
+ return 1;
+}
+
+static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->bufBase);
+ p->bufBase = 0;
+}
+
+static void RangeEnc_Init(CRangeEnc *p)
+{
+ /* Stream.Init(); */
+ p->range = 0xFFFFFFFF;
+ p->cache = 0;
p->low = 0;
p->cacheSize = 0;
-
- p->buf = p->bufBase;
-
- p->processed = 0;
- p->res = SZ_OK;
-}
-
+
+ p->buf = p->bufBase;
+
+ p->processed = 0;
+ p->res = SZ_OK;
+}
+
MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
-{
- size_t num;
- if (p->res != SZ_OK)
- return;
- num = p->buf - p->bufBase;
- if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
- p->res = SZ_ERROR_WRITE;
- p->processed += num;
- p->buf = p->bufBase;
-}
-
+{
+ size_t num;
+ if (p->res != SZ_OK)
+ return;
+ num = p->buf - p->bufBase;
+ if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+ p->res = SZ_ERROR_WRITE;
+ p->processed += num;
+ p->buf = p->bufBase;
+}
+
MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
-{
+{
UInt32 low = (UInt32)p->low;
unsigned high = (unsigned)(p->low >> 32);
p->low = (UInt32)(low << 8);
if (low < (UInt32)0xFF000000 || high != 0)
- {
- {
- Byte *buf = p->buf;
+ {
+ {
+ Byte *buf = p->buf;
*buf++ = (Byte)(p->cache + high);
p->cache = (unsigned)(low >> 24);
- p->buf = buf;
- if (buf == p->bufLim)
- RangeEnc_FlushStream(p);
+ p->buf = buf;
+ if (buf == p->bufLim)
+ RangeEnc_FlushStream(p);
if (p->cacheSize == 0)
return;
- }
+ }
high += 0xFF;
for (;;)
{
@@ -613,17 +613,17 @@ MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
if (--p->cacheSize == 0)
return;
}
- }
- p->cacheSize++;
-}
-
-static void RangeEnc_FlushData(CRangeEnc *p)
-{
- int i;
- for (i = 0; i < 5; i++)
- RangeEnc_ShiftLow(p);
-}
-
+ }
+ p->cacheSize++;
+}
+
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+ int i;
+ for (i = 0; i < 5; i++)
+ RangeEnc_ShiftLow(p);
+}
+
#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
#define RC_BIT_PRE(p, prob) \
@@ -640,8 +640,8 @@ static void RangeEnc_FlushData(CRangeEnc *p)
else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
- }
-
+ }
+
#else
#define RC_BIT(p, prob, bit) { \
@@ -681,177 +681,177 @@ static void RangeEnc_FlushData(CRangeEnc *p)
RC_NORM(p)
static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
-{
+{
UInt32 range, ttt, newBound;
range = p->range;
RC_BIT_PRE(p, prob)
RC_BIT_0(p, prob)
p->range = range;
-}
-
+}
+
static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
-{
+{
UInt32 range = p->range;
sym |= 0x100;
- do
- {
+ do
+ {
UInt32 ttt, newBound;
// RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
CLzmaProb *prob = probs + (sym >> 8);
UInt32 bit = (sym >> 7) & 1;
sym <<= 1;
RC_BIT(p, prob, bit);
- }
+ }
while (sym < 0x10000);
p->range = range;
-}
-
+}
+
static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)
-{
+{
UInt32 range = p->range;
- UInt32 offs = 0x100;
+ UInt32 offs = 0x100;
sym |= 0x100;
- do
- {
+ do
+ {
UInt32 ttt, newBound;
CLzmaProb *prob;
UInt32 bit;
- matchByte <<= 1;
+ matchByte <<= 1;
// RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);
prob = probs + (offs + (matchByte & offs) + (sym >> 8));
bit = (sym >> 7) & 1;
sym <<= 1;
offs &= ~(matchByte ^ sym);
RC_BIT(p, prob, bit);
- }
+ }
while (sym < 0x10000);
p->range = range;
-}
-
+}
+
static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
-{
- UInt32 i;
+{
+ UInt32 i;
for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
- {
+ {
const unsigned kCyclesBits = kNumBitPriceShiftBits;
UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
unsigned bitCount = 0;
unsigned j;
- for (j = 0; j < kCyclesBits; j++)
- {
- w = w * w;
- bitCount <<= 1;
- while (w >= ((UInt32)1 << 16))
- {
- w >>= 1;
- bitCount++;
- }
- }
+ for (j = 0; j < kCyclesBits; j++)
+ {
+ w = w * w;
+ bitCount <<= 1;
+ while (w >= ((UInt32)1 << 16))
+ {
+ w >>= 1;
+ bitCount++;
+ }
+ }
ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
// printf("\n%3d: %5d", i, ProbPrices[i]);
- }
-}
-
-
+ }
+}
+
+
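The table built above stores approximate bit costs in 1/16-bit units (kNumBitPriceShiftBits is 4): the price of coding a symbol whose model probability is prob/2048 is roughly -16 * log2(prob / 2048). A couple of approximate reference points:

/* prob = 1024 (p = 1/2) -> price ~ 16 (one bit)
   prob = 1536 (p = 3/4) -> GET_PRICE_0 ~ 7,  GET_PRICE_1 ~ 32 (two bits for the unlikely branch)
   prob near 2048        -> GET_PRICE_0 ~ 0 */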
#define GET_PRICE(prob, bit) \
p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
-
+
#define GET_PRICEa(prob, bit) \
ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
-
-#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
-#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
-
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
-
+
static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
-{
- UInt32 price = 0;
+{
+ UInt32 price = 0;
sym |= 0x100;
- do
- {
+ do
+ {
unsigned bit = sym & 1;
sym >>= 1;
price += GET_PRICEa(probs[sym], bit);
- }
+ }
while (sym >= 2);
- return price;
-}
-
+ return price;
+}
+
static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
-{
- UInt32 price = 0;
- UInt32 offs = 0x100;
+{
+ UInt32 price = 0;
+ UInt32 offs = 0x100;
sym |= 0x100;
- do
- {
- matchByte <<= 1;
+ do
+ {
+ matchByte <<= 1;
price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
sym <<= 1;
offs &= ~(matchByte ^ sym);
- }
+ }
while (sym < 0x10000);
- return price;
-}
-
-
+ return price;
+}
+
+
static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)
-{
+{
UInt32 range = rc->range;
unsigned m = 1;
do
- {
+ {
UInt32 ttt, newBound;
unsigned bit = sym & 1;
// RangeEnc_EncodeBit(rc, probs + m, bit);
sym >>= 1;
RC_BIT(rc, probs + m, bit);
- m = (m << 1) | bit;
- }
+ m = (m << 1) | bit;
+ }
while (--numBits);
rc->range = range;
-}
-
-
-
-static void LenEnc_Init(CLenEnc *p)
-{
- unsigned i;
+}
+
+
+
+static void LenEnc_Init(CLenEnc *p)
+{
+ unsigned i;
for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
- p->low[i] = kProbInitValue;
- for (i = 0; i < kLenNumHighSymbols; i++)
- p->high[i] = kProbInitValue;
-}
-
+ p->low[i] = kProbInitValue;
+ for (i = 0; i < kLenNumHighSymbols; i++)
+ p->high[i] = kProbInitValue;
+}
+
static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
-{
+{
UInt32 range, ttt, newBound;
CLzmaProb *probs = p->low;
range = rc->range;
RC_BIT_PRE(rc, probs);
if (sym >= kLenNumLowSymbols)
- {
+ {
RC_BIT_1(rc, probs);
probs += kLenNumLowSymbols;
RC_BIT_PRE(rc, probs);
if (sym >= kLenNumLowSymbols * 2)
- {
+ {
RC_BIT_1(rc, probs);
rc->range = range;
// RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
return;
- }
+ }
sym -= kLenNumLowSymbols;
- }
-
+ }
+
// RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
- {
+ {
unsigned m;
unsigned bit;
RC_BIT_0(rc, probs);
@@ -860,11 +860,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;
bit = sym & 1; RC_BIT(rc, probs + m, bit);
rc->range = range;
- }
-}
-
+ }
+}
+
static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
-{
+{
unsigned i;
for (i = 0; i < 8; i += 2)
{
@@ -876,15 +876,15 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price
prices[i ] = price + GET_PRICEa_0(prob);
prices[i + 1] = price + GET_PRICEa_1(prob);
}
-}
-
-
+}
+
+
MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
CLenPriceEnc *p,
unsigned numPosStates,
const CLenEnc *enc,
const CProbPrice *ProbPrices)
-{
+{
UInt32 b;
{
@@ -902,7 +902,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
}
}
-
+
/*
{
unsigned i;
@@ -920,7 +920,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
// p->counter = numSymbols;
// p->counter = 64;
-
+
{
unsigned i = p->tableSize;
@@ -948,7 +948,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
price += GET_PRICEa(probs[sym], bit);
}
while (sym >= 2);
-
+
{
unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
@@ -956,7 +956,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
}
}
while (i);
-
+
{
unsigned posState;
size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
@@ -968,66 +968,66 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
}
/*
- #ifdef SHOW_STAT
- g_STAT_OFFSET += num;
- printf("\n MovePos %u", num);
- #endif
+ #ifdef SHOW_STAT
+ g_STAT_OFFSET += num;
+ printf("\n MovePos %u", num);
+ #endif
*/
-
+
#define MOVE_POS(p, num) { \
p->additionalOffset += (num); \
p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
-
+
static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
-{
+{
unsigned numPairs;
p->additionalOffset++;
- p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
- numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+ numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
*numPairsRes = numPairs;
-
- #ifdef SHOW_STAT
- printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);
- g_STAT_OFFSET++;
- {
+
+ #ifdef SHOW_STAT
+ printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2);
+ g_STAT_OFFSET++;
+ {
unsigned i;
- for (i = 0; i < numPairs; i += 2)
- printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);
- }
- #endif
-
+ for (i = 0; i < numPairs; i += 2)
+ printf("%2u %6u | ", p->matches[i], p->matches[i + 1]);
+ }
+ #endif
+
if (numPairs == 0)
return 0;
- {
+ {
unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
- {
- UInt32 numAvail = p->numAvail;
- if (numAvail > LZMA_MATCH_LEN_MAX)
- numAvail = LZMA_MATCH_LEN_MAX;
- {
+ {
+ UInt32 numAvail = p->numAvail;
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+ {
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
- ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
+ ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
return (unsigned)(p2 - p1);
- }
- }
- }
-}
-
+ }
+ }
+ }
+}
+
#define MARK_LIT ((UInt32)(Int32)-1)
-
+
#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; }
#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; }
#define IsShortRep(p) ((p)->dist == 0)
-
-
+
+
#define GetPrice_ShortRep(p, state, posState) \
( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
@@ -1039,53 +1039,53 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
MY_FORCE_INLINE
static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
-{
- UInt32 price;
+{
+ UInt32 price;
UInt32 prob = p->isRepG0[state];
- if (repIndex == 0)
- {
+ if (repIndex == 0)
+ {
price = GET_PRICE_0(prob);
- price += GET_PRICE_1(p->isRep0Long[state][posState]);
- }
- else
- {
+ price += GET_PRICE_1(p->isRep0Long[state][posState]);
+ }
+ else
+ {
price = GET_PRICE_1(prob);
prob = p->isRepG1[state];
- if (repIndex == 1)
+ if (repIndex == 1)
price += GET_PRICE_0(prob);
- else
- {
+ else
+ {
price += GET_PRICE_1(prob);
- price += GET_PRICE(p->isRepG2[state], repIndex - 2);
- }
- }
- return price;
-}
-
+ price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+ }
+ }
+ return price;
+}
+
static unsigned Backward(CLzmaEnc *p, unsigned cur)
-{
+{
unsigned wr = cur + 1;
p->optEnd = wr;
-
+
for (;;)
- {
+ {
UInt32 dist = p->opt[cur].dist;
unsigned len = (unsigned)p->opt[cur].len;
unsigned extra = (unsigned)p->opt[cur].extra;
cur -= len;
if (extra)
- {
+ {
wr--;
p->opt[wr].len = (UInt32)len;
cur -= extra;
len = extra;
if (extra == 1)
- {
+ {
p->opt[wr].dist = dist;
dist = MARK_LIT;
- }
+ }
else
{
p->opt[wr].dist = 0;
@@ -1094,35 +1094,35 @@ static unsigned Backward(CLzmaEnc *p, unsigned cur)
p->opt[wr].dist = MARK_LIT;
p->opt[wr].len = 1;
}
- }
+ }
if (cur == 0)
- {
+ {
p->backRes = dist;
p->optCur = wr;
return len;
- }
+ }
wr--;
p->opt[wr].dist = dist;
p->opt[wr].len = (UInt32)len;
- }
-}
-
-
-
+ }
+}
+
+
+
#define LIT_PROBS(pos, prevByte) \
(p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))
-
-
+
+
static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
{
unsigned last, cur;
UInt32 reps[LZMA_NUM_REPS];
unsigned repLens[LZMA_NUM_REPS];
UInt32 *matches;
-
- {
+
+ {
UInt32 numAvail;
unsigned numPairs, mainLen, repMaxIndex, i, posState;
UInt32 matchPrice, repMatchPrice;
@@ -1134,17 +1134,17 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (p->additionalOffset == 0)
mainLen = ReadMatchDistances(p, &numPairs);
else
- {
+ {
mainLen = p->longestMatchLen;
numPairs = p->numPairs;
- }
+ }
numAvail = p->numAvail;
if (numAvail < 2)
- {
+ {
p->backRes = MARK_LIT;
return 1;
- }
+ }
if (numAvail > LZMA_MATCH_LEN_MAX)
numAvail = LZMA_MATCH_LEN_MAX;
@@ -1152,22 +1152,22 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
repMaxIndex = 0;
for (i = 0; i < LZMA_NUM_REPS; i++)
- {
+ {
unsigned len;
const Byte *data2;
reps[i] = p->reps[i];
data2 = data - reps[i];
if (data[0] != data2[0] || data[1] != data2[1])
- {
+ {
repLens[i] = 0;
continue;
- }
+ }
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
{}
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
- }
+ }
if (repLens[repMaxIndex] >= p->numFastBytes)
{
@@ -1189,7 +1189,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
curByte = *data;
matchByte = *(data - reps[0]);
-
+
last = repLens[repMaxIndex];
if (last <= mainLen)
last = mainLen;
@@ -1211,7 +1211,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
LitEnc_GetPrice(probs, curByte, p->ProbPrices));
}
-
+
MakeAs_Lit(&p->opt[1]);
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
@@ -1219,18 +1219,18 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
// 18.06
if (matchByte == curByte && repLens[0] == 0)
- {
+ {
UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);
if (shortRepPrice < p->opt[1].price)
- {
+ {
p->opt[1].price = shortRepPrice;
MakeAs_ShortRep(&p->opt[1]);
- }
+ }
if (last < 2)
- {
+ {
p->backRes = p->opt[1].dist;
return 1;
- }
+ }
}
p->opt[1].len = 1;
@@ -1250,7 +1250,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
continue;
price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);
do
- {
+ {
UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);
COptimal *opt = &p->opt[repLen];
if (price2 < opt->price)
@@ -1260,9 +1260,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->dist = (UInt32)i;
opt->extra = 0;
}
- }
+ }
while (--repLen >= 2);
- }
+ }
// ---------- MATCH ----------
@@ -1272,7 +1272,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
{
unsigned offs = 0;
UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
-
+
if (len < 2)
len = 2;
else
@@ -1316,35 +1316,35 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
-
+
cur = 0;
- #ifdef SHOW_STAT2
- /* if (position >= 0) */
- {
- unsigned i;
- printf("\n pos = %4X", position);
+ #ifdef SHOW_STAT2
+ /* if (position >= 0) */
+ {
+ unsigned i;
+ printf("\n pos = %4X", position);
for (i = cur; i <= last; i++)
- printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
- }
- #endif
- }
-
+ printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
+ }
+ #endif
+ }
+
// ---------- Optimal Parsing ----------
- for (;;)
- {
+ for (;;)
+ {
unsigned numAvail;
UInt32 numAvailFull;
unsigned newLen, numPairs, prev, state, posState, startLen;
UInt32 litPrice, matchPrice, repMatchPrice;
BoolInt nextIsLit;
- Byte curByte, matchByte;
- const Byte *data;
+ Byte curByte, matchByte;
+ const Byte *data;
COptimal *curOpt, *nextOpt;
-
+
if (++cur == last)
break;
@@ -1373,19 +1373,19 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
cur = best;
break;
}
-
- newLen = ReadMatchDistances(p, &numPairs);
+
+ newLen = ReadMatchDistances(p, &numPairs);
- if (newLen >= p->numFastBytes)
- {
- p->numPairs = numPairs;
+ if (newLen >= p->numFastBytes)
+ {
+ p->numPairs = numPairs;
p->longestMatchLen = newLen;
break;
- }
+ }
curOpt = &p->opt[cur];
- position++;
+ position++;
// we need that check here, if skip_items in p->opt are possible
/*
@@ -1396,40 +1396,40 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
prev = cur - curOpt->len;
if (curOpt->len == 1)
- {
+ {
state = (unsigned)p->opt[prev].state;
- if (IsShortRep(curOpt))
- state = kShortRepNextStates[state];
- else
- state = kLiteralNextStates[state];
- }
- else
- {
- const COptimal *prevOpt;
+ if (IsShortRep(curOpt))
+ state = kShortRepNextStates[state];
+ else
+ state = kLiteralNextStates[state];
+ }
+ else
+ {
+ const COptimal *prevOpt;
UInt32 b0;
UInt32 dist = curOpt->dist;
if (curOpt->extra)
- {
+ {
prev -= (unsigned)curOpt->extra;
state = kState_RepAfterLit;
if (curOpt->extra == 1)
state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);
- }
- else
- {
+ }
+ else
+ {
state = (unsigned)p->opt[prev].state;
if (dist < LZMA_NUM_REPS)
- state = kRepNextStates[state];
- else
- state = kMatchNextStates[state];
- }
+ state = kRepNextStates[state];
+ else
+ state = kMatchNextStates[state];
+ }
prevOpt = &p->opt[prev];
b0 = prevOpt->reps[0];
if (dist < LZMA_NUM_REPS)
- {
+ {
if (dist == 0)
{
reps[0] = b0;
@@ -1454,28 +1454,28 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
reps[3] = prevOpt->reps[dist ^ 1];
}
}
- }
- else
- {
+ }
+ else
+ {
reps[0] = (dist - LZMA_NUM_REPS + 1);
reps[1] = b0;
reps[2] = prevOpt->reps[1];
reps[3] = prevOpt->reps[2];
- }
- }
+ }
+ }
- curOpt->state = (CState)state;
+ curOpt->state = (CState)state;
curOpt->reps[0] = reps[0];
curOpt->reps[1] = reps[1];
curOpt->reps[2] = reps[2];
curOpt->reps[3] = reps[3];
-
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
- curByte = *data;
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ curByte = *data;
matchByte = *(data - reps[0]);
-
- posState = (position & p->pbMask);
-
+
+ posState = (position & p->pbMask);
+
/*
The order of Price checks:
< LIT
@@ -1485,16 +1485,16 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
< MATCH [ : LIT : REP_0 ]
*/
- {
+ {
UInt32 curPrice = curOpt->price;
unsigned prob = p->isMatch[state][posState];
matchPrice = curPrice + GET_PRICE_1(prob);
litPrice = curPrice + GET_PRICE_0(prob);
- }
-
- nextOpt = &p->opt[(size_t)cur + 1];
+ }
+
+ nextOpt = &p->opt[(size_t)cur + 1];
nextIsLit = False;
-
+
// here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)
// 18.new.06
if ((nextOpt->price < kInfinityPrice
@@ -1504,7 +1504,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
)
litPrice = 0;
else
- {
+ {
const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
litPrice += (!IsLitState(state) ?
LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
@@ -1517,12 +1517,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
MakeAs_Lit(nextOpt);
nextIsLit = True;
}
- }
-
- repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
-
+ }
+
+ repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
+
numAvailFull = p->numAvail;
- {
+ {
unsigned temp = kNumOpts - 1 - cur;
if (numAvailFull > temp)
numAvailFull = (UInt32)temp;
@@ -1545,18 +1545,18 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);
// if (shortRepPrice <= nextOpt->price) // 17.old
if (shortRepPrice < nextOpt->price) // 18.new
- {
- nextOpt->price = shortRepPrice;
+ {
+ nextOpt->price = shortRepPrice;
nextOpt->len = 1;
MakeAs_ShortRep(nextOpt);
nextIsLit = False;
- }
- }
+ }
+ }
- if (numAvailFull < 2)
- continue;
- numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
-
+ if (numAvailFull < 2)
+ continue;
+ numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
+
// numAvail <= p->numFastBytes
// ---------- LIT : REP_0 ----------
@@ -1565,10 +1565,10 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
&& litPrice != 0 // 18.new
&& matchByte != curByte
&& numAvailFull > 2)
- {
+ {
const Byte *data2 = data - reps[0];
if (data[1] == data2[1] && data[2] == data2[2])
- {
+ {
unsigned len;
unsigned limit = p->numFastBytes + 1;
if (limit > numAvailFull)
@@ -1576,11 +1576,11 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
for (len = 3; len < limit && data[len] == data2[len]; len++)
{}
- {
+ {
unsigned state2 = kLiteralNextStates[state];
unsigned posState2 = (position + 1) & p->pbMask;
UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);
- {
+ {
unsigned offset = cur + len;
if (last < offset)
@@ -1605,19 +1605,19 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
// while (len >= 3);
- }
- }
- }
- }
-
- startLen = 2; /* speed optimization */
-
- {
+ }
+ }
+ }
+ }
+
+ startLen = 2; /* speed optimization */
+
+ {
// ---------- REP ----------
unsigned repIndex = 0; // 17.old
// unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused
for (; repIndex < LZMA_NUM_REPS; repIndex++)
- {
+ {
unsigned len;
UInt32 price;
const Byte *data2 = data - reps[repIndex];
@@ -1629,11 +1629,11 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
// if (len < startLen) continue; // 18.new: speed optimization
- {
+ {
unsigned offset = cur + len;
if (last < offset)
last = offset;
- }
+ }
{
unsigned len2 = len;
price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);
@@ -1651,32 +1651,32 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
while (--len2 >= 2);
}
-
+
if (repIndex == 0) startLen = len + 1; // 17.old
// startLen = len + 1; // 18.new
/* if (_maxMode) */
- {
+ {
// ---------- REP : LIT : REP_0 ----------
// numFastBytes + 1 + numFastBytes
unsigned len2 = len + 1;
unsigned limit = len2 + p->numFastBytes;
- if (limit > numAvailFull)
- limit = numAvailFull;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
len2 += 2;
if (len2 <= limit)
if (data[len2 - 2] == data2[len2 - 2])
if (data[len2 - 1] == data2[len2 - 1])
- {
+ {
unsigned state2 = kRepNextStates[state];
unsigned posState2 = (position + len) & p->pbMask;
price += GET_PRICE_LEN(&p->repLenEnc, posState, len)
+ GET_PRICE_0(p->isMatch[state2][posState2])
+ LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
data[len], data2[len], p->ProbPrices);
-
+
// state2 = kLiteralNextStates[state2];
state2 = kState_LitAfterRep;
posState2 = (posState2 + 1) & p->pbMask;
@@ -1690,13 +1690,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
len2 -= len;
// if (len2 >= 3)
{
- {
+ {
unsigned offset = cur + len + len2;
if (last < offset)
last = offset;
// do
- {
+ {
UInt32 price2;
COptimal *opt;
len2--;
@@ -1712,31 +1712,31 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->extra = (CExtra)(len + 1);
opt->dist = (UInt32)repIndex;
}
- }
+ }
// while (len2 >= 3);
- }
- }
+ }
+ }
}
- }
+ }
}
- }
+ }
// ---------- MATCH ----------
/* for (unsigned len = 2; len <= newLen; len++) */
- if (newLen > numAvail)
- {
- newLen = numAvail;
- for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
+ if (newLen > numAvail)
+ {
+ newLen = numAvail;
+ for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
matches[numPairs] = (UInt32)newLen;
- numPairs += 2;
- }
+ numPairs += 2;
+ }
// startLen = 2; /* speed optimization */
- if (newLen >= startLen)
- {
- UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
+ if (newLen >= startLen)
+ {
+ UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
UInt32 dist;
unsigned offs, posSlot, len;
@@ -1745,19 +1745,19 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (last < offset)
last = offset;
}
-
- offs = 0;
- while (startLen > matches[offs])
- offs += 2;
+
+ offs = 0;
+ while (startLen > matches[offs])
+ offs += 2;
dist = matches[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
for (len = /*2*/ startLen; ; len++)
- {
+ {
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
- {
+ {
COptimal *opt;
unsigned lenNorm = len - 2;
lenNorm = GetLenToPosState2(lenNorm);
@@ -1774,24 +1774,24 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->dist = dist + LZMA_NUM_REPS;
opt->extra = 0;
}
- }
-
+ }
+
if (len == matches[offs])
- {
+ {
// if (p->_maxMode) {
// MATCH : LIT : REP_0
const Byte *data2 = data - dist - 1;
unsigned len2 = len + 1;
unsigned limit = len2 + p->numFastBytes;
- if (limit > numAvailFull)
- limit = numAvailFull;
+ if (limit > numAvailFull)
+ limit = numAvailFull;
len2 += 2;
if (len2 <= limit)
if (data[len2 - 2] == data2[len2 - 2])
if (data[len2 - 1] == data2[len2 - 1])
- {
+ {
for (; len2 < limit && data[len2] == data2[len2]; len2++)
{}
@@ -1817,138 +1817,138 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (last < offset)
last = offset;
// do
- {
+ {
UInt32 price2;
- COptimal *opt;
+ COptimal *opt;
len2--;
// price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
- opt = &p->opt[offset];
+ opt = &p->opt[offset];
// offset--;
if (price2 < opt->price)
- {
+ {
opt->price = price2;
opt->len = (UInt32)len2;
opt->extra = (CExtra)(len + 1);
opt->dist = dist + LZMA_NUM_REPS;
- }
- }
+ }
+ }
// while (len2 >= 3);
- }
+ }
}
- offs += 2;
- if (offs == numPairs)
- break;
+ offs += 2;
+ if (offs == numPairs)
+ break;
dist = matches[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
- }
- }
- }
- }
+ }
+ }
+ }
+ }
do
p->opt[last].price = kInfinityPrice;
while (--last);
return Backward(p, cur);
-}
-
+}
-#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
-
+
+#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
+
static unsigned GetOptimumFast(CLzmaEnc *p)
-{
+{
UInt32 numAvail, mainDist;
unsigned mainLen, numPairs, repIndex, repLen, i;
- const Byte *data;
-
- if (p->additionalOffset == 0)
- mainLen = ReadMatchDistances(p, &numPairs);
- else
- {
+ const Byte *data;
+
+ if (p->additionalOffset == 0)
+ mainLen = ReadMatchDistances(p, &numPairs);
+ else
+ {
mainLen = p->longestMatchLen;
- numPairs = p->numPairs;
- }
-
- numAvail = p->numAvail;
+ numPairs = p->numPairs;
+ }
+
+ numAvail = p->numAvail;
p->backRes = MARK_LIT;
- if (numAvail < 2)
- return 1;
+ if (numAvail < 2)
+ return 1;
// if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused
- if (numAvail > LZMA_MATCH_LEN_MAX)
- numAvail = LZMA_MATCH_LEN_MAX;
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
- repLen = repIndex = 0;
+ if (numAvail > LZMA_MATCH_LEN_MAX)
+ numAvail = LZMA_MATCH_LEN_MAX;
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ repLen = repIndex = 0;
- for (i = 0; i < LZMA_NUM_REPS; i++)
- {
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
unsigned len;
const Byte *data2 = data - p->reps[i];
- if (data[0] != data2[0] || data[1] != data2[1])
- continue;
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
for (len = 2; len < numAvail && data[len] == data2[len]; len++)
{}
- if (len >= p->numFastBytes)
- {
+ if (len >= p->numFastBytes)
+ {
p->backRes = (UInt32)i;
MOVE_POS(p, len - 1)
- return len;
- }
- if (len > repLen)
- {
- repIndex = i;
- repLen = len;
- }
- }
-
- if (mainLen >= p->numFastBytes)
- {
+ return len;
+ }
+ if (len > repLen)
+ {
+ repIndex = i;
+ repLen = len;
+ }
+ }
+
+ if (mainLen >= p->numFastBytes)
+ {
p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
- return mainLen;
- }
-
- mainDist = 0; /* for GCC */
+ return mainLen;
+ }
+
+ mainDist = 0; /* for GCC */
- if (mainLen >= 2)
- {
+ if (mainLen >= 2)
+ {
mainDist = p->matches[(size_t)numPairs - 1];
while (numPairs > 2)
- {
+ {
UInt32 dist2;
if (mainLen != p->matches[(size_t)numPairs - 4] + 1)
- break;
+ break;
dist2 = p->matches[(size_t)numPairs - 3];
if (!ChangePair(dist2, mainDist))
break;
- numPairs -= 2;
+ numPairs -= 2;
mainLen--;
mainDist = dist2;
- }
- if (mainLen == 2 && mainDist >= 0x80)
- mainLen = 1;
- }
-
+ }
+ if (mainLen == 2 && mainDist >= 0x80)
+ mainLen = 1;
+ }
+
if (repLen >= 2)
if ( repLen + 1 >= mainLen
|| (repLen + 2 >= mainLen && mainDist >= (1 << 9))
|| (repLen + 3 >= mainLen && mainDist >= (1 << 15)))
- {
+ {
p->backRes = (UInt32)repIndex;
MOVE_POS(p, repLen - 1)
- return repLen;
- }
-
- if (mainLen < 2 || numAvail <= 2)
- return 1;
-
- {
+ return repLen;
+ }
+
+ if (mainLen < 2 || numAvail <= 2)
+ return 1;
+
+ {
unsigned len1 = ReadMatchDistances(p, &p->numPairs);
p->longestMatchLen = len1;
@@ -1961,17 +1961,17 @@ static unsigned GetOptimumFast(CLzmaEnc *p)
|| (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))
return 1;
}
- }
-
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+ }
+
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
- for (i = 0; i < LZMA_NUM_REPS; i++)
- {
+ for (i = 0; i < LZMA_NUM_REPS; i++)
+ {
unsigned len, limit;
const Byte *data2 = data - p->reps[i];
- if (data[0] != data2[0] || data[1] != data2[1])
- continue;
- limit = mainLen - 1;
+ if (data[0] != data2[0] || data[1] != data2[1])
+ continue;
+ limit = mainLen - 1;
for (len = 2;; len++)
{
if (len >= limit)
@@ -1979,21 +1979,21 @@ static unsigned GetOptimumFast(CLzmaEnc *p)
if (data[len] != data2[len])
break;
}
- }
+ }
p->backRes = mainDist + LZMA_NUM_REPS;
if (mainLen != 2)
{
MOVE_POS(p, mainLen - 2)
}
- return mainLen;
-}
-
+ return mainLen;
+}
+
static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
-{
+{
UInt32 range;
range = p->rc.range;
{
@@ -2005,7 +2005,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
RC_BIT_PRE(&p->rc, prob)
RC_BIT_0(&p->rc, prob)
}
- p->state = kMatchNextStates[p->state];
+ p->state = kMatchNextStates[p->state];
p->rc.range = range;
LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);
@@ -2050,37 +2050,37 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
while (m < kAlignTableSize);
}
p->rc.range = range;
-}
-
+}
+
+
+static SRes CheckErrors(CLzmaEnc *p)
+{
+ if (p->result != SZ_OK)
+ return p->result;
+ if (p->rc.res != SZ_OK)
+ p->result = SZ_ERROR_WRITE;
+ if (p->matchFinderBase.result != SZ_OK)
+ p->result = SZ_ERROR_READ;
+ if (p->result != SZ_OK)
+ p->finished = True;
+ return p->result;
+}
-static SRes CheckErrors(CLzmaEnc *p)
-{
- if (p->result != SZ_OK)
- return p->result;
- if (p->rc.res != SZ_OK)
- p->result = SZ_ERROR_WRITE;
- if (p->matchFinderBase.result != SZ_OK)
- p->result = SZ_ERROR_READ;
- if (p->result != SZ_OK)
- p->finished = True;
- return p->result;
-}
-
MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
-{
- /* ReleaseMFStream(); */
- p->finished = True;
- if (p->writeEndMark)
- WriteEndMarker(p, nowPos & p->pbMask);
- RangeEnc_FlushData(&p->rc);
- RangeEnc_FlushStream(&p->rc);
- return CheckErrors(p);
-}
-
+{
+ /* ReleaseMFStream(); */
+ p->finished = True;
+ if (p->writeEndMark)
+ WriteEndMarker(p, nowPos & p->pbMask);
+ RangeEnc_FlushData(&p->rc);
+ RangeEnc_FlushStream(&p->rc);
+ return CheckErrors(p);
+}
+
MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
-{
+{
unsigned i;
const CProbPrice *ProbPrices = p->ProbPrices;
const CLzmaProb *probs = p->posAlignEncoder;
@@ -2100,21 +2100,21 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
// p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
}
-}
-
+}
+
MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
-{
+{
// int y; for (y = 0; y < 100; y++) {
- UInt32 tempPrices[kNumFullDistances];
+ UInt32 tempPrices[kNumFullDistances];
unsigned i, lps;
const CProbPrice *ProbPrices = p->ProbPrices;
p->matchPriceCount = 0;
for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
- {
+ {
unsigned posSlot = GetPosSlot1(i);
unsigned footerBits = (posSlot >> 1) - 1;
unsigned base = ((2 | (posSlot & 1)) << footerBits);
@@ -2141,10 +2141,10 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
tempPrices[base ] = price + GET_PRICEa_0(prob);
tempPrices[base + offset] = price + GET_PRICEa_1(prob);
}
- }
-
+ }
+
for (lps = 0; lps < kNumLenToPosStates; lps++)
- {
+ {
unsigned slot;
unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
UInt32 *posSlotPrices = p->posSlotPrices[lps];
@@ -2176,8 +2176,8 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
delta += ((UInt32)1 << kNumBitPriceShiftBits);
}
}
-
- {
+
+ {
UInt32 *dp = p->distancesPrices[lps];
dp[0] = posSlotPrices[0];
@@ -2191,118 +2191,118 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
dp[i ] = slotPrice + tempPrices[i];
dp[i + 1] = slotPrice + tempPrices[i + 1];
}
- }
- }
+ }
+ }
// }
-}
-
+}
-void LzmaEnc_Construct(CLzmaEnc *p)
-{
- RangeEnc_Construct(&p->rc);
- MatchFinder_Construct(&p->matchFinderBase);
-
- #ifndef _7ZIP_ST
- MatchFinderMt_Construct(&p->matchFinderMt);
- p->matchFinderMt.MatchFinder = &p->matchFinderBase;
- #endif
-
- {
- CLzmaEncProps props;
- LzmaEncProps_Init(&props);
- LzmaEnc_SetProps(p, &props);
- }
-
- #ifndef LZMA_LOG_BSR
- LzmaEnc_FastPosInit(p->g_FastPos);
- #endif
-
- LzmaEnc_InitPriceTables(p->ProbPrices);
- p->litProbs = NULL;
- p->saveState.litProbs = NULL;
-}
-
-CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
-{
- void *p;
- p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
- if (p)
- LzmaEnc_Construct((CLzmaEnc *)p);
- return p;
-}
-
-void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
-{
- ISzAlloc_Free(alloc, p->litProbs);
- ISzAlloc_Free(alloc, p->saveState.litProbs);
- p->litProbs = NULL;
- p->saveState.litProbs = NULL;
-}
-
-void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- #ifndef _7ZIP_ST
- MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
- #endif
-
- MatchFinder_Free(&p->matchFinderBase, allocBig);
- LzmaEnc_FreeLits(p, alloc);
- RangeEnc_Free(&p->rc, alloc);
-}
-
-void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
- ISzAlloc_Free(alloc, p);
-}
-
+void LzmaEnc_Construct(CLzmaEnc *p)
+{
+ RangeEnc_Construct(&p->rc);
+ MatchFinder_Construct(&p->matchFinderBase);
+
+ #ifndef _7ZIP_ST
+ MatchFinderMt_Construct(&p->matchFinderMt);
+ p->matchFinderMt.MatchFinder = &p->matchFinderBase;
+ #endif
+
+ {
+ CLzmaEncProps props;
+ LzmaEncProps_Init(&props);
+ LzmaEnc_SetProps(p, &props);
+ }
+
+ #ifndef LZMA_LOG_BSR
+ LzmaEnc_FastPosInit(p->g_FastPos);
+ #endif
+
+ LzmaEnc_InitPriceTables(p->ProbPrices);
+ p->litProbs = NULL;
+ p->saveState.litProbs = NULL;
+
+}
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
+{
+ void *p;
+ p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
+ if (p)
+ LzmaEnc_Construct((CLzmaEnc *)p);
+ return p;
+}
+
+void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+{
+ ISzAlloc_Free(alloc, p->litProbs);
+ ISzAlloc_Free(alloc, p->saveState.litProbs);
+ p->litProbs = NULL;
+ p->saveState.litProbs = NULL;
+}
+
+void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ #ifndef _7ZIP_ST
+ MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+ #endif
+
+ MatchFinder_Free(&p->matchFinderBase, allocBig);
+ LzmaEnc_FreeLits(p, alloc);
+ RangeEnc_Free(&p->rc, alloc);
+}
+
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
+ ISzAlloc_Free(alloc, p);
+}
+
SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpackSize)
-{
+{
CLzmaEnc *p = (CLzmaEnc *) pp;
- UInt32 nowPos32, startPos32;
- if (p->needInit)
- {
- p->matchFinder.Init(p->matchFinderObj);
- p->needInit = 0;
- }
-
- if (p->finished)
- return p->result;
- RINOK(CheckErrors(p));
-
- nowPos32 = (UInt32)p->nowPos64;
- startPos32 = nowPos32;
-
- if (p->nowPos64 == 0)
- {
+ UInt32 nowPos32, startPos32;
+ if (p->needInit)
+ {
+ p->matchFinder.Init(p->matchFinderObj);
+ p->needInit = 0;
+ }
+
+ if (p->finished)
+ return p->result;
+ RINOK(CheckErrors(p));
+
+ nowPos32 = (UInt32)p->nowPos64;
+ startPos32 = nowPos32;
+
+ if (p->nowPos64 == 0)
+ {
unsigned numPairs;
- Byte curByte;
- if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
- return Flush(p, nowPos32);
- ReadMatchDistances(p, &numPairs);
+ Byte curByte;
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+ return Flush(p, nowPos32);
+ ReadMatchDistances(p, &numPairs);
RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);
// p->state = kLiteralNextStates[p->state];
- curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
- LitEnc_Encode(&p->rc, p->litProbs, curByte);
- p->additionalOffset--;
- nowPos32++;
- }
-
- if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
+ curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
+ LitEnc_Encode(&p->rc, p->litProbs, curByte);
+ p->additionalOffset--;
+ nowPos32++;
+ }
+
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
- for (;;)
- {
+ for (;;)
+ {
UInt32 dist;
unsigned len, posState;
UInt32 range, ttt, newBound;
CLzmaProb *probs;
- if (p->fastMode)
+ if (p->fastMode)
len = GetOptimumFast(p);
- else
+ else
{
unsigned oci = p->optCur;
if (p->optEnd == oci)
@@ -2315,7 +2315,7 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
p->optCur = oci + 1;
}
}
-
+
posState = (unsigned)nowPos32 & p->pbMask;
range = p->rc.range;
probs = &p->isMatch[p->state][posState];
@@ -2324,41 +2324,41 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
dist = p->backRes;
- #ifdef SHOW_STAT2
+ #ifdef SHOW_STAT2
printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist);
- #endif
-
+ #endif
+
if (dist == MARK_LIT)
- {
- Byte curByte;
- const Byte *data;
+ {
+ Byte curByte;
+ const Byte *data;
unsigned state;
-
+
RC_BIT_0(&p->rc, probs);
p->rc.range = range;
- data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+ data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
probs = LIT_PROBS(nowPos32, *(data - 1));
- curByte = *data;
+ curByte = *data;
state = p->state;
p->state = kLiteralNextStates[state];
if (IsLitState(state))
- LitEnc_Encode(&p->rc, probs, curByte);
- else
+ LitEnc_Encode(&p->rc, probs, curByte);
+ else
LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));
- }
- else
- {
+ }
+ else
+ {
RC_BIT_1(&p->rc, probs);
probs = &p->isRep[p->state];
RC_BIT_PRE(&p->rc, probs)
if (dist < LZMA_NUM_REPS)
- {
+ {
RC_BIT_1(&p->rc, probs);
probs = &p->isRepG0[p->state];
RC_BIT_PRE(&p->rc, probs)
if (dist == 0)
- {
+ {
RC_BIT_0(&p->rc, probs);
probs = &p->isRep0Long[p->state][posState];
RC_BIT_PRE(&p->rc, probs)
@@ -2371,9 +2371,9 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
RC_BIT_0_BASE(&p->rc, probs);
p->state = kShortRepNextStates[p->state];
}
- }
- else
- {
+ }
+ else
+ {
RC_BIT_1(&p->rc, probs);
probs = &p->isRepG1[p->state];
RC_BIT_PRE(&p->rc, probs)
@@ -2382,8 +2382,8 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
RC_BIT_0_BASE(&p->rc, probs);
dist = p->reps[1];
}
- else
- {
+ else
+ {
RC_BIT_1(&p->rc, probs);
probs = &p->isRepG2[p->state];
RC_BIT_PRE(&p->rc, probs)
@@ -2396,31 +2396,31 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
{
RC_BIT_1_BASE(&p->rc, probs);
dist = p->reps[3];
- p->reps[3] = p->reps[2];
+ p->reps[3] = p->reps[2];
}
- p->reps[2] = p->reps[1];
- }
- p->reps[1] = p->reps[0];
+ p->reps[2] = p->reps[1];
+ }
+ p->reps[1] = p->reps[0];
p->reps[0] = dist;
- }
+ }
RC_NORM(&p->rc)
p->rc.range = range;
if (len != 1)
- {
+ {
LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
--p->repLenEncCounter;
- p->state = kRepNextStates[p->state];
- }
- }
- else
- {
+ p->state = kRepNextStates[p->state];
+ }
+ }
+ else
+ {
unsigned posSlot;
RC_BIT_0(&p->rc, probs);
p->rc.range = range;
- p->state = kMatchNextStates[p->state];
+ p->state = kMatchNextStates[p->state];
LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
// --p->lenEnc.counter;
@@ -2430,11 +2430,11 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
p->reps[2] = p->reps[1];
p->reps[1] = p->reps[0];
p->reps[0] = dist + 1;
-
+
p->matchPriceCount++;
GetPosSlot(dist, posSlot);
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
- {
+ {
UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
range = p->rc.range;
probs = p->posSlotEncoder[GetLenToPosState(len)];
@@ -2452,14 +2452,14 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
if (dist >= kStartPosModelIndex)
{
unsigned footerBits = ((posSlot >> 1) - 1);
-
+
if (dist < kNumFullDistances)
{
unsigned base = ((2 | (posSlot & 1)) << footerBits);
RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));
}
- else
- {
+ else
+ {
UInt32 pos2 = (dist | 0xF) << (32 - footerBits);
range = p->rc.range;
// RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
@@ -2494,31 +2494,31 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
p->rc.range = range;
// p->alignPriceCount++;
}
- }
- }
- }
- }
+ }
+ }
+ }
+ }
nowPos32 += (UInt32)len;
- p->additionalOffset -= len;
+ p->additionalOffset -= len;
- if (p->additionalOffset == 0)
- {
- UInt32 processed;
+ if (p->additionalOffset == 0)
+ {
+ UInt32 processed;
- if (!p->fastMode)
- {
+ if (!p->fastMode)
+ {
/*
if (p->alignPriceCount >= 16) // kAlignTableSize
FillAlignPrices(p);
if (p->matchPriceCount >= 128)
- FillDistancesPrices(p);
+ FillDistancesPrices(p);
if (p->lenEnc.counter <= 0)
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
*/
if (p->matchPriceCount >= 64)
{
- FillAlignPrices(p);
+ FillAlignPrices(p);
// { int y; for (y = 0; y < 100; y++) {
FillDistancesPrices(p);
// }}
@@ -2529,131 +2529,131 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
- }
+ }
- if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
- break;
- processed = nowPos32 - startPos32;
+ if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+ break;
+ processed = nowPos32 - startPos32;
if (maxPackSize)
- {
+ {
if (processed + kNumOpts + 300 >= maxUnpackSize
|| RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)
- break;
- }
- else if (processed >= (1 << 17))
- {
- p->nowPos64 += nowPos32 - startPos32;
- return CheckErrors(p);
- }
- }
- }
-
- p->nowPos64 += nowPos32 - startPos32;
- return Flush(p, nowPos32);
-}
-
+ break;
+ }
+ else if (processed >= (1 << 17))
+ {
+ p->nowPos64 += nowPos32 - startPos32;
+ return CheckErrors(p);
+ }
+ }
+ }
+ p->nowPos64 += nowPos32 - startPos32;
+ return Flush(p, nowPos32);
+}
-#define kBigHashDicLimit ((UInt32)1 << 24)
-
-static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- UInt32 beforeSize = kNumOpts;
- if (!RangeEnc_Alloc(&p->rc, alloc))
- return SZ_ERROR_MEM;
-
- #ifndef _7ZIP_ST
- p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
- #endif
-
- {
- unsigned lclp = p->lc + p->lp;
- if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
- {
- LzmaEnc_FreeLits(p, alloc);
- p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
- p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
- if (!p->litProbs || !p->saveState.litProbs)
- {
- LzmaEnc_FreeLits(p, alloc);
- return SZ_ERROR_MEM;
- }
- p->lclp = lclp;
- }
- }
-
- p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
-
- if (beforeSize + p->dictSize < keepWindowSize)
- beforeSize = keepWindowSize - p->dictSize;
-
- #ifndef _7ZIP_ST
- if (p->mtMode)
- {
+
+
+#define kBigHashDicLimit ((UInt32)1 << 24)
+
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ UInt32 beforeSize = kNumOpts;
+ if (!RangeEnc_Alloc(&p->rc, alloc))
+ return SZ_ERROR_MEM;
+
+ #ifndef _7ZIP_ST
+ p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
+ #endif
+
+ {
+ unsigned lclp = p->lc + p->lp;
+ if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
+ {
+ LzmaEnc_FreeLits(p, alloc);
+ p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+ p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+ if (!p->litProbs || !p->saveState.litProbs)
+ {
+ LzmaEnc_FreeLits(p, alloc);
+ return SZ_ERROR_MEM;
+ }
+ p->lclp = lclp;
+ }
+ }
+
+ p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+
+ if (beforeSize + p->dictSize < keepWindowSize)
+ beforeSize = keepWindowSize - p->dictSize;
+
+ #ifndef _7ZIP_ST
+ if (p->mtMode)
+ {
RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
LZMA_MATCH_LEN_MAX
+ 1 /* 18.04 */
, allocBig));
- p->matchFinderObj = &p->matchFinderMt;
- p->matchFinderBase.bigHash = (Byte)(
- (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
- MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
- }
- else
- #endif
- {
- if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
- return SZ_ERROR_MEM;
- p->matchFinderObj = &p->matchFinderBase;
- MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
- }
-
- return SZ_OK;
-}
-
-void LzmaEnc_Init(CLzmaEnc *p)
-{
+ p->matchFinderObj = &p->matchFinderMt;
+ p->matchFinderBase.bigHash = (Byte)(
+ (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
+ MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+ }
+ else
+ #endif
+ {
+ if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+ return SZ_ERROR_MEM;
+ p->matchFinderObj = &p->matchFinderBase;
+ MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+ }
+
+ return SZ_OK;
+}
+
+void LzmaEnc_Init(CLzmaEnc *p)
+{
unsigned i;
- p->state = 0;
+ p->state = 0;
p->reps[0] =
p->reps[1] =
p->reps[2] =
p->reps[3] = 1;
-
- RangeEnc_Init(&p->rc);
-
+
+ RangeEnc_Init(&p->rc);
+
for (i = 0; i < (1 << kNumAlignBits); i++)
p->posAlignEncoder[i] = kProbInitValue;
-
- for (i = 0; i < kNumStates; i++)
- {
+
+ for (i = 0; i < kNumStates; i++)
+ {
unsigned j;
- for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
- {
- p->isMatch[i][j] = kProbInitValue;
- p->isRep0Long[i][j] = kProbInitValue;
- }
- p->isRep[i] = kProbInitValue;
- p->isRepG0[i] = kProbInitValue;
- p->isRepG1[i] = kProbInitValue;
- p->isRepG2[i] = kProbInitValue;
- }
-
- {
- for (i = 0; i < kNumLenToPosStates; i++)
- {
- CLzmaProb *probs = p->posSlotEncoder[i];
+ for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
+ {
+ p->isMatch[i][j] = kProbInitValue;
+ p->isRep0Long[i][j] = kProbInitValue;
+ }
+ p->isRep[i] = kProbInitValue;
+ p->isRepG0[i] = kProbInitValue;
+ p->isRepG1[i] = kProbInitValue;
+ p->isRepG2[i] = kProbInitValue;
+ }
+
+ {
+ for (i = 0; i < kNumLenToPosStates; i++)
+ {
+ CLzmaProb *probs = p->posSlotEncoder[i];
unsigned j;
- for (j = 0; j < (1 << kNumPosSlotBits); j++)
- probs[j] = kProbInitValue;
- }
- }
- {
+ for (j = 0; j < (1 << kNumPosSlotBits); j++)
+ probs[j] = kProbInitValue;
+ }
+ }
+ {
for (i = 0; i < kNumFullDistances; i++)
- p->posEncoders[i] = kProbInitValue;
- }
-
+ p->posEncoders[i] = kProbInitValue;
+ }
+
{
UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
UInt32 k;
@@ -2661,8 +2661,8 @@ void LzmaEnc_Init(CLzmaEnc *p)
for (k = 0; k < num; k++)
probs[k] = kProbInitValue;
}
-
-
+
+
LenEnc_Init(&p->lenProbs);
LenEnc_Init(&p->repLenProbs);
@@ -2674,307 +2674,307 @@ void LzmaEnc_Init(CLzmaEnc *p)
p->opt[i].price = kInfinityPrice;
}
- p->additionalOffset = 0;
-
- p->pbMask = (1 << p->pb) - 1;
+ p->additionalOffset = 0;
+
+ p->pbMask = (1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
-}
-
+}
-void LzmaEnc_InitPrices(CLzmaEnc *p)
-{
- if (!p->fastMode)
- {
- FillDistancesPrices(p);
- FillAlignPrices(p);
- }
-
- p->lenEnc.tableSize =
- p->repLenEnc.tableSize =
- p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+
+void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+ if (!p->fastMode)
+ {
+ FillDistancesPrices(p);
+ FillAlignPrices(p);
+ }
+
+ p->lenEnc.tableSize =
+ p->repLenEnc.tableSize =
+ p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
-}
-
-static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
+}
+
+static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
unsigned i;
for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++)
- if (p->dictSize <= ((UInt32)1 << i))
- break;
- p->distTableSize = i * 2;
-
- p->finished = False;
- p->result = SZ_OK;
- RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
- LzmaEnc_Init(p);
- LzmaEnc_InitPrices(p);
- p->nowPos64 = 0;
- return SZ_OK;
-}
-
+ if (p->dictSize <= ((UInt32)1 << i))
+ break;
+ p->distTableSize = i * 2;
+
+ p->finished = False;
+ p->result = SZ_OK;
+ RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
+ LzmaEnc_Init(p);
+ LzmaEnc_InitPrices(p);
+ p->nowPos64 = 0;
+ return SZ_OK;
+}
+
SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,
- ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
- p->needInit = 1;
- p->rc.outStream = outStream;
- return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
-}
-
-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
- ISeqInStream *inStream, UInt32 keepWindowSize,
- ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
- p->needInit = 1;
- return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
-}
-
-static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
-{
- p->matchFinderBase.directInput = 1;
- p->matchFinderBase.bufferBase = (Byte *)src;
- p->matchFinderBase.directInputRem = srcLen;
-}
-
-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
- UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- LzmaEnc_SetInputBuf(p, src, srcLen);
- p->needInit = 1;
-
- LzmaEnc_SetDataSize(pp, srcLen);
- return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
-}
-
-void LzmaEnc_Finish(CLzmaEncHandle pp)
-{
- #ifndef _7ZIP_ST
- CLzmaEnc *p = (CLzmaEnc *)pp;
- if (p->mtMode)
- MatchFinderMt_ReleaseStream(&p->matchFinderMt);
- #else
- UNUSED_VAR(pp);
- #endif
-}
-
-
-typedef struct
-{
- ISeqOutStream vt;
- Byte *data;
- SizeT rem;
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ p->matchFinderBase.stream = inStream;
+ p->needInit = 1;
+ p->rc.outStream = outStream;
+ return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
+}
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
+ ISeqInStream *inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ p->matchFinderBase.stream = inStream;
+ p->needInit = 1;
+ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
+{
+ p->matchFinderBase.directInput = 1;
+ p->matchFinderBase.bufferBase = (Byte *)src;
+ p->matchFinderBase.directInputRem = srcLen;
+}
+
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ LzmaEnc_SetInputBuf(p, src, srcLen);
+ p->needInit = 1;
+
+ LzmaEnc_SetDataSize(pp, srcLen);
+ return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+void LzmaEnc_Finish(CLzmaEncHandle pp)
+{
+ #ifndef _7ZIP_ST
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ if (p->mtMode)
+ MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+ #else
+ UNUSED_VAR(pp);
+ #endif
+}
+
+
+typedef struct
+{
+ ISeqOutStream vt;
+ Byte *data;
+ SizeT rem;
BoolInt overflow;
-} CLzmaEnc_SeqOutStreamBuf;
-
-static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
-{
- CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
- if (p->rem < size)
- {
- size = p->rem;
- p->overflow = True;
- }
- memcpy(p->data, data, size);
- p->rem -= size;
- p->data += size;
- return size;
-}
-
-
-UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
-{
- const CLzmaEnc *p = (CLzmaEnc *)pp;
- return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
-}
-
-
-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
-{
- const CLzmaEnc *p = (CLzmaEnc *)pp;
- return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
-}
-
-
+} CLzmaEnc_SeqOutStreamBuf;
+
+static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
+{
+ CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
+ if (p->rem < size)
+ {
+ size = p->rem;
+ p->overflow = True;
+ }
+ memcpy(p->data, data, size);
+ p->rem -= size;
+ p->data += size;
+ return size;
+}
+
+
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
+{
+ const CLzmaEnc *p = (CLzmaEnc *)pp;
+ return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+}
+
+
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
+{
+ const CLzmaEnc *p = (CLzmaEnc *)pp;
+ return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+}
+
+
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
- Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- UInt64 nowPos64;
- SRes res;
- CLzmaEnc_SeqOutStreamBuf outStream;
-
- outStream.vt.Write = SeqOutStreamBuf_Write;
- outStream.data = dest;
- outStream.rem = *destLen;
- outStream.overflow = False;
-
- p->writeEndMark = False;
- p->finished = False;
- p->result = SZ_OK;
-
- if (reInit)
- LzmaEnc_Init(p);
- LzmaEnc_InitPrices(p);
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ UInt64 nowPos64;
+ SRes res;
+ CLzmaEnc_SeqOutStreamBuf outStream;
+
+ outStream.vt.Write = SeqOutStreamBuf_Write;
+ outStream.data = dest;
+ outStream.rem = *destLen;
+ outStream.overflow = False;
+
+ p->writeEndMark = False;
+ p->finished = False;
+ p->result = SZ_OK;
+
+ if (reInit)
+ LzmaEnc_Init(p);
+ LzmaEnc_InitPrices(p);
+
+ nowPos64 = p->nowPos64;
+ RangeEnc_Init(&p->rc);
+ p->rc.outStream = &outStream.vt;
- nowPos64 = p->nowPos64;
- RangeEnc_Init(&p->rc);
- p->rc.outStream = &outStream.vt;
-
if (desiredPackSize == 0)
return SZ_ERROR_OUTPUT_EOF;
res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
-
- *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
- *destLen -= outStream.rem;
- if (outStream.overflow)
- return SZ_ERROR_OUTPUT_EOF;
-
- return res;
-}
-
-
-static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
-{
- SRes res = SZ_OK;
-
- #ifndef _7ZIP_ST
- Byte allocaDummy[0x300];
- allocaDummy[0] = 0;
- allocaDummy[1] = allocaDummy[0];
- #endif
-
- for (;;)
- {
+
+ *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
+ *destLen -= outStream.rem;
+ if (outStream.overflow)
+ return SZ_ERROR_OUTPUT_EOF;
+
+ return res;
+}
+
+
+static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
+{
+ SRes res = SZ_OK;
+
+ #ifndef _7ZIP_ST
+ Byte allocaDummy[0x300];
+ allocaDummy[0] = 0;
+ allocaDummy[1] = allocaDummy[0];
+ #endif
+
+ for (;;)
+ {
res = LzmaEnc_CodeOneBlock(p, 0, 0);
- if (res != SZ_OK || p->finished)
- break;
- if (progress)
- {
- res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
- if (res != SZ_OK)
- {
- res = SZ_ERROR_PROGRESS;
- break;
- }
- }
- }
-
- LzmaEnc_Finish(p);
-
- /*
- if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
- res = SZ_ERROR_FAIL;
- }
- */
-
- return res;
-}
-
-
-SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
- ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));
- return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
-}
-
-
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
-{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- unsigned i;
- UInt32 dictSize = p->dictSize;
- if (*size < LZMA_PROPS_SIZE)
- return SZ_ERROR_PARAM;
- *size = LZMA_PROPS_SIZE;
- props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
-
- if (dictSize >= ((UInt32)1 << 22))
- {
- UInt32 kDictMask = ((UInt32)1 << 20) - 1;
- if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
- dictSize = (dictSize + kDictMask) & ~kDictMask;
- }
- else for (i = 11; i <= 30; i++)
- {
- if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
- if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
- }
-
- for (i = 0; i < 4; i++)
- props[1 + i] = (Byte)(dictSize >> (8 * i));
- return SZ_OK;
-}
-
-
-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
-{
- return ((CLzmaEnc *)pp)->writeEndMark;
-}
-
-
-SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- SRes res;
- CLzmaEnc *p = (CLzmaEnc *)pp;
-
- CLzmaEnc_SeqOutStreamBuf outStream;
-
- outStream.vt.Write = SeqOutStreamBuf_Write;
- outStream.data = dest;
- outStream.rem = *destLen;
- outStream.overflow = False;
-
- p->writeEndMark = writeEndMark;
- p->rc.outStream = &outStream.vt;
-
- res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
-
- if (res == SZ_OK)
- {
- res = LzmaEnc_Encode2(p, progress);
- if (res == SZ_OK && p->nowPos64 != srcLen)
- res = SZ_ERROR_FAIL;
- }
-
- *destLen -= outStream.rem;
- if (outStream.overflow)
- return SZ_ERROR_OUTPUT_EOF;
- return res;
-}
-
-
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
- CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
- SRes res;
- if (!p)
- return SZ_ERROR_MEM;
-
- res = LzmaEnc_SetProps(p, props);
- if (res == SZ_OK)
- {
- res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
- if (res == SZ_OK)
- res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
- writeEndMark, progress, alloc, allocBig);
- }
-
- LzmaEnc_Destroy(p, alloc, allocBig);
- return res;
-}
+ if (res != SZ_OK || p->finished)
+ break;
+ if (progress)
+ {
+ res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
+ if (res != SZ_OK)
+ {
+ res = SZ_ERROR_PROGRESS;
+ break;
+ }
+ }
+ }
+
+ LzmaEnc_Finish(p);
+
+ /*
+ if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
+ res = SZ_ERROR_FAIL;
+ }
+ */
+
+ return res;
+}
+
+
+SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));
+ return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
+}
+
+
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+{
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+ unsigned i;
+ UInt32 dictSize = p->dictSize;
+ if (*size < LZMA_PROPS_SIZE)
+ return SZ_ERROR_PARAM;
+ *size = LZMA_PROPS_SIZE;
+ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+ if (dictSize >= ((UInt32)1 << 22))
+ {
+ UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+ if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
+ dictSize = (dictSize + kDictMask) & ~kDictMask;
+ }
+ else for (i = 11; i <= 30; i++)
+ {
+ if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
+ if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
+ }
+
+ for (i = 0; i < 4; i++)
+ props[1 + i] = (Byte)(dictSize >> (8 * i));
+ return SZ_OK;
+}
+
+
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
+{
+ return ((CLzmaEnc *)pp)->writeEndMark;
+}
+
+
+SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ SRes res;
+ CLzmaEnc *p = (CLzmaEnc *)pp;
+
+ CLzmaEnc_SeqOutStreamBuf outStream;
+
+ outStream.vt.Write = SeqOutStreamBuf_Write;
+ outStream.data = dest;
+ outStream.rem = *destLen;
+ outStream.overflow = False;
+
+ p->writeEndMark = writeEndMark;
+ p->rc.outStream = &outStream.vt;
+
+ res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
+
+ if (res == SZ_OK)
+ {
+ res = LzmaEnc_Encode2(p, progress);
+ if (res == SZ_OK && p->nowPos64 != srcLen)
+ res = SZ_ERROR_FAIL;
+ }
+
+ *destLen -= outStream.rem;
+ if (outStream.overflow)
+ return SZ_ERROR_OUTPUT_EOF;
+ return res;
+}
+
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+ ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+ CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
+ SRes res;
+ if (!p)
+ return SZ_ERROR_MEM;
+
+ res = LzmaEnc_SetProps(p, props);
+ if (res == SZ_OK)
+ {
+ res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
+ if (res == SZ_OK)
+ res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
+ writeEndMark, progress, alloc, allocBig);
+ }
+
+ LzmaEnc_Destroy(p, alloc, allocBig);
+ return res;
+}
BoolInt LzmaEnc_IsFinished(CLzmaEncHandle pp)
{
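
For reference, the property-byte packing performed by LzmaEnc_WriteProperties above can be inverted by a small standalone helper. This is an illustrative sketch, not part of the patched sources; the helper name lzma_props_decode is hypothetical.

#include <stdint.h>

/* Unpack the 5-byte LZMA properties blob produced by LzmaEnc_WriteProperties:
   props[0] encodes (pb * 5 + lp) * 9 + lc, and props[1..4] hold the
   dictionary size as a little-endian 32-bit value. */
static int lzma_props_decode(const uint8_t props[5],
                             unsigned *lc, unsigned *lp, unsigned *pb,
                             uint32_t *dictSize)
{
    unsigned d = props[0];
    if (d >= 9 * 5 * 5)            /* valid range: lc <= 8, lp <= 4, pb <= 4 */
        return -1;
    *lc = d % 9;
    d /= 9;
    *lp = d % 5;
    *pb = d / 5;
    *dictSize = (uint32_t)props[1]
              | ((uint32_t)props[2] << 8)
              | ((uint32_t)props[3] << 16)
              | ((uint32_t)props[4] << 24);
    return 0;
}

The decoder side (LzmaProps_Decode in LzmaDec.c) performs the same unpacking before decompression starts.
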
diff --git a/contrib/libs/lzmasdk/LzmaEnc.h b/contrib/libs/lzmasdk/LzmaEnc.h
index 55c257d54d..37a0906c7e 100644
--- a/contrib/libs/lzmasdk/LzmaEnc.h
+++ b/contrib/libs/lzmasdk/LzmaEnc.h
@@ -1,78 +1,78 @@
-/* LzmaEnc.h -- LZMA Encoder
-2017-07-27 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA_ENC_H
-#define __LZMA_ENC_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-#define LZMA_PROPS_SIZE 5
-
-typedef struct _CLzmaEncProps
-{
- int level; /* 0 <= level <= 9 */
- UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
- (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
- default = (1 << 24) */
- int lc; /* 0 <= lc <= 8, default = 3 */
- int lp; /* 0 <= lp <= 4, default = 0 */
- int pb; /* 0 <= pb <= 4, default = 2 */
- int algo; /* 0 - fast, 1 - normal, default = 1 */
- int fb; /* 5 <= fb <= 273, default = 32 */
- int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
- int numHashBytes; /* 2, 3 or 4, default = 4 */
- UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
- unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
- int numThreads; /* 1 or 2, default = 2 */
-
- UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
- Encoder uses this value to reduce dictionary size */
-} CLzmaEncProps;
-
-void LzmaEncProps_Init(CLzmaEncProps *p);
-void LzmaEncProps_Normalize(CLzmaEncProps *p);
-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
-
-
-/* ---------- CLzmaEncHandle Interface ---------- */
-
-/* LzmaEnc* functions can return the following exit codes:
-SRes:
- SZ_OK - OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_PARAM - Incorrect parameter in props
- SZ_ERROR_WRITE - ISeqOutStream write callback error
- SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
- SZ_ERROR_PROGRESS - some break from progress callback
- SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
-*/
-
-typedef void * CLzmaEncHandle;
-
-CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
-void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-
-SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
-void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
-
-SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-
-
-/* ---------- One Call Interface ---------- */
-
-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
- ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-
-EXTERN_C_END
-
+/* LzmaEnc.h -- LZMA Encoder
+2017-07-27 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_ENC_H
+#define __LZMA_ENC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaEncProps
+{
+ int level; /* 0 <= level <= 9 */
+ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+ (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
+ default = (1 << 24) */
+ int lc; /* 0 <= lc <= 8, default = 3 */
+ int lp; /* 0 <= lp <= 4, default = 0 */
+ int pb; /* 0 <= pb <= 4, default = 2 */
+ int algo; /* 0 - fast, 1 - normal, default = 1 */
+ int fb; /* 5 <= fb <= 273, default = 32 */
+ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+ int numHashBytes; /* 2, 3 or 4, default = 4 */
+ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
+ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+ int numThreads; /* 1 or 2, default = 2 */
+
+ UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
+ Encoder uses this value to reduce dictionary size */
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc* functions can return the following exit codes:
+SRes:
+ SZ_OK - OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_PARAM - Incorrect parameter in props
+ SZ_ERROR_WRITE - ISeqOutStream write callback error
+ SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
+ SZ_ERROR_PROGRESS - some break from progress callback
+ SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
+*/
+
+typedef void * CLzmaEncHandle;
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
+
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
+ ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+
+/* ---------- One Call Interface ---------- */
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+ ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+EXTERN_C_END
+
/* ---------- Streaming Interface ---------- */
SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ISzAllocPtr alloc, ISzAllocPtr allocBig);
@@ -80,4 +80,4 @@ SRes LzmaEnc_CodeOneBlock(CLzmaEncHandle pp, UInt32 maxPackSize, UInt32 maxUnpac
BoolInt LzmaEnc_IsFinished(CLzmaEncHandle pp);
void LzmaEnc_Finish(CLzmaEncHandle pp);
-#endif
+#endif
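
The one-call LzmaEncode shown earlier in LzmaEnc.c is itself the canonical use of this handle interface. A caller-side sketch with explicit buffers, assuming the stock g_Alloc allocator from Alloc.h (the helper name encode_with_handle is hypothetical):

#include "Alloc.h"
#include "LzmaEnc.h"

/* Compress src into dest via the CLzmaEncHandle interface, writing the
   5 property bytes separately, as the one-call LzmaEncode wrapper does.
   The caller initializes *propsSize to LZMA_PROPS_SIZE and *destLen to
   the capacity of dest. */
static SRes encode_with_handle(Byte *dest, SizeT *destLen,
                               const Byte *src, SizeT srcLen,
                               Byte *props, SizeT *propsSize)
{
    CLzmaEncProps encProps;
    CLzmaEncHandle enc;
    SRes res;

    LzmaEncProps_Init(&encProps);
    encProps.level = 5;                      /* other fields keep defaults */

    enc = LzmaEnc_Create(&g_Alloc);
    if (!enc)
        return SZ_ERROR_MEM;

    res = LzmaEnc_SetProps(enc, &encProps);
    if (res == SZ_OK)
        res = LzmaEnc_WriteProperties(enc, props, propsSize);
    if (res == SZ_OK)
        res = LzmaEnc_MemEncode(enc, dest, destLen, src, srcLen,
                                0 /* no end mark */, NULL, &g_Alloc, &g_Alloc);

    LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
    return res;
}
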
diff --git a/contrib/libs/lzmasdk/LzmaLib.c b/contrib/libs/lzmasdk/LzmaLib.c
index 9403aedee6..706e9e58cd 100644
--- a/contrib/libs/lzmasdk/LzmaLib.c
+++ b/contrib/libs/lzmasdk/LzmaLib.c
@@ -1,40 +1,40 @@
-/* LzmaLib.c -- LZMA library wrapper
-2015-06-13 : Igor Pavlov : Public domain */
-
-#include "Alloc.h"
-#include "LzmaDec.h"
-#include "LzmaEnc.h"
-#include "LzmaLib.h"
-
-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
- unsigned char *outProps, size_t *outPropsSize,
- int level, /* 0 <= level <= 9, default = 5 */
- unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
- int lc, /* 0 <= lc <= 8, default = 3 */
- int lp, /* 0 <= lp <= 4, default = 0 */
- int pb, /* 0 <= pb <= 4, default = 2 */
- int fb, /* 5 <= fb <= 273, default = 32 */
- int numThreads /* 1 or 2, default = 2 */
-)
-{
- CLzmaEncProps props;
- LzmaEncProps_Init(&props);
- props.level = level;
- props.dictSize = dictSize;
- props.lc = lc;
- props.lp = lp;
- props.pb = pb;
- props.fb = fb;
- props.numThreads = numThreads;
-
- return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
- NULL, &g_Alloc, &g_Alloc);
-}
-
-
-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
- const unsigned char *props, size_t propsSize)
-{
- ELzmaStatus status;
- return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
-}
+/* LzmaLib.c -- LZMA library wrapper
+2015-06-13 : Igor Pavlov : Public domain */
+
+#include "Alloc.h"
+#include "LzmaDec.h"
+#include "LzmaEnc.h"
+#include "LzmaLib.h"
+
+MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+ unsigned char *outProps, size_t *outPropsSize,
+ int level, /* 0 <= level <= 9, default = 5 */
+ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
+ int lc, /* 0 <= lc <= 8, default = 3 */
+ int lp, /* 0 <= lp <= 4, default = 0 */
+ int pb, /* 0 <= pb <= 4, default = 2 */
+ int fb, /* 5 <= fb <= 273, default = 32 */
+ int numThreads /* 1 or 2, default = 2 */
+)
+{
+ CLzmaEncProps props;
+ LzmaEncProps_Init(&props);
+ props.level = level;
+ props.dictSize = dictSize;
+ props.lc = lc;
+ props.lp = lp;
+ props.pb = pb;
+ props.fb = fb;
+ props.numThreads = numThreads;
+
+ return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
+ NULL, &g_Alloc, &g_Alloc);
+}
+
+
+MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
+ const unsigned char *props, size_t propsSize)
+{
+ ELzmaStatus status;
+ return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
+}
diff --git a/contrib/libs/lzmasdk/LzmaLib.h b/contrib/libs/lzmasdk/LzmaLib.h
index d4afea8393..88fa87d350 100644
--- a/contrib/libs/lzmasdk/LzmaLib.h
+++ b/contrib/libs/lzmasdk/LzmaLib.h
@@ -1,131 +1,131 @@
-/* LzmaLib.h -- LZMA library interface
-2013-01-18 : Igor Pavlov : Public domain */
-
-#ifndef __LZMA_LIB_H
-#define __LZMA_LIB_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-#define MY_STDAPI int MY_STD_CALL
-
-#define LZMA_PROPS_SIZE 5
-
-/*
-RAM requirements for LZMA:
- for compression: (dictSize * 11.5 + 6 MB) + state_size
- for decompression: dictSize + state_size
- state_size = (4 + (1.5 << (lc + lp))) KB
- by default (lc=3, lp=0), state_size = 16 KB.
-
-LZMA properties (5 bytes) format
- Offset Size Description
- 0 1 lc, lp and pb in encoded form.
- 1 4 dictSize (little endian).
-*/
-
-/*
-LzmaCompress
-------------
-
-outPropsSize -
- In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
- Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
-
- LZMA Encoder will use default values for any parameter, if it is
- -1 for any from: level, lc, lp, pb, fb, numThreads
- 0 for dictSize
-
-level - compression level: 0 <= level <= 9;
-
- level dictSize algo fb
- 0: 16 KB 0 32
- 1: 64 KB 0 32
- 2: 256 KB 0 32
- 3: 1 MB 0 32
- 4: 4 MB 0 32
- 5: 16 MB 1 32
- 6: 32 MB 1 32
- 7+: 64 MB 1 64
+/* LzmaLib.h -- LZMA library interface
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_LIB_H
+#define __LZMA_LIB_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define MY_STDAPI int MY_STD_CALL
+
+#define LZMA_PROPS_SIZE 5
+
+/*
+RAM requirements for LZMA:
+ for compression: (dictSize * 11.5 + 6 MB) + state_size
+ for decompression: dictSize + state_size
+ state_size = (4 + (1.5 << (lc + lp))) KB
+ by default (lc=3, lp=0), state_size = 16 KB.
+
+LZMA properties (5 bytes) format
+ Offset Size Description
+ 0 1 lc, lp and pb in encoded form.
+ 1 4 dictSize (little endian).
+*/
+
+/*
+LzmaCompress
+------------
+
+outPropsSize -
+ In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
+ Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
+
+ LZMA Encoder will use default values for any parameter, if it is
+ -1 for any from: level, lc, lp, pb, fb, numThreads
+ 0 for dictSize
- The default value for "level" is 5.
-
- algo = 0 means fast method
- algo = 1 means normal method
-
-dictSize - The dictionary size in bytes. The maximum value is
- 128 MB = (1 << 27) bytes for 32-bit version
- 1 GB = (1 << 30) bytes for 64-bit version
- The default value is 16 MB = (1 << 24) bytes.
- It's recommended to use the dictionary that is larger than 4 KB and
- that can be calculated as (1 << N) or (3 << N) sizes.
-
-lc - The number of literal context bits (high bits of previous literal).
- It can be in the range from 0 to 8. The default value is 3.
- Sometimes lc=4 gives the gain for big files.
-
-lp - The number of literal pos bits (low bits of current position for literals).
- It can be in the range from 0 to 4. The default value is 0.
- The lp switch is intended for periodical data when the period is equal to 2^lp.
- For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
- better to set lc=0, if you change lp switch.
-
-pb - The number of pos bits (low bits of current position).
- It can be in the range from 0 to 4. The default value is 2.
- The pb switch is intended for periodical data when the period is equal 2^pb.
-
-fb - Word size (the number of fast bytes).
- It can be in the range from 5 to 273. The default value is 32.
- Usually, a big number gives a little bit better compression ratio and
- slower compression process.
-
-numThreads - The number of threads. 1 or 2. The default value is 2.
- Fast mode (algo = 0) can use only 1 thread.
-
-Out:
- destLen - processed output size
-Returns:
- SZ_OK - OK
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_PARAM - Incorrect parameter
- SZ_ERROR_OUTPUT_EOF - output buffer overflow
- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
-*/
-
-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
- unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
- int level, /* 0 <= level <= 9, default = 5 */
- unsigned dictSize, /* default = (1 << 24) */
- int lc, /* 0 <= lc <= 8, default = 3 */
- int lp, /* 0 <= lp <= 4, default = 0 */
- int pb, /* 0 <= pb <= 4, default = 2 */
- int fb, /* 5 <= fb <= 273, default = 32 */
- int numThreads /* 1 or 2, default = 2 */
- );
-
-/*
-LzmaUncompress
---------------
-In:
- dest - output data
- destLen - output data size
- src - input data
- srcLen - input data size
-Out:
- destLen - processed output size
- srcLen - processed input size
-Returns:
- SZ_OK - OK
- SZ_ERROR_DATA - Data error
- SZ_ERROR_MEM - Memory allocation error
- SZ_ERROR_UNSUPPORTED - Unsupported properties
- SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
-*/
-
-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
- const unsigned char *props, size_t propsSize);
-
-EXTERN_C_END
-
-#endif
+level - compression level: 0 <= level <= 9;
+
+ level dictSize algo fb
+ 0: 16 KB 0 32
+ 1: 64 KB 0 32
+ 2: 256 KB 0 32
+ 3: 1 MB 0 32
+ 4: 4 MB 0 32
+ 5: 16 MB 1 32
+ 6: 32 MB 1 32
+ 7+: 64 MB 1 64
+
+ The default value for "level" is 5.
+
+ algo = 0 means fast method
+ algo = 1 means normal method
+
+dictSize - The dictionary size in bytes. The maximum value is
+ 128 MB = (1 << 27) bytes for 32-bit version
+ 1 GB = (1 << 30) bytes for 64-bit version
+ The default value is 16 MB = (1 << 24) bytes.
+ It's recommended to use the dictionary that is larger than 4 KB and
+ that can be calculated as (1 << N) or (3 << N) sizes.
+
+lc - The number of literal context bits (high bits of previous literal).
+ It can be in the range from 0 to 8. The default value is 3.
+ Sometimes lc=4 gives the gain for big files.
+
+lp - The number of literal pos bits (low bits of current position for literals).
+ It can be in the range from 0 to 4. The default value is 0.
+ The lp switch is intended for periodical data when the period is equal to 2^lp.
+ For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
+ better to set lc=0, if you change lp switch.
+
+pb - The number of pos bits (low bits of current position).
+ It can be in the range from 0 to 4. The default value is 2.
+ The pb switch is intended for periodical data when the period is equal 2^pb.
+
+fb - Word size (the number of fast bytes).
+ It can be in the range from 5 to 273. The default value is 32.
+ Usually, a big number gives a little bit better compression ratio and
+ slower compression process.
+
+numThreads - The number of threads. 1 or 2. The default value is 2.
+ Fast mode (algo = 0) can use only 1 thread.
+
+Out:
+ destLen - processed output size
+Returns:
+ SZ_OK - OK
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_PARAM - Incorrect parameter
+ SZ_ERROR_OUTPUT_EOF - output buffer overflow
+ SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
+*/
+
+MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+ unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
+ int level, /* 0 <= level <= 9, default = 5 */
+ unsigned dictSize, /* default = (1 << 24) */
+ int lc, /* 0 <= lc <= 8, default = 3 */
+ int lp, /* 0 <= lp <= 4, default = 0 */
+ int pb, /* 0 <= pb <= 4, default = 2 */
+ int fb, /* 5 <= fb <= 273, default = 32 */
+ int numThreads /* 1 or 2, default = 2 */
+ );
+
+/*
+LzmaUncompress
+--------------
+In:
+ dest - output data
+ destLen - output data size
+ src - input data
+ srcLen - input data size
+Out:
+ destLen - processed output size
+ srcLen - processed input size
+Returns:
+ SZ_OK - OK
+ SZ_ERROR_DATA - Data error
+ SZ_ERROR_MEM - Memory allocation error
+ SZ_ERROR_UNSUPPORTED - Unsupported properties
+ SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
+*/
+
+MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
+ const unsigned char *props, size_t propsSize);
+
+EXTERN_C_END
+
+#endif
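
Putting the two wrappers together, a round trip through LzmaCompress and LzmaUncompress looks roughly like the sketch below. It is not part of the patch; buffer sizing is the caller's responsibility, and the -1/0 arguments request the defaults documented in the header comment above.

#include <string.h>
#include "LzmaLib.h"

/* Compress data into packed[] and decompress it back into unpacked[],
   using default encoder parameters. Returns 0 (SZ_OK) on success. */
static int lzma_roundtrip(const unsigned char *data, size_t dataLen,
                          unsigned char *packed, size_t packedCap,
                          unsigned char *unpacked, size_t unpackedCap)
{
    unsigned char props[LZMA_PROPS_SIZE];
    size_t propsSize = LZMA_PROPS_SIZE;
    size_t packedLen = packedCap;
    size_t unpackedLen = unpackedCap;
    size_t srcLen;
    int res;

    res = LzmaCompress(packed, &packedLen, data, dataLen,
                       props, &propsSize,
                       -1 /* level */, 0 /* dictSize */,
                       -1, -1, -1, -1 /* lc, lp, pb, fb */,
                       -1 /* numThreads */);
    if (res != 0)
        return res;

    srcLen = packedLen;
    res = LzmaUncompress(unpacked, &unpackedLen, packed, &srcLen,
                         props, propsSize);
    if (res != 0)
        return res;

    return (unpackedLen == dataLen &&
            memcmp(unpacked, data, dataLen) == 0) ? 0 : -1;
}
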
diff --git a/contrib/libs/lzmasdk/Precomp.h b/contrib/libs/lzmasdk/Precomp.h
index ab1ee910a5..e8ff8b40e8 100644
--- a/contrib/libs/lzmasdk/Precomp.h
+++ b/contrib/libs/lzmasdk/Precomp.h
@@ -1,10 +1,10 @@
-/* Precomp.h -- StdAfx
-2013-11-12 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_PRECOMP_H
-#define __7Z_PRECOMP_H
-
-#include "Compiler.h"
-/* #include "7zTypes.h" */
-
-#endif
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/contrib/libs/lzmasdk/ya.make b/contrib/libs/lzmasdk/ya.make
index 68f6605e7d..db0a55788d 100644
--- a/contrib/libs/lzmasdk/ya.make
+++ b/contrib/libs/lzmasdk/ya.make
@@ -1,26 +1,26 @@
-LIBRARY()
+LIBRARY()
LICENSE(Public-Domain)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
g:contrib
g:cpp-contrib
)
-
+
# https://www.7-zip.org/sdk.html
VERSION(19.00)
-CFLAGS(-D_7ZIP_ST=1)
-
+CFLAGS(-D_7ZIP_ST=1)
+
NO_UTIL()
-
-SRCS(
- 7zStream.c
+
+SRCS(
+ 7zStream.c
Aes.c
AesOpt.c
- Alloc.c
+ Alloc.c
Bra.c
Bra86.c
BraIA64.c
@@ -28,10 +28,10 @@ SRCS(
LzFind.c
Lzma2Dec.c
Lzma2Enc.c
- LzmaDec.c
- LzmaEnc.c
- LzmaLib.c
+ LzmaDec.c
+ LzmaEnc.c
+ LzmaLib.c
Sha256.c
-)
-
-END()
+)
+
+END()
diff --git a/contrib/libs/nayuki_md5/md5-fast-x8664.S b/contrib/libs/nayuki_md5/md5-fast-x8664.S
index ac8fa4cdaa..a48f499385 100644
--- a/contrib/libs/nayuki_md5/md5-fast-x8664.S
+++ b/contrib/libs/nayuki_md5/md5-fast-x8664.S
@@ -1,171 +1,171 @@
-/*
- * MD5 hash in x86-64 assembly
- *
- * Copyright (c) 2016 Project Nayuki. (MIT License)
- * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
- * the Software, and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- * - The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * - The Software is provided "as is", without warranty of any kind, express or
- * implied, including but not limited to the warranties of merchantability,
- * fitness for a particular purpose and noninfringement. In no event shall the
- * authors or copyright holders be liable for any claim, damages or other
- * liability, whether in an action of contract, tort or otherwise, arising from,
- * out of or in connection with the Software or the use or other dealings in the
- * Software.
- */
-
-
-/* void md5_compress(uint32_t state[4], const uint8_t block[64]) */
-.globl md5_compress
-md5_compress:
- /*
- * Storage usage:
- * Bytes Location Description
- * 4 eax MD5 state variable A
- * 4 ebx MD5 state variable B
- * 4 ecx MD5 state variable C
- * 4 edx MD5 state variable D
- * 4 esi Temporary for calculation per round
- * 4 edi Temporary for calculation per round
- * 8 rbp Base address of block array argument (read-only)
- * 8 r8 Base address of state array argument (read-only)
- * 16 xmm0 Caller's value of rbx (only low 64 bits are used)
- * 16 xmm1 Caller's value of rbp (only low 64 bits are used)
- */
-
- #define ROUND0(a, b, c, d, k, s, t) \
- movl %c, %esi; \
- addl (k*4)(%rbp), %a; \
- xorl %d, %esi; \
- andl %b, %esi; \
- xorl %d, %esi; \
- leal t(%esi,%a), %a; \
- roll $s, %a; \
- addl %b, %a;
-
- #define ROUND1(a, b, c, d, k, s, t) \
- movl %d, %esi; \
- movl %d, %edi; \
- addl (k*4)(%rbp), %a; \
- notl %esi; \
- andl %b, %edi; \
- andl %c, %esi; \
- orl %edi, %esi; \
- leal t(%esi,%a), %a; \
- roll $s, %a; \
- addl %b, %a;
-
- #define ROUND2(a, b, c, d, k, s, t) \
- movl %c, %esi; \
- addl (k*4)(%rbp), %a; \
- xorl %d, %esi; \
- xorl %b, %esi; \
- leal t(%esi,%a), %a; \
- roll $s, %a; \
- addl %b, %a;
-
- #define ROUND3(a, b, c, d, k, s, t) \
- movl %d, %esi; \
- not %esi; \
- addl (k*4)(%rbp), %a; \
- orl %b, %esi; \
- xorl %c, %esi; \
- leal t(%esi,%a), %a; \
- roll $s, %a; \
- addl %b, %a;
-
- /* Save registers */
- movq %rbx, %xmm0
- movq %rbp, %xmm1
-
- /* Load arguments */
- movq %rsi, %rbp
- movl 0(%rdi), %eax /* a */
- movl 4(%rdi), %ebx /* b */
- movl 8(%rdi), %ecx /* c */
- movl 12(%rdi), %edx /* d */
- movq %rdi, %r8
-
- /* 64 rounds of hashing */
- ROUND0(eax, ebx, ecx, edx, 0, 7, -0x28955B88)
- ROUND0(edx, eax, ebx, ecx, 1, 12, -0x173848AA)
- ROUND0(ecx, edx, eax, ebx, 2, 17, 0x242070DB)
- ROUND0(ebx, ecx, edx, eax, 3, 22, -0x3E423112)
- ROUND0(eax, ebx, ecx, edx, 4, 7, -0x0A83F051)
- ROUND0(edx, eax, ebx, ecx, 5, 12, 0x4787C62A)
- ROUND0(ecx, edx, eax, ebx, 6, 17, -0x57CFB9ED)
- ROUND0(ebx, ecx, edx, eax, 7, 22, -0x02B96AFF)
- ROUND0(eax, ebx, ecx, edx, 8, 7, 0x698098D8)
- ROUND0(edx, eax, ebx, ecx, 9, 12, -0x74BB0851)
- ROUND0(ecx, edx, eax, ebx, 10, 17, -0x0000A44F)
- ROUND0(ebx, ecx, edx, eax, 11, 22, -0x76A32842)
- ROUND0(eax, ebx, ecx, edx, 12, 7, 0x6B901122)
- ROUND0(edx, eax, ebx, ecx, 13, 12, -0x02678E6D)
- ROUND0(ecx, edx, eax, ebx, 14, 17, -0x5986BC72)
- ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821)
- ROUND1(eax, ebx, ecx, edx, 1, 5, -0x09E1DA9E)
- ROUND1(edx, eax, ebx, ecx, 6, 9, -0x3FBF4CC0)
- ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51)
- ROUND1(ebx, ecx, edx, eax, 0, 20, -0x16493856)
- ROUND1(eax, ebx, ecx, edx, 5, 5, -0x29D0EFA3)
- ROUND1(edx, eax, ebx, ecx, 10, 9, 0x02441453)
- ROUND1(ecx, edx, eax, ebx, 15, 14, -0x275E197F)
- ROUND1(ebx, ecx, edx, eax, 4, 20, -0x182C0438)
- ROUND1(eax, ebx, ecx, edx, 9, 5, 0x21E1CDE6)
- ROUND1(edx, eax, ebx, ecx, 14, 9, -0x3CC8F82A)
- ROUND1(ecx, edx, eax, ebx, 3, 14, -0x0B2AF279)
- ROUND1(ebx, ecx, edx, eax, 8, 20, 0x455A14ED)
- ROUND1(eax, ebx, ecx, edx, 13, 5, -0x561C16FB)
- ROUND1(edx, eax, ebx, ecx, 2, 9, -0x03105C08)
- ROUND1(ecx, edx, eax, ebx, 7, 14, 0x676F02D9)
- ROUND1(ebx, ecx, edx, eax, 12, 20, -0x72D5B376)
- ROUND2(eax, ebx, ecx, edx, 5, 4, -0x0005C6BE)
- ROUND2(edx, eax, ebx, ecx, 8, 11, -0x788E097F)
- ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122)
- ROUND2(ebx, ecx, edx, eax, 14, 23, -0x021AC7F4)
- ROUND2(eax, ebx, ecx, edx, 1, 4, -0x5B4115BC)
- ROUND2(edx, eax, ebx, ecx, 4, 11, 0x4BDECFA9)
- ROUND2(ecx, edx, eax, ebx, 7, 16, -0x0944B4A0)
- ROUND2(ebx, ecx, edx, eax, 10, 23, -0x41404390)
- ROUND2(eax, ebx, ecx, edx, 13, 4, 0x289B7EC6)
- ROUND2(edx, eax, ebx, ecx, 0, 11, -0x155ED806)
- ROUND2(ecx, edx, eax, ebx, 3, 16, -0x2B10CF7B)
- ROUND2(ebx, ecx, edx, eax, 6, 23, 0x04881D05)
- ROUND2(eax, ebx, ecx, edx, 9, 4, -0x262B2FC7)
- ROUND2(edx, eax, ebx, ecx, 12, 11, -0x1924661B)
- ROUND2(ecx, edx, eax, ebx, 15, 16, 0x1FA27CF8)
- ROUND2(ebx, ecx, edx, eax, 2, 23, -0x3B53A99B)
- ROUND3(eax, ebx, ecx, edx, 0, 6, -0x0BD6DDBC)
- ROUND3(edx, eax, ebx, ecx, 7, 10, 0x432AFF97)
- ROUND3(ecx, edx, eax, ebx, 14, 15, -0x546BDC59)
- ROUND3(ebx, ecx, edx, eax, 5, 21, -0x036C5FC7)
- ROUND3(eax, ebx, ecx, edx, 12, 6, 0x655B59C3)
- ROUND3(edx, eax, ebx, ecx, 3, 10, -0x70F3336E)
- ROUND3(ecx, edx, eax, ebx, 10, 15, -0x00100B83)
- ROUND3(ebx, ecx, edx, eax, 1, 21, -0x7A7BA22F)
- ROUND3(eax, ebx, ecx, edx, 8, 6, 0x6FA87E4F)
- ROUND3(edx, eax, ebx, ecx, 15, 10, -0x01D31920)
- ROUND3(ecx, edx, eax, ebx, 6, 15, -0x5CFEBCEC)
- ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1)
- ROUND3(eax, ebx, ecx, edx, 4, 6, -0x08AC817E)
- ROUND3(edx, eax, ebx, ecx, 11, 10, -0x42C50DCB)
- ROUND3(ecx, edx, eax, ebx, 2, 15, 0x2AD7D2BB)
- ROUND3(ebx, ecx, edx, eax, 9, 21, -0x14792C6F)
-
- /* Save updated state */
- addl %eax, 0(%r8)
- addl %ebx, 4(%r8)
- addl %ecx, 8(%r8)
- addl %edx, 12(%r8)
-
- /* Restore registers */
- movq %xmm0, %rbx
- movq %xmm1, %rbp
- retq
+/*
+ * MD5 hash in x86-64 assembly
+ *
+ * Copyright (c) 2016 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ * implied, including but not limited to the warranties of merchantability,
+ * fitness for a particular purpose and noninfringement. In no event shall the
+ * authors or copyright holders be liable for any claim, damages or other
+ * liability, whether in an action of contract, tort or otherwise, arising from,
+ * out of or in connection with the Software or the use or other dealings in the
+ * Software.
+ */
+
+
+/* void md5_compress(uint32_t state[4], const uint8_t block[64]) */
+.globl md5_compress
+md5_compress:
+ /*
+ * Storage usage:
+ * Bytes Location Description
+ * 4 eax MD5 state variable A
+ * 4 ebx MD5 state variable B
+ * 4 ecx MD5 state variable C
+ * 4 edx MD5 state variable D
+ * 4 esi Temporary for calculation per round
+ * 4 edi Temporary for calculation per round
+ * 8 rbp Base address of block array argument (read-only)
+ * 8 r8 Base address of state array argument (read-only)
+ * 16 xmm0 Caller's value of rbx (only low 64 bits are used)
+ * 16 xmm1 Caller's value of rbp (only low 64 bits are used)
+ */
+
+ #define ROUND0(a, b, c, d, k, s, t) \
+ movl %c, %esi; \
+ addl (k*4)(%rbp), %a; \
+ xorl %d, %esi; \
+ andl %b, %esi; \
+ xorl %d, %esi; \
+ leal t(%esi,%a), %a; \
+ roll $s, %a; \
+ addl %b, %a;
+
+ #define ROUND1(a, b, c, d, k, s, t) \
+ movl %d, %esi; \
+ movl %d, %edi; \
+ addl (k*4)(%rbp), %a; \
+ notl %esi; \
+ andl %b, %edi; \
+ andl %c, %esi; \
+ orl %edi, %esi; \
+ leal t(%esi,%a), %a; \
+ roll $s, %a; \
+ addl %b, %a;
+
+ #define ROUND2(a, b, c, d, k, s, t) \
+ movl %c, %esi; \
+ addl (k*4)(%rbp), %a; \
+ xorl %d, %esi; \
+ xorl %b, %esi; \
+ leal t(%esi,%a), %a; \
+ roll $s, %a; \
+ addl %b, %a;
+
+ #define ROUND3(a, b, c, d, k, s, t) \
+ movl %d, %esi; \
+ not %esi; \
+ addl (k*4)(%rbp), %a; \
+ orl %b, %esi; \
+ xorl %c, %esi; \
+ leal t(%esi,%a), %a; \
+ roll $s, %a; \
+ addl %b, %a;
+
+ /* Save registers */
+ movq %rbx, %xmm0
+ movq %rbp, %xmm1
+
+ /* Load arguments */
+ movq %rsi, %rbp
+ movl 0(%rdi), %eax /* a */
+ movl 4(%rdi), %ebx /* b */
+ movl 8(%rdi), %ecx /* c */
+ movl 12(%rdi), %edx /* d */
+ movq %rdi, %r8
+
+ /* 64 rounds of hashing */
+ ROUND0(eax, ebx, ecx, edx, 0, 7, -0x28955B88)
+ ROUND0(edx, eax, ebx, ecx, 1, 12, -0x173848AA)
+ ROUND0(ecx, edx, eax, ebx, 2, 17, 0x242070DB)
+ ROUND0(ebx, ecx, edx, eax, 3, 22, -0x3E423112)
+ ROUND0(eax, ebx, ecx, edx, 4, 7, -0x0A83F051)
+ ROUND0(edx, eax, ebx, ecx, 5, 12, 0x4787C62A)
+ ROUND0(ecx, edx, eax, ebx, 6, 17, -0x57CFB9ED)
+ ROUND0(ebx, ecx, edx, eax, 7, 22, -0x02B96AFF)
+ ROUND0(eax, ebx, ecx, edx, 8, 7, 0x698098D8)
+ ROUND0(edx, eax, ebx, ecx, 9, 12, -0x74BB0851)
+ ROUND0(ecx, edx, eax, ebx, 10, 17, -0x0000A44F)
+ ROUND0(ebx, ecx, edx, eax, 11, 22, -0x76A32842)
+ ROUND0(eax, ebx, ecx, edx, 12, 7, 0x6B901122)
+ ROUND0(edx, eax, ebx, ecx, 13, 12, -0x02678E6D)
+ ROUND0(ecx, edx, eax, ebx, 14, 17, -0x5986BC72)
+ ROUND0(ebx, ecx, edx, eax, 15, 22, 0x49B40821)
+ ROUND1(eax, ebx, ecx, edx, 1, 5, -0x09E1DA9E)
+ ROUND1(edx, eax, ebx, ecx, 6, 9, -0x3FBF4CC0)
+ ROUND1(ecx, edx, eax, ebx, 11, 14, 0x265E5A51)
+ ROUND1(ebx, ecx, edx, eax, 0, 20, -0x16493856)
+ ROUND1(eax, ebx, ecx, edx, 5, 5, -0x29D0EFA3)
+ ROUND1(edx, eax, ebx, ecx, 10, 9, 0x02441453)
+ ROUND1(ecx, edx, eax, ebx, 15, 14, -0x275E197F)
+ ROUND1(ebx, ecx, edx, eax, 4, 20, -0x182C0438)
+ ROUND1(eax, ebx, ecx, edx, 9, 5, 0x21E1CDE6)
+ ROUND1(edx, eax, ebx, ecx, 14, 9, -0x3CC8F82A)
+ ROUND1(ecx, edx, eax, ebx, 3, 14, -0x0B2AF279)
+ ROUND1(ebx, ecx, edx, eax, 8, 20, 0x455A14ED)
+ ROUND1(eax, ebx, ecx, edx, 13, 5, -0x561C16FB)
+ ROUND1(edx, eax, ebx, ecx, 2, 9, -0x03105C08)
+ ROUND1(ecx, edx, eax, ebx, 7, 14, 0x676F02D9)
+ ROUND1(ebx, ecx, edx, eax, 12, 20, -0x72D5B376)
+ ROUND2(eax, ebx, ecx, edx, 5, 4, -0x0005C6BE)
+ ROUND2(edx, eax, ebx, ecx, 8, 11, -0x788E097F)
+ ROUND2(ecx, edx, eax, ebx, 11, 16, 0x6D9D6122)
+ ROUND2(ebx, ecx, edx, eax, 14, 23, -0x021AC7F4)
+ ROUND2(eax, ebx, ecx, edx, 1, 4, -0x5B4115BC)
+ ROUND2(edx, eax, ebx, ecx, 4, 11, 0x4BDECFA9)
+ ROUND2(ecx, edx, eax, ebx, 7, 16, -0x0944B4A0)
+ ROUND2(ebx, ecx, edx, eax, 10, 23, -0x41404390)
+ ROUND2(eax, ebx, ecx, edx, 13, 4, 0x289B7EC6)
+ ROUND2(edx, eax, ebx, ecx, 0, 11, -0x155ED806)
+ ROUND2(ecx, edx, eax, ebx, 3, 16, -0x2B10CF7B)
+ ROUND2(ebx, ecx, edx, eax, 6, 23, 0x04881D05)
+ ROUND2(eax, ebx, ecx, edx, 9, 4, -0x262B2FC7)
+ ROUND2(edx, eax, ebx, ecx, 12, 11, -0x1924661B)
+ ROUND2(ecx, edx, eax, ebx, 15, 16, 0x1FA27CF8)
+ ROUND2(ebx, ecx, edx, eax, 2, 23, -0x3B53A99B)
+ ROUND3(eax, ebx, ecx, edx, 0, 6, -0x0BD6DDBC)
+ ROUND3(edx, eax, ebx, ecx, 7, 10, 0x432AFF97)
+ ROUND3(ecx, edx, eax, ebx, 14, 15, -0x546BDC59)
+ ROUND3(ebx, ecx, edx, eax, 5, 21, -0x036C5FC7)
+ ROUND3(eax, ebx, ecx, edx, 12, 6, 0x655B59C3)
+ ROUND3(edx, eax, ebx, ecx, 3, 10, -0x70F3336E)
+ ROUND3(ecx, edx, eax, ebx, 10, 15, -0x00100B83)
+ ROUND3(ebx, ecx, edx, eax, 1, 21, -0x7A7BA22F)
+ ROUND3(eax, ebx, ecx, edx, 8, 6, 0x6FA87E4F)
+ ROUND3(edx, eax, ebx, ecx, 15, 10, -0x01D31920)
+ ROUND3(ecx, edx, eax, ebx, 6, 15, -0x5CFEBCEC)
+ ROUND3(ebx, ecx, edx, eax, 13, 21, 0x4E0811A1)
+ ROUND3(eax, ebx, ecx, edx, 4, 6, -0x08AC817E)
+ ROUND3(edx, eax, ebx, ecx, 11, 10, -0x42C50DCB)
+ ROUND3(ecx, edx, eax, ebx, 2, 15, 0x2AD7D2BB)
+ ROUND3(ebx, ecx, edx, eax, 9, 21, -0x14792C6F)
+
+ /* Save updated state */
+ addl %eax, 0(%r8)
+ addl %ebx, 4(%r8)
+ addl %ecx, 8(%r8)
+ addl %edx, 12(%r8)
+
+ /* Restore registers */
+ movq %xmm0, %rbx
+ movq %xmm1, %rbp
+ retq
diff --git a/contrib/libs/nayuki_md5/md5.c b/contrib/libs/nayuki_md5/md5.c
index 08973459ff..6fce57700f 100644
--- a/contrib/libs/nayuki_md5/md5.c
+++ b/contrib/libs/nayuki_md5/md5.c
@@ -1,134 +1,134 @@
-/*
- * MD5 hash in C
- *
- * Copyright (c) 2016 Project Nayuki. (MIT License)
- * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
- * the Software, and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- * - The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * - The Software is provided "as is", without warranty of any kind, express or
- * implied, including but not limited to the warranties of merchantability,
- * fitness for a particular purpose and noninfringement. In no event shall the
- * authors or copyright holders be liable for any claim, damages or other
- * liability, whether in an action of contract, tort or otherwise, arising from,
- * out of or in connection with the Software or the use or other dealings in the
- * Software.
- */
-
-#include "md5.h"
-
-void md5_compress(uint32_t state[4], const uint8_t block[64]) {
- #define LOADSCHEDULE(i) \
- schedule[i] = (uint32_t)block[i * 4 + 0] << 0 \
- | (uint32_t)block[i * 4 + 1] << 8 \
- | (uint32_t)block[i * 4 + 2] << 16 \
- | (uint32_t)block[i * 4 + 3] << 24;
-
- uint32_t schedule[16];
- LOADSCHEDULE( 0)
- LOADSCHEDULE( 1)
- LOADSCHEDULE( 2)
- LOADSCHEDULE( 3)
- LOADSCHEDULE( 4)
- LOADSCHEDULE( 5)
- LOADSCHEDULE( 6)
- LOADSCHEDULE( 7)
- LOADSCHEDULE( 8)
- LOADSCHEDULE( 9)
- LOADSCHEDULE(10)
- LOADSCHEDULE(11)
- LOADSCHEDULE(12)
- LOADSCHEDULE(13)
- LOADSCHEDULE(14)
- LOADSCHEDULE(15)
-
- #define ROTL32(x, n) (((0U + (x)) << (n)) | ((x) >> (32 - (n)))) // Assumes that x is uint32_t and 0 < n < 32
- #define ROUND0(a, b, c, d, k, s, t) ROUND_TAIL(a, b, d ^ (b & (c ^ d)), k, s, t)
- #define ROUND1(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (d & (b ^ c)), k, s, t)
- #define ROUND2(a, b, c, d, k, s, t) ROUND_TAIL(a, b, b ^ c ^ d , k, s, t)
- #define ROUND3(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (b | ~d) , k, s, t)
- #define ROUND_TAIL(a, b, expr, k, s, t) \
- a = 0U + a + (expr) + UINT32_C(t) + schedule[k]; \
- a = 0U + b + ROTL32(a, s);
-
- uint32_t a = state[0];
- uint32_t b = state[1];
- uint32_t c = state[2];
- uint32_t d = state[3];
-
- ROUND0(a, b, c, d, 0, 7, 0xD76AA478)
- ROUND0(d, a, b, c, 1, 12, 0xE8C7B756)
- ROUND0(c, d, a, b, 2, 17, 0x242070DB)
- ROUND0(b, c, d, a, 3, 22, 0xC1BDCEEE)
- ROUND0(a, b, c, d, 4, 7, 0xF57C0FAF)
- ROUND0(d, a, b, c, 5, 12, 0x4787C62A)
- ROUND0(c, d, a, b, 6, 17, 0xA8304613)
- ROUND0(b, c, d, a, 7, 22, 0xFD469501)
- ROUND0(a, b, c, d, 8, 7, 0x698098D8)
- ROUND0(d, a, b, c, 9, 12, 0x8B44F7AF)
- ROUND0(c, d, a, b, 10, 17, 0xFFFF5BB1)
- ROUND0(b, c, d, a, 11, 22, 0x895CD7BE)
- ROUND0(a, b, c, d, 12, 7, 0x6B901122)
- ROUND0(d, a, b, c, 13, 12, 0xFD987193)
- ROUND0(c, d, a, b, 14, 17, 0xA679438E)
- ROUND0(b, c, d, a, 15, 22, 0x49B40821)
- ROUND1(a, b, c, d, 1, 5, 0xF61E2562)
- ROUND1(d, a, b, c, 6, 9, 0xC040B340)
- ROUND1(c, d, a, b, 11, 14, 0x265E5A51)
- ROUND1(b, c, d, a, 0, 20, 0xE9B6C7AA)
- ROUND1(a, b, c, d, 5, 5, 0xD62F105D)
- ROUND1(d, a, b, c, 10, 9, 0x02441453)
- ROUND1(c, d, a, b, 15, 14, 0xD8A1E681)
- ROUND1(b, c, d, a, 4, 20, 0xE7D3FBC8)
- ROUND1(a, b, c, d, 9, 5, 0x21E1CDE6)
- ROUND1(d, a, b, c, 14, 9, 0xC33707D6)
- ROUND1(c, d, a, b, 3, 14, 0xF4D50D87)
- ROUND1(b, c, d, a, 8, 20, 0x455A14ED)
- ROUND1(a, b, c, d, 13, 5, 0xA9E3E905)
- ROUND1(d, a, b, c, 2, 9, 0xFCEFA3F8)
- ROUND1(c, d, a, b, 7, 14, 0x676F02D9)
- ROUND1(b, c, d, a, 12, 20, 0x8D2A4C8A)
- ROUND2(a, b, c, d, 5, 4, 0xFFFA3942)
- ROUND2(d, a, b, c, 8, 11, 0x8771F681)
- ROUND2(c, d, a, b, 11, 16, 0x6D9D6122)
- ROUND2(b, c, d, a, 14, 23, 0xFDE5380C)
- ROUND2(a, b, c, d, 1, 4, 0xA4BEEA44)
- ROUND2(d, a, b, c, 4, 11, 0x4BDECFA9)
- ROUND2(c, d, a, b, 7, 16, 0xF6BB4B60)
- ROUND2(b, c, d, a, 10, 23, 0xBEBFBC70)
- ROUND2(a, b, c, d, 13, 4, 0x289B7EC6)
- ROUND2(d, a, b, c, 0, 11, 0xEAA127FA)
- ROUND2(c, d, a, b, 3, 16, 0xD4EF3085)
- ROUND2(b, c, d, a, 6, 23, 0x04881D05)
- ROUND2(a, b, c, d, 9, 4, 0xD9D4D039)
- ROUND2(d, a, b, c, 12, 11, 0xE6DB99E5)
- ROUND2(c, d, a, b, 15, 16, 0x1FA27CF8)
- ROUND2(b, c, d, a, 2, 23, 0xC4AC5665)
- ROUND3(a, b, c, d, 0, 6, 0xF4292244)
- ROUND3(d, a, b, c, 7, 10, 0x432AFF97)
- ROUND3(c, d, a, b, 14, 15, 0xAB9423A7)
- ROUND3(b, c, d, a, 5, 21, 0xFC93A039)
- ROUND3(a, b, c, d, 12, 6, 0x655B59C3)
- ROUND3(d, a, b, c, 3, 10, 0x8F0CCC92)
- ROUND3(c, d, a, b, 10, 15, 0xFFEFF47D)
- ROUND3(b, c, d, a, 1, 21, 0x85845DD1)
- ROUND3(a, b, c, d, 8, 6, 0x6FA87E4F)
- ROUND3(d, a, b, c, 15, 10, 0xFE2CE6E0)
- ROUND3(c, d, a, b, 6, 15, 0xA3014314)
- ROUND3(b, c, d, a, 13, 21, 0x4E0811A1)
- ROUND3(a, b, c, d, 4, 6, 0xF7537E82)
- ROUND3(d, a, b, c, 11, 10, 0xBD3AF235)
- ROUND3(c, d, a, b, 2, 15, 0x2AD7D2BB)
- ROUND3(b, c, d, a, 9, 21, 0xEB86D391)
-
- state[0] = 0U + state[0] + a;
- state[1] = 0U + state[1] + b;
- state[2] = 0U + state[2] + c;
- state[3] = 0U + state[3] + d;
-}
+/*
+ * MD5 hash in C
+ *
+ * Copyright (c) 2016 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ * implied, including but not limited to the warranties of merchantability,
+ * fitness for a particular purpose and noninfringement. In no event shall the
+ * authors or copyright holders be liable for any claim, damages or other
+ * liability, whether in an action of contract, tort or otherwise, arising from,
+ * out of or in connection with the Software or the use or other dealings in the
+ * Software.
+ */
+
+#include "md5.h"
+
+void md5_compress(uint32_t state[4], const uint8_t block[64]) {
+ #define LOADSCHEDULE(i) \
+ schedule[i] = (uint32_t)block[i * 4 + 0] << 0 \
+ | (uint32_t)block[i * 4 + 1] << 8 \
+ | (uint32_t)block[i * 4 + 2] << 16 \
+ | (uint32_t)block[i * 4 + 3] << 24;
+
+ uint32_t schedule[16];
+ LOADSCHEDULE( 0)
+ LOADSCHEDULE( 1)
+ LOADSCHEDULE( 2)
+ LOADSCHEDULE( 3)
+ LOADSCHEDULE( 4)
+ LOADSCHEDULE( 5)
+ LOADSCHEDULE( 6)
+ LOADSCHEDULE( 7)
+ LOADSCHEDULE( 8)
+ LOADSCHEDULE( 9)
+ LOADSCHEDULE(10)
+ LOADSCHEDULE(11)
+ LOADSCHEDULE(12)
+ LOADSCHEDULE(13)
+ LOADSCHEDULE(14)
+ LOADSCHEDULE(15)
+
+ #define ROTL32(x, n) (((0U + (x)) << (n)) | ((x) >> (32 - (n)))) // Assumes that x is uint32_t and 0 < n < 32
+ #define ROUND0(a, b, c, d, k, s, t) ROUND_TAIL(a, b, d ^ (b & (c ^ d)), k, s, t)
+ #define ROUND1(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (d & (b ^ c)), k, s, t)
+ #define ROUND2(a, b, c, d, k, s, t) ROUND_TAIL(a, b, b ^ c ^ d , k, s, t)
+ #define ROUND3(a, b, c, d, k, s, t) ROUND_TAIL(a, b, c ^ (b | ~d) , k, s, t)
+ #define ROUND_TAIL(a, b, expr, k, s, t) \
+ a = 0U + a + (expr) + UINT32_C(t) + schedule[k]; \
+ a = 0U + b + ROTL32(a, s);
+
+ uint32_t a = state[0];
+ uint32_t b = state[1];
+ uint32_t c = state[2];
+ uint32_t d = state[3];
+
+ ROUND0(a, b, c, d, 0, 7, 0xD76AA478)
+ ROUND0(d, a, b, c, 1, 12, 0xE8C7B756)
+ ROUND0(c, d, a, b, 2, 17, 0x242070DB)
+ ROUND0(b, c, d, a, 3, 22, 0xC1BDCEEE)
+ ROUND0(a, b, c, d, 4, 7, 0xF57C0FAF)
+ ROUND0(d, a, b, c, 5, 12, 0x4787C62A)
+ ROUND0(c, d, a, b, 6, 17, 0xA8304613)
+ ROUND0(b, c, d, a, 7, 22, 0xFD469501)
+ ROUND0(a, b, c, d, 8, 7, 0x698098D8)
+ ROUND0(d, a, b, c, 9, 12, 0x8B44F7AF)
+ ROUND0(c, d, a, b, 10, 17, 0xFFFF5BB1)
+ ROUND0(b, c, d, a, 11, 22, 0x895CD7BE)
+ ROUND0(a, b, c, d, 12, 7, 0x6B901122)
+ ROUND0(d, a, b, c, 13, 12, 0xFD987193)
+ ROUND0(c, d, a, b, 14, 17, 0xA679438E)
+ ROUND0(b, c, d, a, 15, 22, 0x49B40821)
+ ROUND1(a, b, c, d, 1, 5, 0xF61E2562)
+ ROUND1(d, a, b, c, 6, 9, 0xC040B340)
+ ROUND1(c, d, a, b, 11, 14, 0x265E5A51)
+ ROUND1(b, c, d, a, 0, 20, 0xE9B6C7AA)
+ ROUND1(a, b, c, d, 5, 5, 0xD62F105D)
+ ROUND1(d, a, b, c, 10, 9, 0x02441453)
+ ROUND1(c, d, a, b, 15, 14, 0xD8A1E681)
+ ROUND1(b, c, d, a, 4, 20, 0xE7D3FBC8)
+ ROUND1(a, b, c, d, 9, 5, 0x21E1CDE6)
+ ROUND1(d, a, b, c, 14, 9, 0xC33707D6)
+ ROUND1(c, d, a, b, 3, 14, 0xF4D50D87)
+ ROUND1(b, c, d, a, 8, 20, 0x455A14ED)
+ ROUND1(a, b, c, d, 13, 5, 0xA9E3E905)
+ ROUND1(d, a, b, c, 2, 9, 0xFCEFA3F8)
+ ROUND1(c, d, a, b, 7, 14, 0x676F02D9)
+ ROUND1(b, c, d, a, 12, 20, 0x8D2A4C8A)
+ ROUND2(a, b, c, d, 5, 4, 0xFFFA3942)
+ ROUND2(d, a, b, c, 8, 11, 0x8771F681)
+ ROUND2(c, d, a, b, 11, 16, 0x6D9D6122)
+ ROUND2(b, c, d, a, 14, 23, 0xFDE5380C)
+ ROUND2(a, b, c, d, 1, 4, 0xA4BEEA44)
+ ROUND2(d, a, b, c, 4, 11, 0x4BDECFA9)
+ ROUND2(c, d, a, b, 7, 16, 0xF6BB4B60)
+ ROUND2(b, c, d, a, 10, 23, 0xBEBFBC70)
+ ROUND2(a, b, c, d, 13, 4, 0x289B7EC6)
+ ROUND2(d, a, b, c, 0, 11, 0xEAA127FA)
+ ROUND2(c, d, a, b, 3, 16, 0xD4EF3085)
+ ROUND2(b, c, d, a, 6, 23, 0x04881D05)
+ ROUND2(a, b, c, d, 9, 4, 0xD9D4D039)
+ ROUND2(d, a, b, c, 12, 11, 0xE6DB99E5)
+ ROUND2(c, d, a, b, 15, 16, 0x1FA27CF8)
+ ROUND2(b, c, d, a, 2, 23, 0xC4AC5665)
+ ROUND3(a, b, c, d, 0, 6, 0xF4292244)
+ ROUND3(d, a, b, c, 7, 10, 0x432AFF97)
+ ROUND3(c, d, a, b, 14, 15, 0xAB9423A7)
+ ROUND3(b, c, d, a, 5, 21, 0xFC93A039)
+ ROUND3(a, b, c, d, 12, 6, 0x655B59C3)
+ ROUND3(d, a, b, c, 3, 10, 0x8F0CCC92)
+ ROUND3(c, d, a, b, 10, 15, 0xFFEFF47D)
+ ROUND3(b, c, d, a, 1, 21, 0x85845DD1)
+ ROUND3(a, b, c, d, 8, 6, 0x6FA87E4F)
+ ROUND3(d, a, b, c, 15, 10, 0xFE2CE6E0)
+ ROUND3(c, d, a, b, 6, 15, 0xA3014314)
+ ROUND3(b, c, d, a, 13, 21, 0x4E0811A1)
+ ROUND3(a, b, c, d, 4, 6, 0xF7537E82)
+ ROUND3(d, a, b, c, 11, 10, 0xBD3AF235)
+ ROUND3(c, d, a, b, 2, 15, 0x2AD7D2BB)
+ ROUND3(b, c, d, a, 9, 21, 0xEB86D391)
+
+ state[0] = 0U + state[0] + a;
+ state[1] = 0U + state[1] + b;
+ state[2] = 0U + state[2] + c;
+ state[3] = 0U + state[3] + d;
+}
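
md5_compress processes exactly one 64-byte block and leaves message padding to the caller. As a minimal self-check (not part of the patch), the empty message can be hashed by feeding a single padded block: a 0x80 byte, zeros, and a zero 64-bit bit-length in the last 8 bytes. The expected digest is the well-known d41d8cd98f00b204e9800998ecf8427e.

#include <stdint.h>
#include <stdio.h>
#include "md5.h"

int main(void) {
    /* Standard MD5 initial state. */
    uint32_t state[4] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
    /* Padded block for the empty message: 0x80 then zeros (length bits = 0). */
    uint8_t block[64] = { 0x80 };
    uint8_t digest[16];
    int i;

    md5_compress(state, block);

    /* The digest is the little-endian serialization of the state words. */
    for (i = 0; i < 4; i++) {
        digest[i * 4 + 0] = (uint8_t)(state[i] >> 0);
        digest[i * 4 + 1] = (uint8_t)(state[i] >> 8);
        digest[i * 4 + 2] = (uint8_t)(state[i] >> 16);
        digest[i * 4 + 3] = (uint8_t)(state[i] >> 24);
    }
    for (i = 0; i < 16; i++)
        printf("%02x", digest[i]);   /* prints d41d8cd98f00b204e9800998ecf8427e */
    printf("\n");
    return 0;
}
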
diff --git a/contrib/libs/nayuki_md5/md5.h b/contrib/libs/nayuki_md5/md5.h
index cef3110d4e..aa1188092e 100644
--- a/contrib/libs/nayuki_md5/md5.h
+++ b/contrib/libs/nayuki_md5/md5.h
@@ -1,9 +1,9 @@
-#pragma once
-
-#include <stdint.h>
-
-#if defined(__cplusplus)
-extern "C"
-#endif
-
-void md5_compress(uint32_t state[4], const uint8_t block[64]);
+#pragma once
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+#endif
+
+void md5_compress(uint32_t state[4], const uint8_t block[64]);
diff --git a/contrib/libs/nayuki_md5/ya.make b/contrib/libs/nayuki_md5/ya.make
index e8c03cecac..15a6141c7a 100644
--- a/contrib/libs/nayuki_md5/ya.make
+++ b/contrib/libs/nayuki_md5/ya.make
@@ -1,7 +1,7 @@
-LIBRARY()
-
-LICENSE(MIT)
-
+LIBRARY()
+
+LICENSE(MIT)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(2016)
@@ -13,8 +13,8 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
-IF (OS_LINUX AND ARCH_X86_64)
+
+IF (OS_LINUX AND ARCH_X86_64)
SRCS(
md5-fast-x8664.S
)
@@ -23,5 +23,5 @@ ELSE()
md5.c
)
ENDIF()
-
-END()
+
+END()
diff --git a/contrib/libs/nghttp2/ya.make b/contrib/libs/nghttp2/ya.make
index 69f6a54481..325cc6ae30 100644
--- a/contrib/libs/nghttp2/ya.make
+++ b/contrib/libs/nghttp2/ya.make
@@ -15,7 +15,7 @@ LICENSE(
FSFAP AND
MIT
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
ADDINCL(
diff --git a/contrib/libs/openssl/crypto/ya.make b/contrib/libs/openssl/crypto/ya.make
index ca01b4776b..3acfb0cac7 100644
--- a/contrib/libs/openssl/crypto/ya.make
+++ b/contrib/libs/openssl/crypto/ya.make
@@ -10,7 +10,7 @@ LICENSE(
Public-Domain AND
Snprintf
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/openssl/ya.make b/contrib/libs/openssl/ya.make
index 80f8e8d3a7..060d069785 100644
--- a/contrib/libs/openssl/ya.make
+++ b/contrib/libs/openssl/ya.make
@@ -9,7 +9,7 @@ LICENSE(
OpenSSL AND
Public-Domain
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/pcre/pcre.h b/contrib/libs/pcre/pcre.h
index 133d828216..86e3956c21 100644
--- a/contrib/libs/pcre/pcre.h
+++ b/contrib/libs/pcre/pcre.h
@@ -1,51 +1,51 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* This is the public header file for the PCRE library, to be #included by
-applications that call the PCRE functions.
-
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* This is the public header file for the PCRE library, to be #included by
+applications that call the PCRE functions.
+
Copyright (c) 1997-2014 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-#ifndef _PCRE_H
-#define _PCRE_H
-
-/* The current PCRE version information. */
-
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef _PCRE_H
+#define _PCRE_H
+
+/* The current PCRE version information. */
+
#define PCRE_MAJOR 8
#define PCRE_MINOR 44
#define PCRE_PRERELEASE
#define PCRE_DATE 2020-02-12
-
+
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
@@ -65,16 +65,16 @@ don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
# endif
#endif
-/* By default, we use the standard "extern" declarations. */
-
-#ifndef PCRE_EXP_DECL
+/* By default, we use the standard "extern" declarations. */
+
+#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_EXP_DECL extern
# endif
-#endif
-
+#endif
+
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
@@ -82,19 +82,19 @@ don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
-#endif
-
-/* Have to include stdlib.h in order to ensure that size_t is defined;
-it is needed here for malloc. */
-
-#include <stdlib.h>
-
-/* Allow for C++ users */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
+#endif
+
+/* Have to include stdlib.h in order to ensure that size_t is defined;
+it is needed here for malloc. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Public options. Some are compile-time only, some are run-time only, and some
are both. Most of the compile-time options are saved with the compiled regex so
that they can be inspected during studying (and therefore JIT compiling). Note
@@ -104,12 +104,12 @@ are now used, so in order to conserve them, option bits that were previously
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
also be used for compile-time options that affect only compiling and are not
relevant for studying or JIT compiling.
-
+
Some options for pcre_compile() change its behaviour but do not affect the
behaviour of the execution functions. Other options are passed through to the
execution functions and affect their behaviour, with or without affecting the
behaviour of pcre_compile().
-
+
Options that can be passed to pcre_compile() are tagged Cx below, with these
variants:
@@ -170,8 +170,8 @@ with J. */
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */
#define PCRE_UCP 0x20000000 /* C3 */
-/* Exec-time and get/set-time error codes */
-
+/* Exec-time and get/set-time error codes */
+
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
@@ -210,7 +210,7 @@ with J. */
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
#define PCRE_ERROR_UNSET (-33)
-
+
/* Specific error codes for UTF-8 validity checks */
#define PCRE_UTF8_ERR0 0
@@ -252,24 +252,24 @@ with J. */
#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
#define PCRE_UTF32_ERR3 3
-/* Request types for pcre_fullinfo() */
-
-#define PCRE_INFO_OPTIONS 0
-#define PCRE_INFO_SIZE 1
-#define PCRE_INFO_CAPTURECOUNT 2
-#define PCRE_INFO_BACKREFMAX 3
-#define PCRE_INFO_FIRSTBYTE 4
-#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
-#define PCRE_INFO_FIRSTTABLE 5
-#define PCRE_INFO_LASTLITERAL 6
-#define PCRE_INFO_NAMEENTRYSIZE 7
-#define PCRE_INFO_NAMECOUNT 8
-#define PCRE_INFO_NAMETABLE 9
-#define PCRE_INFO_STUDYSIZE 10
-#define PCRE_INFO_DEFAULT_TABLES 11
-#define PCRE_INFO_OKPARTIAL 12
-#define PCRE_INFO_JCHANGED 13
-#define PCRE_INFO_HASCRORLF 14
+/* Request types for pcre_fullinfo() */
+
+#define PCRE_INFO_OPTIONS 0
+#define PCRE_INFO_SIZE 1
+#define PCRE_INFO_CAPTURECOUNT 2
+#define PCRE_INFO_BACKREFMAX 3
+#define PCRE_INFO_FIRSTBYTE 4
+#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
+#define PCRE_INFO_FIRSTTABLE 5
+#define PCRE_INFO_LASTLITERAL 6
+#define PCRE_INFO_NAMEENTRYSIZE 7
+#define PCRE_INFO_NAMECOUNT 8
+#define PCRE_INFO_NAMETABLE 9
+#define PCRE_INFO_STUDYSIZE 10
+#define PCRE_INFO_DEFAULT_TABLES 11
+#define PCRE_INFO_OKPARTIAL 12
+#define PCRE_INFO_JCHANGED 13
+#define PCRE_INFO_HASCRORLF 14
#define PCRE_INFO_MINLENGTH 15
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
@@ -281,25 +281,25 @@ with J. */
#define PCRE_INFO_MATCHLIMIT 23
#define PCRE_INFO_RECURSIONLIMIT 24
#define PCRE_INFO_MATCH_EMPTY 25
-
-/* Request types for pcre_config(). Do not re-arrange, in order to remain
-compatible. */
-
-#define PCRE_CONFIG_UTF8 0
-#define PCRE_CONFIG_NEWLINE 1
-#define PCRE_CONFIG_LINK_SIZE 2
-#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
-#define PCRE_CONFIG_MATCH_LIMIT 4
-#define PCRE_CONFIG_STACKRECURSE 5
-#define PCRE_CONFIG_UNICODE_PROPERTIES 6
-#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
-#define PCRE_CONFIG_BSR 8
+
+/* Request types for pcre_config(). Do not re-arrange, in order to remain
+compatible. */
+
+#define PCRE_CONFIG_UTF8 0
+#define PCRE_CONFIG_NEWLINE 1
+#define PCRE_CONFIG_LINK_SIZE 2
+#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
+#define PCRE_CONFIG_MATCH_LIMIT 4
+#define PCRE_CONFIG_STACKRECURSE 5
+#define PCRE_CONFIG_UNICODE_PROPERTIES 6
+#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
+#define PCRE_CONFIG_BSR 8
#define PCRE_CONFIG_JIT 9
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
#define PCRE_CONFIG_UTF32 12
#define PCRE_CONFIG_PARENS_LIMIT 13
-
+
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
@@ -309,21 +309,21 @@ compatible. */
#define PCRE_STUDY_EXTRA_NEEDED 0x0008
/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
-these bits, just add new ones on the end, in order to remain compatible. */
-
-#define PCRE_EXTRA_STUDY_DATA 0x0001
-#define PCRE_EXTRA_MATCH_LIMIT 0x0002
-#define PCRE_EXTRA_CALLOUT_DATA 0x0004
-#define PCRE_EXTRA_TABLES 0x0008
-#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
+these bits, just add new ones on the end, in order to remain compatible. */
+
+#define PCRE_EXTRA_STUDY_DATA 0x0001
+#define PCRE_EXTRA_MATCH_LIMIT 0x0002
+#define PCRE_EXTRA_CALLOUT_DATA 0x0004
+#define PCRE_EXTRA_TABLES 0x0008
+#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
#define PCRE_EXTRA_MARK 0x0020
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
-
-/* Types */
-
+
+/* Types */
+
struct real_pcre8_or_16; /* declaration; the definition is private */
typedef struct real_pcre8_or_16 pcre;
-
+
struct real_pcre8_or_16; /* declaration; the definition is private */
typedef struct real_pcre8_or_16 pcre16;
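
A minimal caller of the 8-bit API declared in this header (compile, optionally study, then match) looks roughly as follows. This is an illustrative sketch, not part of the patch; the pattern and subject are arbitrary examples.

#include <stdio.h>
#include <string.h>
#include "pcre.h"

int main(void) {
    const char *error;
    int erroffset;
    int ovector[30];
    int rc;
    const char *subject = "range 10-42";
    pcre *re;
    pcre_extra *extra;

    re = pcre_compile("(\\d+)-(\\d+)", 0, &error, &erroffset, NULL);
    if (re == NULL) {
        fprintf(stderr, "compile failed at offset %d: %s\n", erroffset, error);
        return 1;
    }

    extra = pcre_study(re, 0, &error);   /* may legitimately return NULL */

    rc = pcre_exec(re, extra, subject, (int)strlen(subject), 0, 0, ovector, 30);
    if (rc > 0)
        printf("matched at offset %d with %d captures\n", ovector[0], rc - 1);

    if (extra != NULL)
        pcre_free_study(extra);
    pcre_free(re);
    return 0;
}
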
@@ -361,29 +361,29 @@ pcre32 functions are not implemented. There is a check for this in pcre_internal
#define PCRE_SPTR32 const PCRE_UCHAR32 *
#endif
-/* When PCRE is compiled as a C++ library, the subject pointer type can be
-replaced with a custom type. For conventional use, the public interface is a
-const char *. */
-
-#ifndef PCRE_SPTR
-#define PCRE_SPTR const char *
-#endif
-
-/* The structure for passing additional data to pcre_exec(). This is defined in
-such a way as to be extensible. Always add new fields at the end, in order to
-remain compatible. */
-
-typedef struct pcre_extra {
- unsigned long int flags; /* Bits for which fields are set */
- void *study_data; /* Opaque data from pcre_study() */
- unsigned long int match_limit; /* Maximum number of calls to match() */
- void *callout_data; /* Data passed back in callouts */
- const unsigned char *tables; /* Pointer to character tables */
- unsigned long int match_limit_recursion; /* Max recursive calls to match() */
+/* When PCRE is compiled as a C++ library, the subject pointer type can be
+replaced with a custom type. For conventional use, the public interface is a
+const char *. */
+
+#ifndef PCRE_SPTR
+#define PCRE_SPTR const char *
+#endif
+
+/* The structure for passing additional data to pcre_exec(). This is defined in
+such a way as to be extensible. Always add new fields at the end, in order to
+remain compatible. */
+
+typedef struct pcre_extra {
+ unsigned long int flags; /* Bits for which fields are set */
+ void *study_data; /* Opaque data from pcre_study() */
+ unsigned long int match_limit; /* Maximum number of calls to match() */
+ void *callout_data; /* Data passed back in callouts */
+ const unsigned char *tables; /* Pointer to character tables */
+ unsigned long int match_limit_recursion; /* Max recursive calls to match() */
unsigned char **mark; /* For passing back a mark pointer */
void *executable_jit; /* Contains a pointer to a compiled jit code */
-} pcre_extra;
-
+} pcre_extra;
+
/* Same structure as above, but with 16 bit char pointers. */
typedef struct pcre16_extra {
@@ -410,31 +410,31 @@ typedef struct pcre32_extra {
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre32_extra;
-/* The structure for passing out data via the pcre_callout_function. We use a
-structure so that new fields can be added on the end in future versions,
-without changing the API of the function, thereby allowing old clients to work
-without modification. */
-
-typedef struct pcre_callout_block {
- int version; /* Identifies version of block */
- /* ------------------------ Version 0 ------------------------------- */
- int callout_number; /* Number compiled into pattern */
- int *offset_vector; /* The offset vector */
- PCRE_SPTR subject; /* The subject being matched */
- int subject_length; /* The length of the subject */
- int start_match; /* Offset to start of this match attempt */
- int current_position; /* Where we currently are in the subject */
- int capture_top; /* Max current capture */
- int capture_last; /* Most recently closed capture */
- void *callout_data; /* Data passed in with the call */
- /* ------------------- Added for Version 1 -------------------------- */
- int pattern_position; /* Offset to next item in the pattern */
- int next_item_length; /* Length of next item in the pattern */
+/* The structure for passing out data via the pcre_callout_function. We use a
+structure so that new fields can be added on the end in future versions,
+without changing the API of the function, thereby allowing old clients to work
+without modification. */
+
+typedef struct pcre_callout_block {
+ int version; /* Identifies version of block */
+ /* ------------------------ Version 0 ------------------------------- */
+ int callout_number; /* Number compiled into pattern */
+ int *offset_vector; /* The offset vector */
+ PCRE_SPTR subject; /* The subject being matched */
+ int subject_length; /* The length of the subject */
+ int start_match; /* Offset to start of this match attempt */
+ int current_position; /* Where we currently are in the subject */
+ int capture_top; /* Max current capture */
+ int capture_last; /* Most recently closed capture */
+ void *callout_data; /* Data passed in with the call */
+ /* ------------------- Added for Version 1 -------------------------- */
+ int pattern_position; /* Offset to next item in the pattern */
+ int next_item_length; /* Length of next item in the pattern */
/* ------------------- Added for Version 2 -------------------------- */
const unsigned char *mark; /* Pointer to current mark or NULL */
- /* ------------------------------------------------------------------ */
-} pcre_callout_block;
-
+ /* ------------------------------------------------------------------ */
+} pcre_callout_block;
+
/* Same structure as above, but with 16 bit char pointers. */
typedef struct pcre16_callout_block {
@@ -479,18 +479,18 @@ typedef struct pcre32_callout_block {
/* ------------------------------------------------------------------ */
} pcre32_callout_block;
-/* Indirection for store get and free functions. These can be set to
-alternative malloc/free functions if required. Special ones are used in the
-non-recursive case for "frames". There is also an optional callout function
-that is triggered by the (?) regex item. For Virtual Pascal, these definitions
-have to take another form. */
-
-#ifndef VPCOMPAT
-PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
-PCRE_EXP_DECL void (*pcre_free)(void *);
-PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
-PCRE_EXP_DECL void (*pcre_stack_free)(void *);
-PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
+/* Indirection for store get and free functions. These can be set to
+alternative malloc/free functions if required. Special ones are used in the
+non-recursive case for "frames". There is also an optional callout function
+that is triggered by the (?) regex item. For Virtual Pascal, these definitions
+have to take another form. */
+
+#ifndef VPCOMPAT
+PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
+PCRE_EXP_DECL void (*pcre_free)(void *);
+PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
+PCRE_EXP_DECL void (*pcre_stack_free)(void *);
+PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
PCRE_EXP_DECL int (*pcre_stack_guard)(void);
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
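
Note: the pcre_callout pointer declared above is the hook into the matching loop: when it is non-NULL it is called at every (?C) item in a pattern (and, if the pattern was compiled with PCRE_AUTO_CALLOUT, before every pattern item), receiving the pcre_callout_block shown earlier. A sketch of a tracing callout; the function names here are illustrative:

    #include <stdio.h>
    #include <pcre.h>

    /* Returning 0 lets matching continue; a positive value makes the match
       fail at this point (forcing a backtrack); a negative value aborts
       pcre_exec() with that value as its error code. */
    static int trace_callout(pcre_callout_block *cb)
    {
      fprintf(stderr, "callout %d at subject offset %d (pattern offset %d)\n",
              cb->callout_number, cb->current_position, cb->pattern_position);
      return 0;
    }

    void install_trace_callout(void)
    {
      pcre_callout = trace_callout;
    }
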
@@ -506,12 +506,12 @@ PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre32_stack_free)(void *);
PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *);
PCRE_EXP_DECL int (*pcre32_stack_guard)(void);
-#else /* VPCOMPAT */
-PCRE_EXP_DECL void *pcre_malloc(size_t);
-PCRE_EXP_DECL void pcre_free(void *);
-PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
-PCRE_EXP_DECL void pcre_stack_free(void *);
-PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
+#else /* VPCOMPAT */
+PCRE_EXP_DECL void *pcre_malloc(size_t);
+PCRE_EXP_DECL void pcre_free(void *);
+PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
+PCRE_EXP_DECL void pcre_stack_free(void *);
+PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
PCRE_EXP_DECL int pcre_stack_guard(void);
PCRE_EXP_DECL void *pcre16_malloc(size_t);
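
Note: in the normal (non-VPCOMPAT) build shown in the previous hunk, pcre_malloc and friends are ordinary global function pointers, so an embedding application can redirect PCRE's heap usage before compiling any pattern. A small sketch with hypothetical wrappers (not thread-safe, purely illustrative):

    #include <stdlib.h>
    #include <pcre.h>

    static size_t pcre_bytes_requested;

    static void *counting_malloc(size_t n)
    {
      pcre_bytes_requested += n;   /* crude accounting of PCRE allocations */
      return malloc(n);
    }

    static void counting_free(void *p)
    {
      free(p);
    }

    void install_counting_allocator(void)
    {
      pcre_malloc = counting_malloc;
      pcre_free  = counting_free;
    }
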
@@ -527,33 +527,33 @@ PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
PCRE_EXP_DECL void pcre32_stack_free(void *);
PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *);
PCRE_EXP_DECL int pcre32_stack_guard(void);
-#endif /* VPCOMPAT */
-
+#endif /* VPCOMPAT */
+
/* User defined callback which provides a stack just before the match starts. */
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
-/* Exported PCRE functions */
-
-PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
- const unsigned char *);
+/* Exported PCRE functions */
+
+PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
+ const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
const unsigned char *);
-PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
- int *, const unsigned char *);
+PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
+ int *, const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
int *, const unsigned char *);
-PCRE_EXP_DECL int pcre_config(int, void *);
+PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre16_config(int, void *);
PCRE_EXP_DECL int pcre32_config(int, void *);
-PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
- int *, int, const char *, char *, int);
+PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
+ int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
@@ -564,14 +564,14 @@ PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
PCRE_UCHAR16 *, int);
PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
PCRE_UCHAR32 *, int);
-PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
- const char *, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
+ const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
PCRE_SPTR32, int, int, int, int *, int , int *, int);
-PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
- int, int, int, int *, int);
+PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
+ int, int, int, int *, int);
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int);
PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *,
@@ -585,61 +585,61 @@ PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
PCRE_SPTR32, int, int, int, int *, int,
pcre32_jit_stack *);
-PCRE_EXP_DECL void pcre_free_substring(const char *);
+PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
-PCRE_EXP_DECL void pcre_free_substring_list(const char **);
+PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
-PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
- void *);
+PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
+ void *);
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
void *);
PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
void *);
-PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
- int *, int, const char *, const char **);
+PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
+ int *, int, const char *, const char **);
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
-PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
+PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
-PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
- char **, char **);
+PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
+ char **, char **);
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
PCRE_UCHAR32 **, PCRE_UCHAR32 **);
-PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
- const char **);
+PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
+ const char **);
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int,
PCRE_SPTR32 *);
-PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
- const char ***);
+PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
+ const char ***);
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
PCRE_SPTR16 **);
PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int,
PCRE_SPTR32 **);
-PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
+PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
-PCRE_EXP_DECL int pcre_refcount(pcre *, int);
+PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int);
-PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
+PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
-PCRE_EXP_DECL const char *pcre_version(void);
+PCRE_EXP_DECL const char *pcre_version(void);
PCRE_EXP_DECL const char *pcre16_version(void);
PCRE_EXP_DECL const char *pcre32_version(void);
-
+
/* Utility functions for byte order swaps. */
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
const unsigned char *);
@@ -670,8 +670,8 @@ PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* End of pcre.h */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* End of pcre.h */
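
Note: taken together, the declarations in this header support the usual compile / study / exec / extract cycle of the 8-bit API. A minimal hedged example (error handling abbreviated; the pattern and subject are made up for illustration):

    #include <stdio.h>
    #include <string.h>
    #include <pcre.h>

    int main(void)
    {
      const char *err;
      int erroffset;
      int ovector[30];                 /* 3 ints per capturing slot */
      const char *matched = NULL;
      const char *subject = "user=alice id=42";

      pcre *re = pcre_compile("id=(\\d+)", 0, &err, &erroffset, NULL);
      if (re == NULL) {
        fprintf(stderr, "compile failed at offset %d: %s\n", erroffset, err);
        return 1;
      }

      /* pcre_study() may return NULL with err == NULL when it finds nothing
         useful; pcre_exec() accepts a NULL extra pointer in that case. */
      pcre_extra *sd = pcre_study(re, 0, &err);

      int rc = pcre_exec(re, sd, subject, (int)strlen(subject), 0, 0, ovector, 30);
      if (rc >= 2 && pcre_get_substring(subject, ovector, rc, 1, &matched) >= 0) {
        printf("captured: %s\n", matched);
        pcre_free_substring(matched);
      }

      if (sd != NULL) pcre_free_study(sd);
      pcre_free(re);
      return 0;
    }
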
diff --git a/contrib/libs/pcre/pcre_chartables.c b/contrib/libs/pcre/pcre_chartables.c
index 87ecc05e91..f22172b835 100644
--- a/contrib/libs/pcre/pcre_chartables.c
+++ b/contrib/libs/pcre/pcre_chartables.c
@@ -1,198 +1,198 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* This file contains character tables that are used when no external tables
-are passed to PCRE by the application that calls it. The tables are used only
-for characters whose code values are less than 256.
-
-This is a default version of the tables that assumes ASCII encoding. A program
-called dftables (which is distributed with PCRE) can be used to build
-alternative versions of this file. This is necessary if you are running in an
-EBCDIC environment, or if you want to default to a different encoding, for
-example ISO-8859-1. When dftables is run, it creates these tables in the
-current locale. If PCRE is configured with --enable-rebuild-chartables, this
-happens automatically.
-
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* This file contains character tables that are used when no external tables
+are passed to PCRE by the application that calls it. The tables are used only
+for characters whose code values are less than 256.
+
+This is a default version of the tables that assumes ASCII encoding. A program
+called dftables (which is distributed with PCRE) can be used to build
+alternative versions of this file. This is necessary if you are running in an
+EBCDIC environment, or if you want to default to a different encoding, for
+example ISO-8859-1. When dftables is run, it creates these tables in the
+current locale. If PCRE is configured with --enable-rebuild-chartables, this
+happens automatically.
+
The following #includes are present because without them gcc 4.x may remove the
-array definition from the final binary if PCRE is built into a static library
-and dead code stripping is activated. This leads to link errors. Pulling in the
-header ensures that the array gets flagged as "someone outside this compilation
-unit might reference this" and so it will always be supplied to the linker. */
-
-#ifdef HAVE_CONFIG_H
+array definition from the final binary if PCRE is built into a static library
+and dead code stripping is activated. This leads to link errors. Pulling in the
+header ensures that the array gets flagged as "someone outside this compilation
+unit might reference this" and so it will always be supplied to the linker. */
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
+#endif
+
+#include "pcre_internal.h"
+
const pcre_uint8 PRIV(default_tables)[] = {
-
-/* This table is a lower casing table. */
-
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55,
- 56, 57, 58, 59, 60, 61, 62, 63,
- 64, 97, 98, 99,100,101,102,103,
- 104,105,106,107,108,109,110,111,
- 112,113,114,115,116,117,118,119,
- 120,121,122, 91, 92, 93, 94, 95,
- 96, 97, 98, 99,100,101,102,103,
- 104,105,106,107,108,109,110,111,
- 112,113,114,115,116,117,118,119,
- 120,121,122,123,124,125,126,127,
- 128,129,130,131,132,133,134,135,
- 136,137,138,139,140,141,142,143,
- 144,145,146,147,148,149,150,151,
- 152,153,154,155,156,157,158,159,
- 160,161,162,163,164,165,166,167,
- 168,169,170,171,172,173,174,175,
- 176,177,178,179,180,181,182,183,
- 184,185,186,187,188,189,190,191,
- 192,193,194,195,196,197,198,199,
- 200,201,202,203,204,205,206,207,
- 208,209,210,211,212,213,214,215,
- 216,217,218,219,220,221,222,223,
- 224,225,226,227,228,229,230,231,
- 232,233,234,235,236,237,238,239,
- 240,241,242,243,244,245,246,247,
- 248,249,250,251,252,253,254,255,
-
-/* This table is a case flipping table. */
-
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55,
- 56, 57, 58, 59, 60, 61, 62, 63,
- 64, 97, 98, 99,100,101,102,103,
- 104,105,106,107,108,109,110,111,
- 112,113,114,115,116,117,118,119,
- 120,121,122, 91, 92, 93, 94, 95,
- 96, 65, 66, 67, 68, 69, 70, 71,
- 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83, 84, 85, 86, 87,
- 88, 89, 90,123,124,125,126,127,
- 128,129,130,131,132,133,134,135,
- 136,137,138,139,140,141,142,143,
- 144,145,146,147,148,149,150,151,
- 152,153,154,155,156,157,158,159,
- 160,161,162,163,164,165,166,167,
- 168,169,170,171,172,173,174,175,
- 176,177,178,179,180,181,182,183,
- 184,185,186,187,188,189,190,191,
- 192,193,194,195,196,197,198,199,
- 200,201,202,203,204,205,206,207,
- 208,209,210,211,212,213,214,215,
- 216,217,218,219,220,221,222,223,
- 224,225,226,227,228,229,230,231,
- 232,233,234,235,236,237,238,239,
- 240,241,242,243,244,245,246,247,
- 248,249,250,251,252,253,254,255,
-
-/* This table contains bit maps for various character classes. Each map is 32
-bytes long and the bits run from the least significant end of each byte. The
-classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph, print, punct, and cntrl. Other classes are built from combinations. */
-
- 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
- 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
- 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
- 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
- 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-
-/* This table identifies various classes of character by individual bits:
- 0x01 white space character
- 0x02 letter
- 0x04 decimal digit
- 0x08 hexadecimal digit
- 0x10 alphanumeric or '_'
- 0x80 regular expression metacharacter or binary zero
-*/
-
- 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
+
+/* This table is a lower casing table. */
+
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,
+ 104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,
+ 120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,
+ 104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,
+ 120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,
+ 136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,
+ 152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,
+ 168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,
+ 184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,
+ 200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,
+ 216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,
+ 232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,
+ 248,249,250,251,252,253,254,255,
+
+/* This table is a case flipping table. */
+
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,
+ 104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,
+ 120,121,122, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87,
+ 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,
+ 136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,
+ 152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,
+ 168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,
+ 184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,
+ 200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,
+ 216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,
+ 232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,
+ 248,249,250,251,252,253,254,255,
+
+/* This table contains bit maps for various character classes. Each map is 32
+bytes long and the bits run from the least significant end of each byte. The
+classes that have their own maps are: space, xdigit, digit, upper, lower, word,
+graph, print, punct, and cntrl. Other classes are built from combinations. */
+
+ 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
+ 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+ 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+/* This table identifies various classes of character by individual bits:
+ 0x01 white space character
+ 0x02 letter
+ 0x04 decimal digit
+ 0x08 hexadecimal digit
+ 0x10 alphanumeric or '_'
+ 0x80 regular expression metacharacter or binary zero
+*/
+
+ 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
- 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
- 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
- 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
- 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
- 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
- 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
- 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
- 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
- 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
- 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
- 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
-
-/* End of pcre_chartables.c */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
+ 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
+ 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
+ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
+ 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
+ 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
+ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
+ 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
+
+/* End of pcre_chartables.c */
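
Note: the tables above are the ASCII defaults baked into the library. As the comment at the top of this file explains, an application that needs locale-dependent classification does not have to regenerate the file with dftables; it can build equivalent tables at run time with pcre_maketables() and hand them to pcre_compile(). A short sketch (the locale name is only an example; the tables must remain valid for as long as the compiled pattern is used):

    #include <locale.h>
    #include <pcre.h>

    pcre *compile_in_locale(const char *pattern, const char **err, int *erroffset)
    {
      const unsigned char *tables;

      setlocale(LC_CTYPE, "fr_FR");   /* example locale only */
      tables = pcre_maketables();     /* memory comes from pcre_malloc */

      return pcre_compile(pattern, 0, err, erroffset, tables);
    }
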
diff --git a/contrib/libs/pcre/pcre_compile.c b/contrib/libs/pcre/pcre_compile.c
index 4501a7e47b..8051988093 100644
--- a/contrib/libs/pcre/pcre_compile.c
+++ b/contrib/libs/pcre/pcre_compile.c
@@ -1,84 +1,84 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2020 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_compile(), along with
-supporting internal functions that are not used by other modules. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_compile(), along with
+supporting internal functions that are not used by other modules. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#define NLBLOCK cd /* Block containing newline information */
+#endif
+
+#define NLBLOCK cd /* Block containing newline information */
#define PSSTART start_pattern /* Field containing pattern start */
#define PSEND end_pattern /* Field containing pattern end */
-
-#include "pcre_internal.h"
-
-
+
+#include "pcre_internal.h"
+
+
/* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which
is also used by pcretest. PCRE_DEBUG is not defined when building a production
library. We do not need to select pcre16_printint.c specially, because the
COMPILE_PCREx macro will already be appropriately set. */
-
+
#ifdef PCRE_DEBUG
/* pcre_printint.c should not include any headers */
#define PCRE_INCLUDED
#include "pcre_printint.c"
#undef PCRE_INCLUDED
-#endif
-
-
-/* Macro for setting individual bits in class bitmaps. */
-
+#endif
+
+
+/* Macro for setting individual bits in class bitmaps. */
+
#define SETBIT(a,b) a[(b)/8] |= (1U << ((b)&7))
-
-/* Maximum length value to check against when making sure that the integer that
-holds the compiled pattern length does not overflow. We make it a bit less than
-INT_MAX to allow for adding in group terminating bytes, so that we don't have
-to check them every time. */
-
-#define OFLOW_MAX (INT_MAX - 20)
-
+
+/* Maximum length value to check against when making sure that the integer that
+holds the compiled pattern length does not overflow. We make it a bit less than
+INT_MAX to allow for adding in group terminating bytes, so that we don't have
+to check them every time. */
+
+#define OFLOW_MAX (INT_MAX - 20)
+
/* Definitions to allow mutual recursion */
-
+
static int
add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,
const pcre_uint32 *, unsigned int);
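
Note: the SETBIT macro defined above is the writing side of the 32-byte class bitmaps that pcre_chartables.c stores and that this file builds while compiling character classes: bit (b & 7) of byte b/8 marks code point b as a member. A tiny illustrative sketch of building and testing such a map (the helper names are hypothetical):

    #include <string.h>

    #define SETBIT(a,b) a[(b)/8] |= (1U << ((b)&7))

    /* Build a 256-bit (32-byte) class map containing the ASCII digits. */
    static void make_digit_map(unsigned char map[32])
    {
      int c;
      memset(map, 0, 32);
      for (c = '0'; c <= '9'; c++) SETBIT(map, c);
    }

    /* Membership test: the reading side of the same layout. */
    static int in_class(const unsigned char map[32], int c)
    {
      return (map[c / 8] & (1U << (c & 7))) != 0;
    }
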
@@ -90,20 +90,20 @@ static BOOL
-/*************************************************
-* Code parameters and static tables *
-*************************************************/
-
-/* This value specifies the size of stack workspace that is used during the
-first pre-compile phase that determines how much memory is required. The regex
-is partly compiled into this space, but the compiled parts are discarded as
-soon as they can be, so that hopefully there will never be an overrun. The code
-does, however, check for an overrun. The largest amount I've seen used is 218,
-so this number is very generous.
-
-The same workspace is used during the second, actual compile phase for
-remembering forward references to groups so that they can be filled in at the
-end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
+/*************************************************
+* Code parameters and static tables *
+*************************************************/
+
+/* This value specifies the size of stack workspace that is used during the
+first pre-compile phase that determines how much memory is required. The regex
+is partly compiled into this space, but the compiled parts are discarded as
+soon as they can be, so that hopefully there will never be an overrun. The code
+does, however, check for an overrun. The largest amount I've seen used is 218,
+so this number is very generous.
+
+The same workspace is used during the second, actual compile phase for
+remembering forward references to groups so that they can be filled in at the
+end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
is 4 there is plenty of room for most patterns. However, the memory can get
filled up by repetitions of forward references, for example patterns like
/(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so
@@ -111,15 +111,15 @@ that the workspace is expanded using malloc() in this situation. The value
below is therefore a minimum, and we put a maximum on it for safety. The
minimum is now also defined in terms of LINK_SIZE so that the use of malloc()
kicks in at the same number of forward references in all cases. */
-
+
#define COMPILE_WORK_SIZE (2048*LINK_SIZE)
#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
-
+
/* This value determines the size of the initial vector that is used for
remembering named groups during the pre-compile. It is allocated on the stack,
but if it is too small, it is expanded using malloc(), in a similar way to the
workspace. The value is the number of slots in the list. */
-
+
#define NAMED_GROUP_LIST_SIZE 20
/* The overrun tests check for a slightly smaller size so that they detect the
@@ -139,17 +139,17 @@ overrun before it actually does run off the end of the data block. */
#define UTF_LENGTH 0x10000000l /* The char contains its length. */
-/* Table for handling escaped characters in the range '0'-'z'. Positive returns
-are simple data values; negative values are for special things like \d and so
-on. Zero means further processing is needed (for things like \x), or the escape
-is invalid. */
-
+/* Table for handling escaped characters in the range '0'-'z'. Positive returns
+are simple data values; negative values are for special things like \d and so
+on. Zero means further processing is needed (for things like \x), or the escape
+is invalid. */
+
#ifndef EBCDIC
/* This is the "normal" table for ASCII systems or for EBCDIC systems running
in UTF-8 mode. */
-static const short int escapes[] = {
+static const short int escapes[] = {
0, 0,
0, 0,
0, 0,
@@ -188,59 +188,59 @@ static const short int escapes[] = {
-ESC_v, -ESC_w,
0, 0,
-ESC_z
-};
-
+};
+
#else
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
-static const short int escapes[] = {
-/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
-/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
-/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~',
-/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0,
-/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?',
-/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',
+static const short int escapes[] = {
+/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
+/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
+/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~',
+/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0,
+/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?',
+/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',
/* 80 */ 0, ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
-/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
+/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
-/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
-/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
-/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
-/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
-/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
-/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
+/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
+/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
+/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
+/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
+/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
+/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P,
-/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
-/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
-/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
-/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0
-};
+/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
+/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
+/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
+/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0
+};
/* We also need a table of characters that may follow \c in an EBCDIC
environment for characters 0-31. */
static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
-#endif
-
-
-/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
-searched linearly. Put all the names into a single string, in order to reduce
+#endif
+
+
+/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
+searched linearly. Put all the names into a single string, in order to reduce
the number of relocations when a shared library is dynamically linked. The
string is built from string macros so that it works in UTF-8 mode on EBCDIC
platforms. */
-
-typedef struct verbitem {
+
+typedef struct verbitem {
int len; /* Length of verb name */
int op; /* Op when no arg, or -1 if arg mandatory */
int op_arg; /* Op when arg present, or -1 if not allowed */
-} verbitem;
-
-static const char verbnames[] =
+} verbitem;
+
+static const char verbnames[] =
"\0" /* Empty name is a shorthand for MARK */
STRING_MARK0
STRING_ACCEPT0
@@ -250,7 +250,7 @@ static const char verbnames[] =
STRING_PRUNE0
STRING_SKIP0
STRING_THEN;
-
+
static const verbitem verbs[] = {
{ 0, -1, OP_MARK },
{ 4, -1, OP_MARK },
@@ -261,11 +261,11 @@ static const verbitem verbs[] = {
{ 5, OP_PRUNE, OP_PRUNE_ARG },
{ 4, OP_SKIP, OP_SKIP_ARG },
{ 4, OP_THEN, OP_THEN_ARG }
-};
-
+};
+
static const int verbcount = sizeof(verbs)/sizeof(verbitem);
-
-
+
+
/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in
another regex library. */
@@ -279,57 +279,57 @@ static const pcre_uchar sub_end_of_word[] = {
CHAR_RIGHT_PARENTHESIS, '\0' };
-/* Tables of names of POSIX character classes and their lengths. The names are
-now all in a single string, to reduce the number of relocations when a shared
-library is dynamically loaded. The list of lengths is terminated by a zero
-length entry. The first three must be alpha, lower, upper, as this is assumed
+/* Tables of names of POSIX character classes and their lengths. The names are
+now all in a single string, to reduce the number of relocations when a shared
+library is dynamically loaded. The list of lengths is terminated by a zero
+length entry. The first three must be alpha, lower, upper, as this is assumed
for handling case independence. The indices for graph, print, and punct are
needed, so identify them. */
-
-static const char posix_names[] =
+
+static const char posix_names[] =
STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
STRING_word0 STRING_xdigit;
-
+
static const pcre_uint8 posix_name_lengths[] = {
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
-
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
+
#define PC_GRAPH 8
#define PC_PRINT 9
#define PC_PUNCT 10
-/* Table of class bit maps for each POSIX class. Each class is formed from a
-base map, with an optional addition or removal of another map. Then, for some
-classes, there is some additional tweaking: for [:blank:] the vertical space
-characters are removed, and for [:alpha:] and [:alnum:] the underscore
-character is removed. The triples in the table consist of the base map offset,
-second map offset or -1 if no second map, and a non-negative value for map
-addition or a negative value for map subtraction (if there are two maps). The
-absolute value of the third field has these meanings: 0 => no tweaking, 1 =>
-remove vertical space characters, 2 => remove underscore. */
-
-static const int posix_class_maps[] = {
- cbit_word, cbit_digit, -2, /* alpha */
- cbit_lower, -1, 0, /* lower */
- cbit_upper, -1, 0, /* upper */
- cbit_word, -1, 2, /* alnum - word without underscore */
- cbit_print, cbit_cntrl, 0, /* ascii */
- cbit_space, -1, 1, /* blank - a GNU extension */
- cbit_cntrl, -1, 0, /* cntrl */
- cbit_digit, -1, 0, /* digit */
- cbit_graph, -1, 0, /* graph */
- cbit_print, -1, 0, /* print */
- cbit_punct, -1, 0, /* punct */
- cbit_space, -1, 0, /* space */
- cbit_word, -1, 0, /* word - a Perl extension */
- cbit_xdigit,-1, 0 /* xdigit */
-};
-
+/* Table of class bit maps for each POSIX class. Each class is formed from a
+base map, with an optional addition or removal of another map. Then, for some
+classes, there is some additional tweaking: for [:blank:] the vertical space
+characters are removed, and for [:alpha:] and [:alnum:] the underscore
+character is removed. The triples in the table consist of the base map offset,
+second map offset or -1 if no second map, and a non-negative value for map
+addition or a negative value for map subtraction (if there are two maps). The
+absolute value of the third field has these meanings: 0 => no tweaking, 1 =>
+remove vertical space characters, 2 => remove underscore. */
+
+static const int posix_class_maps[] = {
+ cbit_word, cbit_digit, -2, /* alpha */
+ cbit_lower, -1, 0, /* lower */
+ cbit_upper, -1, 0, /* upper */
+ cbit_word, -1, 2, /* alnum - word without underscore */
+ cbit_print, cbit_cntrl, 0, /* ascii */
+ cbit_space, -1, 1, /* blank - a GNU extension */
+ cbit_cntrl, -1, 0, /* cntrl */
+ cbit_digit, -1, 0, /* digit */
+ cbit_graph, -1, 0, /* graph */
+ cbit_print, -1, 0, /* print */
+ cbit_punct, -1, 0, /* punct */
+ cbit_space, -1, 0, /* space */
+ cbit_word, -1, 0, /* word - a Perl extension */
+ cbit_xdigit,-1, 0 /* xdigit */
+};
+
/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by
Unicode property escapes. */
-
+
#ifdef SUPPORT_UCP
static const pcre_uchar string_PNd[] = {
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
@@ -434,99 +434,99 @@ static const pcre_uchar *posix_substitutes[] = {
#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
#endif
-#define STRING(a) # a
-#define XSTRING(s) STRING(s)
-
-/* The texts of compile-time error messages. These are "char *" because they
-are passed to the outside world. Do not ever re-use any error number, because
-they are documented. Always add a new error instead. Messages marked DEAD below
-are no longer used. This used to be a table of strings, but in order to reduce
-the number of relocations needed when a shared library is loaded dynamically,
-it is now one long string. We cannot use a table of offsets, because the
-lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
-simply count through to the one we want - this isn't a performance issue
+#define STRING(a) # a
+#define XSTRING(s) STRING(s)
+
+/* The texts of compile-time error messages. These are "char *" because they
+are passed to the outside world. Do not ever re-use any error number, because
+they are documented. Always add a new error instead. Messages marked DEAD below
+are no longer used. This used to be a table of strings, but in order to reduce
+the number of relocations needed when a shared library is loaded dynamically,
+it is now one long string. We cannot use a table of offsets, because the
+lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
+simply count through to the one we want - this isn't a performance issue
because these strings are used only when there is a compilation error.
-
+
Each substring ends with \0 to insert a null character. This includes the final
substring, so that the whole string ends with \0\0, which can be detected when
counting through. */
-static const char error_texts[] =
- "no error\0"
- "\\ at end of pattern\0"
- "\\c at end of pattern\0"
- "unrecognized character follows \\\0"
- "numbers out of order in {} quantifier\0"
- /* 5 */
- "number too big in {} quantifier\0"
- "missing terminating ] for character class\0"
- "invalid escape sequence in character class\0"
- "range out of order in character class\0"
- "nothing to repeat\0"
- /* 10 */
+static const char error_texts[] =
+ "no error\0"
+ "\\ at end of pattern\0"
+ "\\c at end of pattern\0"
+ "unrecognized character follows \\\0"
+ "numbers out of order in {} quantifier\0"
+ /* 5 */
+ "number too big in {} quantifier\0"
+ "missing terminating ] for character class\0"
+ "invalid escape sequence in character class\0"
+ "range out of order in character class\0"
+ "nothing to repeat\0"
+ /* 10 */
"internal error: invalid forward reference offset\0"
- "internal error: unexpected repeat\0"
- "unrecognized character after (? or (?-\0"
- "POSIX named classes are supported only within a class\0"
- "missing )\0"
- /* 15 */
- "reference to non-existent subpattern\0"
- "erroffset passed as NULL\0"
- "unknown option bit(s) set\0"
- "missing ) after comment\0"
- "parentheses nested too deeply\0" /** DEAD **/
- /* 20 */
- "regular expression is too large\0"
- "failed to get memory\0"
- "unmatched parentheses\0"
- "internal error: code overflow\0"
- "unrecognized character after (?<\0"
- /* 25 */
- "lookbehind assertion is not fixed length\0"
- "malformed number or name after (?(\0"
- "conditional group contains more than two branches\0"
+ "internal error: unexpected repeat\0"
+ "unrecognized character after (? or (?-\0"
+ "POSIX named classes are supported only within a class\0"
+ "missing )\0"
+ /* 15 */
+ "reference to non-existent subpattern\0"
+ "erroffset passed as NULL\0"
+ "unknown option bit(s) set\0"
+ "missing ) after comment\0"
+ "parentheses nested too deeply\0" /** DEAD **/
+ /* 20 */
+ "regular expression is too large\0"
+ "failed to get memory\0"
+ "unmatched parentheses\0"
+ "internal error: code overflow\0"
+ "unrecognized character after (?<\0"
+ /* 25 */
+ "lookbehind assertion is not fixed length\0"
+ "malformed number or name after (?(\0"
+ "conditional group contains more than two branches\0"
"assertion expected after (?( or (?(?C)\0"
- "(?R or (?[+-]digits must be followed by )\0"
- /* 30 */
- "unknown POSIX class name\0"
- "POSIX collating elements are not supported\0"
+ "(?R or (?[+-]digits must be followed by )\0"
+ /* 30 */
+ "unknown POSIX class name\0"
+ "POSIX collating elements are not supported\0"
"this version of PCRE is compiled without UTF support\0"
- "spare error\0" /** DEAD **/
+ "spare error\0" /** DEAD **/
"character value in \\x{} or \\o{} is too large\0"
- /* 35 */
- "invalid condition (?(0)\0"
- "\\C not allowed in lookbehind assertion\0"
+ /* 35 */
+ "invalid condition (?(0)\0"
+ "\\C not allowed in lookbehind assertion\0"
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
- "number after (?C is > 255\0"
- "closing ) for (?C expected\0"
- /* 40 */
- "recursive call could loop indefinitely\0"
- "unrecognized character after (?P\0"
- "syntax error in subpattern name (missing terminator)\0"
- "two named subpatterns have the same name\0"
- "invalid UTF-8 string\0"
- /* 45 */
- "support for \\P, \\p, and \\X has not been compiled\0"
- "malformed \\P or \\p sequence\0"
- "unknown property name after \\P or \\p\0"
- "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
- "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
- /* 50 */
- "repeated subpattern is too long\0" /** DEAD **/
+ "number after (?C is > 255\0"
+ "closing ) for (?C expected\0"
+ /* 40 */
+ "recursive call could loop indefinitely\0"
+ "unrecognized character after (?P\0"
+ "syntax error in subpattern name (missing terminator)\0"
+ "two named subpatterns have the same name\0"
+ "invalid UTF-8 string\0"
+ /* 45 */
+ "support for \\P, \\p, and \\X has not been compiled\0"
+ "malformed \\P or \\p sequence\0"
+ "unknown property name after \\P or \\p\0"
+ "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
+ "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
+ /* 50 */
+ "repeated subpattern is too long\0" /** DEAD **/
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
- "internal error: overran compiling workspace\0"
- "internal error: previously-checked referenced subpattern not found\0"
- "DEFINE group contains more than one branch\0"
- /* 55 */
+ "internal error: overran compiling workspace\0"
+ "internal error: previously-checked referenced subpattern not found\0"
+ "DEFINE group contains more than one branch\0"
+ /* 55 */
"repeating a DEFINE group is not allowed\0" /** DEAD **/
- "inconsistent NEWLINE options\0"
+ "inconsistent NEWLINE options\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"a numbered reference must not be zero\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
- /* 60 */
+ /* 60 */
"(*VERB) not recognized or malformed\0"
- "number is too big\0"
- "subpattern name expected\0"
+ "number is too big\0"
+ "subpattern name expected\0"
"digit expected after (?+\0"
"] is an invalid data character in JavaScript compatibility mode\0"
/* 65 */
@@ -562,23 +562,23 @@ static const char error_texts[] =
"digits missing in \\x{} or \\o{}\0"
"regular expression is too complicated\0"
;
-
-/* Table to identify digits and hex digits. This is used when compiling
-patterns. Note that the tables in chartables are dependent on the locale, and
-may mark arbitrary characters as digits - but the PCRE compiling code expects
-to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
-a private table here. It costs 256 bytes, but it is a lot faster than doing
-character value tests (at least in some simple cases I timed), and in some
-applications one wants PCRE to compile efficiently as well as match
-efficiently.
-
-For convenience, we use the same bit definitions as in chartables:
-
- 0x04 decimal digit
- 0x08 hexadecimal digit
-
-Then we can use ctype_digit and ctype_xdigit in the code. */
-
+
+/* Table to identify digits and hex digits. This is used when compiling
+patterns. Note that the tables in chartables are dependent on the locale, and
+may mark arbitrary characters as digits - but the PCRE compiling code expects
+to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
+a private table here. It costs 256 bytes, but it is a lot faster than doing
+character value tests (at least in some simple cases I timed), and in some
+applications one wants PCRE to compile efficiently as well as match
+efficiently.
+
+For convenience, we use the same bit definitions as in chartables:
+
+ 0x04 decimal digit
+ 0x08 hexadecimal digit
+
+Then we can use ctype_digit and ctype_xdigit in the code. */
+
/* Using a simple comparison for decimal numbers rather than a memory read
is much faster, and the resulting code is simpler (the compiler turns it
into a subtraction and unsigned comparison). */
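
Note: several tables in this file (verbnames, posix_names, and error_texts above) pack their entries into one long string with embedded NUL separators, which avoids a relocation per entry when the library is loaded as a shared object; the n-th entry is then recovered by counting terminators, cheap enough because it only happens on the error path. A hedged sketch of that lookup, equivalent in spirit to the library's internal helper (the function name here is illustrative):

    /* Walk past n NUL terminators.  error_texts ends with an empty string,
       i.e. "\0\0", so overrunning the end can be detected. */
    static const char *nth_packed_string(const char *table, int n)
    {
      const char *s = table;
      for (; n > 0; n--)
      {
        while (*s != 0) s++;   /* skip the current entry */
        s++;                   /* step over its terminator */
        if (*s == 0) break;    /* reached the final empty entry: n too large */
      }
      return s;
    }
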
@@ -591,131 +591,131 @@ into a subtraction and unsigned comparison). */
UTF-8 mode. */
static const pcre_uint8 digitab[] =
- {
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
-
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
+ 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */
+ 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
+ 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */
+ 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
+
#else
/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
static const pcre_uint8 digitab[] =
- {
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
- 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */
- 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
+ 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
+ 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
+ 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */
+ 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
+
static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
- 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
- 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
- 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
- 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
- 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
- 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
- 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
- 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */
- 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
- 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
- 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
- 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */
- 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
- 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
- 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-#endif
-
-
+ 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
+ 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
+ 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
+ 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
+ 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
+ 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
+ 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */
+ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */
+ 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */
+ 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */
+ 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */
+ 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */
+ 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */
+ 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */
+ 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
+ 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */
+ 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */
+ 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */
+ 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */
+ 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */
+ 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */
+ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
+ 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
+#endif
+
+
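
For illustration, a minimal sketch of how a flag table like digitab above is consulted; the names and the runtime initialisation are hypothetical, only the 0x04 (decimal digit) and 0x08 (hex digit) bit meanings come from the comment above:

/* Illustrative sketch with hypothetical names: a 256-entry flag table in the
   spirit of digitab. '7' carries both bits, 'f' carries only the hex bit. */
#include <stdio.h>

#define FLAG_DIGIT  0x04
#define FLAG_XDIGIT 0x08

static unsigned char flags[256];

static void init_flags(void)
{
  int c;
  for (c = '0'; c <= '9'; c++) flags[c] = FLAG_DIGIT | FLAG_XDIGIT;
  for (c = 'a'; c <= 'f'; c++) flags[c] = FLAG_XDIGIT;
  for (c = 'A'; c <= 'F'; c++) flags[c] = FLAG_XDIGIT;
}

int main(void)
{
  init_flags();
  printf("'7': digit=%d xdigit=%d\n",
         (flags['7'] & FLAG_DIGIT) != 0, (flags['7'] & FLAG_XDIGIT) != 0);
  printf("'f': digit=%d xdigit=%d\n",
         (flags['f'] & FLAG_DIGIT) != 0, (flags['f'] & FLAG_XDIGIT) != 0);
  return 0;
}
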
/* This table is used to check whether auto-possessification is possible
between adjacent character-type opcodes. The left-hand (repeated) opcode is
used to select the row, and the right-hand opcode is used to select the column.
A value of 1 means that auto-possessification is OK. For example, the second
value in the first row means that \D+\d can be turned into \D++\d.
-
+
The Unicode property types (\P and \p) have to be present to fill out the table
because of what their opcode values are, but the table values should always be
zero because property types are handled separately in the code. The last four
columns apply to items that cannot be repeated, so there is no need to have
rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
-
+
#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
-
+
static const pcre_uint8 autoposstab[APTROWS][APTCOLS] = {
/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
@@ -736,7 +736,7 @@ static const pcre_uint8 autoposstab[APTROWS][APTCOLS] = {
{ 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
};
-
+
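
For illustration, a toy sketch of the row/column lookup described above; the opcode values and table contents are hypothetical, only the convention (the repeated left-hand opcode selects the row, the right-hand opcode the column, 1 means auto-possessification is safe) is taken from the comment:

/* Illustrative sketch, hypothetical opcode values: deciding whether a
   repeated item followed by another item may be made possessive. */
#include <stdio.h>

enum { TOY_NOTDIGIT, TOY_DIGIT, TOY_COUNT };   /* toy stand-ins for \D and \d */

/* 1 means auto-possessification is safe, e.g. \D+\d -> \D++\d. */
static const unsigned char toy_autoposs[TOY_COUNT][TOY_COUNT] = {
  /*        \D \d */
  /* \D */ { 0, 1 },
  /* \d */ { 1, 0 }
};

static int can_auto_possessify(int left, int right)
{
  return toy_autoposs[left][right];
}

int main(void)
{
  printf("\\D+ before \\d: %d\n", can_auto_possessify(TOY_NOTDIGIT, TOY_DIGIT));
  printf("\\d+ before \\d: %d\n", can_auto_possessify(TOY_DIGIT, TOY_DIGIT));
  return 0;
}
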
/* This table is used to check whether auto-possessification is possible
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
@@ -878,34 +878,34 @@ static const pcre_uint8 opcode_possessify[] = {
-/*************************************************
-* Find an error text *
-*************************************************/
-
-/* The error texts are now all in one long string, to save on relocations. As
-some of the text is of unknown length, we can't use a table of offsets.
-Instead, just count through the strings. This is not a performance issue
-because it happens only when there has been a compilation error.
-
-Argument: the error number
-Returns: pointer to the error string
-*/
-
-static const char *
-find_error_text(int n)
-{
-const char *s = error_texts;
+/*************************************************
+* Find an error text *
+*************************************************/
+
+/* The error texts are now all in one long string, to save on relocations. As
+some of the text is of unknown length, we can't use a table of offsets.
+Instead, just count through the strings. This is not a performance issue
+because it happens only when there has been a compilation error.
+
+Argument: the error number
+Returns: pointer to the error string
+*/
+
+static const char *
+find_error_text(int n)
+{
+const char *s = error_texts;
for (; n > 0; n--)
{
while (*s++ != CHAR_NULL) {};
if (*s == CHAR_NULL) return "Error text not found (please report)";
}
-return s;
-}
-
-
+return s;
+}
+
+
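
For illustration, a standalone sketch of the counting scheme find_error_text uses; the message strings here are invented, only the walk over NUL-separated texts mirrors the function above:

/* Illustrative sketch, not PCRE code: messages kept in one long string to
   avoid a pointer table (and hence relocations), selected by counting NULs. */
#include <stdio.h>

static const char messages[] =
  "no error\0"
  "\\ at end of pattern\0"
  "\\c at end of pattern\0";

static const char *message_text(int n)
{
  const char *s = messages;
  for (; n > 0; n--)
    {
    while (*s++ != '\0') {}
    if (*s == '\0') return "message not found";
    }
  return s;
}

int main(void)
{
  printf("%s\n", message_text(2));   /* prints "\c at end of pattern" */
  return 0;
}
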
-/*************************************************
+/*************************************************
* Expand the workspace *
*************************************************/
@@ -978,82 +978,82 @@ return (*p == CHAR_RIGHT_CURLY_BRACKET);
/*************************************************
-* Handle escapes *
-*************************************************/
-
-/* This function is called when a \ has been encountered. It either returns a
+* Handle escapes *
+*************************************************/
+
+/* This function is called when a \ has been encountered. It either returns a
positive value for a simple escape such as \n, or 0 for a data character which
will be placed in chptr. A backreference to group n is returned as negative n.
When UTF-8 is enabled, a positive value greater than 255 may be returned in
chptr. On entry, ptr is pointing at the \. On exit, it is on the final
character of the escape sequence.
-
-Arguments:
- ptrptr points to the pattern position pointer
+
+Arguments:
+ ptrptr points to the pattern position pointer
chptr points to a returned data character
- errorcodeptr points to the errorcode variable
- bracount number of previous extracting brackets
- options the options bits
- isclass TRUE if inside a character class
-
+ errorcodeptr points to the errorcode variable
+ bracount number of previous extracting brackets
+ options the options bits
+ isclass TRUE if inside a character class
+
Returns: zero => a data character
positive => a special escape sequence
negative => a back reference
- on error, errorcodeptr is set
-*/
-
-static int
+ on error, errorcodeptr is set
+*/
+
+static int
check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
int bracount, int options, BOOL isclass)
-{
+{
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
BOOL utf = (options & PCRE_UTF8) != 0;
const pcre_uchar *ptr = *ptrptr + 1;
pcre_uint32 c;
int escape = 0;
int i;
-
-GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
-ptr--; /* Set pointer back to the last byte */
-
-/* If backslash is at the end of the pattern, it's an error. */
-
+
+GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
+ptr--; /* Set pointer back to the last byte */
+
+/* If backslash is at the end of the pattern, it's an error. */
+
if (c == CHAR_NULL) *errorcodeptr = ERR1;
-
-/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
-in a table. A non-zero result is something that can be returned immediately.
-Otherwise further processing may be required. */
-
+
+/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
+in a table. A non-zero result is something that can be returned immediately.
+Otherwise further processing may be required. */
+
#ifndef EBCDIC /* ASCII/UTF-8 coding */
/* Not alphanumeric */
else if (c < CHAR_0 || c > CHAR_z) {}
else if ((i = escapes[c - CHAR_0]) != 0)
{ if (i > 0) c = (pcre_uint32)i; else escape = -i; }
-
-#else /* EBCDIC coding */
+
+#else /* EBCDIC coding */
/* Not alphanumeric */
else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
else if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
-#endif
-
-/* Escapes that need further processing, or are illegal. */
-
-else
- {
+#endif
+
+/* Escapes that need further processing, or are illegal. */
+
+else
+ {
const pcre_uchar *oldptr;
BOOL braced, negated, overflow;
int s;
-
- switch (c)
- {
- /* A number of Perl escapes are not handled by PCRE. We give an explicit
- error. */
-
+
+ switch (c)
+ {
+ /* A number of Perl escapes are not handled by PCRE. We give an explicit
+ error. */
+
case CHAR_l:
case CHAR_L:
- *errorcodeptr = ERR37;
- break;
-
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_u:
if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
{
@@ -1076,7 +1076,7 @@ else
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
}
-
+
#if defined COMPILE_PCRE8
if (c > (utf ? 0x10ffffU : 0xffU))
#elif defined COMPILE_PCRE16
@@ -1119,7 +1119,7 @@ else
case CHAR_g:
if (isclass) break;
if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
- {
+ {
escape = ESC_g;
break;
}
@@ -1132,27 +1132,27 @@ else
for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
- {
+ {
escape = ESC_k;
- break;
- }
- braced = TRUE;
- ptr++;
- }
- else braced = FALSE;
-
+ break;
+ }
+ braced = TRUE;
+ ptr++;
+ }
+ else braced = FALSE;
+
if (ptr[1] == CHAR_MINUS)
- {
- negated = TRUE;
- ptr++;
- }
- else negated = FALSE;
-
+ {
+ negated = TRUE;
+ ptr++;
+ }
+ else negated = FALSE;
+
/* The integer range is limited by the machine's int representation. */
s = 0;
overflow = FALSE;
while (IS_DIGIT(ptr[1]))
- {
+ {
if (s > INT_MAX / 10 - 1) /* Integer overflow */
{
overflow = TRUE;
@@ -1164,62 +1164,62 @@ else
{
while (IS_DIGIT(ptr[1]))
ptr++;
- *errorcodeptr = ERR61;
- break;
- }
-
+ *errorcodeptr = ERR61;
+ break;
+ }
+
if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
- {
- *errorcodeptr = ERR57;
- break;
- }
-
+ {
+ *errorcodeptr = ERR57;
+ break;
+ }
+
if (s == 0)
{
*errorcodeptr = ERR58;
break;
}
- if (negated)
- {
+ if (negated)
+ {
if (s > bracount)
- {
- *errorcodeptr = ERR15;
- break;
- }
+ {
+ *errorcodeptr = ERR15;
+ break;
+ }
s = bracount - (s - 1);
- }
-
+ }
+
escape = -s;
- break;
-
- /* The handling of escape sequences consisting of a string of digits
+ break;
+
+ /* The handling of escape sequences consisting of a string of digits
starting with one that is not zero is not straightforward. Perl has changed
over the years. Nowadays \g{} for backreferences and \o{} for octal are
recommended to avoid the ambiguities in the old syntax.
-
- Outside a character class, the digits are read as a decimal number. If the
+
+ Outside a character class, the digits are read as a decimal number. If the
number is less than 8 (used to be 10), or if there are that many previous
extracting left brackets, then it is a back reference. Otherwise, up to
three octal digits are read to form an escaped byte. Thus \123 is likely to
be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
the octal value is greater than 377, the least significant 8 bits are
taken. \8 and \9 are treated as the literal characters 8 and 9.
-
+
Inside a character class, \ followed by a digit is always either a literal
8 or 9 or an octal number. */
-
+
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
- if (!isclass)
- {
- oldptr = ptr;
+ if (!isclass)
+ {
+ oldptr = ptr;
/* The integer range is limited by the machine's int representation. */
s = (int)(c -CHAR_0);
overflow = FALSE;
while (IS_DIGIT(ptr[1]))
- {
+ {
if (s > INT_MAX / 10 - 1) /* Integer overflow */
{
overflow = TRUE;
@@ -1231,32 +1231,32 @@ else
{
while (IS_DIGIT(ptr[1]))
ptr++;
- *errorcodeptr = ERR61;
- break;
- }
+ *errorcodeptr = ERR61;
+ break;
+ }
if (s < 8 || s <= bracount) /* Check for back reference */
- {
+ {
escape = -s;
- break;
- }
- ptr = oldptr; /* Put the pointer back and fall through */
- }
-
+ break;
+ }
+ ptr = oldptr; /* Put the pointer back and fall through */
+ }
+
/* Handle a digit following \ when the number is not a back reference. If
the first digit is 8 or 9, Perl used to generate a binary zero byte and
then treat the digit as a following literal. At least by Perl 5.18 this
changed so as not to insert the binary zero. */
-
+
if ((c = *ptr) >= CHAR_8) break;
-
+
/* Fall through with a digit less than 8 */
- /* \0 always starts an octal number, but we may drop through to here with a
- larger first octal digit. The original code used just to take the least
- significant 8 bits of octal numbers (I think this is what early Perls used
+ /* \0 always starts an octal number, but we may drop through to here with a
+ larger first octal digit. The original code used just to take the least
+ significant 8 bits of octal numbers (I think this is what early Perls used
to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
but no more than 3 octal digits. */
-
+
case CHAR_0:
c -= CHAR_0;
while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
@@ -1264,20 +1264,20 @@ else
#ifdef COMPILE_PCRE8
if (!utf && c > 0xff) *errorcodeptr = ERR51;
#endif
- break;
-
+ break;
+
/* \o is a relatively new Perl feature, supporting a more general way of
specifying character codes in octal. The only supported form is \o{ddd}. */
-
+
case CHAR_o:
if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else
- {
+ {
ptr += 2;
- c = 0;
+ c = 0;
overflow = FALSE;
while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
- {
+ {
register pcre_uint32 cc = *ptr++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
#ifdef COMPILE_PCRE32
@@ -1304,7 +1304,7 @@ else
else *errorcodeptr = ERR80;
}
break;
-
+
/* \x is complicated. In JavaScript, \x must be followed by two hexadecimal
numbers. Otherwise it is a lowercase x letter. */
@@ -1321,14 +1321,14 @@ else
#ifndef EBCDIC /* ASCII/UTF-8 coding */
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
-#else /* EBCDIC coding */
+#else /* EBCDIC coding */
if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
-#endif
+#endif
}
- }
+ }
} /* End JavaScript handling */
-
+
/* Handle \x in Perl's style. \x{ddd} is a character number which can be
greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
digits. If not, { used to be treated as a data character. However, Perl
@@ -1339,7 +1339,7 @@ else
else
{
if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
- {
+ {
ptr += 2;
if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
{
@@ -1352,11 +1352,11 @@ else
{
register pcre_uint32 cc = *ptr++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
-
+
#ifdef COMPILE_PCRE32
if (c >= 0x10000000l) { overflow = TRUE; break; }
#endif
-
+
#ifndef EBCDIC /* ASCII/UTF-8 coding */
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
@@ -1364,7 +1364,7 @@ else
if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
-
+
#if defined COMPILE_PCRE8
if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE16
@@ -1405,27 +1405,27 @@ else
#ifndef EBCDIC /* ASCII/UTF-8 coding */
if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
-#else /* EBCDIC coding */
+#else /* EBCDIC coding */
if (cc <= CHAR_z) cc += 64; /* Convert to upper case */
c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
-#endif
+#endif
}
} /* End of \xdd handling */
} /* End of Perl-style \x handling */
- break;
-
- /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
+ break;
+
+ /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
An error is given if the byte following \c is not an ASCII character. This
coding is ASCII-specific, but then the whole concept of \cx is
- ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
-
+ ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
+
case CHAR_c:
- c = *(++ptr);
+ c = *(++ptr);
if (c == CHAR_NULL)
- {
- *errorcodeptr = ERR2;
- break;
- }
+ {
+ *errorcodeptr = ERR2;
+ break;
+ }
#ifndef EBCDIC /* ASCII/UTF-8 coding */
if (c > 127) /* Excludes all non-ASCII in either mode */
{
@@ -1433,7 +1433,7 @@ else
break;
}
if (c >= CHAR_a && c <= CHAR_z) c -= 32;
- c ^= 0x40;
+ c ^= 0x40;
#else /* EBCDIC coding */
if (c >= CHAR_a && c <= CHAR_z) c += 64;
if (c == CHAR_QUESTION_MARK)
@@ -1446,26 +1446,26 @@ else
}
if (i < 32) c = i; else *errorcodeptr = ERR68;
}
-#endif
- break;
-
- /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
- other alphanumeric following \ is an error if PCRE_EXTRA was set;
- otherwise, for Perl compatibility, it is a literal. This code looks a bit
- odd, but there used to be some cases other than the default, and there may
- be again in future, so I haven't "optimized" it. */
-
- default:
- if ((options & PCRE_EXTRA) != 0) switch(c)
- {
- default:
- *errorcodeptr = ERR3;
- break;
- }
- break;
- }
- }
-
+#endif
+ break;
+
+ /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
+ other alphanumeric following \ is an error if PCRE_EXTRA was set;
+ otherwise, for Perl compatibility, it is a literal. This code looks a bit
+ odd, but there used to be some cases other than the default, and there may
+ be again in future, so I haven't "optimized" it. */
+
+ default:
+ if ((options & PCRE_EXTRA) != 0) switch(c)
+ {
+ default:
+ *errorcodeptr = ERR3;
+ break;
+ }
+ break;
+ }
+ }
+
/* Perl supports \N{name} for character names, as well as plain \N for "not
newline". PCRE does not support \N{name}. However, it does support
quantification such as \N{2,3}. */
@@ -1481,314 +1481,314 @@ if ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
/* Set the pointer to the final character before returning. */
-*ptrptr = ptr;
+*ptrptr = ptr;
*chptr = c;
return escape;
-}
-
-
-
-#ifdef SUPPORT_UCP
-/*************************************************
-* Handle \P and \p *
-*************************************************/
-
-/* This function is called after \P or \p has been encountered, provided that
-PCRE is compiled with support for Unicode properties. On entry, ptrptr is
-pointing at the P or p. On exit, it is pointing at the final character of the
-escape sequence.
-
-Argument:
- ptrptr points to the pattern position pointer
- negptr points to a boolean that is set TRUE for negation else FALSE
+}
+
+
+
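
For illustration, the style of overflow guard check_escape above applies while accumulating decimal back-reference numbers, extracted into a standalone sketch (the helper name is hypothetical):

/* Illustrative sketch, not PCRE code: accumulate a decimal value with the
   same kind of pre-multiplication guard, refusing to overflow a signed int. */
#include <limits.h>
#include <stdio.h>

static int parse_decimal(const char *p, int *out)
{
  int s = 0;
  for (; *p >= '0' && *p <= '9'; p++)
    {
    if (s > INT_MAX / 10 - 1) return 0;     /* next step would overflow */
    s = s * 10 + (*p - '0');
    }
  *out = s;
  return 1;
}

int main(void)
{
  int v;
  if (parse_decimal("65535", &v)) printf("%d\n", v);
  if (!parse_decimal("99999999999999999999", &v)) printf("overflow\n");
  return 0;
}
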
+#ifdef SUPPORT_UCP
+/*************************************************
+* Handle \P and \p *
+*************************************************/
+
+/* This function is called after \P or \p has been encountered, provided that
+PCRE is compiled with support for Unicode properties. On entry, ptrptr is
+pointing at the P or p. On exit, it is pointing at the final character of the
+escape sequence.
+
+Argument:
+ ptrptr points to the pattern position pointer
+ negptr points to a boolean that is set TRUE for negation else FALSE
ptypeptr points to an unsigned int that is set to the type value
pdataptr points to an unsigned int that is set to the detailed property value
- errorcodeptr points to the error code variable
-
+ errorcodeptr points to the error code variable
+
Returns: TRUE if the type value was found, or FALSE for an invalid type
-*/
-
+*/
+
static BOOL
get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,
unsigned int *pdataptr, int *errorcodeptr)
-{
+{
pcre_uchar c;
int i, bot, top;
const pcre_uchar *ptr = *ptrptr;
pcre_uchar name[32];
-
-c = *(++ptr);
+
+c = *(++ptr);
if (c == CHAR_NULL) goto ERROR_RETURN;
-
-*negptr = FALSE;
-
-/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
-negation. */
-
+
+*negptr = FALSE;
+
+/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
+negation. */
+
if (c == CHAR_LEFT_CURLY_BRACKET)
- {
+ {
if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
- {
- *negptr = TRUE;
- ptr++;
- }
+ {
+ *negptr = TRUE;
+ ptr++;
+ }
for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)
- {
- c = *(++ptr);
+ {
+ c = *(++ptr);
if (c == CHAR_NULL) goto ERROR_RETURN;
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
- name[i] = c;
- }
+ name[i] = c;
+ }
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
- name[i] = 0;
- }
-
-/* Otherwise there is just one following character */
-
-else
- {
- name[0] = c;
- name[1] = 0;
- }
-
-*ptrptr = ptr;
-
-/* Search for a recognized property name using binary chop */
-
-bot = 0;
+ name[i] = 0;
+ }
+
+/* Otherwise there is just one following character */
+
+else
+ {
+ name[0] = c;
+ name[1] = 0;
+ }
+
+*ptrptr = ptr;
+
+/* Search for a recognized property name using binary chop */
+
+bot = 0;
top = PRIV(utt_size);
-
-while (bot < top)
- {
+
+while (bot < top)
+ {
int r;
- i = (bot + top) >> 1;
+ i = (bot + top) >> 1;
r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
if (r == 0)
- {
+ {
*ptypeptr = PRIV(utt)[i].type;
*pdataptr = PRIV(utt)[i].value;
return TRUE;
- }
+ }
if (r > 0) bot = i + 1; else top = i;
- }
-
-*errorcodeptr = ERR47;
-*ptrptr = ptr;
+ }
+
+*errorcodeptr = ERR47;
+*ptrptr = ptr;
return FALSE;
-
-ERROR_RETURN:
-*errorcodeptr = ERR46;
-*ptrptr = ptr;
+
+ERROR_RETURN:
+*errorcodeptr = ERR46;
+*ptrptr = ptr;
return FALSE;
-}
-#endif
-
-
-
-/*************************************************
-* Read repeat counts *
-*************************************************/
-
-/* Read an item of the form {n,m} and return the values. This is called only
-after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
-so the syntax is guaranteed to be correct, but we need to check the values.
-
-Arguments:
- p pointer to first char after '{'
- minp pointer to int for min
- maxp pointer to int for max
- returned as -1 if no max
- errorcodeptr points to error code variable
-
-Returns: pointer to '}' on success;
- current ptr on error, with errorcodeptr set non-zero
-*/
-
+}
+#endif
+
+
+
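
For illustration, a standalone sketch of the binary chop get_ucp performs over the sorted property-name table; the name list here is invented, only the bot/top halving mirrors the code above:

/* Illustrative sketch with hypothetical data: binary chop over a sorted list
   of property names, in the style get_ucp uses for PRIV(utt). */
#include <stdio.h>
#include <string.h>

static const char *names[] = { "Greek", "L", "Latin", "Lu", "N", "Nd" };

static int find_name(const char *wanted)
{
  int bot = 0;
  int top = (int)(sizeof(names) / sizeof(names[0]));
  while (bot < top)
    {
    int i = (bot + top) >> 1;
    int r = strcmp(wanted, names[i]);
    if (r == 0) return i;
    if (r > 0) bot = i + 1; else top = i;
    }
  return -1;                      /* not recognized */
}

int main(void)
{
  printf("Lu -> %d\n", find_name("Lu"));     /* 3 */
  printf("Xx -> %d\n", find_name("Xx"));     /* -1 */
  return 0;
}
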
+/*************************************************
+* Read repeat counts *
+*************************************************/
+
+/* Read an item of the form {n,m} and return the values. This is called only
+after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
+so the syntax is guaranteed to be correct, but we need to check the values.
+
+Arguments:
+ p pointer to first char after '{'
+ minp pointer to int for min
+ maxp pointer to int for max
+ returned as -1 if no max
+ errorcodeptr points to error code variable
+
+Returns: pointer to '}' on success;
+ current ptr on error, with errorcodeptr set non-zero
+*/
+
static const pcre_uchar *
read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
-{
-int min = 0;
-int max = -1;
-
+{
+int min = 0;
+int max = -1;
+
while (IS_DIGIT(*p))
- {
+ {
min = min * 10 + (int)(*p++ - CHAR_0);
if (min > 65535)
{
*errorcodeptr = ERR5;
return p;
}
- }
-
+ }
+
if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
- {
+ {
if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
- {
- max = 0;
+ {
+ max = 0;
while(IS_DIGIT(*p))
- {
+ {
max = max * 10 + (int)(*p++ - CHAR_0);
if (max > 65535)
{
*errorcodeptr = ERR5;
return p;
}
- }
- if (max < min)
- {
- *errorcodeptr = ERR4;
- return p;
- }
- }
- }
-
-*minp = min;
-*maxp = max;
-return p;
-}
-
-
-
-/*************************************************
-* Find first significant op code *
-*************************************************/
-
-/* This is called by several functions that scan a compiled expression looking
-for a fixed first character, or an anchoring op code etc. It skips over things
+ }
+ if (max < min)
+ {
+ *errorcodeptr = ERR4;
+ return p;
+ }
+ }
+ }
+
+*minp = min;
+*maxp = max;
+return p;
+}
+
+
+
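
For illustration, a simplified standalone sketch of the {n,m} reading that read_repeat_counts above performs; it omits the 65535 and max-less-than-min error checks and assumes, as the comment says, that the quantifier syntax has already been validated:

/* Illustrative sketch, not the real read_repeat_counts: read "n", "n,", or
   "n,m" after an opening '{', returning max = -1 when there is no maximum. */
#include <stdio.h>

static const char *read_counts(const char *p, int *minp, int *maxp)
{
  int min = 0, max = -1;

  while (*p >= '0' && *p <= '9') min = min * 10 + (*p++ - '0');

  if (*p == '}') max = min;             /* "{n}" means exactly n */
  else if (*++p != '}')                 /* skip ','; "{n,}" leaves max = -1 */
    {
    max = 0;
    while (*p >= '0' && *p <= '9') max = max * 10 + (*p++ - '0');
    }

  *minp = min;
  *maxp = max;
  return p;                             /* points at the closing '}' */
}

int main(void)
{
  int lo, hi;
  read_counts("2,5}", &lo, &hi);  printf("%d..%d\n", lo, hi);   /* 2..5 */
  read_counts("3,}",  &lo, &hi);  printf("%d..%d\n", lo, hi);   /* 3..-1 */
  return 0;
}
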
+/*************************************************
+* Find first significant op code *
+*************************************************/
+
+/* This is called by several functions that scan a compiled expression looking
+for a fixed first character, or an anchoring op code etc. It skips over things
that do not influence this. For some calls, it makes sense to skip negative
forward and all backward assertions, and also the \b assertion; for others it
does not.
-
-Arguments:
- code pointer to the start of the group
- skipassert TRUE if certain assertions are to be skipped
-
-Returns: pointer to the first significant opcode
-*/
-
+
+Arguments:
+ code pointer to the start of the group
+ skipassert TRUE if certain assertions are to be skipped
+
+Returns: pointer to the first significant opcode
+*/
+
static const pcre_uchar*
first_significant_code(const pcre_uchar *code, BOOL skipassert)
-{
-for (;;)
- {
- switch ((int)*code)
- {
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- if (!skipassert) return code;
- do code += GET(code, 1); while (*code == OP_ALT);
+{
+for (;;)
+ {
+ switch ((int)*code)
+ {
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ if (!skipassert) return code;
+ do code += GET(code, 1); while (*code == OP_ALT);
code += PRIV(OP_lengths)[*code];
- break;
-
- case OP_WORD_BOUNDARY:
- case OP_NOT_WORD_BOUNDARY:
- if (!skipassert) return code;
- /* Fall through */
-
- case OP_CALLOUT:
- case OP_CREF:
+ break;
+
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_WORD_BOUNDARY:
+ if (!skipassert) return code;
+ /* Fall through */
+
+ case OP_CALLOUT:
+ case OP_CREF:
case OP_DNCREF:
- case OP_RREF:
+ case OP_RREF:
case OP_DNRREF:
- case OP_DEF:
+ case OP_DEF:
code += PRIV(OP_lengths)[*code];
- break;
-
- default:
- return code;
- }
- }
-/* Control never reaches here */
-}
-
-
-
-/*************************************************
+ break;
+
+ default:
+ return code;
+ }
+ }
+/* Control never reaches here */
+}
+
+
+
+/*************************************************
* Find the fixed length of a branch *
-*************************************************/
-
+*************************************************/
+
/* Scan a branch and compute the fixed length of subject that will match it,
-if the length is fixed. This is needed for dealing with backward assertions.
+if the length is fixed. This is needed for dealing with backward assertions.
In UTF8 mode, the result is in characters rather than bytes. The branch is
temporarily terminated with OP_END when this function is called.
-
+
This function is called when a backward assertion is encountered, so that if it
fails, the error message can point to the correct place in the pattern.
However, we cannot do this when the assertion contains subroutine calls,
because they can be forward references. We solve this by remembering this case
and doing the check at the end; a flag specifies which mode we are running in.
-Arguments:
- code points to the start of the pattern (the bracket)
+Arguments:
+ code points to the start of the pattern (the bracket)
utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
atend TRUE if called when the pattern is complete
cd the "compile data" structure
recurses chain of recurse_check to catch mutual recursion
-
+
Returns: the fixed length,
or -1 if there is no fixed length,
or -2 if \C was encountered (in UTF-8 mode only)
or -3 if an OP_RECURSE item was encountered and atend is FALSE
or -4 if an unknown opcode was encountered (internal error)
-*/
-
-static int
+*/
+
+static int
find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
recurse_check *recurses)
-{
-int length = -1;
+{
+int length = -1;
recurse_check this_recurse;
-register int branchlength = 0;
+register int branchlength = 0;
register pcre_uchar *cc = code + 1 + LINK_SIZE;
-
-/* Scan along the opcodes for this branch. If we get to the end of the
-branch, check the length against that of the other branches. */
-
-for (;;)
- {
- int d;
+
+/* Scan along the opcodes for this branch. If we get to the end of the
+branch, check the length against that of the other branches. */
+
+for (;;)
+ {
+ int d;
pcre_uchar *ce, *cs;
register pcre_uchar op = *cc;
- switch (op)
- {
+ switch (op)
+ {
/* We only need to continue for OP_CBRA (normal capturing bracket) and
OP_BRA (normal non-capturing bracket) because the other variants of these
opcodes are all concerned with unlimited repeated groups, which of course
are not of fixed length. */
- case OP_CBRA:
- case OP_BRA:
- case OP_ONCE:
+ case OP_CBRA:
+ case OP_BRA:
+ case OP_ONCE:
case OP_ONCE_NC:
- case OP_COND:
+ case OP_COND:
d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
recurses);
- if (d < 0) return d;
- branchlength += d;
- do cc += GET(cc, 1); while (*cc == OP_ALT);
- cc += 1 + LINK_SIZE;
- break;
-
+ if (d < 0) return d;
+ branchlength += d;
+ do cc += GET(cc, 1); while (*cc == OP_ALT);
+ cc += 1 + LINK_SIZE;
+ break;
+
/* Reached end of a branch; if it's a ket it is the end of a nested call.
If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
an ALT. If it is END it's the end of the outer call. All can be handled by
the same code. Note that we must not include the OP_KETRxxx opcodes here,
because they all imply an unlimited repeat. */
-
- case OP_ALT:
- case OP_KET:
- case OP_END:
+
+ case OP_ALT:
+ case OP_KET:
+ case OP_END:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
- if (length < 0) length = branchlength;
- else if (length != branchlength) return -1;
- if (*cc != OP_ALT) return length;
- cc += 1 + LINK_SIZE;
- branchlength = 0;
- break;
-
+ if (length < 0) length = branchlength;
+ else if (length != branchlength) return -1;
+ if (*cc != OP_ALT) return length;
+ cc += 1 + LINK_SIZE;
+ branchlength = 0;
+ break;
+
/* A true recursion implies not fixed length, but a subroutine call may
be OK. If the subroutine is a forward reference, we can't deal with
it until the end of the pattern, so return -3. */
@@ -1812,18 +1812,18 @@ for (;;)
cc += 1 + LINK_SIZE;
break;
- /* Skip over assertive subpatterns */
-
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- do cc += GET(cc, 1); while (*cc == OP_ALT);
+ /* Skip over assertive subpatterns */
+
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ do cc += GET(cc, 1); while (*cc == OP_ALT);
cc += 1 + LINK_SIZE;
break;
-
- /* Skip over things that don't match chars */
-
+
+ /* Skip over things that don't match chars */
+
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
@@ -1836,16 +1836,16 @@ for (;;)
case OP_CIRCM:
case OP_CLOSE:
case OP_COMMIT:
- case OP_CREF:
- case OP_DEF:
+ case OP_CREF:
+ case OP_DEF:
case OP_DNCREF:
case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
- case OP_EOD:
- case OP_EODN:
+ case OP_EOD:
+ case OP_EODN:
case OP_FAIL:
- case OP_NOT_WORD_BOUNDARY:
+ case OP_NOT_WORD_BOUNDARY:
case OP_PRUNE:
case OP_REVERSE:
case OP_RREF:
@@ -1854,27 +1854,27 @@ for (;;)
case OP_SOD:
case OP_SOM:
case OP_THEN:
- case OP_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
cc += PRIV(OP_lengths)[*cc];
- break;
-
- /* Handle literal characters */
-
- case OP_CHAR:
+ break;
+
+ /* Handle literal characters */
+
+ case OP_CHAR:
case OP_CHARI:
- case OP_NOT:
+ case OP_NOT:
case OP_NOTI:
- branchlength++;
- cc += 2;
+ branchlength++;
+ cc += 2;
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
- break;
-
- /* Handle exact repetitions. The count is already in characters, but we
- need to skip over a multibyte character in UTF8 mode. */
-
- case OP_EXACT:
+#endif
+ break;
+
+ /* Handle exact repetitions. The count is already in characters, but we
+ need to skip over a multibyte character in UTF8 mode. */
+
+ case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
@@ -1882,51 +1882,51 @@ for (;;)
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
- break;
-
- case OP_TYPEEXACT:
- branchlength += GET2(cc,1);
+#endif
+ break;
+
+ case OP_TYPEEXACT:
+ branchlength += GET2(cc,1);
if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
cc += 2;
cc += 1 + IMM2_SIZE + 1;
- break;
-
- /* Handle single-char matchers */
-
- case OP_PROP:
- case OP_NOTPROP:
- cc += 2;
- /* Fall through */
-
+ break;
+
+ /* Handle single-char matchers */
+
+ case OP_PROP:
+ case OP_NOTPROP:
+ cc += 2;
+ /* Fall through */
+
case OP_HSPACE:
case OP_VSPACE:
case OP_NOT_HSPACE:
case OP_NOT_VSPACE:
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
+ case OP_ANY:
case OP_ALLANY:
- branchlength++;
- cc++;
- break;
-
+ branchlength++;
+ cc++;
+ break;
+
/* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
otherwise \C is coded as OP_ALLANY. */
-
- case OP_ANYBYTE:
- return -2;
-
- /* Check a class for variable quantification */
-
+
+ case OP_ANYBYTE:
+ return -2;
+
+ /* Check a class for variable quantification */
+
case OP_CLASS:
case OP_NCLASS:
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
- case OP_XCLASS:
+ case OP_XCLASS:
/* The original code caused an unsigned overflow in 64 bit systems,
so now we use a conditional statement. */
if (op == OP_XCLASS)
@@ -1935,36 +1935,36 @@ for (;;)
cc += PRIV(OP_lengths)[OP_CLASS];
#else
cc += PRIV(OP_lengths)[OP_CLASS];
-#endif
-
- switch (*cc)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
+#endif
+
+ switch (*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
- return -1;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
+ return -1;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
case OP_CRPOSRANGE:
if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
branchlength += (int)GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
- break;
-
- default:
- branchlength++;
- }
- break;
-
- /* Anything else is variable length */
-
+ break;
+
+ default:
+ branchlength++;
+ }
+ break;
+
+ /* Anything else is variable length */
+
case OP_ANYNL:
case OP_BRAMINZERO:
case OP_BRAPOS:
@@ -2050,48 +2050,48 @@ for (;;)
/* Catch unrecognized opcodes so that when new ones are added they
are not forgotten, as has happened in the past. */
- default:
+ default:
return -4;
- }
- }
-/* Control never gets here */
-}
-
-
-
-/*************************************************
+ }
+ }
+/* Control never gets here */
+}
+
+
+
+/*************************************************
* Scan compiled regex for specific bracket *
-*************************************************/
-
-/* This little function scans through a compiled pattern until it finds a
+*************************************************/
+
+/* This little function scans through a compiled pattern until it finds a
capturing bracket with the given number, or, if the number is negative, an
instance of OP_REVERSE for a lookbehind. The function is global in the C sense
so that it can be called from pcre_study() when finding the minimum matching
length.
-
-Arguments:
- code points to start of expression
+
+Arguments:
+ code points to start of expression
utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
number the required bracket number or negative to find a lookbehind
-
-Returns: pointer to the opcode for the bracket, or NULL if not found
-*/
-
+
+Returns: pointer to the opcode for the bracket, or NULL if not found
+*/
+
const pcre_uchar *
PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number)
-{
-for (;;)
- {
+{
+for (;;)
+ {
register pcre_uchar c = *code;
- if (c == OP_END) return NULL;
-
- /* XCLASS is used for classes that cannot be represented just by a bit
- map. This includes negated single high-valued characters. The length in
- the table is zero; the actual length is stored in the compiled code. */
-
- if (c == OP_XCLASS) code += GET(code, 1);
-
+ if (c == OP_END) return NULL;
+
+ /* XCLASS is used for classes that cannot be represented just by a bit
+ map. This includes negated single high-valued characters. The length in
+ the table is zero; the actual length is stored in the compiled code. */
+
+ if (c == OP_XCLASS) code += GET(code, 1);
+
/* Handle recursion */
else if (c == OP_REVERSE)
@@ -2100,44 +2100,44 @@ for (;;)
code += PRIV(OP_lengths)[c];
}
- /* Handle capturing bracket */
-
+ /* Handle capturing bracket */
+
else if (c == OP_CBRA || c == OP_SCBRA ||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
- {
+ {
int n = (int)GET2(code, 1+LINK_SIZE);
if (n == number) return (pcre_uchar *)code;
code += PRIV(OP_lengths)[c];
- }
-
- /* Otherwise, we can get the item's length from the table, except that for
- repeated character types, we have to test for \p and \P, which have an extra
+ }
+
+ /* Otherwise, we can get the item's length from the table, except that for
+ repeated character types, we have to test for \p and \P, which have an extra
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
must add in its length. */
-
- else
- {
- switch(c)
- {
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
- break;
-
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- case OP_TYPEPOSUPTO:
+
+ else
+ {
+ switch(c)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
code += 2;
- break;
+ break;
case OP_MARK:
case OP_PRUNE_ARG:
@@ -2145,144 +2145,144 @@ for (;;)
case OP_THEN_ARG:
code += code[1];
break;
- }
-
- /* Add in the fixed length from the table */
-
+ }
+
+ /* Add in the fixed length from the table */
+
code += PRIV(OP_lengths)[c];
-
- /* In UTF-8 mode, opcodes that are followed by a character may be followed by
- a multi-byte character. The length in the table is a minimum, so we have to
- arrange to skip the extra bytes. */
-
+
+ /* In UTF-8 mode, opcodes that are followed by a character may be followed by
+ a multi-byte character. The length in the table is a minimum, so we have to
+ arrange to skip the extra bytes. */
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) switch(c)
- {
- case OP_CHAR:
+ {
+ case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
- case OP_EXACT:
+ case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
- case OP_UPTO:
+ case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
- case OP_MINUPTO:
+ case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
- case OP_POSUPTO:
+ case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
- case OP_STAR:
+ case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
- case OP_MINSTAR:
+ case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
- case OP_POSSTAR:
+ case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
- case OP_PLUS:
+ case OP_PLUS:
case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
- case OP_MINPLUS:
+ case OP_MINPLUS:
case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
- case OP_POSPLUS:
+ case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
- case OP_QUERY:
+ case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
- case OP_MINQUERY:
+ case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
- case OP_POSQUERY:
+ case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
- break;
- }
+ break;
+ }
#else
(void)(utf); /* Keep compiler happy by referencing function argument */
-#endif
- }
- }
-}
-
-
-
-/*************************************************
-* Scan compiled regex for recursion reference *
-*************************************************/
-
-/* This little function scans through a compiled pattern until it finds an
-instance of OP_RECURSE.
-
-Arguments:
- code points to start of expression
+#endif
+ }
+ }
+}
+
+
+
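
For illustration, a toy sketch of the table-driven scan that find_bracket above (and find_recurse below) performs; the opcode set and lengths are invented, only the walk-by-opcode-length pattern comes from the code:

/* Illustrative sketch with a toy opcode set, not PCRE's: scan a compiled
   byte stream using a per-opcode length table, looking for a group with a
   given number, much as find_bracket does for OP_CBRA. */
#include <stdio.h>

enum { TOY_END, TOY_CHAR, TOY_GROUP, TOY_OPCOUNT };

/* Bytes each opcode occupies: TOY_CHAR carries one data byte,
   TOY_GROUP carries one group-number byte. */
static const unsigned char toy_lengths[TOY_OPCOUNT] = { 1, 2, 2 };

static const unsigned char *find_group(const unsigned char *code, int number)
{
  for (;;)
    {
    unsigned char c = *code;
    if (c == TOY_END) return NULL;
    if (c == TOY_GROUP && code[1] == number) return code;
    code += toy_lengths[c];
    }
}

int main(void)
{
  static const unsigned char program[] =
    { TOY_CHAR, 'a', TOY_GROUP, 1, TOY_CHAR, 'b', TOY_GROUP, 2, TOY_END };
  const unsigned char *p = find_group(program, 2);
  if (p != NULL)
    printf("group 2 found at offset %d\n", (int)(p - program));   /* 6 */
  return 0;
}
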
+/*************************************************
+* Scan compiled regex for recursion reference *
+*************************************************/
+
+/* This little function scans through a compiled pattern until it finds an
+instance of OP_RECURSE.
+
+Arguments:
+ code points to start of expression
utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
-
-Returns: pointer to the opcode for OP_RECURSE, or NULL if not found
-*/
-
+
+Returns: pointer to the opcode for OP_RECURSE, or NULL if not found
+*/
+
static const pcre_uchar *
find_recurse(const pcre_uchar *code, BOOL utf)
-{
-for (;;)
- {
+{
+for (;;)
+ {
register pcre_uchar c = *code;
- if (c == OP_END) return NULL;
- if (c == OP_RECURSE) return code;
-
- /* XCLASS is used for classes that cannot be represented just by a bit
- map. This includes negated single high-valued characters. The length in
- the table is zero; the actual length is stored in the compiled code. */
-
- if (c == OP_XCLASS) code += GET(code, 1);
-
- /* Otherwise, we can get the item's length from the table, except that for
- repeated character types, we have to test for \p and \P, which have an extra
+ if (c == OP_END) return NULL;
+ if (c == OP_RECURSE) return code;
+
+ /* XCLASS is used for classes that cannot be represented just by a bit
+ map. This includes negated single high-valued characters. The length in
+ the table is zero; the actual length is stored in the compiled code. */
+
+ if (c == OP_XCLASS) code += GET(code, 1);
+
+ /* Otherwise, we can get the item's length from the table, except that for
+ repeated character types, we have to test for \p and \P, which have an extra
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
must add in its length. */
-
- else
- {
- switch(c)
- {
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
- break;
-
- case OP_TYPEPOSUPTO:
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
+
+ else
+ {
+ switch(c)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEPOSUPTO:
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
code += 2;
- break;
+ break;
case OP_MARK:
case OP_PRUNE_ARG:
@@ -2290,134 +2290,134 @@ for (;;)
case OP_THEN_ARG:
code += code[1];
break;
- }
-
- /* Add in the fixed length from the table */
-
+ }
+
+ /* Add in the fixed length from the table */
+
code += PRIV(OP_lengths)[c];
-
- /* In UTF-8 mode, opcodes that are followed by a character may be followed
- by a multi-byte character. The length in the table is a minimum, so we have
- to arrange to skip the extra bytes. */
-
+
+ /* In UTF-8 mode, opcodes that are followed by a character may be followed
+ by a multi-byte character. The length in the table is a minimum, so we have
+ to arrange to skip the extra bytes. */
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) switch(c)
- {
- case OP_CHAR:
+ {
+ case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
- case OP_EXACT:
+ case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
- case OP_UPTO:
+ case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
- case OP_MINUPTO:
+ case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
- case OP_POSUPTO:
+ case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
- case OP_STAR:
+ case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
- case OP_MINSTAR:
+ case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
- case OP_POSSTAR:
+ case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
- case OP_PLUS:
+ case OP_PLUS:
case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
- case OP_MINPLUS:
+ case OP_MINPLUS:
case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
- case OP_POSPLUS:
+ case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
- case OP_QUERY:
+ case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
- case OP_MINQUERY:
+ case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
- case OP_POSQUERY:
+ case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
- break;
- }
+ break;
+ }
#else
(void)(utf); /* Keep compiler happy by referencing function argument */
-#endif
- }
- }
-}
-
-
-
-/*************************************************
-* Scan compiled branch for non-emptiness *
-*************************************************/
-
-/* This function scans through a branch of a compiled pattern to see whether it
-can match the empty string or not. It is called from could_be_empty()
-below and from compile_branch() when checking for an unlimited repeat of a
-group that can match nothing. Note that first_significant_code() skips over
-backward and negative forward assertions when its final argument is TRUE. If we
-hit an unclosed bracket, we return "empty" - this means we've struck an inner
-bracket whose current branch will already have been scanned.
-
-Arguments:
- code points to start of search
- endcode points to where to stop
+#endif
+ }
+ }
+}
+
+
+
+/*************************************************
+* Scan compiled branch for non-emptiness *
+*************************************************/
+
+/* This function scans through a branch of a compiled pattern to see whether it
+can match the empty string or not. It is called from could_be_empty()
+below and from compile_branch() when checking for an unlimited repeat of a
+group that can match nothing. Note that first_significant_code() skips over
+backward and negative forward assertions when its final argument is TRUE. If we
+hit an unclosed bracket, we return "empty" - this means we've struck an inner
+bracket whose current branch will already have been scanned.
+
+Arguments:
+ code points to start of search
+ endcode points to where to stop
utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd contains pointers to tables etc.
recurses chain of recurse_check to catch mutual recursion
-
-Returns: TRUE if what is matched could be empty
-*/
-
-static BOOL
+
+Returns: TRUE if what is matched could be empty
+*/
+
+static BOOL
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
BOOL utf, compile_data *cd, recurse_check *recurses)
-{
+{
register pcre_uchar c;
recurse_check this_recurse;
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
- code < endcode;
+ code < endcode;
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
- {
+ {
const pcre_uchar *ccode;
-
- c = *code;
-
- /* Skip over forward assertions; the other assertions are skipped by
- first_significant_code() with a TRUE final argument. */
-
- if (c == OP_ASSERT)
- {
- do code += GET(code, 1); while (*code == OP_ALT);
- c = *code;
- continue;
- }
-
+
+ c = *code;
+
+ /* Skip over forward assertions; the other assertions are skipped by
+ first_significant_code() with a TRUE final argument. */
+
+ if (c == OP_ASSERT)
+ {
+ do code += GET(code, 1); while (*code == OP_ALT);
+ c = *code;
+ continue;
+ }
+
/* For a recursion/subroutine call, if its end has been reached, which
implies a backward reference subroutine call, we can scan it. If it's a
forward reference subroutine call, we can't. To detect forward reference
@@ -2480,17 +2480,17 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
continue;
}
- /* Groups with zero repeats can of course be empty; skip them. */
-
+ /* Groups with zero repeats can of course be empty; skip them. */
+
if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
c == OP_BRAPOSZERO)
- {
+ {
code += PRIV(OP_lengths)[c];
- do code += GET(code, 1); while (*code == OP_ALT);
- c = *code;
- continue;
- }
-
+ do code += GET(code, 1); while (*code == OP_ALT);
+ c = *code;
+ continue;
+ }
+
/* A nested group that is already marked as "could be empty" can just be
skipped. */
@@ -2502,24 +2502,24 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
continue;
}
- /* For other groups, scan the branches. */
-
+ /* For other groups, scan the branches. */
+
if (c == OP_BRA || c == OP_BRAPOS ||
c == OP_CBRA || c == OP_CBRAPOS ||
c == OP_ONCE || c == OP_ONCE_NC ||
c == OP_COND || c == OP_SCOND)
- {
- BOOL empty_branch;
- if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
-
+ {
+ BOOL empty_branch;
+ if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
+
/* If a conditional group has only one branch, there is a second, implied,
empty branch, so just skip over the conditional, because it could be empty.
Otherwise, scan the individual branches of the group. */
-
+
if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
code += GET(code, 1);
else
- {
+ {
empty_branch = FALSE;
do
{
@@ -2529,176 +2529,176 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
}
while (*code == OP_ALT);
if (!empty_branch) return FALSE; /* All branches are non-empty */
- }
-
- c = *code;
- continue;
- }
-
- /* Handle the other opcodes */
-
- switch (c)
- {
- /* Check for quantifiers after a class. XCLASS is used for classes that
- cannot be represented just by a bit map. This includes negated single
+ }
+
+ c = *code;
+ continue;
+ }
+
+ /* Handle the other opcodes */
+
+ switch (c)
+ {
+ /* Check for quantifiers after a class. XCLASS is used for classes that
+ cannot be represented just by a bit map. This includes negated single
high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
- actual length is stored in the compiled code, so we must update "code"
- here. */
-
+ actual length is stored in the compiled code, so we must update "code"
+ here. */
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- case OP_XCLASS:
- ccode = code += GET(code, 1);
- goto CHECK_CLASS_REPEAT;
-#endif
-
- case OP_CLASS:
- case OP_NCLASS:
+ case OP_XCLASS:
+ ccode = code += GET(code, 1);
+ goto CHECK_CLASS_REPEAT;
+#endif
+
+ case OP_CLASS:
+ case OP_NCLASS:
ccode = code + PRIV(OP_lengths)[OP_CLASS];
-
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- CHECK_CLASS_REPEAT:
-#endif
-
- switch (*ccode)
- {
- case OP_CRSTAR: /* These could be empty; continue */
- case OP_CRMINSTAR:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
+ CHECK_CLASS_REPEAT:
+#endif
+
+ switch (*ccode)
+ {
+ case OP_CRSTAR: /* These could be empty; continue */
+ case OP_CRMINSTAR:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
- break;
-
- default: /* Non-repeat => class must match */
- case OP_CRPLUS: /* These repeats aren't empty */
- case OP_CRMINPLUS:
+ break;
+
+ default: /* Non-repeat => class must match */
+ case OP_CRPLUS: /* These repeats aren't empty */
+ case OP_CRMINPLUS:
case OP_CRPOSPLUS:
- return FALSE;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
+ return FALSE;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
case OP_CRPOSRANGE:
- if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */
- break;
- }
- break;
-
- /* Opcodes that must match a character */
-
+ if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */
+ break;
+ }
+ break;
+
+ /* Opcodes that must match a character */
+
case OP_ANY:
case OP_ALLANY:
case OP_ANYBYTE:
- case OP_PROP:
- case OP_NOTPROP:
+ case OP_PROP:
+ case OP_NOTPROP:
case OP_ANYNL:
case OP_NOT_HSPACE:
case OP_HSPACE:
case OP_NOT_VSPACE:
case OP_VSPACE:
- case OP_EXTUNI:
+ case OP_EXTUNI:
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
- case OP_CHAR:
+ case OP_CHAR:
case OP_CHARI:
- case OP_NOT:
+ case OP_NOT:
case OP_NOTI:
- case OP_PLUS:
+ case OP_PLUS:
case OP_PLUSI:
- case OP_MINPLUS:
+ case OP_MINPLUS:
case OP_MINPLUSI:
- case OP_NOTPLUS:
+ case OP_NOTPLUS:
case OP_NOTPLUSI:
- case OP_NOTMINPLUS:
+ case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
- case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_EXACT:
case OP_EXACTI:
- case OP_NOTEXACT:
+ case OP_NOTEXACT:
case OP_NOTEXACTI:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEPOSPLUS:
- case OP_TYPEEXACT:
-
- return FALSE;
-
- /* These are going to continue, as they may be empty, but we have to
- fudge the length for the \p and \P cases. */
-
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPOSSTAR:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSQUERY:
- if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
- break;
-
- /* Same for these */
-
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEPOSUPTO:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEEXACT:
+
+ return FALSE;
+
+ /* These are going to continue, as they may be empty, but we have to
+ fudge the length for the \p and \P cases. */
+
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ /* Same for these */
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
code += 2;
- break;
-
- /* End of branch */
-
- case OP_KET:
- case OP_KETRMAX:
- case OP_KETRMIN:
+ break;
+
+ /* End of branch */
+
+ case OP_KET:
+ case OP_KETRMAX:
+ case OP_KETRMIN:
case OP_KETRPOS:
- case OP_ALT:
- return TRUE;
-
- /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
+ case OP_ALT:
+ return TRUE;
+
+ /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
MINUPTO, and POSUPTO and their caseless and negative versions may be
followed by a multibyte character. */
-
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
- case OP_STAR:
+ case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
- case OP_MINSTAR:
+ case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
- case OP_POSSTAR:
+ case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
- case OP_QUERY:
+ case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
- case OP_MINQUERY:
+ case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
- case OP_POSQUERY:
+ case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
@@ -2706,24 +2706,24 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
break;
- case OP_UPTO:
+ case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
- case OP_MINUPTO:
+ case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
- case OP_POSUPTO:
+ case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
- break;
-#endif
+ break;
+#endif
/* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument
string. */
@@ -2739,51 +2739,51 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
default:
break;
- }
- }
-
-return TRUE;
-}
-
-
-
-/*************************************************
-* Scan compiled regex for non-emptiness *
-*************************************************/
-
-/* This function is called to check for left recursive calls. We want to check
-the current branch of the current pattern to see if it could match the empty
-string. If it could, we must look outwards for branches at other levels,
-stopping when we pass beyond the bracket which is the subject of the recursion.
+ }
+ }
+
+return TRUE;
+}
+
+
+
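/* Illustrative sketch only -- not part of this patch or of PCRE itself. The
emptiness scan above walks PCRE's real opcode stream; the underlying idea can
be shown with a toy "compiled branch" in which every item is one byte: 'c'
stands for an item that must consume a character, '*' for an item that may
match nothing, and '|' or NUL ends the branch. A branch could match the empty
string only if every item in it may match nothing. The toy_* names below are
invented for this sketch. */

#include <stdio.h>

static int toy_branch_could_be_empty(const char *code)
{
for (; *code != 0 && *code != '|'; code++)
  if (*code == 'c') return 0;       /* found an item that must match a char */
return 1;                           /* end of branch: everything was optional */
}

int main(void)
{
printf("%d\n", toy_branch_could_be_empty("**"));    /* prints 1: could be empty */
printf("%d\n", toy_branch_could_be_empty("*c*"));   /* prints 0: 'c' must match */
return 0;
}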
+/*************************************************
+* Scan compiled regex for non-emptiness *
+*************************************************/
+
+/* This function is called to check for left recursive calls. We want to check
+the current branch of the current pattern to see if it could match the empty
+string. If it could, we must look outwards for branches at other levels,
+stopping when we pass beyond the bracket which is the subject of the recursion.
This function is called only during the real compile, not during the
pre-compile.
-
-Arguments:
- code points to start of the recursion
- endcode points to where to stop (current RECURSE item)
- bcptr points to the chain of current (unclosed) branch starts
+
+Arguments:
+ code points to start of the recursion
+ endcode points to where to stop (current RECURSE item)
+ bcptr points to the chain of current (unclosed) branch starts
utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd pointers to tables etc
-
-Returns: TRUE if what is matched could be empty
-*/
-
-static BOOL
+
+Returns: TRUE if what is matched could be empty
+*/
+
+static BOOL
could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
branch_chain *bcptr, BOOL utf, compile_data *cd)
-{
+{
while (bcptr != NULL && bcptr->current_branch >= code)
- {
+ {
if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
return FALSE;
- bcptr = bcptr->outer;
- }
-return TRUE;
-}
-
-
-
-/*************************************************
+ bcptr = bcptr->outer;
+ }
+return TRUE;
+}
+
+
+
+/*************************************************
* Base opcode of repeated opcodes *
*************************************************/
@@ -3889,29 +3889,29 @@ for (;;)
/*************************************************
-* Check for POSIX class syntax *
-*************************************************/
-
-/* This function is called when the sequence "[:" or "[." or "[=" is
-encountered in a character class. It checks whether this is followed by a
-sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
-reach an unescaped ']' without the special preceding character, return FALSE.
-
-Originally, this function only recognized a sequence of letters between the
-terminators, but it seems that Perl recognizes any sequence of characters,
-though of course unknown POSIX names are subsequently rejected. Perl gives an
-"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
-didn't consider this to be a POSIX class. Likewise for [:1234:].
-
-The problem in trying to be exactly like Perl is in the handling of escapes. We
-have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
-class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
+* Check for POSIX class syntax *
+*************************************************/
+
+/* This function is called when the sequence "[:" or "[." or "[=" is
+encountered in a character class. It checks whether this is followed by a
+sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
+reach an unescaped ']' without the special preceding character, return FALSE.
+
+Originally, this function only recognized a sequence of letters between the
+terminators, but it seems that Perl recognizes any sequence of characters,
+though of course unknown POSIX names are subsequently rejected. Perl gives an
+"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
+didn't consider this to be a POSIX class. Likewise for [:1234:].
+
+The problem in trying to be exactly like Perl is in the handling of escapes. We
+have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
+class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
below handles the special cases \\ and \], but does not try to do any other
escape processing. This makes it different from Perl for cases such as
[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does
not recognize "l\ower". This is a lesser evil than not diagnosing bad classes
when Perl does, I think.
-
+
A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
It seems that the appearance of a nested POSIX class supersedes an apparent
external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
@@ -3923,20 +3923,20 @@ example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
seem right at all. PCRE does not allow closing square brackets in POSIX class
names.
-Arguments:
- ptr pointer to the initial [
- endptr where to return the end pointer
-
-Returns: TRUE or FALSE
-*/
-
-static BOOL
+Arguments:
+ ptr pointer to the initial [
+ endptr where to return the end pointer
+
+Returns: TRUE or FALSE
+*/
+
+static BOOL
check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
-{
+{
pcre_uchar terminator; /* Don't combine these lines; the Solaris cc */
-terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
+terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
for (++ptr; *ptr != CHAR_NULL; ptr++)
- {
+ {
if (*ptr == CHAR_BACKSLASH &&
(ptr[1] == CHAR_RIGHT_SQUARE_BRACKET ||
ptr[1] == CHAR_BACKSLASH))
@@ -3944,107 +3944,107 @@ for (++ptr; *ptr != CHAR_NULL; ptr++)
else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
- {
+ {
*endptr = ptr;
return TRUE;
- }
- }
-return FALSE;
-}
-
-
-
-
-/*************************************************
-* Check POSIX class name *
-*************************************************/
-
-/* This function is called to check the name given in a POSIX-style class entry
-such as [:alnum:].
-
-Arguments:
- ptr points to the first letter
- len the length of the name
-
-Returns: a value representing the name, or -1 if unknown
-*/
-
-static int
+ }
+ }
+return FALSE;
+}
+
+
+
+
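/* Illustrative sketch only -- not part of this patch or of PCRE itself. A
standalone, plain-char version of the "[:...:]" scan described above: given a
pointer to the initial '[', it looks for a matching ":]", ".]" or "=]" before
an unescaped ']', handling only the \\ and \] escapes, as in the comment
above. The toy_* names are invented for this sketch. */

#include <stdio.h>

static int toy_check_posix_syntax(const char *ptr, const char **endptr)
{
char terminator = *(++ptr);                   /* ':', '.' or '=' after the '[' */
for (++ptr; *ptr != 0; ptr++)
  {
  if (*ptr == '\\' && (ptr[1] == ']' || ptr[1] == '\\')) ptr++;
  else if ((*ptr == '[' && ptr[1] == terminator) || *ptr == ']') return 0;
  else if (*ptr == terminator && ptr[1] == ']')
    {
    *endptr = ptr;                            /* points at the terminator */
    return 1;
    }
  }
return 0;
}

int main(void)
{
const char *end = 0;
printf("%d\n", toy_check_posix_syntax("[:alpha:]", &end));   /* prints 1 */
printf("%d\n", toy_check_posix_syntax("[:alpha]",  &end));   /* prints 0 */
return 0;
}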
+/*************************************************
+* Check POSIX class name *
+*************************************************/
+
+/* This function is called to check the name given in a POSIX-style class entry
+such as [:alnum:].
+
+Arguments:
+ ptr points to the first letter
+ len the length of the name
+
+Returns: a value representing the name, or -1 if unknown
+*/
+
+static int
check_posix_name(const pcre_uchar *ptr, int len)
-{
-const char *pn = posix_names;
-register int yield = 0;
-while (posix_name_lengths[yield] != 0)
- {
- if (len == posix_name_lengths[yield] &&
+{
+const char *pn = posix_names;
+register int yield = 0;
+while (posix_name_lengths[yield] != 0)
+ {
+ if (len == posix_name_lengths[yield] &&
STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield;
- pn += posix_name_lengths[yield] + 1;
- yield++;
- }
-return -1;
-}
-
-
-/*************************************************
-* Adjust OP_RECURSE items in repeated group *
-*************************************************/
-
-/* OP_RECURSE items contain an offset from the start of the regex to the group
-that is referenced. This means that groups can be replicated for fixed
-repetition simply by copying (because the recursion is allowed to refer to
-earlier groups that are outside the current group). However, when a group is
+ pn += posix_name_lengths[yield] + 1;
+ yield++;
+ }
+return -1;
+}
+
+
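/* Illustrative sketch only -- not part of this patch or of PCRE itself. The
name lookup above walks a packed string of class names in step with a table of
their lengths. The same technique with a made-up three-entry table (toy_*
names invented for this sketch): */

#include <stdio.h>
#include <string.h>

static const char toy_names[] = "alpha\0digit\0space";
static const int  toy_name_lengths[] = { 5, 5, 5, 0 };

static int toy_check_posix_name(const char *ptr, int len)
{
const char *pn = toy_names;
int yield = 0;
while (toy_name_lengths[yield] != 0)
  {
  if (len == toy_name_lengths[yield] && strncmp(ptr, pn, (size_t)len) == 0)
    return yield;
  pn += toy_name_lengths[yield] + 1;          /* step over the name and its NUL */
  yield++;
  }
return -1;                                    /* unknown name */
}

int main(void)
{
printf("%d\n", toy_check_posix_name("digit", 5));   /* prints 1 */
printf("%d\n", toy_check_posix_name("oops", 4));    /* prints -1 */
return 0;
}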
+/*************************************************
+* Adjust OP_RECURSE items in repeated group *
+*************************************************/
+
+/* OP_RECURSE items contain an offset from the start of the regex to the group
+that is referenced. This means that groups can be replicated for fixed
+repetition simply by copying (because the recursion is allowed to refer to
+earlier groups that are outside the current group). However, when a group is
optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
inserted before it, after it has been compiled. This means that any OP_RECURSE
items within it that refer to the group itself or any contained groups have to
have their offsets adjusted. That is one of the jobs of this function. Before it
is called, the partially compiled regex must be temporarily terminated with
OP_END.
-
+
This function has been extended to cope with forward references for recursions
and subroutine calls. It must check the list of such references for the
group we are dealing with. If it finds that one of the recursions in the
current group is on this list, it does not adjust the value in the reference
(which is a group number). After the group has been scanned, all the offsets in
the forward reference list for the group are adjusted.
-
-Arguments:
- group points to the start of the group
- adjust the amount by which the group is to be moved
+
+Arguments:
+ group points to the start of the group
+ adjust the amount by which the group is to be moved
utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
- cd contains pointers to tables etc.
+ cd contains pointers to tables etc.
save_hwm_offset the hwm forward reference offset at the start of the group
-
-Returns: nothing
-*/
-
-static void
+
+Returns: nothing
+*/
+
+static void
adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
size_t save_hwm_offset)
-{
+{
int offset;
pcre_uchar *hc;
pcre_uchar *ptr = group;
-
+
while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
- {
+ {
for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
hc += LINK_SIZE)
- {
+ {
offset = (int)GET(hc, 0);
if (cd->start_code + offset == ptr + 1) break;
- }
-
+ }
+
/* If we have not found this recursion on the forward reference list, adjust
the recursion's offset if it's after the start of this group. */
-
- if (hc >= cd->hwm)
- {
+
+ if (hc >= cd->hwm)
+ {
offset = (int)GET(ptr, 1);
- if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
- }
-
- ptr += 1 + LINK_SIZE;
- }
+ if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
+ }
+
+ ptr += 1 + LINK_SIZE;
+ }
/* Now adjust all forward reference offsets for the group. */
@@ -4054,96 +4054,96 @@ for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
offset = (int)GET(hc, 0);
PUT(hc, 0, offset + adjust);
}
-}
-
-
-
-/*************************************************
-* Insert an automatic callout point *
-*************************************************/
-
-/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert
-callout points before each pattern item.
-
-Arguments:
- code current code pointer
- ptr current pattern pointer
- cd pointers to tables etc
-
-Returns: new code pointer
-*/
-
+}
+
+
+
+/*************************************************
+* Insert an automatic callout point *
+*************************************************/
+
+/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert
+callout points before each pattern item.
+
+Arguments:
+ code current code pointer
+ ptr current pattern pointer
+ cd pointers to tables etc
+
+Returns: new code pointer
+*/
+
static pcre_uchar *
auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
-{
-*code++ = OP_CALLOUT;
-*code++ = 255;
+{
+*code++ = OP_CALLOUT;
+*code++ = 255;
PUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */
PUT(code, LINK_SIZE, 0); /* Default length */
return code + 2 * LINK_SIZE;
-}
-
-
-
-/*************************************************
-* Complete a callout item *
-*************************************************/
-
-/* A callout item contains the length of the next item in the pattern, which
-we can't fill in till after we have reached the relevant point. This is used
-for both automatic and manual callouts.
-
-Arguments:
- previous_callout points to previous callout item
- ptr current pattern pointer
- cd pointers to tables etc
-
-Returns: nothing
-*/
-
-static void
+}
+
+
+
+/*************************************************
+* Complete a callout item *
+*************************************************/
+
+/* A callout item contains the length of the next item in the pattern, which
+we can't fill in till after we have reached the relevant point. This is used
+for both automatic and manual callouts.
+
+Arguments:
+ previous_callout points to previous callout item
+ ptr current pattern pointer
+ cd pointers to tables etc
+
+Returns: nothing
+*/
+
+static void
complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
-{
+{
int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
-PUT(previous_callout, 2 + LINK_SIZE, length);
-}
-
-
-
-#ifdef SUPPORT_UCP
-/*************************************************
-* Get othercase range *
-*************************************************/
-
-/* This function is passed the start and end of a class range, in UTF-8 mode
+PUT(previous_callout, 2 + LINK_SIZE, length);
+}
+
+
+
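/* Illustrative sketch only -- not part of this patch or of PCRE itself.
auto_callout() above writes a callout record whose "length of next item" slot
is still zero, and complete_callout() patches that slot once the next item has
been scanned. The same write-placeholder-then-patch idea in a standalone form
(all values invented for this sketch): */

#include <stdio.h>

int main(void)
{
int record[2];
int item_start = 3;                 /* pattern offset where the item begins */
int item_end = 7;                   /* known only after the item is scanned */

record[0] = item_start;             /* emitted immediately */
record[1] = 0;                      /* length placeholder */

/* ... later, once the following item has been processed ... */
record[1] = item_end - record[0];   /* patch in the real length */

printf("offset=%d length=%d\n", record[0], record[1]);   /* offset=3 length=4 */
return 0;
}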
+#ifdef SUPPORT_UCP
+/*************************************************
+* Get othercase range *
+*************************************************/
+
+/* This function is passed the start and end of a class range, in UTF-8 mode
with UCP support. It searches up the characters, looking for ranges of
-characters in the "other" case. Each call returns the next one, updating the
+characters in the "other" case. Each call returns the next one, updating the
start address. A character with multiple other cases is returned on its own
with a special return value.
-
-Arguments:
- cptr points to starting character value; updated
- d end value
- ocptr where to put start of othercase range
- odptr where to put end of othercase range
-
+
+Arguments:
+ cptr points to starting character value; updated
+ d end value
+ ocptr where to put start of othercase range
+ odptr where to put end of othercase range
+
Yield: -1 when no more
0 when a range is returned
>0 the CASESET offset for char with multiple other cases
in this case, ocptr contains the original
-*/
-
+*/
+
static int
get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
pcre_uint32 *odptr)
-{
+{
pcre_uint32 c, othercase, next;
unsigned int co;
-
+
/* Find the first character that has an other case. If it has multiple other
cases, return its case offset value. */
-for (c = *cptr; c <= d; c++)
+for (c = *cptr; c <= d; c++)
{
if ((co = UCD_CASESET(c)) != 0)
{
@@ -4153,69 +4153,69 @@ for (c = *cptr; c <= d; c++)
}
if ((othercase = UCD_OTHERCASE(c)) != c) break;
}
-
+
if (c > d) return -1; /* Reached end of range */
-
+
/* Found a character that has a single other case. Search for the end of the
range, which is either the end of the input range, or a character that has zero
or more than one other cases. */
-*ocptr = othercase;
-next = othercase + 1;
-
-for (++c; c <= d; c++)
- {
+*ocptr = othercase;
+next = othercase + 1;
+
+for (++c; c <= d; c++)
+ {
if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
- next++;
- }
-
+ next++;
+ }
+
*odptr = next - 1; /* End of othercase range */
*cptr = c; /* Rest of input range */
return 0;
-}
-#endif /* SUPPORT_UCP */
-
-
-
-/*************************************************
+}
+#endif /* SUPPORT_UCP */
+
+
+
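/* Illustrative sketch only -- not part of this patch or of PCRE itself. The
function above needs the Unicode case tables; the same idea can be shown for
ASCII with <ctype.h>. Each call finds the next run of characters in
[*cptr, d] whose "other case" values are consecutive, reports it through
*ocptr/*odptr and advances *cptr (toy_* names invented for this sketch). */

#include <ctype.h>
#include <stdio.h>

static int toy_othercase(int c)
{
if (islower(c)) return toupper(c);
if (isupper(c)) return tolower(c);
return c;                              /* no other case */
}

static int toy_get_othercase_range(int *cptr, int d, int *ocptr, int *odptr)
{
int c, othercase = 0, next;
for (c = *cptr; c <= d; c++)
  if ((othercase = toy_othercase(c)) != c) break;
if (c > d) return -1;                  /* no more cased characters */
*ocptr = othercase;
next = othercase + 1;
for (++c; c <= d; c++)
  {
  if (toy_othercase(c) != next) break;
  next++;
  }
*odptr = next - 1;                     /* end of the othercase run */
*cptr = c;                             /* where the next call resumes */
return 0;
}

int main(void)
{
int c = 'a', oc, od;
while (toy_get_othercase_range(&c, 'e', &oc, &od) == 0)
  printf("othercase run: %c-%c\n", oc, od);    /* prints "othercase run: A-E" */
return 0;
}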
+/*************************************************
* Add a character or range to a class *
-*************************************************/
-
+*************************************************/
+
/* This function packages up the logic of adding a character or range of
characters to a class. The character values in the arguments will be within the
valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
mutually recursive with the function immediately below.
-
-Arguments:
+
+Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options word
- cd contains pointers to tables etc.
+ cd contains pointers to tables etc.
start start of range character
end end of range character
-
+
Returns: the number of < 256 characters added
the pointer to extra data is updated
-*/
-
+*/
+
static int
add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
compile_data *cd, pcre_uint32 start, pcre_uint32 end)
-{
+{
pcre_uint32 c;
pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff);
int n8 = 0;
-
+
/* If caseless matching is required, scan the range and process alternate
cases. In Unicode, there are 8-bit characters that have alternate cases that
are greater than 255 and vice-versa. Sometimes we can just extend the original
range. */
-
+
if ((options & PCRE_CASELESS) != 0)
- {
+ {
#ifdef SUPPORT_UCP
if ((options & PCRE_UTF8) != 0)
- {
+ {
int rc;
pcre_uint32 oc, od;
@@ -4223,20 +4223,20 @@ if ((options & PCRE_CASELESS) != 0)
c = start;
while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
- {
+ {
/* Handle a single character that has more than one other case. */
-
+
if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,
PRIV(ucd_caseless_sets) + rc, oc);
-
+
/* Do nothing if the other case range is within the original range. */
-
+
else if (oc >= start && od <= end) continue;
-
+
/* Extend the original range if there is overlap, noting that if oc < c, we
can't have od > end because a subrange is always shorter than the basic
range. Otherwise, use a recursive call to add the additional range. */
-
+
else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
else if (od > end && oc <= end + 1)
{
@@ -4248,52 +4248,52 @@ if ((options & PCRE_CASELESS) != 0)
}
else
#endif /* SUPPORT_UCP */
-
+
/* Not UTF-mode, or no UCP */
for (c = start; c <= classbits_end; c++)
- {
+ {
SETBIT(classbits, cd->fcc[c]);
n8++;
- }
- }
-
+ }
+ }
+
/* Now handle the original range. Adjust the final value according to the bit
length - this means that the same lists of (e.g.) horizontal spaces can be used
in all cases. */
-
+
#if defined COMPILE_PCRE8
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF8) == 0)
#endif
if (end > 0xff) end = 0xff;
-
+
#elif defined COMPILE_PCRE16
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF16) == 0)
#endif
if (end > 0xffff) end = 0xffff;
-
+
#endif /* COMPILE_PCRE[8|16] */
-
+
/* Use the bitmap for characters < 256. Otherwise use extra data.*/
-
+
for (c = start; c <= classbits_end; c++)
- {
+ {
/* Regardless of start, c will always be <= 255. */
SETBIT(classbits, c);
n8++;
}
-
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (start <= 0xff) start = 0xff + 1;
-
+
if (end >= start)
{
pcre_uchar *uchardata = *uchardptr;
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */
- {
+ {
if (start < end)
{
*uchardata++ = XCL_RANGE;
@@ -4305,49 +4305,49 @@ if (end >= start)
*uchardata++ = XCL_SINGLE;
uchardata += PRIV(ord2utf)(start, uchardata);
}
- }
- else
+ }
+ else
#endif /* SUPPORT_UTF */
-
+
/* Without UTF support, character values are constrained by the bit length,
and can only be > 256 for 16-bit and 32-bit libraries. */
-
+
#ifdef COMPILE_PCRE8
{}
-#else
+#else
if (start < end)
- {
+ {
*uchardata++ = XCL_RANGE;
*uchardata++ = start;
*uchardata++ = end;
- }
+ }
else if (start == end)
- {
+ {
*uchardata++ = XCL_SINGLE;
*uchardata++ = start;
- }
+ }
#endif
-
+
+  *uchardptr = uchardata;   /* Update extra data pointer */
- }
+ }
#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
-
+
return n8; /* Number of 8-bit characters */
}
-
-
-
-
+
+
+
+
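/* Illustrative sketch only -- not part of this patch or of PCRE itself.
Characters below 256 are recorded in a 32-byte bit map, one bit per character,
which is how classbits[] is used above. A minimal standalone version of that
bit map handling (TOY_* macros invented for this sketch): */

#include <stdio.h>
#include <string.h>

#define TOY_SETBIT(map, c)  ((map)[(c) >> 3] |= (unsigned char)(1u << ((c) & 7)))
#define TOY_GETBIT(map, c)  (((map)[(c) >> 3] >> ((c) & 7)) & 1)

int main(void)
{
unsigned char classbits[32];
unsigned int c;

memset(classbits, 0, sizeof(classbits));
for (c = 'a'; c <= 'f'; c++) TOY_SETBIT(classbits, c);    /* add the range a-f */

printf("%d %d\n", TOY_GETBIT(classbits, 'c'), TOY_GETBIT(classbits, 'z'));
return 0;                                                 /* prints "1 0" */
}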
/*************************************************
* Add a list of characters to a class *
*************************************************/
-
+
/* This function is used for adding a list of case-equivalent characters to a
class, and also for adding a list of horizontal or vertical whitespace. If the
list is in order (which it should be), ranges of characters are detected and
handled appropriately. This function is mutually recursive with the function
above.
-
+
Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
@@ -4357,11 +4357,11 @@ Arguments:
except character to omit; this is used when adding lists of
case-equivalent characters to avoid including the one we
already know about
-
+
Returns: the number of < 256 characters added
the pointer to extra data is updated
*/
-
+
static int
add_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
compile_data *cd, const pcre_uint32 *p, unsigned int except)
@@ -4374,32 +4374,32 @@ while (p[0] < NOTACHAR)
{
while(p[n+1] == p[0] + n + 1) n++;
n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);
- }
+ }
p += n + 1;
}
return n8;
}
-
-
-
+
+
+
/*************************************************
* Add characters not in a list to a class *
*************************************************/
-
+
/* This function is used for adding the complement of a list of horizontal or
vertical whitespace to a class. The list must be in order.
-
+
Arguments:
classbits the bit map for characters < 256
uchardptr points to the pointer for extra data
options the options word
cd contains pointers to tables etc.
p points to row of 32-bit values, terminated by NOTACHAR
-
+
Returns: the number of < 256 characters added
the pointer to extra data is updated
*/
-
+
static int
add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
int options, compile_data *cd, const pcre_uint32 *p)
@@ -4414,23 +4414,23 @@ while (p[0] < NOTACHAR)
n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
(p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
p++;
- }
+ }
return n8;
-}
-
-
-
-/*************************************************
-* Compile one branch *
-*************************************************/
-
-/* Scan the pattern, compiling it into a vector. If the options are
-changed during the branch, the pointer is used to change the external options
-bits. This function is used during the pre-compile phase when we are trying
-to find out the amount of memory needed, as well as during the real compile
-phase. The value of lengthptr distinguishes the two phases.
-
-Arguments:
+}
+
+
+
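/* Illustrative sketch only -- not part of this patch or of PCRE itself.
add_list_to_class() and add_not_list_to_class() above rely on their lists
being ordered, so that consecutive code points can be folded into ranges. The
run detection on its own, over a sentinel-terminated list (TOY_NOTACHAR and
the list contents are invented for this sketch; the real code also handles an
"except" character): */

#include <stdio.h>

#define TOY_NOTACHAR 0xffffffffu

int main(void)
{
static const unsigned int list[] = { 'a', 'b', 'c', 'x', 'z', TOY_NOTACHAR };
const unsigned int *p = list;

while (p[0] < TOY_NOTACHAR)
  {
  unsigned int n = 0;
  while (p[n + 1] == p[0] + n + 1) n++;             /* extend the consecutive run */
  printf("range %c-%c\n", (int)p[0], (int)p[n]);    /* a-c, x-x, z-z */
  p += n + 1;
  }
return 0;
}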
+/*************************************************
+* Compile one branch *
+*************************************************/
+
+/* Scan the pattern, compiling it into a vector. If the options are
+changed during the branch, the pointer is used to change the external options
+bits. This function is used during the pre-compile phase when we are trying
+to find out the amount of memory needed, as well as during the real compile
+phase. The value of lengthptr distinguishes the two phases.
+
+Arguments:
optionsptr pointer to the option bits
codeptr points to the pointer to the current code point
ptrptr points to the current pattern pointer
@@ -4444,38 +4444,38 @@ Arguments:
cd contains pointers to tables etc.
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
-
+
Returns: TRUE on success
FALSE, with *errorcodeptr set non-zero on error
-*/
-
-static BOOL
+*/
+
+static BOOL
compile_branch(int *optionsptr, pcre_uchar **codeptr,
const pcre_uchar **ptrptr, int *errorcodeptr,
pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
branch_chain *bcptr, int cond_depth,
- compile_data *cd, int *lengthptr)
-{
-int repeat_type, op_type;
-int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
-int bravalue = 0;
-int greedy_default, greedy_non_default;
+ compile_data *cd, int *lengthptr)
+{
+int repeat_type, op_type;
+int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
+int bravalue = 0;
+int greedy_default, greedy_non_default;
pcre_uint32 firstchar, reqchar;
pcre_int32 firstcharflags, reqcharflags;
pcre_uint32 zeroreqchar, zerofirstchar;
pcre_int32 zeroreqcharflags, zerofirstcharflags;
pcre_int32 req_caseopt, reqvary, tempreqvary;
int options = *optionsptr; /* May change dynamically */
-int after_manual_callout = 0;
-int length_prevgroup = 0;
+int after_manual_callout = 0;
+int length_prevgroup = 0;
register pcre_uint32 c;
int escape;
register pcre_uchar *code = *codeptr;
pcre_uchar *last_code = code;
pcre_uchar *orig_code = code;
pcre_uchar *tempcode;
-BOOL inescq = FALSE;
+BOOL inescq = FALSE;
BOOL groupsetfirstchar = FALSE;
const pcre_uchar *ptr = *ptrptr;
const pcre_uchar *tempptr;
@@ -4484,7 +4484,7 @@ pcre_uchar *previous = NULL;
pcre_uchar *previous_callout = NULL;
size_t item_hwm_offset = 0;
pcre_uint8 classbits[32];
-
+
/* We can fish out the UTF-8 setting once and for all into a BOOL, but we
must not do this for other options (e.g. PCRE_EXTENDED) because they may change
dynamically as we process the pattern. */
@@ -4495,10 +4495,10 @@ BOOL utf = (options & PCRE_UTF8) != 0;
#ifndef COMPILE_PCRE32
pcre_uchar utf_chars[6];
#endif
-#else
+#else
BOOL utf = FALSE;
-#endif
-
+#endif
+
/* Helper variables for OP_XCLASS opcode (for characters > 255). We define
class_uchardata always so that it can be passed to add_to_class() always,
though it will not be used in non-UTF 8-bit cases. This avoids having to supply
@@ -4511,70 +4511,70 @@ pcre_uchar *class_uchardata_base;
#endif
#ifdef PCRE_DEBUG
-if (lengthptr != NULL) DPRINTF((">> start branch\n"));
-#endif
-
-/* Set up the default and non-default settings for greediness */
-
-greedy_default = ((options & PCRE_UNGREEDY) != 0);
-greedy_non_default = greedy_default ^ 1;
-
-/* Initialize no first byte, no required byte. REQ_UNSET means "no char
-matching encountered yet". It gets changed to REQ_NONE if we hit something that
+if (lengthptr != NULL) DPRINTF((">> start branch\n"));
+#endif
+
+/* Set up the default and non-default settings for greediness */
+
+greedy_default = ((options & PCRE_UNGREEDY) != 0);
+greedy_non_default = greedy_default ^ 1;
+
+/* Initialize no first byte, no required byte. REQ_UNSET means "no char
+matching encountered yet". It gets changed to REQ_NONE if we hit something that
matches a non-fixed char first char; reqchar just remains unset if we never
-find one.
-
-When we hit a repeat whose minimum is zero, we may have to adjust these values
-to take the zero repeat into account. This is implemented by setting them to
+find one.
+
+When we hit a repeat whose minimum is zero, we may have to adjust these values
+to take the zero repeat into account. This is implemented by setting them to
zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual
-item types that can be repeated set these backoff variables appropriately. */
-
+item types that can be repeated set these backoff variables appropriately. */
+
firstchar = reqchar = zerofirstchar = zeroreqchar = 0;
firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET;
-
+
/* The variable req_caseopt contains either the REQ_CASELESS value
or zero, according to the current setting of the caseless flag. The
REQ_CASELESS leaves the lower 28 bit empty. It is added into the
firstchar or reqchar variables to record the case status of the
value. This is used only for ASCII characters. */
-
+
req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
-
-/* Switch on next character until the end of the branch */
-
-for (;; ptr++)
- {
- BOOL negate_class;
- BOOL should_flip_negation;
- BOOL possessive_quantifier;
- BOOL is_quantifier;
- BOOL is_recurse;
- BOOL reset_bracount;
+
+/* Switch on next character until the end of the branch */
+
+for (;; ptr++)
+ {
+ BOOL negate_class;
+ BOOL should_flip_negation;
+ BOOL possessive_quantifier;
+ BOOL is_quantifier;
+ BOOL is_recurse;
+ BOOL reset_bracount;
int class_has_8bitchar;
int class_one_char;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
BOOL xclass_has_prop;
#endif
- int newoptions;
- int recno;
- int refsign;
- int skipbytes;
+ int newoptions;
+ int recno;
+ int refsign;
+ int skipbytes;
pcre_uint32 subreqchar, subfirstchar;
pcre_int32 subreqcharflags, subfirstcharflags;
- int terminator;
+ int terminator;
unsigned int mclength;
unsigned int tempbracount;
pcre_uint32 ec;
pcre_uchar mcbuffer[8];
-
+
/* Come here to restart the loop without advancing the pointer. */
-
+
REDO_LOOP:
/* Get next character in the pattern */
- c = *ptr;
-
+ c = *ptr;
+
/* If we are at the end of a nested substitution, revert to the outer level
string. Nesting only happens one level deep. */
@@ -4585,122 +4585,122 @@ for (;; ptr++)
c = *ptr;
}
- /* If we are in the pre-compile phase, accumulate the length used for the
- previous cycle of this loop. */
-
- if (lengthptr != NULL)
- {
+ /* If we are in the pre-compile phase, accumulate the length used for the
+ previous cycle of this loop. */
+
+ if (lengthptr != NULL)
+ {
#ifdef PCRE_DEBUG
- if (code > cd->hwm) cd->hwm = code; /* High water info */
-#endif
+ if (code > cd->hwm) cd->hwm = code; /* High water info */
+#endif
if (code > cd->start_workspace + cd->workspace_size -
WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */
- {
+ {
*errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)?
ERR52 : ERR87;
- goto FAILED;
- }
-
- /* There is at least one situation where code goes backwards: this is the
- case of a zero quantifier after a class (e.g. [ab]{0}). At compile time,
- the class is simply eliminated. However, it is created first, so we have to
- allow memory for it. Therefore, don't ever reduce the length at this point.
- */
-
- if (code < last_code) code = last_code;
-
- /* Paranoid check for integer overflow */
-
- if (OFLOW_MAX - *lengthptr < code - last_code)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
-
+ goto FAILED;
+ }
+
+ /* There is at least one situation where code goes backwards: this is the
+ case of a zero quantifier after a class (e.g. [ab]{0}). At compile time,
+ the class is simply eliminated. However, it is created first, so we have to
+ allow memory for it. Therefore, don't ever reduce the length at this point.
+ */
+
+ if (code < last_code) code = last_code;
+
+ /* Paranoid check for integer overflow */
+
+ if (OFLOW_MAX - *lengthptr < code - last_code)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+
*lengthptr += (int)(code - last_code);
DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,
(int)(code - last_code), c, c));
-
- /* If "previous" is set and it is not at the start of the work space, move
- it back to there, in order to avoid filling up the work space. Otherwise,
- if "previous" is NULL, reset the current code pointer to the start. */
-
- if (previous != NULL)
- {
- if (previous > orig_code)
- {
+
+ /* If "previous" is set and it is not at the start of the work space, move
+ it back to there, in order to avoid filling up the work space. Otherwise,
+ if "previous" is NULL, reset the current code pointer to the start. */
+
+ if (previous != NULL)
+ {
+ if (previous > orig_code)
+ {
memmove(orig_code, previous, IN_UCHARS(code - previous));
- code -= previous - orig_code;
- previous = orig_code;
- }
- }
- else code = orig_code;
-
- /* Remember where this code item starts so we can pick up the length
- next time round. */
-
- last_code = code;
- }
-
- /* In the real compile phase, just check the workspace used by the forward
- reference list. */
-
+ code -= previous - orig_code;
+ previous = orig_code;
+ }
+ }
+ else code = orig_code;
+
+ /* Remember where this code item starts so we can pick up the length
+ next time round. */
+
+ last_code = code;
+ }
+
+ /* In the real compile phase, just check the workspace used by the forward
+ reference list. */
+
else if (cd->hwm > cd->start_workspace + cd->workspace_size)
- {
- *errorcodeptr = ERR52;
- goto FAILED;
- }
-
+ {
+ *errorcodeptr = ERR52;
+ goto FAILED;
+ }
+
/* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
isolated \E is ignored. */
-
+
if (c != CHAR_NULL)
- {
+ {
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
- {
- inescq = FALSE;
- ptr++;
- continue;
- }
+ {
+ inescq = FALSE;
+ ptr++;
+ continue;
+ }
else if (inescq)
- {
- if (previous_callout != NULL)
- {
- if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
- complete_callout(previous_callout, ptr, cd);
- previous_callout = NULL;
- }
- if ((options & PCRE_AUTO_CALLOUT) != 0)
- {
- previous_callout = code;
- code = auto_callout(code, ptr, cd);
- }
- goto NORMAL_CHAR;
- }
-
+ {
+ if (previous_callout != NULL)
+ {
+ if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
+ complete_callout(previous_callout, ptr, cd);
+ previous_callout = NULL;
+ }
+ if ((options & PCRE_AUTO_CALLOUT) != 0)
+ {
+ previous_callout = code;
+ code = auto_callout(code, ptr, cd);
+ }
+ goto NORMAL_CHAR;
+ }
+
/* Check for the start of a \Q...\E sequence. We must do this here rather
than later in case it is immediately followed by \E, which turns it into a
"do nothing" sequence. */
-
+
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
{
inescq = TRUE;
ptr++;
continue;
}
- }
-
+ }
+
/* In extended mode, skip white space and comments. */
-
- if ((options & PCRE_EXTENDED) != 0)
- {
+
+ if ((options & PCRE_EXTENDED) != 0)
+ {
const pcre_uchar *wscptr = ptr;
while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
if (c == CHAR_NUMBER_SIGN)
- {
+ {
ptr++;
while (*ptr != CHAR_NULL)
- {
+ {
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
{ /* IS_NEWLINE sets cd->nllen. */
ptr += cd->nllen;
@@ -4710,9 +4710,9 @@ for (;; ptr++)
#ifdef SUPPORT_UTF
if (utf) FORWARDCHAR(ptr);
#endif
- }
+ }
}
-
+
/* If we skipped any characters, restart the loop. Otherwise, we didn't see
a comment. */
@@ -4732,12 +4732,12 @@ for (;; ptr++)
{
*errorcodeptr = ERR18;
goto FAILED;
- }
+ }
continue;
- }
-
+ }
+
/* See if the next thing is a quantifier. */
-
+
is_quantifier =
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
@@ -4747,7 +4747,7 @@ for (;; ptr++)
if (!is_quantifier && previous_callout != NULL && nestptr == NULL &&
after_manual_callout-- <= 0)
- {
+ {
if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
complete_callout(previous_callout, ptr, cd);
previous_callout = NULL;
@@ -4758,15 +4758,15 @@ for (;; ptr++)
if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
{
- previous_callout = code;
- code = auto_callout(code, ptr, cd);
- }
-
+ previous_callout = code;
+ code = auto_callout(code, ptr, cd);
+ }
+
/* Process the next pattern item. */
- switch(c)
- {
- /* ===================================================================*/
+ switch(c)
+ {
+ /* ===================================================================*/
case CHAR_NULL: /* The branch terminates at string end */
case CHAR_VERTICAL_LINE: /* or | or ) */
case CHAR_RIGHT_PARENTHESIS:
@@ -4774,68 +4774,68 @@ for (;; ptr++)
*firstcharflagsptr = firstcharflags;
*reqcharptr = reqchar;
*reqcharflagsptr = reqcharflags;
- *codeptr = code;
- *ptrptr = ptr;
- if (lengthptr != NULL)
- {
- if (OFLOW_MAX - *lengthptr < code - last_code)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
+ *codeptr = code;
+ *ptrptr = ptr;
+ if (lengthptr != NULL)
+ {
+ if (OFLOW_MAX - *lengthptr < code - last_code)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
*lengthptr += (int)(code - last_code); /* To include callout length */
- DPRINTF((">> end branch\n"));
- }
- return TRUE;
-
-
- /* ===================================================================*/
- /* Handle single-character metacharacters. In multiline mode, ^ disables
- the setting of any following char as a first character. */
-
+ DPRINTF((">> end branch\n"));
+ }
+ return TRUE;
+
+
+ /* ===================================================================*/
+ /* Handle single-character metacharacters. In multiline mode, ^ disables
+ the setting of any following char as a first character. */
+
case CHAR_CIRCUMFLEX_ACCENT:
previous = NULL;
- if ((options & PCRE_MULTILINE) != 0)
- {
+ if ((options & PCRE_MULTILINE) != 0)
+ {
if (firstcharflags == REQ_UNSET)
zerofirstcharflags = firstcharflags = REQ_NONE;
*code++ = OP_CIRCM;
- }
+ }
else *code++ = OP_CIRC;
- break;
-
+ break;
+
case CHAR_DOLLAR_SIGN:
- previous = NULL;
+ previous = NULL;
*code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL;
- break;
-
- /* There can never be a first char if '.' is first, whatever happens about
+ break;
+
+ /* There can never be a first char if '.' is first, whatever happens about
repeats. The value of reqchar doesn't change either. */
-
+
case CHAR_DOT:
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
- previous = code;
+ previous = code;
item_hwm_offset = cd->hwm - cd->start_workspace;
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
- break;
-
-
- /* ===================================================================*/
- /* Character classes. If the included characters are all < 256, we build a
- 32-byte bitmap of the permitted characters, except in the special case
- where there is only one such character. For negated classes, we build the
- map as usual, then invert it at the end. However, we use a different opcode
- so that data characters > 255 can be handled correctly.
-
- If the class contains characters outside the 0-255 range, a different
- opcode is compiled. It may optionally have a bit map for characters < 256,
-  but those above are explicitly listed afterwards. A flag byte tells
- whether the bitmap is present, and whether this is a negated class or not.
-
+ break;
+
+
+ /* ===================================================================*/
+ /* Character classes. If the included characters are all < 256, we build a
+ 32-byte bitmap of the permitted characters, except in the special case
+ where there is only one such character. For negated classes, we build the
+ map as usual, then invert it at the end. However, we use a different opcode
+ so that data characters > 255 can be handled correctly.
+
+ If the class contains characters outside the 0-255 range, a different
+ opcode is compiled. It may optionally have a bit map for characters < 256,
+  but those above are explicitly listed afterwards. A flag byte tells
+ whether the bitmap is present, and whether this is a negated class or not.
+
In JavaScript compatibility mode, an isolated ']' causes an error. In
default (Perl) mode, it is treated as a data character. */
@@ -4870,42 +4870,42 @@ for (;; ptr++)
/* Handle a real character class. */
- previous = code;
+ previous = code;
item_hwm_offset = cd->hwm - cd->start_workspace;
-
- /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
- they are encountered at the top level, so we'll do that too. */
-
+
+ /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
+ they are encountered at the top level, so we'll do that too. */
+
if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
ptr[1] == CHAR_EQUALS_SIGN) &&
- check_posix_syntax(ptr, &tempptr))
- {
+ check_posix_syntax(ptr, &tempptr))
+ {
*errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
- goto FAILED;
- }
-
- /* If the first character is '^', set the negation flag and skip it. Also,
- if the first few characters (either before or after ^) are \Q\E or \E we
- skip them too. This makes for compatibility with Perl. */
-
- negate_class = FALSE;
- for (;;)
- {
- c = *(++ptr);
+ goto FAILED;
+ }
+
+ /* If the first character is '^', set the negation flag and skip it. Also,
+ if the first few characters (either before or after ^) are \Q\E or \E we
+ skip them too. This makes for compatibility with Perl. */
+
+ negate_class = FALSE;
+ for (;;)
+ {
+ c = *(++ptr);
if (c == CHAR_BACKSLASH)
- {
+ {
if (ptr[1] == CHAR_E)
ptr++;
else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
ptr += 3;
else
break;
- }
+ }
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
- negate_class = TRUE;
- else break;
- }
-
+ negate_class = TRUE;
+ else break;
+ }
+
/* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
an initial ']' is taken as a data character -- the code below handles
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
@@ -4921,21 +4921,21 @@ for (;; ptr++)
break;
}
- /* If a class contains a negative special such as \S, we need to flip the
- negation flag at the end, so that support for characters > 255 works
- correctly (they are all included in the class). */
-
- should_flip_negation = FALSE;
-
+ /* If a class contains a negative special such as \S, we need to flip the
+ negation flag at the end, so that support for characters > 255 works
+ correctly (they are all included in the class). */
+
+ should_flip_negation = FALSE;
+
/* Extended class (xclass) will be used when characters > 255
might match. */
-
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
xclass = FALSE;
class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
class_uchardata_base = class_uchardata; /* Save the start */
#endif
-
+
/* For optimization purposes, we track some properties of the class:
class_has_8bitchar will be non-zero if the class contains at least one <
256 character; class_one_char will be 1 if the class contains just one
@@ -4948,28 +4948,28 @@ for (;; ptr++)
xclass_has_prop = FALSE;
#endif
- /* Initialize the 32-char bit map to all zeros. We build the map in a
+ /* Initialize the 32-char bit map to all zeros. We build the map in a
temporary bit of memory, in case the class contains fewer than two
8-bit characters because in that case the compiled code doesn't use the bit
map. */
-
+
memset(classbits, 0, 32 * sizeof(pcre_uint8));
-
- /* Process characters until ] is reached. By writing this as a "do" it
- means that an initial ] is taken as a data character. At the start of the
- loop, c contains the first byte of the character. */
-
+
+ /* Process characters until ] is reached. By writing this as a "do" it
+ means that an initial ] is taken as a data character. At the start of the
+ loop, c contains the first byte of the character. */
+
if (c != CHAR_NULL) do
- {
+ {
const pcre_uchar *oldptr;
-
+
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(c))
- { /* Braces are required because the */
- GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
- }
-#endif
-
+ { /* Braces are required because the */
+ GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
+ }
+#endif
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* In the pre-compile phase, accumulate the length of any extra
data and reset the pointer. This is so that very large classes that
@@ -4986,67 +4986,67 @@ for (;; ptr++)
}
#endif
- /* Inside \Q...\E everything is literal except \E */
-
- if (inescq)
- {
+ /* Inside \Q...\E everything is literal except \E */
+
+ if (inescq)
+ {
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */
- {
- inescq = FALSE; /* Reset literal state */
- ptr++; /* Skip the 'E' */
- continue; /* Carry on with next */
- }
- goto CHECK_RANGE; /* Could be range if \E follows */
- }
-
- /* Handle POSIX class names. Perl allows a negation extension of the
- form [:^name:]. A square bracket that doesn't match the syntax is
- treated as a literal. We also recognize the POSIX constructions
- [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
- 5.6 and 5.8 do. */
-
+ {
+ inescq = FALSE; /* Reset literal state */
+ ptr++; /* Skip the 'E' */
+ continue; /* Carry on with next */
+ }
+ goto CHECK_RANGE; /* Could be range if \E follows */
+ }
+
+ /* Handle POSIX class names. Perl allows a negation extension of the
+ form [:^name:]. A square bracket that doesn't match the syntax is
+ treated as a literal. We also recognize the POSIX constructions
+ [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
+ 5.6 and 5.8 do. */
+
if (c == CHAR_LEFT_SQUARE_BRACKET &&
(ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
- {
- BOOL local_negate = FALSE;
- int posix_class, taboffset, tabopt;
+ {
+ BOOL local_negate = FALSE;
+ int posix_class, taboffset, tabopt;
register const pcre_uint8 *cbits = cd->cbits;
pcre_uint8 pbits[32];
-
+
if (ptr[1] != CHAR_COLON)
- {
- *errorcodeptr = ERR31;
- goto FAILED;
- }
-
- ptr += 2;
+ {
+ *errorcodeptr = ERR31;
+ goto FAILED;
+ }
+
+ ptr += 2;
if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
- {
- local_negate = TRUE;
- should_flip_negation = TRUE; /* Note negative special */
- ptr++;
- }
-
+ {
+ local_negate = TRUE;
+ should_flip_negation = TRUE; /* Note negative special */
+ ptr++;
+ }
+
posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
- if (posix_class < 0)
- {
- *errorcodeptr = ERR30;
- goto FAILED;
- }
-
- /* If matching is caseless, upper and lower are converted to
- alpha. This relies on the fact that the class table starts with
- alpha, lower, upper as the first 3 entries. */
-
- if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
- posix_class = 0;
-
+ if (posix_class < 0)
+ {
+ *errorcodeptr = ERR30;
+ goto FAILED;
+ }
+
+ /* If matching is caseless, upper and lower are converted to
+ alpha. This relies on the fact that the class table starts with
+ alpha, lower, upper as the first 3 entries. */
+
+ if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
+ posix_class = 0;
+
/* When PCRE_UCP is set, some of the POSIX classes are converted to
different escape sequences that use Unicode properties \p or \P. Others
that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
directly. */
-
+
#ifdef SUPPORT_UCP
if ((options & PCRE_UCP) != 0)
{
@@ -5115,91 +5115,91 @@ for (;; ptr++)
may be in the main map already. At the end we or the result into the
bit map that is being built. */
- posix_class *= 3;
-
- /* Copy in the first table (always present) */
-
- memcpy(pbits, cbits + posix_class_maps[posix_class],
+ posix_class *= 3;
+
+ /* Copy in the first table (always present) */
+
+ memcpy(pbits, cbits + posix_class_maps[posix_class],
32 * sizeof(pcre_uint8));
-
- /* If there is a second table, add or remove it as required. */
-
- taboffset = posix_class_maps[posix_class + 1];
- tabopt = posix_class_maps[posix_class + 2];
-
- if (taboffset >= 0)
- {
- if (tabopt >= 0)
- for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
- else
- for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
- }
-
+
+ /* If there is a second table, add or remove it as required. */
+
+ taboffset = posix_class_maps[posix_class + 1];
+ tabopt = posix_class_maps[posix_class + 2];
+
+ if (taboffset >= 0)
+ {
+ if (tabopt >= 0)
+ for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
+ else
+ for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
+ }
+
/* Now see if we need to remove any special characters. An option
- value of 1 removes vertical space and 2 removes underscore. */
-
- if (tabopt < 0) tabopt = -tabopt;
- if (tabopt == 1) pbits[1] &= ~0x3c;
- else if (tabopt == 2) pbits[11] &= 0x7f;
-
- /* Add the POSIX table or its complement into the main table that is
- being built and we are done. */
-
- if (local_negate)
- for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
- else
- for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
-
- ptr = tempptr + 1;
+ value of 1 removes vertical space and 2 removes underscore. */
+
+ if (tabopt < 0) tabopt = -tabopt;
+ if (tabopt == 1) pbits[1] &= ~0x3c;
+ else if (tabopt == 2) pbits[11] &= 0x7f;
+
+ /* Add the POSIX table or its complement into the main table that is
+ being built and we are done. */
+
+ if (local_negate)
+ for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
+ else
+ for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
+
+ ptr = tempptr + 1;
/* Every class contains at least one < 256 character. */
class_has_8bitchar = 1;
/* Every class contains at least two characters. */
class_one_char = 2;
- continue; /* End of POSIX syntax handling */
- }
-
- /* Backslash may introduce a single character, or it may introduce one
- of the specials, which just set a flag. The sequence \b is a special
+ continue; /* End of POSIX syntax handling */
+ }
+
+ /* Backslash may introduce a single character, or it may introduce one
+ of the specials, which just set a flag. The sequence \b is a special
case. Inside a class (and only there) it is treated as backspace. We
assume that other escapes have more than one character in them, so
speculatively set both class_has_8bitchar and class_one_char bigger
than one. Unrecognized escapes fall through and are either treated
as literal characters (by default), or are faulted if
PCRE_EXTRA is set. */
-
+
if (c == CHAR_BACKSLASH)
- {
+ {
escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
TRUE);
- if (*errorcodeptr != 0) goto FAILED;
+ if (*errorcodeptr != 0) goto FAILED;
if (escape == 0) c = ec;
else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
else if (escape == ESC_N) /* \N is not supported in a class */
- {
+ {
*errorcodeptr = ERR71;
goto FAILED;
}
else if (escape == ESC_Q) /* Handle start of quoted string */
{
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
- {
- ptr += 2; /* avoid empty string */
- }
- else inescq = TRUE;
- continue;
- }
+ {
+ ptr += 2; /* avoid empty string */
+ }
+ else inescq = TRUE;
+ continue;
+ }
else if (escape == ESC_E) continue; /* Ignore orphan \E */
-
+
else
- {
+ {
register const pcre_uint8 *cbits = cd->cbits;
/* Every class contains at least two < 256 characters. */
class_has_8bitchar++;
/* Every class contains at least two characters. */
class_one_char += 2;
-
+
switch (escape)
- {
+ {
#ifdef SUPPORT_UCP
case ESC_du: /* These are the values given for \d etc */
case ESC_DU: /* when PCRE_UCP is set. We replace the */
@@ -5212,24 +5212,24 @@ for (;; ptr++)
class_has_8bitchar--; /* Undo! */
continue;
#endif
- case ESC_d:
- for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
- continue;
-
- case ESC_D:
- should_flip_negation = TRUE;
- for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
- continue;
-
- case ESC_w:
- for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
- continue;
-
- case ESC_W:
- should_flip_negation = TRUE;
- for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
- continue;
-
+ case ESC_d:
+ for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
+ continue;
+
+ case ESC_D:
+ should_flip_negation = TRUE;
+ for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
+ continue;
+
+ case ESC_w:
+ for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
+ continue;
+
+ case ESC_W:
+ should_flip_negation = TRUE;
+ for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
+ continue;
+
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
previously set by something earlier in the character class.
@@ -5237,41 +5237,41 @@ for (;; ptr++)
we could just adjust the appropriate bit. From PCRE 8.34 we no
longer treat \s and \S specially. */
- case ESC_s:
- for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
- continue;
-
- case ESC_S:
- should_flip_negation = TRUE;
- for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
- continue;
-
+ case ESC_s:
+ for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
+ continue;
+
+ case ESC_S:
+ should_flip_negation = TRUE;
+ for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
+ continue;
+
/* The rest apply in both UCP and non-UCP cases. */
-
+
case ESC_h:
(void)add_list_to_class(classbits, &class_uchardata, options, cd,
PRIV(hspace_list), NOTACHAR);
continue;
-
+
case ESC_H:
(void)add_not_list_to_class(classbits, &class_uchardata, options,
cd, PRIV(hspace_list));
continue;
-
+
case ESC_v:
(void)add_list_to_class(classbits, &class_uchardata, options, cd,
PRIV(vspace_list), NOTACHAR);
- continue;
-
+ continue;
+
case ESC_V:
(void)add_not_list_to_class(classbits, &class_uchardata, options,
cd, PRIV(vspace_list));
- continue;
-
+ continue;
+
case ESC_p:
case ESC_P:
#ifdef SUPPORT_UCP
- {
+ {
BOOL negated;
unsigned int ptype = 0, pdata = 0;
if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
@@ -5283,118 +5283,118 @@ for (;; ptr++)
xclass_has_prop = TRUE;
class_has_8bitchar--; /* Undo! */
continue;
- }
+ }
#else
*errorcodeptr = ERR45;
goto FAILED;
-#endif
+#endif
/* Unrecognized escapes are faulted if PCRE is running in its
strict mode. By default, for compatibility with Perl, they are
treated as literals. */
-
+
default:
if ((options & PCRE_EXTRA) != 0)
- {
+ {
*errorcodeptr = ERR7;
goto FAILED;
- }
+ }
class_has_8bitchar--; /* Undo the speculative increase. */
class_one_char -= 2; /* Undo the speculative increase. */
c = *ptr; /* Get the final character and fall through */
break;
- }
+ }
}
-
+
/* Fall through if the escape just defined a single character (c >= 0).
This may be greater than 256. */
-
+
escape = 0;
-
- } /* End of backslash handling */
-
+
+ } /* End of backslash handling */
+
/* A character may be followed by '-' to form a range. However, Perl does
not permit ']' to be the end of the range. A '-' character at the end is
treated as a literal. Perl ignores orphaned \E sequences entirely. The
code for handling \Q and \E is messy. */
-
- CHECK_RANGE:
+
+ CHECK_RANGE:
while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
- {
- inescq = FALSE;
- ptr += 2;
- }
- oldptr = ptr;
-
+ {
+ inescq = FALSE;
+ ptr += 2;
+ }
+ oldptr = ptr;
+
/* Remember if \r or \n were explicitly used */
-
+
if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
-
- /* Check for range */
-
+
+ /* Check for range */
+
if (!inescq && ptr[1] == CHAR_MINUS)
- {
+ {
pcre_uint32 d;
- ptr += 2;
+ ptr += 2;
while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
-
- /* If we hit \Q (not followed by \E) at this point, go into escaped
- mode. */
-
+
+ /* If we hit \Q (not followed by \E) at this point, go into escaped
+ mode. */
+
while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
- {
- ptr += 2;
+ {
+ ptr += 2;
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
{ ptr += 2; continue; }
- inescq = TRUE;
- break;
- }
-
+ inescq = TRUE;
+ break;
+ }
+
/* Minus (hyphen) at the end of a class is treated as a literal, so put
back the pointer and jump to handle the character that preceded it. */
if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
- {
- ptr = oldptr;
+ {
+ ptr = oldptr;
goto CLASS_SINGLE_CHARACTER;
- }
-
+ }
+
/* Otherwise, we have a potential range; pick up the next character */
#ifdef SUPPORT_UTF
if (utf)
- { /* Braces are required because the */
- GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
- }
- else
-#endif
- d = *ptr; /* Not UTF-8 mode */
-
+ { /* Braces are required because the */
+ GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
+ }
+ else
+#endif
+ d = *ptr; /* Not UTF-8 mode */
+
/* The second part of a range can be a single-character escape
sequence, but not any of the other escapes. Perl treats a hyphen as a
literal in such circumstances. However, in Perl's warning mode, a
warning is given, so PCRE now faults it as it is almost certainly a
mistake on the user's part. */
-
+
if (!inescq)
- {
+ {
if (d == CHAR_BACKSLASH)
{
int descape;
descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
if (*errorcodeptr != 0) goto FAILED;
-
+
/* 0 means a character was put into d; \b is backspace; any other
special causes an error. */
-
+
if (descape != 0)
- {
+ {
if (descape == ESC_b) d = CHAR_BS; else
{
*errorcodeptr = ERR83;
goto FAILED;
}
- }
- }
+ }
+ }
/* A hyphen followed by a POSIX class is treated in the same way. */
@@ -5406,43 +5406,43 @@ for (;; ptr++)
*errorcodeptr = ERR83;
goto FAILED;
}
- }
-
- /* Check that the two values are in the correct order. Optimize
+ }
+
+ /* Check that the two values are in the correct order. Optimize
one-character ranges. */
-
- if (d < c)
- {
- *errorcodeptr = ERR8;
- goto FAILED;
- }
+
+ if (d < c)
+ {
+ *errorcodeptr = ERR8;
+ goto FAILED;
+ }
if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */
-
+
/* We have found a character range, so single character optimizations
cannot be done anymore. Any value greater than 1 indicates that there
is more than one character. */
-
+
class_one_char = 2;
-
+
/* Remember an explicit \r or \n, and add the range to the class. */
-
+
if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
-
+
class_has_8bitchar +=
add_to_class(classbits, &class_uchardata, options, cd, c, d);
-
+
continue; /* Go get the next char in the class */
}
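The range branch above enforces two checks that the sketch below mirrors: the second endpoint may not be smaller than the first (ERR8), and a one-character range such as a-a drops back to the single-character path. For characters below 256, adding a range just sets a run of bits in the same 32-byte map (illustrative helper, not PCRE's add_to_class):

#include <stdio.h>

/* Add the range [lo, hi] (both < 256) to a 32-byte class map.
   Returns 0 on a reversed range, which the compiler reports as an error. */
static int add_range(unsigned char map[32], unsigned int lo, unsigned int hi)
{
  unsigned int c;
  if (hi < lo) return 0;                  /* corresponds to ERR8 "range out of order" */
  for (c = lo; c <= hi; c++) map[c >> 3] |= (unsigned char)(1u << (c & 7));
  return 1;
}

int main(void)
{
  unsigned char map[32] = {0};
  printf("a-z ok: %d, z-a ok: %d\n", add_range(map, 'a', 'z'), add_range(map, 'z', 'a'));
  return 0;
}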
-
+
/* Handle a single character - we can get here for a normal non-escape
char, or after \ that introduces a single character or for an apparent
range that isn't. Only the value 1 matters for class_one_char, so don't
increase it if it is already 2 or more ... just in case there's a class
with a zillion characters in it. */
-
+
CLASS_SINGLE_CHARACTER:
if (class_one_char < 2) class_one_char++;
-
+
/* If xclass_has_prop is false and class_one_char is 1, we have the first
single character in the class, and there have been no prior ranges, or
XCLASS items generated by escapes. If this is the final character in the
@@ -5451,7 +5451,7 @@ for (;; ptr++)
can cause firstchar to be set. Otherwise, there can be no first char if
this item is first, whatever repeat count may follow. In the case of
reqchar, save the previous value for reinstating. */
-
+
if (!inescq &&
#ifdef SUPPORT_UCP
!xclass_has_prop &&
@@ -5461,7 +5461,7 @@ for (;; ptr++)
ptr++;
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
-
+
if (negate_class)
{
#ifdef SUPPORT_UCP
@@ -5470,12 +5470,12 @@ for (;; ptr++)
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
-
+
/* For caseless UTF-8 mode when UCP support is available, check
whether this character has more than one other case. If so, generate
a special OP_NOTPROP item instead of OP_NOTI. */
-
-#ifdef SUPPORT_UCP
+
+#ifdef SUPPORT_UCP
if (utf && (options & PCRE_CASELESS) != 0 &&
(d = UCD_CASESET(c)) != 0)
{
@@ -5486,8 +5486,8 @@ for (;; ptr++)
else
#endif
/* Char has only one other case, or UCP not available */
-
- {
+
+ {
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
@@ -5495,52 +5495,52 @@ for (;; ptr++)
else
#endif
*code++ = c;
- }
-
+ }
+
/* We are finished with this character class */
-
+
goto END_CLASS;
}
-
+
/* For a single, positive character, get the value into mcbuffer, and
then we can handle this with the normal one-character code. */
-
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
mclength = PRIV(ord2utf)(c, mcbuffer);
else
#endif
- {
+ {
mcbuffer[0] = c;
mclength = 1;
- }
+ }
goto ONE_CHAR;
} /* End of 1-char optimization */
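The 1-char optimization that ends here is worth spelling out: a class holding exactly one character, with no ranges or property escapes, never emits a bitmap at all; a positive class falls through to the ordinary one-character code, and a negated class becomes OP_NOT or OP_NOTI with the character as its operand. A toy decision function, using stand-in opcode values rather than PCRE's:

#include <stdio.h>

enum { MY_OP_CHAR = 1, MY_OP_NOT = 2, MY_OP_CLASS = 3 };  /* stand-ins, not PCRE's opcodes */

/* Decide how a class with 'nchars' members (negated or not) would be encoded. */
static int encode_class(int nchars, int negated)
{
  if (nchars == 1) return negated ? MY_OP_NOT : MY_OP_CHAR;  /* [x] or [^x]: one small item */
  return MY_OP_CLASS;                                        /* otherwise: opcode + 32-byte map */
}

int main(void)
{
  printf("[q]  -> %d\n", encode_class(1, 0));
  printf("[^q] -> %d\n", encode_class(1, 1));
  printf("[qz] -> %d\n", encode_class(2, 0));
  return 0;
}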
-
+
/* There is more than one character in the class, or an XCLASS item
has been generated. Add this character to the class. */
-
+
class_has_8bitchar +=
add_to_class(classbits, &class_uchardata, options, cd, c, c);
- }
-
+ }
+
/* Loop until ']' reached. This "while" is the end of the "do" far above.
If we are at the end of an internal nested string, revert to the outer
string. */
-
+
while (((c = *(++ptr)) != CHAR_NULL ||
(nestptr != NULL &&
(ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) &&
(c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
-
+
/* Check for missing terminating ']' */
if (c == CHAR_NULL)
- {
- *errorcodeptr = ERR6;
- goto FAILED;
- }
-
+ {
+ *errorcodeptr = ERR6;
+ goto FAILED;
+ }
+
/* We will need an XCLASS if data has been placed in class_uchardata. In
the second phase this is a sufficient test. However, in the pre-compile
phase, class_uchardata gets emptied to prevent workspace overflow, so it
@@ -5548,21 +5548,21 @@ for (;; ptr++)
anything at this point. For this reason, xclass gets set TRUE above when
class_uchardata is emptied, and that's why this code is the way it is here
instead of just doing a test on class_uchardata below. */
-
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (class_uchardata > class_uchardata_base) xclass = TRUE;
-#endif
-
+#endif
+
/* If this is the first thing in the branch, there can be no first char
setting, whatever the repeat count. Any reqchar setting must remain
unchanged after any kind of repeat. */
-
+
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
-
+
/* If there are characters with values > 255, we have to compile an
extended class, with its own opcode, unless there was a negated special
such as \S in the class, and PCRE_UCP is not set, because in that case all
@@ -5570,25 +5570,25 @@ for (;; ptr++)
well can be ignored. If (when there are explicit characters > 255 that must
be listed) there are no characters < 256, we can omit the bitmap in the
actual compiled code. */
-
+
#ifdef SUPPORT_UTF
if (xclass && (xclass_has_prop || !should_flip_negation ||
(options & PCRE_UCP) != 0))
#elif !defined COMPILE_PCRE8
if (xclass && (xclass_has_prop || !should_flip_negation))
-#endif
+#endif
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- {
+ {
/* For non-UCP wide characters, in a non-negative class containing \S or
similar (should_flip_negation is set), all characters greater than 255
must be in the class. */
-
+
if (
#if defined COMPILE_PCRE8
utf &&
#endif
should_flip_negation && !negate_class && (options & PCRE_UCP) == 0)
- {
+ {
*class_uchardata++ = XCL_RANGE;
if (utf) /* Will always be utf in the 8-bit library */
{
@@ -5603,114 +5603,114 @@ for (;; ptr++)
#elif defined COMPILE_PCRE32
*class_uchardata++ = 0x100;
*class_uchardata++ = 0xffffffffu;
-#endif
+#endif
}
- }
-
+ }
+
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
- *code++ = OP_XCLASS;
- code += LINK_SIZE;
+ *code++ = OP_XCLASS;
+ code += LINK_SIZE;
*code = negate_class? XCL_NOT:0;
if (xclass_has_prop) *code |= XCL_HASPROP;
-
- /* If the map is required, move up the extra data to make room for it;
- otherwise just move the code pointer to the end of the extra data. */
-
+
+ /* If the map is required, move up the extra data to make room for it;
+ otherwise just move the code pointer to the end of the extra data. */
+
if (class_has_8bitchar > 0)
- {
- *code++ |= XCL_MAP;
+ {
+ *code++ |= XCL_MAP;
memmove(code + (32 / sizeof(pcre_uchar)), code,
IN_UCHARS(class_uchardata - code));
if (negate_class && !xclass_has_prop)
for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
- memcpy(code, classbits, 32);
+ memcpy(code, classbits, 32);
code = class_uchardata + (32 / sizeof(pcre_uchar));
- }
+ }
else code = class_uchardata;
-
- /* Now fill in the complete length of the item */
-
+
+ /* Now fill in the complete length of the item */
+
PUT(previous, 1, (int)(code - previous));
- break; /* End of class handling */
- }
+ break; /* End of class handling */
+ }
/* Even though any XCLASS list is now discarded, we must allow for
its memory. */
if (lengthptr != NULL)
*lengthptr += (int)(class_uchardata - class_uchardata_base);
-#endif
-
+#endif
+
/* If there are no characters > 255, or they are all to be included or
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
whole class was negated and whether there were negative specials such as \S
(non-UCP) in the class. Then copy the 32-byte map into the code vector,
negating it if necessary. */
-
- *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
+
+ *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
if (lengthptr == NULL) /* Save time in the pre-compile phase */
- {
+ {
if (negate_class)
for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
- memcpy(code, classbits, 32);
- }
+ memcpy(code, classbits, 32);
+ }
code += 32 / sizeof(pcre_uchar);
END_CLASS:
- break;
-
-
- /* ===================================================================*/
- /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
- has been tested above. */
-
+ break;
+
+
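In the general case the class compiles to OP_CLASS or OP_NCLASS followed by the 32-byte map, and for a negated class the map itself is complemented before being copied (the classbits[c] = ~classbits[c] loop), so a runtime match is a single bit test either way. A compact sketch of that emit/match pair, again with made-up opcode constants:

#include <stdio.h>

/* Emit an (opcode, 32-byte map) item the way the code above does:
   negation is folded into the map, so matching never needs to know about it. */
static void emit_class(unsigned char *out, const unsigned char bits[32], int negate)
{
  int c;
  out[0] = 0x10;                                   /* stand-in for OP_CLASS/OP_NCLASS */
  for (c = 0; c < 32; c++) out[1 + c] = negate ? (unsigned char)~bits[c] : bits[c];
}

static int class_matches(const unsigned char *item, unsigned int ch)
{
  return (item[1 + (ch >> 3)] >> (ch & 7)) & 1;
}

int main(void)
{
  unsigned char bits[32] = {0}, item[33];
  bits['x' >> 3] |= (unsigned char)(1u << ('x' & 7));   /* class contains only 'x' */
  emit_class(item, bits, 1);                            /* compile the equivalent of [^x] */
  printf("'x': %d  'y': %d\n", class_matches(item, 'x'), class_matches(item, 'y'));
  return 0;
}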
+ /* ===================================================================*/
+ /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
+ has been tested above. */
+
case CHAR_LEFT_CURLY_BRACKET:
- if (!is_quantifier) goto NORMAL_CHAR;
- ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
- if (*errorcodeptr != 0) goto FAILED;
- goto REPEAT;
-
+ if (!is_quantifier) goto NORMAL_CHAR;
+ ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
+ if (*errorcodeptr != 0) goto FAILED;
+ goto REPEAT;
+
case CHAR_ASTERISK:
- repeat_min = 0;
- repeat_max = -1;
- goto REPEAT;
-
+ repeat_min = 0;
+ repeat_max = -1;
+ goto REPEAT;
+
case CHAR_PLUS:
- repeat_min = 1;
- repeat_max = -1;
- goto REPEAT;
-
+ repeat_min = 1;
+ repeat_max = -1;
+ goto REPEAT;
+
case CHAR_QUESTION_MARK:
- repeat_min = 0;
- repeat_max = 1;
-
- REPEAT:
- if (previous == NULL)
- {
- *errorcodeptr = ERR9;
- goto FAILED;
- }
-
- if (repeat_min == 0)
- {
+ repeat_min = 0;
+ repeat_max = 1;
+
+ REPEAT:
+ if (previous == NULL)
+ {
+ *errorcodeptr = ERR9;
+ goto FAILED;
+ }
+
+ if (repeat_min == 0)
+ {
firstchar = zerofirstchar; /* Adjust for zero repeat */
firstcharflags = zerofirstcharflags;
reqchar = zeroreqchar; /* Ditto */
reqcharflags = zeroreqcharflags;
- }
-
- /* Remember whether this is a variable length repeat */
-
- reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
-
- op_type = 0; /* Default single-char op codes */
- possessive_quantifier = FALSE; /* Default not possessive quantifier */
-
+ }
+
+ /* Remember whether this is a variable length repeat */
+
+ reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
+
+ op_type = 0; /* Default single-char op codes */
+ possessive_quantifier = FALSE; /* Default not possessive quantifier */
+
/* Save start of previous item, in case we have to move it up in order to
insert something before it. */
-
- tempcode = previous;
-
+
+ tempcode = previous;
+
/* Before checking for a possessive quantifier, we must skip over
whitespace and comments in extended mode because Perl allows white space at
this point. */
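All four quantifier spellings reach the shared REPEAT code as a (repeat_min, repeat_max) pair, with -1 meaning "no upper bound": '*' is (0,-1), '+' is (1,-1), '?' is (0,1), and '{n,m}' goes through read_repeat_counts(). A small normalisation sketch (local types, not PCRE's):

#include <stdio.h>

struct bounds { int min; int max; };               /* max == -1 means "unbounded" */

static struct bounds quantifier_bounds(char q)
{
  struct bounds b = {0, 0};
  switch (q)
  {
    case '*': b.min = 0; b.max = -1; break;
    case '+': b.min = 1; b.max = -1; break;
    case '?': b.min = 0; b.max = 1;  break;
    default:  break;                               /* '{' goes through read_repeat_counts() instead */
  }
  return b;
}

int main(void)
{
  const char q[] = "*+?";
  for (int i = 0; q[i] != 0; i++)
  {
    struct bounds b = quantifier_bounds(q[i]);
    printf("'%c' -> min=%d max=%d\n", q[i], b.min, b.max);
  }
  return 0;
}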
@@ -5754,33 +5754,33 @@ for (;; ptr++)
}
}
- /* If the next character is '+', we have a possessive quantifier. This
- implies greediness, whatever the setting of the PCRE_UNGREEDY option.
- If the next character is '?' this is a minimizing repeat, by default,
- but if PCRE_UNGREEDY is set, it works the other way round. We change the
- repeat type to the non-default. */
-
+ /* If the next character is '+', we have a possessive quantifier. This
+ implies greediness, whatever the setting of the PCRE_UNGREEDY option.
+ If the next character is '?' this is a minimizing repeat, by default,
+ but if PCRE_UNGREEDY is set, it works the other way round. We change the
+ repeat type to the non-default. */
+
if (ptr[1] == CHAR_PLUS)
- {
- repeat_type = 0; /* Force greedy */
- possessive_quantifier = TRUE;
- ptr++;
- }
+ {
+ repeat_type = 0; /* Force greedy */
+ possessive_quantifier = TRUE;
+ ptr++;
+ }
else if (ptr[1] == CHAR_QUESTION_MARK)
- {
- repeat_type = greedy_non_default;
- ptr++;
- }
- else repeat_type = greedy_default;
-
+ {
+ repeat_type = greedy_non_default;
+ ptr++;
+ }
+ else repeat_type = greedy_default;
+
/* If previous was a recursion call, wrap it in atomic brackets so that
previous becomes the atomic group. All recursions were so wrapped in the
past, but it no longer happens for non-repeated recursions. In fact, the
repeated ones could be re-implemented independently so as not to need this,
but for the moment we rely on the code for repeating groups. */
-
+
if (*previous == OP_RECURSE)
- {
+ {
memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
*previous = OP_ONCE;
PUT(previous, 1, 2 + 2*LINK_SIZE);
@@ -5788,20 +5788,20 @@ for (;; ptr++)
PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
code += 2 + 2 * LINK_SIZE;
length_prevgroup = 3 + 3*LINK_SIZE;
-
+
/* When actually compiling, we need to check whether this was a forward
reference, and if so, adjust the offset. */
if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
- {
+ {
int offset = GET(cd->hwm, -LINK_SIZE);
if (offset == previous + 1 - cd->start_code)
PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE);
- }
+ }
}
-
+
/* Now handle repetition for the different types of item. */
-
+
/* If previous was a character or negated character match, abolish the item
and generate a repeat item instead. If a char item has a minimum of more
than one, ensure that it is set in reqchar - it might not be if a sequence
@@ -5812,236 +5812,236 @@ for (;; ptr++)
|| *previous == OP_NOT || *previous == OP_NOTI)
{
switch (*previous)
- {
+ {
default: /* Make compiler happy. */
case OP_CHAR: op_type = OP_STAR - OP_STAR; break;
case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;
case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;
- }
-
+ }
+
/* Deal with UTF characters that take up more than one character. It's
easier to write this out separately than try to macrify it. Use c to
hold the length of the character in bytes, plus UTF_LENGTH to flag that
it's a length rather than a small character. */
-
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && NOT_FIRSTCHAR(code[-1]))
- {
+ {
pcre_uchar *lastchar = code - 1;
BACKCHAR(lastchar);
c = (int)(code - lastchar); /* Length of UTF-8 character */
memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
c |= UTF_LENGTH; /* Flag c as a length */
- }
+ }
else
#endif /* SUPPORT_UTF */
-
+
/* Handle the case of a single character - either with no UTF support, or
with UTF disabled, or for a single character UTF character. */
- {
+ {
c = code[-1];
if (*previous <= OP_CHARI && repeat_min > 1)
{
reqchar = c;
reqcharflags = req_caseopt | cd->req_varyopt;
}
- }
+ }
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
- }
-
- /* If previous was a character type match (\d or similar), abolish it and
- create a suitable repeat item. The code is shared with single-character
- repeats by setting op_type to add a suitable offset into repeat_type. Note
- that the Unicode property types will be present only when SUPPORT_UCP is
- defined, but we don't wrap the little bits of code here because it just
- makes it horribly messy. */
-
- else if (*previous < OP_EODN)
- {
+ }
+
+ /* If previous was a character type match (\d or similar), abolish it and
+ create a suitable repeat item. The code is shared with single-character
+ repeats by setting op_type to add a suitable offset into repeat_type. Note
+ that the Unicode property types will be present only when SUPPORT_UCP is
+ defined, but we don't wrap the little bits of code here because it just
+ makes it horribly messy. */
+
+ else if (*previous < OP_EODN)
+ {
pcre_uchar *oldcode;
- int prop_type, prop_value;
- op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
- c = *previous;
-
- OUTPUT_SINGLE_REPEAT:
- if (*previous == OP_PROP || *previous == OP_NOTPROP)
- {
- prop_type = previous[1];
- prop_value = previous[2];
- }
- else prop_type = prop_value = -1;
-
- oldcode = code;
- code = previous; /* Usually overwrite previous item */
-
- /* If the maximum is zero then the minimum must also be zero; Perl allows
- this case, so we do too - by simply omitting the item altogether. */
-
- if (repeat_max == 0) goto END_REPEAT;
-
- /* Combine the op_type with the repeat_type */
-
- repeat_type += op_type;
-
- /* A minimum of zero is handled either as the special case * or ?, or as
- an UPTO, with the maximum given. */
-
- if (repeat_min == 0)
- {
- if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
- else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
- else
- {
- *code++ = OP_UPTO + repeat_type;
- PUT2INC(code, 0, repeat_max);
- }
- }
-
- /* A repeat minimum of 1 is optimized into some special cases. If the
- maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
- left in place and, if the maximum is greater than 1, we use OP_UPTO with
- one less than the maximum. */
-
- else if (repeat_min == 1)
- {
- if (repeat_max == -1)
- *code++ = OP_PLUS + repeat_type;
- else
- {
- code = oldcode; /* leave previous item in place */
- if (repeat_max == 1) goto END_REPEAT;
- *code++ = OP_UPTO + repeat_type;
- PUT2INC(code, 0, repeat_max - 1);
- }
- }
-
- /* The case {n,n} is just an EXACT, while the general case {n,m} is
- handled as an EXACT followed by an UPTO. */
-
- else
- {
- *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
- PUT2INC(code, 0, repeat_min);
-
- /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
- we have to insert the character for the previous code. For a repeated
- Unicode property match, there are two extra bytes that define the
- required property. In UTF-8 mode, long characters have their length in
+ int prop_type, prop_value;
+ op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
+ c = *previous;
+
+ OUTPUT_SINGLE_REPEAT:
+ if (*previous == OP_PROP || *previous == OP_NOTPROP)
+ {
+ prop_type = previous[1];
+ prop_value = previous[2];
+ }
+ else prop_type = prop_value = -1;
+
+ oldcode = code;
+ code = previous; /* Usually overwrite previous item */
+
+ /* If the maximum is zero then the minimum must also be zero; Perl allows
+ this case, so we do too - by simply omitting the item altogether. */
+
+ if (repeat_max == 0) goto END_REPEAT;
+
+ /* Combine the op_type with the repeat_type */
+
+ repeat_type += op_type;
+
+ /* A minimum of zero is handled either as the special case * or ?, or as
+ an UPTO, with the maximum given. */
+
+ if (repeat_min == 0)
+ {
+ if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
+ else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
+ else
+ {
+ *code++ = OP_UPTO + repeat_type;
+ PUT2INC(code, 0, repeat_max);
+ }
+ }
+
+ /* A repeat minimum of 1 is optimized into some special cases. If the
+ maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
+ left in place and, if the maximum is greater than 1, we use OP_UPTO with
+ one less than the maximum. */
+
+ else if (repeat_min == 1)
+ {
+ if (repeat_max == -1)
+ *code++ = OP_PLUS + repeat_type;
+ else
+ {
+ code = oldcode; /* leave previous item in place */
+ if (repeat_max == 1) goto END_REPEAT;
+ *code++ = OP_UPTO + repeat_type;
+ PUT2INC(code, 0, repeat_max - 1);
+ }
+ }
+
+ /* The case {n,n} is just an EXACT, while the general case {n,m} is
+ handled as an EXACT followed by an UPTO. */
+
+ else
+ {
+ *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
+ PUT2INC(code, 0, repeat_min);
+
+ /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
+ we have to insert the character for the previous code. For a repeated
+ Unicode property match, there are two extra bytes that define the
+ required property. In UTF-8 mode, long characters have their length in
c, with the UTF_LENGTH bit as a flag. */
-
- if (repeat_max < 0)
- {
+
+ if (repeat_max < 0)
+ {
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && (c & UTF_LENGTH) != 0)
- {
+ {
memcpy(code, utf_chars, IN_UCHARS(c & 7));
- code += c & 7;
- }
- else
-#endif
- {
- *code++ = c;
- if (prop_type >= 0)
- {
- *code++ = prop_type;
- *code++ = prop_value;
- }
- }
- *code++ = OP_STAR + repeat_type;
- }
-
- /* Else insert an UPTO if the max is greater than the min, again
- preceded by the character, for the previously inserted code. If the
- UPTO is just for 1 instance, we can use QUERY instead. */
-
- else if (repeat_max != repeat_min)
- {
+ code += c & 7;
+ }
+ else
+#endif
+ {
+ *code++ = c;
+ if (prop_type >= 0)
+ {
+ *code++ = prop_type;
+ *code++ = prop_value;
+ }
+ }
+ *code++ = OP_STAR + repeat_type;
+ }
+
+ /* Else insert an UPTO if the max is greater than the min, again
+ preceded by the character, for the previously inserted code. If the
+ UPTO is just for 1 instance, we can use QUERY instead. */
+
+ else if (repeat_max != repeat_min)
+ {
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && (c & UTF_LENGTH) != 0)
- {
+ {
memcpy(code, utf_chars, IN_UCHARS(c & 7));
- code += c & 7;
- }
- else
-#endif
- *code++ = c;
- if (prop_type >= 0)
- {
- *code++ = prop_type;
- *code++ = prop_value;
- }
- repeat_max -= repeat_min;
-
- if (repeat_max == 1)
- {
- *code++ = OP_QUERY + repeat_type;
- }
- else
- {
- *code++ = OP_UPTO + repeat_type;
- PUT2INC(code, 0, repeat_max);
- }
- }
- }
-
- /* The character or character type itself comes last in all cases. */
-
+ code += c & 7;
+ }
+ else
+#endif
+ *code++ = c;
+ if (prop_type >= 0)
+ {
+ *code++ = prop_type;
+ *code++ = prop_value;
+ }
+ repeat_max -= repeat_min;
+
+ if (repeat_max == 1)
+ {
+ *code++ = OP_QUERY + repeat_type;
+ }
+ else
+ {
+ *code++ = OP_UPTO + repeat_type;
+ PUT2INC(code, 0, repeat_max);
+ }
+ }
+ }
+
+ /* The character or character type itself comes last in all cases. */
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && (c & UTF_LENGTH) != 0)
- {
+ {
memcpy(code, utf_chars, IN_UCHARS(c & 7));
- code += c & 7;
- }
- else
-#endif
- *code++ = c;
-
- /* For a repeated Unicode property match, there are two extra bytes that
- define the required property. */
-
-#ifdef SUPPORT_UCP
- if (prop_type >= 0)
- {
- *code++ = prop_type;
- *code++ = prop_value;
- }
-#endif
- }
-
- /* If previous was a character class or a back reference, we put the repeat
- stuff after it, but just skip the item if the repeat was {0,0}. */
-
+ code += c & 7;
+ }
+ else
+#endif
+ *code++ = c;
+
+ /* For a repeated Unicode property match, there are two extra bytes that
+ define the required property. */
+
+#ifdef SUPPORT_UCP
+ if (prop_type >= 0)
+ {
+ *code++ = prop_type;
+ *code++ = prop_value;
+ }
+#endif
+ }
+
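The branch that ends here re-encodes a repeated single character or character type as a handful of opcodes instead of copying the item: a zero maximum drops it, a zero minimum becomes STAR, QUERY or UPTO, a minimum of one with no limit becomes PLUS, and the general {n,m} becomes EXACT n followed by UPTO (m - n). The decision tree, reduced to printable names so it can run on its own (not the literal bytecode, which also re-inserts the character between the opcodes):

#include <stdio.h>

/* Summarise how a repeated single item is encoded; max == -1 means unbounded. */
static const char *encode_single_repeat(int min, int max, char *buf, size_t buflen)
{
  if (max == 0)                   snprintf(buf, buflen, "(omitted)");
  else if (min == 0 && max == -1) snprintf(buf, buflen, "STAR");
  else if (min == 0 && max == 1)  snprintf(buf, buflen, "QUERY");
  else if (min == 0)              snprintf(buf, buflen, "UPTO %d", max);
  else if (min == 1 && max == -1) snprintf(buf, buflen, "PLUS");
  else if (min == 1 && max == 1)  snprintf(buf, buflen, "(item left as-is)");
  else if (min == 1)              snprintf(buf, buflen, "item, UPTO %d", max - 1);
  else if (min == max)            snprintf(buf, buflen, "EXACT %d", min);
  else if (max == -1)             snprintf(buf, buflen, "EXACT %d, STAR", min);
  else                            snprintf(buf, buflen, "EXACT %d, %s %d", min,
                                           (max - min == 1) ? "QUERY" : "UPTO", max - min);
  return buf;
}

int main(void)
{
  int cases[][2] = { {0,-1}, {1,-1}, {0,1}, {3,3}, {2,5}, {2,-1}, {1,4} };
  char buf[64];
  for (unsigned i = 0; i < sizeof(cases)/sizeof(cases[0]); i++)
    printf("{%d,%d} -> %s\n", cases[i][0], cases[i][1],
           encode_single_repeat(cases[i][0], cases[i][1], buf, sizeof(buf)));
  return 0;
}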
+ /* If previous was a character class or a back reference, we put the repeat
+ stuff after it, but just skip the item if the repeat was {0,0}. */
+
else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- *previous == OP_XCLASS ||
-#endif
+ *previous == OP_XCLASS ||
+#endif
*previous == OP_REF || *previous == OP_REFI ||
*previous == OP_DNREF || *previous == OP_DNREFI)
- {
- if (repeat_max == 0)
- {
- code = previous;
- goto END_REPEAT;
- }
-
- if (repeat_min == 0 && repeat_max == -1)
- *code++ = OP_CRSTAR + repeat_type;
- else if (repeat_min == 1 && repeat_max == -1)
- *code++ = OP_CRPLUS + repeat_type;
- else if (repeat_min == 0 && repeat_max == 1)
- *code++ = OP_CRQUERY + repeat_type;
- else
- {
- *code++ = OP_CRRANGE + repeat_type;
- PUT2INC(code, 0, repeat_min);
- if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
- PUT2INC(code, 0, repeat_max);
- }
- }
-
- /* If previous was a bracket group, we may have to replicate it in certain
+ {
+ if (repeat_max == 0)
+ {
+ code = previous;
+ goto END_REPEAT;
+ }
+
+ if (repeat_min == 0 && repeat_max == -1)
+ *code++ = OP_CRSTAR + repeat_type;
+ else if (repeat_min == 1 && repeat_max == -1)
+ *code++ = OP_CRPLUS + repeat_type;
+ else if (repeat_min == 0 && repeat_max == 1)
+ *code++ = OP_CRQUERY + repeat_type;
+ else
+ {
+ *code++ = OP_CRRANGE + repeat_type;
+ PUT2INC(code, 0, repeat_min);
+ if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
+ PUT2INC(code, 0, repeat_max);
+ }
+ }
+
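A repeated class or back reference keeps the item and appends a control opcode instead: CRSTAR, CRPLUS and CRQUERY cover the common cases, and CRRANGE carries explicit two-byte minimum and maximum counts, with an unlimited maximum stored as 0 (the repeat_max = 0 line above; a genuine {n,0} never gets this far because it is skipped earlier). A tiny encoder/decoder for that convention (sketch, local helpers):

#include <stdio.h>

/* CRRANGE-style encoding: two 16-bit counts, max stored as 0 when unbounded. */
static void put_range(unsigned char *p, int min, int max)
{
  if (max == -1) max = 0;                     /* 0 is the stored spelling of "no limit" */
  p[0] = (unsigned char)(min >> 8); p[1] = (unsigned char)min;
  p[2] = (unsigned char)(max >> 8); p[3] = (unsigned char)max;
}

static void get_range(const unsigned char *p, int *min, int *max)
{
  *min = (p[0] << 8) | p[1];
  *max = (p[2] << 8) | p[3];
  if (*max == 0) *max = -1;                   /* translate back for the caller */
}

int main(void)
{
  unsigned char buf[4];
  int min, max;
  put_range(buf, 2, -1);                      /* e.g. [abc]{2,} */
  get_range(buf, &min, &max);
  printf("decoded: min=%d max=%d\n", min, max);
  return 0;
}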
+ /* If previous was a bracket group, we may have to replicate it in certain
cases. Note that at this point we can encounter only the "basic" bracket
opcodes such as BRA and CBRA, as this is the place where they get converted
into the more special varieties such as BRAPOS and SBRA. A test for >=
@@ -6049,56 +6049,56 @@ for (;; ptr++)
ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND.
Originally, PCRE did not allow repetition of assertions, but now it does,
for Perl compatibility. */
-
+
else if (*previous >= OP_ASSERT && *previous <= OP_COND)
- {
+ {
register int i;
int len = (int)(code - previous);
size_t base_hwm_offset = item_hwm_offset;
pcre_uchar *bralink = NULL;
pcre_uchar *brazeroptr = NULL;
-
+
/* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
we just ignore the repeat. */
-
- if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
+
+ if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
goto END_REPEAT;
-
+
/* There is no sense in actually repeating assertions. The only potential
use of repetition is in cases when the assertion is optional. Therefore,
if the minimum is greater than zero, just ignore the repeat. If the
maximum is not zero or one, set it to 1. */
-
+
if (*previous < OP_ONCE) /* Assertion */
- {
+ {
if (repeat_min > 0) goto END_REPEAT;
if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
- }
-
- /* The case of a zero minimum is special because of the need to stick
- OP_BRAZERO in front of it, and because the group appears once in the
- data, whereas in other cases it appears the minimum number of times. For
- this reason, it is simplest to treat this case separately, as otherwise
- the code gets far too messy. There are several special subcases when the
- minimum is zero. */
-
- if (repeat_min == 0)
- {
+ }
+
+ /* The case of a zero minimum is special because of the need to stick
+ OP_BRAZERO in front of it, and because the group appears once in the
+ data, whereas in other cases it appears the minimum number of times. For
+ this reason, it is simplest to treat this case separately, as otherwise
+ the code gets far too messy. There are several special subcases when the
+ minimum is zero. */
+
+ if (repeat_min == 0)
+ {
/* If the maximum is also zero, we used to just omit the group from the
output altogether, like this:
-
+
** if (repeat_max == 0)
** {
** code = previous;
** goto END_REPEAT;
** }
-
+
However, that fails when a group or a subgroup within it is referenced
as a subroutine from elsewhere in the pattern, so now we stick in
OP_SKIPZERO in front of it so that it is skipped on execution. As we
don't have a list of which groups are referenced, we cannot do this
selectively.
-
+
If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
and do no more at this point. However, we do need to adjust any
OP_RECURSE calls inside the group that refer to the group itself or any
@@ -6107,94 +6107,94 @@ for (;; ptr++)
this. */
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
- {
- *code = OP_END;
+ {
+ *code = OP_END;
adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
memmove(previous + 1, previous, IN_UCHARS(len));
- code++;
+ code++;
if (repeat_max == 0)
{
*previous++ = OP_SKIPZERO;
goto END_REPEAT;
}
brazeroptr = previous; /* Save for possessive optimizing */
- *previous++ = OP_BRAZERO + repeat_type;
- }
-
- /* If the maximum is greater than 1 and limited, we have to replicate
- in a nested fashion, sticking OP_BRAZERO before each set of brackets.
- The first one has to be handled carefully because it's the original
- copy, which has to be moved up. The remainder can be handled by code
- that is common with the non-zero minimum case below. We have to
- adjust the value of repeat_max, since one less copy is required. Once
- again, we may have to adjust any OP_RECURSE calls inside the group. */
-
- else
- {
- int offset;
- *code = OP_END;
+ *previous++ = OP_BRAZERO + repeat_type;
+ }
+
+ /* If the maximum is greater than 1 and limited, we have to replicate
+ in a nested fashion, sticking OP_BRAZERO before each set of brackets.
+ The first one has to be handled carefully because it's the original
+ copy, which has to be moved up. The remainder can be handled by code
+ that is common with the non-zero minimum case below. We have to
+ adjust the value of repeat_max, since one less copy is required. Once
+ again, we may have to adjust any OP_RECURSE calls inside the group. */
+
+ else
+ {
+ int offset;
+ *code = OP_END;
adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
- code += 2 + LINK_SIZE;
- *previous++ = OP_BRAZERO + repeat_type;
- *previous++ = OP_BRA;
-
- /* We chain together the bracket offset fields that have to be
- filled in later when the ends of the brackets are reached. */
-
+ code += 2 + LINK_SIZE;
+ *previous++ = OP_BRAZERO + repeat_type;
+ *previous++ = OP_BRA;
+
+ /* We chain together the bracket offset fields that have to be
+ filled in later when the ends of the brackets are reached. */
+
offset = (bralink == NULL)? 0 : (int)(previous - bralink);
- bralink = previous;
- PUTINC(previous, 0, offset);
- }
-
- repeat_max--;
- }
-
- /* If the minimum is greater than zero, replicate the group as many
- times as necessary, and adjust the maximum to the number of subsequent
- copies that we need. If we set a first char from the group, and didn't
- set a required char, copy the latter from the former. If there are any
- forward reference subroutine calls in the group, there will be entries on
- the workspace list; replicate these with an appropriate increment. */
-
- else
- {
- if (repeat_min > 1)
- {
- /* In the pre-compile phase, we don't actually do the replication. We
- just adjust the length as if we had. Do some paranoid checks for
+ bralink = previous;
+ PUTINC(previous, 0, offset);
+ }
+
+ repeat_max--;
+ }
+
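For a repeated group with a zero minimum, the compiler cannot simply drop or duplicate the group in place: {0,0} gets an OP_SKIPZERO prefix so subroutine references into the group stay valid, a maximum of one or "unlimited" gets a single OP_BRAZERO in front, and a bounded maximum above one is built as nested BRAZERO/BRA copies whose link fields are filled in later through the bralink chain. A rough picture of the resulting shapes (schematic output, not literal bytecode):

#include <stdio.h>

/* Print the shape the code above produces for (group){0,max}; max == -1 means unlimited. */
static void show_zero_min(int max)
{
  if (max == 0)       printf("{0,0} : SKIPZERO BRA ... KET\n");
  else if (max == 1)  printf("{0,1} : BRAZERO BRA ... KET\n");
  else if (max == -1) printf("{0,}  : BRAZERO BRA ... KET (repeat flag set later on the final KET)\n");
  else                printf("{0,%d} : BRAZERO BRA ... BRAZERO BRA ... KET KET (roughly %d nested copies)\n",
                             max, max);
}

int main(void)
{
  show_zero_min(0); show_zero_min(1); show_zero_min(-1); show_zero_min(3);
  return 0;
}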
+ /* If the minimum is greater than zero, replicate the group as many
+ times as necessary, and adjust the maximum to the number of subsequent
+ copies that we need. If we set a first char from the group, and didn't
+ set a required char, copy the latter from the former. If there are any
+ forward reference subroutine calls in the group, there will be entries on
+ the workspace list; replicate these with an appropriate increment. */
+
+ else
+ {
+ if (repeat_min > 1)
+ {
+ /* In the pre-compile phase, we don't actually do the replication. We
+ just adjust the length as if we had. Do some paranoid checks for
potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
integer type when available, otherwise double. */
-
- if (lengthptr != NULL)
- {
- int delta = (repeat_min - 1)*length_prevgroup;
+
+ if (lengthptr != NULL)
+ {
+ int delta = (repeat_min - 1)*length_prevgroup;
if ((INT64_OR_DOUBLE)(repeat_min - 1)*
(INT64_OR_DOUBLE)length_prevgroup >
(INT64_OR_DOUBLE)INT_MAX ||
- OFLOW_MAX - *lengthptr < delta)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += delta;
- }
-
+ OFLOW_MAX - *lengthptr < delta)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+ *lengthptr += delta;
+ }
+
/* This is compiling for real. If there is a set first byte for
the group, and we have not yet set a "required byte", set it. Make
sure there is enough workspace for copying forward references before
doing the copy. */
-
- else
- {
+
+ else
+ {
if (groupsetfirstchar && reqcharflags < 0)
{
reqchar = firstchar;
reqcharflags = firstcharflags;
}
- for (i = 1; i < repeat_min; i++)
- {
+ for (i = 1; i < repeat_min; i++)
+ {
pcre_uchar *hc;
size_t this_hwm_offset = cd->hwm - cd->start_workspace;
memcpy(code, previous, IN_UCHARS(len));
@@ -6202,7 +6202,7 @@ for (;; ptr++)
while (cd->hwm > cd->start_workspace + cd->workspace_size -
WORK_SIZE_SAFETY_MARGIN -
(this_hwm_offset - base_hwm_offset))
- {
+ {
*errorcodeptr = expand_workspace(cd);
if (*errorcodeptr != 0) goto FAILED;
}
@@ -6211,70 +6211,70 @@ for (;; ptr++)
hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
hc += LINK_SIZE)
{
- PUT(cd->hwm, 0, GET(hc, 0) + len);
- cd->hwm += LINK_SIZE;
- }
+ PUT(cd->hwm, 0, GET(hc, 0) + len);
+ cd->hwm += LINK_SIZE;
+ }
base_hwm_offset = this_hwm_offset;
- code += len;
- }
- }
- }
-
- if (repeat_max > 0) repeat_max -= repeat_min;
- }
-
- /* This code is common to both the zero and non-zero minimum cases. If
- the maximum is limited, it replicates the group in a nested fashion,
- remembering the bracket starts on a stack. In the case of a zero minimum,
- the first one was set up above. In all cases the repeat_max now specifies
- the number of additional copies needed. Again, we must remember to
- replicate entries on the forward reference list. */
-
- if (repeat_max >= 0)
- {
- /* In the pre-compile phase, we don't actually do the replication. We
- just adjust the length as if we had. For each repetition we must add 1
- to the length for BRAZERO and for all but the last repetition we must
- add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
+ code += len;
+ }
+ }
+ }
+
+ if (repeat_max > 0) repeat_max -= repeat_min;
+ }
+
+ /* This code is common to both the zero and non-zero minimum cases. If
+ the maximum is limited, it replicates the group in a nested fashion,
+ remembering the bracket starts on a stack. In the case of a zero minimum,
+ the first one was set up above. In all cases the repeat_max now specifies
+ the number of additional copies needed. Again, we must remember to
+ replicate entries on the forward reference list. */
+
+ if (repeat_max >= 0)
+ {
+ /* In the pre-compile phase, we don't actually do the replication. We
+ just adjust the length as if we had. For each repetition we must add 1
+ to the length for BRAZERO and for all but the last repetition we must
+ add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
a 64-bit integer type when available, otherwise double. */
-
- if (lengthptr != NULL && repeat_max > 0)
- {
- int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
- 2 - 2*LINK_SIZE; /* Last one doesn't nest */
+
+ if (lengthptr != NULL && repeat_max > 0)
+ {
+ int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
+ 2 - 2*LINK_SIZE; /* Last one doesn't nest */
if ((INT64_OR_DOUBLE)repeat_max *
(INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
> (INT64_OR_DOUBLE)INT_MAX ||
- OFLOW_MAX - *lengthptr < delta)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += delta;
- }
-
- /* This is compiling for real */
-
- else for (i = repeat_max - 1; i >= 0; i--)
- {
+ OFLOW_MAX - *lengthptr < delta)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+ *lengthptr += delta;
+ }
+
+ /* This is compiling for real */
+
+ else for (i = repeat_max - 1; i >= 0; i--)
+ {
pcre_uchar *hc;
size_t this_hwm_offset = cd->hwm - cd->start_workspace;
-
- *code++ = OP_BRAZERO + repeat_type;
-
- /* All but the final copy start a new nesting, maintaining the
- chain of brackets outstanding. */
-
- if (i != 0)
- {
- int offset;
- *code++ = OP_BRA;
+
+ *code++ = OP_BRAZERO + repeat_type;
+
+ /* All but the final copy start a new nesting, maintaining the
+ chain of brackets outstanding. */
+
+ if (i != 0)
+ {
+ int offset;
+ *code++ = OP_BRA;
offset = (bralink == NULL)? 0 : (int)(code - bralink);
- bralink = code;
- PUTINC(code, 0, offset);
- }
-
+ bralink = code;
+ PUTINC(code, 0, offset);
+ }
+
memcpy(code, previous, IN_UCHARS(len));
/* Ensure there is enough workspace for forward references before
@@ -6283,7 +6283,7 @@ for (;; ptr++)
while (cd->hwm > cd->start_workspace + cd->workspace_size -
WORK_SIZE_SAFETY_MARGIN -
(this_hwm_offset - base_hwm_offset))
- {
+ {
*errorcodeptr = expand_workspace(cd);
if (*errorcodeptr != 0) goto FAILED;
}
@@ -6292,41 +6292,41 @@ for (;; ptr++)
hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
hc += LINK_SIZE)
{
- PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
- cd->hwm += LINK_SIZE;
- }
+ PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
+ cd->hwm += LINK_SIZE;
+ }
base_hwm_offset = this_hwm_offset;
- code += len;
- }
-
- /* Now chain through the pending brackets, and fill in their length
- fields (which are holding the chain links pro tem). */
-
- while (bralink != NULL)
- {
- int oldlinkoffset;
+ code += len;
+ }
+
+ /* Now chain through the pending brackets, and fill in their length
+ fields (which are holding the chain links pro tem). */
+
+ while (bralink != NULL)
+ {
+ int oldlinkoffset;
int offset = (int)(code - bralink + 1);
pcre_uchar *bra = code - offset;
- oldlinkoffset = GET(bra, 1);
- bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
- *code++ = OP_KET;
- PUTINC(code, 0, offset);
- PUT(bra, 1, offset);
- }
- }
-
+ oldlinkoffset = GET(bra, 1);
+ bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
+ *code++ = OP_KET;
+ PUTINC(code, 0, offset);
+ PUT(bra, 1, offset);
+ }
+ }
+
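In the pre-compile phase none of that replication is actually performed; only the length is adjusted, and the multiplication is done in a 64-bit (or double) intermediate before the OFLOW_MAX check. The bounded-maximum formula is the one below (only part of the overflow test is shown; LINK_SIZE and the previous group length are plain parameters here):

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

/* Length added for (group){min,max} replication in the pre-compile phase:
   each extra copy costs len+1 (BRAZERO) and all but the last also nest, adding 2+2*LINK_SIZE. */
static int replication_delta(int repeat_max, int length_prevgroup, int link_size, int *overflow)
{
  int64_t wide = (int64_t)repeat_max * (length_prevgroup + 1 + 2 + 2 * link_size);
  *overflow = (wide > INT_MAX);
  return (int)(wide - 2 - 2 * link_size);     /* the last copy does not nest */
}

int main(void)
{
  int overflow;
  int delta = replication_delta(3, 40, 2, &overflow);
  printf("delta=%d overflow=%d\n", delta, overflow);
  return 0;
}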
/* If the maximum is unlimited, set a repeater in the final copy. For
ONCE brackets, that's all we need to do. However, possessively repeated
ONCE brackets can be converted into non-capturing brackets, as the
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
deal with possessive ONCEs specially.
-
+
Otherwise, when we are doing the actual compile phase, check to see
whether this group is one that could match an empty string. If so,
- convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
+ convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
that runtime checking can be done. [This check is also applied to ONCE
groups at runtime, but in a different way.]
-
+
Then, if the quantifier was possessive and the bracket is not a
conditional, we convert the BRA code to the POS form, and the KET code to
KETRPOS. (It turns out to be convenient at runtime to detect this kind of
@@ -6340,8 +6340,8 @@ for (;; ptr++)
there will be earlier copies of the group, and so we still have to wrap
the whole thing. */
- else
- {
+ else
+ {
pcre_uchar *ketcode = code - 1 - LINK_SIZE;
pcre_uchar *bracode = ketcode - GET(ketcode, 1);
@@ -6360,23 +6360,23 @@ for (;; ptr++)
converted to non-capturing above). */
else
- {
+ {
/* In the compile phase, check for empty string matching. */
if (lengthptr == NULL)
- {
+ {
pcre_uchar *scode = bracode;
do
- {
+ {
if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
{
*bracode += OP_SBRA - OP_BRA;
break;
}
scode += GET(scode, 1);
- }
+ }
while (*scode == OP_ALT);
- }
+ }
/* A conditional group with only one branch has an implicit empty
alternative branch. */
@@ -6425,10 +6425,10 @@ for (;; ptr++)
/* Non-possessive quantifier */
else *ketcode = OP_KETRMAX + repeat_type;
- }
- }
- }
-
+ }
+ }
+ }
+
/* If previous is OP_FAIL, it was generated by an empty class [] in
JavaScript mode. The other ways in which OP_FAIL can be generated, that is
by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
@@ -6436,14 +6436,14 @@ for (;; ptr++)
else if (*previous == OP_FAIL) goto END_REPEAT;
- /* Else there's some kind of shambles */
-
- else
- {
- *errorcodeptr = ERR11;
- goto FAILED;
- }
-
+ /* Else there's some kind of shambles */
+
+ else
+ {
+ *errorcodeptr = ERR11;
+ goto FAILED;
+ }
+
/* If the character following a repeat is '+', possessive_quantifier is
TRUE. For some opcodes, there are special alternative opcodes for this
case. For anything else, we wrap the entire repeated item inside OP_ONCE
@@ -6453,12 +6453,12 @@ for (;; ptr++)
Some (but not all) possessively repeated subpatterns have already been
completely handled in the code just above. For them, possessive_quantifier
is always FALSE at this stage. Note that the repeated item starts at
- tempcode, not at previous, which might be the first part of a string whose
+ tempcode, not at previous, which might be the first part of a string whose
(former) last char we repeated. */
-
- if (possessive_quantifier)
- {
- int len;
+
+ if (possessive_quantifier)
+ {
+ int len;
/* Possessifying an EXACT quantifier has no effect, so we can ignore it.
However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
@@ -6543,23 +6543,23 @@ for (;; ptr++)
}
#ifdef NEVER
- if (len > 0) switch (*tempcode)
- {
- case OP_STAR: *tempcode = OP_POSSTAR; break;
- case OP_PLUS: *tempcode = OP_POSPLUS; break;
- case OP_QUERY: *tempcode = OP_POSQUERY; break;
- case OP_UPTO: *tempcode = OP_POSUPTO; break;
-
+ if (len > 0) switch (*tempcode)
+ {
+ case OP_STAR: *tempcode = OP_POSSTAR; break;
+ case OP_PLUS: *tempcode = OP_POSPLUS; break;
+ case OP_QUERY: *tempcode = OP_POSQUERY; break;
+ case OP_UPTO: *tempcode = OP_POSUPTO; break;
+
case OP_STARI: *tempcode = OP_POSSTARI; break;
case OP_PLUSI: *tempcode = OP_POSPLUSI; break;
case OP_QUERYI: *tempcode = OP_POSQUERYI; break;
case OP_UPTOI: *tempcode = OP_POSUPTOI; break;
-
- case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break;
- case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break;
- case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
- case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break;
-
+
+ case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break;
+ case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break;
+ case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
+ case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break;
+
case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break;
case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break;
case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break;
@@ -6578,50 +6578,50 @@ for (;; ptr++)
/* Because we are moving code along, we must ensure that any
pending recursive references are updated. */
- default:
+ default:
*code = OP_END;
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
- code += 1 + LINK_SIZE;
- len += 1 + LINK_SIZE;
- tempcode[0] = OP_ONCE;
- *code++ = OP_KET;
- PUTINC(code, 0, len);
- PUT(tempcode, 1, len);
- break;
- }
+ code += 1 + LINK_SIZE;
+ len += 1 + LINK_SIZE;
+ tempcode[0] = OP_ONCE;
+ *code++ = OP_KET;
+ PUTINC(code, 0, len);
+ PUT(tempcode, 1, len);
+ break;
+ }
#endif
- }
-
- /* In all cases we no longer have a previous item. We also set the
+ }
+
+ /* In all cases we no longer have a previous item. We also set the
"follows varying string" flag for subsequently encountered reqchars if
- it isn't already set and we have just passed a varying length item. */
-
- END_REPEAT:
- previous = NULL;
- cd->req_varyopt |= reqvary;
- break;
-
-
- /* ===================================================================*/
- /* Start of nested parenthesized sub-expression, or comment or lookahead or
- lookbehind or option setting or condition or all the other extended
- parenthesis forms. */
-
+ it isn't already set and we have just passed a varying length item. */
+
+ END_REPEAT:
+ previous = NULL;
+ cd->req_varyopt |= reqvary;
+ break;
+
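When possessive_quantifier is TRUE and no dedicated POS opcode fits, the repeated item is wrapped in OP_ONCE brackets, which is exactly the atomic-group reading of a possessive quantifier: x*+ behaves as (?>x*). A trivial demonstration of that equivalence at the pattern level (it only restates the comment above and does not call PCRE):

#include <stdio.h>

/* A possessive quantifier is equivalent to an atomic group around the greedy one:
   x*+  ==  (?>x*) ,  (ab){2,5}+  ==  (?>(ab){2,5}) */
static void possessive_equivalent(const char *item, const char *quant)
{
  printf("%s%s+  ==  (?>%s%s)\n", item, quant, item, quant);
}

int main(void)
{
  possessive_equivalent("x", "*");
  possessive_equivalent("(ab)", "{2,5}");
  return 0;
}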
+
+ /* ===================================================================*/
+ /* Start of nested parenthesized sub-expression, or comment or lookahead or
+ lookbehind or option setting or condition or all the other extended
+ parenthesis forms. */
+
case CHAR_LEFT_PARENTHESIS:
ptr++;
-
+
/* Now deal with various "verbs" that can be introduced by '*'. */
-
+
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
|| (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))
- {
- int i, namelen;
+ {
+ int i, namelen;
int arglen = 0;
- const char *vn = verbnames;
+ const char *vn = verbnames;
const pcre_uchar *name = ptr + 1;
const pcre_uchar *arg = NULL;
- previous = NULL;
+ previous = NULL;
ptr++;
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
namelen = (int)(ptr - name);
@@ -6631,7 +6631,7 @@ for (;; ptr++)
letters, digits, and underscores. */
if (*ptr == CHAR_COLON)
- {
+ {
arg = ++ptr;
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
arglen = (int)(ptr - arg);
@@ -6640,21 +6640,21 @@ for (;; ptr++)
*errorcodeptr = ERR75;
goto FAILED;
}
- }
+ }
if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR60;
- goto FAILED;
- }
+ {
+ *errorcodeptr = ERR60;
+ goto FAILED;
+ }
/* Scan the table of verb names */
- for (i = 0; i < verbcount; i++)
- {
- if (namelen == verbs[i].len &&
+ for (i = 0; i < verbcount; i++)
+ {
+ if (namelen == verbs[i].len &&
STRNCMP_UC_C8(name, vn, namelen) == 0)
- {
+ {
int setverb;
/* Check for open captures before ACCEPT and convert it to
@@ -6744,16 +6744,16 @@ for (;; ptr++)
}
break; /* Found verb, exit loop */
- }
+ }
- vn += verbs[i].len + 1;
- }
+ vn += verbs[i].len + 1;
+ }
if (i < verbcount) continue; /* Successfully handled a verb */
*errorcodeptr = ERR60; /* Verb not recognized */
- goto FAILED;
- }
-
+ goto FAILED;
+ }
+
/* Initialize for "real" parentheses */
newoptions = options;
@@ -6762,48 +6762,48 @@ for (;; ptr++)
item_hwm_offset = cd->hwm - cd->start_workspace;
reset_bracount = FALSE;
- /* Deal with the extended parentheses; all are introduced by '?', and the
- appearance of any of them means that this is not a capturing group. */
-
+ /* Deal with the extended parentheses; all are introduced by '?', and the
+ appearance of any of them means that this is not a capturing group. */
+
if (*ptr == CHAR_QUESTION_MARK)
- {
- int i, set, unset, namelen;
- int *optset;
+ {
+ int i, set, unset, namelen;
+ int *optset;
const pcre_uchar *name;
pcre_uchar *slot;
-
- switch (*(++ptr))
- {
- /* ------------------------------------------------------------ */
+
+ switch (*(++ptr))
+ {
+ /* ------------------------------------------------------------ */
case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */
- reset_bracount = TRUE;
+ reset_bracount = TRUE;
cd->dupgroups = TRUE; /* Record (?| encountered */
- /* Fall through */
-
- /* ------------------------------------------------------------ */
+ /* Fall through */
+
+ /* ------------------------------------------------------------ */
case CHAR_COLON: /* Non-capturing bracket */
- bravalue = OP_BRA;
- ptr++;
- break;
-
-
- /* ------------------------------------------------------------ */
+ bravalue = OP_BRA;
+ ptr++;
+ break;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
- bravalue = OP_COND; /* Conditional group */
+ bravalue = OP_COND; /* Conditional group */
tempptr = ptr;
-
- /* A condition can be an assertion, a number (referring to a numbered
+
+ /* A condition can be an assertion, a number (referring to a numbered
group's having been set), a name (referring to a named group), or 'R',
referring to recursion. R<digits> and R&name are also permitted for
recursion tests.
-
+
There are ways of testing a named group: (?(name)) is used by Python;
Perl 5.10 onwards uses (?(<name>) or (?('name')).
-
+
There is one unfortunate ambiguity, caused by history. 'R' can be the
recursive thing or the name 'R' (and similarly for 'R' followed by
digits). We look for a name first; if not found, we try the other case.
-
+
For compatibility with auto-callouts, we allow a callout to be
specified before a condition that is an assertion. First, check for the
syntax of a callout; if found, adjust the temporary pointer that is
@@ -6825,10 +6825,10 @@ for (;; ptr++)
}
}
- /* For conditions that are assertions, check the syntax, and then exit
- the switch. This will take control down to where bracketed groups,
- including assertions, are processed. */
-
+ /* For conditions that are assertions, check the syntax, and then exit
+ the switch. This will take control down to where bracketed groups,
+ including assertions, are processed. */
+
if (tempptr[1] == CHAR_QUESTION_MARK &&
(tempptr[2] == CHAR_EQUALS_SIGN ||
tempptr[2] == CHAR_EXCLAMATION_MARK ||
@@ -6837,54 +6837,54 @@ for (;; ptr++)
tempptr[3] == CHAR_EXCLAMATION_MARK))))
{
cd->iscondassert = TRUE;
- break;
+ break;
}
-
+
/* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
-
- code[1+LINK_SIZE] = OP_CREF;
+
+ code[1+LINK_SIZE] = OP_CREF;
skipbytes = 1+IMM2_SIZE;
refsign = -1; /* => not a number */
namelen = -1; /* => not a name; must set to avoid warning */
name = NULL; /* Always set to avoid warning */
recno = 0; /* Always set to avoid warning */
-
- /* Check for a test for recursion in a named group. */
-
+
+ /* Check for a test for recursion in a named group. */
+
ptr++;
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
- {
- terminator = -1;
- ptr += 2;
- code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */
- }
-
- /* Check for a test for a named group's having been set, using the Perl
+ {
+ terminator = -1;
+ ptr += 2;
+ code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */
+ }
+
+ /* Check for a test for a named group's having been set, using the Perl
syntax (?(<name>) or (?('name'), and also allow for the original PCRE
syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). */
-
+
else if (*ptr == CHAR_LESS_THAN_SIGN)
- {
+ {
terminator = CHAR_GREATER_THAN_SIGN;
- ptr++;
- }
+ ptr++;
+ }
else if (*ptr == CHAR_APOSTROPHE)
- {
+ {
terminator = CHAR_APOSTROPHE;
- ptr++;
- }
- else
- {
+ ptr++;
+ }
+ else
+ {
terminator = CHAR_NULL;
if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
else if (IS_DIGIT(*ptr)) refsign = 0;
- }
-
+ }
+
/* Handle a number */
-
+
if (refsign >= 0)
- {
+ {
while (IS_DIGIT(*ptr))
{
if (recno > INT_MAX / 10 - 1) /* Integer overflow */
@@ -6896,15 +6896,15 @@ for (;; ptr++)
recno = recno * 10 + (int)(*ptr - CHAR_0);
ptr++;
}
- }
-
+ }
+
/* Otherwise we expect to read a name; anything else is an error. When
a name is one of a number of duplicates, a different opcode is used and
it needs more memory. Unfortunately we cannot tell whether a name is a
duplicate in the first pass, so we have to allow for more memory. */
-
+
else
- {
+ {
if (IS_DIGIT(*ptr))
{
*errorcodeptr = ERR84;
@@ -6922,62 +6922,62 @@ for (;; ptr++)
}
namelen = (int)(ptr - name);
if (lengthptr != NULL) skipbytes += IMM2_SIZE;
- }
-
+ }
+
/* Check the terminator */
if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
*ptr++ != CHAR_RIGHT_PARENTHESIS)
- {
+ {
ptr--; /* Error offset */
*errorcodeptr = ERR26; /* Malformed number or name */
- goto FAILED;
- }
-
- /* Do no further checking in the pre-compile phase. */
-
- if (lengthptr != NULL) break;
-
- /* In the real compile we do the work of looking for the actual
+ goto FAILED;
+ }
+
+ /* Do no further checking in the pre-compile phase. */
+
+ if (lengthptr != NULL) break;
+
+ /* In the real compile we do the work of looking for the actual
reference. If refsign is not negative, it means we have a number in
recno. */
-
+
if (refsign >= 0)
- {
- if (recno <= 0)
- {
+ {
+ if (recno <= 0)
+ {
*errorcodeptr = ERR35;
- goto FAILED;
- }
+ goto FAILED;
+ }
if (refsign != 0) recno = (refsign == CHAR_MINUS)?
cd->bracount - recno + 1 : recno + cd->bracount;
- if (recno <= 0 || recno > cd->final_bracount)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- PUT2(code, 2+LINK_SIZE, recno);
+ if (recno <= 0 || recno > cd->final_bracount)
+ {
+ *errorcodeptr = ERR15;
+ goto FAILED;
+ }
+ PUT2(code, 2+LINK_SIZE, recno);
if (recno > cd->top_backref) cd->top_backref = recno;
- break;
- }
-
+ break;
+ }
+
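The number handling above also resolves Perl-style relative conditions: (?(-n) counts back n capturing groups from the current position and (?(+n) counts forward, using the running bracket count at compile time (the cd->bracount - recno + 1 : recno + cd->bracount expression). A standalone version of that computation, with assumed names:

#include <stdio.h>

/* Resolve the group number tested by (?(+n)...) / (?(-n)...) relative conditions.
   'bracount' is the number of capturing groups opened so far in the pattern. */
static int resolve_relative(int sign, int n, int bracount)
{
  if (sign < 0) return bracount - n + 1;    /* (?(-n): n groups back, the most recent counting as 1 */
  if (sign > 0) return bracount + n;        /* (?(+n): the n-th group still to come */
  return n;                                 /* (?(n): absolute */
}

int main(void)
{
  /* With 3 groups already open, (?(-1) refers to group 3 and (?(+2) to group 5. */
  printf("(?(-1) -> %d, (?(+2) -> %d, (?(2) -> %d\n",
         resolve_relative(-1, 1, 3), resolve_relative(+1, 2, 3), resolve_relative(0, 2, 3));
  return 0;
}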
/* Otherwise look for the name. */
-
- slot = cd->name_table;
- for (i = 0; i < cd->names_found; i++)
- {
+
+ slot = cd->name_table;
+ for (i = 0; i < cd->names_found; i++)
+ {
if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
slot[IMM2_SIZE+namelen] == 0) break;
- slot += cd->name_entry_size;
- }
-
+ slot += cd->name_entry_size;
+ }
+
/* Found the named subpattern. If the name is duplicated, add one to
the opcode to change CREF/RREF into DNCREF/DNRREF and insert
appropriate data values. Otherwise, just insert the unique subpattern
number. */
-
- if (i < cd->names_found)
- {
+
+ if (i < cd->names_found)
+ {
int offset = i++;
int count = 1;
recno = GET2(slot, 0); /* Number from first found */
@@ -6989,7 +6989,7 @@ for (;; ptr++)
(slot+IMM2_SIZE)[namelen] != 0) break;
count++;
}
-
+
if (count > 1)
{
PUT2(code, 2+LINK_SIZE, offset);
@@ -7001,133 +7001,133 @@ for (;; ptr++)
{
PUT2(code, 2+LINK_SIZE, recno);
}
- }
-
+ }
+
/* If terminator == CHAR_NULL it means that the name followed directly
after the opening parenthesis [e.g. (?(abc)...] and in this case there
are some further alternatives to try. For the cases where terminator !=
CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ]
we have now checked all the possibilities, so give an error. */
-
+
else if (terminator != CHAR_NULL)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
-
- /* Check for (?(R) for recursion. Allow digits after R to specify a
- specific group number. */
-
+ {
+ *errorcodeptr = ERR15;
+ goto FAILED;
+ }
+
+ /* Check for (?(R) for recursion. Allow digits after R to specify a
+ specific group number. */
+
else if (*name == CHAR_R)
- {
- recno = 0;
- for (i = 1; i < namelen; i++)
- {
+ {
+ recno = 0;
+ for (i = 1; i < namelen; i++)
+ {
if (!IS_DIGIT(name[i]))
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
+ {
+ *errorcodeptr = ERR15;
+ goto FAILED;
+ }
if (recno > INT_MAX / 10 - 1) /* Integer overflow */
{
*errorcodeptr = ERR61;
goto FAILED;
}
recno = recno * 10 + name[i] - CHAR_0;
- }
- if (recno == 0) recno = RREF_ANY;
- code[1+LINK_SIZE] = OP_RREF; /* Change test type */
- PUT2(code, 2+LINK_SIZE, recno);
- }
-
- /* Similarly, check for the (?(DEFINE) "condition", which is always
- false. */
-
+ }
+ if (recno == 0) recno = RREF_ANY;
+ code[1+LINK_SIZE] = OP_RREF; /* Change test type */
+ PUT2(code, 2+LINK_SIZE, recno);
+ }
+
+ /* Similarly, check for the (?(DEFINE) "condition", which is always
+ false. */
+
else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
- {
- code[1+LINK_SIZE] = OP_DEF;
- skipbytes = 1;
- }
-
+ {
+ code[1+LINK_SIZE] = OP_DEF;
+ skipbytes = 1;
+ }
+
/* Reference to an unidentified subpattern. */
-
- else
- {
+
+ else
+ {
*errorcodeptr = ERR15;
- goto FAILED;
- }
- break;
-
-
- /* ------------------------------------------------------------ */
+ goto FAILED;
+ }
+ break;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_EQUALS_SIGN: /* Positive lookahead */
- bravalue = OP_ASSERT;
+ bravalue = OP_ASSERT;
cd->assert_depth += 1;
- ptr++;
- break;
-
+ ptr++;
+ break;
+
/* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
thing to do, but Perl allows all assertions to be quantified, and when
they contain capturing parentheses there may be a potential use for
this feature. Not that that applies to a quantified (?!) but we allow
it for uniformity. */
-
- /* ------------------------------------------------------------ */
+
+ /* ------------------------------------------------------------ */
case CHAR_EXCLAMATION_MARK: /* Negative lookahead */
- ptr++;
+ ptr++;
if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
(ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
- {
- *code++ = OP_FAIL;
- previous = NULL;
- continue;
- }
- bravalue = OP_ASSERT_NOT;
+ {
+ *code++ = OP_FAIL;
+ previous = NULL;
+ continue;
+ }
+ bravalue = OP_ASSERT_NOT;
cd->assert_depth += 1;
- break;
-
-
- /* ------------------------------------------------------------ */
+ break;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */
- switch (ptr[1])
- {
+ switch (ptr[1])
+ {
case CHAR_EQUALS_SIGN: /* Positive lookbehind */
- bravalue = OP_ASSERTBACK;
+ bravalue = OP_ASSERTBACK;
cd->assert_depth += 1;
- ptr += 2;
- break;
-
+ ptr += 2;
+ break;
+
case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */
- bravalue = OP_ASSERTBACK_NOT;
+ bravalue = OP_ASSERTBACK_NOT;
cd->assert_depth += 1;
- ptr += 2;
- break;
-
- default: /* Could be name define, else bad */
+ ptr += 2;
+ break;
+
+ default: /* Could be name define, else bad */
if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0)
goto DEFINE_NAME;
- ptr++; /* Correct offset for error */
- *errorcodeptr = ERR24;
- goto FAILED;
- }
- break;
-
-
- /* ------------------------------------------------------------ */
+ ptr++; /* Correct offset for error */
+ *errorcodeptr = ERR24;
+ goto FAILED;
+ }
+ break;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_GREATER_THAN_SIGN: /* One-time brackets */
- bravalue = OP_ONCE;
- ptr++;
- break;
-
-
- /* ------------------------------------------------------------ */
+ bravalue = OP_ONCE;
+ ptr++;
+ break;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_C: /* Callout - may be followed by digits; */
previous_callout = code; /* Save for later completion */
after_manual_callout = 1; /* Skip one item before completing */
- *code++ = OP_CALLOUT;
- {
- int n = 0;
+ *code++ = OP_CALLOUT;
+ {
+ int n = 0;
ptr++;
while(IS_DIGIT(*ptr))
{
@@ -7139,63 +7139,63 @@ for (;; ptr++)
}
}
if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR39;
- goto FAILED;
- }
- *code++ = n;
+ {
+ *errorcodeptr = ERR39;
+ goto FAILED;
+ }
+ *code++ = n;
PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
PUT(code, LINK_SIZE, 0); /* Default length */
- code += 2 * LINK_SIZE;
- }
- previous = NULL;
- continue;
-
-
- /* ------------------------------------------------------------ */
+ code += 2 * LINK_SIZE;
+ }
+ previous = NULL;
+ continue;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_P: /* Python-style named subpattern handling */
if (*(++ptr) == CHAR_EQUALS_SIGN ||
*ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */
- {
+ {
is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
terminator = CHAR_RIGHT_PARENTHESIS;
- goto NAMED_REF_OR_RECURSE;
- }
+ goto NAMED_REF_OR_RECURSE;
+ }
else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */
- {
- *errorcodeptr = ERR41;
- goto FAILED;
- }
- /* Fall through to handle (?P< as (?< is handled */
-
-
- /* ------------------------------------------------------------ */
- DEFINE_NAME: /* Come here from (?< handling */
+ {
+ *errorcodeptr = ERR41;
+ goto FAILED;
+ }
+ /* Fall through to handle (?P< as (?< is handled */
+
+
+ /* ------------------------------------------------------------ */
+ DEFINE_NAME: /* Come here from (?< handling */
case CHAR_APOSTROPHE:
terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
name = ++ptr;
if (IS_DIGIT(*ptr))
- {
+ {
*errorcodeptr = ERR84; /* Group name must start with non-digit */
goto FAILED;
}
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
namelen = (int)(ptr - name);
-
+
/* In the pre-compile phase, do a syntax check, remember the longest
name, and then remember the group in a vector, expanding it if
necessary. Duplicates for the same number are skipped; other duplicates
are checked for validity. In the actual compile, there is nothing to
do. */
-
+
if (lengthptr != NULL)
{
named_group *ng;
pcre_uint32 number = cd->bracount + 1;
-
+
if (*ptr != (pcre_uchar)terminator)
- {
+ {
*errorcodeptr = ERR42;
goto FAILED;
}
@@ -7210,10 +7210,10 @@ for (;; ptr++)
{
cd->name_entry_size = namelen + IMM2_SIZE + 1;
if (namelen > MAX_NAME_SIZE)
- {
+ {
*errorcodeptr = ERR48;
- goto FAILED;
- }
+ goto FAILED;
+ }
}
/* Scan the list to check for duplicates. For duplicate names, if the
@@ -7228,37 +7228,37 @@ for (;; ptr++)
{
if (namelen == ng->length &&
STRNCMP_UC_UC(name, ng->name, namelen) == 0)
- {
+ {
if (ng->number == number) break;
if ((options & PCRE_DUPNAMES) == 0)
- {
+ {
*errorcodeptr = ERR43;
- goto FAILED;
- }
+ goto FAILED;
+ }
cd->dupnames = TRUE; /* Duplicate names exist */
- }
+ }
else if (ng->number == number)
{
*errorcodeptr = ERR65;
goto FAILED;
}
- }
-
+ }
+
if (i >= cd->names_found) /* Not a duplicate with same number */
{
/* Increase the list size if necessary */
-
+
if (cd->names_found >= cd->named_group_list_size)
- {
+ {
int newsize = cd->named_group_list_size * 2;
named_group *newspace = (PUBL(malloc))
(newsize * sizeof(named_group));
if (newspace == NULL)
- {
+ {
*errorcodeptr = ERR21;
goto FAILED;
- }
+ }
memcpy(newspace, cd->named_groups,
cd->named_group_list_size * sizeof(named_group));
@@ -7266,33 +7266,33 @@ for (;; ptr++)
(PUBL(free))((void *)cd->named_groups);
cd->named_groups = newspace;
cd->named_group_list_size = newsize;
- }
-
+ }
+
cd->named_groups[cd->names_found].name = name;
cd->named_groups[cd->names_found].length = namelen;
cd->named_groups[cd->names_found].number = number;
cd->names_found++;
- }
- }
-
+ }
+ }
+
ptr++; /* Move past > or ' in both passes. */
- goto NUMBERED_GROUP;
-
-
- /* ------------------------------------------------------------ */
+ goto NUMBERED_GROUP;
+
+
+ /* ------------------------------------------------------------ */
case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */
terminator = CHAR_RIGHT_PARENTHESIS;
- is_recurse = TRUE;
- /* Fall through */
-
- /* We come here from the Python syntax above that handles both
- references (?P=name) and recursion (?P>name), as well as falling
- through from the Perl recursion syntax (?&name). We also come here from
- the Perl \k<name> or \k'name' back reference syntax and the \k{name}
+ is_recurse = TRUE;
+ /* Fall through */
+
+ /* We come here from the Python syntax above that handles both
+ references (?P=name) and recursion (?P>name), as well as falling
+ through from the Perl recursion syntax (?&name). We also come here from
+ the Perl \k<name> or \k'name' back reference syntax and the \k{name}
.NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
-
- NAMED_REF_OR_RECURSE:
- name = ++ptr;
+
+ NAMED_REF_OR_RECURSE:
+ name = ++ptr;
if (IS_DIGIT(*ptr))
{
*errorcodeptr = ERR84; /* Group name must start with non-digit */
@@ -7300,34 +7300,34 @@ for (;; ptr++)
}
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
namelen = (int)(ptr - name);
-
+
/* In the pre-compile phase, do a syntax check. We used to just set
a dummy reference number, because it was not used in the first pass.
However, with the change of recursive back references to be atomic,
we have to look for the number so that this state can be identified, as
otherwise the incorrect length is computed. If it's not a backwards
reference, the dummy number will do. */
-
- if (lengthptr != NULL)
- {
+
+ if (lengthptr != NULL)
+ {
named_group *ng;
recno = 0;
- if (namelen == 0)
- {
- *errorcodeptr = ERR62;
- goto FAILED;
- }
+ if (namelen == 0)
+ {
+ *errorcodeptr = ERR62;
+ goto FAILED;
+ }
if (*ptr != (pcre_uchar)terminator)
- {
- *errorcodeptr = ERR42;
- goto FAILED;
- }
- if (namelen > MAX_NAME_SIZE)
- {
- *errorcodeptr = ERR48;
- goto FAILED;
- }
+ {
+ *errorcodeptr = ERR42;
+ goto FAILED;
+ }
+ if (namelen > MAX_NAME_SIZE)
+ {
+ *errorcodeptr = ERR48;
+ goto FAILED;
+ }
/* Count named back references. */
@@ -7393,43 +7393,43 @@ for (;; ptr++)
}
}
}
- }
-
+ }
+
/* In the real compile, search the name table. We check the name
- first, and then check that we have reached the end of the name in the
+ first, and then check that we have reached the end of the name in the
table. That way, if the name is longer than any in the table, the
comparison will fail without reading beyond the table entry. */
-
- else
- {
- slot = cd->name_table;
- for (i = 0; i < cd->names_found; i++)
- {
+
+ else
+ {
+ slot = cd->name_table;
+ for (i = 0; i < cd->names_found; i++)
+ {
if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
slot[IMM2_SIZE+namelen] == 0)
- break;
- slot += cd->name_entry_size;
- }
-
+ break;
+ slot += cd->name_entry_size;
+ }
+
if (i < cd->names_found)
- {
- recno = GET2(slot, 0);
- }
+ {
+ recno = GET2(slot, 0);
+ }
else
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- }
-
+ {
+ *errorcodeptr = ERR15;
+ goto FAILED;
+ }
+ }
+
    /* In both phases, for recursions, we can now go to the code that
handles numerical recursion. */
-
- if (is_recurse) goto HANDLE_RECURSION;
-
+
+ if (is_recurse) goto HANDLE_RECURSION;
+
/* In the second pass we must see if the name is duplicated. If so, we
generate a different opcode. */
-
+
if (lengthptr == NULL && cd->dupnames)
{
int count = 1;
@@ -7484,7 +7484,7 @@ for (;; ptr++)
goto HANDLE_REFERENCE;
- /* ------------------------------------------------------------ */
+ /* ------------------------------------------------------------ */
case CHAR_R: /* Recursion, same as (?0) */
recno = 0;
if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
@@ -7493,16 +7493,16 @@ for (;; ptr++)
goto FAILED;
}
goto HANDLE_RECURSION;
-
-
- /* ------------------------------------------------------------ */
+
+
+ /* ------------------------------------------------------------ */
case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */
case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
- {
+ {
const pcre_uchar *called;
terminator = CHAR_RIGHT_PARENTHESIS;
-
+
/* Come here from the \g<...> and \g'...' code (Oniguruma
compatibility). However, the syntax has been checked to ensure that
the ... are a (signed) number, so that neither ERR63 nor ERR29 will
@@ -7512,22 +7512,22 @@ for (;; ptr++)
HANDLE_NUMERICAL_RECURSION:
if ((refsign = *ptr) == CHAR_PLUS)
- {
- ptr++;
+ {
+ ptr++;
if (!IS_DIGIT(*ptr))
- {
- *errorcodeptr = ERR63;
- goto FAILED;
- }
- }
+ {
+ *errorcodeptr = ERR63;
+ goto FAILED;
+ }
+ }
else if (refsign == CHAR_MINUS)
- {
+ {
if (!IS_DIGIT(ptr[1]))
- goto OTHER_CHAR_AFTER_QUERY;
- ptr++;
- }
-
- recno = 0;
+ goto OTHER_CHAR_AFTER_QUERY;
+ ptr++;
+ }
+
+ recno = 0;
while(IS_DIGIT(*ptr))
{
if (recno > INT_MAX / 10 - 1) /* Integer overflow */
@@ -7538,73 +7538,73 @@ for (;; ptr++)
}
recno = recno * 10 + *ptr++ - CHAR_0;
}
-
+
if (*ptr != (pcre_uchar)terminator)
- {
- *errorcodeptr = ERR29;
- goto FAILED;
- }
-
+ {
+ *errorcodeptr = ERR29;
+ goto FAILED;
+ }
+
if (refsign == CHAR_MINUS)
- {
- if (recno == 0)
- {
- *errorcodeptr = ERR58;
- goto FAILED;
- }
- recno = cd->bracount - recno + 1;
- if (recno <= 0)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- }
+ {
+ if (recno == 0)
+ {
+ *errorcodeptr = ERR58;
+ goto FAILED;
+ }
+ recno = cd->bracount - recno + 1;
+ if (recno <= 0)
+ {
+ *errorcodeptr = ERR15;
+ goto FAILED;
+ }
+ }
else if (refsign == CHAR_PLUS)
- {
- if (recno == 0)
- {
- *errorcodeptr = ERR58;
- goto FAILED;
- }
- recno += cd->bracount;
- }
-
- /* Come here from code above that handles a named recursion */
-
- HANDLE_RECURSION:
-
- previous = code;
+ {
+ if (recno == 0)
+ {
+ *errorcodeptr = ERR58;
+ goto FAILED;
+ }
+ recno += cd->bracount;
+ }
+
+ /* Come here from code above that handles a named recursion */
+
+ HANDLE_RECURSION:
+
+ previous = code;
item_hwm_offset = cd->hwm - cd->start_workspace;
- called = cd->start_code;
-
- /* When we are actually compiling, find the bracket that is being
- referenced. Temporarily end the regex in case it doesn't exist before
- this point. If we end up with a forward reference, first check that
- the bracket does occur later so we can give the error (and position)
- now. Then remember this forward reference in the workspace so it can
- be filled in at the end. */
-
- if (lengthptr == NULL)
- {
- *code = OP_END;
+ called = cd->start_code;
+
+ /* When we are actually compiling, find the bracket that is being
+ referenced. Temporarily end the regex in case it doesn't exist before
+ this point. If we end up with a forward reference, first check that
+ the bracket does occur later so we can give the error (and position)
+ now. Then remember this forward reference in the workspace so it can
+ be filled in at the end. */
+
+ if (lengthptr == NULL)
+ {
+ *code = OP_END;
if (recno != 0)
called = PRIV(find_bracket)(cd->start_code, utf, recno);
-
- /* Forward reference */
-
- if (called == NULL)
- {
+
+ /* Forward reference */
+
+ if (called == NULL)
+ {
if (recno > cd->final_bracount)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
+ {
+ *errorcodeptr = ERR15;
+ goto FAILED;
+ }
/* Fudge the value of "called" so that when it is inserted as an
offset below, what it actually inserted is the reference number
of the group. Then remember the forward reference. */
- called = cd->start_code + recno;
+ called = cd->start_code + recno;
if (cd->hwm >= cd->start_workspace + cd->workspace_size -
WORK_SIZE_SAFETY_MARGIN)
{
@@ -7612,128 +7612,128 @@ for (;; ptr++)
if (*errorcodeptr != 0) goto FAILED;
}
PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
- }
-
- /* If not a forward reference, and the subpattern is still open,
- this is a recursive call. We check to see if this is a left
+ }
+
+ /* If not a forward reference, and the subpattern is still open,
+ this is a recursive call. We check to see if this is a left
recursion that could loop for ever, and diagnose that case. We
must not, however, do this check if we are in a conditional
subpattern because the condition might be testing for recursion in
a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid.
Forever loops are also detected at runtime, so those that occur in
conditional subpatterns will be picked up then. */
-
+
else if (GET(called, 1) == 0 && cond_depth <= 0 &&
could_be_empty(called, code, bcptr, utf, cd))
- {
- *errorcodeptr = ERR40;
- goto FAILED;
- }
- }
-
+ {
+ *errorcodeptr = ERR40;
+ goto FAILED;
+ }
+ }
+
/* Insert the recursion/subroutine item. It does not have a set first
character (relevant if it is repeated, because it will then be
wrapped with ONCE brackets). */
-
- *code = OP_RECURSE;
+
+ *code = OP_RECURSE;
PUT(code, 1, (int)(called - cd->start_code));
- code += 1 + LINK_SIZE;
+ code += 1 + LINK_SIZE;
groupsetfirstchar = FALSE;
- }
-
- /* Can't determine a first byte now */
-
+ }
+
+ /* Can't determine a first byte now */
+
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
- continue;
-
-
- /* ------------------------------------------------------------ */
- default: /* Other characters: check option setting */
- OTHER_CHAR_AFTER_QUERY:
- set = unset = 0;
- optset = &set;
-
+ continue;
+
+
+ /* ------------------------------------------------------------ */
+ default: /* Other characters: check option setting */
+ OTHER_CHAR_AFTER_QUERY:
+ set = unset = 0;
+ optset = &set;
+
while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
- {
- switch (*ptr++)
- {
+ {
+ switch (*ptr++)
+ {
case CHAR_MINUS: optset = &unset; break;
-
+
case CHAR_J: /* Record that it changed in the external options */
- *optset |= PCRE_DUPNAMES;
- cd->external_flags |= PCRE_JCHANGED;
- break;
-
+ *optset |= PCRE_DUPNAMES;
+ cd->external_flags |= PCRE_JCHANGED;
+ break;
+
case CHAR_i: *optset |= PCRE_CASELESS; break;
case CHAR_m: *optset |= PCRE_MULTILINE; break;
case CHAR_s: *optset |= PCRE_DOTALL; break;
case CHAR_x: *optset |= PCRE_EXTENDED; break;
case CHAR_U: *optset |= PCRE_UNGREEDY; break;
case CHAR_X: *optset |= PCRE_EXTRA; break;
-
- default: *errorcodeptr = ERR12;
- ptr--; /* Correct the offset */
- goto FAILED;
- }
- }
-
- /* Set up the changed option bits, but don't change anything yet. */
-
- newoptions = (options | set) & (~unset);
-
- /* If the options ended with ')' this is not the start of a nested
+
+ default: *errorcodeptr = ERR12;
+ ptr--; /* Correct the offset */
+ goto FAILED;
+ }
+ }
+
+ /* Set up the changed option bits, but don't change anything yet. */
+
+ newoptions = (options | set) & (~unset);
+
+ /* If the options ended with ')' this is not the start of a nested
group with option changes, so the options change at this level.
If we are not at the pattern start, reset the greedy defaults and the
case value for firstchar and reqchar. */
-
+
if (*ptr == CHAR_RIGHT_PARENTHESIS)
- {
+ {
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
-
+
/* Change options at this level, and pass them back for use
in subsequent branches. */
-
+
*optionsptr = options = newoptions;
- previous = NULL; /* This item can't be repeated */
- continue; /* It is complete */
- }
-
- /* If the options ended with ':' we are heading into a nested group
- with possible change of options. Such groups are non-capturing and are
- not assertions of any kind. All we need to do is skip over the ':';
- the newoptions value is handled below. */
-
- bravalue = OP_BRA;
- ptr++;
- } /* End of switch for character following (? */
- } /* End of (? handling */
-
+ previous = NULL; /* This item can't be repeated */
+ continue; /* It is complete */
+ }
+
+ /* If the options ended with ':' we are heading into a nested group
+ with possible change of options. Such groups are non-capturing and are
+ not assertions of any kind. All we need to do is skip over the ':';
+ the newoptions value is handled below. */
+
+ bravalue = OP_BRA;
+ ptr++;
+ } /* End of switch for character following (? */
+ } /* End of (? handling */
+
/* Opening parenthesis not followed by '*' or '?'. If PCRE_NO_AUTO_CAPTURE
is set, all unadorned brackets become non-capturing and behave like (?:...)
- brackets. */
-
- else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
- {
- bravalue = OP_BRA;
- }
-
- /* Else we have a capturing group. */
-
- else
- {
- NUMBERED_GROUP:
- cd->bracount += 1;
- PUT2(code, 1+LINK_SIZE, cd->bracount);
+ brackets. */
+
+ else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
+ {
+ bravalue = OP_BRA;
+ }
+
+ /* Else we have a capturing group. */
+
+ else
+ {
+ NUMBERED_GROUP:
+ cd->bracount += 1;
+ PUT2(code, 1+LINK_SIZE, cd->bracount);
skipbytes = IMM2_SIZE;
- }
-
+ }
+
/* Process nested bracketed regex. First check for parentheses nested too
deeply. */
-
+
if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)
{
*errorcodeptr = ERR82;
@@ -7760,19 +7760,19 @@ for (;; ptr++)
item_hwm_offset = cd->hwm - cd->start_workspace;
}
- *code = bravalue;
- tempcode = code;
+ *code = bravalue;
+ tempcode = code;
tempreqvary = cd->req_varyopt; /* Save value before bracket */
tempbracount = cd->bracount; /* Save value before bracket */
length_prevgroup = 0; /* Initialize for pre-compile phase */
-
- if (!compile_regex(
+
+ if (!compile_regex(
newoptions, /* The complete new option state */
&tempcode, /* Where to put code (updated) */
&ptr, /* Input pointer (updated) */
errorcodeptr, /* Where to put an error message */
- (bravalue == OP_ASSERTBACK ||
- bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
+ (bravalue == OP_ASSERTBACK ||
+ bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
reset_bracount, /* True if (?| group */
skipbytes, /* Skip over bracket number */
cond_depth +
@@ -7785,9 +7785,9 @@ for (;; ptr++)
cd, /* Tables block */
(lengthptr == NULL)? NULL : /* Actual compile phase */
&length_prevgroup /* Pre-compile phase */
- ))
- goto FAILED;
-
+ ))
+ goto FAILED;
+
cd->parens_depth -= 1;
/* If this was an atomic group and there are no capturing groups within it,
@@ -7799,144 +7799,144 @@ for (;; ptr++)
if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
cd->assert_depth -= 1;
- /* At the end of compiling, code is still pointing to the start of the
+ /* At the end of compiling, code is still pointing to the start of the
group, while tempcode has been updated to point past the end of the group.
The pattern pointer (ptr) is on the bracket.
-
+
If this is a conditional bracket, check that there are no more than
- two branches in the group, or just one if it's a DEFINE group. We do this
- in the real compile phase, not in the pre-pass, where the whole group may
- not be available. */
-
- if (bravalue == OP_COND && lengthptr == NULL)
- {
+ two branches in the group, or just one if it's a DEFINE group. We do this
+ in the real compile phase, not in the pre-pass, where the whole group may
+ not be available. */
+
+ if (bravalue == OP_COND && lengthptr == NULL)
+ {
pcre_uchar *tc = code;
- int condcount = 0;
-
- do {
- condcount++;
- tc += GET(tc,1);
- }
- while (*tc != OP_KET);
-
- /* A DEFINE group is never obeyed inline (the "condition" is always
- false). It must have only one branch. */
-
- if (code[LINK_SIZE+1] == OP_DEF)
- {
- if (condcount > 1)
- {
- *errorcodeptr = ERR54;
- goto FAILED;
- }
- bravalue = OP_DEF; /* Just a flag to suppress char handling below */
- }
-
- /* A "normal" conditional group. If there is just one branch, we must not
+ int condcount = 0;
+
+ do {
+ condcount++;
+ tc += GET(tc,1);
+ }
+ while (*tc != OP_KET);
+
+ /* A DEFINE group is never obeyed inline (the "condition" is always
+ false). It must have only one branch. */
+
+ if (code[LINK_SIZE+1] == OP_DEF)
+ {
+ if (condcount > 1)
+ {
+ *errorcodeptr = ERR54;
+ goto FAILED;
+ }
+ bravalue = OP_DEF; /* Just a flag to suppress char handling below */
+ }
+
+ /* A "normal" conditional group. If there is just one branch, we must not
make use of its firstchar or reqchar, because this is equivalent to an
- empty second branch. */
-
- else
- {
- if (condcount > 2)
- {
- *errorcodeptr = ERR27;
- goto FAILED;
- }
+ empty second branch. */
+
+ else
+ {
+ if (condcount > 2)
+ {
+ *errorcodeptr = ERR27;
+ goto FAILED;
+ }
if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE;
- }
- }
-
- /* Error if hit end of pattern */
-
+ }
+ }
+
+ /* Error if hit end of pattern */
+
if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR14;
- goto FAILED;
- }
-
- /* In the pre-compile phase, update the length by the length of the group,
- less the brackets at either end. Then reduce the compiled code to just a
- set of non-capturing brackets so that it doesn't use much memory if it is
- duplicated by a quantifier.*/
-
- if (lengthptr != NULL)
- {
- if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
+ {
+ *errorcodeptr = ERR14;
+ goto FAILED;
+ }
+
+ /* In the pre-compile phase, update the length by the length of the group,
+ less the brackets at either end. Then reduce the compiled code to just a
+ set of non-capturing brackets so that it doesn't use much memory if it is
+ duplicated by a quantifier.*/
+
+ if (lengthptr != NULL)
+ {
+ if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+ *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
code++; /* This already contains bravalue */
- PUTINC(code, 0, 1 + LINK_SIZE);
- *code++ = OP_KET;
- PUTINC(code, 0, 1 + LINK_SIZE);
- break; /* No need to waste time with special character handling */
- }
-
- /* Otherwise update the main code pointer to the end of the group. */
-
- code = tempcode;
-
- /* For a DEFINE group, required and first character settings are not
- relevant. */
-
- if (bravalue == OP_DEF) break;
-
- /* Handle updating of the required and first characters for other types of
- group. Update for normal brackets of all kinds, and conditions with two
- branches (see code above). If the bracket is followed by a quantifier with
+ PUTINC(code, 0, 1 + LINK_SIZE);
+ *code++ = OP_KET;
+ PUTINC(code, 0, 1 + LINK_SIZE);
+ break; /* No need to waste time with special character handling */
+ }
+
+ /* Otherwise update the main code pointer to the end of the group. */
+
+ code = tempcode;
+
+ /* For a DEFINE group, required and first character settings are not
+ relevant. */
+
+ if (bravalue == OP_DEF) break;
+
+ /* Handle updating of the required and first characters for other types of
+ group. Update for normal brackets of all kinds, and conditions with two
+ branches (see code above). If the bracket is followed by a quantifier with
zero repeat, we have to back off. Hence the definition of zeroreqchar and
zerofirstchar outside the main loop so that they can be accessed for the
- back off. */
-
+ back off. */
+
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
groupsetfirstchar = FALSE;
-
- if (bravalue >= OP_ONCE)
- {
+
+ if (bravalue >= OP_ONCE)
+ {
/* If we have not yet set a firstchar in this branch, take it from the
- subpattern, remembering that it was set here so that a repeat of more
+ subpattern, remembering that it was set here so that a repeat of more
than one can replicate it as reqchar if necessary. If the subpattern has
no firstchar, set "none" for the whole branch. In both cases, a zero
repeat forces firstchar to "none". */
-
+
if (firstcharflags == REQ_UNSET)
- {
+ {
if (subfirstcharflags >= 0)
- {
+ {
firstchar = subfirstchar;
firstcharflags = subfirstcharflags;
groupsetfirstchar = TRUE;
- }
+ }
else firstcharflags = REQ_NONE;
zerofirstcharflags = REQ_NONE;
- }
-
+ }
+
/* If firstchar was previously set, convert the subpattern's firstchar
into reqchar if there wasn't one, using the vary flag that was in
- existence beforehand. */
-
+ existence beforehand. */
+
else if (subfirstcharflags >= 0 && subreqcharflags < 0)
{
subreqchar = subfirstchar;
subreqcharflags = subfirstcharflags | tempreqvary;
}
-
- /* If the subpattern set a required byte (or set a first byte that isn't
- really the first byte - see above), set it. */
-
+
+ /* If the subpattern set a required byte (or set a first byte that isn't
+ really the first byte - see above), set it. */
+
if (subreqcharflags >= 0)
{
reqchar = subreqchar;
reqcharflags = subreqcharflags;
}
- }
-
+ }
+
/* For a forward assertion, we take the reqchar, if set, provided that the
group has also set a first char. This can be helpful if the pattern that
follows the assertion doesn't set a different char. For example, it's
@@ -7945,47 +7945,47 @@ for (;; ptr++)
the "real" "a" would then become a reqchar instead of a firstchar. This is
overcome by a scan at the end if there's no firstchar, looking for an
asserted first char. */
-
+
else if (bravalue == OP_ASSERT && subreqcharflags >= 0 &&
subfirstcharflags >= 0)
{
reqchar = subreqchar;
reqcharflags = subreqcharflags;
}
- break; /* End of processing '(' */
-
-
- /* ===================================================================*/
- /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
+ break; /* End of processing '(' */
+
+
+ /* ===================================================================*/
+ /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
are arranged to be the negation of the corresponding OP_values in the
default case when PCRE_UCP is not set. For the back references, the values
are negative the reference number. Only back references and those types
that consume a character may be repeated. We can test for values between
ESC_b and ESC_Z for the latter; this may have to change if any new ones are
ever created. */
-
+
case CHAR_BACKSLASH:
- tempptr = ptr;
+ tempptr = ptr;
escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
- if (*errorcodeptr != 0) goto FAILED;
-
+ if (*errorcodeptr != 0) goto FAILED;
+
if (escape == 0) /* The escape coded a single character */
c = ec;
else
- {
- /* For metasequences that actually match a character, we disable the
- setting of a first character if it hasn't already been set. */
-
+ {
+ /* For metasequences that actually match a character, we disable the
+ setting of a first character if it hasn't already been set. */
+
if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
firstcharflags = REQ_NONE;
-
- /* Set values to reset to if this is followed by a zero repeat. */
-
+
+ /* Set values to reset to if this is followed by a zero repeat. */
+
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
-
+
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
is a subroutine call by number (Oniguruma syntax). In fact, the value
ESC_g is returned only for these cases. So we don't need to check for <
@@ -8033,44 +8033,44 @@ for (;; ptr++)
goto HANDLE_NUMERICAL_RECURSION;
}
- /* \k<name> or \k'name' is a back reference by name (Perl syntax).
+ /* \k<name> or \k'name' is a back reference by name (Perl syntax).
We also support \k{name} (.NET syntax). */
-
+
if (escape == ESC_k)
- {
+ {
if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
{
*errorcodeptr = ERR69;
goto FAILED;
}
- is_recurse = FALSE;
+ is_recurse = FALSE;
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
- goto NAMED_REF_OR_RECURSE;
- }
-
+ goto NAMED_REF_OR_RECURSE;
+ }
+
/* Back references are handled specially; must disable firstchar if
- not set to cope with cases like (?=(\w+))\1: which would otherwise set
- ':' later. */
-
+ not set to cope with cases like (?=(\w+))\1: which would otherwise set
+ ':' later. */
+
if (escape < 0)
- {
+ {
open_capitem *oc;
recno = -escape;
-
+
/* Come here from named backref handling when the reference is to a
      single group (i.e. not to a duplicated name). */
HANDLE_REFERENCE:
if (firstcharflags == REQ_UNSET) zerofirstcharflags = firstcharflags = REQ_NONE;
- previous = code;
+ previous = code;
item_hwm_offset = cd->hwm - cd->start_workspace;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
- PUT2INC(code, 0, recno);
+ PUT2INC(code, 0, recno);
cd->backref_map |= (recno < 32)? (1U << recno) : 1;
- if (recno > cd->top_backref) cd->top_backref = recno;
+ if (recno > cd->top_backref) cd->top_backref = recno;
      /* Check to see if this back reference is recursive, that is, it
is inside the group that it references. A flag is set so that the
@@ -8084,43 +8084,43 @@ for (;; ptr++)
break;
}
}
- }
-
- /* So are Unicode property matches, if supported. */
-
-#ifdef SUPPORT_UCP
+ }
+
+ /* So are Unicode property matches, if supported. */
+
+#ifdef SUPPORT_UCP
else if (escape == ESC_P || escape == ESC_p)
- {
- BOOL negated;
+ {
+ BOOL negated;
unsigned int ptype = 0, pdata = 0;
if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
goto FAILED;
- previous = code;
+ previous = code;
item_hwm_offset = cd->hwm - cd->start_workspace;
*code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
- *code++ = ptype;
- *code++ = pdata;
- }
-#else
-
- /* If Unicode properties are not supported, \X, \P, and \p are not
- allowed. */
-
+ *code++ = ptype;
+ *code++ = pdata;
+ }
+#else
+
+ /* If Unicode properties are not supported, \X, \P, and \p are not
+ allowed. */
+
else if (escape == ESC_X || escape == ESC_P || escape == ESC_p)
- {
- *errorcodeptr = ERR45;
- goto FAILED;
- }
-#endif
-
- /* For the rest (including \X when Unicode properties are supported), we
+ {
+ *errorcodeptr = ERR45;
+ goto FAILED;
+ }
+#endif
+
+ /* For the rest (including \X when Unicode properties are supported), we
can obtain the OP value by negating the escape value in the default
situation when PCRE_UCP is not set. When it *is* set, we substitute
Unicode property tests. Note that \b and \B do a one-character
lookbehind, and \A also behaves as if it does. */
-
- else
- {
+
+ else
+ {
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
@@ -8140,47 +8140,47 @@ for (;; ptr++)
item_hwm_offset = cd->hwm - cd->start_workspace;
*code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
}
- }
- continue;
- }
-
- /* We have a data character whose value is in c. In UTF-8 mode it may have
- a value > 127. We set its representation in the length/buffer, and then
- handle it as a data character. */
-
+ }
+ continue;
+ }
+
+ /* We have a data character whose value is in c. In UTF-8 mode it may have
+ a value > 127. We set its representation in the length/buffer, and then
+ handle it as a data character. */
+
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
mclength = PRIV(ord2utf)(c, mcbuffer);
- else
-#endif
-
- {
- mcbuffer[0] = c;
- mclength = 1;
- }
- goto ONE_CHAR;
-
-
- /* ===================================================================*/
- /* Handle a literal character. It is guaranteed not to be whitespace or #
+ else
+#endif
+
+ {
+ mcbuffer[0] = c;
+ mclength = 1;
+ }
+ goto ONE_CHAR;
+
+
+ /* ===================================================================*/
+ /* Handle a literal character. It is guaranteed not to be whitespace or #
when the extended flag is set. If we are in a UTF mode, it may be a
multi-unit literal character. */
-
- default:
- NORMAL_CHAR:
- mclength = 1;
- mcbuffer[0] = c;
-
+
+ default:
+ NORMAL_CHAR:
+ mclength = 1;
+ mcbuffer[0] = c;
+
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(c))
ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
-#endif
-
- /* At this point we have the character's bytes in mcbuffer, and the length
- in mclength. When not in UTF-8 mode, the length is always 1. */
-
- ONE_CHAR:
- previous = code;
+#endif
+
+ /* At this point we have the character's bytes in mcbuffer, and the length
+ in mclength. When not in UTF-8 mode, the length is always 1. */
+
+ ONE_CHAR:
+ previous = code;
item_hwm_offset = cd->hwm - cd->start_workspace;
/* For caseless UTF-8 mode when UCP support is available, check whether
@@ -8206,29 +8206,29 @@ for (;; ptr++)
/* Caseful matches, or not one of the multicase characters. */
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;
- for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
-
- /* Remember if \r or \n were seen */
-
+ for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
+
+ /* Remember if \r or \n were seen */
+
if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
- cd->external_flags |= PCRE_HASCRORLF;
-
- /* Set the first and required bytes appropriately. If no previous first
- byte, set it from this character, but revert to none on a zero repeat.
+ cd->external_flags |= PCRE_HASCRORLF;
+
+ /* Set the first and required bytes appropriately. If no previous first
+ byte, set it from this character, but revert to none on a zero repeat.
Otherwise, leave the firstchar value alone, and don't change it on a zero
- repeat. */
-
+ repeat. */
+
if (firstcharflags == REQ_UNSET)
- {
+ {
zerofirstcharflags = REQ_NONE;
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
-
+
/* If the character is more than one byte long, we can set firstchar
- only if it is not to be matched caselessly. */
-
- if (mclength == 1 || req_caseopt == 0)
- {
+ only if it is not to be matched caselessly. */
+
+ if (mclength == 1 || req_caseopt == 0)
+ {
firstchar = mcbuffer[0];
firstcharflags = req_caseopt;
@@ -8237,54 +8237,54 @@ for (;; ptr++)
reqchar = code[-1];
reqcharflags = cd->req_varyopt;
}
- }
+ }
else firstcharflags = reqcharflags = REQ_NONE;
- }
-
+ }
+
/* firstchar was previously set; we can set reqchar only if the length is
- 1 or the matching is caseful. */
-
- else
- {
+ 1 or the matching is caseful. */
+
+ else
+ {
zerofirstchar = firstchar;
zerofirstcharflags = firstcharflags;
zeroreqchar = reqchar;
zeroreqcharflags = reqcharflags;
- if (mclength == 1 || req_caseopt == 0)
+ if (mclength == 1 || req_caseopt == 0)
{
reqchar = code[-1];
reqcharflags = req_caseopt | cd->req_varyopt;
}
- }
-
- break; /* End of literal character handling */
- }
- } /* end of big loop */
-
-
-/* Control never reaches here by falling through, only by a goto for all the
-error states. Pass back the position in the pattern so that it can be displayed
-to the user for diagnosing the error. */
-
-FAILED:
-*ptrptr = ptr;
-return FALSE;
-}
-
-
-
-/*************************************************
-* Compile sequence of alternatives *
-*************************************************/
-
-/* On entry, ptr is pointing past the bracket character, but on return it
-points to the closing bracket, or vertical bar, or end of string. The code
-variable is pointing at the byte into which the BRA operator has been stored.
-This function is used during the pre-compile phase when we are trying to find
-out the amount of memory needed, as well as during the real compile phase. The
-value of lengthptr distinguishes the two phases.
-
-Arguments:
+ }
+
+ break; /* End of literal character handling */
+ }
+ } /* end of big loop */
+
+
+/* Control never reaches here by falling through, only by a goto for all the
+error states. Pass back the position in the pattern so that it can be displayed
+to the user for diagnosing the error. */
+
+FAILED:
+*ptrptr = ptr;
+return FALSE;
+}
+
+
+
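For reference, the name lookups in compile_branch() above walk a flat table whose entries are a fixed size: a two-unit group number followed by the zero-terminated name. A simplified sketch of that layout and lookup, using plain bytes instead of pcre_uchar (illustrative only, not part of the diff; names are hypothetical, and a real table guarantees the name fits inside an entry):

#include <string.h>

/* Each entry occupies entry_size bytes: two bytes of group number (most
   significant first, as PUT2/GET2 store it in the 8-bit library), then the
   name, zero-terminated. Returns the group number, or -1 if not found. */
static int lookup_name(const unsigned char *table, int entries, int entry_size,
  const char *name, int namelen)
{
int i;
for (i = 0; i < entries; i++)
  {
  const unsigned char *slot = table + i * entry_size;
  /* Compare the name, then insist on a terminating zero in the table so
     that a longer query name cannot match a shorter entry. */
  if (strncmp(name, (const char *)(slot + 2), namelen) == 0 &&
      slot[2 + namelen] == 0)
    return (slot[0] << 8) | slot[1];
  }
return -1;
}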
+/*************************************************
+* Compile sequence of alternatives *
+*************************************************/
+
+/* On entry, ptr is pointing past the bracket character, but on return it
+points to the closing bracket, or vertical bar, or end of string. The code
+variable is pointing at the byte into which the BRA operator has been stored.
+This function is used during the pre-compile phase when we are trying to find
+out the amount of memory needed, as well as during the real compile phase. The
+value of lengthptr distinguishes the two phases.
+
+Arguments:
options option bits, including any changes for this subpattern
codeptr -> the address of the current code pointer
ptrptr -> the address of the current pattern pointer
@@ -8301,18 +8301,18 @@ Arguments:
cd points to the data block with tables pointers etc.
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
-
+
Returns: TRUE on success
-*/
-
-static BOOL
+*/
+
+static BOOL
compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
- int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
+ int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
int cond_depth,
pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
branch_chain *bcptr, compile_data *cd, int *lengthptr)
-{
+{
const pcre_uchar *ptr = *ptrptr;
pcre_uchar *code = *codeptr;
pcre_uchar *last_branch = code;
@@ -8324,12 +8324,12 @@ pcre_uint32 firstchar, reqchar;
pcre_int32 firstcharflags, reqcharflags;
pcre_uint32 branchfirstchar, branchreqchar;
pcre_int32 branchfirstcharflags, branchreqcharflags;
-int length;
+int length;
unsigned int orig_bracount;
unsigned int max_bracount;
-branch_chain bc;
+branch_chain bc;
size_t save_hwm_offset;
-
+
/* If set, call the external function that checks for stack availability. */
if (PUBL(stack_guard) != NULL && PUBL(stack_guard)())
@@ -8340,28 +8340,28 @@ if (PUBL(stack_guard) != NULL && PUBL(stack_guard)())
/* Miscellaneous initialization */
-bc.outer = bcptr;
+bc.outer = bcptr;
bc.current_branch = code;
-
+
firstchar = reqchar = 0;
firstcharflags = reqcharflags = REQ_UNSET;
-
+
save_hwm_offset = cd->hwm - cd->start_workspace;
-/* Accumulate the length for use in the pre-compile phase. Start with the
-length of the BRA and KET and any extra bytes that are required at the
-beginning. We accumulate in a local variable to save frequent testing of
-lengthptr for NULL. We cannot do this by looking at the value of code at the
-start and end of each alternative, because compiled items are discarded during
-the pre-compile phase so that the work space is not exceeded. */
-
-length = 2 + 2*LINK_SIZE + skipbytes;
-
-/* WARNING: If the above line is changed for any reason, you must also change
-the code that abstracts option settings at the start of the pattern and makes
-them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
-pre-compile phase to find out whether anything has yet been compiled or not. */
-
+/* Accumulate the length for use in the pre-compile phase. Start with the
+length of the BRA and KET and any extra bytes that are required at the
+beginning. We accumulate in a local variable to save frequent testing of
+lengthptr for NULL. We cannot do this by looking at the value of code at the
+start and end of each alternative, because compiled items are discarded during
+the pre-compile phase so that the work space is not exceeded. */
+
+length = 2 + 2*LINK_SIZE + skipbytes;
+
+/* WARNING: If the above line is changed for any reason, you must also change
+the code that abstracts option settings at the start of the pattern and makes
+them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
+pre-compile phase to find out whether anything has yet been compiled or not. */
+
/* If this is a capturing subpattern, add to the chain of open capturing items
so that we can detect them if (*ACCEPT) is encountered. This is also used to
detect groups that contain recursive back references to themselves. Note that
@@ -8377,95 +8377,95 @@ if (*code == OP_CBRA)
cd->open_caps = &capitem;
}
-/* Offset is set zero to mark that this bracket is still open */
-
-PUT(code, 1, 0);
-code += 1 + LINK_SIZE + skipbytes;
-
-/* Loop for each alternative branch */
-
-orig_bracount = max_bracount = cd->bracount;
-for (;;)
- {
- /* For a (?| group, reset the capturing bracket count so that each branch
- uses the same numbers. */
-
- if (reset_bracount) cd->bracount = orig_bracount;
-
- /* Set up dummy OP_REVERSE if lookbehind assertion */
-
- if (lookbehind)
- {
- *code++ = OP_REVERSE;
- reverse_count = code;
- PUTINC(code, 0, 0);
- length += 1 + LINK_SIZE;
- }
-
- /* Now compile the branch; in the pre-compile phase its length gets added
- into the length. */
-
+/* Offset is set zero to mark that this bracket is still open */
+
+PUT(code, 1, 0);
+code += 1 + LINK_SIZE + skipbytes;
+
+/* Loop for each alternative branch */
+
+orig_bracount = max_bracount = cd->bracount;
+for (;;)
+ {
+ /* For a (?| group, reset the capturing bracket count so that each branch
+ uses the same numbers. */
+
+ if (reset_bracount) cd->bracount = orig_bracount;
+
+ /* Set up dummy OP_REVERSE if lookbehind assertion */
+
+ if (lookbehind)
+ {
+ *code++ = OP_REVERSE;
+ reverse_count = code;
+ PUTINC(code, 0, 0);
+ length += 1 + LINK_SIZE;
+ }
+
+ /* Now compile the branch; in the pre-compile phase its length gets added
+ into the length. */
+
if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
&branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc,
cond_depth, cd, (lengthptr == NULL)? NULL : &length))
- {
- *ptrptr = ptr;
- return FALSE;
- }
-
- /* Keep the highest bracket count in case (?| was used and some branch
- has fewer than the rest. */
-
- if (cd->bracount > max_bracount) max_bracount = cd->bracount;
-
- /* In the real compile phase, there is some post-processing to be done. */
-
- if (lengthptr == NULL)
- {
+ {
+ *ptrptr = ptr;
+ return FALSE;
+ }
+
+ /* Keep the highest bracket count in case (?| was used and some branch
+ has fewer than the rest. */
+
+ if (cd->bracount > max_bracount) max_bracount = cd->bracount;
+
+ /* In the real compile phase, there is some post-processing to be done. */
+
+ if (lengthptr == NULL)
+ {
/* If this is the first branch, the firstchar and reqchar values for the
- branch become the values for the regex. */
-
- if (*last_branch != OP_ALT)
- {
+ branch become the values for the regex. */
+
+ if (*last_branch != OP_ALT)
+ {
firstchar = branchfirstchar;
firstcharflags = branchfirstcharflags;
reqchar = branchreqchar;
reqcharflags = branchreqcharflags;
- }
-
+ }
+
/* If this is not the first branch, the first char and reqchar have to
- match the values from all the previous branches, except that if the
+ match the values from all the previous branches, except that if the
previous value for reqchar didn't have REQ_VARY set, it can still match,
- and we set REQ_VARY for the regex. */
-
- else
- {
+ and we set REQ_VARY for the regex. */
+
+ else
+ {
/* If we previously had a firstchar, but it doesn't match the new branch,
we have to abandon the firstchar for the regex, but if there was
previously no reqchar, it takes on the value of the old firstchar. */
-
+
if (firstcharflags >= 0 &&
(firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))
- {
+ {
if (reqcharflags < 0)
{
reqchar = firstchar;
reqcharflags = firstcharflags;
}
firstcharflags = REQ_NONE;
- }
-
+ }
+
/* If we (now or from before) have no firstchar, a firstchar from the
branch becomes a reqchar if there isn't a branch reqchar. */
-
+
if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0)
{
branchreqchar = branchfirstchar;
branchreqcharflags = branchfirstcharflags;
}
-
+
/* Now ensure that the reqchars match */
-
+
if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) ||
reqchar != branchreqchar)
reqcharflags = REQ_NONE;
@@ -8474,78 +8474,78 @@ for (;;)
reqchar = branchreqchar;
reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */
}
- }
-
- /* If lookbehind, check that this branch matches a fixed-length string, and
- put the length into the OP_REVERSE item. Temporarily mark the end of the
+ }
+
+ /* If lookbehind, check that this branch matches a fixed-length string, and
+ put the length into the OP_REVERSE item. Temporarily mark the end of the
branch with OP_END. If the branch contains OP_RECURSE, the result is -3
because there may be forward references that we can't check here. Set a
flag to cause another lookbehind check at the end. Why not do it all at the
end? Because common, erroneous checks are picked up here and the offset of
the problem can be shown. */
-
- if (lookbehind)
- {
- int fixed_length;
- *code = OP_END;
+
+ if (lookbehind)
+ {
+ int fixed_length;
+ *code = OP_END;
fixed_length = find_fixedlength(last_branch, (options & PCRE_UTF8) != 0,
FALSE, cd, NULL);
- DPRINTF(("fixed length = %d\n", fixed_length));
+ DPRINTF(("fixed length = %d\n", fixed_length));
if (fixed_length == -3)
- {
+ {
cd->check_lookbehind = TRUE;
}
else if (fixed_length < 0)
{
*errorcodeptr = (fixed_length == -2)? ERR36 :
(fixed_length == -4)? ERR70: ERR25;
- *ptrptr = ptr;
- return FALSE;
- }
+ *ptrptr = ptr;
+ return FALSE;
+ }
else
{
if (fixed_length > cd->max_lookbehind)
cd->max_lookbehind = fixed_length;
PUT(reverse_count, 0, fixed_length);
}
- }
- }
-
- /* Reached end of expression, either ')' or end of pattern. In the real
- compile phase, go back through the alternative branches and reverse the chain
- of offsets, with the field in the BRA item now becoming an offset to the
- first alternative. If there are no alternatives, it points to the end of the
- group. The length in the terminating ket is always the length of the whole
+ }
+ }
+
+ /* Reached end of expression, either ')' or end of pattern. In the real
+ compile phase, go back through the alternative branches and reverse the chain
+ of offsets, with the field in the BRA item now becoming an offset to the
+ first alternative. If there are no alternatives, it points to the end of the
+ group. The length in the terminating ket is always the length of the whole
bracketed item. Return leaving the pointer at the terminating char. */
-
+
if (*ptr != CHAR_VERTICAL_LINE)
- {
- if (lengthptr == NULL)
- {
+ {
+ if (lengthptr == NULL)
+ {
int branch_length = (int)(code - last_branch);
- do
- {
- int prev_length = GET(last_branch, 1);
- PUT(last_branch, 1, branch_length);
- branch_length = prev_length;
- last_branch -= branch_length;
- }
- while (branch_length > 0);
- }
-
- /* Fill in the ket */
-
- *code = OP_KET;
+ do
+ {
+ int prev_length = GET(last_branch, 1);
+ PUT(last_branch, 1, branch_length);
+ branch_length = prev_length;
+ last_branch -= branch_length;
+ }
+ while (branch_length > 0);
+ }
+
+ /* Fill in the ket */
+
+ *code = OP_KET;
PUT(code, 1, (int)(code - start_bracket));
- code += 1 + LINK_SIZE;
-
+ code += 1 + LINK_SIZE;
+
/* If it was a capturing subpattern, check to see if it contained any
recursive back references. If so, we must wrap it in atomic brackets.
Because we are moving code along, we must ensure that any pending recursive
references are updated. In any event, remove the block from the chain. */
-
+
if (capnumber > 0)
- {
+ {
if (cd->open_caps->flag)
{
*code = OP_END;
@@ -8562,139 +8562,139 @@ for (;;)
length += 2 + 2*LINK_SIZE;
}
cd->open_caps = cd->open_caps->next;
- }
-
- /* Retain the highest bracket number, in case resetting was used. */
-
- cd->bracount = max_bracount;
-
- /* Set values to pass back */
-
- *codeptr = code;
- *ptrptr = ptr;
+ }
+
+ /* Retain the highest bracket number, in case resetting was used. */
+
+ cd->bracount = max_bracount;
+
+ /* Set values to pass back */
+
+ *codeptr = code;
+ *ptrptr = ptr;
*firstcharptr = firstchar;
*firstcharflagsptr = firstcharflags;
*reqcharptr = reqchar;
*reqcharflagsptr = reqcharflags;
- if (lengthptr != NULL)
- {
- if (OFLOW_MAX - *lengthptr < length)
- {
- *errorcodeptr = ERR20;
- return FALSE;
- }
- *lengthptr += length;
- }
- return TRUE;
- }
-
- /* Another branch follows. In the pre-compile phase, we can move the code
- pointer back to where it was for the start of the first branch. (That is,
- pretend that each branch is the only one.)
-
- In the real compile phase, insert an ALT node. Its length field points back
- to the previous branch while the bracket remains open. At the end the chain
- is reversed. It's done like this so that the start of the bracket has a
- zero offset until it is closed, making it possible to detect recursion. */
-
- if (lengthptr != NULL)
- {
- code = *codeptr + 1 + LINK_SIZE + skipbytes;
- length += 1 + LINK_SIZE;
- }
- else
- {
- *code = OP_ALT;
+ if (lengthptr != NULL)
+ {
+ if (OFLOW_MAX - *lengthptr < length)
+ {
+ *errorcodeptr = ERR20;
+ return FALSE;
+ }
+ *lengthptr += length;
+ }
+ return TRUE;
+ }
+
+ /* Another branch follows. In the pre-compile phase, we can move the code
+ pointer back to where it was for the start of the first branch. (That is,
+ pretend that each branch is the only one.)
+
+ In the real compile phase, insert an ALT node. Its length field points back
+ to the previous branch while the bracket remains open. At the end the chain
+ is reversed. It's done like this so that the start of the bracket has a
+ zero offset until it is closed, making it possible to detect recursion. */
+
+ if (lengthptr != NULL)
+ {
+ code = *codeptr + 1 + LINK_SIZE + skipbytes;
+ length += 1 + LINK_SIZE;
+ }
+ else
+ {
+ *code = OP_ALT;
PUT(code, 1, (int)(code - last_branch));
bc.current_branch = last_branch = code;
- code += 1 + LINK_SIZE;
- }
-
- ptr++;
- }
-/* Control never reaches here */
-}
-
-
-
-
-/*************************************************
-* Check for anchored expression *
-*************************************************/
-
-/* Try to find out if this is an anchored regular expression. Consider each
-alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
-all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
+ code += 1 + LINK_SIZE;
+ }
+
+ ptr++;
+ }
+/* Control never reaches here */
+}
+
+
+
+
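While a group is still open, the code above links each OP_ALT back to the previous branch and leaves the opening bracket's own offset at zero; only when the group closes is the chain walked backwards and every link rewritten to point forward. A toy version of that reversal over an int array of relative offsets (illustrative only, not part of the diff; names are hypothetical):

/* On entry link[pos] holds, for each branch start, the backward distance to
   the previous branch start (0 for the opening bracket). On exit it holds
   the forward distance to the next branch start, or to end_pos for the last
   branch - the direction the matcher actually follows. */
static void reverse_branch_chain(int *link, int last_pos, int end_pos)
{
int forward = end_pos - last_pos;
for (;;)
  {
  int back = link[last_pos];     /* Saved backward offset */
  link[last_pos] = forward;      /* Rewrite it as a forward offset */
  if (back == 0) break;          /* Reached the opening bracket: done */
  forward = back;                /* This hop is the previous node's forward hop */
  last_pos -= back;              /* Step back to the previous branch start */
  }
}

In the real code the offsets live inside the compiled pattern itself and are read and written with GET/PUT over LINK_SIZE units, but the reversal is the same.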
+/*************************************************
+* Check for anchored expression *
+*************************************************/
+
+/* Try to find out if this is an anchored regular expression. Consider each
+alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
+all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
it's anchored. However, if this is a multiline pattern, then only OP_SOD will
be found, because ^ generates OP_CIRCM in that mode.
-
-We can also consider a regex to be anchored if OP_SOM starts all its branches.
-This is the code for \G, which means "match at start of match position, taking
-into account the match offset".
-
-A branch is also implicitly anchored if it starts with .* and DOTALL is set,
-because that will try the rest of the pattern at all possible matching points,
-so there is no point trying again.... er ....
-
-.... except when the .* appears inside capturing parentheses, and there is a
-subsequent back reference to those parentheses. We haven't enough information
-to catch that case precisely.
-
-At first, the best we could do was to detect when .* was in capturing brackets
-and the highest back reference was greater than or equal to that level.
-However, by keeping a bitmap of the first 31 back references, we can catch some
-of the more common cases more precisely.
-
+
+We can also consider a regex to be anchored if OP_SOM starts all its branches.
+This is the code for \G, which means "match at start of match position, taking
+into account the match offset".
+
+A branch is also implicitly anchored if it starts with .* and DOTALL is set,
+because that will try the rest of the pattern at all possible matching points,
+so there is no point trying again.... er ....
+
+.... except when the .* appears inside capturing parentheses, and there is a
+subsequent back reference to those parentheses. We haven't enough information
+to catch that case precisely.
+
+At first, the best we could do was to detect when .* was in capturing brackets
+and the highest back reference was greater than or equal to that level.
+However, by keeping a bitmap of the first 31 back references, we can catch some
+of the more common cases more precisely.
+
... A second exception is when the .* appears inside an atomic group, because
this prevents the number of characters it matches from being adjusted.
-Arguments:
- code points to start of expression (the bracket)
- bracket_map a bitmap of which brackets we are inside while testing; this
- handles up to substring 31; after that we just have to take
- the less precise approach
+Arguments:
+ code points to start of expression (the bracket)
+ bracket_map a bitmap of which brackets we are inside while testing; this
+ handles up to substring 31; after that we just have to take
+ the less precise approach
cd points to the compile data block
atomcount atomic group level
-
-Returns: TRUE or FALSE
-*/
-
-static BOOL
+
+Returns: TRUE or FALSE
+*/
+
+static BOOL
is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
compile_data *cd, int atomcount)
-{
-do {
+{
+do {
const pcre_uchar *scode = first_significant_code(
code + PRIV(OP_lengths)[*code], FALSE);
register int op = *scode;
-
- /* Non-capturing brackets */
-
+
+ /* Non-capturing brackets */
+
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
- {
+ {
if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
- }
-
- /* Capturing brackets */
-
+ }
+
+ /* Capturing brackets */
+
else if (op == OP_CBRA || op == OP_CBRAPOS ||
op == OP_SCBRA || op == OP_SCBRAPOS)
- {
- int n = GET2(scode, 1+LINK_SIZE);
+ {
+ int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1U << n) : 1);
if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
- }
-
+ }
+
/* Positive forward assertion */
-
+
else if (op == OP_ASSERT)
- {
+ {
if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
- }
-
+ }
+
/* Condition; not anchored if no second branch */
-
+
else if (op == OP_COND)
{
if (scode[GET(scode,1)] != OP_ALT) return FALSE;
@@ -8713,60 +8713,60 @@ do {
it isn't in brackets that are or may be referenced or inside an atomic
group. */
- else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
+ else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
op == OP_TYPEPOSSTAR))
- {
+ {
if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 ||
atomcount > 0 || cd->had_pruneorskip)
return FALSE;
- }
-
- /* Check for explicit anchoring */
-
+ }
+
+ /* Check for explicit anchoring */
+
else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;
- code += GET(code, 1);
- }
-while (*code == OP_ALT); /* Loop for each alternative */
-return TRUE;
-}
-
-
-
-/*************************************************
-* Check for starting with ^ or .* *
-*************************************************/
-
-/* This is called to find out if every branch starts with ^ or .* so that
-"first char" processing can be done to speed things up in multiline
-matching and for non-DOTALL patterns that start with .* (which must start at
-the beginning or after \n). As in the case of is_anchored() (see above), we
-have to take account of back references to capturing brackets that contain .*
+ code += GET(code, 1);
+ }
+while (*code == OP_ALT); /* Loop for each alternative */
+return TRUE;
+}
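
[Editor's note] is_anchored() only treats a leading .* as an implicit anchor when the brackets it sits in are not back-referenced. Groups 1-31 are tracked exactly in a bitmap ((1U << n)), and every higher group folds onto bit 0, so the test degrades to a conservative "maybe referenced". A small standalone illustration of that bitmap test, simplified from the code above:

#include <stdio.h>

/* Record that capturing group n encloses the current position. Groups
   1..31 get their own bit; 32 and above all share bit 0, which makes the
   later test conservative (it may report "referenced" when it is not). */
static unsigned int enter_group(unsigned int bracket_map, int n)
{
    return bracket_map | ((n < 32) ? (1U << n) : 1U);
}

int main(void)
{
    unsigned int backref_map = 1U << 1;   /* the pattern contains \1 */
    unsigned int map = 0;

    map = enter_group(map, 1);            /* the .* sits inside group 1 */

    if ((map & backref_map) != 0)
        puts(".* is inside a referenced group: cannot treat it as an anchor");
    else
        puts(".* can be treated as an implicit anchor");
    return 0;
}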
+
+
+
+/*************************************************
+* Check for starting with ^ or .* *
+*************************************************/
+
+/* This is called to find out if every branch starts with ^ or .* so that
+"first char" processing can be done to speed things up in multiline
+matching and for non-DOTALL patterns that start with .* (which must start at
+the beginning or after \n). As in the case of is_anchored() (see above), we
+have to take account of back references to capturing brackets that contain .*
because in that case we can't make the assumption. Also, the appearance of .*
inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE
or *SKIP does not count, because once again the assumption no longer holds.
-
-Arguments:
- code points to start of expression (the bracket)
- bracket_map a bitmap of which brackets we are inside while testing; this
- handles up to substring 31; after that we just have to take
- the less precise approach
+
+Arguments:
+ code points to start of expression (the bracket)
+ bracket_map a bitmap of which brackets we are inside while testing; this
+ handles up to substring 31; after that we just have to take
+ the less precise approach
cd points to the compile data
atomcount atomic group level
inassert TRUE if in an assertion
-
-Returns: TRUE or FALSE
-*/
-
-static BOOL
+
+Returns: TRUE or FALSE
+*/
+
+static BOOL
is_startline(const pcre_uchar *code, unsigned int bracket_map,
compile_data *cd, int atomcount, BOOL inassert)
-{
-do {
+{
+do {
const pcre_uchar *scode = first_significant_code(
code + PRIV(OP_lengths)[*code], FALSE);
register int op = *scode;
-
+
/* If we are at the start of a conditional assertion group, *both* the
conditional assertion *and* what follows the condition must satisfy the test
for start of line. Other kinds of condition fail. Note that there may be an
@@ -8796,33 +8796,33 @@ do {
op = *scode;
}
- /* Non-capturing brackets */
-
+ /* Non-capturing brackets */
+
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
- {
+ {
if (!is_startline(scode, bracket_map, cd, atomcount, inassert)) return FALSE;
- }
-
- /* Capturing brackets */
-
+ }
+
+ /* Capturing brackets */
+
else if (op == OP_CBRA || op == OP_CBRAPOS ||
op == OP_SCBRA || op == OP_SCBRAPOS)
- {
- int n = GET2(scode, 1+LINK_SIZE);
+ {
+ int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1U << n) : 1);
if (!is_startline(scode, new_map, cd, atomcount, inassert)) return FALSE;
- }
-
+ }
+
/* Positive forward assertions */
-
+
else if (op == OP_ASSERT)
{
if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE;
}
-
+
/* Atomic brackets */
-
+
else if (op == OP_ONCE || op == OP_ONCE_NC)
{
if (!is_startline(scode, bracket_map, cd, atomcount + 1, inassert)) return FALSE;
@@ -8834,60 +8834,60 @@ do {
example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e.
not at the start of a line. */
- else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
- {
+ else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
+ {
if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
atomcount > 0 || cd->had_pruneorskip || inassert)
return FALSE;
- }
-
+ }
+
/* Check for explicit circumflex; anything else gives a FALSE result. Note
in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC
because the number of characters matched by .* cannot be adjusted inside
them. */
-
+
else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
-
- /* Move on to the next alternative */
-
- code += GET(code, 1);
- }
-while (*code == OP_ALT); /* Loop for each alternative */
-return TRUE;
-}
-
-
-
-/*************************************************
-* Check for asserted fixed first char *
-*************************************************/
-
-/* During compilation, the "first char" settings from forward assertions are
-discarded, because they can cause conflicts with actual literals that follow.
-However, if we end up without a first char setting for an unanchored pattern,
-it is worth scanning the regex to see if there is an initial asserted first
+
+ /* Move on to the next alternative */
+
+ code += GET(code, 1);
+ }
+while (*code == OP_ALT); /* Loop for each alternative */
+return TRUE;
+}
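
[Editor's note] Callers never read the PCRE_STARTLINE flag that is_startline() feeds directly, but its effect is visible through pcre_fullinfo(): for an unanchored pattern whose branches all start with ^ in multiline mode, the "first byte" item reports -1 ("matches only at the start of the subject or after a newline"). A usage sketch, assuming the classic 8-bit libpcre API:

#include <stdio.h>
#include <pcre.h>

int main(void)
{
    const char *err;
    int erroffset, firstbyte = 0;

    /* Every branch starts with ^ and the pattern is compiled in multiline
       mode, so it is not anchored but is flagged as "start of line only". */
    pcre *re = pcre_compile("^foo|^bar", PCRE_MULTILINE, &err, &erroffset, NULL);
    if (re == NULL) {
        fprintf(stderr, "compile failed at %d: %s\n", erroffset, err);
        return 1;
    }

    /* -1 here is how the start-of-line property is surfaced to callers. */
    pcre_fullinfo(re, NULL, PCRE_INFO_FIRSTBYTE, &firstbyte);
    printf("PCRE_INFO_FIRSTBYTE = %d\n", firstbyte);

    pcre_free(re);
    return 0;
}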
+
+
+
+/*************************************************
+* Check for asserted fixed first char *
+*************************************************/
+
+/* During compilation, the "first char" settings from forward assertions are
+discarded, because they can cause conflicts with actual literals that follow.
+However, if we end up without a first char setting for an unanchored pattern,
+it is worth scanning the regex to see if there is an initial asserted first
char. If all branches start with the same asserted char, or with a
non-conditional bracket all of whose alternatives start with the same asserted
char (recurse ad lib), then we return that char, with the flags set to zero or
REQ_CASELESS; otherwise return zero with REQ_NONE in the flags.
-
-Arguments:
- code points to start of expression (the bracket)
+
+Arguments:
+ code points to start of expression (the bracket)
flags points to the first char flags, or to REQ_NONE
- inassert TRUE if in an assertion
-
+ inassert TRUE if in an assertion
+
Returns: the fixed first char, or 0 with REQ_NONE in flags
-*/
-
+*/
+
static pcre_uint32
find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags,
BOOL inassert)
-{
+{
register pcre_uint32 c = 0;
int cflags = REQ_NONE;
*flags = REQ_NONE;
-do {
+do {
pcre_uint32 d;
int dflags;
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
@@ -8895,39 +8895,39 @@ do {
const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
TRUE);
register pcre_uchar op = *scode;
-
- switch(op)
- {
- default:
+
+ switch(op)
+ {
+ default:
return 0;
-
- case OP_BRA:
+
+ case OP_BRA:
case OP_BRAPOS:
- case OP_CBRA:
+ case OP_CBRA:
case OP_SCBRA:
case OP_CBRAPOS:
case OP_SCBRAPOS:
- case OP_ASSERT:
- case OP_ONCE:
+ case OP_ASSERT:
+ case OP_ONCE:
case OP_ONCE_NC:
d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
if (dflags < 0)
return 0;
if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0;
- break;
-
+ break;
+
case OP_EXACT:
scode += IMM2_SIZE;
/* Fall through */
-
- case OP_CHAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_POSPLUS:
+
+ case OP_CHAR:
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_POSPLUS:
if (!inassert) return 0;
if (cflags < 0) { c = scode[1]; cflags = 0; }
else if (c != scode[1]) return 0;
- break;
+ break;
case OP_EXACTI:
scode += IMM2_SIZE;
@@ -8941,19 +8941,19 @@ do {
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
- }
-
- code += GET(code, 1);
- }
-while (*code == OP_ALT);
+ }
+
+ code += GET(code, 1);
+ }
+while (*code == OP_ALT);
*flags = cflags;
-return c;
-}
-
-
-
-/*************************************************
+return c;
+}
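
[Editor's note] When the only fixed starting character comes from a lookahead, find_firstassertedchar() is what recovers it for the "first char" optimization, and the result can again be observed with pcre_fullinfo(). A hedged sketch for the 8-bit API; the exact value printed may depend on build options:

#include <stdio.h>
#include <pcre.h>

int main(void)
{
    const char *err;
    int erroffset, firstbyte = 0;

    /* No branch starts with a literal (\w is a type, not a fixed char),
       but the lookahead asserts that the first character must be 'a'. */
    pcre *re = pcre_compile("(?=a)\\w+", 0, &err, &erroffset, NULL);
    if (re == NULL) {
        fprintf(stderr, "compile failed at %d: %s\n", erroffset, err);
        return 1;
    }

    pcre_fullinfo(re, NULL, PCRE_INFO_FIRSTBYTE, &firstbyte);
    printf("PCRE_INFO_FIRSTBYTE = %d\n", firstbyte);   /* 'a' (97) expected */

    pcre_free(re);
    return 0;
}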
+
+
+
+/*************************************************
* Add an entry to the name/number table *
*************************************************/
@@ -9009,31 +9009,31 @@ cd->names_found++;
/*************************************************
-* Compile a Regular Expression *
-*************************************************/
-
-/* This function takes a string and returns a pointer to a block of store
-holding a compiled version of the expression. The original API for this
-function had no error code return variable; it is retained for backwards
-compatibility. The new function is given a new name.
-
-Arguments:
- pattern the regular expression
- options various option bits
- errorcodeptr pointer to error code variable (pcre_compile2() only)
- can be NULL if you don't want a code value
- errorptr pointer to pointer to error text
- erroroffset ptr offset in pattern where error was detected
- tables pointer to character tables or NULL
-
-Returns: pointer to compiled data block, or NULL on error,
- with errorptr and erroroffset set
-*/
-
+* Compile a Regular Expression *
+*************************************************/
+
+/* This function takes a string and returns a pointer to a block of store
+holding a compiled version of the expression. The original API for this
+function had no error code return variable; it is retained for backwards
+compatibility. The new function is given a new name.
+
+Arguments:
+ pattern the regular expression
+ options various option bits
+ errorcodeptr pointer to error code variable (pcre_compile2() only)
+ can be NULL if you don't want a code value
+ errorptr pointer to pointer to error text
+ erroroffset ptr offset in pattern where error was detected
+ tables pointer to character tables or NULL
+
+Returns: pointer to compiled data block, or NULL on error,
+ with errorptr and erroroffset set
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
-pcre_compile(const char *pattern, int options, const char **errorptr,
- int *erroroffset, const unsigned char *tables)
+pcre_compile(const char *pattern, int options, const char **errorptr,
+ int *erroroffset, const unsigned char *tables)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
@@ -9043,21 +9043,21 @@ PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
#endif
-{
+{
#if defined COMPILE_PCRE8
-return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
+return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
#elif defined COMPILE_PCRE16
return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
#elif defined COMPILE_PCRE32
return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
#endif
-}
-
-
+}
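
[Editor's note] The comment block above spells out the difference between the two entry points: pcre_compile() predates the numeric error code and simply forwards to pcre_compile2() with a NULL errorcodeptr. A typical call through the newer interface, using the argument list documented above (8-bit API, default character tables):

#include <stdio.h>
#include <pcre.h>

int main(void)
{
    const char *errptr;
    int errcode = 0, erroffset = 0;

    /* errorcodeptr may be NULL if only the text is wanted; here we ask for
       both the numeric code and the message. NULL selects the default
       character tables. */
    pcre *re = pcre_compile2("a)b", 0, &errcode, &errptr, &erroffset, NULL);
    if (re == NULL) {
        fprintf(stderr, "error %d at offset %d: %s\n", errcode, erroffset, errptr);
        return 1;
    }

    pcre_free(re);
    return 0;
}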
+
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
-pcre_compile2(const char *pattern, int options, int *errorcodeptr,
- const char **errorptr, int *erroroffset, const unsigned char *tables)
+pcre_compile2(const char *pattern, int options, int *errorcodeptr,
+ const char **errorptr, int *erroroffset, const unsigned char *tables)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
@@ -9067,97 +9067,97 @@ PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
#endif
-{
+{
REAL_PCRE *re;
-int length = 1; /* For final END opcode */
+int length = 1; /* For final END opcode */
pcre_int32 firstcharflags, reqcharflags;
pcre_uint32 firstchar, reqchar;
pcre_uint32 limit_match = PCRE_UINT32_MAX;
pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
int newline;
-int errorcode = 0;
-int skipatstart = 0;
+int errorcode = 0;
+int skipatstart = 0;
BOOL utf;
BOOL never_utf = FALSE;
-size_t size;
+size_t size;
pcre_uchar *code;
const pcre_uchar *codestart;
const pcre_uchar *ptr;
-compile_data compile_block;
-compile_data *cd = &compile_block;
-
-/* This space is used for "compiling" into during the first phase, when we are
-computing the amount of memory that is needed. Compiled items are thrown away
-as soon as possible, so that a fairly large buffer should be sufficient for
-this purpose. The same space is used in the second phase for remembering where
+compile_data compile_block;
+compile_data *cd = &compile_block;
+
+/* This space is used for "compiling" into during the first phase, when we are
+computing the amount of memory that is needed. Compiled items are thrown away
+as soon as possible, so that a fairly large buffer should be sufficient for
+this purpose. The same space is used in the second phase for remembering where
to fill in forward references to subpatterns. That may overflow, in which case
new memory is obtained from malloc(). */
-
+
pcre_uchar cworkspace[COMPILE_WORK_SIZE];
-
+
/* This vector is used for remembering name groups during the pre-compile. In a
similar way to cworkspace, it can be expanded using malloc() if necessary. */
-
+
named_group named_groups[NAMED_GROUP_LIST_SIZE];
-/* Set this early so that early errors get offset 0. */
-
+/* Set this early so that early errors get offset 0. */
+
ptr = (const pcre_uchar *)pattern;
-
-/* We can't pass back an error message if errorptr is NULL; I guess the best we
-can do is just return NULL, but we can set a code value if there is a code
-pointer. */
-
-if (errorptr == NULL)
- {
- if (errorcodeptr != NULL) *errorcodeptr = 99;
- return NULL;
- }
-
-*errorptr = NULL;
-if (errorcodeptr != NULL) *errorcodeptr = ERR0;
-
-/* However, we can give a message for this error */
-
-if (erroroffset == NULL)
- {
- errorcode = ERR16;
- goto PCRE_EARLY_ERROR_RETURN2;
- }
-
-*erroroffset = 0;
-
+
+/* We can't pass back an error message if errorptr is NULL; I guess the best we
+can do is just return NULL, but we can set a code value if there is a code
+pointer. */
+
+if (errorptr == NULL)
+ {
+ if (errorcodeptr != NULL) *errorcodeptr = 99;
+ return NULL;
+ }
+
+*errorptr = NULL;
+if (errorcodeptr != NULL) *errorcodeptr = ERR0;
+
+/* However, we can give a message for this error */
+
+if (erroroffset == NULL)
+ {
+ errorcode = ERR16;
+ goto PCRE_EARLY_ERROR_RETURN2;
+ }
+
+*erroroffset = 0;
+
/* Set up pointers to the individual character tables */
-
+
if (tables == NULL) tables = PRIV(default_tables);
cd->lcc = tables + lcc_offset;
cd->fcc = tables + fcc_offset;
cd->cbits = tables + cbits_offset;
cd->ctypes = tables + ctypes_offset;
-
+
/* Check that all undefined public option bits are zero */
if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
- {
- errorcode = ERR17;
- goto PCRE_EARLY_ERROR_RETURN;
- }
-
+ {
+ errorcode = ERR17;
+ goto PCRE_EARLY_ERROR_RETURN;
+ }
+
/* If PCRE_NEVER_UTF is set, remember it. */
-
+
if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
-
-/* Check for global one-time settings at the start of the pattern, and remember
-the offset for later. */
-
+
+/* Check for global one-time settings at the start of the pattern, and remember
+the offset for later. */
+
cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
ptr[skipatstart+1] == CHAR_ASTERISK)
- {
- int newnl = 0;
- int newbsr = 0;
-
+ {
+ int newnl = 0;
+ int newbsr = 0;
+
/* For completeness and backward compatibility, (*UTFn) is supported in the
relevant libraries, but (*UTF) is generic and always supported. Note that
PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
@@ -9223,28 +9223,28 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
}
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
- { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
+ { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0)
- { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
+ { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5) == 0)
- { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
+ { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
- { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
+ { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
- { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
-
+ { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
+
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
- { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
+ { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
- { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
-
- if (newnl != 0)
- options = (options & ~PCRE_NEWLINE_BITS) | newnl;
- else if (newbsr != 0)
- options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
- else break;
- }
-
+ { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
+
+ if (newnl != 0)
+ options = (options & ~PCRE_NEWLINE_BITS) | newnl;
+ else if (newbsr != 0)
+ options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
+ else break;
+ }
+
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
if (utf && never_utf)
@@ -9289,86 +9289,86 @@ if ((options & PCRE_UCP) != 0)
}
#endif
-/* Check validity of \R options. */
-
+/* Check validity of \R options. */
+
if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) ==
(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
- {
+ {
errorcode = ERR56;
goto PCRE_EARLY_ERROR_RETURN;
- }
-
-/* Handle different types of newline. The three bits give seven cases. The
-current code allows for fixed one- or two-byte sequences, plus "any" and
-"anycrlf". */
-
-switch (options & PCRE_NEWLINE_BITS)
- {
- case 0: newline = NEWLINE; break; /* Build-time default */
+ }
+
+/* Handle different types of newline. The three bits give seven cases. The
+current code allows for fixed one- or two-byte sequences, plus "any" and
+"anycrlf". */
+
+switch (options & PCRE_NEWLINE_BITS)
+ {
+ case 0: newline = NEWLINE; break; /* Build-time default */
case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
- case PCRE_NEWLINE_CR+
+ case PCRE_NEWLINE_CR+
PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
- case PCRE_NEWLINE_ANY: newline = -1; break;
- case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
- default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
- }
-
-if (newline == -2)
- {
- cd->nltype = NLTYPE_ANYCRLF;
- }
-else if (newline < 0)
- {
- cd->nltype = NLTYPE_ANY;
- }
-else
- {
- cd->nltype = NLTYPE_FIXED;
- if (newline > 255)
- {
- cd->nllen = 2;
- cd->nl[0] = (newline >> 8) & 255;
- cd->nl[1] = newline & 255;
- }
- else
- {
- cd->nllen = 1;
- cd->nl[0] = newline;
- }
- }
-
-/* Maximum back reference and backref bitmap. The bitmap records up to 31 back
-references to help in deciding whether (.*) can be treated as anchored or not.
-*/
-
-cd->top_backref = 0;
-cd->backref_map = 0;
-
-/* Reflect pattern for debugging output */
-
-DPRINTF(("------------------------------------------------------------------\n"));
+ case PCRE_NEWLINE_ANY: newline = -1; break;
+ case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
+ default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
+ }
+
+if (newline == -2)
+ {
+ cd->nltype = NLTYPE_ANYCRLF;
+ }
+else if (newline < 0)
+ {
+ cd->nltype = NLTYPE_ANY;
+ }
+else
+ {
+ cd->nltype = NLTYPE_FIXED;
+ if (newline > 255)
+ {
+ cd->nllen = 2;
+ cd->nl[0] = (newline >> 8) & 255;
+ cd->nl[1] = newline & 255;
+ }
+ else
+ {
+ cd->nllen = 1;
+ cd->nl[0] = newline;
+ }
+ }
+
+/* Maximum back reference and backref bitmap. The bitmap records up to 31 back
+references to help in deciding whether (.*) can be treated as anchored or not.
+*/
+
+cd->top_backref = 0;
+cd->backref_map = 0;
+
+/* Reflect pattern for debugging output */
+
+DPRINTF(("------------------------------------------------------------------\n"));
#ifdef PCRE_DEBUG
print_puchar(stdout, (PCRE_PUCHAR)pattern);
#endif
DPRINTF(("\n"));
-
-/* Pretend to compile the pattern while actually just accumulating the length
-of memory required. This behaviour is triggered by passing a non-NULL final
-argument to compile_regex(). We pass a block of workspace (cworkspace) for it
-to compile parts of the pattern into; the compiled code is discarded when it is
-no longer needed, so hopefully this workspace will never overflow, though there
-is a test for its doing so. */
-
-cd->bracount = cd->final_bracount = 0;
-cd->names_found = 0;
-cd->name_entry_size = 0;
-cd->name_table = NULL;
+
+/* Pretend to compile the pattern while actually just accumulating the length
+of memory required. This behaviour is triggered by passing a non-NULL final
+argument to compile_regex(). We pass a block of workspace (cworkspace) for it
+to compile parts of the pattern into; the compiled code is discarded when it is
+no longer needed, so hopefully this workspace will never overflow, though there
+is a test for its doing so. */
+
+cd->bracount = cd->final_bracount = 0;
+cd->names_found = 0;
+cd->name_entry_size = 0;
+cd->name_table = NULL;
cd->dupnames = FALSE;
cd->dupgroups = FALSE;
cd->namedrefcount = 0;
-cd->start_code = cworkspace;
-cd->hwm = cworkspace;
+cd->start_code = cworkspace;
+cd->hwm = cworkspace;
cd->iscondassert = FALSE;
cd->start_workspace = cworkspace;
cd->workspace_size = COMPILE_WORK_SIZE;
@@ -9376,102 +9376,102 @@ cd->named_groups = named_groups;
cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
cd->start_pattern = (const pcre_uchar *)pattern;
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
-cd->req_varyopt = 0;
+cd->req_varyopt = 0;
cd->parens_depth = 0;
cd->assert_depth = 0;
cd->max_lookbehind = 0;
-cd->external_options = options;
+cd->external_options = options;
cd->open_caps = NULL;
-
-/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
-don't need to look at the result of the function here. The initial options have
-been put into the cd block so that they can be changed if an option setting is
-found within the regex right at the beginning. Bringing initial option settings
-outside can help speed up starting point checks. */
-
-ptr += skipatstart;
-code = cworkspace;
-*code = OP_BRA;
+
+/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
+don't need to look at the result of the function here. The initial options have
+been put into the cd block so that they can be changed if an option setting is
+found within the regex right at the beginning. Bringing initial option settings
+outside can help speed up starting point checks. */
+
+ptr += skipatstart;
+code = cworkspace;
+*code = OP_BRA;
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
cd, &length);
-if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
-
-DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
+if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
+
+DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
(int)(cd->hwm - cworkspace)));
-
-if (length > MAX_PATTERN_SIZE)
- {
- errorcode = ERR20;
- goto PCRE_EARLY_ERROR_RETURN;
- }
-
+
+if (length > MAX_PATTERN_SIZE)
+ {
+ errorcode = ERR20;
+ goto PCRE_EARLY_ERROR_RETURN;
+ }
+
/* Compute the size of the data block for storing the compiled pattern. Integer
overflow should no longer be possible because nowadays we limit the maximum
value of cd->names_found and cd->name_entry_size. */
-
+
size = sizeof(REAL_PCRE) +
(length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
-
+
/* Get the memory. */
re = (REAL_PCRE *)(PUBL(malloc))(size);
-if (re == NULL)
- {
- errorcode = ERR21;
- goto PCRE_EARLY_ERROR_RETURN;
- }
-
-/* Put in the magic number, and save the sizes, initial options, internal
-flags, and character table pointer. NULL is used for the default character
-tables. The nullpad field is at the end; it's there to help in the case when a
-regex compiled on a system with 4-byte pointers is run on another with 8-byte
-pointers. */
-
-re->magic_number = MAGIC_NUMBER;
+if (re == NULL)
+ {
+ errorcode = ERR21;
+ goto PCRE_EARLY_ERROR_RETURN;
+ }
+
+/* Put in the magic number, and save the sizes, initial options, internal
+flags, and character table pointer. NULL is used for the default character
+tables. The nullpad field is at the end; it's there to help in the case when a
+regex compiled on a system with 4-byte pointers is run on another with 8-byte
+pointers. */
+
+re->magic_number = MAGIC_NUMBER;
re->size = (int)size;
-re->options = cd->external_options;
-re->flags = cd->external_flags;
+re->options = cd->external_options;
+re->flags = cd->external_flags;
re->limit_match = limit_match;
re->limit_recursion = limit_recursion;
re->first_char = 0;
re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
-re->name_entry_size = cd->name_entry_size;
-re->name_count = cd->names_found;
-re->ref_count = 0;
+re->name_entry_size = cd->name_entry_size;
+re->name_count = cd->names_found;
+re->ref_count = 0;
re->tables = (tables == PRIV(default_tables))? NULL : tables;
-re->nullpad = NULL;
+re->nullpad = NULL;
#ifdef COMPILE_PCRE32
re->dummy = 0;
#else
re->dummy1 = re->dummy2 = re->dummy3 = 0;
#endif
-
-/* The starting points of the name/number translation table and of the code are
-passed around in the compile data block. The start/end pattern and initial
-options are already set from the pre-compile phase, as is the name_entry_size
-field. Reset the bracket count and the names_found field. Also reset the hwm
-field; this time it's used for remembering forward references to subpatterns.
-*/
-
-cd->final_bracount = cd->bracount; /* Save for checking forward references */
+
+/* The starting points of the name/number translation table and of the code are
+passed around in the compile data block. The start/end pattern and initial
+options are already set from the pre-compile phase, as is the name_entry_size
+field. Reset the bracket count and the names_found field. Also reset the hwm
+field; this time it's used for remembering forward references to subpatterns.
+*/
+
+cd->final_bracount = cd->bracount; /* Save for checking forward references */
cd->parens_depth = 0;
cd->assert_depth = 0;
-cd->bracount = 0;
+cd->bracount = 0;
cd->max_lookbehind = 0;
cd->name_table = (pcre_uchar *)re + re->name_table_offset;
-codestart = cd->name_table + re->name_entry_size * re->name_count;
-cd->start_code = codestart;
+codestart = cd->name_table + re->name_entry_size * re->name_count;
+cd->start_code = codestart;
cd->hwm = (pcre_uchar *)(cd->start_workspace);
cd->iscondassert = FALSE;
-cd->req_varyopt = 0;
-cd->had_accept = FALSE;
+cd->req_varyopt = 0;
+cd->had_accept = FALSE;
cd->had_pruneorskip = FALSE;
cd->check_lookbehind = FALSE;
cd->open_caps = NULL;
-
+
/* If any named groups were found, create the name/number table from the list
created in the first pass. */
@@ -9486,51 +9486,51 @@ if (cd->names_found > 0)
(PUBL(free))((void *)cd->named_groups);
}
-/* Set up a starting, non-extracting bracket, then compile the expression. On
-error, errorcode will be set non-zero, so we don't need to look at the result
-of the function here. */
-
+/* Set up a starting, non-extracting bracket, then compile the expression. On
+error, errorcode will be set non-zero, so we don't need to look at the result
+of the function here. */
+
ptr = (const pcre_uchar *)pattern + skipatstart;
code = (pcre_uchar *)codestart;
-*code = OP_BRA;
+*code = OP_BRA;
(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
&firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL);
-re->top_bracket = cd->bracount;
-re->top_backref = cd->top_backref;
+re->top_bracket = cd->bracount;
+re->top_backref = cd->top_backref;
re->max_lookbehind = cd->max_lookbehind;
re->flags = cd->external_flags | PCRE_MODE;
-
+
if (cd->had_accept)
{
reqchar = 0; /* Must disable after (*ACCEPT) */
reqcharflags = REQ_NONE;
}
-
-/* If not reached end of pattern on success, there's an excess bracket. */
-
+
+/* If not reached end of pattern on success, there's an excess bracket. */
+
if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22;
-
-/* Fill in the terminating state and check for disastrous overflow, but
-if debugging, leave the test till after things are printed out. */
-
-*code++ = OP_END;
-
+
+/* Fill in the terminating state and check for disastrous overflow, but
+if debugging, leave the test till after things are printed out. */
+
+*code++ = OP_END;
+
#ifndef PCRE_DEBUG
-if (code - codestart > length) errorcode = ERR23;
-#endif
-
+if (code - codestart > length) errorcode = ERR23;
+#endif
+
#ifdef SUPPORT_VALGRIND
/* If the estimated length exceeds the really used length, mark the extra
allocated memory as unaddressable, so that any out-of-bound reads can be
detected. */
VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
#endif
-
+
/* Fill in any forward references that are required. There may be repeated
references; optimize for them, as searching a large regex takes time. */
if (cd->hwm > cd->start_workspace)
- {
+ {
int prev_recno = -1;
const pcre_uchar *groupptr = NULL;
while (errorcode == 0 && cd->hwm > cd->start_workspace)
@@ -9557,8 +9557,8 @@ if (cd->hwm > cd->start_workspace)
if (groupptr == NULL) errorcode = ERR53;
else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
}
- }
-
+ }
+
/* If the workspace had to be expanded, free the new memory. Set the pointer to
NULL to indicate that forward references have been filled in. */
@@ -9566,11 +9566,11 @@ if (cd->workspace_size > COMPILE_WORK_SIZE)
(PUBL(free))((void *)cd->start_workspace);
cd->start_workspace = NULL;
-/* Give an error if there's back reference to a non-existent capturing
-subpattern. */
-
-if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
-
+/* Give an error if there's back reference to a non-existent capturing
+subpattern. */
+
+if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
+
/* Unless disabled, check whether any single character iterators can be
auto-possessified. The function overwrites the appropriate opcode values, so
the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
@@ -9628,39 +9628,39 @@ if (errorcode == 0 && cd->check_lookbehind)
}
}
-/* Failed to compile, or error while post-processing */
-
-if (errorcode != 0)
- {
+/* Failed to compile, or error while post-processing */
+
+if (errorcode != 0)
+ {
(PUBL(free))(re);
- PCRE_EARLY_ERROR_RETURN:
+ PCRE_EARLY_ERROR_RETURN:
*erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
- PCRE_EARLY_ERROR_RETURN2:
- *errorptr = find_error_text(errorcode);
- if (errorcodeptr != NULL) *errorcodeptr = errorcode;
- return NULL;
- }
-
-/* If the anchored option was not passed, set the flag if we can determine that
+ PCRE_EARLY_ERROR_RETURN2:
+ *errorptr = find_error_text(errorcode);
+ if (errorcodeptr != NULL) *errorcodeptr = errorcode;
+ return NULL;
+ }
+
+/* If the anchored option was not passed, set the flag if we can determine that
the pattern is anchored by virtue of ^ characters or \A or anything else, such
as starting with non-atomic .* when DOTALL is set and there are no occurrences
of *PRUNE or *SKIP.
-
-Otherwise, if we know what the first byte has to be, save it, because that
-speeds up unanchored matches no end. If not, see if we can set the
-PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
+
+Otherwise, if we know what the first byte has to be, save it, because that
+speeds up unanchored matches no end. If not, see if we can set the
+PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
start with ^. and also when all branches start with non-atomic .* for
non-DOTALL matches when *PRUNE and SKIP are not present. */
-
-if ((re->options & PCRE_ANCHORED) == 0)
- {
+
+if ((re->options & PCRE_ANCHORED) == 0)
+ {
if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
- else
- {
+ else
+ {
if (firstcharflags < 0)
firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE);
if (firstcharflags >= 0) /* Remove caseless flag for non-caseable chars */
- {
+ {
#if defined COMPILE_PCRE8
re->first_char = firstchar & 0xff;
#elif defined COMPILE_PCRE16
@@ -9689,20 +9689,20 @@ if ((re->options & PCRE_ANCHORED) == 0)
re->flags |= PCRE_FCH_CASELESS;
}
- re->flags |= PCRE_FIRSTSET;
- }
+ re->flags |= PCRE_FIRSTSET;
+ }
else if (is_startline(codestart, 0, cd, 0, FALSE)) re->flags |= PCRE_STARTLINE;
- }
- }
-
-/* For an anchored pattern, we use the "required byte" only if it follows a
-variable length item in the regex. Remove the caseless flag for non-caseable
-bytes. */
-
+ }
+ }
+
+/* For an anchored pattern, we use the "required byte" only if it follows a
+variable length item in the regex. Remove the caseless flag for non-caseable
+bytes. */
+
if (reqcharflags >= 0 &&
((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0))
- {
+ {
#if defined COMPILE_PCRE8
re->req_char = reqchar & 0xff;
#elif defined COMPILE_PCRE16
@@ -9730,36 +9730,36 @@ if (reqcharflags >= 0 &&
re->flags |= PCRE_RCH_CASELESS;
}
- re->flags |= PCRE_REQCHSET;
- }
-
-/* Print out the compiled data if debugging is enabled. This is never the
-case when building a production library. */
-
+ re->flags |= PCRE_REQCHSET;
+ }
+
+/* Print out the compiled data if debugging is enabled. This is never the
+case when building a production library. */
+
#ifdef PCRE_DEBUG
-printf("Length = %d top_bracket = %d top_backref = %d\n",
- length, re->top_bracket, re->top_backref);
-
-printf("Options=%08x\n", re->options);
-
-if ((re->flags & PCRE_FIRSTSET) != 0)
- {
+printf("Length = %d top_bracket = %d top_backref = %d\n",
+ length, re->top_bracket, re->top_backref);
+
+printf("Options=%08x\n", re->options);
+
+if ((re->flags & PCRE_FIRSTSET) != 0)
+ {
pcre_uchar ch = re->first_char;
const char *caseless =
((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
- else printf("First char = \\x%02x%s\n", ch, caseless);
- }
-
-if ((re->flags & PCRE_REQCHSET) != 0)
- {
+ else printf("First char = \\x%02x%s\n", ch, caseless);
+ }
+
+if ((re->flags & PCRE_REQCHSET) != 0)
+ {
pcre_uchar ch = re->req_char;
const char *caseless =
((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
- else printf("Req char = \\x%02x%s\n", ch, caseless);
- }
-
+ else printf("Req char = \\x%02x%s\n", ch, caseless);
+ }
+
#if defined COMPILE_PCRE8
pcre_printint((pcre *)re, stdout, TRUE);
#elif defined COMPILE_PCRE16
@@ -9767,20 +9767,20 @@ pcre16_printint((pcre *)re, stdout, TRUE);
#elif defined COMPILE_PCRE32
pcre32_printint((pcre *)re, stdout, TRUE);
#endif
-
-/* This check is done here in the debugging case so that the code that
-was compiled can be seen. */
-
-if (code - codestart > length)
- {
+
+/* This check is done here in the debugging case so that the code that
+was compiled can be seen. */
+
+if (code - codestart > length)
+ {
(PUBL(free))(re);
- *errorptr = find_error_text(ERR23);
+ *errorptr = find_error_text(ERR23);
*erroroffset = ptr - (pcre_uchar *)pattern;
- if (errorcodeptr != NULL) *errorcodeptr = ERR23;
- return NULL;
- }
+ if (errorcodeptr != NULL) *errorcodeptr = ERR23;
+ return NULL;
+ }
#endif /* PCRE_DEBUG */
-
+
/* Check for a pattern than can match an empty string, so that this information
can be provided to applications. */
@@ -9796,12 +9796,12 @@ do
while (*codestart == OP_ALT);
#if defined COMPILE_PCRE8
-return (pcre *)re;
+return (pcre *)re;
#elif defined COMPILE_PCRE16
return (pcre16 *)re;
#elif defined COMPILE_PCRE32
return (pcre32 *)re;
#endif
-}
-
-/* End of pcre_compile.c */
+}
+
+/* End of pcre_compile.c */
diff --git a/contrib/libs/pcre/pcre_config.c b/contrib/libs/pcre/pcre_config.c
index 6c303244fc..3c5364e2f8 100644
--- a/contrib/libs/pcre/pcre_config.c
+++ b/contrib/libs/pcre/pcre_config.c
@@ -1,73 +1,73 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_config(). */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_config(). */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
+#endif
+
/* Keep the original link size. */
static int real_link_size = LINK_SIZE;
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Return info about what features are configured *
-*************************************************/
-
-/* This function has an extensible interface so that additional items can be
-added compatibly.
-
-Arguments:
- what what information is required
- where where to put the information
-
-Returns: 0 if data returned, negative on error
-*/
-
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Return info about what features are configured *
+*************************************************/
+
+/* This function has an extensible interface so that additional items can be
+added compatibly.
+
+Arguments:
+ what what information is required
+ where where to put the information
+
+Returns: 0 if data returned, negative on error
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_config(int what, void *where)
+pcre_config(int what, void *where)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_config(int what, void *where)
@@ -75,22 +75,22 @@ pcre16_config(int what, void *where)
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_config(int what, void *where)
#endif
-{
-switch (what)
- {
- case PCRE_CONFIG_UTF8:
+{
+switch (what)
+ {
+ case PCRE_CONFIG_UTF8:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
- *((int *)where) = 1;
-#else
- *((int *)where) = 0;
-#endif
- break;
+ *((int *)where) = 1;
+#else
+ *((int *)where) = 0;
+#endif
+ break;
#endif
-
+
case PCRE_CONFIG_UTF16:
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
*((int *)where) = 0;
@@ -117,14 +117,14 @@ switch (what)
break;
#endif
- case PCRE_CONFIG_UNICODE_PROPERTIES:
-#ifdef SUPPORT_UCP
- *((int *)where) = 1;
-#else
- *((int *)where) = 0;
-#endif
- break;
-
+ case PCRE_CONFIG_UNICODE_PROPERTIES:
+#ifdef SUPPORT_UCP
+ *((int *)where) = 1;
+#else
+ *((int *)where) = 0;
+#endif
+ break;
+
case PCRE_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((int *)where) = 1;
@@ -141,50 +141,50 @@ switch (what)
#endif
break;
- case PCRE_CONFIG_NEWLINE:
- *((int *)where) = NEWLINE;
- break;
-
- case PCRE_CONFIG_BSR:
-#ifdef BSR_ANYCRLF
- *((int *)where) = 1;
-#else
- *((int *)where) = 0;
-#endif
- break;
-
- case PCRE_CONFIG_LINK_SIZE:
+ case PCRE_CONFIG_NEWLINE:
+ *((int *)where) = NEWLINE;
+ break;
+
+ case PCRE_CONFIG_BSR:
+#ifdef BSR_ANYCRLF
+ *((int *)where) = 1;
+#else
+ *((int *)where) = 0;
+#endif
+ break;
+
+ case PCRE_CONFIG_LINK_SIZE:
*((int *)where) = real_link_size;
- break;
-
- case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
- *((int *)where) = POSIX_MALLOC_THRESHOLD;
- break;
-
+ break;
+
+ case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
+ *((int *)where) = POSIX_MALLOC_THRESHOLD;
+ break;
+
case PCRE_CONFIG_PARENS_LIMIT:
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
break;
- case PCRE_CONFIG_MATCH_LIMIT:
+ case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned long int *)where) = MATCH_LIMIT;
- break;
-
- case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
+ break;
+
+ case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
- break;
-
- case PCRE_CONFIG_STACKRECURSE:
-#ifdef NO_RECURSE
- *((int *)where) = 0;
-#else
- *((int *)where) = 1;
-#endif
- break;
-
- default: return PCRE_ERROR_BADOPTION;
- }
-
-return 0;
-}
-
-/* End of pcre_config.c */
+ break;
+
+ case PCRE_CONFIG_STACKRECURSE:
+#ifdef NO_RECURSE
+ *((int *)where) = 0;
+#else
+ *((int *)where) = 1;
+#endif
+ break;
+
+ default: return PCRE_ERROR_BADOPTION;
+ }
+
+return 0;
+}
+
+/* End of pcre_config.c */
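
[Editor's note] As the switch above shows, pcre_config() writes through a void pointer whose real type depends on the item: most items want an int, while the limit queries (PCRE_CONFIG_MATCH_LIMIT, PCRE_CONFIG_MATCH_LIMIT_RECURSION, PCRE_CONFIG_PARENS_LIMIT) want an unsigned long int. A small usage example for the 8-bit library:

#include <stdio.h>
#include <pcre.h>

int main(void)
{
    int utf8 = 0, newline = 0;
    unsigned long int match_limit = 0;

    /* Each item fills in a value of the documented type; a negative return
       (PCRE_ERROR_BADOPTION) means the item is not recognized. */
    pcre_config(PCRE_CONFIG_UTF8, &utf8);
    pcre_config(PCRE_CONFIG_NEWLINE, &newline);
    pcre_config(PCRE_CONFIG_MATCH_LIMIT, &match_limit);

    printf("UTF-8 support: %d\n", utf8);
    printf("default newline value: %d\n", newline);
    printf("default match limit: %lu\n", match_limit);
    return 0;
}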
diff --git a/contrib/libs/pcre/pcre_config.h b/contrib/libs/pcre/pcre_config.h
index ebc9c01fc3..622b2ec59b 100644
--- a/contrib/libs/pcre/pcre_config.h
+++ b/contrib/libs/pcre/pcre_config.h
@@ -53,8 +53,8 @@ sure both macros are undefined; an emulation function will then be used. */
/* #undef EBCDIC_NL25 */
/* Define to 1 if you have the `bcopy' function. */
-#define HAVE_BCOPY 1
-
+#define HAVE_BCOPY 1
+
/* Define to 1 if you have the <bits/type_traits.h> header file. */
/* #undef HAVE_BITS_TYPE_TRAITS_H */
@@ -76,15 +76,15 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
-/* Define to 1 if you have the <limits.h> header file. */
-#define HAVE_LIMITS_H 1
-
-/* Define to 1 if the system has the type `long long'. */
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
-
-/* Define to 1 if you have the `memmove' function. */
-#define HAVE_MEMMOVE 1
-
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
@@ -100,27 +100,27 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <readline/readline.h> header file. */
/* #undef HAVE_READLINE_READLINE_H */
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the `strerror' function. */
-#define HAVE_STRERROR 1
-
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strerror' function. */
+#define HAVE_STRERROR 1
+
/* Define to 1 if you have the <string> header file. */
#define HAVE_STRING 1
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
-
+
/* Define to 1 if you have `strtoimax'. */
/* #undef HAVE_STRTOIMAX */
-
+
/* Define to 1 if you have `strtoll'. */
/* #undef HAVE_STRTOLL */
@@ -139,9 +139,9 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
-/* Define to 1 if the system has the type `unsigned long long'. */
+/* Define to 1 if the system has the type `unsigned long long'. */
#define HAVE_UNSIGNED_LONG_LONG 1
-
+
/* Define to 1 if the compiler supports simple visibility declarations. */
#define HAVE_VISIBILITY 1
@@ -154,44 +154,44 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have `_strtoi64'. */
/* #undef HAVE__STRTOI64 */
-/* The value of LINK_SIZE determines the number of bytes used to store links
- as offsets within the compiled regex. The default is 2, which allows for
- compiled patterns up to 64K long. This covers the vast majority of cases.
- However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
+/* The value of LINK_SIZE determines the number of bytes used to store links
+ as offsets within the compiled regex. The default is 2, which allows for
+ compiled patterns up to 64K long. This covers the vast majority of cases.
+ However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. */
-#define LINK_SIZE 2
-
+#define LINK_SIZE 2
+
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"
-/* The value of MATCH_LIMIT determines the default number of times the
- internal match() function can be called during a single execution of
- pcre_exec(). There is a runtime interface for setting a different limit.
- The limit exists in order to catch runaway regular expressions that take
- for ever to determine that they do not match. The default is set very large
+/* The value of MATCH_LIMIT determines the default number of times the
+ internal match() function can be called during a single execution of
+ pcre_exec(). There is a runtime interface for setting a different limit.
+ The limit exists in order to catch runaway regular expressions that take
+ for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. */
-#define MATCH_LIMIT 10000000
-
-/* The above limit applies to all calls of match(), whether or not they
- increase the recursion depth. In some environments it is desirable to limit
- the depth of recursive calls of match() more strictly, in order to restrict
- the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
- used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
- match(). To have any useful effect, it must be less than the value of
- MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
+#define MATCH_LIMIT 10000000
+
+/* The above limit applies to all calls of match(), whether or not they
+ increase the recursion depth. In some environments it is desirable to limit
+ the depth of recursive calls of match() more strictly, in order to restrict
+ the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
+ used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
+ match(). To have any useful effect, it must be less than the value of
+ MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. */
-#define MATCH_LIMIT_RECURSION MATCH_LIMIT
-
-/* This limit is parameterized just in case anybody ever wants to change it.
- Care must be taken if it is increased, because it guards against integer
- overflow caused by enormously large patterns. */
-#define MAX_NAME_COUNT 10000
-
-/* This limit is parameterized just in case anybody ever wants to change it.
- Care must be taken if it is increased, because it guards against integer
- overflow caused by enormously large patterns. */
-#define MAX_NAME_SIZE 32
-
+#define MATCH_LIMIT_RECURSION MATCH_LIMIT
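
[Editor's note] The comments above note that MATCH_LIMIT and MATCH_LIMIT_RECURSION are only build-time defaults and that a runtime interface exists for setting different limits. That interface is the pcre_extra block passed to pcre_exec(); a hedged sketch for the 8-bit API:

#include <stdio.h>
#include <string.h>
#include <pcre.h>

int main(void)
{
    const char *err;
    const char *subject = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaab";
    int erroffset, rc, ovector[30];
    pcre_extra extra;

    /* A pattern prone to heavy backtracking against this subject. */
    pcre *re = pcre_compile("(a+)+$", 0, &err, &erroffset, NULL);
    if (re == NULL) return 1;

    /* Override the built-in defaults for this call only. */
    memset(&extra, 0, sizeof(extra));
    extra.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    extra.match_limit = 100000;
    extra.match_limit_recursion = 10000;

    rc = pcre_exec(re, &extra, subject, (int)strlen(subject), 0, 0,
                   ovector, 30);

    /* PCRE_ERROR_MATCHLIMIT or PCRE_ERROR_RECURSIONLIMIT here would mean a
       limit was reached before the match attempt completed. */
    printf("pcre_exec returned %d\n", rc);

    pcre_free(re);
    return 0;
}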
+
+/* This limit is parameterized just in case anybody ever wants to change it.
+ Care must be taken if it is increased, because it guards against integer
+ overflow caused by enormously large patterns. */
+#define MAX_NAME_COUNT 10000
+
+/* This limit is parameterized just in case anybody ever wants to change it.
+ Care must be taken if it is increased, because it guards against integer
+ overflow caused by enormously large patterns. */
+#define MAX_NAME_SIZE 32
+
/* The value of NEWLINE determines the default newline character sequence.
PCRE client programs can override this by selecting other values at run
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
@@ -200,38 +200,38 @@ sure both macros are undefined; an emulation function will then be used. */
0x25) that are used as the NL line terminator that is equivalent to ASCII
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
or -2 (ANYCRLF). */
-#define NEWLINE 10
-
-/* PCRE uses recursive function calls to handle backtracking while matching.
- This can sometimes be a problem on systems that have stacks of limited
+#define NEWLINE 10
+
+/* PCRE uses recursive function calls to handle backtracking while matching.
+ This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to any value to get a version that doesn't use
recursion in the match() function; instead it creates its own stack by
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
detail, see the comments and other stuff just above the match() function.
*/
-/* #undef NO_RECURSE */
-
-/* Name of package */
-#define PACKAGE "pcre"
-
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT ""
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "PCRE"
-
-/* Define to the full name and version of this package. */
+/* #undef NO_RECURSE */
+
+/* Name of package */
+#define PACKAGE "pcre"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "PCRE"
+
+/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 8.44"
-
-/* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "pcre"
-
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "pcre"
+
/* Define to the home page for this package. */
#define PACKAGE_URL ""
-/* Define to the version of this package. */
+/* Define to the version of this package. */
#define PACKAGE_VERSION "8.44"
-
+
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
@@ -277,27 +277,27 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE_STATIC */
-/* When calling PCRE via the POSIX interface, additional working storage is
- required for holding the pointers to capturing substrings because PCRE
- requires three integers per substring, whereas the POSIX interface provides
- only two. If the number of expected substrings is small, the wrapper
- function uses space on the stack, because this is faster than using
- malloc() for each call. The threshold above which the stack is no longer
+/* When calling PCRE via the POSIX interface, additional working storage is
+ required for holding the pointers to capturing substrings because PCRE
+ requires three integers per substring, whereas the POSIX interface provides
+ only two. If the number of expected substrings is small, the wrapper
+ function uses space on the stack, because this is faster than using
+ malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. */
-#define POSIX_MALLOC_THRESHOLD 10
-
+#define POSIX_MALLOC_THRESHOLD 10
+
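The pattern the comment describes can be sketched roughly as follows; the function and variable names are illustrative, not the actual pcreposix.c code, which additionally copies the resulting offset pairs back into the caller's pmatch array:

#include <stdlib.h>
#include <regex.h>   /* REG_ESPACE */

#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
#endif

/* Sketch: pick stack or heap storage for the offset vector, which needs three
   ints per expected substring, using POSIX_MALLOC_THRESHOLD as the cut-over. */
static int
run_with_ovector(size_t nmatch)
{
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
int *ovector = small_ovector;
int allocated = 0;

if (nmatch > POSIX_MALLOC_THRESHOLD)
  {
  ovector = (int *)malloc(sizeof(int) * nmatch * 3);
  if (ovector == NULL) return REG_ESPACE;
  allocated = 1;
  }

/* ... a pcre_exec() call using ovector and (int)(nmatch * 3) would go here ... */

if (allocated) free(ovector);
return 0;
}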
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
/* #undef PTHREAD_CREATE_JOINABLE */
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
-
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
#ifdef ARCADIA_PCRE_ENABLE_JIT
/* Define to any value to enable support for Just-In-Time compiling. */
#define SUPPORT_JIT /**/
#endif
-
+
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
@@ -338,7 +338,7 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value for valgrind support to find invalid memory reads. */
/* #undef SUPPORT_VALGRIND */
-/* Version number of package */
+/* Version number of package */
#define VERSION "8.44"
/* Define to empty if `const' does not conform to ANSI C. */
diff --git a/contrib/libs/pcre/pcre_dfa_exec.c b/contrib/libs/pcre/pcre_dfa_exec.c
index 649d1b19d9..81eec05356 100644
--- a/contrib/libs/pcre/pcre_dfa_exec.c
+++ b/contrib/libs/pcre/pcre_dfa_exec.c
@@ -1,49 +1,49 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
-
- Written by Philip Hazel
+
+ Written by Philip Hazel
Copyright (c) 1997-2017 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* This module contains the external function pcre_dfa_exec(), which is an
-alternative matching function that uses a sort of DFA algorithm (not a true
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains the external function pcre_dfa_exec(), which is an
+alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl-compatible, but it has advantages in certain
-applications. */
-
-
+applications. */
+
+
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
the performance of his patterns greatly. I could not use it as it stood, as it
was not thread safe, and made assumptions about pattern sizes. Also, it caused
@@ -72,61 +72,61 @@ in others, so I abandoned this code. */
-#ifdef HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#define NLBLOCK md /* Block containing newline information */
-#define PSSTART start_subject /* Field containing processed string start */
-#define PSEND end_subject /* Field containing processed string end */
-
-#include "pcre_internal.h"
-
-
-/* For use to indent debugging output */
-
-#define SP " "
-
-
-/*************************************************
-* Code parameters and static tables *
-*************************************************/
-
-/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
-into others, under special conditions. A gap of 20 between the blocks should be
-enough. The resulting opcodes don't have to be less than 256 because they are
-never stored, so we push them well clear of the normal opcodes. */
-
-#define OP_PROP_EXTRA 300
-#define OP_EXTUNI_EXTRA 320
-#define OP_ANYNL_EXTRA 340
-#define OP_HSPACE_EXTRA 360
-#define OP_VSPACE_EXTRA 380
-
-
-/* This table identifies those opcodes that are followed immediately by a
+#endif
+
+#define NLBLOCK md /* Block containing newline information */
+#define PSSTART start_subject /* Field containing processed string start */
+#define PSEND end_subject /* Field containing processed string end */
+
+#include "pcre_internal.h"
+
+
+/* For use to indent debugging output */
+
+#define SP " "
+
+
+/*************************************************
+* Code parameters and static tables *
+*************************************************/
+
+/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
+into others, under special conditions. A gap of 20 between the blocks should be
+enough. The resulting opcodes don't have to be less than 256 because they are
+never stored, so we push them well clear of the normal opcodes. */
+
+#define OP_PROP_EXTRA 300
+#define OP_EXTUNI_EXTRA 320
+#define OP_ANYNL_EXTRA 340
+#define OP_HSPACE_EXTRA 360
+#define OP_VSPACE_EXTRA 380
+
+
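For concreteness, a worked reading of how these offsets get used (the conversion itself appears further down, just before the main opcode switch):

/* Worked example: an element such as \p{L}+ compiles, roughly, to OP_TYPEPLUS
   with OP_PROP as its one-byte argument.  Once that argument has been loaded
   into d, the code below adds OP_PROP_EXTRA to codevalue, so the state is
   dispatched through "case OP_PROP_EXTRA + OP_TYPEPLUS:".  Because the extra
   values start at 300 and are 20 apart, these synthetic case labels cannot
   collide with real opcodes, and the common repeat cases stay fast. */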
+/* This table identifies those opcodes that are followed immediately by a
character that is to be tested in some way. This makes it possible to
-centralize the loading of these characters. In the case of Type * etc, the
-"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
+centralize the loading of these characters. In the case of Type * etc, the
+"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
small value. Non-zero values in the table are the offsets from the opcode where
the character is to be found. ***NOTE*** If the start of this table is
modified, the three tables that follow must also be modified. */
-
+
static const pcre_uint8 coptable[] = {
- 0, /* End */
- 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
- 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
+ 0, /* End */
+ 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
+ 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
0, 0, 0, /* Any, AllAny, Anybyte */
0, 0, /* \P, \p */
- 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
+ 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
0, /* \X */
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
- 1, /* Char */
+ 1, /* Char */
1, /* Chari */
- 1, /* not */
+ 1, /* not */
1, /* noti */
- /* Positive single-char repeats */
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
+ /* Positive single-char repeats */
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
1+IMM2_SIZE, /* exact */
1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
@@ -134,8 +134,8 @@ static const pcre_uint8 coptable[] = {
1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
1+IMM2_SIZE, /* exact I */
1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
- /* Negative single-char repeats - only for chars < 256 */
- 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
+ /* Negative single-char repeats - only for chars < 256 */
+ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
1+IMM2_SIZE, /* NOT exact */
1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
@@ -143,34 +143,34 @@ static const pcre_uint8 coptable[] = {
1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
1+IMM2_SIZE, /* NOT exact I */
1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
- /* Positive type repeats */
- 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
+ /* Positive type repeats */
+ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
1+IMM2_SIZE, /* Type exact */
1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
- /* Character class & ref repeats */
- 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
- 0, 0, /* CRRANGE, CRMINRANGE */
+ /* Character class & ref repeats */
+ 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
+ 0, 0, /* CRRANGE, CRMINRANGE */
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
- 0, /* CLASS */
- 0, /* NCLASS */
- 0, /* XCLASS - variable length */
- 0, /* REF */
+ 0, /* CLASS */
+ 0, /* NCLASS */
+ 0, /* XCLASS - variable length */
+ 0, /* REF */
0, /* REFI */
0, /* DNREF */
0, /* DNREFI */
- 0, /* RECURSE */
- 0, /* CALLOUT */
- 0, /* Alt */
- 0, /* Ket */
- 0, /* KetRmax */
- 0, /* KetRmin */
+ 0, /* RECURSE */
+ 0, /* CALLOUT */
+ 0, /* Alt */
+ 0, /* Ket */
+ 0, /* KetRmax */
+ 0, /* KetRmin */
0, /* KetRpos */
0, /* Reverse */
- 0, /* Assert */
- 0, /* Assert not */
- 0, /* Assert behind */
- 0, /* Assert behind not */
+ 0, /* Assert */
+ 0, /* Assert not */
+ 0, /* Assert behind */
+ 0, /* Assert behind not */
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
@@ -238,7 +238,7 @@ static const pcre_uint8 poptable[] = {
0, /* KetRmax */
0, /* KetRmin */
0, /* KetRpos */
- 0, /* Reverse */
+ 0, /* Reverse */
0, /* Assert */
0, /* Assert not */
0, /* Assert behind */
@@ -248,516 +248,516 @@ static const pcre_uint8 poptable[] = {
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
- 0, /* DEF */
+ 0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0 /* CLOSE, SKIPZERO */
-};
-
-/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
-and \w */
-
+};
+
+/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
+and \w */
+
static const pcre_uint8 toptable1[] = {
- 0, 0, 0, 0, 0, 0,
- ctype_digit, ctype_digit,
- ctype_space, ctype_space,
- ctype_word, ctype_word,
+ 0, 0, 0, 0, 0, 0,
+ ctype_digit, ctype_digit,
+ ctype_space, ctype_space,
+ ctype_word, ctype_word,
0, 0 /* OP_ANY, OP_ALLANY */
-};
-
+};
+
static const pcre_uint8 toptable2[] = {
- 0, 0, 0, 0, 0, 0,
- ctype_digit, 0,
- ctype_space, 0,
- ctype_word, 0,
+ 0, 0, 0, 0, 0, 0,
+ ctype_digit, 0,
+ ctype_space, 0,
+ ctype_word, 0,
1, 1 /* OP_ANY, OP_ALLANY */
-};
-
-
-/* Structure for holding data about a particular state, which is in effect the
-current data for an active path through the match tree. It must consist
-entirely of ints because the working vector we are passed, and which we put
-these structures in, is a vector of ints. */
-
-typedef struct stateblock {
- int offset; /* Offset to opcode */
- int count; /* Count for repeats */
- int data; /* Some use extra data */
-} stateblock;
-
+};
+
+
+/* Structure for holding data about a particular state, which is in effect the
+current data for an active path through the match tree. It must consist
+entirely of ints because the working vector we are passed, and which we put
+these structures in, is a vector of ints. */
+
+typedef struct stateblock {
+ int offset; /* Offset to opcode */
+ int count; /* Count for repeats */
+ int data; /* Some use extra data */
+} stateblock;
+
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
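Because a stateblock is a whole number of ints, the int workspace supplied by the caller can simply be reinterpreted. A minimal sketch of the carving that internal_dfa_exec() performs further down (the helper name is invented; in the real code the two assignments are done inline):

/* Sketch: split the caller's int workspace into two equally sized stateblock
   vectors, after the first two ints, which hold restart bookkeeping. */
static void
carve_workspace(int *workspace, int wscount,
  stateblock **active_states, stateblock **new_states)
{
*active_states = (stateblock *)(workspace + 2);
*new_states = *active_states + wscount;     /* wscount states per vector */
}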
-
-
+
+
#ifdef PCRE_DEBUG
-/*************************************************
-* Print character string *
-*************************************************/
-
-/* Character string printing function for debugging.
-
-Arguments:
- p points to string
- length number of bytes
- f where to print
-
-Returns: nothing
-*/
-
-static void
+/*************************************************
+* Print character string *
+*************************************************/
+
+/* Character string printing function for debugging.
+
+Arguments:
+ p points to string
+ length number of bytes
+ f where to print
+
+Returns: nothing
+*/
+
+static void
pchars(const pcre_uchar *p, int length, FILE *f)
-{
+{
pcre_uint32 c;
-while (length-- > 0)
- {
- if (isprint(c = *(p++)))
- fprintf(f, "%c", c);
- else
+while (length-- > 0)
+ {
+ if (isprint(c = *(p++)))
+ fprintf(f, "%c", c);
+ else
fprintf(f, "\\x{%02x}", c);
- }
-}
-#endif
-
-
-
-/*************************************************
-* Execute a Regular Expression - DFA engine *
-*************************************************/
-
-/* This internal function applies a compiled pattern to a subject string,
-starting at a given point, using a DFA engine. This function is called from the
-external one, possibly multiple times if the pattern is not anchored. The
-function calls itself recursively for some kinds of subpattern.
-
-Arguments:
- md the match_data block with fixed information
- this_start_code the opening bracket of this subexpression's code
- current_subject where we currently are in the subject string
- start_offset start offset in the subject string
- offsets vector to contain the matching string offsets
- offsetcount size of same
- workspace vector of workspace
- wscount size of same
- rlevel function call recursion level
-
+ }
+}
+#endif
+
+
+
+/*************************************************
+* Execute a Regular Expression - DFA engine *
+*************************************************/
+
+/* This internal function applies a compiled pattern to a subject string,
+starting at a given point, using a DFA engine. This function is called from the
+external one, possibly multiple times if the pattern is not anchored. The
+function calls itself recursively for some kinds of subpattern.
+
+Arguments:
+ md the match_data block with fixed information
+ this_start_code the opening bracket of this subexpression's code
+ current_subject where we currently are in the subject string
+ start_offset start offset in the subject string
+ offsets vector to contain the matching string offsets
+ offsetcount size of same
+ workspace vector of workspace
+ wscount size of same
+ rlevel function call recursion level
+
Returns: > 0 => number of match offset pairs placed in offsets
= 0 => offsets overflowed; longest matches are present
- -1 => failed to match
- < -1 => some kind of unexpected problem
-
-The following macros are used for adding states to the two state vectors (one
-for the current character, one for the following character). */
-
-#define ADD_ACTIVE(x,y) \
- if (active_count++ < wscount) \
- { \
- next_active_state->offset = (x); \
- next_active_state->count = (y); \
- next_active_state++; \
- DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
- } \
- else return PCRE_ERROR_DFA_WSSIZE
-
-#define ADD_ACTIVE_DATA(x,y,z) \
- if (active_count++ < wscount) \
- { \
- next_active_state->offset = (x); \
- next_active_state->count = (y); \
- next_active_state->data = (z); \
- next_active_state++; \
- DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
- } \
- else return PCRE_ERROR_DFA_WSSIZE
-
-#define ADD_NEW(x,y) \
- if (new_count++ < wscount) \
- { \
- next_new_state->offset = (x); \
- next_new_state->count = (y); \
- next_new_state++; \
- DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
- } \
- else return PCRE_ERROR_DFA_WSSIZE
-
-#define ADD_NEW_DATA(x,y,z) \
- if (new_count++ < wscount) \
- { \
- next_new_state->offset = (x); \
- next_new_state->count = (y); \
- next_new_state->data = (z); \
- next_new_state++; \
+ -1 => failed to match
+ < -1 => some kind of unexpected problem
+
+The following macros are used for adding states to the two state vectors (one
+for the current character, one for the following character). */
+
+#define ADD_ACTIVE(x,y) \
+ if (active_count++ < wscount) \
+ { \
+ next_active_state->offset = (x); \
+ next_active_state->count = (y); \
+ next_active_state++; \
+ DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
+ } \
+ else return PCRE_ERROR_DFA_WSSIZE
+
+#define ADD_ACTIVE_DATA(x,y,z) \
+ if (active_count++ < wscount) \
+ { \
+ next_active_state->offset = (x); \
+ next_active_state->count = (y); \
+ next_active_state->data = (z); \
+ next_active_state++; \
+ DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
+ } \
+ else return PCRE_ERROR_DFA_WSSIZE
+
+#define ADD_NEW(x,y) \
+ if (new_count++ < wscount) \
+ { \
+ next_new_state->offset = (x); \
+ next_new_state->count = (y); \
+ next_new_state++; \
+ DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
+ } \
+ else return PCRE_ERROR_DFA_WSSIZE
+
+#define ADD_NEW_DATA(x,y,z) \
+ if (new_count++ < wscount) \
+ { \
+ next_new_state->offset = (x); \
+ next_new_state->count = (y); \
+ next_new_state->data = (z); \
+ next_new_state++; \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
(x), (y), (z), __LINE__)); \
- } \
- else return PCRE_ERROR_DFA_WSSIZE
-
-/* And now, here is the code */
-
-static int
-internal_dfa_exec(
- dfa_match_data *md,
+ } \
+ else return PCRE_ERROR_DFA_WSSIZE
+
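Written out as a function (the real code keeps this as a macro, not least so the overflow branch can return PCRE_ERROR_DFA_WSSIZE straight out of internal_dfa_exec()), ADD_NEW boils down to a bounded append; the restatement below is for clarity only:

/* Sketch of ADD_NEW as a function: append a state if there is room, otherwise
   report that the workspace vector is too small. */
static int
add_new_state(stateblock **next_new_state, int *new_count, int wscount,
  int offset, int count)
{
if ((*new_count)++ >= wscount) return PCRE_ERROR_DFA_WSSIZE;
(*next_new_state)->offset = offset;
(*next_new_state)->count = count;
(*next_new_state)++;
return 0;
}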
+/* And now, here is the code */
+
+static int
+internal_dfa_exec(
+ dfa_match_data *md,
const pcre_uchar *this_start_code,
const pcre_uchar *current_subject,
- int start_offset,
- int *offsets,
- int offsetcount,
- int *workspace,
- int wscount,
+ int start_offset,
+ int *offsets,
+ int offsetcount,
+ int *workspace,
+ int wscount,
int rlevel)
-{
-stateblock *active_states, *new_states, *temp_states;
-stateblock *next_active_state, *next_new_state;
-
+{
+stateblock *active_states, *new_states, *temp_states;
+stateblock *next_active_state, *next_new_state;
+
const pcre_uint8 *ctypes, *lcc, *fcc;
const pcre_uchar *ptr;
const pcre_uchar *end_code, *first_op;
-
+
dfa_recursion_info new_recursive;
-int active_count, new_count, match_count;
-
-/* Some fields in the md block are frequently referenced, so we load them into
-independent variables in the hope that this will perform better. */
-
+int active_count, new_count, match_count;
+
+/* Some fields in the md block are frequently referenced, so we load them into
+independent variables in the hope that this will perform better. */
+
const pcre_uchar *start_subject = md->start_subject;
const pcre_uchar *end_subject = md->end_subject;
const pcre_uchar *start_code = md->start_code;
-
+
#ifdef SUPPORT_UTF
BOOL utf = (md->poptions & PCRE_UTF8) != 0;
-#else
+#else
BOOL utf = FALSE;
-#endif
-
+#endif
+
BOOL reset_could_continue = FALSE;
-rlevel++;
-offsetcount &= (-2);
-
-wscount -= 2;
-wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
- (2 * INTS_PER_STATEBLOCK);
-
-DPRINTF(("\n%.*s---------------------\n"
+rlevel++;
+offsetcount &= (-2);
+
+wscount -= 2;
+wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
+ (2 * INTS_PER_STATEBLOCK);
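A worked example of the arithmetic above, assuming a stateblock of three ints (so INTS_PER_STATEBLOCK is 3): a caller passing a 1000-int workspace first loses the two control ints (998), which is then rounded down to a multiple of 2 * 3 (996) and divided by 6, leaving wscount = 166 states in each of the two vectors.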
+
+DPRINTF(("\n%.*s---------------------\n"
"%.*sCall to internal_dfa_exec f=%d\n",
rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
-
-ctypes = md->tables + ctypes_offset;
-lcc = md->tables + lcc_offset;
-fcc = md->tables + fcc_offset;
-
-match_count = PCRE_ERROR_NOMATCH; /* A negative number */
-
-active_states = (stateblock *)(workspace + 2);
-next_new_state = new_states = active_states + wscount;
-new_count = 0;
-
-first_op = this_start_code + 1 + LINK_SIZE +
+
+ctypes = md->tables + ctypes_offset;
+lcc = md->tables + lcc_offset;
+fcc = md->tables + fcc_offset;
+
+match_count = PCRE_ERROR_NOMATCH; /* A negative number */
+
+active_states = (stateblock *)(workspace + 2);
+next_new_state = new_states = active_states + wscount;
+new_count = 0;
+
+first_op = this_start_code + 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
? IMM2_SIZE:0);
-
-/* The first thing in any (sub) pattern is a bracket of some sort. Push all
-the alternative states onto the list, and find out where the end is. This
-makes is possible to use this function recursively, when we want to stop at a
-matching internal ket rather than at the end.
-
-If the first opcode in the first alternative is OP_REVERSE, we are dealing with
-a backward assertion. In that case, we have to find out the maximum amount to
-move back, and set up each alternative appropriately. */
-
-if (*first_op == OP_REVERSE)
- {
- int max_back = 0;
- int gone_back;
-
- end_code = this_start_code;
- do
- {
- int back = GET(end_code, 2+LINK_SIZE);
- if (back > max_back) max_back = back;
- end_code += GET(end_code, 1);
- }
- while (*end_code == OP_ALT);
-
- /* If we can't go back the amount required for the longest lookbehind
- pattern, go back as far as we can; some alternatives may still be viable. */
-
+
+/* The first thing in any (sub) pattern is a bracket of some sort. Push all
+the alternative states onto the list, and find out where the end is. This
+makes it possible to use this function recursively, when we want to stop at a
+matching internal ket rather than at the end.
+
+If the first opcode in the first alternative is OP_REVERSE, we are dealing with
+a backward assertion. In that case, we have to find out the maximum amount to
+move back, and set up each alternative appropriately. */
+
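A worked example of the set-up described above, assuming a lookbehind such as (?<=abcd|xy): the two branches carry fixed lengths 4 and 2, so max_back becomes 4. If the current position is only 3 characters into the subject, gone_back is clamped to 3; the 4-character branch is dropped (back > gone_back), while the 2-character branch is queued with a data value of gone_back - back = 1, that is, "skip one more character before this state becomes live", which is exactly how the negative-offset states are interpreted in the main loop below.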
+if (*first_op == OP_REVERSE)
+ {
+ int max_back = 0;
+ int gone_back;
+
+ end_code = this_start_code;
+ do
+ {
+ int back = GET(end_code, 2+LINK_SIZE);
+ if (back > max_back) max_back = back;
+ end_code += GET(end_code, 1);
+ }
+ while (*end_code == OP_ALT);
+
+ /* If we can't go back the amount required for the longest lookbehind
+ pattern, go back as far as we can; some alternatives may still be viable. */
+
#ifdef SUPPORT_UTF
- /* In character mode we have to step back character by character */
-
+ /* In character mode we have to step back character by character */
+
if (utf)
- {
- for (gone_back = 0; gone_back < max_back; gone_back++)
- {
- if (current_subject <= start_subject) break;
- current_subject--;
+ {
+ for (gone_back = 0; gone_back < max_back; gone_back++)
+ {
+ if (current_subject <= start_subject) break;
+ current_subject--;
ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
- }
- }
- else
-#endif
-
- /* In byte-mode we can do this quickly. */
-
- {
- gone_back = (current_subject - max_back < start_subject)?
+ }
+ }
+ else
+#endif
+
+ /* In byte-mode we can do this quickly. */
+
+ {
+ gone_back = (current_subject - max_back < start_subject)?
(int)(current_subject - start_subject) : max_back;
- current_subject -= gone_back;
- }
-
+ current_subject -= gone_back;
+ }
+
/* Save the earliest consulted character */
if (current_subject < md->start_used_ptr)
md->start_used_ptr = current_subject;
- /* Now we can process the individual branches. */
-
- end_code = this_start_code;
- do
- {
- int back = GET(end_code, 2+LINK_SIZE);
- if (back <= gone_back)
- {
+ /* Now we can process the individual branches. */
+
+ end_code = this_start_code;
+ do
+ {
+ int back = GET(end_code, 2+LINK_SIZE);
+ if (back <= gone_back)
+ {
int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
- ADD_NEW_DATA(-bstate, 0, gone_back - back);
- }
- end_code += GET(end_code, 1);
- }
- while (*end_code == OP_ALT);
- }
-
-/* This is the code for a "normal" subpattern (not a backward assertion). The
-start of a whole pattern is always one of these. If we are at the top level,
-we may be asked to restart matching from the same point that we reached for a
-previous partial match. We still have to scan through the top-level branches to
-find the end state. */
-
-else
- {
- end_code = this_start_code;
-
- /* Restarting */
-
- if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
- {
- do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
- new_count = workspace[1];
- if (!workspace[0])
- memcpy(new_states, active_states, new_count * sizeof(stateblock));
- }
-
- /* Not restarting */
-
- else
- {
- int length = 1 + LINK_SIZE +
+ ADD_NEW_DATA(-bstate, 0, gone_back - back);
+ }
+ end_code += GET(end_code, 1);
+ }
+ while (*end_code == OP_ALT);
+ }
+
+/* This is the code for a "normal" subpattern (not a backward assertion). The
+start of a whole pattern is always one of these. If we are at the top level,
+we may be asked to restart matching from the same point that we reached for a
+previous partial match. We still have to scan through the top-level branches to
+find the end state. */
+
+else
+ {
+ end_code = this_start_code;
+
+ /* Restarting */
+
+ if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
+ {
+ do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
+ new_count = workspace[1];
+ if (!workspace[0])
+ memcpy(new_states, active_states, new_count * sizeof(stateblock));
+ }
+
+ /* Not restarting */
+
+ else
+ {
+ int length = 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
? IMM2_SIZE:0);
- do
- {
+ do
+ {
ADD_NEW((int)(end_code - start_code + length), 0);
- end_code += GET(end_code, 1);
- length = 1 + LINK_SIZE;
- }
- while (*end_code == OP_ALT);
- }
- }
-
-workspace[0] = 0; /* Bit indicating which vector is current */
-
+ end_code += GET(end_code, 1);
+ length = 1 + LINK_SIZE;
+ }
+ while (*end_code == OP_ALT);
+ }
+ }
+
+workspace[0] = 0; /* Bit indicating which vector is current */
+
DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
-
-/* Loop for scanning the subject */
-
-ptr = current_subject;
-for (;;)
- {
- int i, j;
- int clen, dlen;
+
+/* Loop for scanning the subject */
+
+ptr = current_subject;
+for (;;)
+ {
+ int i, j;
+ int clen, dlen;
pcre_uint32 c, d;
int forced_fail = 0;
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
-
- /* Make the new state list into the active state list and empty the
- new state list. */
-
- temp_states = active_states;
- active_states = new_states;
- new_states = temp_states;
- active_count = new_count;
- new_count = 0;
-
- workspace[0] ^= 1; /* Remember for the restarting feature */
- workspace[1] = active_count;
-
+
+ /* Make the new state list into the active state list and empty the
+ new state list. */
+
+ temp_states = active_states;
+ active_states = new_states;
+ new_states = temp_states;
+ active_count = new_count;
+ new_count = 0;
+
+ workspace[0] ^= 1; /* Remember for the restarting feature */
+ workspace[1] = active_count;
+
#ifdef PCRE_DEBUG
- printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
+ printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
pchars(ptr, STRLEN_UC(ptr), stdout);
- printf("\"\n");
-
- printf("%.*sActive states: ", rlevel*2-2, SP);
- for (i = 0; i < active_count; i++)
- printf("%d/%d ", active_states[i].offset, active_states[i].count);
- printf("\n");
-#endif
-
- /* Set the pointers for adding new states */
-
- next_active_state = active_states + active_count;
- next_new_state = new_states;
-
- /* Load the current character from the subject outside the loop, as many
- different states may want to look at it, and we assume that at least one
- will. */
-
- if (ptr < end_subject)
- {
+ printf("\"\n");
+
+ printf("%.*sActive states: ", rlevel*2-2, SP);
+ for (i = 0; i < active_count; i++)
+ printf("%d/%d ", active_states[i].offset, active_states[i].count);
+ printf("\n");
+#endif
+
+ /* Set the pointers for adding new states */
+
+ next_active_state = active_states + active_count;
+ next_new_state = new_states;
+
+ /* Load the current character from the subject outside the loop, as many
+ different states may want to look at it, and we assume that at least one
+ will. */
+
+ if (ptr < end_subject)
+ {
clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF
GETCHARLENTEST(c, ptr, clen);
#else
- c = *ptr;
+ c = *ptr;
#endif /* SUPPORT_UTF */
- }
- else
- {
- clen = 0; /* This indicates the end of the subject */
- c = NOTACHAR; /* This value should never actually be used */
- }
-
- /* Scan up the active states and act on each one. The result of an action
- may be to add more states to the currently active list (e.g. on hitting a
- parenthesis) or it may be to put states on the new list, for considering
- when we move the character pointer on. */
-
- for (i = 0; i < active_count; i++)
- {
- stateblock *current_state = active_states + i;
+ }
+ else
+ {
+ clen = 0; /* This indicates the end of the subject */
+ c = NOTACHAR; /* This value should never actually be used */
+ }
+
+ /* Scan up the active states and act on each one. The result of an action
+ may be to add more states to the currently active list (e.g. on hitting a
+ parenthesis) or it may be to put states on the new list, for considering
+ when we move the character pointer on. */
+
+ for (i = 0; i < active_count; i++)
+ {
+ stateblock *current_state = active_states + i;
BOOL caseless = FALSE;
const pcre_uchar *code;
- int state_offset = current_state->offset;
+ int state_offset = current_state->offset;
int codevalue, rrc;
int count;
-
+
#ifdef PCRE_DEBUG
- printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
- if (clen == 0) printf("EOL\n");
- else if (c > 32 && c < 127) printf("'%c'\n", c);
- else printf("0x%02x\n", c);
-#endif
-
- /* A negative offset is a special case meaning "hold off going to this
- (negated) state until the number of characters in the data field have
+ printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
+ if (clen == 0) printf("EOL\n");
+ else if (c > 32 && c < 127) printf("'%c'\n", c);
+ else printf("0x%02x\n", c);
+#endif
+
+ /* A negative offset is a special case meaning "hold off going to this
+ (negated) state until the number of characters in the data field have
been skipped". If the could_continue flag was passed over from a previous
state, arrange for it to be passed on. */
-
- if (state_offset < 0)
- {
- if (current_state->data > 0)
- {
- DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
- ADD_NEW_DATA(state_offset, current_state->count,
- current_state->data - 1);
+
+ if (state_offset < 0)
+ {
+ if (current_state->data > 0)
+ {
+ DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
+ ADD_NEW_DATA(state_offset, current_state->count,
+ current_state->data - 1);
if (could_continue) reset_could_continue = TRUE;
- continue;
- }
- else
- {
- current_state->offset = state_offset = -state_offset;
- }
- }
-
+ continue;
+ }
+ else
+ {
+ current_state->offset = state_offset = -state_offset;
+ }
+ }
+
/* Check for a duplicate state with the same count, and skip if found.
See the note at the head of this module about the possibility of improving
performance here. */
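The duplicate check described by this comment is the linear scan in the loop just below, costing O(active_count) per insertion; the module-head note it refers to suggests there is room for improvement. One hedged sketch of an alternative (not part of PCRE, and it ignores the count field, so it is only a starting point) is a per-character bitmap keyed by state offset:

/* Hypothetical alternative: mark each offset the first time it is queued for
   the current character, making the duplicate test O(1).  States that differ
   only in their repeat count would still need the existing comparison. */
static int
already_queued(unsigned char *seen, int state_offset)
{
unsigned char mask = (unsigned char)(1u << (state_offset & 7));
if ((seen[state_offset >> 3] & mask) != 0) return 1;
seen[state_offset >> 3] = (unsigned char)(seen[state_offset >> 3] | mask);
return 0;
}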
-
- for (j = 0; j < i; j++)
- {
- if (active_states[j].offset == state_offset &&
- active_states[j].count == current_state->count)
- {
- DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
- goto NEXT_ACTIVE_STATE;
- }
- }
-
- /* The state offset is the offset to the opcode */
-
- code = start_code + state_offset;
- codevalue = *code;
-
+
+ for (j = 0; j < i; j++)
+ {
+ if (active_states[j].offset == state_offset &&
+ active_states[j].count == current_state->count)
+ {
+ DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
+ goto NEXT_ACTIVE_STATE;
+ }
+ }
+
+ /* The state offset is the offset to the opcode */
+
+ code = start_code + state_offset;
+ codevalue = *code;
+
/* If this opcode inspects a character, but we are at the end of the
subject, remember the fact for use when testing for a partial match. */
if (clen == 0 && poptable[codevalue] != 0)
could_continue = TRUE;
- /* If this opcode is followed by an inline character, load it. It is
- tempting to test for the presence of a subject character here, but that
- is wrong, because sometimes zero repetitions of the subject are
- permitted.
-
- We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
+ /* If this opcode is followed by an inline character, load it. It is
+ tempting to test for the presence of a subject character here, but that
+ is wrong, because sometimes zero repetitions of the subject are
+ permitted.
+
+ We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
argument that is not a data character - but is always one byte long because
the values are small. We have to take special action to deal with \P, \p,
\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
these ones to new opcodes. */
-
- if (coptable[codevalue] > 0)
- {
- dlen = 1;
+
+ if (coptable[codevalue] > 0)
+ {
+ dlen = 1;
#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
#endif /* SUPPORT_UTF */
- d = code[coptable[codevalue]];
- if (codevalue >= OP_TYPESTAR)
- {
- switch(d)
- {
- case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
- case OP_NOTPROP:
- case OP_PROP: codevalue += OP_PROP_EXTRA; break;
- case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
- case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
- case OP_NOT_HSPACE:
- case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
- case OP_NOT_VSPACE:
- case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
- default: break;
- }
- }
- }
- else
- {
- dlen = 0; /* Not strictly necessary, but compilers moan */
- d = NOTACHAR; /* if these variables are not set. */
- }
-
-
- /* Now process the individual opcodes */
-
- switch (codevalue)
- {
+ d = code[coptable[codevalue]];
+ if (codevalue >= OP_TYPESTAR)
+ {
+ switch(d)
+ {
+ case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
+ case OP_NOTPROP:
+ case OP_PROP: codevalue += OP_PROP_EXTRA; break;
+ case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
+ case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
+ case OP_NOT_HSPACE:
+ case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
+ case OP_NOT_VSPACE:
+ case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
+ default: break;
+ }
+ }
+ }
+ else
+ {
+ dlen = 0; /* Not strictly necessary, but compilers moan */
+ d = NOTACHAR; /* if these variables are not set. */
+ }
+
+
+ /* Now process the individual opcodes */
+
+ switch (codevalue)
+ {
/* ========================================================================== */
/* These cases are never obeyed. This is a fudge that causes a compile-
time error if the vectors coptable or poptable, which are indexed by
opcode, are not the correct length. It seems to be the only way to do
such a check at compile time, as the sizeof() operator does not work
in the C preprocessor. */
-
+
case OP_TABLE_LENGTH:
case OP_TABLE_LENGTH +
((sizeof(coptable) == OP_TABLE_LENGTH) &&
(sizeof(poptable) == OP_TABLE_LENGTH)):
break;
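The same trick can be applied to any table that must stay in step with an enumeration; a generic sketch, independent of PCRE's opcode names:

/* If the table drifts out of step with the enum, the second label evaluates
   to COLOUR_COUNT + 0 and duplicates the first, so compilation fails. */
enum { COLOUR_RED, COLOUR_GREEN, COLOUR_BLUE, COLOUR_COUNT };
static const char *const colour_names[] = { "red", "green", "blue" };

static void
check_table_size(int v)
{
switch (v)
  {
  case COLOUR_COUNT:
  case COLOUR_COUNT +
    (sizeof(colour_names)/sizeof(colour_names[0]) == COLOUR_COUNT):
  break;
  }
}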
-/* ========================================================================== */
- /* Reached a closing bracket. If not at the end of the pattern, carry
+/* ========================================================================== */
+ /* Reached a closing bracket. If not at the end of the pattern, carry
on with the next opcode. For repeating opcodes, also add the repeat
state. Note that KETRPOS will always be encountered at the end of the
subpattern, because the possessive subpattern repeats are always handled
@@ -766,27 +766,27 @@ for (;;)
At the end of the (sub)pattern, unless we have an empty string and
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
start of the subject, save the match data, shifting up all previous
- matches so we always have the longest first. */
-
- case OP_KET:
- case OP_KETRMIN:
- case OP_KETRMAX:
+ matches so we always have the longest first. */
+
+ case OP_KET:
+ case OP_KETRMIN:
+ case OP_KETRMAX:
case OP_KETRPOS:
- if (code != end_code)
- {
- ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
- if (codevalue != OP_KET)
- {
- ADD_ACTIVE(state_offset - GET(code, 1), 0);
- }
- }
+ if (code != end_code)
+ {
+ ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
+ if (codevalue != OP_KET)
+ {
+ ADD_ACTIVE(state_offset - GET(code, 1), 0);
+ }
+ }
else
- {
+ {
if (ptr > current_subject ||
((md->moptions & PCRE_NOTEMPTY) == 0 &&
((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
current_subject > start_subject + md->start_offset)))
- {
+ {
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
else if (match_count > 0 && ++match_count * 2 > offsetcount)
match_count = 0;
@@ -806,53 +806,53 @@ for (;;)
match_count, rlevel*2-2, SP));
return match_count;
}
- }
- }
- break;
-
-/* ========================================================================== */
- /* These opcodes add to the current list of states without looking
- at the current character. */
-
- /*-----------------------------------------------------------------*/
- case OP_ALT:
- do { code += GET(code, 1); } while (*code == OP_ALT);
+ }
+ }
+ break;
+
+/* ========================================================================== */
+ /* These opcodes add to the current list of states without looking
+ at the current character. */
+
+ /*-----------------------------------------------------------------*/
+ case OP_ALT:
+ do { code += GET(code, 1); } while (*code == OP_ALT);
ADD_ACTIVE((int)(code - start_code), 0);
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_BRA:
- case OP_SBRA:
- do
- {
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_BRA:
+ case OP_SBRA:
+ do
+ {
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
- code += GET(code, 1);
- }
- while (*code == OP_ALT);
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_CBRA:
- case OP_SCBRA:
+ code += GET(code, 1);
+ }
+ while (*code == OP_ALT);
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_CBRA:
+ case OP_SCBRA:
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
- code += GET(code, 1);
- while (*code == OP_ALT)
- {
+ code += GET(code, 1);
+ while (*code == OP_ALT)
+ {
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
- code += GET(code, 1);
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_BRAZERO:
- case OP_BRAMINZERO:
- ADD_ACTIVE(state_offset + 1, 0);
- code += 1 + GET(code, 2);
- while (*code == OP_ALT) code += GET(code, 1);
+ code += GET(code, 1);
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_BRAZERO:
+ case OP_BRAMINZERO:
+ ADD_ACTIVE(state_offset + 1, 0);
+ code += 1 + GET(code, 2);
+ while (*code == OP_ALT) code += GET(code, 1);
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
- break;
-
- /*-----------------------------------------------------------------*/
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_SKIPZERO:
code += 1 + GET(code, 2);
while (*code == OP_ALT) code += GET(code, 1);
@@ -860,19 +860,19 @@ for (;;)
break;
/*-----------------------------------------------------------------*/
- case OP_CIRC:
+ case OP_CIRC:
if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
- { ADD_ACTIVE(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
+ { ADD_ACTIVE(state_offset + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_CIRCM:
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
(ptr != end_subject && WAS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_EOD:
if (ptr >= end_subject)
{
@@ -880,27 +880,27 @@ for (;;)
could_continue = TRUE;
else { ADD_ACTIVE(state_offset + 1, 0); }
}
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_SOD:
- if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_SOM:
- if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
- break;
-
-
-/* ========================================================================== */
- /* These opcodes inspect the next subject character, and sometimes
- the previous one as well, but do not have an argument. The variable
- clen contains the length of the current character and is zero if we are
- at the end of the subject. */
-
- /*-----------------------------------------------------------------*/
- case OP_ANY:
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_SOD:
+ if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_SOM:
+ if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
+ break;
+
+
+/* ========================================================================== */
+ /* These opcodes inspect the next subject character, and sometimes
+ the previous one as well, but do not have an argument. The variable
+ clen contains the length of the current character and is zero if we are
+ at the end of the subject. */
+
+ /*-----------------------------------------------------------------*/
+ case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr))
{
if (ptr + 1 >= md->end_subject &&
@@ -921,28 +921,28 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ALLANY:
if (clen > 0)
- { ADD_NEW(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_EODN:
+ { ADD_NEW(state_offset + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_EODN:
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
could_continue = TRUE;
else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
- { ADD_ACTIVE(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_DOLL:
- if ((md->moptions & PCRE_NOTEOL) == 0)
- {
+ { ADD_ACTIVE(state_offset + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_DOLL:
+ if ((md->moptions & PCRE_NOTEOL) == 0)
+ {
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
could_continue = TRUE;
else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
(ptr == end_subject - md->nllen)
- ))
- { ADD_ACTIVE(state_offset + 1, 0); }
+ ))
+ { ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
@@ -956,7 +956,7 @@ for (;;)
}
else could_continue = partial_newline = TRUE;
}
- }
+ }
break;
/*-----------------------------------------------------------------*/
@@ -983,42 +983,42 @@ for (;;)
}
}
else if (IS_NEWLINE(ptr))
- { ADD_ACTIVE(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
-
- case OP_DIGIT:
- case OP_WHITESPACE:
- case OP_WORDCHAR:
- if (clen > 0 && c < 256 &&
- ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
- { ADD_NEW(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_NOT_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_NOT_WORDCHAR:
- if (clen > 0 && (c >= 256 ||
- ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
- { ADD_NEW(state_offset + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_WORD_BOUNDARY:
- case OP_NOT_WORD_BOUNDARY:
- {
- int left_word, right_word;
-
- if (ptr > start_subject)
- {
+ { ADD_ACTIVE(state_offset + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
+
+ case OP_DIGIT:
+ case OP_WHITESPACE:
+ case OP_WORDCHAR:
+ if (clen > 0 && c < 256 &&
+ ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
+ { ADD_NEW(state_offset + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_NOT_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ if (clen > 0 && (c >= 256 ||
+ ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
+ { ADD_NEW(state_offset + 1, 0); }
+ break;
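A concrete reading of the table-driven test used in these cases, where toptable1 supplies the classification bit and toptable2 optionally flips the result:

/* For OP_DIGIT:      toptable1 = ctype_digit, toptable2 = 0
     -> the test is true exactly when ctypes[c] has the digit bit set.
   For OP_NOT_DIGIT:  toptable1 = ctype_digit, toptable2 = ctype_digit
     -> the XOR inverts that bit, so the test is true exactly when the
        digit bit is clear.  The space and word classes work the same way. */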
+
+ /*-----------------------------------------------------------------*/
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_WORD_BOUNDARY:
+ {
+ int left_word, right_word;
+
+ if (ptr > start_subject)
+ {
const pcre_uchar *temp = ptr - 1;
if (temp < md->start_used_ptr) md->start_used_ptr = temp;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) { BACKCHAR(temp); }
-#endif
- GETCHARTEST(d, temp);
+#endif
+ GETCHARTEST(d, temp);
#ifdef SUPPORT_UCP
if ((md->poptions & PCRE_UCP) != 0)
{
@@ -1030,10 +1030,10 @@ for (;;)
}
else
#endif
- left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
- }
+ left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
+ }
else left_word = FALSE;
-
+
if (clen > 0)
{
#ifdef SUPPORT_UCP
@@ -1050,49 +1050,49 @@ for (;;)
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
}
else right_word = FALSE;
-
- if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
- { ADD_ACTIVE(state_offset + 1, 0); }
- }
- break;
-
-
- /*-----------------------------------------------------------------*/
- /* Check the next character by Unicode property. We will get here only
- if the support is in the binary; otherwise a compile-time error occurs.
- */
-
-#ifdef SUPPORT_UCP
- case OP_PROP:
- case OP_NOTPROP:
- if (clen > 0)
- {
- BOOL OK;
+
+ if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
+ { ADD_ACTIVE(state_offset + 1, 0); }
+ }
+ break;
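A short reading of the test above: for OP_WORD_BOUNDARY (\b) the state advances when left_word and right_word differ, that is, the word/non-word status changes at this position, while for OP_NOT_WORD_BOUNDARY (\B) it advances when they agree; writing B for (left_word == right_word) and N for (codevalue == OP_NOT_WORD_BOUNDARY), the single comparison B == N covers all four combinations.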
+
+
+ /*-----------------------------------------------------------------*/
+ /* Check the next character by Unicode property. We will get here only
+ if the support is in the binary; otherwise a compile-time error occurs.
+ */
+
+#ifdef SUPPORT_UCP
+ case OP_PROP:
+ case OP_NOTPROP:
+ if (clen > 0)
+ {
+ BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
- switch(code[1])
- {
- case PT_ANY:
- OK = TRUE;
- break;
-
- case PT_LAMP:
+ switch(code[1])
+ {
+ case PT_ANY:
+ OK = TRUE;
+ break;
+
+ case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
- break;
-
- case PT_GC:
+ break;
+
+ case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
- break;
-
- case PT_PC:
+ break;
+
+ case PT_PC:
OK = prop->chartype == code[2];
- break;
-
- case PT_SC:
+ break;
+
+ case PT_SC:
OK = prop->script == code[2];
- break;
-
+ break;
+
/* These are specials for combination cases. */
case PT_ALNUM:
@@ -1140,33 +1140,33 @@ for (;;)
c >= 0xe000;
break;
- /* Should never occur, but keep compilers from grumbling. */
-
- default:
- OK = codevalue != OP_PROP;
- break;
- }
-
- if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
- }
- break;
-#endif
-
-
-
-/* ========================================================================== */
- /* These opcodes likewise inspect the subject character, but have an
- argument that is not a data character. It is one of these opcodes:
+ /* Should never occur, but keep compilers from grumbling. */
+
+ default:
+ OK = codevalue != OP_PROP;
+ break;
+ }
+
+ if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
+ }
+ break;
+#endif
+
+
+
+/* ========================================================================== */
+ /* These opcodes likewise inspect the subject character, but have an
+ argument that is not a data character. It is one of these opcodes:
OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
-
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
- if (clen > 0)
- {
+
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+ if (clen > 0)
+ {
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
@@ -1176,28 +1176,28 @@ for (;;)
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
- (c < 256 &&
+ (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
- {
- if (count > 0 && codevalue == OP_TYPEPOSPLUS)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- count++;
- ADD_NEW(state_offset, count);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSQUERY:
- ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0)
- {
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
+ {
+ if (count > 0 && codevalue == OP_TYPEPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW(state_offset, count);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSQUERY:
+ ADD_ACTIVE(state_offset + 2, 0);
+ if (clen > 0)
+ {
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
@@ -1207,27 +1207,27 @@ for (;;)
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
- (c < 256 &&
+ (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
- {
- if (codevalue == OP_TYPEPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- ADD_NEW(state_offset + 2, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPOSSTAR:
- ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0)
- {
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
+ {
+ if (codevalue == OP_TYPEPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset + 2, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPOSSTAR:
+ ADD_ACTIVE(state_offset + 2, 0);
+ if (clen > 0)
+ {
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
@@ -1237,25 +1237,25 @@ for (;;)
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
- (c < 256 &&
+ (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
- {
- if (codevalue == OP_TYPEPOSSTAR)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- ADD_NEW(state_offset, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_TYPEEXACT:
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
+ {
+ if (codevalue == OP_TYPEPOSSTAR)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_TYPEEXACT:
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
@@ -1265,26 +1265,26 @@ for (;;)
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
- (c < 256 &&
+ (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
- {
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
+ {
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
- else
- { ADD_NEW(state_offset, count); }
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEPOSUPTO:
+ else
+ { ADD_NEW(state_offset, count); }
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEPOSUPTO:
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
@@ -1294,63 +1294,63 @@ for (;;)
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
- (c < 256 &&
+ (c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
- {
- if (codevalue == OP_TYPEPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
+ {
+ if (codevalue == OP_TYPEPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
- else
- { ADD_NEW(state_offset, count); }
- }
- }
- break;
-
-/* ========================================================================== */
- /* These are virtual opcodes that are used when something like
- OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
- argument. It keeps the code above fast for the other cases. The argument
- is in the d variable. */
-
-#ifdef SUPPORT_UCP
- case OP_PROP_EXTRA + OP_TYPEPLUS:
- case OP_PROP_EXTRA + OP_TYPEMINPLUS:
- case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
- if (clen > 0)
- {
- BOOL OK;
+ else
+ { ADD_NEW(state_offset, count); }
+ }
+ }
+ break;
+
+/* ========================================================================== */
+ /* These are virtual opcodes that are used when something like
+ OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
+ argument. It keeps the code above fast for the other cases. The argument
+ is in the d variable. */
+
+#ifdef SUPPORT_UCP
+ case OP_PROP_EXTRA + OP_TYPEPLUS:
+ case OP_PROP_EXTRA + OP_TYPEMINPLUS:
+ case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
+ if (clen > 0)
+ {
+ BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
- switch(code[2])
- {
- case PT_ANY:
- OK = TRUE;
- break;
-
- case PT_LAMP:
+ switch(code[2])
+ {
+ case PT_ANY:
+ OK = TRUE;
+ break;
+
+ case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
- break;
-
- case PT_GC:
+ break;
+
+ case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
- break;
-
- case PT_PC:
+ break;
+
+ case PT_PC:
OK = prop->chartype == code[3];
- break;
-
- case PT_SC:
+ break;
+
+ case PT_SC:
OK = prop->script == code[3];
- break;
-
+ break;
+
/* These are specials for combination cases. */
case PT_ALNUM:
@@ -1398,211 +1398,211 @@ for (;;)
c >= 0xe000;
break;
- /* Should never occur, but keep compilers from grumbling. */
-
- default:
- OK = codevalue != OP_PROP;
- break;
- }
-
- if (OK == (d == OP_PROP))
- {
- if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- count++;
- ADD_NEW(state_offset, count);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
- case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
- case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+ /* Should never occur, but keep compilers from grumbling. */
+
+ default:
+ OK = codevalue != OP_PROP;
+ break;
+ }
+
+ if (OK == (d == OP_PROP))
+ {
+ if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW(state_offset, count);
+ }
+ }
+ break;
+
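
A small standalone sketch of the polarity trick used after each property switch above: OK holds the raw property-test result, and comparing it with (d == OP_PROP) lets the same code serve both \p{...} (OP_PROP) and its negation \P{...} (OP_NOTPROP). The opcode values here are invented for illustration.

#include <stdio.h>

enum { OP_PROP = 1, OP_NOTPROP = 2 };
typedef int BOOL;
#define TRUE 1
#define FALSE 0

static BOOL matches(int d, BOOL property_holds)
{
BOOL OK = property_holds;            /* result of the PT_xxx switch        */
return OK == (d == OP_PROP);         /* flips the sense for OP_NOTPROP     */
}

int main(void)
{
printf("%d %d %d %d\n",
  matches(OP_PROP, TRUE),            /* 1: \p matches when property holds  */
  matches(OP_PROP, FALSE),           /* 0                                  */
  matches(OP_NOTPROP, TRUE),         /* 0: \P rejects when property holds  */
  matches(OP_NOTPROP, FALSE));       /* 1                                  */
return 0;
}
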
+ /*-----------------------------------------------------------------*/
+ case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
+ case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
- {
+ {
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
- int ncount = 0;
- if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ int ncount = 0;
+ if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
+ while (nptr < end_subject)
+ {
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- ncount++;
+ ncount++;
lgb = rgb;
nptr += dlen;
- }
- count++;
- ADD_NEW_DATA(-state_offset, count, ncount);
- }
- break;
-#endif
-
- /*-----------------------------------------------------------------*/
- case OP_ANYNL_EXTRA + OP_TYPEPLUS:
- case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
- case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
- if (clen > 0)
- {
- int ncount = 0;
- switch (c)
- {
+ }
+ count++;
+ ADD_NEW_DATA(-state_offset, count, ncount);
+ }
+ break;
+#endif
+
+ /*-----------------------------------------------------------------*/
+ case OP_ANYNL_EXTRA + OP_TYPEPLUS:
+ case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
+ case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+ if (clen > 0)
+ {
+ int ncount = 0;
+ switch (c)
+ {
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
- goto ANYNL01;
-
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
+ goto ANYNL01;
+
case CHAR_CR:
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
- /* Fall through */
-
- ANYNL01:
+ /* Fall through */
+
+ ANYNL01:
case CHAR_LF:
- if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- count++;
- ADD_NEW_DATA(-state_offset, count, ncount);
- break;
-
- default:
- break;
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_VSPACE_EXTRA + OP_TYPEPLUS:
- case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
- case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
- if (clen > 0)
- {
- BOOL OK;
- switch (c)
- {
+ if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW_DATA(-state_offset, count, ncount);
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_VSPACE_EXTRA + OP_TYPEPLUS:
+ case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
+ case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+ if (clen > 0)
+ {
+ BOOL OK;
+ switch (c)
+ {
VSPACE_CASES:
- OK = TRUE;
- break;
-
- default:
- OK = FALSE;
- break;
- }
-
- if (OK == (d == OP_VSPACE))
- {
- if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- count++;
- ADD_NEW_DATA(-state_offset, count, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_HSPACE_EXTRA + OP_TYPEPLUS:
- case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
- case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
- if (clen > 0)
- {
- BOOL OK;
- switch (c)
- {
+ OK = TRUE;
+ break;
+
+ default:
+ OK = FALSE;
+ break;
+ }
+
+ if (OK == (d == OP_VSPACE))
+ {
+ if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW_DATA(-state_offset, count, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_HSPACE_EXTRA + OP_TYPEPLUS:
+ case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
+ case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
+ if (clen > 0)
+ {
+ BOOL OK;
+ switch (c)
+ {
HSPACE_CASES:
- OK = TRUE;
- break;
-
- default:
- OK = FALSE;
- break;
- }
-
- if (OK == (d == OP_HSPACE))
- {
- if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- count++;
- ADD_NEW_DATA(-state_offset, count, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
-#ifdef SUPPORT_UCP
- case OP_PROP_EXTRA + OP_TYPEQUERY:
- case OP_PROP_EXTRA + OP_TYPEMINQUERY:
- case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
- count = 4;
- goto QS1;
-
- case OP_PROP_EXTRA + OP_TYPESTAR:
- case OP_PROP_EXTRA + OP_TYPEMINSTAR:
- case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
- count = 0;
-
- QS1:
-
- ADD_ACTIVE(state_offset + 4, 0);
- if (clen > 0)
- {
- BOOL OK;
+ OK = TRUE;
+ break;
+
+ default:
+ OK = FALSE;
+ break;
+ }
+
+ if (OK == (d == OP_HSPACE))
+ {
+ if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW_DATA(-state_offset, count, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+#ifdef SUPPORT_UCP
+ case OP_PROP_EXTRA + OP_TYPEQUERY:
+ case OP_PROP_EXTRA + OP_TYPEMINQUERY:
+ case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
+ count = 4;
+ goto QS1;
+
+ case OP_PROP_EXTRA + OP_TYPESTAR:
+ case OP_PROP_EXTRA + OP_TYPEMINSTAR:
+ case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
+ count = 0;
+
+ QS1:
+
+ ADD_ACTIVE(state_offset + 4, 0);
+ if (clen > 0)
+ {
+ BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
- switch(code[2])
- {
- case PT_ANY:
- OK = TRUE;
- break;
-
- case PT_LAMP:
+ switch(code[2])
+ {
+ case PT_ANY:
+ OK = TRUE;
+ break;
+
+ case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
- break;
-
- case PT_GC:
+ break;
+
+ case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
- break;
-
- case PT_PC:
+ break;
+
+ case PT_PC:
OK = prop->chartype == code[3];
- break;
-
- case PT_SC:
+ break;
+
+ case PT_SC:
OK = prop->script == code[3];
- break;
-
+ break;
+
/* These are specials for combination cases. */
case PT_ALNUM:
@@ -1650,236 +1650,236 @@ for (;;)
c >= 0xe000;
break;
- /* Should never occur, but keep compilers from grumbling. */
-
- default:
- OK = codevalue != OP_PROP;
- break;
- }
-
- if (OK == (d == OP_PROP))
- {
- if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
- codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- ADD_NEW(state_offset + count, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
- case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
- case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
- count = 2;
- goto QS2;
-
- case OP_EXTUNI_EXTRA + OP_TYPESTAR:
- case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
- case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
- count = 0;
-
- QS2:
-
- ADD_ACTIVE(state_offset + 2, 0);
+ /* Should never occur, but keep compilers from grumbling. */
+
+ default:
+ OK = codevalue != OP_PROP;
+ break;
+ }
+
+ if (OK == (d == OP_PROP))
+ {
+ if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
+ codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset + count, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
+ case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
+ count = 2;
+ goto QS2;
+
+ case OP_EXTUNI_EXTRA + OP_TYPESTAR:
+ case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
+ count = 0;
+
+ QS2:
+
+ ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
- {
+ {
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
- int ncount = 0;
- if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
- codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ int ncount = 0;
+ if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
+ codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
+ while (nptr < end_subject)
+ {
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- ncount++;
+ ncount++;
lgb = rgb;
nptr += dlen;
- }
- ADD_NEW_DATA(-(state_offset + count), 0, ncount);
- }
- break;
-#endif
-
- /*-----------------------------------------------------------------*/
- case OP_ANYNL_EXTRA + OP_TYPEQUERY:
- case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
- case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
- count = 2;
- goto QS3;
-
- case OP_ANYNL_EXTRA + OP_TYPESTAR:
- case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
- case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
- count = 0;
-
- QS3:
- ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0)
- {
- int ncount = 0;
- switch (c)
- {
+ }
+ ADD_NEW_DATA(-(state_offset + count), 0, ncount);
+ }
+ break;
+#endif
+
+ /*-----------------------------------------------------------------*/
+ case OP_ANYNL_EXTRA + OP_TYPEQUERY:
+ case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
+ case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
+ count = 2;
+ goto QS3;
+
+ case OP_ANYNL_EXTRA + OP_TYPESTAR:
+ case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
+ case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
+ count = 0;
+
+ QS3:
+ ADD_ACTIVE(state_offset + 2, 0);
+ if (clen > 0)
+ {
+ int ncount = 0;
+ switch (c)
+ {
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
- goto ANYNL02;
-
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
+ goto ANYNL02;
+
case CHAR_CR:
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
- /* Fall through */
-
- ANYNL02:
+ /* Fall through */
+
+ ANYNL02:
case CHAR_LF:
- if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
- codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
+ codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
- break;
-
- default:
- break;
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_VSPACE_EXTRA + OP_TYPEQUERY:
- case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
- case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
- count = 2;
- goto QS4;
-
- case OP_VSPACE_EXTRA + OP_TYPESTAR:
- case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
- case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
- count = 0;
-
- QS4:
- ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0)
- {
- BOOL OK;
- switch (c)
- {
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_VSPACE_EXTRA + OP_TYPEQUERY:
+ case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
+ case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
+ count = 2;
+ goto QS4;
+
+ case OP_VSPACE_EXTRA + OP_TYPESTAR:
+ case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
+ case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
+ count = 0;
+
+ QS4:
+ ADD_ACTIVE(state_offset + 2, 0);
+ if (clen > 0)
+ {
+ BOOL OK;
+ switch (c)
+ {
VSPACE_CASES:
- OK = TRUE;
- break;
-
- default:
- OK = FALSE;
- break;
- }
- if (OK == (d == OP_VSPACE))
- {
- if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
- codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ OK = TRUE;
+ break;
+
+ default:
+ OK = FALSE;
+ break;
+ }
+ if (OK == (d == OP_VSPACE))
+ {
+ if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
+ codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_HSPACE_EXTRA + OP_TYPEQUERY:
- case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
- case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
- count = 2;
- goto QS5;
-
- case OP_HSPACE_EXTRA + OP_TYPESTAR:
- case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
- case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
- count = 0;
-
- QS5:
- ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0)
- {
- BOOL OK;
- switch (c)
- {
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_HSPACE_EXTRA + OP_TYPEQUERY:
+ case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
+ case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
+ count = 2;
+ goto QS5;
+
+ case OP_HSPACE_EXTRA + OP_TYPESTAR:
+ case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
+ case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
+ count = 0;
+
+ QS5:
+ ADD_ACTIVE(state_offset + 2, 0);
+ if (clen > 0)
+ {
+ BOOL OK;
+ switch (c)
+ {
HSPACE_CASES:
- OK = TRUE;
- break;
-
- default:
- OK = FALSE;
- break;
- }
-
- if (OK == (d == OP_HSPACE))
- {
- if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
- codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ OK = TRUE;
+ break;
+
+ default:
+ OK = FALSE;
+ break;
+ }
+
+ if (OK == (d == OP_HSPACE))
+ {
+ if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
+ codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
-#ifdef SUPPORT_UCP
- case OP_PROP_EXTRA + OP_TYPEEXACT:
- case OP_PROP_EXTRA + OP_TYPEUPTO:
- case OP_PROP_EXTRA + OP_TYPEMINUPTO:
- case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
- if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+#ifdef SUPPORT_UCP
+ case OP_PROP_EXTRA + OP_TYPEEXACT:
+ case OP_PROP_EXTRA + OP_TYPEUPTO:
+ case OP_PROP_EXTRA + OP_TYPEMINUPTO:
+ case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
+ if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
- BOOL OK;
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
+ BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
switch(code[1 + IMM2_SIZE + 1])
- {
- case PT_ANY:
- OK = TRUE;
- break;
-
- case PT_LAMP:
+ {
+ case PT_ANY:
+ OK = TRUE;
+ break;
+
+ case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
- break;
-
- case PT_GC:
+ break;
+
+ case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
- break;
-
- case PT_PC:
+ break;
+
+ case PT_PC:
OK = prop->chartype == code[1 + IMM2_SIZE + 2];
- break;
-
- case PT_SC:
+ break;
+
+ case PT_SC:
OK = prop->script == code[1 + IMM2_SIZE + 2];
- break;
-
+ break;
+
/* These are specials for combination cases. */
case PT_ALNUM:
@@ -1927,357 +1927,357 @@ for (;;)
c >= 0xe000;
break;
- /* Should never occur, but keep compilers from grumbling. */
-
- default:
- OK = codevalue != OP_PROP;
- break;
- }
-
- if (OK == (d == OP_PROP))
- {
- if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ /* Should never occur, but keep compilers from grumbling. */
+
+ default:
+ OK = codevalue != OP_PROP;
+ break;
+ }
+
+ if (OK == (d == OP_PROP))
+ {
+ if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
- else
- { ADD_NEW(state_offset, count); }
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
- case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
- case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
- case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
- if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
+ else
+ { ADD_NEW(state_offset, count); }
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
+ case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
+ case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
+ case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
+ if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
- count = current_state->count; /* Number already matched */
+ count = current_state->count; /* Number already matched */
if (clen > 0)
- {
+ {
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
- int ncount = 0;
- if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ int ncount = 0;
+ if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
+ while (nptr < end_subject)
+ {
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- ncount++;
+ ncount++;
lgb = rgb;
nptr += dlen;
- }
+ }
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
- else
- { ADD_NEW_DATA(-state_offset, count, ncount); }
- }
- break;
-#endif
-
- /*-----------------------------------------------------------------*/
- case OP_ANYNL_EXTRA + OP_TYPEEXACT:
- case OP_ANYNL_EXTRA + OP_TYPEUPTO:
- case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
- case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
- if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
+ else
+ { ADD_NEW_DATA(-state_offset, count, ncount); }
+ }
+ break;
+#endif
+
+ /*-----------------------------------------------------------------*/
+ case OP_ANYNL_EXTRA + OP_TYPEEXACT:
+ case OP_ANYNL_EXTRA + OP_TYPEUPTO:
+ case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
+ case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
+ if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
- int ncount = 0;
- switch (c)
- {
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
+ int ncount = 0;
+ switch (c)
+ {
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
- goto ANYNL03;
-
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
+ goto ANYNL03;
+
case CHAR_CR:
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
- /* Fall through */
-
- ANYNL03:
+ /* Fall through */
+
+ ANYNL03:
case CHAR_LF:
- if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
- else
- { ADD_NEW_DATA(-state_offset, count, ncount); }
- break;
-
- default:
- break;
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_VSPACE_EXTRA + OP_TYPEEXACT:
- case OP_VSPACE_EXTRA + OP_TYPEUPTO:
- case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
- case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
- if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
+ else
+ { ADD_NEW_DATA(-state_offset, count, ncount); }
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_VSPACE_EXTRA + OP_TYPEEXACT:
+ case OP_VSPACE_EXTRA + OP_TYPEUPTO:
+ case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
+ case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
+ if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
- BOOL OK;
- switch (c)
- {
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
+ BOOL OK;
+ switch (c)
+ {
VSPACE_CASES:
- OK = TRUE;
- break;
-
- default:
- OK = FALSE;
- }
-
- if (OK == (d == OP_VSPACE))
- {
- if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ OK = TRUE;
+ break;
+
+ default:
+ OK = FALSE;
+ }
+
+ if (OK == (d == OP_VSPACE))
+ {
+ if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
- else
- { ADD_NEW_DATA(-state_offset, count, 0); }
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_HSPACE_EXTRA + OP_TYPEEXACT:
- case OP_HSPACE_EXTRA + OP_TYPEUPTO:
- case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
- case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
- if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
+ else
+ { ADD_NEW_DATA(-state_offset, count, 0); }
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_HSPACE_EXTRA + OP_TYPEEXACT:
+ case OP_HSPACE_EXTRA + OP_TYPEUPTO:
+ case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
+ case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
+ if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
- BOOL OK;
- switch (c)
- {
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
+ BOOL OK;
+ switch (c)
+ {
HSPACE_CASES:
- OK = TRUE;
- break;
-
- default:
- OK = FALSE;
- break;
- }
-
- if (OK == (d == OP_HSPACE))
- {
- if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ OK = TRUE;
+ break;
+
+ default:
+ OK = FALSE;
+ break;
+ }
+
+ if (OK == (d == OP_HSPACE))
+ {
+ if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
- else
- { ADD_NEW_DATA(-state_offset, count, 0); }
- }
- }
- break;
-
-/* ========================================================================== */
- /* These opcodes are followed by a character that is usually compared
- to the current subject character; it is loaded into d. We still get
- here even if there is no subject character, because in some cases zero
- repetitions are permitted. */
-
- /*-----------------------------------------------------------------*/
- case OP_CHAR:
- if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
- break;
-
- /*-----------------------------------------------------------------*/
+ else
+ { ADD_NEW_DATA(-state_offset, count, 0); }
+ }
+ }
+ break;
+
+/* ========================================================================== */
+ /* These opcodes are followed by a character that is usually compared
+ to the current subject character; it is loaded into d. We still get
+ here even if there is no subject character, because in some cases zero
+ repetitions are permitted. */
+
+ /*-----------------------------------------------------------------*/
+ case OP_CHAR:
+ if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_CHARI:
- if (clen == 0) break;
-
+ if (clen == 0) break;
+
#ifdef SUPPORT_UTF
if (utf)
- {
- if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
- {
- unsigned int othercase;
+ {
+ if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
+ {
+ unsigned int othercase;
if (c < 128)
othercase = fcc[c];
else
/* If we have Unicode property support, we can use it to test the
other case of the character. */
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UCP
othercase = UCD_OTHERCASE(c);
-#else
+#else
othercase = NOTACHAR;
-#endif
-
- if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
- }
- }
- else
+#endif
+
+ if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
+ }
+ }
+ else
#endif /* SUPPORT_UTF */
/* Not UTF mode */
- {
+ {
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
{ ADD_NEW(state_offset + 2, 0); }
- }
- break;
-
-
-#ifdef SUPPORT_UCP
- /*-----------------------------------------------------------------*/
- /* This is a tricky one because it can match more than one character.
- Find out how many characters to skip, and then set up a negative state
- to wait for them to pass before continuing. */
-
- case OP_EXTUNI:
+ }
+ break;
+
+
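
A minimal sketch of the caseless single-character test performed for OP_CHARI above: the pattern character d matches the subject character c when they are equal or when c equals the "other case" of d. The ASCII-only othercase() below is a trivial stand-in for the fcc table and the UCD_OTHERCASE() lookup used in the real code.

#include <ctype.h>
#include <stdio.h>

static unsigned int othercase(unsigned int d)
{
if (d < 128)
  return isupper(d) ? (unsigned int)tolower(d) :
         islower(d) ? (unsigned int)toupper(d) : d;
return d;  /* the real code consults the Unicode case tables here */
}

static int chari_match(unsigned int c, unsigned int d)
{
return c == d || c == othercase(d);
}

int main(void)
{
printf("%d %d %d\n",
  chari_match('a', 'A'), chari_match('A', 'A'), chari_match('b', 'A'));  /* 1 1 0 */
return 0;
}
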
+#ifdef SUPPORT_UCP
+ /*-----------------------------------------------------------------*/
+ /* This is a tricky one because it can match more than one character.
+ Find out how many characters to skip, and then set up a negative state
+ to wait for them to pass before continuing. */
+
+ case OP_EXTUNI:
if (clen > 0)
- {
+ {
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
- int ncount = 0;
+ int ncount = 0;
lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
+ while (nptr < end_subject)
+ {
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- ncount++;
+ ncount++;
lgb = rgb;
nptr += dlen;
- }
+ }
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
- ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
- }
- break;
-#endif
-
- /*-----------------------------------------------------------------*/
-    /* This is a tricky one like EXTUNI because it too can match more than one
- character (when CR is followed by LF). In this case, set up a negative
- state to wait for one character to pass before continuing. */
-
- case OP_ANYNL:
- if (clen > 0) switch(c)
- {
+ ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
+ }
+ break;
+#endif
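
A minimal sketch, under simplified assumptions, of the scanning loop used for \X above: starting from the current character, keep absorbing following characters while a break-property table says there is no grapheme boundary, and count how many extra characters were consumed. The two-property table and crude classifier below are invented stand-ins for PCRE's ucp_gbtable / UCD_GRAPHBREAK machinery.

#include <stdio.h>

enum { GB_OTHER, GB_EXTEND, GB_COUNT };

/* gb_table[l] has bit (1 << r) set when properties l, r stay in one cluster. */
static const unsigned int gb_table[GB_COUNT] = {
  [GB_OTHER]  = 1u << GB_EXTEND,   /* base + combining mark stay together */
  [GB_EXTEND] = 1u << GB_EXTEND    /* further combining marks also stick  */
};

static int graphbreak_prop(unsigned int c)
{
return (c >= 0x0300 && c <= 0x036F) ? GB_EXTEND : GB_OTHER;  /* crude */
}

/* Returns how many characters after *p belong to the same cluster. */
static int extra_in_cluster(const unsigned int *p, const unsigned int *end)
{
int lgb = graphbreak_prop(*p), ncount = 0;
for (const unsigned int *np = p + 1; np < end; np++)
  {
  int rgb = graphbreak_prop(*np);
  if ((gb_table[lgb] & (1u << rgb)) == 0) break;
  ncount++;
  lgb = rgb;
  }
return ncount;
}

int main(void)
{
unsigned int s[] = { 'e', 0x0301, 0x0301, 'x' };   /* e + two accents, then x */
printf("extra chars in cluster: %d\n",
  extra_in_cluster(s, s + sizeof(s)/sizeof(s[0])));  /* prints 2 */
return 0;
}
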
+
+ /*-----------------------------------------------------------------*/
+    /* This is a tricky one like EXTUNI because it too can match more than one
+ character (when CR is followed by LF). In this case, set up a negative
+ state to wait for one character to pass before continuing. */
+
+ case OP_ANYNL:
+ if (clen > 0) switch(c)
+ {
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
-
+ if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
+
case CHAR_LF:
- ADD_NEW(state_offset + 1, 0);
- break;
-
+ ADD_NEW(state_offset + 1, 0);
+ break;
+
case CHAR_CR:
if (ptr + 1 >= end_subject)
- {
+ {
ADD_NEW(state_offset + 1, 0);
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
{
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
- }
- else
- {
- ADD_NEW(state_offset + 1, 0);
- }
- break;
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_NOT_VSPACE:
- if (clen > 0) switch(c)
- {
+ ADD_NEW_DATA(-(state_offset + 1), 0, 1);
+ }
+ else
+ {
+ ADD_NEW(state_offset + 1, 0);
+ }
+ break;
+ }
+ break;
+
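
A minimal, ASCII-only sketch of the \R behaviour handled in the OP_ANYNL case above: most newline characters match exactly one character, but a CR immediately followed by LF consumes the pair, which is why the DFA sets up a state that waits for one extra character.

#include <stdio.h>

static int anynl_consumes(const char *p, const char *end)
{
if (p >= end) return 0;
switch (*p)
  {
  case '\r': return (p + 1 < end && p[1] == '\n') ? 2 : 1;  /* CRLF pair */
  case '\n':
  case '\v':
  case '\f': return 1;
  default:   return 0;                                      /* no match  */
  }
}

int main(void)
{
const char s[] = "\r\nx";
printf("consumed: %d\n", anynl_consumes(s, s + 3));  /* prints 2 */
return 0;
}
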
+ /*-----------------------------------------------------------------*/
+ case OP_NOT_VSPACE:
+ if (clen > 0) switch(c)
+ {
VSPACE_CASES:
- break;
-
- default:
- ADD_NEW(state_offset + 1, 0);
- break;
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_VSPACE:
- if (clen > 0) switch(c)
- {
+ break;
+
+ default:
+ ADD_NEW(state_offset + 1, 0);
+ break;
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_VSPACE:
+ if (clen > 0) switch(c)
+ {
VSPACE_CASES:
- ADD_NEW(state_offset + 1, 0);
- break;
-
+ ADD_NEW(state_offset + 1, 0);
+ break;
+
default:
break;
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_NOT_HSPACE:
- if (clen > 0) switch(c)
- {
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_NOT_HSPACE:
+ if (clen > 0) switch(c)
+ {
HSPACE_CASES:
- break;
-
- default:
- ADD_NEW(state_offset + 1, 0);
- break;
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_HSPACE:
- if (clen > 0) switch(c)
- {
+ break;
+
+ default:
+ ADD_NEW(state_offset + 1, 0);
+ break;
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_HSPACE:
+ if (clen > 0) switch(c)
+ {
HSPACE_CASES:
- ADD_NEW(state_offset + 1, 0);
- break;
+ ADD_NEW(state_offset + 1, 0);
+ break;
default:
break;
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
/* Match a negated single character casefully. */
-
- case OP_NOT:
+
+ case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
@@ -2285,8 +2285,8 @@ for (;;)
/* Match a negated single character caselessly. */
case OP_NOTI:
- if (clen > 0)
- {
+ if (clen > 0)
+ {
pcre_uint32 otherd;
#ifdef SUPPORT_UTF
if (utf && d >= 128)
@@ -2302,10 +2302,10 @@ for (;;)
otherd = TABLE_GET(d, fcc, d);
if (c != d && c != otherd)
{ ADD_NEW(state_offset + dlen + 1, 0); }
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
@@ -2316,45 +2316,45 @@ for (;;)
codevalue -= OP_STARI - OP_STAR;
/* Fall through */
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_POSPLUS:
- case OP_NOTPLUS:
- case OP_NOTMINPLUS:
- case OP_NOTPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
- if (clen > 0)
- {
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_POSPLUS:
+ case OP_NOTPLUS:
+ case OP_NOTMINPLUS:
+ case OP_NOTPOSPLUS:
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
+ if (clen > 0)
+ {
pcre_uint32 otherd = NOTACHAR;
if (caseless)
- {
+ {
#ifdef SUPPORT_UTF
if (utf && d >= 128)
- {
-#ifdef SUPPORT_UCP
+ {
+#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
-#endif /* SUPPORT_UCP */
- }
- else
+#endif /* SUPPORT_UCP */
+ }
+ else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
- }
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
- {
- if (count > 0 &&
- (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- count++;
- ADD_NEW(state_offset, count);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ }
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
+ {
+ if (count > 0 &&
+ (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW(state_offset, count);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_QUERYI:
case OP_MINQUERYI:
case OP_POSQUERYI:
@@ -2364,42 +2364,42 @@ for (;;)
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
/* Fall through */
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_POSQUERY:
- case OP_NOTQUERY:
- case OP_NOTMINQUERY:
- case OP_NOTPOSQUERY:
- ADD_ACTIVE(state_offset + dlen + 1, 0);
- if (clen > 0)
- {
+ case OP_QUERY:
+ case OP_MINQUERY:
+ case OP_POSQUERY:
+ case OP_NOTQUERY:
+ case OP_NOTMINQUERY:
+ case OP_NOTPOSQUERY:
+ ADD_ACTIVE(state_offset + dlen + 1, 0);
+ if (clen > 0)
+ {
pcre_uint32 otherd = NOTACHAR;
if (caseless)
- {
+ {
#ifdef SUPPORT_UTF
if (utf && d >= 128)
- {
-#ifdef SUPPORT_UCP
+ {
+#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
-#endif /* SUPPORT_UCP */
- }
- else
+#endif /* SUPPORT_UCP */
+ }
+ else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
- }
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
- {
- if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- ADD_NEW(state_offset + dlen + 1, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ }
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
+ {
+ if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset + dlen + 1, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_STARI:
case OP_MINSTARI:
case OP_POSSTARI:
@@ -2409,77 +2409,77 @@ for (;;)
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
/* Fall through */
- case OP_STAR:
- case OP_MINSTAR:
- case OP_POSSTAR:
- case OP_NOTSTAR:
- case OP_NOTMINSTAR:
- case OP_NOTPOSSTAR:
- ADD_ACTIVE(state_offset + dlen + 1, 0);
- if (clen > 0)
- {
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_POSSTAR:
+ case OP_NOTSTAR:
+ case OP_NOTMINSTAR:
+ case OP_NOTPOSSTAR:
+ ADD_ACTIVE(state_offset + dlen + 1, 0);
+ if (clen > 0)
+ {
pcre_uint32 otherd = NOTACHAR;
if (caseless)
- {
+ {
#ifdef SUPPORT_UTF
if (utf && d >= 128)
- {
-#ifdef SUPPORT_UCP
+ {
+#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
-#endif /* SUPPORT_UCP */
- }
- else
+#endif /* SUPPORT_UCP */
+ }
+ else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
- }
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
- {
- if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
- ADD_NEW(state_offset, 0);
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ }
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
+ {
+ if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset, 0);
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_EXACTI:
case OP_NOTEXACTI:
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
/* Fall through */
- case OP_EXACT:
- case OP_NOTEXACT:
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
+ case OP_EXACT:
+ case OP_NOTEXACT:
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
pcre_uint32 otherd = NOTACHAR;
if (caseless)
- {
+ {
#ifdef SUPPORT_UTF
if (utf && d >= 128)
- {
-#ifdef SUPPORT_UCP
+ {
+#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
-#endif /* SUPPORT_UCP */
- }
- else
+#endif /* SUPPORT_UCP */
+ }
+ else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
- }
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
- {
+ }
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
+ {
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
- else
- { ADD_NEW(state_offset, count); }
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ else
+ { ADD_NEW(state_offset, count); }
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
@@ -2489,92 +2489,92 @@ for (;;)
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
/* Fall through */
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_POSUPTO:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTPOSUPTO:
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_POSUPTO:
+ case OP_NOTUPTO:
+ case OP_NOTMINUPTO:
+ case OP_NOTPOSUPTO:
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
- count = current_state->count; /* Number already matched */
- if (clen > 0)
- {
+ count = current_state->count; /* Number already matched */
+ if (clen > 0)
+ {
pcre_uint32 otherd = NOTACHAR;
if (caseless)
- {
+ {
#ifdef SUPPORT_UTF
if (utf && d >= 128)
- {
-#ifdef SUPPORT_UCP
+ {
+#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
-#endif /* SUPPORT_UCP */
- }
- else
+#endif /* SUPPORT_UCP */
+ }
+ else
#endif /* SUPPORT_UTF */
otherd = TABLE_GET(d, fcc, d);
- }
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
- {
- if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
- {
- active_count--; /* Remove non-match possibility */
- next_active_state--;
- }
+ }
+ if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
+ {
+ if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
- else
- { ADD_NEW(state_offset, count); }
- }
- }
- break;
-
-
-/* ========================================================================== */
- /* These are the class-handling opcodes */
-
- case OP_CLASS:
- case OP_NCLASS:
- case OP_XCLASS:
- {
- BOOL isinclass = FALSE;
- int next_state_offset;
+ else
+ { ADD_NEW(state_offset, count); }
+ }
+ }
+ break;
+
+
+/* ========================================================================== */
+ /* These are the class-handling opcodes */
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ case OP_XCLASS:
+ {
+ BOOL isinclass = FALSE;
+ int next_state_offset;
const pcre_uchar *ecode;
-
- /* For a simple class, there is always just a 32-byte table, and we
- can set isinclass from it. */
-
- if (codevalue != OP_XCLASS)
- {
+
+ /* For a simple class, there is always just a 32-byte table, and we
+ can set isinclass from it. */
+
+ if (codevalue != OP_XCLASS)
+ {
ecode = code + 1 + (32 / sizeof(pcre_uchar));
- if (clen > 0)
- {
- isinclass = (c > 255)? (codevalue == OP_NCLASS) :
+ if (clen > 0)
+ {
+ isinclass = (c > 255)? (codevalue == OP_NCLASS) :
((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
- }
- }
-
- /* An extended class may have a table or a list of single characters,
- ranges, or both, and it may be positive or negative. There's a
- function that sorts all this out. */
-
- else
- {
- ecode = code + GET(code, 1);
+ }
+ }
+
+ /* An extended class may have a table or a list of single characters,
+ ranges, or both, and it may be positive or negative. There's a
+ function that sorts all this out. */
+
+ else
+ {
+ ecode = code + GET(code, 1);
if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
- }
-
- /* At this point, isinclass is set for all kinds of class, and ecode
- points to the byte after the end of the class. If there is a
- quantifier, this is where it will be. */
-
+ }
+
+ /* At this point, isinclass is set for all kinds of class, and ecode
+ points to the byte after the end of the class. If there is a
+ quantifier, this is where it will be. */
+
next_state_offset = (int)(ecode - start_code);
-
- switch (*ecode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
+
+ switch (*ecode)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
case OP_CRPOSSTAR:
- ADD_ACTIVE(next_state_offset + 1, 0);
+ ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass)
{
if (*ecode == OP_CRPOSSTAR)
@@ -2584,13 +2584,13 @@ for (;;)
}
ADD_NEW(state_offset, 0);
}
- break;
-
- case OP_CRPLUS:
- case OP_CRMINPLUS:
+ break;
+
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
case OP_CRPOSPLUS:
- count = current_state->count; /* Already matched */
- if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
+ count = current_state->count; /* Already matched */
+ if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
if (isinclass)
{
if (count > 0 && *ecode == OP_CRPOSPLUS)
@@ -2601,12 +2601,12 @@ for (;;)
count++;
ADD_NEW(state_offset, count);
}
- break;
-
- case OP_CRQUERY:
- case OP_CRMINQUERY:
+ break;
+
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
case OP_CRPOSQUERY:
- ADD_ACTIVE(next_state_offset + 1, 0);
+ ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass)
{
if (*ecode == OP_CRPOSQUERY)
@@ -2616,90 +2616,90 @@ for (;;)
}
ADD_NEW(next_state_offset + 1, 0);
}
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
case OP_CRPOSRANGE:
- count = current_state->count; /* Already matched */
+ count = current_state->count; /* Already matched */
if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
- if (isinclass)
- {
+ if (isinclass)
+ {
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- if (++count >= max && max != 0) /* Max 0 => no limit */
+ if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
- else
- { ADD_NEW(state_offset, count); }
- }
- break;
-
- default:
- if (isinclass) { ADD_NEW(next_state_offset, 0); }
- break;
- }
- }
- break;
-
-/* ========================================================================== */
- /* These are the opcodes for fancy brackets of various kinds. We have
+ else
+ { ADD_NEW(state_offset, count); }
+ }
+ break;
+
+ default:
+ if (isinclass) { ADD_NEW(next_state_offset, 0); }
+ break;
+ }
+ }
+ break;
+
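
A standalone sketch of the 32-byte class bitmap test used in the OP_CLASS/OP_NCLASS branch above: a simple class is stored as 256 bits, and membership of a character below 256 is one index plus one mask. The digits-only bitmap built here is an illustrative stand-in for what the PCRE compiler actually emits.

#include <stdio.h>
#include <string.h>

static int in_class(const unsigned char bitmap[32], unsigned int c)
{
if (c > 255) return 0;                       /* OP_NCLASS would say 1 here */
return (bitmap[c / 8] & (1u << (c & 7))) != 0;
}

int main(void)
{
unsigned char digits[32];
memset(digits, 0, sizeof(digits));
for (unsigned int c = '0'; c <= '9'; c++) digits[c / 8] |= 1u << (c & 7);

printf("%d %d\n", in_class(digits, '7'), in_class(digits, 'x'));  /* 1 0 */
return 0;
}
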
+/* ========================================================================== */
+ /* These are the opcodes for fancy brackets of various kinds. We have
to use recursion in order to handle them. The "always failing" assertion
(?!) is optimised to OP_FAIL when compiling, so we have to support that,
though the other "backtracking verbs" are not supported. */
-
+
case OP_FAIL:
forced_fail++; /* Count FAILs for multiple states */
break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- {
- int rc;
- int local_offsets[2];
- int local_workspace[1000];
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ {
+ int rc;
+ int local_offsets[2];
+ int local_workspace[1000];
const pcre_uchar *endasscode = code + GET(code, 1);
-
- while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
-
- rc = internal_dfa_exec(
- md, /* static match data */
- code, /* this subexpression's code */
- ptr, /* where we currently are */
+
+ while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
+
+ rc = internal_dfa_exec(
+ md, /* static match data */
+ code, /* this subexpression's code */
+ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
- local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(int), /* size of same */
- local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
+ local_offsets, /* offset vector */
+ sizeof(local_offsets)/sizeof(int), /* size of same */
+ local_workspace, /* workspace vector */
+ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
-
+
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
- if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
+ if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_COND:
- case OP_SCOND:
- {
- int local_offsets[1000];
- int local_workspace[1000];
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_COND:
+ case OP_SCOND:
+ {
+ int local_offsets[1000];
+ int local_workspace[1000];
int codelink = GET(code, 1);
int condcode;
-
+
/* Because of the way auto-callout works during compile, a callout item
is inserted between OP_COND and an assertion condition. This does not
happen for the other conditions. */
-
+
if (code[LINK_SIZE+1] == OP_CALLOUT)
- {
+ {
rrc = 0;
if (PUBL(callout) != NULL)
{
@@ -2727,8 +2727,8 @@ for (;;)
}
if (rrc > 0) break; /* Fail this thread */
code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
- }
-
+ }
+
condcode = code[LINK_SIZE+1];
/* Back reference conditions and duplicate named recursion conditions
@@ -2744,63 +2744,63 @@ for (;;)
if (condcode == OP_DEF || condcode == OP_FAIL)
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
- /* The only supported version of OP_RREF is for the value RREF_ANY,
- which means "test if in any recursion". We can't test for specifically
- recursed groups. */
-
- else if (condcode == OP_RREF)
- {
+ /* The only supported version of OP_RREF is for the value RREF_ANY,
+ which means "test if in any recursion". We can't test for specifically
+ recursed groups. */
+
+ else if (condcode == OP_RREF)
+ {
int value = GET2(code, LINK_SIZE + 2);
- if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
+ if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
if (md->recursive != NULL)
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
- }
-
- /* Otherwise, the condition is an assertion */
-
- else
- {
- int rc;
+ }
+
+ /* Otherwise, the condition is an assertion */
+
+ else
+ {
+ int rc;
const pcre_uchar *asscode = code + LINK_SIZE + 1;
const pcre_uchar *endasscode = asscode + GET(asscode, 1);
-
- while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
-
- rc = internal_dfa_exec(
- md, /* fixed match data */
- asscode, /* this subexpression's code */
- ptr, /* where we currently are */
+
+ while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
+
+ rc = internal_dfa_exec(
+ md, /* fixed match data */
+ asscode, /* this subexpression's code */
+ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
- local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(int), /* size of same */
- local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
+ local_offsets, /* offset vector */
+ sizeof(local_offsets)/sizeof(int), /* size of same */
+ local_workspace, /* workspace vector */
+ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
-
+
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
- if ((rc >= 0) ==
- (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
+ if ((rc >= 0) ==
+ (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
- else
+ else
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
- }
- }
- break;
-
- /*-----------------------------------------------------------------*/
- case OP_RECURSE:
- {
+ }
+ }
+ break;
+
+ /*-----------------------------------------------------------------*/
+ case OP_RECURSE:
+ {
dfa_recursion_info *ri;
- int local_offsets[1000];
- int local_workspace[1000];
+ int local_offsets[1000];
+ int local_workspace[1000];
const pcre_uchar *callpat = start_code + GET(code, 1);
int recno = (callpat == md->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);
- int rc;
-
+ int rc;
+
DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
-
+
/* Check for repeating a recursion without advancing the subject
pointer. This should catch convoluted mutual recursions. (Some simple
cases are caught at compile time.) */
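
A minimal sketch (structure and field names simplified) of the loop check described above: each active recursion remembers the group it entered and the subject offset where it started, chained through prevrec, and re-entering the same group at the same offset without having advanced is rejected.

#include <stddef.h>
#include <stdio.h>

typedef struct rec_info {
  int group;
  int subject_offset;
  struct rec_info *prevrec;
} rec_info;

static int repeats_without_advancing(const rec_info *top, int group, int offset)
{
for (const rec_info *ri = top; ri != NULL; ri = ri->prevrec)
  if (ri->group == group && ri->subject_offset == offset) return 1;
return 0;
}

int main(void)
{
rec_info outer = { 1, 5, NULL };
rec_info inner = { 2, 5, &outer };
printf("%d %d\n",
  repeats_without_advancing(&inner, 1, 5),   /* 1: would loop  */
  repeats_without_advancing(&inner, 1, 6));  /* 0: has advanced */
return 0;
}
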
@@ -2817,35 +2817,35 @@ for (;;)
new_recursive.prevrec = md->recursive;
md->recursive = &new_recursive;
- rc = internal_dfa_exec(
- md, /* fixed match data */
+ rc = internal_dfa_exec(
+ md, /* fixed match data */
callpat, /* this subexpression's code */
- ptr, /* where we currently are */
+ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
- local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(int), /* size of same */
- local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
+ local_offsets, /* offset vector */
+ sizeof(local_offsets)/sizeof(int), /* size of same */
+ local_workspace, /* workspace vector */
+ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
-
+
md->recursive = new_recursive.prevrec; /* Done this recursion */
-
+
DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
rc));
- /* Ran out of internal offsets */
-
- if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
-
- /* For each successful matched substring, set up the next state with a
- count of characters to skip before trying it. Note that the count is in
- characters, not bytes. */
-
- if (rc > 0)
- {
- for (rc = rc*2 - 2; rc >= 0; rc -= 2)
- {
- int charcount = local_offsets[rc+1] - local_offsets[rc];
+ /* Ran out of internal offsets */
+
+ if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
+
+ /* For each successful matched substring, set up the next state with a
+ count of characters to skip before trying it. Note that the count is in
+ characters, not bytes. */
+
+ if (rc > 0)
+ {
+ for (rc = rc*2 - 2; rc >= 0; rc -= 2)
+ {
+ int charcount = local_offsets[rc+1] - local_offsets[rc];
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf)
{
@@ -2854,21 +2854,21 @@ for (;;)
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif
- if (charcount > 0)
- {
- ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
- }
- else
- {
- ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
- }
- }
- }
- else if (rc != PCRE_ERROR_NOMATCH) return rc;
- }
- break;
-
- /*-----------------------------------------------------------------*/
+ if (charcount > 0)
+ {
+ ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
+ }
+ else
+ {
+ ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
+ }
+ }
+ }
+ else if (rc != PCRE_ERROR_NOMATCH) return rc;
+ }
+ break;
+
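
A minimal sketch of the byte-count to character-count adjustment performed above for UTF subjects ("the count is in characters, not bytes"): the span is walked once and every UTF-8 continuation byte (10xxxxxx) is discounted. NOT_FIRSTCHAR in the real code plays the role of the is_continuation() test here.

#include <stdio.h>

static int is_continuation(unsigned char b) { return (b & 0xC0) == 0x80; }

static int char_count(const unsigned char *p, int bytecount)
{
int charcount = bytecount;
for (const unsigned char *pp = p + bytecount; p < pp; p++)
  if (is_continuation(*p)) charcount--;
return charcount;
}

int main(void)
{
const unsigned char s[] = "h\xC3\xA9llo";          /* "héllo": 6 bytes, 5 chars */
printf("%d\n", char_count(s, 6));                  /* prints 5 */
return 0;
}
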
+ /*-----------------------------------------------------------------*/
case OP_BRAPOS:
case OP_SBRAPOS:
case OP_CBRAPOS:
@@ -2960,78 +2960,78 @@ for (;;)
break;
/*-----------------------------------------------------------------*/
- case OP_ONCE:
+ case OP_ONCE:
case OP_ONCE_NC:
- {
- int local_offsets[2];
- int local_workspace[1000];
-
- int rc = internal_dfa_exec(
- md, /* fixed match data */
- code, /* this subexpression's code */
- ptr, /* where we currently are */
+ {
+ int local_offsets[2];
+ int local_workspace[1000];
+
+ int rc = internal_dfa_exec(
+ md, /* fixed match data */
+ code, /* this subexpression's code */
+ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
- local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(int), /* size of same */
- local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
+ local_offsets, /* offset vector */
+ sizeof(local_offsets)/sizeof(int), /* size of same */
+ local_workspace, /* workspace vector */
+ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
-
- if (rc >= 0)
- {
+
+ if (rc >= 0)
+ {
const pcre_uchar *end_subpattern = code;
- int charcount = local_offsets[1] - local_offsets[0];
- int next_state_offset, repeat_state_offset;
-
- do { end_subpattern += GET(end_subpattern, 1); }
- while (*end_subpattern == OP_ALT);
+ int charcount = local_offsets[1] - local_offsets[0];
+ int next_state_offset, repeat_state_offset;
+
+ do { end_subpattern += GET(end_subpattern, 1); }
+ while (*end_subpattern == OP_ALT);
next_state_offset =
(int)(end_subpattern - start_code + LINK_SIZE + 1);
-
- /* If the end of this subpattern is KETRMAX or KETRMIN, we must
- arrange for the repeat state also to be added to the relevant list.
- Calculate the offset, or set -1 for no repeat. */
-
- repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
- *end_subpattern == OP_KETRMIN)?
+
+ /* If the end of this subpattern is KETRMAX or KETRMIN, we must
+ arrange for the repeat state also to be added to the relevant list.
+ Calculate the offset, or set -1 for no repeat. */
+
+ repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
+ *end_subpattern == OP_KETRMIN)?
(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
-
- /* If we have matched an empty string, add the next state at the
- current character pointer. This is important so that the duplicate
- checking kicks in, which is what breaks infinite loops that match an
- empty string. */
-
- if (charcount == 0)
- {
- ADD_ACTIVE(next_state_offset, 0);
- }
-
- /* Optimization: if there are no more active states, and there
- are no new states yet set up, then skip over the subject string
- right here, to save looping. Otherwise, set up the new state to swing
+
+ /* If we have matched an empty string, add the next state at the
+ current character pointer. This is important so that the duplicate
+ checking kicks in, which is what breaks infinite loops that match an
+ empty string. */
+
+ if (charcount == 0)
+ {
+ ADD_ACTIVE(next_state_offset, 0);
+ }
+
+ /* Optimization: if there are no more active states, and there
+ are no new states yet set up, then skip over the subject string
+ right here, to save looping. Otherwise, set up the new state to swing
into action when the end of the matched substring is reached. */
-
- else if (i + 1 >= active_count && new_count == 0)
- {
- ptr += charcount;
- clen = 0;
- ADD_NEW(next_state_offset, 0);
-
- /* If we are adding a repeat state at the new character position,
- we must fudge things so that it is the only current state.
- Otherwise, it might be a duplicate of one we processed before, and
- that would cause it to be skipped. */
-
- if (repeat_state_offset >= 0)
- {
- next_active_state = active_states;
- active_count = 0;
- i = -1;
- ADD_ACTIVE(repeat_state_offset, 0);
- }
- }
- else
- {
+
+ else if (i + 1 >= active_count && new_count == 0)
+ {
+ ptr += charcount;
+ clen = 0;
+ ADD_NEW(next_state_offset, 0);
+
+ /* If we are adding a repeat state at the new character position,
+ we must fudge things so that it is the only current state.
+ Otherwise, it might be a duplicate of one we processed before, and
+ that would cause it to be skipped. */
+
+ if (repeat_state_offset >= 0)
+ {
+ next_active_state = active_states;
+ active_count = 0;
+ i = -1;
+ ADD_ACTIVE(repeat_state_offset, 0);
+ }
+ }
+ else
+ {
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf)
{
@@ -3040,29 +3040,29 @@ for (;;)
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif
- ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
- if (repeat_state_offset >= 0)
- { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
- }
- }
- else if (rc != PCRE_ERROR_NOMATCH) return rc;
- }
- break;
-
-
-/* ========================================================================== */
- /* Handle callouts */
-
- case OP_CALLOUT:
+ ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
+ if (repeat_state_offset >= 0)
+ { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
+ }
+ }
+ else if (rc != PCRE_ERROR_NOMATCH) return rc;
+ }
+ break;
+
+
+/* ========================================================================== */
+ /* Handle callouts */
+
+ case OP_CALLOUT:
rrc = 0;
if (PUBL(callout) != NULL)
- {
+ {
PUBL(callout_block) cb;
- cb.version = 1; /* Version 1 of the callout block */
- cb.callout_number = code[1];
- cb.offset_vector = offsets;
+ cb.version = 1; /* Version 1 of the callout block */
+ cb.callout_number = code[1];
+ cb.offset_vector = offsets;
#if defined COMPILE_PCRE8
- cb.subject = (PCRE_SPTR)start_subject;
+ cb.subject = (PCRE_SPTR)start_subject;
#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;
#elif defined COMPILE_PCRE32
@@ -3071,33 +3071,33 @@ for (;;)
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);
- cb.pattern_position = GET(code, 2);
- cb.next_item_length = GET(code, 2 + LINK_SIZE);
- cb.capture_top = 1;
- cb.capture_last = -1;
- cb.callout_data = md->callout_data;
+ cb.pattern_position = GET(code, 2);
+ cb.next_item_length = GET(code, 2 + LINK_SIZE);
+ cb.capture_top = 1;
+ cb.capture_last = -1;
+ cb.callout_data = md->callout_data;
cb.mark = NULL; /* No (*MARK) support */
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
- }
+ }
if (rrc == 0)
{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
- break;
-
-
-/* ========================================================================== */
- default: /* Unsupported opcode */
- return PCRE_ERROR_DFA_UITEM;
- }
-
- NEXT_ACTIVE_STATE: continue;
-
- } /* End of loop scanning active states */
-
- /* We have finished the processing at the current subject character. If no
- new states have been set for the next character, we have found all the
- matches that we are going to find. If we are at the top level and partial
+ break;
+
+
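The callout handling above fills in a pcre_callout_block and hands it to the global callout hook, continuing the state only when the hook returns 0. As a rough caller-side sketch (not part of this diff; it assumes the classic pcre_callout hook and a pattern containing (?Cn) callout points), a user callout could be installed like this:

#include <stdio.h>
#include <pcre.h>

/* Hypothetical user callout, illustrative only: invoked at each (?Cn)
   point in the pattern; returning 0 lets matching continue. */
static int my_callout(pcre_callout_block *cb)
{
printf("callout %d at subject offset %d\n",
  cb->callout_number, cb->current_position);
return 0;
}

void install_callout(void)
{
pcre_callout = my_callout;   /* global hook consulted during matching */
}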
+/* ========================================================================== */
+ default: /* Unsupported opcode */
+ return PCRE_ERROR_DFA_UITEM;
+ }
+
+ NEXT_ACTIVE_STATE: continue;
+
+ } /* End of loop scanning active states */
+
+ /* We have finished the processing at the current subject character. If no
+ new states have been set for the next character, we have found all the
+ matches that we are going to find. If we are at the top level and partial
matching has been requested, check for appropriate conditions.
-
+
The "forced_ fail" variable counts the number of (*F) encountered for the
character. If it is equal to the original active_count (saved in
workspace[1]) it means that (*F) was found on every active state. In this
@@ -3106,8 +3106,8 @@ for (;;)
The "could_continue" variable is true if a state could have continued but
for the fact that the end of the subject was reached. */
- if (new_count <= 0)
- {
+ if (new_count <= 0)
+ {
if (rlevel == 1 && /* Top level, and */
could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */
@@ -3124,61 +3124,61 @@ for (;;)
ptr > md->start_used_ptr) /* Inspected non-empty string */
)
)
- match_count = PCRE_ERROR_PARTIAL;
- DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
- "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
- rlevel*2-2, SP));
- break; /* In effect, "return", but see the comment below */
- }
-
- /* One or more states are active for the next character. */
-
- ptr += clen; /* Advance to next subject character */
- } /* Loop to move along the subject string */
-
-/* Control gets here from "break" a few lines above. We do it this way because
-if we use "return" above, we have compiler trouble. Some compilers warn if
-there's nothing here because they think the function doesn't return a value. On
-the other hand, if we put a dummy statement here, some more clever compilers
-complain that it can't be reached. Sigh. */
-
-return match_count;
-}
-
-
-
-
-/*************************************************
-* Execute a Regular Expression - DFA engine *
-*************************************************/
-
-/* This external function applies a compiled re to a subject string using a DFA
-engine. This function calls the internal function multiple times if the pattern
-is not anchored.
-
-Arguments:
- argument_re points to the compiled expression
- extra_data points to extra data or is NULL
- subject points to the subject string
- length length of subject string (may contain binary zeros)
- start_offset where to start in the subject string
- options option bits
- offsets vector of match offsets
- offsetcount size of same
- workspace workspace vector
- wscount size of same
-
-Returns: > 0 => number of match offset pairs placed in offsets
- = 0 => offsets overflowed; longest matches are present
- -1 => failed to match
- < -1 => some kind of unexpected problem
-*/
-
+ match_count = PCRE_ERROR_PARTIAL;
+ DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
+ "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
+ rlevel*2-2, SP));
+ break; /* In effect, "return", but see the comment below */
+ }
+
+ /* One or more states are active for the next character. */
+
+ ptr += clen; /* Advance to next subject character */
+ } /* Loop to move along the subject string */
+
+/* Control gets here from "break" a few lines above. We do it this way because
+if we use "return" above, we have compiler trouble. Some compilers warn if
+there's nothing here because they think the function doesn't return a value. On
+the other hand, if we put a dummy statement here, some more clever compilers
+complain that it can't be reached. Sigh. */
+
+return match_count;
+}
+
+
+
+
+/*************************************************
+* Execute a Regular Expression - DFA engine *
+*************************************************/
+
+/* This external function applies a compiled re to a subject string using a DFA
+engine. This function calls the internal function multiple times if the pattern
+is not anchored.
+
+Arguments:
+ argument_re points to the compiled expression
+ extra_data points to extra data or is NULL
+ subject points to the subject string
+ length length of subject string (may contain binary zeros)
+ start_offset where to start in the subject string
+ options option bits
+ offsets vector of match offsets
+ offsetcount size of same
+ workspace workspace vector
+ wscount size of same
+
+Returns: > 0 => number of match offset pairs placed in offsets
+ = 0 => offsets overflowed; longest matches are present
+ -1 => failed to match
+ < -1 => some kind of unexpected problem
+*/
+
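A minimal caller-side sketch of the interface documented above (not part of this diff; the pattern and subject are made up, and it assumes the standard 8-bit pcre_compile()/pcre_free() entry points from pcre.h):

#include <stdio.h>
#include <string.h>
#include <pcre.h>

/* Illustrative sketch only; error handling kept minimal. */
int main(void)
{
const char *error;
int erroffset;
int offsets[30];       /* vector of match offsets */
int workspace[100];    /* DFA workspace; must hold at least 20 ints */

pcre *re = pcre_compile("<[a-z]+>", 0, &error, &erroffset, NULL);
if (re == NULL) return 1;

const char *subject = "some <html> text";
int rc = pcre_dfa_exec(re, NULL, subject, (int)strlen(subject), 0, 0,
  offsets, 30, workspace, 100);

if (rc > 0)
  printf("matched: %.*s\n", offsets[1] - offsets[0], subject + offsets[0]);
else if (rc == PCRE_ERROR_NOMATCH)
  printf("no match\n");
else
  printf("error or offsets overflow: %d\n", rc);

pcre_free(re);
return 0;
}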
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
- const char *subject, int length, int start_offset, int options, int *offsets,
- int offsetcount, int *workspace, int wscount)
+pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
+ const char *subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount, int *workspace, int wscount)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
@@ -3190,14 +3190,14 @@ pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
#endif
-{
+{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
-dfa_match_data match_block;
-dfa_match_data *md = &match_block;
+dfa_match_data match_block;
+dfa_match_data *md = &match_block;
BOOL utf, anchored, startline, firstline;
const pcre_uchar *current_subject, *end_subject;
-const pcre_study_data *study = NULL;
-
+const pcre_study_data *study = NULL;
+
const pcre_uchar *req_char_ptr;
const pcre_uint8 *start_bits = NULL;
BOOL has_first_char = FALSE;
@@ -3206,23 +3206,23 @@ pcre_uchar first_char = 0;
pcre_uchar first_char2 = 0;
pcre_uchar req_char = 0;
pcre_uchar req_char2 = 0;
-int newline;
-
-/* Plausibility checks */
-
-if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
-if (re == NULL || subject == NULL || workspace == NULL ||
- (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
-if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
-if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
+int newline;
+
+/* Plausibility checks */
+
+if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
+if (re == NULL || subject == NULL || workspace == NULL ||
+ (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
+if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
+if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (length < 0) return PCRE_ERROR_BADLENGTH;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
-
+
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
-
+
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
@@ -3240,116 +3240,116 @@ if ((options & PCRE_DFA_RESTART) != 0)
/* Set up study, callout, and table data */
-md->tables = re->tables;
-md->callout_data = NULL;
-
-if (extra_data != NULL)
- {
+md->tables = re->tables;
+md->callout_data = NULL;
+
+if (extra_data != NULL)
+ {
unsigned long int flags = extra_data->flags;
- if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
- study = (const pcre_study_data *)extra_data->study_data;
- if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
- if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
- return PCRE_ERROR_DFA_UMLIMIT;
- if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
- md->callout_data = extra_data->callout_data;
- if ((flags & PCRE_EXTRA_TABLES) != 0)
- md->tables = extra_data->tables;
- }
-
-/* Set some local values */
-
+ if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
+ study = (const pcre_study_data *)extra_data->study_data;
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
+ return PCRE_ERROR_DFA_UMLIMIT;
+ if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
+ md->callout_data = extra_data->callout_data;
+ if ((flags & PCRE_EXTRA_TABLES) != 0)
+ md->tables = extra_data->tables;
+ }
+
+/* Set some local values */
+
current_subject = (const pcre_uchar *)subject + start_offset;
end_subject = (const pcre_uchar *)subject + length;
req_char_ptr = current_subject - 1;
-
+
#ifdef SUPPORT_UTF
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (re->options & PCRE_UTF8) != 0;
-#else
+#else
utf = FALSE;
-#endif
-
-anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
- (re->options & PCRE_ANCHORED) != 0;
-
-/* The remaining fixed data for passing around. */
-
+#endif
+
+anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
+ (re->options & PCRE_ANCHORED) != 0;
+
+/* The remaining fixed data for passing around. */
+
md->start_code = (const pcre_uchar *)argument_re +
- re->name_table_offset + re->name_count * re->name_entry_size;
+ re->name_table_offset + re->name_count * re->name_entry_size;
md->start_subject = (const pcre_uchar *)subject;
-md->end_subject = end_subject;
+md->end_subject = end_subject;
md->start_offset = start_offset;
-md->moptions = options;
-md->poptions = re->options;
-
-/* If the BSR option is not set at match time, copy what was set
-at compile time. */
-
-if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
- {
- if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
- md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
-#ifdef BSR_ANYCRLF
- else md->moptions |= PCRE_BSR_ANYCRLF;
-#endif
- }
-
-/* Handle different types of newline. The three bits give eight cases. If
-nothing is set at run time, whatever was used at compile time applies. */
-
-switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
- PCRE_NEWLINE_BITS)
- {
- case 0: newline = NEWLINE; break; /* Compile-time default */
+md->moptions = options;
+md->poptions = re->options;
+
+/* If the BSR option is not set at match time, copy what was set
+at compile time. */
+
+if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
+ {
+ if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
+ md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
+#ifdef BSR_ANYCRLF
+ else md->moptions |= PCRE_BSR_ANYCRLF;
+#endif
+ }
+
+/* Handle different types of newline. The three bits give eight cases. If
+nothing is set at run time, whatever was used at compile time applies. */
+
+switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
+ PCRE_NEWLINE_BITS)
+ {
+ case 0: newline = NEWLINE; break; /* Compile-time default */
case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
- case PCRE_NEWLINE_CR+
+ case PCRE_NEWLINE_CR+
PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
- case PCRE_NEWLINE_ANY: newline = -1; break;
- case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
- default: return PCRE_ERROR_BADNEWLINE;
- }
-
-if (newline == -2)
- {
- md->nltype = NLTYPE_ANYCRLF;
- }
-else if (newline < 0)
- {
- md->nltype = NLTYPE_ANY;
- }
-else
- {
- md->nltype = NLTYPE_FIXED;
- if (newline > 255)
- {
- md->nllen = 2;
- md->nl[0] = (newline >> 8) & 255;
- md->nl[1] = newline & 255;
- }
- else
- {
- md->nllen = 1;
- md->nl[0] = newline;
- }
- }
-
-/* Check a UTF-8 string if required. Unfortunately there's no way of passing
-back the character offset. */
-
+ case PCRE_NEWLINE_ANY: newline = -1; break;
+ case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
+ default: return PCRE_ERROR_BADNEWLINE;
+ }
+
+if (newline == -2)
+ {
+ md->nltype = NLTYPE_ANYCRLF;
+ }
+else if (newline < 0)
+ {
+ md->nltype = NLTYPE_ANY;
+ }
+else
+ {
+ md->nltype = NLTYPE_FIXED;
+ if (newline > 255)
+ {
+ md->nllen = 2;
+ md->nl[0] = (newline >> 8) & 255;
+ md->nl[1] = newline & 255;
+ }
+ else
+ {
+ md->nllen = 1;
+ md->nl[0] = newline;
+ }
+ }
+
+/* Check a UTF-8 string if required. Unfortunately there's no way of passing
+back the character offset. */
+
#ifdef SUPPORT_UTF
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
- {
+ {
int erroroffset;
int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
if (errorcode != 0)
- {
+ {
if (offsetcount >= 2)
- {
+ {
offsets[0] = erroroffset;
offsets[1] = errorcode;
- }
+ }
#if defined COMPILE_PCRE8
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
@@ -3359,37 +3359,37 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
#elif defined COMPILE_PCRE32
return PCRE_ERROR_BADUTF32;
#endif
- }
+ }
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
#endif
- }
-#endif
-
-/* If the exec call supplied NULL for tables, use the inbuilt ones. This
-is a feature that makes it possible to save compiled regex and re-use them
-in other programs later. */
-
+ }
+#endif
+
+/* If the exec call supplied NULL for tables, use the inbuilt ones. This
+is a feature that makes it possible to save compiled regex and re-use them
+in other programs later. */
+
if (md->tables == NULL) md->tables = PRIV(default_tables);
-
+
/* The "must be at the start of a line" flags are used in a loop when finding
where to start. */
-
-startline = (re->flags & PCRE_STARTLINE) != 0;
-firstline = (re->options & PCRE_FIRSTLINE) != 0;
-
-/* Set up the first character to match, if available. The first_byte value is
-never set for an anchored regular expression, but the anchoring may be forced
-at run time, so we have to test for anchoring. The first char may be unset for
-an unanchored pattern, of course. If there's no first char and the pattern was
-studied, there may be a bitmap of possible first characters. */
-
-if (!anchored)
- {
- if ((re->flags & PCRE_FIRSTSET) != 0)
- {
+
+startline = (re->flags & PCRE_STARTLINE) != 0;
+firstline = (re->options & PCRE_FIRSTLINE) != 0;
+
+/* Set up the first character to match, if available. The first_byte value is
+never set for an anchored regular expression, but the anchoring may be forced
+at run time, so we have to test for anchoring. The first char may be unset for
+an unanchored pattern, of course. If there's no first char and the pattern was
+studied, there may be a bitmap of possible first characters. */
+
+if (!anchored)
+ {
+ if ((re->flags & PCRE_FIRSTSET) != 0)
+ {
has_first_char = TRUE;
first_char = first_char2 = (pcre_uchar)(re->first_char);
if ((re->flags & PCRE_FCH_CASELESS) != 0)
@@ -3400,20 +3400,20 @@ if (!anchored)
first_char2 = UCD_OTHERCASE(first_char);
#endif
}
- }
- else
- {
+ }
+ else
+ {
if (!startline && study != NULL &&
(study->flags & PCRE_STUDY_MAPPED) != 0)
- start_bits = study->start_bits;
- }
- }
-
-/* For anchored or unanchored matches, there may be a "last known required
-character" set. */
-
-if ((re->flags & PCRE_REQCHSET) != 0)
- {
+ start_bits = study->start_bits;
+ }
+ }
+
+/* For anchored or unanchored matches, there may be a "last known required
+character" set. */
+
+if ((re->flags & PCRE_REQCHSET) != 0)
+ {
has_req_char = TRUE;
req_char = req_char2 = (pcre_uchar)(re->req_char);
if ((re->flags & PCRE_RCH_CASELESS) != 0)
@@ -3424,27 +3424,27 @@ if ((re->flags & PCRE_REQCHSET) != 0)
req_char2 = UCD_OTHERCASE(req_char);
#endif
}
- }
-
-/* Call the main matching function, looping for a non-anchored regex after a
+ }
+
+/* Call the main matching function, looping for a non-anchored regex after a
failed match. If not restarting, perform certain optimizations at the start of
a match. */
-
-for (;;)
- {
- int rc;
-
- if ((options & PCRE_DFA_RESTART) == 0)
- {
+
+for (;;)
+ {
+ int rc;
+
+ if ((options & PCRE_DFA_RESTART) == 0)
+ {
const pcre_uchar *save_end_subject = end_subject;
-
+
/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. Implement this by temporarily adjusting
end_subject so that we stop scanning at a newline. If the match fails at
the newline, later code breaks this loop. */
-
- if (firstline)
- {
+
+ if (firstline)
+ {
PCRE_PUCHAR t = current_subject;
#ifdef SUPPORT_UTF
if (utf)
@@ -3457,10 +3457,10 @@ for (;;)
}
else
#endif
- while (t < md->end_subject && !IS_NEWLINE(t)) t++;
- end_subject = t;
- }
-
+ while (t < md->end_subject && !IS_NEWLINE(t)) t++;
+ end_subject = t;
+ }
+
/* There are some optimizations that avoid running the match if a known
starting point is not found. However, there is an option that disables
these, for testing and for ensuring that all callouts do actually occur.
@@ -3468,9 +3468,9 @@ for (;;)
match-time options. */
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
- {
+ {
/* Advance to a known first pcre_uchar (i.e. data item) */
-
+
if (has_first_char)
{
if (first_char != first_char2)
@@ -3485,11 +3485,11 @@ for (;;)
UCHAR21TEST(current_subject) != first_char)
current_subject++;
}
-
+
/* Or to just after a linebreak for a multiline match if possible */
else if (startline)
- {
+ {
if (current_subject > md->start_subject + start_offset)
{
#ifdef SUPPORT_UTF
@@ -3507,23 +3507,23 @@ for (;;)
#endif
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
current_subject++;
-
+
/* If we have just passed a CR and the newline option is ANY or
ANYCRLF, and we are now at a LF, advance the match position by one
more character. */
-
+
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&
UCHAR21TEST(current_subject) == CHAR_NL)
current_subject++;
}
- }
-
+ }
+
/* Advance to a non-unique first pcre_uchar after study */
-
+
else if (start_bits != NULL)
- {
+ {
while (current_subject < end_subject)
{
register pcre_uint32 c = UCHAR21TEST(current_subject);
@@ -3533,17 +3533,17 @@ for (;;)
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
current_subject++;
}
- }
- }
-
- /* Restore fudged end_subject */
-
- end_subject = save_end_subject;
-
+ }
+ }
+
+ /* Restore fudged end_subject */
+
+ end_subject = save_end_subject;
+
/* The following two optimizations are disabled for partial matching or if
disabling is explicitly requested (and of course, by the test above, this
code is not obeyed when restarting after a partial match). */
-
+
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
(options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
{
@@ -3552,11 +3552,11 @@ for (;;)
pattern. Although the value is, strictly, in characters, we treat it as
in pcre_uchar units to avoid spending too much time in this optimization.
*/
-
+
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
(pcre_uint32)(end_subject - current_subject) < study->minlength)
return PCRE_ERROR_NOMATCH;
-
+
/* If req_char is set, we know that that pcre_uchar must appear in the
subject for the match to succeed. If the first pcre_uchar is set,
req_char must be later in the subject; otherwise the test starts at the
@@ -3564,21 +3564,21 @@ for (;;)
with nested unlimited repeats that aren't going to match. Writing
separate code for cased/caseless versions makes it go faster, as does
using an autoincrement and backing off on a match.
-
+
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
string... so we don't do this when the string is sufficiently long. */
-
+
if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
- {
+ {
register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
if (p > req_char_ptr)
- {
+ {
if (req_char != req_char2)
{
while (p < end_subject)
@@ -3594,41 +3594,41 @@ for (;;)
if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}
-
+
/* If we can't find the required pcre_uchar, break the matching loop,
which will cause a return or PCRE_ERROR_NOMATCH. */
-
+
if (p >= end_subject) break;
-
+
/* If we have found the required pcre_uchar, save the point where we
found it, so that we don't search again next time round the loop if
the start hasn't passed this point yet. */
-
+
req_char_ptr = p;
}
}
- }
+ }
} /* End of optimizations that are done when not restarting */
-
- /* OK, now we can do the business */
-
+
+ /* OK, now we can do the business */
+
md->start_used_ptr = current_subject;
md->recursive = NULL;
- rc = internal_dfa_exec(
- md, /* fixed match data */
- md->start_code, /* this subexpression's code */
- current_subject, /* where we currently are */
- start_offset, /* start offset in subject */
- offsets, /* offset vector */
- offsetcount, /* size of same */
- workspace, /* workspace vector */
- wscount, /* size of same */
+ rc = internal_dfa_exec(
+ md, /* fixed match data */
+ md->start_code, /* this subexpression's code */
+ current_subject, /* where we currently are */
+ start_offset, /* start offset in subject */
+ offsets, /* offset vector */
+ offsetcount, /* size of same */
+ workspace, /* workspace vector */
+ wscount, /* size of same */
0); /* function recurse level */
-
- /* Anything other than "no match" means we are done, always; otherwise, carry
- on only if not anchored. */
-
+
+ /* Anything other than "no match" means we are done, always; otherwise, carry
+ on only if not anchored. */
+
if (rc != PCRE_ERROR_NOMATCH || anchored)
{
if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
@@ -3640,37 +3640,37 @@ for (;;)
}
return rc;
}
-
- /* Advance to the next subject character unless we are at the end of a line
- and firstline is set. */
-
- if (firstline && IS_NEWLINE(current_subject)) break;
- current_subject++;
+
+ /* Advance to the next subject character unless we are at the end of a line
+ and firstline is set. */
+
+ if (firstline && IS_NEWLINE(current_subject)) break;
+ current_subject++;
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
ACROSSCHAR(current_subject < end_subject, *current_subject,
current_subject++);
- }
+ }
#endif
- if (current_subject > end_subject) break;
-
- /* If we have just passed a CR and we are now at a LF, and the pattern does
- not contain any explicit matches for \r or \n, and the newline option is CRLF
- or ANY or ANYCRLF, advance the match position by one more character. */
-
+ if (current_subject > end_subject) break;
+
+ /* If we have just passed a CR and we are now at a LF, and the pattern does
+ not contain any explicit matches for \r or \n, and the newline option is CRLF
+ or ANY or ANYCRLF, advance the match position by one more character. */
+
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
- current_subject < end_subject &&
+ current_subject < end_subject &&
UCHAR21TEST(current_subject) == CHAR_NL &&
- (re->flags & PCRE_HASCRORLF) == 0 &&
- (md->nltype == NLTYPE_ANY ||
- md->nltype == NLTYPE_ANYCRLF ||
- md->nllen == 2))
- current_subject++;
-
- } /* "Bumpalong" loop */
-
-return PCRE_ERROR_NOMATCH;
-}
-
-/* End of pcre_dfa_exec.c */
+ (re->flags & PCRE_HASCRORLF) == 0 &&
+ (md->nltype == NLTYPE_ANY ||
+ md->nltype == NLTYPE_ANYCRLF ||
+ md->nllen == 2))
+ current_subject++;
+
+ } /* "Bumpalong" loop */
+
+return PCRE_ERROR_NOMATCH;
+}
+
+/* End of pcre_dfa_exec.c */
diff --git a/contrib/libs/pcre/pcre_exec.c b/contrib/libs/pcre/pcre_exec.c
index 9d023d74e9..4b5cb73fea 100644
--- a/contrib/libs/pcre/pcre_exec.c
+++ b/contrib/libs/pcre/pcre_exec.c
@@ -1,61 +1,61 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2018 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* This module contains pcre_exec(), the externally visible function that does
-pattern matching using an NFA algorithm, trying to mimic Perl as closely as
-possible. There are also some static supporting functions. */
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains pcre_exec(), the externally visible function that does
+pattern matching using an NFA algorithm, trying to mimic Perl as closely as
+possible. There are also some static supporting functions. */
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#define NLBLOCK md /* Block containing newline information */
-#define PSSTART start_subject /* Field containing processed string start */
-#define PSEND end_subject /* Field containing processed string end */
-
-#include "pcre_internal.h"
-
-/* Undefine some potentially clashing cpp symbols */
-
-#undef min
-#undef max
-
+#endif
+
+#define NLBLOCK md /* Block containing newline information */
+#define PSSTART start_subject /* Field containing processed string start */
+#define PSEND end_subject /* Field containing processed string end */
+
+#include "pcre_internal.h"
+
+/* Undefine some potentially clashing cpp symbols */
+
+#undef min
+#undef max
+
/* The md->capture_last field uses the lower 16 bits for the last captured
substring (which can never be greater than 65535) and a bit in the top half
to mean "capture vector overflowed". This odd way of doing things was
@@ -65,11 +65,11 @@ interface, and doing it this way saved on (a) another variable, which would
have increased the stack frame size (a big NO-NO in PCRE) and (b) another
separate set of save/restore instructions. The following defines are used in
implementing this. */
-
+
#define CAPLMASK 0x0000ffff /* The bits used for last_capture */
#define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */
#define OVFLBIT 0x00010000 /* The bit that is set for overflow */
-
+
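To illustrate the packing scheme just described (a standalone sketch, not part of this diff), the last-capture number and the overflow flag can be recovered from a single field with the masks defined above:

#include <stdio.h>

/* Illustrative only: mirrors the CAPLMASK/OVFLMASK/OVFLBIT scheme above. */
#define CAPLMASK 0x0000ffff    /* bits used for last_capture */
#define OVFLMASK 0xffff0000    /* bits used for the overflow flag */
#define OVFLBIT  0x00010000    /* bit set when the offset vector overflowed */

int main(void)
{
unsigned int capture_last = 0;

capture_last = (capture_last & OVFLMASK) | 7;   /* record capture number 7 */
capture_last |= OVFLBIT;                        /* note that the vector overflowed */

printf("last capture = %u, overflowed = %s\n",
  capture_last & CAPLMASK, (capture_last & OVFLBIT) != 0 ? "yes" : "no");
return 0;
}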
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. */
@@ -77,15 +77,15 @@ as stack usage is to be discouraged. */
#define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
#define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
-/* Non-error returns from the match() function. Error returns are externally
-defined PCRE_ERROR_xxx codes, which are all negative. */
-
-#define MATCH_MATCH 1
-#define MATCH_NOMATCH 0
-
-/* Special internal returns from the match() function. Make them sufficiently
-negative to avoid the external error codes. */
-
+/* Non-error returns from the match() function. Error returns are externally
+defined PCRE_ERROR_xxx codes, which are all negative. */
+
+#define MATCH_MATCH 1
+#define MATCH_NOMATCH 0
+
+/* Special internal returns from the match() function. Make them sufficiently
+negative to avoid the external error codes. */
+
#define MATCH_ACCEPT (-999)
#define MATCH_KETRPOS (-998)
#define MATCH_ONCE (-997)
@@ -98,103 +98,103 @@ for any one of them can use a range. */
#define MATCH_THEN (-992)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
-
-/* Maximum number of ints of offset to save on the stack for recursive calls.
-If the offset vector is bigger, malloc is used. This should be a multiple of 3,
-because the offset vector is always a multiple of 3 long. */
-
-#define REC_STACK_SAVE_MAX 30
-
-/* Min and max values for the common repeats; for the maxima, 0 => infinity */
-
+
+/* Maximum number of ints of offset to save on the stack for recursive calls.
+If the offset vector is bigger, malloc is used. This should be a multiple of 3,
+because the offset vector is always a multiple of 3 long. */
+
+#define REC_STACK_SAVE_MAX 30
+
+/* Min and max values for the common repeats; for the maxima, 0 => infinity */
+
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
-
+
#ifdef PCRE_DEBUG
-/*************************************************
-* Debugging function to print chars *
-*************************************************/
-
-/* Print a sequence of chars in printable format, stopping at the end of the
-subject if requested.
-
-Arguments:
- p points to characters
- length number to print
- is_subject TRUE if printing from within md->start_subject
- md pointer to matching data block, if is_subject is TRUE
-
-Returns: nothing
-*/
-
-static void
+/*************************************************
+* Debugging function to print chars *
+*************************************************/
+
+/* Print a sequence of chars in printable format, stopping at the end of the
+subject if requested.
+
+Arguments:
+ p points to characters
+ length number to print
+ is_subject TRUE if printing from within md->start_subject
+ md pointer to matching data block, if is_subject is TRUE
+
+Returns: nothing
+*/
+
+static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
-{
+{
pcre_uint32 c;
BOOL utf = md->utf;
-if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
-while (length-- > 0)
+if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
+while (length-- > 0)
if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
-}
-#endif
-
-
-
-/*************************************************
-* Match a back-reference *
-*************************************************/
-
+}
+#endif
+
+
+
+/*************************************************
+* Match a back-reference *
+*************************************************/
+
/* Normally, if a back reference hasn't been set, the length that is passed is
negative, so the match always fails. However, in JavaScript compatibility mode,
the length passed is zero. Note that in caseless UTF-8 mode, the number of
subject bytes matched may be different to the number of reference bytes.
-
-Arguments:
- offset index into the offset vector
+
+Arguments:
+ offset index into the offset vector
eptr pointer into the subject
length length of reference to be matched (number of bytes)
- md points to match data block
+ md points to match data block
caseless TRUE if caseless
-
+
Returns: >= 0 the number of subject bytes matched
-1 no match
-2 partial match; always given if at end subject
-*/
-
+*/
+
static int
match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
BOOL caseless)
-{
+{
PCRE_PUCHAR eptr_start = eptr;
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
#if defined SUPPORT_UTF && defined SUPPORT_UCP
BOOL utf = md->utf;
#endif
-
+
#ifdef PCRE_DEBUG
-if (eptr >= md->end_subject)
- printf("matching subject <null>");
-else
- {
- printf("matching subject ");
- pchars(eptr, length, TRUE, md);
- }
-printf(" against backref ");
-pchars(p, length, FALSE, md);
-printf("\n");
-#endif
-
+if (eptr >= md->end_subject)
+ printf("matching subject <null>");
+else
+ {
+ printf("matching subject ");
+ pchars(eptr, length, TRUE, md);
+ }
+printf(" against backref ");
+pchars(p, length, FALSE, md);
+printf("\n");
+#endif
+
/* Always fail if reference not set (and not JavaScript compatible - in that
case the length is passed as zero). */
-
+
if (length < 0) return -1;
-
+
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
properly if Unicode properties are supported. Otherwise, we can check only
ASCII characters. */
-
+
if (caseless)
- {
+ {
#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (utf)
{
@@ -251,103 +251,103 @@ are in UTF-8 mode. */
else
{
- while (length-- > 0)
+ while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
}
- }
-
+ }
+
return (int)(eptr - eptr_start);
-}
-
-
-
-/***************************************************************************
-****************************************************************************
- RECURSION IN THE match() FUNCTION
-
-The match() function is highly recursive, though not every recursive call
-increases the recursive depth. Nevertheless, some regular expressions can cause
-it to recurse to a great depth. I was writing for Unix, so I just let it call
-itself recursively. This uses the stack for saving everything that has to be
-saved for a recursive call. On Unix, the stack can be large, and this works
-fine.
-
-It turns out that on some non-Unix-like systems there are problems with
-programs that use a lot of stack. (This despite the fact that every last chip
-has oodles of memory these days, and techniques for extending the stack have
-been known for decades.) So....
-
-There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
-calls by keeping local variables that need to be preserved in blocks of memory
-obtained from malloc() instead of on the stack. Macros are used to
-achieve this so that the actual code doesn't look very different to what it
-always used to.
-
-The original heap-recursive code used longjmp(). However, it seems that this
-can be very slow on some operating systems. Following a suggestion from Stan
-Switzer, the use of longjmp() has been abolished, at the cost of having to
-provide a unique number for each call to RMATCH. There is no way of generating
-a sequence of numbers at compile time in C. I have given them names, to make
-them stand out more clearly.
-
-Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
-FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
-tests. Furthermore, not using longjmp() means that local dynamic variables
-don't have indeterminate values; this has meant that the frame size can be
-reduced because the result can be "passed back" by straight setting of the
-variable instead of being passed in the frame.
-****************************************************************************
-***************************************************************************/
-
-/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
-below must be updated in sync. */
-
-enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
- RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
- RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
- RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
- RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
+}
+
+
+
+/***************************************************************************
+****************************************************************************
+ RECURSION IN THE match() FUNCTION
+
+The match() function is highly recursive, though not every recursive call
+increases the recursive depth. Nevertheless, some regular expressions can cause
+it to recurse to a great depth. I was writing for Unix, so I just let it call
+itself recursively. This uses the stack for saving everything that has to be
+saved for a recursive call. On Unix, the stack can be large, and this works
+fine.
+
+It turns out that on some non-Unix-like systems there are problems with
+programs that use a lot of stack. (This despite the fact that every last chip
+has oodles of memory these days, and techniques for extending the stack have
+been known for decades.) So....
+
+There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
+calls by keeping local variables that need to be preserved in blocks of memory
+obtained from malloc() instead of on the stack. Macros are used to
+achieve this so that the actual code doesn't look very different to what it
+always used to.
+
+The original heap-recursive code used longjmp(). However, it seems that this
+can be very slow on some operating systems. Following a suggestion from Stan
+Switzer, the use of longjmp() has been abolished, at the cost of having to
+provide a unique number for each call to RMATCH. There is no way of generating
+a sequence of numbers at compile time in C. I have given them names, to make
+them stand out more clearly.
+
+Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
+FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
+tests. Furthermore, not using longjmp() means that local dynamic variables
+don't have indeterminate values; this has meant that the frame size can be
+reduced because the result can be "passed back" by straight setting of the
+variable instead of being passed in the frame.
+****************************************************************************
+***************************************************************************/
+
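A deliberately simplified, hypothetical analogue of the NO_RECURSE idea explained above (toy code, not taken from PCRE): the locals that each "recursive call" needs are kept in heap-allocated frames chained through a prev pointer, and the push/pop is managed explicitly instead of through the C stack. The real macros additionally record a resume point (Xwhere) and jump back with goto, which this sketch omits.

#include <stdio.h>
#include <stdlib.h>

/* Toy analogue of NO_RECURSE, illustrative only: compute n! with per-call
   state in heap frames rather than on the C stack. Allocation failures are
   not checked, to keep the sketch short. */

typedef struct frame {
  struct frame *prev;   /* previous "call" */
  unsigned int n;       /* argument of this "call" */
} frame;

static unsigned long fact(unsigned int n)
{
frame *top = malloc(sizeof(frame));
unsigned long result = 1;
top->prev = NULL;
top->n = n;

/* "Recurse" by pushing a new frame instead of calling fact() again. */
while (top->n > 1)
  {
  frame *newframe = malloc(sizeof(frame));
  newframe->prev = top;
  newframe->n = top->n - 1;
  top = newframe;
  }

/* "Return" by popping frames and passing the result back explicitly. */
while (top != NULL)
  {
  frame *old = top;
  if (old->n > 0) result *= old->n;
  top = old->prev;
  free(old);
  }
return result;
}

int main(void)
{
printf("10! = %lu\n", fact(10));
return 0;
}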
+/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
+below must be updated in sync. */
+
+enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
+ RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
+ RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
+ RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
+ RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
-
-/* These versions of the macros use the stack, as normal. There are debugging
-versions and production versions. Note that the "rw" argument of RMATCH isn't
+
+/* These versions of the macros use the stack, as normal. There are debugging
+versions and production versions. Note that the "rw" argument of RMATCH isn't
actually used in this definition. */
-
-#ifndef NO_RECURSE
-#define REGISTER register
-
+
+#ifndef NO_RECURSE
+#define REGISTER register
+
#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rw) \
- { \
- printf("match() called in line %d\n", __LINE__); \
+ { \
+ printf("match() called in line %d\n", __LINE__); \
rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
- printf("to line %d\n", __LINE__); \
- }
-#define RRETURN(ra) \
- { \
+ printf("to line %d\n", __LINE__); \
+ }
+#define RRETURN(ra) \
+ { \
printf("match() returned %d from line %d\n", ra, __LINE__); \
- return ra; \
- }
-#else
+ return ra; \
+ }
+#else
#define RMATCH(ra,rb,rc,rd,re,rw) \
rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
-#define RRETURN(ra) return ra
-#endif
-
-#else
-
-
-/* These versions of the macros manage a private stack on the heap. Note that
-the "rd" argument of RMATCH isn't actually used in this definition. It's the md
-argument of match(), which never changes. */
-
-#define REGISTER
-
+#define RRETURN(ra) return ra
+#endif
+
+#else
+
+
+/* These versions of the macros manage a private stack on the heap. Note that
+the "rd" argument of RMATCH isn't actually used in this definition. It's the md
+argument of match(), which never changes. */
+
+#define REGISTER
+
#define RMATCH(ra,rb,rc,rd,re,rw)\
- {\
+ {\
heapframe *newframe = frame->Xnextframe;\
if (newframe == NULL)\
{\
@@ -357,50 +357,50 @@ argument of match(), which never changes. */
frame->Xnextframe = newframe;\
}\
frame->Xwhere = rw;\
- newframe->Xeptr = ra;\
- newframe->Xecode = rb;\
- newframe->Xmstart = mstart;\
- newframe->Xoffset_top = rc;\
+ newframe->Xeptr = ra;\
+ newframe->Xecode = rb;\
+ newframe->Xmstart = mstart;\
+ newframe->Xoffset_top = rc;\
newframe->Xeptrb = re;\
- newframe->Xrdepth = frame->Xrdepth + 1;\
- newframe->Xprevframe = frame;\
- frame = newframe;\
- DPRINTF(("restarting from line %d\n", __LINE__));\
- goto HEAP_RECURSE;\
- L_##rw:\
- DPRINTF(("jumped back to line %d\n", __LINE__));\
- }
-
-#define RRETURN(ra)\
- {\
+ newframe->Xrdepth = frame->Xrdepth + 1;\
+ newframe->Xprevframe = frame;\
+ frame = newframe;\
+ DPRINTF(("restarting from line %d\n", __LINE__));\
+ goto HEAP_RECURSE;\
+ L_##rw:\
+ DPRINTF(("jumped back to line %d\n", __LINE__));\
+ }
+
+#define RRETURN(ra)\
+ {\
heapframe *oldframe = frame;\
frame = oldframe->Xprevframe;\
- if (frame != NULL)\
- {\
- rrc = ra;\
- goto HEAP_RETURN;\
- }\
- return ra;\
- }
-
-
-/* Structure for remembering the local variables in a private frame */
-
-typedef struct heapframe {
- struct heapframe *Xprevframe;
+ if (frame != NULL)\
+ {\
+ rrc = ra;\
+ goto HEAP_RETURN;\
+ }\
+ return ra;\
+ }
+
+
+/* Structure for remembering the local variables in a private frame */
+
+typedef struct heapframe {
+ struct heapframe *Xprevframe;
struct heapframe *Xnextframe;
-
- /* Function arguments that may change */
-
+
+ /* Function arguments that may change */
+
PCRE_PUCHAR Xeptr;
const pcre_uchar *Xecode;
PCRE_PUCHAR Xmstart;
- int Xoffset_top;
- eptrblock *Xeptrb;
- unsigned int Xrdepth;
-
- /* Function local variables */
-
+ int Xoffset_top;
+ eptrblock *Xeptrb;
+ unsigned int Xrdepth;
+
+ /* Function local variables */
+
PCRE_PUCHAR Xcallpat;
#ifdef SUPPORT_UTF
PCRE_PUCHAR Xcharptr;
@@ -410,59 +410,59 @@ typedef struct heapframe {
PCRE_PUCHAR Xpp;
PCRE_PUCHAR Xprev;
PCRE_PUCHAR Xsaved_eptr;
-
- recursion_info Xnew_recursive;
-
- BOOL Xcur_is_word;
- BOOL Xcondition;
- BOOL Xprev_is_word;
-
-#ifdef SUPPORT_UCP
- int Xprop_type;
+
+ recursion_info Xnew_recursive;
+
+ BOOL Xcur_is_word;
+ BOOL Xcondition;
+ BOOL Xprev_is_word;
+
+#ifdef SUPPORT_UCP
+ int Xprop_type;
unsigned int Xprop_value;
- int Xprop_fail_result;
- int Xoclength;
+ int Xprop_fail_result;
+ int Xoclength;
pcre_uchar Xocchars[6];
-#endif
-
+#endif
+
int Xcodelink;
- int Xctype;
- unsigned int Xfc;
- int Xfi;
- int Xlength;
- int Xmax;
- int Xmin;
+ int Xctype;
+ unsigned int Xfc;
+ int Xfi;
+ int Xlength;
+ int Xmax;
+ int Xmin;
unsigned int Xnumber;
- int Xoffset;
+ int Xoffset;
unsigned int Xop;
pcre_int32 Xsave_capture_last;
- int Xsave_offset1, Xsave_offset2, Xsave_offset3;
- int Xstacksave[REC_STACK_SAVE_MAX];
-
- eptrblock Xnewptrb;
-
- /* Where to jump back to */
-
- int Xwhere;
-
-} heapframe;
-
-#endif
-
-
-/***************************************************************************
-***************************************************************************/
-
-
-
-/*************************************************
-* Match from current position *
-*************************************************/
-
-/* This function is called recursively in many circumstances. Whenever it
-returns a negative (error) response, the outer incarnation must also return the
+ int Xsave_offset1, Xsave_offset2, Xsave_offset3;
+ int Xstacksave[REC_STACK_SAVE_MAX];
+
+ eptrblock Xnewptrb;
+
+ /* Where to jump back to */
+
+ int Xwhere;
+
+} heapframe;
+
+#endif
+
+
+/***************************************************************************
+***************************************************************************/
+
+
+
+/*************************************************
+* Match from current position *
+*************************************************/
+
+/* This function is called recursively in many circumstances. Whenever it
+returns a negative (error) response, the outer incarnation must also return the
same response. */
-
+
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the subject (i.e.
@@ -488,29 +488,29 @@ the subject. */
/* Performance note: It might be tempting to extract commonly used fields from
the md structure (e.g. utf, end_subject) into individual variables to improve
-performance. Tests using gcc on a SPARC disproved this; in the first case, it
-made performance worse.
-
-Arguments:
- eptr pointer to current character in subject
- ecode pointer to current position in compiled code
- mstart pointer to the current match start position (can be modified
- by encountering \K)
- offset_top current top pointer
- md pointer to "static" info for the match
- eptrb pointer to chain of blocks containing eptr at start of
- brackets - for testing for empty matches
- rdepth the recursion depth
-
-Returns: MATCH_MATCH if matched ) these values are >= 0
- MATCH_NOMATCH if failed to match )
+performance. Tests using gcc on a SPARC disproved this; in the first case, it
+made performance worse.
+
+Arguments:
+ eptr pointer to current character in subject
+ ecode pointer to current position in compiled code
+ mstart pointer to the current match start position (can be modified
+ by encountering \K)
+ offset_top current top pointer
+ md pointer to "static" info for the match
+ eptrb pointer to chain of blocks containing eptr at start of
+ brackets - for testing for empty matches
+ rdepth the recursion depth
+
+Returns: MATCH_MATCH if matched ) these values are >= 0
+ MATCH_NOMATCH if failed to match )
a negative MATCH_xxx value for PRUNE, SKIP, etc
- a negative PCRE_ERROR_xxx value if aborted by an error condition
- (e.g. stopped by repeated call or recursion limit)
-*/
-
+ a negative PCRE_ERROR_xxx value if aborted by an error condition
+ (e.g. stopped by repeated call or recursion limit)
+*/
+
#ifdef __GNUC__
-static int
+static int
match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
unsigned int rdepth) __attribute__((noinline,noclone));
@@ -519,104 +519,104 @@ static int
match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
unsigned int rdepth)
-{
-/* These variables do not need to be preserved over recursion in this function,
-so they can be ordinary variables in all cases. Mark some of them with
-"register" because they are used a lot in loops. */
-
-register int rrc; /* Returns from recursive calls */
-register int i; /* Used for loops not involving calls to RMATCH() */
+{
+/* These variables do not need to be preserved over recursion in this function,
+so they can be ordinary variables in all cases. Mark some of them with
+"register" because they are used a lot in loops. */
+
+register int rrc; /* Returns from recursive calls */
+register int i; /* Used for loops not involving calls to RMATCH() */
register pcre_uint32 c; /* Character values not kept over RMATCH() calls */
register BOOL utf; /* Local copy of UTF flag for speed */
-
-BOOL minimize, possessive; /* Quantifier options */
+
+BOOL minimize, possessive; /* Quantifier options */
BOOL caseless;
int condcode;
-
-/* When recursion is not being used, all "local" variables that have to be
+
+/* When recursion is not being used, all "local" variables that have to be
preserved over calls to RMATCH() are part of a "frame". We set up the top-level
frame on the stack here; subsequent instantiations are obtained from the heap
whenever RMATCH() does a "recursion". See the macro definitions above. Putting
the top-level on the stack rather than malloc-ing them all gives a performance
boost in many cases where there is not much "recursion". */
-
-#ifdef NO_RECURSE
+
+#ifdef NO_RECURSE
heapframe *frame = (heapframe *)md->match_frames_base;
-
-/* Copy in the original argument variables */
-
-frame->Xeptr = eptr;
-frame->Xecode = ecode;
-frame->Xmstart = mstart;
-frame->Xoffset_top = offset_top;
-frame->Xeptrb = eptrb;
-frame->Xrdepth = rdepth;
-
-/* This is where control jumps back to to effect "recursion" */
-
-HEAP_RECURSE:
-
-/* Macros make the argument variables come from the current frame */
-
-#define eptr frame->Xeptr
-#define ecode frame->Xecode
-#define mstart frame->Xmstart
-#define offset_top frame->Xoffset_top
-#define eptrb frame->Xeptrb
-#define rdepth frame->Xrdepth
-
-/* Ditto for the local variables */
-
+
+/* Copy in the original argument variables */
+
+frame->Xeptr = eptr;
+frame->Xecode = ecode;
+frame->Xmstart = mstart;
+frame->Xoffset_top = offset_top;
+frame->Xeptrb = eptrb;
+frame->Xrdepth = rdepth;
+
+/* This is where control jumps back to to effect "recursion" */
+
+HEAP_RECURSE:
+
+/* Macros make the argument variables come from the current frame */
+
+#define eptr frame->Xeptr
+#define ecode frame->Xecode
+#define mstart frame->Xmstart
+#define offset_top frame->Xoffset_top
+#define eptrb frame->Xeptrb
+#define rdepth frame->Xrdepth
+
+/* Ditto for the local variables */
+
#ifdef SUPPORT_UTF
-#define charptr frame->Xcharptr
-#endif
-#define callpat frame->Xcallpat
+#define charptr frame->Xcharptr
+#endif
+#define callpat frame->Xcallpat
#define codelink frame->Xcodelink
-#define data frame->Xdata
-#define next frame->Xnext
-#define pp frame->Xpp
-#define prev frame->Xprev
-#define saved_eptr frame->Xsaved_eptr
-
-#define new_recursive frame->Xnew_recursive
-
-#define cur_is_word frame->Xcur_is_word
-#define condition frame->Xcondition
-#define prev_is_word frame->Xprev_is_word
-
-#ifdef SUPPORT_UCP
-#define prop_type frame->Xprop_type
-#define prop_value frame->Xprop_value
-#define prop_fail_result frame->Xprop_fail_result
-#define oclength frame->Xoclength
-#define occhars frame->Xocchars
-#endif
-
-#define ctype frame->Xctype
-#define fc frame->Xfc
-#define fi frame->Xfi
-#define length frame->Xlength
-#define max frame->Xmax
-#define min frame->Xmin
-#define number frame->Xnumber
-#define offset frame->Xoffset
-#define op frame->Xop
-#define save_capture_last frame->Xsave_capture_last
-#define save_offset1 frame->Xsave_offset1
-#define save_offset2 frame->Xsave_offset2
-#define save_offset3 frame->Xsave_offset3
-#define stacksave frame->Xstacksave
-
-#define newptrb frame->Xnewptrb
-
-/* When recursion is being used, local variables are allocated on the stack and
-get preserved during recursion in the normal way. In this environment, fi and
-i, and fc and c, can be the same variables. */
-
-#else /* NO_RECURSE not defined */
-#define fi i
-#define fc c
-
+#define data frame->Xdata
+#define next frame->Xnext
+#define pp frame->Xpp
+#define prev frame->Xprev
+#define saved_eptr frame->Xsaved_eptr
+
+#define new_recursive frame->Xnew_recursive
+
+#define cur_is_word frame->Xcur_is_word
+#define condition frame->Xcondition
+#define prev_is_word frame->Xprev_is_word
+
+#ifdef SUPPORT_UCP
+#define prop_type frame->Xprop_type
+#define prop_value frame->Xprop_value
+#define prop_fail_result frame->Xprop_fail_result
+#define oclength frame->Xoclength
+#define occhars frame->Xocchars
+#endif
+
+#define ctype frame->Xctype
+#define fc frame->Xfc
+#define fi frame->Xfi
+#define length frame->Xlength
+#define max frame->Xmax
+#define min frame->Xmin
+#define number frame->Xnumber
+#define offset frame->Xoffset
+#define op frame->Xop
+#define save_capture_last frame->Xsave_capture_last
+#define save_offset1 frame->Xsave_offset1
+#define save_offset2 frame->Xsave_offset2
+#define save_offset3 frame->Xsave_offset3
+#define stacksave frame->Xstacksave
+
+#define newptrb frame->Xnewptrb
+
+/* When recursion is being used, local variables are allocated on the stack and
+get preserved during recursion in the normal way. In this environment, fi and
+i, and fc and c, can be the same variables. */
+
+#else /* NO_RECURSE not defined */
+#define fi i
+#define fc c
+
/* Many of the following variables are used only in small blocks of the code.
My normal style of coding would have declared them within each of those blocks.
However, in order to accommodate the version of this code that uses an external
@@ -624,7 +624,7 @@ However, in order to accommodate the version of this code that uses an external
declarations can be cut out in a block. The only declarations within blocks
below are for variables that do not have to be preserved over a recursive call
to RMATCH(). */
-
+
#ifdef SUPPORT_UTF
const pcre_uchar *charptr;
#endif
@@ -638,30 +638,30 @@ PCRE_PUCHAR saved_eptr;
recursion_info new_recursive;
BOOL cur_is_word;
-BOOL condition;
-BOOL prev_is_word;
-
-#ifdef SUPPORT_UCP
-int prop_type;
+BOOL condition;
+BOOL prev_is_word;
+
+#ifdef SUPPORT_UCP
+int prop_type;
unsigned int prop_value;
-int prop_fail_result;
-int oclength;
+int prop_fail_result;
+int oclength;
pcre_uchar occhars[6];
-#endif
-
+#endif
+
int codelink;
-int ctype;
-int length;
-int max;
-int min;
+int ctype;
+int length;
+int max;
+int min;
unsigned int number;
-int offset;
+int offset;
unsigned int op;
pcre_int32 save_capture_last;
-int save_offset1, save_offset2, save_offset3;
-int stacksave[REC_STACK_SAVE_MAX];
-
-eptrblock newptrb;
+int save_offset1, save_offset2, save_offset3;
+int stacksave[REC_STACK_SAVE_MAX];
+
+eptrblock newptrb;
/* There is a special fudge for calling match() in a way that causes it to
measure the size of its basic stack frame when the stack is being used for
@@ -679,8 +679,8 @@ if (ecode == NULL)
return (len > 0)? -len : len;
}
}
-#endif /* NO_RECURSE */
-
+#endif /* NO_RECURSE */
+
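Boiled down, the scheme these comments describe is: take a frame from a pre-allocated block, copy the would-be call arguments into it, and jump to a label instead of calling match() again. A rough standalone sketch follows, with a hypothetical sketch_frame type and none of the real RMATCH/RRETURN bookkeeping; only the "call" direction is shown.

#include <stddef.h>

/* Sketch: arguments that would have been passed to a recursive call are
   copied into the next frame of a pre-allocated array, and control jumps
   back to a label instead of calling the function again. The real code
   also records a return point in each frame so it can unwind and resume
   after the simulated call. */
typedef struct sketch_frame {
  const char *Xeptr;     /* saved subject pointer argument */
  const char *Xecode;    /* saved pattern-code pointer argument */
  unsigned    Xrdepth;   /* saved recursion-depth argument */
} sketch_frame;

static unsigned sketch_match(const char *eptr, const char *ecode,
                             sketch_frame *frames, size_t nframes)
{
size_t depth = 0;
frames[0].Xeptr = eptr;
frames[0].Xecode = ecode;
frames[0].Xrdepth = 0;

HEAP_RECURSE:
/* The body works on frames[depth] where a recursive version would use its
   own parameters and locals. */
if (*frames[depth].Xecode != '\0' && depth + 1 < nframes)
  {
  frames[depth + 1].Xeptr   = frames[depth].Xeptr;
  frames[depth + 1].Xecode  = frames[depth].Xecode + 1;  /* "recurse" on the tail */
  frames[depth + 1].Xrdepth = frames[depth].Xrdepth + 1;
  depth++;
  goto HEAP_RECURSE;                 /* simulated recursive call */
  }
return frames[depth].Xrdepth;        /* depth reached without real recursion */
}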
/* To save space on the stack and in the heap frame, I have doubled up on some
of the local variables that are used only in localised parts of the code, but
still need to be preserved over recursive calls of match(). These macros define
@@ -694,47 +694,47 @@ the alternative names that are used. */
#define foc number
#define save_mark data
-/* These statements are here to stop the compiler complaining about uninitialized
-variables. */
-
-#ifdef SUPPORT_UCP
-prop_value = 0;
-prop_fail_result = 0;
-#endif
-
-
-/* This label is used for tail recursion, which is used in a few cases even
-when NO_RECURSE is not defined, in order to reduce the amount of stack that is
-used. Thanks to Ian Taylor for noticing this possibility and sending the
-original patch. */
-
-TAIL_RECURSE:
-
-/* OK, now we can get on with the real code of the function. Recursive calls
-are specified by the macro RMATCH and RRETURN is used to return. When
-NO_RECURSE is *not* defined, these just turn into a recursive call to match()
+/* These statements are here to stop the compiler complaining about uninitialized
+variables. */
+
+#ifdef SUPPORT_UCP
+prop_value = 0;
+prop_fail_result = 0;
+#endif
+
+
+/* This label is used for tail recursion, which is used in a few cases even
+when NO_RECURSE is not defined, in order to reduce the amount of stack that is
+used. Thanks to Ian Taylor for noticing this possibility and sending the
+original patch. */
+
+TAIL_RECURSE:
+
+/* OK, now we can get on with the real code of the function. Recursive calls
+are specified by the macro RMATCH and RRETURN is used to return. When
+NO_RECURSE is *not* defined, these just turn into a recursive call to match()
and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
-defined). However, RMATCH isn't like a function call because it's quite a
-complicated macro. It has to be used in one particular way. This shouldn't,
-however, impact performance when true recursion is being used. */
-
+defined). However, RMATCH isn't like a function call because it's quite a
+complicated macro. It has to be used in one particular way. This shouldn't,
+however, impact performance when true recursion is being used. */
+
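In its simplest form, the tail-recursion trick is: when the result of a self-call would be returned unchanged, update the arguments in place and jump back to a label, reusing the current stack frame. A standalone sketch with a hypothetical skip_spaces() helper:

/* Sketch: the recursive form would be
     if (*s == ' ') return skip_spaces(s + 1);
     return s;
   Because the self-call is in tail position, it can be replaced by an
   argument update and a jump. In match() the same is done by updating
   ecode/eptr and using "goto TAIL_RECURSE" instead of RMATCH + return. */
static const char *skip_spaces(const char *s)
{
TAIL_RECURSE:
if (*s == ' ') { s++; goto TAIL_RECURSE; }   /* tail call turned into a jump */
return s;
}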
#ifdef SUPPORT_UTF
utf = md->utf; /* Local copy of the flag */
-#else
+#else
utf = FALSE;
-#endif
-
-/* First check that we haven't called match() too many times, or that we
-haven't exceeded the recursive call limit. */
-
-if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
-if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
-
-/* At the start of a group with an unlimited repeat that may match an empty
+#endif
+
+/* First check that we haven't called match() too many times, or that we
+haven't exceeded the recursive call limit. */
+
+if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
+if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
+
+/* At the start of a group with an unlimited repeat that may match an empty
string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
done this way to save having to use another function argument, which would take
up space on the stack. See also MATCH_CONDASSERT below.
-
+
When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
such remembered pointers, to be checked when we hit the closing ket, in order
to break infinite loops that match no characters. When match() is called in
@@ -743,20 +743,20 @@ NOT be used with tail recursion, because the memory block that is used is on
the stack, so a new one may be required for each match(). */
if (md->match_function_type == MATCH_CBEGROUP)
- {
- newptrb.epb_saved_eptr = eptr;
- newptrb.epb_prev = eptrb;
- eptrb = &newptrb;
+ {
+ newptrb.epb_saved_eptr = eptr;
+ newptrb.epb_prev = eptrb;
+ eptrb = &newptrb;
md->match_function_type = 0;
- }
-
-/* Now start processing the opcodes. */
-
-for (;;)
- {
- minimize = possessive = FALSE;
- op = *ecode;
-
+ }
+
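The chain itself is nothing more than a singly linked list of stack-allocated nodes, one per active group; a standalone sketch with hypothetical names:

#include <stddef.h>

/* Sketch: remember the subject position at which a possibly-empty group
   started by pushing a stack-allocated node onto a caller-provided chain.
   At the group's closing ket, comparing against the saved position tells
   whether any characters were consumed since the group started. */
struct pos_block {
  const char *saved_pos;        /* subject pointer at group start */
  struct pos_block *prev;       /* enclosing group, or NULL       */
};

static int group_matched_empty(const struct pos_block *chain,
                               const char *current_pos)
{
return chain != NULL && chain->saved_pos == current_pos;
}

/* Caller side (inside the code that handles the group):
     struct pos_block here = { current_pos, chain };
     chain = &here;               -- push without any allocation        */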
+/* Now start processing the opcodes. */
+
+for (;;)
+ {
+ minimize = possessive = FALSE;
+ op = *ecode;
+
switch(op)
{
case OP_MARK:
@@ -766,14 +766,14 @@ for (;;)
eptrb, RM55);
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
md->mark == NULL) md->mark = ecode + 2;
-
+
/* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
argument, and we must check whether that argument matches this MARK's
argument. It is passed back in md->start_match_ptr (an overloading of that
variable). If it does match, we reset that variable to the current subject
position and return MATCH_SKIP. Otherwise, pass back the return code
unaltered. */
-
+
else if (rrc == MATCH_SKIP_ARG &&
STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
{
@@ -782,21 +782,21 @@ for (;;)
}
RRETURN(rrc);
- case OP_FAIL:
- RRETURN(MATCH_NOMATCH);
-
+ case OP_FAIL:
+ RRETURN(MATCH_NOMATCH);
+
case OP_COMMIT:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM52);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_COMMIT);
- case OP_PRUNE:
+ case OP_PRUNE:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM51);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_PRUNE);
-
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ RRETURN(MATCH_PRUNE);
+
case OP_PRUNE_ARG:
md->nomatch_mark = ecode + 2;
md->mark = NULL; /* In case previously set by assertion */
@@ -804,16 +804,16 @@ for (;;)
eptrb, RM56);
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
md->mark == NULL) md->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
-
- case OP_SKIP:
+
+ case OP_SKIP:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM53);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->start_match_ptr = eptr; /* Pass back current position */
- RRETURN(MATCH_SKIP);
-
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ md->start_match_ptr = eptr; /* Pass back current position */
+ RRETURN(MATCH_SKIP);
+
/* Note that, for Perl compatibility, SKIP with an argument does NOT set
nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
@@ -845,13 +845,13 @@ for (;;)
the branch in which it occurs can be determined. Overload the start of
match pointer to do this. */
- case OP_THEN:
+ case OP_THEN:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM54);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->start_match_ptr = ecode;
- RRETURN(MATCH_THEN);
-
+ RRETURN(MATCH_THEN);
+
case OP_THEN_ARG:
md->nomatch_mark = ecode + 2;
md->mark = NULL; /* In case previously set by assertion */
@@ -862,7 +862,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->start_match_ptr = ecode;
RRETURN(MATCH_THEN);
-
+
/* Handle an atomic group that does not contain any capturing parentheses.
This can be handled like an assertion. Prior to 8.13, all atomic groups
were handled this way. In 8.13, the code was changed as below for ONCE, so
@@ -870,7 +870,7 @@ for (;;)
However, this uses a lot more stack, so in 8.20, atomic groups that do not
contain any captures generate OP_ONCE_NC, which can be handled in the old,
less stack intensive way.
-
+
Check the alternative branches in turn - the matching won't pass the KET
for this kind of subpattern. If any one branch matches, we carry on as at
the end of a normal bracket, leaving the subject pointer, but resetting
@@ -955,36 +955,36 @@ for (;;)
the working value and also the values of the final offsets, in case they
were set by a previous iteration of the same bracket.
- If there isn't enough space in the offset vector, treat this as if it were
- a non-capturing bracket. Don't worry about setting the flag for the error
- case here; that is handled in the code for KET. */
-
- case OP_CBRA:
- case OP_SCBRA:
- number = GET2(ecode, 1+LINK_SIZE);
- offset = number << 1;
-
+ If there isn't enough space in the offset vector, treat this as if it were
+ a non-capturing bracket. Don't worry about setting the flag for the error
+ case here; that is handled in the code for KET. */
+
+ case OP_CBRA:
+ case OP_SCBRA:
+ number = GET2(ecode, 1+LINK_SIZE);
+ offset = number << 1;
+
#ifdef PCRE_DEBUG
- printf("start bracket %d\n", number);
- printf("subject=");
- pchars(eptr, 16, TRUE, md);
- printf("\n");
-#endif
-
- if (offset < md->offset_max)
- {
- save_offset1 = md->offset_vector[offset];
- save_offset2 = md->offset_vector[offset+1];
- save_offset3 = md->offset_vector[md->offset_end - number];
- save_capture_last = md->capture_last;
+ printf("start bracket %d\n", number);
+ printf("subject=");
+ pchars(eptr, 16, TRUE, md);
+ printf("\n");
+#endif
+
+ if (offset < md->offset_max)
+ {
+ save_offset1 = md->offset_vector[offset];
+ save_offset2 = md->offset_vector[offset+1];
+ save_offset3 = md->offset_vector[md->offset_end - number];
+ save_capture_last = md->capture_last;
save_mark = md->mark;
-
- DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+
+ DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
md->offset_vector[md->offset_end - number] =
(int)(eptr - md->start_subject);
-
+
for (;;)
- {
+ {
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM1);
@@ -1012,36 +1012,36 @@ for (;;)
/* Anything other than NOMATCH is passed back. */
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
+ md->capture_last = save_capture_last;
+ ecode += GET(ecode, 1);
md->mark = save_mark;
if (*ecode != OP_ALT) break;
- }
-
- DPRINTF(("bracket %d failed\n", number));
- md->offset_vector[offset] = save_offset1;
- md->offset_vector[offset+1] = save_offset2;
- md->offset_vector[md->offset_end - number] = save_offset3;
-
+ }
+
+ DPRINTF(("bracket %d failed\n", number));
+ md->offset_vector[offset] = save_offset1;
+ md->offset_vector[offset+1] = save_offset2;
+ md->offset_vector[md->offset_end - number] = save_offset3;
+
/* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
RRETURN(rrc);
- }
-
- /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
- as a non-capturing bracket. */
-
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-
- DPRINTF(("insufficient capture room: treat as non-capturing\n"));
-
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-
+ }
+
+ /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
+ as a non-capturing bracket. */
+
+ /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+ /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+
+ DPRINTF(("insufficient capture room: treat as non-capturing\n"));
+
+ /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+ /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+
/* Non-capturing or atomic group, except for possessive with unlimited
repeat and ONCE group with no captures. Loop for all the alternatives.
-
+
When we get to the final alternative within the brackets, we used to return
the result of a recursive call to match() whatever happened so it was
possible to reduce stack usage by turning this into a tail recursion,
@@ -1060,12 +1060,12 @@ for (;;)
previous backup points can be taken. */
case OP_ONCE:
- case OP_BRA:
- case OP_SBRA:
- DPRINTF(("start non-capturing bracket\n"));
+ case OP_BRA:
+ case OP_SBRA:
+ DPRINTF(("start non-capturing bracket\n"));
- for (;;)
- {
+ for (;;)
+ {
if (op >= OP_SBRA || op == OP_ONCE)
md->match_function_type = MATCH_CBEGROUP;
@@ -1074,7 +1074,7 @@ for (;;)
above. */
else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
- {
+ {
ecode += PRIV(OP_lengths)[*ecode];
goto TAIL_RECURSE;
}
@@ -1100,7 +1100,7 @@ for (;;)
if (rrc != MATCH_NOMATCH)
{
if (rrc == MATCH_ONCE)
- {
+ {
const pcre_uchar *scode = ecode;
if (*scode != OP_ONCE) /* If not at start, find it */
{
@@ -1108,7 +1108,7 @@ for (;;)
scode -= GET(scode, 1);
}
if (md->once_target == scode) rrc = MATCH_NOMATCH;
- }
+ }
RRETURN(rrc);
}
ecode += GET(ecode, 1);
@@ -1116,9 +1116,9 @@ for (;;)
if (*ecode != OP_ALT) break;
md->capture_last = save_capture_last;
}
-
+
RRETURN(MATCH_NOMATCH);
-
+
/* Handle possessive capturing brackets with an unlimited repeat. We come
here from BRAZERO with allow_zero set TRUE. The offset_vector values are
handled similarly to the normal case above. However, the matching is
@@ -1184,11 +1184,11 @@ for (;;)
}
eptr = md->end_match_ptr;
continue;
- }
-
+ }
+
/* See comment in the code for capturing groups above about handling
THEN. */
-
+
if (rrc == MATCH_THEN)
{
next = ecode + GET(ecode,1);
@@ -1199,9 +1199,9 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
+ ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
- }
+ }
if (!matched_once)
{
@@ -1276,15 +1276,15 @@ for (;;)
}
RRETURN(MATCH_NOMATCH);
- /* Control never reaches here. */
-
+ /* Control never reaches here. */
+
/* Conditional group: compilation checked that there are no more than two
branches. If the condition is false, skipping the first branch takes us
past the end of the item if there is only one branch, but that's exactly
what we want. */
-
- case OP_COND:
- case OP_SCOND:
+
+ case OP_COND:
+ case OP_SCOND:
/* The variable codelink will be added to ecode when the condition is
false, to get to the second branch. Setting it to the offset to the ALT
@@ -1298,7 +1298,7 @@ for (;;)
inserted between OP_COND and an assertion condition. */
if (*ecode == OP_CALLOUT)
- {
+ {
if (PUBL(callout) != NULL)
{
PUBL(callout_block) cb;
@@ -1332,13 +1332,13 @@ for (;;)
ecode += PRIV(OP_lengths)[OP_CALLOUT];
codelink -= PRIV(OP_lengths)[OP_CALLOUT];
- }
-
+ }
+
/* Test the various possible conditions */
condition = FALSE;
switch(condcode = *ecode)
- {
+ {
case OP_RREF: /* Numbered group recursion test */
if (md->recursive != NULL) /* Not recursing => FALSE */
{
@@ -1364,9 +1364,9 @@ for (;;)
case OP_CREF: /* Numbered group used test */
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
- condition = offset < offset_top && md->offset_vector[offset] >= 0;
+ condition = offset < offset_top && md->offset_vector[offset] >= 0;
break;
-
+
case OP_DNCREF: /* Duplicate named group used test */
{
int count = GET2(ecode, 1 + IMM2_SIZE);
@@ -1380,11 +1380,11 @@ for (;;)
}
}
break;
-
+
case OP_DEF: /* DEFINE - always false */
case OP_FAIL: /* From optimized (?!) condition */
break;
-
+
/* The condition is an assertion. Call match() to evaluate it - setting
md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
of an assertion. */
@@ -1392,11 +1392,11 @@ for (;;)
default:
md->match_function_type = MATCH_CONDASSERT;
RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
- if (rrc == MATCH_MATCH)
- {
+ if (rrc == MATCH_MATCH)
+ {
if (md->end_offset_top > offset_top)
offset_top = md->end_offset_top; /* Captures may have happened */
- condition = TRUE;
+ condition = TRUE;
/* Advance ecode past the assertion to the start of the first branch,
but adjust it so that the general choosing code below works. If the
@@ -1405,23 +1405,23 @@ for (;;)
if (*ecode == OP_BRAZERO) ecode++;
ecode += GET(ecode, 1);
- while (*ecode == OP_ALT) ecode += GET(ecode, 1);
+ while (*ecode == OP_ALT) ecode += GET(ecode, 1);
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
- }
+ }
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
assertion; it is therefore treated as NOMATCH. Any other return is an
error. */
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
- {
- RRETURN(rrc); /* Need braces because of following else */
- }
+ else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
+ {
+ RRETURN(rrc); /* Need braces because of following else */
+ }
break;
- }
-
+ }
+
/* Choose branch according to the condition */
-
+
ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
/* We are now at the branch that is to be obeyed. As there is only one, we
@@ -1435,28 +1435,28 @@ for (;;)
of alternatives, of course). */
if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
- {
+ {
if (op != OP_SCOND)
- {
- goto TAIL_RECURSE;
- }
+ {
+ goto TAIL_RECURSE;
+ }
md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
RRETURN(rrc);
- }
+ }
/* Condition false & no alternative; continue after the group. */
else
- {
- }
- break;
-
-
+ {
+ }
+ break;
+
+
/* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
to close any currently open capturing brackets. */
-
+
case OP_CLOSE:
number = GET2(ecode, 1); /* Must be less than 65536 */
offset = number << 1;
@@ -1468,7 +1468,7 @@ for (;;)
md->capture_last = (md->capture_last & OVFLMASK) | number;
if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
- {
+ {
md->offset_vector[offset] =
md->offset_vector[md->offset_end - number];
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
@@ -1484,11 +1484,11 @@ for (;;)
while (iptr < iend) *iptr++ = -1;
offset_top = offset + 2;
}
- }
+ }
ecode += 1 + IMM2_SIZE;
break;
-
-
+
+
/* End of the pattern, either real or forced. */
case OP_END:
@@ -1509,29 +1509,29 @@ for (;;)
/* Otherwise, we have a match. */
- md->end_match_ptr = eptr; /* Record where we ended */
- md->end_offset_top = offset_top; /* and how many extracts were taken */
- md->start_match_ptr = mstart; /* and the start (\K can modify) */
-
+ md->end_match_ptr = eptr; /* Record where we ended */
+ md->end_offset_top = offset_top; /* and how many extracts were taken */
+ md->start_match_ptr = mstart; /* and the start (\K can modify) */
+
/* For some reason, the macros don't work properly if an expression is
given as the argument to RRETURN when the heap is in use. */
-
+
rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
RRETURN(rrc);
-
- /* Assertion brackets. Check the alternative branches in turn - the
- matching won't pass the KET for an assertion. If any one branch matches,
- the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
- start of each branch to move the current point backwards, so the code at
+
+ /* Assertion brackets. Check the alternative branches in turn - the
+ matching won't pass the KET for an assertion. If any one branch matches,
+ the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
+ start of each branch to move the current point backwards, so the code at
this level is identical to the lookahead case. When the assertion is part
of a condition, we want to return immediately afterwards. The caller of
this incarnation of the match() function will have set MATCH_CONDASSERT in
    md->match_function_type, and one of these opcodes will be the first opcode
that is processed. We use a local variable that is preserved over calls to
match() to remember this case. */
-
- case OP_ASSERT:
- case OP_ASSERTBACK:
+
+ case OP_ASSERT:
+ case OP_ASSERTBACK:
save_mark = md->mark;
if (md->match_function_type == MATCH_CONDASSERT)
{
@@ -1542,8 +1542,8 @@ for (;;)
/* Loop for each branch */
- do
- {
+ do
+ {
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
/* A match means that the assertion is true; break out of the loop
@@ -1577,32 +1577,32 @@ for (;;)
Perl. */
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode, 1);
- }
+ ecode += GET(ecode, 1);
+ }
while (*ecode == OP_ALT); /* Continue for next alternative */
/* If we have tried all the alternative branches, the assertion has
failed. If not, we broke out after a match. */
- if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
-
- /* If checking an assertion for a condition, return MATCH_MATCH. */
-
+ if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
+
+ /* If checking an assertion for a condition, return MATCH_MATCH. */
+
if (condassert) RRETURN(MATCH_MATCH);
-
+
/* Continue from after a successful assertion, updating the offsets high
water mark, since extracts may have been taken during the assertion. */
-
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- ecode += 1 + LINK_SIZE;
- offset_top = md->end_offset_top;
- continue;
-
+
+ do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+ ecode += 1 + LINK_SIZE;
+ offset_top = md->end_offset_top;
+ continue;
+
/* Negative assertion: all branches must fail to match for the assertion to
succeed. */
-
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK_NOT:
+
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK_NOT:
save_mark = md->mark;
if (md->match_function_type == MATCH_CONDASSERT)
{
@@ -1613,8 +1613,8 @@ for (;;)
/* Loop for each alternative branch. */
- do
- {
+ do
+ {
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
md->mark = save_mark; /* Always restore the mark setting */
@@ -1660,63 +1660,63 @@ for (;;)
/* Continue with next branch */
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
-
+ ecode += GET(ecode,1);
+ }
+ while (*ecode == OP_ALT);
+
/* All branches in the assertion failed to match. */
-
+
NEG_ASSERT_TRUE:
if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
ecode += 1 + LINK_SIZE; /* Continue with current branch */
- continue;
-
- /* Move the subject pointer back. This occurs only at the start of
- each branch of a lookbehind assertion. If we are too close to the start to
- move back, this match function fails. When working with UTF-8 we move
- back a number of characters, not bytes. */
-
- case OP_REVERSE:
+ continue;
+
+ /* Move the subject pointer back. This occurs only at the start of
+ each branch of a lookbehind assertion. If we are too close to the start to
+ move back, this match function fails. When working with UTF-8 we move
+ back a number of characters, not bytes. */
+
+ case OP_REVERSE:
#ifdef SUPPORT_UTF
if (utf)
- {
- i = GET(ecode, 1);
- while (i-- > 0)
- {
- eptr--;
- if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
- BACKCHAR(eptr);
- }
- }
- else
-#endif
-
- /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
-
- {
- eptr -= GET(ecode, 1);
- if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
- }
-
+ {
+ i = GET(ecode, 1);
+ while (i-- > 0)
+ {
+ eptr--;
+ if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
+ BACKCHAR(eptr);
+ }
+ }
+ else
+#endif
+
+ /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
+
+ {
+ eptr -= GET(ecode, 1);
+ if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
+ }
+
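Moving back by characters rather than bytes means skipping UTF-8 continuation bytes and failing if the start of the subject would be passed; a standalone sketch with a hypothetical back_n_chars() helper, assuming well-formed UTF-8:

#include <stddef.h>

/* Sketch: step back n UTF-8 characters from p, not n bytes, by skipping
   continuation bytes (0b10xxxxxx); returns NULL if that would move before
   start. Assumes the buffer holds well-formed UTF-8. */
static const unsigned char *back_n_chars(const unsigned char *p,
                                         const unsigned char *start,
                                         size_t n)
{
while (n-- > 0)
  {
  if (p == start) return NULL;                        /* too close to the start */
  p--;
  while (p > start && (*p & 0xC0) == 0x80) p--;       /* skip continuation bytes */
  }
return p;
}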
/* Save the earliest consulted character, then skip to next op code */
-
+
if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
- ecode += 1 + LINK_SIZE;
- break;
-
- /* The callout item calls an external function, if one is provided, passing
- details of the match so far. This is mainly for debugging, though the
- function is able to force a failure. */
-
- case OP_CALLOUT:
+ ecode += 1 + LINK_SIZE;
+ break;
+
+ /* The callout item calls an external function, if one is provided, passing
+ details of the match so far. This is mainly for debugging, though the
+ function is able to force a failure. */
+
+ case OP_CALLOUT:
if (PUBL(callout) != NULL)
- {
+ {
PUBL(callout_block) cb;
    cb.version          = 2;   /* Version 2 of the callout block */
- cb.callout_number = ecode[1];
- cb.offset_vector = md->offset_vector;
+ cb.callout_number = ecode[1];
+ cb.offset_vector = md->offset_vector;
#if defined COMPILE_PCRE8
- cb.subject = (PCRE_SPTR)md->start_subject;
+ cb.subject = (PCRE_SPTR)md->start_subject;
#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)md->start_subject;
#elif defined COMPILE_PCRE32
@@ -1725,24 +1725,24 @@ for (;;)
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
cb.current_position = (int)(eptr - md->start_subject);
- cb.pattern_position = GET(ecode, 2);
- cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
- cb.capture_top = offset_top/2;
+ cb.pattern_position = GET(ecode, 2);
+ cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
+ cb.capture_top = offset_top/2;
cb.capture_last = md->capture_last & CAPLMASK;
/* Internal change requires this for API compatibility. */
if (cb.capture_last == 0) cb.capture_last = -1;
- cb.callout_data = md->callout_data;
+ cb.callout_data = md->callout_data;
cb.mark = md->nomatch_mark;
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
- }
- ecode += 2 + 2*LINK_SIZE;
- break;
-
- /* Recursion either matches the current regex, or some subexpression. The
- offset data is the offset to the starting bracket from the start of the
- whole pattern. (This is so that it works from duplicated subpatterns.)
-
+ if (rrc < 0) RRETURN(rrc);
+ }
+ ecode += 2 + 2*LINK_SIZE;
+ break;
+
+ /* Recursion either matches the current regex, or some subexpression. The
+ offset data is the offset to the starting bracket from the start of the
+ whole pattern. (This is so that it works from duplicated subpatterns.)
+
The state of the capturing groups is preserved over recursion, and
re-instated afterwards. We don't know how many are started and not yet
finished (offset_top records the completed total) so we just have to save
@@ -1750,21 +1750,21 @@ for (;;)
large to put on the stack, but using malloc for small numbers seems
expensive. As a compromise, the stack is used when there are no more than
REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
-
- There are also other values that have to be saved. We use a chained
- sequence of blocks that actually live on the stack. Thanks to Robin Houston
+
+ There are also other values that have to be saved. We use a chained
+ sequence of blocks that actually live on the stack. Thanks to Robin Houston
for the original version of this logic. It has, however, been hacked around
a lot, so he is not to blame for the current way it works. */
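The compromise described here is the usual small-buffer pattern: snapshot into a fixed stack array when it fits, fall back to malloc otherwise, and free only what was heap-allocated. A rough sketch with a hypothetical limit and helper, not the real REC_STACK_SAVE_MAX code path:

#include <stdlib.h>
#include <string.h>

#define SMALL_SAVE_MAX 30

/* Sketch: snapshot count ints, using the caller's stack buffer (of at least
   SMALL_SAVE_MAX ints) when the vector is small and malloc otherwise.
   Returns NULL only if malloc was needed and failed. */
static int *save_offsets(const int *vec, size_t count, int *stackbuf)
{
int *save = (count <= SMALL_SAVE_MAX) ? stackbuf
                                      : (int *)malloc(count * sizeof(int));
if (save != NULL) memcpy(save, vec, count * sizeof(int));
return save;
}

/* Usage:
     int stackbuf[SMALL_SAVE_MAX];
     int *saved = save_offsets(offsets, n, stackbuf);
     ...
     if (saved != stackbuf) free(saved);    -- free only the heap case      */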
-
- case OP_RECURSE:
- {
+
+ case OP_RECURSE:
+ {
recursion_info *ri;
unsigned int recno;
- callpat = md->start_code + GET(ecode, 1);
+ callpat = md->start_code + GET(ecode, 1);
recno = (callpat == md->start_code)? 0 :
- GET2(callpat, 1 + LINK_SIZE);
-
+ GET2(callpat, 1 + LINK_SIZE);
+
/* Check for repeating a recursion without advancing the subject pointer.
This should catch convoluted mutual recursions. (Some simple cases are
caught at compile time.) */
@@ -1773,41 +1773,41 @@ for (;;)
if (recno == ri->group_num && eptr == ri->subject_position)
RRETURN(PCRE_ERROR_RECURSELOOP);
- /* Add to "recursing stack" */
-
+ /* Add to "recursing stack" */
+
new_recursive.group_num = recno;
new_recursive.saved_capture_last = md->capture_last;
new_recursive.subject_position = eptr;
- new_recursive.prevrec = md->recursive;
- md->recursive = &new_recursive;
-
+ new_recursive.prevrec = md->recursive;
+ md->recursive = &new_recursive;
+
/* Where to continue from afterwards */
-
- ecode += 1 + LINK_SIZE;
-
+
+ ecode += 1 + LINK_SIZE;
+
/* Now save the offset data */
-
- new_recursive.saved_max = md->offset_end;
- if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
- new_recursive.offset_save = stacksave;
- else
- {
- new_recursive.offset_save =
+
+ new_recursive.saved_max = md->offset_end;
+ if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
+ new_recursive.offset_save = stacksave;
+ else
+ {
+ new_recursive.offset_save =
(int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
- if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
- }
- memcpy(new_recursive.offset_save, md->offset_vector,
- new_recursive.saved_max * sizeof(int));
-
+ if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
+ }
+ memcpy(new_recursive.offset_save, md->offset_vector,
+ new_recursive.saved_max * sizeof(int));
+
/* OK, now we can do the recursion. After processing each alternative,
restore the offset data and the last captured value. If there were nested
recursions, md->recursive might be changed, so reset it before looping.
*/
-
- DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
+
+ DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
cbegroup = (*callpat >= OP_SBRA);
- do
- {
+ do
+ {
if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
md, eptrb, RM6);
@@ -1816,9 +1816,9 @@ for (;;)
md->capture_last = new_recursive.saved_capture_last;
md->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
- {
- DPRINTF(("Recursion matched\n"));
- if (new_recursive.offset_save != stacksave)
+ {
+ DPRINTF(("Recursion matched\n"));
+ if (new_recursive.offset_save != stacksave)
(PUBL(free))(new_recursive.offset_save);
/* Set where we got to in the subject, and reset the start in case
@@ -1828,14 +1828,14 @@ for (;;)
eptr = md->end_match_ptr;
mstart = md->start_match_ptr;
goto RECURSION_MATCHED; /* Exit loop; end processing */
- }
+ }
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
recursion; they cause a NOMATCH for the entire recursion. These codes
are defined in a range that can be tested for. */
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
- {
+ {
if (new_recursive.offset_save != stacksave)
(PUBL(free))(new_recursive.offset_save);
RRETURN(MATCH_NOMATCH);
@@ -1845,56 +1845,56 @@ for (;;)
if (rrc != MATCH_NOMATCH)
{
- DPRINTF(("Recursion gave error %d\n", rrc));
+ DPRINTF(("Recursion gave error %d\n", rrc));
if (new_recursive.offset_save != stacksave)
(PUBL(free))(new_recursive.offset_save);
- RRETURN(rrc);
- }
-
- md->recursive = &new_recursive;
- callpat += GET(callpat, 1);
- }
- while (*callpat == OP_ALT);
-
- DPRINTF(("Recursion didn't match\n"));
- md->recursive = new_recursive.prevrec;
- if (new_recursive.offset_save != stacksave)
+ RRETURN(rrc);
+ }
+
+ md->recursive = &new_recursive;
+ callpat += GET(callpat, 1);
+ }
+ while (*callpat == OP_ALT);
+
+ DPRINTF(("Recursion didn't match\n"));
+ md->recursive = new_recursive.prevrec;
+ if (new_recursive.offset_save != stacksave)
(PUBL(free))(new_recursive.offset_save);
- RRETURN(MATCH_NOMATCH);
- }
-
+ RRETURN(MATCH_NOMATCH);
+ }
+
RECURSION_MATCHED:
break;
-
- /* An alternation is the end of a branch; scan along to find the end of the
- bracketed group and go to there. */
-
- case OP_ALT:
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- break;
-
+
+ /* An alternation is the end of a branch; scan along to find the end of the
+ bracketed group and go to there. */
+
+ case OP_ALT:
+ do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+ break;
+
/* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
indicating that it may occur zero times. It may repeat infinitely, or not
at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
with fixed upper repeat limits are compiled as a number of copies, with the
optional ones preceded by BRAZERO or BRAMINZERO. */
-
- case OP_BRAZERO:
+
+ case OP_BRAZERO:
next = ecode + 1;
RMATCH(eptr, next, offset_top, md, eptrb, RM10);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
do next += GET(next, 1); while (*next == OP_ALT);
ecode = next + 1 + LINK_SIZE;
- break;
-
- case OP_BRAMINZERO:
+ break;
+
+ case OP_BRAMINZERO:
next = ecode + 1;
do next += GET(next, 1); while (*next == OP_ALT);
RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode++;
- break;
-
+ break;
+
case OP_SKIPZERO:
next = ecode+1;
do next += GET(next,1); while (*next == OP_ALT);
@@ -1910,72 +1910,72 @@ for (;;)
if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
goto POSSESSIVE_NON_CAPTURE;
- /* End of a group, repeated or non-repeating. */
-
- case OP_KET:
- case OP_KETRMIN:
- case OP_KETRMAX:
+ /* End of a group, repeated or non-repeating. */
+
+ case OP_KET:
+ case OP_KETRMIN:
+ case OP_KETRMAX:
case OP_KETRPOS:
- prev = ecode - GET(ecode, 1);
-
- /* If this was a group that remembered the subject start, in order to break
- infinite repeats of empty string matches, retrieve the subject start from
- the chain. Otherwise, set it NULL. */
-
+ prev = ecode - GET(ecode, 1);
+
+ /* If this was a group that remembered the subject start, in order to break
+ infinite repeats of empty string matches, retrieve the subject start from
+ the chain. Otherwise, set it NULL. */
+
if (*prev >= OP_SBRA || *prev == OP_ONCE)
- {
- saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
- eptrb = eptrb->epb_prev; /* Backup to previous group */
- }
- else saved_eptr = NULL;
-
+ {
+ saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
+ eptrb = eptrb->epb_prev; /* Backup to previous group */
+ }
+ else saved_eptr = NULL;
+
/* If we are at the end of an assertion group or a non-capturing atomic
group, stop matching and return MATCH_MATCH, but record the current high
water mark for use by positive assertions. We also need to record the match
start in case it was changed by \K. */
-
+
if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
*prev == OP_ONCE_NC)
- {
+ {
md->end_match_ptr = eptr; /* For ONCE_NC */
- md->end_offset_top = offset_top;
+ md->end_offset_top = offset_top;
md->start_match_ptr = mstart;
RRETURN(MATCH_MATCH); /* Sets md->mark */
- }
-
- /* For capturing groups we have to check the group number back at the start
- and if necessary complete handling an extraction by setting the offsets and
+ }
+
+ /* For capturing groups we have to check the group number back at the start
+ and if necessary complete handling an extraction by setting the offsets and
bumping the high water mark. Whole-pattern recursion is coded as a recurse
into group 0, so it won't be picked up here. Instead, we catch it when the
OP_END is reached. Other recursion is handled here. We just have to record
the current subject position and start match pointer and give a MATCH
return. */
-
+
if (*prev == OP_CBRA || *prev == OP_SCBRA ||
*prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
- {
- number = GET2(prev, 1+LINK_SIZE);
- offset = number << 1;
-
+ {
+ number = GET2(prev, 1+LINK_SIZE);
+ offset = number << 1;
+
#ifdef PCRE_DEBUG
- printf("end bracket %d", number);
- printf("\n");
-#endif
-
+ printf("end bracket %d", number);
+ printf("\n");
+#endif
+
/* Handle a recursively called group. */
if (md->recursive != NULL && md->recursive->group_num == number)
- {
+ {
md->end_match_ptr = eptr;
md->start_match_ptr = mstart;
RRETURN(MATCH_MATCH);
- }
-
+ }
+
/* Deal with capturing */
-
+
md->capture_last = (md->capture_last & OVFLMASK) | number;
if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
- {
+ {
/* If offset is greater than offset_top, it means that we are
"skipping" a capturing group, and that group's offsets must be marked
unset. In earlier versions of PCRE, all the offsets were unset at the
@@ -1999,14 +1999,14 @@ for (;;)
md->offset_vector[md->offset_end - number];
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
if (offset_top <= offset) offset_top = offset + 2;
- }
- }
-
+ }
+ }
+
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
at a time from the outer level, thus saving stack. This must precede the
empty string test - in this case that test is done at the outer level. */
-
+
if (*ecode == OP_KETRPOS)
{
md->start_match_ptr = mstart; /* In case \K reset it */
@@ -2014,7 +2014,7 @@ for (;;)
md->end_offset_top = offset_top;
RRETURN(MATCH_KETRPOS);
}
-
+
/* For an ordinary non-repeating ket, just continue at this level. This
also happens for a repeating ket if no characters were matched in the
group. This is the forcible breaking of infinite loops as implemented in
@@ -2023,9 +2023,9 @@ for (;;)
level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
original OP_ONCE level, thereby bypassing intermediate backup points, but
resetting any captures that happened along the way. */
-
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
+
+ if (*ecode == OP_KET || eptr == saved_eptr)
+ {
if (*prev == OP_ONCE)
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
@@ -2034,21 +2034,21 @@ for (;;)
RRETURN(MATCH_ONCE);
}
ecode += 1 + LINK_SIZE; /* Carry on at this level */
- break;
- }
-
+ break;
+ }
+
/* The normal repeating kets try the rest of the pattern or restart from
the preceding bracket, in the appropriate order. In the second case, we can
    use tail recursion to avoid using another stack frame, unless we have
an atomic group or an unlimited repeat of a group that can match an empty
string. */
-
- if (*ecode == OP_KETRMIN)
- {
+
+ if (*ecode == OP_KETRMIN)
+ {
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (*prev == OP_ONCE)
- {
+ {
RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
@@ -2057,16 +2057,16 @@ for (;;)
if (*prev >= OP_SBRA) /* Could match an empty string */
{
RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
- RRETURN(rrc);
- }
- ecode = prev;
- goto TAIL_RECURSE;
- }
- else /* OP_KETRMAX */
- {
+ RRETURN(rrc);
+ }
+ ecode = prev;
+ goto TAIL_RECURSE;
+ }
+ else /* OP_KETRMAX */
+ {
RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (*prev == OP_ONCE)
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
@@ -2074,23 +2074,23 @@ for (;;)
md->once_target = prev;
RRETURN(MATCH_ONCE);
}
- ecode += 1 + LINK_SIZE;
- goto TAIL_RECURSE;
- }
- /* Control never gets here */
-
+ ecode += 1 + LINK_SIZE;
+ goto TAIL_RECURSE;
+ }
+ /* Control never gets here */
+
/* Not multiline mode: start of subject assertion, unless notbol. */
-
- case OP_CIRC:
- if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
-
- /* Start of subject assertion */
-
- case OP_SOD:
- if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
+
+ case OP_CIRC:
+ if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
+
+ /* Start of subject assertion */
+
+ case OP_SOD:
+ if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
/* Multiline mode: start of subject unless notbol, or after any newline. */
case OP_CIRCM:
@@ -2101,26 +2101,26 @@ for (;;)
ecode++;
break;
- /* Start of match assertion */
-
- case OP_SOM:
- if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- /* Reset the start of match point */
-
- case OP_SET_SOM:
- mstart = eptr;
- ecode++;
- break;
-
+ /* Start of match assertion */
+
+ case OP_SOM:
+ if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ /* Reset the start of match point */
+
+ case OP_SET_SOM:
+ mstart = eptr;
+ ecode++;
+ break;
+
/* Multiline mode: assert before any newline, or before end of subject
unless noteol is set. */
-
+
case OP_DOLLM:
if (eptr < md->end_subject)
- {
+ {
if (!IS_NEWLINE(eptr))
{
if (md->partial != 0 &&
@@ -2134,12 +2134,12 @@ for (;;)
}
RRETURN(MATCH_NOMATCH);
}
- }
- else
- {
- if (md->noteol) RRETURN(MATCH_NOMATCH);
+ }
+ else
+ {
+ if (md->noteol) RRETURN(MATCH_NOMATCH);
SCHECK_PARTIAL();
- }
+ }
ecode++;
break;
@@ -2150,22 +2150,22 @@ for (;;)
if (md->noteol) RRETURN(MATCH_NOMATCH);
if (!md->endonly) goto ASSERT_NL_OR_EOS;
- /* ... else fall through for endonly */
-
- /* End of subject assertion (\z) */
-
- case OP_EOD:
- if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
+ /* ... else fall through for endonly */
+
+ /* End of subject assertion (\z) */
+
+ case OP_EOD:
+ if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
SCHECK_PARTIAL();
- ecode++;
- break;
-
- /* End of subject or ending \n assertion (\Z) */
-
- case OP_EODN:
+ ecode++;
+ break;
+
+ /* End of subject or ending \n assertion (\Z) */
+
+ case OP_EODN:
ASSERT_NL_OR_EOS:
if (eptr < md->end_subject &&
- (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
+ (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
{
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
@@ -2176,37 +2176,37 @@ for (;;)
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
/* Either at end of string or \n before end. */
SCHECK_PARTIAL();
- ecode++;
- break;
-
- /* Word boundary assertions */
-
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- {
-
- /* Find out if the previous and current characters are "word" characters.
- It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
+ ecode++;
+ break;
+
+ /* Word boundary assertions */
+
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ {
+
+ /* Find out if the previous and current characters are "word" characters.
+ It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
be "non-word" characters. Remember the earliest consulted character for
partial matching. */
-
+
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
/* Get status of previous character */
- if (eptr == md->start_subject) prev_is_word = FALSE; else
- {
+ if (eptr == md->start_subject) prev_is_word = FALSE; else
+ {
PCRE_PUCHAR lastptr = eptr - 1;
BACKCHAR(lastptr);
if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
- GETCHAR(c, lastptr);
+ GETCHAR(c, lastptr);
#ifdef SUPPORT_UCP
if (md->use_ucp)
{
@@ -2218,19 +2218,19 @@ for (;;)
}
else
#endif
- prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
- }
+ prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
+ }
/* Get status of next character */
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
cur_is_word = FALSE;
}
else
{
- GETCHAR(c, eptr);
+ GETCHAR(c, eptr);
#ifdef SUPPORT_UCP
if (md->use_ucp)
{
@@ -2242,16 +2242,16 @@ for (;;)
}
else
#endif
- cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
- }
- }
- else
-#endif
-
+ cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
+ }
+ }
+ else
+#endif
+
/* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
consistency with the behaviour of \w we do use it in this case. */
-
- {
+
+ {
/* Get status of previous character */
if (eptr == md->start_subject) prev_is_word = FALSE; else
@@ -2295,30 +2295,30 @@ for (;;)
#endif
cur_is_word = MAX_255(*eptr)
&& ((md->ctypes[*eptr] & ctype_word) != 0);
- }
-
- /* Now see if the situation is what we want */
-
- if ((*ecode++ == OP_WORD_BOUNDARY)?
- cur_is_word == prev_is_word : cur_is_word != prev_is_word)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
+ }
+
+ /* Now see if the situation is what we want */
+
+ if ((*ecode++ == OP_WORD_BOUNDARY)?
+ cur_is_word == prev_is_word : cur_is_word != prev_is_word)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
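Stripped of the UTF-8 and Unicode-property handling, the test reduces to comparing two booleans, with positions outside the subject counting as non-word; a sketch using plain ASCII classification and a hypothetical at_word_boundary() helper:

#include <ctype.h>

/* Sketch: \b holds where exactly one of the previous and current characters
   is a word character; \B holds where both or neither are. Positions before
   the first character and after the last count as non-word on that side. */
static int at_word_boundary(const char *subject, const char *end,
                            const char *p)
{
int prev_is_word = (p > subject) &&
                   (isalnum((unsigned char)p[-1]) || p[-1] == '_');
int cur_is_word  = (p < end) &&
                   (isalnum((unsigned char)p[0]) || p[0] == '_');
return prev_is_word != cur_is_word;
}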
/* Match any single character type except newline; have to take care with
CRLF newlines and partial matching. */
-
- case OP_ANY:
+
+ case OP_ANY:
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr == md->end_subject - 1 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
UCHAR21TEST(eptr) == NLBLOCK->nl[0])
- {
+ {
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
+ }
/* Fall through */
@@ -2334,134 +2334,134 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
#endif
- ecode++;
- break;
-
- /* Match a single byte, even in UTF-8 mode. This opcode really does match
- any byte, even newline, independent of the setting of PCRE_DOTALL. */
-
- case OP_ANYBYTE:
+ ecode++;
+ break;
+
+ /* Match a single byte, even in UTF-8 mode. This opcode really does match
+ any byte, even newline, independent of the setting of PCRE_DOTALL. */
+
+ case OP_ANYBYTE:
if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
{ /* not be updated before SCHECK_PARTIAL. */
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
eptr++;
- ecode++;
- break;
-
- case OP_NOT_DIGIT:
+ ecode++;
+ break;
+
+ case OP_NOT_DIGIT:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (
+ GETCHARINCTEST(c, eptr);
+ if (
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
- c < 256 &&
-#endif
- (md->ctypes[c] & ctype_digit) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_DIGIT:
+ c < 256 &&
+#endif
+ (md->ctypes[c] & ctype_digit) != 0
+ )
+ RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ case OP_DIGIT:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (
+ GETCHARINCTEST(c, eptr);
+ if (
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
c > 255 ||
-#endif
- (md->ctypes[c] & ctype_digit) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_NOT_WHITESPACE:
+#endif
+ (md->ctypes[c] & ctype_digit) == 0
+ )
+ RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ case OP_NOT_WHITESPACE:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (
+ GETCHARINCTEST(c, eptr);
+ if (
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
- c < 256 &&
-#endif
- (md->ctypes[c] & ctype_space) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_WHITESPACE:
+ c < 256 &&
+#endif
+ (md->ctypes[c] & ctype_space) != 0
+ )
+ RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ case OP_WHITESPACE:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (
+ GETCHARINCTEST(c, eptr);
+ if (
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
c > 255 ||
-#endif
- (md->ctypes[c] & ctype_space) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_NOT_WORDCHAR:
+#endif
+ (md->ctypes[c] & ctype_space) == 0
+ )
+ RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ case OP_NOT_WORDCHAR:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (
+ GETCHARINCTEST(c, eptr);
+ if (
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
- c < 256 &&
-#endif
- (md->ctypes[c] & ctype_word) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_WORDCHAR:
+ c < 256 &&
+#endif
+ (md->ctypes[c] & ctype_word) != 0
+ )
+ RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ case OP_WORDCHAR:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (
+ GETCHARINCTEST(c, eptr);
+ if (
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
c > 255 ||
-#endif
- (md->ctypes[c] & ctype_word) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_ANYNL:
+#endif
+ (md->ctypes[c] & ctype_word) == 0
+ )
+ RRETURN(MATCH_NOMATCH);
+ ecode++;
+ break;
+
+ case OP_ANYNL:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
+ GETCHARINCTEST(c, eptr);
+ switch(c)
+ {
+ default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
if (eptr >= md->end_subject)
@@ -2469,128 +2469,128 @@ for (;;)
SCHECK_PARTIAL();
}
else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
- break;
-
+ break;
+
case CHAR_LF:
- break;
-
+ break;
+
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
- break;
- }
- ecode++;
- break;
-
- case OP_NOT_HSPACE:
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
+ break;
+ }
+ ecode++;
+ break;
+
+ case OP_NOT_HSPACE:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
+ GETCHARINCTEST(c, eptr);
+ switch(c)
+ {
HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
- default: break;
- }
- ecode++;
- break;
-
- case OP_HSPACE:
+ default: break;
+ }
+ ecode++;
+ break;
+
+ case OP_HSPACE:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
+ GETCHARINCTEST(c, eptr);
+ switch(c)
+ {
HSPACE_CASES: break; /* Byte and multibyte cases */
- default: RRETURN(MATCH_NOMATCH);
- }
- ecode++;
- break;
-
- case OP_NOT_VSPACE:
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ ecode++;
+ break;
+
+ case OP_NOT_VSPACE:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
+ GETCHARINCTEST(c, eptr);
+ switch(c)
+ {
VSPACE_CASES: RRETURN(MATCH_NOMATCH);
- default: break;
- }
- ecode++;
- break;
-
- case OP_VSPACE:
+ default: break;
+ }
+ ecode++;
+ break;
+
+ case OP_VSPACE:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
+ GETCHARINCTEST(c, eptr);
+ switch(c)
+ {
VSPACE_CASES: break;
- default: RRETURN(MATCH_NOMATCH);
- }
- ecode++;
- break;
-
-#ifdef SUPPORT_UCP
- /* Check the next character by Unicode property. We will get here only
- if the support is in the binary; otherwise a compile-time error occurs. */
-
- case OP_PROP:
- case OP_NOTPROP:
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ ecode++;
+ break;
+
+#ifdef SUPPORT_UCP
+ /* Check the next character by Unicode property. We will get here only
+ if the support is in the binary; otherwise a compile-time error occurs. */
+
+ case OP_PROP:
+ case OP_NOTPROP:
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- {
+ GETCHARINCTEST(c, eptr);
+ {
const pcre_uint32 *cp;
const ucd_record *prop = GET_UCD(c);
-
- switch(ecode[1])
- {
- case PT_ANY:
- if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_LAMP:
+
+ switch(ecode[1])
+ {
+ case PT_ANY:
+ if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_LAMP:
if ((prop->chartype == ucp_Lu ||
prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
break;
-
- case PT_GC:
+
+ case PT_GC:
if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_PC:
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_PC:
if ((ecode[2] != prop->chartype) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_SC:
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_SC:
if ((ecode[2] != prop->script) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
+ RRETURN(MATCH_NOMATCH);
+ break;
+
/* These are specials */
case PT_ALNUM:
@@ -2646,20 +2646,20 @@ for (;;)
/* This should never occur */
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
-
- ecode += 3;
- }
- break;
-
- /* Match an extended Unicode sequence. We will get here only if the support
- is in the binary; otherwise a compile-time error occurs. */
-
- case OP_EXTUNI:
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+
+ ecode += 3;
+ }
+ break;
+
+ /* Match an extended Unicode sequence. We will get here only if the support
+ is in the binary; otherwise a compile-time error occurs. */
+
+ case OP_EXTUNI:
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@@ -2668,30 +2668,30 @@ for (;;)
int lgb, rgb;
GETCHARINCTEST(c, eptr);
lgb = UCD_GRAPHBREAK(c);
- while (eptr < md->end_subject)
- {
- int len = 1;
+ while (eptr < md->end_subject)
+ {
+ int len = 1;
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
lgb = rgb;
- eptr += len;
- }
- }
+ eptr += len;
+ }
+ }
CHECK_PARTIAL();
- ecode++;
- break;
+ ecode++;
+ break;
#endif /* SUPPORT_UCP */
-
-
- /* Match a back reference, possibly repeatedly. Look past the end of the
- item to see if there is repeat information following. The code is similar
- to that for character classes, but repeated for efficiency. Then obey
- similar code to character type repeats - written out again for speed.
- However, if the referenced string is the empty string, always treat
- it as matched, any number of times (otherwise there could be infinite
+
+
+ /* Match a back reference, possibly repeatedly. Look past the end of the
+ item to see if there is repeat information following. The code is similar
+ to that for character classes, but repeated for efficiency. Then obey
+ similar code to character type repeats - written out again for speed.
+ However, if the referenced string is the empty string, always treat
+ it as matched, any number of times (otherwise there could be infinite
loops). If the reference is unset, there are two possibilities:
-
+
(a) In the default, Perl-compatible state, set the length negative;
this ensures that every attempt at a match fails. We can't just fail
here, because of the possibility of quantifiers with zero minima.
@@ -2710,19 +2710,19 @@ for (;;)
case OP_DNREF:
case OP_DNREFI:
caseless = op == OP_DNREFI;
- {
+ {
int count = GET2(ecode, 1+IMM2_SIZE);
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
ecode += 1 + 2*IMM2_SIZE;
-
+
/* Setting the default length first and initializing 'offset' avoids
compiler warnings in the REF_REPEAT code. */
-
+
length = (md->jscript_compat)? 0 : -1;
offset = 0;
-
+
while (count-- > 0)
- {
+ {
offset = GET2(slot, 0) << 1;
if (offset < offset_top && md->offset_vector[offset] >= 0)
{
@@ -2733,7 +2733,7 @@ for (;;)
}
}
goto REF_REPEAT;
-
+
case OP_REF:
case OP_REFI:
caseless = op == OP_REFI;
@@ -2743,7 +2743,7 @@ for (;;)
length = (md->jscript_compat)? 0 : -1;
else
length = md->offset_vector[offset+1] - md->offset_vector[offset];
-
+
/* Set up for repetition, or handle the non-repeated case */
REF_REPEAT:
@@ -2777,72 +2777,72 @@ for (;;)
if (length == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
+ }
eptr += length;
continue; /* With the main loop */
}
-
+
/* Handle repeated back references. If the length of the reference is
zero, just continue with the main loop. If the length is negative, it
means the reference is unset in non-Java-compatible mode. If the minimum is
zero, we can continue at the same level without recursion. For any other
minimum, carrying on will result in NOMATCH. */
-
+
if (length == 0) continue;
if (length < 0 && min == 0) continue;
-
+
/* First, ensure the minimum number of matches are present. We get back
the length of the reference string explicitly rather than passing the
address of eptr, so that eptr can be a register variable. */
-
+
for (i = 1; i <= min; i++)
{
int slength;
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
- {
+ {
if (slength == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
+ }
eptr += slength;
}
-
+
/* If min = max, continue at the same level without recursion.
They are not both allowed to be zero. */
-
+
if (min == max) continue;
-
+
/* If minimizing, keep trying and advancing the pointer */
-
+
if (minimize)
{
for (fi = min;; fi++)
- {
+ {
int slength;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
- {
+ {
if (slength == -2) eptr = md->end_subject; /* Partial match */
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
+ }
eptr += slength;
- }
+ }
/* Control never gets here */
}
-
+
/* If maximizing, find the longest string and work backwards */
-
+
else
{
pp = eptr;
for (i = min; i < max; i++)
- {
+ {
int slength;
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
- {
+ {
/* Can't use CHECK_PARTIAL because we don't want to update eptr in
the soft partial matching case. */
@@ -2853,9 +2853,9 @@ for (;;)
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
break;
- }
+ }
eptr += slength;
- }
+ }
while (eptr >= pp)
{
@@ -2864,97 +2864,97 @@ for (;;)
eptr -= length;
}
RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
-
- /* Match a bit-mapped character class, possibly repeatedly. This op code is
- used when all the characters in the class have values in the range 0-255,
- and either the matching is caseful, or the characters are in the range
- 0-127 when UTF-8 processing is enabled. The only difference between
- OP_CLASS and OP_NCLASS occurs when a data character outside the range is
- encountered.
-
- First, look past the end of the item to see if there is repeat information
- following. Then obey similar code to character type repeats - written out
- again for speed. */
-
- case OP_NCLASS:
- case OP_CLASS:
- {
+ }
+ /* Control never gets here */
+
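/* Illustrative sketch only (not part of pcre_exec.c or of this change): the
   back-reference repetition strategy used above, with a simplified helper in
   place of match_ref(). An empty reference matches any number of times, the
   minimum count is mandatory, and greedy matching then extends as far as the
   subject allows (the real code additionally retries the rest of the pattern
   while backing off). All names below are invented for the example. */

#include <stddef.h>
#include <string.h>

/* Does the captured text (ref, len) match the subject at p? */
static int ref_match(const char *p, const char *end,
  const char *ref, size_t len)
{
return (size_t)(end - p) >= len && memcmp(p, ref, len) == 0;
}

/* Match the reference repeated between min and max times, greedily.
   On success *pp is advanced past the matched run and 1 is returned. */
static int match_ref_repeat(const char **pp, const char *end,
  const char *ref, size_t len, int min, int max)
{
const char *p = *pp;
int count = 0;
if (len == 0) return 1;                /* Empty reference always matches */
while (count < min)                    /* The minimum copies are mandatory */
  {
  if (!ref_match(p, end, ref, len)) return 0;
  p += len;
  count++;
  }
while (count < max && ref_match(p, end, ref, len))  /* Greedy extension */
  {
  p += len;
  count++;
  }
*pp = p;
return 1;
}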
+ /* Match a bit-mapped character class, possibly repeatedly. This op code is
+ used when all the characters in the class have values in the range 0-255,
+ and either the matching is caseful, or the characters are in the range
+ 0-127 when UTF-8 processing is enabled. The only difference between
+ OP_CLASS and OP_NCLASS occurs when a data character outside the range is
+ encountered.
+
+ First, look past the end of the item to see if there is repeat information
+ following. Then obey similar code to character type repeats - written out
+ again for speed. */
+
+ case OP_NCLASS:
+ case OP_CLASS:
+ {
/* The data variable is saved across frames, so the byte map needs to
be stored there. */
#define BYTE_MAP ((pcre_uint8 *)data)
- data = ecode + 1; /* Save for matching */
+ data = ecode + 1; /* Save for matching */
ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
-
- switch (*ecode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
+
+ switch (*ecode)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
- c = *ecode++ - OP_CRSTAR;
+ c = *ecode++ - OP_CRSTAR;
if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
else possessive = TRUE;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
+ min = rep_min[c]; /* Pick up values from tables; */
+ max = rep_max[c]; /* zero for max => infinity */
+ if (max == 0) max = INT_MAX;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
case OP_CRPOSRANGE:
- minimize = (*ecode == OP_CRMINRANGE);
+ minimize = (*ecode == OP_CRMINRANGE);
possessive = (*ecode == OP_CRPOSRANGE);
- min = GET2(ecode, 1);
+ min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);
- if (max == 0) max = INT_MAX;
+ if (max == 0) max = INT_MAX;
ecode += 1 + 2 * IMM2_SIZE;
- break;
-
- default: /* No repeat follows */
- min = max = 1;
- break;
- }
-
- /* First, ensure the minimum number of matches are present. */
-
+ break;
+
+ default: /* No repeat follows */
+ min = max = 1;
+ break;
+ }
+
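/* Illustrative sketch only (not PCRE source): how the quantifier opcodes
   decoded above become (min, max, lazy) values. The simple quantifiers use
   small lookup tables indexed by the opcode offset (odd entries are the lazy
   "minimize" forms, and 0 in the max table means "no upper bound"), while the
   {n,m} range forms carry both bounds as two-byte immediates, read with
   GET2() as above. The names in this sketch are invented. */

#include <limits.h>

enum quant { Q_STAR, Q_MINSTAR, Q_PLUS, Q_MINPLUS, Q_QUERY, Q_MINQUERY };

static const int q_min[] = { 0, 0, 1, 1, 0, 0 };
static const int q_max[] = { 0, 0, 0, 0, 1, 1 };   /* 0 => unbounded */

static void quant_bounds(enum quant q, int *min, int *max, int *lazy)
{
*min = q_min[q];
*max = q_max[q];
*lazy = ((int)q & 1) != 0;         /* *?, +?, ?? are the odd table entries */
if (*max == 0) *max = INT_MAX;     /* zero encodes "infinite" */
}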
+ /* First, ensure the minimum number of matches are present. */
+
#ifdef SUPPORT_UTF
if (utf)
- {
- for (i = 1; i <= min; i++)
- {
+ {
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- if (c > 255)
- {
- if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
- }
- else
+ GETCHARINC(c, eptr);
+ if (c > 255)
+ {
+ if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ }
+ else
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (i = 1; i <= min; i++)
- {
+ {
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- c = *eptr++;
+ c = *eptr++;
#ifndef COMPILE_PCRE8
if (c > 255)
{
@@ -2963,56 +2963,56 @@ for (;;)
else
#endif
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
- }
- }
-
- /* If max == min we can continue with the main loop without the
- need to recurse. */
-
- if (min == max) continue;
-
- /* If minimizing, keep testing the rest of the expression and advancing
- the pointer while it matches the class. */
-
- if (minimize)
- {
+ }
+ }
+
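/* Illustrative sketch only (not PCRE source): the 32-byte bit map behind
   OP_CLASS/OP_NCLASS above. Each of the 256 possible code units gets one bit,
   and membership is tested exactly as in the loops above with
   map[c/8] & (1 << (c & 7)). The helper names are invented. */

typedef unsigned char class_map[32];

static void class_add(class_map map, unsigned int c)      /* c in 0..255 */
{
map[c / 8] |= (unsigned char)(1u << (c & 7));
}

static int class_contains(const class_map map, unsigned int c)
{
return c <= 255 && (map[c / 8] & (1u << (c & 7))) != 0;
}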
+ /* If max == min we can continue with the main loop without the
+ need to recurse. */
+
+ if (min == max) continue;
+
+ /* If minimizing, keep testing the rest of the expression and advancing
+ the pointer while it matches the class. */
+
+ if (minimize)
+ {
#ifdef SUPPORT_UTF
if (utf)
- {
- for (fi = min;; fi++)
- {
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- if (c > 255)
- {
- if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
- }
- else
+ GETCHARINC(c, eptr);
+ if (c > 255)
+ {
+ if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ }
+ else
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (fi = min;; fi++)
- {
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- c = *eptr++;
+ c = *eptr++;
#ifndef COMPILE_PCRE8
if (c > 255)
{
@@ -3021,60 +3021,60 @@ for (;;)
else
#endif
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
- }
- }
- /* Control never gets here */
- }
-
- /* If maximizing, find the longest possible run, then work backwards. */
-
- else
- {
- pp = eptr;
-
+ }
+ }
+ /* Control never gets here */
+ }
+
+ /* If maximizing, find the longest possible run, then work backwards. */
+
+ else
+ {
+ pp = eptr;
+
#ifdef SUPPORT_UTF
if (utf)
- {
- for (i = min; i < max; i++)
- {
- int len = 1;
+ {
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c > 255)
- {
- if (op == OP_CLASS) break;
- }
- else
+ GETCHARLEN(c, eptr, len);
+ if (c > 255)
+ {
+ if (op == OP_CLASS) break;
+ }
+ else
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
- eptr += len;
- }
+ eptr += len;
+ }
if (possessive) continue; /* No backtracking */
- for (;;)
- {
+ for (;;)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- <= pp) break; /* Stop if tried at original pos */
- BACKCHAR(eptr);
- }
- }
- else
-#endif
+ BACKCHAR(eptr);
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (i = min; i < max; i++)
- {
+ {
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- c = *eptr;
+ c = *eptr;
#ifndef COMPILE_PCRE8
if (c > 255)
{
@@ -3083,76 +3083,76 @@ for (;;)
else
#endif
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
- eptr++;
- }
+ eptr++;
+ }
if (possessive) continue; /* No backtracking */
- while (eptr >= pp)
- {
+ while (eptr >= pp)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- }
- }
-
- RRETURN(MATCH_NOMATCH);
- }
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ eptr--;
+ }
+ }
+
+ RRETURN(MATCH_NOMATCH);
+ }
#undef BYTE_MAP
- }
- /* Control never gets here */
-
-
+ }
+ /* Control never gets here */
+
+
/* Match an extended character class. In the 8-bit library, this opcode is
 encountered only when UTF-8 mode is supported. In the 16-bit and
32-bit libraries, codepoints greater than 255 may be encountered even when
UTF is not supported. */
-
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- case OP_XCLASS:
- {
- data = ecode + 1 + LINK_SIZE; /* Save for matching */
- ecode += GET(ecode, 1); /* Advance past the item */
-
- switch (*ecode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
+ case OP_XCLASS:
+ {
+ data = ecode + 1 + LINK_SIZE; /* Save for matching */
+ ecode += GET(ecode, 1); /* Advance past the item */
+
+ switch (*ecode)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
- c = *ecode++ - OP_CRSTAR;
+ c = *ecode++ - OP_CRSTAR;
if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
else possessive = TRUE;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
+ min = rep_min[c]; /* Pick up values from tables; */
+ max = rep_max[c]; /* zero for max => infinity */
+ if (max == 0) max = INT_MAX;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
case OP_CRPOSRANGE:
- minimize = (*ecode == OP_CRMINRANGE);
+ minimize = (*ecode == OP_CRMINRANGE);
possessive = (*ecode == OP_CRPOSRANGE);
- min = GET2(ecode, 1);
+ min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);
- if (max == 0) max = INT_MAX;
+ if (max == 0) max = INT_MAX;
ecode += 1 + 2 * IMM2_SIZE;
- break;
-
- default: /* No repeat follows */
- min = max = 1;
- break;
- }
-
- /* First, ensure the minimum number of matches are present. */
-
- for (i = 1; i <= min; i++)
- {
+ break;
+
+ default: /* No repeat follows */
+ min = max = 1;
+ break;
+ }
+
+ /* First, ensure the minimum number of matches are present. */
+
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -3160,22 +3160,22 @@ for (;;)
}
GETCHARINCTEST(c, eptr);
if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
- }
-
- /* If max == min we can continue with the main loop without the
- need to recurse. */
-
- if (min == max) continue;
-
- /* If minimizing, keep testing the rest of the expression and advancing
- the pointer while it matches the class. */
-
- if (minimize)
- {
- for (fi = min;; fi++)
- {
+ }
+
+ /* If max == min we can continue with the main loop without the
+ need to recurse. */
+
+ if (min == max) continue;
+
+ /* If minimizing, keep testing the rest of the expression and advancing
+ the pointer while it matches the class. */
+
+ if (minimize)
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
@@ -3184,18 +3184,18 @@ for (;;)
}
GETCHARINCTEST(c, eptr);
if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
- }
-
- /* If maximizing, find the longest possible run, then work backwards. */
-
- else
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
- int len = 1;
+ }
+ /* Control never gets here */
+ }
+
+ /* If maximizing, find the longest possible run, then work backwards. */
+
+ else
+ {
+ pp = eptr;
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -3207,63 +3207,63 @@ for (;;)
c = *eptr;
#endif
if (!PRIV(xclass)(c, data, utf)) break;
- eptr += len;
- }
+ eptr += len;
+ }
if (possessive) continue; /* No backtracking */
- for(;;)
- {
+ for(;;)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- <= pp) break; /* Stop if tried at original pos */
#ifdef SUPPORT_UTF
if (utf) BACKCHAR(eptr);
#endif
- }
- RRETURN(MATCH_NOMATCH);
- }
-
- /* Control never gets here */
- }
-#endif /* End of XCLASS */
-
- /* Match a single character, casefully */
-
- case OP_CHAR:
+ }
+ RRETURN(MATCH_NOMATCH);
+ }
+
+ /* Control never gets here */
+ }
+#endif /* End of XCLASS */
+
+ /* Match a single character, casefully */
+
+ case OP_CHAR:
#ifdef SUPPORT_UTF
if (utf)
- {
- length = 1;
- ecode++;
- GETCHARLEN(fc, ecode, length);
+ {
+ length = 1;
+ ecode++;
+ GETCHARLEN(fc, ecode, length);
if (length > md->end_subject - eptr)
{
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
- }
- else
-#endif
+ }
+ else
+#endif
/* Not UTF mode */
- {
+ {
if (md->end_subject - eptr < 1)
{
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
- if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
- ecode += 2;
- }
- break;
-
+ if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
+ ecode += 2;
+ }
+ break;
+
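/* Illustrative sketch only (not PCRE source): the OP_CHAR test above. In UTF
   mode a literal may occupy several code units, so the remaining subject
   length is checked first and the encoded units are then compared one by one;
   in non-UTF mode this degenerates to a single-unit compare. Names invented. */

#include <stddef.h>
#include <string.h>

/* lit/litlen: the encoded literal; p/end: current position in the subject.
   Returns the number of units consumed, or 0 if the literal does not match. */
static size_t match_literal(const unsigned char *lit, size_t litlen,
  const unsigned char *p, const unsigned char *end)
{
if ((size_t)(end - p) < litlen) return 0;         /* Not enough subject left */
return memcmp(p, lit, litlen) == 0 ? litlen : 0;  /* Unit-by-unit compare */
}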
/* Match a single character, caselessly. If we are at the end of the
subject, give up immediately. */
-
+
case OP_CHARI:
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@@ -3271,128 +3271,128 @@ for (;;)
#ifdef SUPPORT_UTF
if (utf)
{
- length = 1;
- ecode++;
- GETCHARLEN(fc, ecode, length);
-
- /* If the pattern character's value is < 128, we have only one byte, and
+ length = 1;
+ ecode++;
+ GETCHARLEN(fc, ecode, length);
+
+ /* If the pattern character's value is < 128, we have only one byte, and
we know that its other case must also be one byte long, so we can use the
fast lookup table. We know that there is at least one byte left in the
subject. */
-
- if (fc < 128)
- {
+
+ if (fc < 128)
+ {
pcre_uint32 cc = UCHAR21(eptr);
if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
- }
-
+ }
+
/* Otherwise we must pick up the subject character. Note that we cannot
use the value of "length" to check for sufficient bytes left, because the
other case of the character may have more or fewer bytes. */
-
- else
- {
+
+ else
+ {
pcre_uint32 dc;
- GETCHARINC(dc, eptr);
- ecode += length;
-
- /* If we have Unicode property support, we can use it to test the other
- case of the character, if there is one. */
-
- if (fc != dc)
- {
-#ifdef SUPPORT_UCP
+ GETCHARINC(dc, eptr);
+ ecode += length;
+
+ /* If we have Unicode property support, we can use it to test the other
+ case of the character, if there is one. */
+
+ if (fc != dc)
+ {
+#ifdef SUPPORT_UCP
if (dc != UCD_OTHERCASE(fc))
-#endif
- RRETURN(MATCH_NOMATCH);
- }
- }
- }
- else
+#endif
+ RRETURN(MATCH_NOMATCH);
+ }
+ }
+ }
+ else
#endif /* SUPPORT_UTF */
-
+
/* Not UTF mode */
- {
+ {
if (TABLE_GET(ecode[1], md->lcc, ecode[1])
!= TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
eptr++;
- ecode += 2;
- }
- break;
-
- /* Match a single character repeatedly. */
-
- case OP_EXACT:
+ ecode += 2;
+ }
+ break;
+
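/* Illustrative sketch only (not PCRE source): the two-level caseless test
   used by OP_CHARI above. For values below 128 a lowercase table (md->lcc in
   the real code) is enough and needs a single lookup; for larger code points
   the pattern character's "other case" is computed once (UCD_OTHERCASE above)
   and compared directly. tolower() stands in for the table here and is an
   assumption of the sketch. */

#include <ctype.h>

/* other_case: the precomputed alternative case of fc, or fc if it has none. */
static int caseless_eq(unsigned int fc, unsigned int other_case,
  unsigned int subject_char)
{
if (fc < 128 && subject_char < 128)
  return tolower((int)fc) == tolower((int)subject_char);
return subject_char == fc || subject_char == other_case;
}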
+ /* Match a single character repeatedly. */
+
+ case OP_EXACT:
case OP_EXACTI:
- min = max = GET2(ecode, 1);
+ min = max = GET2(ecode, 1);
ecode += 1 + IMM2_SIZE;
- goto REPEATCHAR;
-
- case OP_POSUPTO:
+ goto REPEATCHAR;
+
+ case OP_POSUPTO:
case OP_POSUPTOI:
- possessive = TRUE;
- /* Fall through */
-
- case OP_UPTO:
+ possessive = TRUE;
+ /* Fall through */
+
+ case OP_UPTO:
case OP_UPTOI:
- case OP_MINUPTO:
+ case OP_MINUPTO:
case OP_MINUPTOI:
- min = 0;
- max = GET2(ecode, 1);
+ min = 0;
+ max = GET2(ecode, 1);
minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
ecode += 1 + IMM2_SIZE;
- goto REPEATCHAR;
-
- case OP_POSSTAR:
+ goto REPEATCHAR;
+
+ case OP_POSSTAR:
case OP_POSSTARI:
- possessive = TRUE;
- min = 0;
- max = INT_MAX;
- ecode++;
- goto REPEATCHAR;
-
- case OP_POSPLUS:
+ possessive = TRUE;
+ min = 0;
+ max = INT_MAX;
+ ecode++;
+ goto REPEATCHAR;
+
+ case OP_POSPLUS:
case OP_POSPLUSI:
- possessive = TRUE;
- min = 1;
- max = INT_MAX;
- ecode++;
- goto REPEATCHAR;
-
- case OP_POSQUERY:
+ possessive = TRUE;
+ min = 1;
+ max = INT_MAX;
+ ecode++;
+ goto REPEATCHAR;
+
+ case OP_POSQUERY:
case OP_POSQUERYI:
- possessive = TRUE;
- min = 0;
- max = 1;
- ecode++;
- goto REPEATCHAR;
-
- case OP_STAR:
+ possessive = TRUE;
+ min = 0;
+ max = 1;
+ ecode++;
+ goto REPEATCHAR;
+
+ case OP_STAR:
case OP_STARI:
- case OP_MINSTAR:
+ case OP_MINSTAR:
case OP_MINSTARI:
- case OP_PLUS:
+ case OP_PLUS:
case OP_PLUSI:
- case OP_MINPLUS:
+ case OP_MINPLUS:
case OP_MINPLUSI:
- case OP_QUERY:
+ case OP_QUERY:
case OP_QUERYI:
- case OP_MINQUERY:
+ case OP_MINQUERY:
case OP_MINQUERYI:
c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
-
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
+ max = rep_max[c]; /* zero for max => infinity */
+ if (max == 0) max = INT_MAX;
+
/* Common code for all repeated single-character matches. We first check
for the minimum number of characters. If the minimum equals the maximum, we
are done. Otherwise, if minimizing, check the rest of the pattern for a
match; if there isn't one, advance up to the maximum, one character at a
time.
-
+
If maximizing, advance up to the maximum number of matching characters,
until eptr is past the end of the maximum run. If possessive, we are
then done (no backing up). Otherwise, match at this position; anything
@@ -3404,128 +3404,128 @@ for (;;)
The various UTF/non-UTF and caseful/caseless cases are handled separately,
for speed. */
- REPEATCHAR:
+ REPEATCHAR:
#ifdef SUPPORT_UTF
if (utf)
- {
- length = 1;
- charptr = ecode;
- GETCHARLEN(fc, ecode, length);
- ecode += length;
-
- /* Handle multibyte character matching specially here. There is
- support for caseless matching if UCP support is present. */
-
- if (length > 1)
- {
-#ifdef SUPPORT_UCP
+ {
+ length = 1;
+ charptr = ecode;
+ GETCHARLEN(fc, ecode, length);
+ ecode += length;
+
+ /* Handle multibyte character matching specially here. There is
+ support for caseless matching if UCP support is present. */
+
+ if (length > 1)
+ {
+#ifdef SUPPORT_UCP
pcre_uint32 othercase;
if (op >= OP_STARI && /* Caseless */
(othercase = UCD_OTHERCASE(fc)) != fc)
oclength = PRIV(ord2utf)(othercase, occhars);
- else oclength = 0;
-#endif /* SUPPORT_UCP */
-
- for (i = 1; i <= min; i++)
- {
+ else oclength = 0;
+#endif /* SUPPORT_UCP */
+
+ for (i = 1; i <= min; i++)
+ {
if (eptr <= md->end_subject - length &&
memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UCP
else if (oclength > 0 &&
eptr <= md->end_subject - oclength &&
memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
#endif /* SUPPORT_UCP */
- else
- {
+ else
+ {
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
- }
-
- if (min == max) continue;
-
- if (minimize)
- {
- for (fi = min;; fi++)
- {
+ }
+ }
+
+ if (min == max) continue;
+
+ if (minimize)
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr <= md->end_subject - length &&
memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UCP
else if (oclength > 0 &&
eptr <= md->end_subject - oclength &&
memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
#endif /* SUPPORT_UCP */
- else
- {
+ else
+ {
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
- }
- /* Control never gets here */
- }
-
- else /* Maximize */
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
+ }
+ }
+ /* Control never gets here */
+ }
+
+ else /* Maximize */
+ {
+ pp = eptr;
+ for (i = min; i < max; i++)
+ {
if (eptr <= md->end_subject - length &&
memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UCP
else if (oclength > 0 &&
eptr <= md->end_subject - oclength &&
memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
#endif /* SUPPORT_UCP */
- else
- {
+ else
+ {
CHECK_PARTIAL();
break;
- }
- }
-
+ }
+ }
+
if (possessive) continue; /* No backtracking */
- for(;;)
+ for(;;)
{
if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UCP
eptr--;
BACKCHAR(eptr);
-#else /* without SUPPORT_UCP */
+#else /* without SUPPORT_UCP */
eptr -= length;
-#endif /* SUPPORT_UCP */
- }
- }
- /* Control never gets here */
- }
-
- /* If the length of a UTF-8 character is 1, we fall through here, and
- obey the code as for non-UTF-8 characters below, though in this case the
- value of fc will always be < 128. */
- }
- else
+#endif /* SUPPORT_UCP */
+ }
+ }
+ /* Control never gets here */
+ }
+
+ /* If the length of a UTF-8 character is 1, we fall through here, and
+ obey the code as for non-UTF-8 characters below, though in this case the
+ value of fc will always be < 128. */
+ }
+ else
#endif /* SUPPORT_UTF */
/* When not in UTF-8 mode, load a single-byte character. */
- fc = *ecode++;
-
+ fc = *ecode++;
+
/* The value of fc at this point is always one character, though we may
or may not be in UTF mode. The code is duplicated for the caseless and
- caseful cases, for speed, since matching characters is likely to be quite
- common. First, ensure the minimum number of matches are present. If min =
- max, continue at the same level without recursing. Otherwise, if
- minimizing, keep trying the rest of the expression and advancing one
- matching character if failing, up to the maximum. Alternatively, if
- maximizing, find the maximum number of characters and work backwards. */
-
- DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
+ caseful cases, for speed, since matching characters is likely to be quite
+ common. First, ensure the minimum number of matches are present. If min =
+ max, continue at the same level without recursing. Otherwise, if
+ minimizing, keep trying the rest of the expression and advancing one
+ matching character if failing, up to the maximum. Alternatively, if
+ maximizing, find the maximum number of characters and work backwards. */
+
+ DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
max, (char *)eptr));
-
+
if (op >= OP_STARI) /* Caseless */
- {
+ {
#ifdef COMPILE_PCRE8
/* fc must be < 128 if UTF is enabled. */
foc = md->fcc[fc];
@@ -3543,7 +3543,7 @@ for (;;)
foc = TABLE_GET(fc, md->fcc, fc);
#endif /* COMPILE_PCRE8 */
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= min; i++)
{
pcre_uint32 cc; /* Faster than pcre_uchar */
if (eptr >= md->end_subject)
@@ -3555,31 +3555,31 @@ for (;;)
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
- if (min == max) continue;
- if (minimize)
- {
- for (fi = min;; fi++)
- {
+ if (min == max) continue;
+ if (minimize)
+ {
+ for (fi = min;; fi++)
+ {
pcre_uint32 cc; /* Faster than pcre_uchar */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
- }
- /* Control never gets here */
- }
- else /* Maximize */
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
+ }
+ /* Control never gets here */
+ }
+ else /* Maximize */
+ {
+ pp = eptr;
+ for (i = min; i < max; i++)
+ {
pcre_uint32 cc; /* Faster than pcre_uchar */
if (eptr >= md->end_subject)
{
@@ -3588,24 +3588,24 @@ for (;;)
}
cc = UCHAR21TEST(eptr);
if (fc != cc && foc != cc) break;
- eptr++;
- }
+ eptr++;
+ }
if (possessive) continue; /* No backtracking */
for (;;)
- {
+ {
if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
- eptr--;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
+ eptr--;
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ }
/* Control never gets here */
- }
- }
-
- /* Caseful comparisons (includes all multi-byte characters) */
-
- else
- {
+ }
+ }
+
+ /* Caseful comparisons (includes all multi-byte characters) */
+
+ else
+ {
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
@@ -3616,60 +3616,60 @@ for (;;)
if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
- if (min == max) continue;
+ if (min == max) continue;
- if (minimize)
- {
- for (fi = min;; fi++)
- {
+ if (minimize)
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
- }
- else /* Maximize */
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
+ }
+ /* Control never gets here */
+ }
+ else /* Maximize */
+ {
+ pp = eptr;
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
if (fc != UCHAR21TEST(eptr)) break;
- eptr++;
- }
+ eptr++;
+ }
if (possessive) continue; /* No backtracking */
for (;;)
- {
+ {
if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
- eptr--;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
+ eptr--;
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ }
/* Control never gets here */
- }
- }
- /* Control never gets here */
-
- /* Match a negated single one-byte character. The character we are
- checking can be multibyte. */
-
- case OP_NOT:
+ }
+ }
+ /* Control never gets here */
+
+ /* Match a negated single one-byte character. The character we are
+ checking can be multibyte. */
+
+ case OP_NOT:
case OP_NOTI:
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
+ }
#ifdef SUPPORT_UTF
if (utf)
{
@@ -3697,108 +3697,108 @@ for (;;)
if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
}
}
- else
+ else
#endif
- {
+ {
register pcre_uint32 ch = ecode[1];
c = *eptr++;
if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
RRETURN(MATCH_NOMATCH);
ecode += 2;
- }
- break;
-
- /* Match a negated single one-byte character repeatedly. This is almost a
- repeat of the code for a repeated single character, but I haven't found a
- nice way of commoning these up that doesn't require a test of the
- positive/negative option for each character match. Maybe that wouldn't add
- very much to the time taken, but character matching *is* what this is all
- about... */
-
- case OP_NOTEXACT:
+ }
+ break;
+
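/* Illustrative sketch only (not PCRE source): the negated single-character
   test of OP_NOT/OP_NOTI above. The match fails when the subject character
   equals the operand, and in the caseless form also when it equals the
   operand's other case. The helper name is invented. */

static int not_char_ok(unsigned int ch, unsigned int ch_othercase,
  unsigned int subject_char, int caseless)
{
if (subject_char == ch) return 0;
if (caseless && subject_char == ch_othercase) return 0;
return 1;
}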
+ /* Match a negated single one-byte character repeatedly. This is almost a
+ repeat of the code for a repeated single character, but I haven't found a
+ nice way of commoning these up that doesn't require a test of the
+ positive/negative option for each character match. Maybe that wouldn't add
+ very much to the time taken, but character matching *is* what this is all
+ about... */
+
+ case OP_NOTEXACT:
case OP_NOTEXACTI:
- min = max = GET2(ecode, 1);
+ min = max = GET2(ecode, 1);
ecode += 1 + IMM2_SIZE;
- goto REPEATNOTCHAR;
-
- case OP_NOTUPTO:
+ goto REPEATNOTCHAR;
+
+ case OP_NOTUPTO:
case OP_NOTUPTOI:
- case OP_NOTMINUPTO:
+ case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
- min = 0;
- max = GET2(ecode, 1);
+ min = 0;
+ max = GET2(ecode, 1);
minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
ecode += 1 + IMM2_SIZE;
- goto REPEATNOTCHAR;
-
- case OP_NOTPOSSTAR:
+ goto REPEATNOTCHAR;
+
+ case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
- possessive = TRUE;
- min = 0;
- max = INT_MAX;
- ecode++;
- goto REPEATNOTCHAR;
-
- case OP_NOTPOSPLUS:
+ possessive = TRUE;
+ min = 0;
+ max = INT_MAX;
+ ecode++;
+ goto REPEATNOTCHAR;
+
+ case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
- possessive = TRUE;
- min = 1;
- max = INT_MAX;
- ecode++;
- goto REPEATNOTCHAR;
-
- case OP_NOTPOSQUERY:
+ possessive = TRUE;
+ min = 1;
+ max = INT_MAX;
+ ecode++;
+ goto REPEATNOTCHAR;
+
+ case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
- possessive = TRUE;
- min = 0;
- max = 1;
- ecode++;
- goto REPEATNOTCHAR;
-
- case OP_NOTPOSUPTO:
+ possessive = TRUE;
+ min = 0;
+ max = 1;
+ ecode++;
+ goto REPEATNOTCHAR;
+
+ case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
- possessive = TRUE;
- min = 0;
- max = GET2(ecode, 1);
+ possessive = TRUE;
+ min = 0;
+ max = GET2(ecode, 1);
ecode += 1 + IMM2_SIZE;
- goto REPEATNOTCHAR;
-
- case OP_NOTSTAR:
+ goto REPEATNOTCHAR;
+
+ case OP_NOTSTAR:
case OP_NOTSTARI:
- case OP_NOTMINSTAR:
+ case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
- case OP_NOTPLUS:
+ case OP_NOTPLUS:
case OP_NOTPLUSI:
- case OP_NOTMINPLUS:
+ case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
- case OP_NOTQUERY:
+ case OP_NOTQUERY:
case OP_NOTQUERYI:
- case OP_NOTMINQUERY:
+ case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
-
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
+ max = rep_max[c]; /* zero for max => infinity */
+ if (max == 0) max = INT_MAX;
+
/* Common code for all repeated single-byte matches. */
-
- REPEATNOTCHAR:
+
+ REPEATNOTCHAR:
GETCHARINCTEST(fc, ecode);
-
- /* The code is duplicated for the caseless and caseful cases, for speed,
- since matching characters is likely to be quite common. First, ensure the
- minimum number of matches are present. If min = max, continue at the same
- level without recursing. Otherwise, if minimizing, keep trying the rest of
- the expression and advancing one matching character if failing, up to the
- maximum. Alternatively, if maximizing, find the maximum number of
- characters and work backwards. */
-
- DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
+
+ /* The code is duplicated for the caseless and caseful cases, for speed,
+ since matching characters is likely to be quite common. First, ensure the
+ minimum number of matches are present. If min = max, continue at the same
+ level without recursing. Otherwise, if minimizing, keep trying the rest of
+ the expression and advancing one matching character if failing, up to the
+ maximum. Alternatively, if maximizing, find the maximum number of
+ characters and work backwards. */
+
+ DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
max, (char *)eptr));
-
+
if (op >= OP_NOTSTARI) /* Caseless */
- {
+ {
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
if (utf && fc > 127)
@@ -3810,27 +3810,27 @@ for (;;)
else
#endif /* SUPPORT_UTF */
foc = TABLE_GET(fc, md->fcc, fc);
-
+
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
register pcre_uint32 d;
- for (i = 1; i <= min; i++)
- {
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(d, eptr);
+ GETCHARINC(d, eptr);
if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
+ }
+ }
+ else
#endif /* SUPPORT_UTF */
/* Not UTF mode */
- {
- for (i = 1; i <= min; i++)
+ {
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
@@ -3840,407 +3840,407 @@ for (;;)
if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
eptr++;
}
- }
-
- if (min == max) continue;
-
- if (minimize)
- {
+ }
+
+ if (min == max) continue;
+
+ if (minimize)
+ {
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
register pcre_uint32 d;
- for (fi = min;; fi++)
- {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(d, eptr);
+ GETCHARINC(d, eptr);
if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
+ }
+ }
+ else
#endif /*SUPPORT_UTF */
/* Not UTF mode */
- {
- for (fi = min;; fi++)
- {
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
eptr++;
- }
- }
- /* Control never gets here */
- }
-
- /* Maximize case */
-
- else
- {
- pp = eptr;
-
+ }
+ }
+ /* Control never gets here */
+ }
+
+ /* Maximize case */
+
+ else
+ {
+ pp = eptr;
+
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
register pcre_uint32 d;
- for (i = min; i < max; i++)
- {
- int len = 1;
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(d, eptr, len);
+ GETCHARLEN(d, eptr, len);
if (fc == d || (unsigned int)foc == d) break;
- eptr += len;
- }
+ eptr += len;
+ }
if (possessive) continue; /* No backtracking */
for(;;)
- {
+ {
if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
- BACKCHAR(eptr);
- }
- }
- else
+ BACKCHAR(eptr);
+ }
+ }
+ else
#endif /* SUPPORT_UTF */
/* Not UTF mode */
- {
- for (i = min; i < max; i++)
- {
+ {
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
if (fc == *eptr || foc == *eptr) break;
- eptr++;
- }
+ eptr++;
+ }
if (possessive) continue; /* No backtracking */
for (;;)
- {
+ {
if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- }
- }
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ eptr--;
+ }
+ }
/* Control never gets here */
- }
- }
-
- /* Caseful comparisons */
-
- else
- {
+ }
+ }
+
+ /* Caseful comparisons */
+
+ else
+ {
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
register pcre_uint32 d;
- for (i = 1; i <= min; i++)
- {
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(d, eptr);
- if (fc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif
+ GETCHARINC(d, eptr);
+ if (fc == d) RRETURN(MATCH_NOMATCH);
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (i = 1; i <= min; i++)
+ {
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
}
- }
-
- if (min == max) continue;
-
- if (minimize)
- {
+ }
+
+ if (min == max) continue;
+
+ if (minimize)
+ {
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
register pcre_uint32 d;
- for (fi = min;; fi++)
- {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(d, eptr);
+ GETCHARINC(d, eptr);
if (fc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (fi = min;; fi++)
- {
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
- }
- }
- /* Control never gets here */
- }
-
- /* Maximize case */
-
- else
- {
- pp = eptr;
-
+ }
+ }
+ /* Control never gets here */
+ }
+
+ /* Maximize case */
+
+ else
+ {
+ pp = eptr;
+
#ifdef SUPPORT_UTF
if (utf)
- {
+ {
register pcre_uint32 d;
- for (i = min; i < max; i++)
- {
- int len = 1;
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(d, eptr, len);
- if (fc == d) break;
- eptr += len;
- }
+ GETCHARLEN(d, eptr, len);
+ if (fc == d) break;
+ eptr += len;
+ }
if (possessive) continue; /* No backtracking */
- for(;;)
- {
+ for(;;)
+ {
if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
- BACKCHAR(eptr);
- }
- }
- else
-#endif
+ BACKCHAR(eptr);
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (i = min; i < max; i++)
- {
+ {
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
if (fc == *eptr) break;
- eptr++;
- }
+ eptr++;
+ }
if (possessive) continue; /* No backtracking */
for (;;)
- {
+ {
if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- }
- }
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ eptr--;
+ }
+ }
/* Control never gets here */
- }
- }
- /* Control never gets here */
-
- /* Match a single character type repeatedly; several different opcodes
- share code. This is very similar to the code for single characters, but we
- repeat it in the interests of efficiency. */
-
- case OP_TYPEEXACT:
- min = max = GET2(ecode, 1);
- minimize = TRUE;
+ }
+ }
+ /* Control never gets here */
+
+ /* Match a single character type repeatedly; several different opcodes
+ share code. This is very similar to the code for single characters, but we
+ repeat it in the interests of efficiency. */
+
+ case OP_TYPEEXACT:
+ min = max = GET2(ecode, 1);
+ minimize = TRUE;
ecode += 1 + IMM2_SIZE;
- goto REPEATTYPE;
-
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- min = 0;
- max = GET2(ecode, 1);
- minimize = *ecode == OP_TYPEMINUPTO;
+ goto REPEATTYPE;
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ min = 0;
+ max = GET2(ecode, 1);
+ minimize = *ecode == OP_TYPEMINUPTO;
ecode += 1 + IMM2_SIZE;
- goto REPEATTYPE;
-
- case OP_TYPEPOSSTAR:
- possessive = TRUE;
- min = 0;
- max = INT_MAX;
- ecode++;
- goto REPEATTYPE;
-
- case OP_TYPEPOSPLUS:
- possessive = TRUE;
- min = 1;
- max = INT_MAX;
- ecode++;
- goto REPEATTYPE;
-
- case OP_TYPEPOSQUERY:
- possessive = TRUE;
- min = 0;
- max = 1;
- ecode++;
- goto REPEATTYPE;
-
- case OP_TYPEPOSUPTO:
- possessive = TRUE;
- min = 0;
- max = GET2(ecode, 1);
+ goto REPEATTYPE;
+
+ case OP_TYPEPOSSTAR:
+ possessive = TRUE;
+ min = 0;
+ max = INT_MAX;
+ ecode++;
+ goto REPEATTYPE;
+
+ case OP_TYPEPOSPLUS:
+ possessive = TRUE;
+ min = 1;
+ max = INT_MAX;
+ ecode++;
+ goto REPEATTYPE;
+
+ case OP_TYPEPOSQUERY:
+ possessive = TRUE;
+ min = 0;
+ max = 1;
+ ecode++;
+ goto REPEATTYPE;
+
+ case OP_TYPEPOSUPTO:
+ possessive = TRUE;
+ min = 0;
+ max = GET2(ecode, 1);
ecode += 1 + IMM2_SIZE;
- goto REPEATTYPE;
-
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- c = *ecode++ - OP_TYPESTAR;
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
-
- /* Common code for all repeated single character type matches. Note that
- in UTF-8 mode, '.' matches a character of any length, but for the other
- character types, the valid characters are all one-byte long. */
-
- REPEATTYPE:
- ctype = *ecode++; /* Code for the character type */
-
-#ifdef SUPPORT_UCP
- if (ctype == OP_PROP || ctype == OP_NOTPROP)
- {
- prop_fail_result = ctype == OP_NOTPROP;
- prop_type = *ecode++;
- prop_value = *ecode++;
- }
- else prop_type = -1;
-#endif
-
- /* First, ensure the minimum number of matches are present. Use inline
- code for maximizing the speed, and do the type test once at the start
+ goto REPEATTYPE;
+
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ c = *ecode++ - OP_TYPESTAR;
+ minimize = (c & 1) != 0;
+ min = rep_min[c]; /* Pick up values from tables; */
+ max = rep_max[c]; /* zero for max => infinity */
+ if (max == 0) max = INT_MAX;
+
+ /* Common code for all repeated single character type matches. Note that
+ in UTF-8 mode, '.' matches a character of any length, but for the other
+ character types, the valid characters are all one-byte long. */
+
+ REPEATTYPE:
+ ctype = *ecode++; /* Code for the character type */
+
+#ifdef SUPPORT_UCP
+ if (ctype == OP_PROP || ctype == OP_NOTPROP)
+ {
+ prop_fail_result = ctype == OP_NOTPROP;
+ prop_type = *ecode++;
+ prop_value = *ecode++;
+ }
+ else prop_type = -1;
+#endif
+
+ /* First, ensure the minimum number of matches are present. Use inline
+ code for maximizing the speed, and do the type test once at the start
(i.e. keep it out of the loop). Separate the UTF-8 code completely as that
- is tidier. Also separate the UCP code, which can be the same for both UTF-8
- and single-bytes. */
-
- if (min > 0)
- {
-#ifdef SUPPORT_UCP
- if (prop_type >= 0)
- {
- switch(prop_type)
- {
- case PT_ANY:
- if (prop_fail_result) RRETURN(MATCH_NOMATCH);
- for (i = 1; i <= min; i++)
- {
+ is tidier. Also separate the UCP code, which can be the same for both UTF-8
+ and single-bytes. */
+
+ if (min > 0)
+ {
+#ifdef SUPPORT_UCP
+ if (prop_type >= 0)
+ {
+ switch(prop_type)
+ {
+ case PT_ANY:
+ if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- }
- break;
-
- case PT_LAMP:
- for (i = 1; i <= min; i++)
- {
+ GETCHARINCTEST(c, eptr);
+ }
+ break;
+
+ case PT_LAMP:
+ for (i = 1; i <= min; i++)
+ {
int chartype;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
+ GETCHARINCTEST(c, eptr);
chartype = UCD_CHARTYPE(c);
if ((chartype == ucp_Lu ||
chartype == ucp_Ll ||
chartype == ucp_Lt) == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case PT_GC:
- for (i = 1; i <= min; i++)
- {
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_GC:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
+ GETCHARINCTEST(c, eptr);
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case PT_PC:
- for (i = 1; i <= min; i++)
- {
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_PC:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
+ GETCHARINCTEST(c, eptr);
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case PT_SC:
- for (i = 1; i <= min; i++)
- {
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case PT_SC:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
+ GETCHARINCTEST(c, eptr);
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
case PT_ALNUM:
for (i = 1; i <= min; i++)
{
@@ -4342,20 +4342,20 @@ for (;;)
/* This should not occur */
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
- }
-
- /* Match extended Unicode sequences. We will get here only if the
- support is in the binary; otherwise a compile-time error occurs. */
-
- else if (ctype == OP_EXTUNI)
- {
- for (i = 1; i <= min; i++)
- {
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+ }
+
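/* Illustrative sketch only (not PCRE source): the property tests above all
   share one shape - derive a boolean from the character's Unicode record
   (UCD_CATEGORY, UCD_CHARTYPE or UCD_SCRIPT in the code above) and compare it
   with prop_fail_result, so that \p{..} and \P{..} can use the same loop.
   The wrapper name here is invented. */

static int prop_matches(unsigned int actual, unsigned int wanted,
  int prop_fail_result)
{
int is_wanted = (actual == wanted);
/* prop_fail_result is 0 for \p{..} and 1 for \P{..}; when the comparison
   result equals prop_fail_result the repeat fails, exactly as above. */
return is_wanted != prop_fail_result;
}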
+ /* Match extended Unicode sequences. We will get here only if the
+ support is in the binary; otherwise a compile-time error occurs. */
+
+ else if (ctype == OP_EXTUNI)
+ {
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@@ -4365,34 +4365,34 @@ for (;;)
GETCHARINCTEST(c, eptr);
lgb = UCD_GRAPHBREAK(c);
while (eptr < md->end_subject)
- {
+ {
int len = 1;
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
lgb = rgb;
eptr += len;
- }
- }
+ }
+ }
CHECK_PARTIAL();
- }
- }
-
- else
-#endif /* SUPPORT_UCP */
-
-/* Handle all other cases when the coding is UTF-8 */
-
+ }
+ }
+
+ else
+#endif /* SUPPORT_UCP */
+
+/* Handle all other cases when the coding is UTF-8 */
+
#ifdef SUPPORT_UTF
if (utf) switch(ctype)
- {
- case OP_ANY:
- for (i = 1; i <= min; i++)
- {
+ {
+ case OP_ANY:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
@@ -4404,11 +4404,11 @@ for (;;)
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
- eptr++;
+ eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
- }
- break;
-
+ }
+ break;
+
case OP_ALLANY:
for (i = 1; i <= min; i++)
{
@@ -4422,231 +4422,231 @@ for (;;)
}
break;
- case OP_ANYBYTE:
+ case OP_ANYBYTE:
if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
- eptr += min;
- break;
-
- case OP_ANYNL:
- for (i = 1; i <= min; i++)
- {
+ eptr += min;
+ break;
+
+ case OP_ANYNL:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
+ GETCHARINC(c, eptr);
+ switch(c)
+ {
+ default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
- break;
-
+ break;
+
case CHAR_LF:
- break;
-
+ break;
+
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
- break;
- }
- }
- break;
-
- case OP_NOT_HSPACE:
- for (i = 1; i <= min; i++)
- {
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
+ break;
+ }
+ }
+ break;
+
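/* Illustrative sketch only (not PCRE source): the \R (OP_ANYNL) test above.
   CR optionally followed by LF counts as a single newline, LF matches on its
   own, and VT, FF, NEL, U+2028 and U+2029 are accepted only when \R has not
   been restricted to CR, LF and CRLF (md->bsr_anycrlf in the real code).
   Returns the number of characters consumed, or 0 if there is no newline. */

static int match_anynl(const unsigned int *p, const unsigned int *end,
  int anycrlf_only)
{
if (p >= end) return 0;
switch (*p)
  {
  case 0x000d:                       /* CR, optionally followed by LF */
  return (p + 1 < end && p[1] == 0x000a)? 2 : 1;

  case 0x000a:                       /* LF */
  return 1;

  case 0x000b: case 0x000c: case 0x0085:   /* VT, FF, NEL */
  case 0x2028: case 0x2029:                /* LS, PS */
  return anycrlf_only? 0 : 1;

  default:
  return 0;
  }
}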
+ case OP_NOT_HSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
- {
+ GETCHARINC(c, eptr);
+ switch(c)
+ {
HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
- default: break;
- }
- }
- break;
-
- case OP_HSPACE:
- for (i = 1; i <= min; i++)
- {
+ default: break;
+ }
+ }
+ break;
+
+ case OP_HSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
- {
+ GETCHARINC(c, eptr);
+ switch(c)
+ {
HSPACE_CASES: break; /* Byte and multibyte cases */
- default: RRETURN(MATCH_NOMATCH);
- }
- }
- break;
-
- case OP_NOT_VSPACE:
- for (i = 1; i <= min; i++)
- {
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ }
+ break;
+
+ case OP_NOT_VSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
- {
+ GETCHARINC(c, eptr);
+ switch(c)
+ {
VSPACE_CASES: RRETURN(MATCH_NOMATCH);
- default: break;
- }
- }
- break;
-
- case OP_VSPACE:
- for (i = 1; i <= min; i++)
- {
+ default: break;
+ }
+ }
+ break;
+
+ case OP_VSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
- {
+ GETCHARINC(c, eptr);
+ switch(c)
+ {
VSPACE_CASES: break;
- default: RRETURN(MATCH_NOMATCH);
- }
- }
- break;
-
- case OP_NOT_DIGIT:
- for (i = 1; i <= min; i++)
- {
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ }
+ break;
+
+ case OP_NOT_DIGIT:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case OP_DIGIT:
- for (i = 1; i <= min; i++)
- {
+ GETCHARINC(c, eptr);
+ if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case OP_DIGIT:
+ for (i = 1; i <= min; i++)
+ {
pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
- /* No need to skip more bytes - we know it's a 1-byte character */
- }
- break;
-
- case OP_NOT_WHITESPACE:
- for (i = 1; i <= min; i++)
- {
+ /* No need to skip more bytes - we know it's a 1-byte character */
+ }
+ break;
+
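/* Illustrative sketch only (not PCRE source): the escape-class loops above
   all consult a per-character bit table (md->ctypes in the real code) with
   one bit each for "digit", "space", "word" and so on; \D, \S and \W are the
   same lookup with the sense inverted, and values outside the table never
   have a bit set. The names and bit values below are invented. */

#define EX_DIGIT 0x01
#define EX_SPACE 0x02
#define EX_WORD  0x04

static int has_ctype(const unsigned char ctypes[256], unsigned int c,
  unsigned char bit)
{
return c < 256 && (ctypes[c] & bit) != 0;
}

/* \d succeeds when the bit is set, \D when it is not, and so on. */
static int matches_escape(const unsigned char ctypes[256], unsigned int c,
  unsigned char bit, int negated)
{
int set = has_ctype(ctypes, c, bit);
return negated? !set : set;
}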
+ case OP_NOT_WHITESPACE:
+ for (i = 1; i <= min; i++)
+ {
pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
cc = UCHAR21(eptr);
if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
- }
- break;
-
- case OP_WHITESPACE:
- for (i = 1; i <= min; i++)
- {
+ }
+ break;
+
+ case OP_WHITESPACE:
+ for (i = 1; i <= min; i++)
+ {
pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
- /* No need to skip more bytes - we know it's a 1-byte character */
- }
- break;
-
- case OP_NOT_WORDCHAR:
- for (i = 1; i <= min; i++)
- {
+ /* No need to skip more bytes - we know it's a 1-byte character */
+ }
+ break;
+
+ case OP_NOT_WORDCHAR:
+ for (i = 1; i <= min; i++)
+ {
pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
cc = UCHAR21(eptr);
if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
- }
- break;
-
- case OP_WORDCHAR:
- for (i = 1; i <= min; i++)
- {
+ }
+ break;
+
+ case OP_WORDCHAR:
+ for (i = 1; i <= min; i++)
+ {
pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
cc = UCHAR21(eptr);
if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
- /* No need to skip more bytes - we know it's a 1-byte character */
- }
- break;
-
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- } /* End switch(ctype) */
-
- else
+ /* No need to skip more bytes - we know it's a 1-byte character */
+ }
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ } /* End switch(ctype) */
+
+ else
#endif /* SUPPORT_UTF */
-
- /* Code for the non-UTF-8 case for minimum matching of operators other
+
+ /* Code for the non-UTF-8 case for minimum matching of operators other
than OP_PROP and OP_NOTPROP. */
-
- switch(ctype)
- {
- case OP_ANY:
+
+ switch(ctype)
+ {
+ case OP_ANY:
for (i = 1; i <= min; i++)
- {
+ {
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
- }
+ }
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
if (md->partial != 0 &&
eptr + 1 >= md->end_subject &&
@@ -4658,9 +4658,9 @@ for (;;)
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
- }
- break;
-
+ }
+ break;
+
case OP_ALLANY:
if (eptr > md->end_subject - min)
{
@@ -4670,34 +4670,34 @@ for (;;)
eptr += min;
break;
- case OP_ANYBYTE:
+ case OP_ANYBYTE:
if (eptr > md->end_subject - min)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- eptr += min;
- break;
-
- case OP_ANYNL:
- for (i = 1; i <= min; i++)
- {
+ eptr += min;
+ break;
+
+ case OP_ANYNL:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
- {
- default: RRETURN(MATCH_NOMATCH);
+ switch(*eptr++)
+ {
+ default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
- break;
+ break;
case CHAR_LF:
- break;
-
+ break;
+
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
@@ -4705,94 +4705,94 @@ for (;;)
case 0x2028:
case 0x2029:
#endif
- if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
- break;
- }
- }
- break;
-
- case OP_NOT_HSPACE:
- for (i = 1; i <= min; i++)
- {
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
+ break;
+ }
+ }
+ break;
+
+ case OP_NOT_HSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
- {
- default: break;
+ switch(*eptr++)
+ {
+ default: break;
HSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
- RRETURN(MATCH_NOMATCH);
- }
- }
- break;
-
- case OP_HSPACE:
- for (i = 1; i <= min; i++)
- {
+ RRETURN(MATCH_NOMATCH);
+ }
+ }
+ break;
+
+ case OP_HSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
- {
- default: RRETURN(MATCH_NOMATCH);
+ switch(*eptr++)
+ {
+ default: RRETURN(MATCH_NOMATCH);
HSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
- break;
- }
- }
- break;
-
- case OP_NOT_VSPACE:
- for (i = 1; i <= min; i++)
- {
+ break;
+ }
+ }
+ break;
+
+ case OP_NOT_VSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
- {
+ switch(*eptr++)
+ {
VSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
RRETURN(MATCH_NOMATCH);
- default: break;
- }
- }
- break;
-
- case OP_VSPACE:
- for (i = 1; i <= min; i++)
- {
+ default: break;
+ }
+ }
+ break;
+
+ case OP_VSPACE:
+ for (i = 1; i <= min; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
- {
- default: RRETURN(MATCH_NOMATCH);
+ switch(*eptr++)
+ {
+ default: RRETURN(MATCH_NOMATCH);
VSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
- break;
- }
- }
- break;
-
- case OP_NOT_DIGIT:
- for (i = 1; i <= min; i++)
+ break;
+ }
+ }
+ break;
+
+ case OP_NOT_DIGIT:
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
@@ -4803,10 +4803,10 @@ for (;;)
RRETURN(MATCH_NOMATCH);
eptr++;
}
- break;
-
- case OP_DIGIT:
- for (i = 1; i <= min; i++)
+ break;
+
+ case OP_DIGIT:
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
@@ -4817,10 +4817,10 @@ for (;;)
RRETURN(MATCH_NOMATCH);
eptr++;
}
- break;
-
- case OP_NOT_WHITESPACE:
- for (i = 1; i <= min; i++)
+ break;
+
+ case OP_NOT_WHITESPACE:
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
@@ -4831,10 +4831,10 @@ for (;;)
RRETURN(MATCH_NOMATCH);
eptr++;
}
- break;
-
- case OP_WHITESPACE:
- for (i = 1; i <= min; i++)
+ break;
+
+ case OP_WHITESPACE:
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
@@ -4845,61 +4845,61 @@ for (;;)
RRETURN(MATCH_NOMATCH);
eptr++;
}
- break;
-
- case OP_NOT_WORDCHAR:
- for (i = 1; i <= min; i++)
+ break;
+
+ case OP_NOT_WORDCHAR:
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
}
- break;
-
- case OP_WORDCHAR:
- for (i = 1; i <= min; i++)
+ break;
+
+ case OP_WORDCHAR:
+ for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
}
- break;
-
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
- }
-
- /* If min = max, continue at the same level without recursing */
-
- if (min == max) continue;
-
- /* If minimizing, we have to test the rest of the pattern before each
- subsequent match. Again, separate the UTF-8 case for speed, and also
- separate the UCP cases. */
-
- if (minimize)
- {
-#ifdef SUPPORT_UCP
- if (prop_type >= 0)
- {
- switch(prop_type)
- {
- case PT_ANY:
- for (fi = min;; fi++)
- {
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+ }
+
+ /* If min = max, continue at the same level without recursing */
+
+ if (min == max) continue;
+
+ /* If minimizing, we have to test the rest of the pattern before each
+ subsequent match. Again, separate the UTF-8 case for speed, and also
+ separate the UCP cases. */
+
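/* Illustrative sketch only (not PCRE source): the shape of every "minimize"
   loop above - try the rest of the pattern first, and only when that fails
   consume one more repetition, up to the maximum. The continuation and the
   per-character test are callbacks here; both names are invented, and the
   mandatory minimum is assumed to have been consumed already, as above. */

static int lazy_repeat(const unsigned char *p, const unsigned char *end,
  int min, int max,
  int (*item_ok)(unsigned int c),
  int (*rest_matches)(const unsigned char *p))
{
int count;
for (count = min;; count++)
  {
  if (rest_matches(p)) return 1;     /* Shortest successful match wins */
  if (count >= max || p >= end) return 0;
  if (!item_ok(*p)) return 0;        /* Cannot extend the repeat further */
  p++;
  }
}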
+ if (minimize)
+ {
+#ifdef SUPPORT_UCP
+ if (prop_type >= 0)
+ {
+ switch(prop_type)
+ {
+ case PT_ANY:
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
@@ -4907,21 +4907,21 @@ for (;;)
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
- if (prop_fail_result) RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
-
- case PT_LAMP:
- for (fi = min;; fi++)
- {
+ if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never gets here */
+
+ case PT_LAMP:
+ for (fi = min;; fi++)
+ {
int chartype;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
chartype = UCD_CHARTYPE(c);
@@ -4929,60 +4929,60 @@ for (;;)
chartype == ucp_Ll ||
chartype == ucp_Lt) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
-
- case PT_GC:
- for (fi = min;; fi++)
- {
+ }
+ /* Control never gets here */
+
+ case PT_GC:
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
-
- case PT_PC:
- for (fi = min;; fi++)
- {
+ }
+ /* Control never gets here */
+
+ case PT_PC:
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
-
- case PT_SC:
- for (fi = min;; fi++)
- {
+ }
+ /* Control never gets here */
+
+ case PT_SC:
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
-
+ }
+ /* Control never gets here */
+
case PT_ALNUM:
for (fi = min;; fi++)
{
@@ -5100,23 +5100,23 @@ for (;;)
/* Control never gets here */
/* This should never occur */
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
- }
-
- /* Match extended Unicode sequences. We will get here only if the
- support is in the binary; otherwise a compile-time error occurs. */
-
- else if (ctype == OP_EXTUNI)
- {
- for (fi = min;; fi++)
- {
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+ }
+
+ /* Match extended Unicode sequences. We will get here only if the
+ support is in the binary; otherwise a compile-time error occurs. */
+
+ else if (ctype == OP_EXTUNI)
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
@@ -5126,39 +5126,39 @@ for (;;)
GETCHARINCTEST(c, eptr);
lgb = UCD_GRAPHBREAK(c);
while (eptr < md->end_subject)
- {
+ {
int len = 1;
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
lgb = rgb;
eptr += len;
- }
- }
+ }
+ }
CHECK_PARTIAL();
- }
- }
- else
-#endif /* SUPPORT_UCP */
-
+ }
+ }
+ else
+#endif /* SUPPORT_UCP */
+
#ifdef SUPPORT_UTF
if (utf)
- {
- for (fi = min;; fi++)
- {
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (ctype == OP_ANY && IS_NEWLINE(eptr))
RRETURN(MATCH_NOMATCH);
- GETCHARINC(c, eptr);
- switch(ctype)
- {
+ GETCHARINC(c, eptr);
+ switch(ctype)
+ {
case OP_ANY: /* This is the non-NL case */
if (md->partial != 0 && /* Take care with CRLF partial */
eptr >= md->end_subject &&
@@ -5169,121 +5169,121 @@ for (;;)
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
- break;
-
+ break;
+
case OP_ALLANY:
- case OP_ANYBYTE:
- break;
-
- case OP_ANYNL:
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
+ case OP_ANYBYTE:
+ break;
+
+ case OP_ANYNL:
+ switch(c)
+ {
+ default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
- break;
+ break;
case CHAR_LF:
- break;
-
+ break;
+
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
+ case 0x2028:
+ case 0x2029:
#endif /* Not EBCDIC */
- if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
- break;
- }
- break;
-
- case OP_NOT_HSPACE:
- switch(c)
- {
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
+ break;
+ }
+ break;
+
+ case OP_NOT_HSPACE:
+ switch(c)
+ {
HSPACE_CASES: RRETURN(MATCH_NOMATCH);
- default: break;
- }
- break;
-
- case OP_HSPACE:
- switch(c)
- {
+ default: break;
+ }
+ break;
+
+ case OP_HSPACE:
+ switch(c)
+ {
HSPACE_CASES: break;
- default: RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case OP_NOT_VSPACE:
- switch(c)
- {
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case OP_NOT_VSPACE:
+ switch(c)
+ {
VSPACE_CASES: RRETURN(MATCH_NOMATCH);
- default: break;
- }
- break;
-
- case OP_VSPACE:
- switch(c)
- {
+ default: break;
+ }
+ break;
+
+ case OP_VSPACE:
+ switch(c)
+ {
VSPACE_CASES: break;
- default: RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case OP_NOT_DIGIT:
- if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
- RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_DIGIT:
- if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
- RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_NOT_WHITESPACE:
- if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
- RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_WHITESPACE:
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case OP_NOT_DIGIT:
+ if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case OP_DIGIT:
+ if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case OP_NOT_WHITESPACE:
+ if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case OP_WHITESPACE:
if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
- RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_NOT_WORDCHAR:
- if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
- RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_WORDCHAR:
- if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
- RRETURN(MATCH_NOMATCH);
- break;
-
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
- }
- }
- else
-#endif
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case OP_NOT_WORDCHAR:
+ if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case OP_WORDCHAR:
+ if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+ }
+ }
+ else
+#endif
/* Not UTF mode */
- {
- for (fi = min;; fi++)
- {
+ {
+ for (fi = min;; fi++)
+ {
RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
}
if (ctype == OP_ANY && IS_NEWLINE(eptr))
RRETURN(MATCH_NOMATCH);
- c = *eptr++;
- switch(ctype)
- {
+ c = *eptr++;
+ switch(ctype)
+ {
case OP_ANY: /* This is the non-NL case */
if (md->partial != 0 && /* Take care with CRLF partial */
eptr >= md->end_subject &&
@@ -5294,23 +5294,23 @@ for (;;)
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
- break;
-
+ break;
+
case OP_ALLANY:
- case OP_ANYBYTE:
- break;
-
- case OP_ANYNL:
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
+ case OP_ANYBYTE:
+ break;
+
+ case OP_ANYNL:
+ switch(c)
+ {
+ default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
- break;
-
+ break;
+
case CHAR_LF:
- break;
-
+ break;
+
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
@@ -5318,128 +5318,128 @@ for (;;)
case 0x2028:
case 0x2029:
#endif
- if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
- break;
- }
- break;
-
- case OP_NOT_HSPACE:
- switch(c)
- {
- default: break;
+ if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
+ break;
+ }
+ break;
+
+ case OP_NOT_HSPACE:
+ switch(c)
+ {
+ default: break;
HSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case OP_HSPACE:
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case OP_HSPACE:
+ switch(c)
+ {
+ default: RRETURN(MATCH_NOMATCH);
HSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
HSPACE_MULTIBYTE_CASES:
#endif
- break;
- }
- break;
-
- case OP_NOT_VSPACE:
- switch(c)
- {
- default: break;
+ break;
+ }
+ break;
+
+ case OP_NOT_VSPACE:
+ switch(c)
+ {
+ default: break;
VSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- case OP_VSPACE:
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
+ case OP_VSPACE:
+ switch(c)
+ {
+ default: RRETURN(MATCH_NOMATCH);
VSPACE_BYTE_CASES:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
VSPACE_MULTIBYTE_CASES:
#endif
- break;
- }
- break;
-
- case OP_NOT_DIGIT:
+ break;
+ }
+ break;
+
+ case OP_NOT_DIGIT:
if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_DIGIT:
+ break;
+
+ case OP_DIGIT:
if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_NOT_WHITESPACE:
+ break;
+
+ case OP_NOT_WHITESPACE:
if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_WHITESPACE:
+ break;
+
+ case OP_WHITESPACE:
if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_NOT_WORDCHAR:
+ break;
+
+ case OP_NOT_WORDCHAR:
if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
- break;
-
- case OP_WORDCHAR:
+ break;
+
+ case OP_WORDCHAR:
if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
- break;
-
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
- }
- }
- /* Control never gets here */
- }
-
- /* If maximizing, it is worth using inline code for speed, doing the type
- test once at the start (i.e. keep it out of the loop). Again, keep the
- UTF-8 and UCP stuff separate. */
-
- else
- {
- pp = eptr; /* Remember where we started */
-
-#ifdef SUPPORT_UCP
- if (prop_type >= 0)
- {
- switch(prop_type)
- {
- case PT_ANY:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+ }
+ }
+ /* Control never gets here */
+ }
+
+ /* If maximizing, it is worth using inline code for speed, doing the type
+ test once at the start (i.e. keep it out of the loop). Again, keep the
+ UTF-8 and UCP stuff separate. */
+
+ else
+ {
+ pp = eptr; /* Remember where we started */
+
+#ifdef SUPPORT_UCP
+ if (prop_type >= 0)
+ {
+ switch(prop_type)
+ {
+ case PT_ANY:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
GETCHARLENTEST(c, eptr, len);
- if (prop_fail_result) break;
- eptr+= len;
- }
- break;
-
- case PT_LAMP:
- for (i = min; i < max; i++)
- {
+ if (prop_fail_result) break;
+ eptr+= len;
+ }
+ break;
+
+ case PT_LAMP:
+ for (i = min; i < max; i++)
+ {
int chartype;
- int len = 1;
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
GETCHARLENTEST(c, eptr, len);
chartype = UCD_CHARTYPE(c);
@@ -5447,54 +5447,54 @@ for (;;)
chartype == ucp_Ll ||
chartype == ucp_Lt) == prop_fail_result)
break;
- eptr+= len;
- }
- break;
-
- case PT_GC:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ eptr+= len;
+ }
+ break;
+
+ case PT_GC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
GETCHARLENTEST(c, eptr, len);
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
- eptr+= len;
- }
- break;
-
- case PT_PC:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ eptr+= len;
+ }
+ break;
+
+ case PT_PC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
GETCHARLENTEST(c, eptr, len);
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
- eptr+= len;
- }
- break;
-
- case PT_SC:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ eptr+= len;
+ }
+ break;
+
+ case PT_SC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
GETCHARLENTEST(c, eptr, len);
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
- eptr+= len;
- }
- break;
+ eptr+= len;
+ }
+ break;
case PT_ALNUM:
for (i = min; i < max; i++)
@@ -5609,30 +5609,30 @@ for (;;)
default:
RRETURN(PCRE_ERROR_INTERNAL);
- }
-
- /* eptr is now past the end of the maximum run */
-
+ }
+
+ /* eptr is now past the end of the maximum run */
+
if (possessive) continue; /* No backtracking */
- for(;;)
- {
+ for(;;)
+ {
if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
if (utf) BACKCHAR(eptr);
- }
- }
-
+ }
+ }
+
/* Match extended Unicode grapheme clusters. We will get here only if the
- support is in the binary; otherwise a compile-time error occurs. */
-
- else if (ctype == OP_EXTUNI)
- {
- for (i = min; i < max; i++)
- {
+ support is in the binary; otherwise a compile-time error occurs. */
+
+ else if (ctype == OP_EXTUNI)
+ {
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
break;
}
@@ -5642,20 +5642,20 @@ for (;;)
GETCHARINCTEST(c, eptr);
lgb = UCD_GRAPHBREAK(c);
while (eptr < md->end_subject)
- {
+ {
int len = 1;
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
lgb = rgb;
eptr += len;
- }
- }
+ }
+ }
CHECK_PARTIAL();
- }
-
- /* eptr is now past the end of the maximum run */
-
+ }
+
+ /* eptr is now past the end of the maximum run */
+
if (possessive) continue; /* No backtracking */
/* We use <= pp rather than == pp to detect the start of the run while
@@ -5663,14 +5663,14 @@ for (;;)
move back past pp. This is just palliative; the use of \C in UTF mode
is fraught with danger. */
- for(;;)
- {
+ for(;;)
+ {
int lgb, rgb;
PCRE_PUCHAR fptr;
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
/* Backtracking over an extended grapheme cluster involves inspecting
the previous two characters (if present) to see if a break is
@@ -5678,7 +5678,7 @@ for (;;)
eptr--;
if (!utf) c = *eptr; else
- {
+ {
BACKCHAR(eptr);
GETCHAR(c, eptr);
}
@@ -5689,276 +5689,276 @@ for (;;)
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
fptr = eptr - 1;
if (!utf) c = *fptr; else
- {
+ {
BACKCHAR(fptr);
GETCHAR(c, fptr);
- }
+ }
lgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
eptr = fptr;
rgb = lgb;
- }
- }
- }
-
- else
-#endif /* SUPPORT_UCP */
-
+ }
+ }
+ }
+
+ else
+#endif /* SUPPORT_UCP */
+
#ifdef SUPPORT_UTF
if (utf)
- {
- switch(ctype)
- {
- case OP_ANY:
+ {
+ switch(ctype)
+ {
+ case OP_ANY:
for (i = min; i < max; i++)
- {
+ {
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
break;
- }
+ }
if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
UCHAR21(eptr) == NLBLOCK->nl[0])
- {
+ {
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
+ }
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
- }
+ }
break;
-
+
case OP_ALLANY:
if (max < INT_MAX)
- {
+ {
for (i = min; i < max; i++)
- {
+ {
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
break;
- }
+ }
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
- }
- }
+ }
+ }
else
{
eptr = md->end_subject; /* Unlimited UTF-8 repeat */
SCHECK_PARTIAL();
}
- break;
-
- /* The byte case is the same as non-UTF8 */
-
- case OP_ANYBYTE:
- c = max - min;
- if (c > (unsigned int)(md->end_subject - eptr))
+ break;
+
+ /* The byte case is the same as non-UTF8 */
+
+ case OP_ANYBYTE:
+ c = max - min;
+ if (c > (unsigned int)(md->end_subject - eptr))
{
eptr = md->end_subject;
SCHECK_PARTIAL();
}
else eptr += c;
- break;
-
- case OP_ANYNL:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ break;
+
+ case OP_ANYNL:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLEN(c, eptr, len);
if (c == CHAR_CR)
- {
- if (++eptr >= md->end_subject) break;
+ {
+ if (++eptr >= md->end_subject) break;
if (UCHAR21(eptr) == CHAR_LF) eptr++;
- }
- else
- {
+ }
+ else
+ {
if (c != CHAR_LF &&
- (md->bsr_anycrlf ||
+ (md->bsr_anycrlf ||
(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
#ifndef EBCDIC
&& c != 0x2028 && c != 0x2029
#endif /* Not EBCDIC */
)))
- break;
- eptr += len;
- }
- }
- break;
-
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- for (i = min; i < max; i++)
- {
- BOOL gotspace;
- int len = 1;
+ break;
+ eptr += len;
+ }
+ }
+ break;
+
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ for (i = min; i < max; i++)
+ {
+ BOOL gotspace;
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- switch(c)
- {
+ GETCHARLEN(c, eptr, len);
+ switch(c)
+ {
HSPACE_CASES: gotspace = TRUE; break;
- default: gotspace = FALSE; break;
- }
- if (gotspace == (ctype == OP_NOT_HSPACE)) break;
- eptr += len;
- }
- break;
-
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- for (i = min; i < max; i++)
- {
- BOOL gotspace;
- int len = 1;
+ default: gotspace = FALSE; break;
+ }
+ if (gotspace == (ctype == OP_NOT_HSPACE)) break;
+ eptr += len;
+ }
+ break;
+
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
+ for (i = min; i < max; i++)
+ {
+ BOOL gotspace;
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- switch(c)
- {
+ GETCHARLEN(c, eptr, len);
+ switch(c)
+ {
VSPACE_CASES: gotspace = TRUE; break;
- default: gotspace = FALSE; break;
- }
- if (gotspace == (ctype == OP_NOT_VSPACE)) break;
- eptr += len;
- }
- break;
-
- case OP_NOT_DIGIT:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ default: gotspace = FALSE; break;
+ }
+ if (gotspace == (ctype == OP_NOT_VSPACE)) break;
+ eptr += len;
+ }
+ break;
+
+ case OP_NOT_DIGIT:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
- eptr+= len;
- }
- break;
-
- case OP_DIGIT:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ GETCHARLEN(c, eptr, len);
+ if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
+ eptr+= len;
+ }
+ break;
+
+ case OP_DIGIT:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
- eptr+= len;
- }
- break;
-
- case OP_NOT_WHITESPACE:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ GETCHARLEN(c, eptr, len);
+ if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
+ eptr+= len;
+ }
+ break;
+
+ case OP_NOT_WHITESPACE:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
- eptr+= len;
- }
- break;
-
- case OP_WHITESPACE:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ GETCHARLEN(c, eptr, len);
+ if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
+ eptr+= len;
+ }
+ break;
+
+ case OP_WHITESPACE:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
- eptr+= len;
- }
- break;
-
- case OP_NOT_WORDCHAR:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ GETCHARLEN(c, eptr, len);
+ if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
+ eptr+= len;
+ }
+ break;
+
+ case OP_NOT_WORDCHAR:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
- eptr+= len;
- }
- break;
-
- case OP_WORDCHAR:
- for (i = min; i < max; i++)
- {
- int len = 1;
+ GETCHARLEN(c, eptr, len);
+ if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
+ eptr+= len;
+ }
+ break;
+
+ case OP_WORDCHAR:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
- eptr+= len;
- }
- break;
-
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
-
+ GETCHARLEN(c, eptr, len);
+ if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
+ eptr+= len;
+ }
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+
if (possessive) continue; /* No backtracking */
- for(;;)
- {
+ for(;;)
+ {
if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
- BACKCHAR(eptr);
+ BACKCHAR(eptr);
if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL &&
UCHAR21(eptr - 1) == CHAR_CR) eptr--;
- }
- }
- else
+ }
+ }
+ else
#endif /* SUPPORT_UTF */
/* Not UTF mode */
- {
- switch(ctype)
- {
- case OP_ANY:
+ {
+ switch(ctype)
+ {
+ case OP_ANY:
for (i = min; i < max; i++)
- {
+ {
if (eptr >= md->end_subject)
- {
+ {
SCHECK_PARTIAL();
break;
- }
+ }
if (IS_NEWLINE(eptr)) break;
if (md->partial != 0 && /* Take care with CRLF partial */
eptr + 1 >= md->end_subject &&
@@ -5970,50 +5970,50 @@ for (;;)
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
eptr++;
- }
+ }
break;
-
+
case OP_ALLANY:
- case OP_ANYBYTE:
- c = max - min;
- if (c > (unsigned int)(md->end_subject - eptr))
+ case OP_ANYBYTE:
+ c = max - min;
+ if (c > (unsigned int)(md->end_subject - eptr))
{
eptr = md->end_subject;
SCHECK_PARTIAL();
}
else eptr += c;
- break;
-
- case OP_ANYNL:
- for (i = min; i < max; i++)
- {
+ break;
+
+ case OP_ANYNL:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- c = *eptr;
+ c = *eptr;
if (c == CHAR_CR)
- {
- if (++eptr >= md->end_subject) break;
+ {
+ if (++eptr >= md->end_subject) break;
if (*eptr == CHAR_LF) eptr++;
- }
- else
- {
+ }
+ else
+ {
if (c != CHAR_LF && (md->bsr_anycrlf ||
(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
&& c != 0x2028 && c != 0x2029
#endif
))) break;
- eptr++;
- }
- }
- break;
-
- case OP_NOT_HSPACE:
- for (i = min; i < max; i++)
- {
+ eptr++;
+ }
+ }
+ break;
+
+ case OP_NOT_HSPACE:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -6028,13 +6028,13 @@ for (;;)
#endif
goto ENDLOOP00;
}
- }
+ }
ENDLOOP00:
- break;
-
- case OP_HSPACE:
- for (i = min; i < max; i++)
- {
+ break;
+
+ case OP_HSPACE:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -6049,17 +6049,17 @@ for (;;)
#endif
eptr++; break;
}
- }
+ }
ENDLOOP01:
- break;
-
- case OP_NOT_VSPACE:
- for (i = min; i < max; i++)
- {
+ break;
+
+ case OP_NOT_VSPACE:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
switch(*eptr)
{
@@ -6070,17 +6070,17 @@ for (;;)
#endif
goto ENDLOOP02;
}
- }
+ }
ENDLOOP02:
- break;
-
- case OP_VSPACE:
- for (i = min; i < max; i++)
- {
+ break;
+
+ case OP_VSPACE:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
switch(*eptr)
{
@@ -6091,136 +6091,136 @@ for (;;)
#endif
eptr++; break;
}
- }
+ }
ENDLOOP03:
- break;
-
- case OP_NOT_DIGIT:
- for (i = min; i < max; i++)
- {
+ break;
+
+ case OP_NOT_DIGIT:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
- eptr++;
- }
- break;
-
- case OP_DIGIT:
- for (i = min; i < max; i++)
- {
+ eptr++;
+ }
+ break;
+
+ case OP_DIGIT:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
- eptr++;
- }
- break;
-
- case OP_NOT_WHITESPACE:
- for (i = min; i < max; i++)
- {
+ eptr++;
+ }
+ break;
+
+ case OP_NOT_WHITESPACE:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
- eptr++;
- }
- break;
-
- case OP_WHITESPACE:
- for (i = min; i < max; i++)
- {
+ eptr++;
+ }
+ break;
+
+ case OP_WHITESPACE:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
- eptr++;
- }
- break;
-
- case OP_NOT_WORDCHAR:
- for (i = min; i < max; i++)
- {
+ eptr++;
+ }
+ break;
+
+ case OP_NOT_WORDCHAR:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
- eptr++;
- }
- break;
-
- case OP_WORDCHAR:
- for (i = min; i < max; i++)
- {
+ eptr++;
+ }
+ break;
+
+ case OP_WORDCHAR:
+ for (i = min; i < max; i++)
+ {
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
- break;
+ break;
}
if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
- eptr++;
- }
- break;
-
- default:
- RRETURN(PCRE_ERROR_INTERNAL);
- }
-
+ eptr++;
+ }
+ break;
+
+ default:
+ RRETURN(PCRE_ERROR_INTERNAL);
+ }
+
if (possessive) continue; /* No backtracking */
for (;;)
- {
+ {
if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
+ eptr--;
if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF &&
eptr[-1] == CHAR_CR) eptr--;
- }
- }
-
+ }
+ }
+
/* Control never gets here */
- }
-
- /* There's been some horrible disaster. Arrival here can only mean there is
- something seriously wrong in the code above or the OP_xxx definitions. */
-
- default:
- DPRINTF(("Unknown opcode %d\n", *ecode));
- RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
- }
-
- /* Do not stick any code in here without much thought; it is assumed
- that "continue" in the code above comes out to here to repeat the main
- loop. */
-
- } /* End of main loop */
-/* Control never reaches here */
-
-
-/* When compiling to use the heap rather than the stack for recursive calls to
-match(), the RRETURN() macro jumps here. The number that is saved in
-frame->Xwhere indicates which label we actually want to return to. */
-
-#ifdef NO_RECURSE
-#define LBL(val) case val: goto L_RM##val;
-HEAP_RETURN:
-switch (frame->Xwhere)
- {
- LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
- LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
- LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
- LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
+ }
+
+ /* There's been some horrible disaster. Arrival here can only mean there is
+ something seriously wrong in the code above or the OP_xxx definitions. */
+
+ default:
+ DPRINTF(("Unknown opcode %d\n", *ecode));
+ RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
+ }
+
+ /* Do not stick any code in here without much thought; it is assumed
+ that "continue" in the code above comes out to here to repeat the main
+ loop. */
+
+ } /* End of main loop */
+/* Control never reaches here */
+
+
+/* When compiling to use the heap rather than the stack for recursive calls to
+match(), the RRETURN() macro jumps here. The number that is saved in
+frame->Xwhere indicates which label we actually want to return to. */
+
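In outline, each RMATCH(..., RMnn) call saves the enum value RMnn in the new frame's Xwhere and plants a label L_RMnn immediately after the call; when a nested RRETURN() unwinds back to that frame, control arrives here and the matching LBL() case jumps to that label. A small illustration, using the definition just below:

/* LBL(36) expands to:  case 36: goto L_RM36;                               */
/* so a frame created by RMATCH(eptr, ecode, offset_top, md, eptrb, RM36)   */
/* resumes at the statement that follows that RMATCH call.                  */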
+#ifdef NO_RECURSE
+#define LBL(val) case val: goto L_RM##val;
+HEAP_RETURN:
+switch (frame->Xwhere)
+ {
+ LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
+ LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
+ LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
+ LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
LBL(65) LBL(66)
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
@@ -6229,80 +6229,80 @@ switch (frame->Xwhere)
#ifdef SUPPORT_UTF
LBL(16) LBL(18)
LBL(22) LBL(23) LBL(28) LBL(30)
- LBL(32) LBL(34) LBL(42) LBL(46)
-#ifdef SUPPORT_UCP
- LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
+ LBL(32) LBL(34) LBL(42) LBL(46)
+#ifdef SUPPORT_UCP
+ LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
-#endif /* SUPPORT_UCP */
+#endif /* SUPPORT_UCP */
#endif /* SUPPORT_UTF */
- default:
- DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
- return PCRE_ERROR_INTERNAL;
- }
-#undef LBL
-#endif /* NO_RECURSE */
-}
-
-
-/***************************************************************************
-****************************************************************************
- RECURSION IN THE match() FUNCTION
-
-Undefine all the macros that were defined above to handle this. */
-
-#ifdef NO_RECURSE
-#undef eptr
-#undef ecode
-#undef mstart
-#undef offset_top
-#undef eptrb
-#undef flags
-
-#undef callpat
-#undef charptr
-#undef data
-#undef next
-#undef pp
-#undef prev
-#undef saved_eptr
-
-#undef new_recursive
-
-#undef cur_is_word
-#undef condition
-#undef prev_is_word
-
-#undef ctype
-#undef length
-#undef max
-#undef min
-#undef number
-#undef offset
-#undef op
-#undef save_capture_last
-#undef save_offset1
-#undef save_offset2
-#undef save_offset3
-#undef stacksave
-
-#undef newptrb
-
-#endif
-
-/* These two are defined as macros in both cases */
-
-#undef fc
-#undef fi
-
-/***************************************************************************
-***************************************************************************/
-
-
+ default:
+ DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
+ return PCRE_ERROR_INTERNAL;
+ }
+#undef LBL
+#endif /* NO_RECURSE */
+}
+
+
+/***************************************************************************
+****************************************************************************
+ RECURSION IN THE match() FUNCTION
+
+Undefine all the macros that were defined above to handle this. */
+
+#ifdef NO_RECURSE
+#undef eptr
+#undef ecode
+#undef mstart
+#undef offset_top
+#undef eptrb
+#undef flags
+
+#undef callpat
+#undef charptr
+#undef data
+#undef next
+#undef pp
+#undef prev
+#undef saved_eptr
+
+#undef new_recursive
+
+#undef cur_is_word
+#undef condition
+#undef prev_is_word
+
+#undef ctype
+#undef length
+#undef max
+#undef min
+#undef number
+#undef offset
+#undef op
+#undef save_capture_last
+#undef save_offset1
+#undef save_offset2
+#undef save_offset3
+#undef stacksave
+
+#undef newptrb
+
+#endif
+
+/* These two are defined as macros in both cases */
+
+#undef fc
+#undef fi
+
+/***************************************************************************
+***************************************************************************/
+
+
#ifdef NO_RECURSE
/*************************************************
* Release allocated heap frames *
*************************************************/
-
+
/* This function releases all the allocated frames. The base frame is on the
machine stack, and so must not be freed.
@@ -6324,35 +6324,35 @@ while (nextframe != NULL)
#endif
-/*************************************************
-* Execute a Regular Expression *
-*************************************************/
-
-/* This function applies a compiled re to a subject string and picks out
-portions of the string if it matches. Two elements in the vector are set for
-each substring: the offsets to the start and end of the substring.
-
-Arguments:
- argument_re points to the compiled expression
- extra_data points to extra data or is NULL
- subject points to the subject string
- length length of subject string (may contain binary zeros)
- start_offset where to start in the subject string
- options option bits
- offsets points to a vector of ints to be filled in with offsets
- offsetcount the number of elements in the vector
-
-Returns: > 0 => success; value is the number of elements filled in
- = 0 => success, but offsets is not big enough
- -1 => failed to match
- < -1 => some kind of unexpected problem
-*/
-
+/*************************************************
+* Execute a Regular Expression *
+*************************************************/
+
+/* This function applies a compiled re to a subject string and picks out
+portions of the string if it matches. Two elements in the vector are set for
+each substring: the offsets to the start and end of the substring.
+
+Arguments:
+ argument_re points to the compiled expression
+ extra_data points to extra data or is NULL
+ subject points to the subject string
+ length length of subject string (may contain binary zeros)
+ start_offset where to start in the subject string
+ options option bits
+ offsets points to a vector of ints to be filled in with offsets
+ offsetcount the number of elements in the vector
+
+Returns: > 0 => success; value is the number of elements filled in
+ = 0 => success, but offsets is not big enough
+ -1 => failed to match
+ < -1 => some kind of unexpected problem
+*/
+
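For orientation, the 8-bit entry point documented above is typically driven as in the following minimal sketch; the pattern, subject and ovector size are illustrative, while pcre_compile(), pcre_exec() and pcre_free() are the standard public calls:

#include <pcre.h>
#include <stdio.h>

int example(void)
{
const char *error;
int erroffset;
int rc;
int ovector[30];  /* a multiple of 3: 2/3 carries start/end pairs, 1/3 is workspace */
pcre *re = pcre_compile("(\\d+)", 0, &error, &erroffset, NULL);
if (re == NULL) return 1;
rc = pcre_exec(re, NULL, "abc123", 6, 0, 0, ovector, 30);
if (rc > 0) printf("matched bytes %d..%d\n", ovector[0], ovector[1]);
pcre_free(re);
return rc > 0? 0 : 1;
}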
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
- PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
- int offsetcount)
+pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
+ PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
@@ -6364,13 +6364,13 @@ pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
#endif
-{
+{
int rc, ocount, arg_offset_max;
-int newline;
-BOOL using_temporary_offsets = FALSE;
-BOOL anchored;
-BOOL startline;
-BOOL firstline;
+int newline;
+BOOL using_temporary_offsets = FALSE;
+BOOL anchored;
+BOOL startline;
+BOOL firstline;
BOOL utf;
BOOL has_first_char = FALSE;
BOOL has_req_char = FALSE;
@@ -6378,8 +6378,8 @@ pcre_uchar first_char = 0;
pcre_uchar first_char2 = 0;
pcre_uchar req_char = 0;
pcre_uchar req_char2 = 0;
-match_data match_block;
-match_data *md = &match_block;
+match_data match_block;
+match_data *md = &match_block;
const pcre_uint8 *tables;
const pcre_uint8 *start_bits = NULL;
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
@@ -6387,17 +6387,17 @@ PCRE_PUCHAR end_subject;
PCRE_PUCHAR start_partial = NULL;
PCRE_PUCHAR match_partial = NULL;
PCRE_PUCHAR req_char_ptr = start_match - 1;
-
-const pcre_study_data *study;
+
+const pcre_study_data *study;
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
-
+
#ifdef NO_RECURSE
heapframe frame_zero;
frame_zero.Xprevframe = NULL; /* Marks the top level */
frame_zero.Xnextframe = NULL; /* None are allocated yet */
md->match_frames_base = &frame_zero;
#endif
-
+
/* Check for the special magic call that measures the size of the stack used
per recursive call of match(). Without the funny casting for sizeof, a Windows
compiler gave this error: "unary minus operator applied to unsigned type,
@@ -6411,15 +6411,15 @@ if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
#endif
-/* Plausibility checks */
-
-if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
+/* Plausibility checks */
+
+if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
return PCRE_ERROR_NULL;
-if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
+if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (length < 0) return PCRE_ERROR_BADLENGTH;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
-
+
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
@@ -6503,34 +6503,34 @@ md->name_table = (pcre_uchar *)re + re->name_table_offset;
md->name_count = re->name_count;
md->name_entry_size = re->name_entry_size;
-/* Fish out the optional data from the extra_data structure, first setting
-the default values. */
-
-study = NULL;
-md->match_limit = MATCH_LIMIT;
-md->match_limit_recursion = MATCH_LIMIT_RECURSION;
-md->callout_data = NULL;
-
-/* The table pointer is always in native byte order. */
-
+/* Fish out the optional data from the extra_data structure, first setting
+the default values. */
+
+study = NULL;
+md->match_limit = MATCH_LIMIT;
+md->match_limit_recursion = MATCH_LIMIT_RECURSION;
+md->callout_data = NULL;
+
+/* The table pointer is always in native byte order. */
+
tables = re->tables;
-
+
/* The two limit values override the defaults, whatever their value. */
-if (extra_data != NULL)
- {
+if (extra_data != NULL)
+ {
unsigned long int flags = extra_data->flags;
- if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
- study = (const pcre_study_data *)extra_data->study_data;
- if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
- md->match_limit = extra_data->match_limit;
- if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
- md->match_limit_recursion = extra_data->match_limit_recursion;
- if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
- md->callout_data = extra_data->callout_data;
- if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
- }
-
+ if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
+ study = (const pcre_study_data *)extra_data->study_data;
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
+ md->match_limit = extra_data->match_limit;
+ if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
+ md->match_limit_recursion = extra_data->match_limit_recursion;
+ if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
+ md->callout_data = extra_data->callout_data;
+ if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
+ }
+
/* Limits in the regex override only if they are smaller. */
if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
@@ -6540,171 +6540,171 @@ if ((re->flags & PCRE_RLSET) != 0 &&
re->limit_recursion < md->match_limit_recursion)
md->match_limit_recursion = re->limit_recursion;
-/* If the exec call supplied NULL for tables, use the inbuilt ones. This
-is a feature that makes it possible to save compiled regex and re-use them
-in other programs later. */
-
+/* If the exec call supplied NULL for tables, use the inbuilt ones. This
+is a feature that makes it possible to save compiled regex and re-use them
+in other programs later. */
+
if (tables == NULL) tables = PRIV(default_tables);
-
-/* Set up other data */
-
-anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
-startline = (re->flags & PCRE_STARTLINE) != 0;
-firstline = (re->options & PCRE_FIRSTLINE) != 0;
-
-/* The code starts after the real_pcre block and the capture name table. */
-
+
+/* Set up other data */
+
+anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+startline = (re->flags & PCRE_STARTLINE) != 0;
+firstline = (re->options & PCRE_FIRSTLINE) != 0;
+
+/* The code starts after the real_pcre block and the capture name table. */
+
md->start_code = (const pcre_uchar *)re + re->name_table_offset +
- re->name_count * re->name_entry_size;
-
+ re->name_count * re->name_entry_size;
+
md->start_subject = (PCRE_PUCHAR)subject;
-md->start_offset = start_offset;
-md->end_subject = md->start_subject + length;
-end_subject = md->end_subject;
-
-md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+md->start_offset = start_offset;
+md->end_subject = md->start_subject + length;
+end_subject = md->end_subject;
+
+md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
md->use_ucp = (re->options & PCRE_UCP) != 0;
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
md->ignore_skip_arg = 0;
-
+
/* Some options are unpacked into BOOL variables in the hope that testing
them will be faster than individual option bits. */
-md->notbol = (options & PCRE_NOTBOL) != 0;
-md->noteol = (options & PCRE_NOTEOL) != 0;
-md->notempty = (options & PCRE_NOTEMPTY) != 0;
+md->notbol = (options & PCRE_NOTBOL) != 0;
+md->noteol = (options & PCRE_NOTEOL) != 0;
+md->notempty = (options & PCRE_NOTEMPTY) != 0;
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
-md->hitend = FALSE;
+md->hitend = FALSE;
md->mark = md->nomatch_mark = NULL; /* In case never set */
-
-md->recursive = NULL; /* No recursion at top level */
+
+md->recursive = NULL; /* No recursion at top level */
md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
-
-md->lcc = tables + lcc_offset;
+
+md->lcc = tables + lcc_offset;
md->fcc = tables + fcc_offset;
-md->ctypes = tables + ctypes_offset;
-
-/* Handle different \R options. */
-
-switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
- {
- case 0:
- if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
- md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
- else
-#ifdef BSR_ANYCRLF
- md->bsr_anycrlf = TRUE;
-#else
- md->bsr_anycrlf = FALSE;
-#endif
- break;
-
- case PCRE_BSR_ANYCRLF:
- md->bsr_anycrlf = TRUE;
- break;
-
- case PCRE_BSR_UNICODE:
- md->bsr_anycrlf = FALSE;
- break;
-
- default: return PCRE_ERROR_BADNEWLINE;
- }
-
-/* Handle different types of newline. The three bits give eight cases. If
-nothing is set at run time, whatever was used at compile time applies. */
-
-switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
- (pcre_uint32)options) & PCRE_NEWLINE_BITS)
- {
- case 0: newline = NEWLINE; break; /* Compile-time default */
+md->ctypes = tables + ctypes_offset;
+
+/* Handle different \R options. */
+
+switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
+ {
+ case 0:
+ if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
+ md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
+ else
+#ifdef BSR_ANYCRLF
+ md->bsr_anycrlf = TRUE;
+#else
+ md->bsr_anycrlf = FALSE;
+#endif
+ break;
+
+ case PCRE_BSR_ANYCRLF:
+ md->bsr_anycrlf = TRUE;
+ break;
+
+ case PCRE_BSR_UNICODE:
+ md->bsr_anycrlf = FALSE;
+ break;
+
+ default: return PCRE_ERROR_BADNEWLINE;
+ }
+
+/* Handle different types of newline. The three bits give eight cases. If
+nothing is set at run time, whatever was used at compile time applies. */
+
+switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
+ (pcre_uint32)options) & PCRE_NEWLINE_BITS)
+ {
+ case 0: newline = NEWLINE; break; /* Compile-time default */
case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
- case PCRE_NEWLINE_CR+
+ case PCRE_NEWLINE_CR+
PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
- case PCRE_NEWLINE_ANY: newline = -1; break;
- case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
- default: return PCRE_ERROR_BADNEWLINE;
- }
-
-if (newline == -2)
- {
- md->nltype = NLTYPE_ANYCRLF;
- }
-else if (newline < 0)
- {
- md->nltype = NLTYPE_ANY;
- }
-else
- {
- md->nltype = NLTYPE_FIXED;
- if (newline > 255)
- {
- md->nllen = 2;
- md->nl[0] = (newline >> 8) & 255;
- md->nl[1] = newline & 255;
- }
- else
- {
- md->nllen = 1;
- md->nl[0] = newline;
- }
- }
-
+ case PCRE_NEWLINE_ANY: newline = -1; break;
+ case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
+ default: return PCRE_ERROR_BADNEWLINE;
+ }
+
+if (newline == -2)
+ {
+ md->nltype = NLTYPE_ANYCRLF;
+ }
+else if (newline < 0)
+ {
+ md->nltype = NLTYPE_ANY;
+ }
+else
+ {
+ md->nltype = NLTYPE_FIXED;
+ if (newline > 255)
+ {
+ md->nllen = 2;
+ md->nl[0] = (newline >> 8) & 255;
+ md->nl[1] = newline & 255;
+ }
+ else
+ {
+ md->nllen = 1;
+ md->nl[0] = newline;
+ }
+ }
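As a concrete reading of the cases above, assuming an ASCII build where CHAR_CR is 0x0d and CHAR_NL is 0x0a:

/* PCRE_NEWLINE_CR|PCRE_NEWLINE_LF selects newline = (0x0d << 8) | 0x0a,    */
/* which is greater than 255, so nltype = NLTYPE_FIXED, nllen = 2 and       */
/* nl[] holds { 0x0d, 0x0a }.                                               */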
+
/* Partial matching was originally supported only for a restricted set of
regexes; from release 8.00 there are no restrictions, but the bits are still
defined (though never set). So there's no harm in leaving this code. */
-
-if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
- return PCRE_ERROR_BADPARTIAL;
-
-/* If the expression has got more back references than the offsets supplied can
-hold, we get a temporary chunk of working store to use during the matching.
-Otherwise, we can use the vector supplied, rounding down its size to a multiple
-of 3. */
-
-ocount = offsetcount - (offsetcount % 3);
+
+if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
+ return PCRE_ERROR_BADPARTIAL;
+
+/* If the expression has got more back references than the offsets supplied can
+hold, we get a temporary chunk of working store to use during the matching.
+Otherwise, we can use the vector supplied, rounding down its size to a multiple
+of 3. */
+
+ocount = offsetcount - (offsetcount % 3);
arg_offset_max = (2*ocount)/3;
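A quick worked instance of the rounding above, using an illustrative count:

/* offsetcount = 20  ->  ocount = 20 - (20 % 3) = 18 usable ints;           */
/* arg_offset_max = (2*18)/3 = 12, i.e. six start/end pairs can be          */
/* reported back, and the top third of the vector stays free as workspace.  */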
-
-if (re->top_backref > 0 && re->top_backref >= ocount/3)
- {
- ocount = re->top_backref * 3 + 3;
+
+if (re->top_backref > 0 && re->top_backref >= ocount/3)
+ {
+ ocount = re->top_backref * 3 + 3;
md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
- if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
- using_temporary_offsets = TRUE;
- DPRINTF(("Got memory to hold back references\n"));
- }
-else md->offset_vector = offsets;
-md->offset_end = ocount;
-md->offset_max = (2*ocount)/3;
+ if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
+ using_temporary_offsets = TRUE;
+ DPRINTF(("Got memory to hold back references\n"));
+ }
+else md->offset_vector = offsets;
+md->offset_end = ocount;
+md->offset_max = (2*ocount)/3;
md->capture_last = 0;
-
-/* Reset the working variable associated with each extraction. These should
-never be used unless previously set, but they get saved and restored, and so we
+
+/* Reset the working variable associated with each extraction. These should
+never be used unless previously set, but they get saved and restored, and so we
initialize them to avoid reading uninitialized locations. Also, unset the
offsets for the matched string. This is really just for tidiness with callouts,
in case they inspect these fields. */
-
-if (md->offset_vector != NULL)
- {
+
+if (md->offset_vector != NULL)
+ {
register int *iptr = md->offset_vector + ocount;
register int *iend = iptr - re->top_bracket;
if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
- while (--iptr >= iend) *iptr = -1;
+ while (--iptr >= iend) *iptr = -1;
if (offsetcount > 0) md->offset_vector[0] = -1;
if (offsetcount > 1) md->offset_vector[1] = -1;
- }
-
+ }
+
/* Set up the first character to match, if available. The first_char value is
-never set for an anchored regular expression, but the anchoring may be forced
-at run time, so we have to test for anchoring. The first char may be unset for
-an unanchored pattern, of course. If there's no first char and the pattern was
-studied, there may be a bitmap of possible first characters. */
-
-if (!anchored)
- {
- if ((re->flags & PCRE_FIRSTSET) != 0)
- {
+never set for an anchored regular expression, but the anchoring may be forced
+at run time, so we have to test for anchoring. The first char may be unset for
+an unanchored pattern, of course. If there's no first char and the pattern was
+studied, there may be a bitmap of possible first characters. */
+
+if (!anchored)
+ {
+ if ((re->flags & PCRE_FIRSTSET) != 0)
+ {
has_first_char = TRUE;
first_char = first_char2 = (pcre_uchar)(re->first_char);
if ((re->flags & PCRE_FCH_CASELESS) != 0)
@@ -6715,18 +6715,18 @@ if (!anchored)
first_char2 = UCD_OTHERCASE(first_char);
#endif
}
- }
- else
- if (!startline && study != NULL &&
+ }
+ else
+ if (!startline && study != NULL &&
(study->flags & PCRE_STUDY_MAPPED) != 0)
- start_bits = study->start_bits;
- }
-
-/* For anchored or unanchored matches, there may be a "last known required
-character" set. */
-
-if ((re->flags & PCRE_REQCHSET) != 0)
- {
+ start_bits = study->start_bits;
+ }
+
+/* For anchored or unanchored matches, there may be a "last known required
+character" set. */
+
+if ((re->flags & PCRE_REQCHSET) != 0)
+ {
has_req_char = TRUE;
req_char = req_char2 = (pcre_uchar)(re->req_char);
if ((re->flags & PCRE_RCH_CASELESS) != 0)
@@ -6737,27 +6737,27 @@ if ((re->flags & PCRE_REQCHSET) != 0)
req_char2 = UCD_OTHERCASE(req_char);
#endif
}
- }
-
-
-/* ==========================================================================*/
-
-/* Loop for handling unanchored repeated matching attempts; for anchored regexs
-the loop runs just once. */
-
-for(;;)
- {
+ }
+
+
+/* ==========================================================================*/
+
+/* Loop for handling unanchored repeated matching attempts; for anchored regexs
+the loop runs just once. */
+
+for(;;)
+ {
PCRE_PUCHAR save_end_subject = end_subject;
PCRE_PUCHAR new_start_match;
-
+
/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the first
newline. Implement this by temporarily adjusting end_subject so that we stop
scanning at a newline. If the match fails at the newline, later code breaks
this loop. */
-
- if (firstline)
- {
+
+ if (firstline)
+ {
PCRE_PUCHAR t = start_match;
#ifdef SUPPORT_UTF
if (utf)
@@ -6770,24 +6770,24 @@ for(;;)
}
else
#endif
- while (t < md->end_subject && !IS_NEWLINE(t)) t++;
- end_subject = t;
- }
-
+ while (t < md->end_subject && !IS_NEWLINE(t)) t++;
+ end_subject = t;
+ }
+
/* There are some optimizations that avoid running the match if a known
starting point is not found, or if a known later character is not present.
However, there is an option that disables these, for testing and for ensuring
that all callouts do actually occur. The option can be set in the regex by
(*NO_START_OPT) or passed in match-time options. */
-
+
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
- {
+ {
/* Advance to a unique first char if there is one. */
-
+
if (has_first_char)
{
pcre_uchar smc;
-
+
if (first_char != first_char2)
while (start_match < end_subject &&
(smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
@@ -6800,7 +6800,7 @@ for(;;)
/* Or to just after a linebreak for a multiline match */
else if (startline)
- {
+ {
if (start_match > md->start_subject + start_offset)
{
#ifdef SUPPORT_UTF
@@ -6817,7 +6817,7 @@ for(;;)
#endif
while (start_match < end_subject && !WAS_NEWLINE(start_match))
start_match++;
-
+
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
and we are now at a LF, advance the match position by one more character.
*/
@@ -6828,12 +6828,12 @@ for(;;)
UCHAR21TEST(start_match) == CHAR_NL)
start_match++;
}
- }
-
+ }
+
/* Or to a non-unique first byte after study */
-
+
else if (start_bits != NULL)
- {
+ {
while (start_match < end_subject)
{
register pcre_uint32 c = UCHAR21TEST(start_match);
@@ -6843,30 +6843,30 @@ for(;;)
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
start_match++;
}
- }
+ }
} /* Starting optimizations */
-
- /* Restore fudged end_subject */
-
- end_subject = save_end_subject;
-
+
+ /* Restore fudged end_subject */
+
+ end_subject = save_end_subject;
+
/* The following two optimizations are disabled for partial matching or if
disabling is explicitly requested. */
-
+
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
{
/* If the pattern was studied, a minimum subject length may be set. This is
a lower bound; no actual string of that length may actually match the
pattern. Although the value is, strictly, in characters, we treat it as
bytes to avoid spending too much time in this optimization. */
-
+
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
(pcre_uint32)(end_subject - start_match) < study->minlength)
{
rc = MATCH_NOMATCH;
break;
}
-
+
/* If req_char is set, we know that that character must appear in the
subject for the match to succeed. If the first character is set, req_char
must be later in the subject; otherwise the test starts at the match point.
@@ -6874,56 +6874,56 @@ for(;;)
nested unlimited repeats that aren't going to match. Writing separate code
for cased/caseless versions makes it go faster, as does using an
autoincrement and backing off on a match.
-
+
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary patterns.
This showed up when somebody was matching something like /^\d+C/ on a
32-megabyte string... so we don't do this when the string is sufficiently
long. */
-
+
if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
{
register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
-
+
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
if (p > req_char_ptr)
- {
+ {
if (req_char != req_char2)
- {
+ {
while (p < end_subject)
{
register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
- }
+ }
else
- {
+ {
while (p < end_subject)
{
if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
- }
-
+ }
+
/* If we can't find the required character, break the matching loop,
forcing a match failure. */
-
+
if (p >= end_subject)
{
rc = MATCH_NOMATCH;
break;
}
-
+
/* If we have found the required character, save the point where we
found it, so that we don't search again next time round the loop if
the start hasn't passed this character yet. */
-
+
req_char_ptr = p;
}
- }
- }
-
+ }
+ }
+
#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
printf(">>>> Match against: ");
pchars(start_match, end_subject - start_match, TRUE, md);
@@ -6933,9 +6933,9 @@ for(;;)
/* OK, we can now run the match. If "hitend" is set afterwards, remember the
first starting point for which a partial match was found. */
- md->start_match_ptr = start_match;
+ md->start_match_ptr = start_match;
md->start_used_ptr = start_match;
- md->match_call_count = 0;
+ md->match_call_count = 0;
md->match_function_type = 0;
md->end_offset_top = 0;
md->skip_arg_count = 0;
@@ -6945,9 +6945,9 @@ for(;;)
start_partial = md->start_used_ptr;
match_partial = start_match;
}
-
- switch(rc)
- {
+
+ switch(rc)
+ {
/* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
entirely. The only way we can do that is to re-do the match at the same
@@ -6971,117 +6971,117 @@ for(;;)
}
/* Fall through */
- /* NOMATCH and PRUNE advance by one character. THEN at this level acts
+ /* NOMATCH and PRUNE advance by one character. THEN at this level acts
exactly like PRUNE. Unset ignore SKIP-with-argument. */
-
- case MATCH_NOMATCH:
- case MATCH_PRUNE:
- case MATCH_THEN:
+
+ case MATCH_NOMATCH:
+ case MATCH_PRUNE:
+ case MATCH_THEN:
md->ignore_skip_arg = 0;
- new_start_match = start_match + 1;
+ new_start_match = start_match + 1;
#ifdef SUPPORT_UTF
if (utf)
ACROSSCHAR(new_start_match < end_subject, *new_start_match,
new_start_match++);
-#endif
- break;
-
- /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
-
- case MATCH_COMMIT:
- rc = MATCH_NOMATCH;
- goto ENDLOOP;
-
+#endif
+ break;
+
+ /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
+
+ case MATCH_COMMIT:
+ rc = MATCH_NOMATCH;
+ goto ENDLOOP;
+
/* Any other return is either a match, or some kind of error. */
-
- default:
- goto ENDLOOP;
- }
-
- /* Control reaches here for the various types of "no match at this point"
- result. Reset the code to MATCH_NOMATCH for subsequent checking. */
-
- rc = MATCH_NOMATCH;
-
- /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
- newline in the subject (though it may continue over the newline). Therefore,
- if we have just failed to match, starting at a newline, do not continue. */
-
- if (firstline && IS_NEWLINE(start_match)) break;
-
- /* Advance to new matching position */
-
- start_match = new_start_match;
-
- /* Break the loop if the pattern is anchored or if we have passed the end of
- the subject. */
-
- if (anchored || start_match > end_subject) break;
-
- /* If we have just passed a CR and we are now at a LF, and the pattern does
- not contain any explicit matches for \r or \n, and the newline option is CRLF
+
+ default:
+ goto ENDLOOP;
+ }
+
+ /* Control reaches here for the various types of "no match at this point"
+ result. Reset the code to MATCH_NOMATCH for subsequent checking. */
+
+ rc = MATCH_NOMATCH;
+
+ /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
+ newline in the subject (though it may continue over the newline). Therefore,
+ if we have just failed to match, starting at a newline, do not continue. */
+
+ if (firstline && IS_NEWLINE(start_match)) break;
+
+ /* Advance to new matching position */
+
+ start_match = new_start_match;
+
+ /* Break the loop if the pattern is anchored or if we have passed the end of
+ the subject. */
+
+ if (anchored || start_match > end_subject) break;
+
+ /* If we have just passed a CR and we are now at a LF, and the pattern does
+ not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. In
normal matching start_match will always be greater than the first position at
this stage, but a failed *SKIP can cause a return at the same point, which is
why the first test exists. */
-
+
if (start_match > (PCRE_PUCHAR)subject + start_offset &&
start_match[-1] == CHAR_CR &&
- start_match < end_subject &&
+ start_match < end_subject &&
*start_match == CHAR_NL &&
- (re->flags & PCRE_HASCRORLF) == 0 &&
- (md->nltype == NLTYPE_ANY ||
- md->nltype == NLTYPE_ANYCRLF ||
- md->nllen == 2))
- start_match++;
-
+ (re->flags & PCRE_HASCRORLF) == 0 &&
+ (md->nltype == NLTYPE_ANY ||
+ md->nltype == NLTYPE_ANYCRLF ||
+ md->nllen == 2))
+ start_match++;
+
md->mark = NULL; /* Reset for start of next match attempt */
} /* End of for(;;) "bumpalong" loop */
-
-/* ==========================================================================*/
-
-/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
-conditions is true:
-
-(1) The pattern is anchored or the match was failed by (*COMMIT);
-
-(2) We are past the end of the subject;
-
-(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
- this option requests that a match occur at or before the first newline in
- the subject.
-
-When we have a match and the offset vector is big enough to deal with any
-backreferences, captured substring offsets will already be set up. In the case
-where we had to get some local store to hold offsets for backreference
-processing, copy those that we can. In this case there need not be overflow if
-certain parts of the pattern were not used, even though there are more
-capturing parentheses than vector slots. */
-
-ENDLOOP:
-
+
+/* ==========================================================================*/
+
+/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
+conditions is true:
+
+(1) The pattern is anchored or the match was failed by (*COMMIT);
+
+(2) We are past the end of the subject;
+
+(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
+ this option requests that a match occur at or before the first newline in
+ the subject.
+
+When we have a match and the offset vector is big enough to deal with any
+backreferences, captured substring offsets will already be set up. In the case
+where we had to get some local store to hold offsets for backreference
+processing, copy those that we can. In this case there need not be overflow if
+certain parts of the pattern were not used, even though there are more
+capturing parentheses than vector slots. */
+
+ENDLOOP:
+
if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
- {
- if (using_temporary_offsets)
- {
+ {
+ if (using_temporary_offsets)
+ {
if (arg_offset_max >= 4)
- {
- memcpy(offsets + 2, md->offset_vector + 2,
+ {
+ memcpy(offsets + 2, md->offset_vector + 2,
(arg_offset_max - 2) * sizeof(int));
- DPRINTF(("Copied offsets from temporary memory\n"));
- }
+ DPRINTF(("Copied offsets from temporary memory\n"));
+ }
if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
- DPRINTF(("Freeing temporary memory\n"));
+ DPRINTF(("Freeing temporary memory\n"));
(PUBL(free))(md->offset_vector);
- }
-
+ }
+
/* Set the return code to the number of captured strings, or 0 if there were
- too many to fit into the vector. */
-
+ too many to fit into the vector. */
+
rc = ((md->capture_last & OVFLBIT) != 0 &&
md->end_offset_top >= arg_offset_max)?
0 : md->end_offset_top/2;
-
+
/* If there is space in the offset vector, set any unused pairs at the end of
the pattern to -1 for backwards compatibility. It is documented that this
happens. In earlier versions, the whole set of potential capturing offsets
@@ -7101,52 +7101,52 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
while (iptr < iend) *iptr++ = -1;
}
- /* If there is space, set up the whole thing as substring 0. The value of
- md->start_match_ptr might be modified if \K was encountered on the success
- matching path. */
-
- if (offsetcount < 2) rc = 0; else
- {
+ /* If there is space, set up the whole thing as substring 0. The value of
+ md->start_match_ptr might be modified if \K was encountered on the success
+ matching path. */
+
+ if (offsetcount < 2) rc = 0; else
+ {
offsets[0] = (int)(md->start_match_ptr - md->start_subject);
offsets[1] = (int)(md->end_match_ptr - md->start_subject);
- }
-
+ }
+
/* Return MARK data if requested */
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
*(extra_data->mark) = (pcre_uchar *)md->mark;
- DPRINTF((">>>> returning %d\n", rc));
+ DPRINTF((">>>> returning %d\n", rc));
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
- return rc;
- }
-
-/* Control gets here if there has been an error, or if the overall match
-attempt has failed at all permitted starting positions. */
-
-if (using_temporary_offsets)
- {
- DPRINTF(("Freeing temporary memory\n"));
+ return rc;
+ }
+
+/* Control gets here if there has been an error, or if the overall match
+attempt has failed at all permitted starting positions. */
+
+if (using_temporary_offsets)
+ {
+ DPRINTF(("Freeing temporary memory\n"));
(PUBL(free))(md->offset_vector);
- }
-
+ }
+
/* For anything other than nomatch or partial match, just return the code. */
if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
- {
- DPRINTF((">>>> error: returning %d\n", rc));
+ {
+ DPRINTF((">>>> error: returning %d\n", rc));
#ifdef NO_RECURSE
release_match_heapframes(&frame_zero);
#endif
- return rc;
- }
+ return rc;
+ }
/* Handle partial matches - disable any mark data */
if (match_partial != NULL)
- {
- DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
+ {
+ DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
md->mark = NULL;
if (offsetcount > 1)
{
@@ -7156,15 +7156,15 @@ if (match_partial != NULL)
offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
}
rc = PCRE_ERROR_PARTIAL;
- }
+ }
/* This is the classic nomatch case */
-else
- {
- DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
+else
+ {
+ DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
rc = PCRE_ERROR_NOMATCH;
- }
+ }
/* Return the MARK data if it has been requested. */
@@ -7174,6 +7174,6 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
release_match_heapframes(&frame_zero);
#endif
return rc;
-}
-
-/* End of pcre_exec.c */
+}
+
+/* End of pcre_exec.c */
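
The pcre_exec.c hunks above show how a successful match fills offsets[0]/offsets[1] with the bounds of the whole match and returns the number of captured pairs (or 0 if the vector overflowed). The following caller-side sketch is not part of the patch; it only illustrates how those values are consumed. The pattern and subject strings are made up for illustration.

#include <stdio.h>
#include <string.h>
#include <pcre.h>

int main(void)
{
const char *error;
int erroffset;
/* Hypothetical pattern and subject, purely for illustration. */
pcre *re = pcre_compile("(\\w+) (\\w+)", 0, &error, &erroffset, NULL);
if (re == NULL) return 1;

const char *subject = "hello world";
int ovector[30];   /* size must be a multiple of 3; the top third is workspace */
int rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);

if (rc == PCRE_ERROR_NOMATCH) printf("no match\n");
else if (rc > 0)
  /* ovector[0]/ovector[1] are the whole-match bounds set up by the code above */
  printf("matched \"%.*s\"\n", ovector[1] - ovector[0], subject + ovector[0]);

pcre_free(re);
return 0;
}
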
diff --git a/contrib/libs/pcre/pcre_fullinfo.c b/contrib/libs/pcre/pcre_fullinfo.c
index ac066ecf20..bfccc02598 100644
--- a/contrib/libs/pcre/pcre_fullinfo.c
+++ b/contrib/libs/pcre/pcre_fullinfo.c
@@ -1,70 +1,70 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_fullinfo(), which returns
-information about a compiled pattern. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_fullinfo(), which returns
+information about a compiled pattern. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Return info about compiled pattern *
-*************************************************/
-
-/* This is a newer "info" function which has an extensible interface so
-that additional items can be added compatibly.
-
-Arguments:
- argument_re points to compiled code
- extra_data points extra data, or NULL
- what what information is required
- where where to put the information
-
-Returns: 0 if data returned, negative on error
-*/
-
+#endif
+
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Return info about compiled pattern *
+*************************************************/
+
+/* This is a newer "info" function which has an extensible interface so
+that additional items can be added compatibly.
+
+Arguments:
+ argument_re points to compiled code
+ extra_data points extra data, or NULL
+ what what information is required
+ where where to put the information
+
+Returns: 0 if data returned, negative on error
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
@@ -78,42 +78,42 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data,
int what, void *where)
#endif
-{
+{
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
-const pcre_study_data *study = NULL;
-
-if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
-
-if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
- study = (const pcre_study_data *)extra_data->study_data;
-
+const pcre_study_data *study = NULL;
+
+if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
+
+if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
+ study = (const pcre_study_data *)extra_data->study_data;
+
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
-if (re->magic_number != MAGIC_NUMBER)
+if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-
+
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-switch (what)
- {
- case PCRE_INFO_OPTIONS:
+switch (what)
+ {
+ case PCRE_INFO_OPTIONS:
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
- break;
-
- case PCRE_INFO_SIZE:
- *((size_t *)where) = re->size;
- break;
-
- case PCRE_INFO_STUDYSIZE:
- *((size_t *)where) = (study == NULL)? 0 : study->size;
- break;
-
+ break;
+
+ case PCRE_INFO_SIZE:
+ *((size_t *)where) = re->size;
+ break;
+
+ case PCRE_INFO_STUDYSIZE:
+ *((size_t *)where) = (study == NULL)? 0 : study->size;
+ break;
+
case PCRE_INFO_JITSIZE:
#ifdef SUPPORT_JIT
*((size_t *)where) =
@@ -126,20 +126,20 @@ switch (what)
#endif
break;
- case PCRE_INFO_CAPTURECOUNT:
- *((int *)where) = re->top_bracket;
- break;
-
- case PCRE_INFO_BACKREFMAX:
- *((int *)where) = re->top_backref;
- break;
-
- case PCRE_INFO_FIRSTBYTE:
- *((int *)where) =
+ case PCRE_INFO_CAPTURECOUNT:
+ *((int *)where) = re->top_bracket;
+ break;
+
+ case PCRE_INFO_BACKREFMAX:
+ *((int *)where) = re->top_backref;
+ break;
+
+ case PCRE_INFO_FIRSTBYTE:
+ *((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char :
- ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
- break;
-
+ ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
+ break;
+
case PCRE_INFO_FIRSTCHARACTER:
*((pcre_uint32 *)where) =
(re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0;
@@ -151,15 +151,15 @@ switch (what)
((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0;
break;
- /* Make sure we pass back the pointer to the bit vector in the external
- block, not the internal copy (with flipped integer fields). */
-
- case PCRE_INFO_FIRSTTABLE:
+ /* Make sure we pass back the pointer to the bit vector in the external
+ block, not the internal copy (with flipped integer fields). */
+
+ case PCRE_INFO_FIRSTTABLE:
*((const pcre_uint8 **)where) =
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
- ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
- break;
-
+ ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
+ break;
+
case PCRE_INFO_MINLENGTH:
*((int *)where) =
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
@@ -172,11 +172,11 @@ switch (what)
extra_data->executable_jit != NULL;
break;
- case PCRE_INFO_LASTLITERAL:
- *((int *)where) =
+ case PCRE_INFO_LASTLITERAL:
+ *((int *)where) =
((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1;
- break;
-
+ break;
+
case PCRE_INFO_REQUIREDCHAR:
*((pcre_uint32 *)where) =
((re->flags & PCRE_REQCHSET) != 0) ? re->req_char : 0;
@@ -187,37 +187,37 @@ switch (what)
((re->flags & PCRE_REQCHSET) != 0);
break;
- case PCRE_INFO_NAMEENTRYSIZE:
- *((int *)where) = re->name_entry_size;
- break;
-
- case PCRE_INFO_NAMECOUNT:
- *((int *)where) = re->name_count;
- break;
-
- case PCRE_INFO_NAMETABLE:
+ case PCRE_INFO_NAMEENTRYSIZE:
+ *((int *)where) = re->name_entry_size;
+ break;
+
+ case PCRE_INFO_NAMECOUNT:
+ *((int *)where) = re->name_count;
+ break;
+
+ case PCRE_INFO_NAMETABLE:
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
- break;
-
- case PCRE_INFO_DEFAULT_TABLES:
+ break;
+
+ case PCRE_INFO_DEFAULT_TABLES:
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
- break;
-
+ break;
+
/* From release 8.00 this will always return TRUE because NOPARTIAL is
no longer ever set (the restrictions have been removed). */
- case PCRE_INFO_OKPARTIAL:
- *((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
- break;
-
- case PCRE_INFO_JCHANGED:
- *((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
- break;
-
- case PCRE_INFO_HASCRORLF:
- *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
- break;
-
+ case PCRE_INFO_OKPARTIAL:
+ *((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
+ break;
+
+ case PCRE_INFO_JCHANGED:
+ *((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
+ break;
+
+ case PCRE_INFO_HASCRORLF:
+ *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
+ break;
+
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
@@ -236,10 +236,10 @@ switch (what)
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
break;
- default: return PCRE_ERROR_BADOPTION;
- }
-
-return 0;
-}
-
-/* End of pcre_fullinfo.c */
+ default: return PCRE_ERROR_BADOPTION;
+ }
+
+return 0;
+}
+
+/* End of pcre_fullinfo.c */
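
pcre_fullinfo() above dispatches on the "what" argument and writes through the "where" pointer, whose required type depends on the item. A minimal sketch, separate from the patch, of querying two of the items handled in that switch; the compiled pattern would come from pcre_compile() as in the earlier sketch.

#include <stdio.h>
#include <pcre.h>

static void show_pattern_info(const pcre *re)
{
int capture_count = 0;
size_t pattern_size = 0;

/* PCRE_INFO_CAPTURECOUNT writes an int and PCRE_INFO_SIZE a size_t,
   matching the switch cases in pcre_fullinfo() above. */
if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count) == 0 &&
    pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &pattern_size) == 0)
  printf("%d capturing groups, %zu bytes of compiled code\n",
         capture_count, pattern_size);
}
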
diff --git a/contrib/libs/pcre/pcre_get.c b/contrib/libs/pcre/pcre_get.c
index c6b1e97536..11392db08e 100644
--- a/contrib/libs/pcre/pcre_get.c
+++ b/contrib/libs/pcre/pcre_get.c
@@ -1,73 +1,73 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains some convenience functions for extracting substrings
-from the subject string after a regex match has succeeded. The original idea
-for these functions came from Scott Wimer. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains some convenience functions for extracting substrings
+from the subject string after a regex match has succeeded. The original idea
+for these functions came from Scott Wimer. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Find number for named string *
-*************************************************/
-
-/* This function is used by the get_first_set() function below, as well
-as being generally available. It assumes that names are unique.
-
-Arguments:
- code the compiled regex
- stringname the name whose number is required
-
-Returns: the number of the named parentheses, or a negative number
- (PCRE_ERROR_NOSUBSTRING) if not found
-*/
-
+#endif
+
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Find number for named string *
+*************************************************/
+
+/* This function is used by the get_first_set() function below, as well
+as being generally available. It assumes that names are unique.
+
+Arguments:
+ code the compiled regex
+ stringname the name whose number is required
+
+Returns: the number of the named parentheses, or a negative number
+ (PCRE_ERROR_NOSUBSTRING) if not found
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_get_stringnumber(const pcre *code, const char *stringname)
+pcre_get_stringnumber(const pcre *code, const char *stringname)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
@@ -75,27 +75,27 @@ pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
#endif
-{
-int rc;
-int entrysize;
-int top, bot;
+{
+int rc;
+int entrysize;
+int top, bot;
pcre_uchar *nametable;
-
+
#ifdef COMPILE_PCRE8
-if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
- return rc;
-if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
-
-if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
- return rc;
-if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
- return rc;
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+ return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
+
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+ return rc;
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+ return rc;
#endif
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
-
+
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
@@ -112,43 +112,43 @@ if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
-bot = 0;
-while (top > bot)
- {
- int mid = (top + bot) / 2;
+bot = 0;
+while (top > bot)
+ {
+ int mid = (top + bot) / 2;
pcre_uchar *entry = nametable + entrysize*mid;
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0) return GET2(entry, 0);
- if (c > 0) bot = mid + 1; else top = mid;
- }
-
-return PCRE_ERROR_NOSUBSTRING;
-}
-
-
-
-/*************************************************
-* Find (multiple) entries for named string *
-*************************************************/
-
-/* This is used by the get_first_set() function below, as well as being
-generally available. It is used when duplicated names are permitted.
-
-Arguments:
- code the compiled regex
- stringname the name whose entries required
- firstptr where to put the pointer to the first entry
- lastptr where to put the pointer to the last entry
-
-Returns: the length of each entry, or a negative number
- (PCRE_ERROR_NOSUBSTRING) if not found
-*/
-
+ if (c > 0) bot = mid + 1; else top = mid;
+ }
+
+return PCRE_ERROR_NOSUBSTRING;
+}
+
+
+
+/*************************************************
+* Find (multiple) entries for named string *
+*************************************************/
+
+/* This is used by the get_first_set() function below, as well as being
+generally available. It is used when duplicated names are permitted.
+
+Arguments:
+ code the compiled regex
+ stringname the name whose entries required
+ firstptr where to put the pointer to the first entry
+ lastptr where to put the pointer to the last entry
+
+Returns: the length of each entry, or a negative number
+ (PCRE_ERROR_NOSUBSTRING) if not found
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_get_stringtable_entries(const pcre *code, const char *stringname,
- char **firstptr, char **lastptr)
+pcre_get_stringtable_entries(const pcre *code, const char *stringname,
+ char **firstptr, char **lastptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
@@ -158,27 +158,27 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname,
PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr)
#endif
-{
-int rc;
-int entrysize;
-int top, bot;
+{
+int rc;
+int entrysize;
+int top, bot;
pcre_uchar *nametable, *lastentry;
-
+
#ifdef COMPILE_PCRE8
-if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
- return rc;
-if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
-
-if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
- return rc;
-if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
- return rc;
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+ return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
+
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+ return rc;
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+ return rc;
#endif
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
-
+
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
@@ -195,33 +195,33 @@ if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
-lastentry = nametable + entrysize * (top - 1);
-bot = 0;
-while (top > bot)
- {
- int mid = (top + bot) / 2;
+lastentry = nametable + entrysize * (top - 1);
+bot = 0;
+while (top > bot)
+ {
+ int mid = (top + bot) / 2;
pcre_uchar *entry = nametable + entrysize*mid;
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(entry + IMM2_SIZE));
- if (c == 0)
- {
+ if (c == 0)
+ {
pcre_uchar *first = entry;
pcre_uchar *last = entry;
- while (first > nametable)
- {
+ while (first > nametable)
+ {
if (STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
- first -= entrysize;
- }
- while (last < lastentry)
- {
+ first -= entrysize;
+ }
+ while (last < lastentry)
+ {
if (STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
- last += entrysize;
- }
+ last += entrysize;
+ }
#if defined COMPILE_PCRE8
- *firstptr = (char *)first;
- *lastptr = (char *)last;
+ *firstptr = (char *)first;
+ *lastptr = (char *)last;
#elif defined COMPILE_PCRE16
*firstptr = (PCRE_UCHAR16 *)first;
*lastptr = (PCRE_UCHAR16 *)last;
@@ -229,36 +229,36 @@ while (top > bot)
*firstptr = (PCRE_UCHAR32 *)first;
*lastptr = (PCRE_UCHAR32 *)last;
#endif
- return entrysize;
- }
- if (c > 0) bot = mid + 1; else top = mid;
- }
-
-return PCRE_ERROR_NOSUBSTRING;
-}
-
-
-
-/*************************************************
-* Find first set of multiple named strings *
-*************************************************/
-
-/* This function allows for duplicate names in the table of named substrings.
-It returns the number of the first one that was set in a pattern match.
-
-Arguments:
- code the compiled regex
- stringname the name of the capturing substring
- ovector the vector of matched substrings
+ return entrysize;
+ }
+ if (c > 0) bot = mid + 1; else top = mid;
+ }
+
+return PCRE_ERROR_NOSUBSTRING;
+}
+
+
+
+/*************************************************
+* Find first set of multiple named strings *
+*************************************************/
+
+/* This function allows for duplicate names in the table of named substrings.
+It returns the number of the first one that was set in a pattern match.
+
+Arguments:
+ code the compiled regex
+ stringname the name of the capturing substring
+ ovector the vector of matched substrings
stringcount number of captured substrings
-
-Returns: the number of the first that is set,
- or the number of the last one if none are set,
- or a negative number on error
-*/
-
+
+Returns: the number of the first that is set,
+ or the number of the last one if none are set,
+ or a negative number on error
+*/
+
#if defined COMPILE_PCRE8
-static int
+static int
get_first_set(const pcre *code, const char *stringname, int *ovector,
int stringcount)
#elif defined COMPILE_PCRE16
@@ -270,12 +270,12 @@ static int
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
int stringcount)
#endif
-{
+{
const REAL_PCRE *re = (const REAL_PCRE *)code;
-int entrysize;
+int entrysize;
pcre_uchar *entry;
#if defined COMPILE_PCRE8
-char *first, *last;
+char *first, *last;
#elif defined COMPILE_PCRE16
PCRE_UCHAR16 *first, *last;
#elif defined COMPILE_PCRE32
@@ -283,9 +283,9 @@ PCRE_UCHAR32 *first, *last;
#endif
#if defined COMPILE_PCRE8
-if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
- return pcre_get_stringnumber(code, stringname);
-entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
+if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+ return pcre_get_stringnumber(code, stringname);
+entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
#elif defined COMPILE_PCRE16
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre16_get_stringnumber(code, stringname);
@@ -295,49 +295,49 @@ if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre32_get_stringnumber(code, stringname);
entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last);
#endif
-if (entrysize <= 0) return entrysize;
+if (entrysize <= 0) return entrysize;
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
- {
+ {
int n = GET2(entry, 0);
if (n < stringcount && ovector[n*2] >= 0) return n;
- }
+ }
return GET2(entry, 0);
-}
-
-
-
-
-/*************************************************
-* Copy captured string to given buffer *
-*************************************************/
-
-/* This function copies a single captured substring into a given buffer.
-Note that we use memcpy() rather than strncpy() in case there are binary zeros
-in the string.
-
-Arguments:
- subject the subject string that was matched
- ovector pointer to the offsets table
- stringcount the number of substrings that were captured
- (i.e. the yield of the pcre_exec call, unless
- that was zero, in which case it should be 1/3
- of the offset table size)
- stringnumber the number of the required substring
- buffer where to put the substring
- size the size of the buffer
-
-Returns: if successful:
- the length of the copied string, not including the zero
- that is put on the end; can be zero
- if not successful:
- PCRE_ERROR_NOMEMORY (-6) buffer too small
- PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
-*/
-
+}
+
+
+
+
+/*************************************************
+* Copy captured string to given buffer *
+*************************************************/
+
+/* This function copies a single captured substring into a given buffer.
+Note that we use memcpy() rather than strncpy() in case there are binary zeros
+in the string.
+
+Arguments:
+ subject the subject string that was matched
+ ovector pointer to the offsets table
+ stringcount the number of substrings that were captured
+ (i.e. the yield of the pcre_exec call, unless
+ that was zero, in which case it should be 1/3
+ of the offset table size)
+ stringnumber the number of the required substring
+ buffer where to put the substring
+ size the size of the buffer
+
+Returns: if successful:
+ the length of the copied string, not including the zero
+ that is put on the end; can be zero
+ if not successful:
+ PCRE_ERROR_NOMEMORY (-6) buffer too small
+ PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_copy_substring(const char *subject, int *ovector, int stringcount,
- int stringnumber, char *buffer, int size)
+pcre_copy_substring(const char *subject, int *ovector, int stringcount,
+ int stringnumber, char *buffer, int size)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
@@ -347,48 +347,48 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
int stringnumber, PCRE_UCHAR32 *buffer, int size)
#endif
-{
-int yield;
-if (stringnumber < 0 || stringnumber >= stringcount)
- return PCRE_ERROR_NOSUBSTRING;
-stringnumber *= 2;
-yield = ovector[stringnumber+1] - ovector[stringnumber];
-if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
+{
+int yield;
+if (stringnumber < 0 || stringnumber >= stringcount)
+ return PCRE_ERROR_NOSUBSTRING;
+stringnumber *= 2;
+yield = ovector[stringnumber+1] - ovector[stringnumber];
+if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
-buffer[yield] = 0;
-return yield;
-}
-
-
-
-/*************************************************
-* Copy named captured string to given buffer *
-*************************************************/
-
-/* This function copies a single captured substring into a given buffer,
-identifying it by name. If the regex permits duplicate names, the first
-substring that is set is chosen.
-
-Arguments:
- code the compiled regex
- subject the subject string that was matched
- ovector pointer to the offsets table
- stringcount the number of substrings that were captured
- (i.e. the yield of the pcre_exec call, unless
- that was zero, in which case it should be 1/3
- of the offset table size)
- stringname the name of the required substring
- buffer where to put the substring
- size the size of the buffer
-
-Returns: if successful:
- the length of the copied string, not including the zero
- that is put on the end; can be zero
- if not successful:
- PCRE_ERROR_NOMEMORY (-6) buffer too small
- PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
-*/
-
+buffer[yield] = 0;
+return yield;
+}
+
+
+
+/*************************************************
+* Copy named captured string to given buffer *
+*************************************************/
+
+/* This function copies a single captured substring into a given buffer,
+identifying it by name. If the regex permits duplicate names, the first
+substring that is set is chosen.
+
+Arguments:
+ code the compiled regex
+ subject the subject string that was matched
+ ovector pointer to the offsets table
+ stringcount the number of substrings that were captured
+ (i.e. the yield of the pcre_exec call, unless
+ that was zero, in which case it should be 1/3
+ of the offset table size)
+ stringname the name of the required substring
+ buffer where to put the substring
+ size the size of the buffer
+
+Returns: if successful:
+ the length of the copied string, not including the zero
+ that is put on the end; can be zero
+ if not successful:
+ PCRE_ERROR_NOMEMORY (-6) buffer too small
+ PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject,
@@ -405,45 +405,45 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
int *ovector, int stringcount, PCRE_SPTR32 stringname,
PCRE_UCHAR32 *buffer, int size)
#endif
-{
+{
int n = get_first_set(code, stringname, ovector, stringcount);
-if (n <= 0) return n;
+if (n <= 0) return n;
#if defined COMPILE_PCRE8
-return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
+return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
#elif defined COMPILE_PCRE16
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
#elif defined COMPILE_PCRE32
return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size);
#endif
-}
-
-
-
-/*************************************************
-* Copy all captured strings to new store *
-*************************************************/
-
-/* This function gets one chunk of store and builds a list of pointers and all
-of the captured substrings in it. A NULL pointer is put on the end of the list.
-
-Arguments:
- subject the subject string that was matched
- ovector pointer to the offsets table
- stringcount the number of substrings that were captured
- (i.e. the yield of the pcre_exec call, unless
- that was zero, in which case it should be 1/3
- of the offset table size)
- listptr set to point to the list of pointers
-
-Returns: if successful: 0
- if not successful:
- PCRE_ERROR_NOMEMORY (-6) failed to get store
-*/
-
+}
+
+
+
+/*************************************************
+* Copy all captured strings to new store *
+*************************************************/
+
+/* This function gets one chunk of store and builds a list of pointers and all
+of the captured substrings in it. A NULL pointer is put on the end of the list.
+
+Arguments:
+ subject the subject string that was matched
+ ovector pointer to the offsets table
+ stringcount the number of substrings that were captured
+ (i.e. the yield of the pcre_exec call, unless
+ that was zero, in which case it should be 1/3
+ of the offset table size)
+ listptr set to point to the list of pointers
+
+Returns: if successful: 0
+ if not successful:
+ PCRE_ERROR_NOMEMORY (-6) failed to get store
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
- const char ***listptr)
+pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
+ const char ***listptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
@@ -453,61 +453,61 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount,
PCRE_SPTR32 **listptr)
#endif
-{
-int i;
+{
+int i;
int size = sizeof(pcre_uchar *);
-int double_count = stringcount * 2;
+int double_count = stringcount * 2;
pcre_uchar **stringlist;
pcre_uchar *p;
-
-for (i = 0; i < double_count; i += 2)
+
+for (i = 0; i < double_count; i += 2)
{
size += sizeof(pcre_uchar *) + IN_UCHARS(1);
if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
}
-
+
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
-if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
-
+if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
+
#if defined COMPILE_PCRE8
-*listptr = (const char **)stringlist;
+*listptr = (const char **)stringlist;
#elif defined COMPILE_PCRE16
*listptr = (PCRE_SPTR16 *)stringlist;
#elif defined COMPILE_PCRE32
*listptr = (PCRE_SPTR32 *)stringlist;
#endif
p = (pcre_uchar *)(stringlist + stringcount + 1);
-
-for (i = 0; i < double_count; i += 2)
- {
+
+for (i = 0; i < double_count; i += 2)
+ {
int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
memcpy(p, subject + ovector[i], IN_UCHARS(len));
- *stringlist++ = p;
- p += len;
- *p++ = 0;
- }
-
-*stringlist = NULL;
-return 0;
-}
-
-
-
-/*************************************************
-* Free store obtained by get_substring_list *
-*************************************************/
-
-/* This function exists for the benefit of people calling PCRE from non-C
+ *stringlist++ = p;
+ p += len;
+ *p++ = 0;
+ }
+
+*stringlist = NULL;
+return 0;
+}
+
+
+
+/*************************************************
+* Free store obtained by get_substring_list *
+*************************************************/
+
+/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (PUBL(free))()
directly.
-
-Argument: the result of a previous pcre_get_substring_list()
-Returns: nothing
-*/
-
+
+Argument: the result of a previous pcre_get_substring_list()
+Returns: nothing
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
-pcre_free_substring_list(const char **pointer)
+pcre_free_substring_list(const char **pointer)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
@@ -515,41 +515,41 @@ pcre16_free_substring_list(PCRE_SPTR16 *pointer)
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre32_free_substring_list(PCRE_SPTR32 *pointer)
#endif
-{
+{
(PUBL(free))((void *)pointer);
-}
-
-
-
-/*************************************************
-* Copy captured string to new store *
-*************************************************/
-
-/* This function copies a single captured substring into a piece of new
-store
-
-Arguments:
- subject the subject string that was matched
- ovector pointer to the offsets table
- stringcount the number of substrings that were captured
- (i.e. the yield of the pcre_exec call, unless
- that was zero, in which case it should be 1/3
- of the offset table size)
- stringnumber the number of the required substring
- stringptr where to put a pointer to the substring
-
-Returns: if successful:
- the length of the string, not including the zero that
- is put on the end; can be zero
- if not successful:
- PCRE_ERROR_NOMEMORY (-6) failed to get store
- PCRE_ERROR_NOSUBSTRING (-7) substring not present
-*/
-
+}
+
+
+
+/*************************************************
+* Copy captured string to new store *
+*************************************************/
+
+/* This function copies a single captured substring into a piece of new
+store
+
+Arguments:
+ subject the subject string that was matched
+ ovector pointer to the offsets table
+ stringcount the number of substrings that were captured
+ (i.e. the yield of the pcre_exec call, unless
+ that was zero, in which case it should be 1/3
+ of the offset table size)
+ stringnumber the number of the required substring
+ stringptr where to put a pointer to the substring
+
+Returns: if successful:
+ the length of the string, not including the zero that
+ is put on the end; can be zero
+ if not successful:
+ PCRE_ERROR_NOMEMORY (-6) failed to get store
+ PCRE_ERROR_NOSUBSTRING (-7) substring not present
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_get_substring(const char *subject, int *ovector, int stringcount,
- int stringnumber, const char **stringptr)
+pcre_get_substring(const char *subject, int *ovector, int stringcount,
+ int stringnumber, const char **stringptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
@@ -559,17 +559,17 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
int stringnumber, PCRE_SPTR32 *stringptr)
#endif
-{
-int yield;
+{
+int yield;
pcre_uchar *substring;
-if (stringnumber < 0 || stringnumber >= stringcount)
- return PCRE_ERROR_NOSUBSTRING;
-stringnumber *= 2;
-yield = ovector[stringnumber+1] - ovector[stringnumber];
+if (stringnumber < 0 || stringnumber >= stringcount)
+ return PCRE_ERROR_NOSUBSTRING;
+stringnumber *= 2;
+yield = ovector[stringnumber+1] - ovector[stringnumber];
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
-if (substring == NULL) return PCRE_ERROR_NOMEMORY;
+if (substring == NULL) return PCRE_ERROR_NOMEMORY;
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
-substring[yield] = 0;
+substring[yield] = 0;
#if defined COMPILE_PCRE8
*stringptr = (const char *)substring;
#elif defined COMPILE_PCRE16
@@ -577,38 +577,38 @@ substring[yield] = 0;
#elif defined COMPILE_PCRE32
*stringptr = (PCRE_SPTR32)substring;
#endif
-return yield;
-}
-
-
-
-/*************************************************
-* Copy named captured string to new store *
-*************************************************/
-
-/* This function copies a single captured substring, identified by name, into
-new store. If the regex permits duplicate names, the first substring that is
-set is chosen.
-
-Arguments:
- code the compiled regex
- subject the subject string that was matched
- ovector pointer to the offsets table
- stringcount the number of substrings that were captured
- (i.e. the yield of the pcre_exec call, unless
- that was zero, in which case it should be 1/3
- of the offset table size)
- stringname the name of the required substring
- stringptr where to put the pointer
-
-Returns: if successful:
- the length of the copied string, not including the zero
- that is put on the end; can be zero
- if not successful:
- PCRE_ERROR_NOMEMORY (-6) couldn't get memory
- PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
-*/
-
+return yield;
+}
+
+
+
+/*************************************************
+* Copy named captured string to new store *
+*************************************************/
+
+/* This function copies a single captured substring, identified by name, into
+new store. If the regex permits duplicate names, the first substring that is
+set is chosen.
+
+Arguments:
+ code the compiled regex
+ subject the subject string that was matched
+ ovector pointer to the offsets table
+ stringcount the number of substrings that were captured
+ (i.e. the yield of the pcre_exec call, unless
+ that was zero, in which case it should be 1/3
+ of the offset table size)
+ stringname the name of the required substring
+ stringptr where to put the pointer
+
+Returns: if successful:
+ the length of the copied string, not including the zero
+ that is put on the end; can be zero
+ if not successful:
+ PCRE_ERROR_NOMEMORY (-6) couldn't get memory
+ PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject,
@@ -625,36 +625,36 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
int *ovector, int stringcount, PCRE_SPTR32 stringname,
PCRE_SPTR32 *stringptr)
#endif
-{
+{
int n = get_first_set(code, stringname, ovector, stringcount);
-if (n <= 0) return n;
+if (n <= 0) return n;
#if defined COMPILE_PCRE8
-return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
+return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
#elif defined COMPILE_PCRE16
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
#elif defined COMPILE_PCRE32
return pcre32_get_substring(subject, ovector, stringcount, n, stringptr);
#endif
-}
-
-
-
-
-/*************************************************
-* Free store obtained by get_substring *
-*************************************************/
-
-/* This function exists for the benefit of people calling PCRE from non-C
+}
+
+
+
+
+/*************************************************
+* Free store obtained by get_substring *
+*************************************************/
+
+/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (PUBL(free))()
directly.
-
-Argument: the result of a previous pcre_get_substring()
-Returns: nothing
-*/
-
+
+Argument: the result of a previous pcre_get_substring()
+Returns: nothing
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
-pcre_free_substring(const char *pointer)
+pcre_free_substring(const char *pointer)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring(PCRE_SPTR16 pointer)
@@ -662,8 +662,8 @@ pcre16_free_substring(PCRE_SPTR16 pointer)
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre32_free_substring(PCRE_SPTR32 pointer)
#endif
-{
+{
(PUBL(free))((void *)pointer);
-}
-
-/* End of pcre_get.c */
+}
+
+/* End of pcre_get.c */
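
The pcre_get.c functions above are convenience wrappers that extract captured substrings using the ovector and the count returned by pcre_exec(). A hedged sketch of one of them, not part of the patch: the group name "word" is hypothetical, and "rc" is assumed to be the positive return value of a preceding pcre_exec() call, as described in the Arguments comments above.

#include <stdio.h>
#include <pcre.h>

static void show_named_capture(const pcre *re, const char *subject,
                               int *ovector, int rc)
{
const char *value;
int len = pcre_get_named_substring(re, subject, ovector, rc, "word", &value);
if (len >= 0)
  {
  printf("captured %d chars: %s\n", len, value);
  pcre_free_substring(value);   /* frees the store obtained by the call above */
  }
else if (len == PCRE_ERROR_NOSUBSTRING)
  printf("no group named \"word\" was set\n");
}
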
diff --git a/contrib/libs/pcre/pcre_globals.c b/contrib/libs/pcre/pcre_globals.c
index 21a2fe9de9..b0418be597 100644
--- a/contrib/libs/pcre/pcre_globals.c
+++ b/contrib/libs/pcre/pcre_globals.c
@@ -1,50 +1,50 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2014 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains global variables that are exported by the PCRE library.
-PCRE is thread-clean and doesn't use any global variables in the normal sense.
-However, it calls memory allocation and freeing functions via the four
-indirections below, and it can optionally do callouts, using the fifth
-indirection. These values can be changed by the caller, but are shared between
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains global variables that are exported by the PCRE library.
+PCRE is thread-clean and doesn't use any global variables in the normal sense.
+However, it calls memory allocation and freeing functions via the four
+indirections below, and it can optionally do callouts, using the fifth
+indirection. These values can be changed by the caller, but are shared between
all threads.
-
+
For MS Visual Studio and Symbian OS, there are problems in initializing these
variables to non-local functions. In these cases, therefore, an indirection via
a local function is used.
@@ -52,12 +52,12 @@ a local function is used.
Also, when compiling for Virtual Pascal, things are done differently, and
global variables are not used. */
-#ifdef HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
+#endif
+
+#include "pcre_internal.h"
+
#if defined _MSC_VER || defined __SYMBIAN32__
static void* LocalPcreMalloc(size_t aSize)
{
@@ -81,6 +81,6 @@ PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
-#endif
-
-/* End of pcre_globals.c */
+#endif
+
+/* End of pcre_globals.c */
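The pcre_malloc/pcre_free indirections restored above are part of PCRE's public API, so a host application can redirect every allocation the library makes before its first PCRE call. A minimal sketch of that usage (the counting wrapper and its names are illustrative, not part of this change):

#include <stdio.h>
#include <stdlib.h>
#include <pcre.h>

static size_t live_allocations;            /* illustrative bookkeeping only */

static void *counting_malloc(size_t size)
{
  live_allocations++;
  return malloc(size);
}

static void counting_free(void *ptr)
{
  if (ptr != NULL) live_allocations--;
  free(ptr);
}

int main(void)
{
  const char *error;
  int erroffset;
  pcre *re;

  /* Redirect the global indirections before the first call into PCRE. */
  pcre_malloc = counting_malloc;
  pcre_free = counting_free;

  re = pcre_compile("a+b", 0, &error, &erroffset, NULL);
  if (re != NULL) pcre_free(re);

  printf("outstanding PCRE allocations: %zu\n", live_allocations);
  return 0;
}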
diff --git a/contrib/libs/pcre/pcre_internal.h b/contrib/libs/pcre/pcre_internal.h
index 7d7d2c4bac..97ff55d03b 100644
--- a/contrib/libs/pcre/pcre_internal.h
+++ b/contrib/libs/pcre/pcre_internal.h
@@ -1,57 +1,57 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2016 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* This header contains definitions that are shared between the different
-modules, but which are not relevant to the exported API. This includes some
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This header contains definitions that are shared between the different
+modules, but which are not relevant to the exported API. This includes some
functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_"
depending on the PRIV macro. */
-
-#ifndef PCRE_INTERNAL_H
-#define PCRE_INTERNAL_H
-
+
+#ifndef PCRE_INTERNAL_H
+#define PCRE_INTERNAL_H
+
/* Define PCRE_DEBUG to get debugging output on stdout. */
-
-#if 0
+
+#if 0
#define PCRE_DEBUG
-#endif
-
+#endif
+
/* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
#if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32
@@ -86,66 +86,66 @@ script prevents both being selected, but not everybody uses "configure". */
#error The use of both EBCDIC and SUPPORT_UTF is not supported.
#endif
-/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
-inline, and there are *still* stupid compilers about that don't like indented
-pre-processor statements, or at least there were when I first wrote this. After
-all, it had only been about 10 years then...
-
-It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
-be absolutely sure we get our version. */
-
-#undef DPRINTF
+/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
+inline, and there are *still* stupid compilers about that don't like indented
+pre-processor statements, or at least there were when I first wrote this. After
+all, it had only been about 10 years then...
+
+It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
+be absolutely sure we get our version. */
+
+#undef DPRINTF
#ifdef PCRE_DEBUG
-#define DPRINTF(p) printf p
-#else
-#define DPRINTF(p) /* Nothing */
-#endif
-
-
-/* Standard C headers plus the external interface definition. The only time
-setjmp and stdarg are used is when NO_RECURSE is set. */
-
-#include <ctype.h>
-#include <limits.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
+#define DPRINTF(p) printf p
+#else
+#define DPRINTF(p) /* Nothing */
+#endif
+
+
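For reference, the single-parameter form of DPRINTF means callers wrap the whole printf argument list in an extra pair of parentheses, so the call can vanish entirely when debugging is off. A small usage sketch (the function and variable names are made up):

#include <stdio.h>

#define PCRE_DEBUG
#ifdef PCRE_DEBUG
#define DPRINTF(p) printf p
#else
#define DPRINTF(p) /* Nothing */
#endif

static void report(int length, int offset)
{
  /* The double parentheses turn the whole printf argument list into one
     macro argument; with PCRE_DEBUG unset the statement compiles to nothing. */
  DPRINTF(("matched %d bytes at offset %d\n", length, offset));
}

int main(void)
{
  report(5, 12);
  return 0;
}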
+/* Standard C headers plus the external interface definition. The only time
+setjmp and stdarg are used is when NO_RECURSE is set. */
+
+#include <ctype.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
/* Valgrind (memcheck) support */
#ifdef SUPPORT_VALGRIND
#include <valgrind/memcheck.h>
#endif
-/* When compiling a DLL for Windows, the exported symbols have to be declared
-using some MS magic. I found some useful information on this web page:
-http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
-information there, using __declspec(dllexport) without "extern" we have a
-definition; with "extern" we have a declaration. The settings here override the
-setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
-which is all that is needed for applications (they just import the symbols). We
-use:
-
- PCRE_EXP_DECL for declarations
- PCRE_EXP_DEFN for definitions of exported functions
- PCRE_EXP_DATA_DEFN for definitions of exported variables
-
-The reason for the two DEFN macros is that in non-Windows environments, one
-does not want to have "extern" before variable definitions because it leads to
-compiler warnings. So we distinguish between functions and variables. In
-Windows, the two should always be the same.
-
-The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
-which is an application, but needs to import this file in order to "peek" at
-internals, can #include pcre.h first to get an application's-eye view.
-
-In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
-special-purpose environments) might want to stick other stuff in front of
-exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
-PCRE_EXP_DATA_DEFN only if they are not already set. */
-
-#ifndef PCRE_EXP_DECL
+/* When compiling a DLL for Windows, the exported symbols have to be declared
+using some MS magic. I found some useful information on this web page:
+http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
+information there, using __declspec(dllexport) without "extern" we have a
+definition; with "extern" we have a declaration. The settings here override the
+setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
+which is all that is needed for applications (they just import the symbols). We
+use:
+
+ PCRE_EXP_DECL for declarations
+ PCRE_EXP_DEFN for definitions of exported functions
+ PCRE_EXP_DATA_DEFN for definitions of exported variables
+
+The reason for the two DEFN macros is that in non-Windows environments, one
+does not want to have "extern" before variable definitions because it leads to
+compiler warnings. So we distinguish between functions and variables. In
+Windows, the two should always be the same.
+
+The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
+which is an application, but needs to import this file in order to "peek" at
+internals, can #include pcre.h first to get an application's-eye view.
+
+In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
+special-purpose environments) might want to stick other stuff in front of
+exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
+PCRE_EXP_DATA_DEFN only if they are not already set. */
+
+#ifndef PCRE_EXP_DECL
# ifdef _WIN32
# ifndef PCRE_STATIC
# define PCRE_EXP_DECL extern __declspec(dllexport)
@@ -169,8 +169,8 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
# define PCRE_EXP_DATA_DEFN
# endif
# endif
-#endif
-
+#endif
+
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
@@ -186,27 +186,27 @@ set, we ensure here that it has no effect. */
#endif
/* We need to have types that specify unsigned 8, 16 and 32-bit integers. We
-cannot determine these outside the compilation (e.g. by running a program as
-part of "configure") because PCRE is often cross-compiled for use on other
-systems. Instead we make use of the maximum sizes that are available at
-preprocessor time in standard C environments. */
-
+cannot determine these outside the compilation (e.g. by running a program as
+part of "configure") because PCRE is often cross-compiled for use on other
+systems. Instead we make use of the maximum sizes that are available at
+preprocessor time in standard C environments. */
+
typedef unsigned char pcre_uint8;
-#if USHRT_MAX == 65535
+#if USHRT_MAX == 65535
typedef unsigned short pcre_uint16;
typedef short pcre_int16;
#define PCRE_UINT16_MAX USHRT_MAX
#define PCRE_INT16_MAX SHRT_MAX
-#elif UINT_MAX == 65535
+#elif UINT_MAX == 65535
typedef unsigned int pcre_uint16;
typedef int pcre_int16;
#define PCRE_UINT16_MAX UINT_MAX
#define PCRE_INT16_MAX INT_MAX
-#else
+#else
#error Cannot determine a type for 16-bit integers
-#endif
-
+#endif
+
#if UINT_MAX == 4294967295U
typedef unsigned int pcre_uint32;
typedef int pcre_int32;
@@ -217,10 +217,10 @@ typedef unsigned long int pcre_uint32;
typedef long int pcre_int32;
#define PCRE_UINT32_MAX ULONG_MAX
#define PCRE_INT32_MAX LONG_MAX
-#else
+#else
#error Cannot determine a type for 32-bit integers
-#endif
-
+#endif
+
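A compile-time check in the same style this header already uses for PCRE_UCHAR32 can confirm that the limits-based selection really produced the expected widths; a short sketch (not part of the original source):

/* A negative array size forces a compile error if an assumption is wrong. */
typedef int assert_pcre_uint16_size[sizeof(pcre_uint16) == 2 ? 1 : -1];
typedef int assert_pcre_uint32_size[sizeof(pcre_uint32) == 4 ? 1 : -1];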
/* When checking for integer overflow in pcre_compile(), we need to handle
large integers. If a 64-bit integer type is available, we can use that.
Otherwise we have to cast to double, which of course requires floating point
@@ -241,16 +241,16 @@ by "configure". */
#define INT64_OR_DOUBLE double
#endif
-/* All character handling must be done as unsigned characters. Otherwise there
-are problems with top-bit-set characters and functions such as isspace().
+/* All character handling must be done as unsigned characters. Otherwise there
+are problems with top-bit-set characters and functions such as isspace().
However, we leave the interface to the outside world as char * or short *,
because that should make things easier for callers. This character type is
called pcre_uchar.
-
+
The IN_UCHARS macro multiplies its argument by the byte size of the current
pcre_uchar type. Useful for memcpy and similar operations, which require the
byte size of their input/output buffers.

-
+
The MAX_255 macro checks whether its pcre_uchar input is less than 256.
The TABLE_GET macro is designed for accessing elements of tables that contain
@@ -291,182 +291,182 @@ typedef pcre_uint32 pcre_uchar;
#error Unsupported compiling mode
#endif /* COMPILE_PCRE[8|16|32] */
-/* This is an unsigned int value that no character can ever have. UTF-8
-characters only go up to 0x7fffffff (though Unicode doesn't go beyond
-0x0010ffff). */
-
-#define NOTACHAR 0xffffffff
-
-/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
-"any" and "anycrlf" at present). The following macros are used to package up
-testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
-modules to indicate in which datablock the parameters exist, and what the
-start/end of string field names are. */
-
-#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
-#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
-#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
-
-/* This macro checks for a newline at the given position */
-
-#define IS_NEWLINE(p) \
- ((NLBLOCK->nltype != NLTYPE_FIXED)? \
- ((p) < NLBLOCK->PSEND && \
+/* This is an unsigned int value that no character can ever have. UTF-8
+characters only go up to 0x7fffffff (though Unicode doesn't go beyond
+0x0010ffff). */
+
+#define NOTACHAR 0xffffffff
+
+/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
+"any" and "anycrlf" at present). The following macros are used to package up
+testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
+modules to indicate in which datablock the parameters exist, and what the
+start/end of string field names are. */
+
+#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
+#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
+#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
+
+/* This macro checks for a newline at the given position */
+
+#define IS_NEWLINE(p) \
+ ((NLBLOCK->nltype != NLTYPE_FIXED)? \
+ ((p) < NLBLOCK->PSEND && \
PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
&(NLBLOCK->nllen), utf)) \
- : \
- ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
+ : \
+ ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
UCHAR21TEST(p) == NLBLOCK->nl[0] && \
(NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \
- ) \
- )
-
-/* This macro checks for a newline immediately preceding the given position */
-
-#define WAS_NEWLINE(p) \
- ((NLBLOCK->nltype != NLTYPE_FIXED)? \
- ((p) > NLBLOCK->PSSTART && \
+ ) \
+ )
+
+/* This macro checks for a newline immediately preceding the given position */
+
+#define WAS_NEWLINE(p) \
+ ((NLBLOCK->nltype != NLTYPE_FIXED)? \
+ ((p) > NLBLOCK->PSSTART && \
PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
&(NLBLOCK->nllen), utf)) \
- : \
- ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
+ : \
+ ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
(NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
- ) \
- )
-
-/* When PCRE is compiled as a C++ library, the subject pointer can be replaced
-with a custom type. This makes it possible, for example, to allow pcre_exec()
-to process subject strings that are discontinuous by using a smart pointer
-class. It must always be possible to inspect all of the subject string in
-pcre_exec() because of the way it backtracks. Two macros are required in the
-normal case, for sign-unspecified and unsigned char pointers. The former is
-used for the external interface and appears in pcre.h, which is why its name
-must begin with PCRE_. */
-
-#ifdef CUSTOM_SUBJECT_PTR
+ ) \
+ )
+
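To make the shape of these macros concrete: with a fixed newline such as CRLF, IS_NEWLINE reduces to its second branch, a bounded two-character comparison against NLBLOCK->nl. A standalone sketch of just that reduced case, with a hypothetical struct standing in for whatever NLBLOCK points at (names here are illustrative):

#include <stdio.h>

typedef struct newline_block {
  int nltype;                  /* NLTYPE_FIXED in this sketch */
  int nllen;                   /* length of the fixed newline string */
  unsigned char nl[4];         /* the fixed newline characters */
  const unsigned char *psend;  /* end of the subject string */
} newline_block;

/* Reduced form of IS_NEWLINE for the fixed-newline branch only. */
static int is_fixed_newline(const unsigned char *p, const newline_block *nb)
{
  return p <= nb->psend - nb->nllen &&
         p[0] == nb->nl[0] &&
         (nb->nllen == 1 || p[1] == nb->nl[1]);
}

int main(void)
{
  const unsigned char subject[] = "abc\r\ndef";
  newline_block nb = { 0, 2, { '\r', '\n' }, subject + sizeof(subject) - 1 };
  printf("%d %d\n", is_fixed_newline(subject + 3, &nb),    /* 1 */
                    is_fixed_newline(subject + 1, &nb));   /* 0 */
  return 0;
}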
+/* When PCRE is compiled as a C++ library, the subject pointer can be replaced
+with a custom type. This makes it possible, for example, to allow pcre_exec()
+to process subject strings that are discontinuous by using a smart pointer
+class. It must always be possible to inspect all of the subject string in
+pcre_exec() because of the way it backtracks. Two macros are required in the
+normal case, for sign-unspecified and unsigned char pointers. The former is
+used for the external interface and appears in pcre.h, which is why its name
+must begin with PCRE_. */
+
+#ifdef CUSTOM_SUBJECT_PTR
#define PCRE_PUCHAR CUSTOM_SUBJECT_PTR
-#else
+#else
#define PCRE_PUCHAR const pcre_uchar *
-#endif
-
-/* Include the public PCRE header and the definitions of UCP character property
-values. */
-
-#include "pcre.h"
-#include "ucp.h"
-
+#endif
+
+/* Include the public PCRE header and the definitions of UCP character property
+values. */
+
+#include "pcre.h"
+#include "ucp.h"
+
#ifdef COMPILE_PCRE32
/* Assert that the public PCRE_UCHAR32 is a 32-bit type */
typedef int __assert_pcre_uchar32_size[sizeof(PCRE_UCHAR32) == 4 ? 1 : -1];
#endif
-/* When compiling for use with the Virtual Pascal compiler, these functions
-need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
-option on the command line. */
-
-#ifdef VPCOMPAT
-#define strlen(s) _strlen(s)
-#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
-#define memcmp(s,c,n) _memcmp(s,c,n)
-#define memcpy(d,s,n) _memcpy(d,s,n)
-#define memmove(d,s,n) _memmove(d,s,n)
-#define memset(s,c,n) _memset(s,c,n)
-#else /* VPCOMPAT */
-
-/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
-define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
-is set. Otherwise, include an emulating function for those systems that have
-neither (there some non-Unix environments where this is the case). */
-
-#ifndef HAVE_MEMMOVE
-#undef memmove /* some systems may have a macro */
-#ifdef HAVE_BCOPY
-#define memmove(a, b, c) bcopy(b, a, c)
-#else /* HAVE_BCOPY */
-static void *
-pcre_memmove(void *d, const void *s, size_t n)
-{
-size_t i;
-unsigned char *dest = (unsigned char *)d;
-const unsigned char *src = (const unsigned char *)s;
-if (dest > src)
- {
- dest += n;
- src += n;
- for (i = 0; i < n; ++i) *(--dest) = *(--src);
- return (void *)dest;
- }
-else
- {
- for (i = 0; i < n; ++i) *dest++ = *src++;
- return (void *)(dest - n);
- }
-}
-#define memmove(a, b, c) pcre_memmove(a, b, c)
-#endif /* not HAVE_BCOPY */
-#endif /* not HAVE_MEMMOVE */
-#endif /* not VPCOMPAT */
-
-
-/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
-in big-endian order) by default. These are used, for example, to link from the
-start of a subpattern to its alternatives and its end. The use of 2 bytes per
-offset limits the size of the compiled regex to around 64K, which is big enough
-for almost everybody. However, I received a request for an even bigger limit.
-For this reason, and also to make the code easier to maintain, the storing and
-loading of offsets from the byte string is now handled by the macros that are
-defined here.
-
-The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
-the config.h file, but can be overridden by using -D on the command line. This
-is automated on Unix systems via the "configure" command. */
-
+/* When compiling for use with the Virtual Pascal compiler, these functions
+need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
+option on the command line. */
+
+#ifdef VPCOMPAT
+#define strlen(s) _strlen(s)
+#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
+#define memcmp(s,c,n) _memcmp(s,c,n)
+#define memcpy(d,s,n) _memcpy(d,s,n)
+#define memmove(d,s,n) _memmove(d,s,n)
+#define memset(s,c,n) _memset(s,c,n)
+#else /* VPCOMPAT */
+
+/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
+define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
+is set. Otherwise, include an emulating function for those systems that have
+neither (there are some non-Unix environments where this is the case). */
+
+#ifndef HAVE_MEMMOVE
+#undef memmove /* some systems may have a macro */
+#ifdef HAVE_BCOPY
+#define memmove(a, b, c) bcopy(b, a, c)
+#else /* HAVE_BCOPY */
+static void *
+pcre_memmove(void *d, const void *s, size_t n)
+{
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)
+ {
+ dest += n;
+ src += n;
+ for (i = 0; i < n; ++i) *(--dest) = *(--src);
+ return (void *)dest;
+ }
+else
+ {
+ for (i = 0; i < n; ++i) *dest++ = *src++;
+ return (void *)(dest - n);
+ }
+}
+#define memmove(a, b, c) pcre_memmove(a, b, c)
+#endif /* not HAVE_BCOPY */
+#endif /* not HAVE_MEMMOVE */
+#endif /* not VPCOMPAT */
+
+
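The emulation above is needed only because memmove, unlike memcpy, is defined for overlapping buffers; a quick demonstration of the behaviour the fallback has to reproduce (a sketch, independent of PCRE):

#include <stdio.h>
#include <string.h>

int main(void)
{
  char buf[] = "abcdef";

  /* Shift "abcde" one position right within the same buffer: source and
     destination overlap, which is exactly the case memcpy may mishandle
     but memmove (and the pcre_memmove fallback) must get right. */
  memmove(buf + 1, buf, 5);
  printf("%s\n", buf);   /* prints "aabcde" */
  return 0;
}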
+/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
+in big-endian order) by default. These are used, for example, to link from the
+start of a subpattern to its alternatives and its end. The use of 2 bytes per
+offset limits the size of the compiled regex to around 64K, which is big enough
+for almost everybody. However, I received a request for an even bigger limit.
+For this reason, and also to make the code easier to maintain, the storing and
+loading of offsets from the byte string is now handled by the macros that are
+defined here.
+
+The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
+the config.h file, but can be overridden by using -D on the command line. This
+is automated on Unix systems via the "configure" command. */
+
#if defined COMPILE_PCRE8
-#if LINK_SIZE == 2
-
-#define PUT(a,n,d) \
- (a[n] = (d) >> 8), \
- (a[(n)+1] = (d) & 255)
-
-#define GET(a,n) \
- (((a)[n] << 8) | (a)[(n)+1])
-
-#define MAX_PATTERN_SIZE (1 << 16)
-
-
-#elif LINK_SIZE == 3
-
-#define PUT(a,n,d) \
- (a[n] = (d) >> 16), \
- (a[(n)+1] = (d) >> 8), \
- (a[(n)+2] = (d) & 255)
-
-#define GET(a,n) \
- (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
-
-#define MAX_PATTERN_SIZE (1 << 24)
-
-
-#elif LINK_SIZE == 4
-
-#define PUT(a,n,d) \
- (a[n] = (d) >> 24), \
- (a[(n)+1] = (d) >> 16), \
- (a[(n)+2] = (d) >> 8), \
- (a[(n)+3] = (d) & 255)
-
-#define GET(a,n) \
- (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
-
+#if LINK_SIZE == 2
+
+#define PUT(a,n,d) \
+ (a[n] = (d) >> 8), \
+ (a[(n)+1] = (d) & 255)
+
+#define GET(a,n) \
+ (((a)[n] << 8) | (a)[(n)+1])
+
+#define MAX_PATTERN_SIZE (1 << 16)
+
+
+#elif LINK_SIZE == 3
+
+#define PUT(a,n,d) \
+ (a[n] = (d) >> 16), \
+ (a[(n)+1] = (d) >> 8), \
+ (a[(n)+2] = (d) & 255)
+
+#define GET(a,n) \
+ (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
+
+#define MAX_PATTERN_SIZE (1 << 24)
+
+
+#elif LINK_SIZE == 4
+
+#define PUT(a,n,d) \
+ (a[n] = (d) >> 24), \
+ (a[(n)+1] = (d) >> 16), \
+ (a[(n)+2] = (d) >> 8), \
+ (a[(n)+3] = (d) & 255)
+
+#define GET(a,n) \
+ (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
+
/* Keep it positive */
#define MAX_PATTERN_SIZE (1 << 30)
-
+
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif
-
+
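For the default 8-bit, LINK_SIZE == 2 case, PUT and GET simply round-trip an offset through two big-endian bytes; a self-contained sketch of that case (the macro bodies are copied from above, used here outside any PCRE build):

#include <stdio.h>

#define PUT(a,n,d) \
  (a[n] = (d) >> 8), \
  (a[(n)+1] = (d) & 255)

#define GET(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

int main(void)
{
  unsigned char code[4] = { 0 };
  PUT(code, 1, 517);              /* stores 0x02, 0x05 at offsets 1 and 2 */
  printf("%d\n", GET(code, 1));   /* prints 517 */
  return 0;
}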
#elif defined COMPILE_PCRE16
#if LINK_SIZE == 2
@@ -499,12 +499,12 @@ is automated on Unix systems via the "configure" command. */
/* Keep it positive */
#define MAX_PATTERN_SIZE (1 << 30)
-#else
-#error LINK_SIZE must be either 2, 3, or 4
-#endif
-
+#else
+#error LINK_SIZE must be either 2, 3, or 4
+#endif
+
#elif defined COMPILE_PCRE32
-
+
/* Only supported LINK_SIZE is 4 */
/* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
#undef LINK_SIZE
@@ -523,37 +523,37 @@ is automated on Unix systems via the "configure" command. */
#error Unsupported compiling mode
#endif /* COMPILE_PCRE[8|16|32] */
-/* Convenience macro defined in terms of the others */
-
-#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
-
-
-/* PCRE uses some other 2-byte quantities that do not change when the size of
-offsets changes. There are used for repeat counts and for other things such as
-capturing parenthesis numbers in back references. */
-
+/* Convenience macro defined in terms of the others */
+
+#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
+
+
+/* PCRE uses some other 2-byte quantities that do not change when the size of
+offsets changes. These are used for repeat counts and for other things such as
+capturing parenthesis numbers in back references. */
+
#if defined COMPILE_PCRE8
#define IMM2_SIZE 2
-#define PUT2(a,n,d) \
- a[n] = (d) >> 8; \
- a[(n)+1] = (d) & 255
-
+#define PUT2(a,n,d) \
+ a[n] = (d) >> 8; \
+ a[(n)+1] = (d) & 255
+
/* For reasons that I do not understand, the expression in this GET2 macro is
treated by gcc as a signed expression, even when a is declared as unsigned. It
seems that any kind of arithmetic results in a signed value. */
-#define GET2(a,n) \
+#define GET2(a,n) \
(unsigned int)(((a)[n] << 8) | (a)[(n)+1])
-
+
#elif defined COMPILE_PCRE16
-
+
#define IMM2_SIZE 1
-
+
#define PUT2(a,n,d) \
a[n] = d
-
+
#define GET2(a,n) \
a[n]
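The cast in the 8-bit GET2 is presumably a consequence of integer promotion: even when a is an array of unsigned char, the expression ((a)[n] << 8) | (a)[(n)+1] has type int, so without the cast comparisons against unsigned values can draw sign-related warnings. A small standalone illustration (not from the diff):

#include <stdio.h>

#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])

int main(void)
{
  unsigned char code[2] = { 0x01, 0x2c };
  unsigned int count = GET2(code, 0);   /* 300, kept unsigned by the cast */
  printf("%u\n", count);
  return 0;
}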
@@ -610,27 +610,27 @@ UTF support is omitted, we don't even define them. */
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
/* #define NOT_FIRSTCHAR(c) */
-#define GETCHAR(c, eptr) c = *eptr;
-#define GETCHARTEST(c, eptr) c = *eptr;
-#define GETCHARINC(c, eptr) c = *eptr++;
-#define GETCHARINCTEST(c, eptr) c = *eptr++;
-#define GETCHARLEN(c, eptr, len) c = *eptr;
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARTEST(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define GETCHARINCTEST(c, eptr) c = *eptr++;
+#define GETCHARLEN(c, eptr, len) c = *eptr;
/* #define GETCHARLENTEST(c, eptr, len) */
-/* #define BACKCHAR(eptr) */
+/* #define BACKCHAR(eptr) */
/* #define FORWARDCHAR(eptr) */
/* #define ACROSSCHAR(condition, eptr, action) */
-
+
#else /* SUPPORT_UTF */
-
+
/* Tests whether the code point needs extra characters to decode. */
-
+
#define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
-
+
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer. */
-
+
#define GETUTF8(c, eptr) \
- { \
+ { \
if ((c & 0x20) == 0) \
c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
else if ((c & 0x10) == 0) \
@@ -656,10 +656,10 @@ the pointer. */
if ((c & 0x20) == 0) \
c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \
else if ((c & 0x10) == 0) \
- { \
+ { \
c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \
eptr += 2; \
- } \
+ } \
else if ((c & 0x08) == 0) \
{ \
c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \
@@ -680,8 +680,8 @@ the pointer. */
((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \
eptr += 5; \
} \
- }
-
+ }
+
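Worked through for a two-byte sequence: GETUTF8 is entered with c holding the lead byte (already known to be >= 0xc0), takes the (c & 0x20) == 0 branch, and combines five bits from the lead byte with six from the continuation byte. A standalone sketch of just that branch (illustrative only):

#include <stdio.h>

int main(void)
{
  /* U+00E9 (e-acute) encodes as 0xC3 0xA9 in UTF-8. */
  const unsigned char eptr[] = { 0xc3, 0xa9 };
  unsigned int c = eptr[0];

  if (c >= 0xc0 && (c & 0x20) == 0)   /* two-byte sequence */
    c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f);

  printf("U+%04X\n", c);              /* prints U+00E9 */
  return 0;
}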
#if defined COMPILE_PCRE8
/* These macros were originally written in the form of loops that used data
@@ -714,37 +714,37 @@ we know we are in UTF-8 mode. */
c = *eptr; \
if (c >= 0xc0) GETUTF8(c, eptr);
-/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
-pointer. */
-
-#define GETCHARTEST(c, eptr) \
- c = *eptr; \
+/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+ c = *eptr; \
if (utf && c >= 0xc0) GETUTF8(c, eptr);
-
-/* Get the next UTF-8 character, advancing the pointer. This is called when we
-know we are in UTF-8 mode. */
-
-#define GETCHARINC(c, eptr) \
- c = *eptr++; \
+
+/* Get the next UTF-8 character, advancing the pointer. This is called when we
+know we are in UTF-8 mode. */
+
+#define GETCHARINC(c, eptr) \
+ c = *eptr++; \
if (c >= 0xc0) GETUTF8INC(c, eptr);
-
+
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-8 mode. */
-
-#define GETCHARINCTEST(c, eptr) \
- c = *eptr++; \
+
+#define GETCHARINCTEST(c, eptr) \
+ c = *eptr++; \
if (utf && c >= 0xc0) GETUTF8INC(c, eptr);
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer, incrementing the length. */
#define GETUTF8LEN(c, eptr, len) \
- { \
+ { \
if ((c & 0x20) == 0) \
- { \
+ { \
c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
len++; \
- } \
+ } \
else if ((c & 0x10) == 0) \
{ \
c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
@@ -770,15 +770,15 @@ advancing the pointer, incrementing the length. */
((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
len += 5; \
} \
- }
-
-/* Get the next UTF-8 character, not advancing the pointer, incrementing length
-if there are extra bytes. This is called when we know we are in UTF-8 mode. */
-
-#define GETCHARLEN(c, eptr, len) \
- c = *eptr; \
+ }
+
+/* Get the next UTF-8 character, not advancing the pointer, incrementing length
+if there are extra bytes. This is called when we know we are in UTF-8 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+ c = *eptr; \
if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
-
+
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
do not know if we are in UTF-8 mode. */
@@ -787,21 +787,21 @@ do not know if we are in UTF-8 mode. */
c = *eptr; \
if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
-/* If the pointer is not at the start of a character, move it back until
-it is. This is called only in UTF-8 mode - we don't put a test within the macro
-because almost all calls are already within a block of UTF-8 only code. */
-
-#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
-
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-8 mode - we don't put a test within the macro
+because almost all calls are already within a block of UTF-8 only code. */
+
+#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
+
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++
-
+
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((eptr) & 0xc0) == 0x80) action
-
+
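BACKCHAR relies on the fact that every UTF-8 continuation byte has its top two bits equal to 10, so stepping back past bytes of the form 0x80..0xBF always lands on a lead byte. A minimal check of that property (illustrative, using the macro exactly as defined above):

#include <stdio.h>

#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--

int main(void)
{
  /* "h", then U+00E9 as 0xC3 0xA9, then "j" */
  const unsigned char s[] = { 'h', 0xc3, 0xa9, 'j', 0 };
  const unsigned char *p = s + 2;        /* points into the middle of the character */

  BACKCHAR(p);                           /* backs up to the 0xC3 lead byte */
  printf("offset %d\n", (int)(p - s));   /* prints 1 */
  return 0;
}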
#elif defined COMPILE_PCRE16
-
+
/* Tells the biggest code point which can be encoded as a single character. */
#define MAX_VALUE_FOR_SINGLE_CHAR 65535
@@ -1052,29 +1052,29 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
#else
#define VSPACE_LIST \
CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
-#endif
-
+#endif
+
#define VSPACE_BYTE_CASES \
case CHAR_LF: \
case CHAR_VT: \
case CHAR_FF: \
case CHAR_CR: \
case CHAR_NEL
-
+
#define VSPACE_CASES VSPACE_BYTE_CASES
#endif /* EBCDIC */
-
+
/* ------ End of whitespace macros ------ */
-
-/* Private flags containing information about the compiled regex. They used to
+
+/* Private flags containing information about the compiled regex. They used to
live at the top end of the options word, but that got almost full, so they were
moved to a 16-bit flags word - which got almost full, so now they are in a
32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-
+
#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */
#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */
#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */
@@ -1090,7 +1090,7 @@ compatibility. */
#define PCRE_MLSET 0x00002000 /* match limit set by regex */
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
-
+
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
#elif defined COMPILE_PCRE16
@@ -1099,70 +1099,70 @@ compatibility. */
#define PCRE_MODE PCRE_MODE32
#endif
#define PCRE_MODE_MASK (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32)
-
+
/* Flags for the "extra" block produced by pcre_study(). */
-
+
#define PCRE_STUDY_MAPPED 0x0001 /* a map of starting chars exists */
#define PCRE_STUDY_MINLEN 0x0002 /* a minimum length field exists */
-/* Masks for identifying the public options that are permitted at compile
-time, run time, or study time, respectively. */
-
-#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
- PCRE_NEWLINE_ANYCRLF)
-
+/* Masks for identifying the public options that are permitted at compile
+time, run time, or study time, respectively. */
+
+#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
+ PCRE_NEWLINE_ANYCRLF)
+
#define PUBLIC_COMPILE_OPTIONS \
- (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
- PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
+ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
+ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \
PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
-
-#define PUBLIC_EXEC_OPTIONS \
+
+#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
-
-#define PUBLIC_DFA_EXEC_OPTIONS \
+
+#define PUBLIC_DFA_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
PCRE_NO_START_OPTIMIZE)
-
+
#define PUBLIC_STUDY_OPTIONS \
(PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED)
-
+
#define PUBLIC_JIT_EXEC_OPTIONS \
(PCRE_NO_UTF8_CHECK|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|\
PCRE_NOTEMPTY_ATSTART|PCRE_PARTIAL_SOFT|PCRE_PARTIAL_HARD)
-
+
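These masks exist so each entry point can reject option bits it does not understand with a single test. A sketch of that pattern with a stand-in mask value (not a quote of pcre_compile; the real masks are built from the PCRE_* public options above):

#include <stdio.h>

#define SKETCH_PUBLIC_COMPILE_OPTIONS 0x000000ffu   /* stand-in value */

static const char *check_options(unsigned int options)
{
  if ((options & ~SKETCH_PUBLIC_COMPILE_OPTIONS) != 0)
    return "unrecognized option bit(s)";
  return NULL;
}

int main(void)
{
  printf("%s\n", check_options(0x00000100u) ? "rejected" : "accepted");
  return 0;
}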
/* Magic number to provide a small check against being handed junk. */
-#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
-
+#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
+
/* This variable is used to detect a loaded regular expression
in different endianness. */
-
+
#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */
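Together, the two constants let the library recognize a pattern that was compiled and saved on a host of the opposite endianness; conceptually (a sketch reusing the values above, outside any PCRE build):

#include <stdio.h>

#define MAGIC_NUMBER          0x50435245UL  /* 'PCRE' */
#define REVERSED_MAGIC_NUMBER 0x45524350UL  /* 'ERCP' */

static const char *classify(unsigned long magic)
{
  if (magic == MAGIC_NUMBER)          return "native byte order";
  if (magic == REVERSED_MAGIC_NUMBER) return "byte-swapped (other endianness)";
  return "not a compiled PCRE pattern";
}

int main(void)
{
  printf("%s\n", classify(0x45524350UL));
  return 0;
}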
-
-/* The maximum remaining length of subject we are prepared to search for a
-req_byte match. */
-
-#define REQ_BYTE_MAX 1000
-
+
+/* The maximum remaining length of subject we are prepared to search for a
+req_byte match. */
+
+#define REQ_BYTE_MAX 1000
+
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
environments where these macros are defined elsewhere. Unfortunately, there
is no way to do the same for the typedef. */
-
-typedef int BOOL;
-
+
+typedef int BOOL;
+
#ifndef FALSE
-#define FALSE 0
-#define TRUE 1
+#define FALSE 0
+#define TRUE 1
#endif
-
+
/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
character constants like '*' because the compiler would emit their EBCDIC code,
which is different from their ASCII/UTF-8 code. Instead we define macros for
@@ -1764,42 +1764,42 @@ only. */
#endif /* SUPPORT_UTF */
-/* Escape items that are just an encoding of a particular data value. */
-
+/* Escape items that are just an encoding of a particular data value. */
+
#ifndef ESC_a
#define ESC_a CHAR_BEL
#endif
-#ifndef ESC_e
+#ifndef ESC_e
#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
+#endif
+
+#ifndef ESC_f
#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
+#endif
+
+#ifndef ESC_n
#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
+#endif
+
+#ifndef ESC_r
#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
+#endif
+
+/* We can't officially use ESC_t because it is a POSIX reserved identifier
+(presumably because of all the others like size_t). */
+
+#ifndef ESC_tee
#define ESC_tee CHAR_HT
-#endif
-
-/* Codes for different types of Unicode property */
-
-#define PT_ANY 0 /* Any property - matches all chars */
-#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
+#endif
+
+/* Codes for different types of Unicode property */
+
+#define PT_ANY 0 /* Any property - matches all chars */
+#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
#define PT_GC 2 /* Specified general characteristic (e.g. L) */
#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */
-#define PT_SC 4 /* Script (e.g. Han) */
+#define PT_SC 4 /* Script (e.g. Han) */
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
@@ -1807,7 +1807,7 @@ only. */
#define PT_CLIST 9 /* Pseudo-property: match character list */
#define PT_UCNC 10 /* Universal Character nameable character */
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
-
+
/* The following special properties are used only in XCLASS items, when POSIX
classes are specified and PCRE_UCP is set - in other words, for Unicode
handling of these classes. They are not available via the \p or \P escapes like
@@ -1818,27 +1818,27 @@ table. */
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
-/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
+/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
-
+
#define XCL_NOT 0x01 /* Flag: this is a negative class */
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
-
-#define XCL_END 0 /* Marks end of individual items */
-#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
-#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
-#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
-#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
-
-/* These are escaped items that aren't just an encoding of a particular data
+
+#define XCL_END 0 /* Marks end of individual items */
+#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
+#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
+#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
+#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
+
+/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. Also, they must appear in the same order as in the
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
non-DOTALL mode, "." behaves like \N.
-
+
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
They must be contiguous, and remain in order so that the replacements can be
@@ -1851,17 +1851,17 @@ repeated. These are the types that consume characters. If any new escapes are
put in between that don't consume a character, that code will have to change.
*/
-enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
+enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
ESC_E, ESC_Q, ESC_g, ESC_k,
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
-
-
+
+
/********************** Opcode definitions ******************/
-
+
/****** NOTE NOTE NOTE ******
-
+
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
order to the list of escapes immediately above. Furthermore, values up to
OP_DOLLM must not be changed without adjusting the table called autoposstab in
@@ -1883,23 +1883,23 @@ auto-possessified. */
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
-enum {
- OP_END, /* 0 End of pattern */
-
- /* Values corresponding to backslashed metacharacters */
-
- OP_SOD, /* 1 Start of data: \A */
- OP_SOM, /* 2 Start of match (subject + offset): \G */
- OP_SET_SOM, /* 3 Set start of match (\K) */
- OP_NOT_WORD_BOUNDARY, /* 4 \B */
- OP_WORD_BOUNDARY, /* 5 \b */
- OP_NOT_DIGIT, /* 6 \D */
- OP_DIGIT, /* 7 \d */
- OP_NOT_WHITESPACE, /* 8 \S */
- OP_WHITESPACE, /* 9 \s */
- OP_NOT_WORDCHAR, /* 10 \W */
- OP_WORDCHAR, /* 11 \w */
-
+enum {
+ OP_END, /* 0 End of pattern */
+
+ /* Values corresponding to backslashed metacharacters */
+
+ OP_SOD, /* 1 Start of data: \A */
+ OP_SOM, /* 2 Start of match (subject + offset): \G */
+ OP_SET_SOM, /* 3 Set start of match (\K) */
+ OP_NOT_WORD_BOUNDARY, /* 4 \B */
+ OP_WORD_BOUNDARY, /* 5 \b */
+ OP_NOT_DIGIT, /* 6 \D */
+ OP_DIGIT, /* 7 \d */
+ OP_NOT_WHITESPACE, /* 8 \S */
+ OP_WHITESPACE, /* 9 \s */
+ OP_NOT_WORDCHAR, /* 10 \W */
+ OP_WORDCHAR, /* 11 \w */
+
OP_ANY, /* 12 Match any character except newline (\N) */
OP_ALLANY, /* 13 Match any character */
OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
@@ -1913,81 +1913,81 @@ enum {
  OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
OP_EOD, /* 24 End of data (\z) */
-
+
/* Line end assertions */
-
+
OP_DOLL, /* 25 End of line - not multiline */
OP_DOLLM, /* 26 End of line - multiline */
OP_CIRC, /* 27 Start of line - not multiline */
OP_CIRCM, /* 28 Start of line - multiline */
-
+
/* Single characters; caseful must precede the caseless ones */
-
+
OP_CHAR, /* 29 Match one character, casefully */
OP_CHARI, /* 30 Match one character, caselessly */
OP_NOT, /* 31 Match one character, not the given one, casefully */
OP_NOTI, /* 32 Match one character, not the given one, caselessly */
-
+
/* The following sets of 13 opcodes must always be kept in step because
the offset from the first one is used to generate the others. */
-
+
/* Repeated characters; caseful must precede the caseless ones */
-
+
OP_STAR, /* 33 The maximizing and minimizing versions of */
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
OP_PLUS, /* 35 the minimizing one second. */
OP_MINPLUS, /* 36 */
OP_QUERY, /* 37 */
OP_MINQUERY, /* 38 */
-
+
OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/
OP_MINUPTO, /* 40 */
OP_EXACT, /* 41 Exactly n matches */
-
+
OP_POSSTAR, /* 42 Possessified star, caseful */
OP_POSPLUS, /* 43 Possessified plus, caseful */
  OP_POSQUERY, /* 44 Possessified query, caseful */
OP_POSUPTO, /* 45 Possessified upto, caseful */
-
+
/* Repeated characters; caseless must follow the caseful ones */
-
+
OP_STARI, /* 46 */
OP_MINSTARI, /* 47 */
OP_PLUSI, /* 48 */
OP_MINPLUSI, /* 49 */
OP_QUERYI, /* 50 */
OP_MINQUERYI, /* 51 */
-
+
OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */
OP_MINUPTOI, /* 53 */
OP_EXACTI, /* 54 */
-
+
OP_POSSTARI, /* 55 Possessified star, caseless */
OP_POSPLUSI, /* 56 Possessified plus, caseless */
  OP_POSQUERYI, /* 57 Possessified query, caseless */
OP_POSUPTOI, /* 58 Possessified upto, caseless */
-
+
/* The negated ones must follow the non-negated ones, and match them */
/* Negated repeated character, caseful; must precede the caseless ones */
-
+
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
OP_NOTPLUS, /* 61 the minimizing one second. They must be in */
OP_NOTMINPLUS, /* 62 exactly the same order as those above. */
OP_NOTQUERY, /* 63 */
OP_NOTMINQUERY, /* 64 */
-
+
OP_NOTUPTO, /* 65 From 0 to n matches, caseful */
OP_NOTMINUPTO, /* 66 */
OP_NOTEXACT, /* 67 Exactly n matches */
-
+
OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */
OP_NOTPOSPLUS, /* 69 */
OP_NOTPOSQUERY, /* 70 */
OP_NOTPOSUPTO, /* 71 */
-
+
/* Negated repeated character, caseless; must follow the caseful ones */
-
+
OP_NOTSTARI, /* 72 */
OP_NOTMINSTARI, /* 73 */
OP_NOTPLUSI, /* 74 */
@@ -2084,28 +2084,28 @@ enum {
OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
- check for >= SBRA to distinguish the two sets. */
-
+ check for >= SBRA to distinguish the two sets. */
+
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 138 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 140 Conditional group, check empty */
-
+
/* The next two pairs must (respectively) be kept together. */
-
+
OP_CREF, /* 141 Used to hold a capture number as condition */
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
OP_RREF, /* 143 Used to hold a recursion number as condition */
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
OP_DEF, /* 145 The DEFINE condition */
-
+
OP_BRAZERO, /* 146 These two must remain together and in this */
OP_BRAMINZERO, /* 147 order. */
OP_BRAPOSZERO, /* 148 */
- /* These are backtracking control verbs */
-
+ /* These are backtracking control verbs */
+
OP_MARK, /* 149 always has an argument */
OP_PRUNE, /* 150 */
OP_PRUNE_ARG, /* 151 same, but with argument */
@@ -2114,9 +2114,9 @@ enum {
OP_THEN, /* 154 */
OP_THEN_ARG, /* 155 same, but with argument */
OP_COMMIT, /* 156 */
-
- /* These are forced failure and success verbs */
-
+
+ /* These are forced failure and success verbs */
+
OP_FAIL, /* 157 */
OP_ACCEPT, /* 158 */
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
@@ -2131,40 +2131,40 @@ enum {
some in the past. */
OP_TABLE_LENGTH
-};
-
+};
+
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
definitions that follow must also be updated to match. There are also tables
called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in
pcre_dfa_exec.c that must be updated. */
-
-/* This macro defines textual names for all the opcodes. These are used only
+
+/* This macro defines textual names for all the opcodes. These are used only
for debugging, and some of them are only partial names. The macro is referenced
only in pcre_printint.c, which fills out the full names in many cases (and in
some cases doesn't actually use these names at all). */
-
-#define OP_NAME_LIST \
- "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
+
+#define OP_NAME_LIST \
+ "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
- "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
- "extuni", "\\Z", "\\z", \
+ "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
+ "extuni", "\\Z", "\\z", \
"$", "$", "^", "^", "char", "chari", "not", "noti", \
"*", "*?", "+", "+?", "?", "??", \
"{", "{", "{", \
- "*+","++", "?+", "{", \
+ "*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", \
"{", "{", "{", \
- "*+","++", "?+", "{", \
+ "*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", \
"{", "{", "{", \
"*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", \
"{", "{", "{", \
"*+","++", "?+", "{", \
- "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
- "*+","++", "?+", "{", \
- "*", "*?", "+", "+?", "?", "??", "{", "{", \
+ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
+ "*+","++", "?+", "{", \
+ "*", "*?", "+", "+?", "?", "??", "{", "{", \
"*+","++", "?+", "{", \
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", \
@@ -2181,29 +2181,29 @@ some cases doesn't actually use these names at all). */
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
"*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero"
-
-
-/* This macro defines the length of fixed length operations in the compiled
-regex. The lengths are used when searching for specific things, and also in the
-debugging printing of a compiled regex. We use a macro so that it can be
-defined close to the definitions of the opcodes themselves.
-
-As things have been extended, some of these are no longer fixed lenths, but are
-minima instead. For example, the length of a single-character repeat may vary
-in UTF-8 mode. The code that uses this table must know about such things. */
-
-#define OP_LENGTHS \
- 1, /* End */ \
- 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
- 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
+
+
+/* This macro defines the length of fixed length operations in the compiled
+regex. The lengths are used when searching for specific things, and also in the
+debugging printing of a compiled regex. We use a macro so that it can be
+defined close to the definitions of the opcodes themselves.
+
+As things have been extended, some of these are no longer fixed lengths, but are
+minima instead. For example, the length of a single-character repeat may vary
+in UTF-8 mode. The code that uses this table must know about such things. */
+
+#define OP_LENGTHS \
+ 1, /* End */ \
+ 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
+ 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
1, 1, 1, /* Any, AllAny, Anybyte */ \
3, 3, /* \P, \p */ \
- 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
+ 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
1, /* \X */ \
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \
- 2, /* Char - the minimum length */ \
+ 2, /* Char - the minimum length */ \
2, /* Chari - the minimum length */ \
- 2, /* not */ \
+ 2, /* not */ \
2, /* noti */ \
/* Positive single-char repeats ** These are */ \
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
@@ -2214,8 +2214,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \
2+IMM2_SIZE, /* exact I */ \
2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \
- /* Negative single-char repeats - only for chars < 256 */ \
- 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
+ /* Negative single-char repeats - only for chars < 256 */ \
+ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \
2+IMM2_SIZE, /* NOT exact */ \
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \
@@ -2223,85 +2223,85 @@ in UTF-8 mode. The code that uses this table must know about such things. */
2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \
2+IMM2_SIZE, /* NOT exact I */ \
2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \
- /* Positive type repeats */ \
- 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
+ /* Positive type repeats */ \
+ 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \
2+IMM2_SIZE, /* Type exact */ \
2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \
- /* Character class & ref repeats */ \
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
+ /* Character class & ref repeats */ \
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
- 0, /* XCLASS - variable length */ \
+ 0, /* XCLASS - variable length */ \
1+IMM2_SIZE, /* REF */ \
1+IMM2_SIZE, /* REFI */ \
1+2*IMM2_SIZE, /* DNREF */ \
1+2*IMM2_SIZE, /* DNREFI */ \
- 1+LINK_SIZE, /* RECURSE */ \
- 2+2*LINK_SIZE, /* CALLOUT */ \
- 1+LINK_SIZE, /* Alt */ \
- 1+LINK_SIZE, /* Ket */ \
- 1+LINK_SIZE, /* KetRmax */ \
- 1+LINK_SIZE, /* KetRmin */ \
+ 1+LINK_SIZE, /* RECURSE */ \
+ 2+2*LINK_SIZE, /* CALLOUT */ \
+ 1+LINK_SIZE, /* Alt */ \
+ 1+LINK_SIZE, /* Ket */ \
+ 1+LINK_SIZE, /* KetRmax */ \
+ 1+LINK_SIZE, /* KetRmin */ \
1+LINK_SIZE, /* KetRpos */ \
1+LINK_SIZE, /* Reverse */ \
- 1+LINK_SIZE, /* Assert */ \
- 1+LINK_SIZE, /* Assert not */ \
- 1+LINK_SIZE, /* Assert behind */ \
- 1+LINK_SIZE, /* Assert behind not */ \
- 1+LINK_SIZE, /* ONCE */ \
+ 1+LINK_SIZE, /* Assert */ \
+ 1+LINK_SIZE, /* Assert not */ \
+ 1+LINK_SIZE, /* Assert behind */ \
+ 1+LINK_SIZE, /* Assert behind not */ \
+ 1+LINK_SIZE, /* ONCE */ \
1+LINK_SIZE, /* ONCE_NC */ \
- 1+LINK_SIZE, /* BRA */ \
+ 1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \
- 1+LINK_SIZE, /* COND */ \
- 1+LINK_SIZE, /* SBRA */ \
+ 1+LINK_SIZE, /* COND */ \
+ 1+LINK_SIZE, /* SBRA */ \
1+LINK_SIZE, /* SBRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
- 1+LINK_SIZE, /* SCOND */ \
+ 1+LINK_SIZE, /* SCOND */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
- 1, /* DEF */ \
+ 1, /* DEF */ \
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \
1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
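Elsewhere in the sources this macro is expanded into an array indexed by opcode, so fixed-length items can be stepped over without decoding them; roughly as follows (a sketch of the consuming idiom, assuming this header is in scope, name illustrative):

/* Sketch of how the macro is consumed (compare PRIV(OP_lengths) in the tables source). */
static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
/* priv_OP_lengths[OP_CHAR] then gives the minimum size of an OP_CHAR item. */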
-
-/* A magic value for OP_RREF to indicate the "any recursion" condition. */
-
-#define RREF_ANY 0xffff
-
+
+/* A magic value for OP_RREF to indicate the "any recursion" condition. */
+
+#define RREF_ANY 0xffff
+
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the table called eint in
pcreposix.c must be updated. */
-
-enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
- ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
- ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
- ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
- ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
- ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
+
+enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
+ ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
+ ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
+ ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
+ ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
+ ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
-
+
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
JIT_NUMBER_OF_COMPILE_MODES };
-/* The real format of the start of the pcre block; the index of names and the
-code vector run on as long as necessary after the end. We store an explicit
-offset to the name table so that if a regex is compiled on one host, saved, and
-then run on another where the size of pointers is different, all might still
+/* The real format of the start of the pcre block; the index of names and the
+code vector run on as long as necessary after the end. We store an explicit
+offset to the name table so that if a regex is compiled on one host, saved, and
+then run on another where the size of pointers is different, all might still
be well.
-
+
The size of the structure must be a multiple of 8 bytes. For the case of
compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
that there are an even number of pointers which therefore are a multiple of 8
@@ -2320,12 +2320,12 @@ when a compiled regex is reloaded on a host with different endianness.
There is also similar byte-flipping code in pcretest.c, which is used for
testing the byte-flipping features. It must also be kept in step.
*** WARNING ***
-*/
-
+*/
+
typedef struct real_pcre8_or_16 {
- pcre_uint32 magic_number;
- pcre_uint32 size; /* Total that was malloced */
- pcre_uint32 options; /* Public options */
+ pcre_uint32 magic_number;
+ pcre_uint32 size; /* Total that was malloced */
+ pcre_uint32 options; /* Public options */
pcre_uint32 flags; /* Private flags */
pcre_uint32 limit_match; /* Limit set from regex */
pcre_uint32 limit_recursion; /* Limit set from regex */
@@ -2334,20 +2334,20 @@ typedef struct real_pcre8_or_16 {
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint16 name_table_offset; /* Offset to name table that follows */
- pcre_uint16 name_entry_size; /* Size of any name items */
- pcre_uint16 name_count; /* Number of name items */
- pcre_uint16 ref_count; /* Reference count */
+ pcre_uint16 name_table_offset; /* Offset to name table that follows */
+ pcre_uint16 name_entry_size; /* Size of any name items */
+ pcre_uint16 name_count; /* Number of name items */
+ pcre_uint16 ref_count; /* Reference count */
pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */
pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */
pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
void *nullpad; /* NULL padding */
} real_pcre8_or_16;
-
+
typedef struct real_pcre8_or_16 real_pcre;
typedef struct real_pcre8_or_16 real_pcre16;
-
+
typedef struct real_pcre32 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
@@ -2389,16 +2389,16 @@ typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1
#define REAL_PCRE_OPTIONS(re) (((REAL_PCRE*)re)->options)
#define REAL_PCRE_FLAGS(re) (((REAL_PCRE*)re)->flags)
-/* The format of the block used to store data from pcre_study(). The same
-remark (see NOTE above) about extending this structure applies. */
-
-typedef struct pcre_study_data {
- pcre_uint32 size; /* Total that was malloced */
+/* The format of the block used to store data from pcre_study(). The same
+remark (see NOTE above) about extending this structure applies. */
+
+typedef struct pcre_study_data {
+ pcre_uint32 size; /* Total that was malloced */
pcre_uint32 flags; /* Private flags */
pcre_uint8 start_bits[32]; /* Starting char bits */
pcre_uint32 minlength; /* Minimum subject length */
-} pcre_study_data;
-
+} pcre_study_data;
+
/* Structure for building a chain of open capturing subpatterns during
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
encountered. This is also used to identify subpatterns that contain recursive
@@ -2419,10 +2419,10 @@ typedef struct named_group {
pcre_uint32 number; /* Group number */
} named_group;
-/* Structure for passing "static" information around between the functions
-doing the compiling, so that they are thread-safe. */
-
-typedef struct compile_data {
+/* Structure for passing "static" information around between the functions
+doing the compiling, so that they are thread-safe. */
+
+typedef struct compile_data {
const pcre_uint8 *lcc; /* Points to lower casing table */
const pcre_uint8 *fcc; /* Points to case-flipping table */
const pcre_uint8 *cbits; /* Points to character type table */
@@ -2459,16 +2459,16 @@ typedef struct compile_data {
int nltype; /* Newline type */
int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */
-} compile_data;
-
-/* Structure for maintaining a chain of pointers to the currently incomplete
+} compile_data;
+
+/* Structure for maintaining a chain of pointers to the currently incomplete
branches, for testing for left recursion while compiling. */
-
-typedef struct branch_chain {
- struct branch_chain *outer;
+
+typedef struct branch_chain {
+ struct branch_chain *outer;
pcre_uchar *current_branch;
-} branch_chain;
-
+} branch_chain;
+
/* Structure for mutual recursion detection. */
typedef struct recurse_check {
@@ -2476,18 +2476,18 @@ typedef struct recurse_check {
const pcre_uchar *group;
} recurse_check;
-/* Structure for items in a linked list that represents an explicit recursive
+/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_exec(). */
-
-typedef struct recursion_info {
- struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
+
+typedef struct recursion_info {
+ struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
int saved_capture_last; /* Last capture number */
PCRE_PUCHAR subject_position; /* Position at start of recursion */
-} recursion_info;
-
+} recursion_info;
+
/* A similar structure for pcre_dfa_exec(). */
typedef struct dfa_recursion_info {
@@ -2496,24 +2496,24 @@ typedef struct dfa_recursion_info {
PCRE_PUCHAR subject_position;
} dfa_recursion_info;
-/* Structure for building a chain of data for holding the values of the subject
-pointer at the start of each subpattern, so as to detect when an empty string
+/* Structure for building a chain of data for holding the values of the subject
+pointer at the start of each subpattern, so as to detect when an empty string
has been matched by a subpattern - to break infinite loops; used by
pcre_exec(). */
-
-typedef struct eptrblock {
- struct eptrblock *epb_prev;
+
+typedef struct eptrblock {
+ struct eptrblock *epb_prev;
PCRE_PUCHAR epb_saved_eptr;
-} eptrblock;
-
-
-/* Structure for passing "static" information around between the functions
-doing traditional NFA matching, so that they are thread-safe. */
-
-typedef struct match_data {
- unsigned long int match_call_count; /* As it says */
- unsigned long int match_limit; /* As it says */
- unsigned long int match_limit_recursion; /* As it says */
+} eptrblock;
+
+
+/* Structure for passing "static" information around between the functions
+doing traditional NFA matching, so that they are thread-safe. */
+
+typedef struct match_data {
+ unsigned long int match_call_count; /* As it says */
+ unsigned long int match_limit; /* As it says */
+ unsigned long int match_limit_recursion; /* As it says */
int *offset_vector; /* Offset vector */
int offset_end; /* One past the end */
int offset_max; /* The maximum usable for return data */
@@ -2560,12 +2560,12 @@ typedef struct match_data {
#ifdef NO_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
-} match_data;
-
-/* A similar structure is used for the same purpose by the DFA matching
-functions. */
-
-typedef struct dfa_match_data {
+} match_data;
+
+/* A similar structure is used for the same purpose by the DFA matching
+functions. */
+
+typedef struct dfa_match_data {
const pcre_uchar *start_code; /* Start of the compiled pattern */
const pcre_uchar *start_subject ; /* Start of the subject string */
const pcre_uchar *end_subject; /* End of subject string */
@@ -2579,41 +2579,41 @@ typedef struct dfa_match_data {
pcre_uchar nl[4]; /* Newline string when fixed */
void *callout_data; /* To pass back to callouts */
dfa_recursion_info *recursive; /* Linked list of recursion data */
-} dfa_match_data;
-
-/* Bit definitions for entries in the pcre_ctypes table. */
-
-#define ctype_space 0x01
-#define ctype_letter 0x02
-#define ctype_digit 0x04
-#define ctype_xdigit 0x08
-#define ctype_word 0x10 /* alphanumeric or '_' */
-#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
-
-/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
-of bits for a class map. Some classes are built by combining these tables. */
-
-#define cbit_space 0 /* [:space:] or \s */
-#define cbit_xdigit 32 /* [:xdigit:] */
-#define cbit_digit 64 /* [:digit:] or \d */
-#define cbit_upper 96 /* [:upper:] */
-#define cbit_lower 128 /* [:lower:] */
-#define cbit_word 160 /* [:word:] or \w */
-#define cbit_graph 192 /* [:graph:] */
-#define cbit_print 224 /* [:print:] */
-#define cbit_punct 256 /* [:punct:] */
-#define cbit_cntrl 288 /* [:cntrl:] */
-#define cbit_length 320 /* Length of the cbits table */
-
-/* Offsets of the various tables from the base tables pointer, and
-total length. */
-
-#define lcc_offset 0
-#define fcc_offset 256
-#define cbits_offset 512
-#define ctypes_offset (cbits_offset + cbit_length)
-#define tables_length (ctypes_offset + 256)
-
+} dfa_match_data;
+
+/* Bit definitions for entries in the pcre_ctypes table. */
+
+#define ctype_space 0x01
+#define ctype_letter 0x02
+#define ctype_digit 0x04
+#define ctype_xdigit 0x08
+#define ctype_word 0x10 /* alphanumeric or '_' */
+#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
+
+/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
+of bits for a class map. Some classes are built by combining these tables. */
+
+#define cbit_space 0 /* [:space:] or \s */
+#define cbit_xdigit 32 /* [:xdigit:] */
+#define cbit_digit 64 /* [:digit:] or \d */
+#define cbit_upper 96 /* [:upper:] */
+#define cbit_lower 128 /* [:lower:] */
+#define cbit_word 160 /* [:word:] or \w */
+#define cbit_graph 192 /* [:graph:] */
+#define cbit_print 224 /* [:print:] */
+#define cbit_punct 256 /* [:punct:] */
+#define cbit_cntrl 288 /* [:cntrl:] */
+#define cbit_length 320 /* Length of the cbits table */
+
+/* Offsets of the various tables from the base tables pointer, and
+total length. */
+
+#define lcc_offset 0
+#define fcc_offset 256
+#define cbits_offset 512
+#define ctypes_offset (cbits_offset + cbit_length)
+#define tables_length (ctypes_offset + 256)
+
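Taken together, the definitions above describe one contiguous block (tables_length works out to 1088 bytes): a lower-casing table, a case-flipping table, the per-class bitmaps, and one type byte per character value. A minimal sketch, not part of PCRE itself and assuming only the macros defined above, of how such a block is consulted given its base pointer:

/* Minimal sketch (not part of PCRE): consulting a tables block laid out with
   the offsets above, given its base pointer. */
static int byte_is_digit(const unsigned char *tables, unsigned int c)
{
  /* one 32-byte (256-bit) map per class in the cbits table */
  return (tables[cbits_offset + cbit_digit + c/8] >> (c & 7)) & 1;
}

static int byte_is_word(const unsigned char *tables, unsigned int c)
{
  /* one flag byte per character value in the ctypes table */
  return (tables[ctypes_offset + c] & ctype_word) != 0;
}

static unsigned char byte_to_lower(const unsigned char *tables, unsigned int c)
{
  return tables[lcc_offset + c];   /* lower-casing table at the start */
}
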
/* Internal function and data prefixes. */
#if defined COMPILE_PCRE8
@@ -2641,23 +2641,23 @@ total length. */
#error Unsupported compiling mode
#endif /* COMPILE_PCRE[8|16|32] */
-/* Layout of the UCP type table that translates property names into types and
-codes. Each entry used to point directly to a name, but to reduce the number of
-relocations in shared libraries, it now has an offset into a single string
-instead. */
-
-typedef struct {
- pcre_uint16 name_offset;
- pcre_uint16 type;
- pcre_uint16 value;
-} ucp_type_table;
-
-
-/* Internal shared data tables. These are tables that are used by more than one
-of the exported public functions. They have to be "external" in the C sense,
-but are not part of the PCRE public API. The data for these tables is in the
-pcre_tables.c module. */
-
+/* Layout of the UCP type table that translates property names into types and
+codes. Each entry used to point directly to a name, but to reduce the number of
+relocations in shared libraries, it now has an offset into a single string
+instead. */
+
+typedef struct {
+ pcre_uint16 name_offset;
+ pcre_uint16 type;
+ pcre_uint16 value;
+} ucp_type_table;
+
+
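As the comment above explains, name_offset indexes into a single concatenated name string rather than holding a pointer, so the table itself needs no load-time relocations in a shared library. A small sketch of the same technique, using hypothetical data rather than the real PCRE tables:

/* Sketch of the offset-into-one-string technique with hypothetical data
   (not the real PCRE tables): the table holds no pointers, so a shared
   library needs no relocations for it. */
static const char prop_names[] = "Any\0Alpha\0Digit";

typedef struct { pcre_uint16 name_offset; pcre_uint16 value; } prop_entry;

static const prop_entry props[] = {
  {  0, 1 },   /* "Any"   */
  {  4, 2 },   /* "Alpha" */
  { 10, 3 },   /* "Digit" */
};

/* The name of entry i is recovered as prop_names + props[i].name_offset. */
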
+/* Internal shared data tables. These are tables that are used by more than one
+of the exported public functions. They have to be "external" in the C sense,
+but are not part of the PCRE public API. The data for these tables is in the
+pcre_tables.c module. */
+
#ifdef COMPILE_PCRE8
extern const int PRIV(utf8_table1)[];
extern const int PRIV(utf8_table1_size);
@@ -2665,25 +2665,25 @@ extern const int PRIV(utf8_table2)[];
extern const int PRIV(utf8_table3)[];
extern const pcre_uint8 PRIV(utf8_table4)[];
#endif /* COMPILE_PCRE8 */
-
+
extern const char PRIV(utt_names)[];
extern const ucp_type_table PRIV(utt)[];
extern const int PRIV(utt_size);
-
+
extern const pcre_uint8 PRIV(OP_lengths)[];
extern const pcre_uint8 PRIV(default_tables)[];
-
+
extern const pcre_uint32 PRIV(hspace_list)[];
extern const pcre_uint32 PRIV(vspace_list)[];
-
-
-/* Internal shared functions. These are functions that are used by more than
-one of the exported public functions. They have to be "external" in the C
-sense, but are not part of the PCRE public API. */
-
+
+
+/* Internal shared functions. These are functions that are used by more than
+one of the exported public functions. They have to be "external" in the C
+sense, but are not part of the PCRE public API. */
+
/* String comparison functions. */
#if defined COMPILE_PCRE8
-
+
#define STRCMP_UC_UC(str1, str2) \
strcmp((char *)(str1), (char *)(str2))
#define STRCMP_UC_C8(str1, str2) \
@@ -2754,8 +2754,8 @@ extern int PRIV(jit_exec)(const PUBL(extra) *,
extern void PRIV(jit_free)(void *);
extern int PRIV(jit_get_size)(void *);
extern const char* PRIV(jit_get_target)(void);
-#endif
-
+#endif
+
/* Unicode character database (UCD) */
typedef struct {
@@ -2804,4 +2804,4 @@ extern const int PRIV(ucp_typerange)[];
#endif
-/* End of pcre_internal.h */
+/* End of pcre_internal.h */
diff --git a/contrib/libs/pcre/pcre_maketables.c b/contrib/libs/pcre/pcre_maketables.c
index 5328e96673..873b46aa91 100644
--- a/contrib/libs/pcre/pcre_maketables.c
+++ b/contrib/libs/pcre/pcre_maketables.c
@@ -1,74 +1,74 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_maketables(), which builds
-character tables for PCRE in the current locale. The file is compiled on its
-own as part of the PCRE library. However, it is also included in the
-compilation of dftables.c, in which case the macro DFTABLES is defined. */
-
-
-#ifndef DFTABLES
-# ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_maketables(), which builds
+character tables for PCRE in the current locale. The file is compiled on its
+own as part of the PCRE library. However, it is also included in the
+compilation of dftables.c, in which case the macro DFTABLES is defined. */
+
+
+#ifndef DFTABLES
+# ifdef HAVE_CONFIG_H
# include "pcre_config.h"
-# endif
-# include "pcre_internal.h"
-#endif
-
-
-/*************************************************
-* Create PCRE character tables *
-*************************************************/
-
-/* This function builds a set of character tables for use by PCRE and returns
-a pointer to them. They are built using the ctype functions, and consequently
-their contents will depend upon the current locale setting. When compiled as
+# endif
+# include "pcre_internal.h"
+#endif
+
+
+/*************************************************
+* Create PCRE character tables *
+*************************************************/
+
+/* This function builds a set of character tables for use by PCRE and returns
+a pointer to them. They are built using the ctype functions, and consequently
+their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via PUBL(malloc)(), but when
compiled inside dftables, use malloc().
-
-Arguments: none
-Returns: pointer to the contiguous block of data
-*/
-
+
+Arguments: none
+Returns: pointer to the contiguous block of data
+*/
+
#if defined COMPILE_PCRE8
-const unsigned char *
-pcre_maketables(void)
+const unsigned char *
+pcre_maketables(void)
#elif defined COMPILE_PCRE16
const unsigned char *
pcre16_maketables(void)
@@ -76,30 +76,30 @@ pcre16_maketables(void)
const unsigned char *
pcre32_maketables(void)
#endif
-{
-unsigned char *yield, *p;
-int i;
-
-#ifndef DFTABLES
+{
+unsigned char *yield, *p;
+int i;
+
+#ifndef DFTABLES
yield = (unsigned char*)(PUBL(malloc))(tables_length);
-#else
-yield = (unsigned char*)malloc(tables_length);
-#endif
-
-if (yield == NULL) return NULL;
-p = yield;
-
-/* First comes the lower casing table */
-
-for (i = 0; i < 256; i++) *p++ = tolower(i);
-
-/* Next the case-flipping table */
-
-for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
-
-/* Then the character class tables. Don't try to be clever and save effort on
+#else
+yield = (unsigned char*)malloc(tables_length);
+#endif
+
+if (yield == NULL) return NULL;
+p = yield;
+
+/* First comes the lower casing table */
+
+for (i = 0; i < 256; i++) *p++ = tolower(i);
+
+/* Next the case-flipping table */
+
+for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
+
+/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different.
-
+
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
 From release 8.34 it is also correct for Perl space, because Perl added VT at
@@ -110,47 +110,47 @@ being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
test for alnum specially. */
-memset(p, 0, cbit_length);
-for (i = 0; i < 256; i++)
- {
- if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
- if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
- if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
- if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
- if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
- if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
- if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
- if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
- if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
- if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
- if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
- }
-p += cbit_length;
-
+memset(p, 0, cbit_length);
+for (i = 0; i < 256; i++)
+ {
+ if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
+ if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
+ if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
+ if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
+ if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
+ if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
+ if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
+ if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
+ if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
+ if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
+ if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
+ }
+p += cbit_length;
+
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
at release 8.34. */
-
-for (i = 0; i < 256; i++)
- {
- int x = 0;
+
+for (i = 0; i < 256; i++)
+ {
+ int x = 0;
if (isspace(i)) x += ctype_space;
- if (isalpha(i)) x += ctype_letter;
- if (isdigit(i)) x += ctype_digit;
- if (isxdigit(i)) x += ctype_xdigit;
- if (isalnum(i) || i == '_') x += ctype_word;
-
- /* Note: strchr includes the terminating zero in the characters it considers.
- In this instance, that is ok because we want binary zero to be flagged as a
- meta-character, which in this sense is any character that terminates a run
- of data characters. */
-
- if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
- *p++ = x;
- }
-
-return yield;
-}
-
-/* End of pcre_maketables.c */
+ if (isalpha(i)) x += ctype_letter;
+ if (isdigit(i)) x += ctype_digit;
+ if (isxdigit(i)) x += ctype_xdigit;
+ if (isalnum(i) || i == '_') x += ctype_word;
+
+ /* Note: strchr includes the terminating zero in the characters it considers.
+ In this instance, that is ok because we want binary zero to be flagged as a
+ meta-character, which in this sense is any character that terminates a run
+ of data characters. */
+
+ if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
+ *p++ = x;
+ }
+
+return yield;
+}
+
+/* End of pcre_maketables.c */
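The tables built by pcre_maketables() above reflect whatever locale is current when it runs. A usage sketch, assuming the public pcre.h API (pcre_maketables()/pcre_compile()), of building tables for a specific locale and passing them to the compiler through its final tableptr argument:

#include <locale.h>
#include <pcre.h>

/* Usage sketch (not part of this commit): snapshot the ctype behaviour of a
   given locale and compile a pattern against it. */
static pcre *compile_with_locale(const char *pattern, const char *locale)
{
  const char *error;
  int erroffset;
  setlocale(LC_CTYPE, locale);                       /* affects isalpha() etc. */
  const unsigned char *tables = pcre_maketables();   /* tables for that locale */
  return pcre_compile(pattern, 0, &error, &erroffset, tables);
}
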
diff --git a/contrib/libs/pcre/pcre_newline.c b/contrib/libs/pcre/pcre_newline.c
index c8b5e374ae..252cad9c9e 100644
--- a/contrib/libs/pcre/pcre_newline.c
+++ b/contrib/libs/pcre/pcre_newline.c
@@ -1,81 +1,81 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains internal functions for testing newlines when more than
-one kind of newline is to be recognized. When a newline is found, its length is
-returned. In principle, we could implement several newline "types", each
-referring to a different set of newline characters. At present, PCRE supports
-only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
-and NLTYPE_ANY. The full list of Unicode newline characters is taken from
-http://unicode.org/unicode/reports/tr18/. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains internal functions for testing newlines when more than
+one kind of newline is to be recognized. When a newline is found, its length is
+returned. In principle, we could implement several newline "types", each
+referring to a different set of newline characters. At present, PCRE supports
+only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
+and NLTYPE_ANY. The full list of Unicode newline characters is taken from
+http://unicode.org/unicode/reports/tr18/. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-
-/*************************************************
-* Check for newline at given position *
-*************************************************/
-
-/* It is guaranteed that the initial value of ptr is less than the end of the
-string that is being processed.
-
-Arguments:
- ptr pointer to possible newline
- type the newline type
- endptr pointer to the end of the string
- lenptr where to return the length
+#endif
+
+#include "pcre_internal.h"
+
+
+
+/*************************************************
+* Check for newline at given position *
+*************************************************/
+
+/* It is guaranteed that the initial value of ptr is less than the end of the
+string that is being processed.
+
+Arguments:
+ ptr pointer to possible newline
+ type the newline type
+ endptr pointer to the end of the string
+ lenptr where to return the length
utf TRUE if in utf mode
-
-Returns: TRUE or FALSE
-*/
-
-BOOL
+
+Returns: TRUE or FALSE
+*/
+
+BOOL
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
BOOL utf)
-{
+{
pcre_uint32 c;
(void)utf;
#ifdef SUPPORT_UTF
@@ -86,21 +86,21 @@ if (utf)
else
#endif /* SUPPORT_UTF */
c = *ptr;
-
+
/* Note that this function is called only for ANY or ANYCRLF. */
-if (type == NLTYPE_ANYCRLF) switch(c)
- {
+if (type == NLTYPE_ANYCRLF) switch(c)
+ {
case CHAR_LF: *lenptr = 1; return TRUE;
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
- default: return FALSE;
- }
-
-/* NLTYPE_ANY */
-
-else switch(c)
- {
+ default: return FALSE;
+ }
+
+/* NLTYPE_ANY */
+
+else switch(c)
+ {
#ifdef EBCDIC
case CHAR_NEL:
#endif
@@ -115,8 +115,8 @@ else switch(c)
#ifndef EBCDIC
#ifdef COMPILE_PCRE8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
- case 0x2028: /* LS */
- case 0x2029: *lenptr = 3; return TRUE; /* PS */
+ case 0x2028: /* LS */
+ case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
case CHAR_NEL:
case 0x2028: /* LS */
@@ -124,62 +124,62 @@ else switch(c)
#endif /* COMPILE_PCRE8 */
#endif /* Not EBCDIC */
- default: return FALSE;
- }
-}
-
-
-
-/*************************************************
-* Check for newline at previous position *
-*************************************************/
-
-/* It is guaranteed that the initial value of ptr is greater than the start of
-the string that is being processed.
-
-Arguments:
- ptr pointer to possible newline
- type the newline type
- startptr pointer to the start of the string
- lenptr where to return the length
+ default: return FALSE;
+ }
+}
+
+
+
+/*************************************************
+* Check for newline at previous position *
+*************************************************/
+
+/* It is guaranteed that the initial value of ptr is greater than the start of
+the string that is being processed.
+
+Arguments:
+ ptr pointer to possible newline
+ type the newline type
+ startptr pointer to the start of the string
+ lenptr where to return the length
utf TRUE if in utf mode
-
-Returns: TRUE or FALSE
-*/
-
-BOOL
+
+Returns: TRUE or FALSE
+*/
+
+BOOL
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
BOOL utf)
-{
+{
pcre_uint32 c;
(void)utf;
-ptr--;
+ptr--;
#ifdef SUPPORT_UTF
if (utf)
- {
- BACKCHAR(ptr);
- GETCHAR(c, ptr);
- }
+ {
+ BACKCHAR(ptr);
+ GETCHAR(c, ptr);
+ }
else
#endif /* SUPPORT_UTF */
c = *ptr;
-
+
/* Note that this function is called only for ANY or ANYCRLF. */
-if (type == NLTYPE_ANYCRLF) switch(c)
- {
+if (type == NLTYPE_ANYCRLF) switch(c)
+ {
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
case CHAR_CR: *lenptr = 1; return TRUE;
- default: return FALSE;
- }
-
+ default: return FALSE;
+ }
+
/* NLTYPE_ANY */
-else switch(c)
- {
+else switch(c)
+ {
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
@@ -203,8 +203,8 @@ else switch(c)
#endif /* COMPILE_PCRE8 */
#endif /* NotEBCDIC */
- default: return FALSE;
- }
-}
-
-/* End of pcre_newline.c */
+ default: return FALSE;
+ }
+}
+
+/* End of pcre_newline.c */
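Both functions above recognize CR, LF and CRLF for NLTYPE_ANYCRLF, plus the wider Unicode set for NLTYPE_ANY. A standalone sketch of just the ANYCRLF forward test, mirroring the logic in is_newline():

/* Standalone sketch of the ANYCRLF forward test shown above: returns the
   newline length at p, or 0 if p does not start a newline. */
static int anycrlf_len(const unsigned char *p, const unsigned char *end)
{
  if (*p == '\n') return 1;                          /* LF          */
  if (*p == '\r')                                    /* CR or CRLF  */
    return (p + 1 < end && p[1] == '\n') ? 2 : 1;
  return 0;
}
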
diff --git a/contrib/libs/pcre/pcre_ord2utf8.c b/contrib/libs/pcre/pcre_ord2utf8.c
index 827a9fa22a..e608a29a30 100644
--- a/contrib/libs/pcre/pcre_ord2utf8.c
+++ b/contrib/libs/pcre/pcre_ord2utf8.c
@@ -1,94 +1,94 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This file contains a private PCRE function that converts an ordinal
-character value into a UTF8 string. */
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This file contains a private PCRE function that converts an ordinal
+character value into a UTF8 string. */
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
+#endif
+
#define COMPILE_PCRE8
-#include "pcre_internal.h"
-
-/*************************************************
-* Convert character value to UTF-8 *
-*************************************************/
-
+#include "pcre_internal.h"
+
+/*************************************************
+* Convert character value to UTF-8 *
+*************************************************/
+
/* This function takes an integer value in the range 0 - 0x10ffff
and encodes it as a UTF-8 character in 1 to 4 pcre_uchars.
-
-Arguments:
- cvalue the character value
+
+Arguments:
+ cvalue the character value
buffer pointer to buffer for result - at least 6 pcre_uchars long
-
-Returns: number of characters placed in the buffer
-*/
-
+
+Returns: number of characters placed in the buffer
+*/
+
unsigned
-int
+int
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
-{
+{
#ifdef SUPPORT_UTF
-register int i, j;
+register int i, j;
for (i = 0; i < PRIV(utf8_table1_size); i++)
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
-buffer += i;
-for (j = i; j > 0; j--)
- {
- *buffer-- = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
+buffer += i;
+for (j = i; j > 0; j--)
+ {
+ *buffer-- = 0x80 | (cvalue & 0x3f);
+ cvalue >>= 6;
+ }
*buffer = PRIV(utf8_table2)[i] | cvalue;
-return i + 1;
+return i + 1;
-#else
+#else
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
#endif
-}
-
-/* End of pcre_ord2utf8.c */
+}
+
+/* End of pcre_ord2utf8.c */
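PRIV(ord2utf) above is table driven, using utf8_table1 to pick the length and utf8_table2 for the leading byte. An equivalent branch-based sketch of the same encoding, for values in the 0 - 0x10ffff range the comment assumes:

/* Equivalent branch-based sketch of what ord2utf produces (the real code is
   table-driven). Returns the number of bytes written to out. */
static int encode_utf8(unsigned int c, unsigned char *out)
{
  if (c <= 0x7f)  { out[0] = (unsigned char)c; return 1; }
  if (c <= 0x7ff) { out[0] = (unsigned char)(0xc0 | (c >> 6));
                    out[1] = (unsigned char)(0x80 | (c & 0x3f)); return 2; }
  if (c <= 0xffff){ out[0] = (unsigned char)(0xe0 | (c >> 12));
                    out[1] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
                    out[2] = (unsigned char)(0x80 | (c & 0x3f)); return 3; }
  out[0] = (unsigned char)(0xf0 | (c >> 18));
  out[1] = (unsigned char)(0x80 | ((c >> 12) & 0x3f));
  out[2] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
  out[3] = (unsigned char)(0x80 | (c & 0x3f));
  return 4;
}
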
diff --git a/contrib/libs/pcre/pcre_refcount.c b/contrib/libs/pcre/pcre_refcount.c
index 5de3422a14..65a3c23a8f 100644
--- a/contrib/libs/pcre/pcre_refcount.c
+++ b/contrib/libs/pcre/pcre_refcount.c
@@ -1,76 +1,76 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_refcount(), which is an
-auxiliary function that can be used to maintain a reference count in a compiled
-pattern data block. This might be helpful in applications where the block is
-shared by different users. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_refcount(), which is an
+auxiliary function that can be used to maintain a reference count in a compiled
+pattern data block. This might be helpful in applications where the block is
+shared by different users. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Maintain reference count *
-*************************************************/
-
-/* The reference count is a 16-bit field, initialized to zero. It is not
-possible to transfer a non-zero count from one host to a different host that
-has a different byte order - though I can't see why anyone in their right mind
-would ever want to do that!
-
-Arguments:
- argument_re points to compiled code
- adjust value to add to the count
-
-Returns: the (possibly updated) count value (a non-negative number), or
- a negative error number
-*/
-
+#endif
+
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Maintain reference count *
+*************************************************/
+
+/* The reference count is a 16-bit field, initialized to zero. It is not
+possible to transfer a non-zero count from one host to a different host that
+has a different byte order - though I can't see why anyone in their right mind
+would ever want to do that!
+
+Arguments:
+ argument_re points to compiled code
+ adjust value to add to the count
+
+Returns: the (possibly updated) count value (a non-negative number), or
+ a negative error number
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_refcount(pcre *argument_re, int adjust)
+pcre_refcount(pcre *argument_re, int adjust)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_refcount(pcre16 *argument_re, int adjust)
@@ -78,15 +78,15 @@ pcre16_refcount(pcre16 *argument_re, int adjust)
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_refcount(pcre32 *argument_re, int adjust)
#endif
-{
+{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
-if (re == NULL) return PCRE_ERROR_NULL;
+if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-re->ref_count = (-adjust > re->ref_count)? 0 :
- (adjust + re->ref_count > 65535)? 65535 :
- re->ref_count + adjust;
-return re->ref_count;
-}
-
-/* End of pcre_refcount.c */
+re->ref_count = (-adjust > re->ref_count)? 0 :
+ (adjust + re->ref_count > 65535)? 65535 :
+ re->ref_count + adjust;
+return re->ref_count;
+}
+
+/* End of pcre_refcount.c */
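The count maintained above saturates at 0 and 65535 and is kept purely for the application's benefit; PCRE itself never consults it. A usage sketch, assuming the public pcre.h API, of sharing one compiled pattern between two owners:

/* Usage sketch (public pcre.h API): share one compiled pattern between two
   owners and free it only when the count returns to zero. */
#include <pcre.h>

static void take_ref(pcre *re)
{
  pcre_refcount(re, +1);
}

static void drop_ref(pcre *re)
{
  if (pcre_refcount(re, -1) == 0) pcre_free(re);
}
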
diff --git a/contrib/libs/pcre/pcre_study.c b/contrib/libs/pcre/pcre_study.c
index d4ee0295d4..b6088fe882 100644
--- a/contrib/libs/pcre/pcre_study.c
+++ b/contrib/libs/pcre/pcre_study.c
@@ -1,62 +1,62 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_study(), along with local
-supporting functions. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_study(), along with local
+supporting functions. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
+#endif
+
+#include "pcre_internal.h"
+
#define SET_BIT(c) start_bits[c/8] |= (1 << (c&7))
-
-/* Returns from set_start_bits() */
-
+
+/* Returns from set_start_bits() */
+
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
-
-
-/*************************************************
+
+
+/*************************************************
* Find the minimum subject length for a group *
*************************************************/
@@ -613,24 +613,24 @@ for (;;)
/*************************************************
-* Set a bit and maybe its alternate case *
-*************************************************/
-
+* Set a bit and maybe its alternate case *
+*************************************************/
+
/* Given a character, set its first byte's bit in the table, and also the
corresponding bit for the other version of a letter if we are caseless. In
UTF-8 mode, for characters greater than 127, we can only do the caseless thing
when Unicode property support is available.
-
-Arguments:
- start_bits points to the bit map
+
+Arguments:
+ start_bits points to the bit map
p points to the character
- caseless the caseless flag
- cd the block with char table pointers
+ caseless the caseless flag
+ cd the block with char table pointers
utf TRUE for UTF-8 / UTF-16 / UTF-32 mode
-
+
Returns: pointer after the character
-*/
-
+*/
+
static const pcre_uchar *
set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
compile_data *cd, BOOL utf)
@@ -719,10 +719,10 @@ Arguments:
Returns: nothing
*/
-static void
+static void
set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
compile_data *cd)
-{
+{
register pcre_uint32 c;
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
@@ -737,13 +737,13 @@ for (c = 128; c < 256; c++)
}
}
#endif
-}
-
-
+}
+
+
/*************************************************
* Set bits for a negative character type *
*************************************************/
-
+
/* This function sets starting bits for a negative character type such as \D.
In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
otherwise there can be confusion with bytes in the middle of UTF-8 characters.
@@ -774,78 +774,78 @@ if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
-/*************************************************
-* Create bitmap of starting bytes *
-*************************************************/
-
-/* This function scans a compiled unanchored expression recursively and
-attempts to build a bitmap of the set of possible starting bytes. As time goes
-by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
-useful for parenthesized groups in patterns such as (a*)b where the group
-provides some optional starting bytes but scanning must continue at the outer
-level to find at least one mandatory byte. At the outermost level, this
-function fails unless the result is SSB_DONE.
-
-Arguments:
- code points to an expression
- start_bits points to a 32-byte table, initialized to 0
+/*************************************************
+* Create bitmap of starting bytes *
+*************************************************/
+
+/* This function scans a compiled unanchored expression recursively and
+attempts to build a bitmap of the set of possible starting bytes. As time goes
+by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
+useful for parenthesized groups in patterns such as (a*)b where the group
+provides some optional starting bytes but scanning must continue at the outer
+level to find at least one mandatory byte. At the outermost level, this
+function fails unless the result is SSB_DONE.
+
+Arguments:
+ code points to an expression
+ start_bits points to a 32-byte table, initialized to 0
utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
- cd the block with char table pointers
-
-Returns: SSB_FAIL => Failed to find any starting bytes
- SSB_DONE => Found mandatory starting bytes
- SSB_CONTINUE => Found optional starting bytes
+ cd the block with char table pointers
+
+Returns: SSB_FAIL => Failed to find any starting bytes
+ SSB_DONE => Found mandatory starting bytes
+ SSB_CONTINUE => Found optional starting bytes
SSB_UNKNOWN => Hit an unrecognized opcode
-*/
-
-static int
+*/
+
+static int
set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
compile_data *cd)
-{
+{
register pcre_uint32 c;
-int yield = SSB_DONE;
+int yield = SSB_DONE;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int table_limit = utf? 16:32;
#else
int table_limit = 32;
#endif
-
-#if 0
-/* ========================================================================= */
-/* The following comment and code was inserted in January 1999. In May 2006,
-when it was observed to cause compiler warnings about unused values, I took it
-out again. If anybody is still using OS/2, they will have to put it back
-manually. */
-
-/* This next statement and the later reference to dummy are here in order to
-trick the optimizer of the IBM C compiler for OS/2 into generating correct
-code. Apparently IBM isn't going to fix the problem, and we would rather not
-disable optimization (in this module it actually makes a big difference, and
-the pcre module can use all the optimization it can get). */
-
-volatile int dummy;
-/* ========================================================================= */
-#endif
-
-do
- {
- BOOL try_next = TRUE;
+
+#if 0
+/* ========================================================================= */
+/* The following comment and code was inserted in January 1999. In May 2006,
+when it was observed to cause compiler warnings about unused values, I took it
+out again. If anybody is still using OS/2, they will have to put it back
+manually. */
+
+/* This next statement and the later reference to dummy are here in order to
+trick the optimizer of the IBM C compiler for OS/2 into generating correct
+code. Apparently IBM isn't going to fix the problem, and we would rather not
+disable optimization (in this module it actually makes a big difference, and
+the pcre module can use all the optimization it can get). */
+
+volatile int dummy;
+/* ========================================================================= */
+#endif
+
+do
+ {
+ BOOL try_next = TRUE;
const pcre_uchar *tcode = code + 1 + LINK_SIZE;
-
+
if (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
- while (try_next) /* Loop for items in this branch */
- {
- int rc;
+ while (try_next) /* Loop for items in this branch */
+ {
+ int rc;
- switch(*tcode)
- {
+ switch(*tcode)
+ {
/* If we reach something we don't understand, it means a new opcode has
been created that hasn't been added to this code. Hopefully this problem
will be discovered during testing. */
-
- default:
+
+ default:
return SSB_UNKNOWN;
/* Fail for a valid opcode that implies no starting bits. */
@@ -920,8 +920,8 @@ do
case OP_SOM:
case OP_THEN:
case OP_THEN_ARG:
- return SSB_FAIL;
-
+ return SSB_FAIL;
+
/* A "real" property test implies no starting bits, but the fake property
PT_CLIST identifies a list of characters. These lists are short, as they
are used for characters with more than one "other case", so there is no
@@ -954,80 +954,80 @@ do
tcode++;
break;
- /* If we hit a bracket or a positive lookahead assertion, recurse to set
- bits from within the subpattern. If it can't find anything, we have to
- give up. If it finds some mandatory character(s), we are done for this
- branch. Otherwise, carry on scanning after the subpattern. */
-
- case OP_BRA:
- case OP_SBRA:
- case OP_CBRA:
- case OP_SCBRA:
+ /* If we hit a bracket or a positive lookahead assertion, recurse to set
+ bits from within the subpattern. If it can't find anything, we have to
+ give up. If it finds some mandatory character(s), we are done for this
+ branch. Otherwise, carry on scanning after the subpattern. */
+
+ case OP_BRA:
+ case OP_SBRA:
+ case OP_CBRA:
+ case OP_SCBRA:
case OP_BRAPOS:
case OP_SBRAPOS:
case OP_CBRAPOS:
case OP_SCBRAPOS:
- case OP_ONCE:
+ case OP_ONCE:
case OP_ONCE_NC:
- case OP_ASSERT:
+ case OP_ASSERT:
rc = set_start_bits(tcode, start_bits, utf, cd);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
- if (rc == SSB_DONE) try_next = FALSE; else
- {
- do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
- tcode += 1 + LINK_SIZE;
- }
- break;
-
- /* If we hit ALT or KET, it means we haven't found anything mandatory in
- this branch, though we might have found something optional. For ALT, we
- continue with the next alternative, but we have to arrange that the final
- result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
- return SSB_CONTINUE: if this is the top level, that indicates failure,
- but after a nested subpattern, it causes scanning to continue. */
-
- case OP_ALT:
- yield = SSB_CONTINUE;
- try_next = FALSE;
- break;
-
- case OP_KET:
- case OP_KETRMAX:
- case OP_KETRMIN:
+ if (rc == SSB_DONE) try_next = FALSE; else
+ {
+ do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
+ tcode += 1 + LINK_SIZE;
+ }
+ break;
+
+ /* If we hit ALT or KET, it means we haven't found anything mandatory in
+ this branch, though we might have found something optional. For ALT, we
+ continue with the next alternative, but we have to arrange that the final
+ result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
+ return SSB_CONTINUE: if this is the top level, that indicates failure,
+ but after a nested subpattern, it causes scanning to continue. */
+
+ case OP_ALT:
+ yield = SSB_CONTINUE;
+ try_next = FALSE;
+ break;
+
+ case OP_KET:
+ case OP_KETRMAX:
+ case OP_KETRMIN:
case OP_KETRPOS:
- return SSB_CONTINUE;
-
- /* Skip over callout */
-
- case OP_CALLOUT:
- tcode += 2 + 2*LINK_SIZE;
- break;
-
- /* Skip over lookbehind and negative lookahead assertions */
-
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
- tcode += 1 + LINK_SIZE;
- break;
-
- /* BRAZERO does the bracket, but carries on. */
-
- case OP_BRAZERO:
- case OP_BRAMINZERO:
+ return SSB_CONTINUE;
+
+ /* Skip over callout */
+
+ case OP_CALLOUT:
+ tcode += 2 + 2*LINK_SIZE;
+ break;
+
+ /* Skip over lookbehind and negative lookahead assertions */
+
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
+ tcode += 1 + LINK_SIZE;
+ break;
+
+ /* BRAZERO does the bracket, but carries on. */
+
+ case OP_BRAZERO:
+ case OP_BRAMINZERO:
case OP_BRAPOSZERO:
rc = set_start_bits(++tcode, start_bits, utf, cd);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
-/* =========================================================================
- See the comment at the head of this function concerning the next line,
- which was an old fudge for the benefit of OS/2.
- dummy = 1;
- ========================================================================= */
- do tcode += GET(tcode,1); while (*tcode == OP_ALT);
- tcode += 1 + LINK_SIZE;
- break;
-
+/* =========================================================================
+ See the comment at the head of this function concerning the next line,
+ which was an old fudge for the benefit of OS/2.
+ dummy = 1;
+ ========================================================================= */
+ do tcode += GET(tcode,1); while (*tcode == OP_ALT);
+ tcode += 1 + LINK_SIZE;
+ break;
+
/* SKIPZERO skips the bracket. */
case OP_SKIPZERO:
@@ -1036,17 +1036,17 @@ do
tcode += 1 + LINK_SIZE;
break;
- /* Single-char * or ? sets the bit and tries the next item */
-
- case OP_STAR:
- case OP_MINSTAR:
- case OP_POSSTAR:
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_POSQUERY:
+ /* Single-char * or ? sets the bit and tries the next item */
+
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_POSSTAR:
+ case OP_QUERY:
+ case OP_MINQUERY:
+ case OP_POSQUERY:
tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
- break;
-
+ break;
+
case OP_STARI:
case OP_MINSTARI:
case OP_POSSTARI:
@@ -1056,33 +1056,33 @@ do
tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
break;
- /* Single-char upto sets the bit and tries the next */
-
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_POSUPTO:
+ /* Single-char upto sets the bit and tries the next */
+
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_POSUPTO:
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
- break;
-
+ break;
+
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
break;
- /* At least one single char sets the bit and stops */
-
+ /* At least one single char sets the bit and stops */
+
case OP_EXACT:
tcode += IMM2_SIZE;
/* Fall through */
- case OP_CHAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_POSPLUS:
+ case OP_CHAR:
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_POSPLUS:
(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
- try_next = FALSE;
- break;
-
+ try_next = FALSE;
+ break;
+
case OP_EXACTI:
tcode += IMM2_SIZE;
/* Fall through */
@@ -1093,7 +1093,7 @@ do
(void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
try_next = FALSE;
break;
-
+
/* Special spacing and line-terminating items. These recognize specific
lists of characters. The difference between VSPACE and ANYNL is that the
latter can match the two-character CRLF sequence, but that is not
@@ -1162,74 +1162,74 @@ do
properties. Therefore, these apply in the case when only characters less
than 256 are recognized to match the types. */
- case OP_NOT_DIGIT:
+ case OP_NOT_DIGIT:
set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
- try_next = FALSE;
- break;
-
- case OP_DIGIT:
+ try_next = FALSE;
+ break;
+
+ case OP_DIGIT:
set_type_bits(start_bits, cbit_digit, table_limit, cd);
- try_next = FALSE;
- break;
-
+ try_next = FALSE;
+ break;
+
/* The cbit_space table has vertical tab as whitespace; we no longer
have to play fancy tricks because Perl added VT to its whitespace at
release 5.18. PCRE added it at release 8.34. */
-
- case OP_NOT_WHITESPACE:
+
+ case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
- try_next = FALSE;
- break;
-
- case OP_WHITESPACE:
+ try_next = FALSE;
+ break;
+
+ case OP_WHITESPACE:
set_type_bits(start_bits, cbit_space, table_limit, cd);
- try_next = FALSE;
- break;
-
- case OP_NOT_WORDCHAR:
+ try_next = FALSE;
+ break;
+
+ case OP_NOT_WORDCHAR:
set_nottype_bits(start_bits, cbit_word, table_limit, cd);
- try_next = FALSE;
- break;
-
- case OP_WORDCHAR:
+ try_next = FALSE;
+ break;
+
+ case OP_WORDCHAR:
set_type_bits(start_bits, cbit_word, table_limit, cd);
- try_next = FALSE;
- break;
-
- /* One or more character type fudges the pointer and restarts, knowing
- it will hit a single character type and stop there. */
-
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
+ try_next = FALSE;
+ break;
+
+ /* One or more character type fudges the pointer and restarts, knowing
+ it will hit a single character type and stop there. */
+
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
- tcode++;
- break;
-
- case OP_TYPEEXACT:
+ tcode++;
+ break;
+
+ case OP_TYPEEXACT:
tcode += 1 + IMM2_SIZE;
- break;
-
- /* Zero or more repeats of character types set the bits and then
- try again. */
-
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEPOSUPTO:
+ break;
+
+ /* Zero or more repeats of character types set the bits and then
+ try again. */
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEPOSUPTO:
tcode += IMM2_SIZE; /* Fall through */
-
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPOSSTAR:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSQUERY:
- switch(tcode[1])
- {
+
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSQUERY:
+ switch(tcode[1])
+ {
default:
- case OP_ANY:
+ case OP_ANY:
case OP_ALLANY:
- return SSB_FAIL;
-
+ return SSB_FAIL;
+
case OP_HSPACE:
SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE);
@@ -1275,44 +1275,44 @@ do
SET_BIT(CHAR_NEL);
break;
- case OP_NOT_DIGIT:
+ case OP_NOT_DIGIT:
set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
- break;
-
- case OP_DIGIT:
+ break;
+
+ case OP_DIGIT:
set_type_bits(start_bits, cbit_digit, table_limit, cd);
- break;
-
+ break;
+
/* The cbit_space table has vertical tab as whitespace; we no longer
have to play fancy tricks because Perl added VT to its whitespace at
release 5.18. PCRE added it at release 8.34. */
-
- case OP_NOT_WHITESPACE:
+
+ case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
- break;
-
- case OP_WHITESPACE:
+ break;
+
+ case OP_WHITESPACE:
set_type_bits(start_bits, cbit_space, table_limit, cd);
- break;
-
- case OP_NOT_WORDCHAR:
+ break;
+
+ case OP_NOT_WORDCHAR:
set_nottype_bits(start_bits, cbit_word, table_limit, cd);
- break;
-
- case OP_WORDCHAR:
+ break;
+
+ case OP_WORDCHAR:
set_type_bits(start_bits, cbit_word, table_limit, cd);
- break;
- }
-
- tcode += 2;
- break;
-
- /* Character class where all the information is in a bit map: set the
- bits and either carry on or not, according to the repeat count. If it was
- a negative class, and we are operating with UTF-8 characters, any byte
- with a value >= 0xc4 is a potentially valid starter because it starts a
- character with a value > 255. */
-
+ break;
+ }
+
+ tcode += 2;
+ break;
+
+ /* Character class where all the information is in a bit map: set the
+ bits and either carry on or not, according to the repeat count. If it was
+ a negative class, and we are operating with UTF-8 characters, any byte
+ with a value >= 0xc4 is a potentially valid starter because it starts a
+ character with a value > 255. */
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
@@ -1323,21 +1323,21 @@ do
#endif
/* Fall through */
- case OP_NCLASS:
+ case OP_NCLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf)
- {
- start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
- memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
- }
-#endif
+ {
+ start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
+ memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
+ }
+#endif
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF); /* For characters > 255 */
#endif
- /* Fall through */
-
- case OP_CLASS:
- {
+ /* Fall through */
+
+ case OP_CLASS:
+ {
pcre_uint8 *map;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
map = NULL;
@@ -1354,102 +1354,102 @@ do
map = (pcre_uint8 *)tcode;
tcode += 32 / sizeof(pcre_uchar);
}
-
- /* In UTF-8 mode, the bits in a bit map correspond to character
- values, not to byte values. However, the bit map we are constructing is
- for byte values. So we have to do a conversion for characters whose
- value is > 127. In fact, there are only two possible starting bytes for
- characters in the range 128 - 255. */
-
+
+ /* In UTF-8 mode, the bits in a bit map correspond to character
+ values, not to byte values. However, the bit map we are constructing is
+ for byte values. So we have to do a conversion for characters whose
+ value is > 127. In fact, there are only two possible starting bytes for
+ characters in the range 128 - 255. */
+
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (map != NULL)
#endif
- {
+ {
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf)
- {
+ {
for (c = 0; c < 16; c++) start_bits[c] |= map[c];
for (c = 128; c < 256; c++)
- {
+ {
if ((map[c/8] & (1 << (c&7))) != 0)
{
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
}
- }
- }
+ }
+ }
else
-#endif
+#endif
{
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
for (c = 0; c < 32; c++) start_bits[c] |= map[c];
}
- }
-
+ }
+
/* Advance past the bit map, and act on what follows. For a zero
minimum repeat, continue; otherwise stop processing. */
-
- switch (*tcode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
+
+ switch (*tcode)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
- tcode++;
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
+ tcode++;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
case OP_CRPOSRANGE:
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
- else try_next = FALSE;
- break;
-
- default:
- try_next = FALSE;
- break;
- }
- }
- break; /* End of bitmap class handling */
-
- } /* End of switch */
- } /* End of try_next loop */
-
- code += GET(code, 1); /* Advance to next branch */
- }
-while (*code == OP_ALT);
-return yield;
-}
-
-
-
-
-
-/*************************************************
-* Study a compiled expression *
-*************************************************/
-
-/* This function is handed a compiled expression that it must study to produce
+ else try_next = FALSE;
+ break;
+
+ default:
+ try_next = FALSE;
+ break;
+ }
+ }
+ break; /* End of bitmap class handling */
+
+ } /* End of switch */
+ } /* End of try_next loop */
+
+ code += GET(code, 1); /* Advance to next branch */
+ }
+while (*code == OP_ALT);
+return yield;
+}
+
+
+
+
+
+/*************************************************
+* Study a compiled expression *
+*************************************************/
+
+/* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. It returns a pcre[16]_extra block
-which then gets handed back to pcre_exec().
-
-Arguments:
- re points to the compiled expression
- options contains option bits
- errorptr points to where to place error messages;
- set NULL unless error
-
+which then gets handed back to pcre_exec().
+
+Arguments:
+ re points to the compiled expression
+ options contains option bits
+ errorptr points to where to place error messages;
+ set NULL unless error
+
Returns: pointer to a pcre[16]_extra block, with study_data filled in and
the appropriate flags set;
- NULL on error or if no optimization possible
-*/
-
+ NULL on error or if no optimization possible
+*/
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
-pcre_study(const pcre *external_re, int options, const char **errorptr)
+pcre_study(const pcre *external_re, int options, const char **errorptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
@@ -1457,27 +1457,27 @@ pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
#endif
-{
+{
int min;
int count = 0;
BOOL bits_set = FALSE;
pcre_uint8 start_bits[32];
PUBL(extra) *extra = NULL;
-pcre_study_data *study;
+pcre_study_data *study;
const pcre_uint8 *tables;
pcre_uchar *code;
-compile_data compile_block;
+compile_data compile_block;
const REAL_PCRE *re = (const REAL_PCRE *)external_re;
-
-
-*errorptr = NULL;
-
-if (re == NULL || re->magic_number != MAGIC_NUMBER)
- {
- *errorptr = "argument is not a compiled regular expression";
- return NULL;
- }
-
+
+
+*errorptr = NULL;
+
+if (re == NULL || re->magic_number != MAGIC_NUMBER)
+ {
+ *errorptr = "argument is not a compiled regular expression";
+ return NULL;
+ }
+
if ((re->flags & PCRE_MODE) == 0)
{
#if defined COMPILE_PCRE8
@@ -1490,28 +1490,28 @@ if ((re->flags & PCRE_MODE) == 0)
return NULL;
}
-if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
- {
- *errorptr = "unknown or incorrect option bit(s) set";
- return NULL;
- }
-
+if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
+ {
+ *errorptr = "unknown or incorrect option bit(s) set";
+ return NULL;
+ }
+
code = (pcre_uchar *)re + re->name_table_offset +
- (re->name_count * re->name_entry_size);
-
-/* For an anchored pattern, or an unanchored pattern that has a first char, or
+ (re->name_count * re->name_entry_size);
+
+/* For an anchored pattern, or an unanchored pattern that has a first char, or
a multiline pattern that matches only at "line starts", there is no point in
seeking a list of starting bytes. */
-
+
if ((re->options & PCRE_ANCHORED) == 0 &&
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
{
int rc;
-
+
/* Set the character tables in the block that is passed around */
-
+
tables = re->tables;
-
+
#if defined COMPILE_PCRE8
if (tables == NULL)
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
@@ -1525,14 +1525,14 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
(void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
#endif
-
+
compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
compile_block.cbits = tables + cbits_offset;
compile_block.ctypes = tables + ctypes_offset;
-
+
/* See if we can find a fixed set of initial characters for the pattern. */
-
+
memset(start_bits, 0, 32 * sizeof(pcre_uint8));
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
&compile_block);
@@ -1543,16 +1543,16 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
return NULL;
}
}
-
+
/* Find the minimum length of subject string. */
-
+
switch(min = find_minlength(re, code, code, re->options, NULL, &count))
- {
+ {
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
default: break;
- }
-
+ }
+
/* If a set of starting bytes has been identified, or if the minimum length is
greater than zero, or if JIT optimization has been requested, or if
PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16]_extra block and a
@@ -1561,7 +1561,7 @@ by the former, which may also get additional data set later by the calling
program. At the moment, the size of pcre_study_data is fixed. We nevertheless
save it in a field for returning via the pcre_fullinfo() function so that if it
becomes variable in the future, we don't have to change that code. */
-
+
if (bits_set || min > 0 || (options & (
#ifdef SUPPORT_JIT
PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
@@ -1576,7 +1576,7 @@ if (bits_set || min > 0 || (options & (
*errorptr = "failed to get memory";
return NULL;
}
-
+
study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
extra->flags = PCRE_EXTRA_STUDY_DATA;
extra->study_data = study;
@@ -1648,9 +1648,9 @@ if (bits_set || min > 0 || (options & (
#endif
}
-return extra;
-}
-
+return extra;
+}
+
/*************************************************
* Free the study data *
@@ -1683,4 +1683,4 @@ if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
PUBL(free)(extra);
}
-/* End of pcre_study.c */
+/* End of pcre_study.c */
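+
+The restored set_start_bits()/pcre_study() code above accumulates every byte that could start a match into a 32-byte (256-bit) map, using the index arithmetic start_bits[c/8] and bit (c&7); in UTF-8 mode a negative class additionally marks bytes 0xc4 and above as possible starters of characters greater than 255. The following standalone sketch is not part of the patch and uses illustrative macro names; it only demonstrates the bit arithmetic the study data relies on.
+
+/* Minimal sketch (not part of the patch): how a 32-byte start_bits map
+   encodes "byte c may start a match". The index arithmetic mirrors the
+   code above; SET_START_BIT/TEST_START_BIT are illustrative names. */
+
+#include <stdio.h>
+#include <string.h>
+
+#define SET_START_BIT(map, c)  ((map)[(c) / 8] |= (unsigned char)(1 << ((c) & 7)))
+#define TEST_START_BIT(map, c) (((map)[(c) / 8] & (1 << ((c) & 7))) != 0)
+
+int main(void)
+{
+unsigned char start_bits[32];
+memset(start_bits, 0, sizeof(start_bits));
+
+/* Suppose the study pass decided 'a', 'b' and the byte 0xc3 can start a match. */
+SET_START_BIT(start_bits, 'a');
+SET_START_BIT(start_bits, 'b');
+SET_START_BIT(start_bits, 0xc3);
+
+/* A matcher can then skip subject positions whose first byte is unset. */
+printf("'a' possible: %d\n", TEST_START_BIT(start_bits, 'a'));    /* 1 */
+printf("'z' possible: %d\n", TEST_START_BIT(start_bits, 'z'));    /* 0 */
+printf("0xc3 possible: %d\n", TEST_START_BIT(start_bits, 0xc3));  /* 1 */
+return 0;
+}
+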
diff --git a/contrib/libs/pcre/pcre_tables.c b/contrib/libs/pcre/pcre_tables.c
index 00abeff330..179038d025 100644
--- a/contrib/libs/pcre/pcre_tables.c
+++ b/contrib/libs/pcre/pcre_tables.c
@@ -1,103 +1,103 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2017 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
#ifndef PCRE_INCLUDED
-
-/* This module contains some fixed tables that are used by more than one of the
-PCRE code modules. The tables are also #included by the pcretest program, which
-uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
-clashes with the library. */
-
-
-#ifdef HAVE_CONFIG_H
+
+/* This module contains some fixed tables that are used by more than one of the
+PCRE code modules. The tables are also #included by the pcretest program, which
+uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
+clashes with the library. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
+#endif
+
+#include "pcre_internal.h"
+
#endif /* PCRE_INCLUDED */
-
-/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
-the definition is next to the definition of the opcodes in pcre_internal.h. */
-
+
+/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
+the definition is next to the definition of the opcodes in pcre_internal.h. */
+
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
-
+
/* Tables of horizontal and vertical whitespace characters, suitable for
adding to classes. */
-
+
const pcre_uint32 PRIV(hspace_list)[] = { HSPACE_LIST };
const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST };
-
-/*************************************************
-* Tables for UTF-8 support *
-*************************************************/
-
-/* These are the breakpoints for different numbers of bytes in a UTF-8
-character. */
-
+
+/*************************************************
+* Tables for UTF-8 support *
+*************************************************/
+
+/* These are the breakpoints for different numbers of bytes in a UTF-8
+character. */
+
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|| (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
-
+
/* These tables are also required by pcretest in 16- or 32-bit mode. */
const int PRIV(utf8_table1)[] =
- { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
-
+ { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
+
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
-
-/* These are the indicator bits and the mask for the data bits to set in the
-first byte of a character, indexed by the number of additional bytes. */
-
+
+/* These are the indicator bits and the mask for the data bits to set in the
+first byte of a character, indexed by the number of additional bytes. */
+
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
-
-/* Table of the number of extra bytes, indexed by the first byte masked with
-0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
-
+
+/* Table of the number of extra bytes, indexed by the first byte masked with
+0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
+
const pcre_uint8 PRIV(utf8_table4)[] = {
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
-
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/
#ifdef SUPPORT_UTF
@@ -190,18 +190,18 @@ const int PRIV(ucp_typerange)[] = {
};
#endif /* SUPPORT_JIT */
-/* The pcre_utt[] table below translates Unicode property names into type and
-code values. It is searched by binary chop, so must be in collating sequence of
-name. Originally, the table contained pointers to the name strings in the first
-field of each entry. However, that leads to a large number of relocations when
-a shared library is dynamically loaded. A significant reduction is made by
-putting all the names into a single, large string and then using offsets in the
-table itself. Maintenance is more error-prone, but frequent changes to this
+/* The pcre_utt[] table below translates Unicode property names into type and
+code values. It is searched by binary chop, so must be in collating sequence of
+name. Originally, the table contained pointers to the name strings in the first
+field of each entry. However, that leads to a large number of relocations when
+a shared library is dynamically loaded. A significant reduction is made by
+putting all the names into a single, large string and then using offsets in the
+table itself. Maintenance is more error-prone, but frequent changes to this
data are unlikely.
-
+
July 2008: There is now a script called maint/GenerateUtt.py that can be used
to generate this data automatically instead of maintaining it by hand.
-
+
The script was updated in March 2009 to generate a new EBCDIC-compliant
version. Like all other character and string literals that are compared against
the regular expression pattern, we must use STR_ macros instead of literal
@@ -718,10 +718,10 @@ const ucp_type_table PRIV(utt)[] = {
{ 1277, PT_PC, ucp_Zl },
{ 1280, PT_PC, ucp_Zp },
{ 1283, PT_PC, ucp_Zs }
-};
-
+};
+
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
-
+
#endif /* SUPPORT_UTF */
-
-/* End of pcre_tables.c */
+
+/* End of pcre_tables.c */
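+
+The UTF-8 tables restored above drive encoding and decoding: utf8_table1 holds the largest code point representable in 1..6 bytes, utf8_table2 the indicator bits for the first byte, utf8_table3 the first-byte data masks used when decoding, and utf8_table4 the count of trailing bytes per lead byte. The sketch below is not part of the patch; it uses local copies of those table values and an illustrative helper name, not PCRE's internal API.
+
+/* Minimal sketch (not part of the patch): encoding a code point with local
+   copies of the utf8_table1/utf8_table2 values shown above.
+   ord2utf8_sketch() is an illustrative name, not a PCRE function. */
+
+#include <stdio.h>
+
+static const int table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };
+static const int table2[] = { 0,    0xc0,  0xe0,   0xf0,     0xf8,      0xfc };
+
+static int ord2utf8_sketch(unsigned int cvalue, unsigned char *buffer)
+{
+int i, j;
+for (i = 0; i < 6; i++) if (cvalue <= (unsigned int)table1[i]) break;
+
+/* Trailing bytes each carry 6 data bits, tagged 10xxxxxx, written backwards. */
+for (j = i; j > 0; j--)
+  {
+  buffer[j] = (unsigned char)(0x80 | (cvalue & 0x3f));
+  cvalue >>= 6;
+  }
+
+/* The first byte gets the length indicator bits from table2. */
+buffer[0] = (unsigned char)(table2[i] | cvalue);
+return i + 1;   /* number of bytes written */
+}
+
+int main(void)
+{
+unsigned char buf[6];
+int k, n = ord2utf8_sketch(0x20AC, buf);            /* EURO SIGN */
+for (k = 0; k < n; k++) printf("%02X ", buf[k]);    /* prints: E2 82 AC */
+printf("\n");
+return 0;
+}
+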
diff --git a/contrib/libs/pcre/pcre_valid_utf8.c b/contrib/libs/pcre/pcre_valid_utf8.c
index d81291b7f2..3983ed1d68 100644
--- a/contrib/libs/pcre/pcre_valid_utf8.c
+++ b/contrib/libs/pcre/pcre_valid_utf8.c
@@ -1,72 +1,72 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains an internal function for validating UTF-8 character
-strings. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains an internal function for validating UTF-8 character
+strings. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Validate a UTF-8 string *
-*************************************************/
-
-/* This function is called (optionally) at the start of compile or match, to
+#endif
+
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Validate a UTF-8 string *
+*************************************************/
+
+/* This function is called (optionally) at the start of compile or match, to
check that a supposed UTF-8 string is actually valid. The early check means
-that subsequent code can assume it is dealing with a valid string. The check
+that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying an
invalid string are then undefined.
-
-Originally, this function checked according to RFC 2279, allowing for values in
-the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
-the canonical format. Once somebody had pointed out RFC 3629 to me (it
-obsoletes 2279), additional restrictions were applied. The values are now
-limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
+
+Originally, this function checked according to RFC 2279, allowing for values in
+the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
+the canonical format. Once somebody had pointed out RFC 3629 to me (it
+obsoletes 2279), additional restrictions were applied. The values are now
+limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
characters is still checked.
-
+
From release 8.13 more information about the details of the error are passed
back in the returned value:
@@ -94,31 +94,31 @@ PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
PCRE_UTF8_ERR22 Unused (was non-character)
-Arguments:
- string points to the string
- length length of string, or -1 if the string is zero-terminated
+Arguments:
+ string points to the string
+ length length of string, or -1 if the string is zero-terminated
errp pointer to an error position offset variable
-
+
Returns: = 0 if the string is a valid UTF-8 string
> 0 otherwise, setting the offset of the bad character
-*/
-
-int
+*/
+
+int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
-{
+{
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
-
-if (length < 0)
- {
- for (p = string; *p != 0; p++);
+
+if (length < 0)
+ {
+ for (p = string; *p != 0; p++);
length = (int)(p - string);
- }
-
-for (p = string; length-- > 0; p++)
- {
+ }
+
+for (p = string; length-- > 0; p++)
+ {
register pcre_uchar ab, c, d;
-
+
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -142,35 +142,35 @@ for (p = string; length-- > 0; p++)
}
length -= ab; /* Length remaining */
- /* Check top bits in the second byte */
-
+ /* Check top bits in the second byte */
+
if (((d = *(++p)) & 0xc0) != 0x80)
{
*erroroffset = (int)(p - string) - 1;
return PCRE_UTF8_ERR6;
}
-
+
/* For each length, check that the remaining bytes start with the 0x80 bit
set and not the 0x40 bit. Then check for an overlong sequence, and for the
excluded range 0xd800 to 0xdfff. */
- switch (ab)
- {
+ switch (ab)
+ {
/* 2-byte character. No further bytes to check for 0x80. Check first byte
    for xx00 000x (overlong sequence). */
-
+
case 1: if ((c & 0x3e) == 0)
{
*erroroffset = (int)(p - string) - 1;
return PCRE_UTF8_ERR15;
}
break;
-
+
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
for 1110 0000, xx0x xxxx (overlong sequence) or
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
-
- case 2:
+
+ case 2:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
@@ -186,13 +186,13 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
- break;
-
+ break;
+
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
    bytes for 1111 0000, xx00 xxxx (overlong sequence), then check for a
character greater than 0x0010ffff (f4 8f bf bf) */
-
- case 3:
+
+ case 3:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
@@ -213,17 +213,17 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
- break;
-
+ break;
+
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
rejected by the length test below. However, we do the appropriate tests
here so that overlong sequences get diagnosed, and also in case there is
ever an option for handling these larger code points. */
-
+
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
1111 1000, xx00 0xxx */
- case 4:
+ case 4:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
@@ -244,12 +244,12 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR18;
}
- break;
-
+ break;
+
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
1111 1100, xx00 00xx. */
- case 5:
+ case 5:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
@@ -275,27 +275,27 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 5;
return PCRE_UTF8_ERR19;
}
- break;
- }
-
+ break;
+ }
+
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
excluded by RFC 3629. The pointer p is currently at the last byte of the
character. */
if (ab > 3)
- {
+ {
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
- }
- }
+ }
+ }
#else /* Not SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
(void)(erroroffset);
-#endif
-
+#endif
+
return PCRE_UTF8_ERR0; /* This indicates success */
-}
-
-/* End of pcre_valid_utf8.c */
+}
+
+/* End of pcre_valid_utf8.c */
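+
+The restored PRIV(valid_utf) above walks the string byte by byte: every continuation byte must have the form 10xxxxxx, overlong encodings are rejected (for a 2-byte sequence, a lead byte whose value fits in 7 bits), and the surrogate range 0xd800-0xdfff is excluded. The sketch below is not part of the patch; check2_sketch() is an illustrative helper whose return codes loosely mirror PCRE_UTF8_ERR6 and PCRE_UTF8_ERR15 from the code above.
+
+/* Minimal sketch (not part of the patch): the shape of the 2-byte checks in
+   PRIV(valid_utf). Returns 0 for a valid 2-byte sequence, else a nonzero
+   code roughly corresponding to the PCRE_UTF8_ERRn values above. */
+
+#include <stdio.h>
+
+static int check2_sketch(const unsigned char *p)
+{
+unsigned char c = p[0], d = p[1];
+if ((c & 0xe0) != 0xc0) return 1;    /* not a 2-byte lead byte (110xxxxx) */
+if ((d & 0xc0) != 0x80) return 6;    /* continuation byte is not 10xxxxxx */
+if ((c & 0x3e) == 0)    return 15;   /* overlong: the value fits in 7 bits */
+return 0;
+}
+
+int main(void)
+{
+unsigned char ok[]       = { 0xC3, 0xA9 };  /* U+00E9, valid */
+unsigned char overlong[] = { 0xC0, 0xA9 };  /* overlong form of U+0029 */
+unsigned char badcont[]  = { 0xC3, 0x29 };  /* second byte not 10xxxxxx */
+printf("%d %d %d\n",
+       check2_sketch(ok), check2_sketch(overlong), check2_sketch(badcont));
+/* prints: 0 15 6 */
+return 0;
+}
+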
diff --git a/contrib/libs/pcre/pcre_version.c b/contrib/libs/pcre/pcre_version.c
index ea896e1d80..2ff2b79b8c 100644
--- a/contrib/libs/pcre/pcre_version.c
+++ b/contrib/libs/pcre/pcre_version.c
@@ -1,87 +1,87 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_version(), which returns a
-string that identifies the PCRE version that is in use. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre_version(), which returns a
+string that identifies the PCRE version that is in use. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Return version string *
-*************************************************/
-
-/* These macros are the standard way of turning unquoted text into C strings.
-They allow macros like PCRE_MAJOR to be defined without quotes, which is
-convenient for user programs that want to test its value. */
-
-#define STRING(a) # a
-#define XSTRING(s) STRING(s)
-
-/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
-production releases. Originally, it was used naively in this code:
-
- return XSTRING(PCRE_MAJOR)
- "." XSTRING(PCRE_MINOR)
- XSTRING(PCRE_PRERELEASE)
- " " XSTRING(PCRE_DATE);
-
-However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
-STRING(). The C standard states: "If (before argument substitution) any
-argument consists of no preprocessing tokens, the behavior is undefined." It
-turns out the gcc treats this case as a single empty string - which is what we
-really want - but Visual C grumbles about the lack of an argument for the
-macro. Unfortunately, both are within their rights. To cope with both ways of
-handling this, I had resort to some messy hackery that does a test at run time.
-I could find no way of detecting that a macro is defined as an empty string at
-pre-processor time. This hack uses a standard trick for avoiding calling
-the STRING macro with an empty argument when doing the test. */
-
+#endif
+
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Return version string *
+*************************************************/
+
+/* These macros are the standard way of turning unquoted text into C strings.
+They allow macros like PCRE_MAJOR to be defined without quotes, which is
+convenient for user programs that want to test its value. */
+
+#define STRING(a) # a
+#define XSTRING(s) STRING(s)
+
+/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
+production releases. Originally, it was used naively in this code:
+
+ return XSTRING(PCRE_MAJOR)
+ "." XSTRING(PCRE_MINOR)
+ XSTRING(PCRE_PRERELEASE)
+ " " XSTRING(PCRE_DATE);
+
+However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
+STRING(). The C standard states: "If (before argument substitution) any
+argument consists of no preprocessing tokens, the behavior is undefined." It
+turns out the gcc treats this case as a single empty string - which is what we
+really want - but Visual C grumbles about the lack of an argument for the
+macro. Unfortunately, both are within their rights. To cope with both ways of
+handling this, I had resort to some messy hackery that does a test at run time.
+I could find no way of detecting that a macro is defined as an empty string at
+pre-processor time. This hack uses a standard trick for avoiding calling
+the STRING macro with an empty argument when doing the test. */
+
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
-pcre_version(void)
+pcre_version(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre16_version(void)
@@ -89,10 +89,10 @@ pcre16_version(void)
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre32_version(void)
#endif
-{
-return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
- XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
- XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
-}
-
-/* End of pcre_version.c */
+{
+return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
+ XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
+ XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
+}
+
+/* End of pcre_version.c */
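+
+The STRING/XSTRING pair restored above is the usual two-level stringizing idiom, and the "Z" guard in XSTRING(Z PCRE_PRERELEASE) keeps the macro argument non-empty so both gcc and Visual C accept it; when PCRE_PRERELEASE is empty the result is the one-character string "Z", which the [1] == 0 test detects at run time. The sketch below is not part of the patch and uses simplified macro names (MAJOR, MINOR, PRERELEASE, DATE) standing in for the PCRE_* configuration macros.
+
+/* Minimal sketch (not part of the patch): the empty-macro detection trick
+   used by pcre_version(), with illustrative stand-in macros. */
+
+#include <stdio.h>
+
+#define STRING(a)  # a
+#define XSTRING(s) STRING(s)
+
+#define MAJOR 8
+#define MINOR 44
+#define PRERELEASE            /* empty, as for a production release */
+#define DATE 2020-02-12
+
+int main(void)
+{
+/* XSTRING(Z PRERELEASE) is "Z" when PRERELEASE is empty, longer otherwise. */
+const char *v = (XSTRING(Z PRERELEASE)[1] == 0) ?
+  XSTRING(MAJOR.MINOR DATE) :
+  XSTRING(MAJOR.MINOR) XSTRING(PRERELEASE DATE);
+printf("%s\n", v);   /* prints: 8.44 2020-02-12 */
+return 0;
+}
+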
diff --git a/contrib/libs/pcre/pcre_xclass.c b/contrib/libs/pcre/pcre_xclass.c
index f42b4a1a9c..942696ed7c 100644
--- a/contrib/libs/pcre/pcre_xclass.c
+++ b/contrib/libs/pcre/pcre_xclass.c
@@ -1,86 +1,86 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains an internal function that is used to match an extended
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains an internal function that is used to match an extended
class. It is used by both pcre_exec() and pcre_def_exec(). */
-
-
-#ifdef HAVE_CONFIG_H
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* Match character against an XCLASS *
-*************************************************/
-
-/* This function is called to match a character against an extended class that
+#endif
+
+#include "pcre_internal.h"
+
+
+/*************************************************
+* Match character against an XCLASS *
+*************************************************/
+
+/* This function is called to match a character against an extended class that
might contain values > 255 and/or Unicode properties.
-
-Arguments:
- c the character
- data points to the flag byte of the XCLASS data
-
-Returns: TRUE if character matches, else FALSE
-*/
-
-BOOL
+
+Arguments:
+ c the character
+ data points to the flag byte of the XCLASS data
+
+Returns: TRUE if character matches, else FALSE
+*/
+
+BOOL
PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
-{
+{
pcre_uchar t;
-BOOL negated = (*data & XCL_NOT) != 0;
-
+BOOL negated = (*data & XCL_NOT) != 0;
+
(void)utf;
#ifdef COMPILE_PCRE8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif
-/* Character values < 256 are matched against a bitmap, if one is present. If
-not, we still carry on, because there may be ranges that start below 256 in the
-additional data. */
-
-if (c < 256)
- {
+/* Character values < 256 are matched against a bitmap, if one is present. If
+not, we still carry on, because there may be ranges that start below 256 in the
+additional data. */
+
+if (c < 256)
+ {
if ((*data & XCL_HASPROP) == 0)
{
if ((*data & XCL_MAP) == 0) return negated;
@@ -89,19 +89,19 @@ if (c < 256)
if ((*data & XCL_MAP) != 0 &&
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
- }
-
-/* First skip the bit map if present. Then match against the list of Unicode
-properties or large chars or ranges that end with a large char. We won't ever
-encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
-
+ }
+
+/* First skip the bit map if present. Then match against the list of Unicode
+properties or large chars or ranges that end with a large char. We won't ever
+encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
+
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
-
-while ((t = *data++) != XCL_END)
- {
+
+while ((t = *data++) != XCL_END)
+ {
pcre_uint32 x, y;
- if (t == XCL_SINGLE)
- {
+ if (t == XCL_SINGLE)
+ {
#ifdef SUPPORT_UTF
if (utf)
{
@@ -110,10 +110,10 @@ while ((t = *data++) != XCL_END)
else
#endif
x = *data++;
- if (c == x) return !negated;
- }
- else if (t == XCL_RANGE)
- {
+ if (c == x) return !negated;
+ }
+ else if (t == XCL_RANGE)
+ {
#ifdef SUPPORT_UTF
if (utf)
{
@@ -126,39 +126,39 @@ while ((t = *data++) != XCL_END)
x = *data++;
y = *data++;
}
- if (c >= x && c <= y) return !negated;
- }
-
-#ifdef SUPPORT_UCP
- else /* XCL_PROP & XCL_NOTPROP */
- {
+ if (c >= x && c <= y) return !negated;
+ }
+
+#ifdef SUPPORT_UCP
+ else /* XCL_PROP & XCL_NOTPROP */
+ {
const ucd_record *prop = GET_UCD(c);
BOOL isprop = t == XCL_PROP;
-
- switch(*data)
- {
- case PT_ANY:
+
+ switch(*data)
+ {
+ case PT_ANY:
if (isprop) return !negated;
- break;
-
- case PT_LAMP:
+ break;
+
+ case PT_LAMP:
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == isprop) return !negated;
- break;
-
- case PT_GC:
+ break;
+
+ case PT_GC:
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
return !negated;
- break;
-
- case PT_PC:
+ break;
+
+ case PT_PC:
if ((data[1] == prop->chartype) == isprop) return !negated;
- break;
-
- case PT_SC:
+ break;
+
+ case PT_SC:
if ((data[1] == prop->script) == isprop) return !negated;
- break;
-
+ break;
+
case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
@@ -250,19 +250,19 @@ while ((t = *data++) != XCL_END)
return !negated;
break;
- /* This should never occur, but compilers may mutter if there is no
- default. */
-
- default:
- return FALSE;
- }
-
- data += 2;
- }
-#endif /* SUPPORT_UCP */
- }
-
-return negated; /* char did not match */
-}
-
-/* End of pcre_xclass.c */
+ /* This should never occur, but compilers may mutter if there is no
+ default. */
+
+ default:
+ return FALSE;
+ }
+
+ data += 2;
+ }
+#endif /* SUPPORT_UCP */
+ }
+
+return negated; /* char did not match */
+}
+
+/* End of pcre_xclass.c */
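+
+The restored PRIV(xclass) above performs a two-stage test: characters below 256 are first looked up in an optional 32-byte bitmap, and everything else is matched against the following XCL_SINGLE, XCL_RANGE and property items, with XCL_NOT inverting the outcome. The sketch below is not part of the patch; its struct layout is illustrative only, since PCRE stores this data as a serialized opcode stream rather than a struct.
+
+/* Minimal sketch (not part of the patch): bitmap-then-ranges matching in the
+   style of PRIV(xclass). xclass_sketch and xclass_match_sketch() are
+   illustrative names, not PCRE types or functions. */
+
+#include <stdio.h>
+#include <string.h>
+
+typedef struct {
+  unsigned char map[32];       /* bitmap for code points < 256 */
+  unsigned int  ranges[4][2];  /* {lo, hi} pairs for larger code points */
+  int           nranges;
+  int           negated;       /* analogue of the XCL_NOT flag */
+} xclass_sketch;
+
+static int xclass_match_sketch(unsigned int c, const xclass_sketch *x)
+{
+int i;
+if (c < 256 && (x->map[c / 8] & (1 << (c & 7))) != 0) return !x->negated;
+for (i = 0; i < x->nranges; i++)
+  if (c >= x->ranges[i][0] && c <= x->ranges[i][1]) return !x->negated;
+return x->negated;             /* nothing matched */
+}
+
+int main(void)
+{
+xclass_sketch x;
+memset(&x, 0, sizeof(x));
+x.map['a' / 8] |= 1 << ('a' & 7);                   /* class contains 'a' */
+x.ranges[0][0] = 0x0410; x.ranges[0][1] = 0x044F;   /* and U+0410..U+044F */
+x.nranges = 1;
+printf("%d %d %d\n",
+       xclass_match_sketch('a', &x),       /* 1 */
+       xclass_match_sketch('b', &x),       /* 0 */
+       xclass_match_sketch(0x0430, &x));   /* 1 */
+return 0;
+}
+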
diff --git a/contrib/libs/pcre/pcrecpp/ya.make b/contrib/libs/pcre/pcrecpp/ya.make
index 8eb2dacc7f..c832b9e56e 100644
--- a/contrib/libs/pcre/pcrecpp/ya.make
+++ b/contrib/libs/pcre/pcrecpp/ya.make
@@ -9,8 +9,8 @@ OWNER(
g:cpp-contrib
)
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
PEERDIR(
contrib/libs/pcre
)
diff --git a/contrib/libs/pcre/pcreposix.c b/contrib/libs/pcre/pcreposix.c
index 94a82336eb..55972c1c23 100644
--- a/contrib/libs/pcre/pcreposix.c
+++ b/contrib/libs/pcre/pcreposix.c
@@ -1,57 +1,57 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
- Written by Philip Hazel
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
Copyright (c) 1997-2020 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module is a wrapper that provides a POSIX API to the underlying PCRE
-functions. */
-
-
-#ifdef HAVE_CONFIG_H
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module is a wrapper that provides a POSIX API to the underlying PCRE
+functions. */
+
+
+#ifdef HAVE_CONFIG_H
#include "pcre_config.h"
-#endif
-
-
-/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
-compiling these functions. This must come before including pcreposix.h, where
-they are set for an application (using these functions) if they have not
-previously been set. */
-
+#endif
+
+
+/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
+compiling these functions. This must come before including pcreposix.h, where
+they are set for an application (using these functions) if they have not
+previously been set. */
+
#if defined(_WIN32) && !defined(PCRE_STATIC)
# define PCREPOSIX_EXP_DECL extern __declspec(dllexport)
# define PCREPOSIX_EXP_DEFN __declspec(dllexport)
@@ -62,89 +62,89 @@ are declared as "import" for Windows by defining PCRE_EXP_DECL as "import".
This is needed even though pcre_internal.h itself includes pcre.h, because it
does so after it has set PCRE_EXP_DECL to "export" if it is not already set. */
-#include "pcre.h"
-#include "pcre_internal.h"
-#include "pcreposix.h"
-
-
-/* Table to translate PCRE compile time error codes into POSIX error codes. */
-
-static const int eint[] = {
- 0, /* no error */
- REG_EESCAPE, /* \ at end of pattern */
- REG_EESCAPE, /* \c at end of pattern */
- REG_EESCAPE, /* unrecognized character follows \ */
- REG_BADBR, /* numbers out of order in {} quantifier */
+#include "pcre.h"
+#include "pcre_internal.h"
+#include "pcreposix.h"
+
+
+/* Table to translate PCRE compile time error codes into POSIX error codes. */
+
+static const int eint[] = {
+ 0, /* no error */
+ REG_EESCAPE, /* \ at end of pattern */
+ REG_EESCAPE, /* \c at end of pattern */
+ REG_EESCAPE, /* unrecognized character follows \ */
+ REG_BADBR, /* numbers out of order in {} quantifier */
/* 5 */
- REG_BADBR, /* number too big in {} quantifier */
- REG_EBRACK, /* missing terminating ] for character class */
- REG_ECTYPE, /* invalid escape sequence in character class */
- REG_ERANGE, /* range out of order in character class */
- REG_BADRPT, /* nothing to repeat */
+ REG_BADBR, /* number too big in {} quantifier */
+ REG_EBRACK, /* missing terminating ] for character class */
+ REG_ECTYPE, /* invalid escape sequence in character class */
+ REG_ERANGE, /* range out of order in character class */
+ REG_BADRPT, /* nothing to repeat */
/* 10 */
- REG_BADRPT, /* operand of unlimited repeat could match the empty string */
- REG_ASSERT, /* internal error: unexpected repeat */
- REG_BADPAT, /* unrecognized character after (? */
- REG_BADPAT, /* POSIX named classes are supported only within a class */
- REG_EPAREN, /* missing ) */
+ REG_BADRPT, /* operand of unlimited repeat could match the empty string */
+ REG_ASSERT, /* internal error: unexpected repeat */
+ REG_BADPAT, /* unrecognized character after (? */
+ REG_BADPAT, /* POSIX named classes are supported only within a class */
+ REG_EPAREN, /* missing ) */
/* 15 */
- REG_ESUBREG, /* reference to non-existent subpattern */
- REG_INVARG, /* erroffset passed as NULL */
- REG_INVARG, /* unknown option bit(s) set */
- REG_EPAREN, /* missing ) after comment */
- REG_ESIZE, /* parentheses nested too deeply */
+ REG_ESUBREG, /* reference to non-existent subpattern */
+ REG_INVARG, /* erroffset passed as NULL */
+ REG_INVARG, /* unknown option bit(s) set */
+ REG_EPAREN, /* missing ) after comment */
+ REG_ESIZE, /* parentheses nested too deeply */
/* 20 */
- REG_ESIZE, /* regular expression too large */
- REG_ESPACE, /* failed to get memory */
+ REG_ESIZE, /* regular expression too large */
+ REG_ESPACE, /* failed to get memory */
REG_EPAREN, /* unmatched parentheses */
- REG_ASSERT, /* internal error: code overflow */
- REG_BADPAT, /* unrecognized character after (?< */
+ REG_ASSERT, /* internal error: code overflow */
+ REG_BADPAT, /* unrecognized character after (?< */
/* 25 */
- REG_BADPAT, /* lookbehind assertion is not fixed length */
- REG_BADPAT, /* malformed number or name after (?( */
- REG_BADPAT, /* conditional group contains more than two branches */
- REG_BADPAT, /* assertion expected after (?( */
- REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */
+ REG_BADPAT, /* lookbehind assertion is not fixed length */
+ REG_BADPAT, /* malformed number or name after (?( */
+ REG_BADPAT, /* conditional group contains more than two branches */
+ REG_BADPAT, /* assertion expected after (?( */
+ REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */
/* 30 */
- REG_ECTYPE, /* unknown POSIX class name */
- REG_BADPAT, /* POSIX collating elements are not supported */
- REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */
- REG_BADPAT, /* spare error */
+ REG_ECTYPE, /* unknown POSIX class name */
+ REG_BADPAT, /* POSIX collating elements are not supported */
+ REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */
+ REG_BADPAT, /* spare error */
REG_BADPAT, /* character value in \x{} or \o{} is too large */
/* 35 */
- REG_BADPAT, /* invalid condition (?(0) */
- REG_BADPAT, /* \C not allowed in lookbehind assertion */
- REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
- REG_BADPAT, /* number after (?C is > 255 */
- REG_BADPAT, /* closing ) for (?C expected */
+ REG_BADPAT, /* invalid condition (?(0) */
+ REG_BADPAT, /* \C not allowed in lookbehind assertion */
+ REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
+ REG_BADPAT, /* number after (?C is > 255 */
+ REG_BADPAT, /* closing ) for (?C expected */
/* 40 */
- REG_BADPAT, /* recursive call could loop indefinitely */
- REG_BADPAT, /* unrecognized character after (?P */
- REG_BADPAT, /* syntax error in subpattern name (missing terminator) */
- REG_BADPAT, /* two named subpatterns have the same name */
- REG_BADPAT, /* invalid UTF-8 string */
+ REG_BADPAT, /* recursive call could loop indefinitely */
+ REG_BADPAT, /* unrecognized character after (?P */
+ REG_BADPAT, /* syntax error in subpattern name (missing terminator) */
+ REG_BADPAT, /* two named subpatterns have the same name */
+ REG_BADPAT, /* invalid UTF-8 string */
/* 45 */
- REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
- REG_BADPAT, /* malformed \P or \p sequence */
- REG_BADPAT, /* unknown property name after \P or \p */
- REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
- REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
+ REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
+ REG_BADPAT, /* malformed \P or \p sequence */
+ REG_BADPAT, /* unknown property name after \P or \p */
+ REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
+ REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
/* 50 */
- REG_BADPAT, /* repeated subpattern is too long */
- REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */
- REG_BADPAT, /* internal error: overran compiling workspace */
- REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */
- REG_BADPAT, /* DEFINE group contains more than one branch */
+ REG_BADPAT, /* repeated subpattern is too long */
+ REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */
+ REG_BADPAT, /* internal error: overran compiling workspace */
+ REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */
+ REG_BADPAT, /* DEFINE group contains more than one branch */
/* 55 */
- REG_BADPAT, /* repeating a DEFINE group is not allowed */
- REG_INVARG, /* inconsistent NEWLINE options */
- REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */
+ REG_BADPAT, /* repeating a DEFINE group is not allowed */
+ REG_INVARG, /* inconsistent NEWLINE options */
+ REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */
REG_BADPAT, /* a numbered reference must not be zero */
REG_BADPAT, /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */
/* 60 */
REG_BADPAT, /* (*VERB) not recognized */
- REG_BADPAT, /* number is too big */
- REG_BADPAT, /* subpattern name expected */
+ REG_BADPAT, /* number is too big */
+ REG_BADPAT, /* subpattern name expected */
REG_BADPAT, /* digit expected after (?+ */
REG_BADPAT, /* ] is an invalid data character in JavaScript compatibility mode */
/* 65 */
@@ -175,105 +175,105 @@ static const int eint[] = {
REG_BADPAT, /* parentheses too deeply nested (stack check) */
REG_BADPAT, /* missing digits in \x{} or \o{} */
REG_BADPAT /* pattern too complicated */
-};
-
-/* Table of texts corresponding to POSIX error codes */
-
-static const char *const pstring[] = {
- "", /* Dummy for value 0 */
- "internal error", /* REG_ASSERT */
- "invalid repeat counts in {}", /* BADBR */
- "pattern error", /* BADPAT */
- "? * + invalid", /* BADRPT */
- "unbalanced {}", /* EBRACE */
- "unbalanced []", /* EBRACK */
- "collation error - not relevant", /* ECOLLATE */
- "bad class", /* ECTYPE */
- "bad escape sequence", /* EESCAPE */
- "empty expression", /* EMPTY */
- "unbalanced ()", /* EPAREN */
- "bad range inside []", /* ERANGE */
- "expression too big", /* ESIZE */
- "failed to get memory", /* ESPACE */
- "bad back reference", /* ESUBREG */
- "bad argument", /* INVARG */
- "match failed" /* NOMATCH */
-};
-
-
-
-
-/*************************************************
-* Translate error code to string *
-*************************************************/
-
+};
+
+/* Table of texts corresponding to POSIX error codes */
+
+static const char *const pstring[] = {
+ "", /* Dummy for value 0 */
+ "internal error", /* REG_ASSERT */
+ "invalid repeat counts in {}", /* BADBR */
+ "pattern error", /* BADPAT */
+ "? * + invalid", /* BADRPT */
+ "unbalanced {}", /* EBRACE */
+ "unbalanced []", /* EBRACK */
+ "collation error - not relevant", /* ECOLLATE */
+ "bad class", /* ECTYPE */
+ "bad escape sequence", /* EESCAPE */
+ "empty expression", /* EMPTY */
+ "unbalanced ()", /* EPAREN */
+ "bad range inside []", /* ERANGE */
+ "expression too big", /* ESIZE */
+ "failed to get memory", /* ESPACE */
+ "bad back reference", /* ESUBREG */
+ "bad argument", /* INVARG */
+ "match failed" /* NOMATCH */
+};
+
+
+
+
+/*************************************************
+* Translate error code to string *
+*************************************************/
+
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION
-regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
-{
-const char *message, *addmessage;
-size_t length, addlength;
-
-message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
- "unknown error code" : pstring[errcode];
-length = strlen(message) + 1;
-
-addmessage = " at offset ";
+regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+const char *message, *addmessage;
+size_t length, addlength;
+
+message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
+ "unknown error code" : pstring[errcode];
+length = strlen(message) + 1;
+
+addmessage = " at offset ";
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
- strlen(addmessage) + 6 : 0;
-
-if (errbuf_size > 0)
- {
- if (addlength > 0 && errbuf_size >= length + addlength)
- sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
- else
- {
- strncpy(errbuf, message, errbuf_size - 1);
- errbuf[errbuf_size-1] = 0;
- }
- }
-
-return length + addlength;
-}
-
-
-
-
-/*************************************************
-* Free store held by a regex *
-*************************************************/
-
+ strlen(addmessage) + 6 : 0;
+
+if (errbuf_size > 0)
+ {
+ if (addlength > 0 && errbuf_size >= length + addlength)
+ sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
+ else
+ {
+ strncpy(errbuf, message, errbuf_size - 1);
+ errbuf[errbuf_size-1] = 0;
+ }
+ }
+
+return length + addlength;
+}
+
+
+
+
+/*************************************************
+* Free store held by a regex *
+*************************************************/
+
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
-regfree(regex_t *preg)
-{
+regfree(regex_t *preg)
+{
(PUBL(free))(preg->re_pcre);
-}
-
-
-
-
-/*************************************************
-* Compile a regular expression *
-*************************************************/
-
-/*
-Arguments:
- preg points to a structure for recording the compiled expression
- pattern the pattern to compile
- cflags compilation flags
-
-Returns: 0 on success
- various non-zero codes on failure
-*/
-
+}
+
+
+
+
+/*************************************************
+* Compile a regular expression *
+*************************************************/
+
+/*
+Arguments:
+ preg points to a structure for recording the compiled expression
+ pattern the pattern to compile
+ cflags compilation flags
+
+Returns: 0 on success
+ various non-zero codes on failure
+*/
+
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
-regcomp(regex_t *preg, const char *pattern, int cflags)
-{
-const char *errorptr;
-int erroffset;
-int errorcode;
-int options = 0;
+regcomp(regex_t *preg, const char *pattern, int cflags)
+{
+const char *errorptr;
+int erroffset;
+int errorcode;
+int options = 0;
int re_nsub = 0;
-
+
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
@@ -281,14 +281,14 @@ if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
if ((cflags & REG_UCP) != 0) options |= PCRE_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;
-
-preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
- &erroffset, NULL);
-preg->re_erroffset = erroffset;
-
+
+preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
+ &erroffset, NULL);
+preg->re_erroffset = erroffset;
+
/* Safety: if the error code is too big for the translation vector (which
should not happen, but we all make mistakes), return REG_BADPAT. */
-
+
if (preg->re_pcre == NULL)
{
return (errorcode < (int)(sizeof(eint)/sizeof(const int)))?
@@ -299,69 +299,69 @@ if (preg->re_pcre == NULL)
&re_nsub);
preg->re_nsub = (size_t)re_nsub;
preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */
-return 0;
-}
-
-
-
-
-/*************************************************
-* Match a regular expression *
-*************************************************/
-
-/* Unfortunately, PCRE requires 3 ints of working space for each captured
-substring, so we have to get and release working store instead of just using
-the POSIX structures as was done in earlier releases when PCRE needed only 2
-ints. However, if the number of possible capturing brackets is small, use a
-block of store on the stack, to reduce the use of malloc/free. The threshold is
-in a macro that can be changed at configure time.
-
-If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
-be set. When this is the case, the nmatch and pmatch arguments are ignored, and
-the only result is yes/no/error. */
-
+return 0;
+}
+
+
+
+
+/*************************************************
+* Match a regular expression *
+*************************************************/
+
+/* Unfortunately, PCRE requires 3 ints of working space for each captured
+substring, so we have to get and release working store instead of just using
+the POSIX structures as was done in earlier releases when PCRE needed only 2
+ints. However, if the number of possible capturing brackets is small, use a
+block of store on the stack, to reduce the use of malloc/free. The threshold is
+in a macro that can be changed at configure time.
+
+If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
+be set. When this is the case, the nmatch and pmatch arguments are ignored, and
+the only result is yes/no/error. */
+
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
-regexec(const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags)
-{
+regexec(const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
+{
int rc, so, eo;
-int options = 0;
-int *ovector = NULL;
-int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
-BOOL allocated_ovector = FALSE;
-BOOL nosub =
+int options = 0;
+int *ovector = NULL;
+int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
+BOOL allocated_ovector = FALSE;
+BOOL nosub =
(REAL_PCRE_OPTIONS((const pcre *)preg->re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0;
-
-if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
-if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
+
+if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
+if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY;
-
+
/* When no string data is being returned, or no vector has been passed in which
to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
the return data is large enough. */
-
+
if (nosub || pmatch == NULL) nmatch = 0;
-
-else if (nmatch > 0)
- {
- if (nmatch <= POSIX_MALLOC_THRESHOLD)
- {
- ovector = &(small_ovector[0]);
- }
- else
- {
- if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
- ovector = (int *)malloc(sizeof(int) * nmatch * 3);
- if (ovector == NULL) return REG_ESPACE;
- allocated_ovector = TRUE;
- }
- }
-
+
+else if (nmatch > 0)
+ {
+ if (nmatch <= POSIX_MALLOC_THRESHOLD)
+ {
+ ovector = &(small_ovector[0]);
+ }
+ else
+ {
+ if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
+ ovector = (int *)malloc(sizeof(int) * nmatch * 3);
+ if (ovector == NULL) return REG_ESPACE;
+ allocated_ovector = TRUE;
+ }
+ }
+
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
The man page from OS X says "REG_STARTEND affects only the location of the
string, not how it is matched". That is why the "so" value is used to bump the
start location rather than being passed as a PCRE "starting offset". */
-
+
if ((eflags & REG_STARTEND) != 0)
{
if (pmatch == NULL) return REG_INVARG;
@@ -373,7 +373,7 @@ else
so = 0;
eo = (int)strlen(string);
}
-
+
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
0, options, ovector, (int)(nmatch * 3));
@@ -381,27 +381,27 @@ if (rc == 0) rc = (int)nmatch; /* All captured slots were filled in */
/* Successful match */
-if (rc >= 0)
- {
- size_t i;
- if (!nosub)
- {
- for (i = 0; i < (size_t)rc; i++)
- {
+if (rc >= 0)
+ {
+ size_t i;
+ if (!nosub)
+ {
+ for (i = 0; i < (size_t)rc; i++)
+ {
pmatch[i].rm_so = (ovector[i*2] < 0)? -1 : ovector[i*2] + so;
pmatch[i].rm_eo = (ovector[i*2+1] < 0)? -1: ovector[i*2+1] + so;
- }
- if (allocated_ovector) free(ovector);
- for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
- }
- return 0;
- }
-
+ }
+ if (allocated_ovector) free(ovector);
+ for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+ }
+ return 0;
+ }
+
/* Unsuccessful match */
if (allocated_ovector) free(ovector);
switch(rc)
- {
+ {
/* ========================================================================== */
/* These cases are never obeyed. This is a fudge that causes a compile-time
error if the vector eint, which is indexed by compile-time error number, is
@@ -425,7 +425,7 @@ switch(rc)
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
case PCRE_ERROR_BADMODE: return REG_INVARG;
default: return REG_ASSERT;
- }
-}
-
-/* End of pcreposix.c */
+ }
+}
+
+/* End of pcreposix.c */
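The file above implements the POSIX wrapper: regcomp() maps the REG_* flags to PCRE_* options and calls pcre_compile2(), regexec() drives pcre_exec() and fills the regmatch_t slots, regerror() turns a code into text via the pstring table, and regfree() releases the compiled pattern. A minimal usage sketch, assuming pcreposix.h (shown below) is on the include path and the program links against this library:

#include <stdio.h>
#include "pcreposix.h"   /* the wrapper header from this diff; the include path is build-dependent */

int main(void)
{
    regex_t re;
    regmatch_t m[2];
    char errbuf[128];

    int rc = regcomp(&re, "(ab+c)", REG_ICASE);       /* REG_ICASE maps to PCRE_CASELESS */
    if (rc != 0) {
        regerror(rc, &re, errbuf, sizeof(errbuf));
        fprintf(stderr, "compile failed: %s\n", errbuf);
        return 1;
    }

    rc = regexec(&re, "xxABBBCxx", 2, m, 0);
    if (rc == 0)
        printf("group 1 at [%d, %d)\n", (int)m[1].rm_so, (int)m[1].rm_eo);  /* [2, 7) */
    else if (rc == REG_NOMATCH)
        printf("no match\n");

    regfree(&re);
    return 0;
}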
diff --git a/contrib/libs/pcre/pcreposix.h b/contrib/libs/pcre/pcreposix.h
index 4667ea388d..62cf33ae17 100644
--- a/contrib/libs/pcre/pcreposix.h
+++ b/contrib/libs/pcre/pcreposix.h
@@ -1,57 +1,57 @@
-/*************************************************
-* Perl-Compatible Regular Expressions *
-*************************************************/
-
-#ifndef _PCREPOSIX_H
-#define _PCREPOSIX_H
-
-/* This is the header for the POSIX wrapper interface to the PCRE Perl-
-Compatible Regular Expression library. It defines the things POSIX says should
-be there. I hope.
-
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+#ifndef _PCREPOSIX_H
+#define _PCREPOSIX_H
+
+/* This is the header for the POSIX wrapper interface to the PCRE Perl-
+Compatible Regular Expression library. It defines the things POSIX says should
+be there. I hope.
+
Copyright (c) 1997-2012 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-/* Have to include stdlib.h in order to ensure that size_t is defined. */
-
-#include <stdlib.h>
-
-/* Allow for C++ users */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Have to include stdlib.h in order to ensure that size_t is defined. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Options, mostly defined by POSIX, but with some extras. */
-
+
#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
@@ -63,64 +63,64 @@ extern "C" {
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */
-
-/* This is not used by PCRE, but by defining it we make it easier
-to slot PCRE into existing programs that make POSIX calls. */
-
-#define REG_EXTENDED 0
-
-/* Error values. Not all these are relevant or used by the wrapper. */
-
-enum {
- REG_ASSERT = 1, /* internal error ? */
- REG_BADBR, /* invalid repeat counts in {} */
- REG_BADPAT, /* pattern error */
- REG_BADRPT, /* ? * + invalid */
- REG_EBRACE, /* unbalanced {} */
- REG_EBRACK, /* unbalanced [] */
- REG_ECOLLATE, /* collation error - not relevant */
- REG_ECTYPE, /* bad class */
- REG_EESCAPE, /* bad escape sequence */
- REG_EMPTY, /* empty expression */
- REG_EPAREN, /* unbalanced () */
- REG_ERANGE, /* bad range inside [] */
- REG_ESIZE, /* expression too big */
- REG_ESPACE, /* failed to get memory */
- REG_ESUBREG, /* bad back reference */
- REG_INVARG, /* bad argument */
- REG_NOMATCH /* match failed */
-};
-
-
-/* The structure representing a compiled regular expression. */
-
-typedef struct {
- void *re_pcre;
- size_t re_nsub;
- size_t re_erroffset;
-} regex_t;
-
-/* The structure in which a captured offset is returned. */
-
-typedef int regoff_t;
-
-typedef struct {
- regoff_t rm_so;
- regoff_t rm_eo;
-} regmatch_t;
-
-/* When an application links to a PCRE DLL in Windows, the symbols that are
-imported have to be identified as such. When building PCRE, the appropriate
-export settings are needed, and are set in pcreposix.c before including this
-file. */
-
+
+/* This is not used by PCRE, but by defining it we make it easier
+to slot PCRE into existing programs that make POSIX calls. */
+
+#define REG_EXTENDED 0
+
+/* Error values. Not all these are relevant or used by the wrapper. */
+
+enum {
+ REG_ASSERT = 1, /* internal error ? */
+ REG_BADBR, /* invalid repeat counts in {} */
+ REG_BADPAT, /* pattern error */
+ REG_BADRPT, /* ? * + invalid */
+ REG_EBRACE, /* unbalanced {} */
+ REG_EBRACK, /* unbalanced [] */
+ REG_ECOLLATE, /* collation error - not relevant */
+ REG_ECTYPE, /* bad class */
+ REG_EESCAPE, /* bad escape sequence */
+ REG_EMPTY, /* empty expression */
+ REG_EPAREN, /* unbalanced () */
+ REG_ERANGE, /* bad range inside [] */
+ REG_ESIZE, /* expression too big */
+ REG_ESPACE, /* failed to get memory */
+ REG_ESUBREG, /* bad back reference */
+ REG_INVARG, /* bad argument */
+ REG_NOMATCH /* match failed */
+};
+
+
+/* The structure representing a compiled regular expression. */
+
+typedef struct {
+ void *re_pcre;
+ size_t re_nsub;
+ size_t re_erroffset;
+} regex_t;
+
+/* The structure in which a captured offset is returned. */
+
+typedef int regoff_t;
+
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} regmatch_t;
+
+/* When an application links to a PCRE DLL in Windows, the symbols that are
+imported have to be identified as such. When building PCRE, the appropriate
+export settings are needed, and are set in pcreposix.c before including this
+file. */
+
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
#endif
-/* By default, we use the standard "extern" declarations. */
-
+/* By default, we use the standard "extern" declarations. */
+
#ifndef PCREPOSIX_EXP_DECL
# ifdef __cplusplus
# define PCREPOSIX_EXP_DECL extern "C"
@@ -131,8 +131,8 @@ file. */
# endif
#endif
-/* The functions */
-
+/* The functions */
+
#define regcomp pcre_regcomp
#define regexec pcre_regexec
#define regerror pcre_regerror
@@ -140,12 +140,12 @@ file. */
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
- regmatch_t *, int);
+ regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void regfree(regex_t *);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* End of pcreposix.h */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* End of pcreposix.h */
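regexec() in pcreposix.c also honours REG_STARTEND, the BSD extension for non-NUL-terminated subjects: pmatch[0].rm_so/rm_eo pass the subject bounds in, and the reported offsets stay relative to the start of the buffer. A short sketch, assuming REG_STARTEND is defined in this build of the header:

#include <stdio.h>
#include <string.h>
#include "pcreposix.h"

int main(void)
{
    regex_t re;
    regmatch_t m[1];
    const char *buf = "needle in a haystack";        /* imagine this buffer were not NUL-terminated */

    if (regcomp(&re, "hay+", 0) != 0) return 1;

    m[0].rm_so = 7;                                  /* search only buf[7..20) */
    m[0].rm_eo = (regoff_t)strlen(buf);
    if (regexec(&re, buf, 1, m, REG_STARTEND) == 0)
        printf("hit at [%d, %d)\n", (int)m[0].rm_so, (int)m[0].rm_eo);   /* offsets relative to buf: [12, 15) */

    regfree(&re);
    return 0;
}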
diff --git a/contrib/libs/pcre/ucp.h b/contrib/libs/pcre/ucp.h
index a052b6d069..2fa00296e4 100644
--- a/contrib/libs/pcre/ucp.h
+++ b/contrib/libs/pcre/ucp.h
@@ -1,14 +1,14 @@
-/*************************************************
-* Unicode Property Table handler *
-*************************************************/
-
-#ifndef _UCP_H
-#define _UCP_H
-
-/* This file contains definitions of the property values that are returned by
+/*************************************************
+* Unicode Property Table handler *
+*************************************************/
+
+#ifndef _UCP_H
+#define _UCP_H
+
+/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.
-
+
IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.
@@ -16,53 +16,53 @@ where the equivalent property descriptive names are listed in vectors.
ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c. */
-/* These are the general character categories. */
-
-enum {
- ucp_C, /* Other */
- ucp_L, /* Letter */
- ucp_M, /* Mark */
- ucp_N, /* Number */
- ucp_P, /* Punctuation */
- ucp_S, /* Symbol */
- ucp_Z /* Separator */
-};
-
+/* These are the general character categories. */
+
+enum {
+ ucp_C, /* Other */
+ ucp_L, /* Letter */
+ ucp_M, /* Mark */
+ ucp_N, /* Number */
+ ucp_P, /* Punctuation */
+ ucp_S, /* Symbol */
+ ucp_Z /* Separator */
+};
+
/* These are the particular character categories. */
-
-enum {
- ucp_Cc, /* Control */
- ucp_Cf, /* Format */
- ucp_Cn, /* Unassigned */
- ucp_Co, /* Private use */
- ucp_Cs, /* Surrogate */
- ucp_Ll, /* Lower case letter */
- ucp_Lm, /* Modifier letter */
- ucp_Lo, /* Other letter */
- ucp_Lt, /* Title case letter */
- ucp_Lu, /* Upper case letter */
- ucp_Mc, /* Spacing mark */
- ucp_Me, /* Enclosing mark */
- ucp_Mn, /* Non-spacing mark */
- ucp_Nd, /* Decimal number */
- ucp_Nl, /* Letter number */
- ucp_No, /* Other number */
- ucp_Pc, /* Connector punctuation */
- ucp_Pd, /* Dash punctuation */
- ucp_Pe, /* Close punctuation */
- ucp_Pf, /* Final punctuation */
- ucp_Pi, /* Initial punctuation */
- ucp_Po, /* Other punctuation */
- ucp_Ps, /* Open punctuation */
- ucp_Sc, /* Currency symbol */
- ucp_Sk, /* Modifier symbol */
- ucp_Sm, /* Mathematical symbol */
- ucp_So, /* Other symbol */
- ucp_Zl, /* Line separator */
- ucp_Zp, /* Paragraph separator */
- ucp_Zs /* Space separator */
-};
-
+
+enum {
+ ucp_Cc, /* Control */
+ ucp_Cf, /* Format */
+ ucp_Cn, /* Unassigned */
+ ucp_Co, /* Private use */
+ ucp_Cs, /* Surrogate */
+ ucp_Ll, /* Lower case letter */
+ ucp_Lm, /* Modifier letter */
+ ucp_Lo, /* Other letter */
+ ucp_Lt, /* Title case letter */
+ ucp_Lu, /* Upper case letter */
+ ucp_Mc, /* Spacing mark */
+ ucp_Me, /* Enclosing mark */
+ ucp_Mn, /* Non-spacing mark */
+ ucp_Nd, /* Decimal number */
+ ucp_Nl, /* Letter number */
+ ucp_No, /* Other number */
+ ucp_Pc, /* Connector punctuation */
+ ucp_Pd, /* Dash punctuation */
+ ucp_Pe, /* Close punctuation */
+ ucp_Pf, /* Final punctuation */
+ ucp_Pi, /* Initial punctuation */
+ ucp_Po, /* Other punctuation */
+ ucp_Ps, /* Open punctuation */
+ ucp_Sc, /* Currency symbol */
+ ucp_Sk, /* Modifier symbol */
+ ucp_Sm, /* Mathematical symbol */
+ ucp_So, /* Other symbol */
+ ucp_Zl, /* Line separator */
+ ucp_Zp, /* Paragraph separator */
+ ucp_Zs /* Space separator */
+};
+
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. */
@@ -83,70 +83,70 @@ enum {
ucp_gbOther /* 12 */
};
-/* These are the script identifications. */
-
-enum {
- ucp_Arabic,
- ucp_Armenian,
- ucp_Bengali,
- ucp_Bopomofo,
- ucp_Braille,
- ucp_Buginese,
- ucp_Buhid,
- ucp_Canadian_Aboriginal,
- ucp_Cherokee,
- ucp_Common,
- ucp_Coptic,
- ucp_Cypriot,
- ucp_Cyrillic,
- ucp_Deseret,
- ucp_Devanagari,
- ucp_Ethiopic,
- ucp_Georgian,
- ucp_Glagolitic,
- ucp_Gothic,
- ucp_Greek,
- ucp_Gujarati,
- ucp_Gurmukhi,
- ucp_Han,
- ucp_Hangul,
- ucp_Hanunoo,
- ucp_Hebrew,
- ucp_Hiragana,
- ucp_Inherited,
- ucp_Kannada,
- ucp_Katakana,
- ucp_Kharoshthi,
- ucp_Khmer,
- ucp_Lao,
- ucp_Latin,
- ucp_Limbu,
- ucp_Linear_B,
- ucp_Malayalam,
- ucp_Mongolian,
- ucp_Myanmar,
- ucp_New_Tai_Lue,
- ucp_Ogham,
- ucp_Old_Italic,
- ucp_Old_Persian,
- ucp_Oriya,
- ucp_Osmanya,
- ucp_Runic,
- ucp_Shavian,
- ucp_Sinhala,
- ucp_Syloti_Nagri,
- ucp_Syriac,
- ucp_Tagalog,
- ucp_Tagbanwa,
- ucp_Tai_Le,
- ucp_Tamil,
- ucp_Telugu,
- ucp_Thaana,
- ucp_Thai,
- ucp_Tibetan,
- ucp_Tifinagh,
- ucp_Ugaritic,
- ucp_Yi,
+/* These are the script identifications. */
+
+enum {
+ ucp_Arabic,
+ ucp_Armenian,
+ ucp_Bengali,
+ ucp_Bopomofo,
+ ucp_Braille,
+ ucp_Buginese,
+ ucp_Buhid,
+ ucp_Canadian_Aboriginal,
+ ucp_Cherokee,
+ ucp_Common,
+ ucp_Coptic,
+ ucp_Cypriot,
+ ucp_Cyrillic,
+ ucp_Deseret,
+ ucp_Devanagari,
+ ucp_Ethiopic,
+ ucp_Georgian,
+ ucp_Glagolitic,
+ ucp_Gothic,
+ ucp_Greek,
+ ucp_Gujarati,
+ ucp_Gurmukhi,
+ ucp_Han,
+ ucp_Hangul,
+ ucp_Hanunoo,
+ ucp_Hebrew,
+ ucp_Hiragana,
+ ucp_Inherited,
+ ucp_Kannada,
+ ucp_Katakana,
+ ucp_Kharoshthi,
+ ucp_Khmer,
+ ucp_Lao,
+ ucp_Latin,
+ ucp_Limbu,
+ ucp_Linear_B,
+ ucp_Malayalam,
+ ucp_Mongolian,
+ ucp_Myanmar,
+ ucp_New_Tai_Lue,
+ ucp_Ogham,
+ ucp_Old_Italic,
+ ucp_Old_Persian,
+ ucp_Oriya,
+ ucp_Osmanya,
+ ucp_Runic,
+ ucp_Shavian,
+ ucp_Sinhala,
+ ucp_Syloti_Nagri,
+ ucp_Syriac,
+ ucp_Tagalog,
+ ucp_Tagbanwa,
+ ucp_Tai_Le,
+ ucp_Tamil,
+ ucp_Telugu,
+ ucp_Thaana,
+ ucp_Thai,
+ ucp_Tibetan,
+ ucp_Tifinagh,
+ ucp_Ugaritic,
+ ucp_Yi,
/* New for Unicode 5.0: */
ucp_Balinese,
ucp_Cuneiform,
@@ -217,8 +217,8 @@ enum {
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi
-};
-
-#endif
-
-/* End of ucp.h */
+};
+
+#endif
+
+/* End of ucp.h */
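ucp.h above keeps two parallel enums: the general categories (ucp_C, ucp_L, ucp_N, ...) and the particular ones (ucp_Ll, ucp_Lu, ...). pcre_xclass.c reduces a particular category to its general one via PRIV(ucp_gentype), e.g. for the PT_GC and PT_ALNUM tests. A toy illustration of that reduction — the table below is hand-rolled and truncated, not PCRE's real ucp_gentype data:

#include <stdio.h>

enum { ucp_C, ucp_L, ucp_M, ucp_N, ucp_P, ucp_S, ucp_Z };                 /* general categories */
enum { ucp_Cc, ucp_Cf, ucp_Cn, ucp_Co, ucp_Cs, ucp_Ll, ucp_Lm, ucp_Lo,    /* particular categories */
       ucp_Lt, ucp_Lu };

static const int gentype[] = {
    ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,   /* Cc Cf Cn Co Cs */
    ucp_L, ucp_L, ucp_L, ucp_L, ucp_L    /* Ll Lm Lo Lt Lu */
};

int main(void)
{
    /* a \p{L}-style test reduces to: gentype[chartype] == ucp_L */
    printf("%d\n", gentype[ucp_Lu] == ucp_L);   /* prints: 1 */
    return 0;
}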
diff --git a/contrib/libs/pcre/ya.make b/contrib/libs/pcre/ya.make
index 26b8b1a571..85ae150697 100644
--- a/contrib/libs/pcre/ya.make
+++ b/contrib/libs/pcre/ya.make
@@ -1,12 +1,12 @@
# Generated by devtools/yamaker from nixpkgs a58a0b5098f0c2a389ee70eb69422a052982d990.
-LIBRARY()
+LIBRARY()
OWNER(
orivej
g:cpp-contrib
)
-
+
VERSION(8.44)
ORIGINAL_SOURCE(https://ftp.pcre.org/pub/pcre/pcre-8.44.tar.bz2)
@@ -26,14 +26,14 @@ ADDINCL(
)
NO_COMPILER_WARNINGS()
-
+
NO_RUNTIME()
CFLAGS(
GLOBAL -DPCRE_STATIC
-DHAVE_CONFIG_H
)
-
+
# JIT adds ≈108KB to the binary size, which may be critical for binary distributions on mobile and embedded devices
DEFAULT(ARCADIA_PCRE_ENABLE_JIT yes)
@@ -43,32 +43,32 @@ IF (ARCADIA_PCRE_ENABLE_JIT)
)
ENDIF()
-SRCS(
+SRCS(
pcre_byte_order.c
pcre_chartables.c
- pcre_compile.c
- pcre_config.c
- pcre_dfa_exec.c
- pcre_exec.c
- pcre_fullinfo.c
- pcre_get.c
- pcre_globals.c
+ pcre_compile.c
+ pcre_config.c
+ pcre_dfa_exec.c
+ pcre_exec.c
+ pcre_fullinfo.c
+ pcre_get.c
+ pcre_globals.c
pcre_jit_compile.c
- pcre_maketables.c
- pcre_newline.c
- pcre_ord2utf8.c
- pcre_refcount.c
+ pcre_maketables.c
+ pcre_newline.c
+ pcre_ord2utf8.c
+ pcre_refcount.c
pcre_string_utils.c
- pcre_study.c
- pcre_tables.c
+ pcre_study.c
+ pcre_tables.c
pcre_ucd.c
- pcre_valid_utf8.c
- pcre_version.c
- pcre_xclass.c
- pcreposix.c
-)
-
-END()
+ pcre_valid_utf8.c
+ pcre_version.c
+ pcre_xclass.c
+ pcreposix.c
+)
+
+END()
RECURSE(
pcre16
diff --git a/contrib/libs/pdqsort/ya.make b/contrib/libs/pdqsort/ya.make
index cfb0bab273..bba3e8e78d 100644
--- a/contrib/libs/pdqsort/ya.make
+++ b/contrib/libs/pdqsort/ya.make
@@ -7,8 +7,8 @@ OWNER(
LIBRARY()
-LICENSE(Zlib)
-
+LICENSE(Zlib)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(978bc36a9bd4143a54b2551cfd9ce8a6afd6d04c)
diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp
index 5a97351e41..468ff61d92 100644
--- a/contrib/libs/pire/pire/extra/count.cpp
+++ b/contrib/libs/pire/pire/extra/count.cpp
@@ -11,7 +11,7 @@
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -33,7 +33,7 @@
#include <tuple>
namespace Pire {
-
+
namespace Impl {
typedef LoadedScanner::Action Action;
@@ -886,28 +886,28 @@ public:
using TAction = typename Scanner::Action;
using InternalState = typename Scanner::InternalState;
typedef TMap<State, size_t> InvStates;
-
+
CountingScannerGlueTask(const Scanner& lhs, const Scanner& rhs)
: ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters))
{
}
-
+
void AcceptStates(const TVector<State>& states)
{
States = states;
this->SetSc(THolder<Scanner>(new Scanner));
this->Sc().Init(states.size(), this->Letters(), 0, this->Lhs().RegexpsCount() + this->Rhs().RegexpsCount());
-
+
for (size_t i = 0; i < states.size(); ++i)
this->Sc().SetTag(i, this->Lhs().m_tags[this->Lhs().StateIdx(states[i].first)] | (this->Rhs().m_tags[this->Rhs().StateIdx(states[i].second)] << 3));
}
-
+
void Connect(size_t from, size_t to, Char letter)
{
this->Sc().SetJump(from, letter, to,
Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount()));
}
-
+
protected:
TVector<State> States;
TAction Action(const Scanner& sc, InternalState state, Char letter) const
@@ -981,7 +981,7 @@ private:
}
-
+
CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */)
{
if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h
index 03c2659c80..bd1526b98d 100644
--- a/contrib/libs/pire/pire/extra/count.h
+++ b/contrib/libs/pire/pire/extra/count.h
@@ -11,7 +11,7 @@
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -31,11 +31,11 @@
namespace Pire {
class Fsm;
-
+
namespace Impl {
template<class T>
class ScannerGlueCommon;
-
+
template<class T>
class CountingScannerGlueTask;
@@ -121,7 +121,7 @@ public:
enum {
IncrementAction = 1,
ResetAction = 2,
-
+
FinalFlag = 0,
DeadFlag = 1,
};
@@ -170,7 +170,7 @@ public:
bool Dead(const State&) const { return false; }
using LoadedScanner::Swap;
-
+
size_t StateIndex(const State& s) const { return StateIdx(s.m_state); }
protected:
@@ -192,7 +192,7 @@ protected:
mask &= s.m_updatedMask;
if (mask) {
ResetPerformer<ActualReCount>::Do(s, mask);
- s.m_updatedMask &= (Action)~mask;
+ s.m_updatedMask &= (Action)~mask;
}
}
@@ -267,7 +267,7 @@ private:
else
return 0;
}
-
+
friend void BuildScanner<CountingScanner>(const Fsm&, CountingScanner&);
friend class Impl::ScannerGlueCommon<CountingScanner>;
friend class Impl::CountingScannerGlueTask<CountingScanner>;
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index 3b77a3979e..120dc403b7 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -11,7 +11,7 @@
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -68,16 +68,16 @@ public:
};
// Override in subclass, if necessary
- enum {
+ enum {
FinalFlag = 0,
DeadFlag = 0
};
static const size_t MAX_RE_COUNT = 16;
-protected:
+protected:
LoadedScanner() { Alias(Null()); }
-
+
LoadedScanner(const LoadedScanner& s): m(s.m)
{
if (s.m_buffer) {
@@ -137,7 +137,7 @@ public:
Locals* locals;
Impl::MapPtr(locals, 1, p, size);
memcpy(&s.m, locals, sizeof(s.m));
-
+
Impl::MapPtr(s.m_letters, MaxChar, p, size);
Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size);
if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
@@ -145,7 +145,7 @@ public:
Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size);
}
Impl::MapPtr(s.m_tags, s.m.statesCount, p, size);
-
+
s.m.initial += reinterpret_cast<size_t>(s.m_jumps);
Swap(s);
@@ -260,14 +260,14 @@ private:
static const LoadedScanner n = Fsm::MakeFalse().Compile<LoadedScanner>();
return n;
}
-
+
void Markup(void* buf)
{
m_letters = reinterpret_cast<Letter*>(buf);
m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar);
m_tags = reinterpret_cast<Tag*>(m_jumps + m.statesCount * m.lettersCount);
}
-
+
void Alias(const LoadedScanner& s)
{
memcpy(&m, &s.m, sizeof(m));
@@ -283,9 +283,9 @@ private:
Init(states, letters, startState, regexpsCount);
}
- friend class Fsm;
+ friend class Fsm;
};
-
+
inline LoadedScanner::~LoadedScanner() = default;
}
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index 105b44ca3e..29679e416e 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -82,7 +82,7 @@ namespace Impl {
};
-// Scanner implementation parametrized by
+// Scanner implementation parametrized by
// - transition table representation strategy
// - strategy for fast forwarding through memory ranges
template<class Relocation, class Shortcutting>
@@ -120,7 +120,7 @@ public:
typedef typename Shortcutting::template ExtendedRowHeader<Scanner> ScannerRowHeader;
Scanner() { Alias(Null()); }
-
+
explicit Scanner(Fsm& fsm, size_t distance = 0)
{
if (distance) {
@@ -257,14 +257,14 @@ public:
throw Error("Type mismatch while mmapping Pire::Scanner");
Impl::AdvancePtr(p, size, sizeof(s.m));
Impl::AlignPtr(p, size);
-
+
if (Shortcutting::Signature != s.m.shortcuttingSignature)
throw Error("This scanner has different shortcutting type");
-
+
bool empty = *((const bool*) p);
Impl::AdvancePtr(p, size, sizeof(empty));
Impl::AlignPtr(p, size);
-
+
if (empty)
s.Alias(Null());
else {
@@ -335,8 +335,8 @@ protected:
inline static const Scanner& Null()
{
static const Scanner n = Fsm::MakeFalse().Compile< Scanner<Relocation, Shortcutting> >();
-
- return n;
+
+ return n;
}
// Returns the transition row size in Transitions. Row size in bytes should be a multiple of sizeof(MaxSizeWord)
@@ -394,7 +394,7 @@ protected:
m_finalIndex = s.m_finalIndex;
m_transitions = s.m_transitions;
}
-
+
template<class AnotherRelocation>
void DeepCopy(const Scanner<AnotherRelocation, Shortcutting>& s)
{
@@ -582,7 +582,7 @@ struct ScannerSaver {
bool empty;
LoadPodType(s, empty);
Impl::AlignLoad(s, sizeof(empty));
-
+
if (empty) {
sc.Alias(ScannerType::Null());
} else {
@@ -596,13 +596,13 @@ struct ScannerSaver {
// TODO: implement more effective serialization
// of nonrelocatable scanner if necessary
-
+
template<class Shortcutting>
static void SaveScanner(const Scanner<Nonrelocatable, Shortcutting>& scanner, yostream* s)
{
Scanner<Relocatable, Shortcutting>(scanner).Save(s);
}
-
+
template<class Shortcutting>
static void LoadScanner(Scanner<Nonrelocatable, Shortcutting>& scanner, yistream* s)
{
@@ -633,7 +633,7 @@ private:
NO_SHORTCUT_MASK = 1, // the state doesn't have shortcuts
 NO_EXIT_MASK = 2 // the state has only transitions to itself (we can stop the scan)
};
-
+
template<class ScannerRowHeader, unsigned N>
struct MaskCheckerBase {
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
@@ -645,7 +645,7 @@ private:
}
return !IsAnySet(mask);
}
-
+
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
{
@@ -653,12 +653,12 @@ private:
return begin;
}
};
-
+
template<class ScannerRowHeader, unsigned N, unsigned Nmax>
struct MaskChecker : MaskCheckerBase<ScannerRowHeader, N> {
typedef MaskCheckerBase<ScannerRowHeader, N> Base;
typedef MaskChecker<ScannerRowHeader, N+1, Nmax> Next;
-
+
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
{
@@ -668,17 +668,17 @@ private:
return Next::Run(hdr, alignOffset, begin, end);
}
};
-
+
template<class ScannerRowHeader, unsigned N>
struct MaskChecker<ScannerRowHeader, N, N> : MaskCheckerBase<ScannerRowHeader, N> {
typedef MaskCheckerBase<ScannerRowHeader, N> Base;
-
+
static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
{
return Base::DoRun(hdr, alignOffset, begin, end);
}
- };
+ };
// Compares the ExitMask[0] value without SSE reads which seems to be more optimal
template <class Relocation>
@@ -704,7 +704,7 @@ public:
MaskSizeInSizeT = 2 * SizeTInMaxSizeWord,
};
- public:
+ public:
static const size_t ExitMaskCount = MaskCount;
inline
@@ -716,14 +716,14 @@ public:
Y_ASSERT(IsAligned(p, sizeof(Word)));
return *p;
}
-
+
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
size_t Mask(size_t i) const
{
Y_ASSERT(i < ExitMaskCount);
return ExitMasksArray[MaskSizeInSizeT*i];
}
-
+
void SetMask(size_t i, size_t val)
{
for (size_t j = 0; j < MaskSizeInSizeT; ++j)
@@ -735,7 +735,7 @@ public:
for (size_t i = 0; i < ExitMaskCount; ++i)
SetMask(i, NO_SHORTCUT_MASK);
}
-
+
template <class OtherScanner>
ExtendedRowHeader& operator =(const ExtendedRowHeader<OtherScanner>& other)
{
@@ -932,7 +932,7 @@ public:
static inline PIRE_HOT_FUNCTION
Action RunAligned(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, const size_t* end , Pred pred)
{
- typename ScannerType::State state = st;
+ typename ScannerType::State state = st;
const Word* head = AlignUp((const Word*) begin, sizeof(Word));
const Word* tail = AlignDown((const Word*) end, sizeof(Word));
for (; begin != (const size_t*) head && begin != end; ++begin)
@@ -940,7 +940,7 @@ public:
st = state;
return Stop;
}
-
+
if (begin == end) {
st = state;
return Continue;
@@ -949,7 +949,7 @@ public:
st = state;
return pred(scanner, state, ((const char*) end));
}
-
+
// Row size should be a multiple of MaxSizeWord size. Then alignOffset is the same for any state
Y_ASSERT((scanner.RowSize()*sizeof(typename ScannerType::Transition)) % sizeof(MaxSizeWord) == 0);
size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t);
@@ -980,14 +980,14 @@ public:
head = skipEnd;
noShortcut = true;
}
-
+
for (size_t* p = (size_t*) tail; p != end; ++p) {
if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Stop) {
st = state;
return Stop;
}
}
-
+
st = state;
return Continue;
}
@@ -1004,36 +1004,36 @@ public:
using Base::Rhs;
using Base::Sc;
using Base::Letters;
-
+
typedef GluedStateLookupTable<256*1024, typename Scanner::State> InvStates;
-
+
ScannerGlueTask(const Scanner& lhs, const Scanner& rhs)
: ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters))
{
}
-
+
void AcceptStates(const TVector<State>& states)
{
// Make up a new scanner and fill in the final table
-
+
size_t finalTableSize = 0;
for (auto&& i : states)
finalTableSize += RangeLen(Lhs().AcceptedRegexps(i.first)) + RangeLen(Rhs().AcceptedRegexps(i.second));
this->SetSc(THolder<Scanner>(new Scanner));
Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount());
-
+
auto finalWriter = Sc().m_final;
for (size_t state = 0; state != states.size(); ++state) {
Sc().m_finalIndex[state] = finalWriter - Sc().m_final;
finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter);
finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter);
*finalWriter++ = static_cast<size_t>(-1);
-
+
Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0)
| ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0));
}
}
-
+
void Connect(size_t from, size_t to, Char letter) { Sc().SetJump(from, letter, to); }
const Scanner& Success()
@@ -1041,8 +1041,8 @@ public:
Sc().BuildShortcuts();
return Sc();
}
-
-private:
+
+private:
template<class Iter>
size_t RangeLen(ypair<Iter, Iter> range) const
{
@@ -1089,7 +1089,7 @@ Impl::Scanner<Relocation, Shortcutting> Impl::Scanner<Relocation, Shortcutting>:
return rhs;
if (rhs.Empty())
return lhs;
-
+
static const size_t DefMaxSize = 80000;
Impl::ScannerGlueTask< Impl::Scanner<Relocation, Shortcutting> > task(lhs, rhs);
return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h
index ab0aca6ae1..ef959aeed1 100644
--- a/contrib/libs/pire/pire/scanners/simple.h
+++ b/contrib/libs/pire/pire/scanners/simple.h
@@ -11,7 +11,7 @@
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -48,7 +48,7 @@ public:
typedef ui8 Tag;
SimpleScanner() { Alias(Null()); }
-
+
explicit SimpleScanner(Fsm& fsm, size_t distance = 0);
size_t Size() const { return m.statesCount; }
@@ -96,7 +96,7 @@ public:
m.initial += (m_transitions - s.m_transitions) * sizeof(Transition);
}
}
-
+
// Makes a shallow ("weak") copy of the given scanner.
// The copied scanner does not maintain lifetime of the original's entrails.
void Alias(const SimpleScanner& s)
@@ -139,7 +139,7 @@ public:
bool empty = *((const bool*) p);
Impl::AdvancePtr(p, size, sizeof(empty));
Impl::AlignPtr(p, size);
-
+
if (empty)
s.Alias(Null());
else {
@@ -235,7 +235,7 @@ inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
fsm = CreateApproxFsm(fsm, distance);
}
fsm.Canonize();
-
+
m.statesCount = fsm.Size();
m_buffer = BufferType(new char[BufSize()]);
memset(m_buffer.Get(), 0, BufSize());
@@ -255,7 +255,7 @@ inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
}
}
-
+
}
#endif
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 6a12817623..6adfcb8c1d 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -11,7 +11,7 @@
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -56,7 +56,7 @@ public:
typedef ui32 Action;
typedef ui8 Tag;
- enum {
+ enum {
FinalFlag = 1,
DeadFlag = 0
};
@@ -84,7 +84,7 @@ public:
size_t Size() const { return GetSize(); }
size_t GetSize() const { return m.statesCount; }
bool Empty() const { return m_finals == Null().m_finals; }
-
+
size_t Id() const {return (size_t) -1;}
size_t RegexpsCount() const { return Empty() ? 0 : 1; }
@@ -170,7 +170,7 @@ public:
bool CanStop(const State& s) const {
return Final(s);
}
-
+
const void* Mmap(const void* ptr, size_t size)
{
Impl::CheckAlign(ptr);
@@ -181,11 +181,11 @@ public:
Locals* locals;
Impl::MapPtr(locals, 1, p, size);
memcpy(&s.m, locals, sizeof(s.m));
-
+
bool empty = *((const bool*) p);
Impl::AdvancePtr(p, size, sizeof(empty));
Impl::AlignPtr(p, size);
-
+
if (empty)
s.Alias(Null());
else {
@@ -213,7 +213,7 @@ public:
DoSwap(m_letters, s.m_letters);
DoSwap(m_pool, s.m_pool);
DoSwap(m_vec, s.m_vec);
-
+
DoSwap(m_vecptr, s.m_vecptr);
DoSwap(need_actions, s.need_actions);
DoSwap(m_actionsvec, s.m_actionsvec);
@@ -249,7 +249,7 @@ public:
m_vecptr = &m_vec;
}
}
-
+
explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
: need_actions(needActions)
{
@@ -365,9 +365,9 @@ private:
memset(p, 0, size * sizeof(T));
m_pool.push_back(p);
}
-
+
void Alias(const SlowScanner& s)
- {
+ {
memcpy(&m, &s.m, sizeof(m));
m_vec.clear();
need_actions = s.need_actions;
@@ -380,7 +380,7 @@ private:
m_vecptr = s.m_vecptr;
m_pool.clear();
}
-
+
void SetJump(size_t oldState, Char c, size_t newState, unsigned long action)
{
Y_ASSERT(!m_vec.empty());
@@ -397,7 +397,7 @@ private:
void SetInitial(size_t state) { m.start = state; }
void SetTag(size_t state, ui8 tag) { m_finals[state] = (tag != 0); }
-
+
void FinishBuild() {}
static ypair<const size_t*, const size_t*> Accept()
@@ -427,7 +427,7 @@ inline const SlowScanner& SlowScanner::Null()
return n;
}
-#ifndef PIRE_DEBUG
+#ifndef PIRE_DEBUG
/// A specialization of Run(), since its state is much heavier than other ones
/// and we thus want to avoid copying states.
template<>
diff --git a/contrib/libs/pire/pire/stub/stl.h b/contrib/libs/pire/pire/stub/stl.h
index 53247b2afd..98ebd9f7c6 100644
--- a/contrib/libs/pire/pire/stub/stl.h
+++ b/contrib/libs/pire/pire/stub/stl.h
@@ -27,10 +27,10 @@
namespace Pire {
using ystring = TString;
- template<size_t N> using ybitset = std::bitset<N>;
+ template<size_t N> using ybitset = std::bitset<N>;
template<typename T1, typename T2> using ypair = std::pair<T1, T2>;
- template<typename T> using yauto_ptr = std::auto_ptr<T>;
- template<typename Arg1, typename Arg2, typename Result> using ybinary_function = std::binary_function<Arg1, Arg2, Result>;
+ template<typename T> using yauto_ptr = std::auto_ptr<T>;
+ template<typename Arg1, typename Arg2, typename Result> using ybinary_function = std::binary_function<Arg1, Arg2, Result>;
template<typename T1, typename T2>
inline ypair<T1, T2> ymake_pair(T1 v1, T2 v2) {
@@ -39,16 +39,16 @@ namespace Pire {
template<typename T>
inline T ymax(T v1, T v2) {
- return std::max(v1, v2);
+ return std::max(v1, v2);
}
template<typename T>
inline T ymin(T v1, T v2) {
- return std::min(v1, v2);
+ return std::min(v1, v2);
}
template<class Iter, class T>
- void Fill(Iter begin, Iter end, T t) { std::fill(begin, end, t); }
+ void Fill(Iter begin, Iter end, T t) { std::fill(begin, end, t); }
class Error: public yexception {
public:
diff --git a/contrib/libs/pire/pire/stub/utf8.h b/contrib/libs/pire/pire/stub/utf8.h
index 7bb05c8ef9..51ea0479d4 100644
--- a/contrib/libs/pire/pire/stub/utf8.h
+++ b/contrib/libs/pire/pire/stub/utf8.h
@@ -1,5 +1,5 @@
-#pragma once
-
+#pragma once
+
#include <library/cpp/charset/codepage.h>
#include <util/charset/unidata.h>
@@ -11,7 +11,7 @@ inline wchar32 to_upper(wchar32 c) {
}
inline bool is_digit(wchar32 c) {
- return IsDigit(c);
+ return IsDigit(c);
}
inline bool is_upper(wchar32 c) {
diff --git a/contrib/libs/poco/Crypto/ya.make b/contrib/libs/poco/Crypto/ya.make
index 37bd231c0a..cf58950785 100644
--- a/contrib/libs/poco/Crypto/ya.make
+++ b/contrib/libs/poco/Crypto/ya.make
@@ -11,7 +11,7 @@ LICENSE(
BSD-3-Clause AND
BSL-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/poco/Foundation/ya.make b/contrib/libs/poco/Foundation/ya.make
index 2e1346c3a5..9b22a79979 100644
--- a/contrib/libs/poco/Foundation/ya.make
+++ b/contrib/libs/poco/Foundation/ya.make
@@ -15,7 +15,7 @@ LICENSE(
RSA-MD AND
RSA-MD4
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/poco/JSON/ya.make b/contrib/libs/poco/JSON/ya.make
index 98aaf57650..11e0ff72b1 100644
--- a/contrib/libs/poco/JSON/ya.make
+++ b/contrib/libs/poco/JSON/ya.make
@@ -11,7 +11,7 @@ LICENSE(
BSD-3-Clause AND
BSL-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/poco/Net/ya.make b/contrib/libs/poco/Net/ya.make
index c0daf1e991..9feadc7027 100644
--- a/contrib/libs/poco/Net/ya.make
+++ b/contrib/libs/poco/Net/ya.make
@@ -12,7 +12,7 @@ LICENSE(
BSL-1.0 AND
Custom-Punycode
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/poco/NetSSL_OpenSSL/ya.make b/contrib/libs/poco/NetSSL_OpenSSL/ya.make
index 599ebc617b..ba6f99d872 100644
--- a/contrib/libs/poco/NetSSL_OpenSSL/ya.make
+++ b/contrib/libs/poco/NetSSL_OpenSSL/ya.make
@@ -11,7 +11,7 @@ LICENSE(
BSD-3-Clause AND
BSL-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/poco/Util/ya.make b/contrib/libs/poco/Util/ya.make
index 2f349bb7c6..3a295763c6 100644
--- a/contrib/libs/poco/Util/ya.make
+++ b/contrib/libs/poco/Util/ya.make
@@ -11,7 +11,7 @@ LICENSE(
BSD-3-Clause AND
BSL-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/poco/XML/ya.make b/contrib/libs/poco/XML/ya.make
index 40c4504028..ffa0227a18 100644
--- a/contrib/libs/poco/XML/ya.make
+++ b/contrib/libs/poco/XML/ya.make
@@ -11,7 +11,7 @@ LICENSE(
BSD-3-Clause AND
BSL-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
diff --git a/contrib/libs/python/Include/ya.make b/contrib/libs/python/Include/ya.make
index 556ec54e08..e65263e6a0 100644
--- a/contrib/libs/python/Include/ya.make
+++ b/contrib/libs/python/Include/ya.make
@@ -3,7 +3,7 @@ PY23_LIBRARY()
WITHOUT_LICENSE_TEXTS()
LICENSE(YandexOpen)
-
+
OWNER(
orivej
spreis
diff --git a/contrib/libs/python/ut/lib/ya.make b/contrib/libs/python/ut/lib/ya.make
index 07b2c246f1..cfa0aaa612 100644
--- a/contrib/libs/python/ut/lib/ya.make
+++ b/contrib/libs/python/ut/lib/ya.make
@@ -1,17 +1,17 @@
OWNER(spreis)
PY23_LIBRARY()
-
+
WITHOUT_LICENSE_TEXTS()
LICENSE(YandexOpen)
-
+
PEERDIR(
library/cpp/testing/unittest
)
-
+
SRCS(
test.cpp
)
-
+
END()
diff --git a/contrib/libs/python/ya.make b/contrib/libs/python/ya.make
index f635a96ab0..20a57f4b48 100644
--- a/contrib/libs/python/ya.make
+++ b/contrib/libs/python/ya.make
@@ -7,7 +7,7 @@ OWNER(
PY23_LIBRARY()
LICENSE(YandexOpen)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
NO_PYTHON_INCLUDES()
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index 718bc9c679..85f16f060b 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -1329,7 +1329,7 @@ static bool ParseInteger(StringPiece* s, int* np) {
if (s->empty() || !isdigit((*s)[0] & 0xFF))
return false;
// Disallow leading zeros.
- if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
+ if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
return false;
int n = 0;
int c;
@@ -1471,7 +1471,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
int code;
switch (c) {
default:
- if (c < Runeself && !isalpha(c) && !isdigit(c)) {
+ if (c < Runeself && !isalpha(c) && !isdigit(c)) {
// Escaped non-word characters are always themselves.
// PCRE is not quite so rigorous: it accepts things like
// \q, but we don't. We once rejected \_, but too many
@@ -1633,11 +1633,11 @@ static const UGroup* LookupGroup(const StringPiece& name,
}
// Look for a POSIX group with the given name (e.g., "[:^alpha:]")
-static const UGroup* LookupPosixGroup(const StringPiece& name) {
+static const UGroup* LookupPosixGroup(const StringPiece& name) {
return LookupGroup(name, posix_groups, num_posix_groups);
}
-static const UGroup* LookupPerlGroup(const StringPiece& name) {
+static const UGroup* LookupPerlGroup(const StringPiece& name) {
return LookupGroup(name, perl_groups, num_perl_groups);
}
@@ -1648,7 +1648,7 @@ static URange32 any32[] = { { 65536, Runemax } };
static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
// Look for a Unicode group with the given name (e.g., "Han")
-static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
+static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
// Special case: "Any" means any.
if (name == StringPiece("Any"))
return &anygroup;
@@ -1708,7 +1708,7 @@ static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
// On success, sets *s to span the remainder of the string
// and returns the corresponding UGroup.
// The StringPiece must *NOT* be edited unless the call succeeds.
-const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) {
+const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) {
if (!(parse_flags & Regexp::PerlClasses))
return NULL;
if (s->size() < 2 || (*s)[0] != '\\')
@@ -1716,7 +1716,7 @@ const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_fl
// Could use StringPieceToRune, but there aren't
// any non-ASCII Perl group names.
StringPiece name(s->data(), 2);
- const UGroup *g = LookupPerlGroup(name);
+ const UGroup *g = LookupPerlGroup(name);
if (g == NULL)
return NULL;
s->remove_prefix(name.size());
@@ -1783,7 +1783,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
#if !defined(RE2_USE_ICU)
// Look up the group in the RE2 Unicode data.
- const UGroup *g = LookupUnicodeGroup(name);
+ const UGroup *g = LookupUnicodeGroup(name);
if (g == NULL) {
status->set_code(kRegexpBadCharRange);
status->set_error_arg(seq);
@@ -1843,7 +1843,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
q += 2;
StringPiece name(p, static_cast<size_t>(q - p));
- const UGroup *g = LookupPosixGroup(name);
+ const UGroup *g = LookupPosixGroup(name);
if (g == NULL) {
status->set_code(kRegexpBadCharRange);
status->set_error_arg(name);
@@ -1981,7 +1981,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
}
// Look for Perl character class symbols (extension).
- const UGroup *g = MaybeParsePerlCCEscape(s, flags_);
+ const UGroup *g = MaybeParsePerlCCEscape(s, flags_);
if (g != NULL) {
AddUGroup(re->ccb_, g, g->sign, flags_);
continue;
@@ -2456,7 +2456,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
}
}
- const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());
+ const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());
if (g != NULL) {
Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
re->ccb_ = new CharClassBuilder;
diff --git a/contrib/libs/re2/re2/perl_groups.cc b/contrib/libs/re2/re2/perl_groups.cc
index 605c0e4aa7..4687444581 100644
--- a/contrib/libs/re2/re2/perl_groups.cc
+++ b/contrib/libs/re2/re2/perl_groups.cc
@@ -5,21 +5,21 @@
namespace re2 {
-static const URange16 code1[] = { /* \d */
+static const URange16 code1[] = { /* \d */
{ 0x30, 0x39 },
};
-static const URange16 code2[] = { /* \s */
+static const URange16 code2[] = { /* \s */
{ 0x9, 0xa },
{ 0xc, 0xd },
{ 0x20, 0x20 },
};
-static const URange16 code3[] = { /* \w */
+static const URange16 code3[] = { /* \w */
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
{ 0x61, 0x7a },
};
-const UGroup perl_groups[] = {
+const UGroup perl_groups[] = {
{ "\\d", +1, code1, 1, 0, 0 },
{ "\\D", -1, code1, 1, 0, 0 },
{ "\\s", +1, code2, 3, 0, 0 },
@@ -27,64 +27,64 @@ const UGroup perl_groups[] = {
{ "\\w", +1, code3, 4, 0, 0 },
{ "\\W", -1, code3, 4, 0, 0 },
};
-const int num_perl_groups = 6;
-static const URange16 code4[] = { /* [:alnum:] */
+const int num_perl_groups = 6;
+static const URange16 code4[] = { /* [:alnum:] */
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x61, 0x7a },
};
-static const URange16 code5[] = { /* [:alpha:] */
+static const URange16 code5[] = { /* [:alpha:] */
{ 0x41, 0x5a },
{ 0x61, 0x7a },
};
-static const URange16 code6[] = { /* [:ascii:] */
+static const URange16 code6[] = { /* [:ascii:] */
{ 0x0, 0x7f },
};
-static const URange16 code7[] = { /* [:blank:] */
+static const URange16 code7[] = { /* [:blank:] */
{ 0x9, 0x9 },
{ 0x20, 0x20 },
};
-static const URange16 code8[] = { /* [:cntrl:] */
+static const URange16 code8[] = { /* [:cntrl:] */
{ 0x0, 0x1f },
{ 0x7f, 0x7f },
};
-static const URange16 code9[] = { /* [:digit:] */
+static const URange16 code9[] = { /* [:digit:] */
{ 0x30, 0x39 },
};
-static const URange16 code10[] = { /* [:graph:] */
+static const URange16 code10[] = { /* [:graph:] */
{ 0x21, 0x7e },
};
-static const URange16 code11[] = { /* [:lower:] */
+static const URange16 code11[] = { /* [:lower:] */
{ 0x61, 0x7a },
};
-static const URange16 code12[] = { /* [:print:] */
+static const URange16 code12[] = { /* [:print:] */
{ 0x20, 0x7e },
};
-static const URange16 code13[] = { /* [:punct:] */
+static const URange16 code13[] = { /* [:punct:] */
{ 0x21, 0x2f },
{ 0x3a, 0x40 },
{ 0x5b, 0x60 },
{ 0x7b, 0x7e },
};
-static const URange16 code14[] = { /* [:space:] */
+static const URange16 code14[] = { /* [:space:] */
{ 0x9, 0xd },
{ 0x20, 0x20 },
};
-static const URange16 code15[] = { /* [:upper:] */
+static const URange16 code15[] = { /* [:upper:] */
{ 0x41, 0x5a },
};
-static const URange16 code16[] = { /* [:word:] */
+static const URange16 code16[] = { /* [:word:] */
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
{ 0x61, 0x7a },
};
-static const URange16 code17[] = { /* [:xdigit:] */
+static const URange16 code17[] = { /* [:xdigit:] */
{ 0x30, 0x39 },
{ 0x41, 0x46 },
{ 0x61, 0x66 },
};
-const UGroup posix_groups[] = {
+const UGroup posix_groups[] = {
{ "[:alnum:]", +1, code4, 3, 0, 0 },
{ "[:^alnum:]", -1, code4, 3, 0, 0 },
{ "[:alpha:]", +1, code5, 2, 0, 0 },
@@ -114,6 +114,6 @@ const UGroup posix_groups[] = {
{ "[:xdigit:]", +1, code17, 3, 0, 0 },
{ "[:^xdigit:]", -1, code17, 3, 0, 0 },
};
-const int num_posix_groups = 28;
+const int num_posix_groups = 28;
} // namespace re2
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index 1a226d12b9..47fb385e4e 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -956,7 +956,7 @@ bool RE2::CheckRewriteString(const StringPiece& rewrite,
if (c == '\\') {
continue;
}
- if (!isdigit(c)) {
+ if (!isdigit(c)) {
*error = "Rewrite schema error: "
"'\\' must be followed by a digit or '\\'.";
return false;
diff --git a/contrib/libs/re2/re2/unicode_casefold.h b/contrib/libs/re2/re2/unicode_casefold.h
index d71f50f0b8..8bdbb42fbc 100644
--- a/contrib/libs/re2/re2/unicode_casefold.h
+++ b/contrib/libs/re2/re2/unicode_casefold.h
@@ -59,8 +59,8 @@ struct CaseFold {
int32_t delta;
};
-extern const CaseFold unicode_casefold[];
-extern const int num_unicode_casefold;
+extern const CaseFold unicode_casefold[];
+extern const int num_unicode_casefold;
extern const CaseFold unicode_tolower[];
extern const int num_unicode_tolower;
diff --git a/contrib/libs/re2/re2/unicode_groups.h b/contrib/libs/re2/re2/unicode_groups.h
index 512203c43a..75f55daa61 100644
--- a/contrib/libs/re2/re2/unicode_groups.h
+++ b/contrib/libs/re2/re2/unicode_groups.h
@@ -41,26 +41,26 @@ struct UGroup
{
const char *name;
int sign; // +1 for [abc], -1 for [^abc]
- const URange16 *r16;
+ const URange16 *r16;
int nr16;
- const URange32 *r32;
+ const URange32 *r32;
int nr32;
};
// Named by property or script name (e.g., "Nd", "N", "Han").
// Negated groups are not included.
-extern const UGroup unicode_groups[];
-extern const int num_unicode_groups;
+extern const UGroup unicode_groups[];
+extern const int num_unicode_groups;
// Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
// Negated groups are included.
-extern const UGroup posix_groups[];
-extern const int num_posix_groups;
+extern const UGroup posix_groups[];
+extern const int num_posix_groups;
// Named by Perl name (e.g., "\\d", "\\D").
// Negated groups are included.
-extern const UGroup perl_groups[];
-extern const int num_perl_groups;
+extern const UGroup perl_groups[];
+extern const int num_perl_groups;
} // namespace re2
diff --git a/contrib/libs/re2/util/utf.h b/contrib/libs/re2/util/utf.h
index 74a52727c3..85b4297239 100644
--- a/contrib/libs/re2/util/utf.h
+++ b/contrib/libs/re2/util/utf.h
@@ -18,7 +18,7 @@
#ifndef UTIL_UTF_H_
#define UTIL_UTF_H_
-#include <stdint.h>
+#include <stdint.h>
namespace re2 {
diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make
index 1af8c4ed6a..8072de2eb2 100644
--- a/contrib/libs/re2/ya.make
+++ b/contrib/libs/re2/ya.make
@@ -21,7 +21,7 @@ ADDINCL(
)
NO_COMPILER_WARNINGS()
-
+
IF (WITH_VALGRIND)
CFLAGS(
GLOBAL -DRE2_ON_VALGRIND
diff --git a/contrib/libs/snappy/snappy-c.cc b/contrib/libs/snappy/snappy-c.cc
index 0cb59c7296..473a0b0978 100644
--- a/contrib/libs/snappy/snappy-c.cc
+++ b/contrib/libs/snappy/snappy-c.cc
@@ -1,90 +1,90 @@
-// Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "snappy.h"
-#include "snappy-c.h"
-
-extern "C" {
-
-snappy_status snappy_compress(const char* input,
- size_t input_length,
- char* compressed,
- size_t *compressed_length) {
- if (*compressed_length < snappy_max_compressed_length(input_length)) {
- return SNAPPY_BUFFER_TOO_SMALL;
- }
- snappy::RawCompress(input, input_length, compressed, compressed_length);
- return SNAPPY_OK;
-}
-
-snappy_status snappy_uncompress(const char* compressed,
- size_t compressed_length,
- char* uncompressed,
- size_t* uncompressed_length) {
- size_t real_uncompressed_length;
- if (!snappy::GetUncompressedLength(compressed,
- compressed_length,
- &real_uncompressed_length)) {
- return SNAPPY_INVALID_INPUT;
- }
- if (*uncompressed_length < real_uncompressed_length) {
- return SNAPPY_BUFFER_TOO_SMALL;
- }
- if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) {
- return SNAPPY_INVALID_INPUT;
- }
- *uncompressed_length = real_uncompressed_length;
- return SNAPPY_OK;
-}
-
-size_t snappy_max_compressed_length(size_t source_length) {
- return snappy::MaxCompressedLength(source_length);
-}
-
-snappy_status snappy_uncompressed_length(const char *compressed,
- size_t compressed_length,
- size_t *result) {
- if (snappy::GetUncompressedLength(compressed,
- compressed_length,
- result)) {
- return SNAPPY_OK;
- } else {
- return SNAPPY_INVALID_INPUT;
- }
-}
-
-snappy_status snappy_validate_compressed_buffer(const char *compressed,
- size_t compressed_length) {
- if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) {
- return SNAPPY_OK;
- } else {
- return SNAPPY_INVALID_INPUT;
- }
-}
-
-} // extern "C"
+// Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "snappy.h"
+#include "snappy-c.h"
+
+extern "C" {
+
+snappy_status snappy_compress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t *compressed_length) {
+ if (*compressed_length < snappy_max_compressed_length(input_length)) {
+ return SNAPPY_BUFFER_TOO_SMALL;
+ }
+ snappy::RawCompress(input, input_length, compressed, compressed_length);
+ return SNAPPY_OK;
+}
+
+snappy_status snappy_uncompress(const char* compressed,
+ size_t compressed_length,
+ char* uncompressed,
+ size_t* uncompressed_length) {
+ size_t real_uncompressed_length;
+ if (!snappy::GetUncompressedLength(compressed,
+ compressed_length,
+ &real_uncompressed_length)) {
+ return SNAPPY_INVALID_INPUT;
+ }
+ if (*uncompressed_length < real_uncompressed_length) {
+ return SNAPPY_BUFFER_TOO_SMALL;
+ }
+ if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) {
+ return SNAPPY_INVALID_INPUT;
+ }
+ *uncompressed_length = real_uncompressed_length;
+ return SNAPPY_OK;
+}
+
+size_t snappy_max_compressed_length(size_t source_length) {
+ return snappy::MaxCompressedLength(source_length);
+}
+
+snappy_status snappy_uncompressed_length(const char *compressed,
+ size_t compressed_length,
+ size_t *result) {
+ if (snappy::GetUncompressedLength(compressed,
+ compressed_length,
+ result)) {
+ return SNAPPY_OK;
+ } else {
+ return SNAPPY_INVALID_INPUT;
+ }
+}
+
+snappy_status snappy_validate_compressed_buffer(const char *compressed,
+ size_t compressed_length) {
+ if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) {
+ return SNAPPY_OK;
+ } else {
+ return SNAPPY_INVALID_INPUT;
+ }
+}
+
+} // extern "C"
diff --git a/contrib/libs/snappy/snappy-c.h b/contrib/libs/snappy/snappy-c.h
index 826bccfded..32aa0c6b8b 100644
--- a/contrib/libs/snappy/snappy-c.h
+++ b/contrib/libs/snappy/snappy-c.h
@@ -1,138 +1,138 @@
-/*
- * Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Plain C interface (a wrapper around the C++ implementation).
- */
-
+/*
+ * Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Plain C interface (a wrapper around the C++ implementation).
+ */
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stddef.h>
-
-/*
- * Return values; see the documentation for each function to know
- * what each can return.
- */
-typedef enum {
- SNAPPY_OK = 0,
- SNAPPY_INVALID_INPUT = 1,
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/*
+ * Return values; see the documentation for each function to know
+ * what each can return.
+ */
+typedef enum {
+ SNAPPY_OK = 0,
+ SNAPPY_INVALID_INPUT = 1,
SNAPPY_BUFFER_TOO_SMALL = 2
-} snappy_status;
-
-/*
- * Takes the data stored in "input[0..input_length-1]" and stores
- * it in the array pointed to by "compressed".
- *
- * <compressed_length> signals the space available in "compressed".
- * If it is not at least equal to "snappy_max_compressed_length(input_length)",
- * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression,
- * <compressed_length> contains the true length of the compressed output,
- * and SNAPPY_OK is returned.
- *
- * Example:
- * size_t output_length = snappy_max_compressed_length(input_length);
- * char* output = (char*)malloc(output_length);
- * if (snappy_compress(input, input_length, output, &output_length)
- * == SNAPPY_OK) {
- * ... Process(output, output_length) ...
- * }
- * free(output);
- */
-snappy_status snappy_compress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length);
-
-/*
- * Given data in "compressed[0..compressed_length-1]" generated by
- * calling the snappy_compress routine, this routine stores
- * the uncompressed data to
- * uncompressed[0..uncompressed_length-1].
- * Returns failure (a value not equal to SNAPPY_OK) if the message
- * is corrupted and could not be decrypted.
- *
- * <uncompressed_length> signals the space available in "uncompressed".
- * If it is not at least equal to the value returned by
- * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL
- * is returned. After successful decompression, <uncompressed_length>
- * contains the true length of the decompressed output.
- *
- * Example:
- * size_t output_length;
- * if (snappy_uncompressed_length(input, input_length, &output_length)
- * != SNAPPY_OK) {
- * ... fail ...
- * }
- * char* output = (char*)malloc(output_length);
- * if (snappy_uncompress(input, input_length, output, &output_length)
- * == SNAPPY_OK) {
- * ... Process(output, output_length) ...
- * }
- * free(output);
- */
-snappy_status snappy_uncompress(const char* compressed,
- size_t compressed_length,
- char* uncompressed,
- size_t* uncompressed_length);
-
-/*
- * Returns the maximal size of the compressed representation of
- * input data that is "source_length" bytes in length.
- */
-size_t snappy_max_compressed_length(size_t source_length);
-
-/*
- * REQUIRES: "compressed[]" was produced by snappy_compress()
- * Returns SNAPPY_OK and stores the length of the uncompressed data in
- * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error.
- * This operation takes O(1) time.
- */
-snappy_status snappy_uncompressed_length(const char* compressed,
- size_t compressed_length,
- size_t* result);
-
-/*
- * Check if the contents of "compressed[]" can be uncompressed successfully.
- * Does not return the uncompressed data; if so, returns SNAPPY_OK,
- * or if not, returns SNAPPY_INVALID_INPUT.
- * Takes time proportional to compressed_length, but is usually at least a
- * factor of four faster than actual decompression.
- */
-snappy_status snappy_validate_compressed_buffer(const char* compressed,
- size_t compressed_length);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
+} snappy_status;
+
+/*
+ * Takes the data stored in "input[0..input_length-1]" and stores
+ * it in the array pointed to by "compressed".
+ *
+ * <compressed_length> signals the space available in "compressed".
+ * If it is not at least equal to "snappy_max_compressed_length(input_length)",
+ * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression,
+ * <compressed_length> contains the true length of the compressed output,
+ * and SNAPPY_OK is returned.
+ *
+ * Example:
+ * size_t output_length = snappy_max_compressed_length(input_length);
+ * char* output = (char*)malloc(output_length);
+ * if (snappy_compress(input, input_length, output, &output_length)
+ * == SNAPPY_OK) {
+ * ... Process(output, output_length) ...
+ * }
+ * free(output);
+ */
+snappy_status snappy_compress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length);
+
+/*
+ * Given data in "compressed[0..compressed_length-1]" generated by
+ * calling the snappy_compress routine, this routine stores
+ * the uncompressed data to
+ * uncompressed[0..uncompressed_length-1].
+ * Returns failure (a value not equal to SNAPPY_OK) if the message
+ * is corrupted and could not be decrypted.
+ *
+ * <uncompressed_length> signals the space available in "uncompressed".
+ * If it is not at least equal to the value returned by
+ * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL
+ * is returned. After successful decompression, <uncompressed_length>
+ * contains the true length of the decompressed output.
+ *
+ * Example:
+ * size_t output_length;
+ * if (snappy_uncompressed_length(input, input_length, &output_length)
+ * != SNAPPY_OK) {
+ * ... fail ...
+ * }
+ * char* output = (char*)malloc(output_length);
+ * if (snappy_uncompress(input, input_length, output, &output_length)
+ * == SNAPPY_OK) {
+ * ... Process(output, output_length) ...
+ * }
+ * free(output);
+ */
+snappy_status snappy_uncompress(const char* compressed,
+ size_t compressed_length,
+ char* uncompressed,
+ size_t* uncompressed_length);
+
+/*
+ * Returns the maximal size of the compressed representation of
+ * input data that is "source_length" bytes in length.
+ */
+size_t snappy_max_compressed_length(size_t source_length);
+
+/*
+ * REQUIRES: "compressed[]" was produced by snappy_compress()
+ * Returns SNAPPY_OK and stores the length of the uncompressed data in
+ * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error.
+ * This operation takes O(1) time.
+ */
+snappy_status snappy_uncompressed_length(const char* compressed,
+ size_t compressed_length,
+ size_t* result);
+
+/*
+ * Check if the contents of "compressed[]" can be uncompressed successfully.
+ * Does not return the uncompressed data; if so, returns SNAPPY_OK,
+ * or if not, returns SNAPPY_INVALID_INPUT.
+ * Takes time proportional to compressed_length, but is usually at least a
+ * factor of four faster than actual decompression.
+ */
+snappy_status snappy_validate_compressed_buffer(const char* compressed,
+ size_t compressed_length);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
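For reference while reviewing the header above, here is a minimal sketch of a compress/uncompress round trip through this plain C interface. It assumes the snappy-c.h/snappy-c.cc shown in this diff are built and linked; error handling is abbreviated and the buffer names are illustrative only.

    // Round trip through the C wrapper declared in snappy-c.h (sketch).
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "snappy-c.h"

    int main() {
        const char input[] = "hello hello hello hello";
        size_t input_length = strlen(input);

        // Worst-case output size, as required before calling snappy_compress().
        size_t compressed_length = snappy_max_compressed_length(input_length);
        char* compressed = (char*)malloc(compressed_length);
        if (snappy_compress(input, input_length, compressed, &compressed_length) != SNAPPY_OK) {
            free(compressed);
            return 1;
        }

        // Recover the original size, then decompress into an exactly-sized buffer.
        size_t uncompressed_length;
        if (snappy_uncompressed_length(compressed, compressed_length, &uncompressed_length) != SNAPPY_OK) {
            free(compressed);
            return 1;
        }
        char* uncompressed = (char*)malloc(uncompressed_length);
        snappy_status st = snappy_uncompress(compressed, compressed_length,
                                             uncompressed, &uncompressed_length);
        printf("%s\n", st == SNAPPY_OK ? "round trip ok" : "decompression failed");

        free(uncompressed);
        free(compressed);
        return 0;
    }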
diff --git a/contrib/libs/snappy/snappy-internal.h b/contrib/libs/snappy/snappy-internal.h
index 28c179ee46..1e1c307fef 100644
--- a/contrib/libs/snappy/snappy-internal.h
+++ b/contrib/libs/snappy/snappy-internal.h
@@ -1,94 +1,94 @@
-// Copyright 2008 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Internals shared between the Snappy implementation and its unittest.
-
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Internals shared between the Snappy implementation and its unittest.
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
-
-#include "snappy-stubs-internal.h"
-
-namespace snappy {
-namespace internal {
-
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+namespace internal {
+
// Working memory performs a single allocation to hold all scratch space
// required for compression.
-class WorkingMemory {
- public:
+class WorkingMemory {
+ public:
explicit WorkingMemory(size_t input_size);
~WorkingMemory();
-
- // Allocates and clears a hash table using memory in "*this",
- // stores the number of buckets in "*table_size" and returns a pointer to
- // the base of the hash table.
+
+ // Allocates and clears a hash table using memory in "*this",
+ // stores the number of buckets in "*table_size" and returns a pointer to
+ // the base of the hash table.
uint16* GetHashTable(size_t fragment_size, int* table_size) const;
char* GetScratchInput() const { return input_; }
char* GetScratchOutput() const { return output_; }
-
- private:
+
+ private:
char* mem_; // the allocated memory, never nullptr
size_t size_; // the size of the allocated memory, never 0
uint16* table_; // the pointer to the hashtable
char* input_; // the pointer to the input scratch buffer
char* output_; // the pointer to the output scratch buffer
-
+
// No copying
WorkingMemory(const WorkingMemory&);
void operator=(const WorkingMemory&);
-};
-
-// Flat array compression that does not emit the "uncompressed length"
-// prefix. Compresses "input" string to the "*op" buffer.
-//
-// REQUIRES: "input_length <= kBlockSize"
-// REQUIRES: "op" points to an array of memory that is at least
-// "MaxCompressedLength(input_length)" in size.
-// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
-// REQUIRES: "table_size" is a power of two
-//
-// Returns an "end" pointer into "op" buffer.
-// "end - op" is the compressed size of "input".
-char* CompressFragment(const char* input,
- size_t input_length,
- char* op,
- uint16* table,
- const int table_size);
-
+};
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input_length <= kBlockSize"
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input_length)" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+char* CompressFragment(const char* input,
+ size_t input_length,
+ char* op,
+ uint16* table,
+ const int table_size);
+
// Find the largest n such that
-//
-// s1[0,n-1] == s2[0,n-1]
-// and n <= (s2_limit - s2).
-//
+//
+// s1[0,n-1] == s2[0,n-1]
+// and n <= (s2_limit - s2).
+//
// Return make_pair(n, n < 8).
-// Does not read *s2_limit or beyond.
-// Does not read *(s1 + (s2_limit - s2)) or beyond.
-// Requires that s2_limit >= s2.
-//
+// Does not read *s2_limit or beyond.
+// Does not read *(s1 + (s2_limit - s2)) or beyond.
+// Requires that s2_limit >= s2.
+//
// Separate implementation for 64-bit, little-endian cpus.
#if !defined(SNAPPY_IS_BIG_ENDIAN) && \
(defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
@@ -97,7 +97,7 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
const char* s2_limit) {
assert(s2_limit >= s2);
size_t matched = 0;
-
+
// This block isn't necessary for correctness; we could just start looping
// immediately. As an optimization though, it is useful. It creates some not
// uncommon code paths that determine, without extra effort, whether the match
@@ -115,59 +115,59 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
}
}
- // Find out how long the match is. We loop over the data 64 bits at a
- // time until we find a 64-bit block that doesn't match; then we find
- // the first non-matching bit and use that to calculate the total
- // length of the match.
+ // Find out how long the match is. We loop over the data 64 bits at a
+ // time until we find a 64-bit block that doesn't match; then we find
+ // the first non-matching bit and use that to calculate the total
+ // length of the match.
while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
- s2 += 8;
- matched += 8;
- } else {
- uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
- int matching_bits = Bits::FindLSBSetNonZero64(x);
- matched += matching_bits >> 3;
+ s2 += 8;
+ matched += 8;
+ } else {
+ uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
+ int matching_bits = Bits::FindLSBSetNonZero64(x);
+ matched += matching_bits >> 3;
assert(matched >= 8);
return std::pair<size_t, bool>(matched, false);
- }
- }
+ }
+ }
while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
if (s1[matched] == *s2) {
- ++s2;
- ++matched;
- } else {
+ ++s2;
+ ++matched;
+ } else {
return std::pair<size_t, bool>(matched, matched < 8);
- }
- }
+ }
+ }
return std::pair<size_t, bool>(matched, matched < 8);
-}
-#else
+}
+#else
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
const char* s2,
const char* s2_limit) {
- // Implementation based on the x86-64 version, above.
+ // Implementation based on the x86-64 version, above.
assert(s2_limit >= s2);
- int matched = 0;
-
- while (s2 <= s2_limit - 4 &&
- UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
- s2 += 4;
- matched += 4;
- }
- if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
- uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
- int matching_bits = Bits::FindLSBSetNonZero(x);
- matched += matching_bits >> 3;
- } else {
- while ((s2 < s2_limit) && (s1[matched] == *s2)) {
- ++s2;
- ++matched;
- }
- }
+ int matched = 0;
+
+ while (s2 <= s2_limit - 4 &&
+ UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
+ s2 += 4;
+ matched += 4;
+ }
+ if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
+ uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
+ int matching_bits = Bits::FindLSBSetNonZero(x);
+ matched += matching_bits >> 3;
+ } else {
+ while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+ ++s2;
+ ++matched;
+ }
+ }
return std::pair<size_t, bool>(matched, matched < 8);
-}
-#endif
-
+}
+#endif
+
// Lookup tables for decompression code. Give --snappy_dump_decompression_table
// to the unit test to recompute char_table.
@@ -225,7 +225,7 @@ static const uint16 char_table[256] = {
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};
-} // end namespace internal
-} // end namespace snappy
-
+} // end namespace internal
+} // end namespace snappy
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
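The comment block above specifies FindMatchLength() as the largest n such that s1[0,n-1] == s2[0,n-1] and n <= (s2_limit - s2), returning make_pair(n, n < 8). As a mental model only, a naive reference version of that contract looks like the hypothetical helper below; it is not the optimized 64-bit or portable implementation carried in the diff.

    // Reference restatement of the FindMatchLength() contract (sketch, hypothetical name).
    #include <cassert>
    #include <cstddef>
    #include <utility>

    static std::pair<size_t, bool> ReferenceFindMatchLength(const char* s1, const char* s2,
                                                            const char* s2_limit) {
        // Largest n with s1[0..n-1] == s2[0..n-1] and n <= (s2_limit - s2).
        size_t n = 0;
        while (s2 + n < s2_limit && s1[n] == s2[n]) ++n;
        return std::make_pair(n, n < 8);  // second member: match shorter than 8 bytes
    }

    int main() {
        const char a[] = "abcdefgh-tail";
        const char b[] = "abcdefgh-other";
        std::pair<size_t, bool> r = ReferenceFindMatchLength(a, b, b + sizeof(b) - 1);
        assert(r.first == 9 && !r.second);  // "abcdefgh-" matches: 9 bytes, not < 8
        return 0;
    }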
diff --git a/contrib/libs/snappy/snappy-sinksource.cc b/contrib/libs/snappy/snappy-sinksource.cc
index 42651664bf..369a13215b 100644
--- a/contrib/libs/snappy/snappy-sinksource.cc
+++ b/contrib/libs/snappy/snappy-sinksource.cc
@@ -1,45 +1,45 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <string.h>
-
-#include "snappy-sinksource.h"
-
-namespace snappy {
-
-Source::~Source() { }
-
-Sink::~Sink() { }
-
-char* Sink::GetAppendBuffer(size_t length, char* scratch) {
- return scratch;
-}
-
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string.h>
+
+#include "snappy-sinksource.h"
+
+namespace snappy {
+
+Source::~Source() { }
+
+Sink::~Sink() { }
+
+char* Sink::GetAppendBuffer(size_t length, char* scratch) {
+ return scratch;
+}
+
char* Sink::GetAppendBufferVariable(
size_t min_size, size_t desired_size_hint, char* scratch,
size_t scratch_size, size_t* allocated_size) {
@@ -55,34 +55,34 @@ void Sink::AppendAndTakeOwnership(
(*deleter)(deleter_arg, bytes, n);
}
-ByteArraySource::~ByteArraySource() { }
-
-size_t ByteArraySource::Available() const { return left_; }
-
-const char* ByteArraySource::Peek(size_t* len) {
- *len = left_;
- return ptr_;
-}
-
-void ByteArraySource::Skip(size_t n) {
- left_ -= n;
- ptr_ += n;
-}
-
-UncheckedByteArraySink::~UncheckedByteArraySink() { }
-
-void UncheckedByteArraySink::Append(const char* data, size_t n) {
- // Do no copying if the caller filled in the result of GetAppendBuffer()
- if (data != dest_) {
- memcpy(dest_, data, n);
- }
- dest_ += n;
-}
-
-char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
- return dest_;
-}
-
+ByteArraySource::~ByteArraySource() { }
+
+size_t ByteArraySource::Available() const { return left_; }
+
+const char* ByteArraySource::Peek(size_t* len) {
+ *len = left_;
+ return ptr_;
+}
+
+void ByteArraySource::Skip(size_t n) {
+ left_ -= n;
+ ptr_ += n;
+}
+
+UncheckedByteArraySink::~UncheckedByteArraySink() { }
+
+void UncheckedByteArraySink::Append(const char* data, size_t n) {
+ // Do no copying if the caller filled in the result of GetAppendBuffer()
+ if (data != dest_) {
+ memcpy(dest_, data, n);
+ }
+ dest_ += n;
+}
+
+char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
+ return dest_;
+}
+
void UncheckedByteArraySink::AppendAndTakeOwnership(
char* data, size_t n,
void (*deleter)(void*, const char*, size_t),
@@ -92,7 +92,7 @@ void UncheckedByteArraySink::AppendAndTakeOwnership(
(*deleter)(deleter_arg, data, n);
}
dest_ += n;
-}
+}
char* UncheckedByteArraySink::GetAppendBufferVariable(
size_t min_size, size_t desired_size_hint, char* scratch,
diff --git a/contrib/libs/snappy/snappy-sinksource.h b/contrib/libs/snappy/snappy-sinksource.h
index 75aa872653..8afcdaaa2c 100644
--- a/contrib/libs/snappy/snappy-sinksource.h
+++ b/contrib/libs/snappy/snappy-sinksource.h
@@ -1,69 +1,69 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
-
-#include <stddef.h>
-
-namespace snappy {
-
-// A Sink is an interface that consumes a sequence of bytes.
-class Sink {
- public:
- Sink() { }
- virtual ~Sink();
-
- // Append "bytes[0,n-1]" to this.
- virtual void Append(const char* bytes, size_t n) = 0;
-
- // Returns a writable buffer of the specified length for appending.
- // May return a pointer to the caller-owned scratch buffer which
- // must have at least the indicated length. The returned buffer is
- // only valid until the next operation on this Sink.
- //
- // After writing at most "length" bytes, call Append() with the
- // pointer returned from this function and the number of bytes
- // written. Many Append() implementations will avoid copying
- // bytes if this function returned an internal buffer.
- //
- // If a non-scratch buffer is returned, the caller may only pass a
- // prefix of it to Append(). That is, it is not correct to pass an
- // interior pointer of the returned array to Append().
- //
- // The default implementation always returns the scratch buffer.
- virtual char* GetAppendBuffer(size_t length, char* scratch);
-
+
+#include <stddef.h>
+
+namespace snappy {
+
+// A Sink is an interface that consumes a sequence of bytes.
+class Sink {
+ public:
+ Sink() { }
+ virtual ~Sink();
+
+ // Append "bytes[0,n-1]" to this.
+ virtual void Append(const char* bytes, size_t n) = 0;
+
+ // Returns a writable buffer of the specified length for appending.
+ // May return a pointer to the caller-owned scratch buffer which
+ // must have at least the indicated length. The returned buffer is
+ // only valid until the next operation on this Sink.
+ //
+ // After writing at most "length" bytes, call Append() with the
+ // pointer returned from this function and the number of bytes
+ // written. Many Append() implementations will avoid copying
+ // bytes if this function returned an internal buffer.
+ //
+ // If a non-scratch buffer is returned, the caller may only pass a
+ // prefix of it to Append(). That is, it is not correct to pass an
+ // interior pointer of the returned array to Append().
+ //
+ // The default implementation always returns the scratch buffer.
+ virtual char* GetAppendBuffer(size_t length, char* scratch);
+
// For higher performance, Sink implementations can provide custom
// AppendAndTakeOwnership() and GetAppendBufferVariable() methods.
// These methods can reduce the number of copies done during
// compression/decompression.
-
+
// Append "bytes[0,n-1] to the sink. Takes ownership of "bytes"
// and calls the deleter function as (*deleter)(deleter_arg, bytes, n)
// to free the buffer. deleter function must be non NULL.
@@ -101,82 +101,82 @@ class Sink {
size_t min_size, size_t desired_size_hint, char* scratch,
size_t scratch_size, size_t* allocated_size);
- private:
- // No copying
- Sink(const Sink&);
- void operator=(const Sink&);
-};
-
-// A Source is an interface that yields a sequence of bytes
-class Source {
- public:
- Source() { }
- virtual ~Source();
-
- // Return the number of bytes left to read from the source
- virtual size_t Available() const = 0;
-
- // Peek at the next flat region of the source. Does not reposition
- // the source. The returned region is empty iff Available()==0.
- //
- // Returns a pointer to the beginning of the region and store its
- // length in *len.
- //
- // The returned region is valid until the next call to Skip() or
- // until this object is destroyed, whichever occurs first.
- //
- // The returned region may be larger than Available() (for example
- // if this ByteSource is a view on a substring of a larger source).
- // The caller is responsible for ensuring that it only reads the
- // Available() bytes.
- virtual const char* Peek(size_t* len) = 0;
-
- // Skip the next n bytes. Invalidates any buffer returned by
- // a previous call to Peek().
- // REQUIRES: Available() >= n
- virtual void Skip(size_t n) = 0;
-
- private:
- // No copying
- Source(const Source&);
- void operator=(const Source&);
-};
-
-// A Source implementation that yields the contents of a flat array
-class ByteArraySource : public Source {
- public:
- ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
- virtual ~ByteArraySource();
- virtual size_t Available() const;
- virtual const char* Peek(size_t* len);
- virtual void Skip(size_t n);
- private:
- const char* ptr_;
- size_t left_;
-};
-
-// A Sink implementation that writes to a flat array without any bound checks.
-class UncheckedByteArraySink : public Sink {
- public:
- explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
- virtual ~UncheckedByteArraySink();
- virtual void Append(const char* data, size_t n);
- virtual char* GetAppendBuffer(size_t len, char* scratch);
+ private:
+ // No copying
+ Sink(const Sink&);
+ void operator=(const Sink&);
+};
+
+// A Source is an interface that yields a sequence of bytes
+class Source {
+ public:
+ Source() { }
+ virtual ~Source();
+
+ // Return the number of bytes left to read from the source
+ virtual size_t Available() const = 0;
+
+ // Peek at the next flat region of the source. Does not reposition
+ // the source. The returned region is empty iff Available()==0.
+ //
+  // Returns a pointer to the beginning of the region and stores its
+ // length in *len.
+ //
+ // The returned region is valid until the next call to Skip() or
+ // until this object is destroyed, whichever occurs first.
+ //
+ // The returned region may be larger than Available() (for example
+ // if this ByteSource is a view on a substring of a larger source).
+ // The caller is responsible for ensuring that it only reads the
+ // Available() bytes.
+ virtual const char* Peek(size_t* len) = 0;
+
+ // Skip the next n bytes. Invalidates any buffer returned by
+ // a previous call to Peek().
+ // REQUIRES: Available() >= n
+ virtual void Skip(size_t n) = 0;
+
+ private:
+ // No copying
+ Source(const Source&);
+ void operator=(const Source&);
+};
+
+// A Source implementation that yields the contents of a flat array
+class ByteArraySource : public Source {
+ public:
+ ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
+ virtual ~ByteArraySource();
+ virtual size_t Available() const;
+ virtual const char* Peek(size_t* len);
+ virtual void Skip(size_t n);
+ private:
+ const char* ptr_;
+ size_t left_;
+};
+
+// A Sink implementation that writes to a flat array without any bound checks.
+class UncheckedByteArraySink : public Sink {
+ public:
+ explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
+ virtual ~UncheckedByteArraySink();
+ virtual void Append(const char* data, size_t n);
+ virtual char* GetAppendBuffer(size_t len, char* scratch);
virtual char* GetAppendBufferVariable(
size_t min_size, size_t desired_size_hint, char* scratch,
size_t scratch_size, size_t* allocated_size);
virtual void AppendAndTakeOwnership(
char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
void *deleter_arg);
-
- // Return the current output pointer so that a caller can see how
- // many bytes were produced.
- // Note: this is not a Sink method.
- char* CurrentDestination() const { return dest_; }
- private:
- char* dest_;
-};
-
+
+ // Return the current output pointer so that a caller can see how
+ // many bytes were produced.
+ // Note: this is not a Sink method.
+ char* CurrentDestination() const { return dest_; }
+ private:
+ char* dest_;
+};
+
} // namespace snappy
-
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
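A minimal usage sketch of the Source and Sink contracts declared in snappy-sinksource.h above (illustrative only, not part of the patch; CopyAll is a hypothetical helper name, and only the Append()/GetAppendBuffer() and Available()/Peek()/Skip() members shown above are assumed):

#include <cstring>

#include "snappy-sinksource.h"

// Drain a Source into a Sink using the documented contracts: Peek() exposes a
// flat region that stays valid until the next Skip(), and Append() may be
// called with a prefix of the buffer returned by GetAppendBuffer().
static void CopyAll(snappy::Source* src, snappy::Sink* sink) {
  char scratch[4096];                        // caller-owned scratch buffer
  while (src->Available() > 0) {
    size_t len = 0;
    const char* region = src->Peek(&len);
    size_t chunk = len < sizeof(scratch) ? len : sizeof(scratch);
    char* buf = sink->GetAppendBuffer(chunk, scratch);
    std::memcpy(buf, region, chunk);
    sink->Append(buf, chunk);                // prefix of the returned buffer
    src->Skip(chunk);                        // invalidates the Peek() region
  }
}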
diff --git a/contrib/libs/snappy/snappy-stubs-internal.cc b/contrib/libs/snappy/snappy-stubs-internal.cc
index 3e43a2124b..66ed2e9039 100644
--- a/contrib/libs/snappy/snappy-stubs-internal.cc
+++ b/contrib/libs/snappy/snappy-stubs-internal.cc
@@ -1,42 +1,42 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <algorithm>
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
#include <string>
-
-#include "snappy-stubs-internal.h"
-
-namespace snappy {
-
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+
void Varint::Append32(std::string* s, uint32 value) {
- char buf[Varint::kMax32];
- const char* p = Varint::Encode32(buf, value);
+ char buf[Varint::kMax32];
+ const char* p = Varint::Encode32(buf, value);
s->append(buf, p - buf);
-}
-
-} // namespace snappy
+}
+
+} // namespace snappy
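For the Varint::Append32() definition above, a small illustrative check of the 7-bit varint encoding it produces; the expected bytes follow from the Encode32() branches declared in snappy-stubs-internal.h, and the function name below is hypothetical:

#include <cassert>
#include <string>

#include "snappy-stubs-internal.h"

// 300 = 0b100101100: the low 7 bits go first with the continuation bit (0x80)
// set, then the remaining bits in the second byte.
static void VarintAppend32Example() {
  std::string s;
  snappy::Varint::Append32(&s, 300);
  assert(s.size() == 2);
  assert(static_cast<unsigned char>(s[0]) == 0xAC);  // 0x80 | (300 & 0x7F)
  assert(static_cast<unsigned char>(s[1]) == 0x02);  // 300 >> 7
}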
diff --git a/contrib/libs/snappy/snappy-stubs-internal.h b/contrib/libs/snappy/snappy-stubs-internal.h
index 48b40bac98..4854689d17 100644
--- a/contrib/libs/snappy/snappy-stubs-internal.h
+++ b/contrib/libs/snappy/snappy-stubs-internal.h
@@ -1,46 +1,46 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Various stubs for the open-source version of Snappy.
-
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various stubs for the open-source version of Snappy.
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
-
+
#ifdef HAVE_CONFIG_H
#include "config.h"
-#endif
-
+#endif
+
#include <string>
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
@@ -65,13 +65,13 @@
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */
#endif // __has_feature(memory_sanitizer)
-#include "snappy-stubs-public.h"
-
-#if defined(__x86_64__)
-
-// Enable 64-bit optimized versions of some routines.
-#define ARCH_K8 1
-
+#include "snappy-stubs-public.h"
+
+#if defined(__x86_64__)
+
+// Enable 64-bit optimized versions of some routines.
+#define ARCH_K8 1
+
#elif defined(__ppc64__)
#define ARCH_PPC 1
@@ -80,21 +80,21 @@
#define ARCH_ARM 1
-#endif
-
-// Needed by OS X, among others.
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-// The size of an array, if known at compile-time.
-// Will give unexpected results if used on a pointer.
-// We undefine it first, since some compilers already have a definition.
-#ifdef ARRAYSIZE
-#undef ARRAYSIZE
-#endif
-#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
-
+#endif
+
+// Needed by OS X, among others.
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// The size of an array, if known at compile-time.
+// Will give unexpected results if used on a pointer.
+// We undefine it first, since some compilers already have a definition.
+#ifdef ARRAYSIZE
+#undef ARRAYSIZE
+#endif
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
// Static prediction hints.
#ifdef HAVE_BUILTIN_EXPECT
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
@@ -103,44 +103,44 @@
#define SNAPPY_PREDICT_FALSE(x) x
#define SNAPPY_PREDICT_TRUE(x) x
#endif
-
-// This is only used for recomputing the tag byte table used during
-// decompression; for simplicity we just remove it from the open-source
-// version (anyone who wants to regenerate it can just do the call
-// themselves within main()).
-#define DEFINE_bool(flag_name, default_value, description) \
- bool FLAGS_ ## flag_name = default_value
-#define DECLARE_bool(flag_name) \
- extern bool FLAGS_ ## flag_name
-
-namespace snappy {
-
-static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
-static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
-
-// Potentially unaligned loads and stores.
-
+
+// This is only used for recomputing the tag byte table used during
+// decompression; for simplicity we just remove it from the open-source
+// version (anyone who wants to regenerate it can just do the call
+// themselves within main()).
+#define DEFINE_bool(flag_name, default_value, description) \
+ bool FLAGS_ ## flag_name = default_value
+#define DECLARE_bool(flag_name) \
+ extern bool FLAGS_ ## flag_name
+
+namespace snappy {
+
+static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
+static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+
+// Potentially unaligned loads and stores.
+
// x86, PowerPC, and ARM64 can simply do these loads and stores native.
-
+
#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
defined(__aarch64__)
-
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
-#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
-#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
-
-#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
-#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
-#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
-
-// ARMv7 and newer support native unaligned accesses, but only of 16-bit
-// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
-// do an unaligned read and rotate the words around a bit, or do the reads very
-// slowly (trip through kernel mode). There's no simple #define that says just
-// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
-// sub-architectures.
-//
-// This is a mess, but there's not much we can do about it.
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
+
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode). There's no simple #define that says just
+// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
+// sub-architectures.
+//
+// This is a mess, but there's not much we can do about it.
//
// To further complicate matters, only LDR instructions (single reads) are
// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
@@ -150,30 +150,30 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
// (it ignores __attribute__((packed)) on individual variables). However,
// we can tell it that a _struct_ is unaligned, which has the same effect,
// so we do that.
-
-#elif defined(__arm__) && \
+
+#elif defined(__arm__) && \
!defined(__ARM_ARCH_4__) && \
!defined(__ARM_ARCH_4T__) && \
- !defined(__ARM_ARCH_5__) && \
- !defined(__ARM_ARCH_5T__) && \
- !defined(__ARM_ARCH_5TE__) && \
- !defined(__ARM_ARCH_5TEJ__) && \
- !defined(__ARM_ARCH_6__) && \
- !defined(__ARM_ARCH_6J__) && \
- !defined(__ARM_ARCH_6K__) && \
- !defined(__ARM_ARCH_6Z__) && \
- !defined(__ARM_ARCH_6ZK__) && \
- !defined(__ARM_ARCH_6T2__)
-
+ !defined(__ARM_ARCH_5__) && \
+ !defined(__ARM_ARCH_5T__) && \
+ !defined(__ARM_ARCH_5TE__) && \
+ !defined(__ARM_ARCH_5TEJ__) && \
+ !defined(__ARM_ARCH_6__) && \
+ !defined(__ARM_ARCH_6J__) && \
+ !defined(__ARM_ARCH_6K__) && \
+ !defined(__ARM_ARCH_6Z__) && \
+ !defined(__ARM_ARCH_6ZK__) && \
+ !defined(__ARM_ARCH_6T2__)
+
#if __GNUC__
#define ATTRIBUTE_PACKED __attribute__((__packed__))
#else
#define ATTRIBUTE_PACKED
#endif
-
+
namespace base {
namespace internal {
-
+
struct Unaligned16Struct {
uint16 value;
uint8 dummy; // To make the size non-power-of-two.
@@ -200,59 +200,59 @@ struct Unaligned32Struct {
(_val))
// TODO: NEON supports unaligned 64-bit loads and stores.
-// See if that would be more efficient on platforms supporting it,
-// at least for copies.
-
-inline uint64 UNALIGNED_LOAD64(const void *p) {
- uint64 t;
- memcpy(&t, p, sizeof t);
- return t;
-}
-
-inline void UNALIGNED_STORE64(void *p, uint64 v) {
- memcpy(p, &v, sizeof v);
-}
-
-#else
-
-// These functions are provided for architectures that don't support
-// unaligned loads and stores.
-
-inline uint16 UNALIGNED_LOAD16(const void *p) {
- uint16 t;
- memcpy(&t, p, sizeof t);
- return t;
-}
-
-inline uint32 UNALIGNED_LOAD32(const void *p) {
- uint32 t;
- memcpy(&t, p, sizeof t);
- return t;
-}
-
-inline uint64 UNALIGNED_LOAD64(const void *p) {
- uint64 t;
- memcpy(&t, p, sizeof t);
- return t;
-}
-
-inline void UNALIGNED_STORE16(void *p, uint16 v) {
- memcpy(p, &v, sizeof v);
-}
-
-inline void UNALIGNED_STORE32(void *p, uint32 v) {
- memcpy(p, &v, sizeof v);
-}
-
-inline void UNALIGNED_STORE64(void *p, uint64 v) {
- memcpy(p, &v, sizeof v);
-}
-
-#endif
-
+// See if that would be more efficient on platforms supporting it,
+// at least for copies.
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+ uint64 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
+inline uint16 UNALIGNED_LOAD16(const void *p) {
+ uint16 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline uint32 UNALIGNED_LOAD32(const void *p) {
+ uint32 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+ uint64 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline void UNALIGNED_STORE16(void *p, uint16 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE32(void *p, uint32 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+#endif
+
// The following guarantees declaration of the byte swap functions.
#if defined(SNAPPY_IS_BIG_ENDIAN)
-
+
#ifdef HAVE_SYS_BYTEORDER_H
#include <sys/byteorder.h>
#endif
@@ -293,8 +293,8 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
inline uint16 bswap_16(uint16 x) {
return (x << 8) | (x >> 8);
-}
-
+}
+
inline uint32 bswap_32(uint32 x) {
x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
return (x >> 16) | (x << 16);
@@ -310,28 +310,28 @@ inline uint64 bswap_64(uint64 x) {
#endif // defined(SNAPPY_IS_BIG_ENDIAN)
-// Convert to little-endian storage, opposite of network format.
-// Convert x from host to little endian: x = LittleEndian.FromHost(x);
-// convert x from little endian to host: x = LittleEndian.ToHost(x);
-//
-// Store values into unaligned memory converting to little endian order:
-// LittleEndian.Store16(p, x);
-//
-// Load unaligned values stored in little endian converting to host order:
-// x = LittleEndian.Load16(p);
-class LittleEndian {
- public:
- // Conversion functions.
+// Convert to little-endian storage, opposite of network format.
+// Convert x from host to little endian: x = LittleEndian.FromHost(x);
+// convert x from little endian to host: x = LittleEndian.ToHost(x);
+//
+// Store values into unaligned memory converting to little endian order:
+// LittleEndian.Store16(p, x);
+//
+// Load unaligned values stored in little endian converting to host order:
+// x = LittleEndian.Load16(p);
+class LittleEndian {
+ public:
+ // Conversion functions.
#if defined(SNAPPY_IS_BIG_ENDIAN)
-
+
static uint16 FromHost16(uint16 x) { return bswap_16(x); }
static uint16 ToHost16(uint16 x) { return bswap_16(x); }
-
+
static uint32 FromHost32(uint32 x) { return bswap_32(x); }
static uint32 ToHost32(uint32 x) { return bswap_32(x); }
- static bool IsLittleEndian() { return false; }
-
+ static bool IsLittleEndian() { return false; }
+
#else // !defined(SNAPPY_IS_BIG_ENDIAN)
static uint16 FromHost16(uint16 x) { return x; }
@@ -344,50 +344,50 @@ class LittleEndian {
#endif // !defined(SNAPPY_IS_BIG_ENDIAN)
- // Functions to do unaligned loads and stores in little-endian order.
- static uint16 Load16(const void *p) {
- return ToHost16(UNALIGNED_LOAD16(p));
- }
-
- static void Store16(void *p, uint16 v) {
- UNALIGNED_STORE16(p, FromHost16(v));
- }
-
- static uint32 Load32(const void *p) {
- return ToHost32(UNALIGNED_LOAD32(p));
- }
-
- static void Store32(void *p, uint32 v) {
- UNALIGNED_STORE32(p, FromHost32(v));
- }
-};
-
-// Some bit-manipulation functions.
-class Bits {
- public:
+ // Functions to do unaligned loads and stores in little-endian order.
+ static uint16 Load16(const void *p) {
+ return ToHost16(UNALIGNED_LOAD16(p));
+ }
+
+ static void Store16(void *p, uint16 v) {
+ UNALIGNED_STORE16(p, FromHost16(v));
+ }
+
+ static uint32 Load32(const void *p) {
+ return ToHost32(UNALIGNED_LOAD32(p));
+ }
+
+ static void Store32(void *p, uint32 v) {
+ UNALIGNED_STORE32(p, FromHost32(v));
+ }
+};
+
+// Some bit-manipulation functions.
+class Bits {
+ public:
// Return floor(log2(n)) for positive integer n.
static int Log2FloorNonZero(uint32 n);
- // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
- static int Log2Floor(uint32 n);
-
-  // Return the least significant set bit, 0-indexed.  Returns an
- // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
- // that it's 0-indexed.
- static int FindLSBSetNonZero(uint32 n);
+ // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+ static int Log2Floor(uint32 n);
+
+  // Return the least significant set bit, 0-indexed.  Returns an
+ // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
+ // that it's 0-indexed.
+ static int FindLSBSetNonZero(uint32 n);
#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
- static int FindLSBSetNonZero64(uint64 n);
+ static int FindLSBSetNonZero64(uint64 n);
#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-
- private:
+
+ private:
// No copying
Bits(const Bits&);
void operator=(const Bits&);
-};
-
-#ifdef HAVE_BUILTIN_CTZ
-
+};
+
+#ifdef HAVE_BUILTIN_CTZ
+
inline int Bits::Log2FloorNonZero(uint32 n) {
assert(n != 0);
// (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
@@ -399,22 +399,22 @@ inline int Bits::Log2FloorNonZero(uint32 n) {
return 31 ^ __builtin_clz(n);
}
-inline int Bits::Log2Floor(uint32 n) {
+inline int Bits::Log2Floor(uint32 n) {
return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
-}
-
-inline int Bits::FindLSBSetNonZero(uint32 n) {
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
assert(n != 0);
- return __builtin_ctz(n);
-}
-
+ return __builtin_ctz(n);
+}
+
#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-inline int Bits::FindLSBSetNonZero64(uint64 n) {
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
assert(n != 0);
- return __builtin_ctzll(n);
-}
+ return __builtin_ctzll(n);
+}
#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-
+
#elif defined(_MSC_VER)
inline int Bits::Log2FloorNonZero(uint32 n) {
@@ -449,158 +449,158 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) {
}
#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-#else // Portable versions.
-
+#else // Portable versions.
+
inline int Bits::Log2FloorNonZero(uint32 n) {
assert(n != 0);
- int log = 0;
- uint32 value = n;
- for (int i = 4; i >= 0; --i) {
- int shift = (1 << i);
- uint32 x = value >> shift;
- if (x != 0) {
- value = x;
- log += shift;
- }
- }
- assert(value == 1);
- return log;
-}
-
+ int log = 0;
+ uint32 value = n;
+ for (int i = 4; i >= 0; --i) {
+ int shift = (1 << i);
+ uint32 x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ assert(value == 1);
+ return log;
+}
+
inline int Bits::Log2Floor(uint32 n) {
return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
}
-inline int Bits::FindLSBSetNonZero(uint32 n) {
+inline int Bits::FindLSBSetNonZero(uint32 n) {
assert(n != 0);
- int rc = 31;
- for (int i = 4, shift = 1 << 4; i >= 0; --i) {
- const uint32 x = n << shift;
- if (x != 0) {
- n = x;
- rc -= shift;
- }
- shift >>= 1;
- }
- return rc;
-}
-
+ int rc = 31;
+ for (int i = 4, shift = 1 << 4; i >= 0; --i) {
+ const uint32 x = n << shift;
+ if (x != 0) {
+ n = x;
+ rc -= shift;
+ }
+ shift >>= 1;
+ }
+ return rc;
+}
+
#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
-inline int Bits::FindLSBSetNonZero64(uint64 n) {
+// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
assert(n != 0);
- const uint32 bottombits = static_cast<uint32>(n);
- if (bottombits == 0) {
- // Bottom bits are zero, so scan in top bits
- return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
- } else {
- return FindLSBSetNonZero(bottombits);
- }
-}
+ const uint32 bottombits = static_cast<uint32>(n);
+ if (bottombits == 0) {
+ // Bottom bits are zero, so scan in top bits
+ return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
+ } else {
+ return FindLSBSetNonZero(bottombits);
+ }
+}
#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-
-#endif // End portable versions.
-
-// Variable-length integer encoding.
-class Varint {
- public:
-  // Maximum length of varint encoding of uint32.
- static const int kMax32 = 5;
-
- // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
- // Never reads a character at or beyond limit. If a valid/terminated varint32
- // was found in the range, stores it in *OUTPUT and returns a pointer just
- // past the last byte of the varint32. Else returns NULL. On success,
- // "result <= limit".
- static const char* Parse32WithLimit(const char* ptr, const char* limit,
- uint32* OUTPUT);
-
- // REQUIRES "ptr" points to a buffer of length sufficient to hold "v".
- // EFFECTS Encodes "v" into "ptr" and returns a pointer to the
- // byte just past the last encoded byte.
- static char* Encode32(char* ptr, uint32 v);
-
- // EFFECTS Appends the varint representation of "value" to "*s".
+
+#endif // End portable versions.
+
+// Variable-length integer encoding.
+class Varint {
+ public:
+  // Maximum length of varint encoding of uint32.
+ static const int kMax32 = 5;
+
+ // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
+ // Never reads a character at or beyond limit. If a valid/terminated varint32
+ // was found in the range, stores it in *OUTPUT and returns a pointer just
+ // past the last byte of the varint32. Else returns NULL. On success,
+ // "result <= limit".
+ static const char* Parse32WithLimit(const char* ptr, const char* limit,
+ uint32* OUTPUT);
+
+ // REQUIRES "ptr" points to a buffer of length sufficient to hold "v".
+ // EFFECTS Encodes "v" into "ptr" and returns a pointer to the
+ // byte just past the last encoded byte.
+ static char* Encode32(char* ptr, uint32 v);
+
+ // EFFECTS Appends the varint representation of "value" to "*s".
static void Append32(std::string* s, uint32 value);
-};
-
-inline const char* Varint::Parse32WithLimit(const char* p,
- const char* l,
- uint32* OUTPUT) {
- const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
- const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
- uint32 b, result;
- if (ptr >= limit) return NULL;
- b = *(ptr++); result = b & 127; if (b < 128) goto done;
- if (ptr >= limit) return NULL;
- b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done;
- if (ptr >= limit) return NULL;
- b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
- if (ptr >= limit) return NULL;
- b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
- if (ptr >= limit) return NULL;
- b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
- return NULL; // Value is too long to be a varint32
- done:
- *OUTPUT = result;
- return reinterpret_cast<const char*>(ptr);
-}
-
-inline char* Varint::Encode32(char* sptr, uint32 v) {
- // Operate on characters as unsigneds
- unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
- static const int B = 128;
- if (v < (1<<7)) {
- *(ptr++) = v;
- } else if (v < (1<<14)) {
- *(ptr++) = v | B;
- *(ptr++) = v>>7;
- } else if (v < (1<<21)) {
- *(ptr++) = v | B;
- *(ptr++) = (v>>7) | B;
- *(ptr++) = v>>14;
- } else if (v < (1<<28)) {
- *(ptr++) = v | B;
- *(ptr++) = (v>>7) | B;
- *(ptr++) = (v>>14) | B;
- *(ptr++) = v>>21;
- } else {
- *(ptr++) = v | B;
- *(ptr++) = (v>>7) | B;
- *(ptr++) = (v>>14) | B;
- *(ptr++) = (v>>21) | B;
- *(ptr++) = v>>28;
- }
- return reinterpret_cast<char*>(ptr);
-}
-
+};
+
+inline const char* Varint::Parse32WithLimit(const char* p,
+ const char* l,
+ uint32* OUTPUT) {
+ const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
+ const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
+ uint32 b, result;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result = b & 127; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
+ return NULL; // Value is too long to be a varint32
+ done:
+ *OUTPUT = result;
+ return reinterpret_cast<const char*>(ptr);
+}
+
+inline char* Varint::Encode32(char* sptr, uint32 v) {
+ // Operate on characters as unsigneds
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
+ static const int B = 128;
+ if (v < (1<<7)) {
+ *(ptr++) = v;
+ } else if (v < (1<<14)) {
+ *(ptr++) = v | B;
+ *(ptr++) = v>>7;
+ } else if (v < (1<<21)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = v>>14;
+ } else if (v < (1<<28)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = v>>21;
+ } else {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = (v>>21) | B;
+ *(ptr++) = v>>28;
+ }
+ return reinterpret_cast<char*>(ptr);
+}
+
// If you know the internal layout of the std::string in use, you can
-// replace this function with one that resizes the string without
-// filling the new space with zeros (if applicable) --
-// it will be non-portable but faster.
+// replace this function with one that resizes the string without
+// filling the new space with zeros (if applicable) --
+// it will be non-portable but faster.
inline void STLStringResizeUninitialized(std::string* s, size_t new_size) {
s->resize(new_size);
-}
-
-// Return a mutable char* pointing to a string's internal buffer,
-// which may not be null-terminated. Writing through this pointer will
-// modify the string.
-//
-// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
-// next call to a string method that invalidates iterators.
-//
-// As of 2006-04, there is no standard-blessed way of getting a
-// mutable reference to a string's internal buffer. However, issue 530
-// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
-// proposes this as the method. It will officially be part of the standard
-// for C++0x. This should already work on all current implementations.
+}
+
+// Return a mutable char* pointing to a string's internal buffer,
+// which may not be null-terminated. Writing through this pointer will
+// modify the string.
+//
+// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
+// next call to a string method that invalidates iterators.
+//
+// As of 2006-04, there is no standard-blessed way of getting a
+// mutable reference to a string's internal buffer. However, issue 530
+// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
+// proposes this as the method. It will officially be part of the standard
+// for C++0x. This should already work on all current implementations.
inline char* string_as_array(std::string* str) {
return str->empty() ? NULL : &*str->begin();
-}
-
-} // namespace snappy
-
+}
+
+} // namespace snappy
+
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
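The portable Bits fallbacks near the end of the header above locate the top set bit with a five-step binary search over the shift amounts 16, 8, 4, 2, 1. A self-contained illustration of the same idea follows; Log2FloorNonZeroPortable is a hypothetical name used only here, and for nonzero n it agrees with the builtin-based variant 31 ^ __builtin_clz(n):

#include <cassert>
#include <cstdint>

// Portable floor(log2(n)) for n != 0, mirroring the shift-halving loop in
// snappy-stubs-internal.h: keep the upper half of the value whenever it is
// nonzero and accumulate the shift.
static int Log2FloorNonZeroPortable(std::uint32_t n) {
  int log = 0;
  for (int i = 4; i >= 0; --i) {
    const int shift = 1 << i;              // 16, 8, 4, 2, 1
    const std::uint32_t x = n >> shift;
    if (x != 0) {
      n = x;
      log += shift;
    }
  }
  return log;
}

static void Log2Example() {
  assert(Log2FloorNonZeroPortable(1) == 0);
  assert(Log2FloorNonZeroPortable(4096) == 12);
  assert(Log2FloorNonZeroPortable(6000) == 12);  // 4096 <= 6000 < 8192
}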
diff --git a/contrib/libs/snappy/snappy-stubs-public.h b/contrib/libs/snappy/snappy-stubs-public.h
index 2a2931c4a4..357c4b2e4b 100644
--- a/contrib/libs/snappy/snappy-stubs-public.h
+++ b/contrib/libs/snappy/snappy-stubs-public.h
@@ -1,58 +1,58 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Various type stubs for the open-source version of Snappy.
-//
-// This file cannot include config.h, as it is included from snappy.h,
-// which is a public header. Instead, snappy-stubs-public.h is generated
-// from snappy-stubs-public.h.in at configure time.
-
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various type stubs for the open-source version of Snappy.
+//
+// This file cannot include config.h, as it is included from snappy.h,
+// which is a public header. Instead, snappy-stubs-public.h is generated
+// from snappy-stubs-public.h.in at configure time.
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
-
+
#include <cstddef>
#include <cstdint>
#include <string>
-
+
#include "config.h"
#if defined(HAVE_SYS_UIO_H)
#include <sys/uio.h>
#endif // HAVE_SYS_UIO_H
-#define SNAPPY_MAJOR 1
+#define SNAPPY_MAJOR 1
#define SNAPPY_MINOR 1
#define SNAPPY_PATCHLEVEL 8
-#define SNAPPY_VERSION \
- ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
-
-namespace snappy {
-
+#define SNAPPY_VERSION \
+ ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
+
+namespace snappy {
+
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
@@ -61,7 +61,7 @@ using int32 = std::int32_t;
using uint32 = std::uint32_t;
using int64 = std::int64_t;
using uint64 = std::uint64_t;
-
+
#if !defined(HAVE_SYS_UIO_H)
// Windows does not have an iovec type, yet the concept is universally useful.
// It is simple to define it ourselves, so we put it inside our own namespace.
@@ -70,7 +70,7 @@ struct iovec {
size_t iov_len;
};
#endif // !HAVE_SYS_UIO_H
-
-} // namespace snappy
-
+
+} // namespace snappy
+
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
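The version macros above pack major, minor, and patchlevel into one integer; for the 1.1.8 values in this header the packed constant is 0x010108, as the following illustrative compile-time check confirms:

#include "snappy-stubs-public.h"

// (1 << 16) | (1 << 8) | 8 == 65536 + 256 + 8 == 0x010108
static_assert(SNAPPY_VERSION == 0x010108, "unexpected packed snappy version");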
diff --git a/contrib/libs/snappy/snappy.cc b/contrib/libs/snappy/snappy.cc
index 27e491c043..9351b0f21e 100644
--- a/contrib/libs/snappy/snappy.cc
+++ b/contrib/libs/snappy/snappy.cc
@@ -1,35 +1,35 @@
-// Copyright 2005 Google Inc. All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "snappy.h"
-#include "snappy-internal.h"
-#include "snappy-sinksource.h"
-
+// Copyright 2005 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "snappy.h"
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+
#if !defined(SNAPPY_HAVE_SSSE3)
// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD
// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and
@@ -68,60 +68,60 @@
#include <immintrin.h>
#endif
-#include <stdio.h>
-
-#include <algorithm>
+#include <stdio.h>
+
+#include <algorithm>
#include <string>
#include <vector>
#include <util/generic/string.h>
-
-namespace snappy {
-
+
+namespace snappy {
+
using internal::COPY_1_BYTE_OFFSET;
using internal::COPY_2_BYTE_OFFSET;
using internal::LITERAL;
using internal::char_table;
using internal::kMaximumTagLength;
-// Any hash function will produce a valid compressed bitstream, but a good
-// hash function reduces the number of collisions and thus yields better
-// compression for compressible input, and more speed for incompressible
-// input. Of course, it doesn't hurt if the hash function is reasonably fast
-// either, as it gets called a lot.
-static inline uint32 HashBytes(uint32 bytes, int shift) {
- uint32 kMul = 0x1e35a7bd;
- return (bytes * kMul) >> shift;
-}
-static inline uint32 Hash(const char* p, int shift) {
- return HashBytes(UNALIGNED_LOAD32(p), shift);
-}
-
-size_t MaxCompressedLength(size_t source_len) {
- // Compressed data can be defined as:
- // compressed := item* literal*
- // item := literal* copy
- //
- // The trailing literal sequence has a space blowup of at most 62/60
- // since a literal of length 60 needs one tag byte + one extra byte
- // for length information.
- //
- // Item blowup is trickier to measure. Suppose the "copy" op copies
- // 4 bytes of data. Because of a special check in the encoding code,
- // we produce a 4-byte copy only if the offset is < 65536. Therefore
- // the copy op takes 3 bytes to encode, and this type of item leads
- // to at most the 62/60 blowup for representing literals.
- //
- // Suppose the "copy" op copies 5 bytes of data. If the offset is big
- // enough, it will take 5 bytes to encode the copy op. Therefore the
- // worst case here is a one-byte literal followed by a five-byte copy.
- // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
- //
- // This last factor dominates the blowup, so the final estimate is:
- return 32 + source_len + source_len/6;
-}
-
+// Any hash function will produce a valid compressed bitstream, but a good
+// hash function reduces the number of collisions and thus yields better
+// compression for compressible input, and more speed for incompressible
+// input. Of course, it doesn't hurt if the hash function is reasonably fast
+// either, as it gets called a lot.
+static inline uint32 HashBytes(uint32 bytes, int shift) {
+ uint32 kMul = 0x1e35a7bd;
+ return (bytes * kMul) >> shift;
+}
+static inline uint32 Hash(const char* p, int shift) {
+ return HashBytes(UNALIGNED_LOAD32(p), shift);
+}
+
+size_t MaxCompressedLength(size_t source_len) {
+ // Compressed data can be defined as:
+ // compressed := item* literal*
+ // item := literal* copy
+ //
+ // The trailing literal sequence has a space blowup of at most 62/60
+ // since a literal of length 60 needs one tag byte + one extra byte
+ // for length information.
+ //
+ // Item blowup is trickier to measure. Suppose the "copy" op copies
+ // 4 bytes of data. Because of a special check in the encoding code,
+ // we produce a 4-byte copy only if the offset is < 65536. Therefore
+ // the copy op takes 3 bytes to encode, and this type of item leads
+ // to at most the 62/60 blowup for representing literals.
+ //
+ // Suppose the "copy" op copies 5 bytes of data. If the offset is big
+ // enough, it will take 5 bytes to encode the copy op. Therefore the
+ // worst case here is a one-byte literal followed by a five-byte copy.
+ // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
+ //
+ // This last factor dominates the blowup, so the final estimate is:
+ return 32 + source_len + source_len/6;
+}
+
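// [Illustrative aside, not part of the patch] Plugging numbers into the
// MaxCompressedLength() bound derived above:
//   source_len = 60000  ->  32 + 60000 + 60000/6 = 70032 bytes
//   source_len = 6      ->  32 + 6 + 1 = 39 bytes, comfortably above the
//                           worst-case 7 bytes of output the comment
//                           describes for 6 bytes of input.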
namespace {
-
+
void UnalignedCopy64(const void* src, void* dst) {
char tmp[8];
memcpy(tmp, src, 8);
@@ -145,7 +145,7 @@ void UnalignedCopy128(const void* src, void* dst) {
// op_limit == op + 20
// After IncrementalCopySlow(src, op, op_limit), the result will have eleven
// copies of "ab"
-// ababababababababababab
+// ababababababababababab
// Note that this does not match the semantics of either memcpy() or memmove().
inline char* IncrementalCopySlow(const char* src, char* op,
char* const op_limit) {
@@ -156,13 +156,13 @@ inline char* IncrementalCopySlow(const char* src, char* op,
#pragma clang loop unroll(disable)
#endif
while (op < op_limit) {
- *op++ = *src++;
+ *op++ = *src++;
}
return op_limit;
-}
-
+}
+
#if SNAPPY_HAVE_SSSE3
-
+
// This is a table of shuffle control masks that can be used as the source
// operand for PSHUFB to permute the contents of the destination XMM register
// into a repeating byte pattern.
@@ -175,9 +175,9 @@ alignas(16) const char pshufb_fill_patterns[7][16] = {
{0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3},
{0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1},
};
-
+
#endif // SNAPPY_HAVE_SSSE3
-
+
// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
// IncrementalCopySlow. buf_limit is the address past the end of the writable
// region of the buffer.
@@ -194,7 +194,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
// NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
// to optimize this function but we have to also handle other cases in case
// the input does not satisfy these conditions.
-
+
size_t pattern_size = op - src;
// The cases are split into different branches to allow the branch predictor,
// FDO, and static prediction hints to work better. For each input we list the
@@ -286,7 +286,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
// conditionals instead of a loop allows FDO to layout the code with respect
// to the actual probabilities of each length.
// TODO: Replace with loop with trip count hint.
- UnalignedCopy64(src, op);
+ UnalignedCopy64(src, op);
UnalignedCopy64(src + 8, op + 8);
if (op + 16 < op_limit) {
@@ -302,7 +302,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
UnalignedCopy64(src + 56, op + 56);
}
return op_limit;
- }
+ }
// Fall back to doing as much as we can with the available slop in the
// buffer. This code path is relatively cold however so we save code size by
@@ -314,7 +314,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
#pragma clang loop unroll(disable)
#endif
for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) {
- UnalignedCopy64(src, op);
+ UnalignedCopy64(src, op);
UnalignedCopy64(src + 8, op + 8);
}
if (op >= op_limit)
@@ -324,17 +324,17 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
// single 8 byte copy.
if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
UnalignedCopy64(src, op);
- src += 8;
- op += 8;
- }
+ src += 8;
+ op += 8;
+ }
return IncrementalCopySlow(src, op, op_limit);
-}
-
+}
+
} // namespace
template <bool allow_fast_path>
-static inline char* EmitLiteral(char* op,
- const char* literal,
+static inline char* EmitLiteral(char* op,
+ const char* literal,
int len) {
// The vast majority of copies are below 16 bytes, for which a
// call to memcpy is overkill. This fast path can sometimes
@@ -356,13 +356,13 @@ static inline char* EmitLiteral(char* op,
return op + len;
}
- if (n < 60) {
- // Fits in tag byte
- *op++ = LITERAL | (n << 2);
- } else {
+ if (n < 60) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
+ } else {
int count = (Bits::Log2Floor(n) >> 3) + 1;
- assert(count >= 1);
- assert(count <= 4);
+ assert(count >= 1);
+ assert(count <= 4);
*op++ = LITERAL | ((59 + count) << 2);
// Encode in upcoming bytes.
// Write 4 bytes, though we may care about only 1 of them. The output buffer
@@ -370,33 +370,33 @@ static inline char* EmitLiteral(char* op,
// here and there is a memcpy of size 'len' below.
LittleEndian::Store32(op, n);
op += count;
- }
- memcpy(op, literal, len);
- return op + len;
-}
-
+ }
+ memcpy(op, literal, len);
+ return op + len;
+}
+
template <bool len_less_than_12>
static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
assert(len <= 64);
assert(len >= 4);
assert(offset < 65536);
assert(len_less_than_12 == (len < 12));
-
+
if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
// offset fits in 11 bits. The 3 highest go in the top of the first byte,
// and the rest go in the second byte.
*op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
- *op++ = offset & 0xff;
- } else {
+ *op++ = offset & 0xff;
+ } else {
// Write 4 bytes, though we only care about 3 of them. The output buffer
// is required to have some slack, so the extra byte won't overrun it.
uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
LittleEndian::Store32(op, u);
op += 3;
- }
- return op;
-}
-
+ }
+ return op;
+}
+
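// [Illustrative aside, not part of the patch] A worked instance of the
// 1-byte-offset branch in EmitCopyAtMost64() above, assuming the usual snappy
// tag value internal::COPY_1_BYTE_OFFSET == 1: for len = 8, offset = 1000,
//   byte 0 = 1 + ((8 - 4) << 2) + ((1000 >> 3) & 0xe0) = 1 + 16 + 0x60 = 0x71
//   byte 1 = 1000 & 0xff = 0xe8
// and inverting the same arithmetic recovers
// offset = ((0x71 & 0xe0) << 3) | 0xe8 = 1000 and len = ((0x71 >> 2) & 7) + 4 = 8.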
template <bool len_less_than_12>
static inline char* EmitCopy(char* op, size_t offset, size_t len) {
assert(len_less_than_12 == (len < 12));
@@ -405,7 +405,7 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len) {
} else {
// A special case for len <= 64 might help, but so far measurements suggest
// it's in the noise.
-
+
// Emit 64 byte copies but make sure to keep at least four bytes reserved.
while (SNAPPY_PREDICT_FALSE(len >= 68)) {
op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
@@ -425,20 +425,20 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len) {
op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
}
return op;
- }
-}
-
-bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
- uint32 v = 0;
- const char* limit = start + n;
- if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
- *result = v;
- return true;
- } else {
- return false;
- }
-}
-
+ }
+}
+
+bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
+ uint32 v = 0;
+ const char* limit = start + n;
+ if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
+ *result = v;
+ return true;
+ } else {
+ return false;
+ }
+}
+
namespace {
uint32 CalculateTableSize(uint32 input_size) {
static_assert(
@@ -446,16 +446,16 @@ uint32 CalculateTableSize(uint32 input_size) {
"kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
if (input_size > kMaxHashTableSize) {
return kMaxHashTableSize;
- }
+ }
if (input_size < kMinHashTableSize) {
return kMinHashTableSize;
- }
+ }
// This is equivalent to Log2Ceiling(input_size), assuming input_size > 1.
// 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
return 2u << Bits::Log2Floor(input_size - 1);
}
} // namespace
-
+
namespace internal {
WorkingMemory::WorkingMemory(size_t input_size) {
const size_t max_fragment_size = std::min(input_size, kBlockSize);
@@ -476,225 +476,225 @@ uint16* WorkingMemory::GetHashTable(size_t fragment_size,
int* table_size) const {
const size_t htsize = CalculateTableSize(fragment_size);
memset(table_, 0, htsize * sizeof(*table_));
- *table_size = htsize;
+ *table_size = htsize;
return table_;
-}
-} // end namespace internal
-
-// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
-// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
-// empirically found that overlapping loads such as
-// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
-// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
-//
-// We have different versions for 64- and 32-bit; ideally we would avoid the
-// two functions and just inline the UNALIGNED_LOAD64 call into
-// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
-// enough to avoid loading the value multiple times then. For 64-bit, the load
-// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
-// done at GetUint32AtOffset() time.
-
-#ifdef ARCH_K8
-
-typedef uint64 EightBytesReference;
-
-static inline EightBytesReference GetEightBytesAt(const char* ptr) {
- return UNALIGNED_LOAD64(ptr);
-}
-
-static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
+}
+} // end namespace internal
+
+// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
+// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
+// empirically found that overlapping loads such as
+// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
+// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
+//
+// We have different versions for 64- and 32-bit; ideally we would avoid the
+// two functions and just inline the UNALIGNED_LOAD64 call into
+// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
+// enough to avoid loading the value multiple times then. For 64-bit, the load
+// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
+// done at GetUint32AtOffset() time.
+
+#ifdef ARCH_K8
+
+typedef uint64 EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+ return UNALIGNED_LOAD64(ptr);
+}
+
+static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
assert(offset >= 0);
assert(offset <= 4);
- return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
-}
-
-#else
-
-typedef const char* EightBytesReference;
-
-static inline EightBytesReference GetEightBytesAt(const char* ptr) {
- return ptr;
-}
-
-static inline uint32 GetUint32AtOffset(const char* v, int offset) {
+ return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
+}
+
+#else
+
+typedef const char* EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+ return ptr;
+}
+
+static inline uint32 GetUint32AtOffset(const char* v, int offset) {
assert(offset >= 0);
assert(offset <= 4);
- return UNALIGNED_LOAD32(v + offset);
-}
-
-#endif
-
-// Flat array compression that does not emit the "uncompressed length"
-// prefix. Compresses "input" string to the "*op" buffer.
-//
-// REQUIRES: "input" is at most "kBlockSize" bytes long.
-// REQUIRES: "op" points to an array of memory that is at least
-// "MaxCompressedLength(input.size())" in size.
-// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
-// REQUIRES: "table_size" is a power of two
-//
-// Returns an "end" pointer into "op" buffer.
-// "end - op" is the compressed size of "input".
-namespace internal {
-char* CompressFragment(const char* input,
- size_t input_size,
- char* op,
- uint16* table,
- const int table_size) {
- // "ip" is the input pointer, and "op" is the output pointer.
- const char* ip = input;
+ return UNALIGNED_LOAD32(v + offset);
+}
+
+#endif
+
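// The overlapping-load equivalence described above can be shown with a small
// stand-alone sketch. It is an illustration only (not part of snappy), assumes
// a little-endian target, and uses std::memcpy plus <cstdint> fixed-width
// types in place of the UNALIGNED_LOAD*/uint32 macros used in this file.

#include <cstdint>
#include <cstring>

// One 8-byte load, then shift the requested 4 bytes down (offset in [0, 4]).
static inline uint32_t Load32ViaEightBytes(const char* p, int offset) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return static_cast<uint32_t>(v >> (8 * offset));
}

// A direct 4-byte load at p + offset.
static inline uint32_t Load32Direct(const char* p, int offset) {
  uint32_t v;
  std::memcpy(&v, p + offset, sizeof(v));
  return v;
}

// For any buffer with at least 8 readable bytes and offset in [0, 4],
// Load32ViaEightBytes(buf, offset) == Load32Direct(buf, offset).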
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input" is at most "kBlockSize" bytes long.
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input.size())" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+namespace internal {
+char* CompressFragment(const char* input,
+ size_t input_size,
+ char* op,
+ uint16* table,
+ const int table_size) {
+ // "ip" is the input pointer, and "op" is the output pointer.
+ const char* ip = input;
assert(input_size <= kBlockSize);
assert((table_size & (table_size - 1)) == 0); // table must be power of two
- const int shift = 32 - Bits::Log2Floor(table_size);
+ const int shift = 32 - Bits::Log2Floor(table_size);
assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
- const char* ip_end = input + input_size;
- const char* base_ip = ip;
- // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
- // [next_emit, ip_end) after the main loop.
- const char* next_emit = ip;
-
- const size_t kInputMarginBytes = 15;
+ const char* ip_end = input + input_size;
+ const char* base_ip = ip;
+ // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
+ // [next_emit, ip_end) after the main loop.
+ const char* next_emit = ip;
+
+ const size_t kInputMarginBytes = 15;
if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
- const char* ip_limit = input + input_size - kInputMarginBytes;
-
- for (uint32 next_hash = Hash(++ip, shift); ; ) {
+ const char* ip_limit = input + input_size - kInputMarginBytes;
+
+ for (uint32 next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
- // The body of this loop calls EmitLiteral once and then EmitCopy one or
- // more times. (The exception is that when we're close to exhausting
- // the input we goto emit_remainder.)
- //
- // In the first iteration of this loop we're just starting, so
- // there's nothing to copy, so calling EmitLiteral once is
- // necessary. And we only start a new iteration when the
- // current iteration has determined that a call to EmitLiteral will
- // precede the next call to EmitCopy (if any).
- //
- // Step 1: Scan forward in the input looking for a 4-byte-long match.
- // If we get close to exhausting the input then goto emit_remainder.
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
+ // The body of this loop calls EmitLiteral once and then EmitCopy one or
+ // more times. (The exception is that when we're close to exhausting
+ // the input we goto emit_remainder.)
+ //
+ // In the first iteration of this loop we're just starting, so
+ // there's nothing to copy, so calling EmitLiteral once is
+ // necessary. And we only start a new iteration when the
+ // current iteration has determined that a call to EmitLiteral will
+ // precede the next call to EmitCopy (if any).
+ //
+ // Step 1: Scan forward in the input looking for a 4-byte-long match.
+ // If we get close to exhausting the input then goto emit_remainder.
+ //
+ // Heuristic match skipping: If 32 bytes are scanned with no matches
+ // found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match is
// found, immediately go back to looking at every byte. This is a small
// loss (~5% performance, ~0.1% density) for compressible data due to more
- // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
- // win since the compressor quickly "realizes" the data is incompressible
- // and doesn't bother looking for matches everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since the
- // last match; dividing it by 32 (ie. right-shifting by five) gives the
- // number of bytes to move ahead for each iteration.
- uint32 skip = 32;
-
- const char* next_ip = ip;
- const char* candidate;
- do {
- ip = next_ip;
- uint32 hash = next_hash;
+ // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+ // win since the compressor quickly "realizes" the data is incompressible
+ // and doesn't bother looking for matches everywhere.
+ //
+ // The "skip" variable keeps track of how many bytes there are since the
+ // last match; dividing it by 32 (ie. right-shifting by five) gives the
+ // number of bytes to move ahead for each iteration.
+ uint32 skip = 32;
+
+ const char* next_ip = ip;
+ const char* candidate;
+ do {
+ ip = next_ip;
+ uint32 hash = next_hash;
assert(hash == Hash(ip, shift));
uint32 bytes_between_hash_lookups = skip >> 5;
skip += bytes_between_hash_lookups;
- next_ip = ip + bytes_between_hash_lookups;
+ next_ip = ip + bytes_between_hash_lookups;
if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
- goto emit_remainder;
- }
- next_hash = Hash(next_ip, shift);
- candidate = base_ip + table[hash];
+ goto emit_remainder;
+ }
+ next_hash = Hash(next_ip, shift);
+ candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
-
- table[hash] = ip - base_ip;
+
+ table[hash] = ip - base_ip;
} while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
UNALIGNED_LOAD32(candidate)));
-
- // Step 2: A 4-byte match has been found. We'll later see if more
- // than 4 bytes match. But, prior to the match, input
- // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
+
+ // Step 2: A 4-byte match has been found. We'll later see if more
+ // than 4 bytes match. But, prior to the match, input
+ // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
assert(next_emit + 16 <= ip_end);
op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);
-
- // Step 3: Call EmitCopy, and then see if another EmitCopy could
- // be our next move. Repeat until we find no match for the
- // input immediately after what was consumed by the last EmitCopy call.
- //
- // If we exit this loop normally then we need to call EmitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can exit
- // this loop via goto if we get close to exhausting the input.
- EightBytesReference input_bytes;
- uint32 candidate_bytes = 0;
-
- do {
- // We have a 4-byte match at ip, and no need to emit any
- // "literal bytes" prior to ip.
- const char* base = ip;
+
+ // Step 3: Call EmitCopy, and then see if another EmitCopy could
+ // be our next move. Repeat until we find no match for the
+ // input immediately after what was consumed by the last EmitCopy call.
+ //
+ // If we exit this loop normally then we need to call EmitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can exit
+ // this loop via goto if we get close to exhausting the input.
+ EightBytesReference input_bytes;
+ uint32 candidate_bytes = 0;
+
+ do {
+ // We have a 4-byte match at ip, and no need to emit any
+ // "literal bytes" prior to ip.
+ const char* base = ip;
std::pair<size_t, bool> p =
FindMatchLength(candidate + 4, ip + 4, ip_end);
size_t matched = 4 + p.first;
- ip += matched;
- size_t offset = base - candidate;
+ ip += matched;
+ size_t offset = base - candidate;
assert(0 == memcmp(base, candidate, matched));
if (p.second) {
op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
} else {
op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
}
- next_emit = ip;
+ next_emit = ip;
if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
- goto emit_remainder;
- }
+ goto emit_remainder;
+ }
// We are now looking for a 4-byte match again. We read
// table[Hash(ip, shift)] for that. To improve compression,
// we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
input_bytes = GetEightBytesAt(ip - 1);
- uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
- table[prev_hash] = ip - base_ip - 1;
- uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
- candidate = base_ip + table[cur_hash];
- candidate_bytes = UNALIGNED_LOAD32(candidate);
- table[cur_hash] = ip - base_ip;
- } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
-
- next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
- ++ip;
- }
- }
-
- emit_remainder:
- // Emit the remaining bytes as a literal
- if (next_emit < ip_end) {
+ uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
+ table[prev_hash] = ip - base_ip - 1;
+ uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
+ candidate = base_ip + table[cur_hash];
+ candidate_bytes = UNALIGNED_LOAD32(candidate);
+ table[cur_hash] = ip - base_ip;
+ } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
+
+ next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
+ ++ip;
+ }
+ }
+
+ emit_remainder:
+ // Emit the remaining bytes as a literal
+ if (next_emit < ip_end) {
op = EmitLiteral</*allow_fast_path=*/false>(op, next_emit,
ip_end - next_emit);
- }
-
- return op;
-}
-} // end namespace internal
-
+ }
+
+ return op;
+}
+} // end namespace internal
+
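// The match-skipping schedule described in CompressFragment() above can be
// seen with a short stand-alone sketch (an illustration only, not library
// code): "skip" starts at 32 and grows by skip >> 5 per probe, so the first
// 32 probes advance 1 byte each, the next 16 advance 2 bytes, then 3, and so on.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t skip = 32;
  uint32_t pos = 0;
  for (int probe = 1; probe <= 96; ++probe) {
    uint32_t step = skip >> 5;  // bytes_between_hash_lookups in CompressFragment
    skip += step;
    pos += step;
    if (probe % 16 == 0)
      std::printf("after %2d probes: step=%u, scanned %u bytes\n", probe, step, pos);
  }
  return 0;
}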
// Called back at every compression call to trace parameters and sizes.
static inline void Report(const char *algorithm, size_t compressed_size,
size_t uncompressed_size) {}
-// Signature of output types needed by decompression code.
-// The decompression code is templatized on a type that obeys this
-// signature so that we do not pay virtual function call overhead in
-// the middle of a tight decompression loop.
-//
-// class DecompressionWriter {
-// public:
-// // Called before decompression
-// void SetExpectedLength(size_t length);
-//
-// // Called after decompression
-// bool CheckLength() const;
-//
-// // Called repeatedly during decompression
-// bool Append(const char* ip, size_t length);
-// bool AppendFromSelf(uint32 offset, size_t length);
-//
+// Signature of output types needed by decompression code.
+// The decompression code is templatized on a type that obeys this
+// signature so that we do not pay virtual function call overhead in
+// the middle of a tight decompression loop.
+//
+// class DecompressionWriter {
+// public:
+// // Called before decompression
+// void SetExpectedLength(size_t length);
+//
+// // Called after decompression
+// bool CheckLength() const;
+//
+// // Called repeatedly during decompression
+// bool Append(const char* ip, size_t length);
+// bool AppendFromSelf(uint32 offset, size_t length);
+//
// // The rules for how TryFastAppend differs from Append are somewhat
// // convoluted:
-// //
+// //
// // - TryFastAppend is allowed to decline (return false) at any
// // time, for any reason -- just "return false" would be
// // a perfectly legal implementation of TryFastAppend.
@@ -711,10 +711,10 @@ static inline void Report(const char *algorithm, size_t compressed_size,
// // decoded fully. In practice, this should not be a big problem,
// // as it is unlikely that one would implement a fast path accepting
// // this much data.
-// //
-// bool TryFastAppend(const char* ip, size_t available, size_t length);
-// };
-
+// //
+// bool TryFastAppend(const char* ip, size_t available, size_t length);
+// };
+
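// A minimal writer that satisfies the interface documented above (an
// illustration only, not part of snappy): it decompresses into a std::string
// and always declines the fast path, which the comment above states is a
// legal implementation. The byte-by-byte self-copy is chosen for clarity,
// not for speed.

#include <cstddef>
#include <cstdint>
#include <string>

class StringDecompressionWriter {
 public:
  void SetExpectedLength(size_t length) { expected_ = length; }
  bool CheckLength() const { return out_.size() == expected_; }
  bool Append(const char* ip, size_t length) {
    out_.append(ip, length);
    return out_.size() <= expected_;
  }
  bool TryFastAppend(const char*, size_t, size_t) { return false; }  // always decline
  bool AppendFromSelf(uint32_t offset, size_t length) {
    if (offset == 0 || offset > out_.size()) return false;
    for (size_t i = 0; i < length; ++i)  // byte-wise so overlapping copies work
      out_.push_back(out_[out_.size() - offset]);
    return out_.size() <= expected_;
  }
  void Flush() {}
  const std::string& result() const { return out_; }

 private:
  std::string out_;
  size_t expected_ = 0;
};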
static inline uint32 ExtractLowBytes(uint32 v, int n) {
assert(n >= 0);
assert(n <= 4);
@@ -726,8 +726,8 @@ static inline uint32 ExtractLowBytes(uint32 v, int n) {
uint64 mask = 0xffffffff;
return v & ~(mask << (8 * n));
#endif
-}
-
+}
+
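// Note: ExtractLowBytes(v, n) keeps the low n bytes of v, e.g.
// ExtractLowBytes(0xaabbccdd, 2) == 0xccdd. It is used below to pick out the
// variable-length parts of literal and copy tags.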
static inline bool LeftShiftOverflows(uint8 value, uint32 shift) {
assert(shift < 32);
static const uint8 masks[] = {
@@ -736,77 +736,77 @@ static inline bool LeftShiftOverflows(uint8 value, uint32 shift) {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};
return (value & masks[shift]) != 0;
-}
-
-// Helper class for decompression
-class SnappyDecompressor {
- private:
- Source* reader_; // Underlying source of bytes to decompress
- const char* ip_; // Points to next buffered byte
- const char* ip_limit_; // Points just past buffered bytes
- uint32 peeked_; // Bytes peeked from reader (need to skip)
- bool eof_; // Hit end of input without an error?
+}
+
+// Helper class for decompression
+class SnappyDecompressor {
+ private:
+ Source* reader_; // Underlying source of bytes to decompress
+ const char* ip_; // Points to next buffered byte
+ const char* ip_limit_; // Points just past buffered bytes
+ uint32 peeked_; // Bytes peeked from reader (need to skip)
+ bool eof_; // Hit end of input without an error?
char scratch_[kMaximumTagLength]; // See RefillTag().
-
- // Ensure that all of the tag metadata for the next tag is available
- // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
- // if (ip_limit_ - ip_ < 5).
- //
- // Returns true on success, false on error or end of input.
- bool RefillTag();
-
- public:
- explicit SnappyDecompressor(Source* reader)
- : reader_(reader),
- ip_(NULL),
- ip_limit_(NULL),
- peeked_(0),
- eof_(false) {
- }
-
- ~SnappyDecompressor() {
- // Advance past any bytes we peeked at from the reader
- reader_->Skip(peeked_);
- }
-
- // Returns true iff we have hit the end of the input without an error.
- bool eof() const {
- return eof_;
- }
-
- // Read the uncompressed length stored at the start of the compressed data.
+
+ // Ensure that all of the tag metadata for the next tag is available
+ // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
+ // if (ip_limit_ - ip_ < 5).
+ //
+ // Returns true on success, false on error or end of input.
+ bool RefillTag();
+
+ public:
+ explicit SnappyDecompressor(Source* reader)
+ : reader_(reader),
+ ip_(NULL),
+ ip_limit_(NULL),
+ peeked_(0),
+ eof_(false) {
+ }
+
+ ~SnappyDecompressor() {
+ // Advance past any bytes we peeked at from the reader
+ reader_->Skip(peeked_);
+ }
+
+ // Returns true iff we have hit the end of the input without an error.
+ bool eof() const {
+ return eof_;
+ }
+
+ // Read the uncompressed length stored at the start of the compressed data.
// On success, stores the length in *result and returns true.
- // On failure, returns false.
- bool ReadUncompressedLength(uint32* result) {
+ // On failure, returns false.
+ bool ReadUncompressedLength(uint32* result) {
assert(ip_ == NULL); // Must not have read anything yet
- // Length is encoded in 1..5 bytes
- *result = 0;
- uint32 shift = 0;
- while (true) {
- if (shift >= 32) return false;
- size_t n;
- const char* ip = reader_->Peek(&n);
- if (n == 0) return false;
- const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
- reader_->Skip(1);
+ // Length is encoded in 1..5 bytes
+ *result = 0;
+ uint32 shift = 0;
+ while (true) {
+ if (shift >= 32) return false;
+ size_t n;
+ const char* ip = reader_->Peek(&n);
+ if (n == 0) return false;
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+ reader_->Skip(1);
uint32 val = c & 0x7f;
if (LeftShiftOverflows(static_cast<uint8>(val), shift)) return false;
*result |= val << shift;
- if (c < 128) {
- break;
- }
- shift += 7;
- }
- return true;
- }
-
- // Process the next item found in the input.
- // Returns true if successful, false on error or end of input.
- template <class Writer>
+ if (c < 128) {
+ break;
+ }
+ shift += 7;
+ }
+ return true;
+ }
+
+ // Process the next item found in the input.
+ // Returns true if successful, false on error or end of input.
+ template <class Writer>
#if defined(__GNUC__) && defined(__x86_64__)
__attribute__((aligned(32)))
#endif
- void DecompressAllTags(Writer* writer) {
+ void DecompressAllTags(Writer* writer) {
// In x86, pad the function body to start 16 bytes later. This function has
// a couple of hotspots that are highly sensitive to alignment: we have
// observed regressions by more than 20% in some metrics just by moving the
@@ -823,22 +823,22 @@ class SnappyDecompressor {
asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
#endif
- const char* ip = ip_;
- // We could have put this refill fragment only at the beginning of the loop.
- // However, duplicating it at the end of each branch gives the compiler more
- // scope to optimize the <ip_limit_ - ip> expression based on the local
- // context, which overall increases speed.
- #define MAYBE_REFILL() \
+ const char* ip = ip_;
+ // We could have put this refill fragment only at the beginning of the loop.
+ // However, duplicating it at the end of each branch gives the compiler more
+ // scope to optimize the <ip_limit_ - ip> expression based on the local
+ // context, which overall increases speed.
+ #define MAYBE_REFILL() \
if (ip_limit_ - ip < kMaximumTagLength) { \
- ip_ = ip; \
- if (!RefillTag()) return; \
- ip = ip_; \
- }
-
- MAYBE_REFILL();
- for ( ;; ) {
- const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
-
+ ip_ = ip; \
+ if (!RefillTag()) return; \
+ ip = ip_; \
+ }
+
+ MAYBE_REFILL();
+ for ( ;; ) {
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
+
// Ratio of iterations that have LITERAL vs non-LITERAL for different
// inputs.
//
@@ -852,202 +852,202 @@ class SnappyDecompressor {
// pb 24% 76%
// bin 24% 76%
if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
- size_t literal_length = (c >> 2) + 1u;
- if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
+ size_t literal_length = (c >> 2) + 1u;
+ if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
assert(literal_length < 61);
- ip += literal_length;
+ ip += literal_length;
// NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
// will not return true unless there's already at least five spare
// bytes in addition to the literal.
- continue;
- }
+ continue;
+ }
if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
- // Long literal.
- const size_t literal_length_length = literal_length - 60;
- literal_length =
+ // Long literal.
+ const size_t literal_length_length = literal_length - 60;
+ literal_length =
ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
1;
- ip += literal_length_length;
- }
-
- size_t avail = ip_limit_ - ip;
- while (avail < literal_length) {
- if (!writer->Append(ip, avail)) return;
- literal_length -= avail;
- reader_->Skip(peeked_);
- size_t n;
- ip = reader_->Peek(&n);
- avail = n;
- peeked_ = avail;
- if (avail == 0) return; // Premature end of input
- ip_limit_ = ip + avail;
- }
- if (!writer->Append(ip, literal_length)) {
- return;
- }
- ip += literal_length;
- MAYBE_REFILL();
- } else {
+ ip += literal_length_length;
+ }
+
+ size_t avail = ip_limit_ - ip;
+ while (avail < literal_length) {
+ if (!writer->Append(ip, avail)) return;
+ literal_length -= avail;
+ reader_->Skip(peeked_);
+ size_t n;
+ ip = reader_->Peek(&n);
+ avail = n;
+ peeked_ = avail;
+ if (avail == 0) return; // Premature end of input
+ ip_limit_ = ip + avail;
+ }
+ if (!writer->Append(ip, literal_length)) {
+ return;
+ }
+ ip += literal_length;
+ MAYBE_REFILL();
+ } else {
const size_t entry = char_table[c];
const size_t trailer =
ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11);
const size_t length = entry & 0xff;
- ip += entry >> 11;
-
- // copy_offset/256 is encoded in bits 8..10. By just fetching
- // those bits, we get copy_offset (since the bit-field starts at
- // bit 8).
+ ip += entry >> 11;
+
+ // copy_offset/256 is encoded in bits 8..10. By just fetching
+ // those bits, we get copy_offset (since the bit-field starts at
+ // bit 8).
const size_t copy_offset = entry & 0x700;
- if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
- return;
- }
- MAYBE_REFILL();
- }
- }
-
-#undef MAYBE_REFILL
- }
-};
-
-bool SnappyDecompressor::RefillTag() {
- const char* ip = ip_;
- if (ip == ip_limit_) {
- // Fetch a new fragment from the reader
- reader_->Skip(peeked_); // All peeked bytes are used up
- size_t n;
- ip = reader_->Peek(&n);
- peeked_ = n;
+ if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
+ return;
+ }
+ MAYBE_REFILL();
+ }
+ }
+
+#undef MAYBE_REFILL
+ }
+};
+
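// The length prefix read by ReadUncompressedLength() above is a little-endian
// base-128 varint: seven payload bits per byte, with the high bit set on every
// byte except the last. A matching encoder, shown as a stand-alone sketch (an
// illustration only, not library code):

#include <cstdint>

// Writes "value" into "dst" (room for up to 5 bytes) and returns the number
// of bytes used.
static inline int EncodeLength32(char* dst, uint32_t value) {
  int n = 0;
  while (value >= 0x80) {
    dst[n++] = static_cast<char>((value & 0x7f) | 0x80);
    value >>= 7;
  }
  dst[n++] = static_cast<char>(value);
  return n;
}

// Example: 0x12345 encodes as the bytes c5 c6 04, which the loop in
// ReadUncompressedLength() decodes back to 0x12345.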
+bool SnappyDecompressor::RefillTag() {
+ const char* ip = ip_;
+ if (ip == ip_limit_) {
+ // Fetch a new fragment from the reader
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ size_t n;
+ ip = reader_->Peek(&n);
+ peeked_ = n;
eof_ = (n == 0);
if (eof_) return false;
- ip_limit_ = ip + n;
- }
-
- // Read the tag character
+ ip_limit_ = ip + n;
+ }
+
+ // Read the tag character
assert(ip < ip_limit_);
- const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
- const uint32 entry = char_table[c];
- const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+ const uint32 entry = char_table[c];
+ const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
assert(needed <= sizeof(scratch_));
-
- // Read more bytes from reader if needed
- uint32 nbuf = ip_limit_ - ip;
- if (nbuf < needed) {
- // Stitch together bytes from ip and reader to form the word
- // contents. We store the needed bytes in "scratch_". They
- // will be consumed immediately by the caller since we do not
- // read more than we need.
- memmove(scratch_, ip, nbuf);
- reader_->Skip(peeked_); // All peeked bytes are used up
- peeked_ = 0;
- while (nbuf < needed) {
- size_t length;
- const char* src = reader_->Peek(&length);
- if (length == 0) return false;
+
+ // Read more bytes from reader if needed
+ uint32 nbuf = ip_limit_ - ip;
+ if (nbuf < needed) {
+ // Stitch together bytes from ip and reader to form the word
+ // contents. We store the needed bytes in "scratch_". They
+ // will be consumed immediately by the caller since we do not
+ // read more than we need.
+ memmove(scratch_, ip, nbuf);
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ peeked_ = 0;
+ while (nbuf < needed) {
+ size_t length;
+ const char* src = reader_->Peek(&length);
+ if (length == 0) return false;
uint32 to_add = std::min<uint32>(needed - nbuf, length);
- memcpy(scratch_ + nbuf, src, to_add);
- nbuf += to_add;
- reader_->Skip(to_add);
- }
+ memcpy(scratch_ + nbuf, src, to_add);
+ nbuf += to_add;
+ reader_->Skip(to_add);
+ }
assert(nbuf == needed);
- ip_ = scratch_;
- ip_limit_ = scratch_ + needed;
+ ip_ = scratch_;
+ ip_limit_ = scratch_ + needed;
} else if (nbuf < kMaximumTagLength) {
- // Have enough bytes, but move into scratch_ so that we do not
- // read past end of input
- memmove(scratch_, ip, nbuf);
- reader_->Skip(peeked_); // All peeked bytes are used up
- peeked_ = 0;
- ip_ = scratch_;
- ip_limit_ = scratch_ + nbuf;
- } else {
- // Pass pointer to buffer returned by reader_.
- ip_ = ip;
- }
- return true;
-}
-
-template <typename Writer>
+ // Have enough bytes, but move into scratch_ so that we do not
+ // read past end of input
+ memmove(scratch_, ip, nbuf);
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ peeked_ = 0;
+ ip_ = scratch_;
+ ip_limit_ = scratch_ + nbuf;
+ } else {
+ // Pass pointer to buffer returned by reader_.
+ ip_ = ip;
+ }
+ return true;
+}
+
+template <typename Writer>
static bool InternalUncompress(Source* r, Writer* writer) {
- // Read the uncompressed length from the front of the compressed input
- SnappyDecompressor decompressor(r);
- uint32 uncompressed_len = 0;
- if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+ // Read the uncompressed length from the front of the compressed input
+ SnappyDecompressor decompressor(r);
+ uint32 uncompressed_len = 0;
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
return InternalUncompressAllTags(&decompressor, writer, r->Available(),
uncompressed_len);
-}
-
-template <typename Writer>
-static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
- Writer* writer,
+}
+
+template <typename Writer>
+static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+ Writer* writer,
uint32 compressed_len,
uint32 uncompressed_len) {
Report("snappy_uncompress", compressed_len, uncompressed_len);
-
- writer->SetExpectedLength(uncompressed_len);
-
- // Process the entire input
- decompressor->DecompressAllTags(writer);
+
+ writer->SetExpectedLength(uncompressed_len);
+
+ // Process the entire input
+ decompressor->DecompressAllTags(writer);
writer->Flush();
- return (decompressor->eof() && writer->CheckLength());
-}
-
-bool GetUncompressedLength(Source* source, uint32* result) {
- SnappyDecompressor decompressor(source);
- return decompressor.ReadUncompressedLength(result);
-}
-
-size_t Compress(Source* reader, Sink* writer) {
- size_t written = 0;
- size_t N = reader->Available();
+ return (decompressor->eof() && writer->CheckLength());
+}
+
+bool GetUncompressedLength(Source* source, uint32* result) {
+ SnappyDecompressor decompressor(source);
+ return decompressor.ReadUncompressedLength(result);
+}
+
+size_t Compress(Source* reader, Sink* writer) {
+ size_t written = 0;
+ size_t N = reader->Available();
const size_t uncompressed_size = N;
- char ulength[Varint::kMax32];
- char* p = Varint::Encode32(ulength, N);
- writer->Append(ulength, p-ulength);
- written += (p - ulength);
-
+ char ulength[Varint::kMax32];
+ char* p = Varint::Encode32(ulength, N);
+ writer->Append(ulength, p-ulength);
+ written += (p - ulength);
+
internal::WorkingMemory wmem(N);
-
- while (N > 0) {
- // Get next block to compress (without copying if possible)
- size_t fragment_size;
- const char* fragment = reader->Peek(&fragment_size);
+
+ while (N > 0) {
+ // Get next block to compress (without copying if possible)
+ size_t fragment_size;
+ const char* fragment = reader->Peek(&fragment_size);
assert(fragment_size != 0); // premature end of input
const size_t num_to_read = std::min(N, kBlockSize);
- size_t bytes_read = fragment_size;
-
- size_t pending_advance = 0;
- if (bytes_read >= num_to_read) {
- // Buffer returned by reader is large enough
- pending_advance = num_to_read;
- fragment_size = num_to_read;
- } else {
+ size_t bytes_read = fragment_size;
+
+ size_t pending_advance = 0;
+ if (bytes_read >= num_to_read) {
+ // Buffer returned by reader is large enough
+ pending_advance = num_to_read;
+ fragment_size = num_to_read;
+ } else {
char* scratch = wmem.GetScratchInput();
- memcpy(scratch, fragment, bytes_read);
- reader->Skip(bytes_read);
-
- while (bytes_read < num_to_read) {
- fragment = reader->Peek(&fragment_size);
+ memcpy(scratch, fragment, bytes_read);
+ reader->Skip(bytes_read);
+
+ while (bytes_read < num_to_read) {
+ fragment = reader->Peek(&fragment_size);
size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
- memcpy(scratch + bytes_read, fragment, n);
- bytes_read += n;
- reader->Skip(n);
- }
+ memcpy(scratch + bytes_read, fragment, n);
+ bytes_read += n;
+ reader->Skip(n);
+ }
assert(bytes_read == num_to_read);
- fragment = scratch;
- fragment_size = num_to_read;
- }
+ fragment = scratch;
+ fragment_size = num_to_read;
+ }
assert(fragment_size == num_to_read);
-
- // Get encoding table for compression
- int table_size;
- uint16* table = wmem.GetHashTable(num_to_read, &table_size);
-
- // Compress input_fragment and append to dest
- const int max_output = MaxCompressedLength(num_to_read);
-
- // Need a scratch buffer for the output, in case the byte sink doesn't
- // have room for us directly.
+
+ // Get encoding table for compression
+ int table_size;
+ uint16* table = wmem.GetHashTable(num_to_read, &table_size);
+
+ // Compress input_fragment and append to dest
+ const int max_output = MaxCompressedLength(num_to_read);
+
+ // Need a scratch buffer for the output, in case the byte sink doesn't
+ // have room for us directly.
// Since we encode kBlockSize regions followed by a region
// which is <= kBlockSize in length, a previously allocated
@@ -1055,19 +1055,19 @@ size_t Compress(Source* reader, Sink* writer) {
char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
table_size);
- writer->Append(dest, end - dest);
- written += (end - dest);
-
- N -= num_to_read;
- reader->Skip(pending_advance);
- }
-
+ writer->Append(dest, end - dest);
+ written += (end - dest);
+
+ N -= num_to_read;
+ reader->Skip(pending_advance);
+ }
+
Report("snappy_compress", written, uncompressed_size);
-
- return written;
-}
-
-// -----------------------------------------------------------------------
+
+ return written;
+}
+
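// End-to-end use of the routines in this file through the public API declared
// in snappy.h (a sketch only; the include path is an assumption and may differ
// in this repository, and error handling is kept minimal):

#include <cassert>
#include <string>
#include <snappy.h>  // assumed include path

void RoundTripExample() {
  const std::string original(100000, 'x');  // highly compressible input
  std::string compressed;
  snappy::Compress(original.data(), original.size(), &compressed);

  assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  std::string restored;
  bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &restored);
  assert(ok && restored == original);
  (void)ok;
}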
+// -----------------------------------------------------------------------
// IOVec interfaces
// -----------------------------------------------------------------------
@@ -1260,60 +1260,60 @@ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
}
// -----------------------------------------------------------------------
-// Flat array interfaces
-// -----------------------------------------------------------------------
-
-// A type that writes to a flat array.
-// Note that this is not a "ByteSink", but a type that matches the
-// Writer template argument to SnappyDecompressor::DecompressAllTags().
-class SnappyArrayWriter {
- private:
- char* base_;
- char* op_;
- char* op_limit_;
-
- public:
- inline explicit SnappyArrayWriter(char* dst)
- : base_(dst),
+// Flat array interfaces
+// -----------------------------------------------------------------------
+
+// A type that writes to a flat array.
+// Note that this is not a "ByteSink", but a type that matches the
+// Writer template argument to SnappyDecompressor::DecompressAllTags().
+class SnappyArrayWriter {
+ private:
+ char* base_;
+ char* op_;
+ char* op_limit_;
+
+ public:
+ inline explicit SnappyArrayWriter(char* dst)
+ : base_(dst),
op_(dst),
op_limit_(dst) {
- }
-
- inline void SetExpectedLength(size_t len) {
- op_limit_ = op_ + len;
- }
-
- inline bool CheckLength() const {
- return op_ == op_limit_;
- }
-
- inline bool Append(const char* ip, size_t len) {
- char* op = op_;
- const size_t space_left = op_limit_ - op;
- if (space_left < len) {
- return false;
- }
- memcpy(op, ip, len);
- op_ = op + len;
- return true;
- }
-
- inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
- char* op = op_;
- const size_t space_left = op_limit_ - op;
+ }
+
+ inline void SetExpectedLength(size_t len) {
+ op_limit_ = op_ + len;
+ }
+
+ inline bool CheckLength() const {
+ return op_ == op_limit_;
+ }
+
+ inline bool Append(const char* ip, size_t len) {
+ char* op = op_;
+ const size_t space_left = op_limit_ - op;
+ if (space_left < len) {
+ return false;
+ }
+ memcpy(op, ip, len);
+ op_ = op + len;
+ return true;
+ }
+
+ inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
+ char* op = op_;
+ const size_t space_left = op_limit_ - op;
if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
- // Fast path, used for the majority (about 95%) of invocations.
+ // Fast path, used for the majority (about 95%) of invocations.
UnalignedCopy128(ip, op);
- op_ = op + len;
- return true;
- } else {
- return false;
- }
- }
-
- inline bool AppendFromSelf(size_t offset, size_t len) {
+ op_ = op + len;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ inline bool AppendFromSelf(size_t offset, size_t len) {
char* const op_end = op_ + len;
-
+
// Check if we try to append from before the start of the buffer.
// Normally this would just be a check for "produced < offset",
// but "produced <= offset - 1u" is equivalent for every case
@@ -1323,40 +1323,40 @@ class SnappyArrayWriter {
// into an infinite loop.
if (Produced() <= offset - 1u || op_end > op_limit_) return false;
op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);
-
- return true;
- }
+
+ return true;
+ }
inline size_t Produced() const {
assert(op_ >= base_);
return op_ - base_;
}
inline void Flush() {}
-};
-
-bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
- ByteArraySource reader(compressed, n);
- return RawUncompress(&reader, uncompressed);
-}
-
-bool RawUncompress(Source* compressed, char* uncompressed) {
- SnappyArrayWriter output(uncompressed);
+};
+
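// The "offset - 1u" comparison used in AppendFromSelf() above folds two checks
// into a single unsigned compare. Restated as a stand-alone helper (an
// illustration only, not library code):

#include <cstddef>

// True when a back-reference of "offset" bytes is addressable after
// "produced" bytes of output, i.e. offset != 0 and offset <= produced.
static inline bool BackReferenceInRange(size_t produced, size_t offset) {
  // For offset == 0, offset - 1u wraps to SIZE_MAX, so the test fails; this
  // also rejects the degenerate self-copy that would loop forever.
  return produced > offset - 1u;
}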
+bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
+ ByteArraySource reader(compressed, n);
+ return RawUncompress(&reader, uncompressed);
+}
+
+bool RawUncompress(Source* compressed, char* uncompressed) {
+ SnappyArrayWriter output(uncompressed);
return InternalUncompress(compressed, &output);
-}
-
+}
+
bool Uncompress(const char* compressed, size_t n, std::string* uncompressed) {
- size_t ulength;
- if (!GetUncompressedLength(compressed, n, &ulength)) {
- return false;
- }
+ size_t ulength;
+ if (!GetUncompressedLength(compressed, n, &ulength)) {
+ return false;
+ }
// On 32-bit builds: max_size() < kuint32max. Check for that instead
// of crashing (e.g., consider externally specified compressed data).
if (ulength > uncompressed->max_size()) {
- return false;
- }
- STLStringResizeUninitialized(uncompressed, ulength);
- return RawUncompress(compressed, n, string_as_array(uncompressed));
-}
-
+ return false;
+ }
+ STLStringResizeUninitialized(uncompressed, ulength);
+ return RawUncompress(compressed, n, string_as_array(uncompressed));
+}
+
bool Uncompress(const char* compressed, size_t n, TString* uncompressed) {
size_t ulength;
if (!GetUncompressedLength(compressed, n, &ulength)) {
@@ -1371,72 +1371,72 @@ bool Uncompress(const char* compressed, size_t n, TString* uncompressed) {
return RawUncompress(compressed, n, uncompressed->begin());
}
-// A Writer that drops everything on the floor and just does validation
-class SnappyDecompressionValidator {
- private:
- size_t expected_;
- size_t produced_;
-
- public:
+// A Writer that drops everything on the floor and just does validation
+class SnappyDecompressionValidator {
+ private:
+ size_t expected_;
+ size_t produced_;
+
+ public:
inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
- inline void SetExpectedLength(size_t len) {
- expected_ = len;
- }
- inline bool CheckLength() const {
- return expected_ == produced_;
- }
- inline bool Append(const char* ip, size_t len) {
- produced_ += len;
- return produced_ <= expected_;
- }
- inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
- return false;
- }
- inline bool AppendFromSelf(size_t offset, size_t len) {
+ inline void SetExpectedLength(size_t len) {
+ expected_ = len;
+ }
+ inline bool CheckLength() const {
+ return expected_ == produced_;
+ }
+ inline bool Append(const char* ip, size_t len) {
+ produced_ += len;
+ return produced_ <= expected_;
+ }
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+ return false;
+ }
+ inline bool AppendFromSelf(size_t offset, size_t len) {
// See SnappyArrayWriter::AppendFromSelf for an explanation of
// the "offset - 1u" trick.
if (produced_ <= offset - 1u) return false;
- produced_ += len;
- return produced_ <= expected_;
- }
+ produced_ += len;
+ return produced_ <= expected_;
+ }
inline void Flush() {}
-};
-
-bool IsValidCompressedBuffer(const char* compressed, size_t n) {
- ByteArraySource reader(compressed, n);
- SnappyDecompressionValidator writer;
+};
+
+bool IsValidCompressedBuffer(const char* compressed, size_t n) {
+ ByteArraySource reader(compressed, n);
+ SnappyDecompressionValidator writer;
return InternalUncompress(&reader, &writer);
-}
-
+}
+
bool IsValidCompressed(Source* compressed) {
SnappyDecompressionValidator writer;
return InternalUncompress(compressed, &writer);
}
-void RawCompress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length) {
- ByteArraySource reader(input, input_length);
- UncheckedByteArraySink writer(compressed);
- Compress(&reader, &writer);
-
- // Compute how many bytes were added
- *compressed_length = (writer.CurrentDestination() - compressed);
-}
-
+void RawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length) {
+ ByteArraySource reader(input, input_length);
+ UncheckedByteArraySink writer(compressed);
+ Compress(&reader, &writer);
+
+ // Compute how many bytes were added
+ *compressed_length = (writer.CurrentDestination() - compressed);
+}
+
size_t Compress(const char* input, size_t input_length,
std::string* compressed) {
- // Pre-grow the buffer to the max length of the compressed output
+ // Pre-grow the buffer to the max length of the compressed output
STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
-
- size_t compressed_length;
- RawCompress(input, input_length, string_as_array(compressed),
- &compressed_length);
- compressed->resize(compressed_length);
- return compressed_length;
-}
-
+
+ size_t compressed_length;
+ RawCompress(input, input_length, string_as_array(compressed),
+ &compressed_length);
+ compressed->resize(compressed_length);
+ return compressed_length;
+}
+
size_t Compress(const char* input, size_t input_length,
TString* compressed) {
// Pre-grow the buffer to the max length of the compressed output
@@ -1452,14 +1452,14 @@ size_t Compress(const char* input, size_t input_length,
// -----------------------------------------------------------------------
// Sink interface
// -----------------------------------------------------------------------
-
+
// A type that decompresses into a Sink. The template parameter
// Allocator must export one method "char* Allocate(int size);", which
// allocates a buffer of "size" and appends that to the destination.
template <typename Allocator>
class SnappyScatteredWriter {
Allocator allocator_;
-
+
// We need random access into the data generated so far. Therefore
// we keep track of all of the generated data as an array of blocks.
// All of the blocks except the last have length kBlockSize.
diff --git a/contrib/libs/snappy/snappy.h b/contrib/libs/snappy/snappy.h
index a91ef2b4d5..9a3bc3fa64 100644
--- a/contrib/libs/snappy/snappy.h
+++ b/contrib/libs/snappy/snappy.h
@@ -1,63 +1,63 @@
-// Copyright 2005 and onwards Google Inc.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// A light-weight compression algorithm. It is designed for speed of
-// compression and decompression, rather than for the utmost in space
-// savings.
-//
-// For getting better compression ratios when you are compressing data
-// with long repeated sequences or compressing data that is similar to
-// other data, while still compressing fast, you might look at first
-// using BMDiff and then compressing the output of BMDiff with
-// Snappy.
-
+// Copyright 2005 and onwards Google Inc.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A light-weight compression algorithm. It is designed for speed of
+// compression and decompression, rather than for the utmost in space
+// savings.
+//
+// For getting better compression ratios when you are compressing data
+// with long repeated sequences or compressing data that is similar to
+// other data, while still compressing fast, you might look at first
+// using BMDiff and then compressing the output of BMDiff with
+// Snappy.
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
#define THIRD_PARTY_SNAPPY_SNAPPY_H__
-
+
#include <cstddef>
#include <string>
-
+
#include <util/generic/fwd.h>
-#include "snappy-stubs-public.h"
-
-namespace snappy {
- class Source;
- class Sink;
-
- // ------------------------------------------------------------------------
- // Generic compression/decompression routines.
- // ------------------------------------------------------------------------
-
- // Compress the bytes read from "*source" and append to "*sink". Return the
- // number of bytes written.
- size_t Compress(Source* source, Sink* sink);
-
+#include "snappy-stubs-public.h"
+
+namespace snappy {
+ class Source;
+ class Sink;
+
+ // ------------------------------------------------------------------------
+ // Generic compression/decompression routines.
+ // ------------------------------------------------------------------------
+
+ // Compress the bytes read from "*source" and append to "*sink". Return the
+ // number of bytes written.
+ size_t Compress(Source* source, Sink* sink);
+
// Find the uncompressed length of the given stream, as given by the header.
// Note that the true length could deviate from this; the stream could e.g.
// be truncated.
@@ -65,37 +65,37 @@ namespace snappy {
// Also note that this leaves "*source" in a state that is unsuitable for
// further operations, such as RawUncompress(). You will need to rewind
// or recreate the source yourself before attempting any further calls.
- bool GetUncompressedLength(Source* source, uint32* result);
-
- // ------------------------------------------------------------------------
- // Higher-level string based routines (should be sufficient for most users)
- // ------------------------------------------------------------------------
-
+ bool GetUncompressedLength(Source* source, uint32* result);
+
+ // ------------------------------------------------------------------------
+ // Higher-level string based routines (should be sufficient for most users)
+ // ------------------------------------------------------------------------
+
// Sets "*compressed" to the compressed version of "input[0,input_length-1]".
// Original contents of *compressed are lost.
- //
+ //
// REQUIRES: "input[]" is not an alias of "*compressed".
size_t Compress(const char* input, size_t input_length,
std::string* compressed);
size_t Compress(const char* input, size_t input_length,
TString* compressed);
-
- // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
- // Original contents of "*uncompressed" are lost.
- //
- // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
- //
- // returns false if the message is corrupted and could not be decompressed
- bool Uncompress(const char* compressed, size_t compressed_length,
+
+ // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
+ // Original contents of "*uncompressed" are lost.
+ //
+ // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
+ //
+ // returns false if the message is corrupted and could not be decompressed
+ bool Uncompress(const char* compressed, size_t compressed_length,
std::string* uncompressed);
bool Uncompress(const char* compressed, size_t compressed_length,
TString* uncompressed);
-
+
// Decompresses "compressed" to "*uncompressed".
//
// returns false if the message is corrupted and could not be decompressed
bool Uncompress(Source* compressed, Sink* uncompressed);
-
+
// This routine uncompresses as much of the "compressed" as possible
// into sink. It returns the number of valid bytes added to sink
// (extra invalid bytes may have been added due to errors; the caller
@@ -104,45 +104,45 @@ namespace snappy {
// encountered.
size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
- // ------------------------------------------------------------------------
- // Lower-level character array based routines. May be useful for
- // efficiency reasons in certain circumstances.
- // ------------------------------------------------------------------------
-
- // REQUIRES: "compressed" must point to an area of memory that is at
- // least "MaxCompressedLength(input_length)" bytes in length.
- //
- // Takes the data stored in "input[0..input_length]" and stores
- // it in the array pointed to by "compressed".
- //
- // "*compressed_length" is set to the length of the compressed output.
- //
- // Example:
- // char* output = new char[snappy::MaxCompressedLength(input_length)];
- // size_t output_length;
- // RawCompress(input, input_length, output, &output_length);
- // ... Process(output, output_length) ...
- // delete [] output;
- void RawCompress(const char* input,
- size_t input_length,
- char* compressed,
- size_t* compressed_length);
-
- // Given data in "compressed[0..compressed_length-1]" generated by
- // calling the Snappy::Compress routine, this routine
- // stores the uncompressed data to
- // uncompressed[0..GetUncompressedLength(compressed)-1]
- // returns false if the message is corrupted and could not be decompressed
- bool RawUncompress(const char* compressed, size_t compressed_length,
- char* uncompressed);
-
- // Given data from the byte source 'compressed' generated by calling
- // the Snappy::Compress routine, this routine stores the uncompressed
- // data to
- // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
- // returns false if the message is corrupted and could not be decompressed
- bool RawUncompress(Source* compressed, char* uncompressed);
-
+ // ------------------------------------------------------------------------
+ // Lower-level character array based routines. May be useful for
+ // efficiency reasons in certain circumstances.
+ // ------------------------------------------------------------------------
+
+ // REQUIRES: "compressed" must point to an area of memory that is at
+ // least "MaxCompressedLength(input_length)" bytes in length.
+ //
+ // Takes the data stored in "input[0..input_length]" and stores
+ // it in the array pointed to by "compressed".
+ //
+ // "*compressed_length" is set to the length of the compressed output.
+ //
+ // Example:
+ // char* output = new char[snappy::MaxCompressedLength(input_length)];
+ // size_t output_length;
+ // RawCompress(input, input_length, output, &output_length);
+ // ... Process(output, output_length) ...
+ // delete [] output;
+ void RawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length);
+
+ // Given data in "compressed[0..compressed_length-1]" generated by
+ // calling the Snappy::Compress routine, this routine
+ // stores the uncompressed data to
+ // uncompressed[0..GetUncompressedLength(compressed)-1]
+ // returns false if the message is corrupted and could not be decompressed
+ bool RawUncompress(const char* compressed, size_t compressed_length,
+ char* uncompressed);
+
+ // Given data from the byte source 'compressed' generated by calling
+ // the Snappy::Compress routine, this routine stores the uncompressed
+ // data to
+ // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
+ // returns false if the message is corrupted and could not be decompressed
+ bool RawUncompress(Source* compressed, char* uncompressed);
+
// Given data in "compressed[0..compressed_length-1]" generated by
// calling the Snappy::Compress routine, this routine
// stores the uncompressed data to the iovec "iov". The number of physical
@@ -165,24 +165,24 @@ namespace snappy {
bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
size_t iov_cnt);
- // Returns the maximal size of the compressed representation of
- // input data that is "source_bytes" bytes in length;
- size_t MaxCompressedLength(size_t source_bytes);
-
- // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
- // Returns true and stores the length of the uncompressed data in
- // *result normally. Returns false on parsing error.
- // This operation takes O(1) time.
- bool GetUncompressedLength(const char* compressed, size_t compressed_length,
- size_t* result);
-
- // Returns true iff the contents of "compressed[]" can be uncompressed
- // successfully. Does not return the uncompressed data. Takes
- // time proportional to compressed_length, but is usually at least
- // a factor of four faster than actual decompression.
- bool IsValidCompressedBuffer(const char* compressed,
- size_t compressed_length);
-
+ // Returns the maximal size of the compressed representation of
+ // input data that is "source_bytes" bytes in length;
+ size_t MaxCompressedLength(size_t source_bytes);
+
+ // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
+ // Returns true and stores the length of the uncompressed data in
+ // *result normally. Returns false on parsing error.
+ // This operation takes O(1) time.
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length,
+ size_t* result);
+
+ // Returns true iff the contents of "compressed[]" can be uncompressed
+ // successfully. Does not return the uncompressed data. Takes
+ // time proportional to compressed_length, but is usually at least
+ // a factor of four faster than actual decompression.
+ bool IsValidCompressedBuffer(const char* compressed,
+ size_t compressed_length);
+
// Returns true iff the contents of "compressed" can be uncompressed
// successfully. Does not return the uncompressed data. Takes
// time proportional to *compressed length, but is usually at least
@@ -196,18 +196,18 @@ namespace snappy {
// can only store 16-bit offsets, and EmitCopy() also assumes the offset
// is 65535 bytes or less. Note also that if you change this, it will
// affect the framing format (see framing_format.txt).
- //
+ //
// Note that there might be older data around that is compressed with larger
// block sizes, so the decompression code should not rely on the
// non-existence of long backreferences.
static constexpr int kBlockLog = 16;
static constexpr size_t kBlockSize = 1 << kBlockLog;
-
+
static constexpr int kMinHashTableBits = 8;
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
-
+
static constexpr int kMaxHashTableBits = 14;
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
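// For reference, the values implied by the constants above: kBlockSize is
// 65536 bytes, and the compression hash table holds between
// kMinHashTableSize = 256 and kMaxHashTableSize = 16384 entries.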
-} // end namespace snappy
-
+} // end namespace snappy
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
diff --git a/contrib/libs/snappy/ya.make b/contrib/libs/snappy/ya.make
index 4fb0aef69c..472daa0c80 100644
--- a/contrib/libs/snappy/ya.make
+++ b/contrib/libs/snappy/ya.make
@@ -1,6 +1,6 @@
# Generated by devtools/yamaker from nixpkgs 92c884dfd7140a6c3e6c717cf8990f7a78524331.
-LIBRARY()
+LIBRARY()
OWNER(g:cpp-contrib)
@@ -11,7 +11,7 @@ ORIGINAL_SOURCE(https://github.com/google/snappy/archive/1.1.8.tar.gz)
LICENSE(BSD-3-Clause)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+
ADDINCL(
GLOBAL contrib/libs/snappy/include
)
@@ -21,12 +21,12 @@ NO_COMPILER_WARNINGS()
CFLAGS(
-DHAVE_CONFIG_H
)
-
-SRCS(
- snappy-c.cc
+
+SRCS(
+ snappy-c.cc
snappy-sinksource.cc
- snappy-stubs-internal.cc
+ snappy-stubs-internal.cc
snappy.cc
-)
-
-END()
+)
+
+END()
diff --git a/contrib/libs/sqlite3/ya.make b/contrib/libs/sqlite3/ya.make
index 9535cdc9c4..e6b26dccc3 100644
--- a/contrib/libs/sqlite3/ya.make
+++ b/contrib/libs/sqlite3/ya.make
@@ -15,7 +15,7 @@ LICENSE(
Public-Domain AND
blessing
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
ADDINCL(
diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override.h b/contrib/libs/tcmalloc/tcmalloc/libc_override.h
index 6cc5895bac..89f8e4e5c8 100644
--- a/contrib/libs/tcmalloc/tcmalloc/libc_override.h
+++ b/contrib/libs/tcmalloc/tcmalloc/libc_override.h
@@ -32,7 +32,7 @@
#include "tcmalloc/libc_override_glibc.h"
#else
-#include "tcmalloc/libc_override_redefine.h"
+#include "tcmalloc/libc_override_redefine.h"
#endif
diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h
index 5a851e2a18..b1655461c3 100644
--- a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h
+++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h
@@ -57,9 +57,9 @@ int posix_memalign(void** r, size_t a, size_t s) {
size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); }
// tcmalloc extension
-void sdallocx(void* p, size_t s, int flags) noexcept {
- TCMallocInternalSdallocx(p, s, flags);
-}
+void sdallocx(void* p, size_t s, int flags) noexcept {
+ TCMallocInternalSdallocx(p, s, flags);
+}
#if defined(__GLIBC__) || defined(__NEWLIB__)
// SunOS extension
diff --git a/contrib/libs/tcmalloc/ya.make b/contrib/libs/tcmalloc/ya.make
index 362d676ede..54701b1b77 100644
--- a/contrib/libs/tcmalloc/ya.make
+++ b/contrib/libs/tcmalloc/ya.make
@@ -1,7 +1,7 @@
LIBRARY()
LICENSE(Apache-2.0)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/utf8proc/ya.make b/contrib/libs/utf8proc/ya.make
index 4a11983f58..5da2c75ea8 100644
--- a/contrib/libs/utf8proc/ya.make
+++ b/contrib/libs/utf8proc/ya.make
@@ -4,7 +4,7 @@ LICENSE(
MIT AND
Unicode
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
OWNER(
diff --git a/contrib/libs/xz/common/ya.make b/contrib/libs/xz/common/ya.make
index 6a2c2062df..ce25e6a230 100644
--- a/contrib/libs/xz/common/ya.make
+++ b/contrib/libs/xz/common/ya.make
@@ -10,7 +10,7 @@ OWNER(
LIBRARY()
LICENSE(Public-Domain)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(5.2.4)
diff --git a/contrib/libs/xz/liblzma/ya.make b/contrib/libs/xz/liblzma/ya.make
index 6e8fd9d68c..d548283a2b 100644
--- a/contrib/libs/xz/liblzma/ya.make
+++ b/contrib/libs/xz/liblzma/ya.make
@@ -10,7 +10,7 @@ OWNER(
LIBRARY()
LICENSE(Public-Domain)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(5.2.4)
diff --git a/contrib/libs/ya.make b/contrib/libs/ya.make
index bf98d28041..9c4640fdcf 100644
--- a/contrib/libs/ya.make
+++ b/contrib/libs/ya.make
@@ -2,8 +2,8 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
-RECURSE(
+
+RECURSE(
afl
antlr4_cpp_runtime
apache
@@ -22,7 +22,7 @@ RECURSE(
bento4
bluez-clean-headers
breakpad
- brotli
+ brotli
c-ares
cairo
cbc
@@ -112,7 +112,7 @@ RECURSE(
ImageMagick
inja
inja/ut
- intel
+ intel
isa-l
isa-l/ut
jansson
@@ -222,7 +222,7 @@ RECURSE(
matrixssl
mecab
metrohash
- mimalloc
+ mimalloc
minilzo
minizip
mlir11
@@ -282,7 +282,7 @@ RECURSE(
protoc_std
psimd
pthreadpool
- pugixml
+ pugixml
pybind11
pycxx
python
@@ -363,12 +363,12 @@ RECURSE(
yaml-cpp
zeromq
zlib
- zlib-ng-develop
+ zlib-ng-develop
zookeeper
zstd
- zstd06
+ zstd06
zzip
-)
+)
IF (OS_FREEBSD OR OS_LINUX)
RECURSE(
@@ -381,7 +381,7 @@ ENDIF()
IF (OS_DARWIN)
RECURSE(
- gperftools
+ gperftools
osxfuse
macfuse-headers
uuid
@@ -421,11 +421,11 @@ ELSE()
unixodbc
)
ENDIF()
-
+
IF (OS_LINUX OR OS_WINDOWS)
- RECURSE(
+ RECURSE(
lockless
- )
+ )
ENDIF()
IF (OS_ANDROID)
@@ -441,6 +441,6 @@ IF (OS_IOS AND ARCH_ARM64 OR OS_DARWIN)
)
ENDIF()
-IF (MUSL)
- RECURSE(musl_extra)
-ENDIF()
+IF (MUSL)
+ RECURSE(musl_extra)
+ENDIF()
diff --git a/contrib/libs/yaml-cpp/ya.make b/contrib/libs/yaml-cpp/ya.make
index 66656da401..058caf92fa 100644
--- a/contrib/libs/yaml-cpp/ya.make
+++ b/contrib/libs/yaml-cpp/ya.make
@@ -1,7 +1,7 @@
LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(0.5.3)
diff --git a/contrib/libs/zlib/deflate.c b/contrib/libs/zlib/deflate.c
index 7f3ff741f8..7318b1e2fb 100644
--- a/contrib/libs/zlib/deflate.c
+++ b/contrib/libs/zlib/deflate.c
@@ -1236,7 +1236,7 @@ local void lm_init (s)
/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
* match.S. The code will be functionally equivalent.
*/
-Y_NO_SANITIZE("undefined") local uInt longest_match(s, cur_match)
+Y_NO_SANITIZE("undefined") local uInt longest_match(s, cur_match)
deflate_state *s;
IPos cur_match; /* current match */
{
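Y_NO_SANITIZE("undefined") in the hunk above (assumed to come from the repository's util/system/compiler.h and to expand to __attribute__((no_sanitize("undefined"))) on supporting compilers) suppresses UBSan for a single hot function instead of disabling it globally. A minimal sketch of the same pattern, illustrative only and not part of the patch:

    #include <util/system/compiler.h>  /* assumed location of Y_NO_SANITIZE */

    Y_NO_SANITIZE("undefined")
    static int fast_hash(const char* p, int len)
    {
        int h = 0;
        for (int i = 0; i < len; ++i)
            h = h * 131 + p[i];  /* signed wraparound is expected here; silence UBSan for this function only */
        return h;
    }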
diff --git a/contrib/libs/zstd06/LICENSE b/contrib/libs/zstd06/LICENSE
index 29c8670589..35495850f2 100755
--- a/contrib/libs/zstd06/LICENSE
+++ b/contrib/libs/zstd06/LICENSE
@@ -1,26 +1,26 @@
-ZSTD Library
-Copyright (c) 2014-2015, Yann Collet
-All rights reserved.
-
-BSD License
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
- list of conditions and the following disclaimer in the documentation and/or
- other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ZSTD Library
+Copyright (c) 2014-2015, Yann Collet
+All rights reserved.
+
+BSD License
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+ list of conditions and the following disclaimer in the documentation and/or
+ other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/libs/zstd06/common/bitstream.h b/contrib/libs/zstd06/common/bitstream.h
index 5d89872904..97fc621579 100644
--- a/contrib/libs/zstd06/common/bitstream.h
+++ b/contrib/libs/zstd06/common/bitstream.h
@@ -1,417 +1,417 @@
#include <contrib/libs/zstd06/renames.h>
-/* ******************************************************************
- bitstream
- Part of FSE library
- header file (to include)
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-****************************************************************** */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*
-* This API consists of small unitary functions, which must be inlined for best performance.
-* Since link-time-optimization is not available for all compilers,
-* these functions are defined into a .h to be included.
-*/
-
-/*-****************************************
-* Dependencies
-******************************************/
-#include "mem.h" /* unaligned access routines */
-#include "error_private.h" /* error codes and messages */
-
-
-/*=========================================
-* Target specific
-=========================================*/
-#if defined(__BMI__) && defined(__GNUC__)
-# include <immintrin.h> /* support for bextr (experimental) */
-#endif
-
-
-/*-******************************************
-* bitStream encoding API (write forward)
-********************************************/
-/* bitStream can mix input from multiple sources.
-* A critical property of these streams is that they encode and decode in **reverse** direction.
-* So the first bit sequence you add will be the last to be read, like a LIFO stack.
-*/
-typedef struct
-{
- size_t bitContainer;
- int bitPos;
- char* startPtr;
- char* ptr;
- char* endPtr;
-} BIT_CStream_t;
-
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
-MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
-MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
-
-/* Start with initCStream, providing the size of buffer to write into.
-* bitStream will never write outside of this buffer.
+/* ******************************************************************
+ bitstream
+ Part of FSE library
+ header file (to include)
+ Copyright (C) 2013-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+* This API consists of small unitary functions, which must be inlined for best performance.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include "mem.h" /* unaligned access routines */
+#include "error_private.h" /* error codes and messages */
+
+
+/*=========================================
+* Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+# include <immintrin.h> /* support for bextr (experimental) */
+#endif
+
+
+/*-******************************************
+* bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+* A critical property of these streams is that they encode and decode in **reverse** direction.
+* So the first bit sequence you add will be the last to be read, like a LIFO stack.
+*/
+typedef struct
+{
+ size_t bitContainer;
+ int bitPos;
+ char* startPtr;
+ char* ptr;
+ char* endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+* bitStream will never write outside of this buffer.
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
-*
-* bits are first added to a local register.
-* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
-* Writing data into memory is an explicit operation, performed by the flushBits function.
-* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
-* After a flushBits, a maximum of 7 bits might still be stored into local register.
-*
-* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
-*
-* Last operation is to close the bitStream.
-* The function returns the final size of CStream in bytes.
-* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
-*/
-
-
-/*-********************************************
-* bitStream decoding API (read backward)
-**********************************************/
-typedef struct
-{
- size_t bitContainer;
- unsigned bitsConsumed;
- const char* ptr;
- const char* start;
-} BIT_DStream_t;
-
-typedef enum { BIT_DStream_unfinished = 0,
- BIT_DStream_endOfBuffer = 1,
- BIT_DStream_completed = 2,
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
-
-
-/* Start by invoking BIT_initDStream().
-* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-* You can then retrieve bitFields stored into the local register, **in reverse order**.
-* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*
+* bits are first added to a local register.
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+* Writing data into memory is an explicit operation, performed by the flushBits function.
+* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+* After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+* Last operation is to close the bitStream.
+* The function returns the final size of CStream in bytes.
+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+
+/*-********************************************
+* bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+ BIT_DStream_endOfBuffer = 1,
+ BIT_DStream_completed = 2,
+ BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
-* Otherwise, it can be less than that, so proceed accordingly.
-* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
-*/
-
-
-/*-****************************************
-* unsafe API
-******************************************/
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
-/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
-
-MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
-/* unsafe version; does not check buffer overflow */
-
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-
-
-/*-**************************************************************
-* Internal functions
-****************************************************************/
-MEM_STATIC unsigned BIT_highbit32 (register U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- unsigned r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-/*===== Local Constants =====*/
-static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
-
-
-/*-**************************************************************
-* bitStream encoding
-****************************************************************/
-/*! BIT_initCStream() :
- * `dstCapacity` must be > sizeof(void*)
- * @return : 0 if success,
- otherwise an error code (can be tested using ERR_isError() ) */
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
-{
- bitC->bitContainer = 0;
- bitC->bitPos = 0;
- bitC->startPtr = (char*)startPtr;
- bitC->ptr = bitC->startPtr;
- bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
- if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
- return 0;
-}
-
-/*! BIT_addBits() :
- can add up to 26 bits into `bitC`.
- Does not check for register overflow ! */
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
-{
- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
- bitC->bitPos += nbBits;
-}
-
-/*! BIT_addBitsFast() :
- * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
-{
- bitC->bitContainer |= value << bitC->bitPos;
- bitC->bitPos += nbBits;
-}
-
-/*! BIT_flushBitsFast() :
- * unsafe version; does not check buffer overflow */
-MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
-{
- size_t const nbBytes = bitC->bitPos >> 3;
- MEM_writeLEST(bitC->ptr, bitC->bitContainer);
- bitC->ptr += nbBytes;
- bitC->bitPos &= 7;
- bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_flushBits() :
- * safe version; check for buffer overflow, and prevents it.
- * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
-MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
-{
- size_t const nbBytes = bitC->bitPos >> 3;
- MEM_writeLEST(bitC->ptr, bitC->bitContainer);
- bitC->ptr += nbBytes;
- if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
- bitC->bitPos &= 7;
- bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_closeCStream() :
- * @return : size of CStream, in bytes,
- or 0 if it could not fit into dstBuffer */
-MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
-{
- BIT_addBitsFast(bitC, 1, 1); /* endMark */
- BIT_flushBits(bitC);
-
- if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
-
- return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
-}
-
-
-/*-********************************************************
-* bitStream decoding
-**********************************************************/
-/*! BIT_initDStream() :
-* Initialize a BIT_DStream_t.
-* `bitD` : a pointer to an already allocated BIT_DStream_t structure.
-* `srcSize` must be the *exact* size of the bitStream, in bytes.
-* @return : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
-
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+
+/*-****************************************
+* unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+* Internal functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse ( &r, val );
+ return (unsigned) r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
+ return 31 - __builtin_clz (val);
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ unsigned r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
+
+/*===== Local Constants =====*/
+static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
+
+
+/*-**************************************************************
+* bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ * `dstCapacity` must be > sizeof(void*)
+ * @return : 0 if success,
+ otherwise an error code (can be tested using ERR_isError() ) */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
+{
+ bitC->bitContainer = 0;
+ bitC->bitPos = 0;
+ bitC->startPtr = (char*)startPtr;
+ bitC->ptr = bitC->startPtr;
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
+ if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
+ return 0;
+}
+
+/*! BIT_addBits() :
+ can add up to 26 bits into `bitC`.
+ Does not check for register overflow ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+{
+ bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+ bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+{
+ bitC->bitContainer |= value << bitC->bitPos;
+ bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ * unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+ size_t const nbBytes = bitC->bitPos >> 3;
+ MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+ bitC->ptr += nbBytes;
+ bitC->bitPos &= 7;
+ bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+}
+
+/*! BIT_flushBits() :
+ * safe version; check for buffer overflow, and prevents it.
+ * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+ size_t const nbBytes = bitC->bitPos >> 3;
+ MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+ bitC->ptr += nbBytes;
+ if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+ bitC->bitPos &= 7;
+ bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+}
+
+/*! BIT_closeCStream() :
+ * @return : size of CStream, in bytes,
+ or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+ BIT_addBitsFast(bitC, 1, 1); /* endMark */
+ BIT_flushBits(bitC);
+
+ if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
+
+ return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+}
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+* Initialize a BIT_DStream_t.
+* `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+* `srcSize` must be the *exact* size of the bitStream, in bytes.
+* @return : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
- bitD->start = (const char*)srcBuffer;
+ bitD->start = (const char*)srcBuffer;
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
- if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); }
- } else {
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = bitD->start;
- bitD->bitContainer = *(const BYTE*)(bitD->start);
- switch(srcSize)
- {
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); }
+ } else {
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
- default:;
- }
- { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
- if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); }
+ default:;
+ }
+ { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); }
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
- }
-
- return srcSize;
-}
-
+ }
+
+ return srcSize;
+}
+
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
-{
+{
return bitContainer >> start;
-}
-
+}
+
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
-{
-#if defined(__BMI__) && defined(__GNUC__) /* experimental */
+{
+#if defined(__BMI__) && defined(__GNUC__) /* experimental */
# if defined(__x86_64__)
if (sizeof(bitContainer)==8)
return _bextr_u64(bitContainer, start, nbBits);
else
# endif
return _bextr_u32(bitContainer, start, nbBits);
-#else
+#else
return (bitContainer >> start) & BIT_mask[nbBits];
-#endif
-}
-
+#endif
+}
+
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
-{
+{
return bitContainer & BIT_mask[nbBits];
-}
-
-/*! BIT_lookBits() :
- * Provides next n bits from local register.
+}
+
+/*! BIT_lookBits() :
+ * Provides next n bits from local register.
* local register is not modified.
- * On 32-bits, maxNbBits==24.
- * On 64-bits, maxNbBits==56.
- * @return : value extracted
- */
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
-{
+ * On 32-bits, maxNbBits==24.
+ * On 64-bits, maxNbBits==56.
+ * @return : value extracted
+ */
+ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+{
#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
-#else
- U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
-#endif
-}
-
-/*! BIT_lookBitsFast() :
-* unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
-{
- U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
-}
-
-MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- bitD->bitsConsumed += nbBits;
-}
-
-/*! BIT_readBits() :
- * Read (consume) next n bits from local register and update.
- * Pay attention to not read more than nbBits contained into local register.
- * @return : extracted value.
- */
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t const value = BIT_lookBits(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-/*! BIT_readBitsFast() :
-* unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t const value = BIT_lookBitsFast(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-/*! BIT_reloadDStream() :
-* Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
-* This function is safe, it guarantees it will not read beyond src buffer.
-* @return : status of `BIT_DStream_t` internal register.
+#else
+ U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+* unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+ U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+/*! BIT_readBits() :
+ * Read (consume) next n bits from local register and update.
+ * Pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t const value = BIT_lookBits(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*! BIT_readBitsFast() :
+* unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t const value = BIT_lookBitsFast(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*! BIT_reloadDStream() :
+* Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
+* This function is safe, it guarantees it will not read beyond src buffer.
+* @return : status of `BIT_DStream_t` internal register.
if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
-{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
-
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
- }
- if (bitD->ptr == bitD->start) {
- if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
- return BIT_DStream_completed;
- }
- { U32 nbBytes = bitD->bitsConsumed >> 3;
- BIT_DStream_status result = BIT_DStream_unfinished;
- if (bitD->ptr - nbBytes < bitD->start) {
- nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
- result = BIT_DStream_endOfBuffer;
- }
- bitD->ptr -= nbBytes;
- bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
- return result;
- }
-}
-
-/*! BIT_endOfDStream() :
-* @return Tells if DStream has exactly reached its end (all bits consumed).
-*/
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
-{
- return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* BITSTREAM_H_MODULE */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
+
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+ }
+ if (bitD->ptr == bitD->start) {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+ return BIT_DStream_completed;
+ }
+ { U32 nbBytes = bitD->bitsConsumed >> 3;
+ BIT_DStream_status result = BIT_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start) {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = BIT_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ return result;
+ }
+}
+
+/*! BIT_endOfDStream() :
+* @return Tells if DStream has exactly reached its end (all bits consumed).
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
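The bitstream.h block re-added above documents a LIFO bit-level codec: values written with BIT_addBits() come back from BIT_readBits() in reverse order of insertion. A minimal round-trip sketch in C, illustrative only and not part of the patch (the include path is an assumption; in this tree the header lives under contrib/libs/zstd06/common/):

    #include <stdio.h>
    #include "bitstream.h"   /* BIT_CStream_t / BIT_DStream_t, as declared above */

    int bitstream_roundtrip(void)
    {
        unsigned char buf[16];
        BIT_CStream_t w;
        BIT_DStream_t r;
        size_t streamSize, second, first;

        /* encode: buffer must exceed sizeof(void*), fields of <= 24 bits stay 32-bit friendly */
        if (ERR_isError(BIT_initCStream(&w, buf, sizeof(buf)))) return -1;
        BIT_addBits(&w, 5, 3);     /* first value, 3 bits */
        BIT_addBits(&w, 200, 8);   /* second value, 8 bits */
        BIT_flushBits(&w);
        streamSize = BIT_closeCStream(&w);       /* 0 would mean "did not fit" */
        if (streamSize == 0) return -1;

        /* decode: srcSize must be the exact stream size; values come back last-written-first */
        if (ERR_isError(BIT_initDStream(&r, buf, streamSize))) return -1;
        second = BIT_readBits(&r, 8);            /* 200 */
        first  = BIT_readBits(&r, 3);            /* 5 */
        BIT_reloadDStream(&r);
        printf("%zu %zu fully consumed: %u\n", second, first, BIT_endOfDStream(&r));
        return 0;
    }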
diff --git a/contrib/libs/zstd06/common/error_private.h b/contrib/libs/zstd06/common/error_private.h
index f2cd35dd15..d8d1ef24b3 100644
--- a/contrib/libs/zstd06/common/error_private.h
+++ b/contrib/libs/zstd06/common/error_private.h
@@ -1,124 +1,124 @@
#include <contrib/libs/zstd06/renames.h>
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Homepage : http://www.zstd.net
-****************************************************************** */
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* ****************************************
-* Dependencies
-******************************************/
-#include <stddef.h> /* size_t */
-#include "error_public.h" /* enum list */
-
-
-/* ****************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/*-****************************************
-* Customization (error_public.h)
-******************************************/
-typedef ZSTD_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTD_error_##name
-
-
-/*-****************************************
-* Error codes handling
-******************************************/
-#ifdef ERROR
-# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
-#endif
-#define ERROR(name) ((size_t)-PREFIX(name))
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Homepage : http://www.zstd.net
+****************************************************************** */
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+* Dependencies
+******************************************/
+#include <stddef.h> /* size_t */
+#include "error_public.h" /* enum list */
+
+
+/* ****************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+* Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+* Error codes handling
+******************************************/
+#ifdef ERROR
+# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
+#endif
+#define ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
-
-
-/*-****************************************
-* Error Strings
-******************************************/
-
+
+
+/*-****************************************
+* Error Strings
+******************************************/
+
ERR_STATIC const char* ERR_getErrorString(ERR_enum code)
-{
- static const char* notErrorCode = "Unspecified error code";
+{
+ static const char* notErrorCode = "Unspecified error code";
switch( code )
- {
- case PREFIX(no_error): return "No error detected";
- case PREFIX(GENERIC): return "Error (generic)";
- case PREFIX(prefix_unknown): return "Unknown frame descriptor";
- case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
- case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
- case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
- case PREFIX(init_missing): return "Context should be init first";
- case PREFIX(memory_allocation): return "Allocation error : not enough memory";
- case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
- case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
- case PREFIX(srcSize_wrong): return "Src size incorrect";
- case PREFIX(corruption_detected): return "Corrupted block detected";
- case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
- case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
- case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
- case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
- case PREFIX(maxCode):
+ {
+ case PREFIX(no_error): return "No error detected";
+ case PREFIX(GENERIC): return "Error (generic)";
+ case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+ case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+ case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+ case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
+ case PREFIX(init_missing): return "Context should be init first";
+ case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+ case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+ case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+ case PREFIX(srcSize_wrong): return "Src size incorrect";
+ case PREFIX(corruption_detected): return "Corrupted block detected";
+ case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+ case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+ case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+ case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+ case PREFIX(maxCode):
default: return notErrorCode;
- }
-}
-
+ }
+}
+
ERR_STATIC const char* ERR_getErrorName(size_t code)
{
return ERR_getErrorString(ERR_getErrorCode(code));
}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
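error_private.h above defines the size_t error convention used throughout this library: an error is a negated error-code enum cast to size_t, produced with ERROR(name) and detected with ERR_isError(). A short consumer-side sketch, illustrative only and not part of the patch (check_capacity and report are hypothetical helpers):

    #include <stdio.h>
    #include "error_private.h"   /* ERROR(), ERR_isError(), ERR_getErrorName(), as above */

    static size_t check_capacity(size_t dstCapacity, size_t needed)
    {
        if (dstCapacity < needed) return ERROR(dstSize_tooSmall);  /* (size_t)-ZSTD_error_dstSize_tooSmall */
        return needed;                                             /* success: an ordinary size */
    }

    static void report(size_t result)
    {
        if (ERR_isError(result))
            printf("failed: %s\n", ERR_getErrorName(result));
        else
            printf("ok, %zu bytes needed\n", result);
    }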
diff --git a/contrib/libs/zstd06/common/error_public.h b/contrib/libs/zstd06/common/error_public.h
index 20c04de531..1d8f03995f 100644
--- a/contrib/libs/zstd06/common/error_public.h
+++ b/contrib/libs/zstd06/common/error_public.h
@@ -1,72 +1,72 @@
#include <contrib/libs/zstd06/renames.h>
-/* ******************************************************************
- Error codes list
- Copyright (C) 2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Homepage : http://www.zstd.net
-****************************************************************** */
-#ifndef ERROR_PUBLIC_H_MODULE
-#define ERROR_PUBLIC_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* ****************************************
-* error codes list
-******************************************/
-typedef enum {
- ZSTD_error_no_error,
- ZSTD_error_GENERIC,
- ZSTD_error_prefix_unknown,
- ZSTD_error_frameParameter_unsupported,
- ZSTD_error_frameParameter_unsupportedBy32bits,
- ZSTD_error_compressionParameter_unsupported,
- ZSTD_error_init_missing,
- ZSTD_error_memory_allocation,
- ZSTD_error_stage_wrong,
- ZSTD_error_dstSize_tooSmall,
- ZSTD_error_srcSize_wrong,
- ZSTD_error_corruption_detected,
- ZSTD_error_tableLog_tooLarge,
- ZSTD_error_maxSymbolValue_tooLarge,
- ZSTD_error_maxSymbolValue_tooSmall,
- ZSTD_error_dictionary_corrupted,
- ZSTD_error_maxCode
-} ZSTD_ErrorCode;
-
-/* note : compare with size_t function results using ZSTD_getError() */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_PUBLIC_H_MODULE */
+/* ******************************************************************
+ Error codes list
+ Copyright (C) 2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Homepage : http://www.zstd.net
+****************************************************************** */
+#ifndef ERROR_PUBLIC_H_MODULE
+#define ERROR_PUBLIC_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+* error codes list
+******************************************/
+typedef enum {
+ ZSTD_error_no_error,
+ ZSTD_error_GENERIC,
+ ZSTD_error_prefix_unknown,
+ ZSTD_error_frameParameter_unsupported,
+ ZSTD_error_frameParameter_unsupportedBy32bits,
+ ZSTD_error_compressionParameter_unsupported,
+ ZSTD_error_init_missing,
+ ZSTD_error_memory_allocation,
+ ZSTD_error_stage_wrong,
+ ZSTD_error_dstSize_tooSmall,
+ ZSTD_error_srcSize_wrong,
+ ZSTD_error_corruption_detected,
+ ZSTD_error_tableLog_tooLarge,
+ ZSTD_error_maxSymbolValue_tooLarge,
+ ZSTD_error_maxSymbolValue_tooSmall,
+ ZSTD_error_dictionary_corrupted,
+ ZSTD_error_maxCode
+} ZSTD_ErrorCode;
+
+/* note : compare with size_t function results using ZSTD_getError() */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_PUBLIC_H_MODULE */
diff --git a/contrib/libs/zstd06/common/fse.h b/contrib/libs/zstd06/common/fse.h
index fcd0216f10..1cc6992951 100644
--- a/contrib/libs/zstd06/common/fse.h
+++ b/contrib/libs/zstd06/common/fse.h
@@ -1,280 +1,280 @@
#include <contrib/libs/zstd06/renames.h>
-/* ******************************************************************
- FSE : Finite State Entropy codec
- Public Prototypes declaration
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-****************************************************************** */
-#ifndef FSE_H
-#define FSE_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*-*****************************************
-* Dependencies
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-
-
-/*-****************************************
-* FSE simple functions
-******************************************/
-/*! FSE_compress() :
- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
- 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
- @return : size of compressed data (<= dstCapacity).
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
- if FSE_isError(return), compression failed (more details using FSE_getErrorName())
-*/
-size_t FSE_compress(void* dst, size_t dstCapacity,
- const void* src, size_t srcSize);
-
-/*! FSE_decompress():
- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated destination buffer 'dst', of size 'dstCapacity'.
- @return : size of regenerated data (<= maxDstSize),
- or an error code, which can be tested using FSE_isError() .
-
- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
- Why ? : making this distinction requires a header.
- Header management is intentionally delegated to the user layer, which can better manage special cases.
-*/
-size_t FSE_decompress(void* dst, size_t dstCapacity,
- const void* cSrc, size_t cSrcSize);
-
-
-/*-*****************************************
-* Tool functions
-******************************************/
-size_t FSE_compressBound(size_t size); /* maximum compressed size */
-
-/* Error Management */
-unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
-const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
-
-
-/*-*****************************************
-* FSE advanced functions
-******************************************/
-/*! FSE_compress2() :
- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
- Both parameters can be defined as '0' to mean : use default value
- @return : size of compressed data
- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
- if FSE_isError(return), it's an error code.
-*/
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-
-
-/*-*****************************************
-* FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-/* *** COMPRESSION *** */
-
-/*! FSE_count():
- Provides the precise count of each byte within a table 'count'.
- 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
- *maxSymbolValuePtr will be updated if detected smaller than initial value.
- @return : the count of the most frequent symbol (which is not identified).
- if return == srcSize, there is only one symbol.
- Can also return an error code, which can be tested with FSE_isError(). */
-size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-
-/*! FSE_optimalTableLog():
- dynamically downsize 'tableLog' when conditions are met.
- It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
- @return : recommended tableLog (necessarily <= initial 'tableLog') */
-unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_normalizeCount():
- normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
- 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
- @return : tableLog,
- or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_NCountWriteBound():
- Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
- Typically useful for allocation purpose. */
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_writeNCount():
- Compactly save 'normalizedCounter' into 'buffer'.
- @return : size of the compressed table,
- or an errorCode, which can be tested using FSE_isError(). */
-size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-
-/*! Constructor and Destructor of FSE_CTable.
- Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
-void FSE_freeCTable (FSE_CTable* ct);
-
-/*! FSE_buildCTable():
- Builds `ct`, which must be already allocated, using FSE_createCTable().
- @return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_compress_usingCTable():
- Compress `src` using `ct` into `dst` which must be already allocated.
- @return : size of compressed data (<= `dstCapacity`),
- or 0 if compressed data could not fit into `dst`,
- or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
-
-/*!
-Tutorial :
-----------
-The first step is to count all symbols. FSE_count() does this job very fast.
-Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
-'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
-maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
-FSE_count() will return the number of occurrence of the most frequent symbol.
-This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-The next step is to normalize the frequencies.
-FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
-It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
-You can use 'tableLog'==0 to mean "use default tableLog value".
-If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
-which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
-
-The result of FSE_normalizeCount() will be saved into a table,
-called 'normalizedCounter', which is a table of signed short.
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
-The return value is tableLog if everything proceeded as expected.
-It is 0 if there is a single symbol within distribution.
-If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
-'buffer' must be already allocated.
-For guaranteed success, buffer size must be at least FSE_headerBound().
-The result of the function is the number of bytes written into 'buffer'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
-
-'normalizedCounter' can then be used to create the compression table 'CTable'.
-The space required by 'CTable' must be already allocated, using FSE_createCTable().
-You can then use FSE_buildCTable() to fill 'CTable'.
-If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
-
-'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
-Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
-The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
-If it returns '0', compressed data could not fit into 'dst'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-*/
-
-
-/* *** DECOMPRESSION *** */
-
-/*! FSE_readNCount():
- Read compactly saved 'normalizedCounter' from 'rBuffer'.
- @return : size read from 'rBuffer',
- or an errorCode, which can be tested using FSE_isError().
- maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
-
-/*! Constructor and Destructor of FSE_DTable.
- Note that its size depends on 'tableLog' */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_DTable* FSE_createDTable(unsigned tableLog);
-void FSE_freeDTable(FSE_DTable* dt);
-
-/*! FSE_buildDTable():
- Builds 'dt', which must be already allocated, using FSE_createDTable().
- return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_decompress_usingDTable():
- Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
- into `dst` which must be already allocated.
- @return : size of regenerated data (necessarily <= `dstCapacity`),
- or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
-
-/*!
-Tutorial :
-----------
-(Note : these functions only decompress FSE-compressed blocks.
- If block is uncompressed, use memcpy() instead
- If block is a single repeated byte, use memset() instead )
-
-The first step is to obtain the normalized frequencies of symbols.
-This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
-or size the table to handle worst case situations (typically 256).
-FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
-The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
-Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
-This is performed by the function FSE_buildDTable().
-The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
-`cSrcSize` must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
-If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
-*/
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FSE_H */
+/* ******************************************************************
+ FSE : Finite State Entropy codec
+ Public Prototypes declaration
+ Copyright (C) 2013-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef FSE_H
+#define FSE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*-*****************************************
+* Dependencies
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+
+
+/*-****************************************
+* FSE simple functions
+******************************************/
+/*! FSE_compress() :
+ Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
+ @return : size of compressed data (<= dstCapacity).
+ Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+ if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+size_t FSE_compress(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+ Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= dstCapacity),
+ or an error code, which can be tested using FSE_isError() .
+
+    ** Important ** : FSE_decompress() does not decompress non-compressible or RLE data !!!
+ Why ? : making this distinction requires a header.
+ Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+size_t FSE_decompress(void* dst, size_t dstCapacity,
+ const void* cSrc, size_t cSrcSize);
+
+
+/*-*****************************************
+* Tool functions
+******************************************/
+size_t FSE_compressBound(size_t size); /* maximum compressed size */
+
+/* Error Management */
+unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
+const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
+
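
As an editor's illustration of the one-shot API and error helpers declared above (not part of the patched header), a minimal round-trip might look as follows; the function name, the EXAMPLE_* buffer sizes and the fallback behaviour are assumptions of this sketch:

    #include <string.h>   /* memcmp */
    #include "fse.h"      /* the prototypes declared above */

    #define EXAMPLE_SRC_MAX  (64 * 1024)
    /* generous worst-case output : NCount header allowance + size + size/128 */
    #define EXAMPLE_DST_MAX  (EXAMPLE_SRC_MAX + (EXAMPLE_SRC_MAX >> 7) + 512)

    /* Returns 1 on a successful round-trip, 0 when the block must be stored raw/RLE, -1 on error. */
    static int fse_roundtrip_example(const unsigned char* src, size_t srcSize)
    {
        static unsigned char compressed[EXAMPLE_DST_MAX];
        static unsigned char regenerated[EXAMPLE_SRC_MAX];
        size_t cSize, rSize;

        if (srcSize > EXAMPLE_SRC_MAX) return -1;

        cSize = FSE_compress(compressed, sizeof(compressed), src, srcSize);
        if (FSE_isError(cSize)) return -1;   /* details via FSE_getErrorName(cSize) */
        if (cSize <= 1) return 0;            /* 0 : not compressible, 1 : RLE => store differently */

        rSize = FSE_decompress(regenerated, sizeof(regenerated), compressed, cSize);
        if (FSE_isError(rSize)) return -1;

        return (rSize == srcSize) && (memcmp(src, regenerated, srcSize) == 0);
    }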
+
+/*-*****************************************
+* FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+ Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+ Both parameters can be defined as '0' to mean : use default value
+ @return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+ if FSE_isError(return), it's an error code.
+*/
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+* FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrences from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_count():
+ Provides the precise count of each byte within a table 'count'.
+ 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+ *maxSymbolValuePtr will be updated if detected smaller than initial value.
+ @return : the count of the most frequent symbol (which is not identified).
+ if return == srcSize, there is only one symbol.
+ Can also return an error code, which can be tested with FSE_isError(). */
+size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+/*! FSE_optimalTableLog():
+ dynamically downsize 'tableLog' when conditions are met.
+ It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+ @return : recommended tableLog (necessarily <= initial 'tableLog') */
+unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+ normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+ 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+ @return : tableLog,
+ or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_NCountWriteBound():
+ Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+ Typically useful for allocation purpose. */
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+ Compactly save 'normalizedCounter' into 'buffer'.
+ @return : size of the compressed table,
+ or an errorCode, which can be tested using FSE_isError(). */
+size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*! Constructor and Destructor of FSE_CTable.
+ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+void FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+ Builds `ct`, which must be already allocated, using FSE_createCTable().
+ @return : 0, or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+ Compress `src` using `ct` into `dst` which must be already allocated.
+ @return : size of compressed data (<= `dstCapacity`),
+ or 0 if compressed data could not fit into `dst`,
+ or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrences of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_NCountWriteBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
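
To make the walk-through above concrete, here is an editor's sketch that chains the declared sub-functions in the documented order (count, normalize, write the NCount header, build the CTable, encode). The function name and the fall-back choices are assumptions of this illustration, not part of the header:

    #include "fse.h"

    /* Writes the normalized-count header followed by the FSE payload into 'dst'.
       Returns the total size written, 0 if the block should be stored raw/RLE instead,
       or an error code testable with FSE_isError(). */
    static size_t fse_detailed_compress_example(void* dst, size_t dstCapacity,
                                                const void* src, size_t srcSize)
    {
        unsigned count[256];
        short    norm[256];
        unsigned maxSymbolValue = 255;          /* byte symbols : worst case 0..255 */
        unsigned tableLog = 0;                  /* 0 : let FSE pick its default */
        unsigned char* const op = (unsigned char*)dst;
        size_t headerSize, cSize;
        FSE_CTable* ct;

        {   size_t const maxCount = FSE_count(count, &maxSymbolValue, src, srcSize);
            if (FSE_isError(maxCount)) return maxCount;
            if (maxCount == srcSize) return 0;  /* single symbol : RLE is the better choice */
        }

        tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
        {   size_t const logUsed = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue);
            if (FSE_isError(logUsed)) return logUsed;
            tableLog = (unsigned)logUsed;
        }

        headerSize = FSE_writeNCount(op, dstCapacity, norm, maxSymbolValue, tableLog);
        if (FSE_isError(headerSize)) return headerSize;

        ct = FSE_createCTable(tableLog, maxSymbolValue);
        if (ct == NULL) return 0;               /* allocation failed : fall back to storing raw */
        {   size_t const err = FSE_buildCTable(ct, norm, maxSymbolValue, tableLog);
            if (FSE_isError(err)) { FSE_freeCTable(ct); return err; }
        }
        cSize = FSE_compress_usingCTable(op + headerSize, dstCapacity - headerSize, src, srcSize, ct);
        FSE_freeCTable(ct);
        if (FSE_isError(cSize) || (cSize == 0)) return cSize;

        return headerSize + cSize;
    }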
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+ Read compactly saved 'normalizedCounter' from 'rBuffer'.
+ @return : size read from 'rBuffer',
+ or an errorCode, which can be tested using FSE_isError().
+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSE_DTable.
+ Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_DTable* FSE_createDTable(unsigned tableLog);
+void FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+ Builds 'dt', which must be already allocated, using FSE_createDTable().
+ return : 0, or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+ Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+ into `dst` which must be already allocated.
+ @return : size of regenerated data (necessarily <= `dstCapacity`),
+ or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
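
The matching decoding sketch, again an editor's illustration rather than part of the header, assumes the compressed block starts with the NCount header written by the sketch above and that 'dst' is large enough for the regenerated data:

    #include "fse.h"

    /* Returns the number of regenerated bytes, or an error code testable with FSE_isError(). */
    static size_t fse_detailed_decompress_example(void* dst, size_t dstCapacity,
                                                  const void* cSrc, size_t cSrcSize)
    {
        short    norm[256];                     /* worst case : 256 symbols */
        unsigned maxSymbolValue = 255;
        unsigned tableLog = 0;
        const unsigned char* const ip = (const unsigned char*)cSrc;
        size_t headerSize, dSize;
        FSE_DTable* dt;

        headerSize = FSE_readNCount(norm, &maxSymbolValue, &tableLog, ip, cSrcSize);
        if (FSE_isError(headerSize)) return headerSize;

        dt = FSE_createDTable(tableLog);
        if (dt == NULL) return (size_t)-1;      /* allocation failure (illustrative handling) */
        {   size_t const err = FSE_buildDTable(dt, norm, maxSymbolValue, tableLog);
            if (FSE_isError(err)) { FSE_freeDTable(dt); return err; }
        }
        /* cSrcSize - headerSize must be exactly the payload size, as required above */
        dSize = FSE_decompress_usingDTable(dst, dstCapacity, ip + headerSize, cSrcSize - headerSize, dt);
        FSE_freeDTable(dt);
        return dSize;
    }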
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FSE_H */
diff --git a/contrib/libs/zstd06/common/fse_static.h b/contrib/libs/zstd06/common/fse_static.h
index 9ecb0b2cb9..d09589efee 100644
--- a/contrib/libs/zstd06/common/fse_static.h
+++ b/contrib/libs/zstd06/common/fse_static.h
@@ -1,340 +1,340 @@
#include <contrib/libs/zstd06/renames.h>
-/* ******************************************************************
- FSE : Finite State Entropy coder
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef FSE_STATIC_H
-#define FSE_STATIC_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* *****************************************
-* Dependencies
-*******************************************/
-#include "fse.h"
-#include "bitstream.h"
-
-
-/* *****************************************
-* Static allocation
-*******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
-
-
-/* *****************************************
-* FSE advanced API
-*******************************************/
-size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-/* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */
-
-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
-/* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
-
-size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
-/* build a fake FSE_CTable, designed to compress always the same symbolValue */
-
-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
-
-size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
-/* build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-
-/* *****************************************
-* FSE symbol compression API
-*******************************************/
-/*!
- This API consists of small unitary functions, which highly benefit from being inlined.
- You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
- Visual seems to do it automatically.
- For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
- If none of these solutions is applicable, include "fse.c" directly.
-*/
-typedef struct
-{
- ptrdiff_t value;
- const void* stateTable;
- const void* symbolTT;
- unsigned stateLog;
-} FSE_CState_t;
-
-static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
-
-static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
-
-static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
-
-/*!
-These functions are inner components of FSE_compress_usingCTable().
-They allow the creation of custom streams, mixing multiple tables and bit sources.
-
-A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
-So the first symbol you will encode is the last you will decode, like a LIFO stack.
-
-You will need a few variables to track your CStream. They are :
-
-FSE_CTable ct; // Provided by FSE_buildCTable()
-BIT_CStream_t bitStream; // bitStream tracking structure
-FSE_CState_t state; // State tracking structure (can have several)
-
-
-The first thing to do is to init bitStream and state.
- size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
- FSE_initCState(&state, ct);
-
-Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
-You can then encode your input data, byte after byte.
-FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
-Remember decoding will be done in reverse direction.
- FSE_encodeByte(&bitStream, &state, symbol);
-
-At any time, you can also add any bit sequence.
-Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
- BIT_addBits(&bitStream, bitField, nbBits);
-
-The above methods don't commit data to memory, they just store it into local register, for speed.
-Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-Writing data to memory is a manual operation, performed by the flushBits function.
- BIT_flushBits(&bitStream);
-
-Your last FSE encoding operation shall be to flush your last state value(s).
- FSE_flushState(&bitStream, &state);
-
-Finally, you must close the bitStream.
-The function returns the size of CStream in bytes.
-If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
-If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
- size_t size = BIT_closeCStream(&bitStream);
-*/
-
-
-/* *****************************************
-* FSE symbol decompression API
-*******************************************/
-typedef struct
-{
- size_t state;
- const void* table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-
-static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
-
-/*!
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream; // Stream context
-FSE_DState_t DState; // State context. Multiple ones are possible
-FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
- errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
- errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
- unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
- size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
- endSignal = FSE_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left into the DStream.
-BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
- BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
- BIT_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
- FSE_endOfDState(&DState);
-*/
-
-
-/* *****************************************
-* FSE unsafe API
-*******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-
-/* *****************************************
-* Implementation of inlined functions
-*******************************************/
-typedef struct {
- int deltaFindState;
- U32 deltaNbBits;
-} FSE_symbolCompressionTransform; /* total 8 bytes */
-
-MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
-{
- const void* ptr = ct;
- const U16* u16ptr = (const U16*) ptr;
- const U32 tableLog = MEM_read16(ptr);
- statePtr->value = (ptrdiff_t)1<<tableLog;
- statePtr->stateTable = u16ptr+2;
- statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
- statePtr->stateLog = tableLog;
-}
-
-MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
-{
- FSE_initCState(statePtr, ct);
- {
- const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
- const U16* stateTable = (const U16*)(statePtr->stateTable);
- U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
- statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
- statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-
- }
-}
-
-MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
-{
- const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
- const U16* const stateTable = (const U16*)(statePtr->stateTable);
- U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
- BIT_addBits(bitC, statePtr->value, nbBitsOut);
- statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-}
-
-MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
-{
- BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
- BIT_flushBits(bitC);
-}
-
-/*<===== Decompression =====>*/
-
-typedef struct {
- U16 tableLog;
- U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct
-{
- unsigned short newState;
- unsigned char symbol;
- unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
-{
- const void* ptr = dt;
- const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
- DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
- BIT_reloadDStream(bitD);
- DStatePtr->table = dt + 1;
-}
-
-MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
-{
- FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- return DInfo.symbol;
-}
-
-MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- U32 const nbBits = DInfo.nbBits;
- size_t const lowBits = BIT_readBits(bitD, nbBits);
- DStatePtr->state = DInfo.newState + lowBits;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- U32 const nbBits = DInfo.nbBits;
- BYTE const symbol = DInfo.symbol;
- size_t const lowBits = BIT_readBits(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-/*! FSE_decodeSymbolFast() :
- unsafe, only works if no symbol has a probability > 50% */
-MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- U32 const nbBits = DInfo.nbBits;
- BYTE const symbol = DInfo.symbol;
- size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
-{
- return DStatePtr->state == 0;
-}
-
-
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_STATIC_H
+#define FSE_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+* Dependencies
+*******************************************/
+#include "fse.h"
+#include "bitstream.h"
+
+
+/* *****************************************
+* Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
+
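
For instance, a caller that fixes its worst-case parameters at build time can reserve the tables and output buffer statically; the EX_* names, the 12-bit tableLog and the 64 KB block size are arbitrary example values chosen for this editor's sketch:

    #include "fse_static.h"

    #define EX_MAX_TABLELOG  12
    #define EX_MAX_SYMBOL    255

    static FSE_CTable    ex_ctable[FSE_CTABLE_SIZE_U32(EX_MAX_TABLELOG, EX_MAX_SYMBOL)];
    static FSE_DTable    ex_dtable[FSE_DTABLE_SIZE_U32(EX_MAX_TABLELOG)];
    static unsigned char ex_dst[FSE_COMPRESSBOUND(64 * 1024)];   /* worst-case output for a 64 KB block */

    /* These arrays can be handed to FSE_buildCTable()/FSE_buildDTable() directly,
       instead of buffers obtained from FSE_createCTable()/FSE_createDTable(). */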
+
+/* *****************************************
+* FSE advanced API
+*******************************************/
+size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+/* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/* build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/* *****************************************
+* FSE symbol compression API
+*******************************************/
+/*!
+ This API consists of small unitary functions, which highly benefit from being inlined.
+ You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+ Visual seems to do it automatically.
+ For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+ If none of these solutions is applicable, include "fse.c" directly.
+*/
+typedef struct
+{
+ ptrdiff_t value;
+ const void* stateTable;
+ const void* symbolTT;
+ unsigned stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/*!
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable ct; // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream; // bitStream tracking structure
+FSE_CState_t state; // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+ size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+ FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeSymbol(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+ BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+ BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushCState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+ size_t size = BIT_closeCStream(&bitStream);
+*/
+
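
Condensing the walk-through above into code, a single-state encoder could look like the following editor's sketch. Flushing after every symbol is deliberately conservative (the local register would actually accommodate several symbols between flushes); the function name and parameters are illustrative:

    #include "fse_static.h"

    /* Encodes 'srcSize' byte symbols with a prebuilt 'ct', back to front, so that the
       decoder regenerates them front to back.  Returns the compressed size,
       0 if the output did not fit, or an error code. */
    static size_t fse_stream_encode_example(void* dst, size_t dstCapacity,
                                            const unsigned char* src, size_t srcSize,
                                            const FSE_CTable* ct)
    {
        BIT_CStream_t bitC;
        FSE_CState_t  state;
        size_t n;

        {   size_t const err = BIT_initCStream(&bitC, dst, dstCapacity);
            if (FSE_isError(err)) return err;
        }
        FSE_initCState(&state, ct);

        for (n = srcSize; n > 0; n--) {       /* reverse direction : LIFO, as documented above */
            FSE_encodeSymbol(&bitC, &state, src[n-1]);
            BIT_flushBits(&bitC);             /* commit the local register to memory */
        }
        FSE_flushCState(&bitC, &state);       /* flush the final state value */

        return BIT_closeCStream(&bitC);       /* 0 => not compressible within dstCapacity */
    }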
+
+/* *****************************************
+* FSE symbol decompression API
+*******************************************/
+typedef struct
+{
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream; // Stream context
+FSE_DState_t DState; // State context. Multiple ones are possible
+FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+ errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you may have stored into the bitStream (in reverse order).
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+ size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+ FSE_endOfDState(&DState);
+*/
+
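
The matching single-state decoder, assuming the number of symbols to regenerate is known in advance (as it normally is for an FSE block produced by the sketch above); names and the error convention are illustrative:

    #include "fse_static.h"

    /* Regenerates exactly 'originalSize' bytes from 'cSrc' using a prebuilt 'dt'.
       Returns 'originalSize', or an error code testable with FSE_isError(). */
    static size_t fse_stream_decode_example(unsigned char* dst, size_t originalSize,
                                            const void* cSrc, size_t cSrcSize,
                                            const FSE_DTable* dt)
    {
        BIT_DStream_t bitD;
        FSE_DState_t  state;
        size_t n;

        {   size_t const err = BIT_initDStream(&bitD, cSrc, cSrcSize);
            if (FSE_isError(err)) return err;
        }
        FSE_initDState(&state, &bitD, dt);

        for (n = 0; n < originalSize; n++) {
            dst[n] = FSE_decodeSymbol(&state, &bitD);   /* reads at most 'tableLog' bits */
            BIT_reloadDStream(&bitD);                   /* refill the local register from memory */
        }

        /* sanity check : both the bitstream and the state should now be fully consumed */
        if (!BIT_endOfDStream(&bitD) || !FSE_endOfDState(&state))
            return (size_t)-1;                          /* corrupted input (illustrative handling) */

        return originalSize;
    }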
+
+/* *****************************************
+* FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+* Implementation of inlined functions
+*******************************************/
+typedef struct {
+ int deltaFindState;
+ U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+ const void* ptr = ct;
+ const U16* u16ptr = (const U16*) ptr;
+ const U32 tableLog = MEM_read16(ptr);
+ statePtr->value = (ptrdiff_t)1<<tableLog;
+ statePtr->stateTable = u16ptr+2;
+ statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
+ statePtr->stateLog = tableLog;
+}
+
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+ FSE_initCState(statePtr, ct);
+ {
+ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ const U16* stateTable = (const U16*)(statePtr->stateTable);
+ U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
+ statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+
+ }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
+{
+ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ const U16* const stateTable = (const U16*)(statePtr->stateTable);
+ U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+ BIT_addBits(bitC, statePtr->value, nbBitsOut);
+ statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+ BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+ BIT_flushBits(bitC);
+}
+
+/*<===== Decompression =====>*/
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+ const void* ptr = dt;
+ const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ return DInfo.symbol;
+}
+
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = DInfo.newState + lowBits;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ BYTE const symbol = DInfo.symbol;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+ unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ BYTE const symbol = DInfo.symbol;
+ size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
#ifndef FSE_COMMONDEFS_ONLY
@@ -383,8 +383,8 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FSE_STATIC_H */
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FSE_STATIC_H */
diff --git a/contrib/libs/zstd06/common/mem.h b/contrib/libs/zstd06/common/mem.h
index 1b84bfdb3d..85d78c84c1 100644
--- a/contrib/libs/zstd06/common/mem.h
+++ b/contrib/libs/zstd06/common/mem.h
@@ -1,201 +1,201 @@
#include <contrib/libs/zstd06/renames.h>
-/* ******************************************************************
- mem.h
- low-level memory access routines
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-****************************************
-* Dependencies
-******************************************/
+/* ******************************************************************
+ mem.h
+ low-level memory access routines
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+* Dependencies
+******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
-
-
-/*-****************************************
-* Compiler specifics
-******************************************/
+
+
+/*-****************************************
+* Compiler specifics
+******************************************/
#if defined(_MSC_VER) /* Visual Studio */
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#endif
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
+#if defined(__GNUC__)
+# define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
-
-/*-**************************************************************
-* Basic Types
-*****************************************************************/
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
- typedef uint8_t BYTE;
- typedef uint16_t U16;
- typedef int16_t S16;
- typedef uint32_t U32;
- typedef int32_t S32;
- typedef uint64_t U64;
- typedef int64_t S64;
-#else
- typedef unsigned char BYTE;
- typedef unsigned short U16;
- typedef signed short S16;
- typedef unsigned int U32;
- typedef signed int S32;
- typedef unsigned long long U64;
- typedef signed long long S64;
-#endif
-
-
-/*-**************************************************************
-* Memory I/O
-*****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS :
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The below switch allow to select different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
- * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- * It can generate buggy code on targets depending on alignment.
- * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
-# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
-# define MEM_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef signed short S16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef signed long long S64;
+#endif
+
+
+/*-**************************************************************
+* Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets, depending on alignment.
+ *            In some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6).
+ *            See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ *            Prefer these methods in priority order (0 > 1 > 2).
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define MEM_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define MEM_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
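Editorial aside (not part of the patch): method 0 below relies on a fixed-size memcpy being well-defined for any alignment, which modern compilers typically lower to a single load. A minimal standalone sketch of that pattern:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* same idea as the default MEM_read32() further down: copy through memcpy,
       which is defined behaviour even when p is not 4-byte aligned */
    static uint32_t read32_portable(const void* p)
    {
        uint32_t v;
        memcpy(&v, p, sizeof(v));
        return v;
    }

    int main(void)
    {
        unsigned char buf[8] = { 0, 0x78, 0x56, 0x34, 0x12, 0, 0, 0 };
        /* deliberately unaligned; prints 0x12345678 on a little-endian host */
        printf("0x%08x\n", (unsigned)read32_portable(buf + 1));
        return 0;
    }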
MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
-
-MEM_STATIC unsigned MEM_isLittleEndian(void)
-{
- const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
-
-/* violates C standard, by lying on structure alignment.
-Only use if no other choice to achieve best performance on target platform */
-MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
-MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
-MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
-
-#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
-
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
-
-#else
-
-/* default method, safe and standard.
- can sometimes prove slower */
-
-MEM_STATIC U16 MEM_read16(const void* memPtr)
-{
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U32 MEM_read32(const void* memPtr)
-{
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U64 MEM_read64(const void* memPtr)
-{
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC size_t MEM_readST(const void* memPtr)
-{
- size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write32(void* memPtr, U32 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write64(void* memPtr, U64 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif /* MEM_FORCE_MEMORY_ACCESS */
-
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+ can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC size_t MEM_readST(const void* memPtr)
+{
+ size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
@@ -238,27 +238,27 @@ MEM_STATIC size_t MEM_swapST(size_t in)
/*=== Little endian r/w ===*/
-MEM_STATIC U16 MEM_readLE16(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read16(memPtr);
- else {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)(p[0] + (p[1]<<8));
- }
-}
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
-{
- if (MEM_isLittleEndian()) {
- MEM_write16(memPtr, val);
- } else {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val;
- p[1] = (BYTE)(val>>8);
- }
-}
-
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read16(memPtr);
+ else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)(p[0] + (p[1]<<8));
+ }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+ if (MEM_isLittleEndian()) {
+ MEM_write16(memPtr, val);
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val;
+ p[1] = (BYTE)(val>>8);
+ }
+}
+
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
@@ -270,54 +270,54 @@ MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
((BYTE*)memPtr)[2] = (BYTE)(val>>16);
}
-MEM_STATIC U32 MEM_readLE32(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read32(memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read32(memPtr);
else
return MEM_swap32(MEM_read32(memPtr));
-}
-
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
-{
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
if (MEM_isLittleEndian())
- MEM_write32(memPtr, val32);
+ MEM_write32(memPtr, val32);
else
MEM_write32(memPtr, MEM_swap32(val32));
-}
-
-MEM_STATIC U64 MEM_readLE64(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read64(memPtr);
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read64(memPtr);
else
return MEM_swap64(MEM_read64(memPtr));
-}
-
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
-{
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
if (MEM_isLittleEndian())
- MEM_write64(memPtr, val64);
+ MEM_write64(memPtr, val64);
else
MEM_write64(memPtr, MEM_swap64(val64));
-}
-
-MEM_STATIC size_t MEM_readLEST(const void* memPtr)
-{
- if (MEM_32bits())
- return (size_t)MEM_readLE32(memPtr);
- else
- return (size_t)MEM_readLE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
-{
- if (MEM_32bits())
- MEM_writeLE32(memPtr, (U32)val);
- else
- MEM_writeLE64(memPtr, (U64)val);
-}
-
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readLE32(memPtr);
+ else
+ return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeLE32(memPtr, (U32)val);
+ else
+ MEM_writeLE64(memPtr, (U64)val);
+}
+
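A short usage sketch for the little-endian helpers above (assuming this mem.h is on the include path); the byte layout produced is identical on little- and big-endian hosts:

    #include <stdio.h>
    #include "mem.h"

    int main(void)
    {
        BYTE buf[4];
        MEM_writeLE32(buf, 0x12345678u);   /* stores 78 56 34 12 regardless of host endianness */
        printf("%02x %02x %02x %02x -> 0x%08x\n",
               buf[0], buf[1], buf[2], buf[3], (unsigned)MEM_readLE32(buf));
        return 0;
    }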
/*=== Big endian r/w ===*/
MEM_STATIC U32 MEM_readBE32(const void* memPtr)
@@ -370,21 +370,21 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
/* function safe only for comparisons */
-MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
-{
- switch (length)
- {
- default :
- case 4 : return MEM_read32(memPtr);
- case 3 : if (MEM_isLittleEndian())
- return MEM_read32(memPtr)<<8;
- else
- return MEM_read32(memPtr)>>8;
- }
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* MEM_H_MODULE */
+MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
+{
+ switch (length)
+ {
+ default :
+ case 4 : return MEM_read32(memPtr);
+ case 3 : if (MEM_isLittleEndian())
+ return MEM_read32(memPtr)<<8;
+ else
+ return MEM_read32(memPtr)>>8;
+ }
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
diff --git a/contrib/libs/zstd06/common/zbuff.h b/contrib/libs/zstd06/common/zbuff.h
index 03de2ad268..54c40b47ca 100644
--- a/contrib/libs/zstd06/common/zbuff.h
+++ b/contrib/libs/zstd06/common/zbuff.h
@@ -1,168 +1,168 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- Buffered version of Zstd compression library
- Copyright (C) 2015-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd homepage : http://www.zstd.net/
-*/
-#ifndef ZSTD_BUFFERED_H
-#define ZSTD_BUFFERED_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Dependencies
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* ***************************************************************
-* Compiler specifics
-*****************************************************************/
-/*!
-* ZSTD_DLL_EXPORT :
-* Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-# define ZSTDLIB_API __declspec(dllexport)
-#else
-# define ZSTDLIB_API
-#endif
-
-
-/* *************************************
-* Streaming functions
-***************************************/
-typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
-ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
-ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
-
-ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-
-ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
-ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
-ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
-
-/*-*************************************************
-* Streaming compression - howto
-*
-* A ZBUFF_CCtx object is required to track streaming operation.
-* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
-* ZBUFF_CCtx objects can be reused multiple times.
-*
-* Start by initializing ZBUF_CCtx.
-* Use ZBUFF_compressInit() to start a new compression operation.
-* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary.
-*
-* Use ZBUFF_compressContinue() repetitively to consume input stream.
-* *srcSizePtr and *dstCapacityPtr can be any size.
-* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr.
-* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data.
-* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst .
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency)
-* or an error code, which can be tested using ZBUFF_isError().
-*
-* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush().
-* The nb of bytes written into `dst` will be reported into *dstCapacityPtr.
-* Note that the function cannot output more than *dstCapacityPtr,
-* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small.
-* @return : nb of bytes still present into internal buffer (0 if it's empty)
-* or an error code, which can be tested using ZBUFF_isError().
-*
-* ZBUFF_compressEnd() instructs to finish a frame.
-* It will perform a flush and write frame epilogue.
-* The epilogue is required for decoders to consider a frame completed.
-* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
-* In which case, call again ZBUFF_compressFlush() to complete the flush.
-* @return : nb of bytes still present into internal buffer (0 if it's empty)
-* or an error code, which can be tested using ZBUFF_isError().
-*
-* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
-* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering).
-* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
-* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
-* **************************************************/
-
-
-typedef struct ZBUFF_DCtx_s ZBUFF_DCtx;
-ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void);
-ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
-
-ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
-ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
-
-ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
- void* dst, size_t* dstCapacityPtr,
- const void* src, size_t* srcSizePtr);
-
-/*-***************************************************************************
-* Streaming decompression howto
-*
-* A ZBUFF_DCtx object is required to track streaming operations.
-* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
-* Use ZBUFF_decompressInit() to start a new decompression operation,
-* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
-* Note that ZBUFF_DCtx objects can be re-init multiple times.
-*
-* Use ZBUFF_decompressContinue() repetitively to consume your input.
-* *srcSizePtr and *dstCapacityPtr can be any size.
-* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
-* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
-* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
-* or 0 when a frame is completely decoded,
-* or an error code, which can be tested using ZBUFF_isError().
-*
-* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
-* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
-* input : ZBUFF_recommendedDInSize == 128KB + 3;
-* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* *******************************************************************************/
-
-
-/* *************************************
-* Tool functions
-***************************************/
-ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
-ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
-
-/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+/*
+ Buffered version of Zstd compression library
+ Copyright (C) 2015-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd homepage : http://www.zstd.net/
+*/
+#ifndef ZSTD_BUFFERED_H
+#define ZSTD_BUFFERED_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Dependencies
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* ***************************************************************
+* Compiler specifics
+*****************************************************************/
+/*!
+* ZSTD_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+# define ZSTDLIB_API __declspec(dllexport)
+#else
+# define ZSTDLIB_API
+#endif
+
+
+/* *************************************
+* Streaming functions
+***************************************/
+typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
+ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
+ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
+
+ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+
+ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
+ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
+ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
+
+/*-*************************************************
+* Streaming compression - howto
+*
+* A ZBUFF_CCtx object is required to track streaming operation.
+* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
+* ZBUFF_CCtx objects can be reused multiple times.
+*
+* Start by initializing a ZBUFF_CCtx.
+* Use ZBUFF_compressInit() to start a new compression operation.
+* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary.
+*
+* Use ZBUFF_compressContinue() repetitively to consume input stream.
+* *srcSizePtr and *dstCapacityPtr can be any size.
+* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr.
+* Note that it may not consume the entire input, in which case it's up to the caller to present the remaining data again.
+* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst .
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency)
+* or an error code, which can be tested using ZBUFF_isError().
+*
+* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush().
+* The nb of bytes written into `dst` will be reported into *dstCapacityPtr.
+* Note that the function cannot output more than *dstCapacityPtr,
+* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small.
+* @return : nb of bytes still present into internal buffer (0 if it's empty)
+* or an error code, which can be tested using ZBUFF_isError().
+*
+* ZBUFF_compressEnd() instructs to finish a frame.
+* It will perform a flush and write frame epilogue.
+* The epilogue is required for decoders to consider a frame completed.
+* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
+* In that case, call ZBUFF_compressFlush() again to complete the flush.
+* @return : nb of bytes still present into internal buffer (0 if it's empty)
+* or an error code, which can be tested using ZBUFF_isError().
+*
+* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
+* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering).
+* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
+* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
+* **************************************************/
+
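A usage sketch for the streaming-compression howto above; buffer handling and error paths are abbreviated, and the function name compress_stream is illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include "zbuff.h"

    static int compress_stream(FILE* fin, FILE* fout, int level)
    {
        size_t const inSize  = ZBUFF_recommendedCInSize();
        size_t const outSize = ZBUFF_recommendedCOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);
        ZBUFF_CCtx* const cctx = ZBUFF_createCCtx();
        size_t readSize;

        ZBUFF_compressInit(cctx, level);
        while ((readSize = fread(inBuf, 1, inSize, fin)) > 0) {
            size_t consumed = 0;
            while (consumed < readSize) {                /* one chunk may need several calls */
                size_t dstCap  = outSize;
                size_t srcLeft = readSize - consumed;
                size_t const hint = ZBUFF_compressContinue(cctx, outBuf, &dstCap,
                                                           (const char*)inBuf + consumed, &srcLeft);
                if (ZBUFF_isError(hint)) return 1;
                fwrite(outBuf, 1, dstCap, fout);         /* dstCap now holds bytes produced */
                consumed += srcLeft;                     /* srcLeft now holds bytes consumed */
            }
        }
        {   size_t dstCap = outSize;                     /* flush remaining data + frame epilogue */
            size_t const res = ZBUFF_compressEnd(cctx, outBuf, &dstCap);
            if (ZBUFF_isError(res)) return 1;
            fwrite(outBuf, 1, dstCap, fout);
        }
        ZBUFF_freeCCtx(cctx);
        free(inBuf); free(outBuf);
        return 0;
    }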
+
+typedef struct ZBUFF_DCtx_s ZBUFF_DCtx;
+ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void);
+ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
+
+ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
+ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
+
+ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
+ void* dst, size_t* dstCapacityPtr,
+ const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+* Streaming decompression howto
+*
+* A ZBUFF_DCtx object is required to track streaming operations.
+* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+* Use ZBUFF_decompressInit() to start a new decompression operation,
+* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
+* Note that ZBUFF_DCtx objects can be re-initialized multiple times.
+*
+* Use ZBUFF_decompressContinue() repetitively to consume your input.
+* *srcSizePtr and *dstCapacityPtr can be any size.
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+* or 0 when a frame is completely decoded,
+* or an error code, which can be tested using ZBUFF_isError().
+*
+* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
+* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+* input : ZBUFF_recommendedDInSize == 128KB + 3;
+* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
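The matching decompression sketch, continuing the previous example (same includes, same abbreviated error handling):

    static int decompress_stream(FILE* fin, FILE* fout)
    {
        size_t const inSize  = ZBUFF_recommendedDInSize();
        size_t const outSize = ZBUFF_recommendedDOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);
        ZBUFF_DCtx* const dctx = ZBUFF_createDCtx();
        size_t readSize;

        ZBUFF_decompressInit(dctx);
        while ((readSize = fread(inBuf, 1, inSize, fin)) > 0) {
            size_t consumed = 0;
            while (consumed < readSize) {
                size_t dstCap  = outSize;
                size_t srcLeft = readSize - consumed;
                size_t const hint = ZBUFF_decompressContinue(dctx, outBuf, &dstCap,
                                                             (const char*)inBuf + consumed, &srcLeft);
                if (ZBUFF_isError(hint)) return 1;
                fwrite(outBuf, 1, dstCap, fout);
                consumed += srcLeft;
                if (hint == 0) break;                    /* a frame has been fully decoded */
            }
        }
        ZBUFF_freeDCtx(dctx);
        free(inBuf); free(outBuf);
        return 0;
    }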
+
+/* *************************************
+* Tool functions
+***************************************/
+ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
+ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
* These sizes are just hints, they tend to offer better latency */
-ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void);
-ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_BUFFERED_H */
+ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
+ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);
+ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void);
+ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_BUFFERED_H */
diff --git a/contrib/libs/zstd06/common/zbuff_static.h b/contrib/libs/zstd06/common/zbuff_static.h
index 7aa81642f6..e06404f646 100644
--- a/contrib/libs/zstd06/common/zbuff_static.h
+++ b/contrib/libs/zstd06/common/zbuff_static.h
@@ -1,72 +1,72 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd - buffered version of compression library
- experimental complementary API, for static linking only
- Copyright (C) 2015-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd homepage : http://www.zstd.net
-*/
-#ifndef ZSTD_BUFFERED_STATIC_H
-#define ZSTD_BUFFERED_STATIC_H
-
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include "zstd_static.h" /* ZSTD_parameters */
-#include "zbuff.h"
+/*
+ zstd - buffered version of compression library
+ experimental complementary API, for static linking only
+ Copyright (C) 2015-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_BUFFERED_STATIC_H
+#define ZSTD_BUFFERED_STATIC_H
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include "zstd_static.h" /* ZSTD_parameters */
+#include "zbuff.h"
#include "zstd_internal.h" /* MIN */
-
-
-/* *************************************
-* Advanced Streaming functions
-***************************************/
-ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx,
- const void* dict, size_t dictSize,
- ZSTD_parameters params, U64 pledgedSrcSize);
-
+
+
+/* *************************************
+* Advanced Streaming functions
+***************************************/
+ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params, U64 pledgedSrcSize);
+
MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
size_t length = MIN(dstCapacity, srcSize);
memcpy(dst, src, length);
return length;
}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_BUFFERED_STATIC_H */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_BUFFERED_STATIC_H */
diff --git a/contrib/libs/zstd06/common/zstd.h b/contrib/libs/zstd06/common/zstd.h
index 49ad80a8a8..3574a82ec0 100644
--- a/contrib/libs/zstd06/common/zstd.h
+++ b/contrib/libs/zstd06/common/zstd.h
@@ -1,65 +1,65 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd - standard compression library
- Header File
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-*/
-#ifndef ZSTD_H
-#define ZSTD_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/*-***************************************************************
-* Export parameters
-*****************************************************************/
-/*!
-* ZSTD_DLL_EXPORT :
-* Enable exporting of functions when building a Windows DLL
-*/
-#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-# define ZSTDLIB_API __declspec(dllexport)
-#else
-# define ZSTDLIB_API
-#endif
-
-
-/* *************************************
-* Version
-***************************************/
+/*
+ zstd - standard compression library
+ Header File
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTD_H
+#define ZSTD_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/*-***************************************************************
+* Export parameters
+*****************************************************************/
+/*!
+* ZSTD_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+*/
+#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+# define ZSTDLIB_API __declspec(dllexport)
+#else
+# define ZSTDLIB_API
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
#define ZSTD_VERSION_MAJOR 0
#define ZSTD_VERSION_MINOR 6
#define ZSTD_VERSION_RELEASE 2
@@ -69,88 +69,88 @@ extern "C" {
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
-#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-ZSTDLIB_API unsigned ZSTD_versionNumber (void);
-
-
-/* *************************************
-* Simple functions
-***************************************/
-/*! ZSTD_compress() :
- Compresses `srcSize` bytes from buffer `src` into buffer `dst` of size `dstCapacity`.
- Destination buffer must be already allocated.
- Compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
- @return : the number of bytes written into `dst`,
- or an error code if it fails (which can be tested using ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- int compressionLevel);
-
-/*! ZSTD_decompress() :
- `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
- `dstCapacity` must be large enough, equal or larger than originalSize.
- @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
- or an errorCode if it fails (which can be tested using ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
- const void* src, size_t compressedSize);
-
-
-/* *************************************
-* Helper functions
-***************************************/
-ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */
-
-/* Error Management */
-ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
-ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string for an error code */
-
-
-/* *************************************
-* Explicit memory management
-***************************************/
-/** Compression context */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx; /*< incomplete type */
-ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
-ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*!< @return : errorCode */
-
-/** ZSTD_compressCCtx() :
- Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
-ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
-
-/** Decompression context */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
-ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /*!< @return : errorCode */
-
-/** ZSTD_decompressDCtx() :
-* Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
-ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-
-/*-***********************
-* Dictionary API
-*************************/
-/*! ZSTD_compress_usingDict() :
-* Compression using a pre-defined Dictionary content (see dictBuilder).
-* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */
-ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- int compressionLevel);
-
-/*! ZSTD_decompress_usingDict() :
-* Decompression using a pre-defined Dictionary content (see dictBuilder).
-* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
-* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */
-ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_H */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+ZSTDLIB_API unsigned ZSTD_versionNumber (void);
+
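A tiny sketch comparing the compile-time and run-time versions (useful when the header and the linked library might diverge):

    #include <stdio.h>
    #include "zstd.h"

    int main(void)
    {
        printf("header  : %u (%s)\n", (unsigned)ZSTD_VERSION_NUMBER, ZSTD_VERSION_STRING);
        printf("library : %u\n", ZSTD_versionNumber());
        return 0;
    }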
+
+/* *************************************
+* Simple functions
+***************************************/
+/*! ZSTD_compress() :
+ Compresses `srcSize` bytes from buffer `src` into buffer `dst` of size `dstCapacity`.
+ Destination buffer must be already allocated.
+ Compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ @return : the number of bytes written into `dst`,
+ or an error code if it fails (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ int compressionLevel);
+
+/*! ZSTD_decompress() :
+ `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+  `dstCapacity` must be large enough, i.e. equal to or larger than originalSize.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ or an errorCode if it fails (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+ const void* src, size_t compressedSize);
+
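A one-shot round-trip sketch for the two simple functions above (allocation and error handling kept minimal; compression level 1 is arbitrary):

    #include <stdlib.h>
    #include <string.h>
    #include "zstd.h"

    static int roundtrip(const void* src, size_t srcSize)
    {
        size_t const bound = ZSTD_compressBound(srcSize);   /* declared below, in Helper functions */
        void* const cBuf = malloc(bound);
        void* const rBuf = malloc(srcSize);
        int ok = 0;
        if (cBuf && rBuf) {
            size_t const cSize = ZSTD_compress(cBuf, bound, src, srcSize, 1);
            if (!ZSTD_isError(cSize)) {
                size_t const rSize = ZSTD_decompress(rBuf, srcSize, cBuf, cSize);
                ok = !ZSTD_isError(rSize) && rSize == srcSize
                   && memcmp(src, rBuf, srcSize) == 0;
            }
        }
        free(cBuf); free(rBuf);
        return ok ? 0 : 1;
    }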
+
+/* *************************************
+* Helper functions
+***************************************/
+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */
+
+/* Error Management */
+ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string for an error code */
+
+
+/* *************************************
+* Explicit memory management
+***************************************/
+/** Compression context */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx; /*< incomplete type */
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
+ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*!< @return : errorCode */
+
+/** ZSTD_compressCCtx() :
+ Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
+
+/** Decompression context */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /*!< @return : errorCode */
+
+/** ZSTD_decompressDCtx() :
+* Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
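Sketch of why the explicit contexts exist: one ZSTD_CCtx can be reused across many buffers, amortizing the cost of its internal state (names and the compression level are illustrative):

    static void compress_many(void* dst, size_t dstCapacity,
                              const void* const* srcs, const size_t* srcSizes, size_t n)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t i;
        for (i = 0; i < n; i++) {
            size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, srcs[i], srcSizes[i], 1);
            (void)cSize;   /* check with ZSTD_isError(), then consume dst before the next iteration */
        }
        ZSTD_freeCCtx(cctx);
    }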
+
+/*-***********************
+* Dictionary API
+*************************/
+/*! ZSTD_compress_usingDict() :
+* Compression using a pre-defined Dictionary content (see dictBuilder).
+* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+* Decompression using a pre-defined Dictionary content (see dictBuilder).
+* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize);
+
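A dictionary-compression sketch; dictBuffer/dictSize stand in for dictionary content produced elsewhere (e.g. by dictBuilder), and the identical bytes must be passed to ZSTD_decompress_usingDict() on the decoding side:

    static size_t compress_with_dict(void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dictBuffer, size_t dictSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingDict(cctx, dst, dstCapacity,
                                                     src, srcSize,
                                                     dictBuffer, dictSize, 1);
        ZSTD_freeCCtx(cctx);
        return cSize;   /* test with ZSTD_isError() */
    }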
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_H */
diff --git a/contrib/libs/zstd06/common/zstd_internal.h b/contrib/libs/zstd06/common/zstd_internal.h
index 367c1d0335..2ce2b18cf1 100644
--- a/contrib/libs/zstd06/common/zstd_internal.h
+++ b/contrib/libs/zstd06/common/zstd_internal.h
@@ -1,256 +1,256 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd_internal - common functions to include
- Header File for include
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd homepage : https://www.zstd.net
-*/
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include "mem.h"
-#include "error_private.h"
-#include "zstd_static.h"
-
-
-/*-*************************************
-* Common macros
-***************************************/
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
-
-
-/*-*************************************
-* Common constants
-***************************************/
-#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs
-#include <stdio.h>
-#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
- #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
- #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
- #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__)
-#else
- #define ZSTD_LOG_PARSER(...)
- #define ZSTD_LOG_ENCODE(...)
- #define ZSTD_LOG_BLOCK(...)
-#endif
-
-#define ZSTD_OPT_NUM (1<<12)
-#define ZSTD_DICT_MAGIC 0xEC30A436
-
-#define ZSTD_REP_NUM 3
-#define ZSTD_REP_INIT ZSTD_REP_NUM
-#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12
-static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 };
-
-#define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... :( */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-
-#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
-
-#define HufLog 12
-
-#define IS_HUF 0
-#define IS_PCH 1
-#define IS_RAW 2
-#define IS_RLE 3
-
-#define LONGNBSEQ 0x7F00
-
-#define MINMATCH 3
-#define EQUAL_READ32 4
-#define REPCODE_STARTVALUE 1
-
-#define Litbits 8
-#define MaxLit ((1<<Litbits) - 1)
-#define MaxML 52
-#define MaxLL 35
-#define MaxOff 28
-#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
-#define MLFSELog 9
-#define LLFSELog 9
-#define OffFSELog 8
-
-#define FSE_ENCODING_RAW 0
-#define FSE_ENCODING_RLE 1
-#define FSE_ENCODING_STATIC 2
-#define FSE_ENCODING_DYNAMIC 3
-
-static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
- 13,14,15,16 };
-static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
- -1,-1,-1,-1 };
-static const U32 LL_defaultNormLog = 6;
-
-static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
- 12,13,14,15,16 };
-static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
- -1,-1,-1,-1,-1 };
-static const U32 ML_defaultNormLog = 6;
-
-static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
-static const U32 OF_defaultNormLog = 5;
-
-
-/*-*******************************************
-* Shared functions to include for inlining
-*********************************************/
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-
-/*! ZSTD_wildcopy() :
-* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
- do
- COPY8(op, ip)
- while (op < oend);
-}
-
-MEM_STATIC unsigned ZSTD_highbit(U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse(&r, val);
- return (unsigned)r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
- return 31 - __builtin_clz(val);
-# else /* Software version */
- static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- int r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-/*-*******************************************
-* Private interfaces
-*********************************************/
-typedef struct {
- U32 off;
- U32 len;
-} ZSTD_match_t;
-
-typedef struct {
- U32 price;
- U32 off;
- U32 mlen;
- U32 litlen;
- U32 rep[ZSTD_REP_INIT];
-} ZSTD_optimal_t;
-
+/*
+ zstd_internal - common functions to include
+ Header File for include
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd homepage : https://www.zstd.net
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h"
+#include "zstd_static.h"
+
+
+/*-*************************************
+* Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/*-*************************************
+* Common constants
+***************************************/
+#define ZSTD_OPT_DEBUG 0 // 3 = compression stats; 5 = check encoded sequences; 9 = full logs
+#include <stdio.h>
+#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
+ #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
+ #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
+ #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__)
+#else
+ #define ZSTD_LOG_PARSER(...)
+ #define ZSTD_LOG_ENCODE(...)
+ #define ZSTD_LOG_BLOCK(...)
+#endif
+
+#define ZSTD_OPT_NUM (1<<12)
+#define ZSTD_DICT_MAGIC 0xEC30A436
+
+#define ZSTD_REP_NUM 3
+#define ZSTD_REP_INIT ZSTD_REP_NUM
+#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
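The three size macros above are postfix multipliers: writing a literal followed by the macro expands into a multiplication by the matching power of two. A one-line illustration (the variable name is purely illustrative):

static const size_t exampleLimit = 128 KB;   /* expands to 128 *(1 <<10) == 131072, the same value as ZSTD_BLOCKSIZE_MAX */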
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+#define BIT1 2
+#define BIT0 1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12
+static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 };
+
+#define ZSTD_BLOCKHEADERSIZE 3 /* because C standard does not allow a static const value to be defined using another static const value .... :( */
+static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
+
+#define HufLog 12
+
+#define IS_HUF 0
+#define IS_PCH 1
+#define IS_RAW 2
+#define IS_RLE 3
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+#define EQUAL_READ32 4
+#define REPCODE_STARTVALUE 1
+
+#define Litbits 8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML 52
+#define MaxLL 35
+#define MaxOff 28
+#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog 9
+#define LLFSELog 9
+#define OffFSELog 8
+
+#define FSE_ENCODING_RAW 0
+#define FSE_ENCODING_RLE 1
+#define FSE_ENCODING_STATIC 2
+#define FSE_ENCODING_DYNAMIC 3
+
+static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
+ 13,14,15,16 };
+static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+ -1,-1,-1,-1 };
+static const U32 LL_defaultNormLog = 6;
+
+static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
+ 12,13,14,15,16 };
+static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+ -1,-1,-1,-1,-1 };
+static const U32 ML_defaultNormLog = 6;
+
+static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+static const U32 OF_defaultNormLog = 5;
+
+
+/*-*******************************************
+* Shared functions to include for inlining
+*********************************************/
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy() :
+* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+#define WILDCOPY_OVERLENGTH 8
+MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ do
+ COPY8(op, ip)
+ while (op < oend);
+}
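Because the copy loop above always moves whole 8-byte groups, it can run past `dst + length` by up to 7 bytes (8 when length==0), and reads the same amount past `src + length`. A small sketch of that calling contract, with illustrative sizes:

static void ZSTD_wildcopy_usageSketch(const BYTE* src)   /* src must expose at least 20 + WILDCOPY_OVERLENGTH readable bytes */
{
    BYTE dst[20 + WILDCOPY_OVERLENGTH];   /* slack reserved for the over-copy */
    ZSTD_wildcopy(dst, src, 20);          /* actually moves 24 bytes : 20 requested + 4 bytes of permitted overrun */
    (void)dst;
}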
+
+MEM_STATIC unsigned ZSTD_highbit(U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse(&r, val);
+ return (unsigned)r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
+ return 31 - __builtin_clz(val);
+# else /* Software version */
+ static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ int r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
+
+
+/*-*******************************************
+* Private interfaces
+*********************************************/
+typedef struct {
+ U32 off;
+ U32 len;
+} ZSTD_match_t;
+
+typedef struct {
+ U32 price;
+ U32 off;
+ U32 mlen;
+ U32 litlen;
+ U32 rep[ZSTD_REP_INIT];
+} ZSTD_optimal_t;
+
//#if ZSTD_OPT_DEBUG == 3
// #include ".debug/zstd_stats.h"
//#else
- typedef struct { U32 unused; } ZSTD_stats_t;
+ typedef struct { U32 unused; } ZSTD_stats_t;
MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; }
MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; }
MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; }
MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }
//#endif
-
-typedef struct {
- void* buffer;
- U32* offsetStart;
- U32* offset;
- BYTE* offCodeStart;
- BYTE* litStart;
- BYTE* lit;
- U16* litLengthStart;
- U16* litLength;
- BYTE* llCodeStart;
- U16* matchLengthStart;
- U16* matchLength;
- BYTE* mlCodeStart;
- U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
- U32 longLengthPos;
- /* opt */
- ZSTD_optimal_t* priceTable;
- ZSTD_match_t* matchTable;
- U32* matchLengthFreq;
- U32* litLengthFreq;
- U32* litFreq;
- U32* offCodeFreq;
- U32 matchLengthSum;
- U32 matchSum;
- U32 litLengthSum;
- U32 litSum;
- U32 offCodeSum;
- U32 log2matchLengthSum;
- U32 log2matchSum;
- U32 log2litLengthSum;
- U32 log2litSum;
- U32 log2offCodeSum;
- U32 factor;
+
+typedef struct {
+ void* buffer;
+ U32* offsetStart;
+ U32* offset;
+ BYTE* offCodeStart;
+ BYTE* litStart;
+ BYTE* lit;
+ U16* litLengthStart;
+ U16* litLength;
+ BYTE* llCodeStart;
+ U16* matchLengthStart;
+ U16* matchLength;
+ BYTE* mlCodeStart;
+ U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
+ U32 longLengthPos;
+ /* opt */
+ ZSTD_optimal_t* priceTable;
+ ZSTD_match_t* matchTable;
+ U32* matchLengthFreq;
+ U32* litLengthFreq;
+ U32* litFreq;
+ U32* offCodeFreq;
+ U32 matchLengthSum;
+ U32 matchSum;
+ U32 litLengthSum;
+ U32 litSum;
+ U32 offCodeSum;
+ U32 log2matchLengthSum;
+ U32 log2matchSum;
+ U32 log2litLengthSum;
+ U32 log2litSum;
+ U32 log2offCodeSum;
+ U32 factor;
U32 cachedPrice;
U32 cachedLitLength;
const BYTE* cachedLiterals;
- ZSTD_stats_t stats;
-} seqStore_t;
-
-const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
-
-
-#endif /* ZSTD_CCOMMON_H_MODULE */
+ ZSTD_stats_t stats;
+} seqStore_t;
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
+
+
+#endif /* ZSTD_CCOMMON_H_MODULE */
diff --git a/contrib/libs/zstd06/common/zstd_static.h b/contrib/libs/zstd06/common/zstd_static.h
index 65d3bc8748..a1dae42094 100644
--- a/contrib/libs/zstd06/common/zstd_static.h
+++ b/contrib/libs/zstd06/common/zstd_static.h
@@ -1,273 +1,273 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd - standard compression library
- Header File for static linking only
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd homepage : http://www.zstd.net
-*/
-#ifndef ZSTD_STATIC_H
-#define ZSTD_STATIC_H
-
-/* The prototypes defined within this file are considered experimental.
- * They should not be used in a DLL context, as they may change in the future.
- * Prefer static linking if you need them, to stay in control of breaking version changes.
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include "zstd.h"
-#include "mem.h"
-
-
-/*-*************************************
-* Constants
-***************************************/
-#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */
-
-
-/*-*************************************
-* Types
-***************************************/
-#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? 25 : 27))
-#define ZSTD_WINDOWLOG_MIN 18
-#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CHAINLOG_MIN 4
-#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
-#define ZSTD_HASHLOG_MIN 12
-#define ZSTD_HASHLOG3_MAX 17
-#define ZSTD_HASHLOG3_MIN 15
-#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN 1
-#define ZSTD_SEARCHLENGTH_MAX 7
-#define ZSTD_SEARCHLENGTH_MIN 3
-#define ZSTD_TARGETLENGTH_MIN 4
-#define ZSTD_TARGETLENGTH_MAX 999
-
-/* from faster to stronger */
-typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;
-
-typedef struct {
- U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */
- U32 chainLog; /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */
- U32 hashLog; /* dispatch table : larger == faster, more memory */
- U32 searchLog; /* nb of searches : larger == more compression, slower */
- U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */
- U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */
- ZSTD_strategy strategy;
-} ZSTD_compressionParameters;
-
-typedef struct {
- U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */
-} ZSTD_frameParameters;
-
-typedef struct {
- ZSTD_compressionParameters cParams;
- ZSTD_frameParameters fParams;
-} ZSTD_parameters;
-
-
-/*-*************************************
-* Advanced functions
-***************************************/
-ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
-
-/*! ZSTD_getCParams() :
-* @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
-* `srcSize` value is optional, select 0 if not known */
+/*
+ zstd - standard compression library
+ Header File for static linking only
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The prototypes defined within this file are considered experimental.
+ * They should not be used in a DLL context, as they may change in the future.
+ * Prefer static linking if you need them, to stay in control of breaking version changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "zstd.h"
+#include "mem.h"
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */
+
+
+/*-*************************************
+* Types
+***************************************/
+#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? 25 : 27))
+#define ZSTD_WINDOWLOG_MIN 18
+#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CHAINLOG_MIN 4
+#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
+#define ZSTD_HASHLOG_MIN 12
+#define ZSTD_HASHLOG3_MAX 17
+#define ZSTD_HASHLOG3_MIN 15
+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_SEARCHLENGTH_MAX 7
+#define ZSTD_SEARCHLENGTH_MIN 3
+#define ZSTD_TARGETLENGTH_MIN 4
+#define ZSTD_TARGETLENGTH_MAX 999
+
+/* from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;
+
+typedef struct {
+ U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */
+ U32 chainLog; /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+ U32 hashLog; /* dispatch table : larger == faster, more memory */
+ U32 searchLog; /* nb of searches : larger == more compression, slower */
+ U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */
+ U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */
+ ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+
+typedef struct {
+ U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */
+} ZSTD_frameParameters;
+
+typedef struct {
+ ZSTD_compressionParameters cParams;
+ ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+
+/*-*************************************
+* Advanced functions
+***************************************/
+ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
+
+/*! ZSTD_getCParams() :
+* @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
+* `srcSize` value is optional, select 0 if not known */
ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize);
-
-/*! ZSTD_checkParams() :
-* Ensure param values remain within authorized range */
-ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
-
-/*! ZSTD_adjustParams() :
-* optimize params for a given `srcSize` and `dictSize`.
-* both values are optional, select `0` if unknown. */
-ZSTDLIB_API void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize);
-
-/*! ZSTD_compress_advanced() :
-* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
-ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- ZSTD_parameters params);
-
-/*! ZSTD_compress_usingPreparedDCtx() :
-* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded.
-* It avoids reloading the dictionary each time.
-* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced().
-* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */
-ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx(
- ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize);
-
-/*- Advanced Decompression functions -*/
-
-/*! ZSTD_decompress_usingPreparedDCtx() :
-* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
-* It avoids reloading the dictionary each time.
-* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict().
-* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
-ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx(
- ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize);
-
-
-/* **************************************
-* Streaming functions (direct mode)
-****************************************/
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize);
-ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);
-
-ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity);
-
-/*
- Streaming compression, synchronous mode (bufferless)
-
- A ZSTD_CCtx object is required to track streaming operations.
- Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it.
- ZSTD_CCtx object can be re-used multiple times within successive compression operations.
-
- Start by initializing a context.
- Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
- or ZSTD_compressBegin_advanced(), for finer parameter control.
- It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
-
- Then, consume your input using ZSTD_compressContinue().
- The interface is synchronous, so all input will be consumed and produce a compressed output.
- You must ensure there is enough space in destination buffer to store compressed data under worst case scenario.
- Worst case evaluation is provided by ZSTD_compressBound().
-
- Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
- Without the epilogue, frames will be considered incomplete by decoder.
-
- You can then reuse ZSTD_CCtx to compress some new frame.
-*/
-
-typedef struct { U64 frameContentSize; U32 windowLog; } ZSTD_frameParams;
-
-#define ZSTD_FRAMEHEADERSIZE_MAX 13 /* for static allocation */
-static const size_t ZSTD_frameHeaderSize_min = 5;
-static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */
-
-ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
-
-ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/*
- Streaming decompression, direct mode (bufferless)
-
- A ZSTD_DCtx object is required to track streaming operations.
- Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
- A ZSTD_DCtx object can be re-used multiple times.
-
- First optional operation is to retrieve frame parameters, using ZSTD_getFrameParams(), which doesn't consume the input.
- It can provide the minimum size of rolling buffer required to properly decompress data,
- and optionally the final size of uncompressed content.
- (Note : content size is an optional info that may not be present. 0 means : content size unknown)
- Frame parameters are extracted from the beginning of compressed frame.
- The amount of data to read is variable, from ZSTD_frameHeaderSize_min to ZSTD_frameHeaderSize_max (so if `srcSize` >= ZSTD_frameHeaderSize_max, it will always work)
- If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result.
- Result : 0 when successful, it means the ZSTD_frameParams structure has been filled.
- >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header.
- errorCode, which can be tested using ZSTD_isError()
-
- Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
- Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
-
- Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
-  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
- ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
- They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
-
- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity)
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-
- A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
- Context can then be reset to start a new decompression.
-*/
-
-
-/* **************************************
-* Block functions
-****************************************/
-/*! Block functions produce and decode raw zstd blocks, without frame metadata.
-    The user must keep track of the information required to regenerate the data, such as compressed and content sizes.
-
- A few rules to respect :
- - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
- - Compressing or decompressing requires a context structure
- + Use ZSTD_createCCtx() and ZSTD_createDCtx()
- - It is necessary to init context before starting
- + compression : ZSTD_compressBegin()
- + decompression : ZSTD_decompressBegin()
- + variants _usingDict() are also allowed
- + copyCCtx() and copyDCtx() work too
- - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
- In which case, nothing is produced into `dst`.
- + User must test for such outcome and deal directly with uncompressed data
- + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!
-*/
-
-#define ZSTD_BLOCKSIZE_MAX (128 * 1024) /* define, for static allocation */
+
+/*! ZSTD_checkParams() :
+* Ensure param values remain within authorized range */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustParams() :
+* optimize params for a given `srcSize` and `dictSize`.
+* both values are optional, select `0` if unknown. */
+ZSTDLIB_API void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize);
+
+/*! ZSTD_compress_advanced() :
+* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
+ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ ZSTD_parameters params);
+
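Taken together, the parameter helpers above are typically chained: pick defaults with ZSTD_getCParams(), shrink them with ZSTD_adjustCParams(), then hand the result to ZSTD_compress_advanced(). A minimal sketch assuming an in-memory input and no dictionary (the wrapper name and buffer handling are illustrative):

static size_t compress_with_tuned_params(void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         int level)
{
    ZSTD_parameters params;
    size_t result;
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) return 0;   /* simplistic : a real caller should surface the allocation failure */

    params.cParams = ZSTD_getCParams(level, srcSize, 0);    /* 0 : no dictionary */
    ZSTD_adjustCParams(&params.cParams, srcSize, 0);        /* downsize tables for small inputs */
    params.fParams.contentSizeFlag = 1;                     /* store content size in the frame header */

    result = ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
    ZSTD_freeCCtx(cctx);
    return result;   /* compressed size, or an error code testable with ZSTD_isError() */
}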
+/*! ZSTD_compress_usingPreparedDCtx() :
+* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded.
+* It avoids reloading the dictionary each time.
+* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced().
+* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */
+ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx(
+ ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
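The prepared-context pattern amortizes dictionary loading across many small jobs: one context holds the loaded dictionary and is never modified, a second one runs each compression. A rough sketch, assuming `dict`/`dictSize` describe an already-built dictionary (function name, sample inputs and buffer size are illustrative):

static void compress_many_with_dict_sketch(const void* dict, size_t dictSize, int level)
{
    ZSTD_CCtx* const preparedCCtx = ZSTD_createCCtx();   /* holds the loaded dictionary, never modified */
    ZSTD_CCtx* const workCCtx     = ZSTD_createCCtx();   /* runs each compression job */
    char dst[1 << 16];
    const char* const inputs[2] = { "first small message", "second small message" };
    int i;

    ZSTD_compressBegin_usingDict(preparedCCtx, dict, dictSize, level);   /* pay dictionary loading once */

    for (i = 0; i < 2; i++) {
        size_t const cSize = ZSTD_compress_usingPreparedCCtx(workCCtx, preparedCCtx,
                                                             dst, sizeof(dst),
                                                             inputs[i], strlen(inputs[i]));   /* strlen : <string.h> */
        if (ZSTD_isError(cSize)) break;
        /* ... store or transmit dst[0..cSize) ... */
    }

    ZSTD_freeCCtx(workCCtx);
    ZSTD_freeCCtx(preparedCCtx);
}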
+/*- Advanced Decompression functions -*/
+
+/*! ZSTD_decompress_usingPreparedDCtx() :
+* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded.
+* It avoids reloading the dictionary each time.
+* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict().
+* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
+ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx(
+ ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+
+/* **************************************
+* Streaming functions (direct mode)
+****************************************/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);
+
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity);
+
+/*
+ Streaming compression, synchronous mode (bufferless)
+
+ A ZSTD_CCtx object is required to track streaming operations.
+ Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it.
+ ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+ Start by initializing a context.
+ Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+ or ZSTD_compressBegin_advanced(), for finer parameter control.
+ It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+ Then, consume your input using ZSTD_compressContinue().
+ The interface is synchronous, so all input will be consumed and produce a compressed output.
+ You must ensure there is enough space in destination buffer to store compressed data under worst case scenario.
+ Worst case evaluation is provided by ZSTD_compressBound().
+
+ Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
+ Without the epilogue, frames will be considered incomplete by decoder.
+
+ You can then reuse ZSTD_CCtx to compress some new frame.
+*/
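Concretely, the sequence described above is begin / continue / end, with the caller sizing the destination via ZSTD_compressBound(). A minimal single-shot sketch, assuming `dstCapacity >= ZSTD_compressBound(srcSize)` (wrapper name is illustrative):

static size_t streaming_compress_sketch(void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        int level)
{
    size_t cSize, endSize;
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();

    {   size_t const initResult = ZSTD_compressBegin(cctx, level);          /* init a fresh frame */
        if (ZSTD_isError(initResult)) { ZSTD_freeCCtx(cctx); return initResult; } }

    cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);    /* consume all input */
    if (ZSTD_isError(cSize)) { ZSTD_freeCCtx(cctx); return cSize; }

    endSize = ZSTD_compressEnd(cctx, (char*)dst + cSize, dstCapacity - cSize);   /* write the epilogue */
    ZSTD_freeCCtx(cctx);
    if (ZSTD_isError(endSize)) return endSize;
    return cSize + endSize;                                                 /* total frame size */
}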
+
+typedef struct { U64 frameContentSize; U32 windowLog; } ZSTD_frameParams;
+
+#define ZSTD_FRAMEHEADERSIZE_MAX 13 /* for static allocation */
+static const size_t ZSTD_frameHeaderSize_min = 5;
+static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
+ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/*
+ Streaming decompression, direct mode (bufferless)
+
+ A ZSTD_DCtx object is required to track streaming operations.
+ Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+ A ZSTD_DCtx object can be re-used multiple times.
+
+ First optional operation is to retrieve frame parameters, using ZSTD_getFrameParams(), which doesn't consume the input.
+ It can provide the minimum size of rolling buffer required to properly decompress data,
+ and optionally the final size of uncompressed content.
+ (Note : content size is an optional info that may not be present. 0 means : content size unknown)
+ Frame parameters are extracted from the beginning of compressed frame.
+ The amount of data to read is variable, from ZSTD_frameHeaderSize_min to ZSTD_frameHeaderSize_max (so if `srcSize` >= ZSTD_frameHeaderSize_max, it will always work)
+ If `srcSize` is too small for operation to succeed, function will return the minimum size it requires to produce a result.
+ Result : 0 when successful, it means the ZSTD_frameParams structure has been filled.
+ >0 : means there is not enough data into `src`. Provides the expected size to successfully decode header.
+ errorCode, which can be tested using ZSTD_isError()
+
+ Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
+ Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
+
+ Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
+ ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+ They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity)
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+
+ A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+ Context can then be reset to start a new decompression.
+*/
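The decompression side is a pull loop driven by ZSTD_nextSrcSizeToDecompress(). A condensed sketch of that loop, assuming the whole compressed frame already sits in memory and the destination is large enough (window handling simplified, wrapper name illustrative):

static size_t streaming_decompress_sketch(void* dst, size_t dstCapacity,
                                          const void* src, size_t srcSize)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    size_t remaining = srcSize;

    ZSTD_decompressBegin(dctx);   /* error check omitted in this sketch */
    while (1) {
        size_t const toRead = ZSTD_nextSrcSizeToDecompress(dctx);
        size_t decoded;
        if (toRead == 0) break;            /* frame fully decoded */
        if (toRead > remaining) break;     /* truncated input */
        decoded = ZSTD_decompressContinue(dctx, op, dstCapacity - (size_t)(op - (BYTE*)dst), ip, toRead);
        if (ZSTD_isError(decoded)) { ZSTD_freeDCtx(dctx); return decoded; }
        ip += toRead; remaining -= toRead;
        op += decoded;                     /* may be 0 when only a header was decoded */
    }
    ZSTD_freeDCtx(dctx);
    return (size_t)(op - (BYTE*)dst);      /* total regenerated size */
}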
+
+
+/* **************************************
+* Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    The user must keep track of the information required to regenerate the data, such as compressed and content sizes.
+
+ A few rules to respect :
+ - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
+ - Compressing or decompressing requires a context structure
+ + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+ - It is necessary to init context before starting
+ + compression : ZSTD_compressBegin()
+ + decompression : ZSTD_decompressBegin()
+ + variants _usingDict() are also allowed
+ + copyCCtx() and copyDCtx() work too
+ - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
+ In which case, nothing is produced into `dst`.
+ + User must test for such outcome and deal directly with uncompressed data
+ + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!
+*/
+
+#define ZSTD_BLOCKSIZE_MAX (128 * 1024) /* define, for static allocation */
ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
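Putting those rules together, a block-level round trip looks roughly like the following; note the mandatory fallback to raw storage when ZSTD_compressBlock() returns 0. A sketch only: the stack buffers and the missing error propagation are illustrative shortcuts.

static void block_roundtrip_sketch(const void* src, size_t srcSize)   /* srcSize <= ZSTD_BLOCKSIZE_MAX */
{
    char cBuf[ZSTD_BLOCKSIZE_MAX];
    char rBuf[ZSTD_BLOCKSIZE_MAX];
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();

    ZSTD_compressBegin(cctx, 1);       /* contexts must be initialized before block calls */
    ZSTD_decompressBegin(dctx);

    {   size_t const cSize = ZSTD_compressBlock(cctx, cBuf, sizeof(cBuf), src, srcSize);
        if (ZSTD_isError(cSize)) { /* handle error */ }
        else if (cSize == 0) {
            memcpy(rBuf, src, srcSize);   /* not compressible : caller stores the block raw (memcpy : <string.h>) */
        } else {
            ZSTD_decompressBlock(dctx, rBuf, sizeof(rBuf), cBuf, cSize);   /* only compressed blocks go here */
        }
    }
    ZSTD_freeCCtx(cctx);
    ZSTD_freeDCtx(dctx);
}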
-
-
-/*-*************************************
-* Error management
-***************************************/
-#include "error_public.h"
-/*! ZSTD_getErrorCode() :
- convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
- which can be used to compare directly with enum list published into "error_public.h" */
+
+
+/*-*************************************
+* Error management
+***************************************/
+#include "error_public.h"
+/*! ZSTD_getErrorCode() :
+ convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+ which can be used to compare directly with enum list published into "error_public.h" */
ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
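In practice the two helpers above turn a failed `size_t` result into something readable. A short sketch (the printf reporting is illustrative):

static void report_if_error(size_t result)
{
    if (ZSTD_isError(result)) {
        ZSTD_ErrorCode const code = ZSTD_getErrorCode(result);
        printf("zstd error %d : %s\n", (int)code, ZSTD_getErrorString(code));   /* printf : <stdio.h> */
    }
}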
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_STATIC_H */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_STATIC_H */
diff --git a/contrib/libs/zstd06/compress/zstd_compress.c b/contrib/libs/zstd06/compress/zstd_compress.c
index e1a9e5a48b..1bb75c68cc 100644
--- a/contrib/libs/zstd06/compress/zstd_compress.c
+++ b/contrib/libs/zstd06/compress/zstd_compress.c
@@ -1,1971 +1,1971 @@
-/*
- ZSTD HC - High Compression Mode of Zstandard
- Copyright (C) 2015-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Zstd source repository : https://www.zstd.net
-*/
-
-
-/* *******************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#else
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stdlib.h> /* malloc */
-#include <string.h> /* memset */
-#include "mem.h"
-#include "fse_static.h"
+/*
+ ZSTD HC - High Compression Mode of Zstandard
+ Copyright (C) 2015-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Zstd source repository : https://www.zstd.net
+*/
+
+
+/* *******************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#else
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdlib.h> /* malloc */
+#include <string.h> /* memset */
+#include "mem.h"
+#include "fse_static.h"
#include "huf_static.h"
-#include "zstd_internal.h"
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const U32 g_searchStrength = 8; /* control skip over incompressible data */
-
-
-/*-*************************************
-* Helper functions
-***************************************/
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
-
-
-/*-*************************************
-* Sequence storage
-***************************************/
-static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
-{
- ssPtr->offset = ssPtr->offsetStart;
- ssPtr->lit = ssPtr->litStart;
- ssPtr->litLength = ssPtr->litLengthStart;
- ssPtr->matchLength = ssPtr->matchLengthStart;
- ssPtr->longLengthID = 0;
-}
-
-
-/*-*************************************
-* Context memory management
-***************************************/
-struct ZSTD_CCtx_s
-{
- const BYTE* nextSrc; /* next block here to continue on current prefix */
- const BYTE* base; /* All regular indexes relative to this position */
- const BYTE* dictBase; /* extDict indexes relative to this position */
- U32 dictLimit; /* below that point, need extDict */
- U32 lowLimit; /* below that point, no more data */
- U32 nextToUpdate; /* index from which to continue dictionary update */
- U32 nextToUpdate3; /* index from which to continue dictionary update */
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
- U32 loadedDictEnd;
- U32 stage; /* 0: created; 1: init,dictLoad; 2:started */
- ZSTD_parameters params;
- void* workSpace;
- size_t workSpaceSize;
- size_t blockSize;
-
- seqStore_t seqStore; /* sequences storage ptrs */
- U32* hashTable;
- U32* hashTable3;
- U32* chainTable;
- HUF_CElt* hufTable;
- U32 flagStaticTables;
- FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
- FSE_CTable matchlengthCTable [FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
- FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-};
-
-ZSTD_CCtx* ZSTD_createCCtx(void)
-{
- return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
-}
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
-{
- free(cctx->workSpace);
- free(cctx);
- return 0; /* reserved as a potential error code in the future */
-}
-
-const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
-{
- return &(ctx->seqStore);
-}
-
-
-#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
-#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); }
-
-/** ZSTD_checkParams() :
- ensure param values remain within authorized range.
- @return : 0, or an error code if one value is beyond authorized range */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
-{
- CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
- CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
- CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
- CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
- { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
- U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
- CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
- CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
- if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
- return 0;
-}
-
-
-static unsigned ZSTD_highbit(U32 val);
-
-/** ZSTD_checkCParams_advanced() :
- temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */
-size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize)
-{
- if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams);
- if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported);
- if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */
- if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN; /* fake value - temporary work around */
- if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN; /* fake value - temporary work around */
- return ZSTD_checkCParams(cParams);
-}
-
-
-/** ZSTD_adjustParams() :
-    optimize params for a given input (`srcSize` and `dictSize`).
- mostly downsizing to reduce memory consumption and initialization.
- Both `srcSize` and `dictSize` are optional (use 0 if unknown),
- but if both are 0, no optimization can be done.
- Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize)
-{
- if (srcSize+dictSize == 0) return; /* no size information available : no adjustment */
-
- /* resize params, to use less memory when necessary */
- { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
- U64 const rSize = srcSize + dictSize + minSrcSize;
- if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
- U32 const srcLog = ZSTD_highbit((U32)(rSize)-1) + 1;
- if (params->windowLog > srcLog) params->windowLog = srcLog;
- } }
- if (params->hashLog > params->windowLog) params->hashLog = params->windowLog;
- { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
- U32 const maxChainLog = params->windowLog+btPlus;
- if (params->chainLog > maxChainLog) params->chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */
-
- if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
- if ((params->hashLog < ZSTD_HASHLOG_MIN) && ((U32)params->strategy >= (U32)ZSTD_btlazy2)) params->hashLog = ZSTD_HASHLOG_MIN; /* required to ensure collision resistance in bt */
-}
-
-
-size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface, for paramagrill */
-{
- ZSTD_CCtx* zc = ZSTD_createCCtx();
- ZSTD_parameters params;
- params.cParams = cParams;
- params.fParams.contentSizeFlag = 1;
- ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0);
- { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize;
- ZSTD_freeCCtx(zc);
- return ccsize; }
-}
-
-/*! ZSTD_resetCCtx_advanced() :
- note : 'params' is expected to be validated */
-static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
- ZSTD_parameters params, U32 reset)
-{ /* note : params considered validated here */
- const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
- const U32 divider = (params.cParams.searchLength==3) ? 3 : 4;
- const size_t maxNbSeq = blockSize / divider;
- const size_t tokenSpace = blockSize + 11*maxNbSeq;
- const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
+#include "zstd_internal.h"
+
+
+/*-*************************************
+* Constants
+***************************************/
+static const U32 g_searchStrength = 8; /* control skip over incompressible data */
+
+
+/*-*************************************
+* Helper functions
+***************************************/
+size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
+
+
+/*-*************************************
+* Sequence storage
+***************************************/
+static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+ ssPtr->offset = ssPtr->offsetStart;
+ ssPtr->lit = ssPtr->litStart;
+ ssPtr->litLength = ssPtr->litLengthStart;
+ ssPtr->matchLength = ssPtr->matchLengthStart;
+ ssPtr->longLengthID = 0;
+}
+
+
+/*-*************************************
+* Context memory management
+***************************************/
+struct ZSTD_CCtx_s
+{
+ const BYTE* nextSrc; /* next block here to continue on current prefix */
+ const BYTE* base; /* All regular indexes relative to this position */
+ const BYTE* dictBase; /* extDict indexes relative to this position */
+ U32 dictLimit; /* below that point, need extDict */
+ U32 lowLimit; /* below that point, no more data */
+ U32 nextToUpdate; /* index from which to continue dictionary update */
+ U32 nextToUpdate3; /* index from which to continue dictionary update */
+ U32 hashLog3; /* dispatch table : larger == faster, more memory */
+ U32 loadedDictEnd;
+ U32 stage; /* 0: created; 1: init,dictLoad; 2:started */
+ ZSTD_parameters params;
+ void* workSpace;
+ size_t workSpaceSize;
+ size_t blockSize;
+
+ seqStore_t seqStore; /* sequences storage ptrs */
+ U32* hashTable;
+ U32* hashTable3;
+ U32* chainTable;
+ HUF_CElt* hufTable;
+ U32 flagStaticTables;
+ FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+ FSE_CTable matchlengthCTable [FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+ FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+};
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+ return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+ free(cctx->workSpace);
+ free(cctx);
+ return 0; /* reserved as a potential error code in the future */
+}
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
+{
+ return &(ctx->seqStore);
+}
+
+
+#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
+#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); }
+
+/** ZSTD_checkParams() :
+ ensure param values remain within authorized range.
+ @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+ CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+ CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+ CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+ CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+ { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
+ U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
+ CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
+ CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+ if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
+ return 0;
+}
+
+
+static unsigned ZSTD_highbit(U32 val);
+
+/** ZSTD_checkCParams_advanced() :
+ temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */
+size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize)
+{
+ if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams);
+ if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported);
+ if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */
+ if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN; /* fake value - temporary work around */
+ if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN; /* fake value - temporary work around */
+ return ZSTD_checkCParams(cParams);
+}
+
+
+/** ZSTD_adjustParams() :
+    optimize params for a given input (`srcSize` and `dictSize`).
+ mostly downsizing to reduce memory consumption and initialization.
+ Both `srcSize` and `dictSize` are optional (use 0 if unknown),
+ but if both are 0, no optimization can be done.
+ Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
+void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize)
+{
+ if (srcSize+dictSize == 0) return; /* no size information available : no adjustment */
+
+ /* resize params, to use less memory when necessary */
+ { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
+ U64 const rSize = srcSize + dictSize + minSrcSize;
+ if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
+ U32 const srcLog = ZSTD_highbit((U32)(rSize)-1) + 1;
+ if (params->windowLog > srcLog) params->windowLog = srcLog;
+ } }
+ if (params->hashLog > params->windowLog) params->hashLog = params->windowLog;
+ { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
+ U32 const maxChainLog = params->windowLog+btPlus;
+ if (params->chainLog > maxChainLog) params->chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */
+
+ if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
+ if ((params->hashLog < ZSTD_HASHLOG_MIN) && ((U32)params->strategy >= (U32)ZSTD_btlazy2)) params->hashLog = ZSTD_HASHLOG_MIN; /* required to ensure collision resistance in bt */
+}
+
+
+size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface, for paramagrill */
+{
+ ZSTD_CCtx* zc = ZSTD_createCCtx();
+ ZSTD_parameters params;
+ params.cParams = cParams;
+ params.fParams.contentSizeFlag = 1;
+ ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0);
+ { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize;
+ ZSTD_freeCCtx(zc);
+ return ccsize; }
+}
+
+/*! ZSTD_resetCCtx_advanced() :
+ note : 'params' is expected to be validated */
+static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
+ ZSTD_parameters params, U32 reset)
+{ /* note : params considered validated here */
+ const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
+ const U32 divider = (params.cParams.searchLength==3) ? 3 : 4;
+ const size_t maxNbSeq = blockSize / divider;
+ const size_t tokenSpace = blockSize + 11*maxNbSeq;
+ const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
const size_t hSize = ((size_t)1) << params.cParams.hashLog;
- const size_t h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
- const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-
- /* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
- + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
- size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
- + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
- if (zc->workSpaceSize < neededSpace) {
- free(zc->workSpace);
- zc->workSpace = malloc(neededSpace);
- if (zc->workSpace == NULL) return ERROR(memory_allocation);
- zc->workSpaceSize = neededSpace;
- } }
-
- if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */
- zc->hashTable3 = (U32*)(zc->workSpace);
- zc->hashTable = zc->hashTable3 + h3Size;
- zc->chainTable = zc->hashTable + hSize;
- zc->seqStore.buffer = zc->chainTable + chainSize;
- zc->hufTable = (HUF_CElt*)zc->seqStore.buffer;
- zc->flagStaticTables = 0;
- zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256;
-
- zc->nextToUpdate = 1;
- zc->nextSrc = NULL;
- zc->base = NULL;
- zc->dictBase = NULL;
- zc->dictLimit = 0;
- zc->lowLimit = 0;
- zc->params = params;
- zc->blockSize = blockSize;
-
- if (params.cParams.strategy == ZSTD_btopt) {
- zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer);
- zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
- zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
- zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
- zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1)));
- zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1));
- zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
- zc->seqStore.litLengthSum = 0;
- }
- zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
- zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq);
- zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq);
- zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq);
- zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq;
- zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq;
- zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq;
-
- zc->stage = 1;
- zc->loadedDictEnd = 0;
-
- return 0;
-}
-
-
-/*! ZSTD_copyCCtx() :
-* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-* Only works during stage 1 (i.e. after creation, but before first call to ZSTD_compressContinue()).
-* @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
-{
- if (srcCCtx->stage!=1) return ERROR(stage_wrong);
-
- dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */
- ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, 0);
- dstCCtx->params.fParams.contentSizeFlag = 0; /* content size different from the one set during srcCCtx init */
-
- /* copy tables */
- { const size_t chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
+ const size_t h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
+ const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+
+ /* Check if workSpace is large enough, alloc a new one if needed */
+ { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+ + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
+ size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
+ + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+ if (zc->workSpaceSize < neededSpace) {
+ free(zc->workSpace);
+ zc->workSpace = malloc(neededSpace);
+ if (zc->workSpace == NULL) return ERROR(memory_allocation);
+ zc->workSpaceSize = neededSpace;
+ } }
+
+ if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */
+ zc->hashTable3 = (U32*)(zc->workSpace);
+ zc->hashTable = zc->hashTable3 + h3Size;
+ zc->chainTable = zc->hashTable + hSize;
+ zc->seqStore.buffer = zc->chainTable + chainSize;
+ zc->hufTable = (HUF_CElt*)zc->seqStore.buffer;
+ zc->flagStaticTables = 0;
+ zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256;
+
+ zc->nextToUpdate = 1;
+ zc->nextSrc = NULL;
+ zc->base = NULL;
+ zc->dictBase = NULL;
+ zc->dictLimit = 0;
+ zc->lowLimit = 0;
+ zc->params = params;
+ zc->blockSize = blockSize;
+
+ if (params.cParams.strategy == ZSTD_btopt) {
+ zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer);
+ zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
+ zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
+ zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
+ zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1)));
+ zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1));
+ zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
+ zc->seqStore.litLengthSum = 0;
+ }
+ zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
+ zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq);
+ zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq);
+ zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq);
+ zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq;
+ zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq;
+ zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq;
+
+ zc->stage = 1;
+ zc->loadedDictEnd = 0;
+
+ return 0;
+}
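+
+/* Editor's note (summary, not part of the original source) : the workSpace
+*  set up above is carved out in this order :
+*    hashTable3 (h3Size U32) | hashTable (hSize U32) | chainTable (chainSize U32)
+*    hufTable (256 U32)
+*    optimal-parser tables (litFreq, litLengthFreq, matchLengthFreq, offCodeFreq,
+*                           matchTable, priceTable)  -- only when strategy == ZSTD_btopt
+*    offsetStart, litLengthStart, matchLengthStart, llCodeStart, mlCodeStart,
+*    offCodeStart (maxNbSeq entries each), then litStart for the literals.
+*  Only the leading tableSpace bytes are zeroed on reset ("reset only tables") ;
+*  the remaining buffers are overwritten as blocks are compressed. */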
+
+
+/*! ZSTD_copyCCtx() :
+* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+* Only works during stage 1 (i.e. after creation, but before first call to ZSTD_compressContinue()).
+* @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
+{
+ if (srcCCtx->stage!=1) return ERROR(stage_wrong);
+
+ dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */
+ ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, 0);
+ dstCCtx->params.fParams.contentSizeFlag = 0; /* content size different from the one set during srcCCtx init */
+
+ /* copy tables */
+ { const size_t chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
const size_t hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
- const size_t h3Size = (srcCCtx->hashLog3) ? 1 << srcCCtx->hashLog3 : 0;
- const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
- }
-
- /* copy dictionary pointers */
- dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
- dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
- dstCCtx->nextSrc = srcCCtx->nextSrc;
- dstCCtx->base = srcCCtx->base;
- dstCCtx->dictBase = srcCCtx->dictBase;
- dstCCtx->dictLimit = srcCCtx->dictLimit;
- dstCCtx->lowLimit = srcCCtx->lowLimit;
- dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
-
- /* copy entropy tables */
- dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
- if (srcCCtx->flagStaticTables) {
- memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
- memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
- memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
- memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
- }
-
- return 0;
-}
-
-
-/*! ZSTD_reduceTable() :
-* reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
-{
- U32 u;
- for (u=0 ; u < size ; u++) {
- if (table[u] < reducerValue) table[u] = 0;
- else table[u] -= reducerValue;
- }
-}
-
-/*! ZSTD_reduceIndex() :
-* rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
-{
- { const U32 hSize = 1 << zc->params.cParams.hashLog;
- ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
-
- { const U32 chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
- ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
-
- { const U32 h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
- ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
-}
-
-
-/*-*******************************************************
-* Block entropic compression
-*********************************************************/
-
-/* Frame format description
- Frame Header - [ Block Header - Block ] - Frame End
- 1) Frame Header
- - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
- - 1 byte - Frame Descriptor
- 2) Block Header
- - 3 bytes, starting with a 2-bits descriptor
- Uncompressed, Compressed, Frame End, unused
- 3) Block
- See Block Format Description
- 4) Frame End
- - 3 bytes, compatible with Block Header
-*/
-
-
-/* Frame descriptor
-
- 1 byte, using :
- bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h)
- bit 4 : minmatch 4(0) or 3(1)
- bit 5 : reserved (must be zero)
- bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
-
- Optional : content size (0, 1, 2 or 8 bytes)
- 0 : unknown
- 1 : 0-255 bytes
- 2 : 256 - 65535+256
- 8 : up to 16 exa
-*/
-
-
-/* Block format description
-
- Block = Literal Section - Sequences Section
- Prerequisite : size of (compressed) block, maximum size of regenerated data
-
- 1) Literal Section
-
- 1.1) Header : 1-5 bytes
- flags: 2 bits
- 00 compressed by Huff0
- 01 unused
- 10 is Raw (uncompressed)
- 11 is Rle
- Note : using 01 => Huff0 with precomputed table ?
- Note : delta map ? => compressed ?
-
- 1.1.1) Huff0-compressed literal block : 3-5 bytes
- srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
- srcSize < 1 KB => 3 bytes (2-2-10-10)
- srcSize < 16KB => 4 bytes (2-2-14-14)
- else => 5 bytes (2-2-18-18)
- big endian convention
-
- 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
- size : 5 bits: (IS_RAW<<6) + (0<<4) + size
- 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
- size&255
- 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
- size>>8&255
- size&255
-
- 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
- size : 5 bits: (IS_RLE<<6) + (0<<4) + size
- 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
- size&255
- 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
- size>>8&255
- size&255
-
- 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
- srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
- srcSize < 1 KB => 3 bytes (2-2-10-10)
- srcSize < 16KB => 4 bytes (2-2-14-14)
- else => 5 bytes (2-2-18-18)
- big endian convention
-
- 1- CTable available (stored into workspace ?)
- 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
-
-
- 1.2) Literal block content
-
- 1.2.1) Huff0 block, using sizes from header
- See Huff0 format
-
- 1.2.2) Huff0 block, using prepared table
-
- 1.2.3) Raw content
-
- 1.2.4) single byte
-
-
- 2) Sequences section
-
- - Nb Sequences : 2 bytes, little endian
- - Control Token : 1 byte (see below)
- - Dumps Length : 1 or 2 bytes (depending on control token)
- - Dumps : as stated by dumps length
- - Literal Lengths FSE table (as needed depending on encoding method)
- - Offset Codes FSE table (as needed depending on encoding method)
- - Match Lengths FSE table (as needed depending on encoding method)
-
- 2.1) Control Token
- 8 bits, divided as :
- 0-1 : dumpsLength
- 2-3 : MatchLength, FSE encoding method
- 4-5 : Offset Codes, FSE encoding method
- 6-7 : Literal Lengths, FSE encoding method
-
- FSE encoding method :
- FSE_ENCODING_RAW : uncompressed; no header
- FSE_ENCODING_RLE : single repeated value; header 1 byte
- FSE_ENCODING_STATIC : use prepared table; no header
- FSE_ENCODING_DYNAMIC : read NCount
-*/
-
-size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- BYTE* const ostart = (BYTE* const)dst;
-
- if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
- memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
-
- /* Build header */
- ostart[0] = (BYTE)(srcSize>>16);
- ostart[1] = (BYTE)(srcSize>>8);
- ostart[2] = (BYTE) srcSize;
- ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
-
- return ZSTD_blockHeaderSize+srcSize;
-}
-
-
-static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- BYTE* const ostart = (BYTE* const)dst;
- U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
- if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
-
- switch(flSize)
- {
- case 1: /* 2 - 1 - 5 */
- ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize);
- break;
- case 2: /* 2 - 2 - 12 */
- ostart[0] = (BYTE)((IS_RAW<<6) + (2<<4) + (srcSize >> 8));
- ostart[1] = (BYTE)srcSize;
- break;
- default: /*note : should not be necessary : flSize is within {1,2,3} */
- case 3: /* 2 - 2 - 20 */
- ostart[0] = (BYTE)((IS_RAW<<6) + (3<<4) + (srcSize >> 16));
- ostart[1] = (BYTE)(srcSize>>8);
- ostart[2] = (BYTE)srcSize;
- break;
- }
-
- memcpy(ostart + flSize, src, srcSize);
- return srcSize + flSize;
-}
-
-static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- BYTE* const ostart = (BYTE* const)dst;
- U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
- (void)dstCapacity; /* dstCapacity guaranteed to be >=4, hence large enough */
-
- switch(flSize)
- {
- case 1: /* 2 - 1 - 5 */
- ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize);
- break;
- case 2: /* 2 - 2 - 12 */
- ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8));
- ostart[1] = (BYTE)srcSize;
- break;
- default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
- case 3: /* 2 - 2 - 20 */
- ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16));
- ostart[1] = (BYTE)(srcSize>>8);
- ostart[2] = (BYTE)srcSize;
- break;
- }
-
- ostart[flSize] = *(const BYTE*)src;
- return flSize+1;
-}
-
-
-static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-
-static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- size_t const minGain = ZSTD_minGain(srcSize);
- size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
- BYTE* const ostart = (BYTE*)dst;
- U32 singleStream = srcSize < 256;
- U32 hType = IS_HUF;
- size_t cLitSize;
-
-
- /* small ? don't even attempt compression (speed opt) */
-# define LITERAL_NOENTROPY 63
- { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
- if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- }
-
- if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
- if (zc->flagStaticTables && (lhSize==3)) {
- hType = IS_PCH;
- singleStream = 1;
- cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
- } else {
- cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12)
- : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12);
- }
-
- if ((cLitSize==0) || (cLitSize >= srcSize - minGain))
- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- if (cLitSize==1)
- return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-
- /* Build header */
- switch(lhSize)
- {
- case 3: /* 2 - 2 - 10 - 10 */
- ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6));
- ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8));
- ostart[2] = (BYTE)(cLitSize);
- break;
- case 4: /* 2 - 2 - 14 - 14 */
- ostart[0] = (BYTE)((srcSize>>10) + (2<<4) + (hType<<6));
- ostart[1] = (BYTE)(srcSize>> 2);
- ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8));
- ostart[3] = (BYTE)(cLitSize);
- break;
- default: /* should not be necessary, lhSize is only {3,4,5} */
- case 5: /* 2 - 2 - 18 - 18 */
- ostart[0] = (BYTE)((srcSize>>14) + (3<<4) + (hType<<6));
- ostart[1] = (BYTE)(srcSize>>6);
- ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16));
- ostart[3] = (BYTE)(cLitSize>>8);
- ostart[4] = (BYTE)(cLitSize);
- break;
- }
- return lhSize+cLitSize;
-}
-
-
-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq)
-{
- /* LL codes */
- { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 16, 17, 17, 18, 18, 19, 19,
- 20, 20, 20, 20, 21, 21, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22,
- 23, 23, 23, 23, 23, 23, 23, 23,
- 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24 };
- const BYTE LL_deltaCode = 19;
- const U16* const llTable = seqStorePtr->litLengthStart;
- BYTE* const llCodeTable = seqStorePtr->llCodeStart;
- size_t u;
- for (u=0; u<nbSeq; u++) {
- U32 const ll = llTable[u];
- llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll];
- }
- if (seqStorePtr->longLengthID==1)
- llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
- }
-
- /* Offset codes */
- { const U32* const offsetTable = seqStorePtr->offsetStart;
- BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
- size_t u;
- for (u=0; u<nbSeq; u++) ofCodeTable[u] = (BYTE)ZSTD_highbit(offsetTable[u]);
- }
-
- /* ML codes */
- { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
- 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
- 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
- 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
- const BYTE ML_deltaCode = 36;
- const U16* const mlTable = seqStorePtr->matchLengthStart;
- BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
- size_t u;
- for (u=0; u<nbSeq; u++) {
- U32 const ml = mlTable[u];
- mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml];
- }
- if (seqStorePtr->longLengthID==2)
- mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
- }
-}
-
-
-size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
- void* dst, size_t dstCapacity,
- size_t srcSize)
-{
- const seqStore_t* seqStorePtr = &(zc->seqStore);
- U32 count[MaxSeq+1];
- S16 norm[MaxSeq+1];
- FSE_CTable* CTable_LitLength = zc->litlengthCTable;
- FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
- FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
- U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
- U16* const llTable = seqStorePtr->litLengthStart;
- U16* const mlTable = seqStorePtr->matchLengthStart;
- const U32* const offsetTable = seqStorePtr->offsetStart;
- const U32* const offsetTableEnd = seqStorePtr->offset;
- BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
- BYTE* const llCodeTable = seqStorePtr->llCodeStart;
- BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
- BYTE* const ostart = (BYTE*)dst;
- BYTE* const oend = ostart + dstCapacity;
- BYTE* op = ostart;
- size_t const nbSeq = offsetTableEnd - offsetTable;
- BYTE* seqHead;
-
- /* Compress literals */
- { const BYTE* const literals = seqStorePtr->litStart;
- size_t const litSize = seqStorePtr->lit - literals;
- size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
- if (ZSTD_isError(cSize)) return cSize;
- op += cSize;
- }
-
- /* Sequences Header */
- if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
- if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
- else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
- else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
- if (nbSeq==0) goto _check_compressibility;
-
- /* seqHead : flags for FSE encoding type */
- seqHead = op++;
-
-#define MIN_SEQ_FOR_DYNAMIC_FSE 64
-#define MAX_SEQ_FOR_STATIC_FSE 1000
-
- /* convert length/distances into codes */
- ZSTD_seqToCodes(seqStorePtr, nbSeq);
-
- /* CTable for Literal Lengths */
- { U32 max = MaxLL;
- size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
- if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
- *op++ = llCodeTable[0];
- FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
- LLtype = FSE_ENCODING_RLE;
- } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
- LLtype = FSE_ENCODING_STATIC;
- } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
- FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
- LLtype = FSE_ENCODING_RAW;
- } else {
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
- if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
- FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
- { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
- op += NCountSize; }
- FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
- LLtype = FSE_ENCODING_DYNAMIC;
- } }
-
- /* CTable for Offsets */
- { U32 max = MaxOff;
- size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
- if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
- *op++ = ofCodeTable[0];
- FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
- Offtype = FSE_ENCODING_RLE;
- } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
- Offtype = FSE_ENCODING_STATIC;
- } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
- FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
- Offtype = FSE_ENCODING_RAW;
- } else {
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
- if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
- FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
- { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
- op += NCountSize; }
- FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
- Offtype = FSE_ENCODING_DYNAMIC;
- } }
-
- /* CTable for MatchLengths */
- { U32 max = MaxML;
- size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
- if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
- *op++ = *mlCodeTable;
- FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
- MLtype = FSE_ENCODING_RLE;
- } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
- MLtype = FSE_ENCODING_STATIC;
- } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
- FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
- MLtype = FSE_ENCODING_RAW;
- } else {
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
- if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
- FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
- { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
- if (FSE_isError(NCountSize)) return ERROR(GENERIC);
- op += NCountSize; }
- FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
- MLtype = FSE_ENCODING_DYNAMIC;
- } }
-
- *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
- zc->flagStaticTables = 0;
-
- /* Encoding Sequences */
- { BIT_CStream_t blockStream;
- FSE_CState_t stateMatchLength;
- FSE_CState_t stateOffsetBits;
- FSE_CState_t stateLitLength;
-
- { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op);
- if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */
-
- /* first symbols */
- FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
- FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
- FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
- BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]);
- if (MEM_32bits()) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
- if (MEM_32bits()) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]);
- BIT_flushBits(&blockStream);
-
- { size_t n;
- for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
- const BYTE ofCode = ofCodeTable[n];
- const BYTE mlCode = mlCodeTable[n];
- const BYTE llCode = llCodeTable[n];
- const U32 llBits = LL_bits[llCode];
- const U32 mlBits = ML_bits[mlCode];
- const U32 ofBits = ofCode; /* 32b*/ /* 64b*/
- /* (7)*/ /* (7)*/
- FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
- FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
- if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
- FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
- if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_flushBits(&blockStream); /* (7)*/
- BIT_addBits(&blockStream, llTable[n], llBits);
- if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, mlTable[n], mlBits);
- if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
- BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */
- BIT_flushBits(&blockStream); /* (7)*/
- } }
-
- FSE_flushCState(&blockStream, &stateMatchLength);
- FSE_flushCState(&blockStream, &stateOffsetBits);
- FSE_flushCState(&blockStream, &stateLitLength);
-
- { size_t const streamSize = BIT_closeCStream(&blockStream);
- if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
- op += streamSize;
- } }
-
- /* check compressibility */
-_check_compressibility:
- { size_t const minGain = ZSTD_minGain(srcSize);
- size_t const maxCSize = srcSize - minGain;
- if ((size_t)(op-ostart) >= maxCSize) return 0; }
-
- return op - ostart;
-}
-
-
-/*! ZSTD_storeSeq() :
- Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- `offsetCode` : distance to match, or 0 == repCode.
- `matchCode` : matchLength - MINMATCH
-*/
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode)
-{
-#if 0 /* for debug */
- static const BYTE* g_start = NULL;
- const U32 pos = (U32)(literals - g_start);
- if (g_start==NULL) g_start = literals;
+ const size_t h3Size = (srcCCtx->hashLog3) ? 1 << srcCCtx->hashLog3 : 0;
+ const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+ memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
+ }
+
+ /* copy dictionary pointers */
+ dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
+ dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
+ dstCCtx->nextSrc = srcCCtx->nextSrc;
+ dstCCtx->base = srcCCtx->base;
+ dstCCtx->dictBase = srcCCtx->dictBase;
+ dstCCtx->dictLimit = srcCCtx->dictLimit;
+ dstCCtx->lowLimit = srcCCtx->lowLimit;
+ dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
+
+ /* copy entropy tables */
+ dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
+ if (srcCCtx->flagStaticTables) {
+ memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
+ memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
+ memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
+ memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
+ }
+
+ return 0;
+}
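+
+/* Editor's note (illustrative sketch, not part of the original source) :
+*  typical use of ZSTD_copyCCtx() is to initialize one reference context once
+*  (stage 1 : after creation, before any ZSTD_compressContinue() call) and then
+*  clone it for each new frame instead of repeating the init work.
+*  The sketch assumes ZSTD_compressBegin() as the stage-1 entry point. */
+#if 0   /* illustration only */
+{   ZSTD_CCtx* const refCCtx  = ZSTD_createCCtx();
+    ZSTD_CCtx* const workCCtx = ZSTD_createCCtx();
+    size_t err = ZSTD_compressBegin(refCCtx, 1 /* compression level */);
+    if (!ZSTD_isError(err)) err = ZSTD_copyCCtx(workCCtx, refCCtx);
+    /* ... drive workCCtx with ZSTD_compressContinue() / ZSTD_compressEnd() ... */
+    ZSTD_freeCCtx(workCCtx);
+    ZSTD_freeCCtx(refCCtx);
+}
+#endif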
+
+
+/*! ZSTD_reduceTable() :
+* reduce table indexes by `reducerValue` */
+static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
+{
+ U32 u;
+ for (u=0 ; u < size ; u++) {
+ if (table[u] < reducerValue) table[u] = 0;
+ else table[u] -= reducerValue;
+ }
+}
+
+/*! ZSTD_reduceIndex() :
+* rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
+{
+ { const U32 hSize = 1 << zc->params.cParams.hashLog;
+ ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
+
+ { const U32 chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
+ ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
+
+ { const U32 h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
+ ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
+}
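+
+/* Editor's note (worked example, not part of the original source) :
+*  with reducerValue = 1000, ZSTD_reduceTable() above maps
+*    table[u] = 1500 ->  500   (index simply shifted down)
+*    table[u] =  800 ->    0   (older than the reducer : dropped)
+*  so stored positions keep fitting in a U32 as the input stream grows. */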
+
+
+/*-*******************************************************
+* Block entropic compression
+*********************************************************/
+
+/* Frame format description
+ Frame Header - [ Block Header - Block ] - Frame End
+ 1) Frame Header
+ - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
+ - 1 byte - Frame Descriptor
+ 2) Block Header
+         - 3 bytes, starting with a 2-bit descriptor
+ Uncompressed, Compressed, Frame End, unused
+ 3) Block
+ See Block Format Description
+ 4) Frame End
+ - 3 bytes, compatible with Block Header
+*/
+
+
+/* Frame descriptor
+
+ 1 byte, using :
+ bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h)
+ bit 4 : minmatch 4(0) or 3(1)
+ bit 5 : reserved (must be zero)
+ bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
+
+ Optional : content size (0, 1, 2 or 8 bytes)
+ 0 : unknown
+ 1 : 0-255 bytes
+ 2 : 256 - 65535+256
+ 8 : up to 16 exa
+*/
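+
+/* Editor's note (illustrative sketch, not part of the original source) :
+*  unpacking the frame descriptor byte exactly as laid out above ;
+*  variable names are hypothetical, only the bit layout comes from the comment. */
+#if 0   /* illustration only */
+{   BYTE const fd = frameDescriptor;
+    U32 const windowLog = (fd & 0x0F) + ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* bits 0-3 */
+    U32 const minMatch  = (fd & 0x10) ? 3 : 4;                        /* bit 4 : 0 => 4, 1 => 3 */
+    U32 const fcsMode   = fd >> 6;       /* bits 6-7 : content size field of 0, 1, 2 or 8 bytes */
+    /* bit 5 is reserved and must be zero */
+}
+#endif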
+
+
+/* Block format description
+
+ Block = Literal Section - Sequences Section
+ Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+ 1) Literal Section
+
+ 1.1) Header : 1-5 bytes
+ flags: 2 bits
+ 00 compressed by Huff0
+ 01 unused
+ 10 is Raw (uncompressed)
+ 11 is Rle
+ Note : using 01 => Huff0 with precomputed table ?
+ Note : delta map ? => compressed ?
+
+ 1.1.1) Huff0-compressed literal block : 3-5 bytes
+ srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+ srcSize < 1 KB => 3 bytes (2-2-10-10)
+ srcSize < 16KB => 4 bytes (2-2-14-14)
+ else => 5 bytes (2-2-18-18)
+ big endian convention
+
+ 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+ size : 5 bits: (IS_RAW<<6) + (0<<4) + size
+ 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+ size&255
+ 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+ size>>8&255
+ size&255
+
+ 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+ size : 5 bits: (IS_RLE<<6) + (0<<4) + size
+ 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+ size&255
+ 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+ size>>8&255
+ size&255
+
+ 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+ srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+ srcSize < 1 KB => 3 bytes (2-2-10-10)
+ srcSize < 16KB => 4 bytes (2-2-14-14)
+ else => 5 bytes (2-2-18-18)
+ big endian convention
+
+ 1- CTable available (stored into workspace ?)
+ 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+ 1.2) Literal block content
+
+ 1.2.1) Huff0 block, using sizes from header
+ See Huff0 format
+
+ 1.2.2) Huff0 block, using prepared table
+
+ 1.2.3) Raw content
+
+ 1.2.4) single byte
+
+
+ 2) Sequences section
+
+ - Nb Sequences : 2 bytes, little endian
+ - Control Token : 1 byte (see below)
+ - Dumps Length : 1 or 2 bytes (depending on control token)
+ - Dumps : as stated by dumps length
+ - Literal Lengths FSE table (as needed depending on encoding method)
+ - Offset Codes FSE table (as needed depending on encoding method)
+ - Match Lengths FSE table (as needed depending on encoding method)
+
+ 2.1) Control Token
+ 8 bits, divided as :
+ 0-1 : dumpsLength
+ 2-3 : MatchLength, FSE encoding method
+ 4-5 : Offset Codes, FSE encoding method
+ 6-7 : Literal Lengths, FSE encoding method
+
+ FSE encoding method :
+ FSE_ENCODING_RAW : uncompressed; no header
+ FSE_ENCODING_RLE : single repeated value; header 1 byte
+ FSE_ENCODING_STATIC : use prepared table; no header
+ FSE_ENCODING_DYNAMIC : read NCount
+*/
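+
+/* Editor's note (illustrative sketch, not part of the original source) :
+*  reading back the Control Token of section 2.1 ; this mirrors the way
+*  ZSTD_compressSequences() below writes it as (LLtype<<6) + (Offtype<<4) + (MLtype<<2). */
+#if 0   /* illustration only */
+{   BYTE const token = controlToken;
+    U32 const LLtype      = (token >> 6) & 3;   /* Literal Lengths encoding method */
+    U32 const Offtype     = (token >> 4) & 3;   /* Offset Codes encoding method */
+    U32 const MLtype      = (token >> 2) & 3;   /* Match Lengths encoding method */
+    U32 const dumpsLength =  token       & 3;   /* bits 0-1 */
+}
+#endif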
+
+size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ BYTE* const ostart = (BYTE* const)dst;
+
+ if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
+ memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
+
+ /* Build header */
+ ostart[0] = (BYTE)(srcSize>>16);
+ ostart[1] = (BYTE)(srcSize>>8);
+ ostart[2] = (BYTE) srcSize;
+ ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
+
+ return ZSTD_blockHeaderSize+srcSize;
+}
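+
+/* Editor's note (illustrative sketch, not part of the original source) :
+*  reading back the 3-byte block header written above ; `hdr` is a hypothetical
+*  pointer to the header bytes. The top 2 bits carry the block type, the
+*  remaining 22 bits the block size. */
+#if 0   /* illustration only */
+{   U32 const blockType = hdr[0] >> 6;    /* bt_raw for the block emitted above */
+    U32 const blockSize = ((U32)(hdr[0] & 0x3F) << 16) + ((U32)hdr[1] << 8) + hdr[2];
+}
+#endif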
+
+
+static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ BYTE* const ostart = (BYTE* const)dst;
+ U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+ if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
+
+ switch(flSize)
+ {
+ case 1: /* 2 - 1 - 5 */
+ ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize);
+ break;
+ case 2: /* 2 - 2 - 12 */
+ ostart[0] = (BYTE)((IS_RAW<<6) + (2<<4) + (srcSize >> 8));
+ ostart[1] = (BYTE)srcSize;
+ break;
+ default: /*note : should not be necessary : flSize is within {1,2,3} */
+ case 3: /* 2 - 2 - 20 */
+ ostart[0] = (BYTE)((IS_RAW<<6) + (3<<4) + (srcSize >> 16));
+ ostart[1] = (BYTE)(srcSize>>8);
+ ostart[2] = (BYTE)srcSize;
+ break;
+ }
+
+ memcpy(ostart + flSize, src, srcSize);
+ return srcSize + flSize;
+}
+
+static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ BYTE* const ostart = (BYTE* const)dst;
+ U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+ (void)dstCapacity; /* dstCapacity guaranteed to be >=4, hence large enough */
+
+ switch(flSize)
+ {
+ case 1: /* 2 - 1 - 5 */
+ ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize);
+ break;
+ case 2: /* 2 - 2 - 12 */
+ ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8));
+ ostart[1] = (BYTE)srcSize;
+ break;
+ default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
+ case 3: /* 2 - 2 - 20 */
+ ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16));
+ ostart[1] = (BYTE)(srcSize>>8);
+ ostart[2] = (BYTE)srcSize;
+ break;
+ }
+
+ ostart[flSize] = *(const BYTE*)src;
+ return flSize+1;
+}
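+
+/* Editor's note (worked example, not part of the original source) :
+*  both literal headers above pick their size the same way :
+*      flSize = 1 + (srcSize > 31) + (srcSize > 4095)
+*  e.g. srcSize =    20 -> flSize = 1 : single byte (IS_RAW<<6) + 20
+*       srcSize =   300 -> flSize = 2 : (IS_RAW<<6) + (2<<4) + 1, then 300 & 255 = 44
+*       srcSize = 70000 -> flSize = 3 : the 20-bit size is spread over three bytes. */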
+
+
+static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
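+
+/* Editor's note (worked example, not part of the original source) :
+*  ZSTD_minGain(4096) = (4096>>6) + 2 = 66 : a 4096-byte input is only kept in
+*  compressed form when the compressed result is below 4096 - 66 = 4030 bytes. */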
+
+static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ size_t const minGain = ZSTD_minGain(srcSize);
+ size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+ BYTE* const ostart = (BYTE*)dst;
+ U32 singleStream = srcSize < 256;
+ U32 hType = IS_HUF;
+ size_t cLitSize;
+
+
+ /* small ? don't even attempt compression (speed opt) */
+# define LITERAL_NOENTROPY 63
+ { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
+ if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ }
+
+ if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
+ if (zc->flagStaticTables && (lhSize==3)) {
+ hType = IS_PCH;
+ singleStream = 1;
+ cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
+ } else {
+ cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12)
+ : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12);
+ }
+
+ if ((cLitSize==0) || (cLitSize >= srcSize - minGain))
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ if (cLitSize==1)
+ return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+
+ /* Build header */
+ switch(lhSize)
+ {
+ case 3: /* 2 - 2 - 10 - 10 */
+ ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6));
+ ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8));
+ ostart[2] = (BYTE)(cLitSize);
+ break;
+ case 4: /* 2 - 2 - 14 - 14 */
+ ostart[0] = (BYTE)((srcSize>>10) + (2<<4) + (hType<<6));
+ ostart[1] = (BYTE)(srcSize>> 2);
+ ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8));
+ ostart[3] = (BYTE)(cLitSize);
+ break;
+ default: /* should not be necessary, lhSize is only {3,4,5} */
+ case 5: /* 2 - 2 - 18 - 18 */
+ ostart[0] = (BYTE)((srcSize>>14) + (3<<4) + (hType<<6));
+ ostart[1] = (BYTE)(srcSize>>6);
+ ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16));
+ ostart[3] = (BYTE)(cLitSize>>8);
+ ostart[4] = (BYTE)(cLitSize);
+ break;
+ }
+ return lhSize+cLitSize;
+}
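+
+/* Editor's note (worked example, not part of the original source) :
+*  the lhSize==3 case packs "2-2-10-10" : 2 bits hType, 2 bits format/stream flag,
+*  10 bits srcSize, 10 bits cLitSize.  With srcSize=600, cLitSize=200, singleStream=0 :
+*    ostart[0] = (600>>6) + (0<<4) + (IS_HUF<<6)      (low nibble = 9 : top 4 bits of srcSize)
+*    ostart[1] = (BYTE)((600<<2) + (200>>8)) = 96     (low 6 bits of srcSize, top 2 bits of cLitSize)
+*    ostart[2] = (BYTE)200                            (low 8 bits of cLitSize) */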
+
+
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq)
+{
+ /* LL codes */
+ { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 16, 17, 17, 18, 18, 19, 19,
+ 20, 20, 20, 20, 21, 21, 21, 21,
+ 22, 22, 22, 22, 22, 22, 22, 22,
+ 23, 23, 23, 23, 23, 23, 23, 23,
+ 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24 };
+ const BYTE LL_deltaCode = 19;
+ const U16* const llTable = seqStorePtr->litLengthStart;
+ BYTE* const llCodeTable = seqStorePtr->llCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) {
+ U32 const ll = llTable[u];
+ llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll];
+ }
+ if (seqStorePtr->longLengthID==1)
+ llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+ }
+
+ /* Offset codes */
+ { const U32* const offsetTable = seqStorePtr->offsetStart;
+ BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) ofCodeTable[u] = (BYTE)ZSTD_highbit(offsetTable[u]);
+ }
+
+ /* ML codes */
+ { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+ 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+ 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+ const BYTE ML_deltaCode = 36;
+ const U16* const mlTable = seqStorePtr->matchLengthStart;
+ BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) {
+ U32 const ml = mlTable[u];
+ mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml];
+ }
+ if (seqStorePtr->longLengthID==2)
+ mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+ }
+}
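+
+/* Editor's note (worked example, not part of the original source) :
+*  literal lengths up to 63 map through LL_Code[] directly, longer ones use
+*  ZSTD_highbit(ll) + 19 :  ll = 40 -> LL_Code[40] = 23 ; ll = 100 -> highbit = 6 -> code 25.
+*  Match lengths work the same way with ML_Code[] (128 entries) and delta 36 :
+*  ml = 200 -> highbit = 7 -> code 43. */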
+
+
+size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ size_t srcSize)
+{
+ const seqStore_t* seqStorePtr = &(zc->seqStore);
+ U32 count[MaxSeq+1];
+ S16 norm[MaxSeq+1];
+ FSE_CTable* CTable_LitLength = zc->litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
+ FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
+ U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
+ U16* const llTable = seqStorePtr->litLengthStart;
+ U16* const mlTable = seqStorePtr->matchLengthStart;
+ const U32* const offsetTable = seqStorePtr->offsetStart;
+ const U32* const offsetTableEnd = seqStorePtr->offset;
+ BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
+ BYTE* const llCodeTable = seqStorePtr->llCodeStart;
+ BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = ostart + dstCapacity;
+ BYTE* op = ostart;
+ size_t const nbSeq = offsetTableEnd - offsetTable;
+ BYTE* seqHead;
+
+ /* Compress literals */
+ { const BYTE* const literals = seqStorePtr->litStart;
+ size_t const litSize = seqStorePtr->lit - literals;
+ size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
+ if (ZSTD_isError(cSize)) return cSize;
+ op += cSize;
+ }
+
+ /* Sequences Header */
+ if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
+ if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
+ else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+ else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+ if (nbSeq==0) goto _check_compressibility;
+
+ /* seqHead : flags for FSE encoding type */
+ seqHead = op++;
+
+#define MIN_SEQ_FOR_DYNAMIC_FSE 64
+#define MAX_SEQ_FOR_STATIC_FSE 1000
+
+ /* convert length/distances into codes */
+ ZSTD_seqToCodes(seqStorePtr, nbSeq);
+
+ /* CTable for Literal Lengths */
+ { U32 max = MaxLL;
+ size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
+ if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+ *op++ = llCodeTable[0];
+ FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+ LLtype = FSE_ENCODING_RLE;
+ } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+ LLtype = FSE_ENCODING_STATIC;
+ } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
+ FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
+ LLtype = FSE_ENCODING_RAW;
+ } else {
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+ if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
+ FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+ { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize; }
+ FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+ LLtype = FSE_ENCODING_DYNAMIC;
+ } }
+
+ /* CTable for Offsets */
+ { U32 max = MaxOff;
+ size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
+ if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+ *op++ = ofCodeTable[0];
+ FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+ Offtype = FSE_ENCODING_RLE;
+ } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+ Offtype = FSE_ENCODING_STATIC;
+ } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
+ FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
+ Offtype = FSE_ENCODING_RAW;
+ } else {
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+ if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
+ FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+ { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize; }
+ FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+ Offtype = FSE_ENCODING_DYNAMIC;
+ } }
+
+ /* CTable for MatchLengths */
+ { U32 max = MaxML;
+ size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
+ if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+ *op++ = *mlCodeTable;
+ FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+ MLtype = FSE_ENCODING_RLE;
+ } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+ MLtype = FSE_ENCODING_STATIC;
+ } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
+ FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
+ MLtype = FSE_ENCODING_RAW;
+ } else {
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+ if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
+ FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+ { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize; }
+ FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+ MLtype = FSE_ENCODING_DYNAMIC;
+ } }
+
+ *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+ zc->flagStaticTables = 0;
+
+ /* Encoding Sequences */
+ { BIT_CStream_t blockStream;
+ FSE_CState_t stateMatchLength;
+ FSE_CState_t stateOffsetBits;
+ FSE_CState_t stateLitLength;
+
+ { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op);
+ if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */
+
+ /* first symbols */
+ FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+ FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
+ FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
+ BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]);
+ if (MEM_32bits()) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
+ if (MEM_32bits()) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]);
+ BIT_flushBits(&blockStream);
+
+ { size_t n;
+ for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
+ const BYTE ofCode = ofCodeTable[n];
+ const BYTE mlCode = mlCodeTable[n];
+ const BYTE llCode = llCodeTable[n];
+ const U32 llBits = LL_bits[llCode];
+ const U32 mlBits = ML_bits[mlCode];
+ const U32 ofBits = ofCode; /* 32b*/ /* 64b*/
+ /* (7)*/ /* (7)*/
+ FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
+ FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
+ if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
+ FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
+ if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+ BIT_flushBits(&blockStream); /* (7)*/
+ BIT_addBits(&blockStream, llTable[n], llBits);
+ if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, mlTable[n], mlBits);
+ if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
+ BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */
+ BIT_flushBits(&blockStream); /* (7)*/
+ } }
+
+ FSE_flushCState(&blockStream, &stateMatchLength);
+ FSE_flushCState(&blockStream, &stateOffsetBits);
+ FSE_flushCState(&blockStream, &stateLitLength);
+
+ { size_t const streamSize = BIT_closeCStream(&blockStream);
+ if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
+ op += streamSize;
+ } }
+
+ /* check compressibility */
+_check_compressibility:
+ { size_t const minGain = ZSTD_minGain(srcSize);
+ size_t const maxCSize = srcSize - minGain;
+ if ((size_t)(op-ostart) >= maxCSize) return 0; }
+
+ return op - ostart;
+}
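+
+/* Editor's note (worked example, not part of the original source) :
+*  the sequences header at the top of ZSTD_compressSequences() stores nbSeq in
+*  1, 2 or 3 bytes :
+*    nbSeq = 5                  ->  0x05
+*    nbSeq = 300 (< LONGNBSEQ)  ->  0x81 0x2C          ((300>>8)+0x80, 300&255)
+*    nbSeq >= LONGNBSEQ         ->  0xFF then (nbSeq - LONGNBSEQ) as little-endian 16 bits. */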
+
+
+/*! ZSTD_storeSeq() :
+ Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
+ `offsetCode` : distance to match, or 0 == repCode.
+ `matchCode` : matchLength - MINMATCH
+*/
+MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode)
+{
+#if 0 /* for debug */
+ static const BYTE* g_start = NULL;
+ const U32 pos = (U32)(literals - g_start);
+ if (g_start==NULL) g_start = literals;
if ((pos > 2587900) && (pos < 2588050))
- printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
- pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
-#endif
- ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode);
-
- /* copy Literals */
- ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
- seqStorePtr->lit += litLength;
-
- /* literal Length */
- if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->litLength - seqStorePtr->litLengthStart); }
- *seqStorePtr->litLength++ = (U16)litLength;
-
- /* match offset */
- *(seqStorePtr->offset++) = (U32)offsetCode + 1;
-
- /* match Length */
- if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); }
- *seqStorePtr->matchLength++ = (U16)matchCode;
-}
-
-
-/*-*************************************
-* Match length counter
-***************************************/
-static unsigned ZSTD_NbCommonBytes (register size_t val)
-{
- if (MEM_isLittleEndian()) {
- if (MEM_64bits()) {
-# if defined(_MSC_VER) && defined(_WIN64)
- unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_ctzll((U64)val) >> 3);
-# else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-# endif
- } else { /* 32 bits */
-# if defined(_MSC_VER)
- unsigned long r=0;
- _BitScanForward( &r, (U32)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_ctz((U32)val) >> 3);
-# else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-# endif
- }
- } else { /* Big Endian CPU */
- if (MEM_64bits()) {
-# if defined(_MSC_VER) && defined(_WIN64)
- unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_clzll(val) >> 3);
-# else
- unsigned r;
- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
- r += (!val);
- return r;
-# endif
- } else { /* 32 bits */
-# if defined(_MSC_VER)
- unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_clz((U32)val) >> 3);
-# else
- unsigned r;
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
- r += (!val);
- return r;
-# endif
- } }
-}
-
-
-static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
-{
- const BYTE* const pStart = pIn;
-
- while ((pIn<pInLimit-(sizeof(size_t)-1))) {
- size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
- if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
- pIn += ZSTD_NbCommonBytes(diff);
- return (size_t)(pIn - pStart);
- }
- if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
- if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
- if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
- return (size_t)(pIn - pStart);
-}
-
-/** ZSTD_count_2segments() :
-* can count match length with `ip` & `match` in 2 different segments.
-* convention : on reaching mEnd, match count continue starting from iStart
-*/
-static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
-{
- size_t matchLength;
- const BYTE* vEnd = ip + (mEnd - match);
- if (vEnd > iEnd) vEnd = iEnd;
- matchLength = ZSTD_count(ip, match, vEnd);
- if (match + matchLength == mEnd)
- matchLength += ZSTD_count(ip+matchLength, iStart, iEnd);
- return matchLength;
-}
-
-
-/*-*************************************
-* Hashes
-***************************************/
-static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
-static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }
-
-static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
-static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
-
-static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
-static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
-
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-
-static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
-static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
-
-static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
-{
- switch(mls)
- {
- default:
- case 4: return ZSTD_hash4Ptr(p, hBits);
- case 5: return ZSTD_hash5Ptr(p, hBits);
- case 6: return ZSTD_hash6Ptr(p, hBits);
- case 7: return ZSTD_hash7Ptr(p, hBits);
- }
-}
-
-
-/*-*************************************
-* Fast Scan
-***************************************/
-static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
-{
- U32* const hashTable = zc->hashTable;
- const U32 hBits = zc->params.cParams.hashLog;
- const BYTE* const base = zc->base;
- const BYTE* ip = base + zc->nextToUpdate;
- const BYTE* const iend = ((const BYTE*)end) - 8;
- const size_t fastHashFillStep = 3;
-
- while(ip <= iend) {
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
- ip += fastHashFillStep;
- }
-}
-
-
-FORCE_INLINE
-void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
- const void* src, size_t srcSize,
- const U32 mls)
-{
- U32* const hashTable = zc->hashTable;
- const U32 hBits = zc->params.cParams.hashLog;
- seqStore_t* seqStorePtr = &(zc->seqStore);
- const BYTE* const base = zc->base;
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const U32 lowIndex = zc->dictLimit;
- const BYTE* const lowest = base + lowIndex;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - 8;
- size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
-
- /* init */
- ZSTD_resetSeqStore(seqStorePtr);
- if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE;
-
- /* Main Search Loop */
- while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
- size_t mlCode;
- size_t offset;
- const size_t h = ZSTD_hashPtr(ip, hBits, mls);
- const U32 matchIndex = hashTable[h];
- const BYTE* match = base + matchIndex;
- const U32 current = (U32)(ip-base);
- hashTable[h] = current; /* update hash table */
-
- if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { /* note : by construction, offset_1 <= current */
- mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
- ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
- } else {
- if ( (matchIndex <= lowIndex) ||
- (MEM_read32(match) != MEM_read32(ip)) ) {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
- continue;
- }
- mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
- offset = ip-match;
- while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */
- offset_2 = offset_1;
- offset_1 = offset;
-
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
- }
-
- /* match found */
- ip += mlCode;
- anchor = ip;
-
- if (ip <= ilimit) {
- /* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
- hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
- /* check immediate repcode */
- while ( (ip <= ilimit)
- && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
- /* store sequence */
- size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
- { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH);
- ip += rlCode;
- anchor = ip;
- continue; /* faster when present ... (?) */
- } } }
-
- /* Last Literals */
- { size_t const lastLLSize = iend - anchor;
- memcpy(seqStorePtr->lit, anchor, lastLLSize);
- seqStorePtr->lit += lastLLSize;
- }
-}
-
-
-static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
-{
- const U32 mls = ctx->params.cParams.searchLength;
- switch(mls)
- {
- default:
- case 4 :
- ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
- case 5 :
- ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
- case 6 :
- ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
- case 7 :
- ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
- }
-}
-
-
-static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 mls)
-{
- U32* hashTable = ctx->hashTable;
- const U32 hBits = ctx->params.cParams.hashLog;
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const U32 lowLimit = ctx->lowLimit;
- const BYTE* const dictStart = dictBase + lowLimit;
- const U32 dictLimit = ctx->dictLimit;
- const BYTE* const lowPrefixPtr = base + dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - 8;
-
- U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
-
-
- /* init */
- ZSTD_resetSeqStore(seqStorePtr);
- /* skip first position to avoid read overflow during repcode match check */
- hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0);
- ip += REPCODE_STARTVALUE;
-
- /* Main Search Loop */
- while (ip < ilimit) { /* < instead of <=, because (ip+1) */
- const size_t h = ZSTD_hashPtr(ip, hBits, mls);
- const U32 matchIndex = hashTable[h];
- const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
- const BYTE* match = matchBase + matchIndex;
- const U32 current = (U32)(ip-base);
- const U32 repIndex = current + 1 - offset_1;
- const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
- const BYTE* repMatch = repBase + repIndex;
- size_t mlCode;
- U32 offset;
- hashTable[h] = current; /* update hash table */
-
- if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4))
- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
- const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
- mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
- ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
- } else {
- if ( (matchIndex < lowLimit) ||
- (MEM_read32(match) != MEM_read32(ip)) ) {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
- continue;
- }
- { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
- const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
- mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
- while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */
- offset = current - matchIndex;
- offset_2 = offset_1;
- offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
- } }
-
- /* found a match : store it */
- ip += mlCode;
- anchor = ip;
-
- if (ip <= ilimit) {
- /* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
- hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
- /* check immediate repcode */
- while (ip <= ilimit) {
- U32 const current2 = (U32)(ip-base);
- U32 const repIndex2 = current2 - offset_2;
- const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
- if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit))
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
- const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
- size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
- ip += repLength2;
- anchor = ip;
- continue;
- }
- break;
- } } }
-
- /* Last Literals */
- { size_t const lastLLSize = iend - anchor;
- memcpy(seqStorePtr->lit, anchor, lastLLSize);
- seqStorePtr->lit += lastLLSize;
- }
-}
-
-
-static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
-{
- const U32 mls = ctx->params.cParams.searchLength;
- switch(mls)
- {
- default:
- case 4 :
- ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
- case 5 :
- ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
- case 6 :
- ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
- case 7 :
- ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
- }
-}
-
-
-
-
-/*-*************************************
-* Binary Tree search
-***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
-* ip : assumed <= iend-8 .
-* @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
- U32 extDict)
-{
- U32* const hashTable = zc->hashTable;
- const U32 hashLog = zc->params.cParams.hashLog;
- const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
- U32* const bt = zc->chainTable;
- const U32 btLog = zc->params.cParams.chainLog - 1;
- const U32 btMask= (1 << btLog) - 1;
- U32 matchIndex = hashTable[h];
- size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const prefixStart = base + dictLimit;
- const BYTE* match = base + matchIndex;
- const U32 current = (U32)(ip-base);
- const U32 btLow = btMask >= current ? 0 : current - btMask;
- U32* smallerPtr = bt + 2*(current&btMask);
- U32* largerPtr = smallerPtr + 1;
- U32 dummy32; /* to be nullified at the end */
- const U32 windowLow = zc->lowLimit;
- U32 matchEndIdx = current+8;
- size_t bestLength = 8;
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
- predictedSmall += (predictedSmall>0);
- predictedLarge += (predictedLarge>0);
-
- hashTable[h] = current; /* Update Hash Table */
-
- while (nbCompares-- && (matchIndex > windowLow)) {
- U32* nextPtr = bt + 2*(matchIndex & btMask);
- size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-#if 0 /* note : can create issues when hlog small <= 11 */
- const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
- if (matchIndex == predictedSmall) {
- /* no need to check length, result known */
- *smallerPtr = matchIndex;
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- predictedSmall = predictPtr[1] + (predictPtr[1]>0);
- continue;
- }
- if (matchIndex == predictedLarge) {
- *largerPtr = matchIndex;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- predictedLarge = predictPtr[0] + (predictPtr[0]>0);
- continue;
- }
-#endif
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
- match = base + matchIndex;
- if (match[matchLength] == ip[matchLength])
- matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
- } else {
- match = dictBase + matchIndex;
- matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
- if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
- }
-
- if (matchLength > bestLength) {
- bestLength = matchLength;
- if (matchLength > matchEndIdx - matchIndex)
- matchEndIdx = matchIndex + (U32)matchLength;
- }
-
- if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
- break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
-
- if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
- /* match is smaller than current */
- *smallerPtr = matchIndex; /* update smaller idx */
- commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- } else {
- /* match is larger than current */
- *largerPtr = matchIndex;
- commonLengthLarger = matchLength;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- } }
-
- *smallerPtr = *largerPtr = 0;
- if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));
- if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
- return 1;
-}
-
-
-static size_t ZSTD_insertBtAndFindBestMatch (
- ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- size_t* offsetPtr,
- U32 nbCompares, const U32 mls,
- U32 extDict)
-{
- U32* const hashTable = zc->hashTable;
- const U32 hashLog = zc->params.cParams.hashLog;
- const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
- U32* const bt = zc->chainTable;
- const U32 btLog = zc->params.cParams.chainLog - 1;
- const U32 btMask= (1 << btLog) - 1;
- U32 matchIndex = hashTable[h];
- size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const prefixStart = base + dictLimit;
- const U32 current = (U32)(ip-base);
- const U32 btLow = btMask >= current ? 0 : current - btMask;
- const U32 windowLow = zc->lowLimit;
- U32* smallerPtr = bt + 2*(current&btMask);
- U32* largerPtr = bt + 2*(current&btMask) + 1;
- U32 matchEndIdx = current+8;
- U32 dummy32; /* to be nullified at the end */
- size_t bestLength = 0;
-
- hashTable[h] = current; /* Update Hash Table */
-
- while (nbCompares-- && (matchIndex > windowLow)) {
- U32* nextPtr = bt + 2*(matchIndex & btMask);
- size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
- const BYTE* match;
-
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
- match = base + matchIndex;
- if (match[matchLength] == ip[matchLength])
- matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
- } else {
- match = dictBase + matchIndex;
- matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
- if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
- }
-
- if (matchLength > bestLength) {
- if (matchLength > matchEndIdx - matchIndex)
- matchEndIdx = matchIndex + (U32)matchLength;
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
- if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
- break; /* drop, to guarantee consistency (miss a little bit of compression) */
- }
-
- if (match[matchLength] < ip[matchLength]) {
- /* match is smaller than current */
- *smallerPtr = matchIndex; /* update smaller idx */
- commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- } else {
- /* match is larger than current */
- *largerPtr = matchIndex;
- commonLengthLarger = matchLength;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- } }
-
- *smallerPtr = *largerPtr = 0;
-
- zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
- return bestLength;
-}
-
-
-static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
-{
- const BYTE* const base = zc->base;
- const U32 target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
-
- while(idx < target)
- idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
-}
-
-/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch (
- ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls)
-{
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
- return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
-}
-
-
-static size_t ZSTD_BtFindBestMatch_selectMLS (
- ZSTD_CCtx* zc, /* Index table will be updated */
- const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
- switch(matchLengthSearch)
- {
- default :
- case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
- case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
- case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
- }
-}
-
-
-static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
-{
- const BYTE* const base = zc->base;
- const U32 target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
-
- while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
-}
-
-
-
-/** Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch_extDict (
- ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls)
-{
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
- return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
-}
-
-
-static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
- ZSTD_CCtx* zc, /* Index table will be updated */
- const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
- switch(matchLengthSearch)
- {
- default :
- case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
- case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
- case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
- }
-}
-
-
-
-/* ***********************
-* Hash Chain
-*************************/
-
-#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
-
-
-/* Update chains up to ip (excluded)
- Assumption : always within prefix (ie. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
-{
- U32* const hashTable = zc->hashTable;
- const U32 hashLog = zc->params.cParams.hashLog;
- U32* const chainTable = zc->chainTable;
- const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
- const BYTE* const base = zc->base;
- const U32 target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
-
- while(idx < target) {
- size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
- NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
- hashTable[h] = idx;
- idx++;
- }
-
- zc->nextToUpdate = target;
- return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_HcFindBestMatch_generic (
- ZSTD_CCtx* zc, /* Index table will be updated */
- const BYTE* const ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls, const U32 extDict)
-{
- U32* const chainTable = zc->chainTable;
- const U32 chainSize = (1 << zc->params.cParams.chainLog);
- const U32 chainMask = chainSize-1;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
- const BYTE* const prefixStart = base + dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const U32 lowLimit = zc->lowLimit;
- const U32 current = (U32)(ip-base);
- const U32 minChain = current > chainSize ? current - chainSize : 0;
- int nbAttempts=maxNbAttempts;
- size_t ml=EQUAL_READ32-1;
-
- /* HC4 match finder */
- U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
-
- for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) {
- const BYTE* match;
- size_t currentMl=0;
- if ((!extDict) || matchIndex >= dictLimit) {
- match = base + matchIndex;
- if (match[ml] == ip[ml]) /* potentially better */
- currentMl = ZSTD_count(ip, match, iLimit);
- } else {
- match = dictBase + matchIndex;
- if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
- currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
- }
-
- /* save best solution */
- if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
-
- if (matchIndex <= minChain) break;
- matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
- }
-
- return ml;
-}
-
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
- ZSTD_CCtx* zc,
- const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
- switch(matchLengthSearch)
- {
- default :
- case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
- case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
- case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
- }
-}
-
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
- ZSTD_CCtx* zc,
- const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
- switch(matchLengthSearch)
- {
- default :
- case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
- case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
- case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
- }
-}
-
-
-/* *******************************
-* Common parser - lazy strategy
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
-{
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base + ctx->dictLimit;
-
- U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
- U32 const mls = ctx->params.cParams.searchLength;
-
- typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
- size_t* offsetPtr,
- U32 maxNbAttempts, U32 matchLengthSearch);
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-
- /* init */
- U32 rep[ZSTD_REP_INIT];
- { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
-
- ctx->nextToUpdate3 = ctx->nextToUpdate;
- ZSTD_resetSeqStore(seqStorePtr);
- if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
-
- /* Match Loop */
- while (ip < ilimit) {
- size_t matchLength=0;
- size_t offset=0;
- const BYTE* start=ip+1;
-
- /* check repCode */
- if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) {
- /* repcode : we take it */
- matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
- if (depth==0) goto _storeSequence;
- }
-
- /* first search (depth 0) */
- { size_t offsetFound = 99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
- if (ml2 > matchLength)
- matchLength = ml2, start = ip, offset=offsetFound;
- }
-
- if (matchLength < EQUAL_READ32) {
- ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
- continue;
- }
-
- /* let's try to find a better solution */
- if (depth>=1)
- while (ip<ilimit) {
- ip ++;
- if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) {
- size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
- int const gain2 = (int)(mlRep * 3);
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
- if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
- matchLength = mlRep, offset = 0, start = ip;
- }
- { size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
- int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
- matchLength = ml2, offset = offset2, start = ip;
- continue; /* search a better one */
- } }
-
- /* let's find an even better one */
- if ((depth==2) && (ip<ilimit)) {
- ip ++;
- if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) {
- size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
- int const gain2 = (int)(ml2 * 4);
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
- matchLength = ml2, offset = 0, start = ip;
- }
- { size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
- int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
- matchLength = ml2, offset = offset2, start = ip;
- continue;
- } } }
- break; /* nothing found : store previous solution */
- }
-
- /* catch up */
- if (offset) {
- while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
- { start--; matchLength++; }
- rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
- }
-
- /* store sequence */
-_storeSequence:
- { size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
- anchor = ip = start + matchLength;
- }
-
- /* check immediate repcode */
- while ( (ip <= ilimit)
- && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) {
- /* store sequence */
- matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32;
- offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
- ip += matchLength;
- anchor = ip;
- continue; /* faster when present ... (?) */
- } }
-
- /* Last Literals */
- { size_t const lastLLSize = iend - anchor;
- memcpy(seqStorePtr->lit, anchor, lastLLSize);
- seqStorePtr->lit += lastLLSize;
- ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0);
- }
-}
-
-
-static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
-}
-
-static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
-}
-
-static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
-}
-
-static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
-}
-
-
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
-{
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base;
- const U32 dictLimit = ctx->dictLimit;
- const BYTE* const prefixStart = base + dictLimit;
- const BYTE* const dictBase = ctx->dictBase;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const dictStart = dictBase + ctx->lowLimit;
-
- const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
- const U32 mls = ctx->params.cParams.searchLength;
-
- typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
- size_t* offsetPtr,
- U32 maxNbAttempts, U32 matchLengthSearch);
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
- /* init */
- U32 rep[ZSTD_REP_INIT];
- { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
-
- ctx->nextToUpdate3 = ctx->nextToUpdate;
- ZSTD_resetSeqStore(seqStorePtr);
- if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
-
- /* Match Loop */
- while (ip < ilimit) {
- size_t matchLength=0;
- size_t offset=0;
- const BYTE* start=ip+1;
- U32 current = (U32)(ip-base);
-
- /* check repCode */
- {
- const U32 repIndex = (U32)(current+1 - rep[0]);
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
- const BYTE* const repMatch = repBase + repIndex;
- if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
- if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
- /* repcode detected we should take it */
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
- if (depth==0) goto _storeSequence;
- } }
-
- /* first search (depth 0) */
- { size_t offsetFound = 99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
- if (ml2 > matchLength)
- matchLength = ml2, start = ip, offset=offsetFound;
- }
-
- if (matchLength < EQUAL_READ32) {
- ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
- continue;
- }
-
- /* let's try to find a better solution */
- if (depth>=1)
- while (ip<ilimit) {
- ip ++;
- current++;
- /* check repCode */
- if (offset) {
- const U32 repIndex = (U32)(current - rep[0]);
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
- const BYTE* const repMatch = repBase + repIndex;
- if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
- if (MEM_read32(ip) == MEM_read32(repMatch)) {
- /* repcode detected */
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
- int const gain2 = (int)(repLength * 3);
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
- if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
- matchLength = repLength, offset = 0, start = ip;
- } }
-
- /* search match, depth 1 */
- { size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
- int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
- matchLength = ml2, offset = offset2, start = ip;
- continue; /* search a better one */
- } }
-
- /* let's find an even better one */
- if ((depth==2) && (ip<ilimit)) {
- ip ++;
- current++;
- /* check repCode */
- if (offset) {
- const U32 repIndex = (U32)(current - rep[0]);
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
- const BYTE* const repMatch = repBase + repIndex;
- if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
- if (MEM_read32(ip) == MEM_read32(repMatch)) {
- /* repcode detected */
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
- int gain2 = (int)(repLength * 4);
- int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
- if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
- matchLength = repLength, offset = 0, start = ip;
- } }
-
- /* search match, depth 2 */
- { size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
- int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
- if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
- matchLength = ml2, offset = offset2, start = ip;
- continue;
- } } }
- break; /* nothing found : store previous solution */
- }
-
- /* catch up */
- if (offset) {
- U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
- const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
- const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
- while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
- rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
- }
-
- /* store sequence */
-_storeSequence:
- { size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
- anchor = ip = start + matchLength;
- }
-
- /* check immediate repcode */
- while (ip <= ilimit) {
- const U32 repIndex = (U32)((ip-base) - rep[1]);
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
- const BYTE* const repMatch = repBase + repIndex;
- if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
- if (MEM_read32(ip) == MEM_read32(repMatch)) {
- /* repcode detected we should take it */
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
- matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
- offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
- ip += matchLength;
- anchor = ip;
- continue; /* faster when present ... (?) */
- }
- break;
- } }
-
- /* Last Literals */
- { size_t const lastLLSize = iend - anchor;
- memcpy(seqStorePtr->lit, anchor, lastLLSize);
- seqStorePtr->lit += lastLLSize;
- }
-}
-
-
-void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
-}
-
-static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
-}
-
-static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
-}
-
-static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
-}
-
+ printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+ pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
+#endif
+ ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode);
+
+ /* copy Literals */
+ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+ seqStorePtr->lit += litLength;
+
+ /* literal Length */
+ if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->litLength - seqStorePtr->litLengthStart); }
+ *seqStorePtr->litLength++ = (U16)litLength;
+
+ /* match offset */
+ *(seqStorePtr->offset++) = (U32)offsetCode + 1;
+
+ /* match Length */
+ if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); }
+ *seqStorePtr->matchLength++ = (U16)matchCode;
+}
+
+
+/*-*************************************
+* Match length counter
+***************************************/
+static unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+ if (MEM_isLittleEndian()) {
+ if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+ unsigned long r = 0;
+ _BitScanForward64( &r, (U64)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_ctzll((U64)val) >> 3);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+ } else { /* 32 bits */
+# if defined(_MSC_VER)
+ unsigned long r=0;
+ _BitScanForward( &r, (U32)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_ctz((U32)val) >> 3);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+ }
+ } else { /* Big Endian CPU */
+ if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_clzll(val) >> 3);
+# else
+ unsigned r;
+ const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
+ if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ } else { /* 32 bits */
+# if defined(_MSC_VER)
+ unsigned long r = 0;
+ _BitScanReverse( &r, (unsigned long)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_clz((U32)val) >> 3);
+# else
+ unsigned r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ } }
+}
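
A minimal standalone sketch of the idea behind ZSTD_NbCommonBytes, not the code above: on a little-endian machine the first differing byte between two words sets the lowest differing bit of their XOR, so counting trailing zero bits and dividing by 8 gives the number of equal leading bytes. The helper below assumes GCC/Clang builtins and is illustrative only.

/* hedged sketch: little-endian + GCC/Clang builtins assumed; not part of zstd */
#include <stdint.h>
#include <string.h>

static unsigned common_bytes_le64(const void* a, const void* b)
{
    uint64_t x, y;
    memcpy(&x, a, 8); memcpy(&y, b, 8);              /* unaligned-safe 8-byte loads */
    uint64_t const diff = x ^ y;
    if (diff == 0) return 8;                         /* all 8 bytes equal */
    return (unsigned)(__builtin_ctzll(diff) >> 3);   /* trailing zero bits / 8 = equal leading bytes */
}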
+
+
+static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+ const BYTE* const pStart = pIn;
+
+ while ((pIn<pInLimit-(sizeof(size_t)-1))) {
+ size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+ if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+ pIn += ZSTD_NbCommonBytes(diff);
+ return (size_t)(pIn - pStart);
+ }
+ if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+ if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+ if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+ return (size_t)(pIn - pStart);
+}
+
+/** ZSTD_count_2segments() :
+* can count match length with `ip` & `match` in 2 different segments.
+* convention : on reaching mEnd, match count continues starting from iStart
+*/
+static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+ size_t matchLength;
+ const BYTE* vEnd = ip + (mEnd - match);
+ if (vEnd > iEnd) vEnd = iEnd;
+ matchLength = ZSTD_count(ip, match, vEnd);
+ if (match + matchLength == mEnd)
+ matchLength += ZSTD_count(ip+matchLength, iStart, iEnd);
+ return matchLength;
+}
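
As a hedged restatement of the two-segment convention documented above (illustrative helper names, plain byte-by-byte counting instead of the word-at-a-time ZSTD_count): once the match side reaches mEnd, counting resumes against the start of the current segment.

/* illustrative only: a match may start in the dictionary segment (ending at mEnd)
   and continue in the current segment starting at iStart */
#include <stddef.h>

static size_t prefix_len(const unsigned char* a, const unsigned char* b, const unsigned char* aEnd)
{
    size_t n = 0;
    while (a + n < aEnd && a[n] == b[n]) n++;
    return n;
}

static size_t count_2segments_sketch(const unsigned char* ip, const unsigned char* match,
                                     const unsigned char* iEnd, const unsigned char* mEnd,
                                     const unsigned char* iStart)
{
    const unsigned char* vEnd = ip + (mEnd - match);     /* point where the match side would hit mEnd */
    if (vEnd > iEnd) vEnd = iEnd;
    size_t len = prefix_len(ip, match, vEnd);
    if (match + len == mEnd)                             /* dictionary segment exhausted : keep counting */
        len += prefix_len(ip + len, iStart, iEnd);       /* ... against the current segment's start */
    return len;
}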
+
+
+/*-*************************************
+* Hashes
+***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
+static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }
+
+static const U32 prime4bytes = 2654435761U;
+static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+
+static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+ switch(mls)
+ {
+ default:
+ case 4: return ZSTD_hash4Ptr(p, hBits);
+ case 5: return ZSTD_hash5Ptr(p, hBits);
+ case 6: return ZSTD_hash6Ptr(p, hBits);
+ case 7: return ZSTD_hash7Ptr(p, hBits);
+ }
+}
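
All the hash helpers above follow the same multiplicative pattern: load the first mls input bytes, multiply by an odd constant, and keep the top hBits bits. A hedged sketch of the 4-byte case (the constant is prime4bytes from above; hBits is assumed to be in 1..32):

/* illustrative only */
#include <stdint.h>
#include <string.h>

static uint32_t hash4_sketch(const void* p, unsigned hBits)
{
    uint32_t v;
    memcpy(&v, p, 4);                          /* read the 4 input bytes */
    return (v * 2654435761U) >> (32 - hBits);  /* odd multiplier mixes the bits; keep the top hBits */
}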
+
+
+/*-*************************************
+* Fast Scan
+***************************************/
+static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
+{
+ U32* const hashTable = zc->hashTable;
+ const U32 hBits = zc->params.cParams.hashLog;
+ const BYTE* const base = zc->base;
+ const BYTE* ip = base + zc->nextToUpdate;
+ const BYTE* const iend = ((const BYTE*)end) - 8;
+ const size_t fastHashFillStep = 3;
+
+ while(ip <= iend) {
+ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
+ ip += fastHashFillStep;
+ }
+}
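
A hedged, self-contained sketch of the table the fast strategy works with (illustrative names and size): each slot holds only the most recent position whose hashed bytes landed there, so probing is a single load and a collision merely loses the older candidate.

/* illustrative only: not the zstd context */
#include <stdint.h>
#include <string.h>

#define TBL_LOG 17                                    /* stands in for hashLog */
static uint32_t tbl[1u << TBL_LOG];

static uint32_t hash4(const uint8_t* p)               /* same multiplicative scheme as ZSTD_hash4 */
{
    uint32_t v; memcpy(&v, p, 4);
    return (v * 2654435761U) >> (32 - TBL_LOG);
}

/* record position pos and return the slot's previous occupant : the single candidate the fast mode checks */
static uint32_t probe_and_insert(const uint8_t* base, uint32_t pos)
{
    uint32_t const h = hash4(base + pos);
    uint32_t const candidate = tbl[h];                /* may be stale or a hash collision */
    tbl[h] = pos;                                     /* newest position wins the slot */
    return candidate;
}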
+
+
+FORCE_INLINE
+void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
+ const void* src, size_t srcSize,
+ const U32 mls)
+{
+ U32* const hashTable = zc->hashTable;
+ const U32 hBits = zc->params.cParams.hashLog;
+ seqStore_t* seqStorePtr = &(zc->seqStore);
+ const BYTE* const base = zc->base;
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ const U32 lowIndex = zc->dictLimit;
+ const BYTE* const lowest = base + lowIndex;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* const ilimit = iend - 8;
+ size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+
+ /* init */
+ ZSTD_resetSeqStore(seqStorePtr);
+ if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE;
+
+ /* Main Search Loop */
+ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+ size_t mlCode;
+ size_t offset;
+ const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+ const U32 matchIndex = hashTable[h];
+ const BYTE* match = base + matchIndex;
+ const U32 current = (U32)(ip-base);
+ hashTable[h] = current; /* update hash table */
+
+ if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { /* note : by construction, offset_1 <= current */
+ mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+ ip++;
+ ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
+ } else {
+ if ( (matchIndex <= lowIndex) ||
+ (MEM_read32(match) != MEM_read32(ip)) ) {
+ ip += ((ip-anchor) >> g_searchStrength) + 1;
+ continue;
+ }
+ mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
+ offset = ip-match;
+ while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */
+ offset_2 = offset_1;
+ offset_1 = offset;
+
+ ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
+ }
+
+ /* match found */
+ ip += mlCode;
+ anchor = ip;
+
+ if (ip <= ilimit) {
+ /* Fill Table */
+ hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
+ hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+ /* check immediate repcode */
+ while ( (ip <= ilimit)
+ && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+ /* store sequence */
+ size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+ { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
+ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH);
+ ip += rlCode;
+ anchor = ip;
+ continue; /* faster when present ... (?) */
+ } } }
+
+ /* Last Literals */
+ { size_t const lastLLSize = iend - anchor;
+ memcpy(seqStorePtr->lit, anchor, lastLLSize);
+ seqStorePtr->lit += lastLLSize;
+ }
+}
+
+
+static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
+ const void* src, size_t srcSize)
+{
+ const U32 mls = ctx->params.cParams.searchLength;
+ switch(mls)
+ {
+ default:
+ case 4 :
+ ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
+ case 5 :
+ ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
+ case 6 :
+ ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
+ case 7 :
+ ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
+ }
+}
+
+
+static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
+ const void* src, size_t srcSize,
+ const U32 mls)
+{
+ U32* hashTable = ctx->hashTable;
+ const U32 hBits = ctx->params.cParams.hashLog;
+ seqStore_t* seqStorePtr = &(ctx->seqStore);
+ const BYTE* const base = ctx->base;
+ const BYTE* const dictBase = ctx->dictBase;
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ const U32 lowLimit = ctx->lowLimit;
+ const BYTE* const dictStart = dictBase + lowLimit;
+ const U32 dictLimit = ctx->dictLimit;
+ const BYTE* const lowPrefixPtr = base + dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* const ilimit = iend - 8;
+
+ U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+
+
+ /* init */
+ ZSTD_resetSeqStore(seqStorePtr);
+ /* skip first position to avoid read overflow during repcode match check */
+ hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0);
+ ip += REPCODE_STARTVALUE;
+
+ /* Main Search Loop */
+ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+ const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+ const U32 matchIndex = hashTable[h];
+ const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
+ const BYTE* match = matchBase + matchIndex;
+ const U32 current = (U32)(ip-base);
+ const U32 repIndex = current + 1 - offset_1;
+ const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
+ const BYTE* repMatch = repBase + repIndex;
+ size_t mlCode;
+ U32 offset;
+ hashTable[h] = current; /* update hash table */
+
+ if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4))
+ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+ const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
+ mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
+ ip++;
+ ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH);
+ } else {
+ if ( (matchIndex < lowLimit) ||
+ (MEM_read32(match) != MEM_read32(ip)) ) {
+ ip += ((ip-anchor) >> g_searchStrength) + 1;
+ continue;
+ }
+ { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
+ const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
+ mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
+ while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */
+ offset = current - matchIndex;
+ offset_2 = offset_1;
+ offset_1 = offset;
+ ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH);
+ } }
+
+ /* found a match : store it */
+ ip += mlCode;
+ anchor = ip;
+
+ if (ip <= ilimit) {
+ /* Fill Table */
+ hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
+ hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+ /* check immediate repcode */
+ while (ip <= ilimit) {
+ U32 const current2 = (U32)(ip-base);
+ U32 const repIndex2 = current2 - offset_2;
+ const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
+ if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit))
+ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+ const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
+ size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
+ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
+ ip += repLength2;
+ anchor = ip;
+ continue;
+ }
+ break;
+ } } }
+
+ /* Last Literals */
+ { size_t const lastLLSize = iend - anchor;
+ memcpy(seqStorePtr->lit, anchor, lastLLSize);
+ seqStorePtr->lit += lastLLSize;
+ }
+}
+
+
+static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
+ const void* src, size_t srcSize)
+{
+ const U32 mls = ctx->params.cParams.searchLength;
+ switch(mls)
+ {
+ default:
+ case 4 :
+ ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
+ case 5 :
+ ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
+ case 6 :
+ ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
+ case 7 :
+ ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
+ }
+}
+
+
+
+
+/*-*************************************
+* Binary Tree search
+***************************************/
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+* ip : assumed <= iend-8 .
+* @return : nb of positions added */
+static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
+ U32 extDict)
+{
+ U32* const hashTable = zc->hashTable;
+ const U32 hashLog = zc->params.cParams.hashLog;
+ const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
+ U32* const bt = zc->chainTable;
+ const U32 btLog = zc->params.cParams.chainLog - 1;
+ const U32 btMask= (1 << btLog) - 1;
+ U32 matchIndex = hashTable[h];
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
+ const BYTE* const base = zc->base;
+ const BYTE* const dictBase = zc->dictBase;
+ const U32 dictLimit = zc->dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const BYTE* match = base + matchIndex;
+ const U32 current = (U32)(ip-base);
+ const U32 btLow = btMask >= current ? 0 : current - btMask;
+ U32* smallerPtr = bt + 2*(current&btMask);
+ U32* largerPtr = smallerPtr + 1;
+ U32 dummy32; /* to be nullified at the end */
+ const U32 windowLow = zc->lowLimit;
+ U32 matchEndIdx = current+8;
+ size_t bestLength = 8;
+ U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
+ U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+ predictedSmall += (predictedSmall>0);
+ predictedLarge += (predictedLarge>0);
+
+ hashTable[h] = current; /* Update Hash Table */
+
+ while (nbCompares-- && (matchIndex > windowLow)) {
+ U32* nextPtr = bt + 2*(matchIndex & btMask);
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+#if 0 /* note : can create issues when hlog small <= 11 */
+ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
+ if (matchIndex == predictedSmall) {
+ /* no need to check length, result known */
+ *smallerPtr = matchIndex;
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+ continue;
+ }
+ if (matchIndex == predictedLarge) {
+ *largerPtr = matchIndex;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+ continue;
+ }
+#endif
+ if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+ match = base + matchIndex;
+ if (match[matchLength] == ip[matchLength])
+ matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+ } else {
+ match = dictBase + matchIndex;
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+ if (matchIndex+matchLength >= dictLimit)
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+ }
+
+ if (matchLength > bestLength) {
+ bestLength = matchLength;
+ if (matchLength > matchEndIdx - matchIndex)
+ matchEndIdx = matchIndex + (U32)matchLength;
+ }
+
+ if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
+ break; /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
+
+ if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
+ /* match is smaller than current */
+ *smallerPtr = matchIndex; /* update smaller idx */
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ } else {
+ /* match is larger than current */
+ *largerPtr = matchIndex;
+ commonLengthLarger = matchLength;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ } }
+
+ *smallerPtr = *largerPtr = 0;
+ if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));
+ if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
+ return 1;
+}
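
A hedged note on the layout ZSTD_insertBt1 maintains, with illustrative accessors (not zstd functions): the chainTable is reused as a binary tree embedded in an array, two U32 links per position, ordered by the byte content of the suffix starting at that position; links are only meaningful while the position stays within btMask of the current one.

/* illustrative only */
#include <stdint.h>

static uint32_t* bt_smaller(uint32_t* bt, uint32_t pos, uint32_t btMask)
{
    return bt + 2*(pos & btMask);        /* link toward candidates whose suffix compares smaller */
}

static uint32_t* bt_larger(uint32_t* bt, uint32_t pos, uint32_t btMask)
{
    return bt + 2*(pos & btMask) + 1;    /* link toward candidates whose suffix compares larger */
}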
+
+
+static size_t ZSTD_insertBtAndFindBestMatch (
+ ZSTD_CCtx* zc,
+ const BYTE* const ip, const BYTE* const iend,
+ size_t* offsetPtr,
+ U32 nbCompares, const U32 mls,
+ U32 extDict)
+{
+ U32* const hashTable = zc->hashTable;
+ const U32 hashLog = zc->params.cParams.hashLog;
+ const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
+ U32* const bt = zc->chainTable;
+ const U32 btLog = zc->params.cParams.chainLog - 1;
+ const U32 btMask= (1 << btLog) - 1;
+ U32 matchIndex = hashTable[h];
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
+ const BYTE* const base = zc->base;
+ const BYTE* const dictBase = zc->dictBase;
+ const U32 dictLimit = zc->dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const U32 current = (U32)(ip-base);
+ const U32 btLow = btMask >= current ? 0 : current - btMask;
+ const U32 windowLow = zc->lowLimit;
+ U32* smallerPtr = bt + 2*(current&btMask);
+ U32* largerPtr = bt + 2*(current&btMask) + 1;
+ U32 matchEndIdx = current+8;
+ U32 dummy32; /* to be nullified at the end */
+ size_t bestLength = 0;
+
+ hashTable[h] = current; /* Update Hash Table */
+
+ while (nbCompares-- && (matchIndex > windowLow)) {
+ U32* nextPtr = bt + 2*(matchIndex & btMask);
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+ const BYTE* match;
+
+ if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+ match = base + matchIndex;
+ if (match[matchLength] == ip[matchLength])
+ matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+ } else {
+ match = dictBase + matchIndex;
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+ if (matchIndex+matchLength >= dictLimit)
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+ }
+
+ if (matchLength > bestLength) {
+ if (matchLength > matchEndIdx - matchIndex)
+ matchEndIdx = matchIndex + (U32)matchLength;
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+ if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
+ }
+
+ if (match[matchLength] < ip[matchLength]) {
+ /* match is smaller than current */
+ *smallerPtr = matchIndex; /* update smaller idx */
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ } else {
+ /* match is larger than current */
+ *largerPtr = matchIndex;
+ commonLengthLarger = matchLength;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ } }
+
+ *smallerPtr = *largerPtr = 0;
+
+ zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
+ return bestLength;
+}
+
+
+static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
+{
+ const BYTE* const base = zc->base;
+ const U32 target = (U32)(ip - base);
+ U32 idx = zc->nextToUpdate;
+
+ while(idx < target)
+ idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
+}
+
+/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
+static size_t ZSTD_BtFindBestMatch (
+ ZSTD_CCtx* zc,
+ const BYTE* const ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 mls)
+{
+ if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
+ return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
+}
+
+
+static size_t ZSTD_BtFindBestMatch_selectMLS (
+ ZSTD_CCtx* zc, /* Index table will be updated */
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+ switch(matchLengthSearch)
+ {
+ default :
+ case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+ case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+ }
+}
+
+
+static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
+{
+ const BYTE* const base = zc->base;
+ const U32 target = (U32)(ip - base);
+ U32 idx = zc->nextToUpdate;
+
+ while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
+}
+
+
+
+/** Tree updater, providing best match */
+static size_t ZSTD_BtFindBestMatch_extDict (
+ ZSTD_CCtx* zc,
+ const BYTE* const ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 mls)
+{
+ if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+ return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
+}
+
+
+static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
+ ZSTD_CCtx* zc, /* Index table will be updated */
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+ switch(matchLengthSearch)
+ {
+ default :
+ case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+ case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+ }
+}
+
+
+
+/* ***********************
+* Hash Chain
+*************************/
+
+#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
+
+
+/* Update chains up to ip (excluded)
+ Assumption : always within prefix (ie. not within extDict) */
+FORCE_INLINE
+U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
+{
+ U32* const hashTable = zc->hashTable;
+ const U32 hashLog = zc->params.cParams.hashLog;
+ U32* const chainTable = zc->chainTable;
+ const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
+ const BYTE* const base = zc->base;
+ const U32 target = (U32)(ip - base);
+ U32 idx = zc->nextToUpdate;
+
+ while(idx < target) {
+ size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+ NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+ hashTable[h] = idx;
+ idx++;
+ }
+
+ zc->nextToUpdate = target;
+ return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
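
The update above amounts to maintaining one singly linked list of positions per hash bucket; a hedged standalone sketch with illustrative sizes and names:

/* illustrative only: head table + chain table forming per-bucket lists of positions */
#include <stdint.h>

#define HC_HASH_LOG  17
#define HC_CHAIN_LOG 16
#define HC_CHAIN_MASK ((1u << HC_CHAIN_LOG) - 1)

static uint32_t head[1u << HC_HASH_LOG];     /* head[h] : most recent position hashing to h */
static uint32_t chain[1u << HC_CHAIN_LOG];   /* chain[pos & mask] : previous position with the same hash */

static void hc_insert(uint32_t h, uint32_t pos)
{
    chain[pos & HC_CHAIN_MASK] = head[h];    /* link the new node to the old head (cf. NEXT_IN_CHAIN) */
    head[h] = pos;                           /* the new position becomes the head */
}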
+
+
+
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_HcFindBestMatch_generic (
+ ZSTD_CCtx* zc, /* Index table will be updated */
+ const BYTE* const ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 mls, const U32 extDict)
+{
+ U32* const chainTable = zc->chainTable;
+ const U32 chainSize = (1 << zc->params.cParams.chainLog);
+ const U32 chainMask = chainSize-1;
+ const BYTE* const base = zc->base;
+ const BYTE* const dictBase = zc->dictBase;
+ const U32 dictLimit = zc->dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const U32 lowLimit = zc->lowLimit;
+ const U32 current = (U32)(ip-base);
+ const U32 minChain = current > chainSize ? current - chainSize : 0;
+ int nbAttempts=maxNbAttempts;
+ size_t ml=EQUAL_READ32-1;
+
+ /* HC4 match finder */
+ U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
+
+ for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) {
+ const BYTE* match;
+ size_t currentMl=0;
+ if ((!extDict) || matchIndex >= dictLimit) {
+ match = base + matchIndex;
+ if (match[ml] == ip[ml]) /* potentially better */
+ currentMl = ZSTD_count(ip, match, iLimit);
+ } else {
+ match = dictBase + matchIndex;
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+ currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
+ }
+
+ /* save best solution */
+ if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
+
+ if (matchIndex <= minChain) break;
+ matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+ }
+
+ return ml;
+}
+
+
+FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
+ ZSTD_CCtx* zc,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+ switch(matchLengthSearch)
+ {
+ default :
+ case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
+ case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+ case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
+ }
+}
+
+
+FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+ ZSTD_CCtx* zc,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr,
+ const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+ switch(matchLengthSearch)
+ {
+ default :
+ case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
+ case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+ case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
+ }
+}
+
+
+/* *******************************
+* Common parser - lazy strategy
+*********************************/
+FORCE_INLINE
+void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
+ const void* src, size_t srcSize,
+ const U32 searchMethod, const U32 depth)
+{
+ seqStore_t* seqStorePtr = &(ctx->seqStore);
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* const ilimit = iend - 8;
+ const BYTE* const base = ctx->base + ctx->dictLimit;
+
+ U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
+ U32 const mls = ctx->params.cParams.searchLength;
+
+ typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
+ size_t* offsetPtr,
+ U32 maxNbAttempts, U32 matchLengthSearch);
+ searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
+
+ /* init */
+ U32 rep[ZSTD_REP_INIT];
+ { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
+
+ ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ZSTD_resetSeqStore(seqStorePtr);
+ if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
+
+ /* Match Loop */
+ while (ip < ilimit) {
+ size_t matchLength=0;
+ size_t offset=0;
+ const BYTE* start=ip+1;
+
+ /* check repCode */
+ if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) {
+ /* repcode : we take it */
+ matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+ if (depth==0) goto _storeSequence;
+ }
+
+ /* first search (depth 0) */
+ { size_t offsetFound = 99999999;
+ size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+ if (ml2 > matchLength)
+ matchLength = ml2, start = ip, offset=offsetFound;
+ }
+
+ if (matchLength < EQUAL_READ32) {
+ ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+ continue;
+ }
+
+ /* let's try to find a better solution */
+ if (depth>=1)
+ while (ip<ilimit) {
+ ip ++;
+ if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) {
+ size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+ int const gain2 = (int)(mlRep * 3);
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+ if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
+ matchLength = mlRep, offset = 0, start = ip;
+ }
+ { size_t offset2=99999999;
+ size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
+ if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ matchLength = ml2, offset = offset2, start = ip;
+ continue; /* search a better one */
+ } }
+
+ /* let's find an even better one */
+ if ((depth==2) && (ip<ilimit)) {
+ ip ++;
+ if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) {
+ size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+ int const gain2 = (int)(ml2 * 4);
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+ if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
+ matchLength = ml2, offset = 0, start = ip;
+ }
+ { size_t offset2=99999999;
+ size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+ if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ matchLength = ml2, offset = offset2, start = ip;
+ continue;
+ } } }
+ break; /* nothing found : store previous solution */
+ }
+
+ /* catch up */
+ if (offset) {
+ while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
+ { start--; matchLength++; }
+ rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
+ }
+
+ /* store sequence */
+_storeSequence:
+ { size_t const litLength = start - anchor;
+ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+ anchor = ip = start + matchLength;
+ }
+
+ /* check immediate repcode */
+ while ( (ip <= ilimit)
+ && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) {
+ /* store sequence */
+ matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32;
+ offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */
+ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+ ip += matchLength;
+ anchor = ip;
+ continue; /* faster when present ... (?) */
+ } }
+
+ /* Last Literals */
+ { size_t const lastLLSize = iend - anchor;
+ memcpy(seqStorePtr->lit, anchor, lastLLSize);
+ seqStorePtr->lit += lastLLSize;
+ ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0);
+ }
+}
+
+
+static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
+}
+
+static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
+}
+
+static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
+}
+
+static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
+}
+
+
+FORCE_INLINE
+void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
+ const void* src, size_t srcSize,
+ const U32 searchMethod, const U32 depth)
+{
+ seqStore_t* seqStorePtr = &(ctx->seqStore);
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* const ilimit = iend - 8;
+ const BYTE* const base = ctx->base;
+ const U32 dictLimit = ctx->dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const BYTE* const dictBase = ctx->dictBase;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const dictStart = dictBase + ctx->lowLimit;
+
+ const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
+ const U32 mls = ctx->params.cParams.searchLength;
+
+ typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
+ size_t* offsetPtr,
+ U32 maxNbAttempts, U32 matchLengthSearch);
+ searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+ /* init */
+ U32 rep[ZSTD_REP_INIT];
+ { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; }
+
+ ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ZSTD_resetSeqStore(seqStorePtr);
+ if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
+
+ /* Match Loop */
+ while (ip < ilimit) {
+ size_t matchLength=0;
+ size_t offset=0;
+ const BYTE* start=ip+1;
+ U32 current = (U32)(ip-base);
+
+ /* check repCode */
+ {
+ const U32 repIndex = (U32)(current+1 - rep[0]);
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+ const BYTE* const repMatch = repBase + repIndex;
+ if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+ if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
+ /* repcode detected, we should take it */
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+ matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ if (depth==0) goto _storeSequence;
+ } }
+
+ /* first search (depth 0) */
+ { size_t offsetFound = 99999999;
+ size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+ if (ml2 > matchLength)
+ matchLength = ml2, start = ip, offset=offsetFound;
+ }
+
+ if (matchLength < EQUAL_READ32) {
+ ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+ continue;
+ }
+
+ /* let's try to find a better solution */
+ if (depth>=1)
+ while (ip<ilimit) {
+ ip ++;
+ current++;
+ /* check repCode */
+ if (offset) {
+ const U32 repIndex = (U32)(current - rep[0]);
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+ const BYTE* const repMatch = repBase + repIndex;
+ if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+ if (MEM_read32(ip) == MEM_read32(repMatch)) {
+ /* repcode detected */
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+ size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ int const gain2 = (int)(repLength * 3);
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+ if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+ matchLength = repLength, offset = 0, start = ip;
+ } }
+
+ /* search match, depth 1 */
+ { size_t offset2=99999999;
+ size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
+ if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ matchLength = ml2, offset = offset2, start = ip;
+ continue; /* search a better one */
+ } }
+
+ /* let's find an even better one */
+ if ((depth==2) && (ip<ilimit)) {
+ ip ++;
+ current++;
+ /* check repCode */
+ if (offset) {
+ const U32 repIndex = (U32)(current - rep[0]);
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+ const BYTE* const repMatch = repBase + repIndex;
+ if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+ if (MEM_read32(ip) == MEM_read32(repMatch)) {
+ /* repcode detected */
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+ size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ int gain2 = (int)(repLength * 4);
+ int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+ if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
+ matchLength = repLength, offset = 0, start = ip;
+ } }
+
+ /* search match, depth 2 */
+ { size_t offset2=99999999;
+ size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+ if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
+ matchLength = ml2, offset = offset2, start = ip;
+ continue;
+ } } }
+ break; /* nothing found : store previous solution */
+ }
+
+ /* catch up */
+ if (offset) {
+ U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+ const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+ const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+ while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
+ rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
+ }
+
+ /* store sequence */
+_storeSequence:
+ { size_t const litLength = start - anchor;
+ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+ anchor = ip = start + matchLength;
+ }
+
+ /* check immediate repcode */
+ while (ip <= ilimit) {
+ const U32 repIndex = (U32)((ip-base) - rep[1]);
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+ const BYTE* const repMatch = repBase + repIndex;
+ if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+ if (MEM_read32(ip) == MEM_read32(repMatch)) {
+ /* repcode detected, we should take it */
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+ matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
+ offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */
+ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+ ip += matchLength;
+ anchor = ip;
+ continue; /* faster when present ... (?) */
+ }
+ break;
+ } }
+
+ /* Last Literals */
+ { size_t const lastLLSize = iend - anchor;
+ memcpy(seqStorePtr->lit, anchor, lastLLSize);
+ seqStorePtr->lit += lastLLSize;
+ }
+}
+
+
+void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
+}
+
+static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
+}
+
+static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
+}
+
+static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
+}
+
/* The optimal parser */
@@ -1976,590 +1976,590 @@ static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t src
ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
}
-static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
-{
- ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
-}
-
-
-typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
-static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
-{
- static const ZSTD_blockCompressor blockCompressor[2][6] = {
-#if 1
- { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
-#else
- { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict },
-#endif
- { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
- };
-
- return blockCompressor[extDict][(U32)strat];
-}
-
-
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
- if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
- blockCompressor(zc, src, srcSize);
- return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
-}
-
-
-
-
-static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- size_t blockSize = zc->blockSize;
- size_t remaining = srcSize;
- const BYTE* ip = (const BYTE*)src;
- BYTE* const ostart = (BYTE*)dst;
- BYTE* op = ostart;
- const U32 maxDist = 1 << zc->params.cParams.windowLog;
- ZSTD_stats_t* stats = &zc->seqStore.stats;
-
- ZSTD_statsInit(stats);
-
- while (remaining) {
- size_t cSize;
- ZSTD_statsResetFreqs(stats);
-
- if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
- if (remaining < blockSize) blockSize = remaining;
-
- if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) {
- /* enforce maxDist */
- U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist;
- if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit;
- if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit;
- }
-
- cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
- if (ZSTD_isError(cSize)) return cSize;
-
- if (cSize == 0) { /* block is not compressible */
- cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize);
- if (ZSTD_isError(cSize)) return cSize;
- } else {
- op[0] = (BYTE)(cSize>>16);
- op[1] = (BYTE)(cSize>>8);
- op[2] = (BYTE)cSize;
- op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
- cSize += 3;
- }
-
- remaining -= blockSize;
- dstCapacity -= cSize;
- ip += blockSize;
- op += cSize;
- }
-
- ZSTD_statsPrint(stats, zc->params.cParams.searchLength);
- return op-ostart;
-}
-
-
-static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
- ZSTD_parameters params, U64 pledgedSrcSize)
-{ BYTE* const op = (BYTE*)dst;
- U32 const fcsId = params.fParams.contentSizeFlag ?
- (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) : /* 0-3 */
- 0;
- BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */
- | (fcsId << 6) );
- size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId];
- if (hSize > dstCapacity) return ERROR(dstSize_tooSmall);
-
- MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
- op[4] = fdescriptor;
- switch(fcsId)
- {
- default: /* impossible */
- case 0 : break;
- case 1 : op[5] = (BYTE)(pledgedSrcSize); break;
- case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break;
- case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break;
- }
- return hSize;
-}
-
-
-static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- U32 frame)
-{
- const BYTE* const ip = (const BYTE*) src;
- size_t fhSize = 0;
-
- if (zc->stage==0) return ERROR(stage_wrong);
- if (frame && (zc->stage==1)) { /* copy saved header */
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize);
- if (ZSTD_isError(fhSize)) return fhSize;
- dstCapacity -= fhSize;
- dst = (char*)dst + fhSize;
- zc->stage = 2;
- }
-
- /* Check if blocks follow each other */
- if (src != zc->nextSrc) {
- /* not contiguous */
- size_t const delta = zc->nextSrc - ip;
- zc->lowLimit = zc->dictLimit;
- zc->dictLimit = (U32)(zc->nextSrc - zc->base);
- zc->dictBase = zc->base;
- zc->base -= delta;
- zc->nextToUpdate = zc->dictLimit;
- if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit; /* too small extDict */
- }
-
- /* preemptive overflow correction */
- if (zc->lowLimit > (1<<30)) {
- U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt);
- U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1;
- U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */
- U32 const correction = zc->lowLimit - newLowLimit;
- ZSTD_reduceIndex(zc, correction);
- zc->base += correction;
- zc->dictBase += correction;
- zc->lowLimit = newLowLimit;
- zc->dictLimit -= correction;
- if (zc->nextToUpdate < correction) zc->nextToUpdate = 0;
- else zc->nextToUpdate -= correction;
- }
-
- /* if input and dictionary overlap : reduce dictionary (presumed modified by input) */
- if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit)) {
- zc->lowLimit = (U32)(ip + srcSize - zc->dictBase);
- if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit;
- }
-
- zc->nextSrc = ip + srcSize;
- { size_t const cSize = frame ?
- ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) :
- ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize);
- if (ZSTD_isError(cSize)) return cSize;
- return cSize + fhSize;
- }
-}
-
-
-size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1);
-}
-
-
-size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
- ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength);
- return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0);
-}
-
-
-static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-{
- const BYTE* const ip = (const BYTE*) src;
- const BYTE* const iend = ip + srcSize;
-
- /* input becomes current prefix */
- zc->lowLimit = zc->dictLimit;
- zc->dictLimit = (U32)(zc->nextSrc - zc->base);
- zc->dictBase = zc->base;
- zc->base += ip - zc->nextSrc;
- zc->nextToUpdate = zc->dictLimit;
- zc->loadedDictEnd = (U32)(iend - zc->base);
-
- zc->nextSrc = iend;
- if (srcSize <= 8) return 0;
-
- switch(zc->params.cParams.strategy)
- {
- case ZSTD_fast:
- ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
- break;
-
- case ZSTD_greedy:
- case ZSTD_lazy:
- case ZSTD_lazy2:
- ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.cParams.searchLength);
- break;
-
- case ZSTD_btlazy2:
- case ZSTD_btopt:
- ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
- break;
-
- default:
- return ERROR(GENERIC); /* strategy doesn't exist; impossible */
- }
-
- zc->nextToUpdate = zc->loadedDictEnd;
- return 0;
-}
-
-
-/* Dictionary format :
- Magic == ZSTD_DICT_MAGIC (4 bytes)
- HUF_writeCTable(256)
- Dictionary content
-*/
-/*! ZSTD_loadDictEntropyStats() :
- @return : size read from dictionary */
-static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
-{
- /* note : magic number already checked */
- size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode;
- short offcodeNCount[MaxOff+1];
- unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
- short matchlengthNCount[MaxML+1];
- unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
- short litlengthNCount[MaxLL+1];
- unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
-
- size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
- if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
- zc->flagStaticTables = 1;
- dict = (const char*)dict + hufHeaderSize;
- dictSize -= hufHeaderSize;
-
- offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
- if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
- errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
- if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + offcodeHeaderSize;
- dictSize -= offcodeHeaderSize;
-
- matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
- if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
- errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
- if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + matchlengthHeaderSize;
- dictSize -= matchlengthHeaderSize;
-
- litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
- if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
- errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
- if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
-
- return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
-}
-
-/** ZSTD_compress_insertDictionary() :
-* @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
-{
- if ((dict==NULL) || (dictSize<=4)) return 0;
-
- /* default : dict is pure content */
- if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
-
- /* known magic number : dict is parsed for entropy stats and content */
- { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4;
- if (ZSTD_isError(eSize)) return eSize;
- return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
- }
-}
-
-
-/*! ZSTD_compressBegin_internal() :
-* @return : 0, or an error code */
-static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
- const void* dict, size_t dictSize,
- ZSTD_parameters params, U64 pledgedSrcSize)
-{
- { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN);
- zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0; }
-
- { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1);
- if (ZSTD_isError(resetError)) return resetError; }
-
- return ZSTD_compress_insertDictionary(zc, dict, dictSize);
-}
-
-
-/*! ZSTD_compressBegin_advanced() :
-* @return : 0, or an error code */
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
- const void* dict, size_t dictSize,
- ZSTD_parameters params, U64 pledgedSrcSize)
-{
- /* compression parameters verification and optimization */
- { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
- if (ZSTD_isError(errorCode)) return errorCode; }
-
- return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize);
-}
-
-
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel)
-{
- ZSTD_parameters params;
- params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
- params.fParams.contentSizeFlag = 0;
- ZSTD_adjustCParams(&params.cParams, 0, dictSize);
- ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel);
- return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0);
-}
-
-
-size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
-{
- ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel);
- return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
-}
-
-
-/*! ZSTD_compressEnd() :
-* Write frame epilogue.
-* @return : nb of bytes written into dst (or an error code) */
-size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity)
-{
- BYTE* op = (BYTE*)dst;
- size_t fhSize = 0;
-
- /* not even init ! */
- if (zc->stage==0) return ERROR(stage_wrong);
-
- /* special case : empty frame */
- if (zc->stage==1) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0);
- if (ZSTD_isError(fhSize)) return fhSize;
- dstCapacity -= fhSize;
- op += fhSize;
- zc->stage = 2;
- }
-
- /* frame epilogue */
- if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
- op[0] = (BYTE)(bt_end << 6);
- op[1] = 0;
- op[2] = 0;
-
- zc->stage = 0; /* return to "created by not init" status */
- return 3+fhSize;
-}
-
-
-size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- { size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx);
- if (ZSTD_isError(errorCode)) return errorCode;
- }
- { size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);
- if (ZSTD_isError(cSize)) return cSize;
-
- { size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize);
- if (ZSTD_isError(endSize)) return endSize;
- return cSize + endSize;
- } }
-}
-
-
-static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- ZSTD_parameters params)
-{
- BYTE* const ostart = (BYTE*)dst;
- BYTE* op = ostart;
-
- /* Init */
- { size_t const errorCode = ZSTD_compressBegin_internal(ctx, dict, dictSize, params, srcSize);
- if(ZSTD_isError(errorCode)) return errorCode; }
-
- /* body (compression) */
- { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize);
- if(ZSTD_isError(oSize)) return oSize;
- op += oSize;
- dstCapacity -= oSize; }
-
- /* Close frame */
- { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity);
- if(ZSTD_isError(oSize)) return oSize;
- op += oSize; }
-
- return (op - ostart);
-}
-
-size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- ZSTD_parameters params)
-{
- size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize);
- if (ZSTD_isError(errorCode)) return errorCode;
- return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
-}
-
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
-{
- ZSTD_parameters params;
- ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
- params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
- params.fParams.contentSizeFlag = 1;
- ZSTD_adjustCParams(&params.cParams, srcSize, dictSize);
- return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
-}
-
-size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
-{
- ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel);
- return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
-}
-
-size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
-{
- size_t result;
- ZSTD_CCtx ctxBody;
- memset(&ctxBody, 0, sizeof(ctxBody));
- result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
- free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; just free heap content */
- return result;
-}
-
-
-/*-===== Pre-defined compression levels =====-*/
-
+static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+ ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
+}
+
+
+typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+
+static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
+{
+ static const ZSTD_blockCompressor blockCompressor[2][6] = {
+#if 1
+ { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
+#else
+ { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict },
+#endif
+ { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
+ };
+
+ return blockCompressor[extDict][(U32)strat];
+}
+
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
+ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
+ blockCompressor(zc, src, srcSize);
+ return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
+}
+
+
+
+
+static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ size_t blockSize = zc->blockSize;
+ size_t remaining = srcSize;
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* op = ostart;
+ const U32 maxDist = 1 << zc->params.cParams.windowLog;
+ ZSTD_stats_t* stats = &zc->seqStore.stats;
+
+ ZSTD_statsInit(stats);
+
+ while (remaining) {
+ size_t cSize;
+ ZSTD_statsResetFreqs(stats);
+
+ if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
+ if (remaining < blockSize) blockSize = remaining;
+
+ if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) {
+ /* enforce maxDist */
+ U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist;
+ if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit;
+ if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit;
+ }
+
+ cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
+ if (ZSTD_isError(cSize)) return cSize;
+
+ if (cSize == 0) { /* block is not compressible */
+ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize);
+ if (ZSTD_isError(cSize)) return cSize;
+ } else {
+ op[0] = (BYTE)(cSize>>16);
+ op[1] = (BYTE)(cSize>>8);
+ op[2] = (BYTE)cSize;
+ op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
+ cSize += 3;
+ }
+
+ remaining -= blockSize;
+ dstCapacity -= cSize;
+ ip += blockSize;
+ op += cSize;
+ }
+
+ ZSTD_statsPrint(stats, zc->params.cParams.searchLength);
+ return op-ostart;
+}
+
+
+static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
+ ZSTD_parameters params, U64 pledgedSrcSize)
+{ BYTE* const op = (BYTE*)dst;
+ U32 const fcsId = params.fParams.contentSizeFlag ?
+ (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) : /* 0-3 */
+ 0;
+ BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */
+ | (fcsId << 6) );
+ size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId];
+ if (hSize > dstCapacity) return ERROR(dstSize_tooSmall);
+
+ MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+ op[4] = fdescriptor;
+ switch(fcsId)
+ {
+ default: /* impossible */
+ case 0 : break;
+ case 1 : op[5] = (BYTE)(pledgedSrcSize); break;
+ case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break;
+ case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break;
+ }
+ return hSize;
+}
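
ZSTD_writeFrameHeader() above emits the 4-byte magic, one frame-descriptor byte, and an optional content-size field whose width depends on fcsId. The sketch below reproduces just the fcsId selection and the resulting header size, assuming the minimal header is 5 bytes (magic + descriptor) and that ZSTD_fcs_fieldSize is {0,1,2,8} bytes for fcsId 0..3, as the switch statement suggests.

/* Sketch: header size as a function of the pledged source size. */
#include <stdio.h>
#include <stdint.h>

static unsigned fcsIdFor(uint64_t pledgedSrcSize, int contentSizeFlag)
{
    if (!contentSizeFlag) return 0;
    return (pledgedSrcSize > 0) + (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256);
}

int main(void)
{
    static const unsigned fcsFieldSize[4] = { 0, 1, 2, 8 };  /* assumed ZSTD_fcs_fieldSize */
    uint64_t const samples[] = { 0, 100, 1000, 1u << 20 };
    size_t i;
    for (i = 0; i < 4; i++) {
        unsigned const id = fcsIdFor(samples[i], 1);
        printf("pledgedSrcSize=%llu -> fcsId=%u, header=%u bytes\n",
               (unsigned long long)samples[i], id, 5 + fcsFieldSize[id]);
    }
    return 0;
}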
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ U32 frame)
+{
+ const BYTE* const ip = (const BYTE*) src;
+ size_t fhSize = 0;
+
+ if (zc->stage==0) return ERROR(stage_wrong);
+ if (frame && (zc->stage==1)) { /* copy saved header */
+ fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize);
+ if (ZSTD_isError(fhSize)) return fhSize;
+ dstCapacity -= fhSize;
+ dst = (char*)dst + fhSize;
+ zc->stage = 2;
+ }
+
+ /* Check if blocks follow each other */
+ if (src != zc->nextSrc) {
+ /* not contiguous */
+ size_t const delta = zc->nextSrc - ip;
+ zc->lowLimit = zc->dictLimit;
+ zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+ zc->dictBase = zc->base;
+ zc->base -= delta;
+ zc->nextToUpdate = zc->dictLimit;
+ if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit; /* too small extDict */
+ }
+
+ /* preemptive overflow correction */
+ if (zc->lowLimit > (1<<30)) {
+ U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt);
+ U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1;
+ U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */
+ U32 const correction = zc->lowLimit - newLowLimit;
+ ZSTD_reduceIndex(zc, correction);
+ zc->base += correction;
+ zc->dictBase += correction;
+ zc->lowLimit = newLowLimit;
+ zc->dictLimit -= correction;
+ if (zc->nextToUpdate < correction) zc->nextToUpdate = 0;
+ else zc->nextToUpdate -= correction;
+ }
+
+ /* if input and dictionary overlap : reduce dictionary (presumed modified by input) */
+ if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit)) {
+ zc->lowLimit = (U32)(ip + srcSize - zc->dictBase);
+ if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit;
+ }
+
+ zc->nextSrc = ip + srcSize;
+ { size_t const cSize = frame ?
+ ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) :
+ ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize);
+ if (ZSTD_isError(cSize)) return cSize;
+ return cSize + fhSize;
+ }
+}
+
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1);
+}
+
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+ ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength);
+ return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0);
+}
+
+
+static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+{
+ const BYTE* const ip = (const BYTE*) src;
+ const BYTE* const iend = ip + srcSize;
+
+ /* input becomes current prefix */
+ zc->lowLimit = zc->dictLimit;
+ zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+ zc->dictBase = zc->base;
+ zc->base += ip - zc->nextSrc;
+ zc->nextToUpdate = zc->dictLimit;
+ zc->loadedDictEnd = (U32)(iend - zc->base);
+
+ zc->nextSrc = iend;
+ if (srcSize <= 8) return 0;
+
+ switch(zc->params.cParams.strategy)
+ {
+ case ZSTD_fast:
+ ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
+ break;
+
+ case ZSTD_greedy:
+ case ZSTD_lazy:
+ case ZSTD_lazy2:
+ ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.cParams.searchLength);
+ break;
+
+ case ZSTD_btlazy2:
+ case ZSTD_btopt:
+ ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
+ break;
+
+ default:
+ return ERROR(GENERIC); /* strategy doesn't exist; impossible */
+ }
+
+ zc->nextToUpdate = zc->loadedDictEnd;
+ return 0;
+}
+
+
+/* Dictionary format :
+ Magic == ZSTD_DICT_MAGIC (4 bytes)
+ HUF_writeCTable(256)
+ Dictionary content
+*/
+/*! ZSTD_loadDictEntropyStats() :
+ @return : size read from dictionary */
+static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
+{
+ /* note : magic number already checked */
+ size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode;
+ short offcodeNCount[MaxOff+1];
+ unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
+ short matchlengthNCount[MaxML+1];
+ unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+ short litlengthNCount[MaxLL+1];
+ unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+
+ size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
+ if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
+ zc->flagStaticTables = 1;
+ dict = (const char*)dict + hufHeaderSize;
+ dictSize -= hufHeaderSize;
+
+ offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+ if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+ errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+ if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
+ dict = (const char*)dict + offcodeHeaderSize;
+ dictSize -= offcodeHeaderSize;
+
+ matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+ if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+ if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
+ dict = (const char*)dict + matchlengthHeaderSize;
+ dictSize -= matchlengthHeaderSize;
+
+ litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+ if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+ if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
+
+ return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
+}
+
+/** ZSTD_compress_insertDictionary() :
+* @return : 0, or an error code */
+static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
+{
+ if ((dict==NULL) || (dictSize<=4)) return 0;
+
+ /* default : dict is pure content */
+ if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
+
+ /* known magic number : dict is parsed for entropy stats and content */
+ { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4;
+ if (ZSTD_isError(eSize)) return eSize;
+ return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
+ }
+}
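
ZSTD_compress_insertDictionary() above distinguishes a raw-content dictionary from one that starts with the dictionary magic and carries entropy tables. The sketch below mirrors only that dispatch decision; DICT_MAGIC is a placeholder constant, not the real ZSTD_DICT_MAGIC value, and the helper assumes a little-endian host.

/* Sketch of the dictionary dispatch: raw content vs entropy tables + content. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define DICT_MAGIC 0x12345678u  /* placeholder for ZSTD_DICT_MAGIC */

static uint32_t readLE32(const void* p)
{
    uint32_t v; memcpy(&v, p, 4);
    return v;  /* correct on little-endian hosts, which is enough for this sketch */
}

static const char* classifyDict(const void* dict, size_t dictSize)
{
    if (dict == NULL || dictSize <= 4) return "ignored (too small)";
    if (readLE32(dict) != DICT_MAGIC) return "raw content dictionary";
    return "entropy tables + content dictionary";
}

int main(void)
{
    unsigned char rawDict[32] = { 0 };
    unsigned char fullDict[32] = { 0 };
    uint32_t const magic = DICT_MAGIC;
    memcpy(fullDict, &magic, 4);

    printf("%s\n", classifyDict(NULL, 0));
    printf("%s\n", classifyDict(rawDict, sizeof(rawDict)));
    printf("%s\n", classifyDict(fullDict, sizeof(fullDict)));
    return 0;
}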
+
+
+/*! ZSTD_compressBegin_internal() :
+* @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params, U64 pledgedSrcSize)
+{
+ { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN);
+ zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0; }
+
+ { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1);
+ if (ZSTD_isError(resetError)) return resetError; }
+
+ return ZSTD_compress_insertDictionary(zc, dict, dictSize);
+}
+
+
+/*! ZSTD_compressBegin_advanced() :
+* @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
+ const void* dict, size_t dictSize,
+ ZSTD_parameters params, U64 pledgedSrcSize)
+{
+ /* compression parameters verification and optimization */
+ { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
+ if (ZSTD_isError(errorCode)) return errorCode; }
+
+ return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize);
+}
+
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel)
+{
+ ZSTD_parameters params;
+ params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+ params.fParams.contentSizeFlag = 0;
+ ZSTD_adjustCParams(&params.cParams, 0, dictSize);
+ ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel);
+ return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0);
+}
+
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
+{
+ ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel);
+ return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_compressEnd() :
+* Write frame epilogue.
+* @return : nb of bytes written into dst (or an error code) */
+size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity)
+{
+ BYTE* op = (BYTE*)dst;
+ size_t fhSize = 0;
+
+ /* not even init ! */
+ if (zc->stage==0) return ERROR(stage_wrong);
+
+ /* special case : empty frame */
+ if (zc->stage==1) {
+ fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0);
+ if (ZSTD_isError(fhSize)) return fhSize;
+ dstCapacity -= fhSize;
+ op += fhSize;
+ zc->stage = 2;
+ }
+
+ /* frame epilogue */
+ if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
+ op[0] = (BYTE)(bt_end << 6);
+ op[1] = 0;
+ op[2] = 0;
+
+ zc->stage = 0; /* return to "created but not init" status */
+ return 3+fhSize;
+}
+
+
+size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ { size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ }
+ { size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);
+ if (ZSTD_isError(cSize)) return cSize;
+
+ { size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize);
+ if (ZSTD_isError(endSize)) return endSize;
+ return cSize + endSize;
+ } }
+}
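
The explicit begin/continue/end sequence used above can also be driven directly by a caller. Below is a minimal, hypothetical usage sketch of that sequence; it assumes ZSTD_createCCtx()/ZSTD_freeCCtx() and ZSTD_compressBound() are available from zstd.h as in mainline zstd, and it keeps error handling (and cleanup on error paths) to a bare minimum.

/* Hypothetical caller of the begin/continue/end API (sketch only). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zstd.h"

int main(void)
{
    const char src[] = "another small example payload, example payload, example payload";
    size_t const srcSize = strlen(src);
    size_t const dstCapacity = ZSTD_compressBound(srcSize) + 16; /* margin for the epilogue */
    void* const dst = malloc(dstCapacity);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t pos = 0;

    if (dst == NULL || cctx == NULL) return 1;

    if (ZSTD_isError(ZSTD_compressBegin(cctx, 5 /* compression level */))) return 1;

    {   size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(cSize)) return 1;
        pos += cSize;
    }
    {   size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst + pos, dstCapacity - pos);
        if (ZSTD_isError(endSize)) return 1;
        pos += endSize;
    }
    printf("frame size: %zu bytes\n", pos);

    ZSTD_freeCCtx(cctx);
    free(dst);
    return 0;
}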
+
+
+static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ ZSTD_parameters params)
+{
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* op = ostart;
+
+ /* Init */
+ { size_t const errorCode = ZSTD_compressBegin_internal(ctx, dict, dictSize, params, srcSize);
+ if(ZSTD_isError(errorCode)) return errorCode; }
+
+ /* body (compression) */
+ { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize);
+ if(ZSTD_isError(oSize)) return oSize;
+ op += oSize;
+ dstCapacity -= oSize; }
+
+ /* Close frame */
+ { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity);
+ if(ZSTD_isError(oSize)) return oSize;
+ op += oSize; }
+
+ return (op - ostart);
+}
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ ZSTD_parameters params)
+{
+ size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
+{
+ ZSTD_parameters params;
+ ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
+ params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+ params.fParams.contentSizeFlag = 1;
+ ZSTD_adjustCParams(&params.cParams, srcSize, dictSize);
+ return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+
+size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+ ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel);
+ return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+ size_t result;
+ ZSTD_CCtx ctxBody;
+ memset(&ctxBody, 0, sizeof(ctxBody));
+ result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
+ free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; just free heap content */
+ return result;
+}
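
For comparison, the one-shot ZSTD_compress() defined above wraps the whole begin/continue/end cycle into a single call. A minimal caller might look like the sketch below (again assuming ZSTD_compressBound() and ZSTD_isError() are declared in zstd.h).

/* Hypothetical caller of the one-shot API (sketch only). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zstd.h"

int main(void)
{
    const char src[] = "a short, fairly repetitive, repetitive, repetitive input";
    size_t const srcSize = strlen(src);
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc(dstCapacity);
    if (dst == NULL) return 1;

    {   size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 5 /* level */);
        if (ZSTD_isError(cSize)) { free(dst); return 1; }
        printf("compressed %zu bytes into %zu bytes\n", srcSize, cSize);
    }
    free(dst);
    return 0;
}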
+
+
+/*-===== Pre-defined compression levels =====-*/
+
#define ZSTD_DEFAULT_CLEVEL 5
#define ZSTD_MAX_CLEVEL 22
-unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
-
-static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
-{ /* "default" */
- /* W, C, H, S, L, TL, strat */
- { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
- { 19, 13, 14, 1, 7, 4, ZSTD_fast }, /* level 1 */
- { 19, 15, 16, 1, 6, 4, ZSTD_fast }, /* level 2 */
- { 20, 18, 20, 1, 6, 4, ZSTD_fast }, /* level 3 */
- { 20, 13, 17, 2, 5, 4, ZSTD_greedy }, /* level 4.*/
- { 20, 15, 18, 3, 5, 4, ZSTD_greedy }, /* level 5 */
- { 21, 16, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */
- { 21, 17, 20, 3, 5, 4, ZSTD_lazy }, /* level 7 */
- { 21, 18, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 8.*/
- { 21, 20, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */
- { 21, 19, 21, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */
- { 22, 20, 22, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */
- { 22, 20, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */
- { 22, 21, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */
- { 22, 21, 22, 6, 5, 4, ZSTD_lazy2 }, /* level 14 */
- { 22, 21, 21, 5, 5, 4, ZSTD_btlazy2 }, /* level 15 */
- { 23, 22, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */
- { 23, 23, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 17.*/
- { 23, 23, 22, 6, 5, 24, ZSTD_btopt }, /* level 18.*/
- { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19.*/
- { 25, 26, 23, 7, 3, 64, ZSTD_btopt }, /* level 20.*/
- { 26, 26, 23, 7, 3,256, ZSTD_btopt }, /* level 21.*/
- { 27, 27, 25, 9, 3,512, ZSTD_btopt }, /* level 22.*/
-},
-{ /* for srcSize <= 256 KB */
- /* W, C, H, S, L, T, strat */
- { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */
- { 18, 13, 14, 1, 6, 4, ZSTD_fast }, /* level 1 */
- { 18, 15, 17, 1, 5, 4, ZSTD_fast }, /* level 2 */
- { 18, 13, 15, 1, 5, 4, ZSTD_greedy }, /* level 3.*/
- { 18, 15, 17, 1, 5, 4, ZSTD_greedy }, /* level 4.*/
- { 18, 16, 17, 4, 5, 4, ZSTD_greedy }, /* level 5 */
- { 18, 17, 17, 5, 5, 4, ZSTD_greedy }, /* level 6 */
- { 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */
- { 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */
- { 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */
- { 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */
- { 18, 18, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 11.*/
- { 18, 18, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 12.*/
- { 18, 19, 17, 7, 4, 4, ZSTD_btlazy2 }, /* level 13 */
- { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
- { 18, 18, 18, 8, 4, 24, ZSTD_btopt }, /* level 15.*/
- { 18, 19, 18, 8, 3, 48, ZSTD_btopt }, /* level 16.*/
- { 18, 19, 18, 8, 3, 96, ZSTD_btopt }, /* level 17.*/
- { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
- { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
- { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/
- { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/
- { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/
-},
-{ /* for srcSize <= 128 KB */
- /* W, C, H, S, L, T, strat */
- { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
- { 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */
- { 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */
- { 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */
- { 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */
- { 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */
- { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
- { 17, 15, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 7 */
- { 17, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */
- { 17, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */
- { 17, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */
- { 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */
- { 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */
- { 17, 18, 17, 6, 4, 4, ZSTD_btlazy2 }, /* level 13.*/
- { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
- { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
- { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
- { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
- { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
- { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
- { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/
- { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/
- { 17, 18, 17, 11, 3,256, ZSTD_btopt }, /* level 22.*/
-},
-{ /* for srcSize <= 16 KB */
- /* W, C, H, S, L, T, strat */
- { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */
- { 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */
- { 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */
- { 14, 14, 14, 4, 4, 4, ZSTD_greedy }, /* level 3.*/
- { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 4.*/
- { 14, 14, 14, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */
- { 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */
- { 14, 14, 14, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/
- { 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/
- { 14, 15, 14, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/
- { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
- { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
- { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
- { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
- { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
- { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
- { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
- { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
- { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
- { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
- { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/
- { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/
- { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/
-},
-};
-
-/*! ZSTD_getParams() :
-* @return ZSTD_parameters structure for a selected compression level and srcSize.
-* `srcSize` value is optional, select 0 if not known */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize)
-{
- ZSTD_compressionParameters cp;
- size_t const addedSize = srcSize ? 0 : 500;
- U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
- U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
+unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{ /* "default" */
+ /* W, C, H, S, L, TL, strat */
+ { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
+ { 19, 13, 14, 1, 7, 4, ZSTD_fast }, /* level 1 */
+ { 19, 15, 16, 1, 6, 4, ZSTD_fast }, /* level 2 */
+ { 20, 18, 20, 1, 6, 4, ZSTD_fast }, /* level 3 */
+ { 20, 13, 17, 2, 5, 4, ZSTD_greedy }, /* level 4.*/
+ { 20, 15, 18, 3, 5, 4, ZSTD_greedy }, /* level 5 */
+ { 21, 16, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */
+ { 21, 17, 20, 3, 5, 4, ZSTD_lazy }, /* level 7 */
+ { 21, 18, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 8.*/
+ { 21, 20, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */
+ { 21, 19, 21, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */
+ { 22, 20, 22, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */
+ { 22, 20, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */
+ { 22, 21, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */
+ { 22, 21, 22, 6, 5, 4, ZSTD_lazy2 }, /* level 14 */
+ { 22, 21, 21, 5, 5, 4, ZSTD_btlazy2 }, /* level 15 */
+ { 23, 22, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */
+ { 23, 23, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 17.*/
+ { 23, 23, 22, 6, 5, 24, ZSTD_btopt }, /* level 18.*/
+ { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19.*/
+ { 25, 26, 23, 7, 3, 64, ZSTD_btopt }, /* level 20.*/
+ { 26, 26, 23, 7, 3,256, ZSTD_btopt }, /* level 21.*/
+ { 27, 27, 25, 9, 3,512, ZSTD_btopt }, /* level 22.*/
+},
+{ /* for srcSize <= 256 KB */
+ /* W, C, H, S, L, T, strat */
+ { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */
+ { 18, 13, 14, 1, 6, 4, ZSTD_fast }, /* level 1 */
+ { 18, 15, 17, 1, 5, 4, ZSTD_fast }, /* level 2 */
+ { 18, 13, 15, 1, 5, 4, ZSTD_greedy }, /* level 3.*/
+ { 18, 15, 17, 1, 5, 4, ZSTD_greedy }, /* level 4.*/
+ { 18, 16, 17, 4, 5, 4, ZSTD_greedy }, /* level 5 */
+ { 18, 17, 17, 5, 5, 4, ZSTD_greedy }, /* level 6 */
+ { 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */
+ { 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */
+ { 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */
+ { 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */
+ { 18, 18, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 11.*/
+ { 18, 18, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 12.*/
+ { 18, 19, 17, 7, 4, 4, ZSTD_btlazy2 }, /* level 13 */
+ { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
+ { 18, 18, 18, 8, 4, 24, ZSTD_btopt }, /* level 15.*/
+ { 18, 19, 18, 8, 3, 48, ZSTD_btopt }, /* level 16.*/
+ { 18, 19, 18, 8, 3, 96, ZSTD_btopt }, /* level 17.*/
+ { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
+ { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
+ { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/
+ { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/
+ { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/
+},
+{ /* for srcSize <= 128 KB */
+ /* W, C, H, S, L, T, strat */
+ { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
+ { 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */
+ { 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */
+ { 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */
+ { 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */
+ { 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */
+ { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
+ { 17, 15, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 7 */
+ { 17, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */
+ { 17, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */
+ { 17, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */
+ { 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */
+ { 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */
+ { 17, 18, 17, 6, 4, 4, ZSTD_btlazy2 }, /* level 13.*/
+ { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
+ { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
+ { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
+ { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
+ { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
+ { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
+ { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/
+ { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/
+ { 17, 18, 17, 11, 3,256, ZSTD_btopt }, /* level 22.*/
+},
+{ /* for srcSize <= 16 KB */
+ /* W, C, H, S, L, T, strat */
+ { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */
+ { 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */
+ { 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */
+ { 14, 14, 14, 4, 4, 4, ZSTD_greedy }, /* level 3.*/
+ { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 4.*/
+ { 14, 14, 14, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */
+ { 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */
+ { 14, 14, 14, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/
+ { 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/
+ { 14, 15, 14, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/
+ { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
+ { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
+ { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
+ { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
+ { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
+ { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
+ { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
+ { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
+ { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
+ { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
+ { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/
+ { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/
+ { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/
+},
+};
+
+/*! ZSTD_getCParams() :
+* @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
+* `srcSize` value is optional, select 0 if not known */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize)
+{
+ ZSTD_compressionParameters cp;
+ size_t const addedSize = srcSize ? 0 : 500;
+ U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
+ U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
if (compressionLevel < 0) compressionLevel = ZSTD_DEFAULT_CLEVEL;
if (compressionLevel==0) compressionLevel = 1;
- if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
- cp = ZSTD_defaultCParameters[tableID][compressionLevel];
- if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
- if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
- if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
- if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
- }
- return cp;
-}
+ if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
+ cp = ZSTD_defaultCParameters[tableID][compressionLevel];
+ if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
+ if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
+ if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
+ if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
+ }
+ return cp;
+}
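
A minimal usage sketch for ZSTD_getCParams() above, assuming zstd_static.h (shipped in this directory) declares it and that the table columns map, in order, onto the windowLog, chainLog, hashLog, searchLog, searchLength, targetLength and strategy fields of ZSTD_compressionParameters; the level and sizes are illustrative only:

#include "zstd_static.h"   /* assumed location of ZSTD_getCParams / ZSTD_compressionParameters */

static void exampleGetCParams(void)
{
    /* 100000 + 0 bytes lands in the "srcSize <= 128 KB" table (tableID 2), so this
     * returns row 5 of that table, then applies the 32-bit clamping shown above. */
    ZSTD_compressionParameters const cp = ZSTD_getCParams(5 /*level*/, 100000 /*srcSize*/, 0 /*dictSize*/);
    (void)cp;   /* cp.windowLog, cp.chainLog, cp.hashLog, ... are now filled */
}
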
diff --git a/contrib/libs/zstd06/decompress/zstd_decompress.c b/contrib/libs/zstd06/decompress/zstd_decompress.c
index c4ce3f81d8..177e9c8a21 100644
--- a/contrib/libs/zstd06/decompress/zstd_decompress.c
+++ b/contrib/libs/zstd06/decompress/zstd_decompress.c
@@ -1,88 +1,88 @@
-/*
- zstd - standard compression library
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd homepage : http://www.zstd.net
-*/
-
-/* ***************************************************************
-* Tuning parameters
-*****************************************************************/
-/*!
- * HEAPMODE :
- * Select how default decompression function ZSTD_decompress() will allocate memory,
- * in memory stack (0), or in memory heap (1, requires malloc())
- */
-#ifndef ZSTD_HEAPMODE
-# define ZSTD_HEAPMODE 1
-#endif
-
-/*!
-* LEGACY_SUPPORT :
-* if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
-*/
-#ifndef ZSTD_LEGACY_SUPPORT
-# define ZSTD_LEGACY_SUPPORT 0
-#endif
-
-
-/*-*******************************************************
-* Dependencies
-*********************************************************/
-#include <stdlib.h> /* calloc */
-#include <string.h> /* memcpy, memmove */
-#include <stdio.h> /* debug only : printf */
-#include "mem.h" /* low level memory routines */
-#include "zstd_internal.h"
-#include "fse_static.h"
+/*
+ zstd - standard compression library
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd homepage : http://www.zstd.net
+*/
+
+/* ***************************************************************
+* Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTD_HEAPMODE
+# define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+* LEGACY_SUPPORT :
+* if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+# define ZSTD_LEGACY_SUPPORT 0
+#endif
+
+
+/*-*******************************************************
+* Dependencies
+*********************************************************/
+#include <stdlib.h> /* calloc */
+#include <string.h> /* memcpy, memmove */
+#include <stdio.h> /* debug only : printf */
+#include "mem.h" /* low level memory routines */
+#include "zstd_internal.h"
+#include "fse_static.h"
#include "huf_static.h"
-
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-# include "zstd_legacy.h"
-#endif
-
-
-/*-*******************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-#else
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+# include "zstd_legacy.h"
+#endif
+
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#else
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
/*-*************************************
* Macros
***************************************/
@@ -91,892 +91,892 @@
#define HUF_isError ERR_isError
-/*_*******************************************************
-* Memory operations
-**********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
-
-
-/*-*************************************************************
-* Context management
-***************************************************************/
-typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
- ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
-
-struct ZSTD_DCtx_s
-{
- FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
- unsigned hufTableX4[HUF_DTABLE_SIZE(HufLog)];
- const void* previousDstEnd;
- const void* base;
- const void* vBase;
- const void* dictEnd;
- size_t expected;
- size_t headerSize;
- ZSTD_frameParams fParams;
- blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
- ZSTD_dStage stage;
- U32 flagRepeatTable;
- const BYTE* litPtr;
- size_t litBufSize;
- size_t litSize;
- BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
- BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
-
-size_t ZSTD_sizeofDCtx (void) { return sizeof(ZSTD_DCtx); } /* non published interface */
-
-size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
-{
- dctx->expected = ZSTD_frameHeaderSize_min;
- dctx->stage = ZSTDds_getFrameHeaderSize;
- dctx->previousDstEnd = NULL;
- dctx->base = NULL;
- dctx->vBase = NULL;
- dctx->dictEnd = NULL;
- dctx->hufTableX4[0] = HufLog;
- dctx->flagRepeatTable = 0;
- return 0;
-}
-
-ZSTD_DCtx* ZSTD_createDCtx(void)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
- if (dctx==NULL) return NULL;
- ZSTD_decompressBegin(dctx);
- return dctx;
-}
-
-size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
-{
- free(dctx);
- return 0; /* reserved as a potential error code in the future */
-}
-
-void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
-{
- memcpy(dstDCtx, srcDCtx,
- sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max)); /* no need to copy workspace */
-}
-
-
-/*-*************************************************************
-* Decompression section
-***************************************************************/
-
-/* Frame format description
- Frame Header - [ Block Header - Block ] - Frame End
- 1) Frame Header
- - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
- - 1 byte - Frame Descriptor
- 2) Block Header
- - 3 bytes, starting with a 2-bits descriptor
- Uncompressed, Compressed, Frame End, unused
- 3) Block
- See Block Format Description
- 4) Frame End
- - 3 bytes, compatible with Block Header
-*/
-
-
-/* Frame descriptor
-
- 1 byte, using :
- bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h)
- bit 4 : minmatch 4(0) or 3(1)
- bit 5 : reserved (must be zero)
- bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
-
- Optional : content size (0, 1, 2 or 8 bytes)
- 0 : unknown
- 1 : 0-255 bytes
- 2 : 256 - 65535+256
- 8 : up to 16 exa
-*/
-
-
-/* Compressed Block, format description
-
- Block = Literal Section - Sequences Section
- Prerequisite : size of (compressed) block, maximum size of regenerated data
-
- 1) Literal Section
-
- 1.1) Header : 1-5 bytes
- flags: 2 bits
- 00 compressed by Huff0
- 01 unused
- 10 is Raw (uncompressed)
- 11 is Rle
- Note : using 01 => Huff0 with precomputed table ?
- Note : delta map ? => compressed ?
-
- 1.1.1) Huff0-compressed literal block : 3-5 bytes
- srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
- srcSize < 1 KB => 3 bytes (2-2-10-10)
- srcSize < 16KB => 4 bytes (2-2-14-14)
- else => 5 bytes (2-2-18-18)
- big endian convention
-
- 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
- size : 5 bits: (IS_RAW<<6) + (0<<4) + size
- 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
- size&255
- 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
- size>>8&255
- size&255
-
- 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
- size : 5 bits: (IS_RLE<<6) + (0<<4) + size
- 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
- size&255
- 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
- size>>8&255
- size&255
-
- 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
- srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
- srcSize < 1 KB => 3 bytes (2-2-10-10)
- srcSize < 16KB => 4 bytes (2-2-14-14)
- else => 5 bytes (2-2-18-18)
- big endian convention
-
- 1- CTable available (stored into workspace ?)
- 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
-
-
- 1.2) Literal block content
-
- 1.2.1) Huff0 block, using sizes from header
- See Huff0 format
-
- 1.2.2) Huff0 block, using prepared table
-
- 1.2.3) Raw content
-
- 1.2.4) single byte
-
-
- 2) Sequences section
- TO DO
-*/
-
-/** ZSTD_frameHeaderSize() :
-* srcSize must be >= ZSTD_frameHeaderSize_min.
-* @return : size of the Frame Header */
-static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
-{
- if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+/*_*******************************************************
+* Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+* Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+ ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
+
+struct ZSTD_DCtx_s
+{
+ FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+ FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+ FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ unsigned hufTableX4[HUF_DTABLE_SIZE(HufLog)];
+ const void* previousDstEnd;
+ const void* base;
+ const void* vBase;
+ const void* dictEnd;
+ size_t expected;
+ size_t headerSize;
+ ZSTD_frameParams fParams;
+ blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+ ZSTD_dStage stage;
+ U32 flagRepeatTable;
+ const BYTE* litPtr;
+ size_t litBufSize;
+ size_t litSize;
+ BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
+
+size_t ZSTD_sizeofDCtx (void) { return sizeof(ZSTD_DCtx); } /* non published interface */
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
+{
+ dctx->expected = ZSTD_frameHeaderSize_min;
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ dctx->previousDstEnd = NULL;
+ dctx->base = NULL;
+ dctx->vBase = NULL;
+ dctx->dictEnd = NULL;
+ dctx->hufTableX4[0] = HufLog;
+ dctx->flagRepeatTable = 0;
+ return 0;
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+ if (dctx==NULL) return NULL;
+ ZSTD_decompressBegin(dctx);
+ return dctx;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+ free(dctx);
+ return 0; /* reserved as a potential error code in the future */
+}
+
+void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
+{
+ memcpy(dstDCtx, srcDCtx,
+ sizeof(ZSTD_DCtx) - (ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max)); /* no need to copy workspace */
+}
+
+
+/*-*************************************************************
+* Decompression section
+***************************************************************/
+
+/* Frame format description
+ Frame Header - [ Block Header - Block ] - Frame End
+ 1) Frame Header
+ - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
+ - 1 byte - Frame Descriptor
+ 2) Block Header
+ - 3 bytes, starting with a 2-bits descriptor
+ Uncompressed, Compressed, Frame End, unused
+ 3) Block
+ See Block Format Description
+ 4) Frame End
+ - 3 bytes, compatible with Block Header
+*/
+
+
+/* Frame descriptor
+
+ 1 byte, using :
+ bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h)
+ bit 4 : minmatch 4(0) or 3(1)
+ bit 5 : reserved (must be zero)
+ bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
+
+ Optional : content size (0, 1, 2 or 8 bytes)
+ 0 : unknown
+ 1 : 0-255 bytes
+ 2 : 256 - 65535+256
+ 8 : up to 16 exa
+*/
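
A sketch of how the descriptor byte described above unpacks; the helper name is invented for illustration, ZSTD_WINDOWLOG_ABSOLUTEMIN is the constant from zstd_internal.h referenced above, and the same extraction is performed by ZSTD_getFrameParams() further down:

static void describeFrameDescriptor(unsigned char frameDesc)
{
    unsigned const windowLog = (frameDesc & 0x0F) + ZSTD_WINDOWLOG_ABSOLUTEMIN; /* bits 0-3 */
    unsigned const minMatch  = (frameDesc & 0x10) ? 3 : 4;                      /* bit 4 */
    unsigned const reserved  = (frameDesc & 0x20) >> 5;                         /* bit 5 : must stay 0 */
    unsigned const fcsId     = frameDesc >> 6;   /* bits 6-7 : content size field of 0/1/2/8 bytes */
    (void)windowLog; (void)minMatch; (void)reserved; (void)fcsId;
}
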
+
+
+/* Compressed Block, format description
+
+ Block = Literal Section - Sequences Section
+ Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+ 1) Literal Section
+
+ 1.1) Header : 1-5 bytes
+ flags: 2 bits
+ 00 compressed by Huff0
+ 01 unused
+ 10 is Raw (uncompressed)
+ 11 is Rle
+ Note : using 01 => Huff0 with precomputed table ?
+ Note : delta map ? => compressed ?
+
+ 1.1.1) Huff0-compressed literal block : 3-5 bytes
+ srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+ srcSize < 1 KB => 3 bytes (2-2-10-10)
+ srcSize < 16KB => 4 bytes (2-2-14-14)
+ else => 5 bytes (2-2-18-18)
+ big endian convention
+
+ 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+ size : 5 bits: (IS_RAW<<6) + (0<<4) + size
+ 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+ size&255
+ 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+ size>>8&255
+ size&255
+
+ 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+ size : 5 bits: (IS_RLE<<6) + (0<<4) + size
+ 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+ size&255
+ 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+ size>>8&255
+ size&255
+
+ 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+ srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+ srcSize < 1 KB => 3 bytes (2-2-10-10)
+ srcSize < 16KB => 4 bytes (2-2-14-14)
+ else => 5 bytes (2-2-18-18)
+ big endian convention
+
+ 1- CTable available (stored into workspace ?)
+ 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+ 1.2) Literal block content
+
+ 1.2.1) Huff0 block, using sizes from header
+ See Huff0 format
+
+ 1.2.2) Huff0 block, using prepared table
+
+ 1.2.3) Raw content
+
+ 1.2.4) single byte
+
+
+ 2) Sequences section
+ TO DO
+*/
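
As one concrete instance of the layouts in 1.1.2/1.1.3, a raw literal header could be emitted as below; a sketch only, where IS_RAW is the same flag value tested by ZSTD_decodeLiteralsBlock() further down and no bounds checking is shown:

static size_t writeRawLiteralHeader(unsigned char* dst, size_t litSize)
{
    if (litSize < 32) {                       /* 5-bit size : 1 header byte */
        dst[0] = (unsigned char)((IS_RAW<<6) + (0<<4) + litSize);
        return 1;
    }
    if (litSize < 4096) {                     /* 12-bit size : 2 header bytes */
        dst[0] = (unsigned char)((IS_RAW<<6) + (2<<4) + (litSize>>8));
        dst[1] = (unsigned char)(litSize & 255);
        return 2;
    }
    /* 20-bit size : 3 header bytes */
    dst[0] = (unsigned char)((IS_RAW<<6) + (3<<4) + (litSize>>16));
    dst[1] = (unsigned char)((litSize>>8) & 255);
    dst[2] = (unsigned char)(litSize & 255);
    return 3;
}
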
+
+/** ZSTD_frameHeaderSize() :
+* srcSize must be >= ZSTD_frameHeaderSize_min.
+* @return : size of the Frame Header */
+static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+ if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
{ U32 const fcsId = (((const BYTE*)src)[4]) >> 6;
return ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; }
-}
-
-
-/** ZSTD_getFrameParams() :
-* decode Frame Header, or provide expected `srcSize`.
-* @return : 0, `fparamsPtr` is correctly filled,
-* >0, `srcSize` is too small, result is expected `srcSize`,
-* or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
-
- if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_min;
- if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
-
- /* ensure there is enough `srcSize` to fully read/decode frame header */
- { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
- if (srcSize < fhsize) return fhsize; }
-
- memset(fparamsPtr, 0, sizeof(*fparamsPtr));
- { BYTE const frameDesc = ip[4];
- fparamsPtr->windowLog = (frameDesc & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
- if ((frameDesc & 0x20) != 0) return ERROR(frameParameter_unsupported); /* reserved 1 bit */
- switch(frameDesc >> 6) /* fcsId */
- {
- default: /* impossible */
- case 0 : fparamsPtr->frameContentSize = 0; break;
- case 1 : fparamsPtr->frameContentSize = ip[5]; break;
- case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+5)+256; break;
- case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+5); break;
- } }
- return 0;
-}
-
-
-/** ZSTD_decodeFrameHeader() :
-* `srcSize` must be the size provided by ZSTD_frameHeaderSize().
-* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize)
-{
- size_t const result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize);
- if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
- return result;
-}
-
-
-typedef struct
-{
- blockType_t blockType;
- U32 origSize;
-} blockProperties_t;
-
-/*! ZSTD_getcBlockSize() :
-* Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
-{
- const BYTE* const in = (const BYTE* const)src;
- U32 cSize;
-
- if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-
- bpPtr->blockType = (blockType_t)((*in) >> 6);
- cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
- bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
- if (bpPtr->blockType == bt_end) return 0;
- if (bpPtr->blockType == bt_rle) return 1;
- return cSize;
-}
-
-
-static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
- memcpy(dst, src, srcSize);
- return srcSize;
-}
-
-
-/*! ZSTD_decodeLiteralsBlock() :
- @return : nb of bytes read from src (< srcSize ) */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
- const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
-{
- const BYTE* const istart = (const BYTE*) src;
-
- /* any compressed block with literals segment must be at least this size */
- if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-
- switch(istart[0]>> 6)
- {
- case IS_HUF:
- { size_t litSize, litCSize, singleStream=0;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
- switch(lhSize)
- {
- case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
- /* 2 - 2 - 10 - 10 */
- lhSize=3;
- singleStream = istart[0] & 16;
- litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
- litCSize = ((istart[1] & 3) << 8) + istart[2];
- break;
- case 2:
- /* 2 - 2 - 14 - 14 */
- lhSize=4;
- litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
- litCSize = ((istart[2] & 63) << 8) + istart[3];
- break;
- case 3:
- /* 2 - 2 - 18 - 18 */
- lhSize=5;
- litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
- litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4];
- break;
- }
- if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
- if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
-
- if (HUF_isError(singleStream ?
- HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
- HUF_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
- return ERROR(corruption_detected);
-
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
- dctx->litSize = litSize;
- return litCSize + lhSize;
- }
- case IS_PCH:
- { size_t litSize, litCSize;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- if (lhSize != 1) /* only case supported for now : small litSize, single stream */
- return ERROR(corruption_detected);
- if (!dctx->flagRepeatTable)
- return ERROR(dictionary_corrupted);
-
- /* 2 - 2 - 10 - 10 */
- lhSize=3;
- litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
- litCSize = ((istart[1] & 3) << 8) + istart[2];
-
- { size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
- if (HUF_isError(errorCode)) return ERROR(corruption_detected);
- }
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
- dctx->litSize = litSize;
- return litCSize + lhSize;
- }
- case IS_RAW:
- { size_t litSize;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- switch(lhSize)
- {
- case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
- lhSize=1;
- litSize = istart[0] & 31;
- break;
- case 2:
- litSize = ((istart[0] & 15) << 8) + istart[1];
- break;
- case 3:
- litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
- break;
- }
-
- if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
- if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart+lhSize, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
- dctx->litSize = litSize;
- return lhSize+litSize;
- }
- /* direct reference into compressed stream */
- dctx->litPtr = istart+lhSize;
- dctx->litBufSize = srcSize-lhSize;
- dctx->litSize = litSize;
- return lhSize+litSize;
- }
- case IS_RLE:
- { size_t litSize;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- switch(lhSize)
- {
- case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
- lhSize = 1;
- litSize = istart[0] & 31;
- break;
- case 2:
- litSize = ((istart[0] & 15) << 8) + istart[1];
- break;
- case 3:
- litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
- if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
- break;
- }
- if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
- memset(dctx->litBuffer, istart[lhSize], litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
- dctx->litSize = litSize;
- return lhSize+1;
- }
- default:
- return ERROR(corruption_detected); /* impossible */
- }
-}
-
-
-/*! ZSTD_buildSeqTable() :
- @return : nb bytes read from src,
- or an error code if it fails, testable with ZSTD_isError()
-*/
-FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
- const void* src, size_t srcSize,
- const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
-{
- switch(type)
- {
- case FSE_ENCODING_RLE :
- if (!srcSize) return ERROR(srcSize_wrong);
- if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
- FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
- return 1;
- case FSE_ENCODING_RAW :
- FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
- return 0;
- case FSE_ENCODING_STATIC:
- if (!flagRepeatTable) return ERROR(corruption_detected);
- return 0;
- default : /* impossible */
- case FSE_ENCODING_DYNAMIC :
- { U32 tableLog;
- S16 norm[MaxSeq+1];
- size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
- if (FSE_isError(headerSize)) return ERROR(corruption_detected);
- if (tableLog > maxLog) return ERROR(corruption_detected);
- FSE_buildDTable(DTable, norm, max, tableLog);
- return headerSize;
- } }
-}
-
-
-size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
- FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 flagRepeatTable,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* const iend = istart + srcSize;
- const BYTE* ip = istart;
-
- /* check */
- if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
-
- /* SeqHead */
- { int nbSeq = *ip++;
- if (!nbSeq) { *nbSeqPtr=0; return 1; }
- if (nbSeq > 0x7F) {
- if (nbSeq == 0xFF)
- nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
- else
- nbSeq = ((nbSeq-0x80)<<8) + *ip++;
- }
- *nbSeqPtr = nbSeq;
- }
-
- /* FSE table descriptors */
- { U32 const LLtype = *ip >> 6;
- U32 const Offtype = (*ip >> 4) & 3;
- U32 const MLtype = (*ip >> 2) & 3;
- ip++;
-
- /* check */
- if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
- /* Build DTables */
- { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
- if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
- ip += bhSize;
- }
- { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
- if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
- ip += bhSize;
- }
- { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
- if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
- ip += bhSize;
- } }
-
- return ip-istart;
-}
-
-
-typedef struct {
- size_t litLength;
- size_t matchLength;
- size_t offset;
-} seq_t;
-
-typedef struct {
- BIT_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
- size_t prevOffset[ZSTD_REP_INIT];
-} seqState_t;
-
-
-
-static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
-{
- /* Literal length */
- U32 const llCode = FSE_peekSymbol(&(seqState->stateLL));
- U32 const mlCode = FSE_peekSymbol(&(seqState->stateML));
- U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
-
- U32 const llBits = LL_bits[llCode];
- U32 const mlBits = ML_bits[mlCode];
- U32 const ofBits = ofCode;
- U32 const totalBits = llBits+mlBits+ofBits;
-
- static const U32 LL_base[MaxLL+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
- 0x2000, 0x4000, 0x8000, 0x10000 };
-
- static const U32 ML_base[MaxML+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
- 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
-
- static const U32 OF_base[MaxOff+1] = {
- 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
- 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
- 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
- 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 };
-
- /* sequence */
- { size_t offset;
- if (!ofCode)
- offset = 0;
- else {
- offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits); /* <= 26 bits */
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- }
-
- if (offset < ZSTD_REP_NUM) {
- if (llCode == 0 && offset <= 1) offset = 1-offset;
-
- if (offset != 0) {
- size_t temp = seqState->prevOffset[offset];
- if (offset != 1) {
- seqState->prevOffset[2] = seqState->prevOffset[1];
- }
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
-
- } else {
- offset = seqState->prevOffset[0];
- }
- } else {
- offset -= ZSTD_REP_MOVE;
- seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset;
- }
- seq->offset = offset;
- }
-
- seq->matchLength = ML_base[mlCode] + MINMATCH + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */
- if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream));
-
- seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */
- if (MEM_32bits() ||
- (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream));
-
- /* ANS state update */
- FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */
- FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */
- FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */
-}
-
-
+}
+
+
+/** ZSTD_getFrameParams() :
+* decode Frame Header, or provide expected `srcSize`.
+* @return : 0, `fparamsPtr` is correctly filled,
+* >0, `srcSize` is too small, result is expected `srcSize`,
+* or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+
+ if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_min;
+ if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+
+ /* ensure there is enough `srcSize` to fully read/decode frame header */
+ { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
+ if (srcSize < fhsize) return fhsize; }
+
+ memset(fparamsPtr, 0, sizeof(*fparamsPtr));
+ { BYTE const frameDesc = ip[4];
+ fparamsPtr->windowLog = (frameDesc & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+ if ((frameDesc & 0x20) != 0) return ERROR(frameParameter_unsupported); /* reserved 1 bit */
+ switch(frameDesc >> 6) /* fcsId */
+ {
+ default: /* impossible */
+ case 0 : fparamsPtr->frameContentSize = 0; break;
+ case 1 : fparamsPtr->frameContentSize = ip[5]; break;
+ case 2 : fparamsPtr->frameContentSize = MEM_readLE16(ip+5)+256; break;
+ case 3 : fparamsPtr->frameContentSize = MEM_readLE64(ip+5); break;
+ } }
+ return 0;
+}
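
A hedged usage sketch for the function above; the buffer is assumed to start with a zstd frame, and ZSTD_frameParams / ZSTD_isError come from the headers in this directory:

static void exampleGetFrameParams(const void* src, size_t srcSize)
{
    ZSTD_frameParams fparams;
    size_t const ret = ZSTD_getFrameParams(&fparams, src, srcSize);
    if (ZSTD_isError(ret)) return;        /* wrong magic number or reserved bit set */
    if (ret > 0) return;                  /* need at least `ret` bytes to read the header */
    /* ret == 0 : fparams.windowLog and fparams.frameContentSize are now valid */
}
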
+
+
+/** ZSTD_decodeFrameHeader() :
+* `srcSize` must be the size provided by ZSTD_frameHeaderSize().
+* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+ size_t const result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize);
+ if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+ return result;
+}
+
+
+typedef struct
+{
+ blockType_t blockType;
+ U32 origSize;
+} blockProperties_t;
+
+/*! ZSTD_getcBlockSize() :
+* Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+ const BYTE* const in = (const BYTE* const)src;
+ U32 cSize;
+
+ if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+
+ bpPtr->blockType = (blockType_t)((*in) >> 6);
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+ if (bpPtr->blockType == bt_end) return 0;
+ if (bpPtr->blockType == bt_rle) return 1;
+ return cSize;
+}
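
A worked example for the 3-byte block header decoded above (the byte values are invented for illustration):

/* header bytes { 0x41, 0x23, 0x45 } :
 *   blockType = 0x41 >> 6                             = 1  (second blockType_t value)
 *   cSize     = 0x45 + (0x23<<8) + ((0x41 & 7)<<16)   = 0x12345
 * i.e. the top 2 bits of byte 0 carry the type, and the low 3 bits of byte 0 plus
 * bytes 1-2 carry an up-to-19-bit compressed size, most significant bits first. */
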
+
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
+ memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+
+/*! ZSTD_decodeLiteralsBlock() :
+ @return : nb of bytes read from src (< srcSize ) */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
+{
+ const BYTE* const istart = (const BYTE*) src;
+
+ /* any compressed block with literals segment must be at least this size */
+ if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+ switch(istart[0]>> 6)
+ {
+ case IS_HUF:
+ { size_t litSize, litCSize, singleStream=0;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
+ switch(lhSize)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
+ /* 2 - 2 - 10 - 10 */
+ lhSize=3;
+ singleStream = istart[0] & 16;
+ litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+ litCSize = ((istart[1] & 3) << 8) + istart[2];
+ break;
+ case 2:
+ /* 2 - 2 - 14 - 14 */
+ lhSize=4;
+ litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+ litCSize = ((istart[2] & 63) << 8) + istart[3];
+ break;
+ case 3:
+ /* 2 - 2 - 18 - 18 */
+ lhSize=5;
+ litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+ litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4];
+ break;
+ }
+ if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+ if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+
+ if (HUF_isError(singleStream ?
+ HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+ HUF_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+ return ERROR(corruption_detected);
+
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
+ dctx->litSize = litSize;
+ return litCSize + lhSize;
+ }
+ case IS_PCH:
+ { size_t litSize, litCSize;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ if (lhSize != 1) /* only case supported for now : small litSize, single stream */
+ return ERROR(corruption_detected);
+ if (!dctx->flagRepeatTable)
+ return ERROR(dictionary_corrupted);
+
+ /* 2 - 2 - 10 - 10 */
+ lhSize=3;
+ litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+ litCSize = ((istart[1] & 3) << 8) + istart[2];
+
+ { size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
+ if (HUF_isError(errorCode)) return ERROR(corruption_detected);
+ }
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
+ dctx->litSize = litSize;
+ return litCSize + lhSize;
+ }
+ case IS_RAW:
+ { size_t litSize;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ switch(lhSize)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
+ lhSize=1;
+ litSize = istart[0] & 31;
+ break;
+ case 2:
+ litSize = ((istart[0] & 15) << 8) + istart[1];
+ break;
+ case 3:
+ litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+ break;
+ }
+
+ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
+ if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart+lhSize, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+8;
+ dctx->litSize = litSize;
+ return lhSize+litSize;
+ }
+ /* direct reference into compressed stream */
+ dctx->litPtr = istart+lhSize;
+ dctx->litBufSize = srcSize-lhSize;
+ dctx->litSize = litSize;
+ return lhSize+litSize;
+ }
+ case IS_RLE:
+ { size_t litSize;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ switch(lhSize)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
+ lhSize = 1;
+ litSize = istart[0] & 31;
+ break;
+ case 2:
+ litSize = ((istart[0] & 15) << 8) + istart[1];
+ break;
+ case 3:
+ litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+ if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+ break;
+ }
+ if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+ memset(dctx->litBuffer, istart[lhSize], litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = ZSTD_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
+ dctx->litSize = litSize;
+ return lhSize+1;
+ }
+ default:
+ return ERROR(corruption_detected); /* impossible */
+ }
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ @return : nb bytes read from src,
+ or an error code if it fails, testable with ZSTD_isError()
+*/
+FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
+ const void* src, size_t srcSize,
+ const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
+{
+ switch(type)
+ {
+ case FSE_ENCODING_RLE :
+ if (!srcSize) return ERROR(srcSize_wrong);
+ if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+ FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
+ return 1;
+ case FSE_ENCODING_RAW :
+ FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
+ return 0;
+ case FSE_ENCODING_STATIC:
+ if (!flagRepeatTable) return ERROR(corruption_detected);
+ return 0;
+ default : /* impossible */
+ case FSE_ENCODING_DYNAMIC :
+ { U32 tableLog;
+ S16 norm[MaxSeq+1];
+ size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+ if (FSE_isError(headerSize)) return ERROR(corruption_detected);
+ if (tableLog > maxLog) return ERROR(corruption_detected);
+ FSE_buildDTable(DTable, norm, max, tableLog);
+ return headerSize;
+ } }
+}
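
In summary, the four descriptor modes handled above behave as follows (a restatement of the switch, not extra behaviour):

/*  FSE_ENCODING_RLE     : one symbol byte follows, a degenerate table is built     (1 byte read)
 *  FSE_ENCODING_RAW     : no header, the predefined default distribution is used   (0 bytes read)
 *  FSE_ENCODING_STATIC  : reuse the table left by a previous block or dictionary,
 *                         only legal when flagRepeatTable is set                   (0 bytes read)
 *  FSE_ENCODING_DYNAMIC : an FSE_readNCount() header follows and a fresh DTable
 *                         is built from it                                  (header size read) */
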
+
+
+size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 flagRepeatTable,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* ip = istart;
+
+ /* check */
+ if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
+
+ /* SeqHead */
+ { int nbSeq = *ip++;
+ if (!nbSeq) { *nbSeqPtr=0; return 1; }
+ if (nbSeq > 0x7F) {
+ if (nbSeq == 0xFF)
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+ else
+ nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+ }
+ *nbSeqPtr = nbSeq;
+ }
+
+ /* FSE table descriptors */
+ { U32 const LLtype = *ip >> 6;
+ U32 const Offtype = (*ip >> 4) & 3;
+ U32 const MLtype = (*ip >> 2) & 3;
+ ip++;
+
+ /* check */
+ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+ /* Build DTables */
+ { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
+ if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
+ ip += bhSize;
+ }
+ { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
+ if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
+ ip += bhSize;
+ }
+ { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
+ if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
+ ip += bhSize;
+ } }
+
+ return ip-istart;
+}
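
The variable-length sequence count read at the top of the function can be summarised with a few worked values (illustrative):

/*  first byte 0x00   -> 0 sequences, nothing else follows in the section
 *  first byte 0x64   -> 100 sequences                       (1-byte form, value <= 0x7F)
 *  bytes 0x81 0x40   -> ((0x81-0x80)<<8) + 0x40 = 320       (2-byte form)
 *  bytes 0xFF xx xx  -> MEM_readLE16(xx xx) + LONGNBSEQ     (3-byte form) */
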
+
+
+typedef struct {
+ size_t litLength;
+ size_t matchLength;
+ size_t offset;
+} seq_t;
+
+typedef struct {
+ BIT_DStream_t DStream;
+ FSE_DState_t stateLL;
+ FSE_DState_t stateOffb;
+ FSE_DState_t stateML;
+ size_t prevOffset[ZSTD_REP_INIT];
+} seqState_t;
+
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+ /* Literal length */
+ U32 const llCode = FSE_peekSymbol(&(seqState->stateLL));
+ U32 const mlCode = FSE_peekSymbol(&(seqState->stateML));
+ U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
+
+ U32 const llBits = LL_bits[llCode];
+ U32 const mlBits = ML_bits[mlCode];
+ U32 const ofBits = ofCode;
+ U32 const totalBits = llBits+mlBits+ofBits;
+
+ static const U32 LL_base[MaxLL+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+ 0x2000, 0x4000, 0x8000, 0x10000 };
+
+ static const U32 ML_base[MaxML+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
+ 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
+
+ static const U32 OF_base[MaxOff+1] = {
+ 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
+ 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
+ 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
+ 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 };
+
+ /* sequence */
+ { size_t offset;
+ if (!ofCode)
+ offset = 0;
+ else {
+ offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits); /* <= 26 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ }
+
+ if (offset < ZSTD_REP_NUM) {
+ if (llCode == 0 && offset <= 1) offset = 1-offset;
+
+ if (offset != 0) {
+ size_t temp = seqState->prevOffset[offset];
+ if (offset != 1) {
+ seqState->prevOffset[2] = seqState->prevOffset[1];
+ }
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset = temp;
+
+ } else {
+ offset = seqState->prevOffset[0];
+ }
+ } else {
+ offset -= ZSTD_REP_MOVE;
+ seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset;
+ }
+ seq->offset = offset;
+ }
+
+ seq->matchLength = ML_base[mlCode] + MINMATCH + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */
+ if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream));
+
+ seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */
+ if (MEM_32bits() ||
+ (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream));
+
+ /* ANS state update */
+ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */
+ FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */
+ FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */
+}
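
A plain-language restatement of the repeat-offset branch above (no new behaviour is implied):

/*  decoded offset 0                  -> reuse prevOffset[0], the most recent offset
 *  decoded offset 1..ZSTD_REP_NUM-1  -> reuse an older prevOffset[] entry, which is
 *                                       then rotated to the front of the history
 *  decoded offset >= ZSTD_REP_NUM    -> a brand-new offset, stored after subtracting
 *                                       ZSTD_REP_MOVE
 *  when the sequence carries no literals (llCode == 0), values 0 and 1 swap meaning,
 *  so 0 then refers to prevOffset[1] instead of repeating the previous offset. */
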
+
+
FORCE_INLINE
size_t ZSTD_execSequence(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit_8,
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
-{
- BYTE* const oLitEnd = op + sequence.litLength;
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_8 = oend-8;
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
- const BYTE* match = oLitEnd - sequence.offset;
-
- /* check */
- if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
- if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */
-
- /* copy Literals */
- ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = iLitEnd; /* update for next sequence */
-
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - base)) {
- /* offset beyond prefix */
- if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
- match = dictEnd - (base-match);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- { size_t const length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = base;
- } }
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_8) {
- ZSTD_wildcopy(op, match, oend_8 - op);
- match += oend_8 - op;
- op = oend_8;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- return sequenceLength;
-}
-
-
-static size_t ZSTD_decompressSequences(
- ZSTD_DCtx* dctx,
- void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize)
-{
- const BYTE* ip = (const BYTE*)seqStart;
- const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* const oend = ostart + maxDstSize;
- BYTE* op = ostart;
- const BYTE* litPtr = dctx->litPtr;
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
- const BYTE* const litEnd = litPtr + dctx->litSize;
- FSE_DTable* DTableLL = dctx->LLTable;
- FSE_DTable* DTableML = dctx->MLTable;
- FSE_DTable* DTableOffb = dctx->OffTable;
- const BYTE* const base = (const BYTE*) (dctx->base);
- const BYTE* const vBase = (const BYTE*) (dctx->vBase);
- const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- int nbSeq;
-
- /* Build Decoding Tables */
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->flagRepeatTable, ip, seqSize);
- if (ZSTD_isError(seqHSize)) return seqHSize;
- ip += seqHSize;
- dctx->flagRepeatTable = 0;
- }
-
- /* Regen sequences */
- if (nbSeq) {
- seq_t sequence;
- seqState_t seqState;
-
- memset(&sequence, 0, sizeof(sequence));
- sequence.offset = REPCODE_STARTVALUE;
- { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) seqState.prevOffset[i] = REPCODE_STARTVALUE; }
- { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
- if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
- FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
- FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
- FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
-
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
- nbSeq--;
- ZSTD_decodeSequence(&sequence, &seqState);
-
-#if 0 /* debug */
- static BYTE* start = NULL;
- if (start==NULL) start = op;
- size_t pos = (size_t)(op-start);
- if ((pos >= 5810037) && (pos < 5810400))
- printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n",
- pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
-#endif
-
- { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- } }
-
- /* check if reached exact end */
- if (nbSeq) return ERROR(corruption_detected);
- }
-
- /* last literal segment */
- { size_t const lastLLSize = litEnd - litPtr;
- if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
- if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
- }
-
- return op-ostart;
-}
-
-
-static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
-{
- if (dst != dctx->previousDstEnd) { /* not contiguous */
- dctx->dictEnd = dctx->previousDstEnd;
- dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
- dctx->base = dst;
- dctx->previousDstEnd = dst;
- }
-}
-
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{ /* blockType == blockCompressed */
- const BYTE* ip = (const BYTE*)src;
-
- if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
-
- /* Decode literals sub-block */
- { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
- if (ZSTD_isError(litCSize)) return litCSize;
- ip += litCSize;
- srcSize -= litCSize;
- }
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-
-size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- ZSTD_checkContinuity(dctx, dst);
- return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
-}
-
-
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit_8,
+ const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_8 = oend-8;
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+
+ /* check */
+ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
+ if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */
+
+ /* copy Literals */
+ ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = iLitEnd; /* update for next sequence */
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - base)) {
+ /* offset beyond prefix */
+ if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
+ match = dictEnd - (base-match);
+ if (match + sequence.matchLength <= dictEnd) {
+ memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = base;
+ } }
+
+ /* match within prefix */
+ if (sequence.offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+ int const sub2 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= sub2;
+ } else {
+ ZSTD_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-(16-MINMATCH)) {
+ if (op < oend_8) {
+ ZSTD_wildcopy(op, match, oend_8 - op);
+ match += oend_8 - op;
+ op = oend_8;
+ }
+ while (op < oMatchEnd) *op++ = *match++;
+ } else {
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ return sequenceLength;
+}
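
A note on the close-range branch above (offset < 8), restating why the two small tables exist:

/*  a straight 8-byte copy would read bytes that have not been written yet, so the
 *  first 4 bytes are copied one at a time, then `match` is nudged forward with
 *  dec32table[] and back with dec64table[] so that every following 8-byte wildcopy
 *  only reads data that is already in place in the output buffer. */
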
+
+
+static size_t ZSTD_decompressSequences(
+ ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize)
+{
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* const oend = ostart + maxDstSize;
+ BYTE* op = ostart;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
+ const BYTE* const litEnd = litPtr + dctx->litSize;
+ FSE_DTable* DTableLL = dctx->LLTable;
+ FSE_DTable* DTableML = dctx->MLTable;
+ FSE_DTable* DTableOffb = dctx->OffTable;
+ const BYTE* const base = (const BYTE*) (dctx->base);
+ const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+ int nbSeq;
+
+ /* Build Decoding Tables */
+ { size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->flagRepeatTable, ip, seqSize);
+ if (ZSTD_isError(seqHSize)) return seqHSize;
+ ip += seqHSize;
+ dctx->flagRepeatTable = 0;
+ }
+
+ /* Regen sequences */
+ if (nbSeq) {
+ seq_t sequence;
+ seqState_t seqState;
+
+ memset(&sequence, 0, sizeof(sequence));
+ sequence.offset = REPCODE_STARTVALUE;
+ { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) seqState.prevOffset[i] = REPCODE_STARTVALUE; }
+ { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+ if (ERR_isError(errorCode)) return ERROR(corruption_detected); }
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
+ nbSeq--;
+ ZSTD_decodeSequence(&sequence, &seqState);
+
+#if 0 /* debug */
+ static BYTE* start = NULL;
+ if (start==NULL) start = op;
+ size_t pos = (size_t)(op-start);
+ if ((pos >= 5810037) && (pos < 5810400))
+ printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n",
+ pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
+#endif
+
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ } }
+
+ /* check if reached exact end */
+ if (nbSeq) return ERROR(corruption_detected);
+ }
+
+ /* last literal segment */
+ { size_t const lastLLSize = litEnd - litPtr;
+ if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
+ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+ memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+
+ return op-ostart;
+}
+
+
+static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+{
+ if (dst != dctx->previousDstEnd) { /* not contiguous */
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+ dctx->base = dst;
+ dctx->previousDstEnd = dst;
+ }
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{ /* blockType == blockCompressed */
+ const BYTE* ip = (const BYTE*)src;
+
+ if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+
+ /* Decode literals sub-block */
+ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+ if (ZSTD_isError(litCSize)) return litCSize;
+ ip += litCSize;
+ srcSize -= litCSize;
+ }
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ ZSTD_checkContinuity(dctx, dst);
+ return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
/*! ZSTD_decompressFrame() :
* `dctx` must be properly initialized */
-static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
const BYTE* const iend = ip + srcSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + dstCapacity;
- size_t remainingSize = srcSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + dstCapacity;
+ size_t remainingSize = srcSize;
blockProperties_t blockProperties = { bt_compressed, 0 };
-
- /* check */
- if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-
- /* Frame Header */
- { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min);
- if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
- if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
- if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected);
- ip += frameHeaderSize; remainingSize -= frameHeaderSize;
- }
-
- /* Loop on each block */
- while (1) {
- size_t decodedSize=0;
- size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
- if (ZSTD_isError(cBlockSize)) return cBlockSize;
-
- ip += ZSTD_blockHeaderSize;
- remainingSize -= ZSTD_blockHeaderSize;
- if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
-
- switch(blockProperties.blockType)
- {
- case bt_compressed:
- decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
- break;
- case bt_raw :
- decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet supported */
- break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return ERROR(srcSize_wrong);
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- if (cBlockSize == 0) break; /* bt_end */
-
- if (ZSTD_isError(decodedSize)) return decodedSize;
- op += decodedSize;
- ip += cBlockSize;
- remainingSize -= cBlockSize;
- }
-
- return op-ostart;
-}
-
-
-size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- ZSTD_copyDCtx(dctx, refDCtx);
- ZSTD_checkContinuity(dctx, dst);
- return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
-}
-
-
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict, size_t dictSize)
-{
+
+ /* check */
+ if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+
+ /* Frame Header */
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min);
+ if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+ if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+ if (ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize)) return ERROR(corruption_detected);
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+ }
+
+ /* Loop on each block */
+ while (1) {
+ size_t decodedSize=0;
+ size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+ ip += ZSTD_blockHeaderSize;
+ remainingSize -= ZSTD_blockHeaderSize;
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+ break;
+ case bt_raw :
+ decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet supported */
+ break;
+ case bt_end :
+ /* end of frame */
+ if (remainingSize) return ERROR(srcSize_wrong);
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ if (cBlockSize == 0) break; /* bt_end */
+
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ op += decodedSize;
+ ip += cBlockSize;
+ remainingSize -= cBlockSize;
+ }
+
+ return op-ostart;
+}
+
+
+size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ ZSTD_copyDCtx(dctx, refDCtx);
+ ZSTD_checkContinuity(dctx, dst);
+ return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize)
+{
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
{ const U32 magicNumber = MEM_readLE32(src);
if (ZSTD_isLegacy(magicNumber))
return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize, magicNumber);
}
#endif
- ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
- ZSTD_checkContinuity(dctx, dst);
- return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
-}
-
-
-size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
-}
-
-
-size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
- size_t regenSize;
- ZSTD_DCtx* dctx = ZSTD_createDCtx();
- if (dctx==NULL) return ERROR(memory_allocation);
- regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
- ZSTD_freeDCtx(dctx);
- return regenSize;
+ ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
+ ZSTD_checkContinuity(dctx, dst);
+ return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+}
+
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+ size_t regenSize;
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ if (dctx==NULL) return ERROR(memory_allocation);
+ regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+ ZSTD_freeDCtx(dctx);
+ return regenSize;
#else /* stack mode */
- ZSTD_DCtx dctx;
- return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
-#endif
-}
-
-
-/*_******************************
-* Streaming Decompression API
-********************************/
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
-{
- return dctx->expected;
-}
-
+ ZSTD_DCtx dctx;
+ return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+
+/*_******************************
+* Streaming Decompression API
+********************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+ return dctx->expected;
+}
+
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- /* Sanity check */
- if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+{
+ /* Sanity check */
+ if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
-
- /* Decompress : frame header; part 1 */
- switch (dctx->stage)
- {
- case ZSTDds_getFrameHeaderSize :
+
+ /* Decompress : frame header; part 1 */
+ switch (dctx->stage)
+ {
+ case ZSTDds_getFrameHeaderSize :
if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min);
if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
@@ -985,79 +985,79 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_min;
dctx->stage = ZSTDds_decodeFrameHeader;
return 0;
- }
+ }
dctx->expected = 0; /* not necessary to copy more */
- case ZSTDds_decodeFrameHeader:
+ case ZSTDds_decodeFrameHeader:
{ size_t result;
- memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
- result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize);
- if (ZSTD_isError(result)) return result;
- dctx->expected = ZSTD_blockHeaderSize;
- dctx->stage = ZSTDds_decodeBlockHeader;
- return 0;
- }
- case ZSTDds_decodeBlockHeader:
+ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
+ result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize);
+ if (ZSTD_isError(result)) return result;
+ dctx->expected = ZSTD_blockHeaderSize;
+ dctx->stage = ZSTDds_decodeBlockHeader;
+ return 0;
+ }
+ case ZSTDds_decodeBlockHeader:
{ blockProperties_t bp;
- size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
- if (ZSTD_isError(cBlockSize)) return cBlockSize;
- if (bp.blockType == bt_end) {
- dctx->expected = 0;
- dctx->stage = ZSTDds_getFrameHeaderSize;
- } else {
- dctx->expected = cBlockSize;
- dctx->bType = bp.blockType;
- dctx->stage = ZSTDds_decompressBlock;
- }
- return 0;
- }
- case ZSTDds_decompressBlock:
+ size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+ if (bp.blockType == bt_end) {
+ dctx->expected = 0;
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ } else {
+ dctx->expected = cBlockSize;
+ dctx->bType = bp.blockType;
+ dctx->stage = ZSTDds_decompressBlock;
+ }
+ return 0;
+ }
+ case ZSTDds_decompressBlock:
{ size_t rSize;
- switch(dctx->bType)
- {
- case bt_compressed:
+ switch(dctx->bType)
+ {
+ case bt_compressed:
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
- break;
- case bt_raw :
+ break;
+ case bt_raw :
rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- dctx->stage = ZSTDds_decodeBlockHeader;
- dctx->expected = ZSTD_blockHeaderSize;
- dctx->previousDstEnd = (char*)dst + rSize;
- return rSize;
- }
- default:
- return ERROR(GENERIC); /* impossible */
- }
-}
-
-
-static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
- dctx->dictEnd = dctx->previousDstEnd;
- dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
- dctx->base = dict;
- dctx->previousDstEnd = (const char*)dict + dictSize;
-}
-
-static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet handled */
+ break;
+ case bt_end : /* should never happen (filtered at phase 1) */
+ rSize = 0;
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ dctx->stage = ZSTDds_decodeBlockHeader;
+ dctx->expected = ZSTD_blockHeaderSize;
+ dctx->previousDstEnd = (char*)dst + rSize;
+ return rSize;
+ }
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+}
+
+
+static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+ dctx->base = dict;
+ dctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize;
-
- hSize = HUF_readDTableX4(dctx->hufTableX4, dict, dictSize);
- if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + hSize;
- dictSize -= hSize;
-
+
+ hSize = HUF_readDTableX4(dctx->hufTableX4, dict, dictSize);
+ if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
+ dict = (const char*)dict + hSize;
+ dictSize -= hSize;
+
{ short offcodeNCount[MaxOff+1];
U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
@@ -1067,7 +1067,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz
dict = (const char*)dict + offcodeHeaderSize;
dictSize -= offcodeHeaderSize;
}
-
+
{ short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
@@ -1077,7 +1077,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz
dict = (const char*)dict + matchlengthHeaderSize;
dictSize -= matchlengthHeaderSize;
}
-
+
{ short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
@@ -1085,45 +1085,45 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz
{ size_t const errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
}
-
- dctx->flagRepeatTable = 1;
- return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
-}
-
-static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
- size_t eSize;
- U32 const magic = MEM_readLE32(dict);
- if (magic != ZSTD_DICT_MAGIC) {
- /* pure content mode */
- ZSTD_refDictContent(dctx, dict, dictSize);
- return 0;
- }
- /* load entropy tables */
- dict = (const char*)dict + 4;
- dictSize -= 4;
- eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
- if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
-
- /* reference dictionary content */
- dict = (const char*)dict + eSize;
- dictSize -= eSize;
- ZSTD_refDictContent(dctx, dict, dictSize);
-
- return 0;
-}
-
-
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
- { size_t const errorCode = ZSTD_decompressBegin(dctx);
- if (ZSTD_isError(errorCode)) return errorCode; }
-
- if (dict && dictSize) {
- size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize);
- if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted);
- }
-
- return 0;
-}
-
+
+ dctx->flagRepeatTable = 1;
+ return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ size_t eSize;
+ U32 const magic = MEM_readLE32(dict);
+ if (magic != ZSTD_DICT_MAGIC) {
+ /* pure content mode */
+ ZSTD_refDictContent(dctx, dict, dictSize);
+ return 0;
+ }
+ /* load entropy tables */
+ dict = (const char*)dict + 4;
+ dictSize -= 4;
+ eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
+ if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
+
+ /* reference dictionary content */
+ dict = (const char*)dict + eSize;
+ dictSize -= eSize;
+ ZSTD_refDictContent(dctx, dict, dictSize);
+
+ return 0;
+}
+
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ { size_t const errorCode = ZSTD_decompressBegin(dctx);
+ if (ZSTD_isError(errorCode)) return errorCode; }
+
+ if (dict && dictSize) {
+ size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize);
+ if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted);
+ }
+
+ return 0;
+}
+
diff --git a/contrib/libs/zstd06/dictBuilder/divsufsort.c b/contrib/libs/zstd06/dictBuilder/divsufsort.c
index 17116166b4..60cceb0883 100644
--- a/contrib/libs/zstd06/dictBuilder/divsufsort.c
+++ b/contrib/libs/zstd06/dictBuilder/divsufsort.c
@@ -1,1913 +1,1913 @@
-/*
- * divsufsort.c for libdivsufsort-lite
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*- Compiler specifics -*/
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wshorten-64-to-32"
-#endif
-
-#if defined(_MSC_VER)
-# pragma warning(disable : 4244)
-# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
-#endif
-
-
-/*- Dependencies -*/
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "divsufsort.h"
-
-/*- Constants -*/
-#if defined(INLINE)
-# undef INLINE
-#endif
-#if !defined(INLINE)
-# define INLINE __inline
-#endif
-#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
-# undef ALPHABET_SIZE
-#endif
-#if !defined(ALPHABET_SIZE)
-# define ALPHABET_SIZE (256)
-#endif
-#define BUCKET_A_SIZE (ALPHABET_SIZE)
-#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
-#if defined(SS_INSERTIONSORT_THRESHOLD)
-# if SS_INSERTIONSORT_THRESHOLD < 1
-# undef SS_INSERTIONSORT_THRESHOLD
-# define SS_INSERTIONSORT_THRESHOLD (1)
-# endif
-#else
-# define SS_INSERTIONSORT_THRESHOLD (8)
-#endif
-#if defined(SS_BLOCKSIZE)
-# if SS_BLOCKSIZE < 0
-# undef SS_BLOCKSIZE
-# define SS_BLOCKSIZE (0)
-# elif 32768 <= SS_BLOCKSIZE
-# undef SS_BLOCKSIZE
-# define SS_BLOCKSIZE (32767)
-# endif
-#else
-# define SS_BLOCKSIZE (1024)
-#endif
-/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
-#if SS_BLOCKSIZE == 0
-# define SS_MISORT_STACKSIZE (96)
-#elif SS_BLOCKSIZE <= 4096
-# define SS_MISORT_STACKSIZE (16)
-#else
-# define SS_MISORT_STACKSIZE (24)
-#endif
-#define SS_SMERGE_STACKSIZE (32)
-#define TR_INSERTIONSORT_THRESHOLD (8)
-#define TR_STACKSIZE (64)
-
-
-/*- Macros -*/
-#ifndef SWAP
-# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
-#endif /* SWAP */
-#ifndef MIN
-# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
-#endif /* MIN */
-#ifndef MAX
-# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
-#endif /* MAX */
-#define STACK_PUSH(_a, _b, _c, _d)\
- do {\
- assert(ssize < STACK_SIZE);\
- stack[ssize].a = (_a), stack[ssize].b = (_b),\
- stack[ssize].c = (_c), stack[ssize++].d = (_d);\
- } while(0)
-#define STACK_PUSH5(_a, _b, _c, _d, _e)\
- do {\
- assert(ssize < STACK_SIZE);\
- stack[ssize].a = (_a), stack[ssize].b = (_b),\
- stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
- } while(0)
-#define STACK_POP(_a, _b, _c, _d)\
- do {\
- assert(0 <= ssize);\
- if(ssize == 0) { return; }\
- (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
- (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
- } while(0)
-#define STACK_POP5(_a, _b, _c, _d, _e)\
- do {\
- assert(0 <= ssize);\
- if(ssize == 0) { return; }\
- (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
- (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
- } while(0)
-#define BUCKET_A(_c0) bucket_A[(_c0)]
-#if ALPHABET_SIZE == 256
-#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
-#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
-#else
-#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
-#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
-#endif
-
-
-/*- Private Functions -*/
-
-static const int lg_table[256]= {
- -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
- 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
- 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
-};
-
-#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
-
-static INLINE
-int
-ss_ilg(int n) {
-#if SS_BLOCKSIZE == 0
- return (n & 0xffff0000) ?
- ((n & 0xff000000) ?
- 24 + lg_table[(n >> 24) & 0xff] :
- 16 + lg_table[(n >> 16) & 0xff]) :
- ((n & 0x0000ff00) ?
- 8 + lg_table[(n >> 8) & 0xff] :
- 0 + lg_table[(n >> 0) & 0xff]);
-#elif SS_BLOCKSIZE < 256
- return lg_table[n];
-#else
- return (n & 0xff00) ?
- 8 + lg_table[(n >> 8) & 0xff] :
- 0 + lg_table[(n >> 0) & 0xff];
-#endif
-}
-
-#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
-
-#if SS_BLOCKSIZE != 0
-
-static const int sqq_table[256] = {
- 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
- 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
- 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
-110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
-128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
-143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
-156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
-169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
-181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
-192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
-202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
-212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
-221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
-230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
-239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
-247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
-};
-
-static INLINE
-int
-ss_isqrt(int x) {
- int y, e;
-
- if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
- e = (x & 0xffff0000) ?
- ((x & 0xff000000) ?
- 24 + lg_table[(x >> 24) & 0xff] :
- 16 + lg_table[(x >> 16) & 0xff]) :
- ((x & 0x0000ff00) ?
- 8 + lg_table[(x >> 8) & 0xff] :
- 0 + lg_table[(x >> 0) & 0xff]);
-
- if(e >= 16) {
- y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
- if(e >= 24) { y = (y + 1 + x / y) >> 1; }
- y = (y + 1 + x / y) >> 1;
- } else if(e >= 8) {
- y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
- } else {
- return sqq_table[x] >> 4;
- }
-
- return (x < (y * y)) ? y - 1 : y;
-}
-
-#endif /* SS_BLOCKSIZE != 0 */
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Compares two suffixes. */
-static INLINE
-int
-ss_compare(const unsigned char *T,
- const int *p1, const int *p2,
- int depth) {
- const unsigned char *U1, *U2, *U1n, *U2n;
-
- for(U1 = T + depth + *p1,
- U2 = T + depth + *p2,
- U1n = T + *(p1 + 1) + 2,
- U2n = T + *(p2 + 1) + 2;
- (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
- ++U1, ++U2) {
- }
-
- return U1 < U1n ?
- (U2 < U2n ? *U1 - *U2 : 1) :
- (U2 < U2n ? -1 : 0);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
-
-/* Insertionsort for small size groups */
-static
-void
-ss_insertionsort(const unsigned char *T, const int *PA,
- int *first, int *last, int depth) {
- int *i, *j;
- int t;
- int r;
-
- for(i = last - 2; first <= i; --i) {
- for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
- do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
- if(last <= j) { break; }
- }
- if(r == 0) { *j = ~*j; }
- *(j - 1) = t;
- }
-}
-
-#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
-
-
-/*---------------------------------------------------------------------------*/
-
-#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
-
-static INLINE
-void
-ss_fixdown(const unsigned char *Td, const int *PA,
- int *SA, int i, int size) {
- int j, k;
- int v;
- int c, d, e;
-
- for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
- d = Td[PA[SA[k = j++]]];
- if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
- if(d <= c) { break; }
- }
- SA[i] = v;
-}
-
-/* Simple top-down heapsort. */
-static
-void
-ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
- int i, m;
- int t;
-
- m = size;
- if((size % 2) == 0) {
- m--;
- if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
- }
-
- for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
- if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
- for(i = m - 1; 0 < i; --i) {
- t = SA[0], SA[0] = SA[i];
- ss_fixdown(Td, PA, SA, 0, i);
- SA[i] = t;
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Returns the median of three elements. */
-static INLINE
-int *
-ss_median3(const unsigned char *Td, const int *PA,
- int *v1, int *v2, int *v3) {
- int *t;
- if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
- if(Td[PA[*v2]] > Td[PA[*v3]]) {
- if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
- else { return v3; }
- }
- return v2;
-}
-
-/* Returns the median of five elements. */
-static INLINE
-int *
-ss_median5(const unsigned char *Td, const int *PA,
- int *v1, int *v2, int *v3, int *v4, int *v5) {
- int *t;
- if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
- if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
- if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
- if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
- if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
- if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
- return v3;
-}
-
-/* Returns the pivot element. */
-static INLINE
-int *
-ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
- int *middle;
- int t;
-
- t = last - first;
- middle = first + t / 2;
-
- if(t <= 512) {
- if(t <= 32) {
- return ss_median3(Td, PA, first, middle, last - 1);
- } else {
- t >>= 2;
- return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
- }
- }
- t >>= 3;
- first = ss_median3(Td, PA, first, first + t, first + (t << 1));
- middle = ss_median3(Td, PA, middle - t, middle, middle + t);
- last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
- return ss_median3(Td, PA, first, middle, last);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Binary partition for substrings. */
-static INLINE
-int *
-ss_partition(const int *PA,
- int *first, int *last, int depth) {
- int *a, *b;
- int t;
- for(a = first - 1, b = last;;) {
- for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
- for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
- if(b <= a) { break; }
- t = ~*b;
- *b = *a;
- *a = t;
- }
- if(first < a) { *first = ~*first; }
- return a;
-}
-
-/* Multikey introsort for medium size groups. */
-static
-void
-ss_mintrosort(const unsigned char *T, const int *PA,
- int *first, int *last,
- int depth) {
-#define STACK_SIZE SS_MISORT_STACKSIZE
- struct { int *a, *b, c; int d; } stack[STACK_SIZE];
- const unsigned char *Td;
- int *a, *b, *c, *d, *e, *f;
- int s, t;
- int ssize;
- int limit;
- int v, x = 0;
-
- for(ssize = 0, limit = ss_ilg(last - first);;) {
-
- if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
-#if 1 < SS_INSERTIONSORT_THRESHOLD
- if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
-#endif
- STACK_POP(first, last, depth, limit);
- continue;
- }
-
- Td = T + depth;
- if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
- if(limit < 0) {
- for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
- if((x = Td[PA[*a]]) != v) {
- if(1 < (a - first)) { break; }
- v = x;
- first = a;
- }
- }
- if(Td[PA[*first] - 1] < v) {
- first = ss_partition(PA, first, a, depth);
- }
- if((a - first) <= (last - a)) {
- if(1 < (a - first)) {
- STACK_PUSH(a, last, depth, -1);
- last = a, depth += 1, limit = ss_ilg(a - first);
- } else {
- first = a, limit = -1;
- }
- } else {
- if(1 < (last - a)) {
- STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
- first = a, limit = -1;
- } else {
- last = a, depth += 1, limit = ss_ilg(a - first);
- }
- }
- continue;
- }
-
- /* choose pivot */
- a = ss_pivot(Td, PA, first, last);
- v = Td[PA[*a]];
- SWAP(*first, *a);
-
- /* partition */
- for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
- if(((a = b) < last) && (x < v)) {
- for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
- if(x == v) { SWAP(*b, *a); ++a; }
- }
- }
- for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
- if((b < (d = c)) && (x > v)) {
- for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
- if(x == v) { SWAP(*c, *d); --d; }
- }
- }
- for(; b < c;) {
- SWAP(*b, *c);
- for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
- if(x == v) { SWAP(*b, *a); ++a; }
- }
- for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
- if(x == v) { SWAP(*c, *d); --d; }
- }
- }
-
- if(a <= d) {
- c = b - 1;
-
- if((s = a - first) > (t = b - a)) { s = t; }
- for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
- if((s = d - c) > (t = last - d - 1)) { s = t; }
- for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
-
- a = first + (b - a), c = last - (d - c);
- b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
-
- if((a - first) <= (last - c)) {
- if((last - c) <= (c - b)) {
- STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
- STACK_PUSH(c, last, depth, limit);
- last = a;
- } else if((a - first) <= (c - b)) {
- STACK_PUSH(c, last, depth, limit);
- STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
- last = a;
- } else {
- STACK_PUSH(c, last, depth, limit);
- STACK_PUSH(first, a, depth, limit);
- first = b, last = c, depth += 1, limit = ss_ilg(c - b);
- }
- } else {
- if((a - first) <= (c - b)) {
- STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
- STACK_PUSH(first, a, depth, limit);
- first = c;
- } else if((last - c) <= (c - b)) {
- STACK_PUSH(first, a, depth, limit);
- STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
- first = c;
- } else {
- STACK_PUSH(first, a, depth, limit);
- STACK_PUSH(c, last, depth, limit);
- first = b, last = c, depth += 1, limit = ss_ilg(c - b);
- }
- }
- } else {
- limit += 1;
- if(Td[PA[*first] - 1] < v) {
- first = ss_partition(PA, first, last, depth);
- limit = ss_ilg(last - first);
- }
- depth += 1;
- }
- }
-#undef STACK_SIZE
-}
-
-#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
-
-
-/*---------------------------------------------------------------------------*/
-
-#if SS_BLOCKSIZE != 0
-
-static INLINE
-void
-ss_blockswap(int *a, int *b, int n) {
- int t;
- for(; 0 < n; --n, ++a, ++b) {
- t = *a, *a = *b, *b = t;
- }
-}
-
-static INLINE
-void
-ss_rotate(int *first, int *middle, int *last) {
- int *a, *b, t;
- int l, r;
- l = middle - first, r = last - middle;
- for(; (0 < l) && (0 < r);) {
- if(l == r) { ss_blockswap(first, middle, l); break; }
- if(l < r) {
- a = last - 1, b = middle - 1;
- t = *a;
- do {
- *a-- = *b, *b-- = *a;
- if(b < first) {
- *a = t;
- last = a;
- if((r -= l + 1) <= l) { break; }
- a -= 1, b = middle - 1;
- t = *a;
- }
- } while(1);
- } else {
- a = first, b = middle;
- t = *a;
- do {
- *a++ = *b, *b++ = *a;
- if(last <= b) {
- *a = t;
- first = a + 1;
- if((l -= r + 1) <= r) { break; }
- a += 1, b = middle;
- t = *a;
- }
- } while(1);
- }
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static
-void
-ss_inplacemerge(const unsigned char *T, const int *PA,
- int *first, int *middle, int *last,
- int depth) {
- const int *p;
- int *a, *b;
- int len, half;
- int q, r;
- int x;
-
- for(;;) {
- if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
- else { x = 0; p = PA + *(last - 1); }
- for(a = first, len = middle - first, half = len >> 1, r = -1;
- 0 < len;
- len = half, half >>= 1) {
- b = a + half;
- q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
- if(q < 0) {
- a = b + 1;
- half -= (len & 1) ^ 1;
- } else {
- r = q;
- }
- }
- if(a < middle) {
- if(r == 0) { *a = ~*a; }
- ss_rotate(a, middle, last);
- last -= middle - a;
- middle = a;
- if(first == middle) { break; }
- }
- --last;
- if(x != 0) { while(*--last < 0) { } }
- if(middle == last) { break; }
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Merge-forward with internal buffer. */
-static
-void
-ss_mergeforward(const unsigned char *T, const int *PA,
- int *first, int *middle, int *last,
- int *buf, int depth) {
- int *a, *b, *c, *bufend;
- int t;
- int r;
-
- bufend = buf + (middle - first) - 1;
- ss_blockswap(buf, first, middle - first);
-
- for(t = *(a = first), b = buf, c = middle;;) {
- r = ss_compare(T, PA + *b, PA + *c, depth);
- if(r < 0) {
- do {
- *a++ = *b;
- if(bufend <= b) { *bufend = t; return; }
- *b++ = *a;
- } while(*b < 0);
- } else if(r > 0) {
- do {
- *a++ = *c, *c++ = *a;
- if(last <= c) {
- while(b < bufend) { *a++ = *b, *b++ = *a; }
- *a = *b, *b = t;
- return;
- }
- } while(*c < 0);
- } else {
- *c = ~*c;
- do {
- *a++ = *b;
- if(bufend <= b) { *bufend = t; return; }
- *b++ = *a;
- } while(*b < 0);
-
- do {
- *a++ = *c, *c++ = *a;
- if(last <= c) {
- while(b < bufend) { *a++ = *b, *b++ = *a; }
- *a = *b, *b = t;
- return;
- }
- } while(*c < 0);
- }
- }
-}
-
-/* Merge-backward with internal buffer. */
-static
-void
-ss_mergebackward(const unsigned char *T, const int *PA,
- int *first, int *middle, int *last,
- int *buf, int depth) {
- const int *p1, *p2;
- int *a, *b, *c, *bufend;
- int t;
- int r;
- int x;
-
- bufend = buf + (last - middle) - 1;
- ss_blockswap(buf, middle, last - middle);
-
- x = 0;
- if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
- else { p1 = PA + *bufend; }
- if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
- else { p2 = PA + *(middle - 1); }
- for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
- r = ss_compare(T, p1, p2, depth);
- if(0 < r) {
- if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
- *a-- = *b;
- if(b <= buf) { *buf = t; break; }
- *b-- = *a;
- if(*b < 0) { p1 = PA + ~*b; x |= 1; }
- else { p1 = PA + *b; }
- } else if(r < 0) {
- if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
- *a-- = *c, *c-- = *a;
- if(c < first) {
- while(buf < b) { *a-- = *b, *b-- = *a; }
- *a = *b, *b = t;
- break;
- }
- if(*c < 0) { p2 = PA + ~*c; x |= 2; }
- else { p2 = PA + *c; }
- } else {
- if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
- *a-- = ~*b;
- if(b <= buf) { *buf = t; break; }
- *b-- = *a;
- if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
- *a-- = *c, *c-- = *a;
- if(c < first) {
- while(buf < b) { *a-- = *b, *b-- = *a; }
- *a = *b, *b = t;
- break;
- }
- if(*b < 0) { p1 = PA + ~*b; x |= 1; }
- else { p1 = PA + *b; }
- if(*c < 0) { p2 = PA + ~*c; x |= 2; }
- else { p2 = PA + *c; }
- }
- }
-}
-
-/* D&C based merge. */
-static
-void
-ss_swapmerge(const unsigned char *T, const int *PA,
- int *first, int *middle, int *last,
- int *buf, int bufsize, int depth) {
-#define STACK_SIZE SS_SMERGE_STACKSIZE
-#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
-#define MERGE_CHECK(a, b, c)\
- do {\
- if(((c) & 1) ||\
- (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
- *(a) = ~*(a);\
- }\
- if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
- *(b) = ~*(b);\
- }\
- } while(0)
- struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
- int *l, *r, *lm, *rm;
- int m, len, half;
- int ssize;
- int check, next;
-
- for(check = 0, ssize = 0;;) {
- if((last - middle) <= bufsize) {
- if((first < middle) && (middle < last)) {
- ss_mergebackward(T, PA, first, middle, last, buf, depth);
- }
- MERGE_CHECK(first, last, check);
- STACK_POP(first, middle, last, check);
- continue;
- }
-
- if((middle - first) <= bufsize) {
- if(first < middle) {
- ss_mergeforward(T, PA, first, middle, last, buf, depth);
- }
- MERGE_CHECK(first, last, check);
- STACK_POP(first, middle, last, check);
- continue;
- }
-
- for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
- 0 < len;
- len = half, half >>= 1) {
- if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
- PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
- m += half + 1;
- half -= (len & 1) ^ 1;
- }
- }
-
- if(0 < m) {
- lm = middle - m, rm = middle + m;
- ss_blockswap(lm, middle, m);
- l = r = middle, next = 0;
- if(rm < last) {
- if(*rm < 0) {
- *rm = ~*rm;
- if(first < lm) { for(; *--l < 0;) { } next |= 4; }
- next |= 1;
- } else if(first < lm) {
- for(; *r < 0; ++r) { }
- next |= 2;
- }
- }
-
- if((l - first) <= (last - r)) {
- STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
- middle = lm, last = l, check = (check & 3) | (next & 4);
- } else {
- if((next & 2) && (r == middle)) { next ^= 6; }
- STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
- first = r, middle = rm, check = (next & 3) | (check & 4);
- }
- } else {
- if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
- *middle = ~*middle;
- }
- MERGE_CHECK(first, last, check);
- STACK_POP(first, middle, last, check);
- }
- }
-#undef STACK_SIZE
-}
-
-#endif /* SS_BLOCKSIZE != 0 */
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Substring sort */
-static
-void
-sssort(const unsigned char *T, const int *PA,
- int *first, int *last,
- int *buf, int bufsize,
- int depth, int n, int lastsuffix) {
- int *a;
-#if SS_BLOCKSIZE != 0
- int *b, *middle, *curbuf;
- int j, k, curbufsize, limit;
-#endif
- int i;
-
- if(lastsuffix != 0) { ++first; }
-
-#if SS_BLOCKSIZE == 0
- ss_mintrosort(T, PA, first, last, depth);
-#else
- if((bufsize < SS_BLOCKSIZE) &&
- (bufsize < (last - first)) &&
- (bufsize < (limit = ss_isqrt(last - first)))) {
- if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
- buf = middle = last - limit, bufsize = limit;
- } else {
- middle = last, limit = 0;
- }
- for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
-#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
- ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
-#elif 1 < SS_BLOCKSIZE
- ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
-#endif
- curbufsize = last - (a + SS_BLOCKSIZE);
- curbuf = a + SS_BLOCKSIZE;
- if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
- for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
- ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
- }
- }
-#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
- ss_mintrosort(T, PA, a, middle, depth);
-#elif 1 < SS_BLOCKSIZE
- ss_insertionsort(T, PA, a, middle, depth);
-#endif
- for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
- if(i & 1) {
- ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
- a -= k;
- }
- }
- if(limit != 0) {
-#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
- ss_mintrosort(T, PA, middle, last, depth);
-#elif 1 < SS_BLOCKSIZE
- ss_insertionsort(T, PA, middle, last, depth);
-#endif
- ss_inplacemerge(T, PA, first, middle, last, depth);
- }
-#endif
-
- if(lastsuffix != 0) {
- /* Insert last type B* suffix. */
- int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
- for(a = first, i = *(first - 1);
- (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
- ++a) {
- *(a - 1) = *a;
- }
- *(a - 1) = i;
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static INLINE
-int
-tr_ilg(int n) {
- return (n & 0xffff0000) ?
- ((n & 0xff000000) ?
- 24 + lg_table[(n >> 24) & 0xff] :
- 16 + lg_table[(n >> 16) & 0xff]) :
- ((n & 0x0000ff00) ?
- 8 + lg_table[(n >> 8) & 0xff] :
- 0 + lg_table[(n >> 0) & 0xff]);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Simple insertionsort for small size groups. */
-static
-void
-tr_insertionsort(const int *ISAd, int *first, int *last) {
- int *a, *b;
- int t, r;
-
- for(a = first + 1; a < last; ++a) {
- for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
- do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
- if(b < first) { break; }
- }
- if(r == 0) { *b = ~*b; }
- *(b + 1) = t;
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static INLINE
-void
-tr_fixdown(const int *ISAd, int *SA, int i, int size) {
- int j, k;
- int v;
- int c, d, e;
-
- for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
- d = ISAd[SA[k = j++]];
- if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
- if(d <= c) { break; }
- }
- SA[i] = v;
-}
-
-/* Simple top-down heapsort. */
-static
-void
-tr_heapsort(const int *ISAd, int *SA, int size) {
- int i, m;
- int t;
-
- m = size;
- if((size % 2) == 0) {
- m--;
- if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
- }
-
- for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
- if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
- for(i = m - 1; 0 < i; --i) {
- t = SA[0], SA[0] = SA[i];
- tr_fixdown(ISAd, SA, 0, i);
- SA[i] = t;
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Returns the median of three elements. */
-static INLINE
-int *
-tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
- int *t;
- if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
- if(ISAd[*v2] > ISAd[*v3]) {
- if(ISAd[*v1] > ISAd[*v3]) { return v1; }
- else { return v3; }
- }
- return v2;
-}
-
-/* Returns the median of five elements. */
-static INLINE
-int *
-tr_median5(const int *ISAd,
- int *v1, int *v2, int *v3, int *v4, int *v5) {
- int *t;
- if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
- if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
- if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
- if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
- if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
- if(ISAd[*v3] > ISAd[*v4]) { return v4; }
- return v3;
-}
-
-/* Returns the pivot element. */
-static INLINE
-int *
-tr_pivot(const int *ISAd, int *first, int *last) {
- int *middle;
- int t;
-
- t = last - first;
- middle = first + t / 2;
-
- if(t <= 512) {
- if(t <= 32) {
- return tr_median3(ISAd, first, middle, last - 1);
- } else {
- t >>= 2;
- return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
- }
- }
- t >>= 3;
- first = tr_median3(ISAd, first, first + t, first + (t << 1));
- middle = tr_median3(ISAd, middle - t, middle, middle + t);
- last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
- return tr_median3(ISAd, first, middle, last);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-typedef struct _trbudget_t trbudget_t;
-struct _trbudget_t {
- int chance;
- int remain;
- int incval;
- int count;
-};
-
-static INLINE
-void
-trbudget_init(trbudget_t *budget, int chance, int incval) {
- budget->chance = chance;
- budget->remain = budget->incval = incval;
-}
-
-static INLINE
-int
-trbudget_check(trbudget_t *budget, int size) {
- if(size <= budget->remain) { budget->remain -= size; return 1; }
- if(budget->chance == 0) { budget->count += size; return 0; }
- budget->remain += budget->incval - size;
- budget->chance -= 1;
- return 1;
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static INLINE
-void
-tr_partition(const int *ISAd,
- int *first, int *middle, int *last,
- int **pa, int **pb, int v) {
- int *a, *b, *c, *d, *e, *f;
- int t, s;
- int x = 0;
-
- for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
- if(((a = b) < last) && (x < v)) {
- for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
- if(x == v) { SWAP(*b, *a); ++a; }
- }
- }
- for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
- if((b < (d = c)) && (x > v)) {
- for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
- if(x == v) { SWAP(*c, *d); --d; }
- }
- }
- for(; b < c;) {
- SWAP(*b, *c);
- for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
- if(x == v) { SWAP(*b, *a); ++a; }
- }
- for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
- if(x == v) { SWAP(*c, *d); --d; }
- }
- }
-
- if(a <= d) {
- c = b - 1;
- if((s = a - first) > (t = b - a)) { s = t; }
- for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
- if((s = d - c) > (t = last - d - 1)) { s = t; }
- for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
- first += (b - a), last -= (d - c);
- }
- *pa = first, *pb = last;
-}
-
-static
-void
-tr_copy(int *ISA, const int *SA,
- int *first, int *a, int *b, int *last,
- int depth) {
- /* sort suffixes of middle partition
- by using sorted order of suffixes of left and right partition. */
- int *c, *d, *e;
- int s, v;
-
- v = b - SA - 1;
- for(c = first, d = a - 1; c <= d; ++c) {
- if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
- *++d = s;
- ISA[s] = d - SA;
- }
- }
- for(c = last - 1, e = d + 1, d = b; e < d; --c) {
- if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
- *--d = s;
- ISA[s] = d - SA;
- }
- }
-}
-
-static
-void
-tr_partialcopy(int *ISA, const int *SA,
- int *first, int *a, int *b, int *last,
- int depth) {
- int *c, *d, *e;
- int s, v;
- int rank, lastrank, newrank = -1;
-
- v = b - SA - 1;
- lastrank = -1;
- for(c = first, d = a - 1; c <= d; ++c) {
- if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
- *++d = s;
- rank = ISA[s + depth];
- if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
- ISA[s] = newrank;
- }
- }
-
- lastrank = -1;
- for(e = d; first <= e; --e) {
- rank = ISA[*e];
- if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
- if(newrank != rank) { ISA[*e] = newrank; }
- }
-
- lastrank = -1;
- for(c = last - 1, e = d + 1, d = b; e < d; --c) {
- if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
- *--d = s;
- rank = ISA[s + depth];
- if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
- ISA[s] = newrank;
- }
- }
-}
-
-static
-void
-tr_introsort(int *ISA, const int *ISAd,
- int *SA, int *first, int *last,
- trbudget_t *budget) {
-#define STACK_SIZE TR_STACKSIZE
- struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
- int *a, *b, *c;
- int t;
- int v, x = 0;
- int incr = ISAd - ISA;
- int limit, next;
- int ssize, trlink = -1;
-
- for(ssize = 0, limit = tr_ilg(last - first);;) {
-
- if(limit < 0) {
- if(limit == -1) {
- /* tandem repeat partition */
- tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
-
- /* update ranks */
- if(a < last) {
- for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
- }
- if(b < last) {
- for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
- }
-
- /* push */
- if(1 < (b - a)) {
- STACK_PUSH5(NULL, a, b, 0, 0);
- STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
- trlink = ssize - 2;
- }
- if((a - first) <= (last - b)) {
- if(1 < (a - first)) {
- STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
- last = a, limit = tr_ilg(a - first);
- } else if(1 < (last - b)) {
- first = b, limit = tr_ilg(last - b);
- } else {
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- } else {
- if(1 < (last - b)) {
- STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
- first = b, limit = tr_ilg(last - b);
- } else if(1 < (a - first)) {
- last = a, limit = tr_ilg(a - first);
- } else {
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- }
- } else if(limit == -2) {
- /* tandem repeat copy */
- a = stack[--ssize].b, b = stack[ssize].c;
- if(stack[ssize].d == 0) {
- tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
- } else {
- if(0 <= trlink) { stack[trlink].d = -1; }
- tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
- }
- STACK_POP5(ISAd, first, last, limit, trlink);
- } else {
- /* sorted partition */
- if(0 <= *first) {
- a = first;
- do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
- first = a;
- }
- if(first < last) {
- a = first; do { *a = ~*a; } while(*++a < 0);
- next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
- if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
-
- /* push */
- if(trbudget_check(budget, a - first)) {
- if((a - first) <= (last - a)) {
- STACK_PUSH5(ISAd, a, last, -3, trlink);
- ISAd += incr, last = a, limit = next;
- } else {
- if(1 < (last - a)) {
- STACK_PUSH5(ISAd + incr, first, a, next, trlink);
- first = a, limit = -3;
- } else {
- ISAd += incr, last = a, limit = next;
- }
- }
- } else {
- if(0 <= trlink) { stack[trlink].d = -1; }
- if(1 < (last - a)) {
- first = a, limit = -3;
- } else {
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- }
- } else {
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- }
- continue;
- }
-
- if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
- tr_insertionsort(ISAd, first, last);
- limit = -3;
- continue;
- }
-
- if(limit-- == 0) {
- tr_heapsort(ISAd, first, last - first);
- for(a = last - 1; first < a; a = b) {
- for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
- }
- limit = -3;
- continue;
- }
-
- /* choose pivot */
- a = tr_pivot(ISAd, first, last);
- SWAP(*first, *a);
- v = ISAd[*first];
-
- /* partition */
- tr_partition(ISAd, first, first + 1, last, &a, &b, v);
- if((last - first) != (b - a)) {
- next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
-
- /* update ranks */
- for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
- if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
-
- /* push */
- if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
- if((a - first) <= (last - b)) {
- if((last - b) <= (b - a)) {
- if(1 < (a - first)) {
- STACK_PUSH5(ISAd + incr, a, b, next, trlink);
- STACK_PUSH5(ISAd, b, last, limit, trlink);
- last = a;
- } else if(1 < (last - b)) {
- STACK_PUSH5(ISAd + incr, a, b, next, trlink);
- first = b;
- } else {
- ISAd += incr, first = a, last = b, limit = next;
- }
- } else if((a - first) <= (b - a)) {
- if(1 < (a - first)) {
- STACK_PUSH5(ISAd, b, last, limit, trlink);
- STACK_PUSH5(ISAd + incr, a, b, next, trlink);
- last = a;
- } else {
- STACK_PUSH5(ISAd, b, last, limit, trlink);
- ISAd += incr, first = a, last = b, limit = next;
- }
- } else {
- STACK_PUSH5(ISAd, b, last, limit, trlink);
- STACK_PUSH5(ISAd, first, a, limit, trlink);
- ISAd += incr, first = a, last = b, limit = next;
- }
- } else {
- if((a - first) <= (b - a)) {
- if(1 < (last - b)) {
- STACK_PUSH5(ISAd + incr, a, b, next, trlink);
- STACK_PUSH5(ISAd, first, a, limit, trlink);
- first = b;
- } else if(1 < (a - first)) {
- STACK_PUSH5(ISAd + incr, a, b, next, trlink);
- last = a;
- } else {
- ISAd += incr, first = a, last = b, limit = next;
- }
- } else if((last - b) <= (b - a)) {
- if(1 < (last - b)) {
- STACK_PUSH5(ISAd, first, a, limit, trlink);
- STACK_PUSH5(ISAd + incr, a, b, next, trlink);
- first = b;
- } else {
- STACK_PUSH5(ISAd, first, a, limit, trlink);
- ISAd += incr, first = a, last = b, limit = next;
- }
- } else {
- STACK_PUSH5(ISAd, first, a, limit, trlink);
- STACK_PUSH5(ISAd, b, last, limit, trlink);
- ISAd += incr, first = a, last = b, limit = next;
- }
- }
- } else {
- if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
- if((a - first) <= (last - b)) {
- if(1 < (a - first)) {
- STACK_PUSH5(ISAd, b, last, limit, trlink);
- last = a;
- } else if(1 < (last - b)) {
- first = b;
- } else {
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- } else {
- if(1 < (last - b)) {
- STACK_PUSH5(ISAd, first, a, limit, trlink);
- first = b;
- } else if(1 < (a - first)) {
- last = a;
- } else {
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- }
- }
- } else {
- if(trbudget_check(budget, last - first)) {
- limit = tr_ilg(last - first), ISAd += incr;
- } else {
- if(0 <= trlink) { stack[trlink].d = -1; }
- STACK_POP5(ISAd, first, last, limit, trlink);
- }
- }
- }
-#undef STACK_SIZE
-}
-
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Tandem repeat sort */
-static
-void
-trsort(int *ISA, int *SA, int n, int depth) {
- int *ISAd;
- int *first, *last;
- trbudget_t budget;
- int t, skip, unsorted;
-
- trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
-/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
- for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
- first = SA;
- skip = 0;
- unsorted = 0;
- do {
- if((t = *first) < 0) { first -= t; skip += t; }
- else {
- if(skip != 0) { *(first + skip) = skip; skip = 0; }
- last = SA + ISA[t] + 1;
- if(1 < (last - first)) {
- budget.count = 0;
- tr_introsort(ISA, ISAd, SA, first, last, &budget);
- if(budget.count != 0) { unsorted += budget.count; }
- else { skip = first - last; }
- } else if((last - first) == 1) {
- skip = -1;
- }
- first = last;
- }
- } while(first < (SA + n));
- if(skip != 0) { *(first + skip) = skip; }
- if(unsorted == 0) { break; }
- }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Sorts suffixes of type B*. */
-static
-int
-sort_typeBstar(const unsigned char *T, int *SA,
- int *bucket_A, int *bucket_B,
- int n, int openMP) {
- int *PAb, *ISAb, *buf;
-#ifdef LIBBSC_OPENMP
- int *curbuf;
- int l;
-#endif
- int i, j, k, t, m, bufsize;
- int c0, c1;
-#ifdef LIBBSC_OPENMP
- int d0, d1;
-#endif
- (void)openMP;
-
- /* Initialize bucket arrays. */
- for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
- for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
-
- /* Count the number of occurrences of the first one or two characters of each
- type A, B and B* suffix. Moreover, store the beginning position of all
- type B* suffixes into the array SA. */
- for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
- /* type A suffix. */
- do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
- if(0 <= i) {
- /* type B* suffix. */
- ++BUCKET_BSTAR(c0, c1);
- SA[--m] = i;
- /* type B suffix. */
- for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
- ++BUCKET_B(c0, c1);
- }
- }
- }
- m = n - m;
-/*
-note:
- A type B* suffix is lexicographically smaller than a type B suffix that
- begins with the same first two characters.
-*/
-
- /* Calculate the index of start/end point of each bucket. */
- for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
- t = i + BUCKET_A(c0);
- BUCKET_A(c0) = i + j; /* start point */
- i = t + BUCKET_B(c0, c0);
- for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
- j += BUCKET_BSTAR(c0, c1);
- BUCKET_BSTAR(c0, c1) = j; /* end point */
- i += BUCKET_B(c0, c1);
- }
- }
-
- if(0 < m) {
- /* Sort the type B* suffixes by their first two characters. */
- PAb = SA + n - m; ISAb = SA + m;
- for(i = m - 2; 0 <= i; --i) {
- t = PAb[i], c0 = T[t], c1 = T[t + 1];
- SA[--BUCKET_BSTAR(c0, c1)] = i;
- }
- t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
- SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
-
- /* Sort the type B* substrings using sssort. */
-#ifdef LIBBSC_OPENMP
- if (openMP)
- {
- buf = SA + m;
- c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
-#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
- {
- bufsize = (n - (2 * m)) / omp_get_num_threads();
- curbuf = buf + omp_get_thread_num() * bufsize;
- k = 0;
- for(;;) {
- #pragma omp critical(sssort_lock)
- {
- if(0 < (l = j)) {
- d0 = c0, d1 = c1;
- do {
- k = BUCKET_BSTAR(d0, d1);
- if(--d1 <= d0) {
- d1 = ALPHABET_SIZE - 1;
- if(--d0 < 0) { break; }
- }
- } while(((l - k) <= 1) && (0 < (l = k)));
- c0 = d0, c1 = d1, j = k;
- }
- }
- if(l == 0) { break; }
- sssort(T, PAb, SA + k, SA + l,
- curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
- }
- }
- }
- else
- {
- buf = SA + m, bufsize = n - (2 * m);
- for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
- for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
- i = BUCKET_BSTAR(c0, c1);
- if(1 < (j - i)) {
- sssort(T, PAb, SA + i, SA + j,
- buf, bufsize, 2, n, *(SA + i) == (m - 1));
- }
- }
- }
- }
-#else
- buf = SA + m, bufsize = n - (2 * m);
- for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
- for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
- i = BUCKET_BSTAR(c0, c1);
- if(1 < (j - i)) {
- sssort(T, PAb, SA + i, SA + j,
- buf, bufsize, 2, n, *(SA + i) == (m - 1));
- }
- }
- }
-#endif
-
- /* Compute ranks of type B* substrings. */
- for(i = m - 1; 0 <= i; --i) {
- if(0 <= SA[i]) {
- j = i;
- do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
- SA[i + 1] = i - j;
- if(i <= 0) { break; }
- }
- j = i;
- do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
- ISAb[SA[i]] = j;
- }
-
- /* Construct the inverse suffix array of type B* suffixes using trsort. */
- trsort(ISAb, SA, m, 1);
-
- /* Set the sorted order of type B* suffixes. */
- for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
- for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
- if(0 <= i) {
- t = i;
- for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
- SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
- }
- }
-
- /* Calculate the index of start/end point of each bucket. */
- BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
- for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
- i = BUCKET_A(c0 + 1) - 1;
- for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
- t = i - BUCKET_B(c0, c1);
- BUCKET_B(c0, c1) = i; /* end point */
-
- /* Move all type B* suffixes to the correct position. */
- for(i = t, j = BUCKET_BSTAR(c0, c1);
- j <= k;
- --i, --k) { SA[i] = SA[k]; }
- }
- BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
- BUCKET_B(c0, c0) = i; /* end point */
- }
- }
-
- return m;
-}
-
-/* Constructs the suffix array by using the sorted order of type B* suffixes. */
-static
-void
-construct_SA(const unsigned char *T, int *SA,
- int *bucket_A, int *bucket_B,
- int n, int m) {
- int *i, *j, *k;
- int s;
- int c0, c1, c2;
-
- if(0 < m) {
- /* Construct the sorted order of type B suffixes by using
- the sorted order of type B* suffixes. */
- for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
- /* Scan the suffix array from right to left. */
- for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
- j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
- i <= j;
- --j) {
- if(0 < (s = *j)) {
- assert(T[s] == c1);
- assert(((s + 1) < n) && (T[s] <= T[s + 1]));
- assert(T[s - 1] <= T[s]);
- *j = ~s;
- c0 = T[--s];
- if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
- if(c0 != c2) {
- if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
- k = SA + BUCKET_B(c2 = c0, c1);
- }
- assert(k < j);
- *k-- = s;
- } else {
- assert(((s == 0) && (T[s] == c1)) || (s < 0));
- *j = ~s;
- }
- }
- }
- }
-
- /* Construct the suffix array by using
- the sorted order of type B suffixes. */
- k = SA + BUCKET_A(c2 = T[n - 1]);
- *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
- /* Scan the suffix array from left to right. */
- for(i = SA, j = SA + n; i < j; ++i) {
- if(0 < (s = *i)) {
- assert(T[s - 1] >= T[s]);
- c0 = T[--s];
- if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
- if(c0 != c2) {
- BUCKET_A(c2) = k - SA;
- k = SA + BUCKET_A(c2 = c0);
- }
- assert(i < k);
- *k++ = s;
- } else {
- assert(s < 0);
- *i = ~s;
- }
- }
-}
-
-/* Constructs the burrows-wheeler transformed string directly
- by using the sorted order of type B* suffixes. */
-static
-int
-construct_BWT(const unsigned char *T, int *SA,
- int *bucket_A, int *bucket_B,
- int n, int m) {
- int *i, *j, *k, *orig;
- int s;
- int c0, c1, c2;
-
- if(0 < m) {
- /* Construct the sorted order of type B suffixes by using
- the sorted order of type B* suffixes. */
- for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
- /* Scan the suffix array from right to left. */
- for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
- j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
- i <= j;
- --j) {
- if(0 < (s = *j)) {
- assert(T[s] == c1);
- assert(((s + 1) < n) && (T[s] <= T[s + 1]));
- assert(T[s - 1] <= T[s]);
- c0 = T[--s];
- *j = ~((int)c0);
- if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
- if(c0 != c2) {
- if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
- k = SA + BUCKET_B(c2 = c0, c1);
- }
- assert(k < j);
- *k-- = s;
- } else if(s != 0) {
- *j = ~s;
-#ifndef NDEBUG
- } else {
- assert(T[s] == c1);
-#endif
- }
- }
- }
- }
-
- /* Construct the BWTed string by using
- the sorted order of type B suffixes. */
- k = SA + BUCKET_A(c2 = T[n - 1]);
- *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
- /* Scan the suffix array from left to right. */
- for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
- if(0 < (s = *i)) {
- assert(T[s - 1] >= T[s]);
- c0 = T[--s];
- *i = c0;
- if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
- if(c0 != c2) {
- BUCKET_A(c2) = k - SA;
- k = SA + BUCKET_A(c2 = c0);
- }
- assert(i < k);
- *k++ = s;
- } else if(s != 0) {
- *i = ~s;
- } else {
- orig = i;
- }
- }
-
- return orig - SA;
-}
-
-/* Constructs the burrows-wheeler transformed string directly
- by using the sorted order of type B* suffixes. */
-static
-int
-construct_BWT_indexes(const unsigned char *T, int *SA,
- int *bucket_A, int *bucket_B,
- int n, int m,
- unsigned char * num_indexes, int * indexes) {
- int *i, *j, *k, *orig;
- int s;
- int c0, c1, c2;
-
- int mod = n / 8;
- {
- mod |= mod >> 1; mod |= mod >> 2;
- mod |= mod >> 4; mod |= mod >> 8;
- mod |= mod >> 16; mod >>= 1;
-
- *num_indexes = (unsigned char)((n - 1) / (mod + 1));
- }
-
- if(0 < m) {
- /* Construct the sorted order of type B suffixes by using
- the sorted order of type B* suffixes. */
- for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
- /* Scan the suffix array from right to left. */
- for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
- j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
- i <= j;
- --j) {
- if(0 < (s = *j)) {
- assert(T[s] == c1);
- assert(((s + 1) < n) && (T[s] <= T[s + 1]));
- assert(T[s - 1] <= T[s]);
-
- if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
-
- c0 = T[--s];
- *j = ~((int)c0);
- if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
- if(c0 != c2) {
- if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
- k = SA + BUCKET_B(c2 = c0, c1);
- }
- assert(k < j);
- *k-- = s;
- } else if(s != 0) {
- *j = ~s;
-#ifndef NDEBUG
- } else {
- assert(T[s] == c1);
-#endif
- }
- }
- }
- }
-
- /* Construct the BWTed string by using
- the sorted order of type B suffixes. */
- k = SA + BUCKET_A(c2 = T[n - 1]);
- if (T[n - 2] < c2) {
- if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
- *k++ = ~((int)T[n - 2]);
- }
- else {
- *k++ = n - 1;
- }
-
- /* Scan the suffix array from left to right. */
- for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
- if(0 < (s = *i)) {
- assert(T[s - 1] >= T[s]);
-
- if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
-
- c0 = T[--s];
- *i = c0;
- if(c0 != c2) {
- BUCKET_A(c2) = k - SA;
- k = SA + BUCKET_A(c2 = c0);
- }
- assert(i < k);
- if((0 < s) && (T[s - 1] < c0)) {
- if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
- *k++ = ~((int)T[s - 1]);
- } else
- *k++ = s;
- } else if(s != 0) {
- *i = ~s;
- } else {
- orig = i;
- }
- }
-
- return orig - SA;
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/*- Function -*/
-
-int
-divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
- int *bucket_A, *bucket_B;
- int m;
- int err = 0;
-
- /* Check arguments. */
- if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
- else if(n == 0) { return 0; }
- else if(n == 1) { SA[0] = 0; return 0; }
- else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
-
- bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
- bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
-
- /* Suffixsort. */
- if((bucket_A != NULL) && (bucket_B != NULL)) {
- m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
- construct_SA(T, SA, bucket_A, bucket_B, n, m);
- } else {
- err = -2;
- }
-
- free(bucket_B);
- free(bucket_A);
-
- return err;
-}
-
-int
-divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
- int *B;
- int *bucket_A, *bucket_B;
- int m, pidx, i;
-
- /* Check arguments. */
- if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
- else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
-
- if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
- bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
- bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
-
- /* Burrows-Wheeler Transform. */
- if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
- m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
-
- if (num_indexes == NULL || indexes == NULL) {
- pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
- } else {
- pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
- }
-
- /* Copy to output string. */
- U[0] = T[n - 1];
- for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
- for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
- pidx += 1;
- } else {
- pidx = -2;
- }
-
- free(bucket_B);
- free(bucket_A);
- if(A == NULL) { free(B); }
-
- return pidx;
-}
+/*
+ * divsufsort.c for libdivsufsort-lite
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*- Compiler specifics -*/
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wshorten-64-to-32"
+#endif
+
+#if defined(_MSC_VER)
+# pragma warning(disable : 4244)
+# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
+#endif
+
+
+/*- Dependencies -*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "divsufsort.h"
+
+/*- Constants -*/
+#if defined(INLINE)
+# undef INLINE
+#endif
+#if !defined(INLINE)
+# define INLINE __inline
+#endif
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (256)
+#endif
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1
+# undef SS_INSERTIONSORT_THRESHOLD
+# define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0
+# undef SS_BLOCKSIZE
+# define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+# undef SS_BLOCKSIZE
+# define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0
+# define SS_MISORT_STACKSIZE (96)
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#define SS_SMERGE_STACKSIZE (32)
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#define TR_STACKSIZE (64)
+
+
+/*- Macros -*/
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif /* MAX */
+#define STACK_PUSH(_a, _b, _c, _d)\
+ do {\
+ assert(ssize < STACK_SIZE);\
+ stack[ssize].a = (_a), stack[ssize].b = (_b),\
+ stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+ } while(0)
+#define STACK_PUSH5(_a, _b, _c, _d, _e)\
+ do {\
+ assert(ssize < STACK_SIZE);\
+ stack[ssize].a = (_a), stack[ssize].b = (_b),\
+ stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+ } while(0)
+#define STACK_POP(_a, _b, _c, _d)\
+ do {\
+ assert(0 <= ssize);\
+ if(ssize == 0) { return; }\
+ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+ (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+ } while(0)
+#define STACK_POP5(_a, _b, _c, _d, _e)\
+ do {\
+ assert(0 <= ssize);\
+ if(ssize == 0) { return; }\
+ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+ (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+ } while(0)
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Functions -*/
+
+static const int lg_table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+
+static INLINE
+int
+ss_ilg(int n) {
+#if SS_BLOCKSIZE == 0
+ return (n & 0xffff0000) ?
+ ((n & 0xff000000) ?
+ 24 + lg_table[(n >> 24) & 0xff] :
+ 16 + lg_table[(n >> 16) & 0xff]) :
+ ((n & 0x0000ff00) ?
+ 8 + lg_table[(n >> 8) & 0xff] :
+ 0 + lg_table[(n >> 0) & 0xff]);
+#elif SS_BLOCKSIZE < 256
+ return lg_table[n];
+#else
+ return (n & 0xff00) ?
+ 8 + lg_table[(n >> 8) & 0xff] :
+ 0 + lg_table[(n >> 0) & 0xff];
+#endif
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
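A quick worked check of ss_ilg (illustrative only, not part of the source): for n = 0x12345 the upper 16 bits are non-zero but the top byte is zero, so the result is 16 + lg_table[(0x12345 >> 16) & 0xff] = 16 + lg_table[0x01] = 16, which is floor(log2(0x12345)) since 2^16 <= 0x12345 < 2^17.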
+
+#if SS_BLOCKSIZE != 0
+
+static const int sqq_table[256] = {
+ 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
+ 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
+ 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
+110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
+156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
+169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
+181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
+192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
+202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
+212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
+221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
+230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
+239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
+247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
+};
+
+static INLINE
+int
+ss_isqrt(int x) {
+ int y, e;
+
+ if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
+ e = (x & 0xffff0000) ?
+ ((x & 0xff000000) ?
+ 24 + lg_table[(x >> 24) & 0xff] :
+ 16 + lg_table[(x >> 16) & 0xff]) :
+ ((x & 0x0000ff00) ?
+ 8 + lg_table[(x >> 8) & 0xff] :
+ 0 + lg_table[(x >> 0) & 0xff]);
+
+ if(e >= 16) {
+ y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
+ if(e >= 24) { y = (y + 1 + x / y) >> 1; }
+ y = (y + 1 + x / y) >> 1;
+ } else if(e >= 8) {
+ y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
+ } else {
+ return sqq_table[x] >> 4;
+ }
+
+ return (x < (y * y)) ? y - 1 : y;
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
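Similarly, a worked value for ss_isqrt (illustrative; with the default SS_BLOCKSIZE of 1024 the early return does not trigger): for x = 1000, e = 9, so y = (sqq_table[1000 >> 2] >> 3) + 1 = (252 >> 3) + 1 = 32, and because 1000 < 32 * 32 the function returns 31 = floor(sqrt(1000)). Inputs with e >= 16 additionally refine the table estimate with one or two Newton steps before the same final adjustment.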
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Compares two suffixes. */
+static INLINE
+int
+ss_compare(const unsigned char *T,
+ const int *p1, const int *p2,
+ int depth) {
+ const unsigned char *U1, *U2, *U1n, *U2n;
+
+ for(U1 = T + depth + *p1,
+ U2 = T + depth + *p2,
+ U1n = T + *(p1 + 1) + 2,
+ U2n = T + *(p2 + 1) + 2;
+ (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
+ ++U1, ++U2) {
+ }
+
+ return U1 < U1n ?
+ (U2 < U2n ? *U1 - *U2 : 1) :
+ (U2 < U2n ? -1 : 0);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
+
+/* Insertionsort for small size groups */
+static
+void
+ss_insertionsort(const unsigned char *T, const int *PA,
+ int *first, int *last, int depth) {
+ int *i, *j;
+ int t;
+ int r;
+
+ for(i = last - 2; first <= i; --i) {
+ for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
+ do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
+ if(last <= j) { break; }
+ }
+ if(r == 0) { *j = ~*j; }
+ *(j - 1) = t;
+ }
+}
+
+#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+
+static INLINE
+void
+ss_fixdown(const unsigned char *Td, const int *PA,
+ int *SA, int i, int size) {
+ int j, k;
+ int v;
+ int c, d, e;
+
+ for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+ d = Td[PA[SA[k = j++]]];
+ if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
+ if(d <= c) { break; }
+ }
+ SA[i] = v;
+}
+
+/* Simple top-down heapsort. */
+static
+void
+ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
+ int i, m;
+ int t;
+
+ m = size;
+ if((size % 2) == 0) {
+ m--;
+ if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+ }
+
+ for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
+ if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
+ for(i = m - 1; 0 < i; --i) {
+ t = SA[0], SA[0] = SA[i];
+ ss_fixdown(Td, PA, SA, 0, i);
+ SA[i] = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements. */
+static INLINE
+int *
+ss_median3(const unsigned char *Td, const int *PA,
+ int *v1, int *v2, int *v3) {
+ int *t;
+ if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
+ if(Td[PA[*v2]] > Td[PA[*v3]]) {
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
+ else { return v3; }
+ }
+ return v2;
+}
+
+/* Returns the median of five elements. */
+static INLINE
+int *
+ss_median5(const unsigned char *Td, const int *PA,
+ int *v1, int *v2, int *v3, int *v4, int *v5) {
+ int *t;
+ if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+ if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+ if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+ if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+ if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+ return v3;
+}
+
+/* Returns the pivot element. */
+static INLINE
+int *
+ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
+ int *middle;
+ int t;
+
+ t = last - first;
+ middle = first + t / 2;
+
+ if(t <= 512) {
+ if(t <= 32) {
+ return ss_median3(Td, PA, first, middle, last - 1);
+ } else {
+ t >>= 2;
+ return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
+ }
+ }
+ t >>= 3;
+ first = ss_median3(Td, PA, first, first + t, first + (t << 1));
+ middle = ss_median3(Td, PA, middle - t, middle, middle + t);
+ last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
+ return ss_median3(Td, PA, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Binary partition for substrings. */
+static INLINE
+int *
+ss_partition(const int *PA,
+ int *first, int *last, int depth) {
+ int *a, *b;
+ int t;
+ for(a = first - 1, b = last;;) {
+ for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
+ for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
+ if(b <= a) { break; }
+ t = ~*b;
+ *b = *a;
+ *a = t;
+ }
+ if(first < a) { *first = ~*first; }
+ return a;
+}
+
+/* Multikey introsort for medium size groups. */
+static
+void
+ss_mintrosort(const unsigned char *T, const int *PA,
+ int *first, int *last,
+ int depth) {
+#define STACK_SIZE SS_MISORT_STACKSIZE
+ struct { int *a, *b, c; int d; } stack[STACK_SIZE];
+ const unsigned char *Td;
+ int *a, *b, *c, *d, *e, *f;
+ int s, t;
+ int ssize;
+ int limit;
+ int v, x = 0;
+
+ for(ssize = 0, limit = ss_ilg(last - first);;) {
+
+ if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
+#if 1 < SS_INSERTIONSORT_THRESHOLD
+ if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
+#endif
+ STACK_POP(first, last, depth, limit);
+ continue;
+ }
+
+ Td = T + depth;
+ if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
+ if(limit < 0) {
+ for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
+ if((x = Td[PA[*a]]) != v) {
+ if(1 < (a - first)) { break; }
+ v = x;
+ first = a;
+ }
+ }
+ if(Td[PA[*first] - 1] < v) {
+ first = ss_partition(PA, first, a, depth);
+ }
+ if((a - first) <= (last - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH(a, last, depth, -1);
+ last = a, depth += 1, limit = ss_ilg(a - first);
+ } else {
+ first = a, limit = -1;
+ }
+ } else {
+ if(1 < (last - a)) {
+ STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
+ first = a, limit = -1;
+ } else {
+ last = a, depth += 1, limit = ss_ilg(a - first);
+ }
+ }
+ continue;
+ }
+
+ /* choose pivot */
+ a = ss_pivot(Td, PA, first, last);
+ v = Td[PA[*a]];
+ SWAP(*first, *a);
+
+ /* partition */
+ for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+ a = first + (b - a), c = last - (d - c);
+ b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
+
+ if((a - first) <= (last - c)) {
+ if((last - c) <= (c - b)) {
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ STACK_PUSH(c, last, depth, limit);
+ last = a;
+ } else if((a - first) <= (c - b)) {
+ STACK_PUSH(c, last, depth, limit);
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ last = a;
+ } else {
+ STACK_PUSH(c, last, depth, limit);
+ STACK_PUSH(first, a, depth, limit);
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+ }
+ } else {
+ if((a - first) <= (c - b)) {
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ STACK_PUSH(first, a, depth, limit);
+ first = c;
+ } else if((last - c) <= (c - b)) {
+ STACK_PUSH(first, a, depth, limit);
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ first = c;
+ } else {
+ STACK_PUSH(first, a, depth, limit);
+ STACK_PUSH(c, last, depth, limit);
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+ }
+ }
+ } else {
+ limit += 1;
+ if(Td[PA[*first] - 1] < v) {
+ first = ss_partition(PA, first, last, depth);
+ limit = ss_ilg(last - first);
+ }
+ depth += 1;
+ }
+ }
+#undef STACK_SIZE
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if SS_BLOCKSIZE != 0
+
+static INLINE
+void
+ss_blockswap(int *a, int *b, int n) {
+ int t;
+ for(; 0 < n; --n, ++a, ++b) {
+ t = *a, *a = *b, *b = t;
+ }
+}
+
+static INLINE
+void
+ss_rotate(int *first, int *middle, int *last) {
+ int *a, *b, t;
+ int l, r;
+ l = middle - first, r = last - middle;
+ for(; (0 < l) && (0 < r);) {
+ if(l == r) { ss_blockswap(first, middle, l); break; }
+ if(l < r) {
+ a = last - 1, b = middle - 1;
+ t = *a;
+ do {
+ *a-- = *b, *b-- = *a;
+ if(b < first) {
+ *a = t;
+ last = a;
+ if((r -= l + 1) <= l) { break; }
+ a -= 1, b = middle - 1;
+ t = *a;
+ }
+ } while(1);
+ } else {
+ a = first, b = middle;
+ t = *a;
+ do {
+ *a++ = *b, *b++ = *a;
+ if(last <= b) {
+ *a = t;
+ first = a + 1;
+ if((l -= r + 1) <= r) { break; }
+ a += 1, b = middle;
+ t = *a;
+ }
+ } while(1);
+ }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static
+void
+ss_inplacemerge(const unsigned char *T, const int *PA,
+ int *first, int *middle, int *last,
+ int depth) {
+ const int *p;
+ int *a, *b;
+ int len, half;
+ int q, r;
+ int x;
+
+ for(;;) {
+ if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
+ else { x = 0; p = PA + *(last - 1); }
+ for(a = first, len = middle - first, half = len >> 1, r = -1;
+ 0 < len;
+ len = half, half >>= 1) {
+ b = a + half;
+ q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
+ if(q < 0) {
+ a = b + 1;
+ half -= (len & 1) ^ 1;
+ } else {
+ r = q;
+ }
+ }
+ if(a < middle) {
+ if(r == 0) { *a = ~*a; }
+ ss_rotate(a, middle, last);
+ last -= middle - a;
+ middle = a;
+ if(first == middle) { break; }
+ }
+ --last;
+ if(x != 0) { while(*--last < 0) { } }
+ if(middle == last) { break; }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Merge-forward with internal buffer. */
+static
+void
+ss_mergeforward(const unsigned char *T, const int *PA,
+ int *first, int *middle, int *last,
+ int *buf, int depth) {
+ int *a, *b, *c, *bufend;
+ int t;
+ int r;
+
+ bufend = buf + (middle - first) - 1;
+ ss_blockswap(buf, first, middle - first);
+
+ for(t = *(a = first), b = buf, c = middle;;) {
+ r = ss_compare(T, PA + *b, PA + *c, depth);
+ if(r < 0) {
+ do {
+ *a++ = *b;
+ if(bufend <= b) { *bufend = t; return; }
+ *b++ = *a;
+ } while(*b < 0);
+ } else if(r > 0) {
+ do {
+ *a++ = *c, *c++ = *a;
+ if(last <= c) {
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
+ *a = *b, *b = t;
+ return;
+ }
+ } while(*c < 0);
+ } else {
+ *c = ~*c;
+ do {
+ *a++ = *b;
+ if(bufend <= b) { *bufend = t; return; }
+ *b++ = *a;
+ } while(*b < 0);
+
+ do {
+ *a++ = *c, *c++ = *a;
+ if(last <= c) {
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
+ *a = *b, *b = t;
+ return;
+ }
+ } while(*c < 0);
+ }
+ }
+}
+
+/* Merge-backward with internal buffer. */
+static
+void
+ss_mergebackward(const unsigned char *T, const int *PA,
+ int *first, int *middle, int *last,
+ int *buf, int depth) {
+ const int *p1, *p2;
+ int *a, *b, *c, *bufend;
+ int t;
+ int r;
+ int x;
+
+ bufend = buf + (last - middle) - 1;
+ ss_blockswap(buf, middle, last - middle);
+
+ x = 0;
+ if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
+ else { p1 = PA + *bufend; }
+ if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
+ else { p2 = PA + *(middle - 1); }
+ for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
+ r = ss_compare(T, p1, p2, depth);
+ if(0 < r) {
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+ *a-- = *b;
+ if(b <= buf) { *buf = t; break; }
+ *b-- = *a;
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+ else { p1 = PA + *b; }
+ } else if(r < 0) {
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+ *a-- = *c, *c-- = *a;
+ if(c < first) {
+ while(buf < b) { *a-- = *b, *b-- = *a; }
+ *a = *b, *b = t;
+ break;
+ }
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+ else { p2 = PA + *c; }
+ } else {
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+ *a-- = ~*b;
+ if(b <= buf) { *buf = t; break; }
+ *b-- = *a;
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+ *a-- = *c, *c-- = *a;
+ if(c < first) {
+ while(buf < b) { *a-- = *b, *b-- = *a; }
+ *a = *b, *b = t;
+ break;
+ }
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+ else { p1 = PA + *b; }
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+ else { p2 = PA + *c; }
+ }
+ }
+}
+
+/* D&C based merge. */
+static
+void
+ss_swapmerge(const unsigned char *T, const int *PA,
+ int *first, int *middle, int *last,
+ int *buf, int bufsize, int depth) {
+#define STACK_SIZE SS_SMERGE_STACKSIZE
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK(a, b, c)\
+ do {\
+ if(((c) & 1) ||\
+ (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
+ *(a) = ~*(a);\
+ }\
+ if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
+ *(b) = ~*(b);\
+ }\
+ } while(0)
+ struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
+ int *l, *r, *lm, *rm;
+ int m, len, half;
+ int ssize;
+ int check, next;
+
+ for(check = 0, ssize = 0;;) {
+ if((last - middle) <= bufsize) {
+ if((first < middle) && (middle < last)) {
+ ss_mergebackward(T, PA, first, middle, last, buf, depth);
+ }
+ MERGE_CHECK(first, last, check);
+ STACK_POP(first, middle, last, check);
+ continue;
+ }
+
+ if((middle - first) <= bufsize) {
+ if(first < middle) {
+ ss_mergeforward(T, PA, first, middle, last, buf, depth);
+ }
+ MERGE_CHECK(first, last, check);
+ STACK_POP(first, middle, last, check);
+ continue;
+ }
+
+ for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
+ 0 < len;
+ len = half, half >>= 1) {
+ if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
+ PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+ m += half + 1;
+ half -= (len & 1) ^ 1;
+ }
+ }
+
+ if(0 < m) {
+ lm = middle - m, rm = middle + m;
+ ss_blockswap(lm, middle, m);
+ l = r = middle, next = 0;
+ if(rm < last) {
+ if(*rm < 0) {
+ *rm = ~*rm;
+ if(first < lm) { for(; *--l < 0;) { } next |= 4; }
+ next |= 1;
+ } else if(first < lm) {
+ for(; *r < 0; ++r) { }
+ next |= 2;
+ }
+ }
+
+ if((l - first) <= (last - r)) {
+ STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
+ middle = lm, last = l, check = (check & 3) | (next & 4);
+ } else {
+ if((next & 2) && (r == middle)) { next ^= 6; }
+ STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
+ first = r, middle = rm, check = (next & 3) | (check & 4);
+ }
+ } else {
+ if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
+ *middle = ~*middle;
+ }
+ MERGE_CHECK(first, last, check);
+ STACK_POP(first, middle, last, check);
+ }
+ }
+#undef STACK_SIZE
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Substring sort */
+static
+void
+sssort(const unsigned char *T, const int *PA,
+ int *first, int *last,
+ int *buf, int bufsize,
+ int depth, int n, int lastsuffix) {
+ int *a;
+#if SS_BLOCKSIZE != 0
+ int *b, *middle, *curbuf;
+ int j, k, curbufsize, limit;
+#endif
+ int i;
+
+ if(lastsuffix != 0) { ++first; }
+
+#if SS_BLOCKSIZE == 0
+ ss_mintrosort(T, PA, first, last, depth);
+#else
+ if((bufsize < SS_BLOCKSIZE) &&
+ (bufsize < (last - first)) &&
+ (bufsize < (limit = ss_isqrt(last - first)))) {
+ if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
+ buf = middle = last - limit, bufsize = limit;
+ } else {
+ middle = last, limit = 0;
+ }
+ for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+ ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#elif 1 < SS_BLOCKSIZE
+ ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#endif
+ curbufsize = last - (a + SS_BLOCKSIZE);
+ curbuf = a + SS_BLOCKSIZE;
+ if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+ for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
+ ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+ }
+ }
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+ ss_mintrosort(T, PA, a, middle, depth);
+#elif 1 < SS_BLOCKSIZE
+ ss_insertionsort(T, PA, a, middle, depth);
+#endif
+ for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
+ if(i & 1) {
+ ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
+ a -= k;
+ }
+ }
+ if(limit != 0) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+ ss_mintrosort(T, PA, middle, last, depth);
+#elif 1 < SS_BLOCKSIZE
+ ss_insertionsort(T, PA, middle, last, depth);
+#endif
+ ss_inplacemerge(T, PA, first, middle, last, depth);
+ }
+#endif
+
+ if(lastsuffix != 0) {
+ /* Insert last type B* suffix. */
+ int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
+ for(a = first, i = *(first - 1);
+ (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
+ ++a) {
+ *(a - 1) = *a;
+ }
+ *(a - 1) = i;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+int
+tr_ilg(int n) {
+ return (n & 0xffff0000) ?
+ ((n & 0xff000000) ?
+ 24 + lg_table[(n >> 24) & 0xff] :
+ 16 + lg_table[(n >> 16) & 0xff]) :
+ ((n & 0x0000ff00) ?
+ 8 + lg_table[(n >> 8) & 0xff] :
+ 0 + lg_table[(n >> 0) & 0xff]);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Simple insertionsort for small size groups. */
+static
+void
+tr_insertionsort(const int *ISAd, int *first, int *last) {
+ int *a, *b;
+ int t, r;
+
+ for(a = first + 1; a < last; ++a) {
+ for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+ do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
+ if(b < first) { break; }
+ }
+ if(r == 0) { *b = ~*b; }
+ *(b + 1) = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+void
+tr_fixdown(const int *ISAd, int *SA, int i, int size) {
+ int j, k;
+ int v;
+ int c, d, e;
+
+ for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+ d = ISAd[SA[k = j++]];
+ if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
+ if(d <= c) { break; }
+ }
+ SA[i] = v;
+}
+
+/* Simple top-down heapsort. */
+static
+void
+tr_heapsort(const int *ISAd, int *SA, int size) {
+ int i, m;
+ int t;
+
+ m = size;
+ if((size % 2) == 0) {
+ m--;
+ if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+ }
+
+ for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
+ if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
+ for(i = m - 1; 0 < i; --i) {
+ t = SA[0], SA[0] = SA[i];
+ tr_fixdown(ISAd, SA, 0, i);
+ SA[i] = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements. */
+static INLINE
+int *
+tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
+ int *t;
+ if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
+ if(ISAd[*v2] > ISAd[*v3]) {
+ if(ISAd[*v1] > ISAd[*v3]) { return v1; }
+ else { return v3; }
+ }
+ return v2;
+}
+
+/* Returns the median of five elements. */
+static INLINE
+int *
+tr_median5(const int *ISAd,
+ int *v1, int *v2, int *v3, int *v4, int *v5) {
+ int *t;
+ if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+ if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+ if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+ if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+ if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+ if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+ return v3;
+}
+
+/* Returns the pivot element. */
+static INLINE
+int *
+tr_pivot(const int *ISAd, int *first, int *last) {
+ int *middle;
+ int t;
+
+ t = last - first;
+ middle = first + t / 2;
+
+ if(t <= 512) {
+ if(t <= 32) {
+ return tr_median3(ISAd, first, middle, last - 1);
+ } else {
+ t >>= 2;
+ return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
+ }
+ }
+ t >>= 3;
+ first = tr_median3(ISAd, first, first + t, first + (t << 1));
+ middle = tr_median3(ISAd, middle - t, middle, middle + t);
+ last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
+ return tr_median3(ISAd, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+typedef struct _trbudget_t trbudget_t;
+struct _trbudget_t {
+ int chance;
+ int remain;
+ int incval;
+ int count;
+};
+
+static INLINE
+void
+trbudget_init(trbudget_t *budget, int chance, int incval) {
+ budget->chance = chance;
+ budget->remain = budget->incval = incval;
+}
+
+static INLINE
+int
+trbudget_check(trbudget_t *budget, int size) {
+ if(size <= budget->remain) { budget->remain -= size; return 1; }
+ if(budget->chance == 0) { budget->count += size; return 0; }
+ budget->remain += budget->incval - size;
+ budget->chance -= 1;
+ return 1;
+}
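This budget is what bounds the work tr_introsort may spend on highly repetitive input: trsort initialises it with chance = tr_ilg(n) * 2 / 3 and remain = incval = n, each trbudget_check deducts the size of the range being processed, and whenever remain runs dry one chance is consumed and the allowance is refilled. Once the chances are gone, further requests are tallied in count and refused, which makes tr_introsort abandon those ranges so trsort can pick them up again on a later pass. For n = 1000, for instance, this works out to 6 chances of roughly 1000 units each.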
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+void
+tr_partition(const int *ISAd,
+ int *first, int *middle, int *last,
+ int **pa, int **pb, int v) {
+ int *a, *b, *c, *d, *e, *f;
+ int t, s;
+ int x = 0;
+
+ for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ first += (b - a), last -= (d - c);
+ }
+ *pa = first, *pb = last;
+}
+
+static
+void
+tr_copy(int *ISA, const int *SA,
+ int *first, int *a, int *b, int *last,
+ int depth) {
+ /* sort suffixes of middle partition
+ by using sorted order of suffixes of left and right partition. */
+ int *c, *d, *e;
+ int s, v;
+
+ v = b - SA - 1;
+ for(c = first, d = a - 1; c <= d; ++c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *++d = s;
+ ISA[s] = d - SA;
+ }
+ }
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *--d = s;
+ ISA[s] = d - SA;
+ }
+ }
+}
+
+static
+void
+tr_partialcopy(int *ISA, const int *SA,
+ int *first, int *a, int *b, int *last,
+ int depth) {
+ int *c, *d, *e;
+ int s, v;
+ int rank, lastrank, newrank = -1;
+
+ v = b - SA - 1;
+ lastrank = -1;
+ for(c = first, d = a - 1; c <= d; ++c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *++d = s;
+ rank = ISA[s + depth];
+ if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+ ISA[s] = newrank;
+ }
+ }
+
+ lastrank = -1;
+ for(e = d; first <= e; --e) {
+ rank = ISA[*e];
+ if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
+ if(newrank != rank) { ISA[*e] = newrank; }
+ }
+
+ lastrank = -1;
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *--d = s;
+ rank = ISA[s + depth];
+ if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+ ISA[s] = newrank;
+ }
+ }
+}
+
+static
+void
+tr_introsort(int *ISA, const int *ISAd,
+ int *SA, int *first, int *last,
+ trbudget_t *budget) {
+#define STACK_SIZE TR_STACKSIZE
+ struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
+ int *a, *b, *c;
+ int t;
+ int v, x = 0;
+ int incr = ISAd - ISA;
+ int limit, next;
+ int ssize, trlink = -1;
+
+ for(ssize = 0, limit = tr_ilg(last - first);;) {
+
+ if(limit < 0) {
+ if(limit == -1) {
+ /* tandem repeat partition */
+ tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
+
+ /* update ranks */
+ if(a < last) {
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ }
+ if(b < last) {
+ for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+ }
+
+ /* push */
+ if(1 < (b - a)) {
+ STACK_PUSH5(NULL, a, b, 0, 0);
+ STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
+ trlink = ssize - 2;
+ }
+ if((a - first) <= (last - b)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
+ last = a, limit = tr_ilg(a - first);
+ } else if(1 < (last - b)) {
+ first = b, limit = tr_ilg(last - b);
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ } else {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
+ first = b, limit = tr_ilg(last - b);
+ } else if(1 < (a - first)) {
+ last = a, limit = tr_ilg(a - first);
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ } else if(limit == -2) {
+ /* tandem repeat copy */
+ a = stack[--ssize].b, b = stack[ssize].c;
+ if(stack[ssize].d == 0) {
+ tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+ } else {
+ if(0 <= trlink) { stack[trlink].d = -1; }
+ tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
+ }
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ } else {
+ /* sorted partition */
+ if(0 <= *first) {
+ a = first;
+ do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
+ first = a;
+ }
+ if(first < last) {
+ a = first; do { *a = ~*a; } while(*++a < 0);
+ next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
+ if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+ /* push */
+ if(trbudget_check(budget, a - first)) {
+ if((a - first) <= (last - a)) {
+ STACK_PUSH5(ISAd, a, last, -3, trlink);
+ ISAd += incr, last = a, limit = next;
+ } else {
+ if(1 < (last - a)) {
+ STACK_PUSH5(ISAd + incr, first, a, next, trlink);
+ first = a, limit = -3;
+ } else {
+ ISAd += incr, last = a, limit = next;
+ }
+ }
+ } else {
+ if(0 <= trlink) { stack[trlink].d = -1; }
+ if(1 < (last - a)) {
+ first = a, limit = -3;
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ continue;
+ }
+
+ if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+ tr_insertionsort(ISAd, first, last);
+ limit = -3;
+ continue;
+ }
+
+ if(limit-- == 0) {
+ tr_heapsort(ISAd, first, last - first);
+ for(a = last - 1; first < a; a = b) {
+ for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
+ }
+ limit = -3;
+ continue;
+ }
+
+ /* choose pivot */
+ a = tr_pivot(ISAd, first, last);
+ SWAP(*first, *a);
+ v = ISAd[*first];
+
+ /* partition */
+ tr_partition(ISAd, first, first + 1, last, &a, &b, v);
+ if((last - first) != (b - a)) {
+ next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
+
+ /* update ranks */
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+ /* push */
+ if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
+ if((a - first) <= (last - b)) {
+ if((last - b) <= (b - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ last = a;
+ } else if(1 < (last - b)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ first = b;
+ } else {
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else if((a - first) <= (b - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ last = a;
+ } else {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else {
+ if((a - first) <= (b - a)) {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ first = b;
+ } else if(1 < (a - first)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ last = a;
+ } else {
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else if((last - b) <= (b - a)) {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ first = b;
+ } else {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ }
+ } else {
+ if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
+ if((a - first) <= (last - b)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ last = a;
+ } else if(1 < (last - b)) {
+ first = b;
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ } else {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ first = b;
+ } else if(1 < (a - first)) {
+ last = a;
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ }
+ } else {
+ if(trbudget_check(budget, last - first)) {
+ limit = tr_ilg(last - first), ISAd += incr;
+ } else {
+ if(0 <= trlink) { stack[trlink].d = -1; }
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ }
+#undef STACK_SIZE
+}
+
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Tandem repeat sort */
+static
+void
+trsort(int *ISA, int *SA, int n, int depth) {
+ int *ISAd;
+ int *first, *last;
+ trbudget_t budget;
+ int t, skip, unsorted;
+
+ trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
+/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
+ for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
+ first = SA;
+ skip = 0;
+ unsorted = 0;
+ do {
+ if((t = *first) < 0) { first -= t; skip += t; }
+ else {
+ if(skip != 0) { *(first + skip) = skip; skip = 0; }
+ last = SA + ISA[t] + 1;
+ if(1 < (last - first)) {
+ budget.count = 0;
+ tr_introsort(ISA, ISAd, SA, first, last, &budget);
+ if(budget.count != 0) { unsorted += budget.count; }
+ else { skip = first - last; }
+ } else if((last - first) == 1) {
+ skip = -1;
+ }
+ first = last;
+ }
+ } while(first < (SA + n));
+ if(skip != 0) { *(first + skip) = skip; }
+ if(unsorted == 0) { break; }
+ }
+}
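Put differently, trsort is a prefix-doubling loop: each outer pass ranks suffixes using ISA values taken at offset ISAd - ISA, and the update ISAd += ISAd - ISA doubles that offset (1, 2, 4, 8, ... for the depth of 1 passed by sort_typeBstar), so after enough passes nothing remains unsorted and the loop terminates.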
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Sorts suffixes of type B*. */
+static
+int
+sort_typeBstar(const unsigned char *T, int *SA,
+ int *bucket_A, int *bucket_B,
+ int n, int openMP) {
+ int *PAb, *ISAb, *buf;
+#ifdef LIBBSC_OPENMP
+ int *curbuf;
+ int l;
+#endif
+ int i, j, k, t, m, bufsize;
+ int c0, c1;
+#ifdef LIBBSC_OPENMP
+ int d0, d1;
+#endif
+ (void)openMP;
+
+ /* Initialize bucket arrays. */
+ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
+ for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
+
+ /* Count the number of occurrences of the first one or two characters of each
+ type A, B and B* suffix. Moreover, store the beginning position of all
+ type B* suffixes into the array SA. */
+ for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
+ /* type A suffix. */
+ do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
+ if(0 <= i) {
+ /* type B* suffix. */
+ ++BUCKET_BSTAR(c0, c1);
+ SA[--m] = i;
+ /* type B suffix. */
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
+ ++BUCKET_B(c0, c1);
+ }
+ }
+ }
+ m = n - m;
+/*
+note:
+ A type B* suffix is lexicographically smaller than a type B suffix that
+ begins with the same first two characters.
+*/
+
+ /* Calculate the index of start/end point of each bucket. */
+ for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
+ t = i + BUCKET_A(c0);
+ BUCKET_A(c0) = i + j; /* start point */
+ i = t + BUCKET_B(c0, c0);
+ for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
+ j += BUCKET_BSTAR(c0, c1);
+ BUCKET_BSTAR(c0, c1) = j; /* end point */
+ i += BUCKET_B(c0, c1);
+ }
+ }
+
+ if(0 < m) {
+ /* Sort the type B* suffixes by their first two characters. */
+ PAb = SA + n - m; ISAb = SA + m;
+ for(i = m - 2; 0 <= i; --i) {
+ t = PAb[i], c0 = T[t], c1 = T[t + 1];
+ SA[--BUCKET_BSTAR(c0, c1)] = i;
+ }
+ t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
+ SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
+
+ /* Sort the type B* substrings using sssort. */
+#ifdef LIBBSC_OPENMP
+ if (openMP)
+ {
+ buf = SA + m;
+ c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
+#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
+ {
+ bufsize = (n - (2 * m)) / omp_get_num_threads();
+ curbuf = buf + omp_get_thread_num() * bufsize;
+ k = 0;
+ for(;;) {
+ #pragma omp critical(sssort_lock)
+ {
+ if(0 < (l = j)) {
+ d0 = c0, d1 = c1;
+ do {
+ k = BUCKET_BSTAR(d0, d1);
+ if(--d1 <= d0) {
+ d1 = ALPHABET_SIZE - 1;
+ if(--d0 < 0) { break; }
+ }
+ } while(((l - k) <= 1) && (0 < (l = k)));
+ c0 = d0, c1 = d1, j = k;
+ }
+ }
+ if(l == 0) { break; }
+ sssort(T, PAb, SA + k, SA + l,
+ curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
+ }
+ }
+ }
+ else
+ {
+ buf = SA + m, bufsize = n - (2 * m);
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+ i = BUCKET_BSTAR(c0, c1);
+ if(1 < (j - i)) {
+ sssort(T, PAb, SA + i, SA + j,
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
+ }
+ }
+ }
+ }
+#else
+ buf = SA + m, bufsize = n - (2 * m);
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+ i = BUCKET_BSTAR(c0, c1);
+ if(1 < (j - i)) {
+ sssort(T, PAb, SA + i, SA + j,
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
+ }
+ }
+ }
+#endif
+
+ /* Compute ranks of type B* substrings. */
+ for(i = m - 1; 0 <= i; --i) {
+ if(0 <= SA[i]) {
+ j = i;
+ do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+ SA[i + 1] = i - j;
+ if(i <= 0) { break; }
+ }
+ j = i;
+ do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
+ ISAb[SA[i]] = j;
+ }
+
+ /* Construct the inverse suffix array of type B* suffixes using trsort. */
+ trsort(ISAb, SA, m, 1);
+
+ /* Set the sorted order of type B* suffixes. */
+ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
+ if(0 <= i) {
+ t = i;
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
+ SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
+ }
+ }
+
+ /* Calculate the index of start/end point of each bucket. */
+ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
+ for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
+ i = BUCKET_A(c0 + 1) - 1;
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+ t = i - BUCKET_B(c0, c1);
+ BUCKET_B(c0, c1) = i; /* end point */
+
+ /* Move all type B* suffixes to the correct position. */
+ for(i = t, j = BUCKET_BSTAR(c0, c1);
+ j <= k;
+ --i, --k) { SA[i] = SA[k]; }
+ }
+ BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
+ BUCKET_B(c0, c0) = i; /* end point */
+ }
+ }
+
+ return m;
+}
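To make the classification concrete (an illustrative example, not part of the source): a suffix is type A if its first character is greater than the next one, or equal to it with the following suffix also type A (the final suffix counts as type A); otherwise it is type B, and a type B suffix whose immediate successor is type A is type B*. In T = "mississippi" the type B* suffixes start at positions 1, 4 and 7 (the "i"s directly before an "s" or "p"), so sort_typeBstar returns m = 3 and only those three suffixes are sorted explicitly; construct_SA and the construct_BWT variants induce the rest from them.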
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes. */
+static
+void
+construct_SA(const unsigned char *T, int *SA,
+ int *bucket_A, int *bucket_B,
+ int n, int m) {
+ int *i, *j, *k;
+ int s;
+ int c0, c1, c2;
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left. */
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+ i <= j;
+ --j) {
+ if(0 < (s = *j)) {
+ assert(T[s] == c1);
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+ assert(T[s - 1] <= T[s]);
+ *j = ~s;
+ c0 = T[--s];
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+ if(c0 != c2) {
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+ k = SA + BUCKET_B(c2 = c0, c1);
+ }
+ assert(k < j);
+ *k-- = s;
+ } else {
+ assert(((s == 0) && (T[s] == c1)) || (s < 0));
+ *j = ~s;
+ }
+ }
+ }
+ }
+
+ /* Construct the suffix array by using
+ the sorted order of type B suffixes. */
+ k = SA + BUCKET_A(c2 = T[n - 1]);
+ *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
+ /* Scan the suffix array from left to right. */
+ for(i = SA, j = SA + n; i < j; ++i) {
+ if(0 < (s = *i)) {
+ assert(T[s - 1] >= T[s]);
+ c0 = T[--s];
+ if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
+ if(c0 != c2) {
+ BUCKET_A(c2) = k - SA;
+ k = SA + BUCKET_A(c2 = c0);
+ }
+ assert(i < k);
+ *k++ = s;
+ } else {
+ assert(s < 0);
+ *i = ~s;
+ }
+ }
+}
+
+/* Constructs the burrows-wheeler transformed string directly
+ by using the sorted order of type B* suffixes. */
+static
+int
+construct_BWT(const unsigned char *T, int *SA,
+ int *bucket_A, int *bucket_B,
+ int n, int m) {
+ int *i, *j, *k, *orig;
+ int s;
+ int c0, c1, c2;
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left. */
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+ i <= j;
+ --j) {
+ if(0 < (s = *j)) {
+ assert(T[s] == c1);
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+ assert(T[s - 1] <= T[s]);
+ c0 = T[--s];
+ *j = ~((int)c0);
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+ if(c0 != c2) {
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+ k = SA + BUCKET_B(c2 = c0, c1);
+ }
+ assert(k < j);
+ *k-- = s;
+ } else if(s != 0) {
+ *j = ~s;
+#ifndef NDEBUG
+ } else {
+ assert(T[s] == c1);
+#endif
+ }
+ }
+ }
+ }
+
+ /* Construct the BWTed string by using
+ the sorted order of type B suffixes. */
+ k = SA + BUCKET_A(c2 = T[n - 1]);
+ *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
+ /* Scan the suffix array from left to right. */
+ for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
+ if(0 < (s = *i)) {
+ assert(T[s - 1] >= T[s]);
+ c0 = T[--s];
+ *i = c0;
+ if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
+ if(c0 != c2) {
+ BUCKET_A(c2) = k - SA;
+ k = SA + BUCKET_A(c2 = c0);
+ }
+ assert(i < k);
+ *k++ = s;
+ } else if(s != 0) {
+ *i = ~s;
+ } else {
+ orig = i;
+ }
+ }
+
+ return orig - SA;
+}
+
+/* Constructs the burrows-wheeler transformed string directly
+ by using the sorted order of type B* suffixes. */
+static
+int
+construct_BWT_indexes(const unsigned char *T, int *SA,
+ int *bucket_A, int *bucket_B,
+ int n, int m,
+ unsigned char * num_indexes, int * indexes) {
+ int *i, *j, *k, *orig;
+ int s;
+ int c0, c1, c2;
+
+ int mod = n / 8;
+ {
+ mod |= mod >> 1; mod |= mod >> 2;
+ mod |= mod >> 4; mod |= mod >> 8;
+ mod |= mod >> 16; mod >>= 1;
+
+ *num_indexes = (unsigned char)((n - 1) / (mod + 1));
+ }
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left. */
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+ i <= j;
+ --j) {
+ if(0 < (s = *j)) {
+ assert(T[s] == c1);
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+ assert(T[s - 1] <= T[s]);
+
+ if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
+
+ c0 = T[--s];
+ *j = ~((int)c0);
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+ if(c0 != c2) {
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+ k = SA + BUCKET_B(c2 = c0, c1);
+ }
+ assert(k < j);
+ *k-- = s;
+ } else if(s != 0) {
+ *j = ~s;
+#ifndef NDEBUG
+ } else {
+ assert(T[s] == c1);
+#endif
+ }
+ }
+ }
+ }
+
+ /* Construct the BWTed string by using
+ the sorted order of type B suffixes. */
+ k = SA + BUCKET_A(c2 = T[n - 1]);
+ if (T[n - 2] < c2) {
+ if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
+ *k++ = ~((int)T[n - 2]);
+ }
+ else {
+ *k++ = n - 1;
+ }
+
+ /* Scan the suffix array from left to right. */
+ for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
+ if(0 < (s = *i)) {
+ assert(T[s - 1] >= T[s]);
+
+ if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
+
+ c0 = T[--s];
+ *i = c0;
+ if(c0 != c2) {
+ BUCKET_A(c2) = k - SA;
+ k = SA + BUCKET_A(c2 = c0);
+ }
+ assert(i < k);
+ if((0 < s) && (T[s - 1] < c0)) {
+ if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
+ *k++ = ~((int)T[s - 1]);
+ } else
+ *k++ = s;
+ } else if(s != 0) {
+ *i = ~s;
+ } else {
+ orig = i;
+ }
+ }
+
+ return orig - SA;
+}
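A worked example of the sampling arithmetic above (illustrative): for n = 1000, n / 8 = 125, the bit-smearing yields 127 and the final shift leaves mod = 63, so mod + 1 = 64 is a power of two between n/16 and n/8. A position s satisfies (s & mod) == 0 exactly when it is a multiple of 64, in which case indexes[s / 64 - 1] records where that suffix ended up, and *num_indexes = (1000 - 1) / 64 = 15 samples are emitted in total.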
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+int
+divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
+ int *bucket_A, *bucket_B;
+ int m;
+ int err = 0;
+
+ /* Check arguments. */
+ if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
+ else if(n == 0) { return 0; }
+ else if(n == 1) { SA[0] = 0; return 0; }
+ else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
+
+ bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
+ bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
+
+ /* Suffixsort. */
+ if((bucket_A != NULL) && (bucket_B != NULL)) {
+ m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
+ construct_SA(T, SA, bucket_A, bucket_B, n, m);
+ } else {
+ err = -2;
+ }
+
+ free(bucket_B);
+ free(bucket_A);
+
+ return err;
+}
+
+int
+divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
+ int *B;
+ int *bucket_A, *bucket_B;
+ int m, pidx, i;
+
+ /* Check arguments. */
+ if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
+ else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
+
+ if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
+ bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
+ bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
+
+ /* Burrows-Wheeler Transform. */
+ if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
+ m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
+
+ if (num_indexes == NULL || indexes == NULL) {
+ pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
+ } else {
+ pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
+ }
+
+ /* Copy to output string. */
+ U[0] = T[n - 1];
+ for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
+ for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
+ pidx += 1;
+ } else {
+ pidx = -2;
+ }
+
+ free(bucket_B);
+ free(bucket_A);
+ if(A == NULL) { free(B); }
+
+ return pidx;
+}
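
Note on the hunk above: construct_BWT_indexes() is the variant of construct_BWT() that additionally records a sparse table of secondary indexes while it builds the transform, and the sampling step is the power-of-two mask derived from n / 8 at the top of the function. The sketch below only replays that mask arithmetic for one concrete length so the number of recorded indexes is visible; it is an illustration, not code from this repository.

#include <stdio.h>

/* Reproduces the `mod` computation from construct_BWT_indexes(). */
static int sampling_mask(int n) {
    int mod = n / 8;
    mod |= mod >> 1;  mod |= mod >> 2;
    mod |= mod >> 4;  mod |= mod >> 8;
    mod |= mod >> 16; mod >>= 1;          /* all-ones mask, one bit shorter than n/8 */
    return mod;
}

int main(void) {
    int n = 10000;                         /* example input length */
    int mod = sampling_mask(n);            /* 1023 */
    int step = mod + 1;                    /* 1024: largest power of two <= n/8 */
    int num_indexes = (n - 1) / step;      /* 9; the real code stores this in one byte */
    /* positions s with (s & mod) == 0, i.e. multiples of `step`, get an entry in indexes[] */
    printf("mask=%d step=%d num_indexes=%d\n", mod, step, num_indexes);
    return 0;
}
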
diff --git a/contrib/libs/zstd06/dictBuilder/divsufsort.h b/contrib/libs/zstd06/dictBuilder/divsufsort.h
index 546c302cb4..84686157ab 100644
--- a/contrib/libs/zstd06/dictBuilder/divsufsort.h
+++ b/contrib/libs/zstd06/dictBuilder/divsufsort.h
@@ -1,68 +1,68 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- * divsufsort.h for libdivsufsort-lite
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _DIVSUFSORT_H
-#define _DIVSUFSORT_H 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-
-/*- Prototypes -*/
-
-/**
- * Constructs the suffix array of a given string.
- * @param T[0..n-1] The input string.
- * @param SA[0..n-1] The output array of suffixes.
- * @param n The length of the given string.
- * @param openMP enables OpenMP optimization.
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-int
-divsufsort(const unsigned char *T, int *SA, int n, int openMP);
-
-/**
- * Constructs the burrows-wheeler transformed string of a given string.
- * @param T[0..n-1] The input string.
- * @param U[0..n-1] The output string. (can be T)
- * @param A[0..n-1] The temporary array. (can be NULL)
- * @param n The length of the given string.
- * @param num_indexes The length of secondary indexes array. (can be NULL)
- * @param indexes The secondary indexes array. (can be NULL)
- * @param openMP enables OpenMP optimization.
- * @return The primary index if no error occurred, -1 or -2 otherwise.
- */
-int
-divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP);
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif /* __cplusplus */
-
-#endif /* _DIVSUFSORT_H */
+/*
+ * divsufsort.h for libdivsufsort-lite
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_H
+#define _DIVSUFSORT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+
+/*- Prototypes -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @param openMP enables OpenMP optimization.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+int
+divsufsort(const unsigned char *T, int *SA, int n, int openMP);
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param num_indexes The length of secondary indexes array. (can be NULL)
+ * @param indexes The secondary indexes array. (can be NULL)
+ * @param openMP enables OpenMP optimization.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+int
+divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_H */
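
The two prototypes above are the entire public surface of this vendored libdivsufsort-lite. The fragment below is a minimal usage sketch written for illustration only: the plain "divsufsort.h" include path is an assumption (in this tree the header is reached through contrib/libs/zstd06/ and renames.h may remap the symbols), and the snippet is not taken from the repository.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "divsufsort.h"   /* assumed include path */

int main(void) {
    const unsigned char T[] = "abracadabra";
    int n = (int)strlen((const char *)T);

    /* Suffix array: SA[i] is the start position of the i-th smallest suffix of T. */
    int *SA = (int *)malloc((size_t)n * sizeof(int));
    if (divsufsort(T, SA, n, 0 /* openMP off */) != 0) return 1;

    /* BWT: U receives the transform; A == NULL lets divbwt allocate its own work
       area, and the two index arguments are optional (NULL disables them). */
    unsigned char *U = (unsigned char *)malloc((size_t)n);
    int pidx = divbwt(T, U, NULL, n, NULL, NULL, 0);
    if (pidx < 0) return 1;               /* -1: bad arguments, -2: out of memory */

    printf("primary index = %d, BWT = %.*s\n", pidx, n, (const char *)U);
    free(U);
    free(SA);
    return 0;
}
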
diff --git a/contrib/libs/zstd06/dictBuilder/zdict.c b/contrib/libs/zstd06/dictBuilder/zdict.c
index 4b19394073..95d291f409 100644
--- a/contrib/libs/zstd06/dictBuilder/zdict.c
+++ b/contrib/libs/zstd06/dictBuilder/zdict.c
@@ -1,949 +1,949 @@
-/*
- dictBuilder - dictionary builder for zstd
- Copyright (C) Yann Collet 2016
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Zstd homepage : https://www.zstd.net
-*/
-
-/*-**************************************
-* Compiler Options
-****************************************/
-/* Disable some Visual warning messages */
-#ifdef _MSC_VER
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#endif
-
-/* Unix Large Files support (>4GB) */
-#define _FILE_OFFSET_BITS 64
-#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
-# define _LARGEFILE_SOURCE
-#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
-# define _LARGEFILE64_SOURCE
-#endif
-
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stdlib.h> /* malloc, free */
-#include <string.h> /* memset */
-#include <stdio.h> /* fprintf, fopen, ftello64 */
-#include <time.h> /* clock */
-
-#include "mem.h" /* read */
-#include "error_private.h"
-#include "fse.h"
+/*
+ dictBuilder - dictionary builder for zstd
+ Copyright (C) Yann Collet 2016
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Zstd homepage : https://www.zstd.net
+*/
+
+/*-**************************************
+* Compiler Options
+****************************************/
+/* Disable some Visual warning messages */
+#ifdef _MSC_VER
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
+# define _LARGEFILE_SOURCE
+#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
+# define _LARGEFILE64_SOURCE
+#endif
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdlib.h> /* malloc, free */
+#include <string.h> /* memset */
+#include <stdio.h> /* fprintf, fopen, ftello64 */
+#include <time.h> /* clock */
+
+#include "mem.h" /* read */
+#include "error_private.h"
+#include "fse.h"
#include "huf_static.h"
-#include "zstd_internal.h"
-#include "divsufsort.h"
-#include "zdict_static.h"
-
-
-
-/*-*************************************
-* Constants
-***************************************/
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define DICTLISTSIZE 10000
-
-#define NOISELENGTH 32
-#define PRIME1 2654435761U
-#define PRIME2 2246822519U
-
-#define MINRATIO 4
-static const U32 g_compressionLevel_default = 5;
-static const U32 g_selectivity_default = 9;
-static const size_t g_provision_entropySize = 200;
-static const size_t g_min_fast_dictContent = 192;
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
-
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
- if (ZDICT_GetMilliSpan(g_time) > refreshRate) \
- { g_time = clock(); DISPLAY(__VA_ARGS__); \
- if (g_displayLevel>=4) fflush(stdout); } }
-static const unsigned refreshRate = 300;
-static clock_t g_time = 0;
-
-static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
-{
- const BYTE* const b = (const BYTE*)ptr;
- size_t u;
- for (u=0; u<length; u++)
- {
- BYTE c = b[u];
- if (c<32 || c>126) c = '.'; /* non-printable char */
- DISPLAYLEVEL(dlevel, "%c", c);
- }
-}
-
-
-/*-********************************************************
-* Helper functions
-**********************************************************/
-static unsigned ZDICT_GetMilliSpan(clock_t nPrevious)
-{
- clock_t nCurrent = clock();
- unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
- return nSpan;
-}
-
-unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
-
-const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
-
-/*-********************************************************
-* Dictionary training functions
-**********************************************************/
-static unsigned ZDICT_NbCommonBytes (register size_t val)
-{
- if (MEM_isLittleEndian()) {
- if (MEM_64bits()) {
-# if defined(_MSC_VER) && defined(_WIN64)
- unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_ctzll((U64)val) >> 3);
-# else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-# endif
- } else { /* 32 bits */
-# if defined(_MSC_VER)
- unsigned long r=0;
- _BitScanForward( &r, (U32)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_ctz((U32)val) >> 3);
-# else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-# endif
- }
- } else { /* Big Endian CPU */
- if (MEM_64bits()) {
-# if defined(_MSC_VER) && defined(_WIN64)
- unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_clzll(val) >> 3);
-# else
- unsigned r;
- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
- r += (!val);
- return r;
-# endif
- } else { /* 32 bits */
-# if defined(_MSC_VER)
- unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3)
- return (__builtin_clz((U32)val) >> 3);
-# else
- unsigned r;
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
- r += (!val);
- return r;
-# endif
- } }
-}
-
-
-/*! ZDICT_count() :
- Count the nb of common bytes between 2 pointers.
- Note : this function presumes end of buffer followed by noisy guard band.
-*/
-static size_t ZDICT_count(const void* pIn, const void* pMatch)
-{
- const char* const pStart = (const char*)pIn;
- for (;;) {
- size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
- if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; }
- pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
- return (size_t)((const char*)pIn - pStart);
- }
-}
-
-
-typedef struct {
- U32 pos;
- U32 length;
- U32 savings;
-} dictItem;
-
-static void ZDICT_initDictItem(dictItem* d)
-{
- d->pos = 1;
- d->length = 0;
- d->savings = (U32)(-1);
-}
-
-
-#define LLIMIT 64 /* heuristic determined experimentally */
-#define MINMATCHLENGTH 7 /* heuristic determined experimentally */
-static dictItem ZDICT_analyzePos(
- BYTE* doneMarks,
- const int* suffix, U32 start,
- const void* buffer, U32 minRatio)
-{
- U32 lengthList[LLIMIT] = {0};
- U32 cumulLength[LLIMIT] = {0};
- U32 savings[LLIMIT] = {0};
- const BYTE* b = (const BYTE*)buffer;
- size_t length;
- size_t maxLength = LLIMIT;
- size_t pos = suffix[start];
- U32 end = start;
- dictItem solution;
-
- /* init */
- memset(&solution, 0, sizeof(solution));
- doneMarks[pos] = 1;
-
- /* trivial repetition cases */
- if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2))
- ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
- ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
- /* skip and mark segment */
- U16 u16 = MEM_read16(b+pos+4);
- U32 u, e = 6;
- while (MEM_read16(b+pos+e) == u16) e+=2 ;
- if (b[pos+e] == b[pos+e-1]) e++;
- for (u=1; u<e; u++)
- doneMarks[pos+u] = 1;
- return solution;
- }
-
- /* look forward */
- do {
- end++;
- length = ZDICT_count(b + pos, b + suffix[end]);
- } while (length >=MINMATCHLENGTH);
-
- /* look backward */
- do {
- length = ZDICT_count(b + pos, b + *(suffix+start-1));
- if (length >=MINMATCHLENGTH) start--;
- } while(length >= MINMATCHLENGTH);
-
- /* exit if not found a minimum nb of repetitions */
- if (end-start < minRatio) {
- U32 idx;
- for(idx=start; idx<end; idx++)
- doneMarks[suffix[idx]] = 1;
- return solution;
- }
-
- { int i;
- U32 searchLength;
- U32 refinedStart = start;
- U32 refinedEnd = end;
-
- DISPLAYLEVEL(4, "\n");
- DISPLAYLEVEL(4, "found %3u matches of length >= %u at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
- DISPLAYLEVEL(4, "\n");
-
- for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
- BYTE currentChar = 0;
- U32 currentCount = 0;
- U32 currentID = refinedStart;
- U32 id;
- U32 selectedCount = 0;
- U32 selectedID = currentID;
- for (id =refinedStart; id < refinedEnd; id++) {
- if (b[ suffix[id] + searchLength] != currentChar) {
- if (currentCount > selectedCount) {
- selectedCount = currentCount;
- selectedID = currentID;
- }
- currentID = id;
- currentChar = b[ suffix[id] + searchLength];
- currentCount = 0;
- }
- currentCount ++;
- }
- if (currentCount > selectedCount) { /* for last */
- selectedCount = currentCount;
- selectedID = currentID;
- }
-
- if (selectedCount < minRatio)
- break;
- refinedStart = selectedID;
- refinedEnd = refinedStart + selectedCount;
- }
-
- /* evaluate gain based on new ref */
- start = refinedStart;
- pos = suffix[refinedStart];
- end = start;
- memset(lengthList, 0, sizeof(lengthList));
-
- /* look forward */
- do {
- end++;
- length = ZDICT_count(b + pos, b + suffix[end]);
- if (length >= LLIMIT) length = LLIMIT-1;
- lengthList[length]++;
- } while (length >=MINMATCHLENGTH);
-
- /* look backward */
- do {
- length = ZDICT_count(b + pos, b + suffix[start-1]);
- if (length >= LLIMIT) length = LLIMIT-1;
- lengthList[length]++;
- if (length >=MINMATCHLENGTH) start--;
- } while(length >= MINMATCHLENGTH);
-
- /* largest useful length */
- memset(cumulLength, 0, sizeof(cumulLength));
- cumulLength[maxLength-1] = lengthList[maxLength-1];
- for (i=(int)(maxLength-2); i>=0; i--)
- cumulLength[i] = cumulLength[i+1] + lengthList[i];
-
- for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break;
- maxLength = i;
-
- /* reduce maxLength in case of final into repetitive data */
- {
- U32 l = (U32)maxLength;
- BYTE c = b[pos + maxLength-1];
- while (b[pos+l-2]==c) l--;
- maxLength = l;
- }
- if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */
-
- /* calculate savings */
- savings[5] = 0;
- for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
- savings[i] = savings[i-1] + (lengthList[i] * (i-3));
-
- DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
- (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
-
- solution.pos = (U32)pos;
- solution.length = (U32)maxLength;
- solution.savings = savings[maxLength];
-
- /* mark positions done */
- {
- U32 id;
- U32 testedPos;
- for (id=start; id<end; id++) {
- U32 p, pEnd;
- testedPos = suffix[id];
- if (testedPos == pos)
- length = solution.length;
- else {
- length = ZDICT_count(b+pos, b+testedPos);
- if (length > solution.length) length = solution.length;
- }
- pEnd = (U32)(testedPos + length);
- for (p=testedPos; p<pEnd; p++)
- doneMarks[p] = 1;
- } } }
-
- return solution;
-}
-
-
-/*! ZDICT_checkMerge
- check if dictItem can be merged, do it if possible
- @return : id of destination elt, 0 if not merged
-*/
-static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
-{
- const U32 tableSize = table->pos;
- const U32 max = elt.pos + (elt.length-1);
-
- /* tail overlap */
- U32 u; for (u=1; u<tableSize; u++) {
- if (u==eltNbToSkip) continue;
- if ((table[u].pos > elt.pos) && (table[u].pos < max)) { /* overlap */
- /* append */
- U32 addedLength = table[u].pos - elt.pos;
- table[u].length += addedLength;
- table[u].pos = elt.pos;
- table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
- table[u].savings += elt.length / 8; /* rough approx */
- elt = table[u];
- while ((u>1) && (table[u-1].savings < elt.savings))
- table[u] = table[u-1], u--;
- table[u] = elt;
- return u;
- } }
-
- /* front overlap */
- for (u=1; u<tableSize; u++) {
- if (u==eltNbToSkip) continue;
- if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) { /* overlap */
- /* append */
- int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
- table[u].savings += elt.length / 8; /* rough approx */
- if (addedLength > 0) { /* otherwise, already included */
- table[u].length += addedLength;
- table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
- }
- elt = table[u];
- while ((u>1) && (table[u-1].savings < elt.savings))
- table[u] = table[u-1], u--;
- table[u] = elt;
- return u;
- } }
-
- return 0;
-}
-
-
-static void ZDICT_removeDictItem(dictItem* table, U32 id)
-{
- /* convention : first element is nb of elts */
- U32 max = table->pos;
- U32 u;
- if (!id) return; /* protection, should never happen */
- for (u=id; u<max-1; u++)
- table[u] = table[u+1];
- table->pos--;
-}
-
-
-static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
-{
- /* merge if possible */
- U32 mergeId = ZDICT_checkMerge(table, elt, 0);
- if (mergeId) {
- U32 newMerge = 1;
- while (newMerge) {
- newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
- if (newMerge) ZDICT_removeDictItem(table, mergeId);
- mergeId = newMerge;
- }
- return;
- }
-
- /* insert */
- {
- U32 current;
- U32 nextElt = table->pos;
- if (nextElt >= maxSize) nextElt = maxSize-1;
- current = nextElt-1;
- while (table[current].savings < elt.savings) {
- table[current+1] = table[current];
- current--;
- }
- table[current+1] = elt;
- table->pos = nextElt+1;
- }
-}
-
-
-static U32 ZDICT_dictSize(const dictItem* dictList)
-{
- U32 u, dictSize = 0;
- for (u=1; u<dictList[0].pos; u++)
- dictSize += dictList[u].length;
- return dictSize;
-}
-
-
-static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
- const void* const buffer, const size_t bufferSize, /* buffer must end with noisy guard band */
- const size_t* fileSizes, unsigned nbFiles,
- U32 shiftRatio, unsigned maxDictSize)
-{
- int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
- int* const suffix = suffix0+1;
- U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
- BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */
- U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
- U32 minRatio = nbFiles >> shiftRatio;
- int divSuftSortResult;
- size_t result = 0;
-
- /* init */
- DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
- if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
- result = ERROR(memory_allocation);
- goto _cleanup;
- }
- if (minRatio < MINRATIO) minRatio = MINRATIO;
- memset(doneMarks, 0, bufferSize+16);
-
- /* sort */
- DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
- divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
- if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
- suffix[bufferSize] = (int)bufferSize; /* leads into noise */
- suffix0[0] = (int)bufferSize; /* leads into noise */
- {
- /* build reverse suffix sort */
- size_t pos;
- for (pos=0; pos < bufferSize; pos++)
- reverseSuffix[suffix[pos]] = (U32)pos;
- /* build file pos */
- filePos[0] = 0;
- for (pos=1; pos<nbFiles; pos++)
- filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
- }
-
- DISPLAYLEVEL(2, "finding patterns ... \n");
- DISPLAYLEVEL(3, "minimum ratio : %u \n", minRatio);
-
- {
- U32 cursor; for (cursor=0; cursor < bufferSize; ) {
- dictItem solution;
- if (doneMarks[cursor]) { cursor++; continue; }
- solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
- if (solution.length==0) { cursor++; continue; }
- ZDICT_insertDictItem(dictList, dictListSize, solution);
- cursor += solution.length;
- DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
- } }
-
- /* limit dictionary size */
- {
- U32 max = dictList->pos; /* convention : nb of useful elts within dictList */
- U32 currentSize = 0;
- U32 n; for (n=1; n<max; n++) {
- currentSize += dictList[n].length;
- if (currentSize > maxDictSize) break;
- }
- dictList->pos = n;
- }
-
-_cleanup:
- free(suffix0);
- free(reverseSuffix);
- free(doneMarks);
- free(filePos);
- return result;
-}
-
-
-static void ZDICT_fillNoise(void* buffer, size_t length)
-{
- unsigned acc = PRIME1;
- size_t p=0;;
- for (p=0; p<length; p++) {
- acc *= PRIME2;
- ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
- }
-}
-
-
-typedef struct
-{
- ZSTD_CCtx* ref;
- ZSTD_CCtx* zc;
- void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
-} EStats_ress_t;
-
-
-static void ZDICT_countEStats(EStats_ress_t esr,
- U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
- const void* src, size_t srcSize)
-{
- const seqStore_t* seqStorePtr;
-
- if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */
- ZSTD_copyCCtx(esr.zc, esr.ref);
- ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
- seqStorePtr = ZSTD_getSeqStore(esr.zc);
-
- /* literals stats */
- { const BYTE* bytePtr;
- for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
- countLit[*bytePtr]++;
- }
-
- /* seqStats */
- { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
- ZSTD_seqToCodes(seqStorePtr, nbSeq);
-
- { const BYTE* codePtr = seqStorePtr->offCodeStart;
- size_t u;
- for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
- }
-
- { const BYTE* codePtr = seqStorePtr->mlCodeStart;
- size_t u;
- for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
- }
-
- { const BYTE* codePtr = seqStorePtr->llCodeStart;
- size_t u;
- for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
- } }
-}
-
-/*
-static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
-{
- unsigned u;
- size_t max=0;
- for (u=0; u<nbFiles; u++)
- if (max < fileSizes[u]) max = fileSizes[u];
- return max;
-}
-*/
-
-static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
-{
- size_t total;
- unsigned u;
- for (u=0, total=0; u<nbFiles; u++) total += fileSizes[u];
- return total;
-}
-
-#define OFFCODE_MAX 18 /* only applicable to first block */
-static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
- unsigned compressionLevel,
- const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
- const void* dictBuffer, size_t dictBufferSize)
-{
- U32 countLit[256];
- HUF_CREATE_STATIC_CTABLE(hufTable, 255);
- U32 offcodeCount[OFFCODE_MAX+1];
- short offcodeNCount[OFFCODE_MAX+1];
- U32 matchLengthCount[MaxML+1];
- short matchLengthNCount[MaxML+1];
- U32 litLengthCount[MaxLL+1];
- short litLengthNCount[MaxLL+1];
- EStats_ress_t esr;
- ZSTD_parameters params;
- U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
- size_t pos = 0, errorCode;
- size_t eSize = 0;
- size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
- size_t const averageSampleSize = totalSrcSize / nbFiles;
-
- /* init */
- for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
- for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
- for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
- for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
- esr.ref = ZSTD_createCCtx();
- esr.zc = ZSTD_createCCtx();
- esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
- if (!esr.ref || !esr.zc || !esr.workPlace) {
- eSize = ERROR(memory_allocation);
- DISPLAYLEVEL(1, "Not enough memory");
- goto _cleanup;
- }
- if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
- params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
- params.cParams.strategy = ZSTD_greedy;
- params.fParams.contentSizeFlag = 0;
- ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
-
- /* collect stats on all files */
- for (u=0; u<nbFiles; u++) {
- ZDICT_countEStats(esr,
- countLit, offcodeCount, matchLengthCount, litLengthCount,
- (const char*)srcBuffer + pos, fileSizes[u]);
- pos += fileSizes[u];
- }
-
- /* analyze */
- errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
- if (HUF_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "HUF_buildCTable error");
- goto _cleanup;
- }
- huffLog = (U32)errorCode;
-
- total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
- errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
- goto _cleanup;
- }
- Offlog = (U32)errorCode;
-
- total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
- errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
- goto _cleanup;
- }
- mlLog = (U32)errorCode;
-
- total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
- errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
- goto _cleanup;
- }
- llLog = (U32)errorCode;
-
- /* write result to buffer */
- errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
- if (HUF_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "HUF_writeCTable error");
- goto _cleanup;
- }
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
-
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
- goto _cleanup;
- }
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
-
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
- goto _cleanup;
- }
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
-
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
- goto _cleanup;
- }
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
-
-_cleanup:
- ZSTD_freeCCtx(esr.ref);
- ZSTD_freeCCtx(esr.zc);
- free(esr.workPlace);
-
- return eSize;
-}
-
-
-#define DIB_FASTSEGMENTSIZE 64
-/*! ZDICT_fastSampling() (based on an idea proposed by Giuseppe Ottaviano) :
- Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
- up to `dictSize`.
- Filling starts from the end of `dictBuffer`, down to maximum possible.
- if `dictSize` is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
- @return : amount of data written into `dictBuffer`,
- or an error code
-*/
-static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
- const void* samplesBuffer, size_t samplesSize)
-{
- char* dstPtr = (char*)dictBuffer + dictSize;
- const char* srcPtr = (const char*)samplesBuffer;
- size_t nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
- size_t segNb, interSize;
-
- if (nbSegments <= 2) return ERROR(srcSize_wrong);
- if (samplesSize < dictSize) return ERROR(srcSize_wrong);
-
- /* first and last segments are part of dictionary, in case they contain interesting header/footer */
- dstPtr -= DIB_FASTSEGMENTSIZE;
- memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
- dstPtr -= DIB_FASTSEGMENTSIZE;
- memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
-
- /* regularly copy a segment */
- interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
- srcPtr += DIB_FASTSEGMENTSIZE;
- for (segNb=2; segNb < nbSegments; segNb++) {
- srcPtr += interSize;
- dstPtr -= DIB_FASTSEGMENTSIZE;
- memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
- srcPtr += DIB_FASTSEGMENTSIZE;
- }
-
- return nbSegments * DIB_FASTSEGMENTSIZE;
-}
-
-
-#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
-/*! ZDICT_trainFromBuffer_unsafe() :
-* `samplesBuffer` must be followed by noisy guard band.
-* @return : size of dictionary.
-*/
-size_t ZDICT_trainFromBuffer_unsafe(
- void* dictBuffer, size_t maxDictSize,
- const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
- ZDICT_params_t params)
-{
- U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
- dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
- unsigned selectivity = params.selectivityLevel;
- unsigned compressionLevel = params.compressionLevel;
- size_t targetDictSize = maxDictSize;
- size_t sBuffSize;
- size_t dictSize = 0;
-
- /* checks */
+#include "zstd_internal.h"
+#include "divsufsort.h"
+#include "zdict_static.h"
+
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define DICTLISTSIZE 10000
+
+#define NOISELENGTH 32
+#define PRIME1 2654435761U
+#define PRIME2 2246822519U
+
+#define MINRATIO 4
+static const U32 g_compressionLevel_default = 5;
+static const U32 g_selectivity_default = 9;
+static const size_t g_provision_entropySize = 200;
+static const size_t g_min_fast_dictContent = 192;
+
+
+/*-*************************************
+* Console display
+***************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+ if (ZDICT_GetMilliSpan(g_time) > refreshRate) \
+ { g_time = clock(); DISPLAY(__VA_ARGS__); \
+ if (g_displayLevel>=4) fflush(stdout); } }
+static const unsigned refreshRate = 300;
+static clock_t g_time = 0;
+
+static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
+{
+ const BYTE* const b = (const BYTE*)ptr;
+ size_t u;
+ for (u=0; u<length; u++)
+ {
+ BYTE c = b[u];
+ if (c<32 || c>126) c = '.'; /* non-printable char */
+ DISPLAYLEVEL(dlevel, "%c", c);
+ }
+}
+
+
+/*-********************************************************
+* Helper functions
+**********************************************************/
+static unsigned ZDICT_GetMilliSpan(clock_t nPrevious)
+{
+ clock_t nCurrent = clock();
+ unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
+ return nSpan;
+}
+
+unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
+
+const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+
+/*-********************************************************
+* Dictionary training functions
+**********************************************************/
+static unsigned ZDICT_NbCommonBytes (register size_t val)
+{
+ if (MEM_isLittleEndian()) {
+ if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+ unsigned long r = 0;
+ _BitScanForward64( &r, (U64)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_ctzll((U64)val) >> 3);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+ } else { /* 32 bits */
+# if defined(_MSC_VER)
+ unsigned long r=0;
+ _BitScanForward( &r, (U32)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_ctz((U32)val) >> 3);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+ }
+ } else { /* Big Endian CPU */
+ if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_clzll(val) >> 3);
+# else
+ unsigned r;
+ const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
+ if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ } else { /* 32 bits */
+# if defined(_MSC_VER)
+ unsigned long r = 0;
+ _BitScanReverse( &r, (unsigned long)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+ return (__builtin_clz((U32)val) >> 3);
+# else
+ unsigned r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ } }
+}
+
+
+/*! ZDICT_count() :
+ Count the nb of common bytes between 2 pointers.
+ Note : this function presumes end of buffer followed by noisy guard band.
+*/
+static size_t ZDICT_count(const void* pIn, const void* pMatch)
+{
+ const char* const pStart = (const char*)pIn;
+ for (;;) {
+ size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+ if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; }
+ pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
+ return (size_t)((const char*)pIn - pStart);
+ }
+}
+
+
+typedef struct {
+ U32 pos;
+ U32 length;
+ U32 savings;
+} dictItem;
+
+static void ZDICT_initDictItem(dictItem* d)
+{
+ d->pos = 1;
+ d->length = 0;
+ d->savings = (U32)(-1);
+}
+
+
+#define LLIMIT 64 /* heuristic determined experimentally */
+#define MINMATCHLENGTH 7 /* heuristic determined experimentally */
+static dictItem ZDICT_analyzePos(
+ BYTE* doneMarks,
+ const int* suffix, U32 start,
+ const void* buffer, U32 minRatio)
+{
+ U32 lengthList[LLIMIT] = {0};
+ U32 cumulLength[LLIMIT] = {0};
+ U32 savings[LLIMIT] = {0};
+ const BYTE* b = (const BYTE*)buffer;
+ size_t length;
+ size_t maxLength = LLIMIT;
+ size_t pos = suffix[start];
+ U32 end = start;
+ dictItem solution;
+
+ /* init */
+ memset(&solution, 0, sizeof(solution));
+ doneMarks[pos] = 1;
+
+ /* trivial repetition cases */
+ if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2))
+ ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
+ ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
+ /* skip and mark segment */
+ U16 u16 = MEM_read16(b+pos+4);
+ U32 u, e = 6;
+ while (MEM_read16(b+pos+e) == u16) e+=2 ;
+ if (b[pos+e] == b[pos+e-1]) e++;
+ for (u=1; u<e; u++)
+ doneMarks[pos+u] = 1;
+ return solution;
+ }
+
+ /* look forward */
+ do {
+ end++;
+ length = ZDICT_count(b + pos, b + suffix[end]);
+ } while (length >=MINMATCHLENGTH);
+
+ /* look backward */
+ do {
+ length = ZDICT_count(b + pos, b + *(suffix+start-1));
+ if (length >=MINMATCHLENGTH) start--;
+ } while(length >= MINMATCHLENGTH);
+
+ /* exit if not found a minimum nb of repetitions */
+ if (end-start < minRatio) {
+ U32 idx;
+ for(idx=start; idx<end; idx++)
+ doneMarks[suffix[idx]] = 1;
+ return solution;
+ }
+
+ { int i;
+ U32 searchLength;
+ U32 refinedStart = start;
+ U32 refinedEnd = end;
+
+ DISPLAYLEVEL(4, "\n");
+ DISPLAYLEVEL(4, "found %3u matches of length >= %u at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
+ DISPLAYLEVEL(4, "\n");
+
+ for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
+ BYTE currentChar = 0;
+ U32 currentCount = 0;
+ U32 currentID = refinedStart;
+ U32 id;
+ U32 selectedCount = 0;
+ U32 selectedID = currentID;
+ for (id =refinedStart; id < refinedEnd; id++) {
+ if (b[ suffix[id] + searchLength] != currentChar) {
+ if (currentCount > selectedCount) {
+ selectedCount = currentCount;
+ selectedID = currentID;
+ }
+ currentID = id;
+ currentChar = b[ suffix[id] + searchLength];
+ currentCount = 0;
+ }
+ currentCount ++;
+ }
+ if (currentCount > selectedCount) { /* for last */
+ selectedCount = currentCount;
+ selectedID = currentID;
+ }
+
+ if (selectedCount < minRatio)
+ break;
+ refinedStart = selectedID;
+ refinedEnd = refinedStart + selectedCount;
+ }
+
+ /* evaluate gain based on new ref */
+ start = refinedStart;
+ pos = suffix[refinedStart];
+ end = start;
+ memset(lengthList, 0, sizeof(lengthList));
+
+ /* look forward */
+ do {
+ end++;
+ length = ZDICT_count(b + pos, b + suffix[end]);
+ if (length >= LLIMIT) length = LLIMIT-1;
+ lengthList[length]++;
+ } while (length >=MINMATCHLENGTH);
+
+ /* look backward */
+ do {
+ length = ZDICT_count(b + pos, b + suffix[start-1]);
+ if (length >= LLIMIT) length = LLIMIT-1;
+ lengthList[length]++;
+ if (length >=MINMATCHLENGTH) start--;
+ } while(length >= MINMATCHLENGTH);
+
+ /* largest useful length */
+ memset(cumulLength, 0, sizeof(cumulLength));
+ cumulLength[maxLength-1] = lengthList[maxLength-1];
+ for (i=(int)(maxLength-2); i>=0; i--)
+ cumulLength[i] = cumulLength[i+1] + lengthList[i];
+
+ for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break;
+ maxLength = i;
+
+ /* reduce maxLength in case of final into repetitive data */
+ {
+ U32 l = (U32)maxLength;
+ BYTE c = b[pos + maxLength-1];
+ while (b[pos+l-2]==c) l--;
+ maxLength = l;
+ }
+ if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */
+
+ /* calculate savings */
+ savings[5] = 0;
+ for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
+ savings[i] = savings[i-1] + (lengthList[i] * (i-3));
+
+ DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
+ (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
+
+ solution.pos = (U32)pos;
+ solution.length = (U32)maxLength;
+ solution.savings = savings[maxLength];
+
+ /* mark positions done */
+ {
+ U32 id;
+ U32 testedPos;
+ for (id=start; id<end; id++) {
+ U32 p, pEnd;
+ testedPos = suffix[id];
+ if (testedPos == pos)
+ length = solution.length;
+ else {
+ length = ZDICT_count(b+pos, b+testedPos);
+ if (length > solution.length) length = solution.length;
+ }
+ pEnd = (U32)(testedPos + length);
+ for (p=testedPos; p<pEnd; p++)
+ doneMarks[p] = 1;
+ } } }
+
+ return solution;
+}
+
+
+/*! ZDICT_checkMerge
+ check if dictItem can be merged, do it if possible
+ @return : id of destination elt, 0 if not merged
+*/
+static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
+{
+ const U32 tableSize = table->pos;
+ const U32 max = elt.pos + (elt.length-1);
+
+ /* tail overlap */
+ U32 u; for (u=1; u<tableSize; u++) {
+ if (u==eltNbToSkip) continue;
+ if ((table[u].pos > elt.pos) && (table[u].pos < max)) { /* overlap */
+ /* append */
+ U32 addedLength = table[u].pos - elt.pos;
+ table[u].length += addedLength;
+ table[u].pos = elt.pos;
+ table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
+ table[u].savings += elt.length / 8; /* rough approx */
+ elt = table[u];
+ while ((u>1) && (table[u-1].savings < elt.savings))
+ table[u] = table[u-1], u--;
+ table[u] = elt;
+ return u;
+ } }
+
+ /* front overlap */
+ for (u=1; u<tableSize; u++) {
+ if (u==eltNbToSkip) continue;
+ if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) { /* overlap */
+ /* append */
+ int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
+ table[u].savings += elt.length / 8; /* rough approx */
+ if (addedLength > 0) { /* otherwise, already included */
+ table[u].length += addedLength;
+ table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
+ }
+ elt = table[u];
+ while ((u>1) && (table[u-1].savings < elt.savings))
+ table[u] = table[u-1], u--;
+ table[u] = elt;
+ return u;
+ } }
+
+ return 0;
+}
+
+
+static void ZDICT_removeDictItem(dictItem* table, U32 id)
+{
+ /* convention : first element is nb of elts */
+ U32 max = table->pos;
+ U32 u;
+ if (!id) return; /* protection, should never happen */
+ for (u=id; u<max-1; u++)
+ table[u] = table[u+1];
+ table->pos--;
+}
+
+
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
+{
+ /* merge if possible */
+ U32 mergeId = ZDICT_checkMerge(table, elt, 0);
+ if (mergeId) {
+ U32 newMerge = 1;
+ while (newMerge) {
+ newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
+ if (newMerge) ZDICT_removeDictItem(table, mergeId);
+ mergeId = newMerge;
+ }
+ return;
+ }
+
+ /* insert */
+ {
+ U32 current;
+ U32 nextElt = table->pos;
+ if (nextElt >= maxSize) nextElt = maxSize-1;
+ current = nextElt-1;
+ while (table[current].savings < elt.savings) {
+ table[current+1] = table[current];
+ current--;
+ }
+ table[current+1] = elt;
+ table->pos = nextElt+1;
+ }
+}
+
+
+static U32 ZDICT_dictSize(const dictItem* dictList)
+{
+ U32 u, dictSize = 0;
+ for (u=1; u<dictList[0].pos; u++)
+ dictSize += dictList[u].length;
+ return dictSize;
+}
+
+
+static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
+ const void* const buffer, const size_t bufferSize, /* buffer must end with noisy guard band */
+ const size_t* fileSizes, unsigned nbFiles,
+ U32 shiftRatio, unsigned maxDictSize)
+{
+ int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
+ int* const suffix = suffix0+1;
+ U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
+ BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */
+ U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
+ U32 minRatio = nbFiles >> shiftRatio;
+ int divSuftSortResult;
+ size_t result = 0;
+
+ /* init */
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
+ if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
+ result = ERROR(memory_allocation);
+ goto _cleanup;
+ }
+ if (minRatio < MINRATIO) minRatio = MINRATIO;
+ memset(doneMarks, 0, bufferSize+16);
+
+ /* sort */
+ DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
+ divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
+ if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
+ suffix[bufferSize] = (int)bufferSize; /* leads into noise */
+ suffix0[0] = (int)bufferSize; /* leads into noise */
+ {
+ /* build reverse suffix sort */
+ size_t pos;
+ for (pos=0; pos < bufferSize; pos++)
+ reverseSuffix[suffix[pos]] = (U32)pos;
+ /* build file pos */
+ filePos[0] = 0;
+ for (pos=1; pos<nbFiles; pos++)
+ filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
+ }
+
+ DISPLAYLEVEL(2, "finding patterns ... \n");
+ DISPLAYLEVEL(3, "minimum ratio : %u \n", minRatio);
+
+ {
+ U32 cursor; for (cursor=0; cursor < bufferSize; ) {
+ dictItem solution;
+ if (doneMarks[cursor]) { cursor++; continue; }
+ solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
+ if (solution.length==0) { cursor++; continue; }
+ ZDICT_insertDictItem(dictList, dictListSize, solution);
+ cursor += solution.length;
+ DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
+ } }
+
+ /* limit dictionary size */
+ {
+ U32 max = dictList->pos; /* convention : nb of useful elts within dictList */
+ U32 currentSize = 0;
+ U32 n; for (n=1; n<max; n++) {
+ currentSize += dictList[n].length;
+ if (currentSize > maxDictSize) break;
+ }
+ dictList->pos = n;
+ }
+
+_cleanup:
+ free(suffix0);
+ free(reverseSuffix);
+ free(doneMarks);
+ free(filePos);
+ return result;
+}
+
+
+static void ZDICT_fillNoise(void* buffer, size_t length)
+{
+ unsigned acc = PRIME1;
+    size_t p=0;
+ for (p=0; p<length; p++) {
+ acc *= PRIME2;
+ ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
+ }
+}
+
+
+typedef struct
+{
+ ZSTD_CCtx* ref;
+ ZSTD_CCtx* zc;
+ void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
+} EStats_ress_t;
+
+
+static void ZDICT_countEStats(EStats_ress_t esr,
+ U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
+ const void* src, size_t srcSize)
+{
+ const seqStore_t* seqStorePtr;
+
+ if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */
+ ZSTD_copyCCtx(esr.zc, esr.ref);
+ ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+ seqStorePtr = ZSTD_getSeqStore(esr.zc);
+
+ /* literals stats */
+ { const BYTE* bytePtr;
+ for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
+ countLit[*bytePtr]++;
+ }
+
+ /* seqStats */
+ { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
+ ZSTD_seqToCodes(seqStorePtr, nbSeq);
+
+ { const BYTE* codePtr = seqStorePtr->offCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
+ }
+
+ { const BYTE* codePtr = seqStorePtr->mlCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
+ }
+
+ { const BYTE* codePtr = seqStorePtr->llCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
+ } }
+}
+
+/*
+static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
+{
+ unsigned u;
+ size_t max=0;
+ for (u=0; u<nbFiles; u++)
+ if (max < fileSizes[u]) max = fileSizes[u];
+ return max;
+}
+*/
+
+static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
+{
+ size_t total;
+ unsigned u;
+ for (u=0, total=0; u<nbFiles; u++) total += fileSizes[u];
+ return total;
+}
+
+#define OFFCODE_MAX 18 /* only applicable to first block */
+static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
+ unsigned compressionLevel,
+ const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
+ const void* dictBuffer, size_t dictBufferSize)
+{
+ U32 countLit[256];
+ HUF_CREATE_STATIC_CTABLE(hufTable, 255);
+ U32 offcodeCount[OFFCODE_MAX+1];
+ short offcodeNCount[OFFCODE_MAX+1];
+ U32 matchLengthCount[MaxML+1];
+ short matchLengthNCount[MaxML+1];
+ U32 litLengthCount[MaxLL+1];
+ short litLengthNCount[MaxLL+1];
+ EStats_ress_t esr;
+ ZSTD_parameters params;
+ U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
+ size_t pos = 0, errorCode;
+ size_t eSize = 0;
+ size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
+ size_t const averageSampleSize = totalSrcSize / nbFiles;
+
+ /* init */
+ for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
+ for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
+ for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
+ for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+ esr.ref = ZSTD_createCCtx();
+ esr.zc = ZSTD_createCCtx();
+ esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
+ if (!esr.ref || !esr.zc || !esr.workPlace) {
+ eSize = ERROR(memory_allocation);
+ DISPLAYLEVEL(1, "Not enough memory");
+ goto _cleanup;
+ }
+ if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
+ params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
+ params.cParams.strategy = ZSTD_greedy;
+ params.fParams.contentSizeFlag = 0;
+ ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
+
+ /* collect stats on all files */
+ for (u=0; u<nbFiles; u++) {
+ ZDICT_countEStats(esr,
+ countLit, offcodeCount, matchLengthCount, litLengthCount,
+ (const char*)srcBuffer + pos, fileSizes[u]);
+ pos += fileSizes[u];
+ }
+
+ /* analyze */
+ errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
+ if (HUF_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "HUF_buildCTable error");
+ goto _cleanup;
+ }
+ huffLog = (U32)errorCode;
+
+ total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
+ errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
+ if (FSE_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
+ goto _cleanup;
+ }
+ Offlog = (U32)errorCode;
+
+ total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
+ errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
+ if (FSE_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
+ goto _cleanup;
+ }
+ mlLog = (U32)errorCode;
+
+ total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
+ errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
+ if (FSE_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
+ goto _cleanup;
+ }
+ llLog = (U32)errorCode;
+
+ /* write result to buffer */
+ errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
+ if (HUF_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "HUF_writeCTable error");
+ goto _cleanup;
+ }
+ dstBuffer = (char*)dstBuffer + errorCode;
+ maxDstSize -= errorCode;
+ eSize += errorCode;
+
+ errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
+ if (FSE_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
+ goto _cleanup;
+ }
+ dstBuffer = (char*)dstBuffer + errorCode;
+ maxDstSize -= errorCode;
+ eSize += errorCode;
+
+ errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
+ if (FSE_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
+ goto _cleanup;
+ }
+ dstBuffer = (char*)dstBuffer + errorCode;
+ maxDstSize -= errorCode;
+ eSize += errorCode;
+
+ errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
+ if (FSE_isError(errorCode)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+ goto _cleanup;
+ }
+ dstBuffer = (char*)dstBuffer + errorCode;
+ maxDstSize -= errorCode;
+ eSize += errorCode;
+
+_cleanup:
+ ZSTD_freeCCtx(esr.ref);
+ ZSTD_freeCCtx(esr.zc);
+ free(esr.workPlace);
+
+ return eSize;
+}
+
+
+#define DIB_FASTSEGMENTSIZE 64
+/*! ZDICT_fastSampling() (based on an idea proposed by Giuseppe Ottaviano) :
+ Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
+ up to `dictSize`.
+ Filling starts from the end of `dictBuffer`, down to maximum possible.
+    If `dictSize` is not a multiple of DIB_FASTSEGMENTSIZE, some bytes at the beginning of `dictBuffer` won't be used.
+ @return : amount of data written into `dictBuffer`,
+ or an error code
+*/
+static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
+ const void* samplesBuffer, size_t samplesSize)
+{
+ char* dstPtr = (char*)dictBuffer + dictSize;
+ const char* srcPtr = (const char*)samplesBuffer;
+ size_t nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
+ size_t segNb, interSize;
+
+ if (nbSegments <= 2) return ERROR(srcSize_wrong);
+ if (samplesSize < dictSize) return ERROR(srcSize_wrong);
+
+ /* first and last segments are part of dictionary, in case they contain interesting header/footer */
+ dstPtr -= DIB_FASTSEGMENTSIZE;
+ memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
+ dstPtr -= DIB_FASTSEGMENTSIZE;
+ memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
+
+ /* regularly copy a segment */
+ interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
+ srcPtr += DIB_FASTSEGMENTSIZE;
+ for (segNb=2; segNb < nbSegments; segNb++) {
+ srcPtr += interSize;
+ dstPtr -= DIB_FASTSEGMENTSIZE;
+ memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
+ srcPtr += DIB_FASTSEGMENTSIZE;
+ }
+
+ return nbSegments * DIB_FASTSEGMENTSIZE;
+}
+
+
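To make the stripe layout concrete: with a 1 KB dictionary target, the 64-byte segment size above yields 16 stripes; the first stripe keeps the head of the samples, the second keeps the tail, and the remaining 14 are taken at regular intervals in between. A standalone sketch of the same arithmetic (illustrative sizes only, not calling the static function itself):

#include <stdio.h>
#include <stddef.h>

#define DIB_FASTSEGMENTSIZE 64

int main(void)
{
    size_t const dictSize    = 1024;      /* target dictionary size */
    size_t const samplesSize = 1000000;   /* total size of the concatenated samples */

    size_t const nbSegments = dictSize / DIB_FASTSEGMENTSIZE;   /* 16 stripes */
    size_t const interSize  = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);

    printf("%zu stripes of %d bytes, one every ~%zu bytes of input\n",
           nbSegments, DIB_FASTSEGMENTSIZE, interSize);
    return 0;
}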
+#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
+/*! ZDICT_trainFromBuffer_unsafe() :
+* `samplesBuffer` must be followed by noisy guard band.
+* @return : size of dictionary.
+*/
+size_t ZDICT_trainFromBuffer_unsafe(
+ void* dictBuffer, size_t maxDictSize,
+ const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
+ ZDICT_params_t params)
+{
+ U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
+ dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
+ unsigned selectivity = params.selectivityLevel;
+ unsigned compressionLevel = params.compressionLevel;
+ size_t targetDictSize = maxDictSize;
+ size_t sBuffSize;
+ size_t dictSize = 0;
+
+ /* checks */
if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
- if (!dictList) return ERROR(memory_allocation);
-
- /* init */
- { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
+ if (!dictList) return ERROR(memory_allocation);
+
+ /* init */
+ { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
if (sBuffSize < DIB_MINSAMPLESSIZE) return 0; /* not enough source to create dictionary */
- ZDICT_initDictItem(dictList);
- g_displayLevel = params.notificationLevel;
- if (selectivity==0) selectivity = g_selectivity_default;
- if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
-
- /* build dictionary */
- if (selectivity>1) { /* selectivity == 1 => fast mode */
- ZDICT_trainBuffer(dictList, dictListSize,
- samplesBuffer, sBuffSize,
- sampleSizes, nbSamples,
- selectivity, (U32)targetDictSize);
-
- /* display best matches */
- if (g_displayLevel>= 3) {
- U32 const nb = 25;
- U32 const dictContentSize = ZDICT_dictSize(dictList);
- U32 u;
- DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
- DISPLAYLEVEL(3, "list %u best segments \n", nb);
- for (u=1; u<=nb; u++) {
- U32 p = dictList[u].pos;
- U32 l = dictList[u].length;
- U32 d = MIN(40, l);
- DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
- u, l, p, dictList[u].savings);
- ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
- DISPLAYLEVEL(3, "| \n");
- } } }
-
- /* create dictionary */
- { U32 dictContentSize = ZDICT_dictSize(dictList);
- size_t hSize;
- BYTE* ptr;
- U32 u;
-
- /* build dict content */
- ptr = (BYTE*)dictBuffer + maxDictSize;
- for (u=1; u<dictList->pos; u++) {
- U32 l = dictList[u].length;
- ptr -= l;
+ ZDICT_initDictItem(dictList);
+ g_displayLevel = params.notificationLevel;
+ if (selectivity==0) selectivity = g_selectivity_default;
+ if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
+
+ /* build dictionary */
+ if (selectivity>1) { /* selectivity == 1 => fast mode */
+ ZDICT_trainBuffer(dictList, dictListSize,
+ samplesBuffer, sBuffSize,
+ sampleSizes, nbSamples,
+ selectivity, (U32)targetDictSize);
+
+ /* display best matches */
+ if (g_displayLevel>= 3) {
+ U32 const nb = 25;
+ U32 const dictContentSize = ZDICT_dictSize(dictList);
+ U32 u;
+ DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
+ DISPLAYLEVEL(3, "list %u best segments \n", nb);
+ for (u=1; u<=nb; u++) {
+ U32 p = dictList[u].pos;
+ U32 l = dictList[u].length;
+ U32 d = MIN(40, l);
+ DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
+ u, l, p, dictList[u].savings);
+ ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
+ DISPLAYLEVEL(3, "| \n");
+ } } }
+
+ /* create dictionary */
+ { U32 dictContentSize = ZDICT_dictSize(dictList);
+ size_t hSize;
+ BYTE* ptr;
+ U32 u;
+
+ /* build dict content */
+ ptr = (BYTE*)dictBuffer + maxDictSize;
+ for (u=1; u<dictList->pos; u++) {
+ U32 l = dictList[u].length;
+ ptr -= l;
if (ptr<(BYTE*)dictBuffer) return ERROR(GENERIC); /* should not happen */
- memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
- }
-
- /* fast mode dict content */
- if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
- DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
- DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
- dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
- samplesBuffer, sBuffSize);
- }
-
- /* dictionary header */
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
- hSize = 4;
-
- /* entropic tables */
- DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
- DISPLAYLEVEL(2, "statistics ... \n");
- hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4,
- compressionLevel,
- samplesBuffer, sampleSizes, nbSamples,
- (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize);
-
- if (hSize + dictContentSize < maxDictSize)
- memmove((char*)dictBuffer + hSize, (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize);
- dictSize = MIN(maxDictSize, hSize+dictContentSize);
- }
-
- /* clean up */
- free(dictList);
- return dictSize;
-}
-
-
-/* issue : samplesBuffer needs to be followed by a noisy guard band.
-* workaround : duplicate the buffer, and add the noise */
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t params)
-{
- void* newBuff;
- size_t sBuffSize;
-
- { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
- if (sBuffSize==0) return 0; /* empty content => no dictionary */
- newBuff = malloc(sBuffSize + NOISELENGTH);
- if (!newBuff) return ERROR(memory_allocation);
-
- memcpy(newBuff, samplesBuffer, sBuffSize);
- ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
-
- { size_t const result = ZDICT_trainFromBuffer_unsafe(
- dictBuffer, dictBufferCapacity,
- newBuff, samplesSizes, nbSamples,
- params);
- free(newBuff);
- return result; }
-}
-
-
-size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
-{
- ZDICT_params_t params;
- memset(&params, 0, sizeof(params));
- return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
- samplesBuffer, samplesSizes, nbSamples,
- params);
-}
-
+ memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
+ }
+
+ /* fast mode dict content */
+ if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
+ DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
+ DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
+ dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
+ samplesBuffer, sBuffSize);
+ }
+
+ /* dictionary header */
+ MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
+ hSize = 4;
+
+ /* entropic tables */
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
+ DISPLAYLEVEL(2, "statistics ... \n");
+ hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4,
+ compressionLevel,
+ samplesBuffer, sampleSizes, nbSamples,
+ (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize);
+
+ if (hSize + dictContentSize < maxDictSize)
+ memmove((char*)dictBuffer + hSize, (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize);
+ dictSize = MIN(maxDictSize, hSize+dictContentSize);
+ }
+
+ /* clean up */
+ free(dictList);
+ return dictSize;
+}
+
+
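The function above assembles the dictionary back-to-front: the selected segments are copied from the end of `dictBuffer` toward the front, the 4-byte magic number is written at offset 0, the entropy tables produced by ZDICT_analyzeEntropy() follow it, and a final memmove pulls the content down so it starts right after the tables. A standalone sketch of the resulting size arithmetic (illustrative values, not part of zdict.c):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
    size_t const maxDictSize     = 112 * 1024;   /* capacity of dictBuffer */
    size_t const dictContentSize =  90 * 1024;   /* selected segments, built at the buffer's end */
    size_t const hSize           = 4 + 2000;     /* 4-byte magic + entropy tables (illustrative) */

    /* content initially occupies [maxDictSize - dictContentSize, maxDictSize);
       after the memmove it occupies [hSize, hSize + dictContentSize) */
    size_t const dictSize = (hSize + dictContentSize < maxDictSize) ? hSize + dictContentSize
                                                                    : maxDictSize;
    printf("header %zu bytes, content %zu bytes, final dictionary %zu bytes\n",
           hSize, dictContentSize, dictSize);
    return 0;
}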
+/* issue : samplesBuffer needs to be followed by a noisy guard band.
+* workaround : duplicate the buffer, and add the noise */
+size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+ ZDICT_params_t params)
+{
+ void* newBuff;
+ size_t sBuffSize;
+
+ { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
+ if (sBuffSize==0) return 0; /* empty content => no dictionary */
+ newBuff = malloc(sBuffSize + NOISELENGTH);
+ if (!newBuff) return ERROR(memory_allocation);
+
+ memcpy(newBuff, samplesBuffer, sBuffSize);
+ ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
+
+ { size_t const result = ZDICT_trainFromBuffer_unsafe(
+ dictBuffer, dictBufferCapacity,
+ newBuff, samplesSizes, nbSamples,
+ params);
+ free(newBuff);
+ return result; }
+}
+
+
+size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
+{
+ ZDICT_params_t params;
+ memset(&params, 0, sizeof(params));
+ return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
+ samplesBuffer, samplesSizes, nbSamples,
+ params);
+}
+
diff --git a/contrib/libs/zstd06/dictBuilder/zdict.h b/contrib/libs/zstd06/dictBuilder/zdict.h
index a898cb9c32..d9e6d3262d 100644
--- a/contrib/libs/zstd06/dictBuilder/zdict.h
+++ b/contrib/libs/zstd06/dictBuilder/zdict.h
@@ -1,68 +1,68 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- dictBuilder header file
- Copyright (C) Yann Collet 2016
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Zstd source repository : https://www.zstd.net
-*/
-
-#ifndef DICTBUILDER_H_001
-#define DICTBUILDER_H_001
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-* Public functions
-***************************************/
-/*! ZDICT_trainFromBuffer() :
- Train a dictionary from a memory buffer `samplesBuffer`,
- where `nbSamples` samples have been stored concatenated.
- Each sample size is provided into an orderly table `samplesSizes`.
- Resulting dictionary will be saved into `dictBuffer`.
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- or an error code, which can be tested by ZDICT_isError().
-*/
-size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
-
-/*-*************************************
-* Helper functions
-***************************************/
-unsigned ZDICT_isError(size_t errorCode);
-const char* ZDICT_getErrorName(size_t errorCode);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif
+/*
+ dictBuilder header file
+ Copyright (C) Yann Collet 2016
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Zstd source repository : https://www.zstd.net
+*/
+
+#ifndef DICTBUILDER_H_001
+#define DICTBUILDER_H_001
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Public functions
+***************************************/
+/*! ZDICT_trainFromBuffer() :
+ Train a dictionary from a memory buffer `samplesBuffer`,
+ where `nbSamples` samples have been stored concatenated.
+ Each sample size is provided into an orderly table `samplesSizes`.
+ Resulting dictionary will be saved into `dictBuffer`.
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ or an error code, which can be tested by ZDICT_isError().
+*/
+size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
+
+
+/*-*************************************
+* Helper functions
+***************************************/
+unsigned ZDICT_isError(size_t errorCode);
+const char* ZDICT_getErrorName(size_t errorCode);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
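A minimal calling sketch for the API declared above (assuming the program links against this library; note that very small sample sets simply return 0, since the trainer requires a minimum amount of source data):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zdict.h"

int main(void)
{
    /* toy samples, stored concatenated, with their sizes in an ordered table */
    const char* const parts[3] = { "GET /index.html HTTP/1.1",
                                   "GET /logo.png HTTP/1.1",
                                   "GET /api/v1/status HTTP/1.1" };
    char samples[256];
    size_t sampleSizes[3];
    size_t pos = 0;
    unsigned u;
    for (u = 0; u < 3; u++) {
        sampleSizes[u] = strlen(parts[u]);
        memcpy(samples + pos, parts[u], sampleSizes[u]);
        pos += sampleSizes[u];
    }

    {   size_t const dictCapacity = 16 * 1024;
        void* const dictBuffer = malloc(dictCapacity);
        if (!dictBuffer) return 1;
        {   size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, dictCapacity,
                                                          samples, sampleSizes, 3);
            if (ZDICT_isError(dictSize))
                printf("training failed: %s\n", ZDICT_getErrorName(dictSize));
            else
                printf("trained a %zu-byte dictionary (0 means: not enough sample data)\n", dictSize);
        }
        free(dictBuffer);
    }
    return 0;
}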
diff --git a/contrib/libs/zstd06/dictBuilder/zdict_static.h b/contrib/libs/zstd06/dictBuilder/zdict_static.h
index 6553e904ae..f83f917008 100644
--- a/contrib/libs/zstd06/dictBuilder/zdict_static.h
+++ b/contrib/libs/zstd06/dictBuilder/zdict_static.h
@@ -1,81 +1,81 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- dictBuilder header file
- for static linking only
- Copyright (C) Yann Collet 2016
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Zstd source repository : https://www.zstd.net
-*/
-
-/* This library is EXPERIMENTAL, below API is not yet stable */
-
-#ifndef DICTBUILDER_STATIC_H_002
-#define DICTBUILDER_STATIC_H_002
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include "zdict.h"
-
-
-/*-*************************************
-* Public type
-***************************************/
-typedef struct {
- unsigned selectivityLevel; /* 0 means default; larger => bigger selection => larger dictionary */
- unsigned compressionLevel; /* 0 means default; target a specific zstd compression level */
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
- unsigned reserved[3]; /* space for future parameters */
-} ZDICT_params_t;
-
-
-/*-*************************************
-* Public functions
-***************************************/
-/*! ZDICT_trainFromBuffer_advanced() :
- Same as ZDICT_trainFromBuffer() with control over more parameters.
- `parameters` is optional and can be provided with values set to 0 to mean "default".
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`)
- or an error code, which can be tested by DiB_isError().
- note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using ZDICT_setNotificationLevel()
-*/
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t parameters);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* DICTBUILDER_STATIC_H_002 */
+/*
+ dictBuilder header file
+ for static linking only
+ Copyright (C) Yann Collet 2016
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Zstd source repository : https://www.zstd.net
+*/
+
+/* This library is EXPERIMENTAL, below API is not yet stable */
+
+#ifndef DICTBUILDER_STATIC_H_002
+#define DICTBUILDER_STATIC_H_002
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "zdict.h"
+
+
+/*-*************************************
+* Public type
+***************************************/
+typedef struct {
+ unsigned selectivityLevel; /* 0 means default; larger => bigger selection => larger dictionary */
+ unsigned compressionLevel; /* 0 means default; target a specific zstd compression level */
+ unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+ unsigned reserved[3]; /* space for future parameters */
+} ZDICT_params_t;
+
+
+/*-*************************************
+* Public functions
+***************************************/
+/*! ZDICT_trainFromBuffer_advanced() :
+ Same as ZDICT_trainFromBuffer() with control over more parameters.
+ `parameters` is optional and can be provided with values set to 0 to mean "default".
+ @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`)
+ or an error code, which can be tested by DiB_isError().
+ note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using ZDICT_setNotificationLevel()
+*/
+size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+ ZDICT_params_t parameters);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* DICTBUILDER_STATIC_H_002 */
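A short sketch of the advanced entry point declared above: zero the parameter block, then override only the fields of interest (zeroed fields keep their documented defaults):

#include <string.h>
#include "zdict_static.h"   /* ZDICT_params_t, ZDICT_trainFromBuffer_advanced */

/* Sketch: train with explicit parameters instead of the defaults. */
static size_t trainWithParams(void* dictBuffer, size_t dictCapacity,
                              const void* samples, const size_t* sampleSizes, unsigned nbSamples)
{
    ZDICT_params_t params;
    memset(&params, 0, sizeof(params));   /* 0 == "use default" for every field */
    params.compressionLevel  = 12;        /* target a specific zstd compression level */
    params.selectivityLevel  = 9;         /* larger selection => larger dictionary */
    params.notificationLevel = 2;         /* progression messages on stderr */
    return ZDICT_trainFromBuffer_advanced(dictBuffer, dictCapacity,
                                          samples, sampleSizes, nbSamples, params);
}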
diff --git a/contrib/libs/zstd06/legacy/zstd_legacy.h b/contrib/libs/zstd06/legacy/zstd_legacy.h
index 8b17a15317..80fc364cac 100644
--- a/contrib/libs/zstd06/legacy/zstd_legacy.h
+++ b/contrib/libs/zstd06/legacy/zstd_legacy.h
@@ -1,65 +1,65 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd_legacy - decoder for legacy format
- Header File
+/*
+ zstd_legacy - decoder for legacy format
+ Header File
Copyright (C) 2015-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#ifndef ZSTD_LEGACY_H
-#define ZSTD_LEGACY_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include "mem.h" /* MEM_STATIC */
-#include "error_private.h" /* ERROR */
-#include "zstd_v01.h"
-#include "zstd_v02.h"
-#include "zstd_v03.h"
-#include "zstd_v04.h"
-#include "zstd_v05.h"
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_LEGACY_H
+#define ZSTD_LEGACY_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include "mem.h" /* MEM_STATIC */
+#include "error_private.h" /* ERROR */
+#include "zstd_v01.h"
+#include "zstd_v02.h"
+#include "zstd_v03.h"
+#include "zstd_v04.h"
+#include "zstd_v05.h"
#include "zstd_v07.h"
#include "zstd_v08.h"
-
+
/** ZSTD_isLegacy() :
@return : > 0 if supported by legacy decoder. 0 otherwise.
return value is the version.
*/
-MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
-{
- switch(magicNumberLE)
- {
+MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
+{
+ switch(magicNumberLE)
+ {
case ZSTDv01_magicNumberLE:return 1;
case ZSTDv02_magicNumber : return 2;
case ZSTDv03_magicNumber : return 3;
@@ -67,17 +67,17 @@ MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
case ZSTDv05_MAGICNUMBER : return 5;
case ZSTDv07_MAGICNUMBER : return 7;
case ZSTDv08_MAGICNUMBER : return 8;
- default : return 0;
- }
-}
-
-
-MEM_STATIC size_t ZSTD_decompressLegacy(
- void* dst, size_t dstCapacity,
- const void* src, size_t compressedSize,
+ default : return 0;
+ }
+}
+
+
+MEM_STATIC size_t ZSTD_decompressLegacy(
+ void* dst, size_t dstCapacity,
+ const void* src, size_t compressedSize,
const void* dict,size_t dictSize,
- U32 magicNumberLE)
-{
+ U32 magicNumberLE)
+{
switch(magicNumberLE)
{
case ZSTDv01_magicNumberLE :
@@ -116,12 +116,12 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
default :
return ERROR(prefix_unknown);
}
-}
-
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_LEGACY_H */
+}
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_LEGACY_H */
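A dispatch sketch built on the two helpers above: read the frame's leading magic number, route legacy frames to the matching legacy decoder and everything else to the current one. The little-endian read helper MEM_readLE32() is assumed to be provided by the mem.h already included by this header; ZSTD_decompress() is the regular entry point from zstd.h.

#include "zstd_legacy.h"   /* ZSTD_isLegacy, ZSTD_decompressLegacy, ERROR */
#include "zstd.h"          /* ZSTD_decompress */
#include "mem.h"           /* MEM_readLE32 (assumed available alongside the other MEM_* helpers) */

static size_t decompressAnyVersion(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
{
    U32 magic;
    if (srcSize < 4) return ERROR(srcSize_wrong);   /* every frame starts with a 4-byte magic */
    magic = MEM_readLE32(src);
    if (ZSTD_isLegacy(magic))                       /* non-zero result is the legacy version */
        return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, NULL, 0, magic);
    return ZSTD_decompress(dst, dstCapacity, src, srcSize);
}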
diff --git a/contrib/libs/zstd06/legacy/zstd_v01.c b/contrib/libs/zstd06/legacy/zstd_v01.c
index f2d657ece9..d62367df29 100644
--- a/contrib/libs/zstd06/legacy/zstd_v01.c
+++ b/contrib/libs/zstd06/legacy/zstd_v01.c
@@ -1,2178 +1,2178 @@
-/* ******************************************************************
- ZSTD_v01
- Zstandard decoder, compatible with v0.1.x format
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-/******************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include "zstd_v01.h"
-
-
-/******************************************
-* Static allocation
-******************************************/
-/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
-
-/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
-#define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
-#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
- unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
-
-
-/******************************************
-* Error Management
-******************************************/
-#define FSE_LIST_ERRORS(ITEM) \
- ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
- ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
- ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
- ITEM(FSE_ERROR_corruptionDetected) \
- ITEM(FSE_ERROR_maxCode)
-
-#define FSE_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
-
-
-/******************************************
-* FSE symbol compression API
-******************************************/
-/*
- This API consists of small unitary functions, which highly benefit from being inlined.
- You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
- Visual seems to do it automatically.
- For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
- If none of these solutions is applicable, include "fse.c" directly.
-*/
-
-typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-typedef struct
-{
- size_t bitContainer;
- int bitPos;
- char* startPtr;
- char* ptr;
- char* endPtr;
-} FSE_CStream_t;
-
-typedef struct
-{
- ptrdiff_t value;
- const void* stateTable;
- const void* symbolTT;
- unsigned stateLog;
-} FSE_CState_t;
-
-typedef struct
-{
- size_t bitContainer;
- unsigned bitsConsumed;
- const char* ptr;
- const char* start;
-} FSE_DStream_t;
-
-typedef struct
-{
- size_t state;
- const void* table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-typedef enum { FSE_DStream_unfinished = 0,
- FSE_DStream_endOfBuffer = 1,
- FSE_DStream_completed = 2,
- FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
-
-
-/****************************************************************
-* Tuning parameters
-****************************************************************/
-/* MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect
-* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
-
-/* FSE_MAX_SYMBOL_VALUE :
-* Maximum symbol value authorized.
-* Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
-
-/****************************************************************
-* template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-
-
-/****************************************************************
-* Byte symbol type
-****************************************************************/
-typedef struct
-{
- unsigned short newState;
- unsigned char symbol;
- unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-
-
-/****************************************************************
-* Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
-#else
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/****************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-
-#ifndef MEM_ACCESS_MODULE
-#define MEM_ACCESS_MODULE
-/****************************************************************
-* Basic Types
-*****************************************************************/
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
-# include <stdint.h>
-typedef uint8_t BYTE;
-typedef uint16_t U16;
-typedef int16_t S16;
-typedef uint32_t U32;
-typedef int32_t S32;
-typedef uint64_t U64;
-typedef int64_t S64;
-#else
-typedef unsigned char BYTE;
-typedef unsigned short U16;
-typedef signed short S16;
-typedef unsigned int U32;
-typedef signed int S32;
-typedef unsigned long long U64;
-typedef signed long long S64;
-#endif
-
-#endif /* MEM_ACCESS_MODULE */
-
-/****************************************************************
-* Memory I/O
-*****************************************************************/
-/* FSE_FORCE_MEMORY_ACCESS
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The below switch allow to select different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
- * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- * It can generate buggy code on targets generating assembly depending on alignment.
- * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
-# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-# define FSE_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
-# define FSE_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
-
-static unsigned FSE_32bits(void)
-{
- return sizeof(void*)==4;
-}
-
-static unsigned FSE_isLittleEndian(void)
-{
- const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
-
-static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
-static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
-static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
-
-#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
-
-static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-
-#else
-
-static U16 FSE_read16(const void* memPtr)
-{
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-static U32 FSE_read32(const void* memPtr)
-{
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-static U64 FSE_read64(const void* memPtr)
-{
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-#endif // FSE_FORCE_MEMORY_ACCESS
-
-static U16 FSE_readLE16(const void* memPtr)
-{
- if (FSE_isLittleEndian())
- return FSE_read16(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)(p[0] + (p[1]<<8));
- }
-}
-
-static U32 FSE_readLE32(const void* memPtr)
-{
- if (FSE_isLittleEndian())
- return FSE_read32(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
- }
-}
-
-
-static U64 FSE_readLE64(const void* memPtr)
-{
- if (FSE_isLittleEndian())
- return FSE_read64(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
- + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
- }
-}
-
-static size_t FSE_readLEST(const void* memPtr)
-{
- if (FSE_32bits())
- return (size_t)FSE_readLE32(memPtr);
- else
- return (size_t)FSE_readLE64(memPtr);
-}
-
-
-
-/****************************************************************
-* Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
-#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-
-/****************************************************************
-* Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/****************************************************************
-* Complex types
-****************************************************************/
-typedef struct
-{
- int deltaFindState;
- U32 deltaNbBits;
-} FSE_symbolCompressionTransform; /* total 8 bytes */
-
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-/****************************************************************
-* Internal functions
-****************************************************************/
-FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
-# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
- return 31 - __builtin_clz (val);
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- unsigned r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-/****************************************************************
-* Templates
-****************************************************************/
-/*
- designed to be included
- for type-specific functions (template emulation in C)
- Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-# error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-# error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X,Y) X##Y
-#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
-#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
-
-
-
-static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
-
+/* ******************************************************************
+ ZSTD_v01
+ Zstandard decoder, compatible with v0.1.x format
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/******************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include "zstd_v01.h"
+
+
+/******************************************
+* Static allocation
+******************************************/
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
+
+/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
+#define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
+#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
+ unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
+
+
+/******************************************
+* Error Management
+******************************************/
+#define FSE_LIST_ERRORS(ITEM) \
+ ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
+ ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
+ ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
+ ITEM(FSE_ERROR_corruptionDetected) \
+ ITEM(FSE_ERROR_maxCode)
+
+#define FSE_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+
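The enum above is used negated: a failing function returns (size_t)-FSE_ERROR_xxx, so callers inside this translation unit can test for one specific condition by comparing the result against the negated enumerator, or call FSE_isError() (defined further down) for a generic check. A small illustrative sketch, not part of the original file:

/* Sketch: distinguish one specific failure of FSE_buildDTable() from any other error. */
static int buildDTableChecked(FSE_DTable* dt, const short* normCount,
                              unsigned maxSymbol, unsigned tableLog)
{
    size_t const r = FSE_buildDTable(dt, normCount, maxSymbol, tableLog);
    if (r == (size_t)-FSE_ERROR_tableLog_tooLarge) return -1;   /* tableLog > FSE_MAX_TABLELOG */
    if (FSE_isError(r)) return -2;                              /* any other error code */
    return 0;
}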
+/******************************************
+* FSE symbol compression API
+******************************************/
+/*
+ This API consists of small unitary functions, which highly benefit from being inlined.
+ You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+ Visual seems to do it automatically.
+ For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+ If none of these solutions is applicable, include "fse.c" directly.
+*/
+
+typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+
+typedef struct
+{
+ size_t bitContainer;
+ int bitPos;
+ char* startPtr;
+ char* ptr;
+ char* endPtr;
+} FSE_CStream_t;
+
+typedef struct
+{
+ ptrdiff_t value;
+ const void* stateTable;
+ const void* symbolTT;
+ unsigned stateLog;
+} FSE_CState_t;
+
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+} FSE_DStream_t;
+
+typedef struct
+{
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+typedef enum { FSE_DStream_unfinished = 0,
+ FSE_DStream_endOfBuffer = 1,
+ FSE_DStream_completed = 2,
+ FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
+
+
+/****************************************************************
+* Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+* Byte symbol type
+****************************************************************/
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+
+
+/****************************************************************
+* Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+#else
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/****************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/****************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# include <stdint.h>
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef int16_t S16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef int64_t S64;
+#else
+typedef unsigned char BYTE;
+typedef unsigned short U16;
+typedef signed short S16;
+typedef unsigned int U32;
+typedef signed int S32;
+typedef unsigned long long U64;
+typedef signed long long S64;
+#endif
+
+#endif /* MEM_ACCESS_MODULE */
+
+/****************************************************************
+* Memory I/O
+*****************************************************************/
+/* FSE_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ * It can generate buggy code on targets generating assembly depending on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define FSE_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define FSE_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+
+static unsigned FSE_32bits(void)
+{
+ return sizeof(void*)==4;
+}
+
+static unsigned FSE_isLittleEndian(void)
+{
+ const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
+
+static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+static U16 FSE_read16(const void* memPtr)
+{
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 FSE_read32(const void* memPtr)
+{
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U64 FSE_read64(const void* memPtr)
+{
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+#endif // FSE_FORCE_MEMORY_ACCESS
+
+static U16 FSE_readLE16(const void* memPtr)
+{
+ if (FSE_isLittleEndian())
+ return FSE_read16(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)(p[0] + (p[1]<<8));
+ }
+}
+
+static U32 FSE_readLE32(const void* memPtr)
+{
+ if (FSE_isLittleEndian())
+ return FSE_read32(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+ }
+}
+
+
+static U64 FSE_readLE64(const void* memPtr)
+{
+ if (FSE_isLittleEndian())
+ return FSE_read64(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+ + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+ }
+}
+
+static size_t FSE_readLEST(const void* memPtr)
+{
+ if (FSE_32bits())
+ return (size_t)FSE_readLE32(memPtr);
+ else
+ return (size_t)FSE_readLE64(memPtr);
+}
+
+
+
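The readers above pair a raw native load (memcpy, packed union, or direct cast, depending on FSE_FORCE_MEMORY_ACCESS) with a runtime endianness test. A standalone, always-portable equivalent of the 32-bit little-endian read, using plain byte arithmetic instead of the native-load-plus-swap approach preferred in this file:

#include <stdio.h>
#include <stdint.h>

/* Endian-independent 32-bit little-endian read: no unaligned access, no aliasing issues.
   The file above prefers a native load plus byte swap because compilers turn that into
   a single instruction on little-endian targets. */
static uint32_t readLE32_portable(const void* memPtr)
{
    const unsigned char* p = (const unsigned char*)memPtr;
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
    const unsigned char bytes[4] = { 0x78, 0x56, 0x34, 0x12 };
    printf("0x%08X\n", (unsigned)readLE32_portable(bytes));   /* prints 0x12345678 */
    return 0;
}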
+/****************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+* Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/****************************************************************
+* Complex types
+****************************************************************/
+typedef struct
+{
+ int deltaFindState;
+ U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+/****************************************************************
+* Internal functions
+****************************************************************/
+FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r;
+ _BitScanReverse ( &r, val );
+ return (unsigned) r;
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
+ return 31 - __builtin_clz (val);
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ unsigned r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
+
+
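FSE_highbit32(v) returns the index of the highest set bit of a non-zero v, i.e. floor(log2(v)), whichever of the three branches above gets compiled in. A few illustrative values, as a sketch usable inside this translation unit (the function is not meaningful for v == 0):

#include <assert.h>

static void highbit32_examples(void)
{
    assert(FSE_highbit32(1)           ==  0);
    assert(FSE_highbit32(2)           ==  1);
    assert(FSE_highbit32(1000)        ==  9);   /* 512 <= 1000 < 1024 */
    assert(FSE_highbit32(0x80000000u) == 31);
}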
+/****************************************************************
+* Templates
+****************************************************************/
+/*
+ designed to be included
+ for type-specific functions (template emulation in C)
+ Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+# error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+# error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
#define FSE_DECODE_TYPE FSE_decode_t
-
-
-typedef struct {
- U16 tableLog;
- U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
+
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
static size_t FSE_buildDTable
-(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */
- const U32 tableSize = 1 << tableLog;
- const U32 tableMask = tableSize-1;
- const U32 step = FSE_tableStep(tableSize);
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
- U32 position = 0;
- U32 highThreshold = tableSize-1;
- const S16 largeLimit= (S16)(1 << (tableLog-1));
- U32 noLarge = 1;
- U32 s;
-
- /* Sanity Checks */
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
- if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
-
- /* Init, lay down lowprob symbols */
- DTableH[0].tableLog = (U16)tableLog;
- for (s=0; s<=maxSymbolValue; s++)
- {
- if (normalizedCounter[s]==-1)
- {
- tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
- symbolNext[s] = 1;
- }
- else
- {
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
- symbolNext[s] = normalizedCounter[s];
- }
- }
-
- /* Spread symbols */
- for (s=0; s<=maxSymbolValue; s++)
- {
- int i;
- for (i=0; i<normalizedCounter[s]; i++)
- {
- tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
- position = (position + step) & tableMask;
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
- }
- }
-
- if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-
- /* Build Decoding table */
- {
- U32 i;
- for (i=0; i<tableSize; i++)
- {
- FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
- }
- }
-
- DTableH->fastMode = (U16)noLarge;
- return 0;
-}
-
-
-/******************************************
-* FSE byte symbol
-******************************************/
-#ifndef FSE_COMMONDEFS_ONLY
-
-static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
-
-static short FSE_abs(short a)
-{
- return a<0? -a : a;
-}
-
-
-/****************************************************************
-* Header bitstream management
-****************************************************************/
-static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
- const void* headerBuffer, size_t hbSize)
-{
- const BYTE* const istart = (const BYTE*) headerBuffer;
- const BYTE* const iend = istart + hbSize;
- const BYTE* ip = istart;
- int nbBits;
- int remaining;
- int threshold;
- U32 bitStream;
- int bitCount;
- unsigned charnum = 0;
- int previous0 = 0;
-
- if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
- bitStream = FSE_readLE32(ip);
- nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
- if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
- bitStream >>= 4;
- bitCount = 4;
- *tableLogPtr = nbBits;
- remaining = (1<<nbBits)+1;
- threshold = 1<<nbBits;
- nbBits++;
-
- while ((remaining>1) && (charnum<=*maxSVPtr))
- {
- if (previous0)
- {
- unsigned n0 = charnum;
- while ((bitStream & 0xFFFF) == 0xFFFF)
- {
- n0+=24;
- if (ip < iend-5)
- {
- ip+=2;
- bitStream = FSE_readLE32(ip) >> bitCount;
- }
- else
- {
- bitStream >>= 16;
- bitCount+=16;
- }
- }
- while ((bitStream & 3) == 3)
- {
- n0+=3;
- bitStream>>=2;
- bitCount+=2;
- }
- n0 += bitStream & 3;
- bitCount += 2;
- if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
- while (charnum < n0) normalizedCounter[charnum++] = 0;
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- bitStream = FSE_readLE32(ip) >> bitCount;
- }
- else
- bitStream >>= 2;
- }
- {
- const short max = (short)((2*threshold-1)-remaining);
- short count;
-
- if ((bitStream & (threshold-1)) < (U32)max)
- {
- count = (short)(bitStream & (threshold-1));
- bitCount += nbBits-1;
- }
- else
- {
- count = (short)(bitStream & (2*threshold-1));
- if (count >= threshold) count -= max;
- bitCount += nbBits;
- }
-
- count--; /* extra accuracy */
- remaining -= FSE_abs(count);
- normalizedCounter[charnum++] = count;
- previous0 = !count;
- while (remaining < threshold)
- {
- nbBits--;
- threshold >>= 1;
- }
-
- {
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- }
- else
- {
- bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
- bitStream = FSE_readLE32(ip) >> (bitCount & 31);
- }
- }
- }
- if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
- *maxSVPtr = charnum-1;
-
- ip += (bitCount+7)>>3;
- if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
- return ip-istart;
-}
-
-
-/*********************************************************
-* Decompression (Byte symbols)
-*********************************************************/
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
-
- DTableH->tableLog = 0;
- DTableH->fastMode = 0;
-
- cell->newState = 0;
- cell->symbol = symbolValue;
- cell->nbBits = 0;
-
- return 0;
-}
-
-
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
-
- /* Build Decoding Table */
- DTableH->tableLog = (U16)nbBits;
- DTableH->fastMode = 1;
- for (s=0; s<=maxSymbolValue; s++)
- {
- dinfo[s].newState = 0;
- dinfo[s].symbol = (BYTE)s;
- dinfo[s].nbBits = (BYTE)nbBits;
- }
-
- return 0;
-}
-
-
-/* FSE_initDStream
- * Initialize an FSE_DStream_t.
- * srcBuffer must point at the beginning of an FSE block.
- * The function result is the size of the FSE block (== srcSize).
- * If srcSize is too small, the function will return an error code.
- */
-static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
- if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
-
- if (srcSize >= sizeof(size_t))
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
- bitD->bitContainer = FSE_readLEST(bitD->ptr);
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
- bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
- }
- else
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = bitD->start;
- bitD->bitContainer = *(const BYTE*)(bitD->start);
- switch(srcSize)
- {
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
- default:;
- }
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
- bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
- bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
- }
-
- return srcSize;
-}
-
-
-/*!FSE_lookBits
- * Provides next n bits from the bitContainer.
- * bitContainer is not modified (bits are still present for next read/look)
- * On 32-bits, maxNbBits==25
- * On 64-bits, maxNbBits==57
- * return : value extracted.
- */
-static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
-}
-
-static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
-}
-
-static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
-{
- bitD->bitsConsumed += nbBits;
-}
-
-
-/*!FSE_readBits
- * Read next n bits from the bitContainer.
- * On 32-bits, don't read more than maxNbBits==25
- * On 64-bits, don't read more than maxNbBits==57
- * Use the fast variant *only* if n >= 1.
- * return : value extracted.
- */
-static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
-{
- size_t value = FSE_lookBits(bitD, nbBits);
- FSE_skipBits(bitD, nbBits);
- return value;
-}
-
-static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
-{
- size_t value = FSE_lookBitsFast(bitD, nbBits);
- FSE_skipBits(bitD, nbBits);
- return value;
-}
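-/* Illustrative note (editorial addition, not part of the original source) :
- * FSE_lookBits() extracts the next nbBits from the high end of bitContainer, skipping the
- * bitsConsumed bits already delivered, without advancing the stream; FSE_skipBits() only
- * advances bitsConsumed. FSE_readBits() is the combination of the two :
- * size_t v = FSE_readBits(&bitD, 5); is equivalent to FSE_lookBits(&bitD, 5) followed by FSE_skipBits(&bitD, 5). */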
-
-static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
-{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return FSE_DStream_tooFar;
-
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
- {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = FSE_readLEST(bitD->ptr);
- return FSE_DStream_unfinished;
- }
- if (bitD->ptr == bitD->start)
- {
- if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
- return FSE_DStream_completed;
- }
- {
- U32 nbBytes = bitD->bitsConsumed >> 3;
- U32 result = FSE_DStream_unfinished;
- if (bitD->ptr - nbBytes < bitD->start)
- {
- nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
- result = FSE_DStream_endOfBuffer;
- }
- bitD->ptr -= nbBytes;
- bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
- return result;
- }
-}
-
-
-static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
-{
- const void* ptr = dt;
- const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
- DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
- FSE_reloadDStream(bitD);
- DStatePtr->table = dt + 1;
-}
-
-static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = FSE_readBits(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = FSE_readBitsFast(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-/* FSE_endOfDStream
- Tells if bitD has reached end of bitStream or not */
-
-static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
-{
- return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
-}
-
-static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
-{
- return DStatePtr->state == 0;
-}
-
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
- void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt, const unsigned fast)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const omax = op + maxDstSize;
- BYTE* const olimit = omax-3;
-
- FSE_DStream_t bitD;
- FSE_DState_t state1;
- FSE_DState_t state2;
- size_t errorCode;
-
- /* Init */
- errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
- if (FSE_isError(errorCode)) return errorCode;
-
- FSE_initDState(&state1, &bitD, dt);
- FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
- /* 4 symbols per loop */
- for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
- {
- op[0] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- FSE_reloadDStream(&bitD);
-
- op[1] = FSE_GETSYMBOL(&state2);
-
- if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
-
- op[2] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- FSE_reloadDStream(&bitD);
-
- op[3] = FSE_GETSYMBOL(&state2);
- }
-
- /* tail */
- /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
- while (1)
- {
- if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state1);
-
- if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state2);
- }
-
- /* end ? */
- if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
- return op-ostart;
-
- if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
-
- return (size_t)-FSE_ERROR_corruptionDetected;
-}
-
-
-static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
-
- /* select fast mode (static) */
- if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
- const BYTE* const istart = (const BYTE*)cSrc;
- const BYTE* ip = istart;
- short counting[FSE_MAX_SYMBOL_VALUE+1];
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
- unsigned tableLog;
- unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
- size_t errorCode;
-
- if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
-
- /* normal FSE decoding mode */
- errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
- if (FSE_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
- ip += errorCode;
- cSrcSize -= errorCode;
-
- errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
- if (FSE_isError(errorCode)) return errorCode;
-
- /* always return, even if it is an error code */
- return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
-}
-
-
-
-/* *******************************************************
-* Huff0 : Huffman block compression
-*********************************************************/
-#define HUF_MAX_SYMBOL_VALUE 255
-#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
-#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
-#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
-# error "HUF_MAX_TABLELOG is too large !"
-#endif
-
-typedef struct HUF_CElt_s {
- U16 val;
- BYTE nbBits;
-} HUF_CElt ;
-
-typedef struct nodeElt_s {
- U32 count;
- U16 parent;
- BYTE byte;
- BYTE nbBits;
-} nodeElt;
-
-
-/* *******************************************************
-* Huff0 : Huffman block decompression
-*********************************************************/
-typedef struct {
- BYTE byte;
- BYTE nbBits;
-} HUF_DElt;
-
-static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
-{
- BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
- U32 weightTotal;
- U32 maxBits;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- size_t oSize;
- U32 n;
- U32 nextRankStart;
- void* ptr = DTable+1;
- HUF_DElt* const dt = (HUF_DElt*)ptr;
-
- FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
- //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzers complain ... */
- if (iSize >= 128) /* special header */
- {
- if (iSize >= (242)) /* RLE */
- {
- static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
- oSize = l[iSize-242];
- memset(huffWeight, 1, sizeof(huffWeight));
- iSize = 0;
- }
- else /* Incompressible */
- {
- oSize = iSize - 127;
- iSize = ((oSize+1)/2);
- if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
- ip += 1;
- for (n=0; n<oSize; n+=2)
- {
- huffWeight[n] = ip[n/2] >> 4;
- huffWeight[n+1] = ip[n/2] & 15;
- }
- }
- }
- else /* header compressed with FSE (normal case) */
- {
- if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
- oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
- if (FSE_isError(oSize)) return oSize;
- }
-
- /* collect weight stats */
- memset(rankVal, 0, sizeof(rankVal));
- weightTotal = 0;
- for (n=0; n<oSize; n++)
- {
- if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
- rankVal[huffWeight[n]]++;
- weightTotal += (1 << huffWeight[n]) >> 1;
- }
-
- /* get last non-null symbol weight (implied, total must be 2^n) */
- maxBits = FSE_highbit32(weightTotal) + 1;
- if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
- DTable[0] = (U16)maxBits;
- {
- U32 total = 1 << maxBits;
- U32 rest = total - weightTotal;
- U32 verif = 1 << FSE_highbit32(rest);
- U32 lastWeight = FSE_highbit32(rest) + 1;
- if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
- huffWeight[oSize] = (BYTE)lastWeight;
- rankVal[lastWeight]++;
- }
-
- /* check tree construction validity */
- if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
-
- /* Prepare ranks */
- nextRankStart = 0;
- for (n=1; n<=maxBits; n++)
- {
- U32 current = nextRankStart;
- nextRankStart += (rankVal[n] << (n-1));
- rankVal[n] = current;
- }
-
- /* fill DTable */
- for (n=0; n<=oSize; n++)
- {
- const U32 w = huffWeight[n];
- const U32 length = (1 << w) >> 1;
- U32 i;
- HUF_DElt D;
- D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
- for (i = rankVal[w]; i < rankVal[w] + length; i++)
- dt[i] = D;
- rankVal[w] += length;
- }
-
- return iSize+1;
-}
-
-
-static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
-{
- const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
- const BYTE c = dt[val].byte;
- FSE_skipBits(Dstream, dt[val].nbBits);
- return c;
-}
-
-static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
- void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize,
- const U16* DTable)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const omax = op + maxDstSize;
- BYTE* const olimit = omax-15;
-
- const void* ptr = DTable;
- const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
- U32 reloadStatus;
-
- /* Init */
-
- const U16* jumpTable = (const U16*)cSrc;
- const size_t length1 = FSE_readLE16(jumpTable);
- const size_t length2 = FSE_readLE16(jumpTable+1);
- const size_t length3 = FSE_readLE16(jumpTable+2);
- const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
- const char* const start1 = (const char*)(cSrc) + 6;
- const char* const start2 = start1 + length1;
- const char* const start3 = start2 + length2;
- const char* const start4 = start3 + length3;
- FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
-
- if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
-
- errorCode = FSE_initDStream(&bitD1, start1, length1);
- if (FSE_isError(errorCode)) return errorCode;
- errorCode = FSE_initDStream(&bitD2, start2, length2);
- if (FSE_isError(errorCode)) return errorCode;
- errorCode = FSE_initDStream(&bitD3, start3, length3);
- if (FSE_isError(errorCode)) return errorCode;
- errorCode = FSE_initDStream(&bitD4, start4, length4);
- if (FSE_isError(errorCode)) return errorCode;
-
- reloadStatus=FSE_reloadDStream(&bitD2);
-
- /* 16 symbols per loop */
- for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
- op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
- {
-#define HUF_DECODE_SYMBOL_0(n, Dstream) \
- op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
-
-#define HUF_DECODE_SYMBOL_1(n, Dstream) \
- op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
- if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
-
-#define HUF_DECODE_SYMBOL_2(n, Dstream) \
- op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
- if (FSE_32bits()) FSE_reloadDStream(&Dstream)
-
- HUF_DECODE_SYMBOL_1( 0, bitD1);
- HUF_DECODE_SYMBOL_1( 1, bitD2);
- HUF_DECODE_SYMBOL_1( 2, bitD3);
- HUF_DECODE_SYMBOL_1( 3, bitD4);
- HUF_DECODE_SYMBOL_2( 4, bitD1);
- HUF_DECODE_SYMBOL_2( 5, bitD2);
- HUF_DECODE_SYMBOL_2( 6, bitD3);
- HUF_DECODE_SYMBOL_2( 7, bitD4);
- HUF_DECODE_SYMBOL_1( 8, bitD1);
- HUF_DECODE_SYMBOL_1( 9, bitD2);
- HUF_DECODE_SYMBOL_1(10, bitD3);
- HUF_DECODE_SYMBOL_1(11, bitD4);
- HUF_DECODE_SYMBOL_0(12, bitD1);
- HUF_DECODE_SYMBOL_0(13, bitD2);
- HUF_DECODE_SYMBOL_0(14, bitD3);
- HUF_DECODE_SYMBOL_0(15, bitD4);
- }
-
- if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
- return (size_t)-FSE_ERROR_corruptionDetected;
-
- /* tail */
- {
- // bitTail = bitD1; // *much* slower : -20% !??!
- FSE_DStream_t bitTail;
- bitTail.ptr = bitD1.ptr;
- bitTail.bitsConsumed = bitD1.bitsConsumed;
- bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
- bitTail.start = start1;
- for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
- {
- HUF_DECODE_SYMBOL_0(0, bitTail);
- }
-
- if (FSE_endOfDStream(&bitTail))
- return op-ostart;
- }
-
- if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
-
- return (size_t)-FSE_ERROR_corruptionDetected;
-}
-
-
-static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
- size_t errorCode;
-
- errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
- if (FSE_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
- ip += errorCode;
- cSrcSize -= errorCode;
-
- return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
-}
-
-
-#endif /* FSE_COMMONDEFS_ONLY */
-
-/*
- zstd - standard compression library
- Header File for static linking only
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* The objects defined in this file should be considered experimental.
- * They are not labelled stable, as their prototypes may change in the future.
- * You can use them for tests, provide feedback, or if you can accept the risk of future changes.
- */
-
-/**************************************
-* Error management
-**************************************/
-#define ZSTD_LIST_ERRORS(ITEM) \
- ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \
- ITEM(ZSTD_ERROR_MagicNumber) \
- ITEM(ZSTD_ERROR_SrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \
- ITEM(ZSTD_ERROR_corruption) \
- ITEM(ZSTD_ERROR_maxCode)
-
-#define ZSTD_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes; /* exposed list of errors; static linking only */
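-/* Illustrative note (editorial addition, not part of the original source) :
- * ZSTD_LIST_ERRORS is an X-macro : passing ZSTD_GENERATE_ENUM expands each ITEM(x) to "x,",
- * so the enum above is { ZSTD_OK_NoError, ZSTD_ERROR_GENERIC, ZSTD_ERROR_MagicNumber,
- * ZSTD_ERROR_SrcSize, ZSTD_ERROR_maxDstSize_tooSmall, ZSTD_ERROR_corruption, ZSTD_ERROR_maxCode }. */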
-
-/*
- zstd - standard compression library
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/****************************************************************
-* Tuning parameters
-*****************************************************************/
-/* MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect */
-#define ZSTD_MEMORY_USAGE 17
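-/* Illustrative note (editorial addition, not part of the original source) :
- * with ZSTD_MEMORY_USAGE == 17, HASH_LOG == 15 below, so HASH_TABLESIZE == 1<<15 == 32768
- * entries of sizeof(U32) == 4 bytes each, i.e. 128 KB, matching the 2^N-bytes formula above. */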
-
-
-/**************************************
- CPU Feature Detection
-**************************************/
-/*
- * Automated efficient unaligned memory access detection
- * Based on known hardware architectures
- * This list will be updated thanks to feedback
- */
-#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
- || defined(__ARM_FEATURE_UNALIGNED) \
- || defined(__i386__) || defined(__x86_64__) \
- || defined(_M_IX86) || defined(_M_X64) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
- || (defined(_M_ARM) && (_M_ARM >= 7))
-# define ZSTD_UNALIGNED_ACCESS 1
-#else
-# define ZSTD_UNALIGNED_ACCESS 0
-#endif
-
-
-/********************************************************
-* Includes
-*********************************************************/
-#include <stdlib.h> /* calloc */
-#include <string.h> /* memcpy, memmove */
-#include <stdio.h> /* debug : printf */
-
-
-/********************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef __AVX2__
-# include <immintrin.h> /* AVX2 intrinsics */
-#endif
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-#else
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-#ifndef MEM_ACCESS_MODULE
-#define MEM_ACCESS_MODULE
-/********************************************************
-* Basic Types
-*********************************************************/
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
-# include <stdint.h>
-typedef uint8_t BYTE;
-typedef uint16_t U16;
-typedef int16_t S16;
-typedef uint32_t U32;
-typedef int32_t S32;
-typedef uint64_t U64;
-#else
-typedef unsigned char BYTE;
-typedef unsigned short U16;
-typedef signed short S16;
-typedef unsigned int U32;
-typedef signed int S32;
-typedef unsigned long long U64;
-#endif
-
-#endif /* MEM_ACCESS_MODULE */
-
-
-/********************************************************
-* Constants
-*********************************************************/
-static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
-
-#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
-#define HASH_TABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASH_TABLESIZE - 1)
-
-#define KNUTH 2654435761
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BLOCKSIZE (128 KB) /* define, for static allocation */
-
-#define WORKPLACESIZE (BLOCKSIZE*3)
-#define MINMATCH 4
-#define MLbits 7
-#define LLbits 6
-#define Offbits 5
-#define MaxML ((1<<MLbits )-1)
-#define MaxLL ((1<<LLbits )-1)
-#define MaxOff ((1<<Offbits)-1)
-#define LitFSELog 11
-#define MLFSELog 10
-#define LLFSELog 10
-#define OffFSELog 9
-#define MAX(a,b) ((a)<(b)?(b):(a))
-#define MaxSeq MAX(MaxLL, MaxML)
-
-#define LITERAL_NOENTROPY 63
-#define COMMAND_NOENTROPY 7 /* to remove */
-
-static const size_t ZSTD_blockHeaderSize = 3;
-static const size_t ZSTD_frameHeaderSize = 4;
-
-
-/********************************************************
-* Memory operations
-*********************************************************/
-static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
-
-static unsigned ZSTD_isLittleEndian(void)
-{
- const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
-
-static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
-
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
-
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-
-static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
- while (op < oend) COPY8(op, ip);
-}
-
-static U16 ZSTD_readLE16(const void* memPtr)
-{
- if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)((U16)p[0] + ((U16)p[1]<<8));
- }
-}
-
-
-static U32 ZSTD_readLE32(const void* memPtr)
-{
- if (ZSTD_isLittleEndian())
- return ZSTD_read32(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
- }
-}
-
-static U32 ZSTD_readBE32(const void* memPtr)
-{
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
-}
-
-
-/**************************************
-* Local structures
-***************************************/
-typedef struct ZSTD_Cctx_s ZSTD_Cctx;
-
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-
-typedef struct
-{
- blockType_t blockType;
- U32 origSize;
-} blockProperties_t;
-
-typedef struct {
- void* buffer;
- U32* offsetStart;
- U32* offset;
- BYTE* offCodeStart;
- BYTE* offCode;
- BYTE* litStart;
- BYTE* lit;
- BYTE* litLengthStart;
- BYTE* litLength;
- BYTE* matchLengthStart;
- BYTE* matchLength;
- BYTE* dumpsStart;
- BYTE* dumps;
-} seqStore_t;
-
-
-typedef struct ZSTD_Cctx_s
-{
- const BYTE* base;
- U32 current;
- U32 nextUpdate;
- seqStore_t seqStore;
-#ifdef __AVX2__
- __m256i hashTable[HASH_TABLESIZE>>3];
-#else
- U32 hashTable[HASH_TABLESIZE];
-#endif
- BYTE buffer[WORKPLACESIZE];
-} cctxi_t;
-
-
-
-
-/**************************************
-* Error Management
-**************************************/
-/* tells if a return value is an error code */
-static unsigned ZSTD_isError(size_t code) { return (code > (size_t)(-ZSTD_ERROR_maxCode)); }
-
-/* published entry point */
-unsigned ZSTDv01_isError(size_t code) { return ZSTD_isError(code); }
-
-
-/**************************************
-* Tool functions
-**************************************/
-#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
-#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
-#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
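-/* Illustrative note (editorial addition, not part of the original source) :
- * for this file, ZSTD_VERSION_NUMBER evaluates to 0*10000 + 1*100 + 3 == 103 (version 0.1.3). */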
-
-/**************************************************************
-* Decompression code
-**************************************************************/
-
-static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
-{
- const BYTE* const in = (const BYTE* const)src;
- BYTE headerFlags;
- U32 cSize;
-
- if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize;
-
- headerFlags = *in;
- cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-
- bpPtr->blockType = (blockType_t)(headerFlags >> 6);
- bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
- if (bpPtr->blockType == bt_end) return 0;
- if (bpPtr->blockType == bt_rle) return 1;
- return cSize;
-}
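-/* Illustrative note (editorial addition, not part of the original source) :
- * as read above, the 3-byte block header packs the blockType in the top 2 bits of in[0]
- * (headerFlags >> 6) and a 19-bit big-endian compressed size in (in[0]&7, in[1], in[2]);
- * bits 5-3 of in[0] are not examined here. */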
-
-
-static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
- memcpy(dst, src, srcSize);
- return srcSize;
-}
-
-
-static size_t ZSTD_decompressLiterals(void* ctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + maxDstSize;
- const BYTE* ip = (const BYTE*)src;
- size_t errorCode;
- size_t litSize;
-
- /* check : minimum 2, for litSize, +1, for content */
- if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption;
-
- litSize = ip[1] + (ip[0]<<8);
- litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh....
- op = oend - litSize;
-
- (void)ctx;
- if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
- errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
- return litSize;
-}
-
-
-static size_t ZSTD_decodeLiteralsBlock(void* ctx,
- void* dst, size_t maxDstSize,
- const BYTE** litStart, size_t* litSize,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* ip = istart;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* const oend = ostart + maxDstSize;
- blockProperties_t litbp;
-
- size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
- if (ZSTD_isError(litcSize)) return litcSize;
- if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
- ip += ZSTD_blockHeaderSize;
-
- switch(litbp.blockType)
- {
- case bt_raw:
- *litStart = ip;
- ip += litcSize;
- *litSize = litcSize;
- break;
- case bt_rle:
- {
- size_t rleSize = litbp.origSize;
- if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
- memset(oend - rleSize, *ip, rleSize);
- *litStart = oend - rleSize;
- *litSize = rleSize;
- ip++;
- break;
- }
- case bt_compressed:
- {
- size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
- if (ZSTD_isError(decodedLitSize)) return decodedLitSize;
- *litStart = oend - decodedLitSize;
- *litSize = decodedLitSize;
- ip += litcSize;
- break;
- }
- case bt_end:
- default:
- return (size_t)-ZSTD_ERROR_GENERIC;
- }
-
- return ip-istart;
-}
-
-
-static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
- FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* ip = istart;
- const BYTE* const iend = istart + srcSize;
- U32 LLtype, Offtype, MLtype;
- U32 LLlog, Offlog, MLlog;
- size_t dumpsLength;
-
- /* check */
- if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize;
-
- /* SeqHead */
- *nbSeq = ZSTD_readLE16(ip); ip+=2;
- LLtype = *ip >> 6;
- Offtype = (*ip >> 4) & 3;
- MLtype = (*ip >> 2) & 3;
- if (*ip & 2)
- {
- dumpsLength = ip[2];
- dumpsLength += ip[1] << 8;
- ip += 3;
- }
- else
- {
- dumpsLength = ip[1];
- dumpsLength += (ip[0] & 1) << 8;
- ip += 2;
- }
- *dumpsPtr = ip;
- ip += dumpsLength;
- *dumpsLengthPtr = dumpsLength;
-
- /* check */
- if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
- /* sequences */
- {
- S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
- size_t headerSize;
-
- /* Build DTables */
- switch(LLtype)
- {
- U32 max;
- case bt_rle :
- LLlog = 0;
- FSE_buildDTable_rle(DTableLL, *ip++); break;
- case bt_raw :
- LLlog = LLbits;
- FSE_buildDTable_raw(DTableLL, LLbits); break;
- default :
- max = MaxLL;
- headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
- if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption;
- ip += headerSize;
- FSE_buildDTable(DTableLL, norm, max, LLlog);
- }
-
- switch(Offtype)
- {
- U32 max;
- case bt_rle :
- Offlog = 0;
- if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableOffb, *ip++); break;
- case bt_raw :
- Offlog = Offbits;
- FSE_buildDTable_raw(DTableOffb, Offbits); break;
- default :
- max = MaxOff;
- headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
- if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption;
- ip += headerSize;
- FSE_buildDTable(DTableOffb, norm, max, Offlog);
- }
-
- switch(MLtype)
- {
- U32 max;
- case bt_rle :
- MLlog = 0;
- if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableML, *ip++); break;
- case bt_raw :
- MLlog = MLbits;
- FSE_buildDTable_raw(DTableML, MLbits); break;
- default :
- max = MaxML;
- headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
- if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption;
- ip += headerSize;
- FSE_buildDTable(DTableML, norm, max, MLlog);
- }
- }
-
- return ip-istart;
-}
-
-
-typedef struct {
- size_t litLength;
- size_t offset;
- size_t matchLength;
-} seq_t;
-
-typedef struct {
- FSE_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
- size_t prevOffset;
- const BYTE* dumps;
- const BYTE* dumpsEnd;
-} seqState_t;
-
-
-static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
-{
- size_t litLength;
- size_t prevOffset;
- size_t offset;
- size_t matchLength;
- const BYTE* dumps = seqState->dumps;
- const BYTE* const de = seqState->dumpsEnd;
-
- /* Literal length */
- litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
- prevOffset = litLength ? seq->offset : seqState->prevOffset;
- seqState->prevOffset = seq->offset;
- if (litLength == MaxLL)
- {
- U32 add = dumps<de ? *dumps++ : 0;
- if (add < 255) litLength += add;
- else
- {
- if (dumps<=(de-3))
- {
- litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- }
- }
-
- /* Offset */
- {
- U32 offsetCode, nbBits;
- offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
- if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
- nbBits = offsetCode - 1;
- if (offsetCode==0) nbBits = 0; /* cmove */
- offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
- if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
- if (offsetCode==0) offset = prevOffset;
- }
-
- /* MatchLength */
- matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
- if (matchLength == MaxML)
- {
- U32 add = dumps<de ? *dumps++ : 0;
- if (add < 255) matchLength += add;
- else
- {
- if (dumps<=(de-3))
- {
- matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- }
- }
- matchLength += MINMATCH;
-
- /* save result */
- seq->litLength = litLength;
- seq->offset = offset;
- seq->matchLength = matchLength;
- seqState->dumps = dumps;
-}
-
-
-static size_t ZSTD_execSequence(BYTE* op,
- seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- BYTE* const base, BYTE* const oend)
-{
- static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
- static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
- const BYTE* const ostart = op;
- const size_t litLength = sequence.litLength;
- BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
- const BYTE* const litEnd = *litPtr + litLength;
-
- /* check */
- if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */
- if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
- if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */
-
- /* copy Literals */
- if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
- memmove(op, *litPtr, litLength); /* overwrite risk */
- else
- ZSTD_wildcopy(op, *litPtr, litLength);
- op += litLength;
- *litPtr = litEnd; /* update for next sequence */
-
- /* check : last match must be at a minimum distance of 8 from end of dest buffer */
- if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
-
- /* copy Match */
- {
- const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
- const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
- size_t qutt = 12;
- U64 saved[2];
-
- /* check */
- if (match < base) return (size_t)-ZSTD_ERROR_corruption;
- if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption;
-
- /* save beginning of literal sequence, in case of write overlap */
- if (overlapRisk)
- {
- if ((endMatch + qutt) > oend) qutt = oend-endMatch;
- memcpy(saved, endMatch, qutt);
- }
-
- if (sequence.offset < 8)
- {
- const int dec64 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= dec64;
- } else { ZSTD_copy8(op, match); }
- op += 8; match += 8;
-
- if (endMatch > oend-12)
- {
- if (op < oend-8)
- {
- ZSTD_wildcopy(op, match, (oend-8) - op);
- match += (oend-8) - op;
- op = oend-8;
- }
- while (op<endMatch) *op++ = *match++;
- }
- else
- ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
-
- /* restore, in case of overlap */
- if (overlapRisk) memcpy(endMatch, saved, qutt);
- }
-
- return endMatch-ostart;
-}
-
-typedef struct ZSTDv01_Dctx_s
-{
- U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
- void* previousDstEnd;
- void* base;
- size_t expected;
- blockType_t bType;
- U32 phase;
-} dctx_t;
-
-
-static size_t ZSTD_decompressSequences(
- void* ctx,
- void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize,
- const BYTE* litStart, size_t litSize)
-{
- dctx_t* dctx = (dctx_t*)ctx;
- const BYTE* ip = (const BYTE*)seqStart;
- const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t errorCode, dumpsLength;
- const BYTE* litPtr = litStart;
- const BYTE* const litEnd = litStart + litSize;
- int nbSeq;
- const BYTE* dumps;
- U32* DTableLL = dctx->LLTable;
- U32* DTableML = dctx->MLTable;
- U32* DTableOffb = dctx->OffTable;
- BYTE* const base = (BYTE*) (dctx->base);
-
- /* Build Decoding Tables */
- errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
- DTableLL, DTableML, DTableOffb,
- ip, iend-ip);
- if (ZSTD_isError(errorCode)) return errorCode;
- ip += errorCode;
-
- /* Regen sequences */
- {
- seq_t sequence;
- seqState_t seqState;
-
- memset(&sequence, 0, sizeof(sequence));
- seqState.dumps = dumps;
- seqState.dumpsEnd = dumps + dumpsLength;
- seqState.prevOffset = 1;
- errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
- FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
- FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
- FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
-
- for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
- {
- size_t oneSeqSize;
- nbSeq--;
- ZSTD_decodeSequence(&sequence, &seqState);
- oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- }
-
- /* check if reached exact end */
- if ( !FSE_endOfDStream(&(seqState.DStream)) ) return (size_t)-ZSTD_ERROR_corruption; /* requested too much : data is corrupted */
- if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption; /* requested too many sequences : data is corrupted */
-
- /* last literal segment */
- {
- size_t lastLLSize = litEnd - litPtr;
- if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
- if (op != litPtr) memmove(op, litPtr, lastLLSize);
- op += lastLLSize;
- }
- }
-
- return op-ostart;
-}
-
-
-static size_t ZSTD_decompressBlock(
- void* ctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- /* blockType == blockCompressed, srcSize is trusted */
- const BYTE* ip = (const BYTE*)src;
- const BYTE* litPtr = NULL;
- size_t litSize = 0;
- size_t errorCode;
-
- /* Decode literals sub-block */
- errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
- if (ZSTD_isError(errorCode)) return errorCode;
- ip += errorCode;
- srcSize -= errorCode;
-
- return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
-}
-
-
-size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
- const BYTE* iend = ip + srcSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t remainingSize = srcSize;
- U32 magicNumber;
- size_t errorCode=0;
- blockProperties_t blockProperties;
-
- /* Frame Header */
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
- magicNumber = ZSTD_readBE32(src);
- if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
- ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
-
- /* Loop on each block */
- while (1)
- {
- size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
- if (ZSTD_isError(blockSize)) return blockSize;
-
- ip += ZSTD_blockHeaderSize;
- remainingSize -= ZSTD_blockHeaderSize;
- if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
-
- switch(blockProperties.blockType)
- {
- case bt_compressed:
- errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
- break;
- case bt_raw :
- errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
- break;
- case bt_rle :
- return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */
- break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
- break;
- default:
- return (size_t)-ZSTD_ERROR_GENERIC;
- }
- if (blockSize == 0) break; /* bt_end */
-
- if (ZSTD_isError(errorCode)) return errorCode;
- op += errorCode;
- ip += blockSize;
- remainingSize -= blockSize;
- }
-
- return op-ostart;
-}
-
-size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- dctx_t ctx;
- ctx.base = dst;
- return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
-}
-
-
-/*******************************
-* Streaming Decompression API
-*******************************/
-
-size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
-{
- dctx->expected = ZSTD_frameHeaderSize;
- dctx->phase = 0;
- dctx->previousDstEnd = NULL;
- dctx->base = NULL;
- return 0;
-}
-
-ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
-{
- ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
- if (dctx==NULL) return NULL;
- ZSTDv01_resetDCtx(dctx);
- return dctx;
-}
-
-size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
-{
- free(dctx);
- return 0;
-}
-
-size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
-{
- return ((dctx_t*)dctx)->expected;
-}
-
-size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- dctx_t* ctx = (dctx_t*)dctx;
-
- /* Sanity check */
- if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize;
- if (dst != ctx->previousDstEnd) /* not contiguous */
- ctx->base = dst;
-
- /* Decompress : frame header */
- if (ctx->phase == 0)
- {
- /* Check frame magic header */
- U32 magicNumber = ZSTD_readBE32(src);
- if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
- ctx->phase = 1;
- ctx->expected = ZSTD_blockHeaderSize;
- return 0;
- }
-
- /* Decompress : block header */
- if (ctx->phase == 1)
- {
- blockProperties_t bp;
- size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
- if (ZSTD_isError(blockSize)) return blockSize;
- if (bp.blockType == bt_end)
- {
- ctx->expected = 0;
- ctx->phase = 0;
- }
- else
- {
- ctx->expected = blockSize;
- ctx->bType = bp.blockType;
- ctx->phase = 2;
- }
-
- return 0;
- }
-
- /* Decompress : block content */
- {
- size_t rSize;
- switch(ctx->bType)
- {
- case bt_compressed:
- rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
- break;
- case bt_raw :
- rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
- break;
- case bt_rle :
- return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
- break;
- default:
- return (size_t)-ZSTD_ERROR_GENERIC;
- }
- ctx->phase = 1;
- ctx->expected = ZSTD_blockHeaderSize;
- ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
- return rSize;
- }
-
-}
-
-
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */
+ const U32 tableSize = 1 << tableLog;
+ const U32 tableMask = tableSize-1;
+ const U32 step = FSE_tableStep(tableSize);
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+ U32 position = 0;
+ U32 highThreshold = tableSize-1;
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
+ U32 noLarge = 1;
+ U32 s;
+
+ /* Sanity Checks */
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+ /* Init, lay down lowprob symbols */
+ DTableH[0].tableLog = (U16)tableLog;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ if (normalizedCounter[s]==-1)
+ {
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+ symbolNext[s] = 1;
+ }
+ else
+ {
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
+ symbolNext[s] = normalizedCounter[s];
+ }
+ }
+
+ /* Spread symbols */
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++)
+ {
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ }
+ }
+
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+ /* Build Decoding table */
+ {
+ U32 i;
+ for (i=0; i<tableSize; i++)
+ {
+ FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+ U16 nextState = symbolNext[symbol]++;
+ tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+ }
+ }
+
+ DTableH->fastMode = (U16)noLarge;
+ return 0;
+}
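+/* Illustrative note (editorial addition, not part of the original source) :
+ * typical decoding sequence built on this table, mirroring FSE_decompress() in this file :
+ * FSE_readNCount(counting, &maxSymbolValue, &tableLog, src, srcSize); // read normalized counts
+ * FSE_buildDTable(dt, counting, maxSymbolValue, tableLog); // build the decode table
+ * FSE_initDStream(&bitD, cSrc, cSrcSize); // open the bitstream (read backwards)
+ * FSE_initDState(&state, &bitD, dt); // seed the decoder state
+ * then repeatedly : *op++ = FSE_decodeSymbol(&state, &bitD); // one symbol per step */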
+
+
+/******************************************
+* FSE byte symbol
+******************************************/
+#ifndef FSE_COMMONDEFS_ONLY
+
+static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
+
+static short FSE_abs(short a)
+{
+ return a<0? -a : a;
+}
+
+
+/****************************************************************
+* Header bitstream management
+****************************************************************/
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ int previous0 = 0;
+
+ if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
+ bitStream = FSE_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ while ((remaining>1) && (charnum<=*maxSVPtr))
+ {
+ if (previous0)
+ {
+ unsigned n0 = charnum;
+ while ((bitStream & 0xFFFF) == 0xFFFF)
+ {
+ n0+=24;
+ if (ip < iend-5)
+ {
+ ip+=2;
+ bitStream = FSE_readLE32(ip) >> bitCount;
+ }
+ else
+ {
+ bitStream >>= 16;
+ bitCount+=16;
+ }
+ }
+ while ((bitStream & 3) == 3)
+ {
+ n0+=3;
+ bitStream>>=2;
+ bitCount+=2;
+ }
+ n0 += bitStream & 3;
+ bitCount += 2;
+ if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ bitStream = FSE_readLE32(ip) >> bitCount;
+ }
+ else
+ bitStream >>= 2;
+ }
+ {
+ const short max = (short)((2*threshold-1)-remaining);
+ short count;
+
+ if ((bitStream & (threshold-1)) < (U32)max)
+ {
+ count = (short)(bitStream & (threshold-1));
+ bitCount += nbBits-1;
+ }
+ else
+ {
+ count = (short)(bitStream & (2*threshold-1));
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ remaining -= FSE_abs(count);
+ normalizedCounter[charnum++] = count;
+ previous0 = !count;
+ while (remaining < threshold)
+ {
+ nbBits--;
+ threshold >>= 1;
+ }
+
+ {
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ }
+ else
+ {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ ip = iend - 4;
+ }
+ bitStream = FSE_readLE32(ip) >> (bitCount & 31);
+ }
+ }
+ }
+ if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+ return ip-istart;
+}
+
+
+/*********************************************************
+* Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->newState = 0;
+ cell->symbol = symbolValue;
+ cell->nbBits = 0;
+
+ return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
+ const unsigned tableSize = 1 << nbBits;
+ const unsigned tableMask = tableSize - 1;
+ const unsigned maxSymbolValue = tableMask;
+ unsigned s;
+
+ /* Sanity checks */
+ if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
+
+ /* Build Decoding Table */
+ DTableH->tableLog = (U16)nbBits;
+ DTableH->fastMode = 1;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ dinfo[s].newState = 0;
+ dinfo[s].symbol = (BYTE)s;
+ dinfo[s].nbBits = (BYTE)nbBits;
+ }
+
+ return 0;
+}
+
+
+/* FSE_initDStream
+ * Initialize a FSE_DStream_t.
+ * srcBuffer must point at the beginning of an FSE block.
+ * The function result is the size of the FSE_block (== srcSize).
+ * If srcSize is too small, the function will return an errorCode;
+ */
+static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+ if (srcSize >= sizeof(size_t))
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
+ bitD->bitContainer = FSE_readLEST(bitD->ptr);
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
+ bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+ }
+ else
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
+ default:;
+ }
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
+ bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+ }
+
+ return srcSize;
+}
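+
+/* Note on the layout assumed above : the FSE bit-stream is written forward by the
+ * encoder but read backwards by the decoder. FSE_initDStream() points ptr at the last
+ * sizeof(size_t) bytes of the buffer, and the highest set bit of the final byte acts
+ * as a stop bit marking where reading starts; FSE_reloadDStream() then walks ptr back
+ * towards start as bits are consumed. */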
+
+
+/*!FSE_lookBits
+ * Provides next n bits from the bitContainer.
+ * bitContainer is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * return : value extracted.
+ */
+static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+
+/*!FSE_readBits
+ * Read next n bits from the bitContainer.
+ * On 32-bits, don't read more than maxNbBits==25
+ * On 64-bits, don't read more than maxNbBits==57
+ * Use the fast variant *only* if n >= 1.
+ * return : value extracted.
+ */
+static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = FSE_lookBits(bitD, nbBits);
+ FSE_skipBits(bitD, nbBits);
+ return value;
+}
+
+static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
+{
+ size_t value = FSE_lookBitsFast(bitD, nbBits);
+ FSE_skipBits(bitD, nbBits);
+ return value;
+}
+
+static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return FSE_DStream_tooFar;
+
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+ {
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = FSE_readLEST(bitD->ptr);
+ return FSE_DStream_unfinished;
+ }
+ if (bitD->ptr == bitD->start)
+ {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
+ return FSE_DStream_completed;
+ }
+ {
+ U32 nbBytes = bitD->bitsConsumed >> 3;
+ U32 result = FSE_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start)
+ {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = FSE_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ return result;
+ }
+}
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
+{
+ const void* ptr = dt;
+ const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+ DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
+ FSE_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = FSE_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = FSE_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+/* FSE_endOfDStream
+ Tells if bitD has reached end of bitStream or not */
+
+static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
+{
+ return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
+}
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt, const unsigned fast)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-3;
+
+ FSE_DStream_t bitD;
+ FSE_DState_t state1;
+ FSE_DState_t state2;
+ size_t errorCode;
+
+ /* Init */
+ errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressedSize */
+ if (FSE_isError(errorCode)) return errorCode;
+
+ FSE_initDState(&state1, &bitD, dt);
+ FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+ /* 4 symbols per loop */
+ for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
+ {
+ op[0] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ FSE_reloadDStream(&bitD);
+
+ op[1] = FSE_GETSYMBOL(&state2);
+
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
+
+ op[2] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ FSE_reloadDStream(&bitD);
+
+ op[3] = FSE_GETSYMBOL(&state2);
+ }
+
+ /* tail */
+ /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
+ while (1)
+ {
+ if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state1);
+
+ if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state2);
+ }
+
+ /* end ? */
+ if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+ return op-ostart;
+
+ if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
+
+ return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
+
+ /* select fast mode (static) */
+ if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+ const BYTE* const istart = (const BYTE*)cSrc;
+ const BYTE* ip = istart;
+ short counting[FSE_MAX_SYMBOL_VALUE+1];
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
+ unsigned tableLog;
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+ size_t errorCode;
+
+ if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
+
+ /* normal FSE decoding mode */
+ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+ if (FSE_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+ if (FSE_isError(errorCode)) return errorCode;
+
+ /* always return, even if it is an error code */
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+/* *******************************************************
+* Huff0 : Huffman block compression
+*********************************************************/
+#define HUF_MAX_SYMBOL_VALUE 255
+#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
+#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
+#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+# error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+typedef struct HUF_CElt_s {
+ U16 val;
+ BYTE nbBits;
+} HUF_CElt ;
+
+typedef struct nodeElt_s {
+ U32 count;
+ U16 parent;
+ BYTE byte;
+ BYTE nbBits;
+} nodeElt;
+
+
+/* *******************************************************
+* Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct {
+ BYTE byte;
+ BYTE nbBits;
+} HUF_DElt;
+
+static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
+{
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
+ U32 weightTotal;
+ U32 maxBits;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ size_t oSize;
+ U32 n;
+ U32 nextRankStart;
+ void* ptr = DTable+1;
+ HUF_DElt* const dt = (HUF_DElt*)ptr;
+
+ FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
+ //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzers complain ... */
+ if (iSize >= 128) /* special header */
+ {
+ if (iSize >= (242)) /* RLE */
+ {
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+ oSize = l[iSize-242];
+ memset(huffWeight, 1, sizeof(huffWeight));
+ iSize = 0;
+ }
+ else /* Incompressible */
+ {
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+ ip += 1;
+ for (n=0; n<oSize; n+=2)
+ {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ }
+ }
+ }
+ else /* header compressed with FSE (normal case) */
+ {
+ if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+ oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ memset(rankVal, 0, sizeof(rankVal));
+ weightTotal = 0;
+ for (n=0; n<oSize; n++)
+ {
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
+ rankVal[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ }
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ maxBits = FSE_highbit32(weightTotal) + 1;
+ if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
+ DTable[0] = (U16)maxBits;
+ {
+ U32 total = 1 << maxBits;
+ U32 rest = total - weightTotal;
+ U32 verif = 1 << FSE_highbit32(rest);
+ U32 lastWeight = FSE_highbit32(rest) + 1;
+ if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankVal[lastWeight]++;
+ }
+
+ /* check tree construction validity */
+ if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* Prepare ranks */
+ nextRankStart = 0;
+ for (n=1; n<=maxBits; n++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += (rankVal[n] << (n-1));
+ rankVal[n] = current;
+ }
+
+ /* fill DTable */
+ for (n=0; n<=oSize; n++)
+ {
+ const U32 w = huffWeight[n];
+ const U32 length = (1 << w) >> 1;
+ U32 i;
+ HUF_DElt D;
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
+ dt[i] = D;
+ rankVal[w] += length;
+ }
+
+ return iSize+1;
+}
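+
+/* Hypothetical worked example of the reconstruction above (the weights are assumptions) :
+ * with oSize==2 and decoded weights {1,1}, weightTotal = 1+1 = 2, hence maxBits = 2;
+ * rest = 4-2 = 2 is a clean power of 2, so the implied last symbol receives weight 2.
+ * The rank pass then gives symbols 0 and 1 one cell each (nbBits = 2) and symbol 2
+ * two cells (nbBits = 1), filling all 1<<maxBits = 4 cells of the decoding table. */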
+
+
+static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
+{
+ const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ const BYTE c = dt[val].byte;
+ FSE_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U16* DTable)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-15;
+
+ const void* ptr = DTable;
+ const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+ U32 reloadStatus;
+
+ /* Init */
+
+ const U16* jumpTable = (const U16*)cSrc;
+ const size_t length1 = FSE_readLE16(jumpTable);
+ const size_t length2 = FSE_readLE16(jumpTable+1);
+ const size_t length3 = FSE_readLE16(jumpTable+2);
+ const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* inferred; verified against cSrcSize below */
+ const char* const start1 = (const char*)(cSrc) + 6;
+ const char* const start2 = start1 + length1;
+ const char* const start3 = start2 + length2;
+ const char* const start4 = start3 + length3;
+ FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
+
+ if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+ errorCode = FSE_initDStream(&bitD1, start1, length1);
+ if (FSE_isError(errorCode)) return errorCode;
+ errorCode = FSE_initDStream(&bitD2, start2, length2);
+ if (FSE_isError(errorCode)) return errorCode;
+ errorCode = FSE_initDStream(&bitD3, start3, length3);
+ if (FSE_isError(errorCode)) return errorCode;
+ errorCode = FSE_initDStream(&bitD4, start4, length4);
+ if (FSE_isError(errorCode)) return errorCode;
+
+ reloadStatus=FSE_reloadDStream(&bitD2);
+
+ /* 16 symbols per loop */
+ for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
+ op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+ {
+#define HUF_DECODE_SYMBOL_0(n, Dstream) \
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+#define HUF_DECODE_SYMBOL_1(n, Dstream) \
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+ if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+#define HUF_DECODE_SYMBOL_2(n, Dstream) \
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+ if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+ HUF_DECODE_SYMBOL_1( 0, bitD1);
+ HUF_DECODE_SYMBOL_1( 1, bitD2);
+ HUF_DECODE_SYMBOL_1( 2, bitD3);
+ HUF_DECODE_SYMBOL_1( 3, bitD4);
+ HUF_DECODE_SYMBOL_2( 4, bitD1);
+ HUF_DECODE_SYMBOL_2( 5, bitD2);
+ HUF_DECODE_SYMBOL_2( 6, bitD3);
+ HUF_DECODE_SYMBOL_2( 7, bitD4);
+ HUF_DECODE_SYMBOL_1( 8, bitD1);
+ HUF_DECODE_SYMBOL_1( 9, bitD2);
+ HUF_DECODE_SYMBOL_1(10, bitD3);
+ HUF_DECODE_SYMBOL_1(11, bitD4);
+ HUF_DECODE_SYMBOL_0(12, bitD1);
+ HUF_DECODE_SYMBOL_0(13, bitD2);
+ HUF_DECODE_SYMBOL_0(14, bitD3);
+ HUF_DECODE_SYMBOL_0(15, bitD4);
+ }
+
+ if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
+ return (size_t)-FSE_ERROR_corruptionDetected;
+
+ /* tail */
+ {
+ // bitTail = bitD1; // *much* slower : -20% !??!
+ FSE_DStream_t bitTail;
+ bitTail.ptr = bitD1.ptr;
+ bitTail.bitsConsumed = bitD1.bitsConsumed;
+ bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
+ bitTail.start = start1;
+ for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+ {
+ HUF_DECODE_SYMBOL_0(0, bitTail);
+ }
+
+ if (FSE_endOfDStream(&bitTail))
+ return op-ostart;
+ }
+
+ if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
+
+ return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+ size_t errorCode;
+
+ errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
+ if (FSE_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
+}
+
+
+#endif /* FSE_COMMONDEFS_ONLY */
+
+/*
+ zstd - standard compression library
+ Header File for static linking only
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined in this file should be considered experimental.
+ * They are not labelled stable, as their prototypes may change in the future.
+ * You can use them for tests or to provide feedback, or if you can endure the risk of future changes.
+ */
+
+/**************************************
+* Error management
+**************************************/
+#define ZSTD_LIST_ERRORS(ITEM) \
+ ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \
+ ITEM(ZSTD_ERROR_MagicNumber) \
+ ITEM(ZSTD_ERROR_SrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \
+ ITEM(ZSTD_ERROR_corruption) \
+ ITEM(ZSTD_ERROR_maxCode)
+
+#define ZSTD_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes; /* exposed list of errors; static linking only */
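+
+/* For reference, the X-macro above expands to (sketch) :
+ * typedef enum { ZSTD_OK_NoError, ZSTD_ERROR_GENERIC, ZSTD_ERROR_MagicNumber,
+ *                ZSTD_ERROR_SrcSize, ZSTD_ERROR_maxDstSize_tooSmall,
+ *                ZSTD_ERROR_corruption, ZSTD_ERROR_maxCode } ZSTD_errorCodes; */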
+
+/*
+ zstd - standard compression library
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+* Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
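+/* For example, the default value 17 above corresponds to a 2^17 = 128 KB hash table
+ * (HASH_LOG = 15 below, i.e. 32768 U32 entries), while 20 would use 1 MB. */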
+
+
+/**************************************
+ CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated based on feedback
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+ || defined(__ARM_FEATURE_UNALIGNED) \
+ || defined(__i386__) || defined(__x86_64__) \
+ || defined(_M_IX86) || defined(_M_X64) \
+ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+ || (defined(_M_ARM) && (_M_ARM >= 7))
+# define ZSTD_UNALIGNED_ACCESS 1
+#else
+# define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+* Includes
+*********************************************************/
+#include <stdlib.h> /* calloc */
+#include <string.h> /* memcpy, memmove */
+#include <stdio.h> /* debug : printf */
+
+
+/********************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+# include <immintrin.h> /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#else
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/********************************************************
+* Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# include <stdint.h>
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef int16_t S16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char BYTE;
+typedef unsigned short U16;
+typedef signed short S16;
+typedef unsigned int U32;
+typedef signed int S32;
+typedef unsigned long long U64;
+#endif
+
+#endif /* MEM_ACCESS_MODULE */
+
+
+/********************************************************
+* Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB) /* define, for static allocation */
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits 7
+#define LLbits 6
+#define Offbits 5
+#define MaxML ((1<<MLbits )-1)
+#define MaxLL ((1<<LLbits )-1)
+#define MaxOff ((1<<Offbits)-1)
+#define LitFSELog 11
+#define MLFSELog 10
+#define LLFSELog 10
+#define OffFSELog 9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7 /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/********************************************************
+* Memory operations
+*********************************************************/
+static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
+
+static unsigned ZSTD_isLittleEndian(void)
+{
+ const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ while (op < oend) COPY8(op, ip);
+}
+
+static U16 ZSTD_readLE16(const void* memPtr)
+{
+ if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)((U16)p[0] + ((U16)p[1]<<8));
+ }
+}
+
+
+static U32 ZSTD_readLE32(const void* memPtr)
+{
+ if (ZSTD_isLittleEndian())
+ return ZSTD_read32(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+ }
+}
+
+static U32 ZSTD_readBE32(const void* memPtr)
+{
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
+}
+
+
+/**************************************
+* Local structures
+***************************************/
+typedef struct ZSTD_Cctx_s ZSTD_Cctx;
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+ blockType_t blockType;
+ U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+ void* buffer;
+ U32* offsetStart;
+ U32* offset;
+ BYTE* offCodeStart;
+ BYTE* offCode;
+ BYTE* litStart;
+ BYTE* lit;
+ BYTE* litLengthStart;
+ BYTE* litLength;
+ BYTE* matchLengthStart;
+ BYTE* matchLength;
+ BYTE* dumpsStart;
+ BYTE* dumps;
+} seqStore_t;
+
+
+typedef struct ZSTD_Cctx_s
+{
+ const BYTE* base;
+ U32 current;
+ U32 nextUpdate;
+ seqStore_t seqStore;
+#ifdef __AVX2__
+ __m256i hashTable[HASH_TABLESIZE>>3];
+#else
+ U32 hashTable[HASH_TABLESIZE];
+#endif
+ BYTE buffer[WORKPLACESIZE];
+} cctxi_t;
+
+
+
+
+/**************************************
+* Error Management
+**************************************/
+/* tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return (code > (size_t)(-ZSTD_ERROR_maxCode)); }
+
+/* published entry point */
+unsigned ZSTDv01_isError(size_t code) { return ZSTD_isError(code); }
+
+
+/**************************************
+* Tool functions
+**************************************/
+#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
+#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
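+/* For this version (0.1.3), ZSTD_VERSION_NUMBER therefore evaluates to 0*10000 + 1*100 + 3 = 103. */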
+
+/**************************************************************
+* Decompression code
+**************************************************************/
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+ const BYTE* const in = (const BYTE* const)src;
+ BYTE headerFlags;
+ U32 cSize;
+
+ if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize;
+
+ headerFlags = *in;
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+ if (bpPtr->blockType == bt_end) return 0;
+ if (bpPtr->blockType == bt_rle) return 1;
+ return cSize;
+}
+
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+ memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+
+static size_t ZSTD_decompressLiterals(void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + maxDstSize;
+ const BYTE* ip = (const BYTE*)src;
+ size_t errorCode;
+ size_t litSize;
+
+ /* check : minimum 2 bytes for litSize, +1 for content */
+ if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption;
+
+ litSize = ip[1] + (ip[0]<<8);
+ litSize += ((ip[-3] >> 3) & 7) << 16; /* note : reaches back into the literals block header located just before src */
+ op = oend - litSize;
+
+ (void)ctx;
+ if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+ errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+ return litSize;
+}
+
+
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+ void* dst, size_t maxDstSize,
+ const BYTE** litStart, size_t* litSize,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* ip = istart;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* const oend = ostart + maxDstSize;
+ blockProperties_t litbp;
+
+ size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
+ if (ZSTD_isError(litcSize)) return litcSize;
+ if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
+ ip += ZSTD_blockHeaderSize;
+
+ switch(litbp.blockType)
+ {
+ case bt_raw:
+ *litStart = ip;
+ ip += litcSize;
+ *litSize = litcSize;
+ break;
+ case bt_rle:
+ {
+ size_t rleSize = litbp.origSize;
+ if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+ memset(oend - rleSize, *ip, rleSize);
+ *litStart = oend - rleSize;
+ *litSize = rleSize;
+ ip++;
+ break;
+ }
+ case bt_compressed:
+ {
+ size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
+ if (ZSTD_isError(decodedLitSize)) return decodedLitSize;
+ *litStart = oend - decodedLitSize;
+ *litSize = decodedLitSize;
+ ip += litcSize;
+ break;
+ }
+ case bt_end:
+ default:
+ return (size_t)-ZSTD_ERROR_GENERIC;
+ }
+
+ return ip-istart;
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* ip = istart;
+ const BYTE* const iend = istart + srcSize;
+ U32 LLtype, Offtype, MLtype;
+ U32 LLlog, Offlog, MLlog;
+ size_t dumpsLength;
+
+ /* check */
+ if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize;
+
+ /* SeqHead */
+ *nbSeq = ZSTD_readLE16(ip); ip+=2;
+ LLtype = *ip >> 6;
+ Offtype = (*ip >> 4) & 3;
+ MLtype = (*ip >> 2) & 3;
+ if (*ip & 2)
+ {
+ dumpsLength = ip[2];
+ dumpsLength += ip[1] << 8;
+ ip += 3;
+ }
+ else
+ {
+ dumpsLength = ip[1];
+ dumpsLength += (ip[0] & 1) << 8;
+ ip += 2;
+ }
+ *dumpsPtr = ip;
+ ip += dumpsLength;
+ *dumpsLengthPtr = dumpsLength;
+
+ /* check */
+ if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+ /* sequences */
+ {
+ S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
+ size_t headerSize;
+
+ /* Build DTables */
+ switch(LLtype)
+ {
+ U32 max;
+ case bt_rle :
+ LLlog = 0;
+ FSE_buildDTable_rle(DTableLL, *ip++); break;
+ case bt_raw :
+ LLlog = LLbits;
+ FSE_buildDTable_raw(DTableLL, LLbits); break;
+ default :
+ max = MaxLL;
+ headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+ if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption;
+ ip += headerSize;
+ FSE_buildDTable(DTableLL, norm, max, LLlog);
+ }
+
+ switch(Offtype)
+ {
+ U32 max;
+ case bt_rle :
+ Offlog = 0;
+ if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableOffb, *ip++); break;
+ case bt_raw :
+ Offlog = Offbits;
+ FSE_buildDTable_raw(DTableOffb, Offbits); break;
+ default :
+ max = MaxOff;
+ headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+ if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption;
+ ip += headerSize;
+ FSE_buildDTable(DTableOffb, norm, max, Offlog);
+ }
+
+ switch(MLtype)
+ {
+ U32 max;
+ case bt_rle :
+ MLlog = 0;
+ if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableML, *ip++); break;
+ case bt_raw :
+ MLlog = MLbits;
+ FSE_buildDTable_raw(DTableML, MLbits); break;
+ default :
+ max = MaxML;
+ headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+ if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption;
+ ip += headerSize;
+ FSE_buildDTable(DTableML, norm, max, MLlog);
+ }
+ }
+
+ return ip-istart;
+}
+
+
+typedef struct {
+ size_t litLength;
+ size_t offset;
+ size_t matchLength;
+} seq_t;
+
+typedef struct {
+ FSE_DStream_t DStream;
+ FSE_DState_t stateLL;
+ FSE_DState_t stateOffb;
+ FSE_DState_t stateML;
+ size_t prevOffset;
+ const BYTE* dumps;
+ const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+ size_t litLength;
+ size_t prevOffset;
+ size_t offset;
+ size_t matchLength;
+ const BYTE* dumps = seqState->dumps;
+ const BYTE* const de = seqState->dumpsEnd;
+
+ /* Literal length */
+ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;
+ seqState->prevOffset = seq->offset;
+ if (litLength == MaxLL)
+ {
+ U32 add = dumps<de ? *dumps++ : 0;
+ if (add < 255) litLength += add;
+ else
+ {
+ if (dumps<=(de-3))
+ {
+ litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* safe : dumps is always followed by seq tables, > 1 byte */
+ dumps += 3;
+ }
+ }
+ }
+
+ /* Offset */
+ {
+ U32 offsetCode, nbBits;
+ offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
+ if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+ nbBits = offsetCode - 1;
+ if (offsetCode==0) nbBits = 0; /* cmove */
+ offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
+ if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+ if (offsetCode==0) offset = prevOffset;
+ }
+
+ /* MatchLength */
+ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+ if (matchLength == MaxML)
+ {
+ U32 add = dumps<de ? *dumps++ : 0;
+ if (add < 255) matchLength += add;
+ else
+ {
+ if (dumps<=(de-3))
+ {
+ matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* safe : dumps is always followed by seq tables, > 1 byte */
+ dumps += 3;
+ }
+ }
+ }
+ matchLength += MINMATCH;
+
+ /* save result */
+ seq->litLength = litLength;
+ seq->offset = offset;
+ seq->matchLength = matchLength;
+ seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+ seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ BYTE* const base, BYTE* const oend)
+{
+ static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
+ const BYTE* const ostart = op;
+ const size_t litLength = sequence.litLength;
+ BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
+ const BYTE* const litEnd = *litPtr + litLength;
+
+ /* check */
+ if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */
+ if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
+ if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */
+
+ /* copy Literals */
+ if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
+ memmove(op, *litPtr, litLength); /* overwrite risk */
+ else
+ ZSTD_wildcopy(op, *litPtr, litLength);
+ op += litLength;
+ *litPtr = litEnd; /* update for next sequence */
+
+ /* check : last match must be at a minimum distance of 8 from end of dest buffer */
+ if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+ /* copy Match */
+ {
+ const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
+ const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
+ size_t qutt = 12;
+ U64 saved[2];
+
+ /* check */
+ if (match < base) return (size_t)-ZSTD_ERROR_corruption;
+ if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption;
+
+ /* save beginning of literal sequence, in case of write overlap */
+ if (overlapRisk)
+ {
+ if ((endMatch + qutt) > oend) qutt = oend-endMatch;
+ memcpy(saved, endMatch, qutt);
+ }
+
+ if (sequence.offset < 8)
+ {
+ const int dec64 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= dec64;
+ } else { ZSTD_copy8(op, match); }
+ op += 8; match += 8;
+
+ if (endMatch > oend-12)
+ {
+ if (op < oend-8)
+ {
+ ZSTD_wildcopy(op, match, (oend-8) - op);
+ match += (oend-8) - op;
+ op = oend-8;
+ }
+ while (op<endMatch) *op++ = *match++;
+ }
+ else
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
+
+ /* restore, in case of overlap */
+ if (overlapRisk) memcpy(endMatch, saved, qutt);
+ }
+
+ return endMatch-ostart;
+}
+
+typedef struct ZSTDv01_Dctx_s
+{
+ U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+ U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+ U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ void* previousDstEnd;
+ void* base;
+ size_t expected;
+ blockType_t bType;
+ U32 phase;
+} dctx_t;
+
+
+static size_t ZSTD_decompressSequences(
+ void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize,
+ const BYTE* litStart, size_t litSize)
+{
+ dctx_t* dctx = (dctx_t*)ctx;
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t errorCode, dumpsLength;
+ const BYTE* litPtr = litStart;
+ const BYTE* const litEnd = litStart + litSize;
+ int nbSeq;
+ const BYTE* dumps;
+ U32* DTableLL = dctx->LLTable;
+ U32* DTableML = dctx->MLTable;
+ U32* DTableOffb = dctx->OffTable;
+ BYTE* const base = (BYTE*) (dctx->base);
+
+ /* Build Decoding Tables */
+ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+ DTableLL, DTableML, DTableOffb,
+ ip, iend-ip);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ ip += errorCode;
+
+ /* Regen sequences */
+ {
+ seq_t sequence;
+ seqState_t seqState;
+
+ memset(&sequence, 0, sizeof(sequence));
+ seqState.dumps = dumps;
+ seqState.dumpsEnd = dumps + dumpsLength;
+ seqState.prevOffset = 1;
+ errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+ for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
+ {
+ size_t oneSeqSize;
+ nbSeq--;
+ ZSTD_decodeSequence(&sequence, &seqState);
+ oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+
+ /* check if reached exact end */
+ if ( !FSE_endOfDStream(&(seqState.DStream)) ) return (size_t)-ZSTD_ERROR_corruption; /* requested too much : data is corrupted */
+ if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption; /* requested too many sequences : data is corrupted */
+
+ /* last literal segment */
+ {
+ size_t lastLLSize = litEnd - litPtr;
+ if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+ if (op != litPtr) memmove(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+ void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ /* blockType == blockCompressed, srcSize is trusted */
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* litPtr = NULL;
+ size_t litSize = 0;
+ size_t errorCode;
+
+ /* Decode literals sub-block */
+ errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ ip += errorCode;
+ srcSize -= errorCode;
+
+ return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
+}
+
+
+size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* iend = ip + srcSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t remainingSize = srcSize;
+ U32 magicNumber;
+ size_t errorCode=0;
+ blockProperties_t blockProperties;
+
+ /* Frame Header */
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
+ magicNumber = ZSTD_readBE32(src);
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
+ ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+ /* Loop on each block */
+ while (1)
+ {
+ size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+ if (ZSTD_isError(blockSize)) return blockSize;
+
+ ip += ZSTD_blockHeaderSize;
+ remainingSize -= ZSTD_blockHeaderSize;
+ if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
+ break;
+ case bt_raw :
+ errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
+ break;
+ case bt_rle :
+ return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */
+ break;
+ case bt_end :
+ /* end of frame */
+ if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
+ break;
+ default:
+ return (size_t)-ZSTD_ERROR_GENERIC;
+ }
+ if (blockSize == 0) break; /* bt_end */
+
+ if (ZSTD_isError(errorCode)) return errorCode;
+ op += errorCode;
+ ip += blockSize;
+ remainingSize -= blockSize;
+ }
+
+ return op-ostart;
+}
+
+size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ dctx_t ctx;
+ ctx.base = dst;
+ return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/*******************************
+* Streaming Decompression API
+*******************************/
+
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
+{
+ dctx->expected = ZSTD_frameHeaderSize;
+ dctx->phase = 0;
+ dctx->previousDstEnd = NULL;
+ dctx->base = NULL;
+ return 0;
+}
+
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
+{
+ ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
+ if (dctx==NULL) return NULL;
+ ZSTDv01_resetDCtx(dctx);
+ return dctx;
+}
+
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
+{
+ free(dctx);
+ return 0;
+}
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
+{
+ return ((dctx_t*)dctx)->expected;
+}
+
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ dctx_t* ctx = (dctx_t*)dctx;
+
+ /* Sanity check */
+ if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize;
+ if (dst != ctx->previousDstEnd) /* not contiguous */
+ ctx->base = dst;
+
+ /* Decompress : frame header */
+ if (ctx->phase == 0)
+ {
+ /* Check frame magic header */
+ U32 magicNumber = ZSTD_readBE32(src);
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
+ ctx->phase = 1;
+ ctx->expected = ZSTD_blockHeaderSize;
+ return 0;
+ }
+
+ /* Decompress : block header */
+ if (ctx->phase == 1)
+ {
+ blockProperties_t bp;
+ size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+ if (ZSTD_isError(blockSize)) return blockSize;
+ if (bp.blockType == bt_end)
+ {
+ ctx->expected = 0;
+ ctx->phase = 0;
+ }
+ else
+ {
+ ctx->expected = blockSize;
+ ctx->bType = bp.blockType;
+ ctx->phase = 2;
+ }
+
+ return 0;
+ }
+
+ /* Decompress : block content */
+ {
+ size_t rSize;
+ switch(ctx->bType)
+ {
+ case bt_compressed:
+ rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+ break;
+ case bt_raw :
+ rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+ break;
+ case bt_rle :
+ return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
+ break;
+ case bt_end : /* should never happen (filtered at phase 1) */
+ rSize = 0;
+ break;
+ default:
+ return (size_t)-ZSTD_ERROR_GENERIC;
+ }
+ ctx->phase = 1;
+ ctx->expected = ZSTD_blockHeaderSize;
+ ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+ return rSize;
+ }
+
+}
+
+
diff --git a/contrib/libs/zstd06/legacy/zstd_v01.h b/contrib/libs/zstd06/legacy/zstd_v01.h
index c90731b2b1..2b0bea2857 100644
--- a/contrib/libs/zstd06/legacy/zstd_v01.h
+++ b/contrib/libs/zstd06/legacy/zstd_v01.h
@@ -1,101 +1,101 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd - standard compression library
- Header File
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#pragma once
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* *************************************
-* Simple one-step function
-***************************************/
-/**
-ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
- compressedSize : is the exact source size
- maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
- It must be equal or larger than originalSize, otherwise decompression will fail.
- return : the number of bytes decompressed into destination buffer (originalSize)
- or an errorCode if it fails (which can be tested using ZSTDv01_isError())
-*/
-size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/**
-ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
-*/
-unsigned ZSTDv01_isError(size_t code);
-
-
-/* *************************************
-* Advanced functions
-***************************************/
-typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
-ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
-size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
-
-size_t ZSTDv01_decompressDCtx(void* ctx,
- void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/* *************************************
-* Streaming functions
-***************************************/
-size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
-
-size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
-size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
-/**
- Use above functions alternatively.
- ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
- Result is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-*/
-
-/* *************************************
-* Prefix - version detection
-***************************************/
-#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */
-#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */
-
-
-#if defined (__cplusplus)
-}
-#endif
+/*
+ zstd - standard compression library
+ Header File
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal to or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize)
+ or an errorCode if it fails (which can be tested using ZSTDv01_isError());
+ a minimal usage sketch follows the ZSTDv01_isError() declaration below.
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
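+
+/* Minimal one-shot usage sketch; the function and variable names below are illustrative
+   assumptions, not part of this API :
+
+    size_t decode_frame(void* dst, size_t dstCapacity, const void* src, size_t compressedSize)
+    {
+        size_t const result = ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
+        if (ZSTDv01_isError(result)) return 0;   // decoding failed
+        return result;                           // number of bytes written into dst
+    }
+*/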
+
+
+/* *************************************
+* Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+ void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+/* *************************************
+* Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+ Use the above functions alternately; a typical decoding loop is sketched after this comment block.
+ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() will use previously decoded blocks as history, provided they are located immediately prior to the current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
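+
+/* Hypothetical streaming sketch; assumes a freshly created (or reset) dctx, and the
+   function and variable names below are illustrative assumptions, not part of this API :
+
+    size_t decode_stream(ZSTDv01_Dctx* dctx, void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize)
+    {
+        const char* ip = (const char*)src;
+        char* op = (char*)dst;
+        size_t toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);   // frame header comes first
+        while (toRead != 0)
+        {
+            size_t decoded;
+            if (toRead > srcSize - (size_t)(ip - (const char*)src)) return 0;   // truncated input
+            decoded = ZSTDv01_decompressContinue(dctx, op, dstCapacity - (size_t)(op - (char*)dst), ip, toRead);
+            if (ZSTDv01_isError(decoded)) return 0;
+            ip += toRead;
+            op += decoded;                                        // may be 0 right after a header
+            toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);
+        }
+        return (size_t)(op - (char*)dst);
+    }
+*/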
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/contrib/libs/zstd06/legacy/zstd_v02.c b/contrib/libs/zstd06/legacy/zstd_v02.c
index ffc6c9d61e..2d4cfa59c4 100644
--- a/contrib/libs/zstd06/legacy/zstd_v02.c
+++ b/contrib/libs/zstd06/legacy/zstd_v02.c
@@ -1,3748 +1,3748 @@
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include "zstd_v02.h"
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#elif defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/******************************************
-* Error Management
-******************************************/
-#define PREFIX(name) ZSTD_error_##name
-
-#define ERROR(name) (size_t)-PREFIX(name)
-
-#define ERROR_LIST(ITEM) \
- ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
- ITEM(PREFIX(memory_allocation)) \
- ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
- ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
- ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
- ITEM(PREFIX(maxCode))
-
-#define ERROR_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
-
-#define ERROR_CONVERTTOSTRING(STRING) #STRING,
-#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
-
-
-/* ******************************************************************
- mem.h
- low-level memory access routines
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/******************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include <string.h> /* memcpy */
-
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/****************************************************************
-* Basic Types
-*****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
- typedef uint8_t BYTE;
- typedef uint16_t U16;
- typedef int16_t S16;
- typedef uint32_t U32;
- typedef int32_t S32;
- typedef uint64_t U64;
- typedef int64_t S64;
-#else
- typedef unsigned char BYTE;
- typedef unsigned short U16;
- typedef signed short S16;
- typedef unsigned int U32;
- typedef signed int S32;
- typedef unsigned long long U64;
- typedef signed long long S64;
-#endif
-
-
-/****************************************************************
-* Memory I/O
-*****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The below switch allow to select different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
- * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- * It can generate buggy code on targets generating assembly depending on alignment.
- * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
-# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
-# define MEM_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
-MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
-MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
-
-MEM_STATIC unsigned MEM_isLittleEndian(void)
-{
- const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
-
-/* violates C standard on structure alignment.
-Only use if no other choice to achieve best performance on target platform */
-MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
-MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
-MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
-
-#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
-
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
-
-#else
-
-/* default method, safe and standard.
- can sometimes prove slower */
-
-MEM_STATIC U16 MEM_read16(const void* memPtr)
-{
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U32 MEM_read32(const void* memPtr)
-{
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U64 MEM_read64(const void* memPtr)
-{
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write32(void* memPtr, U32 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write64(void* memPtr, U64 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif // MEM_FORCE_MEMORY_ACCESS
-
-
-MEM_STATIC U16 MEM_readLE16(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read16(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)(p[0] + (p[1]<<8));
- }
-}
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write16(memPtr, val);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val;
- p[1] = (BYTE)(val>>8);
- }
-}
-
-MEM_STATIC U32 MEM_readLE32(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read32(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
- }
-}
-
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write32(memPtr, val32);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val32;
- p[1] = (BYTE)(val32>>8);
- p[2] = (BYTE)(val32>>16);
- p[3] = (BYTE)(val32>>24);
- }
-}
-
-MEM_STATIC U64 MEM_readLE64(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read64(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
- + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
- }
-}
-
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write64(memPtr, val64);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val64;
- p[1] = (BYTE)(val64>>8);
- p[2] = (BYTE)(val64>>16);
- p[3] = (BYTE)(val64>>24);
- p[4] = (BYTE)(val64>>32);
- p[5] = (BYTE)(val64>>40);
- p[6] = (BYTE)(val64>>48);
- p[7] = (BYTE)(val64>>56);
- }
-}
-
-MEM_STATIC size_t MEM_readLEST(const void* memPtr)
-{
- if (MEM_32bits())
- return (size_t)MEM_readLE32(memPtr);
- else
- return (size_t)MEM_readLE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
-{
- if (MEM_32bits())
- MEM_writeLE32(memPtr, (U32)val);
- else
- MEM_writeLE64(memPtr, (U64)val);
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* MEM_H_MODULE */
-
-
-/* ******************************************************************
- bitstream
- Part of NewGen Entropy library
- header file (to include)
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*
-* This API consists of small unitary functions, which highly benefit from being inlined.
-* Since link-time-optimization is not available for all compilers,
-* these functions are defined into a .h to be included.
-*/
-
-
-/**********************************************
-* bitStream decompression API (read backward)
-**********************************************/
-typedef struct
-{
- size_t bitContainer;
- unsigned bitsConsumed;
- const char* ptr;
- const char* start;
-} BIT_DStream_t;
-
-typedef enum { BIT_DStream_unfinished = 0,
- BIT_DStream_endOfBuffer = 1,
- BIT_DStream_completed = 2,
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
-
-
-/*
-* Start by invoking BIT_initDStream().
-* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-* You can then retrieve bitFields stored into the local register, **in reverse order**.
-* Local register is manually filled from memory by the BIT_reloadDStream() method.
-* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
-* Otherwise, it can be less than that, so proceed accordingly.
-* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
-*/
-
-
-/******************************************
-* unsafe API
-******************************************/
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-
-
-/****************************************************************
-* Helper functions
-****************************************************************/
-MEM_STATIC unsigned BIT_highbit32 (register U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- unsigned r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-
-/**********************************************************
-* bitStream decoding
-**********************************************************/
-
-/*!BIT_initDStream
-* Initialize a BIT_DStream_t.
-* @bitD : a pointer to an already allocated BIT_DStream_t structure
-* @srcBuffer must point at the beginning of a bitStream
-* @srcSize must be the exact size of the bitStream
-* @result : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
-
- if (srcSize >= sizeof(size_t)) /* normal case */
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
- }
- else
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = bitD->start;
- bitD->bitContainer = *(const BYTE*)(bitD->start);
- switch(srcSize)
- {
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
- default:;
- }
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
- bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
- }
-
- return srcSize;
-}
-
-/*!BIT_lookBits
- * Provides next n bits from local register
- * local register is not modified (bits are still present for next read/look)
- * On 32-bits, maxNbBits==25
- * On 64-bits, maxNbBits==57
- * @return : value extracted
- */
-MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
-}
-
-/*! BIT_lookBitsFast :
-* unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
-}
-
-MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- bitD->bitsConsumed += nbBits;
-}
-
-/*!BIT_readBits
- * Read next n bits from local register.
- * pay attention to not read more than nbBits contained into local register.
- * @return : extracted value.
- */
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BIT_lookBits(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-/*!BIT_readBitsFast :
-* unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BIT_lookBitsFast(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
-{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
-
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
- {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
- }
- if (bitD->ptr == bitD->start)
- {
- if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
- return BIT_DStream_completed;
- }
- {
- U32 nbBytes = bitD->bitsConsumed >> 3;
- BIT_DStream_status result = BIT_DStream_unfinished;
- if (bitD->ptr - nbBytes < bitD->start)
- {
- nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
- result = BIT_DStream_endOfBuffer;
- }
- bitD->ptr -= nbBytes;
- bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
- return result;
- }
-}
-
-/*! BIT_endOfDStream
-* @return Tells if DStream has reached its exact end
-*/
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
-{
- return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* BITSTREAM_H_MODULE */
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#elif defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/******************************************
-* Error Management
-******************************************/
-#define PREFIX(name) ZSTD_error_##name
-
-#define ERROR(name) (size_t)-PREFIX(name)
-
-#define ERROR_LIST(ITEM) \
- ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
- ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
- ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
- ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
- ITEM(PREFIX(maxCode))
-
-#define ERROR_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
-
-#define ERROR_CONVERTTOSTRING(STRING) #STRING,
-#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
-static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC const char* ERR_getErrorName(size_t code)
-{
- static const char* codeError = "Unspecified error code";
- if (ERR_isError(code)) return ERR_strings[-(int)(code)];
- return codeError;
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
-/*
-Constructor and Destructor of type FSE_CTable
- Note that its size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-
-/* ******************************************************************
- FSE : Finite State Entropy coder
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/******************************************
-* Static allocation
-******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
-
-
-/******************************************
-* FSE advanced API
-******************************************/
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
-
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
-/* build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-
-/******************************************
-* FSE symbol decompression API
-******************************************/
-typedef struct
-{
- size_t state;
- const void* table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-
-static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
-
-/*
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream; // Stream context
-FSE_DState_t DState; // State context. Multiple ones are possible
-FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
- errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
- errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
- unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
- size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
- endSignal = FSE_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left into the DStream.
-BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
- BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
- BIT_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
- FSE_endOfDState(&DState);
-*/
-
-
-/******************************************
-* FSE unsafe API
-******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-
-/******************************************
-* Implementation of inline functions
-******************************************/
-
-/* decompression */
-
-typedef struct {
- U16 tableLog;
- U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct
-{
- unsigned short newState;
- unsigned char symbol;
- unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- memcpy(&DTableH, dt, sizeof(DTableH));
- DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
- BIT_reloadDStream(bitD);
- DStatePtr->table = dt + 1;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BIT_readBits(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BIT_readBitsFast(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
-{
- return DStatePtr->state == 0;
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/******************************************
-* Static allocation macros
-******************************************/
-/* Huff0 buffer bounds */
-#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */
-#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* static allocation of Huff0's DTable */
-#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) /* nb Cells; use unsigned short for X2, unsigned int for X4 */
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
- unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
- unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
- unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
-
-
-/******************************************
-* Advanced functions
-******************************************/
-static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
-static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
-static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-/*
- zstd - standard compression library
- Header File
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* *************************************
-* Version
-***************************************/
-#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
-#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
-#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-
-
-/* *************************************
-* Advanced functions
-***************************************/
-typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */
-
-#if defined (__cplusplus)
-}
-#endif
-/*
- zstd - standard compression library
- Header File for static linking only
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Streaming functions
-***************************************/
-
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-
-/*
- Use above functions alternatively.
- ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
- Result is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-*/
-
-/* *************************************
-* Prefix - version detection
-***************************************/
-#define ZSTD_magicNumber 0xFD2FB522 /* v0.2 (current)*/
-
-
-#if defined (__cplusplus)
-}
-#endif
-/* ******************************************************************
- FSE : Finite State Entropy coder
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-#ifndef FSE_COMMONDEFS_ONLY
-
-/****************************************************************
-* Tuning parameters
-****************************************************************/
-/* MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect
-* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
-
-/* FSE_MAX_SYMBOL_VALUE :
-* Maximum symbol value authorized.
-* Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
-
-/****************************************************************
-* template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-
-
-/****************************************************************
-* Byte symbol type
-****************************************************************/
-#endif /* !FSE_COMMONDEFS_ONLY */
-
-
-/****************************************************************
-* Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/****************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-/****************************************************************
-* Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
-#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-
-/****************************************************************
-* Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/****************************************************************
-* Complex types
-****************************************************************/
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-
-/****************************************************************
-* Templates
-****************************************************************/
-/*
- designed to be included
- for type-specific functions (template emulation in C)
- Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-# error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-# error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X,Y) X##Y
-#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
-#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
-
-
-/* Function templates */
-
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include "zstd_v02.h"
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+* Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+ ITEM(PREFIX(memory_allocation)) \
+ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+ ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+ ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+
+
+/* ******************************************************************
+ mem.h
+ low-level memory access routines
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include <string.h> /* memcpy */
+
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef signed short S16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef signed long long S64;
+#endif
+
+
+/****************************************************************
+* Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ * It can generate buggy code on targets generating assembly depending on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define MEM_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define MEM_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+ can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read16(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)(p[0] + (p[1]<<8));
+ }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write16(memPtr, val);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val;
+ p[1] = (BYTE)(val>>8);
+ }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read32(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+ }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write32(memPtr, val32);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val32;
+ p[1] = (BYTE)(val32>>8);
+ p[2] = (BYTE)(val32>>16);
+ p[3] = (BYTE)(val32>>24);
+ }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read64(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+ + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+ }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write64(memPtr, val64);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val64;
+ p[1] = (BYTE)(val64>>8);
+ p[2] = (BYTE)(val64>>16);
+ p[3] = (BYTE)(val64>>24);
+ p[4] = (BYTE)(val64>>32);
+ p[5] = (BYTE)(val64>>40);
+ p[6] = (BYTE)(val64>>48);
+ p[7] = (BYTE)(val64>>56);
+ }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readLE32(memPtr);
+ else
+ return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeLE32(memPtr, (U32)val);
+ else
+ MEM_writeLE64(memPtr, (U64)val);
+}
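+
+/* Illustrative sketch (not part of the original sources) : a round-trip through the
+   little-endian helpers above. MEM_writeLE32() / MEM_readLE32() store and load the same
+   value regardless of host endianness, which is what the codecs below rely on. */
+MEM_STATIC unsigned MEM_roundTripExample(void)   /* hypothetical name, for illustration only */
+{
+    BYTE buffer[4];
+    MEM_writeLE32(buffer, 0x12345678U);            /* memory layout becomes 78 56 34 12 */
+    return MEM_readLE32(buffer) == 0x12345678U;    /* returns 1 on any platform */
+}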
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+ bitstream
+ Part of NewGen Entropy library
+ header file (to include)
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+* This API consists of small unitary functions, which highly benefit from being inlined.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined in a .h file, meant to be included.
+*/
+
+
+/**********************************************
+* bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+ BIT_DStream_endOfBuffer = 1,
+ BIT_DStream_completed = 2,
+ BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* The local register is 64 bits wide on 64-bit systems, 32 bits wide on 32-bit systems (size_t).
+* You can then retrieve bitFields stored in the local register, **in reverse order**.
+* The local register is refilled from memory manually, by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking whether the DStream has reached its end can be performed with BIT_endOfDStream().
+* (A usage sketch follows below.)
+*/
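+
+/* Illustrative sketch (not part of the original sources) : reading back two bitfields that an
+   encoder flushed in the order a then b. As described above, fields come out in reverse order,
+   and the local register is refilled explicitly. The field widths (3 and 5 bits) and the
+   function name are made up for the example. */
+MEM_STATIC unsigned BIT_usageExample(const void* srcBuffer, size_t srcSize,
+                                     unsigned* a, unsigned* b)
+{
+    BIT_DStream_t bitD;
+    if (BIT_initDStream(&bitD, srcBuffer, srcSize) != srcSize) return 0;   /* error code returned */
+    *b = (unsigned)BIT_readBits(&bitD, 5);   /* last field written is the first field read */
+    *a = (unsigned)BIT_readBits(&bitD, 3);
+    BIT_reloadDStream(&bitD);                /* refill the local register from memory */
+    return BIT_endOfDStream(&bitD);          /* 1 if the bitStream was consumed exactly */
+}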
+
+
+/******************************************
+* unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+* Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse ( &r, val );
+ return (unsigned) r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
+ return 31 - __builtin_clz (val);
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ unsigned r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
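+
+/* For reference (not part of the original sources) : BIT_highbit32() returns the position of the
+   highest set bit, e.g. BIT_highbit32(1) == 0, BIT_highbit32(32) == 5, BIT_highbit32(0x80000000) == 31.
+   The result is undefined for val == 0, a case the callers in this file guard against. */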
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+* Initialize a BIT_DStream_t.
+* @bitD : a pointer to an already allocated BIT_DStream_t structure
+* @srcBuffer must point at the beginning of a bitStream
+* @srcSize must be the exact size of the bitStream
+* @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+ if (srcSize >= sizeof(size_t)) /* normal case */
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+ }
+ else
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
+ default:;
+ }
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+ }
+
+ return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+* unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * Take care not to read more bits than the local register currently contains.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BIT_lookBits(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*!BIT_readBitsFast :
+* unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BIT_lookBitsFast(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
+
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+ {
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+ }
+ if (bitD->ptr == bitD->start)
+ {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+ return BIT_DStream_completed;
+ }
+ {
+ U32 nbBytes = bitD->bitsConsumed >> 3;
+ BIT_DStream_status result = BIT_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start)
+ {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = BIT_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ return result;
+ }
+}
+
+/*! BIT_endOfDStream
+* @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+* Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+ ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+ ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ static const char* codeError = "Unspecified error code";
+ if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+ return codeError;
+}
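+
+/* Illustrative sketch (not part of the original sources) : the calling convention for the error
+   codes above. A size_t result is either a valid size or a negated enum value; ERR_isError()
+   distinguishes the two and ERR_getErrorName() maps a code back to its symbolic name
+   (e.g. "ZSTD_error_srcSize_wrong"). The function name is made up for the example. */
+ERR_STATIC const char* ERR_describeExample(size_t result)
+{
+    if (!ERR_isError(result)) return "no error";   /* 'result' is a valid size */
+    return ERR_getErrorName(result);               /* 'result' carries an error code */
+}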
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+ Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+* Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* You can statically allocate an FSE CTable/DTable as a table of unsigned using the macros below */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
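+
+/* Illustrative sketch (not part of the original sources) : stack allocation of an FSE decoding
+   table using the size macro above. The tableLog of 12 is an arbitrary value chosen for the
+   example (it yields 1 + 2^12 = 4097 unsigned cells, about 16 KB). */
+MEM_STATIC void FSE_staticAllocExample(void)   /* hypothetical name, for illustration only */
+{
+    FSE_DTable dtableOnStack[FSE_DTABLE_SIZE_U32(12)];
+    (void)dtableOnStack;   /* such a table would be filled by FSE_buildDTable() and then used for decoding */
+}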
+
+
+/******************************************
+* FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+* FSE symbol decompression API
+******************************************/
+typedef struct
+{
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream; // Stream context
+FSE_DState_t DState; // State context. Multiple ones are possible
+FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+ errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+ errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can also retrieve any bitfield you may have stored in the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bit compatibility
+ size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All the above operations only read from the local register (whose size depends on size_t).
+Refilling the register from memory is performed manually by the reload method.
+ endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left in the DStream.
+BIT_DStream_endOfBuffer : DStream reached the end of its buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : DStream reached its exact end, which in general corresponds to decompression being completed.
+BIT_DStream_overflow : DStream went too far. The decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BIT_endOfDStream(&DStream);
+Also check the states: there might be some symbols left in them, if some high-probability symbols (>50%) are possible.
+ FSE_endOfDState(&DState);
+(A compilable sketch of this decoding loop follows this comment block.)
+*/
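+
+/* Illustrative sketch (not part of the original sources) : the single-state decoding loop that
+   the comment above describes. The real FSE_decompress_usingDTable() interleaves two states for
+   speed; this version keeps one state and minimal error handling, purely for illustration. */
+MEM_STATIC size_t FSE_decodeLoopExample(BYTE* dst, size_t dstCapacity,
+                                        const void* cSrc, size_t cSrcSize,
+                                        const FSE_DTable* dt)
+{
+    BIT_DStream_t DStream;
+    FSE_DState_t DState;
+    size_t written = 0;
+    size_t errorCode = BIT_initDStream(&DStream, cSrc, cSrcSize);
+    if (ERR_isError(errorCode)) return errorCode;
+    FSE_initDState(&DState, &DStream, dt);
+
+    /* decode until the bitStream and the state are both exactly consumed, or dst is full */
+    while ( (BIT_reloadDStream(&DStream) <= BIT_DStream_completed)
+         && !(BIT_endOfDStream(&DStream) && FSE_endOfDState(&DState))
+         && (written < dstCapacity) )
+        dst[written++] = FSE_decodeSymbol(&DState, &DStream);
+
+    return written;
+}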
+
+
+/******************************************
+* FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+* Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ memcpy(&DTableH, dt, sizeof(DTableH));
+ DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BIT_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+* Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible data is pre-filtered with the fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+ unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+ unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+ unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
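+/* Illustrative sketch (not part of the original sources) : what the X2 macro above expands to
+   for an arbitrary tableLog of 12 :
+
+       HUF_CREATE_STATIC_DTABLEX2(myDTable, 12);
+   ==> unsigned short myDTable[ HUF_DTABLE_SIZE(12) ] = { 12 };
+
+   i.e. 1 + (1<<12) cells, with the first cell holding maxTableLog and the rest zero-initialized. */
+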
+
+/******************************************
+* Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+ zstd - standard compression library
+ Header File
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* *************************************
+* Version
+***************************************/
+#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
+#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
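+/* Worked example (not part of the original sources) : with the values above,
+   ZSTD_VERSION_NUMBER = 0*100*100 + 2*100 + 2 = 202, i.e. version 0.2.2. */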
+
+
+/* *************************************
+* Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+ zstd - standard compression library
+ Header File for static linking only
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined in this file should be considered experimental.
+ * They are not labelled stable, as their prototypes may change in the future.
+ * You can use them for tests, to provide feedback, or if you can accept the risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+ Use the above functions alternately.
+ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() will use previously decoded data blocks as history if they are located just prior to the current block.
+ The result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+ (A sketch of this call pattern follows below.)
+*/
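+
+/* Illustrative sketch (not part of the original sources). The streaming prototypes this loop
+   relies on are not declared in this excerpt; the signatures below are assumptions based on the
+   description above, so treat this purely as an illustration of the call pattern (error checks
+   on the returned size_t values are omitted) :
+
+    size_t srcSizeHint = ZSTD_nextSrcSizeToDecompress(dctx);     // how many bytes to feed next
+    while (srcSizeHint != 0)
+    {
+        size_t regenerated = ZSTD_decompressContinue(dctx, dst, dstCapacity, src, srcSizeHint);
+        // 'regenerated' may be 0 when only a header was decoded; that is not an error
+        dst = (char*)dst + regenerated;
+        src = (const char*)src + srcSizeHint;
+        srcSizeHint = ZSTD_nextSrcSizeToDecompress(dctx);
+    }
+*/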
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB522 /* v0.2 (current)*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+* Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
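+/* Worked example (not part of the original sources) : with FSE_MAX_MEMORY_USAGE at 14, the
+   formula above gives 2^14 = 16384 bytes (16 KB), which fits a typical x86 L1 data cache;
+   FSE_DEFAULT_MEMORY_USAGE at 13 corresponds to 8 KB. */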
+
+/* FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+* Byte symbol type
+****************************************************************/
+#endif /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+* Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/****************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+/****************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+* Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/****************************************************************
+* Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+* Templates
+****************************************************************/
+/*
+ designed to be included
+ for type-specific functions (template emulation in C)
+ The objective is to write these functions only once, for improved maintainability
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+# error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+# error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
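+
+/* Illustrative sketch (not part of the original sources) : how the name-pasting macros above
+   emulate templates. With FSE_FUNCTION_TYPE defined as BYTE and FSE_FUNCTION_EXTENSION left
+   empty (see the defaults earlier in this file),
+
+       FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+
+   expands its arguments, pastes the tokens together, and simply yields FSE_buildDTable.
+   A build defining a non-empty extension would get a distinct, type-specific symbol instead. */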
+
+
+/* Function templates */
+
#define FSE_DECODE_TYPE FSE_decode_t
-
-static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
-
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
static size_t FSE_buildDTable
-(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
- void* ptr = dt+1;
- FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
- FSE_DTableHeader DTableH;
- const U32 tableSize = 1 << tableLog;
- const U32 tableMask = tableSize-1;
- const U32 step = FSE_tableStep(tableSize);
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
- U32 position = 0;
- U32 highThreshold = tableSize-1;
- const S16 largeLimit= (S16)(1 << (tableLog-1));
- U32 noLarge = 1;
- U32 s;
-
- /* Sanity Checks */
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-
- /* Init, lay down lowprob symbols */
- DTableH.tableLog = (U16)tableLog;
- for (s=0; s<=maxSymbolValue; s++)
- {
- if (normalizedCounter[s]==-1)
- {
- tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
- symbolNext[s] = 1;
- }
- else
- {
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
- symbolNext[s] = normalizedCounter[s];
- }
- }
-
- /* Spread symbols */
- for (s=0; s<=maxSymbolValue; s++)
- {
- int i;
- for (i=0; i<normalizedCounter[s]; i++)
- {
- tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
- position = (position + step) & tableMask;
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
- }
- }
-
- if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-
- /* Build Decoding table */
- {
- U32 i;
- for (i=0; i<tableSize; i++)
- {
- FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
- }
- }
-
- DTableH.fastMode = (U16)noLarge;
- memcpy(dt, &DTableH, sizeof(DTableH)); /* memcpy(), to avoid strict aliasing warnings */
- return 0;
-}
-
-
-#ifndef FSE_COMMONDEFS_ONLY
-/******************************************
-* FSE helper functions
-******************************************/
-static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-
-/****************************************************************
-* FSE NCount encoding-decoding
-****************************************************************/
-static short FSE_abs(short a)
-{
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+ void* ptr = dt+1;
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+ FSE_DTableHeader DTableH;
+ const U32 tableSize = 1 << tableLog;
+ const U32 tableMask = tableSize-1;
+ const U32 step = FSE_tableStep(tableSize);
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+ U32 position = 0;
+ U32 highThreshold = tableSize-1;
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
+ U32 noLarge = 1;
+ U32 s;
+
+ /* Sanity Checks */
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+ /* Init, lay down lowprob symbols */
+ DTableH.tableLog = (U16)tableLog;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ if (normalizedCounter[s]==-1)
+ {
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+ symbolNext[s] = 1;
+ }
+ else
+ {
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
+ symbolNext[s] = normalizedCounter[s];
+ }
+ }
+
+ /* Spread symbols */
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++)
+ {
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ }
+ }
+
+ if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+ /* Build Decoding table */
+ {
+ U32 i;
+ for (i=0; i<tableSize; i++)
+ {
+ FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+ U16 nextState = symbolNext[symbol]++;
+ tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+ }
+ }
+
+ DTableH.fastMode = (U16)noLarge;
+ memcpy(dt, &DTableH, sizeof(DTableH)); /* memcpy(), to avoid strict aliasing warnings */
+ return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+* FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
return (short)(a<0 ? -a : a);
-}
-
-static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
- const void* headerBuffer, size_t hbSize)
-{
- const BYTE* const istart = (const BYTE*) headerBuffer;
- const BYTE* const iend = istart + hbSize;
- const BYTE* ip = istart;
- int nbBits;
- int remaining;
- int threshold;
- U32 bitStream;
- int bitCount;
- unsigned charnum = 0;
- int previous0 = 0;
-
- if (hbSize < 4) return ERROR(srcSize_wrong);
- bitStream = MEM_readLE32(ip);
- nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
- if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
- bitStream >>= 4;
- bitCount = 4;
- *tableLogPtr = nbBits;
- remaining = (1<<nbBits)+1;
- threshold = 1<<nbBits;
- nbBits++;
-
- while ((remaining>1) && (charnum<=*maxSVPtr))
- {
- if (previous0)
- {
- unsigned n0 = charnum;
- while ((bitStream & 0xFFFF) == 0xFFFF)
- {
- n0+=24;
- if (ip < iend-5)
- {
- ip+=2;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- {
- bitStream >>= 16;
- bitCount+=16;
- }
- }
- while ((bitStream & 3) == 3)
- {
- n0+=3;
- bitStream>>=2;
- bitCount+=2;
- }
- n0 += bitStream & 3;
- bitCount += 2;
- if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
- while (charnum < n0) normalizedCounter[charnum++] = 0;
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- bitStream >>= 2;
- }
- {
- const short max = (short)((2*threshold-1)-remaining);
- short count;
-
- if ((bitStream & (threshold-1)) < (U32)max)
- {
- count = (short)(bitStream & (threshold-1));
- bitCount += nbBits-1;
- }
- else
- {
- count = (short)(bitStream & (2*threshold-1));
- if (count >= threshold) count -= max;
- bitCount += nbBits;
- }
-
- count--; /* extra accuracy */
- remaining -= FSE_abs(count);
- normalizedCounter[charnum++] = count;
- previous0 = !count;
- while (remaining < threshold)
- {
- nbBits--;
- threshold >>= 1;
- }
-
- {
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- }
- else
- {
- bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
- bitStream = MEM_readLE32(ip) >> (bitCount & 31);
- }
- }
- }
- if (remaining != 1) return ERROR(GENERIC);
- *maxSVPtr = charnum-1;
-
- ip += (bitCount+7)>>3;
- if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
- return ip-istart;
-}
-
-
-/*********************************************************
-* Decompression (Byte symbols)
-*********************************************************/
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
-
- DTableH->tableLog = 0;
- DTableH->fastMode = 0;
-
- cell->newState = 0;
- cell->symbol = symbolValue;
- cell->nbBits = 0;
-
- return 0;
-}
-
-
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
- /* Build Decoding Table */
- DTableH->tableLog = (U16)nbBits;
- DTableH->fastMode = 1;
- for (s=0; s<=maxSymbolValue; s++)
- {
- dinfo[s].newState = 0;
- dinfo[s].symbol = (BYTE)s;
- dinfo[s].nbBits = (BYTE)nbBits;
- }
-
- return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
- void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt, const unsigned fast)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const omax = op + maxDstSize;
- BYTE* const olimit = omax-3;
-
- BIT_DStream_t bitD;
- FSE_DState_t state1;
- FSE_DState_t state2;
- size_t errorCode;
-
- /* Init */
- errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
- if (FSE_isError(errorCode)) return errorCode;
-
- FSE_initDState(&state1, &bitD, dt);
- FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
- /* 4 symbols per loop */
- for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
- {
- op[0] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BIT_reloadDStream(&bitD);
-
- op[1] = FSE_GETSYMBOL(&state2);
-
- if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
-
- op[2] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BIT_reloadDStream(&bitD);
-
- op[3] = FSE_GETSYMBOL(&state2);
- }
-
- /* tail */
- /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
- while (1)
- {
- if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state1);
-
- if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state2);
- }
-
- /* end ? */
- if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
- return op-ostart;
-
- if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
-
- return ERROR(corruption_detected);
-}
-
-
-static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- memcpy(&DTableH, dt, sizeof(DTableH));
-
- /* select fast mode (static) */
- if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
- const BYTE* const istart = (const BYTE*)cSrc;
- const BYTE* ip = istart;
- short counting[FSE_MAX_SYMBOL_VALUE+1];
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
- unsigned tableLog;
- unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
- size_t errorCode;
-
- if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
-
- /* normal FSE decoding mode */
- errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
- if (FSE_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
- ip += errorCode;
- cSrcSize -= errorCode;
-
- errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
- if (FSE_isError(errorCode)) return errorCode;
-
- /* always return, even if it is an error code */
- return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
-}
-
-
-
-#endif /* FSE_COMMONDEFS_ONLY */
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-/****************************************************************
-* Compiler specifics
-****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER)
-# define inline __inline
-#else
-# define inline /* disable inline */
-#endif
-
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/****************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-/****************************************************************
-* Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/******************************************
-* Helper functions
-******************************************/
-static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
-#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */
-#define HUF_MAX_SYMBOL_VALUE 255
-#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
-# error "HUF_MAX_TABLELOG is too large !"
-#endif
-
-
-
-/*********************************************************
-* Huff0 : Huffman block decompression
-*********************************************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
-
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-
-typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
-
-/*! HUF_readStats
- Read compact Huffman tree, saved by HUF_writeCTable
- @huffWeight : destination buffer
- @return : size read from `src`
-*/
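-/*  header format, as parsed below :
-    the first byte `iSize` selects how the weight table is stored :
-    iSize >= 242          : RLE special case, l[iSize-242] weights, all equal to 1
-    128 <= iSize < 242    : (iSize-127) weights stored raw, 4 bits each (2 per byte)
-    iSize < 128           : iSize bytes of FSE-compressed weights (the normal case)
-    the last weight is never stored : it is reconstructed so that the total reaches a power of 2 */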
-static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
- U32* nbSymbolsPtr, U32* tableLogPtr,
- const void* src, size_t srcSize)
-{
- U32 weightTotal;
- U32 tableLog;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- size_t oSize;
- U32 n;
-
-    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzers complain ... */
-
- if (iSize >= 128) /* special header */
- {
- if (iSize >= (242)) /* RLE */
- {
- static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
- oSize = l[iSize-242];
- memset(huffWeight, 1, hwSize);
- iSize = 0;
- }
- else /* Incompressible */
- {
- oSize = iSize - 127;
- iSize = ((oSize+1)/2);
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- if (oSize >= hwSize) return ERROR(corruption_detected);
- ip += 1;
- for (n=0; n<oSize; n+=2)
- {
- huffWeight[n] = ip[n/2] >> 4;
- huffWeight[n+1] = ip[n/2] & 15;
- }
- }
- }
- else /* header compressed with FSE (normal case) */
- {
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
- if (FSE_isError(oSize)) return oSize;
- }
-
- /* collect weight stats */
- memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
- weightTotal = 0;
- for (n=0; n<oSize; n++)
- {
- if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- rankStats[huffWeight[n]]++;
- weightTotal += (1 << huffWeight[n]) >> 1;
- }
-
- /* get last non-null symbol weight (implied, total must be 2^n) */
- tableLog = BIT_highbit32(weightTotal) + 1;
- if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- {
- U32 total = 1 << tableLog;
- U32 rest = total - weightTotal;
- U32 verif = 1 << BIT_highbit32(rest);
- U32 lastWeight = BIT_highbit32(rest) + 1;
- if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
- huffWeight[oSize] = (BYTE)lastWeight;
- rankStats[lastWeight]++;
- }
-
- /* check tree construction validity */
- if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
- /* results */
- *nbSymbolsPtr = (U32)(oSize+1);
- *tableLogPtr = tableLog;
- return iSize+1;
-}
-
-
-/**************************/
-/* single-symbol decoding */
-/**************************/
-
-static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
-{
- BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
- U32 tableLog = 0;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- U32 nbSymbols = 0;
- U32 n;
- U32 nextRankStart;
- void* ptr = DTable+1;
- HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
-
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
-    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzers complain ... */
-
- iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
- DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
-
- /* Prepare ranks */
- nextRankStart = 0;
- for (n=1; n<=tableLog; n++)
- {
- U32 current = nextRankStart;
- nextRankStart += (rankVal[n] << (n-1));
- rankVal[n] = current;
- }
-
- /* fill DTable */
- for (n=0; n<nbSymbols; n++)
- {
- const U32 w = huffWeight[n];
- const U32 length = (1 << w) >> 1;
- U32 i;
- HUF_DEltX2 D;
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
- for (i = rankVal[w]; i < rankVal[w] + length; i++)
- dt[i] = D;
- rankVal[w] += length;
- }
-
- return iSize;
-}
-
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
- const BYTE c = dt[val].byte;
- BIT_skipBits(Dstream, dt[val].nbBits);
- return c;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
- *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
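-/*  note : the _1 and _2 variants bound the number of bits consumed between two
-    BIT_reloadDStream() calls : a 64-bit bit container has room for four worst-case
-    12-bit symbols per reload, a 32-bit container only for two, hence the
-    MEM_64bits() guards in the unrolled loops below */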
-static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
- {
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- /* no more data to retrieve from bitstream, hence no need to reload */
- while (p < pEnd)
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- return pEnd-pStart;
-}
-
-
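-/*  input layout of the 4-streams variants, as parsed below :
-    a 6-byte jump table of three little-endian 16-bit sizes (length1..length3),
-    followed by 4 independent bit-streams; length4 is implied by cSrcSize.
-    Each stream regenerates one quarter of the output, (dstSize+3)/4 bytes (last one shorter) */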
-static size_t HUF_decompress4X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U16* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
-
- const void* ptr = DTable;
- const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
- {
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 already verified within the main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
- size_t errorCode;
-
- errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
- ip += errorCode;
- cSrcSize -= errorCode;
-
- return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/***************************/
-/* double-symbols decoding */
-/***************************/
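-/*  note : in this variant each DTable cell describes up to 2 decoded bytes :
-    `sequence` holds the byte pair, `length` (1 or 2) how many of them are valid,
-    `nbBits` the total number of bits consumed; one table lookup can thus emit two symbols */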
-
-static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
- const U32* rankValOrigin, const int minWeight,
- const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
- U32 nbBitsBaseline, U16 baseSeq)
-{
- HUF_DEltX4 DElt;
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- U32 s;
-
- /* get pre-calculated rankVal */
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill skipped values */
- if (minWeight>1)
- {
- U32 i, skipSize = rankVal[minWeight];
- MEM_writeLE16(&(DElt.sequence), baseSeq);
- DElt.nbBits = (BYTE)(consumed);
- DElt.length = 1;
- for (i = 0; i < skipSize; i++)
- DTable[i] = DElt;
- }
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++) /* note : sortedSymbols already skipped */
- {
- const U32 symbol = sortedSymbols[s].symbol;
- const U32 weight = sortedSymbols[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 length = 1 << (sizeLog-nbBits);
- const U32 start = rankVal[weight];
- U32 i = start;
- const U32 end = start + length;
-
- MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
- DElt.nbBits = (BYTE)(nbBits + consumed);
- DElt.length = 2;
- do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
-
- rankVal[weight] += length;
- }
-}
-
-typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
-
-static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
- const sortedSymbol_t* sortedList, const U32 sortedListSize,
- const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
- const U32 nbBitsBaseline)
-{
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
- const U32 minBits = nbBitsBaseline - maxWeight;
- U32 s;
-
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++)
- {
- const U16 symbol = sortedList[s].symbol;
- const U32 weight = sortedList[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 start = rankVal[weight];
- const U32 length = 1 << (targetLog-nbBits);
-
- if (targetLog-nbBits >= minBits) /* enough room for a second symbol */
- {
- U32 sortedRank;
- int minWeight = nbBits + scaleLog;
- if (minWeight < 1) minWeight = 1;
- sortedRank = rankStart[minWeight];
- HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
- rankValOrigin[nbBits], minWeight,
- sortedList+sortedRank, sortedListSize-sortedRank,
- nbBitsBaseline, symbol);
- }
- else
- {
- U32 i;
- const U32 end = start + length;
- HUF_DEltX4 DElt;
-
- MEM_writeLE16(&(DElt.sequence), symbol);
- DElt.nbBits = (BYTE)(nbBits);
- DElt.length = 1;
- for (i = start; i < end; i++)
- DTable[i] = DElt;
- }
- rankVal[weight] += length;
- }
-}
-
-static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
-{
- BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
- sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
- U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
- U32* const rankStart = rankStart0+1;
- rankVal_t rankVal;
- U32 tableLog, maxW, sizeOfSort, nbSymbols;
- const U32 memLog = DTable[0];
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- void* ptr = DTable;
- HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
-
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
- if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
-    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzers complain ... */
-
- iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
- /* find maxWeight */
- for (maxW = tableLog; rankStats[maxW]==0; maxW--)
-        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
-
- /* Get start index of each weight */
- {
- U32 w, nextRankStart = 0;
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankStart;
- nextRankStart += rankStats[w];
- rankStart[w] = current;
- }
- rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
- sizeOfSort = nextRankStart;
- }
-
- /* sort symbols by weight */
- {
- U32 s;
- for (s=0; s<nbSymbols; s++)
- {
- U32 w = weightList[s];
- U32 r = rankStart[w]++;
- sortedSymbol[r].symbol = (BYTE)s;
- sortedSymbol[r].weight = (BYTE)w;
- }
- rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
- }
-
- /* Build rankVal */
- {
- const U32 minBits = tableLog+1 - maxW;
- U32 nextRankVal = 0;
- U32 w, consumed;
- const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
- U32* rankVal0 = rankVal[0];
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankVal;
- nextRankVal += rankStats[w] << (w+rescale);
- rankVal0[w] = current;
- }
- for (consumed = minBits; consumed <= memLog - minBits; consumed++)
- {
- U32* rankValPtr = rankVal[consumed];
- for (w = 1; w <= maxW; w++)
- {
- rankValPtr[w] = rankVal0[w] >> consumed;
- }
- }
- }
-
- HUF_fillDTableX4(dt, memLog,
- sortedSymbol, sizeOfSort,
- rankStart0, rankVal, maxW,
- tableLog+1);
-
- return iSize;
-}
-
-
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BIT_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
- else
- {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
- {
- BIT_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- }
- }
- return 1;
-}
-
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
- {
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-
-
-static size_t HUF_decompress4X4_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U32* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
-
- const void* ptr = DTable;
- const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
- {
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 already verified within the main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize;
- cSrcSize -= hSize;
-
- return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/**********************************/
-/* quad-symbol decoding */
-/**********************************/
-typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
-typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
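-/*  note : this variant splits the table into two parallel arrays : HUF_DDescX6 gives, for
-    each bit pattern, the number of bits consumed and the number of bytes produced (1 to 4),
-    while HUF_DSeqX6 stores the corresponding 4-byte group; one lookup can emit up to 4 symbols */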
-
-/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
-static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
- const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
- const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
- const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
-{
- const int scaleLog = nbBitsBaseline - sizeLog; /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
- const int minBits = nbBitsBaseline - maxWeight;
- const U32 level = DDesc.nbBytes;
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- U32 symbolStartPos, s;
-
- /* local rankVal, will be modified */
- memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
-
- /* fill skipped values */
- if (minWeight>1)
- {
- U32 i;
- const U32 skipSize = rankVal[minWeight];
- for (i = 0; i < skipSize; i++)
- {
- DSequence[i] = baseSeq;
- DDescription[i] = DDesc;
- }
- }
-
- /* fill DTable */
- DDesc.nbBytes++;
- symbolStartPos = rankStart[minWeight];
- for (s=symbolStartPos; s<sortedListSize; s++)
- {
- const BYTE symbol = sortedSymbols[s].symbol;
- const U32 weight = sortedSymbols[s].weight; /* >= 1 (sorted) */
- const int nbBits = nbBitsBaseline - weight; /* >= 1 (by construction) */
- const int totalBits = consumed+nbBits;
- const U32 start = rankVal[weight];
- const U32 length = 1 << (sizeLog-nbBits);
- baseSeq.byte[level] = symbol;
- DDesc.nbBits = (BYTE)totalBits;
-
- if ((level<3) && (sizeLog-totalBits >= minBits)) /* enough room for another symbol */
- {
- int nextMinWeight = totalBits + scaleLog;
- if (nextMinWeight < 1) nextMinWeight = 1;
- HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
- rankValOrigin, totalBits, nextMinWeight, maxWeight,
- sortedSymbols, sortedListSize, rankStart,
- nbBitsBaseline, baseSeq, DDesc); /* recursive (max : level 3) */
- }
- else
- {
- U32 i;
- const U32 end = start + length;
- for (i = start; i < end; i++)
- {
- DDescription[i] = DDesc;
- DSequence[i] = baseSeq;
- }
- }
- rankVal[weight] += length;
- }
-}
-
-
-/* note : same preparation as X4 */
-static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
-{
- BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
- sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
- U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
- U32* const rankStart = rankStart0+1;
- U32 tableLog, maxW, sizeOfSort, nbSymbols;
- rankVal_t rankVal;
- const U32 memLog = DTable[0];
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
-
- if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
-    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzers complain ... */
-
- iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
-
- /* find maxWeight */
- for (maxW = tableLog; rankStats[maxW]==0; maxW--)
- { if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
-
-
- /* Get start index of each weight */
- {
- U32 w, nextRankStart = 0;
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankStart;
- nextRankStart += rankStats[w];
- rankStart[w] = current;
- }
- rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
- sizeOfSort = nextRankStart;
- }
-
- /* sort symbols by weight */
- {
- U32 s;
- for (s=0; s<nbSymbols; s++)
- {
- U32 w = weightList[s];
- U32 r = rankStart[w]++;
- sortedSymbol[r].symbol = (BYTE)s;
- sortedSymbol[r].weight = (BYTE)w;
- }
- rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
- }
-
- /* Build rankVal */
- {
- const U32 minBits = tableLog+1 - maxW;
- U32 nextRankVal = 0;
- U32 w, consumed;
- const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
- U32* rankVal0 = rankVal[0];
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankVal;
- nextRankVal += rankStats[w] << (w+rescale);
- rankVal0[w] = current;
- }
- for (consumed = minBits; consumed <= memLog - minBits; consumed++)
- {
- U32* rankValPtr = rankVal[consumed];
- for (w = 1; w <= maxW; w++)
- {
- rankValPtr[w] = rankVal0[w] >> consumed;
- }
- }
- }
-
-
- /* fill tables */
- {
- void* ptr = DTable+1;
- HUF_DDescX6* DDescription = (HUF_DDescX6*)(ptr);
- void* dSeqStart = DTable + 1 + ((size_t)1<<(memLog-1));
- HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(dSeqStart);
- HUF_DSeqX6 DSeq;
- HUF_DDescX6 DDesc;
- DSeq.sequence = 0;
- DDesc.nbBits = 0;
- DDesc.nbBytes = 0;
- HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
- (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
- sortedSymbol, sizeOfSort, rankStart0,
- tableLog+1, DSeq, DDesc);
- }
-
- return iSize;
-}
-
-
-static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, ds+val, sizeof(HUF_DSeqX6));
- BIT_skipBits(DStream, dd[val].nbBits);
- return dd[val].nbBytes;
-}
-
-static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
- const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- U32 length = dd[val].nbBytes;
- if (length <= maxL)
- {
- memcpy(op, ds+val, length);
- BIT_skipBits(DStream, dd[val].nbBits);
- return length;
- }
- memcpy(op, ds+val, maxL);
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
- {
- BIT_skipBits(DStream, dd[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- }
- return maxL;
-}
-
-
-#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
-
-#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
-
-static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
-{
- const void* ddPtr = DTable+1;
- const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
- const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
- const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
- BYTE* const pStart = p;
-
- /* up to 16 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
- {
- HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
- }
-
- /* closer to the end, up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
- HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
-
- while (p <= pEnd-4)
- HUF_DECODE_SYMBOLX6_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- while (p < pEnd)
- p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
-
- return p-pStart;
-}
-
-
-
-static size_t HUF_decompress4X6_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U32* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
-
- const U32 dtLog = DTable[0];
- const void* ddPtr = DTable+1;
- const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
- const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
- const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-64 symbols per loop (4-16 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )
- {
- HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 already verified within the main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
- HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
- HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
- HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize;
- cSrcSize -= hSize;
-
- return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/**********************************/
-/* Generic decompression selector */
-/**********************************/
-
-typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
-{
- /* single, double, quad */
- {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
- {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
- {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
- {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
- {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
- {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
- {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
- {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
- {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
- {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
- {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
- {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
- {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
- {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
- {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
- {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
-};
-
-typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-
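-/*  illustrative example of the selection performed below, using the algoTime values above :
-    dstSize = 100 KB, cSrcSize = 50 KB  =>  Q = 8, D256 = 400
-    Dtime[0] =  926 + 128*400 = 52126
-    Dtime[1] = 1613 +  75*400 = 31613  (33588 after the >>4 penalty)
-    Dtime[2] = 2730 +  75*400 = 32730  (36821 after the >>3 penalty)
-    => HUF_decompress4X4 (double-symbols decoder) is selected */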
-static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
- /* estimate decompression time */
- U32 Q;
- const U32 D256 = (U32)(dstSize >> 8);
- U32 Dtime[3];
- U32 algoNb = 0;
- int n;
-
- /* validation checks */
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
- if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
-
- /* decoder timing evaluation */
- Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
- for (n=0; n<3; n++)
- Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
-
-    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, to limit cache eviction */
-
- if (Dtime[1] < Dtime[0]) algoNb = 1;
- if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
-
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
-
- //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
- //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
- //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
-}
-/*
- zstd - standard compression library
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* ***************************************************************
-* Tuning parameters
-*****************************************************************/
-/*!
-* MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effects
-*/
-#define ZSTD_MEMORY_USAGE 17
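-/*  example : ZSTD_MEMORY_USAGE 17 => 2^17 = 128 KB,
-    i.e. HASH_LOG = 15 and a hash table of 2^15 = 32768 entries (see Constants below) */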
-
-/*!
- * HEAPMODE :
- * Select how the default compression functions allocate memory for their hash table :
- * on the stack (0, fastest), or on the heap (1, requires malloc()).
- * Note that the compression context is fairly large; as a consequence, heap memory is recommended.
- */
-#ifndef ZSTD_HEAPMODE
-# define ZSTD_HEAPMODE 1
-#endif /* ZSTD_HEAPMODE */
-
-/*!
-* LEGACY_SUPPORT :
-* the decompressor can decode older formats (zstd 0.1 and newer)
-*/
-#ifndef ZSTD_LEGACY_SUPPORT
-# define ZSTD_LEGACY_SUPPORT 1
-#endif
-
-
-/* *******************************************************
-* Includes
-*********************************************************/
-#include <stdlib.h> /* calloc */
-#include <string.h> /* memcpy, memmove */
-#include <stdio.h> /* debug : printf */
-
-
-/* *******************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef __AVX2__
-# include <immintrin.h> /* AVX2 intrinsics */
-#endif
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-#else
-#  ifdef __GNUC__
-#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#    define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* *******************************************************
-* Constants
-*********************************************************/
-#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
-#define HASH_TABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASH_TABLESIZE - 1)
-
-#define KNUTH 2654435761
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BLOCKSIZE (128 KB) /* define, for static allocation */
-#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
-#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
-#define IS_RAW BIT0
-#define IS_RLE BIT1
-
-#define WORKPLACESIZE (BLOCKSIZE*3)
-#define MINMATCH 4
-#define MLbits 7
-#define LLbits 6
-#define Offbits 5
-#define MaxML ((1<<MLbits )-1)
-#define MaxLL ((1<<LLbits )-1)
-#define MaxOff 31
-#define LitFSELog 11
-#define MLFSELog 10
-#define LLFSELog 10
-#define OffFSELog 9
-#define MAX(a,b) ((a)<(b)?(b):(a))
-#define MaxSeq MAX(MaxLL, MaxML)
-
-#define LITERAL_NOENTROPY 63
-#define COMMAND_NOENTROPY 7 /* to remove */
-
-static const size_t ZSTD_blockHeaderSize = 3;
-static const size_t ZSTD_frameHeaderSize = 4;
-
-
-/* *******************************************************
-* Memory operations
-**********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
-
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-
-/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
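-/*  note : since it works in 8-byte chunks, ZSTD_wildcopy() may write up to 8 bytes beyond
-    dst+length; callers must keep a margin (see the `+ 8` on litBuffer and the oend-8
-    checks in ZSTD_execSequence) */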
-static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
- do COPY8(op, ip) while (op < oend);
-}
-
-
-/* **************************************
-* Local structures
-****************************************/
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-
-typedef struct
-{
- blockType_t blockType;
- U32 origSize;
-} blockProperties_t;
-
-typedef struct {
- void* buffer;
- U32* offsetStart;
- U32* offset;
- BYTE* offCodeStart;
- BYTE* offCode;
- BYTE* litStart;
- BYTE* lit;
- BYTE* litLengthStart;
- BYTE* litLength;
- BYTE* matchLengthStart;
- BYTE* matchLength;
- BYTE* dumpsStart;
- BYTE* dumps;
-} seqStore_t;
-
-
-/* *************************************
-* Error Management
-***************************************/
-/*! ZSTD_isError
-* tells if a return value is an error code */
-static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
-
-
-/* *************************************
-* Function body to include
-***************************************/
-static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
-
-MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
-{
- if (MEM_isLittleEndian())
- {
- if (MEM_64bits())
- {
-# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (int)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_ctzll((U64)val) >> 3);
-# else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-# endif
- }
- else /* 32 bits */
- {
-# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r;
- _BitScanForward( &r, (U32)val );
- return (int)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_ctz((U32)val) >> 3);
-# else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-# endif
- }
- }
- else /* Big Endian CPU */
- {
-        if (MEM_64bits())
- {
-# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_clzll(val) >> 3);
-# else
- unsigned r;
-            const unsigned n32 = sizeof(size_t)*4;   /* calculated this way because the compiler complains in 32-bit mode */
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
- r += (!val);
- return r;
-# endif
- }
- else /* 32 bits */
- {
-# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_clz((U32)val) >> 3);
-# else
- unsigned r;
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
- r += (!val);
- return r;
-# endif
- }
- }
-}
-
-
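-/*  illustrative example for ZSTD_count() below : comparing "abcdWXYZ" with "abcdQRST",
-    the XOR of the first words is zero in bytes 0..3 and non-zero in byte 4, so
-    ZSTD_NbCommonBytes() yields 4 and ZSTD_count() reports a 4-byte match */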
-MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
-{
- const BYTE* const pStart = pIn;
-
- while ((pIn<pInLimit-(sizeof(size_t)-1)))
- {
- size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
- if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
- pIn += ZSTD_NbCommonBytes(diff);
- return (size_t)(pIn - pStart);
- }
-
- if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
- if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
- if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
- return (size_t)(pIn - pStart);
-}
-
-
-/* *************************************************************
-* Decompression section
-***************************************************************/
-struct ZSTD_DCtx_s
-{
- U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
- void* previousDstEnd;
- void* base;
- size_t expected;
- blockType_t bType;
- U32 phase;
- const BYTE* litPtr;
- size_t litBufSize;
- size_t litSize;
- BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
-}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
-
-
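-/*  block header format (3 bytes), as read by ZSTD_getcBlockSize() below :
-    byte0 bits 6-7            : blockType (bt_compressed / bt_raw / bt_rle / bt_end)
-    byte0 bits 0-2 + byte1-2  : block size, most significant bits first
-    a bt_rle block consumes a single source byte; its original size is carried in the header */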
-static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
-{
- const BYTE* const in = (const BYTE* const)src;
- BYTE headerFlags;
- U32 cSize;
-
- if (srcSize < 3) return ERROR(srcSize_wrong);
-
- headerFlags = *in;
- cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-
- bpPtr->blockType = (blockType_t)(headerFlags >> 6);
- bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
- if (bpPtr->blockType == bt_end) return 0;
- if (bpPtr->blockType == bt_rle) return 1;
- return cSize;
-}
-
-static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
- memcpy(dst, src, srcSize);
- return srcSize;
-}
-
-
-/** ZSTD_decompressLiterals
- @return : nb of bytes read from src, or an error code*/
-static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
- const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
-
- const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
-
- if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
- if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
-
- if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
-
- *maxDstSizePtr = litSize;
- return litCSize + 5;
-}
-
-
-/** ZSTD_decodeLiteralsBlock
- @return : nb of bytes read from src (< srcSize )*/
-static size_t ZSTD_decodeLiteralsBlock(void* ctx,
- const void* src, size_t srcSize)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
- const BYTE* const istart = (const BYTE* const)src;
-
-    /* any compressed block with a literals segment must be at least this size */
- if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-
- switch(*istart & 3)
- {
- default:
- case 0:
- {
- size_t litSize = BLOCKSIZE;
- const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE;
- dctx->litSize = litSize;
- return readSize; /* works if it's an error too */
- }
- case IS_RAW:
- {
- const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
- {
- if (litSize > srcSize-3) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE;
- dctx->litSize = litSize;
- return litSize+3;
- }
- /* direct reference into compressed stream */
- dctx->litPtr = istart+3;
- dctx->litBufSize = srcSize-3;
- dctx->litSize = litSize;
- return litSize+3;
- }
- case IS_RLE:
- {
- const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
- memset(dctx->litBuffer, istart[3], litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE;
- dctx->litSize = litSize;
- return 4;
- }
- }
-}
-
-
-static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
- FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* ip = istart;
- const BYTE* const iend = istart + srcSize;
- U32 LLtype, Offtype, MLtype;
- U32 LLlog, Offlog, MLlog;
- size_t dumpsLength;
-
- /* check */
- if (srcSize < 5) return ERROR(srcSize_wrong);
-
- /* SeqHead */
- *nbSeq = MEM_readLE16(ip); ip+=2;
- LLtype = *ip >> 6;
- Offtype = (*ip >> 4) & 3;
- MLtype = (*ip >> 2) & 3;
- if (*ip & 2)
- {
- dumpsLength = ip[2];
- dumpsLength += ip[1] << 8;
- ip += 3;
- }
- else
- {
- dumpsLength = ip[1];
- dumpsLength += (ip[0] & 1) << 8;
- ip += 2;
- }
- *dumpsPtr = ip;
- ip += dumpsLength;
- *dumpsLengthPtr = dumpsLength;
-
- /* check */
- if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
- /* sequences */
- {
- S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
- size_t headerSize;
-
- /* Build DTables */
- switch(LLtype)
- {
- U32 max;
- case bt_rle :
- LLlog = 0;
- FSE_buildDTable_rle(DTableLL, *ip++); break;
- case bt_raw :
- LLlog = LLbits;
- FSE_buildDTable_raw(DTableLL, LLbits); break;
- default :
- max = MaxLL;
- headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (LLlog > LLFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableLL, norm, max, LLlog);
- }
-
- switch(Offtype)
- {
- U32 max;
- case bt_rle :
- Offlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
- break;
- case bt_raw :
- Offlog = Offbits;
- FSE_buildDTable_raw(DTableOffb, Offbits); break;
- default :
- max = MaxOff;
- headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (Offlog > OffFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableOffb, norm, max, Offlog);
- }
-
- switch(MLtype)
- {
- U32 max;
- case bt_rle :
- MLlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableML, *ip++); break;
- case bt_raw :
- MLlog = MLbits;
- FSE_buildDTable_raw(DTableML, MLbits); break;
- default :
- max = MaxML;
- headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (MLlog > MLFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableML, norm, max, MLlog);
- }
- }
-
- return ip-istart;
-}
-
-
-typedef struct {
- size_t litLength;
- size_t offset;
- size_t matchLength;
-} seq_t;
-
-typedef struct {
- BIT_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
- size_t prevOffset;
- const BYTE* dumps;
- const BYTE* dumpsEnd;
-} seqState_t;
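-/*  note : when a decoded litLength or matchLength equals its maximum code (MaxLL / MaxML),
-    the value is completed from the shared `dumps` area : one extra byte is added, unless that
-    byte is 255, in which case the full length is re-read as a 3-byte little-endian value */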
-
-
-static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
-{
- size_t litLength;
- size_t prevOffset;
- size_t offset;
- size_t matchLength;
- const BYTE* dumps = seqState->dumps;
- const BYTE* const de = seqState->dumpsEnd;
-
- /* Literal length */
- litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
- prevOffset = litLength ? seq->offset : seqState->prevOffset;
- seqState->prevOffset = seq->offset;
- if (litLength == MaxLL)
- {
- U32 add = *dumps++;
- if (add < 255) litLength += add;
- else
- {
- litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
-
- /* Offset */
- {
- static const size_t offsetPrefix[MaxOff+1] = { /* note : size_t faster than U32 */
- 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
- 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
- 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
- U32 offsetCode, nbBits;
- offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- nbBits = offsetCode - 1;
- if (offsetCode==0) nbBits = 0; /* cmove */
- offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- if (offsetCode==0) offset = prevOffset; /* cmove */
- }
-
- /* MatchLength */
- matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
- if (matchLength == MaxML)
- {
- U32 add = *dumps++;
- if (add < 255) matchLength += add;
- else
- {
- matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
- matchLength += MINMATCH;
-
- /* save result */
- seq->litLength = litLength;
- seq->offset = offset;
- seq->matchLength = matchLength;
- seqState->dumps = dumps;
-}
-
-
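-/*  note : inside ZSTD_execSequence() below, offsets < 8 get special treatment : the first
-    8 output bytes are produced with overlap-safe 1- and 4-byte copies, and dec32table /
-    dec64table re-position `match` so that the remaining bytes can be copied with
-    ZSTD_wildcopy() at a distance of at least 8 */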
-static size_t ZSTD_execSequence(BYTE* op,
- seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- BYTE* const base, BYTE* const oend)
-{
- static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
-    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
- const BYTE* const ostart = op;
- BYTE* const oLitEnd = op + sequence.litLength;
- BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_8 = oend-8;
- const BYTE* const litEnd = *litPtr + sequence.litLength;
-
- /* checks */
- if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
- if (litEnd > litLimit-8) return ERROR(corruption_detected); /* overRead beyond lit buffer */
-
- /* copy Literals */
- ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = litEnd; /* update for next sequence */
-
- /* copy Match */
- {
- const BYTE* match = op - sequence.offset;
-
- /* check */
- if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */
- //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer removing this test ?) */
- if (match < base) return ERROR(corruption_detected);
-
- /* close range match, overlap */
- if (sequence.offset < 8)
- {
- const int dec64 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= dec64;
- }
- else
- {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-12)
- {
- if (op < oend_8)
- {
- ZSTD_wildcopy(op, match, oend_8 - op);
- match += oend_8 - op;
- op = oend_8;
- }
- while (op < oMatchEnd) *op++ = *match++;
- }
- else
- {
- ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- }
-
- return oMatchEnd - ostart;
-}
-
-static size_t ZSTD_decompressSequences(
- void* ctx,
- void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
- const BYTE* ip = (const BYTE*)seqStart;
- const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t errorCode, dumpsLength;
- const BYTE* litPtr = dctx->litPtr;
- const BYTE* const litMax = litPtr + dctx->litBufSize;
- const BYTE* const litEnd = litPtr + dctx->litSize;
- int nbSeq;
- const BYTE* dumps;
- U32* DTableLL = dctx->LLTable;
- U32* DTableML = dctx->MLTable;
- U32* DTableOffb = dctx->OffTable;
- BYTE* const base = (BYTE*) (dctx->base);
-
- /* Build Decoding Tables */
- errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
- DTableLL, DTableML, DTableOffb,
- ip, iend-ip);
- if (ZSTD_isError(errorCode)) return errorCode;
- ip += errorCode;
-
- /* Regen sequences */
- {
- seq_t sequence;
- seqState_t seqState;
-
- memset(&sequence, 0, sizeof(sequence));
- seqState.dumps = dumps;
- seqState.dumpsEnd = dumps + dumpsLength;
- seqState.prevOffset = 1;
- errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
- if (ERR_isError(errorCode)) return ERROR(corruption_detected);
- FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
- FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
- FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
-
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
- {
- size_t oneSeqSize;
- nbSeq--;
- ZSTD_decodeSequence(&sequence, &seqState);
- oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litMax, base, oend);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- }
-
- /* check if reached exact end */
- if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
- if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
-
- /* last literal segment */
- {
- size_t lastLLSize = litEnd - litPtr;
- if (litPtr > litEnd) return ERROR(corruption_detected);
- if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
- if (op != litPtr) memmove(op, litPtr, lastLLSize);
- op += lastLLSize;
- }
- }
-
- return op-ostart;
-}
-
-
-static size_t ZSTD_decompressBlock(
- void* ctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- /* blockType == blockCompressed */
- const BYTE* ip = (const BYTE*)src;
-
- /* Decode literals sub-block */
- size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
- if (ZSTD_isError(litCSize)) return litCSize;
- ip += litCSize;
- srcSize -= litCSize;
-
- return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
-}
-
-
-static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
- const BYTE* iend = ip + srcSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t remainingSize = srcSize;
- U32 magicNumber;
- blockProperties_t blockProperties;
-
- /* Frame Header */
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
- magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
- ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
-
- /* Loop on each block */
- while (1)
- {
- size_t decodedSize=0;
- size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
- if (ZSTD_isError(cBlockSize)) return cBlockSize;
-
- ip += ZSTD_blockHeaderSize;
- remainingSize -= ZSTD_blockHeaderSize;
- if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
-
- switch(blockProperties.blockType)
- {
- case bt_compressed:
- decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
- break;
- case bt_raw :
- decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet supported */
- break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return ERROR(srcSize_wrong);
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- if (cBlockSize == 0) break; /* bt_end */
-
- if (ZSTD_isError(decodedSize)) return decodedSize;
- op += decodedSize;
- ip += cBlockSize;
- remainingSize -= cBlockSize;
- }
-
- return op-ostart;
-}
-
-static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- ZSTD_DCtx ctx;
- ctx.base = dst;
- return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
-}
-
-
-/*******************************
-* Streaming Decompression API
-*******************************/
-
-static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
-{
- dctx->expected = ZSTD_frameHeaderSize;
- dctx->phase = 0;
- dctx->previousDstEnd = NULL;
- dctx->base = NULL;
- return 0;
-}
-
-static ZSTD_DCtx* ZSTD_createDCtx(void)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
- if (dctx==NULL) return NULL;
- ZSTD_resetDCtx(dctx);
- return dctx;
-}
-
-static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
-{
- free(dctx);
- return 0;
-}
-
-static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
-{
- return dctx->expected;
-}
-
-static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- /* Sanity check */
- if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
- if (dst != ctx->previousDstEnd) /* not contiguous */
- ctx->base = dst;
-
- /* Decompress : frame header */
- if (ctx->phase == 0)
- {
- /* Check frame magic header */
- U32 magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
- ctx->phase = 1;
- ctx->expected = ZSTD_blockHeaderSize;
- return 0;
- }
-
- /* Decompress : block header */
- if (ctx->phase == 1)
- {
- blockProperties_t bp;
- size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
- if (ZSTD_isError(blockSize)) return blockSize;
- if (bp.blockType == bt_end)
- {
- ctx->expected = 0;
- ctx->phase = 0;
- }
- else
- {
- ctx->expected = blockSize;
- ctx->bType = bp.blockType;
- ctx->phase = 2;
- }
-
- return 0;
- }
-
- /* Decompress : block content */
- {
- size_t rSize;
- switch(ctx->bType)
- {
- case bt_compressed:
- rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
- break;
- case bt_raw :
- rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
- break;
- default:
- return ERROR(GENERIC);
- }
- ctx->phase = 1;
- ctx->expected = ZSTD_blockHeaderSize;
- ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
- return rSize;
- }
-
-}
-
-
-/* wrapper layer */
-
-unsigned ZSTDv02_isError(size_t code)
-{
- return ZSTD_isError(code);
-}
-
-size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize)
-{
- return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
-}
-
-ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
-{
- return (ZSTDv02_Dctx*)ZSTD_createDCtx();
-}
-
-size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
-{
- return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
-}
-
-size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
-{
- return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
-}
-
-size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
-{
- return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
-}
-
-size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
-}
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ int previous0 = 0;
+
+ if (hbSize < 4) return ERROR(srcSize_wrong);
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ while ((remaining>1) && (charnum<=*maxSVPtr))
+ {
+ if (previous0)
+ {
+ unsigned n0 = charnum;
+ while ((bitStream & 0xFFFF) == 0xFFFF)
+ {
+ n0+=24;
+ if (ip < iend-5)
+ {
+ ip+=2;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ {
+ bitStream >>= 16;
+ bitCount+=16;
+ }
+ }
+ while ((bitStream & 3) == 3)
+ {
+ n0+=3;
+ bitStream>>=2;
+ bitCount+=2;
+ }
+ n0 += bitStream & 3;
+ bitCount += 2;
+ if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ bitStream >>= 2;
+ }
+ {
+ const short max = (short)((2*threshold-1)-remaining);
+ short count;
+
+ if ((bitStream & (threshold-1)) < (U32)max)
+ {
+ count = (short)(bitStream & (threshold-1));
+ bitCount += nbBits-1;
+ }
+ else
+ {
+ count = (short)(bitStream & (2*threshold-1));
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ remaining -= FSE_abs(count);
+ normalizedCounter[charnum++] = count;
+ previous0 = !count;
+ while (remaining < threshold)
+ {
+ nbBits--;
+ threshold >>= 1;
+ }
+
+ {
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ }
+ else
+ {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+ }
+ }
+ }
+ if (remaining != 1) return ERROR(GENERIC);
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+ return ip-istart;
+}
+
+
+/*********************************************************
+* Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->newState = 0;
+ cell->symbol = symbolValue;
+ cell->nbBits = 0;
+
+ return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
+ const unsigned tableSize = 1 << nbBits;
+ const unsigned tableMask = tableSize - 1;
+ const unsigned maxSymbolValue = tableMask;
+ unsigned s;
+
+ /* Sanity checks */
+ if (nbBits < 1) return ERROR(GENERIC); /* min size */
+
+ /* Build Decoding Table */
+ DTableH->tableLog = (U16)nbBits;
+ DTableH->fastMode = 1;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ dinfo[s].newState = 0;
+ dinfo[s].symbol = (BYTE)s;
+ dinfo[s].nbBits = (BYTE)nbBits;
+ }
+
+ return 0;
+}
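+
+/* Illustrative note : with a hypothetical nbBits of 8, the raw table built
+   above holds 1<<8 = 256 cells; cell s decodes to symbol s, consumes 8 bits
+   and sets newState to 0, so each decoded symbol is simply the next 8 bits
+   of the stream, i.e. a raw byte passthrough expressed through the regular
+   FSE decoding interface. */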
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt, const unsigned fast)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-3;
+
+ BIT_DStream_t bitD;
+ FSE_DState_t state1;
+ FSE_DState_t state2;
+ size_t errorCode;
+
+ /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressedSize */
+ if (FSE_isError(errorCode)) return errorCode;
+
+ FSE_initDState(&state1, &bitD, dt);
+ FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+ /* 4 symbols per loop */
+ for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+ {
+ op[0] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[1] = FSE_GETSYMBOL(&state2);
+
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+ op[2] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[3] = FSE_GETSYMBOL(&state2);
+ }
+
+ /* tail */
+ /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+ while (1)
+ {
+ if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state1);
+
+ if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state2);
+ }
+
+ /* end ? */
+ if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+ return op-ostart;
+
+ if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
+
+ return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ memcpy(&DTableH, dt, sizeof(DTableH));
+
+ /* select fast mode (static) */
+ if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+ const BYTE* const istart = (const BYTE*)cSrc;
+ const BYTE* ip = istart;
+ short counting[FSE_MAX_SYMBOL_VALUE+1];
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
+ unsigned tableLog;
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+ size_t errorCode;
+
+ if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
+
+ /* normal FSE decoding mode */
+ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+ if (FSE_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+ if (FSE_isError(errorCode)) return errorCode;
+
+ /* always return, even if it is an error code */
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+* Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+# define inline __inline
+#else
+# define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/****************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+/****************************************************************
+* Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/******************************************
+* Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+# error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+* Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+ Read compact Huffman tree, saved by HUF_writeCTable
+ @huffWeight : destination buffer
+ @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 weightTotal;
+ U32 tableLog;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ size_t oSize;
+ U32 n;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzers complain ... */
+
+ if (iSize >= 128) /* special header */
+ {
+ if (iSize >= (242)) /* RLE */
+ {
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+ oSize = l[iSize-242];
+ memset(huffWeight, 1, hwSize);
+ iSize = 0;
+ }
+ else /* Incompressible */
+ {
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ for (n=0; n<oSize; n+=2)
+ {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ }
+ }
+ }
+ else /* header compressed with FSE (normal case) */
+ {
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+ weightTotal = 0;
+ for (n=0; n<oSize; n++)
+ {
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ }
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ {
+ U32 total = 1 << tableLog;
+ U32 rest = total - weightTotal;
+ U32 verif = 1 << BIT_highbit32(rest);
+ U32 lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ *tableLogPtr = tableLog;
+ return iSize+1;
+}
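+
+/* Worked example of the header formats handled above (hypothetical header bytes) :
+   - a first byte of 244 selects the RLE path : l[244-242] = 3, so 3 weights,
+     all set to 1, and only the single header byte is consumed (return value 1) ;
+   - a first byte of 130 selects the direct path : oSize = 130-127 = 3 weights,
+     packed as nibbles into iSize = (3+1)/2 = 2 following bytes, so 3 bytes are
+     consumed in total and nbSymbols is reported as oSize+1 = 4, the last weight
+     being implied so that the weight total reaches a power of 2. */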
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
+ U32 tableLog = 0;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ U32 nbSymbols = 0;
+ U32 n;
+ U32 nextRankStart;
+ void* ptr = DTable+1;
+ HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* note : the allocated size of DTable could usefully be kept separate from its used size, in case of DTable re-use */
+
+ /* Prepare ranks */
+ nextRankStart = 0;
+ for (n=1; n<=tableLog; n++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += (rankVal[n] << (n-1));
+ rankVal[n] = current;
+ }
+
+ /* fill DTable */
+ for (n=0; n<nbSymbols; n++)
+ {
+ const U32 w = huffWeight[n];
+ const U32 length = (1 << w) >> 1;
+ U32 i;
+ HUF_DEltX2 D;
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
+ dt[i] = D;
+ rankVal[w] += length;
+ }
+
+ return iSize;
+}
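+
+/* Illustrative fill, assuming a hypothetical tableLog of 6 : a symbol of
+   weight w = 4 occupies (1<<4)>>1 = 8 consecutive DTable cells, each decoding
+   that symbol in tableLog+1-w = 3 bits, while a symbol of weight 1 occupies a
+   single cell and costs the full tableLog+1-1 = 6 bits. */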
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ const BYTE c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+ {
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, hence no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U16* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+
+ const void* ptr = DTable;
+ const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+ {
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+ size_t errorCode;
+
+ errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
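+
+/* Worked example of the 4-streams layout used above, with hypothetical sizes :
+   with cSrcSize = 100 and a jump table announcing length1 = 20, length2 = 25
+   and length3 = 30, the fourth stream receives length4 = 100-(20+25+30+6) = 19
+   bytes ; with dstSize = 1000, segmentSize = (1000+3)/4 = 250, so the four
+   streams regenerate dst[0..249], dst[250..499], dst[500..749] and dst[750..999]. */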
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq)
+{
+ HUF_DEltX4 DElt;
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ U32 s;
+
+ /* get pre-calculated rankVal */
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill skipped values */
+ if (minWeight>1)
+ {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) /* note : sortedSymbols already skipped */
+ {
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline)
+{
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++)
+ {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) /* enough room for a second symbol */
+ {
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol);
+ }
+ else
+ {
+ U32 i;
+ const U32 end = start + length;
+ HUF_DEltX4 DElt;
+
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ for (i = start; i < end; i++)
+ DTable[i] = DElt;
+ }
+ rankVal[weight] += length;
+ }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+ BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+ sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+ U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+ U32* const rankStart = rankStart0+1;
+ rankVal_t rankVal;
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ const U32 memLog = DTable[0];
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ void* ptr = DTable;
+ HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
+ if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+ {if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
+
+ /* Get start index of each weight */
+ {
+ U32 w, nextRankStart = 0;
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += rankStats[w];
+ rankStart[w] = current;
+ }
+        rankStart[0] = nextRankStart;   /* put all 0-weight symbols at the end of the sorted list */
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ {
+ U32 s;
+ for (s=0; s<nbSymbols; s++)
+ {
+ U32 w = weightList[s];
+ U32 r = rankStart[w]++;
+ sortedSymbol[r].symbol = (BYTE)s;
+ sortedSymbol[r].weight = (BYTE)w;
+ }
+        rankStart[0] = 0;   /* forget 0-weight symbols; this is the beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ {
+ const U32 minBits = tableLog+1 - maxW;
+ U32 nextRankVal = 0;
+ U32 w, consumed;
+ const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
+ U32* rankVal0 = rankVal[0];
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankVal;
+ nextRankVal += rankStats[w] << (w+rescale);
+ rankVal0[w] = current;
+ }
+ for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+ {
+ U32* rankValPtr = rankVal[consumed];
+ for (w = 1; w <= maxW; w++)
+ {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ }
+ }
+ }
+
+ HUF_fillDTableX4(dt, memLog,
+ sortedSymbol, sizeOfSort,
+ rankStart0, rankVal, maxW,
+ tableLog+1);
+
+ return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else
+ {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+ {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ }
+ }
+ return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+ {
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U32* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+
+ const void* ptr = DTable;
+ const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+ {
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize;
+ cSrcSize -= hSize;
+
+ return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+ const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+ const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+ const int scaleLog = nbBitsBaseline - sizeLog; /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+ const int minBits = nbBitsBaseline - maxWeight;
+ const U32 level = DDesc.nbBytes;
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ U32 symbolStartPos, s;
+
+ /* local rankVal, will be modified */
+ memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+ /* fill skipped values */
+ if (minWeight>1)
+ {
+ U32 i;
+ const U32 skipSize = rankVal[minWeight];
+ for (i = 0; i < skipSize; i++)
+ {
+ DSequence[i] = baseSeq;
+ DDescription[i] = DDesc;
+ }
+ }
+
+ /* fill DTable */
+ DDesc.nbBytes++;
+ symbolStartPos = rankStart[minWeight];
+ for (s=symbolStartPos; s<sortedListSize; s++)
+ {
+ const BYTE symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight; /* >= 1 (sorted) */
+ const int nbBits = nbBitsBaseline - weight; /* >= 1 (by construction) */
+ const int totalBits = consumed+nbBits;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (sizeLog-nbBits);
+ baseSeq.byte[level] = symbol;
+ DDesc.nbBits = (BYTE)totalBits;
+
+ if ((level<3) && (sizeLog-totalBits >= minBits)) /* enough room for another symbol */
+ {
+ int nextMinWeight = totalBits + scaleLog;
+ if (nextMinWeight < 1) nextMinWeight = 1;
+ HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+ rankValOrigin, totalBits, nextMinWeight, maxWeight,
+ sortedSymbols, sortedListSize, rankStart,
+ nbBitsBaseline, baseSeq, DDesc); /* recursive (max : level 3) */
+ }
+ else
+ {
+ U32 i;
+ const U32 end = start + length;
+ for (i = start; i < end; i++)
+ {
+ DDescription[i] = DDesc;
+ DSequence[i] = baseSeq;
+ }
+ }
+ rankVal[weight] += length;
+ }
+}
+
+
+/* note : same preparation as X4 */
+static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+ BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+ sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+ U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+ U32* const rankStart = rankStart0+1;
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ rankVal_t rankVal;
+ const U32 memLog = DTable[0];
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+
+ if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
+
+ /* find maxWeight */
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+ { if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
+
+
+ /* Get start index of each weight */
+ {
+ U32 w, nextRankStart = 0;
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += rankStats[w];
+ rankStart[w] = current;
+ }
+        rankStart[0] = nextRankStart;   /* put all 0-weight symbols at the end of the sorted list */
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ {
+ U32 s;
+ for (s=0; s<nbSymbols; s++)
+ {
+ U32 w = weightList[s];
+ U32 r = rankStart[w]++;
+ sortedSymbol[r].symbol = (BYTE)s;
+ sortedSymbol[r].weight = (BYTE)w;
+ }
+        rankStart[0] = 0;   /* forget 0-weight symbols; this is the beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ {
+ const U32 minBits = tableLog+1 - maxW;
+ U32 nextRankVal = 0;
+ U32 w, consumed;
+ const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
+ U32* rankVal0 = rankVal[0];
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankVal;
+ nextRankVal += rankStats[w] << (w+rescale);
+ rankVal0[w] = current;
+ }
+ for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+ {
+ U32* rankValPtr = rankVal[consumed];
+ for (w = 1; w <= maxW; w++)
+ {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ }
+ }
+ }
+
+
+ /* fill tables */
+ {
+ void* ptr = DTable+1;
+ HUF_DDescX6* DDescription = (HUF_DDescX6*)(ptr);
+ void* dSeqStart = DTable + 1 + ((size_t)1<<(memLog-1));
+ HUF_DSeqX6* DSequence = (HUF_DSeqX6*)(dSeqStart);
+ HUF_DSeqX6 DSeq;
+ HUF_DDescX6 DDesc;
+ DSeq.sequence = 0;
+ DDesc.nbBits = 0;
+ DDesc.nbBytes = 0;
+ HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+ (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW,
+ sortedSymbol, sizeOfSort, rankStart0,
+ tableLog+1, DSeq, DDesc);
+ }
+
+ return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, ds+val, sizeof(HUF_DSeqX6));
+ BIT_skipBits(DStream, dd[val].nbBits);
+ return dd[val].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+ const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ U32 length = dd[val].nbBytes;
+ if (length <= maxL)
+ {
+ memcpy(op, ds+val, length);
+ BIT_skipBits(DStream, dd[val].nbBits);
+ return length;
+ }
+ memcpy(op, ds+val, maxL);
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+ {
+ BIT_skipBits(DStream, dd[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ }
+ return maxL;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+ const void* ddPtr = DTable+1;
+ const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+ const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+ const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
+ BYTE* const pStart = p;
+
+ /* up to 16 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16))
+ {
+ HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+ }
+
+ /* closer to the end, up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+ HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+ while (p <= pEnd-4)
+ HUF_DECODE_SYMBOLX6_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ while (p < pEnd)
+ p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+ return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X6_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U32* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+
+ const U32 dtLog = DTable[0];
+ const void* ddPtr = DTable+1;
+ const HUF_DDescX6* dd = (const HUF_DDescX6*)(ddPtr);
+ const void* dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
+ const HUF_DSeqX6* ds = (const HUF_DSeqX6*)(dsPtr);
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-64 symbols per loop (4-16 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; )
+ {
+ HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+ HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+ HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+ HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+static size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize;
+ cSrcSize -= hSize;
+
+ return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+ /* single, double, quad */
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
+ /* estimate decompression time */
+ U32 Q;
+ const U32 D256 = (U32)(dstSize >> 8);
+ U32 Dtime[3];
+ U32 algoNb = 0;
+ int n;
+
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ /* decoder timing evaluation */
+ Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
+ for (n=0; n<3; n++)
+ Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+    Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* favor algorithms using less memory, to limit cache eviction */
+
+ if (Dtime[1] < Dtime[0]) algoNb = 1;
+ if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+ //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
+ //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
+ //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
+}
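+
+/* Worked example of the selector above, with hypothetical sizes
+   dstSize = 65536 and cSrcSize = 32768 :
+   Q = 32768*16/65536 = 8, D256 = 65536>>8 = 256 ;
+   Dtime[0] =  926 + 128*256 = 33694   (single-symbol)
+   Dtime[1] = 1613 +  75*256 = 20813   (double-symbols)
+   Dtime[2] = 2730 +  75*256 = 21930   (quad-symbols)
+   after the cache penalties, Dtime[1] becomes 20813 + 1300 = 22113 and
+   Dtime[2] becomes 21930 + 2741 = 24671, so Dtime[1] is the smallest and
+   HUF_decompress4X4 is selected. */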
+/*
+ zstd - standard compression library
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+* Tuning parameters
+*****************************************************************/
+/*!
+* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Selects how the default compression functions allocate memory for their hash table:
+ * on the stack (0, fastest) or on the heap (1, requires malloc()).
+ * Note that the compression context is fairly large; as a consequence, heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+# define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+* LEGACY_SUPPORT :
+* the decompressor can decode older formats (zstd 0.1 and later)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+# define ZSTD_LEGACY_SUPPORT 1
+#endif
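Because ZSTD_HEAPMODE and ZSTD_LEGACY_SUPPORT are only defined when not already set (unlike ZSTD_MEMORY_USAGE, which is defined unconditionally), a build may override them before this file is compiled, for example with -DZSTD_HEAPMODE=0 on the compiler command line. A hypothetical sketch of the same idea in source form (the wrapper file name is invented, and in this decoder-only extraction the overrides may have little practical effect):

    /* hypothetical wrapper translation unit : override the tunables, then pull in the implementation */
    #define ZSTD_HEAPMODE        0   /* would select stack allocation instead of the heap */
    #define ZSTD_LEGACY_SUPPORT  0   /* would drop support for older frame formats */
    #include "zstd_v02.c"            /* direct inclusion of this file, for illustration only */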
+
+
+/* *******************************************************
+* Includes
+*********************************************************/
+#include <stdlib.h> /* calloc */
+#include <string.h> /* memcpy, memmove */
+#include <stdio.h> /* debug : printf */
+
+
+/* *******************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+# include <immintrin.h> /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#else
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* *******************************************************
+* Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+#define BIT1 2
+#define BIT0 1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB) /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits 7
+#define LLbits 6
+#define Offbits 5
+#define MaxML ((1<<MLbits )-1)
+#define MaxLL ((1<<LLbits )-1)
+#define MaxOff 31
+#define LitFSELog 11
+#define MLFSELog 10
+#define LLFSELog 10
+#define OffFSELog 9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7 /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+* Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+* Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+ blockType_t blockType;
+ U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+ void* buffer;
+ U32* offsetStart;
+ U32* offset;
+ BYTE* offCodeStart;
+ BYTE* offCode;
+ BYTE* litStart;
+ BYTE* lit;
+ BYTE* litLengthStart;
+ BYTE* litLength;
+ BYTE* matchLengthStart;
+ BYTE* matchLength;
+ BYTE* dumpsStart;
+ BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+* Error Management
+***************************************/
+/*! ZSTD_isError
+* tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************
+* Function body to include
+***************************************/
+static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
+
+MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+ if (MEM_isLittleEndian())
+ {
+ if (MEM_64bits())
+ {
+# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64( &r, (U64)val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctzll((U64)val) >> 3);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+ }
+ else /* 32 bits */
+ {
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward( &r, (U32)val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctz((U32)val) >> 3);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+ }
+ }
+ else /* Big Endian CPU */
+ {
+ if (MEM_64bits())
+ {
+# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clzll(val) >> 3);
+# else
+ unsigned r;
+ const unsigned n32 = sizeof(size_t)*4; /* computed this way because the compiler complains in 32-bit mode */
+ if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ }
+ else /* 32 bits */
+ {
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse( &r, (unsigned long)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clz((U32)val) >> 3);
+# else
+ unsigned r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ }
+ }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+ const BYTE* const pStart = pIn;
+
+ while ((pIn<pInLimit-(sizeof(size_t)-1)))
+ {
+ size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+ if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+ pIn += ZSTD_NbCommonBytes(diff);
+ return (size_t)(pIn - pStart);
+ }
+
+ if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+ if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+ if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+ return (size_t)(pIn - pStart);
+}
+
+
+/* *************************************************************
+* Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+ U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+ U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+ U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ void* previousDstEnd;
+ void* base;
+ size_t expected;
+ blockType_t bType;
+ U32 phase;
+ const BYTE* litPtr;
+ size_t litBufSize;
+ size_t litSize;
+ BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+ const BYTE* const in = (const BYTE* const)src;
+ BYTE headerFlags;
+ U32 cSize;
+
+ if (srcSize < 3) return ERROR(srcSize_wrong);
+
+ headerFlags = *in;
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+ if (bpPtr->blockType == bt_end) return 0;
+ if (bpPtr->blockType == bt_rle) return 1;
+ return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+ memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+ @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+ const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+
+ const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+ if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+ if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+ if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+ *maxDstSizePtr = litSize;
+ return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+ @return : nb of bytes read from src (< srcSize )*/
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+ const void* src, size_t srcSize)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+ const BYTE* const istart = (const BYTE* const)src;
+
+ /* any compressed block with literals segment must be at least this size */
+ if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+ switch(*istart & 3)
+ {
+ default:
+ case 0:
+ {
+ size_t litSize = BLOCKSIZE;
+ const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE;
+ dctx->litSize = litSize;
+ return readSize; /* works if it's an error too */
+ }
+ case IS_RAW:
+ {
+ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
+ {
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE;
+ dctx->litSize = litSize;
+ return litSize+3;
+ }
+ /* direct reference into compressed stream */
+ dctx->litPtr = istart+3;
+ dctx->litBufSize = srcSize-3;
+ dctx->litSize = litSize;
+ return litSize+3;
+ }
+ case IS_RLE:
+ {
+ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+ memset(dctx->litBuffer, istart[3], litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE;
+ dctx->litSize = litSize;
+ return 4;
+ }
+ }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* ip = istart;
+ const BYTE* const iend = istart + srcSize;
+ U32 LLtype, Offtype, MLtype;
+ U32 LLlog, Offlog, MLlog;
+ size_t dumpsLength;
+
+ /* check */
+ if (srcSize < 5) return ERROR(srcSize_wrong);
+
+ /* SeqHead */
+ *nbSeq = MEM_readLE16(ip); ip+=2;
+ LLtype = *ip >> 6;
+ Offtype = (*ip >> 4) & 3;
+ MLtype = (*ip >> 2) & 3;
+ if (*ip & 2)
+ {
+ dumpsLength = ip[2];
+ dumpsLength += ip[1] << 8;
+ ip += 3;
+ }
+ else
+ {
+ dumpsLength = ip[1];
+ dumpsLength += (ip[0] & 1) << 8;
+ ip += 2;
+ }
+ *dumpsPtr = ip;
+ ip += dumpsLength;
+ *dumpsLengthPtr = dumpsLength;
+
+ /* check */
+ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+ /* sequences */
+ {
+ S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
+ size_t headerSize;
+
+ /* Build DTables */
+ switch(LLtype)
+ {
+ U32 max;
+ case bt_rle :
+ LLlog = 0;
+ FSE_buildDTable_rle(DTableLL, *ip++); break;
+ case bt_raw :
+ LLlog = LLbits;
+ FSE_buildDTable_raw(DTableLL, LLbits); break;
+ default :
+ max = MaxLL;
+ headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (LLlog > LLFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableLL, norm, max, LLlog);
+ }
+
+ switch(Offtype)
+ {
+ U32 max;
+ case bt_rle :
+ Offlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+ break;
+ case bt_raw :
+ Offlog = Offbits;
+ FSE_buildDTable_raw(DTableOffb, Offbits); break;
+ default :
+ max = MaxOff;
+ headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (Offlog > OffFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableOffb, norm, max, Offlog);
+ }
+
+ switch(MLtype)
+ {
+ U32 max;
+ case bt_rle :
+ MLlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableML, *ip++); break;
+ case bt_raw :
+ MLlog = MLbits;
+ FSE_buildDTable_raw(DTableML, MLbits); break;
+ default :
+ max = MaxML;
+ headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (MLlog > MLFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableML, norm, max, MLlog);
+ }
+ }
+
+ return ip-istart;
+}
+
+
+typedef struct {
+ size_t litLength;
+ size_t offset;
+ size_t matchLength;
+} seq_t;
+
+typedef struct {
+ BIT_DStream_t DStream;
+ FSE_DState_t stateLL;
+ FSE_DState_t stateOffb;
+ FSE_DState_t stateML;
+ size_t prevOffset;
+ const BYTE* dumps;
+ const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+ size_t litLength;
+ size_t prevOffset;
+ size_t offset;
+ size_t matchLength;
+ const BYTE* dumps = seqState->dumps;
+ const BYTE* const de = seqState->dumpsEnd;
+
+ /* Literal length */
+ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;
+ seqState->prevOffset = seq->offset;
+ if (litLength == MaxLL)
+ {
+ U32 add = *dumps++;
+ if (add < 255) litLength += add;
+ else
+ {
+ litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no problem : dumps is always followed by seq tables > 1 byte */
+ dumps += 3;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+
+ /* Offset */
+ {
+ static const size_t offsetPrefix[MaxOff+1] = { /* note : size_t faster than U32 */
+ 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+ 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+ 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+ U32 offsetCode, nbBits;
+ offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ nbBits = offsetCode - 1;
+ if (offsetCode==0) nbBits = 0; /* cmove */
+ offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ if (offsetCode==0) offset = prevOffset; /* cmove */
+ }
+
+ /* MatchLength */
+ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+ if (matchLength == MaxML)
+ {
+ U32 add = *dumps++;
+ if (add < 255) matchLength += add;
+ else
+ {
+ matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no problem : dumps is always followed by seq tables > 1 byte */
+ dumps += 3;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+ matchLength += MINMATCH;
+
+ /* save result */
+ seq->litLength = litLength;
+ seq->offset = offset;
+ seq->matchLength = matchLength;
+ seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+ seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ BYTE* const base, BYTE* const oend)
+{
+ static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
+ const BYTE* const ostart = op;
+ BYTE* const oLitEnd = op + sequence.litLength;
+ BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_8 = oend-8;
+ const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+ /* checks */
+ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
+ if (litEnd > litLimit-8) return ERROR(corruption_detected); /* overRead beyond lit buffer */
+
+ /* copy Literals */
+ ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = litEnd; /* update for next sequence */
+
+ /* copy Match */
+ {
+ const BYTE* match = op - sequence.offset;
+
+ /* check */
+ if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems to be kept by the clang optimizer) */
+ //if (match > op) return ERROR(corruption_detected); /* address space overflow test (does the clang optimizer remove this test ?) */
+ if (match < base) return ERROR(corruption_detected);
+
+ /* close range match, overlap */
+ if (sequence.offset < 8)
+ {
+ const int dec64 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= dec64;
+ }
+ else
+ {
+ ZSTD_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-12)
+ {
+ if (op < oend_8)
+ {
+ ZSTD_wildcopy(op, match, oend_8 - op);
+ match += oend_8 - op;
+ op = oend_8;
+ }
+ while (op < oMatchEnd) *op++ = *match++;
+ }
+ else
+ {
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ }
+
+ return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+ void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t errorCode, dumpsLength;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* const litMax = litPtr + dctx->litBufSize;
+ const BYTE* const litEnd = litPtr + dctx->litSize;
+ int nbSeq;
+ const BYTE* dumps;
+ U32* DTableLL = dctx->LLTable;
+ U32* DTableML = dctx->MLTable;
+ U32* DTableOffb = dctx->OffTable;
+ BYTE* const base = (BYTE*) (dctx->base);
+
+ /* Build Decoding Tables */
+ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+ DTableLL, DTableML, DTableOffb,
+ ip, iend-ip);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ ip += errorCode;
+
+ /* Regen sequences */
+ {
+ seq_t sequence;
+ seqState_t seqState;
+
+ memset(&sequence, 0, sizeof(sequence));
+ seqState.dumps = dumps;
+ seqState.dumpsEnd = dumps + dumpsLength;
+ seqState.prevOffset = 1;
+ errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+ if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+ {
+ size_t oneSeqSize;
+ nbSeq--;
+ ZSTD_decodeSequence(&sequence, &seqState);
+ oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litMax, base, oend);
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+
+ /* check if reached exact end */
+ if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
+ if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
+
+ /* last literal segment */
+ {
+ size_t lastLLSize = litEnd - litPtr;
+ if (litPtr > litEnd) return ERROR(corruption_detected);
+ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+ if (op != litPtr) memmove(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+ void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ /* blockType == blockCompressed */
+ const BYTE* ip = (const BYTE*)src;
+
+ /* Decode literals sub-block */
+ size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+ if (ZSTD_isError(litCSize)) return litCSize;
+ ip += litCSize;
+ srcSize -= litCSize;
+
+ return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* iend = ip + srcSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t remainingSize = srcSize;
+ U32 magicNumber;
+ blockProperties_t blockProperties;
+
+ /* Frame Header */
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+ magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+ ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+ /* Loop on each block */
+ while (1)
+ {
+ size_t decodedSize=0;
+ size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+ ip += ZSTD_blockHeaderSize;
+ remainingSize -= ZSTD_blockHeaderSize;
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+ break;
+ case bt_raw :
+ decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet supported */
+ break;
+ case bt_end :
+ /* end of frame */
+ if (remainingSize) return ERROR(srcSize_wrong);
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ if (cBlockSize == 0) break; /* bt_end */
+
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ op += decodedSize;
+ ip += cBlockSize;
+ remainingSize -= cBlockSize;
+ }
+
+ return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ ZSTD_DCtx ctx;
+ ctx.base = dst;
+ return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/*******************************
+* Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+ dctx->expected = ZSTD_frameHeaderSize;
+ dctx->phase = 0;
+ dctx->previousDstEnd = NULL;
+ dctx->base = NULL;
+ return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+ if (dctx==NULL) return NULL;
+ ZSTD_resetDCtx(dctx);
+ return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+ free(dctx);
+ return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+ return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ /* Sanity check */
+ if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+ if (dst != ctx->previousDstEnd) /* not contiguous */
+ ctx->base = dst;
+
+ /* Decompress : frame header */
+ if (ctx->phase == 0)
+ {
+ /* Check frame magic header */
+ U32 magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+ ctx->phase = 1;
+ ctx->expected = ZSTD_blockHeaderSize;
+ return 0;
+ }
+
+ /* Decompress : block header */
+ if (ctx->phase == 1)
+ {
+ blockProperties_t bp;
+ size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+ if (ZSTD_isError(blockSize)) return blockSize;
+ if (bp.blockType == bt_end)
+ {
+ ctx->expected = 0;
+ ctx->phase = 0;
+ }
+ else
+ {
+ ctx->expected = blockSize;
+ ctx->bType = bp.blockType;
+ ctx->phase = 2;
+ }
+
+ return 0;
+ }
+
+ /* Decompress : block content */
+ {
+ size_t rSize;
+ switch(ctx->bType)
+ {
+ case bt_compressed:
+ rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+ break;
+ case bt_raw :
+ rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet handled */
+ break;
+ case bt_end : /* should never happen (filtered at phase 1) */
+ rSize = 0;
+ break;
+ default:
+ return ERROR(GENERIC);
+ }
+ ctx->phase = 1;
+ ctx->expected = ZSTD_blockHeaderSize;
+ ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+ return rSize;
+ }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv02_isError(size_t code)
+{
+ return ZSTD_isError(code);
+}
+
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize)
+{
+ return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
+{
+ return (ZSTDv02_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
+{
+ return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
+{
+ return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
+{
+ return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
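To make the phase machine of ZSTD_decompressContinue easier to follow, here is a minimal sketch of a caller driving the streaming API through the ZSTDv02_* wrappers. It assumes the whole compressed frame already sits in memory; the function name and the buffer parameters are hypothetical, not part of this patch:

    #include "zstd_v02.h"

    /* sketch : feed the decoder exactly the number of bytes it asks for,
       until it reports that no further input is expected (end of frame) */
    static size_t decompress_v02_streaming(void* dstBuf, size_t dstCap,
                                           const void* srcBuf, size_t srcSize)
    {
        ZSTDv02_Dctx* dctx = ZSTDv02_createDCtx();
        const char* ip = (const char*)srcBuf;
        char* op = (char*)dstBuf;
        size_t read = 0, written = 0;

        if (dctx == NULL) return (size_t)-1;   /* allocation failure */

        while (1)
        {
            size_t const need = ZSTDv02_nextSrcSizeToDecompress(dctx);
            size_t produced;
            if (need == 0) break;                       /* frame fully decoded */
            if (read + need > srcSize) break;           /* truncated input */
            produced = ZSTDv02_decompressContinue(dctx, op + written, dstCap - written,
                                                  ip + read, need);
            if (ZSTDv02_isError(produced)) { ZSTDv02_freeDCtx(dctx); return produced; }
            read += need;
            written += produced;                        /* may be 0 when only a header was decoded */
        }

        ZSTDv02_freeDCtx(dctx);
        return written;
    }

Note that successive destination writes stay contiguous (op + written), which lets ZSTD_decompressContinue keep using previously decoded blocks as reference history.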
diff --git a/contrib/libs/zstd06/legacy/zstd_v02.h b/contrib/libs/zstd06/legacy/zstd_v02.h
index 12be124985..462ea1e303 100644
--- a/contrib/libs/zstd06/legacy/zstd_v02.h
+++ b/contrib/libs/zstd06/legacy/zstd_v02.h
@@ -1,100 +1,100 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd_v02 - decoder for 0.2 format
- Header File
- Copyright (C) 2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#pragma once
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* *************************************
-* Simple one-step function
-***************************************/
-/**
-ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
- compressedSize : is the exact source size
- maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
- It must be equal or larger than originalSize, otherwise decompression will fail.
- return : the number of bytes decompressed into destination buffer (originalSize)
- or an errorCode if it fails (which can be tested using ZSTDv01_isError())
-*/
-size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/**
-ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
-*/
-unsigned ZSTDv02_isError(size_t code);
-
-
-/* *************************************
-* Advanced functions
-***************************************/
-typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
-ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
-size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
-
-size_t ZSTDv02_decompressDCtx(void* ctx,
- void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/* *************************************
-* Streaming functions
-***************************************/
-size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
-
-size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
-size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
-/**
- Use above functions alternatively.
- ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
- Result is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-*/
-
-/* *************************************
-* Prefix - version detection
-***************************************/
-#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */
-
-
-#if defined (__cplusplus)
-}
-#endif
+/*
+ zstd_v02 - decoder for 0.2 format
+ Header File
+ Copyright (C) 2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal to or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize)
+ or an errorCode if it fails (which can be tested using ZSTDv02_isError())
+*/
+size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+/**
+ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
+*/
+unsigned ZSTDv02_isError(size_t code);
+
+
+/* *************************************
+* Advanced functions
+***************************************/
+typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
+ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
+size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_decompressDCtx(void* ctx,
+ void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+/* *************************************
+* Streaming functions
+***************************************/
+size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
+
+size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
+size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+ Use above functions alternatively.
+ ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/contrib/libs/zstd06/legacy/zstd_v03.c b/contrib/libs/zstd06/legacy/zstd_v03.c
index 8a30ecd35c..f06b47fdc1 100644
--- a/contrib/libs/zstd06/legacy/zstd_v03.c
+++ b/contrib/libs/zstd06/legacy/zstd_v03.c
@@ -1,3389 +1,3389 @@
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include "zstd_v03.h"
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#elif defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/******************************************
-* Error Management
-******************************************/
-#define PREFIX(name) ZSTD_error_##name
-
-#define ERROR(name) (size_t)-PREFIX(name)
-
-#define ERROR_LIST(ITEM) \
- ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
- ITEM(PREFIX(memory_allocation)) \
- ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
- ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
- ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
- ITEM(PREFIX(maxCode))
-
-#define ERROR_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
-
-#define ERROR_CONVERTTOSTRING(STRING) #STRING,
-#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
-
-
-/* ******************************************************************
- mem.h
- low-level memory access routines
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/******************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include <string.h> /* memcpy */
-
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/****************************************************************
-* Basic Types
-*****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
- typedef uint8_t BYTE;
- typedef uint16_t U16;
- typedef int16_t S16;
- typedef uint32_t U32;
- typedef int32_t S32;
- typedef uint64_t U64;
- typedef int64_t S64;
-#else
- typedef unsigned char BYTE;
- typedef unsigned short U16;
- typedef signed short S16;
- typedef unsigned int U32;
- typedef signed int S32;
- typedef unsigned long long U64;
- typedef signed long long S64;
-#endif
-
-
-/****************************************************************
-* Memory I/O
-*****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
-* The switch below allows selecting a different access method for improved performance.
-* Method 0 (default) : use `memcpy()`. Safe and portable.
-* Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
-* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
-* Method 2 : direct access. This method is portable but violates the C standard.
-* It can generate buggy code on targets whose generated assembly depends on alignment.
-* But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6).
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
-# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
-# define MEM_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
-MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
-MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
-
-MEM_STATIC unsigned MEM_isLittleEndian(void)
-{
- const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
-
-/* violates C standard on structure alignment.
-Only use if no other choice to achieve best performance on target platform */
-MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
-MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
-MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
-
-#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
-
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
-
-#else
-
-/* default method, safe and standard.
- can sometimes prove slower */
-
-MEM_STATIC U16 MEM_read16(const void* memPtr)
-{
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U32 MEM_read32(const void* memPtr)
-{
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U64 MEM_read64(const void* memPtr)
-{
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write32(void* memPtr, U32 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write64(void* memPtr, U64 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif // MEM_FORCE_MEMORY_ACCESS
-
-
-MEM_STATIC U16 MEM_readLE16(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read16(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)(p[0] + (p[1]<<8));
- }
-}
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write16(memPtr, val);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val;
- p[1] = (BYTE)(val>>8);
- }
-}
-
-MEM_STATIC U32 MEM_readLE32(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read32(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
- }
-}
-
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write32(memPtr, val32);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val32;
- p[1] = (BYTE)(val32>>8);
- p[2] = (BYTE)(val32>>16);
- p[3] = (BYTE)(val32>>24);
- }
-}
-
-MEM_STATIC U64 MEM_readLE64(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read64(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
- + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
- }
-}
-
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write64(memPtr, val64);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val64;
- p[1] = (BYTE)(val64>>8);
- p[2] = (BYTE)(val64>>16);
- p[3] = (BYTE)(val64>>24);
- p[4] = (BYTE)(val64>>32);
- p[5] = (BYTE)(val64>>40);
- p[6] = (BYTE)(val64>>48);
- p[7] = (BYTE)(val64>>56);
- }
-}
-
-MEM_STATIC size_t MEM_readLEST(const void* memPtr)
-{
- if (MEM_32bits())
- return (size_t)MEM_readLE32(memPtr);
- else
- return (size_t)MEM_readLE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
-{
- if (MEM_32bits())
- MEM_writeLE32(memPtr, (U32)val);
- else
- MEM_writeLE64(memPtr, (U64)val);
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* MEM_H_MODULE */
-
-
-/* ******************************************************************
- bitstream
- Part of NewGen Entropy library
- header file (to include)
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*
-* This API consists of small unitary functions, which benefit greatly from being inlined.
-* Since link-time optimization is not available for all compilers,
-* these functions are defined in a .h file, to be included.
-*/
-
-
-/**********************************************
-* bitStream decompression API (read backward)
-**********************************************/
-typedef struct
-{
- size_t bitContainer;
- unsigned bitsConsumed;
- const char* ptr;
- const char* start;
-} BIT_DStream_t;
-
-typedef enum { BIT_DStream_unfinished = 0,
- BIT_DStream_endOfBuffer = 1,
- BIT_DStream_completed = 2,
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
-
-
-/*
-* Start by invoking BIT_initDStream().
-* A chunk of the bitStream is then stored into a local register.
-* The local register size is 64 bits on 64-bit systems, 32 bits on 32-bit systems (size_t).
-* You can then retrieve bitFields stored in the local register, **in reverse order**.
-* The local register is manually refilled from memory by the BIT_reloadDStream() method.
-* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
-* Otherwise, it can be less than that, so proceed accordingly.
-* Checking whether the DStream has reached its end can be performed with BIT_endOfDStream().
-*/
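A rough usage sketch of this read-backward contract, assuming the code lives in a translation unit where these MEM_STATIC helpers are in scope; bitSrc, bitSrcSize and nbFields are hypothetical, a real caller derives the number and width of the fields from the format it is decoding, and the buffer must end with the non-zero end-mark byte that BIT_initDStream checks for:

    BIT_DStream_t bitD;
    size_t const initResult = BIT_initDStream(&bitD, bitSrc, bitSrcSize);
    if (!ERR_isError(initResult))
    {
        unsigned n;
        for (n = 0; n < nbFields; n++)
        {
            size_t const field = BIT_readBits(&bitD, 5);   /* the last field written comes out first */
            (void)field;                                   /* consume/use the value here */
            if (BIT_reloadDStream(&bitD) > BIT_DStream_completed) break;   /* overflow : corrupted input */
        }
        if (!BIT_endOfDStream(&bitD)) { /* stream not fully consumed : treat as corruption */ }
    }

This mirrors how ZSTD_decompressSequences uses the same API: initialize the stream once, read fixed-width fields between reloads, and check BIT_endOfDStream at the end to verify the exact end was reached.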
-
-
-/******************************************
-* unsafe API
-******************************************/
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-
-
-/****************************************************************
-* Helper functions
-****************************************************************/
-MEM_STATIC unsigned BIT_highbit32 (register U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- unsigned r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-
-/**********************************************************
-* bitStream decoding
-**********************************************************/
-
-/*!BIT_initDStream
-* Initialize a BIT_DStream_t.
-* @bitD : a pointer to an already allocated BIT_DStream_t structure
-* @srcBuffer must point at the beginning of a bitStream
-* @srcSize must be the exact size of the bitStream
-* @result : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
-
- if (srcSize >= sizeof(size_t)) /* normal case */
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
- }
- else
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = bitD->start;
- bitD->bitContainer = *(const BYTE*)(bitD->start);
- switch(srcSize)
- {
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
- default:;
- }
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
- bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
- }
-
- return srcSize;
-}
-
-/*!BIT_lookBits
- * Provides next n bits from local register
- * local register is not modified (bits are still present for next read/look)
- * On 32-bits, maxNbBits==25
- * On 64-bits, maxNbBits==57
- * @return : value extracted
- */
-MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
-}
-
-/*! BIT_lookBitsFast :
-* unsafe version; only works if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
-}
-
-MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- bitD->bitsConsumed += nbBits;
-}
-
-/*!BIT_readBits
- * Read next n bits from local register.
-* Pay attention not to read more bits than are contained in the local register.
- * @return : extracted value.
- */
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BIT_lookBits(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-/*!BIT_readBitsFast :
-* unsafe version; only works if nbBits >= 1 */
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BIT_lookBitsFast(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
-{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
-
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
- {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
- }
- if (bitD->ptr == bitD->start)
- {
- if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
- return BIT_DStream_completed;
- }
- {
- U32 nbBytes = bitD->bitsConsumed >> 3;
- BIT_DStream_status result = BIT_DStream_unfinished;
- if (bitD->ptr - nbBytes < bitD->start)
- {
- nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
- result = BIT_DStream_endOfBuffer;
- }
- bitD->ptr -= nbBytes;
- bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
- return result;
- }
-}
-
-/*! BIT_endOfDStream
-* @return Tells if DStream has reached its exact end
-*/
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
-{
- return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* BITSTREAM_H_MODULE */
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#elif defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/******************************************
-* Error Management
-******************************************/
-#define PREFIX(name) ZSTD_error_##name
-
-#define ERROR(name) (size_t)-PREFIX(name)
-
-#define ERROR_LIST(ITEM) \
- ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
- ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
- ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
- ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
- ITEM(PREFIX(maxCode))
-
-#define ERROR_GENERATE_ENUM(ENUM) ENUM,
-typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
-
-#define ERROR_CONVERTTOSTRING(STRING) #STRING,
-#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
-static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC const char* ERR_getErrorName(size_t code)
-{
- static const char* codeError = "Unspecified error code";
- if (ERR_isError(code)) return ERR_strings[-(int)(code)];
- return codeError;
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
-/*
-Constructor and Destructor of type FSE_CTable
- Note that its size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-
-/* ******************************************************************
- FSE : Finite State Entropy coder
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/******************************************
-* Static allocation
-******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
-
-
-/******************************************
-* FSE advanced API
-******************************************/
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
-
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
-/* build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-
-/******************************************
-* FSE symbol decompression API
-******************************************/
-typedef struct
-{
- size_t state;
- const void* table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-
-static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
-
-/*
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream; // Stream context
-FSE_DState_t DState; // State context. Multiple ones are possible
-FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
- errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
- errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
- unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
- size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
- endSignal = BIT_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left into the DStream.
-BIT_DStream_endOfBuffer : DStream reached end of buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : DStream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : DStream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
- BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
- BIT_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
- FSE_endOfDState(&DState);
-*/
-
-
-/******************************************
-* FSE unsafe API
-******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-
-/******************************************
-* Implementation of inline functions
-******************************************/
-
-/* decompression */
-
-typedef struct {
- U16 tableLog;
- U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct
-{
- unsigned short newState;
- unsigned char symbol;
- unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- memcpy(&DTableH, dt, sizeof(DTableH));
- DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
- BIT_reloadDStream(bitD);
- DStatePtr->table = dt + 1;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BIT_readBits(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BIT_readBitsFast(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
-{
- return DStatePtr->state == 0;
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/******************************************
-* Static allocation macros
-******************************************/
-/* Huff0 buffer bounds */
-#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */
-#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* static allocation of Huff0's DTable */
-#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) /* nb Cells; use unsigned short for X2, unsigned int for X4 */
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
- unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
- unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
- unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
-
-
-/******************************************
-* Advanced functions
-******************************************/
-static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
-static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-/*
- zstd - standard compression library
- Header File
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* *************************************
-* Version
-***************************************/
-#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
-#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
-#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-
-
-/* *************************************
-* Advanced functions
-***************************************/
-typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */
-
-#if defined (__cplusplus)
-}
-#endif
-/*
- zstd - standard compression library
- Header File for static linking only
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Streaming functions
-***************************************/
-
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-
-/*
- Use the above functions alternately.
- ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
- Result is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-*/
-
-/* *************************************
-* Prefix - version detection
-***************************************/
-#define ZSTD_magicNumber 0xFD2FB523 /* v0.3 */
-
-
-#if defined (__cplusplus)
-}
-#endif
-/* ******************************************************************
- FSE : Finite State Entropy coder
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-#ifndef FSE_COMMONDEFS_ONLY
-
-/****************************************************************
-* Tuning parameters
-****************************************************************/
-/* MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect
-* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
-
-/* FSE_MAX_SYMBOL_VALUE :
-* Maximum symbol value authorized.
-* Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
-
-/****************************************************************
-* template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-
-
-/****************************************************************
-* Byte symbol type
-****************************************************************/
-#endif /* !FSE_COMMONDEFS_ONLY */
-
-
-/****************************************************************
-* Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/****************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-/****************************************************************
-* Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
-#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-
-/****************************************************************
-* Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/****************************************************************
-* Complex types
-****************************************************************/
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-
-/****************************************************************
-* Templates
-****************************************************************/
-/*
- designed to be included
- for type-specific functions (template emulation in C)
- Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-# error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-# error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X,Y) X##Y
-#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
-#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
-
-
-/* Function templates */
-
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include "zstd_v03.h"
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+* Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+ ITEM(PREFIX(memory_allocation)) \
+ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+ ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+ ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
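+
+/* Editorial note (not part of the original sources) : expanding ERROR_LIST with
+ * ERROR_GENERATE_ENUM produces, in effect,
+ *     typedef enum { ZSTD_error_No_Error, ZSTD_error_GENERIC, ZSTD_error_memory_allocation,
+ *                    ..., ZSTD_error_maxCode } ERR_codes;
+ * ERROR(name) then yields the negated enum value cast to size_t, so error codes sit
+ * at the very top of the size_t range, and ERR_isError() flags any result strictly
+ * greater than ERROR(maxCode). */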
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+
+
+/* ******************************************************************
+ mem.h
+ low-level memory access routines
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include <string.h> /* memcpy */
+
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef signed short S16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef signed long long S64;
+#endif
+
+
+/****************************************************************
+* Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ * It can generate buggy code on targets whose generated assembly depends on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
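+/* Editorial note (not part of the original sources) : MEM_FORCE_MEMORY_ACCESS is
+ * normally left undefined so the auto-detection below applies; to force a method
+ * for a given build, define it on the compiler command line, e.g.
+ *     cc -DMEM_FORCE_MEMORY_ACCESS=1 -c <this file>
+ * where <this file> stands for whichever translation unit includes this block. */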
+#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define MEM_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define MEM_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+ can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read16(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)(p[0] + (p[1]<<8));
+ }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write16(memPtr, val);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val;
+ p[1] = (BYTE)(val>>8);
+ }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read32(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+ }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write32(memPtr, val32);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val32;
+ p[1] = (BYTE)(val32>>8);
+ p[2] = (BYTE)(val32>>16);
+ p[3] = (BYTE)(val32>>24);
+ }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read64(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+ + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+ }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write64(memPtr, val64);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val64;
+ p[1] = (BYTE)(val64>>8);
+ p[2] = (BYTE)(val64>>16);
+ p[3] = (BYTE)(val64>>24);
+ p[4] = (BYTE)(val64>>32);
+ p[5] = (BYTE)(val64>>40);
+ p[6] = (BYTE)(val64>>48);
+ p[7] = (BYTE)(val64>>56);
+ }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readLE32(memPtr);
+ else
+ return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeLE32(memPtr, (U32)val);
+ else
+ MEM_writeLE64(memPtr, (U64)val);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+ bitstream
+ Part of NewGen Entropy library
+ header file (to include)
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+* This API consists of small unitary functions, which highly benefit from being inlined.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined into a .h to be included.
+*/
+
+
+/**********************************************
+* bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+ BIT_DStream_endOfBuffer = 1,
+ BIT_DStream_completed = 2,
+ BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
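+
+/* Illustrative sketch (editorial addition, not part of the original sources) :
+ * reading back two bitFields of 7 and 5 bits that the matching encoder flushed
+ * in that order; since the stream is read backward, the 5-bit field comes out first.
+ * `srcBuffer`/`srcSize` are assumed to hold one complete bitStream. */
+MEM_STATIC size_t BIT_exampleReadTwoFields(const void* srcBuffer, size_t srcSize,
+                                            unsigned* field7, unsigned* field5)
+{
+    BIT_DStream_t DStream;
+    size_t errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+    if (ERR_isError(errorCode)) return errorCode;           /* srcSize_wrong, GENERIC, ... */
+    *field5 = (unsigned)BIT_readBits(&DStream, 5);           /* last flushed, first read */
+    *field7 = (unsigned)BIT_readBits(&DStream, 7);
+    (void)BIT_reloadDStream(&DStream);                       /* refill local register from memory */
+    return BIT_endOfDStream(&DStream) ? 0 : ERROR(GENERIC);  /* 0 == fully and exactly consumed */
+}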
+
+
+/******************************************
+* unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+* Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse ( &r, val );
+ return (unsigned) r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
+ return 31 - __builtin_clz (val);
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ unsigned r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
+
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+* Initialize a BIT_DStream_t.
+* @bitD : a pointer to an already allocated BIT_DStream_t structure
+* @srcBuffer must point at the beginning of a bitStream
+* @srcSize must be the exact size of the bitStream
+* @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+ if (srcSize >= sizeof(size_t)) /* normal case */
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+ }
+ else
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
+ default:;
+ }
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+ }
+
+ return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+* unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * Take care not to read more bits than remain in the local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BIT_lookBits(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*!BIT_readBitsFast :
+* unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BIT_lookBitsFast(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
+
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+ {
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+ }
+ if (bitD->ptr == bitD->start)
+ {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+ return BIT_DStream_completed;
+ }
+ {
+ U32 nbBytes = bitD->bitsConsumed >> 3;
+ BIT_DStream_status result = BIT_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start)
+ {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = BIT_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ return result;
+ }
+}
+
+/*! BIT_endOfDStream
+* @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+* Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+ ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+ ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) };
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ static const char* codeError = "Unspecified error code";
+ if (ERR_isError(code)) return ERR_strings[-(int)(code)];
+ return codeError;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+Constructor and Destructor of type FSE_CTable
+ Note that its size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+* Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
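+
+/* Illustrative sketch (editorial addition, not part of the original sources) :
+ * static allocation using the macros above, for a hypothetical maxTableLog of 12
+ * and maxSymbolValue of 255 :
+ *
+ *     static FSE_CTable exampleCTable[FSE_CTABLE_SIZE_U32(12, 255)];
+ *     static FSE_DTable exampleDTable[FSE_DTABLE_SIZE_U32(12)];
+ *
+ * Both are plain arrays of unsigned, sized at compile time, so no heap allocation
+ * is needed before calling the table-building functions. */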
+
+
+/******************************************
+* FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+* FSE symbol decompression API
+******************************************/
+typedef struct
+{
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream; // Stream context
+FSE_DState_t DState; // State context. Multiple ones are possible
+FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+ errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several) :
+ errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For your information, the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you may have stored into the bitStream (in reverse order).
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+ size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All the above operations only read from a local register (whose size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left in the DStream.
+BIT_DStream_endOfBuffer : DStream reached the end of its buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : DStream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : DStream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BIT_endOfDStream(&DStream);
+Also check the states. There might be some symbols left there, if some high-probability ones (>50%) are possible.
+ FSE_endOfDState(&DState);
+*/
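+
+/* Minimal decoding-loop sketch, following the walkthrough above. The buffer
+   names srcBuffer, srcSize, dst and dstCapacity are illustrative assumptions;
+   a single state is used, and error handling is reduced to the strict minimum :
+
+       BIT_DStream_t DStream;
+       FSE_DState_t  DState;
+       size_t errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+       if (FSE_isError(errorCode)) return errorCode;
+       FSE_initDState(&DState, &DStream, DTablePtr);
+
+       BYTE* op = (BYTE*)dst;
+       BYTE* const oend = op + dstCapacity;
+       while (op < oend)
+       {
+           *op++ = FSE_decodeSymbol(&DState, &DStream);
+           if (BIT_reloadDStream(&DStream) >= BIT_DStream_completed) break;
+       }
+
+       if (!BIT_endOfDStream(&DStream) || !FSE_endOfDState(&DState))
+           return ERROR(corruption_detected);   // stream or state not fully consumed
+*/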
+
+
+/******************************************
+* FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+* Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ memcpy(&DTableH, dt, sizeof(DTableH));
+ DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BIT_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+* Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible data is pre-filtered with a fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+ unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+ unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+ unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
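+
+/* For example, HUF_decompress4X2() builds its single-symbol table with
+       HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+   which expands to
+       unsigned short DTable[HUF_DTABLE_SIZE(HUF_MAX_TABLELOG)] = { HUF_MAX_TABLELOG };
+   i.e. it allocates the table on the stack and records maxTableLog in cell 0. */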
+
+
+/******************************************
+* Advanced functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+ zstd - standard compression library
+ Header File
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* *************************************
+* Version
+***************************************/
+#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
+#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
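+/* For example, with the values above : ZSTD_VERSION_NUMBER = 0*100*100 + 2*100 + 2 = 202, i.e. v0.2.2 */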
+
+
+/* *************************************
+* Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+ zstd - standard compression library
+ Header File for static linking only
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined in this file should be considered experimental.
+ * They are not labelled stable, as their prototypes may change in the future.
+ * You can use them for tests, to provide feedback, or if you can endure the risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Streaming functions
+***************************************/
+
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
+/*
+  Use the above functions alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to the current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
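+
+/* Rough streaming sketch for the two functions above. Their full prototypes are not
+   shown in this excerpt; the call shapes, the error check via ZSTD_isError(), and the
+   helper names readInput()/flushOutput() with buffers inBuff/outBuff/outBuffSize are
+   illustrative assumptions only :
+
+       ZSTD_DCtx* dctx = ...;                           // already created and initialized
+       for (;;)
+       {
+           size_t const toRead = ZSTD_nextSrcSizeToDecompress(dctx);
+           if (toRead == 0) break;                      // nothing left to decode
+           readInput(inBuff, toRead);                   // provide exactly 'toRead' bytes
+           {   size_t const decoded = ZSTD_decompressContinue(dctx, outBuff, outBuffSize, inBuff, toRead);
+               if (ZSTD_isError(decoded)) return decoded;
+               flushOutput(outBuff, decoded);           // 'decoded' may be 0 (header-only step)
+           }
+       }
+*/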
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB523 /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+* Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+* Byte symbol type
+****************************************************************/
+#endif /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+* Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/****************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+/****************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
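+
+/* With the tuning above : FSE_MAX_TABLELOG = 14-2 = 12, hence FSE_MAX_TABLESIZE = 4096 cells
+   (about 16 KB of U32-sized decode cells, matching the "14, for 16KB" note in the tuning section),
+   and FSE_DEFAULT_TABLELOG = 13-2 = 11; both are below FSE_TABLELOG_ABSOLUTE_MAX (15). */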
+
+
+/****************************************************************
+* Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
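+
+/* Illustrative use : FSE_STATIC_ASSERT(sizeof(FSE_decode_t) == sizeof(U32));
+   the division by zero inside the macro breaks compilation if the condition is false;
+   as noted above, it must appear after variable declarations. */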
+
+
+/****************************************************************
+* Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+* Templates
+****************************************************************/
+/*
+  This section is designed to be included, to generate type-specific functions (template emulation in C).
+  The objective is to write these functions only once, for improved maintenance.
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+# error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+# error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
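+
+/* For example, since FSE_FUNCTION_EXTENSION is empty here,
+   FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION) simply expands to FSE_buildDTable;
+   the two-level FSE_CAT indirection ensures the arguments are macro-expanded before pasting. */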
+
+
+/* Function templates */
+
#define FSE_DECODE_TYPE FSE_decode_t
-
-static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
-
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
static size_t FSE_buildDTable
-(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
- void* ptr = dt+1;
- FSE_DTableHeader DTableH;
- FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
- const U32 tableSize = 1 << tableLog;
- const U32 tableMask = tableSize-1;
- const U32 step = FSE_tableStep(tableSize);
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
- U32 position = 0;
- U32 highThreshold = tableSize-1;
- const S16 largeLimit= (S16)(1 << (tableLog-1));
- U32 noLarge = 1;
- U32 s;
-
- /* Sanity Checks */
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-
- /* Init, lay down lowprob symbols */
- DTableH.tableLog = (U16)tableLog;
- for (s=0; s<=maxSymbolValue; s++)
- {
- if (normalizedCounter[s]==-1)
- {
- tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
- symbolNext[s] = 1;
- }
- else
- {
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
- symbolNext[s] = normalizedCounter[s];
- }
- }
-
- /* Spread symbols */
- for (s=0; s<=maxSymbolValue; s++)
- {
- int i;
- for (i=0; i<normalizedCounter[s]; i++)
- {
- tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
- position = (position + step) & tableMask;
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
- }
- }
-
- if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-
- /* Build Decoding table */
- {
- U32 i;
- for (i=0; i<tableSize; i++)
- {
- FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
- }
- }
-
- DTableH.fastMode = (U16)noLarge;
- memcpy(dt, &DTableH, sizeof(DTableH));
- return 0;
-}
-
-
-#ifndef FSE_COMMONDEFS_ONLY
-/******************************************
-* FSE helper functions
-******************************************/
-static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-
-/****************************************************************
-* FSE NCount encoding-decoding
-****************************************************************/
-static short FSE_abs(short a)
-{
- return a<0 ? -a : a;
-}
-
-static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
- const void* headerBuffer, size_t hbSize)
-{
- const BYTE* const istart = (const BYTE*) headerBuffer;
- const BYTE* const iend = istart + hbSize;
- const BYTE* ip = istart;
- int nbBits;
- int remaining;
- int threshold;
- U32 bitStream;
- int bitCount;
- unsigned charnum = 0;
- int previous0 = 0;
-
- if (hbSize < 4) return ERROR(srcSize_wrong);
- bitStream = MEM_readLE32(ip);
- nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
- if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
- bitStream >>= 4;
- bitCount = 4;
- *tableLogPtr = nbBits;
- remaining = (1<<nbBits)+1;
- threshold = 1<<nbBits;
- nbBits++;
-
- while ((remaining>1) && (charnum<=*maxSVPtr))
- {
- if (previous0)
- {
- unsigned n0 = charnum;
- while ((bitStream & 0xFFFF) == 0xFFFF)
- {
- n0+=24;
- if (ip < iend-5)
- {
- ip+=2;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- {
- bitStream >>= 16;
- bitCount+=16;
- }
- }
- while ((bitStream & 3) == 3)
- {
- n0+=3;
- bitStream>>=2;
- bitCount+=2;
- }
- n0 += bitStream & 3;
- bitCount += 2;
- if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
- while (charnum < n0) normalizedCounter[charnum++] = 0;
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- bitStream >>= 2;
- }
- {
- const short max = (short)((2*threshold-1)-remaining);
- short count;
-
- if ((bitStream & (threshold-1)) < (U32)max)
- {
- count = (short)(bitStream & (threshold-1));
- bitCount += nbBits-1;
- }
- else
- {
- count = (short)(bitStream & (2*threshold-1));
- if (count >= threshold) count -= max;
- bitCount += nbBits;
- }
-
- count--; /* extra accuracy */
- remaining -= FSE_abs(count);
- normalizedCounter[charnum++] = count;
- previous0 = !count;
- while (remaining < threshold)
- {
- nbBits--;
- threshold >>= 1;
- }
-
- {
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- }
- else
- {
- bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
- bitStream = MEM_readLE32(ip) >> (bitCount & 31);
- }
- }
- }
- if (remaining != 1) return ERROR(GENERIC);
- *maxSVPtr = charnum-1;
-
- ip += (bitCount+7)>>3;
- if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
- return ip-istart;
-}
-
-
-/*********************************************************
-* Decompression (Byte symbols)
-*********************************************************/
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;
-
- DTableH->tableLog = 0;
- DTableH->fastMode = 0;
-
- cell->newState = 0;
- cell->symbol = symbolValue;
- cell->nbBits = 0;
-
- return 0;
-}
-
-
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
- /* Build Decoding Table */
- DTableH->tableLog = (U16)nbBits;
- DTableH->fastMode = 1;
- for (s=0; s<=maxSymbolValue; s++)
- {
- dinfo[s].newState = 0;
- dinfo[s].symbol = (BYTE)s;
- dinfo[s].nbBits = (BYTE)nbBits;
- }
-
- return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
- void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt, const unsigned fast)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const omax = op + maxDstSize;
- BYTE* const olimit = omax-3;
-
- BIT_DStream_t bitD;
- FSE_DState_t state1;
- FSE_DState_t state2;
- size_t errorCode;
-
- /* Init */
- errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
- if (FSE_isError(errorCode)) return errorCode;
-
- FSE_initDState(&state1, &bitD, dt);
- FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
- /* 4 symbols per loop */
- for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
- {
- op[0] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BIT_reloadDStream(&bitD);
-
- op[1] = FSE_GETSYMBOL(&state2);
-
- if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
-
- op[2] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BIT_reloadDStream(&bitD);
-
- op[3] = FSE_GETSYMBOL(&state2);
- }
-
- /* tail */
- /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
- while (1)
- {
- if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state1);
-
- if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state2);
- }
-
- /* end ? */
- if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
- return op-ostart;
-
- if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
-
- return ERROR(corruption_detected);
-}
-
-
-static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- memcpy(&DTableH, dt, sizeof(DTableH));
-
- /* select fast mode (static) */
- if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
- const BYTE* const istart = (const BYTE*)cSrc;
- const BYTE* ip = istart;
- short counting[FSE_MAX_SYMBOL_VALUE+1];
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
- unsigned tableLog;
- unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
- size_t errorCode;
-
- if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
-
- /* normal FSE decoding mode */
- errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
- if (FSE_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
- ip += errorCode;
- cSrcSize -= errorCode;
-
- errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
- if (FSE_isError(errorCode)) return errorCode;
-
- /* always return, even if it is an error code */
- return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
-}
-
-
-
-#endif /* FSE_COMMONDEFS_ONLY */
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-/****************************************************************
-* Compiler specifics
-****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER)
-# define inline __inline
-#else
-# define inline /* disable inline */
-#endif
-
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/****************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-/****************************************************************
-* Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/******************************************
-* Helper functions
-******************************************/
-static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
-#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */
-#define HUF_MAX_SYMBOL_VALUE 255
-#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
-# error "HUF_MAX_TABLELOG is too large !"
-#endif
-
-
-
-/*********************************************************
-* Huff0 : Huffman block decompression
-*********************************************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
-
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-
-typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
-
-/*! HUF_readStats
- Read compact Huffman tree, saved by HUF_writeCTable
- @huffWeight : destination buffer
- @return : size read from `src`
-*/
-static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
- U32* nbSymbolsPtr, U32* tableLogPtr,
- const void* src, size_t srcSize)
-{
- U32 weightTotal;
- U32 tableLog;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- size_t oSize;
- U32 n;
-
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
-
- if (iSize >= 128) /* special header */
- {
- if (iSize >= (242)) /* RLE */
- {
- static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
- oSize = l[iSize-242];
- memset(huffWeight, 1, hwSize);
- iSize = 0;
- }
- else /* Incompressible */
- {
- oSize = iSize - 127;
- iSize = ((oSize+1)/2);
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- if (oSize >= hwSize) return ERROR(corruption_detected);
- ip += 1;
- for (n=0; n<oSize; n+=2)
- {
- huffWeight[n] = ip[n/2] >> 4;
- huffWeight[n+1] = ip[n/2] & 15;
- }
- }
- }
- else /* header compressed with FSE (normal case) */
- {
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
- if (FSE_isError(oSize)) return oSize;
- }
-
- /* collect weight stats */
- memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
- weightTotal = 0;
- for (n=0; n<oSize; n++)
- {
- if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- rankStats[huffWeight[n]]++;
- weightTotal += (1 << huffWeight[n]) >> 1;
- }
-
- /* get last non-null symbol weight (implied, total must be 2^n) */
- tableLog = BIT_highbit32(weightTotal) + 1;
- if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- {
- U32 total = 1 << tableLog;
- U32 rest = total - weightTotal;
- U32 verif = 1 << BIT_highbit32(rest);
- U32 lastWeight = BIT_highbit32(rest) + 1;
- if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
- huffWeight[oSize] = (BYTE)lastWeight;
- rankStats[lastWeight]++;
- }
-
- /* check tree construction validity */
- if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
- /* results */
- *nbSymbolsPtr = (U32)(oSize+1);
- *tableLogPtr = tableLog;
- return iSize+1;
-}
-
-
-/**************************/
-/* single-symbol decoding */
-/**************************/
-
-static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
-{
- BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
- U32 tableLog = 0;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- U32 nbSymbols = 0;
- U32 n;
- U32 nextRankStart;
- void* ptr = DTable+1;
- HUF_DEltX2* const dt = (HUF_DEltX2*)(ptr);
-
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
-
- iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
- DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
-
- /* Prepare ranks */
- nextRankStart = 0;
- for (n=1; n<=tableLog; n++)
- {
- U32 current = nextRankStart;
- nextRankStart += (rankVal[n] << (n-1));
- rankVal[n] = current;
- }
-
- /* fill DTable */
- for (n=0; n<nbSymbols; n++)
- {
- const U32 w = huffWeight[n];
- const U32 length = (1 << w) >> 1;
- U32 i;
- HUF_DEltX2 D;
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
- for (i = rankVal[w]; i < rankVal[w] + length; i++)
- dt[i] = D;
- rankVal[w] += length;
- }
-
- return iSize;
-}
-
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
- const BYTE c = dt[val].byte;
- BIT_skipBits(Dstream, dt[val].nbBits);
- return c;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
- *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
- {
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- /* no more data to retrieve from bitstream, hence no need to reload */
- while (p < pEnd)
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- return pEnd-pStart;
-}
-
-
-static size_t HUF_decompress4X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U16* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
-
- const void* ptr = DTable;
- const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
- {
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 supposed already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
- size_t errorCode;
-
- errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
- ip += errorCode;
- cSrcSize -= errorCode;
-
- return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/***************************/
-/* double-symbols decoding */
-/***************************/
-
-static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
- const U32* rankValOrigin, const int minWeight,
- const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
- U32 nbBitsBaseline, U16 baseSeq)
-{
- HUF_DEltX4 DElt;
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- U32 s;
-
- /* get pre-calculated rankVal */
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill skipped values */
- if (minWeight>1)
- {
- U32 i, skipSize = rankVal[minWeight];
- MEM_writeLE16(&(DElt.sequence), baseSeq);
- DElt.nbBits = (BYTE)(consumed);
- DElt.length = 1;
- for (i = 0; i < skipSize; i++)
- DTable[i] = DElt;
- }
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++) /* note : sortedSymbols already skipped */
- {
- const U32 symbol = sortedSymbols[s].symbol;
- const U32 weight = sortedSymbols[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 length = 1 << (sizeLog-nbBits);
- const U32 start = rankVal[weight];
- U32 i = start;
- const U32 end = start + length;
-
- MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
- DElt.nbBits = (BYTE)(nbBits + consumed);
- DElt.length = 2;
- do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
-
- rankVal[weight] += length;
- }
-}
-
-typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
-
-static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
- const sortedSymbol_t* sortedList, const U32 sortedListSize,
- const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
- const U32 nbBitsBaseline)
-{
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
- const U32 minBits = nbBitsBaseline - maxWeight;
- U32 s;
-
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++)
- {
- const U16 symbol = sortedList[s].symbol;
- const U32 weight = sortedList[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 start = rankVal[weight];
- const U32 length = 1 << (targetLog-nbBits);
-
- if (targetLog-nbBits >= minBits) /* enough room for a second symbol */
- {
- U32 sortedRank;
- int minWeight = nbBits + scaleLog;
- if (minWeight < 1) minWeight = 1;
- sortedRank = rankStart[minWeight];
- HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
- rankValOrigin[nbBits], minWeight,
- sortedList+sortedRank, sortedListSize-sortedRank,
- nbBitsBaseline, symbol);
- }
- else
- {
- U32 i;
- const U32 end = start + length;
- HUF_DEltX4 DElt;
-
- MEM_writeLE16(&(DElt.sequence), symbol);
- DElt.nbBits = (BYTE)(nbBits);
- DElt.length = 1;
- for (i = start; i < end; i++)
- DTable[i] = DElt;
- }
- rankVal[weight] += length;
- }
-}
-
-static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
-{
- BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
- sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
- U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
- U32* const rankStart = rankStart0+1;
- rankVal_t rankVal;
- U32 tableLog, maxW, sizeOfSort, nbSymbols;
- const U32 memLog = DTable[0];
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- void* ptr = DTable;
- HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
-
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
- if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
- //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
-
- iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
- /* find maxWeight */
- for (maxW = tableLog; rankStats[maxW]==0; maxW--)
- { if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
-
- /* Get start index of each weight */
- {
- U32 w, nextRankStart = 0;
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankStart;
- nextRankStart += rankStats[w];
- rankStart[w] = current;
- }
- rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
- sizeOfSort = nextRankStart;
- }
-
- /* sort symbols by weight */
- {
- U32 s;
- for (s=0; s<nbSymbols; s++)
- {
- U32 w = weightList[s];
- U32 r = rankStart[w]++;
- sortedSymbol[r].symbol = (BYTE)s;
- sortedSymbol[r].weight = (BYTE)w;
- }
- rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
- }
-
- /* Build rankVal */
- {
- const U32 minBits = tableLog+1 - maxW;
- U32 nextRankVal = 0;
- U32 w, consumed;
- const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
- U32* rankVal0 = rankVal[0];
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankVal;
- nextRankVal += rankStats[w] << (w+rescale);
- rankVal0[w] = current;
- }
- for (consumed = minBits; consumed <= memLog - minBits; consumed++)
- {
- U32* rankValPtr = rankVal[consumed];
- for (w = 1; w <= maxW; w++)
- {
- rankValPtr[w] = rankVal0[w] >> consumed;
- }
- }
- }
-
- HUF_fillDTableX4(dt, memLog,
- sortedSymbol, sizeOfSort,
- rankStart0, rankVal, maxW,
- tableLog+1);
-
- return iSize;
-}
-
-
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BIT_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
- else
- {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
- {
- BIT_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- }
- }
- return 1;
-}
-
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
- {
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-
-
-static size_t HUF_decompress4X4_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U32* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
-
- const void* ptr = DTable;
- const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
- {
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 supposed already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize;
- cSrcSize -= hSize;
-
- return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/**********************************/
-/* Generic decompression selector */
-/**********************************/
-
-typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
-{
- /* single, double, quad */
- {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
- {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
- {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
- {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
- {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
- {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
- {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
- {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
- {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
- {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
- {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
- {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
- {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
- {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
- {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
- {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
-};
-
-typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-
-static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+ void* ptr = dt+1;
+ FSE_DTableHeader DTableH;
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+ const U32 tableSize = 1 << tableLog;
+ const U32 tableMask = tableSize-1;
+ const U32 step = FSE_tableStep(tableSize);
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+ U32 position = 0;
+ U32 highThreshold = tableSize-1;
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
+ U32 noLarge = 1;
+ U32 s;
+
+ /* Sanity Checks */
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+ /* Init, lay down lowprob symbols */
+ DTableH.tableLog = (U16)tableLog;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ if (normalizedCounter[s]==-1)
+ {
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+ symbolNext[s] = 1;
+ }
+ else
+ {
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
+ symbolNext[s] = normalizedCounter[s];
+ }
+ }
+
+ /* Spread symbols */
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++)
+ {
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ }
+ }
+
+ if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+ /* Build Decoding table */
+ {
+ U32 i;
+ for (i=0; i<tableSize; i++)
+ {
+ FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+ U16 nextState = symbolNext[symbol]++;
+ tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+ }
+ }
+
+ DTableH.fastMode = (U16)noLarge;
+ memcpy(dt, &DTableH, sizeof(DTableH));
+ return 0;
+}
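+
+/* Editor's note (illustrative, not part of the library) : the "spread symbols"
+   walk above relies on the step being odd, hence coprime with the power-of-two
+   table size, so repeated addition modulo tableSize visits every cell exactly
+   once; the final `position != 0` check catches counters that do not sum to
+   tableSize. A minimal sketch, assuming FSE_tableStep(tableSize) expands to
+   (tableSize>>1)+(tableSize>>3)+3 as in upstream FSE : */
+#if 0
+static int FSE_spreadWalkCoversTable(unsigned tableLog)   /* hypothetical helper */
+{
+    const unsigned tableSize = 1u << tableLog;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned step = (tableSize>>1) + (tableSize>>3) + 3;   /* assumed FSE_tableStep */
+    unsigned position = 0, visited = 0;
+    do { position = (position + step) & tableMask; visited++; } while (position != 0);
+    return visited == tableSize;   /* 1 : every cell visited exactly once */
+}
+#endif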
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+* FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+ return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ int previous0 = 0;
+
+ if (hbSize < 4) return ERROR(srcSize_wrong);
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ while ((remaining>1) && (charnum<=*maxSVPtr))
+ {
+ if (previous0)
+ {
+ unsigned n0 = charnum;
+ while ((bitStream & 0xFFFF) == 0xFFFF)
+ {
+ n0+=24;
+ if (ip < iend-5)
+ {
+ ip+=2;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ {
+ bitStream >>= 16;
+ bitCount+=16;
+ }
+ }
+ while ((bitStream & 3) == 3)
+ {
+ n0+=3;
+ bitStream>>=2;
+ bitCount+=2;
+ }
+ n0 += bitStream & 3;
+ bitCount += 2;
+ if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ bitStream >>= 2;
+ }
+ {
+ const short max = (short)((2*threshold-1)-remaining);
+ short count;
+
+ if ((bitStream & (threshold-1)) < (U32)max)
+ {
+ count = (short)(bitStream & (threshold-1));
+ bitCount += nbBits-1;
+ }
+ else
+ {
+ count = (short)(bitStream & (2*threshold-1));
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ remaining -= FSE_abs(count);
+ normalizedCounter[charnum++] = count;
+ previous0 = !count;
+ while (remaining < threshold)
+ {
+ nbBits--;
+ threshold >>= 1;
+ }
+
+ {
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ }
+ else
+ {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+ }
+ }
+ }
+ if (remaining != 1) return ERROR(GENERIC);
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+ return ip-istart;
+}
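+
+/* Editor's note (illustrative, not part of the library) : FSE_readNCount()
+   reads the 4-bit tableLog first, then each count with a bit width that
+   shrinks as `remaining` falls below successive thresholds; runs of zero
+   counts are flagged through `previous0` and stored compactly. A decoded
+   value of -1 marks a "less than 1" probability symbol, which the table
+   builder above parks in a single cell at the top of the table, at a cost of
+   a full tableLog bits per occurrence. */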
+
+
+/*********************************************************
+* Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->newState = 0;
+ cell->symbol = symbolValue;
+ cell->nbBits = 0;
+
+ return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;
+ const unsigned tableSize = 1 << nbBits;
+ const unsigned tableMask = tableSize - 1;
+ const unsigned maxSymbolValue = tableMask;
+ unsigned s;
+
+ /* Sanity checks */
+ if (nbBits < 1) return ERROR(GENERIC); /* min size */
+
+ /* Build Decoding Table */
+ DTableH->tableLog = (U16)nbBits;
+ DTableH->fastMode = 1;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ dinfo[s].newState = 0;
+ dinfo[s].symbol = (BYTE)s;
+ dinfo[s].nbBits = (BYTE)nbBits;
+ }
+
+ return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt, const unsigned fast)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-3;
+
+ BIT_DStream_t bitD;
+ FSE_DState_t state1;
+ FSE_DState_t state2;
+ size_t errorCode;
+
+ /* Init */
+ errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressedSize */
+ if (FSE_isError(errorCode)) return errorCode;
+
+ FSE_initDState(&state1, &bitD, dt);
+ FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+ /* 4 symbols per loop */
+ for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+ {
+ op[0] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[1] = FSE_GETSYMBOL(&state2);
+
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+ op[2] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[3] = FSE_GETSYMBOL(&state2);
+ }
+
+ /* tail */
+ /* note : BIT_reloadDStream(&bitD) >= BIT_DStream_endOfBuffer; decoding ends at exactly BIT_DStream_completed */
+ while (1)
+ {
+ if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state1);
+
+ if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state2);
+ }
+
+ /* end ? */
+ if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+ return op-ostart;
+
+ if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
+
+ return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ memcpy(&DTableH, dt, sizeof(DTableH));
+
+ /* select fast mode (static) */
+ if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+ const BYTE* const istart = (const BYTE*)cSrc;
+ const BYTE* ip = istart;
+ short counting[FSE_MAX_SYMBOL_VALUE+1];
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
+ unsigned tableLog;
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+ size_t errorCode;
+
+ if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
+
+ /* normal FSE decoding mode */
+ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+ if (FSE_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+ if (FSE_isError(errorCode)) return errorCode;
+
+ /* always return, even if it is an error code */
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif /* FSE_COMMONDEFS_ONLY */
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+* Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+# define inline __inline
+#else
+# define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/****************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+/****************************************************************
+* Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/******************************************
+* Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+# error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+* Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+ Read compact Huffman tree, saved by HUF_writeCTable
+ @huffWeight : destination buffer
+ @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 weightTotal;
+ U32 tableLog;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ size_t oSize;
+ U32 n;
+
+ //memset(huffWeight, 0, hwSize); /* not necessary, even though some analyzers complain ... */
+
+ if (iSize >= 128) /* special header */
+ {
+ if (iSize >= (242)) /* RLE */
+ {
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+ oSize = l[iSize-242];
+ memset(huffWeight, 1, hwSize);
+ iSize = 0;
+ }
+ else /* Incompressible */
+ {
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ for (n=0; n<oSize; n+=2)
+ {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ }
+ }
+ }
+ else /* header compressed with FSE (normal case) */
+ {
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+ weightTotal = 0;
+ for (n=0; n<oSize; n++)
+ {
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ }
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ {
+ U32 total = 1 << tableLog;
+ U32 rest = total - weightTotal;
+ U32 verif = 1 << BIT_highbit32(rest);
+ U32 lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ *tableLogPtr = tableLog;
+ return iSize+1;
+}
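+
+/* Editor's note (illustrative, not part of the library) : each weight w
+   contributes (1<<w)>>1 to weightTotal, and the last weight is never stored :
+   it is implied as the value that rounds the total up to an exact power of 2.
+   Worked example : decoded weights {2,1,1} give weightTotal = 2+1+1 = 4,
+   hence tableLog = BIT_highbit32(4)+1 = 3, total = 8, rest = 4, so the
+   implied last symbol gets weight 3 (contributing 4) and rankStats[1] == 2,
+   which satisfies the final "at least 2 weights of rank 1, even count" check. */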
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
+ U32 tableLog = 0;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ U32 nbSymbols = 0;
+ U32 n;
+ U32 nextRankStart;
+ void* ptr = DTable+1;
+ HUF_DEltX2* const dt = (HUF_DEltX2*)(ptr);
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
+ //memset(huffWeight, 0, sizeof(huffWeight)); /* not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
+ DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+ /* Prepare ranks */
+ nextRankStart = 0;
+ for (n=1; n<=tableLog; n++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += (rankVal[n] << (n-1));
+ rankVal[n] = current;
+ }
+
+ /* fill DTable */
+ for (n=0; n<nbSymbols; n++)
+ {
+ const U32 w = huffWeight[n];
+ const U32 length = (1 << w) >> 1;
+ U32 i;
+ HUF_DEltX2 D;
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
+ dt[i] = D;
+ rankVal[w] += length;
+ }
+
+ return iSize;
+}
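+
+/* Editor's note (illustrative, not part of the library) : in the
+   single-symbol table a symbol of weight w occupies (1<<w)>>1 consecutive
+   cells and decodes in tableLog+1-w bits. Continuing the example above
+   (tableLog = 3, weights {2,1,1} plus an implied weight-3 symbol) : the two
+   weight-1 symbols take cells 0 and 1 with nbBits = 3, the weight-2 symbol
+   takes cells 2-3 with nbBits = 2, and the weight-3 symbol takes cells 4-7
+   with nbBits = 1, filling all 8 cells of the table exactly. */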
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ const BYTE c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+ {
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, hence no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U16* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+
+ const void* ptr = DTable;
+ const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+ {
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 was already verified within the main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+ size_t errorCode;
+
+ errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
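+
+/* Editor's note (illustrative, not part of the library) : layout of the
+   4-streams section consumed by HUF_decompress4X2_usingDTable() above :
+
+     | len1 (LE16) | len2 (LE16) | len3 (LE16) | stream1 | stream2 | stream3 | stream4 |
+     |<----------- 6-byte jump table --------->| len1 B  | len2 B  | len3 B  |  rest   |
+
+   The fourth length is implied (cSrcSize - 6 - len1 - len2 - len3) and each
+   stream regenerates one output segment of (dstSize+3)/4 bytes, the last
+   stream possibly slightly fewer. */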
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq)
+{
+ HUF_DEltX4 DElt;
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ U32 s;
+
+ /* get pre-calculated rankVal */
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill skipped values */
+ if (minWeight>1)
+ {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) /* note : sortedSymbols already skipped */
+ {
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline)
+{
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++)
+ {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) /* enough room for a second symbol */
+ {
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol);
+ }
+ else
+ {
+ U32 i;
+ const U32 end = start + length;
+ HUF_DEltX4 DElt;
+
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ for (i = start; i < end; i++)
+ DTable[i] = DElt;
+ }
+ rankVal[weight] += length;
+ }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+ BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+ sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+ U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+ U32* const rankStart = rankStart0+1;
+ rankVal_t rankVal;
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ const U32 memLog = DTable[0];
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ void* ptr = DTable;
+ HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
+ if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+ //memset(weightList, 0, sizeof(weightList)); /* not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+ { if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
+
+ /* Get start index of each weight */
+ {
+ U32 w, nextRankStart = 0;
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += rankStats[w];
+ rankStart[w] = current;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ {
+ U32 s;
+ for (s=0; s<nbSymbols; s++)
+ {
+ U32 w = weightList[s];
+ U32 r = rankStart[w]++;
+ sortedSymbol[r].symbol = (BYTE)s;
+ sortedSymbol[r].weight = (BYTE)w;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ {
+ const U32 minBits = tableLog+1 - maxW;
+ U32 nextRankVal = 0;
+ U32 w, consumed;
+ const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
+ U32* rankVal0 = rankVal[0];
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankVal;
+ nextRankVal += rankStats[w] << (w+rescale);
+ rankVal0[w] = current;
+ }
+ for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+ {
+ U32* rankValPtr = rankVal[consumed];
+ for (w = 1; w <= maxW; w++)
+ {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ }
+ }
+ }
+
+ HUF_fillDTableX4(dt, memLog,
+ sortedSymbol, sizeOfSort,
+ rankStart0, rankVal, maxW,
+ tableLog+1);
+
+ return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else
+ {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+ {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ }
+ }
+ return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+ {
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U32* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+
+ const void* ptr = DTable;
+ const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+ {
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 was already verified within the main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize;
+ cSrcSize -= hSize;
+
+ return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+ /* single, double, quad */
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
+};
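+
+/* Editor's note (illustrative, not part of the library) : HUF_decompress()
+   below quantizes the compression ratio into Q = cSrcSize*16/dstSize and
+   estimates each decoder's cost as tableTime + decode256Time*(dstSize>>8),
+   then inflates the double-symbol estimate by 1/16 (and the quad one by 1/8)
+   to favour the smaller, more cache-friendly table. Worked example :
+   dstSize = 65536 and cSrcSize = 32768 give Q = 8 and D256 = 256, so the
+   single-symbol estimate is 926 + 128*256 = 33694, while the double-symbol
+   one is 1613 + 75*256 = 20813 plus a 1/16 penalty (1300), i.e. 22113 :
+   the double-symbol decoder (HUF_decompress4X4) is selected. */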
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, NULL };
- /* estimate decompression time */
- U32 Q;
- const U32 D256 = (U32)(dstSize >> 8);
- U32 Dtime[3];
- U32 algoNb = 0;
- int n;
-
- /* validation checks */
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
- if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
-
- /* decoder timing evaluation */
- Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
- for (n=0; n<3; n++)
- Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
-
- Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
-
- if (Dtime[1] < Dtime[0]) algoNb = 1;
-
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
-
- //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
- //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
- //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
-}
-/*
- zstd - standard compression library
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* ***************************************************************
-* Tuning parameters
-*****************************************************************/
-/*!
-* MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect
-*/
-#define ZSTD_MEMORY_USAGE 17
-
-/*!
- * HEAPMODE :
- * Select how default compression functions will allocate memory for their hash table,
- * in memory stack (0, fastest), or in memory heap (1, requires malloc())
- * Note that compression context is fairly large, as a consequence heap memory is recommended.
- */
-#ifndef ZSTD_HEAPMODE
-# define ZSTD_HEAPMODE 1
-#endif /* ZSTD_HEAPMODE */
-
-/*!
-* LEGACY_SUPPORT :
-* decompressor can decode older formats (starting from Zstd 0.1+)
-*/
-#ifndef ZSTD_LEGACY_SUPPORT
-# define ZSTD_LEGACY_SUPPORT 1
-#endif
-
-
-/* *******************************************************
-* Includes
-*********************************************************/
-#include <stdlib.h> /* calloc */
-#include <string.h> /* memcpy, memmove */
-#include <stdio.h> /* debug : printf */
-
-
-/* *******************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef __AVX2__
-# include <immintrin.h> /* AVX2 intrinsics */
-#endif
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-#else
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* *******************************************************
-* Constants
-*********************************************************/
-#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
-#define HASH_TABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASH_TABLESIZE - 1)
-
-#define KNUTH 2654435761
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BLOCKSIZE (128 KB) /* define, for static allocation */
-#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
-#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
-#define IS_RAW BIT0
-#define IS_RLE BIT1
-
-#define WORKPLACESIZE (BLOCKSIZE*3)
-#define MINMATCH 4
-#define MLbits 7
-#define LLbits 6
-#define Offbits 5
-#define MaxML ((1<<MLbits )-1)
-#define MaxLL ((1<<LLbits )-1)
-#define MaxOff 31
-#define LitFSELog 11
-#define MLFSELog 10
-#define LLFSELog 10
-#define OffFSELog 9
-#define MAX(a,b) ((a)<(b)?(b):(a))
-#define MaxSeq MAX(MaxLL, MaxML)
-
-#define LITERAL_NOENTROPY 63
-#define COMMAND_NOENTROPY 7 /* to remove */
-
-static const size_t ZSTD_blockHeaderSize = 3;
-static const size_t ZSTD_frameHeaderSize = 4;
-
-
-/* *******************************************************
-* Memory operations
-**********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
-
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-
-/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
-static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
- do COPY8(op, ip) while (op < oend);
-}
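-
-/* Editor's note (illustrative, not part of the library) : ZSTD_wildcopy()
-   always copies in 8-byte chunks, so it may write up to 7-8 bytes past the
-   requested length; every destination it is used on must reserve a small
-   margin, which is why litBuffer below is declared with an extra 8 bytes and
-   why the literal-copy checks keep oLitEnd at least 8 bytes away from oend. */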
-
-
-/* **************************************
-* Local structures
-****************************************/
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-
-typedef struct
-{
- blockType_t blockType;
- U32 origSize;
-} blockProperties_t;
-
-typedef struct {
- void* buffer;
- U32* offsetStart;
- U32* offset;
- BYTE* offCodeStart;
- BYTE* offCode;
- BYTE* litStart;
- BYTE* lit;
- BYTE* litLengthStart;
- BYTE* litLength;
- BYTE* matchLengthStart;
- BYTE* matchLength;
- BYTE* dumpsStart;
- BYTE* dumps;
-} seqStore_t;
-
-
-/* *************************************
-* Error Management
-***************************************/
-/*! ZSTD_isError
-* tells if a return value is an error code */
-static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
-
-
-/* *************************************
-* Function body to include
-***************************************/
-static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
-
-MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
-{
- if (MEM_isLittleEndian())
- {
- if (MEM_64bits())
- {
-# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (int)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_ctzll((U64)val) >> 3);
-# else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-# endif
- }
- else /* 32 bits */
- {
-# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r;
- _BitScanForward( &r, (U32)val );
- return (int)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_ctz((U32)val) >> 3);
-# else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-# endif
- }
- }
- else /* Big Endian CPU */
- {
- if (MEM_32bits())
- {
-# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_clzll(val) >> 3);
-# else
- unsigned r;
- const unsigned n32 = sizeof(size_t)*4; /* calculated this way because the compiler complains in 32-bit mode */
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
- r += (!val);
- return r;
-# endif
- }
- else /* 32 bits */
- {
-# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
-# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_clz((U32)val) >> 3);
-# else
- unsigned r;
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
- r += (!val);
- return r;
-# endif
- }
- }
-}
-
-
-MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
-{
- const BYTE* const pStart = pIn;
-
- while ((pIn<pInLimit-(sizeof(size_t)-1)))
- {
- size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
- if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
- pIn += ZSTD_NbCommonBytes(diff);
- return (size_t)(pIn - pStart);
- }
-
- if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
- if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
- if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
- return (size_t)(pIn - pStart);
-}
-
-
-/* *************************************************************
-* Decompression section
-***************************************************************/
-struct ZSTD_DCtx_s
-{
- U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
- void* previousDstEnd;
- void* base;
- size_t expected;
- blockType_t bType;
- U32 phase;
- const BYTE* litPtr;
- size_t litBufSize;
- size_t litSize;
- BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
-}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
-
-
-static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
-{
- const BYTE* const in = (const BYTE* const)src;
- BYTE headerFlags;
- U32 cSize;
-
- if (srcSize < 3) return ERROR(srcSize_wrong);
-
- headerFlags = *in;
- cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-
- bpPtr->blockType = (blockType_t)(headerFlags >> 6);
- bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
- if (bpPtr->blockType == bt_end) return 0;
- if (bpPtr->blockType == bt_rle) return 1;
- return cSize;
-}
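-
-/* Editor's note (illustrative, not part of the library) : the 3-byte block
-   header packs the block type into the top 2 bits of the first byte and a
-   19-bit size into the low 3 bits of the first byte plus the next two bytes :
-   cSize = ((in[0] & 7) << 16) + (in[1] << 8) + in[2]. For example the bytes
-   01 23 45 describe a bt_compressed block of 0x012345 = 74565 bytes. */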
-
-static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
- memcpy(dst, src, srcSize);
- return srcSize;
-}
-
-
-/** ZSTD_decompressLiterals
- @return : nb of bytes read from src, or an error code*/
-static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
- const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
-
- const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
-
- if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
- if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
-
- if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
-
- *maxDstSizePtr = litSize;
- return litCSize + 5;
-}
-
-
-/** ZSTD_decodeLiteralsBlock
- @return : nb of bytes read from src (< srcSize )*/
-static size_t ZSTD_decodeLiteralsBlock(void* ctx,
- const void* src, size_t srcSize)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
- const BYTE* const istart = (const BYTE* const)src;
-
- /* any compressed block with literals segment must be at least this size */
- if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-
- switch(*istart & 3)
- {
- default:
- case 0:
- {
- size_t litSize = BLOCKSIZE;
- const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE;
- dctx->litSize = litSize;
- return readSize; /* works if it's an error too */
- }
- case IS_RAW:
- {
- const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
- {
- if (litSize > srcSize-3) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE;
- dctx->litSize = litSize;
- return litSize+3;
- }
- /* direct reference into compressed stream */
- dctx->litPtr = istart+3;
- dctx->litBufSize = srcSize-3;
- dctx->litSize = litSize;
- return litSize+3;
- }
- case IS_RLE:
- {
- const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
- memset(dctx->litBuffer, istart[3], litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE;
- dctx->litSize = litSize;
- return 4;
- }
- }
-}
-
-
-static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
- FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* ip = istart;
- const BYTE* const iend = istart + srcSize;
- U32 LLtype, Offtype, MLtype;
- U32 LLlog, Offlog, MLlog;
- size_t dumpsLength;
-
- /* check */
- if (srcSize < 5) return ERROR(srcSize_wrong);
-
- /* SeqHead */
- *nbSeq = MEM_readLE16(ip); ip+=2;
- LLtype = *ip >> 6;
- Offtype = (*ip >> 4) & 3;
- MLtype = (*ip >> 2) & 3;
- if (*ip & 2)
- {
- dumpsLength = ip[2];
- dumpsLength += ip[1] << 8;
- ip += 3;
- }
- else
- {
- dumpsLength = ip[1];
- dumpsLength += (ip[0] & 1) << 8;
- ip += 2;
- }
- *dumpsPtr = ip;
- ip += dumpsLength;
- *dumpsLengthPtr = dumpsLength;
-
- /* check */
- if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
- /* sequences */
- {
- S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
- size_t headerSize;
-
- /* Build DTables */
- switch(LLtype)
- {
- U32 max;
- case bt_rle :
- LLlog = 0;
- FSE_buildDTable_rle(DTableLL, *ip++); break;
- case bt_raw :
- LLlog = LLbits;
- FSE_buildDTable_raw(DTableLL, LLbits); break;
- default :
- max = MaxLL;
- headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (LLlog > LLFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableLL, norm, max, LLlog);
- }
-
- switch(Offtype)
- {
- U32 max;
- case bt_rle :
- Offlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
- break;
- case bt_raw :
- Offlog = Offbits;
- FSE_buildDTable_raw(DTableOffb, Offbits); break;
- default :
- max = MaxOff;
- headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (Offlog > OffFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableOffb, norm, max, Offlog);
- }
-
- switch(MLtype)
- {
- U32 max;
- case bt_rle :
- MLlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableML, *ip++); break;
- case bt_raw :
- MLlog = MLbits;
- FSE_buildDTable_raw(DTableML, MLbits); break;
- default :
- max = MaxML;
- headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (MLlog > MLFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableML, norm, max, MLlog);
- }
- }
-
- return ip-istart;
-}
-
-
-typedef struct {
- size_t litLength;
- size_t offset;
- size_t matchLength;
-} seq_t;
-
-typedef struct {
- BIT_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
- size_t prevOffset;
- const BYTE* dumps;
- const BYTE* dumpsEnd;
-} seqState_t;
-
-
-static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
-{
- size_t litLength;
- size_t prevOffset;
- size_t offset;
- size_t matchLength;
- const BYTE* dumps = seqState->dumps;
- const BYTE* const de = seqState->dumpsEnd;
-
- /* Literal length */
- litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
- prevOffset = litLength ? seq->offset : seqState->prevOffset;
- seqState->prevOffset = seq->offset;
- if (litLength == MaxLL)
- {
- U32 add = *dumps++;
- if (add < 255) litLength += add;
- else
- {
- litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no problem : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
-
- /* Offset */
- {
- static const size_t offsetPrefix[MaxOff+1] = { /* note : size_t faster than U32 */
- 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
- 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
- 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
- U32 offsetCode, nbBits;
- offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- nbBits = offsetCode - 1;
- if (offsetCode==0) nbBits = 0; /* cmove */
- offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- if (offsetCode==0) offset = prevOffset; /* cmove */
- }
-
- /* MatchLength */
- matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
- if (matchLength == MaxML)
- {
- U32 add = *dumps++;
- if (add < 255) matchLength += add;
- else
- {
- matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no problem : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
- matchLength += MINMATCH;
-
- /* save result */
- seq->litLength = litLength;
- seq->offset = offset;
- seq->matchLength = matchLength;
- seqState->dumps = dumps;
-}
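-
-/* Editor's note (illustrative, not part of the library) : offsets are coded
-   as a symbol (offsetCode) plus offsetCode-1 extra raw bits : the symbol
-   selects the power-of-two base from offsetPrefix[] and the extra bits are
-   added to it, so e.g. offsetCode 5 means base 16 plus 4 extra bits
-   (offsets 16..31). offsetCode 0 is special : no extra bits are read
-   (nbBits forced to 0) and the previous offset is reused instead. */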
-
-
-static size_t ZSTD_execSequence(BYTE* op,
- seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- BYTE* const base, BYTE* const oend)
-{
- static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
- static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
- const BYTE* const ostart = op;
- BYTE* const oLitEnd = op + sequence.litLength;
- BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_8 = oend-8;
- const BYTE* const litEnd = *litPtr + sequence.litLength;
-
- /* checks */
- if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
- if (litEnd > litLimit-8) return ERROR(corruption_detected); /* overRead beyond lit buffer */
-
- /* copy Literals */
- ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = litEnd; /* update for next sequence */
-
- /* copy Match */
- {
- const BYTE* match = op - sequence.offset;
-
- /* check */
- if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */
- //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer removing this test ?) */
- if (match < base) return ERROR(corruption_detected);
-
- /* close range match, overlap */
- if (sequence.offset < 8)
- {
- const int dec64 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= dec64;
- }
- else
- {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-12)
- {
- if (op < oend_8)
- {
- ZSTD_wildcopy(op, match, oend_8 - op);
- match += oend_8 - op;
- op = oend_8;
- }
- while (op < oMatchEnd) *op++ = *match++;
- }
- else
- {
- ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- }
-
- return oMatchEnd - ostart;
-}
-
-static size_t ZSTD_decompressSequences(
- void* ctx,
- void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
- const BYTE* ip = (const BYTE*)seqStart;
- const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t errorCode, dumpsLength;
- const BYTE* litPtr = dctx->litPtr;
- const BYTE* const litMax = litPtr + dctx->litBufSize;
- const BYTE* const litEnd = litPtr + dctx->litSize;
- int nbSeq;
- const BYTE* dumps;
- U32* DTableLL = dctx->LLTable;
- U32* DTableML = dctx->MLTable;
- U32* DTableOffb = dctx->OffTable;
- BYTE* const base = (BYTE*) (dctx->base);
-
- /* Build Decoding Tables */
- errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
- DTableLL, DTableML, DTableOffb,
- ip, iend-ip);
- if (ZSTD_isError(errorCode)) return errorCode;
- ip += errorCode;
-
- /* Regen sequences */
- {
- seq_t sequence;
- seqState_t seqState;
-
- memset(&sequence, 0, sizeof(sequence));
- seqState.dumps = dumps;
- seqState.dumpsEnd = dumps + dumpsLength;
- seqState.prevOffset = sequence.offset = 4;
- errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
- if (ERR_isError(errorCode)) return ERROR(corruption_detected);
- FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
- FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
- FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
-
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
- {
- size_t oneSeqSize;
- nbSeq--;
- ZSTD_decodeSequence(&sequence, &seqState);
- oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litMax, base, oend);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- }
-
- /* check if reached exact end */
- if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
- if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
-
- /* last literal segment */
- {
- size_t lastLLSize = litEnd - litPtr;
- if (litPtr > litEnd) return ERROR(corruption_detected);
- if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
- if (op != litPtr) memmove(op, litPtr, lastLLSize);
- op += lastLLSize;
- }
- }
-
- return op-ostart;
-}
-
-
-static size_t ZSTD_decompressBlock(
- void* ctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- /* blockType == blockCompressed */
- const BYTE* ip = (const BYTE*)src;
-
- /* Decode literals sub-block */
- size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
- if (ZSTD_isError(litCSize)) return litCSize;
- ip += litCSize;
- srcSize -= litCSize;
-
- return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
-}
-
-
-static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
- const BYTE* iend = ip + srcSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t remainingSize = srcSize;
- U32 magicNumber;
- blockProperties_t blockProperties;
-
- /* Frame Header */
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
- magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
- ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
-
- /* Loop on each block */
- while (1)
- {
- size_t decodedSize=0;
- size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
- if (ZSTD_isError(cBlockSize)) return cBlockSize;
-
- ip += ZSTD_blockHeaderSize;
- remainingSize -= ZSTD_blockHeaderSize;
- if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
-
- switch(blockProperties.blockType)
- {
- case bt_compressed:
- decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
- break;
- case bt_raw :
- decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet supported */
- break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return ERROR(srcSize_wrong);
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- if (cBlockSize == 0) break; /* bt_end */
-
- if (ZSTD_isError(decodedSize)) return decodedSize;
- op += decodedSize;
- ip += cBlockSize;
- remainingSize -= cBlockSize;
- }
-
- return op-ostart;
-}
-
-static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- ZSTD_DCtx ctx;
- ctx.base = dst;
- return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
-}
-
-
-/*******************************
-* Streaming Decompression API
-*******************************/
-
-static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
-{
- dctx->expected = ZSTD_frameHeaderSize;
- dctx->phase = 0;
- dctx->previousDstEnd = NULL;
- dctx->base = NULL;
- return 0;
-}
-
-static ZSTD_DCtx* ZSTD_createDCtx(void)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
- if (dctx==NULL) return NULL;
- ZSTD_resetDCtx(dctx);
- return dctx;
-}
-
-static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
-{
- free(dctx);
- return 0;
-}
-
-static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
-{
- return dctx->expected;
-}
-
-static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- /* Sanity check */
- if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
- if (dst != ctx->previousDstEnd) /* not contiguous */
- ctx->base = dst;
-
- /* Decompress : frame header */
- if (ctx->phase == 0)
- {
- /* Check frame magic header */
- U32 magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
- ctx->phase = 1;
- ctx->expected = ZSTD_blockHeaderSize;
- return 0;
- }
-
- /* Decompress : block header */
- if (ctx->phase == 1)
- {
- blockProperties_t bp;
- size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
- if (ZSTD_isError(blockSize)) return blockSize;
- if (bp.blockType == bt_end)
- {
- ctx->expected = 0;
- ctx->phase = 0;
- }
- else
- {
- ctx->expected = blockSize;
- ctx->bType = bp.blockType;
- ctx->phase = 2;
- }
-
- return 0;
- }
-
- /* Decompress : block content */
- {
- size_t rSize;
- switch(ctx->bType)
- {
- case bt_compressed:
- rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
- break;
- case bt_raw :
- rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
- break;
- default:
- return ERROR(GENERIC);
- }
- ctx->phase = 1;
- ctx->expected = ZSTD_blockHeaderSize;
- ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
- return rSize;
- }
-
-}
-
-
-/* wrapper layer */
-
-unsigned ZSTDv03_isError(size_t code)
-{
- return ZSTD_isError(code);
-}
-
-size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize)
-{
- return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
-}
-
-ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
-{
- return (ZSTDv03_Dctx*)ZSTD_createDCtx();
-}
-
-size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx)
-{
- return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
-}
-
-size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx)
-{
- return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
-}
-
-size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx)
-{
- return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
-}
-
-size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
-}
+ /* estimate decompression time */
+ U32 Q;
+ const U32 D256 = (U32)(dstSize >> 8);
+ U32 Dtime[3];
+ U32 algoNb = 0;
+ int n;
+
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ /* decoder timing evaluation */
+ Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
+ for (n=0; n<3; n++)
+ Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+ Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+ if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+ //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
+ //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
+ //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
+}
+/*
+ zstd - standard compression library
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+* Tuning parameters
+*****************************************************************/
+/*!
+* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+*/
+#define ZSTD_MEMORY_USAGE 17
+
+/*!
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0, fastest), or in memory heap (1, requires malloc())
+ * Note that compression context is fairly large, as a consequence heap memory is recommended.
+ */
+#ifndef ZSTD_HEAPMODE
+# define ZSTD_HEAPMODE 1
+#endif /* ZSTD_HEAPMODE */
+
+/*!
+* LEGACY_SUPPORT :
+* decompressor can decode older formats (starting from Zstd 0.1+)
+*/
+#ifndef ZSTD_LEGACY_SUPPORT
+# define ZSTD_LEGACY_SUPPORT 1
+#endif
+
+
+/* *******************************************************
+* Includes
+*********************************************************/
+#include <stdlib.h> /* calloc */
+#include <string.h> /* memcpy, memmove */
+#include <stdio.h> /* debug : printf */
+
+
+/* *******************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+# include <immintrin.h> /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#else
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* *******************************************************
+* Constants
+*********************************************************/
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+#define BIT1 2
+#define BIT0 1
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB) /* define, for static allocation */
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits 7
+#define LLbits 6
+#define Offbits 5
+#define MaxML ((1<<MLbits )-1)
+#define MaxLL ((1<<LLbits )-1)
+#define MaxOff 31
+#define LitFSELog 11
+#define MLFSELog 10
+#define LLFSELog 10
+#define OffFSELog 9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7 /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/* *******************************************************
+* Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ do COPY8(op, ip) while (op < oend);
+}
+
+
+/* **************************************
+* Local structures
+****************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+ blockType_t blockType;
+ U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+ void* buffer;
+ U32* offsetStart;
+ U32* offset;
+ BYTE* offCodeStart;
+ BYTE* offCode;
+ BYTE* litStart;
+ BYTE* lit;
+ BYTE* litLengthStart;
+ BYTE* litLength;
+ BYTE* matchLengthStart;
+ BYTE* matchLength;
+ BYTE* dumpsStart;
+ BYTE* dumps;
+} seqStore_t;
+
+
+/* *************************************
+* Error Management
+***************************************/
+/*! ZSTD_isError
+* tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************
+* Function body to include
+***************************************/
+static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
+
+MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+ if (MEM_isLittleEndian())
+ {
+ if (MEM_64bits())
+ {
+# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64( &r, (U64)val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctzll((U64)val) >> 3);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+ }
+ else /* 32 bits */
+ {
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward( &r, (U32)val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctz((U32)val) >> 3);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+ }
+ }
+ else /* Big Endian CPU */
+ {
+ if (MEM_32bits())
+ {
+# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clzll(val) >> 3);
+# else
+ unsigned r;
+ const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
+ if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ }
+ else /* 32 bits */
+ {
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse( &r, (unsigned long)val );
+ return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clz((U32)val) >> 3);
+# else
+ unsigned r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+ }
+ }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+ const BYTE* const pStart = pIn;
+
+ while ((pIn<pInLimit-(sizeof(size_t)-1)))
+ {
+ size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+ if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+ pIn += ZSTD_NbCommonBytes(diff);
+ return (size_t)(pIn - pStart);
+ }
+
+ if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+ if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+ if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+ return (size_t)(pIn - pStart);
+}
+
+
+/* *************************************************************
+* Decompression section
+***************************************************************/
+struct ZSTD_DCtx_s
+{
+ U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+ U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+ U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ void* previousDstEnd;
+ void* base;
+ size_t expected;
+ blockType_t bType;
+ U32 phase;
+ const BYTE* litPtr;
+ size_t litBufSize;
+ size_t litSize;
+ BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+ const BYTE* const in = (const BYTE* const)src;
+ BYTE headerFlags;
+ U32 cSize;
+
+ if (srcSize < 3) return ERROR(srcSize_wrong);
+
+ headerFlags = *in;
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+ if (bpPtr->blockType == bt_end) return 0;
+ if (bpPtr->blockType == bt_rle) return 1;
+ return cSize;
+}
+
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+ memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+ @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+ const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+
+ const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+ if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+ if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+ if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+ *maxDstSizePtr = litSize;
+ return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+ @return : nb of bytes read from src (< srcSize )*/
+static size_t ZSTD_decodeLiteralsBlock(void* ctx,
+ const void* src, size_t srcSize)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+ const BYTE* const istart = (const BYTE* const)src;
+
+ /* any compressed block with literals segment must be at least this size */
+ if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+ switch(*istart & 3)
+ {
+ default:
+ case 0:
+ {
+ size_t litSize = BLOCKSIZE;
+ const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE;
+ dctx->litSize = litSize;
+ return readSize; /* works if it's an error too */
+ }
+ case IS_RAW:
+ {
+ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
+ {
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE;
+ dctx->litSize = litSize;
+ return litSize+3;
+ }
+ /* direct reference into compressed stream */
+ dctx->litPtr = istart+3;
+ dctx->litBufSize = srcSize-3;
+ dctx->litSize = litSize;
+ return litSize+3;
+ }
+ case IS_RLE:
+ {
+ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+ memset(dctx->litBuffer, istart[3], litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE;
+ dctx->litSize = litSize;
+ return 4;
+ }
+ }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* ip = istart;
+ const BYTE* const iend = istart + srcSize;
+ U32 LLtype, Offtype, MLtype;
+ U32 LLlog, Offlog, MLlog;
+ size_t dumpsLength;
+
+ /* check */
+ if (srcSize < 5) return ERROR(srcSize_wrong);
+
+ /* SeqHead */
+ *nbSeq = MEM_readLE16(ip); ip+=2;
+ LLtype = *ip >> 6;
+ Offtype = (*ip >> 4) & 3;
+ MLtype = (*ip >> 2) & 3;
+ if (*ip & 2)
+ {
+ dumpsLength = ip[2];
+ dumpsLength += ip[1] << 8;
+ ip += 3;
+ }
+ else
+ {
+ dumpsLength = ip[1];
+ dumpsLength += (ip[0] & 1) << 8;
+ ip += 2;
+ }
+ *dumpsPtr = ip;
+ ip += dumpsLength;
+ *dumpsLengthPtr = dumpsLength;
+
+ /* check */
+ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+ /* sequences */
+ {
+ S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
+ size_t headerSize;
+
+ /* Build DTables */
+ switch(LLtype)
+ {
+ U32 max;
+ case bt_rle :
+ LLlog = 0;
+ FSE_buildDTable_rle(DTableLL, *ip++); break;
+ case bt_raw :
+ LLlog = LLbits;
+ FSE_buildDTable_raw(DTableLL, LLbits); break;
+ default :
+ max = MaxLL;
+ headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (LLlog > LLFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableLL, norm, max, LLlog);
+ }
+
+ switch(Offtype)
+ {
+ U32 max;
+ case bt_rle :
+ Offlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+ break;
+ case bt_raw :
+ Offlog = Offbits;
+ FSE_buildDTable_raw(DTableOffb, Offbits); break;
+ default :
+ max = MaxOff;
+ headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (Offlog > OffFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableOffb, norm, max, Offlog);
+ }
+
+ switch(MLtype)
+ {
+ U32 max;
+ case bt_rle :
+ MLlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableML, *ip++); break;
+ case bt_raw :
+ MLlog = MLbits;
+ FSE_buildDTable_raw(DTableML, MLbits); break;
+ default :
+ max = MaxML;
+ headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (MLlog > MLFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableML, norm, max, MLlog);
+ }
+ }
+
+ return ip-istart;
+}
+
+
+typedef struct {
+ size_t litLength;
+ size_t offset;
+ size_t matchLength;
+} seq_t;
+
+typedef struct {
+ BIT_DStream_t DStream;
+ FSE_DState_t stateLL;
+ FSE_DState_t stateOffb;
+ FSE_DState_t stateML;
+ size_t prevOffset;
+ const BYTE* dumps;
+ const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+ size_t litLength;
+ size_t prevOffset;
+ size_t offset;
+ size_t matchLength;
+ const BYTE* dumps = seqState->dumps;
+ const BYTE* const de = seqState->dumpsEnd;
+
+ /* Literal length */
+ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;
+ seqState->prevOffset = seq->offset;
+ if (litLength == MaxLL)
+ {
+ U32 add = *dumps++;
+ if (add < 255) litLength += add;
+ else
+ {
+ litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no problem : dumps is always followed by seq tables > 1 byte */
+ dumps += 3;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+
+ /* Offset */
+ {
+ static const size_t offsetPrefix[MaxOff+1] = { /* note : size_t faster than U32 */
+ 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+ 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+ 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+ U32 offsetCode, nbBits;
+ offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ nbBits = offsetCode - 1;
+ if (offsetCode==0) nbBits = 0; /* cmove */
+ offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ if (offsetCode==0) offset = prevOffset; /* cmove */
+ }
+
+ /* MatchLength */
+ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+ if (matchLength == MaxML)
+ {
+ U32 add = *dumps++;
+ if (add < 255) matchLength += add;
+ else
+ {
+ matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no problem : dumps is always followed by seq tables > 1 byte */
+ dumps += 3;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+ matchLength += MINMATCH;
+
+ /* save result */
+ seq->litLength = litLength;
+ seq->offset = offset;
+ seq->matchLength = matchLength;
+ seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+ seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ BYTE* const base, BYTE* const oend)
+{
+ static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
+ const BYTE* const ostart = op;
+ BYTE* const oLitEnd = op + sequence.litLength;
+ BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_8 = oend-8;
+ const BYTE* const litEnd = *litPtr + sequence.litLength;
+
+ /* checks */
+ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
+ if (litEnd > litLimit-8) return ERROR(corruption_detected); /* overRead beyond lit buffer */
+
+ /* copy Literals */
+ ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = litEnd; /* update for next sequence */
+
+ /* copy Match */
+ {
+ const BYTE* match = op - sequence.offset;
+
+ /* check */
+ if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */
+ //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer removing this test ?) */
+ if (match < base) return ERROR(corruption_detected);
+
+ /* close range match, overlap */
+ if (sequence.offset < 8)
+ {
+ const int dec64 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= dec64;
+ }
+ else
+ {
+ ZSTD_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-12)
+ {
+ if (op < oend_8)
+ {
+ ZSTD_wildcopy(op, match, oend_8 - op);
+ match += oend_8 - op;
+ op = oend_8;
+ }
+ while (op < oMatchEnd) *op++ = *match++;
+ }
+ else
+ {
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ }
+
+ return oMatchEnd - ostart;
+}
+
+static size_t ZSTD_decompressSequences(
+ void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx;
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t errorCode, dumpsLength;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* const litMax = litPtr + dctx->litBufSize;
+ const BYTE* const litEnd = litPtr + dctx->litSize;
+ int nbSeq;
+ const BYTE* dumps;
+ U32* DTableLL = dctx->LLTable;
+ U32* DTableML = dctx->MLTable;
+ U32* DTableOffb = dctx->OffTable;
+ BYTE* const base = (BYTE*) (dctx->base);
+
+ /* Build Decoding Tables */
+ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+ DTableLL, DTableML, DTableOffb,
+ ip, iend-ip);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ ip += errorCode;
+
+ /* Regen sequences */
+ {
+ seq_t sequence;
+ seqState_t seqState;
+
+ memset(&sequence, 0, sizeof(sequence));
+ seqState.dumps = dumps;
+ seqState.dumpsEnd = dumps + dumpsLength;
+ seqState.prevOffset = sequence.offset = 4;
+ errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+ if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; )
+ {
+ size_t oneSeqSize;
+ nbSeq--;
+ ZSTD_decodeSequence(&sequence, &seqState);
+ oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litMax, base, oend);
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+
+ /* check if reached exact end */
+ if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
+ if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
+
+ /* last literal segment */
+ {
+ size_t lastLLSize = litEnd - litPtr;
+ if (litPtr > litEnd) return ERROR(corruption_detected);
+ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+ if (op != litPtr) memmove(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+ void* ctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ /* blockType == blockCompressed */
+ const BYTE* ip = (const BYTE*)src;
+
+ /* Decode literals sub-block */
+ size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize);
+ if (ZSTD_isError(litCSize)) return litCSize;
+ ip += litCSize;
+ srcSize -= litCSize;
+
+ return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* iend = ip + srcSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t remainingSize = srcSize;
+ U32 magicNumber;
+ blockProperties_t blockProperties;
+
+ /* Frame Header */
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+ magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+ ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+ /* Loop on each block */
+ while (1)
+ {
+ size_t decodedSize=0;
+ size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+ ip += ZSTD_blockHeaderSize;
+ remainingSize -= ZSTD_blockHeaderSize;
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
+ break;
+ case bt_raw :
+ decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet supported */
+ break;
+ case bt_end :
+ /* end of frame */
+ if (remainingSize) return ERROR(srcSize_wrong);
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ if (cBlockSize == 0) break; /* bt_end */
+
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ op += decodedSize;
+ ip += cBlockSize;
+ remainingSize -= cBlockSize;
+ }
+
+ return op-ostart;
+}
+
+static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ ZSTD_DCtx ctx;
+ ctx.base = dst;
+ return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
+}
+
+
+/*******************************
+* Streaming Decompression API
+*******************************/
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+ dctx->expected = ZSTD_frameHeaderSize;
+ dctx->phase = 0;
+ dctx->previousDstEnd = NULL;
+ dctx->base = NULL;
+ return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+ if (dctx==NULL) return NULL;
+ ZSTD_resetDCtx(dctx);
+ return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+ free(dctx);
+ return 0;
+}
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+ return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ /* Sanity check */
+ if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+ if (dst != ctx->previousDstEnd) /* not contiguous */
+ ctx->base = dst;
+
+ /* Decompress : frame header */
+ if (ctx->phase == 0)
+ {
+ /* Check frame magic header */
+ U32 magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+ ctx->phase = 1;
+ ctx->expected = ZSTD_blockHeaderSize;
+ return 0;
+ }
+
+ /* Decompress : block header */
+ if (ctx->phase == 1)
+ {
+ blockProperties_t bp;
+ size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+ if (ZSTD_isError(blockSize)) return blockSize;
+ if (bp.blockType == bt_end)
+ {
+ ctx->expected = 0;
+ ctx->phase = 0;
+ }
+ else
+ {
+ ctx->expected = blockSize;
+ ctx->bType = bp.blockType;
+ ctx->phase = 2;
+ }
+
+ return 0;
+ }
+
+ /* Decompress : block content */
+ {
+ size_t rSize;
+ switch(ctx->bType)
+ {
+ case bt_compressed:
+ rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+ break;
+ case bt_raw :
+ rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet handled */
+ break;
+ case bt_end : /* should never happen (filtered at phase 1) */
+ rSize = 0;
+ break;
+ default:
+ return ERROR(GENERIC);
+ }
+ ctx->phase = 1;
+ ctx->expected = ZSTD_blockHeaderSize;
+ ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+ return rSize;
+ }
+
+}
+
+
+/* wrapper layer */
+
+unsigned ZSTDv03_isError(size_t code)
+{
+ return ZSTD_isError(code);
+}
+
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize)
+{
+ return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
+}
+
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
+{
+ return (ZSTDv03_Dctx*)ZSTD_createDCtx();
+}
+
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx)
+{
+ return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx)
+{
+ return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx)
+{
+ return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
+}
+
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
+}
diff --git a/contrib/libs/zstd06/legacy/zstd_v03.h b/contrib/libs/zstd06/legacy/zstd_v03.h
index 7607f343c9..38b1f6eb98 100644
--- a/contrib/libs/zstd06/legacy/zstd_v03.h
+++ b/contrib/libs/zstd06/legacy/zstd_v03.h
@@ -1,100 +1,100 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd_v03 - decoder for 0.3 format
- Header File
- Copyright (C) 2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#pragma once
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* *************************************
-* Simple one-step function
-***************************************/
-/**
-ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
- compressedSize : is the exact source size
- maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
- It must be equal to or larger than originalSize, otherwise decompression will fail.
- return : the number of bytes decompressed into destination buffer (originalSize)
- or an errorCode if it fails (which can be tested using ZSTDv03_isError())
-*/
-size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/**
-ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
-*/
-unsigned ZSTDv03_isError(size_t code);
-
-
-/* *************************************
-* Advanced functions
-***************************************/
-typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
-ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
-size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
-
-size_t ZSTDv03_decompressDCtx(void* ctx,
- void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/* *************************************
-* Streaming functions
-***************************************/
-size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
-
-size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
-size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
-/**
- Use the above functions alternately.
- ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
- Result is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-*/
-
-/* *************************************
-* Prefix - version detection
-***************************************/
-#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */
-
-
-#if defined (__cplusplus)
-}
-#endif
+/*
+ zstd_v03 - decoder for 0.3 format
+ Header File
+ Copyright (C) 2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal to or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize)
+ or an errorCode if it fails (which can be tested using ZSTDv03_isError())
+*/
+size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+/**
+ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
+*/
+unsigned ZSTDv03_isError(size_t code);
+
+
+/* *************************************
+* Advanced functions
+***************************************/
+typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
+ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
+size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_decompressDCtx(void* ctx,
+ void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+/* *************************************
+* Streaming functions
+***************************************/
+size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
+
+size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
+size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+ Use above functions alternatively.
+ ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/contrib/libs/zstd06/legacy/zstd_v04.c b/contrib/libs/zstd06/legacy/zstd_v04.c
index 0c72bcf43c..66a47e7a12 100644
--- a/contrib/libs/zstd06/legacy/zstd_v04.c
+++ b/contrib/libs/zstd06/legacy/zstd_v04.c
@@ -1,3625 +1,3625 @@
-/* ******************************************************************
- zstd_v04.c
- Decompression module for ZSTD v0.4 legacy format
- Copyright (C) 2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Homepage : http://www.zstd.net/
-****************************************************************** */
-
-/*- Dependencies -*/
-#include "zstd_v04.h"
-
-
-/* ******************************************************************
- mem.h
- low-level memory access routines
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/******************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include <string.h> /* memcpy */
-
-
-/******************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/****************************************************************
-* Basic Types
-*****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
- typedef uint8_t BYTE;
- typedef uint16_t U16;
- typedef int16_t S16;
- typedef uint32_t U32;
- typedef int32_t S32;
- typedef uint64_t U64;
- typedef int64_t S64;
-#else
- typedef unsigned char BYTE;
- typedef unsigned short U16;
- typedef signed short S16;
- typedef unsigned int U32;
- typedef signed int S32;
- typedef unsigned long long U64;
- typedef signed long long S64;
-#endif
-
-
-/****************************************************************
-* Memory I/O
-*****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The switch below allows selecting a different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on a compiler extension (ie, not portable).
- * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violates the C standard.
- * It can generate buggy code on targets generating assembly depending on alignment.
- * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
-# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
-# define MEM_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
-MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
-MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
-
-MEM_STATIC unsigned MEM_isLittleEndian(void)
-{
- const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
-
-/* violates C standard on structure alignment.
-Only use if no other choice to achieve best performance on target platform */
-MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
-MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
-MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
-
-#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
-
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
-
-#else
-
-/* default method, safe and standard.
- can sometimes prove slower */
-
-MEM_STATIC U16 MEM_read16(const void* memPtr)
-{
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U32 MEM_read32(const void* memPtr)
-{
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U64 MEM_read64(const void* memPtr)
-{
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write32(void* memPtr, U32 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write64(void* memPtr, U64 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif // MEM_FORCE_MEMORY_ACCESS
-
-
-MEM_STATIC U16 MEM_readLE16(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read16(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)(p[0] + (p[1]<<8));
- }
-}
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write16(memPtr, val);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val;
- p[1] = (BYTE)(val>>8);
- }
-}
-
-MEM_STATIC U32 MEM_readLE32(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read32(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
- }
-}
-
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write32(memPtr, val32);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val32;
- p[1] = (BYTE)(val32>>8);
- p[2] = (BYTE)(val32>>16);
- p[3] = (BYTE)(val32>>24);
- }
-}
-
-MEM_STATIC U64 MEM_readLE64(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read64(memPtr);
- else
- {
- const BYTE* p = (const BYTE*)memPtr;
- return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
- + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
- }
-}
-
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
-{
- if (MEM_isLittleEndian())
- {
- MEM_write64(memPtr, val64);
- }
- else
- {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val64;
- p[1] = (BYTE)(val64>>8);
- p[2] = (BYTE)(val64>>16);
- p[3] = (BYTE)(val64>>24);
- p[4] = (BYTE)(val64>>32);
- p[5] = (BYTE)(val64>>40);
- p[6] = (BYTE)(val64>>48);
- p[7] = (BYTE)(val64>>56);
- }
-}
-
-MEM_STATIC size_t MEM_readLEST(const void* memPtr)
-{
- if (MEM_32bits())
- return (size_t)MEM_readLE32(memPtr);
- else
- return (size_t)MEM_readLE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
-{
- if (MEM_32bits())
- MEM_writeLE32(memPtr, (U32)val);
- else
- MEM_writeLE64(memPtr, (U64)val);
-}
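-
-/* Editorial sketch (not part of the original zstd source) : a tiny round-trip use of the
-   little-endian helpers above. Whatever the host endianness, MEM_writeLE32() stores the
-   least-significant byte first and MEM_readLE32() restores the original value.
-   The function name below is illustrative only. */
-MEM_STATIC U32 MEM_roundTripLE32_example(U32 value32)
-{
-    BYTE buffer[4];
-    MEM_writeLE32(buffer, value32);   /* buffer[0] receives (BYTE)value32 on any platform */
-    return MEM_readLE32(buffer);      /* returns value32 on any platform */
-}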
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* MEM_H_MODULE */
-
-/* ******************************************************************
- Error codes list
- Copyright (C) 2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/zstd
-****************************************************************** */
-#ifndef ERROR_PUBLIC_H_MODULE
-#define ERROR_PUBLIC_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* ****************************************
-* error list
-******************************************/
-enum {
- ZSTD_error_No_Error,
- ZSTD_error_GENERIC,
- ZSTD_error_prefix_unknown,
- ZSTD_error_frameParameter_unsupported,
- ZSTD_error_frameParameter_unsupportedBy32bitsImplementation,
- ZSTD_error_init_missing,
- ZSTD_error_memory_allocation,
- ZSTD_error_stage_wrong,
- ZSTD_error_dstSize_tooSmall,
- ZSTD_error_srcSize_wrong,
- ZSTD_error_corruption_detected,
- ZSTD_error_tableLog_tooLarge,
- ZSTD_error_maxSymbolValue_tooLarge,
- ZSTD_error_maxSymbolValue_tooSmall,
- ZSTD_error_maxCode
-};
-
-/* note : functions return error codes as negated enum values,
-   so compare against (size_t)(0 - enum_value) */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_PUBLIC_H_MODULE */
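-
-/* Editorial sketch (not part of the original zstd source) : how a caller can recognise the
-   negated error codes described in the note above. ERR_isError(), defined further below,
-   performs exactly this comparison; the function name here is illustrative only. */
-static unsigned ZSTD_isErrorCode_example(size_t code)
-{
-    /* error codes are enum values negated into size_t, so they sit at the very top of the size_t range */
-    return (code > (size_t)0 - ZSTD_error_maxCode);
-}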
-
-
-
-/*
- zstd - standard compression library
- Header File for static linking only
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#ifndef ZSTD_STATIC_H
-#define ZSTD_STATIC_H
-
-/* The objects defined in this file shall be considered experimental.
- * They are not considered stable, as their prototypes may change in the future.
- * Use them for tests, to provide feedback, or if you can accept the risk of future changes.
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Types
-***************************************/
-#define ZSTD_WINDOWLOG_MAX 26
-#define ZSTD_WINDOWLOG_MIN 18
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
-#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CONTENTLOG_MIN 4
-#define ZSTD_HASHLOG_MAX 28
-#define ZSTD_HASHLOG_MIN 4
-#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN 1
-#define ZSTD_SEARCHLENGTH_MAX 7
-#define ZSTD_SEARCHLENGTH_MIN 4
-
-/** from faster to stronger */
-typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
-
-typedef struct
-{
-    U64 srcSize;      /* optional : tells how many bytes are present in the frame. Use 0 if not known. */
- U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */
- U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */
- U32 hashLog; /* dispatch table : larger == more memory, faster */
- U32 searchLog; /* nb of searches : larger == more compression, slower */
- U32 searchLength; /* size of matches : larger == faster decompression, sometimes less compression */
- ZSTD_strategy strategy;
-} ZSTD_parameters;
-
-typedef ZSTDv04_Dctx ZSTD_DCtx;
-
-/* *************************************
-* Advanced functions
-***************************************/
-/** ZSTD_decompress_usingDict
-*   Same as ZSTD_decompressDCtx(), using dictionary content as a prefix
-*   Note : dict can be NULL, in which case it's equivalent to ZSTD_decompressDCtx() */
-static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize);
-
-
-/* **************************************
-* Streaming functions (direct mode)
-****************************************/
-static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
-static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
-static void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
-
-static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-static size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
-
-/**
- Streaming decompression, bufferless mode
-
- A ZSTD_DCtx object is required to track streaming operations.
- Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
- A ZSTD_DCtx object can be re-used multiple times. Use ZSTD_resetDCtx() to return to fresh status.
-
- First operation is to retrieve frame parameters, using ZSTD_getFrameParams().
- This function doesn't consume its input. It needs enough input data to properly decode the frame header.
-  The objective is to retrieve params->windowLog, which gives the minimum amount of memory required during decoding.
-  Result : 0 when successful; it means the ZSTD_parameters structure has been filled.
-           >0 : not enough data in src; the value is the expected size needed to successfully decode the header.
-           an errorCode, which can be tested using ZSTD_isError() (for example, when src is not a ZSTD header)
-
- Then, you can optionally insert a dictionary.
- This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted.
-
- Then it's possible to start decompression.
- Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
-  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
-  ZSTD_decompressContinue() requires this exact number of bytes, or it will fail.
-  ZSTD_decompressContinue() needs access to previously decoded data during decompression, up to (1 << windowLog) bytes.
-  That data should preferably be located contiguously, just before the current block. Alternatively, a ring buffer can also be used.
-
- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-
- A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
- Context can then be reset to start a new decompression.
-*/
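-
-/* Editorial sketch (not part of the original zstd source) : the bufferless streaming loop
-   described above, condensed into one illustrative function. It assumes ZSTD_createDCtx(),
-   ZSTD_freeDCtx() and ZSTD_isError() exist elsewhere in this file, as the comment above
-   implies, that 'src' holds one complete frame, and that the whole destination buffer is
-   used as the contiguous history window. */
-static size_t ZSTD_decompressFrame_example(void* dst, size_t dstCapacity,
-                                           const void* src, size_t srcSize)
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + dstCapacity;
-    ZSTD_parameters params;
-    ZSTD_DCtx* dctx = ZSTD_createDCtx();
-    size_t result;
-    if (dctx == NULL) return (size_t)0 - ZSTD_error_memory_allocation;
-    result = ZSTD_getFrameParams(&params, src, srcSize);
-    if (result != 0) { ZSTD_freeDCtx(dctx); return result; }     /* error, or more header bytes needed */
-    ZSTD_resetDCtx(dctx);
-    for (;;)
-    {
-        size_t const toRead = ZSTD_nextSrcSizeToDecompress(dctx);
-        size_t decoded;
-        if (toRead == 0) break;                                  /* frame fully decoded */
-        decoded = ZSTD_decompressContinue(dctx, op, (size_t)(oend - op), ip, toRead);
-        if (ZSTD_isError(decoded)) { ZSTD_freeDCtx(dctx); return decoded; }
-        ip += toRead; op += decoded;                             /* decoded may be 0 after a header block */
-    }
-    ZSTD_freeDCtx(dctx);
-    return (size_t)(op - (BYTE*)dst);                            /* total regenerated size */
-}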
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/zstd
-****************************************************************** */
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* *****************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-
-
-/* *****************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/* *****************************************
-* Error Codes
-******************************************/
-#define PREFIX(name) ZSTD_error_##name
-
-#ifdef ERROR
-# undef ERROR /* reported already defined on VS 2015 by Rich Geldreich */
-#endif
-#define ERROR(name) (size_t)-PREFIX(name)
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-
-/* *****************************************
-* Error Strings
-******************************************/
-
-ERR_STATIC const char* ERR_getErrorName(size_t code)
-{
- static const char* codeError = "Unspecified error code";
- switch( (size_t)(0-code) )
- {
- case ZSTD_error_No_Error: return "No error detected";
- case ZSTD_error_GENERIC: return "Error (generic)";
- case ZSTD_error_prefix_unknown: return "Unknown frame descriptor";
- case ZSTD_error_frameParameter_unsupported: return "Unsupported frame parameter";
-    case ZSTD_error_frameParameter_unsupportedBy32bitsImplementation: return "Frame parameter unsupported in 32-bit mode";
-    case ZSTD_error_init_missing: return "Context should be initialized first";
-    case ZSTD_error_memory_allocation: return "Allocation error : not enough memory";
-    case ZSTD_error_stage_wrong: return "Operation not authorized at current processing stage";
-    case ZSTD_error_dstSize_tooSmall: return "Destination buffer is too small";
- case ZSTD_error_srcSize_wrong: return "Src size incorrect";
- case ZSTD_error_corruption_detected: return "Corrupted block detected";
- case ZSTD_error_tableLog_tooLarge: return "tableLog requires too much memory";
- case ZSTD_error_maxSymbolValue_tooLarge: return "Unsupported max possible Symbol Value : too large";
- case ZSTD_error_maxSymbolValue_tooSmall: return "Specified maxSymbolValue is too small";
- case ZSTD_error_maxCode:
- default: return codeError;
- }
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
-
-
-#endif /* ZSTD_STATIC_H */
-
-
-/*
- zstd_internal - common functions to include
- Header File for include
- Copyright (C) 2014-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Common macros
-***************************************/
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
-
-
-/* *************************************
-* Common constants
-***************************************/
-#define ZSTD_MAGICNUMBER 0xFD2FB524 /* v0.4 */
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BLOCKSIZE (128 KB) /* define, for static allocation */
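-
-/* Editorial note (not part of the original zstd source) : KB, MB and GB are defined above as
-   suffix-style macros, so "128 KB" expands to "128 *(1 <<10)" == 131072 bytes. */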
-
-static const size_t ZSTD_blockHeaderSize = 3;
-static const size_t ZSTD_frameHeaderSize_min = 5;
-#define ZSTD_frameHeaderSize_max 5 /* define, for static allocation */
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define IS_RAW BIT0
-#define IS_RLE BIT1
-
-#define MINMATCH 4
-#define REPCODE_STARTVALUE 4
-
-#define MLbits 7
-#define LLbits 6
-#define Offbits 5
-#define MaxML ((1<<MLbits) - 1)
-#define MaxLL ((1<<LLbits) - 1)
-#define MaxOff ((1<<Offbits)- 1)
-#define MLFSELog 10
-#define LLFSELog 10
-#define OffFSELog 9
-#define MaxSeq MAX(MaxLL, MaxML)
-
-#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
-#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
-
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-
-
-/* ******************************************
-* Shared functions to include for inlining
-********************************************/
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-
-/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
-static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
- do
- COPY8(op, ip)
- while (op < oend);
-}
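-
-/* Editorial note (not part of the original zstd source) : because COPY8 advances in whole
-   8-byte steps, ZSTD_wildcopy() may write up to 7-8 bytes past dst+length, as stated above;
-   callers must therefore keep that much spare room at the end of the destination buffer. */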
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-
-/* ******************************************************************
- FSE : Finite State Entropy coder
- header file
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef FSE_H
-#define FSE_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* *****************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-
-
-/* *****************************************
-* FSE simple functions
-******************************************/
-static size_t FSE_decompress(void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize);
-/*!
-FSE_decompress():
- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated destination buffer 'dst', of size 'maxDstSize'.
- return : size of regenerated data (<= maxDstSize)
- or an error code, which can be tested using FSE_isError()
-
- ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
- Why ? : making this distinction requires a header.
- Header management is intentionally delegated to the user layer, which can better manage special cases.
-*/
-
-
-/* *****************************************
-* Tool functions
-******************************************/
-/* Error Management */
-static unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
-
-
-
-/* *****************************************
-* FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-
-/* *** DECOMPRESSION *** */
-
-/*!
-FSE_readNCount():
- Read compactly saved 'normalizedCounter' from 'rBuffer'.
- return : size read from 'rBuffer'
- or an errorCode, which can be tested using FSE_isError()
- maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
-
-/*!
-Constructor and Destructor of type FSE_DTable
- Note that its size depends on 'tableLog' */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-/*!
-FSE_buildDTable():
- Builds 'dt', which must be already allocated, using FSE_createDTable()
- return : 0,
- or an errorCode, which can be tested using FSE_isError() */
-static size_t FSE_buildDTable ( FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*!
-FSE_decompress_usingDTable():
- Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
- into 'dst' which must be already allocated.
- return : size of regenerated data (necessarily <= maxDstSize)
- or an errorCode, which can be tested using FSE_isError() */
-static size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
-
-/*!
-Tutorial :
-----------
-(Note : these functions only decompress FSE-compressed blocks.
- If block is uncompressed, use memcpy() instead
- If block is a single repeated byte, use memset() instead )
-
-The first step is to obtain the normalized frequencies of symbols.
-This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
-or size the table to handle worst case situations (typically 256).
-FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
-The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
-Note that 'rBuffSize' must be at least 4 bytes, even if the useful information occupies less than that.
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
-This is performed by the function FSE_buildDTable().
-The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
-'cSrcSize' must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
-If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
-*/
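-
-/* Editorial sketch (not part of the original zstd source) : the three tutorial steps above,
-   chained together. Buffer sizes are written as literals because the matching macros
-   (FSE_MAX_SYMBOL_VALUE, FSE_DTABLE_SIZE_U32) are only defined further below; the file's
-   own FSE_decompress(), defined later, performs the same sequence with extra checks. */
-static size_t FSE_decompress_example(void* dst, size_t maxDstSize,
-                                     const void* cSrc, size_t cSrcSize)
-{
-    short counting[256];              /* enough for maxSymbolValue <= 255 */
-    FSE_DTable dt[1 + (1<<12)];       /* enough for tableLog <= 12, the maximum used in this file */
-    unsigned maxSymbolValue = 255;
-    unsigned tableLog;
-    size_t hSize, errorCode;
-
-    /* step 1 : read the normalized frequencies */
-    hSize = FSE_readNCount(counting, &maxSymbolValue, &tableLog, cSrc, cSrcSize);
-    if (FSE_isError(hSize)) return hSize;
-
-    /* step 2 : build the decoding table */
-    errorCode = FSE_buildDTable(dt, counting, maxSymbolValue, tableLog);
-    if (FSE_isError(errorCode)) return errorCode;
-
-    /* step 3 : decode the remaining compressed bytes */
-    return FSE_decompress_usingDTable(dst, maxDstSize,
-                                      (const char*)cSrc + hSize, cSrcSize - hSize, dt);
-}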
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FSE_H */
-
-
-/* ******************************************************************
- bitstream
- Part of NewGen Entropy library
- header file (to include)
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*
-* This API consists of small unitary functions, which highly benefit from being inlined.
-* Since link-time-optimization is not available for all compilers,
-* these functions are defined into a .h to be included.
-*/
-
-/**********************************************
-* bitStream decompression API (read backward)
-**********************************************/
-typedef struct
-{
- size_t bitContainer;
- unsigned bitsConsumed;
- const char* ptr;
- const char* start;
-} BIT_DStream_t;
-
-typedef enum { BIT_DStream_unfinished = 0,
- BIT_DStream_endOfBuffer = 1,
- BIT_DStream_completed = 2,
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
-               /* 1,2,4,8 would be better for bitmap combinations, but they slow down performance a bit ... :( */
-
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
-
-
-/*
-* Start by invoking BIT_initDStream().
-* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64 bits on 64-bit systems, 32 bits on 32-bit systems (size_t).
-* You can then retrieve bitFields stored in the local register, **in reverse order**.
-* The local register is manually refilled from memory by the BIT_reloadDStream() method.
-* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
-* Otherwise, it can be less than that, so proceed accordingly.
-* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
-*/
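-
-/* Editorial sketch (not part of the original zstd source) : reading back three bit-fields
-   that were written as 20, then 10, then 5 bits. As described above, they come out of the
-   DStream in reverse order, and the local register is refilled with BIT_reloadDStream().
-   The function returns an error unless the stream ends exactly after these fields. */
-MEM_STATIC size_t BIT_readThreeFields_example(const void* srcBuffer, size_t srcSize,
-                                              U32* f20, U32* f10, U32* f5)
-{
-    BIT_DStream_t bitD;
-    size_t const initResult = BIT_initDStream(&bitD, srcBuffer, srcSize);
-    if (ERR_isError(initResult)) return initResult;
-
-    *f5  = (U32)BIT_readBits(&bitD, 5);     /* last field written, first field read */
-    *f10 = (U32)BIT_readBits(&bitD, 10);
-    BIT_reloadDStream(&bitD);               /* refill the local register from memory */
-    *f20 = (U32)BIT_readBits(&bitD, 20);    /* first field written, last field read */
-
-    return BIT_endOfDStream(&bitD) ? 0 : ERROR(GENERIC);
-}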
-
-
-/******************************************
-* unsafe API
-******************************************/
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-
-
-/****************************************************************
-* Helper functions
-****************************************************************/
-MEM_STATIC unsigned BIT_highbit32 (register U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- unsigned r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-/**********************************************************
-* bitStream decoding
-**********************************************************/
-
-/*!BIT_initDStream
-* Initialize a BIT_DStream_t.
-* @bitD : a pointer to an already allocated BIT_DStream_t structure
-* @srcBuffer must point at the beginning of a bitStream
-* @srcSize must be the exact size of the bitStream
-* @result : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
-
- if (srcSize >= sizeof(size_t)) /* normal case */
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
- }
- else
- {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = bitD->start;
- bitD->bitContainer = *(const BYTE*)(bitD->start);
- switch(srcSize)
- {
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
- default:;
- }
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
- bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
- }
-
- return srcSize;
-}
-
-/*!BIT_lookBits
- * Provides next n bits from local register
- * local register is not modified (bits are still present for next read/look)
- * On 32-bits, maxNbBits==25
- * On 64-bits, maxNbBits==57
- * @return : value extracted
- */
-MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
-}
-
-/*! BIT_lookBitsFast :
-* unsafe version; works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
-}
-
-MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- bitD->bitsConsumed += nbBits;
-}
-
-/*!BIT_readBits
- * Read next n bits from local register.
- * Take care not to read more bits than are currently held in the local register.
- * @return : extracted value.
- */
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BIT_lookBits(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-/*!BIT_readBitsFast :
-* unsafe version; works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BIT_lookBitsFast(bitD, nbBits);
- BIT_skipBits(bitD, nbBits);
- return value;
-}
-
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
-{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BIT_DStream_overflow;
-
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
- {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
- }
- if (bitD->ptr == bitD->start)
- {
- if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
- return BIT_DStream_completed;
- }
- {
- U32 nbBytes = bitD->bitsConsumed >> 3;
- BIT_DStream_status result = BIT_DStream_unfinished;
- if (bitD->ptr - nbBytes < bitD->start)
- {
- nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
- result = BIT_DStream_endOfBuffer;
- }
- bitD->ptr -= nbBytes;
- bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
- return result;
- }
-}
-
-/*! BIT_endOfDStream
-* @return Tells if DStream has reached its exact end
-*/
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
-{
- return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* BITSTREAM_H_MODULE */
-
-
-
-/* ******************************************************************
- FSE : Finite State Entropy coder
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef FSE_STATIC_H
-#define FSE_STATIC_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* *****************************************
-* Static allocation
-*******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
-
-
-/* *****************************************
-* FSE advanced API
-*******************************************/
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
-
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
-/* build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-
-
-/* *****************************************
-* FSE symbol decompression API
-*******************************************/
-typedef struct
-{
- size_t state;
- const void* table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-
-static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
-
-/*!
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream; // Stream context
-FSE_DState_t DState; // State context. Multiple ones are possible
-FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
- errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
- errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information, the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
- unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
- size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
-    endSignal = BIT_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left in the DStream.
-BIT_DStream_endOfBuffer : DStream reached the end of its buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : DStream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_overflow : DStream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
- BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
- BIT_endOfDStream(&DStream);
-Also check the states : symbols may still be pending there, since high-probability symbols (>50%) can be decoded using 0 bits.
- FSE_endOfDState(&DState);
-*/
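-
-/* Editorial sketch (not part of the original zstd source) : the decoding loop described
-   above, reduced to a single state for clarity; the blocks in this file interleave two
-   states, see FSE_decompress_usingDTable_generic() further below. */
-static size_t FSE_decodeWithOneState_example(BYTE* dst, size_t dstCapacity,
-                                             const void* cSrc, size_t cSrcSize,
-                                             const FSE_DTable* dt)
-{
-    BIT_DStream_t DStream;
-    FSE_DState_t DState;
-    size_t n = 0;
-    size_t const initResult = BIT_initDStream(&DStream, cSrc, cSrcSize);
-    if (FSE_isError(initResult)) return initResult;
-    FSE_initDState(&DState, &DStream, dt);
-
-    while (n < dstCapacity)
-    {
-        dst[n++] = FSE_decodeSymbol(&DState, &DStream);
-        if (BIT_reloadDStream(&DStream) > BIT_DStream_completed)
-            return ERROR(corruption_detected);                   /* read past the start of the buffer */
-        if (BIT_endOfDStream(&DStream) && FSE_endOfDState(&DState))
-            return n;                                            /* stream and state fully consumed */
-    }
-    return ERROR(dstSize_tooSmall);                              /* dst is full but the stream is not finished */
-}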
-
-
-/* *****************************************
-* FSE unsafe API
-*******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-
-/* *****************************************
-* Implementation of inlined functions
-*******************************************/
-/* decompression */
-
-typedef struct {
- U16 tableLog;
- U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct
-{
- unsigned short newState;
- unsigned char symbol;
- unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- memcpy(&DTableH, dt, sizeof(DTableH));
- DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
- BIT_reloadDStream(bitD);
- DStatePtr->table = dt + 1;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BIT_readBits(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
- const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BIT_readBitsFast(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
-{
- return DStatePtr->state == 0;
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FSE_STATIC_H */
-
-/* ******************************************************************
- FSE : Finite State Entropy coder
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-#ifndef FSE_COMMONDEFS_ONLY
-
-/* **************************************************************
-* Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect
-* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
-
-/*!FSE_MAX_SYMBOL_VALUE :
-* Maximum symbol value authorized.
-* Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
-
-/* **************************************************************
-* template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-#define FSE_DECODE_TYPE FSE_decode_t
-
-
-#endif /* !FSE_COMMONDEFS_ONLY */
-
-/* **************************************************************
-* Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* **************************************************************
-* Dependencies
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-
-/* ***************************************************************
-* Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
-#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-
-/* **************************************************************
-* Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/* **************************************************************
-* Complex types
-****************************************************************/
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-
-/*-**************************************************************
-* Templates
-****************************************************************/
-/*
- designed to be included
- for type-specific functions (template emulation in C)
- Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-# error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-# error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X,Y) X##Y
-#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
-#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
-
-static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
-
-
-static size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
- FSE_DTableHeader DTableH;
- void* const tdPtr = dt+1; /* because dt is unsigned, 32-bits aligned on 32-bits */
- FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
- const U32 tableSize = 1 << tableLog;
- const U32 tableMask = tableSize-1;
- const U32 step = FSE_tableStep(tableSize);
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
- U32 position = 0;
- U32 highThreshold = tableSize-1;
- const S16 largeLimit= (S16)(1 << (tableLog-1));
- U32 noLarge = 1;
- U32 s;
-
- /* Sanity Checks */
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-
- /* Init, lay down lowprob symbols */
- DTableH.tableLog = (U16)tableLog;
- for (s=0; s<=maxSymbolValue; s++)
- {
- if (normalizedCounter[s]==-1)
- {
- tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
- symbolNext[s] = 1;
- }
- else
- {
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
- symbolNext[s] = normalizedCounter[s];
- }
- }
-
- /* Spread symbols */
- for (s=0; s<=maxSymbolValue; s++)
- {
- int i;
- for (i=0; i<normalizedCounter[s]; i++)
- {
- tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
- position = (position + step) & tableMask;
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
- }
- }
-
- if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-
- /* Build Decoding table */
- {
- U32 i;
- for (i=0; i<tableSize; i++)
- {
- FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
- }
- }
-
- DTableH.fastMode = (U16)noLarge;
- memcpy(dt, &DTableH, sizeof(DTableH));
- return 0;
-}
-
-
-#ifndef FSE_COMMONDEFS_ONLY
-/******************************************
-* FSE helper functions
-******************************************/
-static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-
-/****************************************************************
-* FSE NCount encoding-decoding
-****************************************************************/
-static short FSE_abs(short a)
-{
- return a<0 ? -a : a;
-}
-
-static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
- const void* headerBuffer, size_t hbSize)
-{
- const BYTE* const istart = (const BYTE*) headerBuffer;
- const BYTE* const iend = istart + hbSize;
- const BYTE* ip = istart;
- int nbBits;
- int remaining;
- int threshold;
- U32 bitStream;
- int bitCount;
- unsigned charnum = 0;
- int previous0 = 0;
-
- if (hbSize < 4) return ERROR(srcSize_wrong);
- bitStream = MEM_readLE32(ip);
- nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
- if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
- bitStream >>= 4;
- bitCount = 4;
- *tableLogPtr = nbBits;
- remaining = (1<<nbBits)+1;
- threshold = 1<<nbBits;
- nbBits++;
-
- while ((remaining>1) && (charnum<=*maxSVPtr))
- {
- if (previous0)
- {
- unsigned n0 = charnum;
- while ((bitStream & 0xFFFF) == 0xFFFF)
- {
- n0+=24;
- if (ip < iend-5)
- {
- ip+=2;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- {
- bitStream >>= 16;
- bitCount+=16;
- }
- }
- while ((bitStream & 3) == 3)
- {
- n0+=3;
- bitStream>>=2;
- bitCount+=2;
- }
- n0 += bitStream & 3;
- bitCount += 2;
- if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
- while (charnum < n0) normalizedCounter[charnum++] = 0;
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- bitStream >>= 2;
- }
- {
- const short max = (short)((2*threshold-1)-remaining);
- short count;
-
- if ((bitStream & (threshold-1)) < (U32)max)
- {
- count = (short)(bitStream & (threshold-1));
- bitCount += nbBits-1;
- }
- else
- {
- count = (short)(bitStream & (2*threshold-1));
- if (count >= threshold) count -= max;
- bitCount += nbBits;
- }
-
- count--; /* extra accuracy */
- remaining -= FSE_abs(count);
- normalizedCounter[charnum++] = count;
- previous0 = !count;
- while (remaining < threshold)
- {
- nbBits--;
- threshold >>= 1;
- }
-
- {
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
- {
- ip += bitCount>>3;
- bitCount &= 7;
- }
- else
- {
- bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
- bitStream = MEM_readLE32(ip) >> (bitCount & 31);
- }
- }
- }
- if (remaining != 1) return ERROR(GENERIC);
- *maxSVPtr = charnum-1;
-
- ip += (bitCount+7)>>3;
- if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
- return ip-istart;
-}
-
-
-/*********************************************************
-* Decompression (Byte symbols)
-*********************************************************/
-static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- void* dPtr = dt + 1;
- FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
-
- DTableH->tableLog = 0;
- DTableH->fastMode = 0;
-
- cell->newState = 0;
- cell->symbol = symbolValue;
- cell->nbBits = 0;
-
- return 0;
-}
-
-
-static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
- void* ptr = dt;
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
- void* dPtr = dt + 1;
- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
- /* Build Decoding Table */
- DTableH->tableLog = (U16)nbBits;
- DTableH->fastMode = 1;
- for (s=0; s<=maxSymbolValue; s++)
- {
- dinfo[s].newState = 0;
- dinfo[s].symbol = (BYTE)s;
- dinfo[s].nbBits = (BYTE)nbBits;
- }
-
- return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
- void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt, const unsigned fast)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const omax = op + maxDstSize;
- BYTE* const olimit = omax-3;
-
- BIT_DStream_t bitD;
- FSE_DState_t state1;
- FSE_DState_t state2;
- size_t errorCode;
-
- /* Init */
- errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
- if (FSE_isError(errorCode)) return errorCode;
-
- FSE_initDState(&state1, &bitD, dt);
- FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
- /* 4 symbols per loop */
- for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
- {
- op[0] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BIT_reloadDStream(&bitD);
-
- op[1] = FSE_GETSYMBOL(&state2);
-
- if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
-
- op[2] = FSE_GETSYMBOL(&state1);
-
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BIT_reloadDStream(&bitD);
-
- op[3] = FSE_GETSYMBOL(&state2);
- }
-
- /* tail */
-    /* note : BIT_reloadDStream(&bitD) >= BIT_DStream_endOfBuffer; ends at exactly BIT_DStream_completed */
- while (1)
- {
- if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state1);
-
- if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
- break;
-
- *op++ = FSE_GETSYMBOL(&state2);
- }
-
- /* end ? */
- if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
- return op-ostart;
-
- if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
-
- return ERROR(corruption_detected);
-}
-
-
-static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
- const void* cSrc, size_t cSrcSize,
- const FSE_DTable* dt)
-{
- FSE_DTableHeader DTableH;
- U32 fastMode;
-
- memcpy(&DTableH, dt, sizeof(DTableH));
- fastMode = DTableH.fastMode;
-
- /* select fast mode (static) */
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
- const BYTE* const istart = (const BYTE*)cSrc;
- const BYTE* ip = istart;
- short counting[FSE_MAX_SYMBOL_VALUE+1];
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
- unsigned tableLog;
- unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
- size_t errorCode;
-
- if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
-
- /* normal FSE decoding mode */
- errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
- if (FSE_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
- ip += errorCode;
- cSrcSize -= errorCode;
-
- errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
- if (FSE_isError(errorCode)) return errorCode;
-
- /* always return, even if it is an error code */
- return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
-}
-
-
-
-#endif /* FSE_COMMONDEFS_ONLY */
-
-
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- header file
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef HUFF0_H
-#define HUFF0_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* ****************************************
-* Dependency
-******************************************/
-#include <stddef.h> /* size_t */
-
-
-/* ****************************************
-* Huff0 simple functions
-******************************************/
-static size_t HUF_decompress(void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize);
-/*!
-HUF_decompress():
- Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated destination buffer 'dst', of size 'dstSize'.
- 'dstSize' must be the exact size of original (uncompressed) data.
- Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows the size to regenerate.
- @return : size of regenerated data (== dstSize)
- or an error code, which can be tested using HUF_isError()
-*/
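A minimal sketch of the contract just described (illustrative only; decodeHuff0Block is a hypothetical wrapper, and the caller is assumed to know the exact uncompressed size):

    static size_t decodeHuff0Block(void* dst, size_t originalSize,
                                   const void* cSrc, size_t cSrcSize)
    {
        size_t const r = HUF_decompress(dst, originalSize, cSrc, cSrcSize);
        if (HUF_isError(r)) return r;   /* propagate the error code */
        return r;                       /* on success, r == originalSize */
    }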
-
-
-/* ****************************************
-* Tool functions
-******************************************/
-/* Error Management */
-static unsigned HUF_isError(size_t code); /* tells if a return value is an error code */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* HUFF0_H */
-
-
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef HUFF0_STATIC_H
-#define HUFF0_STATIC_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-
-/* ****************************************
-* Static allocation macros
-******************************************/
-/* static allocation of Huff0's DTable */
-#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) /* nb Cells; use unsigned short for X2, unsigned int for X4 */
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
- unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
- unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
- unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
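For reference, the X2 macro above simply declares an array whose first cell records the maximum table log (expansion shown as an illustration):

    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
    /* expands to : unsigned short DTable[1 + (1<<HUF_MAX_TABLELOG)] = { HUF_MAX_TABLELOG }; */
    /* DTable[0] stores the table log; the decoding cells start at DTable+1 */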
-
-
-/* ****************************************
-* Advanced decompression functions
-******************************************/
-static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
-static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
-
-
-/* ****************************************
-* Huff0 detailed API
-******************************************/
-/*!
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
-2. build the Huffman table from the saved header, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
-
-*/
-static size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
-static size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
-
-static size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
-static size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
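Putting the documented steps together, a minimal sketch (it mirrors HUF_decompress4X2() defined further down; the wrapper name is chosen here purely for illustration):

    static size_t decompressX2_sketch(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
    {
        HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);           /* table storage */
        size_t const hSize = HUF_readDTableX2(DTable, cSrc, cSrcSize);  /* step 2 : build table from header */
        if (HUF_isError(hSize)) return hSize;
        if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
        return HUF_decompress4X2_usingDTable(dst, dstSize,              /* step 3 : decode the 4 segments */
                                             (const BYTE*)cSrc + hSize, cSrcSize - hSize, DTable);
    }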
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* HUFF0_STATIC_H */
-
-
-
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
-****************************************************************** */
-
-/* **************************************************************
-* Compiler specifics
-****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER)
-# define inline __inline
-#else
-# define inline /* disable inline */
-#endif
-
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* **************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-
-/* **************************************************************
-* Constants
-****************************************************************/
-#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */
-#define HUF_MAX_SYMBOL_VALUE 255
-#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
-# error "HUF_MAX_TABLELOG is too large !"
-#endif
-
-
-/* **************************************************************
-* Error Management
-****************************************************************/
-static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-
-/*-*******************************************************
-* Huff0 : Huffman block decompression
-*********************************************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
-
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-
-typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
-
-/*! HUF_readStats
- Read compact Huffman tree, saved by HUF_writeCTable
- @huffWeight : destination buffer
- @return : size read from `src`
-*/
-static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
- U32* nbSymbolsPtr, U32* tableLogPtr,
- const void* src, size_t srcSize)
-{
- U32 weightTotal;
- U32 tableLog;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- size_t oSize;
- U32 n;
-
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzers complain ... */
-
- if (iSize >= 128) /* special header */
- {
- if (iSize >= (242)) /* RLE */
- {
- static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
- oSize = l[iSize-242];
- memset(huffWeight, 1, hwSize);
- iSize = 0;
- }
- else /* Incompressible */
- {
- oSize = iSize - 127;
- iSize = ((oSize+1)/2);
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- if (oSize >= hwSize) return ERROR(corruption_detected);
- ip += 1;
- for (n=0; n<oSize; n+=2)
- {
- huffWeight[n] = ip[n/2] >> 4;
- huffWeight[n+1] = ip[n/2] & 15;
- }
- }
- }
- else /* header compressed with FSE (normal case) */
- {
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as the last one is implied */
- if (FSE_isError(oSize)) return oSize;
- }
-
- /* collect weight stats */
- memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
- weightTotal = 0;
- for (n=0; n<oSize; n++)
- {
- if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- rankStats[huffWeight[n]]++;
- weightTotal += (1 << huffWeight[n]) >> 1;
- }
-
- /* get last non-null symbol weight (implied, total must be 2^n) */
- tableLog = BIT_highbit32(weightTotal) + 1;
- if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- {
- U32 total = 1 << tableLog;
- U32 rest = total - weightTotal;
- U32 verif = 1 << BIT_highbit32(rest);
- U32 lastWeight = BIT_highbit32(rest) + 1;
- if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
- huffWeight[oSize] = (BYTE)lastWeight;
- rankStats[lastWeight]++;
- }
-
- /* check tree construction validity */
- if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
- /* results */
- *nbSymbolsPtr = (U32)(oSize+1);
- *tableLogPtr = tableLog;
- return iSize+1;
-}
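To make the implied-weight step above concrete (an illustrative example): if the decoded weights are {2, 2, 1, 1}, each weight w contributes (1<<w)>>1 = 2^(w-1), so weightTotal = 2+2+1+1 = 6; tableLog = BIT_highbit32(6)+1 = 3; rest = 8-6 = 2 is a clean power of 2, so the implied last symbol gets lastWeight = BIT_highbit32(2)+1 = 2, bringing the grand total to 2^3 = 8.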
-
-
-/**************************/
-/* single-symbol decoding */
-/**************************/
-
-static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
-{
- BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
- U32 tableLog = 0;
- size_t iSize;
- U32 nbSymbols = 0;
- U32 n;
- U32 nextRankStart;
- void* const dtPtr = DTable + 1;
- HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
-
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzers complain ... */
-
- iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
- DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
-
- /* Prepare ranks */
- nextRankStart = 0;
- for (n=1; n<=tableLog; n++)
- {
- U32 current = nextRankStart;
- nextRankStart += (rankVal[n] << (n-1));
- rankVal[n] = current;
- }
-
- /* fill DTable */
- for (n=0; n<nbSymbols; n++)
- {
- const U32 w = huffWeight[n];
- const U32 length = (1 << w) >> 1;
- U32 i;
- HUF_DEltX2 D;
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
- for (i = rankVal[w]; i < rankVal[w] + length; i++)
- dt[i] = D;
- rankVal[w] += length;
- }
-
- return iSize;
-}
-
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
- const BYTE c = dt[val].byte;
- BIT_skipBits(Dstream, dt[val].nbBits);
- return c;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
- *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
- {
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- /* no more data to retrieve from bitstream, hence no need to reload */
- while (p < pEnd)
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- return pEnd-pStart;
-}
-
-
-static size_t HUF_decompress4X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U16* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable;
- const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
- {
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 supposed already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
- size_t errorCode;
-
- errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
- ip += errorCode;
- cSrcSize -= errorCode;
-
- return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/***************************/
-/* double-symbols decoding */
-/***************************/
-
-static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
- const U32* rankValOrigin, const int minWeight,
- const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
- U32 nbBitsBaseline, U16 baseSeq)
-{
- HUF_DEltX4 DElt;
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- U32 s;
-
- /* get pre-calculated rankVal */
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill skipped values */
- if (minWeight>1)
- {
- U32 i, skipSize = rankVal[minWeight];
- MEM_writeLE16(&(DElt.sequence), baseSeq);
- DElt.nbBits = (BYTE)(consumed);
- DElt.length = 1;
- for (i = 0; i < skipSize; i++)
- DTable[i] = DElt;
- }
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++) /* note : sortedSymbols already skipped */
- {
- const U32 symbol = sortedSymbols[s].symbol;
- const U32 weight = sortedSymbols[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 length = 1 << (sizeLog-nbBits);
- const U32 start = rankVal[weight];
- U32 i = start;
- const U32 end = start + length;
-
- MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
- DElt.nbBits = (BYTE)(nbBits + consumed);
- DElt.length = 2;
- do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
-
- rankVal[weight] += length;
- }
-}
-
-typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
-
-static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
- const sortedSymbol_t* sortedList, const U32 sortedListSize,
- const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
- const U32 nbBitsBaseline)
-{
- U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
- const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
- const U32 minBits = nbBitsBaseline - maxWeight;
- U32 s;
-
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++)
- {
- const U16 symbol = sortedList[s].symbol;
- const U32 weight = sortedList[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 start = rankVal[weight];
- const U32 length = 1 << (targetLog-nbBits);
-
- if (targetLog-nbBits >= minBits) /* enough room for a second symbol */
- {
- U32 sortedRank;
- int minWeight = nbBits + scaleLog;
- if (minWeight < 1) minWeight = 1;
- sortedRank = rankStart[minWeight];
- HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
- rankValOrigin[nbBits], minWeight,
- sortedList+sortedRank, sortedListSize-sortedRank,
- nbBitsBaseline, symbol);
- }
- else
- {
- U32 i;
- const U32 end = start + length;
- HUF_DEltX4 DElt;
-
- MEM_writeLE16(&(DElt.sequence), symbol);
- DElt.nbBits = (BYTE)(nbBits);
- DElt.length = 1;
- for (i = start; i < end; i++)
- DTable[i] = DElt;
- }
- rankVal[weight] += length;
- }
-}
-
-static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
-{
- BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
- sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
- U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
- U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
- U32* const rankStart = rankStart0+1;
- rankVal_t rankVal;
- U32 tableLog, maxW, sizeOfSort, nbSymbols;
- const U32 memLog = DTable[0];
- size_t iSize;
- void* dtPtr = DTable;
- HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1;
-
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
- if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
- //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzers complain ... */
-
- iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
- if (HUF_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
- /* find maxWeight */
- for (maxW = tableLog; rankStats[maxW]==0; maxW--)
- { if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
-
- /* Get start index of each weight */
- {
- U32 w, nextRankStart = 0;
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankStart;
- nextRankStart += rankStats[w];
- rankStart[w] = current;
- }
- rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
- sizeOfSort = nextRankStart;
- }
-
- /* sort symbols by weight */
- {
- U32 s;
- for (s=0; s<nbSymbols; s++)
- {
- U32 w = weightList[s];
- U32 r = rankStart[w]++;
- sortedSymbol[r].symbol = (BYTE)s;
- sortedSymbol[r].weight = (BYTE)w;
- }
- rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
- }
-
- /* Build rankVal */
- {
- const U32 minBits = tableLog+1 - maxW;
- U32 nextRankVal = 0;
- U32 w, consumed;
- const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
- U32* rankVal0 = rankVal[0];
- for (w=1; w<=maxW; w++)
- {
- U32 current = nextRankVal;
- nextRankVal += rankStats[w] << (w+rescale);
- rankVal0[w] = current;
- }
- for (consumed = minBits; consumed <= memLog - minBits; consumed++)
- {
- U32* rankValPtr = rankVal[consumed];
- for (w = 1; w <= maxW; w++)
- {
- rankValPtr[w] = rankVal0[w] >> consumed;
- }
- }
- }
-
- HUF_fillDTableX4(dt, memLog,
- sortedSymbol, sizeOfSort,
- rankStart0, rankVal, maxW,
- tableLog+1);
-
- return iSize;
-}
-
-
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BIT_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
- else
- {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
- {
- BIT_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- }
- }
- return 1;
-}
-
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
- {
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-static size_t HUF_decompress4X4_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U32* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable;
- const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
- {
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 supposed already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize;
- cSrcSize -= hSize;
-
- return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/**********************************/
-/* Generic decompression selector */
-/**********************************/
-
-typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
-{
- /* single, double, quad */
- {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
- {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
- {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
- {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
- {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
- {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
- {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
- {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
- {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
- {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
- {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
- {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
- {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
- {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
- {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
- {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
-};
-
-typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-
-static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
+/* ******************************************************************
+ zstd_v04.c
+ Decompression module for ZSTD v0.4 legacy format
+ Copyright (C) 2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Homepage : http://www.zstd.net/
+****************************************************************** */
+
+/*- Dependencies -*/
+#include "zstd_v04.h"
+
+
+/* ******************************************************************
+ mem.h
+ low-level memory access routines
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include <string.h> /* memcpy */
+
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef signed short S16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef signed long long S64;
+#endif
+
+
+/****************************************************************
+* Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
+ * This method is safe if your compiler supports it, and is *generally* as fast as or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ * It can generate buggy code on targets where the generated assembly depends on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6).
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define MEM_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define MEM_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates the C standard on structure alignment.
+Only use if there is no other choice to achieve best performance on the target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler-specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+ can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read16(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)(p[0] + (p[1]<<8));
+ }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write16(memPtr, val);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val;
+ p[1] = (BYTE)(val>>8);
+ }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read32(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+ }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write32(memPtr, val32);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val32;
+ p[1] = (BYTE)(val32>>8);
+ p[2] = (BYTE)(val32>>16);
+ p[3] = (BYTE)(val32>>24);
+ }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read64(memPtr);
+ else
+ {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+ + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+ }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+ if (MEM_isLittleEndian())
+ {
+ MEM_write64(memPtr, val64);
+ }
+ else
+ {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val64;
+ p[1] = (BYTE)(val64>>8);
+ p[2] = (BYTE)(val64>>16);
+ p[3] = (BYTE)(val64>>24);
+ p[4] = (BYTE)(val64>>32);
+ p[5] = (BYTE)(val64>>40);
+ p[6] = (BYTE)(val64>>48);
+ p[7] = (BYTE)(val64>>56);
+ }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readLE32(memPtr);
+ else
+ return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeLE32(memPtr, (U32)val);
+ else
+ MEM_writeLE64(memPtr, (U64)val);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/* ******************************************************************
+ Error codes list
+ Copyright (C) 2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+#ifndef ERROR_PUBLIC_H_MODULE
+#define ERROR_PUBLIC_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+* error list
+******************************************/
+enum {
+ ZSTD_error_No_Error,
+ ZSTD_error_GENERIC,
+ ZSTD_error_prefix_unknown,
+ ZSTD_error_frameParameter_unsupported,
+ ZSTD_error_frameParameter_unsupportedBy32bitsImplementation,
+ ZSTD_error_init_missing,
+ ZSTD_error_memory_allocation,
+ ZSTD_error_stage_wrong,
+ ZSTD_error_dstSize_tooSmall,
+ ZSTD_error_srcSize_wrong,
+ ZSTD_error_corruption_detected,
+ ZSTD_error_tableLog_tooLarge,
+ ZSTD_error_maxSymbolValue_tooLarge,
+ ZSTD_error_maxSymbolValue_tooSmall,
+ ZSTD_error_maxCode
+};
+
+/* note : functions return error codes as the negated enum values above (cast to size_t),
+ so compare with (size_t)(0-enum) */
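In practice the comparison looks like this (a sketch; `result` stands for any size_t returned by these functions, and the generic form is what ERR_isError() in the error module further down implements):

    if (result == (size_t)(0-ZSTD_error_srcSize_wrong)) { /* one specific error : input was truncated */ }
    if (result > (size_t)(0-ZSTD_error_maxCode))        { /* any error at all */ }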
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_PUBLIC_H_MODULE */
+
+
+
+/*
+ zstd - standard compression library
+ Header File for static linking only
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The objects defined in this file shall be considered experimental.
+ * They are not considered stable, as their prototypes may change in the future.
+ * You can use them for tests, to provide feedback, or if you can tolerate the risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Types
+***************************************/
+#define ZSTD_WINDOWLOG_MAX 26
+#define ZSTD_WINDOWLOG_MIN 18
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
+#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CONTENTLOG_MIN 4
+#define ZSTD_HASHLOG_MAX 28
+#define ZSTD_HASHLOG_MIN 4
+#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_SEARCHLENGTH_MAX 7
+#define ZSTD_SEARCHLENGTH_MIN 4
+
+/** from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
+
+typedef struct
+{
+ U64 srcSize; /* optional : tells how many bytes are present in the frame. Use 0 if not known. */
+ U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */
+ U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */
+ U32 hashLog; /* dispatch table : larger == more memory, faster */
+ U32 searchLog; /* nb of searches : larger == more compression, slower */
+ U32 searchLength; /* size of matches : larger == faster decompression, sometimes less compression */
+ ZSTD_strategy strategy;
+} ZSTD_parameters;
+
+typedef ZSTDv04_Dctx ZSTD_DCtx;
+
+/* *************************************
+* Advanced functions
+***************************************/
+/** ZSTD_decompress_usingDict
+* Same as ZSTD_decompressDCtx, using a dictionary's content as prefix
+* Note : dict can be NULL, in which case it's equivalent to ZSTD_decompressDCtx() */
+static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize);
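An illustrative call (a sketch; dctx and the buffers are assumed to be set up by the caller):

    size_t const r = ZSTD_decompress_usingDict(dctx, dst, maxDstSize,
                                               src, srcSize,
                                               NULL, 0);   /* NULL dict : behaves like ZSTD_decompressDCtx() */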
+
+
+/* **************************************
+* Streaming functions (direct mode)
+****************************************/
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
+static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
+static void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
+
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
+/**
+ Streaming decompression, bufferless mode
+
+ A ZSTD_DCtx object is required to track streaming operations.
+ Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+ A ZSTD_DCtx object can be re-used multiple times. Use ZSTD_resetDCtx() to return it to a fresh state.
+
+ The first operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+ This function doesn't consume its input. It needs enough input data to properly decode the frame header.
+ The objective is to retrieve params->windowLog, to know the minimum amount of memory required during decoding.
+ Result : 0 when successful; it means the ZSTD_parameters structure has been filled.
+ >0 : means there is not enough data in src. Provides the expected size needed to successfully decode the header.
+ an error code, which can be tested using ZSTD_isError() (for example, if it's not a ZSTD header)
+
+ Then, you can optionally insert a dictionary.
+ This operation must mimic the compressor's behavior; otherwise decompression will fail or be corrupted.
+
+ Then it's possible to start decompression.
+ Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
+ ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
+ They should preferably be located contiguously, prior to the current block. Alternatively, a round buffer is also possible.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+
+ A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+ Context can then be reset to start a new decompression.
+*/
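The protocol above condenses into a short loop; a minimal sketch, assuming fillInput() is a hypothetical callback that supplies exactly the requested number of compressed bytes, dst is large enough for every regenerated block, and frame-parameter retrieval / dictionary insertion are omitted:

    static size_t streamDecompress_sketch(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity)
    {
        BYTE inBuf[1 << 17];   /* assumed large enough for any single request */
        size_t total = 0;
        size_t toRead;
        ZSTD_resetDCtx(dctx);
        while ((toRead = ZSTD_nextSrcSizeToDecompress(dctx)) != 0)
        {
            size_t decoded;
            if (toRead > sizeof(inBuf)) return ERROR(srcSize_wrong);
            fillInput(inBuf, toRead);   /* hypothetical : read exactly toRead compressed bytes */
            decoded = ZSTD_decompressContinue(dctx, (BYTE*)dst + total, dstCapacity - total, inBuf, toRead);
            if (ERR_isError(decoded)) return decoded;
            total += decoded;           /* may be 0 : only a header was decoded */
        }
        return total;                   /* toRead == 0 : the frame is fully decoded */
    }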
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+
+
+/* *****************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/* *****************************************
+* Error Codes
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#ifdef ERROR
+# undef ERROR /* reported already defined on VS 2015 by Rich Geldreich */
+#endif
+#define ERROR(name) (size_t)-PREFIX(name)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
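+
+/* Worked example of the convention above (sketch, not library code) : an error
+   is returned as its enum value negated and cast to size_t, so it sits at the
+   very top of the size_t range and can never collide with a valid size :
+
+    size_t const r = ERROR(srcSize_wrong);   // == (size_t)-ZSTD_error_srcSize_wrong
+    if (ERR_isError(r)) return r;            // true, since r > ERROR(maxCode)
+*/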
+
+
+/* *****************************************
+* Error Strings
+******************************************/
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ static const char* codeError = "Unspecified error code";
+ switch( (size_t)(0-code) )
+ {
+ case ZSTD_error_No_Error: return "No error detected";
+ case ZSTD_error_GENERIC: return "Error (generic)";
+ case ZSTD_error_prefix_unknown: return "Unknown frame descriptor";
+ case ZSTD_error_frameParameter_unsupported: return "Unsupported frame parameter";
+ case ZSTD_error_frameParameter_unsupportedBy32bitsImplementation: return "Frame parameter unsupported in 32-bits mode";
+ case ZSTD_error_init_missing: return "Context should be init first";
+ case ZSTD_error_memory_allocation: return "Allocation error : not enough memory";
+ case ZSTD_error_dstSize_tooSmall: return "Destination buffer is too small";
+ case ZSTD_error_srcSize_wrong: return "Src size incorrect";
+ case ZSTD_error_corruption_detected: return "Corrupted block detected";
+ case ZSTD_error_tableLog_tooLarge: return "tableLog requires too much memory";
+ case ZSTD_error_maxSymbolValue_tooLarge: return "Unsupported max possible Symbol Value : too large";
+ case ZSTD_error_maxSymbolValue_tooSmall: return "Specified maxSymbolValue is too small";
+ case ZSTD_error_maxCode:
+ default: return codeError;
+ }
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+
+
+#endif /* ZSTD_STATIC_H */
+
+
+/*
+ zstd_internal - common functions to include
+ Header File for include
+ Copyright (C) 2014-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/* *************************************
+* Common constants
+***************************************/
+#define ZSTD_MAGICNUMBER 0xFD2FB524 /* v0.4 */
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB) /* define, for static allocation */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize_min = 5;
+#define ZSTD_frameHeaderSize_max 5 /* define, for static allocation */
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+#define BIT1 2
+#define BIT0 1
+
+#define IS_RAW BIT0
+#define IS_RLE BIT1
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 4
+
+#define MLbits 7
+#define LLbits 6
+#define Offbits 5
+#define MaxML ((1<<MLbits) - 1)
+#define MaxLL ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSELog 10
+#define LLFSELog 10
+#define OffFSELog 9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
+#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/* ******************************************
+* Shared functions to include for inlining
+********************************************/
+static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ do
+ COPY8(op, ip)
+ while (op < oend);
+}
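+
+/* Usage note (sketch) : since the copy proceeds in whole 8-byte chunks, the
+   destination may be written up to 8 bytes beyond 'length'. Callers therefore
+   keep some slack at the end of the output buffer; 'dstEnd' below is an
+   assumed pointer to the true end of the destination :
+
+    if (op + length + 8 <= dstEnd)
+        ZSTD_wildcopy(op, ip, length);   // fast path, slack available
+    else
+        memcpy(op, ip, length);          // close to the end : exact copy instead
+*/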
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ header file
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_H
+#define FSE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+
+
+/* *****************************************
+* FSE simple functions
+******************************************/
+static size_t FSE_decompress(void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize);
+/*!
+FSE_decompress():
+ Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'maxDstSize'.
+ return : size of regenerated data (<= maxDstSize)
+ or an error code, which can be tested using FSE_isError()
+
+ ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
+ Why ? : making this distinction requires a header.
+ Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
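+
+/* Minimal usage sketch for the simple API above (illustrative; DST_CAPACITY is
+   an assumed constant bounding the regenerated size) :
+
+    size_t const dSize = FSE_decompress(dst, DST_CAPACITY, cSrc, cSrcSize);
+    if (FSE_isError(dSize)) return dSize;   // corrupted, or not an FSE-compressed block
+    // 'dSize' bytes have been regenerated into 'dst'
+*/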
+
+
+/* *****************************************
+* Tool functions
+******************************************/
+/* Error Management */
+static unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
+
+
+
+/* *****************************************
+* FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide the normalized distribution using an external method.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*!
+FSE_readNCount():
+ Read compactly saved 'normalizedCounter' from 'rBuffer'.
+ return : size read from 'rBuffer'
+ or an errorCode, which can be tested using FSE_isError()
+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSE_DTable
+ Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+
+/*!
+FSE_buildDTable():
+ Builds 'dt', which must be already allocated, using FSE_createDTable()
+ return : 0,
+ or an errorCode, which can be tested using FSE_isError() */
+static size_t FSE_buildDTable ( FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSE_decompress_usingDTable():
+ Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
+ into 'dst' which must be already allocated.
+ return : size of regenerated data (necessarily <= maxDstSize)
+ or an errorCode, which can be tested using FSE_isError() */
+static size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If the block is uncompressed, use memcpy() instead.
+ If the block is a single repeated byte, use memset() instead.)
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if the useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
+'cSrcSize' must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
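+
+/* The three steps above, condensed into one sketch (error checks shortened;
+   256 counter cells cover the worst case for byte symbols, and the DTable is
+   sized with the FSE_DTABLE_SIZE_U32() macro from the static-linking section
+   further below; FSE_buildDTable() rejects any larger tableLog) :
+
+    short norm[256];
+    unsigned maxSymbol = 255, tableLog;
+    FSE_DTable dt[ FSE_DTABLE_SIZE_U32(12) ];   // 12 : assumed maximum tableLog
+
+    size_t const hSize = FSE_readNCount(norm, &maxSymbol, &tableLog, cSrc, cSrcSize);
+    if (FSE_isError(hSize)) return hSize;
+
+    {   size_t const e = FSE_buildDTable(dt, norm, maxSymbol, tableLog);
+        if (FSE_isError(e)) return e;
+    }
+
+    return FSE_decompress_usingDTable(dst, maxDstSize,
+                                      (const char*)cSrc + hSize, cSrcSize - hSize, dt);
+*/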
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FSE_H */
+
+
+/* ******************************************************************
+ bitstream
+ Part of NewGen Entropy library
+ header file (to include)
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+* This API consists of small unitary functions, which highly benefit from being inlined.
+* Since link-time optimization is not available for all compilers,
+* these functions are defined in a .h file, meant to be included.
+*/
+
+/**********************************************
+* bitStream decompression API (read backward)
+**********************************************/
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+ BIT_DStream_endOfBuffer = 1,
+ BIT_DStream_completed = 2,
+ BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/*
+* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64 bits on 64-bit systems, 32 bits on 32-bit systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is manually filled from memory by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream()
+*/
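+
+/* Sketch of the canonical read-backward loop just described (illustrative;
+   'nbBits' is whatever field width the caller expects next; a complete consumer
+   also drains the last container after BIT_DStream_endOfBuffer, as the FSE
+   decoder below does) :
+
+    BIT_DStream_t bitD;
+    {   size_t const r = BIT_initDStream(&bitD, srcBuffer, srcSize);
+        if (ERR_isError(r)) return r;
+    }
+    while (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished)
+    {
+        size_t const field = BIT_readBits(&bitD, nbBits);   // most recently written field comes out first
+        // ... consume 'field' ...
+    }
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);   // stream not fully consumed
+*/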
+
+
+/******************************************
+* unsafe API
+******************************************/
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/****************************************************************
+* Helper functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (register U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse ( &r, val );
+ return (unsigned) r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
+ return 31 - __builtin_clz (val);
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ unsigned r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
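+
+/* Worked examples (sketch) : BIT_highbit32() returns the position of the highest
+   set bit, i.e. floor(log2(val)) for val > 0 :
+   BIT_highbit32(1) == 0, BIT_highbit32(32) == 5, BIT_highbit32(255) == 7.
+   BIT_initDStream() below uses it to count the padding bits above the end-mark byte. */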
+
+
+/**********************************************************
+* bitStream decoding
+**********************************************************/
+
+/*!BIT_initDStream
+* Initialize a BIT_DStream_t.
+* @bitD : a pointer to an already allocated BIT_DStream_t structure
+* @srcBuffer must point at the beginning of a bitStream
+* @srcSize must be the exact size of the bitStream
+* @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+ if (srcSize >= sizeof(size_t)) /* normal case */
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+ }
+ else
+ {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
+ default:;
+ }
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BIT_highbit32(contain32);
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+ }
+
+ return srcSize;
+}
+
+/*!BIT_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BIT_lookBitsFast :
+*  unsafe version; works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+/*!BIT_readBits
+ * Read next n bits from local register.
+ * Pay attention not to read more bits than are currently held in the local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BIT_lookBits(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*!BIT_readBitsFast :
+* unsafe version; works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BIT_lookBitsFast(bitD, nbBits);
+ BIT_skipBits(bitD, nbBits);
+ return value;
+}
+
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BIT_DStream_overflow;
+
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
+ {
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+ }
+ if (bitD->ptr == bitD->start)
+ {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+ return BIT_DStream_completed;
+ }
+ {
+ U32 nbBytes = bitD->bitsConsumed >> 3;
+ BIT_DStream_status result = BIT_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start)
+ {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = BIT_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ return result;
+ }
+}
+
+/*! BIT_endOfDStream
+* @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
+
+
+
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSE_STATIC_H
+#define FSE_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+* Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate an FSE CTable/DTable as a table of unsigned, using the macros below */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
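+
+/* Usage sketch for the macros above (illustrative; 12 is an assumed maximum tableLog) :
+
+    FSE_DTable dTable[ FSE_DTABLE_SIZE_U32(12) ];   // 1 + (1<<12) unsigned cells
+    // 'dTable' can then be filled by FSE_buildDTable() and used by
+    // FSE_decompress_usingDTable()
+*/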
+
+
+/* *****************************************
+* FSE advanced API
+*******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+* FSE symbol decompression API
+*******************************************/
+typedef struct
+{
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream; // Stream context
+FSE_DState_t DState; // State context. Multiple ones are possible
+FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+ errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+ errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information, the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you stored into the bitStream (in reverse order).
+Note : maximum allowed nbBits is 25, for 32-bit compatibility.
+ size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from the local register (whose size depends on size_t).
+Refilling the register from memory is manually performed by the reload method.
+ endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : DStream reached the end of its buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : DStream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_overflow : DStream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+ FSE_endOfDState(&DState);
+*/
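+
+/* The pieces above, assembled into a minimal single-state loop (sketch; error
+   handling is simplified, 'op'/'oend' are assumed output pointers, and the real
+   decoder below interleaves two states for speed) :
+
+    BIT_DStream_t DStream;
+    FSE_DState_t  DState;
+    {   size_t const r = BIT_initDStream(&DStream, srcBuffer, srcSize);
+        if (FSE_isError(r)) return r;
+    }
+    FSE_initDState(&DState, &DStream, dt);
+
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&DStream) > BIT_DStream_completed)            // overflow : corrupted
+          || (op == oend)
+          || (BIT_endOfDStream(&DStream) && FSE_endOfDState(&DState)) )
+            break;
+        *op++ = FSE_decodeSymbol(&DState, &DStream);
+    }
+    if (!BIT_endOfDStream(&DStream) || !FSE_endOfDState(&DState))
+        return ERROR(corruption_detected);
+*/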
+
+
+/* *****************************************
+* FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+* Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSE_DTableHeader; /* sizeof U32 */
+
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSE_decode_t; /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ memcpy(&DTableH, dt, sizeof(DTableH));
+ DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BIT_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BIT_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FSE_STATIC_H */
+
+/* ******************************************************************
+ FSE : Finite State Entropy coder
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+* Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/*!FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif /* !FSE_COMMONDEFS_ONLY */
+
+/* **************************************************************
+* Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* **************************************************************
+* Dependencies
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+
+/* ***************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/* **************************************************************
+* Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/*-**************************************************************
+* Templates
+****************************************************************/
+/*
+ designed to be included
+ for type-specific functions (template emulation in C)
+ Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+# error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+# error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+
+static size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+ FSE_DTableHeader DTableH;
+ void* const tdPtr = dt+1; /* because dt is unsigned, 32-bits aligned on 32-bits */
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+ const U32 tableSize = 1 << tableLog;
+ const U32 tableMask = tableSize-1;
+ const U32 step = FSE_tableStep(tableSize);
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+ U32 position = 0;
+ U32 highThreshold = tableSize-1;
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
+ U32 noLarge = 1;
+ U32 s;
+
+ /* Sanity Checks */
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+ /* Init, lay down lowprob symbols */
+ DTableH.tableLog = (U16)tableLog;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ if (normalizedCounter[s]==-1)
+ {
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+ symbolNext[s] = 1;
+ }
+ else
+ {
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
+ symbolNext[s] = normalizedCounter[s];
+ }
+ }
+
+ /* Spread symbols */
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++)
+ {
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ }
+ }
+
+ if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+ /* Build Decoding table */
+ {
+ U32 i;
+ for (i=0; i<tableSize; i++)
+ {
+ FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+ U16 nextState = symbolNext[symbol]++;
+ tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+ }
+ }
+
+ DTableH.fastMode = (U16)noLarge;
+ memcpy(dt, &DTableH, sizeof(DTableH));
+ return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+* FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+ return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ int previous0 = 0;
+
+ if (hbSize < 4) return ERROR(srcSize_wrong);
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ while ((remaining>1) && (charnum<=*maxSVPtr))
+ {
+ if (previous0)
+ {
+ unsigned n0 = charnum;
+ while ((bitStream & 0xFFFF) == 0xFFFF)
+ {
+ n0+=24;
+ if (ip < iend-5)
+ {
+ ip+=2;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ {
+ bitStream >>= 16;
+ bitCount+=16;
+ }
+ }
+ while ((bitStream & 3) == 3)
+ {
+ n0+=3;
+ bitStream>>=2;
+ bitCount+=2;
+ }
+ n0 += bitStream & 3;
+ bitCount += 2;
+ if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ bitStream >>= 2;
+ }
+ {
+ const short max = (short)((2*threshold-1)-remaining);
+ short count;
+
+ if ((bitStream & (threshold-1)) < (U32)max)
+ {
+ count = (short)(bitStream & (threshold-1));
+ bitCount += nbBits-1;
+ }
+ else
+ {
+ count = (short)(bitStream & (2*threshold-1));
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ remaining -= FSE_abs(count);
+ normalizedCounter[charnum++] = count;
+ previous0 = !count;
+ while (remaining < threshold)
+ {
+ nbBits--;
+ threshold >>= 1;
+ }
+
+ {
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+ {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ }
+ else
+ {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+ }
+ }
+ }
+ if (remaining != 1) return ERROR(GENERIC);
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+ return ip-istart;
+}
+
+
+/*********************************************************
+* Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ void* dPtr = dt + 1;
+ FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->newState = 0;
+ cell->symbol = symbolValue;
+ cell->nbBits = 0;
+
+ return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+ void* ptr = dt;
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+ void* dPtr = dt + 1;
+ FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+ const unsigned tableSize = 1 << nbBits;
+ const unsigned tableMask = tableSize - 1;
+ const unsigned maxSymbolValue = tableMask;
+ unsigned s;
+
+ /* Sanity checks */
+ if (nbBits < 1) return ERROR(GENERIC); /* min size */
+
+ /* Build Decoding Table */
+ DTableH->tableLog = (U16)nbBits;
+ DTableH->fastMode = 1;
+ for (s=0; s<=maxSymbolValue; s++)
+ {
+ dinfo[s].newState = 0;
+ dinfo[s].symbol = (BYTE)s;
+ dinfo[s].nbBits = (BYTE)nbBits;
+ }
+
+ return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt, const unsigned fast)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-3;
+
+ BIT_DStream_t bitD;
+ FSE_DState_t state1;
+ FSE_DState_t state2;
+ size_t errorCode;
+
+ /* Init */
+ errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
+ if (FSE_isError(errorCode)) return errorCode;
+
+ FSE_initDState(&state1, &bitD, dt);
+ FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+ /* 4 symbols per loop */
+ for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+ {
+ op[0] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[1] = FSE_GETSYMBOL(&state2);
+
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+ op[2] = FSE_GETSYMBOL(&state1);
+
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BIT_reloadDStream(&bitD);
+
+ op[3] = FSE_GETSYMBOL(&state2);
+ }
+
+ /* tail */
+ /* note : BIT_reloadDStream(&bitD) >= BIT_DStream_endOfBuffer; Ends at exactly BIT_DStream_completed */
+ while (1)
+ {
+ if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state1);
+
+ if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+ break;
+
+ *op++ = FSE_GETSYMBOL(&state2);
+ }
+
+ /* end ? */
+ if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+ return op-ostart;
+
+ if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
+
+ return ERROR(corruption_detected);
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSE_DTable* dt)
+{
+ FSE_DTableHeader DTableH;
+ U32 fastMode;
+
+ memcpy(&DTableH, dt, sizeof(DTableH));
+ fastMode = DTableH.fastMode;
+
+ /* select fast mode (static) */
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+ const BYTE* const istart = (const BYTE*)cSrc;
+ const BYTE* ip = istart;
+ short counting[FSE_MAX_SYMBOL_VALUE+1];
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
+ unsigned tableLog;
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+ size_t errorCode;
+
+ if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
+
+ /* normal FSE decoding mode */
+ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+ if (FSE_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+ if (FSE_isError(errorCode)) return errorCode;
+
+ /* always return, even if it is an error code */
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif /* FSE_COMMONDEFS_ONLY */
+
+
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ header file
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+* Dependency
+******************************************/
+#include <stddef.h> /* size_t */
+
+
+/* ****************************************
+* Huff0 simple functions
+******************************************/
+static size_t HUF_decompress(void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize);
+/*!
+HUF_decompress():
+ Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'dstSize'.
+ 'dstSize' must be the exact size of original (uncompressed) data.
+ Note : in contrast with FSE, HUF_decompress can regenerate RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, because it knows the size to regenerate.
+ @return : size of regenerated data (== dstSize)
+ or an error code, which can be tested using HUF_isError()
+*/
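+
+/* Minimal usage sketch (illustrative) : 'originalSize' is the exact regenerated
+   size, known from the surrounding format (in zstd, from the literals header) :
+
+    size_t const r = HUF_decompress(dst, originalSize, cSrc, cSrcSize);
+    if (HUF_isError(r)) return r;
+    // on success, r == originalSize
+*/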
+
+
+/* ****************************************
+* Tool functions
+******************************************/
+/* Error Management */
+static unsigned HUF_isError(size_t code); /* tells if a return value is an error code */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUFF0_H */
+
+
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef HUFF0_STATIC_H
+#define HUFF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+* Static allocation macros
+******************************************/
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) /* nb Cells; use unsigned short for X2, unsigned int for X4 */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+ unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+ unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+ unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
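+
+/* Usage sketch (illustrative; 12 matches HUF_MAX_TABLELOG defined further below;
+   the first cell records the table's capacity, which HUF_readDTableX2() checks) :
+
+    HUF_CREATE_STATIC_DTABLEX2(dtableX2, 12);   // unsigned short dtableX2[1 + (1<<12)] = { 12 }
+    // 'dtableX2' can then be filled by HUF_readDTableX2() and used by
+    // HUF_decompress4X2_usingDTable()
+*/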
+
+
+/* ****************************************
+* Advanced decompression functions
+******************************************/
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
+
+
+/* ****************************************
+* Huff0 detailed API
+******************************************/
+/*!
+HUF_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build the Huffman table from the saved statistics, using HUF_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+
+*/
+static size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+static size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+
+static size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+static size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
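+
+/* Sketch of this flow for the single-symbol (X2) decoder (illustrative; error
+   checks shortened, and a table capacity of HUF_MAX_TABLELOG, defined further
+   below, is assumed) :
+
+    HUF_CREATE_STATIC_DTABLEX2(dt, HUF_MAX_TABLELOG);
+    size_t const hSize = HUF_readDTableX2(dt, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    return HUF_decompress4X2_usingDTable(dst, dstSize,
+                                         (const char*)cSrc + hSize, cSrcSize - hSize, dt);
+*/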
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUFF0_STATIC_H */
+
+
+
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+/* **************************************************************
+* Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+# define inline __inline
+#else
+# define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* **************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+
+/* **************************************************************
+* Constants
+****************************************************************/
+#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+# error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+
+/*-*******************************************************
+* Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUF_readStats
+ Read compact Huffman tree, saved by HUF_writeCTable
+ @huffWeight : destination buffer
+ @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 weightTotal;
+ U32 tableLog;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ size_t oSize;
+ U32 n;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzers complain ... */
+
+ if (iSize >= 128) /* special header */
+ {
+ if (iSize >= (242)) /* RLE */
+ {
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+ oSize = l[iSize-242];
+ memset(huffWeight, 1, hwSize);
+ iSize = 0;
+ }
+ else /* Incompressible */
+ {
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ for (n=0; n<oSize; n+=2)
+ {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ }
+ }
+ }
+ else /* header compressed with FSE (normal case) */
+ {
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+ weightTotal = 0;
+ for (n=0; n<oSize; n++)
+ {
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ }
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ {
+ U32 total = 1 << tableLog;
+ U32 rest = total - weightTotal;
+ U32 verif = 1 << BIT_highbit32(rest);
+ U32 lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ *tableLogPtr = tableLog;
+ return iSize+1;
+}
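+
+/* Worked example for the implied last weight :
+   decoded weights {2,2,1,1} give weightTotal = 2+2+1+1 = 6,
+   hence tableLog = highbit32(6)+1 = 3 and total = 1<<3 = 8 ;
+   rest = 8-6 = 2 is a clean power of 2, so the implied last symbol
+   receives weight highbit32(2)+1 = 2, completing the total to 8.
+   A symbol of weight w then decodes with (tableLog + 1 - w) bits. */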
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
+ U32 tableLog = 0;
+ size_t iSize;
+ U32 nbSymbols = 0;
+ U32 n;
+ U32 nextRankStart;
+ void* const dtPtr = DTable + 1;
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
+ DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+ /* Prepare ranks */
+ nextRankStart = 0;
+ for (n=1; n<=tableLog; n++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += (rankVal[n] << (n-1));
+ rankVal[n] = current;
+ }
+
+ /* fill DTable */
+ for (n=0; n<nbSymbols; n++)
+ {
+ const U32 w = huffWeight[n];
+ const U32 length = (1 << w) >> 1;
+ U32 i;
+ HUF_DEltX2 D;
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
+ dt[i] = D;
+ rankVal[w] += length;
+ }
+
+ return iSize;
+}
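+
+/* Layout note : the table filled above is a flat array of 2^tableLog entries.
+   A symbol of weight w owns (1<<w)>>1 consecutive entries, each storing the
+   symbol byte and its code length (tableLog + 1 - w), so decoding reduces to
+   peeking tableLog bits, indexing the array, and consuming only the nbBits
+   stored in the selected entry (see HUF_decodeSymbolX2 below). */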
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ const BYTE c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
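+
+/* The gated variants above bound how many symbols are decoded between two
+   bitstream reloads : HUF_DECODE_SYMBOLX2_2 only decodes on 64-bit targets,
+   and HUF_DECODE_SYMBOLX2_1 additionally decodes on 32-bit targets when
+   HUF_MAX_TABLELOG <= 12, so the bit container cannot run dry mid-batch. */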
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+ {
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, hence no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U16* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable;
+ const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+ {
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
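+
+/* Format note : the compressed input starts with a 6-byte jump table holding
+   three little-endian 16-bit lengths ; the 4th stream length is implied as
+   cSrcSize - 6 - (length1+length2+length3).  Each of the 4 bitstreams decodes
+   its own segment of (dstSize+3)/4 bytes, and their symbol loads are
+   interleaved so several independent decodes stay in flight per iteration. */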
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+ size_t errorCode;
+
+ errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq)
+{
+ HUF_DEltX4 DElt;
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ U32 s;
+
+ /* get pre-calculated rankVal */
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill skipped values */
+ if (minWeight>1)
+ {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) /* note : sortedSymbols already skipped */
+ {
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline)
+{
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++)
+ {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) /* enough room for a second symbol */
+ {
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol);
+ }
+ else
+ {
+ U32 i;
+ const U32 end = start + length;
+ HUF_DEltX4 DElt;
+
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ for (i = start; i < end; i++)
+ DTable[i] = DElt;
+ }
+ rankVal[weight] += length;
+ }
+}
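+
+/* Two-level fill note : when a symbol's code leaves at least minBits of the
+   targetLog lookup unused, its range is delegated to HUF_fillDTableX4Level2,
+   which pairs that symbol with every possible second symbol into 2-byte
+   `sequence` entries ; otherwise the symbol fills its range alone with
+   length-1 entries. */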
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+ BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+ sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+ U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+ U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+ U32* const rankStart = rankStart0+1;
+ rankVal_t rankVal;
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ const U32 memLog = DTable[0];
+ size_t iSize;
+ void* dtPtr = DTable;
+ HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1;
+
+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
+ if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzers complain ... */
+
+ iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+ { if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
+
+ /* Get start index of each weight */
+ {
+ U32 w, nextRankStart = 0;
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankStart;
+ nextRankStart += rankStats[w];
+ rankStart[w] = current;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ {
+ U32 s;
+ for (s=0; s<nbSymbols; s++)
+ {
+ U32 w = weightList[s];
+ U32 r = rankStart[w]++;
+ sortedSymbol[r].symbol = (BYTE)s;
+ sortedSymbol[r].weight = (BYTE)w;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ {
+ const U32 minBits = tableLog+1 - maxW;
+ U32 nextRankVal = 0;
+ U32 w, consumed;
+ const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
+ U32* rankVal0 = rankVal[0];
+ for (w=1; w<=maxW; w++)
+ {
+ U32 current = nextRankVal;
+ nextRankVal += rankStats[w] << (w+rescale);
+ rankVal0[w] = current;
+ }
+ for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+ {
+ U32* rankValPtr = rankVal[consumed];
+ for (w = 1; w <= maxW; w++)
+ {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ }
+ }
+ }
+
+ HUF_fillDTableX4(dt, memLog,
+ sortedSymbol, sizeOfSort,
+ rankStart0, rankVal, maxW,
+ tableLog+1);
+
+ return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
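+
+/* Double-symbol note : each HUF_DEltX4 entry stores up to 2 decoded bytes in
+   `sequence`, plus the total number of bits they consume.  The memcpy above
+   always writes 2 bytes, but the output pointer only advances by
+   dt[val].length (1 or 2), so a 1-symbol entry simply gets its second byte
+   overwritten by the next decode ; the very last output byte is handled by
+   HUF_decodeLastSymbolX4 below, which copies a single byte. */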
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else
+ {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+ {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ }
+ }
+ return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+ {
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+static size_t HUF_decompress4X4_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U32* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable;
+ const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode;
+ errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+ {
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize;
+ cSrcSize -= hSize;
+
+ return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* Generic decompression selector */
+/**********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+ /* single, double, quad */
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
+};
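+
+/* Selection note : the table above is indexed by Q = (cSrcSize*16)/dstSize,
+   i.e. the compression ratio quantized to 0..15, and by algorithm
+   (single / double / quad symbol).  Each entry provides a fixed table-building
+   cost and a per-256-output-bytes decoding cost, so the estimated time is
+   tableTime + decode256Time * (dstSize >> 8) ; HUF_decompress() below picks
+   the cheaper of the first two algorithms, after a small penalty on the more
+   memory-hungry variants. */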
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, NULL };
- /* estimate decompression time */
- U32 Q;
- const U32 D256 = (U32)(dstSize >> 8);
- U32 Dtime[3];
- U32 algoNb = 0;
- int n;
-
- /* validation checks */
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
- if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
-
- /* decoder timing evaluation */
- Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
- for (n=0; n<3; n++)
- Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
-
- Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
-
- if (Dtime[1] < Dtime[0]) algoNb = 1;
-
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
-
- //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
- //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
- //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
-}
-
-
-
-#endif /* ZSTD_CCOMMON_H_MODULE */
-
-
-/*
-    zstd - decompression module for v0.4 legacy format
- Copyright (C) 2015-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* ***************************************************************
-* Tuning parameters
-*****************************************************************/
-/*!
- * HEAPMODE :
- * Selects how the default decompression function ZSTD_decompress() allocates memory :
- * on the memory stack (0), or on the memory heap (1, requires malloc())
- */
-#ifndef ZSTD_HEAPMODE
-# define ZSTD_HEAPMODE 1
-#endif
-
-
-/* *******************************************************
-* Includes
-*********************************************************/
-#include <stdlib.h> /* calloc */
-#include <string.h> /* memcpy, memmove */
-#include <stdio.h> /* debug : printf */
-
-
-/* *******************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-#else
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* *************************************
-* Local types
-***************************************/
-typedef struct
-{
- blockType_t blockType;
- U32 origSize;
-} blockProperties_t;
-
-
-/* *******************************************************
-* Memory operations
-**********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
-
-
-/* *************************************
-* Error Management
-***************************************/
-
-/*! ZSTD_isError
-* tells if a return value is an error code */
-static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
-
-
-/* *************************************************************
-* Context management
-***************************************************************/
-typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
- ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
-
-struct ZSTDv04_Dctx_s
-{
- U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
- const void* previousDstEnd;
- const void* base;
- const void* vBase;
- const void* dictEnd;
- size_t expected;
- size_t headerSize;
- ZSTD_parameters params;
- blockType_t bType;
- ZSTD_dStage stage;
- const BYTE* litPtr;
- size_t litBufSize;
- size_t litSize;
- BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
- BYTE headerBuffer[ZSTD_frameHeaderSize_max];
-}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
-
-static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
-{
- dctx->expected = ZSTD_frameHeaderSize_min;
- dctx->stage = ZSTDds_getFrameHeaderSize;
- dctx->previousDstEnd = NULL;
- dctx->base = NULL;
- dctx->vBase = NULL;
- dctx->dictEnd = NULL;
- return 0;
-}
-
-static ZSTD_DCtx* ZSTD_createDCtx(void)
-{
- ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
- if (dctx==NULL) return NULL;
- ZSTD_resetDCtx(dctx);
- return dctx;
-}
-
-static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
-{
- free(dctx);
- return 0;
-}
-
-
-/* *************************************************************
-* Decompression section
-***************************************************************/
-/** ZSTD_decodeFrameHeader_Part1
-* decode the 1st part of the Frame Header, which tells Frame Header size.
-* srcSize must be == ZSTD_frameHeaderSize_min
-* @return : the full size of the Frame Header */
-static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
-{
- U32 magicNumber;
- if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
- magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
- zc->headerSize = ZSTD_frameHeaderSize_min;
- return zc->headerSize;
-}
-
-
-static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
-{
- U32 magicNumber;
- if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
- magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
- memset(params, 0, sizeof(*params));
- params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
- if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
- return 0;
-}
-
-/** ZSTD_decodeFrameHeader_Part2
-* decode the full Frame Header
-* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
-* @return : 0, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
-{
- size_t result;
- if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
- result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
- if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
- return result;
-}
-
-
-static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
-{
- const BYTE* const in = (const BYTE* const)src;
- BYTE headerFlags;
- U32 cSize;
-
- if (srcSize < 3) return ERROR(srcSize_wrong);
-
- headerFlags = *in;
- cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-
- bpPtr->blockType = (blockType_t)(headerFlags >> 6);
- bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
- if (bpPtr->blockType == bt_end) return 0;
- if (bpPtr->blockType == bt_rle) return 1;
- return cSize;
-}
-
-static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
- memcpy(dst, src, srcSize);
- return srcSize;
-}
-
-
-/** ZSTD_decompressLiterals
- @return : nb of bytes read from src, or an error code*/
-static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
- const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
-
- const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
-
- if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
- if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
-
- if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
-
- *maxDstSizePtr = litSize;
- return litCSize + 5;
-}
-
-
-/** ZSTD_decodeLiteralsBlock
- @return : nb of bytes read from src (< srcSize ) */
-static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
- const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
-{
- const BYTE* const istart = (const BYTE*) src;
-
- /* any compressed block with literals segment must be at least this size */
- if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-
- switch(*istart & 3)
- {
- /* compressed */
- case 0:
- {
- size_t litSize = BLOCKSIZE;
- const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+8;
- dctx->litSize = litSize;
- return readSize; /* works if it's an error too */
- }
- case IS_RAW:
- {
- const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
- {
- if (litSize > srcSize-3) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+8;
- dctx->litSize = litSize;
- return litSize+3;
- }
- /* direct reference into compressed stream */
- dctx->litPtr = istart+3;
- dctx->litBufSize = srcSize-3;
- dctx->litSize = litSize;
- return litSize+3; }
- case IS_RLE:
- {
- const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
- if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
- memset(dctx->litBuffer, istart[3], litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+8;
- dctx->litSize = litSize;
- return 4;
- }
- default:
- return ERROR(corruption_detected); /* forbidden nominal case */
- }
-}
-
-
-static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
- FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* ip = istart;
- const BYTE* const iend = istart + srcSize;
- U32 LLtype, Offtype, MLtype;
- U32 LLlog, Offlog, MLlog;
- size_t dumpsLength;
-
- /* check */
- if (srcSize < 5) return ERROR(srcSize_wrong);
-
- /* SeqHead */
- *nbSeq = MEM_readLE16(ip); ip+=2;
- LLtype = *ip >> 6;
- Offtype = (*ip >> 4) & 3;
- MLtype = (*ip >> 2) & 3;
- if (*ip & 2)
- {
- dumpsLength = ip[2];
- dumpsLength += ip[1] << 8;
- ip += 3;
- }
- else
- {
- dumpsLength = ip[1];
- dumpsLength += (ip[0] & 1) << 8;
- ip += 2;
- }
- *dumpsPtr = ip;
- ip += dumpsLength;
- *dumpsLengthPtr = dumpsLength;
-
- /* check */
- if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
- /* sequences */
- {
- S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
- size_t headerSize;
-
- /* Build DTables */
- switch(LLtype)
- {
- U32 max;
- case bt_rle :
- LLlog = 0;
- FSE_buildDTable_rle(DTableLL, *ip++); break;
- case bt_raw :
- LLlog = LLbits;
- FSE_buildDTable_raw(DTableLL, LLbits); break;
- default :
- max = MaxLL;
- headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (LLlog > LLFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableLL, norm, max, LLlog);
- }
-
- switch(Offtype)
- {
- U32 max;
- case bt_rle :
- Offlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
- break;
- case bt_raw :
- Offlog = Offbits;
- FSE_buildDTable_raw(DTableOffb, Offbits); break;
- default :
- max = MaxOff;
- headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (Offlog > OffFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableOffb, norm, max, Offlog);
- }
-
- switch(MLtype)
- {
- U32 max;
- case bt_rle :
- MLlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSE_buildDTable_rle(DTableML, *ip++); break;
- case bt_raw :
- MLlog = MLbits;
- FSE_buildDTable_raw(DTableML, MLbits); break;
- default :
- max = MaxML;
- headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
- if (FSE_isError(headerSize)) return ERROR(GENERIC);
- if (MLlog > MLFSELog) return ERROR(corruption_detected);
- ip += headerSize;
- FSE_buildDTable(DTableML, norm, max, MLlog);
- }
- }
-
- return ip-istart;
-}
-
-
-typedef struct {
- size_t litLength;
- size_t offset;
- size_t matchLength;
-} seq_t;
-
-typedef struct {
- BIT_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
- size_t prevOffset;
- const BYTE* dumps;
- const BYTE* dumpsEnd;
-} seqState_t;
-
-
-static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
-{
- size_t litLength;
- size_t prevOffset;
- size_t offset;
- size_t matchLength;
- const BYTE* dumps = seqState->dumps;
- const BYTE* const de = seqState->dumpsEnd;
-
- /* Literal length */
- litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
- prevOffset = litLength ? seq->offset : seqState->prevOffset;
- if (litLength == MaxLL)
- {
- U32 add = *dumps++;
- if (add < 255) litLength += add;
- else
- {
- litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
-
- /* Offset */
- {
- static const U32 offsetPrefix[MaxOff+1] = {
- 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
- 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
- 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
- U32 offsetCode, nbBits;
- offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- nbBits = offsetCode - 1;
- if (offsetCode==0) nbBits = 0; /* cmove */
- offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
- if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
- if (offsetCode==0) offset = prevOffset; /* cmove */
- if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
- }
-
- /* MatchLength */
- matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
- if (matchLength == MaxML)
- {
- U32 add = *dumps++;
- if (add < 255) matchLength += add;
- else
- {
- matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- dumps += 3;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
- matchLength += MINMATCH;
-
- /* save result */
- seq->litLength = litLength;
- seq->offset = offset;
- seq->matchLength = matchLength;
- seqState->dumps = dumps;
-}
-
-
-static size_t ZSTD_execSequence(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit_8,
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
-{
- static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
-    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
- BYTE* const oLitEnd = op + sequence.litLength;
- const size_t sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_8 = oend-8;
- const BYTE* const litEnd = *litPtr + sequence.litLength;
- const BYTE* match = oLitEnd - sequence.offset;
-
- /* check */
- if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
- if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
-
- /* copy Literals */
- ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = litEnd; /* update for next sequence */
-
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - base))
- {
- /* offset beyond prefix */
- if (sequence.offset > (size_t)(oLitEnd - vBase))
- return ERROR(corruption_detected);
- match = dictEnd - (base-match);
- if (match + sequence.matchLength <= dictEnd)
- {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- {
- size_t length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = base;
- }
- }
-
- /* match within prefix */
- if (sequence.offset < 8)
- {
- /* close range match, overlap */
- const int sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- }
- else
- {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-12)
- {
- if (op < oend_8)
- {
- ZSTD_wildcopy(op, match, oend_8 - op);
- match += oend_8 - op;
- op = oend_8;
- }
- while (op < oMatchEnd) *op++ = *match++;
- }
- else
- {
- ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- return sequenceLength;
-}
-
-
-static size_t ZSTD_decompressSequences(
- ZSTD_DCtx* dctx,
- void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize)
-{
- const BYTE* ip = (const BYTE*)seqStart;
- const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t errorCode, dumpsLength;
- const BYTE* litPtr = dctx->litPtr;
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
- const BYTE* const litEnd = litPtr + dctx->litSize;
- int nbSeq;
- const BYTE* dumps;
- U32* DTableLL = dctx->LLTable;
- U32* DTableML = dctx->MLTable;
- U32* DTableOffb = dctx->OffTable;
- const BYTE* const base = (const BYTE*) (dctx->base);
- const BYTE* const vBase = (const BYTE*) (dctx->vBase);
- const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-
- /* Build Decoding Tables */
- errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
- DTableLL, DTableML, DTableOffb,
- ip, iend-ip);
- if (ZSTD_isError(errorCode)) return errorCode;
- ip += errorCode;
-
- /* Regen sequences */
- {
- seq_t sequence;
- seqState_t seqState;
-
- memset(&sequence, 0, sizeof(sequence));
- sequence.offset = 4;
- seqState.dumps = dumps;
- seqState.dumpsEnd = dumps + dumpsLength;
- seqState.prevOffset = 4;
- errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
- if (ERR_isError(errorCode)) return ERROR(corruption_detected);
- FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
- FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
- FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
-
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; )
- {
- size_t oneSeqSize;
- nbSeq--;
- ZSTD_decodeSequence(&sequence, &seqState);
- oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- }
-
- /* check if reached exact end */
- if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
-
- /* last literal segment */
- {
- size_t lastLLSize = litEnd - litPtr;
- if (litPtr > litEnd) return ERROR(corruption_detected);
- if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
- if (op != litPtr) memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
- }
- }
-
- return op-ostart;
-}
-
-
-static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
-{
- if (dst != dctx->previousDstEnd) /* not contiguous */
- {
- dctx->dictEnd = dctx->previousDstEnd;
- dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
- dctx->base = dst;
- dctx->previousDstEnd = dst;
- }
-}
-
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- /* blockType == blockCompressed */
- const BYTE* ip = (const BYTE*)src;
-
- /* Decode literals sub-block */
- size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
- if (ZSTD_isError(litCSize)) return litCSize;
- ip += litCSize;
- srcSize -= litCSize;
-
- return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
-}
-
-
-static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize,
- const void* dict, size_t dictSize)
-{
- const BYTE* ip = (const BYTE*)src;
- const BYTE* iend = ip + srcSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t remainingSize = srcSize;
- blockProperties_t blockProperties;
-
- /* init */
- ZSTD_resetDCtx(ctx);
- if (dict)
- {
- ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
- ctx->dictEnd = ctx->previousDstEnd;
- ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
- ctx->base = dst;
- }
- else
- {
- ctx->vBase = ctx->base = ctx->dictEnd = dst;
- }
-
- /* Frame Header */
- {
- size_t frameHeaderSize;
- if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
- frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
- if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
- if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
- ip += frameHeaderSize; remainingSize -= frameHeaderSize;
- frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
- if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
- }
-
- /* Loop on each block */
- while (1)
- {
- size_t decodedSize=0;
- size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
- if (ZSTD_isError(cBlockSize)) return cBlockSize;
-
- ip += ZSTD_blockHeaderSize;
- remainingSize -= ZSTD_blockHeaderSize;
- if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
-
- switch(blockProperties.blockType)
- {
- case bt_compressed:
- decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
- break;
- case bt_raw :
- decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet supported */
- break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return ERROR(srcSize_wrong);
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- if (cBlockSize == 0) break; /* bt_end */
-
- if (ZSTD_isError(decodedSize)) return decodedSize;
- op += decodedSize;
- ip += cBlockSize;
- remainingSize -= cBlockSize;
- }
-
- return op-ostart;
-}
-
-
-/* ******************************
-* Streaming Decompression API
-********************************/
-static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
-{
- return dctx->expected;
-}
-
-static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- /* Sanity check */
- if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
- ZSTD_checkContinuity(ctx, dst);
-
- /* Decompress : frame header; part 1 */
- switch (ctx->stage)
- {
- case ZSTDds_getFrameHeaderSize :
+ /* estimate decompression time */
+ U32 Q;
+ const U32 D256 = (U32)(dstSize >> 8);
+ U32 Dtime[3];
+ U32 algoNb = 0;
+ int n;
+
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ /* decoder timing evaluation */
+ Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
+ for (n=0; n<3; n++)
+ Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+ Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+ if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+ //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
+ //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
+ //return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
+}
+
+
+
+#endif /* ZSTD_CCOMMON_H_MODULE */
+
+
+/*
+    zstd - decompression module for v0.4 legacy format
+ Copyright (C) 2015-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* ***************************************************************
+* Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Selects how the default decompression function ZSTD_decompress() allocates memory :
+ * on the memory stack (0), or on the memory heap (1, requires malloc())
+ */
+#ifndef ZSTD_HEAPMODE
+# define ZSTD_HEAPMODE 1
+#endif
+
+
+/* *******************************************************
+* Includes
+*********************************************************/
+#include <stdlib.h> /* calloc */
+#include <string.h> /* memcpy, memmove */
+#include <stdio.h> /* debug : printf */
+
+
+/* *******************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#else
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* *************************************
+* Local types
+***************************************/
+typedef struct
+{
+ blockType_t blockType;
+ U32 origSize;
+} blockProperties_t;
+
+
+/* *******************************************************
+* Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* *************************************
+* Error Management
+***************************************/
+
+/*! ZSTD_isError
+* tells if a return value is an error code */
+static unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+
+/* *************************************************************
+* Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+ ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
+
+struct ZSTDv04_Dctx_s
+{
+ U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+ U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+ U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ const void* previousDstEnd;
+ const void* base;
+ const void* vBase;
+ const void* dictEnd;
+ size_t expected;
+ size_t headerSize;
+ ZSTD_parameters params;
+ blockType_t bType;
+ ZSTD_dStage stage;
+ const BYTE* litPtr;
+ size_t litBufSize;
+ size_t litSize;
+ BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
+ BYTE headerBuffer[ZSTD_frameHeaderSize_max];
+}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
+
+static size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
+{
+ dctx->expected = ZSTD_frameHeaderSize_min;
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ dctx->previousDstEnd = NULL;
+ dctx->base = NULL;
+ dctx->vBase = NULL;
+ dctx->dictEnd = NULL;
+ return 0;
+}
+
+static ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+ ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
+ if (dctx==NULL) return NULL;
+ ZSTD_resetDCtx(dctx);
+ return dctx;
+}
+
+static size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+ free(dctx);
+ return 0;
+}
+
+
+/* *************************************************************
+* Decompression section
+***************************************************************/
+/** ZSTD_decodeFrameHeader_Part1
+* decode the 1st part of the Frame Header, which tells Frame Header size.
+* srcSize must be == ZSTD_frameHeaderSize_min
+* @return : the full size of the Frame Header */
+static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+ U32 magicNumber;
+ if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
+ magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+ zc->headerSize = ZSTD_frameHeaderSize_min;
+ return zc->headerSize;
+}
+
+
+static size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{
+ U32 magicNumber;
+ if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
+ magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+ memset(params, 0, sizeof(*params));
+ params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+ if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
+ return 0;
+}
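+
+/* Header layout note : a legacy v0.4 frame starts with the 4-byte magic
+   number, followed by one descriptor byte whose low nibble encodes
+   windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN and whose high nibble is reserved
+   (must be zero), as parsed above. */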
+
+/** ZSTD_decodeFrameHeader_Part2
+* decode the full Frame Header
+* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
+* @return : 0, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
+{
+ size_t result;
+ if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
+ result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
+ if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
+ return result;
+}
+
+
+static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+ const BYTE* const in = (const BYTE* const)src;
+ BYTE headerFlags;
+ U32 cSize;
+
+ if (srcSize < 3) return ERROR(srcSize_wrong);
+
+ headerFlags = *in;
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+ if (bpPtr->blockType == bt_end) return 0;
+ if (bpPtr->blockType == bt_rle) return 1;
+ return cSize;
+}
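+
+/* Block header note : each block starts with 3 header bytes.  The top 2 bits
+   of the first byte select the block type, and the remaining size field is
+   assembled above as in[2] + (in[1]<<8) + ((in[0] & 7)<<16) ; for RLE blocks
+   that value is the regenerated size, while the compressed payload is a
+   single byte. */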
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+ memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+
+/** ZSTD_decompressLiterals
+ @return : nb of bytes read from src, or an error code*/
+static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
+ const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+
+ const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+
+ if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
+ if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
+
+ if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
+
+ *maxDstSizePtr = litSize;
+ return litCSize + 5;
+}
+
+
+/** ZSTD_decodeLiteralsBlock
+ @return : nb of bytes read from src (< srcSize ) */
+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+ const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
+{
+ const BYTE* const istart = (const BYTE*) src;
+
+ /* any compressed block with literals segment must be at least this size */
+ if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+ switch(*istart & 3)
+ {
+ /* compressed */
+ case 0:
+ {
+ size_t litSize = BLOCKSIZE;
+ const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+8;
+ dctx->litSize = litSize;
+ return readSize; /* works if it's an error too */
+ }
+ case IS_RAW:
+ {
+ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
+ {
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+8;
+ dctx->litSize = litSize;
+ return litSize+3;
+ }
+ /* direct reference into compressed stream */
+ dctx->litPtr = istart+3;
+ dctx->litBufSize = srcSize-3;
+ dctx->litSize = litSize;
+ return litSize+3; }
+ case IS_RLE:
+ {
+ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+ memset(dctx->litBuffer, istart[3], litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+8;
+ dctx->litSize = litSize;
+ return 4;
+ }
+ default:
+ return ERROR(corruption_detected); /* forbidden nominal case */
+ }
+}
+
+
+static size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* ip = istart;
+ const BYTE* const iend = istart + srcSize;
+ U32 LLtype, Offtype, MLtype;
+ U32 LLlog, Offlog, MLlog;
+ size_t dumpsLength;
+
+ /* check */
+ if (srcSize < 5) return ERROR(srcSize_wrong);
+
+ /* SeqHead */
+ *nbSeq = MEM_readLE16(ip); ip+=2;
+ LLtype = *ip >> 6;
+ Offtype = (*ip >> 4) & 3;
+ MLtype = (*ip >> 2) & 3;
+ if (*ip & 2)
+ {
+ dumpsLength = ip[2];
+ dumpsLength += ip[1] << 8;
+ ip += 3;
+ }
+ else
+ {
+ dumpsLength = ip[1];
+ dumpsLength += (ip[0] & 1) << 8;
+ ip += 2;
+ }
+ *dumpsPtr = ip;
+ ip += dumpsLength;
+ *dumpsLengthPtr = dumpsLength;
+
+ /* check */
+ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+ /* sequences */
+ {
+ S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
+ size_t headerSize;
+
+ /* Build DTables */
+ switch(LLtype)
+ {
+ U32 max;
+ case bt_rle :
+ LLlog = 0;
+ FSE_buildDTable_rle(DTableLL, *ip++); break;
+ case bt_raw :
+ LLlog = LLbits;
+ FSE_buildDTable_raw(DTableLL, LLbits); break;
+ default :
+ max = MaxLL;
+ headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (LLlog > LLFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableLL, norm, max, LLlog);
+ }
+
+ switch(Offtype)
+ {
+ U32 max;
+ case bt_rle :
+ Offlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+ break;
+ case bt_raw :
+ Offlog = Offbits;
+ FSE_buildDTable_raw(DTableOffb, Offbits); break;
+ default :
+ max = MaxOff;
+ headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (Offlog > OffFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableOffb, norm, max, Offlog);
+ }
+
+ switch(MLtype)
+ {
+ U32 max;
+ case bt_rle :
+ MLlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSE_buildDTable_rle(DTableML, *ip++); break;
+ case bt_raw :
+ MLlog = MLbits;
+ FSE_buildDTable_raw(DTableML, MLbits); break;
+ default :
+ max = MaxML;
+ headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
+ if (FSE_isError(headerSize)) return ERROR(GENERIC);
+ if (MLlog > MLFSELog) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSE_buildDTable(DTableML, norm, max, MLlog);
+ }
+ }
+
+ return ip-istart;
+}
+
+
+typedef struct {
+ size_t litLength;
+ size_t offset;
+ size_t matchLength;
+} seq_t;
+
+typedef struct {
+ BIT_DStream_t DStream;
+ FSE_DState_t stateLL;
+ FSE_DState_t stateOffb;
+ FSE_DState_t stateML;
+ size_t prevOffset;
+ const BYTE* dumps;
+ const BYTE* dumpsEnd;
+} seqState_t;
+
+
+static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+ size_t litLength;
+ size_t prevOffset;
+ size_t offset;
+ size_t matchLength;
+ const BYTE* dumps = seqState->dumps;
+ const BYTE* const de = seqState->dumpsEnd;
+
+ /* Literal length */
+ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;
+ if (litLength == MaxLL)
+ {
+ U32 add = *dumps++;
+ if (add < 255) litLength += add;
+ else
+ {
+ litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
+ dumps += 3;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+
+ /* Offset */
+ {
+ static const U32 offsetPrefix[MaxOff+1] = {
+ 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+ 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+ 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+ U32 offsetCode, nbBits;
+ offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ nbBits = offsetCode - 1;
+ if (offsetCode==0) nbBits = 0; /* cmove */
+ offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
+ if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
+ if (offsetCode==0) offset = prevOffset; /* cmove */
+ if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
+ }
+
+ /* MatchLength */
+ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+ if (matchLength == MaxML)
+ {
+ U32 add = *dumps++;
+ if (add < 255) matchLength += add;
+ else
+ {
+ matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
+ dumps += 3;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+ matchLength += MINMATCH;
+
+ /* save result */
+ seq->litLength = litLength;
+ seq->offset = offset;
+ seq->matchLength = matchLength;
+ seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit_8,
+ const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+ static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ BYTE* const oLitEnd = op + sequence.litLength;
+ const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_8 = oend-8;
+ const BYTE* const litEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+
+ /* check */
+ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
+ if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
+
+ /* copy Literals */
+ ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = litEnd; /* update for next sequence */
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - base))
+ {
+ /* offset beyond prefix */
+ if (sequence.offset > (size_t)(oLitEnd - vBase))
+ return ERROR(corruption_detected);
+ match = dictEnd - (base-match);
+ if (match + sequence.matchLength <= dictEnd)
+ {
+ memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ {
+ size_t length1 = dictEnd - match;
+ memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = base;
+ }
+ }
+
+ /* match within prefix */
+ if (sequence.offset < 8)
+ {
+ /* close range match, overlap */
+ const int sub2 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= sub2;
+ }
+ else
+ {
+ ZSTD_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-12)
+ {
+ if (op < oend_8)
+ {
+ ZSTD_wildcopy(op, match, oend_8 - op);
+ match += oend_8 - op;
+ op = oend_8;
+ }
+ while (op < oMatchEnd) *op++ = *match++;
+ }
+ else
+ {
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ return sequenceLength;
+}
+
+
+static size_t ZSTD_decompressSequences(
+ ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize)
+{
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t errorCode, dumpsLength;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
+ const BYTE* const litEnd = litPtr + dctx->litSize;
+ int nbSeq;
+ const BYTE* dumps;
+ U32* DTableLL = dctx->LLTable;
+ U32* DTableML = dctx->MLTable;
+ U32* DTableOffb = dctx->OffTable;
+ const BYTE* const base = (const BYTE*) (dctx->base);
+ const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+ /* Build Decoding Tables */
+ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+ DTableLL, DTableML, DTableOffb,
+ ip, iend-ip);
+ if (ZSTD_isError(errorCode)) return errorCode;
+ ip += errorCode;
+
+ /* Regen sequences */
+ {
+ seq_t sequence;
+ seqState_t seqState;
+
+ memset(&sequence, 0, sizeof(sequence));
+ sequence.offset = 4;
+ seqState.dumps = dumps;
+ seqState.dumpsEnd = dumps + dumpsLength;
+ seqState.prevOffset = 4;
+ errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
+ if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; )
+ {
+ size_t oneSeqSize;
+ nbSeq--;
+ ZSTD_decodeSequence(&sequence, &seqState);
+ oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+
+ /* check if reached exact end */
+ if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
+
+ /* last literal segment */
+ {
+ size_t lastLLSize = litEnd - litPtr;
+ if (litPtr > litEnd) return ERROR(corruption_detected);
+ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+ if (op != litPtr) memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+ }
+
+ return op-ostart;
+}
+
+
+static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+{
+ if (dst != dctx->previousDstEnd) /* not contiguous */
+ {
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+ dctx->base = dst;
+ dctx->previousDstEnd = dst;
+ }
+}
+
+
+static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ /* blockType == blockCompressed */
+ const BYTE* ip = (const BYTE*)src;
+
+ /* Decode literals sub-block */
+ size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+ if (ZSTD_isError(litCSize)) return litCSize;
+ ip += litCSize;
+ srcSize -= litCSize;
+
+ return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
+}
+
+
+static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* iend = ip + srcSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t remainingSize = srcSize;
+ blockProperties_t blockProperties;
+
+ /* init */
+ ZSTD_resetDCtx(ctx);
+ if (dict)
+ {
+ ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
+ ctx->dictEnd = ctx->previousDstEnd;
+ ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+ ctx->base = dst;
+ }
+ else
+ {
+ ctx->vBase = ctx->base = ctx->dictEnd = dst;
+ }
+
+ /* Frame Header */
+ {
+ size_t frameHeaderSize;
+ if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+ frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+ if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+ if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+ frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
+ if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+ }
+
+ /* Loop on each block */
+ while (1)
+ {
+ size_t decodedSize=0;
+ size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+ if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+ ip += ZSTD_blockHeaderSize;
+ remainingSize -= ZSTD_blockHeaderSize;
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
+ break;
+ case bt_raw :
+ decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet supported */
+ break;
+ case bt_end :
+ /* end of frame */
+ if (remainingSize) return ERROR(srcSize_wrong);
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ if (cBlockSize == 0) break; /* bt_end */
+
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ op += decodedSize;
+ ip += cBlockSize;
+ remainingSize -= cBlockSize;
+ }
+
+ return op-ostart;
+}
+
+
+/* ******************************
+* Streaming Decompression API
+********************************/
+static size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
+{
+ return dctx->expected;
+}
+
+static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ /* Sanity check */
+ if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+ ZSTD_checkContinuity(ctx, dst);
+
+ /* Decompress : frame header; part 1 */
+ switch (ctx->stage)
+ {
+ case ZSTDds_getFrameHeaderSize :
/* get frame header size */
if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
@@ -3628,432 +3628,432 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
if (ctx->headerSize > ZSTD_frameHeaderSize_min) return ERROR(GENERIC); /* impossible */
ctx->expected = 0; /* not necessary to copy more */
/* fallthrough */
- case ZSTDds_decodeFrameHeader:
+ case ZSTDds_decodeFrameHeader:
/* get frame header */
{ size_t const result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
- if (ZSTD_isError(result)) return result;
- ctx->expected = ZSTD_blockHeaderSize;
- ctx->stage = ZSTDds_decodeBlockHeader;
- return 0;
- }
- case ZSTDds_decodeBlockHeader:
+ if (ZSTD_isError(result)) return result;
+ ctx->expected = ZSTD_blockHeaderSize;
+ ctx->stage = ZSTDds_decodeBlockHeader;
+ return 0;
+ }
+ case ZSTDds_decodeBlockHeader:
/* Decode block header */
{ blockProperties_t bp;
size_t const blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
- if (ZSTD_isError(blockSize)) return blockSize;
- if (bp.blockType == bt_end)
- {
- ctx->expected = 0;
- ctx->stage = ZSTDds_getFrameHeaderSize;
- }
- else
- {
- ctx->expected = blockSize;
- ctx->bType = bp.blockType;
- ctx->stage = ZSTDds_decompressBlock;
- }
- return 0;
- }
- case ZSTDds_decompressBlock:
- {
- /* Decompress : block content */
- size_t rSize;
- switch(ctx->bType)
- {
- case bt_compressed:
- rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
- break;
- case bt_raw :
- rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
- break;
- default:
- return ERROR(GENERIC);
- }
- ctx->stage = ZSTDds_decodeBlockHeader;
- ctx->expected = ZSTD_blockHeaderSize;
- ctx->previousDstEnd = (char*)dst + rSize;
- return rSize;
- }
- default:
- return ERROR(GENERIC); /* impossible */
- }
-}
-
-
-static void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
-{
- ctx->dictEnd = ctx->previousDstEnd;
- ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
- ctx->base = dict;
- ctx->previousDstEnd = (const char*)dict + dictSize;
-}
-
-
-
-/*
- Buffered version of Zstd compression library
- Copyright (C) 2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
-
-/* *************************************
-* Includes
-***************************************/
-#include <stdlib.h>
-
-
-/** ************************************************
-* Streaming decompression
-*
-* A ZBUFF_DCtx object is required to track streaming operation.
-* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
-* Use ZBUFF_decompressInit() to start a new decompression operation.
-* ZBUFF_DCtx objects can be reused multiple times.
-*
-* Use ZBUFF_decompressContinue() repetitively to consume your input.
-* *srcSizePtr and *maxDstSizePtr can be any size.
-* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
-* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input.
-* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
-* or 0 when a frame is completely decoded
-* or an error code, which can be tested using ZBUFF_isError().
-*
-* Hint : recommended buffer sizes (not compulsory)
-* output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
-* input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* **************************************************/
-
-typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader,
- ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
-
-/* *** Resource management *** */
-
-#define ZSTD_frameHeaderSize_max 5 /* too magical, should come from reference */
-struct ZBUFFv04_DCtx_s {
- ZSTD_DCtx* zc;
- ZSTD_parameters params;
- char* inBuff;
- size_t inBuffSize;
- size_t inPos;
- char* outBuff;
- size_t outBuffSize;
- size_t outStart;
- size_t outEnd;
- size_t hPos;
- const char* dict;
- size_t dictSize;
- ZBUFF_dStage stage;
- unsigned char headerBuffer[ZSTD_frameHeaderSize_max];
-}; /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
-
-typedef ZBUFFv04_DCtx ZBUFF_DCtx;
-
-
-static ZBUFF_DCtx* ZBUFF_createDCtx(void)
-{
- ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
- if (zbc==NULL) return NULL;
- memset(zbc, 0, sizeof(*zbc));
- zbc->zc = ZSTD_createDCtx();
- zbc->stage = ZBUFFds_init;
- return zbc;
-}
-
-static size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc)
-{
- if (zbc==NULL) return 0; /* support free on null */
- ZSTD_freeDCtx(zbc->zc);
- free(zbc->inBuff);
- free(zbc->outBuff);
- free(zbc);
- return 0;
-}
-
-
-/* *** Initialization *** */
-
-static size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc)
-{
- zbc->stage = ZBUFFds_readHeader;
- zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = zbc->dictSize = 0;
- return ZSTD_resetDCtx(zbc->zc);
-}
-
-
-static size_t ZBUFF_decompressWithDictionary(ZBUFF_DCtx* zbc, const void* src, size_t srcSize)
-{
- zbc->dict = (const char*)src;
- zbc->dictSize = srcSize;
- return 0;
-}
-
-static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- size_t length = MIN(maxDstSize, srcSize);
- memcpy(dst, src, length);
- return length;
-}
-
-/* *** Decompression *** */
-
-static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
-{
- const char* const istart = (const char*)src;
- const char* ip = istart;
- const char* const iend = istart + *srcSizePtr;
- char* const ostart = (char*)dst;
- char* op = ostart;
- char* const oend = ostart + *maxDstSizePtr;
- U32 notDone = 1;
-
- while (notDone)
- {
- switch(zbc->stage)
- {
-
- case ZBUFFds_init :
- return ERROR(init_missing);
-
- case ZBUFFds_readHeader :
- /* read header from src */
+ if (ZSTD_isError(blockSize)) return blockSize;
+ if (bp.blockType == bt_end)
+ {
+ ctx->expected = 0;
+ ctx->stage = ZSTDds_getFrameHeaderSize;
+ }
+ else
+ {
+ ctx->expected = blockSize;
+ ctx->bType = bp.blockType;
+ ctx->stage = ZSTDds_decompressBlock;
+ }
+ return 0;
+ }
+ case ZSTDds_decompressBlock:
+ {
+ /* Decompress : block content */
+ size_t rSize;
+ switch(ctx->bType)
+ {
+ case bt_compressed:
+ rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
+ break;
+ case bt_raw :
+ rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet handled */
+ break;
+ case bt_end : /* should never happen (filtered at phase 1) */
+ rSize = 0;
+ break;
+ default:
+ return ERROR(GENERIC);
+ }
+ ctx->stage = ZSTDds_decodeBlockHeader;
+ ctx->expected = ZSTD_blockHeaderSize;
+ ctx->previousDstEnd = (char*)dst + rSize;
+ return rSize;
+ }
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+}
+
+
+static void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
+{
+ ctx->dictEnd = ctx->previousDstEnd;
+ ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+ ctx->base = dict;
+ ctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+
+
+/*
+ Buffered version of Zstd compression library
+ Copyright (C) 2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+/* *************************************
+* Includes
+***************************************/
+#include <stdlib.h>
+
+
+/** ************************************************
+* Streaming decompression
+*
+* A ZBUFF_DCtx object is required to track streaming operation.
+* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+* Use ZBUFF_decompressInit() to start a new decompression operation.
+* ZBUFF_DCtx objects can be reused multiple times.
+*
+* Use ZBUFF_decompressContinue() repetitively to consume your input.
+* *srcSizePtr and *maxDstSizePtr can be any size.
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+* Note that it may not consume the entire input, in which case it's up to the caller to call the function again with the remaining input.
+* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+* or 0 when a frame is completely decoded
+* or an error code, which can be tested using ZBUFF_isError().
+*
+* Hint : recommended buffer sizes (not compulsory)
+* output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+* input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader,
+ ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
+
+/* *** Resource management *** */
+
+#define ZSTD_frameHeaderSize_max 5 /* too magical, should come from reference */
+struct ZBUFFv04_DCtx_s {
+ ZSTD_DCtx* zc;
+ ZSTD_parameters params;
+ char* inBuff;
+ size_t inBuffSize;
+ size_t inPos;
+ char* outBuff;
+ size_t outBuffSize;
+ size_t outStart;
+ size_t outEnd;
+ size_t hPos;
+ const char* dict;
+ size_t dictSize;
+ ZBUFF_dStage stage;
+ unsigned char headerBuffer[ZSTD_frameHeaderSize_max];
+}; /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
+
+typedef ZBUFFv04_DCtx ZBUFF_DCtx;
+
+
+static ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{
+ ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
+ if (zbc==NULL) return NULL;
+ memset(zbc, 0, sizeof(*zbc));
+ zbc->zc = ZSTD_createDCtx();
+ zbc->stage = ZBUFFds_init;
+ return zbc;
+}
+
+static size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc)
+{
+ if (zbc==NULL) return 0; /* support free on null */
+ ZSTD_freeDCtx(zbc->zc);
+ free(zbc->inBuff);
+ free(zbc->outBuff);
+ free(zbc);
+ return 0;
+}
+
+
+/* *** Initialization *** */
+
+static size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc)
+{
+ zbc->stage = ZBUFFds_readHeader;
+ zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = zbc->dictSize = 0;
+ return ZSTD_resetDCtx(zbc->zc);
+}
+
+
+static size_t ZBUFF_decompressWithDictionary(ZBUFF_DCtx* zbc, const void* src, size_t srcSize)
+{
+ zbc->dict = (const char*)src;
+ zbc->dictSize = srcSize;
+ return 0;
+}
+
+static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ size_t length = MIN(maxDstSize, srcSize);
+ memcpy(dst, src, length);
+ return length;
+}
+
+/* *** Decompression *** */
+
+static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+ const char* const istart = (const char*)src;
+ const char* ip = istart;
+ const char* const iend = istart + *srcSizePtr;
+ char* const ostart = (char*)dst;
+ char* op = ostart;
+ char* const oend = ostart + *maxDstSizePtr;
+ U32 notDone = 1;
+
+ while (notDone)
+ {
+ switch(zbc->stage)
+ {
+
+ case ZBUFFds_init :
+ return ERROR(init_missing);
+
+ case ZBUFFds_readHeader :
+ /* read header from src */
{ size_t const headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
- if (ZSTD_isError(headerSize)) return headerSize;
+ if (ZSTD_isError(headerSize)) return headerSize;
if (headerSize) {
- /* not enough input to decode header : tell how many bytes would be necessary */
- memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
- zbc->hPos += *srcSizePtr;
- *maxDstSizePtr = 0;
- zbc->stage = ZBUFFds_loadHeader;
- return headerSize - zbc->hPos;
- }
- zbc->stage = ZBUFFds_decodeHeader;
- break;
- }
-
- case ZBUFFds_loadHeader:
- /* complete header from src */
+ /* not enough input to decode header : tell how many bytes would be necessary */
+ memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+ zbc->hPos += *srcSizePtr;
+ *maxDstSizePtr = 0;
+ zbc->stage = ZBUFFds_loadHeader;
+ return headerSize - zbc->hPos;
+ }
+ zbc->stage = ZBUFFds_decodeHeader;
+ break;
+ }
+
+ case ZBUFFds_loadHeader:
+ /* complete header from src */
{ size_t headerSize = ZBUFF_limitCopy(
- zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
- src, *srcSizePtr);
- zbc->hPos += headerSize;
- ip += headerSize;
- headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
- if (ZSTD_isError(headerSize)) return headerSize;
- if (headerSize) {
- /* not enough input to decode header : tell how many bytes would be necessary */
- *maxDstSizePtr = 0;
- return headerSize - zbc->hPos;
- } }
+ zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
+ src, *srcSizePtr);
+ zbc->hPos += headerSize;
+ ip += headerSize;
+ headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+ if (ZSTD_isError(headerSize)) return headerSize;
+ if (headerSize) {
+ /* not enough input to decode header : tell how many bytes would be necessary */
+ *maxDstSizePtr = 0;
+ return headerSize - zbc->hPos;
+ } }
/* intentional fallthrough */
-
- case ZBUFFds_decodeHeader:
- /* apply header to create / resize buffers */
+
+ case ZBUFFds_decodeHeader:
+ /* apply header to create / resize buffers */
{ size_t const neededOutSize = (size_t)1 << zbc->params.windowLog;
size_t const neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */
- if (zbc->inBuffSize < neededInSize) {
- free(zbc->inBuff);
- zbc->inBuffSize = neededInSize;
- zbc->inBuff = (char*)malloc(neededInSize);
- if (zbc->inBuff == NULL) return ERROR(memory_allocation);
- }
- if (zbc->outBuffSize < neededOutSize) {
- free(zbc->outBuff);
- zbc->outBuffSize = neededOutSize;
- zbc->outBuff = (char*)malloc(neededOutSize);
- if (zbc->outBuff == NULL) return ERROR(memory_allocation);
- } }
- if (zbc->dictSize)
- ZSTD_decompress_insertDictionary(zbc->zc, zbc->dict, zbc->dictSize);
- if (zbc->hPos) {
- /* some data already loaded into headerBuffer : transfer into inBuff */
- memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
- zbc->inPos = zbc->hPos;
- zbc->hPos = 0;
- zbc->stage = ZBUFFds_load;
- break;
- }
- zbc->stage = ZBUFFds_read;
-
- case ZBUFFds_read:
- {
- size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
- if (neededInSize==0) /* end of frame */
- {
- zbc->stage = ZBUFFds_init;
- notDone = 0;
- break;
- }
- if ((size_t)(iend-ip) >= neededInSize)
- {
- /* directly decode from src */
- size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
- zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
- ip, neededInSize);
- if (ZSTD_isError(decodedSize)) return decodedSize;
- ip += neededInSize;
- if (!decodedSize) break; /* this was just a header */
- zbc->outEnd = zbc->outStart + decodedSize;
- zbc->stage = ZBUFFds_flush;
- break;
- }
- if (ip==iend) { notDone = 0; break; } /* no more input */
- zbc->stage = ZBUFFds_load;
- }
-
- case ZBUFFds_load:
- {
- size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
- size_t toLoad = neededInSize - zbc->inPos; /* should always be <= remaining space within inBuff */
- size_t loadedSize;
- if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected); /* should never happen */
- loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
- ip += loadedSize;
- zbc->inPos += loadedSize;
- if (loadedSize < toLoad) { notDone = 0; break; } /* not enough input, wait for more */
- {
- size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
- zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
- zbc->inBuff, neededInSize);
- if (ZSTD_isError(decodedSize)) return decodedSize;
- zbc->inPos = 0; /* input is consumed */
- if (!decodedSize) { zbc->stage = ZBUFFds_read; break; } /* this was just a header */
- zbc->outEnd = zbc->outStart + decodedSize;
- zbc->stage = ZBUFFds_flush;
- // break; /* ZBUFFds_flush follows */
- }
- }
- case ZBUFFds_flush:
- {
- size_t toFlushSize = zbc->outEnd - zbc->outStart;
- size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
- op += flushedSize;
- zbc->outStart += flushedSize;
- if (flushedSize == toFlushSize)
- {
- zbc->stage = ZBUFFds_read;
- if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
- zbc->outStart = zbc->outEnd = 0;
- break;
- }
- /* cannot flush everything */
- notDone = 0;
- break;
- }
- default: return ERROR(GENERIC); /* impossible */
- }
- }
-
- *srcSizePtr = ip-istart;
- *maxDstSizePtr = op-ostart;
-
- {
- size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbc->zc);
- if (nextSrcSizeHint > 3) nextSrcSizeHint+= 3; /* get the next block header while at it */
- nextSrcSizeHint -= zbc->inPos; /* already loaded*/
- return nextSrcSizeHint;
- }
-}
-
-
-/* *************************************
-* Tool functions
-***************************************/
-unsigned ZBUFFv04_isError(size_t errorCode) { return ERR_isError(errorCode); }
-const char* ZBUFFv04_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
-size_t ZBUFFv04_recommendedDInSize() { return BLOCKSIZE + 3; }
-size_t ZBUFFv04_recommendedDOutSize() { return BLOCKSIZE; }
-
-
-
-/*- ========================================================================= -*/
-
-/* final wrapping stage */
-
-size_t ZSTDv04_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
-}
-
-size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
-#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
- size_t regenSize;
- ZSTD_DCtx* dctx = ZSTD_createDCtx();
- if (dctx==NULL) return ERROR(memory_allocation);
- regenSize = ZSTDv04_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
- ZSTD_freeDCtx(dctx);
- return regenSize;
-#else
- ZSTD_DCtx dctx;
+ if (zbc->inBuffSize < neededInSize) {
+ free(zbc->inBuff);
+ zbc->inBuffSize = neededInSize;
+ zbc->inBuff = (char*)malloc(neededInSize);
+ if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+ }
+ if (zbc->outBuffSize < neededOutSize) {
+ free(zbc->outBuff);
+ zbc->outBuffSize = neededOutSize;
+ zbc->outBuff = (char*)malloc(neededOutSize);
+ if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+ } }
+ if (zbc->dictSize)
+ ZSTD_decompress_insertDictionary(zbc->zc, zbc->dict, zbc->dictSize);
+ if (zbc->hPos) {
+ /* some data already loaded into headerBuffer : transfer into inBuff */
+ memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+ zbc->inPos = zbc->hPos;
+ zbc->hPos = 0;
+ zbc->stage = ZBUFFds_load;
+ break;
+ }
+ zbc->stage = ZBUFFds_read;
+
+ case ZBUFFds_read:
+ {
+ size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+ if (neededInSize==0) /* end of frame */
+ {
+ zbc->stage = ZBUFFds_init;
+ notDone = 0;
+ break;
+ }
+ if ((size_t)(iend-ip) >= neededInSize)
+ {
+ /* directly decode from src */
+ size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+ zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+ ip, neededInSize);
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ ip += neededInSize;
+ if (!decodedSize) break; /* this was just a header */
+ zbc->outEnd = zbc->outStart + decodedSize;
+ zbc->stage = ZBUFFds_flush;
+ break;
+ }
+ if (ip==iend) { notDone = 0; break; } /* no more input */
+ zbc->stage = ZBUFFds_load;
+ }
+
+ case ZBUFFds_load:
+ {
+ size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+ size_t toLoad = neededInSize - zbc->inPos; /* should always be <= remaining space within inBuff */
+ size_t loadedSize;
+ if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected); /* should never happen */
+ loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+ ip += loadedSize;
+ zbc->inPos += loadedSize;
+ if (loadedSize < toLoad) { notDone = 0; break; } /* not enough input, wait for more */
+ {
+ size_t decodedSize = ZSTD_decompressContinue(zbc->zc,
+ zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+ zbc->inBuff, neededInSize);
+ if (ZSTD_isError(decodedSize)) return decodedSize;
+ zbc->inPos = 0; /* input is consumed */
+ if (!decodedSize) { zbc->stage = ZBUFFds_read; break; } /* this was just a header */
+ zbc->outEnd = zbc->outStart + decodedSize;
+ zbc->stage = ZBUFFds_flush;
+ // break; /* ZBUFFds_flush follows */
+ }
+ }
+ case ZBUFFds_flush:
+ {
+ size_t toFlushSize = zbc->outEnd - zbc->outStart;
+ size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+ op += flushedSize;
+ zbc->outStart += flushedSize;
+ if (flushedSize == toFlushSize)
+ {
+ zbc->stage = ZBUFFds_read;
+ if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+ zbc->outStart = zbc->outEnd = 0;
+ break;
+ }
+ /* cannot flush everything */
+ notDone = 0;
+ break;
+ }
+ default: return ERROR(GENERIC); /* impossible */
+ }
+ }
+
+ *srcSizePtr = ip-istart;
+ *maxDstSizePtr = op-ostart;
+
+ {
+ size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbc->zc);
+ if (nextSrcSizeHint > 3) nextSrcSizeHint+= 3; /* get the next block header while at it */
+ nextSrcSizeHint -= zbc->inPos; /* already loaded*/
+ return nextSrcSizeHint;
+ }
+}
+
+
+/* *************************************
+* Tool functions
+***************************************/
+unsigned ZBUFFv04_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFFv04_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFFv04_recommendedDInSize() { return BLOCKSIZE + 3; }
+size_t ZBUFFv04_recommendedDOutSize() { return BLOCKSIZE; }
+
+
+
+/*- ========================================================================= -*/
+
+/* final wrapping stage */
+
+size_t ZSTDv04_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
+ size_t regenSize;
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ if (dctx==NULL) return ERROR(memory_allocation);
+ regenSize = ZSTDv04_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+ ZSTD_freeDCtx(dctx);
+ return regenSize;
+#else
+ ZSTD_DCtx dctx;
return ZSTDv04_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
-#endif
-}
-
-
-size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
-
-size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
-{
- return ZSTD_nextSrcSizeToDecompress(dctx);
-}
-
-size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- return ZSTD_decompressContinue(dctx, dst, maxDstSize, src, srcSize);
-}
-
-
-
-ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void) { return ZBUFF_createDCtx(); }
-size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx) { return ZBUFF_freeDCtx(dctx); }
-
-size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx) { return ZBUFF_decompressInit(dctx); }
-size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* src, size_t srcSize)
-{ return ZBUFF_decompressWithDictionary(dctx, src, srcSize); }
-
-size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
-{
- return ZBUFF_decompressContinue(dctx, dst, maxDstSizePtr, src, srcSizePtr);
-}
+#endif
+}
+
+
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
+{
+ return ZSTD_nextSrcSizeToDecompress(dctx);
+}
+
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ return ZSTD_decompressContinue(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void) { return ZBUFF_createDCtx(); }
+size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx) { return ZBUFF_freeDCtx(dctx); }
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx) { return ZBUFF_decompressInit(dctx); }
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* src, size_t srcSize)
+{ return ZBUFF_decompressWithDictionary(dctx, src, srcSize); }
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+ return ZBUFF_decompressContinue(dctx, dst, maxDstSizePtr, src, srcSizePtr);
+}
ZSTD_DCtx* ZSTDv04_createDCtx(void) { return ZSTD_createDCtx(); }
size_t ZSTDv04_freeDCtx(ZSTD_DCtx* dctx) { return ZSTD_freeDCtx(dctx); }
diff --git a/contrib/libs/zstd06/legacy/zstd_v04.h b/contrib/libs/zstd06/legacy/zstd_v04.h
index 807d9535e9..c539000a32 100644
--- a/contrib/libs/zstd06/legacy/zstd_v04.h
+++ b/contrib/libs/zstd06/legacy/zstd_v04.h
@@ -1,149 +1,149 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd_v04 - decoder for 0.4 format
- Header File
- Copyright (C) 2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
- - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-#pragma once
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
-* Includes
-***************************************/
-#include <stddef.h> /* size_t */
-
-
-/* *************************************
-* Simple one-step function
-***************************************/
-/**
-ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
- compressedSize : is the exact source size
- maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
- It must be equal or larger than originalSize, otherwise decompression will fail.
- return : the number of bytes decompressed into destination buffer (originalSize)
- or an errorCode if it fails (which can be tested using ZSTDv01_isError())
-*/
-size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-/**
-ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
-*/
-unsigned ZSTDv04_isError(size_t code);
-
-
-/* *************************************
-* Advanced functions
-***************************************/
-typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx;
-ZSTDv04_Dctx* ZSTDv04_createDCtx(void);
-size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx);
-
-size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx,
- void* dst, size_t maxOriginalSize,
- const void* src, size_t compressedSize);
-
-
-/* *************************************
-* Direct Streaming
-***************************************/
-size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx);
-
-size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx);
-size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
-/**
- Use above functions alternatively.
- ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
- Result is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
-*/
-
-
-/* *************************************
-* Buffered Streaming
-***************************************/
-typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
-ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
-size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
-
-size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
-size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
-
-size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
-
-/** ************************************************
-* Streaming decompression
-*
-* A ZBUFF_DCtx object is required to track streaming operation.
-* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
-* Use ZBUFF_decompressInit() to start a new decompression operation.
-* ZBUFF_DCtx objects can be reused multiple times.
-*
-* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary()
-* It must be the same content as the one set during compression phase.
-* Dictionary content must remain accessible during the decompression process.
-*
-* Use ZBUFF_decompressContinue() repetitively to consume your input.
-* *srcSizePtr and *maxDstSizePtr can be any size.
-* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
-* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
-* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst.
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
-* or 0 when a frame is completely decoded
-* or an error code, which can be tested using ZBUFF_isError().
-*
-* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize
-* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
-* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* **************************************************/
-unsigned ZBUFFv04_isError(size_t errorCode);
-const char* ZBUFFv04_getErrorName(size_t errorCode);
-
-
-/** The below functions provide recommended buffer sizes for Compression or Decompression operations.
-* These sizes are not compulsory, they just tend to offer better latency */
-size_t ZBUFFv04_recommendedDInSize(void);
-size_t ZBUFFv04_recommendedDOutSize(void);
-
-
-/* *************************************
-* Prefix - version detection
-***************************************/
-#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */
-
-
-#if defined (__cplusplus)
-}
-#endif
+/*
+ zstd_v04 - decoder for 0.4 format
+ Header File
+ Copyright (C) 2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+* Includes
+***************************************/
+#include <stddef.h> /* size_t */
+
+
+/* *************************************
+* Simple one-step function
+***************************************/
+/**
+ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
+ compressedSize : is the exact source size
+ maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+ It must be equal or larger than originalSize, otherwise decompression will fail.
+ return : the number of bytes decompressed into destination buffer (originalSize)
+ or an errorCode if it fails (which can be tested using ZSTDv04_isError())
+*/
+size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
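A minimal usage sketch of this one-step entry point (added for illustration, not part of the original header): the caller must already know an upper bound on the decompressed size, since maxOriginalSize has to be at least the original size. The helper name decompress_v04_frame is illustrative.

#include <stdlib.h>
#include "zstd_v04.h"

/* Hedged sketch: decompress one whole v0.4 frame when an upper bound on the
 * original size is known.  Returns a malloc'd buffer (caller frees) or NULL. */
static void* decompress_v04_frame(const void* cSrc, size_t cSrcSize,
                                  size_t originalBound, size_t* regenSizePtr)
{
    void* dst = malloc(originalBound);
    if (dst == NULL) return NULL;
    {
        size_t const r = ZSTDv04_decompress(dst, originalBound, cSrc, cSrcSize);
        if (ZSTDv04_isError(r)) { free(dst); return NULL; }   /* corrupted input or dst too small */
        *regenSizePtr = r;                                    /* bytes actually regenerated */
    }
    return dst;
}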
+
+/**
+ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
+*/
+unsigned ZSTDv04_isError(size_t code);
+
+
+/* *************************************
+* Advanced functions
+***************************************/
+typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx;
+ZSTDv04_Dctx* ZSTDv04_createDCtx(void);
+size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx,
+ void* dst, size_t maxOriginalSize,
+ const void* src, size_t compressedSize);
+
+
+/* *************************************
+* Direct Streaming
+***************************************/
+size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx);
+
+size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx);
+size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+ Use above functions alternatively.
+ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+ Result is the number of bytes regenerated within 'dst'.
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
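The following is a hedged sketch of that alternating protocol, added for illustration and not part of the original header. It assumes the input is read from an open FILE* positioned at the start of a v0.4 frame and that the whole regenerated content fits in one flat destination buffer; the helper name decompress_v04_streaming is illustrative.

#include <stdio.h>
#include "zstd_v04.h"

/* Read exactly the number of bytes the decoder asks for, hand them to
 * ZSTDv04_decompressContinue(), and append whatever it regenerates to dst.
 * Returns the regenerated size, or (size_t)-1 on error. */
static size_t decompress_v04_streaming(FILE* in, void* dst, size_t dstCapacity)
{
    char inBuf[128 * 1024 + 3];   /* a frame header, block header or block never exceeds this */
    char* op = (char*)dst;
    ZSTDv04_Dctx* const dctx = ZSTDv04_createDCtx();
    if (dctx == NULL) return (size_t)-1;
    ZSTDv04_resetDCtx(dctx);
    for (;;) {
        size_t const toRead = ZSTDv04_nextSrcSizeToDecompress(dctx);
        size_t regenerated;
        if (toRead == 0) break;                                 /* frame fully decoded */
        if (toRead > sizeof(inBuf)) goto _error;                /* should not happen */
        if (fread(inBuf, 1, toRead, in) != toRead) goto _error; /* truncated input */
        regenerated = ZSTDv04_decompressContinue(dctx, op,
                            dstCapacity - (size_t)(op - (char*)dst), inBuf, toRead);
        if (ZSTDv04_isError(regenerated)) goto _error;
        op += regenerated;                                      /* may be 0 after a header */
    }
    ZSTDv04_freeDCtx(dctx);
    return (size_t)(op - (char*)dst);
_error:
    ZSTDv04_freeDCtx(dctx);
    return (size_t)-1;
}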
+
+
+/* *************************************
+* Buffered Streaming
+***************************************/
+typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
+ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
+size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
+
+size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
+size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
+
+/** ************************************************
+* Streaming decompression
+*
+* A ZBUFFv04_DCtx object is required to track streaming operations.
+* Use ZBUFFv04_createDCtx() and ZBUFFv04_freeDCtx() to create/release resources.
+* Use ZBUFFv04_decompressInit() to start a new decompression operation.
+* ZBUFFv04_DCtx objects can be reused multiple times.
+*
+* Optionally, a reference to a static dictionary can be set using ZBUFFv04_decompressWithDictionary().
+* It must have the same content as the dictionary used during the compression phase.
+* Dictionary content must remain accessible during the decompression process.
+*
+* Use ZBUFFv04_decompressContinue() repeatedly to consume your input.
+* *srcSizePtr and *maxDstSizePtr can be any size.
+* The function reports how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+* Note that it may not consume the entire input, in which case it's up to the caller to present the remaining input again.
+* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters, or change dst.
+* @return : a hint on the preferred number of bytes to use as input for the next function call (it's only a hint, to improve latency),
+* or 0 when a frame is completely decoded,
+* or an error code, which can be tested using ZBUFFv04_isError().
+*
+* Hint : recommended buffer sizes (not compulsory) : ZBUFFv04_recommendedDInSize() / ZBUFFv04_recommendedDOutSize()
+* output : ZBUFFv04_recommendedDOutSize == 128 KB; the block size is the internal unit, so it's always possible to write a full block once it's decoded.
+* input : ZBUFFv04_recommendedDInSize == 128 KB + 3; just follow the indications from ZBUFFv04_decompressContinue() to minimize latency. It should always be <= 128 KB + 3.
+* **************************************************/
+unsigned ZBUFFv04_isError(size_t errorCode);
+const char* ZBUFFv04_getErrorName(size_t errorCode);
+
+
+/** The functions below provide recommended buffer sizes for compression or decompression operations.
+* These sizes are not compulsory; they just tend to offer better latency. */
+size_t ZBUFFv04_recommendedDInSize(void);
+size_t ZBUFFv04_recommendedDOutSize(void);
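A buffered-streaming sketch using only the ZBUFFv04 declarations above (editorial illustration, not part of the patch; the include path and the write_out() callback are assumptions):

#include <stdlib.h>
#include "zstd_v04.h"   /* assumed include path */

/* Feed arbitrary input chunks and flush decoded output through write_out (hypothetical callback). */
static int decode_v04_buffered(const char* in, size_t inSize,
                               void (*write_out)(const void* p, size_t n))
{
    size_t const outCap = ZBUFFv04_recommendedDOutSize();
    char* const out = (char*)malloc(outCap);
    ZBUFFv04_DCtx* const dctx = ZBUFFv04_createDCtx();
    size_t pos = 0;
    int ret = -1;
    if (out != NULL && dctx != NULL && !ZBUFFv04_isError(ZBUFFv04_decompressInit(dctx))) {
        ret = 0;
        while (pos < inSize) {
            size_t srcSize = inSize - pos;   /* in: bytes available ; out: bytes consumed */
            size_t dstSize = outCap;         /* in: room in 'out'   ; out: bytes produced */
            size_t const hint = ZBUFFv04_decompressContinue(dctx, out, &dstSize, in + pos, &srcSize);
            if (ZBUFFv04_isError(hint)) { ret = -1; break; }
            write_out(out, dstSize);
            pos += srcSize;
            if (hint == 0) break;            /* frame completely decoded */
        }
    }
    if (dctx != NULL) ZBUFFv04_freeDCtx(dctx);
    free(out);
    return ret;
}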
+
+
+/* *************************************
+* Prefix - version detection
+***************************************/
+#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */
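For illustration (not part of the patch), the magic number above can be used as a quick format probe; the open-coded little-endian read is the only assumption beyond the macro itself:

#include <stddef.h>
#include "zstd_v04.h"   /* assumed include path, for ZSTDv04_magicNumber */

/* Returns 1 if 'src' starts with a v0.4 frame header, 0 otherwise. */
static unsigned is_zstd_v04_frame(const void* src, size_t srcSize)
{
    const unsigned char* const p = (const unsigned char*)src;
    unsigned long magic;
    if (srcSize < 4) return 0;
    magic = (unsigned long)p[0] | ((unsigned long)p[1] << 8)
          | ((unsigned long)p[2] << 16) | ((unsigned long)p[3] << 24);
    return magic == ZSTDv04_magicNumber;   /* the magic number is stored little-endian at frame start */
}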
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/contrib/libs/zstd06/legacy/zstd_v05.c b/contrib/libs/zstd06/legacy/zstd_v05.c
index 10a8baaf4d..f3c720fd26 100644
--- a/contrib/libs/zstd06/legacy/zstd_v05.c
+++ b/contrib/libs/zstd06/legacy/zstd_v05.c
@@ -1,3877 +1,3877 @@
-/* ******************************************************************
- zstd_v05.c
- Decompression module for ZSTD v0.5 legacy format
- Copyright (C) 2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Homepage : http://www.zstd.net/
-****************************************************************** */
-
-/*- Dependencies -*/
-#include "zstd_v05.h"
-
-
-/* ******************************************************************
- mem.h
- low-level memory access routines
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-****************************************
-* Dependencies
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-#include <string.h> /* memcpy */
-
-
-/*-****************************************
-* Compiler specifics
-******************************************/
-#if defined(__GNUC__)
-# define MEM_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-# define MEM_STATIC static __inline
-#else
-# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/*-**************************************************************
-* Basic Types
-*****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
- typedef uint8_t BYTE;
- typedef uint16_t U16;
- typedef int16_t S16;
- typedef uint32_t U32;
- typedef int32_t S32;
- typedef uint64_t U64;
- typedef int64_t S64;
-#else
- typedef unsigned char BYTE;
- typedef unsigned short U16;
- typedef signed short S16;
- typedef unsigned int U32;
- typedef signed int S32;
- typedef unsigned long long U64;
- typedef signed long long S64;
-#endif
-
-
-/*-**************************************************************
-* Memory I/O
-*****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS :
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The switch below allows selecting a different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
- * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violates the C standard.
- * It can generate buggy code on targets that depend on alignment.
- * In some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6)
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
-# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
-# define MEM_FORCE_MEMORY_ACCESS 1
-# endif
-#endif
-
-MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
-MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
-
-MEM_STATIC unsigned MEM_isLittleEndian(void)
-{
- const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-
-#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
-
-/* violates C standard, by lying on structure alignment.
-Only use if no other choice to achieve best performance on target platform */
-MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
-MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
-MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
-
-#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
-
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
-
-#else
-
-/* default method, safe and standard.
- can sometimes prove slower */
-
-MEM_STATIC U16 MEM_read16(const void* memPtr)
-{
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U32 MEM_read32(const void* memPtr)
-{
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U64 MEM_read64(const void* memPtr)
-{
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write32(void* memPtr, U32 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write64(void* memPtr, U64 value)
-{
- memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif /* MEM_FORCE_MEMORY_ACCESS */
-
-
-MEM_STATIC U16 MEM_readLE16(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read16(memPtr);
- else {
- const BYTE* p = (const BYTE*)memPtr;
- return (U16)(p[0] + (p[1]<<8));
- }
-}
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
-{
- if (MEM_isLittleEndian()) {
- MEM_write16(memPtr, val);
- } else {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val;
- p[1] = (BYTE)(val>>8);
- }
-}
-
-MEM_STATIC U32 MEM_readLE32(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read32(memPtr);
- else {
- const BYTE* p = (const BYTE*)memPtr;
- return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
- }
-}
-
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
-{
- if (MEM_isLittleEndian()) {
- MEM_write32(memPtr, val32);
- } else {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val32;
- p[1] = (BYTE)(val32>>8);
- p[2] = (BYTE)(val32>>16);
- p[3] = (BYTE)(val32>>24);
- }
-}
-
-MEM_STATIC U64 MEM_readLE64(const void* memPtr)
-{
- if (MEM_isLittleEndian())
- return MEM_read64(memPtr);
- else {
- const BYTE* p = (const BYTE*)memPtr;
- return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
- + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
- }
-}
-
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
-{
- if (MEM_isLittleEndian()) {
- MEM_write64(memPtr, val64);
- } else {
- BYTE* p = (BYTE*)memPtr;
- p[0] = (BYTE)val64;
- p[1] = (BYTE)(val64>>8);
- p[2] = (BYTE)(val64>>16);
- p[3] = (BYTE)(val64>>24);
- p[4] = (BYTE)(val64>>32);
- p[5] = (BYTE)(val64>>40);
- p[6] = (BYTE)(val64>>48);
- p[7] = (BYTE)(val64>>56);
- }
-}
-
-MEM_STATIC size_t MEM_readLEST(const void* memPtr)
-{
- if (MEM_32bits())
- return (size_t)MEM_readLE32(memPtr);
- else
- return (size_t)MEM_readLE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
-{
- if (MEM_32bits())
- MEM_writeLE32(memPtr, (U32)val);
- else
- MEM_writeLE64(memPtr, (U64)val);
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* MEM_H_MODULE */
-
-/* ******************************************************************
- Error codes list
- Copyright (C) 2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/zstd
-****************************************************************** */
-#ifndef ERROR_PUBLIC_H_MODULE
-#define ERROR_PUBLIC_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* ****************************************
-* error codes list
-******************************************/
-typedef enum {
- ZSTDv05_error_no_error,
- ZSTDv05_error_GENERIC,
- ZSTDv05_error_prefix_unknown,
- ZSTDv05_error_frameParameter_unsupported,
- ZSTDv05_error_frameParameter_unsupportedBy32bits,
- ZSTDv05_error_init_missing,
- ZSTDv05_error_memory_allocation,
- ZSTDv05_error_stage_wrong,
- ZSTDv05_error_dstSize_tooSmall,
- ZSTDv05_error_srcSize_wrong,
- ZSTDv05_error_corruption_detected,
- ZSTDv05_error_tableLog_tooLarge,
- ZSTDv05_error_maxSymbolValue_tooLarge,
- ZSTDv05_error_maxSymbolValue_tooSmall,
- ZSTDv05_error_dictionary_corrupted,
- ZSTDv05_error_maxCode
-} ZSTDv05_ErrorCode;
-
-/* note : functions provide error codes in reverse negative order,
- so compare with (size_t)(0-enum) */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_PUBLIC_H_MODULE */
-
-
-/*
- zstd - standard compression library
- Header File for static linking only
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd homepage : http://www.zstd.net
-*/
-#ifndef ZSTD_STATIC_H
-#define ZSTD_STATIC_H
-
-/* The prototypes defined within this file are considered experimental.
- * They should not be used in the context of a DLL, as they may change in the future.
- * Prefer static linking if you need them, to avoid issues from breaking version changes.
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-
-/*-*************************************
-* Types
-***************************************/
-#define ZSTDv05_WINDOWLOG_ABSOLUTEMIN 11
-
-
-/*-*************************************
-* Advanced functions
-***************************************/
-/*- Advanced Decompression functions -*/
-
-/*! ZSTDv05_decompress_usingPreparedDCtx() :
-* Same as ZSTDv05_decompress_usingDict(), but using a reference context `preparedDCtx`, where the dictionary has already been loaded.
-* It avoids reloading the dictionary each time.
-* `preparedDCtx` must have been properly initialized using ZSTDv05_decompressBegin_usingDict().
-* Requires 2 contexts : 1 for reference, which will not be modified, and 1 to run the decompression operation */
-size_t ZSTDv05_decompress_usingPreparedDCtx(
- ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize);
-
-
-/* **************************************
-* Streaming functions (direct mode)
-****************************************/
-size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx);
-size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
-void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx);
-
-size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
-
-size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
-size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/*
- Streaming decompression, direct mode (bufferless)
-
- A ZSTDv05_DCtx object is required to track streaming operations.
- Use ZSTDv05_createDCtx() / ZSTDv05_freeDCtx() to manage it.
- A ZSTDv05_DCtx object can be re-used multiple times.
-
- The first typical operation is to retrieve the frame parameters, using ZSTDv05_getFrameParams().
- This operation is independent, and just needs enough input data to properly decode the frame header.
- The objective is to retrieve params->windowLog, to know the minimum amount of memory required during decoding.
- Result : 0 when successful; it means the ZSTDv05_parameters structure has been filled.
- >0 : means there is not enough data in src. Provides the expected size needed to successfully decode the header.
- or an errorCode, which can be tested using ZSTDv05_isError()
-
- Start decompression, with ZSTDv05_decompressBegin() or ZSTDv05_decompressBegin_usingDict()
- Alternatively, you can copy a prepared context, using ZSTDv05_copyDCtx()
-
- Then use ZSTDv05_nextSrcSizeToDecompress() and ZSTDv05_decompressContinue() alternatively.
- ZSTDv05_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv05_decompressContinue().
- ZSTDv05_decompressContinue() requires this exact amount of bytes, or it will fail.
- ZSTDv05_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
- They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
-
- @result of ZSTDv05_decompressContinue() is the number of bytes regenerated within 'dst'.
- It can be zero, which is not an error; it just means ZSTDv05_decompressContinue() has decoded some header.
-
- A frame is fully decoded when ZSTDv05_nextSrcSizeToDecompress() returns zero.
- Context can then be reset to start a new decompression.
-*/
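Editorial illustration only (not part of the removed file or of the patch): a compact sketch of the bufferless sequence just described, assuming the whole frame is already in memory and that dst is large enough for the regenerated data:

#include "zstd_v05.h"   /* assumed include path */

static size_t decode_v05_bufferless(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
{
    ZSTDv05_parameters params;
    size_t written = 0, pos = 0;
    ZSTDv05_DCtx* const dctx = ZSTDv05_createDCtx();
    if (dctx == NULL) return 0;
    if (ZSTDv05_getFrameParams(&params, src, srcSize) == 0        /* header decoded */
        && !ZSTDv05_isError(ZSTDv05_decompressBegin(dctx))) {
        /* params.windowLog would normally bound the required window/output memory */
        for (;;) {
            size_t const toRead = ZSTDv05_nextSrcSizeToDecompress(dctx);
            size_t produced;
            if (toRead == 0) break;                    /* frame fully decoded */
            if (pos + toRead > srcSize) break;         /* truncated input */
            produced = ZSTDv05_decompressContinue(dctx, (char*)dst + written,
                                                  dstCapacity - written,
                                                  (const char*)src + pos, toRead);
            if (ZSTDv05_isError(produced)) break;
            written += produced;                       /* may be 0 when only a header was decoded */
            pos += toRead;
        }
    }
    ZSTDv05_freeDCtx(dctx);
    return written;
}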
-
-
-/* **************************************
-* Block functions
-****************************************/
-/*! Block functions produce and decode raw zstd blocks, without frame metadata.
- The user is responsible for keeping track of the information required to regenerate the data, such as block sizes.
-
- A few rules to respect :
- - Uncompressed block size must be <= 128 KB
- - Compressing or decompressing requires a context structure
- + Use ZSTDv05_createCCtx() and ZSTDv05_createDCtx()
- - It is necessary to init context before starting
- + compression : ZSTDv05_compressBegin()
- + decompression : ZSTDv05_decompressBegin()
- + variants _usingDict() are also allowed
- + copyCCtx() and copyDCtx() work too
- - When a block is considered not compressible enough, ZSTDv05_compressBlock() result will be zero.
- In which case, nothing is produced into `dst`.
- + User must test for such outcome and deal directly with uncompressed data
- + ZSTDv05_decompressBlock() doesn't accept uncompressed data as input !!
-*/
-
-size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTDv05_STATIC_H */
-
-
-
-/* ******************************************************************
- Error codes and messages
- Copyright (C) 2013-2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/zstd
-****************************************************************** */
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-
-/* ****************************************
-* Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-# define ERR_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-# define ERR_STATIC static __inline
-#else
-# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/*-****************************************
-* Customization
-******************************************/
-typedef ZSTDv05_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTDv05_error_##name
-
-
-/*-****************************************
-* Error codes handling
-******************************************/
-#ifdef ERROR
-# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
-#endif
-#define ERROR(name) (size_t)-PREFIX(name)
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC ERR_enum ERR_getError(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
-
-
-/*-****************************************
-* Error Strings
-******************************************/
-
-ERR_STATIC const char* ERR_getErrorName(size_t code)
-{
- static const char* notErrorCode = "Unspecified error code";
- switch( ERR_getError(code) )
- {
- case PREFIX(no_error): return "No error detected";
- case PREFIX(GENERIC): return "Error (generic)";
- case PREFIX(prefix_unknown): return "Unknown frame descriptor";
- case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
- case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
- case PREFIX(init_missing): return "Context should be init first";
- case PREFIX(memory_allocation): return "Allocation error : not enough memory";
- case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
- case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
- case PREFIX(srcSize_wrong): return "Src size incorrect";
- case PREFIX(corruption_detected): return "Corrupted block detected";
- case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory";
- case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max possible Symbol Value : too large";
- case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
- case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
- case PREFIX(maxCode):
- default: return notErrorCode; /* should be impossible, due to ERR_getError() */
- }
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
-/*
- zstd_internal - common functions to include
- Header File for include
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-*/
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-
-
-/*-*************************************
-* Common macros
-***************************************/
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
-
-
-/*-*************************************
-* Common constants
-***************************************/
-#define ZSTDv05_DICT_MAGIC 0xEC30A435
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BLOCKSIZE (128 KB) /* define, for static allocation */
-
-static const size_t ZSTDv05_blockHeaderSize = 3;
-static const size_t ZSTDv05_frameHeaderSize_min = 5;
-#define ZSTDv05_frameHeaderSize_max 5 /* define, for static allocation */
-
-#define BITv057 128
-#define BITv056 64
-#define BITv055 32
-#define BITv054 16
-#define BITv051 2
-#define BITv050 1
-
-#define IS_HUFv05 0
-#define IS_PCH 1
-#define IS_RAW 2
-#define IS_RLE 3
-
-#define MINMATCH 4
-#define REPCODE_STARTVALUE 1
-
-#define Litbits 8
-#define MLbits 7
-#define LLbits 6
-#define Offbits 5
-#define MaxLit ((1<<Litbits) - 1)
-#define MaxML ((1<<MLbits) - 1)
-#define MaxLL ((1<<LLbits) - 1)
-#define MaxOff ((1<<Offbits)- 1)
-#define MLFSEv05Log 10
-#define LLFSEv05Log 10
-#define OffFSEv05Log 9
-#define MaxSeq MAX(MaxLL, MaxML)
-
-#define FSEv05_ENCODING_RAW 0
-#define FSEv05_ENCODING_RLE 1
-#define FSEv05_ENCODING_STATIC 2
-#define FSEv05_ENCODING_DYNAMIC 3
-
-
-#define HufLog 12
-
-#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
-
-#define WILDCOPY_OVERLENGTH 8
-
-typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
-
-
-/*-*******************************************
-* Shared functions to include for inlining
-*********************************************/
-static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
-
-#define COPY8(d,s) { ZSTDv05_copy8(d,s); d+=8; s+=8; }
-
-/*! ZSTDv05_wildcopy() :
-* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
-MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, size_t length)
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
- do
- COPY8(op, ip)
- while (op < oend);
-}
-
-MEM_STATIC unsigned ZSTDv05_highbit(U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse(&r, val);
- return (unsigned)r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
- return 31 - __builtin_clz(val);
-# else /* Software version */
- static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- int r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-/*-*******************************************
-* Private interfaces
-*********************************************/
-typedef struct {
- void* buffer;
- U32* offsetStart;
- U32* offset;
- BYTE* offCodeStart;
- BYTE* offCode;
- BYTE* litStart;
- BYTE* lit;
- BYTE* litLengthStart;
- BYTE* litLength;
- BYTE* matchLengthStart;
- BYTE* matchLength;
- BYTE* dumpsStart;
- BYTE* dumps;
- /* opt */
- U32* matchLengthFreq;
- U32* litLengthFreq;
- U32* litFreq;
- U32* offCodeFreq;
- U32 matchLengthSum;
- U32 litLengthSum;
- U32 litSum;
- U32 offCodeSum;
-} seqStore_t;
-
-
-
-#endif /* ZSTDv05_CCOMMON_H_MODULE */
-/* ******************************************************************
- FSEv05 : Finite State Entropy coder
- header file
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef FSEv05_H
-#define FSEv05_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* *****************************************
-* Includes
-******************************************/
-#include <stddef.h> /* size_t, ptrdiff_t */
-
-
-/*-****************************************
-* FSEv05 simple functions
-******************************************/
-size_t FSEv05_decompress(void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize);
-/*!
-FSEv05_decompress():
- Decompress FSEv05 data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated destination buffer 'dst', of size 'maxDstSize'.
- return : size of regenerated data (<= maxDstSize)
- or an error code, which can be tested using FSEv05_isError()
-
- ** Important ** : FSEv05_decompress() doesn't decompress non-compressible or RLE data !!!
- Why ? : making this distinction requires a header.
- Header management is intentionally delegated to the user layer, which can better manage special cases.
-*/
-
-
-/* *****************************************
-* Tool functions
-******************************************/
-/* Error Management */
-unsigned FSEv05_isError(size_t code); /* tells if a return value is an error code */
-const char* FSEv05_getErrorName(size_t code); /* provides error code string (useful for debugging) */
-
-
-
-
-/* *****************************************
-* FSEv05 detailed API
-******************************************/
-/* *** DECOMPRESSION *** */
-
-/*!
-FSEv05_readNCount():
- Read compactly saved 'normalizedCounter' from 'rBuffer'.
- return : size read from 'rBuffer'
- or an errorCode, which can be tested using FSEv05_isError()
- maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
-
-/*!
-Constructor and Destructor of type FSEv05_DTable
- Note that its size depends on 'tableLog' */
-typedef unsigned FSEv05_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-FSEv05_DTable* FSEv05_createDTable(unsigned tableLog);
-void FSEv05_freeDTable(FSEv05_DTable* dt);
-
-/*!
-FSEv05_buildDTable():
- Builds 'dt', which must be already allocated, using FSEv05_createDTable()
- return : 0,
- or an errorCode, which can be tested using FSEv05_isError() */
-size_t FSEv05_buildDTable (FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*!
-FSEv05_decompress_usingDTable():
- Decompress compressed source @cSrc of size @cSrcSize using @dt
- into @dst which must be already allocated.
- return : size of regenerated data (necessarily <= @dstCapacity)
- or an errorCode, which can be tested using FSEv05_isError() */
-size_t FSEv05_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv05_DTable* dt);
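Editorial illustration only (not part of the removed file): putting the detailed API above together, assuming the normalized counters are stored in a header directly in front of the FSE bitstream:

#include <stddef.h>
#include "zstd_v05.h"   /* assumed include path */

static size_t fse_v05_decode(void* dst, size_t dstCapacity,
                             const void* cSrc, size_t cSrcSize)
{
    short norm[256];                       /* FSEv05_MAX_SYMBOL_VALUE + 1 */
    unsigned maxSymbolValue = 255;
    unsigned tableLog = 0;
    size_t result;
    FSEv05_DTable* dt;
    size_t const hSize = FSEv05_readNCount(norm, &maxSymbolValue, &tableLog, cSrc, cSrcSize);
    if (FSEv05_isError(hSize)) return hSize;

    dt = FSEv05_createDTable(tableLog);
    if (dt == NULL) return 0;              /* allocation failure, simplified handling */
    result = FSEv05_buildDTable(dt, norm, maxSymbolValue, tableLog);
    if (!FSEv05_isError(result))
        result = FSEv05_decompress_usingDTable(dst, dstCapacity,
                                               (const char*)cSrc + hSize, cSrcSize - hSize, dt);
    FSEv05_freeDTable(dt);
    return result;                         /* regenerated size, or an error code */
}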
-
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FSEv05_H */
-/* ******************************************************************
- bitstream
- Part of FSEv05 library
- header file (to include)
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-****************************************************************** */
-#ifndef BITv05STREAM_H_MODULE
-#define BITv05STREAM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*
-* This API consists of small unitary functions, which highly benefit from being inlined.
-* Since link-time-optimization is not available for all compilers,
-* these functions are defined in a .h file, to be included.
-*/
-
-
-
-/*-********************************************
-* bitStream decoding API (read backward)
-**********************************************/
-typedef struct
-{
- size_t bitContainer;
- unsigned bitsConsumed;
- const char* ptr;
- const char* start;
-} BITv05_DStream_t;
-
-typedef enum { BITv05_DStream_unfinished = 0,
- BITv05_DStream_endOfBuffer = 1,
- BITv05_DStream_completed = 2,
- BITv05_DStream_overflow = 3 } BITv05_DStream_status; /* result of BITv05_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits);
-MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD);
-MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* bitD);
-
-
-/*!
-* Start by invoking BITv05_initDStream().
-* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-* You can then retrieve bitFields stored into the local register, **in reverse order**.
-* Local register is explicitly reloaded from memory by the BITv05_reloadDStream() method.
-* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BITv05_DStream_unfinished.
-* Otherwise, it can be less than that, so proceed accordingly.
-* Checking if DStream has reached its end can be performed with BITv05_endOfDStream()
-*/
-
-
-/*-****************************************
-* unsafe API
-******************************************/
-MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-
-
-/*-**************************************************************
-* Helper functions
-****************************************************************/
-MEM_STATIC unsigned BITv05_highbit32 (register U32 val)
-{
-# if defined(_MSC_VER) /* Visual */
- unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
-# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- unsigned r;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
- return r;
-# endif
-}
-
-
-
-/*-********************************************************
-* bitStream decoding
-**********************************************************/
-/*!BITv05_initDStream
-* Initialize a BITv05_DStream_t.
-* @bitD : a pointer to an already allocated BITv05_DStream_t structure
-* @srcBuffer must point at the beginning of a bitStream
-* @srcSize must be the exact size of the bitStream
-* @result : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
-
- if (srcSize >= sizeof(size_t)) { /* normal case */
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
- } else {
- U32 contain32;
- bitD->start = (const char*)srcBuffer;
- bitD->ptr = bitD->start;
- bitD->bitContainer = *(const BYTE*)(bitD->start);
- switch(srcSize)
- {
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
- default:;
- }
- contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
- if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
- bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
- bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
- }
-
- return srcSize;
-}
-
-/*!BITv05_lookBits
- * Provides next n bits from local register
- * local register is not modified (bits are still present for next read/look)
- * On 32-bits, maxNbBits==25
- * On 64-bits, maxNbBits==57
- * @return : value extracted
- */
-MEM_STATIC size_t BITv05_lookBits(BITv05_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
-}
-
-/*! BITv05_lookBitsFast :
-* unsafe version; only works if nbBits >= 1 */
-MEM_STATIC size_t BITv05_lookBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
-{
- const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
-}
-
-MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
-{
- bitD->bitsConsumed += nbBits;
-}
-
-/*!BITv05_readBits
- * Read next n bits from local register.
- * Pay attention not to read more bits than remain in the local register.
- * @return : extracted value.
- */
-MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BITv05_lookBits(bitD, nbBits);
- BITv05_skipBits(bitD, nbBits);
- return value;
-}
-
-/*!BITv05_readBitsFast :
-* unsafe version; only works if nbBits >= 1 */
-MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
-{
- size_t value = BITv05_lookBitsFast(bitD, nbBits);
- BITv05_skipBits(bitD, nbBits);
- return value;
-}
-
-MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD)
-{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
- return BITv05_DStream_overflow;
-
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BITv05_DStream_unfinished;
- }
- if (bitD->ptr == bitD->start) {
- if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv05_DStream_endOfBuffer;
- return BITv05_DStream_completed;
- }
- {
- U32 nbBytes = bitD->bitsConsumed >> 3;
- BITv05_DStream_status result = BITv05_DStream_unfinished;
- if (bitD->ptr - nbBytes < bitD->start) {
- nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
- result = BITv05_DStream_endOfBuffer;
- }
- bitD->ptr -= nbBytes;
- bitD->bitsConsumed -= nbBytes*8;
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
- return result;
- }
-}
-
-/*! BITv05_endOfDStream
-* @return Tells if DStream has reached its exact end
-*/
-MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* DStream)
-{
- return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* BITv05STREAM_H_MODULE */
-/* ******************************************************************
- FSEv05 : Finite State Entropy coder
- header file for static linking (only)
- Copyright (C) 2013-2015, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-#ifndef FSEv05_STATIC_H
-#define FSEv05_STATIC_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-
-/* *****************************************
-* Static allocation
-*******************************************/
-/* It is possible to statically allocate FSEv05 CTable/DTable as a table of unsigned using below macros */
-#define FSEv05_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
-
-
-/* *****************************************
-* FSEv05 advanced API
-*******************************************/
-size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits);
-/* build a fake FSEv05_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
-
-size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, unsigned char symbolValue);
-/* build a fake FSEv05_DTable, designed to always generate the same symbolValue */
-
-
-
-/* *****************************************
-* FSEv05 symbol decompression API
-*******************************************/
-typedef struct
-{
- size_t state;
- const void* table; /* precise table may vary, depending on U16 */
-} FSEv05_DState_t;
-
-
-static void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt);
-
-static unsigned char FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
-
-static unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr);
-
-/*!
-Let's now decompose FSEv05_decompress_usingDTable() into its unitary components.
-You will decode FSEv05-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BITv05_DStream_t DStream; // Stream context
-FSEv05_DState_t DState; // State context. Multiple ones are possible
-FSEv05_DTable* DTablePtr; // Decoding table, provided by FSEv05_buildDTable()
-
-The first thing to do is to init the bitStream.
- errorCode = BITv05_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
- errorCode = FSEv05_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information, the maximum number of bits read by FSEv05_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
- unsigned char symbol = FSEv05_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you may have stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
- size_t bitField = BITv05_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
- endSignal = FSEv05_reloadDStream(&DStream);
-
-BITv05_reloadDStream() result tells if there is still some more data to read from DStream.
-BITv05_DStream_unfinished : there is still some data left in the DStream.
-BITv05_DStream_endOfBuffer : DStream reached the end of its buffer. Its container may no longer be completely filled.
-BITv05_DStream_completed : DStream reached its exact end, generally meaning decompression is completed.
-BITv05_DStream_overflow : DStream went too far. The decompression result is corrupted.
-
-When reaching end of buffer (BITv05_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
- BITv05_reloadDStream(&DStream) >= BITv05_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
- BITv05_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
- FSEv05_endOfDState(&DState);
-*/
-
-
-/* *****************************************
-* FSEv05 unsafe API
-*******************************************/
-static unsigned char FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-
-/* *****************************************
-* Implementation of inlined functions
-*******************************************/
-/* decompression */
-
-typedef struct {
- U16 tableLog;
- U16 fastMode;
-} FSEv05_DTableHeader; /* sizeof U32 */
-
-typedef struct
-{
- unsigned short newState;
- unsigned char symbol;
- unsigned char nbBits;
-} FSEv05_decode_t; /* size == U32 */
-
-MEM_STATIC void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt)
-{
- const void* ptr = dt;
- const FSEv05_DTableHeader* const DTableH = (const FSEv05_DTableHeader*)ptr;
- DStatePtr->state = BITv05_readBits(bitD, DTableH->tableLog);
- BITv05_reloadDStream(bitD);
- DStatePtr->table = dt + 1;
-}
-
-MEM_STATIC size_t FSEv05_getStateValue(FSEv05_DState_t* DStatePtr)
-{
- return DStatePtr->state;
-}
-
-MEM_STATIC BYTE FSEv05_peakSymbol(FSEv05_DState_t* DStatePtr)
-{
- const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
- return DInfo.symbol;
-}
-
-MEM_STATIC BYTE FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
-{
- const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BITv05_readBits(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC BYTE FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
-{
- const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
- const U32 nbBits = DInfo.nbBits;
- BYTE symbol = DInfo.symbol;
- size_t lowBits = BITv05_readBitsFast(bitD, nbBits);
-
- DStatePtr->state = DInfo.newState + lowBits;
- return symbol;
-}
-
-MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
-{
- return DStatePtr->state == 0;
-}
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* FSEv05_STATIC_H */
-/* ******************************************************************
- FSEv05 : Finite State Entropy coder
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-#ifndef FSEv05_COMMONDEFS_ONLY
-
-/* **************************************************************
-* Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-* Increasing memory usage improves compression ratio
-* Reduced memory usage can improve speed, due to cache effect
-* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSEv05_MAX_MEMORY_USAGE 14
-#define FSEv05_DEFAULT_MEMORY_USAGE 13
-
-/*!FSEv05_MAX_SYMBOL_VALUE :
-* Maximum symbol value authorized.
-* Required for proper stack allocation */
-#define FSEv05_MAX_SYMBOL_VALUE 255
-
-
-/* **************************************************************
-* template functions type & suffix
-****************************************************************/
-#define FSEv05_FUNCTION_TYPE BYTE
-#define FSEv05_FUNCTION_EXTENSION
-#define FSEv05_DECODE_TYPE FSEv05_decode_t
-
-
-#endif /* !FSEv05_COMMONDEFS_ONLY */
-
-/* **************************************************************
-* Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
-#else
-# ifdef __GNUC__
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* **************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-
-
-/* ***************************************************************
-* Constants
-*****************************************************************/
-#define FSEv05_MAX_TABLELOG (FSEv05_MAX_MEMORY_USAGE-2)
-#define FSEv05_MAX_TABLESIZE (1U<<FSEv05_MAX_TABLELOG)
-#define FSEv05_MAXTABLESIZE_MASK (FSEv05_MAX_TABLESIZE-1)
-#define FSEv05_DEFAULT_TABLELOG (FSEv05_DEFAULT_MEMORY_USAGE-2)
-#define FSEv05_MIN_TABLELOG 5
-
-#define FSEv05_TABLELOG_ABSOLUTE_MAX 15
-#if FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX
-#error "FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-
-/* **************************************************************
-* Error Management
-****************************************************************/
-#define FSEv05_STATIC_ASSERT(c) { enum { FSEv05_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
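-/* note : when the condition c is false, 1/(int)(!!(c)) divides by zero inside a constant expression
-   (an enum initializer), which the compiler must reject; this emulates a static assert in C89 */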
-
-
-/* **************************************************************
-* Complex types
-****************************************************************/
-typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
-
-
-/* **************************************************************
-* Templates
-****************************************************************/
-/*
- designed to be included
- for type-specific functions (template emulation in C)
- Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSEv05_FUNCTION_EXTENSION
-# error "FSEv05_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSEv05_FUNCTION_TYPE
-# error "FSEv05_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSEv05_CAT(X,Y) X##Y
-#define FSEv05_FUNCTION_NAME(X,Y) FSEv05_CAT(X,Y)
-#define FSEv05_TYPE_NAME(X,Y) FSEv05_CAT(X,Y)
-
-
-/* Function templates */
-static U32 FSEv05_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
-
-
-
-FSEv05_DTable* FSEv05_createDTable (unsigned tableLog)
-{
- if (tableLog > FSEv05_TABLELOG_ABSOLUTE_MAX) tableLog = FSEv05_TABLELOG_ABSOLUTE_MAX;
- return (FSEv05_DTable*)malloc( FSEv05_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
-}
-
-void FSEv05_freeDTable (FSEv05_DTable* dt)
-{
- free(dt);
-}
-
-size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
- FSEv05_DTableHeader DTableH;
- void* const tdPtr = dt+1; /* because dt is unsigned, 32-bits aligned on 32-bits */
- FSEv05_DECODE_TYPE* const tableDecode = (FSEv05_DECODE_TYPE*) (tdPtr);
- const U32 tableSize = 1 << tableLog;
- const U32 tableMask = tableSize-1;
- const U32 step = FSEv05_tableStep(tableSize);
- U16 symbolNext[FSEv05_MAX_SYMBOL_VALUE+1];
- U32 position = 0;
- U32 highThreshold = tableSize-1;
- const S16 largeLimit= (S16)(1 << (tableLog-1));
- U32 noLarge = 1;
- U32 s;
-
- /* Sanity Checks */
- if (maxSymbolValue > FSEv05_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
- if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-
- /* Init, lay down lowprob symbols */
- DTableH.tableLog = (U16)tableLog;
- for (s=0; s<=maxSymbolValue; s++) {
- if (normalizedCounter[s]==-1) {
- tableDecode[highThreshold--].symbol = (FSEv05_FUNCTION_TYPE)s;
- symbolNext[s] = 1;
- } else {
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
- symbolNext[s] = normalizedCounter[s];
- } }
-
- /* Spread symbols */
- for (s=0; s<=maxSymbolValue; s++) {
- int i;
- for (i=0; i<normalizedCounter[s]; i++) {
- tableDecode[position].symbol = (FSEv05_FUNCTION_TYPE)s;
- position = (position + step) & tableMask;
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
- } }
-
- if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-
- /* Build Decoding table */
- {
- U32 i;
- for (i=0; i<tableSize; i++) {
- FSEv05_FUNCTION_TYPE symbol = (FSEv05_FUNCTION_TYPE)(tableDecode[i].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[i].nbBits = (BYTE) (tableLog - BITv05_highbit32 ((U32)nextState) );
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
- } }
-
- DTableH.fastMode = (U16)noLarge;
- memcpy(dt, &DTableH, sizeof(DTableH));
- return 0;
-}
-
-
-#ifndef FSEv05_COMMONDEFS_ONLY
-/*-****************************************
-* FSEv05 helper functions
-******************************************/
-unsigned FSEv05_isError(size_t code) { return ERR_isError(code); }
-
-const char* FSEv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
-
-
-/*-**************************************************************
-* FSEv05 NCount encoding-decoding
-****************************************************************/
-static short FSEv05_abs(short a) { return a<0 ? -a : a; }
-
-
-size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
- const void* headerBuffer, size_t hbSize)
-{
- const BYTE* const istart = (const BYTE*) headerBuffer;
- const BYTE* const iend = istart + hbSize;
- const BYTE* ip = istart;
- int nbBits;
- int remaining;
- int threshold;
- U32 bitStream;
- int bitCount;
- unsigned charnum = 0;
- int previous0 = 0;
-
- if (hbSize < 4) return ERROR(srcSize_wrong);
- bitStream = MEM_readLE32(ip);
- nbBits = (bitStream & 0xF) + FSEv05_MIN_TABLELOG; /* extract tableLog */
- if (nbBits > FSEv05_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
- bitStream >>= 4;
- bitCount = 4;
- *tableLogPtr = nbBits;
- remaining = (1<<nbBits)+1;
- threshold = 1<<nbBits;
- nbBits++;
-
- while ((remaining>1) && (charnum<=*maxSVPtr)) {
- if (previous0) {
- unsigned n0 = charnum;
- while ((bitStream & 0xFFFF) == 0xFFFF) {
- n0+=24;
- if (ip < iend-5) {
- ip+=2;
- bitStream = MEM_readLE32(ip) >> bitCount;
- } else {
- bitStream >>= 16;
- bitCount+=16;
- } }
- while ((bitStream & 3) == 3) {
- n0+=3;
- bitStream>>=2;
- bitCount+=2;
- }
- n0 += bitStream & 3;
- bitCount += 2;
- if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
- while (charnum < n0) normalizedCounter[charnum++] = 0;
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
- ip += bitCount>>3;
- bitCount &= 7;
- bitStream = MEM_readLE32(ip) >> bitCount;
- }
- else
- bitStream >>= 2;
- }
- {
- const short max = (short)((2*threshold-1)-remaining);
- short count;
-
- if ((bitStream & (threshold-1)) < (U32)max) {
- count = (short)(bitStream & (threshold-1));
- bitCount += nbBits-1;
- } else {
- count = (short)(bitStream & (2*threshold-1));
- if (count >= threshold) count -= max;
- bitCount += nbBits;
- }
-
- count--; /* extra accuracy */
- remaining -= FSEv05_abs(count);
- normalizedCounter[charnum++] = count;
- previous0 = !count;
- while (remaining < threshold) {
- nbBits--;
- threshold >>= 1;
- }
-
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
- ip += bitCount>>3;
- bitCount &= 7;
- } else {
- bitCount -= (int)(8 * (iend - 4 - ip));
- ip = iend - 4;
- }
- bitStream = MEM_readLE32(ip) >> (bitCount & 31);
- } }
- if (remaining != 1) return ERROR(GENERIC);
- *maxSVPtr = charnum-1;
-
- ip += (bitCount+7)>>3;
- if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
- return ip-istart;
-}
-
-
-
-/*-*******************************************************
-* Decompression (Byte symbols)
-*********************************************************/
-size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, BYTE symbolValue)
-{
- void* ptr = dt;
- FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
- void* dPtr = dt + 1;
- FSEv05_decode_t* const cell = (FSEv05_decode_t*)dPtr;
-
- DTableH->tableLog = 0;
- DTableH->fastMode = 0;
-
- cell->newState = 0;
- cell->symbol = symbolValue;
- cell->nbBits = 0;
-
- return 0;
-}
-
-
-size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits)
-{
- void* ptr = dt;
- FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
- void* dPtr = dt + 1;
- FSEv05_decode_t* const dinfo = (FSEv05_decode_t*)dPtr;
- const unsigned tableSize = 1 << nbBits;
- const unsigned tableMask = tableSize - 1;
- const unsigned maxSymbolValue = tableMask;
- unsigned s;
-
- /* Sanity checks */
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
- /* Build Decoding Table */
- DTableH->tableLog = (U16)nbBits;
- DTableH->fastMode = 1;
- for (s=0; s<=maxSymbolValue; s++) {
- dinfo[s].newState = 0;
- dinfo[s].symbol = (BYTE)s;
- dinfo[s].nbBits = (BYTE)nbBits;
- }
-
- return 0;
-}
-
-FORCE_INLINE size_t FSEv05_decompress_usingDTable_generic(
- void* dst, size_t maxDstSize,
- const void* cSrc, size_t cSrcSize,
- const FSEv05_DTable* dt, const unsigned fast)
-{
- BYTE* const ostart = (BYTE*) dst;
- BYTE* op = ostart;
- BYTE* const omax = op + maxDstSize;
- BYTE* const olimit = omax-3;
-
- BITv05_DStream_t bitD;
- FSEv05_DState_t state1;
- FSEv05_DState_t state2;
- size_t errorCode;
-
- /* Init */
- errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize); /* last argument replaced by maxCompressedSize */
- if (FSEv05_isError(errorCode)) return errorCode;
-
- FSEv05_initDState(&state1, &bitD, dt);
- FSEv05_initDState(&state2, &bitD, dt);
-
-#define FSEv05_GETSYMBOL(statePtr) fast ? FSEv05_decodeSymbolFast(statePtr, &bitD) : FSEv05_decodeSymbol(statePtr, &bitD)
-
- /* 4 symbols per loop */
- for ( ; (BITv05_reloadDStream(&bitD)==BITv05_DStream_unfinished) && (op<olimit) ; op+=4) {
- op[0] = FSEv05_GETSYMBOL(&state1);
-
- if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BITv05_reloadDStream(&bitD);
-
- op[1] = FSEv05_GETSYMBOL(&state2);
-
- if (FSEv05_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- { if (BITv05_reloadDStream(&bitD) > BITv05_DStream_unfinished) { op+=2; break; } }
-
- op[2] = FSEv05_GETSYMBOL(&state1);
-
- if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
- BITv05_reloadDStream(&bitD);
-
- op[3] = FSEv05_GETSYMBOL(&state2);
- }
-
- /* tail */
- /* note : BITv05_reloadDStream(&bitD) >= FSEv05_DStream_partiallyFilled; Ends at exactly BITv05_DStream_completed */
- while (1) {
- if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state1))) )
- break;
-
- *op++ = FSEv05_GETSYMBOL(&state1);
-
- if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state2))) )
- break;
-
- *op++ = FSEv05_GETSYMBOL(&state2);
- }
-
- /* end ? */
- if (BITv05_endOfDStream(&bitD) && FSEv05_endOfDState(&state1) && FSEv05_endOfDState(&state2))
- return op-ostart;
-
- if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
-
- return ERROR(corruption_detected);
-}
-
-
-size_t FSEv05_decompress_usingDTable(void* dst, size_t originalSize,
- const void* cSrc, size_t cSrcSize,
- const FSEv05_DTable* dt)
-{
- const void* ptr = dt;
- const FSEv05_DTableHeader* DTableH = (const FSEv05_DTableHeader*)ptr;
- const U32 fastMode = DTableH->fastMode;
-
- /* select fast mode (static) */
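- /* note : fastMode==1 means no symbol holds a probability >= half the table, so every state
-    consumes at least one bit; the *Fast bit-reading variants, which assume nbBits >= 1, are then safe */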
- if (fastMode) return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
- return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-size_t FSEv05_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
- const BYTE* const istart = (const BYTE*)cSrc;
- const BYTE* ip = istart;
- short counting[FSEv05_MAX_SYMBOL_VALUE+1];
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
- unsigned tableLog;
- unsigned maxSymbolValue = FSEv05_MAX_SYMBOL_VALUE;
- size_t errorCode;
-
- if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
-
- /* normal FSEv05 decoding mode */
- errorCode = FSEv05_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
- if (FSEv05_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
- ip += errorCode;
- cSrcSize -= errorCode;
-
- errorCode = FSEv05_buildDTable (dt, counting, maxSymbolValue, tableLog);
- if (FSEv05_isError(errorCode)) return errorCode;
-
- /* always return, even if it is an error code */
- return FSEv05_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
-}
-
-
-
-#endif /* FSEv05_COMMONDEFS_ONLY */
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- header file
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-****************************************************************** */
-#ifndef HUFF0_H
-#define HUFF0_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-
-/* ****************************************
-* Huff0 simple functions
-******************************************/
-size_t HUFv05_decompress(void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize);
-/*!
-HUFv05_decompress():
- Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated destination buffer 'dst', of size 'dstSize'.
- @dstSize : must be the **exact** size of original (uncompressed) data.
- Note : in contrast with FSEv05, HUFv05_decompress can regenerate
- RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
- because it knows the size to regenerate.
- @return : size of regenerated data (== dstSize)
- or an error code, which can be tested using HUFv05_isError()
-*/
-
-
-/* ****************************************
-* Tool functions
-******************************************/
-/* Error Management */
-unsigned HUFv05_isError(size_t code); /* tells if a return value is an error code */
-const char* HUFv05_getErrorName(size_t code); /* provides error code string (useful for debugging) */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* HUFF0_H */
-/* ******************************************************************
- Huff0 : Huffman codec, part of New Generation Entropy library
- header file, for static linking only
- Copyright (C) 2013-2016, Yann Collet
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
-****************************************************************** */
-#ifndef HUF0_STATIC_H
-#define HUF0_STATIC_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-
-/* ****************************************
-* Static allocation
-******************************************/
-/* static allocation of Huff0's DTable */
-#define HUFv05_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog))
-#define HUFv05_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
- unsigned short DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUFv05_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
- unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
-#define HUFv05_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
- unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
-
-
-/* ****************************************
-* Advanced decompression functions
-******************************************/
-size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
-size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
-
-
-/* ****************************************
-* Huff0 detailed API
-******************************************/
-/*!
-HUFv05_decompress() does the following:
-1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
-2. build the Huffman table from the saved header, using HUFv05_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUFv05_decompressSXn_usingDTable()
-*/
-size_t HUFv05_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
-size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
-
-size_t HUFv05_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
-size_t HUFv05_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
-
-
-/* single stream variants */
-
-size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
-size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
-
-size_t HUFv05_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
-size_t HUFv05_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
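-
-/* Illustrative sketch (not part of the original sources) : the decomposed flow described in
- * "Huff0 detailed API" above, using the single-symbol (X2) decoder with a statically allocated
- * DTable. Assumes dstSize is the exact regenerated size, and uses 12 as max tableLog (the
- * library default); the (size_t)-1 return is just a generic error sentinel for this sketch. */
-static size_t HUFv05_example_decompressX2(void* dst, size_t dstSize,
-                                          const void* cSrc, size_t cSrcSize)
-{
-    HUFv05_CREATE_STATIC_DTABLEX2(DTable, 12);                          /* storage for the decoding table */
-    size_t const hSize = HUFv05_readDTableX2(DTable, cSrc, cSrcSize);   /* step 2 : build table from saved header */
-    if (HUFv05_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return (size_t)-1;                           /* header consumed everything : corrupt input */
-    /* step 3 : decode the payload that follows the header, using the prepared DTable */
-    return HUFv05_decompress1X2_usingDTable(dst, dstSize,
-                                            (const BYTE*)cSrc + hSize, cSrcSize - hSize,
-                                            DTable);
-}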
-
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* HUF0_STATIC_H */
-/* ******************************************************************
- Huff0 : Huffman coder, part of New Generation Entropy library
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSEv05+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-****************************************************************** */
-
-/* **************************************************************
-* Compiler specifics
-****************************************************************/
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER)
-# define inline __inline
-#else
-# define inline /* disable inline */
-#endif
-
-
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#else
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/* **************************************************************
-* Includes
-****************************************************************/
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memcpy, memset */
-#include <stdio.h> /* printf (debug) */
-
-
-/* **************************************************************
-* Constants
-****************************************************************/
-#define HUFv05_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUFv05_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUFv05_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUFv05_ABSOLUTEMAX_TABLELOG */
-#define HUFv05_DEFAULT_TABLELOG HUFv05_MAX_TABLELOG /* tableLog by default, when not specified */
-#define HUFv05_MAX_SYMBOL_VALUE 255
-#if (HUFv05_MAX_TABLELOG > HUFv05_ABSOLUTEMAX_TABLELOG)
-# error "HUFv05_MAX_TABLELOG is too large !"
-#endif
-
-
-/* **************************************************************
-* Error Management
-****************************************************************/
-unsigned HUFv05_isError(size_t code) { return ERR_isError(code); }
-const char* HUFv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
-#define HUFv05_STATIC_ASSERT(c) { enum { HUFv05_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
-
-
-/* *******************************************************
-* Huff0 : Huffman block decompression
-*********************************************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUFv05_DEltX2; /* single-symbol decoding */
-
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv05_DEltX4; /* double-symbols decoding */
-
-typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
-
-/*! HUFv05_readStats
- Read compact Huffman tree, saved by HUFv05_writeCTable
- @huffWeight : destination buffer
- @return : size read from `src`
-*/
-static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
- U32* nbSymbolsPtr, U32* tableLogPtr,
- const void* src, size_t srcSize)
-{
- U32 weightTotal;
- U32 tableLog;
- const BYTE* ip = (const BYTE*) src;
- size_t iSize = ip[0];
- size_t oSize;
- U32 n;
-
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzers complain ... */
-
- if (iSize >= 128) { /* special header */
- if (iSize >= (242)) { /* RLE */
- static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
- oSize = l[iSize-242];
- memset(huffWeight, 1, hwSize);
- iSize = 0;
- }
- else { /* Incompressible */
- oSize = iSize - 127;
- iSize = ((oSize+1)/2);
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- if (oSize >= hwSize) return ERROR(corruption_detected);
- ip += 1;
- for (n=0; n<oSize; n+=2) {
- huffWeight[n] = ip[n/2] >> 4;
- huffWeight[n+1] = ip[n/2] & 15;
- } } }
- else { /* header compressed with FSEv05 (normal case) */
- if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
- oSize = FSEv05_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
- if (FSEv05_isError(oSize)) return oSize;
- }
-
- /* collect weight stats */
- memset(rankStats, 0, (HUFv05_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
- weightTotal = 0;
- for (n=0; n<oSize; n++) {
- if (huffWeight[n] >= HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- rankStats[huffWeight[n]]++;
- weightTotal += (1 << huffWeight[n]) >> 1;
- }
-
- /* get last non-null symbol weight (implied, total must be 2^n) */
- tableLog = BITv05_highbit32(weightTotal) + 1;
- if (tableLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- { /* determine last weight */
- U32 total = 1 << tableLog;
- U32 rest = total - weightTotal;
- U32 verif = 1 << BITv05_highbit32(rest);
- U32 lastWeight = BITv05_highbit32(rest) + 1;
- if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
- huffWeight[oSize] = (BYTE)lastWeight;
- rankStats[lastWeight]++;
- }
-
- /* check tree construction validity */
- if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
- /* results */
- *nbSymbolsPtr = (U32)(oSize+1);
- *tableLogPtr = tableLog;
- return iSize+1;
-}
-
-
-/*-***************************/
-/* single-symbol decoding */
-/*-***************************/
-
-size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
-{
- BYTE huffWeight[HUFv05_MAX_SYMBOL_VALUE + 1];
- U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
- U32 tableLog = 0;
- size_t iSize;
- U32 nbSymbols = 0;
- U32 n;
- U32 nextRankStart;
- void* const dtPtr = DTable + 1;
- HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr;
-
- HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzers complain ... */
-
- iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
- if (HUFv05_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
- DTable[0] = (U16)tableLog; /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
-
- /* Prepare ranks */
- nextRankStart = 0;
- for (n=1; n<=tableLog; n++) {
- U32 current = nextRankStart;
- nextRankStart += (rankVal[n] << (n-1));
- rankVal[n] = current;
- }
-
- /* fill DTable */
- for (n=0; n<nbSymbols; n++) {
- const U32 w = huffWeight[n];
- const U32 length = (1 << w) >> 1;
- U32 i;
- HUFv05_DEltX2 D;
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
- for (i = rankVal[w]; i < rankVal[w] + length; i++)
- dt[i] = D;
- rankVal[w] += length;
- }
-
- return iSize;
-}
-
-static BYTE HUFv05_decodeSymbolX2(BITv05_DStream_t* Dstream, const HUFv05_DEltX2* dt, const U32 dtLog)
-{
- const size_t val = BITv05_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
- const BYTE c = dt[val].byte;
- BITv05_skipBits(Dstream, dt[val].nbBits);
- return c;
-}
-
-#define HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
- *ptr++ = HUFv05_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUFv05_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
- HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUFv05_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-static inline size_t HUFv05_decodeStreamX2(BYTE* p, BITv05_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv05_DEltX2* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 4 symbols at a time */
- while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-4)) {
- HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUFv05_DECODE_SYMBOLX2_1(p, bitDPtr);
- HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
- HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd))
- HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- /* no more data to retrieve from bitstream, hence no need to reload */
- while (p < pEnd)
- HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
-
- return pEnd-pStart;
-}
-
-size_t HUFv05_decompress1X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U16* DTable)
-{
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + dstSize;
- size_t errorCode;
- const U32 dtLog = DTable[0];
- const void* dtPtr = DTable;
- const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1;
- BITv05_DStream_t bitD;
- errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
- if (HUFv05_isError(errorCode)) return errorCode;
-
- HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog);
-
- /* check */
- if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
- return dstSize;
-}
-
-size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
- size_t errorCode;
-
- errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
- if (HUFv05_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
- ip += errorCode;
- cSrcSize -= errorCode;
-
- return HUFv05_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-size_t HUFv05_decompress4X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U16* DTable)
-{
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable;
- const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BITv05_DStream_t bitD1;
- BITv05_DStream_t bitD2;
- BITv05_DStream_t bitD3;
- BITv05_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
+/* ******************************************************************
+ zstd_v05.c
+ Decompression module for ZSTD v0.5 legacy format
+ Copyright (C) 2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Homepage : http://www.zstd.net/
+****************************************************************** */
+
+/*- Dependencies -*/
+#include "zstd_v05.h"
+
+
+/* ******************************************************************
+ mem.h
+ low-level memory access routines
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include <string.h> /* memcpy */
+
+
+/*-****************************************
+* Compiler specifics
+******************************************/
+#if defined(__GNUC__)
+# define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef signed short S16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef signed long long S64;
+#endif
+
+
+/*-**************************************************************
+* Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define MEM_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define MEM_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* __packed statements are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+ can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read16(memPtr);
+ else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)(p[0] + (p[1]<<8));
+ }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+ if (MEM_isLittleEndian()) {
+ MEM_write16(memPtr, val);
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val;
+ p[1] = (BYTE)(val>>8);
+ }
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read32(memPtr);
+ else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+ }
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+ if (MEM_isLittleEndian()) {
+ MEM_write32(memPtr, val32);
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val32;
+ p[1] = (BYTE)(val32>>8);
+ p[2] = (BYTE)(val32>>16);
+ p[3] = (BYTE)(val32>>24);
+ }
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+ if (MEM_isLittleEndian())
+ return MEM_read64(memPtr);
+ else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+ + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+ }
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+ if (MEM_isLittleEndian()) {
+ MEM_write64(memPtr, val64);
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE)val64;
+ p[1] = (BYTE)(val64>>8);
+ p[2] = (BYTE)(val64>>16);
+ p[3] = (BYTE)(val64>>24);
+ p[4] = (BYTE)(val64>>32);
+ p[5] = (BYTE)(val64>>40);
+ p[6] = (BYTE)(val64>>48);
+ p[7] = (BYTE)(val64>>56);
+ }
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+ if (MEM_32bits())
+ return (size_t)MEM_readLE32(memPtr);
+ else
+ return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{
+ if (MEM_32bits())
+ MEM_writeLE32(memPtr, (U32)val);
+ else
+ MEM_writeLE64(memPtr, (U64)val);
+}
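+
+/* Illustrative sketch (not part of the original sources) : the helpers above provide
+ * endian-independent serialization; a value written with MEM_writeLE32() reads back
+ * identically on any host, regardless of native byte order. */
+MEM_STATIC U32 MEM_exampleRoundTripLE32(U32 value)
+{
+    BYTE buffer[4];
+    MEM_writeLE32(buffer, value);   /* always stored in little-endian byte order */
+    return MEM_readLE32(buffer);    /* returns `value` on both LE and BE hosts */
+}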
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+/* ******************************************************************
+ Error codes list
+ Copyright (C) 2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+#ifndef ERROR_PUBLIC_H_MODULE
+#define ERROR_PUBLIC_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+* error codes list
+******************************************/
+typedef enum {
+ ZSTDv05_error_no_error,
+ ZSTDv05_error_GENERIC,
+ ZSTDv05_error_prefix_unknown,
+ ZSTDv05_error_frameParameter_unsupported,
+ ZSTDv05_error_frameParameter_unsupportedBy32bits,
+ ZSTDv05_error_init_missing,
+ ZSTDv05_error_memory_allocation,
+ ZSTDv05_error_stage_wrong,
+ ZSTDv05_error_dstSize_tooSmall,
+ ZSTDv05_error_srcSize_wrong,
+ ZSTDv05_error_corruption_detected,
+ ZSTDv05_error_tableLog_tooLarge,
+ ZSTDv05_error_maxSymbolValue_tooLarge,
+ ZSTDv05_error_maxSymbolValue_tooSmall,
+ ZSTDv05_error_dictionary_corrupted,
+ ZSTDv05_error_maxCode
+} ZSTDv05_ErrorCode;
+
+/* note : functions return error codes as negated enum values (hence in reverse order),
+   so compare with (size_t)(0-enum) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_PUBLIC_H_MODULE */
+
+
+/*
+ zstd - standard compression library
+ Header File for static linking only
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd homepage : http://www.zstd.net
+*/
+#ifndef ZSTD_STATIC_H
+#define ZSTD_STATIC_H
+
+/* The prototypes defined within this file are considered experimental.
+ * They should not be used in a DLL context, as they may change in future versions.
+ * Prefer static linking if you need them, to limit exposure to breaking changes between versions.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/*-*************************************
+* Types
+***************************************/
+#define ZSTDv05_WINDOWLOG_ABSOLUTEMIN 11
+
+
+/*-*************************************
+* Advanced functions
+***************************************/
+/*- Advanced Decompression functions -*/
+
+/*! ZSTDv05_decompress_usingPreparedDCtx() :
+* Same as ZSTDv05_decompress_usingDict(), but uses a reference context `preparedDCtx`, where the dictionary has already been loaded.
+* It avoids reloading the dictionary each time.
+* `preparedDCtx` must have been properly initialized using ZSTDv05_decompressBegin_usingDict().
+* Requires 2 contexts : one for reference, which will not be modified, and one to run the decompression operation */
+size_t ZSTDv05_decompress_usingPreparedDCtx(
+ ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+
+/* **************************************
+* Streaming functions (direct mode)
+****************************************/
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
+void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* preparedDCtx);
+
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
+
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/*
+ Streaming decompression, direct mode (bufferless)
+
+ A ZSTDv05_DCtx object is required to track streaming operations.
+ Use ZSTDv05_createDCtx() / ZSTDv05_freeDCtx() to manage it.
+ A ZSTDv05_DCtx object can be re-used multiple times.
+
+  The first typical operation is to retrieve frame parameters, using ZSTDv05_getFrameParams().
+  This operation is independent, and just needs enough input data to properly decode the frame header.
+  The objective is to retrieve params->windowLog, to know the minimum amount of memory required during decoding.
+  Result : 0 when successful; it means the ZSTDv05_parameters structure has been filled.
+           >0 : means there is not enough data in src. Provides the expected size to successfully decode the header.
+           an error code, which can be tested using ZSTDv05_isError()
+
+  Start decompression with ZSTDv05_decompressBegin() or ZSTDv05_decompressBegin_usingDict().
+  Alternatively, you can copy a prepared context, using ZSTDv05_copyDCtx().
+
+  Then use ZSTDv05_nextSrcSizeToDecompress() and ZSTDv05_decompressContinue() in alternation.
+  ZSTDv05_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv05_decompressContinue().
+  ZSTDv05_decompressContinue() requires this exact amount of bytes, or it will fail.
+  ZSTDv05_decompressContinue() needs previous data blocks during decompression, up to (1 << windowLog) bytes.
+  They should preferably be located contiguously, prior to the current block. Alternatively, a ring buffer is also possible.
+
+  @result of ZSTDv05_decompressContinue() is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv05_decompressContinue() has decoded some header.
+
+ A frame is fully decoded when ZSTDv05_nextSrcSizeToDecompress() returns zero.
+ Context can then be reset to start a new decompression.
+*/
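+
+/* Illustrative sketch (not part of the original sources) : one way to drive the bufferless
+ * streaming loop described above. Assumes the whole frame is already in memory, that dst is
+ * large enough, and that ZSTDv05_DCtx / ZSTDv05_isError come from zstd_v05.h; errors are
+ * reported with the negated-enum convention used throughout this file. */
+static size_t ZSTDv05_exampleStreamingDecompress(ZSTDv05_DCtx* dctx,
+                                                 void* dst, size_t dstCapacity,
+                                                 const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    size_t toRead;
+    size_t const initError = ZSTDv05_decompressBegin(dctx);      /* fresh context, no dictionary */
+    if (ZSTDv05_isError(initError)) return initError;
+    while ((toRead = ZSTDv05_nextSrcSizeToDecompress(dctx)) != 0) {
+        size_t decoded;
+        if (toRead > srcSize)
+            return (size_t)-ZSTDv05_error_srcSize_wrong;          /* not enough input left */
+        decoded = ZSTDv05_decompressContinue(dctx, op, dstCapacity, ip, toRead);
+        if (ZSTDv05_isError(decoded)) return decoded;
+        ip += toRead; srcSize -= toRead;                           /* consume exactly toRead bytes */
+        op += decoded; dstCapacity -= decoded;                     /* decoded may be 0 (header only) */
+    }
+    return (size_t)(op - (BYTE*)dst);                              /* total regenerated size */
+}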
+
+
+/* **************************************
+* Block functions
+****************************************/
+/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+ The user is responsible for keeping track of the information required to regenerate the data, such as block sizes.
+
+ A few rules to respect :
+ - Uncompressed block size must be <= 128 KB
+ - Compressing or decompressing requires a context structure
+ + Use ZSTDv05_createCCtx() and ZSTDv05_createDCtx()
+ - It is necessary to initialize the context before starting
+ + compression : ZSTDv05_compressBegin()
+ + decompression : ZSTDv05_decompressBegin()
+ + variants _usingDict() are also allowed
+ + copyCCtx() and copyDCtx() work too
+ - When a block is considered not compressible enough, the ZSTDv05_compressBlock() result will be zero.
+ In that case, nothing is produced into `dst`.
+ + The user must test for such an outcome and deal directly with the uncompressed data
+ + ZSTDv05_decompressBlock() doesn't accept uncompressed data as input !!
+*/
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
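+
+/* Illustrative sketch (not part of the original sources) : minimal use of the block API,
+ * following the rules above. The caller tracks blockSize and the regenerated size itself,
+ * since raw blocks carry no frame metadata; dctx must be initialized before decoding. */
+static size_t ZSTDv05_exampleDecodeRawBlock(ZSTDv05_DCtx* dctx,
+                                            void* dst, size_t dstCapacity,
+                                            const void* compressedBlock, size_t blockSize)
+{
+    size_t const initError = ZSTDv05_decompressBegin(dctx);   /* init context before starting */
+    if (ZSTDv05_isError(initError)) return initError;
+    return ZSTDv05_decompressBlock(dctx, dst, dstCapacity, compressedBlock, blockSize);
+}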
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_STATIC_H */
+
+
+
+/* ******************************************************************
+ Error codes and messages
+ Copyright (C) 2013-2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/zstd
+****************************************************************** */
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+* Customization
+******************************************/
+typedef ZSTDv05_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTDv05_error_##name
+
+
+/*-****************************************
+* Error codes handling
+******************************************/
+#ifdef ERROR
+# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
+#endif
+#define ERROR(name) (size_t)-PREFIX(name)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getError(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
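+
+/* Illustrative sketch (not part of the original sources) : how the negated-enum convention
+ * is exercised. ERROR(name) produces a size_t near SIZE_MAX, which ERR_isError() recognizes
+ * and ERR_getError() maps back to the enum value. */
+ERR_STATIC ZSTDv05_ErrorCode ERR_exampleClassify(size_t returnedCode)
+{
+    if (!ERR_isError(returnedCode)) return ZSTDv05_error_no_error;   /* a regular size, not an error */
+    return ERR_getError(returnedCode);                               /* e.g. ERROR(srcSize_wrong) -> ZSTDv05_error_srcSize_wrong */
+}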
+
+
+/*-****************************************
+* Error Strings
+******************************************/
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ static const char* notErrorCode = "Unspecified error code";
+ switch( ERR_getError(code) )
+ {
+ case PREFIX(no_error): return "No error detected";
+ case PREFIX(GENERIC): return "Error (generic)";
+ case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+ case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+ case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+ case PREFIX(init_missing): return "Context should be init first";
+ case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+ case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+ case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+ case PREFIX(srcSize_wrong): return "Src size incorrect";
+ case PREFIX(corruption_detected): return "Corrupted block detected";
+ case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory";
+ case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max possible Symbol Value : too large";
+ case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+ case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+ case PREFIX(maxCode):
+ default: return notErrorCode; /* should be impossible, due to ERR_getError() */
+ }
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+/*
+ zstd_internal - common functions to include
+ Header File for include
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+
+
+/*-*************************************
+* Common macros
+***************************************/
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+
+/*-*************************************
+* Common constants
+***************************************/
+#define ZSTDv05_DICT_MAGIC 0xEC30A435
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB) /* define, for static allocation */
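+/* For reference : with the helpers above, `128 KB` expands to `128 *(1 <<10)` = 131072 bytes. */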
+
+static const size_t ZSTDv05_blockHeaderSize = 3;
+static const size_t ZSTDv05_frameHeaderSize_min = 5;
+#define ZSTDv05_frameHeaderSize_max 5 /* define, for static allocation */
+
+#define BITv057 128
+#define BITv056 64
+#define BITv055 32
+#define BITv054 16
+#define BITv051 2
+#define BITv050 1
+
+#define IS_HUFv05 0
+#define IS_PCH 1
+#define IS_RAW 2
+#define IS_RLE 3
+
+#define MINMATCH 4
+#define REPCODE_STARTVALUE 1
+
+#define Litbits 8
+#define MLbits 7
+#define LLbits 6
+#define Offbits 5
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML ((1<<MLbits) - 1)
+#define MaxLL ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define MLFSEv05Log 10
+#define LLFSEv05Log 10
+#define OffFSEv05Log 9
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define FSEv05_ENCODING_RAW 0
+#define FSEv05_ENCODING_RLE 1
+#define FSEv05_ENCODING_STATIC 2
+#define FSEv05_ENCODING_DYNAMIC 3
+
+
+#define HufLog 12
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
+
+#define WILDCOPY_OVERLENGTH 8
+
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+
+/*-*******************************************
+* Shared functions to include for inlining
+*********************************************/
+static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s) { ZSTDv05_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTDv05_wildcopy() :
+* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
+MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, size_t length)
+{
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + length;
+ do
+ COPY8(op, ip)
+ while (op < oend);
+}
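+/* Usage note (inferred from WILDCOPY_OVERLENGTH above, not stated explicitly here) :
+   since ZSTDv05_wildcopy() may write past `dst + length`, destination buffers are
+   expected to reserve WILDCOPY_OVERLENGTH (8) spare bytes at their end. */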
+
+MEM_STATIC unsigned ZSTDv05_highbit(U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse(&r, val);
+ return (unsigned)r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
+ return 31 - __builtin_clz(val);
+# else /* Software version */
+ static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ int r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
+
+
+/*-*******************************************
+* Private interfaces
+*********************************************/
+typedef struct {
+ void* buffer;
+ U32* offsetStart;
+ U32* offset;
+ BYTE* offCodeStart;
+ BYTE* offCode;
+ BYTE* litStart;
+ BYTE* lit;
+ BYTE* litLengthStart;
+ BYTE* litLength;
+ BYTE* matchLengthStart;
+ BYTE* matchLength;
+ BYTE* dumpsStart;
+ BYTE* dumps;
+ /* opt */
+ U32* matchLengthFreq;
+ U32* litLengthFreq;
+ U32* litFreq;
+ U32* offCodeFreq;
+ U32 matchLengthSum;
+ U32 litLengthSum;
+ U32 litSum;
+ U32 offCodeSum;
+} seqStore_t;
+
+
+
+#endif /* ZSTD_CCOMMON_H_MODULE */
+/* ******************************************************************
+ FSEv05 : Finite State Entropy coder
+ header file
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_H
+#define FSEv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *****************************************
+* Includes
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+
+
+/*-****************************************
+* FSEv05 simple functions
+******************************************/
+size_t FSEv05_decompress(void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize);
+/*!
+FSEv05_decompress():
+ Decompress FSEv05 data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'maxDstSize'.
+ return : size of regenerated data (<= maxDstSize)
+ or an error code, which can be tested using FSEv05_isError()
+
+ ** Important ** : FSEv05_decompress() doesn't decompress non-compressible nor RLE data !!!
+ Why ? : making this distinction requires a header.
+ Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
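+/*
+Usage sketch (buffer names `src`, `srcSize`, `dst`, `dstCapacity` are hypothetical) :
+
+    size_t r = FSEv05_decompress(dst, dstCapacity, src, srcSize);
+    if (FSEv05_isError(r)) return r;      // FSEv05_getErrorName(r) gives a readable message
+    // on success, r is the number of regenerated bytes (<= dstCapacity)
+*/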
+
+
+/* *****************************************
+* Tool functions
+******************************************/
+/* Error Management */
+unsigned FSEv05_isError(size_t code); /* tells if a return value is an error code */
+const char* FSEv05_getErrorName(size_t code); /* provides error code string (useful for debugging) */
+
+
+
+
+/* *****************************************
+* FSEv05 detailed API
+******************************************/
+/* *** DECOMPRESSION *** */
+
+/*!
+FSEv05_readNCount():
+ Read compactly saved 'normalizedCounter' from 'rBuffer'.
+ return : size read from 'rBuffer'
+ or an errorCode, which can be tested using FSEv05_isError()
+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*!
+Constructor and Destructor of type FSEv05_DTable
+ Note that its size depends on 'tableLog' */
+typedef unsigned FSEv05_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
+FSEv05_DTable* FSEv05_createDTable(unsigned tableLog);
+void FSEv05_freeDTable(FSEv05_DTable* dt);
+
+/*!
+FSEv05_buildDTable():
+ Builds 'dt', which must be already allocated, using FSEv05_createDTable()
+ return : 0,
+ or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_buildDTable (FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*!
+FSEv05_decompress_usingDTable():
+ Decompress compressed source @cSrc of size @cSrcSize using @dt
+ into @dst which must be already allocated.
+ return : size of regenerated data (necessarily <= @dstCapacity)
+ or an errorCode, which can be tested using FSEv05_isError() */
+size_t FSEv05_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSEv05_DTable* dt);
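+/*
+Typical sequence sketch combining the three calls above
+(cSrc / cSrcSize / dst / dstCapacity are hypothetical caller buffers;
+each return value should be checked with FSEv05_isError()) :
+
+    short norm[256];                       // large enough for FSEv05_MAX_SYMBOL_VALUE, defined further below
+    unsigned maxSymbol = 255, tableLog;
+    size_t hSize = FSEv05_readNCount(norm, &maxSymbol, &tableLog, cSrc, cSrcSize);
+    FSEv05_DTable* dt = FSEv05_createDTable(tableLog);
+    size_t r = FSEv05_buildDTable(dt, norm, maxSymbol, tableLog);
+    r = FSEv05_decompress_usingDTable(dst, dstCapacity,
+                                      (const char*)cSrc + hSize, cSrcSize - hSize, dt);
+    FSEv05_freeDTable(dt);
+*/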
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FSEv05_H */
+/* ******************************************************************
+ bitstream
+ Part of FSEv05 library
+ header file (to include)
+ Copyright (C) 2013-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITv05STREAM_H_MODULE
+#define BITv05STREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+* This API consists of small unitary functions, which highly benefit from being inlined.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined in a .h file, to be included where needed.
+*/
+
+
+
+/*-********************************************
+* bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitsConsumed;
+ const char* ptr;
+ const char* start;
+} BITv05_DStream_t;
+
+typedef enum { BITv05_DStream_unfinished = 0,
+ BITv05_DStream_endOfBuffer = 1,
+ BITv05_DStream_completed = 2,
+ BITv05_DStream_overflow = 3 } BITv05_DStream_status; /* result of BITv05_reloadDStream() */
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD);
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* bitD);
+
+
+/*!
+* Start by invoking BITv05_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BITv05_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(size_t))-7) bits when its result is BITv05_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BITv05_endOfDStream()
+*/
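+/*
+Minimal reading sketch of the sequence described above
+(srcBuffer, srcSize, nbBits <= 25 and nbFieldsLeft are hypothetical caller values;
+the caller is expected to know how many fields were written) :
+
+    BITv05_DStream_t DStream;
+    size_t r = BITv05_initDStream(&DStream, srcBuffer, srcSize);   // r is an error code on failure
+    while (nbFieldsLeft--) {
+        size_t field = BITv05_readBits(&DStream, nbBits);          // fields come back in reverse order
+        BITv05_reloadDStream(&DStream);                            // refill the local register from memory
+        (void)field;
+    }
+    // BITv05_endOfDStream(&DStream) then reports whether the stream was fully consumed
+*/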
+
+
+/*-****************************************
+* unsafe API
+******************************************/
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+* Helper functions
+****************************************************************/
+MEM_STATIC unsigned BITv05_highbit32 (register U32 val)
+{
+# if defined(_MSC_VER) /* Visual */
+ unsigned long r=0;
+ _BitScanReverse ( &r, val );
+ return (unsigned) r;
+# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
+ return 31 - __builtin_clz (val);
+# else /* Software version */
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ U32 v = val;
+ unsigned r;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+ return r;
+# endif
+}
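+/* For reference : BITv05_highbit32() returns the position of the highest set bit,
+   e.g. BITv05_highbit32(1) == 0, BITv05_highbit32(32) == 5, BITv05_highbit32(255) == 7. */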
+
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*!BITv05_initDStream
+* Initialize a BITv05_DStream_t.
+* @bitD : a pointer to an already allocated BITv05_DStream_t structure
+* @srcBuffer must point at the beginning of a bitStream
+* @srcSize must be the exact size of the bitStream
+* @result : size of stream (== srcSize) or an errorCode if a problem is detected
+*/
+MEM_STATIC size_t BITv05_initDStream(BITv05_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+ if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+ if (srcSize >= sizeof(size_t)) { /* normal case */
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+ } else {
+ U32 contain32;
+ bitD->start = (const char*)srcBuffer;
+ bitD->ptr = bitD->start;
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
+ switch(srcSize)
+ {
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
+ default:;
+ }
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
+ if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */
+ bitD->bitsConsumed = 8 - BITv05_highbit32(contain32);
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
+ }
+
+ return srcSize;
+}
+
+/*!BITv05_lookBits
+ * Provides next n bits from local register
+ * local register is not modified (bits are still present for next read/look)
+ * On 32-bits, maxNbBits==25
+ * On 64-bits, maxNbBits==57
+ * @return : value extracted
+ */
+MEM_STATIC size_t BITv05_lookBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
+}
+
+/*! BITv05_lookBitsFast :
+* unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BITv05_lookBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
+}
+
+MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+ bitD->bitsConsumed += nbBits;
+}
+
+/*!BITv05_readBits
+ * Read next n bits from local register.
+ * pay attention not to read more bits than are contained in the local register.
+ * @return : extracted value.
+ */
+MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BITv05_lookBits(bitD, nbBits);
+ BITv05_skipBits(bitD, nbBits);
+ return value;
+}
+
+/*!BITv05_readBitsFast :
+* unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
+{
+ size_t value = BITv05_lookBitsFast(bitD, nbBits);
+ BITv05_skipBits(bitD, nbBits);
+ return value;
+}
+
+MEM_STATIC BITv05_DStream_status BITv05_reloadDStream(BITv05_DStream_t* bitD)
+{
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
+ return BITv05_DStream_overflow;
+
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BITv05_DStream_unfinished;
+ }
+ if (bitD->ptr == bitD->start) {
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BITv05_DStream_endOfBuffer;
+ return BITv05_DStream_completed;
+ }
+ {
+ U32 nbBytes = bitD->bitsConsumed >> 3;
+ BITv05_DStream_status result = BITv05_DStream_unfinished;
+ if (bitD->ptr - nbBytes < bitD->start) {
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
+ result = BITv05_DStream_endOfBuffer;
+ }
+ bitD->ptr -= nbBytes;
+ bitD->bitsConsumed -= nbBytes*8;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
+ return result;
+ }
+}
+
+/*! BITv05_endOfDStream
+* @return Tells if DStream has reached its exact end
+*/
+MEM_STATIC unsigned BITv05_endOfDStream(const BITv05_DStream_t* DStream)
+{
+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITv05STREAM_H_MODULE */
+/* ******************************************************************
+ FSEv05 : Finite State Entropy coder
+ header file for static linking (only)
+ Copyright (C) 2013-2015, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef FSEv05_STATIC_H
+#define FSEv05_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* *****************************************
+* Static allocation
+*******************************************/
+/* It is possible to statically allocate an FSEv05 DTable as a table of unsigned using the macro below */
+#define FSEv05_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
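+/*
+Static allocation sketch (the tableLog value 10 is an arbitrary example) :
+
+    FSEv05_DTable dt[FSEv05_DTABLE_SIZE_U32(10)];   // 1 + (1<<10) unsigned values
+    // `dt` can then be passed to FSEv05_buildDTable() and FSEv05_decompress_usingDTable()
+*/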
+
+
+/* *****************************************
+* FSEv05 advanced API
+*******************************************/
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits);
+/* build a fake FSEv05_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, unsigned char symbolValue);
+/* build a fake FSEv05_DTable, designed to always generate the same symbolValue */
+
+
+
+/* *****************************************
+* FSEv05 symbol decompression API
+*******************************************/
+typedef struct
+{
+ size_t state;
+ const void* table; /* precise table may vary, depending on U16 */
+} FSEv05_DState_t;
+
+
+static void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt);
+
+static unsigned char FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+
+static unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr);
+
+/*!
+Let's now decompose FSEv05_decompress_usingDTable() into its unitary components.
+You will decode FSEv05-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BITv05_DStream_t DStream; // Stream context
+FSEv05_DState_t DState; // State context. Multiple ones are possible
+FSEv05_DTable* DTablePtr; // Decoding table, provided by FSEv05_buildDTable()
+
+The first thing to do is to init the bitStream.
+ errorCode = BITv05_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+ errorCode = FSEv05_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSEv05_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+ unsigned char symbol = FSEv05_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+ size_t bitField = BITv05_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+ endSignal = BITv05_reloadDStream(&DStream);
+
+BITv05_reloadDStream() result tells if there is still some more data to read from DStream.
+BITv05_DStream_unfinished : there is still some data left into the DStream.
+BITv05_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BITv05_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BITv05_DStream_overflow : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BITv05_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BITv05_reloadDStream(&DStream) >= BITv05_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BITv05_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+ FSEv05_endOfDState(&DState);
+*/
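+/*
+Condensed sketch of the sequence above (dt is a table built with FSEv05_buildDTable();
+op / oend are hypothetical output cursors, srcBuffer / srcSize the compressed input) :
+
+    BITv05_DStream_t DStream;
+    FSEv05_DState_t  DState;
+    BITv05_initDStream(&DStream, srcBuffer, srcSize);
+    FSEv05_initDState(&DState, &DStream, dt);
+    while (op < oend) {
+        *op++ = FSEv05_decodeSymbol(&DState, &DStream);
+        if (BITv05_reloadDStream(&DStream) >= BITv05_DStream_completed) break;
+    }
+    // finally verify BITv05_endOfDStream(&DStream) and FSEv05_endOfDState(&DState)
+*/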
+
+
+/* *****************************************
+* FSEv05 unsafe API
+*******************************************/
+static unsigned char FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+* Implementation of inlined functions
+*******************************************/
+/* decompression */
+
+typedef struct {
+ U16 tableLog;
+ U16 fastMode;
+} FSEv05_DTableHeader; /* sizeof U32 */
+
+typedef struct
+{
+ unsigned short newState;
+ unsigned char symbol;
+ unsigned char nbBits;
+} FSEv05_decode_t; /* size == U32 */
+
+MEM_STATIC void FSEv05_initDState(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD, const FSEv05_DTable* dt)
+{
+ const void* ptr = dt;
+ const FSEv05_DTableHeader* const DTableH = (const FSEv05_DTableHeader*)ptr;
+ DStatePtr->state = BITv05_readBits(bitD, DTableH->tableLog);
+ BITv05_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC size_t FSEv05_getStateValue(FSEv05_DState_t* DStatePtr)
+{
+ return DStatePtr->state;
+}
+
+MEM_STATIC BYTE FSEv05_peakSymbol(FSEv05_DState_t* DStatePtr)
+{
+ const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ return DInfo.symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbol(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{
+ const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BITv05_readBits(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC BYTE FSEv05_decodeSymbolFast(FSEv05_DState_t* DStatePtr, BITv05_DStream_t* bitD)
+{
+ const FSEv05_decode_t DInfo = ((const FSEv05_decode_t*)(DStatePtr->table))[DStatePtr->state];
+ const U32 nbBits = DInfo.nbBits;
+ BYTE symbol = DInfo.symbol;
+ size_t lowBits = BITv05_readBitsFast(bitD, nbBits);
+
+ DStatePtr->state = DInfo.newState + lowBits;
+ return symbol;
+}
+
+MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
+{
+ return DStatePtr->state == 0;
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FSEv05_STATIC_H */
+/* ******************************************************************
+ FSEv05 : Finite State Entropy coder
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSEv05 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+
+/* **************************************************************
+* Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect
+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSEv05_MAX_MEMORY_USAGE 14
+#define FSEv05_DEFAULT_MEMORY_USAGE 13
+
+/*!FSEv05_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSEv05_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSEv05_FUNCTION_TYPE BYTE
+#define FSEv05_FUNCTION_EXTENSION
+#define FSEv05_DECODE_TYPE FSEv05_decode_t
+
+
+#endif /* !FSEv05_COMMONDEFS_ONLY */
+
+/* **************************************************************
+* Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+#else
+# ifdef __GNUC__
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* **************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+
+
+/* ***************************************************************
+* Constants
+*****************************************************************/
+#define FSEv05_MAX_TABLELOG (FSEv05_MAX_MEMORY_USAGE-2)
+#define FSEv05_MAX_TABLESIZE (1U<<FSEv05_MAX_TABLELOG)
+#define FSEv05_MAXTABLESIZE_MASK (FSEv05_MAX_TABLESIZE-1)
+#define FSEv05_DEFAULT_TABLELOG (FSEv05_DEFAULT_MEMORY_USAGE-2)
+#define FSEv05_MIN_TABLELOG 5
+
+#define FSEv05_TABLELOG_ABSOLUTE_MAX 15
+#if FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX
+#error "FSEv05_MAX_TABLELOG > FSEv05_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+#define FSEv05_STATIC_ASSERT(c) { enum { FSEv05_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/* **************************************************************
+* Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
+
+
+/* **************************************************************
+* Templates
+****************************************************************/
+/*
+ designed to be included
+ for type-specific functions (template emulation in C)
+ Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSEv05_FUNCTION_EXTENSION
+# error "FSEv05_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSEv05_FUNCTION_TYPE
+# error "FSEv05_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSEv05_CAT(X,Y) X##Y
+#define FSEv05_FUNCTION_NAME(X,Y) FSEv05_CAT(X,Y)
+#define FSEv05_TYPE_NAME(X,Y) FSEv05_CAT(X,Y)
+
+
+/* Function templates */
+static U32 FSEv05_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+
+
+FSEv05_DTable* FSEv05_createDTable (unsigned tableLog)
+{
+ if (tableLog > FSEv05_TABLELOG_ABSOLUTE_MAX) tableLog = FSEv05_TABLELOG_ABSOLUTE_MAX;
+ return (FSEv05_DTable*)malloc( FSEv05_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSEv05_freeDTable (FSEv05_DTable* dt)
+{
+ free(dt);
+}
+
+size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+ FSEv05_DTableHeader DTableH;
+ void* const tdPtr = dt+1; /* because dt is unsigned, 32-bits aligned on 32-bits */
+ FSEv05_DECODE_TYPE* const tableDecode = (FSEv05_DECODE_TYPE*) (tdPtr);
+ const U32 tableSize = 1 << tableLog;
+ const U32 tableMask = tableSize-1;
+ const U32 step = FSEv05_tableStep(tableSize);
+ U16 symbolNext[FSEv05_MAX_SYMBOL_VALUE+1];
+ U32 position = 0;
+ U32 highThreshold = tableSize-1;
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
+ U32 noLarge = 1;
+ U32 s;
+
+ /* Sanity Checks */
+ if (maxSymbolValue > FSEv05_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+ if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+ /* Init, lay down lowprob symbols */
+ DTableH.tableLog = (U16)tableLog;
+ for (s=0; s<=maxSymbolValue; s++) {
+ if (normalizedCounter[s]==-1) {
+ tableDecode[highThreshold--].symbol = (FSEv05_FUNCTION_TYPE)s;
+ symbolNext[s] = 1;
+ } else {
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
+ symbolNext[s] = normalizedCounter[s];
+ } }
+
+ /* Spread symbols */
+ for (s=0; s<=maxSymbolValue; s++) {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++) {
+ tableDecode[position].symbol = (FSEv05_FUNCTION_TYPE)s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ } }
+
+ if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+ /* Build Decoding table */
+ {
+ U32 i;
+ for (i=0; i<tableSize; i++) {
+ FSEv05_FUNCTION_TYPE symbol = (FSEv05_FUNCTION_TYPE)(tableDecode[i].symbol);
+ U16 nextState = symbolNext[symbol]++;
+ tableDecode[i].nbBits = (BYTE) (tableLog - BITv05_highbit32 ((U32)nextState) );
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+ } }
+
+ DTableH.fastMode = (U16)noLarge;
+ memcpy(dt, &DTableH, sizeof(DTableH));
+ return 0;
+}
+
+
+#ifndef FSEv05_COMMONDEFS_ONLY
+/*-****************************************
+* FSEv05 helper functions
+******************************************/
+unsigned FSEv05_isError(size_t code) { return ERR_isError(code); }
+
+const char* FSEv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+* FSEv05 NCount encoding-decoding
+****************************************************************/
+static short FSEv05_abs(short a) { return a<0 ? -a : a; }
+
+
+size_t FSEv05_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ int previous0 = 0;
+
+ if (hbSize < 4) return ERROR(srcSize_wrong);
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSEv05_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSEv05_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ while ((remaining>1) && (charnum<=*maxSVPtr)) {
+ if (previous0) {
+ unsigned n0 = charnum;
+ while ((bitStream & 0xFFFF) == 0xFFFF) {
+ n0+=24;
+ if (ip < iend-5) {
+ ip+=2;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ } else {
+ bitStream >>= 16;
+ bitCount+=16;
+ } }
+ while ((bitStream & 3) == 3) {
+ n0+=3;
+ bitStream>>=2;
+ bitCount+=2;
+ }
+ n0 += bitStream & 3;
+ bitCount += 2;
+ if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ }
+ else
+ bitStream >>= 2;
+ }
+ {
+ const short max = (short)((2*threshold-1)-remaining);
+ short count;
+
+ if ((bitStream & (threshold-1)) < (U32)max) {
+ count = (short)(bitStream & (threshold-1));
+ bitCount += nbBits-1;
+ } else {
+ count = (short)(bitStream & (2*threshold-1));
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ remaining -= FSEv05_abs(count);
+ normalizedCounter[charnum++] = count;
+ previous0 = !count;
+ while (remaining < threshold) {
+ nbBits--;
+ threshold >>= 1;
+ }
+
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ } else {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+ } }
+ if (remaining != 1) return ERROR(GENERIC);
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+ return ip-istart;
+}
+
+
+
+/*-*******************************************************
+* Decompression (Byte symbols)
+*********************************************************/
+size_t FSEv05_buildDTable_rle (FSEv05_DTable* dt, BYTE symbolValue)
+{
+ void* ptr = dt;
+ FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
+ void* dPtr = dt + 1;
+ FSEv05_decode_t* const cell = (FSEv05_decode_t*)dPtr;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->newState = 0;
+ cell->symbol = symbolValue;
+ cell->nbBits = 0;
+
+ return 0;
+}
+
+
+size_t FSEv05_buildDTable_raw (FSEv05_DTable* dt, unsigned nbBits)
+{
+ void* ptr = dt;
+ FSEv05_DTableHeader* const DTableH = (FSEv05_DTableHeader*)ptr;
+ void* dPtr = dt + 1;
+ FSEv05_decode_t* const dinfo = (FSEv05_decode_t*)dPtr;
+ const unsigned tableSize = 1 << nbBits;
+ const unsigned tableMask = tableSize - 1;
+ const unsigned maxSymbolValue = tableMask;
+ unsigned s;
+
+ /* Sanity checks */
+ if (nbBits < 1) return ERROR(GENERIC); /* min size */
+
+ /* Build Decoding Table */
+ DTableH->tableLog = (U16)nbBits;
+ DTableH->fastMode = 1;
+ for (s=0; s<=maxSymbolValue; s++) {
+ dinfo[s].newState = 0;
+ dinfo[s].symbol = (BYTE)s;
+ dinfo[s].nbBits = (BYTE)nbBits;
+ }
+
+ return 0;
+}
+
+FORCE_INLINE size_t FSEv05_decompress_usingDTable_generic(
+ void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSEv05_DTable* dt, const unsigned fast)
+{
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* op = ostart;
+ BYTE* const omax = op + maxDstSize;
+ BYTE* const olimit = omax-3;
+
+ BITv05_DStream_t bitD;
+ FSEv05_DState_t state1;
+ FSEv05_DState_t state2;
+ size_t errorCode;
+
+ /* Init */
+ errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
+ if (FSEv05_isError(errorCode)) return errorCode;
+
+ FSEv05_initDState(&state1, &bitD, dt);
+ FSEv05_initDState(&state2, &bitD, dt);
+
+#define FSEv05_GETSYMBOL(statePtr) fast ? FSEv05_decodeSymbolFast(statePtr, &bitD) : FSEv05_decodeSymbol(statePtr, &bitD)
+
+ /* 4 symbols per loop */
+ for ( ; (BITv05_reloadDStream(&bitD)==BITv05_DStream_unfinished) && (op<olimit) ; op+=4) {
+ op[0] = FSEv05_GETSYMBOL(&state1);
+
+ if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BITv05_reloadDStream(&bitD);
+
+ op[1] = FSEv05_GETSYMBOL(&state2);
+
+ if (FSEv05_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ { if (BITv05_reloadDStream(&bitD) > BITv05_DStream_unfinished) { op+=2; break; } }
+
+ op[2] = FSEv05_GETSYMBOL(&state1);
+
+ if (FSEv05_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
+ BITv05_reloadDStream(&bitD);
+
+ op[3] = FSEv05_GETSYMBOL(&state2);
+ }
+
+ /* tail */
+ /* note : BITv05_reloadDStream(&bitD) >= BITv05_DStream_endOfBuffer; ends at exactly BITv05_DStream_completed */
+ while (1) {
+ if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state1))) )
+ break;
+
+ *op++ = FSEv05_GETSYMBOL(&state1);
+
+ if ( (BITv05_reloadDStream(&bitD)>BITv05_DStream_completed) || (op==omax) || (BITv05_endOfDStream(&bitD) && (fast || FSEv05_endOfDState(&state2))) )
+ break;
+
+ *op++ = FSEv05_GETSYMBOL(&state2);
+ }
+
+ /* end ? */
+ if (BITv05_endOfDStream(&bitD) && FSEv05_endOfDState(&state1) && FSEv05_endOfDState(&state2))
+ return op-ostart;
+
+ if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
+
+ return ERROR(corruption_detected);
+}
+
+
+size_t FSEv05_decompress_usingDTable(void* dst, size_t originalSize,
+ const void* cSrc, size_t cSrcSize,
+ const FSEv05_DTable* dt)
+{
+ const void* ptr = dt;
+ const FSEv05_DTableHeader* DTableH = (const FSEv05_DTableHeader*)ptr;
+ const U32 fastMode = DTableH->fastMode;
+
+ /* select fast mode (static) */
+ if (fastMode) return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+ return FSEv05_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSEv05_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+ const BYTE* const istart = (const BYTE*)cSrc;
+ const BYTE* ip = istart;
+ short counting[FSEv05_MAX_SYMBOL_VALUE+1];
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
+ unsigned tableLog;
+ unsigned maxSymbolValue = FSEv05_MAX_SYMBOL_VALUE;
+ size_t errorCode;
+
+ if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
+
+ /* normal FSEv05 decoding mode */
+ errorCode = FSEv05_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+ if (FSEv05_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ errorCode = FSEv05_buildDTable (dt, counting, maxSymbolValue, tableLog);
+ if (FSEv05_isError(errorCode)) return errorCode;
+
+ /* always return, even if it is an error code */
+ return FSEv05_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif /* FSEv05_COMMONDEFS_ONLY */
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ header file
+ Copyright (C) 2013-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUFF0_H
+#define HUFF0_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+* Huff0 simple functions
+******************************************/
+size_t HUFv05_decompress(void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize);
+/*!
+HUFv05_decompress():
+ Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'dstSize'.
+ @dstSize : must be the **exact** size of original (uncompressed) data.
+ Note : in contrast with FSEv05, HUFv05_decompress can regenerate
+ RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ because it knows the size to regenerate.
+ @return : size of regenerated data (== dstSize)
+ or an error code, which can be tested using HUFv05_isError()
+*/
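+/*
+Usage sketch (hypothetical names; `originalSize` must be the exact uncompressed size,
+typically known from the enclosing zstd frame) :
+
+    size_t r = HUFv05_decompress(dst, originalSize, cSrc, cSrcSize);
+    if (HUFv05_isError(r)) return r;      // on success, r == originalSize
+*/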
+
+
+/* ****************************************
+* Tool functions
+******************************************/
+/* Error Management */
+unsigned HUFv05_isError(size_t code); /* tells if a return value is an error code */
+const char* HUFv05_getErrorName(size_t code); /* provides error code string (useful for debugging) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUFF0_H */
+/* ******************************************************************
+ Huff0 : Huffman codec, part of New Generation Entropy library
+ header file, for static linking only
+ Copyright (C) 2013-2016, Yann Collet
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUF0_STATIC_H
+#define HUF0_STATIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* ****************************************
+* Static allocation
+******************************************/
+/* static allocation of Huff0's DTable */
+#define HUFv05_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog))
+#define HUFv05_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+ unsigned short DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUFv05_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+ unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUFv05_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+ unsigned int DTable[HUFv05_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/* ****************************************
+* Advanced decompression functions
+******************************************/
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
+
+
+/* ****************************************
+* Huff0 detailed API
+******************************************/
+/*!
+HUFv05_decompress() does the following:
+1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
+2. build the Huffman table from the saved table description, using HUFv05_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUFv05_decompressSXn_usingDTable
+*/
+size_t HUFv05_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
+size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
+
+size_t HUFv05_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
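+/*
+Sketch of the flow described above, using the single-symbol (X2) path and the
+static-allocation macro from the previous section (buffer names are hypothetical,
+and HUFv05_MAX_TABLELOG is defined later in this file) :
+
+    HUFv05_CREATE_STATIC_DTABLEX2(DTable, 12);
+    size_t hSize = HUFv05_readDTableX2(DTable, cSrc, cSrcSize);
+    // check hSize with HUFv05_isError(), then :
+    size_t r = HUFv05_decompress4X2_usingDTable(dst, dstSize,
+                                                (const char*)cSrc + hSize, cSrcSize - hSize, DTable);
+*/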
+
+
+/* single stream variants */
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
+
+size_t HUFv05_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
+size_t HUFv05_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* HUF0_STATIC_H */
+/* ******************************************************************
+ Huff0 : Huffman coder, part of New Generation Entropy library
+ Copyright (C) 2013-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSEv05+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+* Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+# define inline __inline
+#else
+# define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#else
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/* **************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memcpy, memset */
+#include <stdio.h> /* printf (debug) */
+
+
+/* **************************************************************
+* Constants
+****************************************************************/
+#define HUFv05_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUFv05_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUFv05_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUFv05_ABSOLUTEMAX_TABLELOG */
+#define HUFv05_DEFAULT_TABLELOG HUFv05_MAX_TABLELOG /* tableLog by default, when not specified */
+#define HUFv05_MAX_SYMBOL_VALUE 255
+#if (HUFv05_MAX_TABLELOG > HUFv05_ABSOLUTEMAX_TABLELOG)
+# error "HUFv05_MAX_TABLELOG is too large !"
+#endif
+
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+unsigned HUFv05_isError(size_t code) { return ERR_isError(code); }
+const char* HUFv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+#define HUFv05_STATIC_ASSERT(c) { enum { HUFv05_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+
+/* *******************************************************
+* Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUFv05_DEltX2; /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUFv05_DEltX4; /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/*! HUFv05_readStats
+ Read compact Huffman tree, saved by HUFv05_writeCTable
+ @huffWeight : destination buffer
+ @return : size read from `src`
+*/
+static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 weightTotal;
+ U32 tableLog;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize = ip[0];
+ size_t oSize;
+ U32 n;
+
+ //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzers complain ... */
+
+ if (iSize >= 128) { /* special header */
+ if (iSize >= (242)) { /* RLE */
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+ oSize = l[iSize-242];
+ memset(huffWeight, 1, hwSize);
+ iSize = 0;
+ }
+ else { /* Incompressible */
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ for (n=0; n<oSize; n+=2) {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ } } }
+ else { /* header compressed with FSEv05 (normal case) */
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ oSize = FSEv05_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
+ if (FSEv05_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ memset(rankStats, 0, (HUFv05_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+ weightTotal = 0;
+ for (n=0; n<oSize; n++) {
+ if (huffWeight[n] >= HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ }
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ tableLog = BITv05_highbit32(weightTotal) + 1;
+ if (tableLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ { /* determine last weight */
+ U32 total = 1 << tableLog;
+ U32 rest = total - weightTotal;
+ U32 verif = 1 << BITv05_highbit32(rest);
+ U32 lastWeight = BITv05_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ *tableLogPtr = tableLog;
+ return iSize+1;
+}
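A worked sketch of the implied-weight rule applied above (the weights are invented for illustration): every explicit weight w contributes (1 << w) >> 1 to weightTotal, and the omitted last weight is whatever raises the total to the next power of two.

static unsigned demo_highbit32(unsigned v)   /* index of the highest set bit */
{
    unsigned r = 0;
    while (v >>= 1) r++;
    return r;
}

static void demo_impliedLastWeight(void)
{
    const unsigned char weight[5] = { 2, 2, 1, 1, 1 };          /* explicit weights */
    unsigned weightTotal = 0, n;
    for (n = 0; n < 5; n++)
        weightTotal += (1u << weight[n]) >> 1;                  /* 2+2+1+1+1 = 7 */
    {
        unsigned tableLog   = demo_highbit32(weightTotal) + 1;  /* 3 */
        unsigned rest       = (1u << tableLog) - weightTotal;   /* 8 - 7 = 1, a clean power of 2 */
        unsigned lastWeight = demo_highbit32(rest) + 1;         /* implied 6th weight : 1 */
        (void)tableLog; (void)lastWeight;
    }
}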
+
+
+/*-***************************/
+/* single-symbol decoding */
+/*-***************************/
+
+size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+ BYTE huffWeight[HUFv05_MAX_SYMBOL_VALUE + 1];
+ U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
+ U32 tableLog = 0;
+ size_t iSize;
+ U32 nbSymbols = 0;
+ U32 n;
+ U32 nextRankStart;
+ void* const dtPtr = DTable + 1;
+ HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr;
+
+ HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* not necessary, even though some analyzers complain ... */
+
+ iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+ if (HUFv05_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
+ DTable[0] = (U16)tableLog; /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
+
+ /* Prepare ranks */
+ nextRankStart = 0;
+ for (n=1; n<=tableLog; n++) {
+ U32 current = nextRankStart;
+ nextRankStart += (rankVal[n] << (n-1));
+ rankVal[n] = current;
+ }
+
+ /* fill DTable */
+ for (n=0; n<nbSymbols; n++) {
+ const U32 w = huffWeight[n];
+ const U32 length = (1 << w) >> 1;
+ U32 i;
+ HUFv05_DEltX2 D;
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
+ dt[i] = D;
+ rankVal[w] += length;
+ }
+
+ return iSize;
+}
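Continuing the same invented weights, a sketch of the table layout produced above: the per-weight counts are turned into start offsets (weight 1 first), each weight-w symbol then claims (1 << w) >> 1 consecutive cells, and a cell of weight w decodes in tableLog + 1 - w bits.

typedef struct { unsigned char byte; unsigned char nbBits; } demo_DEltX2;

static void demo_fillTableX2(void)
{
    enum { DEMO_TABLELOG = 3 };
    const unsigned char weight[6] = { 2, 2, 1, 1, 1, 1 };    /* symbols A..F ; F is the implied one */
    unsigned rankVal[DEMO_TABLELOG + 1] = { 0, 4, 2, 0 };    /* symbol counts per weight */
    demo_DEltX2 dt[1 << DEMO_TABLELOG];
    unsigned n, nextRankStart = 0;

    for (n = 1; n <= DEMO_TABLELOG; n++) {                   /* counts -> start positions */
        unsigned current = nextRankStart;
        nextRankStart += rankVal[n] << (n - 1);
        rankVal[n] = current;                                /* weight 1 starts at 0, weight 2 at 4 */
    }
    for (n = 0; n < 6; n++) {
        const unsigned w = weight[n];
        const unsigned length = (1u << w) >> 1;
        unsigned i;
        for (i = rankVal[w]; i < rankVal[w] + length; i++) {
            dt[i].byte   = (unsigned char)('A' + n);
            dt[i].nbBits = (unsigned char)(DEMO_TABLELOG + 1 - w);
        }
        rankVal[w] += length;
    }
    /* resulting cells : C D E F A A B B   with nbBits 3 3 3 3 2 2 2 2 */
    (void)dt;
}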
+
+static BYTE HUFv05_decodeSymbolX2(BITv05_DStream_t* Dstream, const HUFv05_DEltX2* dt, const U32 dtLog)
+{
+ const size_t val = BITv05_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ const BYTE c = dt[val].byte;
+ BITv05_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+ *ptr++ = HUFv05_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+ HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUFv05_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUFv05_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUFv05_decodeStreamX2(BYTE* p, BITv05_DStream_t* const bitDPtr, BYTE* const pEnd, const HUFv05_DEltX2* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-4)) {
+ HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUFv05_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUFv05_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd))
+ HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, hence no need to reload */
+ while (p < pEnd)
+ HUFv05_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
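The _0/_1/_2 macro variants used in this loop throttle how many symbols are pulled between reloads. Assuming the bit container guarantees at least 56 usable bits after a reload on 64-bit targets (an assumption stated here, not taken from the source), four lookups of at most HUFv05_MAX_TABLELOG bits always fit, while 32-bit targets skip the _2 decodes (and, for larger table logs, the _1 ones as well). A tiny sketch of that budget check:

static void demo_reloadBudget(void)
{
    HUFv05_STATIC_ASSERT(4 * HUFv05_MAX_TABLELOG <= 56);   /* 4 lookups * 12 bits = 48 bits */
}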
+
+size_t HUFv05_decompress1X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U16* DTable)
+{
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + dstSize;
+ size_t errorCode;
+ const U32 dtLog = DTable[0];
+ const void* dtPtr = DTable;
+ const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1;
+ BITv05_DStream_t bitD;
+ errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
+ if (HUFv05_isError(errorCode)) return errorCode;
+
+ HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+ /* check */
+ if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ return dstSize;
+}
+
+size_t HUFv05_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+ size_t errorCode;
+
+ errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ return HUFv05_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+size_t HUFv05_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U16* DTable)
+{
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable;
+ const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BITv05_DStream_t bitD1;
+ BITv05_DStream_t bitD2;
+ BITv05_DStream_t bitD3;
+ BITv05_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BITv05_initDStream(&bitD1, istart1, length1);
- if (HUFv05_isError(errorCode)) return errorCode;
- errorCode = BITv05_initDStream(&bitD2, istart2, length2);
- if (HUFv05_isError(errorCode)) return errorCode;
- errorCode = BITv05_initDStream(&bitD3, istart3, length3);
- if (HUFv05_isError(errorCode)) return errorCode;
- errorCode = BITv05_initDStream(&bitD4, istart4, length4);
- if (HUFv05_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
- for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
- HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
- HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
- HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
- endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
-    /* note : op4 already verified within the main loop */
-
- /* finish bitStreams one by one */
- HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
- HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
- HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
- HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
-}
-
-
-size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
- size_t errorCode;
-
- errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
- if (HUFv05_isError(errorCode)) return errorCode;
- if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
- ip += errorCode;
- cSrcSize -= errorCode;
-
- return HUFv05_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/* *************************/
-/* double-symbols decoding */
-/* *************************/
-
-static void HUFv05_fillDTableX4Level2(HUFv05_DEltX4* DTable, U32 sizeLog, const U32 consumed,
- const U32* rankValOrigin, const int minWeight,
- const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
- U32 nbBitsBaseline, U16 baseSeq)
-{
- HUFv05_DEltX4 DElt;
- U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
- U32 s;
-
- /* get pre-calculated rankVal */
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill skipped values */
- if (minWeight>1) {
- U32 i, skipSize = rankVal[minWeight];
- MEM_writeLE16(&(DElt.sequence), baseSeq);
- DElt.nbBits = (BYTE)(consumed);
- DElt.length = 1;
- for (i = 0; i < skipSize; i++)
- DTable[i] = DElt;
- }
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
- const U32 symbol = sortedSymbols[s].symbol;
- const U32 weight = sortedSymbols[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 length = 1 << (sizeLog-nbBits);
- const U32 start = rankVal[weight];
- U32 i = start;
- const U32 end = start + length;
-
- MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
- DElt.nbBits = (BYTE)(nbBits + consumed);
- DElt.length = 2;
- do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
-
- rankVal[weight] += length;
- }
-}
-
-typedef U32 rankVal_t[HUFv05_ABSOLUTEMAX_TABLELOG][HUFv05_ABSOLUTEMAX_TABLELOG + 1];
-
-static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
- const sortedSymbol_t* sortedList, const U32 sortedListSize,
- const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
- const U32 nbBitsBaseline)
-{
- U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
- const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
- const U32 minBits = nbBitsBaseline - maxWeight;
- U32 s;
-
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
- /* fill DTable */
- for (s=0; s<sortedListSize; s++) {
- const U16 symbol = sortedList[s].symbol;
- const U32 weight = sortedList[s].weight;
- const U32 nbBits = nbBitsBaseline - weight;
- const U32 start = rankVal[weight];
- const U32 length = 1 << (targetLog-nbBits);
-
- if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
- U32 sortedRank;
- int minWeight = nbBits + scaleLog;
- if (minWeight < 1) minWeight = 1;
- sortedRank = rankStart[minWeight];
- HUFv05_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
- rankValOrigin[nbBits], minWeight,
- sortedList+sortedRank, sortedListSize-sortedRank,
- nbBitsBaseline, symbol);
- } else {
- U32 i;
- const U32 end = start + length;
- HUFv05_DEltX4 DElt;
-
- MEM_writeLE16(&(DElt.sequence), symbol);
- DElt.nbBits = (BYTE)(nbBits);
- DElt.length = 1;
- for (i = start; i < end; i++)
- DTable[i] = DElt;
- }
- rankVal[weight] += length;
- }
-}
-
-size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
-{
- BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
- sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
- U32 rankStats[HUFv05_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
- U32 rankStart0[HUFv05_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
- U32* const rankStart = rankStart0+1;
- rankVal_t rankVal;
- U32 tableLog, maxW, sizeOfSort, nbSymbols;
- const U32 memLog = DTable[0];
- size_t iSize;
- void* dtPtr = DTable;
- HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;
-
- HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
- if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
-    //memset(weightList, 0, sizeof(weightList));   /* not necessary, even though some analyzers complain ... */
-
- iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
- if (HUFv05_isError(iSize)) return iSize;
-
- /* check result */
- if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
- /* find maxWeight */
- for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
-
- /* Get start index of each weight */
- {
- U32 w, nextRankStart = 0;
- for (w=1; w<=maxW; w++) {
- U32 current = nextRankStart;
- nextRankStart += rankStats[w];
- rankStart[w] = current;
- }
- rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
- sizeOfSort = nextRankStart;
- }
-
- /* sort symbols by weight */
- {
- U32 s;
- for (s=0; s<nbSymbols; s++) {
- U32 w = weightList[s];
- U32 r = rankStart[w]++;
- sortedSymbol[r].symbol = (BYTE)s;
- sortedSymbol[r].weight = (BYTE)w;
- }
- rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
- }
-
- /* Build rankVal */
- {
- const U32 minBits = tableLog+1 - maxW;
- U32 nextRankVal = 0;
- U32 w, consumed;
- const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
- U32* rankVal0 = rankVal[0];
- for (w=1; w<=maxW; w++) {
- U32 current = nextRankVal;
- nextRankVal += rankStats[w] << (w+rescale);
- rankVal0[w] = current;
- }
- for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
- U32* rankValPtr = rankVal[consumed];
- for (w = 1; w <= maxW; w++) {
- rankValPtr[w] = rankVal0[w] >> consumed;
- } } }
-
- HUFv05_fillDTableX4(dt, memLog,
- sortedSymbol, sizeOfSort,
- rankStart0, rankVal, maxW,
- tableLog+1);
-
- return iSize;
-}
-
-
-static U32 HUFv05_decodeSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BITv05_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BITv05_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUFv05_decodeLastSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
-{
- const size_t val = BITv05_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BITv05_skipBits(DStream, dt[val].nbBits);
- else {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
- BITv05_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- } }
- return 1;
-}
-
-
-#define HUFv05_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUFv05_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
- ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUFv05_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv05_DEltX4* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd-7)) {
- HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUFv05_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to the end */
- while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-2))
- HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUFv05_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-
-size_t HUFv05_decompress1X4_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U32* DTable)
-{
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
-
- const U32 dtLog = DTable[0];
- const void* const dtPtr = DTable;
- const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
- size_t errorCode;
-
- /* Init */
- BITv05_DStream_t bitD;
- errorCode = BITv05_initDStream(&bitD, istart, cSrcSize);
- if (HUFv05_isError(errorCode)) return errorCode;
-
- /* finish bitStreams one by one */
- HUFv05_decodeStreamX4(ostart, &bitD, oend, dt, dtLog);
-
- /* check */
- if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
-}
-
-size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
- if (HUFv05_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize;
- cSrcSize -= hSize;
-
- return HUFv05_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-size_t HUFv05_decompress4X4_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const U32* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- {
- const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable;
- const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
- const U32 dtLog = DTable[0];
- size_t errorCode;
-
- /* Init */
- BITv05_DStream_t bitD1;
- BITv05_DStream_t bitD2;
- BITv05_DStream_t bitD3;
- BITv05_DStream_t bitD4;
- const size_t length1 = MEM_readLE16(istart);
- const size_t length2 = MEM_readLE16(istart+2);
- const size_t length3 = MEM_readLE16(istart+4);
- size_t length4;
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- const size_t segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
-
- length4 = cSrcSize - (length1 + length2 + length3 + 6);
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BITv05_initDStream(&bitD1, istart1, length1);
- if (HUFv05_isError(errorCode)) return errorCode;
- errorCode = BITv05_initDStream(&bitD2, istart2, length2);
- if (HUFv05_isError(errorCode)) return errorCode;
- errorCode = BITv05_initDStream(&bitD3, istart3, length3);
- if (HUFv05_isError(errorCode)) return errorCode;
- errorCode = BITv05_initDStream(&bitD4, istart4, length4);
- if (HUFv05_isError(errorCode)) return errorCode;
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
- for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
- HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUFv05_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUFv05_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUFv05_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUFv05_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUFv05_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
-    /* note : op4 already verified within the main loop */
-
- /* finish bitStreams one by one */
- HUFv05_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUFv05_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUFv05_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUFv05_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
-size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
- if (HUFv05_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize;
- cSrcSize -= hSize;
-
- return HUFv05_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
-}
-
-
-/* ********************************/
-/* Generic decompression selector */
-/* ********************************/
-
-typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
-{
- /* single, double, quad */
- {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
- {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
- {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
- {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
- {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
- {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
- {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
- {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
- {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
- {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
- {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
- {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
- {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
- {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
- {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
- {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
-};
-
-typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-
-size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+ if (HUFv05_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+ for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+ HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+ endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+    /* note : op4 already verified within the main loop */
+
+ /* finish bitStreams one by one */
+ HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+}
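A sketch of the 4-stream framing consumed by the function above, with invented byte values and sizes: the first 6 bytes are three little-endian 16-bit stream lengths, the fourth stream takes whatever remains, and each stream regenerates one segment of (dstSize+3)/4 bytes.

#include <stddef.h>   /* size_t */

static void demo_fourStreamLayout(void)
{
    const unsigned char cSrc[6] = { 0x10, 0x00, 0x12, 0x00, 0x0e, 0x00 };  /* 16, 18, 14 */
    const size_t cSrcSize = 70;
    const size_t dstSize  = 1000;
    size_t length1 = (size_t)cSrc[0] | ((size_t)cSrc[1] << 8);             /* 16 */
    size_t length2 = (size_t)cSrc[2] | ((size_t)cSrc[3] << 8);             /* 18 */
    size_t length3 = (size_t)cSrc[4] | ((size_t)cSrc[5] << 8);             /* 14 */
    size_t length4 = cSrcSize - (length1 + length2 + length3 + 6);         /* 70 - 54 = 16 */
    size_t segmentSize = (dstSize + 3) / 4;                                /* 250 */
    /* stream 1 starts at cSrc+6, stream 2 at cSrc+6+16, stream 3 at cSrc+6+16+18,
       stream 4 at cSrc+6+16+18+14 ; stream n decodes into dst + (n-1)*segmentSize,
       and the last segment may be shorter than segmentSize */
    (void)length4; (void)segmentSize;
}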
+
+
+size_t HUFv05_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUFv05_CREATE_STATIC_DTABLEX2(DTable, HUFv05_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+ size_t errorCode;
+
+ errorCode = HUFv05_readDTableX2 (DTable, cSrc, cSrcSize);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += errorCode;
+ cSrcSize -= errorCode;
+
+ return HUFv05_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+static void HUFv05_fillDTableX4Level2(HUFv05_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq)
+{
+ HUFv05_DEltX4 DElt;
+ U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+ U32 s;
+
+ /* get pre-calculated rankVal */
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill skipped values */
+ if (minWeight>1) {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ }
+}
+
+typedef U32 rankVal_t[HUFv05_ABSOLUTEMAX_TABLELOG][HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline)
+{
+ U32 rankVal[HUFv05_ABSOLUTEMAX_TABLELOG + 1];
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUFv05_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol);
+ } else {
+ U32 i;
+ const U32 end = start + length;
+ HUFv05_DEltX4 DElt;
+
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ for (i = start; i < end; i++)
+ DTable[i] = DElt;
+ }
+ rankVal[weight] += length;
+ }
+}
+
+size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+ BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
+ sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
+ U32 rankStats[HUFv05_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+ U32 rankStart0[HUFv05_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+ U32* const rankStart = rankStart0+1;
+ rankVal_t rankVal;
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ const U32 memLog = DTable[0];
+ size_t iSize;
+ void* dtPtr = DTable;
+ HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;
+
+ HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
+ if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* not necessary, even though some analyzers complain ... */
+
+ iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+ if (HUFv05_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
+
+ /* Get start index of each weight */
+ {
+ U32 w, nextRankStart = 0;
+ for (w=1; w<=maxW; w++) {
+ U32 current = nextRankStart;
+ nextRankStart += rankStats[w];
+ rankStart[w] = current;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ {
+ U32 s;
+ for (s=0; s<nbSymbols; s++) {
+ U32 w = weightList[s];
+ U32 r = rankStart[w]++;
+ sortedSymbol[r].symbol = (BYTE)s;
+ sortedSymbol[r].weight = (BYTE)w;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ {
+ const U32 minBits = tableLog+1 - maxW;
+ U32 nextRankVal = 0;
+ U32 w, consumed;
+ const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
+ U32* rankVal0 = rankVal[0];
+ for (w=1; w<=maxW; w++) {
+ U32 current = nextRankVal;
+ nextRankVal += rankStats[w] << (w+rescale);
+ rankVal0[w] = current;
+ }
+ for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
+ U32* rankValPtr = rankVal[consumed];
+ for (w = 1; w <= maxW; w++) {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ } } }
+
+ HUFv05_fillDTableX4(dt, memLog,
+ sortedSymbol, sizeOfSort,
+ rankStart0, rankVal, maxW,
+ tableLog+1);
+
+ return iSize;
+}
+
+
+static U32 HUFv05_decodeSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BITv05_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BITv05_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+static U32 HUFv05_decodeLastSymbolX4(void* op, BITv05_DStream_t* DStream, const HUFv05_DEltX4* dt, const U32 dtLog)
+{
+ const size_t val = BITv05_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BITv05_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BITv05_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ } }
+ return 1;
+}
+
+
+#define HUFv05_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUFv05_MAX_TABLELOG<=12)) \
+ ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUFv05_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUFv05_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, BYTE* const pEnd, const HUFv05_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p < pEnd-7)) {
+ HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUFv05_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUFv05_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to the end */
+ while ((BITv05_reloadDStream(bitDPtr) == BITv05_DStream_unfinished) && (p <= pEnd-2))
+ HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUFv05_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUFv05_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+
+size_t HUFv05_decompress1X4_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U32* DTable)
+{
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+
+ const U32 dtLog = DTable[0];
+ const void* const dtPtr = DTable;
+ const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+ size_t errorCode;
+
+ /* Init */
+ BITv05_DStream_t bitD;
+ errorCode = BITv05_initDStream(&bitD, istart, cSrcSize);
+ if (HUFv05_isError(errorCode)) return errorCode;
+
+ /* finish bitStreams one by one */
+ HUFv05_decodeStreamX4(ostart, &bitD, oend, dt, dtLog);
+
+ /* check */
+ if (!BITv05_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+}
+
+size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+ if (HUFv05_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize;
+ cSrcSize -= hSize;
+
+ return HUFv05_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+size_t HUFv05_decompress4X4_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const U32* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ {
+ const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable;
+ const HUFv05_DEltX4* const dt = ((const HUFv05_DEltX4*)dtPtr) +1;
+ const U32 dtLog = DTable[0];
+ size_t errorCode;
+
+ /* Init */
+ BITv05_DStream_t bitD1;
+ BITv05_DStream_t bitD2;
+ BITv05_DStream_t bitD3;
+ BITv05_DStream_t bitD4;
+ const size_t length1 = MEM_readLE16(istart);
+ const size_t length2 = MEM_readLE16(istart+2);
+ const size_t length3 = MEM_readLE16(istart+4);
+ size_t length4;
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+
+ length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+ if (HUFv05_isError(errorCode)) return errorCode;
+ errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+ if (HUFv05_isError(errorCode)) return errorCode;
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+ for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+ HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUFv05_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUFv05_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUFv05_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUFv05_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUFv05_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUFv05_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+    /* note : op4 already verified within the main loop */
+
+ /* finish bitStreams one by one */
+ HUFv05_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUFv05_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUFv05_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUFv05_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+ if (!endSignal) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+size_t HUFv05_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUFv05_CREATE_STATIC_DTABLEX4(DTable, HUFv05_MAX_TABLELOG);
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUFv05_readDTableX4 (DTable, cSrc, cSrcSize);
+ if (HUFv05_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize;
+ cSrcSize -= hSize;
+
+ return HUFv05_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+ /* single, double, quad */
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
+};
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
static const decompressionAlgo decompress[3] = { HUFv05_decompress4X2, HUFv05_decompress4X4, NULL };
- /* estimate decompression time */
- U32 Q;
- const U32 D256 = (U32)(dstSize >> 8);
- U32 Dtime[3];
- U32 algoNb = 0;
- int n;
-
- /* validation checks */
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ /* estimate decompression time */
+ U32 Q;
+ const U32 D256 = (U32)(dstSize >> 8);
+ U32 Dtime[3];
+ U32 algoNb = 0;
+ int n;
+
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize >= dstSize) return ERROR(corruption_detected);   /* invalid, or not compressed; the not-compressed case is handled elsewhere */
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
-
- /* decoder timing evaluation */
- Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
- for (n=0; n<3; n++)
- Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
-
- Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
-
- if (Dtime[1] < Dtime[0]) algoNb = 1;
-
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
-
- //return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
- //return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
- //return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
-}
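A sketch of the timing heuristic above with invented sizes (only the first two columns matter, since the quad entry of decompress[] is NULL): a 64 KB block compressed to 32 KB gives Q = 8 and D256 = 256, so the single-symbol estimate is 926 + 128*256 = 33694 and the double-symbol estimate is 1613 + 75*256 = 20813, still cheaper after the 1/16 penalty, so algorithm 1 is selected.

static unsigned demo_pickDecoder(size_t dstSize, size_t cSrcSize)
{
    /* mirrors the selection above ; caller guarantees 1 < cSrcSize < dstSize */
    const unsigned Q    = (unsigned)(cSrcSize * 16 / dstSize);   /* < 16 */
    const unsigned D256 = (unsigned)(dstSize >> 8);
    unsigned Dtime[2], n;
    for (n = 0; n < 2; n++)
        Dtime[n] = algoTime[Q][n].tableTime + algoTime[Q][n].decode256Time * D256;
    Dtime[1] += Dtime[1] >> 4;              /* favour the smaller single-symbol table */
    return (Dtime[1] < Dtime[0]) ? 1 : 0;   /* demo_pickDecoder(65536, 32768) == 1 */
}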
-/*
- zstd - standard compression library
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-*/
-
-/* ***************************************************************
-* Tuning parameters
-*****************************************************************/
-/*!
- * HEAPMODE :
- * Selects how the default decompression function ZSTDv05_decompress() allocates memory :
- * on the stack (0), or on the heap (1, requires malloc())
- */
-#ifndef ZSTDv05_HEAPMODE
-# define ZSTDv05_HEAPMODE 1
-#endif
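Since the guard above is #ifndef, a build can pre-define the macro to force stack allocation instead; a sketch of such an override (shown disabled, since the surrounding default is kept):

/* e.g. predefine it when compiling this translation unit : -DZSTDv05_HEAPMODE=0 ,
   or define it ahead of the guard above : */
#if 0   /* illustration only */
#  define ZSTDv05_HEAPMODE 0   /* ZSTDv05_decompress() then allocates on the stack */
#endif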
-
-
-/*-*******************************************************
-* Dependencies
-*********************************************************/
-#include <stdlib.h> /* calloc */
-#include <string.h> /* memcpy, memmove */
-#include <stdio.h> /* debug only : printf */
-
-
-/*-*******************************************************
-* Compiler specifics
-*********************************************************/
-#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# pragma warning(disable : 4324) /* disable: C4324: padded structure */
-#else
-# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-
-/*-*************************************
-* Local types
-***************************************/
-typedef struct
-{
- blockType_t blockType;
- U32 origSize;
-} blockProperties_t;
-
-
-/* *******************************************************
-* Memory operations
-**********************************************************/
-static void ZSTDv05_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
-
-
-/* *************************************
-* Error Management
-***************************************/
-/*! ZSTDv05_isError() :
-* tells if a return value is an error code */
-unsigned ZSTDv05_isError(size_t code) { return ERR_isError(code); }
-
-/*! ZSTDv05_getError() :
-* convert a `size_t` function result into a proper ZSTDv05_errorCode enum */
-ZSTDv05_ErrorCode ZSTDv05_getError(size_t code) { return ERR_getError(code); }
-
-/*! ZSTDv05_getErrorName() :
-* provides error code string (useful for debugging) */
-const char* ZSTDv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
-
-
-/* *************************************************************
-* Context management
-***************************************************************/
-typedef enum { ZSTDv05ds_getFrameHeaderSize, ZSTDv05ds_decodeFrameHeader,
- ZSTDv05ds_decodeBlockHeader, ZSTDv05ds_decompressBlock } ZSTDv05_dStage;
-
-struct ZSTDv05_DCtx_s
-{
- FSEv05_DTable LLTable[FSEv05_DTABLE_SIZE_U32(LLFSEv05Log)];
- FSEv05_DTable OffTable[FSEv05_DTABLE_SIZE_U32(OffFSEv05Log)];
- FSEv05_DTable MLTable[FSEv05_DTABLE_SIZE_U32(MLFSEv05Log)];
- unsigned hufTableX4[HUFv05_DTABLE_SIZE(HufLog)];
- const void* previousDstEnd;
- const void* base;
- const void* vBase;
- const void* dictEnd;
- size_t expected;
- size_t headerSize;
- ZSTDv05_parameters params;
- blockType_t bType; /* used in ZSTDv05_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
- ZSTDv05_dStage stage;
- U32 flagStaticTables;
- const BYTE* litPtr;
- size_t litBufSize;
- size_t litSize;
- BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
- BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
-}; /* typedef'd to ZSTDv05_DCtx within "zstd_static.h" */
-
-size_t ZSTDv05_sizeofDCtx (void) { return sizeof(ZSTDv05_DCtx); }
-
-size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx)
-{
- dctx->expected = ZSTDv05_frameHeaderSize_min;
- dctx->stage = ZSTDv05ds_getFrameHeaderSize;
- dctx->previousDstEnd = NULL;
- dctx->base = NULL;
- dctx->vBase = NULL;
- dctx->dictEnd = NULL;
- dctx->hufTableX4[0] = HufLog;
- dctx->flagStaticTables = 0;
- return 0;
-}
-
-ZSTDv05_DCtx* ZSTDv05_createDCtx(void)
-{
- ZSTDv05_DCtx* dctx = (ZSTDv05_DCtx*)malloc(sizeof(ZSTDv05_DCtx));
- if (dctx==NULL) return NULL;
- ZSTDv05_decompressBegin(dctx);
- return dctx;
-}
-
-size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx)
-{
- free(dctx);
- return 0; /* reserved as a potential error code in the future */
-}
-
-void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx)
-{
- memcpy(dstDCtx, srcDCtx,
- sizeof(ZSTDv05_DCtx) - (BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTDv05_frameHeaderSize_max)); /* no need to copy workspace */
-}
-
-
-/* *************************************************************
-* Decompression section
-***************************************************************/
-
-/* Frame format description
- Frame Header - [ Block Header - Block ] - Frame End
- 1) Frame Header
- - 4 bytes - Magic Number : ZSTDv05_MAGICNUMBER (defined within zstd_internal.h)
- - 1 byte - Window Descriptor
- 2) Block Header
- - 3 bytes, starting with a 2-bits descriptor
- Uncompressed, Compressed, Frame End, unused
- 3) Block
- See Block Format Description
- 4) Frame End
- - 3 bytes, compatible with Block Header
-*/
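A sketch of reading the two frame-header fields just listed (illustrative only; it leans on MEM_readLE32, ZSTDv05_MAGICNUMBER and ZSTDv05_WINDOWLOG_ABSOLUTEMIN exactly as the parsing code further below does):

static unsigned demo_readWindowLog(const void* frameStart)
{
    const BYTE* const p = (const BYTE*)frameStart;
    if (MEM_readLE32(frameStart) != ZSTDv05_MAGICNUMBER)
        return 0;                                          /* bytes 0..3 : magic number */
    return (p[4] & 15) + ZSTDv05_WINDOWLOG_ABSOLUTEMIN;    /* byte 4 : window descriptor ;
                                                              low nibble encodes the window size,
                                                              high nibble is reserved */
}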
-
-/* Block format description
-
- Block = Literal Section - Sequences Section
- Prerequisite : size of (compressed) block, maximum size of regenerated data
-
- 1) Literal Section
-
- 1.1) Header : 1-5 bytes
- flags: 2 bits
- 00 compressed by Huff0
- 01 unused
- 10 is Raw (uncompressed)
- 11 is Rle
- Note : using 01 => Huff0 with precomputed table ?
- Note : delta map ? => compressed ?
-
- 1.1.1) Huff0-compressed literal block : 3-5 bytes
- srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
- srcSize < 1 KB => 3 bytes (2-2-10-10)
- srcSize < 16KB => 4 bytes (2-2-14-14)
- else => 5 bytes (2-2-18-18)
- big endian convention
-
- 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
- size : 5 bits: (IS_RAW<<6) + (0<<4) + size
- 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
- size&255
- 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
- size>>8&255
- size&255
-
- 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
- size : 5 bits: (IS_RLE<<6) + (0<<4) + size
- 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
- size&255
- 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
- size>>8&255
- size&255
-
- 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
- srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
- srcSize < 1 KB => 3 bytes (2-2-10-10)
- srcSize < 16KB => 4 bytes (2-2-14-14)
- else => 5 bytes (2-2-18-18)
- big endian convention
-
- 1- CTable available (stored into workspace ?)
- 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
-
-
- 1.2) Literal block content
-
- 1.2.1) Huff0 block, using sizes from header
- See Huff0 format
-
- 1.2.2) Huff0 block, using prepared table
-
- 1.2.3) Raw content
-
- 1.2.4) single byte
-
-
- 2) Sequences section
- TO DO
-*/
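A sketch of decoding the 3-byte "2-2-10-10" Huff0 literal header described above, using invented bytes: { 0x03, 0x20, 0x78 } encodes flags 00 (compressed by Huff0), the 4-stream variant, litSize = 200 and litCSize = 120.

static void demo_literalHeader(void)
{
    const BYTE istart[3] = { 0x03, 0x20, 0x78 };
    U32 type         = istart[0] >> 6;                               /* 00 : compressed by Huff0 */
    U32 singleStream = istart[0] & 16;                               /* 0  : 4-stream variant */
    U32 litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);       /* 200 regenerated bytes */
    U32 litCSize = ((istart[1] & 3) << 8) + istart[2];               /* 120 compressed bytes */
    (void)type; (void)singleStream; (void)litSize; (void)litCSize;
}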
-
-
-/** ZSTDv05_decodeFrameHeader_Part1() :
-* decode the 1st part of the Frame Header, which tells Frame Header size.
-* srcSize must be == ZSTDv05_frameHeaderSize_min.
-* @return : the full size of the Frame Header */
-static size_t ZSTDv05_decodeFrameHeader_Part1(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
-{
- U32 magicNumber;
- if (srcSize != ZSTDv05_frameHeaderSize_min)
- return ERROR(srcSize_wrong);
- magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
- zc->headerSize = ZSTDv05_frameHeaderSize_min;
- return zc->headerSize;
-}
-
-
-size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize)
-{
- U32 magicNumber;
- if (srcSize < ZSTDv05_frameHeaderSize_min) return ZSTDv05_frameHeaderSize_max;
- magicNumber = MEM_readLE32(src);
- if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
- memset(params, 0, sizeof(*params));
- params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTDv05_WINDOWLOG_ABSOLUTEMIN;
- if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
- return 0;
-}
-
-/** ZSTDv05_decodeFrameHeader_Part2() :
-* decode the full Frame Header.
-* srcSize must be the size provided by ZSTDv05_decodeFrameHeader_Part1().
-* @return : 0, or an error code, which can be tested using ZSTDv05_isError() */
-static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
-{
- size_t result;
- if (srcSize != zc->headerSize)
- return ERROR(srcSize_wrong);
- result = ZSTDv05_getFrameParams(&(zc->params), src, srcSize);
- if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
- return result;
-}
-
-
-size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
-{
- const BYTE* const in = (const BYTE* const)src;
- BYTE headerFlags;
- U32 cSize;
-
- if (srcSize < 3)
- return ERROR(srcSize_wrong);
-
- headerFlags = *in;
- cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-
- bpPtr->blockType = (blockType_t)(headerFlags >> 6);
- bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
- if (bpPtr->blockType == bt_end) return 0;
- if (bpPtr->blockType == bt_rle) return 1;
- return cSize;
-}
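A sketch of the 3-byte block header parsed by the function above, with invented bytes: the top two bits of byte 0 carry the block-type descriptor and its low three bits supply the upper bits of the block size.

static void demo_blockHeader(void)
{
    const BYTE in[3] = { 0x01, 0x23, 0x45 };
    U32 headerFlags = in[0];
    U32 blockType   = headerFlags >> 6;                        /* 2-bit descriptor : 0 */
    U32 cSize = in[2] + (in[1] << 8) + ((in[0] & 7) << 16);    /* 0x12345 = 74565 bytes */
    (void)blockType; (void)cSize;
}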
-
-
-static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
- memcpy(dst, src, srcSize);
- return srcSize;
-}
-
-
-/*! ZSTDv05_decodeLiteralsBlock() :
- @return : nb of bytes read from src (< srcSize ) */
-size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
- const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
-{
- const BYTE* const istart = (const BYTE*) src;
-
-    /* any compressed block with a literals segment must be at least this size */
- if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-
- switch(istart[0]>> 6)
- {
- case IS_HUFv05:
- {
- size_t litSize, litCSize, singleStream=0;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- switch(lhSize)
- {
-        case 0: case 1: default:   /* note : default is impossible, since lhSize is in [0..3] */
- /* 2 - 2 - 10 - 10 */
- lhSize=3;
- singleStream = istart[0] & 16;
- litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
- litCSize = ((istart[1] & 3) << 8) + istart[2];
- break;
- case 2:
- /* 2 - 2 - 14 - 14 */
- lhSize=4;
- litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
- litCSize = ((istart[2] & 63) << 8) + istart[3];
- break;
- case 3:
- /* 2 - 2 - 18 - 18 */
- lhSize=5;
- litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
- litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4];
- break;
- }
- if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
-
- if (HUFv05_isError(singleStream ?
- HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
- HUFv05_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
- return ERROR(corruption_detected);
-
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+8;
- dctx->litSize = litSize;
- return litCSize + lhSize;
- }
- case IS_PCH:
- {
- size_t errorCode;
- size_t litSize, litCSize;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- if (lhSize != 1) /* only case supported for now : small litSize, single stream */
- return ERROR(corruption_detected);
- if (!dctx->flagStaticTables)
- return ERROR(dictionary_corrupted);
-
- /* 2 - 2 - 10 - 10 */
- lhSize=3;
- litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
- litCSize = ((istart[1] & 3) << 8) + istart[2];
-
- errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
- if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
-
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
- dctx->litSize = litSize;
- return litCSize + lhSize;
- }
- case IS_RAW:
- {
- size_t litSize;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- switch(lhSize)
- {
- case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
- lhSize=1;
- litSize = istart[0] & 31;
- break;
- case 2:
- litSize = ((istart[0] & 15) << 8) + istart[1];
- break;
- case 3:
- litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
- break;
- }
-
- if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
- if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
- memcpy(dctx->litBuffer, istart+lhSize, litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+8;
- dctx->litSize = litSize;
- return lhSize+litSize;
- }
- /* direct reference into compressed stream */
- dctx->litPtr = istart+lhSize;
- dctx->litBufSize = srcSize-lhSize;
- dctx->litSize = litSize;
- return lhSize+litSize;
- }
- case IS_RLE:
- {
- size_t litSize;
- U32 lhSize = ((istart[0]) >> 4) & 3;
- switch(lhSize)
- {
- case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
- lhSize = 1;
- litSize = istart[0] & 31;
- break;
- case 2:
- litSize = ((istart[0] & 15) << 8) + istart[1];
- break;
- case 3:
- litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
- break;
- }
- if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
- memset(dctx->litBuffer, istart[lhSize], litSize);
- dctx->litPtr = dctx->litBuffer;
- dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
- dctx->litSize = litSize;
- return lhSize+1;
- }
- default:
- return ERROR(corruption_detected); /* impossible */
- }
-}
-
-
-size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
- FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
- const void* src, size_t srcSize)
-{
- const BYTE* const istart = (const BYTE* const)src;
- const BYTE* ip = istart;
- const BYTE* const iend = istart + srcSize;
- U32 LLtype, Offtype, MLtype;
- U32 LLlog, Offlog, MLlog;
- size_t dumpsLength;
-
- /* check */
- if (srcSize < MIN_SEQUENCES_SIZE)
- return ERROR(srcSize_wrong);
-
- /* SeqHead */
- *nbSeq = *ip++;
- if (*nbSeq==0) return 1;
- if (*nbSeq >= 128)
- *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
-
- LLtype = *ip >> 6;
- Offtype = (*ip >> 4) & 3;
- MLtype = (*ip >> 2) & 3;
- if (*ip & 2) {
- dumpsLength = ip[2];
- dumpsLength += ip[1] << 8;
- ip += 3;
- } else {
- dumpsLength = ip[1];
- dumpsLength += (ip[0] & 1) << 8;
- ip += 2;
- }
- *dumpsPtr = ip;
- ip += dumpsLength;
- *dumpsLengthPtr = dumpsLength;
-
- /* check */
- if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
- /* sequences */
- {
- S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
- size_t headerSize;
-
- /* Build DTables */
- switch(LLtype)
- {
- U32 max;
- case FSEv05_ENCODING_RLE :
- LLlog = 0;
- FSEv05_buildDTable_rle(DTableLL, *ip++);
- break;
- case FSEv05_ENCODING_RAW :
- LLlog = LLbits;
- FSEv05_buildDTable_raw(DTableLL, LLbits);
- break;
- case FSEv05_ENCODING_STATIC:
- break;
- case FSEv05_ENCODING_DYNAMIC :
- default : /* impossible */
- max = MaxLL;
- headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
- if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
- if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
- ip += headerSize;
- FSEv05_buildDTable(DTableLL, norm, max, LLlog);
- }
-
- switch(Offtype)
- {
- U32 max;
- case FSEv05_ENCODING_RLE :
- Offlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSEv05_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
- break;
- case FSEv05_ENCODING_RAW :
- Offlog = Offbits;
- FSEv05_buildDTable_raw(DTableOffb, Offbits);
- break;
- case FSEv05_ENCODING_STATIC:
- break;
- case FSEv05_ENCODING_DYNAMIC :
- default : /* impossible */
- max = MaxOff;
- headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
- if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
- if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
- ip += headerSize;
- FSEv05_buildDTable(DTableOffb, norm, max, Offlog);
- }
-
- switch(MLtype)
- {
- U32 max;
- case FSEv05_ENCODING_RLE :
- MLlog = 0;
- if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
- FSEv05_buildDTable_rle(DTableML, *ip++);
- break;
- case FSEv05_ENCODING_RAW :
- MLlog = MLbits;
- FSEv05_buildDTable_raw(DTableML, MLbits);
- break;
- case FSEv05_ENCODING_STATIC:
- break;
- case FSEv05_ENCODING_DYNAMIC :
- default : /* impossible */
- max = MaxML;
- headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
- if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
- if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
- ip += headerSize;
- FSEv05_buildDTable(DTableML, norm, max, MLlog);
- } }
-
- return ip-istart;
-}
-
-
-typedef struct {
- size_t litLength;
- size_t matchLength;
- size_t offset;
-} seq_t;
-
-typedef struct {
- BITv05_DStream_t DStream;
- FSEv05_DState_t stateLL;
- FSEv05_DState_t stateOffb;
- FSEv05_DState_t stateML;
- size_t prevOffset;
- const BYTE* dumps;
- const BYTE* dumpsEnd;
-} seqState_t;
-
-
-
-static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
-{
- size_t litLength;
- size_t prevOffset;
- size_t offset;
- size_t matchLength;
- const BYTE* dumps = seqState->dumps;
- const BYTE* const de = seqState->dumpsEnd;
-
- /* Literal length */
- litLength = FSEv05_peakSymbol(&(seqState->stateLL));
- prevOffset = litLength ? seq->offset : seqState->prevOffset;
- if (litLength == MaxLL) {
- U32 add = *dumps++;
- if (add < 255) litLength += add;
- else {
- litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */
- if (litLength&1) litLength>>=1, dumps += 3;
- else litLength = (U16)(litLength)>>1, dumps += 2;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
-
- /* Offset */
- {
- static const U32 offsetPrefix[MaxOff+1] = {
- 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
- 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
- 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
- U32 offsetCode = FSEv05_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
- U32 nbBits = offsetCode - 1;
- if (offsetCode==0) nbBits = 0; /* cmove */
- offset = offsetPrefix[offsetCode] + BITv05_readBits(&(seqState->DStream), nbBits);
- if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
- if (offsetCode==0) offset = prevOffset; /* repcode, cmove */
- if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
- FSEv05_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
- }
-
- /* Literal length update */
- FSEv05_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */
- if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
-
- /* MatchLength */
- matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
- if (matchLength == MaxML) {
- U32 add = *dumps++;
- if (add < 255) matchLength += add;
- else {
- matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
- if (matchLength&1) matchLength>>=1, dumps += 3;
- else matchLength = (U16)(matchLength)>>1, dumps += 2;
- }
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
- }
- matchLength += MINMATCH;
-
- /* save result */
- seq->litLength = litLength;
- seq->offset = offset;
- seq->matchLength = matchLength;
- seqState->dumps = dumps;
-
-#if 0 /* debug */
- {
- static U64 totalDecoded = 0;
- printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
- (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
- totalDecoded += litLength + matchLength;
- }
-#endif
-}
-
-
-static size_t ZSTDv05_execSequence(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit_8,
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
-{
- static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
-    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
- BYTE* const oLitEnd = op + sequence.litLength;
- const size_t sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_8 = oend-8;
- const BYTE* const litEnd = *litPtr + sequence.litLength;
- const BYTE* match = oLitEnd - sequence.offset;
-
- /* check */
- if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
- if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
-
- /* copy Literals */
- ZSTDv05_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = litEnd; /* update for next sequence */
-
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - base)) {
- /* offset beyond prefix */
- if (sequence.offset > (size_t)(oLitEnd - vBase))
- return ERROR(corruption_detected);
- match = dictEnd - (base-match);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- {
- size_t length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = base;
- } }
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- const int sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTDv05_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTDv05_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-12) {
- if (op < oend_8) {
- ZSTDv05_wildcopy(op, match, oend_8 - op);
- match += oend_8 - op;
- op = oend_8;
- }
- while (op < oMatchEnd)
- *op++ = *match++;
- } else {
- ZSTDv05_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- return sequenceLength;
-}
-
-
-static size_t ZSTDv05_decompressSequences(
- ZSTDv05_DCtx* dctx,
- void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize)
-{
- const BYTE* ip = (const BYTE*)seqStart;
- const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t errorCode, dumpsLength;
- const BYTE* litPtr = dctx->litPtr;
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
- const BYTE* const litEnd = litPtr + dctx->litSize;
- int nbSeq;
- const BYTE* dumps;
- U32* DTableLL = dctx->LLTable;
- U32* DTableML = dctx->MLTable;
- U32* DTableOffb = dctx->OffTable;
- const BYTE* const base = (const BYTE*) (dctx->base);
- const BYTE* const vBase = (const BYTE*) (dctx->vBase);
- const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-
- /* Build Decoding Tables */
- errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
- DTableLL, DTableML, DTableOffb,
- ip, seqSize);
- if (ZSTDv05_isError(errorCode)) return errorCode;
- ip += errorCode;
-
- /* Regen sequences */
- if (nbSeq) {
- seq_t sequence;
- seqState_t seqState;
-
- memset(&sequence, 0, sizeof(sequence));
- sequence.offset = REPCODE_STARTVALUE;
- seqState.dumps = dumps;
- seqState.dumpsEnd = dumps + dumpsLength;
- seqState.prevOffset = REPCODE_STARTVALUE;
- errorCode = BITv05_initDStream(&(seqState.DStream), ip, iend-ip);
- if (ERR_isError(errorCode)) return ERROR(corruption_detected);
- FSEv05_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
- FSEv05_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
- FSEv05_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
-
- for ( ; (BITv05_reloadDStream(&(seqState.DStream)) <= BITv05_DStream_completed) && nbSeq ; ) {
- size_t oneSeqSize;
- nbSeq--;
- ZSTDv05_decodeSequence(&sequence, &seqState);
- oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
- if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- }
-
- /* check if reached exact end */
- if (nbSeq) return ERROR(corruption_detected);
- }
-
- /* last literal segment */
- {
- size_t lastLLSize = litEnd - litPtr;
- if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
- if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
- }
-
- return op-ostart;
-}
-
-
-static void ZSTDv05_checkContinuity(ZSTDv05_DCtx* dctx, const void* dst)
-{
- if (dst != dctx->previousDstEnd) { /* not contiguous */
- dctx->dictEnd = dctx->previousDstEnd;
- dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
- dctx->base = dst;
- dctx->previousDstEnd = dst;
- }
-}
-
-
-static size_t ZSTDv05_decompressBlock_internal(ZSTDv05_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{ /* blockType == blockCompressed */
- const BYTE* ip = (const BYTE*)src;
- size_t litCSize;
-
- if (srcSize >= BLOCKSIZE) return ERROR(srcSize_wrong);
-
- /* Decode literals sub-block */
- litCSize = ZSTDv05_decodeLiteralsBlock(dctx, src, srcSize);
- if (ZSTDv05_isError(litCSize)) return litCSize;
- ip += litCSize;
- srcSize -= litCSize;
-
- return ZSTDv05_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-
-size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- ZSTDv05_checkContinuity(dctx, dst);
- return ZSTDv05_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
-}
-
-
-/*! ZSTDv05_decompress_continueDCtx
-* dctx must have been properly initialized */
-static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- const BYTE* ip = (const BYTE*)src;
- const BYTE* iend = ip + srcSize;
- BYTE* const ostart = (BYTE* const)dst;
- BYTE* op = ostart;
- BYTE* const oend = ostart + maxDstSize;
- size_t remainingSize = srcSize;
- blockProperties_t blockProperties;
-
- /* Frame Header */
- {
- size_t frameHeaderSize;
- if (srcSize < ZSTDv05_frameHeaderSize_min+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
- frameHeaderSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
- if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
- if (srcSize < frameHeaderSize+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
- ip += frameHeaderSize; remainingSize -= frameHeaderSize;
- frameHeaderSize = ZSTDv05_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);
- if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
- }
-
- /* Loop on each block */
- while (1)
- {
- size_t decodedSize=0;
- size_t cBlockSize = ZSTDv05_getcBlockSize(ip, iend-ip, &blockProperties);
- if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
-
- ip += ZSTDv05_blockHeaderSize;
- remainingSize -= ZSTDv05_blockHeaderSize;
- if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
-
- switch(blockProperties.blockType)
- {
- case bt_compressed:
- decodedSize = ZSTDv05_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
- break;
- case bt_raw :
- decodedSize = ZSTDv05_copyRawBlock(op, oend-op, ip, cBlockSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet supported */
- break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return ERROR(srcSize_wrong);
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- if (cBlockSize == 0) break; /* bt_end */
-
- if (ZSTDv05_isError(decodedSize)) return decodedSize;
- op += decodedSize;
- ip += cBlockSize;
- remainingSize -= cBlockSize;
- }
-
- return op-ostart;
-}
-
-
-size_t ZSTDv05_decompress_usingPreparedDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* refDCtx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize)
-{
- ZSTDv05_copyDCtx(dctx, refDCtx);
- ZSTDv05_checkContinuity(dctx, dst);
- return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
-}
-
-
-size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
- void* dst, size_t maxDstSize,
- const void* src, size_t srcSize,
- const void* dict, size_t dictSize)
-{
- ZSTDv05_decompressBegin_usingDict(dctx, dict, dictSize);
- ZSTDv05_checkContinuity(dctx, dst);
- return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
-}
-
-
-size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- return ZSTDv05_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
-}
-
-size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
-#if defined(ZSTDv05_HEAPMODE) && (ZSTDv05_HEAPMODE==1)
- size_t regenSize;
- ZSTDv05_DCtx* dctx = ZSTDv05_createDCtx();
- if (dctx==NULL) return ERROR(memory_allocation);
- regenSize = ZSTDv05_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
- ZSTDv05_freeDCtx(dctx);
- return regenSize;
-#else
- ZSTDv05_DCtx dctx;
- return ZSTDv05_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
-#endif
-}
-
-
-/* ******************************
-* Streaming Decompression API
-********************************/
-size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx)
-{
- return dctx->expected;
-}
-
-size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- /* Sanity check */
- if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
- ZSTDv05_checkContinuity(dctx, dst);
-
- /* Decompress : frame header; part 1 */
- switch (dctx->stage)
- {
- case ZSTDv05ds_getFrameHeaderSize :
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ /* decoder timing evaluation */
+ Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
+ for (n=0; n<3; n++)
+ Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+
+ Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
+
+ if (Dtime[1] < Dtime[0]) algoNb = 1;
+
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+
+ //return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
+ //return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
+ //return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
+}
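+/* Illustrative note (not part of the original source) :
+*  the heuristic above buckets the compression ratio into Q = cSrcSize*16/dstSize,
+*  then picks the single-symbol (X2) or double-symbol (X4) decoder whose estimated
+*  table-build + decode time is lower. A minimal usage sketch, assuming the caller
+*  already knows the exact regenerated size dstSize :
+*      size_t r = HUFv05_decompress(dst, dstSize, cSrc, cSrcSize);
+*      if (HUFv05_isError(r)) { handle corruption }
+*/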
+/*
+ zstd - standard compression library
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+
+/* ***************************************************************
+* Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTDv05_decompress() will allocate memory,
+ * in memory stack (0), or in memory heap (1, requires malloc())
+ */
+#ifndef ZSTDv05_HEAPMODE
+# define ZSTDv05_HEAPMODE 1
+#endif
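+/* Illustrative note (not part of the original source) : a build can override the
+*  default, e.g. by compiling with -DZSTDv05_HEAPMODE=0, to keep the ZSTDv05_DCtx
+*  on the stack. The context is large (its litBuffer alone is
+*  BLOCKSIZE + WILDCOPY_OVERLENGTH bytes), so stack mode assumes a generous stack. */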
+
+
+/*-*******************************************************
+* Dependencies
+*********************************************************/
+#include <stdlib.h> /* calloc */
+#include <string.h> /* memcpy, memmove */
+#include <stdio.h> /* debug only : printf */
+
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#else
+# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+
+/*-*************************************
+* Local types
+***************************************/
+typedef struct
+{
+ blockType_t blockType;
+ U32 origSize;
+} blockProperties_t;
+
+
+/* *******************************************************
+* Memory operations
+**********************************************************/
+static void ZSTDv05_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/* *************************************
+* Error Management
+***************************************/
+/*! ZSTDv05_isError() :
+* tells if a return value is an error code */
+unsigned ZSTDv05_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTDv05_getError() :
+* convert a `size_t` function result into a proper ZSTDv05_errorCode enum */
+ZSTDv05_ErrorCode ZSTDv05_getError(size_t code) { return ERR_getError(code); }
+
+/*! ZSTDv05_getErrorName() :
+* provides error code string (useful for debugging) */
+const char* ZSTDv05_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/* *************************************************************
+* Context management
+***************************************************************/
+typedef enum { ZSTDv05ds_getFrameHeaderSize, ZSTDv05ds_decodeFrameHeader,
+ ZSTDv05ds_decodeBlockHeader, ZSTDv05ds_decompressBlock } ZSTDv05_dStage;
+
+struct ZSTDv05_DCtx_s
+{
+ FSEv05_DTable LLTable[FSEv05_DTABLE_SIZE_U32(LLFSEv05Log)];
+ FSEv05_DTable OffTable[FSEv05_DTABLE_SIZE_U32(OffFSEv05Log)];
+ FSEv05_DTable MLTable[FSEv05_DTABLE_SIZE_U32(MLFSEv05Log)];
+ unsigned hufTableX4[HUFv05_DTABLE_SIZE(HufLog)];
+ const void* previousDstEnd;
+ const void* base;
+ const void* vBase;
+ const void* dictEnd;
+ size_t expected;
+ size_t headerSize;
+ ZSTDv05_parameters params;
+ blockType_t bType; /* used in ZSTDv05_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
+ ZSTDv05_dStage stage;
+ U32 flagStaticTables;
+ const BYTE* litPtr;
+ size_t litBufSize;
+ size_t litSize;
+ BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
+ BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
+}; /* typedef'd to ZSTDv05_DCtx within "zstd_static.h" */
+
+size_t ZSTDv05_sizeofDCtx (void) { return sizeof(ZSTDv05_DCtx); }
+
+size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx)
+{
+ dctx->expected = ZSTDv05_frameHeaderSize_min;
+ dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+ dctx->previousDstEnd = NULL;
+ dctx->base = NULL;
+ dctx->vBase = NULL;
+ dctx->dictEnd = NULL;
+ dctx->hufTableX4[0] = HufLog;
+ dctx->flagStaticTables = 0;
+ return 0;
+}
+
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void)
+{
+ ZSTDv05_DCtx* dctx = (ZSTDv05_DCtx*)malloc(sizeof(ZSTDv05_DCtx));
+ if (dctx==NULL) return NULL;
+ ZSTDv05_decompressBegin(dctx);
+ return dctx;
+}
+
+size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx)
+{
+ free(dctx);
+ return 0; /* reserved as a potential error code in the future */
+}
+
+void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx)
+{
+ memcpy(dstDCtx, srcDCtx,
+ sizeof(ZSTDv05_DCtx) - (BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTDv05_frameHeaderSize_max)); /* no need to copy workspace */
+}
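+/* Illustrative sketch (not part of the original source) : typical context lifecycle.
+*  ZSTDv05_createDCtx() already calls ZSTDv05_decompressBegin(), so an explicit
+*  re-init is only needed when reusing the same context for another frame.
+*  The function name below is hypothetical. */
+#if 0   /* example only */
+static void example_dctx_lifecycle(void)
+{
+    ZSTDv05_DCtx* const dctx = ZSTDv05_createDCtx();
+    if (dctx==NULL) return;          /* allocation failure */
+    /* ... decompress one frame with this dctx ... */
+    ZSTDv05_decompressBegin(dctx);   /* reset before decoding a second frame */
+    /* ... decompress another frame ... */
+    ZSTDv05_freeDCtx(dctx);
+}
+#endif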
+
+
+/* *************************************************************
+* Decompression section
+***************************************************************/
+
+/* Frame format description
+ Frame Header - [ Block Header - Block ] - Frame End
+ 1) Frame Header
+ - 4 bytes - Magic Number : ZSTDv05_MAGICNUMBER (defined within zstd_internal.h)
+ - 1 byte - Window Descriptor
+ 2) Block Header
+ - 3 bytes, starting with a 2-bits descriptor
+ Uncompressed, Compressed, Frame End, unused
+ 3) Block
+ See Block Format Description
+ 4) Frame End
+ - 3 bytes, compatible with Block Header
+*/
+
+/* Block format description
+
+ Block = Literal Section - Sequences Section
+ Prerequisite : size of (compressed) block, maximum size of regenerated data
+
+ 1) Literal Section
+
+ 1.1) Header : 1-5 bytes
+ flags: 2 bits
+ 00 compressed by Huff0
+ 01 unused
+ 10 is Raw (uncompressed)
+ 11 is Rle
+ Note : using 01 => Huff0 with precomputed table ?
+ Note : delta map ? => compressed ?
+
+ 1.1.1) Huff0-compressed literal block : 3-5 bytes
+ srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+ srcSize < 1 KB => 3 bytes (2-2-10-10)
+ srcSize < 16KB => 4 bytes (2-2-14-14)
+ else => 5 bytes (2-2-18-18)
+ big endian convention
+
+ 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+ size : 5 bits: (IS_RAW<<6) + (0<<4) + size
+ 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
+ size&255
+ 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
+ size>>8&255
+ size&255
+
+ 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+ size : 5 bits: (IS_RLE<<6) + (0<<4) + size
+ 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
+ size&255
+ 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
+ size>>8&255
+ size&255
+
+ 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+ srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+ srcSize < 1 KB => 3 bytes (2-2-10-10)
+ srcSize < 16KB => 4 bytes (2-2-14-14)
+ else => 5 bytes (2-2-18-18)
+ big endian convention
+
+ 1- CTable available (stored into workspace ?)
+ 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+ 1.2) Literal block content
+
+ 1.2.1) Huff0 block, using sizes from header
+ See Huff0 format
+
+ 1.2.2) Huff0 block, using prepared table
+
+ 1.2.3) Raw content
+
+ 1.2.4) single byte
+
+
+ 2) Sequences section
+ TO DO
+*/
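+/* Illustrative example (not part of the original source) : a Raw literal run of
+*  300 bytes uses the 2-byte header form from 1.1.2 above :
+*      byte0 = (IS_RAW<<6) + (2<<4) + (300>>8)   and   byte1 = 300 & 255
+*  which the decoder reverses as litSize = ((byte0 & 15) << 8) + byte1 = 300. */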
+
+
+/** ZSTDv05_decodeFrameHeader_Part1() :
+* decode the 1st part of the Frame Header, which tells Frame Header size.
+* srcSize must be == ZSTDv05_frameHeaderSize_min.
+* @return : the full size of the Frame Header */
+static size_t ZSTDv05_decodeFrameHeader_Part1(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+ U32 magicNumber;
+ if (srcSize != ZSTDv05_frameHeaderSize_min)
+ return ERROR(srcSize_wrong);
+ magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+ zc->headerSize = ZSTDv05_frameHeaderSize_min;
+ return zc->headerSize;
+}
+
+
+size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize)
+{
+ U32 magicNumber;
+ if (srcSize < ZSTDv05_frameHeaderSize_min) return ZSTDv05_frameHeaderSize_max;
+ magicNumber = MEM_readLE32(src);
+ if (magicNumber != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+ memset(params, 0, sizeof(*params));
+ params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTDv05_WINDOWLOG_ABSOLUTEMIN;
+ if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
+ return 0;
+}
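+/* Illustrative sketch (not part of the original source) : reading the window size
+*  from a frame header. A return of 0 means the header was fully decoded; a value
+*  > 0 that is not an error code means more input is required. The function name
+*  below is hypothetical. */
+#if 0   /* example only */
+static void example_getFrameParams(const void* frameStart, size_t frameSize)
+{
+    ZSTDv05_parameters params;
+    size_t const r = ZSTDv05_getFrameParams(&params, frameStart, frameSize);
+    if (ZSTDv05_isError(r)) return;   /* not a ZSTDv05 frame */
+    if (r > 0) return;                /* header incomplete : supply at least r bytes */
+    /* params.windowLog now holds log2 of the window size */
+}
+#endif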
+
+/** ZSTDv05_decodeFrameHeader_Part2() :
+* decode the full Frame Header.
+* srcSize must be the size provided by ZSTDv05_decodeFrameHeader_Part1().
+* @return : 0, or an error code, which can be tested using ZSTDv05_isError() */
+static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src, size_t srcSize)
+{
+ size_t result;
+ if (srcSize != zc->headerSize)
+ return ERROR(srcSize_wrong);
+ result = ZSTDv05_getFrameParams(&(zc->params), src, srcSize);
+ if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
+ return result;
+}
+
+
+size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+ const BYTE* const in = (const BYTE* const)src;
+ BYTE headerFlags;
+ U32 cSize;
+
+ if (srcSize < 3)
+ return ERROR(srcSize_wrong);
+
+ headerFlags = *in;
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
+
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
+
+ if (bpPtr->blockType == bt_end) return 0;
+ if (bpPtr->blockType == bt_rle) return 1;
+ return cSize;
+}
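+/* Illustrative example (not part of the original source) : with the 3-byte header
+*  { 0x00, 0x01, 0x2C } the top two bits of the first byte give the block type
+*  (here 0) and the remaining bits give
+*  cSize = 0x2C + (0x01<<8) + ((0x00 & 7)<<16) = 300 bytes. */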
+
+
+static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
+ memcpy(dst, src, srcSize);
+ return srcSize;
+}
+
+
+/*! ZSTDv05_decodeLiteralsBlock() :
+ @return : nb of bytes read from src (< srcSize ) */
+size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
+ const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
+{
+ const BYTE* const istart = (const BYTE*) src;
+
+ /* any compressed block with literals segment must be at least this size */
+ if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+
+ switch(istart[0]>> 6)
+ {
+ case IS_HUFv05:
+ {
+ size_t litSize, litCSize, singleStream=0;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ switch(lhSize)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
+ /* 2 - 2 - 10 - 10 */
+ lhSize=3;
+ singleStream = istart[0] & 16;
+ litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+ litCSize = ((istart[1] & 3) << 8) + istart[2];
+ break;
+ case 2:
+ /* 2 - 2 - 14 - 14 */
+ lhSize=4;
+ litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
+ litCSize = ((istart[2] & 63) << 8) + istart[3];
+ break;
+ case 3:
+ /* 2 - 2 - 18 - 18 */
+ lhSize=5;
+ litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
+ litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4];
+ break;
+ }
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+
+ if (HUFv05_isError(singleStream ?
+ HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
+ HUFv05_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
+ return ERROR(corruption_detected);
+
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+8;
+ dctx->litSize = litSize;
+ return litCSize + lhSize;
+ }
+ case IS_PCH:
+ {
+ size_t errorCode;
+ size_t litSize, litCSize;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ if (lhSize != 1) /* only case supported for now : small litSize, single stream */
+ return ERROR(corruption_detected);
+ if (!dctx->flagStaticTables)
+ return ERROR(dictionary_corrupted);
+
+ /* 2 - 2 - 10 - 10 */
+ lhSize=3;
+ litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
+ litCSize = ((istart[1] & 3) << 8) + istart[2];
+
+ errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
+ if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
+
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
+ dctx->litSize = litSize;
+ return litCSize + lhSize;
+ }
+ case IS_RAW:
+ {
+ size_t litSize;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ switch(lhSize)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
+ lhSize=1;
+ litSize = istart[0] & 31;
+ break;
+ case 2:
+ litSize = ((istart[0] & 15) << 8) + istart[1];
+ break;
+ case 3:
+ litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+ break;
+ }
+
+ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
+ if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+ memcpy(dctx->litBuffer, istart+lhSize, litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+8;
+ dctx->litSize = litSize;
+ return lhSize+litSize;
+ }
+ /* direct reference into compressed stream */
+ dctx->litPtr = istart+lhSize;
+ dctx->litBufSize = srcSize-lhSize;
+ dctx->litSize = litSize;
+ return lhSize+litSize;
+ }
+ case IS_RLE:
+ {
+ size_t litSize;
+ U32 lhSize = ((istart[0]) >> 4) & 3;
+ switch(lhSize)
+ {
+ case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
+ lhSize = 1;
+ litSize = istart[0] & 31;
+ break;
+ case 2:
+ litSize = ((istart[0] & 15) << 8) + istart[1];
+ break;
+ case 3:
+ litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+ break;
+ }
+ if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+ memset(dctx->litBuffer, istart[lhSize], litSize);
+ dctx->litPtr = dctx->litBuffer;
+ dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
+ dctx->litSize = litSize;
+ return lhSize+1;
+ }
+ default:
+ return ERROR(corruption_detected); /* impossible */
+ }
+}
+
+
+size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
+ FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
+ const void* src, size_t srcSize)
+{
+ const BYTE* const istart = (const BYTE* const)src;
+ const BYTE* ip = istart;
+ const BYTE* const iend = istart + srcSize;
+ U32 LLtype, Offtype, MLtype;
+ U32 LLlog, Offlog, MLlog;
+ size_t dumpsLength;
+
+ /* check */
+ if (srcSize < MIN_SEQUENCES_SIZE)
+ return ERROR(srcSize_wrong);
+
+ /* SeqHead */
+ *nbSeq = *ip++;
+ if (*nbSeq==0) return 1;
+ if (*nbSeq >= 128)
+ *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
+
+ LLtype = *ip >> 6;
+ Offtype = (*ip >> 4) & 3;
+ MLtype = (*ip >> 2) & 3;
+ if (*ip & 2) {
+ dumpsLength = ip[2];
+ dumpsLength += ip[1] << 8;
+ ip += 3;
+ } else {
+ dumpsLength = ip[1];
+ dumpsLength += (ip[0] & 1) << 8;
+ ip += 2;
+ }
+ *dumpsPtr = ip;
+ ip += dumpsLength;
+ *dumpsLengthPtr = dumpsLength;
+
+ /* check */
+ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
+
+ /* sequences */
+ {
+ S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
+ size_t headerSize;
+
+ /* Build DTables */
+ switch(LLtype)
+ {
+ U32 max;
+ case FSEv05_ENCODING_RLE :
+ LLlog = 0;
+ FSEv05_buildDTable_rle(DTableLL, *ip++);
+ break;
+ case FSEv05_ENCODING_RAW :
+ LLlog = LLbits;
+ FSEv05_buildDTable_raw(DTableLL, LLbits);
+ break;
+ case FSEv05_ENCODING_STATIC:
+ break;
+ case FSEv05_ENCODING_DYNAMIC :
+ default : /* impossible */
+ max = MaxLL;
+ headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
+ if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+ if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSEv05_buildDTable(DTableLL, norm, max, LLlog);
+ }
+
+ switch(Offtype)
+ {
+ U32 max;
+ case FSEv05_ENCODING_RLE :
+ Offlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSEv05_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
+ break;
+ case FSEv05_ENCODING_RAW :
+ Offlog = Offbits;
+ FSEv05_buildDTable_raw(DTableOffb, Offbits);
+ break;
+ case FSEv05_ENCODING_STATIC:
+ break;
+ case FSEv05_ENCODING_DYNAMIC :
+ default : /* impossible */
+ max = MaxOff;
+ headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
+ if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+ if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSEv05_buildDTable(DTableOffb, norm, max, Offlog);
+ }
+
+ switch(MLtype)
+ {
+ U32 max;
+ case FSEv05_ENCODING_RLE :
+ MLlog = 0;
+ if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
+ FSEv05_buildDTable_rle(DTableML, *ip++);
+ break;
+ case FSEv05_ENCODING_RAW :
+ MLlog = MLbits;
+ FSEv05_buildDTable_raw(DTableML, MLbits);
+ break;
+ case FSEv05_ENCODING_STATIC:
+ break;
+ case FSEv05_ENCODING_DYNAMIC :
+ default : /* impossible */
+ max = MaxML;
+ headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
+ if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
+ if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
+ ip += headerSize;
+ FSEv05_buildDTable(DTableML, norm, max, MLlog);
+ } }
+
+ return ip-istart;
+}
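+/* Illustrative note (not part of the original source) : the sequence count is
+*  stored in 1 or 2 bytes. A first byte below 128 is the count itself; otherwise
+*  nbSeq = ((byte0 - 128) << 8) + byte1, e.g. bytes { 0x81, 0x2C } encode 300. */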
+
+
+typedef struct {
+ size_t litLength;
+ size_t matchLength;
+ size_t offset;
+} seq_t;
+
+typedef struct {
+ BITv05_DStream_t DStream;
+ FSEv05_DState_t stateLL;
+ FSEv05_DState_t stateOffb;
+ FSEv05_DState_t stateML;
+ size_t prevOffset;
+ const BYTE* dumps;
+ const BYTE* dumpsEnd;
+} seqState_t;
+
+
+
+static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
+{
+ size_t litLength;
+ size_t prevOffset;
+ size_t offset;
+ size_t matchLength;
+ const BYTE* dumps = seqState->dumps;
+ const BYTE* const de = seqState->dumpsEnd;
+
+ /* Literal length */
+ litLength = FSEv05_peakSymbol(&(seqState->stateLL));
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;
+ if (litLength == MaxLL) {
+ U32 add = *dumps++;
+ if (add < 255) litLength += add;
+ else {
+ litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */
+ if (litLength&1) litLength>>=1, dumps += 3;
+ else litLength = (U16)(litLength)>>1, dumps += 2;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+
+ /* Offset */
+ {
+ static const U32 offsetPrefix[MaxOff+1] = {
+ 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
+ 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
+ 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
+ U32 offsetCode = FSEv05_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
+ U32 nbBits = offsetCode - 1;
+ if (offsetCode==0) nbBits = 0; /* cmove */
+ offset = offsetPrefix[offsetCode] + BITv05_readBits(&(seqState->DStream), nbBits);
+ if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+ if (offsetCode==0) offset = prevOffset; /* repcode, cmove */
+ if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
+ FSEv05_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
+ }
+
+ /* Literal length update */
+ FSEv05_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */
+ if (MEM_32bits()) BITv05_reloadDStream(&(seqState->DStream));
+
+ /* MatchLength */
+ matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+ if (matchLength == MaxML) {
+ U32 add = *dumps++;
+ if (add < 255) matchLength += add;
+ else {
+ matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
+ if (matchLength&1) matchLength>>=1, dumps += 3;
+ else matchLength = (U16)(matchLength)>>1, dumps += 2;
+ }
+ if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
+ }
+ matchLength += MINMATCH;
+
+ /* save result */
+ seq->litLength = litLength;
+ seq->offset = offset;
+ seq->matchLength = matchLength;
+ seqState->dumps = dumps;
+
+#if 0 /* debug */
+ {
+ static U64 totalDecoded = 0;
+ printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
+ (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
+ totalDecoded += litLength + matchLength;
+ }
+#endif
+}
+
+
+static size_t ZSTDv05_execSequence(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit_8,
+ const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{
+ static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+ BYTE* const oLitEnd = op + sequence.litLength;
+ const size_t sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_8 = oend-8;
+ const BYTE* const litEnd = *litPtr + sequence.litLength;
+ const BYTE* match = oLitEnd - sequence.offset;
+
+ /* check */
+ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
+ if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
+
+ /* copy Literals */
+ ZSTDv05_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = litEnd; /* update for next sequence */
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - base)) {
+ /* offset beyond prefix */
+ if (sequence.offset > (size_t)(oLitEnd - vBase))
+ return ERROR(corruption_detected);
+ match = dictEnd - (base-match);
+ if (match + sequence.matchLength <= dictEnd) {
+ memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ {
+ size_t length1 = dictEnd - match;
+ memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = base;
+ } }
+
+ /* match within prefix */
+ if (sequence.offset < 8) {
+ /* close range match, overlap */
+ const int sub2 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTDv05_copy4(op+4, match);
+ match -= sub2;
+ } else {
+ ZSTDv05_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-12) {
+ if (op < oend_8) {
+ ZSTDv05_wildcopy(op, match, oend_8 - op);
+ match += oend_8 - op;
+ op = oend_8;
+ }
+ while (op < oMatchEnd)
+ *op++ = *match++;
+ } else {
+ ZSTDv05_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ return sequenceLength;
+}
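+/* Illustrative note (not part of the original source) : for offsets below 8 the
+*  branch above widens the effective copy distance before the wildcopy loop. E.g.
+*  with offset==2, the four byte-by-byte copies are safe because they execute
+*  sequentially; match is then moved forward by dec32table[2]=2 for the 4-byte copy
+*  and pulled back by dec64table[2]=8, so once op and match both advance by 8 the
+*  distance is at least 8 and every following 8-byte copy reads only bytes that
+*  were already written. */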
+
+
+static size_t ZSTDv05_decompressSequences(
+ ZSTDv05_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize)
+{
+ const BYTE* ip = (const BYTE*)seqStart;
+ const BYTE* const iend = ip + seqSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t errorCode, dumpsLength;
+ const BYTE* litPtr = dctx->litPtr;
+ const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
+ const BYTE* const litEnd = litPtr + dctx->litSize;
+ int nbSeq;
+ const BYTE* dumps;
+ U32* DTableLL = dctx->LLTable;
+ U32* DTableML = dctx->MLTable;
+ U32* DTableOffb = dctx->OffTable;
+ const BYTE* const base = (const BYTE*) (dctx->base);
+ const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+
+ /* Build Decoding Tables */
+ errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+ DTableLL, DTableML, DTableOffb,
+ ip, seqSize);
+ if (ZSTDv05_isError(errorCode)) return errorCode;
+ ip += errorCode;
+
+ /* Regen sequences */
+ if (nbSeq) {
+ seq_t sequence;
+ seqState_t seqState;
+
+ memset(&sequence, 0, sizeof(sequence));
+ sequence.offset = REPCODE_STARTVALUE;
+ seqState.dumps = dumps;
+ seqState.dumpsEnd = dumps + dumpsLength;
+ seqState.prevOffset = REPCODE_STARTVALUE;
+ errorCode = BITv05_initDStream(&(seqState.DStream), ip, iend-ip);
+ if (ERR_isError(errorCode)) return ERROR(corruption_detected);
+ FSEv05_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+ FSEv05_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+ FSEv05_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+ for ( ; (BITv05_reloadDStream(&(seqState.DStream)) <= BITv05_DStream_completed) && nbSeq ; ) {
+ size_t oneSeqSize;
+ nbSeq--;
+ ZSTDv05_decodeSequence(&sequence, &seqState);
+ oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
+ if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
+ op += oneSeqSize;
+ }
+
+ /* check if reached exact end */
+ if (nbSeq) return ERROR(corruption_detected);
+ }
+
+ /* last literal segment */
+ {
+ size_t lastLLSize = litEnd - litPtr;
+ if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
+ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+ memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
+
+ return op-ostart;
+}
+
+
+static void ZSTDv05_checkContinuity(ZSTDv05_DCtx* dctx, const void* dst)
+{
+ if (dst != dctx->previousDstEnd) { /* not contiguous */
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+ dctx->base = dst;
+ dctx->previousDstEnd = dst;
+ }
+}
+
+
+static size_t ZSTDv05_decompressBlock_internal(ZSTDv05_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{ /* blockType == blockCompressed */
+ const BYTE* ip = (const BYTE*)src;
+ size_t litCSize;
+
+ if (srcSize >= BLOCKSIZE) return ERROR(srcSize_wrong);
+
+ /* Decode literals sub-block */
+ litCSize = ZSTDv05_decodeLiteralsBlock(dctx, src, srcSize);
+ if (ZSTDv05_isError(litCSize)) return litCSize;
+ ip += litCSize;
+ srcSize -= litCSize;
+
+ return ZSTDv05_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
+}
+
+
+size_t ZSTDv05_decompressBlock(ZSTDv05_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ ZSTDv05_checkContinuity(dctx, dst);
+ return ZSTDv05_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+}
+
+
+/*! ZSTDv05_decompress_continueDCtx
+* dctx must have been properly initialized */
+static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ const BYTE* ip = (const BYTE*)src;
+ const BYTE* iend = ip + srcSize;
+ BYTE* const ostart = (BYTE* const)dst;
+ BYTE* op = ostart;
+ BYTE* const oend = ostart + maxDstSize;
+ size_t remainingSize = srcSize;
+ blockProperties_t blockProperties;
+
+ /* Frame Header */
+ {
+ size_t frameHeaderSize;
+ if (srcSize < ZSTDv05_frameHeaderSize_min+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+ frameHeaderSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+ if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+ if (srcSize < frameHeaderSize+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
+ frameHeaderSize = ZSTDv05_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);
+ if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
+ }
+
+ /* Loop on each block */
+ while (1)
+ {
+ size_t decodedSize=0;
+ size_t cBlockSize = ZSTDv05_getcBlockSize(ip, iend-ip, &blockProperties);
+ if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+
+ ip += ZSTDv05_blockHeaderSize;
+ remainingSize -= ZSTDv05_blockHeaderSize;
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+
+ switch(blockProperties.blockType)
+ {
+ case bt_compressed:
+ decodedSize = ZSTDv05_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
+ break;
+ case bt_raw :
+ decodedSize = ZSTDv05_copyRawBlock(op, oend-op, ip, cBlockSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet supported */
+ break;
+ case bt_end :
+ /* end of frame */
+ if (remainingSize) return ERROR(srcSize_wrong);
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ if (cBlockSize == 0) break; /* bt_end */
+
+ if (ZSTDv05_isError(decodedSize)) return decodedSize;
+ op += decodedSize;
+ ip += cBlockSize;
+ remainingSize -= cBlockSize;
+ }
+
+ return op-ostart;
+}
+
+
+size_t ZSTDv05_decompress_usingPreparedDCtx(ZSTDv05_DCtx* dctx, const ZSTDv05_DCtx* refDCtx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize)
+{
+ ZSTDv05_copyDCtx(dctx, refDCtx);
+ ZSTDv05_checkContinuity(dctx, dst);
+ return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize)
+{
+ ZSTDv05_decompressBegin_usingDict(dctx, dict, dictSize);
+ ZSTDv05_checkContinuity(dctx, dst);
+ return ZSTDv05_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
+}
+
+
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ return ZSTDv05_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
+}
+
+size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+#if defined(ZSTDv05_HEAPMODE) && (ZSTDv05_HEAPMODE==1)
+ size_t regenSize;
+ ZSTDv05_DCtx* dctx = ZSTDv05_createDCtx();
+ if (dctx==NULL) return ERROR(memory_allocation);
+ regenSize = ZSTDv05_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
+ ZSTDv05_freeDCtx(dctx);
+ return regenSize;
+#else
+ ZSTDv05_DCtx dctx;
+ return ZSTDv05_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
+#endif
+}
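+/* Illustrative sketch (not part of the original source) : one-shot decompression.
+*  The v0.5 frame header described above carries only the magic number and window
+*  descriptor, not the regenerated size, so dstCapacity must be a caller-known
+*  upper bound (the value and function name below are hypothetical). */
+#if 0   /* example only */
+static void example_oneShot(const void* cSrc, size_t cSrcSize)
+{
+    size_t const dstCapacity = 1 << 20;   /* assumed upper bound on regenerated size */
+    void* const dst = malloc(dstCapacity);
+    if (dst) {
+        size_t const dSize = ZSTDv05_decompress(dst, dstCapacity, cSrc, cSrcSize);
+        if (!ZSTDv05_isError(dSize)) { /* dSize bytes of regenerated data in dst */ }
+        free(dst);
+    }
+}
+#endif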
+
+
+/* ******************************
+* Streaming Decompression API
+********************************/
+size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx)
+{
+ return dctx->expected;
+}
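+/* Illustrative sketch (not part of the original source) : the streaming contract is
+*  "ask how many bytes are expected, then feed exactly that many". The return value
+*  of ZSTDv05_decompressContinue() is the number of bytes written into dst (0 for
+*  the header and block-header steps). This minimal sketch assumes the whole
+*  compressed frame and a large-enough destination are already in memory; the
+*  function name is hypothetical. */
+#if 0   /* example only */
+static size_t example_streaming(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    ZSTDv05_DCtx* const dctx = ZSTDv05_createDCtx();
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstCapacity;
+    size_t toRead;
+    if (dctx==NULL) return ERROR(memory_allocation);
+    while ((toRead = ZSTDv05_nextSrcSizeToDecompress(dctx)) != 0) {
+        size_t produced;
+        if (toRead > (size_t)(iend-ip)) break;   /* truncated input */
+        produced = ZSTDv05_decompressContinue(dctx, op, (size_t)(oend-op), ip, toRead);
+        if (ZSTDv05_isError(produced)) { ZSTDv05_freeDCtx(dctx); return produced; }
+        ip += toRead;
+        op += produced;
+    }
+    ZSTDv05_freeDCtx(dctx);
+    return (size_t)(op - (BYTE*)dst);   /* total regenerated bytes */
+}
+#endif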
+
+size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ /* Sanity check */
+ if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
+ ZSTDv05_checkContinuity(dctx, dst);
+
+ /* Decompress : frame header; part 1 */
+ switch (dctx->stage)
+ {
+ case ZSTDv05ds_getFrameHeaderSize :
/* get frame header size */
if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
dctx->headerSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
@@ -3880,446 +3880,446 @@ size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSi
if (dctx->headerSize > ZSTDv05_frameHeaderSize_min) return ERROR(GENERIC); /* should never happen */
dctx->expected = 0; /* not necessary to copy more */
/* fallthrough */
- case ZSTDv05ds_decodeFrameHeader:
+ case ZSTDv05ds_decodeFrameHeader:
/* get frame header */
{ size_t const result = ZSTDv05_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
- if (ZSTDv05_isError(result)) return result;
- dctx->expected = ZSTDv05_blockHeaderSize;
- dctx->stage = ZSTDv05ds_decodeBlockHeader;
- return 0;
- }
- case ZSTDv05ds_decodeBlockHeader:
- {
- /* Decode block header */
- blockProperties_t bp;
- size_t blockSize = ZSTDv05_getcBlockSize(src, ZSTDv05_blockHeaderSize, &bp);
- if (ZSTDv05_isError(blockSize)) return blockSize;
- if (bp.blockType == bt_end) {
- dctx->expected = 0;
- dctx->stage = ZSTDv05ds_getFrameHeaderSize;
- }
- else {
- dctx->expected = blockSize;
- dctx->bType = bp.blockType;
- dctx->stage = ZSTDv05ds_decompressBlock;
- }
- return 0;
- }
- case ZSTDv05ds_decompressBlock:
- {
- /* Decompress : block content */
- size_t rSize;
- switch(dctx->bType)
- {
- case bt_compressed:
- rSize = ZSTDv05_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
- break;
- case bt_raw :
- rSize = ZSTDv05_copyRawBlock(dst, maxDstSize, src, srcSize);
- break;
- case bt_rle :
- return ERROR(GENERIC); /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
- break;
- default:
- return ERROR(GENERIC); /* impossible */
- }
- dctx->stage = ZSTDv05ds_decodeBlockHeader;
- dctx->expected = ZSTDv05_blockHeaderSize;
- dctx->previousDstEnd = (char*)dst + rSize;
- return rSize;
- }
- default:
- return ERROR(GENERIC); /* impossible */
- }
-}
-
-
-static void ZSTDv05_refDictContent(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
-{
- dctx->dictEnd = dctx->previousDstEnd;
- dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
- dctx->base = dict;
- dctx->previousDstEnd = (const char*)dict + dictSize;
-}
-
-static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
-{
- size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
- short offcodeNCount[MaxOff+1];
- U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSEv05Log;
- short matchlengthNCount[MaxML+1];
- unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSEv05Log;
- short litlengthNCount[MaxLL+1];
- unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSEv05Log;
-
- hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize);
- if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + hSize;
- dictSize -= hSize;
-
- offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
- if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
- errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
- if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + offcodeHeaderSize;
- dictSize -= offcodeHeaderSize;
-
- matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
- if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
- errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
- if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + matchlengthHeaderSize;
- dictSize -= matchlengthHeaderSize;
-
- litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
- if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
- errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
- if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
-
- dctx->flagStaticTables = 1;
- return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
-}
-
-static size_t ZSTDv05_decompress_insertDictionary(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
-{
- size_t eSize;
- U32 magic = MEM_readLE32(dict);
- if (magic != ZSTDv05_DICT_MAGIC) {
- /* pure content mode */
- ZSTDv05_refDictContent(dctx, dict, dictSize);
- return 0;
- }
- /* load entropy tables */
- dict = (const char*)dict + 4;
- dictSize -= 4;
- eSize = ZSTDv05_loadEntropy(dctx, dict, dictSize);
- if (ZSTDv05_isError(eSize)) return ERROR(dictionary_corrupted);
-
- /* reference dictionary content */
- dict = (const char*)dict + eSize;
- dictSize -= eSize;
- ZSTDv05_refDictContent(dctx, dict, dictSize);
-
- return 0;
-}
-
-
-size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
-{
- size_t errorCode;
- errorCode = ZSTDv05_decompressBegin(dctx);
- if (ZSTDv05_isError(errorCode)) return errorCode;
-
- if (dict && dictSize) {
- errorCode = ZSTDv05_decompress_insertDictionary(dctx, dict, dictSize);
- if (ZSTDv05_isError(errorCode)) return ERROR(dictionary_corrupted);
- }
-
- return 0;
-}
-
-/*
- Buffered version of Zstd compression library
- Copyright (C) 2015-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-  - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risk of future changes.
- */
-
-
-
-/* *************************************
-* Constants
-***************************************/
-static size_t ZBUFFv05_blockHeaderSize = 3;
-
-
-
-/* *** Compression *** */
-
-static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
- size_t length = MIN(maxDstSize, srcSize);
- memcpy(dst, src, length);
- return length;
-}
-
-
-
-
-/** ************************************************
-* Streaming decompression
-*
-* A ZBUFFv05_DCtx object is required to track streaming operation.
-* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
-* Use ZBUFFv05_decompressInit() to start a new decompression operation.
-* ZBUFFv05_DCtx objects can be reused multiple times.
-*
-* Use ZBUFFv05_decompressContinue() repetitively to consume your input.
-* *srcSizePtr and *maxDstSizePtr can be any size.
-* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
-* Note that it may not consume the entire input, in which case it's up to the caller to call the function again with the remaining input.
-* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
-* or 0 when a frame is completely decoded
-* or an error code, which can be tested using ZBUFFv05_isError().
-*
-* Hint : recommended buffer sizes (not compulsory)
-* output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
-* input : just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* **************************************************/
-
-typedef enum { ZBUFFv05ds_init, ZBUFFv05ds_readHeader, ZBUFFv05ds_loadHeader, ZBUFFv05ds_decodeHeader,
- ZBUFFv05ds_read, ZBUFFv05ds_load, ZBUFFv05ds_flush } ZBUFFv05_dStage;
-
-/* *** Resource management *** */
-
-#define ZSTDv05_frameHeaderSize_max 5 /* too magical, should come from reference */
-struct ZBUFFv05_DCtx_s {
- ZSTDv05_DCtx* zc;
- ZSTDv05_parameters params;
- char* inBuff;
- size_t inBuffSize;
- size_t inPos;
- char* outBuff;
- size_t outBuffSize;
- size_t outStart;
- size_t outEnd;
- size_t hPos;
- ZBUFFv05_dStage stage;
- unsigned char headerBuffer[ZSTDv05_frameHeaderSize_max];
-}; /* typedef'd to ZBUFFv05_DCtx within "zstd_buffered.h" */
-
-
-ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void)
-{
- ZBUFFv05_DCtx* zbc = (ZBUFFv05_DCtx*)malloc(sizeof(ZBUFFv05_DCtx));
- if (zbc==NULL) return NULL;
- memset(zbc, 0, sizeof(*zbc));
- zbc->zc = ZSTDv05_createDCtx();
- zbc->stage = ZBUFFv05ds_init;
- return zbc;
-}
-
-size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* zbc)
-{
- if (zbc==NULL) return 0; /* support free on null */
- ZSTDv05_freeDCtx(zbc->zc);
- free(zbc->inBuff);
- free(zbc->outBuff);
- free(zbc);
- return 0;
-}
-
-
-/* *** Initialization *** */
-
-size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* zbc, const void* dict, size_t dictSize)
-{
- zbc->stage = ZBUFFv05ds_readHeader;
- zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;
- return ZSTDv05_decompressBegin_usingDict(zbc->zc, dict, dictSize);
-}
-
-size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* zbc)
-{
- return ZBUFFv05_decompressInitDictionary(zbc, NULL, 0);
-}
-
-
-/* *** Decompression *** */
-
-size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
-{
- const char* const istart = (const char*)src;
- const char* ip = istart;
- const char* const iend = istart + *srcSizePtr;
- char* const ostart = (char*)dst;
- char* op = ostart;
- char* const oend = ostart + *maxDstSizePtr;
- U32 notDone = 1;
-
- while (notDone) {
- switch(zbc->stage)
- {
- case ZBUFFv05ds_init :
- return ERROR(init_missing);
-
- case ZBUFFv05ds_readHeader :
- /* read header from src */
- {
- size_t headerSize = ZSTDv05_getFrameParams(&(zbc->params), src, *srcSizePtr);
- if (ZSTDv05_isError(headerSize)) return headerSize;
- if (headerSize) {
- /* not enough input to decode header : tell how many bytes would be necessary */
- memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
- zbc->hPos += *srcSizePtr;
- *maxDstSizePtr = 0;
- zbc->stage = ZBUFFv05ds_loadHeader;
- return headerSize - zbc->hPos;
- }
- zbc->stage = ZBUFFv05ds_decodeHeader;
- break;
- }
-
- case ZBUFFv05ds_loadHeader:
- /* complete header from src */
- {
- size_t headerSize = ZBUFFv05_limitCopy(
- zbc->headerBuffer + zbc->hPos, ZSTDv05_frameHeaderSize_max - zbc->hPos,
- src, *srcSizePtr);
- zbc->hPos += headerSize;
- ip += headerSize;
- headerSize = ZSTDv05_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
- if (ZSTDv05_isError(headerSize)) return headerSize;
- if (headerSize) {
- /* not enough input to decode header : tell how many bytes would be necessary */
- *maxDstSizePtr = 0;
- return headerSize - zbc->hPos;
- }
- // zbc->stage = ZBUFFv05ds_decodeHeader; break; /* useless : stage follows */
- }
-
- case ZBUFFv05ds_decodeHeader:
- /* apply header to create / resize buffers */
- {
- size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
- size_t neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */
- if (zbc->inBuffSize < neededInSize) {
- free(zbc->inBuff);
- zbc->inBuffSize = neededInSize;
- zbc->inBuff = (char*)malloc(neededInSize);
- if (zbc->inBuff == NULL) return ERROR(memory_allocation);
- }
- if (zbc->outBuffSize < neededOutSize) {
- free(zbc->outBuff);
- zbc->outBuffSize = neededOutSize;
- zbc->outBuff = (char*)malloc(neededOutSize);
- if (zbc->outBuff == NULL) return ERROR(memory_allocation);
- } }
- if (zbc->hPos) {
- /* some data already loaded into headerBuffer : transfer into inBuff */
- memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
- zbc->inPos = zbc->hPos;
- zbc->hPos = 0;
- zbc->stage = ZBUFFv05ds_load;
- break;
- }
- zbc->stage = ZBUFFv05ds_read;
-
- case ZBUFFv05ds_read:
- {
- size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
- if (neededInSize==0) { /* end of frame */
- zbc->stage = ZBUFFv05ds_init;
- notDone = 0;
- break;
- }
- if ((size_t)(iend-ip) >= neededInSize) {
- /* directly decode from src */
- size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
- zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
- ip, neededInSize);
- if (ZSTDv05_isError(decodedSize)) return decodedSize;
- ip += neededInSize;
- if (!decodedSize) break; /* this was just a header */
- zbc->outEnd = zbc->outStart + decodedSize;
- zbc->stage = ZBUFFv05ds_flush;
- break;
- }
- if (ip==iend) { notDone = 0; break; } /* no more input */
- zbc->stage = ZBUFFv05ds_load;
- }
-
- case ZBUFFv05ds_load:
- {
- size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
- size_t toLoad = neededInSize - zbc->inPos; /* should always be <= remaining space within inBuff */
- size_t loadedSize;
- if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected); /* should never happen */
- loadedSize = ZBUFFv05_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
- ip += loadedSize;
- zbc->inPos += loadedSize;
- if (loadedSize < toLoad) { notDone = 0; break; } /* not enough input, wait for more */
- {
- size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
- zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
- zbc->inBuff, neededInSize);
- if (ZSTDv05_isError(decodedSize)) return decodedSize;
- zbc->inPos = 0; /* input is consumed */
- if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; } /* this was just a header */
- zbc->outEnd = zbc->outStart + decodedSize;
- zbc->stage = ZBUFFv05ds_flush;
- // break; /* ZBUFFv05ds_flush follows */
- } }
- case ZBUFFv05ds_flush:
- {
- size_t toFlushSize = zbc->outEnd - zbc->outStart;
- size_t flushedSize = ZBUFFv05_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
- op += flushedSize;
- zbc->outStart += flushedSize;
- if (flushedSize == toFlushSize) {
- zbc->stage = ZBUFFv05ds_read;
- if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
- zbc->outStart = zbc->outEnd = 0;
- break;
- }
- /* cannot flush everything */
- notDone = 0;
- break;
- }
- default: return ERROR(GENERIC); /* impossible */
- } }
-
- *srcSizePtr = ip-istart;
- *maxDstSizePtr = op-ostart;
-
+ if (ZSTDv05_isError(result)) return result;
+ dctx->expected = ZSTDv05_blockHeaderSize;
+ dctx->stage = ZSTDv05ds_decodeBlockHeader;
+ return 0;
+ }
+ case ZSTDv05ds_decodeBlockHeader:
+ {
+ /* Decode block header */
+ blockProperties_t bp;
+ size_t blockSize = ZSTDv05_getcBlockSize(src, ZSTDv05_blockHeaderSize, &bp);
+ if (ZSTDv05_isError(blockSize)) return blockSize;
+ if (bp.blockType == bt_end) {
+ dctx->expected = 0;
+ dctx->stage = ZSTDv05ds_getFrameHeaderSize;
+ }
+ else {
+ dctx->expected = blockSize;
+ dctx->bType = bp.blockType;
+ dctx->stage = ZSTDv05ds_decompressBlock;
+ }
+ return 0;
+ }
+ case ZSTDv05ds_decompressBlock:
+ {
+ /* Decompress : block content */
+ size_t rSize;
+ switch(dctx->bType)
+ {
+ case bt_compressed:
+ rSize = ZSTDv05_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
+ break;
+ case bt_raw :
+ rSize = ZSTDv05_copyRawBlock(dst, maxDstSize, src, srcSize);
+ break;
+ case bt_rle :
+ return ERROR(GENERIC); /* not yet handled */
+ break;
+ case bt_end : /* should never happen (filtered at phase 1) */
+ rSize = 0;
+ break;
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+ dctx->stage = ZSTDv05ds_decodeBlockHeader;
+ dctx->expected = ZSTDv05_blockHeaderSize;
+ dctx->previousDstEnd = (char*)dst + rSize;
+ return rSize;
+ }
+ default:
+ return ERROR(GENERIC); /* impossible */
+ }
+}
+
+
+static void ZSTDv05_refDictContent(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
+ dctx->base = dict;
+ dctx->previousDstEnd = (const char*)dict + dictSize;
+}
+
+static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
+ short offcodeNCount[MaxOff+1];
+ U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSEv05Log;
+ short matchlengthNCount[MaxML+1];
+ unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSEv05Log;
+ short litlengthNCount[MaxLL+1];
+ unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSEv05Log;
+
+ hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize);
+ if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted);
+ dict = (const char*)dict + hSize;
+ dictSize -= hSize;
+
+ offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+ if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+ errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+ if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+ dict = (const char*)dict + offcodeHeaderSize;
+ dictSize -= offcodeHeaderSize;
+
+ matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+ if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+ if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+ dict = (const char*)dict + matchlengthHeaderSize;
+ dictSize -= matchlengthHeaderSize;
+
+ litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+ if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+ if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+
+ dctx->flagStaticTables = 1;
+ return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
+}
+
+static size_t ZSTDv05_decompress_insertDictionary(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ size_t eSize;
+ U32 magic = MEM_readLE32(dict);
+ if (magic != ZSTDv05_DICT_MAGIC) {
+ /* pure content mode */
+ ZSTDv05_refDictContent(dctx, dict, dictSize);
+ return 0;
+ }
+ /* load entropy tables */
+ dict = (const char*)dict + 4;
+ dictSize -= 4;
+ eSize = ZSTDv05_loadEntropy(dctx, dict, dictSize);
+ if (ZSTDv05_isError(eSize)) return ERROR(dictionary_corrupted);
+
+ /* reference dictionary content */
+ dict = (const char*)dict + eSize;
+ dictSize -= eSize;
+ ZSTDv05_refDictContent(dctx, dict, dictSize);
+
+ return 0;
+}
+
+
+size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ size_t errorCode;
+ errorCode = ZSTDv05_decompressBegin(dctx);
+ if (ZSTDv05_isError(errorCode)) return errorCode;
+
+ if (dict && dictSize) {
+ errorCode = ZSTDv05_decompress_insertDictionary(dctx, dict, dictSize);
+ if (ZSTDv05_isError(errorCode)) return ERROR(dictionary_corrupted);
+ }
+
+ return 0;
+}
+
+/*
+ Buffered version of Zstd compression library
+ Copyright (C) 2015-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+  - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */
+
+
+
+/* *************************************
+* Constants
+***************************************/
+static size_t ZBUFFv05_blockHeaderSize = 3;
+
+
+
+/* *** Compression *** */
+
+static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+ size_t length = MIN(maxDstSize, srcSize);
+ memcpy(dst, src, length);
+ return length;
+}
+
+
+
+
+/** ************************************************
+* Streaming decompression
+*
+* A ZBUFFv05_DCtx object is required to track streaming operation.
+* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+* Use ZBUFFv05_decompressInit() to start a new decompression operation.
+* ZBUFFv05_DCtx objects can be reused multiple times.
+*
+* Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+* *srcSizePtr and *maxDstSizePtr can be any size.
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
+* Note that it may not consume the entire input, in which case it's up to the caller to call the function again with the remaining input.
+* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst .
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
+* or 0 when a frame is completely decoded
+* or an error code, which can be tested using ZBUFFv05_isError().
+*
+* Hint : recommended buffer sizes (not compulsory)
+* output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
+* input : just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* **************************************************/
+
+typedef enum { ZBUFFv05ds_init, ZBUFFv05ds_readHeader, ZBUFFv05ds_loadHeader, ZBUFFv05ds_decodeHeader,
+ ZBUFFv05ds_read, ZBUFFv05ds_load, ZBUFFv05ds_flush } ZBUFFv05_dStage;
+
+/* *** Resource management *** */
+
+#define ZSTDv05_frameHeaderSize_max 5 /* too magical, should come from reference */
+struct ZBUFFv05_DCtx_s {
+ ZSTDv05_DCtx* zc;
+ ZSTDv05_parameters params;
+ char* inBuff;
+ size_t inBuffSize;
+ size_t inPos;
+ char* outBuff;
+ size_t outBuffSize;
+ size_t outStart;
+ size_t outEnd;
+ size_t hPos;
+ ZBUFFv05_dStage stage;
+ unsigned char headerBuffer[ZSTDv05_frameHeaderSize_max];
+}; /* typedef'd to ZBUFFv05_DCtx within "zstd_buffered.h" */
+
+
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void)
+{
+ ZBUFFv05_DCtx* zbc = (ZBUFFv05_DCtx*)malloc(sizeof(ZBUFFv05_DCtx));
+ if (zbc==NULL) return NULL;
+ memset(zbc, 0, sizeof(*zbc));
+ zbc->zc = ZSTDv05_createDCtx();
+ zbc->stage = ZBUFFv05ds_init;
+ return zbc;
+}
+
+size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* zbc)
+{
+ if (zbc==NULL) return 0; /* support free on null */
+ ZSTDv05_freeDCtx(zbc->zc);
+ free(zbc->inBuff);
+ free(zbc->outBuff);
+ free(zbc);
+ return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* zbc, const void* dict, size_t dictSize)
+{
+ zbc->stage = ZBUFFv05ds_readHeader;
+ zbc->hPos = zbc->inPos = zbc->outStart = zbc->outEnd = 0;
+ return ZSTDv05_decompressBegin_usingDict(zbc->zc, dict, dictSize);
+}
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* zbc)
+{
+ return ZBUFFv05_decompressInitDictionary(zbc, NULL, 0);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr)
+{
+ const char* const istart = (const char*)src;
+ const char* ip = istart;
+ const char* const iend = istart + *srcSizePtr;
+ char* const ostart = (char*)dst;
+ char* op = ostart;
+ char* const oend = ostart + *maxDstSizePtr;
+ U32 notDone = 1;
+
+ while (notDone) {
+ switch(zbc->stage)
+ {
+ case ZBUFFv05ds_init :
+ return ERROR(init_missing);
+
+ case ZBUFFv05ds_readHeader :
+ /* read header from src */
+ {
+ size_t headerSize = ZSTDv05_getFrameParams(&(zbc->params), src, *srcSizePtr);
+ if (ZSTDv05_isError(headerSize)) return headerSize;
+ if (headerSize) {
+ /* not enough input to decode header : tell how many bytes would be necessary */
+ memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
+ zbc->hPos += *srcSizePtr;
+ *maxDstSizePtr = 0;
+ zbc->stage = ZBUFFv05ds_loadHeader;
+ return headerSize - zbc->hPos;
+ }
+ zbc->stage = ZBUFFv05ds_decodeHeader;
+ break;
+ }
+
+ case ZBUFFv05ds_loadHeader:
+ /* complete header from src */
+ {
+ size_t headerSize = ZBUFFv05_limitCopy(
+ zbc->headerBuffer + zbc->hPos, ZSTDv05_frameHeaderSize_max - zbc->hPos,
+ src, *srcSizePtr);
+ zbc->hPos += headerSize;
+ ip += headerSize;
+ headerSize = ZSTDv05_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
+ if (ZSTDv05_isError(headerSize)) return headerSize;
+ if (headerSize) {
+ /* not enough input to decode header : tell how many bytes would be necessary */
+ *maxDstSizePtr = 0;
+ return headerSize - zbc->hPos;
+ }
+ // zbc->stage = ZBUFFv05ds_decodeHeader; break; /* useless : stage follows */
+ }
+
+ case ZBUFFv05ds_decodeHeader:
+ /* apply header to create / resize buffers */
+ {
+ size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
+ size_t neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */
+ if (zbc->inBuffSize < neededInSize) {
+ free(zbc->inBuff);
+ zbc->inBuffSize = neededInSize;
+ zbc->inBuff = (char*)malloc(neededInSize);
+ if (zbc->inBuff == NULL) return ERROR(memory_allocation);
+ }
+ if (zbc->outBuffSize < neededOutSize) {
+ free(zbc->outBuff);
+ zbc->outBuffSize = neededOutSize;
+ zbc->outBuff = (char*)malloc(neededOutSize);
+ if (zbc->outBuff == NULL) return ERROR(memory_allocation);
+ } }
+ if (zbc->hPos) {
+ /* some data already loaded into headerBuffer : transfer into inBuff */
+ memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
+ zbc->inPos = zbc->hPos;
+ zbc->hPos = 0;
+ zbc->stage = ZBUFFv05ds_load;
+ break;
+ }
+ zbc->stage = ZBUFFv05ds_read;
+
+ case ZBUFFv05ds_read:
+ {
+ size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+ if (neededInSize==0) { /* end of frame */
+ zbc->stage = ZBUFFv05ds_init;
+ notDone = 0;
+ break;
+ }
+ if ((size_t)(iend-ip) >= neededInSize) {
+ /* directly decode from src */
+ size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+ zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+ ip, neededInSize);
+ if (ZSTDv05_isError(decodedSize)) return decodedSize;
+ ip += neededInSize;
+ if (!decodedSize) break; /* this was just a header */
+ zbc->outEnd = zbc->outStart + decodedSize;
+ zbc->stage = ZBUFFv05ds_flush;
+ break;
+ }
+ if (ip==iend) { notDone = 0; break; } /* no more input */
+ zbc->stage = ZBUFFv05ds_load;
+ }
+
+ case ZBUFFv05ds_load:
+ {
+ size_t neededInSize = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
+ size_t toLoad = neededInSize - zbc->inPos; /* should always be <= remaining space within inBuff */
+ size_t loadedSize;
+ if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected); /* should never happen */
+ loadedSize = ZBUFFv05_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip);
+ ip += loadedSize;
+ zbc->inPos += loadedSize;
+ if (loadedSize < toLoad) { notDone = 0; break; } /* not enough input, wait for more */
+ {
+ size_t decodedSize = ZSTDv05_decompressContinue(zbc->zc,
+ zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart,
+ zbc->inBuff, neededInSize);
+ if (ZSTDv05_isError(decodedSize)) return decodedSize;
+ zbc->inPos = 0; /* input is consumed */
+ if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; } /* this was just a header */
+ zbc->outEnd = zbc->outStart + decodedSize;
+ zbc->stage = ZBUFFv05ds_flush;
+ // break; /* ZBUFFv05ds_flush follows */
+ } }
+ case ZBUFFv05ds_flush:
+ {
+ size_t toFlushSize = zbc->outEnd - zbc->outStart;
+ size_t flushedSize = ZBUFFv05_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize);
+ op += flushedSize;
+ zbc->outStart += flushedSize;
+ if (flushedSize == toFlushSize) {
+ zbc->stage = ZBUFFv05ds_read;
+ if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize)
+ zbc->outStart = zbc->outEnd = 0;
+ break;
+ }
+ /* cannot flush everything */
+ notDone = 0;
+ break;
+ }
+ default: return ERROR(GENERIC); /* impossible */
+ } }
+
+ *srcSizePtr = ip-istart;
+ *maxDstSizePtr = op-ostart;
+
{ size_t nextSrcSizeHint = ZSTDv05_nextSrcSizeToDecompress(zbc->zc);
- if (nextSrcSizeHint > ZBUFFv05_blockHeaderSize) nextSrcSizeHint+= ZBUFFv05_blockHeaderSize; /* get next block header too */
- nextSrcSizeHint -= zbc->inPos; /* already loaded*/
- return nextSrcSizeHint;
- }
-}
-
-
-
-/* *************************************
-* Tool functions
-***************************************/
-unsigned ZBUFFv05_isError(size_t errorCode) { return ERR_isError(errorCode); }
-const char* ZBUFFv05_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
-size_t ZBUFFv05_recommendedDInSize(void) { return BLOCKSIZE + ZBUFFv05_blockHeaderSize /* block header size*/ ; }
-size_t ZBUFFv05_recommendedDOutSize(void) { return BLOCKSIZE; }
+ if (nextSrcSizeHint > ZBUFFv05_blockHeaderSize) nextSrcSizeHint+= ZBUFFv05_blockHeaderSize; /* get next block header too */
+ nextSrcSizeHint -= zbc->inPos; /* already loaded*/
+ return nextSrcSizeHint;
+ }
+}
+
+
+
+/* *************************************
+* Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFFv05_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFFv05_recommendedDInSize(void) { return BLOCKSIZE + ZBUFFv05_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFFv05_recommendedDOutSize(void) { return BLOCKSIZE; }
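
A minimal usage sketch of the ZBUFFv05 streaming decompression contract documented above: create a ZBUFFv05_DCtx, call ZBUFFv05_decompressInit(), then feed input through ZBUFFv05_decompressContinue() until it returns 0 (frame complete) or an error. The helper name decompress_v05_stream and the bare "zstd_v05.h" include path are assumptions made for illustration; buffer sizes follow the recommended-size helpers.

#include <stdio.h>
#include <stdlib.h>
#include "zstd_v05.h"                 /* ZBUFFv05_* declarations (path assumed) */

/* Illustrative helper (not part of the library): decompress one v0.5 frame
 * streamed from fin to fout. Returns 0 on success, 1 on failure. */
static int decompress_v05_stream(FILE* fin, FILE* fout)
{
    size_t const inSize  = ZBUFFv05_recommendedDInSize();   /* 128 KB + block header */
    size_t const outSize = ZBUFFv05_recommendedDOutSize();  /* 128 KB */
    char* inBuff  = (char*)malloc(inSize);
    char* outBuff = (char*)malloc(outSize);
    ZBUFFv05_DCtx* dctx = ZBUFFv05_createDCtx();
    size_t result = 1;   /* non-zero while the frame is not fully decoded */
    int ret = 0;

    if (!inBuff || !outBuff || !dctx) { ret = 1; goto cleanup; }
    if (ZBUFFv05_isError(ZBUFFv05_decompressInit(dctx))) { ret = 1; goto cleanup; }

    while (result != 0) {
        size_t const readSize = fread(inBuff, 1, inSize, fin);
        size_t pos = 0;
        if (readSize == 0) { ret = 1; break; }            /* truncated input */
        while (pos < readSize) {
            size_t srcSize = readSize - pos;              /* in: available, out: consumed */
            size_t dstSize = outSize;                     /* in: capacity, out: produced */
            result = ZBUFFv05_decompressContinue(dctx, outBuff, &dstSize,
                                                 inBuff + pos, &srcSize);
            if (ZBUFFv05_isError(result)) {
                fprintf(stderr, "zstd v0.5 error: %s\n", ZBUFFv05_getErrorName(result));
                ret = 1; goto cleanup;
            }
            fwrite(outBuff, 1, dstSize, fout);
            pos += srcSize;
            if (result == 0) break;                       /* frame fully decoded; trailing input ignored */
        }
    }

cleanup:
    ZBUFFv05_freeDCtx(dctx);                              /* safe on NULL */
    free(inBuff);
    free(outBuff);
    return ret;
}
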
diff --git a/contrib/libs/zstd06/legacy/zstd_v05.h b/contrib/libs/zstd06/legacy/zstd_v05.h
index 5f5354a1c9..a20e6c7068 100644
--- a/contrib/libs/zstd06/legacy/zstd_v05.h
+++ b/contrib/libs/zstd06/legacy/zstd_v05.h
@@ -1,94 +1,94 @@
#include <contrib/libs/zstd06/renames.h>
-/*
- zstd_v05 - decoder for 0.5 format
- Header File
- Copyright (C) 2014-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - zstd source repository : https://github.com/Cyan4973/zstd
-*/
-#ifndef ZSTDv05_H
-#define ZSTDv05_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stddef.h> /* size_t */
+/*
+ zstd_v05 - decoder for 0.5 format
+ Header File
+ Copyright (C) 2014-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+*/
+#ifndef ZSTDv05_H
+#define ZSTDv05_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stddef.h> /* size_t */
#include "mem.h" /* U64, U32 */
-
-
-/* *************************************
-* Simple functions
-***************************************/
-/*! ZSTDv05_decompress() :
- `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
- `dstCapacity` must be large enough, equal or larger than originalSize.
- @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
- or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
-size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
- const void* src, size_t compressedSize);
-
-
-/* *************************************
-* Helper functions
-***************************************/
-/* Error Management */
-unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
-const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */
-
-
-/* *************************************
-* Explicit memory management
-***************************************/
-/** Decompression context */
-typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
-ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
+
+
+/* *************************************
+* Simple functions
+***************************************/
+/*! ZSTDv05_decompress() :
+ `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
+ `dstCapacity` must be large enough, equal or larger than originalSize.
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
+size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
+ const void* src, size_t compressedSize);
+
+
+/* *************************************
+* Helper functions
+***************************************/
+/* Error Management */
+unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
+const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */
+
+
+/* *************************************
+* Explicit memory management
+***************************************/
+/** Decompression context */
+typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
+ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */
-
-/** ZSTDv05_decompressDCtx() :
-* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
-size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-
-/*-***********************
+
+/** ZSTDv05_decompressDCtx() :
+* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
+size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*-***********************
* Simple Dictionary API
-*************************/
-/*! ZSTDv05_decompress_usingDict() :
-* Decompression using a pre-defined Dictionary content (see dictBuilder).
-* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
-* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
-size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
+*************************/
+/*! ZSTDv05_decompress_usingDict() :
+* Decompression using a pre-defined Dictionary content (see dictBuilder).
+* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
+size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize);
-
+
/*-************************
* Advanced Streaming API
***************************/
@@ -99,74 +99,74 @@ typedef struct {
U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy;
} ZSTDv05_parameters;
size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
-
+
size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx);
size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-
+
+
/*-***********************
* ZBUFF API
*************************/
-typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
-ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
+typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
+ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx);
-
-size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
-size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
-
-size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
- void* dst, size_t* dstCapacityPtr,
- const void* src, size_t* srcSizePtr);
-
-/*-***************************************************************************
-* Streaming decompression
-*
-* A ZBUFFv05_DCtx object is required to track streaming operations.
-* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
-* Use ZBUFFv05_decompressInit() to start a new decompression operation,
-* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
-* Note that ZBUFFv05_DCtx objects can be reused multiple times.
-*
-* Use ZBUFFv05_decompressContinue() repetitively to consume your input.
-* *srcSizePtr and *dstCapacityPtr can be any size.
-* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
-* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
-* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
-* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
-* or 0 when a frame is completely decoded
-* or an error code, which can be tested using ZBUFFv05_isError().
-*
-* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
-* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
-* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* *******************************************************************************/
-
-
-/* *************************************
-* Tool functions
-***************************************/
-unsigned ZBUFFv05_isError(size_t errorCode);
-const char* ZBUFFv05_getErrorName(size_t errorCode);
-
-/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
-* These sizes are just hints, and tend to offer better latency */
-size_t ZBUFFv05_recommendedDInSize(void);
-size_t ZBUFFv05_recommendedDOutSize(void);
-
-
-
-/*-*************************************
-* Constants
-***************************************/
-#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */
-
-
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTDv05_H */
+
+size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
+size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
+
+size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
+ void* dst, size_t* dstCapacityPtr,
+ const void* src, size_t* srcSizePtr);
+
+/*-***************************************************************************
+* Streaming decompression
+*
+* A ZBUFFv05_DCtx object is required to track streaming operations.
+* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
+* Use ZBUFFv05_decompressInit() to start a new decompression operation,
+* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
+* Note that ZBUFFv05_DCtx objects can be reused multiple times.
+*
+* Use ZBUFFv05_decompressContinue() repetitively to consume your input.
+* *srcSizePtr and *dstCapacityPtr can be any size.
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
+* or 0 when a frame is completely decoded
+* or an error code, which can be tested using ZBUFFv05_isError().
+*
+* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
+* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+
+/* *************************************
+* Tool functions
+***************************************/
+unsigned ZBUFFv05_isError(size_t errorCode);
+const char* ZBUFFv05_getErrorName(size_t errorCode);
+
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
+* These sizes are just hints, and tend to offer better latency */
+size_t ZBUFFv05_recommendedDInSize(void);
+size_t ZBUFFv05_recommendedDOutSize(void);
+
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */
+
+
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTDv05_H */
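
For the one-shot path declared in this header, a hedged sketch of decompressing a whole in-memory frame with an optional dictionary follows. The helper name decompress_v05_buffer, the bare "zstd_v05.h" include path, and the (size_t)-1 sentinel are assumptions for illustration; dstCapacity must be at least the original content size, which the caller has to know or bound.

#include "zstd_v05.h"   /* declarations used below (path assumed) */

/* Illustrative helper (not part of the library): pass dict == NULL and
 * dictSize == 0 when no dictionary was used at compression time. */
static size_t decompress_v05_buffer(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize,
                                    const void* dict, size_t dictSize)
{
    ZSTDv05_DCtx* dctx = ZSTDv05_createDCtx();
    size_t result;
    if (dctx == NULL) return (size_t)-1;          /* sentinel for this sketch only */
    result = ZSTDv05_decompress_usingDict(dctx, dst, dstCapacity,
                                          src, srcSize, dict, dictSize);
    ZSTDv05_freeDCtx(dctx);
    return result;   /* decompressed size, or an error code testable with ZSTDv05_isError() */
}

Without a dictionary, the same effect is obtained with ZSTDv05_decompress() directly, which does not require an explicit context.
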
diff --git a/contrib/libs/zstd06/libzstd.pc.in b/contrib/libs/zstd06/libzstd.pc.in
index 63e2483f55..28afc3add5 100755
--- a/contrib/libs/zstd06/libzstd.pc.in
+++ b/contrib/libs/zstd06/libzstd.pc.in
@@ -1,14 +1,14 @@
-# ZSTD - standard compression algorithm
-# Copyright (C) 2014-2015, Yann Collet.
-# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-prefix=@PREFIX@
-libdir=@LIBDIR@
-includedir=@INCLUDEDIR@
-
-Name: zstd
-Description: lossless compression algorithm library
-URL: https://github.com/Cyan4973/zstd
-Version: @VERSION@
-Libs: -L@LIBDIR@ -lzstd
-Cflags: -I@INCLUDEDIR@
+# ZSTD - standard compression algorithm
+# Copyright (C) 2014-2015, Yann Collet.
+# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: zstd
+Description: lossless compression algorithm library
+URL: https://github.com/Cyan4973/zstd
+Version: @VERSION@
+Libs: -L@LIBDIR@ -lzstd
+Cflags: -I@INCLUDEDIR@
diff --git a/contrib/libs/zstd06/ya.make b/contrib/libs/zstd06/ya.make
index 0446eaf7ca..e35f69f443 100644
--- a/contrib/libs/zstd06/ya.make
+++ b/contrib/libs/zstd06/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
+LIBRARY()
+
VERSION(0.6.2)
LICENSE(
@@ -14,10 +14,10 @@ OWNER(
g:contrib
g:cpp-contrib
)
-
-NO_UTIL()
-
-SRCS(
+
+NO_UTIL()
+
+SRCS(
common/entropy_common.c
common/fse_decompress.c
common/xxhash.c
@@ -31,28 +31,28 @@ SRCS(
decompress/zstd_decompress.c
dictBuilder/divsufsort.c
dictBuilder/zdict.c
- legacy/zstd_v01.c
- legacy/zstd_v02.c
- legacy/zstd_v03.c
- legacy/zstd_v04.c
+ legacy/zstd_v01.c
+ legacy/zstd_v02.c
+ legacy/zstd_v03.c
+ legacy/zstd_v04.c
legacy/zstd_v05.c
legacy/zstd_v07.c
legacy/zstd_v08.c
-)
-
+)
+
NO_COMPILER_WARNINGS()
CFLAGS(
-DZSTD_LEGACY_SUPPORT=1
)
-
-ADDINCL(
- contrib/libs/zstd06
+
+ADDINCL(
+ contrib/libs/zstd06
contrib/libs/zstd06/common
contrib/libs/zstd06/compress
contrib/libs/zstd06/decompress
contrib/libs/zstd06/dictBuilder
- contrib/libs/zstd06/legacy
-)
-
-END()
+ contrib/libs/zstd06/legacy
+)
+
+END()
diff --git a/contrib/python/Jinja2/py2/ya.make b/contrib/python/Jinja2/py2/ya.make
index c96a62b9ec..5f1bf64811 100644
--- a/contrib/python/Jinja2/py2/ya.make
+++ b/contrib/python/Jinja2/py2/ya.make
@@ -3,7 +3,7 @@
PY2_LIBRARY()
OWNER(floatdrop g:python-contrib)
-
+
VERSION(2.11.3)
LICENSE(BSD-3-Clause)
diff --git a/contrib/python/Jinja2/py3/ya.make b/contrib/python/Jinja2/py3/ya.make
index ea50aea17f..3043e7fc3d 100644
--- a/contrib/python/Jinja2/py3/ya.make
+++ b/contrib/python/Jinja2/py3/ya.make
@@ -3,7 +3,7 @@
PY3_LIBRARY()
OWNER(floatdrop g:python-contrib)
-
+
VERSION(3.0.3)
LICENSE(BSD-3-Clause)
diff --git a/contrib/python/Jinja2/ya.make b/contrib/python/Jinja2/ya.make
index f5bf0742d4..40f6572401 100644
--- a/contrib/python/Jinja2/ya.make
+++ b/contrib/python/Jinja2/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/MarkupSafe/py2/ya.make b/contrib/python/MarkupSafe/py2/ya.make
index 4106977a79..0e773ee33a 100644
--- a/contrib/python/MarkupSafe/py2/ya.make
+++ b/contrib/python/MarkupSafe/py2/ya.make
@@ -2,8 +2,8 @@ OWNER(g:python-contrib)
PY2_LIBRARY()
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
VERSION(1.1.1)
PY_SRCS(
diff --git a/contrib/python/MarkupSafe/py3/ya.make b/contrib/python/MarkupSafe/py3/ya.make
index 69f66f4764..8c750e15d8 100644
--- a/contrib/python/MarkupSafe/py3/ya.make
+++ b/contrib/python/MarkupSafe/py3/ya.make
@@ -3,7 +3,7 @@
PY3_LIBRARY()
OWNER(g:python-contrib)
-
+
VERSION(2.0.1)
LICENSE(BSD-3-Clause)
diff --git a/contrib/python/MarkupSafe/ya.make b/contrib/python/MarkupSafe/ya.make
index 58d3eeac9e..764b5915ff 100644
--- a/contrib/python/MarkupSafe/ya.make
+++ b/contrib/python/MarkupSafe/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/PyHamcrest/tests/ya.make b/contrib/python/PyHamcrest/tests/ya.make
index 77ce1d7236..6519301793 100644
--- a/contrib/python/PyHamcrest/tests/ya.make
+++ b/contrib/python/PyHamcrest/tests/ya.make
@@ -3,7 +3,7 @@ OWNER(g:python-contrib)
PY23_TEST()
NO_LINT()
-
+
TEST_SRCS(
test_raises.py
test_string_description.py
diff --git a/contrib/python/PyHamcrest/ya.make b/contrib/python/PyHamcrest/ya.make
index 2f34c69df5..c24f0fbef8 100644
--- a/contrib/python/PyHamcrest/ya.make
+++ b/contrib/python/PyHamcrest/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-
-LICENSE(BSD-3-Clause)
-
+
+LICENSE(BSD-3-Clause)
+
OWNER(g:python-contrib)
VERSION(1.9.0)
@@ -10,71 +10,71 @@ PEERDIR (
contrib/python/six
)
-SRCDIR(
- contrib/python/PyHamcrest/src
-)
-
-PY_SRCS(
- TOP_LEVEL
-
- hamcrest/core/compat.py
- hamcrest/core/assert_that.py
- hamcrest/core/matcher.py
- hamcrest/core/base_matcher.py
- hamcrest/core/selfdescribingvalue.py
- hamcrest/core/string_description.py
- hamcrest/core/core/isnot.py
- hamcrest/core/core/allof.py
- hamcrest/core/core/issame.py
- hamcrest/core/core/anyof.py
- hamcrest/core/core/isanything.py
- hamcrest/core/core/is_.py
- hamcrest/core/core/described_as.py
- hamcrest/core/core/raises.py
- hamcrest/core/core/isequal.py
- hamcrest/core/core/isnone.py
- hamcrest/core/core/isinstanceof.py
- hamcrest/core/core/__init__.py
- hamcrest/core/description.py
- hamcrest/core/selfdescribing.py
- hamcrest/core/base_description.py
- hamcrest/core/helpers/hasmethod.py
- hamcrest/core/helpers/wrap_matcher.py
- hamcrest/core/helpers/__init__.py
- hamcrest/core/__init__.py
- hamcrest/library/integration/match_equality.py
- hamcrest/library/integration/__init__.py
- hamcrest/library/number/ordering_comparison.py
- hamcrest/library/number/iscloseto.py
- hamcrest/library/number/__init__.py
- hamcrest/library/text/substringmatcher.py
- hamcrest/library/text/stringcontainsinorder.py
- hamcrest/library/text/isequal_ignoring_case.py
- hamcrest/library/text/stringstartswith.py
- hamcrest/library/text/stringendswith.py
- hamcrest/library/text/isequal_ignoring_whitespace.py
- hamcrest/library/text/stringcontains.py
- hamcrest/library/text/stringmatches.py
- hamcrest/library/text/__init__.py
- hamcrest/library/object/hasstring.py
- hamcrest/library/object/hasproperty.py
- hamcrest/library/object/haslength.py
- hamcrest/library/object/__init__.py
- hamcrest/library/collection/isdict_containingkey.py
- hamcrest/library/collection/issequence_onlycontaining.py
- hamcrest/library/collection/issequence_containing.py
- hamcrest/library/collection/issequence_containinginorder.py
- hamcrest/library/collection/isdict_containing.py
- hamcrest/library/collection/issequence_containinginanyorder.py
- hamcrest/library/collection/isin.py
- hamcrest/library/collection/isdict_containingvalue.py
- hamcrest/library/collection/is_empty.py
- hamcrest/library/collection/isdict_containingentries.py
- hamcrest/library/collection/__init__.py
- hamcrest/library/__init__.py
- hamcrest/__init__.py
-)
-
+SRCDIR(
+ contrib/python/PyHamcrest/src
+)
+
+PY_SRCS(
+ TOP_LEVEL
+
+ hamcrest/core/compat.py
+ hamcrest/core/assert_that.py
+ hamcrest/core/matcher.py
+ hamcrest/core/base_matcher.py
+ hamcrest/core/selfdescribingvalue.py
+ hamcrest/core/string_description.py
+ hamcrest/core/core/isnot.py
+ hamcrest/core/core/allof.py
+ hamcrest/core/core/issame.py
+ hamcrest/core/core/anyof.py
+ hamcrest/core/core/isanything.py
+ hamcrest/core/core/is_.py
+ hamcrest/core/core/described_as.py
+ hamcrest/core/core/raises.py
+ hamcrest/core/core/isequal.py
+ hamcrest/core/core/isnone.py
+ hamcrest/core/core/isinstanceof.py
+ hamcrest/core/core/__init__.py
+ hamcrest/core/description.py
+ hamcrest/core/selfdescribing.py
+ hamcrest/core/base_description.py
+ hamcrest/core/helpers/hasmethod.py
+ hamcrest/core/helpers/wrap_matcher.py
+ hamcrest/core/helpers/__init__.py
+ hamcrest/core/__init__.py
+ hamcrest/library/integration/match_equality.py
+ hamcrest/library/integration/__init__.py
+ hamcrest/library/number/ordering_comparison.py
+ hamcrest/library/number/iscloseto.py
+ hamcrest/library/number/__init__.py
+ hamcrest/library/text/substringmatcher.py
+ hamcrest/library/text/stringcontainsinorder.py
+ hamcrest/library/text/isequal_ignoring_case.py
+ hamcrest/library/text/stringstartswith.py
+ hamcrest/library/text/stringendswith.py
+ hamcrest/library/text/isequal_ignoring_whitespace.py
+ hamcrest/library/text/stringcontains.py
+ hamcrest/library/text/stringmatches.py
+ hamcrest/library/text/__init__.py
+ hamcrest/library/object/hasstring.py
+ hamcrest/library/object/hasproperty.py
+ hamcrest/library/object/haslength.py
+ hamcrest/library/object/__init__.py
+ hamcrest/library/collection/isdict_containingkey.py
+ hamcrest/library/collection/issequence_onlycontaining.py
+ hamcrest/library/collection/issequence_containing.py
+ hamcrest/library/collection/issequence_containinginorder.py
+ hamcrest/library/collection/isdict_containing.py
+ hamcrest/library/collection/issequence_containinginanyorder.py
+ hamcrest/library/collection/isin.py
+ hamcrest/library/collection/isdict_containingvalue.py
+ hamcrest/library/collection/is_empty.py
+ hamcrest/library/collection/isdict_containingentries.py
+ hamcrest/library/collection/__init__.py
+ hamcrest/library/__init__.py
+ hamcrest/__init__.py
+)
+
NO_LINT()
END()
diff --git a/contrib/python/PyYAML/py2/ya.make b/contrib/python/PyYAML/py2/ya.make
index ec27882b0a..1ec5c0c301 100644
--- a/contrib/python/PyYAML/py2/ya.make
+++ b/contrib/python/PyYAML/py2/ya.make
@@ -1,7 +1,7 @@
PY2_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
VERSION(5.4.1)
OWNER(g:python-contrib borman g:testenv)
diff --git a/contrib/python/PyYAML/py3/ya.make b/contrib/python/PyYAML/py3/ya.make
index 57792cc822..0401c04651 100644
--- a/contrib/python/PyYAML/py3/ya.make
+++ b/contrib/python/PyYAML/py3/ya.make
@@ -1,7 +1,7 @@
PY3_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
VERSION(5.4.1)
OWNER(g:python-contrib borman g:testenv)
diff --git a/contrib/python/PyYAML/ya.make b/contrib/python/PyYAML/ya.make
index df68e6b035..a604fce51f 100644
--- a/contrib/python/PyYAML/ya.make
+++ b/contrib/python/PyYAML/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/Pygments/py2/ya.make b/contrib/python/Pygments/py2/ya.make
index a71aca976a..978caf3d68 100644
--- a/contrib/python/Pygments/py2/ya.make
+++ b/contrib/python/Pygments/py2/ya.make
@@ -3,7 +3,7 @@
PY2_LIBRARY()
OWNER(blinkov g:python-contrib)
-
+
VERSION(2.5.2)
LICENSE(BSD-3-Clause)
diff --git a/contrib/python/Pygments/py3/ya.make b/contrib/python/Pygments/py3/ya.make
index db361d8f6c..3fab931499 100644
--- a/contrib/python/Pygments/py3/ya.make
+++ b/contrib/python/Pygments/py3/ya.make
@@ -3,7 +3,7 @@
PY3_LIBRARY()
OWNER(blinkov g:python-contrib)
-
+
VERSION(2.11.2)
LICENSE(BSD-3-Clause)
diff --git a/contrib/python/Pygments/ya.make b/contrib/python/Pygments/ya.make
index 48f83a0365..9aec5a65a8 100644
--- a/contrib/python/Pygments/ya.make
+++ b/contrib/python/Pygments/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/attrs/ya.make b/contrib/python/attrs/ya.make
index 064a63297d..15fb4c423b 100644
--- a/contrib/python/attrs/ya.make
+++ b/contrib/python/attrs/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(g:python-contrib)
VERSION(21.2.0)
diff --git a/contrib/python/boto3/ya.make b/contrib/python/boto3/ya.make
index d0c560d325..bd12ae12b7 100644
--- a/contrib/python/boto3/ya.make
+++ b/contrib/python/boto3/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
OWNER(g:python-contrib)
-
+
VERSION(1.17.112)
LICENSE(Apache-2.0)
diff --git a/contrib/python/botocore/ya.make b/contrib/python/botocore/ya.make
index 0a1b1ec133..dc02d4a419 100644
--- a/contrib/python/botocore/ya.make
+++ b/contrib/python/botocore/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
OWNER(g:python-contrib)
-
+
VERSION(1.20.112)
LICENSE(Apache-2.0)
diff --git a/contrib/python/certifi/ya.make b/contrib/python/certifi/ya.make
index 1fb4471af7..d13fe73acc 100644
--- a/contrib/python/certifi/ya.make
+++ b/contrib/python/certifi/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(orivej g:python-contrib)
PEERDIR(
diff --git a/contrib/python/cffi/gen/lib/ya.make b/contrib/python/cffi/gen/lib/ya.make
index cbd7ca9b74..e5891f26a4 100644
--- a/contrib/python/cffi/gen/lib/ya.make
+++ b/contrib/python/cffi/gen/lib/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(orivej)
PEERDIR(
diff --git a/contrib/python/cffi/ya.make b/contrib/python/cffi/ya.make
index 0dadd08009..7bff9477f9 100644
--- a/contrib/python/cffi/ya.make
+++ b/contrib/python/cffi/ya.make
@@ -1,7 +1,7 @@
-PY23_LIBRARY()
+PY23_LIBRARY()
+
+LICENSE(MIT)
-LICENSE(MIT)
-
OWNER(g:python-contrib)
VERSION(1.15.0)
diff --git a/contrib/python/cryptography/ya.make b/contrib/python/cryptography/ya.make
index 130816d194..d63bb62f6a 100644
--- a/contrib/python/cryptography/ya.make
+++ b/contrib/python/cryptography/ya.make
@@ -1,18 +1,18 @@
-PY23_LIBRARY()
+PY23_LIBRARY()
+
+LICENSE(BSD-3-Clause)
-LICENSE(BSD-3-Clause)
-
OWNER(g:python-contrib)
VERSION(3.3.2)
-IF (PYTHON2)
- PEERDIR(
- contrib/python/enum34
- contrib/python/ipaddress
- )
-ENDIF()
-
+IF (PYTHON2)
+ PEERDIR(
+ contrib/python/enum34
+ contrib/python/ipaddress
+ )
+ENDIF()
+
PEERDIR(
contrib/libs/openssl
contrib/python/cffi
diff --git a/contrib/python/dateutil/ya.make b/contrib/python/dateutil/ya.make
index 723f65dd7f..0c4102c505 100644
--- a/contrib/python/dateutil/ya.make
+++ b/contrib/python/dateutil/ya.make
@@ -3,15 +3,15 @@
PY23_LIBRARY()
OWNER(g:python-contrib)
-
+
VERSION(2.8.2)
LICENSE(BSD-3-Clause)
-PEERDIR(
- contrib/python/six
-)
-
+PEERDIR(
+ contrib/python/six
+)
+
NO_LINT()
NO_CHECK_IMPORTS(
diff --git a/contrib/python/decorator/ya.make b/contrib/python/decorator/ya.make
index a6f44d1b66..fbb7ebf626 100644
--- a/contrib/python/decorator/ya.make
+++ b/contrib/python/decorator/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
OWNER(g:python-contrib borman)
VERSION(4.4.2)
diff --git a/contrib/python/future/ya.make b/contrib/python/future/ya.make
index 94955d6fbe..ba24f13341 100644
--- a/contrib/python/future/ya.make
+++ b/contrib/python/future/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(g:python-contrib)
VERSION(0.18.2)
diff --git a/contrib/python/idna/ya.make b/contrib/python/idna/ya.make
index 12ca32eea3..211e0637be 100644
--- a/contrib/python/idna/ya.make
+++ b/contrib/python/idna/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(BSD-3-Clause)
-
+
OWNER(g:python-contrib)
VERSION(2.10)
diff --git a/contrib/python/ipdb/ya.make b/contrib/python/ipdb/ya.make
index 5089c24a6c..c1c769c05c 100644
--- a/contrib/python/ipdb/ya.make
+++ b/contrib/python/ipdb/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(BSD-3-Clause)
-
+LICENSE(BSD-3-Clause)
+
OWNER(orivej g:python-contrib)
VERSION(0.13.9)
diff --git a/contrib/python/ipython/py2/ya.make b/contrib/python/ipython/py2/ya.make
index d0b7876b37..c356efb08c 100644
--- a/contrib/python/ipython/py2/ya.make
+++ b/contrib/python/ipython/py2/ya.make
@@ -1,6 +1,6 @@
PY2_LIBRARY()
-LICENSE(BSD-3-Clause)
+LICENSE(BSD-3-Clause)
VERSION(5.9.0)
diff --git a/contrib/python/ipython/ya.make b/contrib/python/ipython/ya.make
index 6b94de1911..b1dd8c5e23 100644
--- a/contrib/python/ipython/ya.make
+++ b/contrib/python/ipython/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/jedi/ya.make b/contrib/python/jedi/ya.make
index 1088c42118..eff2fef2a7 100644
--- a/contrib/python/jedi/ya.make
+++ b/contrib/python/jedi/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY(jedi)
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(g:python-contrib borman)
VERSION(0.13.3)
diff --git a/contrib/python/parso/py3/ya.make b/contrib/python/parso/py3/ya.make
index 761a9cbb51..aa36a61abd 100644
--- a/contrib/python/parso/py3/ya.make
+++ b/contrib/python/parso/py3/ya.make
@@ -6,8 +6,8 @@ OWNER(g:python-contrib)
VERSION(0.8.3)
-LICENSE(PSF-2.0)
-
+LICENSE(PSF-2.0)
+
NO_LINT()
PY_SRCS(
diff --git a/contrib/python/parso/ya.make b/contrib/python/parso/ya.make
index 1b69b5c186..05de6d3bbd 100644
--- a/contrib/python/parso/ya.make
+++ b/contrib/python/parso/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/pexpect/ya.make b/contrib/python/pexpect/ya.make
index 395f9c3122..a5bb92fcac 100644
--- a/contrib/python/pexpect/ya.make
+++ b/contrib/python/pexpect/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(ISC)
-
+LICENSE(ISC)
+
OWNER(g:python-contrib borman)
VERSION(4.8.0)
diff --git a/contrib/python/pickleshare/ya.make b/contrib/python/pickleshare/ya.make
index 5635d513f5..e24c2cdad7 100644
--- a/contrib/python/pickleshare/ya.make
+++ b/contrib/python/pickleshare/ya.make
@@ -1,7 +1,7 @@
# Generated by devtools/yamaker (pypi).
PY23_LIBRARY()
-
+
OWNER(borman g:python-contrib)
VERSION(0.7.5)
diff --git a/contrib/python/pluggy/ya.make b/contrib/python/pluggy/ya.make
index feb64d1301..43e4c7b2ab 100644
--- a/contrib/python/pluggy/ya.make
+++ b/contrib/python/pluggy/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/prompt-toolkit/ya.make b/contrib/python/prompt-toolkit/ya.make
index cffc876935..f1f936eb3f 100644
--- a/contrib/python/prompt-toolkit/ya.make
+++ b/contrib/python/prompt-toolkit/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/ptyprocess/ya.make b/contrib/python/ptyprocess/ya.make
index c1316eef3e..5e780214f2 100644
--- a/contrib/python/ptyprocess/ya.make
+++ b/contrib/python/ptyprocess/ya.make
@@ -3,7 +3,7 @@
PY23_LIBRARY()
OWNER(borman g:python-contrib)
-
+
VERSION(0.7.0)
LICENSE(ISC)
diff --git a/contrib/python/py/ya.make b/contrib/python/py/ya.make
index bc999790ac..cc86cb7fa9 100644
--- a/contrib/python/py/ya.make
+++ b/contrib/python/py/ya.make
@@ -3,7 +3,7 @@
PY23_LIBRARY()
OWNER(g:python-contrib)
-
+
VERSION(1.11.0)
LICENSE(MIT)
diff --git a/contrib/python/pycparser/ya.make b/contrib/python/pycparser/ya.make
index a70f19751c..1eb209b7b6 100644
--- a/contrib/python/pycparser/ya.make
+++ b/contrib/python/pycparser/ya.make
@@ -1,6 +1,6 @@
# Generated by devtools/yamaker (pypi).
-PY23_LIBRARY()
+PY23_LIBRARY()
OWNER(g:python-contrib)
diff --git a/contrib/python/requests/ya.make b/contrib/python/requests/ya.make
index 0bac91fd53..f971752d75 100644
--- a/contrib/python/requests/ya.make
+++ b/contrib/python/requests/ya.make
@@ -8,7 +8,7 @@ VERSION(2.27.1)
LICENSE(Apache-2.0)
-PEERDIR(
+PEERDIR(
contrib/python/certifi
contrib/python/idna
contrib/python/urllib3
diff --git a/contrib/python/s3transfer/py2/ya.make b/contrib/python/s3transfer/py2/ya.make
index b2138936ac..45eb28f88c 100644
--- a/contrib/python/s3transfer/py2/ya.make
+++ b/contrib/python/s3transfer/py2/ya.make
@@ -3,7 +3,7 @@
PY2_LIBRARY()
OWNER(gebetix g:python-contrib)
-
+
VERSION(0.4.2)
LICENSE(Apache-2.0)
diff --git a/contrib/python/s3transfer/py3/ya.make b/contrib/python/s3transfer/py3/ya.make
index 7bd09318c6..964a630639 100644
--- a/contrib/python/s3transfer/py3/ya.make
+++ b/contrib/python/s3transfer/py3/ya.make
@@ -3,7 +3,7 @@
PY3_LIBRARY()
OWNER(gebetix g:python-contrib)
-
+
VERSION(0.5.1)
LICENSE(Apache-2.0)
diff --git a/contrib/python/s3transfer/ya.make b/contrib/python/s3transfer/ya.make
index 6f08f5d756..bf4a234edb 100644
--- a/contrib/python/s3transfer/ya.make
+++ b/contrib/python/s3transfer/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/six/ya.make b/contrib/python/six/ya.make
index d14b9e0686..e0c7849214 100644
--- a/contrib/python/six/ya.make
+++ b/contrib/python/six/ya.make
@@ -2,8 +2,8 @@ OWNER(g:python-contrib)
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
VERSION(1.16.0)
PY_SRCS(
diff --git a/contrib/python/toml/ya.make b/contrib/python/toml/ya.make
index 568e5b8b7d..104e501e8e 100644
--- a/contrib/python/toml/ya.make
+++ b/contrib/python/toml/ya.make
@@ -2,8 +2,8 @@ OWNER(g:python-contrib)
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
VERSION(0.10.2)
PY_SRCS(
diff --git a/contrib/python/traitlets/ya.make b/contrib/python/traitlets/ya.make
index a73d2da052..3156aae8c5 100644
--- a/contrib/python/traitlets/ya.make
+++ b/contrib/python/traitlets/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
LICENSE(Service-Py23-Proxy)
-
+
OWNER(g:python-contrib)
IF (PYTHON2)
diff --git a/contrib/python/wcwidth/ya.make b/contrib/python/wcwidth/ya.make
index 9b79498123..f1aeefaa1c 100644
--- a/contrib/python/wcwidth/ya.make
+++ b/contrib/python/wcwidth/ya.make
@@ -1,7 +1,7 @@
PY23_LIBRARY()
-LICENSE(MIT)
-
+LICENSE(MIT)
+
OWNER(g:python-contrib blinkov)
VERSION(0.2.5)
diff --git a/contrib/python/ya.make b/contrib/python/ya.make
index bfe388383a..d01ced9f3a 100644
--- a/contrib/python/ya.make
+++ b/contrib/python/ya.make
@@ -811,7 +811,7 @@ RECURSE(
pydot
pydub
pyelftools
- pyelftools/readelf
+ pyelftools/readelf
pyfakefs
pyflakes
pyfst
@@ -970,7 +970,7 @@ RECURSE(
scales
scancode-toolkit
scandir
- schedule
+ schedule
schema
schematics
schwifty
diff --git a/contrib/restricted/libffi/include/ffi_common.h b/contrib/restricted/libffi/include/ffi_common.h
index c403bade2d..76b9dd6faf 100644
--- a/contrib/restricted/libffi/include/ffi_common.h
+++ b/contrib/restricted/libffi/include/ffi_common.h
@@ -1,24 +1,24 @@
-/* -----------------------------------------------------------------------
+/* -----------------------------------------------------------------------
ffi_common.h - Copyright (C) 2011, 2012, 2013 Anthony Green
- Copyright (C) 2007 Free Software Foundation, Inc
- Copyright (c) 1996 Red Hat, Inc.
+ Copyright (C) 2007 Free Software Foundation, Inc
+ Copyright (c) 1996 Red Hat, Inc.
- Common internal definitions and macros. Only necessary for building
- libffi.
- ----------------------------------------------------------------------- */
-
-#ifndef FFI_COMMON_H
-#define FFI_COMMON_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <fficonfig.h>
-
-/* Do not move this. Some versions of AIX are very picky about where
- this is positioned. */
-#ifdef __GNUC__
+ Common internal definitions and macros. Only necessary for building
+ libffi.
+ ----------------------------------------------------------------------- */
+
+#ifndef FFI_COMMON_H
+#define FFI_COMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <fficonfig.h>
+
+/* Do not move this. Some versions of AIX are very picky about where
+ this is positioned. */
+#ifdef __GNUC__
# if HAVE_ALLOCA_H
# include <alloca.h>
# else
@@ -27,63 +27,63 @@ extern "C" {
# define alloca __builtin_alloca
# endif
# endif
-# define MAYBE_UNUSED __attribute__((__unused__))
-#else
-# define MAYBE_UNUSED
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# else
-# ifdef _AIX
+# define MAYBE_UNUSED __attribute__((__unused__))
+#else
+# define MAYBE_UNUSED
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
# pragma alloca
-# else
-# ifndef alloca /* predefined by HP cc +Olibcalls */
-# ifdef _MSC_VER
-# define alloca _alloca
-# else
-char *alloca ();
-# endif
-# endif
-# endif
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+# ifdef _MSC_VER
+# define alloca _alloca
+# else
+char *alloca ();
+# endif
+# endif
# endif
-#endif
-
-/* Check for the existence of memcpy. */
-#if STDC_HEADERS
-# include <string.h>
-#else
-# ifndef HAVE_MEMCPY
-# define memcpy(d, s, n) bcopy ((s), (d), (n))
-# endif
-#endif
-
-#if defined(FFI_DEBUG)
-#include <stdio.h>
-#endif
-
-#ifdef FFI_DEBUG
-void ffi_assert(char *expr, char *file, int line);
-void ffi_stop_here(void);
-void ffi_type_test(ffi_type *a, char *file, int line);
-
-#define FFI_ASSERT(x) ((x) ? (void)0 : ffi_assert(#x, __FILE__,__LINE__))
-#define FFI_ASSERT_AT(x, f, l) ((x) ? 0 : ffi_assert(#x, (f), (l)))
-#define FFI_ASSERT_VALID_TYPE(x) ffi_type_test (x, __FILE__, __LINE__)
-#else
-#define FFI_ASSERT(x)
-#define FFI_ASSERT_AT(x, f, l)
-#define FFI_ASSERT_VALID_TYPE(x)
-#endif
-
+# endif
+#endif
+
+/* Check for the existence of memcpy. */
+#if STDC_HEADERS
+# include <string.h>
+#else
+# ifndef HAVE_MEMCPY
+# define memcpy(d, s, n) bcopy ((s), (d), (n))
+# endif
+#endif
+
+#if defined(FFI_DEBUG)
+#include <stdio.h>
+#endif
+
+#ifdef FFI_DEBUG
+void ffi_assert(char *expr, char *file, int line);
+void ffi_stop_here(void);
+void ffi_type_test(ffi_type *a, char *file, int line);
+
+#define FFI_ASSERT(x) ((x) ? (void)0 : ffi_assert(#x, __FILE__,__LINE__))
+#define FFI_ASSERT_AT(x, f, l) ((x) ? 0 : ffi_assert(#x, (f), (l)))
+#define FFI_ASSERT_VALID_TYPE(x) ffi_type_test (x, __FILE__, __LINE__)
+#else
+#define FFI_ASSERT(x)
+#define FFI_ASSERT_AT(x, f, l)
+#define FFI_ASSERT_VALID_TYPE(x)
+#endif
+
/* v cast to size_t and aligned up to a multiple of a */
#define FFI_ALIGN(v, a) (((((size_t) (v))-1) | ((a)-1))+1)
/* v cast to size_t and aligned down to a multiple of a */
#define FFI_ALIGN_DOWN(v, a) (((size_t) (v)) & -a)
-
-/* Perform machine dependent cif processing */
-ffi_status ffi_prep_cif_machdep(ffi_cif *cif);
-ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
- unsigned int nfixedargs, unsigned int ntotalargs);
-
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif);
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+ unsigned int nfixedargs, unsigned int ntotalargs);
+
#if HAVE_LONG_DOUBLE_VARIANT
/* Used to adjust size/alignment of ffi types. */
@@ -103,51 +103,51 @@ ffi_status ffi_prep_cif_core(ffi_cif *cif,
some targets. */
void *ffi_data_to_code_pointer (void *data) FFI_HIDDEN;
-/* Extended cif, used in callback from assembly routine */
-typedef struct
-{
- ffi_cif *cif;
- void *rvalue;
- void **avalue;
-} extended_cif;
-
-/* Terse sized type definitions. */
-#if defined(_MSC_VER) || defined(__sgi) || defined(__SUNPRO_C)
-typedef unsigned char UINT8;
-typedef signed char SINT8;
-typedef unsigned short UINT16;
-typedef signed short SINT16;
-typedef unsigned int UINT32;
-typedef signed int SINT32;
-# ifdef _MSC_VER
-typedef unsigned __int64 UINT64;
-typedef signed __int64 SINT64;
-# else
-# include <inttypes.h>
-typedef uint64_t UINT64;
-typedef int64_t SINT64;
-# endif
-#else
-typedef unsigned int UINT8 __attribute__((__mode__(__QI__)));
-typedef signed int SINT8 __attribute__((__mode__(__QI__)));
-typedef unsigned int UINT16 __attribute__((__mode__(__HI__)));
-typedef signed int SINT16 __attribute__((__mode__(__HI__)));
-typedef unsigned int UINT32 __attribute__((__mode__(__SI__)));
-typedef signed int SINT32 __attribute__((__mode__(__SI__)));
-typedef unsigned int UINT64 __attribute__((__mode__(__DI__)));
-typedef signed int SINT64 __attribute__((__mode__(__DI__)));
-#endif
-
-typedef float FLOAT32;
-
-#ifndef __GNUC__
-#define __builtin_expect(x, expected_value) (x)
-#endif
-#define LIKELY(x) __builtin_expect(!!(x),1)
-#define UNLIKELY(x) __builtin_expect((x)!=0,0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/* Extended cif, used in callback from assembly routine */
+typedef struct
+{
+ ffi_cif *cif;
+ void *rvalue;
+ void **avalue;
+} extended_cif;
+
+/* Terse sized type definitions. */
+#if defined(_MSC_VER) || defined(__sgi) || defined(__SUNPRO_C)
+typedef unsigned char UINT8;
+typedef signed char SINT8;
+typedef unsigned short UINT16;
+typedef signed short SINT16;
+typedef unsigned int UINT32;
+typedef signed int SINT32;
+# ifdef _MSC_VER
+typedef unsigned __int64 UINT64;
+typedef signed __int64 SINT64;
+# else
+# include <inttypes.h>
+typedef uint64_t UINT64;
+typedef int64_t SINT64;
+# endif
+#else
+typedef unsigned int UINT8 __attribute__((__mode__(__QI__)));
+typedef signed int SINT8 __attribute__((__mode__(__QI__)));
+typedef unsigned int UINT16 __attribute__((__mode__(__HI__)));
+typedef signed int SINT16 __attribute__((__mode__(__HI__)));
+typedef unsigned int UINT32 __attribute__((__mode__(__SI__)));
+typedef signed int SINT32 __attribute__((__mode__(__SI__)));
+typedef unsigned int UINT64 __attribute__((__mode__(__DI__)));
+typedef signed int SINT64 __attribute__((__mode__(__DI__)));
+#endif
+
+typedef float FLOAT32;
+
+#ifndef __GNUC__
+#define __builtin_expect(x, expected_value) (x)
+#endif
+#define LIKELY(x) __builtin_expect(!!(x),1)
+#define UNLIKELY(x) __builtin_expect((x)!=0,0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
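
The FFI_ALIGN and FFI_ALIGN_DOWN macros restored in the hunk above round a value up or down to a multiple of a power-of-two alignment using only bit arithmetic. A minimal standalone C sketch of how they behave, copying the two macro definitions verbatim; everything else here is illustrative and not part of this commit or of libffi:

#include <stdio.h>
#include <stddef.h>

/* Copied from ffi_common.h above: align v up / down to a multiple of a. */
#define FFI_ALIGN(v, a)      (((((size_t) (v))-1) | ((a)-1))+1)
#define FFI_ALIGN_DOWN(v, a) (((size_t) (v)) & -a)

int main(void)
{
  /* Align 13 to an 8-byte boundary: up gives 16, down gives 8. */
  printf("up:   %zu\n", FFI_ALIGN(13, 8));      /* prints 16 */
  printf("down: %zu\n", FFI_ALIGN_DOWN(13, 8)); /* prints 8  */
  return 0;
}

The rounding-up form works because OR-ing (v - 1) with (a - 1) sets every bit below the alignment boundary, so the final +1 carries into the next multiple of a; it assumes a is a power of two, which is how libffi uses it.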
diff --git a/contrib/restricted/libffi/src/closures.c b/contrib/restricted/libffi/src/closures.c
index 5819fdb769..5120021652 100644
--- a/contrib/restricted/libffi/src/closures.c
+++ b/contrib/restricted/libffi/src/closures.c
@@ -1,40 +1,40 @@
-/* -----------------------------------------------------------------------
+/* -----------------------------------------------------------------------
closures.c - Copyright (c) 2019 Anthony Green
Copyright (c) 2007, 2009, 2010 Red Hat, Inc.
- Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
- Copyright (c) 2011 Plausible Labs Cooperative, Inc.
-
- Code to allocate and deallocate memory for closures.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#if defined __linux__ && !defined _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
-
+ Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
+ Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+
+ Code to allocate and deallocate memory for closures.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#if defined __linux__ && !defined _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
#include <fficonfig.h>
-#include <ffi.h>
-#include <ffi_common.h>
-
+#include <ffi.h>
+#include <ffi_common.h>
+
#ifdef __NetBSD__
#include <sys/param.h>
#endif
@@ -111,39 +111,39 @@ ffi_closure_free (void *ptr)
}
#else /* !NetBSD with PROT_MPROTECT */
-#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
+#if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
# if __linux__ && !defined(__ANDROID__)
-/* This macro indicates it may be forbidden to map anonymous memory
- with both write and execute permission. Code compiled when this
- option is defined will attempt to map such pages once, but if it
- fails, it falls back to creating a temporary file in a writable and
- executable filesystem and mapping pages from it into separate
- locations in the virtual memory space, one location writable and
- another executable. */
-# define FFI_MMAP_EXEC_WRIT 1
-# define HAVE_MNTENT 1
-# endif
+/* This macro indicates it may be forbidden to map anonymous memory
+ with both write and execute permission. Code compiled when this
+ option is defined will attempt to map such pages once, but if it
+ fails, it falls back to creating a temporary file in a writable and
+ executable filesystem and mapping pages from it into separate
+ locations in the virtual memory space, one location writable and
+ another executable. */
+# define FFI_MMAP_EXEC_WRIT 1
+# define HAVE_MNTENT 1
+# endif
# if defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)
-/* Windows systems may have Data Execution Protection (DEP) enabled,
- which requires the use of VirtualMalloc/VirtualFree to alloc/free
- executable memory. */
-# define FFI_MMAP_EXEC_WRIT 1
-# endif
-#endif
-
-#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
+/* Windows systems may have Data Execution Protection (DEP) enabled,
+ which requires the use of VirtualMalloc/VirtualFree to alloc/free
+ executable memory. */
+# define FFI_MMAP_EXEC_WRIT 1
+# endif
+#endif
+
+#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
# if defined(__linux__) && !defined(__ANDROID__)
-/* When defined to 1 check for SELinux and if SELinux is active,
- don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
- might cause audit messages. */
-# define FFI_MMAP_EXEC_SELINUX 1
-# endif
-#endif
-
-#if FFI_CLOSURES
-
+/* When defined to 1 check for SELinux and if SELinux is active,
+ don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
+ might cause audit messages. */
+# define FFI_MMAP_EXEC_SELINUX 1
+# endif
+#endif
+
+#if FFI_CLOSURES
+
#if FFI_EXEC_TRAMPOLINE_TABLE
-
+
#ifdef __MACH__
#include <mach/mach.h>
@@ -349,120 +349,120 @@ ffi_closure_free (void *ptr)
#endif
-// Per-target implementation; It's unclear what can reasonable be shared between two OS/architecture implementations.
-
+// Per-target implementation; It's unclear what can reasonable be shared between two OS/architecture implementations.
+
#elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
-
-#define USE_LOCKS 1
-#define USE_DL_PREFIX 1
-#ifdef __GNUC__
-#ifndef USE_BUILTIN_FFS
-#define USE_BUILTIN_FFS 1
-#endif
-#endif
-
-/* We need to use mmap, not sbrk. */
-#define HAVE_MORECORE 0
-
-/* We could, in theory, support mremap, but it wouldn't buy us anything. */
-#define HAVE_MREMAP 0
-
-/* We have no use for this, so save some code and data. */
-#define NO_MALLINFO 1
-
-/* We need all allocations to be in regular segments, otherwise we
- lose track of the corresponding code address. */
-#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
-
-/* Don't allocate more than a page unless needed. */
-#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize)
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#ifndef _MSC_VER
-#include <unistd.h>
-#endif
-#include <string.h>
-#include <stdio.h>
+
+#define USE_LOCKS 1
+#define USE_DL_PREFIX 1
+#ifdef __GNUC__
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 1
+#endif
+#endif
+
+/* We need to use mmap, not sbrk. */
+#define HAVE_MORECORE 0
+
+/* We could, in theory, support mremap, but it wouldn't buy us anything. */
+#define HAVE_MREMAP 0
+
+/* We have no use for this, so save some code and data. */
+#define NO_MALLINFO 1
+
+/* We need all allocations to be in regular segments, otherwise we
+ lose track of the corresponding code address. */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+
+/* Don't allocate more than a page unless needed. */
+#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize)
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <stdio.h>
#if !defined(X86_WIN32) && !defined(X86_WIN64) && !defined(_M_ARM64)
-#ifdef HAVE_MNTENT
-#include <mntent.h>
-#endif /* HAVE_MNTENT */
-#include <sys/param.h>
-#include <pthread.h>
-
-/* We don't want sys/mman.h to be included after we redefine mmap and
- dlmunmap. */
-#include <sys/mman.h>
-#define LACKS_SYS_MMAN_H 1
-
-#if FFI_MMAP_EXEC_SELINUX
-#include <sys/statfs.h>
-#include <stdlib.h>
-
-static int selinux_enabled = -1;
-
-static int
-selinux_enabled_check (void)
-{
- struct statfs sfs;
- FILE *f;
- char *buf = NULL;
- size_t len = 0;
-
- if (statfs ("/selinux", &sfs) >= 0
- && (unsigned int) sfs.f_type == 0xf97cff8cU)
- return 1;
- f = fopen ("/proc/mounts", "r");
- if (f == NULL)
- return 0;
- while (getline (&buf, &len, f) >= 0)
- {
- char *p = strchr (buf, ' ');
- if (p == NULL)
- break;
- p = strchr (p + 1, ' ');
- if (p == NULL)
- break;
- if (strncmp (p + 1, "selinuxfs ", 10) == 0)
- {
- free (buf);
- fclose (f);
- return 1;
- }
- }
- free (buf);
- fclose (f);
- return 0;
-}
-
-#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \
- : (selinux_enabled = selinux_enabled_check ()))
-
-#else
-
-#define is_selinux_enabled() 0
-
-#endif /* !FFI_MMAP_EXEC_SELINUX */
-
-/* On PaX enable kernels that have MPROTECT enable we can't use PROT_EXEC. */
-#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX
-#include <stdlib.h>
-
-static int emutramp_enabled = -1;
-
-static int
-emutramp_enabled_check (void)
-{
+#ifdef HAVE_MNTENT
+#include <mntent.h>
+#endif /* HAVE_MNTENT */
+#include <sys/param.h>
+#include <pthread.h>
+
+/* We don't want sys/mman.h to be included after we redefine mmap and
+ dlmunmap. */
+#include <sys/mman.h>
+#define LACKS_SYS_MMAN_H 1
+
+#if FFI_MMAP_EXEC_SELINUX
+#include <sys/statfs.h>
+#include <stdlib.h>
+
+static int selinux_enabled = -1;
+
+static int
+selinux_enabled_check (void)
+{
+ struct statfs sfs;
+ FILE *f;
+ char *buf = NULL;
+ size_t len = 0;
+
+ if (statfs ("/selinux", &sfs) >= 0
+ && (unsigned int) sfs.f_type == 0xf97cff8cU)
+ return 1;
+ f = fopen ("/proc/mounts", "r");
+ if (f == NULL)
+ return 0;
+ while (getline (&buf, &len, f) >= 0)
+ {
+ char *p = strchr (buf, ' ');
+ if (p == NULL)
+ break;
+ p = strchr (p + 1, ' ');
+ if (p == NULL)
+ break;
+ if (strncmp (p + 1, "selinuxfs ", 10) == 0)
+ {
+ free (buf);
+ fclose (f);
+ return 1;
+ }
+ }
+ free (buf);
+ fclose (f);
+ return 0;
+}
+
+#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \
+ : (selinux_enabled = selinux_enabled_check ()))
+
+#else
+
+#define is_selinux_enabled() 0
+
+#endif /* !FFI_MMAP_EXEC_SELINUX */
+
+/* On PaX enable kernels that have MPROTECT enable we can't use PROT_EXEC. */
+#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#include <stdlib.h>
+
+static int emutramp_enabled = -1;
+
+static int
+emutramp_enabled_check (void)
+{
char *buf = NULL;
size_t len = 0;
FILE *f;
int ret;
f = fopen ("/proc/self/status", "r");
if (f == NULL)
- return 0;
+ return 0;
ret = 0;
while (getline (&buf, &len, f) != -1)
@@ -476,97 +476,97 @@ emutramp_enabled_check (void)
free (buf);
fclose (f);
return ret;
-}
-
-#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
- : (emutramp_enabled = emutramp_enabled_check ()))
-#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
-
-#elif defined (__CYGWIN__) || defined(__INTERIX)
-
-#include <sys/mman.h>
-
-/* Cygwin is Linux-like, but not quite that Linux-like. */
-#define is_selinux_enabled() 0
-
-#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
-
-#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX
-#define is_emutramp_enabled() 0
-#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
-
-/* Declare all functions defined in dlmalloc.c as static. */
-static void *dlmalloc(size_t);
-static void dlfree(void*);
-static void *dlcalloc(size_t, size_t) MAYBE_UNUSED;
-static void *dlrealloc(void *, size_t) MAYBE_UNUSED;
-static void *dlmemalign(size_t, size_t) MAYBE_UNUSED;
-static void *dlvalloc(size_t) MAYBE_UNUSED;
-static int dlmallopt(int, int) MAYBE_UNUSED;
-static size_t dlmalloc_footprint(void) MAYBE_UNUSED;
-static size_t dlmalloc_max_footprint(void) MAYBE_UNUSED;
-static void** dlindependent_calloc(size_t, size_t, void**) MAYBE_UNUSED;
-static void** dlindependent_comalloc(size_t, size_t*, void**) MAYBE_UNUSED;
-static void *dlpvalloc(size_t) MAYBE_UNUSED;
-static int dlmalloc_trim(size_t) MAYBE_UNUSED;
-static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED;
-static void dlmalloc_stats(void) MAYBE_UNUSED;
-
+}
+
+#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
+ : (emutramp_enabled = emutramp_enabled_check ()))
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+#elif defined (__CYGWIN__) || defined(__INTERIX)
+
+#include <sys/mman.h>
+
+/* Cygwin is Linux-like, but not quite that Linux-like. */
+#define is_selinux_enabled() 0
+
+#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
+
+#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#define is_emutramp_enabled() 0
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+/* Declare all functions defined in dlmalloc.c as static. */
+static void *dlmalloc(size_t);
+static void dlfree(void*);
+static void *dlcalloc(size_t, size_t) MAYBE_UNUSED;
+static void *dlrealloc(void *, size_t) MAYBE_UNUSED;
+static void *dlmemalign(size_t, size_t) MAYBE_UNUSED;
+static void *dlvalloc(size_t) MAYBE_UNUSED;
+static int dlmallopt(int, int) MAYBE_UNUSED;
+static size_t dlmalloc_footprint(void) MAYBE_UNUSED;
+static size_t dlmalloc_max_footprint(void) MAYBE_UNUSED;
+static void** dlindependent_calloc(size_t, size_t, void**) MAYBE_UNUSED;
+static void** dlindependent_comalloc(size_t, size_t*, void**) MAYBE_UNUSED;
+static void *dlpvalloc(size_t) MAYBE_UNUSED;
+static int dlmalloc_trim(size_t) MAYBE_UNUSED;
+static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED;
+static void dlmalloc_stats(void) MAYBE_UNUSED;
+
#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
-/* Use these for mmap and munmap within dlmalloc.c. */
-static void *dlmmap(void *, size_t, int, int, int, off_t);
-static int dlmunmap(void *, size_t);
-#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
-
-#define mmap dlmmap
-#define munmap dlmunmap
-
-#include "dlmalloc.c"
-
-#undef mmap
-#undef munmap
-
+/* Use these for mmap and munmap within dlmalloc.c. */
+static void *dlmmap(void *, size_t, int, int, int, off_t);
+static int dlmunmap(void *, size_t);
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
+
+#define mmap dlmmap
+#define munmap dlmunmap
+
+#include "dlmalloc.c"
+
+#undef mmap
+#undef munmap
+
#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
-
-/* A mutex used to synchronize access to *exec* variables in this file. */
-static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* A file descriptor of a temporary file from which we'll map
- executable pages. */
-static int execfd = -1;
-
-/* The amount of space already allocated from the temporary file. */
-static size_t execsize = 0;
-
-/* Open a temporary file name, and immediately unlink it. */
-static int
+
+/* A mutex used to synchronize access to *exec* variables in this file. */
+static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* A file descriptor of a temporary file from which we'll map
+ executable pages. */
+static int execfd = -1;
+
+/* The amount of space already allocated from the temporary file. */
+static size_t execsize = 0;
+
+/* Open a temporary file name, and immediately unlink it. */
+static int
open_temp_exec_file_name (char *name, int flags)
-{
+{
int fd;
-
+
#ifdef HAVE_MKOSTEMP
fd = mkostemp (name, flags);
#else
fd = mkstemp (name);
#endif
- if (fd != -1)
- unlink (name);
-
- return fd;
-}
-
-/* Open a temporary file in the named directory. */
-static int
-open_temp_exec_file_dir (const char *dir)
-{
- static const char suffix[] = "/ffiXXXXXX";
+ if (fd != -1)
+ unlink (name);
+
+ return fd;
+}
+
+/* Open a temporary file in the named directory. */
+static int
+open_temp_exec_file_dir (const char *dir)
+{
+ static const char suffix[] = "/ffiXXXXXX";
int lendir, flags;
char *tempname;
#ifdef O_TMPFILE
int fd;
#endif
-
+
#ifdef O_CLOEXEC
flags = O_CLOEXEC;
#else
@@ -586,144 +586,144 @@ open_temp_exec_file_dir (const char *dir)
lendir = (int) strlen (dir);
tempname = __builtin_alloca (lendir + sizeof (suffix));
- if (!tempname)
- return -1;
-
- memcpy (tempname, dir, lendir);
- memcpy (tempname + lendir, suffix, sizeof (suffix));
-
+ if (!tempname)
+ return -1;
+
+ memcpy (tempname, dir, lendir);
+ memcpy (tempname + lendir, suffix, sizeof (suffix));
+
return open_temp_exec_file_name (tempname, flags);
-}
-
-/* Open a temporary file in the directory in the named environment
- variable. */
-static int
-open_temp_exec_file_env (const char *envvar)
-{
- const char *value = getenv (envvar);
-
- if (!value)
- return -1;
-
- return open_temp_exec_file_dir (value);
-}
-
-#ifdef HAVE_MNTENT
-/* Open a temporary file in an executable and writable mount point
- listed in the mounts file. Subsequent calls with the same mounts
- keep searching for mount points in the same file. Providing NULL
- as the mounts file closes the file. */
-static int
-open_temp_exec_file_mnt (const char *mounts)
-{
- static const char *last_mounts;
- static FILE *last_mntent;
-
- if (mounts != last_mounts)
- {
- if (last_mntent)
- endmntent (last_mntent);
-
- last_mounts = mounts;
-
- if (mounts)
- last_mntent = setmntent (mounts, "r");
- else
- last_mntent = NULL;
- }
-
- if (!last_mntent)
- return -1;
-
- for (;;)
- {
- int fd;
- struct mntent mnt;
- char buf[MAXPATHLEN * 3];
-
- if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
- return -1;
-
- if (hasmntopt (&mnt, "ro")
- || hasmntopt (&mnt, "noexec")
- || access (mnt.mnt_dir, W_OK))
- continue;
-
- fd = open_temp_exec_file_dir (mnt.mnt_dir);
-
- if (fd != -1)
- return fd;
- }
-}
-#endif /* HAVE_MNTENT */
-
-/* Instructions to look for a location to hold a temporary file that
- can be mapped in for execution. */
-static struct
-{
- int (*func)(const char *);
- const char *arg;
- int repeat;
-} open_temp_exec_file_opts[] = {
- { open_temp_exec_file_env, "TMPDIR", 0 },
- { open_temp_exec_file_dir, "/tmp", 0 },
- { open_temp_exec_file_dir, "/var/tmp", 0 },
- { open_temp_exec_file_dir, "/dev/shm", 0 },
- { open_temp_exec_file_env, "HOME", 0 },
-#ifdef HAVE_MNTENT
- { open_temp_exec_file_mnt, "/etc/mtab", 1 },
- { open_temp_exec_file_mnt, "/proc/mounts", 1 },
-#endif /* HAVE_MNTENT */
-};
-
-/* Current index into open_temp_exec_file_opts. */
-static int open_temp_exec_file_opts_idx = 0;
-
-/* Reset a current multi-call func, then advances to the next entry.
- If we're at the last, go back to the first and return nonzero,
- otherwise return zero. */
-static int
-open_temp_exec_file_opts_next (void)
-{
- if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
- open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL);
-
- open_temp_exec_file_opts_idx++;
- if (open_temp_exec_file_opts_idx
- == (sizeof (open_temp_exec_file_opts)
- / sizeof (*open_temp_exec_file_opts)))
- {
- open_temp_exec_file_opts_idx = 0;
- return 1;
- }
-
- return 0;
-}
-
-/* Return a file descriptor of a temporary zero-sized file in a
+}
+
+/* Open a temporary file in the directory in the named environment
+ variable. */
+static int
+open_temp_exec_file_env (const char *envvar)
+{
+ const char *value = getenv (envvar);
+
+ if (!value)
+ return -1;
+
+ return open_temp_exec_file_dir (value);
+}
+
+#ifdef HAVE_MNTENT
+/* Open a temporary file in an executable and writable mount point
+ listed in the mounts file. Subsequent calls with the same mounts
+ keep searching for mount points in the same file. Providing NULL
+ as the mounts file closes the file. */
+static int
+open_temp_exec_file_mnt (const char *mounts)
+{
+ static const char *last_mounts;
+ static FILE *last_mntent;
+
+ if (mounts != last_mounts)
+ {
+ if (last_mntent)
+ endmntent (last_mntent);
+
+ last_mounts = mounts;
+
+ if (mounts)
+ last_mntent = setmntent (mounts, "r");
+ else
+ last_mntent = NULL;
+ }
+
+ if (!last_mntent)
+ return -1;
+
+ for (;;)
+ {
+ int fd;
+ struct mntent mnt;
+ char buf[MAXPATHLEN * 3];
+
+ if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL)
+ return -1;
+
+ if (hasmntopt (&mnt, "ro")
+ || hasmntopt (&mnt, "noexec")
+ || access (mnt.mnt_dir, W_OK))
+ continue;
+
+ fd = open_temp_exec_file_dir (mnt.mnt_dir);
+
+ if (fd != -1)
+ return fd;
+ }
+}
+#endif /* HAVE_MNTENT */
+
+/* Instructions to look for a location to hold a temporary file that
+ can be mapped in for execution. */
+static struct
+{
+ int (*func)(const char *);
+ const char *arg;
+ int repeat;
+} open_temp_exec_file_opts[] = {
+ { open_temp_exec_file_env, "TMPDIR", 0 },
+ { open_temp_exec_file_dir, "/tmp", 0 },
+ { open_temp_exec_file_dir, "/var/tmp", 0 },
+ { open_temp_exec_file_dir, "/dev/shm", 0 },
+ { open_temp_exec_file_env, "HOME", 0 },
+#ifdef HAVE_MNTENT
+ { open_temp_exec_file_mnt, "/etc/mtab", 1 },
+ { open_temp_exec_file_mnt, "/proc/mounts", 1 },
+#endif /* HAVE_MNTENT */
+};
+
+/* Current index into open_temp_exec_file_opts. */
+static int open_temp_exec_file_opts_idx = 0;
+
+/* Reset a current multi-call func, then advances to the next entry.
+ If we're at the last, go back to the first and return nonzero,
+ otherwise return zero. */
+static int
+open_temp_exec_file_opts_next (void)
+{
+ if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+ open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL);
+
+ open_temp_exec_file_opts_idx++;
+ if (open_temp_exec_file_opts_idx
+ == (sizeof (open_temp_exec_file_opts)
+ / sizeof (*open_temp_exec_file_opts)))
+ {
+ open_temp_exec_file_opts_idx = 0;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return a file descriptor of a temporary zero-sized file in a
writable and executable filesystem. */
-static int
-open_temp_exec_file (void)
-{
- int fd;
-
- do
- {
- fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func
- (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg);
-
- if (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat
- || fd == -1)
- {
- if (open_temp_exec_file_opts_next ())
- break;
- }
- }
- while (fd == -1);
-
- return fd;
-}
-
+static int
+open_temp_exec_file (void)
+{
+ int fd;
+
+ do
+ {
+ fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func
+ (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg);
+
+ if (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat
+ || fd == -1)
+ {
+ if (open_temp_exec_file_opts_next ())
+ break;
+ }
+ }
+ while (fd == -1);
+
+ return fd;
+}
+
/* We need to allocate space in a file that will be backing a writable
mapping. Several problems exist with the usual approaches:
- fallocate() is Linux-only
@@ -754,42 +754,42 @@ allocate_space (int fd, off_t offset, off_t len)
return 0;
}
-/* Map in a chunk of memory from the temporary exec file into separate
- locations in the virtual memory address space, one writable and one
- executable. Returns the address of the writable portion, after
- storing an offset to the corresponding executable portion at the
- last word of the requested chunk. */
-static void *
-dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
-{
- void *ptr;
-
- if (execfd == -1)
- {
- open_temp_exec_file_opts_idx = 0;
- retry_open:
- execfd = open_temp_exec_file ();
- if (execfd == -1)
- return MFAIL;
- }
-
- offset = execsize;
-
+/* Map in a chunk of memory from the temporary exec file into separate
+ locations in the virtual memory address space, one writable and one
+ executable. Returns the address of the writable portion, after
+ storing an offset to the corresponding executable portion at the
+ last word of the requested chunk. */
+static void *
+dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
+{
+ void *ptr;
+
+ if (execfd == -1)
+ {
+ open_temp_exec_file_opts_idx = 0;
+ retry_open:
+ execfd = open_temp_exec_file ();
+ if (execfd == -1)
+ return MFAIL;
+ }
+
+ offset = execsize;
+
if (allocate_space (execfd, offset, length))
- return MFAIL;
-
- flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
- flags |= MAP_SHARED;
-
- ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC,
- flags, execfd, offset);
- if (ptr == MFAIL)
- {
- if (!offset)
- {
- close (execfd);
- goto retry_open;
- }
+ return MFAIL;
+
+ flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
+ flags |= MAP_SHARED;
+
+ ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC,
+ flags, execfd, offset);
+ if (ptr == MFAIL)
+ {
+ if (!offset)
+ {
+ close (execfd);
+ goto retry_open;
+ }
if (ftruncate (execfd, offset) != 0)
{
/* Fixme : Error logs can be added here. Returning an error for
@@ -797,142 +797,142 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
* validating in the error case. */
}
- return MFAIL;
- }
- else if (!offset
- && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
- open_temp_exec_file_opts_next ();
-
- start = mmap (start, length, prot, flags, execfd, offset);
-
- if (start == MFAIL)
- {
- munmap (ptr, length);
+ return MFAIL;
+ }
+ else if (!offset
+ && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat)
+ open_temp_exec_file_opts_next ();
+
+ start = mmap (start, length, prot, flags, execfd, offset);
+
+ if (start == MFAIL)
+ {
+ munmap (ptr, length);
if (ftruncate (execfd, offset) != 0)
{
/* Fixme : Error logs can be added here. Returning an error for
* ftruncte() will not add any advantage as it is being
* validating in the error case. */
}
- return start;
- }
-
- mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start;
-
- execsize += length;
-
- return start;
-}
-
-/* Map in a writable and executable chunk of memory if possible.
- Failing that, fall back to dlmmap_locked. */
-static void *
-dlmmap (void *start, size_t length, int prot,
- int flags, int fd, off_t offset)
-{
- void *ptr;
-
- assert (start == NULL && length % malloc_getpagesize == 0
- && prot == (PROT_READ | PROT_WRITE)
- && flags == (MAP_PRIVATE | MAP_ANONYMOUS)
- && fd == -1 && offset == 0);
-
- if (execfd == -1 && is_emutramp_enabled ())
- {
- ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
- return ptr;
- }
-
- if (execfd == -1 && !is_selinux_enabled ())
- {
- ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset);
-
- if (ptr != MFAIL || (errno != EPERM && errno != EACCES))
- /* Cool, no need to mess with separate segments. */
- return ptr;
-
- /* If MREMAP_DUP is ever introduced and implemented, try mmap
- with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with
- MREMAP_DUP and prot at this point. */
- }
-
- if (execsize == 0 || execfd == -1)
- {
- pthread_mutex_lock (&open_temp_exec_file_mutex);
- ptr = dlmmap_locked (start, length, prot, flags, offset);
- pthread_mutex_unlock (&open_temp_exec_file_mutex);
-
- return ptr;
- }
-
- return dlmmap_locked (start, length, prot, flags, offset);
-}
-
-/* Release memory at the given address, as well as the corresponding
- executable page if it's separate. */
-static int
-dlmunmap (void *start, size_t length)
-{
- /* We don't bother decreasing execsize or truncating the file, since
- we can't quite tell whether we're unmapping the end of the file.
- We don't expect frequent deallocation anyway. If we did, we
- could locate pages in the file by writing to the pages being
- deallocated and checking that the file contents change.
- Yuck. */
- msegmentptr seg = segment_holding (gm, start);
- void *code;
-
- if (seg && (code = add_segment_exec_offset (start, seg)) != start)
- {
- int ret = munmap (code, length);
- if (ret)
- return ret;
- }
-
- return munmap (start, length);
-}
-
-#if FFI_CLOSURE_FREE_CODE
-/* Return segment holding given code address. */
-static msegmentptr
-segment_holding_code (mstate m, char* addr)
-{
- msegmentptr sp = &m->seg;
- for (;;) {
- if (addr >= add_segment_exec_offset (sp->base, sp)
- && addr < add_segment_exec_offset (sp->base, sp) + sp->size)
- return sp;
- if ((sp = sp->next) == 0)
- return 0;
- }
-}
-#endif
-
+ return start;
+ }
+
+ mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start;
+
+ execsize += length;
+
+ return start;
+}
+
+/* Map in a writable and executable chunk of memory if possible.
+ Failing that, fall back to dlmmap_locked. */
+static void *
+dlmmap (void *start, size_t length, int prot,
+ int flags, int fd, off_t offset)
+{
+ void *ptr;
+
+ assert (start == NULL && length % malloc_getpagesize == 0
+ && prot == (PROT_READ | PROT_WRITE)
+ && flags == (MAP_PRIVATE | MAP_ANONYMOUS)
+ && fd == -1 && offset == 0);
+
+ if (execfd == -1 && is_emutramp_enabled ())
+ {
+ ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
+ return ptr;
+ }
+
+ if (execfd == -1 && !is_selinux_enabled ())
+ {
+ ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset);
+
+ if (ptr != MFAIL || (errno != EPERM && errno != EACCES))
+ /* Cool, no need to mess with separate segments. */
+ return ptr;
+
+ /* If MREMAP_DUP is ever introduced and implemented, try mmap
+ with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with
+ MREMAP_DUP and prot at this point. */
+ }
+
+ if (execsize == 0 || execfd == -1)
+ {
+ pthread_mutex_lock (&open_temp_exec_file_mutex);
+ ptr = dlmmap_locked (start, length, prot, flags, offset);
+ pthread_mutex_unlock (&open_temp_exec_file_mutex);
+
+ return ptr;
+ }
+
+ return dlmmap_locked (start, length, prot, flags, offset);
+}
+
+/* Release memory at the given address, as well as the corresponding
+ executable page if it's separate. */
+static int
+dlmunmap (void *start, size_t length)
+{
+ /* We don't bother decreasing execsize or truncating the file, since
+ we can't quite tell whether we're unmapping the end of the file.
+ We don't expect frequent deallocation anyway. If we did, we
+ could locate pages in the file by writing to the pages being
+ deallocated and checking that the file contents change.
+ Yuck. */
+ msegmentptr seg = segment_holding (gm, start);
+ void *code;
+
+ if (seg && (code = add_segment_exec_offset (start, seg)) != start)
+ {
+ int ret = munmap (code, length);
+ if (ret)
+ return ret;
+ }
+
+ return munmap (start, length);
+}
+
+#if FFI_CLOSURE_FREE_CODE
+/* Return segment holding given code address. */
+static msegmentptr
+segment_holding_code (mstate m, char* addr)
+{
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if (addr >= add_segment_exec_offset (sp->base, sp)
+ && addr < add_segment_exec_offset (sp->base, sp) + sp->size)
+ return sp;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+#endif
+
#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
-
-/* Allocate a chunk of memory with the given size. Returns a pointer
- to the writable address, and sets *CODE to the executable
- corresponding virtual address. */
-void *
-ffi_closure_alloc (size_t size, void **code)
-{
- void *ptr;
-
- if (!code)
- return NULL;
-
- ptr = dlmalloc (size);
-
- if (ptr)
- {
- msegmentptr seg = segment_holding (gm, ptr);
-
- *code = add_segment_exec_offset (ptr, seg);
- }
-
- return ptr;
-}
-
+
+/* Allocate a chunk of memory with the given size. Returns a pointer
+ to the writable address, and sets *CODE to the executable
+ corresponding virtual address. */
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+ void *ptr;
+
+ if (!code)
+ return NULL;
+
+ ptr = dlmalloc (size);
+
+ if (ptr)
+ {
+ msegmentptr seg = segment_holding (gm, ptr);
+
+ *code = add_segment_exec_offset (ptr, seg);
+ }
+
+ return ptr;
+}
+
void *
ffi_data_to_code_pointer (void *data)
{
@@ -947,52 +947,52 @@ ffi_data_to_code_pointer (void *data)
return data;
}
-/* Release a chunk of memory allocated with ffi_closure_alloc. If
- FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the
- writable or the executable address given. Otherwise, only the
- writable address can be provided here. */
-void
-ffi_closure_free (void *ptr)
-{
-#if FFI_CLOSURE_FREE_CODE
- msegmentptr seg = segment_holding_code (gm, ptr);
-
- if (seg)
- ptr = sub_segment_exec_offset (ptr, seg);
-#endif
-
- dlfree (ptr);
-}
-
-# else /* ! FFI_MMAP_EXEC_WRIT */
-
-/* On many systems, memory returned by malloc is writable and
- executable, so just use it. */
-
-#include <stdlib.h>
-
-void *
-ffi_closure_alloc (size_t size, void **code)
-{
- if (!code)
- return NULL;
-
- return *code = malloc (size);
-}
-
-void
-ffi_closure_free (void *ptr)
-{
- free (ptr);
-}
-
+/* Release a chunk of memory allocated with ffi_closure_alloc. If
+ FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the
+ writable or the executable address given. Otherwise, only the
+ writable address can be provided here. */
+void
+ffi_closure_free (void *ptr)
+{
+#if FFI_CLOSURE_FREE_CODE
+ msegmentptr seg = segment_holding_code (gm, ptr);
+
+ if (seg)
+ ptr = sub_segment_exec_offset (ptr, seg);
+#endif
+
+ dlfree (ptr);
+}
+
+# else /* ! FFI_MMAP_EXEC_WRIT */
+
+/* On many systems, memory returned by malloc is writable and
+ executable, so just use it. */
+
+#include <stdlib.h>
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+ if (!code)
+ return NULL;
+
+ return *code = malloc (size);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+ free (ptr);
+}
+
void *
ffi_data_to_code_pointer (void *data)
{
return data;
}
-# endif /* ! FFI_MMAP_EXEC_WRIT */
-#endif /* FFI_CLOSURES */
+# endif /* ! FFI_MMAP_EXEC_WRIT */
+#endif /* FFI_CLOSURES */
#endif /* NetBSD with PROT_MPROTECT */
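
The closures.c hunks above re-annotate libffi's strategy for systems that forbid mappings that are both writable and executable: dlmmap_locked backs closure memory with an unlinked temporary file (located by trying $TMPDIR, /tmp, /var/tmp, /dev/shm, $HOME and finally the mount table) and maps the same file offset twice, one writable view handed to dlmalloc and one executable view whose distance from it is recorded via mmap_exec_offset. A stripped-down POSIX sketch of that double-mapping idea; the file name, page size handling and error paths are illustrative assumptions, not libffi's actual code:

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
  char name[] = "/tmp/ffi-demoXXXXXX";       /* hypothetical demo path */
  long pagesz = sysconf(_SC_PAGESIZE);

  int fd = mkstemp(name);                    /* create a temporary backing file */
  if (fd == -1) { perror("mkstemp"); return 1; }
  unlink(name);                              /* the descriptor keeps the file alive */

  if (ftruncate(fd, pagesz) != 0) { perror("ftruncate"); return 1; }

  /* Writable view of the page... */
  unsigned char *rw = mmap(NULL, (size_t) pagesz, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);
  /* ...and an executable view of the very same file offset. */
  unsigned char *rx = mmap(NULL, (size_t) pagesz, PROT_READ | PROT_EXEC,
                           MAP_SHARED, fd, 0);
  if (rw == MAP_FAILED || rx == MAP_FAILED) { perror("mmap"); return 1; }

  rw[0] = 0xC3;                              /* store a byte through the writable view */
  printf("rx[0] = %#x\n", rx[0]);            /* the executable view sees the same byte */

  munmap(rw, (size_t) pagesz);
  munmap(rx, (size_t) pagesz);
  close(fd);
  return 0;
}

As in the real implementation, the PROT_EXEC mapping can still fail when the chosen filesystem is mounted noexec, which is why closures.c keeps a list of candidate directories and mount points to retry before giving up.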
diff --git a/contrib/restricted/libffi/src/dlmalloc.c b/contrib/restricted/libffi/src/dlmalloc.c
index 6f47b7cece..ec85fcec2a 100644
--- a/contrib/restricted/libffi/src/dlmalloc.c
+++ b/contrib/restricted/libffi/src/dlmalloc.c
@@ -1,5166 +1,5166 @@
-/*
- This is a version (aka dlmalloc) of malloc/free/realloc written by
- Doug Lea and released to the public domain, as explained at
- http://creativecommons.org/licenses/publicdomain. Send questions,
- comments, complaints, performance data, etc to dl@cs.oswego.edu
-
-* Version 2.8.3 Thu Sep 22 11:16:15 2005 Doug Lea (dl at gee)
-
- Note: There may be an updated version of this malloc obtainable at
- ftp://gee.cs.oswego.edu/pub/misc/malloc.c
- Check before installing!
-
-* Quickstart
-
- This library is all in one file to simplify the most common usage:
- ftp it, compile it (-O3), and link it into another program. All of
- the compile-time options default to reasonable values for use on
- most platforms. You might later want to step through various
- compile-time and dynamic tuning options.
-
- For convenience, an include file for code using this malloc is at:
- ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.3.h
- You don't really need this .h file unless you call functions not
- defined in your system include files. The .h file contains only the
- excerpts from this file needed for using this malloc on ANSI C/C++
- systems, so long as you haven't changed compile-time options about
- naming and tuning parameters. If you do, then you can create your
- own malloc.h that does include all settings by cutting at the point
- indicated below. Note that you may already by default be using a C
- library containing a malloc that is based on some version of this
- malloc (for example in linux). You might still want to use the one
- in this file to customize settings or to avoid overheads associated
- with library versions.
-
-* Vital statistics:
-
- Supported pointer/size_t representation: 4 or 8 bytes
- size_t MUST be an unsigned type of the same width as
- pointers. (If you are using an ancient system that declares
- size_t as a signed type, or need it to be a different width
- than pointers, you can use a previous release of this malloc
- (e.g. 2.7.2) supporting these.)
-
- Alignment: 8 bytes (default)
- This suffices for nearly all current machines and C compilers.
- However, you can define MALLOC_ALIGNMENT to be wider than this
- if necessary (up to 128bytes), at the expense of using more space.
-
- Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
- 8 or 16 bytes (if 8byte sizes)
- Each malloced chunk has a hidden word of overhead holding size
- and status information, and additional cross-check word
- if FOOTERS is defined.
-
- Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
- 8-byte ptrs: 32 bytes (including overhead)
-
- Even a request for zero bytes (i.e., malloc(0)) returns a
- pointer to something of the minimum allocatable size.
- The maximum overhead wastage (i.e., number of extra bytes
- allocated than were requested in malloc) is less than or equal
- to the minimum size, except for requests >= mmap_threshold that
- are serviced via mmap(), where the worst case wastage is about
- 32 bytes plus the remainder from a system page (the minimal
- mmap unit); typically 4096 or 8192 bytes.
-
- Security: static-safe; optionally more or less
- The "security" of malloc refers to the ability of malicious
- code to accentuate the effects of errors (for example, freeing
- space that is not currently malloc'ed or overwriting past the
- ends of chunks) in code that calls malloc. This malloc
- guarantees not to modify any memory locations below the base of
- heap, i.e., static variables, even in the presence of usage
- errors. The routines additionally detect most improper frees
- and reallocs. All this holds as long as the static bookkeeping
- for malloc itself is not corrupted by some other means. This
- is only one aspect of security -- these checks do not, and
- cannot, detect all possible programming errors.
-
- If FOOTERS is defined nonzero, then each allocated chunk
- carries an additional check word to verify that it was malloced
- from its space. These check words are the same within each
- execution of a program using malloc, but differ across
- executions, so externally crafted fake chunks cannot be
- freed. This improves security by rejecting frees/reallocs that
- could corrupt heap memory, in addition to the checks preventing
- writes to statics that are always on. This may further improve
- security at the expense of time and space overhead. (Note that
- FOOTERS may also be worth using with MSPACES.)
-
- By default detected errors cause the program to abort (calling
- "abort()"). You can override this to instead proceed past
- errors by defining PROCEED_ON_ERROR. In this case, a bad free
- has no effect, and a malloc that encounters a bad address
- caused by user overwrites will ignore the bad address by
- dropping pointers and indices to all known memory. This may
- be appropriate for programs that should continue if at all
- possible in the face of programming errors, although they may
- run out of memory because dropped memory is never reclaimed.
-
- If you don't like either of these options, you can define
- CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
- else. And if if you are sure that your program using malloc has
- no errors or vulnerabilities, you can define INSECURE to 1,
- which might (or might not) provide a small performance improvement.
-
- Thread-safety: NOT thread-safe unless USE_LOCKS defined
- When USE_LOCKS is defined, each public call to malloc, free,
- etc is surrounded with either a pthread mutex or a win32
- spinlock (depending on WIN32). This is not especially fast, and
- can be a major bottleneck. It is designed only to provide
- minimal protection in concurrent environments, and to provide a
- basis for extensions. If you are using malloc in a concurrent
- program, consider instead using ptmalloc, which is derived from
- a version of this malloc. (See http://www.malloc.de).
-
- System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
- This malloc can use unix sbrk or any emulation (invoked using
- the CALL_MORECORE macro) and/or mmap/munmap or any emulation
- (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
- memory. On most unix systems, it tends to work best if both
- MORECORE and MMAP are enabled. On Win32, it uses emulations
- based on VirtualAlloc. It also uses common C library functions
- like memset.
-
- Compliance: I believe it is compliant with the Single Unix Specification
- (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
- others as well.
-
-* Overview of algorithms
-
- This is not the fastest, most space-conserving, most portable, or
- most tunable malloc ever written. However it is among the fastest
- while also being among the most space-conserving, portable and
- tunable. Consistent balance across these factors results in a good
- general-purpose allocator for malloc-intensive programs.
-
- In most ways, this malloc is a best-fit allocator. Generally, it
- chooses the best-fitting existing chunk for a request, with ties
- broken in approximately least-recently-used order. (This strategy
- normally maintains low fragmentation.) However, for requests less
- than 256bytes, it deviates from best-fit when there is not an
- exactly fitting available chunk by preferring to use space adjacent
- to that used for the previous small request, as well as by breaking
- ties in approximately most-recently-used order. (These enhance
- locality of series of small allocations.) And for very large requests
- (>= 256Kb by default), it relies on system memory mapping
- facilities, if supported. (This helps avoid carrying around and
- possibly fragmenting memory used only for large chunks.)
-
- All operations (except malloc_stats and mallinfo) have execution
- times that are bounded by a constant factor of the number of bits in
- a size_t, not counting any clearing in calloc or copying in realloc,
- or actions surrounding MORECORE and MMAP that have times
- proportional to the number of non-contiguous regions returned by
- system allocation routines, which is often just 1.
-
- The implementation is not very modular and seriously overuses
- macros. Perhaps someday all C compilers will do as good a job
- inlining modular code as can now be done by brute-force expansion,
- but now, enough of them seem not to.
-
- Some compilers issue a lot of warnings about code that is
- dead/unreachable only on some platforms, and also about intentional
- uses of negation on unsigned types. All known cases of each can be
- ignored.
-
- For a longer but out of date high-level description, see
- http://gee.cs.oswego.edu/dl/html/malloc.html
-
-* MSPACES
- If MSPACES is defined, then in addition to malloc, free, etc.,
- this file also defines mspace_malloc, mspace_free, etc. These
- are versions of malloc routines that take an "mspace" argument
- obtained using create_mspace, to control all internal bookkeeping.
- If ONLY_MSPACES is defined, only these versions are compiled.
- So if you would like to use this allocator for only some allocations,
- and your system malloc for others, you can compile with
- ONLY_MSPACES and then do something like...
- static mspace mymspace = create_mspace(0,0); // for example
- #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
-
- (Note: If you only need one instance of an mspace, you can instead
- use "USE_DL_PREFIX" to relabel the global malloc.)
-
- You can similarly create thread-local allocators by storing
- mspaces as thread-locals. For example:
- static __thread mspace tlms = 0;
- void* tlmalloc(size_t bytes) {
- if (tlms == 0) tlms = create_mspace(0, 0);
- return mspace_malloc(tlms, bytes);
- }
- void tlfree(void* mem) { mspace_free(tlms, mem); }
-
- Unless FOOTERS is defined, each mspace is completely independent.
- You cannot allocate from one and free to another (although
- conformance is only weakly checked, so usage errors are not always
- caught). If FOOTERS is defined, then each chunk carries around a tag
- indicating its originating mspace, and frees are directed to their
- originating spaces.
-
- ------------------------- Compile-time options ---------------------------
-
-Be careful in setting #define values for numerical constants of type
-size_t. On some systems, literal values are not automatically extended
-to size_t precision unless they are explicitly casted.
-
-WIN32 default: defined if _WIN32 defined
- Defining WIN32 sets up defaults for MS environment and compilers.
- Otherwise defaults are for unix.
-
-MALLOC_ALIGNMENT default: (size_t)8
- Controls the minimum alignment for malloc'ed chunks. It must be a
- power of two and at least 8, even on machines for which smaller
- alignments would suffice. It may be defined as larger than this
- though. Note however that code and data structures are optimized for
- the case of 8-byte alignment.
-
-MSPACES default: 0 (false)
- If true, compile in support for independent allocation spaces.
- This is only supported if HAVE_MMAP is true.
-
-ONLY_MSPACES default: 0 (false)
- If true, only compile in mspace versions, not regular versions.
-
-USE_LOCKS default: 0 (false)
- Causes each call to each public routine to be surrounded with
- pthread or WIN32 mutex lock/unlock. (If set true, this can be
- overridden on a per-mspace basis for mspace versions.)
-
-FOOTERS default: 0
- If true, provide extra checking and dispatching by placing
- information in the footers of allocated chunks. This adds
- space and time overhead.
-
-INSECURE default: 0
- If true, omit checks for usage errors and heap space overwrites.
-
-USE_DL_PREFIX default: NOT defined
- Causes compiler to prefix all public routines with the string 'dl'.
- This can be useful when you only want to use this malloc in one part
- of a program, using your regular system malloc elsewhere.
-
-ABORT default: defined as abort()
- Defines how to abort on failed checks. On most systems, a failed
- check cannot die with an "assert" or even print an informative
- message, because the underlying print routines in turn call malloc,
- which will fail again. Generally, the best policy is to simply call
- abort(). It's not very useful to do more than this because many
- errors due to overwriting will show up as address faults (null, odd
- addresses etc) rather than malloc-triggered checks, so will also
- abort. Also, most compilers know that abort() does not return, so
- can better optimize code conditionally calling it.
-
-PROCEED_ON_ERROR default: defined as 0 (false)
- Controls whether detected bad addresses cause them to be bypassed
- rather than aborting. If set, detected bad arguments to free and
- realloc are ignored. And all bookkeeping information is zeroed out
- upon a detected overwrite of freed heap space, thus losing the
- ability to ever return it from malloc again, but enabling the
- application to proceed. If PROCEED_ON_ERROR is defined, the
- static variable malloc_corruption_error_count is compiled in
- and can be examined to see if errors have occurred. This option
- generates slower code than the default abort policy.
-
-DEBUG default: NOT defined
- The DEBUG setting is mainly intended for people trying to modify
- this code or diagnose problems when porting to new platforms.
- However, it may also be able to better isolate user errors than just
- using runtime checks. The assertions in the check routines spell
- out in more detail the assumptions and invariants underlying the
- algorithms. The checking is fairly extensive, and will slow down
- execution noticeably. Calling malloc_stats or mallinfo with DEBUG
- set will attempt to check every non-mmapped allocated and free chunk
- in the course of computing the summaries.
-
-ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
- Debugging assertion failures can be nearly impossible if your
- version of the assert macro causes malloc to be called, which will
- lead to a cascade of further failures, blowing the runtime stack.
- ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
- which will usually make debugging easier.
-
-MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
- The action to take before "return 0" when malloc fails to be able to
- return memory because there is none available.
-
-HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
- True if this system supports sbrk or an emulation of it.
-
-MORECORE default: sbrk
- The name of the sbrk-style system routine to call to obtain more
- memory. See below for guidance on writing custom MORECORE
- functions. The type of the argument to sbrk/MORECORE varies across
- systems. It cannot be size_t, because it supports negative
- arguments, so it is normally the signed type of the same width as
- size_t (sometimes declared as "intptr_t"). It doesn't much matter
- though. Internally, we only call it with arguments less than half
- the max value of a size_t, which should work across all reasonable
- possibilities, although sometimes generating compiler warnings. See
- near the end of this file for guidelines for creating a custom
- version of MORECORE.
-
-MORECORE_CONTIGUOUS default: 1 (true)
- If true, take advantage of fact that consecutive calls to MORECORE
- with positive arguments always return contiguous increasing
- addresses. This is true of unix sbrk. It does not hurt too much to
- set it true anyway, since malloc copes with non-contiguities.
- Setting it false when definitely non-contiguous saves time
- and possibly wasted space it would take to discover this though.
-
-MORECORE_CANNOT_TRIM default: NOT defined
- True if MORECORE cannot release space back to the system when given
- negative arguments. This is generally necessary only if you are
- using a hand-crafted MORECORE function that cannot handle negative
- arguments.
-
-HAVE_MMAP default: 1 (true)
- True if this system supports mmap or an emulation of it. If so, and
- HAVE_MORECORE is not true, MMAP is used for all system
- allocation. If set and HAVE_MORECORE is true as well, MMAP is
- primarily used to directly allocate very large blocks. It is also
- used as a backup strategy in cases where MORECORE fails to provide
- space from system. Note: A single call to MUNMAP is assumed to be
- able to unmap memory that may have been allocated using multiple calls
- to MMAP, so long as they are adjacent.
-
-HAVE_MREMAP default: 1 on linux, else 0
- If true realloc() uses mremap() to re-allocate large blocks and
- extend or shrink allocation spaces.
-
-MMAP_CLEARS default: 1 on unix
- True if mmap clears memory so calloc doesn't need to. This is true
- for standard unix mmap using /dev/zero.
-
-USE_BUILTIN_FFS default: 0 (i.e., not used)
- Causes malloc to use the builtin ffs() function to compute indices.
- Some compilers may recognize and intrinsify ffs to be faster than the
- supplied C version. Also, the case of x86 using gcc is special-cased
- to an asm instruction, so is already as fast as it can be, and so
- this setting has no effect. (On most x86s, the asm version is only
- slightly faster than the C version.)
-
-malloc_getpagesize default: derive from system includes, or 4096.
- The system page size. To the extent possible, this malloc manages
- memory from the system in page-size units. This may be (and
- usually is) a function rather than a constant. This is ignored
- if WIN32, where page size is determined using GetSystemInfo during
- initialization.
-
-USE_DEV_RANDOM default: 0 (i.e., not used)
- Causes malloc to use /dev/random to initialize secure magic seed for
- stamping footers. Otherwise, the current time is used.
-
-NO_MALLINFO default: 0
- If defined, don't compile "mallinfo". This can be a simple way
- of dealing with mismatches between system declarations and
- those in this file.
-
-MALLINFO_FIELD_TYPE default: size_t
- The type of the fields in the mallinfo struct. This was originally
- defined as "int" in SVID etc, but is more usefully defined as
- size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
-
-REALLOC_ZERO_BYTES_FREES default: not defined
- This should be set if a call to realloc with zero bytes should
- be the same as a call to free. Some people think it should. Otherwise,
- since this malloc returns a unique pointer for malloc(0), so does
- realloc(p, 0).
-
-LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
-LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
-LACKS_STDLIB_H default: NOT defined unless on WIN32
- Define these if your system does not have these header files.
- You might need to manually insert some of the declarations they provide.
-
-DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
- system_info.dwAllocationGranularity in WIN32,
- otherwise 64K.
- Also settable using mallopt(M_GRANULARITY, x)
- The unit for allocating and deallocating memory from the system. On
- most systems with contiguous MORECORE, there is no reason to
- make this more than a page. However, systems with MMAP tend to
- either require or encourage larger granularities. You can increase
- this value to prevent system allocation functions from being called so
- often, especially if they are slow. The value must be at least one
- page and must be a power of two. Setting to 0 causes initialization
- to either page size or win32 region size. (Note: In previous
- versions of malloc, the equivalent of this option was called
- "TOP_PAD")
-
-DEFAULT_TRIM_THRESHOLD default: 2MB
- Also settable using mallopt(M_TRIM_THRESHOLD, x)
- The maximum amount of unused top-most memory to keep before
- releasing via malloc_trim in free(). Automatic trimming is mainly
- useful in long-lived programs using contiguous MORECORE. Because
- trimming via sbrk can be slow on some systems, and can sometimes be
- wasteful (in cases where programs immediately afterward allocate
- more large chunks) the value should be high enough so that your
- overall system performance would improve by releasing this much
- memory. As a rough guide, you might set to a value close to the
- average size of a process (program) running on your system.
- Releasing this much memory would allow such a process to run in
- memory. Generally, it is worth tuning trim thresholds when a
- program undergoes phases where several large chunks are allocated
- and released in ways that can reuse each other's storage, perhaps
- mixed with phases where there are no such chunks at all. The trim
- value must be greater than page size to have any useful effect. To
- disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
- some people use of mallocing a huge space and then freeing it at
- program startup, in an attempt to reserve system memory, doesn't
- have the intended effect under automatic trimming, since that memory
- will immediately be returned to the system.
-
-DEFAULT_MMAP_THRESHOLD default: 256K
- Also settable using mallopt(M_MMAP_THRESHOLD, x)
- The request size threshold for using MMAP to directly service a
- request. Requests of at least this size that cannot be allocated
- using already-existing space will be serviced via mmap. (If enough
- normal freed space already exists it is used instead.) Using mmap
- segregates relatively large chunks of memory so that they can be
- individually obtained and released from the host system. A request
- serviced through mmap is never reused by any other request (at least
- not directly; the system may just so happen to remap successive
- requests to the same locations). Segregating space in this way has
- the benefits that: Mmapped space can always be individually released
- back to the system, which helps keep the system level memory demands
- of a long-lived program low. Also, mapped memory doesn't become
- `locked' between other chunks, as can happen with normally allocated
- chunks, which means that even trimming via malloc_trim would not
- release them. However, it has the disadvantage that the space
- cannot be reclaimed, consolidated, and then used to service later
- requests, as happens with normal chunks. The advantages of mmap
- nearly always outweigh disadvantages for "large" chunks, but the
- value of "large" may vary across systems. The default is an
- empirically derived value that works well in most systems. You can
- disable mmap by setting to MAX_SIZE_T.
-
-*/
-
+/*
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain, as explained at
+ http://creativecommons.org/licenses/publicdomain. Send questions,
+ comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version 2.8.3 Thu Sep 22 11:16:15 2005 Doug Lea (dl at gee)
+
+ Note: There may be an updated version of this malloc obtainable at
+ ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+ Check before installing!
+
+* Quickstart
+
+ This library is all in one file to simplify the most common usage:
+ ftp it, compile it (-O3), and link it into another program. All of
+ the compile-time options default to reasonable values for use on
+ most platforms. You might later want to step through various
+ compile-time and dynamic tuning options.
+
+ For convenience, an include file for code using this malloc is at:
+ ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.3.h
+ You don't really need this .h file unless you call functions not
+ defined in your system include files. The .h file contains only the
+ excerpts from this file needed for using this malloc on ANSI C/C++
+ systems, so long as you haven't changed compile-time options about
+ naming and tuning parameters. If you do, then you can create your
+ own malloc.h that does include all settings by cutting at the point
+ indicated below. Note that you may already by default be using a C
+ library containing a malloc that is based on some version of this
+ malloc (for example in linux). You might still want to use the one
+ in this file to customize settings or to avoid overheads associated
+ with library versions.
+
+* Vital statistics:
+
+ Supported pointer/size_t representation: 4 or 8 bytes
+ size_t MUST be an unsigned type of the same width as
+ pointers. (If you are using an ancient system that declares
+ size_t as a signed type, or need it to be a different width
+ than pointers, you can use a previous release of this malloc
+ (e.g. 2.7.2) supporting these.)
+
+ Alignment: 8 bytes (default)
+ This suffices for nearly all current machines and C compilers.
+ However, you can define MALLOC_ALIGNMENT to be wider than this
+ if necessary (up to 128 bytes), at the expense of using more space.
+
+ Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
+ 8 or 16 bytes (if 8byte sizes)
+ Each malloced chunk has a hidden word of overhead holding size
+ and status information, and additional cross-check word
+ if FOOTERS is defined.
+
+ Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
+ 8-byte ptrs: 32 bytes (including overhead)
+
+ Even a request for zero bytes (i.e., malloc(0)) returns a
+ pointer to something of the minimum allocatable size.
+ The maximum overhead wastage (i.e., number of extra bytes
+ allocated than were requested in malloc) is less than or equal
+ to the minimum size, except for requests >= mmap_threshold that
+ are serviced via mmap(), where the worst case wastage is about
+ 32 bytes plus the remainder from a system page (the minimal
+ mmap unit); typically 4096 or 8192 bytes.
+
+ Security: static-safe; optionally more or less
+ The "security" of malloc refers to the ability of malicious
+ code to accentuate the effects of errors (for example, freeing
+ space that is not currently malloc'ed or overwriting past the
+ ends of chunks) in code that calls malloc. This malloc
+ guarantees not to modify any memory locations below the base of
+ heap, i.e., static variables, even in the presence of usage
+ errors. The routines additionally detect most improper frees
+ and reallocs. All this holds as long as the static bookkeeping
+ for malloc itself is not corrupted by some other means. This
+ is only one aspect of security -- these checks do not, and
+ cannot, detect all possible programming errors.
+
+ If FOOTERS is defined nonzero, then each allocated chunk
+ carries an additional check word to verify that it was malloced
+ from its space. These check words are the same within each
+ execution of a program using malloc, but differ across
+ executions, so externally crafted fake chunks cannot be
+ freed. This improves security by rejecting frees/reallocs that
+ could corrupt heap memory, in addition to the checks preventing
+ writes to statics that are always on. This may further improve
+ security at the expense of time and space overhead. (Note that
+ FOOTERS may also be worth using with MSPACES.)
+
+ By default detected errors cause the program to abort (calling
+ "abort()"). You can override this to instead proceed past
+ errors by defining PROCEED_ON_ERROR. In this case, a bad free
+ has no effect, and a malloc that encounters a bad address
+ caused by user overwrites will ignore the bad address by
+ dropping pointers and indices to all known memory. This may
+ be appropriate for programs that should continue if at all
+ possible in the face of programming errors, although they may
+ run out of memory because dropped memory is never reclaimed.
+
+ If you don't like either of these options, you can define
+ CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+ else. And if you are sure that your program using malloc has
+ no errors or vulnerabilities, you can define INSECURE to 1,
+ which might (or might not) provide a small performance improvement.
+
+ Thread-safety: NOT thread-safe unless USE_LOCKS defined
+ When USE_LOCKS is defined, each public call to malloc, free,
+ etc is surrounded with either a pthread mutex or a win32
+ spinlock (depending on WIN32). This is not especially fast, and
+ can be a major bottleneck. It is designed only to provide
+ minimal protection in concurrent environments, and to provide a
+ basis for extensions. If you are using malloc in a concurrent
+ program, consider instead using ptmalloc, which is derived from
+ a version of this malloc. (See http://www.malloc.de).
+
+ System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+ This malloc can use unix sbrk or any emulation (invoked using
+ the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+ (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+ memory. On most unix systems, it tends to work best if both
+ MORECORE and MMAP are enabled. On Win32, it uses emulations
+ based on VirtualAlloc. It also uses common C library functions
+ like memset.
+
+ Compliance: I believe it is compliant with the Single Unix Specification
+ (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+ others as well.
+
+* Overview of algorithms
+
+ This is not the fastest, most space-conserving, most portable, or
+ most tunable malloc ever written. However it is among the fastest
+ while also being among the most space-conserving, portable and
+ tunable. Consistent balance across these factors results in a good
+ general-purpose allocator for malloc-intensive programs.
+
+ In most ways, this malloc is a best-fit allocator. Generally, it
+ chooses the best-fitting existing chunk for a request, with ties
+ broken in approximately least-recently-used order. (This strategy
+ normally maintains low fragmentation.) However, for requests less
+ than 256 bytes, it deviates from best-fit when there is not an
+ exactly fitting available chunk by preferring to use space adjacent
+ to that used for the previous small request, as well as by breaking
+ ties in approximately most-recently-used order. (These enhance
+ locality of series of small allocations.) And for very large requests
+ (>= 256Kb by default), it relies on system memory mapping
+ facilities, if supported. (This helps avoid carrying around and
+ possibly fragmenting memory used only for large chunks.)
+
+ All operations (except malloc_stats and mallinfo) have execution
+ times that are bounded by a constant factor of the number of bits in
+ a size_t, not counting any clearing in calloc or copying in realloc,
+ or actions surrounding MORECORE and MMAP that have times
+ proportional to the number of non-contiguous regions returned by
+ system allocation routines, which is often just 1.
+
+ The implementation is not very modular and seriously overuses
+ macros. Perhaps someday all C compilers will do as good a job
+ inlining modular code as can now be done by brute-force expansion,
+ but now, enough of them seem not to.
+
+ Some compilers issue a lot of warnings about code that is
+ dead/unreachable only on some platforms, and also about intentional
+ uses of negation on unsigned types. All known cases of each can be
+ ignored.
+
+ For a longer but out of date high-level description, see
+ http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+ If MSPACES is defined, then in addition to malloc, free, etc.,
+ this file also defines mspace_malloc, mspace_free, etc. These
+ are versions of malloc routines that take an "mspace" argument
+ obtained using create_mspace, to control all internal bookkeeping.
+ If ONLY_MSPACES is defined, only these versions are compiled.
+ So if you would like to use this allocator for only some allocations,
+ and your system malloc for others, you can compile with
+ ONLY_MSPACES and then do something like...
+ static mspace mymspace = create_mspace(0,0); // for example
+ #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
+
+ (Note: If you only need one instance of an mspace, you can instead
+ use "USE_DL_PREFIX" to relabel the global malloc.)
+
+ You can similarly create thread-local allocators by storing
+ mspaces as thread-locals. For example:
+ static __thread mspace tlms = 0;
+ void* tlmalloc(size_t bytes) {
+ if (tlms == 0) tlms = create_mspace(0, 0);
+ return mspace_malloc(tlms, bytes);
+ }
+ void tlfree(void* mem) { mspace_free(tlms, mem); }
+
+ Unless FOOTERS is defined, each mspace is completely independent.
+ You cannot allocate from one and free to another (although
+ conformance is only weakly checked, so usage errors are not always
+ caught). If FOOTERS is defined, then each chunk carries around a tag
+ indicating its originating mspace, and frees are directed to their
+ originating spaces.
+
+ ------------------------- Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly cast.
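+For instance (an illustrative sketch, mirroring the casts the defaults
+below already use):
+  #define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+keeps the arithmetic in size_t, whereas a plain 256 * 1024 is evaluated
+in int and only then converted.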
+
+WIN32 default: defined if _WIN32 defined
+ Defining WIN32 sets up defaults for MS environment and compilers.
+ Otherwise defaults are for unix.
+
+MALLOC_ALIGNMENT default: (size_t)8
+ Controls the minimum alignment for malloc'ed chunks. It must be a
+ power of two and at least 8, even on machines for which smaller
+ alignments would suffice. It may be defined as larger than this
+ though. Note however that code and data structures are optimized for
+ the case of 8-byte alignment.
+
+MSPACES default: 0 (false)
+ If true, compile in support for independent allocation spaces.
+ This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES default: 0 (false)
+ If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS default: 0 (false)
+ Causes each call to each public routine to be surrounded with
+ pthread or WIN32 mutex lock/unlock. (If set true, this can be
+ overridden on a per-mspace basis for mspace versions.)
+
+FOOTERS default: 0
+ If true, provide extra checking and dispatching by placing
+ information in the footers of allocated chunks. This adds
+ space and time overhead.
+
+INSECURE default: 0
+ If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX default: NOT defined
+ Causes compiler to prefix all public routines with the string 'dl'.
+ This can be useful when you only want to use this malloc in one part
+ of a program, using your regular system malloc elsewhere.
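+ A minimal sketch (assuming the file is compiled with USE_DL_PREFIX
+ defined):
+   void* p = dlmalloc(1024);   // served by this allocator
+   void* q = malloc(1024);     // still served by the system allocator
+   dlfree(p);
+   free(q);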
+
+ABORT default: defined as abort()
+ Defines how to abort on failed checks. On most systems, a failed
+ check cannot die with an "assert" or even print an informative
+ message, because the underlying print routines in turn call malloc,
+ which will fail again. Generally, the best policy is to simply call
+ abort(). It's not very useful to do more than this because many
+ errors due to overwriting will show up as address faults (null, odd
+ addresses etc) rather than malloc-triggered checks, so will also
+ abort. Also, most compilers know that abort() does not return, so
+ can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR default: defined as 0 (false)
+ Controls whether detected bad addresses cause them to be bypassed
+ rather than aborting. If set, detected bad arguments to free and
+ realloc are ignored. And all bookkeeping information is zeroed out
+ upon a detected overwrite of freed heap space, thus losing the
+ ability to ever return it from malloc again, but enabling the
+ application to proceed. If PROCEED_ON_ERROR is defined, the
+ static variable malloc_corruption_error_count is compiled in
+ and can be examined to see if errors have occurred. This option
+ generates slower code than the default abort policy.
+
+DEBUG default: NOT defined
+ The DEBUG setting is mainly intended for people trying to modify
+ this code or diagnose problems when porting to new platforms.
+ However, it may also be able to better isolate user errors than just
+ using runtime checks. The assertions in the check routines spell
+ out in more detail the assumptions and invariants underlying the
+ algorithms. The checking is fairly extensive, and will slow down
+ execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+ set will attempt to check every non-mmapped allocated and free chunk
+ in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
+ Debugging assertion failures can be nearly impossible if your
+ version of the assert macro causes malloc to be called, which will
+ lead to a cascade of further failures, blowing the runtime stack.
+ ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
+ which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
+ The action to take before "return 0" when malloc fails to be able to
+ return memory because there is none available.
+
+HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
+ True if this system supports sbrk or an emulation of it.
+
+MORECORE default: sbrk
+ The name of the sbrk-style system routine to call to obtain more
+ memory. See below for guidance on writing custom MORECORE
+ functions. The type of the argument to sbrk/MORECORE varies across
+ systems. It cannot be size_t, because it supports negative
+ arguments, so it is normally the signed type of the same width as
+ size_t (sometimes declared as "intptr_t"). It doesn't much matter
+ though. Internally, we only call it with arguments less than half
+ the max value of a size_t, which should work across all reasonable
+ possibilities, although sometimes generating compiler warnings. See
+ near the end of this file for guidelines for creating a custom
+ version of MORECORE.
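+ A hedged sketch of a custom MORECORE backed by a fixed static arena
+ (the names my_morecore and arena are illustrative only; MFAIL is the
+ failure value defined later in this file):
+   static char arena[1 << 20];
+   static size_t arena_used = 0;
+   void* my_morecore(intptr_t increment) {
+     if (increment < 0) return MFAIL;              // cannot release space
+     if (arena_used + (size_t)increment > sizeof(arena)) return MFAIL;
+     char* p = arena + arena_used;
+     arena_used += (size_t)increment;
+     return p;                     // increment == 0 yields the current end
+   }
+ Such a version would also define MORECORE_CANNOT_TRIM, since it cannot
+ handle negative arguments.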
+
+MORECORE_CONTIGUOUS default: 1 (true)
+ If true, take advantage of fact that consecutive calls to MORECORE
+ with positive arguments always return contiguous increasing
+ addresses. This is true of unix sbrk. It does not hurt too much to
+ set it true anyway, since malloc copes with non-contiguities.
+ Setting it false when definitely non-contiguous saves time
+ and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM default: NOT defined
+ True if MORECORE cannot release space back to the system when given
+ negative arguments. This is generally necessary only if you are
+ using a hand-crafted MORECORE function that cannot handle negative
+ arguments.
+
+HAVE_MMAP default: 1 (true)
+ True if this system supports mmap or an emulation of it. If so, and
+ HAVE_MORECORE is not true, MMAP is used for all system
+ allocation. If set and HAVE_MORECORE is true as well, MMAP is
+ primarily used to directly allocate very large blocks. It is also
+ used as a backup strategy in cases where MORECORE fails to provide
+ space from system. Note: A single call to MUNMAP is assumed to be
+ able to unmap memory that may have been allocated using multiple calls
+ to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP default: 1 on linux, else 0
+ If true realloc() uses mremap() to re-allocate large blocks and
+ extend or shrink allocation spaces.
+
+MMAP_CLEARS default: 1 on unix
+ True if mmap clears memory so calloc doesn't need to. This is true
+ for standard unix mmap using /dev/zero.
+
+USE_BUILTIN_FFS default: 0 (i.e., not used)
+ Causes malloc to use the builtin ffs() function to compute indices.
+ Some compilers may recognize and intrinsify ffs to be faster than the
+ supplied C version. Also, the case of x86 using gcc is special-cased
+ to an asm instruction, so is already as fast as it can be, and so
+ this setting has no effect. (On most x86s, the asm version is only
+ slightly faster than the C version.)
+
+malloc_getpagesize default: derive from system includes, or 4096.
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. This may be (and
+ usually is) a function rather than a constant. This is ignored
+ if WIN32, where page size is determined using GetSystemInfo during
+ initialization.
+
+USE_DEV_RANDOM default: 0 (i.e., not used)
+ Causes malloc to use /dev/random to initialize secure magic seed for
+ stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO default: 0
+ If defined, don't compile "mallinfo". This can be a simple way
+ of dealing with mismatches between system declarations and
+ those in this file.
+
+MALLINFO_FIELD_TYPE default: size_t
+ The type of the fields in the mallinfo struct. This was originally
+ defined as "int" in SVID etc, but is more usefully defined as
+ size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
+
+REALLOC_ZERO_BYTES_FREES default: not defined
+ This should be set if a call to realloc with zero bytes should
+ be the same as a call to free. Some people think it should. Otherwise,
+ since this malloc returns a unique pointer for malloc(0), so does
+ realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
+LACKS_STDLIB_H default: NOT defined unless on WIN32
+ Define these if your system does not have these header files.
+ You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
+ system_info.dwAllocationGranularity in WIN32,
+ otherwise 64K.
+ Also settable using mallopt(M_GRANULARITY, x)
+ The unit for allocating and deallocating memory from the system. On
+ most systems with contiguous MORECORE, there is no reason to
+ make this more than a page. However, systems with MMAP tend to
+ either require or encourage larger granularities. You can increase
+ this value to prevent system allocation functions from being called so
+ often, especially if they are slow. The value must be at least one
+ page and must be a power of two. Setting to 0 causes initialization
+ to either page size or win32 region size. (Note: In previous
+ versions of malloc, the equivalent of this option was called
+ "TOP_PAD")
+
+DEFAULT_TRIM_THRESHOLD default: 2MB
+ Also settable using mallopt(M_TRIM_THRESHOLD, x)
+ The maximum amount of unused top-most memory to keep before
+ releasing via malloc_trim in free(). Automatic trimming is mainly
+ useful in long-lived programs using contiguous MORECORE. Because
+ trimming via sbrk can be slow on some systems, and can sometimes be
+ wasteful (in cases where programs immediately afterward allocate
+ more large chunks) the value should be high enough so that your
+ overall system performance would improve by releasing this much
+ memory. As a rough guide, you might set to a value close to the
+ average size of a process (program) running on your system.
+ Releasing this much memory would allow such a process to run in
+ memory. Generally, it is worth tuning trim thresholds when a
+ program undergoes phases where several large chunks are allocated
+ and released in ways that can reuse each other's storage, perhaps
+ mixed with phases where there are no such chunks at all. The trim
+ value must be greater than page size to have any useful effect. To
+ disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+ some people use of mallocing a huge space and then freeing it at
+ program startup, in an attempt to reserve system memory, doesn't
+ have the intended effect under automatic trimming, since that memory
+ will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD default: 256K
+ Also settable using mallopt(M_MMAP_THRESHOLD, x)
+ The request size threshold for using MMAP to directly service a
+ request. Requests of at least this size that cannot be allocated
+ using already-existing space will be serviced via mmap. (If enough
+ normal freed space already exists it is used instead.) Using mmap
+ segregates relatively large chunks of memory so that they can be
+ individually obtained and released from the host system. A request
+ serviced through mmap is never reused by any other request (at least
+ not directly; the system may just so happen to remap successive
+ requests to the same locations). Segregating space in this way has
+ the benefits that: Mmapped space can always be individually released
+ back to the system, which helps keep the system level memory demands
+ of a long-lived program low. Also, mapped memory doesn't become
+ `locked' between other chunks, as can happen with normally allocated
+ chunks, which means that even trimming via malloc_trim would not
+ release them. However, it has the disadvantage that the space
+ cannot be reclaimed, consolidated, and then used to service later
+ requests, as happens with normal chunks. The advantages of mmap
+ nearly always outweigh disadvantages for "large" chunks, but the
+ value of "large" may vary across systems. The default is an
+ empirically derived value that works well in most systems. You can
+ disable mmap by setting to MAX_SIZE_T.
+
+*/
+
#if defined __linux__ && !defined _GNU_SOURCE
/* mremap() on Linux requires this via sys/mman.h */
#define _GNU_SOURCE 1
#endif
-#ifndef WIN32
-#ifdef _WIN32
-#define WIN32 1
-#endif /* _WIN32 */
-#endif /* WIN32 */
-#ifdef WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#define HAVE_MMAP 1
-#define HAVE_MORECORE 0
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-#define LACKS_STRING_H
-#define LACKS_STRINGS_H
-#define LACKS_SYS_TYPES_H
-#define LACKS_ERRNO_H
-#define MALLOC_FAILURE_ACTION
-#define MMAP_CLEARS 0 /* WINCE and some others apparently don't clear */
-#endif /* WIN32 */
-
-#ifdef __OS2__
-#define INCL_DOS
-#include <os2.h>
-#define HAVE_MMAP 1
-#define HAVE_MORECORE 0
-#define LACKS_SYS_MMAN_H
-#endif /* __OS2__ */
-
-#if defined(DARWIN) || defined(_DARWIN)
-/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
-#ifndef HAVE_MORECORE
-#define HAVE_MORECORE 0
-#define HAVE_MMAP 1
-#endif /* HAVE_MORECORE */
-#endif /* DARWIN */
-
-#ifndef LACKS_SYS_TYPES_H
-#include <sys/types.h> /* For size_t */
-#endif /* LACKS_SYS_TYPES_H */
-
-/* The maximum possible size_t value has all bits set */
-#define MAX_SIZE_T (~(size_t)0)
-
-#ifndef ONLY_MSPACES
-#define ONLY_MSPACES 0
-#endif /* ONLY_MSPACES */
-#ifndef MSPACES
-#if ONLY_MSPACES
-#define MSPACES 1
-#else /* ONLY_MSPACES */
-#define MSPACES 0
-#endif /* ONLY_MSPACES */
-#endif /* MSPACES */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)8U)
-#endif /* MALLOC_ALIGNMENT */
-#ifndef FOOTERS
-#define FOOTERS 0
-#endif /* FOOTERS */
-#ifndef ABORT
-#define ABORT abort()
-#endif /* ABORT */
-#ifndef ABORT_ON_ASSERT_FAILURE
-#define ABORT_ON_ASSERT_FAILURE 1
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#ifndef PROCEED_ON_ERROR
-#define PROCEED_ON_ERROR 0
-#endif /* PROCEED_ON_ERROR */
-#ifndef USE_LOCKS
-#define USE_LOCKS 0
-#endif /* USE_LOCKS */
-#ifndef INSECURE
-#define INSECURE 0
-#endif /* INSECURE */
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif /* HAVE_MMAP */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif /* MMAP_CLEARS */
-#ifndef HAVE_MREMAP
-#ifdef linux
-#define HAVE_MREMAP 1
-#else /* linux */
-#define HAVE_MREMAP 0
-#endif /* linux */
-#endif /* HAVE_MREMAP */
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION errno = ENOMEM;
-#endif /* MALLOC_FAILURE_ACTION */
-#ifndef HAVE_MORECORE
-#if ONLY_MSPACES
-#define HAVE_MORECORE 0
-#else /* ONLY_MSPACES */
-#define HAVE_MORECORE 1
-#endif /* ONLY_MSPACES */
-#endif /* HAVE_MORECORE */
-#if !HAVE_MORECORE
-#define MORECORE_CONTIGUOUS 0
-#else /* !HAVE_MORECORE */
-#ifndef MORECORE
-#define MORECORE sbrk
-#endif /* MORECORE */
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif /* MORECORE_CONTIGUOUS */
-#endif /* HAVE_MORECORE */
-#ifndef DEFAULT_GRANULARITY
-#if MORECORE_CONTIGUOUS
-#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
-#else /* MORECORE_CONTIGUOUS */
-#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
-#endif /* MORECORE_CONTIGUOUS */
-#endif /* DEFAULT_GRANULARITY */
-#ifndef DEFAULT_TRIM_THRESHOLD
-#ifndef MORECORE_CANNOT_TRIM
-#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
-#else /* MORECORE_CANNOT_TRIM */
-#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
-#endif /* MORECORE_CANNOT_TRIM */
-#endif /* DEFAULT_TRIM_THRESHOLD */
-#ifndef DEFAULT_MMAP_THRESHOLD
-#if HAVE_MMAP
-#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
-#else /* HAVE_MMAP */
-#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
-#endif /* HAVE_MMAP */
-#endif /* DEFAULT_MMAP_THRESHOLD */
-#ifndef USE_BUILTIN_FFS
-#define USE_BUILTIN_FFS 0
-#endif /* USE_BUILTIN_FFS */
-#ifndef USE_DEV_RANDOM
-#define USE_DEV_RANDOM 0
-#endif /* USE_DEV_RANDOM */
-#ifndef NO_MALLINFO
-#define NO_MALLINFO 0
-#endif /* NO_MALLINFO */
-#ifndef MALLINFO_FIELD_TYPE
-#define MALLINFO_FIELD_TYPE size_t
-#endif /* MALLINFO_FIELD_TYPE */
-
-/*
- mallopt tuning options. SVID/XPG defines four standard parameter
- numbers for mallopt, normally defined in malloc.h. None of these
- are used in this malloc, so setting them has no effect. But this
- malloc does support the following options.
-*/
-
-#define M_TRIM_THRESHOLD (-1)
-#define M_GRANULARITY (-2)
-#define M_MMAP_THRESHOLD (-3)
-
-/* ------------------------ Mallinfo declarations ------------------------ */
-
-#if !NO_MALLINFO
-/*
- This version of malloc supports the standard SVID/XPG mallinfo
- routine that returns a struct containing usage properties and
- statistics. It should work on any system that has a
- /usr/include/malloc.h defining struct mallinfo. The main
- declaration needed is the mallinfo struct that is returned (by-copy)
- by mallinfo(). The mallinfo struct contains a bunch of fields that
- are not even meaningful in this version of malloc. These fields
- are instead filled by mallinfo() with other numbers that might be of
- interest.
-
- HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
- /usr/include/malloc.h file that includes a declaration of struct
- mallinfo. If so, it is included; else a compliant version is
- declared below. These must be precisely the same for mallinfo() to
- work. The original SVID version of this struct, defined on most
- systems with mallinfo, declares all fields as ints. But some others
- define as unsigned long. If your system defines the fields using a
- type of different width than listed here, you MUST #include your
- system version and #define HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif /* _WIN32 */
+#endif /* WIN32 */
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#define HAVE_MMAP 1
+#define HAVE_MORECORE 0
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+#define LACKS_STRING_H
+#define LACKS_STRINGS_H
+#define LACKS_SYS_TYPES_H
+#define LACKS_ERRNO_H
+#define MALLOC_FAILURE_ACTION
+#define MMAP_CLEARS 0 /* WINCE and some others apparently don't clear */
+#endif /* WIN32 */
+
+#ifdef __OS2__
+#define INCL_DOS
+#include <os2.h>
+#define HAVE_MMAP 1
+#define HAVE_MORECORE 0
+#define LACKS_SYS_MMAN_H
+#endif /* __OS2__ */
+
+#if defined(DARWIN) || defined(_DARWIN)
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+#ifndef HAVE_MORECORE
+#define HAVE_MORECORE 0
+#define HAVE_MMAP 1
+#endif /* HAVE_MORECORE */
+#endif /* DARWIN */
+
+#ifndef LACKS_SYS_TYPES_H
+#include <sys/types.h> /* For size_t */
+#endif /* LACKS_SYS_TYPES_H */
+
+/* The maximum possible size_t value has all bits set */
+#define MAX_SIZE_T (~(size_t)0)
+
+#ifndef ONLY_MSPACES
+#define ONLY_MSPACES 0
+#endif /* ONLY_MSPACES */
+#ifndef MSPACES
+#if ONLY_MSPACES
+#define MSPACES 1
+#else /* ONLY_MSPACES */
+#define MSPACES 0
+#endif /* ONLY_MSPACES */
+#endif /* MSPACES */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)8U)
+#endif /* MALLOC_ALIGNMENT */
+#ifndef FOOTERS
+#define FOOTERS 0
+#endif /* FOOTERS */
+#ifndef ABORT
+#define ABORT abort()
+#endif /* ABORT */
+#ifndef ABORT_ON_ASSERT_FAILURE
+#define ABORT_ON_ASSERT_FAILURE 1
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#ifndef PROCEED_ON_ERROR
+#define PROCEED_ON_ERROR 0
+#endif /* PROCEED_ON_ERROR */
+#ifndef USE_LOCKS
+#define USE_LOCKS 0
+#endif /* USE_LOCKS */
+#ifndef INSECURE
+#define INSECURE 0
+#endif /* INSECURE */
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif /* HAVE_MMAP */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif /* MMAP_CLEARS */
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#else /* linux */
+#define HAVE_MREMAP 0
+#endif /* linux */
+#endif /* HAVE_MREMAP */
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION errno = ENOMEM;
+#endif /* MALLOC_FAILURE_ACTION */
+#ifndef HAVE_MORECORE
+#if ONLY_MSPACES
+#define HAVE_MORECORE 0
+#else /* ONLY_MSPACES */
+#define HAVE_MORECORE 1
+#endif /* ONLY_MSPACES */
+#endif /* HAVE_MORECORE */
+#if !HAVE_MORECORE
+#define MORECORE_CONTIGUOUS 0
+#else /* !HAVE_MORECORE */
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif /* MORECORE */
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* HAVE_MORECORE */
+#ifndef DEFAULT_GRANULARITY
+#if MORECORE_CONTIGUOUS
+#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
+#else /* MORECORE_CONTIGUOUS */
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* DEFAULT_GRANULARITY */
+#ifndef DEFAULT_TRIM_THRESHOLD
+#ifndef MORECORE_CANNOT_TRIM
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
+#else /* MORECORE_CANNOT_TRIM */
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
+#endif /* MORECORE_CANNOT_TRIM */
+#endif /* DEFAULT_TRIM_THRESHOLD */
+#ifndef DEFAULT_MMAP_THRESHOLD
+#if HAVE_MMAP
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+#else /* HAVE_MMAP */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+#endif /* HAVE_MMAP */
+#endif /* DEFAULT_MMAP_THRESHOLD */
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 0
+#endif /* USE_BUILTIN_FFS */
+#ifndef USE_DEV_RANDOM
+#define USE_DEV_RANDOM 0
+#endif /* USE_DEV_RANDOM */
+#ifndef NO_MALLINFO
+#define NO_MALLINFO 0
+#endif /* NO_MALLINFO */
+#ifndef MALLINFO_FIELD_TYPE
+#define MALLINFO_FIELD_TYPE size_t
+#endif /* MALLINFO_FIELD_TYPE */
+
+/*
+ mallopt tuning options. SVID/XPG defines four standard parameter
+ numbers for mallopt, normally defined in malloc.h. None of these
+ are used in this malloc, so setting them has no effect. But this
+ malloc does support the following options.
+*/
+
+#define M_TRIM_THRESHOLD (-1)
+#define M_GRANULARITY (-2)
+#define M_MMAP_THRESHOLD (-3)
+
+/* ------------------------ Mallinfo declarations ------------------------ */
+
+#if !NO_MALLINFO
+/*
+ This version of malloc supports the standard SVID/XPG mallinfo
+ routine that returns a struct containing usage properties and
+ statistics. It should work on any system that has a
+ /usr/include/malloc.h defining struct mallinfo. The main
+ declaration needed is the mallinfo struct that is returned (by-copy)
+ by mallinfo(). The mallinfo struct contains a bunch of fields that
+ are not even meaningful in this version of malloc. These fields
+ are instead filled by mallinfo() with other numbers that might be of
+ interest.
+
+ HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+ /usr/include/malloc.h file that includes a declaration of struct
+ mallinfo. If so, it is included; else a compliant version is
+ declared below. These must be precisely the same for mallinfo() to
+ work. The original SVID version of this struct, defined on most
+ systems with mallinfo, declares all fields as ints. But some others
+ define as unsigned long. If your system defines the fields using a
+ type of different width than listed here, you MUST #include your
+ system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
#error #include "/usr/include/malloc.h"
-#else /* HAVE_USR_INCLUDE_MALLOC_H */
-
-/* HP-UX's stdlib.h redefines mallinfo unless _STRUCT_MALLINFO is defined */
-#define _STRUCT_MALLINFO
-
-struct mallinfo {
- MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
- MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
- MALLINFO_FIELD_TYPE smblks; /* always 0 */
- MALLINFO_FIELD_TYPE hblks; /* always 0 */
- MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
- MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */
- MALLINFO_FIELD_TYPE fsmblks; /* always 0 */
- MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
- MALLINFO_FIELD_TYPE fordblks; /* total free space */
- MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
-};
-
-#endif /* HAVE_USR_INCLUDE_MALLOC_H */
-#endif /* NO_MALLINFO */
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#if !ONLY_MSPACES
-
-/* ------------------- Declarations of public routines ------------------- */
-
-#ifndef USE_DL_PREFIX
-#define dlcalloc calloc
-#define dlfree free
-#define dlmalloc malloc
-#define dlmemalign memalign
-#define dlrealloc realloc
-#define dlvalloc valloc
-#define dlpvalloc pvalloc
-#define dlmallinfo mallinfo
-#define dlmallopt mallopt
-#define dlmalloc_trim malloc_trim
-#define dlmalloc_stats malloc_stats
-#define dlmalloc_usable_size malloc_usable_size
-#define dlmalloc_footprint malloc_footprint
-#define dlmalloc_max_footprint malloc_max_footprint
-#define dlindependent_calloc independent_calloc
-#define dlindependent_comalloc independent_comalloc
-#endif /* USE_DL_PREFIX */
-
-
-/*
- malloc(size_t n)
- Returns a pointer to a newly allocated chunk of at least n bytes, or
- null if no space is available, in which case errno is set to ENOMEM
- on ANSI C systems.
-
- If n is zero, malloc returns a minimum-sized chunk. (The minimum
- size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
- systems.) Note that size_t is an unsigned type, so calls with
- arguments that would be negative if signed are interpreted as
- requests for huge amounts of space, which will often fail. The
- maximum supported value of n differs across systems, but is in all
- cases less than the maximum representable value of a size_t.
-*/
-void* dlmalloc(size_t);
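-/*
-  A brief usage sketch (assuming default naming, i.e. USE_DL_PREFIX is not
-  defined, so the routine is visible as plain malloc):
-    void* p = malloc(100);
-    if (p == 0)
-      fprintf(stderr, "allocation failed\n");  // errno is ENOMEM on ANSI systems
-*/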
-
-/*
- free(void* p)
- Releases the chunk of memory pointed to by p, that had been previously
- allocated using malloc or a related routine such as realloc.
- It has no effect if p is null. If p was not malloced or already
- freed, free(p) will by default cause the current program to abort.
-*/
-void dlfree(void*);
-
-/*
- calloc(size_t n_elements, size_t element_size);
- Returns a pointer to n_elements * element_size bytes, with all locations
- set to zero.
-*/
-void* dlcalloc(size_t, size_t);
-
-/*
- realloc(void* p, size_t n)
- Returns a pointer to a chunk of size n that contains the same data
- as does chunk p up to the minimum of (n, p's size) bytes, or null
- if no space is available.
-
- The returned pointer may or may not be the same as p. The algorithm
- prefers extending p in most cases when possible, otherwise it
- employs the equivalent of a malloc-copy-free sequence.
-
- If p is null, realloc is equivalent to malloc.
-
- If space is not available, realloc returns null, errno is set (if on
- ANSI) and p is NOT freed.
-
- If n is for fewer bytes than already held by p, the newly unused
- space is lopped off and freed if possible. realloc with a size
- argument of zero (re)allocates a minimum-sized chunk.
-
- The old unix realloc convention of allowing the last-free'd chunk
- to be used as an argument to realloc is not supported.
-*/
-
-void* dlrealloc(void*, size_t);
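-/*
-  Because p is NOT freed when realloc fails, a leak-free growth pattern is
-  (a sketch; newsize and handle_oom are placeholders, not part of this file):
-    void* bigger = realloc(p, newsize);
-    if (bigger != 0)
-      p = bigger;
-    else
-      handle_oom(p);  // p is still valid and still owned by the caller
-*/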
-
-/*
- memalign(size_t alignment, size_t n);
- Returns a pointer to a newly allocated chunk of n bytes, aligned
- in accord with the alignment argument.
-
- The alignment argument should be a power of two. If the argument is
- not a power of two, the nearest greater power is used.
- 8-byte alignment is guaranteed by normal malloc calls, so don't
- bother calling memalign with an argument of 8 or less.
-
- Overreliance on memalign is a sure way to fragment space.
-*/
-void* dlmemalign(size_t, size_t);
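-/*
-  For example (a sketch, default naming): obtain one page of 64-byte-aligned
-  storage:
-    void* buf = memalign(64, 4096);  // alignment must be a power of two
-*/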
-
-/*
- valloc(size_t n);
- Equivalent to memalign(pagesize, n), where pagesize is the page
- size of the system. If the pagesize is unknown, 4096 is used.
-*/
-void* dlvalloc(size_t);
-
-/*
- mallopt(int parameter_number, int parameter_value)
- Sets tunable parameters. The format is to provide a
- (parameter-number, parameter-value) pair. mallopt then sets the
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. SVID/XPG/ANSI defines four standard param numbers for mallopt,
- normally defined in malloc.h. None of these are used in this malloc,
- so setting them has no effect. But this malloc also supports other
- options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol param # default allowed param values
- M_TRIM_THRESHOLD -1 2*1024*1024 any (MAX_SIZE_T disables)
- M_GRANULARITY -2 page size any power of 2 >= page size
- M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
-*/
-int dlmallopt(int, int);
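-/*
-  For example (a sketch, default naming), trimming more eagerly and raising
-  the mmap threshold; each call returns 1 on success:
-    mallopt(M_TRIM_THRESHOLD, 128*1024);
-    mallopt(M_MMAP_THRESHOLD, 1024*1024);
-*/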
-
-/*
- malloc_footprint();
- Returns the number of bytes obtained from the system. The total
- number of bytes allocated by malloc, realloc etc., is less than this
- value. Unlike mallinfo, this function returns only a precomputed
- result, so can be called frequently to monitor memory consumption.
- Even if locks are otherwise defined, this function does not use them,
- so results might not be up to date.
-*/
-size_t dlmalloc_footprint(void);
-
-/*
- malloc_max_footprint();
- Returns the maximum number of bytes obtained from the system. This
- value will be greater than current footprint if deallocated space
- has been reclaimed by the system. The peak number of bytes allocated
- by malloc, realloc etc., is less than this value. Unlike mallinfo,
- this function returns only a precomputed result, so can be called
- frequently to monitor memory consumption. Even if locks are
- otherwise defined, this function does not use them, so results might
- not be up to date.
-*/
-size_t dlmalloc_max_footprint(void);
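-/*
-  A monitoring sketch (default naming) using both footprint calls:
-    size_t now  = malloc_footprint();
-    size_t peak = malloc_max_footprint();
-    fprintf(stderr, "heap footprint: %lu now, %lu peak\n",
-            (unsigned long)now, (unsigned long)peak);
-*/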
-
-#if !NO_MALLINFO
-/*
- mallinfo()
- Returns (by copy) a struct containing various summary statistics:
-
- arena: current total non-mmapped bytes allocated from system
- ordblks: the number of free chunks
- smblks: always zero.
- hblks: current number of mmapped regions
- hblkhd: total bytes held in mmapped regions
- usmblks: the maximum total allocated space. This will be greater
- than current total if trimming has occurred.
- fsmblks: always zero
- uordblks: current total allocated space (normal or mmapped)
- fordblks: total free space
- keepcost: the maximum number of bytes that could ideally be released
- back to system via malloc_trim. ("ideally" means that
- it ignores page restrictions etc.)
-
- Because these fields are ints, but internal bookkeeping may
- be kept as longs, the reported values may wrap around zero and
- thus be inaccurate.
-*/
-struct mallinfo dlmallinfo(void);
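-/*
-  For example (a sketch, default naming; fields have type
-  MALLINFO_FIELD_TYPE, which defaults to size_t in this file):
-    struct mallinfo mi = mallinfo();
-    fprintf(stderr, "allocated: %lu, free: %lu\n",
-            (unsigned long)mi.uordblks, (unsigned long)mi.fordblks);
-*/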
-#endif /* NO_MALLINFO */
-
-/*
- independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
-
- independent_calloc is similar to calloc, but instead of returning a
- single cleared space, it returns an array of pointers to n_elements
- independent elements that can hold contents of size elem_size, each
- of which starts out cleared, and can be independently freed,
- realloc'ed etc. The elements are guaranteed to be adjacently
- allocated (this is not guaranteed to occur with multiple callocs or
- mallocs), which may also improve cache locality in some
- applications.
-
- The "chunks" argument is optional (i.e., may be null, which is
- probably the most typical usage). If it is null, the returned array
- is itself dynamically allocated and should also be freed when it is
- no longer needed. Otherwise, the chunks array must be of at least
- n_elements in length. It is filled in with the pointers to the
- chunks.
-
- In either case, independent_calloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and "chunks"
- is null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use regular calloc and assign pointers into this
- space to represent elements. (In this case though, you cannot
- independently free elements.)
-
- independent_calloc simplifies and speeds up implementations of many
- kinds of pools. It may also be useful when constructing large data
- structures that initially have a fixed number of fixed-sized nodes,
- but the number is not known at compile time, and some of the nodes
- may later need to be freed. For example:
-
- struct Node { int item; struct Node* next; };
-
- struct Node* build_list() {
- struct Node** pool;
- int i, n = read_number_of_nodes_needed();
- if (n <= 0) return 0;
- pool = (struct Node**) independent_calloc(n, sizeof(struct Node), 0);
- if (pool == 0) die();
- // organize into a linked list...
- struct Node* first = pool[0];
- for (i = 0; i < n-1; ++i)
- pool[i]->next = pool[i+1];
- free(pool); // Can now free the array (or not, if it is needed later)
- return first;
- }
-*/
-void** dlindependent_calloc(size_t, size_t, void**);
-
-/*
- independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
-
- independent_comalloc allocates, all at once, a set of n_elements
- chunks with sizes indicated in the "sizes" array. It returns
- an array of pointers to these elements, each of which can be
- independently freed, realloc'ed etc. The elements are guaranteed to
- be adjacently allocated (this is not guaranteed to occur with
- multiple callocs or mallocs), which may also improve cache locality
- in some applications.
-
- The "chunks" argument is optional (i.e., may be null). If it is null
- the returned array is itself dynamically allocated and should also
- be freed when it is no longer needed. Otherwise, the chunks array
- must be of at least n_elements in length. It is filled in with the
- pointers to the chunks.
-
- In either case, independent_comalloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and chunks is
- null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use a single regular malloc, and assign pointers at
- particular offsets in the aggregate space. (In this case though, you
- cannot independently free elements.)
-
- independent_comalloc differs from independent_calloc in that each
- element may have a different size, and also that it does not
- automatically clear elements.
-
- independent_comalloc can be used to speed up allocation in cases
- where several structs or objects must always be allocated at the
- same time. For example:
-
- struct Head { ... }
- struct Foot { ... }
-
- void send_message(char* msg) {
- int msglen = strlen(msg);
- size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
- void* chunks[3];
- if (independent_comalloc(3, sizes, chunks) == 0)
- die();
- struct Head* head = (struct Head*)(chunks[0]);
- char* body = (char*)(chunks[1]);
- struct Foot* foot = (struct Foot*)(chunks[2]);
- // ...
- }
-
- In general though, independent_comalloc is worth using only for
- larger values of n_elements. For small values, you probably won't
- detect enough difference from series of malloc calls to bother.
-
- Overuse of independent_comalloc can increase overall memory usage,
- since it cannot reuse existing noncontiguous small chunks that
- might be available for some of the elements.
-*/
-void** dlindependent_comalloc(size_t, size_t*, void**);
-
-
-/*
- pvalloc(size_t n);
- Equivalent to valloc(minimum-page-that-holds(n)), that is,
- round up n to nearest pagesize.
- */
-void* dlpvalloc(size_t);
-
-/*
- malloc_trim(size_t pad);
-
- If possible, gives memory back to the system (via negative arguments
- to sbrk) if there is unused memory at the `high' end of the malloc
- pool or in unused MMAP segments. You can call this after freeing
- large blocks of memory to potentially reduce the system-level memory
- requirements of a program. However, it cannot guarantee to reduce
- memory. Under some allocation patterns, some large free blocks of
- memory will be locked between two used chunks, so they cannot be
- given back to the system.
-
- The `pad' argument to malloc_trim represents the amount of free
- trailing space to leave untrimmed. If this argument is zero, only
- the minimum amount of memory to maintain internal data structures
- will be left. Non-zero arguments can be supplied to maintain enough
- trailing space to service future expected allocations without having
- to re-obtain memory from the system.
-
- Malloc_trim returns 1 if it actually released any memory, else 0.
-*/
-int dlmalloc_trim(size_t);
-
-/*
- malloc_usable_size(void* p);
-
- Returns the number of bytes you can actually use in
- an allocated chunk, which may be more than you requested (although
- often not) due to alignment and minimum size constraints.
- You can use this many bytes without worrying about
- overwriting other allocated objects. This is not a particularly great
- programming practice. malloc_usable_size can be more useful in
- debugging and assertions, for example:
-
- p = malloc(n);
- assert(malloc_usable_size(p) >= 256);
-*/
-size_t dlmalloc_usable_size(void*);
-
-/*
- malloc_stats();
- Prints on stderr the amount of space obtained from the system (both
- via sbrk and mmap), the maximum amount (which may be more than
- current if malloc_trim and/or munmap got called), and the current
- number of bytes allocated via malloc (or realloc, etc) but not yet
- freed. Note that this is the number of bytes allocated, not the
- number requested. It will be larger than the number requested
- because of alignment and bookkeeping overhead. Because it includes
- alignment wastage as being in use, this figure may be greater than
- zero even when no user-level chunks are allocated.
-
- The reported current and maximum system memory can be inaccurate if
- a program makes other calls to system memory allocation functions
- (normally sbrk) outside of malloc.
-
- malloc_stats prints only the most commonly interesting statistics.
- More information can be obtained by calling mallinfo.
-*/
-void dlmalloc_stats(void);
-
-#endif /* ONLY_MSPACES */
-
-#if MSPACES
-
-/*
- mspace is an opaque type representing an independent
- region of space that supports mspace_malloc, etc.
-*/
-typedef void* mspace;
-
-/*
- create_mspace creates and returns a new independent space with the
- given initial capacity, or, if 0, the default granularity size. It
- returns null if there is no system memory available to create the
- space. If argument locked is non-zero, the space uses a separate
- lock to control access. The capacity of the space will grow
- dynamically as needed to service mspace_malloc requests. You can
- control the sizes of incremental increases of this space by
- compiling with a different DEFAULT_GRANULARITY or dynamically
- setting with mallopt(M_GRANULARITY, value).
-*/
-mspace create_mspace(size_t capacity, int locked);
-
-/*
- destroy_mspace destroys the given space, and attempts to return all
- of its memory back to the system, returning the total number of
- bytes freed. After destruction, the results of access to all memory
- used by the space become undefined.
-*/
-size_t destroy_mspace(mspace msp);
-
-/*
- create_mspace_with_base uses the memory supplied as the initial base
- of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
- space is used for bookkeeping, so the capacity must be at least this
- large. (Otherwise 0 is returned.) When this initial space is
- exhausted, additional memory will be obtained from the system.
- Destroying this space will deallocate all additionally allocated
- space (if possible) but not the initial base.
-*/
-mspace create_mspace_with_base(void* base, size_t capacity, int locked);
-
-/*
- mspace_malloc behaves as malloc, but operates within
- the given space.
-*/
-void* mspace_malloc(mspace msp, size_t bytes);
-
-/*
- mspace_free behaves as free, but operates within
- the given space.
-
- If compiled with FOOTERS==1, mspace_free is not actually needed.
- free may be called instead of mspace_free because freed chunks from
- any space are handled by their originating spaces.
-*/
-void mspace_free(mspace msp, void* mem);
-
-/*
- mspace_realloc behaves as realloc, but operates within
- the given space.
-
- If compiled with FOOTERS==1, mspace_realloc is not actually
- needed. realloc may be called instead of mspace_realloc because
- realloced chunks from any space are handled by their originating
- spaces.
-*/
-void* mspace_realloc(mspace msp, void* mem, size_t newsize);
-
-/*
- mspace_calloc behaves as calloc, but operates within
- the given space.
-*/
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
-
-/*
- mspace_memalign behaves as memalign, but operates within
- the given space.
-*/
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
-
-/*
- mspace_independent_calloc behaves as independent_calloc, but
- operates within the given space.
-*/
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
- size_t elem_size, void* chunks[]);
-
-/*
- mspace_independent_comalloc behaves as independent_comalloc, but
- operates within the given space.
-*/
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
- size_t sizes[], void* chunks[]);
-
-/*
- mspace_footprint() returns the number of bytes obtained from the
- system for this space.
-*/
-size_t mspace_footprint(mspace msp);
-
-/*
- mspace_max_footprint() returns the peak number of bytes obtained from the
- system for this space.
-*/
-size_t mspace_max_footprint(mspace msp);
-
-
-#if !NO_MALLINFO
-/*
- mspace_mallinfo behaves as mallinfo, but reports properties of
- the given space.
-*/
-struct mallinfo mspace_mallinfo(mspace msp);
-#endif /* NO_MALLINFO */
-
-/*
- mspace_malloc_stats behaves as malloc_stats, but reports
- properties of the given space.
-*/
-void mspace_malloc_stats(mspace msp);
-
-/*
- mspace_trim behaves as malloc_trim, but
- operates within the given space.
-*/
-int mspace_trim(mspace msp, size_t pad);
-
-/*
- An alias for mallopt.
-*/
-int mspace_mallopt(int, int);
-
-#endif /* MSPACES */
-
-#ifdef __cplusplus
-}; /* end of extern "C" */
-#endif /* __cplusplus */
-
-/*
- ========================================================================
- To make a fully customizable malloc.h header file, cut everything
- above this line, put into file malloc.h, edit to suit, and #include it
- on the next line, as well as in programs that use this malloc.
- ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/*------------------------------ internal #includes ---------------------- */
-
-#ifdef _MSC_VER
-#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
-#endif /* _MSC_VER */
-
-#include <stdio.h> /* for printing in malloc_stats */
-
-#ifndef LACKS_ERRNO_H
-#include <errno.h> /* for MALLOC_FAILURE_ACTION */
-#endif /* LACKS_ERRNO_H */
-#if FOOTERS
-#include <time.h> /* for magic initialization */
-#endif /* FOOTERS */
-#ifndef LACKS_STDLIB_H
-#include <stdlib.h> /* for abort() */
-#endif /* LACKS_STDLIB_H */
-#ifdef DEBUG
-#if ABORT_ON_ASSERT_FAILURE
-#define assert(x) if(!(x)) ABORT
-#else /* ABORT_ON_ASSERT_FAILURE */
-#include <assert.h>
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#else /* DEBUG */
-#define assert(x)
-#endif /* DEBUG */
-#ifndef LACKS_STRING_H
-#include <string.h> /* for memset etc */
-#endif /* LACKS_STRING_H */
-#if USE_BUILTIN_FFS
-#ifndef LACKS_STRINGS_H
-#include <strings.h> /* for ffs */
-#endif /* LACKS_STRINGS_H */
-#endif /* USE_BUILTIN_FFS */
-#if HAVE_MMAP
-#ifndef LACKS_SYS_MMAN_H
-#include <sys/mman.h> /* for mmap */
-#endif /* LACKS_SYS_MMAN_H */
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif /* LACKS_FCNTL_H */
-#endif /* HAVE_MMAP */
-#if HAVE_MORECORE
-#ifndef LACKS_UNISTD_H
-#include <unistd.h> /* for sbrk */
-#else /* LACKS_UNISTD_H */
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
-extern void* sbrk(ptrdiff_t);
-#endif /* FreeBSD etc */
-#endif /* LACKS_UNISTD_H */
-#endif /* HAVE_MORECORE */
-
-#ifndef WIN32
-#ifndef malloc_getpagesize
-# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
-# ifndef _SC_PAGE_SIZE
-# define _SC_PAGE_SIZE _SC_PAGESIZE
-# endif
-# endif
-# ifdef _SC_PAGE_SIZE
-# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-# else
-# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
- extern size_t getpagesize();
-# define malloc_getpagesize getpagesize()
-# else
-# ifdef WIN32 /* use supplied emulation of getpagesize */
-# define malloc_getpagesize getpagesize()
-# else
-# ifndef LACKS_SYS_PARAM_H
-# include <sys/param.h>
-# endif
-# ifdef EXEC_PAGESIZE
-# define malloc_getpagesize EXEC_PAGESIZE
-# else
-# ifdef NBPG
-# ifndef CLSIZE
-# define malloc_getpagesize NBPG
-# else
-# define malloc_getpagesize (NBPG * CLSIZE)
-# endif
-# else
-# ifdef NBPC
-# define malloc_getpagesize NBPC
-# else
-# ifdef PAGESIZE
-# define malloc_getpagesize PAGESIZE
-# else /* just guess */
-# define malloc_getpagesize ((size_t)4096U)
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-#endif
-#endif
-
-/* ------------------- size_t and alignment properties -------------------- */
-
-/* The byte and bit size of a size_t */
-#define SIZE_T_SIZE (sizeof(size_t))
-#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
-
-/* Some constants coerced to size_t */
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
+
+/* HP-UX's stdlib.h redefines mallinfo unless _STRUCT_MALLINFO is defined */
+#define _STRUCT_MALLINFO
+
+struct mallinfo {
+ MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
+ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
+ MALLINFO_FIELD_TYPE smblks; /* always 0 */
+ MALLINFO_FIELD_TYPE hblks; /* always 0 */
+ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
+ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */
+ MALLINFO_FIELD_TYPE fsmblks; /* always 0 */
+ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
+ MALLINFO_FIELD_TYPE fordblks; /* total free space */
+ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
+};
+
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
+#endif /* NO_MALLINFO */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#if !ONLY_MSPACES
+
+/* ------------------- Declarations of public routines ------------------- */
+
+#ifndef USE_DL_PREFIX
+#define dlcalloc calloc
+#define dlfree free
+#define dlmalloc malloc
+#define dlmemalign memalign
+#define dlrealloc realloc
+#define dlvalloc valloc
+#define dlpvalloc pvalloc
+#define dlmallinfo mallinfo
+#define dlmallopt mallopt
+#define dlmalloc_trim malloc_trim
+#define dlmalloc_stats malloc_stats
+#define dlmalloc_usable_size malloc_usable_size
+#define dlmalloc_footprint malloc_footprint
+#define dlmalloc_max_footprint malloc_max_footprint
+#define dlindependent_calloc independent_calloc
+#define dlindependent_comalloc independent_comalloc
+#endif /* USE_DL_PREFIX */
+
+
+/*
+ malloc(size_t n)
+ Returns a pointer to a newly allocated chunk of at least n bytes, or
+ null if no space is available, in which case errno is set to ENOMEM
+ on ANSI C systems.
+
+ If n is zero, malloc returns a minimum-sized chunk. (The minimum
+ size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
+ systems.) Note that size_t is an unsigned type, so calls with
+ arguments that would be negative if signed are interpreted as
+ requests for huge amounts of space, which will often fail. The
+ maximum supported value of n differs across systems, but is in all
+ cases less than the maximum representable value of a size_t.
+*/
+void* dlmalloc(size_t);
+
+/*
+ free(void* p)
+ Releases the chunk of memory pointed to by p, that had been previously
+ allocated using malloc or a related routine such as realloc.
+ It has no effect if p is null. If p was not malloced or already
+ freed, free(p) will by default cause the current program to abort.
+*/
+void dlfree(void*);
+
+/*
+ calloc(size_t n_elements, size_t element_size);
+ Returns a pointer to n_elements * element_size bytes, with all locations
+ set to zero.
+*/
+void* dlcalloc(size_t, size_t);
+
+/*
+ realloc(void* p, size_t n)
+ Returns a pointer to a chunk of size n that contains the same data
+ as does chunk p up to the minimum of (n, p's size) bytes, or null
+ if no space is available.
+
+ The returned pointer may or may not be the same as p. The algorithm
+ prefers extending p in most cases when possible, otherwise it
+ employs the equivalent of a malloc-copy-free sequence.
+
+ If p is null, realloc is equivalent to malloc.
+
+ If space is not available, realloc returns null, errno is set (if on
+ ANSI) and p is NOT freed.
+
+ If n is for fewer bytes than already held by p, the newly unused
+ space is lopped off and freed if possible. realloc with a size
+ argument of zero (re)allocates a minimum-sized chunk.
+
+ The old unix realloc convention of allowing the last-free'd chunk
+ to be used as an argument to realloc is not supported.
+*/
+
+void* dlrealloc(void*, size_t);
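+
+/*
+  For illustration, a common calling pattern implied by the note above
+  (a sketch, not part of the interface): because p is NOT freed on
+  failure, keep the old pointer until the new one is known to be valid.
+
+    void* grow(void* p, size_t newsize) {
+      void* q = dlrealloc(p, newsize);
+      if (q == 0)
+        return p;   // allocation failed; p is still valid and still owned
+      return q;     // success; p must no longer be used
+    }
+*/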
+
+/*
+ memalign(size_t alignment, size_t n);
+ Returns a pointer to a newly allocated chunk of n bytes, aligned
+ in accord with the alignment argument.
+
+ The alignment argument should be a power of two. If the argument is
+ not a power of two, the nearest greater power is used.
+ 8-byte alignment is guaranteed by normal malloc calls, so don't
+ bother calling memalign with an argument of 8 or less.
+
+ Overreliance on memalign is a sure way to fragment space.
+*/
+void* dlmemalign(size_t, size_t);
+
+/*
+ valloc(size_t n);
+ Equivalent to memalign(pagesize, n), where pagesize is the page
+ size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void* dlvalloc(size_t);
+
+/*
+ mallopt(int parameter_number, int parameter_value)
+ Sets tunable parameters. The format is to provide a
+ (parameter-number, parameter-value) pair. mallopt then sets the
+ corresponding parameter to the argument value if it can (i.e., so
+ long as the value is meaningful), and returns 1 if successful else
+ 0. SVID/XPG/ANSI defines four standard param numbers for mallopt,
+ normally defined in malloc.h. None of these are used in this malloc,
+ so setting them has no effect. But this malloc also supports other
+ options in mallopt. See below for details. Briefly, supported
+ parameters are as follows (listed defaults are for "typical"
+ configurations).
+
+ Symbol param # default allowed param values
+ M_TRIM_THRESHOLD -1 2*1024*1024 any (MAX_SIZE_T disables)
+ M_GRANULARITY -2 page size any power of 2 >= page size
+ M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
+*/
+int dlmallopt(int, int);
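+
+/*
+  For illustration, a possible tuning sequence using the parameters
+  tabulated above (a sketch; the M_* symbols are the param numbers
+  listed in the table, defined elsewhere in this file):
+
+    dlmallopt(M_TRIM_THRESHOLD, 128*1024);   // trim when >128K is trailing free
+    dlmallopt(M_MMAP_THRESHOLD, 1024*1024);  // mmap only requests of 1MB or more
+    if (dlmallopt(M_GRANULARITY, 64*1024) == 0)
+      ;  // returns 0 if rejected (value must be a power of two >= page size)
+*/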
+
+/*
+ malloc_footprint();
+ Returns the number of bytes obtained from the system. The total
+ number of bytes allocated by malloc, realloc etc., is less than this
+ value. Unlike mallinfo, this function returns only a precomputed
+ result, so can be called frequently to monitor memory consumption.
+ Even if locks are otherwise defined, this function does not use them,
+ so results might not be up to date.
+*/
+size_t dlmalloc_footprint(void);
+
+/*
+ malloc_max_footprint();
+ Returns the maximum number of bytes obtained from the system. This
+ value will be greater than current footprint if deallocated space
+ has been reclaimed by the system. The peak number of bytes allocated
+ by malloc, realloc etc., is less than this value. Unlike mallinfo,
+ this function returns only a precomputed result, so can be called
+ frequently to monitor memory consumption. Even if locks are
+ otherwise defined, this function does not use them, so results might
+ not be up to date.
+*/
+size_t dlmalloc_max_footprint(void);
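+
+/*
+  For illustration, a lightweight way to watch memory consumption using
+  the two precomputed counters above (a sketch):
+
+    size_t now  = dlmalloc_footprint();
+    size_t peak = dlmalloc_max_footprint();
+    fprintf(stderr, "heap: %lu bytes from system (peak %lu)\n",
+            (unsigned long)now, (unsigned long)peak);
+*/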
+
+#if !NO_MALLINFO
+/*
+ mallinfo()
+ Returns (by copy) a struct containing various summary statistics:
+
+ arena: current total non-mmapped bytes allocated from system
+ ordblks: the number of free chunks
+ smblks: always zero.
+ hblks: current number of mmapped regions
+ hblkhd: total bytes held in mmapped regions
+ usmblks: the maximum total allocated space. This will be greater
+ than current total if trimming has occurred.
+ fsmblks: always zero
+ uordblks: current total allocated space (normal or mmapped)
+ fordblks: total free space
+ keepcost: the maximum number of bytes that could ideally be released
+ back to system via malloc_trim. ("ideally" means that
+ it ignores page restrictions etc.)
+
+ Because these fields are ints, but internal bookkeeping may
+ be kept as longs, the reported values may wrap around zero and
+ thus be inaccurate.
+*/
+struct mallinfo dlmallinfo(void);
+#endif /* NO_MALLINFO */
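+
+/*
+  For illustration, the fields most often inspected (a sketch assuming
+  NO_MALLINFO is not set and MALLINFO_FIELD_TYPE is the default int):
+
+    struct mallinfo mi = dlmallinfo();
+    fprintf(stderr, "allocated: %d  free: %d  trimmable: %d  non-mmapped from system: %d\n",
+            mi.uordblks, mi.fordblks, mi.keepcost, mi.arena);
+*/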
+
+/*
+ independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+ independent_calloc is similar to calloc, but instead of returning a
+ single cleared space, it returns an array of pointers to n_elements
+ independent elements that can hold contents of size elem_size, each
+ of which starts out cleared, and can be independently freed,
+ realloc'ed etc. The elements are guaranteed to be adjacently
+ allocated (this is not guaranteed to occur with multiple callocs or
+ mallocs), which may also improve cache locality in some
+ applications.
+
+ The "chunks" argument is optional (i.e., may be null, which is
+ probably the most typical usage). If it is null, the returned array
+ is itself dynamically allocated and should also be freed when it is
+ no longer needed. Otherwise, the chunks array must be of at least
+ n_elements in length. It is filled in with the pointers to the
+ chunks.
+
+ In either case, independent_calloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and "chunks"
+ is null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be individually freed when it is no longer
+ needed. If you'd like to instead be able to free all at once, you
+ should instead use regular calloc and assign pointers into this
+ space to represent elements. (In this case though, you cannot
+ independently free elements.)
+
+ independent_calloc simplifies and speeds up implementations of many
+ kinds of pools. It may also be useful when constructing large data
+ structures that initially have a fixed number of fixed-sized nodes,
+ but the number is not known at compile time, and some of the nodes
+ may later need to be freed. For example:
+
+ struct Node { int item; struct Node* next; };
+
+ struct Node* build_list() {
+ struct Node** pool;
+ int n = read_number_of_nodes_needed();
+ if (n <= 0) return 0;
+ pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0));
+ if (pool == 0) die();
+ // organize into a linked list...
+ struct Node* first = pool[0];
+ for (int i = 0; i < n-1; ++i)
+ pool[i]->next = pool[i+1];
+ free(pool); // Can now free the array (or not, if it is needed later)
+ return first;
+ }
+*/
+void** dlindependent_calloc(size_t, size_t, void**);
+
+/*
+ independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+ independent_comalloc allocates, all at once, a set of n_elements
+ chunks with sizes indicated in the "sizes" array. It returns
+ an array of pointers to these elements, each of which can be
+ independently freed, realloc'ed etc. The elements are guaranteed to
+ be adjacently allocated (this is not guaranteed to occur with
+ multiple callocs or mallocs), which may also improve cache locality
+ in some applications.
+
+ The "chunks" argument is optional (i.e., may be null). If it is null
+ the returned array is itself dynamically allocated and should also
+ be freed when it is no longer needed. Otherwise, the chunks array
+ must be of at least n_elements in length. It is filled in with the
+ pointers to the chunks.
+
+ In either case, independent_comalloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and chunks is
+ null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be individually freed when it is no longer
+ needed. If you'd like to instead be able to free all at once, you
+ should instead use a single regular malloc, and assign pointers at
+ particular offsets in the aggregate space. (In this case though, you
+ cannot independently free elements.)
+
+ independent_comalloc differs from independent_calloc in that each
+ element may have a different size, and also that it does not
+ automatically clear elements.
+
+ independent_comalloc can be used to speed up allocation in cases
+ where several structs or objects must always be allocated at the
+ same time. For example:
+
+ struct Head { ... }
+ struct Foot { ... }
+
+ void send_message(char* msg) {
+ int msglen = strlen(msg);
+ size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+ void* chunks[3];
+ if (independent_comalloc(3, sizes, chunks) == 0)
+ die();
+ struct Head* head = (struct Head*)(chunks[0]);
+ char* body = (char*)(chunks[1]);
+ struct Foot* foot = (struct Foot*)(chunks[2]);
+ // ...
+ }
+
+ In general though, independent_comalloc is worth using only for
+ larger values of n_elements. For small values, you probably won't
+ detect enough difference from a series of malloc calls to bother.
+
+ Overuse of independent_comalloc can increase overall memory usage,
+ since it cannot reuse existing noncontiguous small chunks that
+ might be available for some of the elements.
+*/
+void** dlindependent_comalloc(size_t, size_t*, void**);
+
+
+/*
+ pvalloc(size_t n);
+ Equivalent to valloc(minimum-page-that-holds(n)), that is,
+ round up n to nearest pagesize.
+ */
+void* dlpvalloc(size_t);
+
+/*
+ malloc_trim(size_t pad);
+
+ If possible, gives memory back to the system (via negative arguments
+ to sbrk) if there is unused memory at the `high' end of the malloc
+ pool or in unused MMAP segments. You can call this after freeing
+ large blocks of memory to potentially reduce the system-level memory
+ requirements of a program. However, it cannot guarantee to reduce
+ memory. Under some allocation patterns, some large free blocks of
+ memory will be locked between two used chunks, so they cannot be
+ given back to the system.
+
+ The `pad' argument to malloc_trim represents the amount of free
+ trailing space to leave untrimmed. If this argument is zero, only
+ the minimum amount of memory to maintain internal data structures
+ will be left. Non-zero arguments can be supplied to maintain enough
+ trailing space to service future expected allocations without having
+ to re-obtain memory from the system.
+
+ Malloc_trim returns 1 if it actually released any memory, else 0.
+*/
+int dlmalloc_trim(size_t);
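+
+/*
+  For illustration, a typical use after dropping a large working set
+  (a sketch; release_cached_buffers is a hypothetical application call,
+  and whether anything can actually be returned depends on the
+  allocation pattern, as described above):
+
+    release_cached_buffers();
+    if (dlmalloc_trim(64*1024))   // keep 64K of trailing slack for reuse
+      ;                           // some memory was given back to the system
+*/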
+
+/*
+ malloc_usable_size(void* p);
+
+ Returns the number of bytes you can actually use in
+ an allocated chunk, which may be more than you requested (although
+ often not) due to alignment and minimum size constraints.
+ You can use this many bytes without worrying about
+ overwriting other allocated objects. This is not a particularly great
+ programming practice. malloc_usable_size can be more useful in
+ debugging and assertions, for example:
+
+ p = malloc(n);
+ assert(malloc_usable_size(p) >= 256);
+*/
+size_t dlmalloc_usable_size(void*);
+
+/*
+ malloc_stats();
+ Prints on stderr the amount of space obtained from the system (both
+ via sbrk and mmap), the maximum amount (which may be more than
+ current if malloc_trim and/or munmap got called), and the current
+ number of bytes allocated via malloc (or realloc, etc) but not yet
+ freed. Note that this is the number of bytes allocated, not the
+ number requested. It will be larger than the number requested
+ because of alignment and bookkeeping overhead. Because it includes
+ alignment wastage as being in use, this figure may be greater than
+ zero even when no user-level chunks are allocated.
+
+ The reported current and maximum system memory can be inaccurate if
+ a program makes other calls to system memory allocation functions
+ (normally sbrk) outside of malloc.
+
+ malloc_stats prints only the most commonly interesting statistics.
+ More information can be obtained by calling mallinfo.
+*/
+void dlmalloc_stats(void);
+
+#endif /* ONLY_MSPACES */
+
+#if MSPACES
+
+/*
+ mspace is an opaque type representing an independent
+ region of space that supports mspace_malloc, etc.
+*/
+typedef void* mspace;
+
+/*
+ create_mspace creates and returns a new independent space with the
+ given initial capacity, or, if 0, the default granularity size. It
+ returns null if there is no system memory available to create the
+ space. If argument locked is non-zero, the space uses a separate
+ lock to control access. The capacity of the space will grow
+ dynamically as needed to service mspace_malloc requests. You can
+ control the sizes of incremental increases of this space by
+ compiling with a different DEFAULT_GRANULARITY or dynamically
+ setting with mallopt(M_GRANULARITY, value).
+*/
+mspace create_mspace(size_t capacity, int locked);
+
+/*
+ destroy_mspace destroys the given space, and attempts to return all
+ of its memory back to the system, returning the total number of
+ bytes freed. After destruction, the results of access to all memory
+ used by the space become undefined.
+*/
+size_t destroy_mspace(mspace msp);
+
+/*
+ create_mspace_with_base uses the memory supplied as the initial base
+ of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
+ space is used for bookkeeping, so the capacity must be at least this
+ large. (Otherwise 0 is returned.) When this initial space is
+ exhausted, additional memory will be obtained from the system.
+ Destroying this space will deallocate all additionally allocated
+ space (if possible) but not the initial base.
+*/
+mspace create_mspace_with_base(void* base, size_t capacity, int locked);
+
+/*
+ mspace_malloc behaves as malloc, but operates within
+ the given space.
+*/
+void* mspace_malloc(mspace msp, size_t bytes);
+
+/*
+ mspace_free behaves as free, but operates within
+ the given space.
+
+ If compiled with FOOTERS==1, mspace_free is not actually needed.
+ free may be called instead of mspace_free because freed chunks from
+ any space are handled by their originating spaces.
+*/
+void mspace_free(mspace msp, void* mem);
+
+/*
+ mspace_realloc behaves as realloc, but operates within
+ the given space.
+
+ If compiled with FOOTERS==1, mspace_realloc is not actually
+ needed. realloc may be called instead of mspace_realloc because
+ realloced chunks from any space are handled by their originating
+ spaces.
+*/
+void* mspace_realloc(mspace msp, void* mem, size_t newsize);
+
+/*
+ mspace_calloc behaves as calloc, but operates within
+ the given space.
+*/
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
+
+/*
+ mspace_memalign behaves as memalign, but operates within
+ the given space.
+*/
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
+
+/*
+ mspace_independent_calloc behaves as independent_calloc, but
+ operates within the given space.
+*/
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+ size_t elem_size, void* chunks[]);
+
+/*
+ mspace_independent_comalloc behaves as independent_comalloc, but
+ operates within the given space.
+*/
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+ size_t sizes[], void* chunks[]);
+
+/*
+ mspace_footprint() returns the number of bytes obtained from the
+ system for this space.
+*/
+size_t mspace_footprint(mspace msp);
+
+/*
+ mspace_max_footprint() returns the peak number of bytes obtained from the
+ system for this space.
+*/
+size_t mspace_max_footprint(mspace msp);
+
+
+#if !NO_MALLINFO
+/*
+ mspace_mallinfo behaves as mallinfo, but reports properties of
+ the given space.
+*/
+struct mallinfo mspace_mallinfo(mspace msp);
+#endif /* NO_MALLINFO */
+
+/*
+ mspace_malloc_stats behaves as malloc_stats, but reports
+ properties of the given space.
+*/
+void mspace_malloc_stats(mspace msp);
+
+/*
+ mspace_trim behaves as malloc_trim, but
+ operates within the given space.
+*/
+int mspace_trim(mspace msp, size_t pad);
+
+/*
+ An alias for mallopt.
+*/
+int mspace_mallopt(int, int);
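+
+/*
+  For illustration, a typical mspace lifecycle (a sketch assuming MSPACES
+  is enabled): every chunk lives in one private region, so the whole
+  region can be discarded in a single call instead of freeing each chunk.
+
+    mspace arena = create_mspace(0, 1);            // default capacity, locked
+    if (arena != 0) {
+      void*   a = mspace_malloc(arena, 100);
+      double* b = (double*)mspace_calloc(arena, 10, sizeof(double));
+      // ... use a and b ...
+      mspace_free(arena, a);                       // optional: free individually
+      destroy_mspace(arena);                       // releases b and everything else
+    }
+*/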
+
+#endif /* MSPACES */
+
+#ifdef __cplusplus
+}; /* end of extern "C" */
+#endif /* __cplusplus */
+
+/*
+ ========================================================================
+ To make a fully customizable malloc.h header file, cut everything
+ above this line, put into file malloc.h, edit to suit, and #include it
+ on the next line, as well as in programs that use this malloc.
+ ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/*------------------------------ internal #includes ---------------------- */
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
+#endif /* _MSC_VER */
+
+#include <stdio.h> /* for printing in malloc_stats */
+
+#ifndef LACKS_ERRNO_H
+#include <errno.h> /* for MALLOC_FAILURE_ACTION */
+#endif /* LACKS_ERRNO_H */
+#if FOOTERS
+#include <time.h> /* for magic initialization */
+#endif /* FOOTERS */
+#ifndef LACKS_STDLIB_H
+#include <stdlib.h> /* for abort() */
+#endif /* LACKS_STDLIB_H */
+#ifdef DEBUG
+#if ABORT_ON_ASSERT_FAILURE
+#define assert(x) if(!(x)) ABORT
+#else /* ABORT_ON_ASSERT_FAILURE */
+#include <assert.h>
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#else /* DEBUG */
+#define assert(x)
+#endif /* DEBUG */
+#ifndef LACKS_STRING_H
+#include <string.h> /* for memset etc */
+#endif /* LACKS_STRING_H */
+#if USE_BUILTIN_FFS
+#ifndef LACKS_STRINGS_H
+#include <strings.h> /* for ffs */
+#endif /* LACKS_STRINGS_H */
+#endif /* USE_BUILTIN_FFS */
+#if HAVE_MMAP
+#ifndef LACKS_SYS_MMAN_H
+#include <sys/mman.h> /* for mmap */
+#endif /* LACKS_SYS_MMAN_H */
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif /* LACKS_FCNTL_H */
+#endif /* HAVE_MMAP */
+#if HAVE_MORECORE
+#ifndef LACKS_UNISTD_H
+#include <unistd.h> /* for sbrk */
+#else /* LACKS_UNISTD_H */
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+extern void* sbrk(ptrdiff_t);
+#endif /* FreeBSD etc */
+#endif /* LACKS_UNISTD_H */
+#endif /* HAVE_MORECORE */
+
+#ifndef WIN32
+#ifndef malloc_getpagesize
+# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
+# ifndef _SC_PAGE_SIZE
+# define _SC_PAGE_SIZE _SC_PAGESIZE
+# endif
+# endif
+# ifdef _SC_PAGE_SIZE
+# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+# else
+# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+ extern size_t getpagesize();
+# define malloc_getpagesize getpagesize()
+# else
+# ifdef WIN32 /* use supplied emulation of getpagesize */
+# define malloc_getpagesize getpagesize()
+# else
+# ifndef LACKS_SYS_PARAM_H
+# include <sys/param.h>
+# endif
+# ifdef EXEC_PAGESIZE
+# define malloc_getpagesize EXEC_PAGESIZE
+# else
+# ifdef NBPG
+# ifndef CLSIZE
+# define malloc_getpagesize NBPG
+# else
+# define malloc_getpagesize (NBPG * CLSIZE)
+# endif
+# else
+# ifdef NBPC
+# define malloc_getpagesize NBPC
+# else
+# ifdef PAGESIZE
+# define malloc_getpagesize PAGESIZE
+# else /* just guess */
+# define malloc_getpagesize ((size_t)4096U)
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+#endif
+#endif
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE (sizeof(size_t))
+#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
/* Annoying but necessary to avoid errors on some platforms */
-#define SIZE_T_ZERO ((size_t)0)
-#define SIZE_T_ONE ((size_t)1)
-#define SIZE_T_TWO ((size_t)2)
-#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
-#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
-#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
-#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
-
-/* The bit mask value corresponding to MALLOC_ALIGNMENT */
-#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
-
-/* True if address a has acceptable alignment */
-#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
-
-/* the number of bytes to offset an address to align it */
-#define align_offset(A)\
- ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
- ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
-
-/* -------------------------- MMAP preliminaries ------------------------- */
-
-/*
- If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
- checks to fail so compiler optimizer can delete code rather than
- using so many "#if"s.
-*/
-
-
-/* MORECORE and MMAP must return MFAIL on failure */
-#define MFAIL ((void*)(MAX_SIZE_T))
-#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
-
-#if !HAVE_MMAP
-#define IS_MMAPPED_BIT (SIZE_T_ZERO)
-#define USE_MMAP_BIT (SIZE_T_ZERO)
-#define CALL_MMAP(s) MFAIL
-#define CALL_MUNMAP(a, s) (-1)
-#define DIRECT_MMAP(s) MFAIL
-
-#else /* HAVE_MMAP */
-#define IS_MMAPPED_BIT (SIZE_T_ONE)
-#define USE_MMAP_BIT (SIZE_T_ONE)
-
-#if !defined(WIN32) && !defined (__OS2__)
-#define CALL_MUNMAP(a, s) munmap((a), (s))
-#define MMAP_PROT (PROT_READ|PROT_WRITE)
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif /* MAP_ANON */
-#ifdef MAP_ANONYMOUS
-#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
-#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
-#else /* MAP_ANONYMOUS */
-/*
- Nearly all versions of mmap support MAP_ANONYMOUS, so the following
- is unlikely to be needed, but is supplied just in case.
-*/
-#define MMAP_FLAGS (MAP_PRIVATE)
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-#define CALL_MMAP(s) ((dev_zero_fd < 0) ? \
- (dev_zero_fd = open("/dev/zero", O_RDWR), \
- mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
- mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
-#endif /* MAP_ANONYMOUS */
-
-#define DIRECT_MMAP(s) CALL_MMAP(s)
-
-#elif defined(__OS2__)
-
-/* OS/2 MMAP via DosAllocMem */
-static void* os2mmap(size_t size) {
- void* ptr;
- if (DosAllocMem(&ptr, size, OBJ_ANY|PAG_COMMIT|PAG_READ|PAG_WRITE) &&
- DosAllocMem(&ptr, size, PAG_COMMIT|PAG_READ|PAG_WRITE))
- return MFAIL;
- return ptr;
-}
-
-#define os2direct_mmap(n) os2mmap(n)
-
-/* This function supports releasing coalesced segments */
-static int os2munmap(void* ptr, size_t size) {
- while (size) {
- ULONG ulSize = size;
- ULONG ulFlags = 0;
- if (DosQueryMem(ptr, &ulSize, &ulFlags) != 0)
- return -1;
- if ((ulFlags & PAG_BASE) == 0 ||(ulFlags & PAG_COMMIT) == 0 ||
- ulSize > size)
- return -1;
- if (DosFreeMem(ptr) != 0)
- return -1;
- ptr = ( void * ) ( ( char * ) ptr + ulSize );
- size -= ulSize;
- }
- return 0;
-}
-
-#define CALL_MMAP(s) os2mmap(s)
-#define CALL_MUNMAP(a, s) os2munmap((a), (s))
-#define DIRECT_MMAP(s) os2direct_mmap(s)
-
-#else /* WIN32 */
-
-/* Win32 MMAP via VirtualAlloc */
-static void* win32mmap(size_t size) {
- void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
- return (ptr != 0)? ptr: MFAIL;
-}
-
-/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static void* win32direct_mmap(size_t size) {
- void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
- PAGE_EXECUTE_READWRITE);
- return (ptr != 0)? ptr: MFAIL;
-}
-
-/* This function supports releasing coalesced segments */
-static int win32munmap(void* ptr, size_t size) {
- MEMORY_BASIC_INFORMATION minfo;
- char* cptr = ptr;
- while (size) {
- if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
- return -1;
- if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
- minfo.State != MEM_COMMIT || minfo.RegionSize > size)
- return -1;
- if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
- return -1;
- cptr += minfo.RegionSize;
- size -= minfo.RegionSize;
- }
- return 0;
-}
-
-#define CALL_MMAP(s) win32mmap(s)
-#define CALL_MUNMAP(a, s) win32munmap((a), (s))
-#define DIRECT_MMAP(s) win32direct_mmap(s)
-#endif /* WIN32 */
-#endif /* HAVE_MMAP */
-
-#if HAVE_MMAP && HAVE_MREMAP
-#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
-#else /* HAVE_MMAP && HAVE_MREMAP */
-#define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
-#endif /* HAVE_MMAP && HAVE_MREMAP */
-
-#if HAVE_MORECORE
-#define CALL_MORECORE(S) MORECORE(S)
-#else /* HAVE_MORECORE */
-#define CALL_MORECORE(S) MFAIL
-#endif /* HAVE_MORECORE */
-
+#define SIZE_T_ZERO ((size_t)0)
+#define SIZE_T_ONE ((size_t)1)
+#define SIZE_T_TWO ((size_t)2)
+#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* True if address a has acceptable alignment */
+#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
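+
+/*
+  For illustration, taking MALLOC_ALIGNMENT to be 8 for the example (the
+  malloc documentation above guarantees at least 8-byte alignment),
+  CHUNK_ALIGN_MASK is 7 and:
+
+    is_aligned((void*)0x1000)    // nonzero: low three bits are clear
+    is_aligned((void*)0x1004)    // zero
+    align_offset((void*)0x1004)  // 4: bytes needed to reach 0x1008
+*/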
+
+/* -------------------------- MMAP preliminaries ------------------------- */
+
+/*
+ If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
+ checks to fail so compiler optimizer can delete code rather than
+ using so many "#if"s.
+*/
+
+
+/* MORECORE and MMAP must return MFAIL on failure */
+#define MFAIL ((void*)(MAX_SIZE_T))
+#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
+
+#if !HAVE_MMAP
+#define IS_MMAPPED_BIT (SIZE_T_ZERO)
+#define USE_MMAP_BIT (SIZE_T_ZERO)
+#define CALL_MMAP(s) MFAIL
+#define CALL_MUNMAP(a, s) (-1)
+#define DIRECT_MMAP(s) MFAIL
+
+#else /* HAVE_MMAP */
+#define IS_MMAPPED_BIT (SIZE_T_ONE)
+#define USE_MMAP_BIT (SIZE_T_ONE)
+
+#if !defined(WIN32) && !defined (__OS2__)
+#define CALL_MUNMAP(a, s) munmap((a), (s))
+#define MMAP_PROT (PROT_READ|PROT_WRITE)
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif /* MAP_ANON */
+#ifdef MAP_ANONYMOUS
+#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
+#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
+#else /* MAP_ANONYMOUS */
+/*
+ Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+ is unlikely to be needed, but is supplied just in case.
+*/
+#define MMAP_FLAGS (MAP_PRIVATE)
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+#define CALL_MMAP(s) ((dev_zero_fd < 0) ? \
+ (dev_zero_fd = open("/dev/zero", O_RDWR), \
+ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
+ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+#endif /* MAP_ANONYMOUS */
+
+#define DIRECT_MMAP(s) CALL_MMAP(s)
+
+#elif defined(__OS2__)
+
+/* OS/2 MMAP via DosAllocMem */
+static void* os2mmap(size_t size) {
+ void* ptr;
+ if (DosAllocMem(&ptr, size, OBJ_ANY|PAG_COMMIT|PAG_READ|PAG_WRITE) &&
+ DosAllocMem(&ptr, size, PAG_COMMIT|PAG_READ|PAG_WRITE))
+ return MFAIL;
+ return ptr;
+}
+
+#define os2direct_mmap(n) os2mmap(n)
+
+/* This function supports releasing coalesced segments */
+static int os2munmap(void* ptr, size_t size) {
+ while (size) {
+ ULONG ulSize = size;
+ ULONG ulFlags = 0;
+ if (DosQueryMem(ptr, &ulSize, &ulFlags) != 0)
+ return -1;
+ if ((ulFlags & PAG_BASE) == 0 ||(ulFlags & PAG_COMMIT) == 0 ||
+ ulSize > size)
+ return -1;
+ if (DosFreeMem(ptr) != 0)
+ return -1;
+ ptr = ( void * ) ( ( char * ) ptr + ulSize );
+ size -= ulSize;
+ }
+ return 0;
+}
+
+#define CALL_MMAP(s) os2mmap(s)
+#define CALL_MUNMAP(a, s) os2munmap((a), (s))
+#define DIRECT_MMAP(s) os2direct_mmap(s)
+
+#else /* WIN32 */
+
+/* Win32 MMAP via VirtualAlloc */
+static void* win32mmap(size_t size) {
+ void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ return (ptr != 0)? ptr: MFAIL;
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static void* win32direct_mmap(size_t size) {
+ void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+ PAGE_EXECUTE_READWRITE);
+ return (ptr != 0)? ptr: MFAIL;
+}
+
+/* This function supports releasing coalesced segments */
+static int win32munmap(void* ptr, size_t size) {
+ MEMORY_BASIC_INFORMATION minfo;
+ char* cptr = ptr;
+ while (size) {
+ if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
+ return -1;
+ if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
+ minfo.State != MEM_COMMIT || minfo.RegionSize > size)
+ return -1;
+ if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
+ return -1;
+ cptr += minfo.RegionSize;
+ size -= minfo.RegionSize;
+ }
+ return 0;
+}
+
+#define CALL_MMAP(s) win32mmap(s)
+#define CALL_MUNMAP(a, s) win32munmap((a), (s))
+#define DIRECT_MMAP(s) win32direct_mmap(s)
+#endif /* WIN32 */
+#endif /* HAVE_MMAP */
+
+#if HAVE_MMAP && HAVE_MREMAP
+#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
+#else /* HAVE_MMAP && HAVE_MREMAP */
+#define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
+#endif /* HAVE_MMAP && HAVE_MREMAP */
+
+#if HAVE_MORECORE
+#define CALL_MORECORE(S) MORECORE(S)
+#else /* HAVE_MORECORE */
+#define CALL_MORECORE(S) MFAIL
+#endif /* HAVE_MORECORE */
+
/* mstate bit set if contiguous morecore disabled or failed */
-#define USE_NONCONTIGUOUS_BIT (4U)
-
-/* segment bit set in create_mspace_with_base */
-#define EXTERN_BIT (8U)
-
-
-/* --------------------------- Lock preliminaries ------------------------ */
-
-#if USE_LOCKS
-
-/*
- When locks are defined, there are up to two global locks:
-
- * If HAVE_MORECORE, morecore_mutex protects sequences of calls to
- MORECORE. In many cases sys_alloc requires two calls, that should
- not be interleaved with calls by other threads. This does not
- protect against direct calls to MORECORE by other threads not
- using this lock, so there is still code to cope as best we can with
- interference.
-
- * magic_init_mutex ensures that mparams.magic and other
- unique mparams values are initialized only once.
-*/
-
-#if !defined(WIN32) && !defined(__OS2__)
-/* By default use posix locks */
-#include <pthread.h>
-#define MLOCK_T pthread_mutex_t
-#define INITIAL_LOCK(l) pthread_mutex_init(l, NULL)
-#define ACQUIRE_LOCK(l) pthread_mutex_lock(l)
-#define RELEASE_LOCK(l) pthread_mutex_unlock(l)
-
-#if HAVE_MORECORE
-static MLOCK_T morecore_mutex = PTHREAD_MUTEX_INITIALIZER;
-#endif /* HAVE_MORECORE */
-
-static MLOCK_T magic_init_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-#elif defined(__OS2__)
-#define MLOCK_T HMTX
-#define INITIAL_LOCK(l) DosCreateMutexSem(0, l, 0, FALSE)
-#define ACQUIRE_LOCK(l) DosRequestMutexSem(*l, SEM_INDEFINITE_WAIT)
-#define RELEASE_LOCK(l) DosReleaseMutexSem(*l)
-#if HAVE_MORECORE
-static MLOCK_T morecore_mutex;
-#endif /* HAVE_MORECORE */
-static MLOCK_T magic_init_mutex;
-
-#else /* WIN32 */
-/*
- Because lock-protected regions have bounded times, and there
- are no recursive lock calls, we can use simple spinlocks.
-*/
-
-#define MLOCK_T long
-static int win32_acquire_lock (MLOCK_T *sl) {
- for (;;) {
-#ifdef InterlockedCompareExchangePointer
- if (!InterlockedCompareExchange(sl, 1, 0))
- return 0;
-#else /* Use older void* version */
- if (!InterlockedCompareExchange((void**)sl, (void*)1, (void*)0))
- return 0;
-#endif /* InterlockedCompareExchangePointer */
- Sleep (0);
- }
-}
-
-static void win32_release_lock (MLOCK_T *sl) {
- InterlockedExchange (sl, 0);
-}
-
-#define INITIAL_LOCK(l) *(l)=0
-#define ACQUIRE_LOCK(l) win32_acquire_lock(l)
-#define RELEASE_LOCK(l) win32_release_lock(l)
-#if HAVE_MORECORE
-static MLOCK_T morecore_mutex;
-#endif /* HAVE_MORECORE */
-static MLOCK_T magic_init_mutex;
-#endif /* WIN32 */
-
-#define USE_LOCK_BIT (2U)
-#else /* USE_LOCKS */
-#define USE_LOCK_BIT (0U)
-#define INITIAL_LOCK(l)
-#endif /* USE_LOCKS */
-
-#if USE_LOCKS && HAVE_MORECORE
-#define ACQUIRE_MORECORE_LOCK() ACQUIRE_LOCK(&morecore_mutex);
-#define RELEASE_MORECORE_LOCK() RELEASE_LOCK(&morecore_mutex);
-#else /* USE_LOCKS && HAVE_MORECORE */
-#define ACQUIRE_MORECORE_LOCK()
-#define RELEASE_MORECORE_LOCK()
-#endif /* USE_LOCKS && HAVE_MORECORE */
-
-#if USE_LOCKS
-#define ACQUIRE_MAGIC_INIT_LOCK() ACQUIRE_LOCK(&magic_init_mutex);
-#define RELEASE_MAGIC_INIT_LOCK() RELEASE_LOCK(&magic_init_mutex);
-#else /* USE_LOCKS */
-#define ACQUIRE_MAGIC_INIT_LOCK()
-#define RELEASE_MAGIC_INIT_LOCK()
-#endif /* USE_LOCKS */
-
-
-/* ----------------------- Chunk representations ------------------------ */
-
-/*
- (The following includes lightly edited explanations by Colin Plumb.)
-
- The malloc_chunk declaration below is misleading (but accurate and
- necessary). It declares a "view" into memory allowing access to
- necessary fields at known offsets from a given base.
-
- Chunks of memory are maintained using a `boundary tag' method as
- originally described by Knuth. (See the paper by Paul Wilson
- ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
- techniques.) Sizes of free chunks are stored both in the front of
- each chunk and at the end. This makes consolidating fragmented
- chunks into bigger chunks fast. The head fields also hold bits
- representing whether chunks are free or in use.
-
- Here are some pictures to make it clearer. They are "exploded" to
- show that the state of a chunk can be thought of as extending from
- the high 31 bits of the head field of its header through the
- prev_foot and PINUSE_BIT bit of the following chunk header.
-
- A chunk that's in use looks like:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk (if P = 1) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
- | Size of this chunk 1| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | |
- +- -+
- | |
- +- -+
- | :
- +- size - sizeof(size_t) available payload bytes -+
- : |
- chunk-> +- -+
- | |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
- | Size of next chunk (may or may not be in use) | +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- And if it's free, it looks like this:
-
- chunk-> +- -+
- | User payload (must be in use, or we would have merged!) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
- | Size of this chunk 0| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Next pointer |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Prev pointer |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | :
- +- size - sizeof(struct chunk) unused bytes -+
- : |
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of this chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
- | Size of next chunk (must be in use, or we would have merged)| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | :
- +- User payload -+
- : |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |0|
- +-+
- Note that since we always merge adjacent free chunks, the chunks
- adjacent to a free chunk must be in use.
-
- Given a pointer to a chunk (which can be derived trivially from the
- payload pointer) we can, in O(1) time, find out whether the adjacent
- chunks are free, and if so, unlink them from the lists that they
- are on and merge them with the current chunk.
-
- Chunks always begin on even word boundaries, so the mem portion
- (which is returned to the user) is also on an even word boundary, and
- thus at least double-word aligned.
-
- The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
- chunk size (which is always a multiple of two words), is an in-use
- bit for the *previous* chunk. If that bit is *clear*, then the
- word before the current chunk size contains the previous chunk
- size, and can be used to find the front of the previous chunk.
- The very first chunk allocated always has this bit set, preventing
- access to non-existent (or non-owned) memory. If pinuse is set for
- any given chunk, then you CANNOT determine the size of the
- previous chunk, and might even get a memory addressing fault when
- trying to do so.
-
- The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
- the chunk size redundantly records whether the current chunk is
- inuse. This redundancy enables usage checks within free and realloc,
- and reduces indirection when freeing and consolidating chunks.
-
- Each freshly allocated chunk must have both cinuse and pinuse set.
- That is, each allocated chunk borders either a previously allocated
- and still in-use chunk, or the base of its memory arena. This is
- ensured by making all allocations from the `lowest' part of any
- found chunk. Further, no free chunk physically borders another one,
- so each free chunk is known to be preceded and followed by either
- inuse chunks or the ends of memory.
-
- Note that the `foot' of the current chunk is actually represented
- as the prev_foot of the NEXT chunk. This makes it easier to
- deal with alignments etc but can be very confusing when trying
- to extend or adapt this code.
-
- The exceptions to all this are
-
- 1. The special chunk `top' is the top-most available chunk (i.e.,
- the one bordering the end of available memory). It is treated
- specially. Top is never included in any bin, is used only if
- no other chunk is available, and is released back to the
- system if it is very large (see M_TRIM_THRESHOLD). In effect,
- the top chunk is treated as larger (and thus less well
- fitting) than any other available chunk. The top chunk
- doesn't update its trailing size field since there is no next
- contiguous chunk that would have to index off it. However,
- space is still allocated for it (TOP_FOOT_SIZE) to enable
- separation or merging when space is extended.
-
- 2. Chunks allocated via mmap, which have the lowest-order bit
- (IS_MMAPPED_BIT) set in their prev_foot fields, and do not set
- PINUSE_BIT in their head fields. Because they are allocated
- one-by-one, each must carry its own prev_foot field, which is
- also used to hold the offset this chunk has within its mmapped
- region, which is needed to preserve alignment. Each mmapped
- chunk is trailed by the first two fields of a fake next-chunk
- for sake of usage checks.
-
-*/
-
-struct malloc_chunk {
- size_t prev_foot; /* Size of previous chunk (if free). */
- size_t head; /* Size and inuse bits. */
- struct malloc_chunk* fd; /* double links -- used only if free. */
- struct malloc_chunk* bk;
-};
-
-typedef struct malloc_chunk mchunk;
-typedef struct malloc_chunk* mchunkptr;
-typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */
+#define USE_NONCONTIGUOUS_BIT (4U)
+
+/* segment bit set in create_mspace_with_base */
+#define EXTERN_BIT (8U)
+
+
+/* --------------------------- Lock preliminaries ------------------------ */
+
+#if USE_LOCKS
+
+/*
+ When locks are defined, there are up to two global locks:
+
+ * If HAVE_MORECORE, morecore_mutex protects sequences of calls to
+ MORECORE. In many cases sys_alloc requires two calls, that should
+ not be interleaved with calls by other threads. This does not
+ protect against direct calls to MORECORE by other threads not
+ using this lock, so there is still code to cope as best we can with
+ interference.
+
+ * magic_init_mutex ensures that mparams.magic and other
+ unique mparams values are initialized only once.
+*/
+
+#if !defined(WIN32) && !defined(__OS2__)
+/* By default use posix locks */
+#include <pthread.h>
+#define MLOCK_T pthread_mutex_t
+#define INITIAL_LOCK(l) pthread_mutex_init(l, NULL)
+#define ACQUIRE_LOCK(l) pthread_mutex_lock(l)
+#define RELEASE_LOCK(l) pthread_mutex_unlock(l)
+
+#if HAVE_MORECORE
+static MLOCK_T morecore_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif /* HAVE_MORECORE */
+
+static MLOCK_T magic_init_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+#elif defined(__OS2__)
+#define MLOCK_T HMTX
+#define INITIAL_LOCK(l) DosCreateMutexSem(0, l, 0, FALSE)
+#define ACQUIRE_LOCK(l) DosRequestMutexSem(*l, SEM_INDEFINITE_WAIT)
+#define RELEASE_LOCK(l) DosReleaseMutexSem(*l)
+#if HAVE_MORECORE
+static MLOCK_T morecore_mutex;
+#endif /* HAVE_MORECORE */
+static MLOCK_T magic_init_mutex;
+
+#else /* WIN32 */
+/*
+ Because lock-protected regions have bounded times, and there
+ are no recursive lock calls, we can use simple spinlocks.
+*/
+
+#define MLOCK_T long
+static int win32_acquire_lock (MLOCK_T *sl) {
+ for (;;) {
+#ifdef InterlockedCompareExchangePointer
+ if (!InterlockedCompareExchange(sl, 1, 0))
+ return 0;
+#else /* Use older void* version */
+ if (!InterlockedCompareExchange((void**)sl, (void*)1, (void*)0))
+ return 0;
+#endif /* InterlockedCompareExchangePointer */
+ Sleep (0);
+ }
+}
+
+static void win32_release_lock (MLOCK_T *sl) {
+ InterlockedExchange (sl, 0);
+}
+
+#define INITIAL_LOCK(l) *(l)=0
+#define ACQUIRE_LOCK(l) win32_acquire_lock(l)
+#define RELEASE_LOCK(l) win32_release_lock(l)
+#if HAVE_MORECORE
+static MLOCK_T morecore_mutex;
+#endif /* HAVE_MORECORE */
+static MLOCK_T magic_init_mutex;
+#endif /* WIN32 */
+
+#define USE_LOCK_BIT (2U)
+#else /* USE_LOCKS */
+#define USE_LOCK_BIT (0U)
+#define INITIAL_LOCK(l)
+#endif /* USE_LOCKS */
+
+#if USE_LOCKS && HAVE_MORECORE
+#define ACQUIRE_MORECORE_LOCK() ACQUIRE_LOCK(&morecore_mutex);
+#define RELEASE_MORECORE_LOCK() RELEASE_LOCK(&morecore_mutex);
+#else /* USE_LOCKS && HAVE_MORECORE */
+#define ACQUIRE_MORECORE_LOCK()
+#define RELEASE_MORECORE_LOCK()
+#endif /* USE_LOCKS && HAVE_MORECORE */
+
+#if USE_LOCKS
+#define ACQUIRE_MAGIC_INIT_LOCK() ACQUIRE_LOCK(&magic_init_mutex);
+#define RELEASE_MAGIC_INIT_LOCK() RELEASE_LOCK(&magic_init_mutex);
+#else /* USE_LOCKS */
+#define ACQUIRE_MAGIC_INIT_LOCK()
+#define RELEASE_MAGIC_INIT_LOCK()
+#endif /* USE_LOCKS */
+
+
+/* ----------------------- Chunk representations ------------------------ */
+
+/*
+ (The following includes lightly edited explanations by Colin Plumb.)
+
+ The malloc_chunk declaration below is misleading (but accurate and
+ necessary). It declares a "view" into memory allowing access to
+ necessary fields at known offsets from a given base.
+
+ Chunks of memory are maintained using a `boundary tag' method as
+ originally described by Knuth. (See the paper by Paul Wilson
+ ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
+ techniques.) Sizes of free chunks are stored both in the front of
+ each chunk and at the end. This makes consolidating fragmented
+ chunks into bigger chunks fast. The head fields also hold bits
+ representing whether chunks are free or in use.
+
+ Here are some pictures to make it clearer. They are "exploded" to
+ show that the state of a chunk can be thought of as extending from
+ the high 31 bits of the head field of its header through the
+ prev_foot and PINUSE_BIT bit of the following chunk header.
+
+ A chunk that's in use looks like:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk (if P = 1) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+ | Size of this chunk 1| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ +- -+
+ | |
+ +- -+
+ | :
+ +- size - sizeof(size_t) available payload bytes -+
+ : |
+ chunk-> +- -+
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
+ | Size of next chunk (may or may not be in use) | +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ And if it's free, it looks like this:
+
+ chunk-> +- -+
+ | User payload (must be in use, or we would have merged!) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+ | Size of this chunk 0| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next pointer |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Prev pointer |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | :
+ +- size - sizeof(struct chunk) unused bytes -+
+ : |
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of this chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
+ | Size of next chunk (must be in use, or we would have merged)| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | :
+ +- User payload -+
+ : |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |0|
+ +-+
+ Note that since we always merge adjacent free chunks, the chunks
+ adjacent to a free chunk must be in use.
+
+ Given a pointer to a chunk (which can be derived trivially from the
+ payload pointer) we can, in O(1) time, find out whether the adjacent
+ chunks are free, and if so, unlink them from the lists that they
+ are on and merge them with the current chunk.
+
+ Chunks always begin on even word boundaries, so the mem portion
+ (which is returned to the user) is also on an even word boundary, and
+ thus at least double-word aligned.
+
+ The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
+ chunk size (which is always a multiple of two words), is an in-use
+ bit for the *previous* chunk. If that bit is *clear*, then the
+ word before the current chunk size contains the previous chunk
+ size, and can be used to find the front of the previous chunk.
+ The very first chunk allocated always has this bit set, preventing
+ access to non-existent (or non-owned) memory. If pinuse is set for
+ any given chunk, then you CANNOT determine the size of the
+ previous chunk, and might even get a memory addressing fault when
+ trying to do so.
+
+ The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
+ the chunk size redundantly records whether the current chunk is
+ inuse. This redundancy enables usage checks within free and realloc,
+ and reduces indirection when freeing and consolidating chunks.
+
+ Each freshly allocated chunk must have both cinuse and pinuse set.
+ That is, each allocated chunk borders either a previously allocated
+ and still in-use chunk, or the base of its memory arena. This is
+  ensured by making all allocations from the `lowest' part of any
+ found chunk. Further, no free chunk physically borders another one,
+ so each free chunk is known to be preceded and followed by either
+ inuse chunks or the ends of memory.
+
+ Note that the `foot' of the current chunk is actually represented
+ as the prev_foot of the NEXT chunk. This makes it easier to
+ deal with alignments etc but can be very confusing when trying
+ to extend or adapt this code.
+
+ The exceptions to all this are
+
+ 1. The special chunk `top' is the top-most available chunk (i.e.,
+ the one bordering the end of available memory). It is treated
+ specially. Top is never included in any bin, is used only if
+ no other chunk is available, and is released back to the
+ system if it is very large (see M_TRIM_THRESHOLD). In effect,
+ the top chunk is treated as larger (and thus less well
+ fitting) than any other available chunk. The top chunk
+ doesn't update its trailing size field since there is no next
+ contiguous chunk that would have to index off it. However,
+ space is still allocated for it (TOP_FOOT_SIZE) to enable
+ separation or merging when space is extended.
+
+  2. Chunks allocated via mmap, which have the lowest-order bit
+ (IS_MMAPPED_BIT) set in their prev_foot fields, and do not set
+ PINUSE_BIT in their head fields. Because they are allocated
+ one-by-one, each must carry its own prev_foot field, which is
+ also used to hold the offset this chunk has within its mmapped
+ region, which is needed to preserve alignment. Each mmapped
+ chunk is trailed by the first two fields of a fake next-chunk
+ for sake of usage checks.
+
+*/
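
The boundary-tag navigation described above can be condensed into a few lines of C. This is a minimal illustrative sketch, assuming the field layout of malloc_chunk shown just below and the PINUSE/CINUSE bit values defined later in the file; the demo_* names are hypothetical, not part of dlmalloc.

/* Illustrative sketch of boundary-tag navigation (demo_* names are
   hypothetical; the field layout mirrors malloc_chunk below). */
#include <stddef.h>

struct demo_chunk {
  size_t prev_foot;             /* size of previous chunk, if it is free */
  size_t head;                  /* this chunk's size plus P/C flag bits  */
  struct demo_chunk *fd, *bk;   /* link fields, meaningful only if free  */
};

#define DEMO_PINUSE ((size_t)1)                 /* previous chunk in use */
#define DEMO_CINUSE ((size_t)2)                 /* this chunk in use     */
#define DEMO_SIZE(c) ((c)->head & ~(DEMO_PINUSE | DEMO_CINUSE))

/* The physically next chunk starts this chunk's size bytes further on. */
static struct demo_chunk* demo_next(struct demo_chunk* c) {
  return (struct demo_chunk*)((char*)c + DEMO_SIZE(c));
}

/* The physically previous chunk is reachable only when the P bit is
   clear, i.e. the previous chunk is free and left its size in prev_foot. */
static struct demo_chunk* demo_prev(struct demo_chunk* c) {
  return (struct demo_chunk*)((char*)c - c->prev_foot);
}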
+
+struct malloc_chunk {
+ size_t prev_foot; /* Size of previous chunk (if free). */
+ size_t head; /* Size and inuse bits. */
+ struct malloc_chunk* fd; /* double links -- used only if free. */
+ struct malloc_chunk* bk;
+};
+
+typedef struct malloc_chunk mchunk;
+typedef struct malloc_chunk* mchunkptr;
+typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */
typedef size_t bindex_t; /* Described below */
-typedef unsigned int binmap_t; /* Described below */
-typedef unsigned int flag_t; /* The type of various bit flag sets */
-
-/* ------------------- Chunks sizes and alignments ----------------------- */
-
-#define MCHUNK_SIZE (sizeof(mchunk))
-
-#if FOOTERS
-#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-#else /* FOOTERS */
-#define CHUNK_OVERHEAD (SIZE_T_SIZE)
-#endif /* FOOTERS */
-
-/* MMapped chunks need a second word of overhead ... */
-#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-/* ... and additional padding for fake next-chunk at foot */
-#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-#define MIN_CHUNK_SIZE\
- ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* conversion from malloc headers to user pointers, and back */
-#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES))
-#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
-/* chunk associated with aligned address A */
-#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
-
-/* Bounds on request (not chunk) sizes. */
-#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
-#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
-
-/* pad request bytes into a usable size */
-#define pad_request(req) \
- (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* pad request, checking for minimum (but not maximum) */
-#define request2size(req) \
- (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
-
-
-/* ------------------ Operations on head and foot fields ----------------- */
-
-/*
-  The head field of a chunk is or'ed with PINUSE_BIT when the previous
-  adjacent chunk is in use, and or'ed with CINUSE_BIT if this chunk is in
- use. If the chunk was obtained with mmap, the prev_foot field has
- IS_MMAPPED_BIT set, otherwise holding the offset of the base of the
- mmapped region to the base of the chunk.
-*/
-
-#define PINUSE_BIT (SIZE_T_ONE)
-#define CINUSE_BIT (SIZE_T_TWO)
-#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
-
-/* Head value for fenceposts */
-#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
-
-/* extraction of fields from head words */
-#define cinuse(p) ((p)->head & CINUSE_BIT)
-#define pinuse(p) ((p)->head & PINUSE_BIT)
-#define chunksize(p) ((p)->head & ~(INUSE_BITS))
-
-#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
-#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT)
-
-/* Treat space at ptr +/- offset as a chunk */
-#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
-#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
-
-/* Ptr to next or previous physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~INUSE_BITS)))
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
-
-/* extract next chunk's pinuse bit */
-#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
-
-/* Get/set size at footer */
-#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot)
-#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
-
-/* Set size, pinuse bit, and foot */
-#define set_size_and_pinuse_of_free_chunk(p, s)\
- ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
-
-/* Set size, pinuse bit, foot, and clear next pinuse */
-#define set_free_with_pinuse(p, s, n)\
- (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
-
-#define is_mmapped(p)\
- (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_MMAPPED_BIT))
-
-/* Get the internal overhead associated with chunk p */
-#define overhead_for(p)\
- (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
-
-/* Return true if malloced space is not necessarily cleared */
-#if MMAP_CLEARS
-#define calloc_must_clear(p) (!is_mmapped(p))
-#else /* MMAP_CLEARS */
-#define calloc_must_clear(p) (1)
-#endif /* MMAP_CLEARS */
-
-/* ---------------------- Overlaid data structures ----------------------- */
-
-/*
- When chunks are not in use, they are treated as nodes of either
- lists or trees.
-
- "Small" chunks are stored in circular doubly-linked lists, and look
- like this:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space (may be 0 bytes long) .
- . .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- Larger chunks are kept in a form of bitwise digital trees (aka
- tries) keyed on chunksizes. Because malloc_tree_chunks are only for
- free chunks greater than 256 bytes, their size doesn't impose any
- constraints on user chunk sizes. Each node looks like:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk of same size |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk of same size |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to left child (child[0]) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to right child (child[1]) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to parent |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | bin index of this chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- Each tree holding treenodes is a tree of unique chunk sizes. Chunks
- of the same size are arranged in a circularly-linked list, with only
- the oldest chunk (the next to be used, in our FIFO ordering)
- actually in the tree. (Tree members are distinguished by a non-null
-  parent pointer.)  If a chunk with the same size as an existing node
- is inserted, it is linked off the existing node using pointers that
- work in the same way as fd/bk pointers of small chunks.
-
- Each tree contains a power of 2 sized range of chunk sizes (the
-  smallest is 0x100 <= x < 0x180), which is divided in half at each
- tree level, with the chunks in the smaller half of the range (0x100
-  <= x < 0x140 for the top node) in the left subtree and the larger
- half (0x140 <= x < 0x180) in the right subtree. This is, of course,
- done by inspecting individual bits.
-
- Using these rules, each node's left subtree contains all smaller
- sizes than its right subtree. However, the node at the root of each
- subtree has no particular ordering relationship to either. (The
- dividing line between the subtree sizes is based on trie relation.)
- If we remove the last chunk of a given size from the interior of the
- tree, we need to replace it with a leaf node. The tree ordering
- rules permit a node to be replaced by any leaf below it.
-
- The smallest chunk in a tree (a common operation in a best-fit
- allocator) can be found by walking a path to the leftmost leaf in
- the tree. Unlike a usual binary tree, where we follow left child
- pointers until we reach a null, here we follow the right child
- pointer any time the left one is null, until we reach a leaf with
- both child pointers null. The smallest chunk in the tree will be
- somewhere along that path.
-
- The worst case number of steps to add, find, or remove a node is
- bounded by the number of bits differentiating chunks within
- bins. Under current bin calculations, this ranges from 6 up to 21
- (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
- is of course much better.
-*/
-
-struct malloc_tree_chunk {
- /* The first four fields must be compatible with malloc_chunk */
- size_t prev_foot;
- size_t head;
- struct malloc_tree_chunk* fd;
- struct malloc_tree_chunk* bk;
-
- struct malloc_tree_chunk* child[2];
- struct malloc_tree_chunk* parent;
- bindex_t index;
-};
-
-typedef struct malloc_tree_chunk tchunk;
-typedef struct malloc_tree_chunk* tchunkptr;
-typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
-
-/* A little helper macro for trees */
-#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
-
-/* ----------------------------- Segments -------------------------------- */
-
-/*
- Each malloc space may include non-contiguous segments, held in a
- list headed by an embedded malloc_segment record representing the
- top-most space. Segments also include flags holding properties of
- the space. Large chunks that are directly allocated by mmap are not
- included in this list. They are instead independently created and
- destroyed without otherwise keeping track of them.
-
- Segment management mainly comes into play for spaces allocated by
- MMAP. Any call to MMAP might or might not return memory that is
- adjacent to an existing segment. MORECORE normally contiguously
- extends the current space, so this space is almost always adjacent,
- which is simpler and faster to deal with. (This is why MORECORE is
- used preferentially to MMAP when both are available -- see
- sys_alloc.) When allocating using MMAP, we don't use any of the
- hinting mechanisms (inconsistently) supported in various
- implementations of unix mmap, or distinguish reserving from
- committing memory. Instead, we just ask for space, and exploit
- contiguity when we get it. It is probably possible to do
- better than this on some systems, but no general scheme seems
- to be significantly better.
-
- Management entails a simpler variant of the consolidation scheme
- used for chunks to reduce fragmentation -- new adjacent memory is
- normally prepended or appended to an existing segment. However,
- there are limitations compared to chunk consolidation that mostly
- reflect the fact that segment processing is relatively infrequent
- (occurring only when getting memory from system) and that we
- don't expect to have huge numbers of segments:
-
- * Segments are not indexed, so traversal requires linear scans. (It
- would be possible to index these, but is not worth the extra
- overhead and complexity for most programs on most platforms.)
- * New segments are only appended to old ones when holding top-most
- memory; if they cannot be prepended to others, they are held in
- different segments.
-
- Except for the top-most segment of an mstate, each segment record
- is kept at the tail of its segment. Segments are added by pushing
- segment records onto the list headed by &mstate.seg for the
- containing mstate.
-
- Segment flags control allocation/merge/deallocation policies:
- * If EXTERN_BIT set, then we did not allocate this segment,
- and so should not try to deallocate or merge with others.
- (This currently holds only for the initial segment passed
- into create_mspace_with_base.)
- * If IS_MMAPPED_BIT set, the segment may be merged with
- other surrounding mmapped segments and trimmed/de-allocated
- using munmap.
- * If neither bit is set, then the segment was obtained using
- MORECORE so can be merged with surrounding MORECORE'd segments
- and deallocated/trimmed using MORECORE with negative arguments.
-*/
-
-struct malloc_segment {
- char* base; /* base address */
- size_t size; /* allocated size */
- struct malloc_segment* next; /* ptr to next segment */
-#if FFI_MMAP_EXEC_WRIT
- /* The mmap magic is supposed to store the address of the executable
- segment at the very end of the requested block. */
-
-# define mmap_exec_offset(b,s) (*(ptrdiff_t*)((b)+(s)-sizeof(ptrdiff_t)))
-
- /* We can only merge segments if their corresponding executable
- segments are at identical offsets. */
-# define check_segment_merge(S,b,s) \
- (mmap_exec_offset((b),(s)) == (S)->exec_offset)
-
-# define add_segment_exec_offset(p,S) ((char*)(p) + (S)->exec_offset)
-# define sub_segment_exec_offset(p,S) ((char*)(p) - (S)->exec_offset)
-
- /* The removal of sflags only works with HAVE_MORECORE == 0. */
-
-# define get_segment_flags(S) (IS_MMAPPED_BIT)
-# define set_segment_flags(S,v) \
- (((v) != IS_MMAPPED_BIT) ? (ABORT, (v)) : \
- (((S)->exec_offset = \
- mmap_exec_offset((S)->base, (S)->size)), \
- (mmap_exec_offset((S)->base + (S)->exec_offset, (S)->size) != \
- (S)->exec_offset) ? (ABORT, (v)) : \
- (mmap_exec_offset((S)->base, (S)->size) = 0), (v)))
-
- /* We use an offset here, instead of a pointer, because then, when
- base changes, we don't have to modify this. On architectures
- with segmented addresses, this might not work. */
- ptrdiff_t exec_offset;
-#else
-
-# define get_segment_flags(S) ((S)->sflags)
-# define set_segment_flags(S,v) ((S)->sflags = (v))
-# define check_segment_merge(S,b,s) (1)
-
- flag_t sflags; /* mmap and extern flag */
-#endif
-};
-
-#define is_mmapped_segment(S) (get_segment_flags(S) & IS_MMAPPED_BIT)
-#define is_extern_segment(S) (get_segment_flags(S) & EXTERN_BIT)
-
-typedef struct malloc_segment msegment;
-typedef struct malloc_segment* msegmentptr;
-
-/* ---------------------------- malloc_state ----------------------------- */
-
-/*
- A malloc_state holds all of the bookkeeping for a space.
- The main fields are:
-
- Top
- The topmost chunk of the currently active segment. Its size is
- cached in topsize. The actual size of topmost space is
- topsize+TOP_FOOT_SIZE, which includes space reserved for adding
- fenceposts and segment records if necessary when getting more
- space from the system. The size at which to autotrim top is
- cached from mparams in trim_check, except that it is disabled if
- an autotrim fails.
-
- Designated victim (dv)
- This is the preferred chunk for servicing small requests that
- don't have exact fits. It is normally the chunk split off most
- recently to service another small request. Its size is cached in
- dvsize. The link fields of this chunk are not maintained since it
- is not kept in a bin.
-
- SmallBins
- An array of bin headers for free chunks. These bins hold chunks
- with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
- chunks of all the same size, spaced 8 bytes apart. To simplify
- use in double-linked lists, each bin header acts as a malloc_chunk
- pointing to the real first node, if it exists (else pointing to
- itself). This avoids special-casing for headers. But to avoid
- waste, we allocate only the fd/bk pointers of bins, and then use
- repositioning tricks to treat these as the fields of a chunk.
-
- TreeBins
- Treebins are pointers to the roots of trees holding a range of
- sizes. There are 2 equally spaced treebins for each power of two
- from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
- larger.
-
- Bin maps
- There is one bit map for small bins ("smallmap") and one for
-  treebins ("treemap").  Each bin sets its bit when non-empty, and
- clears the bit when empty. Bit operations are then used to avoid
- bin-by-bin searching -- nearly all "search" is done without ever
- looking at bins that won't be selected. The bit maps
-  conservatively use 32 bits per map word, even on a 64-bit system.
- For a good description of some of the bit-based techniques used
- here, see Henry S. Warren Jr's book "Hacker's Delight" (and
- supplement at http://hackersdelight.org/). Many of these are
- intended to reduce the branchiness of paths through malloc etc, as
- well as to reduce the number of memory locations read or written.
-
- Segments
- A list of segments headed by an embedded malloc_segment record
- representing the initial space.
-
- Address check support
- The least_addr field is the least address ever obtained from
- MORECORE or MMAP. Attempted frees and reallocs of any address less
- than this are trapped (unless INSECURE is defined).
-
- Magic tag
- A cross-check field that should always hold same value as mparams.magic.
-
- Flags
- Bits recording whether to use MMAP, locks, or contiguous MORECORE
-
- Statistics
- Each space keeps track of current and maximum system memory
- obtained via MORECORE or MMAP.
-
- Locking
- If USE_LOCKS is defined, the "mutex" lock is acquired and released
- around every public call using this mspace.
-*/
-
-/* Bin types, widths and sizes */
-#define NSMALLBINS (32U)
-#define NTREEBINS (32U)
-#define SMALLBIN_SHIFT (3U)
-#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
-#define TREEBIN_SHIFT (8U)
-#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
-#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
-#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
-
-struct malloc_state {
- binmap_t smallmap;
- binmap_t treemap;
- size_t dvsize;
- size_t topsize;
- char* least_addr;
- mchunkptr dv;
- mchunkptr top;
- size_t trim_check;
- size_t magic;
- mchunkptr smallbins[(NSMALLBINS+1)*2];
- tbinptr treebins[NTREEBINS];
- size_t footprint;
- size_t max_footprint;
- flag_t mflags;
-#if USE_LOCKS
- MLOCK_T mutex; /* locate lock among fields that rarely change */
-#endif /* USE_LOCKS */
- msegment seg;
-};
-
-typedef struct malloc_state* mstate;
-
-/* ------------- Global malloc_state and malloc_params ------------------- */
-
-/*
- malloc_params holds global properties, including those that can be
- dynamically set using mallopt. There is a single instance, mparams,
- initialized in init_mparams.
-*/
-
-struct malloc_params {
- size_t magic;
- size_t page_size;
- size_t granularity;
- size_t mmap_threshold;
- size_t trim_threshold;
- flag_t default_mflags;
-};
-
-static struct malloc_params mparams;
-
-/* The global malloc_state used for all non-"mspace" calls */
-static struct malloc_state _gm_;
-#define gm (&_gm_)
-#define is_global(M) ((M) == &_gm_)
-#define is_initialized(M) ((M)->top != 0)
-
-/* -------------------------- system alloc setup ------------------------- */
-
-/* Operations on mflags */
-
-#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
-#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
-#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
-
-#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
-#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
-#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
-
-#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
-#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
-
-#define set_lock(M,L)\
- ((M)->mflags = (L)?\
- ((M)->mflags | USE_LOCK_BIT) :\
- ((M)->mflags & ~USE_LOCK_BIT))
-
-/* page-align a size */
-#define page_align(S)\
- (((S) + (mparams.page_size)) & ~(mparams.page_size - SIZE_T_ONE))
-
-/* granularity-align a size */
-#define granularity_align(S)\
- (((S) + (mparams.granularity)) & ~(mparams.granularity - SIZE_T_ONE))
-
-#define is_page_aligned(S)\
- (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
-#define is_granularity_aligned(S)\
- (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
-
-/* True if segment S holds address A */
-#define segment_holds(S, A)\
- ((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
-
-/* Return segment holding given address */
-static msegmentptr segment_holding(mstate m, char* addr) {
- msegmentptr sp = &m->seg;
- for (;;) {
- if (addr >= sp->base && addr < sp->base + sp->size)
- return sp;
- if ((sp = sp->next) == 0)
- return 0;
- }
-}
-
-/* Return true if segment contains a segment link */
-static int has_segment_link(mstate m, msegmentptr ss) {
- msegmentptr sp = &m->seg;
- for (;;) {
- if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size)
- return 1;
- if ((sp = sp->next) == 0)
- return 0;
- }
-}
-
-#ifndef MORECORE_CANNOT_TRIM
-#define should_trim(M,s) ((s) > (M)->trim_check)
-#else /* MORECORE_CANNOT_TRIM */
-#define should_trim(M,s) (0)
-#endif /* MORECORE_CANNOT_TRIM */
-
-/*
- TOP_FOOT_SIZE is padding at the end of a segment, including space
- that may be needed to place segment records and fenceposts when new
- noncontiguous segments are added.
-*/
-#define TOP_FOOT_SIZE\
- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
-
-
-/* ------------------------------- Hooks -------------------------------- */
-
-/*
- PREACTION should be defined to return 0 on success, and nonzero on
- failure. If you are not using locking, you can redefine these to do
- anything you like.
-*/
-
-#if USE_LOCKS
-
-/* Ensure locks are initialized */
-#define GLOBALLY_INITIALIZE() (mparams.page_size == 0 && init_mparams())
-
-#define PREACTION(M) ((GLOBALLY_INITIALIZE() || use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
-#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
-#else /* USE_LOCKS */
-
-#ifndef PREACTION
-#define PREACTION(M) (0)
-#endif /* PREACTION */
-
-#ifndef POSTACTION
-#define POSTACTION(M)
-#endif /* POSTACTION */
-
-#endif /* USE_LOCKS */
-
-/*
- CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
- USAGE_ERROR_ACTION is triggered on detected bad frees and
- reallocs. The argument p is an address that might have triggered the
- fault. It is ignored by the two predefined actions, but might be
- useful in custom actions that try to help diagnose errors.
-*/
-
-#if PROCEED_ON_ERROR
-
-/* A count of the number of corruption errors causing resets */
-int malloc_corruption_error_count;
-
-/* default corruption action */
-static void reset_on_error(mstate m);
-
-#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
-#define USAGE_ERROR_ACTION(m, p)
-
-#else /* PROCEED_ON_ERROR */
-
-#ifndef CORRUPTION_ERROR_ACTION
-#define CORRUPTION_ERROR_ACTION(m) ABORT
-#endif /* CORRUPTION_ERROR_ACTION */
-
-#ifndef USAGE_ERROR_ACTION
-#define USAGE_ERROR_ACTION(m,p) ABORT
-#endif /* USAGE_ERROR_ACTION */
-
-#endif /* PROCEED_ON_ERROR */
-
-/* -------------------------- Debugging setup ---------------------------- */
-
-#if ! DEBUG
-
-#define check_free_chunk(M,P)
-#define check_inuse_chunk(M,P)
-#define check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P)
-#define check_malloc_state(M)
-#define check_top_chunk(M,P)
-
-#else /* DEBUG */
-#define check_free_chunk(M,P) do_check_free_chunk(M,P)
-#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P)
-#define check_top_chunk(M,P) do_check_top_chunk(M,P)
-#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P)
-#define check_malloc_state(M) do_check_malloc_state(M)
-
-static void do_check_any_chunk(mstate m, mchunkptr p);
-static void do_check_top_chunk(mstate m, mchunkptr p);
-static void do_check_mmapped_chunk(mstate m, mchunkptr p);
-static void do_check_inuse_chunk(mstate m, mchunkptr p);
-static void do_check_free_chunk(mstate m, mchunkptr p);
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s);
-static void do_check_tree(mstate m, tchunkptr t);
-static void do_check_treebin(mstate m, bindex_t i);
-static void do_check_smallbin(mstate m, bindex_t i);
-static void do_check_malloc_state(mstate m);
-static int bin_find(mstate m, mchunkptr x);
-static size_t traverse_and_check(mstate m);
-#endif /* DEBUG */
-
-/* ---------------------------- Indexing Bins ---------------------------- */
-
-#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
-#define small_index(s) ((s) >> SMALLBIN_SHIFT)
-#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
-#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
-
-/* addressing by index. See above about smallbin repositioning */
-#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
-#define treebin_at(M,i) (&((M)->treebins[i]))
-
-/* assign tree index for size S to variable I */
+typedef unsigned int binmap_t; /* Described below */
+typedef unsigned int flag_t; /* The type of various bit flag sets */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+#define MCHUNK_SIZE (sizeof(mchunk))
+
+#if FOOTERS
+#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+#else /* FOOTERS */
+#define CHUNK_OVERHEAD (SIZE_T_SIZE)
+#endif /* FOOTERS */
+
+/* MMapped chunks need a second word of overhead ... */
+#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+#define MIN_CHUNK_SIZE\
+ ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
+/* chunk associated with aligned address A */
+#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
+
+/* Bounds on request (not chunk) sizes. */
+#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
+#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+#define pad_request(req) \
+ (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+#define request2size(req) \
+ (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
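
A worked example may help here. The numbers below assume a 64-bit build with sizeof(size_t) == 8, FOOTERS off, and a MALLOC_ALIGNMENT of 8, so under these assumptions CHUNK_OVERHEAD == 8, CHUNK_ALIGN_MASK == 7, MIN_CHUNK_SIZE == 32 and MIN_REQUEST == 23. The demo_* helpers are hypothetical stand-ins that mirror the macros above.

/* Worked example mirroring pad_request/request2size under the stated
   assumptions (sizeof(size_t) == 8, FOOTERS off, 8-byte alignment). */
#include <assert.h>
#include <stddef.h>

static size_t demo_pad_request(size_t req) {
  return (req + 8 + 7) & ~(size_t)7;                 /* pad_request    */
}
static size_t demo_request2size(size_t req) {
  return (req < 23) ? 32 : demo_pad_request(req);    /* request2size   */
}
static void demo_check_request_sizes(void) {
  assert(demo_request2size(1)   == 32);   /* tiny request -> MIN_CHUNK_SIZE */
  assert(demo_request2size(24)  == 32);   /* 24 + 8 overhead, rounded to 32 */
  assert(demo_request2size(100) == 112);  /* 100 + 8, rounded up to 112     */
}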
+
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+/*
+  The head field of a chunk is or'ed with PINUSE_BIT when the previous
+  adjacent chunk is in use, and or'ed with CINUSE_BIT if this chunk is in
+ use. If the chunk was obtained with mmap, the prev_foot field has
+ IS_MMAPPED_BIT set, otherwise holding the offset of the base of the
+ mmapped region to the base of the chunk.
+*/
+
+#define PINUSE_BIT (SIZE_T_ONE)
+#define CINUSE_BIT (SIZE_T_TWO)
+#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
+
+/* Head value for fenceposts */
+#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+#define cinuse(p) ((p)->head & CINUSE_BIT)
+#define pinuse(p) ((p)->head & PINUSE_BIT)
+#define chunksize(p) ((p)->head & ~(INUSE_BITS))
+
+#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
+#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
+#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~INUSE_BITS)))
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot)
+#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
+
+/* Set size, pinuse bit, and foot */
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+ ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+#define set_free_with_pinuse(p, s, n)\
+ (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+#define is_mmapped(p)\
+ (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_MMAPPED_BIT))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* Return true if malloced space is not necessarily cleared */
+#if MMAP_CLEARS
+#define calloc_must_clear(p) (!is_mmapped(p))
+#else /* MMAP_CLEARS */
+#define calloc_must_clear(p) (1)
+#endif /* MMAP_CLEARS */
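
To make the head/foot bookkeeping concrete, the sketch below spells out the effect of set_free_with_pinuse(p, s, next_chunk(p)) on raw words, assuming PINUSE_BIT == 1 as defined above; it is an illustration of the macro's semantics, not the macro itself.

/* Illustrative expansion of set_free_with_pinuse on raw size_t words
   (p points at the chunk header; assumes PINUSE_BIT == 1). */
#include <stddef.h>

static void demo_set_free_with_pinuse(char* p, size_t s) {
  size_t* head      = (size_t*)p + 1;        /* p->head                  */
  size_t* next_foot = (size_t*)(p + s);      /* next chunk's prev_foot   */
  size_t* next_head = (size_t*)(p + s) + 1;  /* next chunk's head        */
  *next_head &= ~(size_t)1;  /* clear PINUSE in the following chunk      */
  *head = s | (size_t)1;     /* size | PINUSE: a free chunk's predecessor
                                is always in use (no two free neighbors) */
  *next_foot = s;            /* duplicate the size at the foot           */
}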
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+/*
+ When chunks are not in use, they are treated as nodes of either
+ lists or trees.
+
+ "Small" chunks are stored in circular doubly-linked lists, and look
+ like this:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space (may be 0 bytes long) .
+ . .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Larger chunks are kept in a form of bitwise digital trees (aka
+ tries) keyed on chunksizes. Because malloc_tree_chunks are only for
+ free chunks greater than 256 bytes, their size doesn't impose any
+ constraints on user chunk sizes. Each node looks like:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk of same size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk of same size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to left child (child[0]) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to right child (child[1]) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to parent |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | bin index of this chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Each tree holding treenodes is a tree of unique chunk sizes. Chunks
+ of the same size are arranged in a circularly-linked list, with only
+ the oldest chunk (the next to be used, in our FIFO ordering)
+ actually in the tree. (Tree members are distinguished by a non-null
+  parent pointer.)  If a chunk with the same size as an existing node
+ is inserted, it is linked off the existing node using pointers that
+ work in the same way as fd/bk pointers of small chunks.
+
+ Each tree contains a power of 2 sized range of chunk sizes (the
+  smallest is 0x100 <= x < 0x180), which is divided in half at each
+ tree level, with the chunks in the smaller half of the range (0x100
+  <= x < 0x140 for the top node) in the left subtree and the larger
+ half (0x140 <= x < 0x180) in the right subtree. This is, of course,
+ done by inspecting individual bits.
+
+ Using these rules, each node's left subtree contains all smaller
+ sizes than its right subtree. However, the node at the root of each
+ subtree has no particular ordering relationship to either. (The
+ dividing line between the subtree sizes is based on trie relation.)
+ If we remove the last chunk of a given size from the interior of the
+ tree, we need to replace it with a leaf node. The tree ordering
+ rules permit a node to be replaced by any leaf below it.
+
+ The smallest chunk in a tree (a common operation in a best-fit
+ allocator) can be found by walking a path to the leftmost leaf in
+ the tree. Unlike a usual binary tree, where we follow left child
+ pointers until we reach a null, here we follow the right child
+ pointer any time the left one is null, until we reach a leaf with
+ both child pointers null. The smallest chunk in the tree will be
+ somewhere along that path.
+
+ The worst case number of steps to add, find, or remove a node is
+ bounded by the number of bits differentiating chunks within
+ bins. Under current bin calculations, this ranges from 6 up to 21
+ (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
+ is of course much better.
+*/
+
+struct malloc_tree_chunk {
+ /* The first four fields must be compatible with malloc_chunk */
+ size_t prev_foot;
+ size_t head;
+ struct malloc_tree_chunk* fd;
+ struct malloc_tree_chunk* bk;
+
+ struct malloc_tree_chunk* child[2];
+ struct malloc_tree_chunk* parent;
+ bindex_t index;
+};
+
+typedef struct malloc_tree_chunk tchunk;
+typedef struct malloc_tree_chunk* tchunkptr;
+typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
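
The "walk toward the leftmost leaf" described a few paragraphs up, where the right child is taken whenever the left one is null, can be sketched with the helper above. demo_leftmost_descend is illustrative only; the real search in malloc also tracks the best fit seen along the path.

/* Illustrative descent toward the smallest chunk in a tree bin. */
static tchunkptr demo_leftmost_descend(tchunkptr t) {
  while (t != 0) {
    tchunkptr next = leftmost_child(t);
    if (next == 0)
      return t;                 /* leaf: both children are null */
    t = next;
  }
  return 0;                     /* empty tree */
}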
+
+/* ----------------------------- Segments -------------------------------- */
+
+/*
+ Each malloc space may include non-contiguous segments, held in a
+ list headed by an embedded malloc_segment record representing the
+ top-most space. Segments also include flags holding properties of
+ the space. Large chunks that are directly allocated by mmap are not
+ included in this list. They are instead independently created and
+ destroyed without otherwise keeping track of them.
+
+ Segment management mainly comes into play for spaces allocated by
+ MMAP. Any call to MMAP might or might not return memory that is
+ adjacent to an existing segment. MORECORE normally contiguously
+ extends the current space, so this space is almost always adjacent,
+ which is simpler and faster to deal with. (This is why MORECORE is
+ used preferentially to MMAP when both are available -- see
+ sys_alloc.) When allocating using MMAP, we don't use any of the
+ hinting mechanisms (inconsistently) supported in various
+ implementations of unix mmap, or distinguish reserving from
+ committing memory. Instead, we just ask for space, and exploit
+ contiguity when we get it. It is probably possible to do
+ better than this on some systems, but no general scheme seems
+ to be significantly better.
+
+ Management entails a simpler variant of the consolidation scheme
+ used for chunks to reduce fragmentation -- new adjacent memory is
+ normally prepended or appended to an existing segment. However,
+ there are limitations compared to chunk consolidation that mostly
+ reflect the fact that segment processing is relatively infrequent
+ (occurring only when getting memory from system) and that we
+ don't expect to have huge numbers of segments:
+
+ * Segments are not indexed, so traversal requires linear scans. (It
+ would be possible to index these, but is not worth the extra
+ overhead and complexity for most programs on most platforms.)
+ * New segments are only appended to old ones when holding top-most
+ memory; if they cannot be prepended to others, they are held in
+ different segments.
+
+ Except for the top-most segment of an mstate, each segment record
+ is kept at the tail of its segment. Segments are added by pushing
+ segment records onto the list headed by &mstate.seg for the
+ containing mstate.
+
+ Segment flags control allocation/merge/deallocation policies:
+ * If EXTERN_BIT set, then we did not allocate this segment,
+ and so should not try to deallocate or merge with others.
+ (This currently holds only for the initial segment passed
+ into create_mspace_with_base.)
+ * If IS_MMAPPED_BIT set, the segment may be merged with
+ other surrounding mmapped segments and trimmed/de-allocated
+ using munmap.
+ * If neither bit is set, then the segment was obtained using
+ MORECORE so can be merged with surrounding MORECORE'd segments
+ and deallocated/trimmed using MORECORE with negative arguments.
+*/
+
+struct malloc_segment {
+ char* base; /* base address */
+ size_t size; /* allocated size */
+ struct malloc_segment* next; /* ptr to next segment */
+#if FFI_MMAP_EXEC_WRIT
+ /* The mmap magic is supposed to store the address of the executable
+ segment at the very end of the requested block. */
+
+# define mmap_exec_offset(b,s) (*(ptrdiff_t*)((b)+(s)-sizeof(ptrdiff_t)))
+
+ /* We can only merge segments if their corresponding executable
+ segments are at identical offsets. */
+# define check_segment_merge(S,b,s) \
+ (mmap_exec_offset((b),(s)) == (S)->exec_offset)
+
+# define add_segment_exec_offset(p,S) ((char*)(p) + (S)->exec_offset)
+# define sub_segment_exec_offset(p,S) ((char*)(p) - (S)->exec_offset)
+
+ /* The removal of sflags only works with HAVE_MORECORE == 0. */
+
+# define get_segment_flags(S) (IS_MMAPPED_BIT)
+# define set_segment_flags(S,v) \
+ (((v) != IS_MMAPPED_BIT) ? (ABORT, (v)) : \
+ (((S)->exec_offset = \
+ mmap_exec_offset((S)->base, (S)->size)), \
+ (mmap_exec_offset((S)->base + (S)->exec_offset, (S)->size) != \
+ (S)->exec_offset) ? (ABORT, (v)) : \
+ (mmap_exec_offset((S)->base, (S)->size) = 0), (v)))
+
+ /* We use an offset here, instead of a pointer, because then, when
+ base changes, we don't have to modify this. On architectures
+ with segmented addresses, this might not work. */
+ ptrdiff_t exec_offset;
+#else
+
+# define get_segment_flags(S) ((S)->sflags)
+# define set_segment_flags(S,v) ((S)->sflags = (v))
+# define check_segment_merge(S,b,s) (1)
+
+ flag_t sflags; /* mmap and extern flag */
+#endif
+};
+
+#define is_mmapped_segment(S) (get_segment_flags(S) & IS_MMAPPED_BIT)
+#define is_extern_segment(S) (get_segment_flags(S) & EXTERN_BIT)
+
+typedef struct malloc_segment msegment;
+typedef struct malloc_segment* msegmentptr;
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/*
+ A malloc_state holds all of the bookkeeping for a space.
+ The main fields are:
+
+ Top
+ The topmost chunk of the currently active segment. Its size is
+ cached in topsize. The actual size of topmost space is
+ topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+ fenceposts and segment records if necessary when getting more
+ space from the system. The size at which to autotrim top is
+ cached from mparams in trim_check, except that it is disabled if
+ an autotrim fails.
+
+ Designated victim (dv)
+ This is the preferred chunk for servicing small requests that
+ don't have exact fits. It is normally the chunk split off most
+ recently to service another small request. Its size is cached in
+ dvsize. The link fields of this chunk are not maintained since it
+ is not kept in a bin.
+
+ SmallBins
+ An array of bin headers for free chunks. These bins hold chunks
+ with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+ chunks of all the same size, spaced 8 bytes apart. To simplify
+ use in double-linked lists, each bin header acts as a malloc_chunk
+ pointing to the real first node, if it exists (else pointing to
+ itself). This avoids special-casing for headers. But to avoid
+ waste, we allocate only the fd/bk pointers of bins, and then use
+ repositioning tricks to treat these as the fields of a chunk.
+
+ TreeBins
+ Treebins are pointers to the roots of trees holding a range of
+ sizes. There are 2 equally spaced treebins for each power of two
+ from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
+ larger.
+
+ Bin maps
+ There is one bit map for small bins ("smallmap") and one for
+  treebins ("treemap").  Each bin sets its bit when non-empty, and
+ clears the bit when empty. Bit operations are then used to avoid
+ bin-by-bin searching -- nearly all "search" is done without ever
+ looking at bins that won't be selected. The bit maps
+  conservatively use 32 bits per map word, even on a 64-bit system.
+ For a good description of some of the bit-based techniques used
+ here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+ supplement at http://hackersdelight.org/). Many of these are
+ intended to reduce the branchiness of paths through malloc etc, as
+ well as to reduce the number of memory locations read or written.
+
+ Segments
+ A list of segments headed by an embedded malloc_segment record
+ representing the initial space.
+
+ Address check support
+ The least_addr field is the least address ever obtained from
+ MORECORE or MMAP. Attempted frees and reallocs of any address less
+ than this are trapped (unless INSECURE is defined).
+
+ Magic tag
+ A cross-check field that should always hold same value as mparams.magic.
+
+ Flags
+ Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+ Statistics
+ Each space keeps track of current and maximum system memory
+ obtained via MORECORE or MMAP.
+
+ Locking
+ If USE_LOCKS is defined, the "mutex" lock is acquired and released
+ around every public call using this mspace.
+*/
+
+/* Bin types, widths and sizes */
+#define NSMALLBINS (32U)
+#define NTREEBINS (32U)
+#define SMALLBIN_SHIFT (3U)
+#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT (8U)
+#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
+#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+struct malloc_state {
+ binmap_t smallmap;
+ binmap_t treemap;
+ size_t dvsize;
+ size_t topsize;
+ char* least_addr;
+ mchunkptr dv;
+ mchunkptr top;
+ size_t trim_check;
+ size_t magic;
+ mchunkptr smallbins[(NSMALLBINS+1)*2];
+ tbinptr treebins[NTREEBINS];
+ size_t footprint;
+ size_t max_footprint;
+ flag_t mflags;
+#if USE_LOCKS
+ MLOCK_T mutex; /* locate lock among fields that rarely change */
+#endif /* USE_LOCKS */
+ msegment seg;
+};
+
+typedef struct malloc_state* mstate;
+
+/* ------------- Global malloc_state and malloc_params ------------------- */
+
+/*
+ malloc_params holds global properties, including those that can be
+ dynamically set using mallopt. There is a single instance, mparams,
+ initialized in init_mparams.
+*/
+
+struct malloc_params {
+ size_t magic;
+ size_t page_size;
+ size_t granularity;
+ size_t mmap_threshold;
+ size_t trim_threshold;
+ flag_t default_mflags;
+};
+
+static struct malloc_params mparams;
+
+/* The global malloc_state used for all non-"mspace" calls */
+static struct malloc_state _gm_;
+#define gm (&_gm_)
+#define is_global(M) ((M) == &_gm_)
+#define is_initialized(M) ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* Operations on mflags */
+
+#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
+#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
+#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
+
+#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
+#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
+#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
+
+#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
+#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
+
+#define set_lock(M,L)\
+ ((M)->mflags = (L)?\
+ ((M)->mflags | USE_LOCK_BIT) :\
+ ((M)->mflags & ~USE_LOCK_BIT))
+
+/* page-align a size */
+#define page_align(S)\
+ (((S) + (mparams.page_size)) & ~(mparams.page_size - SIZE_T_ONE))
+
+/* granularity-align a size */
+#define granularity_align(S)\
+ (((S) + (mparams.granularity)) & ~(mparams.granularity - SIZE_T_ONE))
+
+#define is_page_aligned(S)\
+ (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+#define is_granularity_aligned(S)\
+ (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+/* True if segment S holds address A */
+#define segment_holds(S, A)\
+ ((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+static msegmentptr segment_holding(mstate m, char* addr) {
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if (addr >= sp->base && addr < sp->base + sp->size)
+ return sp;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+/* Return true if segment contains a segment link */
+static int has_segment_link(mstate m, msegmentptr ss) {
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size)
+ return 1;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+#ifndef MORECORE_CANNOT_TRIM
+#define should_trim(M,s) ((s) > (M)->trim_check)
+#else /* MORECORE_CANNOT_TRIM */
+#define should_trim(M,s) (0)
+#endif /* MORECORE_CANNOT_TRIM */
+
+/*
+ TOP_FOOT_SIZE is padding at the end of a segment, including space
+ that may be needed to place segment records and fenceposts when new
+ noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+
+/* ------------------------------- Hooks -------------------------------- */
+
+/*
+ PREACTION should be defined to return 0 on success, and nonzero on
+ failure. If you are not using locking, you can redefine these to do
+ anything you like.
+*/
+
+#if USE_LOCKS
+
+/* Ensure locks are initialized */
+#define GLOBALLY_INITIALIZE() (mparams.page_size == 0 && init_mparams())
+
+#define PREACTION(M) ((GLOBALLY_INITIALIZE() || use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
+#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
+#else /* USE_LOCKS */
+
+#ifndef PREACTION
+#define PREACTION(M) (0)
+#endif /* PREACTION */
+
+#ifndef POSTACTION
+#define POSTACTION(M)
+#endif /* POSTACTION */
+
+#endif /* USE_LOCKS */
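
The intended usage pattern is that every public entry point brackets its work between the two hooks. A schematic of that shape follows; the body is a placeholder, not the real allocation path.

/* Schematic use of PREACTION/POSTACTION around a public call. */
static void* demo_public_call(mstate m, size_t bytes) {
  void* mem = 0;
  if (!PREACTION(m)) {          /* 0 on success: lock (if any) is held */
    /* ... the actual allocator work on m would happen here ... */
    (void)bytes;
    POSTACTION(m);              /* release the lock if one was taken   */
  }
  return mem;
}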
+
+/*
+ CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+ USAGE_ERROR_ACTION is triggered on detected bad frees and
+ reallocs. The argument p is an address that might have triggered the
+ fault. It is ignored by the two predefined actions, but might be
+ useful in custom actions that try to help diagnose errors.
+*/
+
+#if PROCEED_ON_ERROR
+
+/* A count of the number of corruption errors causing resets */
+int malloc_corruption_error_count;
+
+/* default corruption action */
+static void reset_on_error(mstate m);
+
+#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
+#define USAGE_ERROR_ACTION(m, p)
+
+#else /* PROCEED_ON_ERROR */
+
+#ifndef CORRUPTION_ERROR_ACTION
+#define CORRUPTION_ERROR_ACTION(m) ABORT
+#endif /* CORRUPTION_ERROR_ACTION */
+
+#ifndef USAGE_ERROR_ACTION
+#define USAGE_ERROR_ACTION(m,p) ABORT
+#endif /* USAGE_ERROR_ACTION */
+
+#endif /* PROCEED_ON_ERROR */
+
+/* -------------------------- Debugging setup ---------------------------- */
+
+#if ! DEBUG
+
+#define check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)
+#define check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)
+#define check_malloc_state(M)
+#define check_top_chunk(M,P)
+
+#else /* DEBUG */
+#define check_free_chunk(M,P) do_check_free_chunk(M,P)
+#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P)
+#define check_top_chunk(M,P) do_check_top_chunk(M,P)
+#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P)
+#define check_malloc_state(M) do_check_malloc_state(M)
+
+static void do_check_any_chunk(mstate m, mchunkptr p);
+static void do_check_top_chunk(mstate m, mchunkptr p);
+static void do_check_mmapped_chunk(mstate m, mchunkptr p);
+static void do_check_inuse_chunk(mstate m, mchunkptr p);
+static void do_check_free_chunk(mstate m, mchunkptr p);
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s);
+static void do_check_tree(mstate m, tchunkptr t);
+static void do_check_treebin(mstate m, bindex_t i);
+static void do_check_smallbin(mstate m, bindex_t i);
+static void do_check_malloc_state(mstate m);
+static int bin_find(mstate m, mchunkptr x);
+static size_t traverse_and_check(mstate m);
+#endif /* DEBUG */
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+#define small_index(s) ((s) >> SMALLBIN_SHIFT)
+#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i) (&((M)->treebins[i]))
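
With the constants above (SMALLBIN_SHIFT == 3, NSMALLBINS == 32, TREEBIN_SHIFT == 8), small bins are spaced 8 bytes apart and hold chunks smaller than MIN_LARGE_SIZE (256); for example:

/* Worked example for the small-bin index macros above:
     is_small(200)        -> (200 >> 3) == 25 < 32, so true
     small_index(200)     -> 25
     small_index2size(25) -> 25 << 3 == 200
     is_small(256)        -> (256 >> 3) == 32, not < 32, so false;
                             256 == MIN_LARGE_SIZE is handled by a treebin */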
+
+/* assign tree index for size S to variable I */
#if defined(__GNUC__) && defined(__i386__)
-#define compute_tree_index(S, I)\
-{\
- size_t X = S >> TREEBIN_SHIFT;\
- if (X == 0)\
- I = 0;\
- else if (X > 0xFFFF)\
- I = NTREEBINS-1;\
- else {\
- unsigned int K;\
- __asm__("bsrl %1,%0\n\t" : "=r" (K) : "rm" (X));\
- I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
- }\
-}
-#else /* GNUC */
-#define compute_tree_index(S, I)\
-{\
- size_t X = S >> TREEBIN_SHIFT;\
- if (X == 0)\
- I = 0;\
- else if (X > 0xFFFF)\
- I = NTREEBINS-1;\
- else {\
- unsigned int Y = (unsigned int)X;\
- unsigned int N = ((Y - 0x100) >> 16) & 8;\
- unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
- N += K;\
- N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
- K = 14 - N + ((Y <<= K) >> 15);\
- I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
- }\
-}
-#endif /* GNUC */
-
-/* Bit representing maximum resolved size in a treebin at i */
-#define bit_for_tree_index(i) \
- (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
-
-/* Shift placing maximum resolved bit in a treebin at i as sign bit */
-#define leftshift_for_tree_index(i) \
- ((i == NTREEBINS-1)? 0 : \
- ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
-
-/* The size of the smallest chunk held in bin with index i */
-#define minsize_for_tree_index(i) \
- ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
- (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
-
-
-/* ------------------------ Operations on bin maps ----------------------- */
-
-/* bit corresponding to given index */
-#define idx2bit(i) ((binmap_t)(1) << (i))
-
-/* Mark/Clear bits with given index */
-#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
-#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
-#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
-
-#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
-#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
-#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
-
-/* index corresponding to given bit */
-
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int K;\
+ __asm__("bsrl %1,%0\n\t" : "=r" (K) : "rm" (X));\
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+#else /* GNUC */
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int Y = (unsigned int)X;\
+ unsigned int N = ((Y - 0x100) >> 16) & 8;\
+ unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
+ N += K;\
+ N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
+ K = 14 - N + ((Y <<= K) >> 15);\
+ I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
+ }\
+}
+#endif /* GNUC */
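
Both branches of compute_tree_index compute the same value; a worked pass for one size (with TREEBIN_SHIFT == 8) may make the bit manipulation easier to follow:

/* Worked example for compute_tree_index, S = 0x228 (552 bytes):
     X = S >> 8 = 2; the highest set bit of X is bit 1, so K = 1;
     I = (K << 1) + ((S >> (K + 7)) & 1) = 2 + ((0x228 >> 8) & 1) = 2.
   Sizes in [0x200, 0x400) land in treebin 2 or 3, split at 0x300 by
   bit 8 of S -- one halving of the range per extra bit, as described
   in the tree comment earlier. */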
+
+/* Bit representing maximum resolved size in a treebin at i */
+#define bit_for_tree_index(i) \
+ (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
+
+/* Shift placing maximum resolved bit in a treebin at i as sign bit */
+#define leftshift_for_tree_index(i) \
+ ((i == NTREEBINS-1)? 0 : \
+ ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+#define minsize_for_tree_index(i) \
+ ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
+ (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i) ((binmap_t)(1) << (i))
+
+/* Mark/Clear bits with given index */
+#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
+#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
+
+#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
+#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
+#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
+
+/* index corresponding to given bit */
+
#if defined(__GNUC__) && defined(__i386__)
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- __asm__("bsfl %1,%0\n\t" : "=r" (J) : "rm" (X));\
- I = (bindex_t)J;\
-}
-
-#else /* GNUC */
-#if USE_BUILTIN_FFS
-#define compute_bit2idx(X, I) I = ffs(X)-1
-
-#else /* USE_BUILTIN_FFS */
-#define compute_bit2idx(X, I)\
-{\
- unsigned int Y = X - 1;\
- unsigned int K = Y >> (16-4) & 16;\
- unsigned int N = K; Y >>= K;\
- N += K = Y >> (8-3) & 8; Y >>= K;\
- N += K = Y >> (4-2) & 4; Y >>= K;\
- N += K = Y >> (2-1) & 2; Y >>= K;\
- N += K = Y >> (1-0) & 1; Y >>= K;\
- I = (bindex_t)(N + Y);\
-}
-#endif /* USE_BUILTIN_FFS */
-#endif /* GNUC */
-
-/* isolate the least set bit of a bitmap */
-#define least_bit(x) ((x) & -(x))
-
-/* mask with all bits to left of least bit of x on */
-#define left_bits(x) ((x<<1) | -(x<<1))
-
-/* mask with all bits to left of or equal to least bit of x on */
-#define same_or_left_bits(x) ((x) | -(x))
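
These helpers are typically combined with compute_bit2idx above to pick the first usable bin without scanning bin-by-bin. Below is a hedged sketch of that pattern; the demo name is illustrative and the function is not a verbatim excerpt from malloc.

/* Illustrative search for the first non-empty small bin with index >= i. */
static int demo_first_smallbin_at_or_above(binmap_t smallmap, bindex_t i,
                                           bindex_t* out) {
  binmap_t candidates = smallmap & same_or_left_bits(idx2bit(i));
  binmap_t bit;
  bindex_t j;
  if (candidates == 0)
    return 0;                       /* nothing at or above index i */
  bit = least_bit(candidates);      /* lowest qualifying bin       */
  compute_bit2idx(bit, j);
  *out = j;
  return 1;
}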
-
-
-/* ----------------------- Runtime Check Support ------------------------- */
-
-/*
- For security, the main invariant is that malloc/free/etc never
- writes to a static address other than malloc_state, unless static
- malloc_state itself has been corrupted, which cannot occur via
- malloc (because of these checks). In essence this means that we
- believe all pointers, sizes, maps etc held in malloc_state, but
- check all of those linked or offsetted from other embedded data
- structures. These checks are interspersed with main code in a way
- that tends to minimize their run-time cost.
-
- When FOOTERS is defined, in addition to range checking, we also
- verify footer fields of inuse chunks, which can be used guarantee
- that the mstate controlling malloc/free is intact. This is a
- streamlined version of the approach described by William Robertson
- et al in "Run-time Detection of Heap-based Overflows" LISA'03
- http://www.usenix.org/events/lisa03/tech/robertson.html The footer
- of an inuse chunk holds the xor of its mstate and a random seed,
- that is checked upon calls to free() and realloc(). This is
- (probablistically) unguessable from outside the program, but can be
- computed by any code successfully malloc'ing any chunk, so does not
- itself provide protection against code that has already broken
- security through some other means. Unlike Robertson et al, we
- always dynamically check addresses of all offset chunks (previous,
- next, etc). This turns out to be cheaper than relying on hashes.
-*/
-
-#if !INSECURE
-/* Check if address a is at least as high as any from MORECORE or MMAP */
-#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
-/* Check if address of next chunk n is higher than base chunk p */
-#define ok_next(p, n) ((char*)(p) < (char*)(n))
-/* Check if p has its cinuse bit on */
-#define ok_cinuse(p) cinuse(p)
-/* Check if p has its pinuse bit on */
-#define ok_pinuse(p) pinuse(p)
-
-#else /* !INSECURE */
-#define ok_address(M, a) (1)
-#define ok_next(b, n) (1)
-#define ok_cinuse(p) (1)
-#define ok_pinuse(p) (1)
-#endif /* !INSECURE */
-
-#if (FOOTERS && !INSECURE)
-/* Check if (alleged) mstate m has expected magic field */
-#define ok_magic(M) ((M)->magic == mparams.magic)
-#else /* (FOOTERS && !INSECURE) */
-#define ok_magic(M) (1)
-#endif /* (FOOTERS && !INSECURE) */
-
-
-/* In gcc, use __builtin_expect to minimize impact of checks */
-#if !INSECURE
-#if defined(__GNUC__) && __GNUC__ >= 3
-#define RTCHECK(e) __builtin_expect(e, 1)
-#else /* GNUC */
-#define RTCHECK(e) (e)
-#endif /* GNUC */
-#else /* !INSECURE */
-#define RTCHECK(e) (1)
-#endif /* !INSECURE */
-
-/* macros to set up inuse chunks with or without footers */
-
-#if !FOOTERS
-
-#define mark_inuse_foot(M,p,s)
-
-/* Set cinuse bit and pinuse bit of next chunk */
-#define set_inuse(M,p,s)\
- ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
- ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
-#define set_inuse_and_pinuse(M,p,s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set size, cinuse and pinuse bit of this chunk */
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
-
-#else /* FOOTERS */
-
-/* Set foot of inuse chunk to be xor of mstate and seed */
-#define mark_inuse_foot(M,p,s)\
- (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
-
-#define get_mstate_for(p)\
- ((mstate)(((mchunkptr)((char*)(p) +\
- (chunksize(p))))->prev_foot ^ mparams.magic))
-
-#define set_inuse(M,p,s)\
- ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
- (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
- mark_inuse_foot(M,p,s))
-
-#define set_inuse_and_pinuse(M,p,s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
- mark_inuse_foot(M,p,s))
-
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- mark_inuse_foot(M, p, s))
-
-#endif /* !FOOTERS */
-
-/* ---------------------------- setting mparams -------------------------- */
-
-/* Initialize mparams */
-static int init_mparams(void) {
- if (mparams.page_size == 0) {
- size_t s;
-
- mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
- mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
-#if MORECORE_CONTIGUOUS
- mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
-#else /* MORECORE_CONTIGUOUS */
- mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
-#endif /* MORECORE_CONTIGUOUS */
-
-#if (FOOTERS && !INSECURE)
- {
-#if USE_DEV_RANDOM
- int fd;
- unsigned char buf[sizeof(size_t)];
- /* Try to use /dev/urandom, else fall back on using time */
- if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 &&
- read(fd, buf, sizeof(buf)) == sizeof(buf)) {
- s = *((size_t *) buf);
- close(fd);
- }
- else
-#endif /* USE_DEV_RANDOM */
- s = (size_t)(time(0) ^ (size_t)0x55555555U);
-
- s |= (size_t)8U; /* ensure nonzero */
- s &= ~(size_t)7U; /* improve chances of fault for bad values */
-
- }
-#else /* (FOOTERS && !INSECURE) */
- s = (size_t)0x58585858U;
-#endif /* (FOOTERS && !INSECURE) */
- ACQUIRE_MAGIC_INIT_LOCK();
- if (mparams.magic == 0) {
- mparams.magic = s;
- /* Set up lock for main malloc area */
- INITIAL_LOCK(&gm->mutex);
- gm->mflags = mparams.default_mflags;
- }
- RELEASE_MAGIC_INIT_LOCK();
-
-#if !defined(WIN32) && !defined(__OS2__)
- mparams.page_size = malloc_getpagesize;
- mparams.granularity = ((DEFAULT_GRANULARITY != 0)?
- DEFAULT_GRANULARITY : mparams.page_size);
-#elif defined (__OS2__)
- /* if low-memory is used, os2munmap() would break
- if it were anything other than 64k */
- mparams.page_size = 4096u;
- mparams.granularity = 65536u;
-#else /* WIN32 */
- {
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
- mparams.page_size = system_info.dwPageSize;
- mparams.granularity = system_info.dwAllocationGranularity;
- }
-#endif /* WIN32 */
-
- /* Sanity-check configuration:
- size_t must be unsigned and as wide as pointer type.
- ints must be at least 4 bytes.
- alignment must be at least 8.
- Alignment, min chunk size, and page size must all be powers of 2.
- */
- if ((sizeof(size_t) != sizeof(char*)) ||
- (MAX_SIZE_T < MIN_CHUNK_SIZE) ||
- (sizeof(int) < 4) ||
- (MALLOC_ALIGNMENT < (size_t)8U) ||
- ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
- ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) ||
- ((mparams.granularity & (mparams.granularity-SIZE_T_ONE)) != 0) ||
- ((mparams.page_size & (mparams.page_size-SIZE_T_ONE)) != 0))
- ABORT;
- }
- return 0;
-}
-
-/* support for mallopt */
-static int change_mparam(int param_number, int value) {
- size_t val = (size_t)value;
- init_mparams();
- switch(param_number) {
- case M_TRIM_THRESHOLD:
- mparams.trim_threshold = val;
- return 1;
- case M_GRANULARITY:
- if (val >= mparams.page_size && ((val & (val-1)) == 0)) {
- mparams.granularity = val;
- return 1;
- }
- else
- return 0;
- case M_MMAP_THRESHOLD:
- mparams.mmap_threshold = val;
- return 1;
- default:
- return 0;
- }
-}
-
-#if DEBUG
-/* ------------------------- Debugging Support --------------------------- */
-
-/* Check properties of any chunk, whether free, inuse, mmapped etc */
-static void do_check_any_chunk(mstate m, mchunkptr p) {
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
-}
-
-/* Check properties of top chunk */
-static void do_check_top_chunk(mstate m, mchunkptr p) {
- msegmentptr sp = segment_holding(m, (char*)p);
- size_t sz = chunksize(p);
- assert(sp != 0);
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
- assert(sz == m->topsize);
- assert(sz > 0);
- assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
- assert(pinuse(p));
- assert(!next_pinuse(p));
-}
-
-/* Check properties of (inuse) mmapped chunks */
-static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
- size_t sz = chunksize(p);
- size_t len = (sz + (p->prev_foot & ~IS_MMAPPED_BIT) + MMAP_FOOT_PAD);
- assert(is_mmapped(p));
- assert(use_mmap(m));
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
- assert(!is_small(sz));
- assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
- assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
- assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
-}
-
-/* Check properties of inuse chunks */
-static void do_check_inuse_chunk(mstate m, mchunkptr p) {
- do_check_any_chunk(m, p);
- assert(cinuse(p));
- assert(next_pinuse(p));
- /* If not pinuse and not mmapped, previous chunk has OK offset */
- assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
- if (is_mmapped(p))
- do_check_mmapped_chunk(m, p);
-}
-
-/* Check properties of free chunks */
-static void do_check_free_chunk(mstate m, mchunkptr p) {
- size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT);
- mchunkptr next = chunk_plus_offset(p, sz);
- do_check_any_chunk(m, p);
- assert(!cinuse(p));
- assert(!next_pinuse(p));
- assert (!is_mmapped(p));
- if (p != m->dv && p != m->top) {
- if (sz >= MIN_CHUNK_SIZE) {
- assert((sz & CHUNK_ALIGN_MASK) == 0);
- assert(is_aligned(chunk2mem(p)));
- assert(next->prev_foot == sz);
- assert(pinuse(p));
- assert (next == m->top || cinuse(next));
- assert(p->fd->bk == p);
- assert(p->bk->fd == p);
- }
- else /* markers are always of size SIZE_T_SIZE */
- assert(sz == SIZE_T_SIZE);
- }
-}
-
-/* Check properties of malloced chunks at the point they are malloced */
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
- size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT);
- do_check_inuse_chunk(m, p);
- assert((sz & CHUNK_ALIGN_MASK) == 0);
- assert(sz >= MIN_CHUNK_SIZE);
- assert(sz >= s);
- /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
- assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
- }
-}
-
-/* Check a tree and its subtrees. */
-static void do_check_tree(mstate m, tchunkptr t) {
- tchunkptr head = 0;
- tchunkptr u = t;
- bindex_t tindex = t->index;
- size_t tsize = chunksize(t);
- bindex_t idx;
- compute_tree_index(tsize, idx);
- assert(tindex == idx);
- assert(tsize >= MIN_LARGE_SIZE);
- assert(tsize >= minsize_for_tree_index(idx));
- assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
-
- do { /* traverse through chain of same-sized nodes */
- do_check_any_chunk(m, ((mchunkptr)u));
- assert(u->index == tindex);
- assert(chunksize(u) == tsize);
- assert(!cinuse(u));
- assert(!next_pinuse(u));
- assert(u->fd->bk == u);
- assert(u->bk->fd == u);
- if (u->parent == 0) {
- assert(u->child[0] == 0);
- assert(u->child[1] == 0);
- }
- else {
- assert(head == 0); /* only one node on chain has parent */
- head = u;
- assert(u->parent != u);
- assert (u->parent->child[0] == u ||
- u->parent->child[1] == u ||
- *((tbinptr*)(u->parent)) == u);
- if (u->child[0] != 0) {
- assert(u->child[0]->parent == u);
- assert(u->child[0] != u);
- do_check_tree(m, u->child[0]);
- }
- if (u->child[1] != 0) {
- assert(u->child[1]->parent == u);
- assert(u->child[1] != u);
- do_check_tree(m, u->child[1]);
- }
- if (u->child[0] != 0 && u->child[1] != 0) {
- assert(chunksize(u->child[0]) < chunksize(u->child[1]));
- }
- }
- u = u->fd;
- } while (u != t);
- assert(head != 0);
-}
-
-/* Check all the chunks in a treebin. */
-static void do_check_treebin(mstate m, bindex_t i) {
- tbinptr* tb = treebin_at(m, i);
- tchunkptr t = *tb;
- int empty = (m->treemap & (1U << i)) == 0;
- if (t == 0)
- assert(empty);
- if (!empty)
- do_check_tree(m, t);
-}
-
-/* Check all the chunks in a smallbin. */
-static void do_check_smallbin(mstate m, bindex_t i) {
- sbinptr b = smallbin_at(m, i);
- mchunkptr p = b->bk;
- unsigned int empty = (m->smallmap & (1U << i)) == 0;
- if (p == b)
- assert(empty);
- if (!empty) {
- for (; p != b; p = p->bk) {
- size_t size = chunksize(p);
- mchunkptr q;
- /* each chunk claims to be free */
- do_check_free_chunk(m, p);
- /* chunk belongs in bin */
- assert(small_index(size) == i);
- assert(p->bk == b || chunksize(p->bk) == chunksize(p));
- /* chunk is followed by an inuse chunk */
- q = next_chunk(p);
- if (q->head != FENCEPOST_HEAD)
- do_check_inuse_chunk(m, q);
- }
- }
-}
-
-/* Find x in a bin. Used in other check functions. */
-static int bin_find(mstate m, mchunkptr x) {
- size_t size = chunksize(x);
- if (is_small(size)) {
- bindex_t sidx = small_index(size);
- sbinptr b = smallbin_at(m, sidx);
- if (smallmap_is_marked(m, sidx)) {
- mchunkptr p = b;
- do {
- if (p == x)
- return 1;
- } while ((p = p->fd) != b);
- }
- }
- else {
- bindex_t tidx;
- compute_tree_index(size, tidx);
- if (treemap_is_marked(m, tidx)) {
- tchunkptr t = *treebin_at(m, tidx);
- size_t sizebits = size << leftshift_for_tree_index(tidx);
- while (t != 0 && chunksize(t) != size) {
- t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
- sizebits <<= 1;
- }
- if (t != 0) {
- tchunkptr u = t;
- do {
- if (u == (tchunkptr)x)
- return 1;
- } while ((u = u->fd) != t);
- }
- }
- }
- return 0;
-}
-
-/* Traverse each chunk and check it; return total */
-static size_t traverse_and_check(mstate m) {
- size_t sum = 0;
- if (is_initialized(m)) {
- msegmentptr s = &m->seg;
- sum += m->topsize + TOP_FOOT_SIZE;
- while (s != 0) {
- mchunkptr q = align_as_chunk(s->base);
- mchunkptr lastq = 0;
- assert(pinuse(q));
- while (segment_holds(s, q) &&
- q != m->top && q->head != FENCEPOST_HEAD) {
- sum += chunksize(q);
- if (cinuse(q)) {
- assert(!bin_find(m, q));
- do_check_inuse_chunk(m, q);
- }
- else {
- assert(q == m->dv || bin_find(m, q));
- assert(lastq == 0 || cinuse(lastq)); /* Not 2 consecutive free */
- do_check_free_chunk(m, q);
- }
- lastq = q;
- q = next_chunk(q);
- }
- s = s->next;
- }
- }
- return sum;
-}
-
-/* Check all properties of malloc_state. */
-static void do_check_malloc_state(mstate m) {
- bindex_t i;
- size_t total;
- /* check bins */
- for (i = 0; i < NSMALLBINS; ++i)
- do_check_smallbin(m, i);
- for (i = 0; i < NTREEBINS; ++i)
- do_check_treebin(m, i);
-
- if (m->dvsize != 0) { /* check dv chunk */
- do_check_any_chunk(m, m->dv);
- assert(m->dvsize == chunksize(m->dv));
- assert(m->dvsize >= MIN_CHUNK_SIZE);
- assert(bin_find(m, m->dv) == 0);
- }
-
- if (m->top != 0) { /* check top chunk */
- do_check_top_chunk(m, m->top);
- assert(m->topsize == chunksize(m->top));
- assert(m->topsize > 0);
- assert(bin_find(m, m->top) == 0);
- }
-
- total = traverse_and_check(m);
- assert(total <= m->footprint);
- assert(m->footprint <= m->max_footprint);
-}
-#endif /* DEBUG */
-
-/* ----------------------------- statistics ------------------------------ */
-
-#if !NO_MALLINFO
-static struct mallinfo internal_mallinfo(mstate m) {
- struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
- if (!PREACTION(m)) {
- check_malloc_state(m);
- if (is_initialized(m)) {
- size_t nfree = SIZE_T_ONE; /* top always free */
- size_t mfree = m->topsize + TOP_FOOT_SIZE;
- size_t sum = mfree;
- msegmentptr s = &m->seg;
- while (s != 0) {
- mchunkptr q = align_as_chunk(s->base);
- while (segment_holds(s, q) &&
- q != m->top && q->head != FENCEPOST_HEAD) {
- size_t sz = chunksize(q);
- sum += sz;
- if (!cinuse(q)) {
- mfree += sz;
- ++nfree;
- }
- q = next_chunk(q);
- }
- s = s->next;
- }
-
- nm.arena = sum;
- nm.ordblks = nfree;
- nm.hblkhd = m->footprint - sum;
- nm.usmblks = m->max_footprint;
- nm.uordblks = m->footprint - mfree;
- nm.fordblks = mfree;
- nm.keepcost = m->topsize;
- }
-
- POSTACTION(m);
- }
- return nm;
-}
-#endif /* !NO_MALLINFO */
-
-static void internal_malloc_stats(mstate m) {
- if (!PREACTION(m)) {
- size_t maxfp = 0;
- size_t fp = 0;
- size_t used = 0;
- check_malloc_state(m);
- if (is_initialized(m)) {
- msegmentptr s = &m->seg;
- maxfp = m->max_footprint;
- fp = m->footprint;
- used = fp - (m->topsize + TOP_FOOT_SIZE);
-
- while (s != 0) {
- mchunkptr q = align_as_chunk(s->base);
- while (segment_holds(s, q) &&
- q != m->top && q->head != FENCEPOST_HEAD) {
- if (!cinuse(q))
- used -= chunksize(q);
- q = next_chunk(q);
- }
- s = s->next;
- }
- }
-
- fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
- fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp));
- fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used));
-
- POSTACTION(m);
- }
-}
-
-/* ----------------------- Operations on smallbins ----------------------- */
-
-/*
- Various forms of linking and unlinking are defined as macros. Even
- the ones for trees, which are very long but have very short typical
- paths. This is ugly but reduces reliance on inlining support of
- compilers.
-*/
-
-/* Link a free chunk into a smallbin */
-#define insert_small_chunk(M, P, S) {\
- bindex_t I = small_index(S);\
- mchunkptr B = smallbin_at(M, I);\
- mchunkptr F = B;\
- assert(S >= MIN_CHUNK_SIZE);\
- if (!smallmap_is_marked(M, I))\
- mark_smallmap(M, I);\
- else if (RTCHECK(ok_address(M, B->fd)))\
- F = B->fd;\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- B->fd = P;\
- F->bk = P;\
- P->fd = F;\
- P->bk = B;\
-}
-
-/* Unlink a chunk from a smallbin */
-#define unlink_small_chunk(M, P, S) {\
- mchunkptr F = P->fd;\
- mchunkptr B = P->bk;\
- bindex_t I = small_index(S);\
- assert(P != B);\
- assert(P != F);\
- assert(chunksize(P) == small_index2size(I));\
- if (F == B)\
- clear_smallmap(M, I);\
- else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
- (B == smallbin_at(M,I) || ok_address(M, B)))) {\
- F->bk = B;\
- B->fd = F;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
-}
-
-/* Unlink the first chunk from a smallbin */
-#define unlink_first_small_chunk(M, B, P, I) {\
- mchunkptr F = P->fd;\
- assert(P != B);\
- assert(P != F);\
- assert(chunksize(P) == small_index2size(I));\
- if (B == F)\
- clear_smallmap(M, I);\
- else if (RTCHECK(ok_address(M, F))) {\
- B->fd = F;\
- F->bk = B;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
-}
-
-/* Replace dv node, binning the old one */
-/* Used only when dvsize known to be small */
-#define replace_dv(M, P, S) {\
- size_t DVS = M->dvsize;\
- if (DVS != 0) {\
- mchunkptr DV = M->dv;\
- assert(is_small(DVS));\
- insert_small_chunk(M, DV, DVS);\
- }\
- M->dvsize = S;\
- M->dv = P;\
-}
-
-/* ------------------------- Operations on trees ------------------------- */
-
-/* Insert chunk into tree */
-#define insert_large_chunk(M, X, S) {\
- tbinptr* H;\
- bindex_t I;\
- compute_tree_index(S, I);\
- H = treebin_at(M, I);\
- X->index = I;\
- X->child[0] = X->child[1] = 0;\
- if (!treemap_is_marked(M, I)) {\
- mark_treemap(M, I);\
- *H = X;\
- X->parent = (tchunkptr)H;\
- X->fd = X->bk = X;\
- }\
- else {\
- tchunkptr T = *H;\
- size_t K = S << leftshift_for_tree_index(I);\
- for (;;) {\
- if (chunksize(T) != S) {\
- tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
- K <<= 1;\
- if (*C != 0)\
- T = *C;\
- else if (RTCHECK(ok_address(M, C))) {\
- *C = X;\
- X->parent = T;\
- X->fd = X->bk = X;\
- break;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- break;\
- }\
- }\
- else {\
- tchunkptr F = T->fd;\
- if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
- T->fd = F->bk = X;\
- X->fd = F;\
- X->bk = T;\
- X->parent = 0;\
- break;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- break;\
- }\
- }\
- }\
- }\
-}
-
-/*
- Unlink steps:
-
- 1. If x is a chained node, unlink it from its same-sized fd/bk links
- and choose its bk node as its replacement.
- 2. If x was the last node of its size, but not a leaf node, it must
- be replaced with a leaf node (not merely one with an open left or
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ __asm__("bsfl %1,%0\n\t" : "=r" (J) : "rm" (X));\
+ I = (bindex_t)J;\
+}
+
+#else /* GNUC */
+#if USE_BUILTIN_FFS
+#define compute_bit2idx(X, I) I = ffs(X)-1
+
+#else /* USE_BUILTIN_FFS */
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int Y = X - 1;\
+ unsigned int K = Y >> (16-4) & 16;\
+ unsigned int N = K; Y >>= K;\
+ N += K = Y >> (8-3) & 8; Y >>= K;\
+ N += K = Y >> (4-2) & 4; Y >>= K;\
+ N += K = Y >> (2-1) & 2; Y >>= K;\
+ N += K = Y >> (1-0) & 1; Y >>= K;\
+ I = (bindex_t)(N + Y);\
+}
+#endif /* USE_BUILTIN_FFS */
+#endif /* GNUC */
+
+/* isolate the least set bit of a bitmap */
+#define least_bit(x) ((x) & -(x))
+
+/* mask with all bits to left of least bit of x on */
+#define left_bits(x) ((x<<1) | -(x<<1))
+
+/* mask with all bits to left of or equal to least bit of x on */
+#define same_or_left_bits(x) ((x) | -(x))
+
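/*
  Illustrative example (not part of the allocator): with a 32-bit binmap in
  which bins 3 and 5 are marked, smallmap == 0x28.  To locate the first
  marked bin at or above index 4, code along the lines of

      binmap_t map   = 0x28;                                   /* bins 3 and 5 */
      binmap_t mask  = same_or_left_bits(idx2bit(4)) & map;    /* == 0x20 */
      binmap_t least = least_bit(mask);                        /* == 0x20 */
      bindex_t i;
      compute_bit2idx(least, i);                               /* i == 5  */

  picks bin 5; the allocator's own smallbin search is built from these same
  primitives.
*/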
+
+/* ----------------------- Runtime Check Support ------------------------- */
+
+/*
+ For security, the main invariant is that malloc/free/etc never
+ writes to a static address other than malloc_state, unless static
+ malloc_state itself has been corrupted, which cannot occur via
+ malloc (because of these checks). In essence this means that we
+ believe all pointers, sizes, maps, etc. held in malloc_state, but
+ check all of those linked or offset from other embedded data
+ structures. These checks are interspersed with main code in a way
+ that tends to minimize their run-time cost.
+
+ When FOOTERS is defined, in addition to range checking, we also
+ verify footer fields of inuse chunks, which can be used to guarantee
+ that the mstate controlling malloc/free is intact. This is a
+ streamlined version of the approach described by William Robertson
+ et al in "Run-time Detection of Heap-based Overflows" LISA'03
+ http://www.usenix.org/events/lisa03/tech/robertson.html The footer
+ of an inuse chunk holds the xor of its mstate and a random seed,
+ that is checked upon calls to free() and realloc(). This is
+ (probabilistically) unguessable from outside the program, but can be
+ computed by any code successfully malloc'ing any chunk, so does not
+ itself provide protection against code that has already broken
+ security through some other means. Unlike Robertson et al, we
+ always dynamically check addresses of all offset chunks (previous,
+ next, etc). This turns out to be cheaper than relying on hashes.
+*/
+
+#if !INSECURE
+/* Check if address a is at least as high as any from MORECORE or MMAP */
+#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
+/* Check if address of next chunk n is higher than base chunk p */
+#define ok_next(p, n) ((char*)(p) < (char*)(n))
+/* Check if p has its cinuse bit on */
+#define ok_cinuse(p) cinuse(p)
+/* Check if p has its pinuse bit on */
+#define ok_pinuse(p) pinuse(p)
+
+#else /* !INSECURE */
+#define ok_address(M, a) (1)
+#define ok_next(b, n) (1)
+#define ok_cinuse(p) (1)
+#define ok_pinuse(p) (1)
+#endif /* !INSECURE */
+
+#if (FOOTERS && !INSECURE)
+/* Check if (alleged) mstate m has expected magic field */
+#define ok_magic(M) ((M)->magic == mparams.magic)
+#else /* (FOOTERS && !INSECURE) */
+#define ok_magic(M) (1)
+#endif /* (FOOTERS && !INSECURE) */
+
+
+/* In gcc, use __builtin_expect to minimize impact of checks */
+#if !INSECURE
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define RTCHECK(e) __builtin_expect(e, 1)
+#else /* GNUC */
+#define RTCHECK(e) (e)
+#endif /* GNUC */
+#else /* !INSECURE */
+#define RTCHECK(e) (1)
+#endif /* !INSECURE */
+
+/* macros to set up inuse chunks with or without footers */
+
+#if !FOOTERS
+
+#define mark_inuse_foot(M,p,s)
+
+/* Set cinuse bit and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
+
+#else /* FOOTERS */
+
+/* Set foot of inuse chunk to be xor of mstate and seed */
+#define mark_inuse_foot(M,p,s)\
+ (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
+
+#define get_mstate_for(p)\
+ ((mstate)(((mchunkptr)((char*)(p) +\
+ (chunksize(p))))->prev_foot ^ mparams.magic))
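/*
  Illustrative sketch (not part of the allocator): under FOOTERS, a
  deallocation routine can recover and vet the owning mstate before
  trusting any other chunk fields, roughly as

      mchunkptr p  = mem2chunk(mem);
      mstate    fm = get_mstate_for(p);   /* xor footer with mparams.magic */
      if (!ok_magic(fm))                  /* forged or corrupted footer */
          USAGE_ERROR_ACTION(fm, p);

  A footer written without knowledge of mparams.magic fails ok_magic().
*/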
+
+#define set_inuse(M,p,s)\
+ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
+ mark_inuse_foot(M,p,s))
+
+#define set_inuse_and_pinuse(M,p,s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
+ mark_inuse_foot(M,p,s))
+
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ mark_inuse_foot(M, p, s))
+
+#endif /* !FOOTERS */
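/*
  Illustrative example (not part of the allocator): with the usual
  PINUSE_BIT == 1 and CINUSE_BIT == 2, an in-use chunk of size 48 whose
  predecessor is also in use ends up with

      p->head == 48 | PINUSE_BIT | CINUSE_BIT == 0x33

  and set_inuse()/set_inuse_and_pinuse() above additionally set PINUSE_BIT
  in the header of the chunk that begins s bytes later.
*/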
+
+/* ---------------------------- setting mparams -------------------------- */
+
+/* Initialize mparams */
+static int init_mparams(void) {
+ if (mparams.page_size == 0) {
+ size_t s;
+
+ mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+ mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+#if MORECORE_CONTIGUOUS
+ mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
+#else /* MORECORE_CONTIGUOUS */
+ mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
+#endif /* MORECORE_CONTIGUOUS */
+
+#if (FOOTERS && !INSECURE)
+ {
+#if USE_DEV_RANDOM
+ int fd;
+ unsigned char buf[sizeof(size_t)];
+ /* Try to use /dev/urandom, else fall back on using time */
+ if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 &&
+ read(fd, buf, sizeof(buf)) == sizeof(buf)) {
+ s = *((size_t *) buf);
+ close(fd);
+ }
+ else
+#endif /* USE_DEV_RANDOM */
+ s = (size_t)(time(0) ^ (size_t)0x55555555U);
+
+ s |= (size_t)8U; /* ensure nonzero */
+ s &= ~(size_t)7U; /* improve chances of fault for bad values */
+
+ }
+#else /* (FOOTERS && !INSECURE) */
+ s = (size_t)0x58585858U;
+#endif /* (FOOTERS && !INSECURE) */
+ ACQUIRE_MAGIC_INIT_LOCK();
+ if (mparams.magic == 0) {
+ mparams.magic = s;
+ /* Set up lock for main malloc area */
+ INITIAL_LOCK(&gm->mutex);
+ gm->mflags = mparams.default_mflags;
+ }
+ RELEASE_MAGIC_INIT_LOCK();
+
+#if !defined(WIN32) && !defined(__OS2__)
+ mparams.page_size = malloc_getpagesize;
+ mparams.granularity = ((DEFAULT_GRANULARITY != 0)?
+ DEFAULT_GRANULARITY : mparams.page_size);
+#elif defined (__OS2__)
+ /* if low-memory is used, os2munmap() would break
+ if it were anything other than 64k */
+ mparams.page_size = 4096u;
+ mparams.granularity = 65536u;
+#else /* WIN32 */
+ {
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+ mparams.page_size = system_info.dwPageSize;
+ mparams.granularity = system_info.dwAllocationGranularity;
+ }
+#endif /* WIN32 */
+
+ /* Sanity-check configuration:
+ size_t must be unsigned and as wide as pointer type.
+ ints must be at least 4 bytes.
+ alignment must be at least 8.
+ Alignment, min chunk size, and page size must all be powers of 2.
+ */
+ if ((sizeof(size_t) != sizeof(char*)) ||
+ (MAX_SIZE_T < MIN_CHUNK_SIZE) ||
+ (sizeof(int) < 4) ||
+ (MALLOC_ALIGNMENT < (size_t)8U) ||
+ ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
+ ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) ||
+ ((mparams.granularity & (mparams.granularity-SIZE_T_ONE)) != 0) ||
+ ((mparams.page_size & (mparams.page_size-SIZE_T_ONE)) != 0))
+ ABORT;
+ }
+ return 0;
+}
+
+/* support for mallopt */
+static int change_mparam(int param_number, int value) {
+ size_t val = (size_t)value;
+ init_mparams();
+ switch(param_number) {
+ case M_TRIM_THRESHOLD:
+ mparams.trim_threshold = val;
+ return 1;
+ case M_GRANULARITY:
+ if (val >= mparams.page_size && ((val & (val-1)) == 0)) {
+ mparams.granularity = val;
+ return 1;
+ }
+ else
+ return 0;
+ case M_MMAP_THRESHOLD:
+ mparams.mmap_threshold = val;
+ return 1;
+ default:
+ return 0;
+ }
+}
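/*
  Illustrative usage (not part of the allocator): change_mparam() is the
  backend that mallopt() dispatches to, e.g.

      change_mparam(M_MMAP_THRESHOLD, 1024 * 1024); /* returns 1 (always accepted) */
      change_mparam(M_GRANULARITY,    128 * 1024);  /* returns 1 only if the value
                                                       is a power of two >= page_size */

  Values failing the granularity check are rejected with a 0 return.
*/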
+
+#if DEBUG
+/* ------------------------- Debugging Support --------------------------- */
+
+/* Check properties of any chunk, whether free, inuse, mmapped etc */
+static void do_check_any_chunk(mstate m, mchunkptr p) {
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+}
+
+/* Check properties of top chunk */
+static void do_check_top_chunk(mstate m, mchunkptr p) {
+ msegmentptr sp = segment_holding(m, (char*)p);
+ size_t sz = chunksize(p);
+ assert(sp != 0);
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+ assert(sz == m->topsize);
+ assert(sz > 0);
+ assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
+ assert(pinuse(p));
+ assert(!next_pinuse(p));
+}
+
+/* Check properties of (inuse) mmapped chunks */
+static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
+ size_t sz = chunksize(p);
+ size_t len = (sz + (p->prev_foot & ~IS_MMAPPED_BIT) + MMAP_FOOT_PAD);
+ assert(is_mmapped(p));
+ assert(use_mmap(m));
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+ assert(!is_small(sz));
+ assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
+ assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
+ assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
+}
+
+/* Check properties of inuse chunks */
+static void do_check_inuse_chunk(mstate m, mchunkptr p) {
+ do_check_any_chunk(m, p);
+ assert(cinuse(p));
+ assert(next_pinuse(p));
+ /* If not pinuse and not mmapped, previous chunk has OK offset */
+ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
+ if (is_mmapped(p))
+ do_check_mmapped_chunk(m, p);
+}
+
+/* Check properties of free chunks */
+static void do_check_free_chunk(mstate m, mchunkptr p) {
+ size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT);
+ mchunkptr next = chunk_plus_offset(p, sz);
+ do_check_any_chunk(m, p);
+ assert(!cinuse(p));
+ assert(!next_pinuse(p));
+ assert (!is_mmapped(p));
+ if (p != m->dv && p != m->top) {
+ if (sz >= MIN_CHUNK_SIZE) {
+ assert((sz & CHUNK_ALIGN_MASK) == 0);
+ assert(is_aligned(chunk2mem(p)));
+ assert(next->prev_foot == sz);
+ assert(pinuse(p));
+ assert (next == m->top || cinuse(next));
+ assert(p->fd->bk == p);
+ assert(p->bk->fd == p);
+ }
+ else /* markers are always of size SIZE_T_SIZE */
+ assert(sz == SIZE_T_SIZE);
+ }
+}
+
+/* Check properties of malloced chunks at the point they are malloced */
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT);
+ do_check_inuse_chunk(m, p);
+ assert((sz & CHUNK_ALIGN_MASK) == 0);
+ assert(sz >= MIN_CHUNK_SIZE);
+ assert(sz >= s);
+ /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
+ assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
+ }
+}
+
+/* Check a tree and its subtrees. */
+static void do_check_tree(mstate m, tchunkptr t) {
+ tchunkptr head = 0;
+ tchunkptr u = t;
+ bindex_t tindex = t->index;
+ size_t tsize = chunksize(t);
+ bindex_t idx;
+ compute_tree_index(tsize, idx);
+ assert(tindex == idx);
+ assert(tsize >= MIN_LARGE_SIZE);
+ assert(tsize >= minsize_for_tree_index(idx));
+ assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
+
+ do { /* traverse through chain of same-sized nodes */
+ do_check_any_chunk(m, ((mchunkptr)u));
+ assert(u->index == tindex);
+ assert(chunksize(u) == tsize);
+ assert(!cinuse(u));
+ assert(!next_pinuse(u));
+ assert(u->fd->bk == u);
+ assert(u->bk->fd == u);
+ if (u->parent == 0) {
+ assert(u->child[0] == 0);
+ assert(u->child[1] == 0);
+ }
+ else {
+ assert(head == 0); /* only one node on chain has parent */
+ head = u;
+ assert(u->parent != u);
+ assert (u->parent->child[0] == u ||
+ u->parent->child[1] == u ||
+ *((tbinptr*)(u->parent)) == u);
+ if (u->child[0] != 0) {
+ assert(u->child[0]->parent == u);
+ assert(u->child[0] != u);
+ do_check_tree(m, u->child[0]);
+ }
+ if (u->child[1] != 0) {
+ assert(u->child[1]->parent == u);
+ assert(u->child[1] != u);
+ do_check_tree(m, u->child[1]);
+ }
+ if (u->child[0] != 0 && u->child[1] != 0) {
+ assert(chunksize(u->child[0]) < chunksize(u->child[1]));
+ }
+ }
+ u = u->fd;
+ } while (u != t);
+ assert(head != 0);
+}
+
+/* Check all the chunks in a treebin. */
+static void do_check_treebin(mstate m, bindex_t i) {
+ tbinptr* tb = treebin_at(m, i);
+ tchunkptr t = *tb;
+ int empty = (m->treemap & (1U << i)) == 0;
+ if (t == 0)
+ assert(empty);
+ if (!empty)
+ do_check_tree(m, t);
+}
+
+/* Check all the chunks in a smallbin. */
+static void do_check_smallbin(mstate m, bindex_t i) {
+ sbinptr b = smallbin_at(m, i);
+ mchunkptr p = b->bk;
+ unsigned int empty = (m->smallmap & (1U << i)) == 0;
+ if (p == b)
+ assert(empty);
+ if (!empty) {
+ for (; p != b; p = p->bk) {
+ size_t size = chunksize(p);
+ mchunkptr q;
+ /* each chunk claims to be free */
+ do_check_free_chunk(m, p);
+ /* chunk belongs in bin */
+ assert(small_index(size) == i);
+ assert(p->bk == b || chunksize(p->bk) == chunksize(p));
+ /* chunk is followed by an inuse chunk */
+ q = next_chunk(p);
+ if (q->head != FENCEPOST_HEAD)
+ do_check_inuse_chunk(m, q);
+ }
+ }
+}
+
+/* Find x in a bin. Used in other check functions. */
+static int bin_find(mstate m, mchunkptr x) {
+ size_t size = chunksize(x);
+ if (is_small(size)) {
+ bindex_t sidx = small_index(size);
+ sbinptr b = smallbin_at(m, sidx);
+ if (smallmap_is_marked(m, sidx)) {
+ mchunkptr p = b;
+ do {
+ if (p == x)
+ return 1;
+ } while ((p = p->fd) != b);
+ }
+ }
+ else {
+ bindex_t tidx;
+ compute_tree_index(size, tidx);
+ if (treemap_is_marked(m, tidx)) {
+ tchunkptr t = *treebin_at(m, tidx);
+ size_t sizebits = size << leftshift_for_tree_index(tidx);
+ while (t != 0 && chunksize(t) != size) {
+ t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+ sizebits <<= 1;
+ }
+ if (t != 0) {
+ tchunkptr u = t;
+ do {
+ if (u == (tchunkptr)x)
+ return 1;
+ } while ((u = u->fd) != t);
+ }
+ }
+ }
+ return 0;
+}
+
+/* Traverse each chunk and check it; return total */
+static size_t traverse_and_check(mstate m) {
+ size_t sum = 0;
+ if (is_initialized(m)) {
+ msegmentptr s = &m->seg;
+ sum += m->topsize + TOP_FOOT_SIZE;
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ mchunkptr lastq = 0;
+ assert(pinuse(q));
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ sum += chunksize(q);
+ if (cinuse(q)) {
+ assert(!bin_find(m, q));
+ do_check_inuse_chunk(m, q);
+ }
+ else {
+ assert(q == m->dv || bin_find(m, q));
+ assert(lastq == 0 || cinuse(lastq)); /* Not 2 consecutive free */
+ do_check_free_chunk(m, q);
+ }
+ lastq = q;
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+ }
+ return sum;
+}
+
+/* Check all properties of malloc_state. */
+static void do_check_malloc_state(mstate m) {
+ bindex_t i;
+ size_t total;
+ /* check bins */
+ for (i = 0; i < NSMALLBINS; ++i)
+ do_check_smallbin(m, i);
+ for (i = 0; i < NTREEBINS; ++i)
+ do_check_treebin(m, i);
+
+ if (m->dvsize != 0) { /* check dv chunk */
+ do_check_any_chunk(m, m->dv);
+ assert(m->dvsize == chunksize(m->dv));
+ assert(m->dvsize >= MIN_CHUNK_SIZE);
+ assert(bin_find(m, m->dv) == 0);
+ }
+
+ if (m->top != 0) { /* check top chunk */
+ do_check_top_chunk(m, m->top);
+ assert(m->topsize == chunksize(m->top));
+ assert(m->topsize > 0);
+ assert(bin_find(m, m->top) == 0);
+ }
+
+ total = traverse_and_check(m);
+ assert(total <= m->footprint);
+ assert(m->footprint <= m->max_footprint);
+}
+#endif /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+
+#if !NO_MALLINFO
+static struct mallinfo internal_mallinfo(mstate m) {
+ struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ if (!PREACTION(m)) {
+ check_malloc_state(m);
+ if (is_initialized(m)) {
+ size_t nfree = SIZE_T_ONE; /* top always free */
+ size_t mfree = m->topsize + TOP_FOOT_SIZE;
+ size_t sum = mfree;
+ msegmentptr s = &m->seg;
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ size_t sz = chunksize(q);
+ sum += sz;
+ if (!cinuse(q)) {
+ mfree += sz;
+ ++nfree;
+ }
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+
+ nm.arena = sum;
+ nm.ordblks = nfree;
+ nm.hblkhd = m->footprint - sum;
+ nm.usmblks = m->max_footprint;
+ nm.uordblks = m->footprint - mfree;
+ nm.fordblks = mfree;
+ nm.keepcost = m->topsize;
+ }
+
+ POSTACTION(m);
+ }
+ return nm;
+}
+#endif /* !NO_MALLINFO */
+
+static void internal_malloc_stats(mstate m) {
+ if (!PREACTION(m)) {
+ size_t maxfp = 0;
+ size_t fp = 0;
+ size_t used = 0;
+ check_malloc_state(m);
+ if (is_initialized(m)) {
+ msegmentptr s = &m->seg;
+ maxfp = m->max_footprint;
+ fp = m->footprint;
+ used = fp - (m->topsize + TOP_FOOT_SIZE);
+
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ if (!cinuse(q))
+ used -= chunksize(q);
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+ }
+
+ fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
+ fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp));
+ fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used));
+
+ POSTACTION(m);
+ }
+}
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/*
+ Various forms of linking and unlinking are defined as macros, even
+ the ones for trees, which are very long but have very short typical
+ paths. This is ugly but reduces reliance on the inlining support of
+ compilers.
+*/
+
+/* Link a free chunk into a smallbin */
+#define insert_small_chunk(M, P, S) {\
+ bindex_t I = small_index(S);\
+ mchunkptr B = smallbin_at(M, I);\
+ mchunkptr F = B;\
+ assert(S >= MIN_CHUNK_SIZE);\
+ if (!smallmap_is_marked(M, I))\
+ mark_smallmap(M, I);\
+ else if (RTCHECK(ok_address(M, B->fd)))\
+ F = B->fd;\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ B->fd = P;\
+ F->bk = P;\
+ P->fd = F;\
+ P->bk = B;\
+}
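/*
  Illustrative picture (not part of the allocator): smallbins are circular
  doubly-linked lists whose head B is the bin sentinel itself.  Inserting P
  into a bin that already holds F as its first chunk turns

      B <-> F <-> ... <-> B      into      B <-> P <-> F <-> ... <-> B

  which is exactly the four pointer stores at the end of insert_small_chunk
  above (B->fd = P, F->bk = P, P->fd = F, P->bk = B).
*/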
+
+/* Unlink a chunk from a smallbin */
+#define unlink_small_chunk(M, P, S) {\
+ mchunkptr F = P->fd;\
+ mchunkptr B = P->bk;\
+ bindex_t I = small_index(S);\
+ assert(P != B);\
+ assert(P != F);\
+ assert(chunksize(P) == small_index2size(I));\
+ if (F == B)\
+ clear_smallmap(M, I);\
+ else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
+ (B == smallbin_at(M,I) || ok_address(M, B)))) {\
+ F->bk = B;\
+ B->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+}
+
+/* Unlink the first chunk from a smallbin */
+#define unlink_first_small_chunk(M, B, P, I) {\
+ mchunkptr F = P->fd;\
+ assert(P != B);\
+ assert(P != F);\
+ assert(chunksize(P) == small_index2size(I));\
+ if (B == F)\
+ clear_smallmap(M, I);\
+ else if (RTCHECK(ok_address(M, F))) {\
+ B->fd = F;\
+ F->bk = B;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+}
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+ size_t DVS = M->dvsize;\
+ if (DVS != 0) {\
+ mchunkptr DV = M->dv;\
+ assert(is_small(DVS));\
+ insert_small_chunk(M, DV, DVS);\
+ }\
+ M->dvsize = S;\
+ M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree */
+#define insert_large_chunk(M, X, S) {\
+ tbinptr* H;\
+ bindex_t I;\
+ compute_tree_index(S, I);\
+ H = treebin_at(M, I);\
+ X->index = I;\
+ X->child[0] = X->child[1] = 0;\
+ if (!treemap_is_marked(M, I)) {\
+ mark_treemap(M, I);\
+ *H = X;\
+ X->parent = (tchunkptr)H;\
+ X->fd = X->bk = X;\
+ }\
+ else {\
+ tchunkptr T = *H;\
+ size_t K = S << leftshift_for_tree_index(I);\
+ for (;;) {\
+ if (chunksize(T) != S) {\
+ tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+ K <<= 1;\
+ if (*C != 0)\
+ T = *C;\
+ else if (RTCHECK(ok_address(M, C))) {\
+ *C = X;\
+ X->parent = T;\
+ X->fd = X->bk = X;\
+ break;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ break;\
+ }\
+ }\
+ else {\
+ tchunkptr F = T->fd;\
+ if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
+ T->fd = F->bk = X;\
+ X->fd = F;\
+ X->bk = T;\
+ X->parent = 0;\
+ break;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ break;\
+ }\
+ }\
+ }\
+ }\
+}
+
+/*
+ Unlink steps:
+
+ 1. If x is a chained node, unlink it from its same-sized fd/bk links
+ and choose its bk node as its replacement.
+ 2. If x was the last node of its size, but not a leaf node, it must
+ be replaced with a leaf node (not merely one with an open left or
right), to make sure that lefts and rights of descendants
correspond properly to bit masks. We use the rightmost descendant
- of x. We could use any other leaf, but this is easy to locate and
- tends to counteract removal of leftmosts elsewhere, and so keeps
- paths shorter than minimally guaranteed. This doesn't loop much
- because on average a node in a tree is near the bottom.
- 3. If x is the base of a chain (i.e., has parent links) relink
- x's parent and children to x's replacement (or null if none).
-*/
-
-#define unlink_large_chunk(M, X) {\
- tchunkptr XP = X->parent;\
- tchunkptr R;\
- if (X->bk != X) {\
- tchunkptr F = X->fd;\
- R = X->bk;\
- if (RTCHECK(ok_address(M, F))) {\
- F->bk = R;\
- R->fd = F;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- else {\
- tchunkptr* RP;\
- if (((R = *(RP = &(X->child[1]))) != 0) ||\
- ((R = *(RP = &(X->child[0]))) != 0)) {\
- tchunkptr* CP;\
- while ((*(CP = &(R->child[1])) != 0) ||\
- (*(CP = &(R->child[0])) != 0)) {\
- R = *(RP = CP);\
- }\
- if (RTCHECK(ok_address(M, RP)))\
- *RP = 0;\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- }\
- if (XP != 0) {\
- tbinptr* H = treebin_at(M, X->index);\
- if (X == *H) {\
- if ((*H = R) == 0) \
- clear_treemap(M, X->index);\
- }\
- else if (RTCHECK(ok_address(M, XP))) {\
- if (XP->child[0] == X) \
- XP->child[0] = R;\
- else \
- XP->child[1] = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- if (R != 0) {\
- if (RTCHECK(ok_address(M, R))) {\
- tchunkptr C0, C1;\
- R->parent = XP;\
- if ((C0 = X->child[0]) != 0) {\
- if (RTCHECK(ok_address(M, C0))) {\
- R->child[0] = C0;\
- C0->parent = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- if ((C1 = X->child[1]) != 0) {\
- if (RTCHECK(ok_address(M, C1))) {\
- R->child[1] = C1;\
- C1->parent = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
-}
-
-/* Relays to large vs small bin operations */
-
-#define insert_chunk(M, P, S)\
- if (is_small(S)) insert_small_chunk(M, P, S)\
- else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
-
-#define unlink_chunk(M, P, S)\
- if (is_small(S)) unlink_small_chunk(M, P, S)\
- else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
-
-
-/* Relays to internal calls to malloc/free from realloc, memalign etc */
-
-#if ONLY_MSPACES
-#define internal_malloc(m, b) mspace_malloc(m, b)
-#define internal_free(m, mem) mspace_free(m,mem);
-#else /* ONLY_MSPACES */
-#if MSPACES
-#define internal_malloc(m, b)\
- (m == gm)? dlmalloc(b) : mspace_malloc(m, b)
-#define internal_free(m, mem)\
- if (m == gm) dlfree(mem); else mspace_free(m,mem);
-#else /* MSPACES */
-#define internal_malloc(m, b) dlmalloc(b)
-#define internal_free(m, mem) dlfree(mem)
-#endif /* MSPACES */
-#endif /* ONLY_MSPACES */
-
-/* ----------------------- Direct-mmapping chunks ----------------------- */
-
-/*
- Directly mmapped chunks are set up with an offset to the start of
- the mmapped region stored in the prev_foot field of the chunk. This
- allows reconstruction of the required argument to MUNMAP when freed,
- and also allows adjustment of the returned chunk to meet alignment
- requirements (especially in memalign). There is also enough space
- allocated to hold a fake next chunk of size SIZE_T_SIZE to maintain
- the PINUSE bit so frees can be checked.
-*/
-
-/* Malloc using mmap */
-static void* mmap_alloc(mstate m, size_t nb) {
- size_t mmsize = granularity_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
- if (mmsize > nb) { /* Check for wrap around 0 */
- char* mm = (char*)(DIRECT_MMAP(mmsize));
- if (mm != CMFAIL) {
- size_t offset = align_offset(chunk2mem(mm));
- size_t psize = mmsize - offset - MMAP_FOOT_PAD;
- mchunkptr p = (mchunkptr)(mm + offset);
- p->prev_foot = offset | IS_MMAPPED_BIT;
- (p)->head = (psize|CINUSE_BIT);
- mark_inuse_foot(m, p, psize);
- chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
- chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
-
- if (mm < m->least_addr)
- m->least_addr = mm;
- if ((m->footprint += mmsize) > m->max_footprint)
- m->max_footprint = m->footprint;
- assert(is_aligned(chunk2mem(p)));
- check_mmapped_chunk(m, p);
- return chunk2mem(p);
- }
- }
- return 0;
-}
-
-/* Realloc using mmap */
-static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
- size_t oldsize = chunksize(oldp);
- if (is_small(nb)) /* Can't shrink mmap regions below small size */
- return 0;
- /* Keep old chunk if big enough but not too big */
- if (oldsize >= nb + SIZE_T_SIZE &&
- (oldsize - nb) <= (mparams.granularity << 1))
- return oldp;
- else {
- size_t offset = oldp->prev_foot & ~IS_MMAPPED_BIT;
- size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
- size_t newmmsize = granularity_align(nb + SIX_SIZE_T_SIZES +
- CHUNK_ALIGN_MASK);
- char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
- oldmmsize, newmmsize, 1);
- if (cp != CMFAIL) {
- mchunkptr newp = (mchunkptr)(cp + offset);
- size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
- newp->head = (psize|CINUSE_BIT);
- mark_inuse_foot(m, newp, psize);
- chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
- chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
-
- if (cp < m->least_addr)
- m->least_addr = cp;
- if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
- m->max_footprint = m->footprint;
- check_mmapped_chunk(m, newp);
- return newp;
- }
- }
- return 0;
-}
-
-/* -------------------------- mspace management -------------------------- */
-
-/* Initialize top chunk and its size */
-static void init_top(mstate m, mchunkptr p, size_t psize) {
- /* Ensure alignment */
- size_t offset = align_offset(chunk2mem(p));
- p = (mchunkptr)((char*)p + offset);
- psize -= offset;
-
- m->top = p;
- m->topsize = psize;
- p->head = psize | PINUSE_BIT;
- /* set size of fake trailing chunk holding overhead space only once */
- chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
- m->trim_check = mparams.trim_threshold; /* reset on each update */
-}
-
-/* Initialize bins for a new mstate that is otherwise zeroed out */
-static void init_bins(mstate m) {
- /* Establish circular links for smallbins */
- bindex_t i;
- for (i = 0; i < NSMALLBINS; ++i) {
- sbinptr bin = smallbin_at(m,i);
- bin->fd = bin->bk = bin;
- }
-}
-
-#if PROCEED_ON_ERROR
-
-/* default corruption action */
-static void reset_on_error(mstate m) {
- int i;
- ++malloc_corruption_error_count;
- /* Reinitialize fields to forget about all memory */
- m->smallbins = m->treebins = 0;
- m->dvsize = m->topsize = 0;
- m->seg.base = 0;
- m->seg.size = 0;
- m->seg.next = 0;
- m->top = m->dv = 0;
- for (i = 0; i < NTREEBINS; ++i)
- *treebin_at(m, i) = 0;
- init_bins(m);
-}
-#endif /* PROCEED_ON_ERROR */
-
-/* Allocate chunk and prepend remainder with chunk in successor base. */
-static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
- size_t nb) {
- mchunkptr p = align_as_chunk(newbase);
- mchunkptr oldfirst = align_as_chunk(oldbase);
- size_t psize = (char*)oldfirst - (char*)p;
- mchunkptr q = chunk_plus_offset(p, nb);
- size_t qsize = psize - nb;
- set_size_and_pinuse_of_inuse_chunk(m, p, nb);
-
- assert((char*)oldfirst > (char*)q);
- assert(pinuse(oldfirst));
- assert(qsize >= MIN_CHUNK_SIZE);
-
- /* consolidate remainder with first chunk of old base */
- if (oldfirst == m->top) {
- size_t tsize = m->topsize += qsize;
- m->top = q;
- q->head = tsize | PINUSE_BIT;
- check_top_chunk(m, q);
- }
- else if (oldfirst == m->dv) {
- size_t dsize = m->dvsize += qsize;
- m->dv = q;
- set_size_and_pinuse_of_free_chunk(q, dsize);
- }
- else {
- if (!cinuse(oldfirst)) {
- size_t nsize = chunksize(oldfirst);
- unlink_chunk(m, oldfirst, nsize);
- oldfirst = chunk_plus_offset(oldfirst, nsize);
- qsize += nsize;
- }
- set_free_with_pinuse(q, qsize, oldfirst);
- insert_chunk(m, q, qsize);
- check_free_chunk(m, q);
- }
-
- check_malloced_chunk(m, chunk2mem(p), nb);
- return chunk2mem(p);
-}
-
-
-/* Add a segment to hold a new noncontiguous region */
-static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
- /* Determine locations and sizes of segment, fenceposts, old top */
- char* old_top = (char*)m->top;
- msegmentptr oldsp = segment_holding(m, old_top);
- char* old_end = oldsp->base + oldsp->size;
- size_t ssize = pad_request(sizeof(struct malloc_segment));
- char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
- size_t offset = align_offset(chunk2mem(rawsp));
- char* asp = rawsp + offset;
- char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
- mchunkptr sp = (mchunkptr)csp;
- msegmentptr ss = (msegmentptr)(chunk2mem(sp));
- mchunkptr tnext = chunk_plus_offset(sp, ssize);
- mchunkptr p = tnext;
- int nfences = 0;
-
- /* reset top to new space */
- init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
-
- /* Set up segment record */
- assert(is_aligned(ss));
- set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
- *ss = m->seg; /* Push current record */
- m->seg.base = tbase;
- m->seg.size = tsize;
+ of x. We could use any other leaf, but this is easy to locate and
+ tends to counteract removal of leftmosts elsewhere, and so keeps
+ paths shorter than minimally guaranteed. This doesn't loop much
+ because on average a node in a tree is near the bottom.
+ 3. If x is the base of a chain (i.e., has parent links) relink
+ x's parent and children to x's replacement (or null if none).
+*/
+
+#define unlink_large_chunk(M, X) {\
+ tchunkptr XP = X->parent;\
+ tchunkptr R;\
+ if (X->bk != X) {\
+ tchunkptr F = X->fd;\
+ R = X->bk;\
+ if (RTCHECK(ok_address(M, F))) {\
+ F->bk = R;\
+ R->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else {\
+ tchunkptr* RP;\
+ if (((R = *(RP = &(X->child[1]))) != 0) ||\
+ ((R = *(RP = &(X->child[0]))) != 0)) {\
+ tchunkptr* CP;\
+ while ((*(CP = &(R->child[1])) != 0) ||\
+ (*(CP = &(R->child[0])) != 0)) {\
+ R = *(RP = CP);\
+ }\
+ if (RTCHECK(ok_address(M, RP)))\
+ *RP = 0;\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ }\
+ if (XP != 0) {\
+ tbinptr* H = treebin_at(M, X->index);\
+ if (X == *H) {\
+ if ((*H = R) == 0) \
+ clear_treemap(M, X->index);\
+ }\
+ else if (RTCHECK(ok_address(M, XP))) {\
+ if (XP->child[0] == X) \
+ XP->child[0] = R;\
+ else \
+ XP->child[1] = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ if (R != 0) {\
+ if (RTCHECK(ok_address(M, R))) {\
+ tchunkptr C0, C1;\
+ R->parent = XP;\
+ if ((C0 = X->child[0]) != 0) {\
+ if (RTCHECK(ok_address(M, C0))) {\
+ R->child[0] = C0;\
+ C0->parent = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ if ((C1 = X->child[1]) != 0) {\
+ if (RTCHECK(ok_address(M, C1))) {\
+ R->child[1] = C1;\
+ C1->parent = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+}
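/*
  Descriptive note: in the second branch above (X->bk == X, i.e. X is the
  only chunk of its size), R walks down child[1] (else child[0]) to X's
  rightmost descendant, is detached from its old parent via *RP = 0, and is
  then spliced into X's position: R->parent = XP and R adopts X's surviving
  children, with every touched pointer vetted by RTCHECK(ok_address(...))
  first.
*/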
+
+/* Relays to large vs small bin operations */
+
+#define insert_chunk(M, P, S)\
+ if (is_small(S)) insert_small_chunk(M, P, S)\
+ else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+ if (is_small(S)) unlink_small_chunk(M, P, S)\
+ else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+
+/* Relays to internal calls to malloc/free from realloc, memalign etc */
+
+#if ONLY_MSPACES
+#define internal_malloc(m, b) mspace_malloc(m, b)
+#define internal_free(m, mem) mspace_free(m,mem);
+#else /* ONLY_MSPACES */
+#if MSPACES
+#define internal_malloc(m, b)\
+ (m == gm)? dlmalloc(b) : mspace_malloc(m, b)
+#define internal_free(m, mem)\
+ if (m == gm) dlfree(mem); else mspace_free(m,mem);
+#else /* MSPACES */
+#define internal_malloc(m, b) dlmalloc(b)
+#define internal_free(m, mem) dlfree(mem)
+#endif /* MSPACES */
+#endif /* ONLY_MSPACES */
+
+/* ----------------------- Direct-mmapping chunks ----------------------- */
+
+/*
+ Directly mmapped chunks are set up with an offset to the start of
+ the mmapped region stored in the prev_foot field of the chunk. This
+ allows reconstruction of the required argument to MUNMAP when freed,
+ and also allows adjustment of the returned chunk to meet alignment
+ requirements (especially in memalign). There is also enough space
+ allocated to hold a fake next chunk of size SIZE_T_SIZE to maintain
+ the PINUSE bit so frees can be checked.
+*/
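/*
  Illustrative sketch (not part of the allocator): when such a chunk is
  freed, the original mapping is reconstructed from the offset stored in
  prev_foot, roughly as

      size_t offset = p->prev_foot & ~IS_MMAPPED_BIT;
      size_t length = chunksize(p) + offset + MMAP_FOOT_PAD;
      CALL_MUNMAP((char*)p - offset, length);

  undoing the layout that mmap_alloc() below establishes.
*/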
+
+/* Malloc using mmap */
+static void* mmap_alloc(mstate m, size_t nb) {
+ size_t mmsize = granularity_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ if (mmsize > nb) { /* Check for wrap around 0 */
+ char* mm = (char*)(DIRECT_MMAP(mmsize));
+ if (mm != CMFAIL) {
+ size_t offset = align_offset(chunk2mem(mm));
+ size_t psize = mmsize - offset - MMAP_FOOT_PAD;
+ mchunkptr p = (mchunkptr)(mm + offset);
+ p->prev_foot = offset | IS_MMAPPED_BIT;
+ (p)->head = (psize|CINUSE_BIT);
+ mark_inuse_foot(m, p, psize);
+ chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+
+ if (mm < m->least_addr)
+ m->least_addr = mm;
+ if ((m->footprint += mmsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+ assert(is_aligned(chunk2mem(p)));
+ check_mmapped_chunk(m, p);
+ return chunk2mem(p);
+ }
+ }
+ return 0;
+}
+
+/* Realloc using mmap */
+static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
+ size_t oldsize = chunksize(oldp);
+ if (is_small(nb)) /* Can't shrink mmap regions below small size */
+ return 0;
+ /* Keep old chunk if big enough but not too big */
+ if (oldsize >= nb + SIZE_T_SIZE &&
+ (oldsize - nb) <= (mparams.granularity << 1))
+ return oldp;
+ else {
+ size_t offset = oldp->prev_foot & ~IS_MMAPPED_BIT;
+ size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
+ size_t newmmsize = granularity_align(nb + SIX_SIZE_T_SIZES +
+ CHUNK_ALIGN_MASK);
+ char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
+ oldmmsize, newmmsize, 1);
+ if (cp != CMFAIL) {
+ mchunkptr newp = (mchunkptr)(cp + offset);
+ size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
+ newp->head = (psize|CINUSE_BIT);
+ mark_inuse_foot(m, newp, psize);
+ chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+
+ if (cp < m->least_addr)
+ m->least_addr = cp;
+ if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+ check_mmapped_chunk(m, newp);
+ return newp;
+ }
+ }
+ return 0;
+}
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Initialize top chunk and its size */
+static void init_top(mstate m, mchunkptr p, size_t psize) {
+ /* Ensure alignment */
+ size_t offset = align_offset(chunk2mem(p));
+ p = (mchunkptr)((char*)p + offset);
+ psize -= offset;
+
+ m->top = p;
+ m->topsize = psize;
+ p->head = psize | PINUSE_BIT;
+ /* set size of fake trailing chunk holding overhead space only once */
+ chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
+ m->trim_check = mparams.trim_threshold; /* reset on each update */
+}
+
+/* Initialize bins for a new mstate that is otherwise zeroed out */
+static void init_bins(mstate m) {
+ /* Establish circular links for smallbins */
+ bindex_t i;
+ for (i = 0; i < NSMALLBINS; ++i) {
+ sbinptr bin = smallbin_at(m,i);
+ bin->fd = bin->bk = bin;
+ }
+}
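/*
  Descriptive note: after init_bins, every empty smallbin is a self-linked
  sentinel (bin->fd == bin->bk == bin), so insert_small_chunk and
  unlink_small_chunk never need a special case for an empty bin.
*/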
+
+#if PROCEED_ON_ERROR
+
+/* default corruption action */
+static void reset_on_error(mstate m) {
+ int i;
+ ++malloc_corruption_error_count;
+ /* Reinitialize fields to forget about all memory */
+ m->smallbins = m->treebins = 0;
+ m->dvsize = m->topsize = 0;
+ m->seg.base = 0;
+ m->seg.size = 0;
+ m->seg.next = 0;
+ m->top = m->dv = 0;
+ for (i = 0; i < NTREEBINS; ++i)
+ *treebin_at(m, i) = 0;
+ init_bins(m);
+}
+#endif /* PROCEED_ON_ERROR */
+
+/* Allocate chunk and prepend remainder with chunk in successor base. */
+static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
+ size_t nb) {
+ mchunkptr p = align_as_chunk(newbase);
+ mchunkptr oldfirst = align_as_chunk(oldbase);
+ size_t psize = (char*)oldfirst - (char*)p;
+ mchunkptr q = chunk_plus_offset(p, nb);
+ size_t qsize = psize - nb;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+ assert((char*)oldfirst > (char*)q);
+ assert(pinuse(oldfirst));
+ assert(qsize >= MIN_CHUNK_SIZE);
+
+ /* consolidate remainder with first chunk of old base */
+ if (oldfirst == m->top) {
+ size_t tsize = m->topsize += qsize;
+ m->top = q;
+ q->head = tsize | PINUSE_BIT;
+ check_top_chunk(m, q);
+ }
+ else if (oldfirst == m->dv) {
+ size_t dsize = m->dvsize += qsize;
+ m->dv = q;
+ set_size_and_pinuse_of_free_chunk(q, dsize);
+ }
+ else {
+ if (!cinuse(oldfirst)) {
+ size_t nsize = chunksize(oldfirst);
+ unlink_chunk(m, oldfirst, nsize);
+ oldfirst = chunk_plus_offset(oldfirst, nsize);
+ qsize += nsize;
+ }
+ set_free_with_pinuse(q, qsize, oldfirst);
+ insert_chunk(m, q, qsize);
+ check_free_chunk(m, q);
+ }
+
+ check_malloced_chunk(m, chunk2mem(p), nb);
+ return chunk2mem(p);
+}
+
+
+/* Add a segment to hold a new noncontiguous region */
+static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
+ /* Determine locations and sizes of segment, fenceposts, old top */
+ char* old_top = (char*)m->top;
+ msegmentptr oldsp = segment_holding(m, old_top);
+ char* old_end = oldsp->base + oldsp->size;
+ size_t ssize = pad_request(sizeof(struct malloc_segment));
+ char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ size_t offset = align_offset(chunk2mem(rawsp));
+ char* asp = rawsp + offset;
+ char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+ mchunkptr sp = (mchunkptr)csp;
+ msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+ mchunkptr tnext = chunk_plus_offset(sp, ssize);
+ mchunkptr p = tnext;
+ int nfences = 0;
+
+ /* reset top to new space */
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+ /* Set up segment record */
+ assert(is_aligned(ss));
+ set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+ *ss = m->seg; /* Push current record */
+ m->seg.base = tbase;
+ m->seg.size = tsize;
(void)set_segment_flags(&m->seg, mmapped);
- m->seg.next = ss;
-
- /* Insert trailing fenceposts */
- for (;;) {
- mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
- p->head = FENCEPOST_HEAD;
- ++nfences;
- if ((char*)(&(nextp->head)) < old_end)
- p = nextp;
- else
- break;
- }
- assert(nfences >= 2);
-
- /* Insert the rest of old top into a bin as an ordinary free chunk */
- if (csp != old_top) {
- mchunkptr q = (mchunkptr)old_top;
- size_t psize = csp - old_top;
- mchunkptr tn = chunk_plus_offset(q, psize);
- set_free_with_pinuse(q, psize, tn);
- insert_chunk(m, q, psize);
- }
-
- check_top_chunk(m, m->top);
-}
-
-/* -------------------------- System allocation -------------------------- */
-
-/* Get memory from system using MORECORE or MMAP */
-static void* sys_alloc(mstate m, size_t nb) {
- char* tbase = CMFAIL;
- size_t tsize = 0;
- flag_t mmap_flag = 0;
-
- init_mparams();
-
- /* Directly map large chunks */
- if (use_mmap(m) && nb >= mparams.mmap_threshold) {
- void* mem = mmap_alloc(m, nb);
- if (mem != 0)
- return mem;
- }
-
- /*
- Try getting memory in any of three ways (in most-preferred to
- least-preferred order):
- 1. A call to MORECORE that can normally contiguously extend memory.
-     (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
-     main space is mmapped or a previous contiguous call failed)
- 2. A call to MMAP new space (disabled if not HAVE_MMAP).
- Note that under the default settings, if MORECORE is unable to
- fulfill a request, and HAVE_MMAP is true, then mmap is
- used as a noncontiguous system allocator. This is a useful backup
- strategy for systems with holes in address spaces -- in this case
- sbrk cannot contiguously expand the heap, but mmap may be able to
- find space.
- 3. A call to MORECORE that cannot usually contiguously extend memory.
- (disabled if not HAVE_MORECORE)
- */
-
- if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
- char* br = CMFAIL;
- msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
- size_t asize = 0;
- ACQUIRE_MORECORE_LOCK();
-
- if (ss == 0) { /* First time through or recovery */
- char* base = (char*)CALL_MORECORE(0);
- if (base != CMFAIL) {
- asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE);
- /* Adjust to end on a page boundary */
- if (!is_page_aligned(base))
- asize += (page_align((size_t)base) - (size_t)base);
- /* Can't call MORECORE if size is negative when treated as signed */
- if (asize < HALF_MAX_SIZE_T &&
- (br = (char*)(CALL_MORECORE(asize))) == base) {
- tbase = base;
- tsize = asize;
- }
- }
- }
- else {
- /* Subtract out existing available top space from MORECORE request. */
- asize = granularity_align(nb - m->topsize + TOP_FOOT_SIZE + SIZE_T_ONE);
- /* Use mem here only if it did continuously extend old space */
- if (asize < HALF_MAX_SIZE_T &&
- (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
- tbase = br;
- tsize = asize;
- }
- }
-
- if (tbase == CMFAIL) { /* Cope with partial failure */
- if (br != CMFAIL) { /* Try to use/extend the space we did get */
- if (asize < HALF_MAX_SIZE_T &&
- asize < nb + TOP_FOOT_SIZE + SIZE_T_ONE) {
- size_t esize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE - asize);
- if (esize < HALF_MAX_SIZE_T) {
- char* end = (char*)CALL_MORECORE(esize);
- if (end != CMFAIL)
- asize += esize;
- else { /* Can't use; try to release */
- (void)CALL_MORECORE(-asize);
- br = CMFAIL;
- }
- }
- }
- }
- if (br != CMFAIL) { /* Use the space we did get */
- tbase = br;
- tsize = asize;
- }
- else
- disable_contiguous(m); /* Don't try contiguous path in the future */
- }
-
- RELEASE_MORECORE_LOCK();
- }
-
- if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */
- size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
- size_t rsize = granularity_align(req);
- if (rsize > nb) { /* Fail if wraps around zero */
- char* mp = (char*)(CALL_MMAP(rsize));
- if (mp != CMFAIL) {
- tbase = mp;
- tsize = rsize;
- mmap_flag = IS_MMAPPED_BIT;
- }
- }
- }
-
- if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
- size_t asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE);
- if (asize < HALF_MAX_SIZE_T) {
- char* br = CMFAIL;
- char* end = CMFAIL;
- ACQUIRE_MORECORE_LOCK();
- br = (char*)(CALL_MORECORE(asize));
- end = (char*)(CALL_MORECORE(0));
- RELEASE_MORECORE_LOCK();
- if (br != CMFAIL && end != CMFAIL && br < end) {
- size_t ssize = end - br;
- if (ssize > nb + TOP_FOOT_SIZE) {
- tbase = br;
- tsize = ssize;
- }
- }
- }
- }
-
- if (tbase != CMFAIL) {
-
- if ((m->footprint += tsize) > m->max_footprint)
- m->max_footprint = m->footprint;
-
- if (!is_initialized(m)) { /* first-time initialization */
- m->seg.base = m->least_addr = tbase;
- m->seg.size = tsize;
+ m->seg.next = ss;
+
+ /* Insert trailing fenceposts */
+ for (;;) {
+ mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+ p->head = FENCEPOST_HEAD;
+ ++nfences;
+ if ((char*)(&(nextp->head)) < old_end)
+ p = nextp;
+ else
+ break;
+ }
+ assert(nfences >= 2);
+
+ /* Insert the rest of old top into a bin as an ordinary free chunk */
+ if (csp != old_top) {
+ mchunkptr q = (mchunkptr)old_top;
+ size_t psize = csp - old_top;
+ mchunkptr tn = chunk_plus_offset(q, psize);
+ set_free_with_pinuse(q, psize, tn);
+ insert_chunk(m, q, psize);
+ }
+
+ check_top_chunk(m, m->top);
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP */
+static void* sys_alloc(mstate m, size_t nb) {
+ char* tbase = CMFAIL;
+ size_t tsize = 0;
+ flag_t mmap_flag = 0;
+
+ init_mparams();
+
+ /* Directly map large chunks */
+ if (use_mmap(m) && nb >= mparams.mmap_threshold) {
+ void* mem = mmap_alloc(m, nb);
+ if (mem != 0)
+ return mem;
+ }
+
+ /*
+ Try getting memory in any of three ways (in most-preferred to
+ least-preferred order):
+ 1. A call to MORECORE that can normally contiguously extend memory.
+     (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
+     main space is mmapped or a previous contiguous call failed)
+ 2. A call to MMAP new space (disabled if not HAVE_MMAP).
+ Note that under the default settings, if MORECORE is unable to
+ fulfill a request, and HAVE_MMAP is true, then mmap is
+ used as a noncontiguous system allocator. This is a useful backup
+ strategy for systems with holes in address spaces -- in this case
+ sbrk cannot contiguously expand the heap, but mmap may be able to
+ find space.
+ 3. A call to MORECORE that cannot usually contiguously extend memory.
+ (disabled if not HAVE_MORECORE)
+ */
+
+ if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+ char* br = CMFAIL;
+ msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
+ size_t asize = 0;
+ ACQUIRE_MORECORE_LOCK();
+
+ if (ss == 0) { /* First time through or recovery */
+ char* base = (char*)CALL_MORECORE(0);
+ if (base != CMFAIL) {
+ asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE);
+ /* Adjust to end on a page boundary */
+ if (!is_page_aligned(base))
+ asize += (page_align((size_t)base) - (size_t)base);
+ /* Can't call MORECORE if size is negative when treated as signed */
+ if (asize < HALF_MAX_SIZE_T &&
+ (br = (char*)(CALL_MORECORE(asize))) == base) {
+ tbase = base;
+ tsize = asize;
+ }
+ }
+ }
+ else {
+ /* Subtract out existing available top space from MORECORE request. */
+ asize = granularity_align(nb - m->topsize + TOP_FOOT_SIZE + SIZE_T_ONE);
+ /* Use mem here only if it did continuously extend old space */
+ if (asize < HALF_MAX_SIZE_T &&
+ (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
+ tbase = br;
+ tsize = asize;
+ }
+ }
+
+ if (tbase == CMFAIL) { /* Cope with partial failure */
+ if (br != CMFAIL) { /* Try to use/extend the space we did get */
+ if (asize < HALF_MAX_SIZE_T &&
+ asize < nb + TOP_FOOT_SIZE + SIZE_T_ONE) {
+ size_t esize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE - asize);
+ if (esize < HALF_MAX_SIZE_T) {
+ char* end = (char*)CALL_MORECORE(esize);
+ if (end != CMFAIL)
+ asize += esize;
+ else { /* Can't use; try to release */
+ (void)CALL_MORECORE(-asize);
+ br = CMFAIL;
+ }
+ }
+ }
+ }
+ if (br != CMFAIL) { /* Use the space we did get */
+ tbase = br;
+ tsize = asize;
+ }
+ else
+ disable_contiguous(m); /* Don't try contiguous path in the future */
+ }
+
+ RELEASE_MORECORE_LOCK();
+ }
+
+ if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */
+ size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
+ size_t rsize = granularity_align(req);
+ if (rsize > nb) { /* Fail if wraps around zero */
+ char* mp = (char*)(CALL_MMAP(rsize));
+ if (mp != CMFAIL) {
+ tbase = mp;
+ tsize = rsize;
+ mmap_flag = IS_MMAPPED_BIT;
+ }
+ }
+ }
+
+ if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
+ size_t asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE);
+ if (asize < HALF_MAX_SIZE_T) {
+ char* br = CMFAIL;
+ char* end = CMFAIL;
+ ACQUIRE_MORECORE_LOCK();
+ br = (char*)(CALL_MORECORE(asize));
+ end = (char*)(CALL_MORECORE(0));
+ RELEASE_MORECORE_LOCK();
+ if (br != CMFAIL && end != CMFAIL && br < end) {
+ size_t ssize = end - br;
+ if (ssize > nb + TOP_FOOT_SIZE) {
+ tbase = br;
+ tsize = ssize;
+ }
+ }
+ }
+ }
+
+ if (tbase != CMFAIL) {
+
+ if ((m->footprint += tsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+
+ if (!is_initialized(m)) { /* first-time initialization */
+ m->seg.base = m->least_addr = tbase;
+ m->seg.size = tsize;
(void)set_segment_flags(&m->seg, mmap_flag);
- m->magic = mparams.magic;
- init_bins(m);
- if (is_global(m))
- init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
- else {
- /* Offset top by embedded malloc_state */
- mchunkptr mn = next_chunk(mem2chunk(m));
- init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
- }
- }
-
- else {
- /* Try to merge with an existing segment */
- msegmentptr sp = &m->seg;
- while (sp != 0 && tbase != sp->base + sp->size)
- sp = sp->next;
- if (sp != 0 &&
- !is_extern_segment(sp) &&
- check_segment_merge(sp, tbase, tsize) &&
- (get_segment_flags(sp) & IS_MMAPPED_BIT) == mmap_flag &&
- segment_holds(sp, m->top)) { /* append */
- sp->size += tsize;
- init_top(m, m->top, m->topsize + tsize);
- }
- else {
- if (tbase < m->least_addr)
- m->least_addr = tbase;
- sp = &m->seg;
- while (sp != 0 && sp->base != tbase + tsize)
- sp = sp->next;
- if (sp != 0 &&
- !is_extern_segment(sp) &&
- check_segment_merge(sp, tbase, tsize) &&
- (get_segment_flags(sp) & IS_MMAPPED_BIT) == mmap_flag) {
- char* oldbase = sp->base;
- sp->base = tbase;
- sp->size += tsize;
- return prepend_alloc(m, tbase, oldbase, nb);
- }
- else
- add_segment(m, tbase, tsize, mmap_flag);
- }
- }
-
- if (nb < m->topsize) { /* Allocate from new or extended top space */
- size_t rsize = m->topsize -= nb;
- mchunkptr p = m->top;
- mchunkptr r = m->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(m, p, nb);
- check_top_chunk(m, m->top);
- check_malloced_chunk(m, chunk2mem(p), nb);
- return chunk2mem(p);
- }
- }
-
- MALLOC_FAILURE_ACTION;
- return 0;
-}
-
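/*
  Illustrative sketch (not dlmalloc code): the fallback order described in
  the comment at the top of sys_alloc, written against raw POSIX sbrk/mmap
  instead of the MORECORE/MMAP macros. toy_sys_alloc and "request" are
  hypothetical names; alignment, footprint accounting and locking are
  deliberately omitted. Assumes a POSIX system where sbrk and MAP_ANONYMOUS
  are available.
*/
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>

static void* toy_sys_alloc(size_t request) {
  /* 1. Preferred: contiguously extend the program break (the MORECORE path). */
  void* brk_mem = sbrk((intptr_t)request);
  if (brk_mem != (void*)-1)
    return brk_mem;
  /* 2. Fallback: a fresh anonymous mapping (the MMAP path), noncontiguous. */
  void* map_mem = mmap(NULL, request, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (map_mem != MAP_FAILED)
    return map_mem;
  /* 3. Both failed: report failure, as MALLOC_FAILURE_ACTION would. */
  return NULL;
}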
-/* ----------------------- system deallocation -------------------------- */
-
-/* Unmap and unlink any mmapped segments that don't contain used chunks */
-static size_t release_unused_segments(mstate m) {
- size_t released = 0;
- msegmentptr pred = &m->seg;
- msegmentptr sp = pred->next;
- while (sp != 0) {
- char* base = sp->base;
- size_t size = sp->size;
- msegmentptr next = sp->next;
- if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
- mchunkptr p = align_as_chunk(base);
- size_t psize = chunksize(p);
- /* Can unmap if first chunk holds entire segment and not pinned */
- if (!cinuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
- tchunkptr tp = (tchunkptr)p;
- assert(segment_holds(sp, (char*)sp));
- if (p == m->dv) {
- m->dv = 0;
- m->dvsize = 0;
- }
- else {
- unlink_large_chunk(m, tp);
- }
- if (CALL_MUNMAP(base, size) == 0) {
- released += size;
- m->footprint -= size;
- /* unlink obsoleted record */
- sp = pred;
- sp->next = next;
- }
- else { /* back out if cannot unmap */
- insert_large_chunk(m, tp, psize);
- }
- }
- }
- pred = sp;
- sp = next;
- }
- return released;
-}
-
-static int sys_trim(mstate m, size_t pad) {
- size_t released = 0;
- if (pad < MAX_REQUEST && is_initialized(m)) {
- pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
-
- if (m->topsize > pad) {
- /* Shrink top space in granularity-size units, keeping at least one */
- size_t unit = mparams.granularity;
- size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
- SIZE_T_ONE) * unit;
- msegmentptr sp = segment_holding(m, (char*)m->top);
-
- if (!is_extern_segment(sp)) {
- if (is_mmapped_segment(sp)) {
- if (HAVE_MMAP &&
- sp->size >= extra &&
- !has_segment_link(m, sp)) { /* can't shrink if pinned */
- size_t newsize = sp->size - extra;
- /* Prefer mremap, fall back to munmap */
- if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
- (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
- released = extra;
- }
- }
- }
- else if (HAVE_MORECORE) {
- if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
- extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
- ACQUIRE_MORECORE_LOCK();
- {
- /* Make sure end of memory is where we last set it. */
- char* old_br = (char*)(CALL_MORECORE(0));
- if (old_br == sp->base + sp->size) {
- char* rel_br = (char*)(CALL_MORECORE(-extra));
- char* new_br = (char*)(CALL_MORECORE(0));
- if (rel_br != CMFAIL && new_br < old_br)
- released = old_br - new_br;
- }
- }
- RELEASE_MORECORE_LOCK();
- }
- }
-
- if (released != 0) {
- sp->size -= released;
- m->footprint -= released;
- init_top(m, m->top, m->topsize - released);
- check_top_chunk(m, m->top);
- }
- }
-
- /* Unmap any unused mmapped segments */
- if (HAVE_MMAP)
- released += release_unused_segments(m);
-
- /* On failure, disable autotrim to avoid repeated failed future calls */
- if (released == 0)
- m->trim_check = MAX_SIZE_T;
- }
-
- return (released != 0)? 1 : 0;
-}
-
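/*
  Illustrative sketch (not dlmalloc code): the trim arithmetic from sys_trim
  in isolation. With a 64 KiB granularity ("unit"), a 300 KiB top chunk and
  a pad of 0 (TOP_FOOT_SIZE ignored for simplicity), one unit is kept and
  the rest is released back to the system.
*/
#include <stdio.h>

int main(void) {
  size_t unit = 64 * 1024;                /* stands in for mparams.granularity */
  size_t topsize = 300 * 1024, pad = 0;
  size_t extra = ((topsize - pad + (unit - 1)) / unit - 1) * unit;
  printf("release %zu bytes, keep %zu bytes\n", extra, topsize - extra);
  /* prints: release 262144 bytes, keep 45056 bytes */
  return 0;
}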
-/* ---------------------------- malloc support --------------------------- */
-
-/* allocate a large request from the best fitting chunk in a treebin */
-static void* tmalloc_large(mstate m, size_t nb) {
- tchunkptr v = 0;
- size_t rsize = -nb; /* Unsigned negation */
- tchunkptr t;
- bindex_t idx;
- compute_tree_index(nb, idx);
-
- if ((t = *treebin_at(m, idx)) != 0) {
- /* Traverse tree for this bin looking for node with size == nb */
- size_t sizebits = nb << leftshift_for_tree_index(idx);
- tchunkptr rst = 0; /* The deepest untaken right subtree */
- for (;;) {
- tchunkptr rt;
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- v = t;
- if ((rsize = trem) == 0)
- break;
- }
- rt = t->child[1];
- t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
- if (rt != 0 && rt != t)
- rst = rt;
- if (t == 0) {
- t = rst; /* set t to least subtree holding sizes > nb */
- break;
- }
- sizebits <<= 1;
- }
- }
-
- if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
- binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
- if (leftbits != 0) {
- bindex_t i;
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- t = *treebin_at(m, i);
- }
- }
-
- while (t != 0) { /* find smallest of tree or subtree */
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- rsize = trem;
- v = t;
- }
- t = leftmost_child(t);
- }
-
- /* If dv is a better fit, return 0 so malloc will use it */
- if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
- if (RTCHECK(ok_address(m, v))) { /* split */
- mchunkptr r = chunk_plus_offset(v, nb);
- assert(chunksize(v) == rsize + nb);
- if (RTCHECK(ok_next(v, r))) {
- unlink_large_chunk(m, v);
- if (rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(m, v, (rsize + nb));
- else {
- set_size_and_pinuse_of_inuse_chunk(m, v, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- insert_chunk(m, r, rsize);
- }
- return chunk2mem(v);
- }
- }
- CORRUPTION_ERROR_ACTION(m);
- }
- return 0;
-}
-
-/* allocate a small request from the best fitting chunk in a treebin */
-static void* tmalloc_small(mstate m, size_t nb) {
- tchunkptr t, v;
- size_t rsize;
- bindex_t i;
- binmap_t leastbit = least_bit(m->treemap);
- compute_bit2idx(leastbit, i);
-
- v = t = *treebin_at(m, i);
- rsize = chunksize(t) - nb;
-
- while ((t = leftmost_child(t)) != 0) {
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- rsize = trem;
- v = t;
- }
- }
-
- if (RTCHECK(ok_address(m, v))) {
- mchunkptr r = chunk_plus_offset(v, nb);
- assert(chunksize(v) == rsize + nb);
- if (RTCHECK(ok_next(v, r))) {
- unlink_large_chunk(m, v);
- if (rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(m, v, (rsize + nb));
- else {
- set_size_and_pinuse_of_inuse_chunk(m, v, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(m, r, rsize);
- }
- return chunk2mem(v);
- }
- }
-
- CORRUPTION_ERROR_ACTION(m);
- return 0;
-}
-
-/* --------------------------- realloc support --------------------------- */
-
-static void* internal_realloc(mstate m, void* oldmem, size_t bytes) {
- if (bytes >= MAX_REQUEST) {
- MALLOC_FAILURE_ACTION;
- return 0;
- }
- if (!PREACTION(m)) {
- mchunkptr oldp = mem2chunk(oldmem);
- size_t oldsize = chunksize(oldp);
- mchunkptr next = chunk_plus_offset(oldp, oldsize);
- mchunkptr newp = 0;
- void* extra = 0;
-
- /* Try to either shrink or extend into top. Else malloc-copy-free */
-
- if (RTCHECK(ok_address(m, oldp) && ok_cinuse(oldp) &&
- ok_next(oldp, next) && ok_pinuse(next))) {
- size_t nb = request2size(bytes);
- if (is_mmapped(oldp))
- newp = mmap_resize(m, oldp, nb);
- else if (oldsize >= nb) { /* already big enough */
- size_t rsize = oldsize - nb;
- newp = oldp;
- if (rsize >= MIN_CHUNK_SIZE) {
- mchunkptr remainder = chunk_plus_offset(newp, nb);
- set_inuse(m, newp, nb);
- set_inuse(m, remainder, rsize);
- extra = chunk2mem(remainder);
- }
- }
- else if (next == m->top && oldsize + m->topsize > nb) {
- /* Expand into top */
- size_t newsize = oldsize + m->topsize;
- size_t newtopsize = newsize - nb;
- mchunkptr newtop = chunk_plus_offset(oldp, nb);
- set_inuse(m, oldp, nb);
- newtop->head = newtopsize |PINUSE_BIT;
- m->top = newtop;
- m->topsize = newtopsize;
- newp = oldp;
- }
- }
- else {
- USAGE_ERROR_ACTION(m, oldmem);
- POSTACTION(m);
- return 0;
- }
-
- POSTACTION(m);
-
- if (newp != 0) {
- if (extra != 0) {
- internal_free(m, extra);
- }
- check_inuse_chunk(m, newp);
- return chunk2mem(newp);
- }
- else {
- void* newmem = internal_malloc(m, bytes);
- if (newmem != 0) {
- size_t oc = oldsize - overhead_for(oldp);
- memcpy(newmem, oldmem, (oc < bytes)? oc : bytes);
- internal_free(m, oldmem);
- }
- return newmem;
- }
- }
- return 0;
-}
-
-/* --------------------------- memalign support -------------------------- */
-
-static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
- if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */
- return internal_malloc(m, bytes);
- if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
- alignment = MIN_CHUNK_SIZE;
- if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
- size_t a = MALLOC_ALIGNMENT << 1;
- while (a < alignment) a <<= 1;
- alignment = a;
- }
-
- if (bytes >= MAX_REQUEST - alignment) {
- if (m != 0) { /* Test isn't needed but avoids compiler warning */
- MALLOC_FAILURE_ACTION;
- }
- }
- else {
- size_t nb = request2size(bytes);
- size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
- char* mem = (char*)internal_malloc(m, req);
- if (mem != 0) {
- void* leader = 0;
- void* trailer = 0;
- mchunkptr p = mem2chunk(mem);
-
- if (PREACTION(m)) return 0;
- if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
- /*
- Find an aligned spot inside chunk. Since we need to give
- back leading space in a chunk of at least MIN_CHUNK_SIZE, if
- the first calculation places us at a spot with less than
- MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
- We've allocated enough total room so that this is always
- possible.
- */
- char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
- alignment -
- SIZE_T_ONE)) &
- -alignment));
- char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
- br : br+alignment;
- mchunkptr newp = (mchunkptr)pos;
- size_t leadsize = pos - (char*)(p);
- size_t newsize = chunksize(p) - leadsize;
-
- if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
- newp->prev_foot = p->prev_foot + leadsize;
- newp->head = (newsize|CINUSE_BIT);
- }
- else { /* Otherwise, give back leader, use the rest */
- set_inuse(m, newp, newsize);
- set_inuse(m, p, leadsize);
- leader = chunk2mem(p);
- }
- p = newp;
- }
-
- /* Give back spare room at the end */
- if (!is_mmapped(p)) {
- size_t size = chunksize(p);
- if (size > nb + MIN_CHUNK_SIZE) {
- size_t remainder_size = size - nb;
- mchunkptr remainder = chunk_plus_offset(p, nb);
- set_inuse(m, p, nb);
- set_inuse(m, remainder, remainder_size);
- trailer = chunk2mem(remainder);
- }
- }
-
- assert (chunksize(p) >= nb);
- assert((((size_t)(chunk2mem(p))) % alignment) == 0);
- check_inuse_chunk(m, p);
- POSTACTION(m);
- if (leader != 0) {
- internal_free(m, leader);
- }
- if (trailer != 0) {
- internal_free(m, trailer);
- }
- return chunk2mem(p);
- }
- }
- return 0;
-}
-
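/*
  Illustrative sketch (not dlmalloc code): the "overallocate, then round the
  pointer up" idea that internal_memalign applies to its own chunks, shown
  here with plain malloc. toy_aligned_alloc is a hypothetical name; the
  caller later frees the raw pointer, whereas internal_memalign instead
  returns the leading and trailing slack to the allocator as separate chunks.
*/
#include <stdlib.h>
#include <stdint.h>

static void* toy_aligned_alloc(size_t alignment, size_t bytes, void** raw_out) {
  /* alignment must be a power of two, as internal_memalign enforces */
  void* raw = malloc(bytes + alignment - 1);
  if (raw == NULL)
    return NULL;
  uintptr_t aligned = ((uintptr_t)raw + alignment - 1) & ~(uintptr_t)(alignment - 1);
  *raw_out = raw;   /* keep the original pointer so it can be freed later */
  return (void*)aligned;
}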
-/* ------------------------ comalloc/coalloc support --------------------- */
-
-static void** ialloc(mstate m,
- size_t n_elements,
- size_t* sizes,
- int opts,
- void* chunks[]) {
- /*
- This provides common support for independent_X routines, handling
- all of the combinations that can result.
-
- The opts arg has:
- bit 0 set if all elements are same size (using sizes[0])
- bit 1 set if elements should be zeroed
- */
-
- size_t element_size; /* chunksize of each element, if all same */
- size_t contents_size; /* total size of elements */
- size_t array_size; /* request size of pointer array */
- void* mem; /* malloced aggregate space */
- mchunkptr p; /* corresponding chunk */
- size_t remainder_size; /* remaining bytes while splitting */
- void** marray; /* either "chunks" or malloced ptr array */
- mchunkptr array_chunk; /* chunk for malloced ptr array */
- flag_t was_enabled; /* to disable mmap */
- size_t size;
- size_t i;
-
- /* compute array length, if needed */
- if (chunks != 0) {
- if (n_elements == 0)
- return chunks; /* nothing to do */
- marray = chunks;
- array_size = 0;
- }
- else {
- /* if empty req, must still return chunk representing empty array */
- if (n_elements == 0)
- return (void**)internal_malloc(m, 0);
- marray = 0;
- array_size = request2size(n_elements * (sizeof(void*)));
- }
-
- /* compute total element size */
- if (opts & 0x1) { /* all-same-size */
- element_size = request2size(*sizes);
- contents_size = n_elements * element_size;
- }
- else { /* add up all the sizes */
- element_size = 0;
- contents_size = 0;
- for (i = 0; i != n_elements; ++i)
- contents_size += request2size(sizes[i]);
- }
-
- size = contents_size + array_size;
-
- /*
- Allocate the aggregate chunk. First disable direct-mmapping so
- malloc won't use it, since we would not be able to later
- free/realloc space internal to a segregated mmap region.
- */
- was_enabled = use_mmap(m);
- disable_mmap(m);
- mem = internal_malloc(m, size - CHUNK_OVERHEAD);
- if (was_enabled)
- enable_mmap(m);
- if (mem == 0)
- return 0;
-
- if (PREACTION(m)) return 0;
- p = mem2chunk(mem);
- remainder_size = chunksize(p);
-
- assert(!is_mmapped(p));
-
- if (opts & 0x2) { /* optionally clear the elements */
- memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
- }
-
- /* If not provided, allocate the pointer array as final part of chunk */
- if (marray == 0) {
- size_t array_chunk_size;
- array_chunk = chunk_plus_offset(p, contents_size);
- array_chunk_size = remainder_size - contents_size;
- marray = (void**) (chunk2mem(array_chunk));
- set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
- remainder_size = contents_size;
- }
-
- /* split out elements */
- for (i = 0; ; ++i) {
- marray[i] = chunk2mem(p);
- if (i != n_elements-1) {
- if (element_size != 0)
- size = element_size;
- else
- size = request2size(sizes[i]);
- remainder_size -= size;
- set_size_and_pinuse_of_inuse_chunk(m, p, size);
- p = chunk_plus_offset(p, size);
- }
- else { /* the final element absorbs any overallocation slop */
- set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
- break;
- }
- }
-
-#if DEBUG
- if (marray != chunks) {
- /* final element must have exactly exhausted chunk */
- if (element_size != 0) {
- assert(remainder_size == element_size);
- }
- else {
- assert(remainder_size == request2size(sizes[i]));
- }
- check_inuse_chunk(m, mem2chunk(marray));
- }
- for (i = 0; i != n_elements; ++i)
- check_inuse_chunk(m, mem2chunk(marray[i]));
-
-#endif /* DEBUG */
-
- POSTACTION(m);
- return marray;
-}
-
-
-/* -------------------------- public routines ---------------------------- */
-
-#if !ONLY_MSPACES
-
-void* dlmalloc(size_t bytes) {
- /*
- Basic algorithm:
- If a small request (< 256 bytes minus per-chunk overhead):
- 1. If one exists, use a remainderless chunk in associated smallbin.
- (Remainderless means that there are too few excess bytes to
- represent as a chunk.)
- 2. If it is big enough, use the dv chunk, which is normally the
- chunk adjacent to the one used for the most recent small request.
- 3. If one exists, split the smallest available chunk in a bin,
- saving remainder in dv.
- 4. If it is big enough, use the top chunk.
- 5. If available, get memory from system and use it
- Otherwise, for a large request:
- 1. Find the smallest available binned chunk that fits, and use it
- if it is better fitting than dv chunk, splitting if necessary.
- 2. If better fitting than any binned chunk, use the dv chunk.
- 3. If it is big enough, use the top chunk.
- 4. If request size >= mmap threshold, try to directly mmap this chunk.
- 5. If available, get memory from system and use it
-
- The ugly goto's here ensure that postaction occurs along all paths.
- */
-
- if (!PREACTION(gm)) {
- void* mem;
- size_t nb;
- if (bytes <= MAX_SMALL_REQUEST) {
- bindex_t idx;
- binmap_t smallbits;
- nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
- idx = small_index(nb);
- smallbits = gm->smallmap >> idx;
-
- if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
- mchunkptr b, p;
- idx += ~smallbits & 1; /* Uses next bin if idx empty */
- b = smallbin_at(gm, idx);
- p = b->fd;
- assert(chunksize(p) == small_index2size(idx));
- unlink_first_small_chunk(gm, b, p, idx);
- set_inuse_and_pinuse(gm, p, small_index2size(idx));
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (nb > gm->dvsize) {
- if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
- mchunkptr b, p, r;
- size_t rsize;
- bindex_t i;
- binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- b = smallbin_at(gm, i);
- p = b->fd;
- assert(chunksize(p) == small_index2size(i));
- unlink_first_small_chunk(gm, b, p, i);
- rsize = small_index2size(i) - nb;
-        /* Fit here cannot be remainderless if 4-byte sizes */
- if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(gm, p, small_index2size(i));
- else {
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- r = chunk_plus_offset(p, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(gm, r, rsize);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
- }
- }
- else if (bytes >= MAX_REQUEST)
- nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
- else {
- nb = pad_request(bytes);
- if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
- }
-
- if (nb <= gm->dvsize) {
- size_t rsize = gm->dvsize - nb;
- mchunkptr p = gm->dv;
- if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
- mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
- gm->dvsize = rsize;
- set_size_and_pinuse_of_free_chunk(r, rsize);
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- }
- else { /* exhaust dv */
- size_t dvs = gm->dvsize;
- gm->dvsize = 0;
- gm->dv = 0;
- set_inuse_and_pinuse(gm, p, dvs);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (nb < gm->topsize) { /* Split top */
- size_t rsize = gm->topsize -= nb;
- mchunkptr p = gm->top;
- mchunkptr r = gm->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- mem = chunk2mem(p);
- check_top_chunk(gm, gm->top);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- mem = sys_alloc(gm, nb);
-
- postaction:
- POSTACTION(gm);
- return mem;
- }
-
- return 0;
-}
-
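/*
  Illustrative usage sketch (not part of the library source): the dl-prefixed
  entry points defined in this section behave like their libc counterparts.
  The prototypes are declared inline to keep the example self-contained; a
  real build would normally take them from the accompanying header, if one
  is used.
*/
#include <stdio.h>
#include <string.h>

void* dlmalloc(size_t);
void* dlrealloc(void*, size_t);
void  dlfree(void*);

int main(void) {
  char* buf = (char*)dlmalloc(64);          /* small request: smallbin/dv path */
  if (buf == NULL)
    return 1;
  strcpy(buf, "hello");
  char* grown = (char*)dlrealloc(buf, 64 * 1024); /* may grow in place or malloc-copy-free */
  if (grown != NULL)
    buf = grown;
  printf("%s\n", buf);
  dlfree(buf);                              /* consolidates with free neighbors */
  return 0;
}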
-void dlfree(void* mem) {
- /*
- Consolidate freed chunks with preceding or succeeding bordering
- free chunks, if they exist, and then place in a bin. Intermixed
- with special cases for top, dv, mmapped chunks, and usage errors.
- */
-
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
-#if FOOTERS
- mstate fm = get_mstate_for(p);
- if (!ok_magic(fm)) {
- USAGE_ERROR_ACTION(fm, p);
- return;
- }
-#else /* FOOTERS */
-#define fm gm
-#endif /* FOOTERS */
- if (!PREACTION(fm)) {
- check_inuse_chunk(fm, p);
- if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) {
- size_t psize = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, psize);
- if (!pinuse(p)) {
- size_t prevsize = p->prev_foot;
- if ((prevsize & IS_MMAPPED_BIT) != 0) {
- prevsize &= ~IS_MMAPPED_BIT;
- psize += prevsize + MMAP_FOOT_PAD;
- if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
- fm->footprint -= psize;
- goto postaction;
- }
- else {
- mchunkptr prev = chunk_minus_offset(p, prevsize);
- psize += prevsize;
- p = prev;
- if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
- if (p != fm->dv) {
- unlink_chunk(fm, p, prevsize);
- }
- else if ((next->head & INUSE_BITS) == INUSE_BITS) {
- fm->dvsize = psize;
- set_free_with_pinuse(p, psize, next);
- goto postaction;
- }
- }
- else
- goto erroraction;
- }
- }
-
- if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
- if (!cinuse(next)) { /* consolidate forward */
- if (next == fm->top) {
- size_t tsize = fm->topsize += psize;
- fm->top = p;
- p->head = tsize | PINUSE_BIT;
- if (p == fm->dv) {
- fm->dv = 0;
- fm->dvsize = 0;
- }
- if (should_trim(fm, tsize))
- sys_trim(fm, 0);
- goto postaction;
- }
- else if (next == fm->dv) {
- size_t dsize = fm->dvsize += psize;
- fm->dv = p;
- set_size_and_pinuse_of_free_chunk(p, dsize);
- goto postaction;
- }
- else {
- size_t nsize = chunksize(next);
- psize += nsize;
- unlink_chunk(fm, next, nsize);
- set_size_and_pinuse_of_free_chunk(p, psize);
- if (p == fm->dv) {
- fm->dvsize = psize;
- goto postaction;
- }
- }
- }
- else
- set_free_with_pinuse(p, psize, next);
- insert_chunk(fm, p, psize);
- check_free_chunk(fm, p);
- goto postaction;
- }
- }
- erroraction:
- USAGE_ERROR_ACTION(fm, p);
- postaction:
- POSTACTION(fm);
- }
- }
-#if !FOOTERS
-#undef fm
-#endif /* FOOTERS */
-}
-
-void* dlcalloc(size_t n_elements, size_t elem_size) {
- void* mem;
- size_t req = 0;
- if (n_elements != 0) {
- req = n_elements * elem_size;
- if (((n_elements | elem_size) & ~(size_t)0xffff) &&
- (req / n_elements != elem_size))
- req = MAX_SIZE_T; /* force downstream failure on overflow */
- }
- mem = dlmalloc(req);
- if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
- memset(mem, 0, req);
- return mem;
-}
-
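/*
  Illustrative sketch (not dlmalloc code): the overflow guard used by
  dlcalloc above, factored into a helper. The (n | e) & ~0xffff test skips
  the division whenever both factors fit in 16 bits, since their product
  then fits in any size_t of at least 32 bits; otherwise the divide detects
  a wrapped multiplication. mul_would_overflow is a hypothetical name.
*/
#include <stddef.h>

static int mul_would_overflow(size_t n_elements, size_t elem_size) {
  size_t req = n_elements * elem_size;
  return n_elements != 0 &&
         ((n_elements | elem_size) & ~(size_t)0xffff) &&
         (req / n_elements != elem_size);
}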
-void* dlrealloc(void* oldmem, size_t bytes) {
- if (oldmem == 0)
- return dlmalloc(bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) {
- dlfree(oldmem);
- return 0;
- }
-#endif /* REALLOC_ZERO_BYTES_FREES */
- else {
-#if ! FOOTERS
- mstate m = gm;
-#else /* FOOTERS */
- mstate m = get_mstate_for(mem2chunk(oldmem));
- if (!ok_magic(m)) {
- USAGE_ERROR_ACTION(m, oldmem);
- return 0;
- }
-#endif /* FOOTERS */
- return internal_realloc(m, oldmem, bytes);
- }
-}
-
-void* dlmemalign(size_t alignment, size_t bytes) {
- return internal_memalign(gm, alignment, bytes);
-}
-
-void** dlindependent_calloc(size_t n_elements, size_t elem_size,
- void* chunks[]) {
- size_t sz = elem_size; /* serves as 1-element array */
- return ialloc(gm, n_elements, &sz, 3, chunks);
-}
-
-void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
- void* chunks[]) {
- return ialloc(gm, n_elements, sizes, 0, chunks);
-}
-
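/*
  Illustrative usage sketch (not part of the library source) for the
  independent_* wrappers above. With a null chunks argument, ialloc carves
  the pointer array out of the same aggregate allocation, so it comes back
  as a freeable allocation of its own, and every element is an ordinary
  chunk that may be freed independently.
*/
#include <stddef.h>

void** dlindependent_calloc(size_t, size_t, void*[]);
void   dlfree(void*);

static void demo_independent_calloc(void) {
  /* 100 zero-filled elements of 32 bytes each, carved from one aggregate chunk */
  void** elems = dlindependent_calloc(100, 32, NULL);
  if (elems == NULL)
    return;
  for (size_t i = 0; i < 100; ++i)
    dlfree(elems[i]);     /* each element is an independent chunk */
  dlfree(elems);          /* the pointer array itself is also a malloced chunk */
}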
-void* dlvalloc(size_t bytes) {
- size_t pagesz;
- init_mparams();
- pagesz = mparams.page_size;
- return dlmemalign(pagesz, bytes);
-}
-
-void* dlpvalloc(size_t bytes) {
- size_t pagesz;
- init_mparams();
- pagesz = mparams.page_size;
- return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
-}
-
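/*
  Illustrative sketch (not dlmalloc code): the page rounding dlpvalloc
  applies above, checked on a few sample values with an assumed 4096-byte
  page size.
*/
#include <stdio.h>

int main(void) {
  size_t pagesz = 4096;
  size_t samples[] = { 0, 1, 4096, 4097 };
  for (int i = 0; i < 4; ++i) {
    size_t rounded = (samples[i] + pagesz - 1) & ~(pagesz - 1);
    printf("%zu -> %zu\n", samples[i], rounded);  /* 0->0, 1->4096, 4096->4096, 4097->8192 */
  }
  return 0;
}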
-int dlmalloc_trim(size_t pad) {
- int result = 0;
- if (!PREACTION(gm)) {
- result = sys_trim(gm, pad);
- POSTACTION(gm);
- }
- return result;
-}
-
-size_t dlmalloc_footprint(void) {
- return gm->footprint;
-}
-
-size_t dlmalloc_max_footprint(void) {
- return gm->max_footprint;
-}
-
-#if !NO_MALLINFO
-struct mallinfo dlmallinfo(void) {
- return internal_mallinfo(gm);
-}
-#endif /* NO_MALLINFO */
-
-void dlmalloc_stats() {
- internal_malloc_stats(gm);
-}
-
-size_t dlmalloc_usable_size(void* mem) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
- if (cinuse(p))
- return chunksize(p) - overhead_for(p);
- }
- return 0;
-}
-
-int dlmallopt(int param_number, int value) {
- return change_mparam(param_number, value);
-}
-
-#endif /* !ONLY_MSPACES */
-
-/* ----------------------------- user mspaces ---------------------------- */
-
-#if MSPACES
-
-static mstate init_user_mstate(char* tbase, size_t tsize) {
- size_t msize = pad_request(sizeof(struct malloc_state));
- mchunkptr mn;
- mchunkptr msp = align_as_chunk(tbase);
- mstate m = (mstate)(chunk2mem(msp));
- memset(m, 0, msize);
- INITIAL_LOCK(&m->mutex);
- msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
- m->seg.base = m->least_addr = tbase;
- m->seg.size = m->footprint = m->max_footprint = tsize;
- m->magic = mparams.magic;
- m->mflags = mparams.default_mflags;
- disable_contiguous(m);
- init_bins(m);
- mn = next_chunk(mem2chunk(m));
- init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
- check_top_chunk(m, m->top);
- return m;
-}
-
-mspace create_mspace(size_t capacity, int locked) {
- mstate m = 0;
- size_t msize = pad_request(sizeof(struct malloc_state));
- init_mparams(); /* Ensure pagesize etc initialized */
-
- if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
- size_t rs = ((capacity == 0)? mparams.granularity :
- (capacity + TOP_FOOT_SIZE + msize));
- size_t tsize = granularity_align(rs);
- char* tbase = (char*)(CALL_MMAP(tsize));
- if (tbase != CMFAIL) {
- m = init_user_mstate(tbase, tsize);
- set_segment_flags(&m->seg, IS_MMAPPED_BIT);
- set_lock(m, locked);
- }
- }
- return (mspace)m;
-}
-
-mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
- mstate m = 0;
- size_t msize = pad_request(sizeof(struct malloc_state));
- init_mparams(); /* Ensure pagesize etc initialized */
-
- if (capacity > msize + TOP_FOOT_SIZE &&
- capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
- m = init_user_mstate((char*)base, capacity);
- set_segment_flags(&m->seg, EXTERN_BIT);
- set_lock(m, locked);
- }
- return (mspace)m;
-}
-
-size_t destroy_mspace(mspace msp) {
- size_t freed = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- msegmentptr sp = &ms->seg;
- while (sp != 0) {
- char* base = sp->base;
- size_t size = sp->size;
- flag_t flag = get_segment_flags(sp);
- sp = sp->next;
- if ((flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) &&
- CALL_MUNMAP(base, size) == 0)
- freed += size;
- }
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return freed;
-}
-
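/*
  Illustrative usage sketch (not part of the library source): a private
  allocation arena built with the mspace API defined in this section
  (requires building with MSPACES enabled). The typedef matches the public
  one: an mspace is an opaque pointer.
*/
#include <stddef.h>

typedef void* mspace;
mspace create_mspace(size_t capacity, int locked);
size_t destroy_mspace(mspace msp);
void*  mspace_malloc(mspace msp, size_t bytes);
void   mspace_free(mspace msp, void* mem);

static void demo_mspace(void) {
  mspace arena = create_mspace(0, 1);   /* 0 = default initial capacity; 1 = use a lock */
  if (arena == NULL)
    return;
  void* p = mspace_malloc(arena, 128);
  mspace_free(arena, p);
  destroy_mspace(arena);                /* releases the arena's mmapped segments */
}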
-/*
- mspace versions of routines are near-clones of the global
- versions. This is not so nice but better than the alternatives.
-*/
-
-
-void* mspace_malloc(mspace msp, size_t bytes) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- if (!PREACTION(ms)) {
- void* mem;
- size_t nb;
- if (bytes <= MAX_SMALL_REQUEST) {
- bindex_t idx;
- binmap_t smallbits;
- nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
- idx = small_index(nb);
- smallbits = ms->smallmap >> idx;
-
- if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
- mchunkptr b, p;
- idx += ~smallbits & 1; /* Uses next bin if idx empty */
- b = smallbin_at(ms, idx);
- p = b->fd;
- assert(chunksize(p) == small_index2size(idx));
- unlink_first_small_chunk(ms, b, p, idx);
- set_inuse_and_pinuse(ms, p, small_index2size(idx));
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (nb > ms->dvsize) {
- if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
- mchunkptr b, p, r;
- size_t rsize;
- bindex_t i;
- binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- b = smallbin_at(ms, i);
- p = b->fd;
- assert(chunksize(p) == small_index2size(i));
- unlink_first_small_chunk(ms, b, p, i);
- rsize = small_index2size(i) - nb;
-        /* Fit here cannot be remainderless if 4-byte sizes */
- if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(ms, p, small_index2size(i));
- else {
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- r = chunk_plus_offset(p, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(ms, r, rsize);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
- }
- }
- else if (bytes >= MAX_REQUEST)
- nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
- else {
- nb = pad_request(bytes);
- if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
- }
-
- if (nb <= ms->dvsize) {
- size_t rsize = ms->dvsize - nb;
- mchunkptr p = ms->dv;
- if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
- mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
- ms->dvsize = rsize;
- set_size_and_pinuse_of_free_chunk(r, rsize);
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- }
- else { /* exhaust dv */
- size_t dvs = ms->dvsize;
- ms->dvsize = 0;
- ms->dv = 0;
- set_inuse_and_pinuse(ms, p, dvs);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (nb < ms->topsize) { /* Split top */
- size_t rsize = ms->topsize -= nb;
- mchunkptr p = ms->top;
- mchunkptr r = ms->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- mem = chunk2mem(p);
- check_top_chunk(ms, ms->top);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- mem = sys_alloc(ms, nb);
-
- postaction:
- POSTACTION(ms);
- return mem;
- }
-
- return 0;
-}
-
-void mspace_free(mspace msp, void* mem) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
-#if FOOTERS
- mstate fm = get_mstate_for(p);
-#else /* FOOTERS */
- mstate fm = (mstate)msp;
-#endif /* FOOTERS */
- if (!ok_magic(fm)) {
- USAGE_ERROR_ACTION(fm, p);
- return;
- }
- if (!PREACTION(fm)) {
- check_inuse_chunk(fm, p);
- if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) {
- size_t psize = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, psize);
- if (!pinuse(p)) {
- size_t prevsize = p->prev_foot;
- if ((prevsize & IS_MMAPPED_BIT) != 0) {
- prevsize &= ~IS_MMAPPED_BIT;
- psize += prevsize + MMAP_FOOT_PAD;
- if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
- fm->footprint -= psize;
- goto postaction;
- }
- else {
- mchunkptr prev = chunk_minus_offset(p, prevsize);
- psize += prevsize;
- p = prev;
- if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
- if (p != fm->dv) {
- unlink_chunk(fm, p, prevsize);
- }
- else if ((next->head & INUSE_BITS) == INUSE_BITS) {
- fm->dvsize = psize;
- set_free_with_pinuse(p, psize, next);
- goto postaction;
- }
- }
- else
- goto erroraction;
- }
- }
-
- if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
- if (!cinuse(next)) { /* consolidate forward */
- if (next == fm->top) {
- size_t tsize = fm->topsize += psize;
- fm->top = p;
- p->head = tsize | PINUSE_BIT;
- if (p == fm->dv) {
- fm->dv = 0;
- fm->dvsize = 0;
- }
- if (should_trim(fm, tsize))
- sys_trim(fm, 0);
- goto postaction;
- }
- else if (next == fm->dv) {
- size_t dsize = fm->dvsize += psize;
- fm->dv = p;
- set_size_and_pinuse_of_free_chunk(p, dsize);
- goto postaction;
- }
- else {
- size_t nsize = chunksize(next);
- psize += nsize;
- unlink_chunk(fm, next, nsize);
- set_size_and_pinuse_of_free_chunk(p, psize);
- if (p == fm->dv) {
- fm->dvsize = psize;
- goto postaction;
- }
- }
- }
- else
- set_free_with_pinuse(p, psize, next);
- insert_chunk(fm, p, psize);
- check_free_chunk(fm, p);
- goto postaction;
- }
- }
- erroraction:
- USAGE_ERROR_ACTION(fm, p);
- postaction:
- POSTACTION(fm);
- }
- }
-}
-
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
- void* mem;
- size_t req = 0;
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- if (n_elements != 0) {
- req = n_elements * elem_size;
- if (((n_elements | elem_size) & ~(size_t)0xffff) &&
- (req / n_elements != elem_size))
- req = MAX_SIZE_T; /* force downstream failure on overflow */
- }
- mem = internal_malloc(ms, req);
- if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
- memset(mem, 0, req);
- return mem;
-}
-
-void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
- if (oldmem == 0)
- return mspace_malloc(msp, bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) {
- mspace_free(msp, oldmem);
- return 0;
- }
-#endif /* REALLOC_ZERO_BYTES_FREES */
- else {
-#if FOOTERS
- mchunkptr p = mem2chunk(oldmem);
- mstate ms = get_mstate_for(p);
-#else /* FOOTERS */
- mstate ms = (mstate)msp;
-#endif /* FOOTERS */
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return internal_realloc(ms, oldmem, bytes);
- }
-}
-
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return internal_memalign(ms, alignment, bytes);
-}
-
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
- size_t elem_size, void* chunks[]) {
- size_t sz = elem_size; /* serves as 1-element array */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return ialloc(ms, n_elements, &sz, 3, chunks);
-}
-
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
- size_t sizes[], void* chunks[]) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return ialloc(ms, n_elements, sizes, 0, chunks);
-}
-
-int mspace_trim(mspace msp, size_t pad) {
- int result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- if (!PREACTION(ms)) {
- result = sys_trim(ms, pad);
- POSTACTION(ms);
- }
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-void mspace_malloc_stats(mspace msp) {
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- internal_malloc_stats(ms);
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
-}
-
-size_t mspace_footprint(mspace msp) {
- size_t result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
-   result = ms->footprint;
- }
- else {
-   USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-
-size_t mspace_max_footprint(mspace msp) {
- size_t result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
-   result = ms->max_footprint;
- }
- else {
-   USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-
-#if !NO_MALLINFO
-struct mallinfo mspace_mallinfo(mspace msp) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return internal_mallinfo(ms);
-}
-#endif /* NO_MALLINFO */
-
-int mspace_mallopt(int param_number, int value) {
- return change_mparam(param_number, value);
-}
-
-#endif /* MSPACES */
-
-/* -------------------- Alternative MORECORE functions ------------------- */
-
-/*
- Guidelines for creating a custom version of MORECORE:
-
- * For best performance, MORECORE should allocate in multiples of pagesize.
- * MORECORE may allocate more memory than requested. (Or even less,
- but this will usually result in a malloc failure.)
- * MORECORE must not allocate memory when given argument zero, but
- instead return one past the end address of memory from previous
- nonzero call.
- * For best performance, consecutive calls to MORECORE with positive
- arguments should return increasing addresses, indicating that
- space has been contiguously extended.
- * Even though consecutive calls to MORECORE need not return contiguous
- addresses, it must be OK for malloc'ed chunks to span multiple
- regions in those cases where they do happen to be contiguous.
- * MORECORE need not handle negative arguments -- it may instead
- just return MFAIL when given negative arguments.
- Negative arguments are always multiples of pagesize. MORECORE
- must not misinterpret negative args as large positive unsigned
- args. You can suppress all such calls from even occurring by defining
- MORECORE_CANNOT_TRIM.
-
- As an example alternative MORECORE, here is a custom allocator
- kindly contributed for pre-OSX macOS. It uses virtually but not
- necessarily physically contiguous non-paged memory (locked in,
- present and won't get swapped out). You can use it by uncommenting
- this section, adding some #includes, and setting up the appropriate
- defines above:
-
- #define MORECORE osMoreCore
-
- There is also a shutdown routine that should somehow be called for
- cleanup upon program exit.
-
- #define MAX_POOL_ENTRIES 100
- #define MINIMUM_MORECORE_SIZE (64 * 1024U)
- static int next_os_pool;
- void *our_os_pools[MAX_POOL_ENTRIES];
-
- void *osMoreCore(int size)
- {
- void *ptr = 0;
- static void *sbrk_top = 0;
-
- if (size > 0)
- {
- if (size < MINIMUM_MORECORE_SIZE)
- size = MINIMUM_MORECORE_SIZE;
- if (CurrentExecutionLevel() == kTaskLevel)
- ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
- if (ptr == 0)
- {
- return (void *) MFAIL;
- }
- // save ptrs so they can be freed during cleanup
- our_os_pools[next_os_pool] = ptr;
- next_os_pool++;
- ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
- sbrk_top = (char *) ptr + size;
- return ptr;
- }
- else if (size < 0)
- {
- // we don't currently support shrink behavior
- return (void *) MFAIL;
- }
- else
- {
- return sbrk_top;
- }
- }
-
- // cleanup any allocated memory pools
- // called as last thing before shutting down driver
-
- void osCleanupMem(void)
- {
- void **ptr;
-
- for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
- if (*ptr)
- {
- PoolDeallocate(*ptr);
- *ptr = 0;
- }
- }
-
-*/
-
-
-/* -----------------------------------------------------------------------
-History:
- V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee)
- * Add max_footprint functions
- * Ensure all appropriate literals are size_t
- * Fix conditional compilation problem for some #define settings
- * Avoid concatenating segments with the one provided
- in create_mspace_with_base
- * Rename some variables to avoid compiler shadowing warnings
- * Use explicit lock initialization.
- * Better handling of sbrk interference.
- * Simplify and fix segment insertion, trimming and mspace_destroy
- * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
- * Thanks especially to Dennis Flanagan for help on these.
-
- V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee)
- * Fix memalign brace error.
-
- V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee)
- * Fix improper #endif nesting in C++
- * Add explicit casts needed for C++
-
- V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee)
- * Use trees for large bins
- * Support mspaces
- * Use segments to unify sbrk-based and mmap-based system allocation,
- removing need for emulation on most platforms without sbrk.
- * Default safety checks
- * Optional footer checks. Thanks to William Robertson for the idea.
- * Internal code refactoring
- * Incorporate suggestions and platform-specific changes.
- Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
- Aaron Bachmann, Emery Berger, and others.
- * Speed up non-fastbin processing enough to remove fastbins.
- * Remove useless cfree() to avoid conflicts with other apps.
- * Remove internal memcpy, memset. Compilers handle builtins better.
- * Remove some options that no one ever used and rename others.
-
- V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
- * Fix malloc_state bitmap array misdeclaration
-
- V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
- * Allow tuning of FIRST_SORTED_BIN_SIZE
- * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
- * Better detection and support for non-contiguousness of MORECORE.
- Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
- * Bypass most of malloc if no frees. Thanks to Emery Berger.
- * Fix freeing of old top non-contiguous chunk in sysmalloc.
- * Raised default trim and map thresholds to 256K.
- * Fix mmap-related #defines. Thanks to Lubos Lunak.
- * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
- * Branch-free bin calculation
- * Default trim and mmap thresholds now 256K.
-
- V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
- * Introduce independent_comalloc and independent_calloc.
- Thanks to Michael Pachos for motivation and help.
- * Make optional .h file available
- * Allow > 2GB requests on 32bit systems.
- * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
- Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
- and Anonymous.
- * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
- helping test this.)
- * memalign: check alignment arg
- * realloc: don't try to shift chunks backwards, since this
- leads to more fragmentation in some programs and doesn't
- seem to help in any others.
- * Collect all cases in malloc requiring system memory into sysmalloc
- * Use mmap as backup to sbrk
- * Place all internal state in malloc_state
- * Introduce fastbins (although similar to 2.5.1)
- * Many minor tunings and cosmetic improvements
- * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
- * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
- Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
- * Include errno.h to support default failure action.
-
- V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
- * return null for negative arguments
- * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
- * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
- (e.g. WIN32 platforms)
- * Cleanup header file inclusion for WIN32 platforms
- * Cleanup code to avoid Microsoft Visual C++ compiler complaints
- * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
- memory allocation routines
- * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
- * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
- usage of 'assert' in non-WIN32 code
- * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
- avoid infinite loop
- * Always call 'fREe()' rather than 'free()'
-
- V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
- * Fixed ordering problem with boundary-stamping
-
- V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
- * Added pvalloc, as recommended by H.J. Liu
- * Added 64bit pointer support mainly from Wolfram Gloger
- * Added anonymously donated WIN32 sbrk emulation
- * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
- * malloc_extend_top: fix mask error that caused wastage after
- foreign sbrks
- * Add linux mremap support code from HJ Liu
-
- V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
- * Integrated most documentation with the code.
- * Add support for mmap, with help from
- Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
- * Use last_remainder in more cases.
- * Pack bins using idea from colin@nyx10.cs.du.edu
+ m->magic = mparams.magic;
+ init_bins(m);
+ if (is_global(m))
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+ else {
+ /* Offset top by embedded malloc_state */
+ mchunkptr mn = next_chunk(mem2chunk(m));
+ init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
+ }
+ }
+
+ else {
+ /* Try to merge with an existing segment */
+ msegmentptr sp = &m->seg;
+ while (sp != 0 && tbase != sp->base + sp->size)
+ sp = sp->next;
+ if (sp != 0 &&
+ !is_extern_segment(sp) &&
+ check_segment_merge(sp, tbase, tsize) &&
+ (get_segment_flags(sp) & IS_MMAPPED_BIT) == mmap_flag &&
+ segment_holds(sp, m->top)) { /* append */
+ sp->size += tsize;
+ init_top(m, m->top, m->topsize + tsize);
+ }
+ else {
+ if (tbase < m->least_addr)
+ m->least_addr = tbase;
+ sp = &m->seg;
+ while (sp != 0 && sp->base != tbase + tsize)
+ sp = sp->next;
+ if (sp != 0 &&
+ !is_extern_segment(sp) &&
+ check_segment_merge(sp, tbase, tsize) &&
+ (get_segment_flags(sp) & IS_MMAPPED_BIT) == mmap_flag) {
+ char* oldbase = sp->base;
+ sp->base = tbase;
+ sp->size += tsize;
+ return prepend_alloc(m, tbase, oldbase, nb);
+ }
+ else
+ add_segment(m, tbase, tsize, mmap_flag);
+ }
+ }
+
+ if (nb < m->topsize) { /* Allocate from new or extended top space */
+ size_t rsize = m->topsize -= nb;
+ mchunkptr p = m->top;
+ mchunkptr r = m->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+ check_top_chunk(m, m->top);
+ check_malloced_chunk(m, chunk2mem(p), nb);
+ return chunk2mem(p);
+ }
+ }
+
+ MALLOC_FAILURE_ACTION;
+ return 0;
+}
+
+/* ----------------------- system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m) {
+ size_t released = 0;
+ msegmentptr pred = &m->seg;
+ msegmentptr sp = pred->next;
+ while (sp != 0) {
+ char* base = sp->base;
+ size_t size = sp->size;
+ msegmentptr next = sp->next;
+ if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+ mchunkptr p = align_as_chunk(base);
+ size_t psize = chunksize(p);
+ /* Can unmap if first chunk holds entire segment and not pinned */
+ if (!cinuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
+ tchunkptr tp = (tchunkptr)p;
+ assert(segment_holds(sp, (char*)sp));
+ if (p == m->dv) {
+ m->dv = 0;
+ m->dvsize = 0;
+ }
+ else {
+ unlink_large_chunk(m, tp);
+ }
+ if (CALL_MUNMAP(base, size) == 0) {
+ released += size;
+ m->footprint -= size;
+ /* unlink obsoleted record */
+ sp = pred;
+ sp->next = next;
+ }
+ else { /* back out if cannot unmap */
+ insert_large_chunk(m, tp, psize);
+ }
+ }
+ }
+ pred = sp;
+ sp = next;
+ }
+ return released;
+}
+
+static int sys_trim(mstate m, size_t pad) {
+ size_t released = 0;
+ if (pad < MAX_REQUEST && is_initialized(m)) {
+ pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+ if (m->topsize > pad) {
+ /* Shrink top space in granularity-size units, keeping at least one */
+ size_t unit = mparams.granularity;
+ size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+ SIZE_T_ONE) * unit;
+ msegmentptr sp = segment_holding(m, (char*)m->top);
+
+ if (!is_extern_segment(sp)) {
+ if (is_mmapped_segment(sp)) {
+ if (HAVE_MMAP &&
+ sp->size >= extra &&
+ !has_segment_link(m, sp)) { /* can't shrink if pinned */
+ size_t newsize = sp->size - extra;
+ /* Prefer mremap, fall back to munmap */
+ if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+ (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+ released = extra;
+ }
+ }
+ }
+ else if (HAVE_MORECORE) {
+ if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
+ extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
+ ACQUIRE_MORECORE_LOCK();
+ {
+ /* Make sure end of memory is where we last set it. */
+ char* old_br = (char*)(CALL_MORECORE(0));
+ if (old_br == sp->base + sp->size) {
+ char* rel_br = (char*)(CALL_MORECORE(-extra));
+ char* new_br = (char*)(CALL_MORECORE(0));
+ if (rel_br != CMFAIL && new_br < old_br)
+ released = old_br - new_br;
+ }
+ }
+ RELEASE_MORECORE_LOCK();
+ }
+ }
+
+ if (released != 0) {
+ sp->size -= released;
+ m->footprint -= released;
+ init_top(m, m->top, m->topsize - released);
+ check_top_chunk(m, m->top);
+ }
+ }
+
+ /* Unmap any unused mmapped segments */
+ if (HAVE_MMAP)
+ released += release_unused_segments(m);
+
+ /* On failure, disable autotrim to avoid repeated failed future calls */
+ if (released == 0)
+ m->trim_check = MAX_SIZE_T;
+ }
+
+ return (released != 0)? 1 : 0;
+}
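+
+/*
+  Worked example of the shrink computation above (illustrative): with a
+  granularity of 64KiB and (m->topsize - pad) of 200KiB, extra becomes
+  ((204800 + 65535) / 65536 - 1) * 65536 == 196608, so sys_trim asks the
+  system to give back at most 192KiB and leaves the last 8KiB in top.
+*/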
+
+/* ---------------------------- malloc support --------------------------- */
+
+/* allocate a large request from the best fitting chunk in a treebin */
+static void* tmalloc_large(mstate m, size_t nb) {
+ tchunkptr v = 0;
+ size_t rsize = -nb; /* Unsigned negation */
+ tchunkptr t;
+ bindex_t idx;
+ compute_tree_index(nb, idx);
+
+ if ((t = *treebin_at(m, idx)) != 0) {
+ /* Traverse tree for this bin looking for node with size == nb */
+ size_t sizebits = nb << leftshift_for_tree_index(idx);
+ tchunkptr rst = 0; /* The deepest untaken right subtree */
+ for (;;) {
+ tchunkptr rt;
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ v = t;
+ if ((rsize = trem) == 0)
+ break;
+ }
+ rt = t->child[1];
+ t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+ if (rt != 0 && rt != t)
+ rst = rt;
+ if (t == 0) {
+ t = rst; /* set t to least subtree holding sizes > nb */
+ break;
+ }
+ sizebits <<= 1;
+ }
+ }
+
+ if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+ binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+ if (leftbits != 0) {
+ bindex_t i;
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ t = *treebin_at(m, i);
+ }
+ }
+
+ while (t != 0) { /* find smallest of tree or subtree */
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ rsize = trem;
+ v = t;
+ }
+ t = leftmost_child(t);
+ }
+
+ /* If dv is a better fit, return 0 so malloc will use it */
+ if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+ if (RTCHECK(ok_address(m, v))) { /* split */
+ mchunkptr r = chunk_plus_offset(v, nb);
+ assert(chunksize(v) == rsize + nb);
+ if (RTCHECK(ok_next(v, r))) {
+ unlink_large_chunk(m, v);
+ if (rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(m, v, (rsize + nb));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ insert_chunk(m, r, rsize);
+ }
+ return chunk2mem(v);
+ }
+ }
+ CORRUPTION_ERROR_ACTION(m);
+ }
+ return 0;
+}
+
+/* allocate a small request from the best fitting chunk in a treebin */
+static void* tmalloc_small(mstate m, size_t nb) {
+ tchunkptr t, v;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leastbit = least_bit(m->treemap);
+ compute_bit2idx(leastbit, i);
+
+ v = t = *treebin_at(m, i);
+ rsize = chunksize(t) - nb;
+
+ while ((t = leftmost_child(t)) != 0) {
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ rsize = trem;
+ v = t;
+ }
+ }
+
+ if (RTCHECK(ok_address(m, v))) {
+ mchunkptr r = chunk_plus_offset(v, nb);
+ assert(chunksize(v) == rsize + nb);
+ if (RTCHECK(ok_next(v, r))) {
+ unlink_large_chunk(m, v);
+ if (rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(m, v, (rsize + nb));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(m, r, rsize);
+ }
+ return chunk2mem(v);
+ }
+ }
+
+ CORRUPTION_ERROR_ACTION(m);
+ return 0;
+}
+
+/* --------------------------- realloc support --------------------------- */
+
+static void* internal_realloc(mstate m, void* oldmem, size_t bytes) {
+ if (bytes >= MAX_REQUEST) {
+ MALLOC_FAILURE_ACTION;
+ return 0;
+ }
+ if (!PREACTION(m)) {
+ mchunkptr oldp = mem2chunk(oldmem);
+ size_t oldsize = chunksize(oldp);
+ mchunkptr next = chunk_plus_offset(oldp, oldsize);
+ mchunkptr newp = 0;
+ void* extra = 0;
+
+ /* Try to either shrink or extend into top. Else malloc-copy-free */
+
+ if (RTCHECK(ok_address(m, oldp) && ok_cinuse(oldp) &&
+ ok_next(oldp, next) && ok_pinuse(next))) {
+ size_t nb = request2size(bytes);
+ if (is_mmapped(oldp))
+ newp = mmap_resize(m, oldp, nb);
+ else if (oldsize >= nb) { /* already big enough */
+ size_t rsize = oldsize - nb;
+ newp = oldp;
+ if (rsize >= MIN_CHUNK_SIZE) {
+ mchunkptr remainder = chunk_plus_offset(newp, nb);
+ set_inuse(m, newp, nb);
+ set_inuse(m, remainder, rsize);
+ extra = chunk2mem(remainder);
+ }
+ }
+ else if (next == m->top && oldsize + m->topsize > nb) {
+ /* Expand into top */
+ size_t newsize = oldsize + m->topsize;
+ size_t newtopsize = newsize - nb;
+ mchunkptr newtop = chunk_plus_offset(oldp, nb);
+ set_inuse(m, oldp, nb);
+ newtop->head = newtopsize |PINUSE_BIT;
+ m->top = newtop;
+ m->topsize = newtopsize;
+ newp = oldp;
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(m, oldmem);
+ POSTACTION(m);
+ return 0;
+ }
+
+ POSTACTION(m);
+
+ if (newp != 0) {
+ if (extra != 0) {
+ internal_free(m, extra);
+ }
+ check_inuse_chunk(m, newp);
+ return chunk2mem(newp);
+ }
+ else {
+ void* newmem = internal_malloc(m, bytes);
+ if (newmem != 0) {
+ size_t oc = oldsize - overhead_for(oldp);
+ memcpy(newmem, oldmem, (oc < bytes)? oc : bytes);
+ internal_free(m, oldmem);
+ }
+ return newmem;
+ }
+ }
+ return 0;
+}
+
+/* --------------------------- memalign support -------------------------- */
+
+static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
+ if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */
+ return internal_malloc(m, bytes);
+ if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
+ alignment = MIN_CHUNK_SIZE;
+ if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
+ size_t a = MALLOC_ALIGNMENT << 1;
+ while (a < alignment) a <<= 1;
+ alignment = a;
+ }
+
+ if (bytes >= MAX_REQUEST - alignment) {
+ if (m != 0) { /* Test isn't needed but avoids compiler warning */
+ MALLOC_FAILURE_ACTION;
+ }
+ }
+ else {
+ size_t nb = request2size(bytes);
+ size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
+ char* mem = (char*)internal_malloc(m, req);
+ if (mem != 0) {
+ void* leader = 0;
+ void* trailer = 0;
+ mchunkptr p = mem2chunk(mem);
+
+ if (PREACTION(m)) return 0;
+ if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
+ /*
+ Find an aligned spot inside chunk. Since we need to give
+ back leading space in a chunk of at least MIN_CHUNK_SIZE, if
+ the first calculation places us at a spot with less than
+ MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
+ We've allocated enough total room so that this is always
+ possible.
+ */
+ char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
+ alignment -
+ SIZE_T_ONE)) &
+ -alignment));
+ char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
+ br : br+alignment;
+ mchunkptr newp = (mchunkptr)pos;
+ size_t leadsize = pos - (char*)(p);
+ size_t newsize = chunksize(p) - leadsize;
+
+ if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
+ newp->prev_foot = p->prev_foot + leadsize;
+ newp->head = (newsize|CINUSE_BIT);
+ }
+ else { /* Otherwise, give back leader, use the rest */
+ set_inuse(m, newp, newsize);
+ set_inuse(m, p, leadsize);
+ leader = chunk2mem(p);
+ }
+ p = newp;
+ }
+
+ /* Give back spare room at the end */
+ if (!is_mmapped(p)) {
+ size_t size = chunksize(p);
+ if (size > nb + MIN_CHUNK_SIZE) {
+ size_t remainder_size = size - nb;
+ mchunkptr remainder = chunk_plus_offset(p, nb);
+ set_inuse(m, p, nb);
+ set_inuse(m, remainder, remainder_size);
+ trailer = chunk2mem(remainder);
+ }
+ }
+
+ assert (chunksize(p) >= nb);
+ assert((((size_t)(chunk2mem(p))) % alignment) == 0);
+ check_inuse_chunk(m, p);
+ POSTACTION(m);
+ if (leader != 0) {
+ internal_free(m, leader);
+ }
+ if (trailer != 0) {
+ internal_free(m, trailer);
+ }
+ return chunk2mem(p);
+ }
+ }
+ return 0;
+}
+
+/* ------------------------ comalloc/coalloc support --------------------- */
+
+static void** ialloc(mstate m,
+ size_t n_elements,
+ size_t* sizes,
+ int opts,
+ void* chunks[]) {
+ /*
+ This provides common support for independent_X routines, handling
+ all of the combinations that can result.
+
+ The opts arg has:
+ bit 0 set if all elements are same size (using sizes[0])
+ bit 1 set if elements should be zeroed
+ */
+
+ size_t element_size; /* chunksize of each element, if all same */
+ size_t contents_size; /* total size of elements */
+ size_t array_size; /* request size of pointer array */
+ void* mem; /* malloced aggregate space */
+ mchunkptr p; /* corresponding chunk */
+ size_t remainder_size; /* remaining bytes while splitting */
+ void** marray; /* either "chunks" or malloced ptr array */
+ mchunkptr array_chunk; /* chunk for malloced ptr array */
+ flag_t was_enabled; /* to disable mmap */
+ size_t size;
+ size_t i;
+
+ /* compute array length, if needed */
+ if (chunks != 0) {
+ if (n_elements == 0)
+ return chunks; /* nothing to do */
+ marray = chunks;
+ array_size = 0;
+ }
+ else {
+ /* if empty req, must still return chunk representing empty array */
+ if (n_elements == 0)
+ return (void**)internal_malloc(m, 0);
+ marray = 0;
+ array_size = request2size(n_elements * (sizeof(void*)));
+ }
+
+ /* compute total element size */
+ if (opts & 0x1) { /* all-same-size */
+ element_size = request2size(*sizes);
+ contents_size = n_elements * element_size;
+ }
+ else { /* add up all the sizes */
+ element_size = 0;
+ contents_size = 0;
+ for (i = 0; i != n_elements; ++i)
+ contents_size += request2size(sizes[i]);
+ }
+
+ size = contents_size + array_size;
+
+ /*
+ Allocate the aggregate chunk. First disable direct-mmapping so
+ malloc won't use it, since we would not be able to later
+ free/realloc space internal to a segregated mmap region.
+ */
+ was_enabled = use_mmap(m);
+ disable_mmap(m);
+ mem = internal_malloc(m, size - CHUNK_OVERHEAD);
+ if (was_enabled)
+ enable_mmap(m);
+ if (mem == 0)
+ return 0;
+
+ if (PREACTION(m)) return 0;
+ p = mem2chunk(mem);
+ remainder_size = chunksize(p);
+
+ assert(!is_mmapped(p));
+
+ if (opts & 0x2) { /* optionally clear the elements */
+ memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
+ }
+
+ /* If not provided, allocate the pointer array as final part of chunk */
+ if (marray == 0) {
+ size_t array_chunk_size;
+ array_chunk = chunk_plus_offset(p, contents_size);
+ array_chunk_size = remainder_size - contents_size;
+ marray = (void**) (chunk2mem(array_chunk));
+ set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
+ remainder_size = contents_size;
+ }
+
+ /* split out elements */
+ for (i = 0; ; ++i) {
+ marray[i] = chunk2mem(p);
+ if (i != n_elements-1) {
+ if (element_size != 0)
+ size = element_size;
+ else
+ size = request2size(sizes[i]);
+ remainder_size -= size;
+ set_size_and_pinuse_of_inuse_chunk(m, p, size);
+ p = chunk_plus_offset(p, size);
+ }
+ else { /* the final element absorbs any overallocation slop */
+ set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
+ break;
+ }
+ }
+
+#if DEBUG
+ if (marray != chunks) {
+ /* final element must have exactly exhausted chunk */
+ if (element_size != 0) {
+ assert(remainder_size == element_size);
+ }
+ else {
+ assert(remainder_size == request2size(sizes[i]));
+ }
+ check_inuse_chunk(m, mem2chunk(marray));
+ }
+ for (i = 0; i != n_elements; ++i)
+ check_inuse_chunk(m, mem2chunk(marray[i]));
+
+#endif /* DEBUG */
+
+ POSTACTION(m);
+ return marray;
+}
+
+
+/* -------------------------- public routines ---------------------------- */
+
+#if !ONLY_MSPACES
+
+void* dlmalloc(size_t bytes) {
+ /*
+ Basic algorithm:
+ If a small request (< 256 bytes minus per-chunk overhead):
+ 1. If one exists, use a remainderless chunk in associated smallbin.
+ (Remainderless means that there are too few excess bytes to
+ represent as a chunk.)
+ 2. If it is big enough, use the dv chunk, which is normally the
+ chunk adjacent to the one used for the most recent small request.
+ 3. If one exists, split the smallest available chunk in a bin,
+ saving remainder in dv.
+ 4. If it is big enough, use the top chunk.
+ 5. If available, get memory from system and use it
+ Otherwise, for a large request:
+ 1. Find the smallest available binned chunk that fits, and use it
+ if it is better fitting than dv chunk, splitting if necessary.
+ 2. If better fitting than any binned chunk, use the dv chunk.
+ 3. If it is big enough, use the top chunk.
+ 4. If request size >= mmap threshold, try to directly mmap this chunk.
+ 5. If available, get memory from system and use it
+
+ The ugly goto's here ensure that postaction occurs along all paths.
+ */
+
+ if (!PREACTION(gm)) {
+ void* mem;
+ size_t nb;
+ if (bytes <= MAX_SMALL_REQUEST) {
+ bindex_t idx;
+ binmap_t smallbits;
+ nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+ idx = small_index(nb);
+ smallbits = gm->smallmap >> idx;
+
+ if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+ mchunkptr b, p;
+ idx += ~smallbits & 1; /* Uses next bin if idx empty */
+ b = smallbin_at(gm, idx);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(idx));
+ unlink_first_small_chunk(gm, b, p, idx);
+ set_inuse_and_pinuse(gm, p, small_index2size(idx));
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb > gm->dvsize) {
+ if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+ mchunkptr b, p, r;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ b = smallbin_at(gm, i);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(i));
+ unlink_first_small_chunk(gm, b, p, i);
+ rsize = small_index2size(i) - nb;
+        /* Fit here cannot be remainderless if 4-byte sizes */
+ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(gm, p, small_index2size(i));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ r = chunk_plus_offset(p, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(gm, r, rsize);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+ }
+ }
+ else if (bytes >= MAX_REQUEST)
+ nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+ else {
+ nb = pad_request(bytes);
+ if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+ }
+
+ if (nb <= gm->dvsize) {
+ size_t rsize = gm->dvsize - nb;
+ mchunkptr p = gm->dv;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+ mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
+ gm->dvsize = rsize;
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ }
+ else { /* exhaust dv */
+ size_t dvs = gm->dvsize;
+ gm->dvsize = 0;
+ gm->dv = 0;
+ set_inuse_and_pinuse(gm, p, dvs);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb < gm->topsize) { /* Split top */
+ size_t rsize = gm->topsize -= nb;
+ mchunkptr p = gm->top;
+ mchunkptr r = gm->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ mem = chunk2mem(p);
+ check_top_chunk(gm, gm->top);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ mem = sys_alloc(gm, nb);
+
+ postaction:
+ POSTACTION(gm);
+ return mem;
+ }
+
+ return 0;
+}
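+
+/*
+  Minimal usage sketch (illustrative, not part of the original source):
+  the public entry points defined here follow the algorithm outlined in
+  the comment at the top of dlmalloc above.
+*/
+#if 0
+static void example_alloc_free(void) {
+  void* p = dlmalloc(100);
+  if (p != 0) {
+    /* dlmalloc_usable_size(p) >= 100 for any successful request */
+    dlfree(p);
+  }
+}
+#endif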
+
+void dlfree(void* mem) {
+ /*
+ Consolidate freed chunks with preceding or succeeding bordering
+ free chunks, if they exist, and then place in a bin. Intermixed
+ with special cases for top, dv, mmapped chunks, and usage errors.
+ */
+
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+ mstate fm = get_mstate_for(p);
+ if (!ok_magic(fm)) {
+ USAGE_ERROR_ACTION(fm, p);
+ return;
+ }
+#else /* FOOTERS */
+#define fm gm
+#endif /* FOOTERS */
+ if (!PREACTION(fm)) {
+ check_inuse_chunk(fm, p);
+ if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) {
+ size_t psize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) {
+ size_t prevsize = p->prev_foot;
+ if ((prevsize & IS_MMAPPED_BIT) != 0) {
+ prevsize &= ~IS_MMAPPED_BIT;
+ psize += prevsize + MMAP_FOOT_PAD;
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ fm->footprint -= psize;
+ goto postaction;
+ }
+ else {
+ mchunkptr prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+ if (p != fm->dv) {
+ unlink_chunk(fm, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+ fm->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ goto postaction;
+ }
+ }
+ else
+ goto erroraction;
+ }
+ }
+
+ if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == fm->top) {
+ size_t tsize = fm->topsize += psize;
+ fm->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == fm->dv) {
+ fm->dv = 0;
+ fm->dvsize = 0;
+ }
+ if (should_trim(fm, tsize))
+ sys_trim(fm, 0);
+ goto postaction;
+ }
+ else if (next == fm->dv) {
+ size_t dsize = fm->dvsize += psize;
+ fm->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ goto postaction;
+ }
+ else {
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(fm, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == fm->dv) {
+ fm->dvsize = psize;
+ goto postaction;
+ }
+ }
+ }
+ else
+ set_free_with_pinuse(p, psize, next);
+ insert_chunk(fm, p, psize);
+ check_free_chunk(fm, p);
+ goto postaction;
+ }
+ }
+ erroraction:
+ USAGE_ERROR_ACTION(fm, p);
+ postaction:
+ POSTACTION(fm);
+ }
+ }
+#if !FOOTERS
+#undef fm
+#endif /* FOOTERS */
+}
+
+void* dlcalloc(size_t n_elements, size_t elem_size) {
+ void* mem;
+ size_t req = 0;
+ if (n_elements != 0) {
+ req = n_elements * elem_size;
+ if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+ (req / n_elements != elem_size))
+ req = MAX_SIZE_T; /* force downstream failure on overflow */
+ }
+ mem = dlmalloc(req);
+ if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+ memset(mem, 0, req);
+ return mem;
+}
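+
+/*
+  Worked example of the overflow guard above (illustrative): with a 32-bit
+  size_t, n_elements == 0x10000 and elem_size == 0x10001 multiply to
+  0x100010000, which wraps to 0x10000; since 0x10000 / 0x10000 != 0x10001,
+  req is forced to MAX_SIZE_T and the allocation fails instead of returning
+  an undersized block.
+*/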
+
+void* dlrealloc(void* oldmem, size_t bytes) {
+ if (oldmem == 0)
+ return dlmalloc(bytes);
+#ifdef REALLOC_ZERO_BYTES_FREES
+ if (bytes == 0) {
+ dlfree(oldmem);
+ return 0;
+ }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+ else {
+#if ! FOOTERS
+ mstate m = gm;
+#else /* FOOTERS */
+ mstate m = get_mstate_for(mem2chunk(oldmem));
+ if (!ok_magic(m)) {
+ USAGE_ERROR_ACTION(m, oldmem);
+ return 0;
+ }
+#endif /* FOOTERS */
+ return internal_realloc(m, oldmem, bytes);
+ }
+}
+
+void* dlmemalign(size_t alignment, size_t bytes) {
+ return internal_memalign(gm, alignment, bytes);
+}
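+
+/*
+  Illustrative sketch (not part of the original source): requesting a
+  64-byte-aligned buffer.  Alignments no larger than MALLOC_ALIGNMENT fall
+  back to plain malloc, and non-power-of-two values are rounded up, as
+  handled in internal_memalign above.
+*/
+#if 0
+static void* example_alloc_cacheline(size_t n) {
+  void* p = dlmemalign(64, n);
+  /* if p != 0 then ((size_t)p % 64) == 0 */
+  return p;
+}
+#endif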
+
+void** dlindependent_calloc(size_t n_elements, size_t elem_size,
+ void* chunks[]) {
+ size_t sz = elem_size; /* serves as 1-element array */
+ return ialloc(gm, n_elements, &sz, 3, chunks);
+}
+
+void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
+ void* chunks[]) {
+ return ialloc(gm, n_elements, sizes, 0, chunks);
+}
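+
+/*
+  Illustrative usage sketch (not part of the original source): allocate a
+  header struct plus its payload with one dlindependent_comalloc request so
+  both pieces come from adjacent chunks; each piece may later be passed to
+  dlfree individually.  The struct and function names are hypothetical.
+*/
+#if 0
+struct example_node { size_t len; char* data; };
+
+static struct example_node* example_make_node(size_t payload) {
+  void*  mem[2];
+  size_t sizes[2];
+  sizes[0] = sizeof(struct example_node);
+  sizes[1] = payload;
+  if (dlindependent_comalloc(2, sizes, mem) == 0)
+    return 0;                               /* allocation failed */
+  {
+    struct example_node* n = (struct example_node*)mem[0];
+    n->len = payload;
+    n->data = (char*)mem[1];
+    return n;
+  }
+}
+#endif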
+
+void* dlvalloc(size_t bytes) {
+ size_t pagesz;
+ init_mparams();
+ pagesz = mparams.page_size;
+ return dlmemalign(pagesz, bytes);
+}
+
+void* dlpvalloc(size_t bytes) {
+ size_t pagesz;
+ init_mparams();
+ pagesz = mparams.page_size;
+ return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
+}
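+
+/*
+  Worked example of the rounding above (illustrative): with a 4096-byte
+  page, dlpvalloc(5000) computes (5000 + 4095) & ~4095 == 8192, so the
+  request is padded to the next whole page and page-aligned by dlmemalign.
+*/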
+
+int dlmalloc_trim(size_t pad) {
+ int result = 0;
+ if (!PREACTION(gm)) {
+ result = sys_trim(gm, pad);
+ POSTACTION(gm);
+ }
+ return result;
+}
+
+size_t dlmalloc_footprint(void) {
+ return gm->footprint;
+}
+
+size_t dlmalloc_max_footprint(void) {
+ return gm->max_footprint;
+}
+
+#if !NO_MALLINFO
+struct mallinfo dlmallinfo(void) {
+ return internal_mallinfo(gm);
+}
+#endif /* NO_MALLINFO */
+
+void dlmalloc_stats() {
+ internal_malloc_stats(gm);
+}
+
+size_t dlmalloc_usable_size(void* mem) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ if (cinuse(p))
+ return chunksize(p) - overhead_for(p);
+ }
+ return 0;
+}
+
+int dlmallopt(int param_number, int value) {
+ return change_mparam(param_number, value);
+}
+
+#endif /* !ONLY_MSPACES */
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+#if MSPACES
+
+static mstate init_user_mstate(char* tbase, size_t tsize) {
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ mchunkptr mn;
+ mchunkptr msp = align_as_chunk(tbase);
+ mstate m = (mstate)(chunk2mem(msp));
+ memset(m, 0, msize);
+ INITIAL_LOCK(&m->mutex);
+ msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
+ m->seg.base = m->least_addr = tbase;
+ m->seg.size = m->footprint = m->max_footprint = tsize;
+ m->magic = mparams.magic;
+ m->mflags = mparams.default_mflags;
+ disable_contiguous(m);
+ init_bins(m);
+ mn = next_chunk(mem2chunk(m));
+ init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
+ check_top_chunk(m, m->top);
+ return m;
+}
+
+mspace create_mspace(size_t capacity, int locked) {
+ mstate m = 0;
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ init_mparams(); /* Ensure pagesize etc initialized */
+
+ if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+ size_t rs = ((capacity == 0)? mparams.granularity :
+ (capacity + TOP_FOOT_SIZE + msize));
+ size_t tsize = granularity_align(rs);
+ char* tbase = (char*)(CALL_MMAP(tsize));
+ if (tbase != CMFAIL) {
+ m = init_user_mstate(tbase, tsize);
+ set_segment_flags(&m->seg, IS_MMAPPED_BIT);
+ set_lock(m, locked);
+ }
+ }
+ return (mspace)m;
+}
+
+mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
+ mstate m = 0;
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ init_mparams(); /* Ensure pagesize etc initialized */
+
+ if (capacity > msize + TOP_FOOT_SIZE &&
+ capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+ m = init_user_mstate((char*)base, capacity);
+ set_segment_flags(&m->seg, EXTERN_BIT);
+ set_lock(m, locked);
+ }
+ return (mspace)m;
+}
+
+size_t destroy_mspace(mspace msp) {
+ size_t freed = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ msegmentptr sp = &ms->seg;
+ while (sp != 0) {
+ char* base = sp->base;
+ size_t size = sp->size;
+ flag_t flag = get_segment_flags(sp);
+ sp = sp->next;
+ if ((flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) &&
+ CALL_MUNMAP(base, size) == 0)
+ freed += size;
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return freed;
+}
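+
+/*
+  Illustrative sketch (not part of the original source): a throwaway arena
+  built on the mspace API.  Capacity 0 requests the default granularity and
+  locked == 0 skips per-mspace locking.
+*/
+#if 0
+static void example_mspace_use(void) {
+  mspace arena = create_mspace(0, 0);
+  if (arena != 0) {
+    void* p = mspace_malloc(arena, 128);
+    if (p != 0)
+      mspace_free(arena, p);
+    destroy_mspace(arena);   /* unmaps all mmapped segments still held */
+  }
+}
+#endif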
+
+/*
+ mspace versions of routines are near-clones of the global
+ versions. This is not so nice but better than the alternatives.
+*/
+
+
+void* mspace_malloc(mspace msp, size_t bytes) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ if (!PREACTION(ms)) {
+ void* mem;
+ size_t nb;
+ if (bytes <= MAX_SMALL_REQUEST) {
+ bindex_t idx;
+ binmap_t smallbits;
+ nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+ idx = small_index(nb);
+ smallbits = ms->smallmap >> idx;
+
+ if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+ mchunkptr b, p;
+ idx += ~smallbits & 1; /* Uses next bin if idx empty */
+ b = smallbin_at(ms, idx);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(idx));
+ unlink_first_small_chunk(ms, b, p, idx);
+ set_inuse_and_pinuse(ms, p, small_index2size(idx));
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb > ms->dvsize) {
+ if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+ mchunkptr b, p, r;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ b = smallbin_at(ms, i);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(i));
+ unlink_first_small_chunk(ms, b, p, i);
+ rsize = small_index2size(i) - nb;
+        /* Fit here cannot be remainderless if 4-byte sizes */
+ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(ms, p, small_index2size(i));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ r = chunk_plus_offset(p, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(ms, r, rsize);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+ }
+ }
+ else if (bytes >= MAX_REQUEST)
+ nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+ else {
+ nb = pad_request(bytes);
+ if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+ }
+
+ if (nb <= ms->dvsize) {
+ size_t rsize = ms->dvsize - nb;
+ mchunkptr p = ms->dv;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+ mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+ ms->dvsize = rsize;
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ }
+ else { /* exhaust dv */
+ size_t dvs = ms->dvsize;
+ ms->dvsize = 0;
+ ms->dv = 0;
+ set_inuse_and_pinuse(ms, p, dvs);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb < ms->topsize) { /* Split top */
+ size_t rsize = ms->topsize -= nb;
+ mchunkptr p = ms->top;
+ mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ mem = chunk2mem(p);
+ check_top_chunk(ms, ms->top);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ mem = sys_alloc(ms, nb);
+
+ postaction:
+ POSTACTION(ms);
+ return mem;
+ }
+
+ return 0;
+}
+
+void mspace_free(mspace msp, void* mem) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+ mstate fm = get_mstate_for(p);
+#else /* FOOTERS */
+ mstate fm = (mstate)msp;
+#endif /* FOOTERS */
+ if (!ok_magic(fm)) {
+ USAGE_ERROR_ACTION(fm, p);
+ return;
+ }
+ if (!PREACTION(fm)) {
+ check_inuse_chunk(fm, p);
+ if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) {
+ size_t psize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) {
+ size_t prevsize = p->prev_foot;
+ if ((prevsize & IS_MMAPPED_BIT) != 0) {
+ prevsize &= ~IS_MMAPPED_BIT;
+ psize += prevsize + MMAP_FOOT_PAD;
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ fm->footprint -= psize;
+ goto postaction;
+ }
+ else {
+ mchunkptr prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+ if (p != fm->dv) {
+ unlink_chunk(fm, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+ fm->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ goto postaction;
+ }
+ }
+ else
+ goto erroraction;
+ }
+ }
+
+ if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == fm->top) {
+ size_t tsize = fm->topsize += psize;
+ fm->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == fm->dv) {
+ fm->dv = 0;
+ fm->dvsize = 0;
+ }
+ if (should_trim(fm, tsize))
+ sys_trim(fm, 0);
+ goto postaction;
+ }
+ else if (next == fm->dv) {
+ size_t dsize = fm->dvsize += psize;
+ fm->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ goto postaction;
+ }
+ else {
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(fm, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == fm->dv) {
+ fm->dvsize = psize;
+ goto postaction;
+ }
+ }
+ }
+ else
+ set_free_with_pinuse(p, psize, next);
+ insert_chunk(fm, p, psize);
+ check_free_chunk(fm, p);
+ goto postaction;
+ }
+ }
+ erroraction:
+ USAGE_ERROR_ACTION(fm, p);
+ postaction:
+ POSTACTION(fm);
+ }
+ }
+}
+
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
+ void* mem;
+ size_t req = 0;
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ if (n_elements != 0) {
+ req = n_elements * elem_size;
+ if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+ (req / n_elements != elem_size))
+ req = MAX_SIZE_T; /* force downstream failure on overflow */
+ }
+ mem = internal_malloc(ms, req);
+ if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+ memset(mem, 0, req);
+ return mem;
+}
+
+void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
+ if (oldmem == 0)
+ return mspace_malloc(msp, bytes);
+#ifdef REALLOC_ZERO_BYTES_FREES
+ if (bytes == 0) {
+ mspace_free(msp, oldmem);
+ return 0;
+ }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+ else {
+#if FOOTERS
+ mchunkptr p = mem2chunk(oldmem);
+ mstate ms = get_mstate_for(p);
+#else /* FOOTERS */
+ mstate ms = (mstate)msp;
+#endif /* FOOTERS */
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ return internal_realloc(ms, oldmem, bytes);
+ }
+}
+
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ return internal_memalign(ms, alignment, bytes);
+}
+
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+ size_t elem_size, void* chunks[]) {
+ size_t sz = elem_size; /* serves as 1-element array */
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ return ialloc(ms, n_elements, &sz, 3, chunks);
+}
+
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+ size_t sizes[], void* chunks[]) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ return ialloc(ms, n_elements, sizes, 0, chunks);
+}
+
+int mspace_trim(mspace msp, size_t pad) {
+ int result = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ if (!PREACTION(ms)) {
+ result = sys_trim(ms, pad);
+ POSTACTION(ms);
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return result;
+}
+
+void mspace_malloc_stats(mspace msp) {
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ internal_malloc_stats(ms);
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+}
+
+size_t mspace_footprint(mspace msp) {
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    result = ms->footprint;
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return result;
+}
+
+
+size_t mspace_max_footprint(mspace msp) {
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    result = ms->max_footprint;
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return result;
+}
+
+
+#if !NO_MALLINFO
+struct mallinfo mspace_mallinfo(mspace msp) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return internal_mallinfo(ms);
+}
+#endif /* NO_MALLINFO */
+
+int mspace_mallopt(int param_number, int value) {
+ return change_mparam(param_number, value);
+}
+
+#endif /* MSPACES */
+
+/* -------------------- Alternative MORECORE functions ------------------- */
+
+/*
+ Guidelines for creating a custom version of MORECORE:
+
+ * For best performance, MORECORE should allocate in multiples of pagesize.
+ * MORECORE may allocate more memory than requested. (Or even less,
+ but this will usually result in a malloc failure.)
+ * MORECORE must not allocate memory when given argument zero, but
+ instead return one past the end address of memory from previous
+ nonzero call.
+ * For best performance, consecutive calls to MORECORE with positive
+ arguments should return increasing addresses, indicating that
+ space has been contiguously extended.
+ * Even though consecutive calls to MORECORE need not return contiguous
+ addresses, it must be OK for malloc'ed chunks to span multiple
+ regions in those cases where they do happen to be contiguous.
+ * MORECORE need not handle negative arguments -- it may instead
+ just return MFAIL when given negative arguments.
+ Negative arguments are always multiples of pagesize. MORECORE
+ must not misinterpret negative args as large positive unsigned
+ args. You can suppress all such calls from even occurring by defining
+  MORECORE_CANNOT_TRIM.
+
+ As an example alternative MORECORE, here is a custom allocator
+ kindly contributed for pre-OSX macOS. It uses virtually but not
+ necessarily physically contiguous non-paged memory (locked in,
+ present and won't get swapped out). You can use it by uncommenting
+ this section, adding some #includes, and setting up the appropriate
+ defines above:
+
+ #define MORECORE osMoreCore
+
+ There is also a shutdown routine that should somehow be called for
+ cleanup upon program exit.
+
+ #define MAX_POOL_ENTRIES 100
+ #define MINIMUM_MORECORE_SIZE (64 * 1024U)
+ static int next_os_pool;
+ void *our_os_pools[MAX_POOL_ENTRIES];
+
+ void *osMoreCore(int size)
+ {
+ void *ptr = 0;
+ static void *sbrk_top = 0;
+
+ if (size > 0)
+ {
+ if (size < MINIMUM_MORECORE_SIZE)
+ size = MINIMUM_MORECORE_SIZE;
+ if (CurrentExecutionLevel() == kTaskLevel)
+ ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+ if (ptr == 0)
+ {
+ return (void *) MFAIL;
+ }
+ // save ptrs so they can be freed during cleanup
+ our_os_pools[next_os_pool] = ptr;
+ next_os_pool++;
+ ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+ sbrk_top = (char *) ptr + size;
+ return ptr;
+ }
+ else if (size < 0)
+ {
+ // we don't currently support shrink behavior
+ return (void *) MFAIL;
+ }
+ else
+ {
+ return sbrk_top;
+ }
+ }
+
+ // cleanup any allocated memory pools
+ // called as last thing before shutting down driver
+
+ void osCleanupMem(void)
+ {
+ void **ptr;
+
+ for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+ if (*ptr)
+ {
+ PoolDeallocate(*ptr);
+ *ptr = 0;
+ }
+ }
+
+*/
+
+
+/* -----------------------------------------------------------------------
+History:
+ V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee)
+ * Add max_footprint functions
+ * Ensure all appropriate literals are size_t
+ * Fix conditional compilation problem for some #define settings
+ * Avoid concatenating segments with the one provided
+ in create_mspace_with_base
+ * Rename some variables to avoid compiler shadowing warnings
+ * Use explicit lock initialization.
+ * Better handling of sbrk interference.
+ * Simplify and fix segment insertion, trimming and mspace_destroy
+ * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
+ * Thanks especially to Dennis Flanagan for help on these.
+
+ V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee)
+ * Fix memalign brace error.
+
+ V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee)
+ * Fix improper #endif nesting in C++
+ * Add explicit casts needed for C++
+
+ V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee)
+ * Use trees for large bins
+ * Support mspaces
+ * Use segments to unify sbrk-based and mmap-based system allocation,
+ removing need for emulation on most platforms without sbrk.
+ * Default safety checks
+ * Optional footer checks. Thanks to William Robertson for the idea.
+ * Internal code refactoring
+ * Incorporate suggestions and platform-specific changes.
+ Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
+ Aaron Bachmann, Emery Berger, and others.
+ * Speed up non-fastbin processing enough to remove fastbins.
+ * Remove useless cfree() to avoid conflicts with other apps.
+ * Remove internal memcpy, memset. Compilers handle builtins better.
+ * Remove some options that no one ever used and rename others.
+
+ V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
+ * Fix malloc_state bitmap array misdeclaration
+
+ V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
+ * Allow tuning of FIRST_SORTED_BIN_SIZE
+ * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+ * Better detection and support for non-contiguousness of MORECORE.
+ Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+ * Bypass most of malloc if no frees. Thanks To Emery Berger.
+      * Fix freeing of old top non-contiguous chunk in sysmalloc.
+ * Raised default trim and map thresholds to 256K.
+ * Fix mmap-related #defines. Thanks to Lubos Lunak.
+ * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+ * Branch-free bin calculation
+ * Default trim and mmap thresholds now 256K.
+
+ V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
+ * Introduce independent_comalloc and independent_calloc.
+ Thanks to Michael Pachos for motivation and help.
+ * Make optional .h file available
+ * Allow > 2GB requests on 32bit systems.
+ * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+ Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+ and Anonymous.
+ * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+ helping test this.)
+ * memalign: check alignment arg
+ * realloc: don't try to shift chunks backwards, since this
+ leads to more fragmentation in some programs and doesn't
+ seem to help in any others.
+ * Collect all cases in malloc requiring system memory into sysmalloc
+ * Use mmap as backup to sbrk
+ * Place all internal state in malloc_state
+ * Introduce fastbins (although similar to 2.5.1)
+ * Many minor tunings and cosmetic improvements
+ * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+ * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+ Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+ * Include errno.h to support default failure action.
+
+ V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
+ * return null for negative arguments
+ * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+ * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+ (e.g. WIN32 platforms)
+ * Cleanup header file inclusion for WIN32 platforms
+ * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+ * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+ memory allocation routines
+ * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+ * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+ usage of 'assert' in non-WIN32 code
+ * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+ avoid infinite loop
+ * Always call 'fREe()' rather than 'free()'
+
+ V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
+ * Fixed ordering problem with boundary-stamping
+
+ V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
+ * Added pvalloc, as recommended by H.J. Liu
+ * Added 64bit pointer support mainly from Wolfram Gloger
+ * Added anonymously donated WIN32 sbrk emulation
+ * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+ * malloc_extend_top: fix mask error that caused wastage after
+ foreign sbrks
+ * Add linux mremap support code from HJ Liu
+
+ V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
+ * Integrated most documentation with the code.
+ * Add support for mmap, with help from
+ Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+ * Use last_remainder in more cases.
+ * Pack bins using idea from colin@nyx10.cs.du.edu
* Use ordered bins instead of best-fit threshold
- * Eliminate block-local decls to simplify tracing and debugging.
- * Support another case of realloc via move into top
+ * Eliminate block-local decls to simplify tracing and debugging.
+ * Support another case of realloc via move into top
* Fix error occurring when initial sbrk_base not word-aligned.
- * Rely on page size for units instead of SBRK_UNIT to
- avoid surprises about sbrk alignment conventions.
- * Add mallinfo, mallopt. Thanks to Raymond Nijssen
- (raymond@es.ele.tue.nl) for the suggestion.
- * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
- * More precautions for cases where other routines call sbrk,
- courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
- * Added macros etc., allowing use in linux libc from
- H.J. Lu (hjl@gnu.ai.mit.edu)
- * Inverted this history list
-
- V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
- * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
- * Removed all preallocation code since under current scheme
- the work required to undo bad preallocations exceeds
- the work saved in good cases for most test programs.
- * No longer use return list or unconsolidated bins since
- no scheme using them consistently outperforms those that don't
- given above changes.
- * Use best fit for very large chunks to prevent some worst-cases.
- * Added some support for debugging
-
- V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
- * Removed footers when chunks are in use. Thanks to
- Paul Wilson (wilson@cs.texas.edu) for the suggestion.
-
- V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
- * Added malloc_trim, with help from Wolfram Gloger
- (wmglo@Dent.MED.Uni-Muenchen.DE).
-
- V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
-
- V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
- * realloc: try to expand in both directions
- * malloc: swap order of clean-bin strategy;
- * realloc: only conditionally expand backwards
- * Try not to scavenge used bins
- * Use bin counts as a guide to preallocation
- * Occasionally bin return list chunks in first scan
- * Add a few optimizations from colin@nyx10.cs.du.edu
-
- V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
- * faster bin computation & slightly different binning
- * merged all consolidations to one part of malloc proper
- (eliminating old malloc_find_space & malloc_clean_bin)
- * Scan 2 returns chunks (not just 1)
- * Propagate failure in realloc if malloc returns 0
- * Add stuff to allow compilation on non-ANSI compilers
- from kpv@research.att.com
-
- V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
- * removed potential for odd address access in prev_chunk
- * removed dependency on getpagesize.h
- * misc cosmetics and a bit more internal documentation
- * anticosmetics: mangled names in macros to evade debugger strangeness
- * tested on sparc, hp-700, dec-mips, rs6000
- with gcc & native cc (hp, dec only) allowing
- Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+ * Rely on page size for units instead of SBRK_UNIT to
+ avoid surprises about sbrk alignment conventions.
+ * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+ (raymond@es.ele.tue.nl) for the suggestion.
+ * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+ * More precautions for cases where other routines call sbrk,
+ courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+ * Added macros etc., allowing use in linux libc from
+ H.J. Lu (hjl@gnu.ai.mit.edu)
+ * Inverted this history list
+
+ V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
+ * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+ * Removed all preallocation code since under current scheme
+ the work required to undo bad preallocations exceeds
+ the work saved in good cases for most test programs.
+ * No longer use return list or unconsolidated bins since
+ no scheme using them consistently outperforms those that don't
+ given above changes.
+ * Use best fit for very large chunks to prevent some worst-cases.
+ * Added some support for debugging
+
+ V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
+ * Removed footers when chunks are in use. Thanks to
+ Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+ V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
+ * Added malloc_trim, with help from Wolfram Gloger
+ (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+ V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
+
+ V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
+ * realloc: try to expand in both directions
+ * malloc: swap order of clean-bin strategy;
+ * realloc: only conditionally expand backwards
+ * Try not to scavenge used bins
+ * Use bin counts as a guide to preallocation
+ * Occasionally bin return list chunks in first scan
+ * Add a few optimizations from colin@nyx10.cs.du.edu
+
+ V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
+ * faster bin computation & slightly different binning
+ * merged all consolidations to one part of malloc proper
+ (eliminating old malloc_find_space & malloc_clean_bin)
+ * Scan 2 returns chunks (not just 1)
+ * Propagate failure in realloc if malloc returns 0
+ * Add stuff to allow compilation on non-ANSI compilers
+ from kpv@research.att.com
+
+ V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
+ * removed potential for odd address access in prev_chunk
+ * removed dependency on getpagesize.h
+ * misc cosmetics and a bit more internal documentation
+ * anticosmetics: mangled names in macros to evade debugger strangeness
+ * tested on sparc, hp-700, dec-mips, rs6000
+ with gcc & native cc (hp, dec only) allowing
+ Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+ Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
+ * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+ structure of old version, but most details differ.)
- Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
- * Based loosely on libg++-1.2X malloc. (It retains some of the overall
- structure of old version, but most details differ.)
-
-*/
+*/
diff --git a/contrib/restricted/libffi/src/java_raw_api.c b/contrib/restricted/libffi/src/java_raw_api.c
index 0f1ef3821b..114d3e47fc 100644
--- a/contrib/restricted/libffi/src/java_raw_api.c
+++ b/contrib/restricted/libffi/src/java_raw_api.c
@@ -1,374 +1,374 @@
-/* -----------------------------------------------------------------------
- java_raw_api.c - Copyright (c) 1999, 2007, 2008 Red Hat, Inc.
-
- Cloned from raw_api.c
-
- Raw_api.c author: Kresten Krab Thorup <krab@gnu.org>
- Java_raw_api.c author: Hans-J. Boehm <hboehm@hpl.hp.com>
-
- $Id $
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-/* This defines a Java- and 64-bit specific variant of the raw API. */
-/* It assumes that "raw" argument blocks look like Java stacks on a */
-/* 64-bit machine. Arguments that can be stored in a single stack */
-/* stack slots (longs, doubles) occupy 128 bits, but only the first */
-/* 64 bits are actually used. */
-
-#include <ffi.h>
-#include <ffi_common.h>
-#include <stdlib.h>
-
+/* -----------------------------------------------------------------------
+ java_raw_api.c - Copyright (c) 1999, 2007, 2008 Red Hat, Inc.
+
+ Cloned from raw_api.c
+
+ Raw_api.c author: Kresten Krab Thorup <krab@gnu.org>
+ Java_raw_api.c author: Hans-J. Boehm <hboehm@hpl.hp.com>
+
+ $Id $
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+/* This defines a Java- and 64-bit specific variant of the raw API. */
+/* It assumes that "raw" argument blocks look like Java stacks on a */
+/* 64-bit machine. Arguments that can be stored in a single stack */
+/* slot (longs, doubles) occupy 128 bits, but only the first */
+/* 64 bits are actually used. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
#if !defined(NO_JAVA_RAW_API)
-
-size_t
-ffi_java_raw_size (ffi_cif *cif)
-{
- size_t result = 0;
- int i;
-
- ffi_type **at = cif->arg_types;
-
- for (i = cif->nargs-1; i >= 0; i--, at++)
- {
- switch((*at) -> type) {
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_DOUBLE:
- result += 2 * FFI_SIZEOF_JAVA_RAW;
- break;
- case FFI_TYPE_STRUCT:
- /* No structure parameters in Java. */
- abort();
+
+size_t
+ffi_java_raw_size (ffi_cif *cif)
+{
+ size_t result = 0;
+ int i;
+
+ ffi_type **at = cif->arg_types;
+
+ for (i = cif->nargs-1; i >= 0; i--, at++)
+ {
+ switch((*at) -> type) {
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_DOUBLE:
+ result += 2 * FFI_SIZEOF_JAVA_RAW;
+ break;
+ case FFI_TYPE_STRUCT:
+ /* No structure parameters in Java. */
+ abort();
case FFI_TYPE_COMPLEX:
/* Not supported yet. */
abort();
- default:
- result += FFI_SIZEOF_JAVA_RAW;
- }
- }
-
- return result;
-}
-
-
-void
-ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
-{
- unsigned i;
- ffi_type **tp = cif->arg_types;
-
-#if WORDS_BIGENDIAN
-
- for (i = 0; i < cif->nargs; i++, tp++, args++)
- {
- switch ((*tp)->type)
- {
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- *args = (void*) ((char*)(raw++) + 3);
- break;
-
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- *args = (void*) ((char*)(raw++) + 2);
- break;
-
-#if FFI_SIZEOF_JAVA_RAW == 8
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_DOUBLE:
- *args = (void *)raw;
- raw += 2;
- break;
-#endif
-
- case FFI_TYPE_POINTER:
- *args = (void*) &(raw++)->ptr;
- break;
-
+ default:
+ result += FFI_SIZEOF_JAVA_RAW;
+ }
+ }
+
+ return result;
+}
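+
+/* Worked example (illustrative): for a cif taking (jint, jlong) and
+   FFI_SIZEOF_JAVA_RAW == 4, the raw block needs 4 + 2*4 == 12 bytes;
+   with FFI_SIZEOF_JAVA_RAW == 8 it would need 8 + 2*8 == 24 bytes. */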
+
+
+void
+ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
+{
+ unsigned i;
+ ffi_type **tp = cif->arg_types;
+
+#if WORDS_BIGENDIAN
+
+ for (i = 0; i < cif->nargs; i++, tp++, args++)
+ {
+ switch ((*tp)->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ *args = (void*) ((char*)(raw++) + 3);
+ break;
+
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ *args = (void*) ((char*)(raw++) + 2);
+ break;
+
+#if FFI_SIZEOF_JAVA_RAW == 8
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_DOUBLE:
+ *args = (void *)raw;
+ raw += 2;
+ break;
+#endif
+
+ case FFI_TYPE_POINTER:
+ *args = (void*) &(raw++)->ptr;
+ break;
+
case FFI_TYPE_COMPLEX:
/* Not supported yet. */
abort();
- default:
- *args = raw;
- raw +=
+ default:
+ *args = raw;
+ raw +=
FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
- }
- }
-
-#else /* WORDS_BIGENDIAN */
-
-#if !PDP
-
- /* then assume little endian */
- for (i = 0; i < cif->nargs; i++, tp++, args++)
- {
-#if FFI_SIZEOF_JAVA_RAW == 8
- switch((*tp)->type) {
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_DOUBLE:
- *args = (void*) raw;
- raw += 2;
- break;
+ }
+ }
+
+#else /* WORDS_BIGENDIAN */
+
+#if !PDP
+
+ /* then assume little endian */
+ for (i = 0; i < cif->nargs; i++, tp++, args++)
+ {
+#if FFI_SIZEOF_JAVA_RAW == 8
+ switch((*tp)->type) {
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_DOUBLE:
+ *args = (void*) raw;
+ raw += 2;
+ break;
case FFI_TYPE_COMPLEX:
/* Not supported yet. */
abort();
- default:
- *args = (void*) raw++;
- }
-#else /* FFI_SIZEOF_JAVA_RAW != 8 */
- *args = (void*) raw;
- raw +=
+ default:
+ *args = (void*) raw++;
+ }
+#else /* FFI_SIZEOF_JAVA_RAW != 8 */
+ *args = (void*) raw;
+ raw +=
FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
-#endif /* FFI_SIZEOF_JAVA_RAW == 8 */
- }
-
-#else
-#error "pdp endian not supported"
-#endif /* ! PDP */
-
-#endif /* WORDS_BIGENDIAN */
-}
-
-void
-ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw)
-{
- unsigned i;
- ffi_type **tp = cif->arg_types;
-
- for (i = 0; i < cif->nargs; i++, tp++, args++)
- {
- switch ((*tp)->type)
- {
- case FFI_TYPE_UINT8:
-#if WORDS_BIGENDIAN
- *(UINT32*)(raw++) = *(UINT8*) (*args);
-#else
- (raw++)->uint = *(UINT8*) (*args);
-#endif
- break;
-
- case FFI_TYPE_SINT8:
-#if WORDS_BIGENDIAN
- *(SINT32*)(raw++) = *(SINT8*) (*args);
-#else
- (raw++)->sint = *(SINT8*) (*args);
-#endif
- break;
-
- case FFI_TYPE_UINT16:
-#if WORDS_BIGENDIAN
- *(UINT32*)(raw++) = *(UINT16*) (*args);
-#else
- (raw++)->uint = *(UINT16*) (*args);
-#endif
- break;
-
- case FFI_TYPE_SINT16:
-#if WORDS_BIGENDIAN
- *(SINT32*)(raw++) = *(SINT16*) (*args);
-#else
- (raw++)->sint = *(SINT16*) (*args);
-#endif
- break;
-
- case FFI_TYPE_UINT32:
-#if WORDS_BIGENDIAN
- *(UINT32*)(raw++) = *(UINT32*) (*args);
-#else
- (raw++)->uint = *(UINT32*) (*args);
-#endif
- break;
-
- case FFI_TYPE_SINT32:
-#if WORDS_BIGENDIAN
- *(SINT32*)(raw++) = *(SINT32*) (*args);
-#else
- (raw++)->sint = *(SINT32*) (*args);
-#endif
- break;
-
- case FFI_TYPE_FLOAT:
- (raw++)->flt = *(FLOAT32*) (*args);
- break;
-
-#if FFI_SIZEOF_JAVA_RAW == 8
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_DOUBLE:
- raw->uint = *(UINT64*) (*args);
- raw += 2;
- break;
-#endif
-
- case FFI_TYPE_POINTER:
- (raw++)->ptr = **(void***) args;
- break;
-
- default:
-#if FFI_SIZEOF_JAVA_RAW == 8
- FFI_ASSERT(0); /* Should have covered all cases */
-#else
- memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
- raw +=
+#endif /* FFI_SIZEOF_JAVA_RAW == 8 */
+ }
+
+#else
+#error "pdp endian not supported"
+#endif /* ! PDP */
+
+#endif /* WORDS_BIGENDIAN */
+}
+
+void
+ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw)
+{
+ unsigned i;
+ ffi_type **tp = cif->arg_types;
+
+ for (i = 0; i < cif->nargs; i++, tp++, args++)
+ {
+ switch ((*tp)->type)
+ {
+ case FFI_TYPE_UINT8:
+#if WORDS_BIGENDIAN
+ *(UINT32*)(raw++) = *(UINT8*) (*args);
+#else
+ (raw++)->uint = *(UINT8*) (*args);
+#endif
+ break;
+
+ case FFI_TYPE_SINT8:
+#if WORDS_BIGENDIAN
+ *(SINT32*)(raw++) = *(SINT8*) (*args);
+#else
+ (raw++)->sint = *(SINT8*) (*args);
+#endif
+ break;
+
+ case FFI_TYPE_UINT16:
+#if WORDS_BIGENDIAN
+ *(UINT32*)(raw++) = *(UINT16*) (*args);
+#else
+ (raw++)->uint = *(UINT16*) (*args);
+#endif
+ break;
+
+ case FFI_TYPE_SINT16:
+#if WORDS_BIGENDIAN
+ *(SINT32*)(raw++) = *(SINT16*) (*args);
+#else
+ (raw++)->sint = *(SINT16*) (*args);
+#endif
+ break;
+
+ case FFI_TYPE_UINT32:
+#if WORDS_BIGENDIAN
+ *(UINT32*)(raw++) = *(UINT32*) (*args);
+#else
+ (raw++)->uint = *(UINT32*) (*args);
+#endif
+ break;
+
+ case FFI_TYPE_SINT32:
+#if WORDS_BIGENDIAN
+ *(SINT32*)(raw++) = *(SINT32*) (*args);
+#else
+ (raw++)->sint = *(SINT32*) (*args);
+#endif
+ break;
+
+ case FFI_TYPE_FLOAT:
+ (raw++)->flt = *(FLOAT32*) (*args);
+ break;
+
+#if FFI_SIZEOF_JAVA_RAW == 8
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_DOUBLE:
+ raw->uint = *(UINT64*) (*args);
+ raw += 2;
+ break;
+#endif
+
+ case FFI_TYPE_POINTER:
+ (raw++)->ptr = **(void***) args;
+ break;
+
+ default:
+#if FFI_SIZEOF_JAVA_RAW == 8
+ FFI_ASSERT(0); /* Should have covered all cases */
+#else
+ memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
+ raw +=
FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
-#endif
- }
- }
-}
-
-#if !FFI_NATIVE_RAW_API
-
-static void
-ffi_java_rvalue_to_raw (ffi_cif *cif, void *rvalue)
-{
-#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8
- switch (cif->rtype->type)
- {
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- *(UINT64 *)rvalue <<= 32;
- break;
-
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_INT:
-#if FFI_SIZEOF_JAVA_RAW == 4
- case FFI_TYPE_POINTER:
-#endif
- *(SINT64 *)rvalue <<= 32;
- break;
-
+#endif
+ }
+ }
+}
+
+#if !FFI_NATIVE_RAW_API
+
+static void
+ffi_java_rvalue_to_raw (ffi_cif *cif, void *rvalue)
+{
+#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_UINT32:
+ *(UINT64 *)rvalue <<= 32;
+ break;
+
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_INT:
+#if FFI_SIZEOF_JAVA_RAW == 4
+ case FFI_TYPE_POINTER:
+#endif
+ *(SINT64 *)rvalue <<= 32;
+ break;
+
case FFI_TYPE_COMPLEX:
/* Not supported yet. */
abort();
- default:
- break;
- }
-#endif
-}
-
-static void
-ffi_java_raw_to_rvalue (ffi_cif *cif, void *rvalue)
-{
-#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8
- switch (cif->rtype->type)
- {
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- *(UINT64 *)rvalue >>= 32;
- break;
-
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_INT:
- *(SINT64 *)rvalue >>= 32;
- break;
-
+ default:
+ break;
+ }
+#endif
+}
+
+static void
+ffi_java_raw_to_rvalue (ffi_cif *cif, void *rvalue)
+{
+#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_UINT32:
+ *(UINT64 *)rvalue >>= 32;
+ break;
+
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_INT:
+ *(SINT64 *)rvalue >>= 32;
+ break;
+
case FFI_TYPE_COMPLEX:
/* Not supported yet. */
abort();
- default:
- break;
- }
-#endif
-}
-
-/* This is a generic definition of ffi_java_raw_call, to be used if the
- * native system does not provide a machine-specific implementation.
- * Having this allows code to be written for the raw API without the
- * need for system-specific code to handle input in that format; the
- * following functions handle the translation back and forth
- * automatically. */
-
-void ffi_java_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue,
- ffi_java_raw *raw)
-{
- void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
- ffi_java_raw_to_ptrarray (cif, raw, avalue);
- ffi_call (cif, fn, rvalue, avalue);
- ffi_java_rvalue_to_raw (cif, rvalue);
-}
-
-#if FFI_CLOSURES /* base system provides closures */
-
-static void
-ffi_java_translate_args (ffi_cif *cif, void *rvalue,
- void **avalue, void *user_data)
-{
- ffi_java_raw *raw = (ffi_java_raw*)alloca (ffi_java_raw_size (cif));
- ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
-
- ffi_java_ptrarray_to_raw (cif, avalue, raw);
- (*cl->fun) (cif, rvalue, (ffi_raw*)raw, cl->user_data);
- ffi_java_raw_to_rvalue (cif, rvalue);
-}
-
-ffi_status
-ffi_prep_java_raw_closure_loc (ffi_java_raw_closure* cl,
- ffi_cif *cif,
- void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
- void *user_data,
- void *codeloc)
-{
- ffi_status status;
-
- status = ffi_prep_closure_loc ((ffi_closure*) cl,
- cif,
- &ffi_java_translate_args,
- codeloc,
- codeloc);
- if (status == FFI_OK)
- {
- cl->fun = fun;
- cl->user_data = user_data;
- }
-
- return status;
-}
-
-/* Again, here is the generic version of ffi_prep_java_raw_closure, which
- * installs an intermediate "hub" that translates arguments from the
- * pointer-array format to the raw format. */
-
-ffi_status
-ffi_prep_java_raw_closure (ffi_java_raw_closure* cl,
- ffi_cif *cif,
- void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
- void *user_data)
-{
- return ffi_prep_java_raw_closure_loc (cl, cif, fun, user_data, cl);
-}
-
-#endif /* FFI_CLOSURES */
-#endif /* !FFI_NATIVE_RAW_API */
+ default:
+ break;
+ }
+#endif
+}
+
+/* This is a generic definition of ffi_java_raw_call, to be used if the
+ * native system does not provide a machine-specific implementation.
+ * Having this allows code to be written for the raw API without the
+ * need for system-specific code to handle input in that format; the
+ * following functions handle the translation back and forth
+ * automatically. */
+
+void ffi_java_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ ffi_java_raw *raw)
+{
+ void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+ ffi_java_raw_to_ptrarray (cif, raw, avalue);
+ ffi_call (cif, fn, rvalue, avalue);
+ ffi_java_rvalue_to_raw (cif, rvalue);
+}
+
+#if FFI_CLOSURES /* base system provides closures */
+
+static void
+ffi_java_translate_args (ffi_cif *cif, void *rvalue,
+ void **avalue, void *user_data)
+{
+ ffi_java_raw *raw = (ffi_java_raw*)alloca (ffi_java_raw_size (cif));
+ ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
+
+ ffi_java_ptrarray_to_raw (cif, avalue, raw);
+ (*cl->fun) (cif, rvalue, (ffi_raw*)raw, cl->user_data);
+ ffi_java_raw_to_rvalue (cif, rvalue);
+}
+
+ffi_status
+ffi_prep_java_raw_closure_loc (ffi_java_raw_closure* cl,
+ ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
+ void *user_data,
+ void *codeloc)
+{
+ ffi_status status;
+
+ status = ffi_prep_closure_loc ((ffi_closure*) cl,
+ cif,
+ &ffi_java_translate_args,
+ codeloc,
+ codeloc);
+ if (status == FFI_OK)
+ {
+ cl->fun = fun;
+ cl->user_data = user_data;
+ }
+
+ return status;
+}
+
+/* Again, here is the generic version of ffi_prep_java_raw_closure, which
+ * installs an intermediate "hub" that translates arguments from the
+ * pointer-array format to the raw format. */
+
+ffi_status
+ffi_prep_java_raw_closure (ffi_java_raw_closure* cl,
+ ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
+ void *user_data)
+{
+ return ffi_prep_java_raw_closure_loc (cl, cif, fun, user_data, cl);
+}
+
+#endif /* FFI_CLOSURES */
+#endif /* !FFI_NATIVE_RAW_API */
#endif /* !NO_JAVA_RAW_API */
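
The functions above form the generic Java raw API: ffi_java_ptrarray_to_raw packs a libffi pointer array into a contiguous ffi_java_raw block, ffi_java_raw_call unpacks it again around ffi_call, and ffi_prep_java_raw_closure installs the translating hub for closures. A minimal, hypothetical usage sketch (not part of this change; it assumes a build where the deprecated Java raw API is still compiled in, i.e. !NO_JAVA_RAW_API):

#include <ffi.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  ffi_cif cif;
  ffi_type *arg_types[1] = { &ffi_type_pointer };
  const char *msg = "hello from ffi_java_raw_call";
  void *argv[1] = { &msg };          /* pointer-array form of the argument */
  ffi_arg rc = 0;
  ffi_java_raw *raw;

  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, arg_types) != FFI_OK)
    return 1;

  /* Pack the pointer array into the raw block, then call puts() through
     the raw entry point; ffi_java_raw_call converts back internally. */
  raw = malloc(ffi_java_raw_size(&cif));
  if (raw == NULL)
    return 1;
  ffi_java_ptrarray_to_raw(&cif, argv, raw);
  ffi_java_raw_call(&cif, FFI_FN(puts), &rc, raw);
  free(raw);
  return 0;
}
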
diff --git a/contrib/restricted/libffi/src/prep_cif.c b/contrib/restricted/libffi/src/prep_cif.c
index 034f6822a1..06c6544036 100644
--- a/contrib/restricted/libffi/src/prep_cif.c
+++ b/contrib/restricted/libffi/src/prep_cif.c
@@ -1,83 +1,83 @@
-/* -----------------------------------------------------------------------
- prep_cif.c - Copyright (c) 2011, 2012 Anthony Green
- Copyright (c) 1996, 1998, 2007 Red Hat, Inc.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#include <ffi.h>
-#include <ffi_common.h>
-#include <stdlib.h>
-
-/* Round up to FFI_SIZEOF_ARG. */
-
+/* -----------------------------------------------------------------------
+ prep_cif.c - Copyright (c) 2011, 2012 Anthony Green
+ Copyright (c) 1996, 1998, 2007 Red Hat, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+
+/* Round up to FFI_SIZEOF_ARG. */
+
#define STACK_ARG_SIZE(x) FFI_ALIGN(x, FFI_SIZEOF_ARG)
-
-/* Perform machine independent initialization of aggregate type
- specifications. */
-
+
+/* Perform machine independent initialization of aggregate type
+ specifications. */
+
static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets)
-{
- ffi_type **ptr;
-
- if (UNLIKELY(arg == NULL || arg->elements == NULL))
- return FFI_BAD_TYPEDEF;
-
- arg->size = 0;
- arg->alignment = 0;
-
- ptr = &(arg->elements[0]);
-
- if (UNLIKELY(ptr == 0))
- return FFI_BAD_TYPEDEF;
-
- while ((*ptr) != NULL)
- {
- if (UNLIKELY(((*ptr)->size == 0)
+{
+ ffi_type **ptr;
+
+ if (UNLIKELY(arg == NULL || arg->elements == NULL))
+ return FFI_BAD_TYPEDEF;
+
+ arg->size = 0;
+ arg->alignment = 0;
+
+ ptr = &(arg->elements[0]);
+
+ if (UNLIKELY(ptr == 0))
+ return FFI_BAD_TYPEDEF;
+
+ while ((*ptr) != NULL)
+ {
+ if (UNLIKELY(((*ptr)->size == 0)
&& (initialize_aggregate((*ptr), NULL) != FFI_OK)))
- return FFI_BAD_TYPEDEF;
-
- /* Perform a sanity check on the argument type */
- FFI_ASSERT_VALID_TYPE(*ptr);
-
+ return FFI_BAD_TYPEDEF;
+
+ /* Perform a sanity check on the argument type */
+ FFI_ASSERT_VALID_TYPE(*ptr);
+
arg->size = FFI_ALIGN(arg->size, (*ptr)->alignment);
if (offsets)
*offsets++ = arg->size;
- arg->size += (*ptr)->size;
-
- arg->alignment = (arg->alignment > (*ptr)->alignment) ?
- arg->alignment : (*ptr)->alignment;
-
- ptr++;
- }
-
- /* Structure size includes tail padding. This is important for
- structures that fit in one register on ABIs like the PowerPC64
- Linux ABI that right justify small structs in a register.
- It's also needed for nested structure layout, for example
- struct A { long a; char b; }; struct B { struct A x; char y; };
- should find y at an offset of 2*sizeof(long) and result in a
- total size of 3*sizeof(long). */
+ arg->size += (*ptr)->size;
+
+ arg->alignment = (arg->alignment > (*ptr)->alignment) ?
+ arg->alignment : (*ptr)->alignment;
+
+ ptr++;
+ }
+
+ /* Structure size includes tail padding. This is important for
+ structures that fit in one register on ABIs like the PowerPC64
+ Linux ABI that right justify small structs in a register.
+ It's also needed for nested structure layout, for example
+ struct A { long a; char b; }; struct B { struct A x; char y; };
+ should find y at an offset of 2*sizeof(long) and result in a
+ total size of 3*sizeof(long). */
arg->size = FFI_ALIGN (arg->size, arg->alignment);
-
+
/* On some targets, the ABI defines that structures have an additional
alignment beyond the "natural" one based on their elements. */
#ifdef FFI_AGGREGATE_ALIGNMENT
@@ -85,167 +85,167 @@ static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets)
arg->alignment = FFI_AGGREGATE_ALIGNMENT;
#endif
- if (arg->size == 0)
- return FFI_BAD_TYPEDEF;
- else
- return FFI_OK;
-}
-
-#ifndef __CRIS__
-/* The CRIS ABI specifies structure elements to have byte
-   alignment only, so it completely overrides this function,
-   which assumes "natural" alignment and padding. */
-
-/* Perform machine independent ffi_cif preparation, then call
- machine dependent routine. */
-
-/* For non variadic functions isvariadic should be 0 and
- nfixedargs==ntotalargs.
-
- For variadic calls, isvariadic should be 1 and nfixedargs
- and ntotalargs set as appropriate. nfixedargs must always be >=1 */
-
-
-ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
- unsigned int isvariadic,
- unsigned int nfixedargs,
- unsigned int ntotalargs,
- ffi_type *rtype, ffi_type **atypes)
-{
- unsigned bytes = 0;
- unsigned int i;
- ffi_type **ptr;
-
- FFI_ASSERT(cif != NULL);
- FFI_ASSERT((!isvariadic) || (nfixedargs >= 1));
- FFI_ASSERT(nfixedargs <= ntotalargs);
-
- if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
- return FFI_BAD_ABI;
-
- cif->abi = abi;
- cif->arg_types = atypes;
- cif->nargs = ntotalargs;
- cif->rtype = rtype;
-
- cif->flags = 0;
+ if (arg->size == 0)
+ return FFI_BAD_TYPEDEF;
+ else
+ return FFI_OK;
+}
+
+#ifndef __CRIS__
+/* The CRIS ABI specifies structure elements to have byte
+   alignment only, so it completely overrides this function,
+   which assumes "natural" alignment and padding. */
+
+/* Perform machine independent ffi_cif preparation, then call
+ machine dependent routine. */
+
+/* For non variadic functions isvariadic should be 0 and
+ nfixedargs==ntotalargs.
+
+ For variadic calls, isvariadic should be 1 and nfixedargs
+ and ntotalargs set as appropriate. nfixedargs must always be >=1 */
+
+
+ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
+ unsigned int isvariadic,
+ unsigned int nfixedargs,
+ unsigned int ntotalargs,
+ ffi_type *rtype, ffi_type **atypes)
+{
+ unsigned bytes = 0;
+ unsigned int i;
+ ffi_type **ptr;
+
+ FFI_ASSERT(cif != NULL);
+ FFI_ASSERT((!isvariadic) || (nfixedargs >= 1));
+ FFI_ASSERT(nfixedargs <= ntotalargs);
+
+ if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
+ return FFI_BAD_ABI;
+
+ cif->abi = abi;
+ cif->arg_types = atypes;
+ cif->nargs = ntotalargs;
+ cif->rtype = rtype;
+
+ cif->flags = 0;
#ifdef _M_ARM64
cif->is_variadic = isvariadic;
#endif
#if HAVE_LONG_DOUBLE_VARIANT
ffi_prep_types (abi);
#endif
-
- /* Initialize the return type if necessary */
+
+ /* Initialize the return type if necessary */
if ((cif->rtype->size == 0)
&& (initialize_aggregate(cif->rtype, NULL) != FFI_OK))
- return FFI_BAD_TYPEDEF;
-
+ return FFI_BAD_TYPEDEF;
+
#ifndef FFI_TARGET_HAS_COMPLEX_TYPE
if (rtype->type == FFI_TYPE_COMPLEX)
abort();
#endif
- /* Perform a sanity check on the return type */
- FFI_ASSERT_VALID_TYPE(cif->rtype);
-
- /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
+ /* Perform a sanity check on the return type */
+ FFI_ASSERT_VALID_TYPE(cif->rtype);
+
+ /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
#if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
- /* Make space for the return structure pointer */
- if (cif->rtype->type == FFI_TYPE_STRUCT
-#ifdef TILE
- && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
-#endif
-#ifdef XTENSA
- && (cif->rtype->size > 16)
-#endif
+ /* Make space for the return structure pointer */
+ if (cif->rtype->type == FFI_TYPE_STRUCT
+#ifdef TILE
+ && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
+#endif
+#ifdef XTENSA
+ && (cif->rtype->size > 16)
+#endif
#ifdef NIOS2
&& (cif->rtype->size > 8)
#endif
- )
- bytes = STACK_ARG_SIZE(sizeof(void*));
-#endif
-
- for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
- {
-
- /* Initialize any uninitialized aggregate type definitions */
+ )
+ bytes = STACK_ARG_SIZE(sizeof(void*));
+#endif
+
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
+
+ /* Initialize any uninitialized aggregate type definitions */
if (((*ptr)->size == 0)
&& (initialize_aggregate((*ptr), NULL) != FFI_OK))
- return FFI_BAD_TYPEDEF;
-
+ return FFI_BAD_TYPEDEF;
+
#ifndef FFI_TARGET_HAS_COMPLEX_TYPE
if ((*ptr)->type == FFI_TYPE_COMPLEX)
abort();
#endif
- /* Perform a sanity check on the argument type, do this
- check after the initialization. */
- FFI_ASSERT_VALID_TYPE(*ptr);
-
+ /* Perform a sanity check on the argument type, do this
+ check after the initialization. */
+ FFI_ASSERT_VALID_TYPE(*ptr);
+
#if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
- {
- /* Add any padding if necessary */
- if (((*ptr)->alignment - 1) & bytes)
+ {
+ /* Add any padding if necessary */
+ if (((*ptr)->alignment - 1) & bytes)
bytes = (unsigned)FFI_ALIGN(bytes, (*ptr)->alignment);
-
-#ifdef TILE
- if (bytes < 10 * FFI_SIZEOF_ARG &&
- bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG)
- {
- /* An argument is never split between the 10 parameter
- registers and the stack. */
- bytes = 10 * FFI_SIZEOF_ARG;
- }
-#endif
-#ifdef XTENSA
- if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4)
- bytes = 6*4;
-#endif
-
+
+#ifdef TILE
+ if (bytes < 10 * FFI_SIZEOF_ARG &&
+ bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG)
+ {
+ /* An argument is never split between the 10 parameter
+ registers and the stack. */
+ bytes = 10 * FFI_SIZEOF_ARG;
+ }
+#endif
+#ifdef XTENSA
+ if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4)
+ bytes = 6*4;
+#endif
+
bytes += (unsigned int)STACK_ARG_SIZE((*ptr)->size);
- }
-#endif
- }
-
- cif->bytes = bytes;
-
- /* Perform machine dependent cif processing */
-#ifdef FFI_TARGET_SPECIFIC_VARIADIC
- if (isvariadic)
- return ffi_prep_cif_machdep_var(cif, nfixedargs, ntotalargs);
-#endif
-
- return ffi_prep_cif_machdep(cif);
-}
-#endif /* not __CRIS__ */
-
-ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs,
- ffi_type *rtype, ffi_type **atypes)
-{
- return ffi_prep_cif_core(cif, abi, 0, nargs, nargs, rtype, atypes);
-}
-
-ffi_status ffi_prep_cif_var(ffi_cif *cif,
- ffi_abi abi,
- unsigned int nfixedargs,
- unsigned int ntotalargs,
- ffi_type *rtype,
- ffi_type **atypes)
-{
- return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
-}
-
-#if FFI_CLOSURES
-
-ffi_status
-ffi_prep_closure (ffi_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,void**,void*),
- void *user_data)
-{
- return ffi_prep_closure_loc (closure, cif, fun, user_data, closure);
-}
-
-#endif
+ }
+#endif
+ }
+
+ cif->bytes = bytes;
+
+ /* Perform machine dependent cif processing */
+#ifdef FFI_TARGET_SPECIFIC_VARIADIC
+ if (isvariadic)
+ return ffi_prep_cif_machdep_var(cif, nfixedargs, ntotalargs);
+#endif
+
+ return ffi_prep_cif_machdep(cif);
+}
+#endif /* not __CRIS__ */
+
+ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs,
+ ffi_type *rtype, ffi_type **atypes)
+{
+ return ffi_prep_cif_core(cif, abi, 0, nargs, nargs, rtype, atypes);
+}
+
+ffi_status ffi_prep_cif_var(ffi_cif *cif,
+ ffi_abi abi,
+ unsigned int nfixedargs,
+ unsigned int ntotalargs,
+ ffi_type *rtype,
+ ffi_type **atypes)
+{
+ return ffi_prep_cif_core(cif, abi, 1, nfixedargs, ntotalargs, rtype, atypes);
+}
+
+#if FFI_CLOSURES
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data)
+{
+ return ffi_prep_closure_loc (closure, cif, fun, user_data, closure);
+}
+
+#endif
ffi_status
ffi_get_struct_offsets (ffi_abi abi, ffi_type *struct_type, size_t *offsets)
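
initialize_aggregate above is also what backs ffi_get_struct_offsets: it walks the element list, aligns each member, records the running offsets, and includes tail padding in the final size. A short sketch of that entry point (a hypothetical standalone program, assuming the libffi API shown in this file):

#include <ffi.h>
#include <stdio.h>

int main(void)
{
  /* The struct A { long a; char b; } example from the comment above. */
  ffi_type *elements[3] = { &ffi_type_slong, &ffi_type_schar, NULL };
  ffi_type struct_a = { 0, 0, FFI_TYPE_STRUCT, elements };
  size_t offsets[2];

  if (ffi_get_struct_offsets(FFI_DEFAULT_ABI, &struct_a, offsets) != FFI_OK)
    return 1;

  /* Size includes tail padding, so on an LP64 ABI this reports
     size 16, align 8, with a at offset 0 and b at offset 8. */
  printf("size=%zu align=%u a@%zu b@%zu\n",
         struct_a.size, (unsigned) struct_a.alignment, offsets[0], offsets[1]);
  return 0;
}
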
diff --git a/contrib/restricted/libffi/src/raw_api.c b/contrib/restricted/libffi/src/raw_api.c
index bded12239d..be156116cb 100644
--- a/contrib/restricted/libffi/src/raw_api.c
+++ b/contrib/restricted/libffi/src/raw_api.c
@@ -1,267 +1,267 @@
-/* -----------------------------------------------------------------------
- raw_api.c - Copyright (c) 1999, 2008 Red Hat, Inc.
-
- Author: Kresten Krab Thorup <krab@gnu.org>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-/* This file defines generic functions for use with the raw api. */
-
-#include <ffi.h>
-#include <ffi_common.h>
-
-#if !FFI_NO_RAW_API
-
-size_t
-ffi_raw_size (ffi_cif *cif)
-{
- size_t result = 0;
- int i;
-
- ffi_type **at = cif->arg_types;
-
- for (i = cif->nargs-1; i >= 0; i--, at++)
- {
-#if !FFI_NO_STRUCTS
- if ((*at)->type == FFI_TYPE_STRUCT)
+/* -----------------------------------------------------------------------
+ raw_api.c - Copyright (c) 1999, 2008 Red Hat, Inc.
+
+ Author: Kresten Krab Thorup <krab@gnu.org>
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+/* This file defines generic functions for use with the raw api. */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#if !FFI_NO_RAW_API
+
+size_t
+ffi_raw_size (ffi_cif *cif)
+{
+ size_t result = 0;
+ int i;
+
+ ffi_type **at = cif->arg_types;
+
+ for (i = cif->nargs-1; i >= 0; i--, at++)
+ {
+#if !FFI_NO_STRUCTS
+ if ((*at)->type == FFI_TYPE_STRUCT)
result += FFI_ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
- else
-#endif
+ else
+#endif
result += FFI_ALIGN ((*at)->size, FFI_SIZEOF_ARG);
- }
-
- return result;
-}
-
-
-void
-ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
-{
- unsigned i;
- ffi_type **tp = cif->arg_types;
-
-#if WORDS_BIGENDIAN
-
- for (i = 0; i < cif->nargs; i++, tp++, args++)
- {
- switch ((*tp)->type)
- {
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 1);
- break;
-
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 2);
- break;
-
-#if FFI_SIZEOF_ARG >= 4
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 4);
- break;
-#endif
-
-#if !FFI_NO_STRUCTS
- case FFI_TYPE_STRUCT:
- *args = (raw++)->ptr;
- break;
-#endif
-
+ }
+
+ return result;
+}
+
+
+void
+ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
+{
+ unsigned i;
+ ffi_type **tp = cif->arg_types;
+
+#if WORDS_BIGENDIAN
+
+ for (i = 0; i < cif->nargs; i++, tp++, args++)
+ {
+ switch ((*tp)->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 1);
+ break;
+
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 2);
+ break;
+
+#if FFI_SIZEOF_ARG >= 4
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 4);
+ break;
+#endif
+
+#if !FFI_NO_STRUCTS
+ case FFI_TYPE_STRUCT:
+ *args = (raw++)->ptr;
+ break;
+#endif
+
case FFI_TYPE_COMPLEX:
*args = (raw++)->ptr;
break;
- case FFI_TYPE_POINTER:
- *args = (void*) &(raw++)->ptr;
- break;
-
- default:
- *args = raw;
+ case FFI_TYPE_POINTER:
+ *args = (void*) &(raw++)->ptr;
+ break;
+
+ default:
+ *args = raw;
raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
- }
- }
-
-#else /* WORDS_BIGENDIAN */
-
-#if !PDP
-
- /* then assume little endian */
- for (i = 0; i < cif->nargs; i++, tp++, args++)
- {
-#if !FFI_NO_STRUCTS
- if ((*tp)->type == FFI_TYPE_STRUCT)
- {
- *args = (raw++)->ptr;
- }
- else
-#endif
+ }
+ }
+
+#else /* WORDS_BIGENDIAN */
+
+#if !PDP
+
+ /* then assume little endian */
+ for (i = 0; i < cif->nargs; i++, tp++, args++)
+ {
+#if !FFI_NO_STRUCTS
+ if ((*tp)->type == FFI_TYPE_STRUCT)
+ {
+ *args = (raw++)->ptr;
+ }
+ else
+#endif
if ((*tp)->type == FFI_TYPE_COMPLEX)
- {
+ {
*args = (raw++)->ptr;
}
else
{
- *args = (void*) raw;
+ *args = (void*) raw;
raw += FFI_ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
- }
- }
-
-#else
-#error "pdp endian not supported"
-#endif /* ! PDP */
-
-#endif /* WORDS_BIGENDIAN */
-}
-
-void
-ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
-{
- unsigned i;
- ffi_type **tp = cif->arg_types;
-
- for (i = 0; i < cif->nargs; i++, tp++, args++)
- {
- switch ((*tp)->type)
- {
- case FFI_TYPE_UINT8:
- (raw++)->uint = *(UINT8*) (*args);
- break;
-
- case FFI_TYPE_SINT8:
- (raw++)->sint = *(SINT8*) (*args);
- break;
-
- case FFI_TYPE_UINT16:
- (raw++)->uint = *(UINT16*) (*args);
- break;
-
- case FFI_TYPE_SINT16:
- (raw++)->sint = *(SINT16*) (*args);
- break;
-
-#if FFI_SIZEOF_ARG >= 4
- case FFI_TYPE_UINT32:
- (raw++)->uint = *(UINT32*) (*args);
- break;
-
- case FFI_TYPE_SINT32:
- (raw++)->sint = *(SINT32*) (*args);
- break;
-#endif
-
-#if !FFI_NO_STRUCTS
- case FFI_TYPE_STRUCT:
- (raw++)->ptr = *args;
- break;
-#endif
-
+ }
+ }
+
+#else
+#error "pdp endian not supported"
+#endif /* ! PDP */
+
+#endif /* WORDS_BIGENDIAN */
+}
+
+void
+ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
+{
+ unsigned i;
+ ffi_type **tp = cif->arg_types;
+
+ for (i = 0; i < cif->nargs; i++, tp++, args++)
+ {
+ switch ((*tp)->type)
+ {
+ case FFI_TYPE_UINT8:
+ (raw++)->uint = *(UINT8*) (*args);
+ break;
+
+ case FFI_TYPE_SINT8:
+ (raw++)->sint = *(SINT8*) (*args);
+ break;
+
+ case FFI_TYPE_UINT16:
+ (raw++)->uint = *(UINT16*) (*args);
+ break;
+
+ case FFI_TYPE_SINT16:
+ (raw++)->sint = *(SINT16*) (*args);
+ break;
+
+#if FFI_SIZEOF_ARG >= 4
+ case FFI_TYPE_UINT32:
+ (raw++)->uint = *(UINT32*) (*args);
+ break;
+
+ case FFI_TYPE_SINT32:
+ (raw++)->sint = *(SINT32*) (*args);
+ break;
+#endif
+
+#if !FFI_NO_STRUCTS
+ case FFI_TYPE_STRUCT:
+ (raw++)->ptr = *args;
+ break;
+#endif
+
case FFI_TYPE_COMPLEX:
(raw++)->ptr = *args;
break;
- case FFI_TYPE_POINTER:
- (raw++)->ptr = **(void***) args;
- break;
-
- default:
- memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
+ case FFI_TYPE_POINTER:
+ (raw++)->ptr = **(void***) args;
+ break;
+
+ default:
+ memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
- }
- }
-}
-
-#if !FFI_NATIVE_RAW_API
-
-
-/* This is a generic definition of ffi_raw_call, to be used if the
- * native system does not provide a machine-specific implementation.
- * Having this allows code to be written for the raw API without the
- * need for system-specific code to handle input in that format; the
- * following functions handle the translation back and forth
- * automatically. */
-
-void ffi_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *raw)
-{
- void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
- ffi_raw_to_ptrarray (cif, raw, avalue);
- ffi_call (cif, fn, rvalue, avalue);
-}
-
-#if FFI_CLOSURES /* base system provides closures */
-
-static void
-ffi_translate_args (ffi_cif *cif, void *rvalue,
- void **avalue, void *user_data)
-{
- ffi_raw *raw = (ffi_raw*)alloca (ffi_raw_size (cif));
- ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
-
- ffi_ptrarray_to_raw (cif, avalue, raw);
- (*cl->fun) (cif, rvalue, raw, cl->user_data);
-}
-
-ffi_status
-ffi_prep_raw_closure_loc (ffi_raw_closure* cl,
- ffi_cif *cif,
- void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
- void *user_data,
- void *codeloc)
-{
- ffi_status status;
-
- status = ffi_prep_closure_loc ((ffi_closure*) cl,
- cif,
- &ffi_translate_args,
- codeloc,
- codeloc);
- if (status == FFI_OK)
- {
- cl->fun = fun;
- cl->user_data = user_data;
- }
-
- return status;
-}
-
-#endif /* FFI_CLOSURES */
-#endif /* !FFI_NATIVE_RAW_API */
-
-#if FFI_CLOSURES
-
-/* Again, here is the generic version of ffi_prep_raw_closure, which
- * installs an intermediate "hub" that translates arguments from the
- * pointer-array format to the raw format. */
-
-ffi_status
-ffi_prep_raw_closure (ffi_raw_closure* cl,
- ffi_cif *cif,
- void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
- void *user_data)
-{
- return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl);
-}
-
-#endif /* FFI_CLOSURES */
-
-#endif /* !FFI_NO_RAW_API */
+ }
+ }
+}
+
+#if !FFI_NATIVE_RAW_API
+
+
+/* This is a generic definition of ffi_raw_call, to be used if the
+ * native system does not provide a machine-specific implementation.
+ * Having this allows code to be written for the raw API without the
+ * need for system-specific code to handle input in that format; the
+ * following functions handle the translation back and forth
+ * automatically. */
+
+void ffi_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *raw)
+{
+ void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+ ffi_raw_to_ptrarray (cif, raw, avalue);
+ ffi_call (cif, fn, rvalue, avalue);
+}
+
+#if FFI_CLOSURES /* base system provides closures */
+
+static void
+ffi_translate_args (ffi_cif *cif, void *rvalue,
+ void **avalue, void *user_data)
+{
+ ffi_raw *raw = (ffi_raw*)alloca (ffi_raw_size (cif));
+ ffi_raw_closure *cl = (ffi_raw_closure*)user_data;
+
+ ffi_ptrarray_to_raw (cif, avalue, raw);
+ (*cl->fun) (cif, rvalue, raw, cl->user_data);
+}
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* cl,
+ ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+ void *user_data,
+ void *codeloc)
+{
+ ffi_status status;
+
+ status = ffi_prep_closure_loc ((ffi_closure*) cl,
+ cif,
+ &ffi_translate_args,
+ codeloc,
+ codeloc);
+ if (status == FFI_OK)
+ {
+ cl->fun = fun;
+ cl->user_data = user_data;
+ }
+
+ return status;
+}
+
+#endif /* FFI_CLOSURES */
+#endif /* !FFI_NATIVE_RAW_API */
+
+#if FFI_CLOSURES
+
+/* Again, here is the generic version of ffi_prep_raw_closure, which
+ * installs an intermediate "hub" that translates arguments from the
+ * pointer-array format to the raw format. */
+
+ffi_status
+ffi_prep_raw_closure (ffi_raw_closure* cl,
+ ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+ void *user_data)
+{
+ return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl);
+}
+
+#endif /* FFI_CLOSURES */
+
+#endif /* !FFI_NO_RAW_API */
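
The generic raw API in raw_api.c mirrors the Java variant: ffi_ptrarray_to_raw and ffi_raw_to_ptrarray convert between the pointer-array and packed representations, ffi_raw_call wraps ffi_call, and ffi_prep_raw_closure_loc installs ffi_translate_args as the hub. A hypothetical closure sketch (not from this change; it assumes ffi_closure_alloc is available on the target and that the argument types are ones the raw format supports):

#include <ffi.h>
#include <stdio.h>

/* Handler in raw form: the single int argument sits in the first
   ffi_raw slot, exactly as ffi_ptrarray_to_raw would have packed it. */
static void raw_handler(ffi_cif *cif, void *ret, ffi_raw *args, void *user_data)
{
  (void) cif; (void) user_data;
  *(ffi_arg *) ret = (int) args[0].sint + 1;   /* return x + 1 */
}

int main(void)
{
  ffi_cif cif;
  ffi_type *arg_types[1] = { &ffi_type_sint };
  void *code;
  ffi_raw_closure *cl = ffi_closure_alloc(sizeof(ffi_raw_closure), &code);
  int (*bump)(int);

  if (cl == NULL)
    return 1;
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, arg_types) == FFI_OK
      && ffi_prep_raw_closure_loc(cl, &cif, raw_handler, NULL, code) == FFI_OK)
    {
      bump = (int (*)(int)) code;
      printf("%d\n", bump(41));   /* prints 42 */
    }
  ffi_closure_free(cl);
  return 0;
}
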
diff --git a/contrib/restricted/libffi/src/types.c b/contrib/restricted/libffi/src/types.c
index 4f8e13f504..9ec27f6cf3 100644
--- a/contrib/restricted/libffi/src/types.c
+++ b/contrib/restricted/libffi/src/types.c
@@ -1,50 +1,50 @@
-/* -----------------------------------------------------------------------
- types.c - Copyright (c) 1996, 1998 Red Hat, Inc.
-
- Predefined ffi_types needed by libffi.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-/* Hide the basic type definitions from the header file, so that we
- can redefine them here as "const". */
-#define LIBFFI_HIDE_BASIC_TYPES
-
-#include <ffi.h>
-#include <ffi_common.h>
-
-/* Type definitions */
-
+/* -----------------------------------------------------------------------
+ types.c - Copyright (c) 1996, 1998 Red Hat, Inc.
+
+ Predefined ffi_types needed by libffi.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+/* Hide the basic type definitions from the header file, so that we
+ can redefine them here as "const". */
+#define LIBFFI_HIDE_BASIC_TYPES
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/* Type definitions */
+
#define FFI_TYPEDEF(name, type, id, maybe_const)\
-struct struct_align_##name { \
- char c; \
- type x; \
-}; \
+struct struct_align_##name { \
+ char c; \
+ type x; \
+}; \
FFI_EXTERN \
maybe_const ffi_type ffi_type_##name = { \
- sizeof(type), \
- offsetof(struct struct_align_##name, x), \
- id, NULL \
-}
-
+ sizeof(type), \
+ offsetof(struct struct_align_##name, x), \
+ id, NULL \
+}
+
#define FFI_COMPLEX_TYPEDEF(name, type, maybe_const) \
static ffi_type *ffi_elements_complex_##name [2] = { \
(ffi_type *)(&ffi_type_##name), NULL \
@@ -61,11 +61,11 @@ maybe_const ffi_type ffi_type_complex_##name = { \
(ffi_type **)ffi_elements_complex_##name \
}
-/* Size and alignment are fake here. They must not be 0. */
+/* Size and alignment are fake here. They must not be 0. */
FFI_EXTERN const ffi_type ffi_type_void = {
- 1, 1, FFI_TYPE_VOID, NULL
-};
-
+ 1, 1, FFI_TYPE_VOID, NULL
+};
+
FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8, const);
FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8, const);
FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16, const);
@@ -74,30 +74,30 @@ FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32, const);
FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32, const);
FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64, const);
FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64, const);
-
+
FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER, const);
-
+
FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT, const);
FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const);
-
+
#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha__
#define FFI_LDBL_CONST const
#else
#define FFI_LDBL_CONST
#endif
-#ifdef __alpha__
-/* Even if we're not configured to default to 128-bit long double,
- maintain binary compatibility, as -mlong-double-128 can be used
- at any time. */
-/* Validate the hard-coded number below. */
-# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4
-# error FFI_TYPE_LONGDOUBLE out of date
-# endif
-const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
-#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+#ifdef __alpha__
+/* Even if we're not configured to default to 128-bit long double,
+ maintain binary compatibility, as -mlong-double-128 can be used
+ at any time. */
+/* Validate the hard-coded number below. */
+# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4
+# error FFI_TYPE_LONGDOUBLE out of date
+# endif
+const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
+#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE, FFI_LDBL_CONST);
-#endif
+#endif
#ifdef FFI_TARGET_HAS_COMPLEX_TYPE
FFI_COMPLEX_TYPEDEF(float, float, const);
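
The FFI_TYPEDEF macro above derives each type's natural alignment with the classic struct-offset probe: place the member after a lone char and take offsetof to get the ABI's required alignment. A standalone illustration of the same trick (plain C, not libffi API; the probe struct name is made up for this sketch):

#include <stddef.h>
#include <stdio.h>

struct align_probe_double { char c; double x; };

int main(void)
{
  /* On most ABIs this prints "size 8, alignment 8", matching what
     FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const) records in
     ffi_type_double.size and ffi_type_double.alignment. */
  printf("size %zu, alignment %zu\n",
         sizeof(double), offsetof(struct align_probe_double, x));
  return 0;
}
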
diff --git a/contrib/restricted/libffi/src/x86/ffi.c b/contrib/restricted/libffi/src/x86/ffi.c
index 19a3cb8a5f..9a592185a1 100644
--- a/contrib/restricted/libffi/src/x86/ffi.c
+++ b/contrib/restricted/libffi/src/x86/ffi.c
@@ -1,41 +1,41 @@
-/* -----------------------------------------------------------------------
+/* -----------------------------------------------------------------------
ffi.c - Copyright (c) 2017 Anthony Green
Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
- Copyright (c) 2002 Ranjit Mathew
- Copyright (c) 2002 Bo Thorsen
- Copyright (c) 2002 Roger Sayle
- Copyright (C) 2008, 2010 Free Software Foundation, Inc.
-
- x86 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
+ Copyright (c) 2002 Ranjit Mathew
+ Copyright (c) 2002 Bo Thorsen
+ Copyright (c) 2002 Roger Sayle
+ Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+
+ x86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
#if defined(__i386__) || defined(_M_IX86)
-#include <ffi.h>
-#include <ffi_common.h>
+#include <ffi.h>
+#include <ffi_common.h>
#include <stdint.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include "internal.h"
-
+
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
all further uses in this file will refer to the 80-bit type. */
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
@@ -45,28 +45,28 @@
#else
# undef FFI_TYPE_LONGDOUBLE
# define FFI_TYPE_LONGDOUBLE 4
-#endif
-
+#endif
+
#if defined(__GNUC__) && !defined(__declspec)
# define __declspec(x) __attribute__((x))
-#endif
-
+#endif
+
#if defined(_MSC_VER) && defined(_M_IX86)
/* Stack is not 16-byte aligned on Windows. */
#define STACK_ALIGN(bytes) (bytes)
#else
#define STACK_ALIGN(bytes) FFI_ALIGN (bytes, 16)
-#endif
-
+#endif
+
/* Perform machine dependent cif processing. */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep(ffi_cif *cif)
{
size_t bytes = 0;
int i, n, flags, cabi = cif->abi;
-
+
switch (cabi)
- {
+ {
case FFI_SYSV:
case FFI_STDCALL:
case FFI_THISCALL:
@@ -77,11 +77,11 @@ ffi_prep_cif_machdep(ffi_cif *cif)
break;
default:
return FFI_BAD_ABI;
- }
-
- switch (cif->rtype->type)
- {
- case FFI_TYPE_VOID:
+ }
+
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_VOID:
flags = X86_RET_VOID;
break;
case FFI_TYPE_FLOAT:
@@ -93,41 +93,41 @@ ffi_prep_cif_machdep(ffi_cif *cif)
case FFI_TYPE_LONGDOUBLE:
flags = X86_RET_LDOUBLE;
break;
- case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT8:
flags = X86_RET_UINT8;
break;
- case FFI_TYPE_UINT16:
+ case FFI_TYPE_UINT16:
flags = X86_RET_UINT16;
break;
- case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT8:
flags = X86_RET_SINT8;
break;
- case FFI_TYPE_SINT16:
+ case FFI_TYPE_SINT16:
flags = X86_RET_SINT16;
break;
case FFI_TYPE_INT:
case FFI_TYPE_SINT32:
- case FFI_TYPE_UINT32:
+ case FFI_TYPE_UINT32:
case FFI_TYPE_POINTER:
flags = X86_RET_INT32;
break;
- case FFI_TYPE_SINT64:
- case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
flags = X86_RET_INT64;
- break;
- case FFI_TYPE_STRUCT:
-#ifndef X86
+ break;
+ case FFI_TYPE_STRUCT:
+#ifndef X86
/* ??? This should be a different ABI rather than an ifdef. */
- if (cif->rtype->size == 1)
+ if (cif->rtype->size == 1)
flags = X86_RET_STRUCT_1B;
- else if (cif->rtype->size == 2)
+ else if (cif->rtype->size == 2)
flags = X86_RET_STRUCT_2B;
- else if (cif->rtype->size == 4)
+ else if (cif->rtype->size == 4)
flags = X86_RET_INT32;
- else if (cif->rtype->size == 8)
+ else if (cif->rtype->size == 8)
flags = X86_RET_INT64;
- else
-#endif
+ else
+#endif
{
do_struct:
switch (cabi)
@@ -145,7 +145,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
/* Allocate space for return value pointer. */
bytes += FFI_ALIGN (sizeof(void*), FFI_SIZEOF_ARG);
}
- break;
+ break;
case FFI_TYPE_COMPLEX:
switch (cif->rtype->elements[0]->type)
{
@@ -172,23 +172,23 @@ ffi_prep_cif_machdep(ffi_cif *cif)
return FFI_BAD_TYPEDEF;
}
break;
- default:
+ default:
return FFI_BAD_TYPEDEF;
- }
+ }
cif->flags = flags;
-
+
for (i = 0, n = cif->nargs; i < n; i++)
- {
+ {
ffi_type *t = cif->arg_types[i];
bytes = FFI_ALIGN (bytes, t->alignment);
bytes += FFI_ALIGN (t->size, FFI_SIZEOF_ARG);
- }
+ }
cif->bytes = bytes;
-
+
return FFI_OK;
}
-
+
static ffi_arg
extend_basic_type(void *arg, int type)
{
@@ -202,7 +202,7 @@ extend_basic_type(void *arg, int type)
return *(SINT16 *)arg;
case FFI_TYPE_UINT16:
return *(UINT16 *)arg;
-
+
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
case FFI_TYPE_POINTER:
@@ -212,8 +212,8 @@ extend_basic_type(void *arg, int type)
default:
abort();
}
-}
-
+}
+
struct call_frame
{
void *ebp; /* 0 */
@@ -249,31 +249,31 @@ static const struct abi_params abi_params[FFI_LAST_ABI] = {
#else
#define FFI_DECLARE_FASTCALL __declspec(fastcall)
#endif
-#else
+#else
#define FFI_DECLARE_FASTCALL
-#endif
-
+#endif
+
extern void FFI_DECLARE_FASTCALL ffi_call_i386(struct call_frame *, char *) FFI_HIDDEN;
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
-{
+{
size_t rsize, bytes;
struct call_frame *frame;
char *stack, *argp;
ffi_type **arg_types;
int flags, cabi, i, n, dir, narg_reg;
const struct abi_params *pabi;
-
+
flags = cif->flags;
cabi = cif->abi;
pabi = &abi_params[cabi];
dir = pabi->dir;
-
+
rsize = 0;
if (rvalue == NULL)
- {
+ {
switch (flags)
{
case X86_RET_FLOAT:
@@ -290,20 +290,20 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
flags = X86_RET_VOID;
break;
}
- }
-
+ }
+
bytes = STACK_ALIGN (cif->bytes);
stack = alloca(bytes + sizeof(*frame) + rsize);
argp = (dir < 0 ? stack + bytes : stack);
frame = (struct call_frame *)(stack + bytes);
if (rsize)
rvalue = frame + 1;
-
+
frame->fn = fn;
frame->flags = flags;
frame->rvalue = rvalue;
frame->regs[pabi->static_chain] = (unsigned)closure;
-
+
narg_reg = 0;
switch (flags)
{
@@ -319,9 +319,9 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
case X86_RET_STRUCTPOP:
*(void **)argp = rvalue;
argp += sizeof(void *);
- break;
- }
-
+ break;
+ }
+
arg_types = cif->arg_types;
for (i = 0, n = cif->nargs; i < n; i++)
{
@@ -329,11 +329,11 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
void *valp = avalue[i];
size_t z = ty->size;
int t = ty->type;
-
+
if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
{
ffi_arg val = extend_basic_type (valp, t);
-
+
if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
frame->regs[pabi->regs[narg_reg++]] = val;
else if (dir < 0)
@@ -351,7 +351,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
{
size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG);
size_t align = FFI_SIZEOF_ARG;
-
+
	  /* Issue 434: For thiscall and fastcall, if a parameter is passed
	     as a 64-bit integer or struct, all following integer parameters
	     will be passed on the stack. */
@@ -360,7 +360,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
|| t == FFI_TYPE_UINT64
|| t == FFI_TYPE_STRUCT))
narg_reg = 2;
-
+
/* Alignment rules for arguments are quite complex. Vectors and
structures with 16 byte alignment get it. Note that long double
on Darwin does have 16 byte alignment, and does not get this
@@ -387,29 +387,29 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
}
}
FFI_ASSERT (dir > 0 || argp == stack);
-
+
ffi_call_i386 (frame, stack);
}
-
+
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
ffi_call_int (cif, fn, rvalue, avalue, NULL);
-}
-
+}
+
void
ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
-{
+{
ffi_call_int (cif, fn, rvalue, avalue, closure);
}
-
+
/** private members **/
-
+
void FFI_HIDDEN ffi_closure_i386(void);
void FFI_HIDDEN ffi_closure_STDCALL(void);
void FFI_HIDDEN ffi_closure_REGISTER(void);
-
+
struct closure_frame
{
unsigned rettemp[4]; /* 0 */
@@ -418,10 +418,10 @@ struct closure_frame
void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */
void *user_data; /* 36 */
};
-
+
int FFI_HIDDEN FFI_DECLARE_FASTCALL
ffi_closure_inner (struct closure_frame *frame, char *stack)
-{
+{
ffi_cif *cif = frame->cif;
int cabi, i, n, flags, dir, narg_reg;
const struct abi_params *pabi;
@@ -429,7 +429,7 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
char *argp;
void *rvalue;
void **avalue;
-
+
cabi = cif->abi;
flags = cif->flags;
narg_reg = 0;
@@ -437,7 +437,7 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
pabi = &abi_params[cabi];
dir = pabi->dir;
argp = (dir < 0 ? stack + STACK_ALIGN (cif->bytes) : stack);
-
+
switch (flags)
{
case X86_RET_STRUCTARG:
@@ -455,18 +455,18 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
frame->rettemp[0] = (unsigned)rvalue;
break;
}
-
+
n = cif->nargs;
avalue = alloca(sizeof(void *) * n);
-
+
arg_types = cif->arg_types;
for (i = 0; i < n; ++i)
- {
+ {
ffi_type *ty = arg_types[i];
size_t z = ty->size;
int t = ty->type;
void *valp;
-
+
if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
{
if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
@@ -482,15 +482,15 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
argp += 4;
}
}
- else
+ else
{
size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG);
size_t align = FFI_SIZEOF_ARG;
-
+
/* See the comment in ffi_call_int. */
if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
align = 16;
-
+
	  /* Issue 434: For thiscall and fastcall, if a parameter is passed
	     as a 64-bit integer or struct, all following integer parameters
	     will be passed on the stack. */
@@ -499,7 +499,7 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
|| t == FFI_TYPE_UINT64
|| t == FFI_TYPE_STRUCT))
narg_reg = 2;
-
+
if (dir < 0)
{
/* ??? These reverse argument ABIs are probably too old
@@ -514,31 +514,31 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
argp += za;
}
}
-
+
avalue[i] = valp;
}
-
+
frame->fun (cif, rvalue, avalue, frame->user_data);
-
+
if (cabi == FFI_STDCALL)
return flags + (cif->bytes << X86_RET_POP_SHIFT);
else
return flags;
}
-
-ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,void**,void*),
- void *user_data,
- void *codeloc)
-{
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
char *tramp = closure->tramp;
void (*dest)(void);
int op = 0xb8; /* movl imm, %eax */
switch (cif->abi)
- {
+ {
case FFI_SYSV:
case FFI_THISCALL:
case FFI_FASTCALL:
@@ -555,7 +555,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
break;
default:
return FFI_BAD_ABI;
- }
+ }
/* movl or pushl immediate. */
tramp[0] = op;
@@ -583,7 +583,7 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
void (*dest)(void);
switch (cif->abi)
- {
+ {
case FFI_SYSV:
case FFI_MS_CDECL:
dest = ffi_go_closure_ECX;
@@ -598,38 +598,38 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
break;
case FFI_REGISTER:
default:
- return FFI_BAD_ABI;
- }
-
+ return FFI_BAD_ABI;
+ }
+
closure->tramp = dest;
closure->cif = cif;
closure->fun = fun;
- return FFI_OK;
-}
-
-/* ------- Native raw API support -------------------------------- */
-
-#if !FFI_NO_RAW_API
-
+ return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
void FFI_HIDDEN ffi_closure_raw_SYSV(void);
void FFI_HIDDEN ffi_closure_raw_THISCALL(void);
-ffi_status
+ffi_status
ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
ffi_cif *cif,
- void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
- void *user_data,
- void *codeloc)
-{
+ void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+ void *user_data,
+ void *codeloc)
+{
char *tramp = closure->tramp;
void (*dest)(void);
- int i;
-
+ int i;
+
/* We currently don't support certain kinds of arguments for raw
- closures. This should be implemented by a separate assembly
- language routine, since it would require argument processing,
- something we don't do now for performance. */
+ closures. This should be implemented by a separate assembly
+ language routine, since it would require argument processing,
+ something we don't do now for performance. */
for (i = cif->nargs-1; i >= 0; i--)
switch (cif->arg_types[i]->type)
{
@@ -637,9 +637,9 @@ ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
case FFI_TYPE_LONGDOUBLE:
return FFI_BAD_TYPEDEF;
}
-
+
switch (cif->abi)
- {
+ {
case FFI_THISCALL:
dest = ffi_closure_raw_THISCALL;
break;
@@ -648,7 +648,7 @@ ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
break;
default:
return FFI_BAD_ABI;
- }
+ }
/* movl imm, %eax. */
tramp[0] = 0xb8;
@@ -660,28 +660,28 @@ ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
closure->cif = cif;
closure->fun = fun;
- closure->user_data = user_data;
-
- return FFI_OK;
-}
-
-void
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+void
ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
-{
+{
size_t rsize, bytes;
struct call_frame *frame;
char *stack, *argp;
ffi_type **arg_types;
int flags, cabi, i, n, narg_reg;
const struct abi_params *pabi;
-
+
flags = cif->flags;
cabi = cif->abi;
pabi = &abi_params[cabi];
-
+
rsize = 0;
if (rvalue == NULL)
- {
+ {
switch (flags)
{
case X86_RET_FLOAT:
@@ -698,19 +698,19 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
flags = X86_RET_VOID;
break;
}
- }
-
+ }
+
bytes = STACK_ALIGN (cif->bytes);
argp = stack =
(void *)((uintptr_t)alloca(bytes + sizeof(*frame) + rsize + 15) & ~16);
frame = (struct call_frame *)(stack + bytes);
if (rsize)
rvalue = frame + 1;
-
+
frame->fn = fn;
frame->flags = flags;
frame->rvalue = rvalue;
-
+
narg_reg = 0;
switch (flags)
{
@@ -727,16 +727,16 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
*(void **)argp = rvalue;
argp += sizeof(void *);
bytes -= sizeof(void *);
- break;
- }
-
+ break;
+ }
+
arg_types = cif->arg_types;
for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++)
{
ffi_type *ty = arg_types[i];
size_t z = ty->size;
int t = ty->type;
-
+
if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT)
{
ffi_arg val = extend_basic_type (avalue, t);
@@ -754,7 +754,7 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
}
if (i < n)
memcpy (argp, avalue, bytes);
-
+
ffi_call_i386 (frame, stack);
}
#endif /* !FFI_NO_RAW_API */
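
On i386, ffi_prep_closure_loc above only patches a tiny trampoline (a movl or pushl immediate plus a jump) so that calls through codeloc land in ffi_closure_inner with the right cif and handler. A hypothetical end-to-end sketch using the standard closure API (not part of this change; it assumes executable closure memory from ffi_closure_alloc):

#include <ffi.h>
#include <stdio.h>

/* Handler in pointer-array form, as ffi_closure_inner delivers it. */
static void add_handler(ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void) cif; (void) user_data;
  int a = *(int *) args[0];
  int b = *(int *) args[1];
  *(ffi_arg *) ret = a + b;
}

int main(void)
{
  ffi_cif cif;
  ffi_type *arg_types[2] = { &ffi_type_sint, &ffi_type_sint };
  void *code;
  ffi_closure *cl = ffi_closure_alloc(sizeof(ffi_closure), &code);
  int (*add)(int, int);

  if (cl == NULL)
    return 1;
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, arg_types) == FFI_OK
      && ffi_prep_closure_loc(cl, &cif, add_handler, NULL, code) == FFI_OK)
    {
      add = (int (*)(int, int)) code;
      printf("%d\n", add(2, 3));   /* prints 5 */
    }
  ffi_closure_free(cl);
  return 0;
}
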
diff --git a/contrib/restricted/libffi/src/x86/ffi64.c b/contrib/restricted/libffi/src/x86/ffi64.c
index e59e396ff0..dec331c958 100644
--- a/contrib/restricted/libffi/src/x86/ffi64.c
+++ b/contrib/restricted/libffi/src/x86/ffi64.c
@@ -1,308 +1,308 @@
-/* -----------------------------------------------------------------------
+/* -----------------------------------------------------------------------
ffi64.c - Copyright (c) 2011, 2018 Anthony Green
Copyright (c) 2013 The Written Word, Inc.
- Copyright (c) 2008, 2010 Red Hat, Inc.
- Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
-
+ Copyright (c) 2008, 2010 Red Hat, Inc.
+ Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
+
x86-64 Foreign Function Interface
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#include <ffi.h>
-#include <ffi_common.h>
-
-#include <stdlib.h>
-#include <stdarg.h>
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
#include <stdint.h>
#include "internal64.h"
-
-#ifdef __x86_64__
-
-#define MAX_GPR_REGS 6
-#define MAX_SSE_REGS 8
-
-#if defined(__INTEL_COMPILER)
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+#if defined(__INTEL_COMPILER)
#include "xmmintrin.h"
-#define UINT128 __m128
-#else
-#if defined(__SUNPRO_C)
-#include <sunmedia_types.h>
-#define UINT128 __m128i
-#else
-#define UINT128 __int128_t
-#endif
-#endif
-
-union big_int_union
-{
- UINT32 i32;
- UINT64 i64;
- UINT128 i128;
-};
-
-struct register_args
-{
- /* Registers for argument passing. */
- UINT64 gpr[MAX_GPR_REGS];
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union
+{
+ UINT32 i32;
+ UINT64 i64;
+ UINT128 i128;
+};
+
+struct register_args
+{
+ /* Registers for argument passing. */
+ UINT64 gpr[MAX_GPR_REGS];
union big_int_union sse[MAX_SSE_REGS];
UINT64 rax; /* ssecount */
UINT64 r10; /* static chain */
-};
-
-extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
-
-/* All reference to register classes here is identical to the code in
- gcc/config/i386/i386.c. Do *not* change one without the other. */
-
-/* Register class used for passing given 64bit part of the argument.
- These represent classes as documented by the PS ABI, with the
- exception of SSESF, SSEDF classes, that are basically SSE class,
- just gcc will use SF or DFmode move instead of DImode to avoid
- reformatting penalties.
-
-   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
- whenever possible (upper half does contain padding). */
-enum x86_64_reg_class
- {
- X86_64_NO_CLASS,
- X86_64_INTEGER_CLASS,
- X86_64_INTEGERSI_CLASS,
- X86_64_SSE_CLASS,
- X86_64_SSESF_CLASS,
- X86_64_SSEDF_CLASS,
- X86_64_SSEUP_CLASS,
- X86_64_X87_CLASS,
- X86_64_X87UP_CLASS,
- X86_64_COMPLEX_X87_CLASS,
- X86_64_MEMORY_CLASS
- };
-
-#define MAX_CLASSES 4
-
-#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
-
-/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
- of this code is to classify each 8bytes of incoming argument by the register
- class and assign registers accordingly. */
-
-/* Return the union class of CLASS1 and CLASS2.
- See the x86-64 PS ABI for details. */
-
-static enum x86_64_reg_class
-merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
-{
- /* Rule #1: If both classes are equal, this is the resulting class. */
- if (class1 == class2)
- return class1;
-
- /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
- the other class. */
- if (class1 == X86_64_NO_CLASS)
- return class2;
- if (class2 == X86_64_NO_CLASS)
- return class1;
-
- /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
- if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
- return X86_64_MEMORY_CLASS;
-
- /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
- if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
- || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
- return X86_64_INTEGERSI_CLASS;
- if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
- || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
- return X86_64_INTEGER_CLASS;
-
- /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
- MEMORY is used. */
- if (class1 == X86_64_X87_CLASS
- || class1 == X86_64_X87UP_CLASS
- || class1 == X86_64_COMPLEX_X87_CLASS
- || class2 == X86_64_X87_CLASS
- || class2 == X86_64_X87UP_CLASS
- || class2 == X86_64_COMPLEX_X87_CLASS)
- return X86_64_MEMORY_CLASS;
-
- /* Rule #6: Otherwise class SSE is used. */
- return X86_64_SSE_CLASS;
-}
-
-/* Classify the argument of type TYPE and mode MODE.
- CLASSES will be filled by the register class used to pass each word
- of the operand. The number of words is returned. In case the parameter
- should be passed in memory, 0 is returned. As a special case for zero
- sized containers, classes[0] will be NO_CLASS and 1 is returned.
-
- See the x86-64 PS ABI for details.
-*/
+
+/* All reference to register classes here is identical to the code in
+ gcc/config/i386/i386.c. Do *not* change one without the other. */
+
+/* Register class used for passing given 64bit part of the argument.
+ These represent classes as documented by the PS ABI, with the
+ exception of SSESF, SSEDF classes, that are basically SSE class,
+ just gcc will use SF or DFmode move instead of DImode to avoid
+ reformatting penalties.
+
+   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (upper half does contain padding). */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+
+#define MAX_CLASSES 4
+
+#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
+ of this code is to classify each 8bytes of incoming argument by the register
+ class and assign registers accordingly. */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ See the x86-64 PS ABI for details.
+*/
static size_t
-classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
- size_t byte_offset)
-{
- switch (type->type)
- {
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_POINTER:
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+ size_t byte_offset)
+{
+ switch (type->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
do_integer:
- {
+ {
size_t size = byte_offset + type->size;
-
- if (size <= 4)
- {
- classes[0] = X86_64_INTEGERSI_CLASS;
- return 1;
- }
- else if (size <= 8)
- {
- classes[0] = X86_64_INTEGER_CLASS;
- return 1;
- }
- else if (size <= 12)
- {
- classes[0] = X86_64_INTEGER_CLASS;
- classes[1] = X86_64_INTEGERSI_CLASS;
- return 2;
- }
- else if (size <= 16)
- {
+
+ if (size <= 4)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 8)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+ else if (size <= 12)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else if (size <= 16)
+ {
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
- return 2;
- }
- else
- FFI_ASSERT (0);
- }
- case FFI_TYPE_FLOAT:
- if (!(byte_offset % 8))
- classes[0] = X86_64_SSESF_CLASS;
- else
- classes[0] = X86_64_SSE_CLASS;
- return 1;
- case FFI_TYPE_DOUBLE:
- classes[0] = X86_64_SSEDF_CLASS;
- return 1;
+ return 2;
+ }
+ else
+ FFI_ASSERT (0);
+ }
+ case FFI_TYPE_FLOAT:
+ if (!(byte_offset % 8))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
- case FFI_TYPE_LONGDOUBLE:
- classes[0] = X86_64_X87_CLASS;
- classes[1] = X86_64_X87UP_CLASS;
- return 2;
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
#endif
- case FFI_TYPE_STRUCT:
- {
+ case FFI_TYPE_STRUCT:
+ {
const size_t UNITS_PER_WORD = 8;
size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
ffi_type **ptr;
unsigned int i;
- enum x86_64_reg_class subclasses[MAX_CLASSES];
-
- /* If the struct is larger than 32 bytes, pass it on the stack. */
- if (type->size > 32)
- return 0;
-
- for (i = 0; i < words; i++)
- classes[i] = X86_64_NO_CLASS;
-
- /* Zero sized arrays or structures are NO_CLASS. We return 0 to
-	   signal the memory class, so handle it as a special case. */
- if (!words)
- {
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* If the struct is larger than 32 bytes, pass it on the stack. */
+ if (type->size > 32)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+ /* Zero sized arrays or structures are NO_CLASS. We return 0 to
+	   signal the memory class, so handle it as a special case. */
+ if (!words)
+ {
case FFI_TYPE_VOID:
- classes[0] = X86_64_NO_CLASS;
- return 1;
- }
-
- /* Merge the fields of structure. */
- for (ptr = type->elements; *ptr != NULL; ptr++)
- {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Merge the fields of structure. */
+ for (ptr = type->elements; *ptr != NULL; ptr++)
+ {
size_t num;
-
+
byte_offset = FFI_ALIGN (byte_offset, (*ptr)->alignment);
-
- num = classify_argument (*ptr, subclasses, byte_offset % 8);
- if (num == 0)
- return 0;
- for (i = 0; i < num; i++)
- {
+
+ num = classify_argument (*ptr, subclasses, byte_offset % 8);
+ if (num == 0)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
size_t pos = byte_offset / 8;
- classes[i + pos] =
- merge_classes (subclasses[i], classes[i + pos]);
- }
-
- byte_offset += (*ptr)->size;
- }
-
- if (words > 2)
- {
- /* When size > 16 bytes, if the first one isn't
- X86_64_SSE_CLASS or any other ones aren't
- X86_64_SSEUP_CLASS, everything should be passed in
- memory. */
- if (classes[0] != X86_64_SSE_CLASS)
- return 0;
-
- for (i = 1; i < words; i++)
- if (classes[i] != X86_64_SSEUP_CLASS)
- return 0;
- }
-
- /* Final merger cleanup. */
- for (i = 0; i < words; i++)
- {
- /* If one class is MEMORY, everything should be passed in
- memory. */
- if (classes[i] == X86_64_MEMORY_CLASS)
- return 0;
-
- /* The X86_64_SSEUP_CLASS should be always preceded by
- X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+
+ byte_offset += (*ptr)->size;
+ }
+
+ if (words > 2)
+ {
+ /* When size > 16 bytes, if the first one isn't
+ X86_64_SSE_CLASS or any other ones aren't
+ X86_64_SSEUP_CLASS, everything should be passed in
+ memory. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* The X86_64_SSEUP_CLASS should be always preceded by
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
if (i > 1 && classes[i] == X86_64_SSEUP_CLASS
- && classes[i - 1] != X86_64_SSE_CLASS
- && classes[i - 1] != X86_64_SSEUP_CLASS)
- {
- /* The first one should never be X86_64_SSEUP_CLASS. */
- FFI_ASSERT (i != 0);
- classes[i] = X86_64_SSE_CLASS;
- }
-
- /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
- everything should be passed in memory. */
+ && classes[i - 1] != X86_64_SSE_CLASS
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ FFI_ASSERT (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
+
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
if (i > 1 && classes[i] == X86_64_X87UP_CLASS
- && (classes[i - 1] != X86_64_X87_CLASS))
- {
- /* The first one should never be X86_64_X87UP_CLASS. */
- FFI_ASSERT (i != 0);
- return 0;
- }
- }
- return words;
- }
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ FFI_ASSERT (i != 0);
+ return 0;
+ }
+ }
+ return words;
+ }
case FFI_TYPE_COMPLEX:
{
ffi_type *inner = type->elements[0];
@@ -318,7 +318,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
goto do_integer;
-
+
case FFI_TYPE_FLOAT:
classes[0] = X86_64_SSE_CLASS;
if (byte_offset % 8)
@@ -337,72 +337,72 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
#endif
}
}
- }
+ }
abort();
-}
-
-/* Examine the argument and compute the number of registers required in each
-   class (returned via PNGPR and PNSSE).  Return zero iff the parameter should
-   be passed in memory; otherwise return the number of eightbyte words. */
-
+}
+
+/* Examine the argument and compute the number of registers required in each
+   class (returned via PNGPR and PNSSE).  Return zero iff the parameter should
+   be passed in memory; otherwise return the number of eightbyte words. */
+
static size_t
-examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
- _Bool in_return, int *pngpr, int *pnsse)
-{
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+ _Bool in_return, int *pngpr, int *pnsse)
+{
size_t n;
unsigned int i;
int ngpr, nsse;
-
- n = classify_argument (type, classes, 0);
- if (n == 0)
- return 0;
-
- ngpr = nsse = 0;
- for (i = 0; i < n; ++i)
- switch (classes[i])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- ngpr++;
- break;
- case X86_64_SSE_CLASS:
- case X86_64_SSESF_CLASS:
- case X86_64_SSEDF_CLASS:
- nsse++;
- break;
- case X86_64_NO_CLASS:
- case X86_64_SSEUP_CLASS:
- break;
- case X86_64_X87_CLASS:
- case X86_64_X87UP_CLASS:
- case X86_64_COMPLEX_X87_CLASS:
- return in_return != 0;
- default:
- abort ();
- }
-
- *pngpr = ngpr;
- *pnsse = nsse;
-
- return n;
-}
-
-/* Perform machine dependent cif processing. */
-
+
+ n = classify_argument (type, classes, 0);
+ if (n == 0)
+ return 0;
+
+ ngpr = nsse = 0;
+ for (i = 0; i < n; ++i)
+ switch (classes[i])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ ngpr++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ nsse++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return != 0;
+ default:
+ abort ();
+ }
+
+ *pngpr = ngpr;
+ *pnsse = nsse;
+
+ return n;
+}
+
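For orientation, a hedged worked example of what the classification machinery above produces for a few common C argument types, together with the usual way such an aggregate is described through libffi's public ffi_type interface; the point_type/point_elements names are illustrative, not part of the library.

/* Illustrative only -- expected results of classify_argument()/
   examine_argument() above, one class per 8-byte word:

     double                    -> { SSEDF }          ngpr=0, nsse=1
     struct { int a, b; }      -> { INTEGER }        ngpr=1, nsse=0
     struct { double x, y; }   -> { SSEDF, SSEDF }   ngpr=0, nsse=2
     struct { char c[40]; }    -> size > 32 bytes, classify_argument()
                                  returns 0, so it is passed in memory.  */

#include <ffi.h>

static ffi_type *point_elements[] =
  { &ffi_type_double, &ffi_type_double, NULL };

static ffi_type point_type = {
  0, 0,                 /* size and alignment, filled in by ffi_prep_cif() */
  FFI_TYPE_STRUCT,
  point_elements
};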
+/* Perform machine dependent cif processing. */
+
#ifndef __ILP32__
extern ffi_status
ffi_prep_cif_machdep_efi64(ffi_cif *cif);
#endif
ffi_status FFI_HIDDEN
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
int gprcount, ssecount, i, avn, ngpr, nsse;
unsigned flags;
- enum x86_64_reg_class classes[MAX_CLASSES];
+ enum x86_64_reg_class classes[MAX_CLASSES];
size_t bytes, n, rtype_size;
ffi_type *rtype;
-
+
#ifndef __ILP32__
if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
return ffi_prep_cif_machdep_efi64(cif);
@@ -410,12 +410,12 @@ ffi_prep_cif_machdep (ffi_cif *cif)
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
- gprcount = ssecount = 0;
-
+ gprcount = ssecount = 0;
+
rtype = cif->rtype;
rtype_size = rtype->size;
switch (rtype->type)
- {
+ {
case FFI_TYPE_VOID:
flags = UNIX64_RET_VOID;
break;
@@ -457,18 +457,18 @@ ffi_prep_cif_machdep (ffi_cif *cif)
break;
#endif
case FFI_TYPE_STRUCT:
- n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
- if (n == 0)
- {
- /* The return value is passed in memory. A pointer to that
- memory is the first argument. Allocate a register for it. */
- gprcount++;
- /* We don't have to do anything in asm for the return. */
+ n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value is passed in memory. A pointer to that
+ memory is the first argument. Allocate a register for it. */
+ gprcount++;
+ /* We don't have to do anything in asm for the return. */
flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
- }
+ }
else
- {
- _Bool sse0 = SSE_CLASS_P (classes[0]);
+ {
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
if (rtype_size == 4 && sse0)
flags = UNIX64_RET_XMM32;
@@ -487,7 +487,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
flags = UNIX64_RET_ST_RAX_RDX;
flags |= rtype_size << UNIX64_SIZE_SHIFT;
}
- }
+ }
break;
case FFI_TYPE_COMPLEX:
switch (rtype->elements[0]->type)
@@ -520,54 +520,54 @@ ffi_prep_cif_machdep (ffi_cif *cif)
break;
default:
return FFI_BAD_TYPEDEF;
- }
-
- /* Go over all arguments and determine the way they should be passed.
- If it's in a register and there is space for it, let that be so. If
-     not, add its size to the stack byte count. */
- for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
- {
- if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
- || gprcount + ngpr > MAX_GPR_REGS
- || ssecount + nsse > MAX_SSE_REGS)
- {
- long align = cif->arg_types[i]->alignment;
-
- if (align < 8)
- align = 8;
-
+ }
+
+ /* Go over all arguments and determine the way they should be passed.
+ If it's in a register and there is space for it, let that be so. If
+     not, add its size to the stack byte count. */
+ for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+ {
+ if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = cif->arg_types[i]->alignment;
+
+ if (align < 8)
+ align = 8;
+
bytes = FFI_ALIGN (bytes, align);
- bytes += cif->arg_types[i]->size;
- }
- else
- {
- gprcount += ngpr;
- ssecount += nsse;
- }
- }
- if (ssecount)
+ bytes += cif->arg_types[i]->size;
+ }
+ else
+ {
+ gprcount += ngpr;
+ ssecount += nsse;
+ }
+ }
+ if (ssecount)
flags |= UNIX64_FLAG_XMM_ARGS;
- cif->flags = flags;
+ cif->flags = flags;
cif->bytes = (unsigned) FFI_ALIGN (bytes, 8);
-
- return FFI_OK;
-}
-
+
+ return FFI_OK;
+}
+
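The cif that ffi_prep_cif_machdep() finishes above is normally built through the public ffi_prep_cif() entry point and then consumed by ffi_call(). A minimal usage sketch; the hypot2 helper is a made-up example function, not part of libffi:

#include <ffi.h>
#include <stdio.h>

static double hypot2(double x, double y) { return x * x + y * y; }

int main(void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_double, &ffi_type_double };
  double x = 3.0, y = 4.0, result = 0.0;
  void *values[2] = { &x, &y };

  /* ffi_prep_cif() calls into ffi_prep_cif_machdep() above, which
     classifies the arguments and fills in cif.flags and cif.bytes.  */
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_double, args) != FFI_OK)
    return 1;

  ffi_call(&cif, FFI_FN(hypot2), &result, values);
  printf("%f\n", result);       /* prints 25.000000 */
  return 0;
}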
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
void **avalue, void *closure)
-{
- enum x86_64_reg_class classes[MAX_CLASSES];
- char *stack, *argp;
- ffi_type **arg_types;
+{
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ char *stack, *argp;
+ ffi_type **arg_types;
int gprcount, ssecount, ngpr, nsse, i, avn, flags;
- struct register_args *reg_args;
-
- /* Can't call 32-bit mode from 64-bit mode. */
- FFI_ASSERT (cif->abi == FFI_UNIX64);
-
- /* If the return value is a struct and we don't have a return value
+ struct register_args *reg_args;
+
+ /* Can't call 32-bit mode from 64-bit mode. */
+ FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+ /* If the return value is a struct and we don't have a return value
address then we need to make one. Otherwise we can ignore it. */
flags = cif->flags;
if (rvalue == NULL)
@@ -577,104 +577,104 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
else
flags = UNIX64_RET_VOID;
}
-
- /* Allocate the space for the arguments, plus 4 words of temp space. */
- stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
- reg_args = (struct register_args *) stack;
- argp = stack + sizeof (struct register_args);
-
+
+ /* Allocate the space for the arguments, plus 4 words of temp space. */
+ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+ reg_args = (struct register_args *) stack;
+ argp = stack + sizeof (struct register_args);
+
reg_args->r10 = (uintptr_t) closure;
- gprcount = ssecount = 0;
-
- /* If the return value is passed in memory, add the pointer as the
- first integer argument. */
+ gprcount = ssecount = 0;
+
+ /* If the return value is passed in memory, add the pointer as the
+ first integer argument. */
if (flags & UNIX64_FLAG_RET_IN_MEM)
- reg_args->gpr[gprcount++] = (unsigned long) rvalue;
-
- avn = cif->nargs;
- arg_types = cif->arg_types;
-
- for (i = 0; i < avn; ++i)
- {
+ reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
+ {
size_t n, size = arg_types[i]->size;
-
- n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
- if (n == 0
- || gprcount + ngpr > MAX_GPR_REGS
- || ssecount + nsse > MAX_SSE_REGS)
- {
- long align = arg_types[i]->alignment;
-
- /* Stack arguments are *always* at least 8 byte aligned. */
- if (align < 8)
- align = 8;
-
- /* Pass this argument in memory. */
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
argp = (void *) FFI_ALIGN (argp, align);
- memcpy (argp, avalue[i], size);
- argp += size;
- }
- else
- {
- /* The argument is passed entirely in registers. */
- char *a = (char *) avalue[i];
+ memcpy (argp, avalue[i], size);
+ argp += size;
+ }
+ else
+ {
+ /* The argument is passed entirely in registers. */
+ char *a = (char *) avalue[i];
unsigned int j;
-
- for (j = 0; j < n; j++, a += 8, size -= 8)
- {
- switch (classes[j])
- {
+
+ for (j = 0; j < n; j++, a += 8, size -= 8)
+ {
+ switch (classes[j])
+ {
case X86_64_NO_CLASS:
case X86_64_SSEUP_CLASS:
break;
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- /* Sign-extend integer arguments passed in general
- purpose registers, to cope with the fact that
- LLVM incorrectly assumes that this will be done
- (the x86-64 PS ABI does not specify this). */
- switch (arg_types[i]->type)
- {
- case FFI_TYPE_SINT8:
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ /* Sign-extend integer arguments passed in general
+ purpose registers, to cope with the fact that
+ LLVM incorrectly assumes that this will be done
+ (the x86-64 PS ABI does not specify this). */
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
- break;
- case FFI_TYPE_SINT16:
+ break;
+ case FFI_TYPE_SINT16:
reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
- break;
- case FFI_TYPE_SINT32:
+ break;
+ case FFI_TYPE_SINT32:
reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
- break;
- default:
- reg_args->gpr[gprcount] = 0;
+ break;
+ default:
+ reg_args->gpr[gprcount] = 0;
memcpy (&reg_args->gpr[gprcount], a, size);
- }
- gprcount++;
- break;
- case X86_64_SSE_CLASS:
- case X86_64_SSEDF_CLASS:
+ }
+ gprcount++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSEDF_CLASS:
memcpy (&reg_args->sse[ssecount++].i64, a, sizeof(UINT64));
- break;
- case X86_64_SSESF_CLASS:
+ break;
+ case X86_64_SSESF_CLASS:
memcpy (&reg_args->sse[ssecount++].i32, a, sizeof(UINT32));
- break;
- default:
- abort();
- }
- }
- }
- }
+ break;
+ default:
+ abort();
+ }
+ }
+ }
+ }
reg_args->rax = ssecount;
-
- ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+
+ ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
flags, rvalue, fn);
-}
-
+}
+
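The sign-extension the loop above applies before placing a small signed integer into a general-purpose register slot amounts to the following standalone sketch (not library code):

#include <stdint.h>

/* Mirrors reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a): an
   argument byte of -5 becomes 0xfffffffffffffffb, so a callee that
   reads the whole 64-bit register sees a well-defined value.  */
static uint64_t widen_sint8(const void *a)
{
  return (uint64_t) (int64_t) *(const int8_t *) a;
}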
#ifndef __ILP32__
extern void
ffi_call_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue);
#endif
-
+
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
@@ -687,7 +687,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
#endif
ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
-
+
#ifndef __ILP32__
extern void
ffi_call_go_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue,
@@ -721,13 +721,13 @@ ffi_prep_closure_loc_efi64(ffi_closure* closure,
void *codeloc);
#endif
-ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*, void*, void**, void*),
- void *user_data,
- void *codeloc)
-{
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
static const unsigned char trampoline[16] = {
/* leaq -0x7(%rip),%r10 # 0x0 */
0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
@@ -738,29 +738,29 @@ ffi_prep_closure_loc (ffi_closure* closure,
};
void (*dest)(void);
char *tramp = closure->tramp;
-
+
#ifndef __ILP32__
if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
return ffi_prep_closure_loc_efi64(closure, cif, fun, user_data, codeloc);
#endif
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
-
+
if (cif->flags & UNIX64_FLAG_XMM_ARGS)
dest = ffi_closure_unix64_sse;
else
dest = ffi_closure_unix64;
-
+
memcpy (tramp, trampoline, sizeof(trampoline));
*(UINT64 *)(tramp + 16) = (uintptr_t)dest;
-
- closure->cif = cif;
- closure->fun = fun;
- closure->user_data = user_data;
-
- return FFI_OK;
-}
-
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
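The trampoline prepared above is normally reached through the public closure API. A minimal sketch assuming the standard ffi_closure_alloc()/ffi_closure_free() allocator; the add_one handler and its int(int) target signature are invented for illustration:

#include <ffi.h>
#include <stdio.h>

/* Handler reached from the trampoline via ffi_closure_unix64(_sse).  */
static void add_one(ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void) cif; (void) user_data;
  *(ffi_arg *) ret = *(int *) args[0] + 1;
}

int main(void)
{
  ffi_cif cif;
  ffi_type *argt[1] = { &ffi_type_sint };
  void *code = NULL;
  ffi_closure *closure = ffi_closure_alloc(sizeof(ffi_closure), &code);
  int (*fn)(int);

  if (closure == NULL
      || ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, argt) != FFI_OK
      || ffi_prep_closure_loc(closure, &cif, add_one, NULL, code) != FFI_OK)
    return 1;

  fn = (int (*)(int)) code;
  printf("%d\n", fn(41));       /* prints 42 */
  ffi_closure_free(closure);
  return 0;
}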
int FFI_HIDDEN
ffi_closure_unix64_inner(ffi_cif *cif,
void (*fun)(ffi_cif*, void*, void**, void*),
@@ -768,92 +768,92 @@ ffi_closure_unix64_inner(ffi_cif *cif,
void *rvalue,
struct register_args *reg_args,
char *argp)
-{
- void **avalue;
- ffi_type **arg_types;
- long i, avn;
- int gprcount, ssecount, ngpr, nsse;
+{
+ void **avalue;
+ ffi_type **arg_types;
+ long i, avn;
+ int gprcount, ssecount, ngpr, nsse;
int flags;
-
+
avn = cif->nargs;
flags = cif->flags;
avalue = alloca(avn * sizeof(void *));
- gprcount = ssecount = 0;
-
+ gprcount = ssecount = 0;
+
if (flags & UNIX64_FLAG_RET_IN_MEM)
- {
+ {
/* On return, %rax will contain the address that was passed
by the caller in %rdi. */
void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++];
*(void **)rvalue = r;
rvalue = r;
flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
- }
-
- arg_types = cif->arg_types;
- for (i = 0; i < avn; ++i)
- {
- enum x86_64_reg_class classes[MAX_CLASSES];
+ }
+
+ arg_types = cif->arg_types;
+ for (i = 0; i < avn; ++i)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
size_t n;
-
- n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
- if (n == 0
- || gprcount + ngpr > MAX_GPR_REGS
- || ssecount + nsse > MAX_SSE_REGS)
- {
- long align = arg_types[i]->alignment;
-
- /* Stack arguments are *always* at least 8 byte aligned. */
- if (align < 8)
- align = 8;
-
- /* Pass this argument in memory. */
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
argp = (void *) FFI_ALIGN (argp, align);
- avalue[i] = argp;
- argp += arg_types[i]->size;
- }
- /* If the argument is in a single register, or two consecutive
- integer registers, then we can use that address directly. */
- else if (n == 1
- || (n == 2 && !(SSE_CLASS_P (classes[0])
- || SSE_CLASS_P (classes[1]))))
- {
- /* The argument is in a single register. */
- if (SSE_CLASS_P (classes[0]))
- {
- avalue[i] = &reg_args->sse[ssecount];
- ssecount += n;
- }
- else
- {
- avalue[i] = &reg_args->gpr[gprcount];
- gprcount += n;
- }
- }
- /* Otherwise, allocate space to make them consecutive. */
- else
- {
- char *a = alloca (16);
+ avalue[i] = argp;
+ argp += arg_types[i]->size;
+ }
+ /* If the argument is in a single register, or two consecutive
+ integer registers, then we can use that address directly. */
+ else if (n == 1
+ || (n == 2 && !(SSE_CLASS_P (classes[0])
+ || SSE_CLASS_P (classes[1]))))
+ {
+ /* The argument is in a single register. */
+ if (SSE_CLASS_P (classes[0]))
+ {
+ avalue[i] = &reg_args->sse[ssecount];
+ ssecount += n;
+ }
+ else
+ {
+ avalue[i] = &reg_args->gpr[gprcount];
+ gprcount += n;
+ }
+ }
+ /* Otherwise, allocate space to make them consecutive. */
+ else
+ {
+ char *a = alloca (16);
unsigned int j;
-
- avalue[i] = a;
- for (j = 0; j < n; j++, a += 8)
- {
- if (SSE_CLASS_P (classes[j]))
- memcpy (a, &reg_args->sse[ssecount++], 8);
- else
- memcpy (a, &reg_args->gpr[gprcount++], 8);
- }
- }
- }
-
- /* Invoke the closure. */
+
+ avalue[i] = a;
+ for (j = 0; j < n; j++, a += 8)
+ {
+ if (SSE_CLASS_P (classes[j]))
+ memcpy (a, &reg_args->sse[ssecount++], 8);
+ else
+ memcpy (a, &reg_args->gpr[gprcount++], 8);
+ }
+ }
+ }
+
+ /* Invoke the closure. */
fun (cif, rvalue, avalue, user_data);
-
- /* Tell assembly how to perform return type promotions. */
+
+ /* Tell assembly how to perform return type promotions. */
return flags;
-}
-
+}
+
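The promotion flags returned just above are why closure handlers are expected to store integer results narrower than a word through a full ffi_arg; a short hedged reminder (the u8_handler name is illustrative):

#include <ffi.h>

/* For results narrower than a word (e.g. ffi_type_uint8), write the
   whole ffi_arg so the promotion selected above reads defined data.  */
static void u8_handler(ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void) cif; (void) args; (void) user_data;
  *(ffi_arg *) ret = 42;
}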
extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
@@ -883,4 +883,4 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
return FFI_OK;
}
-#endif /* __x86_64__ */
+#endif /* __x86_64__ */
diff --git a/contrib/restricted/libffi/src/x86/ffitarget.h b/contrib/restricted/libffi/src/x86/ffitarget.h
index 170b5865fe..85ccedfedc 100644
--- a/contrib/restricted/libffi/src/x86/ffitarget.h
+++ b/contrib/restricted/libffi/src/x86/ffitarget.h
@@ -1,103 +1,103 @@
-/* -----------------------------------------------------------------*-C-*-
+/* -----------------------------------------------------------------*-C-*-
ffitarget.h - Copyright (c) 2012, 2014, 2018 Anthony Green
- Copyright (c) 1996-2003, 2010 Red Hat, Inc.
- Copyright (C) 2008 Free Software Foundation, Inc.
-
- Target configuration macros for x86 and x86-64.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
-
- ----------------------------------------------------------------------- */
-
-#ifndef LIBFFI_TARGET_H
-#define LIBFFI_TARGET_H
-
-#ifndef LIBFFI_H
-#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
-#endif
-
-/* ---- System specific configurations ----------------------------------- */
-
-/* For code common to all platforms on x86 and x86_64. */
-#define X86_ANY
-
-#if defined (X86_64) && defined (__i386__)
-#undef X86_64
-#define X86
-#endif
-
-#ifdef X86_WIN64
-#define FFI_SIZEOF_ARG 8
-#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
-#endif
-
+ Copyright (c) 1996-2003, 2010 Red Hat, Inc.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ Target configuration macros for x86 and x86-64.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+/* For code common to all platforms on x86 and x86_64. */
+#define X86_ANY
+
+#if defined (X86_64) && defined (__i386__)
+#undef X86_64
+#define X86
+#endif
+
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
+#endif
+
#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
#ifndef _MSC_VER
#define FFI_TARGET_HAS_COMPLEX_TYPE
#endif
-/* ---- Generic type definitions ----------------------------------------- */
-
-#ifndef LIBFFI_ASM
-#ifdef X86_WIN64
-#ifdef _MSC_VER
-typedef unsigned __int64 ffi_arg;
-typedef __int64 ffi_sarg;
-#else
-typedef unsigned long long ffi_arg;
-typedef long long ffi_sarg;
-#endif
-#else
-#if defined __x86_64__ && defined __ILP32__
-#define FFI_SIZEOF_ARG 8
-#define FFI_SIZEOF_JAVA_RAW 4
-typedef unsigned long long ffi_arg;
-typedef long long ffi_sarg;
-#else
-typedef unsigned long ffi_arg;
-typedef signed long ffi_sarg;
-#endif
-#endif
-
-typedef enum ffi_abi {
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64 ffi_arg;
+typedef __int64 ffi_sarg;
+#else
+typedef unsigned long long ffi_arg;
+typedef long long ffi_sarg;
+#endif
+#else
+#if defined __x86_64__ && defined __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW 4
+typedef unsigned long long ffi_arg;
+typedef long long ffi_sarg;
+#else
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+#endif
+#endif
+
+typedef enum ffi_abi {
#if defined(X86_WIN64)
- FFI_FIRST_ABI = 0,
+ FFI_FIRST_ABI = 0,
FFI_WIN64, /* sizeof(long double) == 8 - microsoft compilers */
FFI_GNUW64, /* sizeof(long double) == 16 - GNU compilers */
- FFI_LAST_ABI,
+ FFI_LAST_ABI,
#ifdef __GNUC__
FFI_DEFAULT_ABI = FFI_GNUW64
#else
FFI_DEFAULT_ABI = FFI_WIN64
#endif
-
+
#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
FFI_FIRST_ABI = 1,
FFI_UNIX64,
- FFI_WIN64,
+ FFI_WIN64,
FFI_EFI64 = FFI_WIN64,
FFI_GNUW64,
- FFI_LAST_ABI,
+ FFI_LAST_ABI,
FFI_DEFAULT_ABI = FFI_UNIX64
-
+
#elif defined(X86_WIN32)
FFI_FIRST_ABI = 0,
FFI_SYSV = 1,
@@ -109,7 +109,7 @@ typedef enum ffi_abi {
FFI_REGISTER = 7,
FFI_LAST_ABI,
FFI_DEFAULT_ABI = FFI_MS_CDECL
-#else
+#else
FFI_FIRST_ABI = 0,
FFI_SYSV = 1,
FFI_THISCALL = 3,
@@ -118,30 +118,30 @@ typedef enum ffi_abi {
FFI_PASCAL = 6,
FFI_REGISTER = 7,
FFI_MS_CDECL = 8,
- FFI_LAST_ABI,
- FFI_DEFAULT_ABI = FFI_SYSV
-#endif
-} ffi_abi;
-#endif
-
-/* ---- Definitions for closures ----------------------------------------- */
-
-#define FFI_CLOSURES 1
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+#endif
+} ffi_abi;
+#endif
+
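On x86-64 System V targets the default selected here is FFI_UNIX64; the same value can be passed explicitly to ffi_prep_cif(). A small hedged sketch (prep_unix64_cif is an illustrative helper name):

#include <ffi.h>

static ffi_status prep_unix64_cif(ffi_cif *cif)
{
  static ffi_type *argt[1] = { &ffi_type_pointer };
  /* FFI_DEFAULT_ABI resolves to FFI_UNIX64 in the X86_64 branch above.  */
  return ffi_prep_cif(cif, FFI_UNIX64, 1, &ffi_type_void, argt);
}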
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
#define FFI_GO_CLOSURES 1
-#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
-#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
-#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
-#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
-
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
+
#if defined (X86_64) || defined(X86_WIN64) \
|| (defined (__x86_64__) && defined (X86_DARWIN))
# define FFI_TRAMPOLINE_SIZE 24
# define FFI_NATIVE_RAW_API 0
-#else
+#else
# define FFI_TRAMPOLINE_SIZE 12
# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
-#endif
-
-#endif
-
+#endif
+
+#endif
+
diff --git a/contrib/restricted/libffi/src/x86/sysv.S b/contrib/restricted/libffi/src/x86/sysv.S
index 8d857a341f..7c9598c93c 100644
--- a/contrib/restricted/libffi/src/x86/sysv.S
+++ b/contrib/restricted/libffi/src/x86/sysv.S
@@ -1,39 +1,39 @@
-/* -----------------------------------------------------------------------
+/* -----------------------------------------------------------------------
sysv.S - Copyright (c) 2017 Anthony Green
- Copyright (c) 2013 The Written Word, Inc.
- Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
-
- X86 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
+
+ X86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
#ifdef __i386__
#ifndef _MSC_VER
-
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
#include "internal.h"
-
+
#define C2(X, Y) X ## Y
#define C1(X, Y) C2(X, Y)
#ifdef __USER_LABEL_PREFIX__
@@ -41,19 +41,19 @@
#else
# define C(X) X
#endif
-
+
#ifdef X86_DARWIN
# define L(X) C1(L, X)
#else
# define L(X) C1(.L, X)
#endif
-
+
#ifdef __ELF__
# define ENDF(X) .type X,@function; .size X, . - X
#else
# define ENDF(X)
#endif
-
+
/* Handle win32 fastcall name mangling. */
#ifdef X86_WIN32
# define ffi_call_i386 @ffi_call_i386@8
@@ -62,7 +62,7 @@
# define ffi_call_i386 C(ffi_call_i386)
# define ffi_closure_inner C(ffi_closure_inner)
#endif
-
+
/* This macro allows the safe creation of jump tables without an
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
@@ -72,23 +72,23 @@
#else
# define E(BASE, X) .balign 8; .org BASE + X * 8
#endif
-
+
.text
.balign 16
.globl ffi_call_i386
FFI_HIDDEN(ffi_call_i386)
-
+
/* This is declared as
-
+
void ffi_call_i386(struct call_frame *frame, char *argp)
__attribute__((fastcall));
-
+
Thus the arguments are present in
-
+
ecx: frame
edx: argp
*/
-
+
ffi_call_i386:
L(UW0):
# cfi_startproc
@@ -99,7 +99,7 @@ L(UW0):
movl (%esp), %eax /* move the return address */
movl %ebp, (%ecx) /* store %ebp into local frame */
movl %eax, 4(%ecx) /* store retaddr into local frame */
-
+
/* New stack frame based off ebp. This is a itty bit of unwind
trickery in that the CFA *has* changed. There is no easy way
to describe it correctly on entry to the function. Fortunately,
@@ -111,19 +111,19 @@ L(UW0):
L(UW1):
# cfi_def_cfa(%ebp, 8)
# cfi_rel_offset(%ebp, 0)
-
+
movl %edx, %esp /* set outgoing argument stack */
movl 20+R_EAX*4(%ebp), %eax /* set register arguments */
movl 20+R_EDX*4(%ebp), %edx
movl 20+R_ECX*4(%ebp), %ecx
-
+
call *8(%ebp)
-
+
movl 12(%ebp), %ecx /* load return type code */
movl %ebx, 8(%ebp) /* preserve %ebx */
L(UW2):
# cfi_rel_offset(%ebx, 8)
-
+
andl $X86_RET_TYPE_MASK, %ecx
#ifdef __PIC__
call C(__x86.get_pc_thunk.bx)
@@ -134,7 +134,7 @@ L(pc1):
#endif
movl 16(%ebp), %ecx /* load result address */
jmp *%ebx
-
+
.balign 8
L(store_table):
E(L(store_table), X86_RET_FLOAT)
@@ -181,7 +181,7 @@ L(UW3):
ret
L(UW4):
# cfi_restore_state
-
+
E(L(store_table), X86_RET_STRUCTPOP)
jmp L(e1)
E(L(store_table), X86_RET_STRUCTARG)
@@ -192,30 +192,30 @@ E(L(store_table), X86_RET_STRUCT_1B)
E(L(store_table), X86_RET_STRUCT_2B)
movw %ax, (%ecx)
jmp L(e1)
-
+
/* Fill out the table so that bad values are predictable. */
E(L(store_table), X86_RET_UNUSED14)
ud2
E(L(store_table), X86_RET_UNUSED15)
ud2
-
+
L(UW5):
# cfi_endproc
ENDF(ffi_call_i386)
-
+
/* The inner helper is declared as
-
+
void ffi_closure_inner(struct closure_frame *frame, char *argp)
	   __attribute__((fastcall))
-
+
Thus the arguments are placed in
-
+
ecx: frame
edx: argp
*/
-
+
/* Macros to help setting up the closure_data structure. */
-
+
#if HAVE_FASTCALL
# define closure_FS (40 + 4)
# define closure_CF 0
@@ -223,12 +223,12 @@ ENDF(ffi_call_i386)
# define closure_FS (8 + 40 + 12)
# define closure_CF 8
#endif
-
+
#define FFI_CLOSURE_SAVE_REGS \
movl %eax, closure_CF+16+R_EAX*4(%esp); \
movl %edx, closure_CF+16+R_EDX*4(%esp); \
movl %ecx, closure_CF+16+R_ECX*4(%esp)
-
+
#define FFI_CLOSURE_COPY_TRAMP_DATA \
movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \
movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \
@@ -241,14 +241,14 @@ ENDF(ffi_call_i386)
# define FFI_CLOSURE_PREP_CALL \
movl %esp, %ecx; /* load closure_data */ \
leal closure_FS+4(%esp), %edx; /* load incoming stack */
-#else
+#else
# define FFI_CLOSURE_PREP_CALL \
leal closure_CF(%esp), %ecx; /* load closure_data */ \
leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
movl %ecx, (%esp); \
movl %edx, 4(%esp)
-#endif
-
+#endif
+
#define FFI_CLOSURE_CALL_INNER(UWN) \
call ffi_closure_inner
@@ -388,14 +388,14 @@ L(e2):
addl $closure_FS, %esp
L(UW16):
# cfi_adjust_cfa_offset(-closure_FS)
- ret
+ ret
L(UW17):
# cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTPOP)
addl $closure_FS, %esp
L(UW18):
# cfi_adjust_cfa_offset(-closure_FS)
- ret $4
+ ret $4
L(UW19):
# cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTARG)
@@ -406,7 +406,7 @@ E(L(load_table2), X86_RET_STRUCT_1B)
E(L(load_table2), X86_RET_STRUCT_2B)
movzwl %ax, %eax
jmp L(e2)
-
+
/* Fill out the table so that bad values are predictable. */
E(L(load_table2), X86_RET_UNUSED14)
ud2
@@ -566,8 +566,8 @@ L(UW31):
# cfi_endproc
ENDF(C(ffi_closure_STDCALL))
-#if !FFI_NO_RAW_API
-
+#if !FFI_NO_RAW_API
+
#define raw_closure_S_FS (16+16+12)
.balign 16
@@ -599,15 +599,15 @@ L(UW34):
call C(__x86.get_pc_thunk.bx)
L(pc4):
leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
-#else
+#else
leal L(load_table4)(,%eax, 8), %ecx
-#endif
+#endif
movl raw_closure_S_FS-4(%esp), %ebx
L(UW35):
# cfi_restore(%ebx)
movl 16(%esp), %eax /* Optimistic load */
jmp *%ecx
-
+
.balign 8
L(load_table4):
E(L(load_table4), X86_RET_FLOAT)
@@ -660,13 +660,13 @@ E(L(load_table4), X86_RET_STRUCT_1B)
E(L(load_table4), X86_RET_STRUCT_2B)
movzwl %ax, %eax
jmp L(e4)
-
+
/* Fill out the table so that bad values are predictable. */
E(L(load_table4), X86_RET_UNUSED14)
ud2
E(L(load_table4), X86_RET_UNUSED15)
ud2
-
+
L(UW40):
# cfi_endproc
ENDF(C(ffi_closure_raw_SYSV))
@@ -717,13 +717,13 @@ L(pc5):
leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
#else
leal L(load_table5)(,%eax, 8), %ecx
-#endif
+#endif
movl raw_closure_T_FS-4(%esp), %ebx
L(UW47):
# cfi_restore(%ebx)
movl 16(%esp), %eax /* Optimistic load */
jmp *%ecx
-
+
.balign 8
L(load_table5):
E(L(load_table5), X86_RET_FLOAT)
@@ -777,7 +777,7 @@ E(L(load_table5), X86_RET_STRUCT_1B)
E(L(load_table5), X86_RET_STRUCT_2B)
movzwl %ax, %eax
jmp L(e5)
-
+
/* Fill out the table so that bad values are predictable. */
E(L(load_table5), X86_RET_UNUSED14)
ud2
@@ -800,10 +800,10 @@ ENDF(C(ffi_closure_raw_THISCALL))
.section .text.X,"axG",@progbits,X,comdat; \
.globl X; \
FFI_HIDDEN(X)
-#else
+#else
# define COMDAT(X)
-#endif
-
+#endif
+
#if defined(__PIC__)
COMDAT(C(__x86.get_pc_thunk.bx))
C(__x86.get_pc_thunk.bx):
@@ -828,15 +828,15 @@ EHFrame0:
.section .eh_frame,"r"
#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
.section .eh_frame,EH_FRAME_FLAGS,@unwind
-#else
+#else
.section .eh_frame,EH_FRAME_FLAGS,@progbits
-#endif
+#endif
#ifdef HAVE_AS_X86_PCREL
# define PCREL(X) X - .
-#else
+#else
# define PCREL(X) X@rel
-#endif
+#endif
/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
#define ADV(N, P) .byte 2, L(N)-L(P)
@@ -920,9 +920,9 @@ L(SFDE4):
ADV(UW15, UW14)
.byte 0xc0+3 /* DW_CFA_restore %ebx */
ADV(UW16, UW15)
-#else
+#else
ADV(UW16, UW13)
-#endif
+#endif
.byte 0xe, 4 /* DW_CFA_def_cfa_offset */
ADV(UW17, UW16)
.byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
@@ -973,11 +973,11 @@ L(SFDE7):
.byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
ADV(UW30, UW29)
.byte 0xc0+3 /* DW_CFA_restore %ebx */
-#endif
+#endif
.balign 4
L(EFDE7):
-
-#if !FFI_NO_RAW_API
+
+#if !FFI_NO_RAW_API
.set L(set8),L(EFDE8)-L(SFDE8)
.long L(set8) /* FDE Length */
L(SFDE8):
@@ -1001,7 +1001,7 @@ L(SFDE8):
.byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
.balign 4
L(EFDE8):
-
+
.set L(set9),L(EFDE9)-L(SFDE9)
.long L(set9) /* FDE Length */
L(SFDE9):
@@ -1034,7 +1034,7 @@ L(SFDE9):
.balign 4
L(EFDE9):
#endif /* !FFI_NO_RAW_API */
-
+
#ifdef _WIN32
.def @feat.00;
.scl 3;
@@ -1042,12 +1042,12 @@ L(EFDE9):
.endef
.globl @feat.00
@feat.00 = 1
-#endif
-
+#endif
+
#ifdef __APPLE__
.subsections_via_symbols
.section __LD,__compact_unwind,regular,debug
-
+
/* compact unwind for ffi_call_i386 */
.long C(ffi_call_i386)
.set L1,L(UW5)-L(UW0)
@@ -1124,6 +1124,6 @@ L(EFDE9):
#endif /* ifndef _MSC_VER */
#endif /* ifdef __i386__ */
-#if defined __ELF__ && defined __linux__
- .section .note.GNU-stack,"",@progbits
-#endif
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/contrib/restricted/libffi/src/x86/unix64.S b/contrib/restricted/libffi/src/x86/unix64.S
index 90b847311f..41563f5c60 100644
--- a/contrib/restricted/libffi/src/x86/unix64.S
+++ b/contrib/restricted/libffi/src/x86/unix64.S
@@ -1,40 +1,40 @@
-/* -----------------------------------------------------------------------
- unix64.S - Copyright (c) 2013 The Written Word, Inc.
- - Copyright (c) 2008 Red Hat, Inc
- - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
-
- x86-64 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#ifdef __x86_64__
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
+/* -----------------------------------------------------------------------
+ unix64.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 2008 Red Hat, Inc
+ - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
+
+ x86-64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
#include "internal64.h"
#include "asmnames.h"
-
+
.text
-
+
/* This macro allows the safe creation of jump tables without an
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
@@ -45,26 +45,26 @@
# define E(BASE, X) .balign 8; .org BASE + X * 8
#endif
-/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
- void *raddr, void (*fnaddr)(void));
-
- Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
- for this function. This has been allocated by ffi_call. We also
- deallocate some of the stack that has been alloca'd. */
-
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void));
+
+ Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
+
.balign 8
.globl C(ffi_call_unix64)
FFI_HIDDEN(C(ffi_call_unix64))
-
+
C(ffi_call_unix64):
L(UW0):
- movq (%rsp), %r10 /* Load return address. */
- leaq (%rdi, %rsi), %rax /* Find local stack base. */
- movq %rdx, (%rax) /* Save flags. */
- movq %rcx, 8(%rax) /* Save raddr. */
- movq %rbp, 16(%rax) /* Save old frame pointer. */
- movq %r10, 24(%rax) /* Relocate return address. */
- movq %rax, %rbp /* Finalize local stack frame. */
+ movq (%rsp), %r10 /* Load return address. */
+ leaq (%rdi, %rsi), %rax /* Find local stack base. */
+ movq %rdx, (%rax) /* Save flags. */
+ movq %rcx, 8(%rax) /* Save raddr. */
+ movq %rbp, 16(%rax) /* Save old frame pointer. */
+ movq %r10, 24(%rax) /* Relocate return address. */
+ movq %rax, %rbp /* Finalize local stack frame. */
 	/* New stack frame based off rbp.  This is an itty bit of unwind
trickery in that the CFA *has* changed. There is no easy way
@@ -77,59 +77,59 @@ L(UW1):
/* cfi_def_cfa(%rbp, 32) */
/* cfi_rel_offset(%rbp, 16) */
- movq %rdi, %r10 /* Save a copy of the register area. */
- movq %r8, %r11 /* Save a copy of the target fn. */
- movl %r9d, %eax /* Set number of SSE registers. */
-
- /* Load up all argument registers. */
- movq (%r10), %rdi
+ movq %rdi, %r10 /* Save a copy of the register area. */
+ movq %r8, %r11 /* Save a copy of the target fn. */
+ movl %r9d, %eax /* Set number of SSE registers. */
+
+ /* Load up all argument registers. */
+ movq (%r10), %rdi
movq 0x08(%r10), %rsi
movq 0x10(%r10), %rdx
movq 0x18(%r10), %rcx
movq 0x20(%r10), %r8
movq 0x28(%r10), %r9
movl 0xb0(%r10), %eax
- testl %eax, %eax
+ testl %eax, %eax
jnz L(load_sse)
L(ret_from_load_sse):
-
+
/* Deallocate the reg arg area, except for r10, then load via pop. */
leaq 0xb8(%r10), %rsp
popq %r10
-
- /* Call the user function. */
- call *%r11
-
- /* Deallocate stack arg area; local stack frame in redzone. */
- leaq 24(%rbp), %rsp
-
- movq 0(%rbp), %rcx /* Reload flags. */
- movq 8(%rbp), %rdi /* Reload raddr. */
- movq 16(%rbp), %rbp /* Reload old frame pointer. */
+
+ /* Call the user function. */
+ call *%r11
+
+ /* Deallocate stack arg area; local stack frame in redzone. */
+ leaq 24(%rbp), %rsp
+
+ movq 0(%rbp), %rcx /* Reload flags. */
+ movq 8(%rbp), %rdi /* Reload raddr. */
+ movq 16(%rbp), %rbp /* Reload old frame pointer. */
L(UW2):
/* cfi_remember_state */
/* cfi_def_cfa(%rsp, 8) */
/* cfi_restore(%rbp) */
-
- /* The first byte of the flags contains the FFI_TYPE. */
+
+ /* The first byte of the flags contains the FFI_TYPE. */
cmpb $UNIX64_RET_LAST, %cl
- movzbl %cl, %r10d
+ movzbl %cl, %r10d
leaq L(store_table)(%rip), %r11
ja L(sa)
leaq (%r11, %r10, 8), %r10
/* Prep for the structure cases: scratch area in redzone. */
leaq -20(%rsp), %rsi
- jmp *%r10
-
+ jmp *%r10
+
.balign 8
L(store_table):
E(L(store_table), UNIX64_RET_VOID)
- ret
+ ret
E(L(store_table), UNIX64_RET_UINT8)
movzbl %al, %eax
- movq %rax, (%rdi)
- ret
+ movq %rax, (%rdi)
+ ret
E(L(store_table), UNIX64_RET_UINT16)
movzwl %ax, %eax
movq %rax, (%rdi)
@@ -139,29 +139,29 @@ E(L(store_table), UNIX64_RET_UINT32)
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT8)
- movsbq %al, %rax
- movq %rax, (%rdi)
- ret
+ movsbq %al, %rax
+ movq %rax, (%rdi)
+ ret
E(L(store_table), UNIX64_RET_SINT16)
- movswq %ax, %rax
- movq %rax, (%rdi)
- ret
+ movswq %ax, %rax
+ movq %rax, (%rdi)
+ ret
E(L(store_table), UNIX64_RET_SINT32)
- cltq
- movq %rax, (%rdi)
- ret
+ cltq
+ movq %rax, (%rdi)
+ ret
E(L(store_table), UNIX64_RET_INT64)
- movq %rax, (%rdi)
- ret
+ movq %rax, (%rdi)
+ ret
E(L(store_table), UNIX64_RET_XMM32)
movd %xmm0, (%rdi)
- ret
+ ret
E(L(store_table), UNIX64_RET_XMM64)
movq %xmm0, (%rdi)
- ret
+ ret
E(L(store_table), UNIX64_RET_X87)
- fstpt (%rdi)
- ret
+ fstpt (%rdi)
+ ret
E(L(store_table), UNIX64_RET_X87_2)
fstpt (%rdi)
fstpt 16(%rdi)
@@ -178,22 +178,22 @@ E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
E(L(store_table), UNIX64_RET_ST_RAX_RDX)
movq %rdx, 8(%rsi)
L(s2):
- movq %rax, (%rsi)
+ movq %rax, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
- rep movsb
- ret
+ rep movsb
+ ret
.balign 8
L(s3):
movq %xmm0, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
-
+
L(sa): call PLT(C(abort))
- /* Many times we can avoid loading any SSE registers at all.
- It's not worth an indirect jump to load the exact set of
- SSE registers needed; zero or all is a good compromise. */
+ /* Many times we can avoid loading any SSE registers at all.
+ It's not worth an indirect jump to load the exact set of
+ SSE registers needed; zero or all is a good compromise. */
.balign 2
L(UW3):
/* cfi_restore_state */
@@ -207,24 +207,24 @@ L(load_sse):
movdqa 0x90(%r10), %xmm6
movdqa 0xa0(%r10), %xmm7
jmp L(ret_from_load_sse)
-
+
L(UW4):
ENDF(C(ffi_call_unix64))
-
+
/* 6 general registers, 8 vector registers,
32 bytes of rvalue, 8 bytes of alignment. */
#define ffi_closure_OFS_G 0
#define ffi_closure_OFS_V (6*8)
#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8)
-
+
/* The location of rvalue within the red zone after deallocating the frame. */
#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
-
+
.balign 2
.globl C(ffi_closure_unix64_sse)
FFI_HIDDEN(C(ffi_closure_unix64_sse))
-
+
C(ffi_closure_unix64_sse):
L(UW5):
subq $ffi_closure_FS, %rsp
@@ -276,48 +276,48 @@ L(do_closure):
leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
call PLT(C(ffi_closure_unix64_inner))
- /* Deallocate stack frame early; return value is now in redzone. */
+ /* Deallocate stack frame early; return value is now in redzone. */
addq $ffi_closure_FS, %rsp
L(UW10):
/* cfi_adjust_cfa_offset(-ffi_closure_FS) */
-
- /* The first byte of the return value contains the FFI_TYPE. */
+
+ /* The first byte of the return value contains the FFI_TYPE. */
cmpb $UNIX64_RET_LAST, %al
- movzbl %al, %r10d
+ movzbl %al, %r10d
leaq L(load_table)(%rip), %r11
ja L(la)
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
- jmp *%r10
-
+ jmp *%r10
+
.balign 8
L(load_table):
E(L(load_table), UNIX64_RET_VOID)
- ret
+ ret
E(L(load_table), UNIX64_RET_UINT8)
movzbl (%rsi), %eax
- ret
+ ret
E(L(load_table), UNIX64_RET_UINT16)
movzwl (%rsi), %eax
- ret
+ ret
E(L(load_table), UNIX64_RET_UINT32)
movl (%rsi), %eax
- ret
+ ret
E(L(load_table), UNIX64_RET_SINT8)
movsbl (%rsi), %eax
- ret
+ ret
E(L(load_table), UNIX64_RET_SINT16)
movswl (%rsi), %eax
- ret
+ ret
E(L(load_table), UNIX64_RET_SINT32)
movl (%rsi), %eax
- ret
+ ret
E(L(load_table), UNIX64_RET_INT64)
movq (%rsi), %rax
- ret
+ ret
E(L(load_table), UNIX64_RET_XMM32)
movd (%rsi), %xmm0
- ret
+ ret
E(L(load_table), UNIX64_RET_XMM64)
movq (%rsi), %xmm0
ret
@@ -346,16 +346,16 @@ L(l2):
L(l3):
movq (%rsi), %xmm0
ret
-
+
L(la): call PLT(C(abort))
-
+
L(UW11):
ENDF(C(ffi_closure_unix64))
-
+
.balign 2
.globl C(ffi_go_closure_unix64_sse)
FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
-
+
C(ffi_go_closure_unix64_sse):
L(UW12):
subq $ffi_closure_FS, %rsp
@@ -396,11 +396,11 @@ L(sse_entry2):
movl 4(%r10), %edi /* Load cif */
movl 8(%r10), %esi /* Load fun */
movl %r10d, %edx /* Load closure (user_data) */
-#else
+#else
movq 8(%r10), %rdi /* Load cif */
movq 16(%r10), %rsi /* Load fun */
movq %r10, %rdx /* Load closure (user_data) */
-#endif
+#endif
jmp L(do_closure)
L(UW17):
@@ -431,19 +431,19 @@ L(CIE):
.set L(set0),L(ECIE)-L(SCIE)
.long L(set0) /* CIE Length */
L(SCIE):
- .long 0 /* CIE Identifier Tag */
- .byte 1 /* CIE Version */
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
.ascii "zR\0" /* CIE Augmentation */
.byte 1 /* CIE Code Alignment Factor */
.byte 0x78 /* CIE Data Alignment Factor */
- .byte 0x10 /* CIE RA Column */
+ .byte 0x10 /* CIE RA Column */
.byte 1 /* Augmentation size */
- .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
.byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */
.byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */
.balign 8
L(ECIE):
-
+
.set L(set1),L(EFDE1)-L(SFDE1)
.long L(set1) /* FDE Length */
L(SFDE1):
@@ -455,14 +455,14 @@ L(SFDE1):
.byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */
.byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */
ADV(UW2, UW1)
- .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xa /* DW_CFA_remember_state */
.byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */
- .byte 0xc0+6 /* DW_CFA_restore, %rbp */
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
ADV(UW3, UW2)
- .byte 0xb /* DW_CFA_restore_state */
+ .byte 0xb /* DW_CFA_restore_state */
.balign 8
L(EFDE1):
-
+
.set L(set2),L(EFDE2)-L(SFDE2)
.long L(set2) /* FDE Length */
L(SFDE2):
@@ -475,7 +475,7 @@ L(SFDE2):
.byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
.balign 8
L(EFDE2):
-
+
.set L(set3),L(EFDE3)-L(SFDE3)
.long L(set3) /* FDE Length */
L(SFDE3):
@@ -484,12 +484,12 @@ L(SFDE3):
.long L(UW11)-L(UW8) /* Address range */
.byte 0 /* Augmentation size */
ADV(UW9, UW8)
- .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0xe /* DW_CFA_def_cfa_offset */
.byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
ADV(UW10, UW9)
.byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */
L(EFDE3):
-
+
.set L(set4),L(EFDE4)-L(SFDE4)
.long L(set4) /* FDE Length */
L(SFDE4):
@@ -498,11 +498,11 @@ L(SFDE4):
.long L(UW14)-L(UW12) /* Address range */
.byte 0 /* Augmentation size */
ADV(UW13, UW12)
- .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0xe /* DW_CFA_def_cfa_offset */
.byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
.balign 8
L(EFDE4):
-
+
.set L(set5),L(EFDE5)-L(SFDE5)
.long L(set5) /* FDE Length */
L(SFDE5):
@@ -518,7 +518,7 @@ L(EFDE5):
#ifdef __APPLE__
.subsections_via_symbols
.section __LD,__compact_unwind,regular,debug
-
+
/* compact unwind for ffi_call_unix64 */
.quad C(ffi_call_unix64)
.set L1,L(UW4)-L(UW0)
@@ -526,7 +526,7 @@ L(EFDE5):
.long 0x04000000 /* use dwarf unwind info */
.quad 0
.quad 0
-
+
/* compact unwind for ffi_closure_unix64_sse */
.quad C(ffi_closure_unix64_sse)
.set L2,L(UW7)-L(UW5)
@@ -560,7 +560,7 @@ L(EFDE5):
.quad 0
#endif
-#endif /* __x86_64__ */
-#if defined __ELF__ && defined __linux__
- .section .note.GNU-stack,"",@progbits
-#endif
+#endif /* __x86_64__ */
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
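
The unix64 store_table/load_table code above is a bounds-checked, fixed-stride jump table: the low byte of the flags (or of the return value, on the closure path) selects one of the 8-byte stubs, each stub widens the raw register value into the result slot, and anything past UNIX64_RET_LAST falls through to abort(). A minimal C sketch of the same dispatch idea, using hypothetical tag names and a function-pointer table standing in for the assembly stubs:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical tags mirroring a few of the UNIX64_RET_* indices. */
enum { RET_VOID, RET_UINT8, RET_SINT32, RET_INT64, RET_LAST = RET_INT64 };

/* Each handler widens the raw return-register value and stores it into the
   result slot, as the store_table stubs widen %al/%eax into a 64-bit write. */
static void store_void(uint64_t raw, uint64_t *rvalue)   { (void)raw; (void)rvalue; }
static void store_uint8(uint64_t raw, uint64_t *rvalue)  { *rvalue = (uint8_t)raw; }
static void store_sint32(uint64_t raw, uint64_t *rvalue) { *rvalue = (uint64_t)(int64_t)(int32_t)(uint32_t)raw; }
static void store_int64(uint64_t raw, uint64_t *rvalue)  { *rvalue = raw; }

static void (*const store_table_c[])(uint64_t, uint64_t *) = {
    store_void, store_uint8, store_sint32, store_int64,
};

/* Bounds-check the type byte, then dispatch: the C analogue of
   "cmpb $UNIX64_RET_LAST, %cl; ja L(sa); jmp *%r10". */
static void store_result(unsigned type, uint64_t raw, uint64_t *rvalue)
{
    if (type > RET_LAST)
        abort();
    store_table_c[type](raw, rvalue);
}

int main(void)
{
    uint64_t slot = 0;
    store_result(RET_SINT32, (uint64_t)(uint32_t)-5, &slot);
    printf("%lld\n", (long long)(int64_t)slot); /* prints -5 */
    return 0;
}

The assembly avoids the indirection through function pointers by keeping every table entry exactly 8 bytes (the E() macro's .org assertion, visible in the win64.S hunk below, enforces that) and jumping straight into the table.
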
diff --git a/contrib/restricted/libffi/src/x86/win64.S b/contrib/restricted/libffi/src/x86/win64.S
index ed60453d94..2c334c82f9 100644
--- a/contrib/restricted/libffi/src/x86/win64.S
+++ b/contrib/restricted/libffi/src/x86/win64.S
@@ -1,50 +1,50 @@
#ifdef __x86_64__
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
#include <ffi_cfi.h>
#include "asmnames.h"
-
+
#if defined(HAVE_AS_CFI_PSEUDO_OP)
.cfi_sections .debug_frame
#endif
-
+
#ifdef X86_WIN64
#define SEH(...) __VA_ARGS__
#define arg0 %rcx
#define arg1 %rdx
#define arg2 %r8
#define arg3 %r9
-#else
+#else
#define SEH(...)
#define arg0 %rdi
#define arg1 %rsi
#define arg2 %rdx
#define arg3 %rcx
#endif
-
+
/* This macro allows the safe creation of jump tables without an
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) .balign 8
-#else
+#else
# define E(BASE, X) .balign 8; .org BASE + X * 8
-#endif
-
+#endif
+
.text
-
+
/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
-
+
Bit o trickiness here -- FRAME is the base of the stack frame
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
-
+
.align 8
.globl C(ffi_call_win64)
FFI_HIDDEN(C(ffi_call_win64))
-
+
SEH(.seh_proc ffi_call_win64)
C(ffi_call_win64):
cfi_startproc
@@ -59,9 +59,9 @@ C(ffi_call_win64):
SEH(.seh_setframe %rbp, 0)
SEH(.seh_endprologue)
movq arg0, %rsp
-
+
movq arg2, %r10
-
+
/* Load all slots into both general and xmm registers. */
movq (%rsp), %rcx
movsd (%rsp), %xmm0
@@ -71,9 +71,9 @@ C(ffi_call_win64):
movsd 16(%rsp), %xmm2
movq 24(%rsp), %r9
movsd 24(%rsp), %xmm3
-
+
call *16(%rbp)
-
+
movl 24(%rbp), %ecx
movq 32(%rbp), %r8
leaq 0f(%rip), %r10
@@ -81,7 +81,7 @@ C(ffi_call_win64):
leaq (%r10, %rcx, 8), %r10
ja 99f
jmp *%r10
-
+
/* Below, we're space constrained most of the time. Thus we eschew the
modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
.macro epilogue
@@ -92,7 +92,7 @@ C(ffi_call_win64):
ret
cfi_restore_state
.endm
-
+
.align 8
0:
E(0b, FFI_TYPE_VOID)
@@ -153,26 +153,26 @@ E(0b, FFI_TYPE_SMALL_STRUCT_2B)
E(0b, FFI_TYPE_SMALL_STRUCT_4B)
movl %eax, (%r8)
epilogue
-
+
.align 8
99: call PLT(C(abort))
-
+
epilogue
-
+
cfi_endproc
SEH(.seh_endproc)
-
-
+
+
/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
16 bytes of result, 32 bytes of xmm registers. */
#define ffi_clo_FS (32+8+16+32)
#define ffi_clo_OFF_R (32+8)
#define ffi_clo_OFF_X (32+8+16)
-
+
.align 8
.globl C(ffi_go_closure_win64)
FFI_HIDDEN(C(ffi_go_closure_win64))
-
+
SEH(.seh_proc ffi_go_closure_win64)
C(ffi_go_closure_win64):
cfi_startproc
@@ -181,18 +181,18 @@ C(ffi_go_closure_win64):
movq %rdx, 16(%rsp)
movq %r8, 24(%rsp)
movq %r9, 32(%rsp)
-
+
movq 8(%r10), %rcx /* load cif */
movq 16(%r10), %rdx /* load fun */
movq %r10, %r8 /* closure is user_data */
jmp 0f
cfi_endproc
SEH(.seh_endproc)
-
+
.align 8
.globl C(ffi_closure_win64)
FFI_HIDDEN(C(ffi_closure_win64))
-
+
SEH(.seh_proc ffi_closure_win64)
C(ffi_closure_win64):
cfi_startproc
@@ -201,7 +201,7 @@ C(ffi_closure_win64):
movq %rdx, 16(%rsp)
movq %r8, 24(%rsp)
movq %r9, 32(%rsp)
-
+
movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */
movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */
movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */
@@ -210,28 +210,28 @@ C(ffi_closure_win64):
cfi_adjust_cfa_offset(ffi_clo_FS)
SEH(.seh_stackalloc ffi_clo_FS)
SEH(.seh_endprologue)
-
+
/* Save all sse arguments into the stack frame. */
movsd %xmm0, ffi_clo_OFF_X(%rsp)
movsd %xmm1, ffi_clo_OFF_X+8(%rsp)
movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
-
+
leaq ffi_clo_OFF_R(%rsp), %r9
call PLT(C(ffi_closure_win64_inner))
-
+
/* Load the result into both possible result registers. */
movq ffi_clo_OFF_R(%rsp), %rax
movsd ffi_clo_OFF_R(%rsp), %xmm0
-
+
addq $ffi_clo_FS, %rsp
cfi_adjust_cfa_offset(-ffi_clo_FS)
ret
-
+
cfi_endproc
SEH(.seh_endproc)
#endif /* __x86_64__ */
-
+
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
#endif
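
The closure frame constants above encode the layout spelled out in the comment: 32 bytes of outgoing register shadow space, 8 bytes of alignment, 16 bytes of result, 32 bytes of xmm save area. A small standalone check of that arithmetic, with the values restated only for illustration (the #defines in win64.S remain authoritative):

#include <assert.h>

/* Mirror of the win64 closure frame layout for the sketch. */
#define CLO_FS    (32 + 8 + 16 + 32)  /* shadow space + alignment + result + xmm saves */
#define CLO_OFF_R (32 + 8)            /* result slot starts past shadow space + padding */
#define CLO_OFF_X (32 + 8 + 16)       /* xmm save area starts past the 16-byte result */

int main(void)
{
    assert(CLO_FS == 88);                 /* total frame size */
    assert(CLO_OFF_X - CLO_OFF_R == 16);  /* room for the 16-byte result */
    assert(CLO_FS - CLO_OFF_X == 32);     /* four xmm args saved as 8-byte doubles */
    return 0;
}
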
diff --git a/contrib/restricted/libffi/ya.make b/contrib/restricted/libffi/ya.make
index a5a9ee9381..f39d7b6fe3 100644
--- a/contrib/restricted/libffi/ya.make
+++ b/contrib/restricted/libffi/ya.make
@@ -1,12 +1,12 @@
# Generated by devtools/yamaker from nixpkgs 5852a21819542e6809f68ba5a798600e69874e76.
-LIBRARY()
-
+LIBRARY()
+
OWNER(
borman
g:cpp-contrib
)
-
+
VERSION(3.3)
ORIGINAL_SOURCE(https://sourceware.org/pub/libffi/libffi-3.3.tar.gz)
@@ -18,13 +18,13 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-ADDINCL(
+ADDINCL(
contrib/restricted/libffi
contrib/restricted/libffi/include
contrib/restricted/libffi/src
GLOBAL contrib/restricted/libffi/include
-)
-
+)
+
NO_COMPILER_WARNINGS()
NO_RUNTIME()
@@ -34,20 +34,20 @@ CFLAGS(
GLOBAL -DFFI_BUILDING
)
-SRCS(
+SRCS(
src/closures.c
src/java_raw_api.c
- src/prep_cif.c
+ src/prep_cif.c
src/raw_api.c
- src/types.c
-)
-
+ src/types.c
+)
+
IF (ARCH_ARM64 AND OS_ANDROID)
ADDINCL(
contrib/restricted/libffi/configs/aarch64-unknown-linux-android21
GLOBAL contrib/restricted/libffi/configs/aarch64-unknown-linux-android21/include
)
- SRCS(
+ SRCS(
src/aarch64/ffi.c
src/aarch64/sysv.S
)
@@ -201,9 +201,9 @@ ELSEIF (ARCH_X86_64 AND OS_WINDOWS)
)
ELSE()
MESSAGE(FATAL_ERROR Unsupported libffi platform: ${TARGET_PLATFORM} / ${HARDWARE_TYPE})
-ENDIF()
-
-END()
+ENDIF()
+
+END()
RECURSE(
testsuite
diff --git a/contrib/tools/bison/bison/src/files.c b/contrib/tools/bison/bison/src/files.c
index 24b78214ed..3b219229bc 100644
--- a/contrib/tools/bison/bison/src/files.c
+++ b/contrib/tools/bison/bison/src/files.c
@@ -28,8 +28,8 @@
#include <quotearg.h>
#include <stdio-safer.h>
#include <xstrndup.h>
-#include <stdlib.h>
-#include <string.h>
+#include <stdlib.h>
+#include <string.h>
#include "complain.h"
#include "files.h"
@@ -90,9 +90,9 @@ static char *header_extension = NULL;
`-----------------------------------------------------------------*/
#if defined _win_ || defined _WIN64 || defined _WIN32 || defined __WIN32__
-char *stpcpy(char *dst, const char *src);
+char *stpcpy(char *dst, const char *src);
#endif
-
+
static char *
concat2 (char const *str1, char const *str2)
{
diff --git a/contrib/tools/bison/bison/src/parse-gram.y b/contrib/tools/bison/bison/src/parse-gram.y
index 48985eb167..8c438dfeda 100644
--- a/contrib/tools/bison/bison/src/parse-gram.y
+++ b/contrib/tools/bison/bison/src/parse-gram.y
@@ -779,7 +779,7 @@ translate_code_braceless (char *code, location loc)
static void
add_param (param_type type, char *decl, location loc)
{
- static char const alphanum[26 + 26 + 1 + 10 + 1] =
+ static char const alphanum[26 + 26 + 1 + 10 + 1] =
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"_"
diff --git a/contrib/tools/bison/bison/src/symtab.c b/contrib/tools/bison/bison/src/symtab.c
index 79aa8489cc..c06fcd2621 100644
--- a/contrib/tools/bison/bison/src/symtab.c
+++ b/contrib/tools/bison/bison/src/symtab.c
@@ -173,7 +173,7 @@ symbol_print (symbol const *s, FILE *f)
static bool
is_identifier (uniqstr s)
{
- static char const alphanum[26 + 26 + 1 + 10 + 1] =
+ static char const alphanum[26 + 26 + 1 + 10 + 1] =
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"_"
diff --git a/contrib/tools/bison/bison/ya.make b/contrib/tools/bison/bison/ya.make
index 3bd5fe644f..04f8ae3758 100644
--- a/contrib/tools/bison/bison/ya.make
+++ b/contrib/tools/bison/bison/ya.make
@@ -14,7 +14,7 @@ IF (NOT MUSL)
ENDIF()
NO_COMPILER_WARNINGS()
-
+
ADDINCLSELF()
SRCS(
@@ -57,13 +57,13 @@ SRCS(
arcadia_root.cpp.in
)
-CFLAGS(
- -Daccept=bison_accept
+CFLAGS(
+ -Daccept=bison_accept
-DBISON_DATA_DIR="contrib/tools/bison/bison/data"
-)
+)
-PEERDIR(
- contrib/tools/bison/gnulib
-)
+PEERDIR(
+ contrib/tools/bison/gnulib
+)
END()
diff --git a/contrib/tools/bison/gnulib/platform/posix/config.h b/contrib/tools/bison/gnulib/platform/posix/config.h
index 31241faf2d..8fa4b4f092 100644
--- a/contrib/tools/bison/gnulib/platform/posix/config.h
+++ b/contrib/tools/bison/gnulib/platform/posix/config.h
@@ -78,7 +78,7 @@
# define _FORTIFY_SOURCE 2
#endif
-
+
/* Define to 1 if the system's ftello function has the Solaris bug. */
/* #undef FTELLO_BROKEN_AFTER_SWITCHING_FROM_READ_TO_WRITE */
@@ -475,19 +475,19 @@
*/
#define HAVE_DECL_ALARM 1
-#if defined(__linux__)
- #define HAVE_UNLOCKED_IO 1
-#else
- #define HAVE_UNLOCKED_IO 0
-#endif
-
+#if defined(__linux__)
+ #define HAVE_UNLOCKED_IO 1
+#else
+ #define HAVE_UNLOCKED_IO 0
+#endif
+
/* Define to 1 if you have the declaration of `clearerr_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_CLEARERR_UNLOCKED 1
-#else
- #define HAVE_DECL_CLEARERR_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_CLEARERR_UNLOCKED 1
+#else
+ #define HAVE_DECL_CLEARERR_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `copysign', and to 0 if you
don't. */
@@ -503,35 +503,35 @@
/* Define to 1 if you have the declaration of `feof_unlocked', and to 0 if you
don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FEOF_UNLOCKED 1
-#else
- #define HAVE_DECL_FEOF_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FEOF_UNLOCKED 1
+#else
+ #define HAVE_DECL_FEOF_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `ferror_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FERROR_UNLOCKED 1
-#else
- #define HAVE_DECL_FERROR_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FERROR_UNLOCKED 1
+#else
+ #define HAVE_DECL_FERROR_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `fflush_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FFLUSH_UNLOCKED 1
-#else
- #define HAVE_DECL_FFLUSH_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FFLUSH_UNLOCKED 1
+#else
+ #define HAVE_DECL_FFLUSH_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `fgets_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FGETS_UNLOCKED 1
-#else
- #define HAVE_DECL_FGETS_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FGETS_UNLOCKED 1
+#else
+ #define HAVE_DECL_FGETS_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `fpurge', and to 0 if you don't.
*/
@@ -539,27 +539,27 @@
/* Define to 1 if you have the declaration of `fputc_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FPUTC_UNLOCKED 1
-#else
- #define HAVE_DECL_FPUTC_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FPUTC_UNLOCKED 1
+#else
+ #define HAVE_DECL_FPUTC_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `fputs_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FPUTS_UNLOCKED 1
-#else
- #define HAVE_DECL_FPUTS_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FPUTS_UNLOCKED 1
+#else
+ #define HAVE_DECL_FPUTS_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `fread_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FREAD_UNLOCKED 1
-#else
- #define HAVE_DECL_FREAD_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FREAD_UNLOCKED 1
+#else
+ #define HAVE_DECL_FREAD_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `fseeko', and to 0 if you don't.
*/
@@ -571,27 +571,27 @@
/* Define to 1 if you have the declaration of `fwrite_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_FWRITE_UNLOCKED 1
-#else
- #define HAVE_DECL_FWRITE_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_FWRITE_UNLOCKED 1
+#else
+ #define HAVE_DECL_FWRITE_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `getchar_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_GETCHAR_UNLOCKED 1
-#else
- #define HAVE_DECL_GETCHAR_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_GETCHAR_UNLOCKED 1
+#else
+ #define HAVE_DECL_GETCHAR_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `getc_unlocked', and to 0 if you
don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_GETC_UNLOCKED 1
-#else
- #define HAVE_DECL_GETC_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_GETC_UNLOCKED 1
+#else
+ #define HAVE_DECL_GETC_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `getenv', and to 0 if you don't.
*/
@@ -609,7 +609,7 @@
don't. */
/* #undef HAVE_DECL_MBSINIT */
-#if defined(__linux__)
+#if defined(__linux__)
/* Define to 1 if you have the declaration of `program_invocation_name', and
to 0 if you don't. */
#define HAVE_DECL_PROGRAM_INVOCATION_NAME 1
@@ -617,31 +617,31 @@
/* Define to 1 if you have the declaration of `program_invocation_short_name',
and to 0 if you don't. */
#define HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME 1
-#else
-/* Define to 1 if you have the declaration of `program_invocation_name', and
- to 0 if you don't. */
-#define HAVE_DECL_PROGRAM_INVOCATION_NAME 0
-
-/* Define to 1 if you have the declaration of `program_invocation_short_name',
- and to 0 if you don't. */
-#define HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME 0
-#endif
-
+#else
+/* Define to 1 if you have the declaration of `program_invocation_name', and
+ to 0 if you don't. */
+#define HAVE_DECL_PROGRAM_INVOCATION_NAME 0
+
+/* Define to 1 if you have the declaration of `program_invocation_short_name',
+ and to 0 if you don't. */
+#define HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME 0
+#endif
+
/* Define to 1 if you have the declaration of `putchar_unlocked', and to 0 if
you don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_PUTCHAR_UNLOCKED 1
-#else
- #define HAVE_DECL_PUTCHAR_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_PUTCHAR_UNLOCKED 1
+#else
+ #define HAVE_DECL_PUTCHAR_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `putc_unlocked', and to 0 if you
don't. */
-#if HAVE_UNLOCKED_IO
- #define HAVE_DECL_PUTC_UNLOCKED 1
-#else
- #define HAVE_DECL_PUTC_UNLOCKED 0
-#endif
+#if HAVE_UNLOCKED_IO
+ #define HAVE_DECL_PUTC_UNLOCKED 1
+#else
+ #define HAVE_DECL_PUTC_UNLOCKED 0
+#endif
/* Define to 1 if you have the declaration of `setenv', and to 0 if you don't.
*/
@@ -681,11 +681,11 @@
/* Define to 1 if you have the declaration of `sys_siglist', and to 0 if you
don't. */
-#if defined(_musl_)
- #define HAVE_DECL_SYS_SIGLIST 0
-#else
- #define HAVE_DECL_SYS_SIGLIST 1
-#endif
+#if defined(_musl_)
+ #define HAVE_DECL_SYS_SIGLIST 0
+#else
+ #define HAVE_DECL_SYS_SIGLIST 1
+#endif
/* Define to 1 if you have the declaration of `towlower', and to 0 if you
don't. */
@@ -728,11 +728,11 @@
#define HAVE_FCNTL 1
/* Define to 1 if you have the <features.h> header file. */
-#if defined(__linux__)
- #define HAVE_FEATURES_H 1
-#else
- #define HAVE_FEATURES_H 0
-#endif
+#if defined(__linux__)
+ #define HAVE_FEATURES_H 1
+#else
+ #define HAVE_FEATURES_H 0
+#endif
/* Define to 1 if you have the `fpurge' function. */
/* #undef HAVE_FPURGE */
@@ -777,16 +777,16 @@
/* Define to 1 if you have the `isblank' function. */
#define HAVE_ISBLANK 1
-#if defined(_musl_)
-#define HAVE_ISNAND_IN_LIBC 0
-#define HAVE_ISNANF_IN_LIBC 0
-#else
+#if defined(_musl_)
+#define HAVE_ISNAND_IN_LIBC 0
+#define HAVE_ISNANF_IN_LIBC 0
+#else
/* Define if the isnan(double) function is available in libc. */
#define HAVE_ISNAND_IN_LIBC 1
/* Define if the isnan(float) function is available in libc. */
#define HAVE_ISNANF_IN_LIBC 1
-#endif
+#endif
/* Define if the isnan(long double) function is available in libc. */
/* #undef HAVE_ISNANL_IN_LIBC */
@@ -857,9 +857,9 @@
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `mempcpy' function. */
-#if defined(__linux__)
+#if defined(__linux__)
#define HAVE_MEMPCPY 1
-#endif
+#endif
/* Define to 1 if you have the `mkdtemp' function. */
#define HAVE_MKDTEMP 1
@@ -881,9 +881,9 @@
#define HAVE_NL_LANGINFO 1
/* Define to 1 if libc includes obstacks. */
-#if !defined(_musl_)
+#if !defined(_musl_)
#define HAVE_OBSTACK 1
-#endif
+#endif
/* Define to 1 if you have the `opendir' function. */
#define HAVE_OPENDIR 1
@@ -1020,10 +1020,10 @@
/* Define to 1 if you have the <stdio_ext.h> header file. */
#if defined(__FreeBSD__) || defined(__MACH__)
-#define HAVE_STDIO_EXT_H 0
-#else
+#define HAVE_STDIO_EXT_H 0
+#else
#define HAVE_STDIO_EXT_H 1
-#endif
+#endif
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
@@ -1194,26 +1194,26 @@
/* #undef HAVE__SET_INVALID_PARAMETER_HANDLER */
/* Define to 1 if you have the `__fpurge' function. */
-#if HAVE_STDIO_EXT_H
- #define HAVE___FPURGE 1
-#else
- #define HAVE___FPURGE 0
-#endif
+#if HAVE_STDIO_EXT_H
+ #define HAVE___FPURGE 1
+#else
+ #define HAVE___FPURGE 0
+#endif
-#if defined(_musl_)
- /* Define to 1 if you have the `__freadahead' function. */
- #define HAVE___FREADAHEAD 1
-#endif
+#if defined(_musl_)
+ /* Define to 1 if you have the `__freadahead' function. */
+ #define HAVE___FREADAHEAD 1
+#endif
/* Define to 1 if you have the `__freading' function. */
-#if HAVE_STDIO_EXT_H
- #define HAVE___FREADING 1
-#else
- #define HAVE___FREADING 0
-#endif
+#if HAVE_STDIO_EXT_H
+ #define HAVE___FREADING 1
+#else
+ #define HAVE___FREADING 0
+#endif
/* Define to 1 if you have the `__secure_getenv' function. */
-//#define HAVE___SECURE_GETENV 1
+//#define HAVE___SECURE_GETENV 1
/* Define as the bit index in the word where to find bit 0 of the exponent of
'long double'. */
@@ -1438,7 +1438,7 @@
/* Define to the prefix of C symbols at the assembler and linker level, either
an underscore or empty. */
-#define USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX
/* Define if the POSIX multithreading library can be used. */
/* #undef USE_POSIX_THREADS */
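
The HAVE_UNLOCKED_IO block and the HAVE_DECL_*_UNLOCKED switches above drive gnulib's usual unlocked-I/O convention: when a *_unlocked variant is not declared, the plain locking function is substituted under the same name, so callers can use the _unlocked spelling unconditionally. A self-contained sketch of that fallback (the macro default is hard-coded here so the sketch compiles on its own; in the tree it comes from this config.h):

#include <stdio.h>

/* In the real build this value comes from config.h. */
#ifndef HAVE_DECL_FPUTS_UNLOCKED
# define HAVE_DECL_FPUTS_UNLOCKED 0
#endif

/* gnulib-style fallback: if fputs_unlocked() is not declared, map the
   _unlocked name onto the plain locking function. */
#if !HAVE_DECL_FPUTS_UNLOCKED
# define fputs_unlocked(s, f) fputs(s, f)
#endif

int main(void)
{
    fputs_unlocked("unlocked-io fallback works\n", stdout);
    return 0;
}
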
diff --git a/contrib/tools/bison/gnulib/src/canonicalize-lgpl.c b/contrib/tools/bison/gnulib/src/canonicalize-lgpl.c
index 74a381325e..5cc9c5b4a0 100644
--- a/contrib/tools/bison/gnulib/src/canonicalize-lgpl.c
+++ b/contrib/tools/bison/gnulib/src/canonicalize-lgpl.c
@@ -29,7 +29,7 @@
/* Specification. */
#include <stdlib.h>
-#include "palloca.h"
+#include "palloca.h"
#include <string.h>
#include <unistd.h>
#include <limits.h>
diff --git a/contrib/tools/bison/gnulib/src/execute.c b/contrib/tools/bison/gnulib/src/execute.c
index 7df28682b3..1bb577100e 100644
--- a/contrib/tools/bison/gnulib/src/execute.c
+++ b/contrib/tools/bison/gnulib/src/execute.c
@@ -28,7 +28,7 @@
#include <signal.h>
#include <unistd.h>
-#include "penviron.h"
+#include "penviron.h"
#include "error.h"
#include "fatal-signal.h"
#include "wait-process.h"
diff --git a/contrib/tools/bison/gnulib/src/fpending.c b/contrib/tools/bison/gnulib/src/fpending.c
index f6db79f891..1bc4568923 100644
--- a/contrib/tools/bison/gnulib/src/fpending.c
+++ b/contrib/tools/bison/gnulib/src/fpending.c
@@ -26,9 +26,9 @@
size_t
__fpending (FILE *fp)
{
-#if defined(PENDING_OUTPUT_N_BYTES)
+#if defined(PENDING_OUTPUT_N_BYTES)
return PENDING_OUTPUT_N_BYTES;
-#endif
-
- return 0;
+#endif
+
+ return 0;
}
diff --git a/contrib/tools/bison/gnulib/src/malloca.h b/contrib/tools/bison/gnulib/src/malloca.h
index 0d8cad0582..8ec4465dc7 100644
--- a/contrib/tools/bison/gnulib/src/malloca.h
+++ b/contrib/tools/bison/gnulib/src/malloca.h
@@ -18,7 +18,7 @@
#ifndef _MALLOCA_H
#define _MALLOCA_H
-#include "palloca.h"
+#include "palloca.h"
#include <stddef.h>
#include <stdlib.h>
diff --git a/contrib/tools/bison/gnulib/src/palloca.h b/contrib/tools/bison/gnulib/src/palloca.h
index 08eef69d76..a7ed8a2f9c 100644
--- a/contrib/tools/bison/gnulib/src/palloca.h
+++ b/contrib/tools/bison/gnulib/src/palloca.h
@@ -1,7 +1,7 @@
-#pragma once
-
-#if defined(__FreeBSD__)
- #include <stdlib.h>
-#else
- #include <alloca.h>
-#endif
+#pragma once
+
+#if defined(__FreeBSD__)
+ #include <stdlib.h>
+#else
+ #include <alloca.h>
+#endif
diff --git a/contrib/tools/bison/gnulib/src/penviron.h b/contrib/tools/bison/gnulib/src/penviron.h
index a6e378b888..fd83197378 100644
--- a/contrib/tools/bison/gnulib/src/penviron.h
+++ b/contrib/tools/bison/gnulib/src/penviron.h
@@ -1,5 +1,5 @@
-#pragma once
-
+#pragma once
+
#if defined(__FreeBSD__) || defined(__MACH__)
- extern char** environ;
-#endif
+ extern char** environ;
+#endif
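
penviron.h exists only to make the environment array visible on platforms whose headers do not declare it (the __FreeBSD__/__MACH__ case above); execute.c and spawn-pipe.c pull it in above. A minimal standalone illustration of the declaration in use, not part of the patch:

#include <stdio.h>

/* The declaration penviron.h provides on FreeBSD/Mach, written out here so
   the sketch stands alone. */
extern char **environ;

int main(void)
{
    for (char **p = environ; *p != NULL; ++p)
        puts(*p);   /* print each NAME=VALUE entry */
    return 0;
}
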
diff --git a/contrib/tools/bison/gnulib/src/regex_internal.h b/contrib/tools/bison/gnulib/src/regex_internal.h
index 61f50d1e70..6205dbe726 100644
--- a/contrib/tools/bison/gnulib/src/regex_internal.h
+++ b/contrib/tools/bison/gnulib/src/regex_internal.h
@@ -482,7 +482,7 @@ static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
#if defined _LIBC || HAVE_ALLOCA
-# include "palloca.h"
+# include "palloca.h"
#endif
#ifndef _LIBC
diff --git a/contrib/tools/bison/gnulib/src/spawn-pipe.c b/contrib/tools/bison/gnulib/src/spawn-pipe.c
index e14fad3a07..86b929a7f4 100644
--- a/contrib/tools/bison/gnulib/src/spawn-pipe.c
+++ b/contrib/tools/bison/gnulib/src/spawn-pipe.c
@@ -27,7 +27,7 @@
#include <signal.h>
#include <unistd.h>
-#include "penviron.h"
+#include "penviron.h"
#include "error.h"
#include "fatal-signal.h"
#include "unistd-safer.h"
diff --git a/contrib/tools/bison/gnulib/src/spawni.c b/contrib/tools/bison/gnulib/src/spawni.c
index 47c890210b..e2f7b45b60 100644
--- a/contrib/tools/bison/gnulib/src/spawni.c
+++ b/contrib/tools/bison/gnulib/src/spawni.c
@@ -21,7 +21,7 @@
#include <spawn.h>
#include "spawn_int.h"
-#include "palloca.h"
+#include "palloca.h"
#include <errno.h>
#include <fcntl.h>
diff --git a/contrib/tools/bison/gnulib/src/stpcpy.c b/contrib/tools/bison/gnulib/src/stpcpy.c
index 7a262efd04..f5aa8d67e9 100644
--- a/contrib/tools/bison/gnulib/src/stpcpy.c
+++ b/contrib/tools/bison/gnulib/src/stpcpy.c
@@ -21,7 +21,7 @@
#include <config.h>
#include <string.h>
-#include <stdlib.h>
+#include <stdlib.h>
#undef __stpcpy
#if defined(_LIBC) || (defined(__MACH__) && defined(stpcpy))
diff --git a/contrib/tools/bison/gnulib/src/strsignal.c b/contrib/tools/bison/gnulib/src/strsignal.c
index bb2046c92c..20d604ff38 100644
--- a/contrib/tools/bison/gnulib/src/strsignal.c
+++ b/contrib/tools/bison/gnulib/src/strsignal.c
@@ -44,11 +44,11 @@
# define __libc_key_t gl_tls_key_t
# define __libc_getspecific(NAME) gl_tls_get ((NAME))
# define __libc_setspecific(NAME, POINTER) gl_tls_set ((NAME), (POINTER))
-#if defined(_MSC_VER)
-# define __snprintf _snprintf
-#else
+#if defined(_MSC_VER)
+# define __snprintf _snprintf
+#else
# define __snprintf snprintf
-#endif
+#endif
#endif /* _LIBC */
#ifdef _LIBC
diff --git a/contrib/tools/bison/gnulib/src/timevar.c b/contrib/tools/bison/gnulib/src/timevar.c
index 7044e18b9e..a9dbdbdee5 100644
--- a/contrib/tools/bison/gnulib/src/timevar.c
+++ b/contrib/tools/bison/gnulib/src/timevar.c
@@ -28,12 +28,12 @@
#else
-#if defined(_musl_)
- #define HAVE_SYS_TIMES_H 1
- #define HAVE_STRUCT_TMS 1
- #define HAVE_CLOCK_T 1
-#endif
-
+#if defined(_musl_)
+ #define HAVE_SYS_TIMES_H 1
+ #define HAVE_STRUCT_TMS 1
+ #define HAVE_CLOCK_T 1
+#endif
+
/* This source file is taken from the GCC source code, with slight
modifications that are under control of the IN_GCC preprocessor
variable. The !IN_GCC part of this file is specific to Bison. */
diff --git a/contrib/tools/bison/gnulib/src/vasnprintf.c b/contrib/tools/bison/gnulib/src/vasnprintf.c
index a8b305cd61..4de22819fa 100644
--- a/contrib/tools/bison/gnulib/src/vasnprintf.c
+++ b/contrib/tools/bison/gnulib/src/vasnprintf.c
@@ -54,7 +54,7 @@
# include <config.h>
#endif
#ifndef IN_LIBINTL
-# include "palloca.h"
+# include "palloca.h"
#endif
/* Specification. */
diff --git a/contrib/tools/bison/gnulib/src/xstrndup.c b/contrib/tools/bison/gnulib/src/xstrndup.c
index 9c490ab3e1..741d5a1b05 100644
--- a/contrib/tools/bison/gnulib/src/xstrndup.c
+++ b/contrib/tools/bison/gnulib/src/xstrndup.c
@@ -21,25 +21,25 @@
#include "xstrndup.h"
#include <string.h>
-#include <stdlib.h>
-
+#include <stdlib.h>
+
#include "xalloc.h"
-#if defined(_MSC_VER)
-static char *
-strndup(char const *s, size_t n)
-{
- size_t len = strnlen(s, n);
- char *new = malloc(len + 1);
-
- if (new == NULL)
- return NULL;
-
- new[len] = '\0';
- return memcpy(new, s, len);
-}
-#endif
-
+#if defined(_MSC_VER)
+static char *
+strndup(char const *s, size_t n)
+{
+ size_t len = strnlen(s, n);
+ char *new = malloc(len + 1);
+
+ if (new == NULL)
+ return NULL;
+
+ new[len] = '\0';
+ return memcpy(new, s, len);
+}
+#endif
+
/* Return a newly allocated copy of at most N bytes of STRING.
In other words, return a copy of the initial segment of length N of
STRING. */
diff --git a/contrib/tools/bison/gnulib/src/xvasprintf.c b/contrib/tools/bison/gnulib/src/xvasprintf.c
index fba46653ee..9c93492b80 100644
--- a/contrib/tools/bison/gnulib/src/xvasprintf.c
+++ b/contrib/tools/bison/gnulib/src/xvasprintf.c
@@ -75,10 +75,10 @@ xstrcat (size_t argcount, va_list args)
return result;
}
-#if defined(_MSC_VER)
-int vasprintf(char **resultp, const char *format, va_list args);
-#endif
-
+#if defined(_MSC_VER)
+int vasprintf(char **resultp, const char *format, va_list args);
+#endif
+
char *
xvasprintf (const char *format, va_list args)
{
diff --git a/contrib/tools/bison/gnulib/ya.make b/contrib/tools/bison/gnulib/ya.make
index 102942d0ef..b674fd9ada 100644
--- a/contrib/tools/bison/gnulib/ya.make
+++ b/contrib/tools/bison/gnulib/ya.make
@@ -37,14 +37,14 @@ IF (OS_DARWIN)
ENDIF()
IF (NOT OS_WINDOWS)
- CFLAGS(
- GLOBAL -Dregcomp=gnu_regcomp
- GLOBAL -Dregerror=gnu_regerror
- GLOBAL -Dregfree=gnu_regfree
- GLOBAL -Dregexec=gnu_regexec
- )
-ENDIF()
-
+ CFLAGS(
+ GLOBAL -Dregcomp=gnu_regcomp
+ GLOBAL -Dregerror=gnu_regerror
+ GLOBAL -Dregfree=gnu_regfree
+ GLOBAL -Dregexec=gnu_regexec
+ )
+ENDIF()
+
SRCS(
src/abitset.c
src/argmatch.c
@@ -168,21 +168,21 @@ SRCS(
src/xvasprintf.c
)
-IF (NOT MUSL)
- SRCS(
- src/freadahead.c
- src/fseterr.c
+IF (NOT MUSL)
+ SRCS(
+ src/freadahead.c
+ src/fseterr.c
# src/fseek.c
- )
+ )
ENDIF()
-
+
IF (NOT OS_LINUX)
- SRCS(
- src/pipe2.c
- src/strverscmp.c
- )
+ SRCS(
+ src/pipe2.c
+ src/strverscmp.c
+ )
ENDIF()
-
+
IF (NOT OS_WINDOWS)
SRCS(
src/stdio-write.c
@@ -191,56 +191,56 @@ ENDIF()
IF (OS_WINDOWS)
SRCS(
- src/frexp.c
- src/wcrtomb.c
- src/perror.c
- src/strstr.c
- src/mkstemp.c
- src/vasprintf.c
- src/strsignal.c
- src/mkdtemp.c
- src/fseeko.c
- src/fopen.c
- src/ftello.c
- src/gettimeofday.c
- src/localeconv.c
- src/msvc-inval.c
- src/msvc-nothrow.c
- src/open.c
- src/sigaction.c
- src/sigprocmask.c
- src/snprintf.c
- src/spawn_faction_addclose.c
- src/spawn_faction_adddup2.c
- src/spawn_faction_addopen.c
- src/spawn_faction_destroy.c
- src/spawn_faction_init.c
- src/spawnattr_destroy.c
- src/spawnattr_init.c
- src/spawnattr_setflags.c
- src/spawnattr_setsigmask.c
- src/spawni.c
- src/spawnp.c
- src/strndup.c
- src/waitpid.c
- src/wcwidth.c
- src/uniwidth/width.c
+ src/frexp.c
+ src/wcrtomb.c
+ src/perror.c
+ src/strstr.c
+ src/mkstemp.c
+ src/vasprintf.c
+ src/strsignal.c
+ src/mkdtemp.c
+ src/fseeko.c
+ src/fopen.c
+ src/ftello.c
+ src/gettimeofday.c
+ src/localeconv.c
+ src/msvc-inval.c
+ src/msvc-nothrow.c
+ src/open.c
+ src/sigaction.c
+ src/sigprocmask.c
+ src/snprintf.c
+ src/spawn_faction_addclose.c
+ src/spawn_faction_adddup2.c
+ src/spawn_faction_addopen.c
+ src/spawn_faction_destroy.c
+ src/spawn_faction_init.c
+ src/spawnattr_destroy.c
+ src/spawnattr_init.c
+ src/spawnattr_setflags.c
+ src/spawnattr_setsigmask.c
+ src/spawni.c
+ src/spawnp.c
+ src/strndup.c
+ src/waitpid.c
+ src/wcwidth.c
+ src/uniwidth/width.c
)
ENDIF()
IF (NOT OS_LINUX OR MUSL)
- SRCS(
- src/obstack.c
- src/obstack_printf.c
- )
+ SRCS(
+ src/obstack.c
+ src/obstack_printf.c
+ )
ENDIF()
-
+
IF (OS_CYGWIN OR OS_LINUX)
- #not need it
+ #not need it
ELSE()
- SRCS(
- src/fpending.c
- )
+ SRCS(
+ src/fpending.c
+ )
ENDIF()
-
+
END()
diff --git a/contrib/tools/bison/m4/src/builtin.c b/contrib/tools/bison/m4/src/builtin.c
index c4df2707c4..01ede38017 100644
--- a/contrib/tools/bison/m4/src/builtin.c
+++ b/contrib/tools/bison/m4/src/builtin.c
@@ -27,7 +27,7 @@
#include "execute.h"
#include "memchr2.h"
#include "progname.h"
-#include <contrib/tools/bison/gnulib/src/regex.h>
+#include <contrib/tools/bison/gnulib/src/regex.h>
#include "spawn-pipe.h"
#include "wait-process.h"
diff --git a/contrib/tools/bison/m4/src/input.c b/contrib/tools/bison/m4/src/input.c
index d182cf6f61..836d706489 100644
--- a/contrib/tools/bison/m4/src/input.c
+++ b/contrib/tools/bison/m4/src/input.c
@@ -60,7 +60,7 @@
accordingly. */
#ifdef ENABLE_CHANGEWORD
-#include <contrib/tools/bison/gnulib/src/regex.h>
+#include <contrib/tools/bison/gnulib/src/regex.h>
#endif
enum input_type
diff --git a/contrib/tools/bison/m4/ya.make b/contrib/tools/bison/m4/ya.make
index b46c28d3de..3a54fa2c8f 100644
--- a/contrib/tools/bison/m4/ya.make
+++ b/contrib/tools/bison/m4/ya.make
@@ -6,16 +6,16 @@ LICENSE(GPL-3.0-or-later)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-NO_RUNTIME()
+NO_RUNTIME()
NO_COMPILER_WARNINGS()
-IF (MUSL)
+IF (MUSL)
CFLAGS(
-DO_BINARY=0
)
-ENDIF()
-
+ENDIF()
+
SRCS(
src/builtin.c
src/debug.c
@@ -31,8 +31,8 @@ SRCS(
src/cpp.cpp
)
-PEERDIR(
- contrib/tools/bison/gnulib
-)
+PEERDIR(
+ contrib/tools/bison/gnulib
+)
END()
diff --git a/contrib/tools/bison/ya.make b/contrib/tools/bison/ya.make
index eb269d2d08..26f5d96df1 100644
--- a/contrib/tools/bison/ya.make
+++ b/contrib/tools/bison/ya.make
@@ -4,4 +4,4 @@ RECURSE(
bison
gnulib
m4
-)
+)
diff --git a/contrib/tools/cython/Cython/Build/BuildExecutable.py b/contrib/tools/cython/Cython/Build/BuildExecutable.py
index 7bd27c30db..2db9e5d745 100644
--- a/contrib/tools/cython/Cython/Build/BuildExecutable.py
+++ b/contrib/tools/cython/Cython/Build/BuildExecutable.py
@@ -1,142 +1,142 @@
-"""
-Compile a Python script into an executable that embeds CPython and run it.
-Requires CPython to be built as a shared library ('libpythonX.Y').
-
-Basic usage:
-
- python cythonrun somefile.py [ARGS]
-"""
-
-from __future__ import absolute_import
-
-DEBUG = True
-
-import sys
-import os
-from distutils import sysconfig
-
-
-def get_config_var(name, default=''):
- return sysconfig.get_config_var(name) or default
-
-INCDIR = sysconfig.get_python_inc()
-LIBDIR1 = get_config_var('LIBDIR')
-LIBDIR2 = get_config_var('LIBPL')
-PYLIB = get_config_var('LIBRARY')
-PYLIB_DYN = get_config_var('LDLIBRARY')
-if PYLIB_DYN == PYLIB:
- # no shared library
- PYLIB_DYN = ''
-else:
- PYLIB_DYN = os.path.splitext(PYLIB_DYN[3:])[0] # 'lib(XYZ).so' -> XYZ
-
-CC = get_config_var('CC', os.environ.get('CC', ''))
-CFLAGS = get_config_var('CFLAGS') + ' ' + os.environ.get('CFLAGS', '')
-LINKCC = get_config_var('LINKCC', os.environ.get('LINKCC', CC))
-LINKFORSHARED = get_config_var('LINKFORSHARED')
-LIBS = get_config_var('LIBS')
-SYSLIBS = get_config_var('SYSLIBS')
-EXE_EXT = sysconfig.get_config_var('EXE')
-
-def _debug(msg, *args):
- if DEBUG:
- if args:
- msg = msg % args
- sys.stderr.write(msg + '\n')
-
-def dump_config():
- _debug('INCDIR: %s', INCDIR)
- _debug('LIBDIR1: %s', LIBDIR1)
- _debug('LIBDIR2: %s', LIBDIR2)
- _debug('PYLIB: %s', PYLIB)
- _debug('PYLIB_DYN: %s', PYLIB_DYN)
- _debug('CC: %s', CC)
- _debug('CFLAGS: %s', CFLAGS)
- _debug('LINKCC: %s', LINKCC)
- _debug('LINKFORSHARED: %s', LINKFORSHARED)
- _debug('LIBS: %s', LIBS)
- _debug('SYSLIBS: %s', SYSLIBS)
- _debug('EXE_EXT: %s', EXE_EXT)
-
-def runcmd(cmd, shell=True):
- if shell:
- cmd = ' '.join(cmd)
- _debug(cmd)
- else:
- _debug(' '.join(cmd))
-
- try:
- import subprocess
- except ImportError: # Python 2.3 ...
- returncode = os.system(cmd)
- else:
- returncode = subprocess.call(cmd, shell=shell)
-
- if returncode:
- sys.exit(returncode)
-
-def clink(basename):
- runcmd([LINKCC, '-o', basename + EXE_EXT, basename+'.o', '-L'+LIBDIR1, '-L'+LIBDIR2]
- + [PYLIB_DYN and ('-l'+PYLIB_DYN) or os.path.join(LIBDIR1, PYLIB)]
- + LIBS.split() + SYSLIBS.split() + LINKFORSHARED.split())
-
-def ccompile(basename):
- runcmd([CC, '-c', '-o', basename+'.o', basename+'.c', '-I' + INCDIR] + CFLAGS.split())
-
-def cycompile(input_file, options=()):
- from ..Compiler import Version, CmdLine, Main
- options, sources = CmdLine.parse_command_line(list(options or ()) + ['--embed', input_file])
- _debug('Using Cython %s to compile %s', Version.version, input_file)
- result = Main.compile(sources, options)
- if result.num_errors > 0:
- sys.exit(1)
-
-def exec_file(program_name, args=()):
- runcmd([os.path.abspath(program_name)] + list(args), shell=False)
-
-def build(input_file, compiler_args=(), force=False):
- """
- Build an executable program from a Cython module.
-
- Returns the name of the executable file.
- """
- basename = os.path.splitext(input_file)[0]
- exe_file = basename + EXE_EXT
- if not force and os.path.abspath(exe_file) == os.path.abspath(input_file):
- raise ValueError("Input and output file names are the same, refusing to overwrite")
- if (not force and os.path.exists(exe_file) and os.path.exists(input_file)
- and os.path.getmtime(input_file) <= os.path.getmtime(exe_file)):
- _debug("File is up to date, not regenerating %s", exe_file)
- return exe_file
- cycompile(input_file, compiler_args)
- ccompile(basename)
- clink(basename)
- return exe_file
-
-def build_and_run(args):
- """
- Build an executable program from a Cython module and runs it.
-
- Arguments after the module name will be passed verbatimely to the
- program.
- """
- cy_args = []
- last_arg = None
- for i, arg in enumerate(args):
- if arg.startswith('-'):
- cy_args.append(arg)
- elif last_arg in ('-X', '--directive'):
- cy_args.append(arg)
- else:
- input_file = arg
- args = args[i+1:]
- break
- last_arg = arg
- else:
- raise ValueError('no input file provided')
-
- program_name = build(input_file, cy_args)
- exec_file(program_name, args)
-
-if __name__ == '__main__':
- build_and_run(sys.argv[1:])
+"""
+Compile a Python script into an executable that embeds CPython and run it.
+Requires CPython to be built as a shared library ('libpythonX.Y').
+
+Basic usage:
+
+ python cythonrun somefile.py [ARGS]
+"""
+
+from __future__ import absolute_import
+
+DEBUG = True
+
+import sys
+import os
+from distutils import sysconfig
+
+
+def get_config_var(name, default=''):
+ return sysconfig.get_config_var(name) or default
+
+INCDIR = sysconfig.get_python_inc()
+LIBDIR1 = get_config_var('LIBDIR')
+LIBDIR2 = get_config_var('LIBPL')
+PYLIB = get_config_var('LIBRARY')
+PYLIB_DYN = get_config_var('LDLIBRARY')
+if PYLIB_DYN == PYLIB:
+ # no shared library
+ PYLIB_DYN = ''
+else:
+ PYLIB_DYN = os.path.splitext(PYLIB_DYN[3:])[0] # 'lib(XYZ).so' -> XYZ
+
+CC = get_config_var('CC', os.environ.get('CC', ''))
+CFLAGS = get_config_var('CFLAGS') + ' ' + os.environ.get('CFLAGS', '')
+LINKCC = get_config_var('LINKCC', os.environ.get('LINKCC', CC))
+LINKFORSHARED = get_config_var('LINKFORSHARED')
+LIBS = get_config_var('LIBS')
+SYSLIBS = get_config_var('SYSLIBS')
+EXE_EXT = sysconfig.get_config_var('EXE')
+
+def _debug(msg, *args):
+ if DEBUG:
+ if args:
+ msg = msg % args
+ sys.stderr.write(msg + '\n')
+
+def dump_config():
+ _debug('INCDIR: %s', INCDIR)
+ _debug('LIBDIR1: %s', LIBDIR1)
+ _debug('LIBDIR2: %s', LIBDIR2)
+ _debug('PYLIB: %s', PYLIB)
+ _debug('PYLIB_DYN: %s', PYLIB_DYN)
+ _debug('CC: %s', CC)
+ _debug('CFLAGS: %s', CFLAGS)
+ _debug('LINKCC: %s', LINKCC)
+ _debug('LINKFORSHARED: %s', LINKFORSHARED)
+ _debug('LIBS: %s', LIBS)
+ _debug('SYSLIBS: %s', SYSLIBS)
+ _debug('EXE_EXT: %s', EXE_EXT)
+
+def runcmd(cmd, shell=True):
+ if shell:
+ cmd = ' '.join(cmd)
+ _debug(cmd)
+ else:
+ _debug(' '.join(cmd))
+
+ try:
+ import subprocess
+ except ImportError: # Python 2.3 ...
+ returncode = os.system(cmd)
+ else:
+ returncode = subprocess.call(cmd, shell=shell)
+
+ if returncode:
+ sys.exit(returncode)
+
+def clink(basename):
+ runcmd([LINKCC, '-o', basename + EXE_EXT, basename+'.o', '-L'+LIBDIR1, '-L'+LIBDIR2]
+ + [PYLIB_DYN and ('-l'+PYLIB_DYN) or os.path.join(LIBDIR1, PYLIB)]
+ + LIBS.split() + SYSLIBS.split() + LINKFORSHARED.split())
+
+def ccompile(basename):
+ runcmd([CC, '-c', '-o', basename+'.o', basename+'.c', '-I' + INCDIR] + CFLAGS.split())
+
+def cycompile(input_file, options=()):
+ from ..Compiler import Version, CmdLine, Main
+ options, sources = CmdLine.parse_command_line(list(options or ()) + ['--embed', input_file])
+ _debug('Using Cython %s to compile %s', Version.version, input_file)
+ result = Main.compile(sources, options)
+ if result.num_errors > 0:
+ sys.exit(1)
+
+def exec_file(program_name, args=()):
+ runcmd([os.path.abspath(program_name)] + list(args), shell=False)
+
+def build(input_file, compiler_args=(), force=False):
+ """
+ Build an executable program from a Cython module.
+
+ Returns the name of the executable file.
+ """
+ basename = os.path.splitext(input_file)[0]
+ exe_file = basename + EXE_EXT
+ if not force and os.path.abspath(exe_file) == os.path.abspath(input_file):
+ raise ValueError("Input and output file names are the same, refusing to overwrite")
+ if (not force and os.path.exists(exe_file) and os.path.exists(input_file)
+ and os.path.getmtime(input_file) <= os.path.getmtime(exe_file)):
+ _debug("File is up to date, not regenerating %s", exe_file)
+ return exe_file
+ cycompile(input_file, compiler_args)
+ ccompile(basename)
+ clink(basename)
+ return exe_file
+
+def build_and_run(args):
+ """
+ Build an executable program from a Cython module and runs it.
+
+ Arguments after the module name will be passed verbatimely to the
+ program.
+ """
+ cy_args = []
+ last_arg = None
+ for i, arg in enumerate(args):
+ if arg.startswith('-'):
+ cy_args.append(arg)
+ elif last_arg in ('-X', '--directive'):
+ cy_args.append(arg)
+ else:
+ input_file = arg
+ args = args[i+1:]
+ break
+ last_arg = arg
+ else:
+ raise ValueError('no input file provided')
+
+ program_name = build(input_file, cy_args)
+ exec_file(program_name, args)
+
+if __name__ == '__main__':
+ build_and_run(sys.argv[1:])
diff --git a/contrib/tools/cython/Cython/Build/Cythonize.py b/contrib/tools/cython/Cython/Build/Cythonize.py
index 170961e289..c85b6eabab 100644
--- a/contrib/tools/cython/Cython/Build/Cythonize.py
+++ b/contrib/tools/cython/Cython/Build/Cythonize.py
@@ -1,65 +1,65 @@
-#!/usr/bin/env python
-
-from __future__ import absolute_import
-
-import os
-import shutil
-import tempfile
-from distutils.core import setup
-
-from .Dependencies import cythonize, extended_iglob
-from ..Utils import is_package_dir
-from ..Compiler import Options
-
-try:
- import multiprocessing
- parallel_compiles = int(multiprocessing.cpu_count() * 1.5)
-except ImportError:
- multiprocessing = None
- parallel_compiles = 0
-
-
-class _FakePool(object):
- def map_async(self, func, args):
+#!/usr/bin/env python
+
+from __future__ import absolute_import
+
+import os
+import shutil
+import tempfile
+from distutils.core import setup
+
+from .Dependencies import cythonize, extended_iglob
+from ..Utils import is_package_dir
+from ..Compiler import Options
+
+try:
+ import multiprocessing
+ parallel_compiles = int(multiprocessing.cpu_count() * 1.5)
+except ImportError:
+ multiprocessing = None
+ parallel_compiles = 0
+
+
+class _FakePool(object):
+ def map_async(self, func, args):
try:
from itertools import imap
except ImportError:
imap=map
- for _ in imap(func, args):
- pass
-
+ for _ in imap(func, args):
+ pass
+
def close(self):
pass
-
+
def terminate(self):
pass
-
+
def join(self):
pass
-def parse_directives(option, name, value, parser):
- dest = option.dest
- old_directives = dict(getattr(parser.values, dest,
+def parse_directives(option, name, value, parser):
+ dest = option.dest
+ old_directives = dict(getattr(parser.values, dest,
Options.get_directive_defaults()))
- directives = Options.parse_directive_list(
- value, relaxed_bool=True, current_settings=old_directives)
- setattr(parser.values, dest, directives)
-
-
-def parse_options(option, name, value, parser):
- dest = option.dest
- options = dict(getattr(parser.values, dest, {}))
- for opt in value.split(','):
- if '=' in opt:
- n, v = opt.split('=', 1)
- v = v.lower() not in ('false', 'f', '0', 'no')
- else:
- n, v = opt, True
- options[n] = v
- setattr(parser.values, dest, options)
-
-
+ directives = Options.parse_directive_list(
+ value, relaxed_bool=True, current_settings=old_directives)
+ setattr(parser.values, dest, directives)
+
+
+def parse_options(option, name, value, parser):
+ dest = option.dest
+ options = dict(getattr(parser.values, dest, {}))
+ for opt in value.split(','):
+ if '=' in opt:
+ n, v = opt.split('=', 1)
+ v = v.lower() not in ('false', 'f', '0', 'no')
+ else:
+ n, v = opt, True
+ options[n] = v
+ setattr(parser.values, dest, options)
+
+
def parse_compile_time_env(option, name, value, parser):
dest = option.dest
old_env = dict(getattr(parser.values, dest, {}))
@@ -67,96 +67,96 @@ def parse_compile_time_env(option, name, value, parser):
setattr(parser.values, dest, new_env)
-def find_package_base(path):
- base_dir, package_path = os.path.split(path)
- while os.path.isfile(os.path.join(base_dir, '__init__.py')):
- base_dir, parent = os.path.split(base_dir)
- package_path = '%s/%s' % (parent, package_path)
- return base_dir, package_path
-
-
-def cython_compile(path_pattern, options):
- pool = None
+def find_package_base(path):
+ base_dir, package_path = os.path.split(path)
+ while os.path.isfile(os.path.join(base_dir, '__init__.py')):
+ base_dir, parent = os.path.split(base_dir)
+ package_path = '%s/%s' % (parent, package_path)
+ return base_dir, package_path
+
+
+def cython_compile(path_pattern, options):
+ pool = None
all_paths = map(os.path.abspath, extended_iglob(path_pattern))
- try:
+ try:
for path in all_paths:
- if options.build_inplace:
- base_dir = path
- while not os.path.isdir(base_dir) or is_package_dir(base_dir):
- base_dir = os.path.dirname(base_dir)
- else:
- base_dir = None
-
- if os.path.isdir(path):
- # recursively compiling a package
+ if options.build_inplace:
+ base_dir = path
+ while not os.path.isdir(base_dir) or is_package_dir(base_dir):
+ base_dir = os.path.dirname(base_dir)
+ else:
+ base_dir = None
+
+ if os.path.isdir(path):
+ # recursively compiling a package
paths = [os.path.join(path, '**', '*.{py,pyx}')]
- else:
- # assume it's a file(-like thing)
- paths = [path]
-
- ext_modules = cythonize(
- paths,
- nthreads=options.parallel,
- exclude_failures=options.keep_going,
- exclude=options.excludes,
- compiler_directives=options.directives,
+ else:
+ # assume it's a file(-like thing)
+ paths = [path]
+
+ ext_modules = cythonize(
+ paths,
+ nthreads=options.parallel,
+ exclude_failures=options.keep_going,
+ exclude=options.excludes,
+ compiler_directives=options.directives,
compile_time_env=options.compile_time_env,
- force=options.force,
- quiet=options.quiet,
+ force=options.force,
+ quiet=options.quiet,
depfile=options.depfile,
- **options.options)
-
- if ext_modules and options.build:
- if len(ext_modules) > 1 and options.parallel > 1:
- if pool is None:
- try:
- pool = multiprocessing.Pool(options.parallel)
- except OSError:
- pool = _FakePool()
- pool.map_async(run_distutils, [
- (base_dir, [ext]) for ext in ext_modules])
- else:
- run_distutils((base_dir, ext_modules))
- except:
- if pool is not None:
- pool.terminate()
- raise
- else:
- if pool is not None:
- pool.close()
- pool.join()
-
-
-def run_distutils(args):
- base_dir, ext_modules = args
- script_args = ['build_ext', '-i']
- cwd = os.getcwd()
- temp_dir = None
- try:
- if base_dir:
- os.chdir(base_dir)
- temp_dir = tempfile.mkdtemp(dir=base_dir)
- script_args.extend(['--build-temp', temp_dir])
- setup(
- script_name='setup.py',
- script_args=script_args,
- ext_modules=ext_modules,
- )
- finally:
- if base_dir:
- os.chdir(cwd)
- if temp_dir and os.path.isdir(temp_dir):
- shutil.rmtree(temp_dir)
-
-
-def parse_args(args):
- from optparse import OptionParser
- parser = OptionParser(usage='%prog [options] [sources and packages]+')
-
+ **options.options)
+
+ if ext_modules and options.build:
+ if len(ext_modules) > 1 and options.parallel > 1:
+ if pool is None:
+ try:
+ pool = multiprocessing.Pool(options.parallel)
+ except OSError:
+ pool = _FakePool()
+ pool.map_async(run_distutils, [
+ (base_dir, [ext]) for ext in ext_modules])
+ else:
+ run_distutils((base_dir, ext_modules))
+ except:
+ if pool is not None:
+ pool.terminate()
+ raise
+ else:
+ if pool is not None:
+ pool.close()
+ pool.join()
+
+
+def run_distutils(args):
+ base_dir, ext_modules = args
+ script_args = ['build_ext', '-i']
+ cwd = os.getcwd()
+ temp_dir = None
+ try:
+ if base_dir:
+ os.chdir(base_dir)
+ temp_dir = tempfile.mkdtemp(dir=base_dir)
+ script_args.extend(['--build-temp', temp_dir])
+ setup(
+ script_name='setup.py',
+ script_args=script_args,
+ ext_modules=ext_modules,
+ )
+ finally:
+ if base_dir:
+ os.chdir(cwd)
+ if temp_dir and os.path.isdir(temp_dir):
+ shutil.rmtree(temp_dir)
+
+
+def parse_args(args):
+ from optparse import OptionParser
+ parser = OptionParser(usage='%prog [options] [sources and packages]+')
+
parser.add_option('-X', '--directive', metavar='NAME=VALUE,...',
dest='directives', default={}, type="str",
action='callback', callback=parse_directives,
- help='set a compiler directive')
+ help='set a compiler directive')
parser.add_option('-E', '--compile-time-env', metavar='NAME=VALUE,...',
dest='compile_time_env', default={}, type="str",
action='callback', callback=parse_compile_time_env,
@@ -164,66 +164,66 @@ def parse_args(args):
parser.add_option('-s', '--option', metavar='NAME=VALUE',
dest='options', default={}, type="str",
action='callback', callback=parse_options,
- help='set a cythonize option')
+ help='set a cythonize option')
parser.add_option('-2', dest='language_level', action='store_const', const=2, default=None,
help='use Python 2 syntax mode by default')
parser.add_option('-3', dest='language_level', action='store_const', const=3,
- help='use Python 3 syntax mode by default')
+ help='use Python 3 syntax mode by default')
parser.add_option('--3str', dest='language_level', action='store_const', const='3str',
help='use Python 3 syntax mode by default')
parser.add_option('-a', '--annotate', dest='annotate', action='store_true',
help='generate annotated HTML page for source files')
-
- parser.add_option('-x', '--exclude', metavar='PATTERN', dest='excludes',
- action='append', default=[],
- help='exclude certain file patterns from the compilation')
-
- parser.add_option('-b', '--build', dest='build', action='store_true',
- help='build extension modules using distutils')
- parser.add_option('-i', '--inplace', dest='build_inplace', action='store_true',
- help='build extension modules in place using distutils (implies -b)')
- parser.add_option('-j', '--parallel', dest='parallel', metavar='N',
- type=int, default=parallel_compiles,
- help=('run builds in N parallel jobs (default: %d)' %
- parallel_compiles or 1))
- parser.add_option('-f', '--force', dest='force', action='store_true',
- help='force recompilation')
- parser.add_option('-q', '--quiet', dest='quiet', action='store_true',
- help='be less verbose during compilation')
-
- parser.add_option('--lenient', dest='lenient', action='store_true',
- help='increase Python compatibility by ignoring some compile time errors')
- parser.add_option('-k', '--keep-going', dest='keep_going', action='store_true',
- help='compile as much as possible, ignore compilation failures')
+
+ parser.add_option('-x', '--exclude', metavar='PATTERN', dest='excludes',
+ action='append', default=[],
+ help='exclude certain file patterns from the compilation')
+
+ parser.add_option('-b', '--build', dest='build', action='store_true',
+ help='build extension modules using distutils')
+ parser.add_option('-i', '--inplace', dest='build_inplace', action='store_true',
+ help='build extension modules in place using distutils (implies -b)')
+ parser.add_option('-j', '--parallel', dest='parallel', metavar='N',
+ type=int, default=parallel_compiles,
+ help=('run builds in N parallel jobs (default: %d)' %
+ parallel_compiles or 1))
+ parser.add_option('-f', '--force', dest='force', action='store_true',
+ help='force recompilation')
+ parser.add_option('-q', '--quiet', dest='quiet', action='store_true',
+ help='be less verbose during compilation')
+
+ parser.add_option('--lenient', dest='lenient', action='store_true',
+ help='increase Python compatibility by ignoring some compile time errors')
+ parser.add_option('-k', '--keep-going', dest='keep_going', action='store_true',
+ help='compile as much as possible, ignore compilation failures')
parser.add_option('-M', '--depfile', action='store_true', help='produce depfiles for the sources')
-
- options, args = parser.parse_args(args)
- if not args:
- parser.error("no source files provided")
- if options.build_inplace:
- options.build = True
- if multiprocessing is None:
- options.parallel = 0
+
+ options, args = parser.parse_args(args)
+ if not args:
+ parser.error("no source files provided")
+ if options.build_inplace:
+ options.build = True
+ if multiprocessing is None:
+ options.parallel = 0
if options.language_level:
assert options.language_level in (2, 3, '3str')
options.options['language_level'] = options.language_level
- return options, args
-
-
-def main(args=None):
- options, paths = parse_args(args)
-
- if options.lenient:
- # increase Python compatibility by ignoring compile time errors
- Options.error_on_unknown_names = False
- Options.error_on_uninitialized = False
-
+ return options, args
+
+
+def main(args=None):
+ options, paths = parse_args(args)
+
+ if options.lenient:
+ # increase Python compatibility by ignoring compile time errors
+ Options.error_on_unknown_names = False
+ Options.error_on_uninitialized = False
+
if options.annotate:
Options.annotate = True
- for path in paths:
- cython_compile(path, options)
-
-
-if __name__ == '__main__':
- main()
+ for path in paths:
+ cython_compile(path, options)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/contrib/tools/cython/Cython/Build/Dependencies.py b/contrib/tools/cython/Cython/Build/Dependencies.py
index d55ee7cb88..7eb55e2607 100644
--- a/contrib/tools/cython/Cython/Build/Dependencies.py
+++ b/contrib/tools/cython/Cython/Build/Dependencies.py
@@ -1,8 +1,8 @@
from __future__ import absolute_import, print_function
-
-import cython
-from .. import __version__
-
+
+import cython
+from .. import __version__
+
import collections
import contextlib
import hashlib
@@ -11,27 +11,27 @@ import shutil
import subprocess
import re, sys, time
import warnings
-from glob import iglob
+from glob import iglob
from io import open as io_open
from os.path import relpath as _relpath
from distutils.extension import Extension
from distutils.util import strtobool
import zipfile
-
-try:
+
+try:
from collections.abc import Iterable
except ImportError:
from collections import Iterable
try:
- import gzip
- gzip_open = gzip.open
- gzip_ext = '.gz'
-except ImportError:
- gzip_open = open
- gzip_ext = ''
-
-try:
+ import gzip
+ gzip_open = gzip.open
+ gzip_ext = '.gz'
+except ImportError:
+ gzip_open = open
+ gzip_ext = ''
+
+try:
import zlib
zipfile_compression_mode = zipfile.ZIP_DEFLATED
except ImportError:
@@ -41,30 +41,30 @@ try:
import pythran
except:
pythran = None
-
-from .. import Utils
+
+from .. import Utils
from ..Utils import (cached_function, cached_method, path_exists,
safe_makedirs, copy_file_to_dir_if_newer, is_package_dir, replace_suffix)
-from ..Compiler.Main import Context, CompilationOptions, default_options
-
-join_path = cached_function(os.path.join)
+from ..Compiler.Main import Context, CompilationOptions, default_options
+
+join_path = cached_function(os.path.join)
copy_once_if_newer = cached_function(copy_file_to_dir_if_newer)
safe_makedirs_once = cached_function(safe_makedirs)
-
-if sys.version_info[0] < 3:
- # stupid Py2 distutils enforces str type in list of sources
- _fs_encoding = sys.getfilesystemencoding()
- if _fs_encoding is None:
- _fs_encoding = sys.getdefaultencoding()
- def encode_filename_in_py2(filename):
+
+if sys.version_info[0] < 3:
+ # stupid Py2 distutils enforces str type in list of sources
+ _fs_encoding = sys.getfilesystemencoding()
+ if _fs_encoding is None:
+ _fs_encoding = sys.getdefaultencoding()
+ def encode_filename_in_py2(filename):
if not isinstance(filename, bytes):
- return filename.encode(_fs_encoding)
- return filename
-else:
- def encode_filename_in_py2(filename):
- return filename
- basestring = str
-
+ return filename.encode(_fs_encoding)
+ return filename
+else:
+ def encode_filename_in_py2(filename):
+ return filename
+ basestring = str
+
def _make_relative(file_paths, base=None):
if not base:
@@ -75,7 +75,7 @@ def _make_relative(file_paths, base=None):
for path in file_paths]
-def extended_iglob(pattern):
+def extended_iglob(pattern):
if '{' in pattern:
m = re.match('(.*){([^}]+)}(.*)', pattern)
if m:
@@ -84,26 +84,26 @@ def extended_iglob(pattern):
for path in extended_iglob(before + case + after):
yield path
return
- if '**/' in pattern:
- seen = set()
- first, rest = pattern.split('**/', 1)
- if first:
- first = iglob(first+'/')
- else:
- first = ['']
- for root in first:
- for path in extended_iglob(join_path(root, rest)):
- if path not in seen:
- seen.add(path)
- yield path
- for path in extended_iglob(join_path(root, '*', '**/' + rest)):
- if path not in seen:
- seen.add(path)
- yield path
- else:
- for path in iglob(pattern):
- yield path
-
+ if '**/' in pattern:
+ seen = set()
+ first, rest = pattern.split('**/', 1)
+ if first:
+ first = iglob(first+'/')
+ else:
+ first = ['']
+ for root in first:
+ for path in extended_iglob(join_path(root, rest)):
+ if path not in seen:
+ seen.add(path)
+ yield path
+ for path in extended_iglob(join_path(root, '*', '**/' + rest)):
+ if path not in seen:
+ seen.add(path)
+ yield path
+ else:
+ for path in iglob(pattern):
+ yield path
+
def nonempty(it, error_msg="expected non-empty iterator"):
empty = True
@@ -114,18 +114,18 @@ def nonempty(it, error_msg="expected non-empty iterator"):
raise ValueError(error_msg)
-@cached_function
-def file_hash(filename):
+@cached_function
+def file_hash(filename):
path = os.path.normpath(filename)
prefix = ('%d:%s' % (len(path), path)).encode("UTF-8")
m = hashlib.md5(prefix)
with open(path, 'rb') as f:
- data = f.read(65000)
- while data:
- m.update(data)
- data = f.read(65000)
- return m.hexdigest()
-
+ data = f.read(65000)
+ while data:
+ m.update(data)
+ data = f.read(65000)
+ return m.hexdigest()
+
def update_pythran_extension(ext):
if pythran is None:
@@ -152,250 +152,250 @@ def update_pythran_extension(ext):
pass
-def parse_list(s):
- """
+def parse_list(s):
+ """
>>> parse_list("")
[]
>>> parse_list("a")
['a']
- >>> parse_list("a b c")
- ['a', 'b', 'c']
- >>> parse_list("[a, b, c]")
- ['a', 'b', 'c']
- >>> parse_list('a " " b')
- ['a', ' ', 'b']
- >>> parse_list('[a, ",a", "a,", ",", ]')
- ['a', ',a', 'a,', ',']
- """
+ >>> parse_list("a b c")
+ ['a', 'b', 'c']
+ >>> parse_list("[a, b, c]")
+ ['a', 'b', 'c']
+ >>> parse_list('a " " b')
+ ['a', ' ', 'b']
+ >>> parse_list('[a, ",a", "a,", ",", ]')
+ ['a', ',a', 'a,', ',']
+ """
if len(s) >= 2 and s[0] == '[' and s[-1] == ']':
- s = s[1:-1]
- delimiter = ','
- else:
- delimiter = ' '
- s, literals = strip_string_literals(s)
- def unquote(literal):
- literal = literal.strip()
- if literal[0] in "'\"":
- return literals[literal[1:-1]]
- else:
- return literal
- return [unquote(item) for item in s.split(delimiter) if item.strip()]
-
-
-transitive_str = object()
-transitive_list = object()
+ s = s[1:-1]
+ delimiter = ','
+ else:
+ delimiter = ' '
+ s, literals = strip_string_literals(s)
+ def unquote(literal):
+ literal = literal.strip()
+ if literal[0] in "'\"":
+ return literals[literal[1:-1]]
+ else:
+ return literal
+ return [unquote(item) for item in s.split(delimiter) if item.strip()]
+
+
+transitive_str = object()
+transitive_list = object()
bool_or = object()
-
-distutils_settings = {
- 'name': str,
- 'sources': list,
- 'define_macros': list,
- 'undef_macros': list,
- 'libraries': transitive_list,
- 'library_dirs': transitive_list,
- 'runtime_library_dirs': transitive_list,
- 'include_dirs': transitive_list,
- 'extra_objects': list,
- 'extra_compile_args': transitive_list,
- 'extra_link_args': transitive_list,
- 'export_symbols': list,
- 'depends': transitive_list,
- 'language': transitive_str,
+
+distutils_settings = {
+ 'name': str,
+ 'sources': list,
+ 'define_macros': list,
+ 'undef_macros': list,
+ 'libraries': transitive_list,
+ 'library_dirs': transitive_list,
+ 'runtime_library_dirs': transitive_list,
+ 'include_dirs': transitive_list,
+ 'extra_objects': list,
+ 'extra_compile_args': transitive_list,
+ 'extra_link_args': transitive_list,
+ 'export_symbols': list,
+ 'depends': transitive_list,
+ 'language': transitive_str,
'np_pythran': bool_or
-}
-
+}
+
@cython.locals(start=cython.Py_ssize_t, end=cython.Py_ssize_t)
-def line_iter(source):
- if isinstance(source, basestring):
- start = 0
- while True:
- end = source.find('\n', start)
- if end == -1:
- yield source[start:]
- return
- yield source[start:end]
- start = end+1
- else:
- for line in source:
- yield line
-
-
-class DistutilsInfo(object):
-
- def __init__(self, source=None, exn=None):
- self.values = {}
- if source is not None:
- for line in line_iter(source):
+def line_iter(source):
+ if isinstance(source, basestring):
+ start = 0
+ while True:
+ end = source.find('\n', start)
+ if end == -1:
+ yield source[start:]
+ return
+ yield source[start:end]
+ start = end+1
+ else:
+ for line in source:
+ yield line
+
+
+class DistutilsInfo(object):
+
+ def __init__(self, source=None, exn=None):
+ self.values = {}
+ if source is not None:
+ for line in line_iter(source):
line = line.lstrip()
if not line:
continue
if line[0] != '#':
- break
+ break
line = line[1:].lstrip()
kind = next((k for k in ("distutils:","cython:") if line.startswith(k)), None)
if kind is not None:
key, _, value = [s.strip() for s in line[len(kind):].partition('=')]
type = distutils_settings.get(key, None)
if line.startswith("cython:") and type is None: continue
- if type in (list, transitive_list):
- value = parse_list(value)
- if key == 'define_macros':
+ if type in (list, transitive_list):
+ value = parse_list(value)
+ if key == 'define_macros':
value = [tuple(macro.split('=', 1))
if '=' in macro else (macro, None)
for macro in value]
if type is bool_or:
value = strtobool(value)
- self.values[key] = value
- elif exn is not None:
- for key in distutils_settings:
+ self.values[key] = value
+ elif exn is not None:
+ for key in distutils_settings:
if key in ('name', 'sources','np_pythran'):
- continue
- value = getattr(exn, key, None)
- if value:
- self.values[key] = value
-
- def merge(self, other):
- if other is None:
- return self
- for key, value in other.values.items():
- type = distutils_settings[key]
- if type is transitive_str and key not in self.values:
- self.values[key] = value
- elif type is transitive_list:
- if key in self.values:
+ continue
+ value = getattr(exn, key, None)
+ if value:
+ self.values[key] = value
+
+ def merge(self, other):
+ if other is None:
+ return self
+ for key, value in other.values.items():
+ type = distutils_settings[key]
+ if type is transitive_str and key not in self.values:
+ self.values[key] = value
+ elif type is transitive_list:
+ if key in self.values:
# Change a *copy* of the list (Trac #845)
all = self.values[key][:]
- for v in value:
- if v not in all:
- all.append(v)
+ for v in value:
+ if v not in all:
+ all.append(v)
value = all
self.values[key] = value
elif type is bool_or:
self.values[key] = self.values.get(key, False) | value
- return self
-
- def subs(self, aliases):
- if aliases is None:
- return self
- resolved = DistutilsInfo()
- for key, value in self.values.items():
- type = distutils_settings[key]
- if type in [list, transitive_list]:
- new_value_list = []
- for v in value:
- if v in aliases:
- v = aliases[v]
- if isinstance(v, list):
- new_value_list += v
- else:
- new_value_list.append(v)
- value = new_value_list
- else:
- if value in aliases:
- value = aliases[value]
- resolved.values[key] = value
- return resolved
-
- def apply(self, extension):
- for key, value in self.values.items():
- type = distutils_settings[key]
- if type in [list, transitive_list]:
+ return self
+
+ def subs(self, aliases):
+ if aliases is None:
+ return self
+ resolved = DistutilsInfo()
+ for key, value in self.values.items():
+ type = distutils_settings[key]
+ if type in [list, transitive_list]:
+ new_value_list = []
+ for v in value:
+ if v in aliases:
+ v = aliases[v]
+ if isinstance(v, list):
+ new_value_list += v
+ else:
+ new_value_list.append(v)
+ value = new_value_list
+ else:
+ if value in aliases:
+ value = aliases[value]
+ resolved.values[key] = value
+ return resolved
+
+ def apply(self, extension):
+ for key, value in self.values.items():
+ type = distutils_settings[key]
+ if type in [list, transitive_list]:
value = getattr(extension, key) + list(value)
setattr(extension, key, value)
-
+
@cython.locals(start=cython.Py_ssize_t, q=cython.Py_ssize_t,
single_q=cython.Py_ssize_t, double_q=cython.Py_ssize_t,
hash_mark=cython.Py_ssize_t, end=cython.Py_ssize_t,
k=cython.Py_ssize_t, counter=cython.Py_ssize_t, quote_len=cython.Py_ssize_t)
-def strip_string_literals(code, prefix='__Pyx_L'):
- """
- Normalizes every string literal to be of the form '__Pyx_Lxxx',
- returning the normalized code and a mapping of labels to
- string literals.
- """
- new_code = []
- literals = {}
- counter = 0
- start = q = 0
- in_quote = False
- hash_mark = single_q = double_q = -1
- code_len = len(code)
+def strip_string_literals(code, prefix='__Pyx_L'):
+ """
+ Normalizes every string literal to be of the form '__Pyx_Lxxx',
+ returning the normalized code and a mapping of labels to
+ string literals.
+ """
+ new_code = []
+ literals = {}
+ counter = 0
+ start = q = 0
+ in_quote = False
+ hash_mark = single_q = double_q = -1
+ code_len = len(code)
quote_type = None
quote_len = -1
-
- while True:
- if hash_mark < q:
- hash_mark = code.find('#', q)
- if single_q < q:
- single_q = code.find("'", q)
- if double_q < q:
- double_q = code.find('"', q)
- q = min(single_q, double_q)
+
+ while True:
+ if hash_mark < q:
+ hash_mark = code.find('#', q)
+ if single_q < q:
+ single_q = code.find("'", q)
+ if double_q < q:
+ double_q = code.find('"', q)
+ q = min(single_q, double_q)
if q == -1:
q = max(single_q, double_q)
-
- # We're done.
- if q == -1 and hash_mark == -1:
- new_code.append(code[start:])
- break
-
- # Try to close the quote.
- elif in_quote:
- if code[q-1] == u'\\':
- k = 2
- while q >= k and code[q-k] == u'\\':
- k += 1
- if k % 2 == 0:
- q += 1
- continue
+
+ # We're done.
+ if q == -1 and hash_mark == -1:
+ new_code.append(code[start:])
+ break
+
+ # Try to close the quote.
+ elif in_quote:
+ if code[q-1] == u'\\':
+ k = 2
+ while q >= k and code[q-k] == u'\\':
+ k += 1
+ if k % 2 == 0:
+ q += 1
+ continue
if code[q] == quote_type and (
quote_len == 1 or (code_len > q + 2 and quote_type == code[q+1] == code[q+2])):
- counter += 1
- label = "%s%s_" % (prefix, counter)
- literals[label] = code[start+quote_len:q]
- full_quote = code[q:q+quote_len]
- new_code.append(full_quote)
- new_code.append(label)
- new_code.append(full_quote)
- q += quote_len
- in_quote = False
- start = q
- else:
- q += 1
-
- # Process comment.
- elif -1 != hash_mark and (hash_mark < q or q == -1):
- new_code.append(code[start:hash_mark+1])
- end = code.find('\n', hash_mark)
- counter += 1
- label = "%s%s_" % (prefix, counter)
- if end == -1:
- end_or_none = None
- else:
- end_or_none = end
- literals[label] = code[hash_mark+1:end_or_none]
- new_code.append(label)
- if end == -1:
- break
- start = q = end
-
- # Open the quote.
- else:
- if code_len >= q+3 and (code[q] == code[q+1] == code[q+2]):
- quote_len = 3
- else:
- quote_len = 1
- in_quote = True
- quote_type = code[q]
- new_code.append(code[start:q])
- start = q
- q += quote_len
-
- return "".join(new_code), literals
-
-
+ counter += 1
+ label = "%s%s_" % (prefix, counter)
+ literals[label] = code[start+quote_len:q]
+ full_quote = code[q:q+quote_len]
+ new_code.append(full_quote)
+ new_code.append(label)
+ new_code.append(full_quote)
+ q += quote_len
+ in_quote = False
+ start = q
+ else:
+ q += 1
+
+ # Process comment.
+ elif -1 != hash_mark and (hash_mark < q or q == -1):
+ new_code.append(code[start:hash_mark+1])
+ end = code.find('\n', hash_mark)
+ counter += 1
+ label = "%s%s_" % (prefix, counter)
+ if end == -1:
+ end_or_none = None
+ else:
+ end_or_none = end
+ literals[label] = code[hash_mark+1:end_or_none]
+ new_code.append(label)
+ if end == -1:
+ break
+ start = q = end
+
+ # Open the quote.
+ else:
+ if code_len >= q+3 and (code[q] == code[q+1] == code[q+2]):
+ quote_len = 3
+ else:
+ quote_len = 1
+ in_quote = True
+ quote_type = code[q]
+ new_code.append(code[start:q])
+ start = q
+ q += quote_len
+
+ return "".join(new_code), literals
+
+
# We need to allow spaces to allow for conditional compilation like
# IF ...:
# cimport ...
@@ -407,14 +407,14 @@ dependency_after_from_regex = re.compile(
r"(?:^\s+\(([0-9a-zA-Z_., ]*)\)[#\n])|"
r"(?:^\s+([0-9a-zA-Z_., ]*)[#\n])",
re.M)
-
-def normalize_existing(base_path, rel_paths):
- return normalize_existing0(os.path.dirname(base_path), tuple(set(rel_paths)))
-
-@cached_function
-def normalize_existing0(base_dir, rel_paths):
+def normalize_existing(base_path, rel_paths):
+ return normalize_existing0(os.path.dirname(base_path), tuple(set(rel_paths)))
+
+
+@cached_function
+def normalize_existing0(base_dir, rel_paths):
"""
Given some base directory ``base_dir`` and a list of path names
``rel_paths``, normalize each relative path name ``rel`` by
@@ -426,76 +426,76 @@ def normalize_existing0(base_dir, rel_paths):
changed (for example, if all paths were already absolute), then
``needed_base`` is ``None``.
"""
- normalized = []
+ normalized = []
needed_base = None
- for rel in rel_paths:
+ for rel in rel_paths:
if os.path.isabs(rel):
normalized.append(rel)
continue
- path = join_path(base_dir, rel)
- if path_exists(path):
- normalized.append(os.path.normpath(path))
+ path = join_path(base_dir, rel)
+ if path_exists(path):
+ normalized.append(os.path.normpath(path))
needed_base = base_dir
- else:
- normalized.append(rel)
+ else:
+ normalized.append(rel)
return (normalized, needed_base)
-
-
-def resolve_depends(depends, include_dirs):
- include_dirs = tuple(include_dirs)
- resolved = []
- for depend in depends:
- path = resolve_depend(depend, include_dirs)
- if path is not None:
- resolved.append(path)
- return resolved
-
-
-@cached_function
-def resolve_depend(depend, include_dirs):
- if depend[0] == '<' and depend[-1] == '>':
- return None
- for dir in include_dirs:
- path = join_path(dir, depend)
- if path_exists(path):
- return os.path.normpath(path)
- return None
-
-
-@cached_function
-def package(filename):
- dir = os.path.dirname(os.path.abspath(str(filename)))
+
+
+def resolve_depends(depends, include_dirs):
+ include_dirs = tuple(include_dirs)
+ resolved = []
+ for depend in depends:
+ path = resolve_depend(depend, include_dirs)
+ if path is not None:
+ resolved.append(path)
+ return resolved
+
+
+@cached_function
+def resolve_depend(depend, include_dirs):
+ if depend[0] == '<' and depend[-1] == '>':
+ return None
+ for dir in include_dirs:
+ path = join_path(dir, depend)
+ if path_exists(path):
+ return os.path.normpath(path)
+ return None
+
+
+@cached_function
+def package(filename):
+ dir = os.path.dirname(os.path.abspath(str(filename)))
if dir != filename and is_package_dir(dir):
- return package(dir) + (os.path.basename(dir),)
- else:
- return ()
-
-
-@cached_function
-def fully_qualified_name(filename):
- module = os.path.splitext(os.path.basename(filename))[0]
- return '.'.join(package(filename) + (module,))
-
-
-@cached_function
-def parse_dependencies(source_filename):
+ return package(dir) + (os.path.basename(dir),)
+ else:
+ return ()
+
+
+@cached_function
+def fully_qualified_name(filename):
+ module = os.path.splitext(os.path.basename(filename))[0]
+ return '.'.join(package(filename) + (module,))
+
+
+@cached_function
+def parse_dependencies(source_filename):
# Actual parsing is way too slow, so we use regular expressions.
- # The only catch is that we must strip comments and string
- # literals ahead of time.
+ # The only catch is that we must strip comments and string
+ # literals ahead of time.
with Utils.open_source_file(source_filename, error_handling='ignore') as fh:
- source = fh.read()
- distutils_info = DistutilsInfo(source)
- source, literals = strip_string_literals(source)
- source = source.replace('\\\n', ' ').replace('\t', ' ')
-
- # TODO: pure mode
- cimports = []
- includes = []
- externs = []
+ source = fh.read()
+ distutils_info = DistutilsInfo(source)
+ source, literals = strip_string_literals(source)
+ source = source.replace('\\\n', ' ').replace('\t', ' ')
+
+ # TODO: pure mode
+ cimports = []
+ includes = []
+ externs = []
for m in dependency_regex.finditer(source):
cimport_from, cimport_list, extern, include = m.groups()
- if cimport_from:
- cimports.append(cimport_from)
+ if cimport_from:
+ cimports.append(cimport_from)
m_after_from = dependency_after_from_regex.search(source, pos=m.end())
if m_after_from:
multiline, one_line = m_after_from.groups()
@@ -505,130 +505,130 @@ def parse_dependencies(source_filename):
elif cimport_list:
cimports.extend(x.strip() for x in cimport_list.split(","))
- elif extern:
- externs.append(literals[extern])
- else:
- includes.append(literals[include])
- return cimports, includes, externs, distutils_info
-
-
-class DependencyTree(object):
-
- def __init__(self, context, quiet=False):
- self.context = context
- self.quiet = quiet
- self._transitive_cache = {}
-
- def parse_dependencies(self, source_filename):
+ elif extern:
+ externs.append(literals[extern])
+ else:
+ includes.append(literals[include])
+ return cimports, includes, externs, distutils_info
+
+
+class DependencyTree(object):
+
+ def __init__(self, context, quiet=False):
+ self.context = context
+ self.quiet = quiet
+ self._transitive_cache = {}
+
+ def parse_dependencies(self, source_filename):
if path_exists(source_filename):
source_filename = os.path.normpath(source_filename)
- return parse_dependencies(source_filename)
-
- @cached_method
- def included_files(self, filename):
- # This is messy because included files are textually included, resolving
- # cimports (but not includes) relative to the including file.
- all = set()
- for include in self.parse_dependencies(filename)[1]:
- include_path = join_path(os.path.dirname(filename), include)
- if not path_exists(include_path):
- include_path = self.context.find_include_file(include, None)
- if include_path:
- if '.' + os.path.sep in include_path:
- include_path = os.path.normpath(include_path)
- all.add(include_path)
- all.update(self.included_files(include_path))
- elif not self.quiet:
- print("Unable to locate '%s' referenced from '%s'" % (filename, include))
- return all
-
- @cached_method
+ return parse_dependencies(source_filename)
+
+ @cached_method
+ def included_files(self, filename):
+ # This is messy because included files are textually included, resolving
+ # cimports (but not includes) relative to the including file.
+ all = set()
+ for include in self.parse_dependencies(filename)[1]:
+ include_path = join_path(os.path.dirname(filename), include)
+ if not path_exists(include_path):
+ include_path = self.context.find_include_file(include, None)
+ if include_path:
+ if '.' + os.path.sep in include_path:
+ include_path = os.path.normpath(include_path)
+ all.add(include_path)
+ all.update(self.included_files(include_path))
+ elif not self.quiet:
+ print("Unable to locate '%s' referenced from '%s'" % (filename, include))
+ return all
+
+ @cached_method
def cimports_externs_incdirs(self, filename):
- # This is really ugly. Nested cimports are resolved with respect to the
- # includer, but includes are resolved with respect to the includee.
- cimports, includes, externs = self.parse_dependencies(filename)[:3]
- cimports = set(cimports)
- externs = set(externs)
+ # This is really ugly. Nested cimports are resolved with respect to the
+ # includer, but includes are resolved with respect to the includee.
+ cimports, includes, externs = self.parse_dependencies(filename)[:3]
+ cimports = set(cimports)
+ externs = set(externs)
incdirs = set()
- for include in self.included_files(filename):
+ for include in self.included_files(filename):
included_cimports, included_externs, included_incdirs = self.cimports_externs_incdirs(include)
- cimports.update(included_cimports)
- externs.update(included_externs)
+ cimports.update(included_cimports)
+ externs.update(included_externs)
incdirs.update(included_incdirs)
externs, incdir = normalize_existing(filename, externs)
if incdir:
incdirs.add(incdir)
return tuple(cimports), externs, incdirs
-
- def cimports(self, filename):
+
+ def cimports(self, filename):
return self.cimports_externs_incdirs(filename)[0]
-
- def package(self, filename):
- return package(filename)
-
- def fully_qualified_name(self, filename):
- return fully_qualified_name(filename)
-
- @cached_method
- def find_pxd(self, module, filename=None):
- is_relative = module[0] == '.'
- if is_relative and not filename:
- raise NotImplementedError("New relative imports.")
- if filename is not None:
- module_path = module.split('.')
- if is_relative:
- module_path.pop(0) # just explicitly relative
- package_path = list(self.package(filename))
- while module_path and not module_path[0]:
- try:
- package_path.pop()
- except IndexError:
- return None # FIXME: error?
- module_path.pop(0)
- relative = '.'.join(package_path + module_path)
- pxd = self.context.find_pxd_file(relative, None)
- if pxd:
- return pxd
- if is_relative:
- return None # FIXME: error?
- return self.context.find_pxd_file(module, None)
-
- @cached_method
- def cimported_files(self, filename):
- if filename[-4:] == '.pyx' and path_exists(filename[:-4] + '.pxd'):
- pxd_list = [filename[:-4] + '.pxd']
- else:
- pxd_list = []
+
+ def package(self, filename):
+ return package(filename)
+
+ def fully_qualified_name(self, filename):
+ return fully_qualified_name(filename)
+
+ @cached_method
+ def find_pxd(self, module, filename=None):
+ is_relative = module[0] == '.'
+ if is_relative and not filename:
+ raise NotImplementedError("New relative imports.")
+ if filename is not None:
+ module_path = module.split('.')
+ if is_relative:
+ module_path.pop(0) # just explicitly relative
+ package_path = list(self.package(filename))
+ while module_path and not module_path[0]:
+ try:
+ package_path.pop()
+ except IndexError:
+ return None # FIXME: error?
+ module_path.pop(0)
+ relative = '.'.join(package_path + module_path)
+ pxd = self.context.find_pxd_file(relative, None)
+ if pxd:
+ return pxd
+ if is_relative:
+ return None # FIXME: error?
+ return self.context.find_pxd_file(module, None)
+
+ @cached_method
+ def cimported_files(self, filename):
+ if filename[-4:] == '.pyx' and path_exists(filename[:-4] + '.pxd'):
+ pxd_list = [filename[:-4] + '.pxd']
+ else:
+ pxd_list = []
# Cimports generates all possible combinations package.module
# when imported as from package cimport module.
- for module in self.cimports(filename):
- if module[:7] == 'cython.' or module == 'cython':
- continue
- pxd_file = self.find_pxd(module, filename)
- if pxd_file is not None:
- pxd_list.append(pxd_file)
- return tuple(pxd_list)
-
- @cached_method
- def immediate_dependencies(self, filename):
- all = set([filename])
- all.update(self.cimported_files(filename))
- all.update(self.included_files(filename))
- return all
-
- def all_dependencies(self, filename):
- return self.transitive_merge(filename, self.immediate_dependencies, set.union)
-
- @cached_method
- def timestamp(self, filename):
- return os.path.getmtime(filename)
-
- def extract_timestamp(self, filename):
- return self.timestamp(filename), filename
-
- def newest_dependency(self, filename):
- return max([self.extract_timestamp(f) for f in self.all_dependencies(filename)])
-
+ for module in self.cimports(filename):
+ if module[:7] == 'cython.' or module == 'cython':
+ continue
+ pxd_file = self.find_pxd(module, filename)
+ if pxd_file is not None:
+ pxd_list.append(pxd_file)
+ return tuple(pxd_list)
+
+ @cached_method
+ def immediate_dependencies(self, filename):
+ all = set([filename])
+ all.update(self.cimported_files(filename))
+ all.update(self.included_files(filename))
+ return all
+
+ def all_dependencies(self, filename):
+ return self.transitive_merge(filename, self.immediate_dependencies, set.union)
+
+ @cached_method
+ def timestamp(self, filename):
+ return os.path.getmtime(filename)
+
+ def extract_timestamp(self, filename):
+ return self.timestamp(filename), filename
+
+ def newest_dependency(self, filename):
+ return max([self.extract_timestamp(f) for f in self.all_dependencies(filename)])
+
def transitive_fingerprint(self, filename, module, compilation_options):
r"""
Return a fingerprint of a cython file that is about to be cythonized.
@@ -637,11 +637,11 @@ class DependencyTree(object):
is found, the cythonization can be skipped. The fingerprint must
incorporate everything that has an influence on the generated code.
"""
- try:
+ try:
m = hashlib.md5(__version__.encode('UTF-8'))
m.update(file_hash(filename).encode('UTF-8'))
- for x in sorted(self.all_dependencies(filename)):
- if os.path.splitext(x)[1] not in ('.c', '.cpp', '.h'):
+ for x in sorted(self.all_dependencies(filename)):
+ if os.path.splitext(x)[1] not in ('.c', '.cpp', '.h'):
m.update(file_hash(x).encode('UTF-8'))
# Include the module attributes that change the compilation result
# in the fingerprint. We do not iterate over module.__dict__ and
@@ -655,21 +655,21 @@ class DependencyTree(object):
)).encode('UTF-8'))
m.update(compilation_options.get_fingerprint().encode('UTF-8'))
- return m.hexdigest()
- except IOError:
- return None
-
- def distutils_info0(self, filename):
- info = self.parse_dependencies(filename)[3]
+ return m.hexdigest()
+ except IOError:
+ return None
+
+ def distutils_info0(self, filename):
+ info = self.parse_dependencies(filename)[3]
kwds = info.values
cimports, externs, incdirs = self.cimports_externs_incdirs(filename)
basedir = os.getcwd()
# Add dependencies on "cdef extern from ..." files
- if externs:
+ if externs:
externs = _make_relative(externs, basedir)
if 'depends' in kwds:
kwds['depends'] = list(set(kwds['depends']).union(externs))
- else:
+ else:
kwds['depends'] = list(externs)
# Add include_dirs to ensure that the C compiler will find the
# "cdef extern from ..." files
@@ -679,58 +679,58 @@ class DependencyTree(object):
if inc not in include_dirs:
include_dirs.append(inc)
kwds['include_dirs'] = include_dirs
- return info
-
- def distutils_info(self, filename, aliases=None, base=None):
- return (self.transitive_merge(filename, self.distutils_info0, DistutilsInfo.merge)
- .subs(aliases)
- .merge(base))
-
- def transitive_merge(self, node, extract, merge):
- try:
- seen = self._transitive_cache[extract, merge]
- except KeyError:
- seen = self._transitive_cache[extract, merge] = {}
- return self.transitive_merge_helper(
- node, extract, merge, seen, {}, self.cimported_files)[0]
-
- def transitive_merge_helper(self, node, extract, merge, seen, stack, outgoing):
- if node in seen:
- return seen[node], None
- deps = extract(node)
- if node in stack:
- return deps, node
- try:
- stack[node] = len(stack)
- loop = None
- for next in outgoing(node):
- sub_deps, sub_loop = self.transitive_merge_helper(next, extract, merge, seen, stack, outgoing)
- if sub_loop is not None:
- if loop is not None and stack[loop] < stack[sub_loop]:
- pass
- else:
- loop = sub_loop
- deps = merge(deps, sub_deps)
- if loop == node:
- loop = None
- if loop is None:
- seen[node] = deps
- return deps, loop
- finally:
- del stack[node]
-
-
-_dep_tree = None
-
-def create_dependency_tree(ctx=None, quiet=False):
- global _dep_tree
- if _dep_tree is None:
- if ctx is None:
- ctx = Context(["."], CompilationOptions(default_options))
- _dep_tree = DependencyTree(ctx, quiet=quiet)
- return _dep_tree
-
-
+ return info
+
+ def distutils_info(self, filename, aliases=None, base=None):
+ return (self.transitive_merge(filename, self.distutils_info0, DistutilsInfo.merge)
+ .subs(aliases)
+ .merge(base))
+
+ def transitive_merge(self, node, extract, merge):
+ try:
+ seen = self._transitive_cache[extract, merge]
+ except KeyError:
+ seen = self._transitive_cache[extract, merge] = {}
+ return self.transitive_merge_helper(
+ node, extract, merge, seen, {}, self.cimported_files)[0]
+
+ def transitive_merge_helper(self, node, extract, merge, seen, stack, outgoing):
+ if node in seen:
+ return seen[node], None
+ deps = extract(node)
+ if node in stack:
+ return deps, node
+ try:
+ stack[node] = len(stack)
+ loop = None
+ for next in outgoing(node):
+ sub_deps, sub_loop = self.transitive_merge_helper(next, extract, merge, seen, stack, outgoing)
+ if sub_loop is not None:
+ if loop is not None and stack[loop] < stack[sub_loop]:
+ pass
+ else:
+ loop = sub_loop
+ deps = merge(deps, sub_deps)
+ if loop == node:
+ loop = None
+ if loop is None:
+ seen[node] = deps
+ return deps, loop
+ finally:
+ del stack[node]
+
+
+_dep_tree = None
+
+def create_dependency_tree(ctx=None, quiet=False):
+ global _dep_tree
+ if _dep_tree is None:
+ if ctx is None:
+ ctx = Context(["."], CompilationOptions(default_options))
+ _dep_tree = DependencyTree(ctx, quiet=quiet)
+ return _dep_tree
+
+
# If this changes, change also docs/src/reference/compilation.rst
# which mentions this function
def default_create_extension(template, kwds):
@@ -745,7 +745,7 @@ def default_create_extension(template, kwds):
return (ext, metadata)
-# This may be useful for advanced users?
+# This may be useful for advanced users?
def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=False, language=None,
exclude_failures=False):
if language is not None:
@@ -756,17 +756,17 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
if patterns is None:
return [], {}
elif isinstance(patterns, basestring) or not isinstance(patterns, Iterable):
- patterns = [patterns]
- explicit_modules = set([m.name for m in patterns if isinstance(m, Extension)])
- seen = set()
- deps = create_dependency_tree(ctx, quiet=quiet)
- to_exclude = set()
- if not isinstance(exclude, list):
- exclude = [exclude]
- for pattern in exclude:
- to_exclude.update(map(os.path.abspath, extended_iglob(pattern)))
-
- module_list = []
+ patterns = [patterns]
+ explicit_modules = set([m.name for m in patterns if isinstance(m, Extension)])
+ seen = set()
+ deps = create_dependency_tree(ctx, quiet=quiet)
+ to_exclude = set()
+ if not isinstance(exclude, list):
+ exclude = [exclude]
+ for pattern in exclude:
+ to_exclude.update(map(os.path.abspath, extended_iglob(pattern)))
+
+ module_list = []
module_metadata = {}
# workaround for setuptools
@@ -782,12 +782,12 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
# default function.
create_extension = ctx.options.create_extension or default_create_extension
- for pattern in patterns:
- if isinstance(pattern, str):
- filepattern = pattern
+ for pattern in patterns:
+ if isinstance(pattern, str):
+ filepattern = pattern
template = Extension(pattern, []) # Fake Extension without sources
- name = '*'
- base = None
+ name = '*'
+ base = None
ext_language = language
elif isinstance(pattern, (Extension_distutils, Extension_setuptools)):
cython_sources = [s for s in pattern.sources
@@ -798,56 +798,56 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
print("Warning: Multiple cython sources found for extension '%s': %s\n"
"See http://cython.readthedocs.io/en/latest/src/userguide/sharing_declarations.html "
"for sharing declarations among Cython files." % (pattern.name, cython_sources))
- else:
- # ignore non-cython modules
- module_list.append(pattern)
- continue
- template = pattern
- name = template.name
- base = DistutilsInfo(exn=template)
+ else:
+ # ignore non-cython modules
+ module_list.append(pattern)
+ continue
+ template = pattern
+ name = template.name
+ base = DistutilsInfo(exn=template)
ext_language = None # do not override whatever the Extension says
- else:
+ else:
msg = str("pattern is not of type str nor subclass of Extension (%s)"
" but of type %s and class %s" % (repr(Extension),
type(pattern),
pattern.__class__))
raise TypeError(msg)
-
+
for file in nonempty(sorted(extended_iglob(filepattern)), "'%s' doesn't match any files" % filepattern):
- if os.path.abspath(file) in to_exclude:
- continue
+ if os.path.abspath(file) in to_exclude:
+ continue
module_name = deps.fully_qualified_name(file)
- if '*' in name:
- if module_name in explicit_modules:
- continue
+ if '*' in name:
+ if module_name in explicit_modules:
+ continue
elif name:
- module_name = name
-
+ module_name = name
+
Utils.raise_error_if_module_name_forbidden(module_name)
- if module_name not in seen:
- try:
- kwds = deps.distutils_info(file, aliases, base).values
- except Exception:
- if exclude_failures:
- continue
- raise
- if base is not None:
- for key, value in base.values.items():
- if key not in kwds:
- kwds[key] = value
-
+ if module_name not in seen:
+ try:
+ kwds = deps.distutils_info(file, aliases, base).values
+ except Exception:
+ if exclude_failures:
+ continue
+ raise
+ if base is not None:
+ for key, value in base.values.items():
+ if key not in kwds:
+ kwds[key] = value
+
kwds['name'] = module_name
sources = [file] + [m for m in template.sources if m != filepattern]
- if 'sources' in kwds:
- # allow users to add .c files etc.
- for source in kwds['sources']:
- source = encode_filename_in_py2(source)
- if source not in sources:
- sources.append(source)
+ if 'sources' in kwds:
+ # allow users to add .c files etc.
+ for source in kwds['sources']:
+ source = encode_filename_in_py2(source)
+ if source not in sources:
+ sources.append(source)
kwds['sources'] = sources
-
+
if ext_language and 'language' not in kwds:
kwds['language'] = ext_language
@@ -873,17 +873,17 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
# never seen this in the wild, but probably better to warn about this unexpected case
print("Warning: Cython source file not found in sources list, adding %s" % file)
m.sources.insert(0, file)
- seen.add(name)
+ seen.add(name)
return module_list, module_metadata
-
-
-# This is the user-exposed entry point.
+
+
+# This is the user-exposed entry point.
def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, force=False, language=None,
- exclude_failures=False, **options):
- """
- Compile a set of source modules into C/C++ files and return a list of distutils
- Extension objects for them.
-
+ exclude_failures=False, **options):
+ """
+ Compile a set of source modules into C/C++ files and return a list of distutils
+ Extension objects for them.
+
:param module_list: As module list, pass either a glob pattern, a list of glob
patterns or a list of Extension objects. The latter
allows you to configure the extensions separately
@@ -892,10 +892,10 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
glob patterns as their sources. Then, cythonize
will resolve the pattern and create a
copy of the Extension for every matching file.
-
+
:param exclude: When passing glob patterns as ``module_list``, you can exclude certain
module names explicitly by passing them into the ``exclude`` option.
-
+
:param nthreads: The number of concurrent builds for parallel compilation
(requires the ``multiprocessing`` module).
@@ -910,10 +910,10 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
find the value of ``MY_HEADERS`` in the ``setup.py``, put it in a python
variable called ``foo`` as a string, and then call
``cythonize(..., aliases={'MY_HEADERS': foo})``.
-
+
:param quiet: If True, Cython won't print error, warning, or status messages during the
compilation.
-
+
:param force: Forces the recompilation of the Cython modules, even if the timestamps
don't indicate that a recompilation is necessary.
@@ -946,12 +946,12 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
See :ref:`compiler-directives`.
:param depfile: produce depfiles for the sources if True.
- """
+ """
if exclude is None:
exclude = []
- if 'include_path' not in options:
- options['include_path'] = ['.']
- if 'common_utility_include_dir' in options:
+ if 'include_path' not in options:
+ options['include_path'] = ['.']
+ if 'common_utility_include_dir' in options:
safe_makedirs(options['common_utility_include_dir'])
depfile = options.pop('depfile', None)
@@ -963,21 +963,21 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
pythran_options.cplus = True
pythran_options.np_pythran = True
- c_options = CompilationOptions(**options)
- cpp_options = CompilationOptions(**options); cpp_options.cplus = True
- ctx = c_options.create_context()
- options = c_options
+ c_options = CompilationOptions(**options)
+ cpp_options = CompilationOptions(**options); cpp_options.cplus = True
+ ctx = c_options.create_context()
+ options = c_options
module_list, module_metadata = create_extension_list(
- module_list,
- exclude=exclude,
- ctx=ctx,
- quiet=quiet,
- exclude_failures=exclude_failures,
+ module_list,
+ exclude=exclude,
+ ctx=ctx,
+ quiet=quiet,
+ exclude_failures=exclude_failures,
language=language,
- aliases=aliases)
- deps = create_dependency_tree(ctx, quiet=quiet)
- build_dir = getattr(options, 'build_dir', None)
-
+ aliases=aliases)
+ deps = create_dependency_tree(ctx, quiet=quiet)
+ build_dir = getattr(options, 'build_dir', None)
+
def copy_to_build_dir(filepath, root=os.getcwd()):
filepath_abs = os.path.abspath(filepath)
if os.path.isabs(filepath):
@@ -989,12 +989,12 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
copy_once_if_newer(filepath_abs, mod_dir)
modules_by_cfile = collections.defaultdict(list)
- to_compile = []
- for m in module_list:
- if build_dir:
- for dep in m.depends:
- copy_to_build_dir(dep)
-
+ to_compile = []
+ for m in module_list:
+ if build_dir:
+ for dep in m.depends:
+ copy_to_build_dir(dep)
+
cy_sources = [
source for source in m.sources
if os.path.splitext(source)[1] in ('.pyx', '.py')]
@@ -1005,28 +1005,28 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
# infer FQMN from source files
full_module_name = None
- new_sources = []
- for source in m.sources:
- base, ext = os.path.splitext(source)
- if ext in ('.pyx', '.py'):
+ new_sources = []
+ for source in m.sources:
+ base, ext = os.path.splitext(source)
+ if ext in ('.pyx', '.py'):
if m.np_pythran:
- c_file = base + '.cpp'
+ c_file = base + '.cpp'
options = pythran_options
elif m.language == 'c++':
c_file = base + '.cpp'
- options = cpp_options
- else:
- c_file = base + '.c'
- options = c_options
-
- # setup for out of place build directory if enabled
- if build_dir:
+ options = cpp_options
+ else:
+ c_file = base + '.c'
+ options = c_options
+
+ # setup for out of place build directory if enabled
+ if build_dir:
if os.path.isabs(c_file):
warnings.warn("build_dir has no effect for absolute source paths")
- c_file = os.path.join(build_dir, c_file)
- dir = os.path.dirname(c_file)
+ c_file = os.path.join(build_dir, c_file)
+ dir = os.path.dirname(c_file)
safe_makedirs_once(dir)
-
+
# write out the depfile, if requested
if depfile:
dependencies = deps.all_dependencies(source)
@@ -1047,45 +1047,45 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
with open(c_file+'.dep', 'w') as outfile:
outfile.write(depline)
- if os.path.exists(c_file):
- c_timestamp = os.path.getmtime(c_file)
- else:
- c_timestamp = -1
-
- # Priority goes first to modified files, second to direct
- # dependents, and finally to indirect dependents.
- if c_timestamp < deps.timestamp(source):
- dep_timestamp, dep = deps.timestamp(source), source
- priority = 0
- else:
- dep_timestamp, dep = deps.newest_dependency(source)
- priority = 2 - (dep in deps.immediate_dependencies(source))
- if force or c_timestamp < dep_timestamp:
+ if os.path.exists(c_file):
+ c_timestamp = os.path.getmtime(c_file)
+ else:
+ c_timestamp = -1
+
+ # Priority goes first to modified files, second to direct
+ # dependents, and finally to indirect dependents.
+ if c_timestamp < deps.timestamp(source):
+ dep_timestamp, dep = deps.timestamp(source), source
+ priority = 0
+ else:
+ dep_timestamp, dep = deps.newest_dependency(source)
+ priority = 2 - (dep in deps.immediate_dependencies(source))
+ if force or c_timestamp < dep_timestamp:
if not quiet and not force:
- if source == dep:
- print("Compiling %s because it changed." % source)
- else:
- print("Compiling %s because it depends on %s." % (source, dep))
+ if source == dep:
+ print("Compiling %s because it changed." % source)
+ else:
+ print("Compiling %s because it depends on %s." % (source, dep))
if not force and options.cache:
fingerprint = deps.transitive_fingerprint(source, m, options)
- else:
- fingerprint = None
+ else:
+ fingerprint = None
to_compile.append((
priority, source, c_file, fingerprint, quiet,
options, not exclude_failures, module_metadata.get(m.name),
full_module_name))
- new_sources.append(c_file)
+ new_sources.append(c_file)
modules_by_cfile[c_file].append(m)
- else:
- new_sources.append(source)
- if build_dir:
- copy_to_build_dir(source)
- m.sources = new_sources
-
+ else:
+ new_sources.append(source)
+ if build_dir:
+ copy_to_build_dir(source)
+ m.sources = new_sources
+
if options.cache:
- if not os.path.exists(options.cache):
- os.makedirs(options.cache)
- to_compile.sort()
+ if not os.path.exists(options.cache):
+ os.makedirs(options.cache)
+ to_compile.sort()
# Drop "priority" component of "to_compile" entries and add a
# simple progress indicator.
N = len(to_compile)
@@ -1095,119 +1095,119 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
to_compile[i] = to_compile[i][1:] + (progress,)
if N <= 1:
- nthreads = 0
- if nthreads:
- # Requires multiprocessing (or Python >= 2.6)
- try:
- import multiprocessing
- pool = multiprocessing.Pool(
- nthreads, initializer=_init_multiprocessing_helper)
- except (ImportError, OSError):
- print("multiprocessing required for parallel cythonization")
- nthreads = 0
- else:
- # This is a bit more involved than it should be, because KeyboardInterrupts
- # break the multiprocessing workers when using a normal pool.map().
- # See, for example:
- # http://noswap.com/blog/python-multiprocessing-keyboardinterrupt
- try:
- result = pool.map_async(cythonize_one_helper, to_compile, chunksize=1)
- pool.close()
- while not result.ready():
- try:
- result.get(99999) # seconds
- except multiprocessing.TimeoutError:
- pass
- except KeyboardInterrupt:
- pool.terminate()
- raise
- pool.join()
- if not nthreads:
- for args in to_compile:
+ nthreads = 0
+ if nthreads:
+ # Requires multiprocessing (or Python >= 2.6)
+ try:
+ import multiprocessing
+ pool = multiprocessing.Pool(
+ nthreads, initializer=_init_multiprocessing_helper)
+ except (ImportError, OSError):
+ print("multiprocessing required for parallel cythonization")
+ nthreads = 0
+ else:
+ # This is a bit more involved than it should be, because KeyboardInterrupts
+ # break the multiprocessing workers when using a normal pool.map().
+ # See, for example:
+ # http://noswap.com/blog/python-multiprocessing-keyboardinterrupt
+ try:
+ result = pool.map_async(cythonize_one_helper, to_compile, chunksize=1)
+ pool.close()
+ while not result.ready():
+ try:
+ result.get(99999) # seconds
+ except multiprocessing.TimeoutError:
+ pass
+ except KeyboardInterrupt:
+ pool.terminate()
+ raise
+ pool.join()
+ if not nthreads:
+ for args in to_compile:
cythonize_one(*args)
-
- if exclude_failures:
- failed_modules = set()
+
+ if exclude_failures:
+ failed_modules = set()
for c_file, modules in modules_by_cfile.items():
- if not os.path.exists(c_file):
- failed_modules.update(modules)
- elif os.path.getsize(c_file) < 200:
- f = io_open(c_file, 'r', encoding='iso8859-1')
- try:
- if f.read(len('#error ')) == '#error ':
- # dead compilation result
- failed_modules.update(modules)
- finally:
- f.close()
- if failed_modules:
- for module in failed_modules:
- module_list.remove(module)
- print("Failed compilations: %s" % ', '.join(sorted([
- module.name for module in failed_modules])))
-
+ if not os.path.exists(c_file):
+ failed_modules.update(modules)
+ elif os.path.getsize(c_file) < 200:
+ f = io_open(c_file, 'r', encoding='iso8859-1')
+ try:
+ if f.read(len('#error ')) == '#error ':
+ # dead compilation result
+ failed_modules.update(modules)
+ finally:
+ f.close()
+ if failed_modules:
+ for module in failed_modules:
+ module_list.remove(module)
+ print("Failed compilations: %s" % ', '.join(sorted([
+ module.name for module in failed_modules])))
+
if options.cache:
- cleanup_cache(options.cache, getattr(options, 'cache_size', 1024 * 1024 * 100))
- # cythonize() is often followed by the (non-Python-buffered)
- # compiler output, flush now to avoid interleaving output.
- sys.stdout.flush()
- return module_list
-
-
-if os.environ.get('XML_RESULTS'):
- compile_result_dir = os.environ['XML_RESULTS']
- def record_results(func):
- def with_record(*args):
- t = time.time()
- success = True
- try:
- try:
- func(*args)
- except:
- success = False
- finally:
- t = time.time() - t
- module = fully_qualified_name(args[0])
- name = "cythonize." + module
- failures = 1 - success
- if success:
- failure_item = ""
- else:
- failure_item = "failure"
- output = open(os.path.join(compile_result_dir, name + ".xml"), "w")
- output.write("""
- <?xml version="1.0" ?>
- <testsuite name="%(name)s" errors="0" failures="%(failures)s" tests="1" time="%(t)s">
- <testcase classname="%(name)s" name="cythonize">
- %(failure_item)s
- </testcase>
- </testsuite>
- """.strip() % locals())
- output.close()
- return with_record
-else:
+ cleanup_cache(options.cache, getattr(options, 'cache_size', 1024 * 1024 * 100))
+ # cythonize() is often followed by the (non-Python-buffered)
+ # compiler output, flush now to avoid interleaving output.
+ sys.stdout.flush()
+ return module_list
+
+
+if os.environ.get('XML_RESULTS'):
+ compile_result_dir = os.environ['XML_RESULTS']
+ def record_results(func):
+ def with_record(*args):
+ t = time.time()
+ success = True
+ try:
+ try:
+ func(*args)
+ except:
+ success = False
+ finally:
+ t = time.time() - t
+ module = fully_qualified_name(args[0])
+ name = "cythonize." + module
+ failures = 1 - success
+ if success:
+ failure_item = ""
+ else:
+ failure_item = "failure"
+ output = open(os.path.join(compile_result_dir, name + ".xml"), "w")
+ output.write("""
+ <?xml version="1.0" ?>
+ <testsuite name="%(name)s" errors="0" failures="%(failures)s" tests="1" time="%(t)s">
+ <testcase classname="%(name)s" name="cythonize">
+ %(failure_item)s
+ </testcase>
+ </testsuite>
+ """.strip() % locals())
+ output.close()
+ return with_record
+else:
def record_results(func):
return func
-
-# TODO: Share context? Issue: pyx processing leaks into pxd module
-@record_results
+
+# TODO: Share context? Issue: pyx processing leaks into pxd module
+@record_results
def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
raise_on_failure=True, embedded_metadata=None, full_module_name=None,
progress=""):
from ..Compiler.Main import compile_single, default_options
- from ..Compiler.Errors import CompileError, PyrexError
-
- if fingerprint:
- if not os.path.exists(options.cache):
+ from ..Compiler.Errors import CompileError, PyrexError
+
+ if fingerprint:
+ if not os.path.exists(options.cache):
safe_makedirs(options.cache)
- # Cython-generated c files are highly compressible.
- # (E.g. a compression ratio of about 10 for Sage).
+ # Cython-generated c files are highly compressible.
+ # (E.g. a compression ratio of about 10 for Sage).
fingerprint_file_base = join_path(
options.cache, "%s-%s" % (os.path.basename(c_file), fingerprint))
gz_fingerprint_file = fingerprint_file_base + gzip_ext
zip_fingerprint_file = fingerprint_file_base + '.zip'
if os.path.exists(gz_fingerprint_file) or os.path.exists(zip_fingerprint_file):
- if not quiet:
+ if not quiet:
print("%sFound compiled %s in cache" % (progress, pyx_file))
if os.path.exists(gz_fingerprint_file):
os.utime(gz_fingerprint_file, None)
@@ -1220,37 +1220,37 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
with contextlib.closing(zipfile.ZipFile(zip_fingerprint_file)) as z:
for artifact in z.namelist():
z.extract(artifact, os.path.join(dirname, artifact))
- return
- if not quiet:
+ return
+ if not quiet:
print("%sCythonizing %s" % (progress, pyx_file))
- if options is None:
- options = CompilationOptions(default_options)
- options.output_file = c_file
+ if options is None:
+ options = CompilationOptions(default_options)
+ options.output_file = c_file
options.embedded_metadata = embedded_metadata
-
- any_failures = 0
- try:
+
+ any_failures = 0
+ try:
result = compile_single(pyx_file, options, full_module_name=full_module_name)
- if result.num_errors > 0:
- any_failures = 1
+ if result.num_errors > 0:
+ any_failures = 1
except (EnvironmentError, PyrexError) as e:
- sys.stderr.write('%s\n' % e)
- any_failures = 1
- # XXX
- import traceback
- traceback.print_exc()
- except Exception:
- if raise_on_failure:
- raise
- import traceback
- traceback.print_exc()
- any_failures = 1
- if any_failures:
- if raise_on_failure:
- raise CompileError(None, pyx_file)
- elif os.path.exists(c_file):
- os.remove(c_file)
- elif fingerprint:
+ sys.stderr.write('%s\n' % e)
+ any_failures = 1
+ # XXX
+ import traceback
+ traceback.print_exc()
+ except Exception:
+ if raise_on_failure:
+ raise
+ import traceback
+ traceback.print_exc()
+ any_failures = 1
+ if any_failures:
+ if raise_on_failure:
+ raise CompileError(None, pyx_file)
+ elif os.path.exists(c_file):
+ os.remove(c_file)
+ elif fingerprint:
artifacts = list(filter(None, [
getattr(result, attr, None)
for attr in ('c_file', 'h_file', 'api_file', 'i_file')]))
@@ -1266,43 +1266,43 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
for artifact in artifacts:
zip.write(artifact, os.path.basename(artifact))
os.rename(fingerprint_file + '.tmp', fingerprint_file)
-
-
-def cythonize_one_helper(m):
- import traceback
- try:
+
+
+def cythonize_one_helper(m):
+ import traceback
+ try:
return cythonize_one(*m)
- except Exception:
- traceback.print_exc()
- raise
-
-
-def _init_multiprocessing_helper():
- # KeyboardInterrupt kills workers, so don't let them get it
- import signal
- signal.signal(signal.SIGINT, signal.SIG_IGN)
-
-
-def cleanup_cache(cache, target_size, ratio=.85):
- try:
- p = subprocess.Popen(['du', '-s', '-k', os.path.abspath(cache)], stdout=subprocess.PIPE)
- res = p.wait()
- if res == 0:
- total_size = 1024 * int(p.stdout.read().strip().split()[0])
- if total_size < target_size:
- return
- except (OSError, ValueError):
- pass
- total_size = 0
- all = []
- for file in os.listdir(cache):
- path = join_path(cache, file)
- s = os.stat(path)
- total_size += s.st_size
- all.append((s.st_atime, s.st_size, path))
- if total_size > target_size:
- for time, size, file in reversed(sorted(all)):
- os.unlink(file)
- total_size -= size
- if total_size < target_size * ratio:
- break
+ except Exception:
+ traceback.print_exc()
+ raise
+
+
+def _init_multiprocessing_helper():
+ # KeyboardInterrupt kills workers, so don't let them get it
+ import signal
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+
+def cleanup_cache(cache, target_size, ratio=.85):
+ try:
+ p = subprocess.Popen(['du', '-s', '-k', os.path.abspath(cache)], stdout=subprocess.PIPE)
+ res = p.wait()
+ if res == 0:
+ total_size = 1024 * int(p.stdout.read().strip().split()[0])
+ if total_size < target_size:
+ return
+ except (OSError, ValueError):
+ pass
+ total_size = 0
+ all = []
+ for file in os.listdir(cache):
+ path = join_path(cache, file)
+ s = os.stat(path)
+ total_size += s.st_size
+ all.append((s.st_atime, s.st_size, path))
+ if total_size > target_size:
+ for time, size, file in reversed(sorted(all)):
+ os.unlink(file)
+ total_size -= size
+ if total_size < target_size * ratio:
+ break
diff --git a/contrib/tools/cython/Cython/Build/Inline.py b/contrib/tools/cython/Cython/Build/Inline.py
index eb3c40bc77..db6d2640a5 100644
--- a/contrib/tools/cython/Cython/Build/Inline.py
+++ b/contrib/tools/cython/Cython/Build/Inline.py
@@ -1,39 +1,39 @@
-from __future__ import absolute_import
-
+from __future__ import absolute_import
+
import hashlib
import inspect
import os
import re
import sys
-
-from distutils.core import Distribution, Extension
-from distutils.command.build_ext import build_ext
-
-import Cython
+
+from distutils.core import Distribution, Extension
+from distutils.command.build_ext import build_ext
+
+import Cython
from ..Compiler.Main import Context, default_options
-
+
from ..Compiler.Visitor import CythonTransform, EnvTransform
from ..Compiler.ParseTreeTransforms import SkipDeclarations
-from ..Compiler.TreeFragment import parse_from_strings
+from ..Compiler.TreeFragment import parse_from_strings
from ..Compiler.StringEncoding import _unicode
-from .Dependencies import strip_string_literals, cythonize, cached_function
+from .Dependencies import strip_string_literals, cythonize, cached_function
from ..Compiler import Pipeline
-from ..Utils import get_cython_cache_dir
-import cython as cython_module
-
+from ..Utils import get_cython_cache_dir
+import cython as cython_module
+
IS_PY3 = sys.version_info >= (3,)
-# A utility function to convert user-supplied ASCII strings to unicode.
+# A utility function to convert user-supplied ASCII strings to unicode.
if not IS_PY3:
- def to_unicode(s):
+ def to_unicode(s):
if isinstance(s, bytes):
- return s.decode('ascii')
- else:
- return s
-else:
- to_unicode = lambda x: x
-
+ return s.decode('ascii')
+ else:
+ return s
+else:
+ to_unicode = lambda x: x
+
if sys.version_info < (3, 5):
import imp
def load_dynamic(name, module_path):
@@ -46,87 +46,87 @@ else:
# sys.modules[name] = module
spec.loader.exec_module(module)
return module
-
-class UnboundSymbols(EnvTransform, SkipDeclarations):
- def __init__(self):
- CythonTransform.__init__(self, None)
- self.unbound = set()
- def visit_NameNode(self, node):
- if not self.current_env().lookup(node.name):
- self.unbound.add(node.name)
- return node
- def __call__(self, node):
- super(UnboundSymbols, self).__call__(node)
- return self.unbound
-
-
-@cached_function
-def unbound_symbols(code, context=None):
- code = to_unicode(code)
- if context is None:
- context = Context([], default_options)
- from ..Compiler.ParseTreeTransforms import AnalyseDeclarationsTransform
- tree = parse_from_strings('(tree fragment)', code)
- for phase in Pipeline.create_pipeline(context, 'pyx'):
- if phase is None:
- continue
- tree = phase(tree)
- if isinstance(phase, AnalyseDeclarationsTransform):
- break
- try:
- import builtins
- except ImportError:
- import __builtin__ as builtins
+
+class UnboundSymbols(EnvTransform, SkipDeclarations):
+ def __init__(self):
+ CythonTransform.__init__(self, None)
+ self.unbound = set()
+ def visit_NameNode(self, node):
+ if not self.current_env().lookup(node.name):
+ self.unbound.add(node.name)
+ return node
+ def __call__(self, node):
+ super(UnboundSymbols, self).__call__(node)
+ return self.unbound
+
+
+@cached_function
+def unbound_symbols(code, context=None):
+ code = to_unicode(code)
+ if context is None:
+ context = Context([], default_options)
+ from ..Compiler.ParseTreeTransforms import AnalyseDeclarationsTransform
+ tree = parse_from_strings('(tree fragment)', code)
+ for phase in Pipeline.create_pipeline(context, 'pyx'):
+ if phase is None:
+ continue
+ tree = phase(tree)
+ if isinstance(phase, AnalyseDeclarationsTransform):
+ break
+ try:
+ import builtins
+ except ImportError:
+ import __builtin__ as builtins
return tuple(UnboundSymbols()(tree) - set(dir(builtins)))
-
-
-def unsafe_type(arg, context=None):
- py_type = type(arg)
- if py_type is int:
- return 'long'
- else:
- return safe_type(arg, context)
-
-
-def safe_type(arg, context=None):
- py_type = type(arg)
+
+
+def unsafe_type(arg, context=None):
+ py_type = type(arg)
+ if py_type is int:
+ return 'long'
+ else:
+ return safe_type(arg, context)
+
+
+def safe_type(arg, context=None):
+ py_type = type(arg)
if py_type in (list, tuple, dict, str):
- return py_type.__name__
- elif py_type is complex:
- return 'double complex'
- elif py_type is float:
- return 'double'
- elif py_type is bool:
- return 'bint'
- elif 'numpy' in sys.modules and isinstance(arg, sys.modules['numpy'].ndarray):
- return 'numpy.ndarray[numpy.%s_t, ndim=%s]' % (arg.dtype.name, arg.ndim)
- else:
+ return py_type.__name__
+ elif py_type is complex:
+ return 'double complex'
+ elif py_type is float:
+ return 'double'
+ elif py_type is bool:
+ return 'bint'
+ elif 'numpy' in sys.modules and isinstance(arg, sys.modules['numpy'].ndarray):
+ return 'numpy.ndarray[numpy.%s_t, ndim=%s]' % (arg.dtype.name, arg.ndim)
+ else:
for base_type in py_type.__mro__:
- if base_type.__module__ in ('__builtin__', 'builtins'):
- return 'object'
- module = context.find_module(base_type.__module__, need_pxd=False)
- if module:
- entry = module.lookup(base_type.__name__)
- if entry.is_type:
- return '%s.%s' % (base_type.__module__, base_type.__name__)
- return 'object'
-
-
-def _get_build_extension():
- dist = Distribution()
- # Ensure the build respects distutils configuration by parsing
- # the configuration files
- config_files = dist.find_config_files()
- dist.parse_config_files(config_files)
- build_extension = build_ext(dist)
- build_extension.finalize_options()
- return build_extension
-
-
-@cached_function
-def _create_context(cython_include_dirs):
- return Context(list(cython_include_dirs), default_options)
-
+ if base_type.__module__ in ('__builtin__', 'builtins'):
+ return 'object'
+ module = context.find_module(base_type.__module__, need_pxd=False)
+ if module:
+ entry = module.lookup(base_type.__name__)
+ if entry.is_type:
+ return '%s.%s' % (base_type.__module__, base_type.__name__)
+ return 'object'
+
+
+def _get_build_extension():
+ dist = Distribution()
+ # Ensure the build respects distutils configuration by parsing
+ # the configuration files
+ config_files = dist.find_config_files()
+ dist.parse_config_files(config_files)
+ build_extension = build_ext(dist)
+ build_extension.finalize_options()
+ return build_extension
+
+
+@cached_function
+def _create_context(cython_include_dirs):
+ return Context(list(cython_include_dirs), default_options)
+
_cython_inline_cache = {}
_cython_inline_default_context = _create_context(('.',))
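The unsafe_type/safe_type pair in the hunk above maps the runtime type of each keyword argument onto the Cython declaration used in the generated __invoke() signature. A minimal sketch of that mapping, assuming the behaviour defined above:

    from Cython.Build.Inline import safe_type, unsafe_type

    # Python ints become a C 'long' only under the "unsafe" mapping;
    # the safe mapping keeps them as generic Python objects.
    assert unsafe_type(1) == 'long'
    assert safe_type(1) == 'object'
    # Builtin containers keep their own name; numeric types get C equivalents.
    assert safe_type([1, 2]) == 'list'
    assert safe_type(3.0) == 'double'
    assert safe_type(True) == 'bint'
    assert safe_type(1 + 2j) == 'double complex'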
@@ -159,8 +159,8 @@ def cython_inline(code, get_type=unsafe_type,
cython_include_dirs=None, cython_compiler_directives=None,
force=False, quiet=False, locals=None, globals=None, language_level=None, **kwds):
- if get_type is None:
- get_type = lambda x: 'object'
+ if get_type is None:
+ get_type = lambda x: 'object'
ctx = _create_context(tuple(cython_include_dirs)) if cython_include_dirs else _cython_inline_default_context
cython_compiler_directives = dict(cython_compiler_directives) if cython_compiler_directives else {}
@@ -182,194 +182,194 @@ def cython_inline(code, get_type=unsafe_type,
return invoke(*arg_list)
orig_code = code
- code = to_unicode(code)
- code, literals = strip_string_literals(code)
- code = strip_common_indent(code)
- if locals is None:
- locals = inspect.currentframe().f_back.f_back.f_locals
- if globals is None:
- globals = inspect.currentframe().f_back.f_back.f_globals
- try:
+ code = to_unicode(code)
+ code, literals = strip_string_literals(code)
+ code = strip_common_indent(code)
+ if locals is None:
+ locals = inspect.currentframe().f_back.f_back.f_locals
+ if globals is None:
+ globals = inspect.currentframe().f_back.f_back.f_globals
+ try:
_cython_inline_cache[orig_code] = _unbound_symbols = unbound_symbols(code)
_populate_unbound(kwds, _unbound_symbols, locals, globals)
- except AssertionError:
- if not quiet:
-            # Parsing from strings is not fully supported (e.g. cimports).
- print("Could not parse code as a string (to extract unbound symbols).")
+ except AssertionError:
+ if not quiet:
+            # Parsing from strings is not fully supported (e.g. cimports).
+ print("Could not parse code as a string (to extract unbound symbols).")
- cimports = []
+ cimports = []
for name, arg in list(kwds.items()):
- if arg is cython_module:
- cimports.append('\ncimport cython as %s' % name)
- del kwds[name]
+ if arg is cython_module:
+ cimports.append('\ncimport cython as %s' % name)
+ del kwds[name]
arg_names = sorted(kwds)
- arg_sigs = tuple([(get_type(kwds[arg], ctx), arg) for arg in arg_names])
+ arg_sigs = tuple([(get_type(kwds[arg], ctx), arg) for arg in arg_names])
key_hash = _inline_key(orig_code, arg_sigs, language_level)
module_name = "_cython_inline_" + key_hash
-
- if module_name in sys.modules:
- module = sys.modules[module_name]
-
- else:
- build_extension = None
- if cython_inline.so_ext is None:
- # Figure out and cache current extension suffix
- build_extension = _get_build_extension()
- cython_inline.so_ext = build_extension.get_ext_filename('')
-
- module_path = os.path.join(lib_dir, module_name + cython_inline.so_ext)
-
- if not os.path.exists(lib_dir):
- os.makedirs(lib_dir)
- if force or not os.path.isfile(module_path):
- cflags = []
- c_include_dirs = []
- qualified = re.compile(r'([.\w]+)[.]')
- for type, _ in arg_sigs:
- m = qualified.match(type)
- if m:
- cimports.append('\ncimport %s' % m.groups()[0])
- # one special case
- if m.groups()[0] == 'numpy':
- import numpy
- c_include_dirs.append(numpy.get_include())
- # cflags.append('-Wno-unused')
- module_body, func_body = extract_func_code(code)
- params = ', '.join(['%s %s' % a for a in arg_sigs])
- module_code = """
-%(module_body)s
-%(cimports)s
-def __invoke(%(params)s):
-%(func_body)s
- return locals()
- """ % {'cimports': '\n'.join(cimports),
- 'module_body': module_body,
- 'params': params,
- 'func_body': func_body }
- for key, value in literals.items():
- module_code = module_code.replace(key, value)
- pyx_file = os.path.join(lib_dir, module_name + '.pyx')
- fh = open(pyx_file, 'w')
- try:
- fh.write(module_code)
- finally:
- fh.close()
- extension = Extension(
- name = module_name,
- sources = [pyx_file],
- include_dirs = c_include_dirs,
- extra_compile_args = cflags)
- if build_extension is None:
- build_extension = _get_build_extension()
+
+ if module_name in sys.modules:
+ module = sys.modules[module_name]
+
+ else:
+ build_extension = None
+ if cython_inline.so_ext is None:
+ # Figure out and cache current extension suffix
+ build_extension = _get_build_extension()
+ cython_inline.so_ext = build_extension.get_ext_filename('')
+
+ module_path = os.path.join(lib_dir, module_name + cython_inline.so_ext)
+
+ if not os.path.exists(lib_dir):
+ os.makedirs(lib_dir)
+ if force or not os.path.isfile(module_path):
+ cflags = []
+ c_include_dirs = []
+ qualified = re.compile(r'([.\w]+)[.]')
+ for type, _ in arg_sigs:
+ m = qualified.match(type)
+ if m:
+ cimports.append('\ncimport %s' % m.groups()[0])
+ # one special case
+ if m.groups()[0] == 'numpy':
+ import numpy
+ c_include_dirs.append(numpy.get_include())
+ # cflags.append('-Wno-unused')
+ module_body, func_body = extract_func_code(code)
+ params = ', '.join(['%s %s' % a for a in arg_sigs])
+ module_code = """
+%(module_body)s
+%(cimports)s
+def __invoke(%(params)s):
+%(func_body)s
+ return locals()
+ """ % {'cimports': '\n'.join(cimports),
+ 'module_body': module_body,
+ 'params': params,
+ 'func_body': func_body }
+ for key, value in literals.items():
+ module_code = module_code.replace(key, value)
+ pyx_file = os.path.join(lib_dir, module_name + '.pyx')
+ fh = open(pyx_file, 'w')
+ try:
+ fh.write(module_code)
+ finally:
+ fh.close()
+ extension = Extension(
+ name = module_name,
+ sources = [pyx_file],
+ include_dirs = c_include_dirs,
+ extra_compile_args = cflags)
+ if build_extension is None:
+ build_extension = _get_build_extension()
build_extension.extensions = cythonize(
[extension],
include_path=cython_include_dirs or ['.'],
compiler_directives=cython_compiler_directives,
quiet=quiet)
- build_extension.build_temp = os.path.dirname(pyx_file)
- build_extension.build_lib = lib_dir
- build_extension.run()
-
+ build_extension.build_temp = os.path.dirname(pyx_file)
+ build_extension.build_lib = lib_dir
+ build_extension.run()
+
module = load_dynamic(module_name, module_path)
-
+
_cython_inline_cache[orig_code, arg_sigs, key_hash] = module.__invoke
- arg_list = [kwds[arg] for arg in arg_names]
- return module.__invoke(*arg_list)
-
-
-# Cached suffix used by cython_inline above. None gets overridden with the
-# actual value upon the first cython_inline invocation.
-cython_inline.so_ext = None
-
+ arg_list = [kwds[arg] for arg in arg_names]
+ return module.__invoke(*arg_list)
+
+
+# Cached suffix used by cython_inline above. None gets overridden with the
+# actual value upon the first cython_inline invocation.
+cython_inline.so_ext = None
+
_find_non_space = re.compile('[^ ]').search
-def strip_common_indent(code):
- min_indent = None
+def strip_common_indent(code):
+ min_indent = None
lines = code.splitlines()
- for line in lines:
+ for line in lines:
match = _find_non_space(line)
- if not match:
+ if not match:
continue # blank
- indent = match.start()
- if line[indent] == '#':
+ indent = match.start()
+ if line[indent] == '#':
continue # comment
if min_indent is None or min_indent > indent:
- min_indent = indent
- for ix, line in enumerate(lines):
+ min_indent = indent
+ for ix, line in enumerate(lines):
match = _find_non_space(line)
if not match or not line or line[indent:indent+1] == '#':
- continue
+ continue
lines[ix] = line[min_indent:]
- return '\n'.join(lines)
-
-
-module_statement = re.compile(r'^((cdef +(extern|class))|cimport|(from .+ cimport)|(from .+ import +[*]))')
-def extract_func_code(code):
- module = []
- function = []
- current = function
- code = code.replace('\t', ' ')
- lines = code.split('\n')
- for line in lines:
- if not line.startswith(' '):
- if module_statement.match(line):
- current = module
- else:
- current = function
- current.append(line)
- return '\n'.join(module), ' ' + '\n '.join(function)
-
-
-try:
- from inspect import getcallargs
-except ImportError:
- def getcallargs(func, *arg_values, **kwd_values):
- all = {}
- args, varargs, kwds, defaults = inspect.getargspec(func)
- if varargs is not None:
- all[varargs] = arg_values[len(args):]
- for name, value in zip(args, arg_values):
- all[name] = value
+ return '\n'.join(lines)
+
+
+module_statement = re.compile(r'^((cdef +(extern|class))|cimport|(from .+ cimport)|(from .+ import +[*]))')
+def extract_func_code(code):
+ module = []
+ function = []
+ current = function
+ code = code.replace('\t', ' ')
+ lines = code.split('\n')
+ for line in lines:
+ if not line.startswith(' '):
+ if module_statement.match(line):
+ current = module
+ else:
+ current = function
+ current.append(line)
+ return '\n'.join(module), ' ' + '\n '.join(function)
+
+
+try:
+ from inspect import getcallargs
+except ImportError:
+ def getcallargs(func, *arg_values, **kwd_values):
+ all = {}
+ args, varargs, kwds, defaults = inspect.getargspec(func)
+ if varargs is not None:
+ all[varargs] = arg_values[len(args):]
+ for name, value in zip(args, arg_values):
+ all[name] = value
for name, value in list(kwd_values.items()):
- if name in args:
- if name in all:
- raise TypeError("Duplicate argument %s" % name)
- all[name] = kwd_values.pop(name)
- if kwds is not None:
- all[kwds] = kwd_values
- elif kwd_values:
+ if name in args:
+ if name in all:
+ raise TypeError("Duplicate argument %s" % name)
+ all[name] = kwd_values.pop(name)
+ if kwds is not None:
+ all[kwds] = kwd_values
+ elif kwd_values:
raise TypeError("Unexpected keyword arguments: %s" % list(kwd_values))
- if defaults is None:
- defaults = ()
- first_default = len(args) - len(defaults)
- for ix, name in enumerate(args):
- if name not in all:
- if ix >= first_default:
- all[name] = defaults[ix - first_default]
- else:
- raise TypeError("Missing argument: %s" % name)
- return all
-
-
-def get_body(source):
- ix = source.index(':')
- if source[:5] == 'lambda':
- return "return %s" % source[ix+1:]
- else:
- return source[ix+1:]
-
-
-# Lots to be done here... It would be especially cool if compiled functions
-# could invoke each other quickly.
-class RuntimeCompiledFunction(object):
-
- def __init__(self, f):
- self._f = f
- self._body = get_body(inspect.getsource(f))
-
- def __call__(self, *args, **kwds):
- all = getcallargs(self._f, *args, **kwds)
+ if defaults is None:
+ defaults = ()
+ first_default = len(args) - len(defaults)
+ for ix, name in enumerate(args):
+ if name not in all:
+ if ix >= first_default:
+ all[name] = defaults[ix - first_default]
+ else:
+ raise TypeError("Missing argument: %s" % name)
+ return all
+
+
+def get_body(source):
+ ix = source.index(':')
+ if source[:5] == 'lambda':
+ return "return %s" % source[ix+1:]
+ else:
+ return source[ix+1:]
+
+
+# Lots to be done here... It would be especially cool if compiled functions
+# could invoke each other quickly.
+class RuntimeCompiledFunction(object):
+
+ def __init__(self, f):
+ self._f = f
+ self._body = get_body(inspect.getsource(f))
+
+ def __call__(self, *args, **kwds):
+ all = getcallargs(self._f, *args, **kwds)
if IS_PY3:
return cython_inline(self._body, locals=self._f.__globals__, globals=self._f.__globals__, **all)
else:
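Taken together, cython_inline() above extracts the unbound names from a snippet, types them with get_type (unsafe_type by default), compiles a one-off extension module keyed by a hash of the code and argument signature, and returns the result of the generated __invoke(). A minimal usage sketch, mirroring the TestInline cases further down (cython.inline is the pure-Python wrapper that forwards to cython_inline):

    import cython

    a, b = 10, 20
    # Unbound names in the snippet are picked up from the calling frame and
    # passed to __invoke(); the compiled module is cached, so repeated calls
    # with the same code and argument types do not recompile.
    assert cython.inline("return a + b") == 30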
diff --git a/contrib/tools/cython/Cython/Build/IpythonMagic.py b/contrib/tools/cython/Cython/Build/IpythonMagic.py
index 4b513bfd67..7abb97ec70 100644
--- a/contrib/tools/cython/Cython/Build/IpythonMagic.py
+++ b/contrib/tools/cython/Cython/Build/IpythonMagic.py
@@ -1,93 +1,93 @@
-# -*- coding: utf-8 -*-
-"""
-=====================
-Cython related magics
-=====================
-
-Magic command interface for interactive work with Cython
-
-.. note::
-
- The ``Cython`` package needs to be installed separately. It
- can be obtained using ``easy_install`` or ``pip``.
-
-Usage
-=====
-
+# -*- coding: utf-8 -*-
+"""
+=====================
+Cython related magics
+=====================
+
+Magic command interface for interactive work with Cython
+
+.. note::
+
+ The ``Cython`` package needs to be installed separately. It
+ can be obtained using ``easy_install`` or ``pip``.
+
+Usage
+=====
+
To enable the magics below, execute ``%load_ext cython``.
-
-``%%cython``
-
-{CYTHON_DOC}
-
-``%%cython_inline``
-
-{CYTHON_INLINE_DOC}
-
-``%%cython_pyximport``
-
-{CYTHON_PYXIMPORT_DOC}
-
-Author:
-* Brian Granger
-
-Code moved from IPython and adapted by:
-* Martín Gaitán
-
-Parts of this code were taken from Cython.inline.
-"""
-#-----------------------------------------------------------------------------
-# Copyright (C) 2010-2011, IPython Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
+
+``%%cython``
+
+{CYTHON_DOC}
+
+``%%cython_inline``
+
+{CYTHON_INLINE_DOC}
+
+``%%cython_pyximport``
+
+{CYTHON_PYXIMPORT_DOC}
+
+Author:
+* Brian Granger
+
+Code moved from IPython and adapted by:
+* Martín Gaitán
+
+Parts of this code were taken from Cython.inline.
+"""
+#-----------------------------------------------------------------------------
+# Copyright (C) 2010-2011, IPython Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
# The full license is in the file ipython-COPYING.rst, distributed with this software.
-#-----------------------------------------------------------------------------
-
-from __future__ import absolute_import, print_function
-
-import imp
-import io
-import os
-import re
-import sys
-import time
+#-----------------------------------------------------------------------------
+
+from __future__ import absolute_import, print_function
+
+import imp
+import io
+import os
+import re
+import sys
+import time
import copy
import distutils.log
import textwrap
-
+
IO_ENCODING = sys.getfilesystemencoding()
IS_PY2 = sys.version_info[0] < 3
-try:
- reload
-except NameError: # Python 3
- from imp import reload
-
-try:
- import hashlib
-except ImportError:
- import md5 as hashlib
-
-from distutils.core import Distribution, Extension
-from distutils.command.build_ext import build_ext
-
-from IPython.core import display
-from IPython.core import magic_arguments
-from IPython.core.magic import Magics, magics_class, cell_magic
+try:
+ reload
+except NameError: # Python 3
+ from imp import reload
+
+try:
+ import hashlib
+except ImportError:
+ import md5 as hashlib
+
+from distutils.core import Distribution, Extension
+from distutils.command.build_ext import build_ext
+
+from IPython.core import display
+from IPython.core import magic_arguments
+from IPython.core.magic import Magics, magics_class, cell_magic
try:
from IPython.paths import get_ipython_cache_dir
except ImportError:
# older IPython version
from IPython.utils.path import get_ipython_cache_dir
-from IPython.utils.text import dedent
-
-from ..Shadow import __version__ as cython_version
-from ..Compiler.Errors import CompileError
-from .Inline import cython_inline
-from .Dependencies import cythonize
-
-
+from IPython.utils.text import dedent
+
+from ..Shadow import __version__ as cython_version
+from ..Compiler.Errors import CompileError
+from .Inline import cython_inline
+from .Dependencies import cythonize
+
+
PGO_CONFIG = {
'gcc': {
'gen': ['-fprofile-generate', '-fprofile-dir={TEMPDIR}'],
@@ -110,22 +110,22 @@ else:
return name
-@magics_class
-class CythonMagics(Magics):
-
- def __init__(self, shell):
+@magics_class
+class CythonMagics(Magics):
+
+ def __init__(self, shell):
super(CythonMagics, self).__init__(shell)
- self._reloads = {}
- self._code_cache = {}
- self._pyximport_installed = False
-
- def _import_all(self, module):
+ self._reloads = {}
+ self._code_cache = {}
+ self._pyximport_installed = False
+
+ def _import_all(self, module):
mdict = module.__dict__
if '__all__' in mdict:
keys = mdict['__all__']
else:
keys = [k for k in mdict if not k.startswith('_')]
-
+
for k in keys:
try:
self.shell.push({k: mdict[k]})
@@ -133,65 +133,65 @@ class CythonMagics(Magics):
msg = "'module' object has no attribute '%s'" % k
raise AttributeError(msg)
- @cell_magic
- def cython_inline(self, line, cell):
- """Compile and run a Cython code cell using Cython.inline.
-
- This magic simply passes the body of the cell to Cython.inline
- and returns the result. If the variables `a` and `b` are defined
- in the user's namespace, here is a simple example that returns
- their sum::
-
- %%cython_inline
- return a+b
-
- For most purposes, we recommend the usage of the `%%cython` magic.
- """
- locs = self.shell.user_global_ns
- globs = self.shell.user_ns
- return cython_inline(cell, locals=locs, globals=globs)
-
- @cell_magic
- def cython_pyximport(self, line, cell):
- """Compile and import a Cython code cell using pyximport.
-
- The contents of the cell are written to a `.pyx` file in the current
- working directory, which is then imported using `pyximport`. This
- magic requires a module name to be passed::
-
- %%cython_pyximport modulename
- def f(x):
- return 2.0*x
-
- The compiled module is then imported and all of its symbols are
- injected into the user's namespace. For most purposes, we recommend
- the usage of the `%%cython` magic.
- """
- module_name = line.strip()
- if not module_name:
- raise ValueError('module name must be given')
- fname = module_name + '.pyx'
- with io.open(fname, 'w', encoding='utf-8') as f:
- f.write(cell)
- if 'pyximport' not in sys.modules or not self._pyximport_installed:
- import pyximport
+ @cell_magic
+ def cython_inline(self, line, cell):
+ """Compile and run a Cython code cell using Cython.inline.
+
+ This magic simply passes the body of the cell to Cython.inline
+ and returns the result. If the variables `a` and `b` are defined
+ in the user's namespace, here is a simple example that returns
+ their sum::
+
+ %%cython_inline
+ return a+b
+
+ For most purposes, we recommend the usage of the `%%cython` magic.
+ """
+ locs = self.shell.user_global_ns
+ globs = self.shell.user_ns
+ return cython_inline(cell, locals=locs, globals=globs)
+
+ @cell_magic
+ def cython_pyximport(self, line, cell):
+ """Compile and import a Cython code cell using pyximport.
+
+ The contents of the cell are written to a `.pyx` file in the current
+ working directory, which is then imported using `pyximport`. This
+ magic requires a module name to be passed::
+
+ %%cython_pyximport modulename
+ def f(x):
+ return 2.0*x
+
+ The compiled module is then imported and all of its symbols are
+ injected into the user's namespace. For most purposes, we recommend
+ the usage of the `%%cython` magic.
+ """
+ module_name = line.strip()
+ if not module_name:
+ raise ValueError('module name must be given')
+ fname = module_name + '.pyx'
+ with io.open(fname, 'w', encoding='utf-8') as f:
+ f.write(cell)
+ if 'pyximport' not in sys.modules or not self._pyximport_installed:
+ import pyximport
pyximport.install()
- self._pyximport_installed = True
- if module_name in self._reloads:
- module = self._reloads[module_name]
+ self._pyximport_installed = True
+ if module_name in self._reloads:
+ module = self._reloads[module_name]
# Note: reloading extension modules is not actually supported
# (requires PEP-489 reinitialisation support).
# Don't know why this should ever have worked as it reads here.
# All we really need to do is to update the globals below.
#reload(module)
- else:
- __import__(module_name)
- module = sys.modules[module_name]
- self._reloads[module_name] = module
- self._import_all(module)
-
- @magic_arguments.magic_arguments()
- @magic_arguments.argument(
+ else:
+ __import__(module_name)
+ module = sys.modules[module_name]
+ self._reloads[module_name] = module
+ self._import_all(module)
+
+ @magic_arguments.magic_arguments()
+ @magic_arguments.argument(
'-a', '--annotate', action='store_true', default=False,
help="Produce a colorized HTML version of the source."
)
@@ -213,69 +213,69 @@ class CythonMagics(Magics):
"previously compiled."
)
@magic_arguments.argument(
- '-c', '--compile-args', action='append', default=[],
- help="Extra flags to pass to compiler via the `extra_compile_args` "
- "Extension flag (can be specified multiple times)."
- )
- @magic_arguments.argument(
- '--link-args', action='append', default=[],
- help="Extra flags to pass to linker via the `extra_link_args` "
- "Extension flag (can be specified multiple times)."
- )
- @magic_arguments.argument(
- '-l', '--lib', action='append', default=[],
- help="Add a library to link the extension against (can be specified "
- "multiple times)."
- )
- @magic_arguments.argument(
- '-n', '--name',
- help="Specify a name for the Cython module."
- )
- @magic_arguments.argument(
- '-L', dest='library_dirs', metavar='dir', action='append', default=[],
+ '-c', '--compile-args', action='append', default=[],
+ help="Extra flags to pass to compiler via the `extra_compile_args` "
+ "Extension flag (can be specified multiple times)."
+ )
+ @magic_arguments.argument(
+ '--link-args', action='append', default=[],
+ help="Extra flags to pass to linker via the `extra_link_args` "
+ "Extension flag (can be specified multiple times)."
+ )
+ @magic_arguments.argument(
+ '-l', '--lib', action='append', default=[],
+ help="Add a library to link the extension against (can be specified "
+ "multiple times)."
+ )
+ @magic_arguments.argument(
+ '-n', '--name',
+ help="Specify a name for the Cython module."
+ )
+ @magic_arguments.argument(
+ '-L', dest='library_dirs', metavar='dir', action='append', default=[],
help="Add a path to the list of library directories (can be specified "
- "multiple times)."
- )
- @magic_arguments.argument(
- '-I', '--include', action='append', default=[],
- help="Add a path to the list of include directories (can be specified "
- "multiple times)."
- )
- @magic_arguments.argument(
+ "multiple times)."
+ )
+ @magic_arguments.argument(
+ '-I', '--include', action='append', default=[],
+ help="Add a path to the list of include directories (can be specified "
+ "multiple times)."
+ )
+ @magic_arguments.argument(
'-S', '--src', action='append', default=[],
help="Add a path to the list of src files (can be specified "
"multiple times)."
- )
- @magic_arguments.argument(
+ )
+ @magic_arguments.argument(
'--pgo', dest='pgo', action='store_true', default=False,
help=("Enable profile guided optimisation in the C compiler. "
"Compiles the cell twice and executes it in between to generate a runtime profile.")
- )
- @magic_arguments.argument(
+ )
+ @magic_arguments.argument(
'--verbose', dest='quiet', action='store_false', default=True,
help=("Print debug information like generated .c/.cpp file location "
"and exact gcc/g++ command invoked.")
- )
- @cell_magic
- def cython(self, line, cell):
- """Compile and import everything from a Cython code cell.
-
- The contents of the cell are written to a `.pyx` file in the
- directory `IPYTHONDIR/cython` using a filename with the hash of the
- code. This file is then cythonized and compiled. The resulting module
- is imported and all of its symbols are injected into the user's
- namespace. The usage is similar to that of `%%cython_pyximport` but
- you don't have to pass a module name::
-
- %%cython
- def f(x):
- return 2.0*x
-
- To compile OpenMP codes, pass the required `--compile-args`
- and `--link-args`. For example with gcc::
-
- %%cython --compile-args=-fopenmp --link-args=-fopenmp
- ...
+ )
+ @cell_magic
+ def cython(self, line, cell):
+ """Compile and import everything from a Cython code cell.
+
+ The contents of the cell are written to a `.pyx` file in the
+ directory `IPYTHONDIR/cython` using a filename with the hash of the
+ code. This file is then cythonized and compiled. The resulting module
+ is imported and all of its symbols are injected into the user's
+ namespace. The usage is similar to that of `%%cython_pyximport` but
+ you don't have to pass a module name::
+
+ %%cython
+ def f(x):
+ return 2.0*x
+
+ To compile OpenMP codes, pass the required `--compile-args`
+ and `--link-args`. For example with gcc::
+
+ %%cython --compile-args=-fopenmp --link-args=-fopenmp
+ ...
To enable profile guided optimisation, pass the ``--pgo`` option.
Note that the cell itself needs to take care of establishing a suitable
@@ -298,46 +298,46 @@ class CythonMagics(Magics):
if "_pgo_" in __name__:
... # execute critical code here
- """
- args = magic_arguments.parse_argstring(self.cython, line)
+ """
+ args = magic_arguments.parse_argstring(self.cython, line)
code = cell if cell.endswith('\n') else cell + '\n'
- lib_dir = os.path.join(get_ipython_cache_dir(), 'cython')
+ lib_dir = os.path.join(get_ipython_cache_dir(), 'cython')
key = (code, line, sys.version_info, sys.executable, cython_version)
-
- if not os.path.exists(lib_dir):
- os.makedirs(lib_dir)
-
+
+ if not os.path.exists(lib_dir):
+ os.makedirs(lib_dir)
+
if args.pgo:
key += ('pgo',)
- if args.force:
- # Force a new module name by adding the current time to the
- # key which is hashed to determine the module name.
+ if args.force:
+ # Force a new module name by adding the current time to the
+ # key which is hashed to determine the module name.
key += (time.time(),)
-
- if args.name:
+
+ if args.name:
module_name = str(args.name) # no-op in Py3
- else:
- module_name = "_cython_magic_" + hashlib.md5(str(key).encode('utf-8')).hexdigest()
+ else:
+ module_name = "_cython_magic_" + hashlib.md5(str(key).encode('utf-8')).hexdigest()
html_file = os.path.join(lib_dir, module_name + '.html')
- module_path = os.path.join(lib_dir, module_name + self.so_ext)
-
- have_module = os.path.isfile(module_path)
+ module_path = os.path.join(lib_dir, module_name + self.so_ext)
+
+ have_module = os.path.isfile(module_path)
need_cythonize = args.pgo or not have_module
-
- if args.annotate:
- if not os.path.isfile(html_file):
- need_cythonize = True
-
+
+ if args.annotate:
+ if not os.path.isfile(html_file):
+ need_cythonize = True
+
extension = None
- if need_cythonize:
+ if need_cythonize:
extensions = self._cythonize(module_name, code, lib_dir, args, quiet=args.quiet)
if extensions is None:
# Compilation failed and printed error message
return None
assert len(extensions) == 1
extension = extensions[0]
- self._code_cache[key] = module_name
-
+ self._code_cache[key] = module_name
+
if args.pgo:
self._profile_pgo_wrapper(extension, lib_dir)
@@ -348,24 +348,24 @@ class CythonMagics(Magics):
# Build failed and printed error message
return None
- module = imp.load_dynamic(module_name, module_path)
- self._import_all(module)
-
- if args.annotate:
- try:
- with io.open(html_file, encoding='utf-8') as f:
- annotated_html = f.read()
- except IOError as e:
- # File could not be opened. Most likely the user has a version
- # of Cython before 0.15.1 (when `cythonize` learned the
- # `force` keyword argument) and has already compiled this
- # exact source without annotation.
- print('Cython completed successfully but the annotated '
- 'source could not be read.', file=sys.stderr)
- print(e, file=sys.stderr)
- else:
- return display.HTML(self.clean_annotated_html(annotated_html))
-
+ module = imp.load_dynamic(module_name, module_path)
+ self._import_all(module)
+
+ if args.annotate:
+ try:
+ with io.open(html_file, encoding='utf-8') as f:
+ annotated_html = f.read()
+ except IOError as e:
+ # File could not be opened. Most likely the user has a version
+ # of Cython before 0.15.1 (when `cythonize` learned the
+ # `force` keyword argument) and has already compiled this
+ # exact source without annotation.
+ print('Cython completed successfully but the annotated '
+ 'source could not be read.', file=sys.stderr)
+ print(e, file=sys.stderr)
+ else:
+ return display.HTML(self.clean_annotated_html(annotated_html))
+
def _profile_pgo_wrapper(self, extension, lib_dir):
"""
Generate a .c file for a separate extension module that calls the
@@ -489,37 +489,37 @@ class CythonMagics(Magics):
file=sys.stderr)
return orig_flags
- @property
- def so_ext(self):
- """The extension suffix for compiled modules."""
- try:
- return self._so_ext
- except AttributeError:
- self._so_ext = self._get_build_extension().get_ext_filename('')
- return self._so_ext
-
- def _clear_distutils_mkpath_cache(self):
- """clear distutils mkpath cache
-
- prevents distutils from skipping re-creation of dirs that have been removed
- """
- try:
- from distutils.dir_util import _path_created
- except ImportError:
- pass
- else:
- _path_created.clear()
-
+ @property
+ def so_ext(self):
+ """The extension suffix for compiled modules."""
+ try:
+ return self._so_ext
+ except AttributeError:
+ self._so_ext = self._get_build_extension().get_ext_filename('')
+ return self._so_ext
+
+ def _clear_distutils_mkpath_cache(self):
+ """clear distutils mkpath cache
+
+ prevents distutils from skipping re-creation of dirs that have been removed
+ """
+ try:
+ from distutils.dir_util import _path_created
+ except ImportError:
+ pass
+ else:
+ _path_created.clear()
+
def _get_build_extension(self, extension=None, lib_dir=None, temp_dir=None,
pgo_step_name=None, _build_ext=build_ext):
- self._clear_distutils_mkpath_cache()
- dist = Distribution()
- config_files = dist.find_config_files()
- try:
- config_files.remove('setup.cfg')
- except ValueError:
- pass
- dist.parse_config_files(config_files)
+ self._clear_distutils_mkpath_cache()
+ dist = Distribution()
+ config_files = dist.find_config_files()
+ try:
+ config_files.remove('setup.cfg')
+ except ValueError:
+ pass
+ dist.parse_config_files(config_files)
if not temp_dir:
temp_dir = lib_dir
@@ -533,7 +533,7 @@ class CythonMagics(Magics):
base_build_ext.build_extensions(self)
build_extension = _build_ext(dist)
- build_extension.finalize_options()
+ build_extension.finalize_options()
if temp_dir:
temp_dir = encode_fs(temp_dir)
build_extension.build_temp = temp_dir
@@ -542,24 +542,24 @@ class CythonMagics(Magics):
build_extension.build_lib = lib_dir
if extension is not None:
build_extension.extensions = [extension]
- return build_extension
-
- @staticmethod
- def clean_annotated_html(html):
- """Clean up the annotated HTML source.
-
- Strips the link to the generated C or C++ file, which we do not
- present to the user.
- """
- r = re.compile('<p>Raw output: <a href="(.*)">(.*)</a>')
- html = '\n'.join(l for l in html.splitlines() if not r.match(l))
- return html
-
-__doc__ = __doc__.format(
+ return build_extension
+
+ @staticmethod
+ def clean_annotated_html(html):
+ """Clean up the annotated HTML source.
+
+ Strips the link to the generated C or C++ file, which we do not
+ present to the user.
+ """
+ r = re.compile('<p>Raw output: <a href="(.*)">(.*)</a>')
+ html = '\n'.join(l for l in html.splitlines() if not r.match(l))
+ return html
+
+__doc__ = __doc__.format(
# rST doesn't see the -+ flag as part of an option list, so we
# hide it from the module-level docstring.
CYTHON_DOC=dedent(CythonMagics.cython.__doc__\
.replace('-+, --cplus', '--cplus ')),
CYTHON_INLINE_DOC=dedent(CythonMagics.cython_inline.__doc__),
CYTHON_PYXIMPORT_DOC=dedent(CythonMagics.cython_pyximport.__doc__),
-)
+)
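The magics defined above are exercised programmatically by TestIpythonMagic further down; a condensed sketch of that flow (assuming IPython is installed and a working C toolchain is available):

    import IPython.testing.globalipapp

    ip = IPython.testing.globalipapp.get_ipython()
    ip.extension_manager.load_extension('cython')

    # %%cython_inline: the cell body is passed to cython_inline() and its
    # return value is handed back.
    ip.ex('a = 10; b = 20')
    assert ip.run_cell_magic('cython_inline', '', 'return a + b') == 30

    # %%cython: the cell is written to IPYTHONDIR/cython, cythonized,
    # compiled and imported; its public names are pushed into the user
    # namespace.
    ip.run_cell_magic('cython', '', 'def f(x):\n    return 2.0 * x\n')
    ip.ex('g = f(10)')
    assert ip.user_ns['g'] == 20.0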
diff --git a/contrib/tools/cython/Cython/Build/Tests/TestInline.py b/contrib/tools/cython/Cython/Build/Tests/TestInline.py
index 0877dee03f..d209488083 100644
--- a/contrib/tools/cython/Cython/Build/Tests/TestInline.py
+++ b/contrib/tools/cython/Cython/Build/Tests/TestInline.py
@@ -1,71 +1,71 @@
-import os, tempfile
-from Cython.Shadow import inline
-from Cython.Build.Inline import safe_type
-from Cython.TestUtils import CythonTest
-
-try:
- import numpy
- has_numpy = True
-except:
- has_numpy = False
-
-test_kwds = dict(force=True, quiet=True)
-
-global_value = 100
-
-class TestInline(CythonTest):
- def setUp(self):
- CythonTest.setUp(self)
- self.test_kwds = dict(test_kwds)
+import os, tempfile
+from Cython.Shadow import inline
+from Cython.Build.Inline import safe_type
+from Cython.TestUtils import CythonTest
+
+try:
+ import numpy
+ has_numpy = True
+except:
+ has_numpy = False
+
+test_kwds = dict(force=True, quiet=True)
+
+global_value = 100
+
+class TestInline(CythonTest):
+ def setUp(self):
+ CythonTest.setUp(self)
+ self.test_kwds = dict(test_kwds)
if os.path.isdir('TEST_TMP'):
lib_dir = os.path.join('TEST_TMP','inline')
- else:
- lib_dir = tempfile.mkdtemp(prefix='cython_inline_')
- self.test_kwds['lib_dir'] = lib_dir
-
- def test_simple(self):
+ else:
+ lib_dir = tempfile.mkdtemp(prefix='cython_inline_')
+ self.test_kwds['lib_dir'] = lib_dir
+
+ def test_simple(self):
self.assertEqual(inline("return 1+2", **self.test_kwds), 3)
-
- def test_types(self):
+
+ def test_types(self):
self.assertEqual(inline("""
- cimport cython
- return cython.typeof(a), cython.typeof(b)
- """, a=1.0, b=[], **self.test_kwds), ('double', 'list object'))
-
- def test_locals(self):
- a = 1
- b = 2
+ cimport cython
+ return cython.typeof(a), cython.typeof(b)
+ """, a=1.0, b=[], **self.test_kwds), ('double', 'list object'))
+
+ def test_locals(self):
+ a = 1
+ b = 2
self.assertEqual(inline("return a+b", **self.test_kwds), 3)
-
- def test_globals(self):
+
+ def test_globals(self):
self.assertEqual(inline("return global_value + 1", **self.test_kwds), global_value + 1)
-
- def test_no_return(self):
+
+ def test_no_return(self):
self.assertEqual(inline("""
- a = 1
- cdef double b = 2
- cdef c = []
+ a = 1
+ cdef double b = 2
+ cdef c = []
""", **self.test_kwds), dict(a=1, b=2.0, c=[]))
-
- def test_def_node(self):
+
+ def test_def_node(self):
foo = inline("def foo(x): return x * x", **self.test_kwds)['foo']
self.assertEqual(foo(7), 49)
-
+
def test_class_ref(self):
class Type(object):
pass
tp = inline("Type")['Type']
self.assertEqual(tp, Type)
- def test_pure(self):
- import cython as cy
- b = inline("""
- b = cy.declare(float, a)
- c = cy.declare(cy.pointer(cy.float), &b)
- return b
+ def test_pure(self):
+ import cython as cy
+ b = inline("""
+ b = cy.declare(float, a)
+ c = cy.declare(cy.pointer(cy.float), &b)
+ return b
""", a=3, **self.test_kwds)
self.assertEqual(type(b), float)
-
+
def test_compiler_directives(self):
self.assertEqual(
inline('return sum(x)',
@@ -86,11 +86,11 @@ class TestInline(CythonTest):
2.5
)
- if has_numpy:
-
- def test_numpy(self):
- import numpy
- a = numpy.ndarray((10, 20))
- a[0,0] = 10
+ if has_numpy:
+
+ def test_numpy(self):
+ import numpy
+ a = numpy.ndarray((10, 20))
+ a[0,0] = 10
self.assertEqual(safe_type(a), 'numpy.ndarray[numpy.float64_t, ndim=2]')
self.assertEqual(inline("return a[0,0]", a=a, **self.test_kwds), 10.0)
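test_numpy above exercises the numpy branch of safe_type, which declares the argument as a typed buffer in the generated signature so that indexing in the inlined code compiles to fast buffer access rather than generic Python calls. A small sketch of just that mapping (assuming numpy is available):

    import numpy
    from Cython.Build.Inline import safe_type

    a = numpy.ndarray((10, 20))   # default dtype is float64
    assert safe_type(a) == 'numpy.ndarray[numpy.float64_t, ndim=2]'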
diff --git a/contrib/tools/cython/Cython/Build/Tests/TestIpythonMagic.py b/contrib/tools/cython/Cython/Build/Tests/TestIpythonMagic.py
index 1b67a970c6..24213091b2 100644
--- a/contrib/tools/cython/Cython/Build/Tests/TestIpythonMagic.py
+++ b/contrib/tools/cython/Cython/Build/Tests/TestIpythonMagic.py
@@ -1,17 +1,17 @@
-# -*- coding: utf-8 -*-
-# tag: ipython
-
-"""Tests for the Cython magics extension."""
-
+# -*- coding: utf-8 -*-
+# tag: ipython
+
+"""Tests for the Cython magics extension."""
+
from __future__ import absolute_import
-import os
-import sys
+import os
+import sys
from contextlib import contextmanager
from Cython.Build import IpythonMagic
from Cython.TestUtils import CythonTest
-
-try:
+
+try:
import IPython.testing.globalipapp
except ImportError:
# Disable tests and fake helpers for initialisation below.
@@ -21,22 +21,22 @@ else:
def skip_if_not_installed(c):
return c
-try:
+try:
# disable IPython history thread before it gets started to avoid having to clean it up
- from IPython.core.history import HistoryManager
- HistoryManager.enabled = False
-except ImportError:
- pass
-
+ from IPython.core.history import HistoryManager
+ HistoryManager.enabled = False
+except ImportError:
+ pass
+
code = u"""\
def f(x):
- return 2*x
+ return 2*x
"""
-
+
cython3_code = u"""\
def f(int x):
return 2 / x
-
+
def call(x):
return f(*(x,))
"""
@@ -48,72 +48,72 @@ main()
"""
-if sys.platform == 'win32':
- # not using IPython's decorators here because they depend on "nose"
- try:
- from unittest import skip as skip_win32
- except ImportError:
- # poor dev's silent @unittest.skip()
+if sys.platform == 'win32':
+ # not using IPython's decorators here because they depend on "nose"
+ try:
+ from unittest import skip as skip_win32
+ except ImportError:
+ # poor dev's silent @unittest.skip()
def skip_win32(dummy):
def _skip_win32(func):
return None
return _skip_win32
-else:
+else:
def skip_win32(dummy):
def _skip_win32(func):
def wrapper(*args, **kwargs):
func(*args, **kwargs)
return wrapper
return _skip_win32
-
-
+
+
@skip_if_not_installed
-class TestIPythonMagic(CythonTest):
-
+class TestIPythonMagic(CythonTest):
+
@classmethod
def setUpClass(cls):
CythonTest.setUpClass()
cls._ip = IPython.testing.globalipapp.get_ipython()
- def setUp(self):
- CythonTest.setUp(self)
+ def setUp(self):
+ CythonTest.setUp(self)
self._ip.extension_manager.load_extension('cython')
-
- def test_cython_inline(self):
+
+ def test_cython_inline(self):
ip = self._ip
- ip.ex('a=10; b=20')
- result = ip.run_cell_magic('cython_inline', '', 'return a+b')
- self.assertEqual(result, 30)
-
+ ip.ex('a=10; b=20')
+ result = ip.run_cell_magic('cython_inline', '', 'return a+b')
+ self.assertEqual(result, 30)
+
@skip_win32('Skip on Windows')
- def test_cython_pyximport(self):
+ def test_cython_pyximport(self):
ip = self._ip
- module_name = '_test_cython_pyximport'
- ip.run_cell_magic('cython_pyximport', module_name, code)
- ip.ex('g = f(10)')
- self.assertEqual(ip.user_ns['g'], 20.0)
- ip.run_cell_magic('cython_pyximport', module_name, code)
- ip.ex('h = f(-10)')
- self.assertEqual(ip.user_ns['h'], -20.0)
- try:
- os.remove(module_name + '.pyx')
- except OSError:
- pass
-
- def test_cython(self):
+ module_name = '_test_cython_pyximport'
+ ip.run_cell_magic('cython_pyximport', module_name, code)
+ ip.ex('g = f(10)')
+ self.assertEqual(ip.user_ns['g'], 20.0)
+ ip.run_cell_magic('cython_pyximport', module_name, code)
+ ip.ex('h = f(-10)')
+ self.assertEqual(ip.user_ns['h'], -20.0)
+ try:
+ os.remove(module_name + '.pyx')
+ except OSError:
+ pass
+
+ def test_cython(self):
ip = self._ip
- ip.run_cell_magic('cython', '', code)
- ip.ex('g = f(10)')
- self.assertEqual(ip.user_ns['g'], 20.0)
-
- def test_cython_name(self):
- # The Cython module named 'mymodule' defines the function f.
+ ip.run_cell_magic('cython', '', code)
+ ip.ex('g = f(10)')
+ self.assertEqual(ip.user_ns['g'], 20.0)
+
+ def test_cython_name(self):
+ # The Cython module named 'mymodule' defines the function f.
ip = self._ip
- ip.run_cell_magic('cython', '--name=mymodule', code)
- # This module can now be imported in the interactive namespace.
- ip.ex('import mymodule; g = mymodule.f(10)')
- self.assertEqual(ip.user_ns['g'], 20.0)
-
+ ip.run_cell_magic('cython', '--name=mymodule', code)
+ # This module can now be imported in the interactive namespace.
+ ip.ex('import mymodule; g = mymodule.f(10)')
+ self.assertEqual(ip.user_ns['g'], 20.0)
+
def test_cython_language_level(self):
# The Cython cell defines the functions f() and call().
ip = self._ip
@@ -152,15 +152,15 @@ class TestIPythonMagic(CythonTest):
self.assertEqual(ip.user_ns['h'], 2.0 / 10.0)
@skip_win32('Skip on Windows')
- def test_extlibs(self):
+ def test_extlibs(self):
ip = self._ip
code = u"""
-from libc.math cimport sin
-x = sin(0.0)
+from libc.math cimport sin
+x = sin(0.0)
"""
- ip.user_ns['x'] = 1
- ip.run_cell_magic('cython', '-l m', code)
- self.assertEqual(ip.user_ns['x'], 0)
+ ip.user_ns['x'] = 1
+ ip.run_cell_magic('cython', '-l m', code)
+ self.assertEqual(ip.user_ns['x'], 0)
def test_cython_verbose(self):
diff --git a/contrib/tools/cython/Cython/Build/Tests/TestStripLiterals.py b/contrib/tools/cython/Cython/Build/Tests/TestStripLiterals.py
index 3f4261128f..a7572a5083 100644
--- a/contrib/tools/cython/Cython/Build/Tests/TestStripLiterals.py
+++ b/contrib/tools/cython/Cython/Build/Tests/TestStripLiterals.py
@@ -1,57 +1,57 @@
-from Cython.Build.Dependencies import strip_string_literals
-
-from Cython.TestUtils import CythonTest
-
-class TestStripLiterals(CythonTest):
-
- def t(self, before, expected):
- actual, literals = strip_string_literals(before, prefix="_L")
+from Cython.Build.Dependencies import strip_string_literals
+
+from Cython.TestUtils import CythonTest
+
+class TestStripLiterals(CythonTest):
+
+ def t(self, before, expected):
+ actual, literals = strip_string_literals(before, prefix="_L")
self.assertEqual(expected, actual)
- for key, value in literals.items():
- actual = actual.replace(key, value)
+ for key, value in literals.items():
+ actual = actual.replace(key, value)
self.assertEqual(before, actual)
-
- def test_empty(self):
- self.t("", "")
-
- def test_single_quote(self):
- self.t("'x'", "'_L1_'")
-
- def test_double_quote(self):
- self.t('"x"', '"_L1_"')
-
- def test_nested_quotes(self):
- self.t(""" '"' "'" """, """ '_L1_' "_L2_" """)
-
- def test_triple_quote(self):
- self.t(" '''a\n''' ", " '''_L1_''' ")
-
- def test_backslash(self):
- self.t(r"'a\'b'", "'_L1_'")
- self.t(r"'a\\'", "'_L1_'")
- self.t(r"'a\\\'b'", "'_L1_'")
-
- def test_unicode(self):
- self.t("u'abc'", "u'_L1_'")
-
- def test_raw(self):
- self.t(r"r'abc\\'", "r'_L1_'")
-
- def test_raw_unicode(self):
- self.t(r"ru'abc\\'", "ru'_L1_'")
-
- def test_comment(self):
- self.t("abc # foo", "abc #_L1_")
-
- def test_comment_and_quote(self):
- self.t("abc # 'x'", "abc #_L1_")
- self.t("'abc#'", "'_L1_'")
-
- def test_include(self):
- self.t("include 'a.pxi' # something here",
- "include '_L1_' #_L2_")
-
- def test_extern(self):
- self.t("cdef extern from 'a.h': # comment",
- "cdef extern from '_L1_': #_L2_")
-
+
+ def test_empty(self):
+ self.t("", "")
+
+ def test_single_quote(self):
+ self.t("'x'", "'_L1_'")
+
+ def test_double_quote(self):
+ self.t('"x"', '"_L1_"')
+
+ def test_nested_quotes(self):
+ self.t(""" '"' "'" """, """ '_L1_' "_L2_" """)
+
+ def test_triple_quote(self):
+ self.t(" '''a\n''' ", " '''_L1_''' ")
+
+ def test_backslash(self):
+ self.t(r"'a\'b'", "'_L1_'")
+ self.t(r"'a\\'", "'_L1_'")
+ self.t(r"'a\\\'b'", "'_L1_'")
+
+ def test_unicode(self):
+ self.t("u'abc'", "u'_L1_'")
+
+ def test_raw(self):
+ self.t(r"r'abc\\'", "r'_L1_'")
+
+ def test_raw_unicode(self):
+ self.t(r"ru'abc\\'", "ru'_L1_'")
+
+ def test_comment(self):
+ self.t("abc # foo", "abc #_L1_")
+
+ def test_comment_and_quote(self):
+ self.t("abc # 'x'", "abc #_L1_")
+ self.t("'abc#'", "'_L1_'")
+
+ def test_include(self):
+ self.t("include 'a.pxi' # something here",
+ "include '_L1_' #_L2_")
+
+ def test_extern(self):
+ self.t("cdef extern from 'a.h': # comment",
+ "cdef extern from '_L1_': #_L2_")
+
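The helper under test here, strip_string_literals(), replaces string bodies and comments with numbered placeholders so that later passes (indent stripping, cimport scanning) never look inside them; substituting the returned mapping back restores the original text. A sketch using the test_include case above:

    from Cython.Build.Dependencies import strip_string_literals

    source = "include 'a.pxi' # something here"
    stripped, literals = strip_string_literals(source, prefix="_L")
    assert stripped == "include '_L1_' #_L2_"

    # Round-trip: re-inserting the stored literals reconstructs the input.
    for key, value in literals.items():
        stripped = stripped.replace(key, value)
    assert stripped == source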
diff --git a/contrib/tools/cython/Cython/Build/Tests/__init__.py b/contrib/tools/cython/Cython/Build/Tests/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Build/Tests/__init__.py
+++ b/contrib/tools/cython/Cython/Build/Tests/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Build/__init__.py b/contrib/tools/cython/Cython/Build/__init__.py
index 265c27a045..d6f3986597 100644
--- a/contrib/tools/cython/Cython/Build/__init__.py
+++ b/contrib/tools/cython/Cython/Build/__init__.py
@@ -1,2 +1,2 @@
-from .Dependencies import cythonize
+from .Dependencies import cythonize
from .Distutils import build_ext
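Cython.Build re-exports cythonize() and build_ext, the usual entry points for building .pyx sources from a setup script; a minimal setup.py sketch (example.pyx is a placeholder name):

    from distutils.core import setup
    from Cython.Build import cythonize

    # cythonize() turns the .pyx source into Extension objects with generated
    # .c files; setup() then compiles them with the configured C compiler.
    setup(ext_modules=cythonize("example.pyx"))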
diff --git a/contrib/tools/cython/Cython/CodeWriter.py b/contrib/tools/cython/Cython/CodeWriter.py
index e5e8fe15e1..2e4646a654 100644
--- a/contrib/tools/cython/Cython/CodeWriter.py
+++ b/contrib/tools/cython/Cython/CodeWriter.py
@@ -1,524 +1,524 @@
-"""
-Serializes a Cython code tree to Cython code. This is primarily useful for
-debugging and testing purposes.
-
-The output is in a strict format, no whitespace or comments from the input
-is preserved (and it could not be as it is not present in the code tree).
-"""
-
+"""
+Serializes a Cython code tree to Cython code. This is primarily useful for
+debugging and testing purposes.
+
+The output is in a strict format, no whitespace or comments from the input
+is preserved (and it could not be as it is not present in the code tree).
+"""
+
from __future__ import absolute_import, print_function
-
+
from .Compiler.Visitor import TreeVisitor
from .Compiler.ExprNodes import *
-class LinesResult(object):
- def __init__(self):
- self.lines = []
- self.s = u""
-
- def put(self, s):
- self.s += s
-
- def newline(self):
- self.lines.append(self.s)
- self.s = u""
-
- def putline(self, s):
- self.put(s)
- self.newline()
-
-class DeclarationWriter(TreeVisitor):
-
- indent_string = u" "
-
+class LinesResult(object):
+ def __init__(self):
+ self.lines = []
+ self.s = u""
+
+ def put(self, s):
+ self.s += s
+
+ def newline(self):
+ self.lines.append(self.s)
+ self.s = u""
+
+ def putline(self, s):
+ self.put(s)
+ self.newline()
+
+class DeclarationWriter(TreeVisitor):
+
+ indent_string = u" "
+
def __init__(self, result=None):
- super(DeclarationWriter, self).__init__()
- if result is None:
- result = LinesResult()
- self.result = result
- self.numindents = 0
- self.tempnames = {}
- self.tempblockindex = 0
-
- def write(self, tree):
- self.visit(tree)
- return self.result
-
- def indent(self):
- self.numindents += 1
-
- def dedent(self):
- self.numindents -= 1
-
+ super(DeclarationWriter, self).__init__()
+ if result is None:
+ result = LinesResult()
+ self.result = result
+ self.numindents = 0
+ self.tempnames = {}
+ self.tempblockindex = 0
+
+ def write(self, tree):
+ self.visit(tree)
+ return self.result
+
+ def indent(self):
+ self.numindents += 1
+
+ def dedent(self):
+ self.numindents -= 1
+
def startline(self, s=u""):
- self.result.put(self.indent_string * self.numindents + s)
-
- def put(self, s):
- self.result.put(s)
-
- def putline(self, s):
- self.result.putline(self.indent_string * self.numindents + s)
-
+ self.result.put(self.indent_string * self.numindents + s)
+
+ def put(self, s):
+ self.result.put(s)
+
+ def putline(self, s):
+ self.result.putline(self.indent_string * self.numindents + s)
+
def endline(self, s=u""):
- self.result.putline(s)
-
- def line(self, s):
- self.startline(s)
- self.endline()
-
- def comma_separated_list(self, items, output_rhs=False):
- if len(items) > 0:
- for item in items[:-1]:
- self.visit(item)
- if output_rhs and item.default is not None:
- self.put(u" = ")
- self.visit(item.default)
- self.put(u", ")
- self.visit(items[-1])
-
- def visit_Node(self, node):
- raise AssertionError("Node not handled by serializer: %r" % node)
-
- def visit_ModuleNode(self, node):
- self.visitchildren(node)
-
- def visit_StatListNode(self, node):
- self.visitchildren(node)
-
- def visit_CDefExternNode(self, node):
- if node.include_file is None:
- file = u'*'
- else:
- file = u'"%s"' % node.include_file
- self.putline(u"cdef extern from %s:" % file)
- self.indent()
- self.visit(node.body)
- self.dedent()
-
- def visit_CPtrDeclaratorNode(self, node):
- self.put('*')
- self.visit(node.base)
-
- def visit_CReferenceDeclaratorNode(self, node):
- self.put('&')
- self.visit(node.base)
-
- def visit_CArrayDeclaratorNode(self, node):
- self.visit(node.base)
- self.put(u'[')
- if node.dimension is not None:
- self.visit(node.dimension)
- self.put(u']')
-
- def visit_CArrayDeclaratorNode(self, node):
- self.visit(node.base)
- self.put(u'[')
- if node.dimension is not None:
- self.visit(node.dimension)
- self.put(u']')
-
- def visit_CFuncDeclaratorNode(self, node):
- # TODO: except, gil, etc.
- self.visit(node.base)
- self.put(u'(')
- self.comma_separated_list(node.args)
- self.endline(u')')
-
- def visit_CNameDeclaratorNode(self, node):
- self.put(node.name)
-
- def visit_CSimpleBaseTypeNode(self, node):
- # See Parsing.p_sign_and_longness
- if node.is_basic_c_type:
- self.put(("unsigned ", "", "signed ")[node.signed])
- if node.longness < 0:
- self.put("short " * -node.longness)
- elif node.longness > 0:
- self.put("long " * node.longness)
- self.put(node.name)
-
- def visit_CComplexBaseTypeNode(self, node):
- self.put(u'(')
- self.visit(node.base_type)
- self.visit(node.declarator)
- self.put(u')')
-
- def visit_CNestedBaseTypeNode(self, node):
- self.visit(node.base_type)
- self.put(u'.')
- self.put(node.name)
-
- def visit_TemplatedTypeNode(self, node):
- self.visit(node.base_type_node)
- self.put(u'[')
- self.comma_separated_list(node.positional_args + node.keyword_args.key_value_pairs)
- self.put(u']')
-
- def visit_CVarDefNode(self, node):
- self.startline(u"cdef ")
- self.visit(node.base_type)
- self.put(u" ")
- self.comma_separated_list(node.declarators, output_rhs=True)
- self.endline()
-
- def visit_container_node(self, node, decl, extras, attributes):
- # TODO: visibility
- self.startline(decl)
- if node.name:
- self.put(u' ')
- self.put(node.name)
- if node.cname is not None:
- self.put(u' "%s"' % node.cname)
- if extras:
- self.put(extras)
- self.endline(':')
- self.indent()
- if not attributes:
- self.putline('pass')
- else:
- for attribute in attributes:
- self.visit(attribute)
- self.dedent()
-
- def visit_CStructOrUnionDefNode(self, node):
- if node.typedef_flag:
- decl = u'ctypedef '
- else:
- decl = u'cdef '
- if node.visibility == 'public':
- decl += u'public '
- if node.packed:
- decl += u'packed '
- decl += node.kind
- self.visit_container_node(node, decl, None, node.attributes)
-
- def visit_CppClassNode(self, node):
- extras = ""
- if node.templates:
- extras = u"[%s]" % ", ".join(node.templates)
- if node.base_classes:
- extras += "(%s)" % ", ".join(node.base_classes)
- self.visit_container_node(node, u"cdef cppclass", extras, node.attributes)
-
- def visit_CEnumDefNode(self, node):
- self.visit_container_node(node, u"cdef enum", None, node.items)
-
- def visit_CEnumDefItemNode(self, node):
- self.startline(node.name)
- if node.cname:
- self.put(u' "%s"' % node.cname)
- if node.value:
- self.put(u" = ")
- self.visit(node.value)
- self.endline()
-
- def visit_CClassDefNode(self, node):
- assert not node.module_name
- if node.decorators:
- for decorator in node.decorators:
- self.visit(decorator)
- self.startline(u"cdef class ")
- self.put(node.class_name)
- if node.base_class_name:
- self.put(u"(")
- if node.base_class_module:
- self.put(node.base_class_module)
- self.put(u".")
- self.put(node.base_class_name)
- self.put(u")")
- self.endline(u":")
- self.indent()
- self.visit(node.body)
- self.dedent()
-
- def visit_CTypeDefNode(self, node):
- self.startline(u"ctypedef ")
- self.visit(node.base_type)
- self.put(u" ")
- self.visit(node.declarator)
- self.endline()
-
- def visit_FuncDefNode(self, node):
- self.startline(u"def %s(" % node.name)
- self.comma_separated_list(node.args)
- self.endline(u"):")
- self.indent()
- self.visit(node.body)
- self.dedent()
-
- def visit_CArgDeclNode(self, node):
- if node.base_type.name is not None:
- self.visit(node.base_type)
- self.put(u" ")
- self.visit(node.declarator)
- if node.default is not None:
- self.put(u" = ")
- self.visit(node.default)
-
- def visit_CImportStatNode(self, node):
- self.startline(u"cimport ")
- self.put(node.module_name)
- if node.as_name:
- self.put(u" as ")
- self.put(node.as_name)
- self.endline()
-
- def visit_FromCImportStatNode(self, node):
- self.startline(u"from ")
- self.put(node.module_name)
- self.put(u" cimport ")
- first = True
- for pos, name, as_name, kind in node.imported_names:
- assert kind is None
- if first:
- first = False
- else:
- self.put(u", ")
- self.put(name)
- if as_name:
- self.put(u" as ")
- self.put(as_name)
- self.endline()
-
- def visit_NameNode(self, node):
- self.put(node.name)
-
- def visit_IntNode(self, node):
- self.put(node.value)
-
- def visit_NoneNode(self, node):
- self.put(u"None")
-
- def visit_NotNode(self, node):
- self.put(u"(not ")
- self.visit(node.operand)
- self.put(u")")
-
- def visit_DecoratorNode(self, node):
- self.startline("@")
- self.visit(node.decorator)
- self.endline()
-
- def visit_BinopNode(self, node):
- self.visit(node.operand1)
- self.put(u" %s " % node.operator)
- self.visit(node.operand2)
-
- def visit_AttributeNode(self, node):
- self.visit(node.obj)
- self.put(u".%s" % node.attribute)
-
- def visit_BoolNode(self, node):
- self.put(str(node.value))
-
- # FIXME: represent string nodes correctly
- def visit_StringNode(self, node):
- value = node.value
- if value.encoding is not None:
- value = value.encode(value.encoding)
- self.put(repr(value))
-
- def visit_PassStatNode(self, node):
- self.startline(u"pass")
- self.endline()
-
-class CodeWriter(DeclarationWriter):
-
- def visit_SingleAssignmentNode(self, node):
- self.startline()
- self.visit(node.lhs)
- self.put(u" = ")
- self.visit(node.rhs)
- self.endline()
-
- def visit_CascadedAssignmentNode(self, node):
- self.startline()
- for lhs in node.lhs_list:
- self.visit(lhs)
- self.put(u" = ")
- self.visit(node.rhs)
- self.endline()
-
- def visit_PrintStatNode(self, node):
- self.startline(u"print ")
- self.comma_separated_list(node.arg_tuple.args)
- if not node.append_newline:
- self.put(u",")
- self.endline()
-
- def visit_ForInStatNode(self, node):
- self.startline(u"for ")
- self.visit(node.target)
- self.put(u" in ")
- self.visit(node.iterator.sequence)
- self.endline(u":")
- self.indent()
- self.visit(node.body)
- self.dedent()
- if node.else_clause is not None:
- self.line(u"else:")
- self.indent()
- self.visit(node.else_clause)
- self.dedent()
-
- def visit_IfStatNode(self, node):
+ self.result.putline(s)
+
+ def line(self, s):
+ self.startline(s)
+ self.endline()
+
+ def comma_separated_list(self, items, output_rhs=False):
+ if len(items) > 0:
+ for item in items[:-1]:
+ self.visit(item)
+ if output_rhs and item.default is not None:
+ self.put(u" = ")
+ self.visit(item.default)
+ self.put(u", ")
+ self.visit(items[-1])
+
+ def visit_Node(self, node):
+ raise AssertionError("Node not handled by serializer: %r" % node)
+
+ def visit_ModuleNode(self, node):
+ self.visitchildren(node)
+
+ def visit_StatListNode(self, node):
+ self.visitchildren(node)
+
+ def visit_CDefExternNode(self, node):
+ if node.include_file is None:
+ file = u'*'
+ else:
+ file = u'"%s"' % node.include_file
+ self.putline(u"cdef extern from %s:" % file)
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+
+ def visit_CPtrDeclaratorNode(self, node):
+ self.put('*')
+ self.visit(node.base)
+
+ def visit_CReferenceDeclaratorNode(self, node):
+ self.put('&')
+ self.visit(node.base)
+
+ def visit_CArrayDeclaratorNode(self, node):
+ self.visit(node.base)
+ self.put(u'[')
+ if node.dimension is not None:
+ self.visit(node.dimension)
+ self.put(u']')
+
+ def visit_CArrayDeclaratorNode(self, node):
+ self.visit(node.base)
+ self.put(u'[')
+ if node.dimension is not None:
+ self.visit(node.dimension)
+ self.put(u']')
+
+ def visit_CFuncDeclaratorNode(self, node):
+ # TODO: except, gil, etc.
+ self.visit(node.base)
+ self.put(u'(')
+ self.comma_separated_list(node.args)
+ self.endline(u')')
+
+ def visit_CNameDeclaratorNode(self, node):
+ self.put(node.name)
+
+ def visit_CSimpleBaseTypeNode(self, node):
+ # See Parsing.p_sign_and_longness
+ if node.is_basic_c_type:
+ self.put(("unsigned ", "", "signed ")[node.signed])
+ if node.longness < 0:
+ self.put("short " * -node.longness)
+ elif node.longness > 0:
+ self.put("long " * node.longness)
+ self.put(node.name)
+
+ def visit_CComplexBaseTypeNode(self, node):
+ self.put(u'(')
+ self.visit(node.base_type)
+ self.visit(node.declarator)
+ self.put(u')')
+
+ def visit_CNestedBaseTypeNode(self, node):
+ self.visit(node.base_type)
+ self.put(u'.')
+ self.put(node.name)
+
+ def visit_TemplatedTypeNode(self, node):
+ self.visit(node.base_type_node)
+ self.put(u'[')
+ self.comma_separated_list(node.positional_args + node.keyword_args.key_value_pairs)
+ self.put(u']')
+
+ def visit_CVarDefNode(self, node):
+ self.startline(u"cdef ")
+ self.visit(node.base_type)
+ self.put(u" ")
+ self.comma_separated_list(node.declarators, output_rhs=True)
+ self.endline()
+
+ def visit_container_node(self, node, decl, extras, attributes):
+ # TODO: visibility
+ self.startline(decl)
+ if node.name:
+ self.put(u' ')
+ self.put(node.name)
+ if node.cname is not None:
+ self.put(u' "%s"' % node.cname)
+ if extras:
+ self.put(extras)
+ self.endline(':')
+ self.indent()
+ if not attributes:
+ self.putline('pass')
+ else:
+ for attribute in attributes:
+ self.visit(attribute)
+ self.dedent()
+
+ def visit_CStructOrUnionDefNode(self, node):
+ if node.typedef_flag:
+ decl = u'ctypedef '
+ else:
+ decl = u'cdef '
+ if node.visibility == 'public':
+ decl += u'public '
+ if node.packed:
+ decl += u'packed '
+ decl += node.kind
+ self.visit_container_node(node, decl, None, node.attributes)
+
+ def visit_CppClassNode(self, node):
+ extras = ""
+ if node.templates:
+ extras = u"[%s]" % ", ".join(node.templates)
+ if node.base_classes:
+ extras += "(%s)" % ", ".join(node.base_classes)
+ self.visit_container_node(node, u"cdef cppclass", extras, node.attributes)
+
+ def visit_CEnumDefNode(self, node):
+ self.visit_container_node(node, u"cdef enum", None, node.items)
+
+ def visit_CEnumDefItemNode(self, node):
+ self.startline(node.name)
+ if node.cname:
+ self.put(u' "%s"' % node.cname)
+ if node.value:
+ self.put(u" = ")
+ self.visit(node.value)
+ self.endline()
+
+ def visit_CClassDefNode(self, node):
+ assert not node.module_name
+ if node.decorators:
+ for decorator in node.decorators:
+ self.visit(decorator)
+ self.startline(u"cdef class ")
+ self.put(node.class_name)
+ if node.base_class_name:
+ self.put(u"(")
+ if node.base_class_module:
+ self.put(node.base_class_module)
+ self.put(u".")
+ self.put(node.base_class_name)
+ self.put(u")")
+ self.endline(u":")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+
+ def visit_CTypeDefNode(self, node):
+ self.startline(u"ctypedef ")
+ self.visit(node.base_type)
+ self.put(u" ")
+ self.visit(node.declarator)
+ self.endline()
+
+ def visit_FuncDefNode(self, node):
+ self.startline(u"def %s(" % node.name)
+ self.comma_separated_list(node.args)
+ self.endline(u"):")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+
+ def visit_CArgDeclNode(self, node):
+ if node.base_type.name is not None:
+ self.visit(node.base_type)
+ self.put(u" ")
+ self.visit(node.declarator)
+ if node.default is not None:
+ self.put(u" = ")
+ self.visit(node.default)
+
+ def visit_CImportStatNode(self, node):
+ self.startline(u"cimport ")
+ self.put(node.module_name)
+ if node.as_name:
+ self.put(u" as ")
+ self.put(node.as_name)
+ self.endline()
+
+ def visit_FromCImportStatNode(self, node):
+ self.startline(u"from ")
+ self.put(node.module_name)
+ self.put(u" cimport ")
+ first = True
+ for pos, name, as_name, kind in node.imported_names:
+ assert kind is None
+ if first:
+ first = False
+ else:
+ self.put(u", ")
+ self.put(name)
+ if as_name:
+ self.put(u" as ")
+ self.put(as_name)
+ self.endline()
+
+ def visit_NameNode(self, node):
+ self.put(node.name)
+
+ def visit_IntNode(self, node):
+ self.put(node.value)
+
+ def visit_NoneNode(self, node):
+ self.put(u"None")
+
+ def visit_NotNode(self, node):
+ self.put(u"(not ")
+ self.visit(node.operand)
+ self.put(u")")
+
+ def visit_DecoratorNode(self, node):
+ self.startline("@")
+ self.visit(node.decorator)
+ self.endline()
+
+ def visit_BinopNode(self, node):
+ self.visit(node.operand1)
+ self.put(u" %s " % node.operator)
+ self.visit(node.operand2)
+
+ def visit_AttributeNode(self, node):
+ self.visit(node.obj)
+ self.put(u".%s" % node.attribute)
+
+ def visit_BoolNode(self, node):
+ self.put(str(node.value))
+
+ # FIXME: represent string nodes correctly
+ def visit_StringNode(self, node):
+ value = node.value
+ if value.encoding is not None:
+ value = value.encode(value.encoding)
+ self.put(repr(value))
+
+ def visit_PassStatNode(self, node):
+ self.startline(u"pass")
+ self.endline()
+
+class CodeWriter(DeclarationWriter):
+
+ def visit_SingleAssignmentNode(self, node):
+ self.startline()
+ self.visit(node.lhs)
+ self.put(u" = ")
+ self.visit(node.rhs)
+ self.endline()
+
+ def visit_CascadedAssignmentNode(self, node):
+ self.startline()
+ for lhs in node.lhs_list:
+ self.visit(lhs)
+ self.put(u" = ")
+ self.visit(node.rhs)
+ self.endline()
+
+ def visit_PrintStatNode(self, node):
+ self.startline(u"print ")
+ self.comma_separated_list(node.arg_tuple.args)
+ if not node.append_newline:
+ self.put(u",")
+ self.endline()
+
+ def visit_ForInStatNode(self, node):
+ self.startline(u"for ")
+ self.visit(node.target)
+ self.put(u" in ")
+ self.visit(node.iterator.sequence)
+ self.endline(u":")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+ if node.else_clause is not None:
+ self.line(u"else:")
+ self.indent()
+ self.visit(node.else_clause)
+ self.dedent()
+
+ def visit_IfStatNode(self, node):
# The IfClauseNode is handled directly without a separate match
-        # for clarity.
- self.startline(u"if ")
- self.visit(node.if_clauses[0].condition)
- self.endline(":")
- self.indent()
- self.visit(node.if_clauses[0].body)
- self.dedent()
- for clause in node.if_clauses[1:]:
- self.startline("elif ")
- self.visit(clause.condition)
- self.endline(":")
- self.indent()
- self.visit(clause.body)
- self.dedent()
- if node.else_clause is not None:
- self.line("else:")
- self.indent()
- self.visit(node.else_clause)
- self.dedent()
-
- def visit_SequenceNode(self, node):
- self.comma_separated_list(node.args) # Might need to discover whether we need () around tuples...hmm...
-
- def visit_SimpleCallNode(self, node):
- self.visit(node.function)
- self.put(u"(")
- self.comma_separated_list(node.args)
- self.put(")")
-
- def visit_GeneralCallNode(self, node):
- self.visit(node.function)
- self.put(u"(")
- posarg = node.positional_args
- if isinstance(posarg, AsTupleNode):
- self.visit(posarg.arg)
- else:
- self.comma_separated_list(posarg.args) # TupleNode.args
- if node.keyword_args:
- if isinstance(node.keyword_args, DictNode):
- for i, (name, value) in enumerate(node.keyword_args.key_value_pairs):
- if i > 0:
- self.put(', ')
- self.visit(name)
- self.put('=')
- self.visit(value)
- else:
- raise Exception("Not implemented yet")
- self.put(u")")
-
- def visit_ExprStatNode(self, node):
- self.startline()
- self.visit(node.expr)
- self.endline()
-
- def visit_InPlaceAssignmentNode(self, node):
- self.startline()
- self.visit(node.lhs)
- self.put(u" %s= " % node.operator)
- self.visit(node.rhs)
- self.endline()
-
- def visit_WithStatNode(self, node):
- self.startline()
- self.put(u"with ")
- self.visit(node.manager)
- if node.target is not None:
- self.put(u" as ")
- self.visit(node.target)
- self.endline(u":")
- self.indent()
- self.visit(node.body)
- self.dedent()
-
- def visit_TryFinallyStatNode(self, node):
- self.line(u"try:")
- self.indent()
- self.visit(node.body)
- self.dedent()
- self.line(u"finally:")
- self.indent()
- self.visit(node.finally_clause)
- self.dedent()
-
- def visit_TryExceptStatNode(self, node):
- self.line(u"try:")
- self.indent()
- self.visit(node.body)
- self.dedent()
- for x in node.except_clauses:
- self.visit(x)
- if node.else_clause is not None:
- self.visit(node.else_clause)
-
- def visit_ExceptClauseNode(self, node):
- self.startline(u"except")
- if node.pattern is not None:
- self.put(u" ")
- self.visit(node.pattern)
- if node.target is not None:
- self.put(u", ")
- self.visit(node.target)
- self.endline(":")
- self.indent()
- self.visit(node.body)
- self.dedent()
-
- def visit_ReturnStatNode(self, node):
- self.startline("return ")
- self.visit(node.value)
- self.endline()
-
- def visit_ReraiseStatNode(self, node):
- self.line("raise")
-
- def visit_ImportNode(self, node):
- self.put(u"(import %s)" % node.module_name.value)
-
- def visit_TempsBlockNode(self, node):
- """
-        Temporaries are output like $1_1, where the first number is
- an index of the TempsBlockNode and the second number is an index
- of the temporary which that block allocates.
- """
- idx = 0
- for handle in node.temps:
- self.tempnames[handle] = "$%d_%d" % (self.tempblockindex, idx)
- idx += 1
- self.tempblockindex += 1
- self.visit(node.body)
-
- def visit_TempRefNode(self, node):
- self.put(self.tempnames[node.handle])
-
-
-class PxdWriter(DeclarationWriter):
- def __call__(self, node):
+        # for clarity.
+ self.startline(u"if ")
+ self.visit(node.if_clauses[0].condition)
+ self.endline(":")
+ self.indent()
+ self.visit(node.if_clauses[0].body)
+ self.dedent()
+ for clause in node.if_clauses[1:]:
+ self.startline("elif ")
+ self.visit(clause.condition)
+ self.endline(":")
+ self.indent()
+ self.visit(clause.body)
+ self.dedent()
+ if node.else_clause is not None:
+ self.line("else:")
+ self.indent()
+ self.visit(node.else_clause)
+ self.dedent()
+
+ def visit_SequenceNode(self, node):
+ self.comma_separated_list(node.args) # Might need to discover whether we need () around tuples...hmm...
+
+ def visit_SimpleCallNode(self, node):
+ self.visit(node.function)
+ self.put(u"(")
+ self.comma_separated_list(node.args)
+ self.put(")")
+
+ def visit_GeneralCallNode(self, node):
+ self.visit(node.function)
+ self.put(u"(")
+ posarg = node.positional_args
+ if isinstance(posarg, AsTupleNode):
+ self.visit(posarg.arg)
+ else:
+ self.comma_separated_list(posarg.args) # TupleNode.args
+ if node.keyword_args:
+ if isinstance(node.keyword_args, DictNode):
+ for i, (name, value) in enumerate(node.keyword_args.key_value_pairs):
+ if i > 0:
+ self.put(', ')
+ self.visit(name)
+ self.put('=')
+ self.visit(value)
+ else:
+ raise Exception("Not implemented yet")
+ self.put(u")")
+
+ def visit_ExprStatNode(self, node):
+ self.startline()
+ self.visit(node.expr)
+ self.endline()
+
+ def visit_InPlaceAssignmentNode(self, node):
+ self.startline()
+ self.visit(node.lhs)
+ self.put(u" %s= " % node.operator)
+ self.visit(node.rhs)
+ self.endline()
+
+ def visit_WithStatNode(self, node):
+ self.startline()
+ self.put(u"with ")
+ self.visit(node.manager)
+ if node.target is not None:
+ self.put(u" as ")
+ self.visit(node.target)
+ self.endline(u":")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+
+ def visit_TryFinallyStatNode(self, node):
+ self.line(u"try:")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+ self.line(u"finally:")
+ self.indent()
+ self.visit(node.finally_clause)
+ self.dedent()
+
+ def visit_TryExceptStatNode(self, node):
+ self.line(u"try:")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+ for x in node.except_clauses:
+ self.visit(x)
+ if node.else_clause is not None:
+ self.visit(node.else_clause)
+
+ def visit_ExceptClauseNode(self, node):
+ self.startline(u"except")
+ if node.pattern is not None:
+ self.put(u" ")
+ self.visit(node.pattern)
+ if node.target is not None:
+ self.put(u", ")
+ self.visit(node.target)
+ self.endline(":")
+ self.indent()
+ self.visit(node.body)
+ self.dedent()
+
+ def visit_ReturnStatNode(self, node):
+ self.startline("return ")
+ self.visit(node.value)
+ self.endline()
+
+ def visit_ReraiseStatNode(self, node):
+ self.line("raise")
+
+ def visit_ImportNode(self, node):
+ self.put(u"(import %s)" % node.module_name.value)
+
+ def visit_TempsBlockNode(self, node):
+ """
+        Temporaries are output like $1_1, where the first number is
+ an index of the TempsBlockNode and the second number is an index
+ of the temporary which that block allocates.
+ """
+ idx = 0
+ for handle in node.temps:
+ self.tempnames[handle] = "$%d_%d" % (self.tempblockindex, idx)
+ idx += 1
+ self.tempblockindex += 1
+ self.visit(node.body)
+
+ def visit_TempRefNode(self, node):
+ self.put(self.tempnames[node.handle])
+
+
+class PxdWriter(DeclarationWriter):
+ def __call__(self, node):
print(u'\n'.join(self.write(node).lines))
- return node
-
- def visit_CFuncDefNode(self, node):
- if 'inline' in node.modifiers:
- return
- if node.overridable:
- self.startline(u'cpdef ')
- else:
- self.startline(u'cdef ')
- if node.visibility != 'private':
- self.put(node.visibility)
- self.put(u' ')
- if node.api:
- self.put(u'api ')
- self.visit(node.declarator)
-
- def visit_StatNode(self, node):
- pass
+ return node
+
+ def visit_CFuncDefNode(self, node):
+ if 'inline' in node.modifiers:
+ return
+ if node.overridable:
+ self.startline(u'cpdef ')
+ else:
+ self.startline(u'cdef ')
+ if node.visibility != 'private':
+ self.put(node.visibility)
+ self.put(u' ')
+ if node.api:
+ self.put(u'api ')
+ self.visit(node.declarator)
+
+ def visit_StatNode(self, node):
+ pass
class ExpressionWriter(TreeVisitor):
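Note on the writers restored in the hunk above: DeclarationWriter, CodeWriter and PxdWriter serialize a Cython parse tree back to source text, with each visit_*Node method emitting tokens through startline()/put()/endline() while indent()/dedent() track the nesting level. The following standalone sketch is not part of this diff; TinyWriter and the sample calls are hypothetical and only illustrate the same emit discipline:

    class TinyWriter(object):
        # Minimal illustration of the emit protocol used by DeclarationWriter:
        # startline() opens a line at the current indentation, put() appends
        # tokens to it, endline() flushes it, line() does all three at once.
        def __init__(self, indent_string=u"    "):
            self.indent_string = indent_string
            self.level = 0
            self.lines = []
            self.current = u""

        def indent(self):
            self.level += 1

        def dedent(self):
            self.level -= 1

        def startline(self, s=u""):
            self.current = self.indent_string * self.level + s

        def put(self, s):
            self.current += s

        def endline(self, s=u""):
            self.lines.append(self.current + s)
            self.current = u""

        def line(self, s):
            self.startline(s)
            self.endline()

    w = TinyWriter()
    w.startline(u"def scale(")
    w.put(u"x, factor=2")
    w.endline(u"):")
    w.indent()
    w.line(u"return x * factor")
    w.dedent()
    print(u"\n".join(w.lines))
    # def scale(x, factor=2):
    #     return x * factor
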
diff --git a/contrib/tools/cython/Cython/Compiler/AnalysedTreeTransforms.py b/contrib/tools/cython/Cython/Compiler/AnalysedTreeTransforms.py
index 6f6731345e..07bf31f3e0 100644
--- a/contrib/tools/cython/Cython/Compiler/AnalysedTreeTransforms.py
+++ b/contrib/tools/cython/Cython/Compiler/AnalysedTreeTransforms.py
@@ -1,99 +1,99 @@
-from __future__ import absolute_import
-
-from .Visitor import ScopeTrackingTransform
-from .Nodes import StatListNode, SingleAssignmentNode, CFuncDefNode, DefNode
-from .ExprNodes import DictNode, DictItemNode, NameNode, UnicodeNode
-from .PyrexTypes import py_object_type
-from .StringEncoding import EncodedString
-from . import Symtab
-
-class AutoTestDictTransform(ScopeTrackingTransform):
- # Handles autotestdict directive
-
- blacklist = ['__cinit__', '__dealloc__', '__richcmp__',
- '__nonzero__', '__bool__',
- '__len__', '__contains__']
-
- def visit_ModuleNode(self, node):
- if node.is_pxd:
- return node
- self.scope_type = 'module'
- self.scope_node = node
-
- if not self.current_directives['autotestdict']:
- return node
- self.all_docstrings = self.current_directives['autotestdict.all']
- self.cdef_docstrings = self.all_docstrings or self.current_directives['autotestdict.cdef']
-
- assert isinstance(node.body, StatListNode)
-
- # First see if __test__ is already created
- if u'__test__' in node.scope.entries:
- # Do nothing
- return node
-
- pos = node.pos
-
- self.tests = []
- self.testspos = node.pos
-
- test_dict_entry = node.scope.declare_var(EncodedString(u'__test__'),
- py_object_type,
- pos,
- visibility='public')
- create_test_dict_assignment = SingleAssignmentNode(pos,
- lhs=NameNode(pos, name=EncodedString(u'__test__'),
- entry=test_dict_entry),
- rhs=DictNode(pos, key_value_pairs=self.tests))
- self.visitchildren(node)
- node.body.stats.append(create_test_dict_assignment)
- return node
-
- def add_test(self, testpos, path, doctest):
- pos = self.testspos
- keystr = u'%s (line %d)' % (path, testpos[1])
- key = UnicodeNode(pos, value=EncodedString(keystr))
- value = UnicodeNode(pos, value=doctest)
- self.tests.append(DictItemNode(pos, key=key, value=value))
-
- def visit_ExprNode(self, node):
- # expressions cannot contain functions and lambda expressions
- # do not have a docstring
- return node
-
- def visit_FuncDefNode(self, node):
- if not node.doc or (isinstance(node, DefNode) and node.fused_py_func):
- return node
- if not self.cdef_docstrings:
- if isinstance(node, CFuncDefNode) and not node.py_func:
- return node
- if not self.all_docstrings and '>>>' not in node.doc:
- return node
-
- pos = self.testspos
- if self.scope_type == 'module':
- path = node.entry.name
- elif self.scope_type in ('pyclass', 'cclass'):
- if isinstance(node, CFuncDefNode):
- if node.py_func is not None:
- name = node.py_func.name
- else:
- name = node.entry.name
- else:
- name = node.name
- if self.scope_type == 'cclass' and name in self.blacklist:
- return node
- if self.scope_type == 'pyclass':
- class_name = self.scope_node.name
- else:
- class_name = self.scope_node.class_name
- if isinstance(node.entry.scope, Symtab.PropertyScope):
- property_method_name = node.entry.scope.name
- path = "%s.%s.%s" % (class_name, node.entry.scope.name,
- node.entry.name)
- else:
- path = "%s.%s" % (class_name, node.entry.name)
- else:
- assert False
- self.add_test(node.pos, path, node.doc)
- return node
+from __future__ import absolute_import
+
+from .Visitor import ScopeTrackingTransform
+from .Nodes import StatListNode, SingleAssignmentNode, CFuncDefNode, DefNode
+from .ExprNodes import DictNode, DictItemNode, NameNode, UnicodeNode
+from .PyrexTypes import py_object_type
+from .StringEncoding import EncodedString
+from . import Symtab
+
+class AutoTestDictTransform(ScopeTrackingTransform):
+ # Handles autotestdict directive
+
+ blacklist = ['__cinit__', '__dealloc__', '__richcmp__',
+ '__nonzero__', '__bool__',
+ '__len__', '__contains__']
+
+ def visit_ModuleNode(self, node):
+ if node.is_pxd:
+ return node
+ self.scope_type = 'module'
+ self.scope_node = node
+
+ if not self.current_directives['autotestdict']:
+ return node
+ self.all_docstrings = self.current_directives['autotestdict.all']
+ self.cdef_docstrings = self.all_docstrings or self.current_directives['autotestdict.cdef']
+
+ assert isinstance(node.body, StatListNode)
+
+ # First see if __test__ is already created
+ if u'__test__' in node.scope.entries:
+ # Do nothing
+ return node
+
+ pos = node.pos
+
+ self.tests = []
+ self.testspos = node.pos
+
+ test_dict_entry = node.scope.declare_var(EncodedString(u'__test__'),
+ py_object_type,
+ pos,
+ visibility='public')
+ create_test_dict_assignment = SingleAssignmentNode(pos,
+ lhs=NameNode(pos, name=EncodedString(u'__test__'),
+ entry=test_dict_entry),
+ rhs=DictNode(pos, key_value_pairs=self.tests))
+ self.visitchildren(node)
+ node.body.stats.append(create_test_dict_assignment)
+ return node
+
+ def add_test(self, testpos, path, doctest):
+ pos = self.testspos
+ keystr = u'%s (line %d)' % (path, testpos[1])
+ key = UnicodeNode(pos, value=EncodedString(keystr))
+ value = UnicodeNode(pos, value=doctest)
+ self.tests.append(DictItemNode(pos, key=key, value=value))
+
+ def visit_ExprNode(self, node):
+ # expressions cannot contain functions and lambda expressions
+ # do not have a docstring
+ return node
+
+ def visit_FuncDefNode(self, node):
+ if not node.doc or (isinstance(node, DefNode) and node.fused_py_func):
+ return node
+ if not self.cdef_docstrings:
+ if isinstance(node, CFuncDefNode) and not node.py_func:
+ return node
+ if not self.all_docstrings and '>>>' not in node.doc:
+ return node
+
+ pos = self.testspos
+ if self.scope_type == 'module':
+ path = node.entry.name
+ elif self.scope_type in ('pyclass', 'cclass'):
+ if isinstance(node, CFuncDefNode):
+ if node.py_func is not None:
+ name = node.py_func.name
+ else:
+ name = node.entry.name
+ else:
+ name = node.name
+ if self.scope_type == 'cclass' and name in self.blacklist:
+ return node
+ if self.scope_type == 'pyclass':
+ class_name = self.scope_node.name
+ else:
+ class_name = self.scope_node.class_name
+ if isinstance(node.entry.scope, Symtab.PropertyScope):
+ property_method_name = node.entry.scope.name
+ path = "%s.%s.%s" % (class_name, node.entry.scope.name,
+ node.entry.name)
+ else:
+ path = "%s.%s" % (class_name, node.entry.name)
+ else:
+ assert False
+ self.add_test(node.pos, path, node.doc)
+ return node
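Note on the transform restored above: AutoTestDictTransform injects a module-level __test__ dict so that doctest can find the docstrings of compiled functions; add_test() keys each entry as "<path> (line <n>)" and visit_FuncDefNode() only collects docstrings containing '>>>' unless autotestdict.all is enabled. The pure-Python approximation below is not part of this diff; collect_doctests and the sample functions are made up for illustration, while the real transform operates on the Cython AST:

    import doctest

    def add(a, b):
        """
        >>> add(2, 3)
        5
        """
        return a + b

    def helper():
        """No doctest marker here, so it is skipped."""

    def collect_doctests(namespace):
        # Mirror add_test(): key is "<path> (line <n>)", value is the docstring.
        tests = {}
        for name, obj in namespace.items():
            doc = getattr(obj, "__doc__", None)
            if not doc or ">>>" not in doc:
                continue  # same filter as visit_FuncDefNode without autotestdict.all
            tests["%s (line %d)" % (name, obj.__code__.co_firstlineno)] = doc
        return tests

    __test__ = collect_doctests({"add": add, "helper": helper})
    print(sorted(__test__))         # e.g. ['add (line 4)']
    doctest.testmod(verbose=False)  # runs the doctest collected for add()
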
diff --git a/contrib/tools/cython/Cython/Compiler/Annotate.py b/contrib/tools/cython/Cython/Compiler/Annotate.py
index 6f8358225d..2ea38c00c7 100644
--- a/contrib/tools/cython/Cython/Compiler/Annotate.py
+++ b/contrib/tools/cython/Cython/Compiler/Annotate.py
@@ -1,35 +1,35 @@
-# Note: Work in progress
-
-from __future__ import absolute_import
-
-import os
+# Note: Work in progress
+
+from __future__ import absolute_import
+
+import os
import os.path
-import re
-import codecs
-import textwrap
+import re
+import codecs
+import textwrap
from datetime import datetime
from functools import partial
from collections import defaultdict
-try:
- from xml.sax.saxutils import escape as html_escape
-except ImportError:
- pass
+try:
+ from xml.sax.saxutils import escape as html_escape
+except ImportError:
+ pass
try:
from StringIO import StringIO
except ImportError:
from io import StringIO # does not support writing 'str' in Py2
-
-from . import Version
-from .Code import CCodeWriter
-from .. import Utils
-
-
-class AnnotationCCodeWriter(CCodeWriter):
-
- def __init__(self, create_from=None, buffer=None, copy_formatting=True):
+
+from . import Version
+from .Code import CCodeWriter
+from .. import Utils
+
+
+class AnnotationCCodeWriter(CCodeWriter):
+
+ def __init__(self, create_from=None, buffer=None, copy_formatting=True):
CCodeWriter.__init__(self, create_from, buffer, copy_formatting=copy_formatting)
- if create_from is None:
- self.annotation_buffer = StringIO()
+ if create_from is None:
+ self.annotation_buffer = StringIO()
self.last_annotated_pos = None
# annotations[filename][line] -> [(column, AnnotationItem)*]
self.annotations = defaultdict(partial(defaultdict, list))
@@ -37,23 +37,23 @@ class AnnotationCCodeWriter(CCodeWriter):
self.code = defaultdict(partial(defaultdict, str))
# scopes[filename][line] -> set(scopes)
self.scopes = defaultdict(partial(defaultdict, set))
- else:
- # When creating an insertion point, keep references to the same database
- self.annotation_buffer = create_from.annotation_buffer
- self.annotations = create_from.annotations
- self.code = create_from.code
+ else:
+ # When creating an insertion point, keep references to the same database
+ self.annotation_buffer = create_from.annotation_buffer
+ self.annotations = create_from.annotations
+ self.code = create_from.code
self.scopes = create_from.scopes
self.last_annotated_pos = create_from.last_annotated_pos
-
- def create_new(self, create_from, buffer, copy_formatting):
- return AnnotationCCodeWriter(create_from, buffer, copy_formatting)
-
- def write(self, s):
- CCodeWriter.write(self, s)
- self.annotation_buffer.write(s)
-
+
+ def create_new(self, create_from, buffer, copy_formatting):
+ return AnnotationCCodeWriter(create_from, buffer, copy_formatting)
+
+ def write(self, s):
+ CCodeWriter.write(self, s)
+ self.annotation_buffer.write(s)
+
def mark_pos(self, pos, trace=True):
- if pos is not None:
+ if pos is not None:
CCodeWriter.mark_pos(self, pos, trace)
if self.funcstate and self.funcstate.scope:
# lambdas and genexprs can result in multiple scopes per line => keep them in a set
@@ -62,53 +62,53 @@ class AnnotationCCodeWriter(CCodeWriter):
source_desc, line, _ = self.last_annotated_pos
pos_code = self.code[source_desc.filename]
pos_code[line] += self.annotation_buffer.getvalue()
- self.annotation_buffer = StringIO()
+ self.annotation_buffer = StringIO()
self.last_annotated_pos = pos
-
- def annotate(self, pos, item):
+
+ def annotate(self, pos, item):
self.annotations[pos[0].filename][pos[1]].append((pos[2], item))
-
- def _css(self):
-        """CSS template; will later allow choosing a colormap"""
- css = [self._css_template]
- for i in range(255):
- color = u"FFFF%02x" % int(255/(1+i/10.0))
+
+ def _css(self):
+        """CSS template; will later allow choosing a colormap"""
+ css = [self._css_template]
+ for i in range(255):
+ color = u"FFFF%02x" % int(255/(1+i/10.0))
css.append('.cython.score-%d {background-color: #%s;}' % (i, color))
- try:
- from pygments.formatters import HtmlFormatter
- except ImportError:
- pass
+ try:
+ from pygments.formatters import HtmlFormatter
+ except ImportError:
+ pass
else:
css.append(HtmlFormatter().get_style_defs('.cython'))
return '\n'.join(css)
-
- _css_template = textwrap.dedent("""
- body.cython { font-family: courier; font-size: 12; }
-
- .cython.tag { }
- .cython.line { margin: 0em }
+
+ _css_template = textwrap.dedent("""
+ body.cython { font-family: courier; font-size: 12; }
+
+ .cython.tag { }
+ .cython.line { margin: 0em }
.cython.code { font-size: 9; color: #444444; display: none; margin: 0px 0px 0px 8px; border-left: 8px none; }
-
+
.cython.line .run { background-color: #B0FFB0; }
.cython.line .mis { background-color: #FFB0B0; }
.cython.code.run { border-left: 8px solid #B0FFB0; }
.cython.code.mis { border-left: 8px solid #FFB0B0; }
- .cython.code .py_c_api { color: red; }
- .cython.code .py_macro_api { color: #FF7000; }
- .cython.code .pyx_c_api { color: #FF3000; }
- .cython.code .pyx_macro_api { color: #FF7000; }
- .cython.code .refnanny { color: #FFA000; }
+ .cython.code .py_c_api { color: red; }
+ .cython.code .py_macro_api { color: #FF7000; }
+ .cython.code .pyx_c_api { color: #FF3000; }
+ .cython.code .pyx_macro_api { color: #FF7000; }
+ .cython.code .refnanny { color: #FFA000; }
.cython.code .trace { color: #FFA000; }
- .cython.code .error_goto { color: #FFA000; }
-
- .cython.code .coerce { color: #008000; border: 1px dotted #008000 }
- .cython.code .py_attr { color: #FF0000; font-weight: bold; }
- .cython.code .c_attr { color: #0000FF; }
- .cython.code .py_call { color: #FF0000; font-weight: bold; }
- .cython.code .c_call { color: #0000FF; }
- """)
-
+ .cython.code .error_goto { color: #FFA000; }
+
+ .cython.code .coerce { color: #008000; border: 1px dotted #008000 }
+ .cython.code .py_attr { color: #FF0000; font-weight: bold; }
+ .cython.code .c_attr { color: #0000FF; }
+ .cython.code .py_call { color: #FF0000; font-weight: bold; }
+ .cython.code .c_call { color: #0000FF; }
+ """)
+
# on-click toggle function to show/hide C source code
_onclick_attr = ' onclick="{0}"'.format((
"(function(s){"
@@ -118,34 +118,34 @@ class AnnotationCCodeWriter(CCodeWriter):
)
def save_annotation(self, source_filename, target_filename, coverage_xml=None):
- with Utils.open_source_file(source_filename) as f:
- code = f.read()
- generated_code = self.code.get(source_filename, {})
- c_file = Utils.decode_filename(os.path.basename(target_filename))
- html_filename = os.path.splitext(target_filename)[0] + ".html"
+ with Utils.open_source_file(source_filename) as f:
+ code = f.read()
+ generated_code = self.code.get(source_filename, {})
+ c_file = Utils.decode_filename(os.path.basename(target_filename))
+ html_filename = os.path.splitext(target_filename)[0] + ".html"
- with codecs.open(html_filename, "w", encoding="UTF-8") as out_buffer:
+ with codecs.open(html_filename, "w", encoding="UTF-8") as out_buffer:
out_buffer.write(self._save_annotation(code, generated_code, c_file, source_filename, coverage_xml))
-
+
def _save_annotation_header(self, c_file, source_filename, coverage_timestamp=None):
coverage_info = ''
if coverage_timestamp:
coverage_info = u' with coverage data from {timestamp}'.format(
timestamp=datetime.fromtimestamp(int(coverage_timestamp) // 1000))
- outlist = [
- textwrap.dedent(u'''\
- <!DOCTYPE html>
- <!-- Generated by Cython {watermark} -->
- <html>
- <head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ outlist = [
+ textwrap.dedent(u'''\
+ <!DOCTYPE html>
+ <!-- Generated by Cython {watermark} -->
+ <html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Cython: {filename}</title>
- <style type="text/css">
- {css}
- </style>
- </head>
- <body class="cython">
+ <style type="text/css">
+ {css}
+ </style>
+ </head>
+ <body class="cython">
<p><span style="border-bottom: solid 1px grey;">Generated by Cython {watermark}</span>{more_info}</p>
<p>
<span style="background-color: #FFFF00">Yellow lines</span> hint at Python interaction.<br />
@@ -154,21 +154,21 @@ class AnnotationCCodeWriter(CCodeWriter):
''').format(css=self._css(), watermark=Version.watermark,
filename=os.path.basename(source_filename) if source_filename else '',
more_info=coverage_info)
- ]
- if c_file:
- outlist.append(u'<p>Raw output: <a href="%s">%s</a></p>\n' % (c_file, c_file))
- return outlist
-
- def _save_annotation_footer(self):
- return (u'</body></html>\n',)
-
+ ]
+ if c_file:
+ outlist.append(u'<p>Raw output: <a href="%s">%s</a></p>\n' % (c_file, c_file))
+ return outlist
+
+ def _save_annotation_footer(self):
+ return (u'</body></html>\n',)
+
def _save_annotation(self, code, generated_code, c_file=None, source_filename=None, coverage_xml=None):
- """
-        code : original cython source code, as a single string
-        generated_code : generated c code keyed by line number in original file
-        c_file : filename in which the c_code has been written
-        source_filename : name of the original cython source file
-        coverage_xml : parsed coverage XML report, if available
- """
+ """
+        code : original cython source code, as a single string
+        generated_code : generated c code keyed by line number in original file
+        c_file : filename in which the c_code has been written
+        source_filename : name of the original cython source file
+        coverage_xml : parsed coverage XML report, if available
+ """
if coverage_xml is not None and source_filename:
coverage_timestamp = coverage_xml.get('timestamp', '').strip()
covered_lines = self._get_line_coverage(coverage_xml, source_filename)
@@ -177,12 +177,12 @@ class AnnotationCCodeWriter(CCodeWriter):
annotation_items = dict(self.annotations[source_filename])
scopes = dict(self.scopes[source_filename])
- outlist = []
+ outlist = []
outlist.extend(self._save_annotation_header(c_file, source_filename, coverage_timestamp))
outlist.extend(self._save_annotation_body(code, generated_code, annotation_items, scopes, covered_lines))
- outlist.extend(self._save_annotation_footer())
- return ''.join(outlist)
-
+ outlist.extend(self._save_annotation_footer())
+ return ''.join(outlist)
+
def _get_line_coverage(self, coverage_xml, source_filename):
coverage_data = None
for entry in coverage_xml.iterfind('.//class'):
@@ -201,90 +201,90 @@ class AnnotationCCodeWriter(CCodeWriter):
for line in coverage_data.iterfind('lines/line')
)
- def _htmlify_code(self, code):
- try:
- from pygments import highlight
- from pygments.lexers import CythonLexer
- from pygments.formatters import HtmlFormatter
- except ImportError:
- # no Pygments, just escape the code
- return html_escape(code)
-
- html_code = highlight(
- code, CythonLexer(stripnl=False, stripall=False),
- HtmlFormatter(nowrap=True))
- return html_code
-
+ def _htmlify_code(self, code):
+ try:
+ from pygments import highlight
+ from pygments.lexers import CythonLexer
+ from pygments.formatters import HtmlFormatter
+ except ImportError:
+ # no Pygments, just escape the code
+ return html_escape(code)
+
+ html_code = highlight(
+ code, CythonLexer(stripnl=False, stripall=False),
+ HtmlFormatter(nowrap=True))
+ return html_code
+
def _save_annotation_body(self, cython_code, generated_code, annotation_items, scopes, covered_lines=None):
- outlist = [u'<div class="cython">']
- pos_comment_marker = u'/* \N{HORIZONTAL ELLIPSIS} */\n'
- new_calls_map = dict(
- (name, 0) for name in
+ outlist = [u'<div class="cython">']
+ pos_comment_marker = u'/* \N{HORIZONTAL ELLIPSIS} */\n'
+ new_calls_map = dict(
+ (name, 0) for name in
'refnanny trace py_macro_api py_c_api pyx_macro_api pyx_c_api error_goto'.split()
- ).copy
-
- self.mark_pos(None)
-
- def annotate(match):
- group_name = match.lastgroup
- calls[group_name] += 1
+ ).copy
+
+ self.mark_pos(None)
+
+ def annotate(match):
+ group_name = match.lastgroup
+ calls[group_name] += 1
return u"<span class='%s'>%s</span>" % (
- group_name, match.group(group_name))
-
- lines = self._htmlify_code(cython_code).splitlines()
- lineno_width = len(str(len(lines)))
+ group_name, match.group(group_name))
+
+ lines = self._htmlify_code(cython_code).splitlines()
+ lineno_width = len(str(len(lines)))
if not covered_lines:
covered_lines = None
-
- for k, line in enumerate(lines, 1):
- try:
- c_code = generated_code[k]
- except KeyError:
- c_code = ''
- else:
- c_code = _replace_pos_comment(pos_comment_marker, c_code)
- if c_code.startswith(pos_comment_marker):
- c_code = c_code[len(pos_comment_marker):]
- c_code = html_escape(c_code)
-
- calls = new_calls_map()
- c_code = _parse_code(annotate, c_code)
- score = (5 * calls['py_c_api'] + 2 * calls['pyx_c_api'] +
- calls['py_macro_api'] + calls['pyx_macro_api'])
-
- if c_code:
+
+ for k, line in enumerate(lines, 1):
+ try:
+ c_code = generated_code[k]
+ except KeyError:
+ c_code = ''
+ else:
+ c_code = _replace_pos_comment(pos_comment_marker, c_code)
+ if c_code.startswith(pos_comment_marker):
+ c_code = c_code[len(pos_comment_marker):]
+ c_code = html_escape(c_code)
+
+ calls = new_calls_map()
+ c_code = _parse_code(annotate, c_code)
+ score = (5 * calls['py_c_api'] + 2 * calls['pyx_c_api'] +
+ calls['py_macro_api'] + calls['pyx_macro_api'])
+
+ if c_code:
onclick = self._onclick_attr
- expandsymbol = '+'
- else:
- onclick = ''
- expandsymbol = '&#xA0;'
-
+ expandsymbol = '+'
+ else:
+ onclick = ''
+ expandsymbol = '&#xA0;'
+
covered = ''
if covered_lines is not None and k in covered_lines:
hits = covered_lines[k]
if hits is not None:
covered = 'run' if hits else 'mis'
- outlist.append(
+ outlist.append(
u'<pre class="cython line score-{score}"{onclick}>'
- # generate line number with expand symbol in front,
-                # and the right number of digits
+ # generate line number with expand symbol in front,
+                # and the right number of digits
u'{expandsymbol}<span class="{covered}">{line:0{lineno_width}d}</span>: {code}</pre>\n'.format(
- score=score,
- expandsymbol=expandsymbol,
+ score=score,
+ expandsymbol=expandsymbol,
covered=covered,
- lineno_width=lineno_width,
- line=k,
- code=line.rstrip(),
- onclick=onclick,
- ))
- if c_code:
+ lineno_width=lineno_width,
+ line=k,
+ code=line.rstrip(),
+ onclick=onclick,
+ ))
+ if c_code:
outlist.append(u"<pre class='cython code score-{score} {covered}'>{code}</pre>".format(
score=score, covered=covered, code=c_code))
- outlist.append(u"</div>")
- return outlist
-
-
+ outlist.append(u"</div>")
+ return outlist
+
+
_parse_code = re.compile((
br'(?P<refnanny>__Pyx_X?(?:GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)|'
br'(?P<trace>__Pyx_Trace[A-Za-z]+)|'
@@ -296,25 +296,25 @@ _parse_code = re.compile((
br')(?=\()|' # look-ahead to exclude subsequent '(' from replacement
br'(?P<error_goto>(?:(?<=;) *if [^;]* +)?__PYX_ERR\([^)]+\))'
).decode('ascii')).sub
-
-
-_replace_pos_comment = re.compile(
- # this matches what Cython generates as code line marker comment
+
+
+_replace_pos_comment = re.compile(
+ # this matches what Cython generates as code line marker comment
br'^\s*/\*(?:(?:[^*]|\*[^/])*\n)+\s*\*/\s*\n'.decode('ascii'),
- re.M
-).sub
-
-
-class AnnotationItem(object):
-
- def __init__(self, style, text, tag="", size=0):
- self.style = style
- self.text = text
- self.tag = tag
- self.size = size
-
- def start(self):
- return u"<span class='cython tag %s' title='%s'>%s" % (self.style, self.text, self.tag)
-
- def end(self):
- return self.size, u"</span>"
+ re.M
+).sub
+
+
+class AnnotationItem(object):
+
+ def __init__(self, style, text, tag="", size=0):
+ self.style = style
+ self.text = text
+ self.tag = tag
+ self.size = size
+
+ def start(self):
+ return u"<span class='cython tag %s' title='%s'>%s" % (self.style, self.text, self.tag)
+
+ def end(self):
+ return self.size, u"</span>"
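Note on the annotation writer restored above: _save_annotation_body() scores each source line by a weighted count of CPython/Cython API calls found in the generated C code (score = 5*py_c_api + 2*pyx_c_api + py_macro_api + pyx_macro_api), and _css() maps every possible score to a shade of yellow. The standalone sketch below is not part of this diff; line_score, score_colour and the sample counts are hypothetical, and only the two formulas are taken from the code above:

    def line_score(calls):
        # Same weighting as _save_annotation_body(): raw C-API calls cost the
        # most, Cython helper calls and macros progressively less.
        return (5 * calls.get('py_c_api', 0) +
                2 * calls.get('pyx_c_api', 0) +
                calls.get('py_macro_api', 0) +
                calls.get('pyx_macro_api', 0))

    def score_colour(score):
        # Same formula as the ".cython.score-<i>" rules emitted by _css():
        # score 0 is almost white, higher scores saturate towards yellow.
        return "FFFF%02x" % int(255 / (1 + score / 10.0))

    counts = {'py_c_api': 1, 'pyx_macro_api': 2}  # made-up counts for one line
    score = line_score(counts)
    print(score, score_colour(score))  # score 7 maps to a pale yellow such as FFFF96
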
diff --git a/contrib/tools/cython/Cython/Compiler/AutoDocTransforms.py b/contrib/tools/cython/Cython/Compiler/AutoDocTransforms.py
index 1406d32a7c..d3c0a1d0da 100644
--- a/contrib/tools/cython/Cython/Compiler/AutoDocTransforms.py
+++ b/contrib/tools/cython/Cython/Compiler/AutoDocTransforms.py
@@ -1,11 +1,11 @@
from __future__ import absolute_import, print_function
-
-from .Visitor import CythonTransform
-from .StringEncoding import EncodedString
-from . import Options
-from . import PyrexTypes, ExprNodes
+
+from .Visitor import CythonTransform
+from .StringEncoding import EncodedString
+from . import Options
+from . import PyrexTypes, ExprNodes
from ..CodeWriter import ExpressionWriter
-
+
class AnnotationWriter(ExpressionWriter):
@@ -17,24 +17,24 @@ class AnnotationWriter(ExpressionWriter):
self.put("<lambda>")
-class EmbedSignature(CythonTransform):
-
- def __init__(self, context):
- super(EmbedSignature, self).__init__(context)
- self.class_name = None
- self.class_node = None
-
+class EmbedSignature(CythonTransform):
+
+ def __init__(self, context):
+ super(EmbedSignature, self).__init__(context)
+ self.class_name = None
+ self.class_node = None
+
def _fmt_expr(self, node):
writer = AnnotationWriter()
result = writer.write(node)
# print(type(node).__name__, '-->', result)
- return result
-
- def _fmt_arg(self, arg):
- if arg.type is PyrexTypes.py_object_type or arg.is_self_arg:
- doc = arg.name
- else:
- doc = arg.type.declaration_code(arg.name, for_display=1)
+ return result
+
+ def _fmt_arg(self, arg):
+ if arg.type is PyrexTypes.py_object_type or arg.is_self_arg:
+ doc = arg.name
+ else:
+ doc = arg.type.declaration_code(arg.name, for_display=1)
if arg.annotation:
annotation = self._fmt_expr(arg.annotation)
@@ -45,8 +45,8 @@ class EmbedSignature(CythonTransform):
elif arg.default:
default = self._fmt_expr(arg.default)
doc = doc + ('=%s' % default)
- return doc
-
+ return doc
+
def _fmt_star_arg(self, arg):
arg_doc = arg.name
if arg.annotation:
@@ -54,161 +54,161 @@ class EmbedSignature(CythonTransform):
arg_doc = arg_doc + (': %s' % annotation)
return arg_doc
- def _fmt_arglist(self, args,
- npargs=0, pargs=None,
- nkargs=0, kargs=None,
- hide_self=False):
- arglist = []
- for arg in args:
- if not hide_self or not arg.entry.is_self_arg:
- arg_doc = self._fmt_arg(arg)
- arglist.append(arg_doc)
- if pargs:
+ def _fmt_arglist(self, args,
+ npargs=0, pargs=None,
+ nkargs=0, kargs=None,
+ hide_self=False):
+ arglist = []
+ for arg in args:
+ if not hide_self or not arg.entry.is_self_arg:
+ arg_doc = self._fmt_arg(arg)
+ arglist.append(arg_doc)
+ if pargs:
arg_doc = self._fmt_star_arg(pargs)
arglist.insert(npargs, '*%s' % arg_doc)
- elif nkargs:
- arglist.insert(npargs, '*')
- if kargs:
+ elif nkargs:
+ arglist.insert(npargs, '*')
+ if kargs:
arg_doc = self._fmt_star_arg(kargs)
arglist.append('**%s' % arg_doc)
- return arglist
-
- def _fmt_ret_type(self, ret):
- if ret is PyrexTypes.py_object_type:
- return None
- else:
- return ret.declaration_code("", for_display=1)
-
- def _fmt_signature(self, cls_name, func_name, args,
- npargs=0, pargs=None,
- nkargs=0, kargs=None,
+ return arglist
+
+ def _fmt_ret_type(self, ret):
+ if ret is PyrexTypes.py_object_type:
+ return None
+ else:
+ return ret.declaration_code("", for_display=1)
+
+ def _fmt_signature(self, cls_name, func_name, args,
+ npargs=0, pargs=None,
+ nkargs=0, kargs=None,
return_expr=None,
- return_type=None, hide_self=False):
- arglist = self._fmt_arglist(args,
- npargs, pargs,
- nkargs, kargs,
- hide_self=hide_self)
- arglist_doc = ', '.join(arglist)
- func_doc = '%s(%s)' % (func_name, arglist_doc)
- if cls_name:
- func_doc = '%s.%s' % (cls_name, func_doc)
+ return_type=None, hide_self=False):
+ arglist = self._fmt_arglist(args,
+ npargs, pargs,
+ nkargs, kargs,
+ hide_self=hide_self)
+ arglist_doc = ', '.join(arglist)
+ func_doc = '%s(%s)' % (func_name, arglist_doc)
+ if cls_name:
+ func_doc = '%s.%s' % (cls_name, func_doc)
ret_doc = None
if return_expr:
ret_doc = self._fmt_expr(return_expr)
elif return_type:
- ret_doc = self._fmt_ret_type(return_type)
+ ret_doc = self._fmt_ret_type(return_type)
if ret_doc:
func_doc = '%s -> %s' % (func_doc, ret_doc)
- return func_doc
-
- def _embed_signature(self, signature, node_doc):
- if node_doc:
- return "%s\n%s" % (signature, node_doc)
- else:
- return signature
-
- def __call__(self, node):
- if not Options.docstrings:
- return node
- else:
- return super(EmbedSignature, self).__call__(node)
-
- def visit_ClassDefNode(self, node):
- oldname = self.class_name
- oldclass = self.class_node
- self.class_node = node
- try:
- # PyClassDefNode
- self.class_name = node.name
- except AttributeError:
- # CClassDefNode
- self.class_name = node.class_name
- self.visitchildren(node)
- self.class_name = oldname
- self.class_node = oldclass
- return node
-
- def visit_LambdaNode(self, node):
-        # lambda expressions do not have a signature or inner functions
- return node
-
- def visit_DefNode(self, node):
- if not self.current_directives['embedsignature']:
- return node
-
- is_constructor = False
- hide_self = False
- if node.entry.is_special:
- is_constructor = self.class_node and node.name == '__init__'
- if not is_constructor:
- return node
- class_name, func_name = None, self.class_name
- hide_self = True
- else:
- class_name, func_name = self.class_name, node.name
-
- nkargs = getattr(node, 'num_kwonly_args', 0)
- npargs = len(node.args) - nkargs
- signature = self._fmt_signature(
- class_name, func_name, node.args,
- npargs, node.star_arg,
- nkargs, node.starstar_arg,
+ return func_doc
+
+ def _embed_signature(self, signature, node_doc):
+ if node_doc:
+ return "%s\n%s" % (signature, node_doc)
+ else:
+ return signature
+
+ def __call__(self, node):
+ if not Options.docstrings:
+ return node
+ else:
+ return super(EmbedSignature, self).__call__(node)
+
+ def visit_ClassDefNode(self, node):
+ oldname = self.class_name
+ oldclass = self.class_node
+ self.class_node = node
+ try:
+ # PyClassDefNode
+ self.class_name = node.name
+ except AttributeError:
+ # CClassDefNode
+ self.class_name = node.class_name
+ self.visitchildren(node)
+ self.class_name = oldname
+ self.class_node = oldclass
+ return node
+
+ def visit_LambdaNode(self, node):
+        # lambda expressions do not have a signature or inner functions
+ return node
+
+ def visit_DefNode(self, node):
+ if not self.current_directives['embedsignature']:
+ return node
+
+ is_constructor = False
+ hide_self = False
+ if node.entry.is_special:
+ is_constructor = self.class_node and node.name == '__init__'
+ if not is_constructor:
+ return node
+ class_name, func_name = None, self.class_name
+ hide_self = True
+ else:
+ class_name, func_name = self.class_name, node.name
+
+ nkargs = getattr(node, 'num_kwonly_args', 0)
+ npargs = len(node.args) - nkargs
+ signature = self._fmt_signature(
+ class_name, func_name, node.args,
+ npargs, node.star_arg,
+ nkargs, node.starstar_arg,
return_expr=node.return_type_annotation,
- return_type=None, hide_self=hide_self)
- if signature:
- if is_constructor:
- doc_holder = self.class_node.entry.type.scope
- else:
- doc_holder = node.entry
-
- if doc_holder.doc is not None:
- old_doc = doc_holder.doc
- elif not is_constructor and getattr(node, 'py_func', None) is not None:
- old_doc = node.py_func.entry.doc
- else:
- old_doc = None
+ return_type=None, hide_self=hide_self)
+ if signature:
+ if is_constructor:
+ doc_holder = self.class_node.entry.type.scope
+ else:
+ doc_holder = node.entry
+
+ if doc_holder.doc is not None:
+ old_doc = doc_holder.doc
+ elif not is_constructor and getattr(node, 'py_func', None) is not None:
+ old_doc = node.py_func.entry.doc
+ else:
+ old_doc = None
new_doc = self._embed_signature(signature, old_doc)
- doc_holder.doc = EncodedString(new_doc)
- if not is_constructor and getattr(node, 'py_func', None) is not None:
- node.py_func.entry.doc = EncodedString(new_doc)
- return node
-
- def visit_CFuncDefNode(self, node):
- if not self.current_directives['embedsignature']:
- return node
- if not node.overridable: # not cpdef FOO(...):
- return node
-
- signature = self._fmt_signature(
- self.class_name, node.declarator.base.name,
- node.declarator.args,
- return_type=node.return_type)
- if signature:
- if node.entry.doc is not None:
- old_doc = node.entry.doc
- elif getattr(node, 'py_func', None) is not None:
- old_doc = node.py_func.entry.doc
- else:
- old_doc = None
- new_doc = self._embed_signature(signature, old_doc)
- node.entry.doc = EncodedString(new_doc)
- if hasattr(node, 'py_func') and node.py_func is not None:
- node.py_func.entry.doc = EncodedString(new_doc)
- return node
-
- def visit_PropertyNode(self, node):
- if not self.current_directives['embedsignature']:
- return node
-
- entry = node.entry
- if entry.visibility == 'public':
- # property synthesised from a cdef public attribute
- type_name = entry.type.declaration_code("", for_display=1)
- if not entry.type.is_pyobject:
- type_name = "'%s'" % type_name
- elif entry.type.is_extension_type:
- type_name = entry.type.module_name + '.' + type_name
- signature = '%s: %s' % (entry.name, type_name)
- new_doc = self._embed_signature(signature, entry.doc)
- entry.doc = EncodedString(new_doc)
- return node
+ doc_holder.doc = EncodedString(new_doc)
+ if not is_constructor and getattr(node, 'py_func', None) is not None:
+ node.py_func.entry.doc = EncodedString(new_doc)
+ return node
+
+ def visit_CFuncDefNode(self, node):
+ if not self.current_directives['embedsignature']:
+ return node
+ if not node.overridable: # not cpdef FOO(...):
+ return node
+
+ signature = self._fmt_signature(
+ self.class_name, node.declarator.base.name,
+ node.declarator.args,
+ return_type=node.return_type)
+ if signature:
+ if node.entry.doc is not None:
+ old_doc = node.entry.doc
+ elif getattr(node, 'py_func', None) is not None:
+ old_doc = node.py_func.entry.doc
+ else:
+ old_doc = None
+ new_doc = self._embed_signature(signature, old_doc)
+ node.entry.doc = EncodedString(new_doc)
+ if hasattr(node, 'py_func') and node.py_func is not None:
+ node.py_func.entry.doc = EncodedString(new_doc)
+ return node
+
+ def visit_PropertyNode(self, node):
+ if not self.current_directives['embedsignature']:
+ return node
+
+ entry = node.entry
+ if entry.visibility == 'public':
+ # property synthesised from a cdef public attribute
+ type_name = entry.type.declaration_code("", for_display=1)
+ if not entry.type.is_pyobject:
+ type_name = "'%s'" % type_name
+ elif entry.type.is_extension_type:
+ type_name = entry.type.module_name + '.' + type_name
+ signature = '%s: %s' % (entry.name, type_name)
+ new_doc = self._embed_signature(signature, entry.doc)
+ entry.doc = EncodedString(new_doc)
+ return node
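Note on the transform restored above: EmbedSignature rewrites the docstrings of def/cpdef functions so that the first line carries the call signature built by _fmt_signature() (optionally prefixed with the class name and suffixed with "-> <return type>"), and _embed_signature() joins it to any existing docstring. The simplified sketch below is not part of this diff; fmt_signature, embed_signature and the Rect/scale example are hypothetical, and only the join rule and the "->" convention follow the code above:

    def fmt_signature(cls_name, func_name, args, ret_type=None):
        # args: (name, c_type or None, default or None) tuples; cf. _fmt_arg().
        parts = []
        for name, ctype, default in args:
            doc = name if ctype is None else "%s %s" % (ctype, name)
            if default is not None:
                doc += "=%s" % default
            parts.append(doc)
        sig = "%s(%s)" % (func_name, ", ".join(parts))
        if cls_name:
            sig = "%s.%s" % (cls_name, sig)      # cf. _fmt_signature()
        if ret_type is not None:
            sig = "%s -> %s" % (sig, ret_type)
        return sig

    def embed_signature(signature, node_doc):
        # Same rule as _embed_signature(): signature first, old docstring after.
        return "%s\n%s" % (signature, node_doc) if node_doc else signature

    sig = fmt_signature("Rect", "scale",
                        [("self", None, None), ("factor", "double", "1.0")],
                        ret_type="double")
    print(embed_signature(sig, "Scale the rectangle in place."))
    # Rect.scale(self, double factor=1.0) -> double
    # Scale the rectangle in place.
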
diff --git a/contrib/tools/cython/Cython/Compiler/Buffer.py b/contrib/tools/cython/Cython/Compiler/Buffer.py
index 477c038c2e..c62a24f568 100644
--- a/contrib/tools/cython/Cython/Compiler/Buffer.py
+++ b/contrib/tools/cython/Cython/Compiler/Buffer.py
@@ -1,740 +1,740 @@
-from __future__ import absolute_import
-
-from .Visitor import CythonTransform
-from .ModuleNode import ModuleNode
-from .Errors import CompileError
-from .UtilityCode import CythonUtilityCode
-from .Code import UtilityCode, TempitaUtilityCode
-
-from . import Options
-from . import Interpreter
-from . import PyrexTypes
-from . import Naming
-from . import Symtab
-
-def dedent(text, reindent=0):
- from textwrap import dedent
- text = dedent(text)
- if reindent > 0:
- indent = " " * reindent
- text = '\n'.join([indent + x for x in text.split('\n')])
- return text
-
-class IntroduceBufferAuxiliaryVars(CythonTransform):
-
- #
- # Entry point
- #
-
- buffers_exists = False
- using_memoryview = False
-
- def __call__(self, node):
- assert isinstance(node, ModuleNode)
- self.max_ndim = 0
- result = super(IntroduceBufferAuxiliaryVars, self).__call__(node)
- if self.buffers_exists:
- use_bufstruct_declare_code(node.scope)
- use_py2_buffer_functions(node.scope)
-
- return result
-
-
- #
- # Basic operations for transforms
- #
- def handle_scope(self, node, scope):
- # For all buffers, insert extra variables in the scope.
- # The variables are also accessible from the buffer_info
- # on the buffer entry
+from __future__ import absolute_import
+
+from .Visitor import CythonTransform
+from .ModuleNode import ModuleNode
+from .Errors import CompileError
+from .UtilityCode import CythonUtilityCode
+from .Code import UtilityCode, TempitaUtilityCode
+
+from . import Options
+from . import Interpreter
+from . import PyrexTypes
+from . import Naming
+from . import Symtab
+
+def dedent(text, reindent=0):
+ from textwrap import dedent
+ text = dedent(text)
+ if reindent > 0:
+ indent = " " * reindent
+ text = '\n'.join([indent + x for x in text.split('\n')])
+ return text
+
+class IntroduceBufferAuxiliaryVars(CythonTransform):
+
+ #
+ # Entry point
+ #
+
+ buffers_exists = False
+ using_memoryview = False
+
+ def __call__(self, node):
+ assert isinstance(node, ModuleNode)
+ self.max_ndim = 0
+ result = super(IntroduceBufferAuxiliaryVars, self).__call__(node)
+ if self.buffers_exists:
+ use_bufstruct_declare_code(node.scope)
+ use_py2_buffer_functions(node.scope)
+
+ return result
+
+
+ #
+ # Basic operations for transforms
+ #
+ def handle_scope(self, node, scope):
+ # For all buffers, insert extra variables in the scope.
+ # The variables are also accessible from the buffer_info
+ # on the buffer entry
scope_items = scope.entries.items()
bufvars = [entry for name, entry in scope_items if entry.type.is_buffer]
- if len(bufvars) > 0:
- bufvars.sort(key=lambda entry: entry.name)
- self.buffers_exists = True
-
+ if len(bufvars) > 0:
+ bufvars.sort(key=lambda entry: entry.name)
+ self.buffers_exists = True
+
memviewslicevars = [entry for name, entry in scope_items if entry.type.is_memoryviewslice]
- if len(memviewslicevars) > 0:
- self.buffers_exists = True
-
-
+ if len(memviewslicevars) > 0:
+ self.buffers_exists = True
+
+
for (name, entry) in scope_items:
- if name == 'memoryview' and isinstance(entry.utility_code_definition, CythonUtilityCode):
- self.using_memoryview = True
- break
+ if name == 'memoryview' and isinstance(entry.utility_code_definition, CythonUtilityCode):
+ self.using_memoryview = True
+ break
del scope_items
-
- if isinstance(node, ModuleNode) and len(bufvars) > 0:
- # for now...note that pos is wrong
- raise CompileError(node.pos, "Buffer vars not allowed in module scope")
- for entry in bufvars:
- if entry.type.dtype.is_ptr:
- raise CompileError(node.pos, "Buffers with pointer types not yet supported.")
-
- name = entry.name
- buftype = entry.type
- if buftype.ndim > Options.buffer_max_dims:
- raise CompileError(node.pos,
- "Buffer ndims exceeds Options.buffer_max_dims = %d" % Options.buffer_max_dims)
- if buftype.ndim > self.max_ndim:
- self.max_ndim = buftype.ndim
-
- # Declare auxiliary vars
- def decvar(type, prefix):
- cname = scope.mangle(prefix, name)
- aux_var = scope.declare_var(name=None, cname=cname,
- type=type, pos=node.pos)
- if entry.is_arg:
- aux_var.used = True # otherwise, NameNode will mark whether it is used
-
- return aux_var
-
- auxvars = ((PyrexTypes.c_pyx_buffer_nd_type, Naming.pybuffernd_prefix),
- (PyrexTypes.c_pyx_buffer_type, Naming.pybufferstruct_prefix))
- pybuffernd, rcbuffer = [decvar(type, prefix) for (type, prefix) in auxvars]
-
- entry.buffer_aux = Symtab.BufferAux(pybuffernd, rcbuffer)
-
- scope.buffer_entries = bufvars
- self.scope = scope
-
- def visit_ModuleNode(self, node):
- self.handle_scope(node, node.scope)
- self.visitchildren(node)
- return node
-
- def visit_FuncDefNode(self, node):
- self.handle_scope(node, node.local_scope)
- self.visitchildren(node)
- return node
-
-#
-# Analysis
-#
-buffer_options = ("dtype", "ndim", "mode", "negative_indices", "cast") # ordered!
-buffer_defaults = {"ndim": 1, "mode": "full", "negative_indices": True, "cast": False}
-buffer_positional_options_count = 1 # anything beyond this needs keyword argument
-
-ERR_BUF_OPTION_UNKNOWN = '"%s" is not a buffer option'
-ERR_BUF_TOO_MANY = 'Too many buffer options'
-ERR_BUF_DUP = '"%s" buffer option already supplied'
-ERR_BUF_MISSING = '"%s" missing'
-ERR_BUF_MODE = 'Only allowed buffer modes are: "c", "fortran", "full", "strided" (as a compile-time string)'
-ERR_BUF_NDIM = 'ndim must be a non-negative integer'
-ERR_BUF_DTYPE = 'dtype must be "object", numeric type or a struct'
-ERR_BUF_BOOL = '"%s" must be a boolean'
-
-def analyse_buffer_options(globalpos, env, posargs, dictargs, defaults=None, need_complete=True):
- """
- Must be called during type analysis, as analyse is called
- on the dtype argument.
-
- posargs and dictargs should consist of a list and a dict
- of tuples (value, pos). Defaults should be a dict of values.
-
- Returns a dict containing all the options a buffer can have and
- its value (with the positions stripped).
- """
- if defaults is None:
- defaults = buffer_defaults
-
+
+ if isinstance(node, ModuleNode) and len(bufvars) > 0:
+ # for now...note that pos is wrong
+ raise CompileError(node.pos, "Buffer vars not allowed in module scope")
+ for entry in bufvars:
+ if entry.type.dtype.is_ptr:
+ raise CompileError(node.pos, "Buffers with pointer types not yet supported.")
+
+ name = entry.name
+ buftype = entry.type
+ if buftype.ndim > Options.buffer_max_dims:
+ raise CompileError(node.pos,
+ "Buffer ndims exceeds Options.buffer_max_dims = %d" % Options.buffer_max_dims)
+ if buftype.ndim > self.max_ndim:
+ self.max_ndim = buftype.ndim
+
+ # Declare auxiliary vars
+ def decvar(type, prefix):
+ cname = scope.mangle(prefix, name)
+ aux_var = scope.declare_var(name=None, cname=cname,
+ type=type, pos=node.pos)
+ if entry.is_arg:
+ aux_var.used = True # otherwise, NameNode will mark whether it is used
+
+ return aux_var
+
+ auxvars = ((PyrexTypes.c_pyx_buffer_nd_type, Naming.pybuffernd_prefix),
+ (PyrexTypes.c_pyx_buffer_type, Naming.pybufferstruct_prefix))
+ pybuffernd, rcbuffer = [decvar(type, prefix) for (type, prefix) in auxvars]
+
+ entry.buffer_aux = Symtab.BufferAux(pybuffernd, rcbuffer)
+
+ scope.buffer_entries = bufvars
+ self.scope = scope
+
+ def visit_ModuleNode(self, node):
+ self.handle_scope(node, node.scope)
+ self.visitchildren(node)
+ return node
+
+ def visit_FuncDefNode(self, node):
+ self.handle_scope(node, node.local_scope)
+ self.visitchildren(node)
+ return node
+
+#
+# Analysis
+#
+buffer_options = ("dtype", "ndim", "mode", "negative_indices", "cast") # ordered!
+buffer_defaults = {"ndim": 1, "mode": "full", "negative_indices": True, "cast": False}
+buffer_positional_options_count = 1 # anything beyond this needs keyword argument
+
+ERR_BUF_OPTION_UNKNOWN = '"%s" is not a buffer option'
+ERR_BUF_TOO_MANY = 'Too many buffer options'
+ERR_BUF_DUP = '"%s" buffer option already supplied'
+ERR_BUF_MISSING = '"%s" missing'
+ERR_BUF_MODE = 'Only allowed buffer modes are: "c", "fortran", "full", "strided" (as a compile-time string)'
+ERR_BUF_NDIM = 'ndim must be a non-negative integer'
+ERR_BUF_DTYPE = 'dtype must be "object", numeric type or a struct'
+ERR_BUF_BOOL = '"%s" must be a boolean'
+
+def analyse_buffer_options(globalpos, env, posargs, dictargs, defaults=None, need_complete=True):
+ """
+ Must be called during type analysis, as analyse is called
+ on the dtype argument.
+
+ posargs and dictargs should consist of a list and a dict
+ of tuples (value, pos). Defaults should be a dict of values.
+
+ Returns a dict containing all the options a buffer can have and
+ its value (with the positions stripped).
+ """
+ if defaults is None:
+ defaults = buffer_defaults
+
posargs, dictargs = Interpreter.interpret_compiletime_options(
posargs, dictargs, type_env=env, type_args=(0, 'dtype'))
-
- if len(posargs) > buffer_positional_options_count:
- raise CompileError(posargs[-1][1], ERR_BUF_TOO_MANY)
-
- options = {}
+
+ if len(posargs) > buffer_positional_options_count:
+ raise CompileError(posargs[-1][1], ERR_BUF_TOO_MANY)
+
+ options = {}
for name, (value, pos) in dictargs.items():
- if not name in buffer_options:
- raise CompileError(pos, ERR_BUF_OPTION_UNKNOWN % name)
- options[name] = value
-
- for name, (value, pos) in zip(buffer_options, posargs):
- if not name in buffer_options:
- raise CompileError(pos, ERR_BUF_OPTION_UNKNOWN % name)
- if name in options:
- raise CompileError(pos, ERR_BUF_DUP % name)
- options[name] = value
-
- # Check that they are all there and copy defaults
- for name in buffer_options:
- if not name in options:
- try:
- options[name] = defaults[name]
- except KeyError:
- if need_complete:
- raise CompileError(globalpos, ERR_BUF_MISSING % name)
-
- dtype = options.get("dtype")
- if dtype and dtype.is_extension_type:
- raise CompileError(globalpos, ERR_BUF_DTYPE)
-
- ndim = options.get("ndim")
- if ndim and (not isinstance(ndim, int) or ndim < 0):
- raise CompileError(globalpos, ERR_BUF_NDIM)
-
- mode = options.get("mode")
- if mode and not (mode in ('full', 'strided', 'c', 'fortran')):
- raise CompileError(globalpos, ERR_BUF_MODE)
-
- def assert_bool(name):
- x = options.get(name)
- if not isinstance(x, bool):
- raise CompileError(globalpos, ERR_BUF_BOOL % name)
-
- assert_bool('negative_indices')
- assert_bool('cast')
-
- return options
-
-
-#
-# Code generation
-#
-
-class BufferEntry(object):
- def __init__(self, entry):
- self.entry = entry
- self.type = entry.type
- self.cname = entry.buffer_aux.buflocal_nd_var.cname
- self.buf_ptr = "%s.rcbuffer->pybuffer.buf" % self.cname
+ if not name in buffer_options:
+ raise CompileError(pos, ERR_BUF_OPTION_UNKNOWN % name)
+ options[name] = value
+
+ for name, (value, pos) in zip(buffer_options, posargs):
+ if not name in buffer_options:
+ raise CompileError(pos, ERR_BUF_OPTION_UNKNOWN % name)
+ if name in options:
+ raise CompileError(pos, ERR_BUF_DUP % name)
+ options[name] = value
+
+ # Check that they are all there and copy defaults
+ for name in buffer_options:
+ if not name in options:
+ try:
+ options[name] = defaults[name]
+ except KeyError:
+ if need_complete:
+ raise CompileError(globalpos, ERR_BUF_MISSING % name)
+
+ dtype = options.get("dtype")
+ if dtype and dtype.is_extension_type:
+ raise CompileError(globalpos, ERR_BUF_DTYPE)
+
+ ndim = options.get("ndim")
+ if ndim and (not isinstance(ndim, int) or ndim < 0):
+ raise CompileError(globalpos, ERR_BUF_NDIM)
+
+ mode = options.get("mode")
+ if mode and not (mode in ('full', 'strided', 'c', 'fortran')):
+ raise CompileError(globalpos, ERR_BUF_MODE)
+
+ def assert_bool(name):
+ x = options.get(name)
+ if not isinstance(x, bool):
+ raise CompileError(globalpos, ERR_BUF_BOOL % name)
+
+ assert_bool('negative_indices')
+ assert_bool('cast')
+
+ return options
+
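For orientation, here is a hedged sketch (not part of the diff) of the options dict this helper would return for a hypothetical declaration such as np.ndarray[np.float64_t, ndim=2, mode='c']; the concrete dtype object and the default values are assumptions:

    # Illustrative only: 'dtype' would really be a Cython type object, not a string.
    options = {
        'dtype': 'float64',        # placeholder for the analysed dtype
        'ndim': 2,
        'mode': 'c',
        'negative_indices': True,  # assumed to come from buffer_defaults
        'cast': False,             # assumed to come from buffer_defaults
    }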
+
+#
+# Code generation
+#
+
+class BufferEntry(object):
+ def __init__(self, entry):
+ self.entry = entry
+ self.type = entry.type
+ self.cname = entry.buffer_aux.buflocal_nd_var.cname
+ self.buf_ptr = "%s.rcbuffer->pybuffer.buf" % self.cname
self.buf_ptr_type = entry.type.buffer_ptr_type
self.init_attributes()
-
+
def init_attributes(self):
self.shape = self.get_buf_shapevars()
self.strides = self.get_buf_stridevars()
self.suboffsets = self.get_buf_suboffsetvars()
- def get_buf_suboffsetvars(self):
- return self._for_all_ndim("%s.diminfo[%d].suboffsets")
-
- def get_buf_stridevars(self):
- return self._for_all_ndim("%s.diminfo[%d].strides")
-
- def get_buf_shapevars(self):
- return self._for_all_ndim("%s.diminfo[%d].shape")
-
- def _for_all_ndim(self, s):
- return [s % (self.cname, i) for i in range(self.type.ndim)]
-
- def generate_buffer_lookup_code(self, code, index_cnames):
- # Create buffer lookup and return it
- # This is done via utility macros/inline functions, which vary
- # according to the access mode used.
- params = []
- nd = self.type.ndim
- mode = self.type.mode
- if mode == 'full':
- for i, s, o in zip(index_cnames,
- self.get_buf_stridevars(),
- self.get_buf_suboffsetvars()):
- params.append(i)
- params.append(s)
- params.append(o)
- funcname = "__Pyx_BufPtrFull%dd" % nd
- funcgen = buf_lookup_full_code
- else:
- if mode == 'strided':
- funcname = "__Pyx_BufPtrStrided%dd" % nd
- funcgen = buf_lookup_strided_code
- elif mode == 'c':
- funcname = "__Pyx_BufPtrCContig%dd" % nd
- funcgen = buf_lookup_c_code
- elif mode == 'fortran':
- funcname = "__Pyx_BufPtrFortranContig%dd" % nd
- funcgen = buf_lookup_fortran_code
- else:
- assert False
- for i, s in zip(index_cnames, self.get_buf_stridevars()):
- params.append(i)
- params.append(s)
-
- # Make sure the utility code is available
- if funcname not in code.globalstate.utility_codes:
- code.globalstate.utility_codes.add(funcname)
- protocode = code.globalstate['utility_code_proto']
- defcode = code.globalstate['utility_code_def']
- funcgen(protocode, defcode, name=funcname, nd=nd)
-
+ def get_buf_suboffsetvars(self):
+ return self._for_all_ndim("%s.diminfo[%d].suboffsets")
+
+ def get_buf_stridevars(self):
+ return self._for_all_ndim("%s.diminfo[%d].strides")
+
+ def get_buf_shapevars(self):
+ return self._for_all_ndim("%s.diminfo[%d].shape")
+
+ def _for_all_ndim(self, s):
+ return [s % (self.cname, i) for i in range(self.type.ndim)]
+
+ def generate_buffer_lookup_code(self, code, index_cnames):
+ # Create buffer lookup and return it
+ # This is done via utility macros/inline functions, which vary
+ # according to the access mode used.
+ params = []
+ nd = self.type.ndim
+ mode = self.type.mode
+ if mode == 'full':
+ for i, s, o in zip(index_cnames,
+ self.get_buf_stridevars(),
+ self.get_buf_suboffsetvars()):
+ params.append(i)
+ params.append(s)
+ params.append(o)
+ funcname = "__Pyx_BufPtrFull%dd" % nd
+ funcgen = buf_lookup_full_code
+ else:
+ if mode == 'strided':
+ funcname = "__Pyx_BufPtrStrided%dd" % nd
+ funcgen = buf_lookup_strided_code
+ elif mode == 'c':
+ funcname = "__Pyx_BufPtrCContig%dd" % nd
+ funcgen = buf_lookup_c_code
+ elif mode == 'fortran':
+ funcname = "__Pyx_BufPtrFortranContig%dd" % nd
+ funcgen = buf_lookup_fortran_code
+ else:
+ assert False
+ for i, s in zip(index_cnames, self.get_buf_stridevars()):
+ params.append(i)
+ params.append(s)
+
+ # Make sure the utility code is available
+ if funcname not in code.globalstate.utility_codes:
+ code.globalstate.utility_codes.add(funcname)
+ protocode = code.globalstate['utility_code_proto']
+ defcode = code.globalstate['utility_code_def']
+ funcgen(protocode, defcode, name=funcname, nd=nd)
+
buf_ptr_type_code = self.buf_ptr_type.empty_declaration_code()
- ptrcode = "%s(%s, %s, %s)" % (funcname, buf_ptr_type_code, self.buf_ptr,
- ", ".join(params))
- return ptrcode
-
-
-def get_flags(buffer_aux, buffer_type):
- flags = 'PyBUF_FORMAT'
- mode = buffer_type.mode
- if mode == 'full':
- flags += '| PyBUF_INDIRECT'
- elif mode == 'strided':
- flags += '| PyBUF_STRIDES'
- elif mode == 'c':
- flags += '| PyBUF_C_CONTIGUOUS'
- elif mode == 'fortran':
- flags += '| PyBUF_F_CONTIGUOUS'
- else:
- assert False
- if buffer_aux.writable_needed: flags += "| PyBUF_WRITABLE"
- return flags
-
-def used_buffer_aux_vars(entry):
- buffer_aux = entry.buffer_aux
- buffer_aux.buflocal_nd_var.used = True
- buffer_aux.rcbuf_var.used = True
-
-def put_unpack_buffer_aux_into_scope(buf_entry, code):
- # Generate code to copy the needed struct info into local
- # variables.
- buffer_aux, mode = buf_entry.buffer_aux, buf_entry.type.mode
- pybuffernd_struct = buffer_aux.buflocal_nd_var.cname
-
- fldnames = ['strides', 'shape']
- if mode == 'full':
- fldnames.append('suboffsets')
-
- ln = []
- for i in range(buf_entry.type.ndim):
- for fldname in fldnames:
- ln.append("%s.diminfo[%d].%s = %s.rcbuffer->pybuffer.%s[%d];" % \
- (pybuffernd_struct, i, fldname,
- pybuffernd_struct, fldname, i))
- code.putln(' '.join(ln))
-
-def put_init_vars(entry, code):
- bufaux = entry.buffer_aux
- pybuffernd_struct = bufaux.buflocal_nd_var.cname
- pybuffer_struct = bufaux.rcbuf_var.cname
- # init pybuffer_struct
- code.putln("%s.pybuffer.buf = NULL;" % pybuffer_struct)
- code.putln("%s.refcount = 0;" % pybuffer_struct)
- # init the buffer object
- # code.put_init_var_to_py_none(entry)
- # init the pybuffernd_struct
- code.putln("%s.data = NULL;" % pybuffernd_struct)
- code.putln("%s.rcbuffer = &%s;" % (pybuffernd_struct, pybuffer_struct))
-
-
-def put_acquire_arg_buffer(entry, code, pos):
- buffer_aux = entry.buffer_aux
- getbuffer = get_getbuffer_call(code, entry.cname, buffer_aux, entry.type)
-
- # Acquire any new buffer
- code.putln("{")
- code.putln("__Pyx_BufFmt_StackElem __pyx_stack[%d];" % entry.type.dtype.struct_nesting_depth())
- code.putln(code.error_goto_if("%s == -1" % getbuffer, pos))
- code.putln("}")
+ ptrcode = "%s(%s, %s, %s)" % (funcname, buf_ptr_type_code, self.buf_ptr,
+ ", ".join(params))
+ return ptrcode
+
+
+def get_flags(buffer_aux, buffer_type):
+ flags = 'PyBUF_FORMAT'
+ mode = buffer_type.mode
+ if mode == 'full':
+ flags += '| PyBUF_INDIRECT'
+ elif mode == 'strided':
+ flags += '| PyBUF_STRIDES'
+ elif mode == 'c':
+ flags += '| PyBUF_C_CONTIGUOUS'
+ elif mode == 'fortran':
+ flags += '| PyBUF_F_CONTIGUOUS'
+ else:
+ assert False
+ if buffer_aux.writable_needed: flags += "| PyBUF_WRITABLE"
+ return flags
+
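As a quick illustration (again not part of the diff), a writable C-contiguous buffer would end up with a flag expression built like this; the spacing around '|' mirrors the string concatenation in get_flags above:

    # Sketch of the string get_flags() assembles for mode='c' with a writable buffer.
    flags = 'PyBUF_FORMAT'
    flags += '| PyBUF_C_CONTIGUOUS'
    flags += '| PyBUF_WRITABLE'
    # -> "PyBUF_FORMAT| PyBUF_C_CONTIGUOUS| PyBUF_WRITABLE"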
+def used_buffer_aux_vars(entry):
+ buffer_aux = entry.buffer_aux
+ buffer_aux.buflocal_nd_var.used = True
+ buffer_aux.rcbuf_var.used = True
+
+def put_unpack_buffer_aux_into_scope(buf_entry, code):
+ # Generate code to copy the needed struct info into local
+ # variables.
+ buffer_aux, mode = buf_entry.buffer_aux, buf_entry.type.mode
+ pybuffernd_struct = buffer_aux.buflocal_nd_var.cname
+
+ fldnames = ['strides', 'shape']
+ if mode == 'full':
+ fldnames.append('suboffsets')
+
+ ln = []
+ for i in range(buf_entry.type.ndim):
+ for fldname in fldnames:
+ ln.append("%s.diminfo[%d].%s = %s.rcbuffer->pybuffer.%s[%d];" % \
+ (pybuffernd_struct, i, fldname,
+ pybuffernd_struct, fldname, i))
+ code.putln(' '.join(ln))
+
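A hedged example of one line this helper emits; the struct cname "__pyx_pybuffernd_arr" is a made-up placeholder:

    # Reproduces the helper's format string for dimension 0, field 'strides'.
    pybuffernd_struct = "__pyx_pybuffernd_arr"   # hypothetical cname
    print("%s.diminfo[%d].%s = %s.rcbuffer->pybuffer.%s[%d];" % (
        pybuffernd_struct, 0, "strides", pybuffernd_struct, "strides", 0))
    # -> __pyx_pybuffernd_arr.diminfo[0].strides = __pyx_pybuffernd_arr.rcbuffer->pybuffer.strides[0];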
+def put_init_vars(entry, code):
+ bufaux = entry.buffer_aux
+ pybuffernd_struct = bufaux.buflocal_nd_var.cname
+ pybuffer_struct = bufaux.rcbuf_var.cname
+ # init pybuffer_struct
+ code.putln("%s.pybuffer.buf = NULL;" % pybuffer_struct)
+ code.putln("%s.refcount = 0;" % pybuffer_struct)
+ # init the buffer object
+ # code.put_init_var_to_py_none(entry)
+ # init the pybuffernd_struct
+ code.putln("%s.data = NULL;" % pybuffernd_struct)
+ code.putln("%s.rcbuffer = &%s;" % (pybuffernd_struct, pybuffer_struct))
+
+
+def put_acquire_arg_buffer(entry, code, pos):
+ buffer_aux = entry.buffer_aux
+ getbuffer = get_getbuffer_call(code, entry.cname, buffer_aux, entry.type)
+
+ # Acquire any new buffer
+ code.putln("{")
+ code.putln("__Pyx_BufFmt_StackElem __pyx_stack[%d];" % entry.type.dtype.struct_nesting_depth())
+ code.putln(code.error_goto_if("%s == -1" % getbuffer, pos))
+ code.putln("}")
# An exception raised in arg parsing cannot be caught, so no
- # need to care about the buffer then.
- put_unpack_buffer_aux_into_scope(entry, code)
-
-
-def put_release_buffer_code(code, entry):
- code.globalstate.use_utility_code(acquire_utility_code)
- code.putln("__Pyx_SafeReleaseBuffer(&%s.rcbuffer->pybuffer);" % entry.buffer_aux.buflocal_nd_var.cname)
-
-
-def get_getbuffer_call(code, obj_cname, buffer_aux, buffer_type):
- ndim = buffer_type.ndim
- cast = int(buffer_type.cast)
- flags = get_flags(buffer_aux, buffer_type)
- pybuffernd_struct = buffer_aux.buflocal_nd_var.cname
-
- dtype_typeinfo = get_type_information_cname(code, buffer_type.dtype)
-
+ # need to care about the buffer then.
+ put_unpack_buffer_aux_into_scope(entry, code)
+
+
+def put_release_buffer_code(code, entry):
+ code.globalstate.use_utility_code(acquire_utility_code)
+ code.putln("__Pyx_SafeReleaseBuffer(&%s.rcbuffer->pybuffer);" % entry.buffer_aux.buflocal_nd_var.cname)
+
+
+def get_getbuffer_call(code, obj_cname, buffer_aux, buffer_type):
+ ndim = buffer_type.ndim
+ cast = int(buffer_type.cast)
+ flags = get_flags(buffer_aux, buffer_type)
+ pybuffernd_struct = buffer_aux.buflocal_nd_var.cname
+
+ dtype_typeinfo = get_type_information_cname(code, buffer_type.dtype)
+
code.globalstate.use_utility_code(acquire_utility_code)
- return ("__Pyx_GetBufferAndValidate(&%(pybuffernd_struct)s.rcbuffer->pybuffer, "
- "(PyObject*)%(obj_cname)s, &%(dtype_typeinfo)s, %(flags)s, %(ndim)d, "
- "%(cast)d, __pyx_stack)" % locals())
-
-
-def put_assign_to_buffer(lhs_cname, rhs_cname, buf_entry,
- is_initialized, pos, code):
- """
- Generate code for reassigning a buffer variable. This only deals with getting
- the buffer auxiliary structure and variables set up correctly; the assignment
- itself and refcounting are the responsibility of the caller.
-
- However, the assignment operation may throw an exception so that the reassignment
- never happens.
-
- Depending on the circumstances there are two possible outcomes:
- - Old buffer released, new acquired, rhs assigned to lhs
- - Old buffer released, new acquired which fails, reacquire old lhs buffer
- (which may or may not succeed).
- """
-
- buffer_aux, buffer_type = buf_entry.buffer_aux, buf_entry.type
- pybuffernd_struct = buffer_aux.buflocal_nd_var.cname
- flags = get_flags(buffer_aux, buffer_type)
-
+ return ("__Pyx_GetBufferAndValidate(&%(pybuffernd_struct)s.rcbuffer->pybuffer, "
+ "(PyObject*)%(obj_cname)s, &%(dtype_typeinfo)s, %(flags)s, %(ndim)d, "
+ "%(cast)d, __pyx_stack)" % locals())
+
+
+def put_assign_to_buffer(lhs_cname, rhs_cname, buf_entry,
+ is_initialized, pos, code):
+ """
+ Generate code for reassigning a buffer variable. This only deals with getting
+ the buffer auxiliary structure and variables set up correctly; the assignment
+ itself and refcounting are the responsibility of the caller.
+
+ However, the assignment operation may throw an exception so that the reassignment
+ never happens.
+
+ Depending on the circumstances there are two possible outcomes:
+ - Old buffer released, new acquired, rhs assigned to lhs
+ - Old buffer released, new acquired which fails, reacquire old lhs buffer
+ (which may or may not succeed).
+ """
+
+ buffer_aux, buffer_type = buf_entry.buffer_aux, buf_entry.type
+ pybuffernd_struct = buffer_aux.buflocal_nd_var.cname
+ flags = get_flags(buffer_aux, buffer_type)
+
code.putln("{") # Set up necessary stack for getbuffer
- code.putln("__Pyx_BufFmt_StackElem __pyx_stack[%d];" % buffer_type.dtype.struct_nesting_depth())
-
- getbuffer = get_getbuffer_call(code, "%s", buffer_aux, buffer_type) # fill in object below
-
- if is_initialized:
- # Release any existing buffer
- code.putln('__Pyx_SafeReleaseBuffer(&%s.rcbuffer->pybuffer);' % pybuffernd_struct)
- # Acquire
- retcode_cname = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
- code.putln("%s = %s;" % (retcode_cname, getbuffer % rhs_cname))
- code.putln('if (%s) {' % (code.unlikely("%s < 0" % retcode_cname)))
- # If acquisition failed, attempt to reacquire the old buffer
- # before raising the exception. A failure of reacquisition
- # will cause the reacquisition exception to be reported; one
- # could consider working around this later.
+ code.putln("__Pyx_BufFmt_StackElem __pyx_stack[%d];" % buffer_type.dtype.struct_nesting_depth())
+
+ getbuffer = get_getbuffer_call(code, "%s", buffer_aux, buffer_type) # fill in object below
+
+ if is_initialized:
+ # Release any existing buffer
+ code.putln('__Pyx_SafeReleaseBuffer(&%s.rcbuffer->pybuffer);' % pybuffernd_struct)
+ # Acquire
+ retcode_cname = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
+ code.putln("%s = %s;" % (retcode_cname, getbuffer % rhs_cname))
+ code.putln('if (%s) {' % (code.unlikely("%s < 0" % retcode_cname)))
+ # If acquisition failed, attempt to reacquire the old buffer
+ # before raising the exception. A failure of reacquisition
+ # will cause the reacquisition exception to be reported; one
+ # could consider working around this later.
exc_temps = tuple(code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=False)
for _ in range(3))
code.putln('PyErr_Fetch(&%s, &%s, &%s);' % exc_temps)
- code.putln('if (%s) {' % code.unlikely("%s == -1" % (getbuffer % lhs_cname)))
+ code.putln('if (%s) {' % code.unlikely("%s == -1" % (getbuffer % lhs_cname)))
code.putln('Py_XDECREF(%s); Py_XDECREF(%s); Py_XDECREF(%s);' % exc_temps) # Do not refnanny these!
- code.globalstate.use_utility_code(raise_buffer_fallback_code)
- code.putln('__Pyx_RaiseBufferFallbackError();')
- code.putln('} else {')
+ code.globalstate.use_utility_code(raise_buffer_fallback_code)
+ code.putln('__Pyx_RaiseBufferFallbackError();')
+ code.putln('} else {')
code.putln('PyErr_Restore(%s, %s, %s);' % exc_temps)
code.putln('}')
code.putln('%s = %s = %s = 0;' % exc_temps)
for t in exc_temps:
- code.funcstate.release_temp(t)
- code.putln('}')
- # Unpack indices
- put_unpack_buffer_aux_into_scope(buf_entry, code)
- code.putln(code.error_goto_if_neg(retcode_cname, pos))
- code.funcstate.release_temp(retcode_cname)
- else:
- # Our entry had no previous value, so set to None when acquisition fails.
- # In this case, auxiliary vars should be set up right in initialization to a zero-buffer,
- # so it suffices to set the buf field to NULL.
- code.putln('if (%s) {' % code.unlikely("%s == -1" % (getbuffer % rhs_cname)))
- code.putln('%s = %s; __Pyx_INCREF(Py_None); %s.rcbuffer->pybuffer.buf = NULL;' %
- (lhs_cname,
- PyrexTypes.typecast(buffer_type, PyrexTypes.py_object_type, "Py_None"),
- pybuffernd_struct))
- code.putln(code.error_goto(pos))
- code.put('} else {')
- # Unpack indices
- put_unpack_buffer_aux_into_scope(buf_entry, code)
- code.putln('}')
-
- code.putln("}") # Release stack
-
-
-def put_buffer_lookup_code(entry, index_signeds, index_cnames, directives,
- pos, code, negative_indices, in_nogil_context):
- """
- Generates code to process indices and calculate an offset into
- a buffer. Returns a C string which gives a pointer which can be
- read from or written to at will (it is an expression, so the caller should
- store it in a temporary if it is used more than once).
-
- As the bounds checking can have any number of combinations of unsigned
- arguments, smart optimizations etc., we insert it directly in the function
- body. The lookup, however, is delegated to an inline function that is instantiated
- once per ndim (lookups with suboffsets tend to get quite complicated).
-
- entry is a BufferEntry
- """
- negative_indices = directives['wraparound'] and negative_indices
-
- if directives['boundscheck']:
- # Check bounds and fix negative indices.
- # We allocate a temporary which is initialized to -1, meaning OK (!).
+ code.funcstate.release_temp(t)
+ code.putln('}')
+ # Unpack indices
+ put_unpack_buffer_aux_into_scope(buf_entry, code)
+ code.putln(code.error_goto_if_neg(retcode_cname, pos))
+ code.funcstate.release_temp(retcode_cname)
+ else:
+ # Our entry had no previous value, so set to None when acquisition fails.
+ # In this case, auxiliary vars should be set up right in initialization to a zero-buffer,
+ # so it suffices to set the buf field to NULL.
+ code.putln('if (%s) {' % code.unlikely("%s == -1" % (getbuffer % rhs_cname)))
+ code.putln('%s = %s; __Pyx_INCREF(Py_None); %s.rcbuffer->pybuffer.buf = NULL;' %
+ (lhs_cname,
+ PyrexTypes.typecast(buffer_type, PyrexTypes.py_object_type, "Py_None"),
+ pybuffernd_struct))
+ code.putln(code.error_goto(pos))
+ code.put('} else {')
+ # Unpack indices
+ put_unpack_buffer_aux_into_scope(buf_entry, code)
+ code.putln('}')
+
+ code.putln("}") # Release stack
+
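Roughly, the C emitted above for the is_initialized case follows this control flow; the Python below is only a paraphrase, with acquire/release standing in for __Pyx_GetBufferAndValidate and __Pyx_SafeReleaseBuffer:

    def reassign(lhs, rhs, acquire, release):
        # Paraphrase of the generated logic, not emitted code.
        release(lhs)
        try:
            acquire(rhs)
        except Exception:
            # Try to reacquire the old buffer before the error propagates;
            # if that also fails, the fallback error replaces the original one.
            try:
                acquire(lhs)
            except Exception:
                raise RuntimeError("buffer fallback reacquisition failed")
            raise
        return rhs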
+
+def put_buffer_lookup_code(entry, index_signeds, index_cnames, directives,
+ pos, code, negative_indices, in_nogil_context):
+ """
+ Generates code to process indices and calculate an offset into
+ a buffer. Returns a C string which gives a pointer which can be
+ read from or written to at will (it is an expression, so the caller should
+ store it in a temporary if it is used more than once).
+
+ As the bounds checking can have any number of combinations of unsigned
+ arguments, smart optimizations etc., we insert it directly in the function
+ body. The lookup, however, is delegated to an inline function that is instantiated
+ once per ndim (lookups with suboffsets tend to get quite complicated).
+
+ entry is a BufferEntry
+ """
+ negative_indices = directives['wraparound'] and negative_indices
+
+ if directives['boundscheck']:
+ # Check bounds and fix negative indices.
+ # We allocate a temporary which is initialized to -1, meaning OK (!).
# If an error occurs, the temp is set to the index dimension the
# error is occurring at.
failed_dim_temp = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
code.putln("%s = -1;" % failed_dim_temp)
for dim, (signed, cname, shape) in enumerate(zip(index_signeds, index_cnames, entry.get_buf_shapevars())):
- if signed != 0:
- # not unsigned, deal with negative index
- code.putln("if (%s < 0) {" % cname)
- if negative_indices:
- code.putln("%s += %s;" % (cname, shape))
- code.putln("if (%s) %s = %d;" % (
+ if signed != 0:
+ # not unsigned, deal with negative index
+ code.putln("if (%s < 0) {" % cname)
+ if negative_indices:
+ code.putln("%s += %s;" % (cname, shape))
+ code.putln("if (%s) %s = %d;" % (
code.unlikely("%s < 0" % cname),
failed_dim_temp, dim))
- else:
+ else:
code.putln("%s = %d;" % (failed_dim_temp, dim))
- code.put("} else ")
- # check bounds in positive direction
- if signed != 0:
- cast = ""
- else:
- cast = "(size_t)"
- code.putln("if (%s) %s = %d;" % (
- code.unlikely("%s >= %s%s" % (cname, cast, shape)),
+ code.put("} else ")
+ # check bounds in positive direction
+ if signed != 0:
+ cast = ""
+ else:
+ cast = "(size_t)"
+ code.putln("if (%s) %s = %d;" % (
+ code.unlikely("%s >= %s%s" % (cname, cast, shape)),
failed_dim_temp, dim))
-
- if in_nogil_context:
- code.globalstate.use_utility_code(raise_indexerror_nogil)
- func = '__Pyx_RaiseBufferIndexErrorNogil'
- else:
- code.globalstate.use_utility_code(raise_indexerror_code)
- func = '__Pyx_RaiseBufferIndexError'
-
+
+ if in_nogil_context:
+ code.globalstate.use_utility_code(raise_indexerror_nogil)
+ func = '__Pyx_RaiseBufferIndexErrorNogil'
+ else:
+ code.globalstate.use_utility_code(raise_indexerror_code)
+ func = '__Pyx_RaiseBufferIndexError'
+
code.putln("if (%s) {" % code.unlikely("%s != -1" % failed_dim_temp))
code.putln('%s(%s);' % (func, failed_dim_temp))
- code.putln(code.error_goto(pos))
- code.putln('}')
+ code.putln(code.error_goto(pos))
+ code.putln('}')
code.funcstate.release_temp(failed_dim_temp)
- elif negative_indices:
- # Only fix negative indices.
+ elif negative_indices:
+ # Only fix negative indices.
for signed, cname, shape in zip(index_signeds, index_cnames, entry.get_buf_shapevars()):
- if signed != 0:
- code.putln("if (%s < 0) %s += %s;" % (cname, cname, shape))
-
- return entry.generate_buffer_lookup_code(code, index_cnames)
-
-
-def use_bufstruct_declare_code(env):
- env.use_utility_code(buffer_struct_declare_code)
-
-
-def buf_lookup_full_code(proto, defin, name, nd):
- """
- Generates a buffer lookup function for the right number
- of dimensions. The function gives back a void* at the right location.
- """
- # _i_ndex, _s_tride, sub_o_ffset
- macroargs = ", ".join(["i%d, s%d, o%d" % (i, i, i) for i in range(nd)])
- proto.putln("#define %s(type, buf, %s) (type)(%s_imp(buf, %s))" % (name, macroargs, name, macroargs))
-
- funcargs = ", ".join(["Py_ssize_t i%d, Py_ssize_t s%d, Py_ssize_t o%d" % (i, i, i) for i in range(nd)])
- proto.putln("static CYTHON_INLINE void* %s_imp(void* buf, %s);" % (name, funcargs))
- defin.putln(dedent("""
- static CYTHON_INLINE void* %s_imp(void* buf, %s) {
- char* ptr = (char*)buf;
- """) % (name, funcargs) + "".join([dedent("""\
- ptr += s%d * i%d;
- if (o%d >= 0) ptr = *((char**)ptr) + o%d;
- """) % (i, i, i, i) for i in range(nd)]
- ) + "\nreturn ptr;\n}")
-
-
-def buf_lookup_strided_code(proto, defin, name, nd):
- """
- Generates a buffer lookup function for the right number
- of dimensions. The function gives back a void* at the right location.
- """
- # _i_ndex, _s_tride
- args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
- offset = " + ".join(["i%d * s%d" % (i, i) for i in range(nd)])
- proto.putln("#define %s(type, buf, %s) (type)((char*)buf + %s)" % (name, args, offset))
-
-
-def buf_lookup_c_code(proto, defin, name, nd):
- """
- Similar to strided lookup, but can assume that the last dimension
- is contiguous, so its index needs no stride multiplication.
- Still we keep the same signature for now.
- """
- if nd == 1:
- proto.putln("#define %s(type, buf, i0, s0) ((type)buf + i0)" % name)
- else:
- args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
- offset = " + ".join(["i%d * s%d" % (i, i) for i in range(nd - 1)])
- proto.putln("#define %s(type, buf, %s) ((type)((char*)buf + %s) + i%d)" % (name, args, offset, nd - 1))
-
-
-def buf_lookup_fortran_code(proto, defin, name, nd):
- """
- Like C lookup, but the first index is optimized instead.
- """
- if nd == 1:
- proto.putln("#define %s(type, buf, i0, s0) ((type)buf + i0)" % name)
- else:
- args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
- offset = " + ".join(["i%d * s%d" % (i, i) for i in range(1, nd)])
- proto.putln("#define %s(type, buf, %s) ((type)((char*)buf + %s) + i%d)" % (name, args, offset, 0))
-
-
-def use_py2_buffer_functions(env):
- env.use_utility_code(GetAndReleaseBufferUtilityCode())
-
-
-class GetAndReleaseBufferUtilityCode(object):
- # Emulation of PyObject_GetBuffer and PyBuffer_Release for Python 2.
- # For >= 2.6 we do double mode -- use the new buffer interface on objects
- # which have the right tp_flags set, but fall back to emulation otherwise.
-
- requires = None
- is_cython_utility = False
-
- def __init__(self):
- pass
-
- def __eq__(self, other):
- return isinstance(other, GetAndReleaseBufferUtilityCode)
-
- def __hash__(self):
- return 24342342
-
+ if signed != 0:
+ code.putln("if (%s < 0) %s += %s;" % (cname, cname, shape))
+
+ return entry.generate_buffer_lookup_code(code, index_cnames)
+
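The emitted bounds check boils down to the following per-index logic; this is a hedged Python rendering of the generated C, not the code itself:

    def check_index(i, shape, signed=True, wraparound=True):
        # Mirrors the boundscheck branch: wrap negative indices, then verify range.
        if signed and i < 0:
            if wraparound:
                i += shape
            if i < 0:
                raise IndexError("buffer index out of bounds")
        elif i >= shape:
            raise IndexError("buffer index out of bounds")
        return i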
+
+def use_bufstruct_declare_code(env):
+ env.use_utility_code(buffer_struct_declare_code)
+
+
+def buf_lookup_full_code(proto, defin, name, nd):
+ """
+ Generates a buffer lookup function for the right number
+ of dimensions. The function gives back a void* at the right location.
+ """
+ # _i_ndex, _s_tride, sub_o_ffset
+ macroargs = ", ".join(["i%d, s%d, o%d" % (i, i, i) for i in range(nd)])
+ proto.putln("#define %s(type, buf, %s) (type)(%s_imp(buf, %s))" % (name, macroargs, name, macroargs))
+
+ funcargs = ", ".join(["Py_ssize_t i%d, Py_ssize_t s%d, Py_ssize_t o%d" % (i, i, i) for i in range(nd)])
+ proto.putln("static CYTHON_INLINE void* %s_imp(void* buf, %s);" % (name, funcargs))
+ defin.putln(dedent("""
+ static CYTHON_INLINE void* %s_imp(void* buf, %s) {
+ char* ptr = (char*)buf;
+ """) % (name, funcargs) + "".join([dedent("""\
+ ptr += s%d * i%d;
+ if (o%d >= 0) ptr = *((char**)ptr) + o%d;
+ """) % (i, i, i, i) for i in range(nd)]
+ ) + "\nreturn ptr;\n}")
+
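What the generated full-mode helper does, expressed as a hedged Python analogue; deref stands in for following the indirection pointer stored at the current address:

    def full_lookup(base, indices, strides, suboffsets, deref):
        # Walk each dimension: apply the stride, then follow the indirection
        # pointer whenever the suboffset for that dimension is >= 0.
        ptr = base
        for i, s, o in zip(indices, strides, suboffsets):
            ptr += s * i
            if o >= 0:
                ptr = deref(ptr) + o
        return ptr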
+
+def buf_lookup_strided_code(proto, defin, name, nd):
+ """
+ Generates a buffer lookup function for the right number
+ of dimensions. The function gives back a void* at the right location.
+ """
+ # _i_ndex, _s_tride
+ args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
+ offset = " + ".join(["i%d * s%d" % (i, i) for i in range(nd)])
+ proto.putln("#define %s(type, buf, %s) (type)((char*)buf + %s)" % (name, args, offset))
+
+
+def buf_lookup_c_code(proto, defin, name, nd):
+ """
+ Similar to strided lookup, but can assume that the last dimension
+ is contiguous, so its index needs no stride multiplication.
+ Still we keep the same signature for now.
+ """
+ if nd == 1:
+ proto.putln("#define %s(type, buf, i0, s0) ((type)buf + i0)" % name)
+ else:
+ args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
+ offset = " + ".join(["i%d * s%d" % (i, i) for i in range(nd - 1)])
+ proto.putln("#define %s(type, buf, %s) ((type)((char*)buf + %s) + i%d)" % (name, args, offset, nd - 1))
+
+
+def buf_lookup_fortran_code(proto, defin, name, nd):
+ """
+ Like C lookup, but the first index is optimized instead.
+ """
+ if nd == 1:
+ proto.putln("#define %s(type, buf, i0, s0) ((type)buf + i0)" % name)
+ else:
+ args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
+ offset = " + ".join(["i%d * s%d" % (i, i) for i in range(1, nd)])
+ proto.putln("#define %s(type, buf, %s) ((type)((char*)buf + %s) + i%d)" % (name, args, offset, 0))
+
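To make the generated macros concrete, here is a small reproduction (an illustration, not emitted by the diff) of what the strided generator above prints for a two-dimensional buffer:

    # Re-runs the same string formatting as buf_lookup_strided_code for nd=2.
    name, nd = "__Pyx_BufPtrStrided2d", 2
    args = ", ".join(["i%d, s%d" % (i, i) for i in range(nd)])
    offset = " + ".join(["i%d * s%d" % (i, i) for i in range(nd)])
    print("#define %s(type, buf, %s) (type)((char*)buf + %s)" % (name, args, offset))
    # -> #define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1)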
+
+def use_py2_buffer_functions(env):
+ env.use_utility_code(GetAndReleaseBufferUtilityCode())
+
+
+class GetAndReleaseBufferUtilityCode(object):
+ # Emulation of PyObject_GetBuffer and PyBuffer_Release for Python 2.
+ # For >= 2.6 we do double mode -- use the new buffer interface on objects
+ # which have the right tp_flags set, but fall back to emulation otherwise.
+
+ requires = None
+ is_cython_utility = False
+
+ def __init__(self):
+ pass
+
+ def __eq__(self, other):
+ return isinstance(other, GetAndReleaseBufferUtilityCode)
+
+ def __hash__(self):
+ return 24342342
+
def get_tree(self, **kwargs): pass
-
- def put_code(self, output):
- code = output['utility_code_def']
- proto_code = output['utility_code_proto']
- env = output.module_node.scope
- cython_scope = env.context.cython_scope
-
- # Search all types for __getbuffer__ overloads
- types = []
- visited_scopes = set()
- def find_buffer_types(scope):
- if scope in visited_scopes:
- return
- visited_scopes.add(scope)
- for m in scope.cimported_modules:
- find_buffer_types(m)
- for e in scope.type_entries:
- if isinstance(e.utility_code_definition, CythonUtilityCode):
- continue
- t = e.type
- if t.is_extension_type:
- if scope is cython_scope and not e.used:
- continue
- release = get = None
- for x in t.scope.pyfunc_entries:
- if x.name == u"__getbuffer__": get = x.func_cname
- elif x.name == u"__releasebuffer__": release = x.func_cname
- if get:
- types.append((t.typeptr_cname, get, release))
-
- find_buffer_types(env)
-
- util_code = TempitaUtilityCode.load(
- "GetAndReleaseBuffer", from_file="Buffer.c",
- context=dict(types=types))
-
- proto = util_code.format_code(util_code.proto)
- impl = util_code.format_code(
- util_code.inject_string_constants(util_code.impl, output)[1])
-
- proto_code.putln(proto)
- code.putln(impl)
-
-
-def mangle_dtype_name(dtype):
+
+ def put_code(self, output):
+ code = output['utility_code_def']
+ proto_code = output['utility_code_proto']
+ env = output.module_node.scope
+ cython_scope = env.context.cython_scope
+
+ # Search all types for __getbuffer__ overloads
+ types = []
+ visited_scopes = set()
+ def find_buffer_types(scope):
+ if scope in visited_scopes:
+ return
+ visited_scopes.add(scope)
+ for m in scope.cimported_modules:
+ find_buffer_types(m)
+ for e in scope.type_entries:
+ if isinstance(e.utility_code_definition, CythonUtilityCode):
+ continue
+ t = e.type
+ if t.is_extension_type:
+ if scope is cython_scope and not e.used:
+ continue
+ release = get = None
+ for x in t.scope.pyfunc_entries:
+ if x.name == u"__getbuffer__": get = x.func_cname
+ elif x.name == u"__releasebuffer__": release = x.func_cname
+ if get:
+ types.append((t.typeptr_cname, get, release))
+
+ find_buffer_types(env)
+
+ util_code = TempitaUtilityCode.load(
+ "GetAndReleaseBuffer", from_file="Buffer.c",
+ context=dict(types=types))
+
+ proto = util_code.format_code(util_code.proto)
+ impl = util_code.format_code(
+ util_code.inject_string_constants(util_code.impl, output)[1])
+
+ proto_code.putln(proto)
+ code.putln(impl)
+
+
+def mangle_dtype_name(dtype):
# Use prefixes to separate user defined types from builtins
- # (consider "typedef float unsigned_int")
- if dtype.is_pyobject:
- return "object"
- elif dtype.is_ptr:
- return "ptr"
- else:
- if dtype.is_typedef or dtype.is_struct_or_union:
- prefix = "nn_"
- else:
- prefix = ""
+ # (consider "typedef float unsigned_int")
+ if dtype.is_pyobject:
+ return "object"
+ elif dtype.is_ptr:
+ return "ptr"
+ else:
+ if dtype.is_typedef or dtype.is_struct_or_union:
+ prefix = "nn_"
+ else:
+ prefix = ""
return prefix + dtype.specialization_name()
-
-def get_type_information_cname(code, dtype, maxdepth=None):
- """
- Output the run-time type information (__Pyx_TypeInfo) for a given dtype,
- and return the name of the type info struct.
-
- Structs with two floats of the same size are encoded as complex numbers.
+
+def get_type_information_cname(code, dtype, maxdepth=None):
+ """
+ Output the run-time type information (__Pyx_TypeInfo) for a given dtype,
+ and return the name of the type info struct.
+
+ Structs with two floats of the same size are encoded as complex numbers.
 One can distinguish complex numbers declared as a struct from those with native
- encoding by checking whether the fields field of the type is
- filled in.
- """
- namesuffix = mangle_dtype_name(dtype)
- name = "__Pyx_TypeInfo_%s" % namesuffix
- structinfo_name = "__Pyx_StructFields_%s" % namesuffix
-
- if dtype.is_error: return "<error>"
-
- # It's critical that walking the type info doesn't use more stack
- # depth than dtype.struct_nesting_depth() returns, so use an assertion for this
- if maxdepth is None: maxdepth = dtype.struct_nesting_depth()
- if maxdepth <= 0:
- assert False
-
- if name not in code.globalstate.utility_codes:
- code.globalstate.utility_codes.add(name)
- typecode = code.globalstate['typeinfo']
-
- arraysizes = []
- if dtype.is_array:
- while dtype.is_array:
- arraysizes.append(dtype.size)
- dtype = dtype.base_type
-
- complex_possible = dtype.is_struct_or_union and dtype.can_be_complex()
-
+ encoding by checking whether the fields field of the type is
+ filled in.
+ """
+ namesuffix = mangle_dtype_name(dtype)
+ name = "__Pyx_TypeInfo_%s" % namesuffix
+ structinfo_name = "__Pyx_StructFields_%s" % namesuffix
+
+ if dtype.is_error: return "<error>"
+
+ # It's critical that walking the type info doesn't use more stack
+ # depth than dtype.struct_nesting_depth() returns, so use an assertion for this
+ if maxdepth is None: maxdepth = dtype.struct_nesting_depth()
+ if maxdepth <= 0:
+ assert False
+
+ if name not in code.globalstate.utility_codes:
+ code.globalstate.utility_codes.add(name)
+ typecode = code.globalstate['typeinfo']
+
+ arraysizes = []
+ if dtype.is_array:
+ while dtype.is_array:
+ arraysizes.append(dtype.size)
+ dtype = dtype.base_type
+
+ complex_possible = dtype.is_struct_or_union and dtype.can_be_complex()
+
declcode = dtype.empty_declaration_code()
- if dtype.is_simple_buffer_dtype():
- structinfo_name = "NULL"
- elif dtype.is_struct:
+ if dtype.is_simple_buffer_dtype():
+ structinfo_name = "NULL"
+ elif dtype.is_struct:
struct_scope = dtype.scope
if dtype.is_const:
struct_scope = struct_scope.const_base_type_scope
# Must pre-call all used types in order not to recurse during utility code writing.
fields = struct_scope.var_entries
- assert len(fields) > 0
- types = [get_type_information_cname(code, f.type, maxdepth - 1)
- for f in fields]
- typecode.putln("static __Pyx_StructField %s[] = {" % structinfo_name, safe=True)
- for f, typeinfo in zip(fields, types):
- typecode.putln(' {&%s, "%s", offsetof(%s, %s)},' %
+ assert len(fields) > 0
+ types = [get_type_information_cname(code, f.type, maxdepth - 1)
+ for f in fields]
+ typecode.putln("static __Pyx_StructField %s[] = {" % structinfo_name, safe=True)
+ for f, typeinfo in zip(fields, types):
+ typecode.putln(' {&%s, "%s", offsetof(%s, %s)},' %
(typeinfo, f.name, dtype.empty_declaration_code(), f.cname), safe=True)
- typecode.putln(' {NULL, NULL, 0}', safe=True)
- typecode.putln("};", safe=True)
- else:
- assert False
-
- rep = str(dtype)
-
- flags = "0"
- is_unsigned = "0"
- if dtype is PyrexTypes.c_char_type:
- is_unsigned = "IS_UNSIGNED(%s)" % declcode
- typegroup = "'H'"
- elif dtype.is_int:
- is_unsigned = "IS_UNSIGNED(%s)" % declcode
- typegroup = "%s ? 'U' : 'I'" % is_unsigned
- elif complex_possible or dtype.is_complex:
- typegroup = "'C'"
- elif dtype.is_float:
- typegroup = "'R'"
- elif dtype.is_struct:
- typegroup = "'S'"
- if dtype.packed:
- flags = "__PYX_BUF_FLAGS_PACKED_STRUCT"
- elif dtype.is_pyobject:
- typegroup = "'O'"
- else:
- assert False, dtype
-
- typeinfo = ('static __Pyx_TypeInfo %s = '
- '{ "%s", %s, sizeof(%s), { %s }, %s, %s, %s, %s };')
- tup = (name, rep, structinfo_name, declcode,
- ', '.join([str(x) for x in arraysizes]) or '0', len(arraysizes),
- typegroup, is_unsigned, flags)
- typecode.putln(typeinfo % tup, safe=True)
-
- return name
-
-def load_buffer_utility(util_code_name, context=None, **kwargs):
- if context is None:
- return UtilityCode.load(util_code_name, "Buffer.c", **kwargs)
- else:
- return TempitaUtilityCode.load(util_code_name, "Buffer.c", context=context, **kwargs)
-
+ typecode.putln(' {NULL, NULL, 0}', safe=True)
+ typecode.putln("};", safe=True)
+ else:
+ assert False
+
+ rep = str(dtype)
+
+ flags = "0"
+ is_unsigned = "0"
+ if dtype is PyrexTypes.c_char_type:
+ is_unsigned = "IS_UNSIGNED(%s)" % declcode
+ typegroup = "'H'"
+ elif dtype.is_int:
+ is_unsigned = "IS_UNSIGNED(%s)" % declcode
+ typegroup = "%s ? 'U' : 'I'" % is_unsigned
+ elif complex_possible or dtype.is_complex:
+ typegroup = "'C'"
+ elif dtype.is_float:
+ typegroup = "'R'"
+ elif dtype.is_struct:
+ typegroup = "'S'"
+ if dtype.packed:
+ flags = "__PYX_BUF_FLAGS_PACKED_STRUCT"
+ elif dtype.is_pyobject:
+ typegroup = "'O'"
+ else:
+ assert False, dtype
+
+ typeinfo = ('static __Pyx_TypeInfo %s = '
+ '{ "%s", %s, sizeof(%s), { %s }, %s, %s, %s, %s };')
+ tup = (name, rep, structinfo_name, declcode,
+ ', '.join([str(x) for x in arraysizes]) or '0', len(arraysizes),
+ typegroup, is_unsigned, flags)
+ typecode.putln(typeinfo % tup, safe=True)
+
+ return name
+
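For a plain C double, filling the same template by hand would give roughly the following; the mangled name "__Pyx_TypeInfo_double" is an assumption for illustration:

    # Hand-filled version of the typeinfo template above for a double dtype.
    typeinfo = ('static __Pyx_TypeInfo %s = '
                '{ "%s", %s, sizeof(%s), { %s }, %s, %s, %s, %s };')
    print(typeinfo % ("__Pyx_TypeInfo_double", "double", "NULL", "double",
                      "0", 0, "'R'", "0", "0"))
    # -> static __Pyx_TypeInfo __Pyx_TypeInfo_double = { "double", NULL, sizeof(double), { 0 }, 0, 'R', 0, 0 };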
+def load_buffer_utility(util_code_name, context=None, **kwargs):
+ if context is None:
+ return UtilityCode.load(util_code_name, "Buffer.c", **kwargs)
+ else:
+ return TempitaUtilityCode.load(util_code_name, "Buffer.c", context=context, **kwargs)
+
context = dict(max_dims=Options.buffer_max_dims)
buffer_struct_declare_code = load_buffer_utility("BufferStructDeclare", context=context)
buffer_formats_declare_code = load_buffer_utility("BufferFormatStructs")
-
-# Utility function to set the right exception
-# The caller should immediately goto_error
-raise_indexerror_code = load_buffer_utility("BufferIndexError")
-raise_indexerror_nogil = load_buffer_utility("BufferIndexErrorNogil")
-raise_buffer_fallback_code = load_buffer_utility("BufferFallbackError")
-
+
+# Utility function to set the right exception
+# The caller should immediately goto_error
+raise_indexerror_code = load_buffer_utility("BufferIndexError")
+raise_indexerror_nogil = load_buffer_utility("BufferIndexErrorNogil")
+raise_buffer_fallback_code = load_buffer_utility("BufferFallbackError")
+
acquire_utility_code = load_buffer_utility("BufferGetAndValidate", context=context)
buffer_format_check_code = load_buffer_utility("BufferFormatCheck", context=context)
-# See utility code BufferFormatFromTypeInfo
+# See utility code BufferFormatFromTypeInfo
_typeinfo_to_format_code = load_buffer_utility("TypeInfoToFormat")
diff --git a/contrib/tools/cython/Cython/Compiler/Builtin.py b/contrib/tools/cython/Cython/Compiler/Builtin.py
index b4a97a9322..5fa717507d 100644
--- a/contrib/tools/cython/Cython/Compiler/Builtin.py
+++ b/contrib/tools/cython/Cython/Compiler/Builtin.py
@@ -1,100 +1,100 @@
-#
-# Builtin Definitions
-#
-
-from __future__ import absolute_import
-
-from .Symtab import BuiltinScope, StructOrUnionScope
-from .Code import UtilityCode
-from .TypeSlots import Signature
-from . import PyrexTypes
-from . import Options
-
-
-# C-level implementations of builtin types, functions and methods
-
-iter_next_utility_code = UtilityCode.load("IterNext", "ObjectHandling.c")
-getattr_utility_code = UtilityCode.load("GetAttr", "ObjectHandling.c")
-getattr3_utility_code = UtilityCode.load("GetAttr3", "Builtins.c")
-pyexec_utility_code = UtilityCode.load("PyExec", "Builtins.c")
-pyexec_globals_utility_code = UtilityCode.load("PyExecGlobals", "Builtins.c")
-globals_utility_code = UtilityCode.load("Globals", "Builtins.c")
-
-builtin_utility_code = {
+#
+# Builtin Definitions
+#
+
+from __future__ import absolute_import
+
+from .Symtab import BuiltinScope, StructOrUnionScope
+from .Code import UtilityCode
+from .TypeSlots import Signature
+from . import PyrexTypes
+from . import Options
+
+
+# C-level implementations of builtin types, functions and methods
+
+iter_next_utility_code = UtilityCode.load("IterNext", "ObjectHandling.c")
+getattr_utility_code = UtilityCode.load("GetAttr", "ObjectHandling.c")
+getattr3_utility_code = UtilityCode.load("GetAttr3", "Builtins.c")
+pyexec_utility_code = UtilityCode.load("PyExec", "Builtins.c")
+pyexec_globals_utility_code = UtilityCode.load("PyExecGlobals", "Builtins.c")
+globals_utility_code = UtilityCode.load("Globals", "Builtins.c")
+
+builtin_utility_code = {
'StopAsyncIteration': UtilityCode.load_cached("StopAsyncIteration", "Coroutine.c"),
-}
-
-
-# mapping from builtins to their C-level equivalents
-
-class _BuiltinOverride(object):
- def __init__(self, py_name, args, ret_type, cname, py_equiv="*",
- utility_code=None, sig=None, func_type=None,
- is_strict_signature=False, builtin_return_type=None):
- self.py_name, self.cname, self.py_equiv = py_name, cname, py_equiv
- self.args, self.ret_type = args, ret_type
- self.func_type, self.sig = func_type, sig
- self.builtin_return_type = builtin_return_type
- self.is_strict_signature = is_strict_signature
- self.utility_code = utility_code
-
- def build_func_type(self, sig=None, self_arg=None):
- if sig is None:
- sig = Signature(self.args, self.ret_type)
- sig.exception_check = False # not needed for the current builtins
- func_type = sig.function_type(self_arg)
- if self.is_strict_signature:
- func_type.is_strict_signature = True
- if self.builtin_return_type:
- func_type.return_type = builtin_types[self.builtin_return_type]
- return func_type
-
-
-class BuiltinAttribute(object):
- def __init__(self, py_name, cname=None, field_type=None, field_type_name=None):
- self.py_name = py_name
- self.cname = cname or py_name
- self.field_type_name = field_type_name # can't do the lookup before the type is declared!
- self.field_type = field_type
-
- def declare_in_type(self, self_type):
- if self.field_type_name is not None:
- # lazy type lookup
- field_type = builtin_scope.lookup(self.field_type_name).type
- else:
- field_type = self.field_type or PyrexTypes.py_object_type
- entry = self_type.scope.declare(self.py_name, self.cname, field_type, None, 'private')
- entry.is_variable = True
-
-
-class BuiltinFunction(_BuiltinOverride):
- def declare_in_scope(self, scope):
- func_type, sig = self.func_type, self.sig
- if func_type is None:
- func_type = self.build_func_type(sig)
- scope.declare_builtin_cfunction(self.py_name, func_type, self.cname,
- self.py_equiv, self.utility_code)
-
-
-class BuiltinMethod(_BuiltinOverride):
- def declare_in_type(self, self_type):
- method_type, sig = self.func_type, self.sig
- if method_type is None:
- # override 'self' type (first argument)
- self_arg = PyrexTypes.CFuncTypeArg("", self_type, None)
- self_arg.not_none = True
- self_arg.accept_builtin_subtypes = True
- method_type = self.build_func_type(sig, self_arg)
- self_type.scope.declare_builtin_cfunction(
- self.py_name, method_type, self.cname, utility_code=self.utility_code)
-
-
-builtin_function_table = [
- # name, args, return, C API func, py equiv = "*"
- BuiltinFunction('abs', "d", "d", "fabs",
- is_strict_signature = True),
- BuiltinFunction('abs', "f", "f", "fabsf",
- is_strict_signature = True),
+}
+
+
+# mapping from builtins to their C-level equivalents
+
+class _BuiltinOverride(object):
+ def __init__(self, py_name, args, ret_type, cname, py_equiv="*",
+ utility_code=None, sig=None, func_type=None,
+ is_strict_signature=False, builtin_return_type=None):
+ self.py_name, self.cname, self.py_equiv = py_name, cname, py_equiv
+ self.args, self.ret_type = args, ret_type
+ self.func_type, self.sig = func_type, sig
+ self.builtin_return_type = builtin_return_type
+ self.is_strict_signature = is_strict_signature
+ self.utility_code = utility_code
+
+ def build_func_type(self, sig=None, self_arg=None):
+ if sig is None:
+ sig = Signature(self.args, self.ret_type)
+ sig.exception_check = False # not needed for the current builtins
+ func_type = sig.function_type(self_arg)
+ if self.is_strict_signature:
+ func_type.is_strict_signature = True
+ if self.builtin_return_type:
+ func_type.return_type = builtin_types[self.builtin_return_type]
+ return func_type
+
+
+class BuiltinAttribute(object):
+ def __init__(self, py_name, cname=None, field_type=None, field_type_name=None):
+ self.py_name = py_name
+ self.cname = cname or py_name
+ self.field_type_name = field_type_name # can't do the lookup before the type is declared!
+ self.field_type = field_type
+
+ def declare_in_type(self, self_type):
+ if self.field_type_name is not None:
+ # lazy type lookup
+ field_type = builtin_scope.lookup(self.field_type_name).type
+ else:
+ field_type = self.field_type or PyrexTypes.py_object_type
+ entry = self_type.scope.declare(self.py_name, self.cname, field_type, None, 'private')
+ entry.is_variable = True
+
+
+class BuiltinFunction(_BuiltinOverride):
+ def declare_in_scope(self, scope):
+ func_type, sig = self.func_type, self.sig
+ if func_type is None:
+ func_type = self.build_func_type(sig)
+ scope.declare_builtin_cfunction(self.py_name, func_type, self.cname,
+ self.py_equiv, self.utility_code)
+
+
+class BuiltinMethod(_BuiltinOverride):
+ def declare_in_type(self, self_type):
+ method_type, sig = self.func_type, self.sig
+ if method_type is None:
+ # override 'self' type (first argument)
+ self_arg = PyrexTypes.CFuncTypeArg("", self_type, None)
+ self_arg.not_none = True
+ self_arg.accept_builtin_subtypes = True
+ method_type = self.build_func_type(sig, self_arg)
+ self_type.scope.declare_builtin_cfunction(
+ self.py_name, method_type, self.cname, utility_code=self.utility_code)
+
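As a reading aid for the table that follows, a single entry looks like this (the len mapping below is taken from the table itself; the signature letters are TypeSlots.Signature format codes):

    # One mapping: Python len(obj) is routed to the C-API call PyObject_Length.
    example_entry = BuiltinFunction('len', "O", "z", "PyObject_Length")
    # declare_in_scope() later builds the C function type from the signature
    # string and registers it as a builtin cfunction in the given scope.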
+
+builtin_function_table = [
+ # name, args, return, C API func, py equiv = "*"
+ BuiltinFunction('abs', "d", "d", "fabs",
+ is_strict_signature = True),
+ BuiltinFunction('abs', "f", "f", "fabsf",
+ is_strict_signature = True),
BuiltinFunction('abs', "i", "i", "abs",
is_strict_signature = True),
BuiltinFunction('abs', "l", "l", "labs",
@@ -108,17 +108,17 @@ builtin_function_table = [
is_strict_signature = True, nogil=True)),
] + list(
BuiltinFunction('abs', None, None, "/*abs_{0}*/".format(t.specialization_name()),
- func_type = PyrexTypes.CFuncType(
+ func_type = PyrexTypes.CFuncType(
t,
[PyrexTypes.CFuncTypeArg("arg", t, None)],
is_strict_signature = True, nogil=True))
for t in (PyrexTypes.c_uint_type, PyrexTypes.c_ulong_type, PyrexTypes.c_ulonglong_type)
) + list(
BuiltinFunction('abs', None, None, "__Pyx_c_abs{0}".format(t.funcsuffix),
- func_type = PyrexTypes.CFuncType(
+ func_type = PyrexTypes.CFuncType(
t.real_type, [
PyrexTypes.CFuncTypeArg("arg", t, None)
- ],
+ ],
is_strict_signature = True, nogil=True))
for t in (PyrexTypes.c_float_complex_type,
PyrexTypes.c_double_complex_type,
@@ -130,52 +130,52 @@ builtin_function_table = [
#('any', "", "", ""),
#('ascii', "", "", ""),
#('bin', "", "", ""),
- BuiltinFunction('callable', "O", "b", "__Pyx_PyCallable_Check",
- utility_code = UtilityCode.load("CallableCheck", "ObjectHandling.c")),
- #('chr', "", "", ""),
- #('cmp', "", "", "", ""), # int PyObject_Cmp(PyObject *o1, PyObject *o2, int *result)
- #('compile', "", "", ""), # PyObject* Py_CompileString( char *str, char *filename, int start)
- BuiltinFunction('delattr', "OO", "r", "PyObject_DelAttr"),
- BuiltinFunction('dir', "O", "O", "PyObject_Dir"),
- BuiltinFunction('divmod', "OO", "O", "PyNumber_Divmod"),
- BuiltinFunction('exec', "O", "O", "__Pyx_PyExecGlobals",
- utility_code = pyexec_globals_utility_code),
- BuiltinFunction('exec', "OO", "O", "__Pyx_PyExec2",
- utility_code = pyexec_utility_code),
- BuiltinFunction('exec', "OOO", "O", "__Pyx_PyExec3",
- utility_code = pyexec_utility_code),
- #('eval', "", "", ""),
- #('execfile', "", "", ""),
- #('filter', "", "", ""),
- BuiltinFunction('getattr3', "OOO", "O", "__Pyx_GetAttr3", "getattr",
- utility_code=getattr3_utility_code), # Pyrex legacy
- BuiltinFunction('getattr', "OOO", "O", "__Pyx_GetAttr3",
- utility_code=getattr3_utility_code),
- BuiltinFunction('getattr', "OO", "O", "__Pyx_GetAttr",
- utility_code=getattr_utility_code),
+ BuiltinFunction('callable', "O", "b", "__Pyx_PyCallable_Check",
+ utility_code = UtilityCode.load("CallableCheck", "ObjectHandling.c")),
+ #('chr', "", "", ""),
+ #('cmp', "", "", "", ""), # int PyObject_Cmp(PyObject *o1, PyObject *o2, int *result)
+ #('compile', "", "", ""), # PyObject* Py_CompileString( char *str, char *filename, int start)
+ BuiltinFunction('delattr', "OO", "r", "PyObject_DelAttr"),
+ BuiltinFunction('dir', "O", "O", "PyObject_Dir"),
+ BuiltinFunction('divmod', "OO", "O", "PyNumber_Divmod"),
+ BuiltinFunction('exec', "O", "O", "__Pyx_PyExecGlobals",
+ utility_code = pyexec_globals_utility_code),
+ BuiltinFunction('exec', "OO", "O", "__Pyx_PyExec2",
+ utility_code = pyexec_utility_code),
+ BuiltinFunction('exec', "OOO", "O", "__Pyx_PyExec3",
+ utility_code = pyexec_utility_code),
+ #('eval', "", "", ""),
+ #('execfile', "", "", ""),
+ #('filter', "", "", ""),
+ BuiltinFunction('getattr3', "OOO", "O", "__Pyx_GetAttr3", "getattr",
+ utility_code=getattr3_utility_code), # Pyrex legacy
+ BuiltinFunction('getattr', "OOO", "O", "__Pyx_GetAttr3",
+ utility_code=getattr3_utility_code),
+ BuiltinFunction('getattr', "OO", "O", "__Pyx_GetAttr",
+ utility_code=getattr_utility_code),
BuiltinFunction('hasattr', "OO", "b", "__Pyx_HasAttr",
utility_code = UtilityCode.load("HasAttr", "Builtins.c")),
- BuiltinFunction('hash', "O", "h", "PyObject_Hash"),
- #('hex', "", "", ""),
- #('id', "", "", ""),
- #('input', "", "", ""),
- BuiltinFunction('intern', "O", "O", "__Pyx_Intern",
- utility_code = UtilityCode.load("Intern", "Builtins.c")),
- BuiltinFunction('isinstance', "OO", "b", "PyObject_IsInstance"),
- BuiltinFunction('issubclass', "OO", "b", "PyObject_IsSubclass"),
- BuiltinFunction('iter', "OO", "O", "PyCallIter_New"),
- BuiltinFunction('iter', "O", "O", "PyObject_GetIter"),
- BuiltinFunction('len', "O", "z", "PyObject_Length"),
- BuiltinFunction('locals', "", "O", "__pyx_locals"),
- #('map', "", "", ""),
- #('max', "", "", ""),
- #('min', "", "", ""),
- BuiltinFunction('next', "O", "O", "__Pyx_PyIter_Next",
- utility_code = iter_next_utility_code), # not available in Py2 => implemented here
- BuiltinFunction('next', "OO", "O", "__Pyx_PyIter_Next2",
- utility_code = iter_next_utility_code), # not available in Py2 => implemented here
- #('oct', "", "", ""),
- #('open', "ss", "O", "PyFile_FromString"), # not in Py3
+ BuiltinFunction('hash', "O", "h", "PyObject_Hash"),
+ #('hex', "", "", ""),
+ #('id', "", "", ""),
+ #('input', "", "", ""),
+ BuiltinFunction('intern', "O", "O", "__Pyx_Intern",
+ utility_code = UtilityCode.load("Intern", "Builtins.c")),
+ BuiltinFunction('isinstance', "OO", "b", "PyObject_IsInstance"),
+ BuiltinFunction('issubclass', "OO", "b", "PyObject_IsSubclass"),
+ BuiltinFunction('iter', "OO", "O", "PyCallIter_New"),
+ BuiltinFunction('iter', "O", "O", "PyObject_GetIter"),
+ BuiltinFunction('len', "O", "z", "PyObject_Length"),
+ BuiltinFunction('locals', "", "O", "__pyx_locals"),
+ #('map', "", "", ""),
+ #('max', "", "", ""),
+ #('min', "", "", ""),
+ BuiltinFunction('next', "O", "O", "__Pyx_PyIter_Next",
+ utility_code = iter_next_utility_code), # not available in Py2 => implemented here
+ BuiltinFunction('next', "OO", "O", "__Pyx_PyIter_Next2",
+ utility_code = iter_next_utility_code), # not available in Py2 => implemented here
+ #('oct', "", "", ""),
+ #('open', "ss", "O", "PyFile_FromString"), # not in Py3
] + [
BuiltinFunction('ord', None, None, "__Pyx_long_cast",
func_type=PyrexTypes.CFuncType(
@@ -196,139 +196,139 @@ builtin_function_table = [
PyrexTypes.CFuncTypeArg("c", PyrexTypes.py_object_type, None)
],
exception_value="(long)(Py_UCS4)-1")),
- BuiltinFunction('pow', "OOO", "O", "PyNumber_Power"),
- BuiltinFunction('pow', "OO", "O", "__Pyx_PyNumber_Power2",
- utility_code = UtilityCode.load("pow2", "Builtins.c")),
- #('range', "", "", ""),
- #('raw_input', "", "", ""),
- #('reduce', "", "", ""),
- BuiltinFunction('reload', "O", "O", "PyImport_ReloadModule"),
+ BuiltinFunction('pow', "OOO", "O", "PyNumber_Power"),
+ BuiltinFunction('pow', "OO", "O", "__Pyx_PyNumber_Power2",
+ utility_code = UtilityCode.load("pow2", "Builtins.c")),
+ #('range', "", "", ""),
+ #('raw_input', "", "", ""),
+ #('reduce', "", "", ""),
+ BuiltinFunction('reload', "O", "O", "PyImport_ReloadModule"),
BuiltinFunction('repr', "O", "O", "PyObject_Repr"), # , builtin_return_type='str'), # add in Cython 3.1
- #('round', "", "", ""),
- BuiltinFunction('setattr', "OOO", "r", "PyObject_SetAttr"),
- #('sum', "", "", ""),
+ #('round', "", "", ""),
+ BuiltinFunction('setattr', "OOO", "r", "PyObject_SetAttr"),
+ #('sum', "", "", ""),
#('sorted', "", "", ""),
- #('type', "O", "O", "PyObject_Type"),
- #('unichr', "", "", ""),
- #('unicode', "", "", ""),
- #('vars', "", "", ""),
- #('zip', "", "", ""),
- # Can't do these easily until we have builtin type entries.
- #('typecheck', "OO", "i", "PyObject_TypeCheck", False),
- #('issubtype', "OO", "i", "PyType_IsSubtype", False),
-
- # Put in namespace append optimization.
- BuiltinFunction('__Pyx_PyObject_Append', "OO", "O", "__Pyx_PyObject_Append"),
+ #('type', "O", "O", "PyObject_Type"),
+ #('unichr', "", "", ""),
+ #('unicode', "", "", ""),
+ #('vars', "", "", ""),
+ #('zip', "", "", ""),
+ # Can't do these easily until we have builtin type entries.
+ #('typecheck', "OO", "i", "PyObject_TypeCheck", False),
+ #('issubtype', "OO", "i", "PyType_IsSubtype", False),
+
+ # Put in namespace append optimization.
+ BuiltinFunction('__Pyx_PyObject_Append', "OO", "O", "__Pyx_PyObject_Append"),
# This is conditionally looked up based on a compiler directive.
BuiltinFunction('__Pyx_Globals', "", "O", "__Pyx_Globals",
utility_code=globals_utility_code),
-]
-
-
-# Builtin types
-# bool
-# buffer
-# classmethod
-# dict
-# enumerate
-# file
-# float
-# int
-# list
-# long
-# object
-# property
-# slice
-# staticmethod
-# super
-# str
-# tuple
-# type
-# xrange
-
-builtin_types_table = [
-
- ("type", "PyType_Type", []),
-
-# This conflicts with the C++ bool type, and unfortunately
-# C++ is too liberal about PyObject* <-> bool conversions,
-# resulting in unintuitive runtime behavior and segfaults.
-# ("bool", "PyBool_Type", []),
-
- ("int", "PyInt_Type", []),
- ("long", "PyLong_Type", []),
- ("float", "PyFloat_Type", []),
-
- ("complex", "PyComplex_Type", [BuiltinAttribute('cval', field_type_name = 'Py_complex'),
- BuiltinAttribute('real', 'cval.real', field_type = PyrexTypes.c_double_type),
- BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type),
- ]),
-
- ("basestring", "PyBaseString_Type", [
- BuiltinMethod("join", "TO", "T", "__Pyx_PyBaseString_Join",
- utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
- ]),
- ("bytearray", "PyByteArray_Type", [
- ]),
- ("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
- BuiltinMethod("join", "TO", "O", "__Pyx_PyBytes_Join",
- utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
- ]),
- ("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
- BuiltinMethod("join", "TO", "O", "__Pyx_PyString_Join",
- builtin_return_type='basestring',
- utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
- ]),
- ("unicode", "PyUnicode_Type", [BuiltinMethod("__contains__", "TO", "b", "PyUnicode_Contains"),
- BuiltinMethod("join", "TO", "T", "PyUnicode_Join"),
- ]),
-
- ("tuple", "PyTuple_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
- ]),
-
- ("list", "PyList_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
- BuiltinMethod("insert", "TzO", "r", "PyList_Insert"),
- BuiltinMethod("reverse", "T", "r", "PyList_Reverse"),
- BuiltinMethod("append", "TO", "r", "__Pyx_PyList_Append",
- utility_code=UtilityCode.load("ListAppend", "Optimize.c")),
- BuiltinMethod("extend", "TO", "r", "__Pyx_PyList_Extend",
- utility_code=UtilityCode.load("ListExtend", "Optimize.c")),
- ]),
-
- ("dict", "PyDict_Type", [BuiltinMethod("__contains__", "TO", "b", "PyDict_Contains"),
- BuiltinMethod("has_key", "TO", "b", "PyDict_Contains"),
- BuiltinMethod("items", "T", "O", "__Pyx_PyDict_Items",
- utility_code=UtilityCode.load("py_dict_items", "Builtins.c")),
- BuiltinMethod("keys", "T", "O", "__Pyx_PyDict_Keys",
- utility_code=UtilityCode.load("py_dict_keys", "Builtins.c")),
- BuiltinMethod("values", "T", "O", "__Pyx_PyDict_Values",
- utility_code=UtilityCode.load("py_dict_values", "Builtins.c")),
- BuiltinMethod("iteritems", "T", "O", "__Pyx_PyDict_IterItems",
- utility_code=UtilityCode.load("py_dict_iteritems", "Builtins.c")),
- BuiltinMethod("iterkeys", "T", "O", "__Pyx_PyDict_IterKeys",
- utility_code=UtilityCode.load("py_dict_iterkeys", "Builtins.c")),
- BuiltinMethod("itervalues", "T", "O", "__Pyx_PyDict_IterValues",
- utility_code=UtilityCode.load("py_dict_itervalues", "Builtins.c")),
- BuiltinMethod("viewitems", "T", "O", "__Pyx_PyDict_ViewItems",
- utility_code=UtilityCode.load("py_dict_viewitems", "Builtins.c")),
- BuiltinMethod("viewkeys", "T", "O", "__Pyx_PyDict_ViewKeys",
- utility_code=UtilityCode.load("py_dict_viewkeys", "Builtins.c")),
- BuiltinMethod("viewvalues", "T", "O", "__Pyx_PyDict_ViewValues",
- utility_code=UtilityCode.load("py_dict_viewvalues", "Builtins.c")),
- BuiltinMethod("clear", "T", "r", "__Pyx_PyDict_Clear",
- utility_code=UtilityCode.load("py_dict_clear", "Optimize.c")),
- BuiltinMethod("copy", "T", "T", "PyDict_Copy")]),
-
- ("slice", "PySlice_Type", [BuiltinAttribute('start'),
- BuiltinAttribute('stop'),
- BuiltinAttribute('step'),
- ]),
-# ("file", "PyFile_Type", []), # not in Py3
-
- ("set", "PySet_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
- BuiltinMethod("clear", "T", "r", "PySet_Clear"),
- # discard() and remove() have a special treatment for unhashable values
+]
+
+
+# Builtin types
+# bool
+# buffer
+# classmethod
+# dict
+# enumerate
+# file
+# float
+# int
+# list
+# long
+# object
+# property
+# slice
+# staticmethod
+# super
+# str
+# tuple
+# type
+# xrange
+
+builtin_types_table = [
+
+ ("type", "PyType_Type", []),
+
+# This conflicts with the C++ bool type, and unfortunately
+# C++ is too liberal about PyObject* <-> bool conversions,
+# resulting in unintuitive runtime behavior and segfaults.
+# ("bool", "PyBool_Type", []),
+
+ ("int", "PyInt_Type", []),
+ ("long", "PyLong_Type", []),
+ ("float", "PyFloat_Type", []),
+
+ ("complex", "PyComplex_Type", [BuiltinAttribute('cval', field_type_name = 'Py_complex'),
+ BuiltinAttribute('real', 'cval.real', field_type = PyrexTypes.c_double_type),
+ BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type),
+ ]),
+
+ ("basestring", "PyBaseString_Type", [
+ BuiltinMethod("join", "TO", "T", "__Pyx_PyBaseString_Join",
+ utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
+ ]),
+ ("bytearray", "PyByteArray_Type", [
+ ]),
+ ("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
+ BuiltinMethod("join", "TO", "O", "__Pyx_PyBytes_Join",
+ utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
+ ]),
+ ("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
+ BuiltinMethod("join", "TO", "O", "__Pyx_PyString_Join",
+ builtin_return_type='basestring',
+ utility_code=UtilityCode.load("StringJoin", "StringTools.c")),
+ ]),
+ ("unicode", "PyUnicode_Type", [BuiltinMethod("__contains__", "TO", "b", "PyUnicode_Contains"),
+ BuiltinMethod("join", "TO", "T", "PyUnicode_Join"),
+ ]),
+
+ ("tuple", "PyTuple_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
+ ]),
+
+ ("list", "PyList_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
+ BuiltinMethod("insert", "TzO", "r", "PyList_Insert"),
+ BuiltinMethod("reverse", "T", "r", "PyList_Reverse"),
+ BuiltinMethod("append", "TO", "r", "__Pyx_PyList_Append",
+ utility_code=UtilityCode.load("ListAppend", "Optimize.c")),
+ BuiltinMethod("extend", "TO", "r", "__Pyx_PyList_Extend",
+ utility_code=UtilityCode.load("ListExtend", "Optimize.c")),
+ ]),
+
+ ("dict", "PyDict_Type", [BuiltinMethod("__contains__", "TO", "b", "PyDict_Contains"),
+ BuiltinMethod("has_key", "TO", "b", "PyDict_Contains"),
+ BuiltinMethod("items", "T", "O", "__Pyx_PyDict_Items",
+ utility_code=UtilityCode.load("py_dict_items", "Builtins.c")),
+ BuiltinMethod("keys", "T", "O", "__Pyx_PyDict_Keys",
+ utility_code=UtilityCode.load("py_dict_keys", "Builtins.c")),
+ BuiltinMethod("values", "T", "O", "__Pyx_PyDict_Values",
+ utility_code=UtilityCode.load("py_dict_values", "Builtins.c")),
+ BuiltinMethod("iteritems", "T", "O", "__Pyx_PyDict_IterItems",
+ utility_code=UtilityCode.load("py_dict_iteritems", "Builtins.c")),
+ BuiltinMethod("iterkeys", "T", "O", "__Pyx_PyDict_IterKeys",
+ utility_code=UtilityCode.load("py_dict_iterkeys", "Builtins.c")),
+ BuiltinMethod("itervalues", "T", "O", "__Pyx_PyDict_IterValues",
+ utility_code=UtilityCode.load("py_dict_itervalues", "Builtins.c")),
+ BuiltinMethod("viewitems", "T", "O", "__Pyx_PyDict_ViewItems",
+ utility_code=UtilityCode.load("py_dict_viewitems", "Builtins.c")),
+ BuiltinMethod("viewkeys", "T", "O", "__Pyx_PyDict_ViewKeys",
+ utility_code=UtilityCode.load("py_dict_viewkeys", "Builtins.c")),
+ BuiltinMethod("viewvalues", "T", "O", "__Pyx_PyDict_ViewValues",
+ utility_code=UtilityCode.load("py_dict_viewvalues", "Builtins.c")),
+ BuiltinMethod("clear", "T", "r", "__Pyx_PyDict_Clear",
+ utility_code=UtilityCode.load("py_dict_clear", "Optimize.c")),
+ BuiltinMethod("copy", "T", "T", "PyDict_Copy")]),
+
+ ("slice", "PySlice_Type", [BuiltinAttribute('start'),
+ BuiltinAttribute('stop'),
+ BuiltinAttribute('step'),
+ ]),
+# ("file", "PyFile_Type", []), # not in Py3
+
+ ("set", "PySet_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
+ BuiltinMethod("clear", "T", "r", "PySet_Clear"),
+ # discard() and remove() have a special treatment for unhashable values
BuiltinMethod("discard", "TO", "r", "__Pyx_PySet_Discard",
utility_code=UtilityCode.load("py_set_discard", "Optimize.c")),
BuiltinMethod("remove", "TO", "r", "__Pyx_PySet_Remove",
@@ -336,114 +336,114 @@ builtin_types_table = [
# update is actually variadic (see Github issue #1645)
# BuiltinMethod("update", "TO", "r", "__Pyx_PySet_Update",
# utility_code=UtilityCode.load_cached("PySet_Update", "Builtins.c")),
- BuiltinMethod("add", "TO", "r", "PySet_Add"),
- BuiltinMethod("pop", "T", "O", "PySet_Pop")]),
- ("frozenset", "PyFrozenSet_Type", []),
+ BuiltinMethod("add", "TO", "r", "PySet_Add"),
+ BuiltinMethod("pop", "T", "O", "PySet_Pop")]),
+ ("frozenset", "PyFrozenSet_Type", []),
("Exception", "((PyTypeObject*)PyExc_Exception)[0]", []),
("StopAsyncIteration", "((PyTypeObject*)__Pyx_PyExc_StopAsyncIteration)[0]", []),
-]
-
-
-types_that_construct_their_instance = set([
- # some builtin types do not always return an instance of
- # themselves - these do:
- 'type', 'bool', 'long', 'float', 'complex',
- 'bytes', 'unicode', 'bytearray',
- 'tuple', 'list', 'dict', 'set', 'frozenset'
- # 'str', # only in Py3.x
- # 'file', # only in Py2.x
-])
-
-
-builtin_structs_table = [
- ('Py_buffer', 'Py_buffer',
- [("buf", PyrexTypes.c_void_ptr_type),
- ("obj", PyrexTypes.py_object_type),
- ("len", PyrexTypes.c_py_ssize_t_type),
- ("itemsize", PyrexTypes.c_py_ssize_t_type),
- ("readonly", PyrexTypes.c_bint_type),
- ("ndim", PyrexTypes.c_int_type),
- ("format", PyrexTypes.c_char_ptr_type),
- ("shape", PyrexTypes.c_py_ssize_t_ptr_type),
- ("strides", PyrexTypes.c_py_ssize_t_ptr_type),
- ("suboffsets", PyrexTypes.c_py_ssize_t_ptr_type),
- ("smalltable", PyrexTypes.CArrayType(PyrexTypes.c_py_ssize_t_type, 2)),
- ("internal", PyrexTypes.c_void_ptr_type),
- ]),
- ('Py_complex', 'Py_complex',
- [('real', PyrexTypes.c_double_type),
- ('imag', PyrexTypes.c_double_type),
- ])
-]
-
-# set up builtin scope
-
-builtin_scope = BuiltinScope()
-
-def init_builtin_funcs():
- for bf in builtin_function_table:
- bf.declare_in_scope(builtin_scope)
-
-builtin_types = {}
-
-def init_builtin_types():
- global builtin_types
- for name, cname, methods in builtin_types_table:
- utility = builtin_utility_code.get(name)
- if name == 'frozenset':
- objstruct_cname = 'PySetObject'
+]
+
+
+types_that_construct_their_instance = set([
+ # some builtin types do not always return an instance of
+ # themselves - these do:
+ 'type', 'bool', 'long', 'float', 'complex',
+ 'bytes', 'unicode', 'bytearray',
+ 'tuple', 'list', 'dict', 'set', 'frozenset'
+ # 'str', # only in Py3.x
+ # 'file', # only in Py2.x
+])
+
+
+builtin_structs_table = [
+ ('Py_buffer', 'Py_buffer',
+ [("buf", PyrexTypes.c_void_ptr_type),
+ ("obj", PyrexTypes.py_object_type),
+ ("len", PyrexTypes.c_py_ssize_t_type),
+ ("itemsize", PyrexTypes.c_py_ssize_t_type),
+ ("readonly", PyrexTypes.c_bint_type),
+ ("ndim", PyrexTypes.c_int_type),
+ ("format", PyrexTypes.c_char_ptr_type),
+ ("shape", PyrexTypes.c_py_ssize_t_ptr_type),
+ ("strides", PyrexTypes.c_py_ssize_t_ptr_type),
+ ("suboffsets", PyrexTypes.c_py_ssize_t_ptr_type),
+ ("smalltable", PyrexTypes.CArrayType(PyrexTypes.c_py_ssize_t_type, 2)),
+ ("internal", PyrexTypes.c_void_ptr_type),
+ ]),
+ ('Py_complex', 'Py_complex',
+ [('real', PyrexTypes.c_double_type),
+ ('imag', PyrexTypes.c_double_type),
+ ])
+]
+
+# set up builtin scope
+
+builtin_scope = BuiltinScope()
+
+def init_builtin_funcs():
+ for bf in builtin_function_table:
+ bf.declare_in_scope(builtin_scope)
+
+builtin_types = {}
+
+def init_builtin_types():
+ global builtin_types
+ for name, cname, methods in builtin_types_table:
+ utility = builtin_utility_code.get(name)
+ if name == 'frozenset':
+ objstruct_cname = 'PySetObject'
elif name == 'bytearray':
objstruct_cname = 'PyByteArrayObject'
- elif name == 'bool':
- objstruct_cname = None
+ elif name == 'bool':
+ objstruct_cname = None
elif name == 'Exception':
objstruct_cname = "PyBaseExceptionObject"
elif name == 'StopAsyncIteration':
objstruct_cname = "PyBaseExceptionObject"
- else:
- objstruct_cname = 'Py%sObject' % name.capitalize()
- the_type = builtin_scope.declare_builtin_type(name, cname, utility, objstruct_cname)
- builtin_types[name] = the_type
- for method in methods:
- method.declare_in_type(the_type)
-
-def init_builtin_structs():
- for name, cname, attribute_types in builtin_structs_table:
- scope = StructOrUnionScope(name)
- for attribute_name, attribute_type in attribute_types:
- scope.declare_var(attribute_name, attribute_type, None,
- attribute_name, allow_pyobject=True)
- builtin_scope.declare_struct_or_union(
- name, "struct", scope, 1, None, cname = cname)
-
-
-def init_builtins():
- init_builtin_structs()
- init_builtin_types()
- init_builtin_funcs()
-
- builtin_scope.declare_var(
- '__debug__', PyrexTypes.c_const_type(PyrexTypes.c_bint_type),
- pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True)
-
- global list_type, tuple_type, dict_type, set_type, frozenset_type
- global bytes_type, str_type, unicode_type, basestring_type, slice_type
- global float_type, bool_type, type_type, complex_type, bytearray_type
- type_type = builtin_scope.lookup('type').type
- list_type = builtin_scope.lookup('list').type
- tuple_type = builtin_scope.lookup('tuple').type
- dict_type = builtin_scope.lookup('dict').type
- set_type = builtin_scope.lookup('set').type
- frozenset_type = builtin_scope.lookup('frozenset').type
- slice_type = builtin_scope.lookup('slice').type
- bytes_type = builtin_scope.lookup('bytes').type
- str_type = builtin_scope.lookup('str').type
- unicode_type = builtin_scope.lookup('unicode').type
- basestring_type = builtin_scope.lookup('basestring').type
- bytearray_type = builtin_scope.lookup('bytearray').type
- float_type = builtin_scope.lookup('float').type
- bool_type = builtin_scope.lookup('bool').type
- complex_type = builtin_scope.lookup('complex').type
-
-
-init_builtins()
+ else:
+ objstruct_cname = 'Py%sObject' % name.capitalize()
+ the_type = builtin_scope.declare_builtin_type(name, cname, utility, objstruct_cname)
+ builtin_types[name] = the_type
+ for method in methods:
+ method.declare_in_type(the_type)
+
+def init_builtin_structs():
+ for name, cname, attribute_types in builtin_structs_table:
+ scope = StructOrUnionScope(name)
+ for attribute_name, attribute_type in attribute_types:
+ scope.declare_var(attribute_name, attribute_type, None,
+ attribute_name, allow_pyobject=True)
+ builtin_scope.declare_struct_or_union(
+ name, "struct", scope, 1, None, cname = cname)
+
+
+def init_builtins():
+ init_builtin_structs()
+ init_builtin_types()
+ init_builtin_funcs()
+
+ builtin_scope.declare_var(
+ '__debug__', PyrexTypes.c_const_type(PyrexTypes.c_bint_type),
+ pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True)
+
+ global list_type, tuple_type, dict_type, set_type, frozenset_type
+ global bytes_type, str_type, unicode_type, basestring_type, slice_type
+ global float_type, bool_type, type_type, complex_type, bytearray_type
+ type_type = builtin_scope.lookup('type').type
+ list_type = builtin_scope.lookup('list').type
+ tuple_type = builtin_scope.lookup('tuple').type
+ dict_type = builtin_scope.lookup('dict').type
+ set_type = builtin_scope.lookup('set').type
+ frozenset_type = builtin_scope.lookup('frozenset').type
+ slice_type = builtin_scope.lookup('slice').type
+ bytes_type = builtin_scope.lookup('bytes').type
+ str_type = builtin_scope.lookup('str').type
+ unicode_type = builtin_scope.lookup('unicode').type
+ basestring_type = builtin_scope.lookup('basestring').type
+ bytearray_type = builtin_scope.lookup('bytearray').type
+ float_type = builtin_scope.lookup('float').type
+ bool_type = builtin_scope.lookup('bool').type
+ complex_type = builtin_scope.lookup('complex').type
+
+
+init_builtins()
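
Note on the table restored above (this note is not part of the diff): the short strings passed to BuiltinMethod ("TO", "TzO", "r", ...) are signature codes resolved elsewhere in the compiler (TypeSlots.Signature), which is outside this hunk. The decoding sketched below is therefore an inference from the entries themselves, not an authoritative mapping: 'T' appears to denote the builtin type itself (self), 'O' a generic Python object, 'z' a Py_ssize_t, 'b' a C bint, and 'r' an int error-return.

    # Hedged sketch: decode one builtin_types_table entry under the assumed
    # meaning of the signature characters (illustrative only, not Cython API).
    SIG = {
        "T": "self (the builtin type)",
        "O": "object",
        "z": "Py_ssize_t",
        "b": "bint",
        "r": "int (-1 on error)",
    }

    def describe(name, args, ret, cname):
        arg_desc = ", ".join(SIG.get(c, "?") for c in args)
        return "%s(%s) -> %s  [%s]" % (name, arg_desc, SIG.get(ret, "?"), cname)

    # e.g. the list.insert() row from the table above:
    print(describe("insert", "TzO", "r", "PyList_Insert"))
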
diff --git a/contrib/tools/cython/Cython/Compiler/CmdLine.py b/contrib/tools/cython/Cython/Compiler/CmdLine.py
index 77c2de9ef9..a20ab38dc2 100644
--- a/contrib/tools/cython/Cython/Compiler/CmdLine.py
+++ b/contrib/tools/cython/Cython/Compiler/CmdLine.py
@@ -1,83 +1,83 @@
-#
-# Cython - Command Line Parsing
-#
-
-from __future__ import absolute_import
-
-import os
-import sys
-from . import Options
-
-usage = """\
-Cython (http://cython.org) is a compiler for code written in the
-Cython language. Cython is based on Pyrex by Greg Ewing.
-
-Usage: cython [options] sourcefile.{pyx,py} ...
-
-Options:
- -V, --version Display version number of cython compiler
- -l, --create-listing Write error messages to a listing file
- -I, --include-dir <directory> Search for include files in named directory
- (multiple include directories are allowed).
- -o, --output-file <filename> Specify name of generated C file
- -t, --timestamps Only compile newer source files
- -f, --force Compile all source files (overrides implied -t)
- -v, --verbose Be verbose, print file names on multiple compilation
- -p, --embed-positions If specified, the positions in Cython files of each
- function definition is embedded in its docstring.
- --cleanup <level> Release interned objects on python exit, for memory debugging.
- Level indicates aggressiveness, default 0 releases nothing.
- -w, --working <directory> Sets the working directory for Cython (the directory modules
- are searched from)
- --gdb Output debug information for cygdb
- --gdb-outdir <directory> Specify gdb debug information output directory. Implies --gdb.
-
- -D, --no-docstrings Strip docstrings from the compiled module.
- -a, --annotate Produce a colorized HTML version of the source.
+#
+# Cython - Command Line Parsing
+#
+
+from __future__ import absolute_import
+
+import os
+import sys
+from . import Options
+
+usage = """\
+Cython (http://cython.org) is a compiler for code written in the
+Cython language. Cython is based on Pyrex by Greg Ewing.
+
+Usage: cython [options] sourcefile.{pyx,py} ...
+
+Options:
+ -V, --version Display version number of cython compiler
+ -l, --create-listing Write error messages to a listing file
+ -I, --include-dir <directory> Search for include files in named directory
+ (multiple include directories are allowed).
+ -o, --output-file <filename> Specify name of generated C file
+ -t, --timestamps Only compile newer source files
+ -f, --force Compile all source files (overrides implied -t)
+ -v, --verbose Be verbose, print file names on multiple compilation
+ -p, --embed-positions If specified, the positions in Cython files of each
+                                function definition are embedded in its docstring.
+ --cleanup <level> Release interned objects on python exit, for memory debugging.
+ Level indicates aggressiveness, default 0 releases nothing.
+ -w, --working <directory> Sets the working directory for Cython (the directory modules
+ are searched from)
+ --gdb Output debug information for cygdb
+ --gdb-outdir <directory> Specify gdb debug information output directory. Implies --gdb.
+
+ -D, --no-docstrings Strip docstrings from the compiled module.
+ -a, --annotate Produce a colorized HTML version of the source.
--annotate-coverage <cov.xml> Annotate and include coverage information from cov.xml.
- --line-directives Produce #line directives pointing to the .pyx source
- --cplus Output a C++ rather than C file.
- --embed[=<method_name>] Generate a main() function that embeds the Python interpreter.
- -2 Compile based on Python-2 syntax and code semantics.
- -3 Compile based on Python-3 syntax and code semantics.
+ --line-directives Produce #line directives pointing to the .pyx source
+ --cplus Output a C++ rather than C file.
+ --embed[=<method_name>] Generate a main() function that embeds the Python interpreter.
+ -2 Compile based on Python-2 syntax and code semantics.
+ -3 Compile based on Python-3 syntax and code semantics.
--3str Compile based on Python-3 syntax and code semantics without
assuming unicode by default for string literals under Python 2.
- --lenient Change some compile time errors to runtime errors to
- improve Python compatibility
- --capi-reexport-cincludes Add cincluded headers to any auto-generated header files.
- --fast-fail Abort the compilation on the first error
- --warning-errors, -Werror Make all warnings into errors
- --warning-extra, -Wextra Enable extra warnings
- -X, --directive <name>=<value>[,<name=value,...] Overrides a compiler directive
+ --lenient Change some compile time errors to runtime errors to
+ improve Python compatibility
+ --capi-reexport-cincludes Add cincluded headers to any auto-generated header files.
+ --fast-fail Abort the compilation on the first error
+ --warning-errors, -Werror Make all warnings into errors
+ --warning-extra, -Wextra Enable extra warnings
+ -X, --directive <name>=<value>[,<name=value,...] Overrides a compiler directive
-E, --compile-time-env name=value[,<name=value,...] Provides compile time env like DEF would do.
-"""
-
-
+"""
+
+
# The following experimental options are supported only on MacOSX:
-# -C, --compile Compile generated .c file to .o file
-# --link Link .o file to produce extension module (implies -C)
-# -+, --cplus Use C++ compiler for compiling and linking
-# Additional .o files to link may be supplied when using -X."""
-
-def bad_usage():
- sys.stderr.write(usage)
- sys.exit(1)
-
-
-def parse_command_line(args):
- from .Main import CompilationOptions, default_options
-
+# -C, --compile Compile generated .c file to .o file
+# --link Link .o file to produce extension module (implies -C)
+# -+, --cplus Use C++ compiler for compiling and linking
+# Additional .o files to link may be supplied when using -X."""
+
+def bad_usage():
+ sys.stderr.write(usage)
+ sys.exit(1)
+
+
+def parse_command_line(args):
+ from .Main import CompilationOptions, default_options
+
pending_arg = []
- def pop_arg():
+ def pop_arg():
if not args or pending_arg:
- bad_usage()
+ bad_usage()
if '=' in args[0] and args[0].startswith('--'): # allow "--long-option=xyz"
name, value = args.pop(0).split('=', 1)
pending_arg.append(value)
return name
return args.pop(0)
-
+
def pop_value(default=None):
if pending_arg:
return pending_arg.pop()
@@ -87,104 +87,104 @@ def parse_command_line(args):
bad_usage()
return args.pop(0)
- def get_param(option):
- tail = option[2:]
- if tail:
- return tail
- else:
- return pop_arg()
-
- options = CompilationOptions(default_options)
- sources = []
- while args:
- if args[0].startswith("-"):
- option = pop_arg()
- if option in ("-V", "--version"):
- options.show_version = 1
- elif option in ("-l", "--create-listing"):
- options.use_listing_file = 1
- elif option in ("-+", "--cplus"):
- options.cplus = 1
- elif option == "--embed":
+ def get_param(option):
+ tail = option[2:]
+ if tail:
+ return tail
+ else:
+ return pop_arg()
+
+ options = CompilationOptions(default_options)
+ sources = []
+ while args:
+ if args[0].startswith("-"):
+ option = pop_arg()
+ if option in ("-V", "--version"):
+ options.show_version = 1
+ elif option in ("-l", "--create-listing"):
+ options.use_listing_file = 1
+ elif option in ("-+", "--cplus"):
+ options.cplus = 1
+ elif option == "--embed":
Options.embed = pop_value("main")
- elif option.startswith("-I"):
- options.include_path.append(get_param(option))
- elif option == "--include-dir":
+ elif option.startswith("-I"):
+ options.include_path.append(get_param(option))
+ elif option == "--include-dir":
options.include_path.append(pop_value())
- elif option in ("-w", "--working"):
+ elif option in ("-w", "--working"):
options.working_path = pop_value()
- elif option in ("-o", "--output-file"):
+ elif option in ("-o", "--output-file"):
options.output_file = pop_value()
- elif option in ("-t", "--timestamps"):
- options.timestamps = 1
- elif option in ("-f", "--force"):
- options.timestamps = 0
- elif option in ("-v", "--verbose"):
- options.verbose += 1
- elif option in ("-p", "--embed-positions"):
- Options.embed_pos_in_docstring = 1
- elif option in ("-z", "--pre-import"):
+ elif option in ("-t", "--timestamps"):
+ options.timestamps = 1
+ elif option in ("-f", "--force"):
+ options.timestamps = 0
+ elif option in ("-v", "--verbose"):
+ options.verbose += 1
+ elif option in ("-p", "--embed-positions"):
+ Options.embed_pos_in_docstring = 1
+ elif option in ("-z", "--pre-import"):
Options.pre_import = pop_value()
- elif option == "--cleanup":
+ elif option == "--cleanup":
Options.generate_cleanup_code = int(pop_value())
- elif option in ("-D", "--no-docstrings"):
- Options.docstrings = False
- elif option in ("-a", "--annotate"):
- Options.annotate = True
+ elif option in ("-D", "--no-docstrings"):
+ Options.docstrings = False
+ elif option in ("-a", "--annotate"):
+ Options.annotate = True
elif option == "--annotate-coverage":
Options.annotate = True
Options.annotate_coverage_xml = pop_value()
- elif option == "--convert-range":
- Options.convert_range = True
- elif option == "--line-directives":
- options.emit_linenums = True
- elif option == "--no-c-in-traceback":
- options.c_line_in_traceback = False
- elif option == "--gdb":
- options.gdb_debug = True
- options.output_dir = os.curdir
- elif option == "--gdb-outdir":
- options.gdb_debug = True
+ elif option == "--convert-range":
+ Options.convert_range = True
+ elif option == "--line-directives":
+ options.emit_linenums = True
+ elif option == "--no-c-in-traceback":
+ options.c_line_in_traceback = False
+ elif option == "--gdb":
+ options.gdb_debug = True
+ options.output_dir = os.curdir
+ elif option == "--gdb-outdir":
+ options.gdb_debug = True
options.output_dir = pop_value()
- elif option == "--lenient":
- Options.error_on_unknown_names = False
- Options.error_on_uninitialized = False
+ elif option == "--lenient":
+ Options.error_on_unknown_names = False
+ Options.error_on_uninitialized = False
elif option == '--module-name':
options.module_name = pop_arg()
elif option == '--init-suffix':
options.init_suffix = pop_arg()
elif option == '--source-root':
Options.source_root = pop_arg()
- elif option == '-2':
- options.language_level = 2
- elif option == '-3':
- options.language_level = 3
+ elif option == '-2':
+ options.language_level = 2
+ elif option == '-3':
+ options.language_level = 3
elif option == '--3str':
options.language_level = '3str'
- elif option == "--capi-reexport-cincludes":
- options.capi_reexport_cincludes = True
- elif option == "--fast-fail":
- Options.fast_fail = True
+ elif option == "--capi-reexport-cincludes":
+ options.capi_reexport_cincludes = True
+ elif option == "--fast-fail":
+ Options.fast_fail = True
elif option == "--cimport-from-pyx":
Options.cimport_from_pyx = True
- elif option in ('-Werror', '--warning-errors'):
- Options.warning_errors = True
- elif option in ('-Wextra', '--warning-extra'):
- options.compiler_directives.update(Options.extra_warnings)
- elif option == "--old-style-globals":
- Options.old_style_globals = True
- elif option == "--directive" or option.startswith('-X'):
- if option.startswith('-X') and option[2:].strip():
- x_args = option[2:]
- else:
+ elif option in ('-Werror', '--warning-errors'):
+ Options.warning_errors = True
+ elif option in ('-Wextra', '--warning-extra'):
+ options.compiler_directives.update(Options.extra_warnings)
+ elif option == "--old-style-globals":
+ Options.old_style_globals = True
+ elif option == "--directive" or option.startswith('-X'):
+ if option.startswith('-X') and option[2:].strip():
+ x_args = option[2:]
+ else:
x_args = pop_value()
- try:
- options.compiler_directives = Options.parse_directive_list(
- x_args, relaxed_bool=True,
- current_settings=options.compiler_directives)
+ try:
+ options.compiler_directives = Options.parse_directive_list(
+ x_args, relaxed_bool=True,
+ current_settings=options.compiler_directives)
except ValueError as e:
- sys.stderr.write("Error in compiler directive: %s\n" % e.args[0])
- sys.exit(1)
+ sys.stderr.write("Error in compiler directive: %s\n" % e.args[0])
+ sys.exit(1)
elif option == "--compile-time-env" or option.startswith('-E'):
if option.startswith('-E') and option[2:].strip():
x_args = option[2:]
@@ -196,35 +196,35 @@ def parse_command_line(args):
except ValueError as e:
sys.stderr.write("Error in compile-time-env: %s\n" % e.args[0])
sys.exit(1)
- elif option.startswith('--debug'):
- option = option[2:].replace('-', '_')
- from . import DebugFlags
- if option in dir(DebugFlags):
- setattr(DebugFlags, option, True)
- else:
- sys.stderr.write("Unknown debug flag: %s\n" % option)
- bad_usage()
- elif option in ('-h', '--help'):
- sys.stdout.write(usage)
- sys.exit(0)
- else:
- sys.stderr.write("Unknown compiler flag: %s\n" % option)
- sys.exit(1)
- else:
- sources.append(pop_arg())
+ elif option.startswith('--debug'):
+ option = option[2:].replace('-', '_')
+ from . import DebugFlags
+ if option in dir(DebugFlags):
+ setattr(DebugFlags, option, True)
+ else:
+ sys.stderr.write("Unknown debug flag: %s\n" % option)
+ bad_usage()
+ elif option in ('-h', '--help'):
+ sys.stdout.write(usage)
+ sys.exit(0)
+ else:
+ sys.stderr.write("Unknown compiler flag: %s\n" % option)
+ sys.exit(1)
+ else:
+ sources.append(pop_arg())
if pending_arg:
bad_usage()
- if options.use_listing_file and len(sources) > 1:
- sys.stderr.write(
- "cython: Only one source file allowed when using -o\n")
- sys.exit(1)
- if len(sources) == 0 and not options.show_version:
- bad_usage()
- if Options.embed and len(sources) > 1:
- sys.stderr.write(
- "cython: Only one source file allowed when using -embed\n")
- sys.exit(1)
- return options, sources
-
+ if options.use_listing_file and len(sources) > 1:
+ sys.stderr.write(
+ "cython: Only one source file allowed when using -o\n")
+ sys.exit(1)
+ if len(sources) == 0 and not options.show_version:
+ bad_usage()
+ if Options.embed and len(sources) > 1:
+ sys.stderr.write(
+ "cython: Only one source file allowed when using -embed\n")
+ sys.exit(1)
+ return options, sources
+
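For reference, the parser shown above collects bare arguments as source files and returns an (options, sources) pair. The snippet below is a hedged usage sketch, assuming the vendored Cython package is importable; the CompilationOptions object itself comes from Main.py, which is outside this diff.

    # Hedged usage sketch of parse_command_line() as defined above.
    from Cython.Compiler.CmdLine import parse_command_line

    options, sources = parse_command_line(["-3", "--cplus", "-o", "mod.c", "mod.pyx"])
    assert options.language_level == 3      # set by the '-3' branch
    assert options.cplus == 1               # set by the '--cplus' branch
    assert options.output_file == "mod.c"   # '-o' consumes the next value
    assert sources == ["mod.pyx"]           # bare arguments become sources
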
diff --git a/contrib/tools/cython/Cython/Compiler/Code.pxd b/contrib/tools/cython/Cython/Compiler/Code.pxd
index 7c2d048720..acad0c1cf4 100644
--- a/contrib/tools/cython/Cython/Compiler/Code.pxd
+++ b/contrib/tools/cython/Cython/Compiler/Code.pxd
@@ -1,10 +1,10 @@
-
-from __future__ import absolute_import
-
-cimport cython
+
+from __future__ import absolute_import
+
+cimport cython
from ..StringIOTree cimport StringIOTree
-
-
+
+
cdef class UtilityCodeBase(object):
cpdef format_code(self, code_string, replace_empty_lines=*)
@@ -24,79 +24,79 @@ cdef class UtilityCode(UtilityCodeBase):
cpdef none_or_sub(self, s, context)
-cdef class FunctionState:
- cdef public set names_taken
- cdef public object owner
+cdef class FunctionState:
+ cdef public set names_taken
+ cdef public object owner
cdef public object scope
-
- cdef public object error_label
- cdef public size_t label_counter
- cdef public set labels_used
- cdef public object return_label
- cdef public object continue_label
- cdef public object break_label
- cdef public list yield_labels
-
- cdef public object return_from_error_cleanup_label # not used in __init__ ?
-
+
+ cdef public object error_label
+ cdef public size_t label_counter
+ cdef public set labels_used
+ cdef public object return_label
+ cdef public object continue_label
+ cdef public object break_label
+ cdef public list yield_labels
+
+ cdef public object return_from_error_cleanup_label # not used in __init__ ?
+
cdef public object exc_vars
cdef public object current_except
- cdef public bint in_try_finally
- cdef public bint can_trace
+ cdef public bint in_try_finally
+ cdef public bint can_trace
cdef public bint gil_owned
-
- cdef public list temps_allocated
- cdef public dict temps_free
- cdef public dict temps_used_type
+
+ cdef public list temps_allocated
+ cdef public dict temps_free
+ cdef public dict temps_used_type
cdef public set zombie_temps
- cdef public size_t temp_counter
- cdef public list collect_temps_stack
-
- cdef public object closure_temps
- cdef public bint should_declare_error_indicator
- cdef public bint uses_error_indicator
-
- @cython.locals(n=size_t)
- cpdef new_label(self, name=*)
- cpdef tuple get_loop_labels(self)
- cpdef set_loop_labels(self, labels)
- cpdef tuple get_all_labels(self)
- cpdef set_all_labels(self, labels)
- cpdef start_collecting_temps(self)
- cpdef stop_collecting_temps(self)
-
- cpdef list temps_in_use(self)
-
-cdef class IntConst:
- cdef public object cname
- cdef public object value
- cdef public bint is_long
-
-cdef class PyObjectConst:
- cdef public object cname
- cdef public object type
-
-cdef class StringConst:
- cdef public object cname
- cdef public object text
- cdef public object escaped_value
- cdef public dict py_strings
- cdef public list py_versions
-
- @cython.locals(intern=bint, is_str=bint, is_unicode=bint)
- cpdef get_py_string_const(self, encoding, identifier=*, is_str=*, py3str_cstring=*)
-
-## cdef class PyStringConst:
-## cdef public object cname
-## cdef public object encoding
-## cdef public bint is_str
-## cdef public bint is_unicode
-## cdef public bint intern
-
-#class GlobalState(object):
-
-#def funccontext_property(name):
-
+ cdef public size_t temp_counter
+ cdef public list collect_temps_stack
+
+ cdef public object closure_temps
+ cdef public bint should_declare_error_indicator
+ cdef public bint uses_error_indicator
+
+ @cython.locals(n=size_t)
+ cpdef new_label(self, name=*)
+ cpdef tuple get_loop_labels(self)
+ cpdef set_loop_labels(self, labels)
+ cpdef tuple get_all_labels(self)
+ cpdef set_all_labels(self, labels)
+ cpdef start_collecting_temps(self)
+ cpdef stop_collecting_temps(self)
+
+ cpdef list temps_in_use(self)
+
+cdef class IntConst:
+ cdef public object cname
+ cdef public object value
+ cdef public bint is_long
+
+cdef class PyObjectConst:
+ cdef public object cname
+ cdef public object type
+
+cdef class StringConst:
+ cdef public object cname
+ cdef public object text
+ cdef public object escaped_value
+ cdef public dict py_strings
+ cdef public list py_versions
+
+ @cython.locals(intern=bint, is_str=bint, is_unicode=bint)
+ cpdef get_py_string_const(self, encoding, identifier=*, is_str=*, py3str_cstring=*)
+
+## cdef class PyStringConst:
+## cdef public object cname
+## cdef public object encoding
+## cdef public bint is_str
+## cdef public bint is_unicode
+## cdef public bint intern
+
+#class GlobalState(object):
+
+#def funccontext_property(name):
+
cdef class CCodeWriter(object):
cdef readonly StringIOTree buffer
cdef readonly list pyclass_stack
@@ -108,7 +108,7 @@ cdef class CCodeWriter(object):
cdef Py_ssize_t level
cdef public Py_ssize_t call_level # debug-only, see Nodes.py
cdef bint bol
-
+
cpdef write(self, s)
cpdef put(self, code)
cpdef put_safe(self, code)
@@ -119,6 +119,6 @@ cdef class CCodeWriter(object):
cdef decrease_indent(self)
-cdef class PyrexCodeWriter:
- cdef public object f
- cdef public Py_ssize_t level
+cdef class PyrexCodeWriter:
+ cdef public object f
+ cdef public Py_ssize_t level
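The FunctionState declarations in this .pxd describe a small label and temp-tracking API (new_label(), get_loop_labels()/set_loop_labels(), temps_in_use(), ...). The save-and-restore pattern those declarations imply is sketched below; this is a hedged illustration only, since the concrete call sites live in the code generators (Nodes.py and friends), which are not part of this hunk.

    # Hedged sketch of the loop-label pattern implied by the declarations above.
    # 'funcstate' is assumed to be a FunctionState instance owned by a code writer.
    def generate_loop_labels(funcstate):
        old_labels = funcstate.get_loop_labels()            # save enclosing loop labels
        funcstate.continue_label = funcstate.new_label("continue")
        funcstate.break_label = funcstate.new_label("break")
        # ... the loop body is emitted here and may jump to the new labels ...
        funcstate.set_loop_labels(old_labels)               # restore the outer labels
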
diff --git a/contrib/tools/cython/Cython/Compiler/Code.py b/contrib/tools/cython/Cython/Compiler/Code.py
index 0ab5551a87..f43c4b2b8e 100644
--- a/contrib/tools/cython/Cython/Compiler/Code.py
+++ b/contrib/tools/cython/Cython/Compiler/Code.py
@@ -1,60 +1,60 @@
-# cython: language_level = 2
+# cython: language_level = 2
# cython: auto_pickle=False
-#
-# Code output module
-#
-
-from __future__ import absolute_import
-
-import cython
+#
+# Code output module
+#
+
+from __future__ import absolute_import
+
+import cython
cython.declare(os=object, re=object, operator=object, textwrap=object,
Template=object, Naming=object, Options=object, StringEncoding=object,
- Utils=object, SourceDescriptor=object, StringIOTree=object,
+ Utils=object, SourceDescriptor=object, StringIOTree=object,
DebugFlags=object, basestring=object, defaultdict=object,
closing=object, partial=object)
-
-import os
-import re
+
+import os
+import re
import shutil
-import sys
-import operator
-import textwrap
+import sys
+import operator
+import textwrap
from string import Template
from functools import partial
from contextlib import closing
from collections import defaultdict
-
-try:
- import hashlib
-except ImportError:
- import md5 as hashlib
-
-from . import Naming
-from . import Options
-from . import DebugFlags
-from . import StringEncoding
+
+try:
+ import hashlib
+except ImportError:
+ import md5 as hashlib
+
+from . import Naming
+from . import Options
+from . import DebugFlags
+from . import StringEncoding
from . import Version
-from .. import Utils
-from .Scanning import SourceDescriptor
-from ..StringIOTree import StringIOTree
-
-try:
- from __builtin__ import basestring
-except ImportError:
- from builtins import str as basestring
-
+from .. import Utils
+from .Scanning import SourceDescriptor
+from ..StringIOTree import StringIOTree
+
+try:
+ from __builtin__ import basestring
+except ImportError:
+ from builtins import str as basestring
+
KEYWORDS_MUST_BE_BYTES = sys.version_info < (2, 7)
-
-
-non_portable_builtins_map = {
- # builtins that have different names in different Python versions
- 'bytes' : ('PY_MAJOR_VERSION < 3', 'str'),
- 'unicode' : ('PY_MAJOR_VERSION >= 3', 'str'),
- 'basestring' : ('PY_MAJOR_VERSION >= 3', 'str'),
- 'xrange' : ('PY_MAJOR_VERSION >= 3', 'range'),
- 'raw_input' : ('PY_MAJOR_VERSION >= 3', 'input'),
+
+
+non_portable_builtins_map = {
+ # builtins that have different names in different Python versions
+ 'bytes' : ('PY_MAJOR_VERSION < 3', 'str'),
+ 'unicode' : ('PY_MAJOR_VERSION >= 3', 'str'),
+ 'basestring' : ('PY_MAJOR_VERSION >= 3', 'str'),
+ 'xrange' : ('PY_MAJOR_VERSION >= 3', 'range'),
+ 'raw_input' : ('PY_MAJOR_VERSION >= 3', 'input'),
}
-
+
ctypedef_builtins_map = {
# types of builtins in "ctypedef class" statements which we don't
# import either because the names conflict with C types or because
@@ -65,12 +65,12 @@ ctypedef_builtins_map = {
'wrapper_descriptor' : '&PyWrapperDescr_Type',
}
-basicsize_builtins_map = {
- # builtins whose type has a different tp_basicsize than sizeof(...)
+basicsize_builtins_map = {
+ # builtins whose type has a different tp_basicsize than sizeof(...)
'PyTypeObject': 'PyHeapTypeObject',
}
-
-uncachable_builtins = [
+
+uncachable_builtins = [
# Global/builtin names that cannot be cached because they may or may not
# be available at import time, for various reasons:
## - Py3.7+
@@ -104,11 +104,11 @@ uncachable_builtins = [
## - Py2.7+
'memoryview',
## - platform specific
- 'WindowsError',
+ 'WindowsError',
## - others
'_', # e.g. used by gettext
]
-
+
special_py_methods = set([
'__cinit__', '__dealloc__', '__richcmp__', '__next__',
'__await__', '__aiter__', '__anext__',
@@ -116,11 +116,11 @@ special_py_methods = set([
'__getcharbuffer__', '__getbuffer__', '__releasebuffer__'
])
-modifier_output_mapper = {
- 'inline': 'CYTHON_INLINE'
-}.get
-
-
+modifier_output_mapper = {
+ 'inline': 'CYTHON_INLINE'
+}.get
+
+
class IncludeCode(object):
"""
An include file and/or verbatim C code to be included in the
@@ -197,325 +197,325 @@ class IncludeCode(object):
code.putln(self.pieces[k])
-def get_utility_dir():
- # make this a function and not global variables:
- # http://trac.cython.org/cython_trac/ticket/475
- Cython_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
- return os.path.join(Cython_dir, "Utility")
-
-
-class UtilityCodeBase(object):
- """
- Support for loading utility code from a file.
-
- Code sections in the file can be specified as follows:
-
- ##### MyUtility.proto #####
-
- [proto declarations]
-
- ##### MyUtility.init #####
-
- [code run at module initialization]
-
- ##### MyUtility #####
- #@requires: MyOtherUtility
- #@substitute: naming
-
- [definitions]
-
- for prototypes and implementation respectively. For non-python or
- -cython files backslashes should be used instead. 5 to 30 comment
- characters may be used on either side.
-
- If the @cname decorator is not used and this is a CythonUtilityCode,
- one should pass in the 'name' keyword argument to be used for name
- mangling of such entries.
- """
-
- is_cython_utility = False
- _utility_cache = {}
-
- @classmethod
- def _add_utility(cls, utility, type, lines, begin_lineno, tags=None):
- if utility is None:
- return
-
- code = '\n'.join(lines)
- if tags and 'substitute' in tags and tags['substitute'] == set(['naming']):
- del tags['substitute']
- try:
- code = Template(code).substitute(vars(Naming))
+def get_utility_dir():
+ # make this a function and not global variables:
+ # http://trac.cython.org/cython_trac/ticket/475
+ Cython_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ return os.path.join(Cython_dir, "Utility")
+
+
+class UtilityCodeBase(object):
+ """
+ Support for loading utility code from a file.
+
+ Code sections in the file can be specified as follows:
+
+ ##### MyUtility.proto #####
+
+ [proto declarations]
+
+ ##### MyUtility.init #####
+
+ [code run at module initialization]
+
+ ##### MyUtility #####
+ #@requires: MyOtherUtility
+ #@substitute: naming
+
+ [definitions]
+
+ for prototypes and implementation respectively. For non-python or
+ -cython files backslashes should be used instead. 5 to 30 comment
+ characters may be used on either side.
+
+ If the @cname decorator is not used and this is a CythonUtilityCode,
+ one should pass in the 'name' keyword argument to be used for name
+ mangling of such entries.
+ """
+
+ is_cython_utility = False
+ _utility_cache = {}
+
+ @classmethod
+ def _add_utility(cls, utility, type, lines, begin_lineno, tags=None):
+ if utility is None:
+ return
+
+ code = '\n'.join(lines)
+ if tags and 'substitute' in tags and tags['substitute'] == set(['naming']):
+ del tags['substitute']
+ try:
+ code = Template(code).substitute(vars(Naming))
except (KeyError, ValueError) as e:
- raise RuntimeError("Error parsing templated utility code of type '%s' at line %d: %s" % (
- type, begin_lineno, e))
-
- # remember correct line numbers at least until after templating
- code = '\n' * begin_lineno + code
-
- if type == 'proto':
- utility[0] = code
- elif type == 'impl':
+ raise RuntimeError("Error parsing templated utility code of type '%s' at line %d: %s" % (
+ type, begin_lineno, e))
+
+ # remember correct line numbers at least until after templating
+ code = '\n' * begin_lineno + code
+
+ if type == 'proto':
+ utility[0] = code
+ elif type == 'impl':
utility[1] = code
- else:
+ else:
all_tags = utility[2]
- if KEYWORDS_MUST_BE_BYTES:
- type = type.encode('ASCII')
- all_tags[type] = code
-
- if tags:
+ if KEYWORDS_MUST_BE_BYTES:
+ type = type.encode('ASCII')
+ all_tags[type] = code
+
+ if tags:
all_tags = utility[2]
- for name, values in tags.items():
- if KEYWORDS_MUST_BE_BYTES:
- name = name.encode('ASCII')
- all_tags.setdefault(name, set()).update(values)
-
- @classmethod
- def load_utilities_from_file(cls, path):
- utilities = cls._utility_cache.get(path)
- if utilities:
- return utilities
-
- filename = os.path.join(get_utility_dir(), path)
- _, ext = os.path.splitext(path)
- if ext in ('.pyx', '.py', '.pxd', '.pxi'):
- comment = '#'
+ for name, values in tags.items():
+ if KEYWORDS_MUST_BE_BYTES:
+ name = name.encode('ASCII')
+ all_tags.setdefault(name, set()).update(values)
+
+ @classmethod
+ def load_utilities_from_file(cls, path):
+ utilities = cls._utility_cache.get(path)
+ if utilities:
+ return utilities
+
+ filename = os.path.join(get_utility_dir(), path)
+ _, ext = os.path.splitext(path)
+ if ext in ('.pyx', '.py', '.pxd', '.pxi'):
+ comment = '#'
strip_comments = partial(re.compile(r'^\s*#(?!\s*cython\s*:).*').sub, '')
rstrip = StringEncoding._unicode.rstrip
- else:
- comment = '/'
+ else:
+ comment = '/'
strip_comments = partial(re.compile(r'^\s*//.*|/\*[^*]*\*/').sub, '')
rstrip = partial(re.compile(r'\s+(\\?)$').sub, r'\1')
- match_special = re.compile(
- (r'^%(C)s{5,30}\s*(?P<name>(?:\w|\.)+)\s*%(C)s{5,30}|'
+ match_special = re.compile(
+ (r'^%(C)s{5,30}\s*(?P<name>(?:\w|\.)+)\s*%(C)s{5,30}|'
r'^%(C)s+@(?P<tag>\w+)\s*:\s*(?P<value>(?:\w|[.:])+)') %
{'C': comment}).match
match_type = re.compile(r'(.+)[.](proto(?:[.]\S+)?|impl|init|cleanup)$').match
-
+
with closing(Utils.open_source_file(filename, encoding='UTF-8')) as f:
- all_lines = f.readlines()
-
+ all_lines = f.readlines()
+
utilities = defaultdict(lambda: [None, None, {}])
- lines = []
+ lines = []
tags = defaultdict(set)
- utility = type = None
- begin_lineno = 0
-
- for lineno, line in enumerate(all_lines):
- m = match_special(line)
- if m:
- if m.group('name'):
- cls._add_utility(utility, type, lines, begin_lineno, tags)
-
- begin_lineno = lineno + 1
- del lines[:]
- tags.clear()
-
- name = m.group('name')
- mtype = match_type(name)
- if mtype:
- name, type = mtype.groups()
- else:
- type = 'impl'
+ utility = type = None
+ begin_lineno = 0
+
+ for lineno, line in enumerate(all_lines):
+ m = match_special(line)
+ if m:
+ if m.group('name'):
+ cls._add_utility(utility, type, lines, begin_lineno, tags)
+
+ begin_lineno = lineno + 1
+ del lines[:]
+ tags.clear()
+
+ name = m.group('name')
+ mtype = match_type(name)
+ if mtype:
+ name, type = mtype.groups()
+ else:
+ type = 'impl'
utility = utilities[name]
- else:
+ else:
tags[m.group('tag')].add(m.group('value'))
lines.append('') # keep line number correct
- else:
+ else:
lines.append(rstrip(strip_comments(line)))
-
- if utility is None:
- raise ValueError("Empty utility code file")
-
- # Don't forget to add the last utility code
- cls._add_utility(utility, type, lines, begin_lineno, tags)
-
+
+ if utility is None:
+ raise ValueError("Empty utility code file")
+
+ # Don't forget to add the last utility code
+ cls._add_utility(utility, type, lines, begin_lineno, tags)
+
utilities = dict(utilities) # un-defaultdict-ify
- cls._utility_cache[path] = utilities
- return utilities
-
- @classmethod
- def load(cls, util_code_name, from_file=None, **kwargs):
- """
- Load utility code from a file specified by from_file (relative to
- Cython/Utility) and name util_code_name. If from_file is not given,
- load it from the file util_code_name.*. There should be only one
- file matched by this pattern.
- """
- if '::' in util_code_name:
- from_file, util_code_name = util_code_name.rsplit('::', 1)
- if not from_file:
- utility_dir = get_utility_dir()
- prefix = util_code_name + '.'
- try:
- listing = os.listdir(utility_dir)
- except OSError:
- # XXX the code below assumes as 'zipimport.zipimporter' instance
- # XXX should be easy to generalize, but too lazy right now to write it
- import zipfile
- global __loader__
- loader = __loader__
- archive = loader.archive
+ cls._utility_cache[path] = utilities
+ return utilities
+
+ @classmethod
+ def load(cls, util_code_name, from_file=None, **kwargs):
+ """
+ Load utility code from a file specified by from_file (relative to
+ Cython/Utility) and name util_code_name. If from_file is not given,
+ load it from the file util_code_name.*. There should be only one
+ file matched by this pattern.
+ """
+ if '::' in util_code_name:
+ from_file, util_code_name = util_code_name.rsplit('::', 1)
+ if not from_file:
+ utility_dir = get_utility_dir()
+ prefix = util_code_name + '.'
+ try:
+ listing = os.listdir(utility_dir)
+ except OSError:
+            # XXX the code below assumes a 'zipimport.zipimporter' instance
+ # XXX should be easy to generalize, but too lazy right now to write it
+ import zipfile
+ global __loader__
+ loader = __loader__
+ archive = loader.archive
with closing(zipfile.ZipFile(archive)) as fileobj:
listing = [os.path.basename(name)
for name in fileobj.namelist()
if os.path.join(archive, name).startswith(utility_dir)]
files = [filename for filename in listing
if filename.startswith(prefix)]
- if not files:
- raise ValueError("No match found for utility code " + util_code_name)
- if len(files) > 1:
- raise ValueError("More than one filename match found for utility code " + util_code_name)
- from_file = files[0]
-
- utilities = cls.load_utilities_from_file(from_file)
+ if not files:
+ raise ValueError("No match found for utility code " + util_code_name)
+ if len(files) > 1:
+ raise ValueError("More than one filename match found for utility code " + util_code_name)
+ from_file = files[0]
+
+ utilities = cls.load_utilities_from_file(from_file)
proto, impl, tags = utilities[util_code_name]
-
- if tags:
- orig_kwargs = kwargs.copy()
- for name, values in tags.items():
- if name in kwargs:
- continue
- # only pass lists when we have to: most argument expect one value or None
- if name == 'requires':
- if orig_kwargs:
- values = [cls.load(dep, from_file, **orig_kwargs)
- for dep in sorted(values)]
- else:
- # dependencies are rarely unique, so use load_cached() when we can
- values = [cls.load_cached(dep, from_file)
- for dep in sorted(values)]
- elif not values:
- values = None
- elif len(values) == 1:
+
+ if tags:
+ orig_kwargs = kwargs.copy()
+ for name, values in tags.items():
+ if name in kwargs:
+ continue
+                # only pass lists when we have to: most arguments expect one value or None
+ if name == 'requires':
+ if orig_kwargs:
+ values = [cls.load(dep, from_file, **orig_kwargs)
+ for dep in sorted(values)]
+ else:
+ # dependencies are rarely unique, so use load_cached() when we can
+ values = [cls.load_cached(dep, from_file)
+ for dep in sorted(values)]
+ elif not values:
+ values = None
+ elif len(values) == 1:
values = list(values)[0]
- kwargs[name] = values
-
- if proto is not None:
- kwargs['proto'] = proto
- if impl is not None:
- kwargs['impl'] = impl
-
- if 'name' not in kwargs:
- kwargs['name'] = util_code_name
-
- if 'file' not in kwargs and from_file:
- kwargs['file'] = from_file
- return cls(**kwargs)
-
- @classmethod
- def load_cached(cls, utility_code_name, from_file=None, __cache={}):
- """
- Calls .load(), but using a per-type cache based on utility name and file name.
- """
- key = (cls, from_file, utility_code_name)
- try:
- return __cache[key]
- except KeyError:
- pass
- code = __cache[key] = cls.load(utility_code_name, from_file)
- return code
-
- @classmethod
- def load_as_string(cls, util_code_name, from_file=None, **kwargs):
- """
- Load a utility code as a string. Returns (proto, implementation)
- """
- util = cls.load(util_code_name, from_file, **kwargs)
- proto, impl = util.proto, util.impl
- return util.format_code(proto), util.format_code(impl)
-
- def format_code(self, code_string, replace_empty_lines=re.compile(r'\n\n+').sub):
- """
- Format a code section for output.
- """
- if code_string:
- code_string = replace_empty_lines('\n', code_string.strip()) + '\n\n'
- return code_string
-
- def __str__(self):
+ kwargs[name] = values
+
+ if proto is not None:
+ kwargs['proto'] = proto
+ if impl is not None:
+ kwargs['impl'] = impl
+
+ if 'name' not in kwargs:
+ kwargs['name'] = util_code_name
+
+ if 'file' not in kwargs and from_file:
+ kwargs['file'] = from_file
+ return cls(**kwargs)
+
+ @classmethod
+ def load_cached(cls, utility_code_name, from_file=None, __cache={}):
+ """
+ Calls .load(), but using a per-type cache based on utility name and file name.
+ """
+ key = (cls, from_file, utility_code_name)
+ try:
+ return __cache[key]
+ except KeyError:
+ pass
+ code = __cache[key] = cls.load(utility_code_name, from_file)
+ return code
+
+ @classmethod
+ def load_as_string(cls, util_code_name, from_file=None, **kwargs):
+ """
+ Load a utility code as a string. Returns (proto, implementation)
+ """
+ util = cls.load(util_code_name, from_file, **kwargs)
+ proto, impl = util.proto, util.impl
+ return util.format_code(proto), util.format_code(impl)
+
+ def format_code(self, code_string, replace_empty_lines=re.compile(r'\n\n+').sub):
+ """
+ Format a code section for output.
+ """
+ if code_string:
+ code_string = replace_empty_lines('\n', code_string.strip()) + '\n\n'
+ return code_string
+
+ def __str__(self):
return "<%s(%s)>" % (type(self).__name__, self.name)
-
+
def get_tree(self, **kwargs):
- pass
-
+ pass
+
def __deepcopy__(self, memodict=None):
# No need to deep-copy utility code since it's essentially immutable.
return self
-
-
-class UtilityCode(UtilityCodeBase):
- """
- Stores utility code to add during code generation.
-
- See GlobalState.put_utility_code.
-
- hashes/equals by instance
-
- proto C prototypes
+
+
+class UtilityCode(UtilityCodeBase):
+ """
+ Stores utility code to add during code generation.
+
+ See GlobalState.put_utility_code.
+
+ hashes/equals by instance
+
+ proto C prototypes
impl implementation code
- init code to call on module initialization
- requires utility code dependencies
- proto_block the place in the resulting file where the prototype should
- end up
- name name of the utility code (or None)
- file filename of the utility code file this utility was loaded
- from (or None)
- """
-
- def __init__(self, proto=None, impl=None, init=None, cleanup=None, requires=None,
- proto_block='utility_code_proto', name=None, file=None):
- # proto_block: Which code block to dump prototype in. See GlobalState.
- self.proto = proto
- self.impl = impl
- self.init = init
- self.cleanup = cleanup
- self.requires = requires
- self._cache = {}
- self.specialize_list = []
- self.proto_block = proto_block
- self.name = name
- self.file = file
-
- def __hash__(self):
- return hash((self.proto, self.impl))
-
- def __eq__(self, other):
- if self is other:
- return True
+ init code to call on module initialization
+ requires utility code dependencies
+ proto_block the place in the resulting file where the prototype should
+ end up
+ name name of the utility code (or None)
+ file filename of the utility code file this utility was loaded
+ from (or None)
+ """
+
+ def __init__(self, proto=None, impl=None, init=None, cleanup=None, requires=None,
+ proto_block='utility_code_proto', name=None, file=None):
+ # proto_block: Which code block to dump prototype in. See GlobalState.
+ self.proto = proto
+ self.impl = impl
+ self.init = init
+ self.cleanup = cleanup
+ self.requires = requires
+ self._cache = {}
+ self.specialize_list = []
+ self.proto_block = proto_block
+ self.name = name
+ self.file = file
+
+ def __hash__(self):
+ return hash((self.proto, self.impl))
+
+ def __eq__(self, other):
+ if self is other:
+ return True
self_type, other_type = type(self), type(other)
if self_type is not other_type and not (isinstance(other, self_type) or isinstance(self, other_type)):
- return False
-
- self_proto = getattr(self, 'proto', None)
- other_proto = getattr(other, 'proto', None)
- return (self_proto, self.impl) == (other_proto, other.impl)
-
- def none_or_sub(self, s, context):
- """
- Format a string in this utility code with context. If None, do nothing.
- """
- if s is None:
- return None
- return s % context
-
- def specialize(self, pyrex_type=None, **data):
- # Dicts aren't hashable...
+ return False
+
+ self_proto = getattr(self, 'proto', None)
+ other_proto = getattr(other, 'proto', None)
+ return (self_proto, self.impl) == (other_proto, other.impl)
+
+ def none_or_sub(self, s, context):
+ """
+ Format a string in this utility code with context. If None, do nothing.
+ """
+ if s is None:
+ return None
+ return s % context
+
+ def specialize(self, pyrex_type=None, **data):
+ # Dicts aren't hashable...
name = self.name
- if pyrex_type is not None:
+ if pyrex_type is not None:
data['type'] = pyrex_type.empty_declaration_code()
- data['type_name'] = pyrex_type.specialization_name()
+ data['type_name'] = pyrex_type.specialization_name()
name = "%s[%s]" % (name, data['type_name'])
- key = tuple(sorted(data.items()))
- try:
- return self._cache[key]
- except KeyError:
- if self.requires is None:
- requires = None
- else:
- requires = [r.specialize(data) for r in self.requires]
-
- s = self._cache[key] = UtilityCode(
+ key = tuple(sorted(data.items()))
+ try:
+ return self._cache[key]
+ except KeyError:
+ if self.requires is None:
+ requires = None
+ else:
+ requires = [r.specialize(data) for r in self.requires]
+
+ s = self._cache[key] = UtilityCode(
self.none_or_sub(self.proto, data),
self.none_or_sub(self.impl, data),
self.none_or_sub(self.init, data),
@@ -524,31 +524,31 @@ class UtilityCode(UtilityCodeBase):
self.proto_block,
name,
)
-
- self.specialize_list.append(s)
- return s
-
- def inject_string_constants(self, impl, output):
- """Replace 'PYIDENT("xyz")' by a constant Python identifier cname.
- """
+
+ self.specialize_list.append(s)
+ return s
+
+ def inject_string_constants(self, impl, output):
+ """Replace 'PYIDENT("xyz")' by a constant Python identifier cname.
+ """
if 'PYIDENT(' not in impl and 'PYUNICODE(' not in impl:
return False, impl
- replacements = {}
- def externalise(matchobj):
+ replacements = {}
+ def externalise(matchobj):
key = matchobj.groups()
- try:
+ try:
cname = replacements[key]
- except KeyError:
+ except KeyError:
str_type, name = key
cname = replacements[key] = output.get_py_string_const(
StringEncoding.EncodedString(name), identifier=str_type == 'IDENT').cname
- return cname
-
+ return cname
+
impl = re.sub(r'PY(IDENT|UNICODE)\("([^"]+)"\)', externalise, impl)
assert 'PYIDENT(' not in impl and 'PYUNICODE(' not in impl
return True, impl
-
+
def inject_unbound_methods(self, impl, output):
"""Replace 'UNBOUND_METHOD(type, "name")' by a constant Python identifier cname.
"""
@@ -588,70 +588,70 @@ class UtilityCode(UtilityCodeBase):
assert 'CSTRING(' not in impl
return impl
- def put_code(self, output):
- if self.requires:
- for dependency in self.requires:
- output.use_utility_code(dependency)
- if self.proto:
+ def put_code(self, output):
+ if self.requires:
+ for dependency in self.requires:
+ output.use_utility_code(dependency)
+ if self.proto:
writer = output[self.proto_block]
writer.putln("/* %s.proto */" % self.name)
writer.put_or_include(
self.format_code(self.proto), '%s_proto' % self.name)
- if self.impl:
+ if self.impl:
impl = self.format_code(self.wrap_c_strings(self.impl))
is_specialised1, impl = self.inject_string_constants(impl, output)
is_specialised2, impl = self.inject_unbound_methods(impl, output)
writer = output['utility_code_def']
writer.putln("/* %s */" % self.name)
if not (is_specialised1 or is_specialised2):
- # no module specific adaptations => can be reused
+ # no module specific adaptations => can be reused
writer.put_or_include(impl, '%s_impl' % self.name)
- else:
+ else:
writer.put(impl)
- if self.init:
- writer = output['init_globals']
- writer.putln("/* %s.init */" % self.name)
- if isinstance(self.init, basestring):
- writer.put(self.format_code(self.init))
- else:
- self.init(writer, output.module_pos)
- writer.putln(writer.error_goto_if_PyErr(output.module_pos))
- writer.putln()
- if self.cleanup and Options.generate_cleanup_code:
- writer = output['cleanup_globals']
+ if self.init:
+ writer = output['init_globals']
+ writer.putln("/* %s.init */" % self.name)
+ if isinstance(self.init, basestring):
+ writer.put(self.format_code(self.init))
+ else:
+ self.init(writer, output.module_pos)
+ writer.putln(writer.error_goto_if_PyErr(output.module_pos))
+ writer.putln()
+ if self.cleanup and Options.generate_cleanup_code:
+ writer = output['cleanup_globals']
writer.putln("/* %s.cleanup */" % self.name)
- if isinstance(self.cleanup, basestring):
- writer.put_or_include(
- self.format_code(self.cleanup),
- '%s_cleanup' % self.name)
- else:
- self.cleanup(writer, output.module_pos)
-
-
-def sub_tempita(s, context, file=None, name=None):
- "Run tempita on string s with given context."
- if not s:
- return None
-
- if file:
- context['__name'] = "%s:%s" % (file, name)
- elif name:
- context['__name'] = name
-
- from ..Tempita import sub
- return sub(s, **context)
-
-
-class TempitaUtilityCode(UtilityCode):
- def __init__(self, name=None, proto=None, impl=None, init=None, file=None, context=None, **kwargs):
- if context is None:
- context = {}
- proto = sub_tempita(proto, context, file, name)
- impl = sub_tempita(impl, context, file, name)
- init = sub_tempita(init, context, file, name)
- super(TempitaUtilityCode, self).__init__(
- proto, impl, init=init, name=name, file=file, **kwargs)
-
+ if isinstance(self.cleanup, basestring):
+ writer.put_or_include(
+ self.format_code(self.cleanup),
+ '%s_cleanup' % self.name)
+ else:
+ self.cleanup(writer, output.module_pos)
+
+
+def sub_tempita(s, context, file=None, name=None):
+ "Run tempita on string s with given context."
+ if not s:
+ return None
+
+ if file:
+ context['__name'] = "%s:%s" % (file, name)
+ elif name:
+ context['__name'] = name
+
+ from ..Tempita import sub
+ return sub(s, **context)
+
+
+class TempitaUtilityCode(UtilityCode):
+ def __init__(self, name=None, proto=None, impl=None, init=None, file=None, context=None, **kwargs):
+ if context is None:
+ context = {}
+ proto = sub_tempita(proto, context, file, name)
+ impl = sub_tempita(impl, context, file, name)
+ init = sub_tempita(init, context, file, name)
+ super(TempitaUtilityCode, self).__init__(
+ proto, impl, init=init, name=name, file=file, **kwargs)
+
@classmethod
def load_cached(cls, utility_code_name, from_file=None, context=None, __cache={}):
context_key = tuple(sorted(context.items())) if context else None
@@ -664,82 +664,82 @@ class TempitaUtilityCode(UtilityCode):
code = __cache[key] = cls.load(utility_code_name, from_file, context=context)
return code
- def none_or_sub(self, s, context):
- """
- Format a string in this utility code with context. If None, do nothing.
- """
- if s is None:
- return None
- return sub_tempita(s, context, self.file, self.name)
-
-
-class LazyUtilityCode(UtilityCodeBase):
- """
- Utility code that calls a callback with the root code writer when
- available. Useful when you only have 'env' but not 'code'.
- """
+ def none_or_sub(self, s, context):
+ """
+ Format a string in this utility code with context. If None, do nothing.
+ """
+ if s is None:
+ return None
+ return sub_tempita(s, context, self.file, self.name)
+
+
+class LazyUtilityCode(UtilityCodeBase):
+ """
+ Utility code that calls a callback with the root code writer when
+ available. Useful when you only have 'env' but not 'code'.
+ """
__name__ = '<lazy>'
requires = None
-
- def __init__(self, callback):
- self.callback = callback
-
- def put_code(self, globalstate):
- utility = self.callback(globalstate.rootwriter)
- globalstate.use_utility_code(utility)
-
-
-class FunctionState(object):
- # return_label string function return point label
- # error_label string error catch point label
- # continue_label string loop continue point label
- # break_label string loop break point label
- # return_from_error_cleanup_label string
- # label_counter integer counter for naming labels
- # in_try_finally boolean inside try of try...finally
- # exc_vars (string * 3) exception variables for reraise, or None
- # can_trace boolean line tracing is supported in the current context
+
+ def __init__(self, callback):
+ self.callback = callback
+
+ def put_code(self, globalstate):
+ utility = self.callback(globalstate.rootwriter)
+ globalstate.use_utility_code(utility)
+
+
+class FunctionState(object):
+ # return_label string function return point label
+ # error_label string error catch point label
+ # continue_label string loop continue point label
+ # break_label string loop break point label
+ # return_from_error_cleanup_label string
+ # label_counter integer counter for naming labels
+ # in_try_finally boolean inside try of try...finally
+ # exc_vars (string * 3) exception variables for reraise, or None
+ # can_trace boolean line tracing is supported in the current context
# scope Scope the scope object of the current function
-
- # Not used for now, perhaps later
+
+ # Not used for now, perhaps later
def __init__(self, owner, names_taken=set(), scope=None):
- self.names_taken = names_taken
- self.owner = owner
+ self.names_taken = names_taken
+ self.owner = owner
self.scope = scope
-
- self.error_label = None
- self.label_counter = 0
- self.labels_used = set()
- self.return_label = self.new_label()
- self.new_error_label()
- self.continue_label = None
- self.break_label = None
- self.yield_labels = []
-
- self.in_try_finally = 0
- self.exc_vars = None
+
+ self.error_label = None
+ self.label_counter = 0
+ self.labels_used = set()
+ self.return_label = self.new_label()
+ self.new_error_label()
+ self.continue_label = None
+ self.break_label = None
+ self.yield_labels = []
+
+ self.in_try_finally = 0
+ self.exc_vars = None
self.current_except = None
- self.can_trace = False
+ self.can_trace = False
self.gil_owned = True
-
+
self.temps_allocated = [] # of (name, type, manage_ref, static)
self.temps_free = {} # (type, manage_ref) -> list of free vars with same type/managed status
self.temps_used_type = {} # name -> (type, manage_ref)
self.zombie_temps = set() # temps that must not be reused after release
- self.temp_counter = 0
- self.closure_temps = None
-
- # This is used to collect temporaries, useful to find out which temps
- # need to be privatized in parallel sections
- self.collect_temps_stack = []
-
- # This is used for the error indicator, which needs to be local to the
- # function. It used to be global, which relies on the GIL being held.
- # However, exceptions may need to be propagated through 'nogil'
- # sections, in which case we introduce a race condition.
- self.should_declare_error_indicator = False
- self.uses_error_indicator = False
-
+ self.temp_counter = 0
+ self.closure_temps = None
+
+ # This is used to collect temporaries, useful to find out which temps
+ # need to be privatized in parallel sections
+ self.collect_temps_stack = []
+
+ # This is used for the error indicator, which needs to be local to the
+ # function. It used to be global, which relies on the GIL being held.
+ # However, exceptions may need to be propagated through 'nogil'
+ # sections, in which case we introduce a race condition.
+ self.should_declare_error_indicator = False
+ self.uses_error_indicator = False
+
# safety checks
def validate_exit(self):
@@ -754,651 +754,651 @@ class FunctionState(object):
#print(msg)
raise RuntimeError(msg)
- # labels
-
- def new_label(self, name=None):
- n = self.label_counter
- self.label_counter = n + 1
- label = "%s%d" % (Naming.label_prefix, n)
- if name is not None:
- label += '_' + name
- return label
-
+ # labels
+
+ def new_label(self, name=None):
+ n = self.label_counter
+ self.label_counter = n + 1
+ label = "%s%d" % (Naming.label_prefix, n)
+ if name is not None:
+ label += '_' + name
+ return label
+
def new_yield_label(self, expr_type='yield'):
label = self.new_label('resume_from_%s' % expr_type)
- num_and_label = (len(self.yield_labels) + 1, label)
- self.yield_labels.append(num_and_label)
- return num_and_label
-
- def new_error_label(self):
- old_err_lbl = self.error_label
- self.error_label = self.new_label('error')
- return old_err_lbl
-
- def get_loop_labels(self):
- return (
- self.continue_label,
- self.break_label)
-
- def set_loop_labels(self, labels):
- (self.continue_label,
- self.break_label) = labels
-
- def new_loop_labels(self):
- old_labels = self.get_loop_labels()
- self.set_loop_labels(
- (self.new_label("continue"),
- self.new_label("break")))
- return old_labels
-
- def get_all_labels(self):
- return (
- self.continue_label,
- self.break_label,
- self.return_label,
- self.error_label)
-
- def set_all_labels(self, labels):
- (self.continue_label,
- self.break_label,
- self.return_label,
- self.error_label) = labels
-
- def all_new_labels(self):
- old_labels = self.get_all_labels()
- new_labels = []
- for old_label, name in zip(old_labels, ['continue', 'break', 'return', 'error']):
- if old_label:
- new_labels.append(self.new_label(name))
- else:
- new_labels.append(old_label)
- self.set_all_labels(new_labels)
- return old_labels
-
- def use_label(self, lbl):
- self.labels_used.add(lbl)
-
- def label_used(self, lbl):
- return lbl in self.labels_used
-
- # temp handling
-
+ num_and_label = (len(self.yield_labels) + 1, label)
+ self.yield_labels.append(num_and_label)
+ return num_and_label
+
+ def new_error_label(self):
+ old_err_lbl = self.error_label
+ self.error_label = self.new_label('error')
+ return old_err_lbl
+
+ def get_loop_labels(self):
+ return (
+ self.continue_label,
+ self.break_label)
+
+ def set_loop_labels(self, labels):
+ (self.continue_label,
+ self.break_label) = labels
+
+ def new_loop_labels(self):
+ old_labels = self.get_loop_labels()
+ self.set_loop_labels(
+ (self.new_label("continue"),
+ self.new_label("break")))
+ return old_labels
+
+ def get_all_labels(self):
+ return (
+ self.continue_label,
+ self.break_label,
+ self.return_label,
+ self.error_label)
+
+ def set_all_labels(self, labels):
+ (self.continue_label,
+ self.break_label,
+ self.return_label,
+ self.error_label) = labels
+
+ def all_new_labels(self):
+ old_labels = self.get_all_labels()
+ new_labels = []
+ for old_label, name in zip(old_labels, ['continue', 'break', 'return', 'error']):
+ if old_label:
+ new_labels.append(self.new_label(name))
+ else:
+ new_labels.append(old_label)
+ self.set_all_labels(new_labels)
+ return old_labels
+
+ def use_label(self, lbl):
+ self.labels_used.add(lbl)
+
+ def label_used(self, lbl):
+ return lbl in self.labels_used
+
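The label methods above follow a save/replace/restore discipline so that nested loops can temporarily redirect break/continue targets. A small standalone sketch of that pattern (LabelScope is an illustrative name, not the class used here):

    class LabelScope(object):
        # Counter-based unique names plus save/swap of loop targets.
        def __init__(self, prefix="__pyx_L"):
            self.prefix = prefix
            self.counter = 0
            self.continue_label = None
            self.break_label = None

        def new_label(self, name=None):
            self.counter += 1
            label = "%s%d" % (self.prefix, self.counter)
            if name is not None:
                label += '_' + name
            return label

        def new_loop_labels(self):
            old = (self.continue_label, self.break_label)
            self.continue_label = self.new_label("continue")
            self.break_label = self.new_label("break")
            return old   # caller restores these when leaving the loop

    scope = LabelScope()
    scope.new_loop_labels()                             # enter the outer loop
    saved = scope.new_loop_labels()                     # enter a nested loop
    scope.continue_label, scope.break_label = saved     # leaving it restores the outer targets
    print(scope.continue_label, scope.break_label)      # __pyx_L1_continue __pyx_L2_break
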
+ # temp handling
+
def allocate_temp(self, type, manage_ref, static=False, reusable=True):
- """
- Allocates a temporary (which may create a new one or get a previously
- allocated and released one of the same type). Type is simply registered
- and handed back, but will usually be a PyrexType.
-
- If type.is_pyobject, manage_ref comes into play. If manage_ref is set to
- True, the temp will be decref-ed on return statements and in exception
- handling clauses. Otherwise the caller has to deal with any reference
- counting of the variable.
-
- If not type.is_pyobject, then manage_ref will be ignored, but it
- still has to be passed. It is recommended to pass False by convention
- if it is known that type will never be a Python object.
-
- static=True marks the temporary declaration with "static".
-        This is only used when allocating backing store for module-level
- C array literals.
-
+ """
+ Allocates a temporary (which may create a new one or get a previously
+ allocated and released one of the same type). Type is simply registered
+ and handed back, but will usually be a PyrexType.
+
+ If type.is_pyobject, manage_ref comes into play. If manage_ref is set to
+ True, the temp will be decref-ed on return statements and in exception
+ handling clauses. Otherwise the caller has to deal with any reference
+ counting of the variable.
+
+ If not type.is_pyobject, then manage_ref will be ignored, but it
+ still has to be passed. It is recommended to pass False by convention
+ if it is known that type will never be a Python object.
+
+ static=True marks the temporary declaration with "static".
+        This is only used when allocating backing store for module-level
+ C array literals.
+
if reusable=False, the temp will not be reused after release.
- A C string referring to the variable is returned.
- """
+ A C string referring to the variable is returned.
+ """
if type.is_const and not type.is_reference:
- type = type.const_base_type
+ type = type.const_base_type
elif type.is_reference and not type.is_fake_reference:
type = type.ref_base_type
elif type.is_cfunction:
from . import PyrexTypes
type = PyrexTypes.c_ptr_type(type) # A function itself isn't an l-value
- if not type.is_pyobject and not type.is_memoryviewslice:
- # Make manage_ref canonical, so that manage_ref will always mean
- # a decref is needed.
- manage_ref = False
-
- freelist = self.temps_free.get((type, manage_ref))
+ if not type.is_pyobject and not type.is_memoryviewslice:
+ # Make manage_ref canonical, so that manage_ref will always mean
+ # a decref is needed.
+ manage_ref = False
+
+ freelist = self.temps_free.get((type, manage_ref))
if reusable and freelist is not None and freelist[0]:
result = freelist[0].pop()
freelist[1].remove(result)
- else:
- while True:
- self.temp_counter += 1
- result = "%s%d" % (Naming.codewriter_temp_prefix, self.temp_counter)
+ else:
+ while True:
+ self.temp_counter += 1
+ result = "%s%d" % (Naming.codewriter_temp_prefix, self.temp_counter)
if result not in self.names_taken: break
- self.temps_allocated.append((result, type, manage_ref, static))
+ self.temps_allocated.append((result, type, manage_ref, static))
if not reusable:
self.zombie_temps.add(result)
- self.temps_used_type[result] = (type, manage_ref)
- if DebugFlags.debug_temp_code_comments:
+ self.temps_used_type[result] = (type, manage_ref)
+ if DebugFlags.debug_temp_code_comments:
self.owner.putln("/* %s allocated (%s)%s */" % (result, type, "" if reusable else " - zombie"))
-
- if self.collect_temps_stack:
- self.collect_temps_stack[-1].add((result, type))
-
- return result
-
- def release_temp(self, name):
- """
- Releases a temporary so that it can be reused by other code needing
- a temp of the same type.
- """
- type, manage_ref = self.temps_used_type[name]
- freelist = self.temps_free.get((type, manage_ref))
- if freelist is None:
+
+ if self.collect_temps_stack:
+ self.collect_temps_stack[-1].add((result, type))
+
+ return result
+
+ def release_temp(self, name):
+ """
+ Releases a temporary so that it can be reused by other code needing
+ a temp of the same type.
+ """
+ type, manage_ref = self.temps_used_type[name]
+ freelist = self.temps_free.get((type, manage_ref))
+ if freelist is None:
freelist = ([], set()) # keep order in list and make lookups in set fast
- self.temps_free[(type, manage_ref)] = freelist
+ self.temps_free[(type, manage_ref)] = freelist
if name in freelist[1]:
- raise RuntimeError("Temp %s freed twice!" % name)
+ raise RuntimeError("Temp %s freed twice!" % name)
if name not in self.zombie_temps:
freelist[0].append(name)
freelist[1].add(name)
- if DebugFlags.debug_temp_code_comments:
+ if DebugFlags.debug_temp_code_comments:
self.owner.putln("/* %s released %s*/" % (
name, " - zombie" if name in self.zombie_temps else ""))
-
- def temps_in_use(self):
- """Return a list of (cname,type,manage_ref) tuples of temp names and their type
- that are currently in use.
- """
- used = []
- for name, type, manage_ref, static in self.temps_allocated:
- freelist = self.temps_free.get((type, manage_ref))
+
+ def temps_in_use(self):
+ """Return a list of (cname,type,manage_ref) tuples of temp names and their type
+ that are currently in use.
+ """
+ used = []
+ for name, type, manage_ref, static in self.temps_allocated:
+ freelist = self.temps_free.get((type, manage_ref))
if freelist is None or name not in freelist[1]:
- used.append((name, type, manage_ref and type.is_pyobject))
- return used
-
- def temps_holding_reference(self):
- """Return a list of (cname,type) tuples of temp names and their type
- that are currently in use. This includes only temps of a
- Python object type which owns its reference.
- """
- return [(name, type)
- for name, type, manage_ref in self.temps_in_use()
- if manage_ref and type.is_pyobject]
-
- def all_managed_temps(self):
- """Return a list of (cname, type) tuples of refcount-managed Python objects.
- """
- return [(cname, type)
+ used.append((name, type, manage_ref and type.is_pyobject))
+ return used
+
+ def temps_holding_reference(self):
+ """Return a list of (cname,type) tuples of temp names and their type
+ that are currently in use. This includes only temps of a
+ Python object type which owns its reference.
+ """
+ return [(name, type)
+ for name, type, manage_ref in self.temps_in_use()
+ if manage_ref and type.is_pyobject]
+
+ def all_managed_temps(self):
+ """Return a list of (cname, type) tuples of refcount-managed Python objects.
+ """
+ return [(cname, type)
for cname, type, manage_ref, static in self.temps_allocated
if manage_ref]
-
- def all_free_managed_temps(self):
- """Return a list of (cname, type) tuples of refcount-managed Python
- objects that are not currently in use. This is used by
- try-except and try-finally blocks to clean up temps in the
- error case.
- """
+
+ def all_free_managed_temps(self):
+ """Return a list of (cname, type) tuples of refcount-managed Python
+ objects that are not currently in use. This is used by
+ try-except and try-finally blocks to clean up temps in the
+ error case.
+ """
return sorted([ # Enforce deterministic order.
(cname, type)
for (type, manage_ref), freelist in self.temps_free.items() if manage_ref
for cname in freelist[0]
])
-
- def start_collecting_temps(self):
- """
- Useful to find out which temps were used in a code block
- """
- self.collect_temps_stack.append(set())
-
- def stop_collecting_temps(self):
- return self.collect_temps_stack.pop()
-
- def init_closure_temps(self, scope):
- self.closure_temps = ClosureTempAllocator(scope)
-
-
-class NumConst(object):
- """Global info about a Python number constant held by GlobalState.
-
- cname string
- value string
- py_type string int, long, float
- value_code string evaluation code if different from value
- """
-
- def __init__(self, cname, value, py_type, value_code=None):
- self.cname = cname
- self.value = value
- self.py_type = py_type
- self.value_code = value_code or value
-
-
-class PyObjectConst(object):
- """Global info about a generic constant held by GlobalState.
- """
- # cname string
- # type PyrexType
-
- def __init__(self, cname, type):
- self.cname = cname
- self.type = type
-
-
-cython.declare(possible_unicode_identifier=object, possible_bytes_identifier=object,
- replace_identifier=object, find_alphanums=object)
+
+ def start_collecting_temps(self):
+ """
+ Useful to find out which temps were used in a code block
+ """
+ self.collect_temps_stack.append(set())
+
+ def stop_collecting_temps(self):
+ return self.collect_temps_stack.pop()
+
+ def init_closure_temps(self, scope):
+ self.closure_temps = ClosureTempAllocator(scope)
+
+
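As a rough model of the temp handling above (allocate_temp/release_temp), the sketch below pools released names per (type, manage_ref) key and hands them out again; TempPool and its methods are illustrative stand-ins, not the real API.

    class TempPool(object):
        def __init__(self, prefix="__pyx_t_"):
            self.prefix = prefix
            self.counter = 0
            self.types = {}    # name -> (type, manage_ref), like temps_used_type
            self.free = {}     # (type, manage_ref) -> list of released names

        def allocate(self, type_, manage_ref):
            key = (type_, manage_ref)
            pool = self.free.get(key)
            if pool:
                return pool.pop()          # reuse a released temp of the same key
            self.counter += 1
            name = "%s%d" % (self.prefix, self.counter)
            self.types[name] = key
            return name

        def release(self, name):
            pool = self.free.setdefault(self.types[name], [])
            if name in pool:
                raise RuntimeError("Temp %s freed twice!" % name)
            pool.append(name)

    pool = TempPool()
    a = pool.allocate("PyObject *", True)
    pool.release(a)
    assert pool.allocate("PyObject *", True) == a   # same temp is reused
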
+class NumConst(object):
+ """Global info about a Python number constant held by GlobalState.
+
+ cname string
+ value string
+ py_type string int, long, float
+ value_code string evaluation code if different from value
+ """
+
+ def __init__(self, cname, value, py_type, value_code=None):
+ self.cname = cname
+ self.value = value
+ self.py_type = py_type
+ self.value_code = value_code or value
+
+
+class PyObjectConst(object):
+ """Global info about a generic constant held by GlobalState.
+ """
+ # cname string
+ # type PyrexType
+
+ def __init__(self, cname, type):
+ self.cname = cname
+ self.type = type
+
+
+cython.declare(possible_unicode_identifier=object, possible_bytes_identifier=object,
+ replace_identifier=object, find_alphanums=object)
possible_unicode_identifier = re.compile(br"(?![0-9])\w+$".decode('ascii'), re.U).match
-possible_bytes_identifier = re.compile(r"(?![0-9])\w+$".encode('ASCII')).match
-replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub
-find_alphanums = re.compile('([a-zA-Z0-9]+)').findall
-
-class StringConst(object):
- """Global info about a C string constant held by GlobalState.
- """
- # cname string
- # text EncodedString or BytesLiteral
- # py_strings {(identifier, encoding) : PyStringConst}
-
- def __init__(self, cname, text, byte_string):
- self.cname = cname
- self.text = text
- self.escaped_value = StringEncoding.escape_byte_string(byte_string)
- self.py_strings = None
- self.py_versions = []
-
- def add_py_version(self, version):
- if not version:
+possible_bytes_identifier = re.compile(r"(?![0-9])\w+$".encode('ASCII')).match
+replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub
+find_alphanums = re.compile('([a-zA-Z0-9]+)').findall
+
+class StringConst(object):
+ """Global info about a C string constant held by GlobalState.
+ """
+ # cname string
+ # text EncodedString or BytesLiteral
+ # py_strings {(identifier, encoding) : PyStringConst}
+
+ def __init__(self, cname, text, byte_string):
+ self.cname = cname
+ self.text = text
+ self.escaped_value = StringEncoding.escape_byte_string(byte_string)
+ self.py_strings = None
+ self.py_versions = []
+
+ def add_py_version(self, version):
+ if not version:
self.py_versions = [2, 3]
- elif version not in self.py_versions:
- self.py_versions.append(version)
-
- def get_py_string_const(self, encoding, identifier=None,
- is_str=False, py3str_cstring=None):
- py_strings = self.py_strings
- text = self.text
-
- is_str = bool(identifier or is_str)
- is_unicode = encoding is None and not is_str
-
- if encoding is None:
- # unicode string
- encoding_key = None
- else:
- # bytes or str
- encoding = encoding.lower()
- if encoding in ('utf8', 'utf-8', 'ascii', 'usascii', 'us-ascii'):
- encoding = None
- encoding_key = None
- else:
- encoding_key = ''.join(find_alphanums(encoding))
-
- key = (is_str, is_unicode, encoding_key, py3str_cstring)
- if py_strings is not None:
- try:
- return py_strings[key]
- except KeyError:
- pass
- else:
- self.py_strings = {}
-
- if identifier:
- intern = True
- elif identifier is None:
+ elif version not in self.py_versions:
+ self.py_versions.append(version)
+
+ def get_py_string_const(self, encoding, identifier=None,
+ is_str=False, py3str_cstring=None):
+ py_strings = self.py_strings
+ text = self.text
+
+ is_str = bool(identifier or is_str)
+ is_unicode = encoding is None and not is_str
+
+ if encoding is None:
+ # unicode string
+ encoding_key = None
+ else:
+ # bytes or str
+ encoding = encoding.lower()
+ if encoding in ('utf8', 'utf-8', 'ascii', 'usascii', 'us-ascii'):
+ encoding = None
+ encoding_key = None
+ else:
+ encoding_key = ''.join(find_alphanums(encoding))
+
+ key = (is_str, is_unicode, encoding_key, py3str_cstring)
+ if py_strings is not None:
+ try:
+ return py_strings[key]
+ except KeyError:
+ pass
+ else:
+ self.py_strings = {}
+
+ if identifier:
+ intern = True
+ elif identifier is None:
if isinstance(text, bytes):
intern = bool(possible_bytes_identifier(text))
else:
- intern = bool(possible_unicode_identifier(text))
- else:
- intern = False
- if intern:
- prefix = Naming.interned_prefixes['str']
- else:
- prefix = Naming.py_const_prefix
-
- if encoding_key:
- encoding_prefix = '_%s' % encoding_key
- else:
- encoding_prefix = ''
-
- pystring_cname = "%s%s%s_%s" % (
- prefix,
- (is_str and 's') or (is_unicode and 'u') or 'b',
- encoding_prefix,
- self.cname[len(Naming.const_prefix):])
-
- py_string = PyStringConst(
- pystring_cname, encoding, is_unicode, is_str, py3str_cstring, intern)
- self.py_strings[key] = py_string
- return py_string
-
-class PyStringConst(object):
- """Global info about a Python string constant held by GlobalState.
- """
- # cname string
- # py3str_cstring string
- # encoding string
- # intern boolean
- # is_unicode boolean
- # is_str boolean
-
- def __init__(self, cname, encoding, is_unicode, is_str=False,
- py3str_cstring=None, intern=False):
- self.cname = cname
- self.py3str_cstring = py3str_cstring
- self.encoding = encoding
- self.is_str = is_str
- self.is_unicode = is_unicode
- self.intern = intern
-
- def __lt__(self, other):
- return self.cname < other.cname
-
-
-class GlobalState(object):
- # filename_table {string : int} for finding filename table indexes
- # filename_list [string] filenames in filename table order
- # input_file_contents dict contents (=list of lines) of any file that was used as input
- # to create this output C code. This is
- # used to annotate the comments.
- #
- # utility_codes set IDs of used utility code (to avoid reinsertion)
- #
- # declared_cnames {string:Entry} used in a transition phase to merge pxd-declared
- # constants etc. into the pyx-declared ones (i.e,
- # check if constants are already added).
- # In time, hopefully the literals etc. will be
- # supplied directly instead.
- #
- # const_cnames_used dict global counter for unique constant identifiers
- #
-
- # parts {string:CCodeWriter}
-
-
- # interned_strings
- # consts
- # interned_nums
-
- # directives set Temporary variable used to track
- # the current set of directives in the code generation
- # process.
-
- directives = {}
-
- code_layout = [
- 'h_code',
- 'filename_table',
- 'utility_code_proto_before_types',
- 'numeric_typedefs', # Let these detailed individual parts stay!,
- 'complex_type_declarations', # as the proper solution is to make a full DAG...
- 'type_declarations', # More coarse-grained blocks would simply hide
- 'utility_code_proto', # the ugliness, not fix it
- 'module_declarations',
- 'typeinfo',
- 'before_global_var',
- 'global_var',
+ intern = bool(possible_unicode_identifier(text))
+ else:
+ intern = False
+ if intern:
+ prefix = Naming.interned_prefixes['str']
+ else:
+ prefix = Naming.py_const_prefix
+
+ if encoding_key:
+ encoding_prefix = '_%s' % encoding_key
+ else:
+ encoding_prefix = ''
+
+ pystring_cname = "%s%s%s_%s" % (
+ prefix,
+ (is_str and 's') or (is_unicode and 'u') or 'b',
+ encoding_prefix,
+ self.cname[len(Naming.const_prefix):])
+
+ py_string = PyStringConst(
+ pystring_cname, encoding, is_unicode, is_str, py3str_cstring, intern)
+ self.py_strings[key] = py_string
+ return py_string
+
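The encoding handling in get_py_string_const above collapses UTF-8/ASCII spellings and reduces any other encoding to an alphanumeric key that can be embedded in the constant's C name. A standalone sketch of that normalisation (encoding_key is an illustrative helper, not part of this module):

    import re

    _find_alphanums = re.compile('([a-zA-Z0-9]+)').findall

    def encoding_key(encoding):
        # None/UTF-8/ASCII spellings need no marker in the cname.
        if encoding is None:
            return None
        encoding = encoding.lower()
        if encoding in ('utf8', 'utf-8', 'ascii', 'usascii', 'us-ascii'):
            return None
        return ''.join(_find_alphanums(encoding))

    assert encoding_key('UTF-8') is None
    assert encoding_key('iso-8859-1') == 'iso88591'
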
+class PyStringConst(object):
+ """Global info about a Python string constant held by GlobalState.
+ """
+ # cname string
+ # py3str_cstring string
+ # encoding string
+ # intern boolean
+ # is_unicode boolean
+ # is_str boolean
+
+ def __init__(self, cname, encoding, is_unicode, is_str=False,
+ py3str_cstring=None, intern=False):
+ self.cname = cname
+ self.py3str_cstring = py3str_cstring
+ self.encoding = encoding
+ self.is_str = is_str
+ self.is_unicode = is_unicode
+ self.intern = intern
+
+ def __lt__(self, other):
+ return self.cname < other.cname
+
+
+class GlobalState(object):
+ # filename_table {string : int} for finding filename table indexes
+ # filename_list [string] filenames in filename table order
+ # input_file_contents dict contents (=list of lines) of any file that was used as input
+ # to create this output C code. This is
+ # used to annotate the comments.
+ #
+ # utility_codes set IDs of used utility code (to avoid reinsertion)
+ #
+ # declared_cnames {string:Entry} used in a transition phase to merge pxd-declared
+ # constants etc. into the pyx-declared ones (i.e,
+ # check if constants are already added).
+ # In time, hopefully the literals etc. will be
+ # supplied directly instead.
+ #
+ # const_cnames_used dict global counter for unique constant identifiers
+ #
+
+ # parts {string:CCodeWriter}
+
+
+ # interned_strings
+ # consts
+ # interned_nums
+
+ # directives set Temporary variable used to track
+ # the current set of directives in the code generation
+ # process.
+
+ directives = {}
+
+ code_layout = [
+ 'h_code',
+ 'filename_table',
+ 'utility_code_proto_before_types',
+ 'numeric_typedefs', # Let these detailed individual parts stay!,
+ 'complex_type_declarations', # as the proper solution is to make a full DAG...
+ 'type_declarations', # More coarse-grained blocks would simply hide
+ 'utility_code_proto', # the ugliness, not fix it
+ 'module_declarations',
+ 'typeinfo',
+ 'before_global_var',
+ 'global_var',
'string_decls',
- 'decls',
+ 'decls',
'late_includes',
- 'all_the_rest',
- 'pystring_table',
- 'cached_builtins',
- 'cached_constants',
- 'init_globals',
- 'init_module',
- 'cleanup_globals',
- 'cleanup_module',
- 'main_method',
- 'utility_code_def',
- 'end'
- ]
-
-
+ 'all_the_rest',
+ 'pystring_table',
+ 'cached_builtins',
+ 'cached_constants',
+ 'init_globals',
+ 'init_module',
+ 'cleanup_globals',
+ 'cleanup_module',
+ 'main_method',
+ 'utility_code_def',
+ 'end'
+ ]
+
+
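code_layout above fixes the order of named output parts that are then filled out of order through insertion points. A rough stand-in using one buffer per part (ModuleLayout is illustrative; the real implementation uses StringIOTree insertion points):

    from io import StringIO

    class ModuleLayout(object):
        # One buffer per named part, created in layout order and
        # concatenated in that same order regardless of write order.
        def __init__(self, layout):
            self.layout = list(layout)
            self.parts = {name: StringIO() for name in self.layout}

        def putln(self, part, line):
            self.parts[part].write(line + "\n")

        def getvalue(self):
            return "".join(self.parts[name].getvalue() for name in self.layout)

    out = ModuleLayout(['h_code', 'utility_code_proto', 'utility_code_def', 'end'])
    out.putln('utility_code_def', "/* --- Runtime support code --- */")
    out.putln('h_code', "/* Generated by Cython */")   # earlier part, written later
    print(out.getvalue())
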
def __init__(self, writer, module_node, code_config, common_utility_include_dir=None):
- self.filename_table = {}
- self.filename_list = []
- self.input_file_contents = {}
- self.utility_codes = set()
- self.declared_cnames = {}
- self.in_utility_code_generation = False
+ self.filename_table = {}
+ self.filename_list = []
+ self.input_file_contents = {}
+ self.utility_codes = set()
+ self.declared_cnames = {}
+ self.in_utility_code_generation = False
self.code_config = code_config
- self.common_utility_include_dir = common_utility_include_dir
- self.parts = {}
- self.module_node = module_node # because some utility code generation needs it
- # (generating backwards-compatible Get/ReleaseBuffer
-
- self.const_cnames_used = {}
- self.string_const_index = {}
+ self.common_utility_include_dir = common_utility_include_dir
+ self.parts = {}
+ self.module_node = module_node # because some utility code generation needs it
+ # (generating backwards-compatible Get/ReleaseBuffer
+
+ self.const_cnames_used = {}
+ self.string_const_index = {}
self.dedup_const_index = {}
- self.pyunicode_ptr_const_index = {}
- self.num_const_index = {}
- self.py_constants = []
+ self.pyunicode_ptr_const_index = {}
+ self.num_const_index = {}
+ self.py_constants = []
self.cached_cmethods = {}
self.initialised_constants = set()
-
+
writer.set_global_state(self)
- self.rootwriter = writer
-
- def initialize_main_c_code(self):
- rootwriter = self.rootwriter
- for part in self.code_layout:
- self.parts[part] = rootwriter.insertion_point()
-
- if not Options.cache_builtins:
- del self.parts['cached_builtins']
- else:
- w = self.parts['cached_builtins']
- w.enter_cfunc_scope()
+ self.rootwriter = writer
+
+ def initialize_main_c_code(self):
+ rootwriter = self.rootwriter
+ for part in self.code_layout:
+ self.parts[part] = rootwriter.insertion_point()
+
+ if not Options.cache_builtins:
+ del self.parts['cached_builtins']
+ else:
+ w = self.parts['cached_builtins']
+ w.enter_cfunc_scope()
w.putln("static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {")
-
- w = self.parts['cached_constants']
- w.enter_cfunc_scope()
- w.putln("")
+
+ w = self.parts['cached_constants']
+ w.enter_cfunc_scope()
+ w.putln("")
w.putln("static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {")
- w.put_declare_refcount_context()
- w.put_setup_refcount_context("__Pyx_InitCachedConstants")
-
- w = self.parts['init_globals']
- w.enter_cfunc_scope()
- w.putln("")
+ w.put_declare_refcount_context()
+ w.put_setup_refcount_context("__Pyx_InitCachedConstants")
+
+ w = self.parts['init_globals']
+ w.enter_cfunc_scope()
+ w.putln("")
w.putln("static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) {")
-
- if not Options.generate_cleanup_code:
- del self.parts['cleanup_globals']
- else:
- w = self.parts['cleanup_globals']
- w.enter_cfunc_scope()
- w.putln("")
+
+ if not Options.generate_cleanup_code:
+ del self.parts['cleanup_globals']
+ else:
+ w = self.parts['cleanup_globals']
+ w.enter_cfunc_scope()
+ w.putln("")
w.putln("static CYTHON_SMALL_CODE void __Pyx_CleanupGlobals(void) {")
-
+
code = self.parts['utility_code_proto']
code.putln("")
code.putln("/* --- Runtime support code (head) --- */")
- code = self.parts['utility_code_def']
+ code = self.parts['utility_code_def']
if self.code_config.emit_linenums:
- code.write('\n#line 1 "cython_utility"\n')
- code.putln("")
+ code.write('\n#line 1 "cython_utility"\n')
+ code.putln("")
code.putln("/* --- Runtime support code --- */")
-
- def finalize_main_c_code(self):
- self.close_global_decls()
-
- #
- # utility_code_def
- #
- code = self.parts['utility_code_def']
+
+ def finalize_main_c_code(self):
+ self.close_global_decls()
+
+ #
+ # utility_code_def
+ #
+ code = self.parts['utility_code_def']
util = TempitaUtilityCode.load_cached("TypeConversions", "TypeConversion.c")
code.put(util.format_code(util.impl))
- code.putln("")
-
- def __getitem__(self, key):
- return self.parts[key]
-
- #
- # Global constants, interned objects, etc.
- #
- def close_global_decls(self):
- # This is called when it is known that no more global declarations will
-        # be declared.
- self.generate_const_declarations()
- if Options.cache_builtins:
- w = self.parts['cached_builtins']
- w.putln("return 0;")
- if w.label_used(w.error_label):
- w.put_label(w.error_label)
- w.putln("return -1;")
- w.putln("}")
- w.exit_cfunc_scope()
-
- w = self.parts['cached_constants']
- w.put_finish_refcount_context()
- w.putln("return 0;")
- if w.label_used(w.error_label):
- w.put_label(w.error_label)
- w.put_finish_refcount_context()
- w.putln("return -1;")
- w.putln("}")
- w.exit_cfunc_scope()
-
- w = self.parts['init_globals']
- w.putln("return 0;")
- if w.label_used(w.error_label):
- w.put_label(w.error_label)
- w.putln("return -1;")
- w.putln("}")
- w.exit_cfunc_scope()
-
- if Options.generate_cleanup_code:
- w = self.parts['cleanup_globals']
- w.putln("}")
- w.exit_cfunc_scope()
-
- if Options.generate_cleanup_code:
- w = self.parts['cleanup_module']
- w.putln("}")
- w.exit_cfunc_scope()
-
- def put_pyobject_decl(self, entry):
- self['global_var'].putln("static PyObject *%s;" % entry.cname)
-
- # constant handling at code generation time
-
+ code.putln("")
+
+ def __getitem__(self, key):
+ return self.parts[key]
+
+ #
+ # Global constants, interned objects, etc.
+ #
+ def close_global_decls(self):
+ # This is called when it is known that no more global declarations will
+        # be declared.
+ self.generate_const_declarations()
+ if Options.cache_builtins:
+ w = self.parts['cached_builtins']
+ w.putln("return 0;")
+ if w.label_used(w.error_label):
+ w.put_label(w.error_label)
+ w.putln("return -1;")
+ w.putln("}")
+ w.exit_cfunc_scope()
+
+ w = self.parts['cached_constants']
+ w.put_finish_refcount_context()
+ w.putln("return 0;")
+ if w.label_used(w.error_label):
+ w.put_label(w.error_label)
+ w.put_finish_refcount_context()
+ w.putln("return -1;")
+ w.putln("}")
+ w.exit_cfunc_scope()
+
+ w = self.parts['init_globals']
+ w.putln("return 0;")
+ if w.label_used(w.error_label):
+ w.put_label(w.error_label)
+ w.putln("return -1;")
+ w.putln("}")
+ w.exit_cfunc_scope()
+
+ if Options.generate_cleanup_code:
+ w = self.parts['cleanup_globals']
+ w.putln("}")
+ w.exit_cfunc_scope()
+
+ if Options.generate_cleanup_code:
+ w = self.parts['cleanup_module']
+ w.putln("}")
+ w.exit_cfunc_scope()
+
+ def put_pyobject_decl(self, entry):
+ self['global_var'].putln("static PyObject *%s;" % entry.cname)
+
+ # constant handling at code generation time
+
def get_cached_constants_writer(self, target=None):
if target is not None:
if target in self.initialised_constants:
# Return None on second/later calls to prevent duplicate creation code.
return None
self.initialised_constants.add(target)
- return self.parts['cached_constants']
-
- def get_int_const(self, str_value, longness=False):
- py_type = longness and 'long' or 'int'
- try:
- c = self.num_const_index[(str_value, py_type)]
- except KeyError:
- c = self.new_num_const(str_value, py_type)
- return c
-
- def get_float_const(self, str_value, value_code):
- try:
- c = self.num_const_index[(str_value, 'float')]
- except KeyError:
- c = self.new_num_const(str_value, 'float', value_code)
- return c
-
+ return self.parts['cached_constants']
+
+ def get_int_const(self, str_value, longness=False):
+ py_type = longness and 'long' or 'int'
+ try:
+ c = self.num_const_index[(str_value, py_type)]
+ except KeyError:
+ c = self.new_num_const(str_value, py_type)
+ return c
+
+ def get_float_const(self, str_value, value_code):
+ try:
+ c = self.num_const_index[(str_value, 'float')]
+ except KeyError:
+ c = self.new_num_const(str_value, 'float', value_code)
+ return c
+
def get_py_const(self, type, prefix='', cleanup_level=None, dedup_key=None):
if dedup_key is not None:
const = self.dedup_const_index.get(dedup_key)
if const is not None:
return const
- # create a new Python object constant
- const = self.new_py_const(type, prefix)
- if cleanup_level is not None \
- and cleanup_level <= Options.generate_cleanup_code:
- cleanup_writer = self.parts['cleanup_globals']
- cleanup_writer.putln('Py_CLEAR(%s);' % const.cname)
+ # create a new Python object constant
+ const = self.new_py_const(type, prefix)
+ if cleanup_level is not None \
+ and cleanup_level <= Options.generate_cleanup_code:
+ cleanup_writer = self.parts['cleanup_globals']
+ cleanup_writer.putln('Py_CLEAR(%s);' % const.cname)
if dedup_key is not None:
self.dedup_const_index[dedup_key] = const
- return const
-
- def get_string_const(self, text, py_version=None):
- # return a C string constant, creating a new one if necessary
- if text.is_unicode:
- byte_string = text.utf8encode()
- else:
- byte_string = text.byteencode()
- try:
- c = self.string_const_index[byte_string]
- except KeyError:
- c = self.new_string_const(text, byte_string)
- c.add_py_version(py_version)
- return c
-
- def get_pyunicode_ptr_const(self, text):
- # return a Py_UNICODE[] constant, creating a new one if necessary
- assert text.is_unicode
- try:
- c = self.pyunicode_ptr_const_index[text]
- except KeyError:
- c = self.pyunicode_ptr_const_index[text] = self.new_const_cname()
- return c
-
- def get_py_string_const(self, text, identifier=None,
- is_str=False, unicode_value=None):
- # return a Python string constant, creating a new one if necessary
- py3str_cstring = None
- if is_str and unicode_value is not None \
- and unicode_value.utf8encode() != text.byteencode():
- py3str_cstring = self.get_string_const(unicode_value, py_version=3)
- c_string = self.get_string_const(text, py_version=2)
- else:
- c_string = self.get_string_const(text)
- py_string = c_string.get_py_string_const(
- text.encoding, identifier, is_str, py3str_cstring)
- return py_string
-
- def get_interned_identifier(self, text):
- return self.get_py_string_const(text, identifier=True)
-
- def new_string_const(self, text, byte_string):
- cname = self.new_string_const_cname(byte_string)
- c = StringConst(cname, text, byte_string)
- self.string_const_index[byte_string] = c
- return c
-
- def new_num_const(self, value, py_type, value_code=None):
- cname = self.new_num_const_cname(value, py_type)
- c = NumConst(cname, value, py_type, value_code)
- self.num_const_index[(value, py_type)] = c
- return c
-
- def new_py_const(self, type, prefix=''):
- cname = self.new_const_cname(prefix)
- c = PyObjectConst(cname, type)
- self.py_constants.append(c)
- return c
-
- def new_string_const_cname(self, bytes_value):
- # Create a new globally-unique nice name for a C string constant.
- value = bytes_value.decode('ASCII', 'ignore')
- return self.new_const_cname(value=value)
-
- def new_num_const_cname(self, value, py_type):
- if py_type == 'long':
- value += 'L'
- py_type = 'int'
- prefix = Naming.interned_prefixes[py_type]
- cname = "%s%s" % (prefix, value)
- cname = cname.replace('+', '_').replace('-', 'neg_').replace('.', '_')
- return cname
-
- def new_const_cname(self, prefix='', value=''):
- value = replace_identifier('_', value)[:32].strip('_')
- used = self.const_cnames_used
- name_suffix = value
- while name_suffix in used:
- counter = used[value] = used[value] + 1
- name_suffix = '%s_%d' % (value, counter)
- used[name_suffix] = 1
- if prefix:
- prefix = Naming.interned_prefixes[prefix]
- else:
- prefix = Naming.const_prefix
- return "%s%s" % (prefix, name_suffix)
-
+ return const
+
+ def get_string_const(self, text, py_version=None):
+ # return a C string constant, creating a new one if necessary
+ if text.is_unicode:
+ byte_string = text.utf8encode()
+ else:
+ byte_string = text.byteencode()
+ try:
+ c = self.string_const_index[byte_string]
+ except KeyError:
+ c = self.new_string_const(text, byte_string)
+ c.add_py_version(py_version)
+ return c
+
+ def get_pyunicode_ptr_const(self, text):
+ # return a Py_UNICODE[] constant, creating a new one if necessary
+ assert text.is_unicode
+ try:
+ c = self.pyunicode_ptr_const_index[text]
+ except KeyError:
+ c = self.pyunicode_ptr_const_index[text] = self.new_const_cname()
+ return c
+
+ def get_py_string_const(self, text, identifier=None,
+ is_str=False, unicode_value=None):
+ # return a Python string constant, creating a new one if necessary
+ py3str_cstring = None
+ if is_str and unicode_value is not None \
+ and unicode_value.utf8encode() != text.byteencode():
+ py3str_cstring = self.get_string_const(unicode_value, py_version=3)
+ c_string = self.get_string_const(text, py_version=2)
+ else:
+ c_string = self.get_string_const(text)
+ py_string = c_string.get_py_string_const(
+ text.encoding, identifier, is_str, py3str_cstring)
+ return py_string
+
+ def get_interned_identifier(self, text):
+ return self.get_py_string_const(text, identifier=True)
+
+ def new_string_const(self, text, byte_string):
+ cname = self.new_string_const_cname(byte_string)
+ c = StringConst(cname, text, byte_string)
+ self.string_const_index[byte_string] = c
+ return c
+
+ def new_num_const(self, value, py_type, value_code=None):
+ cname = self.new_num_const_cname(value, py_type)
+ c = NumConst(cname, value, py_type, value_code)
+ self.num_const_index[(value, py_type)] = c
+ return c
+
+ def new_py_const(self, type, prefix=''):
+ cname = self.new_const_cname(prefix)
+ c = PyObjectConst(cname, type)
+ self.py_constants.append(c)
+ return c
+
+ def new_string_const_cname(self, bytes_value):
+ # Create a new globally-unique nice name for a C string constant.
+ value = bytes_value.decode('ASCII', 'ignore')
+ return self.new_const_cname(value=value)
+
+ def new_num_const_cname(self, value, py_type):
+ if py_type == 'long':
+ value += 'L'
+ py_type = 'int'
+ prefix = Naming.interned_prefixes[py_type]
+ cname = "%s%s" % (prefix, value)
+ cname = cname.replace('+', '_').replace('-', 'neg_').replace('.', '_')
+ return cname
+
+ def new_const_cname(self, prefix='', value=''):
+ value = replace_identifier('_', value)[:32].strip('_')
+ used = self.const_cnames_used
+ name_suffix = value
+ while name_suffix in used:
+ counter = used[value] = used[value] + 1
+ name_suffix = '%s_%d' % (value, counter)
+ used[name_suffix] = 1
+ if prefix:
+ prefix = Naming.interned_prefixes[prefix]
+ else:
+ prefix = Naming.const_prefix
+ return "%s%s" % (prefix, name_suffix)
+
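new_const_cname above sanitises the literal value and appends a counter until the name is unique. An equivalent standalone sketch, assuming the "__pyx_k_" prefix only for illustration:

    import re

    _replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub

    def unique_const_cname(used, value, prefix="__pyx_k_"):
        # Sanitise the value, then append a counter until the suffix is new.
        suffix = _replace_identifier('_', value)[:32].strip('_')
        name = suffix
        while name in used:
            used[suffix] = used.get(suffix, 1) + 1
            name = '%s_%d' % (suffix, used[suffix])
        used[name] = 1
        return prefix + name

    used = {}
    print(unique_const_cname(used, "hello world"))   # __pyx_k_hello_world
    print(unique_const_cname(used, "hello world"))   # __pyx_k_hello_world_2
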
def get_cached_unbound_method(self, type_cname, method_name):
key = (type_cname, method_name)
try:
@@ -1420,51 +1420,51 @@ class GlobalState(object):
', '.join(args),
)
- def add_cached_builtin_decl(self, entry):
- if entry.is_builtin and entry.is_const:
- if self.should_declare(entry.cname, entry):
- self.put_pyobject_decl(entry)
- w = self.parts['cached_builtins']
- condition = None
- if entry.name in non_portable_builtins_map:
- condition, replacement = non_portable_builtins_map[entry.name]
- w.putln('#if %s' % condition)
- self.put_cached_builtin_init(
- entry.pos, StringEncoding.EncodedString(replacement),
- entry.cname)
- w.putln('#else')
- self.put_cached_builtin_init(
- entry.pos, StringEncoding.EncodedString(entry.name),
- entry.cname)
- if condition:
- w.putln('#endif')
-
- def put_cached_builtin_init(self, pos, name, cname):
- w = self.parts['cached_builtins']
- interned_cname = self.get_interned_identifier(name).cname
- self.use_utility_code(
- UtilityCode.load_cached("GetBuiltinName", "ObjectHandling.c"))
- w.putln('%s = __Pyx_GetBuiltinName(%s); if (!%s) %s' % (
- cname,
- interned_cname,
- cname,
- w.error_goto(pos)))
-
- def generate_const_declarations(self):
+ def add_cached_builtin_decl(self, entry):
+ if entry.is_builtin and entry.is_const:
+ if self.should_declare(entry.cname, entry):
+ self.put_pyobject_decl(entry)
+ w = self.parts['cached_builtins']
+ condition = None
+ if entry.name in non_portable_builtins_map:
+ condition, replacement = non_portable_builtins_map[entry.name]
+ w.putln('#if %s' % condition)
+ self.put_cached_builtin_init(
+ entry.pos, StringEncoding.EncodedString(replacement),
+ entry.cname)
+ w.putln('#else')
+ self.put_cached_builtin_init(
+ entry.pos, StringEncoding.EncodedString(entry.name),
+ entry.cname)
+ if condition:
+ w.putln('#endif')
+
+ def put_cached_builtin_init(self, pos, name, cname):
+ w = self.parts['cached_builtins']
+ interned_cname = self.get_interned_identifier(name).cname
+ self.use_utility_code(
+ UtilityCode.load_cached("GetBuiltinName", "ObjectHandling.c"))
+ w.putln('%s = __Pyx_GetBuiltinName(%s); if (!%s) %s' % (
+ cname,
+ interned_cname,
+ cname,
+ w.error_goto(pos)))
+
+ def generate_const_declarations(self):
self.generate_cached_methods_decls()
- self.generate_string_constants()
- self.generate_num_constants()
- self.generate_object_constant_decls()
-
- def generate_object_constant_decls(self):
+ self.generate_string_constants()
+ self.generate_num_constants()
+ self.generate_object_constant_decls()
+
+ def generate_object_constant_decls(self):
consts = [(len(c.cname), c.cname, c)
for c in self.py_constants]
- consts.sort()
- decls_writer = self.parts['decls']
- for _, cname, c in consts:
- decls_writer.putln(
- "static %s;" % c.type.declaration_code(cname))
-
+ consts.sort()
+ decls_writer = self.parts['decls']
+ for _, cname, c in consts:
+ decls_writer.putln(
+ "static %s;" % c.type.declaration_code(cname))
+
def generate_cached_methods_decls(self):
if not self.cached_cmethods:
return
@@ -1486,173 +1486,173 @@ class GlobalState(object):
for cname in cnames:
cleanup.putln("Py_CLEAR(%s.method);" % cname)
- def generate_string_constants(self):
+ def generate_string_constants(self):
c_consts = [(len(c.cname), c.cname, c) for c in self.string_const_index.values()]
- c_consts.sort()
- py_strings = []
-
+ c_consts.sort()
+ py_strings = []
+
decls_writer = self.parts['string_decls']
- for _, cname, c in c_consts:
- conditional = False
- if c.py_versions and (2 not in c.py_versions or 3 not in c.py_versions):
- conditional = True
- decls_writer.putln("#if PY_MAJOR_VERSION %s 3" % (
- (2 in c.py_versions) and '<' or '>='))
+ for _, cname, c in c_consts:
+ conditional = False
+ if c.py_versions and (2 not in c.py_versions or 3 not in c.py_versions):
+ conditional = True
+ decls_writer.putln("#if PY_MAJOR_VERSION %s 3" % (
+ (2 in c.py_versions) and '<' or '>='))
decls_writer.putln('static const char %s[] = "%s";' % (
- cname, StringEncoding.split_string_literal(c.escaped_value)))
- if conditional:
- decls_writer.putln("#endif")
- if c.py_strings is not None:
- for py_string in c.py_strings.values():
- py_strings.append((c.cname, len(py_string.cname), py_string))
-
+ cname, StringEncoding.split_string_literal(c.escaped_value)))
+ if conditional:
+ decls_writer.putln("#endif")
+ if c.py_strings is not None:
+ for py_string in c.py_strings.values():
+ py_strings.append((c.cname, len(py_string.cname), py_string))
+
for c, cname in sorted(self.pyunicode_ptr_const_index.items()):
- utf16_array, utf32_array = StringEncoding.encode_pyunicode_string(c)
- if utf16_array:
- # Narrow and wide representations differ
- decls_writer.putln("#ifdef Py_UNICODE_WIDE")
- decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf32_array))
- if utf16_array:
- decls_writer.putln("#else")
- decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf16_array))
- decls_writer.putln("#endif")
-
- if py_strings:
- self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c"))
- py_strings.sort()
- w = self.parts['pystring_table']
- w.putln("")
+ utf16_array, utf32_array = StringEncoding.encode_pyunicode_string(c)
+ if utf16_array:
+ # Narrow and wide representations differ
+ decls_writer.putln("#ifdef Py_UNICODE_WIDE")
+ decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf32_array))
+ if utf16_array:
+ decls_writer.putln("#else")
+ decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf16_array))
+ decls_writer.putln("#endif")
+
+ if py_strings:
+ self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c"))
+ py_strings.sort()
+ w = self.parts['pystring_table']
+ w.putln("")
w.putln("static __Pyx_StringTabEntry %s[] = {" % Naming.stringtab_cname)
- for c_cname, _, py_string in py_strings:
- if not py_string.is_str or not py_string.encoding or \
+ for c_cname, _, py_string in py_strings:
+ if not py_string.is_str or not py_string.encoding or \
py_string.encoding in ('ASCII', 'USASCII', 'US-ASCII',
'UTF8', 'UTF-8'):
- encoding = '0'
- else:
- encoding = '"%s"' % py_string.encoding.lower()
-
- decls_writer.putln(
- "static PyObject *%s;" % py_string.cname)
- if py_string.py3str_cstring:
- w.putln("#if PY_MAJOR_VERSION >= 3")
+ encoding = '0'
+ else:
+ encoding = '"%s"' % py_string.encoding.lower()
+
+ decls_writer.putln(
+ "static PyObject *%s;" % py_string.cname)
+ if py_string.py3str_cstring:
+ w.putln("#if PY_MAJOR_VERSION >= 3")
w.putln("{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % (
- py_string.cname,
- py_string.py3str_cstring.cname,
- py_string.py3str_cstring.cname,
- '0', 1, 0,
- py_string.intern
- ))
- w.putln("#else")
+ py_string.cname,
+ py_string.py3str_cstring.cname,
+ py_string.py3str_cstring.cname,
+ '0', 1, 0,
+ py_string.intern
+ ))
+ w.putln("#else")
w.putln("{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % (
- py_string.cname,
- c_cname,
- c_cname,
- encoding,
- py_string.is_unicode,
- py_string.is_str,
- py_string.intern
- ))
- if py_string.py3str_cstring:
- w.putln("#endif")
- w.putln("{0, 0, 0, 0, 0, 0, 0}")
- w.putln("};")
-
- init_globals = self.parts['init_globals']
- init_globals.putln(
- "if (__Pyx_InitStrings(%s) < 0) %s;" % (
- Naming.stringtab_cname,
- init_globals.error_goto(self.module_pos)))
-
- def generate_num_constants(self):
- consts = [(c.py_type, c.value[0] == '-', len(c.value), c.value, c.value_code, c)
- for c in self.num_const_index.values()]
- consts.sort()
- decls_writer = self.parts['decls']
- init_globals = self.parts['init_globals']
- for py_type, _, _, value, value_code, c in consts:
- cname = c.cname
- decls_writer.putln("static PyObject *%s;" % cname)
- if py_type == 'float':
- function = 'PyFloat_FromDouble(%s)'
- elif py_type == 'long':
- function = 'PyLong_FromString((char *)"%s", 0, 0)'
- elif Utils.long_literal(value):
- function = 'PyInt_FromString((char *)"%s", 0, 0)'
- elif len(value.lstrip('-')) > 4:
- function = "PyInt_FromLong(%sL)"
- else:
- function = "PyInt_FromLong(%s)"
- init_globals.putln('%s = %s; %s' % (
- cname, function % value_code,
- init_globals.error_goto_if_null(cname, self.module_pos)))
-
- # The functions below are there in a transition phase only
- # and will be deprecated. They are called from Nodes.BlockNode.
- # The copy&paste duplication is intentional in order to be able
- # to see quickly how BlockNode worked, until this is replaced.
-
- def should_declare(self, cname, entry):
- if cname in self.declared_cnames:
- other = self.declared_cnames[cname]
- assert str(entry.type) == str(other.type)
- assert entry.init == other.init
- return False
- else:
- self.declared_cnames[cname] = entry
- return True
-
- #
- # File name state
- #
-
+ py_string.cname,
+ c_cname,
+ c_cname,
+ encoding,
+ py_string.is_unicode,
+ py_string.is_str,
+ py_string.intern
+ ))
+ if py_string.py3str_cstring:
+ w.putln("#endif")
+ w.putln("{0, 0, 0, 0, 0, 0, 0}")
+ w.putln("};")
+
+ init_globals = self.parts['init_globals']
+ init_globals.putln(
+ "if (__Pyx_InitStrings(%s) < 0) %s;" % (
+ Naming.stringtab_cname,
+ init_globals.error_goto(self.module_pos)))
+
+ def generate_num_constants(self):
+ consts = [(c.py_type, c.value[0] == '-', len(c.value), c.value, c.value_code, c)
+ for c in self.num_const_index.values()]
+ consts.sort()
+ decls_writer = self.parts['decls']
+ init_globals = self.parts['init_globals']
+ for py_type, _, _, value, value_code, c in consts:
+ cname = c.cname
+ decls_writer.putln("static PyObject *%s;" % cname)
+ if py_type == 'float':
+ function = 'PyFloat_FromDouble(%s)'
+ elif py_type == 'long':
+ function = 'PyLong_FromString((char *)"%s", 0, 0)'
+ elif Utils.long_literal(value):
+ function = 'PyInt_FromString((char *)"%s", 0, 0)'
+ elif len(value.lstrip('-')) > 4:
+ function = "PyInt_FromLong(%sL)"
+ else:
+ function = "PyInt_FromLong(%s)"
+ init_globals.putln('%s = %s; %s' % (
+ cname, function % value_code,
+ init_globals.error_goto_if_null(cname, self.module_pos)))
+
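generate_num_constants above picks the C API constructor per numeric literal. The sketch below mirrors that dispatch, approximating Utils.long_literal() with a simple digit-count check; num_const_call is an illustrative name, not part of this module.

    def num_const_call(value, py_type, value_code=None):
        # Which C API call creates the Python object in the generated init code.
        value_code = value_code or value
        if py_type == 'float':
            template = 'PyFloat_FromDouble(%s)'
        elif py_type == 'long':
            template = 'PyLong_FromString((char *)"%s", 0, 0)'
        elif len(value.lstrip('-')) > 18:      # stand-in for Utils.long_literal()
            template = 'PyInt_FromString((char *)"%s", 0, 0)'
        elif len(value.lstrip('-')) > 4:
            template = 'PyInt_FromLong(%sL)'
        else:
            template = 'PyInt_FromLong(%s)'
        return template % value_code

    print(num_const_call('3', 'int'))        # PyInt_FromLong(3)
    print(num_const_call('100000', 'int'))   # PyInt_FromLong(100000L)
    print(num_const_call('2.5', 'float'))    # PyFloat_FromDouble(2.5)
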
+ # The functions below are there in a transition phase only
+ # and will be deprecated. They are called from Nodes.BlockNode.
+ # The copy&paste duplication is intentional in order to be able
+ # to see quickly how BlockNode worked, until this is replaced.
+
+ def should_declare(self, cname, entry):
+ if cname in self.declared_cnames:
+ other = self.declared_cnames[cname]
+ assert str(entry.type) == str(other.type)
+ assert entry.init == other.init
+ return False
+ else:
+ self.declared_cnames[cname] = entry
+ return True
+
+ #
+ # File name state
+ #
+
def lookup_filename(self, source_desc):
entry = source_desc.get_filenametable_entry()
- try:
+ try:
index = self.filename_table[entry]
- except KeyError:
- index = len(self.filename_list)
+ except KeyError:
+ index = len(self.filename_list)
self.filename_list.append(source_desc)
self.filename_table[entry] = index
- return index
-
- def commented_file_contents(self, source_desc):
- try:
- return self.input_file_contents[source_desc]
- except KeyError:
- pass
- source_file = source_desc.get_lines(encoding='ASCII',
- error_handling='ignore')
- try:
- F = [u' * ' + line.rstrip().replace(
- u'*/', u'*[inserted by cython to avoid comment closer]/'
- ).replace(
- u'/*', u'/[inserted by cython to avoid comment start]*'
- )
- for line in source_file]
- finally:
- if hasattr(source_file, 'close'):
- source_file.close()
- if not F: F.append(u'')
- self.input_file_contents[source_desc] = F
- return F
-
- #
- # Utility code state
- #
-
- def use_utility_code(self, utility_code):
- """
- Adds code to the C file. utility_code should
- a) implement __eq__/__hash__ for the purpose of knowing whether the same
- code has already been included
- b) implement put_code, which takes a globalstate instance
-
- See UtilityCode.
- """
+ return index
+
+ def commented_file_contents(self, source_desc):
+ try:
+ return self.input_file_contents[source_desc]
+ except KeyError:
+ pass
+ source_file = source_desc.get_lines(encoding='ASCII',
+ error_handling='ignore')
+ try:
+ F = [u' * ' + line.rstrip().replace(
+ u'*/', u'*[inserted by cython to avoid comment closer]/'
+ ).replace(
+ u'/*', u'/[inserted by cython to avoid comment start]*'
+ )
+ for line in source_file]
+ finally:
+ if hasattr(source_file, 'close'):
+ source_file.close()
+ if not F: F.append(u'')
+ self.input_file_contents[source_desc] = F
+ return F
+
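commented_file_contents above has to keep embedded source lines from terminating the surrounding C block comment. The escaping boils down to the following (comment_safe is an illustrative helper):

    def comment_safe(line):
        # Keep '*/' and '/*' from closing or nesting the C block comment.
        return u' * ' + line.rstrip().replace(
            u'*/', u'*[inserted by cython to avoid comment closer]/'
        ).replace(
            u'/*', u'/[inserted by cython to avoid comment start]*'
        )

    print(comment_safe(u'x = 1  /* tricky */'))
    #  * x = 1  /[inserted by cython to avoid comment start]* tricky *[inserted by cython to avoid comment closer]/
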
+ #
+ # Utility code state
+ #
+
+ def use_utility_code(self, utility_code):
+ """
+ Adds code to the C file. utility_code should
+ a) implement __eq__/__hash__ for the purpose of knowing whether the same
+ code has already been included
+ b) implement put_code, which takes a globalstate instance
+
+ See UtilityCode.
+ """
if utility_code and utility_code not in self.utility_codes:
- self.utility_codes.add(utility_code)
- utility_code.put_code(self)
-
+ self.utility_codes.add(utility_code)
+ utility_code.put_code(self)
+
def use_entry_utility_code(self, entry):
if entry is None:
return
@@ -1660,18 +1660,18 @@ class GlobalState(object):
self.use_utility_code(entry.utility_code)
if entry.utility_code_definition:
self.use_utility_code(entry.utility_code_definition)
-
+
def funccontext_property(func):
name = func.__name__
- attribute_of = operator.attrgetter(name)
- def get(self):
- return attribute_of(self.funcstate)
- def set(self, value):
- setattr(self.funcstate, name, value)
- return property(get, set)
-
-
+ attribute_of = operator.attrgetter(name)
+ def get(self):
+ return attribute_of(self.funcstate)
+ def set(self, value):
+ setattr(self.funcstate, name, value)
+ return property(get, set)
+
+
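funccontext_property above builds properties that delegate to self.funcstate via operator.attrgetter. The same pattern in a standalone form (delegate_to, State and Writer are illustrative names):

    import operator

    def delegate_to(attr_path):
        # A property on the outer object that reads/writes an attribute of a
        # contained state object, e.g. 'funcstate.error_label'.
        target, _, name = attr_path.rpartition('.')
        get_target = operator.attrgetter(target)
        def get(self):
            return getattr(get_target(self), name)
        def set(self, value):
            setattr(get_target(self), name, value)
        return property(get, set)

    class State(object):
        def __init__(self):
            self.error_label = None

    class Writer(object):
        error_label = delegate_to('funcstate.error_label')
        def __init__(self):
            self.funcstate = State()

    w = Writer()
    w.error_label = "__pyx_L1_error"
    assert w.funcstate.error_label == "__pyx_L1_error"
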
class CCodeConfig(object):
# emit_linenums boolean write #line pragmas?
# emit_code_comments boolean copy the original code into C comments?
@@ -1683,110 +1683,110 @@ class CCodeConfig(object):
self.c_line_in_traceback = c_line_in_traceback
-class CCodeWriter(object):
- """
- Utility class to output C code.
-
- When creating an insertion point one must care about the state that is
- kept:
- - formatting state (level, bol) is cloned and used in insertion points
- as well
- - labels, temps, exc_vars: One must construct a scope in which these can
- exist by calling enter_cfunc_scope/exit_cfunc_scope (these are for
+class CCodeWriter(object):
+ """
+ Utility class to output C code.
+
+ When creating an insertion point one must care about the state that is
+ kept:
+ - formatting state (level, bol) is cloned and used in insertion points
+ as well
+ - labels, temps, exc_vars: One must construct a scope in which these can
+ exist by calling enter_cfunc_scope/exit_cfunc_scope (these are for
sanity checking and forward compatibility). Created insertion points
-      lose this scope and cannot access it.
- - marker: Not copied to insertion point
- - filename_table, filename_list, input_file_contents: All codewriters
- coming from the same root share the same instances simultaneously.
- """
-
- # f file output file
- # buffer StringIOTree
-
- # level int indentation level
- # bol bool beginning of line?
- # marker string comment to emit before next line
- # funcstate FunctionState contains state local to a C function used for code
- # generation (labels and temps state etc.)
- # globalstate GlobalState contains state global for a C file (input file info,
- # utility code, declared constants etc.)
- # pyclass_stack list used during recursive code generation to pass information
- # about the current class one is in
+      lose this scope and cannot access it.
+ - marker: Not copied to insertion point
+ - filename_table, filename_list, input_file_contents: All codewriters
+ coming from the same root share the same instances simultaneously.
+ """
+
+ # f file output file
+ # buffer StringIOTree
+
+ # level int indentation level
+ # bol bool beginning of line?
+ # marker string comment to emit before next line
+ # funcstate FunctionState contains state local to a C function used for code
+ # generation (labels and temps state etc.)
+ # globalstate GlobalState contains state global for a C file (input file info,
+ # utility code, declared constants etc.)
+ # pyclass_stack list used during recursive code generation to pass information
+ # about the current class one is in
# code_config CCodeConfig configuration options for the C code writer
-
+
@cython.locals(create_from='CCodeWriter')
def __init__(self, create_from=None, buffer=None, copy_formatting=False):
- if buffer is None: buffer = StringIOTree()
- self.buffer = buffer
+ if buffer is None: buffer = StringIOTree()
+ self.buffer = buffer
self.last_pos = None
self.last_marked_pos = None
- self.pyclass_stack = []
-
- self.funcstate = None
+ self.pyclass_stack = []
+
+ self.funcstate = None
self.globalstate = None
self.code_config = None
- self.level = 0
- self.call_level = 0
- self.bol = 1
-
- if create_from is not None:
- # Use same global state
+ self.level = 0
+ self.call_level = 0
+ self.bol = 1
+
+ if create_from is not None:
+ # Use same global state
self.set_global_state(create_from.globalstate)
- self.funcstate = create_from.funcstate
- # Clone formatting state
- if copy_formatting:
- self.level = create_from.level
- self.bol = create_from.bol
- self.call_level = create_from.call_level
+ self.funcstate = create_from.funcstate
+ # Clone formatting state
+ if copy_formatting:
+ self.level = create_from.level
+ self.bol = create_from.bol
+ self.call_level = create_from.call_level
self.last_pos = create_from.last_pos
self.last_marked_pos = create_from.last_marked_pos
-
- def create_new(self, create_from, buffer, copy_formatting):
- # polymorphic constructor -- very slightly more versatile
- # than using __class__
+
+ def create_new(self, create_from, buffer, copy_formatting):
+ # polymorphic constructor -- very slightly more versatile
+ # than using __class__
result = CCodeWriter(create_from, buffer, copy_formatting)
- return result
-
+ return result
+
def set_global_state(self, global_state):
assert self.globalstate is None # prevent overwriting once it's set
self.globalstate = global_state
self.code_config = global_state.code_config
- def copyto(self, f):
- self.buffer.copyto(f)
-
- def getvalue(self):
- return self.buffer.getvalue()
-
- def write(self, s):
- # also put invalid markers (lineno 0), to indicate that those lines
- # have no Cython source code correspondence
+ def copyto(self, f):
+ self.buffer.copyto(f)
+
+ def getvalue(self):
+ return self.buffer.getvalue()
+
+ def write(self, s):
+ # also put invalid markers (lineno 0), to indicate that those lines
+ # have no Cython source code correspondence
cython_lineno = self.last_marked_pos[1] if self.last_marked_pos else 0
- self.buffer.markers.extend([cython_lineno] * s.count('\n'))
- self.buffer.write(s)
-
- def insertion_point(self):
- other = self.create_new(create_from=self, buffer=self.buffer.insertion_point(), copy_formatting=True)
- return other
-
- def new_writer(self):
- """
- Creates a new CCodeWriter connected to the same global state, which
- can later be inserted using insert.
- """
+ self.buffer.markers.extend([cython_lineno] * s.count('\n'))
+ self.buffer.write(s)
+
+ def insertion_point(self):
+ other = self.create_new(create_from=self, buffer=self.buffer.insertion_point(), copy_formatting=True)
+ return other
+
+ def new_writer(self):
+ """
+ Creates a new CCodeWriter connected to the same global state, which
+ can later be inserted using insert.
+ """
return CCodeWriter(create_from=self)
-
- def insert(self, writer):
- """
- Inserts the contents of another code writer (created with
- the same global state) in the current location.
-
- It is ok to write to the inserted writer also after insertion.
- """
- assert writer.globalstate is self.globalstate
- self.buffer.insert(writer.buffer)
-
- # Properties delegated to function scope
+
+ def insert(self, writer):
+ """
+ Inserts the contents of another code writer (created with
+ the same global state) in the current location.
+
+ It is ok to write to the inserted writer also after insertion.
+ """
+ assert writer.globalstate is self.globalstate
+ self.buffer.insert(writer.buffer)
+
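The insertion_point()/insert() machinery above leans on the StringIOTree buffer: a writer can reserve a slot in the middle of the output and fill it in after later code has already been written, which is how declarations can end up ahead of the code that needed them. A minimal standalone sketch of the idea, using a hypothetical MiniTree class rather than Cython's actual StringIOTree:

from io import StringIO

class MiniTree:
    """Toy buffer tree: parts are StringIO chunks or nested MiniTrees."""
    def __init__(self):
        self.parts = [StringIO()]

    def write(self, s):
        self.parts[-1].write(s)

    def insertion_point(self):
        # Splice a child tree into the output at the current position,
        # then continue writing into a fresh chunk placed after it.
        child = MiniTree()
        self.parts.append(child)
        self.parts.append(StringIO())
        return child

    def getvalue(self):
        return "".join(p.getvalue() for p in self.parts)

buf = MiniTree()
buf.write("int main(void) {\n")
decls = buf.insertion_point()   # reserve a slot for declarations
buf.write("    x = 42;\n")
decls.write("    int x;\n")     # written later, appears earlier in the output
buf.write("    return x;\n}\n")
print(buf.getvalue())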
+ # Properties delegated to function scope
@funccontext_property
def label_counter(self): pass
@funccontext_property
@@ -1803,77 +1803,77 @@ class CCodeWriter(object):
def return_from_error_cleanup_label(self): pass
@funccontext_property
def yield_labels(self): pass
-
- # Functions delegated to function scope
- def new_label(self, name=None): return self.funcstate.new_label(name)
- def new_error_label(self): return self.funcstate.new_error_label()
+
+ # Functions delegated to function scope
+ def new_label(self, name=None): return self.funcstate.new_label(name)
+ def new_error_label(self): return self.funcstate.new_error_label()
def new_yield_label(self, *args): return self.funcstate.new_yield_label(*args)
- def get_loop_labels(self): return self.funcstate.get_loop_labels()
- def set_loop_labels(self, labels): return self.funcstate.set_loop_labels(labels)
- def new_loop_labels(self): return self.funcstate.new_loop_labels()
- def get_all_labels(self): return self.funcstate.get_all_labels()
- def set_all_labels(self, labels): return self.funcstate.set_all_labels(labels)
- def all_new_labels(self): return self.funcstate.all_new_labels()
- def use_label(self, lbl): return self.funcstate.use_label(lbl)
- def label_used(self, lbl): return self.funcstate.label_used(lbl)
-
-
+ def get_loop_labels(self): return self.funcstate.get_loop_labels()
+ def set_loop_labels(self, labels): return self.funcstate.set_loop_labels(labels)
+ def new_loop_labels(self): return self.funcstate.new_loop_labels()
+ def get_all_labels(self): return self.funcstate.get_all_labels()
+ def set_all_labels(self, labels): return self.funcstate.set_all_labels(labels)
+ def all_new_labels(self): return self.funcstate.all_new_labels()
+ def use_label(self, lbl): return self.funcstate.use_label(lbl)
+ def label_used(self, lbl): return self.funcstate.label_used(lbl)
+
+
def enter_cfunc_scope(self, scope=None):
self.funcstate = FunctionState(self, scope=scope)
-
- def exit_cfunc_scope(self):
- self.funcstate = None
-
- # constant handling
-
- def get_py_int(self, str_value, longness):
- return self.globalstate.get_int_const(str_value, longness).cname
-
- def get_py_float(self, str_value, value_code):
- return self.globalstate.get_float_const(str_value, value_code).cname
-
+
+ def exit_cfunc_scope(self):
+ self.funcstate = None
+
+ # constant handling
+
+ def get_py_int(self, str_value, longness):
+ return self.globalstate.get_int_const(str_value, longness).cname
+
+ def get_py_float(self, str_value, value_code):
+ return self.globalstate.get_float_const(str_value, value_code).cname
+
def get_py_const(self, type, prefix='', cleanup_level=None, dedup_key=None):
return self.globalstate.get_py_const(type, prefix, cleanup_level, dedup_key).cname
-
- def get_string_const(self, text):
- return self.globalstate.get_string_const(text).cname
-
- def get_pyunicode_ptr_const(self, text):
- return self.globalstate.get_pyunicode_ptr_const(text)
-
- def get_py_string_const(self, text, identifier=None,
- is_str=False, unicode_value=None):
- return self.globalstate.get_py_string_const(
- text, identifier, is_str, unicode_value).cname
-
- def get_argument_default_const(self, type):
- return self.globalstate.get_py_const(type).cname
-
- def intern(self, text):
- return self.get_py_string_const(text)
-
- def intern_identifier(self, text):
- return self.get_py_string_const(text, identifier=True)
-
+
+ def get_string_const(self, text):
+ return self.globalstate.get_string_const(text).cname
+
+ def get_pyunicode_ptr_const(self, text):
+ return self.globalstate.get_pyunicode_ptr_const(text)
+
+ def get_py_string_const(self, text, identifier=None,
+ is_str=False, unicode_value=None):
+ return self.globalstate.get_py_string_const(
+ text, identifier, is_str, unicode_value).cname
+
+ def get_argument_default_const(self, type):
+ return self.globalstate.get_py_const(type).cname
+
+ def intern(self, text):
+ return self.get_py_string_const(text)
+
+ def intern_identifier(self, text):
+ return self.get_py_string_const(text, identifier=True)
+
def get_cached_constants_writer(self, target=None):
return self.globalstate.get_cached_constants_writer(target)
-
- # code generation
-
- def putln(self, code="", safe=False):
+
+ # code generation
+
+ def putln(self, code="", safe=False):
if self.last_pos and self.bol:
- self.emit_marker()
+ self.emit_marker()
if self.code_config.emit_linenums and self.last_marked_pos:
source_desc, line, _ = self.last_marked_pos
self.write('\n#line %s "%s"\n' % (line, source_desc.get_escaped_description()))
- if code:
- if safe:
- self.put_safe(code)
- else:
- self.put(code)
- self.write("\n")
- self.bol = 1
-
+ if code:
+ if safe:
+ self.put_safe(code)
+ else:
+ self.put(code)
+ self.write("\n")
+ self.bol = 1
+
def mark_pos(self, pos, trace=True):
if pos is None:
return
@@ -1881,19 +1881,19 @@ class CCodeWriter(object):
return
self.last_pos = (pos, trace)
- def emit_marker(self):
+ def emit_marker(self):
pos, trace = self.last_pos
self.last_marked_pos = pos
self.last_pos = None
- self.write("\n")
+ self.write("\n")
if self.code_config.emit_code_comments:
self.indent()
self.write("/* %s */\n" % self._build_marker(pos))
if trace and self.funcstate and self.funcstate.can_trace and self.globalstate.directives['linetrace']:
- self.indent()
+ self.indent()
self.write('__Pyx_TraceLine(%d,%d,%s)\n' % (
pos[1], not self.funcstate.gil_owned, self.error_goto(pos)))
-
+
def _build_marker(self, pos):
source_desc, line, col = pos
assert isinstance(source_desc, SourceDescriptor)
@@ -1903,334 +1903,334 @@ class CCodeWriter(object):
lines += contents[line:line+2]
return u'"%s":%d\n%s\n' % (source_desc.get_escaped_description(), line, u'\n'.join(lines))
- def put_safe(self, code):
- # put code, but ignore {}
- self.write(code)
- self.bol = 0
-
- def put_or_include(self, code, name):
- include_dir = self.globalstate.common_utility_include_dir
- if include_dir and len(code) > 1024:
- include_file = "%s_%s.h" % (
- name, hashlib.md5(code.encode('utf8')).hexdigest())
- path = os.path.join(include_dir, include_file)
- if not os.path.exists(path):
- tmp_path = '%s.tmp%s' % (path, os.getpid())
+ def put_safe(self, code):
+ # put code, but ignore {}
+ self.write(code)
+ self.bol = 0
+
+ def put_or_include(self, code, name):
+ include_dir = self.globalstate.common_utility_include_dir
+ if include_dir and len(code) > 1024:
+ include_file = "%s_%s.h" % (
+ name, hashlib.md5(code.encode('utf8')).hexdigest())
+ path = os.path.join(include_dir, include_file)
+ if not os.path.exists(path):
+ tmp_path = '%s.tmp%s' % (path, os.getpid())
with closing(Utils.open_new_file(tmp_path)) as f:
- f.write(code)
+ f.write(code)
shutil.move(tmp_path, path)
- code = '#include "%s"\n' % path
- self.put(code)
-
- def put(self, code):
- fix_indent = False
- if "{" in code:
- dl = code.count("{")
- else:
- dl = 0
- if "}" in code:
- dl -= code.count("}")
- if dl < 0:
- self.level += dl
- elif dl == 0 and code[0] == "}":
- # special cases like "} else {" need a temporary dedent
- fix_indent = True
- self.level -= 1
- if self.bol:
- self.indent()
- self.write(code)
- self.bol = 0
- if dl > 0:
- self.level += dl
- elif fix_indent:
- self.level += 1
-
- def putln_tempita(self, code, **context):
- from ..Tempita import sub
- self.putln(sub(code, **context))
-
- def put_tempita(self, code, **context):
- from ..Tempita import sub
- self.put(sub(code, **context))
-
- def increase_indent(self):
- self.level += 1
-
- def decrease_indent(self):
- self.level -= 1
-
- def begin_block(self):
- self.putln("{")
- self.increase_indent()
-
- def end_block(self):
- self.decrease_indent()
- self.putln("}")
-
- def indent(self):
- self.write(" " * self.level)
-
- def get_py_version_hex(self, pyversion):
- return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
-
- def put_label(self, lbl):
- if lbl in self.funcstate.labels_used:
- self.putln("%s:;" % lbl)
-
- def put_goto(self, lbl):
- self.funcstate.use_label(lbl)
- self.putln("goto %s;" % lbl)
-
- def put_var_declaration(self, entry, storage_class="",
- dll_linkage=None, definition=True):
- #print "Code.put_var_declaration:", entry.name, "definition =", definition ###
- if entry.visibility == 'private' and not (definition or entry.defined_in_pxd):
- #print "...private and not definition, skipping", entry.cname ###
- return
- if entry.visibility == "private" and not entry.used:
- #print "...private and not used, skipping", entry.cname ###
- return
- if storage_class:
- self.put("%s " % storage_class)
- if not entry.cf_used:
- self.put('CYTHON_UNUSED ')
- self.put(entry.type.declaration_code(
- entry.cname, dll_linkage=dll_linkage))
- if entry.init is not None:
- self.put_safe(" = %s" % entry.type.literal_code(entry.init))
- elif entry.type.is_pyobject:
- self.put(" = NULL")
- self.putln(";")
-
- def put_temp_declarations(self, func_context):
- for name, type, manage_ref, static in func_context.temps_allocated:
- decl = type.declaration_code(name)
- if type.is_pyobject:
- self.putln("%s = NULL;" % decl)
- elif type.is_memoryviewslice:
- from . import MemoryView
- self.putln("%s = %s;" % (decl, MemoryView.memslice_entry_init))
- else:
- self.putln("%s%s;" % (static and "static " or "", decl))
-
- if func_context.should_declare_error_indicator:
- if self.funcstate.uses_error_indicator:
- unused = ''
- else:
- unused = 'CYTHON_UNUSED '
- # Initialize these variables to silence compiler warnings
- self.putln("%sint %s = 0;" % (unused, Naming.lineno_cname))
- self.putln("%sconst char *%s = NULL;" % (unused, Naming.filename_cname))
- self.putln("%sint %s = 0;" % (unused, Naming.clineno_cname))
-
+ code = '#include "%s"\n' % path
+ self.put(code)
+
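put_or_include() above keeps very large utility-code blocks out of every generated C file: when a common include directory is configured and the code exceeds 1024 bytes, the code is written once to a content-hashed header and the output only gets an #include line. A small sketch of just the naming scheme it uses (the helper name is illustrative, not part of this diff):

import hashlib

def shared_include_name(name, code):
    # Content-addressed file name: identical utility code hashes to the
    # same header, so many generated modules can share one on-disk copy.
    return "%s_%s.h" % (name, hashlib.md5(code.encode("utf8")).hexdigest())

print(shared_include_name("CommonTypes", "/* shared helper code */"))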
+ def put(self, code):
+ fix_indent = False
+ if "{" in code:
+ dl = code.count("{")
+ else:
+ dl = 0
+ if "}" in code:
+ dl -= code.count("}")
+ if dl < 0:
+ self.level += dl
+ elif dl == 0 and code[0] == "}":
+ # special cases like "} else {" need a temporary dedent
+ fix_indent = True
+ self.level -= 1
+ if self.bol:
+ self.indent()
+ self.write(code)
+ self.bol = 0
+ if dl > 0:
+ self.level += dl
+ elif fix_indent:
+ self.level += 1
+
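put() above derives indentation from the braces in the emitted C: a net-negative brace count dedents before the line is written, a net-positive count indents afterwards, and a balanced line that starts with "}" (such as "} else {") gets a one-line temporary dedent. The same bookkeeping in a standalone sketch (a hypothetical helper that ignores the bol and marker handling):

def simulate_put(lines):
    level, out = 0, []
    for code in lines:
        dl = code.count("{") - code.count("}")
        fix_indent = False
        if dl < 0:
            level += dl                        # dedent before closing braces
        elif dl == 0 and code.startswith("}"):
            fix_indent = True                  # "} else {" needs a temporary dedent
            level -= 1
        out.append("    " * level + code)
        if dl > 0:
            level += dl                        # indent after opening braces
        elif fix_indent:
            level += 1                         # restore the temporary dedent
    return "\n".join(out)

print(simulate_put(["if (x) {", "y = 1;", "} else {", "y = 2;", "}"]))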
+ def putln_tempita(self, code, **context):
+ from ..Tempita import sub
+ self.putln(sub(code, **context))
+
+ def put_tempita(self, code, **context):
+ from ..Tempita import sub
+ self.put(sub(code, **context))
+
+ def increase_indent(self):
+ self.level += 1
+
+ def decrease_indent(self):
+ self.level -= 1
+
+ def begin_block(self):
+ self.putln("{")
+ self.increase_indent()
+
+ def end_block(self):
+ self.decrease_indent()
+ self.putln("}")
+
+ def indent(self):
+ self.write(" " * self.level)
+
+ def get_py_version_hex(self, pyversion):
+ return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
+
+ def put_label(self, lbl):
+ if lbl in self.funcstate.labels_used:
+ self.putln("%s:;" % lbl)
+
+ def put_goto(self, lbl):
+ self.funcstate.use_label(lbl)
+ self.putln("goto %s;" % lbl)
+
+ def put_var_declaration(self, entry, storage_class="",
+ dll_linkage=None, definition=True):
+ #print "Code.put_var_declaration:", entry.name, "definition =", definition ###
+ if entry.visibility == 'private' and not (definition or entry.defined_in_pxd):
+ #print "...private and not definition, skipping", entry.cname ###
+ return
+ if entry.visibility == "private" and not entry.used:
+ #print "...private and not used, skipping", entry.cname ###
+ return
+ if storage_class:
+ self.put("%s " % storage_class)
+ if not entry.cf_used:
+ self.put('CYTHON_UNUSED ')
+ self.put(entry.type.declaration_code(
+ entry.cname, dll_linkage=dll_linkage))
+ if entry.init is not None:
+ self.put_safe(" = %s" % entry.type.literal_code(entry.init))
+ elif entry.type.is_pyobject:
+ self.put(" = NULL")
+ self.putln(";")
+
+ def put_temp_declarations(self, func_context):
+ for name, type, manage_ref, static in func_context.temps_allocated:
+ decl = type.declaration_code(name)
+ if type.is_pyobject:
+ self.putln("%s = NULL;" % decl)
+ elif type.is_memoryviewslice:
+ from . import MemoryView
+ self.putln("%s = %s;" % (decl, MemoryView.memslice_entry_init))
+ else:
+ self.putln("%s%s;" % (static and "static " or "", decl))
+
+ if func_context.should_declare_error_indicator:
+ if self.funcstate.uses_error_indicator:
+ unused = ''
+ else:
+ unused = 'CYTHON_UNUSED '
+ # Initialize these variables to silence compiler warnings
+ self.putln("%sint %s = 0;" % (unused, Naming.lineno_cname))
+ self.putln("%sconst char *%s = NULL;" % (unused, Naming.filename_cname))
+ self.putln("%sint %s = 0;" % (unused, Naming.clineno_cname))
+
def put_generated_by(self):
self.putln("/* Generated by Cython %s */" % Version.watermark)
self.putln("")
- def put_h_guard(self, guard):
- self.putln("#ifndef %s" % guard)
- self.putln("#define %s" % guard)
-
- def unlikely(self, cond):
- if Options.gcc_branch_hints:
- return 'unlikely(%s)' % cond
- else:
- return cond
-
- def build_function_modifiers(self, modifiers, mapper=modifier_output_mapper):
- if not modifiers:
- return ''
- return '%s ' % ' '.join([mapper(m,m) for m in modifiers])
-
- # Python objects and reference counting
-
- def entry_as_pyobject(self, entry):
- type = entry.type
- if (not entry.is_self_arg and not entry.type.is_complete()
- or entry.type.is_extension_type):
- return "(PyObject *)" + entry.cname
- else:
- return entry.cname
-
- def as_pyobject(self, cname, type):
- from .PyrexTypes import py_object_type, typecast
- return typecast(py_object_type, type, cname)
-
- def put_gotref(self, cname):
- self.putln("__Pyx_GOTREF(%s);" % cname)
-
- def put_giveref(self, cname):
- self.putln("__Pyx_GIVEREF(%s);" % cname)
-
- def put_xgiveref(self, cname):
- self.putln("__Pyx_XGIVEREF(%s);" % cname)
-
- def put_xgotref(self, cname):
- self.putln("__Pyx_XGOTREF(%s);" % cname)
-
- def put_incref(self, cname, type, nanny=True):
- if nanny:
- self.putln("__Pyx_INCREF(%s);" % self.as_pyobject(cname, type))
- else:
- self.putln("Py_INCREF(%s);" % self.as_pyobject(cname, type))
-
- def put_decref(self, cname, type, nanny=True):
- self._put_decref(cname, type, nanny, null_check=False, clear=False)
-
- def put_var_gotref(self, entry):
- if entry.type.is_pyobject:
- self.putln("__Pyx_GOTREF(%s);" % self.entry_as_pyobject(entry))
-
- def put_var_giveref(self, entry):
- if entry.type.is_pyobject:
- self.putln("__Pyx_GIVEREF(%s);" % self.entry_as_pyobject(entry))
-
- def put_var_xgotref(self, entry):
- if entry.type.is_pyobject:
- self.putln("__Pyx_XGOTREF(%s);" % self.entry_as_pyobject(entry))
-
- def put_var_xgiveref(self, entry):
- if entry.type.is_pyobject:
- self.putln("__Pyx_XGIVEREF(%s);" % self.entry_as_pyobject(entry))
-
+ def put_h_guard(self, guard):
+ self.putln("#ifndef %s" % guard)
+ self.putln("#define %s" % guard)
+
+ def unlikely(self, cond):
+ if Options.gcc_branch_hints:
+ return 'unlikely(%s)' % cond
+ else:
+ return cond
+
+ def build_function_modifiers(self, modifiers, mapper=modifier_output_mapper):
+ if not modifiers:
+ return ''
+ return '%s ' % ' '.join([mapper(m,m) for m in modifiers])
+
+ # Python objects and reference counting
+
+ def entry_as_pyobject(self, entry):
+ type = entry.type
+ if (not entry.is_self_arg and not entry.type.is_complete()
+ or entry.type.is_extension_type):
+ return "(PyObject *)" + entry.cname
+ else:
+ return entry.cname
+
+ def as_pyobject(self, cname, type):
+ from .PyrexTypes import py_object_type, typecast
+ return typecast(py_object_type, type, cname)
+
+ def put_gotref(self, cname):
+ self.putln("__Pyx_GOTREF(%s);" % cname)
+
+ def put_giveref(self, cname):
+ self.putln("__Pyx_GIVEREF(%s);" % cname)
+
+ def put_xgiveref(self, cname):
+ self.putln("__Pyx_XGIVEREF(%s);" % cname)
+
+ def put_xgotref(self, cname):
+ self.putln("__Pyx_XGOTREF(%s);" % cname)
+
+ def put_incref(self, cname, type, nanny=True):
+ if nanny:
+ self.putln("__Pyx_INCREF(%s);" % self.as_pyobject(cname, type))
+ else:
+ self.putln("Py_INCREF(%s);" % self.as_pyobject(cname, type))
+
+ def put_decref(self, cname, type, nanny=True):
+ self._put_decref(cname, type, nanny, null_check=False, clear=False)
+
+ def put_var_gotref(self, entry):
+ if entry.type.is_pyobject:
+ self.putln("__Pyx_GOTREF(%s);" % self.entry_as_pyobject(entry))
+
+ def put_var_giveref(self, entry):
+ if entry.type.is_pyobject:
+ self.putln("__Pyx_GIVEREF(%s);" % self.entry_as_pyobject(entry))
+
+ def put_var_xgotref(self, entry):
+ if entry.type.is_pyobject:
+ self.putln("__Pyx_XGOTREF(%s);" % self.entry_as_pyobject(entry))
+
+ def put_var_xgiveref(self, entry):
+ if entry.type.is_pyobject:
+ self.putln("__Pyx_XGIVEREF(%s);" % self.entry_as_pyobject(entry))
+
def put_var_incref(self, entry, nanny=True):
- if entry.type.is_pyobject:
+ if entry.type.is_pyobject:
if nanny:
self.putln("__Pyx_INCREF(%s);" % self.entry_as_pyobject(entry))
else:
self.putln("Py_INCREF(%s);" % self.entry_as_pyobject(entry))
-
+
def put_var_xincref(self, entry):
if entry.type.is_pyobject:
self.putln("__Pyx_XINCREF(%s);" % self.entry_as_pyobject(entry))
- def put_decref_clear(self, cname, type, nanny=True, clear_before_decref=False):
- self._put_decref(cname, type, nanny, null_check=False,
- clear=True, clear_before_decref=clear_before_decref)
-
- def put_xdecref(self, cname, type, nanny=True, have_gil=True):
- self._put_decref(cname, type, nanny, null_check=True,
- have_gil=have_gil, clear=False)
-
- def put_xdecref_clear(self, cname, type, nanny=True, clear_before_decref=False):
- self._put_decref(cname, type, nanny, null_check=True,
- clear=True, clear_before_decref=clear_before_decref)
-
- def _put_decref(self, cname, type, nanny=True, null_check=False,
- have_gil=True, clear=False, clear_before_decref=False):
- if type.is_memoryviewslice:
- self.put_xdecref_memoryviewslice(cname, have_gil=have_gil)
- return
-
+ def put_decref_clear(self, cname, type, nanny=True, clear_before_decref=False):
+ self._put_decref(cname, type, nanny, null_check=False,
+ clear=True, clear_before_decref=clear_before_decref)
+
+ def put_xdecref(self, cname, type, nanny=True, have_gil=True):
+ self._put_decref(cname, type, nanny, null_check=True,
+ have_gil=have_gil, clear=False)
+
+ def put_xdecref_clear(self, cname, type, nanny=True, clear_before_decref=False):
+ self._put_decref(cname, type, nanny, null_check=True,
+ clear=True, clear_before_decref=clear_before_decref)
+
+ def _put_decref(self, cname, type, nanny=True, null_check=False,
+ have_gil=True, clear=False, clear_before_decref=False):
+ if type.is_memoryviewslice:
+ self.put_xdecref_memoryviewslice(cname, have_gil=have_gil)
+ return
+
prefix = '__Pyx' if nanny else 'Py'
X = 'X' if null_check else ''
-
- if clear:
- if clear_before_decref:
- if not nanny:
- X = '' # CPython doesn't have a Py_XCLEAR()
- self.putln("%s_%sCLEAR(%s);" % (prefix, X, cname))
- else:
- self.putln("%s_%sDECREF(%s); %s = 0;" % (
- prefix, X, self.as_pyobject(cname, type), cname))
- else:
- self.putln("%s_%sDECREF(%s);" % (
- prefix, X, self.as_pyobject(cname, type)))
-
- def put_decref_set(self, cname, rhs_cname):
- self.putln("__Pyx_DECREF_SET(%s, %s);" % (cname, rhs_cname))
-
- def put_xdecref_set(self, cname, rhs_cname):
- self.putln("__Pyx_XDECREF_SET(%s, %s);" % (cname, rhs_cname))
-
- def put_var_decref(self, entry):
- if entry.type.is_pyobject:
- self.putln("__Pyx_XDECREF(%s);" % self.entry_as_pyobject(entry))
-
+
+ if clear:
+ if clear_before_decref:
+ if not nanny:
+ X = '' # CPython doesn't have a Py_XCLEAR()
+ self.putln("%s_%sCLEAR(%s);" % (prefix, X, cname))
+ else:
+ self.putln("%s_%sDECREF(%s); %s = 0;" % (
+ prefix, X, self.as_pyobject(cname, type), cname))
+ else:
+ self.putln("%s_%sDECREF(%s);" % (
+ prefix, X, self.as_pyobject(cname, type)))
+
+ def put_decref_set(self, cname, rhs_cname):
+ self.putln("__Pyx_DECREF_SET(%s, %s);" % (cname, rhs_cname))
+
+ def put_xdecref_set(self, cname, rhs_cname):
+ self.putln("__Pyx_XDECREF_SET(%s, %s);" % (cname, rhs_cname))
+
+ def put_var_decref(self, entry):
+ if entry.type.is_pyobject:
+ self.putln("__Pyx_XDECREF(%s);" % self.entry_as_pyobject(entry))
+
def put_var_xdecref(self, entry, nanny=True):
- if entry.type.is_pyobject:
+ if entry.type.is_pyobject:
if nanny:
self.putln("__Pyx_XDECREF(%s);" % self.entry_as_pyobject(entry))
else:
self.putln("Py_XDECREF(%s);" % self.entry_as_pyobject(entry))
-
- def put_var_decref_clear(self, entry):
- self._put_var_decref_clear(entry, null_check=False)
-
- def put_var_xdecref_clear(self, entry):
- self._put_var_decref_clear(entry, null_check=True)
-
- def _put_var_decref_clear(self, entry, null_check):
- if entry.type.is_pyobject:
- if entry.in_closure:
- # reset before DECREF to make sure closure state is
- # consistent during call to DECREF()
- self.putln("__Pyx_%sCLEAR(%s);" % (
- null_check and 'X' or '',
- entry.cname))
- else:
- self.putln("__Pyx_%sDECREF(%s); %s = 0;" % (
- null_check and 'X' or '',
- self.entry_as_pyobject(entry),
- entry.cname))
-
- def put_var_decrefs(self, entries, used_only = 0):
- for entry in entries:
- if not used_only or entry.used:
- if entry.xdecref_cleanup:
- self.put_var_xdecref(entry)
- else:
- self.put_var_decref(entry)
-
- def put_var_xdecrefs(self, entries):
- for entry in entries:
- self.put_var_xdecref(entry)
-
- def put_var_xdecrefs_clear(self, entries):
- for entry in entries:
- self.put_var_xdecref_clear(entry)
-
- def put_incref_memoryviewslice(self, slice_cname, have_gil=False):
- from . import MemoryView
- self.globalstate.use_utility_code(MemoryView.memviewslice_init_code)
- self.putln("__PYX_INC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil)))
-
- def put_xdecref_memoryviewslice(self, slice_cname, have_gil=False):
- from . import MemoryView
- self.globalstate.use_utility_code(MemoryView.memviewslice_init_code)
- self.putln("__PYX_XDEC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil)))
-
- def put_xgiveref_memoryviewslice(self, slice_cname):
- self.put_xgiveref("%s.memview" % slice_cname)
-
- def put_init_to_py_none(self, cname, type, nanny=True):
- from .PyrexTypes import py_object_type, typecast
- py_none = typecast(type, py_object_type, "Py_None")
- if nanny:
- self.putln("%s = %s; __Pyx_INCREF(Py_None);" % (cname, py_none))
- else:
- self.putln("%s = %s; Py_INCREF(Py_None);" % (cname, py_none))
-
- def put_init_var_to_py_none(self, entry, template = "%s", nanny=True):
- code = template % entry.cname
- #if entry.type.is_extension_type:
- # code = "((PyObject*)%s)" % code
- self.put_init_to_py_none(code, entry.type, nanny)
- if entry.in_closure:
- self.put_giveref('Py_None')
-
+
+ def put_var_decref_clear(self, entry):
+ self._put_var_decref_clear(entry, null_check=False)
+
+ def put_var_xdecref_clear(self, entry):
+ self._put_var_decref_clear(entry, null_check=True)
+
+ def _put_var_decref_clear(self, entry, null_check):
+ if entry.type.is_pyobject:
+ if entry.in_closure:
+ # reset before DECREF to make sure closure state is
+ # consistent during call to DECREF()
+ self.putln("__Pyx_%sCLEAR(%s);" % (
+ null_check and 'X' or '',
+ entry.cname))
+ else:
+ self.putln("__Pyx_%sDECREF(%s); %s = 0;" % (
+ null_check and 'X' or '',
+ self.entry_as_pyobject(entry),
+ entry.cname))
+
+ def put_var_decrefs(self, entries, used_only = 0):
+ for entry in entries:
+ if not used_only or entry.used:
+ if entry.xdecref_cleanup:
+ self.put_var_xdecref(entry)
+ else:
+ self.put_var_decref(entry)
+
+ def put_var_xdecrefs(self, entries):
+ for entry in entries:
+ self.put_var_xdecref(entry)
+
+ def put_var_xdecrefs_clear(self, entries):
+ for entry in entries:
+ self.put_var_xdecref_clear(entry)
+
+ def put_incref_memoryviewslice(self, slice_cname, have_gil=False):
+ from . import MemoryView
+ self.globalstate.use_utility_code(MemoryView.memviewslice_init_code)
+ self.putln("__PYX_INC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil)))
+
+ def put_xdecref_memoryviewslice(self, slice_cname, have_gil=False):
+ from . import MemoryView
+ self.globalstate.use_utility_code(MemoryView.memviewslice_init_code)
+ self.putln("__PYX_XDEC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil)))
+
+ def put_xgiveref_memoryviewslice(self, slice_cname):
+ self.put_xgiveref("%s.memview" % slice_cname)
+
+ def put_init_to_py_none(self, cname, type, nanny=True):
+ from .PyrexTypes import py_object_type, typecast
+ py_none = typecast(type, py_object_type, "Py_None")
+ if nanny:
+ self.putln("%s = %s; __Pyx_INCREF(Py_None);" % (cname, py_none))
+ else:
+ self.putln("%s = %s; Py_INCREF(Py_None);" % (cname, py_none))
+
+ def put_init_var_to_py_none(self, entry, template = "%s", nanny=True):
+ code = template % entry.cname
+ #if entry.type.is_extension_type:
+ # code = "((PyObject*)%s)" % code
+ self.put_init_to_py_none(code, entry.type, nanny)
+ if entry.in_closure:
+ self.put_giveref('Py_None')
+
def put_pymethoddef(self, entry, term, allow_skip=True, wrapper_code_writer=None):
- if entry.is_special or entry.name == '__getattribute__':
+ if entry.is_special or entry.name == '__getattribute__':
if entry.name not in special_py_methods:
- if entry.name == '__getattr__' and not self.globalstate.directives['fast_getattr']:
- pass
- # Python's typeobject.c will automatically fill in our slot
- # in add_operators() (called by PyType_Ready) with a value
- # that's better than ours.
- elif allow_skip:
- return
-
- method_flags = entry.signature.method_flags()
+ if entry.name == '__getattr__' and not self.globalstate.directives['fast_getattr']:
+ pass
+ # Python's typeobject.c will automatically fill in our slot
+ # in add_operators() (called by PyType_Ready) with a value
+ # that's better than ours.
+ elif allow_skip:
+ return
+
+ method_flags = entry.signature.method_flags()
if not method_flags:
return
if entry.is_special:
@@ -2250,7 +2250,7 @@ class CCodeWriter(object):
"|".join(method_flags),
entry.doc_cname if entry.doc else '0',
term))
-
+
def put_pymethoddef_wrapper(self, entry):
func_cname = entry.func_cname
if entry.is_special:
@@ -2262,117 +2262,117 @@ class CCodeWriter(object):
func_cname, entry.func_cname))
return func_cname
- # GIL methods
-
- def put_ensure_gil(self, declare_gilstate=True, variable=None):
- """
- Acquire the GIL. The generated code is safe even when no PyThreadState
- has been allocated for this thread (for threads not initialized by
- using the Python API). Additionally, the code generated by this method
- may be called recursively.
- """
- self.globalstate.use_utility_code(
- UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
+ # GIL methods
+
+ def put_ensure_gil(self, declare_gilstate=True, variable=None):
+ """
+ Acquire the GIL. The generated code is safe even when no PyThreadState
+ has been allocated for this thread (for threads not initialized by
+ using the Python API). Additionally, the code generated by this method
+ may be called recursively.
+ """
+ self.globalstate.use_utility_code(
+ UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
if self.globalstate.directives['fast_gil']:
self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c"))
else:
self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c"))
- self.putln("#ifdef WITH_THREAD")
- if not variable:
- variable = '__pyx_gilstate_save'
- if declare_gilstate:
- self.put("PyGILState_STATE ")
+ self.putln("#ifdef WITH_THREAD")
+ if not variable:
+ variable = '__pyx_gilstate_save'
+ if declare_gilstate:
+ self.put("PyGILState_STATE ")
self.putln("%s = __Pyx_PyGILState_Ensure();" % variable)
- self.putln("#endif")
-
- def put_release_ensured_gil(self, variable=None):
- """
- Releases the GIL, corresponds to `put_ensure_gil`.
- """
+ self.putln("#endif")
+
+ def put_release_ensured_gil(self, variable=None):
+ """
+ Releases the GIL, corresponds to `put_ensure_gil`.
+ """
if self.globalstate.directives['fast_gil']:
self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c"))
else:
self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c"))
- if not variable:
- variable = '__pyx_gilstate_save'
- self.putln("#ifdef WITH_THREAD")
+ if not variable:
+ variable = '__pyx_gilstate_save'
+ self.putln("#ifdef WITH_THREAD")
self.putln("__Pyx_PyGILState_Release(%s);" % variable)
- self.putln("#endif")
-
- def put_acquire_gil(self, variable=None):
- """
- Acquire the GIL. The thread's thread state must have been initialized
- by a previous `put_release_gil`
- """
+ self.putln("#endif")
+
+ def put_acquire_gil(self, variable=None):
+ """
+ Acquire the GIL. The thread's thread state must have been initialized
+ by a previous `put_release_gil`
+ """
if self.globalstate.directives['fast_gil']:
self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c"))
else:
self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c"))
- self.putln("#ifdef WITH_THREAD")
+ self.putln("#ifdef WITH_THREAD")
self.putln("__Pyx_FastGIL_Forget();")
- if variable:
- self.putln('_save = %s;' % variable)
- self.putln("Py_BLOCK_THREADS")
- self.putln("#endif")
-
- def put_release_gil(self, variable=None):
- "Release the GIL, corresponds to `put_acquire_gil`."
+ if variable:
+ self.putln('_save = %s;' % variable)
+ self.putln("Py_BLOCK_THREADS")
+ self.putln("#endif")
+
+ def put_release_gil(self, variable=None):
+ "Release the GIL, corresponds to `put_acquire_gil`."
if self.globalstate.directives['fast_gil']:
self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c"))
else:
self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c"))
- self.putln("#ifdef WITH_THREAD")
- self.putln("PyThreadState *_save;")
- self.putln("Py_UNBLOCK_THREADS")
- if variable:
- self.putln('%s = _save;' % variable)
+ self.putln("#ifdef WITH_THREAD")
+ self.putln("PyThreadState *_save;")
+ self.putln("Py_UNBLOCK_THREADS")
+ if variable:
+ self.putln('%s = _save;' % variable)
self.putln("__Pyx_FastGIL_Remember();")
- self.putln("#endif")
-
- def declare_gilstate(self):
- self.putln("#ifdef WITH_THREAD")
- self.putln("PyGILState_STATE __pyx_gilstate_save;")
- self.putln("#endif")
-
- # error handling
-
- def put_error_if_neg(self, pos, value):
+ self.putln("#endif")
+
+ def declare_gilstate(self):
+ self.putln("#ifdef WITH_THREAD")
+ self.putln("PyGILState_STATE __pyx_gilstate_save;")
+ self.putln("#endif")
+
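The GIL helpers above come in two matched pairs: put_ensure_gil()/put_release_ensured_gil() wrap a generated region in PyGILState_Ensure/Release guarded by WITH_THREAD, while put_release_gil()/put_acquire_gil() do the reverse for regions that start out holding the GIL. A hedged sketch of how a caller might pair the first two (the helper name and the emit_body callback are assumptions, not part of this diff; writer is a CCodeWriter whose global state is already set):

def with_ensured_gil(writer, emit_body):
    # Acquire the GIL (declaring __pyx_gilstate_save), generate the body,
    # then release it again so the surrounding nogil code is unaffected.
    writer.put_ensure_gil(declare_gilstate=True)
    emit_body(writer)
    writer.put_release_ensured_gil()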
+ # error handling
+
+ def put_error_if_neg(self, pos, value):
        # TODO this path is almost _never_ taken, yet this macro makes it slower!
# return self.putln("if (unlikely(%s < 0)) %s" % (value, self.error_goto(pos)))
- return self.putln("if (%s < 0) %s" % (value, self.error_goto(pos)))
-
- def put_error_if_unbound(self, pos, entry, in_nogil_context=False):
- from . import ExprNodes
- if entry.from_closure:
- func = '__Pyx_RaiseClosureNameError'
- self.globalstate.use_utility_code(
- ExprNodes.raise_closure_name_error_utility_code)
- elif entry.type.is_memoryviewslice and in_nogil_context:
- func = '__Pyx_RaiseUnboundMemoryviewSliceNogil'
- self.globalstate.use_utility_code(
- ExprNodes.raise_unbound_memoryview_utility_code_nogil)
- else:
- func = '__Pyx_RaiseUnboundLocalError'
- self.globalstate.use_utility_code(
- ExprNodes.raise_unbound_local_error_utility_code)
-
- self.putln('if (unlikely(!%s)) { %s("%s"); %s }' % (
- entry.type.check_for_null_code(entry.cname),
- func,
- entry.name,
- self.error_goto(pos)))
-
- def set_error_info(self, pos, used=False):
- self.funcstate.should_declare_error_indicator = True
- if used:
- self.funcstate.uses_error_indicator = True
+ return self.putln("if (%s < 0) %s" % (value, self.error_goto(pos)))
+
+ def put_error_if_unbound(self, pos, entry, in_nogil_context=False):
+ from . import ExprNodes
+ if entry.from_closure:
+ func = '__Pyx_RaiseClosureNameError'
+ self.globalstate.use_utility_code(
+ ExprNodes.raise_closure_name_error_utility_code)
+ elif entry.type.is_memoryviewslice and in_nogil_context:
+ func = '__Pyx_RaiseUnboundMemoryviewSliceNogil'
+ self.globalstate.use_utility_code(
+ ExprNodes.raise_unbound_memoryview_utility_code_nogil)
+ else:
+ func = '__Pyx_RaiseUnboundLocalError'
+ self.globalstate.use_utility_code(
+ ExprNodes.raise_unbound_local_error_utility_code)
+
+ self.putln('if (unlikely(!%s)) { %s("%s"); %s }' % (
+ entry.type.check_for_null_code(entry.cname),
+ func,
+ entry.name,
+ self.error_goto(pos)))
+
+ def set_error_info(self, pos, used=False):
+ self.funcstate.should_declare_error_indicator = True
+ if used:
+ self.funcstate.uses_error_indicator = True
return "__PYX_MARK_ERR_POS(%s, %s)" % (
- self.lookup_filename(pos[0]),
+ self.lookup_filename(pos[0]),
pos[1])
-
+
def error_goto(self, pos, used=True):
- lbl = self.funcstate.error_label
- self.funcstate.use_label(lbl)
+ lbl = self.funcstate.error_label
+ self.funcstate.use_label(lbl)
if pos is None:
return 'goto %s;' % lbl
self.funcstate.should_declare_error_indicator = True
@@ -2381,217 +2381,217 @@ class CCodeWriter(object):
return "__PYX_ERR(%s, %s, %s)" % (
self.lookup_filename(pos[0]),
pos[1],
- lbl)
-
- def error_goto_if(self, cond, pos):
- return "if (%s) %s" % (self.unlikely(cond), self.error_goto(pos))
-
- def error_goto_if_null(self, cname, pos):
- return self.error_goto_if("!%s" % cname, pos)
-
- def error_goto_if_neg(self, cname, pos):
- return self.error_goto_if("%s < 0" % cname, pos)
-
- def error_goto_if_PyErr(self, pos):
- return self.error_goto_if("PyErr_Occurred()", pos)
-
- def lookup_filename(self, filename):
- return self.globalstate.lookup_filename(filename)
-
- def put_declare_refcount_context(self):
- self.putln('__Pyx_RefNannyDeclarations')
-
- def put_setup_refcount_context(self, name, acquire_gil=False):
- if acquire_gil:
- self.globalstate.use_utility_code(
- UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
- self.putln('__Pyx_RefNannySetupContext("%s", %d);' % (name, acquire_gil and 1 or 0))
-
- def put_finish_refcount_context(self):
- self.putln("__Pyx_RefNannyFinishContext();")
-
+ lbl)
+
+ def error_goto_if(self, cond, pos):
+ return "if (%s) %s" % (self.unlikely(cond), self.error_goto(pos))
+
+ def error_goto_if_null(self, cname, pos):
+ return self.error_goto_if("!%s" % cname, pos)
+
+ def error_goto_if_neg(self, cname, pos):
+ return self.error_goto_if("%s < 0" % cname, pos)
+
+ def error_goto_if_PyErr(self, pos):
+ return self.error_goto_if("PyErr_Occurred()", pos)
+
+ def lookup_filename(self, filename):
+ return self.globalstate.lookup_filename(filename)
+
+ def put_declare_refcount_context(self):
+ self.putln('__Pyx_RefNannyDeclarations')
+
+ def put_setup_refcount_context(self, name, acquire_gil=False):
+ if acquire_gil:
+ self.globalstate.use_utility_code(
+ UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
+ self.putln('__Pyx_RefNannySetupContext("%s", %d);' % (name, acquire_gil and 1 or 0))
+
+ def put_finish_refcount_context(self):
+ self.putln("__Pyx_RefNannyFinishContext();")
+
def put_add_traceback(self, qualified_name, include_cline=True):
- """
- Build a Python traceback for propagating exceptions.
-
- qualified_name should be the qualified name of the function.
- """
- format_tuple = (
- qualified_name,
+ """
+ Build a Python traceback for propagating exceptions.
+
+ qualified_name should be the qualified name of the function.
+ """
+ format_tuple = (
+ qualified_name,
Naming.clineno_cname if include_cline else 0,
- Naming.lineno_cname,
- Naming.filename_cname,
- )
- self.funcstate.uses_error_indicator = True
- self.putln('__Pyx_AddTraceback("%s", %s, %s, %s);' % format_tuple)
-
+ Naming.lineno_cname,
+ Naming.filename_cname,
+ )
+ self.funcstate.uses_error_indicator = True
+ self.putln('__Pyx_AddTraceback("%s", %s, %s, %s);' % format_tuple)
+
def put_unraisable(self, qualified_name, nogil=False):
- """
- Generate code to print a Python warning for an unraisable exception.
-
- qualified_name should be the qualified name of the function.
- """
- format_tuple = (
- qualified_name,
- Naming.clineno_cname,
- Naming.lineno_cname,
- Naming.filename_cname,
+ """
+ Generate code to print a Python warning for an unraisable exception.
+
+ qualified_name should be the qualified name of the function.
+ """
+ format_tuple = (
+ qualified_name,
+ Naming.clineno_cname,
+ Naming.lineno_cname,
+ Naming.filename_cname,
self.globalstate.directives['unraisable_tracebacks'],
nogil,
- )
- self.funcstate.uses_error_indicator = True
+ )
+ self.funcstate.uses_error_indicator = True
self.putln('__Pyx_WriteUnraisable("%s", %s, %s, %s, %d, %d);' % format_tuple)
- self.globalstate.use_utility_code(
- UtilityCode.load_cached("WriteUnraisableException", "Exceptions.c"))
-
- def put_trace_declarations(self):
- self.putln('__Pyx_TraceDeclarations')
-
+ self.globalstate.use_utility_code(
+ UtilityCode.load_cached("WriteUnraisableException", "Exceptions.c"))
+
+ def put_trace_declarations(self):
+ self.putln('__Pyx_TraceDeclarations')
+
def put_trace_frame_init(self, codeobj=None):
if codeobj:
self.putln('__Pyx_TraceFrameInit(%s)' % codeobj)
-
+
def put_trace_call(self, name, pos, nogil=False):
self.putln('__Pyx_TraceCall("%s", %s[%s], %s, %d, %s);' % (
name, Naming.filetable_cname, self.lookup_filename(pos[0]), pos[1], nogil, self.error_goto(pos)))
- def put_trace_exception(self):
- self.putln("__Pyx_TraceException();")
-
+ def put_trace_exception(self):
+ self.putln("__Pyx_TraceException();")
+
def put_trace_return(self, retvalue_cname, nogil=False):
self.putln("__Pyx_TraceReturn(%s, %d);" % (retvalue_cname, nogil))
-
- def putln_openmp(self, string):
- self.putln("#ifdef _OPENMP")
- self.putln(string)
- self.putln("#endif /* _OPENMP */")
-
- def undef_builtin_expect(self, cond):
- """
-        Redefine the macros likely() and unlikely() to no-ops, depending on
- condition 'cond'
- """
- self.putln("#if %s" % cond)
- self.putln(" #undef likely")
- self.putln(" #undef unlikely")
- self.putln(" #define likely(x) (x)")
- self.putln(" #define unlikely(x) (x)")
- self.putln("#endif")
-
- def redef_builtin_expect(self, cond):
- self.putln("#if %s" % cond)
- self.putln(" #undef likely")
- self.putln(" #undef unlikely")
- self.putln(" #define likely(x) __builtin_expect(!!(x), 1)")
- self.putln(" #define unlikely(x) __builtin_expect(!!(x), 0)")
- self.putln("#endif")
-
-
-class PyrexCodeWriter(object):
- # f file output file
- # level int indentation level
-
- def __init__(self, outfile_name):
- self.f = Utils.open_new_file(outfile_name)
- self.level = 0
-
- def putln(self, code):
- self.f.write("%s%s\n" % (" " * self.level, code))
-
- def indent(self):
- self.level += 1
-
- def dedent(self):
- self.level -= 1
-
-class PyxCodeWriter(object):
- """
- Can be used for writing out some Cython code. To use the indenter
- functionality, the Cython.Compiler.Importer module will have to be used
- to load the code to support python 2.4
- """
-
- def __init__(self, buffer=None, indent_level=0, context=None, encoding='ascii'):
- self.buffer = buffer or StringIOTree()
- self.level = indent_level
- self.context = context
- self.encoding = encoding
-
- def indent(self, levels=1):
- self.level += levels
- return True
-
- def dedent(self, levels=1):
- self.level -= levels
-
- def indenter(self, line):
- """
- Instead of
-
- with pyx_code.indenter("for i in range(10):"):
- pyx_code.putln("print i")
-
- write
-
-            if pyx_code.indenter("for i in range(10):"):
- pyx_code.putln("print i")
- pyx_code.dedent()
- """
- self.putln(line)
- self.indent()
- return True
-
- def getvalue(self):
- result = self.buffer.getvalue()
+
+ def putln_openmp(self, string):
+ self.putln("#ifdef _OPENMP")
+ self.putln(string)
+ self.putln("#endif /* _OPENMP */")
+
+ def undef_builtin_expect(self, cond):
+ """
+        Redefine the macros likely() and unlikely() to no-ops, depending on
+ condition 'cond'
+ """
+ self.putln("#if %s" % cond)
+ self.putln(" #undef likely")
+ self.putln(" #undef unlikely")
+ self.putln(" #define likely(x) (x)")
+ self.putln(" #define unlikely(x) (x)")
+ self.putln("#endif")
+
+ def redef_builtin_expect(self, cond):
+ self.putln("#if %s" % cond)
+ self.putln(" #undef likely")
+ self.putln(" #undef unlikely")
+ self.putln(" #define likely(x) __builtin_expect(!!(x), 1)")
+ self.putln(" #define unlikely(x) __builtin_expect(!!(x), 0)")
+ self.putln("#endif")
+
+
+class PyrexCodeWriter(object):
+ # f file output file
+ # level int indentation level
+
+ def __init__(self, outfile_name):
+ self.f = Utils.open_new_file(outfile_name)
+ self.level = 0
+
+ def putln(self, code):
+ self.f.write("%s%s\n" % (" " * self.level, code))
+
+ def indent(self):
+ self.level += 1
+
+ def dedent(self):
+ self.level -= 1
+
+class PyxCodeWriter(object):
+ """
+ Can be used for writing out some Cython code. To use the indenter
+ functionality, the Cython.Compiler.Importer module will have to be used
+ to load the code to support python 2.4
+ """
+
+ def __init__(self, buffer=None, indent_level=0, context=None, encoding='ascii'):
+ self.buffer = buffer or StringIOTree()
+ self.level = indent_level
+ self.context = context
+ self.encoding = encoding
+
+ def indent(self, levels=1):
+ self.level += levels
+ return True
+
+ def dedent(self, levels=1):
+ self.level -= levels
+
+ def indenter(self, line):
+ """
+ Instead of
+
+ with pyx_code.indenter("for i in range(10):"):
+ pyx_code.putln("print i")
+
+ write
+
+            if pyx_code.indenter("for i in range(10):"):
+ pyx_code.putln("print i")
+ pyx_code.dedent()
+ """
+ self.putln(line)
+ self.indent()
+ return True
+
+ def getvalue(self):
+ result = self.buffer.getvalue()
if isinstance(result, bytes):
- result = result.decode(self.encoding)
- return result
-
- def putln(self, line, context=None):
- context = context or self.context
- if context:
- line = sub_tempita(line, context)
- self._putln(line)
-
- def _putln(self, line):
- self.buffer.write("%s%s\n" % (self.level * " ", line))
-
- def put_chunk(self, chunk, context=None):
- context = context or self.context
- if context:
- chunk = sub_tempita(chunk, context)
-
- chunk = textwrap.dedent(chunk)
- for line in chunk.splitlines():
- self._putln(line)
-
- def insertion_point(self):
- return PyxCodeWriter(self.buffer.insertion_point(), self.level,
- self.context)
-
- def named_insertion_point(self, name):
- setattr(self, name, self.insertion_point())
-
-
-class ClosureTempAllocator(object):
- def __init__(self, klass):
- self.klass = klass
- self.temps_allocated = {}
- self.temps_free = {}
- self.temps_count = 0
-
- def reset(self):
- for type, cnames in self.temps_allocated.items():
- self.temps_free[type] = list(cnames)
-
- def allocate_temp(self, type):
+ result = result.decode(self.encoding)
+ return result
+
+ def putln(self, line, context=None):
+ context = context or self.context
+ if context:
+ line = sub_tempita(line, context)
+ self._putln(line)
+
+ def _putln(self, line):
+ self.buffer.write("%s%s\n" % (self.level * " ", line))
+
+ def put_chunk(self, chunk, context=None):
+ context = context or self.context
+ if context:
+ chunk = sub_tempita(chunk, context)
+
+ chunk = textwrap.dedent(chunk)
+ for line in chunk.splitlines():
+ self._putln(line)
+
+ def insertion_point(self):
+ return PyxCodeWriter(self.buffer.insertion_point(), self.level,
+ self.context)
+
+ def named_insertion_point(self, name):
+ setattr(self, name, self.insertion_point())
+
+
+class ClosureTempAllocator(object):
+ def __init__(self, klass):
+ self.klass = klass
+ self.temps_allocated = {}
+ self.temps_free = {}
+ self.temps_count = 0
+
+ def reset(self):
+ for type, cnames in self.temps_allocated.items():
+ self.temps_free[type] = list(cnames)
+
+ def allocate_temp(self, type):
if type not in self.temps_allocated:
- self.temps_allocated[type] = []
- self.temps_free[type] = []
- elif self.temps_free[type]:
- return self.temps_free[type].pop(0)
- cname = '%s%d' % (Naming.codewriter_temp_prefix, self.temps_count)
- self.klass.declare_var(pos=None, name=cname, cname=cname, type=type, is_cdef=True)
- self.temps_allocated[type].append(cname)
- self.temps_count += 1
- return cname
+ self.temps_allocated[type] = []
+ self.temps_free[type] = []
+ elif self.temps_free[type]:
+ return self.temps_free[type].pop(0)
+ cname = '%s%d' % (Naming.codewriter_temp_prefix, self.temps_count)
+ self.klass.declare_var(pos=None, name=cname, cname=cname, type=type, is_cdef=True)
+ self.temps_allocated[type].append(cname)
+ self.temps_count += 1
+ return cname
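ClosureTempAllocator hands out temporaries that live as attributes on a closure class and recycles them: reset() marks every allocated cname as free, and allocate_temp() reuses a free one before declaring a new variable on the class. A hedged usage sketch with a stub in place of the real class scope (the stub and the plain-string type key are illustration only; it assumes an installed Cython provides this module):

from Cython.Compiler.Code import ClosureTempAllocator

class StubScope(object):
    def declare_var(self, **kwargs):
        pass  # the real scope would add a cdef attribute to the closure class

alloc = ClosureTempAllocator(StubScope())
first = alloc.allocate_temp("int")    # fresh temp, e.g. '__pyx_t_0'
second = alloc.allocate_temp("int")   # another fresh temp
assert second != first
alloc.reset()                         # every allocated temp becomes free again
assert alloc.allocate_temp("int") == first   # reused, not redeclared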
diff --git a/contrib/tools/cython/Cython/Compiler/CodeGeneration.py b/contrib/tools/cython/Cython/Compiler/CodeGeneration.py
index 30c464db00..e64049c7f5 100644
--- a/contrib/tools/cython/Cython/Compiler/CodeGeneration.py
+++ b/contrib/tools/cython/Cython/Compiler/CodeGeneration.py
@@ -1,35 +1,35 @@
-from __future__ import absolute_import
-
-from .Visitor import VisitorTransform
-from .Nodes import StatListNode
-
-
-class ExtractPxdCode(VisitorTransform):
- """
- Finds nodes in a pxd file that should generate code, and
- returns them in a StatListNode.
-
- The result is a tuple (StatListNode, ModuleScope), i.e.
- everything that is needed from the pxd after it is processed.
-
+from __future__ import absolute_import
+
+from .Visitor import VisitorTransform
+from .Nodes import StatListNode
+
+
+class ExtractPxdCode(VisitorTransform):
+ """
+ Finds nodes in a pxd file that should generate code, and
+ returns them in a StatListNode.
+
+ The result is a tuple (StatListNode, ModuleScope), i.e.
+ everything that is needed from the pxd after it is processed.
+
A purer approach would be to separately compile the pxd code,
- but the result would have to be slightly more sophisticated
- than pure strings (functions + wanted interned strings +
- wanted utility code + wanted cached objects) so for now this
- approach is taken.
- """
-
- def __call__(self, root):
- self.funcs = []
- self.visitchildren(root)
- return (StatListNode(root.pos, stats=self.funcs), root.scope)
-
- def visit_FuncDefNode(self, node):
- self.funcs.append(node)
- # Do not visit children, nested funcdefnodes will
- # also be moved by this action...
- return node
-
- def visit_Node(self, node):
- self.visitchildren(node)
- return node
+ but the result would have to be slightly more sophisticated
+ than pure strings (functions + wanted interned strings +
+ wanted utility code + wanted cached objects) so for now this
+ approach is taken.
+ """
+
+ def __call__(self, root):
+ self.funcs = []
+ self.visitchildren(root)
+ return (StatListNode(root.pos, stats=self.funcs), root.scope)
+
+ def visit_FuncDefNode(self, node):
+ self.funcs.append(node)
+ # Do not visit children, nested funcdefnodes will
+ # also be moved by this action...
+ return node
+
+ def visit_Node(self, node):
+ self.visitchildren(node)
+ return node
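ExtractPxdCode collects FuncDefNodes and deliberately stops descending into them, so nested function definitions travel together with their parent. The same collect-without-descending pattern in a toy, self-contained form (the Node class here is illustrative, not Cython's node types):

class Node(object):
    def __init__(self, kind, children=()):
        self.kind = kind
        self.children = list(children)

def collect_funcs(node, funcs):
    if node.kind == "funcdef":
        funcs.append(node)      # keep it; do not visit children, so nested
        return                  # defs move together with their parent
    for child in node.children:
        collect_funcs(child, funcs)

tree = Node("module", [Node("funcdef", [Node("funcdef")]), Node("stat")])
found = []
collect_funcs(tree, found)
assert len(found) == 1          # the nested funcdef was not collected separately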
diff --git a/contrib/tools/cython/Cython/Compiler/CythonScope.py b/contrib/tools/cython/Cython/Compiler/CythonScope.py
index 692654a83e..1c25d1a6b4 100644
--- a/contrib/tools/cython/Cython/Compiler/CythonScope.py
+++ b/contrib/tools/cython/Cython/Compiler/CythonScope.py
@@ -1,164 +1,164 @@
-from __future__ import absolute_import
-
-from .Symtab import ModuleScope
-from .PyrexTypes import *
-from .UtilityCode import CythonUtilityCode
-from .Errors import error
-from .Scanning import StringSourceDescriptor
-from . import MemoryView
-
-
-class CythonScope(ModuleScope):
- is_cython_builtin = 1
- _cythonscope_initialized = False
-
- def __init__(self, context):
- ModuleScope.__init__(self, u'cython', None, None)
- self.pxd_file_loaded = True
- self.populate_cython_scope()
- # The Main.Context object
- self.context = context
-
- for fused_type in (cy_integral_type, cy_floating_type, cy_numeric_type):
- entry = self.declare_typedef(fused_type.name,
- fused_type,
- None,
- cname='<error>')
- entry.in_cinclude = True
-
+from __future__ import absolute_import
+
+from .Symtab import ModuleScope
+from .PyrexTypes import *
+from .UtilityCode import CythonUtilityCode
+from .Errors import error
+from .Scanning import StringSourceDescriptor
+from . import MemoryView
+
+
+class CythonScope(ModuleScope):
+ is_cython_builtin = 1
+ _cythonscope_initialized = False
+
+ def __init__(self, context):
+ ModuleScope.__init__(self, u'cython', None, None)
+ self.pxd_file_loaded = True
+ self.populate_cython_scope()
+ # The Main.Context object
+ self.context = context
+
+ for fused_type in (cy_integral_type, cy_floating_type, cy_numeric_type):
+ entry = self.declare_typedef(fused_type.name,
+ fused_type,
+ None,
+ cname='<error>')
+ entry.in_cinclude = True
+
def is_cpp(self):
# Allow C++ utility code in C++ contexts.
return self.context.cpp
- def lookup_type(self, name):
- # This function should go away when types are all first-level objects.
- type = parse_basic_type(name)
- if type:
- return type
-
- return super(CythonScope, self).lookup_type(name)
-
- def lookup(self, name):
- entry = super(CythonScope, self).lookup(name)
-
- if entry is None and not self._cythonscope_initialized:
- self.load_cythonscope()
- entry = super(CythonScope, self).lookup(name)
-
- return entry
-
- def find_module(self, module_name, pos):
- error("cython.%s is not available" % module_name, pos)
-
- def find_submodule(self, module_name):
- entry = self.entries.get(module_name, None)
- if not entry:
- self.load_cythonscope()
- entry = self.entries.get(module_name, None)
-
- if entry and entry.as_module:
- return entry.as_module
- else:
- # TODO: fix find_submodule control flow so that we're not
- # expected to create a submodule here (to protect CythonScope's
- # possible immutability). Hack ourselves out of the situation
- # for now.
- raise error((StringSourceDescriptor(u"cython", u""), 0, 0),
- "cython.%s is not available" % module_name)
-
- def lookup_qualified_name(self, qname):
- # ExprNode.as_cython_attribute generates qnames and we untangle it here...
- name_path = qname.split(u'.')
- scope = self
- while len(name_path) > 1:
+ def lookup_type(self, name):
+ # This function should go away when types are all first-level objects.
+ type = parse_basic_type(name)
+ if type:
+ return type
+
+ return super(CythonScope, self).lookup_type(name)
+
+ def lookup(self, name):
+ entry = super(CythonScope, self).lookup(name)
+
+ if entry is None and not self._cythonscope_initialized:
+ self.load_cythonscope()
+ entry = super(CythonScope, self).lookup(name)
+
+ return entry
+
+ def find_module(self, module_name, pos):
+ error("cython.%s is not available" % module_name, pos)
+
+ def find_submodule(self, module_name):
+ entry = self.entries.get(module_name, None)
+ if not entry:
+ self.load_cythonscope()
+ entry = self.entries.get(module_name, None)
+
+ if entry and entry.as_module:
+ return entry.as_module
+ else:
+ # TODO: fix find_submodule control flow so that we're not
+ # expected to create a submodule here (to protect CythonScope's
+ # possible immutability). Hack ourselves out of the situation
+ # for now.
+ raise error((StringSourceDescriptor(u"cython", u""), 0, 0),
+ "cython.%s is not available" % module_name)
+
+ def lookup_qualified_name(self, qname):
+ # ExprNode.as_cython_attribute generates qnames and we untangle it here...
+ name_path = qname.split(u'.')
+ scope = self
+ while len(name_path) > 1:
scope = scope.lookup_here(name_path[0])
if scope:
scope = scope.as_module
- del name_path[0]
- if scope is None:
- return None
- else:
- return scope.lookup_here(name_path[0])
-
- def populate_cython_scope(self):
- # These are used to optimize isinstance in FinalOptimizePhase
- type_object = self.declare_typedef(
- 'PyTypeObject',
- base_type = c_void_type,
- pos = None,
- cname = 'PyTypeObject')
- type_object.is_void = True
- type_object_type = type_object.type
-
- self.declare_cfunction(
- 'PyObject_TypeCheck',
- CFuncType(c_bint_type, [CFuncTypeArg("o", py_object_type, None),
- CFuncTypeArg("t", c_ptr_type(type_object_type), None)]),
- pos = None,
- defining = 1,
- cname = 'PyObject_TypeCheck')
-
- def load_cythonscope(self):
- """
- Creates some entries for testing purposes and entries for
- cython.array() and for cython.view.*.
- """
- if self._cythonscope_initialized:
- return
-
- self._cythonscope_initialized = True
- cython_testscope_utility_code.declare_in_scope(
- self, cython_scope=self)
- cython_test_extclass_utility_code.declare_in_scope(
- self, cython_scope=self)
-
- #
- # The view sub-scope
- #
- self.viewscope = viewscope = ModuleScope(u'view', self, None)
- self.declare_module('view', viewscope, None).as_module = viewscope
- viewscope.is_cython_builtin = True
- viewscope.pxd_file_loaded = True
-
- cythonview_testscope_utility_code.declare_in_scope(
- viewscope, cython_scope=self)
-
- view_utility_scope = MemoryView.view_utility_code.declare_in_scope(
- self.viewscope, cython_scope=self,
- whitelist=MemoryView.view_utility_whitelist)
-
- # self.entries["array"] = view_utility_scope.entries.pop("array")
-
-
-def create_cython_scope(context):
- # One could in fact probably make it a singleton,
- # but not sure yet whether any code mutates it (which would kill reusing
- # it across different contexts)
- return CythonScope(context)
-
-# Load test utilities for the cython scope
-
-def load_testscope_utility(cy_util_name, **kwargs):
- return CythonUtilityCode.load(cy_util_name, "TestCythonScope.pyx", **kwargs)
-
-
-undecorated_methods_protos = UtilityCode(proto=u"""
- /* These methods are undecorated and have therefore no prototype */
- static PyObject *__pyx_TestClass_cdef_method(
- struct __pyx_TestClass_obj *self, int value);
- static PyObject *__pyx_TestClass_cpdef_method(
- struct __pyx_TestClass_obj *self, int value, int skip_dispatch);
- static PyObject *__pyx_TestClass_def_method(
- PyObject *self, PyObject *value);
-""")
-
-cython_testscope_utility_code = load_testscope_utility("TestScope")
-
-test_cython_utility_dep = load_testscope_utility("TestDep")
-
-cython_test_extclass_utility_code = \
- load_testscope_utility("TestClass", name="TestClass",
- requires=[undecorated_methods_protos,
- test_cython_utility_dep])
-
-cythonview_testscope_utility_code = load_testscope_utility("View.TestScope")
+ del name_path[0]
+ if scope is None:
+ return None
+ else:
+ return scope.lookup_here(name_path[0])
+
+ def populate_cython_scope(self):
+ # These are used to optimize isinstance in FinalOptimizePhase
+ type_object = self.declare_typedef(
+ 'PyTypeObject',
+ base_type = c_void_type,
+ pos = None,
+ cname = 'PyTypeObject')
+ type_object.is_void = True
+ type_object_type = type_object.type
+
+ self.declare_cfunction(
+ 'PyObject_TypeCheck',
+ CFuncType(c_bint_type, [CFuncTypeArg("o", py_object_type, None),
+ CFuncTypeArg("t", c_ptr_type(type_object_type), None)]),
+ pos = None,
+ defining = 1,
+ cname = 'PyObject_TypeCheck')
+
+ def load_cythonscope(self):
+ """
+ Creates some entries for testing purposes and entries for
+ cython.array() and for cython.view.*.
+ """
+ if self._cythonscope_initialized:
+ return
+
+ self._cythonscope_initialized = True
+ cython_testscope_utility_code.declare_in_scope(
+ self, cython_scope=self)
+ cython_test_extclass_utility_code.declare_in_scope(
+ self, cython_scope=self)
+
+ #
+ # The view sub-scope
+ #
+ self.viewscope = viewscope = ModuleScope(u'view', self, None)
+ self.declare_module('view', viewscope, None).as_module = viewscope
+ viewscope.is_cython_builtin = True
+ viewscope.pxd_file_loaded = True
+
+ cythonview_testscope_utility_code.declare_in_scope(
+ viewscope, cython_scope=self)
+
+ view_utility_scope = MemoryView.view_utility_code.declare_in_scope(
+ self.viewscope, cython_scope=self,
+ whitelist=MemoryView.view_utility_whitelist)
+
+ # self.entries["array"] = view_utility_scope.entries.pop("array")
+
+
+def create_cython_scope(context):
+ # One could in fact probably make it a singleton,
+ # but not sure yet whether any code mutates it (which would kill reusing
+ # it across different contexts)
+ return CythonScope(context)
+
+# Load test utilities for the cython scope
+
+def load_testscope_utility(cy_util_name, **kwargs):
+ return CythonUtilityCode.load(cy_util_name, "TestCythonScope.pyx", **kwargs)
+
+
+undecorated_methods_protos = UtilityCode(proto=u"""
+ /* These methods are undecorated and have therefore no prototype */
+ static PyObject *__pyx_TestClass_cdef_method(
+ struct __pyx_TestClass_obj *self, int value);
+ static PyObject *__pyx_TestClass_cpdef_method(
+ struct __pyx_TestClass_obj *self, int value, int skip_dispatch);
+ static PyObject *__pyx_TestClass_def_method(
+ PyObject *self, PyObject *value);
+""")
+
+cython_testscope_utility_code = load_testscope_utility("TestScope")
+
+test_cython_utility_dep = load_testscope_utility("TestDep")
+
+cython_test_extclass_utility_code = \
+ load_testscope_utility("TestClass", name="TestClass",
+ requires=[undecorated_methods_protos,
+ test_cython_utility_dep])
+
+cythonview_testscope_utility_code = load_testscope_utility("View.TestScope")
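
The dotted-name lookup whose tail appears at the top of this hunk peels one leading component off name_path per nested scope and finishes with a plain lookup_here() in the innermost scope. A toy sketch of that walk with a stand-in Scope class rather than the real Symtab machinery (only the tail of the method is visible above, so the loop over the leading components is an assumption):

    class Scope(object):
        # Stand-in for a module scope: a flat entry table plus nested sub-modules.
        def __init__(self, entries=None, modules=None):
            self.entries = entries or {}
            self.modules = modules or {}

        def lookup_here(self, name):
            return self.entries.get(name)

        def find_submodule(self, name):
            return self.modules.get(name)

    def lookup_dotted(scope, name_path):
        # Descend one sub-scope per leading component, then look the last
        # component up directly, mirroring the "del name_path[0]" /
        # "scope.lookup_here(name_path[0])" tail shown above.
        while scope is not None and len(name_path) > 1:
            scope = scope.find_submodule(name_path[0])
            del name_path[0]
        if scope is None:
            return None
        return scope.lookup_here(name_path[0])

    view = Scope(entries={'array': 'array entry'})
    cython_scope = Scope(modules={'view': view})
    assert lookup_dotted(cython_scope, ['view', 'array']) == 'array entry'
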
diff --git a/contrib/tools/cython/Cython/Compiler/DebugFlags.py b/contrib/tools/cython/Cython/Compiler/DebugFlags.py
index 031589dd69..e830ab1849 100644
--- a/contrib/tools/cython/Cython/Compiler/DebugFlags.py
+++ b/contrib/tools/cython/Cython/Compiler/DebugFlags.py
@@ -1,21 +1,21 @@
-# Can be enabled at the command line with --debug-xxx.
-
-debug_disposal_code = 0
-debug_temp_alloc = 0
-debug_coercion = 0
-
-# Write comments into the C code that show where temporary variables
-# are allocated and released.
-debug_temp_code_comments = 0
-
-# Write a call trace of the code generation phase into the C code.
-debug_trace_code_generation = 0
-
-# Do not replace exceptions with user-friendly error messages.
-debug_no_exception_intercept = 0
-
-# Print a message each time a new stage in the pipeline is entered.
-debug_verbose_pipeline = 0
-
-# Raise an exception when an error is encountered.
-debug_exception_on_error = 0
+# Can be enabled at the command line with --debug-xxx.
+
+debug_disposal_code = 0
+debug_temp_alloc = 0
+debug_coercion = 0
+
+# Write comments into the C code that show where temporary variables
+# are allocated and released.
+debug_temp_code_comments = 0
+
+# Write a call trace of the code generation phase into the C code.
+debug_trace_code_generation = 0
+
+# Do not replace exceptions with user-friendly error messages.
+debug_no_exception_intercept = 0
+
+# Print a message each time a new stage in the pipeline is entered.
+debug_verbose_pipeline = 0
+
+# Raise an exception when an error is encountered.
+debug_exception_on_error = 0
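
Each flag above is a plain module-level integer, so the --debug-xxx switches mentioned in the header comment only need to translate an option name into an attribute name. A minimal sketch of that mapping, assuming a Cython installation is importable; the real option parsing lives elsewhere in the compiler and is simplified here:

    from Cython.Compiler import DebugFlags

    def enable_debug_option(option):
        # "--debug-temp-code-comments" -> "debug_temp_code_comments"
        name = option.lstrip('-').replace('-', '_')
        if not hasattr(DebugFlags, name):
            raise ValueError("unknown debug flag: %s" % option)
        setattr(DebugFlags, name, 1)

    enable_debug_option('--debug-temp-code-comments')
    assert DebugFlags.debug_temp_code_comments == 1
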
diff --git a/contrib/tools/cython/Cython/Compiler/Errors.py b/contrib/tools/cython/Cython/Compiler/Errors.py
index 82fe19a0e7..9761b52c32 100644
--- a/contrib/tools/cython/Cython/Compiler/Errors.py
+++ b/contrib/tools/cython/Cython/Compiler/Errors.py
@@ -1,250 +1,250 @@
-#
-# Errors
-#
-
-from __future__ import absolute_import
-
+#
+# Errors
+#
+
+from __future__ import absolute_import
+
try:
from __builtin__ import basestring as any_string_type
except ImportError:
any_string_type = (bytes, str)
-import sys
+import sys
from contextlib import contextmanager
-
-from ..Utils import open_new_file
-from . import DebugFlags
-from . import Options
-
-
-class PyrexError(Exception):
- pass
-
-
-class PyrexWarning(Exception):
- pass
-
-
-def context(position):
- source = position[0]
+
+from ..Utils import open_new_file
+from . import DebugFlags
+from . import Options
+
+
+class PyrexError(Exception):
+ pass
+
+
+class PyrexWarning(Exception):
+ pass
+
+
+def context(position):
+ source = position[0]
assert not (isinstance(source, any_string_type)), (
- "Please replace filename strings with Scanning.FileSourceDescriptor instances %r" % source)
- try:
- F = source.get_lines()
- except UnicodeDecodeError:
- # file has an encoding problem
- s = u"[unprintable code]\n"
- else:
- s = u''.join(F[max(0, position[1]-6):position[1]])
- s = u'...\n%s%s^\n' % (s, u' '*(position[2]-1))
- s = u'%s\n%s%s\n' % (u'-'*60, s, u'-'*60)
- return s
-
-def format_position(position):
- if position:
- return u"%s:%d:%d: " % (position[0].get_error_description(),
- position[1], position[2])
- return u''
-
-def format_error(message, position):
- if position:
- pos_str = format_position(position)
- cont = context(position)
- message = u'\nError compiling Cython file:\n%s\n%s%s' % (cont, pos_str, message or u'')
- return message
-
-class CompileError(PyrexError):
-
- def __init__(self, position = None, message = u""):
- self.position = position
- self.message_only = message
- self.formatted_message = format_error(message, position)
- self.reported = False
- # Deprecated and withdrawn in 2.6:
- # self.message = message
- Exception.__init__(self, self.formatted_message)
- # Python Exception subclass pickling is broken,
- # see http://bugs.python.org/issue1692335
- self.args = (position, message)
-
- def __str__(self):
- return self.formatted_message
-
-class CompileWarning(PyrexWarning):
-
- def __init__(self, position = None, message = ""):
- self.position = position
- # Deprecated and withdrawn in 2.6:
- # self.message = message
- Exception.__init__(self, format_position(position) + message)
-
-class InternalError(Exception):
- # If this is ever raised, there is a bug in the compiler.
-
- def __init__(self, message):
- self.message_only = message
- Exception.__init__(self, u"Internal compiler error: %s"
- % message)
-
-class AbortError(Exception):
- # Throw this to stop the compilation immediately.
-
- def __init__(self, message):
- self.message_only = message
- Exception.__init__(self, u"Abort error: %s" % message)
-
-class CompilerCrash(CompileError):
- # raised when an unexpected exception occurs in a transform
- def __init__(self, pos, context, message, cause, stacktrace=None):
- if message:
- message = u'\n' + message
- else:
- message = u'\n'
- self.message_only = message
- if context:
- message = u"Compiler crash in %s%s" % (context, message)
- if stacktrace:
- import traceback
- message += (
- u'\n\nCompiler crash traceback from this point on:\n' +
- u''.join(traceback.format_tb(stacktrace)))
- if cause:
- if not stacktrace:
- message += u'\n'
- message += u'%s: %s' % (cause.__class__.__name__, cause)
- CompileError.__init__(self, pos, message)
- # Python Exception subclass pickling is broken,
- # see http://bugs.python.org/issue1692335
- self.args = (pos, context, message, cause, stacktrace)
-
-class NoElementTreeInstalledException(PyrexError):
- """raised when the user enabled options.gdb_debug but no ElementTree
- implementation was found
- """
-
-listing_file = None
-num_errors = 0
-echo_file = None
-
-def open_listing_file(path, echo_to_stderr = 1):
- # Begin a new error listing. If path is None, no file
- # is opened, the error counter is just reset.
- global listing_file, num_errors, echo_file
- if path is not None:
- listing_file = open_new_file(path)
- else:
- listing_file = None
- if echo_to_stderr:
- echo_file = sys.stderr
- else:
- echo_file = None
- num_errors = 0
-
-def close_listing_file():
- global listing_file
- if listing_file:
- listing_file.close()
- listing_file = None
-
+ "Please replace filename strings with Scanning.FileSourceDescriptor instances %r" % source)
+ try:
+ F = source.get_lines()
+ except UnicodeDecodeError:
+ # file has an encoding problem
+ s = u"[unprintable code]\n"
+ else:
+ s = u''.join(F[max(0, position[1]-6):position[1]])
+ s = u'...\n%s%s^\n' % (s, u' '*(position[2]-1))
+ s = u'%s\n%s%s\n' % (u'-'*60, s, u'-'*60)
+ return s
+
+def format_position(position):
+ if position:
+ return u"%s:%d:%d: " % (position[0].get_error_description(),
+ position[1], position[2])
+ return u''
+
+def format_error(message, position):
+ if position:
+ pos_str = format_position(position)
+ cont = context(position)
+ message = u'\nError compiling Cython file:\n%s\n%s%s' % (cont, pos_str, message or u'')
+ return message
+
+class CompileError(PyrexError):
+
+ def __init__(self, position = None, message = u""):
+ self.position = position
+ self.message_only = message
+ self.formatted_message = format_error(message, position)
+ self.reported = False
+ # Deprecated and withdrawn in 2.6:
+ # self.message = message
+ Exception.__init__(self, self.formatted_message)
+ # Python Exception subclass pickling is broken,
+ # see http://bugs.python.org/issue1692335
+ self.args = (position, message)
+
+ def __str__(self):
+ return self.formatted_message
+
+class CompileWarning(PyrexWarning):
+
+ def __init__(self, position = None, message = ""):
+ self.position = position
+ # Deprecated and withdrawn in 2.6:
+ # self.message = message
+ Exception.__init__(self, format_position(position) + message)
+
+class InternalError(Exception):
+ # If this is ever raised, there is a bug in the compiler.
+
+ def __init__(self, message):
+ self.message_only = message
+ Exception.__init__(self, u"Internal compiler error: %s"
+ % message)
+
+class AbortError(Exception):
+ # Throw this to stop the compilation immediately.
+
+ def __init__(self, message):
+ self.message_only = message
+ Exception.__init__(self, u"Abort error: %s" % message)
+
+class CompilerCrash(CompileError):
+ # raised when an unexpected exception occurs in a transform
+ def __init__(self, pos, context, message, cause, stacktrace=None):
+ if message:
+ message = u'\n' + message
+ else:
+ message = u'\n'
+ self.message_only = message
+ if context:
+ message = u"Compiler crash in %s%s" % (context, message)
+ if stacktrace:
+ import traceback
+ message += (
+ u'\n\nCompiler crash traceback from this point on:\n' +
+ u''.join(traceback.format_tb(stacktrace)))
+ if cause:
+ if not stacktrace:
+ message += u'\n'
+ message += u'%s: %s' % (cause.__class__.__name__, cause)
+ CompileError.__init__(self, pos, message)
+ # Python Exception subclass pickling is broken,
+ # see http://bugs.python.org/issue1692335
+ self.args = (pos, context, message, cause, stacktrace)
+
+class NoElementTreeInstalledException(PyrexError):
+ """raised when the user enabled options.gdb_debug but no ElementTree
+ implementation was found
+ """
+
+listing_file = None
+num_errors = 0
+echo_file = None
+
+def open_listing_file(path, echo_to_stderr = 1):
+ # Begin a new error listing. If path is None, no file
+ # is opened, the error counter is just reset.
+ global listing_file, num_errors, echo_file
+ if path is not None:
+ listing_file = open_new_file(path)
+ else:
+ listing_file = None
+ if echo_to_stderr:
+ echo_file = sys.stderr
+ else:
+ echo_file = None
+ num_errors = 0
+
+def close_listing_file():
+ global listing_file
+ if listing_file:
+ listing_file.close()
+ listing_file = None
+
def report_error(err, use_stack=True):
if error_stack and use_stack:
- error_stack[-1].append(err)
- else:
- global num_errors
- # See Main.py for why dual reporting occurs. Quick fix for now.
- if err.reported: return
- err.reported = True
- try: line = u"%s\n" % err
- except UnicodeEncodeError:
- # Python <= 2.5 does this for non-ASCII Unicode exceptions
- line = format_error(getattr(err, 'message_only', "[unprintable exception message]"),
- getattr(err, 'position', None)) + u'\n'
- if listing_file:
- try: listing_file.write(line)
- except UnicodeEncodeError:
- listing_file.write(line.encode('ASCII', 'replace'))
- if echo_file:
- try: echo_file.write(line)
- except UnicodeEncodeError:
- echo_file.write(line.encode('ASCII', 'replace'))
- num_errors += 1
- if Options.fast_fail:
- raise AbortError("fatal errors")
-
-
-def error(position, message):
+ error_stack[-1].append(err)
+ else:
+ global num_errors
+ # See Main.py for why dual reporting occurs. Quick fix for now.
+ if err.reported: return
+ err.reported = True
+ try: line = u"%s\n" % err
+ except UnicodeEncodeError:
+ # Python <= 2.5 does this for non-ASCII Unicode exceptions
+ line = format_error(getattr(err, 'message_only', "[unprintable exception message]"),
+ getattr(err, 'position', None)) + u'\n'
+ if listing_file:
+ try: listing_file.write(line)
+ except UnicodeEncodeError:
+ listing_file.write(line.encode('ASCII', 'replace'))
+ if echo_file:
+ try: echo_file.write(line)
+ except UnicodeEncodeError:
+ echo_file.write(line.encode('ASCII', 'replace'))
+ num_errors += 1
+ if Options.fast_fail:
+ raise AbortError("fatal errors")
+
+
+def error(position, message):
#print("Errors.error:", repr(position), repr(message)) ###
- if position is None:
- raise InternalError(message)
- err = CompileError(position, message)
- if DebugFlags.debug_exception_on_error: raise Exception(err) # debug
- report_error(err)
- return err
-
-
-LEVEL = 1 # warn about all errors level 1 or higher
-
-
-def message(position, message, level=1):
- if level < LEVEL:
- return
- warn = CompileWarning(position, message)
- line = "note: %s\n" % warn
- if listing_file:
- listing_file.write(line)
- if echo_file:
- echo_file.write(line)
- return warn
-
-
-def warning(position, message, level=0):
- if level < LEVEL:
- return
- if Options.warning_errors and position:
- return error(position, message)
- warn = CompileWarning(position, message)
- line = "warning: %s\n" % warn
- if listing_file:
- listing_file.write(line)
- if echo_file:
- echo_file.write(line)
- return warn
-
-
-_warn_once_seen = {}
-def warn_once(position, message, level=0):
- if level < LEVEL or message in _warn_once_seen:
- return
- warn = CompileWarning(position, message)
- line = "warning: %s\n" % warn
- if listing_file:
- listing_file.write(line)
- if echo_file:
- echo_file.write(line)
- _warn_once_seen[message] = True
- return warn
-
-
-# These functions can be used to momentarily suppress errors.
-
-error_stack = []
-
-
-def hold_errors():
- error_stack.append([])
-
-
-def release_errors(ignore=False):
- held_errors = error_stack.pop()
- if not ignore:
- for err in held_errors:
- report_error(err)
-
-
-def held_errors():
- return error_stack[-1]
-
-
+ if position is None:
+ raise InternalError(message)
+ err = CompileError(position, message)
+ if DebugFlags.debug_exception_on_error: raise Exception(err) # debug
+ report_error(err)
+ return err
+
+
+LEVEL = 1 # warn about all errors level 1 or higher
+
+
+def message(position, message, level=1):
+ if level < LEVEL:
+ return
+ warn = CompileWarning(position, message)
+ line = "note: %s\n" % warn
+ if listing_file:
+ listing_file.write(line)
+ if echo_file:
+ echo_file.write(line)
+ return warn
+
+
+def warning(position, message, level=0):
+ if level < LEVEL:
+ return
+ if Options.warning_errors and position:
+ return error(position, message)
+ warn = CompileWarning(position, message)
+ line = "warning: %s\n" % warn
+ if listing_file:
+ listing_file.write(line)
+ if echo_file:
+ echo_file.write(line)
+ return warn
+
+
+_warn_once_seen = {}
+def warn_once(position, message, level=0):
+ if level < LEVEL or message in _warn_once_seen:
+ return
+ warn = CompileWarning(position, message)
+ line = "warning: %s\n" % warn
+ if listing_file:
+ listing_file.write(line)
+ if echo_file:
+ echo_file.write(line)
+ _warn_once_seen[message] = True
+ return warn
+
+
+# These functions can be used to momentarily suppress errors.
+
+error_stack = []
+
+
+def hold_errors():
+ error_stack.append([])
+
+
+def release_errors(ignore=False):
+ held_errors = error_stack.pop()
+ if not ignore:
+ for err in held_errors:
+ report_error(err)
+
+
+def held_errors():
+ return error_stack[-1]
+
+
# same as context manager:
@contextmanager
@@ -257,9 +257,9 @@ def local_errors(ignore=False):
release_errors(ignore=ignore)
-# this module needs a redesign to support parallel cythonisation, but
-# for now, the following works at least in sequential compiler runs
-
-def reset():
- _warn_once_seen.clear()
- del error_stack[:]
+# this module needs a redesign to support parallel cythonisation, but
+# for now, the following works at least in sequential compiler runs
+
+def reset():
+ _warn_once_seen.clear()
+ del error_stack[:]
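
hold_errors(), held_errors() and release_errors() above implement the temporary-suppression stack that the local_errors() context manager wraps: while a frame is held, report_error() appends to it instead of writing to the listing/echo files. A short usage sketch, assuming a Cython installation is importable (a real CompileError normally carries a FileSourceDescriptor position, so None is used here only to keep the example self-contained):

    from Cython.Compiler import Errors

    Errors.hold_errors()
    try:
        err = Errors.CompileError(None, u"example failure")
        Errors.report_error(err)              # collected on the held frame, not echoed
        assert Errors.held_errors() == [err]  # inspect the held frame
    finally:
        Errors.release_errors(ignore=True)    # drop instead of reporting
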
diff --git a/contrib/tools/cython/Cython/Compiler/ExprNodes.py b/contrib/tools/cython/Cython/Compiler/ExprNodes.py
index 4d711fa128..4a402f8126 100644
--- a/contrib/tools/cython/Cython/Compiler/ExprNodes.py
+++ b/contrib/tools/cython/Cython/Compiler/ExprNodes.py
@@ -1,91 +1,91 @@
-#
-# Parse tree nodes for expressions
-#
-
-from __future__ import absolute_import
-
-import cython
-cython.declare(error=object, warning=object, warn_once=object, InternalError=object,
- CompileError=object, UtilityCode=object, TempitaUtilityCode=object,
+#
+# Parse tree nodes for expressions
+#
+
+from __future__ import absolute_import
+
+import cython
+cython.declare(error=object, warning=object, warn_once=object, InternalError=object,
+ CompileError=object, UtilityCode=object, TempitaUtilityCode=object,
StringEncoding=object, operator=object, local_errors=object, report_error=object,
- Naming=object, Nodes=object, PyrexTypes=object, py_object_type=object,
- list_type=object, tuple_type=object, set_type=object, dict_type=object,
- unicode_type=object, str_type=object, bytes_type=object, type_type=object,
- Builtin=object, Symtab=object, Utils=object, find_coercion_error=object,
- debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object,
+ Naming=object, Nodes=object, PyrexTypes=object, py_object_type=object,
+ list_type=object, tuple_type=object, set_type=object, dict_type=object,
+ unicode_type=object, str_type=object, bytes_type=object, type_type=object,
+ Builtin=object, Symtab=object, Utils=object, find_coercion_error=object,
+ debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object,
bytearray_type=object, slice_type=object, _py_int_types=object,
IS_PYTHON3=cython.bint)
-
+
import re
import sys
import copy
-import os.path
-import operator
-
+import os.path
+import operator
+
from .Errors import (
error, warning, InternalError, CompileError, report_error, local_errors)
-from .Code import UtilityCode, TempitaUtilityCode
-from . import StringEncoding
-from . import Naming
-from . import Nodes
+from .Code import UtilityCode, TempitaUtilityCode
+from . import StringEncoding
+from . import Naming
+from . import Nodes
from .Nodes import Node, utility_code_for_imports, analyse_type_annotation
-from . import PyrexTypes
-from .PyrexTypes import py_object_type, c_long_type, typecast, error_type, \
- unspecified_type
-from . import TypeSlots
-from .Builtin import list_type, tuple_type, set_type, dict_type, type_type, \
- unicode_type, str_type, bytes_type, bytearray_type, basestring_type, slice_type
-from . import Builtin
-from . import Symtab
-from .. import Utils
-from .Annotate import AnnotationItem
-from . import Future
-from ..Debugging import print_call_chain
-from .DebugFlags import debug_disposal_code, debug_temp_alloc, \
- debug_coercion
+from . import PyrexTypes
+from .PyrexTypes import py_object_type, c_long_type, typecast, error_type, \
+ unspecified_type
+from . import TypeSlots
+from .Builtin import list_type, tuple_type, set_type, dict_type, type_type, \
+ unicode_type, str_type, bytes_type, bytearray_type, basestring_type, slice_type
+from . import Builtin
+from . import Symtab
+from .. import Utils
+from .Annotate import AnnotationItem
+from . import Future
+from ..Debugging import print_call_chain
+from .DebugFlags import debug_disposal_code, debug_temp_alloc, \
+ debug_coercion
from .Pythran import (to_pythran, is_pythran_supported_type, is_pythran_supported_operation_type,
is_pythran_expr, pythran_func_type, pythran_binop_type, pythran_unaryop_type, has_np_pythran,
pythran_indexing_code, pythran_indexing_type, is_pythran_supported_node_or_none, pythran_type,
pythran_is_numpy_func_supported, pythran_get_func_include_file, pythran_functor)
from .PyrexTypes import PythranExpr
-
-try:
- from __builtin__ import basestring
-except ImportError:
+
+try:
+ from __builtin__ import basestring
+except ImportError:
# Python 3
basestring = str
any_string_type = (bytes, str)
else:
# Python 2
any_string_type = (bytes, unicode)
-
-
+
+
if sys.version_info[0] >= 3:
IS_PYTHON3 = True
_py_int_types = int
else:
IS_PYTHON3 = False
_py_int_types = (int, long)
-
-
-class NotConstant(object):
- _obj = None
-
- def __new__(cls):
- if NotConstant._obj is None:
- NotConstant._obj = super(NotConstant, cls).__new__(cls)
-
- return NotConstant._obj
-
- def __repr__(self):
- return "<NOT CONSTANT>"
-
-not_a_constant = NotConstant()
-constant_value_not_set = object()
-
-# error messages when coercing from key[0] to key[1]
-coercion_error_dict = {
- # string related errors
+
+
+class NotConstant(object):
+ _obj = None
+
+ def __new__(cls):
+ if NotConstant._obj is None:
+ NotConstant._obj = super(NotConstant, cls).__new__(cls)
+
+ return NotConstant._obj
+
+ def __repr__(self):
+ return "<NOT CONSTANT>"
+
+not_a_constant = NotConstant()
+constant_value_not_set = object()
+
+# error messages when coercing from key[0] to key[1]
+coercion_error_dict = {
+ # string related errors
(unicode_type, str_type): ("Cannot convert Unicode string to 'str' implicitly."
" This is not portable and requires explicit encoding."),
(unicode_type, bytes_type): "Cannot convert Unicode string to 'bytes' implicitly, encoding required.",
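
NotConstant in the hunk above is written as a singleton: __new__ caches the first instance, and the module-level not_a_constant is that instance, so identity checks against constant_result stay cheap. A quick sketch of that behaviour, assuming Cython's ExprNodes module is importable:

    from Cython.Compiler import ExprNodes

    a = ExprNodes.NotConstant()
    b = ExprNodes.NotConstant()
    # Every construction returns the cached instance, which is also the
    # module-level sentinel used for constant_result bookkeeping.
    assert a is b is ExprNodes.not_a_constant
    print(repr(a))   # "<NOT CONSTANT>"
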
@@ -117,76 +117,76 @@ coercion_error_dict = {
(PyrexTypes.c_uchar_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_const_uchar_ptr_type, unicode_type): (
"Cannot convert 'char*' to unicode implicitly, decoding required"),
-}
-
-def find_coercion_error(type_tuple, default, env):
- err = coercion_error_dict.get(type_tuple)
- if err is None:
- return default
+}
+
+def find_coercion_error(type_tuple, default, env):
+ err = coercion_error_dict.get(type_tuple)
+ if err is None:
+ return default
elif (env.directives['c_string_encoding'] and
any(t in type_tuple for t in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_uchar_ptr_type,
PyrexTypes.c_const_char_ptr_type, PyrexTypes.c_const_uchar_ptr_type))):
- if type_tuple[1].is_pyobject:
- return default
- elif env.directives['c_string_encoding'] in ('ascii', 'default'):
- return default
- else:
- return "'%s' objects do not support coercion to C types with non-ascii or non-default c_string_encoding" % type_tuple[0].name
- else:
- return err
-
-
-def default_str_type(env):
- return {
- 'bytes': bytes_type,
- 'bytearray': bytearray_type,
- 'str': str_type,
- 'unicode': unicode_type
- }.get(env.directives['c_string_type'])
-
-
-def check_negative_indices(*nodes):
- """
- Raise a warning on nodes that are known to have negative numeric values.
- Used to find (potential) bugs inside of "wraparound=False" sections.
- """
- for node in nodes:
+ if type_tuple[1].is_pyobject:
+ return default
+ elif env.directives['c_string_encoding'] in ('ascii', 'default'):
+ return default
+ else:
+ return "'%s' objects do not support coercion to C types with non-ascii or non-default c_string_encoding" % type_tuple[0].name
+ else:
+ return err
+
+
+def default_str_type(env):
+ return {
+ 'bytes': bytes_type,
+ 'bytearray': bytearray_type,
+ 'str': str_type,
+ 'unicode': unicode_type
+ }.get(env.directives['c_string_type'])
+
+
+def check_negative_indices(*nodes):
+ """
+ Raise a warning on nodes that are known to have negative numeric values.
+ Used to find (potential) bugs inside of "wraparound=False" sections.
+ """
+ for node in nodes:
if node is None or (
not isinstance(node.constant_result, _py_int_types) and
not isinstance(node.constant_result, float)):
- continue
- if node.constant_result < 0:
- warning(node.pos,
- "the result of using negative indices inside of "
- "code sections marked as 'wraparound=False' is "
- "undefined", level=1)
-
-
-def infer_sequence_item_type(env, seq_node, index_node=None, seq_type=None):
- if not seq_node.is_sequence_constructor:
- if seq_type is None:
- seq_type = seq_node.infer_type(env)
- if seq_type is tuple_type:
- # tuples are immutable => we can safely follow assignments
- if seq_node.cf_state and len(seq_node.cf_state) == 1:
- try:
- seq_node = seq_node.cf_state[0].rhs
- except AttributeError:
- pass
- if seq_node is not None and seq_node.is_sequence_constructor:
- if index_node is not None and index_node.has_constant_result():
- try:
- item = seq_node.args[index_node.constant_result]
- except (ValueError, TypeError, IndexError):
- pass
- else:
- return item.infer_type(env)
- # if we're lucky, all items have the same type
- item_types = set([item.infer_type(env) for item in seq_node.args])
- if len(item_types) == 1:
- return item_types.pop()
- return None
-
+ continue
+ if node.constant_result < 0:
+ warning(node.pos,
+ "the result of using negative indices inside of "
+ "code sections marked as 'wraparound=False' is "
+ "undefined", level=1)
+
+
+def infer_sequence_item_type(env, seq_node, index_node=None, seq_type=None):
+ if not seq_node.is_sequence_constructor:
+ if seq_type is None:
+ seq_type = seq_node.infer_type(env)
+ if seq_type is tuple_type:
+ # tuples are immutable => we can safely follow assignments
+ if seq_node.cf_state and len(seq_node.cf_state) == 1:
+ try:
+ seq_node = seq_node.cf_state[0].rhs
+ except AttributeError:
+ pass
+ if seq_node is not None and seq_node.is_sequence_constructor:
+ if index_node is not None and index_node.has_constant_result():
+ try:
+ item = seq_node.args[index_node.constant_result]
+ except (ValueError, TypeError, IndexError):
+ pass
+ else:
+ return item.infer_type(env)
+ # if we're lucky, all items have the same type
+ item_types = set([item.infer_type(env) for item in seq_node.args])
+ if len(item_types) == 1:
+ return item_types.pop()
+ return None
+
def make_dedup_key(outer_type, item_nodes):
"""
@@ -236,7 +236,7 @@ def get_exception_handler(exception_value):
'"Error converting c++ exception.");' % (
exception_value.entry.cname),
False)
-
+
def maybe_check_py_error(code, check_py_exception, pos, nogil):
if check_py_exception:
if nogil:
@@ -292,212 +292,212 @@ def translate_double_cpp_exception(code, pos, lhs_type, lhs_code, rhs_code,
code.putln('}')
-class ExprNode(Node):
- # subexprs [string] Class var holding names of subexpr node attrs
- # type PyrexType Type of the result
- # result_code string Code fragment
- # result_ctype string C type of result_code if different from type
- # is_temp boolean Result is in a temporary variable
- # is_sequence_constructor
- # boolean Is a list or tuple constructor expression
- # is_starred boolean Is a starred expression (e.g. '*a')
- # saved_subexpr_nodes
- # [ExprNode or [ExprNode or None] or None]
- # Cached result of subexpr_nodes()
- # use_managed_ref boolean use ref-counted temps/assignments/etc.
- # result_is_used boolean indicates that the result will be dropped and the
+class ExprNode(Node):
+ # subexprs [string] Class var holding names of subexpr node attrs
+ # type PyrexType Type of the result
+ # result_code string Code fragment
+ # result_ctype string C type of result_code if different from type
+ # is_temp boolean Result is in a temporary variable
+ # is_sequence_constructor
+ # boolean Is a list or tuple constructor expression
+ # is_starred boolean Is a starred expression (e.g. '*a')
+ # saved_subexpr_nodes
+ # [ExprNode or [ExprNode or None] or None]
+ # Cached result of subexpr_nodes()
+ # use_managed_ref boolean use ref-counted temps/assignments/etc.
+ # result_is_used boolean indicates that the result will be dropped and the
# is_numpy_attribute boolean Is a Numpy module attribute
- # result_code/temp_result can safely be set to None
+ # result_code/temp_result can safely be set to None
# annotation ExprNode or None PEP526 annotation for names or expressions
-
- result_ctype = None
- type = None
+
+ result_ctype = None
+ type = None
annotation = None
- temp_code = None
- old_temp = None # error checker for multiple frees etc.
- use_managed_ref = True # can be set by optimisation transforms
- result_is_used = True
+ temp_code = None
+ old_temp = None # error checker for multiple frees etc.
+ use_managed_ref = True # can be set by optimisation transforms
+ result_is_used = True
is_numpy_attribute = False
-
- # The Analyse Expressions phase for expressions is split
- # into two sub-phases:
- #
- # Analyse Types
- # Determines the result type of the expression based
- # on the types of its sub-expressions, and inserts
- # coercion nodes into the expression tree where needed.
- # Marks nodes which will need to have temporary variables
- # allocated.
- #
- # Allocate Temps
- # Allocates temporary variables where needed, and fills
- # in the result_code field of each node.
- #
- # ExprNode provides some convenience routines which
- # perform both of the above phases. These should only
- # be called from statement nodes, and only when no
- # coercion nodes need to be added around the expression
- # being analysed. In that case, the above two phases
- # should be invoked separately.
- #
- # Framework code in ExprNode provides much of the common
- # processing for the various phases. It makes use of the
- # 'subexprs' class attribute of ExprNodes, which should
- # contain a list of the names of attributes which can
- # hold sub-nodes or sequences of sub-nodes.
- #
- # The framework makes use of a number of abstract methods.
- # Their responsibilities are as follows.
- #
- # Declaration Analysis phase
- #
- # analyse_target_declaration
- # Called during the Analyse Declarations phase to analyse
- # the LHS of an assignment or argument of a del statement.
- # Nodes which cannot be the LHS of an assignment need not
- # implement it.
- #
- # Expression Analysis phase
- #
- # analyse_types
- # - Call analyse_types on all sub-expressions.
- # - Check operand types, and wrap coercion nodes around
- # sub-expressions where needed.
- # - Set the type of this node.
- # - If a temporary variable will be required for the
- # result, set the is_temp flag of this node.
- #
- # analyse_target_types
- # Called during the Analyse Types phase to analyse
- # the LHS of an assignment or argument of a del
- # statement. Similar responsibilities to analyse_types.
- #
- # target_code
- # Called by the default implementation of allocate_target_temps.
- # Should return a C lvalue for assigning to the node. The default
- # implementation calls calculate_result_code.
- #
- # check_const
- # - Check that this node and its subnodes form a
- # legal constant expression. If so, do nothing,
- # otherwise call not_const.
- #
- # The default implementation of check_const
- # assumes that the expression is not constant.
- #
- # check_const_addr
- # - Same as check_const, except check that the
- # expression is a C lvalue whose address is
- # constant. Otherwise, call addr_not_const.
- #
- # The default implementation of calc_const_addr
- # assumes that the expression is not a constant
- # lvalue.
- #
- # Code Generation phase
- #
- # generate_evaluation_code
- # - Call generate_evaluation_code for sub-expressions.
- # - Perform the functions of generate_result_code
- # (see below).
- # - If result is temporary, call generate_disposal_code
- # on all sub-expressions.
- #
- # A default implementation of generate_evaluation_code
- # is provided which uses the following abstract methods:
- #
- # generate_result_code
- # - Generate any C statements necessary to calculate
- # the result of this node from the results of its
- # sub-expressions.
- #
- # calculate_result_code
- # - Should return a C code fragment evaluating to the
- # result. This is only called when the result is not
- # a temporary.
- #
- # generate_assignment_code
- # Called on the LHS of an assignment.
- # - Call generate_evaluation_code for sub-expressions.
- # - Generate code to perform the assignment.
- # - If the assignment absorbed a reference, call
- # generate_post_assignment_code on the RHS,
- # otherwise call generate_disposal_code on it.
- #
- # generate_deletion_code
- # Called on an argument of a del statement.
- # - Call generate_evaluation_code for sub-expressions.
- # - Generate code to perform the deletion.
- # - Call generate_disposal_code on all sub-expressions.
- #
- #
-
- is_sequence_constructor = False
- is_dict_literal = False
+
+ # The Analyse Expressions phase for expressions is split
+ # into two sub-phases:
+ #
+ # Analyse Types
+ # Determines the result type of the expression based
+ # on the types of its sub-expressions, and inserts
+ # coercion nodes into the expression tree where needed.
+ # Marks nodes which will need to have temporary variables
+ # allocated.
+ #
+ # Allocate Temps
+ # Allocates temporary variables where needed, and fills
+ # in the result_code field of each node.
+ #
+ # ExprNode provides some convenience routines which
+ # perform both of the above phases. These should only
+ # be called from statement nodes, and only when no
+ # coercion nodes need to be added around the expression
+ # being analysed. In that case, the above two phases
+ # should be invoked separately.
+ #
+ # Framework code in ExprNode provides much of the common
+ # processing for the various phases. It makes use of the
+ # 'subexprs' class attribute of ExprNodes, which should
+ # contain a list of the names of attributes which can
+ # hold sub-nodes or sequences of sub-nodes.
+ #
+ # The framework makes use of a number of abstract methods.
+ # Their responsibilities are as follows.
+ #
+ # Declaration Analysis phase
+ #
+ # analyse_target_declaration
+ # Called during the Analyse Declarations phase to analyse
+ # the LHS of an assignment or argument of a del statement.
+ # Nodes which cannot be the LHS of an assignment need not
+ # implement it.
+ #
+ # Expression Analysis phase
+ #
+ # analyse_types
+ # - Call analyse_types on all sub-expressions.
+ # - Check operand types, and wrap coercion nodes around
+ # sub-expressions where needed.
+ # - Set the type of this node.
+ # - If a temporary variable will be required for the
+ # result, set the is_temp flag of this node.
+ #
+ # analyse_target_types
+ # Called during the Analyse Types phase to analyse
+ # the LHS of an assignment or argument of a del
+ # statement. Similar responsibilities to analyse_types.
+ #
+ # target_code
+ # Called by the default implementation of allocate_target_temps.
+ # Should return a C lvalue for assigning to the node. The default
+ # implementation calls calculate_result_code.
+ #
+ # check_const
+ # - Check that this node and its subnodes form a
+ # legal constant expression. If so, do nothing,
+ # otherwise call not_const.
+ #
+ # The default implementation of check_const
+ # assumes that the expression is not constant.
+ #
+ # check_const_addr
+ # - Same as check_const, except check that the
+ # expression is a C lvalue whose address is
+ # constant. Otherwise, call addr_not_const.
+ #
+ # The default implementation of calc_const_addr
+ # assumes that the expression is not a constant
+ # lvalue.
+ #
+ # Code Generation phase
+ #
+ # generate_evaluation_code
+ # - Call generate_evaluation_code for sub-expressions.
+ # - Perform the functions of generate_result_code
+ # (see below).
+ # - If result is temporary, call generate_disposal_code
+ # on all sub-expressions.
+ #
+ # A default implementation of generate_evaluation_code
+ # is provided which uses the following abstract methods:
+ #
+ # generate_result_code
+ # - Generate any C statements necessary to calculate
+ # the result of this node from the results of its
+ # sub-expressions.
+ #
+ # calculate_result_code
+ # - Should return a C code fragment evaluating to the
+ # result. This is only called when the result is not
+ # a temporary.
+ #
+ # generate_assignment_code
+ # Called on the LHS of an assignment.
+ # - Call generate_evaluation_code for sub-expressions.
+ # - Generate code to perform the assignment.
+ # - If the assignment absorbed a reference, call
+ # generate_post_assignment_code on the RHS,
+ # otherwise call generate_disposal_code on it.
+ #
+ # generate_deletion_code
+ # Called on an argument of a del statement.
+ # - Call generate_evaluation_code for sub-expressions.
+ # - Generate code to perform the deletion.
+ # - Call generate_disposal_code on all sub-expressions.
+ #
+ #
+
+ is_sequence_constructor = False
+ is_dict_literal = False
is_set_literal = False
- is_string_literal = False
- is_attribute = False
- is_subscript = False
+ is_string_literal = False
+ is_attribute = False
+ is_subscript = False
is_slice = False
-
+
is_buffer_access = False
is_memview_index = False
is_memview_slice = False
is_memview_broadcast = False
is_memview_copy_assignment = False
- saved_subexpr_nodes = None
- is_temp = False
- is_target = False
- is_starred = False
-
- constant_result = constant_value_not_set
-
- child_attrs = property(fget=operator.attrgetter('subexprs'))
-
- def not_implemented(self, method_name):
- print_call_chain(method_name, "not implemented") ###
- raise InternalError(
- "%s.%s not implemented" %
- (self.__class__.__name__, method_name))
-
- def is_lvalue(self):
- return 0
-
- def is_addressable(self):
- return self.is_lvalue() and not self.type.is_memoryviewslice
-
- def is_ephemeral(self):
- # An ephemeral node is one whose result is in
- # a Python temporary and we suspect there are no
- # other references to it. Certain operations are
- # disallowed on such values, since they are
- # likely to result in a dangling pointer.
- return self.type.is_pyobject and self.is_temp
-
- def subexpr_nodes(self):
- # Extract a list of subexpression nodes based
- # on the contents of the subexprs class attribute.
- nodes = []
- for name in self.subexprs:
- item = getattr(self, name)
- if item is not None:
- if type(item) is list:
- nodes.extend(item)
- else:
- nodes.append(item)
- return nodes
-
- def result(self):
- if self.is_temp:
- #if not self.temp_code:
- # pos = (os.path.basename(self.pos[0].get_description()),) + self.pos[1:] if self.pos else '(?)'
- # raise RuntimeError("temp result name not set in %s at %r" % (
- # self.__class__.__name__, pos))
- return self.temp_code
- else:
- return self.calculate_result_code()
-
+ saved_subexpr_nodes = None
+ is_temp = False
+ is_target = False
+ is_starred = False
+
+ constant_result = constant_value_not_set
+
+ child_attrs = property(fget=operator.attrgetter('subexprs'))
+
+ def not_implemented(self, method_name):
+ print_call_chain(method_name, "not implemented") ###
+ raise InternalError(
+ "%s.%s not implemented" %
+ (self.__class__.__name__, method_name))
+
+ def is_lvalue(self):
+ return 0
+
+ def is_addressable(self):
+ return self.is_lvalue() and not self.type.is_memoryviewslice
+
+ def is_ephemeral(self):
+ # An ephemeral node is one whose result is in
+ # a Python temporary and we suspect there are no
+ # other references to it. Certain operations are
+ # disallowed on such values, since they are
+ # likely to result in a dangling pointer.
+ return self.type.is_pyobject and self.is_temp
+
+ def subexpr_nodes(self):
+ # Extract a list of subexpression nodes based
+ # on the contents of the subexprs class attribute.
+ nodes = []
+ for name in self.subexprs:
+ item = getattr(self, name)
+ if item is not None:
+ if type(item) is list:
+ nodes.extend(item)
+ else:
+ nodes.append(item)
+ return nodes
+
+ def result(self):
+ if self.is_temp:
+ #if not self.temp_code:
+ # pos = (os.path.basename(self.pos[0].get_description()),) + self.pos[1:] if self.pos else '(?)'
+ # raise RuntimeError("temp result name not set in %s at %r" % (
+ # self.__class__.__name__, pos))
+ return self.temp_code
+ else:
+ return self.calculate_result_code()
+
def pythran_result(self, type_=None):
if is_pythran_supported_node_or_none(self):
return to_pythran(self)
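
The ExprNode framework described in the long comment block above hangs everything off the `subexprs` class attribute: child_attrs is just a property over it, and subexpr_nodes() flattens the named attributes into a list of children for the analysis and code-generation walks. A stripped-down illustration of that convention with a stand-in node class (not a real ExprNode):

    class ToyNode(object):
        # Names of attributes that may hold child nodes, as in ExprNode.subexprs.
        subexprs = ['operand1', 'operand2']

        def __init__(self, operand1=None, operand2=None):
            self.operand1 = operand1
            self.operand2 = operand2

        def subexpr_nodes(self):
            # Same walk as ExprNode.subexpr_nodes(): collect every non-None
            # attribute named in subexprs, flattening list-valued ones.
            nodes = []
            for name in self.subexprs:
                item = getattr(self, name)
                if item is not None:
                    if type(item) is list:
                        nodes.extend(item)
                    else:
                        nodes.append(item)
            return nodes

    tree = ToyNode(operand1=ToyNode(), operand2=ToyNode())
    assert len(tree.subexpr_nodes()) == 2
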
@@ -511,130 +511,130 @@ class ExprNode(Node):
"""
return True
- def result_as(self, type = None):
- # Return the result code cast to the specified C type.
- if (self.is_temp and self.type.is_pyobject and
- type != py_object_type):
- # Allocated temporaries are always PyObject *, which may not
- # reflect the actual type (e.g. an extension type)
- return typecast(type, py_object_type, self.result())
- return typecast(type, self.ctype(), self.result())
-
- def py_result(self):
- # Return the result code cast to PyObject *.
- return self.result_as(py_object_type)
-
- def ctype(self):
- # Return the native C type of the result (i.e. the
- # C type of the result_code expression).
- return self.result_ctype or self.type
-
- def get_constant_c_result_code(self):
- # Return the constant value of this node as a result code
- # string, or None if the node is not constant. This method
- # can be called when the constant result code is required
- # before the code generation phase.
- #
- # The return value is a string that can represent a simple C
- # value, a constant C name or a constant C expression. If the
- # node type depends on Python code, this must return None.
- return None
-
- def calculate_constant_result(self):
- # Calculate the constant compile time result value of this
- # expression and store it in ``self.constant_result``. Does
- # nothing by default, thus leaving ``self.constant_result``
- # unknown. If valid, the result can be an arbitrary Python
- # value.
- #
- # This must only be called when it is assured that all
- # sub-expressions have a valid constant_result value. The
- # ConstantFolding transform will do this.
- pass
-
- def has_constant_result(self):
- return self.constant_result is not constant_value_not_set and \
- self.constant_result is not not_a_constant
-
- def compile_time_value(self, denv):
- # Return value of compile-time expression, or report error.
- error(self.pos, "Invalid compile-time expression")
-
- def compile_time_value_error(self, e):
- error(self.pos, "Error in compile-time expression: %s: %s" % (
- e.__class__.__name__, e))
-
- # ------------- Declaration Analysis ----------------
-
- def analyse_target_declaration(self, env):
- error(self.pos, "Cannot assign to or delete this")
-
- # ------------- Expression Analysis ----------------
-
- def analyse_const_expression(self, env):
- # Called during the analyse_declarations phase of a
- # constant expression. Analyses the expression's type,
- # checks whether it is a legal const expression,
- # and determines its value.
- node = self.analyse_types(env)
- node.check_const()
- return node
-
- def analyse_expressions(self, env):
- # Convenience routine performing both the Type
- # Analysis and Temp Allocation phases for a whole
- # expression.
- return self.analyse_types(env)
-
- def analyse_target_expression(self, env, rhs):
- # Convenience routine performing both the Type
- # Analysis and Temp Allocation phases for the LHS of
- # an assignment.
- return self.analyse_target_types(env)
-
- def analyse_boolean_expression(self, env):
- # Analyse expression and coerce to a boolean.
- node = self.analyse_types(env)
- bool = node.coerce_to_boolean(env)
- return bool
-
- def analyse_temp_boolean_expression(self, env):
- # Analyse boolean expression and coerce result into
- # a temporary. This is used when a branch is to be
- # performed on the result and we won't have an
- # opportunity to ensure disposal code is executed
- # afterwards. By forcing the result into a temporary,
- # we ensure that all disposal has been done by the
- # time we get the result.
- node = self.analyse_types(env)
- return node.coerce_to_boolean(env).coerce_to_simple(env)
-
- # --------------- Type Inference -----------------
-
- def type_dependencies(self, env):
- # Returns the list of entries whose types must be determined
- # before the type of self can be inferred.
- if hasattr(self, 'type') and self.type is not None:
- return ()
- return sum([node.type_dependencies(env) for node in self.subexpr_nodes()], ())
-
- def infer_type(self, env):
- # Attempt to deduce the type of self.
- # Differs from analyse_types as it avoids unnecessary
- # analysis of subexpressions, but can assume everything
- # in self.type_dependencies() has been resolved.
- if hasattr(self, 'type') and self.type is not None:
- return self.type
- elif hasattr(self, 'entry') and self.entry is not None:
- return self.entry.type
- else:
- self.not_implemented("infer_type")
-
- def nonlocally_immutable(self):
- # Returns whether this variable is a safe reference, i.e.
- # can't be modified as part of globals or closures.
- return self.is_literal or self.is_temp or self.type.is_array or self.type.is_cfunction
-
+ def result_as(self, type = None):
+ # Return the result code cast to the specified C type.
+ if (self.is_temp and self.type.is_pyobject and
+ type != py_object_type):
+ # Allocated temporaries are always PyObject *, which may not
+ # reflect the actual type (e.g. an extension type)
+ return typecast(type, py_object_type, self.result())
+ return typecast(type, self.ctype(), self.result())
+
+ def py_result(self):
+ # Return the result code cast to PyObject *.
+ return self.result_as(py_object_type)
+
+ def ctype(self):
+ # Return the native C type of the result (i.e. the
+ # C type of the result_code expression).
+ return self.result_ctype or self.type
+
+ def get_constant_c_result_code(self):
+ # Return the constant value of this node as a result code
+ # string, or None if the node is not constant. This method
+ # can be called when the constant result code is required
+ # before the code generation phase.
+ #
+ # The return value is a string that can represent a simple C
+ # value, a constant C name or a constant C expression. If the
+ # node type depends on Python code, this must return None.
+ return None
+
+ def calculate_constant_result(self):
+ # Calculate the constant compile time result value of this
+ # expression and store it in ``self.constant_result``. Does
+ # nothing by default, thus leaving ``self.constant_result``
+ # unknown. If valid, the result can be an arbitrary Python
+ # value.
+ #
+ # This must only be called when it is assured that all
+ # sub-expressions have a valid constant_result value. The
+ # ConstantFolding transform will do this.
+ pass
+
+ def has_constant_result(self):
+ return self.constant_result is not constant_value_not_set and \
+ self.constant_result is not not_a_constant
+
+ def compile_time_value(self, denv):
+ # Return value of compile-time expression, or report error.
+ error(self.pos, "Invalid compile-time expression")
+
+ def compile_time_value_error(self, e):
+ error(self.pos, "Error in compile-time expression: %s: %s" % (
+ e.__class__.__name__, e))
+
+ # ------------- Declaration Analysis ----------------
+
+ def analyse_target_declaration(self, env):
+ error(self.pos, "Cannot assign to or delete this")
+
+ # ------------- Expression Analysis ----------------
+
+ def analyse_const_expression(self, env):
+ # Called during the analyse_declarations phase of a
+ # constant expression. Analyses the expression's type,
+ # checks whether it is a legal const expression,
+ # and determines its value.
+ node = self.analyse_types(env)
+ node.check_const()
+ return node
+
+ def analyse_expressions(self, env):
+ # Convenience routine performing both the Type
+ # Analysis and Temp Allocation phases for a whole
+ # expression.
+ return self.analyse_types(env)
+
+ def analyse_target_expression(self, env, rhs):
+ # Convenience routine performing both the Type
+ # Analysis and Temp Allocation phases for the LHS of
+ # an assignment.
+ return self.analyse_target_types(env)
+
+ def analyse_boolean_expression(self, env):
+ # Analyse expression and coerce to a boolean.
+ node = self.analyse_types(env)
+ bool = node.coerce_to_boolean(env)
+ return bool
+
+ def analyse_temp_boolean_expression(self, env):
+ # Analyse boolean expression and coerce result into
+ # a temporary. This is used when a branch is to be
+ # performed on the result and we won't have an
+ # opportunity to ensure disposal code is executed
+ # afterwards. By forcing the result into a temporary,
+ # we ensure that all disposal has been done by the
+ # time we get the result.
+ node = self.analyse_types(env)
+ return node.coerce_to_boolean(env).coerce_to_simple(env)
+
+ # --------------- Type Inference -----------------
+
+ def type_dependencies(self, env):
+ # Returns the list of entries whose types must be determined
+ # before the type of self can be inferred.
+ if hasattr(self, 'type') and self.type is not None:
+ return ()
+ return sum([node.type_dependencies(env) for node in self.subexpr_nodes()], ())
+
+ def infer_type(self, env):
+ # Attempt to deduce the type of self.
+ # Differs from analyse_types as it avoids unnecessary
+ # analysis of subexpressions, but can assume everything
+ # in self.type_dependencies() has been resolved.
+ if hasattr(self, 'type') and self.type is not None:
+ return self.type
+ elif hasattr(self, 'entry') and self.entry is not None:
+ return self.entry.type
+ else:
+ self.not_implemented("infer_type")
+
+ def nonlocally_immutable(self):
+ # Returns whether this variable is a safe reference, i.e.
+ # can't be modified as part of globals or closures.
+ return self.is_literal or self.is_temp or self.type.is_array or self.type.is_cfunction
+
def inferable_item_node(self, index=0):
"""
Return a node that represents the (type) result of an indexing operation,
@@ -643,289 +643,289 @@ class ExprNode(Node):
return IndexNode(self.pos, base=self, index=IntNode(
self.pos, value=str(index), constant_result=index, type=PyrexTypes.c_py_ssize_t_type))
- # --------------- Type Analysis ------------------
-
- def analyse_as_module(self, env):
- # If this node can be interpreted as a reference to a
- # cimported module, return its scope, else None.
- return None
-
- def analyse_as_type(self, env):
- # If this node can be interpreted as a reference to a
- # type, return that type, else None.
- return None
-
- def analyse_as_extension_type(self, env):
- # If this node can be interpreted as a reference to an
- # extension type or builtin type, return its type, else None.
- return None
-
- def analyse_types(self, env):
- self.not_implemented("analyse_types")
-
- def analyse_target_types(self, env):
- return self.analyse_types(env)
-
- def nogil_check(self, env):
- # By default, any expression based on Python objects is
- # prevented in nogil environments. Subtypes must override
- # this if they can work without the GIL.
- if self.type and self.type.is_pyobject:
- self.gil_error()
-
- def gil_assignment_check(self, env):
- if env.nogil and self.type.is_pyobject:
- error(self.pos, "Assignment of Python object not allowed without gil")
-
- def check_const(self):
- self.not_const()
- return False
-
- def not_const(self):
- error(self.pos, "Not allowed in a constant expression")
-
- def check_const_addr(self):
- self.addr_not_const()
- return False
-
- def addr_not_const(self):
- error(self.pos, "Address is not constant")
-
- # ----------------- Result Allocation -----------------
-
- def result_in_temp(self):
- # Return true if result is in a temporary owned by
- # this node or one of its subexpressions. Overridden
- # by certain nodes which can share the result of
- # a subnode.
- return self.is_temp
-
- def target_code(self):
- # Return code fragment for use as LHS of a C assignment.
- return self.calculate_result_code()
-
- def calculate_result_code(self):
- self.not_implemented("calculate_result_code")
-
-# def release_target_temp(self, env):
-# # Release temporaries used by LHS of an assignment.
-# self.release_subexpr_temps(env)
-
- def allocate_temp_result(self, code):
- if self.temp_code:
- raise RuntimeError("Temp allocated multiple times in %r: %r" % (self.__class__.__name__, self.pos))
- type = self.type
- if not type.is_void:
- if type.is_pyobject:
- type = PyrexTypes.py_object_type
+ # --------------- Type Analysis ------------------
+
+ def analyse_as_module(self, env):
+ # If this node can be interpreted as a reference to a
+ # cimported module, return its scope, else None.
+ return None
+
+ def analyse_as_type(self, env):
+ # If this node can be interpreted as a reference to a
+ # type, return that type, else None.
+ return None
+
+ def analyse_as_extension_type(self, env):
+ # If this node can be interpreted as a reference to an
+ # extension type or builtin type, return its type, else None.
+ return None
+
+ def analyse_types(self, env):
+ self.not_implemented("analyse_types")
+
+ def analyse_target_types(self, env):
+ return self.analyse_types(env)
+
+ def nogil_check(self, env):
+ # By default, any expression based on Python objects is
+ # prevented in nogil environments. Subtypes must override
+ # this if they can work without the GIL.
+ if self.type and self.type.is_pyobject:
+ self.gil_error()
+
+ def gil_assignment_check(self, env):
+ if env.nogil and self.type.is_pyobject:
+ error(self.pos, "Assignment of Python object not allowed without gil")
+
+ def check_const(self):
+ self.not_const()
+ return False
+
+ def not_const(self):
+ error(self.pos, "Not allowed in a constant expression")
+
+ def check_const_addr(self):
+ self.addr_not_const()
+ return False
+
+ def addr_not_const(self):
+ error(self.pos, "Address is not constant")
+
+ # ----------------- Result Allocation -----------------
+
+ def result_in_temp(self):
+ # Return true if result is in a temporary owned by
+ # this node or one of its subexpressions. Overridden
+ # by certain nodes which can share the result of
+ # a subnode.
+ return self.is_temp
+
+ def target_code(self):
+ # Return code fragment for use as LHS of a C assignment.
+ return self.calculate_result_code()
+
+ def calculate_result_code(self):
+ self.not_implemented("calculate_result_code")
+
+# def release_target_temp(self, env):
+# # Release temporaries used by LHS of an assignment.
+# self.release_subexpr_temps(env)
+
+ def allocate_temp_result(self, code):
+ if self.temp_code:
+ raise RuntimeError("Temp allocated multiple times in %r: %r" % (self.__class__.__name__, self.pos))
+ type = self.type
+ if not type.is_void:
+ if type.is_pyobject:
+ type = PyrexTypes.py_object_type
elif not (self.result_is_used or type.is_memoryviewslice or self.is_c_result_required()):
self.temp_code = None
return
- self.temp_code = code.funcstate.allocate_temp(
- type, manage_ref=self.use_managed_ref)
- else:
- self.temp_code = None
-
- def release_temp_result(self, code):
- if not self.temp_code:
- if not self.result_is_used:
- # not used anyway, so ignore if not set up
- return
- pos = (os.path.basename(self.pos[0].get_description()),) + self.pos[1:] if self.pos else '(?)'
- if self.old_temp:
- raise RuntimeError("temp %s released multiple times in %s at %r" % (
- self.old_temp, self.__class__.__name__, pos))
- else:
- raise RuntimeError("no temp, but release requested in %s at %r" % (
- self.__class__.__name__, pos))
- code.funcstate.release_temp(self.temp_code)
- self.old_temp = self.temp_code
- self.temp_code = None
-
- # ---------------- Code Generation -----------------
-
- def make_owned_reference(self, code):
- """
- If result is a pyobject, make sure we own a reference to it.
- If the result is in a temp, it is already a new reference.
- """
- if self.type.is_pyobject and not self.result_in_temp():
- code.put_incref(self.result(), self.ctype())
-
- def make_owned_memoryviewslice(self, code):
- """
- Make sure we own the reference to this memoryview slice.
- """
- if not self.result_in_temp():
- code.put_incref_memoryviewslice(self.result(),
- have_gil=self.in_nogil_context)
-
- def generate_evaluation_code(self, code):
- # Generate code to evaluate this node and
- # its sub-expressions, and dispose of any
- # temporary results of its sub-expressions.
- self.generate_subexpr_evaluation_code(code)
-
- code.mark_pos(self.pos)
- if self.is_temp:
- self.allocate_temp_result(code)
-
- self.generate_result_code(code)
- if self.is_temp and not (self.type.is_string or self.type.is_pyunicode_ptr):
- # If we are temp we do not need to wait until this node is disposed
- # before disposing children.
- self.generate_subexpr_disposal_code(code)
- self.free_subexpr_temps(code)
-
- def generate_subexpr_evaluation_code(self, code):
- for node in self.subexpr_nodes():
- node.generate_evaluation_code(code)
-
- def generate_result_code(self, code):
- self.not_implemented("generate_result_code")
-
- def generate_disposal_code(self, code):
- if self.is_temp:
- if self.type.is_string or self.type.is_pyunicode_ptr:
- # postponed from self.generate_evaluation_code()
- self.generate_subexpr_disposal_code(code)
- self.free_subexpr_temps(code)
- if self.result():
- if self.type.is_pyobject:
- code.put_decref_clear(self.result(), self.ctype())
- elif self.type.is_memoryviewslice:
- code.put_xdecref_memoryviewslice(
- self.result(), have_gil=not self.in_nogil_context)
+ self.temp_code = code.funcstate.allocate_temp(
+ type, manage_ref=self.use_managed_ref)
+ else:
+ self.temp_code = None
+
+ def release_temp_result(self, code):
+ if not self.temp_code:
+ if not self.result_is_used:
+ # not used anyway, so ignore if not set up
+ return
+ pos = (os.path.basename(self.pos[0].get_description()),) + self.pos[1:] if self.pos else '(?)'
+ if self.old_temp:
+ raise RuntimeError("temp %s released multiple times in %s at %r" % (
+ self.old_temp, self.__class__.__name__, pos))
+ else:
+ raise RuntimeError("no temp, but release requested in %s at %r" % (
+ self.__class__.__name__, pos))
+ code.funcstate.release_temp(self.temp_code)
+ self.old_temp = self.temp_code
+ self.temp_code = None
+
+ # ---------------- Code Generation -----------------
+
+ def make_owned_reference(self, code):
+ """
+ If result is a pyobject, make sure we own a reference to it.
+ If the result is in a temp, it is already a new reference.
+ """
+ if self.type.is_pyobject and not self.result_in_temp():
+ code.put_incref(self.result(), self.ctype())
+
+ def make_owned_memoryviewslice(self, code):
+ """
+ Make sure we own the reference to this memoryview slice.
+ """
+ if not self.result_in_temp():
+ code.put_incref_memoryviewslice(self.result(),
+ have_gil=self.in_nogil_context)
+
+ def generate_evaluation_code(self, code):
+ # Generate code to evaluate this node and
+ # its sub-expressions, and dispose of any
+ # temporary results of its sub-expressions.
+ self.generate_subexpr_evaluation_code(code)
+
+ code.mark_pos(self.pos)
+ if self.is_temp:
+ self.allocate_temp_result(code)
+
+ self.generate_result_code(code)
+ if self.is_temp and not (self.type.is_string or self.type.is_pyunicode_ptr):
+ # If we are temp we do not need to wait until this node is disposed
+ # before disposing children.
+ self.generate_subexpr_disposal_code(code)
+ self.free_subexpr_temps(code)
+
+ def generate_subexpr_evaluation_code(self, code):
+ for node in self.subexpr_nodes():
+ node.generate_evaluation_code(code)
+
+ def generate_result_code(self, code):
+ self.not_implemented("generate_result_code")
+
+ def generate_disposal_code(self, code):
+ if self.is_temp:
+ if self.type.is_string or self.type.is_pyunicode_ptr:
+ # postponed from self.generate_evaluation_code()
+ self.generate_subexpr_disposal_code(code)
+ self.free_subexpr_temps(code)
+ if self.result():
+ if self.type.is_pyobject:
+ code.put_decref_clear(self.result(), self.ctype())
+ elif self.type.is_memoryviewslice:
+ code.put_xdecref_memoryviewslice(
+ self.result(), have_gil=not self.in_nogil_context)
code.putln("%s.memview = NULL;" % self.result())
code.putln("%s.data = NULL;" % self.result())
- else:
- # Already done if self.is_temp
- self.generate_subexpr_disposal_code(code)
-
- def generate_subexpr_disposal_code(self, code):
- # Generate code to dispose of temporary results
- # of all sub-expressions.
- for node in self.subexpr_nodes():
- node.generate_disposal_code(code)
-
- def generate_post_assignment_code(self, code):
- if self.is_temp:
- if self.type.is_string or self.type.is_pyunicode_ptr:
- # postponed from self.generate_evaluation_code()
- self.generate_subexpr_disposal_code(code)
- self.free_subexpr_temps(code)
- elif self.type.is_pyobject:
- code.putln("%s = 0;" % self.result())
- elif self.type.is_memoryviewslice:
- code.putln("%s.memview = NULL;" % self.result())
- code.putln("%s.data = NULL;" % self.result())
- else:
- self.generate_subexpr_disposal_code(code)
-
+ else:
+ # Already done if self.is_temp
+ self.generate_subexpr_disposal_code(code)
+
+ def generate_subexpr_disposal_code(self, code):
+ # Generate code to dispose of temporary results
+ # of all sub-expressions.
+ for node in self.subexpr_nodes():
+ node.generate_disposal_code(code)
+
+ def generate_post_assignment_code(self, code):
+ if self.is_temp:
+ if self.type.is_string or self.type.is_pyunicode_ptr:
+ # postponed from self.generate_evaluation_code()
+ self.generate_subexpr_disposal_code(code)
+ self.free_subexpr_temps(code)
+ elif self.type.is_pyobject:
+ code.putln("%s = 0;" % self.result())
+ elif self.type.is_memoryviewslice:
+ code.putln("%s.memview = NULL;" % self.result())
+ code.putln("%s.data = NULL;" % self.result())
+ else:
+ self.generate_subexpr_disposal_code(code)
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
exception_check=None, exception_value=None):
- # Stub method for nodes which are not legal as
- # the LHS of an assignment. An error will have
- # been reported earlier.
- pass
-
- def generate_deletion_code(self, code, ignore_nonexisting=False):
- # Stub method for nodes that are not legal as
- # the argument of a del statement. An error
- # will have been reported earlier.
- pass
-
- def free_temps(self, code):
- if self.is_temp:
- if not self.type.is_void:
- self.release_temp_result(code)
- else:
- self.free_subexpr_temps(code)
-
- def free_subexpr_temps(self, code):
- for sub in self.subexpr_nodes():
- sub.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- pass
-
- # ---------------- Annotation ---------------------
-
- def annotate(self, code):
- for node in self.subexpr_nodes():
- node.annotate(code)
-
- # ----------------- Coercion ----------------------
-
- def coerce_to(self, dst_type, env):
- # Coerce the result so that it can be assigned to
- # something of type dst_type. If processing is necessary,
- # wraps this node in a coercion node and returns that.
- # Otherwise, returns this node unchanged.
- #
- # This method is called during the analyse_expressions
- # phase of the src_node's processing.
- #
- # Note that subclasses that override this (especially
- # ConstNodes) must not (re-)set their own .type attribute
- # here. Since expression nodes may turn up in different
- # places in the tree (e.g. inside of CloneNodes in cascaded
- # assignments), this method must return a new node instance
- # if it changes the type.
- #
- src = self
- src_type = self.type
-
- if self.check_for_coercion_error(dst_type, env):
- return self
-
+ # Stub method for nodes which are not legal as
+ # the LHS of an assignment. An error will have
+ # been reported earlier.
+ pass
+
+ def generate_deletion_code(self, code, ignore_nonexisting=False):
+ # Stub method for nodes that are not legal as
+ # the argument of a del statement. An error
+ # will have been reported earlier.
+ pass
+
+ def free_temps(self, code):
+ if self.is_temp:
+ if not self.type.is_void:
+ self.release_temp_result(code)
+ else:
+ self.free_subexpr_temps(code)
+
+ def free_subexpr_temps(self, code):
+ for sub in self.subexpr_nodes():
+ sub.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ pass
+
+ # ---------------- Annotation ---------------------
+
+ def annotate(self, code):
+ for node in self.subexpr_nodes():
+ node.annotate(code)
+
+ # ----------------- Coercion ----------------------
+
+ def coerce_to(self, dst_type, env):
+ # Coerce the result so that it can be assigned to
+ # something of type dst_type. If processing is necessary,
+ # wraps this node in a coercion node and returns that.
+ # Otherwise, returns this node unchanged.
+ #
+ # This method is called during the analyse_expressions
+ # phase of the src_node's processing.
+ #
+ # Note that subclasses that override this (especially
+ # ConstNodes) must not (re-)set their own .type attribute
+ # here. Since expression nodes may turn up in different
+ # places in the tree (e.g. inside of CloneNodes in cascaded
+ # assignments), this method must return a new node instance
+ # if it changes the type.
+ #
+ src = self
+ src_type = self.type
+
+ if self.check_for_coercion_error(dst_type, env):
+ return self
+
used_as_reference = dst_type.is_reference
if used_as_reference and not src_type.is_reference:
- dst_type = dst_type.ref_base_type
-
- if src_type.is_const:
- src_type = src_type.const_base_type
-
- if src_type.is_fused or dst_type.is_fused:
- # See if we are coercing a fused function to a pointer to a
- # specialized function
- if (src_type.is_cfunction and not dst_type.is_fused and
- dst_type.is_ptr and dst_type.base_type.is_cfunction):
-
- dst_type = dst_type.base_type
-
- for signature in src_type.get_all_specialized_function_types():
- if signature.same_as(dst_type):
- src.type = signature
- src.entry = src.type.entry
- src.entry.used = True
- return self
-
- if src_type.is_fused:
- error(self.pos, "Type is not specialized")
+ dst_type = dst_type.ref_base_type
+
+ if src_type.is_const:
+ src_type = src_type.const_base_type
+
+ if src_type.is_fused or dst_type.is_fused:
+ # See if we are coercing a fused function to a pointer to a
+ # specialized function
+ if (src_type.is_cfunction and not dst_type.is_fused and
+ dst_type.is_ptr and dst_type.base_type.is_cfunction):
+
+ dst_type = dst_type.base_type
+
+ for signature in src_type.get_all_specialized_function_types():
+ if signature.same_as(dst_type):
+ src.type = signature
+ src.entry = src.type.entry
+ src.entry.used = True
+ return self
+
+ if src_type.is_fused:
+ error(self.pos, "Type is not specialized")
elif src_type.is_null_ptr and dst_type.is_ptr:
# NULL can be implicitly cast to any pointer type
return self
- else:
- error(self.pos, "Cannot coerce to a type that is not specialized")
-
- self.type = error_type
- return self
-
- if self.coercion_type is not None:
- # This is purely for error checking purposes!
- node = NameNode(self.pos, name='', type=self.coercion_type)
- node.coerce_to(dst_type, env)
-
- if dst_type.is_memoryviewslice:
- from . import MemoryView
- if not src.type.is_memoryviewslice:
- if src.type.is_pyobject:
- src = CoerceToMemViewSliceNode(src, dst_type, env)
- elif src.type.is_array:
+ else:
+ error(self.pos, "Cannot coerce to a type that is not specialized")
+
+ self.type = error_type
+ return self
+
+ if self.coercion_type is not None:
+ # This is purely for error checking purposes!
+ node = NameNode(self.pos, name='', type=self.coercion_type)
+ node.coerce_to(dst_type, env)
+
+ if dst_type.is_memoryviewslice:
+ from . import MemoryView
+ if not src.type.is_memoryviewslice:
+ if src.type.is_pyobject:
+ src = CoerceToMemViewSliceNode(src, dst_type, env)
+ elif src.type.is_array:
src = CythonArrayNode.from_carray(src, env).coerce_to(dst_type, env)
- elif not src_type.is_error:
- error(self.pos,
+ elif not src_type.is_error:
+ error(self.pos,
"Cannot convert '%s' to memoryviewslice" % (src_type,))
else:
if src.type.writable_needed:
@@ -938,18 +938,18 @@ class ExprNode(Node):
else:
msg = "Different base types for memoryviews (%s, %s)"
tup = src.type.dtype, dst_type.dtype
-
+
error(self.pos, msg % tup)
-
- elif dst_type.is_pyobject:
- if not src.type.is_pyobject:
- if dst_type is bytes_type and src.type.is_int:
- src = CoerceIntToBytesNode(src, env)
- else:
- src = CoerceToPyTypeNode(src, env, type=dst_type)
- if not src.type.subtype_of(dst_type):
- if src.constant_result is not None:
- src = PyTypeTestNode(src, dst_type, env)
+
+ elif dst_type.is_pyobject:
+ if not src.type.is_pyobject:
+ if dst_type is bytes_type and src.type.is_int:
+ src = CoerceIntToBytesNode(src, env)
+ else:
+ src = CoerceToPyTypeNode(src, env, type=dst_type)
+ if not src.type.subtype_of(dst_type):
+ if src.constant_result is not None:
+ src = PyTypeTestNode(src, dst_type, env)
elif is_pythran_expr(dst_type) and is_pythran_supported_type(src.type):
# We let the compiler decide whether this is valid
return src
@@ -960,58 +960,58 @@ class ExprNode(Node):
return src
# Else, we need to convert the Pythran expression to a Python object
src = CoerceToPyTypeNode(src, env, type=dst_type)
- elif src.type.is_pyobject:
+ elif src.type.is_pyobject:
if used_as_reference and dst_type.is_cpp_class:
warning(
self.pos,
"Cannot pass Python object as C++ data structure reference (%s &), will pass by copy." % dst_type)
- src = CoerceFromPyTypeNode(dst_type, src, env)
- elif (dst_type.is_complex
- and src_type != dst_type
- and dst_type.assignable_from(src_type)):
- src = CoerceToComplexNode(src, dst_type, env)
- else: # neither src nor dst are py types
- # Added the string comparison, since for c types that
- # is enough, but Cython gets confused when the types are
- # in different pxi files.
+ src = CoerceFromPyTypeNode(dst_type, src, env)
+ elif (dst_type.is_complex
+ and src_type != dst_type
+ and dst_type.assignable_from(src_type)):
+ src = CoerceToComplexNode(src, dst_type, env)
+ else: # neither src nor dst are py types
+ # Added the string comparison, since for c types that
+ # is enough, but Cython gets confused when the types are
+ # in different pxi files.
# TODO: Remove this hack and require shared declarations.
if not (src.type == dst_type or str(src.type) == str(dst_type) or dst_type.assignable_from(src_type)):
- self.fail_assignment(dst_type)
- return src
-
- def fail_assignment(self, dst_type):
- error(self.pos, "Cannot assign type '%s' to '%s'" % (self.type, dst_type))
-
- def check_for_coercion_error(self, dst_type, env, fail=False, default=None):
- if fail and not default:
- default = "Cannot assign type '%(FROM)s' to '%(TO)s'"
- message = find_coercion_error((self.type, dst_type), default, env)
- if message is not None:
- error(self.pos, message % {'FROM': self.type, 'TO': dst_type})
- return True
- if fail:
- self.fail_assignment(dst_type)
- return True
- return False
-
- def coerce_to_pyobject(self, env):
- return self.coerce_to(PyrexTypes.py_object_type, env)
-
- def coerce_to_boolean(self, env):
- # Coerce result to something acceptable as
- # a boolean value.
-
- # if it's constant, calculate the result now
- if self.has_constant_result():
- bool_value = bool(self.constant_result)
- return BoolNode(self.pos, value=bool_value,
- constant_result=bool_value)
-
- type = self.type
- if type.is_enum or type.is_error:
- return self
- elif type.is_pyobject or type.is_int or type.is_ptr or type.is_float:
- return CoerceToBooleanNode(self, env)
+ self.fail_assignment(dst_type)
+ return src
+
+ def fail_assignment(self, dst_type):
+ error(self.pos, "Cannot assign type '%s' to '%s'" % (self.type, dst_type))
+
+ def check_for_coercion_error(self, dst_type, env, fail=False, default=None):
+ if fail and not default:
+ default = "Cannot assign type '%(FROM)s' to '%(TO)s'"
+ message = find_coercion_error((self.type, dst_type), default, env)
+ if message is not None:
+ error(self.pos, message % {'FROM': self.type, 'TO': dst_type})
+ return True
+ if fail:
+ self.fail_assignment(dst_type)
+ return True
+ return False
+
+ def coerce_to_pyobject(self, env):
+ return self.coerce_to(PyrexTypes.py_object_type, env)
+
+ def coerce_to_boolean(self, env):
+ # Coerce result to something acceptable as
+ # a boolean value.
+
+ # if it's constant, calculate the result now
+ if self.has_constant_result():
+ bool_value = bool(self.constant_result)
+ return BoolNode(self.pos, value=bool_value,
+ constant_result=bool_value)
+
+ type = self.type
+ if type.is_enum or type.is_error:
+ return self
+ elif type.is_pyobject or type.is_int or type.is_ptr or type.is_float:
+ return CoerceToBooleanNode(self, env)
elif type.is_cpp_class and type.scope and type.scope.lookup("operator bool"):
return SimpleCallNode(
self.pos,
@@ -1022,319 +1022,319 @@ class ExprNode(Node):
bool_value = len(type.components) == 0
return BoolNode(self.pos, value=bool_value,
constant_result=bool_value)
- else:
- error(self.pos, "Type '%s' not acceptable as a boolean" % type)
- return self
-
- def coerce_to_integer(self, env):
- # If not already some C integer type, coerce to longint.
- if self.type.is_int:
- return self
- else:
- return self.coerce_to(PyrexTypes.c_long_type, env)
-
- def coerce_to_temp(self, env):
- # Ensure that the result is in a temporary.
- if self.result_in_temp():
- return self
- else:
- return CoerceToTempNode(self, env)
-
- def coerce_to_simple(self, env):
- # Ensure that the result is simple (see is_simple).
- if self.is_simple():
- return self
- else:
- return self.coerce_to_temp(env)
-
- def is_simple(self):
- # A node is simple if its result is something that can
- # be referred to without performing any operations, e.g.
- # a constant, local var, C global var, struct member
- # reference, or temporary.
- return self.result_in_temp()
-
- def may_be_none(self):
- if self.type and not (self.type.is_pyobject or
- self.type.is_memoryviewslice):
- return False
- if self.has_constant_result():
- return self.constant_result is not None
- return True
-
- def as_cython_attribute(self):
- return None
-
- def as_none_safe_node(self, message, error="PyExc_TypeError", format_args=()):
- # Wraps the node in a NoneCheckNode if it is not known to be
- # not-None (e.g. because it is a Python literal).
- if self.may_be_none():
- return NoneCheckNode(self, error, message, format_args)
- else:
- return self
-
- @classmethod
- def from_node(cls, node, **kwargs):
- """Instantiate this node class from another node, properly
- copying over all attributes that one would forget otherwise.
- """
- attributes = "cf_state cf_maybe_null cf_is_null constant_result".split()
- for attr_name in attributes:
- if attr_name in kwargs:
- continue
- try:
- value = getattr(node, attr_name)
- except AttributeError:
- pass
- else:
- kwargs[attr_name] = value
- return cls(node.pos, **kwargs)
-
-
-class AtomicExprNode(ExprNode):
- # Abstract base class for expression nodes which have
- # no sub-expressions.
-
- subexprs = []
-
- # Override to optimize -- we know we have no children
- def generate_subexpr_evaluation_code(self, code):
- pass
- def generate_subexpr_disposal_code(self, code):
- pass
-
-class PyConstNode(AtomicExprNode):
- # Abstract base class for constant Python values.
-
- is_literal = 1
- type = py_object_type
-
- def is_simple(self):
- return 1
-
- def may_be_none(self):
- return False
-
- def analyse_types(self, env):
- return self
-
- def calculate_result_code(self):
- return self.value
-
- def generate_result_code(self, code):
- pass
-
-
-class NoneNode(PyConstNode):
- # The constant value None
-
- is_none = 1
- value = "Py_None"
-
- constant_result = None
-
- nogil_check = None
-
- def compile_time_value(self, denv):
- return None
-
- def may_be_none(self):
- return True
-
+ else:
+ error(self.pos, "Type '%s' not acceptable as a boolean" % type)
+ return self
+
+ def coerce_to_integer(self, env):
+ # If not already some C integer type, coerce to longint.
+ if self.type.is_int:
+ return self
+ else:
+ return self.coerce_to(PyrexTypes.c_long_type, env)
+
+ def coerce_to_temp(self, env):
+ # Ensure that the result is in a temporary.
+ if self.result_in_temp():
+ return self
+ else:
+ return CoerceToTempNode(self, env)
+
+ def coerce_to_simple(self, env):
+ # Ensure that the result is simple (see is_simple).
+ if self.is_simple():
+ return self
+ else:
+ return self.coerce_to_temp(env)
+
+ def is_simple(self):
+ # A node is simple if its result is something that can
+ # be referred to without performing any operations, e.g.
+ # a constant, local var, C global var, struct member
+ # reference, or temporary.
+ return self.result_in_temp()
+
+ def may_be_none(self):
+ if self.type and not (self.type.is_pyobject or
+ self.type.is_memoryviewslice):
+ return False
+ if self.has_constant_result():
+ return self.constant_result is not None
+ return True
+
+ def as_cython_attribute(self):
+ return None
+
+ def as_none_safe_node(self, message, error="PyExc_TypeError", format_args=()):
+ # Wraps the node in a NoneCheckNode if it is not known to be
+ # not-None (e.g. because it is a Python literal).
+ if self.may_be_none():
+ return NoneCheckNode(self, error, message, format_args)
+ else:
+ return self
+
+ @classmethod
+ def from_node(cls, node, **kwargs):
+ """Instantiate this node class from another node, properly
+ copying over all attributes that one would forget otherwise.
+ """
+ attributes = "cf_state cf_maybe_null cf_is_null constant_result".split()
+ for attr_name in attributes:
+ if attr_name in kwargs:
+ continue
+ try:
+ value = getattr(node, attr_name)
+ except AttributeError:
+ pass
+ else:
+ kwargs[attr_name] = value
+ return cls(node.pos, **kwargs)
+
+
+class AtomicExprNode(ExprNode):
+ # Abstract base class for expression nodes which have
+ # no sub-expressions.
+
+ subexprs = []
+
+ # Override to optimize -- we know we have no children
+ def generate_subexpr_evaluation_code(self, code):
+ pass
+ def generate_subexpr_disposal_code(self, code):
+ pass
+
+class PyConstNode(AtomicExprNode):
+ # Abstract base class for constant Python values.
+
+ is_literal = 1
+ type = py_object_type
+
+ def is_simple(self):
+ return 1
+
+ def may_be_none(self):
+ return False
+
+ def analyse_types(self, env):
+ return self
+
+ def calculate_result_code(self):
+ return self.value
+
+ def generate_result_code(self, code):
+ pass
+
+
+class NoneNode(PyConstNode):
+ # The constant value None
+
+ is_none = 1
+ value = "Py_None"
+
+ constant_result = None
+
+ nogil_check = None
+
+ def compile_time_value(self, denv):
+ return None
+
+ def may_be_none(self):
+ return True
+
def coerce_to(self, dst_type, env):
if not (dst_type.is_pyobject or dst_type.is_memoryviewslice or dst_type.is_error):
# Catch this error early and loudly.
error(self.pos, "Cannot assign None to %s" % dst_type)
return super(NoneNode, self).coerce_to(dst_type, env)
-
-
-class EllipsisNode(PyConstNode):
- # '...' in a subscript list.
-
- value = "Py_Ellipsis"
-
- constant_result = Ellipsis
-
- def compile_time_value(self, denv):
- return Ellipsis
-
-
-class ConstNode(AtomicExprNode):
- # Abstract base type for literal constant nodes.
- #
- # value string C code fragment
-
- is_literal = 1
- nogil_check = None
-
- def is_simple(self):
- return 1
-
- def nonlocally_immutable(self):
- return 1
-
- def may_be_none(self):
- return False
-
- def analyse_types(self, env):
- return self # Types are held in class variables
-
- def check_const(self):
- return True
-
- def get_constant_c_result_code(self):
- return self.calculate_result_code()
-
- def calculate_result_code(self):
- return str(self.value)
-
- def generate_result_code(self, code):
- pass
-
-
-class BoolNode(ConstNode):
- type = PyrexTypes.c_bint_type
- # The constant value True or False
-
- def calculate_constant_result(self):
- self.constant_result = self.value
-
- def compile_time_value(self, denv):
- return self.value
-
- def calculate_result_code(self):
- if self.type.is_pyobject:
- return self.value and 'Py_True' or 'Py_False'
- else:
- return str(int(self.value))
-
- def coerce_to(self, dst_type, env):
+
+
+class EllipsisNode(PyConstNode):
+ # '...' in a subscript list.
+
+ value = "Py_Ellipsis"
+
+ constant_result = Ellipsis
+
+ def compile_time_value(self, denv):
+ return Ellipsis
+
+
+class ConstNode(AtomicExprNode):
+ # Abstract base type for literal constant nodes.
+ #
+ # value string C code fragment
+
+ is_literal = 1
+ nogil_check = None
+
+ def is_simple(self):
+ return 1
+
+ def nonlocally_immutable(self):
+ return 1
+
+ def may_be_none(self):
+ return False
+
+ def analyse_types(self, env):
+ return self # Types are held in class variables
+
+ def check_const(self):
+ return True
+
+ def get_constant_c_result_code(self):
+ return self.calculate_result_code()
+
+ def calculate_result_code(self):
+ return str(self.value)
+
+ def generate_result_code(self, code):
+ pass
+
+
+class BoolNode(ConstNode):
+ type = PyrexTypes.c_bint_type
+ # The constant value True or False
+
+ def calculate_constant_result(self):
+ self.constant_result = self.value
+
+ def compile_time_value(self, denv):
+ return self.value
+
+ def calculate_result_code(self):
+ if self.type.is_pyobject:
+ return self.value and 'Py_True' or 'Py_False'
+ else:
+ return str(int(self.value))
+
+ def coerce_to(self, dst_type, env):
if dst_type == self.type:
return self
if dst_type is py_object_type and self.type is Builtin.bool_type:
return self
- if dst_type.is_pyobject and self.type.is_int:
- return BoolNode(
- self.pos, value=self.value,
- constant_result=self.constant_result,
- type=Builtin.bool_type)
- if dst_type.is_int and self.type.is_pyobject:
- return BoolNode(
- self.pos, value=self.value,
- constant_result=self.constant_result,
- type=PyrexTypes.c_bint_type)
- return ConstNode.coerce_to(self, dst_type, env)
-
-
-class NullNode(ConstNode):
- type = PyrexTypes.c_null_ptr_type
- value = "NULL"
- constant_result = 0
-
- def get_constant_c_result_code(self):
- return self.value
-
-
-class CharNode(ConstNode):
- type = PyrexTypes.c_char_type
-
- def calculate_constant_result(self):
- self.constant_result = ord(self.value)
-
- def compile_time_value(self, denv):
- return ord(self.value)
-
- def calculate_result_code(self):
- return "'%s'" % StringEncoding.escape_char(self.value)
-
-
-class IntNode(ConstNode):
-
- # unsigned "" or "U"
- # longness "" or "L" or "LL"
- # is_c_literal True/False/None creator considers this a C integer literal
-
- unsigned = ""
- longness = ""
- is_c_literal = None # unknown
-
- def __init__(self, pos, **kwds):
- ExprNode.__init__(self, pos, **kwds)
- if 'type' not in kwds:
- self.type = self.find_suitable_type_for_value()
-
- def find_suitable_type_for_value(self):
- if self.constant_result is constant_value_not_set:
- try:
- self.calculate_constant_result()
- except ValueError:
- pass
- # we ignore 'is_c_literal = True' and instead map signed 32bit
- # integers as C long values
- if self.is_c_literal or \
+ if dst_type.is_pyobject and self.type.is_int:
+ return BoolNode(
+ self.pos, value=self.value,
+ constant_result=self.constant_result,
+ type=Builtin.bool_type)
+ if dst_type.is_int and self.type.is_pyobject:
+ return BoolNode(
+ self.pos, value=self.value,
+ constant_result=self.constant_result,
+ type=PyrexTypes.c_bint_type)
+ return ConstNode.coerce_to(self, dst_type, env)
+
+
+class NullNode(ConstNode):
+ type = PyrexTypes.c_null_ptr_type
+ value = "NULL"
+ constant_result = 0
+
+ def get_constant_c_result_code(self):
+ return self.value
+
+
+class CharNode(ConstNode):
+ type = PyrexTypes.c_char_type
+
+ def calculate_constant_result(self):
+ self.constant_result = ord(self.value)
+
+ def compile_time_value(self, denv):
+ return ord(self.value)
+
+ def calculate_result_code(self):
+ return "'%s'" % StringEncoding.escape_char(self.value)
+
+
+class IntNode(ConstNode):
+
+ # unsigned "" or "U"
+ # longness "" or "L" or "LL"
+ # is_c_literal True/False/None creator considers this a C integer literal
+
+ unsigned = ""
+ longness = ""
+ is_c_literal = None # unknown
+
+ def __init__(self, pos, **kwds):
+ ExprNode.__init__(self, pos, **kwds)
+ if 'type' not in kwds:
+ self.type = self.find_suitable_type_for_value()
+
+ def find_suitable_type_for_value(self):
+ if self.constant_result is constant_value_not_set:
+ try:
+ self.calculate_constant_result()
+ except ValueError:
+ pass
+ # we ignore 'is_c_literal = True' and instead map signed 32bit
+ # integers as C long values
+ if self.is_c_literal or \
not self.has_constant_result() or \
- self.unsigned or self.longness == 'LL':
- # clearly a C literal
- rank = (self.longness == 'LL') and 2 or 1
- suitable_type = PyrexTypes.modifiers_and_name_to_type[not self.unsigned, rank, "int"]
- if self.type:
- suitable_type = PyrexTypes.widest_numeric_type(suitable_type, self.type)
- else:
- # C literal or Python literal - split at 32bit boundary
- if -2**31 <= self.constant_result < 2**31:
- if self.type and self.type.is_int:
- suitable_type = self.type
- else:
- suitable_type = PyrexTypes.c_long_type
- else:
- suitable_type = PyrexTypes.py_object_type
- return suitable_type
-
- def coerce_to(self, dst_type, env):
- if self.type is dst_type:
- return self
- elif dst_type.is_float:
- if self.has_constant_result():
- return FloatNode(self.pos, value='%d.0' % int(self.constant_result), type=dst_type,
- constant_result=float(self.constant_result))
- else:
- return FloatNode(self.pos, value=self.value, type=dst_type,
- constant_result=not_a_constant)
- if dst_type.is_numeric and not dst_type.is_complex:
- node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
+ self.unsigned or self.longness == 'LL':
+ # clearly a C literal
+ rank = (self.longness == 'LL') and 2 or 1
+ suitable_type = PyrexTypes.modifiers_and_name_to_type[not self.unsigned, rank, "int"]
+ if self.type:
+ suitable_type = PyrexTypes.widest_numeric_type(suitable_type, self.type)
+ else:
+ # C literal or Python literal - split at 32bit boundary
+ if -2**31 <= self.constant_result < 2**31:
+ if self.type and self.type.is_int:
+ suitable_type = self.type
+ else:
+ suitable_type = PyrexTypes.c_long_type
+ else:
+ suitable_type = PyrexTypes.py_object_type
+ return suitable_type
+
+ def coerce_to(self, dst_type, env):
+ if self.type is dst_type:
+ return self
+ elif dst_type.is_float:
+ if self.has_constant_result():
+ return FloatNode(self.pos, value='%d.0' % int(self.constant_result), type=dst_type,
+ constant_result=float(self.constant_result))
+ else:
+ return FloatNode(self.pos, value=self.value, type=dst_type,
+ constant_result=not_a_constant)
+ if dst_type.is_numeric and not dst_type.is_complex:
+ node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
type=dst_type, is_c_literal=True,
- unsigned=self.unsigned, longness=self.longness)
- return node
- elif dst_type.is_pyobject:
- node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
+ unsigned=self.unsigned, longness=self.longness)
+ return node
+ elif dst_type.is_pyobject:
+ node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
type=PyrexTypes.py_object_type, is_c_literal=False,
- unsigned=self.unsigned, longness=self.longness)
- else:
- # FIXME: not setting the type here to keep it working with
- # complex numbers. Should they be special cased?
- node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
- unsigned=self.unsigned, longness=self.longness)
- # We still need to perform normal coerce_to processing on the
- # result, because we might be coercing to an extension type,
- # in which case a type test node will be needed.
- return ConstNode.coerce_to(node, dst_type, env)
-
- def coerce_to_boolean(self, env):
- return IntNode(
- self.pos, value=self.value,
- constant_result=self.constant_result,
- type=PyrexTypes.c_bint_type,
- unsigned=self.unsigned, longness=self.longness)
-
- def generate_evaluation_code(self, code):
- if self.type.is_pyobject:
- # pre-allocate a Python version of the number
- plain_integer_string = str(Utils.str_to_number(self.value))
- self.result_code = code.get_py_int(plain_integer_string, self.longness)
- else:
- self.result_code = self.get_constant_c_result_code()
-
- def get_constant_c_result_code(self):
+ unsigned=self.unsigned, longness=self.longness)
+ else:
+ # FIXME: not setting the type here to keep it working with
+ # complex numbers. Should they be special cased?
+ node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
+ unsigned=self.unsigned, longness=self.longness)
+ # We still need to perform normal coerce_to processing on the
+ # result, because we might be coercing to an extension type,
+ # in which case a type test node will be needed.
+ return ConstNode.coerce_to(node, dst_type, env)
+
+ def coerce_to_boolean(self, env):
+ return IntNode(
+ self.pos, value=self.value,
+ constant_result=self.constant_result,
+ type=PyrexTypes.c_bint_type,
+ unsigned=self.unsigned, longness=self.longness)
+
+ def generate_evaluation_code(self, code):
+ if self.type.is_pyobject:
+ # pre-allocate a Python version of the number
+ plain_integer_string = str(Utils.str_to_number(self.value))
+ self.result_code = code.get_py_int(plain_integer_string, self.longness)
+ else:
+ self.result_code = self.get_constant_c_result_code()
+
+ def get_constant_c_result_code(self):
unsigned, longness = self.unsigned, self.longness
literal = self.value_as_c_integer_string()
if not (unsigned or longness) and self.type.is_int and literal[0] == '-' and literal[1] != '0':
@@ -1344,9 +1344,9 @@ class IntNode(ConstNode):
elif self.type.rank >= PyrexTypes.c_long_type.rank:
longness = 'L'
return literal + unsigned + longness
-
- def value_as_c_integer_string(self):
- value = self.value
+
+ def value_as_c_integer_string(self):
+ value = self.value
if len(value) <= 2:
# too short to go wrong (and simplifies code below)
return value
@@ -1372,66 +1372,66 @@ class IntNode(ConstNode):
# but they do for hex (see C standard 6.4.4.1)
value = '0x%X' % int(value)
return neg_sign + value
-
- def calculate_result_code(self):
- return self.result_code
-
- def calculate_constant_result(self):
- self.constant_result = Utils.str_to_number(self.value)
-
- def compile_time_value(self, denv):
- return Utils.str_to_number(self.value)
-
-class FloatNode(ConstNode):
- type = PyrexTypes.c_double_type
-
- def calculate_constant_result(self):
- self.constant_result = float(self.value)
-
- def compile_time_value(self, denv):
- return float(self.value)
-
- def coerce_to(self, dst_type, env):
- if dst_type.is_pyobject and self.type.is_float:
- return FloatNode(
- self.pos, value=self.value,
- constant_result=self.constant_result,
- type=Builtin.float_type)
- if dst_type.is_float and self.type.is_pyobject:
- return FloatNode(
- self.pos, value=self.value,
- constant_result=self.constant_result,
- type=dst_type)
- return ConstNode.coerce_to(self, dst_type, env)
-
- def calculate_result_code(self):
- return self.result_code
-
- def get_constant_c_result_code(self):
- strval = self.value
+
+ def calculate_result_code(self):
+ return self.result_code
+
+ def calculate_constant_result(self):
+ self.constant_result = Utils.str_to_number(self.value)
+
+ def compile_time_value(self, denv):
+ return Utils.str_to_number(self.value)
+
+class FloatNode(ConstNode):
+ type = PyrexTypes.c_double_type
+
+ def calculate_constant_result(self):
+ self.constant_result = float(self.value)
+
+ def compile_time_value(self, denv):
+ return float(self.value)
+
+ def coerce_to(self, dst_type, env):
+ if dst_type.is_pyobject and self.type.is_float:
+ return FloatNode(
+ self.pos, value=self.value,
+ constant_result=self.constant_result,
+ type=Builtin.float_type)
+ if dst_type.is_float and self.type.is_pyobject:
+ return FloatNode(
+ self.pos, value=self.value,
+ constant_result=self.constant_result,
+ type=dst_type)
+ return ConstNode.coerce_to(self, dst_type, env)
+
+ def calculate_result_code(self):
+ return self.result_code
+
+ def get_constant_c_result_code(self):
+ strval = self.value
assert isinstance(strval, basestring)
- cmpval = repr(float(strval))
- if cmpval == 'nan':
- return "(Py_HUGE_VAL * 0)"
- elif cmpval == 'inf':
- return "Py_HUGE_VAL"
- elif cmpval == '-inf':
- return "(-Py_HUGE_VAL)"
- else:
- return strval
-
- def generate_evaluation_code(self, code):
- c_value = self.get_constant_c_result_code()
- if self.type.is_pyobject:
- self.result_code = code.get_py_float(self.value, c_value)
- else:
- self.result_code = c_value
-
-
-def _analyse_name_as_type(name, pos, env):
- type = PyrexTypes.parse_basic_type(name)
- if type is not None:
- return type
+ cmpval = repr(float(strval))
+ if cmpval == 'nan':
+ return "(Py_HUGE_VAL * 0)"
+ elif cmpval == 'inf':
+ return "Py_HUGE_VAL"
+ elif cmpval == '-inf':
+ return "(-Py_HUGE_VAL)"
+ else:
+ return strval
+
+ def generate_evaluation_code(self, code):
+ c_value = self.get_constant_c_result_code()
+ if self.type.is_pyobject:
+ self.result_code = code.get_py_float(self.value, c_value)
+ else:
+ self.result_code = c_value
+
+
+def _analyse_name_as_type(name, pos, env):
+ type = PyrexTypes.parse_basic_type(name)
+ if type is not None:
+ return type
global_entry = env.global_scope().lookup(name)
if global_entry and global_entry.type and (
@@ -1441,7 +1441,7 @@ def _analyse_name_as_type(name, pos, env):
or global_entry.type.is_cpp_class):
return global_entry.type
- from .TreeFragment import TreeFragment
+ from .TreeFragment import TreeFragment
with local_errors(ignore=True):
pos = (pos[0], pos[1], pos[2]-7)
try:
@@ -1454,175 +1454,175 @@ def _analyse_name_as_type(name, pos, env):
sizeof_node = sizeof_node.analyse_types(env)
if isinstance(sizeof_node, SizeofTypeNode):
return sizeof_node.arg_type
- return None
-
-
-class BytesNode(ConstNode):
- # A char* or bytes literal
- #
- # value BytesLiteral
-
- is_string_literal = True
- # start off as Python 'bytes' to support len() in O(1)
- type = bytes_type
-
- def calculate_constant_result(self):
- self.constant_result = self.value
-
- def as_sliced_node(self, start, stop, step=None):
+ return None
+
+
+class BytesNode(ConstNode):
+ # A char* or bytes literal
+ #
+ # value BytesLiteral
+
+ is_string_literal = True
+ # start off as Python 'bytes' to support len() in O(1)
+ type = bytes_type
+
+ def calculate_constant_result(self):
+ self.constant_result = self.value
+
+ def as_sliced_node(self, start, stop, step=None):
value = StringEncoding.bytes_literal(self.value[start:stop:step], self.value.encoding)
return BytesNode(self.pos, value=value, constant_result=value)
-
- def compile_time_value(self, denv):
+
+ def compile_time_value(self, denv):
return self.value.byteencode()
-
- def analyse_as_type(self, env):
- return _analyse_name_as_type(self.value.decode('ISO8859-1'), self.pos, env)
-
- def can_coerce_to_char_literal(self):
- return len(self.value) == 1
-
- def coerce_to_boolean(self, env):
- # This is special because testing a C char* for truth directly
- # would yield the wrong result.
- bool_value = bool(self.value)
- return BoolNode(self.pos, value=bool_value, constant_result=bool_value)
-
- def coerce_to(self, dst_type, env):
- if self.type == dst_type:
- return self
- if dst_type.is_int:
- if not self.can_coerce_to_char_literal():
- error(self.pos, "Only single-character string literals can be coerced into ints.")
- return self
- if dst_type.is_unicode_char:
- error(self.pos, "Bytes literals cannot coerce to Py_UNICODE/Py_UCS4, use a unicode literal instead.")
- return self
- return CharNode(self.pos, value=self.value,
- constant_result=ord(self.value))
-
+
+ def analyse_as_type(self, env):
+ return _analyse_name_as_type(self.value.decode('ISO8859-1'), self.pos, env)
+
+ def can_coerce_to_char_literal(self):
+ return len(self.value) == 1
+
+ def coerce_to_boolean(self, env):
+ # This is special because testing a C char* for truth directly
+ # would yield the wrong result.
+ bool_value = bool(self.value)
+ return BoolNode(self.pos, value=bool_value, constant_result=bool_value)
+
+ def coerce_to(self, dst_type, env):
+ if self.type == dst_type:
+ return self
+ if dst_type.is_int:
+ if not self.can_coerce_to_char_literal():
+ error(self.pos, "Only single-character string literals can be coerced into ints.")
+ return self
+ if dst_type.is_unicode_char:
+ error(self.pos, "Bytes literals cannot coerce to Py_UNICODE/Py_UCS4, use a unicode literal instead.")
+ return self
+ return CharNode(self.pos, value=self.value,
+ constant_result=ord(self.value))
+
node = BytesNode(self.pos, value=self.value, constant_result=self.constant_result)
- if dst_type.is_pyobject:
- if dst_type in (py_object_type, Builtin.bytes_type):
- node.type = Builtin.bytes_type
- else:
- self.check_for_coercion_error(dst_type, env, fail=True)
+ if dst_type.is_pyobject:
+ if dst_type in (py_object_type, Builtin.bytes_type):
+ node.type = Builtin.bytes_type
+ else:
+ self.check_for_coercion_error(dst_type, env, fail=True)
return node
elif dst_type in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
- node.type = dst_type
- return node
+ node.type = dst_type
+ return node
elif dst_type in (PyrexTypes.c_uchar_ptr_type, PyrexTypes.c_const_uchar_ptr_type, PyrexTypes.c_void_ptr_type):
node.type = (PyrexTypes.c_const_char_ptr_type if dst_type == PyrexTypes.c_const_uchar_ptr_type
else PyrexTypes.c_char_ptr_type)
return CastNode(node, dst_type)
- elif dst_type.assignable_from(PyrexTypes.c_char_ptr_type):
+ elif dst_type.assignable_from(PyrexTypes.c_char_ptr_type):
# Exclude the case of passing a C string literal into a non-const C++ string.
if not dst_type.is_cpp_class or dst_type.is_const:
node.type = dst_type
return node
-
- # We still need to perform normal coerce_to processing on the
- # result, because we might be coercing to an extension type,
- # in which case a type test node will be needed.
- return ConstNode.coerce_to(node, dst_type, env)
-
- def generate_evaluation_code(self, code):
- if self.type.is_pyobject:
+
+ # We still need to perform normal coerce_to processing on the
+ # result, because we might be coercing to an extension type,
+ # in which case a type test node will be needed.
+ return ConstNode.coerce_to(node, dst_type, env)
+
+ def generate_evaluation_code(self, code):
+ if self.type.is_pyobject:
result = code.get_py_string_const(self.value)
elif self.type.is_const:
result = code.get_string_const(self.value)
- else:
+ else:
# not const => use plain C string literal and cast to mutable type
literal = self.value.as_c_string_literal()
# C++ may require a cast
result = typecast(self.type, PyrexTypes.c_void_ptr_type, literal)
self.result_code = result
-
- def get_constant_c_result_code(self):
- return None # FIXME
-
- def calculate_result_code(self):
- return self.result_code
-
-
-class UnicodeNode(ConstNode):
- # A Py_UNICODE* or unicode literal
- #
- # value EncodedString
- # bytes_value BytesLiteral the literal parsed as bytes string
- # ('-3' unicode literals only)
-
- is_string_literal = True
- bytes_value = None
- type = unicode_type
-
- def calculate_constant_result(self):
- self.constant_result = self.value
-
- def analyse_as_type(self, env):
- return _analyse_name_as_type(self.value, self.pos, env)
-
- def as_sliced_node(self, start, stop, step=None):
- if StringEncoding.string_contains_surrogates(self.value[:stop]):
- # this is unsafe as it may give different results
- # in different runtimes
- return None
- value = StringEncoding.EncodedString(self.value[start:stop:step])
- value.encoding = self.value.encoding
- if self.bytes_value is not None:
+
+ def get_constant_c_result_code(self):
+ return None # FIXME
+
+ def calculate_result_code(self):
+ return self.result_code
+
+
+class UnicodeNode(ConstNode):
+ # A Py_UNICODE* or unicode literal
+ #
+ # value EncodedString
+ # bytes_value BytesLiteral the literal parsed as bytes string
+ # ('-3' unicode literals only)
+
+ is_string_literal = True
+ bytes_value = None
+ type = unicode_type
+
+ def calculate_constant_result(self):
+ self.constant_result = self.value
+
+ def analyse_as_type(self, env):
+ return _analyse_name_as_type(self.value, self.pos, env)
+
+ def as_sliced_node(self, start, stop, step=None):
+ if StringEncoding.string_contains_surrogates(self.value[:stop]):
+ # this is unsafe as it may give different results
+ # in different runtimes
+ return None
+ value = StringEncoding.EncodedString(self.value[start:stop:step])
+ value.encoding = self.value.encoding
+ if self.bytes_value is not None:
bytes_value = StringEncoding.bytes_literal(
self.bytes_value[start:stop:step], self.bytes_value.encoding)
- else:
- bytes_value = None
- return UnicodeNode(
- self.pos, value=value, bytes_value=bytes_value,
- constant_result=value)
-
- def coerce_to(self, dst_type, env):
- if dst_type is self.type:
- pass
- elif dst_type.is_unicode_char:
- if not self.can_coerce_to_char_literal():
- error(self.pos,
- "Only single-character Unicode string literals or "
- "surrogate pairs can be coerced into Py_UCS4/Py_UNICODE.")
- return self
- int_value = ord(self.value)
- return IntNode(self.pos, type=dst_type, value=str(int_value),
- constant_result=int_value)
- elif not dst_type.is_pyobject:
- if dst_type.is_string and self.bytes_value is not None:
- # special case: '-3' enforced unicode literal used in a
- # C char* context
- return BytesNode(self.pos, value=self.bytes_value
- ).coerce_to(dst_type, env)
- if dst_type.is_pyunicode_ptr:
- node = UnicodeNode(self.pos, value=self.value)
- node.type = dst_type
- return node
- error(self.pos,
- "Unicode literals do not support coercion to C types other "
- "than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* "
- "(for strings).")
- elif dst_type not in (py_object_type, Builtin.basestring_type):
- self.check_for_coercion_error(dst_type, env, fail=True)
- return self
-
- def can_coerce_to_char_literal(self):
- return len(self.value) == 1
- ## or (len(self.value) == 2
- ## and (0xD800 <= self.value[0] <= 0xDBFF)
- ## and (0xDC00 <= self.value[1] <= 0xDFFF))
-
- def coerce_to_boolean(self, env):
- bool_value = bool(self.value)
- return BoolNode(self.pos, value=bool_value, constant_result=bool_value)
-
- def contains_surrogates(self):
- return StringEncoding.string_contains_surrogates(self.value)
-
- def generate_evaluation_code(self, code):
- if self.type.is_pyobject:
+ else:
+ bytes_value = None
+ return UnicodeNode(
+ self.pos, value=value, bytes_value=bytes_value,
+ constant_result=value)
+
+ def coerce_to(self, dst_type, env):
+ if dst_type is self.type:
+ pass
+ elif dst_type.is_unicode_char:
+ if not self.can_coerce_to_char_literal():
+ error(self.pos,
+ "Only single-character Unicode string literals or "
+ "surrogate pairs can be coerced into Py_UCS4/Py_UNICODE.")
+ return self
+ int_value = ord(self.value)
+ return IntNode(self.pos, type=dst_type, value=str(int_value),
+ constant_result=int_value)
+ elif not dst_type.is_pyobject:
+ if dst_type.is_string and self.bytes_value is not None:
+ # special case: '-3' enforced unicode literal used in a
+ # C char* context
+ return BytesNode(self.pos, value=self.bytes_value
+ ).coerce_to(dst_type, env)
+ if dst_type.is_pyunicode_ptr:
+ node = UnicodeNode(self.pos, value=self.value)
+ node.type = dst_type
+ return node
+ error(self.pos,
+ "Unicode literals do not support coercion to C types other "
+ "than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* "
+ "(for strings).")
+ elif dst_type not in (py_object_type, Builtin.basestring_type):
+ self.check_for_coercion_error(dst_type, env, fail=True)
+ return self
+
+ def can_coerce_to_char_literal(self):
+ return len(self.value) == 1
+ ## or (len(self.value) == 2
+ ## and (0xD800 <= self.value[0] <= 0xDBFF)
+ ## and (0xDC00 <= self.value[1] <= 0xDFFF))
+
+ def coerce_to_boolean(self, env):
+ bool_value = bool(self.value)
+ return BoolNode(self.pos, value=bool_value, constant_result=bool_value)
+
+ def contains_surrogates(self):
+ return StringEncoding.string_contains_surrogates(self.value)
+
+ def generate_evaluation_code(self, code):
+ if self.type.is_pyobject:
# FIXME: this should go away entirely!
# Since string_contains_lone_surrogates() returns False for surrogate pairs in Py2/UCS2,
# Py2 can generate different code from Py3 here. Let's hope we get away with claiming that
@@ -1630,8 +1630,8 @@ class UnicodeNode(ConstNode):
# on P16/32bit Unicode platforms.
if StringEncoding.string_contains_lone_surrogates(self.value):
# lone (unpaired) surrogates are not really portable and cannot be
- # decoded by the UTF-8 codec in Py3.3
- self.result_code = code.get_py_const(py_object_type, 'ustring')
+ # decoded by the UTF-8 codec in Py3.3
+ self.result_code = code.get_py_const(py_object_type, 'ustring')
data_cname = code.get_string_const(
StringEncoding.BytesLiteral(self.value.encode('unicode_escape')))
const_code = code.get_cached_constants_writer(self.result_code)
@@ -1640,86 +1640,86 @@ class UnicodeNode(ConstNode):
const_code.mark_pos(self.pos)
const_code.putln(
"%s = PyUnicode_DecodeUnicodeEscape(%s, sizeof(%s) - 1, NULL); %s" % (
- self.result_code,
- data_cname,
- data_cname,
+ self.result_code,
+ data_cname,
+ data_cname,
const_code.error_goto_if_null(self.result_code, self.pos)))
const_code.put_error_if_neg(
- self.pos, "__Pyx_PyUnicode_READY(%s)" % self.result_code)
- else:
- self.result_code = code.get_py_string_const(self.value)
- else:
- self.result_code = code.get_pyunicode_ptr_const(self.value)
-
- def calculate_result_code(self):
- return self.result_code
-
- def compile_time_value(self, env):
- return self.value
-
-
-class StringNode(PyConstNode):
- # A Python str object, i.e. a byte string in Python 2.x and a
- # unicode string in Python 3.x
- #
- # value BytesLiteral (or EncodedString with ASCII content)
- # unicode_value EncodedString or None
- # is_identifier boolean
-
- type = str_type
- is_string_literal = True
- is_identifier = None
- unicode_value = None
-
- def calculate_constant_result(self):
- if self.unicode_value is not None:
- # only the Unicode value is portable across Py2/3
- self.constant_result = self.unicode_value
-
- def analyse_as_type(self, env):
- return _analyse_name_as_type(self.unicode_value or self.value.decode('ISO8859-1'), self.pos, env)
-
- def as_sliced_node(self, start, stop, step=None):
- value = type(self.value)(self.value[start:stop:step])
- value.encoding = self.value.encoding
- if self.unicode_value is not None:
- if StringEncoding.string_contains_surrogates(self.unicode_value[:stop]):
- # this is unsafe as it may give different results in different runtimes
- return None
- unicode_value = StringEncoding.EncodedString(
- self.unicode_value[start:stop:step])
- else:
- unicode_value = None
- return StringNode(
- self.pos, value=value, unicode_value=unicode_value,
- constant_result=value, is_identifier=self.is_identifier)
-
- def coerce_to(self, dst_type, env):
- if dst_type is not py_object_type and not str_type.subtype_of(dst_type):
-# if dst_type is Builtin.bytes_type:
-# # special case: bytes = 'str literal'
-# return BytesNode(self.pos, value=self.value)
- if not dst_type.is_pyobject:
- return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
- if dst_type is not Builtin.basestring_type:
- self.check_for_coercion_error(dst_type, env, fail=True)
- return self
-
- def can_coerce_to_char_literal(self):
- return not self.is_identifier and len(self.value) == 1
-
- def generate_evaluation_code(self, code):
- self.result_code = code.get_py_string_const(
- self.value, identifier=self.is_identifier, is_str=True,
- unicode_value=self.unicode_value)
-
- def get_constant_c_result_code(self):
- return None
-
- def calculate_result_code(self):
- return self.result_code
-
- def compile_time_value(self, env):
+ self.pos, "__Pyx_PyUnicode_READY(%s)" % self.result_code)
+ else:
+ self.result_code = code.get_py_string_const(self.value)
+ else:
+ self.result_code = code.get_pyunicode_ptr_const(self.value)
+
+ def calculate_result_code(self):
+ return self.result_code
+
+ def compile_time_value(self, env):
+ return self.value
+
+
+class StringNode(PyConstNode):
+ # A Python str object, i.e. a byte string in Python 2.x and a
+ # unicode string in Python 3.x
+ #
+ # value BytesLiteral (or EncodedString with ASCII content)
+ # unicode_value EncodedString or None
+ # is_identifier boolean
+
+ type = str_type
+ is_string_literal = True
+ is_identifier = None
+ unicode_value = None
+
+ def calculate_constant_result(self):
+ if self.unicode_value is not None:
+ # only the Unicode value is portable across Py2/3
+ self.constant_result = self.unicode_value
+
+ def analyse_as_type(self, env):
+ return _analyse_name_as_type(self.unicode_value or self.value.decode('ISO8859-1'), self.pos, env)
+
+ def as_sliced_node(self, start, stop, step=None):
+ value = type(self.value)(self.value[start:stop:step])
+ value.encoding = self.value.encoding
+ if self.unicode_value is not None:
+ if StringEncoding.string_contains_surrogates(self.unicode_value[:stop]):
+ # this is unsafe as it may give different results in different runtimes
+ return None
+ unicode_value = StringEncoding.EncodedString(
+ self.unicode_value[start:stop:step])
+ else:
+ unicode_value = None
+ return StringNode(
+ self.pos, value=value, unicode_value=unicode_value,
+ constant_result=value, is_identifier=self.is_identifier)
+
+ def coerce_to(self, dst_type, env):
+ if dst_type is not py_object_type and not str_type.subtype_of(dst_type):
+# if dst_type is Builtin.bytes_type:
+# # special case: bytes = 'str literal'
+# return BytesNode(self.pos, value=self.value)
+ if not dst_type.is_pyobject:
+ return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
+ if dst_type is not Builtin.basestring_type:
+ self.check_for_coercion_error(dst_type, env, fail=True)
+ return self
+
+ def can_coerce_to_char_literal(self):
+ return not self.is_identifier and len(self.value) == 1
+
+ def generate_evaluation_code(self, code):
+ self.result_code = code.get_py_string_const(
+ self.value, identifier=self.is_identifier, is_str=True,
+ unicode_value=self.unicode_value)
+
+ def get_constant_c_result_code(self):
+ return None
+
+ def calculate_result_code(self):
+ return self.result_code
+
+ def compile_time_value(self, env):
if self.value.is_unicode:
return self.value
if not IS_PYTHON3:
@@ -1729,193 +1729,193 @@ class StringNode(PyConstNode):
if self.unicode_value is not None:
return self.unicode_value
return self.value.decode('iso8859-1')
-
-
-class IdentifierStringNode(StringNode):
- # A special str value that represents an identifier (bytes in Py2,
- # unicode in Py3).
- is_identifier = True
-
-
-class ImagNode(AtomicExprNode):
- # Imaginary number literal
- #
+
+
+class IdentifierStringNode(StringNode):
+ # A special str value that represents an identifier (bytes in Py2,
+ # unicode in Py3).
+ is_identifier = True
+
+
+class ImagNode(AtomicExprNode):
+ # Imaginary number literal
+ #
# value string imaginary part (float value)
-
- type = PyrexTypes.c_double_complex_type
-
- def calculate_constant_result(self):
+
+ type = PyrexTypes.c_double_complex_type
+
+ def calculate_constant_result(self):
self.constant_result = complex(0.0, float(self.value))
-
- def compile_time_value(self, denv):
+
+ def compile_time_value(self, denv):
return complex(0.0, float(self.value))
-
- def analyse_types(self, env):
- self.type.create_declaration_utility_code(env)
- return self
-
- def may_be_none(self):
- return False
-
- def coerce_to(self, dst_type, env):
- if self.type is dst_type:
- return self
- node = ImagNode(self.pos, value=self.value)
- if dst_type.is_pyobject:
- node.is_temp = 1
+
+ def analyse_types(self, env):
+ self.type.create_declaration_utility_code(env)
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def coerce_to(self, dst_type, env):
+ if self.type is dst_type:
+ return self
+ node = ImagNode(self.pos, value=self.value)
+ if dst_type.is_pyobject:
+ node.is_temp = 1
node.type = Builtin.complex_type
- # We still need to perform normal coerce_to processing on the
- # result, because we might be coercing to an extension type,
- # in which case a type test node will be needed.
- return AtomicExprNode.coerce_to(node, dst_type, env)
-
- gil_message = "Constructing complex number"
-
- def calculate_result_code(self):
- if self.type.is_pyobject:
- return self.result()
- else:
- return "%s(0, %r)" % (self.type.from_parts, float(self.value))
-
- def generate_result_code(self, code):
- if self.type.is_pyobject:
- code.putln(
- "%s = PyComplex_FromDoubles(0.0, %r); %s" % (
- self.result(),
- float(self.value),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class NewExprNode(AtomicExprNode):
-
- # C++ new statement
- #
- # cppclass node c++ class to create
-
- type = None
-
- def infer_type(self, env):
- type = self.cppclass.analyse_as_type(env)
- if type is None or not type.is_cpp_class:
- error(self.pos, "new operator can only be applied to a C++ class")
- self.type = error_type
- return
- self.cpp_check(env)
+ # We still need to perform normal coerce_to processing on the
+ # result, because we might be coercing to an extension type,
+ # in which case a type test node will be needed.
+ return AtomicExprNode.coerce_to(node, dst_type, env)
+
+ gil_message = "Constructing complex number"
+
+ def calculate_result_code(self):
+ if self.type.is_pyobject:
+ return self.result()
+ else:
+ return "%s(0, %r)" % (self.type.from_parts, float(self.value))
+
+ def generate_result_code(self, code):
+ if self.type.is_pyobject:
+ code.putln(
+ "%s = PyComplex_FromDoubles(0.0, %r); %s" % (
+ self.result(),
+ float(self.value),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class NewExprNode(AtomicExprNode):
+
+ # C++ new statement
+ #
+ # cppclass node c++ class to create
+
+ type = None
+
+ def infer_type(self, env):
+ type = self.cppclass.analyse_as_type(env)
+ if type is None or not type.is_cpp_class:
+ error(self.pos, "new operator can only be applied to a C++ class")
+ self.type = error_type
+ return
+ self.cpp_check(env)
constructor = type.get_constructor(self.pos)
- self.class_type = type
- self.entry = constructor
- self.type = constructor.type
- return self.type
-
- def analyse_types(self, env):
- if self.type is None:
- self.infer_type(env)
- return self
-
- def may_be_none(self):
- return False
-
- def generate_result_code(self, code):
- pass
-
- def calculate_result_code(self):
+ self.class_type = type
+ self.entry = constructor
+ self.type = constructor.type
+ return self.type
+
+ def analyse_types(self, env):
+ if self.type is None:
+ self.infer_type(env)
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def generate_result_code(self, code):
+ pass
+
+ def calculate_result_code(self):
return "new " + self.class_type.empty_declaration_code()
-
-
-class NameNode(AtomicExprNode):
- # Reference to a local or global variable name.
- #
- # name string Python name of the variable
- # entry Entry Symbol table entry
- # type_entry Entry For extension type names, the original type entry
- # cf_is_null boolean Is uninitialized before this node
- # cf_maybe_null boolean Maybe uninitialized before this node
- # allow_null boolean Don't raise UnboundLocalError
- # nogil boolean Whether it is used in a nogil context
-
- is_name = True
- is_cython_module = False
- cython_attribute = None
- lhs_of_first_assignment = False # TODO: remove me
- is_used_as_rvalue = 0
- entry = None
- type_entry = None
- cf_maybe_null = True
- cf_is_null = False
- allow_null = False
- nogil = False
- inferred_type = None
-
- def as_cython_attribute(self):
- return self.cython_attribute
-
- def type_dependencies(self, env):
- if self.entry is None:
- self.entry = env.lookup(self.name)
- if self.entry is not None and self.entry.type.is_unspecified:
- return (self,)
- else:
- return ()
-
- def infer_type(self, env):
- if self.entry is None:
- self.entry = env.lookup(self.name)
- if self.entry is None or self.entry.type is unspecified_type:
- if self.inferred_type is not None:
- return self.inferred_type
- return py_object_type
- elif (self.entry.type.is_extension_type or self.entry.type.is_builtin_type) and \
- self.name == self.entry.type.name:
- # Unfortunately the type attribute of type objects
- # is used for the pointer to the type they represent.
- return type_type
- elif self.entry.type.is_cfunction:
- if self.entry.scope.is_builtin_scope:
- # special case: optimised builtin functions must be treated as Python objects
- return py_object_type
- else:
- # special case: referring to a C function must return its pointer
- return PyrexTypes.CPtrType(self.entry.type)
- else:
- # If entry is inferred as pyobject it's safe to use local
- # NameNode's inferred_type.
- if self.entry.type.is_pyobject and self.inferred_type:
- # Overflow may happen if integer
- if not (self.inferred_type.is_int and self.entry.might_overflow):
- return self.inferred_type
- return self.entry.type
-
- def compile_time_value(self, denv):
- try:
- return denv.lookup(self.name)
- except KeyError:
- error(self.pos, "Compile-time name '%s' not defined" % self.name)
-
- def get_constant_c_result_code(self):
- if not self.entry or self.entry.type.is_pyobject:
- return None
- return self.entry.cname
-
- def coerce_to(self, dst_type, env):
- # If coercing to a generic pyobject and this is a builtin
- # C function with a Python equivalent, manufacture a NameNode
- # referring to the Python builtin.
- #print "NameNode.coerce_to:", self.name, dst_type ###
- if dst_type is py_object_type:
- entry = self.entry
- if entry and entry.is_cfunction:
- var_entry = entry.as_variable
- if var_entry:
- if var_entry.is_builtin and var_entry.is_const:
- var_entry = env.declare_builtin(var_entry.name, self.pos)
- node = NameNode(self.pos, name = self.name)
- node.entry = var_entry
- node.analyse_rvalue_entry(env)
- return node
-
- return super(NameNode, self).coerce_to(dst_type, env)
-
+
+
+class NameNode(AtomicExprNode):
+ # Reference to a local or global variable name.
+ #
+ # name string Python name of the variable
+ # entry Entry Symbol table entry
+ # type_entry Entry For extension type names, the original type entry
+ # cf_is_null boolean Is uninitialized before this node
+ # cf_maybe_null boolean Maybe uninitialized before this node
+ # allow_null boolean Don't raise UnboundLocalError
+ # nogil boolean Whether it is used in a nogil context
+
+ is_name = True
+ is_cython_module = False
+ cython_attribute = None
+ lhs_of_first_assignment = False # TODO: remove me
+ is_used_as_rvalue = 0
+ entry = None
+ type_entry = None
+ cf_maybe_null = True
+ cf_is_null = False
+ allow_null = False
+ nogil = False
+ inferred_type = None
+
+ def as_cython_attribute(self):
+ return self.cython_attribute
+
+ def type_dependencies(self, env):
+ if self.entry is None:
+ self.entry = env.lookup(self.name)
+ if self.entry is not None and self.entry.type.is_unspecified:
+ return (self,)
+ else:
+ return ()
+
+ def infer_type(self, env):
+ if self.entry is None:
+ self.entry = env.lookup(self.name)
+ if self.entry is None or self.entry.type is unspecified_type:
+ if self.inferred_type is not None:
+ return self.inferred_type
+ return py_object_type
+ elif (self.entry.type.is_extension_type or self.entry.type.is_builtin_type) and \
+ self.name == self.entry.type.name:
+ # Unfortunately the type attribute of type objects
+ # is used for the pointer to the type they represent.
+ return type_type
+ elif self.entry.type.is_cfunction:
+ if self.entry.scope.is_builtin_scope:
+ # special case: optimised builtin functions must be treated as Python objects
+ return py_object_type
+ else:
+ # special case: referring to a C function must return its pointer
+ return PyrexTypes.CPtrType(self.entry.type)
+ else:
+ # If entry is inferred as pyobject it's safe to use local
+ # NameNode's inferred_type.
+ if self.entry.type.is_pyobject and self.inferred_type:
+ # Overflow may happen if integer
+ if not (self.inferred_type.is_int and self.entry.might_overflow):
+ return self.inferred_type
+ return self.entry.type
+
+ def compile_time_value(self, denv):
+ try:
+ return denv.lookup(self.name)
+ except KeyError:
+ error(self.pos, "Compile-time name '%s' not defined" % self.name)
+
+ def get_constant_c_result_code(self):
+ if not self.entry or self.entry.type.is_pyobject:
+ return None
+ return self.entry.cname
+
+ def coerce_to(self, dst_type, env):
+ # If coercing to a generic pyobject and this is a builtin
+ # C function with a Python equivalent, manufacture a NameNode
+ # referring to the Python builtin.
+ #print "NameNode.coerce_to:", self.name, dst_type ###
+ if dst_type is py_object_type:
+ entry = self.entry
+ if entry and entry.is_cfunction:
+ var_entry = entry.as_variable
+ if var_entry:
+ if var_entry.is_builtin and var_entry.is_const:
+ var_entry = env.declare_builtin(var_entry.name, self.pos)
+ node = NameNode(self.pos, name = self.name)
+ node.entry = var_entry
+ node.analyse_rvalue_entry(env)
+ return node
+
+ return super(NameNode, self).coerce_to(dst_type, env)
+
def declare_from_annotation(self, env, as_target=False):
"""Implements PEP 526 annotation typing in a fairly relaxed way.
@@ -1944,65 +1944,65 @@ class NameNode(AtomicExprNode):
self.entry = env.declare_var(name, atype, self.pos, is_cdef=not as_target)
self.entry.annotation = annotation
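declare_from_annotation() above implements PEP 526 declarations; as a reminder of the Python-level behaviour it models, an annotated assignment both binds the name and records the annotation. A plain-Python illustration (requires Python 3.6+, unrelated to the compiler internals):

namespace = {}
exec("x: int = 5", namespace)
assert namespace["x"] == 5
assert namespace["__annotations__"]["x"] is int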
- def analyse_as_module(self, env):
- # Try to interpret this as a reference to a cimported module.
- # Returns the module scope, or None.
- entry = self.entry
- if not entry:
- entry = env.lookup(self.name)
- if entry and entry.as_module:
- return entry.as_module
- return None
-
- def analyse_as_type(self, env):
- if self.cython_attribute:
- type = PyrexTypes.parse_basic_type(self.cython_attribute)
- else:
- type = PyrexTypes.parse_basic_type(self.name)
- if type:
- return type
- entry = self.entry
- if not entry:
- entry = env.lookup(self.name)
- if entry and entry.is_type:
- return entry.type
- else:
- return None
-
- def analyse_as_extension_type(self, env):
- # Try to interpret this as a reference to an extension type.
- # Returns the extension type, or None.
- entry = self.entry
- if not entry:
- entry = env.lookup(self.name)
- if entry and entry.is_type:
- if entry.type.is_extension_type or entry.type.is_builtin_type:
- return entry.type
- return None
-
- def analyse_target_declaration(self, env):
- if not self.entry:
- self.entry = env.lookup_here(self.name)
+ def analyse_as_module(self, env):
+ # Try to interpret this as a reference to a cimported module.
+ # Returns the module scope, or None.
+ entry = self.entry
+ if not entry:
+ entry = env.lookup(self.name)
+ if entry and entry.as_module:
+ return entry.as_module
+ return None
+
+ def analyse_as_type(self, env):
+ if self.cython_attribute:
+ type = PyrexTypes.parse_basic_type(self.cython_attribute)
+ else:
+ type = PyrexTypes.parse_basic_type(self.name)
+ if type:
+ return type
+ entry = self.entry
+ if not entry:
+ entry = env.lookup(self.name)
+ if entry and entry.is_type:
+ return entry.type
+ else:
+ return None
+
+ def analyse_as_extension_type(self, env):
+ # Try to interpret this as a reference to an extension type.
+ # Returns the extension type, or None.
+ entry = self.entry
+ if not entry:
+ entry = env.lookup(self.name)
+ if entry and entry.is_type:
+ if entry.type.is_extension_type or entry.type.is_builtin_type:
+ return entry.type
+ return None
+
+ def analyse_target_declaration(self, env):
+ if not self.entry:
+ self.entry = env.lookup_here(self.name)
if not self.entry and self.annotation is not None:
# name : type = ...
self.declare_from_annotation(env, as_target=True)
- if not self.entry:
- if env.directives['warn.undeclared']:
- warning(self.pos, "implicit declaration of '%s'" % self.name, 1)
- if env.directives['infer_types'] != False:
- type = unspecified_type
- else:
- type = py_object_type
- self.entry = env.declare_var(self.name, type, self.pos)
- if self.entry.is_declared_generic:
- self.result_ctype = py_object_type
+ if not self.entry:
+ if env.directives['warn.undeclared']:
+ warning(self.pos, "implicit declaration of '%s'" % self.name, 1)
+ if env.directives['infer_types'] != False:
+ type = unspecified_type
+ else:
+ type = py_object_type
+ self.entry = env.declare_var(self.name, type, self.pos)
+ if self.entry.is_declared_generic:
+ self.result_ctype = py_object_type
if self.entry.as_module:
# cimported modules namespace can shadow actual variables
self.entry.is_variable = 1
-
- def analyse_types(self, env):
- self.initialized_check = env.directives['initializedcheck']
- entry = self.entry
+
+ def analyse_types(self, env):
+ self.initialized_check = env.directives['initializedcheck']
+ entry = self.entry
if entry is None:
entry = env.lookup(self.name)
if not entry:
@@ -2017,12 +2017,12 @@ class NameNode(AtomicExprNode):
if entry.type.is_buffer:
from . import Buffer
Buffer.used_buffer_aux_vars(entry)
- self.analyse_rvalue_entry(env)
- return self
-
- def analyse_target_types(self, env):
- self.analyse_entry(env, is_target=True)
-
+ self.analyse_rvalue_entry(env)
+ return self
+
+ def analyse_target_types(self, env):
+ self.analyse_entry(env, is_target=True)
+
entry = self.entry
if entry.is_cfunction and entry.as_variable:
# FIXME: unify "is_overridable" flags below
@@ -2030,72 +2030,72 @@ class NameNode(AtomicExprNode):
# We need this for assigning to cpdef names and for the fused 'def' TreeFragment
entry = self.entry = entry.as_variable
self.type = entry.type
-
- if self.type.is_const:
- error(self.pos, "Assignment to const '%s'" % self.name)
- if self.type.is_reference:
- error(self.pos, "Assignment to reference '%s'" % self.name)
- if not self.is_lvalue():
+
+ if self.type.is_const:
+ error(self.pos, "Assignment to const '%s'" % self.name)
+ if self.type.is_reference:
+ error(self.pos, "Assignment to reference '%s'" % self.name)
+ if not self.is_lvalue():
error(self.pos, "Assignment to non-lvalue '%s'" % self.name)
- self.type = PyrexTypes.error_type
+ self.type = PyrexTypes.error_type
entry.used = 1
if entry.type.is_buffer:
- from . import Buffer
+ from . import Buffer
Buffer.used_buffer_aux_vars(entry)
- return self
-
- def analyse_rvalue_entry(self, env):
- #print "NameNode.analyse_rvalue_entry:", self.name ###
- #print "Entry:", self.entry.__dict__ ###
- self.analyse_entry(env)
- entry = self.entry
-
- if entry.is_declared_generic:
- self.result_ctype = py_object_type
-
- if entry.is_pyglobal or entry.is_builtin:
- if entry.is_builtin and entry.is_const:
- self.is_temp = 0
- else:
- self.is_temp = 1
-
- self.is_used_as_rvalue = 1
- elif entry.type.is_memoryviewslice:
- self.is_temp = False
- self.is_used_as_rvalue = True
- self.use_managed_ref = True
- return self
-
- def nogil_check(self, env):
- self.nogil = True
- if self.is_used_as_rvalue:
- entry = self.entry
- if entry.is_builtin:
- if not entry.is_const: # cached builtins are ok
- self.gil_error()
- elif entry.is_pyglobal:
- self.gil_error()
-
- gil_message = "Accessing Python global or builtin"
-
- def analyse_entry(self, env, is_target=False):
- #print "NameNode.analyse_entry:", self.name ###
- self.check_identifier_kind()
- entry = self.entry
- type = entry.type
- if (not is_target and type.is_pyobject and self.inferred_type and
- self.inferred_type.is_builtin_type):
- # assume that type inference is smarter than the static entry
- type = self.inferred_type
- self.type = type
-
- def check_identifier_kind(self):
- # Check that this is an appropriate kind of name for use in an
- # expression. Also finds the variable entry associated with
- # an extension type.
- entry = self.entry
- if entry.is_type and entry.type.is_extension_type:
- self.type_entry = entry
+ return self
+
+ def analyse_rvalue_entry(self, env):
+ #print "NameNode.analyse_rvalue_entry:", self.name ###
+ #print "Entry:", self.entry.__dict__ ###
+ self.analyse_entry(env)
+ entry = self.entry
+
+ if entry.is_declared_generic:
+ self.result_ctype = py_object_type
+
+ if entry.is_pyglobal or entry.is_builtin:
+ if entry.is_builtin and entry.is_const:
+ self.is_temp = 0
+ else:
+ self.is_temp = 1
+
+ self.is_used_as_rvalue = 1
+ elif entry.type.is_memoryviewslice:
+ self.is_temp = False
+ self.is_used_as_rvalue = True
+ self.use_managed_ref = True
+ return self
+
+ def nogil_check(self, env):
+ self.nogil = True
+ if self.is_used_as_rvalue:
+ entry = self.entry
+ if entry.is_builtin:
+ if not entry.is_const: # cached builtins are ok
+ self.gil_error()
+ elif entry.is_pyglobal:
+ self.gil_error()
+
+ gil_message = "Accessing Python global or builtin"
+
+ def analyse_entry(self, env, is_target=False):
+ #print "NameNode.analyse_entry:", self.name ###
+ self.check_identifier_kind()
+ entry = self.entry
+ type = entry.type
+ if (not is_target and type.is_pyobject and self.inferred_type and
+ self.inferred_type.is_builtin_type):
+ # assume that type inference is smarter than the static entry
+ type = self.inferred_type
+ self.type = type
+
+ def check_identifier_kind(self):
+ # Check that this is an appropriate kind of name for use in an
+ # expression. Also finds the variable entry associated with
+ # an extension type.
+ entry = self.entry
+ if entry.is_type and entry.type.is_extension_type:
+ self.type_entry = entry
if entry.is_type and entry.type.is_enum:
py_entry = Symtab.Entry(self.name, None, py_object_type)
py_entry.is_pyglobal = True
@@ -2108,265 +2108,265 @@ class NameNode(AtomicExprNode):
self.entry = self.entry.as_variable
elif not self.is_cython_module:
error(self.pos, "'%s' is not a constant, variable or function identifier" % self.name)
-
+
def is_cimported_module_without_shadow(self, env):
if self.is_cython_module or self.cython_attribute:
return False
entry = self.entry or env.lookup(self.name)
return entry.as_module and not entry.is_variable
- def is_simple(self):
- # If it's not a C variable, it'll be in a temp.
- return 1
-
- def may_be_none(self):
- if self.cf_state and self.type and (self.type.is_pyobject or
- self.type.is_memoryviewslice):
- # guard against infinite recursion on self-dependencies
- if getattr(self, '_none_checking', False):
- # self-dependency - either this node receives a None
- # value from *another* node, or it can not reference
- # None at this point => safe to assume "not None"
- return False
- self._none_checking = True
- # evaluate control flow state to see if there were any
- # potential None values assigned to the node so far
- may_be_none = False
- for assignment in self.cf_state:
- if assignment.rhs.may_be_none():
- may_be_none = True
- break
- del self._none_checking
- return may_be_none
- return super(NameNode, self).may_be_none()
-
- def nonlocally_immutable(self):
- if ExprNode.nonlocally_immutable(self):
- return True
- entry = self.entry
- if not entry or entry.in_closure:
- return False
- return entry.is_local or entry.is_arg or entry.is_builtin or entry.is_readonly
-
- def calculate_target_results(self, env):
- pass
-
- def check_const(self):
- entry = self.entry
+ def is_simple(self):
+ # If it's not a C variable, it'll be in a temp.
+ return 1
+
+ def may_be_none(self):
+ if self.cf_state and self.type and (self.type.is_pyobject or
+ self.type.is_memoryviewslice):
+ # guard against infinite recursion on self-dependencies
+ if getattr(self, '_none_checking', False):
+ # self-dependency - either this node receives a None
+ # value from *another* node, or it can not reference
+ # None at this point => safe to assume "not None"
+ return False
+ self._none_checking = True
+ # evaluate control flow state to see if there were any
+ # potential None values assigned to the node so far
+ may_be_none = False
+ for assignment in self.cf_state:
+ if assignment.rhs.may_be_none():
+ may_be_none = True
+ break
+ del self._none_checking
+ return may_be_none
+ return super(NameNode, self).may_be_none()
+
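The _none_checking flag in may_be_none() above is a re-entrancy guard for self-referential data-flow queries. A generic standalone sketch of the same idea, implemented here with an explicit visited set instead of an instance attribute (not compiler code; data shapes are illustrative):

def may_be_none(node, visiting=None):
    # While a node's own query is in progress, a recursive visit reports
    # "not None" so a self-dependency cannot recurse forever; only other
    # assignments can contribute a potential None value.
    if visiting is None:
        visiting = set()
    if id(node) in visiting:
        return False
    visiting.add(id(node))
    try:
        return any(may_be_none(dep, visiting) for dep in node.get("deps", ())) \
            or node.get("assigns_none", False)
    finally:
        visiting.discard(id(node))

a = {"assigns_none": False}
b = {"deps": [a], "assigns_none": False}
a["deps"] = [b]                      # cycle: a depends on b, b on a
assert may_be_none(a) is False
assert may_be_none({"assigns_none": True}) is True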
+ def nonlocally_immutable(self):
+ if ExprNode.nonlocally_immutable(self):
+ return True
+ entry = self.entry
+ if not entry or entry.in_closure:
+ return False
+ return entry.is_local or entry.is_arg or entry.is_builtin or entry.is_readonly
+
+ def calculate_target_results(self, env):
+ pass
+
+ def check_const(self):
+ entry = self.entry
if entry is not None and not (
entry.is_const or
entry.is_cfunction or
entry.is_builtin or
entry.type.is_const):
- self.not_const()
- return False
- return True
-
- def check_const_addr(self):
- entry = self.entry
- if not (entry.is_cglobal or entry.is_cfunction or entry.is_builtin):
- self.addr_not_const()
- return False
- return True
-
- def is_lvalue(self):
+ self.not_const()
+ return False
+ return True
+
+ def check_const_addr(self):
+ entry = self.entry
+ if not (entry.is_cglobal or entry.is_cfunction or entry.is_builtin):
+ self.addr_not_const()
+ return False
+ return True
+
+ def is_lvalue(self):
return (
self.entry.is_variable and
- not self.entry.is_readonly
+ not self.entry.is_readonly
) or (
self.entry.is_cfunction and
self.entry.is_overridable
)
-
- def is_addressable(self):
- return self.entry.is_variable and not self.type.is_memoryviewslice
-
- def is_ephemeral(self):
- # Name nodes are never ephemeral, even if the
- # result is in a temporary.
- return 0
-
- def calculate_result_code(self):
- entry = self.entry
- if not entry:
- return "<error>" # There was an error earlier
- return entry.cname
-
- def generate_result_code(self, code):
- assert hasattr(self, 'entry')
- entry = self.entry
- if entry is None:
- return # There was an error earlier
+
+ def is_addressable(self):
+ return self.entry.is_variable and not self.type.is_memoryviewslice
+
+ def is_ephemeral(self):
+ # Name nodes are never ephemeral, even if the
+ # result is in a temporary.
+ return 0
+
+ def calculate_result_code(self):
+ entry = self.entry
+ if not entry:
+ return "<error>" # There was an error earlier
+ return entry.cname
+
+ def generate_result_code(self, code):
+ assert hasattr(self, 'entry')
+ entry = self.entry
+ if entry is None:
+ return # There was an error earlier
if entry.utility_code:
code.globalstate.use_utility_code(entry.utility_code)
- if entry.is_builtin and entry.is_const:
- return # Lookup already cached
- elif entry.is_pyclass_attr:
- assert entry.type.is_pyobject, "Python global or builtin not a Python object"
- interned_cname = code.intern_identifier(self.entry.name)
- if entry.is_builtin:
- namespace = Naming.builtins_cname
- else: # entry.is_pyglobal
- namespace = entry.scope.namespace_cname
- if not self.cf_is_null:
- code.putln(
- '%s = PyObject_GetItem(%s, %s);' % (
- self.result(),
- namespace,
- interned_cname))
- code.putln('if (unlikely(!%s)) {' % self.result())
- code.putln('PyErr_Clear();')
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
- code.putln(
+ if entry.is_builtin and entry.is_const:
+ return # Lookup already cached
+ elif entry.is_pyclass_attr:
+ assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+ interned_cname = code.intern_identifier(self.entry.name)
+ if entry.is_builtin:
+ namespace = Naming.builtins_cname
+ else: # entry.is_pyglobal
+ namespace = entry.scope.namespace_cname
+ if not self.cf_is_null:
+ code.putln(
+ '%s = PyObject_GetItem(%s, %s);' % (
+ self.result(),
+ namespace,
+ interned_cname))
+ code.putln('if (unlikely(!%s)) {' % self.result())
+ code.putln('PyErr_Clear();')
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
+ code.putln(
'__Pyx_GetModuleGlobalName(%s, %s);' % (
- self.result(),
- interned_cname))
- if not self.cf_is_null:
- code.putln("}")
- code.putln(code.error_goto_if_null(self.result(), self.pos))
- code.put_gotref(self.py_result())
-
- elif entry.is_builtin and not entry.scope.is_module_scope:
- # known builtin
- assert entry.type.is_pyobject, "Python global or builtin not a Python object"
- interned_cname = code.intern_identifier(self.entry.name)
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("GetBuiltinName", "ObjectHandling.c"))
- code.putln(
- '%s = __Pyx_GetBuiltinName(%s); %s' % (
- self.result(),
- interned_cname,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
- elif entry.is_pyglobal or (entry.is_builtin and entry.scope.is_module_scope):
- # name in class body, global name or unknown builtin
- assert entry.type.is_pyobject, "Python global or builtin not a Python object"
- interned_cname = code.intern_identifier(self.entry.name)
- if entry.scope.is_module_scope:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
- code.putln(
+ self.result(),
+ interned_cname))
+ if not self.cf_is_null:
+ code.putln("}")
+ code.putln(code.error_goto_if_null(self.result(), self.pos))
+ code.put_gotref(self.py_result())
+
+ elif entry.is_builtin and not entry.scope.is_module_scope:
+ # known builtin
+ assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+ interned_cname = code.intern_identifier(self.entry.name)
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("GetBuiltinName", "ObjectHandling.c"))
+ code.putln(
+ '%s = __Pyx_GetBuiltinName(%s); %s' % (
+ self.result(),
+ interned_cname,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+ elif entry.is_pyglobal or (entry.is_builtin and entry.scope.is_module_scope):
+ # name in class body, global name or unknown builtin
+ assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+ interned_cname = code.intern_identifier(self.entry.name)
+ if entry.scope.is_module_scope:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
+ code.putln(
'__Pyx_GetModuleGlobalName(%s, %s); %s' % (
- self.result(),
- interned_cname,
- code.error_goto_if_null(self.result(), self.pos)))
- else:
- # FIXME: is_pyglobal is also used for class namespace
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("GetNameInClass", "ObjectHandling.c"))
- code.putln(
+ self.result(),
+ interned_cname,
+ code.error_goto_if_null(self.result(), self.pos)))
+ else:
+ # FIXME: is_pyglobal is also used for class namespace
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("GetNameInClass", "ObjectHandling.c"))
+ code.putln(
'__Pyx_GetNameInClass(%s, %s, %s); %s' % (
- self.result(),
- entry.scope.namespace_cname,
- interned_cname,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
- elif entry.is_local or entry.in_closure or entry.from_closure or entry.type.is_memoryviewslice:
- # Raise UnboundLocalError for objects and memoryviewslices
- raise_unbound = (
- (self.cf_maybe_null or self.cf_is_null) and not self.allow_null)
- null_code = entry.type.check_for_null_code(entry.cname)
-
- memslice_check = entry.type.is_memoryviewslice and self.initialized_check
-
- if null_code and raise_unbound and (entry.type.is_pyobject or memslice_check):
- code.put_error_if_unbound(self.pos, entry, self.in_nogil_context)
-
+ self.result(),
+ entry.scope.namespace_cname,
+ interned_cname,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+ elif entry.is_local or entry.in_closure or entry.from_closure or entry.type.is_memoryviewslice:
+ # Raise UnboundLocalError for objects and memoryviewslices
+ raise_unbound = (
+ (self.cf_maybe_null or self.cf_is_null) and not self.allow_null)
+ null_code = entry.type.check_for_null_code(entry.cname)
+
+ memslice_check = entry.type.is_memoryviewslice and self.initialized_check
+
+ if null_code and raise_unbound and (entry.type.is_pyobject or memslice_check):
+ code.put_error_if_unbound(self.pos, entry, self.in_nogil_context)
+
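The pyclass_attr branch above first tries PyObject_GetItem on the class namespace and only then falls back to the module-global/builtin lookup helper. In plain Python terms the lookup order for a name used in a class body is class namespace, then module globals, then builtins; a small standalone illustration of that assumed equivalence (not generated code):

import builtins

def load_name_in_class_body(name, class_namespace, module_globals):
    # Illustrative only: class namespace first, then module globals, then
    # the builtins module, mirroring the generated lookup above.
    if name in class_namespace:
        return class_namespace[name]
    if name in module_globals:
        return module_globals[name]
    return getattr(builtins, name)

assert load_name_in_class_body("len", {}, {}) is len
assert load_name_in_class_body("x", {"x": 1}, {"x": 2}) == 1
assert load_name_in_class_body("x", {}, {"x": 2}) == 2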
def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
exception_check=None, exception_value=None):
- #print "NameNode.generate_assignment_code:", self.name ###
- entry = self.entry
- if entry is None:
- return # There was an error earlier
-
- if (self.entry.type.is_ptr and isinstance(rhs, ListNode)
+ #print "NameNode.generate_assignment_code:", self.name ###
+ entry = self.entry
+ if entry is None:
+ return # There was an error earlier
+
+ if (self.entry.type.is_ptr and isinstance(rhs, ListNode)
and not self.lhs_of_first_assignment and not rhs.in_module_scope):
- error(self.pos, "Literal list must be assigned to pointer at time of declaration")
-
- # is_pyglobal seems to be True for module-level globals only.
- # We use this to access class->tp_dict if necessary.
- if entry.is_pyglobal:
- assert entry.type.is_pyobject, "Python global or builtin not a Python object"
- interned_cname = code.intern_identifier(self.entry.name)
- namespace = self.entry.scope.namespace_cname
- if entry.is_member:
- # if the entry is a member we have to cheat: SetAttr does not work
- # on types, so we create a descriptor which is then added to tp_dict
- setter = 'PyDict_SetItem'
- namespace = '%s->tp_dict' % namespace
- elif entry.scope.is_module_scope:
- setter = 'PyDict_SetItem'
- namespace = Naming.moddict_cname
- elif entry.is_pyclass_attr:
+ error(self.pos, "Literal list must be assigned to pointer at time of declaration")
+
+ # is_pyglobal seems to be True for module-level globals only.
+ # We use this to access class->tp_dict if necessary.
+ if entry.is_pyglobal:
+ assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+ interned_cname = code.intern_identifier(self.entry.name)
+ namespace = self.entry.scope.namespace_cname
+ if entry.is_member:
+ # if the entry is a member we have to cheat: SetAttr does not work
+ # on types, so we create a descriptor which is then added to tp_dict
+ setter = 'PyDict_SetItem'
+ namespace = '%s->tp_dict' % namespace
+ elif entry.scope.is_module_scope:
+ setter = 'PyDict_SetItem'
+ namespace = Naming.moddict_cname
+ elif entry.is_pyclass_attr:
code.globalstate.use_utility_code(UtilityCode.load_cached("SetNameInClass", "ObjectHandling.c"))
setter = '__Pyx_SetNameInClass'
- else:
- assert False, repr(entry)
- code.put_error_if_neg(
- self.pos,
- '%s(%s, %s, %s)' % (
- setter,
- namespace,
- interned_cname,
- rhs.py_result()))
- if debug_disposal_code:
- print("NameNode.generate_assignment_code:")
- print("...generating disposal code for %s" % rhs)
- rhs.generate_disposal_code(code)
- rhs.free_temps(code)
- if entry.is_member:
- # in Py2.6+, we need to invalidate the method cache
- code.putln("PyType_Modified(%s);" %
- entry.scope.parent_type.typeptr_cname)
- else:
- if self.type.is_memoryviewslice:
- self.generate_acquire_memoryviewslice(rhs, code)
-
- elif self.type.is_buffer:
- # Generate code for doing the buffer release/acquisition.
- # This might raise an exception in which case the assignment (done
- # below) will not happen.
- #
- # The reason this is not in a typetest-like node is that the
- # variables that the acquired buffer info is stored to are allocated
- # per entry and coupled with it.
- self.generate_acquire_buffer(rhs, code)
- assigned = False
- if self.type.is_pyobject:
- #print "NameNode.generate_assignment_code: to", self.name ###
- #print "...from", rhs ###
- #print "...LHS type", self.type, "ctype", self.ctype() ###
- #print "...RHS type", rhs.type, "ctype", rhs.ctype() ###
- if self.use_managed_ref:
- rhs.make_owned_reference(code)
- is_external_ref = entry.is_cglobal or self.entry.in_closure or self.entry.from_closure
- if is_external_ref:
- if not self.cf_is_null:
- if self.cf_maybe_null:
- code.put_xgotref(self.py_result())
- else:
- code.put_gotref(self.py_result())
- assigned = True
- if entry.is_cglobal:
- code.put_decref_set(
- self.result(), rhs.result_as(self.ctype()))
- else:
- if not self.cf_is_null:
- if self.cf_maybe_null:
- code.put_xdecref_set(
- self.result(), rhs.result_as(self.ctype()))
- else:
- code.put_decref_set(
- self.result(), rhs.result_as(self.ctype()))
- else:
- assigned = False
- if is_external_ref:
- code.put_giveref(rhs.py_result())
- if not self.type.is_memoryviewslice:
- if not assigned:
+ else:
+ assert False, repr(entry)
+ code.put_error_if_neg(
+ self.pos,
+ '%s(%s, %s, %s)' % (
+ setter,
+ namespace,
+ interned_cname,
+ rhs.py_result()))
+ if debug_disposal_code:
+ print("NameNode.generate_assignment_code:")
+ print("...generating disposal code for %s" % rhs)
+ rhs.generate_disposal_code(code)
+ rhs.free_temps(code)
+ if entry.is_member:
+ # in Py2.6+, we need to invalidate the method cache
+ code.putln("PyType_Modified(%s);" %
+ entry.scope.parent_type.typeptr_cname)
+ else:
+ if self.type.is_memoryviewslice:
+ self.generate_acquire_memoryviewslice(rhs, code)
+
+ elif self.type.is_buffer:
+ # Generate code for doing the buffer release/acquisition.
+ # This might raise an exception in which case the assignment (done
+ # below) will not happen.
+ #
+ # The reason this is not in a typetest-like node is because the
+ # variables that the acquired buffer info is stored to is allocated
+ # per entry and coupled with it.
+ self.generate_acquire_buffer(rhs, code)
+ assigned = False
+ if self.type.is_pyobject:
+ #print "NameNode.generate_assignment_code: to", self.name ###
+ #print "...from", rhs ###
+ #print "...LHS type", self.type, "ctype", self.ctype() ###
+ #print "...RHS type", rhs.type, "ctype", rhs.ctype() ###
+ if self.use_managed_ref:
+ rhs.make_owned_reference(code)
+ is_external_ref = entry.is_cglobal or self.entry.in_closure or self.entry.from_closure
+ if is_external_ref:
+ if not self.cf_is_null:
+ if self.cf_maybe_null:
+ code.put_xgotref(self.py_result())
+ else:
+ code.put_gotref(self.py_result())
+ assigned = True
+ if entry.is_cglobal:
+ code.put_decref_set(
+ self.result(), rhs.result_as(self.ctype()))
+ else:
+ if not self.cf_is_null:
+ if self.cf_maybe_null:
+ code.put_xdecref_set(
+ self.result(), rhs.result_as(self.ctype()))
+ else:
+ code.put_decref_set(
+ self.result(), rhs.result_as(self.ctype()))
+ else:
+ assigned = False
+ if is_external_ref:
+ code.put_giveref(rhs.py_result())
+ if not self.type.is_memoryviewslice:
+ if not assigned:
if overloaded_assignment:
result = rhs.result()
if exception_check == '+':
@@ -2384,188 +2384,188 @@ class NameNode(AtomicExprNode):
code.putln('new (&%s) decltype(%s){%s};' % (self.result(), self.result(), result))
elif result != self.result():
code.putln('%s = %s;' % (self.result(), result))
- if debug_disposal_code:
- print("NameNode.generate_assignment_code:")
- print("...generating post-assignment code for %s" % rhs)
- rhs.generate_post_assignment_code(code)
- elif rhs.result_in_temp():
- rhs.generate_post_assignment_code(code)
-
- rhs.free_temps(code)
-
- def generate_acquire_memoryviewslice(self, rhs, code):
- """
- Slices, coercions from objects, return values etc are new references.
- We have a borrowed reference in case of dst = src
- """
- from . import MemoryView
-
- MemoryView.put_acquire_memoryviewslice(
- lhs_cname=self.result(),
- lhs_type=self.type,
- lhs_pos=self.pos,
- rhs=rhs,
- code=code,
- have_gil=not self.in_nogil_context,
- first_assignment=self.cf_is_null)
-
- def generate_acquire_buffer(self, rhs, code):
- # rhstmp is only used in case the rhs is a complicated expression leading to
- # the object, to avoid repeating the same C expression for every reference
- # to the rhs. It does NOT hold a reference.
- pretty_rhs = isinstance(rhs, NameNode) or rhs.is_temp
- if pretty_rhs:
- rhstmp = rhs.result_as(self.ctype())
- else:
- rhstmp = code.funcstate.allocate_temp(self.entry.type, manage_ref=False)
- code.putln('%s = %s;' % (rhstmp, rhs.result_as(self.ctype())))
-
- from . import Buffer
- Buffer.put_assign_to_buffer(self.result(), rhstmp, self.entry,
- is_initialized=not self.lhs_of_first_assignment,
- pos=self.pos, code=code)
-
- if not pretty_rhs:
- code.putln("%s = 0;" % rhstmp)
- code.funcstate.release_temp(rhstmp)
-
- def generate_deletion_code(self, code, ignore_nonexisting=False):
- if self.entry is None:
- return # There was an error earlier
- elif self.entry.is_pyclass_attr:
- namespace = self.entry.scope.namespace_cname
- interned_cname = code.intern_identifier(self.entry.name)
- if ignore_nonexisting:
- key_error_code = 'PyErr_Clear(); else'
- else:
- # minor hack: fake a NameError on KeyError
- key_error_code = (
- '{ PyErr_Clear(); PyErr_Format(PyExc_NameError, "name \'%%s\' is not defined", "%s"); }' %
- self.entry.name)
- code.putln(
- 'if (unlikely(PyObject_DelItem(%s, %s) < 0)) {'
- ' if (likely(PyErr_ExceptionMatches(PyExc_KeyError))) %s'
- ' %s '
- '}' % (namespace, interned_cname,
- key_error_code,
- code.error_goto(self.pos)))
- elif self.entry.is_pyglobal:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
- interned_cname = code.intern_identifier(self.entry.name)
- del_code = '__Pyx_PyObject_DelAttrStr(%s, %s)' % (
- Naming.module_cname, interned_cname)
- if ignore_nonexisting:
+ if debug_disposal_code:
+ print("NameNode.generate_assignment_code:")
+ print("...generating post-assignment code for %s" % rhs)
+ rhs.generate_post_assignment_code(code)
+ elif rhs.result_in_temp():
+ rhs.generate_post_assignment_code(code)
+
+ rhs.free_temps(code)
+
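In the assignment path above, which decref helper gets emitted depends on the control-flow facts about the left-hand side. A hypothetical summary of the non-cglobal branch (function name and return values are illustrative; the real code calls the corresponding code-writer methods):

def choose_decref_helper(cf_is_null, cf_maybe_null):
    # A definitely-unbound slot has nothing to drop, a possibly-unbound slot
    # needs the NULL-tolerant variant, otherwise a plain decref-and-set is
    # enough.
    if cf_is_null:
        return None
    return "put_xdecref_set" if cf_maybe_null else "put_decref_set"

assert choose_decref_helper(True, True) is None
assert choose_decref_helper(False, True) == "put_xdecref_set"
assert choose_decref_helper(False, False) == "put_decref_set"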
+ def generate_acquire_memoryviewslice(self, rhs, code):
+ """
+ Slices, coercions from objects, return values etc are new references.
+ We have a borrowed reference in case of dst = src
+ """
+ from . import MemoryView
+
+ MemoryView.put_acquire_memoryviewslice(
+ lhs_cname=self.result(),
+ lhs_type=self.type,
+ lhs_pos=self.pos,
+ rhs=rhs,
+ code=code,
+ have_gil=not self.in_nogil_context,
+ first_assignment=self.cf_is_null)
+
+ def generate_acquire_buffer(self, rhs, code):
+ # rhstmp is only used in case the rhs is a complicated expression leading to
+ # the object, to avoid repeating the same C expression for every reference
+ # to the rhs. It does NOT hold a reference.
+ pretty_rhs = isinstance(rhs, NameNode) or rhs.is_temp
+ if pretty_rhs:
+ rhstmp = rhs.result_as(self.ctype())
+ else:
+ rhstmp = code.funcstate.allocate_temp(self.entry.type, manage_ref=False)
+ code.putln('%s = %s;' % (rhstmp, rhs.result_as(self.ctype())))
+
+ from . import Buffer
+ Buffer.put_assign_to_buffer(self.result(), rhstmp, self.entry,
+ is_initialized=not self.lhs_of_first_assignment,
+ pos=self.pos, code=code)
+
+ if not pretty_rhs:
+ code.putln("%s = 0;" % rhstmp)
+ code.funcstate.release_temp(rhstmp)
+
+ def generate_deletion_code(self, code, ignore_nonexisting=False):
+ if self.entry is None:
+ return # There was an error earlier
+ elif self.entry.is_pyclass_attr:
+ namespace = self.entry.scope.namespace_cname
+ interned_cname = code.intern_identifier(self.entry.name)
+ if ignore_nonexisting:
+ key_error_code = 'PyErr_Clear(); else'
+ else:
+ # minor hack: fake a NameError on KeyError
+ key_error_code = (
+ '{ PyErr_Clear(); PyErr_Format(PyExc_NameError, "name \'%%s\' is not defined", "%s"); }' %
+ self.entry.name)
+ code.putln(
+ 'if (unlikely(PyObject_DelItem(%s, %s) < 0)) {'
+ ' if (likely(PyErr_ExceptionMatches(PyExc_KeyError))) %s'
+ ' %s '
+ '}' % (namespace, interned_cname,
+ key_error_code,
+ code.error_goto(self.pos)))
+ elif self.entry.is_pyglobal:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
+ interned_cname = code.intern_identifier(self.entry.name)
+ del_code = '__Pyx_PyObject_DelAttrStr(%s, %s)' % (
+ Naming.module_cname, interned_cname)
+ if ignore_nonexisting:
code.putln(
'if (unlikely(%s < 0)) {'
' if (likely(PyErr_ExceptionMatches(PyExc_AttributeError))) PyErr_Clear(); else %s '
'}' % (del_code, code.error_goto(self.pos)))
- else:
- code.put_error_if_neg(self.pos, del_code)
- elif self.entry.type.is_pyobject or self.entry.type.is_memoryviewslice:
- if not self.cf_is_null:
- if self.cf_maybe_null and not ignore_nonexisting:
- code.put_error_if_unbound(self.pos, self.entry)
-
- if self.entry.type.is_pyobject:
- if self.entry.in_closure:
- # generator
- if ignore_nonexisting and self.cf_maybe_null:
- code.put_xgotref(self.result())
- else:
- code.put_gotref(self.result())
- if ignore_nonexisting and self.cf_maybe_null:
- code.put_xdecref(self.result(), self.ctype())
- else:
- code.put_decref(self.result(), self.ctype())
- code.putln('%s = NULL;' % self.result())
- else:
- code.put_xdecref_memoryviewslice(self.entry.cname,
- have_gil=not self.nogil)
- else:
- error(self.pos, "Deletion of C names not supported")
-
- def annotate(self, code):
- if hasattr(self, 'is_called') and self.is_called:
- pos = (self.pos[0], self.pos[1], self.pos[2] - len(self.name) - 1)
- if self.type.is_pyobject:
- style, text = 'py_call', 'python function (%s)'
- else:
- style, text = 'c_call', 'c function (%s)'
- code.annotate(pos, AnnotationItem(style, text % self.type, size=len(self.name)))
-
-class BackquoteNode(ExprNode):
- # `expr`
- #
- # arg ExprNode
-
- type = py_object_type
-
- subexprs = ['arg']
-
- def analyse_types(self, env):
- self.arg = self.arg.analyse_types(env)
- self.arg = self.arg.coerce_to_pyobject(env)
- self.is_temp = 1
- return self
-
- gil_message = "Backquote expression"
-
- def calculate_constant_result(self):
- self.constant_result = repr(self.arg.constant_result)
-
- def generate_result_code(self, code):
- code.putln(
- "%s = PyObject_Repr(%s); %s" % (
- self.result(),
- self.arg.py_result(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class ImportNode(ExprNode):
- # Used as part of import statement implementation.
- # Implements result =
- # __import__(module_name, globals(), None, name_list, level)
- #
- # module_name StringNode dotted name of module. Empty module
- # name means importing the parent package according
- # to level
- # name_list ListNode or None list of names to be imported
- # level int relative import level:
- # -1: attempt both relative import and absolute import;
- # 0: absolute import;
- # >0: the number of parent directories to search
- # relative to the current module.
- # None: decide the level according to language level and
- # directives
-
- type = py_object_type
-
- subexprs = ['module_name', 'name_list']
-
- def analyse_types(self, env):
- if self.level is None:
- if (env.directives['py2_import'] or
- Future.absolute_import not in env.global_scope().context.future_directives):
- self.level = -1
- else:
- self.level = 0
- module_name = self.module_name.analyse_types(env)
- self.module_name = module_name.coerce_to_pyobject(env)
- if self.name_list:
- name_list = self.name_list.analyse_types(env)
- self.name_list = name_list.coerce_to_pyobject(env)
- self.is_temp = 1
- return self
-
- gil_message = "Python import"
-
- def generate_result_code(self, code):
- if self.name_list:
- name_list_code = self.name_list.py_result()
- else:
- name_list_code = "0"
+ else:
+ code.put_error_if_neg(self.pos, del_code)
+ elif self.entry.type.is_pyobject or self.entry.type.is_memoryviewslice:
+ if not self.cf_is_null:
+ if self.cf_maybe_null and not ignore_nonexisting:
+ code.put_error_if_unbound(self.pos, self.entry)
+
+ if self.entry.type.is_pyobject:
+ if self.entry.in_closure:
+ # generator
+ if ignore_nonexisting and self.cf_maybe_null:
+ code.put_xgotref(self.result())
+ else:
+ code.put_gotref(self.result())
+ if ignore_nonexisting and self.cf_maybe_null:
+ code.put_xdecref(self.result(), self.ctype())
+ else:
+ code.put_decref(self.result(), self.ctype())
+ code.putln('%s = NULL;' % self.result())
+ else:
+ code.put_xdecref_memoryviewslice(self.entry.cname,
+ have_gil=not self.nogil)
+ else:
+ error(self.pos, "Deletion of C names not supported")
+
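The "fake a NameError on KeyError" hack in generate_deletion_code() above matches CPython's own behaviour for class bodies: deleting an unbound name there raises NameError, not KeyError. A quick plain-Python check of that behaviour (illustrative only):

try:
    class _Demo(object):
        del _never_defined_here
except NameError as exc:
    assert "_never_defined_here" in str(exc)
else:
    raise AssertionError("expected NameError for deleting an unbound name")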
+ def annotate(self, code):
+ if hasattr(self, 'is_called') and self.is_called:
+ pos = (self.pos[0], self.pos[1], self.pos[2] - len(self.name) - 1)
+ if self.type.is_pyobject:
+ style, text = 'py_call', 'python function (%s)'
+ else:
+ style, text = 'c_call', 'c function (%s)'
+ code.annotate(pos, AnnotationItem(style, text % self.type, size=len(self.name)))
+
+class BackquoteNode(ExprNode):
+ # `expr`
+ #
+ # arg ExprNode
+
+ type = py_object_type
+
+ subexprs = ['arg']
+
+ def analyse_types(self, env):
+ self.arg = self.arg.analyse_types(env)
+ self.arg = self.arg.coerce_to_pyobject(env)
+ self.is_temp = 1
+ return self
+
+ gil_message = "Backquote expression"
+
+ def calculate_constant_result(self):
+ self.constant_result = repr(self.arg.constant_result)
+
+ def generate_result_code(self, code):
+ code.putln(
+ "%s = PyObject_Repr(%s); %s" % (
+ self.result(),
+ self.arg.py_result(),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class ImportNode(ExprNode):
+ # Used as part of import statement implementation.
+ # Implements result =
+ # __import__(module_name, globals(), None, name_list, level)
+ #
+ # module_name StringNode dotted name of module. Empty module
+ # name means importing the parent package according
+ # to level
+ # name_list ListNode or None list of names to be imported
+ # level int relative import level:
+ # -1: attempt both relative import and absolute import;
+ # 0: absolute import;
+ # >0: the number of parent directories to search
+ # relative to the current module.
+ # None: decide the level according to language level and
+ # directives
+
+ type = py_object_type
+
+ subexprs = ['module_name', 'name_list']
+
+ def analyse_types(self, env):
+ if self.level is None:
+ if (env.directives['py2_import'] or
+ Future.absolute_import not in env.global_scope().context.future_directives):
+ self.level = -1
+ else:
+ self.level = 0
+ module_name = self.module_name.analyse_types(env)
+ self.module_name = module_name.coerce_to_pyobject(env)
+ if self.name_list:
+ name_list = self.name_list.analyse_types(env)
+ self.name_list = name_list.coerce_to_pyobject(env)
+ self.is_temp = 1
+ return self
+
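The level handling at the top of ImportNode.analyse_types() above can be summarised as a tiny decision function (standalone sketch; parameter names are illustrative):

def resolve_import_level(level, py2_import, has_absolute_import_future):
    # Mirrors analyse_types(): with no explicit level, Python-2-style import
    # semantics (or a missing absolute_import future) try a relative import
    # before an absolute one; otherwise imports are absolute.
    if level is not None:
        return level
    if py2_import or not has_absolute_import_future:
        return -1   # try relative first, then absolute
    return 0        # absolute import only

assert resolve_import_level(None, False, True) == 0
assert resolve_import_level(None, True, True) == -1
assert resolve_import_level(None, False, False) == -1
assert resolve_import_level(2, False, True) == 2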
+ gil_message = "Python import"
+
+ def generate_result_code(self, code):
+ if self.name_list:
+ name_list_code = self.name_list.py_result()
+ else:
+ name_list_code = "0"
code.globalstate.use_utility_code(UtilityCode.load_cached("Import", "ImportExport.c"))
import_code = "__Pyx_Import(%s, %s, %d)" % (
@@ -2584,323 +2584,323 @@ class ImportNode(ExprNode):
self.result(),
import_code,
code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class IteratorNode(ExprNode):
- # Used as part of for statement implementation.
- #
- # Implements result = iter(sequence)
- #
- # sequence ExprNode
-
- type = py_object_type
- iter_func_ptr = None
- counter_cname = None
- cpp_iterator_cname = None
- reversed = False # currently only used for list/tuple types (see Optimize.py)
+ code.put_gotref(self.py_result())
+
+
+class IteratorNode(ExprNode):
+ # Used as part of for statement implementation.
+ #
+ # Implements result = iter(sequence)
+ #
+ # sequence ExprNode
+
+ type = py_object_type
+ iter_func_ptr = None
+ counter_cname = None
+ cpp_iterator_cname = None
+ reversed = False # currently only used for list/tuple types (see Optimize.py)
is_async = False
-
- subexprs = ['sequence']
-
- def analyse_types(self, env):
- self.sequence = self.sequence.analyse_types(env)
- if (self.sequence.type.is_array or self.sequence.type.is_ptr) and \
- not self.sequence.type.is_string:
- # C array iteration will be transformed later on
- self.type = self.sequence.type
- elif self.sequence.type.is_cpp_class:
- self.analyse_cpp_types(env)
- else:
- self.sequence = self.sequence.coerce_to_pyobject(env)
+
+ subexprs = ['sequence']
+
+ def analyse_types(self, env):
+ self.sequence = self.sequence.analyse_types(env)
+ if (self.sequence.type.is_array or self.sequence.type.is_ptr) and \
+ not self.sequence.type.is_string:
+ # C array iteration will be transformed later on
+ self.type = self.sequence.type
+ elif self.sequence.type.is_cpp_class:
+ self.analyse_cpp_types(env)
+ else:
+ self.sequence = self.sequence.coerce_to_pyobject(env)
if self.sequence.type in (list_type, tuple_type):
- self.sequence = self.sequence.as_none_safe_node("'NoneType' object is not iterable")
- self.is_temp = 1
- return self
-
- gil_message = "Iterating over Python object"
-
- _func_iternext_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None),
- ]))
-
- def type_dependencies(self, env):
- return self.sequence.type_dependencies(env)
-
- def infer_type(self, env):
- sequence_type = self.sequence.infer_type(env)
- if sequence_type.is_array or sequence_type.is_ptr:
- return sequence_type
- elif sequence_type.is_cpp_class:
- begin = sequence_type.scope.lookup("begin")
- if begin is not None:
- return begin.type.return_type
- elif sequence_type.is_pyobject:
- return sequence_type
- return py_object_type
-
- def analyse_cpp_types(self, env):
- sequence_type = self.sequence.type
- if sequence_type.is_ptr:
- sequence_type = sequence_type.base_type
- begin = sequence_type.scope.lookup("begin")
- end = sequence_type.scope.lookup("end")
- if (begin is None
- or not begin.type.is_cfunction
- or begin.type.args):
- error(self.pos, "missing begin() on %s" % self.sequence.type)
- self.type = error_type
- return
- if (end is None
- or not end.type.is_cfunction
- or end.type.args):
- error(self.pos, "missing end() on %s" % self.sequence.type)
- self.type = error_type
- return
- iter_type = begin.type.return_type
- if iter_type.is_cpp_class:
- if env.lookup_operator_for_types(
- self.pos,
- "!=",
- [iter_type, end.type.return_type]) is None:
- error(self.pos, "missing operator!= on result of begin() on %s" % self.sequence.type)
- self.type = error_type
- return
- if env.lookup_operator_for_types(self.pos, '++', [iter_type]) is None:
- error(self.pos, "missing operator++ on result of begin() on %s" % self.sequence.type)
- self.type = error_type
- return
- if env.lookup_operator_for_types(self.pos, '*', [iter_type]) is None:
- error(self.pos, "missing operator* on result of begin() on %s" % self.sequence.type)
- self.type = error_type
- return
- self.type = iter_type
- elif iter_type.is_ptr:
- if not (iter_type == end.type.return_type):
- error(self.pos, "incompatible types for begin() and end()")
- self.type = iter_type
- else:
- error(self.pos, "result type of begin() on %s must be a C++ class or pointer" % self.sequence.type)
- self.type = error_type
- return
-
- def generate_result_code(self, code):
- sequence_type = self.sequence.type
- if sequence_type.is_cpp_class:
- if self.sequence.is_name:
- # safe: C++ won't allow you to reassign to class references
- begin_func = "%s.begin" % self.sequence.result()
- else:
- sequence_type = PyrexTypes.c_ptr_type(sequence_type)
- self.cpp_iterator_cname = code.funcstate.allocate_temp(sequence_type, manage_ref=False)
- code.putln("%s = &%s;" % (self.cpp_iterator_cname, self.sequence.result()))
- begin_func = "%s->begin" % self.cpp_iterator_cname
- # TODO: Limit scope.
- code.putln("%s = %s();" % (self.result(), begin_func))
- return
- if sequence_type.is_array or sequence_type.is_ptr:
- raise InternalError("for in carray slice not transformed")
+ self.sequence = self.sequence.as_none_safe_node("'NoneType' object is not iterable")
+ self.is_temp = 1
+ return self
+
+ gil_message = "Iterating over Python object"
+
+ _func_iternext_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None),
+ ]))
+
+ def type_dependencies(self, env):
+ return self.sequence.type_dependencies(env)
+
+ def infer_type(self, env):
+ sequence_type = self.sequence.infer_type(env)
+ if sequence_type.is_array or sequence_type.is_ptr:
+ return sequence_type
+ elif sequence_type.is_cpp_class:
+ begin = sequence_type.scope.lookup("begin")
+ if begin is not None:
+ return begin.type.return_type
+ elif sequence_type.is_pyobject:
+ return sequence_type
+ return py_object_type
+
+ def analyse_cpp_types(self, env):
+ sequence_type = self.sequence.type
+ if sequence_type.is_ptr:
+ sequence_type = sequence_type.base_type
+ begin = sequence_type.scope.lookup("begin")
+ end = sequence_type.scope.lookup("end")
+ if (begin is None
+ or not begin.type.is_cfunction
+ or begin.type.args):
+ error(self.pos, "missing begin() on %s" % self.sequence.type)
+ self.type = error_type
+ return
+ if (end is None
+ or not end.type.is_cfunction
+ or end.type.args):
+ error(self.pos, "missing end() on %s" % self.sequence.type)
+ self.type = error_type
+ return
+ iter_type = begin.type.return_type
+ if iter_type.is_cpp_class:
+ if env.lookup_operator_for_types(
+ self.pos,
+ "!=",
+ [iter_type, end.type.return_type]) is None:
+ error(self.pos, "missing operator!= on result of begin() on %s" % self.sequence.type)
+ self.type = error_type
+ return
+ if env.lookup_operator_for_types(self.pos, '++', [iter_type]) is None:
+ error(self.pos, "missing operator++ on result of begin() on %s" % self.sequence.type)
+ self.type = error_type
+ return
+ if env.lookup_operator_for_types(self.pos, '*', [iter_type]) is None:
+ error(self.pos, "missing operator* on result of begin() on %s" % self.sequence.type)
+ self.type = error_type
+ return
+ self.type = iter_type
+ elif iter_type.is_ptr:
+ if not (iter_type == end.type.return_type):
+ error(self.pos, "incompatible types for begin() and end()")
+ self.type = iter_type
+ else:
+ error(self.pos, "result type of begin() on %s must be a C++ class or pointer" % self.sequence.type)
+ self.type = error_type
+ return
+
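analyse_cpp_types() above effectively enforces a small C++ iteration protocol: argument-less begin() and end(), and !=, ++ and * on the iterator type returned by begin(). A checklist-style sketch of those requirements (helper name and data shapes are invented for illustration):

def missing_cpp_iteration_support(methods, iterator_operators):
    # "methods" are the sequence class's argument-less member functions,
    # "iterator_operators" the operators supported by begin()'s result.
    problems = []
    for name in ("begin", "end"):
        if name not in methods:
            problems.append("missing %s()" % name)
    for op in ("!=", "++", "*"):
        if op not in iterator_operators:
            problems.append("missing operator%s on result of begin()" % op)
    return problems

assert missing_cpp_iteration_support({"begin", "end"}, {"!=", "++", "*"}) == []
assert missing_cpp_iteration_support({"begin"}, {"++", "*"}) == [
    "missing end()", "missing operator!= on result of begin()"]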
+ def generate_result_code(self, code):
+ sequence_type = self.sequence.type
+ if sequence_type.is_cpp_class:
+ if self.sequence.is_name:
+ # safe: C++ won't allow you to reassign to class references
+ begin_func = "%s.begin" % self.sequence.result()
+ else:
+ sequence_type = PyrexTypes.c_ptr_type(sequence_type)
+ self.cpp_iterator_cname = code.funcstate.allocate_temp(sequence_type, manage_ref=False)
+ code.putln("%s = &%s;" % (self.cpp_iterator_cname, self.sequence.result()))
+ begin_func = "%s->begin" % self.cpp_iterator_cname
+ # TODO: Limit scope.
+ code.putln("%s = %s();" % (self.result(), begin_func))
+ return
+ if sequence_type.is_array or sequence_type.is_ptr:
+ raise InternalError("for in carray slice not transformed")
is_builtin_sequence = sequence_type in (list_type, tuple_type)
- if not is_builtin_sequence:
- # reversed() not currently optimised (see Optimize.py)
- assert not self.reversed, "internal error: reversed() only implemented for list/tuple objects"
- self.may_be_a_sequence = not sequence_type.is_builtin_type
- if self.may_be_a_sequence:
- code.putln(
- "if (likely(PyList_CheckExact(%s)) || PyTuple_CheckExact(%s)) {" % (
- self.sequence.py_result(),
- self.sequence.py_result()))
-
- if is_builtin_sequence or self.may_be_a_sequence:
- self.counter_cname = code.funcstate.allocate_temp(
- PyrexTypes.c_py_ssize_t_type, manage_ref=False)
- if self.reversed:
- if sequence_type is list_type:
- init_value = 'PyList_GET_SIZE(%s) - 1' % self.result()
- else:
- init_value = 'PyTuple_GET_SIZE(%s) - 1' % self.result()
- else:
- init_value = '0'
+ if not is_builtin_sequence:
+ # reversed() not currently optimised (see Optimize.py)
+ assert not self.reversed, "internal error: reversed() only implemented for list/tuple objects"
+ self.may_be_a_sequence = not sequence_type.is_builtin_type
+ if self.may_be_a_sequence:
+ code.putln(
+ "if (likely(PyList_CheckExact(%s)) || PyTuple_CheckExact(%s)) {" % (
+ self.sequence.py_result(),
+ self.sequence.py_result()))
+
+ if is_builtin_sequence or self.may_be_a_sequence:
+ self.counter_cname = code.funcstate.allocate_temp(
+ PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+ if self.reversed:
+ if sequence_type is list_type:
+ init_value = 'PyList_GET_SIZE(%s) - 1' % self.result()
+ else:
+ init_value = 'PyTuple_GET_SIZE(%s) - 1' % self.result()
+ else:
+ init_value = '0'
code.putln("%s = %s; __Pyx_INCREF(%s); %s = %s;" % (
self.result(),
self.sequence.py_result(),
self.result(),
self.counter_cname,
init_value))
- if not is_builtin_sequence:
- self.iter_func_ptr = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False)
- if self.may_be_a_sequence:
- code.putln("%s = NULL;" % self.iter_func_ptr)
- code.putln("} else {")
- code.put("%s = -1; " % self.counter_cname)
-
- code.putln("%s = PyObject_GetIter(%s); %s" % (
+ if not is_builtin_sequence:
+ self.iter_func_ptr = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False)
+ if self.may_be_a_sequence:
+ code.putln("%s = NULL;" % self.iter_func_ptr)
+ code.putln("} else {")
+ code.put("%s = -1; " % self.counter_cname)
+
+ code.putln("%s = PyObject_GetIter(%s); %s" % (
self.result(),
self.sequence.py_result(),
code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
- # PyObject_GetIter() fails if "tp_iternext" is not set, but the check below
- # makes it visible to the C compiler that the pointer really isn't NULL, so that
- # it can distinguish between the special cases and the generic case
- code.putln("%s = Py_TYPE(%s)->tp_iternext; %s" % (
- self.iter_func_ptr, self.py_result(),
- code.error_goto_if_null(self.iter_func_ptr, self.pos)))
- if self.may_be_a_sequence:
- code.putln("}")
-
- def generate_next_sequence_item(self, test_name, result_name, code):
- assert self.counter_cname, "internal error: counter_cname temp not prepared"
- final_size = 'Py%s_GET_SIZE(%s)' % (test_name, self.py_result())
- if self.sequence.is_sequence_constructor:
- item_count = len(self.sequence.args)
- if self.sequence.mult_factor is None:
- final_size = item_count
+ code.put_gotref(self.py_result())
+
+ # PyObject_GetIter() fails if "tp_iternext" is not set, but the check below
+ # makes it visible to the C compiler that the pointer really isn't NULL, so that
+ # it can distinguish between the special cases and the generic case
+ code.putln("%s = Py_TYPE(%s)->tp_iternext; %s" % (
+ self.iter_func_ptr, self.py_result(),
+ code.error_goto_if_null(self.iter_func_ptr, self.pos)))
+ if self.may_be_a_sequence:
+ code.putln("}")
+
+ def generate_next_sequence_item(self, test_name, result_name, code):
+ assert self.counter_cname, "internal error: counter_cname temp not prepared"
+ final_size = 'Py%s_GET_SIZE(%s)' % (test_name, self.py_result())
+ if self.sequence.is_sequence_constructor:
+ item_count = len(self.sequence.args)
+ if self.sequence.mult_factor is None:
+ final_size = item_count
elif isinstance(self.sequence.mult_factor.constant_result, _py_int_types):
- final_size = item_count * self.sequence.mult_factor.constant_result
- code.putln("if (%s >= %s) break;" % (self.counter_cname, final_size))
- if self.reversed:
- inc_dec = '--'
- else:
- inc_dec = '++'
+ final_size = item_count * self.sequence.mult_factor.constant_result
+ code.putln("if (%s >= %s) break;" % (self.counter_cname, final_size))
+ if self.reversed:
+ inc_dec = '--'
+ else:
+ inc_dec = '++'
code.putln("#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS")
- code.putln(
- "%s = Py%s_GET_ITEM(%s, %s); __Pyx_INCREF(%s); %s%s; %s" % (
- result_name,
- test_name,
- self.py_result(),
- self.counter_cname,
- result_name,
- self.counter_cname,
- inc_dec,
- # use the error label to avoid C compiler warnings if we only use it below
- code.error_goto_if_neg('0', self.pos)
- ))
- code.putln("#else")
- code.putln(
- "%s = PySequence_ITEM(%s, %s); %s%s; %s" % (
- result_name,
- self.py_result(),
- self.counter_cname,
- self.counter_cname,
- inc_dec,
- code.error_goto_if_null(result_name, self.pos)))
+ code.putln(
+ "%s = Py%s_GET_ITEM(%s, %s); __Pyx_INCREF(%s); %s%s; %s" % (
+ result_name,
+ test_name,
+ self.py_result(),
+ self.counter_cname,
+ result_name,
+ self.counter_cname,
+ inc_dec,
+ # use the error label to avoid C compiler warnings if we only use it below
+ code.error_goto_if_neg('0', self.pos)
+ ))
+ code.putln("#else")
+ code.putln(
+ "%s = PySequence_ITEM(%s, %s); %s%s; %s" % (
+ result_name,
+ self.py_result(),
+ self.counter_cname,
+ self.counter_cname,
+ inc_dec,
+ code.error_goto_if_null(result_name, self.pos)))
code.put_gotref(result_name)
- code.putln("#endif")
-
- def generate_iter_next_result_code(self, result_name, code):
- sequence_type = self.sequence.type
- if self.reversed:
- code.putln("if (%s < 0) break;" % self.counter_cname)
- if sequence_type.is_cpp_class:
- if self.cpp_iterator_cname:
- end_func = "%s->end" % self.cpp_iterator_cname
- else:
- end_func = "%s.end" % self.sequence.result()
- # TODO: Cache end() call?
- code.putln("if (!(%s != %s())) break;" % (
- self.result(),
- end_func))
- code.putln("%s = *%s;" % (
- result_name,
- self.result()))
- code.putln("++%s;" % self.result())
- return
- elif sequence_type is list_type:
- self.generate_next_sequence_item('List', result_name, code)
- return
- elif sequence_type is tuple_type:
- self.generate_next_sequence_item('Tuple', result_name, code)
- return
-
- if self.may_be_a_sequence:
- code.putln("if (likely(!%s)) {" % self.iter_func_ptr)
- code.putln("if (likely(PyList_CheckExact(%s))) {" % self.py_result())
- self.generate_next_sequence_item('List', result_name, code)
- code.putln("} else {")
- self.generate_next_sequence_item('Tuple', result_name, code)
- code.putln("}")
- code.put("} else ")
-
- code.putln("{")
- code.putln(
- "%s = %s(%s);" % (
- result_name,
- self.iter_func_ptr,
- self.py_result()))
- code.putln("if (unlikely(!%s)) {" % result_name)
- code.putln("PyObject* exc_type = PyErr_Occurred();")
- code.putln("if (exc_type) {")
+ code.putln("#endif")
+
+ def generate_iter_next_result_code(self, result_name, code):
+ sequence_type = self.sequence.type
+ if self.reversed:
+ code.putln("if (%s < 0) break;" % self.counter_cname)
+ if sequence_type.is_cpp_class:
+ if self.cpp_iterator_cname:
+ end_func = "%s->end" % self.cpp_iterator_cname
+ else:
+ end_func = "%s.end" % self.sequence.result()
+ # TODO: Cache end() call?
+ code.putln("if (!(%s != %s())) break;" % (
+ self.result(),
+ end_func))
+ code.putln("%s = *%s;" % (
+ result_name,
+ self.result()))
+ code.putln("++%s;" % self.result())
+ return
+ elif sequence_type is list_type:
+ self.generate_next_sequence_item('List', result_name, code)
+ return
+ elif sequence_type is tuple_type:
+ self.generate_next_sequence_item('Tuple', result_name, code)
+ return
+
+ if self.may_be_a_sequence:
+ code.putln("if (likely(!%s)) {" % self.iter_func_ptr)
+ code.putln("if (likely(PyList_CheckExact(%s))) {" % self.py_result())
+ self.generate_next_sequence_item('List', result_name, code)
+ code.putln("} else {")
+ self.generate_next_sequence_item('Tuple', result_name, code)
+ code.putln("}")
+ code.put("} else ")
+
+ code.putln("{")
+ code.putln(
+ "%s = %s(%s);" % (
+ result_name,
+ self.iter_func_ptr,
+ self.py_result()))
+ code.putln("if (unlikely(!%s)) {" % result_name)
+ code.putln("PyObject* exc_type = PyErr_Occurred();")
+ code.putln("if (exc_type) {")
code.putln("if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();")
- code.putln("else %s" % code.error_goto(self.pos))
- code.putln("}")
- code.putln("break;")
- code.putln("}")
- code.put_gotref(result_name)
- code.putln("}")
-
- def free_temps(self, code):
- if self.counter_cname:
- code.funcstate.release_temp(self.counter_cname)
- if self.iter_func_ptr:
- code.funcstate.release_temp(self.iter_func_ptr)
- self.iter_func_ptr = None
- if self.cpp_iterator_cname:
- code.funcstate.release_temp(self.cpp_iterator_cname)
- ExprNode.free_temps(self, code)
-
-
-class NextNode(AtomicExprNode):
- # Used as part of for statement implementation.
+ code.putln("else %s" % code.error_goto(self.pos))
+ code.putln("}")
+ code.putln("break;")
+ code.putln("}")
+ code.put_gotref(result_name)
+ code.putln("}")
+
+ def free_temps(self, code):
+ if self.counter_cname:
+ code.funcstate.release_temp(self.counter_cname)
+ if self.iter_func_ptr:
+ code.funcstate.release_temp(self.iter_func_ptr)
+ self.iter_func_ptr = None
+ if self.cpp_iterator_cname:
+ code.funcstate.release_temp(self.cpp_iterator_cname)
+ ExprNode.free_temps(self, code)
+
+
+class NextNode(AtomicExprNode):
+ # Used as part of for statement implementation.
# Implements result = next(iterator)
- # Created during analyse_types phase.
- # The iterator is not owned by this node.
- #
- # iterator IteratorNode
-
- def __init__(self, iterator):
- AtomicExprNode.__init__(self, iterator.pos)
- self.iterator = iterator
-
+ # Created during analyse_types phase.
+ # The iterator is not owned by this node.
+ #
+ # iterator IteratorNode
+
+ def __init__(self, iterator):
+ AtomicExprNode.__init__(self, iterator.pos)
+ self.iterator = iterator
+
def nogil_check(self, env):
# ignore - errors (if any) are already handled by IteratorNode
pass
- def type_dependencies(self, env):
- return self.iterator.type_dependencies(env)
-
+ def type_dependencies(self, env):
+ return self.iterator.type_dependencies(env)
+
def infer_type(self, env, iterator_type=None):
- if iterator_type is None:
- iterator_type = self.iterator.infer_type(env)
- if iterator_type.is_ptr or iterator_type.is_array:
- return iterator_type.base_type
- elif iterator_type.is_cpp_class:
- item_type = env.lookup_operator_for_types(self.pos, "*", [iterator_type]).type.return_type
- if item_type.is_reference:
- item_type = item_type.ref_base_type
- if item_type.is_const:
- item_type = item_type.const_base_type
- return item_type
- else:
- # Avoid duplication of complicated logic.
- fake_index_node = IndexNode(
- self.pos,
- base=self.iterator.sequence,
- index=IntNode(self.pos, value='PY_SSIZE_T_MAX',
- type=PyrexTypes.c_py_ssize_t_type))
- return fake_index_node.infer_type(env)
-
- def analyse_types(self, env):
- self.type = self.infer_type(env, self.iterator.type)
- self.is_temp = 1
- return self
-
- def generate_result_code(self, code):
- self.iterator.generate_iter_next_result_code(self.result(), code)
-
-
+ if iterator_type is None:
+ iterator_type = self.iterator.infer_type(env)
+ if iterator_type.is_ptr or iterator_type.is_array:
+ return iterator_type.base_type
+ elif iterator_type.is_cpp_class:
+ item_type = env.lookup_operator_for_types(self.pos, "*", [iterator_type]).type.return_type
+ if item_type.is_reference:
+ item_type = item_type.ref_base_type
+ if item_type.is_const:
+ item_type = item_type.const_base_type
+ return item_type
+ else:
+ # Avoid duplication of complicated logic.
+ fake_index_node = IndexNode(
+ self.pos,
+ base=self.iterator.sequence,
+ index=IntNode(self.pos, value='PY_SSIZE_T_MAX',
+ type=PyrexTypes.c_py_ssize_t_type))
+ return fake_index_node.infer_type(env)
+
+ def analyse_types(self, env):
+ self.type = self.infer_type(env, self.iterator.type)
+ self.is_temp = 1
+ return self
+
+ def generate_result_code(self, code):
+ self.iterator.generate_iter_next_result_code(self.result(), code)
+
+
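# --- Illustrative sketch (editorial addition, not part of the diff): the
# IteratorNode/NextNode pair above compiles a 'for' loop so that exact lists
# and tuples are walked with a plain index counter (counting down for
# reversed()), while any other iterable falls back to the generic
# tp_iternext protocol, with StopIteration ending the loop. A rough
# pure-Python equivalent of that control flow:

def iterate(seq, reverse=False):
    if type(seq) in (list, tuple):
        # fast path: direct item access, mirroring PyList_GET_ITEM/PyTuple_GET_ITEM
        indices = range(len(seq) - 1, -1, -1) if reverse else range(len(seq))
        for i in indices:
            yield seq[i]
        return
    it = iter(seq)                     # generic path: PyObject_GetIter()
    while True:
        try:
            yield next(it)             # tp_iternext call
        except StopIteration:          # cleared, loop ends normally
            break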
class AsyncIteratorNode(ExprNode):
# Used as part of 'async for' statement implementation.
#
@@ -2963,47 +2963,47 @@ class AsyncNextNode(AtomicExprNode):
code.put_gotref(self.result())
-class WithExitCallNode(ExprNode):
- # The __exit__() call of a 'with' statement. Used in both the
- # except and finally clauses.
-
+class WithExitCallNode(ExprNode):
+ # The __exit__() call of a 'with' statement. Used in both the
+ # except and finally clauses.
+
# with_stat WithStatNode the surrounding 'with' statement
# args TupleNode or ResultStatNode the exception info tuple
# await_expr AwaitExprNode the await expression of an 'async with' statement
-
+
subexprs = ['args', 'await_expr']
- test_if_run = True
+ test_if_run = True
await_expr = None
-
- def analyse_types(self, env):
- self.args = self.args.analyse_types(env)
+
+ def analyse_types(self, env):
+ self.args = self.args.analyse_types(env)
if self.await_expr:
self.await_expr = self.await_expr.analyse_types(env)
- self.type = PyrexTypes.c_bint_type
- self.is_temp = True
- return self
-
- def generate_evaluation_code(self, code):
- if self.test_if_run:
- # call only if it was not already called (and decref-cleared)
- code.putln("if (%s) {" % self.with_stat.exit_var)
-
- self.args.generate_evaluation_code(code)
- result_var = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
-
- code.mark_pos(self.pos)
- code.globalstate.use_utility_code(UtilityCode.load_cached(
- "PyObjectCall", "ObjectHandling.c"))
- code.putln("%s = __Pyx_PyObject_Call(%s, %s, NULL);" % (
- result_var,
- self.with_stat.exit_var,
- self.args.result()))
- code.put_decref_clear(self.with_stat.exit_var, type=py_object_type)
- self.args.generate_disposal_code(code)
- self.args.free_temps(code)
-
- code.putln(code.error_goto_if_null(result_var, self.pos))
- code.put_gotref(result_var)
+ self.type = PyrexTypes.c_bint_type
+ self.is_temp = True
+ return self
+
+ def generate_evaluation_code(self, code):
+ if self.test_if_run:
+ # call only if it was not already called (and decref-cleared)
+ code.putln("if (%s) {" % self.with_stat.exit_var)
+
+ self.args.generate_evaluation_code(code)
+ result_var = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
+
+ code.mark_pos(self.pos)
+ code.globalstate.use_utility_code(UtilityCode.load_cached(
+ "PyObjectCall", "ObjectHandling.c"))
+ code.putln("%s = __Pyx_PyObject_Call(%s, %s, NULL);" % (
+ result_var,
+ self.with_stat.exit_var,
+ self.args.result()))
+ code.put_decref_clear(self.with_stat.exit_var, type=py_object_type)
+ self.args.generate_disposal_code(code)
+ self.args.free_temps(code)
+
+ code.putln(code.error_goto_if_null(result_var, self.pos))
+ code.put_gotref(result_var)
if self.await_expr:
# FIXME: result_var temp currently leaks into the closure
@@ -3012,117 +3012,117 @@ class WithExitCallNode(ExprNode):
self.await_expr.generate_post_assignment_code(code)
self.await_expr.free_temps(code)
- if self.result_is_used:
- self.allocate_temp_result(code)
- code.putln("%s = __Pyx_PyObject_IsTrue(%s);" % (self.result(), result_var))
- code.put_decref_clear(result_var, type=py_object_type)
- if self.result_is_used:
- code.put_error_if_neg(self.pos, self.result())
- code.funcstate.release_temp(result_var)
- if self.test_if_run:
- code.putln("}")
-
-
-class ExcValueNode(AtomicExprNode):
- # Node created during analyse_types phase
- # of an ExceptClauseNode to fetch the current
- # exception value.
-
- type = py_object_type
-
- def __init__(self, pos):
- ExprNode.__init__(self, pos)
-
- def set_var(self, var):
- self.var = var
-
- def calculate_result_code(self):
- return self.var
-
- def generate_result_code(self, code):
- pass
-
- def analyse_types(self, env):
- return self
-
-
-class TempNode(ExprNode):
- # Node created during analyse_types phase
- # of some nodes to hold a temporary value.
- #
- # Note: One must call "allocate" and "release" on
- # the node during code generation to get/release the temp.
- # This is because the temp result is often used outside of
- # the regular cycle.
-
- subexprs = []
-
- def __init__(self, pos, type, env=None):
- ExprNode.__init__(self, pos)
- self.type = type
- if type.is_pyobject:
- self.result_ctype = py_object_type
- self.is_temp = 1
-
- def analyse_types(self, env):
- return self
-
- def analyse_target_declaration(self, env):
- pass
-
- def generate_result_code(self, code):
- pass
-
- def allocate(self, code):
- self.temp_cname = code.funcstate.allocate_temp(self.type, manage_ref=True)
-
- def release(self, code):
- code.funcstate.release_temp(self.temp_cname)
- self.temp_cname = None
-
- def result(self):
- try:
- return self.temp_cname
- except:
- assert False, "Remember to call allocate/release on TempNode"
- raise
-
- # Do not participate in normal temp alloc/dealloc:
- def allocate_temp_result(self, code):
- pass
-
- def release_temp_result(self, code):
- pass
-
-class PyTempNode(TempNode):
- # TempNode holding a Python value.
-
- def __init__(self, pos, env):
- TempNode.__init__(self, pos, PyrexTypes.py_object_type, env)
-
-class RawCNameExprNode(ExprNode):
- subexprs = []
-
- def __init__(self, pos, type=None, cname=None):
- ExprNode.__init__(self, pos, type=type)
- if cname is not None:
- self.cname = cname
-
- def analyse_types(self, env):
- return self
-
- def set_cname(self, cname):
- self.cname = cname
-
- def result(self):
- return self.cname
-
- def generate_result_code(self, code):
- pass
-
-
-#-------------------------------------------------------------------
-#
+ if self.result_is_used:
+ self.allocate_temp_result(code)
+ code.putln("%s = __Pyx_PyObject_IsTrue(%s);" % (self.result(), result_var))
+ code.put_decref_clear(result_var, type=py_object_type)
+ if self.result_is_used:
+ code.put_error_if_neg(self.pos, self.result())
+ code.funcstate.release_temp(result_var)
+ if self.test_if_run:
+ code.putln("}")
+
+
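# --- Illustrative sketch (editorial addition, not part of the diff): the
# WithExitCallNode above emits the __exit__() call of a 'with' (or 'async
# with') block and truth-tests its result to decide whether the pending
# exception is suppressed. In plain Python the same protocol looks like:

import sys

def run_with(manager, body):
    value = manager.__enter__()
    try:
        body(value)
    except BaseException:
        # 'args' above is the exception-info tuple handed to __exit__()
        if not manager.__exit__(*sys.exc_info()):
            raise
    else:
        manager.__exit__(None, None, None)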
+class ExcValueNode(AtomicExprNode):
+ # Node created during analyse_types phase
+ # of an ExceptClauseNode to fetch the current
+ # exception value.
+
+ type = py_object_type
+
+ def __init__(self, pos):
+ ExprNode.__init__(self, pos)
+
+ def set_var(self, var):
+ self.var = var
+
+ def calculate_result_code(self):
+ return self.var
+
+ def generate_result_code(self, code):
+ pass
+
+ def analyse_types(self, env):
+ return self
+
+
+class TempNode(ExprNode):
+ # Node created during analyse_types phase
+ # of some nodes to hold a temporary value.
+ #
+ # Note: One must call "allocate" and "release" on
+ # the node during code generation to get/release the temp.
+ # This is because the temp result is often used outside of
+ # the regular cycle.
+
+ subexprs = []
+
+ def __init__(self, pos, type, env=None):
+ ExprNode.__init__(self, pos)
+ self.type = type
+ if type.is_pyobject:
+ self.result_ctype = py_object_type
+ self.is_temp = 1
+
+ def analyse_types(self, env):
+ return self
+
+ def analyse_target_declaration(self, env):
+ pass
+
+ def generate_result_code(self, code):
+ pass
+
+ def allocate(self, code):
+ self.temp_cname = code.funcstate.allocate_temp(self.type, manage_ref=True)
+
+ def release(self, code):
+ code.funcstate.release_temp(self.temp_cname)
+ self.temp_cname = None
+
+ def result(self):
+ try:
+ return self.temp_cname
+ except:
+ assert False, "Remember to call allocate/release on TempNode"
+ raise
+
+ # Do not participate in normal temp alloc/dealloc:
+ def allocate_temp_result(self, code):
+ pass
+
+ def release_temp_result(self, code):
+ pass
+
+class PyTempNode(TempNode):
+ # TempNode holding a Python value.
+
+ def __init__(self, pos, env):
+ TempNode.__init__(self, pos, PyrexTypes.py_object_type, env)
+
+class RawCNameExprNode(ExprNode):
+ subexprs = []
+
+ def __init__(self, pos, type=None, cname=None):
+ ExprNode.__init__(self, pos, type=type)
+ if cname is not None:
+ self.cname = cname
+
+ def analyse_types(self, env):
+ return self
+
+ def set_cname(self, cname):
+ self.cname = cname
+
+ def result(self):
+ return self.cname
+
+ def generate_result_code(self, code):
+ pass
+
+
+#-------------------------------------------------------------------
+#
# F-strings
#
#-------------------------------------------------------------------
@@ -3318,68 +3318,68 @@ class FormattedValueNode(ExprNode):
#-------------------------------------------------------------------
#
-# Parallel nodes (cython.parallel.thread(savailable|id))
-#
-#-------------------------------------------------------------------
-
-class ParallelThreadsAvailableNode(AtomicExprNode):
- """
- Note: this is disabled and not a valid directive at this moment
-
- Implements cython.parallel.threadsavailable(). If we are called from the
- sequential part of the application, we need to call omp_get_max_threads(),
- and in the parallel part we can just call omp_get_num_threads()
- """
-
- type = PyrexTypes.c_int_type
-
- def analyse_types(self, env):
- self.is_temp = True
- # env.add_include_file("omp.h")
- return self
-
- def generate_result_code(self, code):
- code.putln("#ifdef _OPENMP")
- code.putln("if (omp_in_parallel()) %s = omp_get_max_threads();" %
- self.temp_code)
- code.putln("else %s = omp_get_num_threads();" % self.temp_code)
- code.putln("#else")
- code.putln("%s = 1;" % self.temp_code)
- code.putln("#endif")
-
- def result(self):
- return self.temp_code
-
-
-class ParallelThreadIdNode(AtomicExprNode): #, Nodes.ParallelNode):
- """
- Implements cython.parallel.threadid()
- """
-
- type = PyrexTypes.c_int_type
-
- def analyse_types(self, env):
- self.is_temp = True
- # env.add_include_file("omp.h")
- return self
-
- def generate_result_code(self, code):
- code.putln("#ifdef _OPENMP")
- code.putln("%s = omp_get_thread_num();" % self.temp_code)
- code.putln("#else")
- code.putln("%s = 0;" % self.temp_code)
- code.putln("#endif")
-
- def result(self):
- return self.temp_code
-
-
-#-------------------------------------------------------------------
-#
-# Trailer nodes
-#
-#-------------------------------------------------------------------
-
+# Parallel nodes (cython.parallel.thread(savailable|id))
+#
+#-------------------------------------------------------------------
+
+class ParallelThreadsAvailableNode(AtomicExprNode):
+ """
+ Note: this is disabled and not a valid directive at this moment
+
+ Implements cython.parallel.threadsavailable(). If we are called from the
+ sequential part of the application, we need to call omp_get_max_threads(),
+ and in the parallel part we can just call omp_get_num_threads()
+ """
+
+ type = PyrexTypes.c_int_type
+
+ def analyse_types(self, env):
+ self.is_temp = True
+ # env.add_include_file("omp.h")
+ return self
+
+ def generate_result_code(self, code):
+ code.putln("#ifdef _OPENMP")
+ code.putln("if (omp_in_parallel()) %s = omp_get_max_threads();" %
+ self.temp_code)
+ code.putln("else %s = omp_get_num_threads();" % self.temp_code)
+ code.putln("#else")
+ code.putln("%s = 1;" % self.temp_code)
+ code.putln("#endif")
+
+ def result(self):
+ return self.temp_code
+
+
+class ParallelThreadIdNode(AtomicExprNode): #, Nodes.ParallelNode):
+ """
+ Implements cython.parallel.threadid()
+ """
+
+ type = PyrexTypes.c_int_type
+
+ def analyse_types(self, env):
+ self.is_temp = True
+ # env.add_include_file("omp.h")
+ return self
+
+ def generate_result_code(self, code):
+ code.putln("#ifdef _OPENMP")
+ code.putln("%s = omp_get_thread_num();" % self.temp_code)
+ code.putln("#else")
+ code.putln("%s = 0;" % self.temp_code)
+ code.putln("#endif")
+
+ def result(self):
+ return self.temp_code
+
+
+#-------------------------------------------------------------------
+#
+# Trailer nodes
+#
+#-------------------------------------------------------------------
+
class _IndexingBaseNode(ExprNode):
# Base class for indexing nodes.
@@ -3412,61 +3412,61 @@ class _IndexingBaseNode(ExprNode):
class IndexNode(_IndexingBaseNode):
- # Sequence indexing.
- #
- # base ExprNode
- # index ExprNode
- # type_indices [PyrexType]
- #
- # is_fused_index boolean Whether the index is used to specialize a
- # c(p)def function
-
+ # Sequence indexing.
+ #
+ # base ExprNode
+ # index ExprNode
+ # type_indices [PyrexType]
+ #
+ # is_fused_index boolean Whether the index is used to specialize a
+ # c(p)def function
+
subexprs = ['base', 'index']
- type_indices = None
-
- is_subscript = True
- is_fused_index = False
-
- def calculate_constant_result(self):
+ type_indices = None
+
+ is_subscript = True
+ is_fused_index = False
+
+ def calculate_constant_result(self):
self.constant_result = self.base.constant_result[self.index.constant_result]
-
- def compile_time_value(self, denv):
- base = self.base.compile_time_value(denv)
- index = self.index.compile_time_value(denv)
- try:
- return base[index]
+
+ def compile_time_value(self, denv):
+ base = self.base.compile_time_value(denv)
+ index = self.index.compile_time_value(denv)
+ try:
+ return base[index]
except Exception as e:
- self.compile_time_value_error(e)
-
- def is_simple(self):
- base = self.base
- return (base.is_simple() and self.index.is_simple()
- and base.type and (base.type.is_ptr or base.type.is_array))
-
- def may_be_none(self):
- base_type = self.base.type
- if base_type:
- if base_type.is_string:
- return False
- if isinstance(self.index, SliceNode):
- # slicing!
+ self.compile_time_value_error(e)
+
+ def is_simple(self):
+ base = self.base
+ return (base.is_simple() and self.index.is_simple()
+ and base.type and (base.type.is_ptr or base.type.is_array))
+
+ def may_be_none(self):
+ base_type = self.base.type
+ if base_type:
+ if base_type.is_string:
+ return False
+ if isinstance(self.index, SliceNode):
+ # slicing!
if base_type in (bytes_type, bytearray_type, str_type, unicode_type,
- basestring_type, list_type, tuple_type):
- return False
- return ExprNode.may_be_none(self)
-
- def analyse_target_declaration(self, env):
- pass
-
- def analyse_as_type(self, env):
- base_type = self.base.analyse_as_type(env)
- if base_type and not base_type.is_pyobject:
- if base_type.is_cpp_class:
- if isinstance(self.index, TupleNode):
- template_values = self.index.args
- else:
- template_values = [self.index]
- type_node = Nodes.TemplatedTypeNode(
+ basestring_type, list_type, tuple_type):
+ return False
+ return ExprNode.may_be_none(self)
+
+ def analyse_target_declaration(self, env):
+ pass
+
+ def analyse_as_type(self, env):
+ base_type = self.base.analyse_as_type(env)
+ if base_type and not base_type.is_pyobject:
+ if base_type.is_cpp_class:
+ if isinstance(self.index, TupleNode):
+ template_values = self.index.args
+ else:
+ template_values = [self.index]
+ type_node = Nodes.TemplatedTypeNode(
pos=self.pos,
positional_args=template_values,
keyword_args=None)
@@ -3477,71 +3477,71 @@ class IndexNode(_IndexingBaseNode):
env.use_utility_code(MemoryView.view_utility_code)
axes = [self.index] if self.index.is_slice else list(self.index.args)
return PyrexTypes.MemoryViewSliceType(base_type, MemoryView.get_axes_specs(env, axes))
- else:
+ else:
# C array
- index = self.index.compile_time_value(env)
- if index is not None:
+ index = self.index.compile_time_value(env)
+ if index is not None:
try:
index = int(index)
except (ValueError, TypeError):
pass
else:
return PyrexTypes.CArrayType(base_type, index)
- error(self.pos, "Array size must be a compile time constant")
- return None
-
- def type_dependencies(self, env):
- return self.base.type_dependencies(env) + self.index.type_dependencies(env)
-
- def infer_type(self, env):
- base_type = self.base.infer_type(env)
+ error(self.pos, "Array size must be a compile time constant")
+ return None
+
+ def type_dependencies(self, env):
+ return self.base.type_dependencies(env) + self.index.type_dependencies(env)
+
+ def infer_type(self, env):
+ base_type = self.base.infer_type(env)
if self.index.is_slice:
- # slicing!
- if base_type.is_string:
- # sliced C strings must coerce to Python
- return bytes_type
- elif base_type.is_pyunicode_ptr:
- # sliced Py_UNICODE* strings must coerce to Python
- return unicode_type
- elif base_type in (unicode_type, bytes_type, str_type,
- bytearray_type, list_type, tuple_type):
- # slicing these returns the same type
- return base_type
- else:
- # TODO: Handle buffers (hopefully without too much redundancy).
- return py_object_type
-
- index_type = self.index.infer_type(env)
- if index_type and index_type.is_int or isinstance(self.index, IntNode):
- # indexing!
- if base_type is unicode_type:
- # Py_UCS4 will automatically coerce to a unicode string
- # if required, so this is safe. We only infer Py_UCS4
- # when the index is a C integer type. Otherwise, we may
- # need to use normal Python item access, in which case
- # it's faster to return the one-char unicode string than
- # to receive it, throw it away, and potentially rebuild it
- # on a subsequent PyObject coercion.
- return PyrexTypes.c_py_ucs4_type
- elif base_type is str_type:
- # always returns str - Py2: bytes, Py3: unicode
- return base_type
- elif base_type is bytearray_type:
- return PyrexTypes.c_uchar_type
- elif isinstance(self.base, BytesNode):
- #if env.global_scope().context.language_level >= 3:
- # # inferring 'char' can be made to work in Python 3 mode
- # return PyrexTypes.c_char_type
- # Py2/3 return different types on indexing bytes objects
- return py_object_type
- elif base_type in (tuple_type, list_type):
- # if base is a literal, take a look at its values
- item_type = infer_sequence_item_type(
- env, self.base, self.index, seq_type=base_type)
- if item_type is not None:
- return item_type
- elif base_type.is_ptr or base_type.is_array:
- return base_type.base_type
+ # slicing!
+ if base_type.is_string:
+ # sliced C strings must coerce to Python
+ return bytes_type
+ elif base_type.is_pyunicode_ptr:
+ # sliced Py_UNICODE* strings must coerce to Python
+ return unicode_type
+ elif base_type in (unicode_type, bytes_type, str_type,
+ bytearray_type, list_type, tuple_type):
+ # slicing these returns the same type
+ return base_type
+ else:
+ # TODO: Handle buffers (hopefully without too much redundancy).
+ return py_object_type
+
+ index_type = self.index.infer_type(env)
+ if index_type and index_type.is_int or isinstance(self.index, IntNode):
+ # indexing!
+ if base_type is unicode_type:
+ # Py_UCS4 will automatically coerce to a unicode string
+ # if required, so this is safe. We only infer Py_UCS4
+ # when the index is a C integer type. Otherwise, we may
+ # need to use normal Python item access, in which case
+ # it's faster to return the one-char unicode string than
+ # to receive it, throw it away, and potentially rebuild it
+ # on a subsequent PyObject coercion.
+ return PyrexTypes.c_py_ucs4_type
+ elif base_type is str_type:
+ # always returns str - Py2: bytes, Py3: unicode
+ return base_type
+ elif base_type is bytearray_type:
+ return PyrexTypes.c_uchar_type
+ elif isinstance(self.base, BytesNode):
+ #if env.global_scope().context.language_level >= 3:
+ # # inferring 'char' can be made to work in Python 3 mode
+ # return PyrexTypes.c_char_type
+ # Py2/3 return different types on indexing bytes objects
+ return py_object_type
+ elif base_type in (tuple_type, list_type):
+ # if base is a literal, take a look at its values
+ item_type = infer_sequence_item_type(
+ env, self.base, self.index, seq_type=base_type)
+ if item_type is not None:
+ return item_type
+ elif base_type.is_ptr or base_type.is_array:
+ return base_type.base_type
elif base_type.is_ctuple and isinstance(self.index, IntNode):
if self.index.has_constant_result():
index = self.index.constant_result
@@ -3549,94 +3549,94 @@ class IndexNode(_IndexingBaseNode):
index += base_type.size
if 0 <= index < base_type.size:
return base_type.components[index]
-
- if base_type.is_cpp_class:
- class FakeOperand:
- def __init__(self, **kwds):
- self.__dict__.update(kwds)
- operands = [
- FakeOperand(pos=self.pos, type=base_type),
- FakeOperand(pos=self.pos, type=index_type),
- ]
- index_func = env.lookup_operator('[]', operands)
- if index_func is not None:
- return index_func.type.return_type
-
+
+ if base_type.is_cpp_class:
+ class FakeOperand:
+ def __init__(self, **kwds):
+ self.__dict__.update(kwds)
+ operands = [
+ FakeOperand(pos=self.pos, type=base_type),
+ FakeOperand(pos=self.pos, type=index_type),
+ ]
+ index_func = env.lookup_operator('[]', operands)
+ if index_func is not None:
+ return index_func.type.return_type
+
if is_pythran_expr(base_type) and is_pythran_expr(index_type):
index_with_type = (self.index, index_type)
return PythranExpr(pythran_indexing_type(base_type, [index_with_type]))
- # may be slicing or indexing, we don't know
- if base_type in (unicode_type, str_type):
- # these types always return their own type on Python indexing/slicing
- return base_type
- else:
- # TODO: Handle buffers (hopefully without too much redundancy).
- return py_object_type
-
- def analyse_types(self, env):
- return self.analyse_base_and_index_types(env, getting=True)
-
- def analyse_target_types(self, env):
- node = self.analyse_base_and_index_types(env, setting=True)
- if node.type.is_const:
- error(self.pos, "Assignment to const dereference")
+ # may be slicing or indexing, we don't know
+ if base_type in (unicode_type, str_type):
+ # these types always return their own type on Python indexing/slicing
+ return base_type
+ else:
+ # TODO: Handle buffers (hopefully without too much redundancy).
+ return py_object_type
+
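# --- Illustrative note (editorial addition, Python 3 semantics): the
# inference rules above follow what plain indexing returns for the builtin
# types, which is why the generated C can use Py_UCS4 for unicode and an
# unsigned char for bytearray, while indexing bytes stays a generic object
# (its result type differs between Python 2 and 3):

assert isinstance("abc"[1], str) and len("abc"[1]) == 1   # one-char string
assert isinstance(bytearray(b"abc")[1], int)              # 0..255 integer
assert isinstance(b"abc"[1], int)   # Python 2 returned a length-1 str instead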
+ def analyse_types(self, env):
+ return self.analyse_base_and_index_types(env, getting=True)
+
+ def analyse_target_types(self, env):
+ node = self.analyse_base_and_index_types(env, setting=True)
+ if node.type.is_const:
+ error(self.pos, "Assignment to const dereference")
if node is self and not node.is_lvalue():
- error(self.pos, "Assignment to non-lvalue of type '%s'" % node.type)
- return node
-
- def analyse_base_and_index_types(self, env, getting=False, setting=False,
- analyse_base=True):
- # Note: This might be cleaned up by having IndexNode
- # parsed in a saner way and only construct the tuple if
- # needed.
- if analyse_base:
- self.base = self.base.analyse_types(env)
-
- if self.base.type.is_error:
- # Do not visit child tree if base is undeclared to avoid confusing
- # error messages
- self.type = PyrexTypes.error_type
- return self
-
+ error(self.pos, "Assignment to non-lvalue of type '%s'" % node.type)
+ return node
+
+ def analyse_base_and_index_types(self, env, getting=False, setting=False,
+ analyse_base=True):
+ # Note: This might be cleaned up by having IndexNode
+ # parsed in a saner way and only construct the tuple if
+ # needed.
+ if analyse_base:
+ self.base = self.base.analyse_types(env)
+
+ if self.base.type.is_error:
+ # Do not visit child tree if base is undeclared to avoid confusing
+ # error messages
+ self.type = PyrexTypes.error_type
+ return self
+
is_slice = self.index.is_slice
- if not env.directives['wraparound']:
- if is_slice:
- check_negative_indices(self.index.start, self.index.stop)
- else:
- check_negative_indices(self.index)
-
- # Potentially overflowing index value.
- if not is_slice and isinstance(self.index, IntNode) and Utils.long_literal(self.index.value):
- self.index = self.index.coerce_to_pyobject(env)
-
- is_memslice = self.base.type.is_memoryviewslice
- # Handle the case where base is a literal char* (and we expect a string, not an int)
- if not is_memslice and (isinstance(self.base, BytesNode) or is_slice):
- if self.base.type.is_string or not (self.base.type.is_ptr or self.base.type.is_array):
- self.base = self.base.coerce_to_pyobject(env)
-
+ if not env.directives['wraparound']:
+ if is_slice:
+ check_negative_indices(self.index.start, self.index.stop)
+ else:
+ check_negative_indices(self.index)
+
+ # Potentially overflowing index value.
+ if not is_slice and isinstance(self.index, IntNode) and Utils.long_literal(self.index.value):
+ self.index = self.index.coerce_to_pyobject(env)
+
+ is_memslice = self.base.type.is_memoryviewslice
+ # Handle the case where base is a literal char* (and we expect a string, not an int)
+ if not is_memslice and (isinstance(self.base, BytesNode) or is_slice):
+ if self.base.type.is_string or not (self.base.type.is_ptr or self.base.type.is_array):
+ self.base = self.base.coerce_to_pyobject(env)
+
replacement_node = self.analyse_as_buffer_operation(env, getting)
if replacement_node is not None:
return replacement_node
-
- self.nogil = env.nogil
+
+ self.nogil = env.nogil
base_type = self.base.type
-
+
if not base_type.is_cfunction:
self.index = self.index.analyse_types(env)
self.original_index_type = self.index.type
-
- if base_type.is_unicode_char:
- # we infer Py_UNICODE/Py_UCS4 for unicode strings in some
- # cases, but indexing must still work for them
- if setting:
- warning(self.pos, "cannot assign to Unicode string index", level=1)
- elif self.index.constant_result in (0, -1):
- # uchar[0] => uchar
- return self.base
- self.base = self.base.coerce_to_pyobject(env)
- base_type = self.base.type
+
+ if base_type.is_unicode_char:
+ # we infer Py_UNICODE/Py_UCS4 for unicode strings in some
+ # cases, but indexing must still work for them
+ if setting:
+ warning(self.pos, "cannot assign to Unicode string index", level=1)
+ elif self.index.constant_result in (0, -1):
+ # uchar[0] => uchar
+ return self.base
+ self.base = self.base.coerce_to_pyobject(env)
+ base_type = self.base.type
if base_type.is_pyobject:
return self.analyse_as_pyobject(env, is_slice, getting, setting)
@@ -3673,14 +3673,14 @@ class IndexNode(_IndexingBaseNode):
self.index.has_constant_result() and self.index.constant_result >= 0))
and not env.directives['boundscheck']):
self.is_temp = 0
- else:
+ else:
self.is_temp = 1
self.index = self.index.coerce_to(PyrexTypes.c_py_ssize_t_type, env).coerce_to_simple(env)
self.original_index_type.create_to_py_utility_code(env)
else:
self.index = self.index.coerce_to_pyobject(env)
self.is_temp = 1
-
+
if self.index.type.is_int and base_type is unicode_type:
# Py_UNICODE/Py_UCS4 will automatically coerce to a unicode string
# if required, so this is fast and safe
@@ -3705,9 +3705,9 @@ class IndexNode(_IndexingBaseNode):
# do the None check explicitly (not in a helper) to allow optimising it away
self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
- self.wrap_in_nonecheck_node(env, getting)
- return self
-
+ self.wrap_in_nonecheck_node(env, getting)
+ return self
+
def analyse_as_c_array(self, env, is_slice):
base_type = self.base.type
self.type = base_type.base_type
@@ -3718,7 +3718,7 @@ class IndexNode(_IndexingBaseNode):
elif not self.index.type.is_int:
error(self.pos, "Invalid index type '%s'" % self.index.type)
return self
-
+
def analyse_as_cpp(self, env, setting):
base_type = self.base.type
function = env.lookup_operator("[]", [self.base, self.index])
@@ -3760,7 +3760,7 @@ class IndexNode(_IndexingBaseNode):
error(self.pos, "Wrong number of template arguments: expected %s, got %s" % (
(len(base_type.templates), len(self.type_indices))))
self.type = error_type
- else:
+ else:
self.type = base_type.specialize(dict(zip(base_type.templates, self.type_indices)))
# FIXME: use a dedicated Node class instead of generic IndexNode
return self
@@ -3779,10 +3779,10 @@ class IndexNode(_IndexingBaseNode):
(index, base_type))
self.type = PyrexTypes.error_type
return self
- else:
+ else:
self.base = self.base.coerce_to_pyobject(env)
return self.analyse_base_and_index_types(env, getting=getting, setting=setting, analyse_base=False)
-
+
def analyse_as_buffer_operation(self, env, getting):
"""
Analyse buffer indexing and memoryview indexing/slicing
@@ -3791,7 +3791,7 @@ class IndexNode(_IndexingBaseNode):
indices = self.index.args
else:
indices = [self.index]
-
+
base = self.base
base_type = base.type
replacement_node = None
@@ -3841,157 +3841,157 @@ class IndexNode(_IndexingBaseNode):
return
self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
- def parse_index_as_types(self, env, required=True):
- if isinstance(self.index, TupleNode):
- indices = self.index.args
- else:
- indices = [self.index]
- type_indices = []
- for index in indices:
- type_indices.append(index.analyse_as_type(env))
- if type_indices[-1] is None:
- if required:
- error(index.pos, "not parsable as a type")
- return None
- return type_indices
-
- def parse_indexed_fused_cdef(self, env):
- """
- Interpret fused_cdef_func[specific_type1, ...]
-
- Note that if this method is called, we are an indexed cdef function
- with fused argument types, and this IndexNode will be replaced by the
- NameNode with specific entry just after analysis of expressions by
- AnalyseExpressionsTransform.
- """
- self.type = PyrexTypes.error_type
-
- self.is_fused_index = True
-
- base_type = self.base.type
- positions = []
-
- if self.index.is_name or self.index.is_attribute:
- positions.append(self.index.pos)
- elif isinstance(self.index, TupleNode):
- for arg in self.index.args:
- positions.append(arg.pos)
- specific_types = self.parse_index_as_types(env, required=False)
-
- if specific_types is None:
- self.index = self.index.analyse_types(env)
-
- if not self.base.entry.as_variable:
- error(self.pos, "Can only index fused functions with types")
- else:
- # A cpdef function indexed with Python objects
- self.base.entry = self.entry = self.base.entry.as_variable
- self.base.type = self.type = self.entry.type
-
- self.base.is_temp = True
- self.is_temp = True
-
- self.entry.used = True
-
- self.is_fused_index = False
- return
-
- for i, type in enumerate(specific_types):
- specific_types[i] = type.specialize_fused(env)
-
- fused_types = base_type.get_fused_types()
- if len(specific_types) > len(fused_types):
- return error(self.pos, "Too many types specified")
- elif len(specific_types) < len(fused_types):
- t = fused_types[len(specific_types)]
- return error(self.pos, "Not enough types specified to specialize "
- "the function, %s is still fused" % t)
-
- # See if our index types form valid specializations
- for pos, specific_type, fused_type in zip(positions,
- specific_types,
- fused_types):
- if not any([specific_type.same_as(t) for t in fused_type.types]):
- return error(pos, "Type not in fused type")
-
- if specific_type is None or specific_type.is_error:
- return
-
- fused_to_specific = dict(zip(fused_types, specific_types))
- type = base_type.specialize(fused_to_specific)
-
- if type.is_fused:
- # Only partially specific, this is invalid
- error(self.pos,
- "Index operation makes function only partially specific")
- else:
- # Fully specific, find the signature with the specialized entry
- for signature in self.base.type.get_all_specialized_function_types():
- if type.same_as(signature):
- self.type = signature
-
- if self.base.is_attribute:
- # Pretend to be a normal attribute, for cdef extension
- # methods
- self.entry = signature.entry
- self.is_attribute = True
- self.obj = self.base.obj
-
- self.type.entry.used = True
- self.base.type = signature
- self.base.entry = signature.entry
-
- break
- else:
- # This is a bug
- raise InternalError("Couldn't find the right signature")
-
- gil_message = "Indexing Python object"
-
- def calculate_result_code(self):
+ def parse_index_as_types(self, env, required=True):
+ if isinstance(self.index, TupleNode):
+ indices = self.index.args
+ else:
+ indices = [self.index]
+ type_indices = []
+ for index in indices:
+ type_indices.append(index.analyse_as_type(env))
+ if type_indices[-1] is None:
+ if required:
+ error(index.pos, "not parsable as a type")
+ return None
+ return type_indices
+
+ def parse_indexed_fused_cdef(self, env):
+ """
+ Interpret fused_cdef_func[specific_type1, ...]
+
+ Note that if this method is called, we are an indexed cdef function
+ with fused argument types, and this IndexNode will be replaced by the
+ NameNode with specific entry just after analysis of expressions by
+ AnalyseExpressionsTransform.
+ """
+ self.type = PyrexTypes.error_type
+
+ self.is_fused_index = True
+
+ base_type = self.base.type
+ positions = []
+
+ if self.index.is_name or self.index.is_attribute:
+ positions.append(self.index.pos)
+ elif isinstance(self.index, TupleNode):
+ for arg in self.index.args:
+ positions.append(arg.pos)
+ specific_types = self.parse_index_as_types(env, required=False)
+
+ if specific_types is None:
+ self.index = self.index.analyse_types(env)
+
+ if not self.base.entry.as_variable:
+ error(self.pos, "Can only index fused functions with types")
+ else:
+ # A cpdef function indexed with Python objects
+ self.base.entry = self.entry = self.base.entry.as_variable
+ self.base.type = self.type = self.entry.type
+
+ self.base.is_temp = True
+ self.is_temp = True
+
+ self.entry.used = True
+
+ self.is_fused_index = False
+ return
+
+ for i, type in enumerate(specific_types):
+ specific_types[i] = type.specialize_fused(env)
+
+ fused_types = base_type.get_fused_types()
+ if len(specific_types) > len(fused_types):
+ return error(self.pos, "Too many types specified")
+ elif len(specific_types) < len(fused_types):
+ t = fused_types[len(specific_types)]
+ return error(self.pos, "Not enough types specified to specialize "
+ "the function, %s is still fused" % t)
+
+ # See if our index types form valid specializations
+ for pos, specific_type, fused_type in zip(positions,
+ specific_types,
+ fused_types):
+ if not any([specific_type.same_as(t) for t in fused_type.types]):
+ return error(pos, "Type not in fused type")
+
+ if specific_type is None or specific_type.is_error:
+ return
+
+ fused_to_specific = dict(zip(fused_types, specific_types))
+ type = base_type.specialize(fused_to_specific)
+
+ if type.is_fused:
+ # Only partially specific, this is invalid
+ error(self.pos,
+ "Index operation makes function only partially specific")
+ else:
+ # Fully specific, find the signature with the specialized entry
+ for signature in self.base.type.get_all_specialized_function_types():
+ if type.same_as(signature):
+ self.type = signature
+
+ if self.base.is_attribute:
+ # Pretend to be a normal attribute, for cdef extension
+ # methods
+ self.entry = signature.entry
+ self.is_attribute = True
+ self.obj = self.base.obj
+
+ self.type.entry.used = True
+ self.base.type = signature
+ self.base.entry = signature.entry
+
+ break
+ else:
+ # This is a bug
+ raise InternalError("Couldn't find the right signature")
+
+ gil_message = "Indexing Python object"
+
+ def calculate_result_code(self):
if self.base.type in (list_type, tuple_type, bytearray_type):
- if self.base.type is list_type:
- index_code = "PyList_GET_ITEM(%s, %s)"
- elif self.base.type is tuple_type:
- index_code = "PyTuple_GET_ITEM(%s, %s)"
- elif self.base.type is bytearray_type:
- index_code = "((unsigned char)(PyByteArray_AS_STRING(%s)[%s]))"
- else:
- assert False, "unexpected base type in indexing: %s" % self.base.type
- elif self.base.type.is_cfunction:
- return "%s<%s>" % (
- self.base.result(),
+ if self.base.type is list_type:
+ index_code = "PyList_GET_ITEM(%s, %s)"
+ elif self.base.type is tuple_type:
+ index_code = "PyTuple_GET_ITEM(%s, %s)"
+ elif self.base.type is bytearray_type:
+ index_code = "((unsigned char)(PyByteArray_AS_STRING(%s)[%s]))"
+ else:
+ assert False, "unexpected base type in indexing: %s" % self.base.type
+ elif self.base.type.is_cfunction:
+ return "%s<%s>" % (
+ self.base.result(),
",".join([param.empty_declaration_code() for param in self.type_indices]))
elif self.base.type.is_ctuple:
index = self.index.constant_result
if index < 0:
index += self.base.type.size
return "%s.f%s" % (self.base.result(), index)
- else:
- if (self.type.is_ptr or self.type.is_array) and self.type == self.base.type:
- error(self.pos, "Invalid use of pointer slice")
- return
- index_code = "(%s[%s])"
- return index_code % (self.base.result(), self.index.result())
-
- def extra_index_params(self, code):
- if self.index.type.is_int:
- is_list = self.base.type is list_type
- wraparound = (
- bool(code.globalstate.directives['wraparound']) and
- self.original_index_type.signed and
+ else:
+ if (self.type.is_ptr or self.type.is_array) and self.type == self.base.type:
+ error(self.pos, "Invalid use of pointer slice")
+ return
+ index_code = "(%s[%s])"
+ return index_code % (self.base.result(), self.index.result())
+
+ def extra_index_params(self, code):
+ if self.index.type.is_int:
+ is_list = self.base.type is list_type
+ wraparound = (
+ bool(code.globalstate.directives['wraparound']) and
+ self.original_index_type.signed and
not (isinstance(self.index.constant_result, _py_int_types)
- and self.index.constant_result >= 0))
- boundscheck = bool(code.globalstate.directives['boundscheck'])
- return ", %s, %d, %s, %d, %d, %d" % (
+ and self.index.constant_result >= 0))
+ boundscheck = bool(code.globalstate.directives['boundscheck'])
+ return ", %s, %d, %s, %d, %d, %d" % (
self.original_index_type.empty_declaration_code(),
- self.original_index_type.signed and 1 or 0,
- self.original_index_type.to_py_function,
- is_list, wraparound, boundscheck)
- else:
- return ""
-
- def generate_result_code(self, code):
+ self.original_index_type.signed and 1 or 0,
+ self.original_index_type.to_py_function,
+ is_list, wraparound, boundscheck)
+ else:
+ return ""
+
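# --- Illustrative note (editorial addition, not part of the diff): the extra
# parameters built above are what let the __Pyx_GetItemInt/__Pyx_SetItemInt
# helpers decide whether to honour Python-style negative indices
# ('wraparound') and whether to raise IndexError on out-of-range access
# ('boundscheck'). Plain Python always does both:

items = [10, 20, 30]
assert items[-1] == 30            # wraparound: -1 counts from the end
try:
    items[3]                      # boundscheck: out of range raises
except IndexError:
    pass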
+ def generate_result_code(self, code):
if not self.is_temp:
# all handled in self.calculate_result_code()
return
@@ -4004,10 +4004,10 @@ class IndexNode(_IndexingBaseNode):
function = "__Pyx_GetItemInt_List"
elif self.base.type is tuple_type:
function = "__Pyx_GetItemInt_Tuple"
- else:
+ else:
function = "__Pyx_GetItemInt"
utility_code = TempitaUtilityCode.load_cached("GetItemInt", "ObjectHandling.c")
- else:
+ else:
if self.base.type is dict_type:
function = "__Pyx_PyDict_GetItem"
utility_code = UtilityCode.load_cached("DictGetItem", "ObjectHandling.c")
@@ -4034,7 +4034,7 @@ class IndexNode(_IndexingBaseNode):
elif not (self.base.type.is_cpp_class and self.exception_check):
assert False, "unexpected type %s and base type %s for indexing" % (
self.type, self.base.type)
-
+
if utility_code is not None:
code.globalstate.use_utility_code(utility_code)
@@ -4042,7 +4042,7 @@ class IndexNode(_IndexingBaseNode):
index_code = self.index.result()
else:
index_code = self.index.py_result()
-
+
if self.base.type.is_cpp_class and self.exception_check:
translate_cpp_exception(code, self.pos,
"%s = %s[%s];" % (self.result(), self.base.result(),
@@ -4051,59 +4051,59 @@ class IndexNode(_IndexingBaseNode):
self.exception_value, self.in_nogil_context)
else:
error_check = '!%s' if error_value == 'NULL' else '%%s == %s' % error_value
- code.putln(
+ code.putln(
"%s = %s(%s, %s%s); %s" % (
- self.result(),
- function,
- self.base.py_result(),
- index_code,
- self.extra_index_params(code),
+ self.result(),
+ function,
+ self.base.py_result(),
+ index_code,
+ self.extra_index_params(code),
code.error_goto_if(error_check % self.result(), self.pos)))
if self.type.is_pyobject:
code.put_gotref(self.py_result())
-
- def generate_setitem_code(self, value_code, code):
- if self.index.type.is_int:
- if self.base.type is bytearray_type:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("SetItemIntByteArray", "StringTools.c"))
- function = "__Pyx_SetItemInt_ByteArray"
- else:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("SetItemInt", "ObjectHandling.c"))
- function = "__Pyx_SetItemInt"
- index_code = self.index.result()
- else:
- index_code = self.index.py_result()
- if self.base.type is dict_type:
- function = "PyDict_SetItem"
- # It would seem that we could specialize lists/tuples, but that
- # shouldn't happen here.
- # Both PyList_SetItem() and PyTuple_SetItem() take a Py_ssize_t as
- # index instead of an object, and bad conversion here would give
- # the wrong exception. Also, tuples are supposed to be immutable,
- # and raise a TypeError when trying to set their entries
- # (PyTuple_SetItem() is for creating new tuples from scratch).
- else:
- function = "PyObject_SetItem"
+
+ def generate_setitem_code(self, value_code, code):
+ if self.index.type.is_int:
+ if self.base.type is bytearray_type:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("SetItemIntByteArray", "StringTools.c"))
+ function = "__Pyx_SetItemInt_ByteArray"
+ else:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("SetItemInt", "ObjectHandling.c"))
+ function = "__Pyx_SetItemInt"
+ index_code = self.index.result()
+ else:
+ index_code = self.index.py_result()
+ if self.base.type is dict_type:
+ function = "PyDict_SetItem"
+ # It would seem that we could specialize lists/tuples, but that
+ # shouldn't happen here.
+ # Both PyList_SetItem() and PyTuple_SetItem() take a Py_ssize_t as
+ # index instead of an object, and bad conversion here would give
+ # the wrong exception. Also, tuples are supposed to be immutable,
+ # and raise a TypeError when trying to set their entries
+ # (PyTuple_SetItem() is for creating new tuples from scratch).
+ else:
+ function = "PyObject_SetItem"
code.putln(code.error_goto_if_neg(
"%s(%s, %s, %s%s)" % (
- function,
- self.base.py_result(),
- index_code,
- value_code,
+ function,
+ self.base.py_result(),
+ index_code,
+ value_code,
self.extra_index_params(code)),
self.pos))
-
+
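# --- Illustrative note (editorial addition, not part of the diff): as the
# comment in generate_setitem_code() explains, only dict assignment gets a
# specialised call; list and tuple assignment is not special-cased there
# (PyList_SetItem/PyTuple_SetItem have different signatures and semantics),
# so it falls through to the generic PyObject_SetItem, which raises the usual
# TypeError for tuples:

d = {}
d["key"] = 1                      # dict fast path (PyDict_SetItem)
t = (1, 2, 3)
try:
    t[0] = 0                      # generic path: tuples reject item assignment
except TypeError:
    pass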
def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
exception_check=None, exception_value=None):
self.generate_subexpr_evaluation_code(code)
-
+
if self.type.is_pyobject:
- self.generate_setitem_code(rhs.py_result(), code)
- elif self.base.type is bytearray_type:
- value_code = self._check_byte_value(code, rhs)
- self.generate_setitem_code(value_code, code)
+ self.generate_setitem_code(rhs.py_result(), code)
+ elif self.base.type is bytearray_type:
+ value_code = self._check_byte_value(code, rhs)
+ self.generate_setitem_code(value_code, code)
elif self.base.type.is_cpp_class and self.exception_check and self.exception_check == '+':
if overloaded_assignment and exception_check and \
self.exception_value != exception_value:
@@ -4120,75 +4120,75 @@ class IndexNode(_IndexingBaseNode):
"%s = %s;" % (self.result(), rhs.result()),
self.result() if self.type.is_pyobject else None,
self.exception_value, self.in_nogil_context)
- else:
- code.putln(
+ else:
+ code.putln(
"%s = %s;" % (self.result(), rhs.result()))
-
+
self.generate_subexpr_disposal_code(code)
self.free_subexpr_temps(code)
- rhs.generate_disposal_code(code)
- rhs.free_temps(code)
-
- def _check_byte_value(self, code, rhs):
- # TODO: should we do this generally on downcasts, or just here?
- assert rhs.type.is_int, repr(rhs.type)
- value_code = rhs.result()
- if rhs.has_constant_result():
- if 0 <= rhs.constant_result < 256:
- return value_code
- needs_cast = True # make at least the C compiler happy
- warning(rhs.pos,
- "value outside of range(0, 256)"
- " when assigning to byte: %s" % rhs.constant_result,
- level=1)
- else:
- needs_cast = rhs.type != PyrexTypes.c_uchar_type
-
- if not self.nogil:
- conditions = []
- if rhs.is_literal or rhs.type.signed:
- conditions.append('%s < 0' % value_code)
- if (rhs.is_literal or not
- (rhs.is_temp and rhs.type in (
- PyrexTypes.c_uchar_type, PyrexTypes.c_char_type,
- PyrexTypes.c_schar_type))):
- conditions.append('%s > 255' % value_code)
- if conditions:
- code.putln("if (unlikely(%s)) {" % ' || '.join(conditions))
- code.putln(
- 'PyErr_SetString(PyExc_ValueError,'
- ' "byte must be in range(0, 256)"); %s' %
- code.error_goto(self.pos))
- code.putln("}")
-
- if needs_cast:
- value_code = '((unsigned char)%s)' % value_code
- return value_code
-
- def generate_deletion_code(self, code, ignore_nonexisting=False):
- self.generate_subexpr_evaluation_code(code)
- #if self.type.is_pyobject:
- if self.index.type.is_int:
- function = "__Pyx_DelItemInt"
- index_code = self.index.result()
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("DelItemInt", "ObjectHandling.c"))
- else:
- index_code = self.index.py_result()
- if self.base.type is dict_type:
- function = "PyDict_DelItem"
- else:
- function = "PyObject_DelItem"
+ rhs.generate_disposal_code(code)
+ rhs.free_temps(code)
+
+ def _check_byte_value(self, code, rhs):
+ # TODO: should we do this generally on downcasts, or just here?
+ assert rhs.type.is_int, repr(rhs.type)
+ value_code = rhs.result()
+ if rhs.has_constant_result():
+ if 0 <= rhs.constant_result < 256:
+ return value_code
+ needs_cast = True # make at least the C compiler happy
+ warning(rhs.pos,
+ "value outside of range(0, 256)"
+ " when assigning to byte: %s" % rhs.constant_result,
+ level=1)
+ else:
+ needs_cast = rhs.type != PyrexTypes.c_uchar_type
+
+ if not self.nogil:
+ conditions = []
+ if rhs.is_literal or rhs.type.signed:
+ conditions.append('%s < 0' % value_code)
+ if (rhs.is_literal or not
+ (rhs.is_temp and rhs.type in (
+ PyrexTypes.c_uchar_type, PyrexTypes.c_char_type,
+ PyrexTypes.c_schar_type))):
+ conditions.append('%s > 255' % value_code)
+ if conditions:
+ code.putln("if (unlikely(%s)) {" % ' || '.join(conditions))
+ code.putln(
+ 'PyErr_SetString(PyExc_ValueError,'
+ ' "byte must be in range(0, 256)"); %s' %
+ code.error_goto(self.pos))
+ code.putln("}")
+
+ if needs_cast:
+ value_code = '((unsigned char)%s)' % value_code
+ return value_code
+
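# --- Illustrative note (editorial addition, not part of the diff): the range
# check generated in _check_byte_value() enforces the same rule plain Python
# applies to bytearray item assignment:

buf = bytearray(b"abc")
buf[0] = 255                      # in range(0, 256): accepted
try:
    buf[0] = 256                  # out of range: "byte must be in range(0, 256)"
except ValueError:
    pass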
+ def generate_deletion_code(self, code, ignore_nonexisting=False):
+ self.generate_subexpr_evaluation_code(code)
+ #if self.type.is_pyobject:
+ if self.index.type.is_int:
+ function = "__Pyx_DelItemInt"
+ index_code = self.index.result()
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("DelItemInt", "ObjectHandling.c"))
+ else:
+ index_code = self.index.py_result()
+ if self.base.type is dict_type:
+ function = "PyDict_DelItem"
+ else:
+ function = "PyObject_DelItem"
code.putln(code.error_goto_if_neg(
"%s(%s, %s%s)" % (
- function,
- self.base.py_result(),
- index_code,
+ function,
+ self.base.py_result(),
+ index_code,
self.extra_index_params(code)),
self.pos))
- self.generate_subexpr_disposal_code(code)
- self.free_subexpr_temps(code)
-
+ self.generate_subexpr_disposal_code(code)
+ self.free_subexpr_temps(code)
+
class BufferIndexNode(_IndexingBaseNode):
"""
@@ -4269,12 +4269,12 @@ class BufferIndexNode(_IndexingBaseNode):
def calculate_result_code(self):
return "(*%s)" % self.buffer_ptr_code
- def buffer_entry(self):
- base = self.base
- if self.base.is_nonecheck:
- base = base.arg
+ def buffer_entry(self):
+ base = self.base
+ if self.base.is_nonecheck:
+ base = base.arg
return base.type.get_entry(base)
-
+
def get_index_in_temp(self, code, ivar):
ret = code.funcstate.allocate_temp(
PyrexTypes.widest_numeric_type(
@@ -4284,7 +4284,7 @@ class BufferIndexNode(_IndexingBaseNode):
code.putln("%s = %s;" % (ret, ivar.result()))
return ret
- def buffer_lookup_code(self, code):
+ def buffer_lookup_code(self, code):
"""
ndarray[1, 2, 3] and memslice[1, 2, 3]
"""
@@ -4295,16 +4295,16 @@ class BufferIndexNode(_IndexingBaseNode):
# Assign indices to temps of at least (s)size_t to allow further index calculations.
self.index_temps = index_temps = [self.get_index_in_temp(code,ivar) for ivar in self.indices]
-
- # Generate buffer access code using these temps
- from . import Buffer
- buffer_entry = self.buffer_entry()
- if buffer_entry.type.is_buffer:
- negative_indices = buffer_entry.type.negative_indices
- else:
- negative_indices = Buffer.buffer_defaults['negative_indices']
-
- return buffer_entry, Buffer.put_buffer_lookup_code(
+
+ # Generate buffer access code using these temps
+ from . import Buffer
+ buffer_entry = self.buffer_entry()
+ if buffer_entry.type.is_buffer:
+ negative_indices = buffer_entry.type.negative_indices
+ else:
+ negative_indices = Buffer.buffer_defaults['negative_indices']
+
+ return buffer_entry, Buffer.put_buffer_lookup_code(
entry=buffer_entry,
index_signeds=[ivar.type.signed for ivar in self.indices],
index_cnames=index_temps,
@@ -4312,7 +4312,7 @@ class BufferIndexNode(_IndexingBaseNode):
pos=self.pos, code=code,
negative_indices=negative_indices,
in_nogil_context=self.in_nogil_context)
-
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
self.generate_subexpr_evaluation_code(code)
self.generate_buffer_setitem_code(rhs, code)
@@ -4599,37 +4599,37 @@ class MemoryViewSliceNode(MemoryViewIndexNode):
def generate_result_code(self, code):
if self.is_ellipsis_noop:
return ### FIXME: remove
- buffer_entry = self.buffer_entry()
- have_gil = not self.in_nogil_context
-
+ buffer_entry = self.buffer_entry()
+ have_gil = not self.in_nogil_context
+
# TODO Mark: this is insane, do it better
- have_slices = False
- it = iter(self.indices)
- for index in self.original_indices:
+ have_slices = False
+ it = iter(self.indices)
+ for index in self.original_indices:
if index.is_slice:
have_slices = True
- if not index.start.is_none:
- index.start = next(it)
- if not index.stop.is_none:
- index.stop = next(it)
- if not index.step.is_none:
- index.step = next(it)
- else:
- next(it)
-
- assert not list(it)
-
+ if not index.start.is_none:
+ index.start = next(it)
+ if not index.stop.is_none:
+ index.stop = next(it)
+ if not index.step.is_none:
+ index.step = next(it)
+ else:
+ next(it)
+
+ assert not list(it)
+
buffer_entry.generate_buffer_slice_code(
code, self.original_indices, self.result(),
have_gil=have_gil, have_slices=have_slices,
directives=code.globalstate.directives)
-
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
if self.is_ellipsis_noop:
self.generate_subexpr_evaluation_code(code)
else:
self.generate_evaluation_code(code)
-
+
if self.is_memview_scalar_assignment:
self.generate_memoryviewslice_assign_scalar_code(rhs, code)
else:
@@ -4707,10 +4707,10 @@ class MemoryCopyScalar(MemoryCopyNode):
self.type = dst.type.dtype
def _generate_assignment_code(self, scalar, code):
- from . import MemoryView
-
+ from . import MemoryView
+
self.dst.type.assert_direct_dims(self.dst.pos)
-
+
dtype = self.dst.type.dtype
type_decl = dtype.declaration_code("")
slice_decl = self.dst.type.declaration_code("")
@@ -4739,31 +4739,31 @@ class MemoryCopyScalar(MemoryCopyNode):
code.end_block()
-class SliceIndexNode(ExprNode):
- # 2-element slice indexing
- #
- # base ExprNode
- # start ExprNode or None
- # stop ExprNode or None
- # slice ExprNode or None constant slice object
-
- subexprs = ['base', 'start', 'stop', 'slice']
-
- slice = None
-
- def infer_type(self, env):
- base_type = self.base.infer_type(env)
- if base_type.is_string or base_type.is_cpp_class:
- return bytes_type
- elif base_type.is_pyunicode_ptr:
- return unicode_type
+class SliceIndexNode(ExprNode):
+ # 2-element slice indexing
+ #
+ # base ExprNode
+ # start ExprNode or None
+ # stop ExprNode or None
+ # slice ExprNode or None constant slice object
+
+ subexprs = ['base', 'start', 'stop', 'slice']
+
+ slice = None
+
+ def infer_type(self, env):
+ base_type = self.base.infer_type(env)
+ if base_type.is_string or base_type.is_cpp_class:
+ return bytes_type
+ elif base_type.is_pyunicode_ptr:
+ return unicode_type
elif base_type in (bytes_type, bytearray_type, str_type, unicode_type,
- basestring_type, list_type, tuple_type):
- return base_type
- elif base_type.is_ptr or base_type.is_array:
- return PyrexTypes.c_array_type(base_type.base_type, None)
- return py_object_type
-
+ basestring_type, list_type, tuple_type):
+ return base_type
+ elif base_type.is_ptr or base_type.is_array:
+ return PyrexTypes.c_array_type(base_type.base_type, None)
+ return py_object_type
+
def inferable_item_node(self, index=0):
# slicing shouldn't change the result type of the base, but the index might
if index is not not_a_constant and self.start:
@@ -4773,75 +4773,75 @@ class SliceIndexNode(ExprNode):
index = not_a_constant
return self.base.inferable_item_node(index)
- def may_be_none(self):
- base_type = self.base.type
- if base_type:
- if base_type.is_string:
- return False
- if base_type in (bytes_type, str_type, unicode_type,
- basestring_type, list_type, tuple_type):
- return False
- return ExprNode.may_be_none(self)
-
- def calculate_constant_result(self):
- if self.start is None:
- start = None
- else:
- start = self.start.constant_result
- if self.stop is None:
- stop = None
- else:
- stop = self.stop.constant_result
- self.constant_result = self.base.constant_result[start:stop]
-
- def compile_time_value(self, denv):
- base = self.base.compile_time_value(denv)
- if self.start is None:
- start = 0
- else:
- start = self.start.compile_time_value(denv)
- if self.stop is None:
- stop = None
- else:
- stop = self.stop.compile_time_value(denv)
- try:
- return base[start:stop]
+ def may_be_none(self):
+ base_type = self.base.type
+ if base_type:
+ if base_type.is_string:
+ return False
+ if base_type in (bytes_type, str_type, unicode_type,
+ basestring_type, list_type, tuple_type):
+ return False
+ return ExprNode.may_be_none(self)
+
+ def calculate_constant_result(self):
+ if self.start is None:
+ start = None
+ else:
+ start = self.start.constant_result
+ if self.stop is None:
+ stop = None
+ else:
+ stop = self.stop.constant_result
+ self.constant_result = self.base.constant_result[start:stop]
+
+ def compile_time_value(self, denv):
+ base = self.base.compile_time_value(denv)
+ if self.start is None:
+ start = 0
+ else:
+ start = self.start.compile_time_value(denv)
+ if self.stop is None:
+ stop = None
+ else:
+ stop = self.stop.compile_time_value(denv)
+ try:
+ return base[start:stop]
except Exception as e:
- self.compile_time_value_error(e)
-
- def analyse_target_declaration(self, env):
- pass
-
- def analyse_target_types(self, env):
- node = self.analyse_types(env, getting=False)
- # when assigning, we must accept any Python type
- if node.type.is_pyobject:
- node.type = py_object_type
- return node
-
- def analyse_types(self, env, getting=True):
- self.base = self.base.analyse_types(env)
-
+ self.compile_time_value_error(e)
+
+ def analyse_target_declaration(self, env):
+ pass
+
+ def analyse_target_types(self, env):
+ node = self.analyse_types(env, getting=False)
+ # when assigning, we must accept any Python type
+ if node.type.is_pyobject:
+ node.type = py_object_type
+ return node
+
+ def analyse_types(self, env, getting=True):
+ self.base = self.base.analyse_types(env)
+
if self.base.type.is_buffer or self.base.type.is_pythran_expr or self.base.type.is_memoryviewslice:
- none_node = NoneNode(self.pos)
- index = SliceNode(self.pos,
- start=self.start or none_node,
- stop=self.stop or none_node,
- step=none_node)
+ none_node = NoneNode(self.pos)
+ index = SliceNode(self.pos,
+ start=self.start or none_node,
+ stop=self.stop or none_node,
+ step=none_node)
index_node = IndexNode(self.pos, index=index, base=self.base)
- return index_node.analyse_base_and_index_types(
- env, getting=getting, setting=not getting,
- analyse_base=False)
-
- if self.start:
- self.start = self.start.analyse_types(env)
- if self.stop:
- self.stop = self.stop.analyse_types(env)
-
- if not env.directives['wraparound']:
- check_negative_indices(self.start, self.stop)
-
- base_type = self.base.type
+ return index_node.analyse_base_and_index_types(
+ env, getting=getting, setting=not getting,
+ analyse_base=False)
+
+ if self.start:
+ self.start = self.start.analyse_types(env)
+ if self.stop:
+ self.stop = self.stop.analyse_types(env)
+
+ if not env.directives['wraparound']:
+ check_negative_indices(self.start, self.stop)
+
+ base_type = self.base.type
if base_type.is_array and not getting:
# cannot assign directly to C array => try to assign by making a copy
if not self.start and not self.stop:
@@ -4849,37 +4849,37 @@ class SliceIndexNode(ExprNode):
else:
self.type = PyrexTypes.CPtrType(base_type.base_type)
elif base_type.is_string or base_type.is_cpp_string:
- self.type = default_str_type(env)
- elif base_type.is_pyunicode_ptr:
- self.type = unicode_type
- elif base_type.is_ptr:
- self.type = base_type
- elif base_type.is_array:
- # we need a ptr type here instead of an array type, as
- # array types can result in invalid type casts in the C
- # code
- self.type = PyrexTypes.CPtrType(base_type.base_type)
- else:
- self.base = self.base.coerce_to_pyobject(env)
- self.type = py_object_type
- if base_type.is_builtin_type:
- # slicing builtin types returns something of the same type
- self.type = base_type
- self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
-
- if self.type is py_object_type:
- if (not self.start or self.start.is_literal) and \
- (not self.stop or self.stop.is_literal):
- # cache the constant slice object, in case we need it
- none_node = NoneNode(self.pos)
- self.slice = SliceNode(
- self.pos,
- start=copy.deepcopy(self.start or none_node),
- stop=copy.deepcopy(self.stop or none_node),
- step=none_node
- ).analyse_types(env)
- else:
- c_int = PyrexTypes.c_py_ssize_t_type
+ self.type = default_str_type(env)
+ elif base_type.is_pyunicode_ptr:
+ self.type = unicode_type
+ elif base_type.is_ptr:
+ self.type = base_type
+ elif base_type.is_array:
+ # we need a ptr type here instead of an array type, as
+ # array types can result in invalid type casts in the C
+ # code
+ self.type = PyrexTypes.CPtrType(base_type.base_type)
+ else:
+ self.base = self.base.coerce_to_pyobject(env)
+ self.type = py_object_type
+ if base_type.is_builtin_type:
+ # slicing builtin types returns something of the same type
+ self.type = base_type
+ self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
+
+ if self.type is py_object_type:
+ if (not self.start or self.start.is_literal) and \
+ (not self.stop or self.stop.is_literal):
+ # cache the constant slice object, in case we need it
+ none_node = NoneNode(self.pos)
+ self.slice = SliceNode(
+ self.pos,
+ start=copy.deepcopy(self.start or none_node),
+ stop=copy.deepcopy(self.stop or none_node),
+ step=none_node
+ ).analyse_types(env)
+ else:
+ c_int = PyrexTypes.c_py_ssize_t_type
def allow_none(node, default_value, env):
# Coerce to Py_ssize_t, but allow None as meaning the default slice bound.
@@ -4904,17 +4904,17 @@ class SliceIndexNode(ExprNode):
).analyse_result_type(env)
return EvalWithTempExprNode(node_ref, new_expr)
- if self.start:
+ if self.start:
if self.start.type.is_pyobject:
self.start = allow_none(self.start, '0', env)
- self.start = self.start.coerce_to(c_int, env)
- if self.stop:
+ self.start = self.start.coerce_to(c_int, env)
+ if self.stop:
if self.stop.type.is_pyobject:
self.stop = allow_none(self.stop, 'PY_SSIZE_T_MAX', env)
- self.stop = self.stop.coerce_to(c_int, env)
- self.is_temp = 1
- return self
-
+ self.stop = self.stop.coerce_to(c_int, env)
+ self.is_temp = 1
+ return self
+
def analyse_as_type(self, env):
base_type = self.base.analyse_as_type(env)
if base_type and not base_type.is_pyobject:
@@ -4933,153 +4933,153 @@ class SliceIndexNode(ExprNode):
base_type, MemoryView.get_axes_specs(env, [slice_node]))
return None
- nogil_check = Node.gil_error
- gil_message = "Slicing Python object"
-
- get_slice_utility_code = TempitaUtilityCode.load(
- "SliceObject", "ObjectHandling.c", context={'access': 'Get'})
-
- set_slice_utility_code = TempitaUtilityCode.load(
- "SliceObject", "ObjectHandling.c", context={'access': 'Set'})
-
- def coerce_to(self, dst_type, env):
- if ((self.base.type.is_string or self.base.type.is_cpp_string)
- and dst_type in (bytes_type, bytearray_type, str_type, unicode_type)):
- if (dst_type not in (bytes_type, bytearray_type)
- and not env.directives['c_string_encoding']):
- error(self.pos,
- "default encoding required for conversion from '%s' to '%s'" %
- (self.base.type, dst_type))
- self.type = dst_type
+ nogil_check = Node.gil_error
+ gil_message = "Slicing Python object"
+
+ get_slice_utility_code = TempitaUtilityCode.load(
+ "SliceObject", "ObjectHandling.c", context={'access': 'Get'})
+
+ set_slice_utility_code = TempitaUtilityCode.load(
+ "SliceObject", "ObjectHandling.c", context={'access': 'Set'})
+
+ def coerce_to(self, dst_type, env):
+ if ((self.base.type.is_string or self.base.type.is_cpp_string)
+ and dst_type in (bytes_type, bytearray_type, str_type, unicode_type)):
+ if (dst_type not in (bytes_type, bytearray_type)
+ and not env.directives['c_string_encoding']):
+ error(self.pos,
+ "default encoding required for conversion from '%s' to '%s'" %
+ (self.base.type, dst_type))
+ self.type = dst_type
if dst_type.is_array and self.base.type.is_array:
if not self.start and not self.stop:
# redundant slice building, copy C arrays directly
return self.base.coerce_to(dst_type, env)
# else: check array size if possible
- return super(SliceIndexNode, self).coerce_to(dst_type, env)
-
- def generate_result_code(self, code):
- if not self.type.is_pyobject:
- error(self.pos,
- "Slicing is not currently supported for '%s'." % self.type)
- return
-
- base_result = self.base.result()
- result = self.result()
- start_code = self.start_code()
- stop_code = self.stop_code()
- if self.base.type.is_string:
- base_result = self.base.result()
+ return super(SliceIndexNode, self).coerce_to(dst_type, env)
+
+ def generate_result_code(self, code):
+ if not self.type.is_pyobject:
+ error(self.pos,
+ "Slicing is not currently supported for '%s'." % self.type)
+ return
+
+ base_result = self.base.result()
+ result = self.result()
+ start_code = self.start_code()
+ stop_code = self.stop_code()
+ if self.base.type.is_string:
+ base_result = self.base.result()
if self.base.type not in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
- base_result = '((const char*)%s)' % base_result
- if self.type is bytearray_type:
- type_name = 'ByteArray'
- else:
- type_name = self.type.name.title()
- if self.stop is None:
- code.putln(
- "%s = __Pyx_Py%s_FromString(%s + %s); %s" % (
- result,
- type_name,
- base_result,
- start_code,
- code.error_goto_if_null(result, self.pos)))
- else:
- code.putln(
- "%s = __Pyx_Py%s_FromStringAndSize(%s + %s, %s - %s); %s" % (
- result,
- type_name,
- base_result,
- start_code,
- stop_code,
- start_code,
- code.error_goto_if_null(result, self.pos)))
- elif self.base.type.is_pyunicode_ptr:
- base_result = self.base.result()
- if self.base.type != PyrexTypes.c_py_unicode_ptr_type:
- base_result = '((const Py_UNICODE*)%s)' % base_result
- if self.stop is None:
- code.putln(
- "%s = __Pyx_PyUnicode_FromUnicode(%s + %s); %s" % (
- result,
- base_result,
- start_code,
- code.error_goto_if_null(result, self.pos)))
- else:
- code.putln(
- "%s = __Pyx_PyUnicode_FromUnicodeAndLength(%s + %s, %s - %s); %s" % (
- result,
- base_result,
- start_code,
- stop_code,
- start_code,
- code.error_goto_if_null(result, self.pos)))
-
- elif self.base.type is unicode_type:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c"))
- code.putln(
- "%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s" % (
- result,
- base_result,
- start_code,
- stop_code,
- code.error_goto_if_null(result, self.pos)))
- elif self.type is py_object_type:
- code.globalstate.use_utility_code(self.get_slice_utility_code)
- (has_c_start, has_c_stop, c_start, c_stop,
- py_start, py_stop, py_slice) = self.get_slice_config()
- code.putln(
- "%s = __Pyx_PyObject_GetSlice(%s, %s, %s, %s, %s, %s, %d, %d, %d); %s" % (
- result,
- self.base.py_result(),
- c_start, c_stop,
- py_start, py_stop, py_slice,
- has_c_start, has_c_stop,
- bool(code.globalstate.directives['wraparound']),
- code.error_goto_if_null(result, self.pos)))
- else:
- if self.base.type is list_type:
- code.globalstate.use_utility_code(
- TempitaUtilityCode.load_cached("SliceTupleAndList", "ObjectHandling.c"))
- cfunc = '__Pyx_PyList_GetSlice'
- elif self.base.type is tuple_type:
- code.globalstate.use_utility_code(
- TempitaUtilityCode.load_cached("SliceTupleAndList", "ObjectHandling.c"))
- cfunc = '__Pyx_PyTuple_GetSlice'
- else:
- cfunc = 'PySequence_GetSlice'
- code.putln(
- "%s = %s(%s, %s, %s); %s" % (
- result,
- cfunc,
- self.base.py_result(),
- start_code,
- stop_code,
- code.error_goto_if_null(result, self.pos)))
- code.put_gotref(self.py_result())
-
+ base_result = '((const char*)%s)' % base_result
+ if self.type is bytearray_type:
+ type_name = 'ByteArray'
+ else:
+ type_name = self.type.name.title()
+ if self.stop is None:
+ code.putln(
+ "%s = __Pyx_Py%s_FromString(%s + %s); %s" % (
+ result,
+ type_name,
+ base_result,
+ start_code,
+ code.error_goto_if_null(result, self.pos)))
+ else:
+ code.putln(
+ "%s = __Pyx_Py%s_FromStringAndSize(%s + %s, %s - %s); %s" % (
+ result,
+ type_name,
+ base_result,
+ start_code,
+ stop_code,
+ start_code,
+ code.error_goto_if_null(result, self.pos)))
+ elif self.base.type.is_pyunicode_ptr:
+ base_result = self.base.result()
+ if self.base.type != PyrexTypes.c_py_unicode_ptr_type:
+ base_result = '((const Py_UNICODE*)%s)' % base_result
+ if self.stop is None:
+ code.putln(
+ "%s = __Pyx_PyUnicode_FromUnicode(%s + %s); %s" % (
+ result,
+ base_result,
+ start_code,
+ code.error_goto_if_null(result, self.pos)))
+ else:
+ code.putln(
+ "%s = __Pyx_PyUnicode_FromUnicodeAndLength(%s + %s, %s - %s); %s" % (
+ result,
+ base_result,
+ start_code,
+ stop_code,
+ start_code,
+ code.error_goto_if_null(result, self.pos)))
+
+ elif self.base.type is unicode_type:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c"))
+ code.putln(
+ "%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s" % (
+ result,
+ base_result,
+ start_code,
+ stop_code,
+ code.error_goto_if_null(result, self.pos)))
+ elif self.type is py_object_type:
+ code.globalstate.use_utility_code(self.get_slice_utility_code)
+ (has_c_start, has_c_stop, c_start, c_stop,
+ py_start, py_stop, py_slice) = self.get_slice_config()
+ code.putln(
+ "%s = __Pyx_PyObject_GetSlice(%s, %s, %s, %s, %s, %s, %d, %d, %d); %s" % (
+ result,
+ self.base.py_result(),
+ c_start, c_stop,
+ py_start, py_stop, py_slice,
+ has_c_start, has_c_stop,
+ bool(code.globalstate.directives['wraparound']),
+ code.error_goto_if_null(result, self.pos)))
+ else:
+ if self.base.type is list_type:
+ code.globalstate.use_utility_code(
+ TempitaUtilityCode.load_cached("SliceTupleAndList", "ObjectHandling.c"))
+ cfunc = '__Pyx_PyList_GetSlice'
+ elif self.base.type is tuple_type:
+ code.globalstate.use_utility_code(
+ TempitaUtilityCode.load_cached("SliceTupleAndList", "ObjectHandling.c"))
+ cfunc = '__Pyx_PyTuple_GetSlice'
+ else:
+ cfunc = 'PySequence_GetSlice'
+ code.putln(
+ "%s = %s(%s, %s, %s); %s" % (
+ result,
+ cfunc,
+ self.base.py_result(),
+ start_code,
+ stop_code,
+ code.error_goto_if_null(result, self.pos)))
+ code.put_gotref(self.py_result())
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
exception_check=None, exception_value=None):
- self.generate_subexpr_evaluation_code(code)
- if self.type.is_pyobject:
- code.globalstate.use_utility_code(self.set_slice_utility_code)
- (has_c_start, has_c_stop, c_start, c_stop,
- py_start, py_stop, py_slice) = self.get_slice_config()
- code.put_error_if_neg(self.pos,
- "__Pyx_PyObject_SetSlice(%s, %s, %s, %s, %s, %s, %s, %d, %d, %d)" % (
- self.base.py_result(),
- rhs.py_result(),
- c_start, c_stop,
- py_start, py_stop, py_slice,
- has_c_start, has_c_stop,
- bool(code.globalstate.directives['wraparound'])))
- else:
+ self.generate_subexpr_evaluation_code(code)
+ if self.type.is_pyobject:
+ code.globalstate.use_utility_code(self.set_slice_utility_code)
+ (has_c_start, has_c_stop, c_start, c_stop,
+ py_start, py_stop, py_slice) = self.get_slice_config()
+ code.put_error_if_neg(self.pos,
+ "__Pyx_PyObject_SetSlice(%s, %s, %s, %s, %s, %s, %s, %d, %d, %d)" % (
+ self.base.py_result(),
+ rhs.py_result(),
+ c_start, c_stop,
+ py_start, py_stop, py_slice,
+ has_c_start, has_c_stop,
+ bool(code.globalstate.directives['wraparound'])))
+ else:
start_offset = self.start_code() if self.start else '0'
- if rhs.type.is_array:
- array_length = rhs.type.size
- self.generate_slice_guard_code(code, array_length)
- else:
+ if rhs.type.is_array:
+ array_length = rhs.type.size
+ self.generate_slice_guard_code(code, array_length)
+ else:
array_length = '%s - %s' % (self.stop_code(), start_offset)
code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
@@ -5089,79 +5089,79 @@ class SliceIndexNode(ExprNode):
self.base.result(), array_length
))
- self.generate_subexpr_disposal_code(code)
- self.free_subexpr_temps(code)
- rhs.generate_disposal_code(code)
- rhs.free_temps(code)
-
- def generate_deletion_code(self, code, ignore_nonexisting=False):
- if not self.base.type.is_pyobject:
- error(self.pos,
- "Deleting slices is only supported for Python types, not '%s'." % self.type)
- return
- self.generate_subexpr_evaluation_code(code)
- code.globalstate.use_utility_code(self.set_slice_utility_code)
- (has_c_start, has_c_stop, c_start, c_stop,
- py_start, py_stop, py_slice) = self.get_slice_config()
- code.put_error_if_neg(self.pos,
- "__Pyx_PyObject_DelSlice(%s, %s, %s, %s, %s, %s, %d, %d, %d)" % (
- self.base.py_result(),
- c_start, c_stop,
- py_start, py_stop, py_slice,
- has_c_start, has_c_stop,
- bool(code.globalstate.directives['wraparound'])))
- self.generate_subexpr_disposal_code(code)
- self.free_subexpr_temps(code)
-
- def get_slice_config(self):
- has_c_start, c_start, py_start = False, '0', 'NULL'
- if self.start:
- has_c_start = not self.start.type.is_pyobject
- if has_c_start:
- c_start = self.start.result()
- else:
- py_start = '&%s' % self.start.py_result()
- has_c_stop, c_stop, py_stop = False, '0', 'NULL'
- if self.stop:
- has_c_stop = not self.stop.type.is_pyobject
- if has_c_stop:
- c_stop = self.stop.result()
- else:
- py_stop = '&%s' % self.stop.py_result()
- py_slice = self.slice and '&%s' % self.slice.py_result() or 'NULL'
- return (has_c_start, has_c_stop, c_start, c_stop,
- py_start, py_stop, py_slice)
-
- def generate_slice_guard_code(self, code, target_size):
- if not self.base.type.is_array:
- return
- slice_size = self.base.type.size
+ self.generate_subexpr_disposal_code(code)
+ self.free_subexpr_temps(code)
+ rhs.generate_disposal_code(code)
+ rhs.free_temps(code)
+
+ def generate_deletion_code(self, code, ignore_nonexisting=False):
+ if not self.base.type.is_pyobject:
+ error(self.pos,
+ "Deleting slices is only supported for Python types, not '%s'." % self.type)
+ return
+ self.generate_subexpr_evaluation_code(code)
+ code.globalstate.use_utility_code(self.set_slice_utility_code)
+ (has_c_start, has_c_stop, c_start, c_stop,
+ py_start, py_stop, py_slice) = self.get_slice_config()
+ code.put_error_if_neg(self.pos,
+ "__Pyx_PyObject_DelSlice(%s, %s, %s, %s, %s, %s, %d, %d, %d)" % (
+ self.base.py_result(),
+ c_start, c_stop,
+ py_start, py_stop, py_slice,
+ has_c_start, has_c_stop,
+ bool(code.globalstate.directives['wraparound'])))
+ self.generate_subexpr_disposal_code(code)
+ self.free_subexpr_temps(code)
+
+ def get_slice_config(self):
+ has_c_start, c_start, py_start = False, '0', 'NULL'
+ if self.start:
+ has_c_start = not self.start.type.is_pyobject
+ if has_c_start:
+ c_start = self.start.result()
+ else:
+ py_start = '&%s' % self.start.py_result()
+ has_c_stop, c_stop, py_stop = False, '0', 'NULL'
+ if self.stop:
+ has_c_stop = not self.stop.type.is_pyobject
+ if has_c_stop:
+ c_stop = self.stop.result()
+ else:
+ py_stop = '&%s' % self.stop.py_result()
+ py_slice = self.slice and '&%s' % self.slice.py_result() or 'NULL'
+ return (has_c_start, has_c_stop, c_start, c_stop,
+ py_start, py_stop, py_slice)
+
+ def generate_slice_guard_code(self, code, target_size):
+ if not self.base.type.is_array:
+ return
+ slice_size = self.base.type.size
try:
total_length = slice_size = int(slice_size)
except ValueError:
total_length = None
- start = stop = None
- if self.stop:
- stop = self.stop.result()
- try:
- stop = int(stop)
- if stop < 0:
+ start = stop = None
+ if self.stop:
+ stop = self.stop.result()
+ try:
+ stop = int(stop)
+ if stop < 0:
if total_length is None:
slice_size = '%s + %d' % (slice_size, stop)
else:
slice_size += stop
- else:
- slice_size = stop
- stop = None
- except ValueError:
- pass
-
- if self.start:
- start = self.start.result()
- try:
- start = int(start)
- if start < 0:
+ else:
+ slice_size = stop
+ stop = None
+ except ValueError:
+ pass
+
+ if self.start:
+ start = self.start.result()
+ try:
+ start = int(start)
+ if start < 0:
if total_length is None:
start = '%s + %d' % (self.base.type.size, start)
else:
@@ -5170,9 +5170,9 @@ class SliceIndexNode(ExprNode):
slice_size -= start
else:
slice_size = '%s - (%s)' % (slice_size, start)
- start = None
- except ValueError:
- pass
+ start = None
+ except ValueError:
+ pass
runtime_check = None
compile_time_check = False
@@ -5185,15 +5185,15 @@ class SliceIndexNode(ExprNode):
if compile_time_check and slice_size < 0:
if int_target_size > 0:
- error(self.pos, "Assignment to empty slice.")
+ error(self.pos, "Assignment to empty slice.")
elif compile_time_check and start is None and stop is None:
- # we know the exact slice length
+ # we know the exact slice length
if int_target_size != slice_size:
error(self.pos, "Assignment to slice of wrong length, expected %s, got %s" % (
slice_size, target_size))
- elif start is not None:
- if stop is None:
- stop = slice_size
+ elif start is not None:
+ if stop is None:
+ stop = slice_size
runtime_check = "(%s)-(%s)" % (stop, start)
elif stop is not None:
runtime_check = stop
@@ -5207,95 +5207,95 @@ class SliceIndexNode(ExprNode):
' expected %%" CYTHON_FORMAT_SSIZE_T "d, got %%" CYTHON_FORMAT_SSIZE_T "d",'
' (Py_ssize_t)(%s), (Py_ssize_t)(%s));' % (
target_size, runtime_check))
- code.putln(code.error_goto(self.pos))
- code.putln("}")
-
- def start_code(self):
- if self.start:
- return self.start.result()
- else:
- return "0"
-
- def stop_code(self):
- if self.stop:
- return self.stop.result()
- elif self.base.type.is_array:
- return self.base.type.size
- else:
- return "PY_SSIZE_T_MAX"
-
- def calculate_result_code(self):
- # self.result() is not used, but this method must exist
- return "<unused>"
-
-
-class SliceNode(ExprNode):
- # start:stop:step in subscript list
- #
- # start ExprNode
- # stop ExprNode
- # step ExprNode
-
- subexprs = ['start', 'stop', 'step']
+ code.putln(code.error_goto(self.pos))
+ code.putln("}")
+
+ def start_code(self):
+ if self.start:
+ return self.start.result()
+ else:
+ return "0"
+
+ def stop_code(self):
+ if self.stop:
+ return self.stop.result()
+ elif self.base.type.is_array:
+ return self.base.type.size
+ else:
+ return "PY_SSIZE_T_MAX"
+
+ def calculate_result_code(self):
+ # self.result() is not used, but this method must exist
+ return "<unused>"
+
+
+class SliceNode(ExprNode):
+ # start:stop:step in subscript list
+ #
+ # start ExprNode
+ # stop ExprNode
+ # step ExprNode
+
+ subexprs = ['start', 'stop', 'step']
is_slice = True
- type = slice_type
- is_temp = 1
-
- def calculate_constant_result(self):
- self.constant_result = slice(
- self.start.constant_result,
- self.stop.constant_result,
- self.step.constant_result)
-
- def compile_time_value(self, denv):
- start = self.start.compile_time_value(denv)
- stop = self.stop.compile_time_value(denv)
- step = self.step.compile_time_value(denv)
- try:
- return slice(start, stop, step)
+ type = slice_type
+ is_temp = 1
+
+ def calculate_constant_result(self):
+ self.constant_result = slice(
+ self.start.constant_result,
+ self.stop.constant_result,
+ self.step.constant_result)
+
+ def compile_time_value(self, denv):
+ start = self.start.compile_time_value(denv)
+ stop = self.stop.compile_time_value(denv)
+ step = self.step.compile_time_value(denv)
+ try:
+ return slice(start, stop, step)
except Exception as e:
- self.compile_time_value_error(e)
-
- def may_be_none(self):
- return False
-
- def analyse_types(self, env):
- start = self.start.analyse_types(env)
- stop = self.stop.analyse_types(env)
- step = self.step.analyse_types(env)
- self.start = start.coerce_to_pyobject(env)
- self.stop = stop.coerce_to_pyobject(env)
- self.step = step.coerce_to_pyobject(env)
- if self.start.is_literal and self.stop.is_literal and self.step.is_literal:
- self.is_literal = True
- self.is_temp = False
- return self
-
- gil_message = "Constructing Python slice object"
-
- def calculate_result_code(self):
- return self.result_code
-
- def generate_result_code(self, code):
- if self.is_literal:
+ self.compile_time_value_error(e)
+
+ def may_be_none(self):
+ return False
+
+ def analyse_types(self, env):
+ start = self.start.analyse_types(env)
+ stop = self.stop.analyse_types(env)
+ step = self.step.analyse_types(env)
+ self.start = start.coerce_to_pyobject(env)
+ self.stop = stop.coerce_to_pyobject(env)
+ self.step = step.coerce_to_pyobject(env)
+ if self.start.is_literal and self.stop.is_literal and self.step.is_literal:
+ self.is_literal = True
+ self.is_temp = False
+ return self
+
+ gil_message = "Constructing Python slice object"
+
+ def calculate_result_code(self):
+ return self.result_code
+
+ def generate_result_code(self, code):
+ if self.is_literal:
dedup_key = make_dedup_key(self.type, (self,))
self.result_code = code.get_py_const(py_object_type, 'slice', cleanup_level=2, dedup_key=dedup_key)
code = code.get_cached_constants_writer(self.result_code)
if code is None:
return # already initialised
- code.mark_pos(self.pos)
-
- code.putln(
- "%s = PySlice_New(%s, %s, %s); %s" % (
- self.result(),
- self.start.py_result(),
- self.stop.py_result(),
- self.step.py_result(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- if self.is_literal:
- code.put_giveref(self.py_result())
-
+ code.mark_pos(self.pos)
+
+ code.putln(
+ "%s = PySlice_New(%s, %s, %s); %s" % (
+ self.result(),
+ self.start.py_result(),
+ self.stop.py_result(),
+ self.step.py_result(),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+ if self.is_literal:
+ code.put_giveref(self.py_result())
+
class SliceIntNode(SliceNode):
# start:stop:step in subscript list
# This is just a node to hold start,stop and step nodes that can be
@@ -5304,7 +5304,7 @@ class SliceIntNode(SliceNode):
# start ExprNode
# stop ExprNode
# step ExprNode
-
+
is_temp = 0
def calculate_constant_result(self):
@@ -5351,26 +5351,26 @@ class SliceIntNode(SliceNode):
a.arg.result()
-class CallNode(ExprNode):
-
- # allow overriding the default 'may_be_none' behaviour
- may_return_none = None
-
- def infer_type(self, env):
+class CallNode(ExprNode):
+
+ # allow overriding the default 'may_be_none' behaviour
+ may_return_none = None
+
+ def infer_type(self, env):
# TODO(robertwb): Reduce redundancy with analyse_types.
- function = self.function
- func_type = function.infer_type(env)
- if isinstance(function, NewExprNode):
- # note: needs call to infer_type() above
- return PyrexTypes.CPtrType(function.class_type)
- if func_type is py_object_type:
- # function might have lied for safety => try to find better type
- entry = getattr(function, 'entry', None)
- if entry is not None:
- func_type = entry.type or func_type
- if func_type.is_ptr:
- func_type = func_type.base_type
- if func_type.is_cfunction:
+ function = self.function
+ func_type = function.infer_type(env)
+ if isinstance(function, NewExprNode):
+ # note: needs call to infer_type() above
+ return PyrexTypes.CPtrType(function.class_type)
+ if func_type is py_object_type:
+ # function might have lied for safety => try to find better type
+ entry = getattr(function, 'entry', None)
+ if entry is not None:
+ func_type = entry.type or func_type
+ if func_type.is_ptr:
+ func_type = func_type.base_type
+ if func_type.is_cfunction:
if getattr(self.function, 'entry', None) and hasattr(self, 'args'):
alternatives = self.function.entry.all_alternatives()
arg_types = [arg.infer_type(env) for arg in self.args]
@@ -5380,44 +5380,44 @@ class CallNode(ExprNode):
if func_type.is_ptr:
func_type = func_type.base_type
return func_type.return_type
- return func_type.return_type
- elif func_type is type_type:
- if function.is_name and function.entry and function.entry.type:
- result_type = function.entry.type
- if result_type.is_extension_type:
- return result_type
- elif result_type.is_builtin_type:
- if function.entry.name == 'float':
- return PyrexTypes.c_double_type
- elif function.entry.name in Builtin.types_that_construct_their_instance:
- return result_type
- return py_object_type
-
- def type_dependencies(self, env):
- # TODO: Update when Danilo's C++ code merged in to handle the
- # the case of function overloading.
- return self.function.type_dependencies(env)
-
- def is_simple(self):
- # C function calls could be considered simple, but they may
- # have side-effects that may hit when multiple operations must
- # be effected in order, e.g. when constructing the argument
- # sequence for a function call or comparing values.
- return False
-
- def may_be_none(self):
- if self.may_return_none is not None:
- return self.may_return_none
- func_type = self.function.type
- if func_type is type_type and self.function.is_name:
- entry = self.function.entry
- if entry.type.is_extension_type:
- return False
- if (entry.type.is_builtin_type and
- entry.name in Builtin.types_that_construct_their_instance):
- return False
- return ExprNode.may_be_none(self)
-
+ return func_type.return_type
+ elif func_type is type_type:
+ if function.is_name and function.entry and function.entry.type:
+ result_type = function.entry.type
+ if result_type.is_extension_type:
+ return result_type
+ elif result_type.is_builtin_type:
+ if function.entry.name == 'float':
+ return PyrexTypes.c_double_type
+ elif function.entry.name in Builtin.types_that_construct_their_instance:
+ return result_type
+ return py_object_type
+
+ def type_dependencies(self, env):
+ # TODO: Update when Danilo's C++ code merged in to handle the
+ # the case of function overloading.
+ return self.function.type_dependencies(env)
+
+ def is_simple(self):
+ # C function calls could be considered simple, but they may
+ # have side-effects that may hit when multiple operations must
+ # be effected in order, e.g. when constructing the argument
+ # sequence for a function call or comparing values.
+ return False
+
+ def may_be_none(self):
+ if self.may_return_none is not None:
+ return self.may_return_none
+ func_type = self.function.type
+ if func_type is type_type and self.function.is_name:
+ entry = self.function.entry
+ if entry.type.is_extension_type:
+ return False
+ if (entry.type.is_builtin_type and
+ entry.name in Builtin.types_that_construct_their_instance):
+ return False
+ return ExprNode.may_be_none(self)
+
def set_py_result_type(self, function, func_type=None):
if func_type is None:
func_type = function.type
@@ -5444,120 +5444,120 @@ class CallNode(ExprNode):
else:
self.type = py_object_type
- def analyse_as_type_constructor(self, env):
- type = self.function.analyse_as_type(env)
- if type and type.is_struct_or_union:
- args, kwds = self.explicit_args_kwds()
- items = []
- for arg, member in zip(args, type.scope.var_entries):
- items.append(DictItemNode(pos=arg.pos, key=StringNode(pos=arg.pos, value=member.name), value=arg))
- if kwds:
- items += kwds.key_value_pairs
- self.key_value_pairs = items
- self.__class__ = DictNode
- self.analyse_types(env) # FIXME
- self.coerce_to(type, env)
- return True
- elif type and type.is_cpp_class:
- self.args = [ arg.analyse_types(env) for arg in self.args ]
- constructor = type.scope.lookup("<init>")
+ def analyse_as_type_constructor(self, env):
+ type = self.function.analyse_as_type(env)
+ if type and type.is_struct_or_union:
+ args, kwds = self.explicit_args_kwds()
+ items = []
+ for arg, member in zip(args, type.scope.var_entries):
+ items.append(DictItemNode(pos=arg.pos, key=StringNode(pos=arg.pos, value=member.name), value=arg))
+ if kwds:
+ items += kwds.key_value_pairs
+ self.key_value_pairs = items
+ self.__class__ = DictNode
+ self.analyse_types(env) # FIXME
+ self.coerce_to(type, env)
+ return True
+ elif type and type.is_cpp_class:
+ self.args = [ arg.analyse_types(env) for arg in self.args ]
+ constructor = type.scope.lookup("<init>")
if not constructor:
error(self.function.pos, "no constructor found for C++ type '%s'" % self.function.name)
self.type = error_type
return self
- self.function = RawCNameExprNode(self.function.pos, constructor.type)
- self.function.entry = constructor
+ self.function = RawCNameExprNode(self.function.pos, constructor.type)
+ self.function.entry = constructor
self.function.set_cname(type.empty_declaration_code())
- self.analyse_c_function_call(env)
- self.type = type
- return True
-
- def is_lvalue(self):
- return self.type.is_reference
-
- def nogil_check(self, env):
- func_type = self.function_type()
- if func_type.is_pyobject:
- self.gil_error()
+ self.analyse_c_function_call(env)
+ self.type = type
+ return True
+
+ def is_lvalue(self):
+ return self.type.is_reference
+
+ def nogil_check(self, env):
+ func_type = self.function_type()
+ if func_type.is_pyobject:
+ self.gil_error()
elif not func_type.is_error and not getattr(func_type, 'nogil', False):
- self.gil_error()
-
- gil_message = "Calling gil-requiring function"
-
-
-class SimpleCallNode(CallNode):
- # Function call without keyword, * or ** args.
- #
- # function ExprNode
- # args [ExprNode]
- # arg_tuple ExprNode or None used internally
- # self ExprNode or None used internally
- # coerced_self ExprNode or None used internally
- # wrapper_call bool used internally
- # has_optional_args bool used internally
- # nogil bool used internally
-
- subexprs = ['self', 'coerced_self', 'function', 'args', 'arg_tuple']
-
- self = None
- coerced_self = None
- arg_tuple = None
- wrapper_call = False
- has_optional_args = False
- nogil = False
- analysed = False
+ self.gil_error()
+
+ gil_message = "Calling gil-requiring function"
+
+
+class SimpleCallNode(CallNode):
+ # Function call without keyword, * or ** args.
+ #
+ # function ExprNode
+ # args [ExprNode]
+ # arg_tuple ExprNode or None used internally
+ # self ExprNode or None used internally
+ # coerced_self ExprNode or None used internally
+ # wrapper_call bool used internally
+ # has_optional_args bool used internally
+ # nogil bool used internally
+
+ subexprs = ['self', 'coerced_self', 'function', 'args', 'arg_tuple']
+
+ self = None
+ coerced_self = None
+ arg_tuple = None
+ wrapper_call = False
+ has_optional_args = False
+ nogil = False
+ analysed = False
overflowcheck = False
-
- def compile_time_value(self, denv):
- function = self.function.compile_time_value(denv)
- args = [arg.compile_time_value(denv) for arg in self.args]
- try:
- return function(*args)
+
+ def compile_time_value(self, denv):
+ function = self.function.compile_time_value(denv)
+ args = [arg.compile_time_value(denv) for arg in self.args]
+ try:
+ return function(*args)
except Exception as e:
- self.compile_time_value_error(e)
-
- def analyse_as_type(self, env):
- attr = self.function.as_cython_attribute()
- if attr == 'pointer':
- if len(self.args) != 1:
- error(self.args.pos, "only one type allowed.")
- else:
- type = self.args[0].analyse_as_type(env)
- if not type:
- error(self.args[0].pos, "Unknown type")
- else:
- return PyrexTypes.CPtrType(type)
+ self.compile_time_value_error(e)
+
+ def analyse_as_type(self, env):
+ attr = self.function.as_cython_attribute()
+ if attr == 'pointer':
+ if len(self.args) != 1:
+ error(self.args.pos, "only one type allowed.")
+ else:
+ type = self.args[0].analyse_as_type(env)
+ if not type:
+ error(self.args[0].pos, "Unknown type")
+ else:
+ return PyrexTypes.CPtrType(type)
elif attr == 'typeof':
if len(self.args) != 1:
error(self.args.pos, "only one type allowed.")
operand = self.args[0].analyse_types(env)
return operand.type
-
- def explicit_args_kwds(self):
- return self.args, None
-
- def analyse_types(self, env):
- if self.analyse_as_type_constructor(env):
- return self
- if self.analysed:
- return self
- self.analysed = True
- self.function.is_called = 1
- self.function = self.function.analyse_types(env)
- function = self.function
-
- if function.is_attribute and function.entry and function.entry.is_cmethod:
- # Take ownership of the object from which the attribute
- # was obtained, because we need to pass it as 'self'.
- self.self = function.obj
- function.obj = CloneNode(self.self)
-
- func_type = self.function_type()
+
+ def explicit_args_kwds(self):
+ return self.args, None
+
+ def analyse_types(self, env):
+ if self.analyse_as_type_constructor(env):
+ return self
+ if self.analysed:
+ return self
+ self.analysed = True
+ self.function.is_called = 1
+ self.function = self.function.analyse_types(env)
+ function = self.function
+
+ if function.is_attribute and function.entry and function.entry.is_cmethod:
+ # Take ownership of the object from which the attribute
+ # was obtained, because we need to pass it as 'self'.
+ self.self = function.obj
+ function.obj = CloneNode(self.self)
+
+ func_type = self.function_type()
self.is_numpy_call_with_exprs = False
if (has_np_pythran(env) and function.is_numpy_attribute and
pythran_is_numpy_func_supported(function)):
has_pythran_args = True
- self.arg_tuple = TupleNode(self.pos, args = self.args)
+ self.arg_tuple = TupleNode(self.pos, args = self.args)
self.arg_tuple = self.arg_tuple.analyse_types(env)
for arg in self.arg_tuple.args:
has_pythran_args &= is_pythran_supported_node_or_none(arg)
@@ -5573,269 +5573,269 @@ class SimpleCallNode(CallNode):
elif func_type.is_pyobject:
self.arg_tuple = TupleNode(self.pos, args = self.args)
self.arg_tuple = self.arg_tuple.analyse_types(env).coerce_to_pyobject(env)
- self.args = None
+ self.args = None
self.set_py_result_type(function, func_type)
- self.is_temp = 1
- else:
- self.args = [ arg.analyse_types(env) for arg in self.args ]
- self.analyse_c_function_call(env)
+ self.is_temp = 1
+ else:
+ self.args = [ arg.analyse_types(env) for arg in self.args ]
+ self.analyse_c_function_call(env)
if func_type.exception_check == '+':
self.is_temp = True
- return self
-
- def function_type(self):
- # Return the type of the function being called, coercing a function
- # pointer to a function if necessary. If the function has fused
- # arguments, return the specific type.
- func_type = self.function.type
-
- if func_type.is_ptr:
- func_type = func_type.base_type
-
- return func_type
-
- def analyse_c_function_call(self, env):
- func_type = self.function.type
- if func_type is error_type:
- self.type = error_type
- return
-
- if func_type.is_cfunction and func_type.is_static_method:
- if self.self and self.self.type.is_extension_type:
- # To support this we'd need to pass self to determine whether
- # it was overloaded in Python space (possibly via a Cython
- # superclass turning a cdef method into a cpdef one).
- error(self.pos, "Cannot call a static method on an instance variable.")
- args = self.args
- elif self.self:
- args = [self.self] + self.args
- else:
- args = self.args
-
- if func_type.is_cpp_class:
- overloaded_entry = self.function.type.scope.lookup("operator()")
- if overloaded_entry is None:
- self.type = PyrexTypes.error_type
- self.result_code = "<error>"
- return
- elif hasattr(self.function, 'entry'):
- overloaded_entry = self.function.entry
+ return self
+
+ def function_type(self):
+ # Return the type of the function being called, coercing a function
+ # pointer to a function if necessary. If the function has fused
+ # arguments, return the specific type.
+ func_type = self.function.type
+
+ if func_type.is_ptr:
+ func_type = func_type.base_type
+
+ return func_type
+
+ def analyse_c_function_call(self, env):
+ func_type = self.function.type
+ if func_type is error_type:
+ self.type = error_type
+ return
+
+ if func_type.is_cfunction and func_type.is_static_method:
+ if self.self and self.self.type.is_extension_type:
+ # To support this we'd need to pass self to determine whether
+ # it was overloaded in Python space (possibly via a Cython
+ # superclass turning a cdef method into a cpdef one).
+ error(self.pos, "Cannot call a static method on an instance variable.")
+ args = self.args
+ elif self.self:
+ args = [self.self] + self.args
+ else:
+ args = self.args
+
+ if func_type.is_cpp_class:
+ overloaded_entry = self.function.type.scope.lookup("operator()")
+ if overloaded_entry is None:
+ self.type = PyrexTypes.error_type
+ self.result_code = "<error>"
+ return
+ elif hasattr(self.function, 'entry'):
+ overloaded_entry = self.function.entry
elif self.function.is_subscript and self.function.is_fused_index:
- overloaded_entry = self.function.type.entry
- else:
- overloaded_entry = None
-
- if overloaded_entry:
- if self.function.type.is_fused:
- functypes = self.function.type.get_all_specialized_function_types()
- alternatives = [f.entry for f in functypes]
- else:
- alternatives = overloaded_entry.all_alternatives()
-
+ overloaded_entry = self.function.type.entry
+ else:
+ overloaded_entry = None
+
+ if overloaded_entry:
+ if self.function.type.is_fused:
+ functypes = self.function.type.get_all_specialized_function_types()
+ alternatives = [f.entry for f in functypes]
+ else:
+ alternatives = overloaded_entry.all_alternatives()
+
entry = PyrexTypes.best_match(
[arg.type for arg in args], alternatives, self.pos, env, args)
-
- if not entry:
- self.type = PyrexTypes.error_type
- self.result_code = "<error>"
- return
-
- entry.used = True
+
+ if not entry:
+ self.type = PyrexTypes.error_type
+ self.result_code = "<error>"
+ return
+
+ entry.used = True
if not func_type.is_cpp_class:
self.function.entry = entry
- self.function.type = entry.type
- func_type = self.function_type()
- else:
- entry = None
- func_type = self.function_type()
- if not func_type.is_cfunction:
- error(self.pos, "Calling non-function type '%s'" % func_type)
- self.type = PyrexTypes.error_type
- self.result_code = "<error>"
- return
-
- # Check no. of args
- max_nargs = len(func_type.args)
- expected_nargs = max_nargs - func_type.optional_arg_count
- actual_nargs = len(args)
- if func_type.optional_arg_count and expected_nargs != actual_nargs:
- self.has_optional_args = 1
- self.is_temp = 1
-
- # check 'self' argument
- if entry and entry.is_cmethod and func_type.args and not func_type.is_static_method:
- formal_arg = func_type.args[0]
- arg = args[0]
- if formal_arg.not_none:
- if self.self:
- self.self = self.self.as_none_safe_node(
+ self.function.type = entry.type
+ func_type = self.function_type()
+ else:
+ entry = None
+ func_type = self.function_type()
+ if not func_type.is_cfunction:
+ error(self.pos, "Calling non-function type '%s'" % func_type)
+ self.type = PyrexTypes.error_type
+ self.result_code = "<error>"
+ return
+
+ # Check no. of args
+ max_nargs = len(func_type.args)
+ expected_nargs = max_nargs - func_type.optional_arg_count
+ actual_nargs = len(args)
+ if func_type.optional_arg_count and expected_nargs != actual_nargs:
+ self.has_optional_args = 1
+ self.is_temp = 1
+
+ # check 'self' argument
+ if entry and entry.is_cmethod and func_type.args and not func_type.is_static_method:
+ formal_arg = func_type.args[0]
+ arg = args[0]
+ if formal_arg.not_none:
+ if self.self:
+ self.self = self.self.as_none_safe_node(
"'NoneType' object has no attribute '%{0}s'".format('.30' if len(entry.name) <= 30 else ''),
- error='PyExc_AttributeError',
- format_args=[entry.name])
- else:
- # unbound method
- arg = arg.as_none_safe_node(
- "descriptor '%s' requires a '%s' object but received a 'NoneType'",
- format_args=[entry.name, formal_arg.type.name])
- if self.self:
- if formal_arg.accept_builtin_subtypes:
- arg = CMethodSelfCloneNode(self.self)
- else:
- arg = CloneNode(self.self)
- arg = self.coerced_self = arg.coerce_to(formal_arg.type, env)
- elif formal_arg.type.is_builtin_type:
- # special case: unbound methods of builtins accept subtypes
- arg = arg.coerce_to(formal_arg.type, env)
- if arg.type.is_builtin_type and isinstance(arg, PyTypeTestNode):
- arg.exact_builtin_type = False
- args[0] = arg
-
- # Coerce arguments
- some_args_in_temps = False
+ error='PyExc_AttributeError',
+ format_args=[entry.name])
+ else:
+ # unbound method
+ arg = arg.as_none_safe_node(
+ "descriptor '%s' requires a '%s' object but received a 'NoneType'",
+ format_args=[entry.name, formal_arg.type.name])
+ if self.self:
+ if formal_arg.accept_builtin_subtypes:
+ arg = CMethodSelfCloneNode(self.self)
+ else:
+ arg = CloneNode(self.self)
+ arg = self.coerced_self = arg.coerce_to(formal_arg.type, env)
+ elif formal_arg.type.is_builtin_type:
+ # special case: unbound methods of builtins accept subtypes
+ arg = arg.coerce_to(formal_arg.type, env)
+ if arg.type.is_builtin_type and isinstance(arg, PyTypeTestNode):
+ arg.exact_builtin_type = False
+ args[0] = arg
+
+ # Coerce arguments
+ some_args_in_temps = False
for i in range(min(max_nargs, actual_nargs)):
- formal_arg = func_type.args[i]
- formal_type = formal_arg.type
- arg = args[i].coerce_to(formal_type, env)
- if formal_arg.not_none:
- # C methods must do the None checks at *call* time
- arg = arg.as_none_safe_node(
- "cannot pass None into a C function argument that is declared 'not None'")
- if arg.is_temp:
- if i > 0:
- # first argument in temp doesn't impact subsequent arguments
- some_args_in_temps = True
- elif arg.type.is_pyobject and not env.nogil:
- if i == 0 and self.self is not None:
- # a method's cloned "self" argument is ok
- pass
- elif arg.nonlocally_immutable():
- # plain local variables are ok
- pass
- else:
- # we do not safely own the argument's reference,
- # but we must make sure it cannot be collected
- # before we return from the function, so we create
- # an owned temp reference to it
- if i > 0: # first argument doesn't matter
- some_args_in_temps = True
- arg = arg.coerce_to_temp(env)
- args[i] = arg
-
- # handle additional varargs parameters
+ formal_arg = func_type.args[i]
+ formal_type = formal_arg.type
+ arg = args[i].coerce_to(formal_type, env)
+ if formal_arg.not_none:
+ # C methods must do the None checks at *call* time
+ arg = arg.as_none_safe_node(
+ "cannot pass None into a C function argument that is declared 'not None'")
+ if arg.is_temp:
+ if i > 0:
+ # first argument in temp doesn't impact subsequent arguments
+ some_args_in_temps = True
+ elif arg.type.is_pyobject and not env.nogil:
+ if i == 0 and self.self is not None:
+ # a method's cloned "self" argument is ok
+ pass
+ elif arg.nonlocally_immutable():
+ # plain local variables are ok
+ pass
+ else:
+ # we do not safely own the argument's reference,
+ # but we must make sure it cannot be collected
+ # before we return from the function, so we create
+ # an owned temp reference to it
+ if i > 0: # first argument doesn't matter
+ some_args_in_temps = True
+ arg = arg.coerce_to_temp(env)
+ args[i] = arg
+
+ # handle additional varargs parameters
for i in range(max_nargs, actual_nargs):
- arg = args[i]
- if arg.type.is_pyobject:
+ arg = args[i]
+ if arg.type.is_pyobject:
if arg.type is str_type:
arg_ctype = PyrexTypes.c_char_ptr_type
else:
arg_ctype = arg.type.default_coerced_ctype()
- if arg_ctype is None:
- error(self.args[i].pos,
- "Python object cannot be passed as a varargs parameter")
- else:
- args[i] = arg = arg.coerce_to(arg_ctype, env)
- if arg.is_temp and i > 0:
- some_args_in_temps = True
-
- if some_args_in_temps:
- # if some args are temps and others are not, they may get
- # constructed in the wrong order (temps first) => make
- # sure they are either all temps or all not temps (except
- # for the last argument, which is evaluated last in any
- # case)
+ if arg_ctype is None:
+ error(self.args[i].pos,
+ "Python object cannot be passed as a varargs parameter")
+ else:
+ args[i] = arg = arg.coerce_to(arg_ctype, env)
+ if arg.is_temp and i > 0:
+ some_args_in_temps = True
+
+ if some_args_in_temps:
+ # if some args are temps and others are not, they may get
+ # constructed in the wrong order (temps first) => make
+ # sure they are either all temps or all not temps (except
+ # for the last argument, which is evaluated last in any
+ # case)
for i in range(actual_nargs-1):
- if i == 0 and self.self is not None:
- continue # self is ok
- arg = args[i]
- if arg.nonlocally_immutable():
- # locals, C functions, unassignable types are safe.
- pass
- elif arg.type.is_cpp_class:
- # Assignment has side effects, avoid.
- pass
- elif env.nogil and arg.type.is_pyobject:
- # can't copy a Python reference into a temp in nogil
- # env (this is safe: a construction would fail in
- # nogil anyway)
- pass
- else:
- #self.args[i] = arg.coerce_to_temp(env)
- # instead: issue a warning
- if i > 0 or i == 1 and self.self is not None: # skip first arg
- warning(arg.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0)
- break
-
- self.args[:] = args
-
- # Calc result type and code fragment
- if isinstance(self.function, NewExprNode):
- self.type = PyrexTypes.CPtrType(self.function.class_type)
- else:
- self.type = func_type.return_type
-
- if self.function.is_name or self.function.is_attribute:
+ if i == 0 and self.self is not None:
+ continue # self is ok
+ arg = args[i]
+ if arg.nonlocally_immutable():
+ # locals, C functions, unassignable types are safe.
+ pass
+ elif arg.type.is_cpp_class:
+ # Assignment has side effects, avoid.
+ pass
+ elif env.nogil and arg.type.is_pyobject:
+ # can't copy a Python reference into a temp in nogil
+ # env (this is safe: a construction would fail in
+ # nogil anyway)
+ pass
+ else:
+ #self.args[i] = arg.coerce_to_temp(env)
+ # instead: issue a warning
+ if i > 0 or i == 1 and self.self is not None: # skip first arg
+ warning(arg.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0)
+ break
+
+ self.args[:] = args
+
+ # Calc result type and code fragment
+ if isinstance(self.function, NewExprNode):
+ self.type = PyrexTypes.CPtrType(self.function.class_type)
+ else:
+ self.type = func_type.return_type
+
+ if self.function.is_name or self.function.is_attribute:
func_entry = self.function.entry
if func_entry and (func_entry.utility_code or func_entry.utility_code_definition):
self.is_temp = 1 # currently doesn't work for self.calculate_result_code()
-
- if self.type.is_pyobject:
- self.result_ctype = py_object_type
- self.is_temp = 1
+
+ if self.type.is_pyobject:
+ self.result_ctype = py_object_type
+ self.is_temp = 1
elif func_type.exception_value is not None or func_type.exception_check:
- self.is_temp = 1
- elif self.type.is_memoryviewslice:
- self.is_temp = 1
- # func_type.exception_check = True
-
+ self.is_temp = 1
+ elif self.type.is_memoryviewslice:
+ self.is_temp = 1
+ # func_type.exception_check = True
+
if self.is_temp and self.type.is_reference:
self.type = PyrexTypes.CFakeReferenceType(self.type.ref_base_type)
- # Called in 'nogil' context?
- self.nogil = env.nogil
- if (self.nogil and
- func_type.exception_check and
- func_type.exception_check != '+'):
- env.use_utility_code(pyerr_occurred_withgil_utility_code)
- # C++ exception handler
- if func_type.exception_check == '+':
- if func_type.exception_value is None:
- env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
-
+ # Called in 'nogil' context?
+ self.nogil = env.nogil
+ if (self.nogil and
+ func_type.exception_check and
+ func_type.exception_check != '+'):
+ env.use_utility_code(pyerr_occurred_withgil_utility_code)
+ # C++ exception handler
+ if func_type.exception_check == '+':
+ if func_type.exception_value is None:
+ env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+
self.overflowcheck = env.directives['overflowcheck']
- def calculate_result_code(self):
- return self.c_call_code()
-
- def c_call_code(self):
- func_type = self.function_type()
- if self.type is PyrexTypes.error_type or not func_type.is_cfunction:
- return "<error>"
- formal_args = func_type.args
- arg_list_code = []
- args = list(zip(formal_args, self.args))
- max_nargs = len(func_type.args)
- expected_nargs = max_nargs - func_type.optional_arg_count
- actual_nargs = len(self.args)
- for formal_arg, actual_arg in args[:expected_nargs]:
- arg_code = actual_arg.result_as(formal_arg.type)
- arg_list_code.append(arg_code)
-
- if func_type.is_overridable:
- arg_list_code.append(str(int(self.wrapper_call or self.function.entry.is_unbound_cmethod)))
-
- if func_type.optional_arg_count:
- if expected_nargs == actual_nargs:
- optional_args = 'NULL'
- else:
- optional_args = "&%s" % self.opt_arg_struct
- arg_list_code.append(optional_args)
-
- for actual_arg in self.args[len(formal_args):]:
- arg_list_code.append(actual_arg.result())
-
- result = "%s(%s)" % (self.function.result(), ', '.join(arg_list_code))
- return result
-
+ def calculate_result_code(self):
+ return self.c_call_code()
+
+ def c_call_code(self):
+ func_type = self.function_type()
+ if self.type is PyrexTypes.error_type or not func_type.is_cfunction:
+ return "<error>"
+ formal_args = func_type.args
+ arg_list_code = []
+ args = list(zip(formal_args, self.args))
+ max_nargs = len(func_type.args)
+ expected_nargs = max_nargs - func_type.optional_arg_count
+ actual_nargs = len(self.args)
+ for formal_arg, actual_arg in args[:expected_nargs]:
+ arg_code = actual_arg.result_as(formal_arg.type)
+ arg_list_code.append(arg_code)
+
+ if func_type.is_overridable:
+ arg_list_code.append(str(int(self.wrapper_call or self.function.entry.is_unbound_cmethod)))
+
+ if func_type.optional_arg_count:
+ if expected_nargs == actual_nargs:
+ optional_args = 'NULL'
+ else:
+ optional_args = "&%s" % self.opt_arg_struct
+ arg_list_code.append(optional_args)
+
+ for actual_arg in self.args[len(formal_args):]:
+ arg_list_code.append(actual_arg.result())
+
+ result = "%s(%s)" % (self.function.result(), ', '.join(arg_list_code))
+ return result
+
def is_c_result_required(self):
func_type = self.function_type()
if not func_type.exception_value or func_type.exception_check == '+':
@@ -5899,9 +5899,9 @@ class SimpleCallNode(CallNode):
subexpr.generate_disposal_code(code)
subexpr.free_temps(code)
- def generate_result_code(self, code):
- func_type = self.function_type()
- if func_type.is_pyobject:
+ def generate_result_code(self, code):
+ func_type = self.function_type()
+ if func_type.is_pyobject:
arg_code = self.arg_tuple.py_result()
code.globalstate.use_utility_code(UtilityCode.load_cached(
"PyObjectCall", "ObjectHandling.c"))
@@ -5911,66 +5911,66 @@ class SimpleCallNode(CallNode):
self.function.py_result(),
arg_code,
code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- elif func_type.is_cfunction:
- if self.has_optional_args:
- actual_nargs = len(self.args)
- expected_nargs = len(func_type.args) - func_type.optional_arg_count
- self.opt_arg_struct = code.funcstate.allocate_temp(
- func_type.op_arg_struct.base_type, manage_ref=True)
- code.putln("%s.%s = %s;" % (
- self.opt_arg_struct,
- Naming.pyrex_prefix + "n",
- len(self.args) - expected_nargs))
- args = list(zip(func_type.args, self.args))
- for formal_arg, actual_arg in args[expected_nargs:actual_nargs]:
- code.putln("%s.%s = %s;" % (
- self.opt_arg_struct,
- func_type.opt_arg_cname(formal_arg.name),
- actual_arg.result_as(formal_arg.type)))
- exc_checks = []
- if self.type.is_pyobject and self.is_temp:
- exc_checks.append("!%s" % self.result())
- elif self.type.is_memoryviewslice:
- assert self.is_temp
- exc_checks.append(self.type.error_condition(self.result()))
+ code.put_gotref(self.py_result())
+ elif func_type.is_cfunction:
+ if self.has_optional_args:
+ actual_nargs = len(self.args)
+ expected_nargs = len(func_type.args) - func_type.optional_arg_count
+ self.opt_arg_struct = code.funcstate.allocate_temp(
+ func_type.op_arg_struct.base_type, manage_ref=True)
+ code.putln("%s.%s = %s;" % (
+ self.opt_arg_struct,
+ Naming.pyrex_prefix + "n",
+ len(self.args) - expected_nargs))
+ args = list(zip(func_type.args, self.args))
+ for formal_arg, actual_arg in args[expected_nargs:actual_nargs]:
+ code.putln("%s.%s = %s;" % (
+ self.opt_arg_struct,
+ func_type.opt_arg_cname(formal_arg.name),
+ actual_arg.result_as(formal_arg.type)))
+ exc_checks = []
+ if self.type.is_pyobject and self.is_temp:
+ exc_checks.append("!%s" % self.result())
+ elif self.type.is_memoryviewslice:
+ assert self.is_temp
+ exc_checks.append(self.type.error_condition(self.result()))
elif func_type.exception_check != '+':
- exc_val = func_type.exception_value
- exc_check = func_type.exception_check
- if exc_val is not None:
+ exc_val = func_type.exception_value
+ exc_check = func_type.exception_check
+ if exc_val is not None:
exc_checks.append("%s == %s" % (self.result(), func_type.return_type.cast_code(exc_val)))
- if exc_check:
- if self.nogil:
- exc_checks.append("__Pyx_ErrOccurredWithGIL()")
- else:
- exc_checks.append("PyErr_Occurred()")
- if self.is_temp or exc_checks:
- rhs = self.c_call_code()
- if self.result():
- lhs = "%s = " % self.result()
- if self.is_temp and self.type.is_pyobject:
- #return_type = self.type # func_type.return_type
- #print "SimpleCallNode.generate_result_code: casting", rhs, \
- # "from", return_type, "to pyobject" ###
- rhs = typecast(py_object_type, self.type, rhs)
- else:
- lhs = ""
- if func_type.exception_check == '+':
+ if exc_check:
+ if self.nogil:
+ exc_checks.append("__Pyx_ErrOccurredWithGIL()")
+ else:
+ exc_checks.append("PyErr_Occurred()")
+ if self.is_temp or exc_checks:
+ rhs = self.c_call_code()
+ if self.result():
+ lhs = "%s = " % self.result()
+ if self.is_temp and self.type.is_pyobject:
+ #return_type = self.type # func_type.return_type
+ #print "SimpleCallNode.generate_result_code: casting", rhs, \
+ # "from", return_type, "to pyobject" ###
+ rhs = typecast(py_object_type, self.type, rhs)
+ else:
+ lhs = ""
+ if func_type.exception_check == '+':
translate_cpp_exception(code, self.pos, '%s%s;' % (lhs, rhs),
self.result() if self.type.is_pyobject else None,
func_type.exception_value, self.nogil)
- else:
+ else:
if exc_checks:
- goto_error = code.error_goto_if(" && ".join(exc_checks), self.pos)
- else:
- goto_error = ""
- code.putln("%s%s; %s" % (lhs, rhs, goto_error))
- if self.type.is_pyobject and self.result():
- code.put_gotref(self.py_result())
- if self.has_optional_args:
- code.funcstate.release_temp(self.opt_arg_struct)
-
-
+ goto_error = code.error_goto_if(" && ".join(exc_checks), self.pos)
+ else:
+ goto_error = ""
+ code.putln("%s%s; %s" % (lhs, rhs, goto_error))
+ if self.type.is_pyobject and self.result():
+ code.put_gotref(self.py_result())
+ if self.has_optional_args:
+ code.funcstate.release_temp(self.opt_arg_struct)
+
+
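The following rough model shows how the exc_checks list above is combined into one C condition; plain strings stand in for C expressions and the helper itself is illustrative, not generated by Cython. The result is compared against the declared exception value, and the error indicator is optionally tested (via __Pyx_ErrOccurredWithGIL when the call happens without the GIL).

def build_exception_check(result_code, exception_value=None, exception_check=True, nogil=False):
    # Build the combined "result looks like an error" condition.
    checks = []
    if exception_value is not None:
        checks.append("%s == %s" % (result_code, exception_value))
    if exception_check:
        checks.append("__Pyx_ErrOccurredWithGIL()" if nogil else "PyErr_Occurred()")
    return " && ".join(checks)

# build_exception_check("__pyx_r", "-1") == "__pyx_r == -1 && PyErr_Occurred()"
# which is roughly what an "except? -1" declaration produces.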
class NumPyMethodCallNode(ExprNode):
# Pythran call to a NumPy function or method.
#
@@ -5999,103 +5999,103 @@ class NumPyMethodCallNode(ExprNode):
", ".join(a.pythran_result() for a in args)))
-class PyMethodCallNode(SimpleCallNode):
- # Specialised call to a (potential) PyMethodObject with non-constant argument tuple.
- # Allows the self argument to be injected directly instead of repacking a tuple for it.
- #
- # function ExprNode the function/method object to call
- # arg_tuple TupleNode the arguments for the args tuple
-
- subexprs = ['function', 'arg_tuple']
- is_temp = True
-
- def generate_evaluation_code(self, code):
- code.mark_pos(self.pos)
- self.allocate_temp_result(code)
-
- self.function.generate_evaluation_code(code)
- assert self.arg_tuple.mult_factor is None
- args = self.arg_tuple.args
- for arg in args:
- arg.generate_evaluation_code(code)
-
- # make sure function is in temp so that we can replace the reference below if it's a method
- reuse_function_temp = self.function.is_temp
- if reuse_function_temp:
- function = self.function.result()
- else:
- function = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- self.function.make_owned_reference(code)
- code.put("%s = %s; " % (function, self.function.py_result()))
- self.function.generate_disposal_code(code)
- self.function.free_temps(code)
-
- self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- code.putln("%s = NULL;" % self_arg)
- arg_offset_cname = None
- if len(args) > 1:
+class PyMethodCallNode(SimpleCallNode):
+ # Specialised call to a (potential) PyMethodObject with non-constant argument tuple.
+ # Allows the self argument to be injected directly instead of repacking a tuple for it.
+ #
+ # function ExprNode the function/method object to call
+ # arg_tuple TupleNode the arguments for the args tuple
+
+ subexprs = ['function', 'arg_tuple']
+ is_temp = True
+
+ def generate_evaluation_code(self, code):
+ code.mark_pos(self.pos)
+ self.allocate_temp_result(code)
+
+ self.function.generate_evaluation_code(code)
+ assert self.arg_tuple.mult_factor is None
+ args = self.arg_tuple.args
+ for arg in args:
+ arg.generate_evaluation_code(code)
+
+ # make sure function is in temp so that we can replace the reference below if it's a method
+ reuse_function_temp = self.function.is_temp
+ if reuse_function_temp:
+ function = self.function.result()
+ else:
+ function = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ self.function.make_owned_reference(code)
+ code.put("%s = %s; " % (function, self.function.py_result()))
+ self.function.generate_disposal_code(code)
+ self.function.free_temps(code)
+
+ self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ code.putln("%s = NULL;" % self_arg)
+ arg_offset_cname = None
+ if len(args) > 1:
arg_offset_cname = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
- code.putln("%s = 0;" % arg_offset_cname)
-
- def attribute_is_likely_method(attr):
- obj = attr.obj
- if obj.is_name and obj.entry.is_pyglobal:
- return False # more likely to be a function
- return True
-
- if self.function.is_attribute:
- likely_method = 'likely' if attribute_is_likely_method(self.function) else 'unlikely'
- elif self.function.is_name and self.function.cf_state:
- # not an attribute itself, but might have been assigned from one (e.g. bound method)
- for assignment in self.function.cf_state:
- value = assignment.rhs
+ code.putln("%s = 0;" % arg_offset_cname)
+
+ def attribute_is_likely_method(attr):
+ obj = attr.obj
+ if obj.is_name and obj.entry.is_pyglobal:
+ return False # more likely to be a function
+ return True
+
+ if self.function.is_attribute:
+ likely_method = 'likely' if attribute_is_likely_method(self.function) else 'unlikely'
+ elif self.function.is_name and self.function.cf_state:
+ # not an attribute itself, but might have been assigned from one (e.g. bound method)
+ for assignment in self.function.cf_state:
+ value = assignment.rhs
if value and value.is_attribute and value.obj.type and value.obj.type.is_pyobject:
- if attribute_is_likely_method(value):
- likely_method = 'likely'
- break
- else:
- likely_method = 'unlikely'
- else:
- likely_method = 'unlikely'
-
+ if attribute_is_likely_method(value):
+ likely_method = 'likely'
+ break
+ else:
+ likely_method = 'unlikely'
+ else:
+ likely_method = 'unlikely'
+
code.putln("if (CYTHON_UNPACK_METHODS && %s(PyMethod_Check(%s))) {" % (likely_method, function))
- code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function))
- # the following is always true in Py3 (kept only for safety),
- # but is false for unbound methods in Py2
- code.putln("if (likely(%s)) {" % self_arg)
- code.putln("PyObject* function = PyMethod_GET_FUNCTION(%s);" % function)
- code.put_incref(self_arg, py_object_type)
- code.put_incref("function", py_object_type)
-    # free method object as early as possible to enable reuse from CPython's freelist
- code.put_decref_set(function, "function")
- if len(args) > 1:
- code.putln("%s = 1;" % arg_offset_cname)
- code.putln("}")
- code.putln("}")
-
- if not args:
- # fastest special case: try to avoid tuple creation
- code.globalstate.use_utility_code(
+ code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function))
+ # the following is always true in Py3 (kept only for safety),
+ # but is false for unbound methods in Py2
+ code.putln("if (likely(%s)) {" % self_arg)
+ code.putln("PyObject* function = PyMethod_GET_FUNCTION(%s);" % function)
+ code.put_incref(self_arg, py_object_type)
+ code.put_incref("function", py_object_type)
+            # free method object as early as possible to enable reuse from CPython's freelist
+ code.put_decref_set(function, "function")
+ if len(args) > 1:
+ code.putln("%s = 1;" % arg_offset_cname)
+ code.putln("}")
+ code.putln("}")
+
+ if not args:
+ # fastest special case: try to avoid tuple creation
+ code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCallNoArg", "ObjectHandling.c"))
code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c"))
- code.putln(
+ UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c"))
+ code.putln(
"%s = (%s) ? __Pyx_PyObject_CallOneArg(%s, %s) : __Pyx_PyObject_CallNoArg(%s);" % (
self.result(), self_arg,
- function, self_arg,
+ function, self_arg,
function))
code.put_xdecref_clear(self_arg, py_object_type)
- code.funcstate.release_temp(self_arg)
+ code.funcstate.release_temp(self_arg)
code.putln(code.error_goto_if_null(self.result(), self.pos))
code.put_gotref(self.py_result())
elif len(args) == 1:
# fastest special case: try to avoid tuple creation
- code.globalstate.use_utility_code(
+ code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCall2Args", "ObjectHandling.c"))
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c"))
arg = args[0]
- code.putln(
+ code.putln(
"%s = (%s) ? __Pyx_PyObject_Call2Args(%s, %s, %s) : __Pyx_PyObject_CallOneArg(%s, %s);" % (
self.result(), self_arg,
function, self_arg, arg.py_result(),
@@ -6105,8 +6105,8 @@ class PyMethodCallNode(SimpleCallNode):
arg.generate_disposal_code(code)
arg.free_temps(code)
code.putln(code.error_goto_if_null(self.result(), self.pos))
- code.put_gotref(self.py_result())
- else:
+ code.put_gotref(self.py_result())
+ else:
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyFunctionFastCall", "ObjectHandling.c"))
code.globalstate.use_utility_code(
@@ -6136,193 +6136,193 @@ class PyMethodCallNode(SimpleCallNode):
code.putln("#endif")
code.putln("{")
- args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- code.putln("%s = PyTuple_New(%d+%s); %s" % (
+ args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ code.putln("%s = PyTuple_New(%d+%s); %s" % (
args_tuple, len(args), arg_offset_cname,
- code.error_goto_if_null(args_tuple, self.pos)))
- code.put_gotref(args_tuple)
-
- if len(args) > 1:
- code.putln("if (%s) {" % self_arg)
+ code.error_goto_if_null(args_tuple, self.pos)))
+ code.put_gotref(args_tuple)
+
+ if len(args) > 1:
+ code.putln("if (%s) {" % self_arg)
code.putln("__Pyx_GIVEREF(%s); PyTuple_SET_ITEM(%s, 0, %s); %s = NULL;" % (
self_arg, args_tuple, self_arg, self_arg)) # stealing owned ref in this case
- code.funcstate.release_temp(self_arg)
- if len(args) > 1:
- code.putln("}")
-
- for i, arg in enumerate(args):
- arg.make_owned_reference(code)
+ code.funcstate.release_temp(self_arg)
+ if len(args) > 1:
+ code.putln("}")
+
+ for i, arg in enumerate(args):
+ arg.make_owned_reference(code)
code.put_giveref(arg.py_result())
- code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % (
+ code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % (
args_tuple, i, arg_offset_cname, arg.py_result()))
- if len(args) > 1:
- code.funcstate.release_temp(arg_offset_cname)
-
- for arg in args:
- arg.generate_post_assignment_code(code)
- arg.free_temps(code)
-
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c"))
- code.putln(
- "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
- self.result(),
- function, args_tuple,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
- code.put_decref_clear(args_tuple, py_object_type)
- code.funcstate.release_temp(args_tuple)
-
- if len(args) == 1:
- code.putln("}")
+ if len(args) > 1:
+ code.funcstate.release_temp(arg_offset_cname)
+
+ for arg in args:
+ arg.generate_post_assignment_code(code)
+ arg.free_temps(code)
+
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c"))
+ code.putln(
+ "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
+ self.result(),
+ function, args_tuple,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+ code.put_decref_clear(args_tuple, py_object_type)
+ code.funcstate.release_temp(args_tuple)
+
+ if len(args) == 1:
+ code.putln("}")
code.putln("}") # !CYTHON_FAST_PYCALL
-
- if reuse_function_temp:
- self.function.generate_disposal_code(code)
- self.function.free_temps(code)
- else:
- code.put_decref_clear(function, py_object_type)
- code.funcstate.release_temp(function)
-
-
-class InlinedDefNodeCallNode(CallNode):
- # Inline call to defnode
- #
- # function PyCFunctionNode
- # function_name NameNode
- # args [ExprNode]
-
- subexprs = ['args', 'function_name']
- is_temp = 1
- type = py_object_type
- function = None
- function_name = None
-
- def can_be_inlined(self):
-        func_type = self.function.def_node
- if func_type.star_arg or func_type.starstar_arg:
- return False
- if len(func_type.args) != len(self.args):
- return False
+
+ if reuse_function_temp:
+ self.function.generate_disposal_code(code)
+ self.function.free_temps(code)
+ else:
+ code.put_decref_clear(function, py_object_type)
+ code.funcstate.release_temp(function)
+
+
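The core idea of the method-unpacking path above can be modelled in pure Python as follows; this is a hypothetical helper, not part of Cython: when the callee turns out to be a bound method, its underlying function is called directly with `self` prepended, instead of letting the method object repack the argument tuple.

import types

def call_unpacking_method(callable_obj, args):
    # If the callee is a bound method, call its underlying function with self prepended.
    if isinstance(callable_obj, types.MethodType):
        return callable_obj.__func__(callable_obj.__self__, *args)
    return callable_obj(*args)

class _Point:
    def shifted(self, dx):
        return ("shifted", dx)

assert call_unpacking_method(_Point().shifted, (3,)) == ("shifted", 3)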
+class InlinedDefNodeCallNode(CallNode):
+ # Inline call to defnode
+ #
+ # function PyCFunctionNode
+ # function_name NameNode
+ # args [ExprNode]
+
+ subexprs = ['args', 'function_name']
+ is_temp = 1
+ type = py_object_type
+ function = None
+ function_name = None
+
+ def can_be_inlined(self):
+        func_type = self.function.def_node
+ if func_type.star_arg or func_type.starstar_arg:
+ return False
+ if len(func_type.args) != len(self.args):
+ return False
if func_type.num_kwonly_args:
return False # actually wrong number of arguments
- return True
-
- def analyse_types(self, env):
- self.function_name = self.function_name.analyse_types(env)
-
- self.args = [ arg.analyse_types(env) for arg in self.args ]
- func_type = self.function.def_node
- actual_nargs = len(self.args)
-
- # Coerce arguments
- some_args_in_temps = False
+ return True
+
+ def analyse_types(self, env):
+ self.function_name = self.function_name.analyse_types(env)
+
+ self.args = [ arg.analyse_types(env) for arg in self.args ]
+ func_type = self.function.def_node
+ actual_nargs = len(self.args)
+
+ # Coerce arguments
+ some_args_in_temps = False
for i in range(actual_nargs):
- formal_type = func_type.args[i].type
- arg = self.args[i].coerce_to(formal_type, env)
- if arg.is_temp:
- if i > 0:
- # first argument in temp doesn't impact subsequent arguments
- some_args_in_temps = True
- elif arg.type.is_pyobject and not env.nogil:
- if arg.nonlocally_immutable():
- # plain local variables are ok
- pass
- else:
- # we do not safely own the argument's reference,
- # but we must make sure it cannot be collected
- # before we return from the function, so we create
- # an owned temp reference to it
- if i > 0: # first argument doesn't matter
- some_args_in_temps = True
- arg = arg.coerce_to_temp(env)
- self.args[i] = arg
-
- if some_args_in_temps:
- # if some args are temps and others are not, they may get
- # constructed in the wrong order (temps first) => make
- # sure they are either all temps or all not temps (except
- # for the last argument, which is evaluated last in any
- # case)
+ formal_type = func_type.args[i].type
+ arg = self.args[i].coerce_to(formal_type, env)
+ if arg.is_temp:
+ if i > 0:
+ # first argument in temp doesn't impact subsequent arguments
+ some_args_in_temps = True
+ elif arg.type.is_pyobject and not env.nogil:
+ if arg.nonlocally_immutable():
+ # plain local variables are ok
+ pass
+ else:
+ # we do not safely own the argument's reference,
+ # but we must make sure it cannot be collected
+ # before we return from the function, so we create
+ # an owned temp reference to it
+ if i > 0: # first argument doesn't matter
+ some_args_in_temps = True
+ arg = arg.coerce_to_temp(env)
+ self.args[i] = arg
+
+ if some_args_in_temps:
+ # if some args are temps and others are not, they may get
+ # constructed in the wrong order (temps first) => make
+ # sure they are either all temps or all not temps (except
+ # for the last argument, which is evaluated last in any
+ # case)
for i in range(actual_nargs-1):
- arg = self.args[i]
- if arg.nonlocally_immutable():
- # locals, C functions, unassignable types are safe.
- pass
- elif arg.type.is_cpp_class:
- # Assignment has side effects, avoid.
- pass
- elif env.nogil and arg.type.is_pyobject:
- # can't copy a Python reference into a temp in nogil
- # env (this is safe: a construction would fail in
- # nogil anyway)
- pass
- else:
- #self.args[i] = arg.coerce_to_temp(env)
- # instead: issue a warning
- if i > 0:
- warning(arg.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0)
- break
- return self
-
- def generate_result_code(self, code):
- arg_code = [self.function_name.py_result()]
- func_type = self.function.def_node
- for arg, proto_arg in zip(self.args, func_type.args):
- if arg.type.is_pyobject:
- arg_code.append(arg.result_as(proto_arg.type))
- else:
- arg_code.append(arg.result())
- arg_code = ', '.join(arg_code)
- code.putln(
- "%s = %s(%s); %s" % (
- self.result(),
- self.function.def_node.entry.pyfunc_cname,
- arg_code,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class PythonCapiFunctionNode(ExprNode):
- subexprs = []
-
- def __init__(self, pos, py_name, cname, func_type, utility_code = None):
- ExprNode.__init__(self, pos, name=py_name, cname=cname,
- type=func_type, utility_code=utility_code)
-
- def analyse_types(self, env):
- return self
-
- def generate_result_code(self, code):
- if self.utility_code:
- code.globalstate.use_utility_code(self.utility_code)
-
- def calculate_result_code(self):
- return self.cname
-
-
-class PythonCapiCallNode(SimpleCallNode):
- # Python C-API Function call (only created in transforms)
-
- # By default, we assume that the call never returns None, as this
- # is true for most C-API functions in CPython. If this does not
- # apply to a call, set the following to True (or None to inherit
- # the default behaviour).
- may_return_none = False
-
- def __init__(self, pos, function_name, func_type,
- utility_code = None, py_name=None, **kwargs):
- self.type = func_type.return_type
- self.result_ctype = self.type
- self.function = PythonCapiFunctionNode(
- pos, py_name, function_name, func_type,
- utility_code = utility_code)
- # call this last so that we can override the constructed
- # attributes above with explicit keyword arguments if required
- SimpleCallNode.__init__(self, pos, **kwargs)
-
-
+ arg = self.args[i]
+ if arg.nonlocally_immutable():
+ # locals, C functions, unassignable types are safe.
+ pass
+ elif arg.type.is_cpp_class:
+ # Assignment has side effects, avoid.
+ pass
+ elif env.nogil and arg.type.is_pyobject:
+ # can't copy a Python reference into a temp in nogil
+ # env (this is safe: a construction would fail in
+ # nogil anyway)
+ pass
+ else:
+ #self.args[i] = arg.coerce_to_temp(env)
+ # instead: issue a warning
+ if i > 0:
+ warning(arg.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0)
+ break
+ return self
+
+ def generate_result_code(self, code):
+ arg_code = [self.function_name.py_result()]
+ func_type = self.function.def_node
+ for arg, proto_arg in zip(self.args, func_type.args):
+ if arg.type.is_pyobject:
+ arg_code.append(arg.result_as(proto_arg.type))
+ else:
+ arg_code.append(arg.result())
+ arg_code = ', '.join(arg_code)
+ code.putln(
+ "%s = %s(%s); %s" % (
+ self.result(),
+ self.function.def_node.entry.pyfunc_cname,
+ arg_code,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class PythonCapiFunctionNode(ExprNode):
+ subexprs = []
+
+ def __init__(self, pos, py_name, cname, func_type, utility_code = None):
+ ExprNode.__init__(self, pos, name=py_name, cname=cname,
+ type=func_type, utility_code=utility_code)
+
+ def analyse_types(self, env):
+ return self
+
+ def generate_result_code(self, code):
+ if self.utility_code:
+ code.globalstate.use_utility_code(self.utility_code)
+
+ def calculate_result_code(self):
+ return self.cname
+
+
+class PythonCapiCallNode(SimpleCallNode):
+ # Python C-API Function call (only created in transforms)
+
+ # By default, we assume that the call never returns None, as this
+ # is true for most C-API functions in CPython. If this does not
+ # apply to a call, set the following to True (or None to inherit
+ # the default behaviour).
+ may_return_none = False
+
+ def __init__(self, pos, function_name, func_type,
+ utility_code = None, py_name=None, **kwargs):
+ self.type = func_type.return_type
+ self.result_ctype = self.type
+ self.function = PythonCapiFunctionNode(
+ pos, py_name, function_name, func_type,
+ utility_code = utility_code)
+ # call this last so that we can override the constructed
+ # attributes above with explicit keyword arguments if required
+ SimpleCallNode.__init__(self, pos, **kwargs)
+
+
class CachedBuiltinMethodCallNode(CallNode):
# Python call to a method of a known Python builtin (only created in transforms)
@@ -6354,266 +6354,266 @@ class CachedBuiltinMethodCallNode(CallNode):
code.put_gotref(self.result())
-class GeneralCallNode(CallNode):
- # General Python function call, including keyword,
- # * and ** arguments.
- #
- # function ExprNode
- # positional_args ExprNode Tuple of positional arguments
- # keyword_args ExprNode or None Dict of keyword arguments
-
- type = py_object_type
-
- subexprs = ['function', 'positional_args', 'keyword_args']
-
- nogil_check = Node.gil_error
-
- def compile_time_value(self, denv):
- function = self.function.compile_time_value(denv)
- positional_args = self.positional_args.compile_time_value(denv)
- keyword_args = self.keyword_args.compile_time_value(denv)
- try:
- return function(*positional_args, **keyword_args)
+class GeneralCallNode(CallNode):
+ # General Python function call, including keyword,
+ # * and ** arguments.
+ #
+ # function ExprNode
+ # positional_args ExprNode Tuple of positional arguments
+ # keyword_args ExprNode or None Dict of keyword arguments
+
+ type = py_object_type
+
+ subexprs = ['function', 'positional_args', 'keyword_args']
+
+ nogil_check = Node.gil_error
+
+ def compile_time_value(self, denv):
+ function = self.function.compile_time_value(denv)
+ positional_args = self.positional_args.compile_time_value(denv)
+ keyword_args = self.keyword_args.compile_time_value(denv)
+ try:
+ return function(*positional_args, **keyword_args)
except Exception as e:
- self.compile_time_value_error(e)
-
- def explicit_args_kwds(self):
+ self.compile_time_value_error(e)
+
+ def explicit_args_kwds(self):
if (self.keyword_args and not self.keyword_args.is_dict_literal or
not self.positional_args.is_sequence_constructor):
- raise CompileError(self.pos,
- 'Compile-time keyword arguments must be explicit.')
- return self.positional_args.args, self.keyword_args
-
- def analyse_types(self, env):
- if self.analyse_as_type_constructor(env):
- return self
- self.function = self.function.analyse_types(env)
- if not self.function.type.is_pyobject:
- if self.function.type.is_error:
- self.type = error_type
- return self
- if hasattr(self.function, 'entry'):
- node = self.map_to_simple_call_node()
- if node is not None and node is not self:
- return node.analyse_types(env)
- elif self.function.entry.as_variable:
- self.function = self.function.coerce_to_pyobject(env)
- elif node is self:
- error(self.pos,
- "Non-trivial keyword arguments and starred "
- "arguments not allowed in cdef functions.")
- else:
- # error was already reported
- pass
- else:
- self.function = self.function.coerce_to_pyobject(env)
- if self.keyword_args:
- self.keyword_args = self.keyword_args.analyse_types(env)
- self.positional_args = self.positional_args.analyse_types(env)
- self.positional_args = \
- self.positional_args.coerce_to_pyobject(env)
+ raise CompileError(self.pos,
+ 'Compile-time keyword arguments must be explicit.')
+ return self.positional_args.args, self.keyword_args
+
+ def analyse_types(self, env):
+ if self.analyse_as_type_constructor(env):
+ return self
+ self.function = self.function.analyse_types(env)
+ if not self.function.type.is_pyobject:
+ if self.function.type.is_error:
+ self.type = error_type
+ return self
+ if hasattr(self.function, 'entry'):
+ node = self.map_to_simple_call_node()
+ if node is not None and node is not self:
+ return node.analyse_types(env)
+ elif self.function.entry.as_variable:
+ self.function = self.function.coerce_to_pyobject(env)
+ elif node is self:
+ error(self.pos,
+ "Non-trivial keyword arguments and starred "
+ "arguments not allowed in cdef functions.")
+ else:
+ # error was already reported
+ pass
+ else:
+ self.function = self.function.coerce_to_pyobject(env)
+ if self.keyword_args:
+ self.keyword_args = self.keyword_args.analyse_types(env)
+ self.positional_args = self.positional_args.analyse_types(env)
+ self.positional_args = \
+ self.positional_args.coerce_to_pyobject(env)
self.set_py_result_type(self.function)
- self.is_temp = 1
- return self
-
- def map_to_simple_call_node(self):
- """
- Tries to map keyword arguments to declared positional arguments.
- Returns self to try a Python call, None to report an error
- or a SimpleCallNode if the mapping succeeds.
- """
- if not isinstance(self.positional_args, TupleNode):
- # has starred argument
- return self
+ self.is_temp = 1
+ return self
+
+ def map_to_simple_call_node(self):
+ """
+ Tries to map keyword arguments to declared positional arguments.
+ Returns self to try a Python call, None to report an error
+ or a SimpleCallNode if the mapping succeeds.
+ """
+ if not isinstance(self.positional_args, TupleNode):
+ # has starred argument
+ return self
if not self.keyword_args.is_dict_literal:
- # keywords come from arbitrary expression => nothing to do here
- return self
- function = self.function
- entry = getattr(function, 'entry', None)
- if not entry:
- return self
- function_type = entry.type
- if function_type.is_ptr:
- function_type = function_type.base_type
- if not function_type.is_cfunction:
- return self
-
- pos_args = self.positional_args.args
- kwargs = self.keyword_args
- declared_args = function_type.args
- if entry.is_cmethod:
- declared_args = declared_args[1:] # skip 'self'
-
- if len(pos_args) > len(declared_args):
- error(self.pos, "function call got too many positional arguments, "
- "expected %d, got %s" % (len(declared_args),
- len(pos_args)))
- return None
-
- matched_args = set([ arg.name for arg in declared_args[:len(pos_args)]
- if arg.name ])
- unmatched_args = declared_args[len(pos_args):]
- matched_kwargs_count = 0
- args = list(pos_args)
-
- # check for duplicate keywords
- seen = set(matched_args)
- has_errors = False
- for arg in kwargs.key_value_pairs:
- name = arg.key.value
- if name in seen:
- error(arg.pos, "argument '%s' passed twice" % name)
- has_errors = True
- # continue to report more errors if there are any
- seen.add(name)
-
- # match keywords that are passed in order
- for decl_arg, arg in zip(unmatched_args, kwargs.key_value_pairs):
- name = arg.key.value
- if decl_arg.name == name:
- matched_args.add(name)
- matched_kwargs_count += 1
- args.append(arg.value)
- else:
- break
-
- # match keyword arguments that are passed out-of-order, but keep
- # the evaluation of non-simple arguments in order by moving them
- # into temps
- from .UtilNodes import EvalWithTempExprNode, LetRefNode
- temps = []
- if len(kwargs.key_value_pairs) > matched_kwargs_count:
- unmatched_args = declared_args[len(args):]
- keywords = dict([ (arg.key.value, (i+len(pos_args), arg))
- for i, arg in enumerate(kwargs.key_value_pairs) ])
- first_missing_keyword = None
- for decl_arg in unmatched_args:
- name = decl_arg.name
- if name not in keywords:
- # missing keyword argument => either done or error
- if not first_missing_keyword:
- first_missing_keyword = name
- continue
- elif first_missing_keyword:
- if entry.as_variable:
- # we might be able to convert the function to a Python
- # object, which then allows full calling semantics
- # with default values in gaps - currently, we only
- # support optional arguments at the end
- return self
- # wasn't the last keyword => gaps are not supported
- error(self.pos, "C function call is missing "
- "argument '%s'" % first_missing_keyword)
- return None
- pos, arg = keywords[name]
- matched_args.add(name)
- matched_kwargs_count += 1
- if arg.value.is_simple():
- args.append(arg.value)
- else:
- temp = LetRefNode(arg.value)
- assert temp.is_simple()
- args.append(temp)
- temps.append((pos, temp))
-
- if temps:
- # may have to move preceding non-simple args into temps
- final_args = []
- new_temps = []
- first_temp_arg = temps[0][-1]
- for arg_value in args:
- if arg_value is first_temp_arg:
- break # done
- if arg_value.is_simple():
- final_args.append(arg_value)
- else:
- temp = LetRefNode(arg_value)
- new_temps.append(temp)
- final_args.append(temp)
- if new_temps:
- args = final_args
- temps = new_temps + [ arg for i,arg in sorted(temps) ]
-
- # check for unexpected keywords
- for arg in kwargs.key_value_pairs:
- name = arg.key.value
- if name not in matched_args:
- has_errors = True
- error(arg.pos,
- "C function got unexpected keyword argument '%s'" %
- name)
-
- if has_errors:
- # error was reported already
- return None
-
- # all keywords mapped to positional arguments
- # if we are missing arguments, SimpleCallNode will figure it out
- node = SimpleCallNode(self.pos, function=function, args=args)
- for temp in temps[::-1]:
- node = EvalWithTempExprNode(temp, node)
- return node
-
- def generate_result_code(self, code):
- if self.type.is_error: return
- if self.keyword_args:
- kwargs = self.keyword_args.py_result()
- else:
- kwargs = 'NULL'
- code.globalstate.use_utility_code(UtilityCode.load_cached(
- "PyObjectCall", "ObjectHandling.c"))
- code.putln(
- "%s = __Pyx_PyObject_Call(%s, %s, %s); %s" % (
- self.result(),
- self.function.py_result(),
- self.positional_args.py_result(),
- kwargs,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class AsTupleNode(ExprNode):
- # Convert argument to tuple. Used for normalising
- # the * argument of a function call.
- #
- # arg ExprNode
-
- subexprs = ['arg']
+ # keywords come from arbitrary expression => nothing to do here
+ return self
+ function = self.function
+ entry = getattr(function, 'entry', None)
+ if not entry:
+ return self
+ function_type = entry.type
+ if function_type.is_ptr:
+ function_type = function_type.base_type
+ if not function_type.is_cfunction:
+ return self
+
+ pos_args = self.positional_args.args
+ kwargs = self.keyword_args
+ declared_args = function_type.args
+ if entry.is_cmethod:
+ declared_args = declared_args[1:] # skip 'self'
+
+ if len(pos_args) > len(declared_args):
+ error(self.pos, "function call got too many positional arguments, "
+ "expected %d, got %s" % (len(declared_args),
+ len(pos_args)))
+ return None
+
+ matched_args = set([ arg.name for arg in declared_args[:len(pos_args)]
+ if arg.name ])
+ unmatched_args = declared_args[len(pos_args):]
+ matched_kwargs_count = 0
+ args = list(pos_args)
+
+ # check for duplicate keywords
+ seen = set(matched_args)
+ has_errors = False
+ for arg in kwargs.key_value_pairs:
+ name = arg.key.value
+ if name in seen:
+ error(arg.pos, "argument '%s' passed twice" % name)
+ has_errors = True
+ # continue to report more errors if there are any
+ seen.add(name)
+
+ # match keywords that are passed in order
+ for decl_arg, arg in zip(unmatched_args, kwargs.key_value_pairs):
+ name = arg.key.value
+ if decl_arg.name == name:
+ matched_args.add(name)
+ matched_kwargs_count += 1
+ args.append(arg.value)
+ else:
+ break
+
+ # match keyword arguments that are passed out-of-order, but keep
+ # the evaluation of non-simple arguments in order by moving them
+ # into temps
+ from .UtilNodes import EvalWithTempExprNode, LetRefNode
+ temps = []
+ if len(kwargs.key_value_pairs) > matched_kwargs_count:
+ unmatched_args = declared_args[len(args):]
+ keywords = dict([ (arg.key.value, (i+len(pos_args), arg))
+ for i, arg in enumerate(kwargs.key_value_pairs) ])
+ first_missing_keyword = None
+ for decl_arg in unmatched_args:
+ name = decl_arg.name
+ if name not in keywords:
+ # missing keyword argument => either done or error
+ if not first_missing_keyword:
+ first_missing_keyword = name
+ continue
+ elif first_missing_keyword:
+ if entry.as_variable:
+ # we might be able to convert the function to a Python
+ # object, which then allows full calling semantics
+ # with default values in gaps - currently, we only
+ # support optional arguments at the end
+ return self
+ # wasn't the last keyword => gaps are not supported
+ error(self.pos, "C function call is missing "
+ "argument '%s'" % first_missing_keyword)
+ return None
+ pos, arg = keywords[name]
+ matched_args.add(name)
+ matched_kwargs_count += 1
+ if arg.value.is_simple():
+ args.append(arg.value)
+ else:
+ temp = LetRefNode(arg.value)
+ assert temp.is_simple()
+ args.append(temp)
+ temps.append((pos, temp))
+
+ if temps:
+ # may have to move preceding non-simple args into temps
+ final_args = []
+ new_temps = []
+ first_temp_arg = temps[0][-1]
+ for arg_value in args:
+ if arg_value is first_temp_arg:
+ break # done
+ if arg_value.is_simple():
+ final_args.append(arg_value)
+ else:
+ temp = LetRefNode(arg_value)
+ new_temps.append(temp)
+ final_args.append(temp)
+ if new_temps:
+ args = final_args
+ temps = new_temps + [ arg for i,arg in sorted(temps) ]
+
+ # check for unexpected keywords
+ for arg in kwargs.key_value_pairs:
+ name = arg.key.value
+ if name not in matched_args:
+ has_errors = True
+ error(arg.pos,
+ "C function got unexpected keyword argument '%s'" %
+ name)
+
+ if has_errors:
+ # error was reported already
+ return None
+
+ # all keywords mapped to positional arguments
+ # if we are missing arguments, SimpleCallNode will figure it out
+ node = SimpleCallNode(self.pos, function=function, args=args)
+ for temp in temps[::-1]:
+ node = EvalWithTempExprNode(temp, node)
+ return node
+
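A deliberately simplified model of map_to_simple_call_node() above: keyword arguments are folded into the positional list when they fill the remaining declared parameters in order and without gaps. The real method additionally handles out-of-order keywords by moving their values into temporaries; in this sketch any leftover keyword simply means "no mapping". Illustrative only, not Cython API.

def map_keywords_to_positions(declared_names, pos_args, kwargs):
    # Returns the merged argument list, or None if the call cannot be mapped.
    args = list(pos_args)
    remaining = declared_names[len(pos_args):]
    pending = dict(kwargs)
    for name in remaining:
        if name not in pending:
            break
        args.append(pending.pop(name))
    return None if pending else args

# map_keywords_to_positions(["a", "b", "c"], [1], {"b": 2, "c": 3}) == [1, 2, 3]
# map_keywords_to_positions(["a", "b"], [1], {"unknown": 5}) is None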
+ def generate_result_code(self, code):
+ if self.type.is_error: return
+ if self.keyword_args:
+ kwargs = self.keyword_args.py_result()
+ else:
+ kwargs = 'NULL'
+ code.globalstate.use_utility_code(UtilityCode.load_cached(
+ "PyObjectCall", "ObjectHandling.c"))
+ code.putln(
+ "%s = __Pyx_PyObject_Call(%s, %s, %s); %s" % (
+ self.result(),
+ self.function.py_result(),
+ self.positional_args.py_result(),
+ kwargs,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class AsTupleNode(ExprNode):
+ # Convert argument to tuple. Used for normalising
+ # the * argument of a function call.
+ #
+ # arg ExprNode
+
+ subexprs = ['arg']
is_temp = 1
-
- def calculate_constant_result(self):
- self.constant_result = tuple(self.arg.constant_result)
-
- def compile_time_value(self, denv):
- arg = self.arg.compile_time_value(denv)
- try:
- return tuple(arg)
+
+ def calculate_constant_result(self):
+ self.constant_result = tuple(self.arg.constant_result)
+
+ def compile_time_value(self, denv):
+ arg = self.arg.compile_time_value(denv)
+ try:
+ return tuple(arg)
except Exception as e:
- self.compile_time_value_error(e)
-
- def analyse_types(self, env):
+ self.compile_time_value_error(e)
+
+ def analyse_types(self, env):
self.arg = self.arg.analyse_types(env).coerce_to_pyobject(env)
if self.arg.type is tuple_type:
return self.arg.as_none_safe_node("'NoneType' object is not iterable")
- self.type = tuple_type
- return self
-
- def may_be_none(self):
- return False
-
- nogil_check = Node.gil_error
- gil_message = "Constructing Python tuple"
-
- def generate_result_code(self, code):
+ self.type = tuple_type
+ return self
+
+ def may_be_none(self):
+ return False
+
+ nogil_check = Node.gil_error
+ gil_message = "Constructing Python tuple"
+
+ def generate_result_code(self, code):
cfunc = "__Pyx_PySequence_Tuple" if self.arg.type in (py_object_type, tuple_type) else "PySequence_Tuple"
- code.putln(
+ code.putln(
"%s = %s(%s); %s" % (
- self.result(),
+ self.result(),
cfunc, self.arg.py_result(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
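A plain-Python stand-in for what AsTupleNode models at runtime: the * argument of a call is normalised to a tuple, and passing None raises the "'NoneType' object is not iterable" TypeError that the none-safe check above reports. Illustrative only.

def as_args_tuple(star_arg):
    # Normalise the *args value of a call to a tuple.
    return tuple(star_arg)

assert as_args_tuple([1, 2]) == (1, 2)
# as_args_tuple(None) raises TypeError: 'NoneType' object is not iterable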
class MergedDictNode(ExprNode):
# Helper class for keyword arguments and other merged dicts.
#
@@ -6767,152 +6767,152 @@ class MergedDictNode(ExprNode):
item.annotate(code)
-class AttributeNode(ExprNode):
- # obj.attribute
- #
- # obj ExprNode
- # attribute string
- # needs_none_check boolean Used if obj is an extension type.
- # If set to True, it is known that the type is not None.
- #
- # Used internally:
- #
- # is_py_attr boolean Is a Python getattr operation
- # member string C name of struct member
- # is_called boolean Function call is being done on result
- # entry Entry Symbol table entry of attribute
-
- is_attribute = 1
- subexprs = ['obj']
-
- type = PyrexTypes.error_type
- entry = None
- is_called = 0
- needs_none_check = True
- is_memslice_transpose = False
- is_special_lookup = False
+class AttributeNode(ExprNode):
+ # obj.attribute
+ #
+ # obj ExprNode
+ # attribute string
+ # needs_none_check boolean Used if obj is an extension type.
+ # If set to True, it is known that the type is not None.
+ #
+ # Used internally:
+ #
+ # is_py_attr boolean Is a Python getattr operation
+ # member string C name of struct member
+ # is_called boolean Function call is being done on result
+ # entry Entry Symbol table entry of attribute
+
+ is_attribute = 1
+ subexprs = ['obj']
+
+ type = PyrexTypes.error_type
+ entry = None
+ is_called = 0
+ needs_none_check = True
+ is_memslice_transpose = False
+ is_special_lookup = False
is_py_attr = 0
-
- def as_cython_attribute(self):
- if (isinstance(self.obj, NameNode) and
- self.obj.is_cython_module and not
- self.attribute == u"parallel"):
- return self.attribute
-
- cy = self.obj.as_cython_attribute()
- if cy:
- return "%s.%s" % (cy, self.attribute)
- return None
-
- def coerce_to(self, dst_type, env):
- # If coercing to a generic pyobject and this is a cpdef function
- # we can create the corresponding attribute
- if dst_type is py_object_type:
- entry = self.entry
- if entry and entry.is_cfunction and entry.as_variable:
- # must be a cpdef function
- self.is_temp = 1
- self.entry = entry.as_variable
- self.analyse_as_python_attribute(env)
- return self
- return ExprNode.coerce_to(self, dst_type, env)
-
- def calculate_constant_result(self):
- attr = self.attribute
- if attr.startswith("__") and attr.endswith("__"):
- return
- self.constant_result = getattr(self.obj.constant_result, attr)
-
- def compile_time_value(self, denv):
- attr = self.attribute
- if attr.startswith("__") and attr.endswith("__"):
- error(self.pos,
- "Invalid attribute name '%s' in compile-time expression" % attr)
- return None
- obj = self.obj.compile_time_value(denv)
- try:
- return getattr(obj, attr)
+
+ def as_cython_attribute(self):
+ if (isinstance(self.obj, NameNode) and
+ self.obj.is_cython_module and not
+ self.attribute == u"parallel"):
+ return self.attribute
+
+ cy = self.obj.as_cython_attribute()
+ if cy:
+ return "%s.%s" % (cy, self.attribute)
+ return None
+
+ def coerce_to(self, dst_type, env):
+ # If coercing to a generic pyobject and this is a cpdef function
+ # we can create the corresponding attribute
+ if dst_type is py_object_type:
+ entry = self.entry
+ if entry and entry.is_cfunction and entry.as_variable:
+ # must be a cpdef function
+ self.is_temp = 1
+ self.entry = entry.as_variable
+ self.analyse_as_python_attribute(env)
+ return self
+ return ExprNode.coerce_to(self, dst_type, env)
+
+ def calculate_constant_result(self):
+ attr = self.attribute
+ if attr.startswith("__") and attr.endswith("__"):
+ return
+ self.constant_result = getattr(self.obj.constant_result, attr)
+
+ def compile_time_value(self, denv):
+ attr = self.attribute
+ if attr.startswith("__") and attr.endswith("__"):
+ error(self.pos,
+ "Invalid attribute name '%s' in compile-time expression" % attr)
+ return None
+ obj = self.obj.compile_time_value(denv)
+ try:
+ return getattr(obj, attr)
except Exception as e:
- self.compile_time_value_error(e)
-
- def type_dependencies(self, env):
- return self.obj.type_dependencies(env)
-
- def infer_type(self, env):
- # FIXME: this is way too redundant with analyse_types()
- node = self.analyse_as_cimported_attribute_node(env, target=False)
- if node is not None:
+ self.compile_time_value_error(e)
+
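A rough stand-in for AttributeNode.compile_time_value() above, with error reporting reduced to a plain exception: dunder attribute names are rejected in compile-time expressions, everything else is resolved with getattr(). Illustrative helper, not Cython API.

def compile_time_getattr(obj, attr):
    # Reject dunder names, otherwise resolve the attribute at compile time.
    if attr.startswith("__") and attr.endswith("__"):
        raise ValueError("Invalid attribute name %r in compile-time expression" % attr)
    return getattr(obj, attr)

assert compile_time_getattr("abc", "upper")() == "ABC"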
+ def type_dependencies(self, env):
+ return self.obj.type_dependencies(env)
+
+ def infer_type(self, env):
+ # FIXME: this is way too redundant with analyse_types()
+ node = self.analyse_as_cimported_attribute_node(env, target=False)
+ if node is not None:
if node.entry.type and node.entry.type.is_cfunction:
# special-case - function converted to pointer
return PyrexTypes.CPtrType(node.entry.type)
else:
return node.entry.type
node = self.analyse_as_type_attribute(env)
- if node is not None:
- return node.entry.type
- obj_type = self.obj.infer_type(env)
- self.analyse_attribute(env, obj_type=obj_type)
- if obj_type.is_builtin_type and self.type.is_cfunction:
- # special case: C-API replacements for C methods of
- # builtin types cannot be inferred as C functions as
- # that would prevent their use as bound methods
- return py_object_type
+ if node is not None:
+ return node.entry.type
+ obj_type = self.obj.infer_type(env)
+ self.analyse_attribute(env, obj_type=obj_type)
+ if obj_type.is_builtin_type and self.type.is_cfunction:
+ # special case: C-API replacements for C methods of
+ # builtin types cannot be inferred as C functions as
+ # that would prevent their use as bound methods
+ return py_object_type
elif self.entry and self.entry.is_cmethod:
# special case: bound methods should not be inferred
# as their unbound method types
return py_object_type
- return self.type
-
- def analyse_target_declaration(self, env):
- pass
-
- def analyse_target_types(self, env):
- node = self.analyse_types(env, target = 1)
- if node.type.is_const:
- error(self.pos, "Assignment to const attribute '%s'" % self.attribute)
- if not node.is_lvalue():
- error(self.pos, "Assignment to non-lvalue of type '%s'" % self.type)
- return node
-
- def analyse_types(self, env, target = 0):
- self.initialized_check = env.directives['initializedcheck']
- node = self.analyse_as_cimported_attribute_node(env, target)
- if node is None and not target:
+ return self.type
+
+ def analyse_target_declaration(self, env):
+ pass
+
+ def analyse_target_types(self, env):
+ node = self.analyse_types(env, target = 1)
+ if node.type.is_const:
+ error(self.pos, "Assignment to const attribute '%s'" % self.attribute)
+ if not node.is_lvalue():
+ error(self.pos, "Assignment to non-lvalue of type '%s'" % self.type)
+ return node
+
+ def analyse_types(self, env, target = 0):
+ self.initialized_check = env.directives['initializedcheck']
+ node = self.analyse_as_cimported_attribute_node(env, target)
+ if node is None and not target:
node = self.analyse_as_type_attribute(env)
- if node is None:
- node = self.analyse_as_ordinary_attribute_node(env, target)
- assert node is not None
- if node.entry:
- node.entry.used = True
- if node.is_attribute:
- node.wrap_obj_in_nonecheck(env)
- return node
-
- def analyse_as_cimported_attribute_node(self, env, target):
- # Try to interpret this as a reference to an imported
- # C const, type, var or function. If successful, mutates
-    # C const, type, var or function. If successful, returns
-    # a corresponding NameNode for the entry, otherwise returns None.
- if module_scope:
- entry = module_scope.lookup_here(self.attribute)
- if entry and (
- entry.is_cglobal or entry.is_cfunction
- or entry.is_type or entry.is_const):
- return self.as_name_node(env, entry, target)
+ if node is None:
+ node = self.analyse_as_ordinary_attribute_node(env, target)
+ assert node is not None
+ if node.entry:
+ node.entry.used = True
+ if node.is_attribute:
+ node.wrap_obj_in_nonecheck(env)
+ return node
+
+ def analyse_as_cimported_attribute_node(self, env, target):
+ # Try to interpret this as a reference to an imported
+    # C const, type, var or function. If successful, returns
+    # a corresponding NameNode for the entry, otherwise returns None.
+ module_scope = self.obj.analyse_as_module(env)
+ if module_scope:
+ entry = module_scope.lookup_here(self.attribute)
+ if entry and (
+ entry.is_cglobal or entry.is_cfunction
+ or entry.is_type or entry.is_const):
+ return self.as_name_node(env, entry, target)
if self.is_cimported_module_without_shadow(env):
error(self.pos, "cimported module has no attribute '%s'" % self.attribute)
return self
- return None
-
+ return None
+
def analyse_as_type_attribute(self, env):
- # Try to interpret this as a reference to an unbound
- # C method of an extension type or builtin type. If successful,
- # creates a corresponding NameNode and returns it, otherwise
- # returns None.
- if self.obj.is_string_literal:
- return
- type = self.obj.analyse_as_type(env)
+ # Try to interpret this as a reference to an unbound
+ # C method of an extension type or builtin type. If successful,
+ # creates a corresponding NameNode and returns it, otherwise
+ # returns None.
+ if self.obj.is_string_literal:
+ return
+ type = self.obj.analyse_as_type(env)
if type:
if type.is_extension_type or type.is_builtin_type or type.is_cpp_class:
entry = type.scope.lookup_here(self.attribute)
@@ -6940,7 +6940,7 @@ class AttributeNode(ExprNode):
ctype.args[0] = PyrexTypes.CFuncTypeArg('self', type, 'self', None)
else:
cname = "%s->%s" % (type.vtabptr_cname, entry.cname)
- ctype = entry.type
+ ctype = entry.type
ubcm_entry = Symtab.Entry(entry.name, cname, ctype)
ubcm_entry.is_cfunction = 1
ubcm_entry.func_cname = entry.func_cname
@@ -6952,489 +6952,489 @@ class AttributeNode(ExprNode):
for entry in type.entry.enum_values:
if entry.name == self.attribute:
return self.as_name_node(env, entry, target=False)
- else:
+ else:
error(self.pos, "%s not a known value of %s" % (self.attribute, type))
else:
error(self.pos, "%s not a known value of %s" % (self.attribute, type))
- return None
-
- def analyse_as_type(self, env):
- module_scope = self.obj.analyse_as_module(env)
- if module_scope:
- return module_scope.lookup_type(self.attribute)
- if not self.obj.is_string_literal:
- base_type = self.obj.analyse_as_type(env)
- if base_type and hasattr(base_type, 'scope') and base_type.scope is not None:
- return base_type.scope.lookup_type(self.attribute)
- return None
-
- def analyse_as_extension_type(self, env):
- # Try to interpret this as a reference to an extension type
- # in a cimported module. Returns the extension type, or None.
- module_scope = self.obj.analyse_as_module(env)
- if module_scope:
- entry = module_scope.lookup_here(self.attribute)
- if entry and entry.is_type:
- if entry.type.is_extension_type or entry.type.is_builtin_type:
- return entry.type
- return None
-
- def analyse_as_module(self, env):
- # Try to interpret this as a reference to a cimported module
- # in another cimported module. Returns the module scope, or None.
- module_scope = self.obj.analyse_as_module(env)
- if module_scope:
- entry = module_scope.lookup_here(self.attribute)
- if entry and entry.as_module:
- return entry.as_module
- return None
-
- def as_name_node(self, env, entry, target):
- # Create a corresponding NameNode from this node and complete the
- # analyse_types phase.
- node = NameNode.from_node(self, name=self.attribute, entry=entry)
- if target:
- node = node.analyse_target_types(env)
- else:
- node = node.analyse_rvalue_entry(env)
- node.entry.used = 1
- return node
-
- def analyse_as_ordinary_attribute_node(self, env, target):
- self.obj = self.obj.analyse_types(env)
- self.analyse_attribute(env)
- if self.entry and self.entry.is_cmethod and not self.is_called:
-# error(self.pos, "C method can only be called")
- pass
- ## Reference to C array turns into pointer to first element.
- #while self.type.is_array:
- # self.type = self.type.element_ptr_type()
- if self.is_py_attr:
- if not target:
- self.is_temp = 1
- self.result_ctype = py_object_type
- elif target and self.obj.type.is_builtin_type:
- error(self.pos, "Assignment to an immutable object field")
- #elif self.type.is_memoryviewslice and not target:
- # self.is_temp = True
- return self
-
- def analyse_attribute(self, env, obj_type = None):
- # Look up attribute and set self.type and self.member.
- immutable_obj = obj_type is not None # used during type inference
- self.is_py_attr = 0
- self.member = self.attribute
- if obj_type is None:
- if self.obj.type.is_string or self.obj.type.is_pyunicode_ptr:
- self.obj = self.obj.coerce_to_pyobject(env)
- obj_type = self.obj.type
- else:
- if obj_type.is_string or obj_type.is_pyunicode_ptr:
- obj_type = py_object_type
- if obj_type.is_ptr or obj_type.is_array:
- obj_type = obj_type.base_type
- self.op = "->"
- elif obj_type.is_extension_type or obj_type.is_builtin_type:
- self.op = "->"
+ return None
+
+ def analyse_as_type(self, env):
+ module_scope = self.obj.analyse_as_module(env)
+ if module_scope:
+ return module_scope.lookup_type(self.attribute)
+ if not self.obj.is_string_literal:
+ base_type = self.obj.analyse_as_type(env)
+ if base_type and hasattr(base_type, 'scope') and base_type.scope is not None:
+ return base_type.scope.lookup_type(self.attribute)
+ return None
+
+ def analyse_as_extension_type(self, env):
+ # Try to interpret this as a reference to an extension type
+ # in a cimported module. Returns the extension type, or None.
+ module_scope = self.obj.analyse_as_module(env)
+ if module_scope:
+ entry = module_scope.lookup_here(self.attribute)
+ if entry and entry.is_type:
+ if entry.type.is_extension_type or entry.type.is_builtin_type:
+ return entry.type
+ return None
+
+ def analyse_as_module(self, env):
+ # Try to interpret this as a reference to a cimported module
+ # in another cimported module. Returns the module scope, or None.
+ module_scope = self.obj.analyse_as_module(env)
+ if module_scope:
+ entry = module_scope.lookup_here(self.attribute)
+ if entry and entry.as_module:
+ return entry.as_module
+ return None
+
+ def as_name_node(self, env, entry, target):
+ # Create a corresponding NameNode from this node and complete the
+ # analyse_types phase.
+ node = NameNode.from_node(self, name=self.attribute, entry=entry)
+ if target:
+ node = node.analyse_target_types(env)
+ else:
+ node = node.analyse_rvalue_entry(env)
+ node.entry.used = 1
+ return node
+
+ def analyse_as_ordinary_attribute_node(self, env, target):
+ self.obj = self.obj.analyse_types(env)
+ self.analyse_attribute(env)
+ if self.entry and self.entry.is_cmethod and not self.is_called:
+# error(self.pos, "C method can only be called")
+ pass
+ ## Reference to C array turns into pointer to first element.
+ #while self.type.is_array:
+ # self.type = self.type.element_ptr_type()
+ if self.is_py_attr:
+ if not target:
+ self.is_temp = 1
+ self.result_ctype = py_object_type
+ elif target and self.obj.type.is_builtin_type:
+ error(self.pos, "Assignment to an immutable object field")
+ #elif self.type.is_memoryviewslice and not target:
+ # self.is_temp = True
+ return self
+
+ def analyse_attribute(self, env, obj_type = None):
+ # Look up attribute and set self.type and self.member.
+ immutable_obj = obj_type is not None # used during type inference
+ self.is_py_attr = 0
+ self.member = self.attribute
+ if obj_type is None:
+ if self.obj.type.is_string or self.obj.type.is_pyunicode_ptr:
+ self.obj = self.obj.coerce_to_pyobject(env)
+ obj_type = self.obj.type
+ else:
+ if obj_type.is_string or obj_type.is_pyunicode_ptr:
+ obj_type = py_object_type
+ if obj_type.is_ptr or obj_type.is_array:
+ obj_type = obj_type.base_type
+ self.op = "->"
+ elif obj_type.is_extension_type or obj_type.is_builtin_type:
+ self.op = "->"
elif obj_type.is_reference and obj_type.is_fake_reference:
self.op = "->"
- else:
- self.op = "."
- if obj_type.has_attributes:
- if obj_type.attributes_known():
+ else:
+ self.op = "."
+ if obj_type.has_attributes:
+ if obj_type.attributes_known():
entry = obj_type.scope.lookup_here(self.attribute)
if obj_type.is_memoryviewslice and not entry:
- if self.attribute == 'T':
- self.is_memslice_transpose = True
- self.is_temp = True
- self.use_managed_ref = True
+ if self.attribute == 'T':
+ self.is_memslice_transpose = True
+ self.is_temp = True
+ self.use_managed_ref = True
self.type = self.obj.type.transpose(self.pos)
- return
- else:
- obj_type.declare_attribute(self.attribute, env, self.pos)
+ return
+ else:
+ obj_type.declare_attribute(self.attribute, env, self.pos)
entry = obj_type.scope.lookup_here(self.attribute)
- if entry and entry.is_member:
- entry = None
- else:
- error(self.pos,
- "Cannot select attribute of incomplete type '%s'"
- % obj_type)
- self.type = PyrexTypes.error_type
- return
- self.entry = entry
- if entry:
- if obj_type.is_extension_type and entry.name == "__weakref__":
- error(self.pos, "Illegal use of special attribute __weakref__")
-
- # def methods need the normal attribute lookup
- # because they do not have struct entries
-        # fused functions go through assignment synthesis
- # (foo = pycfunction(foo_func_obj)) and need to go through
- # regular Python lookup as well
- if (entry.is_variable and not entry.fused_cfunction) or entry.is_cmethod:
- self.type = entry.type
- self.member = entry.cname
- return
- else:
- # If it's not a variable or C method, it must be a Python
- # method of an extension type, so we treat it like a Python
- # attribute.
- pass
- # If we get here, the base object is not a struct/union/extension
- # type, or it is an extension type and the attribute is either not
- # declared or is declared as a Python method. Treat it as a Python
- # attribute reference.
- self.analyse_as_python_attribute(env, obj_type, immutable_obj)
-
- def analyse_as_python_attribute(self, env, obj_type=None, immutable_obj=False):
- if obj_type is None:
- obj_type = self.obj.type
- # mangle private '__*' Python attributes used inside of a class
- self.attribute = env.mangle_class_private_name(self.attribute)
- self.member = self.attribute
- self.type = py_object_type
- self.is_py_attr = 1
-
- if not obj_type.is_pyobject and not obj_type.is_error:
+ if entry and entry.is_member:
+ entry = None
+ else:
+ error(self.pos,
+ "Cannot select attribute of incomplete type '%s'"
+ % obj_type)
+ self.type = PyrexTypes.error_type
+ return
+ self.entry = entry
+ if entry:
+ if obj_type.is_extension_type and entry.name == "__weakref__":
+ error(self.pos, "Illegal use of special attribute __weakref__")
+
+ # def methods need the normal attribute lookup
+ # because they do not have struct entries
+            # fused functions go through assignment synthesis
+ # (foo = pycfunction(foo_func_obj)) and need to go through
+ # regular Python lookup as well
+ if (entry.is_variable and not entry.fused_cfunction) or entry.is_cmethod:
+ self.type = entry.type
+ self.member = entry.cname
+ return
+ else:
+ # If it's not a variable or C method, it must be a Python
+ # method of an extension type, so we treat it like a Python
+ # attribute.
+ pass
+ # If we get here, the base object is not a struct/union/extension
+ # type, or it is an extension type and the attribute is either not
+ # declared or is declared as a Python method. Treat it as a Python
+ # attribute reference.
+ self.analyse_as_python_attribute(env, obj_type, immutable_obj)
+
+ def analyse_as_python_attribute(self, env, obj_type=None, immutable_obj=False):
+ if obj_type is None:
+ obj_type = self.obj.type
+ # mangle private '__*' Python attributes used inside of a class
+ self.attribute = env.mangle_class_private_name(self.attribute)
+ self.member = self.attribute
+ self.type = py_object_type
+ self.is_py_attr = 1
+
+ if not obj_type.is_pyobject and not obj_type.is_error:
# Expose python methods for immutable objects.
if (obj_type.is_string or obj_type.is_cpp_string
or obj_type.is_buffer or obj_type.is_memoryviewslice
or obj_type.is_numeric
or (obj_type.is_ctuple and obj_type.can_coerce_to_pyobject(env))
or (obj_type.is_struct and obj_type.can_coerce_to_pyobject(env))):
- if not immutable_obj:
- self.obj = self.obj.coerce_to_pyobject(env)
- elif (obj_type.is_cfunction and (self.obj.is_name or self.obj.is_attribute)
- and self.obj.entry.as_variable
- and self.obj.entry.as_variable.type.is_pyobject):
- # might be an optimised builtin function => unpack it
- if not immutable_obj:
- self.obj = self.obj.coerce_to_pyobject(env)
- else:
- error(self.pos,
- "Object of type '%s' has no attribute '%s'" %
- (obj_type, self.attribute))
-
- def wrap_obj_in_nonecheck(self, env):
- if not env.directives['nonecheck']:
- return
-
- msg = None
- format_args = ()
- if (self.obj.type.is_extension_type and self.needs_none_check and not
- self.is_py_attr):
+ if not immutable_obj:
+ self.obj = self.obj.coerce_to_pyobject(env)
+ elif (obj_type.is_cfunction and (self.obj.is_name or self.obj.is_attribute)
+ and self.obj.entry.as_variable
+ and self.obj.entry.as_variable.type.is_pyobject):
+ # might be an optimised builtin function => unpack it
+ if not immutable_obj:
+ self.obj = self.obj.coerce_to_pyobject(env)
+ else:
+ error(self.pos,
+ "Object of type '%s' has no attribute '%s'" %
+ (obj_type, self.attribute))
+
+ def wrap_obj_in_nonecheck(self, env):
+ if not env.directives['nonecheck']:
+ return
+
+ msg = None
+ format_args = ()
+ if (self.obj.type.is_extension_type and self.needs_none_check and not
+ self.is_py_attr):
msg = "'NoneType' object has no attribute '%{0}s'".format('.30' if len(self.attribute) <= 30 else '')
- format_args = (self.attribute,)
- elif self.obj.type.is_memoryviewslice:
- if self.is_memslice_transpose:
- msg = "Cannot transpose None memoryview slice"
- else:
- entry = self.obj.type.scope.lookup_here(self.attribute)
- if entry:
- # copy/is_c_contig/shape/strides etc
- msg = "Cannot access '%s' attribute of None memoryview slice"
- format_args = (entry.name,)
-
- if msg:
- self.obj = self.obj.as_none_safe_node(msg, 'PyExc_AttributeError',
- format_args=format_args)
-
- def nogil_check(self, env):
- if self.is_py_attr:
- self.gil_error()
-
- gil_message = "Accessing Python attribute"
-
+ format_args = (self.attribute,)
+ elif self.obj.type.is_memoryviewslice:
+ if self.is_memslice_transpose:
+ msg = "Cannot transpose None memoryview slice"
+ else:
+ entry = self.obj.type.scope.lookup_here(self.attribute)
+ if entry:
+ # copy/is_c_contig/shape/strides etc
+ msg = "Cannot access '%s' attribute of None memoryview slice"
+ format_args = (entry.name,)
+
+ if msg:
+ self.obj = self.obj.as_none_safe_node(msg, 'PyExc_AttributeError',
+ format_args=format_args)
+
+ def nogil_check(self, env):
+ if self.is_py_attr:
+ self.gil_error()
+
+ gil_message = "Accessing Python attribute"
+
def is_cimported_module_without_shadow(self, env):
return self.obj.is_cimported_module_without_shadow(env)
- def is_simple(self):
- if self.obj:
- return self.result_in_temp() or self.obj.is_simple()
- else:
- return NameNode.is_simple(self)
-
- def is_lvalue(self):
- if self.obj:
+ def is_simple(self):
+ if self.obj:
+ return self.result_in_temp() or self.obj.is_simple()
+ else:
+ return NameNode.is_simple(self)
+
+ def is_lvalue(self):
+ if self.obj:
return True
- else:
- return NameNode.is_lvalue(self)
-
- def is_ephemeral(self):
- if self.obj:
- return self.obj.is_ephemeral()
- else:
- return NameNode.is_ephemeral(self)
-
- def calculate_result_code(self):
- #print "AttributeNode.calculate_result_code:", self.member ###
- #print "...obj node =", self.obj, "code", self.obj.result() ###
- #print "...obj type", self.obj.type, "ctype", self.obj.ctype() ###
- obj = self.obj
- obj_code = obj.result_as(obj.type)
- #print "...obj_code =", obj_code ###
- if self.entry and self.entry.is_cmethod:
- if obj.type.is_extension_type and not self.entry.is_builtin_cmethod:
- if self.entry.final_func_cname:
- return self.entry.final_func_cname
-
- if self.type.from_fused:
- # If the attribute was specialized through indexing, make
- # sure to get the right fused name, as our entry was
- # replaced by our parent index node
- # (AnalyseExpressionsTransform)
- self.member = self.entry.cname
-
- return "((struct %s *)%s%s%s)->%s" % (
- obj.type.vtabstruct_cname, obj_code, self.op,
- obj.type.vtabslot_cname, self.member)
- elif self.result_is_used:
- return self.member
- # Generating no code at all for unused access to optimised builtin
- # methods fixes the problem that some optimisations only exist as
- # macros, i.e. there is no function pointer to them, so we would
- # generate invalid C code here.
- return
- elif obj.type.is_complex:
- return "__Pyx_C%s(%s)" % (self.member.upper(), obj_code)
- else:
- if obj.type.is_builtin_type and self.entry and self.entry.is_variable:
- # accessing a field of a builtin type, need to cast better than result_as() does
- obj_code = obj.type.cast_code(obj.result(), to_object_struct = True)
- return "%s%s%s" % (obj_code, self.op, self.member)
-
- def generate_result_code(self, code):
- if self.is_py_attr:
- if self.is_special_lookup:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectLookupSpecial", "ObjectHandling.c"))
- lookup_func_name = '__Pyx_PyObject_LookupSpecial'
- else:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectGetAttrStr", "ObjectHandling.c"))
- lookup_func_name = '__Pyx_PyObject_GetAttrStr'
- code.putln(
- '%s = %s(%s, %s); %s' % (
- self.result(),
- lookup_func_name,
- self.obj.py_result(),
- code.intern_identifier(self.attribute),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- elif self.type.is_memoryviewslice:
- if self.is_memslice_transpose:
- # transpose the slice
- for access, packing in self.type.axes:
- if access == 'ptr':
- error(self.pos, "Transposing not supported for slices "
- "with indirect dimensions")
- return
-
- code.putln("%s = %s;" % (self.result(), self.obj.result()))
+ else:
+ return NameNode.is_lvalue(self)
+
+ def is_ephemeral(self):
+ if self.obj:
+ return self.obj.is_ephemeral()
+ else:
+ return NameNode.is_ephemeral(self)
+
+ def calculate_result_code(self):
+ #print "AttributeNode.calculate_result_code:", self.member ###
+ #print "...obj node =", self.obj, "code", self.obj.result() ###
+ #print "...obj type", self.obj.type, "ctype", self.obj.ctype() ###
+ obj = self.obj
+ obj_code = obj.result_as(obj.type)
+ #print "...obj_code =", obj_code ###
+ if self.entry and self.entry.is_cmethod:
+ if obj.type.is_extension_type and not self.entry.is_builtin_cmethod:
+ if self.entry.final_func_cname:
+ return self.entry.final_func_cname
+
+ if self.type.from_fused:
+ # If the attribute was specialized through indexing, make
+ # sure to get the right fused name, as our entry was
+ # replaced by our parent index node
+ # (AnalyseExpressionsTransform)
+ self.member = self.entry.cname
+
+ return "((struct %s *)%s%s%s)->%s" % (
+ obj.type.vtabstruct_cname, obj_code, self.op,
+ obj.type.vtabslot_cname, self.member)
+ elif self.result_is_used:
+ return self.member
+ # Generating no code at all for unused access to optimised builtin
+ # methods fixes the problem that some optimisations only exist as
+ # macros, i.e. there is no function pointer to them, so we would
+ # generate invalid C code here.
+ return
+ elif obj.type.is_complex:
+ return "__Pyx_C%s(%s)" % (self.member.upper(), obj_code)
+ else:
+ if obj.type.is_builtin_type and self.entry and self.entry.is_variable:
+ # accessing a field of a builtin type, need to cast better than result_as() does
+ obj_code = obj.type.cast_code(obj.result(), to_object_struct = True)
+ return "%s%s%s" % (obj_code, self.op, self.member)
+
+ def generate_result_code(self, code):
+ if self.is_py_attr:
+ if self.is_special_lookup:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectLookupSpecial", "ObjectHandling.c"))
+ lookup_func_name = '__Pyx_PyObject_LookupSpecial'
+ else:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectGetAttrStr", "ObjectHandling.c"))
+ lookup_func_name = '__Pyx_PyObject_GetAttrStr'
+ code.putln(
+ '%s = %s(%s, %s); %s' % (
+ self.result(),
+ lookup_func_name,
+ self.obj.py_result(),
+ code.intern_identifier(self.attribute),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+ elif self.type.is_memoryviewslice:
+ if self.is_memslice_transpose:
+ # transpose the slice
+ for access, packing in self.type.axes:
+ if access == 'ptr':
+ error(self.pos, "Transposing not supported for slices "
+ "with indirect dimensions")
+ return
+
+ code.putln("%s = %s;" % (self.result(), self.obj.result()))
code.put_incref_memoryviewslice(self.result(), have_gil=True)
-
- T = "__pyx_memslice_transpose(&%s) == 0"
- code.putln(code.error_goto_if(T % self.result(), self.pos))
- elif self.initialized_check:
- code.putln(
- 'if (unlikely(!%s.memview)) {'
- 'PyErr_SetString(PyExc_AttributeError,'
- '"Memoryview is not initialized");'
- '%s'
- '}' % (self.result(), code.error_goto(self.pos)))
- else:
- # result_code contains what is needed, but we may need to insert
- # a check and raise an exception
+
+ T = "__pyx_memslice_transpose(&%s) == 0"
+ code.putln(code.error_goto_if(T % self.result(), self.pos))
+ elif self.initialized_check:
+ code.putln(
+ 'if (unlikely(!%s.memview)) {'
+ 'PyErr_SetString(PyExc_AttributeError,'
+ '"Memoryview is not initialized");'
+ '%s'
+ '}' % (self.result(), code.error_goto(self.pos)))
+ else:
+ # result_code contains what is needed, but we may need to insert
+ # a check and raise an exception
if self.obj.type and self.obj.type.is_extension_type:
- pass
+ pass
elif self.entry and self.entry.is_cmethod:
- # C method implemented as function call with utility code
+ # C method implemented as function call with utility code
code.globalstate.use_entry_utility_code(self.entry)
-
- def generate_disposal_code(self, code):
- if self.is_temp and self.type.is_memoryviewslice and self.is_memslice_transpose:
- # mirror condition for putting the memview incref here:
+
+ def generate_disposal_code(self, code):
+ if self.is_temp and self.type.is_memoryviewslice and self.is_memslice_transpose:
+ # mirror condition for putting the memview incref here:
code.put_xdecref_memoryviewslice(
self.result(), have_gil=True)
code.putln("%s.memview = NULL;" % self.result())
code.putln("%s.data = NULL;" % self.result())
- else:
- ExprNode.generate_disposal_code(self, code)
-
+ else:
+ ExprNode.generate_disposal_code(self, code)
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
exception_check=None, exception_value=None):
- self.obj.generate_evaluation_code(code)
- if self.is_py_attr:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
- code.put_error_if_neg(self.pos,
- '__Pyx_PyObject_SetAttrStr(%s, %s, %s)' % (
- self.obj.py_result(),
- code.intern_identifier(self.attribute),
- rhs.py_result()))
- rhs.generate_disposal_code(code)
- rhs.free_temps(code)
- elif self.obj.type.is_complex:
- code.putln("__Pyx_SET_C%s(%s, %s);" % (
- self.member.upper(),
- self.obj.result_as(self.obj.type),
- rhs.result_as(self.ctype())))
+ self.obj.generate_evaluation_code(code)
+ if self.is_py_attr:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
+ code.put_error_if_neg(self.pos,
+ '__Pyx_PyObject_SetAttrStr(%s, %s, %s)' % (
+ self.obj.py_result(),
+ code.intern_identifier(self.attribute),
+ rhs.py_result()))
+ rhs.generate_disposal_code(code)
+ rhs.free_temps(code)
+ elif self.obj.type.is_complex:
+ code.putln("__Pyx_SET_C%s(%s, %s);" % (
+ self.member.upper(),
+ self.obj.result_as(self.obj.type),
+ rhs.result_as(self.ctype())))
rhs.generate_disposal_code(code)
rhs.free_temps(code)
- else:
- select_code = self.result()
- if self.type.is_pyobject and self.use_managed_ref:
- rhs.make_owned_reference(code)
- code.put_giveref(rhs.py_result())
- code.put_gotref(select_code)
- code.put_decref(select_code, self.ctype())
- elif self.type.is_memoryviewslice:
- from . import MemoryView
- MemoryView.put_assign_to_memviewslice(
- select_code, rhs, rhs.result(), self.type, code)
-
- if not self.type.is_memoryviewslice:
- code.putln(
- "%s = %s;" % (
- select_code,
- rhs.result_as(self.ctype())))
- #rhs.result()))
- rhs.generate_post_assignment_code(code)
- rhs.free_temps(code)
- self.obj.generate_disposal_code(code)
- self.obj.free_temps(code)
-
- def generate_deletion_code(self, code, ignore_nonexisting=False):
- self.obj.generate_evaluation_code(code)
- if self.is_py_attr or (self.entry.scope.is_property_scope
- and u'__del__' in self.entry.scope.entries):
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
- code.put_error_if_neg(self.pos,
- '__Pyx_PyObject_DelAttrStr(%s, %s)' % (
- self.obj.py_result(),
- code.intern_identifier(self.attribute)))
- else:
- error(self.pos, "Cannot delete C attribute of extension type")
- self.obj.generate_disposal_code(code)
- self.obj.free_temps(code)
-
- def annotate(self, code):
- if self.is_py_attr:
- style, text = 'py_attr', 'python attribute (%s)'
- else:
- style, text = 'c_attr', 'c attribute (%s)'
- code.annotate(self.pos, AnnotationItem(style, text % self.type, size=len(self.attribute)))
-
-
-#-------------------------------------------------------------------
-#
-# Constructor nodes
-#
-#-------------------------------------------------------------------
-
+ else:
+ select_code = self.result()
+ if self.type.is_pyobject and self.use_managed_ref:
+ rhs.make_owned_reference(code)
+ code.put_giveref(rhs.py_result())
+ code.put_gotref(select_code)
+ code.put_decref(select_code, self.ctype())
+ elif self.type.is_memoryviewslice:
+ from . import MemoryView
+ MemoryView.put_assign_to_memviewslice(
+ select_code, rhs, rhs.result(), self.type, code)
+
+ if not self.type.is_memoryviewslice:
+ code.putln(
+ "%s = %s;" % (
+ select_code,
+ rhs.result_as(self.ctype())))
+ #rhs.result()))
+ rhs.generate_post_assignment_code(code)
+ rhs.free_temps(code)
+ self.obj.generate_disposal_code(code)
+ self.obj.free_temps(code)
+
+ def generate_deletion_code(self, code, ignore_nonexisting=False):
+ self.obj.generate_evaluation_code(code)
+ if self.is_py_attr or (self.entry.scope.is_property_scope
+ and u'__del__' in self.entry.scope.entries):
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
+ code.put_error_if_neg(self.pos,
+ '__Pyx_PyObject_DelAttrStr(%s, %s)' % (
+ self.obj.py_result(),
+ code.intern_identifier(self.attribute)))
+ else:
+ error(self.pos, "Cannot delete C attribute of extension type")
+ self.obj.generate_disposal_code(code)
+ self.obj.free_temps(code)
+
+ def annotate(self, code):
+ if self.is_py_attr:
+ style, text = 'py_attr', 'python attribute (%s)'
+ else:
+ style, text = 'c_attr', 'c attribute (%s)'
+ code.annotate(self.pos, AnnotationItem(style, text % self.type, size=len(self.attribute)))
+
+
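For reference, the is_py_attr paths above lower attribute reads and writes to __Pyx_PyObject_GetAttrStr and __Pyx_PyObject_SetAttrStr, which behave roughly like CPython's getattr/setattr with a pre-interned string name. A minimal plain-Python sketch of that runtime effect (illustration only, not part of the sources):

# Read path: what generate_result_code emits for a Python attribute access,
# expressed with plain getattr.
obj = 3 + 4j
assert getattr(obj, "imag") == 4.0

# Write path: what generate_assignment_code emits for a Python attribute store,
# expressed with plain setattr on a mutable object.
class Box:
    pass

box = Box()
setattr(box, "payload", 42)
assert box.payload == 42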
+#-------------------------------------------------------------------
+#
+# Constructor nodes
+#
+#-------------------------------------------------------------------
+
class StarredUnpackingNode(ExprNode):
- # A starred expression like "*a"
- #
+ # A starred expression like "*a"
+ #
# This is only allowed in sequence assignment or construction such as
- #
- # a, *b = (1,2,3,4) => a = 1 ; b = [2,3,4]
- #
+ #
+ # a, *b = (1,2,3,4) => a = 1 ; b = [2,3,4]
+ #
# and will be special cased during type analysis (or generate an error
- # if it's found at unexpected places).
- #
- # target ExprNode
-
- subexprs = ['target']
- is_starred = 1
- type = py_object_type
- is_temp = 1
+ # if it's found at unexpected places).
+ #
+ # target ExprNode
+
+ subexprs = ['target']
+ is_starred = 1
+ type = py_object_type
+ is_temp = 1
starred_expr_allowed_here = False
-
- def __init__(self, pos, target):
+
+ def __init__(self, pos, target):
ExprNode.__init__(self, pos, target=target)
-
- def analyse_declarations(self, env):
+
+ def analyse_declarations(self, env):
if not self.starred_expr_allowed_here:
error(self.pos, "starred expression is not allowed here")
- self.target.analyse_declarations(env)
-
+ self.target.analyse_declarations(env)
+
def infer_type(self, env):
return self.target.infer_type(env)
- def analyse_types(self, env):
+ def analyse_types(self, env):
if not self.starred_expr_allowed_here:
error(self.pos, "starred expression is not allowed here")
- self.target = self.target.analyse_types(env)
- self.type = self.target.type
- return self
-
- def analyse_target_declaration(self, env):
- self.target.analyse_target_declaration(env)
-
- def analyse_target_types(self, env):
- self.target = self.target.analyse_target_types(env)
- self.type = self.target.type
- return self
-
- def calculate_result_code(self):
- return ""
-
- def generate_result_code(self, code):
- pass
-
-
-class SequenceNode(ExprNode):
- # Base class for list and tuple constructor nodes.
- # Contains common code for performing sequence unpacking.
- #
- # args [ExprNode]
- # unpacked_items [ExprNode] or None
- # coerced_unpacked_items [ExprNode] or None
- # mult_factor ExprNode the integer number of content repetitions ([1,2]*3)
-
- subexprs = ['args', 'mult_factor']
-
- is_sequence_constructor = 1
- unpacked_items = None
- mult_factor = None
- slow = False # trade speed for code size (e.g. use PyTuple_Pack())
-
- def compile_time_value_list(self, denv):
- return [arg.compile_time_value(denv) for arg in self.args]
-
- def replace_starred_target_node(self):
- # replace a starred node in the targets by the contained expression
- self.starred_assignment = False
- args = []
- for arg in self.args:
- if arg.is_starred:
- if self.starred_assignment:
- error(arg.pos, "more than 1 starred expression in assignment")
- self.starred_assignment = True
- arg = arg.target
- arg.is_starred = True
- args.append(arg)
- self.args = args
-
- def analyse_target_declaration(self, env):
- self.replace_starred_target_node()
- for arg in self.args:
- arg.analyse_target_declaration(env)
-
- def analyse_types(self, env, skip_children=False):
+ self.target = self.target.analyse_types(env)
+ self.type = self.target.type
+ return self
+
+ def analyse_target_declaration(self, env):
+ self.target.analyse_target_declaration(env)
+
+ def analyse_target_types(self, env):
+ self.target = self.target.analyse_target_types(env)
+ self.type = self.target.type
+ return self
+
+ def calculate_result_code(self):
+ return ""
+
+ def generate_result_code(self, code):
+ pass
+
+
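To make the semantics concrete, here is a minimal plain-Python sketch of the starred assignment that StarredUnpackingNode marks and that SequenceNode.generate_starred_assignment_code later reproduces in C (illustration only, assuming standard CPython behaviour):

# A starred target absorbs the remaining items as a list.
a, *b = (1, 2, 3, 4)
assert a == 1 and b == [2, 3, 4]

# The starred target may also sit in the middle of the target list.
first, *middle, last = range(5)
assert first == 0 and middle == [1, 2, 3] and last == 4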
+class SequenceNode(ExprNode):
+ # Base class for list and tuple constructor nodes.
+ # Contains common code for performing sequence unpacking.
+ #
+ # args [ExprNode]
+ # unpacked_items [ExprNode] or None
+ # coerced_unpacked_items [ExprNode] or None
+ # mult_factor ExprNode the integer number of content repetitions ([1,2]*3)
+
+ subexprs = ['args', 'mult_factor']
+
+ is_sequence_constructor = 1
+ unpacked_items = None
+ mult_factor = None
+ slow = False # trade speed for code size (e.g. use PyTuple_Pack())
+
+ def compile_time_value_list(self, denv):
+ return [arg.compile_time_value(denv) for arg in self.args]
+
+ def replace_starred_target_node(self):
+ # replace a starred node in the targets by the contained expression
+ self.starred_assignment = False
+ args = []
+ for arg in self.args:
+ if arg.is_starred:
+ if self.starred_assignment:
+ error(arg.pos, "more than 1 starred expression in assignment")
+ self.starred_assignment = True
+ arg = arg.target
+ arg.is_starred = True
+ args.append(arg)
+ self.args = args
+
+ def analyse_target_declaration(self, env):
+ self.replace_starred_target_node()
+ for arg in self.args:
+ arg.analyse_target_declaration(env)
+
+ def analyse_types(self, env, skip_children=False):
for i, arg in enumerate(self.args):
if not skip_children:
arg = arg.analyse_types(env)
- self.args[i] = arg.coerce_to_pyobject(env)
- if self.mult_factor:
- self.mult_factor = self.mult_factor.analyse_types(env)
- if not self.mult_factor.type.is_int:
- self.mult_factor = self.mult_factor.coerce_to_pyobject(env)
- self.is_temp = 1
- # not setting self.type here, subtypes do this
- return self
-
+ self.args[i] = arg.coerce_to_pyobject(env)
+ if self.mult_factor:
+ self.mult_factor = self.mult_factor.analyse_types(env)
+ if not self.mult_factor.type.is_int:
+ self.mult_factor = self.mult_factor.coerce_to_pyobject(env)
+ self.is_temp = 1
+ # not setting self.type here, subtypes do this
+ return self
+
def coerce_to_ctuple(self, dst_type, env):
if self.type == dst_type:
return self
@@ -7478,436 +7478,436 @@ class SequenceNode(ExprNode):
args.append(arg)
self.args[:] = args
- def may_be_none(self):
- return False
-
- def analyse_target_types(self, env):
- if self.mult_factor:
- error(self.pos, "can't assign to multiplied sequence")
- self.unpacked_items = []
- self.coerced_unpacked_items = []
- self.any_coerced_items = False
- for i, arg in enumerate(self.args):
- arg = self.args[i] = arg.analyse_target_types(env)
- if arg.is_starred:
+ def may_be_none(self):
+ return False
+
+ def analyse_target_types(self, env):
+ if self.mult_factor:
+ error(self.pos, "can't assign to multiplied sequence")
+ self.unpacked_items = []
+ self.coerced_unpacked_items = []
+ self.any_coerced_items = False
+ for i, arg in enumerate(self.args):
+ arg = self.args[i] = arg.analyse_target_types(env)
+ if arg.is_starred:
if not arg.type.assignable_from(list_type):
- error(arg.pos,
- "starred target must have Python object (list) type")
- if arg.type is py_object_type:
+ error(arg.pos,
+ "starred target must have Python object (list) type")
+ if arg.type is py_object_type:
arg.type = list_type
- unpacked_item = PyTempNode(self.pos, env)
- coerced_unpacked_item = unpacked_item.coerce_to(arg.type, env)
- if unpacked_item is not coerced_unpacked_item:
- self.any_coerced_items = True
- self.unpacked_items.append(unpacked_item)
- self.coerced_unpacked_items.append(coerced_unpacked_item)
- self.type = py_object_type
- return self
-
- def generate_result_code(self, code):
- self.generate_operation_code(code)
-
- def generate_sequence_packing_code(self, code, target=None, plain=False):
- if target is None:
- target = self.result()
- size_factor = c_mult = ''
- mult_factor = None
-
- if self.mult_factor and not plain:
- mult_factor = self.mult_factor
- if mult_factor.type.is_int:
- c_mult = mult_factor.result()
+ unpacked_item = PyTempNode(self.pos, env)
+ coerced_unpacked_item = unpacked_item.coerce_to(arg.type, env)
+ if unpacked_item is not coerced_unpacked_item:
+ self.any_coerced_items = True
+ self.unpacked_items.append(unpacked_item)
+ self.coerced_unpacked_items.append(coerced_unpacked_item)
+ self.type = py_object_type
+ return self
+
+ def generate_result_code(self, code):
+ self.generate_operation_code(code)
+
+ def generate_sequence_packing_code(self, code, target=None, plain=False):
+ if target is None:
+ target = self.result()
+ size_factor = c_mult = ''
+ mult_factor = None
+
+ if self.mult_factor and not plain:
+ mult_factor = self.mult_factor
+ if mult_factor.type.is_int:
+ c_mult = mult_factor.result()
if (isinstance(mult_factor.constant_result, _py_int_types) and
mult_factor.constant_result > 0):
- size_factor = ' * %s' % mult_factor.constant_result
+ size_factor = ' * %s' % mult_factor.constant_result
elif mult_factor.type.signed:
size_factor = ' * ((%s<0) ? 0:%s)' % (c_mult, c_mult)
- else:
+ else:
size_factor = ' * (%s)' % (c_mult,)
-
+
if self.type is tuple_type and (self.is_literal or self.slow) and not c_mult:
- # use PyTuple_Pack() to avoid generating huge amounts of one-time code
- code.putln('%s = PyTuple_Pack(%d, %s); %s' % (
- target,
- len(self.args),
+ # use PyTuple_Pack() to avoid generating huge amounts of one-time code
+ code.putln('%s = PyTuple_Pack(%d, %s); %s' % (
+ target,
+ len(self.args),
', '.join(arg.py_result() for arg in self.args),
- code.error_goto_if_null(target, self.pos)))
- code.put_gotref(target)
+ code.error_goto_if_null(target, self.pos)))
+ code.put_gotref(target)
elif self.type.is_ctuple:
for i, arg in enumerate(self.args):
code.putln("%s.f%s = %s;" % (
target, i, arg.result()))
- else:
- # build the tuple/list step by step, potentially multiplying it as we go
+ else:
+ # build the tuple/list step by step, potentially multiplying it as we go
if self.type is list_type:
- create_func, set_item_func = 'PyList_New', 'PyList_SET_ITEM'
+ create_func, set_item_func = 'PyList_New', 'PyList_SET_ITEM'
elif self.type is tuple_type:
- create_func, set_item_func = 'PyTuple_New', 'PyTuple_SET_ITEM'
- else:
- raise InternalError("sequence packing for unexpected type %s" % self.type)
- arg_count = len(self.args)
- code.putln("%s = %s(%s%s); %s" % (
- target, create_func, arg_count, size_factor,
- code.error_goto_if_null(target, self.pos)))
- code.put_gotref(target)
-
- if c_mult:
- # FIXME: can't use a temp variable here as the code may
- # end up in the constant building function. Temps
- # currently don't work there.
-
- #counter = code.funcstate.allocate_temp(mult_factor.type, manage_ref=False)
- counter = Naming.quick_temp_cname
- code.putln('{ Py_ssize_t %s;' % counter)
- if arg_count == 1:
- offset = counter
- else:
- offset = '%s * %s' % (counter, arg_count)
- code.putln('for (%s=0; %s < %s; %s++) {' % (
- counter, counter, c_mult, counter
- ))
- else:
- offset = ''
-
+ create_func, set_item_func = 'PyTuple_New', 'PyTuple_SET_ITEM'
+ else:
+ raise InternalError("sequence packing for unexpected type %s" % self.type)
+ arg_count = len(self.args)
+ code.putln("%s = %s(%s%s); %s" % (
+ target, create_func, arg_count, size_factor,
+ code.error_goto_if_null(target, self.pos)))
+ code.put_gotref(target)
+
+ if c_mult:
+ # FIXME: can't use a temp variable here as the code may
+ # end up in the constant building function. Temps
+ # currently don't work there.
+
+ #counter = code.funcstate.allocate_temp(mult_factor.type, manage_ref=False)
+ counter = Naming.quick_temp_cname
+ code.putln('{ Py_ssize_t %s;' % counter)
+ if arg_count == 1:
+ offset = counter
+ else:
+ offset = '%s * %s' % (counter, arg_count)
+ code.putln('for (%s=0; %s < %s; %s++) {' % (
+ counter, counter, c_mult, counter
+ ))
+ else:
+ offset = ''
+
for i in range(arg_count):
- arg = self.args[i]
- if c_mult or not arg.result_in_temp():
- code.put_incref(arg.result(), arg.ctype())
+ arg = self.args[i]
+ if c_mult or not arg.result_in_temp():
+ code.put_incref(arg.result(), arg.ctype())
code.put_giveref(arg.py_result())
- code.putln("%s(%s, %s, %s);" % (
- set_item_func,
- target,
- (offset and i) and ('%s + %s' % (offset, i)) or (offset or i),
- arg.py_result()))
-
- if c_mult:
- code.putln('}')
- #code.funcstate.release_temp(counter)
- code.putln('}')
-
- if mult_factor is not None and mult_factor.type.is_pyobject:
- code.putln('{ PyObject* %s = PyNumber_InPlaceMultiply(%s, %s); %s' % (
- Naming.quick_temp_cname, target, mult_factor.py_result(),
- code.error_goto_if_null(Naming.quick_temp_cname, self.pos)
- ))
- code.put_gotref(Naming.quick_temp_cname)
- code.put_decref(target, py_object_type)
- code.putln('%s = %s;' % (target, Naming.quick_temp_cname))
- code.putln('}')
-
- def generate_subexpr_disposal_code(self, code):
- if self.mult_factor and self.mult_factor.type.is_int:
- super(SequenceNode, self).generate_subexpr_disposal_code(code)
+ code.putln("%s(%s, %s, %s);" % (
+ set_item_func,
+ target,
+ (offset and i) and ('%s + %s' % (offset, i)) or (offset or i),
+ arg.py_result()))
+
+ if c_mult:
+ code.putln('}')
+ #code.funcstate.release_temp(counter)
+ code.putln('}')
+
+ if mult_factor is not None and mult_factor.type.is_pyobject:
+ code.putln('{ PyObject* %s = PyNumber_InPlaceMultiply(%s, %s); %s' % (
+ Naming.quick_temp_cname, target, mult_factor.py_result(),
+ code.error_goto_if_null(Naming.quick_temp_cname, self.pos)
+ ))
+ code.put_gotref(Naming.quick_temp_cname)
+ code.put_decref(target, py_object_type)
+ code.putln('%s = %s;' % (target, Naming.quick_temp_cname))
+ code.putln('}')
+
+ def generate_subexpr_disposal_code(self, code):
+ if self.mult_factor and self.mult_factor.type.is_int:
+ super(SequenceNode, self).generate_subexpr_disposal_code(code)
elif self.type is tuple_type and (self.is_literal or self.slow):
- super(SequenceNode, self).generate_subexpr_disposal_code(code)
- else:
- # We call generate_post_assignment_code here instead
- # of generate_disposal_code, because values were stored
- # in the tuple using a reference-stealing operation.
- for arg in self.args:
- arg.generate_post_assignment_code(code)
- # Should NOT call free_temps -- this is invoked by the default
- # generate_evaluation_code which will do that.
- if self.mult_factor:
- self.mult_factor.generate_disposal_code(code)
-
+ super(SequenceNode, self).generate_subexpr_disposal_code(code)
+ else:
+ # We call generate_post_assignment_code here instead
+ # of generate_disposal_code, because values were stored
+ # in the tuple using a reference-stealing operation.
+ for arg in self.args:
+ arg.generate_post_assignment_code(code)
+ # Should NOT call free_temps -- this is invoked by the default
+ # generate_evaluation_code which will do that.
+ if self.mult_factor:
+ self.mult_factor.generate_disposal_code(code)
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
exception_check=None, exception_value=None):
- if self.starred_assignment:
- self.generate_starred_assignment_code(rhs, code)
- else:
- self.generate_parallel_assignment_code(rhs, code)
-
- for item in self.unpacked_items:
- item.release(code)
- rhs.free_temps(code)
-
- _func_iternext_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None),
- ]))
-
- def generate_parallel_assignment_code(self, rhs, code):
- # Need to work around the fact that generate_evaluation_code
- # allocates the temps in a rather hacky way -- the assignment
- # is evaluated twice, within each if-block.
- for item in self.unpacked_items:
- item.allocate(code)
- special_unpack = (rhs.type is py_object_type
- or rhs.type in (tuple_type, list_type)
- or not rhs.type.is_builtin_type)
- long_enough_for_a_loop = len(self.unpacked_items) > 3
-
- if special_unpack:
- self.generate_special_parallel_unpacking_code(
- code, rhs, use_loop=long_enough_for_a_loop)
- else:
- code.putln("{")
- self.generate_generic_parallel_unpacking_code(
- code, rhs, self.unpacked_items, use_loop=long_enough_for_a_loop)
- code.putln("}")
-
- for value_node in self.coerced_unpacked_items:
- value_node.generate_evaluation_code(code)
- for i in range(len(self.args)):
- self.args[i].generate_assignment_code(
- self.coerced_unpacked_items[i], code)
-
- def generate_special_parallel_unpacking_code(self, code, rhs, use_loop):
- sequence_type_test = '1'
- none_check = "likely(%s != Py_None)" % rhs.py_result()
- if rhs.type is list_type:
- sequence_types = ['List']
- if rhs.may_be_none():
- sequence_type_test = none_check
- elif rhs.type is tuple_type:
- sequence_types = ['Tuple']
- if rhs.may_be_none():
- sequence_type_test = none_check
- else:
- sequence_types = ['Tuple', 'List']
- tuple_check = 'likely(PyTuple_CheckExact(%s))' % rhs.py_result()
- list_check = 'PyList_CheckExact(%s)' % rhs.py_result()
- sequence_type_test = "(%s) || (%s)" % (tuple_check, list_check)
-
- code.putln("if (%s) {" % sequence_type_test)
- code.putln("PyObject* sequence = %s;" % rhs.py_result())
-
- # list/tuple => check size
+ if self.starred_assignment:
+ self.generate_starred_assignment_code(rhs, code)
+ else:
+ self.generate_parallel_assignment_code(rhs, code)
+
+ for item in self.unpacked_items:
+ item.release(code)
+ rhs.free_temps(code)
+
+ _func_iternext_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None),
+ ]))
+
+ def generate_parallel_assignment_code(self, rhs, code):
+ # Need to work around the fact that generate_evaluation_code
+ # allocates the temps in a rather hacky way -- the assignment
+ # is evaluated twice, within each if-block.
+ for item in self.unpacked_items:
+ item.allocate(code)
+ special_unpack = (rhs.type is py_object_type
+ or rhs.type in (tuple_type, list_type)
+ or not rhs.type.is_builtin_type)
+ long_enough_for_a_loop = len(self.unpacked_items) > 3
+
+ if special_unpack:
+ self.generate_special_parallel_unpacking_code(
+ code, rhs, use_loop=long_enough_for_a_loop)
+ else:
+ code.putln("{")
+ self.generate_generic_parallel_unpacking_code(
+ code, rhs, self.unpacked_items, use_loop=long_enough_for_a_loop)
+ code.putln("}")
+
+ for value_node in self.coerced_unpacked_items:
+ value_node.generate_evaluation_code(code)
+ for i in range(len(self.args)):
+ self.args[i].generate_assignment_code(
+ self.coerced_unpacked_items[i], code)
+
+ def generate_special_parallel_unpacking_code(self, code, rhs, use_loop):
+ sequence_type_test = '1'
+ none_check = "likely(%s != Py_None)" % rhs.py_result()
+ if rhs.type is list_type:
+ sequence_types = ['List']
+ if rhs.may_be_none():
+ sequence_type_test = none_check
+ elif rhs.type is tuple_type:
+ sequence_types = ['Tuple']
+ if rhs.may_be_none():
+ sequence_type_test = none_check
+ else:
+ sequence_types = ['Tuple', 'List']
+ tuple_check = 'likely(PyTuple_CheckExact(%s))' % rhs.py_result()
+ list_check = 'PyList_CheckExact(%s)' % rhs.py_result()
+ sequence_type_test = "(%s) || (%s)" % (tuple_check, list_check)
+
+ code.putln("if (%s) {" % sequence_type_test)
+ code.putln("PyObject* sequence = %s;" % rhs.py_result())
+
+ # list/tuple => check size
code.putln("Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);")
- code.putln("if (unlikely(size != %d)) {" % len(self.args))
- code.globalstate.use_utility_code(raise_too_many_values_to_unpack)
- code.putln("if (size > %d) __Pyx_RaiseTooManyValuesError(%d);" % (
- len(self.args), len(self.args)))
- code.globalstate.use_utility_code(raise_need_more_values_to_unpack)
- code.putln("else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);")
+ code.putln("if (unlikely(size != %d)) {" % len(self.args))
+ code.globalstate.use_utility_code(raise_too_many_values_to_unpack)
+ code.putln("if (size > %d) __Pyx_RaiseTooManyValuesError(%d);" % (
+ len(self.args), len(self.args)))
+ code.globalstate.use_utility_code(raise_need_more_values_to_unpack)
+ code.putln("else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);")
# < 0 => exception
- code.putln(code.error_goto(self.pos))
- code.putln("}")
-
+ code.putln(code.error_goto(self.pos))
+ code.putln("}")
+
code.putln("#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS")
- # unpack items from list/tuple in unrolled loop (can't fail)
- if len(sequence_types) == 2:
- code.putln("if (likely(Py%s_CheckExact(sequence))) {" % sequence_types[0])
- for i, item in enumerate(self.unpacked_items):
- code.putln("%s = Py%s_GET_ITEM(sequence, %d); " % (
- item.result(), sequence_types[0], i))
- if len(sequence_types) == 2:
- code.putln("} else {")
- for i, item in enumerate(self.unpacked_items):
- code.putln("%s = Py%s_GET_ITEM(sequence, %d); " % (
- item.result(), sequence_types[1], i))
- code.putln("}")
- for item in self.unpacked_items:
- code.put_incref(item.result(), item.ctype())
-
- code.putln("#else")
- # in non-CPython, use the PySequence protocol (which can fail)
- if not use_loop:
- for i, item in enumerate(self.unpacked_items):
- code.putln("%s = PySequence_ITEM(sequence, %d); %s" % (
- item.result(), i,
- code.error_goto_if_null(item.result(), self.pos)))
- code.put_gotref(item.result())
- else:
- code.putln("{")
- code.putln("Py_ssize_t i;")
- code.putln("PyObject** temps[%s] = {%s};" % (
- len(self.unpacked_items),
- ','.join(['&%s' % item.result() for item in self.unpacked_items])))
- code.putln("for (i=0; i < %s; i++) {" % len(self.unpacked_items))
- code.putln("PyObject* item = PySequence_ITEM(sequence, i); %s" % (
- code.error_goto_if_null('item', self.pos)))
- code.put_gotref('item')
- code.putln("*(temps[i]) = item;")
- code.putln("}")
- code.putln("}")
-
- code.putln("#endif")
- rhs.generate_disposal_code(code)
-
- if sequence_type_test == '1':
- code.putln("}") # all done
- elif sequence_type_test == none_check:
- # either tuple/list or None => save some code by generating the error directly
- code.putln("} else {")
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("RaiseNoneIterError", "ObjectHandling.c"))
- code.putln("__Pyx_RaiseNoneNotIterableError(); %s" % code.error_goto(self.pos))
- code.putln("}") # all done
- else:
- code.putln("} else {") # needs iteration fallback code
- self.generate_generic_parallel_unpacking_code(
- code, rhs, self.unpacked_items, use_loop=use_loop)
- code.putln("}")
-
- def generate_generic_parallel_unpacking_code(self, code, rhs, unpacked_items, use_loop, terminate=True):
- code.globalstate.use_utility_code(raise_need_more_values_to_unpack)
- code.globalstate.use_utility_code(UtilityCode.load_cached("IterFinish", "ObjectHandling.c"))
- code.putln("Py_ssize_t index = -1;") # must be at the start of a C block!
-
- if use_loop:
- code.putln("PyObject** temps[%s] = {%s};" % (
- len(self.unpacked_items),
- ','.join(['&%s' % item.result() for item in unpacked_items])))
-
- iterator_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- code.putln(
- "%s = PyObject_GetIter(%s); %s" % (
- iterator_temp,
- rhs.py_result(),
- code.error_goto_if_null(iterator_temp, self.pos)))
- code.put_gotref(iterator_temp)
- rhs.generate_disposal_code(code)
-
- iternext_func = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False)
- code.putln("%s = Py_TYPE(%s)->tp_iternext;" % (
- iternext_func, iterator_temp))
-
- unpacking_error_label = code.new_label('unpacking_failed')
- unpack_code = "%s(%s)" % (iternext_func, iterator_temp)
- if use_loop:
- code.putln("for (index=0; index < %s; index++) {" % len(unpacked_items))
- code.put("PyObject* item = %s; if (unlikely(!item)) " % unpack_code)
- code.put_goto(unpacking_error_label)
- code.put_gotref("item")
- code.putln("*(temps[index]) = item;")
- code.putln("}")
- else:
- for i, item in enumerate(unpacked_items):
- code.put(
- "index = %d; %s = %s; if (unlikely(!%s)) " % (
- i,
- item.result(),
- unpack_code,
- item.result()))
- code.put_goto(unpacking_error_label)
- code.put_gotref(item.py_result())
-
- if terminate:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("UnpackItemEndCheck", "ObjectHandling.c"))
- code.put_error_if_neg(self.pos, "__Pyx_IternextUnpackEndCheck(%s, %d)" % (
- unpack_code,
- len(unpacked_items)))
- code.putln("%s = NULL;" % iternext_func)
- code.put_decref_clear(iterator_temp, py_object_type)
-
- unpacking_done_label = code.new_label('unpacking_done')
- code.put_goto(unpacking_done_label)
-
- code.put_label(unpacking_error_label)
- code.put_decref_clear(iterator_temp, py_object_type)
- code.putln("%s = NULL;" % iternext_func)
- code.putln("if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);")
- code.putln(code.error_goto(self.pos))
- code.put_label(unpacking_done_label)
-
- code.funcstate.release_temp(iternext_func)
- if terminate:
- code.funcstate.release_temp(iterator_temp)
- iterator_temp = None
-
- return iterator_temp
-
- def generate_starred_assignment_code(self, rhs, code):
- for i, arg in enumerate(self.args):
- if arg.is_starred:
- starred_target = self.unpacked_items[i]
- unpacked_fixed_items_left = self.unpacked_items[:i]
- unpacked_fixed_items_right = self.unpacked_items[i+1:]
- break
- else:
- assert False
-
- iterator_temp = None
- if unpacked_fixed_items_left:
- for item in unpacked_fixed_items_left:
- item.allocate(code)
- code.putln('{')
- iterator_temp = self.generate_generic_parallel_unpacking_code(
- code, rhs, unpacked_fixed_items_left,
- use_loop=True, terminate=False)
- for i, item in enumerate(unpacked_fixed_items_left):
- value_node = self.coerced_unpacked_items[i]
- value_node.generate_evaluation_code(code)
- code.putln('}')
-
- starred_target.allocate(code)
- target_list = starred_target.result()
- code.putln("%s = PySequence_List(%s); %s" % (
- target_list,
- iterator_temp or rhs.py_result(),
- code.error_goto_if_null(target_list, self.pos)))
- code.put_gotref(target_list)
-
- if iterator_temp:
- code.put_decref_clear(iterator_temp, py_object_type)
- code.funcstate.release_temp(iterator_temp)
- else:
- rhs.generate_disposal_code(code)
-
- if unpacked_fixed_items_right:
- code.globalstate.use_utility_code(raise_need_more_values_to_unpack)
- length_temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
- code.putln('%s = PyList_GET_SIZE(%s);' % (length_temp, target_list))
- code.putln("if (unlikely(%s < %d)) {" % (length_temp, len(unpacked_fixed_items_right)))
- code.putln("__Pyx_RaiseNeedMoreValuesError(%d+%s); %s" % (
- len(unpacked_fixed_items_left), length_temp,
- code.error_goto(self.pos)))
- code.putln('}')
-
- for item in unpacked_fixed_items_right[::-1]:
- item.allocate(code)
- for i, (item, coerced_arg) in enumerate(zip(unpacked_fixed_items_right[::-1],
- self.coerced_unpacked_items[::-1])):
- code.putln('#if CYTHON_COMPILING_IN_CPYTHON')
- code.putln("%s = PyList_GET_ITEM(%s, %s-%d); " % (
- item.py_result(), target_list, length_temp, i+1))
- # resize the list the hard way
- code.putln("((PyVarObject*)%s)->ob_size--;" % target_list)
- code.putln('#else')
- code.putln("%s = PySequence_ITEM(%s, %s-%d); " % (
- item.py_result(), target_list, length_temp, i+1))
- code.putln('#endif')
- code.put_gotref(item.py_result())
- coerced_arg.generate_evaluation_code(code)
-
- code.putln('#if !CYTHON_COMPILING_IN_CPYTHON')
- sublist_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- code.putln('%s = PySequence_GetSlice(%s, 0, %s-%d); %s' % (
- sublist_temp, target_list, length_temp, len(unpacked_fixed_items_right),
- code.error_goto_if_null(sublist_temp, self.pos)))
- code.put_gotref(sublist_temp)
- code.funcstate.release_temp(length_temp)
- code.put_decref(target_list, py_object_type)
- code.putln('%s = %s; %s = NULL;' % (target_list, sublist_temp, sublist_temp))
- code.putln('#else')
+ # unpack items from list/tuple in unrolled loop (can't fail)
+ if len(sequence_types) == 2:
+ code.putln("if (likely(Py%s_CheckExact(sequence))) {" % sequence_types[0])
+ for i, item in enumerate(self.unpacked_items):
+ code.putln("%s = Py%s_GET_ITEM(sequence, %d); " % (
+ item.result(), sequence_types[0], i))
+ if len(sequence_types) == 2:
+ code.putln("} else {")
+ for i, item in enumerate(self.unpacked_items):
+ code.putln("%s = Py%s_GET_ITEM(sequence, %d); " % (
+ item.result(), sequence_types[1], i))
+ code.putln("}")
+ for item in self.unpacked_items:
+ code.put_incref(item.result(), item.ctype())
+
+ code.putln("#else")
+ # in non-CPython, use the PySequence protocol (which can fail)
+ if not use_loop:
+ for i, item in enumerate(self.unpacked_items):
+ code.putln("%s = PySequence_ITEM(sequence, %d); %s" % (
+ item.result(), i,
+ code.error_goto_if_null(item.result(), self.pos)))
+ code.put_gotref(item.result())
+ else:
+ code.putln("{")
+ code.putln("Py_ssize_t i;")
+ code.putln("PyObject** temps[%s] = {%s};" % (
+ len(self.unpacked_items),
+ ','.join(['&%s' % item.result() for item in self.unpacked_items])))
+ code.putln("for (i=0; i < %s; i++) {" % len(self.unpacked_items))
+ code.putln("PyObject* item = PySequence_ITEM(sequence, i); %s" % (
+ code.error_goto_if_null('item', self.pos)))
+ code.put_gotref('item')
+ code.putln("*(temps[i]) = item;")
+ code.putln("}")
+ code.putln("}")
+
+ code.putln("#endif")
+ rhs.generate_disposal_code(code)
+
+ if sequence_type_test == '1':
+ code.putln("}") # all done
+ elif sequence_type_test == none_check:
+ # either tuple/list or None => save some code by generating the error directly
+ code.putln("} else {")
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("RaiseNoneIterError", "ObjectHandling.c"))
+ code.putln("__Pyx_RaiseNoneNotIterableError(); %s" % code.error_goto(self.pos))
+ code.putln("}") # all done
+ else:
+ code.putln("} else {") # needs iteration fallback code
+ self.generate_generic_parallel_unpacking_code(
+ code, rhs, self.unpacked_items, use_loop=use_loop)
+ code.putln("}")
+
+ def generate_generic_parallel_unpacking_code(self, code, rhs, unpacked_items, use_loop, terminate=True):
+ code.globalstate.use_utility_code(raise_need_more_values_to_unpack)
+ code.globalstate.use_utility_code(UtilityCode.load_cached("IterFinish", "ObjectHandling.c"))
+ code.putln("Py_ssize_t index = -1;") # must be at the start of a C block!
+
+ if use_loop:
+ code.putln("PyObject** temps[%s] = {%s};" % (
+ len(self.unpacked_items),
+ ','.join(['&%s' % item.result() for item in unpacked_items])))
+
+ iterator_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ code.putln(
+ "%s = PyObject_GetIter(%s); %s" % (
+ iterator_temp,
+ rhs.py_result(),
+ code.error_goto_if_null(iterator_temp, self.pos)))
+ code.put_gotref(iterator_temp)
+ rhs.generate_disposal_code(code)
+
+ iternext_func = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False)
+ code.putln("%s = Py_TYPE(%s)->tp_iternext;" % (
+ iternext_func, iterator_temp))
+
+ unpacking_error_label = code.new_label('unpacking_failed')
+ unpack_code = "%s(%s)" % (iternext_func, iterator_temp)
+ if use_loop:
+ code.putln("for (index=0; index < %s; index++) {" % len(unpacked_items))
+ code.put("PyObject* item = %s; if (unlikely(!item)) " % unpack_code)
+ code.put_goto(unpacking_error_label)
+ code.put_gotref("item")
+ code.putln("*(temps[index]) = item;")
+ code.putln("}")
+ else:
+ for i, item in enumerate(unpacked_items):
+ code.put(
+ "index = %d; %s = %s; if (unlikely(!%s)) " % (
+ i,
+ item.result(),
+ unpack_code,
+ item.result()))
+ code.put_goto(unpacking_error_label)
+ code.put_gotref(item.py_result())
+
+ if terminate:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("UnpackItemEndCheck", "ObjectHandling.c"))
+ code.put_error_if_neg(self.pos, "__Pyx_IternextUnpackEndCheck(%s, %d)" % (
+ unpack_code,
+ len(unpacked_items)))
+ code.putln("%s = NULL;" % iternext_func)
+ code.put_decref_clear(iterator_temp, py_object_type)
+
+ unpacking_done_label = code.new_label('unpacking_done')
+ code.put_goto(unpacking_done_label)
+
+ code.put_label(unpacking_error_label)
+ code.put_decref_clear(iterator_temp, py_object_type)
+ code.putln("%s = NULL;" % iternext_func)
+ code.putln("if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);")
+ code.putln(code.error_goto(self.pos))
+ code.put_label(unpacking_done_label)
+
+ code.funcstate.release_temp(iternext_func)
+ if terminate:
+ code.funcstate.release_temp(iterator_temp)
+ iterator_temp = None
+
+ return iterator_temp
+
+ def generate_starred_assignment_code(self, rhs, code):
+ for i, arg in enumerate(self.args):
+ if arg.is_starred:
+ starred_target = self.unpacked_items[i]
+ unpacked_fixed_items_left = self.unpacked_items[:i]
+ unpacked_fixed_items_right = self.unpacked_items[i+1:]
+ break
+ else:
+ assert False
+
+ iterator_temp = None
+ if unpacked_fixed_items_left:
+ for item in unpacked_fixed_items_left:
+ item.allocate(code)
+ code.putln('{')
+ iterator_temp = self.generate_generic_parallel_unpacking_code(
+ code, rhs, unpacked_fixed_items_left,
+ use_loop=True, terminate=False)
+ for i, item in enumerate(unpacked_fixed_items_left):
+ value_node = self.coerced_unpacked_items[i]
+ value_node.generate_evaluation_code(code)
+ code.putln('}')
+
+ starred_target.allocate(code)
+ target_list = starred_target.result()
+ code.putln("%s = PySequence_List(%s); %s" % (
+ target_list,
+ iterator_temp or rhs.py_result(),
+ code.error_goto_if_null(target_list, self.pos)))
+ code.put_gotref(target_list)
+
+ if iterator_temp:
+ code.put_decref_clear(iterator_temp, py_object_type)
+ code.funcstate.release_temp(iterator_temp)
+ else:
+ rhs.generate_disposal_code(code)
+
+ if unpacked_fixed_items_right:
+ code.globalstate.use_utility_code(raise_need_more_values_to_unpack)
+ length_temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+ code.putln('%s = PyList_GET_SIZE(%s);' % (length_temp, target_list))
+ code.putln("if (unlikely(%s < %d)) {" % (length_temp, len(unpacked_fixed_items_right)))
+ code.putln("__Pyx_RaiseNeedMoreValuesError(%d+%s); %s" % (
+ len(unpacked_fixed_items_left), length_temp,
+ code.error_goto(self.pos)))
+ code.putln('}')
+
+ for item in unpacked_fixed_items_right[::-1]:
+ item.allocate(code)
+ for i, (item, coerced_arg) in enumerate(zip(unpacked_fixed_items_right[::-1],
+ self.coerced_unpacked_items[::-1])):
+ code.putln('#if CYTHON_COMPILING_IN_CPYTHON')
+ code.putln("%s = PyList_GET_ITEM(%s, %s-%d); " % (
+ item.py_result(), target_list, length_temp, i+1))
+ # resize the list the hard way
+ code.putln("((PyVarObject*)%s)->ob_size--;" % target_list)
+ code.putln('#else')
+ code.putln("%s = PySequence_ITEM(%s, %s-%d); " % (
+ item.py_result(), target_list, length_temp, i+1))
+ code.putln('#endif')
+ code.put_gotref(item.py_result())
+ coerced_arg.generate_evaluation_code(code)
+
+ code.putln('#if !CYTHON_COMPILING_IN_CPYTHON')
+ sublist_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ code.putln('%s = PySequence_GetSlice(%s, 0, %s-%d); %s' % (
+ sublist_temp, target_list, length_temp, len(unpacked_fixed_items_right),
+ code.error_goto_if_null(sublist_temp, self.pos)))
+ code.put_gotref(sublist_temp)
+ code.funcstate.release_temp(length_temp)
+ code.put_decref(target_list, py_object_type)
+ code.putln('%s = %s; %s = NULL;' % (target_list, sublist_temp, sublist_temp))
+ code.putln('#else')
code.putln('(void)%s;' % sublist_temp) # avoid warning about unused variable
- code.funcstate.release_temp(sublist_temp)
- code.putln('#endif')
-
- for i, arg in enumerate(self.args):
- arg.generate_assignment_code(self.coerced_unpacked_items[i], code)
-
- def annotate(self, code):
- for arg in self.args:
- arg.annotate(code)
- if self.unpacked_items:
- for arg in self.unpacked_items:
- arg.annotate(code)
- for arg in self.coerced_unpacked_items:
- arg.annotate(code)
-
-
-class TupleNode(SequenceNode):
- # Tuple constructor.
-
- type = tuple_type
- is_partly_literal = False
-
- gil_message = "Constructing Python tuple"
-
+ code.funcstate.release_temp(sublist_temp)
+ code.putln('#endif')
+
+ for i, arg in enumerate(self.args):
+ arg.generate_assignment_code(self.coerced_unpacked_items[i], code)
+
+ def annotate(self, code):
+ for arg in self.args:
+ arg.annotate(code)
+ if self.unpacked_items:
+ for arg in self.unpacked_items:
+ arg.annotate(code)
+ for arg in self.coerced_unpacked_items:
+ arg.annotate(code)
+
+
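As a quick illustration, these are the two failure modes that the parallel unpacking code above guards against via raise_too_many_values_to_unpack and raise_need_more_values_to_unpack; both CPython and the Cython helper code report them as ValueError (plain Python, illustration only):

try:
    a, b = (1, 2, 3)            # more values than targets
except ValueError:
    pass                        # CPython: "too many values to unpack (expected 2)"

try:
    a, b, c = (1, 2)            # fewer values than targets
except ValueError:
    pass                        # CPython: "not enough values to unpack (expected 3, got 2)"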
+class TupleNode(SequenceNode):
+ # Tuple constructor.
+
+ type = tuple_type
+ is_partly_literal = False
+
+ gil_message = "Constructing Python tuple"
+
def infer_type(self, env):
if self.mult_factor or not self.args:
return tuple_type
@@ -7917,8 +7917,8 @@ class TupleNode(SequenceNode):
return tuple_type
return env.declare_tuple_type(self.pos, arg_types).type
- def analyse_types(self, env, skip_children=False):
- if len(self.args) == 0:
+ def analyse_types(self, env, skip_children=False):
+ if len(self.args) == 0:
self.is_temp = False
self.is_literal = True
return self
@@ -7945,15 +7945,15 @@ class TupleNode(SequenceNode):
if not node.mult_factor or (
node.mult_factor.is_literal and
isinstance(node.mult_factor.constant_result, _py_int_types)):
- node.is_temp = False
- node.is_literal = True
- else:
+ node.is_temp = False
+ node.is_literal = True
+ else:
if not node.mult_factor.type.is_pyobject:
node.mult_factor = node.mult_factor.coerce_to_pyobject(env)
node.is_temp = True
node.is_partly_literal = True
- return node
-
+ return node
+
def analyse_as_type(self, env):
# ctuple type
if not self.args:
@@ -7984,35 +7984,35 @@ class TupleNode(SequenceNode):
t.constant_result = list(self.constant_result)
return t
- def is_simple(self):
- # either temp or constant => always simple
- return True
-
- def nonlocally_immutable(self):
- # either temp or constant => always safe
- return True
-
- def calculate_result_code(self):
- if len(self.args) > 0:
- return self.result_code
- else:
- return Naming.empty_tuple
-
- def calculate_constant_result(self):
- self.constant_result = tuple([
- arg.constant_result for arg in self.args])
-
- def compile_time_value(self, denv):
- values = self.compile_time_value_list(denv)
- try:
- return tuple(values)
+ def is_simple(self):
+ # either temp or constant => always simple
+ return True
+
+ def nonlocally_immutable(self):
+ # either temp or constant => always safe
+ return True
+
+ def calculate_result_code(self):
+ if len(self.args) > 0:
+ return self.result_code
+ else:
+ return Naming.empty_tuple
+
+ def calculate_constant_result(self):
+ self.constant_result = tuple([
+ arg.constant_result for arg in self.args])
+
+ def compile_time_value(self, denv):
+ values = self.compile_time_value_list(denv)
+ try:
+ return tuple(values)
except Exception as e:
- self.compile_time_value_error(e)
-
- def generate_operation_code(self, code):
- if len(self.args) == 0:
- # result_code is Naming.empty_tuple
- return
+ self.compile_time_value_error(e)
+
+ def generate_operation_code(self, code):
+ if len(self.args) == 0:
+ # result_code is Naming.empty_tuple
+ return
if self.is_literal or self.is_partly_literal:
# The "mult_factor" is part of the deduplication if it is also constant, i.e. when
@@ -8031,56 +8031,56 @@ class TupleNode(SequenceNode):
code.putln('%s = PyNumber_Multiply(%s, %s); %s' % (
self.result(), tuple_target, self.mult_factor.py_result(),
code.error_goto_if_null(self.result(), self.pos)
- ))
+ ))
code.put_gotref(self.py_result())
- else:
+ else:
self.type.entry.used = True
- self.generate_sequence_packing_code(code)
-
-
-class ListNode(SequenceNode):
- # List constructor.
-
- # obj_conversion_errors [PyrexError] used internally
-    # original_args           [ExprNode]     used internally
-
- obj_conversion_errors = []
- type = list_type
- in_module_scope = False
-
- gil_message = "Constructing Python list"
-
- def type_dependencies(self, env):
- return ()
-
- def infer_type(self, env):
+ self.generate_sequence_packing_code(code)
+
+
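For context, the is_literal / is_partly_literal handling above relies on tuple immutability: a fully constant tuple can be built once and reused, whereas a tuple with a runtime mult_factor still needs a PyNumber_Multiply call when evaluated. A short plain-Python sketch of that distinction (illustration only; the names below are invented for the example):

CONST = (1, 2, 3)                  # analogue of a cached literal tuple

def repeated(n):
    # analogue of the is_partly_literal path: constant contents, runtime repeat count
    return (1, 2) * n

assert CONST == (1, 2, 3)
assert repeated(2) == (1, 2, 1, 2)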
+class ListNode(SequenceNode):
+ # List constructor.
+
+ # obj_conversion_errors [PyrexError] used internally
+    # original_args           [ExprNode]     used internally
+
+ obj_conversion_errors = []
+ type = list_type
+ in_module_scope = False
+
+ gil_message = "Constructing Python list"
+
+ def type_dependencies(self, env):
+ return ()
+
+ def infer_type(self, env):
# TODO: Infer non-object list arrays.
- return list_type
-
- def analyse_expressions(self, env):
+ return list_type
+
+ def analyse_expressions(self, env):
for arg in self.args:
if arg.is_starred:
arg.starred_expr_allowed_here = True
- node = SequenceNode.analyse_expressions(self, env)
- return node.coerce_to_pyobject(env)
-
- def analyse_types(self, env):
+ node = SequenceNode.analyse_expressions(self, env)
+ return node.coerce_to_pyobject(env)
+
+ def analyse_types(self, env):
with local_errors(ignore=True) as errors:
self.original_args = list(self.args)
node = SequenceNode.analyse_types(self, env)
node.obj_conversion_errors = errors
- if env.is_module_scope:
- self.in_module_scope = True
+ if env.is_module_scope:
+ self.in_module_scope = True
node = node._create_merge_node_if_necessary(env)
- return node
-
- def coerce_to(self, dst_type, env):
- if dst_type.is_pyobject:
- for err in self.obj_conversion_errors:
- report_error(err)
- self.obj_conversion_errors = []
- if not self.type.subtype_of(dst_type):
- error(self.pos, "Cannot coerce list to type '%s'" % dst_type)
+ return node
+
+ def coerce_to(self, dst_type, env):
+ if dst_type.is_pyobject:
+ for err in self.obj_conversion_errors:
+ report_error(err)
+ self.obj_conversion_errors = []
+ if not self.type.subtype_of(dst_type):
+ error(self.pos, "Cannot coerce list to type '%s'" % dst_type)
elif (dst_type.is_array or dst_type.is_ptr) and dst_type.base_type is not PyrexTypes.c_void_type:
array_length = len(self.args)
if self.mult_factor:
@@ -8091,46 +8091,46 @@ class ListNode(SequenceNode):
array_length *= self.mult_factor.constant_result
else:
error(self.pos, "Cannot coerce dynamically multiplied list to '%s'" % dst_type)
- base_type = dst_type.base_type
+ base_type = dst_type.base_type
self.type = PyrexTypes.CArrayType(base_type, array_length)
- for i in range(len(self.original_args)):
- arg = self.args[i]
- if isinstance(arg, CoerceToPyTypeNode):
- arg = arg.arg
- self.args[i] = arg.coerce_to(base_type, env)
+ for i in range(len(self.original_args)):
+ arg = self.args[i]
+ if isinstance(arg, CoerceToPyTypeNode):
+ arg = arg.arg
+ self.args[i] = arg.coerce_to(base_type, env)
elif dst_type.is_cpp_class:
# TODO(robertwb): Avoid object conversion for vector/list/set.
return TypecastNode(self.pos, operand=self, type=PyrexTypes.py_object_type).coerce_to(dst_type, env)
elif self.mult_factor:
error(self.pos, "Cannot coerce multiplied list to '%s'" % dst_type)
- elif dst_type.is_struct:
- if len(self.args) > len(dst_type.scope.var_entries):
+ elif dst_type.is_struct:
+ if len(self.args) > len(dst_type.scope.var_entries):
error(self.pos, "Too many members for '%s'" % dst_type)
- else:
- if len(self.args) < len(dst_type.scope.var_entries):
- warning(self.pos, "Too few members for '%s'" % dst_type, 1)
- for i, (arg, member) in enumerate(zip(self.original_args, dst_type.scope.var_entries)):
- if isinstance(arg, CoerceToPyTypeNode):
- arg = arg.arg
- self.args[i] = arg.coerce_to(member.type, env)
- self.type = dst_type
+ else:
+ if len(self.args) < len(dst_type.scope.var_entries):
+ warning(self.pos, "Too few members for '%s'" % dst_type, 1)
+ for i, (arg, member) in enumerate(zip(self.original_args, dst_type.scope.var_entries)):
+ if isinstance(arg, CoerceToPyTypeNode):
+ arg = arg.arg
+ self.args[i] = arg.coerce_to(member.type, env)
+ self.type = dst_type
elif dst_type.is_ctuple:
return self.coerce_to_ctuple(dst_type, env)
- else:
- self.type = error_type
- error(self.pos, "Cannot coerce list to type '%s'" % dst_type)
- return self
-
+ else:
+ self.type = error_type
+ error(self.pos, "Cannot coerce list to type '%s'" % dst_type)
+ return self
+
def as_list(self): # dummy for compatibility with TupleNode
return self
- def as_tuple(self):
- t = TupleNode(self.pos, args=self.args, mult_factor=self.mult_factor)
- if isinstance(self.constant_result, list):
- t.constant_result = tuple(self.constant_result)
- return t
-
- def allocate_temp_result(self, code):
+ def as_tuple(self):
+ t = TupleNode(self.pos, args=self.args, mult_factor=self.mult_factor)
+ if isinstance(self.constant_result, list):
+ t.constant_result = tuple(self.constant_result)
+ return t
+
+ def allocate_temp_result(self, code):
if self.type.is_array:
if self.in_module_scope:
self.temp_code = code.funcstate.allocate_temp(
@@ -8141,27 +8141,27 @@ class ListNode(SequenceNode):
# Yes, this means that we leak a temp array variable.
self.temp_code = code.funcstate.allocate_temp(
self.type, manage_ref=False, reusable=False)
- else:
- SequenceNode.allocate_temp_result(self, code)
-
- def calculate_constant_result(self):
- if self.mult_factor:
+ else:
+ SequenceNode.allocate_temp_result(self, code)
+
+ def calculate_constant_result(self):
+ if self.mult_factor:
raise ValueError() # may exceed the compile time memory
- self.constant_result = [
- arg.constant_result for arg in self.args]
-
- def compile_time_value(self, denv):
- l = self.compile_time_value_list(denv)
- if self.mult_factor:
- l *= self.mult_factor.compile_time_value(denv)
- return l
-
- def generate_operation_code(self, code):
- if self.type.is_pyobject:
- for err in self.obj_conversion_errors:
- report_error(err)
- self.generate_sequence_packing_code(code)
- elif self.type.is_array:
+ self.constant_result = [
+ arg.constant_result for arg in self.args]
+
+ def compile_time_value(self, denv):
+ l = self.compile_time_value_list(denv)
+ if self.mult_factor:
+ l *= self.mult_factor.compile_time_value(denv)
+ return l
+
+ def generate_operation_code(self, code):
+ if self.type.is_pyobject:
+ for err in self.obj_conversion_errors:
+ report_error(err)
+ self.generate_sequence_packing_code(code)
+ elif self.type.is_array:
if self.mult_factor:
code.putln("{")
code.putln("Py_ssize_t %s;" % Naming.quick_temp_cname)
@@ -8170,7 +8170,7 @@ class ListNode(SequenceNode):
offset = '+ (%d * %s)' % (len(self.args), Naming.quick_temp_cname)
else:
offset = ''
- for i, arg in enumerate(self.args):
+ for i, arg in enumerate(self.args):
if arg.type.is_array:
code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
code.putln("memcpy(&(%s[%s%s]), %s, sizeof(%s[0]));" % (
@@ -8186,98 +8186,98 @@ class ListNode(SequenceNode):
if self.mult_factor:
code.putln("}")
code.putln("}")
- elif self.type.is_struct:
- for arg, member in zip(self.args, self.type.scope.var_entries):
- code.putln("%s.%s = %s;" % (
+ elif self.type.is_struct:
+ for arg, member in zip(self.args, self.type.scope.var_entries):
+ code.putln("%s.%s = %s;" % (
self.result(),
member.cname,
arg.result()))
- else:
- raise InternalError("List type never specified")
-
-
-class ScopedExprNode(ExprNode):
- # Abstract base class for ExprNodes that have their own local
- # scope, such as generator expressions.
- #
- # expr_scope Scope the inner scope of the expression
-
- subexprs = []
- expr_scope = None
-
- # does this node really have a local scope, e.g. does it leak loop
- # variables or not? non-leaking Py3 behaviour is default, except
- # for list comprehensions where the behaviour differs in Py2 and
- # Py3 (set in Parsing.py based on parser context)
- has_local_scope = True
-
- def init_scope(self, outer_scope, expr_scope=None):
- if expr_scope is not None:
- self.expr_scope = expr_scope
- elif self.has_local_scope:
- self.expr_scope = Symtab.GeneratorExpressionScope(outer_scope)
- else:
- self.expr_scope = None
-
- def analyse_declarations(self, env):
- self.init_scope(env)
-
- def analyse_scoped_declarations(self, env):
- # this is called with the expr_scope as env
- pass
-
- def analyse_types(self, env):
- # no recursion here, the children will be analysed separately below
- return self
-
- def analyse_scoped_expressions(self, env):
- # this is called with the expr_scope as env
- return self
-
- def generate_evaluation_code(self, code):
- # set up local variables and free their references on exit
- generate_inner_evaluation_code = super(ScopedExprNode, self).generate_evaluation_code
- if not self.has_local_scope or not self.expr_scope.var_entries:
- # no local variables => delegate, done
- generate_inner_evaluation_code(code)
- return
-
- code.putln('{ /* enter inner scope */')
- py_entries = []
+ else:
+ raise InternalError("List type never specified")
+
+
+class ScopedExprNode(ExprNode):
+ # Abstract base class for ExprNodes that have their own local
+ # scope, such as generator expressions.
+ #
+ # expr_scope Scope the inner scope of the expression
+
+ subexprs = []
+ expr_scope = None
+
+ # does this node really have a local scope, e.g. does it leak loop
+ # variables or not? non-leaking Py3 behaviour is default, except
+ # for list comprehensions where the behaviour differs in Py2 and
+ # Py3 (set in Parsing.py based on parser context)
+ has_local_scope = True
+
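(Editorial aside, not part of the diff: the scoping comment above is easiest to see with a tiny sketch of the leaking vs. non-leaking behaviour that has_local_scope models; the variable names are invented.)

    x = 'outer'
    squares = [x * x for x in range(3)]
    # Py3 semantics (the default here): the comprehension runs in its own
    # scope, so x is still 'outer' afterwards.
    # Py2 list comprehensions had no such scope, so x would have leaked
    # and ended up bound to 2 in the enclosing namespace.
    print(x)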
+ def init_scope(self, outer_scope, expr_scope=None):
+ if expr_scope is not None:
+ self.expr_scope = expr_scope
+ elif self.has_local_scope:
+ self.expr_scope = Symtab.GeneratorExpressionScope(outer_scope)
+ else:
+ self.expr_scope = None
+
+ def analyse_declarations(self, env):
+ self.init_scope(env)
+
+ def analyse_scoped_declarations(self, env):
+ # this is called with the expr_scope as env
+ pass
+
+ def analyse_types(self, env):
+ # no recursion here, the children will be analysed separately below
+ return self
+
+ def analyse_scoped_expressions(self, env):
+ # this is called with the expr_scope as env
+ return self
+
+ def generate_evaluation_code(self, code):
+ # set up local variables and free their references on exit
+ generate_inner_evaluation_code = super(ScopedExprNode, self).generate_evaluation_code
+ if not self.has_local_scope or not self.expr_scope.var_entries:
+ # no local variables => delegate, done
+ generate_inner_evaluation_code(code)
+ return
+
+ code.putln('{ /* enter inner scope */')
+ py_entries = []
for _, entry in sorted(item for item in self.expr_scope.entries.items() if item[0]):
- if not entry.in_closure:
- if entry.type.is_pyobject and entry.used:
- py_entries.append(entry)
- if not py_entries:
- # no local Python references => no cleanup required
- generate_inner_evaluation_code(code)
- code.putln('} /* exit inner scope */')
- return
-
- # must free all local Python references at each exit point
+ if not entry.in_closure:
+ if entry.type.is_pyobject and entry.used:
+ py_entries.append(entry)
+ if not py_entries:
+ # no local Python references => no cleanup required
+ generate_inner_evaluation_code(code)
+ code.putln('} /* exit inner scope */')
+ return
+
+ # must free all local Python references at each exit point
old_loop_labels = code.new_loop_labels()
- old_error_label = code.new_error_label()
-
- generate_inner_evaluation_code(code)
-
- # normal (non-error) exit
+ old_error_label = code.new_error_label()
+
+ generate_inner_evaluation_code(code)
+
+ # normal (non-error) exit
self._generate_vars_cleanup(code, py_entries)
-
- # error/loop body exit points
- exit_scope = code.new_label('exit_scope')
- code.put_goto(exit_scope)
- for label, old_label in ([(code.error_label, old_error_label)] +
- list(zip(code.get_loop_labels(), old_loop_labels))):
- if code.label_used(label):
- code.put_label(label)
+
+ # error/loop body exit points
+ exit_scope = code.new_label('exit_scope')
+ code.put_goto(exit_scope)
+ for label, old_label in ([(code.error_label, old_error_label)] +
+ list(zip(code.get_loop_labels(), old_loop_labels))):
+ if code.label_used(label):
+ code.put_label(label)
self._generate_vars_cleanup(code, py_entries)
- code.put_goto(old_label)
- code.put_label(exit_scope)
- code.putln('} /* exit inner scope */')
-
- code.set_loop_labels(old_loop_labels)
- code.error_label = old_error_label
-
+ code.put_goto(old_label)
+ code.put_label(exit_scope)
+ code.putln('} /* exit inner scope */')
+
+ code.set_loop_labels(old_loop_labels)
+ code.error_label = old_error_label
+
def _generate_vars_cleanup(self, code, py_entries):
for entry in py_entries:
if entry.is_cglobal:
@@ -8285,154 +8285,154 @@ class ScopedExprNode(ExprNode):
code.put_decref_set(entry.cname, "Py_None")
else:
code.put_var_xdecref_clear(entry)
-
-
-class ComprehensionNode(ScopedExprNode):
- # A list/set/dict comprehension
-
- child_attrs = ["loop"]
-
- is_temp = True
+
+
+class ComprehensionNode(ScopedExprNode):
+ # A list/set/dict comprehension
+
+ child_attrs = ["loop"]
+
+ is_temp = True
constant_result = not_a_constant
-
- def infer_type(self, env):
- return self.type
-
- def analyse_declarations(self, env):
- self.append.target = self # this is used in the PyList_Append of the inner loop
- self.init_scope(env)
-
- def analyse_scoped_declarations(self, env):
- self.loop.analyse_declarations(env)
-
- def analyse_types(self, env):
- if not self.has_local_scope:
- self.loop = self.loop.analyse_expressions(env)
- return self
-
- def analyse_scoped_expressions(self, env):
- if self.has_local_scope:
- self.loop = self.loop.analyse_expressions(env)
- return self
-
- def may_be_none(self):
- return False
-
- def generate_result_code(self, code):
- self.generate_operation_code(code)
-
- def generate_operation_code(self, code):
- if self.type is Builtin.list_type:
- create_code = 'PyList_New(0)'
- elif self.type is Builtin.set_type:
- create_code = 'PySet_New(NULL)'
- elif self.type is Builtin.dict_type:
- create_code = 'PyDict_New()'
- else:
- raise InternalError("illegal type for comprehension: %s" % self.type)
- code.putln('%s = %s; %s' % (
- self.result(), create_code,
- code.error_goto_if_null(self.result(), self.pos)))
-
- code.put_gotref(self.result())
- self.loop.generate_execution_code(code)
-
- def annotate(self, code):
- self.loop.annotate(code)
-
-
-class ComprehensionAppendNode(Node):
- # Need to be careful to avoid infinite recursion:
- # target must not be in child_attrs/subexprs
-
- child_attrs = ['expr']
- target = None
-
- type = PyrexTypes.c_int_type
-
- def analyse_expressions(self, env):
- self.expr = self.expr.analyse_expressions(env)
- if not self.expr.type.is_pyobject:
- self.expr = self.expr.coerce_to_pyobject(env)
- return self
-
- def generate_execution_code(self, code):
- if self.target.type is list_type:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("ListCompAppend", "Optimize.c"))
- function = "__Pyx_ListComp_Append"
- elif self.target.type is set_type:
- function = "PySet_Add"
- else:
- raise InternalError(
- "Invalid type for comprehension node: %s" % self.target.type)
-
- self.expr.generate_evaluation_code(code)
- code.putln(code.error_goto_if("%s(%s, (PyObject*)%s)" % (
- function,
- self.target.result(),
- self.expr.result()
- ), self.pos))
- self.expr.generate_disposal_code(code)
- self.expr.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- self.expr.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.expr.annotate(code)
-
-class DictComprehensionAppendNode(ComprehensionAppendNode):
- child_attrs = ['key_expr', 'value_expr']
-
- def analyse_expressions(self, env):
- self.key_expr = self.key_expr.analyse_expressions(env)
- if not self.key_expr.type.is_pyobject:
- self.key_expr = self.key_expr.coerce_to_pyobject(env)
- self.value_expr = self.value_expr.analyse_expressions(env)
- if not self.value_expr.type.is_pyobject:
- self.value_expr = self.value_expr.coerce_to_pyobject(env)
- return self
-
- def generate_execution_code(self, code):
- self.key_expr.generate_evaluation_code(code)
- self.value_expr.generate_evaluation_code(code)
- code.putln(code.error_goto_if("PyDict_SetItem(%s, (PyObject*)%s, (PyObject*)%s)" % (
- self.target.result(),
- self.key_expr.result(),
- self.value_expr.result()
- ), self.pos))
- self.key_expr.generate_disposal_code(code)
- self.key_expr.free_temps(code)
- self.value_expr.generate_disposal_code(code)
- self.value_expr.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- self.key_expr.generate_function_definitions(env, code)
- self.value_expr.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.key_expr.annotate(code)
- self.value_expr.annotate(code)
-
-
+
+ def infer_type(self, env):
+ return self.type
+
+ def analyse_declarations(self, env):
+ self.append.target = self # this is used in the PyList_Append of the inner loop
+ self.init_scope(env)
+
+ def analyse_scoped_declarations(self, env):
+ self.loop.analyse_declarations(env)
+
+ def analyse_types(self, env):
+ if not self.has_local_scope:
+ self.loop = self.loop.analyse_expressions(env)
+ return self
+
+ def analyse_scoped_expressions(self, env):
+ if self.has_local_scope:
+ self.loop = self.loop.analyse_expressions(env)
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def generate_result_code(self, code):
+ self.generate_operation_code(code)
+
+ def generate_operation_code(self, code):
+ if self.type is Builtin.list_type:
+ create_code = 'PyList_New(0)'
+ elif self.type is Builtin.set_type:
+ create_code = 'PySet_New(NULL)'
+ elif self.type is Builtin.dict_type:
+ create_code = 'PyDict_New()'
+ else:
+ raise InternalError("illegal type for comprehension: %s" % self.type)
+ code.putln('%s = %s; %s' % (
+ self.result(), create_code,
+ code.error_goto_if_null(self.result(), self.pos)))
+
+ code.put_gotref(self.result())
+ self.loop.generate_execution_code(code)
+
+ def annotate(self, code):
+ self.loop.annotate(code)
+
+
+class ComprehensionAppendNode(Node):
+ # Need to be careful to avoid infinite recursion:
+ # target must not be in child_attrs/subexprs
+
+ child_attrs = ['expr']
+ target = None
+
+ type = PyrexTypes.c_int_type
+
+ def analyse_expressions(self, env):
+ self.expr = self.expr.analyse_expressions(env)
+ if not self.expr.type.is_pyobject:
+ self.expr = self.expr.coerce_to_pyobject(env)
+ return self
+
+ def generate_execution_code(self, code):
+ if self.target.type is list_type:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("ListCompAppend", "Optimize.c"))
+ function = "__Pyx_ListComp_Append"
+ elif self.target.type is set_type:
+ function = "PySet_Add"
+ else:
+ raise InternalError(
+ "Invalid type for comprehension node: %s" % self.target.type)
+
+ self.expr.generate_evaluation_code(code)
+ code.putln(code.error_goto_if("%s(%s, (PyObject*)%s)" % (
+ function,
+ self.target.result(),
+ self.expr.result()
+ ), self.pos))
+ self.expr.generate_disposal_code(code)
+ self.expr.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ self.expr.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.expr.annotate(code)
+
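(Editorial sketch, not part of the diff: the two node classes above cooperate roughly like the hand-written loop below, where ComprehensionNode creates the empty container and ComprehensionAppendNode emits the per-item append/add/setitem call; f, it and the local names are placeholders.)

    def list_comp_equivalent(f, it):
        result = []              # ComprehensionNode: PyList_New(0)
        for x in it:             # the generated inner loop
            result.append(f(x))  # ComprehensionAppendNode: __Pyx_ListComp_Append
        return result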
+class DictComprehensionAppendNode(ComprehensionAppendNode):
+ child_attrs = ['key_expr', 'value_expr']
+
+ def analyse_expressions(self, env):
+ self.key_expr = self.key_expr.analyse_expressions(env)
+ if not self.key_expr.type.is_pyobject:
+ self.key_expr = self.key_expr.coerce_to_pyobject(env)
+ self.value_expr = self.value_expr.analyse_expressions(env)
+ if not self.value_expr.type.is_pyobject:
+ self.value_expr = self.value_expr.coerce_to_pyobject(env)
+ return self
+
+ def generate_execution_code(self, code):
+ self.key_expr.generate_evaluation_code(code)
+ self.value_expr.generate_evaluation_code(code)
+ code.putln(code.error_goto_if("PyDict_SetItem(%s, (PyObject*)%s, (PyObject*)%s)" % (
+ self.target.result(),
+ self.key_expr.result(),
+ self.value_expr.result()
+ ), self.pos))
+ self.key_expr.generate_disposal_code(code)
+ self.key_expr.free_temps(code)
+ self.value_expr.generate_disposal_code(code)
+ self.value_expr.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ self.key_expr.generate_function_definitions(env, code)
+ self.value_expr.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.key_expr.annotate(code)
+ self.value_expr.annotate(code)
+
+
class InlinedGeneratorExpressionNode(ExprNode):
# An inlined generator expression whose result is calculated inside the
# loop and returned as the generator's single and only return value.
# This will only be created by transforms when replacing safe builtin
# calls on generator expressions.
- #
+ #
# gen GeneratorExpressionNode the generator, not containing any YieldExprNodes
# orig_func String the name of the builtin function this node replaces
# target ExprNode or None a 'target' for a ComprehensionAppend node
-
+
subexprs = ["gen"]
orig_func = None
target = None
is_temp = True
- type = py_object_type
-
+ type = py_object_type
+
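(For orientation only, not part of the diff: a "safe builtin call on a generator expression" of the kind this node is created for looks, at the user level, roughly like the snippet below; data is a placeholder.)

    data = range(10)
    # sum() over a generator expression can be inlined so the accumulation
    # happens directly in the generated loop rather than through a real
    # generator object; any(), all() and sorted() are treated similarly.
    total = sum(x * x for x in data)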
def __init__(self, pos, gen, comprehension_type=None, **kwargs):
gbody = gen.def_node.gbody
gbody.is_inlined = True
@@ -8444,17 +8444,17 @@ class InlinedGeneratorExpressionNode(ExprNode):
type=comprehension_type,
)
super(InlinedGeneratorExpressionNode, self).__init__(pos, gen=gen, **kwargs)
-
- def may_be_none(self):
+
+ def may_be_none(self):
return self.orig_func not in ('any', 'all', 'sorted')
-
- def infer_type(self, env):
+
+ def infer_type(self, env):
return self.type
-
- def analyse_types(self, env):
+
+ def analyse_types(self, env):
self.gen = self.gen.analyse_expressions(env)
- return self
-
+ return self
+
def generate_result_code(self, code):
code.putln("%s = __Pyx_Generator_Next(%s); %s" % (
self.result(), self.gen.result(),
@@ -8546,17 +8546,17 @@ class MergedSequenceNode(ExprNode):
assert self.type in (set_type, list_type, tuple_type)
self.args = args
- return self
-
+ return self
+
def may_be_none(self):
return False
-
+
def generate_evaluation_code(self, code):
code.mark_pos(self.pos)
self.allocate_temp_result(code)
-
+
is_set = self.type is set_type
-
+
args = iter(self.args)
item = next(args)
item.generate_evaluation_code(code)
@@ -8573,7 +8573,7 @@ class MergedSequenceNode(ExprNode):
code.put_gotref(self.py_result())
item.generate_disposal_code(code)
item.free_temps(code)
-
+
helpers = set()
if is_set:
add_func = "PySet_Add"
@@ -8581,7 +8581,7 @@ class MergedSequenceNode(ExprNode):
else:
add_func = "__Pyx_ListComp_Append"
extend_func = "__Pyx_PyList_Extend"
-
+
for item in args:
if (is_set and (item.is_set_literal or item.is_sequence_constructor) or
(item.is_sequence_constructor and not item.mult_factor)):
@@ -8635,106 +8635,106 @@ class SetNode(ExprNode):
"""
Set constructor.
"""
- subexprs = ['args']
+ subexprs = ['args']
type = set_type
is_set_literal = True
- gil_message = "Constructing Python set"
-
- def analyse_types(self, env):
- for i in range(len(self.args)):
- arg = self.args[i]
- arg = arg.analyse_types(env)
- self.args[i] = arg.coerce_to_pyobject(env)
- self.type = set_type
- self.is_temp = 1
- return self
-
- def may_be_none(self):
- return False
-
- def calculate_constant_result(self):
- self.constant_result = set([arg.constant_result for arg in self.args])
-
- def compile_time_value(self, denv):
- values = [arg.compile_time_value(denv) for arg in self.args]
- try:
- return set(values)
+ gil_message = "Constructing Python set"
+
+ def analyse_types(self, env):
+ for i in range(len(self.args)):
+ arg = self.args[i]
+ arg = arg.analyse_types(env)
+ self.args[i] = arg.coerce_to_pyobject(env)
+ self.type = set_type
+ self.is_temp = 1
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def calculate_constant_result(self):
+ self.constant_result = set([arg.constant_result for arg in self.args])
+
+ def compile_time_value(self, denv):
+ values = [arg.compile_time_value(denv) for arg in self.args]
+ try:
+ return set(values)
except Exception as e:
- self.compile_time_value_error(e)
-
- def generate_evaluation_code(self, code):
- for arg in self.args:
- arg.generate_evaluation_code(code)
- self.allocate_temp_result(code)
- code.putln(
- "%s = PySet_New(0); %s" % (
- self.result(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- for arg in self.args:
- code.put_error_if_neg(
- self.pos,
- "PySet_Add(%s, %s)" % (self.result(), arg.py_result()))
- arg.generate_disposal_code(code)
- arg.free_temps(code)
-
-
-class DictNode(ExprNode):
- # Dictionary constructor.
- #
- # key_value_pairs [DictItemNode]
- # exclude_null_values [boolean] Do not add NULL values to dict
- #
- # obj_conversion_errors [PyrexError] used internally
-
- subexprs = ['key_value_pairs']
- is_temp = 1
- exclude_null_values = False
- type = dict_type
- is_dict_literal = True
+ self.compile_time_value_error(e)
+
+ def generate_evaluation_code(self, code):
+ for arg in self.args:
+ arg.generate_evaluation_code(code)
+ self.allocate_temp_result(code)
+ code.putln(
+ "%s = PySet_New(0); %s" % (
+ self.result(),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+ for arg in self.args:
+ code.put_error_if_neg(
+ self.pos,
+ "PySet_Add(%s, %s)" % (self.result(), arg.py_result()))
+ arg.generate_disposal_code(code)
+ arg.free_temps(code)
+
+
+class DictNode(ExprNode):
+ # Dictionary constructor.
+ #
+ # key_value_pairs [DictItemNode]
+ # exclude_null_values [boolean] Do not add NULL values to dict
+ #
+ # obj_conversion_errors [PyrexError] used internally
+
+ subexprs = ['key_value_pairs']
+ is_temp = 1
+ exclude_null_values = False
+ type = dict_type
+ is_dict_literal = True
reject_duplicates = False
-
- obj_conversion_errors = []
-
- @classmethod
- def from_pairs(cls, pos, pairs):
- return cls(pos, key_value_pairs=[
- DictItemNode(pos, key=k, value=v) for k, v in pairs])
-
- def calculate_constant_result(self):
- self.constant_result = dict([
- item.constant_result for item in self.key_value_pairs])
-
- def compile_time_value(self, denv):
- pairs = [(item.key.compile_time_value(denv), item.value.compile_time_value(denv))
- for item in self.key_value_pairs]
- try:
- return dict(pairs)
+
+ obj_conversion_errors = []
+
+ @classmethod
+ def from_pairs(cls, pos, pairs):
+ return cls(pos, key_value_pairs=[
+ DictItemNode(pos, key=k, value=v) for k, v in pairs])
+
+ def calculate_constant_result(self):
+ self.constant_result = dict([
+ item.constant_result for item in self.key_value_pairs])
+
+ def compile_time_value(self, denv):
+ pairs = [(item.key.compile_time_value(denv), item.value.compile_time_value(denv))
+ for item in self.key_value_pairs]
+ try:
+ return dict(pairs)
except Exception as e:
- self.compile_time_value_error(e)
-
- def type_dependencies(self, env):
- return ()
-
- def infer_type(self, env):
+ self.compile_time_value_error(e)
+
+ def type_dependencies(self, env):
+ return ()
+
+ def infer_type(self, env):
# TODO: Infer struct constructors.
- return dict_type
-
- def analyse_types(self, env):
+ return dict_type
+
+ def analyse_types(self, env):
with local_errors(ignore=True) as errors:
self.key_value_pairs = [
item.analyse_types(env)
for item in self.key_value_pairs
]
self.obj_conversion_errors = errors
- return self
-
- def may_be_none(self):
- return False
-
- def coerce_to(self, dst_type, env):
- if dst_type.is_pyobject:
- self.release_errors()
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def coerce_to(self, dst_type, env):
+ if dst_type.is_pyobject:
+ self.release_errors()
if self.type.is_struct_or_union:
if not dict_type.subtype_of(dst_type):
error(self.pos, "Cannot interpret struct as non-dict type '%s'" % dst_type)
@@ -8742,67 +8742,67 @@ class DictNode(ExprNode):
DictItemNode(item.pos, key=item.key.coerce_to_pyobject(env),
value=item.value.coerce_to_pyobject(env))
for item in self.key_value_pairs])
- if not self.type.subtype_of(dst_type):
- error(self.pos, "Cannot interpret dict as type '%s'" % dst_type)
- elif dst_type.is_struct_or_union:
- self.type = dst_type
- if not dst_type.is_struct and len(self.key_value_pairs) != 1:
- error(self.pos, "Exactly one field must be specified to convert to union '%s'" % dst_type)
- elif dst_type.is_struct and len(self.key_value_pairs) < len(dst_type.scope.var_entries):
- warning(self.pos, "Not all members given for struct '%s'" % dst_type, 1)
- for item in self.key_value_pairs:
- if isinstance(item.key, CoerceToPyTypeNode):
- item.key = item.key.arg
- if not item.key.is_string_literal:
- error(item.key.pos, "Invalid struct field identifier")
- item.key = StringNode(item.key.pos, value="<error>")
- else:
- key = str(item.key.value) # converts string literals to unicode in Py3
- member = dst_type.scope.lookup_here(key)
- if not member:
- error(item.key.pos, "struct '%s' has no field '%s'" % (dst_type, key))
- else:
- value = item.value
- if isinstance(value, CoerceToPyTypeNode):
- value = value.arg
- item.value = value.coerce_to(member.type, env)
- else:
- self.type = error_type
- error(self.pos, "Cannot interpret dict as type '%s'" % dst_type)
- return self
-
- def release_errors(self):
- for err in self.obj_conversion_errors:
- report_error(err)
- self.obj_conversion_errors = []
-
- gil_message = "Constructing Python dict"
-
- def generate_evaluation_code(self, code):
- # Custom method used here because key-value
- # pairs are evaluated and used one at a time.
- code.mark_pos(self.pos)
- self.allocate_temp_result(code)
+ if not self.type.subtype_of(dst_type):
+ error(self.pos, "Cannot interpret dict as type '%s'" % dst_type)
+ elif dst_type.is_struct_or_union:
+ self.type = dst_type
+ if not dst_type.is_struct and len(self.key_value_pairs) != 1:
+ error(self.pos, "Exactly one field must be specified to convert to union '%s'" % dst_type)
+ elif dst_type.is_struct and len(self.key_value_pairs) < len(dst_type.scope.var_entries):
+ warning(self.pos, "Not all members given for struct '%s'" % dst_type, 1)
+ for item in self.key_value_pairs:
+ if isinstance(item.key, CoerceToPyTypeNode):
+ item.key = item.key.arg
+ if not item.key.is_string_literal:
+ error(item.key.pos, "Invalid struct field identifier")
+ item.key = StringNode(item.key.pos, value="<error>")
+ else:
+ key = str(item.key.value) # converts string literals to unicode in Py3
+ member = dst_type.scope.lookup_here(key)
+ if not member:
+ error(item.key.pos, "struct '%s' has no field '%s'" % (dst_type, key))
+ else:
+ value = item.value
+ if isinstance(value, CoerceToPyTypeNode):
+ value = value.arg
+ item.value = value.coerce_to(member.type, env)
+ else:
+ self.type = error_type
+ error(self.pos, "Cannot interpret dict as type '%s'" % dst_type)
+ return self
+
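(Editorial sketch, not part of the diff: the struct branch of coerce_to above is what lets a dict literal initialise a C struct in user code; the struct name and fields below are hypothetical.)

    # hypothetical .pyx snippet
    cdef struct Point:
        double x
        double y

    cdef Point p = {'x': 1.0, 'y': 2.0}   # DictNode coerced to a struct value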
+ def release_errors(self):
+ for err in self.obj_conversion_errors:
+ report_error(err)
+ self.obj_conversion_errors = []
+
+ gil_message = "Constructing Python dict"
+
+ def generate_evaluation_code(self, code):
+ # Custom method used here because key-value
+ # pairs are evaluated and used one at a time.
+ code.mark_pos(self.pos)
+ self.allocate_temp_result(code)
is_dict = self.type.is_pyobject
if is_dict:
- self.release_errors()
- code.putln(
+ self.release_errors()
+ code.putln(
"%s = __Pyx_PyDict_NewPresized(%d); %s" % (
- self.result(),
+ self.result(),
len(self.key_value_pairs),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
keys_seen = set()
key_type = None
needs_error_helper = False
- for item in self.key_value_pairs:
- item.generate_evaluation_code(code)
+ for item in self.key_value_pairs:
+ item.generate_evaluation_code(code)
if is_dict:
- if self.exclude_null_values:
- code.putln('if (%s) {' % item.value.py_result())
+ if self.exclude_null_values:
+ code.putln('if (%s) {' % item.value.py_result())
key = item.key
if self.reject_duplicates:
if keys_seen is not None:
@@ -8838,310 +8838,310 @@ class DictNode(ExprNode):
item.value.py_result()))
if self.reject_duplicates and keys_seen is None:
code.putln('}')
- if self.exclude_null_values:
- code.putln('}')
- else:
- code.putln("%s.%s = %s;" % (
- self.result(),
- item.key.value,
- item.value.result()))
- item.generate_disposal_code(code)
- item.free_temps(code)
-
+ if self.exclude_null_values:
+ code.putln('}')
+ else:
+ code.putln("%s.%s = %s;" % (
+ self.result(),
+ item.key.value,
+ item.value.result()))
+ item.generate_disposal_code(code)
+ item.free_temps(code)
+
if needs_error_helper:
code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseDoubleKeywords", "FunctionArguments.c"))
- def annotate(self, code):
- for item in self.key_value_pairs:
- item.annotate(code)
-
-
-class DictItemNode(ExprNode):
- # Represents a single item in a DictNode
- #
- # key ExprNode
- # value ExprNode
- subexprs = ['key', 'value']
-
- nogil_check = None # Parent DictNode takes care of it
-
- def calculate_constant_result(self):
- self.constant_result = (
- self.key.constant_result, self.value.constant_result)
-
- def analyse_types(self, env):
- self.key = self.key.analyse_types(env)
- self.value = self.value.analyse_types(env)
- self.key = self.key.coerce_to_pyobject(env)
- self.value = self.value.coerce_to_pyobject(env)
- return self
-
- def generate_evaluation_code(self, code):
- self.key.generate_evaluation_code(code)
- self.value.generate_evaluation_code(code)
-
- def generate_disposal_code(self, code):
- self.key.generate_disposal_code(code)
- self.value.generate_disposal_code(code)
-
- def free_temps(self, code):
- self.key.free_temps(code)
- self.value.free_temps(code)
-
- def __iter__(self):
- return iter([self.key, self.value])
-
-
-class SortedDictKeysNode(ExprNode):
- # build sorted list of dict keys, e.g. for dir()
- subexprs = ['arg']
-
- is_temp = True
-
- def __init__(self, arg):
- ExprNode.__init__(self, arg.pos, arg=arg)
- self.type = Builtin.list_type
-
- def analyse_types(self, env):
- arg = self.arg.analyse_types(env)
- if arg.type is Builtin.dict_type:
- arg = arg.as_none_safe_node(
- "'NoneType' object is not iterable")
- self.arg = arg
- return self
-
- def may_be_none(self):
- return False
-
- def generate_result_code(self, code):
- dict_result = self.arg.py_result()
- if self.arg.type is Builtin.dict_type:
- code.putln('%s = PyDict_Keys(%s); %s' % (
- self.result(), dict_result,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- else:
- # originally used PyMapping_Keys() here, but that may return a tuple
- code.globalstate.use_utility_code(UtilityCode.load_cached(
- 'PyObjectCallMethod0', 'ObjectHandling.c'))
- keys_cname = code.intern_identifier(StringEncoding.EncodedString("keys"))
- code.putln('%s = __Pyx_PyObject_CallMethod0(%s, %s); %s' % (
- self.result(), dict_result, keys_cname,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- code.putln("if (unlikely(!PyList_Check(%s))) {" % self.result())
- code.put_decref_set(self.result(), "PySequence_List(%s)" % self.result())
- code.putln(code.error_goto_if_null(self.result(), self.pos))
- code.put_gotref(self.py_result())
- code.putln("}")
- code.put_error_if_neg(
- self.pos, 'PyList_Sort(%s)' % self.py_result())
-
-
-class ModuleNameMixin(object):
- def get_py_mod_name(self, code):
- return code.get_py_string_const(
- self.module_name, identifier=True)
-
- def get_py_qualified_name(self, code):
- return code.get_py_string_const(
- self.qualname, identifier=True)
-
-
-class ClassNode(ExprNode, ModuleNameMixin):
- # Helper class used in the implementation of Python
- # class definitions. Constructs a class object given
- # a name, tuple of bases and class dictionary.
- #
- # name EncodedString Name of the class
+ def annotate(self, code):
+ for item in self.key_value_pairs:
+ item.annotate(code)
+
+
+class DictItemNode(ExprNode):
+ # Represents a single item in a DictNode
+ #
+ # key ExprNode
+ # value ExprNode
+ subexprs = ['key', 'value']
+
+ nogil_check = None # Parent DictNode takes care of it
+
+ def calculate_constant_result(self):
+ self.constant_result = (
+ self.key.constant_result, self.value.constant_result)
+
+ def analyse_types(self, env):
+ self.key = self.key.analyse_types(env)
+ self.value = self.value.analyse_types(env)
+ self.key = self.key.coerce_to_pyobject(env)
+ self.value = self.value.coerce_to_pyobject(env)
+ return self
+
+ def generate_evaluation_code(self, code):
+ self.key.generate_evaluation_code(code)
+ self.value.generate_evaluation_code(code)
+
+ def generate_disposal_code(self, code):
+ self.key.generate_disposal_code(code)
+ self.value.generate_disposal_code(code)
+
+ def free_temps(self, code):
+ self.key.free_temps(code)
+ self.value.free_temps(code)
+
+ def __iter__(self):
+ return iter([self.key, self.value])
+
+
+class SortedDictKeysNode(ExprNode):
+ # build sorted list of dict keys, e.g. for dir()
+ subexprs = ['arg']
+
+ is_temp = True
+
+ def __init__(self, arg):
+ ExprNode.__init__(self, arg.pos, arg=arg)
+ self.type = Builtin.list_type
+
+ def analyse_types(self, env):
+ arg = self.arg.analyse_types(env)
+ if arg.type is Builtin.dict_type:
+ arg = arg.as_none_safe_node(
+ "'NoneType' object is not iterable")
+ self.arg = arg
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def generate_result_code(self, code):
+ dict_result = self.arg.py_result()
+ if self.arg.type is Builtin.dict_type:
+ code.putln('%s = PyDict_Keys(%s); %s' % (
+ self.result(), dict_result,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+ else:
+ # originally used PyMapping_Keys() here, but that may return a tuple
+ code.globalstate.use_utility_code(UtilityCode.load_cached(
+ 'PyObjectCallMethod0', 'ObjectHandling.c'))
+ keys_cname = code.intern_identifier(StringEncoding.EncodedString("keys"))
+ code.putln('%s = __Pyx_PyObject_CallMethod0(%s, %s); %s' % (
+ self.result(), dict_result, keys_cname,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+ code.putln("if (unlikely(!PyList_Check(%s))) {" % self.result())
+ code.put_decref_set(self.result(), "PySequence_List(%s)" % self.result())
+ code.putln(code.error_goto_if_null(self.result(), self.pos))
+ code.put_gotref(self.py_result())
+ code.putln("}")
+ code.put_error_if_neg(
+ self.pos, 'PyList_Sort(%s)' % self.py_result())
+
+
+class ModuleNameMixin(object):
+ def get_py_mod_name(self, code):
+ return code.get_py_string_const(
+ self.module_name, identifier=True)
+
+ def get_py_qualified_name(self, code):
+ return code.get_py_string_const(
+ self.qualname, identifier=True)
+
+
+class ClassNode(ExprNode, ModuleNameMixin):
+ # Helper class used in the implementation of Python
+ # class definitions. Constructs a class object given
+ # a name, tuple of bases and class dictionary.
+ #
+ # name EncodedString Name of the class
# class_def_node PyClassDefNode PyClassDefNode defining this class
- # doc ExprNode or None Doc string
- # module_name EncodedString Name of defining module
-
+ # doc ExprNode or None Doc string
+ # module_name EncodedString Name of defining module
+
subexprs = ['doc']
- type = py_object_type
- is_temp = True
-
- def infer_type(self, env):
- # TODO: could return 'type' in some cases
- return py_object_type
-
- def analyse_types(self, env):
- if self.doc:
- self.doc = self.doc.analyse_types(env)
- self.doc = self.doc.coerce_to_pyobject(env)
- env.use_utility_code(UtilityCode.load_cached("CreateClass", "ObjectHandling.c"))
- return self
-
- def may_be_none(self):
- return True
-
- gil_message = "Constructing Python class"
-
- def generate_result_code(self, code):
+ type = py_object_type
+ is_temp = True
+
+ def infer_type(self, env):
+ # TODO: could return 'type' in some cases
+ return py_object_type
+
+ def analyse_types(self, env):
+ if self.doc:
+ self.doc = self.doc.analyse_types(env)
+ self.doc = self.doc.coerce_to_pyobject(env)
+ env.use_utility_code(UtilityCode.load_cached("CreateClass", "ObjectHandling.c"))
+ return self
+
+ def may_be_none(self):
+ return True
+
+ gil_message = "Constructing Python class"
+
+ def generate_result_code(self, code):
class_def_node = self.class_def_node
- cname = code.intern_identifier(self.name)
-
- if self.doc:
- code.put_error_if_neg(self.pos,
- 'PyDict_SetItem(%s, %s, %s)' % (
+ cname = code.intern_identifier(self.name)
+
+ if self.doc:
+ code.put_error_if_neg(self.pos,
+ 'PyDict_SetItem(%s, %s, %s)' % (
class_def_node.dict.py_result(),
- code.intern_identifier(
- StringEncoding.EncodedString("__doc__")),
- self.doc.py_result()))
- py_mod_name = self.get_py_mod_name(code)
- qualname = self.get_py_qualified_name(code)
- code.putln(
- '%s = __Pyx_CreateClass(%s, %s, %s, %s, %s); %s' % (
- self.result(),
+ code.intern_identifier(
+ StringEncoding.EncodedString("__doc__")),
+ self.doc.py_result()))
+ py_mod_name = self.get_py_mod_name(code)
+ qualname = self.get_py_qualified_name(code)
+ code.putln(
+ '%s = __Pyx_CreateClass(%s, %s, %s, %s, %s); %s' % (
+ self.result(),
class_def_node.bases.py_result(),
class_def_node.dict.py_result(),
- cname,
- qualname,
- py_mod_name,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class Py3ClassNode(ExprNode):
- # Helper class used in the implementation of Python3+
- # class definitions. Constructs a class object given
- # a name, tuple of bases and class dictionary.
- #
- # name EncodedString Name of the class
- # module_name EncodedString Name of defining module
+ cname,
+ qualname,
+ py_mod_name,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class Py3ClassNode(ExprNode):
+ # Helper class used in the implementation of Python3+
+ # class definitions. Constructs a class object given
+ # a name, tuple of bases and class dictionary.
+ #
+ # name EncodedString Name of the class
+ # module_name EncodedString Name of defining module
# class_def_node PyClassDefNode PyClassDefNode defining this class
- # calculate_metaclass bool should call CalculateMetaclass()
- # allow_py2_metaclass bool should look for Py2 metaclass
-
- subexprs = []
- type = py_object_type
- is_temp = True
-
- def infer_type(self, env):
- # TODO: could return 'type' in some cases
- return py_object_type
-
- def analyse_types(self, env):
- return self
-
- def may_be_none(self):
- return True
-
- gil_message = "Constructing Python class"
-
- def generate_result_code(self, code):
- code.globalstate.use_utility_code(UtilityCode.load_cached("Py3ClassCreate", "ObjectHandling.c"))
- cname = code.intern_identifier(self.name)
+ # calculate_metaclass bool should call CalculateMetaclass()
+ # allow_py2_metaclass bool should look for Py2 metaclass
+
+ subexprs = []
+ type = py_object_type
+ is_temp = True
+
+ def infer_type(self, env):
+ # TODO: could return 'type' in some cases
+ return py_object_type
+
+ def analyse_types(self, env):
+ return self
+
+ def may_be_none(self):
+ return True
+
+ gil_message = "Constructing Python class"
+
+ def generate_result_code(self, code):
+ code.globalstate.use_utility_code(UtilityCode.load_cached("Py3ClassCreate", "ObjectHandling.c"))
+ cname = code.intern_identifier(self.name)
class_def_node = self.class_def_node
mkw = class_def_node.mkw.py_result() if class_def_node.mkw else 'NULL'
if class_def_node.metaclass:
metaclass = class_def_node.metaclass.py_result()
- else:
- metaclass = "((PyObject*)&__Pyx_DefaultClassType)"
- code.putln(
- '%s = __Pyx_Py3ClassCreate(%s, %s, %s, %s, %s, %d, %d); %s' % (
- self.result(),
- metaclass,
- cname,
+ else:
+ metaclass = "((PyObject*)&__Pyx_DefaultClassType)"
+ code.putln(
+ '%s = __Pyx_Py3ClassCreate(%s, %s, %s, %s, %s, %d, %d); %s' % (
+ self.result(),
+ metaclass,
+ cname,
class_def_node.bases.py_result(),
class_def_node.dict.py_result(),
- mkw,
- self.calculate_metaclass,
- self.allow_py2_metaclass,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class PyClassMetaclassNode(ExprNode):
- # Helper class holds Python3 metaclass object
- #
+ mkw,
+ self.calculate_metaclass,
+ self.allow_py2_metaclass,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class PyClassMetaclassNode(ExprNode):
+ # Helper class holds Python3 metaclass object
+ #
# class_def_node PyClassDefNode PyClassDefNode defining this class
-
- subexprs = []
-
- def analyse_types(self, env):
- self.type = py_object_type
- self.is_temp = True
- return self
-
- def may_be_none(self):
- return True
-
- def generate_result_code(self, code):
+
+ subexprs = []
+
+ def analyse_types(self, env):
+ self.type = py_object_type
+ self.is_temp = True
+ return self
+
+ def may_be_none(self):
+ return True
+
+ def generate_result_code(self, code):
bases = self.class_def_node.bases
mkw = self.class_def_node.mkw
if mkw:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("Py3MetaclassGet", "ObjectHandling.c"))
- call = "__Pyx_Py3MetaclassGet(%s, %s)" % (
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("Py3MetaclassGet", "ObjectHandling.c"))
+ call = "__Pyx_Py3MetaclassGet(%s, %s)" % (
bases.result(),
mkw.result())
- else:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("CalculateMetaclass", "ObjectHandling.c"))
- call = "__Pyx_CalculateMetaclass(NULL, %s)" % (
+ else:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("CalculateMetaclass", "ObjectHandling.c"))
+ call = "__Pyx_CalculateMetaclass(NULL, %s)" % (
bases.result())
- code.putln(
- "%s = %s; %s" % (
- self.result(), call,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class PyClassNamespaceNode(ExprNode, ModuleNameMixin):
- # Helper class holds Python3 namespace object
- #
- # All of these are not owned by this node
+ code.putln(
+ "%s = %s; %s" % (
+ self.result(), call,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class PyClassNamespaceNode(ExprNode, ModuleNameMixin):
+ # Helper class holds Python3 namespace object
+ #
+ # All of these are not owned by this node
# class_def_node PyClassDefNode PyClassDefNode defining this class
- # doc ExprNode or None Doc string (owned)
-
- subexprs = ['doc']
-
- def analyse_types(self, env):
- if self.doc:
+ # doc ExprNode or None Doc string (owned)
+
+ subexprs = ['doc']
+
+ def analyse_types(self, env):
+ if self.doc:
self.doc = self.doc.analyse_types(env).coerce_to_pyobject(env)
- self.type = py_object_type
- self.is_temp = 1
- return self
-
- def may_be_none(self):
- return True
-
- def generate_result_code(self, code):
- cname = code.intern_identifier(self.name)
- py_mod_name = self.get_py_mod_name(code)
- qualname = self.get_py_qualified_name(code)
+ self.type = py_object_type
+ self.is_temp = 1
+ return self
+
+ def may_be_none(self):
+ return True
+
+ def generate_result_code(self, code):
+ cname = code.intern_identifier(self.name)
+ py_mod_name = self.get_py_mod_name(code)
+ qualname = self.get_py_qualified_name(code)
class_def_node = self.class_def_node
null = "(PyObject *) NULL"
doc_code = self.doc.result() if self.doc else null
mkw = class_def_node.mkw.py_result() if class_def_node.mkw else null
metaclass = class_def_node.metaclass.py_result() if class_def_node.metaclass else null
- code.putln(
- "%s = __Pyx_Py3MetaclassPrepare(%s, %s, %s, %s, %s, %s, %s); %s" % (
- self.result(),
- metaclass,
+ code.putln(
+ "%s = __Pyx_Py3MetaclassPrepare(%s, %s, %s, %s, %s, %s, %s); %s" % (
+ self.result(),
+ metaclass,
class_def_node.bases.result(),
- cname,
- qualname,
- mkw,
- py_mod_name,
- doc_code,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class ClassCellInjectorNode(ExprNode):
- # Initialize CyFunction.func_classobj
- is_temp = True
- type = py_object_type
- subexprs = []
- is_active = False
-
- def analyse_expressions(self, env):
- return self
-
+ cname,
+ qualname,
+ mkw,
+ py_mod_name,
+ doc_code,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+
+class ClassCellInjectorNode(ExprNode):
+ # Initialize CyFunction.func_classobj
+ is_temp = True
+ type = py_object_type
+ subexprs = []
+ is_active = False
+
+ def analyse_expressions(self, env):
+ return self
+
def generate_result_code(self, code):
assert self.is_active
code.putln(
@@ -9149,96 +9149,96 @@ class ClassCellInjectorNode(ExprNode):
self.result(),
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.result())
-
- def generate_injection_code(self, code, classobj_cname):
+
+ def generate_injection_code(self, code, classobj_cname):
assert self.is_active
code.globalstate.use_utility_code(
UtilityCode.load_cached("CyFunctionClassCell", "CythonFunction.c"))
code.put_error_if_neg(self.pos, '__Pyx_CyFunction_InitClassCell(%s, %s)' % (
self.result(), classobj_cname))
-
-
-class ClassCellNode(ExprNode):
- # Class Cell for noargs super()
- subexprs = []
- is_temp = True
- is_generator = False
- type = py_object_type
-
- def analyse_types(self, env):
- return self
-
- def generate_result_code(self, code):
- if not self.is_generator:
- code.putln('%s = __Pyx_CyFunction_GetClassObj(%s);' % (
- self.result(),
- Naming.self_cname))
- else:
- code.putln('%s = %s->classobj;' % (
- self.result(), Naming.generator_cname))
- code.putln(
- 'if (!%s) { PyErr_SetString(PyExc_SystemError, '
- '"super(): empty __class__ cell"); %s }' % (
- self.result(),
- code.error_goto(self.pos)))
- code.put_incref(self.result(), py_object_type)
-
-
-class PyCFunctionNode(ExprNode, ModuleNameMixin):
- # Helper class used in the implementation of Python
- # functions. Constructs a PyCFunction object
- # from a PyMethodDef struct.
- #
- # pymethdef_cname string PyMethodDef structure
- # self_object ExprNode or None
- # binding bool
- # def_node DefNode the Python function node
- # module_name EncodedString Name of defining module
- # code_object CodeObjectNode the PyCodeObject creator node
-
- subexprs = ['code_object', 'defaults_tuple', 'defaults_kwdict',
- 'annotations_dict']
-
- self_object = None
- code_object = None
- binding = False
- def_node = None
- defaults = None
- defaults_struct = None
- defaults_pyobjects = 0
- defaults_tuple = None
- defaults_kwdict = None
- annotations_dict = None
-
- type = py_object_type
- is_temp = 1
-
- specialized_cpdefs = None
- is_specialization = False
-
- @classmethod
- def from_defnode(cls, node, binding):
- return cls(node.pos,
- def_node=node,
- pymethdef_cname=node.entry.pymethdef_cname,
- binding=binding or node.specialized_cpdefs,
- specialized_cpdefs=node.specialized_cpdefs,
- code_object=CodeObjectNode(node))
-
- def analyse_types(self, env):
- if self.binding:
- self.analyse_default_args(env)
- return self
-
- def analyse_default_args(self, env):
- """
- Handle a function's non-literal default arguments.
- """
- nonliteral_objects = []
- nonliteral_other = []
- default_args = []
- default_kwargs = []
- annotations = []
+
+
+class ClassCellNode(ExprNode):
+ # Class Cell for noargs super()
+ subexprs = []
+ is_temp = True
+ is_generator = False
+ type = py_object_type
+
+ def analyse_types(self, env):
+ return self
+
+ def generate_result_code(self, code):
+ if not self.is_generator:
+ code.putln('%s = __Pyx_CyFunction_GetClassObj(%s);' % (
+ self.result(),
+ Naming.self_cname))
+ else:
+ code.putln('%s = %s->classobj;' % (
+ self.result(), Naming.generator_cname))
+ code.putln(
+ 'if (!%s) { PyErr_SetString(PyExc_SystemError, '
+ '"super(): empty __class__ cell"); %s }' % (
+ self.result(),
+ code.error_goto(self.pos)))
+ code.put_incref(self.result(), py_object_type)
+
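(Editorial illustration, not part of the diff, of why the class cell exists: zero-argument super() only works if the method can reach the __class__ cell that ClassCellNode/ClassCellInjectorNode wire up for compiled methods; Base and Greeter are invented names.)

    class Base(object):
        def greet(self):
            return "hello"

    class Greeter(Base):
        def greet(self):
            # no-argument super() relies on the __class__ cell provided
            # by the class-cell nodes above
            return super().greet() + " from Greeter"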
+
+class PyCFunctionNode(ExprNode, ModuleNameMixin):
+ # Helper class used in the implementation of Python
+ # functions. Constructs a PyCFunction object
+ # from a PyMethodDef struct.
+ #
+ # pymethdef_cname string PyMethodDef structure
+ # self_object ExprNode or None
+ # binding bool
+ # def_node DefNode the Python function node
+ # module_name EncodedString Name of defining module
+ # code_object CodeObjectNode the PyCodeObject creator node
+
+ subexprs = ['code_object', 'defaults_tuple', 'defaults_kwdict',
+ 'annotations_dict']
+
+ self_object = None
+ code_object = None
+ binding = False
+ def_node = None
+ defaults = None
+ defaults_struct = None
+ defaults_pyobjects = 0
+ defaults_tuple = None
+ defaults_kwdict = None
+ annotations_dict = None
+
+ type = py_object_type
+ is_temp = 1
+
+ specialized_cpdefs = None
+ is_specialization = False
+
+ @classmethod
+ def from_defnode(cls, node, binding):
+ return cls(node.pos,
+ def_node=node,
+ pymethdef_cname=node.entry.pymethdef_cname,
+ binding=binding or node.specialized_cpdefs,
+ specialized_cpdefs=node.specialized_cpdefs,
+ code_object=CodeObjectNode(node))
+
+ def analyse_types(self, env):
+ if self.binding:
+ self.analyse_default_args(env)
+ return self
+
+ def analyse_default_args(self, env):
+ """
+ Handle a function's non-literal default arguments.
+ """
+ nonliteral_objects = []
+ nonliteral_other = []
+ default_args = []
+ default_kwargs = []
+ annotations = []
# For global cpdef functions and def/cpdef methods in cdef classes, we must use global constants
# for default arguments to avoid the dependency on the CyFunction object as 'self' argument
@@ -9247,23 +9247,23 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin):
# TODO: change CyFunction implementation to pass both function object and owning object for method calls
must_use_constants = env.is_c_class_scope or (self.def_node.is_wrapper and env.is_module_scope)
- for arg in self.def_node.args:
+ for arg in self.def_node.args:
if arg.default and not must_use_constants:
- if not arg.default.is_literal:
- arg.is_dynamic = True
- if arg.type.is_pyobject:
- nonliteral_objects.append(arg)
- else:
- nonliteral_other.append(arg)
- else:
- arg.default = DefaultLiteralArgNode(arg.pos, arg.default)
- if arg.kw_only:
- default_kwargs.append(arg)
- else:
- default_args.append(arg)
- if arg.annotation:
+ if not arg.default.is_literal:
+ arg.is_dynamic = True
+ if arg.type.is_pyobject:
+ nonliteral_objects.append(arg)
+ else:
+ nonliteral_other.append(arg)
+ else:
+ arg.default = DefaultLiteralArgNode(arg.pos, arg.default)
+ if arg.kw_only:
+ default_kwargs.append(arg)
+ else:
+ default_args.append(arg)
+ if arg.annotation:
arg.annotation = self.analyse_annotation(env, arg.annotation)
- annotations.append((arg.pos, arg.name, arg.annotation))
+ annotations.append((arg.pos, arg.name, arg.annotation))
for arg in (self.def_node.star_arg, self.def_node.starstar_arg):
if arg and arg.annotation:
@@ -9275,82 +9275,82 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin):
annotation = self.analyse_annotation(env, annotation)
self.def_node.return_type_annotation = annotation
annotations.append((annotation.pos, StringEncoding.EncodedString("return"), annotation))
-
- if nonliteral_objects or nonliteral_other:
- module_scope = env.global_scope()
- cname = module_scope.next_id(Naming.defaults_struct_prefix)
- scope = Symtab.StructOrUnionScope(cname)
- self.defaults = []
- for arg in nonliteral_objects:
- entry = scope.declare_var(arg.name, arg.type, None,
- Naming.arg_prefix + arg.name,
- allow_pyobject=True)
- self.defaults.append((arg, entry))
- for arg in nonliteral_other:
- entry = scope.declare_var(arg.name, arg.type, None,
- Naming.arg_prefix + arg.name,
+
+ if nonliteral_objects or nonliteral_other:
+ module_scope = env.global_scope()
+ cname = module_scope.next_id(Naming.defaults_struct_prefix)
+ scope = Symtab.StructOrUnionScope(cname)
+ self.defaults = []
+ for arg in nonliteral_objects:
+ entry = scope.declare_var(arg.name, arg.type, None,
+ Naming.arg_prefix + arg.name,
+ allow_pyobject=True)
+ self.defaults.append((arg, entry))
+ for arg in nonliteral_other:
+ entry = scope.declare_var(arg.name, arg.type, None,
+ Naming.arg_prefix + arg.name,
allow_pyobject=False, allow_memoryview=True)
- self.defaults.append((arg, entry))
- entry = module_scope.declare_struct_or_union(
- None, 'struct', scope, 1, None, cname=cname)
- self.defaults_struct = scope
- self.defaults_pyobjects = len(nonliteral_objects)
- for arg, entry in self.defaults:
- arg.default_value = '%s->%s' % (
- Naming.dynamic_args_cname, entry.cname)
- self.def_node.defaults_struct = self.defaults_struct.name
-
- if default_args or default_kwargs:
- if self.defaults_struct is None:
- if default_args:
- defaults_tuple = TupleNode(self.pos, args=[
- arg.default for arg in default_args])
+ self.defaults.append((arg, entry))
+ entry = module_scope.declare_struct_or_union(
+ None, 'struct', scope, 1, None, cname=cname)
+ self.defaults_struct = scope
+ self.defaults_pyobjects = len(nonliteral_objects)
+ for arg, entry in self.defaults:
+ arg.default_value = '%s->%s' % (
+ Naming.dynamic_args_cname, entry.cname)
+ self.def_node.defaults_struct = self.defaults_struct.name
+
+ if default_args or default_kwargs:
+ if self.defaults_struct is None:
+ if default_args:
+ defaults_tuple = TupleNode(self.pos, args=[
+ arg.default for arg in default_args])
self.defaults_tuple = defaults_tuple.analyse_types(env).coerce_to_pyobject(env)
- if default_kwargs:
- defaults_kwdict = DictNode(self.pos, key_value_pairs=[
- DictItemNode(
- arg.pos,
- key=IdentifierStringNode(arg.pos, value=arg.name),
- value=arg.default)
- for arg in default_kwargs])
- self.defaults_kwdict = defaults_kwdict.analyse_types(env)
- else:
- if default_args:
- defaults_tuple = DefaultsTupleNode(
- self.pos, default_args, self.defaults_struct)
- else:
- defaults_tuple = NoneNode(self.pos)
- if default_kwargs:
- defaults_kwdict = DefaultsKwDictNode(
- self.pos, default_kwargs, self.defaults_struct)
- else:
- defaults_kwdict = NoneNode(self.pos)
-
- defaults_getter = Nodes.DefNode(
- self.pos, args=[], star_arg=None, starstar_arg=None,
- body=Nodes.ReturnStatNode(
- self.pos, return_type=py_object_type,
- value=TupleNode(
- self.pos, args=[defaults_tuple, defaults_kwdict])),
- decorators=None,
- name=StringEncoding.EncodedString("__defaults__"))
+ if default_kwargs:
+ defaults_kwdict = DictNode(self.pos, key_value_pairs=[
+ DictItemNode(
+ arg.pos,
+ key=IdentifierStringNode(arg.pos, value=arg.name),
+ value=arg.default)
+ for arg in default_kwargs])
+ self.defaults_kwdict = defaults_kwdict.analyse_types(env)
+ else:
+ if default_args:
+ defaults_tuple = DefaultsTupleNode(
+ self.pos, default_args, self.defaults_struct)
+ else:
+ defaults_tuple = NoneNode(self.pos)
+ if default_kwargs:
+ defaults_kwdict = DefaultsKwDictNode(
+ self.pos, default_kwargs, self.defaults_struct)
+ else:
+ defaults_kwdict = NoneNode(self.pos)
+
+ defaults_getter = Nodes.DefNode(
+ self.pos, args=[], star_arg=None, starstar_arg=None,
+ body=Nodes.ReturnStatNode(
+ self.pos, return_type=py_object_type,
+ value=TupleNode(
+ self.pos, args=[defaults_tuple, defaults_kwdict])),
+ decorators=None,
+ name=StringEncoding.EncodedString("__defaults__"))
# defaults getter must never live in class scopes, it's always a module function
module_scope = env.global_scope()
defaults_getter.analyse_declarations(module_scope)
defaults_getter = defaults_getter.analyse_expressions(module_scope)
- defaults_getter.body = defaults_getter.body.analyse_expressions(
- defaults_getter.local_scope)
- defaults_getter.py_wrapper_required = False
- defaults_getter.pymethdef_required = False
- self.def_node.defaults_getter = defaults_getter
- if annotations:
- annotations_dict = DictNode(self.pos, key_value_pairs=[
- DictItemNode(
- pos, key=IdentifierStringNode(pos, value=name),
- value=value)
- for pos, name, value in annotations])
- self.annotations_dict = annotations_dict.analyse_types(env)
-
+ defaults_getter.body = defaults_getter.body.analyse_expressions(
+ defaults_getter.local_scope)
+ defaults_getter.py_wrapper_required = False
+ defaults_getter.pymethdef_required = False
+ self.def_node.defaults_getter = defaults_getter
+ if annotations:
+ annotations_dict = DictNode(self.pos, key_value_pairs=[
+ DictItemNode(
+ pos, key=IdentifierStringNode(pos, value=name),
+ value=value)
+ for pos, name, value in annotations])
+ self.annotations_dict = annotations_dict.analyse_types(env)
+
def analyse_annotation(self, env, annotation):
if annotation is None:
return None
@@ -9365,425 +9365,425 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin):
annotation = annotation.coerce_to_pyobject(env)
return annotation
- def may_be_none(self):
- return False
-
- gil_message = "Constructing Python function"
-
- def self_result_code(self):
- if self.self_object is None:
- self_result = "NULL"
- else:
- self_result = self.self_object.py_result()
- return self_result
-
- def generate_result_code(self, code):
- if self.binding:
- self.generate_cyfunction_code(code)
- else:
- self.generate_pycfunction_code(code)
-
- def generate_pycfunction_code(self, code):
- py_mod_name = self.get_py_mod_name(code)
- code.putln(
- '%s = PyCFunction_NewEx(&%s, %s, %s); %s' % (
- self.result(),
- self.pymethdef_cname,
- self.self_result_code(),
- py_mod_name,
- code.error_goto_if_null(self.result(), self.pos)))
-
- code.put_gotref(self.py_result())
-
- def generate_cyfunction_code(self, code):
- if self.specialized_cpdefs:
- def_node = self.specialized_cpdefs[0]
- else:
- def_node = self.def_node
-
- if self.specialized_cpdefs or self.is_specialization:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("FusedFunction", "CythonFunction.c"))
+ def may_be_none(self):
+ return False
+
+ gil_message = "Constructing Python function"
+
+ def self_result_code(self):
+ if self.self_object is None:
+ self_result = "NULL"
+ else:
+ self_result = self.self_object.py_result()
+ return self_result
+
+ def generate_result_code(self, code):
+ if self.binding:
+ self.generate_cyfunction_code(code)
+ else:
+ self.generate_pycfunction_code(code)
+
+ def generate_pycfunction_code(self, code):
+ py_mod_name = self.get_py_mod_name(code)
+ code.putln(
+ '%s = PyCFunction_NewEx(&%s, %s, %s); %s' % (
+ self.result(),
+ self.pymethdef_cname,
+ self.self_result_code(),
+ py_mod_name,
+ code.error_goto_if_null(self.result(), self.pos)))
+
+ code.put_gotref(self.py_result())
+
+ def generate_cyfunction_code(self, code):
+ if self.specialized_cpdefs:
+ def_node = self.specialized_cpdefs[0]
+ else:
+ def_node = self.def_node
+
+ if self.specialized_cpdefs or self.is_specialization:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("FusedFunction", "CythonFunction.c"))
constructor = "__pyx_FusedFunction_New"
- else:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("CythonFunction", "CythonFunction.c"))
+ else:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("CythonFunction", "CythonFunction.c"))
constructor = "__Pyx_CyFunction_New"
-
- if self.code_object:
- code_object_result = self.code_object.py_result()
- else:
- code_object_result = 'NULL'
-
- flags = []
- if def_node.is_staticmethod:
- flags.append('__Pyx_CYFUNCTION_STATICMETHOD')
- elif def_node.is_classmethod:
- flags.append('__Pyx_CYFUNCTION_CLASSMETHOD')
-
+
+ if self.code_object:
+ code_object_result = self.code_object.py_result()
+ else:
+ code_object_result = 'NULL'
+
+ flags = []
+ if def_node.is_staticmethod:
+ flags.append('__Pyx_CYFUNCTION_STATICMETHOD')
+ elif def_node.is_classmethod:
+ flags.append('__Pyx_CYFUNCTION_CLASSMETHOD')
+
if def_node.local_scope.parent_scope.is_c_class_scope and not def_node.entry.is_anonymous:
- flags.append('__Pyx_CYFUNCTION_CCLASS')
-
- if flags:
- flags = ' | '.join(flags)
- else:
- flags = '0'
-
- code.putln(
- '%s = %s(&%s, %s, %s, %s, %s, %s, %s); %s' % (
- self.result(),
- constructor,
- self.pymethdef_cname,
- flags,
- self.get_py_qualified_name(code),
- self.self_result_code(),
- self.get_py_mod_name(code),
- Naming.moddict_cname,
- code_object_result,
- code.error_goto_if_null(self.result(), self.pos)))
-
- code.put_gotref(self.py_result())
-
- if def_node.requires_classobj:
- assert code.pyclass_stack, "pyclass_stack is empty"
- class_node = code.pyclass_stack[-1]
- code.put_incref(self.py_result(), py_object_type)
- code.putln(
- 'PyList_Append(%s, %s);' % (
- class_node.class_cell.result(),
- self.result()))
- code.put_giveref(self.py_result())
-
- if self.defaults:
- code.putln(
- 'if (!__Pyx_CyFunction_InitDefaults(%s, sizeof(%s), %d)) %s' % (
- self.result(), self.defaults_struct.name,
- self.defaults_pyobjects, code.error_goto(self.pos)))
- defaults = '__Pyx_CyFunction_Defaults(%s, %s)' % (
- self.defaults_struct.name, self.result())
- for arg, entry in self.defaults:
- arg.generate_assignment_code(code, target='%s->%s' % (
- defaults, entry.cname))
-
- if self.defaults_tuple:
- code.putln('__Pyx_CyFunction_SetDefaultsTuple(%s, %s);' % (
- self.result(), self.defaults_tuple.py_result()))
- if self.defaults_kwdict:
- code.putln('__Pyx_CyFunction_SetDefaultsKwDict(%s, %s);' % (
- self.result(), self.defaults_kwdict.py_result()))
+ flags.append('__Pyx_CYFUNCTION_CCLASS')
+
+ if flags:
+ flags = ' | '.join(flags)
+ else:
+ flags = '0'
+
+ code.putln(
+ '%s = %s(&%s, %s, %s, %s, %s, %s, %s); %s' % (
+ self.result(),
+ constructor,
+ self.pymethdef_cname,
+ flags,
+ self.get_py_qualified_name(code),
+ self.self_result_code(),
+ self.get_py_mod_name(code),
+ Naming.moddict_cname,
+ code_object_result,
+ code.error_goto_if_null(self.result(), self.pos)))
+
+ code.put_gotref(self.py_result())
+
+ if def_node.requires_classobj:
+ assert code.pyclass_stack, "pyclass_stack is empty"
+ class_node = code.pyclass_stack[-1]
+ code.put_incref(self.py_result(), py_object_type)
+ code.putln(
+ 'PyList_Append(%s, %s);' % (
+ class_node.class_cell.result(),
+ self.result()))
+ code.put_giveref(self.py_result())
+
+ if self.defaults:
+ code.putln(
+ 'if (!__Pyx_CyFunction_InitDefaults(%s, sizeof(%s), %d)) %s' % (
+ self.result(), self.defaults_struct.name,
+ self.defaults_pyobjects, code.error_goto(self.pos)))
+ defaults = '__Pyx_CyFunction_Defaults(%s, %s)' % (
+ self.defaults_struct.name, self.result())
+ for arg, entry in self.defaults:
+ arg.generate_assignment_code(code, target='%s->%s' % (
+ defaults, entry.cname))
+
+ if self.defaults_tuple:
+ code.putln('__Pyx_CyFunction_SetDefaultsTuple(%s, %s);' % (
+ self.result(), self.defaults_tuple.py_result()))
+ if self.defaults_kwdict:
+ code.putln('__Pyx_CyFunction_SetDefaultsKwDict(%s, %s);' % (
+ self.result(), self.defaults_kwdict.py_result()))
if def_node.defaults_getter and not self.specialized_cpdefs:
# Fused functions do not support dynamic defaults, only their specialisations can have them for now.
- code.putln('__Pyx_CyFunction_SetDefaultsGetter(%s, %s);' % (
- self.result(), def_node.defaults_getter.entry.pyfunc_cname))
- if self.annotations_dict:
- code.putln('__Pyx_CyFunction_SetAnnotationsDict(%s, %s);' % (
- self.result(), self.annotations_dict.py_result()))
-
-
-class InnerFunctionNode(PyCFunctionNode):
- # Special PyCFunctionNode that depends on a closure class
- #
-
- binding = True
- needs_self_code = True
-
- def self_result_code(self):
- if self.needs_self_code:
- return "((PyObject*)%s)" % Naming.cur_scope_cname
- return "NULL"
-
-
-class CodeObjectNode(ExprNode):
- # Create a PyCodeObject for a CyFunction instance.
- #
- # def_node DefNode the Python function node
- # varnames TupleNode a tuple with all local variable names
-
- subexprs = ['varnames']
- is_temp = False
+ code.putln('__Pyx_CyFunction_SetDefaultsGetter(%s, %s);' % (
+ self.result(), def_node.defaults_getter.entry.pyfunc_cname))
+ if self.annotations_dict:
+ code.putln('__Pyx_CyFunction_SetAnnotationsDict(%s, %s);' % (
+ self.result(), self.annotations_dict.py_result()))
+
+
+class InnerFunctionNode(PyCFunctionNode):
+ # Special PyCFunctionNode that depends on a closure class
+ #
+
+ binding = True
+ needs_self_code = True
+
+ def self_result_code(self):
+ if self.needs_self_code:
+ return "((PyObject*)%s)" % Naming.cur_scope_cname
+ return "NULL"
+
+
+class CodeObjectNode(ExprNode):
+ # Create a PyCodeObject for a CyFunction instance.
+ #
+ # def_node DefNode the Python function node
+ # varnames TupleNode a tuple with all local variable names
+
+ subexprs = ['varnames']
+ is_temp = False
result_code = None
-
- def __init__(self, def_node):
- ExprNode.__init__(self, def_node.pos, def_node=def_node)
- args = list(def_node.args)
- # if we have args/kwargs, then the first two in var_entries are those
- local_vars = [arg for arg in def_node.local_scope.var_entries if arg.name]
- self.varnames = TupleNode(
- def_node.pos,
- args=[IdentifierStringNode(arg.pos, value=arg.name)
- for arg in args + local_vars],
- is_temp=0,
- is_literal=1)
-
- def may_be_none(self):
- return False
-
+
+ def __init__(self, def_node):
+ ExprNode.__init__(self, def_node.pos, def_node=def_node)
+ args = list(def_node.args)
+ # if we have args/kwargs, then the first two in var_entries are those
+ local_vars = [arg for arg in def_node.local_scope.var_entries if arg.name]
+ self.varnames = TupleNode(
+ def_node.pos,
+ args=[IdentifierStringNode(arg.pos, value=arg.name)
+ for arg in args + local_vars],
+ is_temp=0,
+ is_literal=1)
+
+ def may_be_none(self):
+ return False
+
def calculate_result_code(self, code=None):
if self.result_code is None:
self.result_code = code.get_py_const(py_object_type, 'codeobj', cleanup_level=2)
- return self.result_code
-
- def generate_result_code(self, code):
+ return self.result_code
+
+ def generate_result_code(self, code):
if self.result_code is None:
self.result_code = code.get_py_const(py_object_type, 'codeobj', cleanup_level=2)
-
+
code = code.get_cached_constants_writer(self.result_code)
if code is None:
return # already initialised
- code.mark_pos(self.pos)
- func = self.def_node
- func_name = code.get_py_string_const(
- func.name, identifier=True, is_str=False, unicode_value=func.name)
- # FIXME: better way to get the module file path at module init time? Encoding to use?
+ code.mark_pos(self.pos)
+ func = self.def_node
+ func_name = code.get_py_string_const(
+ func.name, identifier=True, is_str=False, unicode_value=func.name)
+ # FIXME: better way to get the module file path at module init time? Encoding to use?
file_path = StringEncoding.bytes_literal(func.pos[0].get_filenametable_entry().encode('utf8'), 'utf8')
# XXX Use get_description() to set arcadia root relative filename
file_path = StringEncoding.bytes_literal(func.pos[0].get_description().encode('utf8'), 'utf8')
- file_path_const = code.get_py_string_const(file_path, identifier=False, is_str=True)
-
+ file_path_const = code.get_py_string_const(file_path, identifier=False, is_str=True)
+
# This combination makes CPython create a new dict for "frame.f_locals" (see GH #1836).
flags = ['CO_OPTIMIZED', 'CO_NEWLOCALS']
- if self.def_node.star_arg:
- flags.append('CO_VARARGS')
- if self.def_node.starstar_arg:
- flags.append('CO_VARKEYWORDS')
-
- code.putln("%s = (PyObject*)__Pyx_PyCode_New(%d, %d, %d, 0, %s, %s, %s, %s, %s, %s, %s, %s, %s, %d, %s); %s" % (
- self.result_code,
- len(func.args) - func.num_kwonly_args, # argcount
- func.num_kwonly_args, # kwonlyargcount (Py3 only)
- len(self.varnames.args), # nlocals
- '|'.join(flags) or '0', # flags
- Naming.empty_bytes, # code
- Naming.empty_tuple, # consts
- Naming.empty_tuple, # names (FIXME)
- self.varnames.result(), # varnames
- Naming.empty_tuple, # freevars (FIXME)
- Naming.empty_tuple, # cellvars (FIXME)
- file_path_const, # filename
- func_name, # name
- self.pos[1], # firstlineno
- Naming.empty_bytes, # lnotab
- code.error_goto_if_null(self.result_code, self.pos),
- ))
-
-
-class DefaultLiteralArgNode(ExprNode):
- # CyFunction's literal argument default value
- #
- # Evaluate literal only once.
-
- subexprs = []
- is_literal = True
- is_temp = False
-
- def __init__(self, pos, arg):
- super(DefaultLiteralArgNode, self).__init__(pos)
- self.arg = arg
- self.type = self.arg.type
- self.evaluated = False
-
- def analyse_types(self, env):
- return self
-
- def generate_result_code(self, code):
- pass
-
- def generate_evaluation_code(self, code):
- if not self.evaluated:
- self.arg.generate_evaluation_code(code)
- self.evaluated = True
-
- def result(self):
- return self.type.cast_code(self.arg.result())
-
-
-class DefaultNonLiteralArgNode(ExprNode):
- # CyFunction's non-literal argument default value
-
- subexprs = []
-
- def __init__(self, pos, arg, defaults_struct):
- super(DefaultNonLiteralArgNode, self).__init__(pos)
- self.arg = arg
- self.defaults_struct = defaults_struct
-
- def analyse_types(self, env):
- self.type = self.arg.type
- self.is_temp = False
- return self
-
- def generate_result_code(self, code):
- pass
-
- def result(self):
- return '__Pyx_CyFunction_Defaults(%s, %s)->%s' % (
- self.defaults_struct.name, Naming.self_cname,
- self.defaults_struct.lookup(self.arg.name).cname)
-
-
-class DefaultsTupleNode(TupleNode):
- # CyFunction's __defaults__ tuple
-
- def __init__(self, pos, defaults, defaults_struct):
- args = []
- for arg in defaults:
- if not arg.default.is_literal:
- arg = DefaultNonLiteralArgNode(pos, arg, defaults_struct)
- else:
- arg = arg.default
- args.append(arg)
- super(DefaultsTupleNode, self).__init__(pos, args=args)
-
+ if self.def_node.star_arg:
+ flags.append('CO_VARARGS')
+ if self.def_node.starstar_arg:
+ flags.append('CO_VARKEYWORDS')
+
+ code.putln("%s = (PyObject*)__Pyx_PyCode_New(%d, %d, %d, 0, %s, %s, %s, %s, %s, %s, %s, %s, %s, %d, %s); %s" % (
+ self.result_code,
+ len(func.args) - func.num_kwonly_args, # argcount
+ func.num_kwonly_args, # kwonlyargcount (Py3 only)
+ len(self.varnames.args), # nlocals
+ '|'.join(flags) or '0', # flags
+ Naming.empty_bytes, # code
+ Naming.empty_tuple, # consts
+ Naming.empty_tuple, # names (FIXME)
+ self.varnames.result(), # varnames
+ Naming.empty_tuple, # freevars (FIXME)
+ Naming.empty_tuple, # cellvars (FIXME)
+ file_path_const, # filename
+ func_name, # name
+ self.pos[1], # firstlineno
+ Naming.empty_bytes, # lnotab
+ code.error_goto_if_null(self.result_code, self.pos),
+ ))
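# --- Editor's note (illustrative sketch, not part of the diff) ---------------
# CodeObjectNode.generate_result_code() above fills in the fields of a CPython
# code object via __Pyx_PyCode_New().  The same fields are visible on any
# ordinary Python function; the names below (_g, co) are ad-hoc for the sketch,
# and 0x04/0x08 are assumed to be CPython's CO_VARARGS/CO_VARKEYWORDS flags.
def _g(a, b=0, *args, **kwargs):
    c = a + b
    return c

co = _g.__code__
assert co.co_argcount == 2                        # argcount (a, b)
assert 'c' in co.co_varnames                      # varnames: args + locals
assert co.co_flags & 0x04 and co.co_flags & 0x08  # CO_VARARGS | CO_VARKEYWORDS
# ------------------------------------------------------------------------------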
+
+
+class DefaultLiteralArgNode(ExprNode):
+ # CyFunction's literal argument default value
+ #
+ # Evaluate literal only once.
+
+ subexprs = []
+ is_literal = True
+ is_temp = False
+
+ def __init__(self, pos, arg):
+ super(DefaultLiteralArgNode, self).__init__(pos)
+ self.arg = arg
+ self.type = self.arg.type
+ self.evaluated = False
+
+ def analyse_types(self, env):
+ return self
+
+ def generate_result_code(self, code):
+ pass
+
+ def generate_evaluation_code(self, code):
+ if not self.evaluated:
+ self.arg.generate_evaluation_code(code)
+ self.evaluated = True
+
+ def result(self):
+ return self.type.cast_code(self.arg.result())
+
+
+class DefaultNonLiteralArgNode(ExprNode):
+ # CyFunction's non-literal argument default value
+
+ subexprs = []
+
+ def __init__(self, pos, arg, defaults_struct):
+ super(DefaultNonLiteralArgNode, self).__init__(pos)
+ self.arg = arg
+ self.defaults_struct = defaults_struct
+
+ def analyse_types(self, env):
+ self.type = self.arg.type
+ self.is_temp = False
+ return self
+
+ def generate_result_code(self, code):
+ pass
+
+ def result(self):
+ return '__Pyx_CyFunction_Defaults(%s, %s)->%s' % (
+ self.defaults_struct.name, Naming.self_cname,
+ self.defaults_struct.lookup(self.arg.name).cname)
+
+
+class DefaultsTupleNode(TupleNode):
+ # CyFunction's __defaults__ tuple
+
+ def __init__(self, pos, defaults, defaults_struct):
+ args = []
+ for arg in defaults:
+ if not arg.default.is_literal:
+ arg = DefaultNonLiteralArgNode(pos, arg, defaults_struct)
+ else:
+ arg = arg.default
+ args.append(arg)
+ super(DefaultsTupleNode, self).__init__(pos, args=args)
+
def analyse_types(self, env, skip_children=False):
return super(DefaultsTupleNode, self).analyse_types(env, skip_children).coerce_to_pyobject(env)
-
-
-class DefaultsKwDictNode(DictNode):
- # CyFunction's __kwdefaults__ dict
-
- def __init__(self, pos, defaults, defaults_struct):
- items = []
- for arg in defaults:
- name = IdentifierStringNode(arg.pos, value=arg.name)
- if not arg.default.is_literal:
- arg = DefaultNonLiteralArgNode(pos, arg, defaults_struct)
- else:
- arg = arg.default
- items.append(DictItemNode(arg.pos, key=name, value=arg))
- super(DefaultsKwDictNode, self).__init__(pos, key_value_pairs=items)
-
-
-class LambdaNode(InnerFunctionNode):
- # Lambda expression node (only used as a function reference)
- #
- # args [CArgDeclNode] formal arguments
- # star_arg PyArgDeclNode or None * argument
- # starstar_arg PyArgDeclNode or None ** argument
- # lambda_name string a module-globally unique lambda name
- # result_expr ExprNode
- # def_node DefNode the underlying function 'def' node
-
- child_attrs = ['def_node']
-
- name = StringEncoding.EncodedString('<lambda>')
-
- def analyse_declarations(self, env):
+
+
+class DefaultsKwDictNode(DictNode):
+ # CyFunction's __kwdefaults__ dict
+
+ def __init__(self, pos, defaults, defaults_struct):
+ items = []
+ for arg in defaults:
+ name = IdentifierStringNode(arg.pos, value=arg.name)
+ if not arg.default.is_literal:
+ arg = DefaultNonLiteralArgNode(pos, arg, defaults_struct)
+ else:
+ arg = arg.default
+ items.append(DictItemNode(arg.pos, key=name, value=arg))
+ super(DefaultsKwDictNode, self).__init__(pos, key_value_pairs=items)
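# --- Editor's note (illustrative sketch, not part of the diff) ---------------
# DefaultsTupleNode and DefaultsKwDictNode above build the objects CPython
# exposes as __defaults__ and __kwdefaults__ on a function.  A plain-Python
# picture of what they end up representing (the name _example is ad-hoc):
def _example(a, b=1, *, c=2):
    return a + b + c

assert _example.__defaults__ == (1,)        # positional defaults -> tuple
assert _example.__kwdefaults__ == {'c': 2}  # keyword-only defaults -> dict
# ------------------------------------------------------------------------------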
+
+
+class LambdaNode(InnerFunctionNode):
+ # Lambda expression node (only used as a function reference)
+ #
+ # args [CArgDeclNode] formal arguments
+ # star_arg PyArgDeclNode or None * argument
+ # starstar_arg PyArgDeclNode or None ** argument
+ # lambda_name string a module-globally unique lambda name
+ # result_expr ExprNode
+ # def_node DefNode the underlying function 'def' node
+
+ child_attrs = ['def_node']
+
+ name = StringEncoding.EncodedString('<lambda>')
+
+ def analyse_declarations(self, env):
self.lambda_name = self.def_node.lambda_name = env.next_id('lambda')
- self.def_node.no_assignment_synthesis = True
- self.def_node.pymethdef_required = True
- self.def_node.analyse_declarations(env)
- self.def_node.is_cyfunction = True
- self.pymethdef_cname = self.def_node.entry.pymethdef_cname
- env.add_lambda_def(self.def_node)
-
- def analyse_types(self, env):
- self.def_node = self.def_node.analyse_expressions(env)
- return super(LambdaNode, self).analyse_types(env)
-
- def generate_result_code(self, code):
- self.def_node.generate_execution_code(code)
- super(LambdaNode, self).generate_result_code(code)
-
-
-class GeneratorExpressionNode(LambdaNode):
- # A generator expression, e.g. (i for i in range(10))
- #
- # Result is a generator.
- #
- # loop ForStatNode the for-loop, containing a YieldExprNode
- # def_node DefNode the underlying generator 'def' node
-
- name = StringEncoding.EncodedString('genexpr')
- binding = False
-
- def analyse_declarations(self, env):
+ self.def_node.no_assignment_synthesis = True
+ self.def_node.pymethdef_required = True
+ self.def_node.analyse_declarations(env)
+ self.def_node.is_cyfunction = True
+ self.pymethdef_cname = self.def_node.entry.pymethdef_cname
+ env.add_lambda_def(self.def_node)
+
+ def analyse_types(self, env):
+ self.def_node = self.def_node.analyse_expressions(env)
+ return super(LambdaNode, self).analyse_types(env)
+
+ def generate_result_code(self, code):
+ self.def_node.generate_execution_code(code)
+ super(LambdaNode, self).generate_result_code(code)
+
+
+class GeneratorExpressionNode(LambdaNode):
+ # A generator expression, e.g. (i for i in range(10))
+ #
+ # Result is a generator.
+ #
+ # loop ForStatNode the for-loop, containing a YieldExprNode
+ # def_node DefNode the underlying generator 'def' node
+
+ name = StringEncoding.EncodedString('genexpr')
+ binding = False
+
+ def analyse_declarations(self, env):
self.genexpr_name = env.next_id('genexpr')
- super(GeneratorExpressionNode, self).analyse_declarations(env)
- # No pymethdef required
- self.def_node.pymethdef_required = False
- self.def_node.py_wrapper_required = False
- self.def_node.is_cyfunction = False
- # Force genexpr signature
- self.def_node.entry.signature = TypeSlots.pyfunction_noargs
-
- def generate_result_code(self, code):
- code.putln(
- '%s = %s(%s); %s' % (
- self.result(),
- self.def_node.entry.pyfunc_cname,
- self.self_result_code(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
-
-class YieldExprNode(ExprNode):
- # Yield expression node
- #
- # arg ExprNode the value to return from the generator
- # label_num integer yield label number
- # is_yield_from boolean is a YieldFromExprNode to delegate to another generator
-
- subexprs = ['arg']
- type = py_object_type
- label_num = 0
- is_yield_from = False
+ super(GeneratorExpressionNode, self).analyse_declarations(env)
+ # No pymethdef required
+ self.def_node.pymethdef_required = False
+ self.def_node.py_wrapper_required = False
+ self.def_node.is_cyfunction = False
+ # Force genexpr signature
+ self.def_node.entry.signature = TypeSlots.pyfunction_noargs
+
+ def generate_result_code(self, code):
+ code.putln(
+ '%s = %s(%s); %s' % (
+ self.result(),
+ self.def_node.entry.pyfunc_cname,
+ self.self_result_code(),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
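# --- Editor's note (illustrative sketch, not part of the diff) ---------------
# As the class comment above says, evaluating a GeneratorExpressionNode yields
# a generator object, not a materialised sequence; in plain Python:
gen = (i * i for i in range(4))
assert list(gen) == [0, 1, 4, 9]   # values are produced lazily on iteration
# ------------------------------------------------------------------------------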
+
+
+class YieldExprNode(ExprNode):
+ # Yield expression node
+ #
+ # arg ExprNode the value to return from the generator
+ # label_num integer yield label number
+ # is_yield_from boolean is a YieldFromExprNode to delegate to another generator
+
+ subexprs = ['arg']
+ type = py_object_type
+ label_num = 0
+ is_yield_from = False
is_await = False
in_async_gen = False
expr_keyword = 'yield'
-
- def analyse_types(self, env):
+
+ def analyse_types(self, env):
if not self.label_num or (self.is_yield_from and self.in_async_gen):
error(self.pos, "'%s' not supported here" % self.expr_keyword)
- self.is_temp = 1
- if self.arg is not None:
- self.arg = self.arg.analyse_types(env)
- if not self.arg.type.is_pyobject:
- self.coerce_yield_argument(env)
- return self
-
- def coerce_yield_argument(self, env):
- self.arg = self.arg.coerce_to_pyobject(env)
-
- def generate_evaluation_code(self, code):
- if self.arg:
- self.arg.generate_evaluation_code(code)
- self.arg.make_owned_reference(code)
- code.putln(
- "%s = %s;" % (
- Naming.retval_cname,
- self.arg.result_as(py_object_type)))
- self.arg.generate_post_assignment_code(code)
- self.arg.free_temps(code)
- else:
- code.put_init_to_py_none(Naming.retval_cname, py_object_type)
- self.generate_yield_code(code)
-
- def generate_yield_code(self, code):
- """
- Generate the code to return the argument in 'Naming.retval_cname'
- and to continue at the yield label.
- """
+ self.is_temp = 1
+ if self.arg is not None:
+ self.arg = self.arg.analyse_types(env)
+ if not self.arg.type.is_pyobject:
+ self.coerce_yield_argument(env)
+ return self
+
+ def coerce_yield_argument(self, env):
+ self.arg = self.arg.coerce_to_pyobject(env)
+
+ def generate_evaluation_code(self, code):
+ if self.arg:
+ self.arg.generate_evaluation_code(code)
+ self.arg.make_owned_reference(code)
+ code.putln(
+ "%s = %s;" % (
+ Naming.retval_cname,
+ self.arg.result_as(py_object_type)))
+ self.arg.generate_post_assignment_code(code)
+ self.arg.free_temps(code)
+ else:
+ code.put_init_to_py_none(Naming.retval_cname, py_object_type)
+ self.generate_yield_code(code)
+
+ def generate_yield_code(self, code):
+ """
+ Generate the code to return the argument in 'Naming.retval_cname'
+ and to continue at the yield label.
+ """
label_num, label_name = code.new_yield_label(
self.expr_keyword.replace(' ', '_'))
- code.use_label(label_name)
-
- saved = []
- code.funcstate.closure_temps.reset()
- for cname, type, manage_ref in code.funcstate.temps_in_use():
- save_cname = code.funcstate.closure_temps.allocate_temp(type)
- saved.append((cname, save_cname, type))
- if type.is_pyobject:
- code.put_xgiveref(cname)
- code.putln('%s->%s = %s;' % (Naming.cur_scope_cname, save_cname, cname))
-
- code.put_xgiveref(Naming.retval_cname)
+ code.use_label(label_name)
+
+ saved = []
+ code.funcstate.closure_temps.reset()
+ for cname, type, manage_ref in code.funcstate.temps_in_use():
+ save_cname = code.funcstate.closure_temps.allocate_temp(type)
+ saved.append((cname, save_cname, type))
+ if type.is_pyobject:
+ code.put_xgiveref(cname)
+ code.putln('%s->%s = %s;' % (Naming.cur_scope_cname, save_cname, cname))
+
+ code.put_xgiveref(Naming.retval_cname)
profile = code.globalstate.directives['profile']
linetrace = code.globalstate.directives['linetrace']
if profile or linetrace:
code.put_trace_return(Naming.retval_cname,
nogil=not code.funcstate.gil_owned)
- code.put_finish_refcount_context()
+ code.put_finish_refcount_context()
if code.funcstate.current_except is not None:
# inside of an except block => save away currently handled exception
@@ -9795,59 +9795,59 @@ class YieldExprNode(ExprNode):
code.putln("/* return from %sgenerator, %sing value */" % (
'async ' if self.in_async_gen else '',
'await' if self.is_await else 'yield'))
- code.putln("%s->resume_label = %d;" % (
- Naming.generator_cname, label_num))
+ code.putln("%s->resume_label = %d;" % (
+ Naming.generator_cname, label_num))
if self.in_async_gen and not self.is_await:
# __Pyx__PyAsyncGenValueWrapperNew() steals a reference to the return value
code.putln("return __Pyx__PyAsyncGenValueWrapperNew(%s);" % Naming.retval_cname)
else:
code.putln("return %s;" % Naming.retval_cname)
-
- code.put_label(label_name)
- for cname, save_cname, type in saved:
- code.putln('%s = %s->%s;' % (cname, Naming.cur_scope_cname, save_cname))
- if type.is_pyobject:
- code.putln('%s->%s = 0;' % (Naming.cur_scope_cname, save_cname))
- code.put_xgotref(cname)
+
+ code.put_label(label_name)
+ for cname, save_cname, type in saved:
+ code.putln('%s = %s->%s;' % (cname, Naming.cur_scope_cname, save_cname))
+ if type.is_pyobject:
+ code.putln('%s->%s = 0;' % (Naming.cur_scope_cname, save_cname))
+ code.put_xgotref(cname)
self.generate_sent_value_handling_code(code, Naming.sent_value_cname)
- if self.result_is_used:
- self.allocate_temp_result(code)
- code.put('%s = %s; ' % (self.result(), Naming.sent_value_cname))
- code.put_incref(self.result(), py_object_type)
-
+ if self.result_is_used:
+ self.allocate_temp_result(code)
+ code.put('%s = %s; ' % (self.result(), Naming.sent_value_cname))
+ code.put_incref(self.result(), py_object_type)
+
def generate_sent_value_handling_code(self, code, value_cname):
code.putln(code.error_goto_if_null(value_cname, self.pos))
-
-
+
+
class _YieldDelegationExprNode(YieldExprNode):
def yield_from_func(self, code):
raise NotImplementedError()
-
+
def generate_evaluation_code(self, code, source_cname=None, decref_source=False):
if source_cname is None:
self.arg.generate_evaluation_code(code)
code.putln("%s = %s(%s, %s);" % (
- Naming.retval_cname,
+ Naming.retval_cname,
self.yield_from_func(code),
- Naming.generator_cname,
+ Naming.generator_cname,
self.arg.py_result() if source_cname is None else source_cname))
if source_cname is None:
self.arg.generate_disposal_code(code)
self.arg.free_temps(code)
elif decref_source:
code.put_decref_clear(source_cname, py_object_type)
- code.put_xgotref(Naming.retval_cname)
-
- code.putln("if (likely(%s)) {" % Naming.retval_cname)
- self.generate_yield_code(code)
- code.putln("} else {")
- # either error or sub-generator has normally terminated: return value => node result
- if self.result_is_used:
+ code.put_xgotref(Naming.retval_cname)
+
+ code.putln("if (likely(%s)) {" % Naming.retval_cname)
+ self.generate_yield_code(code)
+ code.putln("} else {")
+ # either error or sub-generator has normally terminated: return value => node result
+ if self.result_is_used:
self.fetch_iteration_result(code)
- else:
+ else:
self.handle_iteration_exception(code)
- code.putln("}")
-
+ code.putln("}")
+
def fetch_iteration_result(self, code):
# YieldExprNode has allocated the result temp for us
code.putln("%s = NULL;" % self.result())
@@ -9927,186 +9927,186 @@ class AwaitIterNextExprNode(AwaitExprNode):
code.putln("}")
-class GlobalsExprNode(AtomicExprNode):
- type = dict_type
- is_temp = 1
-
- def analyse_types(self, env):
- env.use_utility_code(Builtin.globals_utility_code)
- return self
-
- gil_message = "Constructing globals dict"
-
- def may_be_none(self):
- return False
-
- def generate_result_code(self, code):
- code.putln('%s = __Pyx_Globals(); %s' % (
- self.result(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.result())
-
-
-class LocalsDictItemNode(DictItemNode):
- def analyse_types(self, env):
- self.key = self.key.analyse_types(env)
- self.value = self.value.analyse_types(env)
- self.key = self.key.coerce_to_pyobject(env)
- if self.value.type.can_coerce_to_pyobject(env):
- self.value = self.value.coerce_to_pyobject(env)
- else:
- self.value = None
- return self
-
-
-class FuncLocalsExprNode(DictNode):
- def __init__(self, pos, env):
- local_vars = sorted([
- entry.name for entry in env.entries.values() if entry.name])
- items = [LocalsDictItemNode(
- pos, key=IdentifierStringNode(pos, value=var),
- value=NameNode(pos, name=var, allow_null=True))
- for var in local_vars]
- DictNode.__init__(self, pos, key_value_pairs=items,
- exclude_null_values=True)
-
- def analyse_types(self, env):
- node = super(FuncLocalsExprNode, self).analyse_types(env)
- node.key_value_pairs = [ i for i in node.key_value_pairs
- if i.value is not None ]
- return node
-
-
-class PyClassLocalsExprNode(AtomicExprNode):
- def __init__(self, pos, pyclass_dict):
- AtomicExprNode.__init__(self, pos)
- self.pyclass_dict = pyclass_dict
-
- def analyse_types(self, env):
- self.type = self.pyclass_dict.type
- self.is_temp = False
- return self
-
- def may_be_none(self):
- return False
-
- def result(self):
- return self.pyclass_dict.result()
-
- def generate_result_code(self, code):
- pass
-
-
-def LocalsExprNode(pos, scope_node, env):
- if env.is_module_scope:
- return GlobalsExprNode(pos)
- if env.is_py_class_scope:
- return PyClassLocalsExprNode(pos, scope_node.dict)
- return FuncLocalsExprNode(pos, env)
-
-
-#-------------------------------------------------------------------
-#
-# Unary operator nodes
-#
-#-------------------------------------------------------------------
-
-compile_time_unary_operators = {
- 'not': operator.not_,
- '~': operator.inv,
- '-': operator.neg,
- '+': operator.pos,
-}
-
-class UnopNode(ExprNode):
- # operator string
- # operand ExprNode
- #
- # Processing during analyse_expressions phase:
- #
- # analyse_c_operation
- # Called when the operand is not a pyobject.
- # - Check operand type and coerce if needed.
- # - Determine result type and result code fragment.
- # - Allocate temporary for result if needed.
-
- subexprs = ['operand']
- infix = True
-
- def calculate_constant_result(self):
- func = compile_time_unary_operators[self.operator]
- self.constant_result = func(self.operand.constant_result)
-
- def compile_time_value(self, denv):
- func = compile_time_unary_operators.get(self.operator)
- if not func:
- error(self.pos,
- "Unary '%s' not supported in compile-time expression"
- % self.operator)
- operand = self.operand.compile_time_value(denv)
- try:
- return func(operand)
+class GlobalsExprNode(AtomicExprNode):
+ type = dict_type
+ is_temp = 1
+
+ def analyse_types(self, env):
+ env.use_utility_code(Builtin.globals_utility_code)
+ return self
+
+ gil_message = "Constructing globals dict"
+
+ def may_be_none(self):
+ return False
+
+ def generate_result_code(self, code):
+ code.putln('%s = __Pyx_Globals(); %s' % (
+ self.result(),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.result())
+
+
+class LocalsDictItemNode(DictItemNode):
+ def analyse_types(self, env):
+ self.key = self.key.analyse_types(env)
+ self.value = self.value.analyse_types(env)
+ self.key = self.key.coerce_to_pyobject(env)
+ if self.value.type.can_coerce_to_pyobject(env):
+ self.value = self.value.coerce_to_pyobject(env)
+ else:
+ self.value = None
+ return self
+
+
+class FuncLocalsExprNode(DictNode):
+ def __init__(self, pos, env):
+ local_vars = sorted([
+ entry.name for entry in env.entries.values() if entry.name])
+ items = [LocalsDictItemNode(
+ pos, key=IdentifierStringNode(pos, value=var),
+ value=NameNode(pos, name=var, allow_null=True))
+ for var in local_vars]
+ DictNode.__init__(self, pos, key_value_pairs=items,
+ exclude_null_values=True)
+
+ def analyse_types(self, env):
+ node = super(FuncLocalsExprNode, self).analyse_types(env)
+ node.key_value_pairs = [ i for i in node.key_value_pairs
+ if i.value is not None ]
+ return node
+
+
+class PyClassLocalsExprNode(AtomicExprNode):
+ def __init__(self, pos, pyclass_dict):
+ AtomicExprNode.__init__(self, pos)
+ self.pyclass_dict = pyclass_dict
+
+ def analyse_types(self, env):
+ self.type = self.pyclass_dict.type
+ self.is_temp = False
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def result(self):
+ return self.pyclass_dict.result()
+
+ def generate_result_code(self, code):
+ pass
+
+
+def LocalsExprNode(pos, scope_node, env):
+ if env.is_module_scope:
+ return GlobalsExprNode(pos)
+ if env.is_py_class_scope:
+ return PyClassLocalsExprNode(pos, scope_node.dict)
+ return FuncLocalsExprNode(pos, env)
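# --- Editor's note (illustrative sketch, not part of the diff) ---------------
# LocalsExprNode() above is a factory that picks a node per scope: at module
# scope locals() behaves like globals(), in a class body it is the namespace
# being built, and in a function it is a dict of the local variables.  The
# names _Where and _f are ad-hoc for the sketch (run at module level):
assert locals() is globals()          # module scope -> GlobalsExprNode

class _Where:
    ns = list(locals())               # class scope -> PyClassLocalsExprNode

def _f(a):
    b = a + 1
    return locals()                   # function scope -> FuncLocalsExprNode

assert _f(1) == {'a': 1, 'b': 2}
# ------------------------------------------------------------------------------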
+
+
+#-------------------------------------------------------------------
+#
+# Unary operator nodes
+#
+#-------------------------------------------------------------------
+
+compile_time_unary_operators = {
+ 'not': operator.not_,
+ '~': operator.inv,
+ '-': operator.neg,
+ '+': operator.pos,
+}
+
+class UnopNode(ExprNode):
+ # operator string
+ # operand ExprNode
+ #
+ # Processing during analyse_expressions phase:
+ #
+ # analyse_c_operation
+ # Called when the operand is not a pyobject.
+ # - Check operand type and coerce if needed.
+ # - Determine result type and result code fragment.
+ # - Allocate temporary for result if needed.
+
+ subexprs = ['operand']
+ infix = True
+
+ def calculate_constant_result(self):
+ func = compile_time_unary_operators[self.operator]
+ self.constant_result = func(self.operand.constant_result)
+
+ def compile_time_value(self, denv):
+ func = compile_time_unary_operators.get(self.operator)
+ if not func:
+ error(self.pos,
+ "Unary '%s' not supported in compile-time expression"
+ % self.operator)
+ operand = self.operand.compile_time_value(denv)
+ try:
+ return func(operand)
except Exception as e:
- self.compile_time_value_error(e)
-
- def infer_type(self, env):
- operand_type = self.operand.infer_type(env)
- if operand_type.is_cpp_class or operand_type.is_ptr:
- cpp_type = operand_type.find_cpp_operation_type(self.operator)
- if cpp_type is not None:
- return cpp_type
- return self.infer_unop_type(env, operand_type)
-
- def infer_unop_type(self, env, operand_type):
- if operand_type.is_pyobject:
- return py_object_type
- else:
- return operand_type
-
- def may_be_none(self):
- if self.operand.type and self.operand.type.is_builtin_type:
- if self.operand.type is not type_type:
- return False
- return ExprNode.may_be_none(self)
-
- def analyse_types(self, env):
- self.operand = self.operand.analyse_types(env)
+ self.compile_time_value_error(e)
+
+ def infer_type(self, env):
+ operand_type = self.operand.infer_type(env)
+ if operand_type.is_cpp_class or operand_type.is_ptr:
+ cpp_type = operand_type.find_cpp_operation_type(self.operator)
+ if cpp_type is not None:
+ return cpp_type
+ return self.infer_unop_type(env, operand_type)
+
+ def infer_unop_type(self, env, operand_type):
+ if operand_type.is_pyobject:
+ return py_object_type
+ else:
+ return operand_type
+
+ def may_be_none(self):
+ if self.operand.type and self.operand.type.is_builtin_type:
+ if self.operand.type is not type_type:
+ return False
+ return ExprNode.may_be_none(self)
+
+ def analyse_types(self, env):
+ self.operand = self.operand.analyse_types(env)
if self.is_pythran_operation(env):
self.type = PythranExpr(pythran_unaryop_type(self.operator, self.operand.type))
self.is_temp = 1
elif self.is_py_operation():
- self.coerce_operand_to_pyobject(env)
- self.type = py_object_type
- self.is_temp = 1
- elif self.is_cpp_operation():
- self.analyse_cpp_operation(env)
- else:
- self.analyse_c_operation(env)
- return self
-
- def check_const(self):
- return self.operand.check_const()
-
- def is_py_operation(self):
+ self.coerce_operand_to_pyobject(env)
+ self.type = py_object_type
+ self.is_temp = 1
+ elif self.is_cpp_operation():
+ self.analyse_cpp_operation(env)
+ else:
+ self.analyse_c_operation(env)
+ return self
+
+ def check_const(self):
+ return self.operand.check_const()
+
+ def is_py_operation(self):
return self.operand.type.is_pyobject or self.operand.type.is_ctuple
-
+
def is_pythran_operation(self, env):
np_pythran = has_np_pythran(env)
op_type = self.operand.type
return np_pythran and (op_type.is_buffer or op_type.is_pythran_expr)
- def nogil_check(self, env):
- if self.is_py_operation():
- self.gil_error()
-
- def is_cpp_operation(self):
- type = self.operand.type
- return type.is_cpp_class
-
- def coerce_operand_to_pyobject(self, env):
- self.operand = self.operand.coerce_to_pyobject(env)
-
- def generate_result_code(self, code):
+ def nogil_check(self, env):
+ if self.is_py_operation():
+ self.gil_error()
+
+ def is_cpp_operation(self):
+ type = self.operand.type
+ return type.is_cpp_class
+
+ def coerce_operand_to_pyobject(self, env):
+ self.operand = self.operand.coerce_to_pyobject(env)
+
+ def generate_result_code(self, code):
if self.type.is_pythran_expr:
code.putln("// Pythran unaryop")
code.putln("__Pyx_call_destructor(%s);" % self.result())
@@ -10116,7 +10116,7 @@ class UnopNode(ExprNode):
self.operator,
self.operand.pythran_result()))
elif self.operand.type.is_pyobject:
- self.generate_py_operation_code(code)
+ self.generate_py_operation_code(code)
elif self.is_temp:
if self.is_cpp_operation() and self.exception_check == '+':
translate_cpp_exception(code, self.pos,
@@ -10125,23 +10125,23 @@ class UnopNode(ExprNode):
self.exception_value, self.in_nogil_context)
else:
code.putln("%s = %s %s;" % (self.result(), self.operator, self.operand.result()))
-
- def generate_py_operation_code(self, code):
- function = self.py_operation_function(code)
- code.putln(
- "%s = %s(%s); %s" % (
- self.result(),
- function,
- self.operand.py_result(),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
-
- def type_error(self):
- if not self.operand.type.is_error:
- error(self.pos, "Invalid operand type for '%s' (%s)" %
- (self.operator, self.operand.type))
- self.type = PyrexTypes.error_type
-
+
+ def generate_py_operation_code(self, code):
+ function = self.py_operation_function(code)
+ code.putln(
+ "%s = %s(%s); %s" % (
+ self.result(),
+ function,
+ self.operand.py_result(),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+
+ def type_error(self):
+ if not self.operand.type.is_error:
+ error(self.pos, "Invalid operand type for '%s' (%s)" %
+ (self.operator, self.operand.type))
+ self.type = PyrexTypes.error_type
+
def analyse_cpp_operation(self, env, overload_check=True):
entry = env.lookup_operator(self.operator, [self.operand])
if overload_check and not entry:
@@ -10157,311 +10157,311 @@ class UnopNode(ExprNode):
else:
self.exception_check = ''
self.exception_value = ''
- cpp_type = self.operand.type.find_cpp_operation_type(self.operator)
+ cpp_type = self.operand.type.find_cpp_operation_type(self.operator)
if overload_check and cpp_type is None:
- error(self.pos, "'%s' operator not defined for %s" % (
- self.operator, type))
- self.type_error()
- return
- self.type = cpp_type
-
-
-class NotNode(UnopNode):
- # 'not' operator
- #
- # operand ExprNode
- operator = '!'
-
- type = PyrexTypes.c_bint_type
-
- def calculate_constant_result(self):
- self.constant_result = not self.operand.constant_result
-
- def compile_time_value(self, denv):
- operand = self.operand.compile_time_value(denv)
- try:
- return not operand
+ error(self.pos, "'%s' operator not defined for %s" % (
+ self.operator, type))
+ self.type_error()
+ return
+ self.type = cpp_type
+
+
+class NotNode(UnopNode):
+ # 'not' operator
+ #
+ # operand ExprNode
+ operator = '!'
+
+ type = PyrexTypes.c_bint_type
+
+ def calculate_constant_result(self):
+ self.constant_result = not self.operand.constant_result
+
+ def compile_time_value(self, denv):
+ operand = self.operand.compile_time_value(denv)
+ try:
+ return not operand
except Exception as e:
- self.compile_time_value_error(e)
-
- def infer_unop_type(self, env, operand_type):
- return PyrexTypes.c_bint_type
-
- def analyse_types(self, env):
- self.operand = self.operand.analyse_types(env)
- operand_type = self.operand.type
- if operand_type.is_cpp_class:
+ self.compile_time_value_error(e)
+
+ def infer_unop_type(self, env, operand_type):
+ return PyrexTypes.c_bint_type
+
+ def analyse_types(self, env):
+ self.operand = self.operand.analyse_types(env)
+ operand_type = self.operand.type
+ if operand_type.is_cpp_class:
self.analyse_cpp_operation(env)
- else:
- self.operand = self.operand.coerce_to_boolean(env)
- return self
-
- def calculate_result_code(self):
- return "(!%s)" % self.operand.result()
-
-
-class UnaryPlusNode(UnopNode):
- # unary '+' operator
-
- operator = '+'
-
- def analyse_c_operation(self, env):
- self.type = PyrexTypes.widest_numeric_type(
- self.operand.type, PyrexTypes.c_int_type)
-
- def py_operation_function(self, code):
- return "PyNumber_Positive"
-
- def calculate_result_code(self):
- if self.is_cpp_operation():
- return "(+%s)" % self.operand.result()
- else:
- return self.operand.result()
-
-
-class UnaryMinusNode(UnopNode):
- # unary '-' operator
-
- operator = '-'
-
- def analyse_c_operation(self, env):
- if self.operand.type.is_numeric:
- self.type = PyrexTypes.widest_numeric_type(
- self.operand.type, PyrexTypes.c_int_type)
- elif self.operand.type.is_enum:
- self.type = PyrexTypes.c_int_type
- else:
- self.type_error()
- if self.type.is_complex:
- self.infix = False
-
- def py_operation_function(self, code):
- return "PyNumber_Negative"
-
- def calculate_result_code(self):
- if self.infix:
- return "(-%s)" % self.operand.result()
- else:
- return "%s(%s)" % (self.operand.type.unary_op('-'), self.operand.result())
-
- def get_constant_c_result_code(self):
- value = self.operand.get_constant_c_result_code()
- if value:
- return "(-%s)" % value
-
-class TildeNode(UnopNode):
- # unary '~' operator
-
- def analyse_c_operation(self, env):
- if self.operand.type.is_int:
- self.type = PyrexTypes.widest_numeric_type(
- self.operand.type, PyrexTypes.c_int_type)
- elif self.operand.type.is_enum:
- self.type = PyrexTypes.c_int_type
- else:
- self.type_error()
-
- def py_operation_function(self, code):
- return "PyNumber_Invert"
-
- def calculate_result_code(self):
- return "(~%s)" % self.operand.result()
-
-
-class CUnopNode(UnopNode):
-
- def is_py_operation(self):
- return False
-
-class DereferenceNode(CUnopNode):
- # unary * operator
-
- operator = '*'
-
- def infer_unop_type(self, env, operand_type):
- if operand_type.is_ptr:
- return operand_type.base_type
- else:
- return PyrexTypes.error_type
-
- def analyse_c_operation(self, env):
- if self.operand.type.is_ptr:
- self.type = self.operand.type.base_type
- else:
- self.type_error()
-
- def calculate_result_code(self):
- return "(*%s)" % self.operand.result()
-
-
-class DecrementIncrementNode(CUnopNode):
- # unary ++/-- operator
-
- def analyse_c_operation(self, env):
- if self.operand.type.is_numeric:
- self.type = PyrexTypes.widest_numeric_type(
- self.operand.type, PyrexTypes.c_int_type)
- elif self.operand.type.is_ptr:
- self.type = self.operand.type
- else:
- self.type_error()
-
- def calculate_result_code(self):
- if self.is_prefix:
- return "(%s%s)" % (self.operator, self.operand.result())
- else:
- return "(%s%s)" % (self.operand.result(), self.operator)
-
-def inc_dec_constructor(is_prefix, operator):
- return lambda pos, **kwds: DecrementIncrementNode(pos, is_prefix=is_prefix, operator=operator, **kwds)
-
-
-class AmpersandNode(CUnopNode):
- # The C address-of operator.
- #
- # operand ExprNode
- operator = '&'
-
- def infer_unop_type(self, env, operand_type):
- return PyrexTypes.c_ptr_type(operand_type)
-
- def analyse_types(self, env):
- self.operand = self.operand.analyse_types(env)
- argtype = self.operand.type
- if argtype.is_cpp_class:
+ else:
+ self.operand = self.operand.coerce_to_boolean(env)
+ return self
+
+ def calculate_result_code(self):
+ return "(!%s)" % self.operand.result()
+
+
+class UnaryPlusNode(UnopNode):
+ # unary '+' operator
+
+ operator = '+'
+
+ def analyse_c_operation(self, env):
+ self.type = PyrexTypes.widest_numeric_type(
+ self.operand.type, PyrexTypes.c_int_type)
+
+ def py_operation_function(self, code):
+ return "PyNumber_Positive"
+
+ def calculate_result_code(self):
+ if self.is_cpp_operation():
+ return "(+%s)" % self.operand.result()
+ else:
+ return self.operand.result()
+
+
+class UnaryMinusNode(UnopNode):
+ # unary '-' operator
+
+ operator = '-'
+
+ def analyse_c_operation(self, env):
+ if self.operand.type.is_numeric:
+ self.type = PyrexTypes.widest_numeric_type(
+ self.operand.type, PyrexTypes.c_int_type)
+ elif self.operand.type.is_enum:
+ self.type = PyrexTypes.c_int_type
+ else:
+ self.type_error()
+ if self.type.is_complex:
+ self.infix = False
+
+ def py_operation_function(self, code):
+ return "PyNumber_Negative"
+
+ def calculate_result_code(self):
+ if self.infix:
+ return "(-%s)" % self.operand.result()
+ else:
+ return "%s(%s)" % (self.operand.type.unary_op('-'), self.operand.result())
+
+ def get_constant_c_result_code(self):
+ value = self.operand.get_constant_c_result_code()
+ if value:
+ return "(-%s)" % value
+
+class TildeNode(UnopNode):
+ # unary '~' operator
+
+ def analyse_c_operation(self, env):
+ if self.operand.type.is_int:
+ self.type = PyrexTypes.widest_numeric_type(
+ self.operand.type, PyrexTypes.c_int_type)
+ elif self.operand.type.is_enum:
+ self.type = PyrexTypes.c_int_type
+ else:
+ self.type_error()
+
+ def py_operation_function(self, code):
+ return "PyNumber_Invert"
+
+ def calculate_result_code(self):
+ return "(~%s)" % self.operand.result()
+
+
+class CUnopNode(UnopNode):
+
+ def is_py_operation(self):
+ return False
+
+class DereferenceNode(CUnopNode):
+ # unary * operator
+
+ operator = '*'
+
+ def infer_unop_type(self, env, operand_type):
+ if operand_type.is_ptr:
+ return operand_type.base_type
+ else:
+ return PyrexTypes.error_type
+
+ def analyse_c_operation(self, env):
+ if self.operand.type.is_ptr:
+ self.type = self.operand.type.base_type
+ else:
+ self.type_error()
+
+ def calculate_result_code(self):
+ return "(*%s)" % self.operand.result()
+
+
+class DecrementIncrementNode(CUnopNode):
+ # unary ++/-- operator
+
+ def analyse_c_operation(self, env):
+ if self.operand.type.is_numeric:
+ self.type = PyrexTypes.widest_numeric_type(
+ self.operand.type, PyrexTypes.c_int_type)
+ elif self.operand.type.is_ptr:
+ self.type = self.operand.type
+ else:
+ self.type_error()
+
+ def calculate_result_code(self):
+ if self.is_prefix:
+ return "(%s%s)" % (self.operator, self.operand.result())
+ else:
+ return "(%s%s)" % (self.operand.result(), self.operator)
+
+def inc_dec_constructor(is_prefix, operator):
+ return lambda pos, **kwds: DecrementIncrementNode(pos, is_prefix=is_prefix, operator=operator, **kwds)
+
+
+class AmpersandNode(CUnopNode):
+ # The C address-of operator.
+ #
+ # operand ExprNode
+ operator = '&'
+
+ def infer_unop_type(self, env, operand_type):
+ return PyrexTypes.c_ptr_type(operand_type)
+
+ def analyse_types(self, env):
+ self.operand = self.operand.analyse_types(env)
+ argtype = self.operand.type
+ if argtype.is_cpp_class:
self.analyse_cpp_operation(env, overload_check=False)
- if not (argtype.is_cfunction or argtype.is_reference or self.operand.is_addressable()):
- if argtype.is_memoryviewslice:
- self.error("Cannot take address of memoryview slice")
- else:
+ if not (argtype.is_cfunction or argtype.is_reference or self.operand.is_addressable()):
+ if argtype.is_memoryviewslice:
+ self.error("Cannot take address of memoryview slice")
+ else:
self.error("Taking address of non-lvalue (type %s)" % argtype)
- return self
- if argtype.is_pyobject:
+ return self
+ if argtype.is_pyobject:
self.error("Cannot take address of Python %s" % (
"variable '%s'" % self.operand.name if self.operand.is_name else
"object attribute '%s'" % self.operand.attribute if self.operand.is_attribute else
"object"))
- return self
+ return self
if not argtype.is_cpp_class or not self.type:
self.type = PyrexTypes.c_ptr_type(argtype)
- return self
-
- def check_const(self):
- return self.operand.check_const_addr()
-
- def error(self, mess):
- error(self.pos, mess)
- self.type = PyrexTypes.error_type
- self.result_code = "<error>"
-
- def calculate_result_code(self):
- return "(&%s)" % self.operand.result()
-
- def generate_result_code(self, code):
+ return self
+
+ def check_const(self):
+ return self.operand.check_const_addr()
+
+ def error(self, mess):
+ error(self.pos, mess)
+ self.type = PyrexTypes.error_type
+ self.result_code = "<error>"
+
+ def calculate_result_code(self):
+ return "(&%s)" % self.operand.result()
+
+ def generate_result_code(self, code):
if (self.operand.type.is_cpp_class and self.exception_check == '+'):
translate_cpp_exception(code, self.pos,
"%s = %s %s;" % (self.result(), self.operator, self.operand.result()),
self.result() if self.type.is_pyobject else None,
self.exception_value, self.in_nogil_context)
-
-
-unop_node_classes = {
- "+": UnaryPlusNode,
- "-": UnaryMinusNode,
- "~": TildeNode,
-}
-
-def unop_node(pos, operator, operand):
- # Construct unop node of appropriate class for
- # given operator.
- if isinstance(operand, IntNode) and operator == '-':
- return IntNode(pos = operand.pos, value = str(-Utils.str_to_number(operand.value)),
- longness=operand.longness, unsigned=operand.unsigned)
- elif isinstance(operand, UnopNode) and operand.operator == operator in '+-':
- warning(pos, "Python has no increment/decrement operator: %s%sx == %s(%sx) == x" % ((operator,)*4), 5)
- return unop_node_classes[operator](pos,
- operator = operator,
- operand = operand)
-
-
-class TypecastNode(ExprNode):
- # C type cast
- #
- # operand ExprNode
- # base_type CBaseTypeNode
- # declarator CDeclaratorNode
- # typecheck boolean
- #
- # If used from a transform, one can if wanted specify the attribute
- # "type" directly and leave base_type and declarator to None
-
- subexprs = ['operand']
- base_type = declarator = type = None
-
- def type_dependencies(self, env):
- return ()
-
- def infer_type(self, env):
- if self.type is None:
- base_type = self.base_type.analyse(env)
- _, self.type = self.declarator.analyse(base_type, env)
- return self.type
-
- def analyse_types(self, env):
- if self.type is None:
- base_type = self.base_type.analyse(env)
- _, self.type = self.declarator.analyse(base_type, env)
- if self.operand.has_constant_result():
- # Must be done after self.type is resolved.
- self.calculate_constant_result()
- if self.type.is_cfunction:
- error(self.pos,
- "Cannot cast to a function type")
- self.type = PyrexTypes.error_type
- self.operand = self.operand.analyse_types(env)
- if self.type is PyrexTypes.c_bint_type:
- # short circuit this to a coercion
- return self.operand.coerce_to_boolean(env)
- to_py = self.type.is_pyobject
- from_py = self.operand.type.is_pyobject
- if from_py and not to_py and self.operand.is_ephemeral():
- if not self.type.is_numeric and not self.type.is_cpp_class:
- error(self.pos, "Casting temporary Python object to non-numeric non-Python type")
- if to_py and not from_py:
- if self.type is bytes_type and self.operand.type.is_int:
- return CoerceIntToBytesNode(self.operand, env)
- elif self.operand.type.can_coerce_to_pyobject(env):
- self.result_ctype = py_object_type
+
+
+unop_node_classes = {
+ "+": UnaryPlusNode,
+ "-": UnaryMinusNode,
+ "~": TildeNode,
+}
+
+def unop_node(pos, operator, operand):
+ # Construct unop node of appropriate class for
+ # given operator.
+ if isinstance(operand, IntNode) and operator == '-':
+ return IntNode(pos = operand.pos, value = str(-Utils.str_to_number(operand.value)),
+ longness=operand.longness, unsigned=operand.unsigned)
+ elif isinstance(operand, UnopNode) and operand.operator == operator in '+-':
+ warning(pos, "Python has no increment/decrement operator: %s%sx == %s(%sx) == x" % ((operator,)*4), 5)
+ return unop_node_classes[operator](pos,
+ operator = operator,
+ operand = operand)
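# --- Editor's note (illustrative sketch, not part of the diff) ---------------
# unop_node() above constant-folds a unary minus applied to an integer literal
# and warns on doubled '+'/'-' operators, since Python has no ++/-- operators.
# The equivalence it warns about, in plain Python:
x = 5
assert --x == -(-x) == x   # '--x' is two unary minuses, not a decrement
assert ++x == +(+x) == x
# ------------------------------------------------------------------------------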
+
+
+class TypecastNode(ExprNode):
+ # C type cast
+ #
+ # operand ExprNode
+ # base_type CBaseTypeNode
+ # declarator CDeclaratorNode
+ # typecheck boolean
+ #
+ # If used from a transform, one can if wanted specify the attribute
+ # "type" directly and leave base_type and declarator to None
+
+ subexprs = ['operand']
+ base_type = declarator = type = None
+
+ def type_dependencies(self, env):
+ return ()
+
+ def infer_type(self, env):
+ if self.type is None:
+ base_type = self.base_type.analyse(env)
+ _, self.type = self.declarator.analyse(base_type, env)
+ return self.type
+
+ def analyse_types(self, env):
+ if self.type is None:
+ base_type = self.base_type.analyse(env)
+ _, self.type = self.declarator.analyse(base_type, env)
+ if self.operand.has_constant_result():
+ # Must be done after self.type is resolved.
+ self.calculate_constant_result()
+ if self.type.is_cfunction:
+ error(self.pos,
+ "Cannot cast to a function type")
+ self.type = PyrexTypes.error_type
+ self.operand = self.operand.analyse_types(env)
+ if self.type is PyrexTypes.c_bint_type:
+ # short circuit this to a coercion
+ return self.operand.coerce_to_boolean(env)
+ to_py = self.type.is_pyobject
+ from_py = self.operand.type.is_pyobject
+ if from_py and not to_py and self.operand.is_ephemeral():
+ if not self.type.is_numeric and not self.type.is_cpp_class:
+ error(self.pos, "Casting temporary Python object to non-numeric non-Python type")
+ if to_py and not from_py:
+ if self.type is bytes_type and self.operand.type.is_int:
+ return CoerceIntToBytesNode(self.operand, env)
+ elif self.operand.type.can_coerce_to_pyobject(env):
+ self.result_ctype = py_object_type
self.operand = self.operand.coerce_to(self.type, env)
- else:
- if self.operand.type.is_ptr:
- if not (self.operand.type.base_type.is_void or self.operand.type.base_type.is_struct):
- error(self.pos, "Python objects cannot be cast from pointers of primitive types")
- else:
- # Should this be an error?
+ else:
+ if self.operand.type.is_ptr:
+ if not (self.operand.type.base_type.is_void or self.operand.type.base_type.is_struct):
+ error(self.pos, "Python objects cannot be cast from pointers of primitive types")
+ else:
+ # Should this be an error?
warning(self.pos, "No conversion from %s to %s, python object pointer used." % (
self.operand.type, self.type))
- self.operand = self.operand.coerce_to_simple(env)
- elif from_py and not to_py:
- if self.type.create_from_py_utility_code(env):
- self.operand = self.operand.coerce_to(self.type, env)
- elif self.type.is_ptr:
- if not (self.type.base_type.is_void or self.type.base_type.is_struct):
- error(self.pos, "Python objects cannot be cast to pointers of primitive types")
- else:
+ self.operand = self.operand.coerce_to_simple(env)
+ elif from_py and not to_py:
+ if self.type.create_from_py_utility_code(env):
+ self.operand = self.operand.coerce_to(self.type, env)
+ elif self.type.is_ptr:
+ if not (self.type.base_type.is_void or self.type.base_type.is_struct):
+ error(self.pos, "Python objects cannot be cast to pointers of primitive types")
+ else:
warning(self.pos, "No conversion from %s to %s, python object pointer used." % (
self.type, self.operand.type))
- elif from_py and to_py:
- if self.typecheck:
- self.operand = PyTypeTestNode(self.operand, self.type, env, notnone=True)
- elif isinstance(self.operand, SliceIndexNode):
- # This cast can influence the created type of string slices.
- self.operand = self.operand.coerce_to(self.type, env)
- elif self.type.is_complex and self.operand.type.is_complex:
- self.operand = self.operand.coerce_to_simple(env)
- elif self.operand.type.is_fused:
- self.operand = self.operand.coerce_to(self.type, env)
- #self.type = self.operand.type
+ elif from_py and to_py:
+ if self.typecheck:
+ self.operand = PyTypeTestNode(self.operand, self.type, env, notnone=True)
+ elif isinstance(self.operand, SliceIndexNode):
+ # This cast can influence the created type of string slices.
+ self.operand = self.operand.coerce_to(self.type, env)
+ elif self.type.is_complex and self.operand.type.is_complex:
+ self.operand = self.operand.coerce_to_simple(env)
+ elif self.operand.type.is_fused:
+ self.operand = self.operand.coerce_to(self.type, env)
+ #self.type = self.operand.type
if self.type.is_ptr and self.type.base_type.is_cfunction and self.type.base_type.nogil:
op_type = self.operand.type
if op_type.is_ptr:
@@ -10469,244 +10469,244 @@ class TypecastNode(ExprNode):
if op_type.is_cfunction and not op_type.nogil:
warning(self.pos,
"Casting a GIL-requiring function into a nogil function circumvents GIL validation", 1)
- return self
-
- def is_simple(self):
- # either temp or a C cast => no side effects other than the operand's
- return self.operand.is_simple()
-
- def is_ephemeral(self):
- # either temp or a C cast => no side effects other than the operand's
- return self.operand.is_ephemeral()
-
- def nonlocally_immutable(self):
- return self.is_temp or self.operand.nonlocally_immutable()
-
- def nogil_check(self, env):
- if self.type and self.type.is_pyobject and self.is_temp:
- self.gil_error()
-
- def check_const(self):
- return self.operand.check_const()
-
- def calculate_constant_result(self):
- self.constant_result = self.calculate_result_code(self.operand.constant_result)
-
- def calculate_result_code(self, operand_result = None):
- if operand_result is None:
- operand_result = self.operand.result()
- if self.type.is_complex:
- operand_result = self.operand.result()
- if self.operand.type.is_complex:
- real_part = self.type.real_type.cast_code("__Pyx_CREAL(%s)" % operand_result)
- imag_part = self.type.real_type.cast_code("__Pyx_CIMAG(%s)" % operand_result)
- else:
- real_part = self.type.real_type.cast_code(operand_result)
- imag_part = "0"
- return "%s(%s, %s)" % (
- self.type.from_parts,
- real_part,
- imag_part)
- else:
- return self.type.cast_code(operand_result)
-
- def get_constant_c_result_code(self):
- operand_result = self.operand.get_constant_c_result_code()
- if operand_result:
- return self.type.cast_code(operand_result)
-
- def result_as(self, type):
- if self.type.is_pyobject and not self.is_temp:
- # Optimise away some unnecessary casting
- return self.operand.result_as(type)
- else:
- return ExprNode.result_as(self, type)
-
- def generate_result_code(self, code):
- if self.is_temp:
- code.putln(
- "%s = (PyObject *)%s;" % (
- self.result(),
- self.operand.result()))
- code.put_incref(self.result(), self.ctype())
-
-
-ERR_START = "Start may not be given"
-ERR_NOT_STOP = "Stop must be provided to indicate shape"
-ERR_STEPS = ("Strides may only be given to indicate contiguity. "
- "Consider slicing it after conversion")
-ERR_NOT_POINTER = "Can only create cython.array from pointer or array"
-ERR_BASE_TYPE = "Pointer base type does not match cython.array base type"
-
-
-class CythonArrayNode(ExprNode):
- """
- Used when a pointer of base_type is cast to a memoryviewslice with that
- base type. i.e.
-
- <int[:M:1, :N]> p
-
- creates a fortran-contiguous cython.array.
-
- We leave the type set to object so coercions to object are more efficient
- and less work. Acquiring a memoryviewslice from this will be just as
- efficient. ExprNode.coerce_to() will do the additional typecheck on
- self.compile_time_type
-
- This also handles <int[:, :]> my_c_array
-
-
- operand ExprNode the thing we're casting
- base_type_node MemoryViewSliceTypeNode the cast expression node
- """
-
- subexprs = ['operand', 'shapes']
-
- shapes = None
- is_temp = True
- mode = "c"
- array_dtype = None
-
- shape_type = PyrexTypes.c_py_ssize_t_type
-
- def analyse_types(self, env):
- from . import MemoryView
-
- self.operand = self.operand.analyse_types(env)
- if self.array_dtype:
- array_dtype = self.array_dtype
- else:
- array_dtype = self.base_type_node.base_type_node.analyse(env)
- axes = self.base_type_node.axes
-
- self.type = error_type
- self.shapes = []
- ndim = len(axes)
-
- # Base type of the pointer or C array we are converting
- base_type = self.operand.type
-
- if not self.operand.type.is_ptr and not self.operand.type.is_array:
- error(self.operand.pos, ERR_NOT_POINTER)
- return self
-
- # Dimension sizes of C array
- array_dimension_sizes = []
- if base_type.is_array:
- while base_type.is_array:
- array_dimension_sizes.append(base_type.size)
- base_type = base_type.base_type
- elif base_type.is_ptr:
- base_type = base_type.base_type
- else:
- error(self.pos, "unexpected base type %s found" % base_type)
- return self
-
- if not (base_type.same_as(array_dtype) or base_type.is_void):
- error(self.operand.pos, ERR_BASE_TYPE)
- return self
- elif self.operand.type.is_array and len(array_dimension_sizes) != ndim:
- error(self.operand.pos,
- "Expected %d dimensions, array has %d dimensions" %
- (ndim, len(array_dimension_sizes)))
- return self
-
- # Verify the start, stop and step values
- # In case of a C array, use the size of C array in each dimension to
- # get an automatic cast
- for axis_no, axis in enumerate(axes):
- if not axis.start.is_none:
- error(axis.start.pos, ERR_START)
- return self
-
- if axis.stop.is_none:
- if array_dimension_sizes:
- dimsize = array_dimension_sizes[axis_no]
- axis.stop = IntNode(self.pos, value=str(dimsize),
- constant_result=dimsize,
- type=PyrexTypes.c_int_type)
- else:
- error(axis.pos, ERR_NOT_STOP)
- return self
-
- axis.stop = axis.stop.analyse_types(env)
- shape = axis.stop.coerce_to(self.shape_type, env)
- if not shape.is_literal:
- shape.coerce_to_temp(env)
-
- self.shapes.append(shape)
-
- first_or_last = axis_no in (0, ndim - 1)
- if not axis.step.is_none and first_or_last:
- # '1' in the first or last dimension denotes F or C contiguity
- axis.step = axis.step.analyse_types(env)
- if (not axis.step.type.is_int and axis.step.is_literal and not
- axis.step.type.is_error):
- error(axis.step.pos, "Expected an integer literal")
- return self
-
- if axis.step.compile_time_value(env) != 1:
- error(axis.step.pos, ERR_STEPS)
- return self
-
- if axis_no == 0:
- self.mode = "fortran"
-
- elif not axis.step.is_none and not first_or_last:
- # step provided in some other dimension
- error(axis.step.pos, ERR_STEPS)
- return self
-
- if not self.operand.is_name:
- self.operand = self.operand.coerce_to_temp(env)
-
- axes = [('direct', 'follow')] * len(axes)
- if self.mode == "fortran":
- axes[0] = ('direct', 'contig')
- else:
- axes[-1] = ('direct', 'contig')
-
- self.coercion_type = PyrexTypes.MemoryViewSliceType(array_dtype, axes)
+ return self
+
+ def is_simple(self):
+ # either temp or a C cast => no side effects other than the operand's
+ return self.operand.is_simple()
+
+ def is_ephemeral(self):
+ # either temp or a C cast => no side effects other than the operand's
+ return self.operand.is_ephemeral()
+
+ def nonlocally_immutable(self):
+ return self.is_temp or self.operand.nonlocally_immutable()
+
+ def nogil_check(self, env):
+ if self.type and self.type.is_pyobject and self.is_temp:
+ self.gil_error()
+
+ def check_const(self):
+ return self.operand.check_const()
+
+ def calculate_constant_result(self):
+ self.constant_result = self.calculate_result_code(self.operand.constant_result)
+
+ def calculate_result_code(self, operand_result = None):
+ if operand_result is None:
+ operand_result = self.operand.result()
+ if self.type.is_complex:
+ operand_result = self.operand.result()
+ if self.operand.type.is_complex:
+ real_part = self.type.real_type.cast_code("__Pyx_CREAL(%s)" % operand_result)
+ imag_part = self.type.real_type.cast_code("__Pyx_CIMAG(%s)" % operand_result)
+ else:
+ real_part = self.type.real_type.cast_code(operand_result)
+ imag_part = "0"
+ return "%s(%s, %s)" % (
+ self.type.from_parts,
+ real_part,
+ imag_part)
+ else:
+ return self.type.cast_code(operand_result)
+
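+ # Editorial note (illustrative, not part of the original change): for a cast like
+ # <double complex>x the complex branch above emits C code along the lines of
+ #
+ #     __pyx_t_double_complex_from_parts(((double)__Pyx_CREAL(x)), ((double)__Pyx_CIMAG(x)))
+ #
+ # when the operand is already complex, and a plain C cast such as ((double)x)
+ # otherwise; the exact constructor name comes from self.type.from_parts.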
+ def get_constant_c_result_code(self):
+ operand_result = self.operand.get_constant_c_result_code()
+ if operand_result:
+ return self.type.cast_code(operand_result)
+
+ def result_as(self, type):
+ if self.type.is_pyobject and not self.is_temp:
+ # Optimise away some unnecessary casting
+ return self.operand.result_as(type)
+ else:
+ return ExprNode.result_as(self, type)
+
+ def generate_result_code(self, code):
+ if self.is_temp:
+ code.putln(
+ "%s = (PyObject *)%s;" % (
+ self.result(),
+ self.operand.result()))
+ code.put_incref(self.result(), self.ctype())
+
+
+ERR_START = "Start may not be given"
+ERR_NOT_STOP = "Stop must be provided to indicate shape"
+ERR_STEPS = ("Strides may only be given to indicate contiguity. "
+ "Consider slicing it after conversion")
+ERR_NOT_POINTER = "Can only create cython.array from pointer or array"
+ERR_BASE_TYPE = "Pointer base type does not match cython.array base type"
+
+
+class CythonArrayNode(ExprNode):
+ """
+ Used when a pointer of base_type is cast to a memoryviewslice with that
+ base type. i.e.
+
+ <int[:M:1, :N]> p
+
+ creates a fortran-contiguous cython.array.
+
+ We leave the type set to object so coercions to object are more efficient
+ and less work. Acquiring a memoryviewslice from this will be just as
+ efficient. ExprNode.coerce_to() will do the additional typecheck on
+ self.compile_time_type
+
+ This also handles <int[:, :]> my_c_array
+
+
+ operand ExprNode the thing we're casting
+ base_type_node MemoryViewSliceTypeNode the cast expression node
+ """
+
+ subexprs = ['operand', 'shapes']
+
+ shapes = None
+ is_temp = True
+ mode = "c"
+ array_dtype = None
+
+ shape_type = PyrexTypes.c_py_ssize_t_type
+
+ def analyse_types(self, env):
+ from . import MemoryView
+
+ self.operand = self.operand.analyse_types(env)
+ if self.array_dtype:
+ array_dtype = self.array_dtype
+ else:
+ array_dtype = self.base_type_node.base_type_node.analyse(env)
+ axes = self.base_type_node.axes
+
+ self.type = error_type
+ self.shapes = []
+ ndim = len(axes)
+
+ # Base type of the pointer or C array we are converting
+ base_type = self.operand.type
+
+ if not self.operand.type.is_ptr and not self.operand.type.is_array:
+ error(self.operand.pos, ERR_NOT_POINTER)
+ return self
+
+ # Dimension sizes of C array
+ array_dimension_sizes = []
+ if base_type.is_array:
+ while base_type.is_array:
+ array_dimension_sizes.append(base_type.size)
+ base_type = base_type.base_type
+ elif base_type.is_ptr:
+ base_type = base_type.base_type
+ else:
+ error(self.pos, "unexpected base type %s found" % base_type)
+ return self
+
+ if not (base_type.same_as(array_dtype) or base_type.is_void):
+ error(self.operand.pos, ERR_BASE_TYPE)
+ return self
+ elif self.operand.type.is_array and len(array_dimension_sizes) != ndim:
+ error(self.operand.pos,
+ "Expected %d dimensions, array has %d dimensions" %
+ (ndim, len(array_dimension_sizes)))
+ return self
+
+ # Verify the start, stop and step values
+ # In case of a C array, use the size of C array in each dimension to
+ # get an automatic cast
+ for axis_no, axis in enumerate(axes):
+ if not axis.start.is_none:
+ error(axis.start.pos, ERR_START)
+ return self
+
+ if axis.stop.is_none:
+ if array_dimension_sizes:
+ dimsize = array_dimension_sizes[axis_no]
+ axis.stop = IntNode(self.pos, value=str(dimsize),
+ constant_result=dimsize,
+ type=PyrexTypes.c_int_type)
+ else:
+ error(axis.pos, ERR_NOT_STOP)
+ return self
+
+ axis.stop = axis.stop.analyse_types(env)
+ shape = axis.stop.coerce_to(self.shape_type, env)
+ if not shape.is_literal:
+ shape.coerce_to_temp(env)
+
+ self.shapes.append(shape)
+
+ first_or_last = axis_no in (0, ndim - 1)
+ if not axis.step.is_none and first_or_last:
+ # '1' in the first or last dimension denotes F or C contiguity
+ axis.step = axis.step.analyse_types(env)
+ if (not axis.step.type.is_int and axis.step.is_literal and not
+ axis.step.type.is_error):
+ error(axis.step.pos, "Expected an integer literal")
+ return self
+
+ if axis.step.compile_time_value(env) != 1:
+ error(axis.step.pos, ERR_STEPS)
+ return self
+
+ if axis_no == 0:
+ self.mode = "fortran"
+
+ elif not axis.step.is_none and not first_or_last:
+ # step provided in some other dimension
+ error(axis.step.pos, ERR_STEPS)
+ return self
+
+ if not self.operand.is_name:
+ self.operand = self.operand.coerce_to_temp(env)
+
+ axes = [('direct', 'follow')] * len(axes)
+ if self.mode == "fortran":
+ axes[0] = ('direct', 'contig')
+ else:
+ axes[-1] = ('direct', 'contig')
+
+ self.coercion_type = PyrexTypes.MemoryViewSliceType(array_dtype, axes)
self.coercion_type.validate_memslice_dtype(self.pos)
- self.type = self.get_cython_array_type(env)
- MemoryView.use_cython_array_utility_code(env)
- env.use_utility_code(MemoryView.typeinfo_to_format_code)
- return self
-
- def allocate_temp_result(self, code):
- if self.temp_code:
+ self.type = self.get_cython_array_type(env)
+ MemoryView.use_cython_array_utility_code(env)
+ env.use_utility_code(MemoryView.typeinfo_to_format_code)
+ return self
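+ # Editorial note (illustrative): for a 3-D cast the axes list built above ends up as
+ # [('direct', 'contig'), ('direct', 'follow'), ('direct', 'follow')] in "fortran" mode and
+ # [('direct', 'follow'), ('direct', 'follow'), ('direct', 'contig')] in the default "c"
+ # mode; that packing is what MemoryViewSliceType uses to describe and validate the
+ # contiguity of the resulting array.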
+
+ def allocate_temp_result(self, code):
+ if self.temp_code:
raise RuntimeError("temp allocated multiple times")
-
- self.temp_code = code.funcstate.allocate_temp(self.type, True)
-
- def infer_type(self, env):
- return self.get_cython_array_type(env)
-
- def get_cython_array_type(self, env):
+
+ self.temp_code = code.funcstate.allocate_temp(self.type, True)
+
+ def infer_type(self, env):
+ return self.get_cython_array_type(env)
+
+ def get_cython_array_type(self, env):
cython_scope = env.global_scope().context.cython_scope
cython_scope.load_cythonscope()
return cython_scope.viewscope.lookup("array").type
-
- def generate_result_code(self, code):
- from . import Buffer
-
- shapes = [self.shape_type.cast_code(shape.result())
- for shape in self.shapes]
- dtype = self.coercion_type.dtype
-
- shapes_temp = code.funcstate.allocate_temp(py_object_type, True)
- format_temp = code.funcstate.allocate_temp(py_object_type, True)
-
+
+ def generate_result_code(self, code):
+ from . import Buffer
+
+ shapes = [self.shape_type.cast_code(shape.result())
+ for shape in self.shapes]
+ dtype = self.coercion_type.dtype
+
+ shapes_temp = code.funcstate.allocate_temp(py_object_type, True)
+ format_temp = code.funcstate.allocate_temp(py_object_type, True)
+
itemsize = "sizeof(%s)" % dtype.empty_declaration_code()
- type_info = Buffer.get_type_information_cname(code, dtype)
-
- if self.operand.type.is_ptr:
- code.putln("if (!%s) {" % self.operand.result())
- code.putln( 'PyErr_SetString(PyExc_ValueError,'
- '"Cannot create cython.array from NULL pointer");')
- code.putln(code.error_goto(self.operand.pos))
- code.putln("}")
-
+ type_info = Buffer.get_type_information_cname(code, dtype)
+
+ if self.operand.type.is_ptr:
+ code.putln("if (!%s) {" % self.operand.result())
+ code.putln( 'PyErr_SetString(PyExc_ValueError,'
+ '"Cannot create cython.array from NULL pointer");')
+ code.putln(code.error_goto(self.operand.pos))
+ code.putln("}")
+
code.putln("%s = __pyx_format_from_typeinfo(&%s); %s" % (
format_temp,
type_info,
@@ -10714,143 +10714,143 @@ class CythonArrayNode(ExprNode):
))
code.put_gotref(format_temp)
- buildvalue_fmt = " __PYX_BUILD_PY_SSIZE_T " * len(shapes)
+ buildvalue_fmt = " __PYX_BUILD_PY_SSIZE_T " * len(shapes)
code.putln('%s = Py_BuildValue((char*) "(" %s ")", %s); %s' % (
shapes_temp,
buildvalue_fmt,
", ".join(shapes),
code.error_goto_if_null(shapes_temp, self.pos),
))
- code.put_gotref(shapes_temp)
-
- tup = (self.result(), shapes_temp, itemsize, format_temp,
- self.mode, self.operand.result())
- code.putln('%s = __pyx_array_new('
- '%s, %s, PyBytes_AS_STRING(%s), '
- '(char *) "%s", (char *) %s);' % tup)
- code.putln(code.error_goto_if_null(self.result(), self.pos))
- code.put_gotref(self.result())
-
- def dispose(temp):
- code.put_decref_clear(temp, py_object_type)
- code.funcstate.release_temp(temp)
-
- dispose(shapes_temp)
- dispose(format_temp)
-
- @classmethod
- def from_carray(cls, src_node, env):
- """
- Given a C array type, return a CythonArrayNode
- """
- pos = src_node.pos
- base_type = src_node.type
-
- none_node = NoneNode(pos)
- axes = []
-
- while base_type.is_array:
- axes.append(SliceNode(pos, start=none_node, stop=none_node,
- step=none_node))
- base_type = base_type.base_type
- axes[-1].step = IntNode(pos, value="1", is_c_literal=True)
-
- memslicenode = Nodes.MemoryViewSliceTypeNode(pos, axes=axes,
- base_type_node=base_type)
- result = CythonArrayNode(pos, base_type_node=memslicenode,
- operand=src_node, array_dtype=base_type)
- result = result.analyse_types(env)
- return result
-
-class SizeofNode(ExprNode):
- # Abstract base class for sizeof(x) expression nodes.
-
- type = PyrexTypes.c_size_t_type
-
- def check_const(self):
- return True
-
- def generate_result_code(self, code):
- pass
-
-
-class SizeofTypeNode(SizeofNode):
- # C sizeof function applied to a type
- #
- # base_type CBaseTypeNode
- # declarator CDeclaratorNode
-
- subexprs = []
- arg_type = None
-
- def analyse_types(self, env):
- # we may have incorrectly interpreted a dotted name as a type rather than an attribute
- # this could be better handled by more uniformly treating types as runtime-available objects
- if 0 and self.base_type.module_path:
- path = self.base_type.module_path
- obj = env.lookup(path[0])
- if obj.as_module is None:
- operand = NameNode(pos=self.pos, name=path[0])
- for attr in path[1:]:
- operand = AttributeNode(pos=self.pos, obj=operand, attribute=attr)
- operand = AttributeNode(pos=self.pos, obj=operand, attribute=self.base_type.name)
+ code.put_gotref(shapes_temp)
+
+ tup = (self.result(), shapes_temp, itemsize, format_temp,
+ self.mode, self.operand.result())
+ code.putln('%s = __pyx_array_new('
+ '%s, %s, PyBytes_AS_STRING(%s), '
+ '(char *) "%s", (char *) %s);' % tup)
+ code.putln(code.error_goto_if_null(self.result(), self.pos))
+ code.put_gotref(self.result())
+
+ def dispose(temp):
+ code.put_decref_clear(temp, py_object_type)
+ code.funcstate.release_temp(temp)
+
+ dispose(shapes_temp)
+ dispose(format_temp)
+
+ @classmethod
+ def from_carray(cls, src_node, env):
+ """
+ Given a C array type, return a CythonArrayNode
+ """
+ pos = src_node.pos
+ base_type = src_node.type
+
+ none_node = NoneNode(pos)
+ axes = []
+
+ while base_type.is_array:
+ axes.append(SliceNode(pos, start=none_node, stop=none_node,
+ step=none_node))
+ base_type = base_type.base_type
+ axes[-1].step = IntNode(pos, value="1", is_c_literal=True)
+
+ memslicenode = Nodes.MemoryViewSliceTypeNode(pos, axes=axes,
+ base_type_node=base_type)
+ result = CythonArrayNode(pos, base_type_node=memslicenode,
+ operand=src_node, array_dtype=base_type)
+ result = result.analyse_types(env)
+ return result
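+ # Editorial sketch (illustrative, usage assumed): from_carray() wraps a declared C array
+ # in a CythonArrayNode, roughly what is needed for code like
+ #
+ #     cdef int arr[10][20]
+ #     cdef int[:, :] mv = arr   # shapes (10, 20) come from the array type
+ #
+ # It builds slice axes with empty start/stop, marks the innermost step as 1, and then
+ # runs analyse_types() on the constructed node.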
+
+class SizeofNode(ExprNode):
+ # Abstract base class for sizeof(x) expression nodes.
+
+ type = PyrexTypes.c_size_t_type
+
+ def check_const(self):
+ return True
+
+ def generate_result_code(self, code):
+ pass
+
+
+class SizeofTypeNode(SizeofNode):
+ # C sizeof function applied to a type
+ #
+ # base_type CBaseTypeNode
+ # declarator CDeclaratorNode
+
+ subexprs = []
+ arg_type = None
+
+ def analyse_types(self, env):
+ # we may have incorrectly interpreted a dotted name as a type rather than an attribute
+ # this could be better handled by more uniformly treating types as runtime-available objects
+ if 0 and self.base_type.module_path:
+ path = self.base_type.module_path
+ obj = env.lookup(path[0])
+ if obj.as_module is None:
+ operand = NameNode(pos=self.pos, name=path[0])
+ for attr in path[1:]:
+ operand = AttributeNode(pos=self.pos, obj=operand, attribute=attr)
+ operand = AttributeNode(pos=self.pos, obj=operand, attribute=self.base_type.name)
node = SizeofVarNode(self.pos, operand=operand).analyse_types(env)
- return node
- if self.arg_type is None:
- base_type = self.base_type.analyse(env)
- _, arg_type = self.declarator.analyse(base_type, env)
- self.arg_type = arg_type
- self.check_type()
- return self
-
- def check_type(self):
- arg_type = self.arg_type
+ return node
+ if self.arg_type is None:
+ base_type = self.base_type.analyse(env)
+ _, arg_type = self.declarator.analyse(base_type, env)
+ self.arg_type = arg_type
+ self.check_type()
+ return self
+
+ def check_type(self):
+ arg_type = self.arg_type
if not arg_type:
return
- if arg_type.is_pyobject and not arg_type.is_extension_type:
- error(self.pos, "Cannot take sizeof Python object")
- elif arg_type.is_void:
- error(self.pos, "Cannot take sizeof void")
- elif not arg_type.is_complete():
- error(self.pos, "Cannot take sizeof incomplete type '%s'" % arg_type)
-
- def calculate_result_code(self):
- if self.arg_type.is_extension_type:
- # the size of the pointer is boring
- # we want the size of the actual struct
- arg_code = self.arg_type.declaration_code("", deref=1)
- else:
+ if arg_type.is_pyobject and not arg_type.is_extension_type:
+ error(self.pos, "Cannot take sizeof Python object")
+ elif arg_type.is_void:
+ error(self.pos, "Cannot take sizeof void")
+ elif not arg_type.is_complete():
+ error(self.pos, "Cannot take sizeof incomplete type '%s'" % arg_type)
+
+ def calculate_result_code(self):
+ if self.arg_type.is_extension_type:
+ # the size of the pointer is boring
+ # we want the size of the actual struct
+ arg_code = self.arg_type.declaration_code("", deref=1)
+ else:
arg_code = self.arg_type.empty_declaration_code()
- return "(sizeof(%s))" % arg_code
-
-
-class SizeofVarNode(SizeofNode):
- # C sizeof function applied to a variable
- #
- # operand ExprNode
-
- subexprs = ['operand']
-
- def analyse_types(self, env):
- # We may actually be looking at a type rather than a variable...
- # If we are, traditional analysis would fail...
- operand_as_type = self.operand.analyse_as_type(env)
- if operand_as_type:
- self.arg_type = operand_as_type
- if self.arg_type.is_fused:
- self.arg_type = self.arg_type.specialize(env.fused_to_specific)
- self.__class__ = SizeofTypeNode
- self.check_type()
- else:
- self.operand = self.operand.analyse_types(env)
- return self
-
- def calculate_result_code(self):
- return "(sizeof(%s))" % self.operand.result()
-
- def generate_result_code(self, code):
- pass
-
+ return "(sizeof(%s))" % arg_code
+
+
+class SizeofVarNode(SizeofNode):
+ # C sizeof function applied to a variable
+ #
+ # operand ExprNode
+
+ subexprs = ['operand']
+
+ def analyse_types(self, env):
+ # We may actually be looking at a type rather than a variable...
+ # If we are, traditional analysis would fail...
+ operand_as_type = self.operand.analyse_as_type(env)
+ if operand_as_type:
+ self.arg_type = operand_as_type
+ if self.arg_type.is_fused:
+ self.arg_type = self.arg_type.specialize(env.fused_to_specific)
+ self.__class__ = SizeofTypeNode
+ self.check_type()
+ else:
+ self.operand = self.operand.analyse_types(env)
+ return self
+
+ def calculate_result_code(self):
+ return "(sizeof(%s))" % self.operand.result()
+
+ def generate_result_code(self, code):
+ pass
+
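+ # Editorial note (illustrative): the two sizeof nodes mirror the two spellings in
+ # Cython source, e.g.
+ #
+ #     cdef size_t a = sizeof(int)      # handled as SizeofTypeNode
+ #     cdef size_t b = sizeof(my_var)   # handled as SizeofVarNode
+ #
+ # and, as analyse_types() above shows, sizeof(SomeType) written in the variable form is
+ # re-dispatched to SizeofTypeNode once the operand resolves to a type.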
class TypeidNode(ExprNode):
# C++ typeid operator applied to a type or variable
@@ -10923,153 +10923,153 @@ class TypeidNode(ExprNode):
"%s = typeid(%s);" % (self.temp_code, arg_code),
None, None, self.in_nogil_context)
-class TypeofNode(ExprNode):
- # Compile-time type of an expression, as a string.
- #
- # operand ExprNode
- # literal StringNode # internal
-
- literal = None
- type = py_object_type
-
- subexprs = ['literal'] # 'operand' will be ignored after type analysis!
-
- def analyse_types(self, env):
- self.operand = self.operand.analyse_types(env)
- value = StringEncoding.EncodedString(str(self.operand.type)) #self.operand.type.typeof_name())
- literal = StringNode(self.pos, value=value)
- literal = literal.analyse_types(env)
- self.literal = literal.coerce_to_pyobject(env)
- return self
-
+class TypeofNode(ExprNode):
+ # Compile-time type of an expression, as a string.
+ #
+ # operand ExprNode
+ # literal StringNode # internal
+
+ literal = None
+ type = py_object_type
+
+ subexprs = ['literal'] # 'operand' will be ignored after type analysis!
+
+ def analyse_types(self, env):
+ self.operand = self.operand.analyse_types(env)
+ value = StringEncoding.EncodedString(str(self.operand.type)) #self.operand.type.typeof_name())
+ literal = StringNode(self.pos, value=value)
+ literal = literal.analyse_types(env)
+ self.literal = literal.coerce_to_pyobject(env)
+ return self
+
def analyse_as_type(self, env):
self.operand = self.operand.analyse_types(env)
return self.operand.type
- def may_be_none(self):
- return False
-
- def generate_evaluation_code(self, code):
- self.literal.generate_evaluation_code(code)
-
- def calculate_result_code(self):
- return self.literal.calculate_result_code()
-
-#-------------------------------------------------------------------
-#
-# Binary operator nodes
-#
-#-------------------------------------------------------------------
-
-try:
- matmul_operator = operator.matmul
-except AttributeError:
- def matmul_operator(a, b):
- try:
- func = a.__matmul__
- except AttributeError:
- func = b.__rmatmul__
- return func(a, b)
-
-compile_time_binary_operators = {
- '<': operator.lt,
- '<=': operator.le,
- '==': operator.eq,
- '!=': operator.ne,
- '>=': operator.ge,
- '>': operator.gt,
- 'is': operator.is_,
- 'is_not': operator.is_not,
- '+': operator.add,
- '&': operator.and_,
- '/': operator.truediv,
- '//': operator.floordiv,
- '<<': operator.lshift,
- '%': operator.mod,
- '*': operator.mul,
- '|': operator.or_,
- '**': operator.pow,
- '>>': operator.rshift,
- '-': operator.sub,
- '^': operator.xor,
- '@': matmul_operator,
- 'in': lambda x, seq: x in seq,
- 'not_in': lambda x, seq: x not in seq,
-}
-
-def get_compile_time_binop(node):
- func = compile_time_binary_operators.get(node.operator)
- if not func:
- error(node.pos,
- "Binary '%s' not supported in compile-time expression"
- % node.operator)
- return func
-
-
-class BinopNode(ExprNode):
- # operator string
- # operand1 ExprNode
- # operand2 ExprNode
- #
- # Processing during analyse_expressions phase:
- #
- # analyse_c_operation
- # Called when neither operand is a pyobject.
- # - Check operand types and coerce if needed.
- # - Determine result type and result code fragment.
- # - Allocate temporary for result if needed.
-
- subexprs = ['operand1', 'operand2']
- inplace = False
-
- def calculate_constant_result(self):
- func = compile_time_binary_operators[self.operator]
- self.constant_result = func(
- self.operand1.constant_result,
- self.operand2.constant_result)
-
- def compile_time_value(self, denv):
- func = get_compile_time_binop(self)
- operand1 = self.operand1.compile_time_value(denv)
- operand2 = self.operand2.compile_time_value(denv)
- try:
- return func(operand1, operand2)
+ def may_be_none(self):
+ return False
+
+ def generate_evaluation_code(self, code):
+ self.literal.generate_evaluation_code(code)
+
+ def calculate_result_code(self):
+ return self.literal.calculate_result_code()
+
+#-------------------------------------------------------------------
+#
+# Binary operator nodes
+#
+#-------------------------------------------------------------------
+
+try:
+ matmul_operator = operator.matmul
+except AttributeError:
+ def matmul_operator(a, b):
+ try:
+ func = a.__matmul__
+ except AttributeError:
+ func = b.__rmatmul__
+ return func(a, b)
+
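+# Editorial note (illustrative): the fallback above only matters on Python versions
+# without operator.matmul (added for PEP 465 in Python 3.5); it emulates the "@"
+# operator through the operands' __matmul__/__rmatmul__ hooks so compile-time
+# evaluation of "@" keeps working there.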
+compile_time_binary_operators = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '==': operator.eq,
+ '!=': operator.ne,
+ '>=': operator.ge,
+ '>': operator.gt,
+ 'is': operator.is_,
+ 'is_not': operator.is_not,
+ '+': operator.add,
+ '&': operator.and_,
+ '/': operator.truediv,
+ '//': operator.floordiv,
+ '<<': operator.lshift,
+ '%': operator.mod,
+ '*': operator.mul,
+ '|': operator.or_,
+ '**': operator.pow,
+ '>>': operator.rshift,
+ '-': operator.sub,
+ '^': operator.xor,
+ '@': matmul_operator,
+ 'in': lambda x, seq: x in seq,
+ 'not_in': lambda x, seq: x not in seq,
+}
+
+def get_compile_time_binop(node):
+ func = compile_time_binary_operators.get(node.operator)
+ if not func:
+ error(node.pos,
+ "Binary '%s' not supported in compile-time expression"
+ % node.operator)
+ return func
+
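+# Editorial sketch (illustrative): this table is what BinopNode.compile_time_value()
+# and calculate_constant_result() consult, so a compile-time constant expression such as
+#
+#     DEF BUF_SIZE = 1 << 10    # folded via compile_time_binary_operators['<<']
+#
+# is evaluated by looking up the operator string and applying the plain Python callable
+# to the already-evaluated operands.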
+
+class BinopNode(ExprNode):
+ # operator string
+ # operand1 ExprNode
+ # operand2 ExprNode
+ #
+ # Processing during analyse_expressions phase:
+ #
+ # analyse_c_operation
+ # Called when neither operand is a pyobject.
+ # - Check operand types and coerce if needed.
+ # - Determine result type and result code fragment.
+ # - Allocate temporary for result if needed.
+
+ subexprs = ['operand1', 'operand2']
+ inplace = False
+
+ def calculate_constant_result(self):
+ func = compile_time_binary_operators[self.operator]
+ self.constant_result = func(
+ self.operand1.constant_result,
+ self.operand2.constant_result)
+
+ def compile_time_value(self, denv):
+ func = get_compile_time_binop(self)
+ operand1 = self.operand1.compile_time_value(denv)
+ operand2 = self.operand2.compile_time_value(denv)
+ try:
+ return func(operand1, operand2)
except Exception as e:
- self.compile_time_value_error(e)
-
- def infer_type(self, env):
- return self.result_type(self.operand1.infer_type(env),
+ self.compile_time_value_error(e)
+
+ def infer_type(self, env):
+ return self.result_type(self.operand1.infer_type(env),
self.operand2.infer_type(env), env)
-
- def analyse_types(self, env):
- self.operand1 = self.operand1.analyse_types(env)
- self.operand2 = self.operand2.analyse_types(env)
- self.analyse_operation(env)
- return self
-
- def analyse_operation(self, env):
+
+ def analyse_types(self, env):
+ self.operand1 = self.operand1.analyse_types(env)
+ self.operand2 = self.operand2.analyse_types(env)
+ self.analyse_operation(env)
+ return self
+
+ def analyse_operation(self, env):
if self.is_pythran_operation(env):
self.type = self.result_type(self.operand1.type,
self.operand2.type, env)
assert self.type.is_pythran_expr
self.is_temp = 1
elif self.is_py_operation():
- self.coerce_operands_to_pyobjects(env)
- self.type = self.result_type(self.operand1.type,
+ self.coerce_operands_to_pyobjects(env)
+ self.type = self.result_type(self.operand1.type,
self.operand2.type, env)
- assert self.type.is_pyobject
- self.is_temp = 1
- elif self.is_cpp_operation():
- self.analyse_cpp_operation(env)
- else:
- self.analyse_c_operation(env)
-
- def is_py_operation(self):
- return self.is_py_operation_types(self.operand1.type, self.operand2.type)
-
- def is_py_operation_types(self, type1, type2):
+ assert self.type.is_pyobject
+ self.is_temp = 1
+ elif self.is_cpp_operation():
+ self.analyse_cpp_operation(env)
+ else:
+ self.analyse_c_operation(env)
+
+ def is_py_operation(self):
+ return self.is_py_operation_types(self.operand1.type, self.operand2.type)
+
+ def is_py_operation_types(self, type1, type2):
return type1.is_pyobject or type2.is_pyobject or type1.is_ctuple or type2.is_ctuple
-
+
def is_pythran_operation(self, env):
return self.is_pythran_operation_types(self.operand1.type, self.operand2.type, env)
@@ -11079,16 +11079,16 @@ class BinopNode(ExprNode):
(is_pythran_supported_operation_type(type1) and is_pythran_supported_operation_type(type2)) and \
(is_pythran_expr(type1) or is_pythran_expr(type2))
- def is_cpp_operation(self):
- return (self.operand1.type.is_cpp_class
- or self.operand2.type.is_cpp_class)
-
- def analyse_cpp_operation(self, env):
- entry = env.lookup_operator(self.operator, [self.operand1, self.operand2])
- if not entry:
- self.type_error()
- return
- func_type = entry.type
+ def is_cpp_operation(self):
+ return (self.operand1.type.is_cpp_class
+ or self.operand2.type.is_cpp_class)
+
+ def analyse_cpp_operation(self, env):
+ entry = env.lookup_operator(self.operator, [self.operand1, self.operand2])
+ if not entry:
+ self.type_error()
+ return
+ func_type = entry.type
self.exception_check = func_type.exception_check
self.exception_value = func_type.exception_value
if self.exception_check == '+':
@@ -11097,59 +11097,59 @@ class BinopNode(ExprNode):
self.is_temp = 1
if self.exception_value is None:
env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
- if func_type.is_ptr:
- func_type = func_type.base_type
- if len(func_type.args) == 1:
- self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env)
- else:
- self.operand1 = self.operand1.coerce_to(func_type.args[0].type, env)
- self.operand2 = self.operand2.coerce_to(func_type.args[1].type, env)
- self.type = func_type.return_type
-
+ if func_type.is_ptr:
+ func_type = func_type.base_type
+ if len(func_type.args) == 1:
+ self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env)
+ else:
+ self.operand1 = self.operand1.coerce_to(func_type.args[0].type, env)
+ self.operand2 = self.operand2.coerce_to(func_type.args[1].type, env)
+ self.type = func_type.return_type
+
def result_type(self, type1, type2, env):
if self.is_pythran_operation_types(type1, type2, env):
return PythranExpr(pythran_binop_type(self.operator, type1, type2))
- if self.is_py_operation_types(type1, type2):
- if type2.is_string:
- type2 = Builtin.bytes_type
- elif type2.is_pyunicode_ptr:
- type2 = Builtin.unicode_type
- if type1.is_string:
- type1 = Builtin.bytes_type
- elif type1.is_pyunicode_ptr:
- type1 = Builtin.unicode_type
- if type1.is_builtin_type or type2.is_builtin_type:
- if type1 is type2 and self.operator in '**%+|&^':
- # FIXME: at least these operators should be safe - others?
- return type1
- result_type = self.infer_builtin_types_operation(type1, type2)
- if result_type is not None:
- return result_type
- return py_object_type
+ if self.is_py_operation_types(type1, type2):
+ if type2.is_string:
+ type2 = Builtin.bytes_type
+ elif type2.is_pyunicode_ptr:
+ type2 = Builtin.unicode_type
+ if type1.is_string:
+ type1 = Builtin.bytes_type
+ elif type1.is_pyunicode_ptr:
+ type1 = Builtin.unicode_type
+ if type1.is_builtin_type or type2.is_builtin_type:
+ if type1 is type2 and self.operator in '**%+|&^':
+ # FIXME: at least these operators should be safe - others?
+ return type1
+ result_type = self.infer_builtin_types_operation(type1, type2)
+ if result_type is not None:
+ return result_type
+ return py_object_type
elif type1.is_error or type2.is_error:
return PyrexTypes.error_type
- else:
- return self.compute_c_result_type(type1, type2)
-
- def infer_builtin_types_operation(self, type1, type2):
- return None
-
- def nogil_check(self, env):
- if self.is_py_operation():
- self.gil_error()
-
- def coerce_operands_to_pyobjects(self, env):
- self.operand1 = self.operand1.coerce_to_pyobject(env)
- self.operand2 = self.operand2.coerce_to_pyobject(env)
-
- def check_const(self):
- return self.operand1.check_const() and self.operand2.check_const()
-
- def is_ephemeral(self):
- return (super(BinopNode, self).is_ephemeral() or
- self.operand1.is_ephemeral() or self.operand2.is_ephemeral())
-
- def generate_result_code(self, code):
+ else:
+ return self.compute_c_result_type(type1, type2)
+
+ def infer_builtin_types_operation(self, type1, type2):
+ return None
+
+ def nogil_check(self, env):
+ if self.is_py_operation():
+ self.gil_error()
+
+ def coerce_operands_to_pyobjects(self, env):
+ self.operand1 = self.operand1.coerce_to_pyobject(env)
+ self.operand2 = self.operand2.coerce_to_pyobject(env)
+
+ def check_const(self):
+ return self.operand1.check_const() and self.operand2.check_const()
+
+ def is_ephemeral(self):
+ return (super(BinopNode, self).is_ephemeral() or
+ self.operand1.is_ephemeral() or self.operand2.is_ephemeral())
+
+ def generate_result_code(self, code):
if self.type.is_pythran_expr:
code.putln("// Pythran binop")
code.putln("__Pyx_call_destructor(%s);" % self.result())
@@ -11167,21 +11167,21 @@ class BinopNode(ExprNode):
self.operator,
self.operand2.pythran_result()))
elif self.operand1.type.is_pyobject:
- function = self.py_operation_function(code)
- if self.operator == '**':
- extra_args = ", Py_None"
- else:
- extra_args = ""
- code.putln(
- "%s = %s(%s, %s%s); %s" % (
- self.result(),
- function,
- self.operand1.py_result(),
- self.operand2.py_result(),
- extra_args,
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.py_result())
- elif self.is_temp:
+ function = self.py_operation_function(code)
+ if self.operator == '**':
+ extra_args = ", Py_None"
+ else:
+ extra_args = ""
+ code.putln(
+ "%s = %s(%s, %s%s); %s" % (
+ self.result(),
+ function,
+ self.operand1.py_result(),
+ self.operand2.py_result(),
+ extra_args,
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.py_result())
+ elif self.is_temp:
# C++ overloaded operators with exception values are currently all
# handled through temporaries.
if self.is_cpp_operation() and self.exception_check == '+':
@@ -11191,230 +11191,230 @@ class BinopNode(ExprNode):
self.exception_value, self.in_nogil_context)
else:
code.putln("%s = %s;" % (self.result(), self.calculate_result_code()))
-
- def type_error(self):
- if not (self.operand1.type.is_error
- or self.operand2.type.is_error):
- error(self.pos, "Invalid operand types for '%s' (%s; %s)" %
- (self.operator, self.operand1.type,
- self.operand2.type))
- self.type = PyrexTypes.error_type
-
-
-class CBinopNode(BinopNode):
-
- def analyse_types(self, env):
- node = BinopNode.analyse_types(self, env)
- if node.is_py_operation():
- node.type = PyrexTypes.error_type
- return node
-
- def py_operation_function(self, code):
- return ""
-
- def calculate_result_code(self):
- return "(%s %s %s)" % (
- self.operand1.result(),
- self.operator,
- self.operand2.result())
-
- def compute_c_result_type(self, type1, type2):
- cpp_type = None
- if type1.is_cpp_class or type1.is_ptr:
- cpp_type = type1.find_cpp_operation_type(self.operator, type2)
+
+ def type_error(self):
+ if not (self.operand1.type.is_error
+ or self.operand2.type.is_error):
+ error(self.pos, "Invalid operand types for '%s' (%s; %s)" %
+ (self.operator, self.operand1.type,
+ self.operand2.type))
+ self.type = PyrexTypes.error_type
+
+
+class CBinopNode(BinopNode):
+
+ def analyse_types(self, env):
+ node = BinopNode.analyse_types(self, env)
+ if node.is_py_operation():
+ node.type = PyrexTypes.error_type
+ return node
+
+ def py_operation_function(self, code):
+ return ""
+
+ def calculate_result_code(self):
+ return "(%s %s %s)" % (
+ self.operand1.result(),
+ self.operator,
+ self.operand2.result())
+
+ def compute_c_result_type(self, type1, type2):
+ cpp_type = None
+ if type1.is_cpp_class or type1.is_ptr:
+ cpp_type = type1.find_cpp_operation_type(self.operator, type2)
if cpp_type is None and (type2.is_cpp_class or type2.is_ptr):
cpp_type = type2.find_cpp_operation_type(self.operator, type1)
- # FIXME: do we need to handle other cases here?
- return cpp_type
-
-
-def c_binop_constructor(operator):
- def make_binop_node(pos, **operands):
- return CBinopNode(pos, operator=operator, **operands)
- return make_binop_node
-
-class NumBinopNode(BinopNode):
- # Binary operation taking numeric arguments.
-
- infix = True
- overflow_check = False
- overflow_bit_node = None
-
- def analyse_c_operation(self, env):
- type1 = self.operand1.type
- type2 = self.operand2.type
- self.type = self.compute_c_result_type(type1, type2)
- if not self.type:
- self.type_error()
- return
- if self.type.is_complex:
- self.infix = False
- if (self.type.is_int
- and env.directives['overflowcheck']
- and self.operator in self.overflow_op_names):
- if (self.operator in ('+', '*')
- and self.operand1.has_constant_result()
- and not self.operand2.has_constant_result()):
- self.operand1, self.operand2 = self.operand2, self.operand1
- self.overflow_check = True
- self.overflow_fold = env.directives['overflowcheck.fold']
- self.func = self.type.overflow_check_binop(
- self.overflow_op_names[self.operator],
- env,
- const_rhs = self.operand2.has_constant_result())
- self.is_temp = True
- if not self.infix or (type1.is_numeric and type2.is_numeric):
- self.operand1 = self.operand1.coerce_to(self.type, env)
- self.operand2 = self.operand2.coerce_to(self.type, env)
-
- def compute_c_result_type(self, type1, type2):
- if self.c_types_okay(type1, type2):
- widest_type = PyrexTypes.widest_numeric_type(type1, type2)
- if widest_type is PyrexTypes.c_bint_type:
- if self.operator not in '|^&':
- # False + False == 0 # not False!
- widest_type = PyrexTypes.c_int_type
- else:
- widest_type = PyrexTypes.widest_numeric_type(
- widest_type, PyrexTypes.c_int_type)
- return widest_type
- else:
- return None
-
- def may_be_none(self):
- if self.type and self.type.is_builtin_type:
- # if we know the result type, we know the operation, so it can't be None
- return False
- type1 = self.operand1.type
- type2 = self.operand2.type
- if type1 and type1.is_builtin_type and type2 and type2.is_builtin_type:
- # XXX: I can't think of any case where a binary operation
- # on builtin types evaluates to None - add a special case
- # here if there is one.
- return False
- return super(NumBinopNode, self).may_be_none()
-
- def get_constant_c_result_code(self):
- value1 = self.operand1.get_constant_c_result_code()
- value2 = self.operand2.get_constant_c_result_code()
- if value1 and value2:
- return "(%s %s %s)" % (value1, self.operator, value2)
- else:
- return None
-
- def c_types_okay(self, type1, type2):
- #print "NumBinopNode.c_types_okay:", type1, type2 ###
- return (type1.is_numeric or type1.is_enum) \
- and (type2.is_numeric or type2.is_enum)
-
- def generate_evaluation_code(self, code):
- if self.overflow_check:
- self.overflow_bit_node = self
- self.overflow_bit = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
- code.putln("%s = 0;" % self.overflow_bit)
- super(NumBinopNode, self).generate_evaluation_code(code)
- if self.overflow_check:
- code.putln("if (unlikely(%s)) {" % self.overflow_bit)
- code.putln('PyErr_SetString(PyExc_OverflowError, "value too large");')
- code.putln(code.error_goto(self.pos))
- code.putln("}")
- code.funcstate.release_temp(self.overflow_bit)
-
- def calculate_result_code(self):
- if self.overflow_bit_node is not None:
- return "%s(%s, %s, &%s)" % (
- self.func,
- self.operand1.result(),
- self.operand2.result(),
- self.overflow_bit_node.overflow_bit)
+ # FIXME: do we need to handle other cases here?
+ return cpp_type
+
+
+def c_binop_constructor(operator):
+ def make_binop_node(pos, **operands):
+ return CBinopNode(pos, operator=operator, **operands)
+ return make_binop_node
+
+class NumBinopNode(BinopNode):
+ # Binary operation taking numeric arguments.
+
+ infix = True
+ overflow_check = False
+ overflow_bit_node = None
+
+ def analyse_c_operation(self, env):
+ type1 = self.operand1.type
+ type2 = self.operand2.type
+ self.type = self.compute_c_result_type(type1, type2)
+ if not self.type:
+ self.type_error()
+ return
+ if self.type.is_complex:
+ self.infix = False
+ if (self.type.is_int
+ and env.directives['overflowcheck']
+ and self.operator in self.overflow_op_names):
+ if (self.operator in ('+', '*')
+ and self.operand1.has_constant_result()
+ and not self.operand2.has_constant_result()):
+ self.operand1, self.operand2 = self.operand2, self.operand1
+ self.overflow_check = True
+ self.overflow_fold = env.directives['overflowcheck.fold']
+ self.func = self.type.overflow_check_binop(
+ self.overflow_op_names[self.operator],
+ env,
+ const_rhs = self.operand2.has_constant_result())
+ self.is_temp = True
+ if not self.infix or (type1.is_numeric and type2.is_numeric):
+ self.operand1 = self.operand1.coerce_to(self.type, env)
+ self.operand2 = self.operand2.coerce_to(self.type, env)
+
+ def compute_c_result_type(self, type1, type2):
+ if self.c_types_okay(type1, type2):
+ widest_type = PyrexTypes.widest_numeric_type(type1, type2)
+ if widest_type is PyrexTypes.c_bint_type:
+ if self.operator not in '|^&':
+ # False + False == 0 # not False!
+ widest_type = PyrexTypes.c_int_type
+ else:
+ widest_type = PyrexTypes.widest_numeric_type(
+ widest_type, PyrexTypes.c_int_type)
+ return widest_type
+ else:
+ return None
+
+ def may_be_none(self):
+ if self.type and self.type.is_builtin_type:
+ # if we know the result type, we know the operation, so it can't be None
+ return False
+ type1 = self.operand1.type
+ type2 = self.operand2.type
+ if type1 and type1.is_builtin_type and type2 and type2.is_builtin_type:
+ # XXX: I can't think of any case where a binary operation
+ # on builtin types evaluates to None - add a special case
+ # here if there is one.
+ return False
+ return super(NumBinopNode, self).may_be_none()
+
+ def get_constant_c_result_code(self):
+ value1 = self.operand1.get_constant_c_result_code()
+ value2 = self.operand2.get_constant_c_result_code()
+ if value1 and value2:
+ return "(%s %s %s)" % (value1, self.operator, value2)
+ else:
+ return None
+
+ def c_types_okay(self, type1, type2):
+ #print "NumBinopNode.c_types_okay:", type1, type2 ###
+ return (type1.is_numeric or type1.is_enum) \
+ and (type2.is_numeric or type2.is_enum)
+
+ def generate_evaluation_code(self, code):
+ if self.overflow_check:
+ self.overflow_bit_node = self
+ self.overflow_bit = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
+ code.putln("%s = 0;" % self.overflow_bit)
+ super(NumBinopNode, self).generate_evaluation_code(code)
+ if self.overflow_check:
+ code.putln("if (unlikely(%s)) {" % self.overflow_bit)
+ code.putln('PyErr_SetString(PyExc_OverflowError, "value too large");')
+ code.putln(code.error_goto(self.pos))
+ code.putln("}")
+ code.funcstate.release_temp(self.overflow_bit)
+
+ def calculate_result_code(self):
+ if self.overflow_bit_node is not None:
+ return "%s(%s, %s, &%s)" % (
+ self.func,
+ self.operand1.result(),
+ self.operand2.result(),
+ self.overflow_bit_node.overflow_bit)
elif self.type.is_cpp_class or self.infix:
if is_pythran_expr(self.type):
result1, result2 = self.operand1.pythran_result(), self.operand2.pythran_result()
else:
result1, result2 = self.operand1.result(), self.operand2.result()
return "(%s %s %s)" % (result1, self.operator, result2)
- else:
- func = self.type.binary_op(self.operator)
- if func is None:
- error(self.pos, "binary operator %s not supported for %s" % (self.operator, self.type))
- return "%s(%s, %s)" % (
- func,
- self.operand1.result(),
- self.operand2.result())
-
- def is_py_operation_types(self, type1, type2):
- return (type1.is_unicode_char or
- type2.is_unicode_char or
- BinopNode.is_py_operation_types(self, type1, type2))
-
- def py_operation_function(self, code):
- function_name = self.py_functions[self.operator]
- if self.inplace:
- function_name = function_name.replace('PyNumber_', 'PyNumber_InPlace')
- return function_name
-
- py_functions = {
- "|": "PyNumber_Or",
- "^": "PyNumber_Xor",
- "&": "PyNumber_And",
- "<<": "PyNumber_Lshift",
- ">>": "PyNumber_Rshift",
- "+": "PyNumber_Add",
- "-": "PyNumber_Subtract",
- "*": "PyNumber_Multiply",
- "@": "__Pyx_PyNumber_MatrixMultiply",
- "/": "__Pyx_PyNumber_Divide",
- "//": "PyNumber_FloorDivide",
- "%": "PyNumber_Remainder",
- "**": "PyNumber_Power",
- }
-
- overflow_op_names = {
- "+": "add",
- "-": "sub",
- "*": "mul",
- "<<": "lshift",
- }
-
-
-class IntBinopNode(NumBinopNode):
- # Binary operation taking integer arguments.
-
- def c_types_okay(self, type1, type2):
- #print "IntBinopNode.c_types_okay:", type1, type2 ###
- return (type1.is_int or type1.is_enum) \
- and (type2.is_int or type2.is_enum)
-
-
-class AddNode(NumBinopNode):
- # '+' operator.
-
- def is_py_operation_types(self, type1, type2):
- if type1.is_string and type2.is_string or type1.is_pyunicode_ptr and type2.is_pyunicode_ptr:
- return 1
- else:
- return NumBinopNode.is_py_operation_types(self, type1, type2)
-
- def infer_builtin_types_operation(self, type1, type2):
- # b'abc' + 'abc' raises an exception in Py3,
- # so we can safely infer the Py2 type for bytes here
+ else:
+ func = self.type.binary_op(self.operator)
+ if func is None:
+ error(self.pos, "binary operator %s not supported for %s" % (self.operator, self.type))
+ return "%s(%s, %s)" % (
+ func,
+ self.operand1.result(),
+ self.operand2.result())
+
+ def is_py_operation_types(self, type1, type2):
+ return (type1.is_unicode_char or
+ type2.is_unicode_char or
+ BinopNode.is_py_operation_types(self, type1, type2))
+
+ def py_operation_function(self, code):
+ function_name = self.py_functions[self.operator]
+ if self.inplace:
+ function_name = function_name.replace('PyNumber_', 'PyNumber_InPlace')
+ return function_name
+
+ py_functions = {
+ "|": "PyNumber_Or",
+ "^": "PyNumber_Xor",
+ "&": "PyNumber_And",
+ "<<": "PyNumber_Lshift",
+ ">>": "PyNumber_Rshift",
+ "+": "PyNumber_Add",
+ "-": "PyNumber_Subtract",
+ "*": "PyNumber_Multiply",
+ "@": "__Pyx_PyNumber_MatrixMultiply",
+ "/": "__Pyx_PyNumber_Divide",
+ "//": "PyNumber_FloorDivide",
+ "%": "PyNumber_Remainder",
+ "**": "PyNumber_Power",
+ }
+
+ overflow_op_names = {
+ "+": "add",
+ "-": "sub",
+ "*": "mul",
+ "<<": "lshift",
+ }
+
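+ # Editorial note (illustrative, helper name assumed): when overflowcheck is enabled and
+ # the operator is listed in overflow_op_names, calculate_result_code() above emits a call
+ # of the form
+ #
+ #     __Pyx_add_int_checking_overflow(op1, op2, &overflow_bit)
+ #
+ # (the exact name comes from type.overflow_check_binop()); generate_evaluation_code()
+ # then raises OverflowError("value too large") if the temporary overflow bit ends up set.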
+
+class IntBinopNode(NumBinopNode):
+ # Binary operation taking integer arguments.
+
+ def c_types_okay(self, type1, type2):
+ #print "IntBinopNode.c_types_okay:", type1, type2 ###
+ return (type1.is_int or type1.is_enum) \
+ and (type2.is_int or type2.is_enum)
+
+
+class AddNode(NumBinopNode):
+ # '+' operator.
+
+ def is_py_operation_types(self, type1, type2):
+ if type1.is_string and type2.is_string or type1.is_pyunicode_ptr and type2.is_pyunicode_ptr:
+ return 1
+ else:
+ return NumBinopNode.is_py_operation_types(self, type1, type2)
+
+ def infer_builtin_types_operation(self, type1, type2):
+ # b'abc' + 'abc' raises an exception in Py3,
+ # so we can safely infer the Py2 type for bytes here
string_types = (bytes_type, bytearray_type, str_type, basestring_type, unicode_type)
- if type1 in string_types and type2 in string_types:
- return string_types[max(string_types.index(type1),
- string_types.index(type2))]
- return None
-
- def compute_c_result_type(self, type1, type2):
- #print "AddNode.compute_c_result_type:", type1, self.operator, type2 ###
- if (type1.is_ptr or type1.is_array) and (type2.is_int or type2.is_enum):
- return type1
- elif (type2.is_ptr or type2.is_array) and (type1.is_int or type1.is_enum):
- return type2
- else:
- return NumBinopNode.compute_c_result_type(
- self, type1, type2)
-
- def py_operation_function(self, code):
+ if type1 in string_types and type2 in string_types:
+ return string_types[max(string_types.index(type1),
+ string_types.index(type2))]
+ return None
+
+ def compute_c_result_type(self, type1, type2):
+ #print "AddNode.compute_c_result_type:", type1, self.operator, type2 ###
+ if (type1.is_ptr or type1.is_array) and (type2.is_int or type2.is_enum):
+ return type1
+ elif (type2.is_ptr or type2.is_array) and (type1.is_int or type1.is_enum):
+ return type2
+ else:
+ return NumBinopNode.compute_c_result_type(
+ self, type1, type2)
+
+ def py_operation_function(self, code):
type1, type2 = self.operand1.type, self.operand2.type
if type1 is unicode_type or type2 is unicode_type:
@@ -11433,100 +11433,100 @@ class AddNode(NumBinopNode):
else:
return '__Pyx_PyUnicode_Concat'
- return super(AddNode, self).py_operation_function(code)
-
-
-class SubNode(NumBinopNode):
- # '-' operator.
-
- def compute_c_result_type(self, type1, type2):
- if (type1.is_ptr or type1.is_array) and (type2.is_int or type2.is_enum):
- return type1
- elif (type1.is_ptr or type1.is_array) and (type2.is_ptr or type2.is_array):
- return PyrexTypes.c_ptrdiff_t_type
- else:
- return NumBinopNode.compute_c_result_type(
- self, type1, type2)
-
-
-class MulNode(NumBinopNode):
- # '*' operator.
-
- def is_py_operation_types(self, type1, type2):
- if ((type1.is_string and type2.is_int) or
- (type2.is_string and type1.is_int)):
- return 1
- else:
- return NumBinopNode.is_py_operation_types(self, type1, type2)
-
- def infer_builtin_types_operation(self, type1, type2):
- # let's assume that whatever builtin type you multiply a string with
- # will either return a string of the same type or fail with an exception
+ return super(AddNode, self).py_operation_function(code)
+
+
+class SubNode(NumBinopNode):
+ # '-' operator.
+
+ def compute_c_result_type(self, type1, type2):
+ if (type1.is_ptr or type1.is_array) and (type2.is_int or type2.is_enum):
+ return type1
+ elif (type1.is_ptr or type1.is_array) and (type2.is_ptr or type2.is_array):
+ return PyrexTypes.c_ptrdiff_t_type
+ else:
+ return NumBinopNode.compute_c_result_type(
+ self, type1, type2)
+
+
+class MulNode(NumBinopNode):
+ # '*' operator.
+
+ def is_py_operation_types(self, type1, type2):
+ if ((type1.is_string and type2.is_int) or
+ (type2.is_string and type1.is_int)):
+ return 1
+ else:
+ return NumBinopNode.is_py_operation_types(self, type1, type2)
+
+ def infer_builtin_types_operation(self, type1, type2):
+ # let's assume that whatever builtin type you multiply a string with
+ # will either return a string of the same type or fail with an exception
string_types = (bytes_type, bytearray_type, str_type, basestring_type, unicode_type)
- if type1 in string_types and type2.is_builtin_type:
- return type1
- if type2 in string_types and type1.is_builtin_type:
- return type2
- # multiplication of containers/numbers with an integer value
- # always (?) returns the same type
- if type1.is_int:
- return type2
- if type2.is_int:
- return type1
- return None
-
-
-class MatMultNode(NumBinopNode):
- # '@' operator.
-
- def is_py_operation_types(self, type1, type2):
- return True
-
- def generate_evaluation_code(self, code):
- code.globalstate.use_utility_code(UtilityCode.load_cached("MatrixMultiply", "ObjectHandling.c"))
- super(MatMultNode, self).generate_evaluation_code(code)
-
-
-class DivNode(NumBinopNode):
- # '/' or '//' operator.
-
- cdivision = None
- truedivision = None # == "unknown" if operator == '/'
- ctruedivision = False
- cdivision_warnings = False
- zerodivision_check = None
-
- def find_compile_time_binary_operator(self, op1, op2):
- func = compile_time_binary_operators[self.operator]
- if self.operator == '/' and self.truedivision is None:
- # => true div for floats, floor div for integers
+ if type1 in string_types and type2.is_builtin_type:
+ return type1
+ if type2 in string_types and type1.is_builtin_type:
+ return type2
+ # multiplication of containers/numbers with an integer value
+ # always (?) returns the same type
+ if type1.is_int:
+ return type2
+ if type2.is_int:
+ return type1
+ return None
+
+
+class MatMultNode(NumBinopNode):
+ # '@' operator.
+
+ def is_py_operation_types(self, type1, type2):
+ return True
+
+ def generate_evaluation_code(self, code):
+ code.globalstate.use_utility_code(UtilityCode.load_cached("MatrixMultiply", "ObjectHandling.c"))
+ super(MatMultNode, self).generate_evaluation_code(code)
+
+
+class DivNode(NumBinopNode):
+ # '/' or '//' operator.
+
+ cdivision = None
+ truedivision = None # == "unknown" if operator == '/'
+ ctruedivision = False
+ cdivision_warnings = False
+ zerodivision_check = None
+
+ def find_compile_time_binary_operator(self, op1, op2):
+ func = compile_time_binary_operators[self.operator]
+ if self.operator == '/' and self.truedivision is None:
+ # => true div for floats, floor div for integers
if isinstance(op1, _py_int_types) and isinstance(op2, _py_int_types):
- func = compile_time_binary_operators['//']
- return func
-
- def calculate_constant_result(self):
- op1 = self.operand1.constant_result
- op2 = self.operand2.constant_result
- func = self.find_compile_time_binary_operator(op1, op2)
- self.constant_result = func(
- self.operand1.constant_result,
- self.operand2.constant_result)
-
- def compile_time_value(self, denv):
- operand1 = self.operand1.compile_time_value(denv)
- operand2 = self.operand2.compile_time_value(denv)
- try:
- func = self.find_compile_time_binary_operator(
- operand1, operand2)
- return func(operand1, operand2)
+ func = compile_time_binary_operators['//']
+ return func
+
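+ # Editorial note (illustrative): with '/' and unknown division semantics, the helper
+ # above picks floor division when both compile-time operands are Python ints, so a
+ # constant expression like 7 / 2 folds to 3 there, while 7.0 / 2 keeps true division.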
+ def calculate_constant_result(self):
+ op1 = self.operand1.constant_result
+ op2 = self.operand2.constant_result
+ func = self.find_compile_time_binary_operator(op1, op2)
+ self.constant_result = func(
+ self.operand1.constant_result,
+ self.operand2.constant_result)
+
+ def compile_time_value(self, denv):
+ operand1 = self.operand1.compile_time_value(denv)
+ operand2 = self.operand2.compile_time_value(denv)
+ try:
+ func = self.find_compile_time_binary_operator(
+ operand1, operand2)
+ return func(operand1, operand2)
except Exception as e:
- self.compile_time_value_error(e)
-
+ self.compile_time_value_error(e)
+
def _check_truedivision(self, env):
- if self.cdivision or env.directives['cdivision']:
- self.ctruedivision = False
- else:
- self.ctruedivision = self.truedivision
+ if self.cdivision or env.directives['cdivision']:
+ self.ctruedivision = False
+ else:
+ self.ctruedivision = self.truedivision
def infer_type(self, env):
self._check_truedivision(env)
@@ -11536,93 +11536,93 @@ class DivNode(NumBinopNode):
def analyse_operation(self, env):
self._check_truedivision(env)
- NumBinopNode.analyse_operation(self, env)
- if self.is_cpp_operation():
- self.cdivision = True
- if not self.type.is_pyobject:
- self.zerodivision_check = (
- self.cdivision is None and not env.directives['cdivision']
- and (not self.operand2.has_constant_result() or
- self.operand2.constant_result == 0))
- if self.zerodivision_check or env.directives['cdivision_warnings']:
- # Need to check ahead of time to warn or raise zero division error
- self.operand1 = self.operand1.coerce_to_simple(env)
- self.operand2 = self.operand2.coerce_to_simple(env)
-
- def compute_c_result_type(self, type1, type2):
+ NumBinopNode.analyse_operation(self, env)
+ if self.is_cpp_operation():
+ self.cdivision = True
+ if not self.type.is_pyobject:
+ self.zerodivision_check = (
+ self.cdivision is None and not env.directives['cdivision']
+ and (not self.operand2.has_constant_result() or
+ self.operand2.constant_result == 0))
+ if self.zerodivision_check or env.directives['cdivision_warnings']:
+ # Need to check ahead of time to warn or raise zero division error
+ self.operand1 = self.operand1.coerce_to_simple(env)
+ self.operand2 = self.operand2.coerce_to_simple(env)
+
+ def compute_c_result_type(self, type1, type2):
if self.operator == '/' and self.ctruedivision and not type1.is_cpp_class and not type2.is_cpp_class:
- if not type1.is_float and not type2.is_float:
- widest_type = PyrexTypes.widest_numeric_type(type1, PyrexTypes.c_double_type)
- widest_type = PyrexTypes.widest_numeric_type(type2, widest_type)
- return widest_type
- return NumBinopNode.compute_c_result_type(self, type1, type2)
-
- def zero_division_message(self):
- if self.type.is_int:
- return "integer division or modulo by zero"
- else:
- return "float division"
-
- def generate_evaluation_code(self, code):
- if not self.type.is_pyobject and not self.type.is_complex:
- if self.cdivision is None:
+ if not type1.is_float and not type2.is_float:
+ widest_type = PyrexTypes.widest_numeric_type(type1, PyrexTypes.c_double_type)
+ widest_type = PyrexTypes.widest_numeric_type(type2, widest_type)
+ return widest_type
+ return NumBinopNode.compute_c_result_type(self, type1, type2)
+
+ def zero_division_message(self):
+ if self.type.is_int:
+ return "integer division or modulo by zero"
+ else:
+ return "float division"
+
+ def generate_evaluation_code(self, code):
+ if not self.type.is_pyobject and not self.type.is_complex:
+ if self.cdivision is None:
self.cdivision = (
code.globalstate.directives['cdivision']
or self.type.is_float
or ((self.type.is_numeric or self.type.is_enum) and not self.type.signed)
)
- if not self.cdivision:
+ if not self.cdivision:
code.globalstate.use_utility_code(
UtilityCode.load_cached("DivInt", "CMath.c").specialize(self.type))
- NumBinopNode.generate_evaluation_code(self, code)
- self.generate_div_warning_code(code)
-
- def generate_div_warning_code(self, code):
+ NumBinopNode.generate_evaluation_code(self, code)
+ self.generate_div_warning_code(code)
+
+ def generate_div_warning_code(self, code):
in_nogil = self.in_nogil_context
- if not self.type.is_pyobject:
- if self.zerodivision_check:
- if not self.infix:
- zero_test = "%s(%s)" % (self.type.unary_op('zero'), self.operand2.result())
- else:
- zero_test = "%s == 0" % self.operand2.result()
- code.putln("if (unlikely(%s)) {" % zero_test)
+ if not self.type.is_pyobject:
+ if self.zerodivision_check:
+ if not self.infix:
+ zero_test = "%s(%s)" % (self.type.unary_op('zero'), self.operand2.result())
+ else:
+ zero_test = "%s == 0" % self.operand2.result()
+ code.putln("if (unlikely(%s)) {" % zero_test)
if in_nogil:
code.put_ensure_gil()
- code.putln('PyErr_SetString(PyExc_ZeroDivisionError, "%s");' % self.zero_division_message())
+ code.putln('PyErr_SetString(PyExc_ZeroDivisionError, "%s");' % self.zero_division_message())
if in_nogil:
code.put_release_ensured_gil()
- code.putln(code.error_goto(self.pos))
- code.putln("}")
- if self.type.is_int and self.type.signed and self.operator != '%':
+ code.putln(code.error_goto(self.pos))
+ code.putln("}")
+ if self.type.is_int and self.type.signed and self.operator != '%':
code.globalstate.use_utility_code(UtilityCode.load_cached("UnaryNegOverflows", "Overflow.c"))
- if self.operand2.type.signed == 2:
- # explicitly signed, no runtime check needed
- minus1_check = 'unlikely(%s == -1)' % self.operand2.result()
- else:
+ if self.operand2.type.signed == 2:
+ # explicitly signed, no runtime check needed
+ minus1_check = 'unlikely(%s == -1)' % self.operand2.result()
+ else:
type_of_op2 = self.operand2.type.empty_declaration_code()
- minus1_check = '(!(((%s)-1) > 0)) && unlikely(%s == (%s)-1)' % (
- type_of_op2, self.operand2.result(), type_of_op2)
- code.putln("else if (sizeof(%s) == sizeof(long) && %s "
- " && unlikely(UNARY_NEG_WOULD_OVERFLOW(%s))) {" % (
+ minus1_check = '(!(((%s)-1) > 0)) && unlikely(%s == (%s)-1)' % (
+ type_of_op2, self.operand2.result(), type_of_op2)
+ code.putln("else if (sizeof(%s) == sizeof(long) && %s "
+ " && unlikely(UNARY_NEG_WOULD_OVERFLOW(%s))) {" % (
self.type.empty_declaration_code(),
- minus1_check,
- self.operand1.result()))
+ minus1_check,
+ self.operand1.result()))
if in_nogil:
code.put_ensure_gil()
- code.putln('PyErr_SetString(PyExc_OverflowError, "value too large to perform division");')
+ code.putln('PyErr_SetString(PyExc_OverflowError, "value too large to perform division");')
if in_nogil:
code.put_release_ensured_gil()
- code.putln(code.error_goto(self.pos))
- code.putln("}")
- if code.globalstate.directives['cdivision_warnings'] and self.operator != '/':
+ code.putln(code.error_goto(self.pos))
+ code.putln("}")
+ if code.globalstate.directives['cdivision_warnings'] and self.operator != '/':
code.globalstate.use_utility_code(
UtilityCode.load_cached("CDivisionWarning", "CMath.c"))
- code.putln("if (unlikely((%s < 0) ^ (%s < 0))) {" % (
- self.operand1.result(),
- self.operand2.result()))
+ code.putln("if (unlikely((%s < 0) ^ (%s < 0))) {" % (
+ self.operand1.result(),
+ self.operand2.result()))
warning_code = "__Pyx_cdivision_warning(%(FILENAME)s, %(LINENO)s)" % {
- 'FILENAME': Naming.filename_cname,
- 'LINENO': Naming.lineno_cname,
+ 'FILENAME': Naming.filename_cname,
+ 'LINENO': Naming.lineno_cname,
}
if in_nogil:
@@ -11637,32 +11637,32 @@ class DivNode(NumBinopNode):
code.putln(code.set_error_info(self.pos, used=True))
code.put("if (unlikely(%s)) " % result_code)
- code.put_goto(code.error_label)
- code.putln("}")
-
- def calculate_result_code(self):
+ code.put_goto(code.error_label)
+ code.putln("}")
+
+ def calculate_result_code(self):
if self.type.is_complex or self.is_cpp_operation():
- return NumBinopNode.calculate_result_code(self)
- elif self.type.is_float and self.operator == '//':
- return "floor(%s / %s)" % (
- self.operand1.result(),
- self.operand2.result())
- elif self.truedivision or self.cdivision:
- op1 = self.operand1.result()
- op2 = self.operand2.result()
- if self.truedivision:
- if self.type != self.operand1.type:
- op1 = self.type.cast_code(op1)
- if self.type != self.operand2.type:
- op2 = self.type.cast_code(op2)
- return "(%s / %s)" % (op1, op2)
- else:
- return "__Pyx_div_%s(%s, %s)" % (
- self.type.specialization_name(),
- self.operand1.result(),
- self.operand2.result())
-
-
+ return NumBinopNode.calculate_result_code(self)
+ elif self.type.is_float and self.operator == '//':
+ return "floor(%s / %s)" % (
+ self.operand1.result(),
+ self.operand2.result())
+ elif self.truedivision or self.cdivision:
+ op1 = self.operand1.result()
+ op2 = self.operand2.result()
+ if self.truedivision:
+ if self.type != self.operand1.type:
+ op1 = self.type.cast_code(op1)
+ if self.type != self.operand2.type:
+ op2 = self.type.cast_code(op2)
+ return "(%s / %s)" % (op1, op2)
+ else:
+ return "__Pyx_div_%s(%s, %s)" % (
+ self.type.specialization_name(),
+ self.operand1.result(),
+ self.operand2.result())
+
+
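For context on the cdivision handling in DivNode above: C integer division truncates toward zero, while Python's floor division rounds toward negative infinity, which is why floor division falls back to a __Pyx_div_<type> helper when the cdivision directive is off. A minimal pure-Python sketch of the difference (illustrative only; c_like_div is a hypothetical helper, not part of the Cython sources):

    def c_like_div(a, b):
        # C99 integer division truncates toward zero.
        q = abs(a) // abs(b)
        return q if (a >= 0) == (b >= 0) else -q

    assert (-7) // 2 == -4          # Python floor division rounds toward -infinity
    assert c_like_div(-7, 2) == -3  # C-style division truncates toward zero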
_find_formatting_types = re.compile(
br"%"
br"(?:%|" # %%
@@ -11677,835 +11677,835 @@ _safe_bytes_formats = set([
])
-class ModNode(DivNode):
- # '%' operator.
-
- def is_py_operation_types(self, type1, type2):
- return (type1.is_string
- or type2.is_string
- or NumBinopNode.is_py_operation_types(self, type1, type2))
-
- def infer_builtin_types_operation(self, type1, type2):
+class ModNode(DivNode):
+ # '%' operator.
+
+ def is_py_operation_types(self, type1, type2):
+ return (type1.is_string
+ or type2.is_string
+ or NumBinopNode.is_py_operation_types(self, type1, type2))
+
+ def infer_builtin_types_operation(self, type1, type2):
# b'%s' % xyz raises an exception in Py3<3.5, so it's safe to infer the type for Py2 and later Py3's.
- if type1 is unicode_type:
- # None + xyz may be implemented by RHS
- if type2.is_builtin_type or not self.operand1.may_be_none():
- return type1
- elif type1 in (bytes_type, str_type, basestring_type):
- if type2 is unicode_type:
- return type2
- elif type2.is_numeric:
- return type1
+ if type1 is unicode_type:
+ # None + xyz may be implemented by RHS
+ if type2.is_builtin_type or not self.operand1.may_be_none():
+ return type1
+ elif type1 in (bytes_type, str_type, basestring_type):
+ if type2 is unicode_type:
+ return type2
+ elif type2.is_numeric:
+ return type1
elif self.operand1.is_string_literal:
if type1 is str_type or type1 is bytes_type:
if set(_find_formatting_types(self.operand1.value)) <= _safe_bytes_formats:
return type1
return basestring_type
- elif type1 is bytes_type and not type2.is_builtin_type:
- return None # RHS might implement '%' operator differently in Py3
- else:
- return basestring_type # either str or unicode, can't tell
- return None
-
- def zero_division_message(self):
- if self.type.is_int:
- return "integer division or modulo by zero"
- else:
- return "float divmod()"
-
- def analyse_operation(self, env):
- DivNode.analyse_operation(self, env)
- if not self.type.is_pyobject:
- if self.cdivision is None:
- self.cdivision = env.directives['cdivision'] or not self.type.signed
- if not self.cdivision and not self.type.is_int and not self.type.is_float:
- error(self.pos, "mod operator not supported for type '%s'" % self.type)
-
- def generate_evaluation_code(self, code):
- if not self.type.is_pyobject and not self.cdivision:
- if self.type.is_int:
- code.globalstate.use_utility_code(
+ elif type1 is bytes_type and not type2.is_builtin_type:
+ return None # RHS might implement '%' operator differently in Py3
+ else:
+ return basestring_type # either str or unicode, can't tell
+ return None
+
+ def zero_division_message(self):
+ if self.type.is_int:
+ return "integer division or modulo by zero"
+ else:
+ return "float divmod()"
+
+ def analyse_operation(self, env):
+ DivNode.analyse_operation(self, env)
+ if not self.type.is_pyobject:
+ if self.cdivision is None:
+ self.cdivision = env.directives['cdivision'] or not self.type.signed
+ if not self.cdivision and not self.type.is_int and not self.type.is_float:
+ error(self.pos, "mod operator not supported for type '%s'" % self.type)
+
+ def generate_evaluation_code(self, code):
+ if not self.type.is_pyobject and not self.cdivision:
+ if self.type.is_int:
+ code.globalstate.use_utility_code(
UtilityCode.load_cached("ModInt", "CMath.c").specialize(self.type))
- else: # float
- code.globalstate.use_utility_code(
+ else: # float
+ code.globalstate.use_utility_code(
UtilityCode.load_cached("ModFloat", "CMath.c").specialize(
- self.type, math_h_modifier=self.type.math_h_modifier))
+ self.type, math_h_modifier=self.type.math_h_modifier))
# NOTE: skipping over DivNode here
- NumBinopNode.generate_evaluation_code(self, code)
- self.generate_div_warning_code(code)
-
- def calculate_result_code(self):
- if self.cdivision:
- if self.type.is_float:
- return "fmod%s(%s, %s)" % (
- self.type.math_h_modifier,
- self.operand1.result(),
- self.operand2.result())
- else:
- return "(%s %% %s)" % (
- self.operand1.result(),
- self.operand2.result())
- else:
- return "__Pyx_mod_%s(%s, %s)" % (
- self.type.specialization_name(),
- self.operand1.result(),
- self.operand2.result())
-
- def py_operation_function(self, code):
+ NumBinopNode.generate_evaluation_code(self, code)
+ self.generate_div_warning_code(code)
+
+ def calculate_result_code(self):
+ if self.cdivision:
+ if self.type.is_float:
+ return "fmod%s(%s, %s)" % (
+ self.type.math_h_modifier,
+ self.operand1.result(),
+ self.operand2.result())
+ else:
+ return "(%s %% %s)" % (
+ self.operand1.result(),
+ self.operand2.result())
+ else:
+ return "__Pyx_mod_%s(%s, %s)" % (
+ self.type.specialization_name(),
+ self.operand1.result(),
+ self.operand2.result())
+
+ def py_operation_function(self, code):
type1, type2 = self.operand1.type, self.operand2.type
# ("..." % x) must call "x.__rmod__()" for string subtypes.
if type1 is unicode_type:
if self.operand1.may_be_none() or (
type2.is_extension_type and type2.subtype_of(type1) or
type2 is py_object_type and not isinstance(self.operand2, CoerceToPyTypeNode)):
- return '__Pyx_PyUnicode_FormatSafe'
- else:
- return 'PyUnicode_Format'
+ return '__Pyx_PyUnicode_FormatSafe'
+ else:
+ return 'PyUnicode_Format'
elif type1 is str_type:
if self.operand1.may_be_none() or (
type2.is_extension_type and type2.subtype_of(type1) or
type2 is py_object_type and not isinstance(self.operand2, CoerceToPyTypeNode)):
- return '__Pyx_PyString_FormatSafe'
- else:
- return '__Pyx_PyString_Format'
- return super(ModNode, self).py_operation_function(code)
-
-
-class PowNode(NumBinopNode):
- # '**' operator.
-
- def analyse_c_operation(self, env):
- NumBinopNode.analyse_c_operation(self, env)
- if self.type.is_complex:
- if self.type.real_type.is_float:
- self.operand1 = self.operand1.coerce_to(self.type, env)
- self.operand2 = self.operand2.coerce_to(self.type, env)
+ return '__Pyx_PyString_FormatSafe'
+ else:
+ return '__Pyx_PyString_Format'
+ return super(ModNode, self).py_operation_function(code)
+
+
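The same mismatch motivates the __Pyx_mod_<type> helper in ModNode above: the C '%' result takes the sign of the dividend (truncated division), while Python's takes the sign of the divisor. A small pure-Python sketch (illustrative only; c_like_mod is a hypothetical helper):

    def c_like_mod(a, b):
        # C99 '%': remainder of division truncated toward zero.
        q = abs(a) // abs(b)
        q = q if (a >= 0) == (b >= 0) else -q
        return a - q * b

    assert (-7) % 3 == 2            # Python: result follows the sign of the divisor
    assert c_like_mod(-7, 3) == -1  # C: result follows the sign of the dividend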
+class PowNode(NumBinopNode):
+ # '**' operator.
+
+ def analyse_c_operation(self, env):
+ NumBinopNode.analyse_c_operation(self, env)
+ if self.type.is_complex:
+ if self.type.real_type.is_float:
+ self.operand1 = self.operand1.coerce_to(self.type, env)
+ self.operand2 = self.operand2.coerce_to(self.type, env)
self.pow_func = self.type.binary_op('**')
- else:
- error(self.pos, "complex int powers not supported")
- self.pow_func = "<error>"
- elif self.type.is_float:
- self.pow_func = "pow" + self.type.math_h_modifier
- elif self.type.is_int:
+ else:
+ error(self.pos, "complex int powers not supported")
+ self.pow_func = "<error>"
+ elif self.type.is_float:
+ self.pow_func = "pow" + self.type.math_h_modifier
+ elif self.type.is_int:
self.pow_func = "__Pyx_pow_%s" % self.type.empty_declaration_code().replace(' ', '_')
- env.use_utility_code(
+ env.use_utility_code(
UtilityCode.load_cached("IntPow", "CMath.c").specialize(
- func_name=self.pow_func,
+ func_name=self.pow_func,
type=self.type.empty_declaration_code(),
- signed=self.type.signed and 1 or 0))
- elif not self.type.is_error:
- error(self.pos, "got unexpected types for C power operator: %s, %s" %
- (self.operand1.type, self.operand2.type))
-
- def calculate_result_code(self):
- # Work around MSVC overloading ambiguity.
- def typecast(operand):
- if self.type == operand.type:
- return operand.result()
- else:
- return self.type.cast_code(operand.result())
- return "%s(%s, %s)" % (
- self.pow_func,
- typecast(self.operand1),
- typecast(self.operand2))
-
- def py_operation_function(self, code):
- if (self.type.is_pyobject and
- self.operand1.constant_result == 2 and
+ signed=self.type.signed and 1 or 0))
+ elif not self.type.is_error:
+ error(self.pos, "got unexpected types for C power operator: %s, %s" %
+ (self.operand1.type, self.operand2.type))
+
+ def calculate_result_code(self):
+ # Work around MSVC overloading ambiguity.
+ def typecast(operand):
+ if self.type == operand.type:
+ return operand.result()
+ else:
+ return self.type.cast_code(operand.result())
+ return "%s(%s, %s)" % (
+ self.pow_func,
+ typecast(self.operand1),
+ typecast(self.operand2))
+
+ def py_operation_function(self, code):
+ if (self.type.is_pyobject and
+ self.operand1.constant_result == 2 and
isinstance(self.operand1.constant_result, _py_int_types) and
- self.operand2.type is py_object_type):
- code.globalstate.use_utility_code(UtilityCode.load_cached('PyNumberPow2', 'Optimize.c'))
- if self.inplace:
- return '__Pyx_PyNumber_InPlacePowerOf2'
- else:
- return '__Pyx_PyNumber_PowerOf2'
- return super(PowNode, self).py_operation_function(code)
-
-
-class BoolBinopNode(ExprNode):
- """
- Short-circuiting boolean operation.
-
- Note that this node provides the same code generation method as
- BoolBinopResultNode to simplify expression nesting.
-
- operator string "and"/"or"
- operand1 BoolBinopNode/BoolBinopResultNode left operand
- operand2 BoolBinopNode/BoolBinopResultNode right operand
- """
- subexprs = ['operand1', 'operand2']
- is_temp = True
- operator = None
- operand1 = None
- operand2 = None
-
- def infer_type(self, env):
- type1 = self.operand1.infer_type(env)
- type2 = self.operand2.infer_type(env)
- return PyrexTypes.independent_spanning_type(type1, type2)
-
- def may_be_none(self):
- if self.operator == 'or':
- return self.operand2.may_be_none()
- else:
- return self.operand1.may_be_none() or self.operand2.may_be_none()
-
- def calculate_constant_result(self):
- operand1 = self.operand1.constant_result
- operand2 = self.operand2.constant_result
- if self.operator == 'and':
- self.constant_result = operand1 and operand2
- else:
- self.constant_result = operand1 or operand2
-
- def compile_time_value(self, denv):
- operand1 = self.operand1.compile_time_value(denv)
- operand2 = self.operand2.compile_time_value(denv)
- if self.operator == 'and':
- return operand1 and operand2
- else:
- return operand1 or operand2
-
- def is_ephemeral(self):
- return self.operand1.is_ephemeral() or self.operand2.is_ephemeral()
-
- def analyse_types(self, env):
- # Note: we do not do any coercion here as we most likely do not know the final type anyway.
- # We even accept to set self.type to ErrorType if both operands do not have a spanning type.
- # The coercion to the final type and to a "simple" value is left to coerce_to().
- operand1 = self.operand1.analyse_types(env)
- operand2 = self.operand2.analyse_types(env)
- self.type = PyrexTypes.independent_spanning_type(
- operand1.type, operand2.type)
- self.operand1 = self._wrap_operand(operand1, env)
- self.operand2 = self._wrap_operand(operand2, env)
- return self
-
- def _wrap_operand(self, operand, env):
- if not isinstance(operand, (BoolBinopNode, BoolBinopResultNode)):
- operand = BoolBinopResultNode(operand, self.type, env)
- return operand
-
- def wrap_operands(self, env):
- """
- Must get called by transforms that want to create a correct BoolBinopNode
- after the type analysis phase.
- """
- self.operand1 = self._wrap_operand(self.operand1, env)
- self.operand2 = self._wrap_operand(self.operand2, env)
-
- def coerce_to_boolean(self, env):
- return self.coerce_to(PyrexTypes.c_bint_type, env)
-
- def coerce_to(self, dst_type, env):
- operand1 = self.operand1.coerce_to(dst_type, env)
- operand2 = self.operand2.coerce_to(dst_type, env)
- return BoolBinopNode.from_node(
- self, type=dst_type,
- operator=self.operator,
- operand1=operand1, operand2=operand2)
-
+ self.operand2.type is py_object_type):
+ code.globalstate.use_utility_code(UtilityCode.load_cached('PyNumberPow2', 'Optimize.c'))
+ if self.inplace:
+ return '__Pyx_PyNumber_InPlacePowerOf2'
+ else:
+ return '__Pyx_PyNumber_PowerOf2'
+ return super(PowNode, self).py_operation_function(code)
+
+
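PowNode's py_operation_function above routes a literal base of 2 with an object exponent through __Pyx_PyNumber_PowerOf2, which can presumably take a shift-based fast path; the arithmetic identity behind such an optimization, in plain Python:

    # 2 ** n equals a left shift for non-negative integer exponents.
    for n in range(16):
        assert 2 ** n == 1 << n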
+class BoolBinopNode(ExprNode):
+ """
+ Short-circuiting boolean operation.
+
+ Note that this node provides the same code generation method as
+ BoolBinopResultNode to simplify expression nesting.
+
+ operator string "and"/"or"
+ operand1 BoolBinopNode/BoolBinopResultNode left operand
+ operand2 BoolBinopNode/BoolBinopResultNode right operand
+ """
+ subexprs = ['operand1', 'operand2']
+ is_temp = True
+ operator = None
+ operand1 = None
+ operand2 = None
+
+ def infer_type(self, env):
+ type1 = self.operand1.infer_type(env)
+ type2 = self.operand2.infer_type(env)
+ return PyrexTypes.independent_spanning_type(type1, type2)
+
+ def may_be_none(self):
+ if self.operator == 'or':
+ return self.operand2.may_be_none()
+ else:
+ return self.operand1.may_be_none() or self.operand2.may_be_none()
+
+ def calculate_constant_result(self):
+ operand1 = self.operand1.constant_result
+ operand2 = self.operand2.constant_result
+ if self.operator == 'and':
+ self.constant_result = operand1 and operand2
+ else:
+ self.constant_result = operand1 or operand2
+
+ def compile_time_value(self, denv):
+ operand1 = self.operand1.compile_time_value(denv)
+ operand2 = self.operand2.compile_time_value(denv)
+ if self.operator == 'and':
+ return operand1 and operand2
+ else:
+ return operand1 or operand2
+
+ def is_ephemeral(self):
+ return self.operand1.is_ephemeral() or self.operand2.is_ephemeral()
+
+ def analyse_types(self, env):
+ # Note: we do not do any coercion here as we most likely do not know the final type anyway.
+ # We even accept to set self.type to ErrorType if both operands do not have a spanning type.
+ # The coercion to the final type and to a "simple" value is left to coerce_to().
+ operand1 = self.operand1.analyse_types(env)
+ operand2 = self.operand2.analyse_types(env)
+ self.type = PyrexTypes.independent_spanning_type(
+ operand1.type, operand2.type)
+ self.operand1 = self._wrap_operand(operand1, env)
+ self.operand2 = self._wrap_operand(operand2, env)
+ return self
+
+ def _wrap_operand(self, operand, env):
+ if not isinstance(operand, (BoolBinopNode, BoolBinopResultNode)):
+ operand = BoolBinopResultNode(operand, self.type, env)
+ return operand
+
+ def wrap_operands(self, env):
+ """
+ Must get called by transforms that want to create a correct BoolBinopNode
+ after the type analysis phase.
+ """
+ self.operand1 = self._wrap_operand(self.operand1, env)
+ self.operand2 = self._wrap_operand(self.operand2, env)
+
+ def coerce_to_boolean(self, env):
+ return self.coerce_to(PyrexTypes.c_bint_type, env)
+
+ def coerce_to(self, dst_type, env):
+ operand1 = self.operand1.coerce_to(dst_type, env)
+ operand2 = self.operand2.coerce_to(dst_type, env)
+ return BoolBinopNode.from_node(
+ self, type=dst_type,
+ operator=self.operator,
+ operand1=operand1, operand2=operand2)
+
def generate_bool_evaluation_code(self, code, final_result_temp, final_result_type, and_label, or_label, end_label, fall_through):
- code.mark_pos(self.pos)
-
- outer_labels = (and_label, or_label)
- if self.operator == 'and':
- my_label = and_label = code.new_label('next_and')
- else:
- my_label = or_label = code.new_label('next_or')
+ code.mark_pos(self.pos)
+
+ outer_labels = (and_label, or_label)
+ if self.operator == 'and':
+ my_label = and_label = code.new_label('next_and')
+ else:
+ my_label = or_label = code.new_label('next_or')
self.operand1.generate_bool_evaluation_code(
code, final_result_temp, final_result_type, and_label, or_label, end_label, my_label)
-
- and_label, or_label = outer_labels
-
- code.put_label(my_label)
+
+ and_label, or_label = outer_labels
+
+ code.put_label(my_label)
self.operand2.generate_bool_evaluation_code(
code, final_result_temp, final_result_type, and_label, or_label, end_label, fall_through)
-
- def generate_evaluation_code(self, code):
- self.allocate_temp_result(code)
+
+ def generate_evaluation_code(self, code):
+ self.allocate_temp_result(code)
result_type = PyrexTypes.py_object_type if self.type.is_pyobject else self.type
- or_label = and_label = None
- end_label = code.new_label('bool_binop_done')
+ or_label = and_label = None
+ end_label = code.new_label('bool_binop_done')
self.generate_bool_evaluation_code(code, self.result(), result_type, and_label, or_label, end_label, end_label)
code.put_label(end_label)
-
- gil_message = "Truth-testing Python object"
-
- def check_const(self):
- return self.operand1.check_const() and self.operand2.check_const()
-
- def generate_subexpr_disposal_code(self, code):
- pass # nothing to do here, all done in generate_evaluation_code()
-
- def free_subexpr_temps(self, code):
- pass # nothing to do here, all done in generate_evaluation_code()
-
- def generate_operand1_test(self, code):
- # Generate code to test the truth of the first operand.
- if self.type.is_pyobject:
- test_result = code.funcstate.allocate_temp(
- PyrexTypes.c_bint_type, manage_ref=False)
- code.putln(
- "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
- test_result,
- self.operand1.py_result(),
- code.error_goto_if_neg(test_result, self.pos)))
- else:
- test_result = self.operand1.result()
- return (test_result, self.type.is_pyobject)
-
-
-class BoolBinopResultNode(ExprNode):
- """
- Intermediate result of a short-circuiting and/or expression.
- Tests the result for 'truthiness' and takes care of coercing the final result
- of the overall expression to the target type.
-
- Note that this node provides the same code generation method as
- BoolBinopNode to simplify expression nesting.
-
- arg ExprNode the argument to test
- value ExprNode the coerced result value node
- """
-
- subexprs = ['arg', 'value']
- is_temp = True
- arg = None
- value = None
-
- def __init__(self, arg, result_type, env):
- # using 'arg' multiple times, so it must be a simple/temp value
- arg = arg.coerce_to_simple(env)
- # wrap in ProxyNode, in case a transform wants to replace self.arg later
- arg = ProxyNode(arg)
- super(BoolBinopResultNode, self).__init__(
- arg.pos, arg=arg, type=result_type,
- value=CloneNode(arg).coerce_to(result_type, env))
-
- def coerce_to_boolean(self, env):
- return self.coerce_to(PyrexTypes.c_bint_type, env)
-
- def coerce_to(self, dst_type, env):
- # unwrap, coerce, rewrap
- arg = self.arg.arg
- if dst_type is PyrexTypes.c_bint_type:
- arg = arg.coerce_to_boolean(env)
- # TODO: unwrap more coercion nodes?
- return BoolBinopResultNode(arg, dst_type, env)
-
- def nogil_check(self, env):
- # let's leave all errors to BoolBinopNode
- pass
-
- def generate_operand_test(self, code):
- # Generate code to test the truth of the first operand.
- if self.arg.type.is_pyobject:
- test_result = code.funcstate.allocate_temp(
- PyrexTypes.c_bint_type, manage_ref=False)
- code.putln(
- "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
- test_result,
- self.arg.py_result(),
- code.error_goto_if_neg(test_result, self.pos)))
- else:
- test_result = self.arg.result()
- return (test_result, self.arg.type.is_pyobject)
-
+
+ gil_message = "Truth-testing Python object"
+
+ def check_const(self):
+ return self.operand1.check_const() and self.operand2.check_const()
+
+ def generate_subexpr_disposal_code(self, code):
+ pass # nothing to do here, all done in generate_evaluation_code()
+
+ def free_subexpr_temps(self, code):
+ pass # nothing to do here, all done in generate_evaluation_code()
+
+ def generate_operand1_test(self, code):
+ # Generate code to test the truth of the first operand.
+ if self.type.is_pyobject:
+ test_result = code.funcstate.allocate_temp(
+ PyrexTypes.c_bint_type, manage_ref=False)
+ code.putln(
+ "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
+ test_result,
+ self.operand1.py_result(),
+ code.error_goto_if_neg(test_result, self.pos)))
+ else:
+ test_result = self.operand1.result()
+ return (test_result, self.type.is_pyobject)
+
+
+class BoolBinopResultNode(ExprNode):
+ """
+ Intermediate result of a short-circuiting and/or expression.
+ Tests the result for 'truthiness' and takes care of coercing the final result
+ of the overall expression to the target type.
+
+ Note that this node provides the same code generation method as
+ BoolBinopNode to simplify expression nesting.
+
+ arg ExprNode the argument to test
+ value ExprNode the coerced result value node
+ """
+
+ subexprs = ['arg', 'value']
+ is_temp = True
+ arg = None
+ value = None
+
+ def __init__(self, arg, result_type, env):
+ # using 'arg' multiple times, so it must be a simple/temp value
+ arg = arg.coerce_to_simple(env)
+ # wrap in ProxyNode, in case a transform wants to replace self.arg later
+ arg = ProxyNode(arg)
+ super(BoolBinopResultNode, self).__init__(
+ arg.pos, arg=arg, type=result_type,
+ value=CloneNode(arg).coerce_to(result_type, env))
+
+ def coerce_to_boolean(self, env):
+ return self.coerce_to(PyrexTypes.c_bint_type, env)
+
+ def coerce_to(self, dst_type, env):
+ # unwrap, coerce, rewrap
+ arg = self.arg.arg
+ if dst_type is PyrexTypes.c_bint_type:
+ arg = arg.coerce_to_boolean(env)
+ # TODO: unwrap more coercion nodes?
+ return BoolBinopResultNode(arg, dst_type, env)
+
+ def nogil_check(self, env):
+ # let's leave all errors to BoolBinopNode
+ pass
+
+ def generate_operand_test(self, code):
+ # Generate code to test the truth of the first operand.
+ if self.arg.type.is_pyobject:
+ test_result = code.funcstate.allocate_temp(
+ PyrexTypes.c_bint_type, manage_ref=False)
+ code.putln(
+ "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
+ test_result,
+ self.arg.py_result(),
+ code.error_goto_if_neg(test_result, self.pos)))
+ else:
+ test_result = self.arg.result()
+ return (test_result, self.arg.type.is_pyobject)
+
def generate_bool_evaluation_code(self, code, final_result_temp, final_result_type, and_label, or_label, end_label, fall_through):
- code.mark_pos(self.pos)
-
- # x => x
- # x and ... or ... => next 'and' / 'or'
- # False ... or x => next 'or'
- # True and x => next 'and'
- # True or x => True (operand)
-
- self.arg.generate_evaluation_code(code)
- if and_label or or_label:
- test_result, uses_temp = self.generate_operand_test(code)
+ code.mark_pos(self.pos)
+
+ # x => x
+ # x and ... or ... => next 'and' / 'or'
+ # False ... or x => next 'or'
+ # True and x => next 'and'
+ # True or x => True (operand)
+
+ self.arg.generate_evaluation_code(code)
+ if and_label or or_label:
+ test_result, uses_temp = self.generate_operand_test(code)
if uses_temp and (and_label and or_label):
# cannot become final result => free early
# disposal: uses_temp and (and_label and or_label)
self.arg.generate_disposal_code(code)
- sense = '!' if or_label else ''
- code.putln("if (%s%s) {" % (sense, test_result))
- if uses_temp:
- code.funcstate.release_temp(test_result)
+ sense = '!' if or_label else ''
+ code.putln("if (%s%s) {" % (sense, test_result))
+ if uses_temp:
+ code.funcstate.release_temp(test_result)
if not uses_temp or not (and_label and or_label):
# disposal: (not uses_temp) or {not (and_label and or_label) [if]}
self.arg.generate_disposal_code(code)
-
+
if or_label and or_label != fall_through:
- # value is false => short-circuit to next 'or'
- code.put_goto(or_label)
- if and_label:
- # value is true => go to next 'and'
+ # value is false => short-circuit to next 'or'
+ code.put_goto(or_label)
+ if and_label:
+ # value is true => go to next 'and'
if or_label:
- code.putln("} else {")
+ code.putln("} else {")
if not uses_temp:
# disposal: (not uses_temp) and {(and_label and or_label) [else]}
self.arg.generate_disposal_code(code)
if and_label != fall_through:
code.put_goto(and_label)
-
- if not and_label or not or_label:
- # if no next 'and' or 'or', we provide the result
+
+ if not and_label or not or_label:
+ # if no next 'and' or 'or', we provide the result
if and_label or or_label:
code.putln("} else {")
- self.value.generate_evaluation_code(code)
- self.value.make_owned_reference(code)
+ self.value.generate_evaluation_code(code)
+ self.value.make_owned_reference(code)
code.putln("%s = %s;" % (final_result_temp, self.value.result_as(final_result_type)))
- self.value.generate_post_assignment_code(code)
+ self.value.generate_post_assignment_code(code)
# disposal: {not (and_label and or_label) [else]}
- self.arg.generate_disposal_code(code)
- self.value.free_temps(code)
+ self.arg.generate_disposal_code(code)
+ self.value.free_temps(code)
if end_label != fall_through:
- code.put_goto(end_label)
-
- if and_label or or_label:
- code.putln("}")
- self.arg.free_temps(code)
-
-
-class CondExprNode(ExprNode):
- # Short-circuiting conditional expression.
- #
- # test ExprNode
- # true_val ExprNode
- # false_val ExprNode
-
- true_val = None
- false_val = None
+ code.put_goto(end_label)
+
+ if and_label or or_label:
+ code.putln("}")
+ self.arg.free_temps(code)
+
+
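The spanning-type and coercion logic in BoolBinopNode/BoolBinopResultNode above is needed because Python's and/or return one of their operands (not a bool) and evaluate lazily, so each candidate value must be coerced to a common result type; in plain Python:

    assert (0 or "fallback") == "fallback"   # 'or' returns an operand, not a bool
    assert ([] and 1 / 0) == []              # the right operand is never evaluated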
+class CondExprNode(ExprNode):
+ # Short-circuiting conditional expression.
+ #
+ # test ExprNode
+ # true_val ExprNode
+ # false_val ExprNode
+
+ true_val = None
+ false_val = None
is_temp = True
-
- subexprs = ['test', 'true_val', 'false_val']
-
- def type_dependencies(self, env):
- return self.true_val.type_dependencies(env) + self.false_val.type_dependencies(env)
-
- def infer_type(self, env):
- return PyrexTypes.independent_spanning_type(
- self.true_val.infer_type(env),
- self.false_val.infer_type(env))
-
- def calculate_constant_result(self):
- if self.test.constant_result:
- self.constant_result = self.true_val.constant_result
- else:
- self.constant_result = self.false_val.constant_result
-
- def is_ephemeral(self):
- return self.true_val.is_ephemeral() or self.false_val.is_ephemeral()
-
- def analyse_types(self, env):
- self.test = self.test.analyse_types(env).coerce_to_boolean(env)
- self.true_val = self.true_val.analyse_types(env)
- self.false_val = self.false_val.analyse_types(env)
- return self.analyse_result_type(env)
-
- def analyse_result_type(self, env):
- self.type = PyrexTypes.independent_spanning_type(
- self.true_val.type, self.false_val.type)
+
+ subexprs = ['test', 'true_val', 'false_val']
+
+ def type_dependencies(self, env):
+ return self.true_val.type_dependencies(env) + self.false_val.type_dependencies(env)
+
+ def infer_type(self, env):
+ return PyrexTypes.independent_spanning_type(
+ self.true_val.infer_type(env),
+ self.false_val.infer_type(env))
+
+ def calculate_constant_result(self):
+ if self.test.constant_result:
+ self.constant_result = self.true_val.constant_result
+ else:
+ self.constant_result = self.false_val.constant_result
+
+ def is_ephemeral(self):
+ return self.true_val.is_ephemeral() or self.false_val.is_ephemeral()
+
+ def analyse_types(self, env):
+ self.test = self.test.analyse_types(env).coerce_to_boolean(env)
+ self.true_val = self.true_val.analyse_types(env)
+ self.false_val = self.false_val.analyse_types(env)
+ return self.analyse_result_type(env)
+
+ def analyse_result_type(self, env):
+ self.type = PyrexTypes.independent_spanning_type(
+ self.true_val.type, self.false_val.type)
if self.type.is_reference:
self.type = PyrexTypes.CFakeReferenceType(self.type.ref_base_type)
- if self.type.is_pyobject:
- self.result_ctype = py_object_type
- elif self.true_val.is_ephemeral() or self.false_val.is_ephemeral():
- error(self.pos, "Unsafe C derivative of temporary Python reference used in conditional expression")
- if self.true_val.type.is_pyobject or self.false_val.type.is_pyobject:
- self.true_val = self.true_val.coerce_to(self.type, env)
- self.false_val = self.false_val.coerce_to(self.type, env)
+ if self.type.is_pyobject:
+ self.result_ctype = py_object_type
+ elif self.true_val.is_ephemeral() or self.false_val.is_ephemeral():
+ error(self.pos, "Unsafe C derivative of temporary Python reference used in conditional expression")
+ if self.true_val.type.is_pyobject or self.false_val.type.is_pyobject:
+ self.true_val = self.true_val.coerce_to(self.type, env)
+ self.false_val = self.false_val.coerce_to(self.type, env)
if self.type.is_error:
- self.type_error()
- return self
-
+ self.type_error()
+ return self
+
def coerce_to_integer(self, env):
self.true_val = self.true_val.coerce_to_integer(env)
self.false_val = self.false_val.coerce_to_integer(env)
self.result_ctype = None
return self.analyse_result_type(env)
- def coerce_to(self, dst_type, env):
- self.true_val = self.true_val.coerce_to(dst_type, env)
- self.false_val = self.false_val.coerce_to(dst_type, env)
- self.result_ctype = None
- return self.analyse_result_type(env)
-
- def type_error(self):
- if not (self.true_val.type.is_error or self.false_val.type.is_error):
- error(self.pos, "Incompatible types in conditional expression (%s; %s)" %
- (self.true_val.type, self.false_val.type))
- self.type = PyrexTypes.error_type
-
- def check_const(self):
- return (self.test.check_const()
- and self.true_val.check_const()
- and self.false_val.check_const())
-
- def generate_evaluation_code(self, code):
- # Because subexprs may not be evaluated we can use a more optimal
- # subexpr allocation strategy than the default, so override evaluation_code.
-
- code.mark_pos(self.pos)
- self.allocate_temp_result(code)
- self.test.generate_evaluation_code(code)
- code.putln("if (%s) {" % self.test.result())
- self.eval_and_get(code, self.true_val)
- code.putln("} else {")
- self.eval_and_get(code, self.false_val)
- code.putln("}")
- self.test.generate_disposal_code(code)
- self.test.free_temps(code)
-
- def eval_and_get(self, code, expr):
- expr.generate_evaluation_code(code)
- if self.type.is_memoryviewslice:
- expr.make_owned_memoryviewslice(code)
- else:
- expr.make_owned_reference(code)
- code.putln('%s = %s;' % (self.result(), expr.result_as(self.ctype())))
- expr.generate_post_assignment_code(code)
- expr.free_temps(code)
-
- def generate_subexpr_disposal_code(self, code):
- pass # done explicitly above (cleanup must separately happen within the if/else blocks)
-
- def free_subexpr_temps(self, code):
- pass # done explicitly above (cleanup must separately happen within the if/else blocks)
-
-
-richcmp_constants = {
- "<" : "Py_LT",
- "<=": "Py_LE",
- "==": "Py_EQ",
- "!=": "Py_NE",
- "<>": "Py_NE",
- ">" : "Py_GT",
- ">=": "Py_GE",
- # the following are faked by special compare functions
- "in" : "Py_EQ",
- "not_in": "Py_NE",
-}
-
-class CmpNode(object):
- # Mixin class containing code common to PrimaryCmpNodes
- # and CascadedCmpNodes.
-
- special_bool_cmp_function = None
- special_bool_cmp_utility_code = None
-
- def infer_type(self, env):
- # TODO: Actually implement this (after merging with -unstable).
- return py_object_type
-
- def calculate_cascaded_constant_result(self, operand1_result):
- func = compile_time_binary_operators[self.operator]
- operand2_result = self.operand2.constant_result
+ def coerce_to(self, dst_type, env):
+ self.true_val = self.true_val.coerce_to(dst_type, env)
+ self.false_val = self.false_val.coerce_to(dst_type, env)
+ self.result_ctype = None
+ return self.analyse_result_type(env)
+
+ def type_error(self):
+ if not (self.true_val.type.is_error or self.false_val.type.is_error):
+ error(self.pos, "Incompatible types in conditional expression (%s; %s)" %
+ (self.true_val.type, self.false_val.type))
+ self.type = PyrexTypes.error_type
+
+ def check_const(self):
+ return (self.test.check_const()
+ and self.true_val.check_const()
+ and self.false_val.check_const())
+
+ def generate_evaluation_code(self, code):
+ # Because subexprs may not be evaluated we can use a more optimal
+ # subexpr allocation strategy than the default, so override evaluation_code.
+
+ code.mark_pos(self.pos)
+ self.allocate_temp_result(code)
+ self.test.generate_evaluation_code(code)
+ code.putln("if (%s) {" % self.test.result())
+ self.eval_and_get(code, self.true_val)
+ code.putln("} else {")
+ self.eval_and_get(code, self.false_val)
+ code.putln("}")
+ self.test.generate_disposal_code(code)
+ self.test.free_temps(code)
+
+ def eval_and_get(self, code, expr):
+ expr.generate_evaluation_code(code)
+ if self.type.is_memoryviewslice:
+ expr.make_owned_memoryviewslice(code)
+ else:
+ expr.make_owned_reference(code)
+ code.putln('%s = %s;' % (self.result(), expr.result_as(self.ctype())))
+ expr.generate_post_assignment_code(code)
+ expr.free_temps(code)
+
+ def generate_subexpr_disposal_code(self, code):
+ pass # done explicitly above (cleanup must separately happen within the if/else blocks)
+
+ def free_subexpr_temps(self, code):
+ pass # done explicitly above (cleanup must separately happen within the if/else blocks)
+
+
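CondExprNode above emits its two value sub-expressions inside separate if/else branches because a conditional expression only ever evaluates one of them, which is also why the default sub-expression disposal is skipped; a trivial illustration:

    def pick(cond, on_true, on_false):
        # Only one branch runs, mirroring the generated if/else blocks.
        return on_true() if cond else on_false()

    assert pick(True, lambda: "yes", lambda: 1 / 0) == "yes"  # false branch untouched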
+richcmp_constants = {
+ "<" : "Py_LT",
+ "<=": "Py_LE",
+ "==": "Py_EQ",
+ "!=": "Py_NE",
+ "<>": "Py_NE",
+ ">" : "Py_GT",
+ ">=": "Py_GE",
+ # the following are faked by special compare functions
+ "in" : "Py_EQ",
+ "not_in": "Py_NE",
+}
+
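The CmpNode mixin below, together with its cascade handling, underpins Cython's (possibly chained) comparisons; as in plain Python, a chain evaluates each operand at most once and short-circuits on the first failing link:

    calls = []
    def val(name, v):
        calls.append(name)
        return v

    assert (val("a", 1) < val("b", 5) < val("c", 10)) is True
    assert calls == ["a", "b", "c"]

    calls.clear()
    assert (val("a", 7) < val("b", 5) < val("c", 10)) is False
    assert calls == ["a", "b"]   # 'c' is never evaluated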
+class CmpNode(object):
+ # Mixin class containing code common to PrimaryCmpNodes
+ # and CascadedCmpNodes.
+
+ special_bool_cmp_function = None
+ special_bool_cmp_utility_code = None
+
+ def infer_type(self, env):
+ # TODO: Actually implement this (after merging with -unstable).
+ return py_object_type
+
+ def calculate_cascaded_constant_result(self, operand1_result):
+ func = compile_time_binary_operators[self.operator]
+ operand2_result = self.operand2.constant_result
if (isinstance(operand1_result, any_string_type) and
isinstance(operand2_result, any_string_type) and
- type(operand1_result) != type(operand2_result)):
- # string comparison of different types isn't portable
- return
-
- if self.operator in ('in', 'not_in'):
- if isinstance(self.operand2, (ListNode, TupleNode, SetNode)):
- if not self.operand2.args:
- self.constant_result = self.operator == 'not_in'
- return
- elif isinstance(self.operand2, ListNode) and not self.cascade:
- # tuples are more efficient to store than lists
- self.operand2 = self.operand2.as_tuple()
- elif isinstance(self.operand2, DictNode):
- if not self.operand2.key_value_pairs:
- self.constant_result = self.operator == 'not_in'
- return
-
- self.constant_result = func(operand1_result, operand2_result)
-
- def cascaded_compile_time_value(self, operand1, denv):
- func = get_compile_time_binop(self)
- operand2 = self.operand2.compile_time_value(denv)
- try:
- result = func(operand1, operand2)
+ type(operand1_result) != type(operand2_result)):
+ # string comparison of different types isn't portable
+ return
+
+ if self.operator in ('in', 'not_in'):
+ if isinstance(self.operand2, (ListNode, TupleNode, SetNode)):
+ if not self.operand2.args:
+ self.constant_result = self.operator == 'not_in'
+ return
+ elif isinstance(self.operand2, ListNode) and not self.cascade:
+ # tuples are more efficient to store than lists
+ self.operand2 = self.operand2.as_tuple()
+ elif isinstance(self.operand2, DictNode):
+ if not self.operand2.key_value_pairs:
+ self.constant_result = self.operator == 'not_in'
+ return
+
+ self.constant_result = func(operand1_result, operand2_result)
+
+ def cascaded_compile_time_value(self, operand1, denv):
+ func = get_compile_time_binop(self)
+ operand2 = self.operand2.compile_time_value(denv)
+ try:
+ result = func(operand1, operand2)
except Exception as e:
- self.compile_time_value_error(e)
- result = None
- if result:
- cascade = self.cascade
- if cascade:
- result = result and cascade.cascaded_compile_time_value(operand2, denv)
- return result
-
- def is_cpp_comparison(self):
- return self.operand1.type.is_cpp_class or self.operand2.type.is_cpp_class
-
- def find_common_int_type(self, env, op, operand1, operand2):
- # type1 != type2 and at least one of the types is not a C int
- type1 = operand1.type
- type2 = operand2.type
- type1_can_be_int = False
- type2_can_be_int = False
-
- if operand1.is_string_literal and operand1.can_coerce_to_char_literal():
- type1_can_be_int = True
- if operand2.is_string_literal and operand2.can_coerce_to_char_literal():
- type2_can_be_int = True
-
- if type1.is_int:
- if type2_can_be_int:
- return type1
- elif type2.is_int:
- if type1_can_be_int:
- return type2
- elif type1_can_be_int:
- if type2_can_be_int:
- if Builtin.unicode_type in (type1, type2):
- return PyrexTypes.c_py_ucs4_type
- else:
- return PyrexTypes.c_uchar_type
-
- return None
-
- def find_common_type(self, env, op, operand1, common_type=None):
- operand2 = self.operand2
- type1 = operand1.type
- type2 = operand2.type
-
- new_common_type = None
-
- # catch general errors
+ self.compile_time_value_error(e)
+ result = None
+ if result:
+ cascade = self.cascade
+ if cascade:
+ result = result and cascade.cascaded_compile_time_value(operand2, denv)
+ return result
+
+ def is_cpp_comparison(self):
+ return self.operand1.type.is_cpp_class or self.operand2.type.is_cpp_class
+
+ def find_common_int_type(self, env, op, operand1, operand2):
+ # type1 != type2 and at least one of the types is not a C int
+ type1 = operand1.type
+ type2 = operand2.type
+ type1_can_be_int = False
+ type2_can_be_int = False
+
+ if operand1.is_string_literal and operand1.can_coerce_to_char_literal():
+ type1_can_be_int = True
+ if operand2.is_string_literal and operand2.can_coerce_to_char_literal():
+ type2_can_be_int = True
+
+ if type1.is_int:
+ if type2_can_be_int:
+ return type1
+ elif type2.is_int:
+ if type1_can_be_int:
+ return type2
+ elif type1_can_be_int:
+ if type2_can_be_int:
+ if Builtin.unicode_type in (type1, type2):
+ return PyrexTypes.c_py_ucs4_type
+ else:
+ return PyrexTypes.c_uchar_type
+
+ return None
+
+ def find_common_type(self, env, op, operand1, common_type=None):
+ operand2 = self.operand2
+ type1 = operand1.type
+ type2 = operand2.type
+
+ new_common_type = None
+
+ # catch general errors
if (type1 == str_type and (type2.is_string or type2 in (bytes_type, unicode_type)) or
type2 == str_type and (type1.is_string or type1 in (bytes_type, unicode_type))):
- error(self.pos, "Comparisons between bytes/unicode and str are not portable to Python 3")
- new_common_type = error_type
-
- # try to use numeric comparisons where possible
- elif type1.is_complex or type2.is_complex:
+ error(self.pos, "Comparisons between bytes/unicode and str are not portable to Python 3")
+ new_common_type = error_type
+
+ # try to use numeric comparisons where possible
+ elif type1.is_complex or type2.is_complex:
if (op not in ('==', '!=')
and (type1.is_complex or type1.is_numeric)
and (type2.is_complex or type2.is_numeric)):
- error(self.pos, "complex types are unordered")
- new_common_type = error_type
- elif type1.is_pyobject:
+ error(self.pos, "complex types are unordered")
+ new_common_type = error_type
+ elif type1.is_pyobject:
new_common_type = Builtin.complex_type if type1.subtype_of(Builtin.complex_type) else py_object_type
- elif type2.is_pyobject:
+ elif type2.is_pyobject:
new_common_type = Builtin.complex_type if type2.subtype_of(Builtin.complex_type) else py_object_type
- else:
- new_common_type = PyrexTypes.widest_numeric_type(type1, type2)
- elif type1.is_numeric and type2.is_numeric:
- new_common_type = PyrexTypes.widest_numeric_type(type1, type2)
- elif common_type is None or not common_type.is_pyobject:
- new_common_type = self.find_common_int_type(env, op, operand1, operand2)
-
- if new_common_type is None:
- # fall back to generic type compatibility tests
+ else:
+ new_common_type = PyrexTypes.widest_numeric_type(type1, type2)
+ elif type1.is_numeric and type2.is_numeric:
+ new_common_type = PyrexTypes.widest_numeric_type(type1, type2)
+ elif common_type is None or not common_type.is_pyobject:
+ new_common_type = self.find_common_int_type(env, op, operand1, operand2)
+
+ if new_common_type is None:
+ # fall back to generic type compatibility tests
if type1.is_ctuple or type2.is_ctuple:
new_common_type = py_object_type
elif type1 == type2:
- new_common_type = type1
- elif type1.is_pyobject or type2.is_pyobject:
- if type2.is_numeric or type2.is_string:
- if operand2.check_for_coercion_error(type1, env):
- new_common_type = error_type
- else:
- new_common_type = py_object_type
- elif type1.is_numeric or type1.is_string:
- if operand1.check_for_coercion_error(type2, env):
- new_common_type = error_type
- else:
- new_common_type = py_object_type
- elif py_object_type.assignable_from(type1) and py_object_type.assignable_from(type2):
- new_common_type = py_object_type
- else:
- # one Python type and one non-Python type, not assignable
- self.invalid_types_error(operand1, op, operand2)
- new_common_type = error_type
- elif type1.assignable_from(type2):
- new_common_type = type1
- elif type2.assignable_from(type1):
- new_common_type = type2
- else:
- # C types that we couldn't handle up to here are an error
- self.invalid_types_error(operand1, op, operand2)
- new_common_type = error_type
-
- if new_common_type.is_string and (isinstance(operand1, BytesNode) or
- isinstance(operand2, BytesNode)):
- # special case when comparing char* to bytes literal: must
- # compare string values!
- new_common_type = bytes_type
-
- # recursively merge types
- if common_type is None or new_common_type.is_error:
- common_type = new_common_type
- else:
- # we could do a lot better by splitting the comparison
- # into a non-Python part and a Python part, but this is
- # safer for now
- common_type = PyrexTypes.spanning_type(common_type, new_common_type)
-
- if self.cascade:
- common_type = self.cascade.find_common_type(env, self.operator, operand2, common_type)
-
- return common_type
-
- def invalid_types_error(self, operand1, op, operand2):
- error(self.pos, "Invalid types for '%s' (%s, %s)" %
- (op, operand1.type, operand2.type))
-
- def is_python_comparison(self):
- return (not self.is_ptr_contains()
- and not self.is_c_string_contains()
- and (self.has_python_operands()
- or (self.cascade and self.cascade.is_python_comparison())
- or self.operator in ('in', 'not_in')))
-
- def coerce_operands_to(self, dst_type, env):
- operand2 = self.operand2
- if operand2.type != dst_type:
- self.operand2 = operand2.coerce_to(dst_type, env)
- if self.cascade:
- self.cascade.coerce_operands_to(dst_type, env)
-
- def is_python_result(self):
- return ((self.has_python_operands() and
- self.special_bool_cmp_function is None and
- self.operator not in ('is', 'is_not', 'in', 'not_in') and
- not self.is_c_string_contains() and
- not self.is_ptr_contains())
- or (self.cascade and self.cascade.is_python_result()))
-
- def is_c_string_contains(self):
- return self.operator in ('in', 'not_in') and \
- ((self.operand1.type.is_int
- and (self.operand2.type.is_string or self.operand2.type is bytes_type)) or
- (self.operand1.type.is_unicode_char
- and self.operand2.type is unicode_type))
-
- def is_ptr_contains(self):
- if self.operator in ('in', 'not_in'):
- container_type = self.operand2.type
- return (container_type.is_ptr or container_type.is_array) \
- and not container_type.is_string
-
- def find_special_bool_compare_function(self, env, operand1, result_is_bool=False):
- # note: currently operand1 must get coerced to a Python object if we succeed here!
- if self.operator in ('==', '!='):
- type1, type2 = operand1.type, self.operand2.type
- if result_is_bool or (type1.is_builtin_type and type2.is_builtin_type):
- if type1 is Builtin.unicode_type or type2 is Builtin.unicode_type:
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("UnicodeEquals", "StringTools.c")
- self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
- return True
- elif type1 is Builtin.bytes_type or type2 is Builtin.bytes_type:
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("BytesEquals", "StringTools.c")
- self.special_bool_cmp_function = "__Pyx_PyBytes_Equals"
- return True
- elif type1 is Builtin.basestring_type or type2 is Builtin.basestring_type:
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("UnicodeEquals", "StringTools.c")
- self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
- return True
- elif type1 is Builtin.str_type or type2 is Builtin.str_type:
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("StrEquals", "StringTools.c")
- self.special_bool_cmp_function = "__Pyx_PyString_Equals"
- return True
- elif self.operator in ('in', 'not_in'):
- if self.operand2.type is Builtin.dict_type:
- self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("PyDictContains", "ObjectHandling.c")
+ new_common_type = type1
+ elif type1.is_pyobject or type2.is_pyobject:
+ if type2.is_numeric or type2.is_string:
+ if operand2.check_for_coercion_error(type1, env):
+ new_common_type = error_type
+ else:
+ new_common_type = py_object_type
+ elif type1.is_numeric or type1.is_string:
+ if operand1.check_for_coercion_error(type2, env):
+ new_common_type = error_type
+ else:
+ new_common_type = py_object_type
+ elif py_object_type.assignable_from(type1) and py_object_type.assignable_from(type2):
+ new_common_type = py_object_type
+ else:
+ # one Python type and one non-Python type, not assignable
+ self.invalid_types_error(operand1, op, operand2)
+ new_common_type = error_type
+ elif type1.assignable_from(type2):
+ new_common_type = type1
+ elif type2.assignable_from(type1):
+ new_common_type = type2
+ else:
+ # C types that we couldn't handle up to here are an error
+ self.invalid_types_error(operand1, op, operand2)
+ new_common_type = error_type
+
+ if new_common_type.is_string and (isinstance(operand1, BytesNode) or
+ isinstance(operand2, BytesNode)):
+ # special case when comparing char* to bytes literal: must
+ # compare string values!
+ new_common_type = bytes_type
+
+ # recursively merge types
+ if common_type is None or new_common_type.is_error:
+ common_type = new_common_type
+ else:
+ # we could do a lot better by splitting the comparison
+ # into a non-Python part and a Python part, but this is
+ # safer for now
+ common_type = PyrexTypes.spanning_type(common_type, new_common_type)
+
+ if self.cascade:
+ common_type = self.cascade.find_common_type(env, self.operator, operand2, common_type)
+
+ return common_type
+
+ def invalid_types_error(self, operand1, op, operand2):
+ error(self.pos, "Invalid types for '%s' (%s, %s)" %
+ (op, operand1.type, operand2.type))
+
+ def is_python_comparison(self):
+ return (not self.is_ptr_contains()
+ and not self.is_c_string_contains()
+ and (self.has_python_operands()
+ or (self.cascade and self.cascade.is_python_comparison())
+ or self.operator in ('in', 'not_in')))
+
+ def coerce_operands_to(self, dst_type, env):
+ operand2 = self.operand2
+ if operand2.type != dst_type:
+ self.operand2 = operand2.coerce_to(dst_type, env)
+ if self.cascade:
+ self.cascade.coerce_operands_to(dst_type, env)
+
+ def is_python_result(self):
+ return ((self.has_python_operands() and
+ self.special_bool_cmp_function is None and
+ self.operator not in ('is', 'is_not', 'in', 'not_in') and
+ not self.is_c_string_contains() and
+ not self.is_ptr_contains())
+ or (self.cascade and self.cascade.is_python_result()))
+
+ def is_c_string_contains(self):
+ return self.operator in ('in', 'not_in') and \
+ ((self.operand1.type.is_int
+ and (self.operand2.type.is_string or self.operand2.type is bytes_type)) or
+ (self.operand1.type.is_unicode_char
+ and self.operand2.type is unicode_type))
+
+ def is_ptr_contains(self):
+ if self.operator in ('in', 'not_in'):
+ container_type = self.operand2.type
+ return (container_type.is_ptr or container_type.is_array) \
+ and not container_type.is_string
+
+ def find_special_bool_compare_function(self, env, operand1, result_is_bool=False):
+ # note: currently operand1 must get coerced to a Python object if we succeed here!
+ if self.operator in ('==', '!='):
+ type1, type2 = operand1.type, self.operand2.type
+ if result_is_bool or (type1.is_builtin_type and type2.is_builtin_type):
+ if type1 is Builtin.unicode_type or type2 is Builtin.unicode_type:
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("UnicodeEquals", "StringTools.c")
+ self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
+ return True
+ elif type1 is Builtin.bytes_type or type2 is Builtin.bytes_type:
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("BytesEquals", "StringTools.c")
+ self.special_bool_cmp_function = "__Pyx_PyBytes_Equals"
+ return True
+ elif type1 is Builtin.basestring_type or type2 is Builtin.basestring_type:
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("UnicodeEquals", "StringTools.c")
+ self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
+ return True
+ elif type1 is Builtin.str_type or type2 is Builtin.str_type:
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("StrEquals", "StringTools.c")
+ self.special_bool_cmp_function = "__Pyx_PyString_Equals"
+ return True
+ elif self.operator in ('in', 'not_in'):
+ if self.operand2.type is Builtin.dict_type:
+ self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("PyDictContains", "ObjectHandling.c")
self.special_bool_cmp_function = "__Pyx_PyDict_ContainsTF"
- return True
+ return True
elif self.operand2.type is Builtin.set_type:
self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
self.special_bool_cmp_utility_code = UtilityCode.load_cached("PySetContains", "ObjectHandling.c")
self.special_bool_cmp_function = "__Pyx_PySet_ContainsTF"
return True
- elif self.operand2.type is Builtin.unicode_type:
- self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("PyUnicodeContains", "StringTools.c")
+ elif self.operand2.type is Builtin.unicode_type:
+ self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("PyUnicodeContains", "StringTools.c")
self.special_bool_cmp_function = "__Pyx_PyUnicode_ContainsTF"
- return True
- else:
- if not self.operand2.type.is_pyobject:
- self.operand2 = self.operand2.coerce_to_pyobject(env)
- self.special_bool_cmp_utility_code = UtilityCode.load_cached("PySequenceContains", "ObjectHandling.c")
+ return True
+ else:
+ if not self.operand2.type.is_pyobject:
+ self.operand2 = self.operand2.coerce_to_pyobject(env)
+ self.special_bool_cmp_utility_code = UtilityCode.load_cached("PySequenceContains", "ObjectHandling.c")
self.special_bool_cmp_function = "__Pyx_PySequence_ContainsTF"
- return True
- return False
-
- def generate_operation_code(self, code, result_code,
- operand1, op , operand2):
- if self.type.is_pyobject:
- error_clause = code.error_goto_if_null
- got_ref = "__Pyx_XGOTREF(%s); " % result_code
- if self.special_bool_cmp_function:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyBoolOrNullFromLong", "ObjectHandling.c"))
- coerce_result = "__Pyx_PyBoolOrNull_FromLong"
- else:
- coerce_result = "__Pyx_PyBool_FromLong"
- else:
- error_clause = code.error_goto_if_neg
- got_ref = ""
- coerce_result = ""
-
- if self.special_bool_cmp_function:
- if operand1.type.is_pyobject:
- result1 = operand1.py_result()
- else:
- result1 = operand1.result()
- if operand2.type.is_pyobject:
- result2 = operand2.py_result()
- else:
- result2 = operand2.result()
- if self.special_bool_cmp_utility_code:
- code.globalstate.use_utility_code(self.special_bool_cmp_utility_code)
- code.putln(
- "%s = %s(%s(%s, %s, %s)); %s%s" % (
- result_code,
- coerce_result,
- self.special_bool_cmp_function,
- result1, result2, richcmp_constants[op],
- got_ref,
- error_clause(result_code, self.pos)))
-
- elif operand1.type.is_pyobject and op not in ('is', 'is_not'):
- assert op not in ('in', 'not_in'), op
- code.putln("%s = PyObject_RichCompare(%s, %s, %s); %s%s" % (
- result_code,
- operand1.py_result(),
- operand2.py_result(),
- richcmp_constants[op],
- got_ref,
- error_clause(result_code, self.pos)))
-
- elif operand1.type.is_complex:
- code.putln("%s = %s(%s%s(%s, %s));" % (
- result_code,
- coerce_result,
- op == "!=" and "!" or "",
- operand1.type.unary_op('eq'),
- operand1.result(),
- operand2.result()))
-
- else:
- type1 = operand1.type
- type2 = operand2.type
- if (type1.is_extension_type or type2.is_extension_type) \
- and not type1.same_as(type2):
- common_type = py_object_type
- elif type1.is_numeric:
- common_type = PyrexTypes.widest_numeric_type(type1, type2)
- else:
- common_type = type1
- code1 = operand1.result_as(common_type)
- code2 = operand2.result_as(common_type)
+ return True
+ return False
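Illustration (hypothetical comparisons; the helper and utility-code names are the ones selected by the branches above): when one operand is a known builtin, this method swaps the generic rich-compare or contains call for a dedicated C helper and records the utility code that defines it.
#   x == u"abc"        -> __Pyx_PyUnicode_Equals    (UnicodeEquals, StringTools.c)
#   x != b"abc"        -> __Pyx_PyBytes_Equals      (BytesEquals, StringTools.c)
#   key in some_dict   -> __Pyx_PyDict_ContainsTF   (PyDictContains, ObjectHandling.c)
#   item in some_set   -> __Pyx_PySet_ContainsTF    (PySetContains, ObjectHandling.c)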
+
+ def generate_operation_code(self, code, result_code,
+ operand1, op , operand2):
+ if self.type.is_pyobject:
+ error_clause = code.error_goto_if_null
+ got_ref = "__Pyx_XGOTREF(%s); " % result_code
+ if self.special_bool_cmp_function:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyBoolOrNullFromLong", "ObjectHandling.c"))
+ coerce_result = "__Pyx_PyBoolOrNull_FromLong"
+ else:
+ coerce_result = "__Pyx_PyBool_FromLong"
+ else:
+ error_clause = code.error_goto_if_neg
+ got_ref = ""
+ coerce_result = ""
+
+ if self.special_bool_cmp_function:
+ if operand1.type.is_pyobject:
+ result1 = operand1.py_result()
+ else:
+ result1 = operand1.result()
+ if operand2.type.is_pyobject:
+ result2 = operand2.py_result()
+ else:
+ result2 = operand2.result()
+ if self.special_bool_cmp_utility_code:
+ code.globalstate.use_utility_code(self.special_bool_cmp_utility_code)
+ code.putln(
+ "%s = %s(%s(%s, %s, %s)); %s%s" % (
+ result_code,
+ coerce_result,
+ self.special_bool_cmp_function,
+ result1, result2, richcmp_constants[op],
+ got_ref,
+ error_clause(result_code, self.pos)))
+
+ elif operand1.type.is_pyobject and op not in ('is', 'is_not'):
+ assert op not in ('in', 'not_in'), op
+ code.putln("%s = PyObject_RichCompare(%s, %s, %s); %s%s" % (
+ result_code,
+ operand1.py_result(),
+ operand2.py_result(),
+ richcmp_constants[op],
+ got_ref,
+ error_clause(result_code, self.pos)))
+
+ elif operand1.type.is_complex:
+ code.putln("%s = %s(%s%s(%s, %s));" % (
+ result_code,
+ coerce_result,
+ op == "!=" and "!" or "",
+ operand1.type.unary_op('eq'),
+ operand1.result(),
+ operand2.result()))
+
+ else:
+ type1 = operand1.type
+ type2 = operand2.type
+ if (type1.is_extension_type or type2.is_extension_type) \
+ and not type1.same_as(type2):
+ common_type = py_object_type
+ elif type1.is_numeric:
+ common_type = PyrexTypes.widest_numeric_type(type1, type2)
+ else:
+ common_type = type1
+ code1 = operand1.result_as(common_type)
+ code2 = operand2.result_as(common_type)
statement = "%s = %s(%s %s %s);" % (
- result_code,
- coerce_result,
- code1,
- self.c_operator(op),
+ result_code,
+ coerce_result,
+ code1,
+ self.c_operator(op),
code2)
if self.is_cpp_comparison() and self.exception_check == '+':
translate_cpp_exception(
@@ -12517,36 +12517,36 @@ class CmpNode(object):
self.in_nogil_context)
else:
code.putln(statement)
-
- def c_operator(self, op):
- if op == 'is':
- return "=="
- elif op == 'is_not':
- return "!="
- else:
- return op
-
-class PrimaryCmpNode(ExprNode, CmpNode):
- # Non-cascaded comparison or first comparison of
- # a cascaded sequence.
- #
- # operator string
- # operand1 ExprNode
- # operand2 ExprNode
- # cascade CascadedCmpNode
-
- # We don't use the subexprs mechanism, because
- # things here are too complicated for it to handle.
- # Instead, we override all the framework methods
- # which use it.
-
- child_attrs = ['operand1', 'operand2', 'coerced_operand2', 'cascade']
-
- cascade = None
- coerced_operand2 = None
- is_memslice_nonecheck = False
-
- def infer_type(self, env):
+
+ def c_operator(self, op):
+ if op == 'is':
+ return "=="
+ elif op == 'is_not':
+ return "!="
+ else:
+ return op
+
+class PrimaryCmpNode(ExprNode, CmpNode):
+ # Non-cascaded comparison or first comparison of
+ # a cascaded sequence.
+ #
+ # operator string
+ # operand1 ExprNode
+ # operand2 ExprNode
+ # cascade CascadedCmpNode
+
+ # We don't use the subexprs mechanism, because
+ # things here are too complicated for it to handle.
+ # Instead, we override all the framework methods
+ # which use it.
+
+ child_attrs = ['operand1', 'operand2', 'coerced_operand2', 'cascade']
+
+ cascade = None
+ coerced_operand2 = None
+ is_memslice_nonecheck = False
+
+ def infer_type(self, env):
type1 = self.operand1.infer_type(env)
type2 = self.operand2.infer_type(env)
@@ -12555,28 +12555,28 @@ class PrimaryCmpNode(ExprNode, CmpNode):
return PythranExpr(pythran_binop_type(self.operator, type1, type2))
# TODO: implement this for other types.
- return py_object_type
-
- def type_dependencies(self, env):
- return ()
-
- def calculate_constant_result(self):
- assert not self.cascade
- self.calculate_cascaded_constant_result(self.operand1.constant_result)
-
- def compile_time_value(self, denv):
- operand1 = self.operand1.compile_time_value(denv)
- return self.cascaded_compile_time_value(operand1, denv)
-
- def analyse_types(self, env):
- self.operand1 = self.operand1.analyse_types(env)
- self.operand2 = self.operand2.analyse_types(env)
- if self.is_cpp_comparison():
- self.analyse_cpp_comparison(env)
- if self.cascade:
- error(self.pos, "Cascading comparison not yet supported for cpp types.")
- return self
-
+ return py_object_type
+
+ def type_dependencies(self, env):
+ return ()
+
+ def calculate_constant_result(self):
+ assert not self.cascade
+ self.calculate_cascaded_constant_result(self.operand1.constant_result)
+
+ def compile_time_value(self, denv):
+ operand1 = self.operand1.compile_time_value(denv)
+ return self.cascaded_compile_time_value(operand1, denv)
+
+ def analyse_types(self, env):
+ self.operand1 = self.operand1.analyse_types(env)
+ self.operand2 = self.operand2.analyse_types(env)
+ if self.is_cpp_comparison():
+ self.analyse_cpp_comparison(env)
+ if self.cascade:
+ error(self.pos, "Cascading comparison not yet supported for cpp types.")
+ return self
+
type1 = self.operand1.type
type2 = self.operand2.type
if is_pythran_expr(type1) or is_pythran_expr(type2):
@@ -12585,171 +12585,171 @@ class PrimaryCmpNode(ExprNode, CmpNode):
self.is_pycmp = False
return self
- if self.analyse_memoryviewslice_comparison(env):
- return self
-
- if self.cascade:
- self.cascade = self.cascade.analyse_types(env)
-
- if self.operator in ('in', 'not_in'):
- if self.is_c_string_contains():
- self.is_pycmp = False
- common_type = None
- if self.cascade:
- error(self.pos, "Cascading comparison not yet supported for 'int_val in string'.")
- return self
- if self.operand2.type is unicode_type:
- env.use_utility_code(UtilityCode.load_cached("PyUCS4InUnicode", "StringTools.c"))
- else:
- if self.operand1.type is PyrexTypes.c_uchar_type:
- self.operand1 = self.operand1.coerce_to(PyrexTypes.c_char_type, env)
- if self.operand2.type is not bytes_type:
- self.operand2 = self.operand2.coerce_to(bytes_type, env)
- env.use_utility_code(UtilityCode.load_cached("BytesContains", "StringTools.c"))
- self.operand2 = self.operand2.as_none_safe_node(
- "argument of type 'NoneType' is not iterable")
- elif self.is_ptr_contains():
- if self.cascade:
- error(self.pos, "Cascading comparison not supported for 'val in sliced pointer'.")
- self.type = PyrexTypes.c_bint_type
- # Will be transformed by IterationTransform
- return self
- elif self.find_special_bool_compare_function(env, self.operand1):
- if not self.operand1.type.is_pyobject:
- self.operand1 = self.operand1.coerce_to_pyobject(env)
- common_type = None # if coercion needed, the method call above has already done it
- self.is_pycmp = False # result is bint
- else:
- common_type = py_object_type
- self.is_pycmp = True
- elif self.find_special_bool_compare_function(env, self.operand1):
- if not self.operand1.type.is_pyobject:
- self.operand1 = self.operand1.coerce_to_pyobject(env)
- common_type = None # if coercion needed, the method call above has already done it
- self.is_pycmp = False # result is bint
- else:
- common_type = self.find_common_type(env, self.operator, self.operand1)
- self.is_pycmp = common_type.is_pyobject
-
- if common_type is not None and not common_type.is_error:
- if self.operand1.type != common_type:
- self.operand1 = self.operand1.coerce_to(common_type, env)
- self.coerce_operands_to(common_type, env)
-
- if self.cascade:
- self.operand2 = self.operand2.coerce_to_simple(env)
- self.cascade.coerce_cascaded_operands_to_temp(env)
- operand2 = self.cascade.optimise_comparison(self.operand2, env)
- if operand2 is not self.operand2:
- self.coerced_operand2 = operand2
- if self.is_python_result():
- self.type = PyrexTypes.py_object_type
- else:
- self.type = PyrexTypes.c_bint_type
- cdr = self.cascade
- while cdr:
- cdr.type = self.type
- cdr = cdr.cascade
- if self.is_pycmp or self.cascade or self.special_bool_cmp_function:
- # 1) owned reference, 2) reused value, 3) potential function error return value
- self.is_temp = 1
- return self
-
- def analyse_cpp_comparison(self, env):
- type1 = self.operand1.type
- type2 = self.operand2.type
+ if self.analyse_memoryviewslice_comparison(env):
+ return self
+
+ if self.cascade:
+ self.cascade = self.cascade.analyse_types(env)
+
+ if self.operator in ('in', 'not_in'):
+ if self.is_c_string_contains():
+ self.is_pycmp = False
+ common_type = None
+ if self.cascade:
+ error(self.pos, "Cascading comparison not yet supported for 'int_val in string'.")
+ return self
+ if self.operand2.type is unicode_type:
+ env.use_utility_code(UtilityCode.load_cached("PyUCS4InUnicode", "StringTools.c"))
+ else:
+ if self.operand1.type is PyrexTypes.c_uchar_type:
+ self.operand1 = self.operand1.coerce_to(PyrexTypes.c_char_type, env)
+ if self.operand2.type is not bytes_type:
+ self.operand2 = self.operand2.coerce_to(bytes_type, env)
+ env.use_utility_code(UtilityCode.load_cached("BytesContains", "StringTools.c"))
+ self.operand2 = self.operand2.as_none_safe_node(
+ "argument of type 'NoneType' is not iterable")
+ elif self.is_ptr_contains():
+ if self.cascade:
+ error(self.pos, "Cascading comparison not supported for 'val in sliced pointer'.")
+ self.type = PyrexTypes.c_bint_type
+ # Will be transformed by IterationTransform
+ return self
+ elif self.find_special_bool_compare_function(env, self.operand1):
+ if not self.operand1.type.is_pyobject:
+ self.operand1 = self.operand1.coerce_to_pyobject(env)
+ common_type = None # if coercion needed, the method call above has already done it
+ self.is_pycmp = False # result is bint
+ else:
+ common_type = py_object_type
+ self.is_pycmp = True
+ elif self.find_special_bool_compare_function(env, self.operand1):
+ if not self.operand1.type.is_pyobject:
+ self.operand1 = self.operand1.coerce_to_pyobject(env)
+ common_type = None # if coercion needed, the method call above has already done it
+ self.is_pycmp = False # result is bint
+ else:
+ common_type = self.find_common_type(env, self.operator, self.operand1)
+ self.is_pycmp = common_type.is_pyobject
+
+ if common_type is not None and not common_type.is_error:
+ if self.operand1.type != common_type:
+ self.operand1 = self.operand1.coerce_to(common_type, env)
+ self.coerce_operands_to(common_type, env)
+
+ if self.cascade:
+ self.operand2 = self.operand2.coerce_to_simple(env)
+ self.cascade.coerce_cascaded_operands_to_temp(env)
+ operand2 = self.cascade.optimise_comparison(self.operand2, env)
+ if operand2 is not self.operand2:
+ self.coerced_operand2 = operand2
+ if self.is_python_result():
+ self.type = PyrexTypes.py_object_type
+ else:
+ self.type = PyrexTypes.c_bint_type
+ cdr = self.cascade
+ while cdr:
+ cdr.type = self.type
+ cdr = cdr.cascade
+ if self.is_pycmp or self.cascade or self.special_bool_cmp_function:
+ # 1) owned reference, 2) reused value, 3) potential function error return value
+ self.is_temp = 1
+ return self
+
+ def analyse_cpp_comparison(self, env):
+ type1 = self.operand1.type
+ type2 = self.operand2.type
self.is_pycmp = False
- entry = env.lookup_operator(self.operator, [self.operand1, self.operand2])
- if entry is None:
- error(self.pos, "Invalid types for '%s' (%s, %s)" %
- (self.operator, type1, type2))
- self.type = PyrexTypes.error_type
- self.result_code = "<error>"
- return
- func_type = entry.type
- if func_type.is_ptr:
- func_type = func_type.base_type
+ entry = env.lookup_operator(self.operator, [self.operand1, self.operand2])
+ if entry is None:
+ error(self.pos, "Invalid types for '%s' (%s, %s)" %
+ (self.operator, type1, type2))
+ self.type = PyrexTypes.error_type
+ self.result_code = "<error>"
+ return
+ func_type = entry.type
+ if func_type.is_ptr:
+ func_type = func_type.base_type
self.exception_check = func_type.exception_check
self.exception_value = func_type.exception_value
if self.exception_check == '+':
self.is_temp = True
if self.exception_value is None:
env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
- if len(func_type.args) == 1:
- self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env)
- else:
- self.operand1 = self.operand1.coerce_to(func_type.args[0].type, env)
- self.operand2 = self.operand2.coerce_to(func_type.args[1].type, env)
- self.type = func_type.return_type
-
- def analyse_memoryviewslice_comparison(self, env):
- have_none = self.operand1.is_none or self.operand2.is_none
- have_slice = (self.operand1.type.is_memoryviewslice or
- self.operand2.type.is_memoryviewslice)
- ops = ('==', '!=', 'is', 'is_not')
- if have_slice and have_none and self.operator in ops:
- self.is_pycmp = False
- self.type = PyrexTypes.c_bint_type
- self.is_memslice_nonecheck = True
- return True
-
- return False
-
- def coerce_to_boolean(self, env):
- if self.is_pycmp:
- # coercing to bool => may allow for more efficient comparison code
- if self.find_special_bool_compare_function(
- env, self.operand1, result_is_bool=True):
- self.is_pycmp = False
- self.type = PyrexTypes.c_bint_type
- self.is_temp = 1
- if self.cascade:
- operand2 = self.cascade.optimise_comparison(
- self.operand2, env, result_is_bool=True)
- if operand2 is not self.operand2:
- self.coerced_operand2 = operand2
- return self
- # TODO: check if we can optimise parts of the cascade here
- return ExprNode.coerce_to_boolean(self, env)
-
- def has_python_operands(self):
- return (self.operand1.type.is_pyobject
- or self.operand2.type.is_pyobject)
-
- def check_const(self):
- if self.cascade:
- self.not_const()
- return False
- else:
- return self.operand1.check_const() and self.operand2.check_const()
-
- def calculate_result_code(self):
+ if len(func_type.args) == 1:
+ self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env)
+ else:
+ self.operand1 = self.operand1.coerce_to(func_type.args[0].type, env)
+ self.operand2 = self.operand2.coerce_to(func_type.args[1].type, env)
+ self.type = func_type.return_type
+
+ def analyse_memoryviewslice_comparison(self, env):
+ have_none = self.operand1.is_none or self.operand2.is_none
+ have_slice = (self.operand1.type.is_memoryviewslice or
+ self.operand2.type.is_memoryviewslice)
+ ops = ('==', '!=', 'is', 'is_not')
+ if have_slice and have_none and self.operator in ops:
+ self.is_pycmp = False
+ self.type = PyrexTypes.c_bint_type
+ self.is_memslice_nonecheck = True
+ return True
+
+ return False
+
+ def coerce_to_boolean(self, env):
+ if self.is_pycmp:
+ # coercing to bool => may allow for more efficient comparison code
+ if self.find_special_bool_compare_function(
+ env, self.operand1, result_is_bool=True):
+ self.is_pycmp = False
+ self.type = PyrexTypes.c_bint_type
+ self.is_temp = 1
+ if self.cascade:
+ operand2 = self.cascade.optimise_comparison(
+ self.operand2, env, result_is_bool=True)
+ if operand2 is not self.operand2:
+ self.coerced_operand2 = operand2
+ return self
+ # TODO: check if we can optimise parts of the cascade here
+ return ExprNode.coerce_to_boolean(self, env)
+
+ def has_python_operands(self):
+ return (self.operand1.type.is_pyobject
+ or self.operand2.type.is_pyobject)
+
+ def check_const(self):
+ if self.cascade:
+ self.not_const()
+ return False
+ else:
+ return self.operand1.check_const() and self.operand2.check_const()
+
+ def calculate_result_code(self):
operand1, operand2 = self.operand1, self.operand2
if operand1.type.is_complex:
- if self.operator == "!=":
- negation = "!"
- else:
- negation = ""
- return "(%s%s(%s, %s))" % (
- negation,
+ if self.operator == "!=":
+ negation = "!"
+ else:
+ negation = ""
+ return "(%s%s(%s, %s))" % (
+ negation,
operand1.type.binary_op('=='),
operand1.result(),
operand2.result())
- elif self.is_c_string_contains():
+ elif self.is_c_string_contains():
if operand2.type is unicode_type:
- method = "__Pyx_UnicodeContainsUCS4"
- else:
- method = "__Pyx_BytesContains"
- if self.operator == "not_in":
- negation = "!"
- else:
- negation = ""
- return "(%s%s(%s, %s))" % (
- negation,
- method,
+ method = "__Pyx_UnicodeContainsUCS4"
+ else:
+ method = "__Pyx_BytesContains"
+ if self.operator == "not_in":
+ negation = "!"
+ else:
+ negation = ""
+ return "(%s%s(%s, %s))" % (
+ negation,
+ method,
operand2.result(),
operand1.result())
- else:
+ else:
if is_pythran_expr(self.type):
result1, result2 = operand1.pythran_result(), operand2.pythran_result()
else:
@@ -12759,163 +12759,163 @@ class PrimaryCmpNode(ExprNode, CmpNode):
result1 = "((PyObject *) %s.memview)" % result1
else:
result2 = "((PyObject *) %s.memview)" % result2
-
- return "(%s %s %s)" % (
- result1,
- self.c_operator(self.operator),
- result2)
-
- def generate_evaluation_code(self, code):
- self.operand1.generate_evaluation_code(code)
- self.operand2.generate_evaluation_code(code)
- if self.is_temp:
- self.allocate_temp_result(code)
- self.generate_operation_code(code, self.result(),
- self.operand1, self.operator, self.operand2)
- if self.cascade:
- self.cascade.generate_evaluation_code(
- code, self.result(), self.coerced_operand2 or self.operand2,
- needs_evaluation=self.coerced_operand2 is not None)
- self.operand1.generate_disposal_code(code)
- self.operand1.free_temps(code)
- self.operand2.generate_disposal_code(code)
- self.operand2.free_temps(code)
-
- def generate_subexpr_disposal_code(self, code):
- # If this is called, it is a non-cascaded cmp,
- # so only need to dispose of the two main operands.
- self.operand1.generate_disposal_code(code)
- self.operand2.generate_disposal_code(code)
-
- def free_subexpr_temps(self, code):
- # If this is called, it is a non-cascaded cmp,
- # so only need to dispose of the two main operands.
- self.operand1.free_temps(code)
- self.operand2.free_temps(code)
-
- def annotate(self, code):
- self.operand1.annotate(code)
- self.operand2.annotate(code)
- if self.cascade:
- self.cascade.annotate(code)
-
-
-class CascadedCmpNode(Node, CmpNode):
- # A CascadedCmpNode is not a complete expression node. It
- # hangs off the side of another comparison node, shares
- # its left operand with that node, and shares its result
- # with the PrimaryCmpNode at the head of the chain.
- #
- # operator string
- # operand2 ExprNode
- # cascade CascadedCmpNode
-
- child_attrs = ['operand2', 'coerced_operand2', 'cascade']
-
- cascade = None
- coerced_operand2 = None
- constant_result = constant_value_not_set # FIXME: where to calculate this?
-
- def infer_type(self, env):
- # TODO: Actually implement this (after merging with -unstable).
- return py_object_type
-
- def type_dependencies(self, env):
- return ()
-
- def has_constant_result(self):
- return self.constant_result is not constant_value_not_set and \
- self.constant_result is not not_a_constant
-
- def analyse_types(self, env):
- self.operand2 = self.operand2.analyse_types(env)
- if self.cascade:
- self.cascade = self.cascade.analyse_types(env)
- return self
-
- def has_python_operands(self):
- return self.operand2.type.is_pyobject
-
+
+ return "(%s %s %s)" % (
+ result1,
+ self.c_operator(self.operator),
+ result2)
+
+ def generate_evaluation_code(self, code):
+ self.operand1.generate_evaluation_code(code)
+ self.operand2.generate_evaluation_code(code)
+ if self.is_temp:
+ self.allocate_temp_result(code)
+ self.generate_operation_code(code, self.result(),
+ self.operand1, self.operator, self.operand2)
+ if self.cascade:
+ self.cascade.generate_evaluation_code(
+ code, self.result(), self.coerced_operand2 or self.operand2,
+ needs_evaluation=self.coerced_operand2 is not None)
+ self.operand1.generate_disposal_code(code)
+ self.operand1.free_temps(code)
+ self.operand2.generate_disposal_code(code)
+ self.operand2.free_temps(code)
+
+ def generate_subexpr_disposal_code(self, code):
+ # If this is called, it is a non-cascaded cmp,
+ # so only need to dispose of the two main operands.
+ self.operand1.generate_disposal_code(code)
+ self.operand2.generate_disposal_code(code)
+
+ def free_subexpr_temps(self, code):
+ # If this is called, it is a non-cascaded cmp,
+ # so only need to dispose of the two main operands.
+ self.operand1.free_temps(code)
+ self.operand2.free_temps(code)
+
+ def annotate(self, code):
+ self.operand1.annotate(code)
+ self.operand2.annotate(code)
+ if self.cascade:
+ self.cascade.annotate(code)
+
+
+class CascadedCmpNode(Node, CmpNode):
+ # A CascadedCmpNode is not a complete expression node. It
+ # hangs off the side of another comparison node, shares
+ # its left operand with that node, and shares its result
+ # with the PrimaryCmpNode at the head of the chain.
+ #
+ # operator string
+ # operand2 ExprNode
+ # cascade CascadedCmpNode
+
+ child_attrs = ['operand2', 'coerced_operand2', 'cascade']
+
+ cascade = None
+ coerced_operand2 = None
+ constant_result = constant_value_not_set # FIXME: where to calculate this?
+
+ def infer_type(self, env):
+ # TODO: Actually implement this (after merging with -unstable).
+ return py_object_type
+
+ def type_dependencies(self, env):
+ return ()
+
+ def has_constant_result(self):
+ return self.constant_result is not constant_value_not_set and \
+ self.constant_result is not not_a_constant
+
+ def analyse_types(self, env):
+ self.operand2 = self.operand2.analyse_types(env)
+ if self.cascade:
+ self.cascade = self.cascade.analyse_types(env)
+ return self
+
+ def has_python_operands(self):
+ return self.operand2.type.is_pyobject
+
def is_cpp_comparison(self):
# cascaded comparisons aren't currently implemented for c++ classes.
return False
- def optimise_comparison(self, operand1, env, result_is_bool=False):
- if self.find_special_bool_compare_function(env, operand1, result_is_bool):
- self.is_pycmp = False
- self.type = PyrexTypes.c_bint_type
- if not operand1.type.is_pyobject:
- operand1 = operand1.coerce_to_pyobject(env)
- if self.cascade:
- operand2 = self.cascade.optimise_comparison(self.operand2, env, result_is_bool)
- if operand2 is not self.operand2:
- self.coerced_operand2 = operand2
- return operand1
-
- def coerce_operands_to_pyobjects(self, env):
- self.operand2 = self.operand2.coerce_to_pyobject(env)
- if self.operand2.type is dict_type and self.operator in ('in', 'not_in'):
- self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
- if self.cascade:
- self.cascade.coerce_operands_to_pyobjects(env)
-
- def coerce_cascaded_operands_to_temp(self, env):
- if self.cascade:
- #self.operand2 = self.operand2.coerce_to_temp(env) #CTT
- self.operand2 = self.operand2.coerce_to_simple(env)
- self.cascade.coerce_cascaded_operands_to_temp(env)
-
- def generate_evaluation_code(self, code, result, operand1, needs_evaluation=False):
- if self.type.is_pyobject:
- code.putln("if (__Pyx_PyObject_IsTrue(%s)) {" % result)
- code.put_decref(result, self.type)
- else:
- code.putln("if (%s) {" % result)
- if needs_evaluation:
- operand1.generate_evaluation_code(code)
- self.operand2.generate_evaluation_code(code)
- self.generate_operation_code(code, result,
- operand1, self.operator, self.operand2)
- if self.cascade:
- self.cascade.generate_evaluation_code(
- code, result, self.coerced_operand2 or self.operand2,
- needs_evaluation=self.coerced_operand2 is not None)
- if needs_evaluation:
- operand1.generate_disposal_code(code)
- operand1.free_temps(code)
- # Cascaded cmp result is always temp
- self.operand2.generate_disposal_code(code)
- self.operand2.free_temps(code)
- code.putln("}")
-
- def annotate(self, code):
- self.operand2.annotate(code)
- if self.cascade:
- self.cascade.annotate(code)
-
-
-binop_node_classes = {
- "or": BoolBinopNode,
- "and": BoolBinopNode,
- "|": IntBinopNode,
- "^": IntBinopNode,
- "&": IntBinopNode,
- "<<": IntBinopNode,
- ">>": IntBinopNode,
- "+": AddNode,
- "-": SubNode,
- "*": MulNode,
- "@": MatMultNode,
- "/": DivNode,
- "//": DivNode,
- "%": ModNode,
- "**": PowNode,
-}
-
+ def optimise_comparison(self, operand1, env, result_is_bool=False):
+ if self.find_special_bool_compare_function(env, operand1, result_is_bool):
+ self.is_pycmp = False
+ self.type = PyrexTypes.c_bint_type
+ if not operand1.type.is_pyobject:
+ operand1 = operand1.coerce_to_pyobject(env)
+ if self.cascade:
+ operand2 = self.cascade.optimise_comparison(self.operand2, env, result_is_bool)
+ if operand2 is not self.operand2:
+ self.coerced_operand2 = operand2
+ return operand1
+
+ def coerce_operands_to_pyobjects(self, env):
+ self.operand2 = self.operand2.coerce_to_pyobject(env)
+ if self.operand2.type is dict_type and self.operator in ('in', 'not_in'):
+ self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+ if self.cascade:
+ self.cascade.coerce_operands_to_pyobjects(env)
+
+ def coerce_cascaded_operands_to_temp(self, env):
+ if self.cascade:
+ #self.operand2 = self.operand2.coerce_to_temp(env) #CTT
+ self.operand2 = self.operand2.coerce_to_simple(env)
+ self.cascade.coerce_cascaded_operands_to_temp(env)
+
+ def generate_evaluation_code(self, code, result, operand1, needs_evaluation=False):
+ if self.type.is_pyobject:
+ code.putln("if (__Pyx_PyObject_IsTrue(%s)) {" % result)
+ code.put_decref(result, self.type)
+ else:
+ code.putln("if (%s) {" % result)
+ if needs_evaluation:
+ operand1.generate_evaluation_code(code)
+ self.operand2.generate_evaluation_code(code)
+ self.generate_operation_code(code, result,
+ operand1, self.operator, self.operand2)
+ if self.cascade:
+ self.cascade.generate_evaluation_code(
+ code, result, self.coerced_operand2 or self.operand2,
+ needs_evaluation=self.coerced_operand2 is not None)
+ if needs_evaluation:
+ operand1.generate_disposal_code(code)
+ operand1.free_temps(code)
+ # Cascaded cmp result is always temp
+ self.operand2.generate_disposal_code(code)
+ self.operand2.free_temps(code)
+ code.putln("}")
+
+ def annotate(self, code):
+ self.operand2.annotate(code)
+ if self.cascade:
+ self.cascade.annotate(code)
+
+
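Illustration (hypothetical tree shape, following the class comments above): a chained comparison is held as one PrimaryCmpNode whose cascade attribute carries the remaining links, each link sharing its left operand with the comparison before it and writing into the primary node's result.
#   a < b < c   ->   PrimaryCmpNode(operator='<', operand1=a, operand2=b,
#                        cascade=CascadedCmpNode(operator='<', operand2=c, cascade=None))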
+binop_node_classes = {
+ "or": BoolBinopNode,
+ "and": BoolBinopNode,
+ "|": IntBinopNode,
+ "^": IntBinopNode,
+ "&": IntBinopNode,
+ "<<": IntBinopNode,
+ ">>": IntBinopNode,
+ "+": AddNode,
+ "-": SubNode,
+ "*": MulNode,
+ "@": MatMultNode,
+ "/": DivNode,
+ "//": DivNode,
+ "%": ModNode,
+ "**": PowNode,
+}
+
def binop_node(pos, operator, operand1, operand2, inplace=False, **kwargs):
- # Construct binop node of appropriate class for
- # given operator.
+ # Construct binop node of appropriate class for
+ # given operator.
return binop_node_classes[operator](
pos,
operator=operator,
@@ -12923,126 +12923,126 @@ def binop_node(pos, operator, operand1, operand2, inplace=False, **kwargs):
operand2=operand2,
inplace=inplace,
**kwargs)
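Illustration (hypothetical call sites; pos, left and right stand for an existing position tuple and two analysed operand nodes): the factory simply keys into binop_node_classes, for example
#   binop_node(pos, "+",  left, right)   -> AddNode(pos, operator="+", operand1=left, operand2=right, inplace=False)
#   binop_node(pos, "<<", left, right)   -> IntBinopNode(...)
#   binop_node(pos, "**", left, right)   -> PowNode(...)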
-
-
-#-------------------------------------------------------------------
-#
-# Coercion nodes
-#
-# Coercion nodes are special in that they are created during
-# the analyse_types phase of parse tree processing.
-# Their __init__ methods consequently incorporate some aspects
-# of that phase.
-#
-#-------------------------------------------------------------------
-
-class CoercionNode(ExprNode):
- # Abstract base class for coercion nodes.
- #
- # arg ExprNode node being coerced
-
- subexprs = ['arg']
- constant_result = not_a_constant
-
- def __init__(self, arg):
- super(CoercionNode, self).__init__(arg.pos)
- self.arg = arg
- if debug_coercion:
- print("%s Coercing %s" % (self, self.arg))
-
- def calculate_constant_result(self):
- # constant folding can break type coercion, so this is disabled
- pass
-
- def annotate(self, code):
- self.arg.annotate(code)
- if self.arg.type != self.type:
- file, line, col = self.pos
- code.annotate((file, line, col-1), AnnotationItem(
- style='coerce', tag='coerce', text='[%s] to [%s]' % (self.arg.type, self.type)))
-
-
-class CoerceToMemViewSliceNode(CoercionNode):
- """
- Coerce an object to a memoryview slice. This holds a new reference in
- a managed temp.
- """
-
- def __init__(self, arg, dst_type, env):
- assert dst_type.is_memoryviewslice
- assert not arg.type.is_memoryviewslice
- CoercionNode.__init__(self, arg)
- self.type = dst_type
- self.is_temp = 1
- self.use_managed_ref = True
- self.arg = arg
+
+
+#-------------------------------------------------------------------
+#
+# Coercion nodes
+#
+# Coercion nodes are special in that they are created during
+# the analyse_types phase of parse tree processing.
+# Their __init__ methods consequently incorporate some aspects
+# of that phase.
+#
+#-------------------------------------------------------------------
+
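Illustration (hypothetical, mirroring the coerce_to_pyobject() calls visible earlier in this file): a node that needs a different representation is simply wrapped during analyse_types, e.g.
#   node = node.coerce_to_pyobject(env)    # C-typed expr -> CoerceToPyTypeNode(node, env)
#   node.type   is then py_object_type (or a more specific builtin, see CoerceToPyTypeNode below)
#   node.arg    is the original C-typed expression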
+class CoercionNode(ExprNode):
+ # Abstract base class for coercion nodes.
+ #
+ # arg ExprNode node being coerced
+
+ subexprs = ['arg']
+ constant_result = not_a_constant
+
+ def __init__(self, arg):
+ super(CoercionNode, self).__init__(arg.pos)
+ self.arg = arg
+ if debug_coercion:
+ print("%s Coercing %s" % (self, self.arg))
+
+ def calculate_constant_result(self):
+ # constant folding can break type coercion, so this is disabled
+ pass
+
+ def annotate(self, code):
+ self.arg.annotate(code)
+ if self.arg.type != self.type:
+ file, line, col = self.pos
+ code.annotate((file, line, col-1), AnnotationItem(
+ style='coerce', tag='coerce', text='[%s] to [%s]' % (self.arg.type, self.type)))
+
+
+class CoerceToMemViewSliceNode(CoercionNode):
+ """
+ Coerce an object to a memoryview slice. This holds a new reference in
+ a managed temp.
+ """
+
+ def __init__(self, arg, dst_type, env):
+ assert dst_type.is_memoryviewslice
+ assert not arg.type.is_memoryviewslice
+ CoercionNode.__init__(self, arg)
+ self.type = dst_type
+ self.is_temp = 1
+ self.use_managed_ref = True
+ self.arg = arg
self.type.create_from_py_utility_code(env)
-
- def generate_result_code(self, code):
+
+ def generate_result_code(self, code):
code.putln(self.type.from_py_call_code(
self.arg.py_result(),
self.result(),
self.pos,
code
))
-
-
-class CastNode(CoercionNode):
- # Wrap a node in a C type cast.
-
- def __init__(self, arg, new_type):
- CoercionNode.__init__(self, arg)
- self.type = new_type
-
- def may_be_none(self):
- return self.arg.may_be_none()
-
- def calculate_result_code(self):
- return self.arg.result_as(self.type)
-
- def generate_result_code(self, code):
- self.arg.generate_result_code(code)
-
-
-class PyTypeTestNode(CoercionNode):
- # This node is used to check that a generic Python
- # object is an instance of a particular extension type.
- # This node borrows the result of its argument node.
-
- exact_builtin_type = True
-
- def __init__(self, arg, dst_type, env, notnone=False):
-        # The arg is known to be a Python object, and
- # the dst_type is known to be an extension type.
- assert dst_type.is_extension_type or dst_type.is_builtin_type, "PyTypeTest on non extension type"
- CoercionNode.__init__(self, arg)
- self.type = dst_type
- self.result_ctype = arg.ctype()
- self.notnone = notnone
-
- nogil_check = Node.gil_error
- gil_message = "Python type test"
-
- def analyse_types(self, env):
- return self
-
- def may_be_none(self):
- if self.notnone:
- return False
- return self.arg.may_be_none()
-
- def is_simple(self):
- return self.arg.is_simple()
-
- def result_in_temp(self):
- return self.arg.result_in_temp()
-
- def is_ephemeral(self):
- return self.arg.is_ephemeral()
-
- def nonlocally_immutable(self):
- return self.arg.nonlocally_immutable()
-
+
+
+class CastNode(CoercionNode):
+ # Wrap a node in a C type cast.
+
+ def __init__(self, arg, new_type):
+ CoercionNode.__init__(self, arg)
+ self.type = new_type
+
+ def may_be_none(self):
+ return self.arg.may_be_none()
+
+ def calculate_result_code(self):
+ return self.arg.result_as(self.type)
+
+ def generate_result_code(self, code):
+ self.arg.generate_result_code(code)
+
+
+class PyTypeTestNode(CoercionNode):
+ # This node is used to check that a generic Python
+ # object is an instance of a particular extension type.
+ # This node borrows the result of its argument node.
+
+ exact_builtin_type = True
+
+ def __init__(self, arg, dst_type, env, notnone=False):
+        # The arg is known to be a Python object, and
+ # the dst_type is known to be an extension type.
+ assert dst_type.is_extension_type or dst_type.is_builtin_type, "PyTypeTest on non extension type"
+ CoercionNode.__init__(self, arg)
+ self.type = dst_type
+ self.result_ctype = arg.ctype()
+ self.notnone = notnone
+
+ nogil_check = Node.gil_error
+ gil_message = "Python type test"
+
+ def analyse_types(self, env):
+ return self
+
+ def may_be_none(self):
+ if self.notnone:
+ return False
+ return self.arg.may_be_none()
+
+ def is_simple(self):
+ return self.arg.is_simple()
+
+ def result_in_temp(self):
+ return self.arg.result_in_temp()
+
+ def is_ephemeral(self):
+ return self.arg.is_ephemeral()
+
+ def nonlocally_immutable(self):
+ return self.arg.nonlocally_immutable()
+
def reanalyse(self):
if self.type != self.arg.type or not self.arg.is_temp:
return self
@@ -13052,90 +13052,90 @@ class PyTypeTestNode(CoercionNode):
return self.arg.as_none_safe_node("Cannot convert NoneType to %.200s" % self.type.name)
return self.arg
- def calculate_constant_result(self):
- # FIXME
- pass
-
- def calculate_result_code(self):
- return self.arg.result()
-
- def generate_result_code(self, code):
- if self.type.typeobj_is_available():
- if self.type.is_builtin_type:
- type_test = self.type.type_test_code(
- self.arg.py_result(),
- self.notnone, exact=self.exact_builtin_type)
- else:
- type_test = self.type.type_test_code(
- self.arg.py_result(), self.notnone)
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("ExtTypeTest", "ObjectHandling.c"))
- code.putln("if (!(%s)) %s" % (
- type_test, code.error_goto(self.pos)))
- else:
- error(self.pos, "Cannot test type of extern C class "
- "without type object name specification")
-
- def generate_post_assignment_code(self, code):
- self.arg.generate_post_assignment_code(code)
-
+ def calculate_constant_result(self):
+ # FIXME
+ pass
+
+ def calculate_result_code(self):
+ return self.arg.result()
+
+ def generate_result_code(self, code):
+ if self.type.typeobj_is_available():
+ if self.type.is_builtin_type:
+ type_test = self.type.type_test_code(
+ self.arg.py_result(),
+ self.notnone, exact=self.exact_builtin_type)
+ else:
+ type_test = self.type.type_test_code(
+ self.arg.py_result(), self.notnone)
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("ExtTypeTest", "ObjectHandling.c"))
+ code.putln("if (!(%s)) %s" % (
+ type_test, code.error_goto(self.pos)))
+ else:
+ error(self.pos, "Cannot test type of extern C class "
+ "without type object name specification")
+
+ def generate_post_assignment_code(self, code):
+ self.arg.generate_post_assignment_code(code)
+
def allocate_temp_result(self, code):
pass
def release_temp_result(self, code):
pass
- def free_temps(self, code):
- self.arg.free_temps(code)
-
+ def free_temps(self, code):
+ self.arg.free_temps(code)
+
def free_subexpr_temps(self, code):
self.arg.free_subexpr_temps(code)
-
-
-class NoneCheckNode(CoercionNode):
- # This node is used to check that a Python object is not None and
- # raises an appropriate exception (as specified by the creating
- # transform).
-
- is_nonecheck = True
-
- def __init__(self, arg, exception_type_cname, exception_message,
+
+
+class NoneCheckNode(CoercionNode):
+ # This node is used to check that a Python object is not None and
+ # raises an appropriate exception (as specified by the creating
+ # transform).
+
+ is_nonecheck = True
+
+ def __init__(self, arg, exception_type_cname, exception_message,
exception_format_args=()):
- CoercionNode.__init__(self, arg)
- self.type = arg.type
- self.result_ctype = arg.ctype()
- self.exception_type_cname = exception_type_cname
- self.exception_message = exception_message
- self.exception_format_args = tuple(exception_format_args or ())
-
- nogil_check = None # this node only guards an operation that would fail already
-
- def analyse_types(self, env):
- return self
-
- def may_be_none(self):
- return False
-
- def is_simple(self):
- return self.arg.is_simple()
-
- def result_in_temp(self):
- return self.arg.result_in_temp()
-
- def nonlocally_immutable(self):
- return self.arg.nonlocally_immutable()
-
- def calculate_result_code(self):
- return self.arg.result()
-
- def condition(self):
- if self.type.is_pyobject:
- return self.arg.py_result()
- elif self.type.is_memoryviewslice:
- return "((PyObject *) %s.memview)" % self.arg.result()
- else:
- raise Exception("unsupported type")
-
+ CoercionNode.__init__(self, arg)
+ self.type = arg.type
+ self.result_ctype = arg.ctype()
+ self.exception_type_cname = exception_type_cname
+ self.exception_message = exception_message
+ self.exception_format_args = tuple(exception_format_args or ())
+
+ nogil_check = None # this node only guards an operation that would fail already
+
+ def analyse_types(self, env):
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def is_simple(self):
+ return self.arg.is_simple()
+
+ def result_in_temp(self):
+ return self.arg.result_in_temp()
+
+ def nonlocally_immutable(self):
+ return self.arg.nonlocally_immutable()
+
+ def calculate_result_code(self):
+ return self.arg.result()
+
+ def condition(self):
+ if self.type.is_pyobject:
+ return self.arg.py_result()
+ elif self.type.is_memoryviewslice:
+ return "((PyObject *) %s.memview)" % self.arg.result()
+ else:
+ raise Exception("unsupported type")
+
@classmethod
def generate(cls, arg, code, exception_message,
exception_type_cname="PyExc_TypeError", exception_format_args=(), in_nogil_context=False):
@@ -13149,180 +13149,180 @@ class NoneCheckNode(CoercionNode):
if arg.may_be_none():
cls.generate(arg, code, exception_message, exception_type_cname, exception_format_args, in_nogil_context)
- def put_nonecheck(self, code):
- code.putln(
- "if (unlikely(%s == Py_None)) {" % self.condition())
-
- if self.in_nogil_context:
- code.put_ensure_gil()
-
- escape = StringEncoding.escape_byte_string
- if self.exception_format_args:
- code.putln('PyErr_Format(%s, "%s", %s);' % (
- self.exception_type_cname,
- StringEncoding.escape_byte_string(
- self.exception_message.encode('UTF-8')),
- ', '.join([ '"%s"' % escape(str(arg).encode('UTF-8'))
- for arg in self.exception_format_args ])))
- else:
- code.putln('PyErr_SetString(%s, "%s");' % (
- self.exception_type_cname,
- escape(self.exception_message.encode('UTF-8'))))
-
- if self.in_nogil_context:
- code.put_release_ensured_gil()
-
- code.putln(code.error_goto(self.pos))
- code.putln("}")
-
- def generate_result_code(self, code):
- self.put_nonecheck(code)
-
- def generate_post_assignment_code(self, code):
- self.arg.generate_post_assignment_code(code)
-
- def free_temps(self, code):
- self.arg.free_temps(code)
-
-
-class CoerceToPyTypeNode(CoercionNode):
- # This node is used to convert a C data type
- # to a Python object.
-
- type = py_object_type
+ def put_nonecheck(self, code):
+ code.putln(
+ "if (unlikely(%s == Py_None)) {" % self.condition())
+
+ if self.in_nogil_context:
+ code.put_ensure_gil()
+
+ escape = StringEncoding.escape_byte_string
+ if self.exception_format_args:
+ code.putln('PyErr_Format(%s, "%s", %s);' % (
+ self.exception_type_cname,
+ StringEncoding.escape_byte_string(
+ self.exception_message.encode('UTF-8')),
+ ', '.join([ '"%s"' % escape(str(arg).encode('UTF-8'))
+ for arg in self.exception_format_args ])))
+ else:
+ code.putln('PyErr_SetString(%s, "%s");' % (
+ self.exception_type_cname,
+ escape(self.exception_message.encode('UTF-8'))))
+
+ if self.in_nogil_context:
+ code.put_release_ensured_gil()
+
+ code.putln(code.error_goto(self.pos))
+ code.putln("}")
+
+ def generate_result_code(self, code):
+ self.put_nonecheck(code)
+
+ def generate_post_assignment_code(self, code):
+ self.arg.generate_post_assignment_code(code)
+
+ def free_temps(self, code):
+ self.arg.free_temps(code)
+
+
+class CoerceToPyTypeNode(CoercionNode):
+ # This node is used to convert a C data type
+ # to a Python object.
+
+ type = py_object_type
target_type = py_object_type
- is_temp = 1
-
- def __init__(self, arg, env, type=py_object_type):
- if not arg.type.create_to_py_utility_code(env):
- error(arg.pos, "Cannot convert '%s' to Python object" % arg.type)
- elif arg.type.is_complex:
- # special case: complex coercion is so complex that it
- # uses a macro ("__pyx_PyComplex_FromComplex()"), for
- # which the argument must be simple
- arg = arg.coerce_to_simple(env)
- CoercionNode.__init__(self, arg)
- if type is py_object_type:
- # be specific about some known types
- if arg.type.is_string or arg.type.is_cpp_string:
- self.type = default_str_type(env)
- elif arg.type.is_pyunicode_ptr or arg.type.is_unicode_char:
- self.type = unicode_type
- elif arg.type.is_complex:
- self.type = Builtin.complex_type
+ is_temp = 1
+
+ def __init__(self, arg, env, type=py_object_type):
+ if not arg.type.create_to_py_utility_code(env):
+ error(arg.pos, "Cannot convert '%s' to Python object" % arg.type)
+ elif arg.type.is_complex:
+ # special case: complex coercion is so complex that it
+ # uses a macro ("__pyx_PyComplex_FromComplex()"), for
+ # which the argument must be simple
+ arg = arg.coerce_to_simple(env)
+ CoercionNode.__init__(self, arg)
+ if type is py_object_type:
+ # be specific about some known types
+ if arg.type.is_string or arg.type.is_cpp_string:
+ self.type = default_str_type(env)
+ elif arg.type.is_pyunicode_ptr or arg.type.is_unicode_char:
+ self.type = unicode_type
+ elif arg.type.is_complex:
+ self.type = Builtin.complex_type
self.target_type = self.type
- elif arg.type.is_string or arg.type.is_cpp_string:
- if (type not in (bytes_type, bytearray_type)
- and not env.directives['c_string_encoding']):
- error(arg.pos,
- "default encoding required for conversion from '%s' to '%s'" %
- (arg.type, type))
+ elif arg.type.is_string or arg.type.is_cpp_string:
+ if (type not in (bytes_type, bytearray_type)
+ and not env.directives['c_string_encoding']):
+ error(arg.pos,
+ "default encoding required for conversion from '%s' to '%s'" %
+ (arg.type, type))
self.type = self.target_type = type
- else:
- # FIXME: check that the target type and the resulting type are compatible
+ else:
+ # FIXME: check that the target type and the resulting type are compatible
self.target_type = type
-
- gil_message = "Converting to Python object"
-
- def may_be_none(self):
- # FIXME: is this always safe?
- return False
-
- def coerce_to_boolean(self, env):
- arg_type = self.arg.type
- if (arg_type == PyrexTypes.c_bint_type or
- (arg_type.is_pyobject and arg_type.name == 'bool')):
- return self.arg.coerce_to_temp(env)
- else:
- return CoerceToBooleanNode(self, env)
-
- def coerce_to_integer(self, env):
- # If not already some C integer type, coerce to longint.
- if self.arg.type.is_int:
- return self.arg
- else:
- return self.arg.coerce_to(PyrexTypes.c_long_type, env)
-
- def analyse_types(self, env):
- # The arg is always already analysed
- return self
-
- def generate_result_code(self, code):
+
+ gil_message = "Converting to Python object"
+
+ def may_be_none(self):
+ # FIXME: is this always safe?
+ return False
+
+ def coerce_to_boolean(self, env):
+ arg_type = self.arg.type
+ if (arg_type == PyrexTypes.c_bint_type or
+ (arg_type.is_pyobject and arg_type.name == 'bool')):
+ return self.arg.coerce_to_temp(env)
+ else:
+ return CoerceToBooleanNode(self, env)
+
+ def coerce_to_integer(self, env):
+ # If not already some C integer type, coerce to longint.
+ if self.arg.type.is_int:
+ return self.arg
+ else:
+ return self.arg.coerce_to(PyrexTypes.c_long_type, env)
+
+ def analyse_types(self, env):
+ # The arg is always already analysed
+ return self
+
+ def generate_result_code(self, code):
code.putln('%s; %s' % (
self.arg.type.to_py_call_code(
self.arg.result(),
self.result(),
self.target_type),
- code.error_goto_if_null(self.result(), self.pos)))
-
- code.put_gotref(self.py_result())
-
-
-class CoerceIntToBytesNode(CoerceToPyTypeNode):
- # This node is used to convert a C int type to a Python bytes
- # object.
-
- is_temp = 1
-
- def __init__(self, arg, env):
- arg = arg.coerce_to_simple(env)
- CoercionNode.__init__(self, arg)
- self.type = Builtin.bytes_type
-
- def generate_result_code(self, code):
- arg = self.arg
- arg_result = arg.result()
- if arg.type not in (PyrexTypes.c_char_type,
- PyrexTypes.c_uchar_type,
- PyrexTypes.c_schar_type):
- if arg.type.signed:
- code.putln("if ((%s < 0) || (%s > 255)) {" % (
- arg_result, arg_result))
- else:
- code.putln("if (%s > 255) {" % arg_result)
- code.putln('PyErr_SetString(PyExc_OverflowError, '
- '"value too large to pack into a byte"); %s' % (
- code.error_goto(self.pos)))
- code.putln('}')
- temp = None
- if arg.type is not PyrexTypes.c_char_type:
- temp = code.funcstate.allocate_temp(PyrexTypes.c_char_type, manage_ref=False)
- code.putln("%s = (char)%s;" % (temp, arg_result))
- arg_result = temp
- code.putln('%s = PyBytes_FromStringAndSize(&%s, 1); %s' % (
- self.result(),
- arg_result,
- code.error_goto_if_null(self.result(), self.pos)))
- if temp is not None:
- code.funcstate.release_temp(temp)
- code.put_gotref(self.py_result())
-
-
-class CoerceFromPyTypeNode(CoercionNode):
- # This node is used to convert a Python object
- # to a C data type.
-
- def __init__(self, result_type, arg, env):
- CoercionNode.__init__(self, arg)
- self.type = result_type
- self.is_temp = 1
- if not result_type.create_from_py_utility_code(env):
- error(arg.pos,
- "Cannot convert Python object to '%s'" % result_type)
- if self.type.is_string or self.type.is_pyunicode_ptr:
- if self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal:
- warning(arg.pos,
- "Obtaining '%s' from externally modifiable global Python value" % result_type,
- level=1)
-
- def analyse_types(self, env):
- # The arg is always already analysed
- return self
-
- def is_ephemeral(self):
+ code.error_goto_if_null(self.result(), self.pos)))
+
+ code.put_gotref(self.py_result())
+
+
+class CoerceIntToBytesNode(CoerceToPyTypeNode):
+ # This node is used to convert a C int type to a Python bytes
+ # object.
+
+ is_temp = 1
+
+ def __init__(self, arg, env):
+ arg = arg.coerce_to_simple(env)
+ CoercionNode.__init__(self, arg)
+ self.type = Builtin.bytes_type
+
+ def generate_result_code(self, code):
+ arg = self.arg
+ arg_result = arg.result()
+ if arg.type not in (PyrexTypes.c_char_type,
+ PyrexTypes.c_uchar_type,
+ PyrexTypes.c_schar_type):
+ if arg.type.signed:
+ code.putln("if ((%s < 0) || (%s > 255)) {" % (
+ arg_result, arg_result))
+ else:
+ code.putln("if (%s > 255) {" % arg_result)
+ code.putln('PyErr_SetString(PyExc_OverflowError, '
+ '"value too large to pack into a byte"); %s' % (
+ code.error_goto(self.pos)))
+ code.putln('}')
+ temp = None
+ if arg.type is not PyrexTypes.c_char_type:
+ temp = code.funcstate.allocate_temp(PyrexTypes.c_char_type, manage_ref=False)
+ code.putln("%s = (char)%s;" % (temp, arg_result))
+ arg_result = temp
+ code.putln('%s = PyBytes_FromStringAndSize(&%s, 1); %s' % (
+ self.result(),
+ arg_result,
+ code.error_goto_if_null(self.result(), self.pos)))
+ if temp is not None:
+ code.funcstate.release_temp(temp)
+ code.put_gotref(self.py_result())
+
+
+class CoerceFromPyTypeNode(CoercionNode):
+ # This node is used to convert a Python object
+ # to a C data type.
+
+ def __init__(self, result_type, arg, env):
+ CoercionNode.__init__(self, arg)
+ self.type = result_type
+ self.is_temp = 1
+ if not result_type.create_from_py_utility_code(env):
+ error(arg.pos,
+ "Cannot convert Python object to '%s'" % result_type)
+ if self.type.is_string or self.type.is_pyunicode_ptr:
+ if self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal:
+ warning(arg.pos,
+ "Obtaining '%s' from externally modifiable global Python value" % result_type,
+ level=1)
+
+ def analyse_types(self, env):
+ # The arg is always already analysed
+ return self
+
+ def is_ephemeral(self):
return (self.type.is_ptr and not self.type.is_array) and self.arg.is_ephemeral()
-
- def generate_result_code(self, code):
+
+ def generate_result_code(self, code):
from_py_function = None
# for certain source types, we can do better than the generic coercion
if self.type.is_string and self.arg.type is bytes_type:
@@ -13332,20 +13332,20 @@ class CoerceFromPyTypeNode(CoercionNode):
code.putln(self.type.from_py_call_code(
self.arg.py_result(), self.result(), self.pos, code, from_py_function=from_py_function))
- if self.type.is_pyobject:
- code.put_gotref(self.py_result())
-
- def nogil_check(self, env):
- error(self.pos, "Coercion from Python not allowed without the GIL")
-
-
-class CoerceToBooleanNode(CoercionNode):
- # This node is used when a result needs to be used
- # in a boolean context.
-
- type = PyrexTypes.c_bint_type
-
- _special_builtins = {
+ if self.type.is_pyobject:
+ code.put_gotref(self.py_result())
+
+ def nogil_check(self, env):
+ error(self.pos, "Coercion from Python not allowed without the GIL")
+
+
+class CoerceToBooleanNode(CoercionNode):
+ # This node is used when a result needs to be used
+ # in a boolean context.
+
+ type = PyrexTypes.c_bint_type
+
+ _special_builtins = {
Builtin.list_type: 'PyList_GET_SIZE',
Builtin.tuple_type: 'PyTuple_GET_SIZE',
Builtin.set_type: 'PySet_GET_SIZE',
@@ -13354,341 +13354,341 @@ class CoerceToBooleanNode(CoercionNode):
Builtin.bytearray_type: 'PyByteArray_GET_SIZE',
Builtin.unicode_type: '__Pyx_PyUnicode_IS_TRUE',
}
-
- def __init__(self, arg, env):
- CoercionNode.__init__(self, arg)
- if arg.type.is_pyobject:
- self.is_temp = 1
-
- def nogil_check(self, env):
- if self.arg.type.is_pyobject and self._special_builtins.get(self.arg.type) is None:
- self.gil_error()
-
- gil_message = "Truth-testing Python object"
-
- def check_const(self):
- if self.is_temp:
- self.not_const()
- return False
- return self.arg.check_const()
-
- def calculate_result_code(self):
- return "(%s != 0)" % self.arg.result()
-
- def generate_result_code(self, code):
- if not self.is_temp:
- return
- test_func = self._special_builtins.get(self.arg.type)
- if test_func is not None:
+
+ def __init__(self, arg, env):
+ CoercionNode.__init__(self, arg)
+ if arg.type.is_pyobject:
+ self.is_temp = 1
+
+ def nogil_check(self, env):
+ if self.arg.type.is_pyobject and self._special_builtins.get(self.arg.type) is None:
+ self.gil_error()
+
+ gil_message = "Truth-testing Python object"
+
+ def check_const(self):
+ if self.is_temp:
+ self.not_const()
+ return False
+ return self.arg.check_const()
+
+ def calculate_result_code(self):
+ return "(%s != 0)" % self.arg.result()
+
+ def generate_result_code(self, code):
+ if not self.is_temp:
+ return
+ test_func = self._special_builtins.get(self.arg.type)
+ if test_func is not None:
checks = ["(%s != Py_None)" % self.arg.py_result()] if self.arg.may_be_none() else []
checks.append("(%s(%s) != 0)" % (test_func, self.arg.py_result()))
code.putln("%s = %s;" % (self.result(), '&&'.join(checks)))
- else:
- code.putln(
- "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
- self.result(),
- self.arg.py_result(),
- code.error_goto_if_neg(self.result(), self.pos)))
-
-
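A minimal standalone sketch (an assumption-laden model, not the real node code) of the fast-path choice made by CoerceToBooleanNode.generate_result_code() above: builtins listed in _special_builtins get an emptiness macro plus an optional None guard, everything else falls back to __Pyx_PyObject_IsTrue. The string keys and the __pyx_v_x name below are placeholders.

# Keys stand in for the Builtin type objects used by _special_builtins above.
_SPECIAL = {"list": "PyList_GET_SIZE", "tuple": "PyTuple_GET_SIZE", "set": "PySet_GET_SIZE"}

def truth_test_expr(type_name, c_expr, may_be_none):
    func = _SPECIAL.get(type_name)
    if func is None:
        # generic slow path (the real code also emits error handling)
        return "__Pyx_PyObject_IsTrue(%s)" % c_expr
    checks = []
    if may_be_none:
        checks.append("(%s != Py_None)" % c_expr)   # None must test as false
    checks.append("(%s(%s) != 0)" % (func, c_expr))
    return '&&'.join(checks)

# truth_test_expr("list", "__pyx_v_x", True)
#   -> '(__pyx_v_x != Py_None)&&(PyList_GET_SIZE(__pyx_v_x) != 0)'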
-class CoerceToComplexNode(CoercionNode):
-
- def __init__(self, arg, dst_type, env):
- if arg.type.is_complex:
- arg = arg.coerce_to_simple(env)
- self.type = dst_type
- CoercionNode.__init__(self, arg)
- dst_type.create_declaration_utility_code(env)
-
- def calculate_result_code(self):
- if self.arg.type.is_complex:
- real_part = "__Pyx_CREAL(%s)" % self.arg.result()
- imag_part = "__Pyx_CIMAG(%s)" % self.arg.result()
- else:
- real_part = self.arg.result()
- imag_part = "0"
- return "%s(%s, %s)" % (
- self.type.from_parts,
- real_part,
- imag_part)
-
- def generate_result_code(self, code):
- pass
-
-class CoerceToTempNode(CoercionNode):
- # This node is used to force the result of another node
- # to be stored in a temporary. It is only used if the
- # argument node's result is not already in a temporary.
-
- def __init__(self, arg, env):
- CoercionNode.__init__(self, arg)
- self.type = self.arg.type.as_argument_type()
- self.constant_result = self.arg.constant_result
- self.is_temp = 1
- if self.type.is_pyobject:
- self.result_ctype = py_object_type
-
- gil_message = "Creating temporary Python reference"
-
- def analyse_types(self, env):
- # The arg is always already analysed
- return self
-
- def coerce_to_boolean(self, env):
- self.arg = self.arg.coerce_to_boolean(env)
- if self.arg.is_simple():
- return self.arg
- self.type = self.arg.type
- self.result_ctype = self.type
- return self
-
- def generate_result_code(self, code):
- #self.arg.generate_evaluation_code(code) # Already done
- # by generic generate_subexpr_evaluation_code!
- code.putln("%s = %s;" % (
- self.result(), self.arg.result_as(self.ctype())))
- if self.use_managed_ref:
- if self.type.is_pyobject:
- code.put_incref(self.result(), self.ctype())
- elif self.type.is_memoryviewslice:
- code.put_incref_memoryviewslice(self.result(),
- not self.in_nogil_context)
-
-class ProxyNode(CoercionNode):
- """
- A node that should not be replaced by transforms or other means,
- and hence can be useful to wrap the argument to a clone node
-
- MyNode -> ProxyNode -> ArgNode
- CloneNode -^
- """
-
- nogil_check = None
-
- def __init__(self, arg):
- super(ProxyNode, self).__init__(arg)
- self.constant_result = arg.constant_result
- self._proxy_type()
-
- def analyse_types(self, env):
- self.arg = self.arg.analyse_expressions(env)
- self._proxy_type()
- return self
-
- def infer_type(self, env):
- return self.arg.infer_type(env)
-
- def _proxy_type(self):
- if hasattr(self.arg, 'type'):
- self.type = self.arg.type
- self.result_ctype = self.arg.result_ctype
- if hasattr(self.arg, 'entry'):
- self.entry = self.arg.entry
-
- def generate_result_code(self, code):
- self.arg.generate_result_code(code)
-
- def result(self):
- return self.arg.result()
-
- def is_simple(self):
- return self.arg.is_simple()
-
- def may_be_none(self):
- return self.arg.may_be_none()
-
- def generate_evaluation_code(self, code):
- self.arg.generate_evaluation_code(code)
-
- def generate_disposal_code(self, code):
- self.arg.generate_disposal_code(code)
-
- def free_temps(self, code):
- self.arg.free_temps(code)
-
-class CloneNode(CoercionNode):
- # This node is employed when the result of another node needs
- # to be used multiple times. The argument node's result must
- # be in a temporary. This node "borrows" the result from the
- # argument node, and does not generate any evaluation or
- # disposal code for it. The original owner of the argument
- # node is responsible for doing those things.
-
- subexprs = [] # Arg is not considered a subexpr
- nogil_check = None
-
- def __init__(self, arg):
- CoercionNode.__init__(self, arg)
- self.constant_result = arg.constant_result
- if hasattr(arg, 'type'):
- self.type = arg.type
- self.result_ctype = arg.result_ctype
- if hasattr(arg, 'entry'):
- self.entry = arg.entry
-
- def result(self):
- return self.arg.result()
-
- def may_be_none(self):
- return self.arg.may_be_none()
-
- def type_dependencies(self, env):
- return self.arg.type_dependencies(env)
-
- def infer_type(self, env):
- return self.arg.infer_type(env)
-
- def analyse_types(self, env):
- self.type = self.arg.type
- self.result_ctype = self.arg.result_ctype
- self.is_temp = 1
- if hasattr(self.arg, 'entry'):
- self.entry = self.arg.entry
- return self
-
- def coerce_to(self, dest_type, env):
- if self.arg.is_literal:
- return self.arg.coerce_to(dest_type, env)
- return super(CloneNode, self).coerce_to(dest_type, env)
-
- def is_simple(self):
- return True # result is always in a temp (or a name)
-
- def generate_evaluation_code(self, code):
- pass
-
- def generate_result_code(self, code):
- pass
-
- def generate_disposal_code(self, code):
- pass
-
- def free_temps(self, code):
- pass
-
-
-class CMethodSelfCloneNode(CloneNode):
- # Special CloneNode for the self argument of builtin C methods
- # that accepts subtypes of the builtin type. This is safe only
- # for 'final' subtypes, as subtypes of the declared type may
- # override the C method.
-
- def coerce_to(self, dst_type, env):
- if dst_type.is_builtin_type and self.type.subtype_of(dst_type):
- return self
- return CloneNode.coerce_to(self, dst_type, env)
-
-
-class ModuleRefNode(ExprNode):
- # Simply returns the module object
-
- type = py_object_type
- is_temp = False
- subexprs = []
-
- def analyse_types(self, env):
- return self
-
- def may_be_none(self):
- return False
-
- def calculate_result_code(self):
- return Naming.module_cname
-
- def generate_result_code(self, code):
- pass
-
-class DocstringRefNode(ExprNode):
- # Extracts the docstring of the body element
-
- subexprs = ['body']
- type = py_object_type
- is_temp = True
-
- def __init__(self, pos, body):
- ExprNode.__init__(self, pos)
- assert body.type.is_pyobject
- self.body = body
-
- def analyse_types(self, env):
- return self
-
- def generate_result_code(self, code):
- code.putln('%s = __Pyx_GetAttr(%s, %s); %s' % (
- self.result(), self.body.result(),
- code.intern_identifier(StringEncoding.EncodedString("__doc__")),
- code.error_goto_if_null(self.result(), self.pos)))
- code.put_gotref(self.result())
-
-
-
-#------------------------------------------------------------------------------------
-#
-# Runtime support code
-#
-#------------------------------------------------------------------------------------
-
-pyerr_occurred_withgil_utility_code = UtilityCode(
-proto = """
-static CYTHON_INLINE int __Pyx_ErrOccurredWithGIL(void); /* proto */
-""",
-impl = """
-static CYTHON_INLINE int __Pyx_ErrOccurredWithGIL(void) {
- int err;
- #ifdef WITH_THREAD
- PyGILState_STATE _save = PyGILState_Ensure();
- #endif
- err = !!PyErr_Occurred();
- #ifdef WITH_THREAD
- PyGILState_Release(_save);
- #endif
- return err;
-}
-"""
-)
-
-#------------------------------------------------------------------------------------
-
-raise_unbound_local_error_utility_code = UtilityCode(
-proto = """
-static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname);
-""",
-impl = """
-static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) {
- PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname);
-}
-""")
-
-raise_closure_name_error_utility_code = UtilityCode(
-proto = """
-static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname);
-""",
-impl = """
-static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname) {
- PyErr_Format(PyExc_NameError, "free variable '%s' referenced before assignment in enclosing scope", varname);
-}
-""")
-
-# Don't inline the function; it should really never be called in production.
-raise_unbound_memoryview_utility_code_nogil = UtilityCode(
-proto = """
-static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname);
-""",
-impl = """
-static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname) {
- #ifdef WITH_THREAD
- PyGILState_STATE gilstate = PyGILState_Ensure();
- #endif
- __Pyx_RaiseUnboundLocalError(varname);
- #ifdef WITH_THREAD
- PyGILState_Release(gilstate);
- #endif
-}
-""",
-requires = [raise_unbound_local_error_utility_code])
-
-#------------------------------------------------------------------------------------
-
-raise_too_many_values_to_unpack = UtilityCode.load_cached("RaiseTooManyValuesToUnpack", "ObjectHandling.c")
-raise_need_more_values_to_unpack = UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c")
-tuple_unpacking_error_code = UtilityCode.load_cached("UnpackTupleError", "ObjectHandling.c")
+ else:
+ code.putln(
+ "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
+ self.result(),
+ self.arg.py_result(),
+ code.error_goto_if_neg(self.result(), self.pos)))
+
+
+class CoerceToComplexNode(CoercionNode):
+
+ def __init__(self, arg, dst_type, env):
+ if arg.type.is_complex:
+ arg = arg.coerce_to_simple(env)
+ self.type = dst_type
+ CoercionNode.__init__(self, arg)
+ dst_type.create_declaration_utility_code(env)
+
+ def calculate_result_code(self):
+ if self.arg.type.is_complex:
+ real_part = "__Pyx_CREAL(%s)" % self.arg.result()
+ imag_part = "__Pyx_CIMAG(%s)" % self.arg.result()
+ else:
+ real_part = self.arg.result()
+ imag_part = "0"
+ return "%s(%s, %s)" % (
+ self.type.from_parts,
+ real_part,
+ imag_part)
+
+ def generate_result_code(self, code):
+ pass
+
+class CoerceToTempNode(CoercionNode):
+ # This node is used to force the result of another node
+ # to be stored in a temporary. It is only used if the
+ # argument node's result is not already in a temporary.
+
+ def __init__(self, arg, env):
+ CoercionNode.__init__(self, arg)
+ self.type = self.arg.type.as_argument_type()
+ self.constant_result = self.arg.constant_result
+ self.is_temp = 1
+ if self.type.is_pyobject:
+ self.result_ctype = py_object_type
+
+ gil_message = "Creating temporary Python reference"
+
+ def analyse_types(self, env):
+ # The arg is always already analysed
+ return self
+
+ def coerce_to_boolean(self, env):
+ self.arg = self.arg.coerce_to_boolean(env)
+ if self.arg.is_simple():
+ return self.arg
+ self.type = self.arg.type
+ self.result_ctype = self.type
+ return self
+
+ def generate_result_code(self, code):
+ #self.arg.generate_evaluation_code(code) # Already done
+ # by generic generate_subexpr_evaluation_code!
+ code.putln("%s = %s;" % (
+ self.result(), self.arg.result_as(self.ctype())))
+ if self.use_managed_ref:
+ if self.type.is_pyobject:
+ code.put_incref(self.result(), self.ctype())
+ elif self.type.is_memoryviewslice:
+ code.put_incref_memoryviewslice(self.result(),
+ not self.in_nogil_context)
+
+class ProxyNode(CoercionNode):
+ """
+ A node that should not be replaced by transforms or other means,
+ and hence can be useful to wrap the argument to a clone node
+
+ MyNode -> ProxyNode -> ArgNode
+ CloneNode -^
+ """
+
+ nogil_check = None
+
+ def __init__(self, arg):
+ super(ProxyNode, self).__init__(arg)
+ self.constant_result = arg.constant_result
+ self._proxy_type()
+
+ def analyse_types(self, env):
+ self.arg = self.arg.analyse_expressions(env)
+ self._proxy_type()
+ return self
+
+ def infer_type(self, env):
+ return self.arg.infer_type(env)
+
+ def _proxy_type(self):
+ if hasattr(self.arg, 'type'):
+ self.type = self.arg.type
+ self.result_ctype = self.arg.result_ctype
+ if hasattr(self.arg, 'entry'):
+ self.entry = self.arg.entry
+
+ def generate_result_code(self, code):
+ self.arg.generate_result_code(code)
+
+ def result(self):
+ return self.arg.result()
+
+ def is_simple(self):
+ return self.arg.is_simple()
+
+ def may_be_none(self):
+ return self.arg.may_be_none()
+
+ def generate_evaluation_code(self, code):
+ self.arg.generate_evaluation_code(code)
+
+ def generate_disposal_code(self, code):
+ self.arg.generate_disposal_code(code)
+
+ def free_temps(self, code):
+ self.arg.free_temps(code)
+
+class CloneNode(CoercionNode):
+ # This node is employed when the result of another node needs
+ # to be used multiple times. The argument node's result must
+ # be in a temporary. This node "borrows" the result from the
+ # argument node, and does not generate any evaluation or
+ # disposal code for it. The original owner of the argument
+ # node is responsible for doing those things.
+
+ subexprs = [] # Arg is not considered a subexpr
+ nogil_check = None
+
+ def __init__(self, arg):
+ CoercionNode.__init__(self, arg)
+ self.constant_result = arg.constant_result
+ if hasattr(arg, 'type'):
+ self.type = arg.type
+ self.result_ctype = arg.result_ctype
+ if hasattr(arg, 'entry'):
+ self.entry = arg.entry
+
+ def result(self):
+ return self.arg.result()
+
+ def may_be_none(self):
+ return self.arg.may_be_none()
+
+ def type_dependencies(self, env):
+ return self.arg.type_dependencies(env)
+
+ def infer_type(self, env):
+ return self.arg.infer_type(env)
+
+ def analyse_types(self, env):
+ self.type = self.arg.type
+ self.result_ctype = self.arg.result_ctype
+ self.is_temp = 1
+ if hasattr(self.arg, 'entry'):
+ self.entry = self.arg.entry
+ return self
+
+ def coerce_to(self, dest_type, env):
+ if self.arg.is_literal:
+ return self.arg.coerce_to(dest_type, env)
+ return super(CloneNode, self).coerce_to(dest_type, env)
+
+ def is_simple(self):
+ return True # result is always in a temp (or a name)
+
+ def generate_evaluation_code(self, code):
+ pass
+
+ def generate_result_code(self, code):
+ pass
+
+ def generate_disposal_code(self, code):
+ pass
+
+ def free_temps(self, code):
+ pass
+
+
+class CMethodSelfCloneNode(CloneNode):
+ # Special CloneNode for the self argument of builtin C methods
+ # that accepts subtypes of the builtin type. This is safe only
+ # for 'final' subtypes, as subtypes of the declared type may
+ # override the C method.
+
+ def coerce_to(self, dst_type, env):
+ if dst_type.is_builtin_type and self.type.subtype_of(dst_type):
+ return self
+ return CloneNode.coerce_to(self, dst_type, env)
+
+
+class ModuleRefNode(ExprNode):
+ # Simply returns the module object
+
+ type = py_object_type
+ is_temp = False
+ subexprs = []
+
+ def analyse_types(self, env):
+ return self
+
+ def may_be_none(self):
+ return False
+
+ def calculate_result_code(self):
+ return Naming.module_cname
+
+ def generate_result_code(self, code):
+ pass
+
+class DocstringRefNode(ExprNode):
+ # Extracts the docstring of the body element
+
+ subexprs = ['body']
+ type = py_object_type
+ is_temp = True
+
+ def __init__(self, pos, body):
+ ExprNode.__init__(self, pos)
+ assert body.type.is_pyobject
+ self.body = body
+
+ def analyse_types(self, env):
+ return self
+
+ def generate_result_code(self, code):
+ code.putln('%s = __Pyx_GetAttr(%s, %s); %s' % (
+ self.result(), self.body.result(),
+ code.intern_identifier(StringEncoding.EncodedString("__doc__")),
+ code.error_goto_if_null(self.result(), self.pos)))
+ code.put_gotref(self.result())
+
+
+
+#------------------------------------------------------------------------------------
+#
+# Runtime support code
+#
+#------------------------------------------------------------------------------------
+
+pyerr_occurred_withgil_utility_code = UtilityCode(
+proto = """
+static CYTHON_INLINE int __Pyx_ErrOccurredWithGIL(void); /* proto */
+""",
+impl = """
+static CYTHON_INLINE int __Pyx_ErrOccurredWithGIL(void) {
+ int err;
+ #ifdef WITH_THREAD
+ PyGILState_STATE _save = PyGILState_Ensure();
+ #endif
+ err = !!PyErr_Occurred();
+ #ifdef WITH_THREAD
+ PyGILState_Release(_save);
+ #endif
+ return err;
+}
+"""
+)
+
+#------------------------------------------------------------------------------------
+
+raise_unbound_local_error_utility_code = UtilityCode(
+proto = """
+static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname);
+""",
+impl = """
+static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) {
+ PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname);
+}
+""")
+
+raise_closure_name_error_utility_code = UtilityCode(
+proto = """
+static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname);
+""",
+impl = """
+static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname) {
+ PyErr_Format(PyExc_NameError, "free variable '%s' referenced before assignment in enclosing scope", varname);
+}
+""")
+
+# Don't inline the function; it should really never be called in production.
+raise_unbound_memoryview_utility_code_nogil = UtilityCode(
+proto = """
+static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname);
+""",
+impl = """
+static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname) {
+ #ifdef WITH_THREAD
+ PyGILState_STATE gilstate = PyGILState_Ensure();
+ #endif
+ __Pyx_RaiseUnboundLocalError(varname);
+ #ifdef WITH_THREAD
+ PyGILState_Release(gilstate);
+ #endif
+}
+""",
+requires = [raise_unbound_local_error_utility_code])
+
+#------------------------------------------------------------------------------------
+
+raise_too_many_values_to_unpack = UtilityCode.load_cached("RaiseTooManyValuesToUnpack", "ObjectHandling.c")
+raise_need_more_values_to_unpack = UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c")
+tuple_unpacking_error_code = UtilityCode.load_cached("UnpackTupleError", "ObjectHandling.c")
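The runtime helpers above follow Cython's UtilityCode pattern: each snippet pairs a C prototype with an implementation, and a snippet can list other snippets in `requires` so that dependencies such as __Pyx_RaiseUnboundLocalError are emitted ahead of __Pyx_RaiseUnboundMemoryviewSliceNogil, with every snippet written into the generated C file at most once. Below is a minimal standalone sketch of that idea; the CSnippet class and emit() helper are illustrative stand-ins, not Cython's actual UtilityCode implementation.

    # Minimal sketch of proto/impl snippets with dependency-ordered emission,
    # loosely modelled on the UtilityCode usage shown above (names are made up).
    class CSnippet(object):
        def __init__(self, name, proto, impl, requires=()):
            self.name = name          # identifier used for de-duplication
            self.proto = proto        # forward declaration text
            self.impl = impl          # definition text
            self.requires = requires  # snippets that must be emitted first

    def emit(snippets):
        """Return (protos, impls) with each snippet emitted once, dependencies first."""
        seen = set()
        protos, impls = [], []

        def visit(snippet):
            if snippet.name in seen:
                return
            seen.add(snippet.name)
            for dep in snippet.requires:
                visit(dep)
            protos.append(snippet.proto)
            impls.append(snippet.impl)

        for snippet in snippets:
            visit(snippet)
        return "\n".join(protos), "\n".join(impls)

    raise_unbound = CSnippet(
        "RaiseUnboundLocalError",
        "static void __Pyx_RaiseUnboundLocalError(const char *varname);",
        "static void __Pyx_RaiseUnboundLocalError(const char *varname) { /* ... */ }")

    raise_unbound_nogil = CSnippet(
        "RaiseUnboundMemoryviewSliceNogil",
        "static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname);",
        "static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname) { /* ... */ }",
        requires=(raise_unbound,))

    protos, impls = emit([raise_unbound_nogil])
    print(protos)  # the dependency's prototype is printed first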
diff --git a/contrib/tools/cython/Cython/Compiler/FlowControl.pxd b/contrib/tools/cython/Cython/Compiler/FlowControl.pxd
index 22eaa10581..c87370b819 100644
--- a/contrib/tools/cython/Cython/Compiler/FlowControl.pxd
+++ b/contrib/tools/cython/Cython/Compiler/FlowControl.pxd
@@ -1,111 +1,111 @@
-from __future__ import absolute_import
-
-cimport cython
-
-from .Visitor cimport CythonTransform, TreeVisitor
-
-cdef class ControlBlock:
- cdef public set children
- cdef public set parents
- cdef public set positions
- cdef public list stats
- cdef public dict gen
- cdef public set bounded
-
+from __future__ import absolute_import
+
+cimport cython
+
+from .Visitor cimport CythonTransform, TreeVisitor
+
+cdef class ControlBlock:
+ cdef public set children
+ cdef public set parents
+ cdef public set positions
+ cdef public list stats
+ cdef public dict gen
+ cdef public set bounded
+
# Big integer bitsets
- cdef public object i_input
- cdef public object i_output
- cdef public object i_gen
- cdef public object i_kill
- cdef public object i_state
-
- cpdef bint empty(self)
- cpdef detach(self)
- cpdef add_child(self, block)
-
-cdef class ExitBlock(ControlBlock):
- cpdef bint empty(self)
-
-cdef class NameAssignment:
- cdef public bint is_arg
- cdef public bint is_deletion
- cdef public object lhs
- cdef public object rhs
- cdef public object entry
- cdef public object pos
- cdef public set refs
- cdef public object bit
- cdef public object inferred_type
-
-cdef class AssignmentList:
- cdef public object bit
- cdef public object mask
- cdef public list stats
-
-cdef class AssignmentCollector(TreeVisitor):
- cdef list assignments
-
-@cython.final
-cdef class ControlFlow:
- cdef public set blocks
- cdef public set entries
- cdef public list loops
- cdef public list exceptions
-
- cdef public ControlBlock entry_point
- cdef public ExitBlock exit_point
- cdef public ControlBlock block
-
- cdef public dict assmts
-
- cpdef newblock(self, ControlBlock parent=*)
- cpdef nextblock(self, ControlBlock parent=*)
- cpdef bint is_tracked(self, entry)
- cpdef bint is_statically_assigned(self, entry)
- cpdef mark_position(self, node)
- cpdef mark_assignment(self, lhs, rhs, entry)
- cpdef mark_argument(self, lhs, rhs, entry)
- cpdef mark_deletion(self, node, entry)
- cpdef mark_reference(self, node, entry)
-
- @cython.locals(block=ControlBlock, parent=ControlBlock, unreachable=set)
- cpdef normalize(self)
-
- @cython.locals(bit=object, assmts=AssignmentList,
- block=ControlBlock)
- cpdef initialize(self)
-
- @cython.locals(assmts=AssignmentList, assmt=NameAssignment)
- cpdef set map_one(self, istate, entry)
-
- @cython.locals(block=ControlBlock, parent=ControlBlock)
- cdef reaching_definitions(self)
-
-cdef class Uninitialized:
- pass
-
-cdef class Unknown:
- pass
-
+ cdef public object i_input
+ cdef public object i_output
+ cdef public object i_gen
+ cdef public object i_kill
+ cdef public object i_state
+
+ cpdef bint empty(self)
+ cpdef detach(self)
+ cpdef add_child(self, block)
+
+cdef class ExitBlock(ControlBlock):
+ cpdef bint empty(self)
+
+cdef class NameAssignment:
+ cdef public bint is_arg
+ cdef public bint is_deletion
+ cdef public object lhs
+ cdef public object rhs
+ cdef public object entry
+ cdef public object pos
+ cdef public set refs
+ cdef public object bit
+ cdef public object inferred_type
+
+cdef class AssignmentList:
+ cdef public object bit
+ cdef public object mask
+ cdef public list stats
+
+cdef class AssignmentCollector(TreeVisitor):
+ cdef list assignments
+
+@cython.final
+cdef class ControlFlow:
+ cdef public set blocks
+ cdef public set entries
+ cdef public list loops
+ cdef public list exceptions
+
+ cdef public ControlBlock entry_point
+ cdef public ExitBlock exit_point
+ cdef public ControlBlock block
+
+ cdef public dict assmts
+
+ cpdef newblock(self, ControlBlock parent=*)
+ cpdef nextblock(self, ControlBlock parent=*)
+ cpdef bint is_tracked(self, entry)
+ cpdef bint is_statically_assigned(self, entry)
+ cpdef mark_position(self, node)
+ cpdef mark_assignment(self, lhs, rhs, entry)
+ cpdef mark_argument(self, lhs, rhs, entry)
+ cpdef mark_deletion(self, node, entry)
+ cpdef mark_reference(self, node, entry)
+
+ @cython.locals(block=ControlBlock, parent=ControlBlock, unreachable=set)
+ cpdef normalize(self)
+
+ @cython.locals(bit=object, assmts=AssignmentList,
+ block=ControlBlock)
+ cpdef initialize(self)
+
+ @cython.locals(assmts=AssignmentList, assmt=NameAssignment)
+ cpdef set map_one(self, istate, entry)
+
+ @cython.locals(block=ControlBlock, parent=ControlBlock)
+ cdef reaching_definitions(self)
+
+cdef class Uninitialized:
+ pass
+
+cdef class Unknown:
+ pass
+
cdef class MessageCollection:
cdef set messages
-@cython.locals(dirty=bint, block=ControlBlock, parent=ControlBlock,
- assmt=NameAssignment)
-cdef check_definitions(ControlFlow flow, dict compiler_directives)
-
-@cython.final
-cdef class ControlFlowAnalysis(CythonTransform):
- cdef object gv_ctx
+@cython.locals(dirty=bint, block=ControlBlock, parent=ControlBlock,
+ assmt=NameAssignment)
+cdef check_definitions(ControlFlow flow, dict compiler_directives)
+
+@cython.final
+cdef class ControlFlowAnalysis(CythonTransform):
+ cdef object gv_ctx
cdef object constant_folder
- cdef set reductions
- cdef list env_stack
- cdef list stack
- cdef object env
- cdef ControlFlow flow
- cdef bint in_inplace_assignment
-
- cpdef mark_assignment(self, lhs, rhs=*)
- cpdef mark_position(self, node)
+ cdef set reductions
+ cdef list env_stack
+ cdef list stack
+ cdef object env
+ cdef ControlFlow flow
+ cdef bint in_inplace_assignment
+
+ cpdef mark_assignment(self, lhs, rhs=*)
+ cpdef mark_position(self, node)
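The i_input/i_output/i_gen/i_kill attributes declared above as big-integer bitsets drive the reaching-definitions analysis in FlowControl.py below: every assignment gets its own bit, i_gen holds the bits a block generates, i_kill holds every bit of the entries the block reassigns, and the per-block transfer function i_output = (i_input & ~i_kill) | i_gen is iterated until nothing changes. The toy below reproduces only that update rule on a hand-built three-block graph; the Block class and the example bit values are illustrative, not taken from the patch.

    # Toy reaching-definitions fixpoint over Python ints used as bitsets,
    # following the i_gen / i_kill / i_input / i_output scheme of ControlFlow.
    class Block(object):
        def __init__(self, name):
            self.name = name
            self.parents = []
            self.i_gen = 0     # assignments generated in this block
            self.i_kill = 0    # all assignment bits of entries reassigned here
            self.i_input = 0
            self.i_output = 0

    def reaching_definitions(blocks):
        """Iterate i_output = (i_input & ~i_kill) | i_gen until a fixpoint."""
        dirty = True
        while dirty:
            dirty = False
            for block in blocks:
                i_input = 0
                for parent in block.parents:
                    i_input |= parent.i_output
                i_output = (i_input & ~block.i_kill) | block.i_gen
                if i_output != block.i_output:
                    dirty = True
                block.i_input = i_input
                block.i_output = i_output

    # bit 0b01: 'x = 1' in entry; bit 0b10: 'x = 2' in b1; each kills the other.
    entry, b1, b2 = Block("entry"), Block("b1"), Block("b2")
    entry.i_gen, entry.i_kill = 0b01, 0b11
    b1.i_gen, b1.i_kill = 0b10, 0b11
    b1.parents = [entry, b2]       # b2 -> b1 models a loop back edge
    b2.parents = [b1]

    reaching_definitions([entry, b1, b2])
    print(bin(b2.i_input))  # 0b10: only the 'x = 2' definition reaches b2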
diff --git a/contrib/tools/cython/Cython/Compiler/FlowControl.py b/contrib/tools/cython/Cython/Compiler/FlowControl.py
index 9b54d154af..df04471f90 100644
--- a/contrib/tools/cython/Cython/Compiler/FlowControl.py
+++ b/contrib/tools/cython/Cython/Compiler/FlowControl.py
@@ -1,783 +1,783 @@
-from __future__ import absolute_import
-
-import cython
-cython.declare(PyrexTypes=object, ExprNodes=object, Nodes=object,
+from __future__ import absolute_import
+
+import cython
+cython.declare(PyrexTypes=object, ExprNodes=object, Nodes=object,
Builtin=object, InternalError=object, error=object, warning=object,
- py_object_type=object, unspecified_type=object,
- object_expr=object, fake_rhs_expr=object, TypedExprNode=object)
-
-from . import Builtin
-from . import ExprNodes
-from . import Nodes
-from . import Options
-from .PyrexTypes import py_object_type, unspecified_type
-from . import PyrexTypes
-
-from .Visitor import TreeVisitor, CythonTransform
-from .Errors import error, warning, InternalError
+ py_object_type=object, unspecified_type=object,
+ object_expr=object, fake_rhs_expr=object, TypedExprNode=object)
+
+from . import Builtin
+from . import ExprNodes
+from . import Nodes
+from . import Options
+from .PyrexTypes import py_object_type, unspecified_type
+from . import PyrexTypes
+
+from .Visitor import TreeVisitor, CythonTransform
+from .Errors import error, warning, InternalError
from .Optimize import ConstantFolding
-
-
-class TypedExprNode(ExprNodes.ExprNode):
- # Used for declaring assignments of a specified type without a known entry.
- def __init__(self, type, may_be_none=None, pos=None):
- super(TypedExprNode, self).__init__(pos)
- self.type = type
- self._may_be_none = may_be_none
-
- def may_be_none(self):
- return self._may_be_none != False
-
-object_expr = TypedExprNode(py_object_type, may_be_none=True)
-# Fake rhs to silence "unused variable" warning
-fake_rhs_expr = TypedExprNode(unspecified_type)
-
-
-class ControlBlock(object):
- """Control flow graph node. Sequence of assignments and name references.
-
- children set of children nodes
- parents set of parent nodes
- positions set of position markers
-
- stats list of block statements
- gen dict of assignments generated by this block
- bounded set of entries that are definitely bounded in this block
-
- Example:
-
- a = 1
- b = a + c # 'c' is already bounded or exception here
-
- stats = [Assignment(a), NameReference(a), NameReference(c),
- Assignment(b)]
- gen = {Entry(a): Assignment(a), Entry(b): Assignment(b)}
- bounded = set([Entry(a), Entry(c)])
-
- """
-
- def __init__(self):
- self.children = set()
- self.parents = set()
- self.positions = set()
-
- self.stats = []
- self.gen = {}
- self.bounded = set()
-
- self.i_input = 0
- self.i_output = 0
- self.i_gen = 0
- self.i_kill = 0
- self.i_state = 0
-
- def empty(self):
- return (not self.stats and not self.positions)
-
- def detach(self):
- """Detach block from parents and children."""
- for child in self.children:
- child.parents.remove(self)
- for parent in self.parents:
- parent.children.remove(self)
- self.parents.clear()
- self.children.clear()
-
- def add_child(self, block):
- self.children.add(block)
- block.parents.add(self)
-
-
-class ExitBlock(ControlBlock):
- """Non-empty exit point block."""
-
- def empty(self):
- return False
-
-
-class AssignmentList(object):
- def __init__(self):
- self.stats = []
-
-
-class ControlFlow(object):
- """Control-flow graph.
-
- entry_point ControlBlock entry point for this graph
- exit_point ControlBlock normal exit point
- block ControlBlock current block
- blocks set children nodes
- entries set tracked entries
- loops list stack for loop descriptors
- exceptions list stack for exception descriptors
- """
-
- def __init__(self):
- self.blocks = set()
- self.entries = set()
- self.loops = []
- self.exceptions = []
-
- self.entry_point = ControlBlock()
- self.exit_point = ExitBlock()
- self.blocks.add(self.exit_point)
- self.block = self.entry_point
-
- def newblock(self, parent=None):
- """Create floating block linked to `parent` if given.
-
- NOTE: Block is NOT added to self.blocks
- """
- block = ControlBlock()
- self.blocks.add(block)
- if parent:
- parent.add_child(block)
- return block
-
- def nextblock(self, parent=None):
- """Create block children block linked to current or `parent` if given.
-
- NOTE: Block is added to self.blocks
- """
- block = ControlBlock()
- self.blocks.add(block)
- if parent:
- parent.add_child(block)
- elif self.block:
- self.block.add_child(block)
- self.block = block
- return self.block
-
- def is_tracked(self, entry):
- if entry.is_anonymous:
- return False
- return (entry.is_local or entry.is_pyclass_attr or entry.is_arg or
- entry.from_closure or entry.in_closure or
- entry.error_on_uninitialized)
-
- def is_statically_assigned(self, entry):
- if (entry.is_local and entry.is_variable and
- (entry.type.is_struct_or_union or
- entry.type.is_complex or
- entry.type.is_array or
- entry.type.is_cpp_class)):
- # stack allocated structured variable => never uninitialised
- return True
- return False
-
- def mark_position(self, node):
- """Mark position, will be used to draw graph nodes."""
- if self.block:
- self.block.positions.add(node.pos[:2])
-
- def mark_assignment(self, lhs, rhs, entry):
- if self.block and self.is_tracked(entry):
- assignment = NameAssignment(lhs, rhs, entry)
- self.block.stats.append(assignment)
- self.block.gen[entry] = assignment
- self.entries.add(entry)
-
- def mark_argument(self, lhs, rhs, entry):
- if self.block and self.is_tracked(entry):
- assignment = Argument(lhs, rhs, entry)
- self.block.stats.append(assignment)
- self.block.gen[entry] = assignment
- self.entries.add(entry)
-
- def mark_deletion(self, node, entry):
- if self.block and self.is_tracked(entry):
- assignment = NameDeletion(node, entry)
- self.block.stats.append(assignment)
- self.block.gen[entry] = Uninitialized
- self.entries.add(entry)
-
- def mark_reference(self, node, entry):
- if self.block and self.is_tracked(entry):
- self.block.stats.append(NameReference(node, entry))
- ## XXX: We don't track expression evaluation order so we can't use
- ## XXX: successful reference as initialization sign.
- ## # Local variable is definitely bound after this reference
- ## if not node.allow_null:
- ## self.block.bounded.add(entry)
- self.entries.add(entry)
-
- def normalize(self):
- """Delete unreachable and orphan blocks."""
- queue = set([self.entry_point])
- visited = set()
- while queue:
- root = queue.pop()
- visited.add(root)
- for child in root.children:
- if child not in visited:
- queue.add(child)
- unreachable = self.blocks - visited
- for block in unreachable:
- block.detach()
- visited.remove(self.entry_point)
- for block in visited:
- if block.empty():
- for parent in block.parents: # Re-parent
- for child in block.children:
- parent.add_child(child)
- block.detach()
- unreachable.add(block)
- self.blocks -= unreachable
-
- def initialize(self):
- """Set initial state, map assignments to bits."""
- self.assmts = {}
-
- bit = 1
- for entry in self.entries:
- assmts = AssignmentList()
- assmts.mask = assmts.bit = bit
- self.assmts[entry] = assmts
- bit <<= 1
-
- for block in self.blocks:
- for stat in block.stats:
- if isinstance(stat, NameAssignment):
- stat.bit = bit
- assmts = self.assmts[stat.entry]
- assmts.stats.append(stat)
- assmts.mask |= bit
- bit <<= 1
-
- for block in self.blocks:
- for entry, stat in block.gen.items():
- assmts = self.assmts[entry]
- if stat is Uninitialized:
- block.i_gen |= assmts.bit
- else:
- block.i_gen |= stat.bit
- block.i_kill |= assmts.mask
- block.i_output = block.i_gen
- for entry in block.bounded:
- block.i_kill |= self.assmts[entry].bit
-
+
+
+class TypedExprNode(ExprNodes.ExprNode):
+ # Used for declaring assignments of a specified type without a known entry.
+ def __init__(self, type, may_be_none=None, pos=None):
+ super(TypedExprNode, self).__init__(pos)
+ self.type = type
+ self._may_be_none = may_be_none
+
+ def may_be_none(self):
+ return self._may_be_none != False
+
+object_expr = TypedExprNode(py_object_type, may_be_none=True)
+# Fake rhs to silence "unused variable" warning
+fake_rhs_expr = TypedExprNode(unspecified_type)
+
+
+class ControlBlock(object):
+ """Control flow graph node. Sequence of assignments and name references.
+
+ children set of children nodes
+ parents set of parent nodes
+ positions set of position markers
+
+ stats list of block statements
+ gen dict of assignments generated by this block
+ bounded set of entries that are definitely bounded in this block
+
+ Example:
+
+ a = 1
+ b = a + c # 'c' is already bounded or exception here
+
+ stats = [Assignment(a), NameReference(a), NameReference(c),
+ Assignment(b)]
+ gen = {Entry(a): Assignment(a), Entry(b): Assignment(b)}
+ bounded = set([Entry(a), Entry(c)])
+
+ """
+
+ def __init__(self):
+ self.children = set()
+ self.parents = set()
+ self.positions = set()
+
+ self.stats = []
+ self.gen = {}
+ self.bounded = set()
+
+ self.i_input = 0
+ self.i_output = 0
+ self.i_gen = 0
+ self.i_kill = 0
+ self.i_state = 0
+
+ def empty(self):
+ return (not self.stats and not self.positions)
+
+ def detach(self):
+ """Detach block from parents and children."""
+ for child in self.children:
+ child.parents.remove(self)
+ for parent in self.parents:
+ parent.children.remove(self)
+ self.parents.clear()
+ self.children.clear()
+
+ def add_child(self, block):
+ self.children.add(block)
+ block.parents.add(self)
+
+
+class ExitBlock(ControlBlock):
+ """Non-empty exit point block."""
+
+ def empty(self):
+ return False
+
+
+class AssignmentList(object):
+ def __init__(self):
+ self.stats = []
+
+
+class ControlFlow(object):
+ """Control-flow graph.
+
+ entry_point ControlBlock entry point for this graph
+ exit_point ControlBlock normal exit point
+ block ControlBlock current block
+ blocks set children nodes
+ entries set tracked entries
+ loops list stack for loop descriptors
+ exceptions list stack for exception descriptors
+ """
+
+ def __init__(self):
+ self.blocks = set()
+ self.entries = set()
+ self.loops = []
+ self.exceptions = []
+
+ self.entry_point = ControlBlock()
+ self.exit_point = ExitBlock()
+ self.blocks.add(self.exit_point)
+ self.block = self.entry_point
+
+ def newblock(self, parent=None):
+ """Create floating block linked to `parent` if given.
+
+ NOTE: Block is NOT added to self.blocks
+ """
+ block = ControlBlock()
+ self.blocks.add(block)
+ if parent:
+ parent.add_child(block)
+ return block
+
+ def nextblock(self, parent=None):
+ """Create block children block linked to current or `parent` if given.
+
+ NOTE: Block is added to self.blocks
+ """
+ block = ControlBlock()
+ self.blocks.add(block)
+ if parent:
+ parent.add_child(block)
+ elif self.block:
+ self.block.add_child(block)
+ self.block = block
+ return self.block
+
+ def is_tracked(self, entry):
+ if entry.is_anonymous:
+ return False
+ return (entry.is_local or entry.is_pyclass_attr or entry.is_arg or
+ entry.from_closure or entry.in_closure or
+ entry.error_on_uninitialized)
+
+ def is_statically_assigned(self, entry):
+ if (entry.is_local and entry.is_variable and
+ (entry.type.is_struct_or_union or
+ entry.type.is_complex or
+ entry.type.is_array or
+ entry.type.is_cpp_class)):
+ # stack allocated structured variable => never uninitialised
+ return True
+ return False
+
+ def mark_position(self, node):
+ """Mark position, will be used to draw graph nodes."""
+ if self.block:
+ self.block.positions.add(node.pos[:2])
+
+ def mark_assignment(self, lhs, rhs, entry):
+ if self.block and self.is_tracked(entry):
+ assignment = NameAssignment(lhs, rhs, entry)
+ self.block.stats.append(assignment)
+ self.block.gen[entry] = assignment
+ self.entries.add(entry)
+
+ def mark_argument(self, lhs, rhs, entry):
+ if self.block and self.is_tracked(entry):
+ assignment = Argument(lhs, rhs, entry)
+ self.block.stats.append(assignment)
+ self.block.gen[entry] = assignment
+ self.entries.add(entry)
+
+ def mark_deletion(self, node, entry):
+ if self.block and self.is_tracked(entry):
+ assignment = NameDeletion(node, entry)
+ self.block.stats.append(assignment)
+ self.block.gen[entry] = Uninitialized
+ self.entries.add(entry)
+
+ def mark_reference(self, node, entry):
+ if self.block and self.is_tracked(entry):
+ self.block.stats.append(NameReference(node, entry))
+ ## XXX: We don't track expression evaluation order so we can't use
+ ## XXX: successful reference as initialization sign.
+ ## # Local variable is definitely bound after this reference
+ ## if not node.allow_null:
+ ## self.block.bounded.add(entry)
+ self.entries.add(entry)
+
+ def normalize(self):
+ """Delete unreachable and orphan blocks."""
+ queue = set([self.entry_point])
+ visited = set()
+ while queue:
+ root = queue.pop()
+ visited.add(root)
+ for child in root.children:
+ if child not in visited:
+ queue.add(child)
+ unreachable = self.blocks - visited
+ for block in unreachable:
+ block.detach()
+ visited.remove(self.entry_point)
+ for block in visited:
+ if block.empty():
+ for parent in block.parents: # Re-parent
+ for child in block.children:
+ parent.add_child(child)
+ block.detach()
+ unreachable.add(block)
+ self.blocks -= unreachable
+
+ def initialize(self):
+ """Set initial state, map assignments to bits."""
+ self.assmts = {}
+
+ bit = 1
+ for entry in self.entries:
+ assmts = AssignmentList()
+ assmts.mask = assmts.bit = bit
+ self.assmts[entry] = assmts
+ bit <<= 1
+
+ for block in self.blocks:
+ for stat in block.stats:
+ if isinstance(stat, NameAssignment):
+ stat.bit = bit
+ assmts = self.assmts[stat.entry]
+ assmts.stats.append(stat)
+ assmts.mask |= bit
+ bit <<= 1
+
+ for block in self.blocks:
+ for entry, stat in block.gen.items():
+ assmts = self.assmts[entry]
+ if stat is Uninitialized:
+ block.i_gen |= assmts.bit
+ else:
+ block.i_gen |= stat.bit
+ block.i_kill |= assmts.mask
+ block.i_output = block.i_gen
+ for entry in block.bounded:
+ block.i_kill |= self.assmts[entry].bit
+
for assmts in self.assmts.values():
- self.entry_point.i_gen |= assmts.bit
- self.entry_point.i_output = self.entry_point.i_gen
-
- def map_one(self, istate, entry):
- ret = set()
- assmts = self.assmts[entry]
- if istate & assmts.bit:
- if self.is_statically_assigned(entry):
- ret.add(StaticAssignment(entry))
- elif entry.from_closure:
- ret.add(Unknown)
- else:
- ret.add(Uninitialized)
- for assmt in assmts.stats:
- if istate & assmt.bit:
- ret.add(assmt)
- return ret
-
- def reaching_definitions(self):
- """Per-block reaching definitions analysis."""
- dirty = True
- while dirty:
- dirty = False
- for block in self.blocks:
- i_input = 0
- for parent in block.parents:
- i_input |= parent.i_output
- i_output = (i_input & ~block.i_kill) | block.i_gen
- if i_output != block.i_output:
- dirty = True
- block.i_input = i_input
- block.i_output = i_output
-
-
-class LoopDescr(object):
- def __init__(self, next_block, loop_block):
- self.next_block = next_block
- self.loop_block = loop_block
- self.exceptions = []
-
-
-class ExceptionDescr(object):
- """Exception handling helper.
-
- entry_point ControlBlock Exception handling entry point
- finally_enter ControlBlock Normal finally clause entry point
- finally_exit ControlBlock Normal finally clause exit point
- """
-
- def __init__(self, entry_point, finally_enter=None, finally_exit=None):
- self.entry_point = entry_point
- self.finally_enter = finally_enter
- self.finally_exit = finally_exit
-
-
-class NameAssignment(object):
- def __init__(self, lhs, rhs, entry):
- if lhs.cf_state is None:
- lhs.cf_state = set()
- self.lhs = lhs
- self.rhs = rhs
- self.entry = entry
- self.pos = lhs.pos
- self.refs = set()
- self.is_arg = False
- self.is_deletion = False
- self.inferred_type = None
-
- def __repr__(self):
- return '%s(entry=%r)' % (self.__class__.__name__, self.entry)
-
- def infer_type(self):
- self.inferred_type = self.rhs.infer_type(self.entry.scope)
- return self.inferred_type
-
- def type_dependencies(self):
- return self.rhs.type_dependencies(self.entry.scope)
-
- @property
- def type(self):
- if not self.entry.type.is_unspecified:
- return self.entry.type
- return self.inferred_type
-
-
-class StaticAssignment(NameAssignment):
- """Initialised at declaration time, e.g. stack allocation."""
- def __init__(self, entry):
- if not entry.type.is_pyobject:
- may_be_none = False
- else:
- may_be_none = None # unknown
- lhs = TypedExprNode(
- entry.type, may_be_none=may_be_none, pos=entry.pos)
- super(StaticAssignment, self).__init__(lhs, lhs, entry)
-
- def infer_type(self):
- return self.entry.type
-
- def type_dependencies(self):
- return ()
-
-
-class Argument(NameAssignment):
- def __init__(self, lhs, rhs, entry):
- NameAssignment.__init__(self, lhs, rhs, entry)
- self.is_arg = True
-
-
-class NameDeletion(NameAssignment):
- def __init__(self, lhs, entry):
- NameAssignment.__init__(self, lhs, lhs, entry)
- self.is_deletion = True
-
- def infer_type(self):
- inferred_type = self.rhs.infer_type(self.entry.scope)
- if (not inferred_type.is_pyobject and
- inferred_type.can_coerce_to_pyobject(self.entry.scope)):
- return py_object_type
- self.inferred_type = inferred_type
- return inferred_type
-
-
-class Uninitialized(object):
- """Definitely not initialised yet."""
-
-
-class Unknown(object):
- """Coming from outer closure, might be initialised or not."""
-
-
-class NameReference(object):
- def __init__(self, node, entry):
- if node.cf_state is None:
- node.cf_state = set()
- self.node = node
- self.entry = entry
- self.pos = node.pos
-
- def __repr__(self):
- return '%s(entry=%r)' % (self.__class__.__name__, self.entry)
-
-
-class ControlFlowState(list):
- # Keeps track of Node's entry assignments
- #
- # cf_is_null [boolean] It is uninitialized
- # cf_maybe_null [boolean] May be uninitialized
- # is_single [boolean] Has only one assignment at this point
-
- cf_maybe_null = False
- cf_is_null = False
- is_single = False
-
- def __init__(self, state):
- if Uninitialized in state:
- state.discard(Uninitialized)
- self.cf_maybe_null = True
- if not state:
- self.cf_is_null = True
- elif Unknown in state:
- state.discard(Unknown)
- self.cf_maybe_null = True
- else:
- if len(state) == 1:
- self.is_single = True
- # XXX: Remove fake_rhs_expr
- super(ControlFlowState, self).__init__(
- [i for i in state if i.rhs is not fake_rhs_expr])
-
- def one(self):
- return self[0]
-
-
-class GVContext(object):
- """Graphviz subgraph object."""
-
- def __init__(self):
- self.blockids = {}
- self.nextid = 0
- self.children = []
- self.sources = {}
-
- def add(self, child):
- self.children.append(child)
-
- def nodeid(self, block):
- if block not in self.blockids:
- self.blockids[block] = 'block%d' % self.nextid
- self.nextid += 1
- return self.blockids[block]
-
- def extract_sources(self, block):
- if not block.positions:
- return ''
- start = min(block.positions)
- stop = max(block.positions)
- srcdescr = start[0]
- if not srcdescr in self.sources:
- self.sources[srcdescr] = list(srcdescr.get_lines())
- lines = self.sources[srcdescr]
- return '\\n'.join([l.strip() for l in lines[start[1] - 1:stop[1]]])
-
- def render(self, fp, name, annotate_defs=False):
- """Render graphviz dot graph"""
- fp.write('digraph %s {\n' % name)
- fp.write(' node [shape=box];\n')
- for child in self.children:
- child.render(fp, self, annotate_defs)
- fp.write('}\n')
-
- def escape(self, text):
- return text.replace('"', '\\"').replace('\n', '\\n')
-
-
-class GV(object):
- """Graphviz DOT renderer."""
-
- def __init__(self, name, flow):
- self.name = name
- self.flow = flow
-
- def render(self, fp, ctx, annotate_defs=False):
- fp.write(' subgraph %s {\n' % self.name)
- for block in self.flow.blocks:
- label = ctx.extract_sources(block)
- if annotate_defs:
- for stat in block.stats:
- if isinstance(stat, NameAssignment):
+ self.entry_point.i_gen |= assmts.bit
+ self.entry_point.i_output = self.entry_point.i_gen
+
+ def map_one(self, istate, entry):
+ ret = set()
+ assmts = self.assmts[entry]
+ if istate & assmts.bit:
+ if self.is_statically_assigned(entry):
+ ret.add(StaticAssignment(entry))
+ elif entry.from_closure:
+ ret.add(Unknown)
+ else:
+ ret.add(Uninitialized)
+ for assmt in assmts.stats:
+ if istate & assmt.bit:
+ ret.add(assmt)
+ return ret
+
+ def reaching_definitions(self):
+ """Per-block reaching definitions analysis."""
+ dirty = True
+ while dirty:
+ dirty = False
+ for block in self.blocks:
+ i_input = 0
+ for parent in block.parents:
+ i_input |= parent.i_output
+ i_output = (i_input & ~block.i_kill) | block.i_gen
+ if i_output != block.i_output:
+ dirty = True
+ block.i_input = i_input
+ block.i_output = i_output
+
+
+class LoopDescr(object):
+ def __init__(self, next_block, loop_block):
+ self.next_block = next_block
+ self.loop_block = loop_block
+ self.exceptions = []
+
+
+class ExceptionDescr(object):
+ """Exception handling helper.
+
+ entry_point ControlBlock Exception handling entry point
+ finally_enter ControlBlock Normal finally clause entry point
+ finally_exit ControlBlock Normal finally clause exit point
+ """
+
+ def __init__(self, entry_point, finally_enter=None, finally_exit=None):
+ self.entry_point = entry_point
+ self.finally_enter = finally_enter
+ self.finally_exit = finally_exit
+
+
+class NameAssignment(object):
+ def __init__(self, lhs, rhs, entry):
+ if lhs.cf_state is None:
+ lhs.cf_state = set()
+ self.lhs = lhs
+ self.rhs = rhs
+ self.entry = entry
+ self.pos = lhs.pos
+ self.refs = set()
+ self.is_arg = False
+ self.is_deletion = False
+ self.inferred_type = None
+
+ def __repr__(self):
+ return '%s(entry=%r)' % (self.__class__.__name__, self.entry)
+
+ def infer_type(self):
+ self.inferred_type = self.rhs.infer_type(self.entry.scope)
+ return self.inferred_type
+
+ def type_dependencies(self):
+ return self.rhs.type_dependencies(self.entry.scope)
+
+ @property
+ def type(self):
+ if not self.entry.type.is_unspecified:
+ return self.entry.type
+ return self.inferred_type
+
+
+class StaticAssignment(NameAssignment):
+ """Initialised at declaration time, e.g. stack allocation."""
+ def __init__(self, entry):
+ if not entry.type.is_pyobject:
+ may_be_none = False
+ else:
+ may_be_none = None # unknown
+ lhs = TypedExprNode(
+ entry.type, may_be_none=may_be_none, pos=entry.pos)
+ super(StaticAssignment, self).__init__(lhs, lhs, entry)
+
+ def infer_type(self):
+ return self.entry.type
+
+ def type_dependencies(self):
+ return ()
+
+
+class Argument(NameAssignment):
+ def __init__(self, lhs, rhs, entry):
+ NameAssignment.__init__(self, lhs, rhs, entry)
+ self.is_arg = True
+
+
+class NameDeletion(NameAssignment):
+ def __init__(self, lhs, entry):
+ NameAssignment.__init__(self, lhs, lhs, entry)
+ self.is_deletion = True
+
+ def infer_type(self):
+ inferred_type = self.rhs.infer_type(self.entry.scope)
+ if (not inferred_type.is_pyobject and
+ inferred_type.can_coerce_to_pyobject(self.entry.scope)):
+ return py_object_type
+ self.inferred_type = inferred_type
+ return inferred_type
+
+
+class Uninitialized(object):
+ """Definitely not initialised yet."""
+
+
+class Unknown(object):
+ """Coming from outer closure, might be initialised or not."""
+
+
+class NameReference(object):
+ def __init__(self, node, entry):
+ if node.cf_state is None:
+ node.cf_state = set()
+ self.node = node
+ self.entry = entry
+ self.pos = node.pos
+
+ def __repr__(self):
+ return '%s(entry=%r)' % (self.__class__.__name__, self.entry)
+
+
+class ControlFlowState(list):
+ # Keeps track of Node's entry assignments
+ #
+ # cf_is_null [boolean] It is uninitialized
+ # cf_maybe_null [boolean] May be uninitialized
+ # is_single [boolean] Has only one assignment at this point
+
+ cf_maybe_null = False
+ cf_is_null = False
+ is_single = False
+
+ def __init__(self, state):
+ if Uninitialized in state:
+ state.discard(Uninitialized)
+ self.cf_maybe_null = True
+ if not state:
+ self.cf_is_null = True
+ elif Unknown in state:
+ state.discard(Unknown)
+ self.cf_maybe_null = True
+ else:
+ if len(state) == 1:
+ self.is_single = True
+ # XXX: Remove fake_rhs_expr
+ super(ControlFlowState, self).__init__(
+ [i for i in state if i.rhs is not fake_rhs_expr])
+
+ def one(self):
+ return self[0]
+
+
+class GVContext(object):
+ """Graphviz subgraph object."""
+
+ def __init__(self):
+ self.blockids = {}
+ self.nextid = 0
+ self.children = []
+ self.sources = {}
+
+ def add(self, child):
+ self.children.append(child)
+
+ def nodeid(self, block):
+ if block not in self.blockids:
+ self.blockids[block] = 'block%d' % self.nextid
+ self.nextid += 1
+ return self.blockids[block]
+
+ def extract_sources(self, block):
+ if not block.positions:
+ return ''
+ start = min(block.positions)
+ stop = max(block.positions)
+ srcdescr = start[0]
+ if not srcdescr in self.sources:
+ self.sources[srcdescr] = list(srcdescr.get_lines())
+ lines = self.sources[srcdescr]
+ return '\\n'.join([l.strip() for l in lines[start[1] - 1:stop[1]]])
+
+ def render(self, fp, name, annotate_defs=False):
+ """Render graphviz dot graph"""
+ fp.write('digraph %s {\n' % name)
+ fp.write(' node [shape=box];\n')
+ for child in self.children:
+ child.render(fp, self, annotate_defs)
+ fp.write('}\n')
+
+ def escape(self, text):
+ return text.replace('"', '\\"').replace('\n', '\\n')
+
+
+class GV(object):
+ """Graphviz DOT renderer."""
+
+ def __init__(self, name, flow):
+ self.name = name
+ self.flow = flow
+
+ def render(self, fp, ctx, annotate_defs=False):
+ fp.write(' subgraph %s {\n' % self.name)
+ for block in self.flow.blocks:
+ label = ctx.extract_sources(block)
+ if annotate_defs:
+ for stat in block.stats:
+ if isinstance(stat, NameAssignment):
label += '\n %s [%s %s]' % (
stat.entry.name, 'deletion' if stat.is_deletion else 'definition', stat.pos[1])
- elif isinstance(stat, NameReference):
- if stat.entry:
+ elif isinstance(stat, NameReference):
+ if stat.entry:
label += '\n %s [reference %s]' % (stat.entry.name, stat.pos[1])
- if not label:
- label = 'empty'
- pid = ctx.nodeid(block)
- fp.write(' %s [label="%s"];\n' % (pid, ctx.escape(label)))
- for block in self.flow.blocks:
- pid = ctx.nodeid(block)
- for child in block.children:
- fp.write(' %s -> %s;\n' % (pid, ctx.nodeid(child)))
- fp.write(' }\n')
-
-
-class MessageCollection(object):
- """Collect error/warnings messages first then sort"""
- def __init__(self):
+ if not label:
+ label = 'empty'
+ pid = ctx.nodeid(block)
+ fp.write(' %s [label="%s"];\n' % (pid, ctx.escape(label)))
+ for block in self.flow.blocks:
+ pid = ctx.nodeid(block)
+ for child in block.children:
+ fp.write(' %s -> %s;\n' % (pid, ctx.nodeid(child)))
+ fp.write(' }\n')
+
+
+class MessageCollection(object):
+ """Collect error/warnings messages first then sort"""
+ def __init__(self):
self.messages = set()
-
- def error(self, pos, message):
+
+ def error(self, pos, message):
self.messages.add((pos, True, message))
-
- def warning(self, pos, message):
+
+ def warning(self, pos, message):
self.messages.add((pos, False, message))
-
- def report(self):
+
+ def report(self):
for pos, is_error, message in sorted(self.messages):
- if is_error:
- error(pos, message)
- else:
- warning(pos, message, 2)
-
-
-def check_definitions(flow, compiler_directives):
- flow.initialize()
- flow.reaching_definitions()
-
- # Track down state
- assignments = set()
- # Node to entry map
- references = {}
- assmt_nodes = set()
-
- for block in flow.blocks:
- i_state = block.i_input
- for stat in block.stats:
- i_assmts = flow.assmts[stat.entry]
- state = flow.map_one(i_state, stat.entry)
- if isinstance(stat, NameAssignment):
- stat.lhs.cf_state.update(state)
- assmt_nodes.add(stat.lhs)
- i_state = i_state & ~i_assmts.mask
- if stat.is_deletion:
- i_state |= i_assmts.bit
- else:
- i_state |= stat.bit
- assignments.add(stat)
- if stat.rhs is not fake_rhs_expr:
- stat.entry.cf_assignments.append(stat)
- elif isinstance(stat, NameReference):
- references[stat.node] = stat.entry
- stat.entry.cf_references.append(stat)
- stat.node.cf_state.update(state)
- ## if not stat.node.allow_null:
- ## i_state &= ~i_assmts.bit
- ## # after successful read, the state is known to be initialised
- state.discard(Uninitialized)
- state.discard(Unknown)
- for assmt in state:
- assmt.refs.add(stat)
-
- # Check variable usage
- warn_maybe_uninitialized = compiler_directives['warn.maybe_uninitialized']
- warn_unused_result = compiler_directives['warn.unused_result']
- warn_unused = compiler_directives['warn.unused']
- warn_unused_arg = compiler_directives['warn.unused_arg']
-
- messages = MessageCollection()
-
- # assignment hints
- for node in assmt_nodes:
- if Uninitialized in node.cf_state:
- node.cf_maybe_null = True
- if len(node.cf_state) == 1:
- node.cf_is_null = True
- else:
- node.cf_is_null = False
- elif Unknown in node.cf_state:
- node.cf_maybe_null = True
- else:
- node.cf_is_null = False
- node.cf_maybe_null = False
-
- # Find uninitialized references and cf-hints
+ if is_error:
+ error(pos, message)
+ else:
+ warning(pos, message, 2)
+
+
+def check_definitions(flow, compiler_directives):
+ flow.initialize()
+ flow.reaching_definitions()
+
+ # Track down state
+ assignments = set()
+ # Node to entry map
+ references = {}
+ assmt_nodes = set()
+
+ for block in flow.blocks:
+ i_state = block.i_input
+ for stat in block.stats:
+ i_assmts = flow.assmts[stat.entry]
+ state = flow.map_one(i_state, stat.entry)
+ if isinstance(stat, NameAssignment):
+ stat.lhs.cf_state.update(state)
+ assmt_nodes.add(stat.lhs)
+ i_state = i_state & ~i_assmts.mask
+ if stat.is_deletion:
+ i_state |= i_assmts.bit
+ else:
+ i_state |= stat.bit
+ assignments.add(stat)
+ if stat.rhs is not fake_rhs_expr:
+ stat.entry.cf_assignments.append(stat)
+ elif isinstance(stat, NameReference):
+ references[stat.node] = stat.entry
+ stat.entry.cf_references.append(stat)
+ stat.node.cf_state.update(state)
+ ## if not stat.node.allow_null:
+ ## i_state &= ~i_assmts.bit
+ ## # after successful read, the state is known to be initialised
+ state.discard(Uninitialized)
+ state.discard(Unknown)
+ for assmt in state:
+ assmt.refs.add(stat)
+
+ # Check variable usage
+ warn_maybe_uninitialized = compiler_directives['warn.maybe_uninitialized']
+ warn_unused_result = compiler_directives['warn.unused_result']
+ warn_unused = compiler_directives['warn.unused']
+ warn_unused_arg = compiler_directives['warn.unused_arg']
+
+ messages = MessageCollection()
+
+ # assignment hints
+ for node in assmt_nodes:
+ if Uninitialized in node.cf_state:
+ node.cf_maybe_null = True
+ if len(node.cf_state) == 1:
+ node.cf_is_null = True
+ else:
+ node.cf_is_null = False
+ elif Unknown in node.cf_state:
+ node.cf_maybe_null = True
+ else:
+ node.cf_is_null = False
+ node.cf_maybe_null = False
+
+ # Find uninitialized references and cf-hints
for node, entry in references.items():
- if Uninitialized in node.cf_state:
- node.cf_maybe_null = True
- if not entry.from_closure and len(node.cf_state) == 1:
- node.cf_is_null = True
- if (node.allow_null or entry.from_closure
+ if Uninitialized in node.cf_state:
+ node.cf_maybe_null = True
+ if not entry.from_closure and len(node.cf_state) == 1:
+ node.cf_is_null = True
+ if (node.allow_null or entry.from_closure
or entry.is_pyclass_attr or entry.type.is_error):
pass # Can be uninitialized here
- elif node.cf_is_null:
- if entry.error_on_uninitialized or (
- Options.error_on_uninitialized and (
- entry.type.is_pyobject or entry.type.is_unspecified)):
- messages.error(
- node.pos,
- "local variable '%s' referenced before assignment"
- % entry.name)
- else:
- messages.warning(
- node.pos,
- "local variable '%s' referenced before assignment"
- % entry.name)
- elif warn_maybe_uninitialized:
- messages.warning(
- node.pos,
- "local variable '%s' might be referenced before assignment"
- % entry.name)
- elif Unknown in node.cf_state:
- # TODO: better cross-closure analysis to know when inner functions
- # are being called before a variable is being set, and when
- # a variable is known to be set before even defining the
- # inner function, etc.
- node.cf_maybe_null = True
- else:
- node.cf_is_null = False
- node.cf_maybe_null = False
-
- # Unused result
- for assmt in assignments:
- if (not assmt.refs and not assmt.entry.is_pyclass_attr
- and not assmt.entry.in_closure):
- if assmt.entry.cf_references and warn_unused_result:
- if assmt.is_arg:
- messages.warning(assmt.pos, "Unused argument value '%s'" %
- assmt.entry.name)
- else:
- messages.warning(assmt.pos, "Unused result in '%s'" %
- assmt.entry.name)
- assmt.lhs.cf_used = False
-
- # Unused entries
- for entry in flow.entries:
- if (not entry.cf_references
- and not entry.is_pyclass_attr):
+ elif node.cf_is_null:
+ if entry.error_on_uninitialized or (
+ Options.error_on_uninitialized and (
+ entry.type.is_pyobject or entry.type.is_unspecified)):
+ messages.error(
+ node.pos,
+ "local variable '%s' referenced before assignment"
+ % entry.name)
+ else:
+ messages.warning(
+ node.pos,
+ "local variable '%s' referenced before assignment"
+ % entry.name)
+ elif warn_maybe_uninitialized:
+ messages.warning(
+ node.pos,
+ "local variable '%s' might be referenced before assignment"
+ % entry.name)
+ elif Unknown in node.cf_state:
+ # TODO: better cross-closure analysis to know when inner functions
+ # are being called before a variable is being set, and when
+ # a variable is known to be set before even defining the
+ # inner function, etc.
+ node.cf_maybe_null = True
+ else:
+ node.cf_is_null = False
+ node.cf_maybe_null = False
+
+ # Unused result
+ for assmt in assignments:
+ if (not assmt.refs and not assmt.entry.is_pyclass_attr
+ and not assmt.entry.in_closure):
+ if assmt.entry.cf_references and warn_unused_result:
+ if assmt.is_arg:
+ messages.warning(assmt.pos, "Unused argument value '%s'" %
+ assmt.entry.name)
+ else:
+ messages.warning(assmt.pos, "Unused result in '%s'" %
+ assmt.entry.name)
+ assmt.lhs.cf_used = False
+
+ # Unused entries
+ for entry in flow.entries:
+ if (not entry.cf_references
+ and not entry.is_pyclass_attr):
if entry.name != '_' and not entry.name.startswith('unused'):
- # '_' is often used for unused variables, e.g. in loops
- if entry.is_arg:
- if warn_unused_arg:
- messages.warning(entry.pos, "Unused argument '%s'" %
- entry.name)
- else:
- if warn_unused:
- messages.warning(entry.pos, "Unused entry '%s'" %
- entry.name)
- entry.cf_used = False
-
- messages.report()
-
- for node in assmt_nodes:
- node.cf_state = ControlFlowState(node.cf_state)
- for node in references:
- node.cf_state = ControlFlowState(node.cf_state)
-
-
-class AssignmentCollector(TreeVisitor):
- def __init__(self):
- super(AssignmentCollector, self).__init__()
- self.assignments = []
-
- def visit_Node(self, node):
- self._visitchildren(node, None)
-
- def visit_SingleAssignmentNode(self, node):
- self.assignments.append((node.lhs, node.rhs))
-
- def visit_CascadedAssignmentNode(self, node):
- for lhs in node.lhs_list:
- self.assignments.append((lhs, node.rhs))
-
-
-class ControlFlowAnalysis(CythonTransform):
-
- def visit_ModuleNode(self, node):
- self.gv_ctx = GVContext()
+ # '_' is often used for unused variables, e.g. in loops
+ if entry.is_arg:
+ if warn_unused_arg:
+ messages.warning(entry.pos, "Unused argument '%s'" %
+ entry.name)
+ else:
+ if warn_unused:
+ messages.warning(entry.pos, "Unused entry '%s'" %
+ entry.name)
+ entry.cf_used = False
+
+ messages.report()
+
+ for node in assmt_nodes:
+ node.cf_state = ControlFlowState(node.cf_state)
+ for node in references:
+ node.cf_state = ControlFlowState(node.cf_state)
+
+
+class AssignmentCollector(TreeVisitor):
+ def __init__(self):
+ super(AssignmentCollector, self).__init__()
+ self.assignments = []
+
+ def visit_Node(self, node):
+ self._visitchildren(node, None)
+
+ def visit_SingleAssignmentNode(self, node):
+ self.assignments.append((node.lhs, node.rhs))
+
+ def visit_CascadedAssignmentNode(self, node):
+ for lhs in node.lhs_list:
+ self.assignments.append((lhs, node.rhs))
+
+
+class ControlFlowAnalysis(CythonTransform):
+
+ def visit_ModuleNode(self, node):
+ self.gv_ctx = GVContext()
self.constant_folder = ConstantFolding()
-
- # Set of NameNode reductions
- self.reductions = set()
-
- self.in_inplace_assignment = False
- self.env_stack = []
- self.env = node.scope
- self.stack = []
- self.flow = ControlFlow()
- self.visitchildren(node)
-
- check_definitions(self.flow, self.current_directives)
-
- dot_output = self.current_directives['control_flow.dot_output']
- if dot_output:
- annotate_defs = self.current_directives['control_flow.dot_annotate_defs']
- fp = open(dot_output, 'wt')
- try:
- self.gv_ctx.render(fp, 'module', annotate_defs=annotate_defs)
- finally:
- fp.close()
- return node
-
- def visit_FuncDefNode(self, node):
- for arg in node.args:
- if arg.default:
- self.visitchildren(arg)
- self.visitchildren(node, ('decorators',))
- self.env_stack.append(self.env)
- self.env = node.local_scope
- self.stack.append(self.flow)
- self.flow = ControlFlow()
-
- # Collect all entries
- for entry in node.local_scope.entries.values():
- if self.flow.is_tracked(entry):
- self.flow.entries.add(entry)
-
- self.mark_position(node)
- # Function body block
- self.flow.nextblock()
-
- for arg in node.args:
- self._visit(arg)
- if node.star_arg:
- self.flow.mark_argument(node.star_arg,
- TypedExprNode(Builtin.tuple_type,
- may_be_none=False),
- node.star_arg.entry)
- if node.starstar_arg:
- self.flow.mark_argument(node.starstar_arg,
- TypedExprNode(Builtin.dict_type,
- may_be_none=False),
- node.starstar_arg.entry)
- self._visit(node.body)
- # Workaround for generators
- if node.is_generator:
- self._visit(node.gbody.body)
-
- # Exit point
- if self.flow.block:
- self.flow.block.add_child(self.flow.exit_point)
-
- # Cleanup graph
- self.flow.normalize()
- check_definitions(self.flow, self.current_directives)
- self.flow.blocks.add(self.flow.entry_point)
-
- self.gv_ctx.add(GV(node.local_scope.name, self.flow))
-
- self.flow = self.stack.pop()
- self.env = self.env_stack.pop()
- return node
-
- def visit_DefNode(self, node):
- node.used = True
- return self.visit_FuncDefNode(node)
-
- def visit_GeneratorBodyDefNode(self, node):
- return node
-
- def visit_CTypeDefNode(self, node):
- return node
-
- def mark_assignment(self, lhs, rhs=None):
- if not self.flow.block:
- return
- if self.flow.exceptions:
- exc_descr = self.flow.exceptions[-1]
- self.flow.block.add_child(exc_descr.entry_point)
- self.flow.nextblock()
-
- if not rhs:
- rhs = object_expr
- if lhs.is_name:
- if lhs.entry is not None:
- entry = lhs.entry
- else:
- entry = self.env.lookup(lhs.name)
- if entry is None: # TODO: This shouldn't happen...
- return
- self.flow.mark_assignment(lhs, rhs, entry)
+
+ # Set of NameNode reductions
+ self.reductions = set()
+
+ self.in_inplace_assignment = False
+ self.env_stack = []
+ self.env = node.scope
+ self.stack = []
+ self.flow = ControlFlow()
+ self.visitchildren(node)
+
+ check_definitions(self.flow, self.current_directives)
+
+ dot_output = self.current_directives['control_flow.dot_output']
+ if dot_output:
+ annotate_defs = self.current_directives['control_flow.dot_annotate_defs']
+ fp = open(dot_output, 'wt')
+ try:
+ self.gv_ctx.render(fp, 'module', annotate_defs=annotate_defs)
+ finally:
+ fp.close()
+ return node
+
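The dot_output handling above is driven by ordinary compiler directives, so the per-function control-flow graphs can be dumped from a build script. A minimal sketch, assuming the usual compiler_directives plumbing (module name and output path are placeholders):

    # Sketch: request the CFG dump that gv_ctx.render() writes out above.
    from Cython.Build import cythonize

    cythonize(
        "example.pyx",                                   # hypothetical module
        compiler_directives={
            "control_flow.dot_output": "flow.dot",       # file opened above
            "control_flow.dot_annotate_defs": True,      # annotate definitions
        },
    )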
+ def visit_FuncDefNode(self, node):
+ for arg in node.args:
+ if arg.default:
+ self.visitchildren(arg)
+ self.visitchildren(node, ('decorators',))
+ self.env_stack.append(self.env)
+ self.env = node.local_scope
+ self.stack.append(self.flow)
+ self.flow = ControlFlow()
+
+ # Collect all entries
+ for entry in node.local_scope.entries.values():
+ if self.flow.is_tracked(entry):
+ self.flow.entries.add(entry)
+
+ self.mark_position(node)
+ # Function body block
+ self.flow.nextblock()
+
+ for arg in node.args:
+ self._visit(arg)
+ if node.star_arg:
+ self.flow.mark_argument(node.star_arg,
+ TypedExprNode(Builtin.tuple_type,
+ may_be_none=False),
+ node.star_arg.entry)
+ if node.starstar_arg:
+ self.flow.mark_argument(node.starstar_arg,
+ TypedExprNode(Builtin.dict_type,
+ may_be_none=False),
+ node.starstar_arg.entry)
+ self._visit(node.body)
+ # Workaround for generators
+ if node.is_generator:
+ self._visit(node.gbody.body)
+
+ # Exit point
+ if self.flow.block:
+ self.flow.block.add_child(self.flow.exit_point)
+
+ # Cleanup graph
+ self.flow.normalize()
+ check_definitions(self.flow, self.current_directives)
+ self.flow.blocks.add(self.flow.entry_point)
+
+ self.gv_ctx.add(GV(node.local_scope.name, self.flow))
+
+ self.flow = self.stack.pop()
+ self.env = self.env_stack.pop()
+ return node
+
+ def visit_DefNode(self, node):
+ node.used = True
+ return self.visit_FuncDefNode(node)
+
+ def visit_GeneratorBodyDefNode(self, node):
+ return node
+
+ def visit_CTypeDefNode(self, node):
+ return node
+
+ def mark_assignment(self, lhs, rhs=None):
+ if not self.flow.block:
+ return
+ if self.flow.exceptions:
+ exc_descr = self.flow.exceptions[-1]
+ self.flow.block.add_child(exc_descr.entry_point)
+ self.flow.nextblock()
+
+ if not rhs:
+ rhs = object_expr
+ if lhs.is_name:
+ if lhs.entry is not None:
+ entry = lhs.entry
+ else:
+ entry = self.env.lookup(lhs.name)
+ if entry is None: # TODO: This shouldn't happen...
+ return
+ self.flow.mark_assignment(lhs, rhs, entry)
elif lhs.is_sequence_constructor:
for i, arg in enumerate(lhs.args):
if not rhs or arg.is_starred:
@@ -785,453 +785,453 @@ class ControlFlowAnalysis(CythonTransform):
else:
item_node = rhs.inferable_item_node(i)
self.mark_assignment(arg, item_node)
- else:
- self._visit(lhs)
-
- if self.flow.exceptions:
- exc_descr = self.flow.exceptions[-1]
- self.flow.block.add_child(exc_descr.entry_point)
- self.flow.nextblock()
-
- def mark_position(self, node):
- """Mark position if DOT output is enabled."""
- if self.current_directives['control_flow.dot_output']:
- self.flow.mark_position(node)
-
- def visit_FromImportStatNode(self, node):
- for name, target in node.items:
- if name != "*":
- self.mark_assignment(target)
- self.visitchildren(node)
- return node
-
- def visit_AssignmentNode(self, node):
- raise InternalError("Unhandled assignment node")
-
- def visit_SingleAssignmentNode(self, node):
- self._visit(node.rhs)
- self.mark_assignment(node.lhs, node.rhs)
- return node
-
- def visit_CascadedAssignmentNode(self, node):
- self._visit(node.rhs)
- for lhs in node.lhs_list:
- self.mark_assignment(lhs, node.rhs)
- return node
-
- def visit_ParallelAssignmentNode(self, node):
- collector = AssignmentCollector()
- collector.visitchildren(node)
- for lhs, rhs in collector.assignments:
- self._visit(rhs)
- for lhs, rhs in collector.assignments:
- self.mark_assignment(lhs, rhs)
- return node
-
- def visit_InPlaceAssignmentNode(self, node):
- self.in_inplace_assignment = True
- self.visitchildren(node)
- self.in_inplace_assignment = False
+ else:
+ self._visit(lhs)
+
+ if self.flow.exceptions:
+ exc_descr = self.flow.exceptions[-1]
+ self.flow.block.add_child(exc_descr.entry_point)
+ self.flow.nextblock()
+
+ def mark_position(self, node):
+ """Mark position if DOT output is enabled."""
+ if self.current_directives['control_flow.dot_output']:
+ self.flow.mark_position(node)
+
+ def visit_FromImportStatNode(self, node):
+ for name, target in node.items:
+ if name != "*":
+ self.mark_assignment(target)
+ self.visitchildren(node)
+ return node
+
+ def visit_AssignmentNode(self, node):
+ raise InternalError("Unhandled assignment node")
+
+ def visit_SingleAssignmentNode(self, node):
+ self._visit(node.rhs)
+ self.mark_assignment(node.lhs, node.rhs)
+ return node
+
+ def visit_CascadedAssignmentNode(self, node):
+ self._visit(node.rhs)
+ for lhs in node.lhs_list:
+ self.mark_assignment(lhs, node.rhs)
+ return node
+
+ def visit_ParallelAssignmentNode(self, node):
+ collector = AssignmentCollector()
+ collector.visitchildren(node)
+ for lhs, rhs in collector.assignments:
+ self._visit(rhs)
+ for lhs, rhs in collector.assignments:
+ self.mark_assignment(lhs, rhs)
+ return node
+
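The two separate loops above mirror Python's tuple-assignment semantics: every right-hand side is read before any target is marked as assigned. A rough sketch of the user code this covers:

    a, b = b, a
    # roughly a ParallelAssignmentNode: 'b' and 'a' are both visited as reads
    # first, and only then are the assignments to 'a' and 'b' marked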
+ def visit_InPlaceAssignmentNode(self, node):
+ self.in_inplace_assignment = True
+ self.visitchildren(node)
+ self.in_inplace_assignment = False
self.mark_assignment(node.lhs, self.constant_folder(node.create_binop_node()))
- return node
-
- def visit_DelStatNode(self, node):
- for arg in node.args:
- if arg.is_name:
- entry = arg.entry or self.env.lookup(arg.name)
- if entry.in_closure or entry.from_closure:
- error(arg.pos,
- "can not delete variable '%s' "
- "referenced in nested scope" % entry.name)
+ return node
+
+ def visit_DelStatNode(self, node):
+ for arg in node.args:
+ if arg.is_name:
+ entry = arg.entry or self.env.lookup(arg.name)
+ if entry.in_closure or entry.from_closure:
+ error(arg.pos,
+ "can not delete variable '%s' "
+ "referenced in nested scope" % entry.name)
if not node.ignore_nonexisting:
self._visit(arg) # mark reference
- self.flow.mark_deletion(arg, entry)
- else:
- self._visit(arg)
- return node
-
- def visit_CArgDeclNode(self, node):
- entry = self.env.lookup(node.name)
- if entry:
- may_be_none = not node.not_none
- self.flow.mark_argument(
- node, TypedExprNode(entry.type, may_be_none), entry)
- return node
-
- def visit_NameNode(self, node):
- if self.flow.block:
- entry = node.entry or self.env.lookup(node.name)
- if entry:
- self.flow.mark_reference(node, entry)
-
- if entry in self.reductions and not self.in_inplace_assignment:
- error(node.pos,
- "Cannot read reduction variable in loop body")
-
- return node
-
- def visit_StatListNode(self, node):
- if self.flow.block:
- for stat in node.stats:
- self._visit(stat)
- if not self.flow.block:
- stat.is_terminator = True
- break
- return node
-
- def visit_Node(self, node):
- self.visitchildren(node)
- self.mark_position(node)
- return node
-
+ self.flow.mark_deletion(arg, entry)
+ else:
+ self._visit(arg)
+ return node
+
+ def visit_CArgDeclNode(self, node):
+ entry = self.env.lookup(node.name)
+ if entry:
+ may_be_none = not node.not_none
+ self.flow.mark_argument(
+ node, TypedExprNode(entry.type, may_be_none), entry)
+ return node
+
+ def visit_NameNode(self, node):
+ if self.flow.block:
+ entry = node.entry or self.env.lookup(node.name)
+ if entry:
+ self.flow.mark_reference(node, entry)
+
+ if entry in self.reductions and not self.in_inplace_assignment:
+ error(node.pos,
+ "Cannot read reduction variable in loop body")
+
+ return node
+
+ def visit_StatListNode(self, node):
+ if self.flow.block:
+ for stat in node.stats:
+ self._visit(stat)
+ if not self.flow.block:
+ stat.is_terminator = True
+ break
+ return node
+
+ def visit_Node(self, node):
+ self.visitchildren(node)
+ self.mark_position(node)
+ return node
+
def visit_SizeofVarNode(self, node):
return node
def visit_TypeidNode(self, node):
return node
- def visit_IfStatNode(self, node):
- next_block = self.flow.newblock()
- parent = self.flow.block
- # If clauses
- for clause in node.if_clauses:
- parent = self.flow.nextblock(parent)
- self._visit(clause.condition)
- self.flow.nextblock()
- self._visit(clause.body)
- if self.flow.block:
- self.flow.block.add_child(next_block)
- # Else clause
- if node.else_clause:
- self.flow.nextblock(parent=parent)
- self._visit(node.else_clause)
- if self.flow.block:
- self.flow.block.add_child(next_block)
- else:
- parent.add_child(next_block)
-
- if next_block.parents:
- self.flow.block = next_block
- else:
- self.flow.block = None
- return node
-
- def visit_WhileStatNode(self, node):
- condition_block = self.flow.nextblock()
- next_block = self.flow.newblock()
- # Condition block
- self.flow.loops.append(LoopDescr(next_block, condition_block))
- if node.condition:
- self._visit(node.condition)
- # Body block
- self.flow.nextblock()
- self._visit(node.body)
- self.flow.loops.pop()
- # Loop it
- if self.flow.block:
- self.flow.block.add_child(condition_block)
- self.flow.block.add_child(next_block)
- # Else clause
- if node.else_clause:
- self.flow.nextblock(parent=condition_block)
- self._visit(node.else_clause)
- if self.flow.block:
- self.flow.block.add_child(next_block)
- else:
- condition_block.add_child(next_block)
-
- if next_block.parents:
- self.flow.block = next_block
- else:
- self.flow.block = None
- return node
-
- def mark_forloop_target(self, node):
- # TODO: Remove redundancy with range optimization...
- is_special = False
- sequence = node.iterator.sequence
- target = node.target
- if isinstance(sequence, ExprNodes.SimpleCallNode):
- function = sequence.function
- if sequence.self is None and function.is_name:
- entry = self.env.lookup(function.name)
- if not entry or entry.is_builtin:
- if function.name == 'reversed' and len(sequence.args) == 1:
- sequence = sequence.args[0]
- elif function.name == 'enumerate' and len(sequence.args) == 1:
- if target.is_sequence_constructor and len(target.args) == 2:
- iterator = sequence.args[0]
- if iterator.is_name:
- iterator_type = iterator.infer_type(self.env)
- if iterator_type.is_builtin_type:
- # assume that builtin types have a length within Py_ssize_t
- self.mark_assignment(
- target.args[0],
- ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
- type=PyrexTypes.c_py_ssize_t_type))
- target = target.args[1]
- sequence = sequence.args[0]
- if isinstance(sequence, ExprNodes.SimpleCallNode):
- function = sequence.function
- if sequence.self is None and function.is_name:
- entry = self.env.lookup(function.name)
- if not entry or entry.is_builtin:
- if function.name in ('range', 'xrange'):
- is_special = True
- for arg in sequence.args[:2]:
- self.mark_assignment(target, arg)
- if len(sequence.args) > 2:
+ def visit_IfStatNode(self, node):
+ next_block = self.flow.newblock()
+ parent = self.flow.block
+ # If clauses
+ for clause in node.if_clauses:
+ parent = self.flow.nextblock(parent)
+ self._visit(clause.condition)
+ self.flow.nextblock()
+ self._visit(clause.body)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+ # Else clause
+ if node.else_clause:
+ self.flow.nextblock(parent=parent)
+ self._visit(node.else_clause)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+ else:
+ parent.add_child(next_block)
+
+ if next_block.parents:
+ self.flow.block = next_block
+ else:
+ self.flow.block = None
+ return node
+
+ def visit_WhileStatNode(self, node):
+ condition_block = self.flow.nextblock()
+ next_block = self.flow.newblock()
+ # Condition block
+ self.flow.loops.append(LoopDescr(next_block, condition_block))
+ if node.condition:
+ self._visit(node.condition)
+ # Body block
+ self.flow.nextblock()
+ self._visit(node.body)
+ self.flow.loops.pop()
+ # Loop it
+ if self.flow.block:
+ self.flow.block.add_child(condition_block)
+ self.flow.block.add_child(next_block)
+ # Else clause
+ if node.else_clause:
+ self.flow.nextblock(parent=condition_block)
+ self._visit(node.else_clause)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+ else:
+ condition_block.add_child(next_block)
+
+ if next_block.parents:
+ self.flow.block = next_block
+ else:
+ self.flow.block = None
+ return node
+
+ def mark_forloop_target(self, node):
+ # TODO: Remove redundancy with range optimization...
+ is_special = False
+ sequence = node.iterator.sequence
+ target = node.target
+ if isinstance(sequence, ExprNodes.SimpleCallNode):
+ function = sequence.function
+ if sequence.self is None and function.is_name:
+ entry = self.env.lookup(function.name)
+ if not entry or entry.is_builtin:
+ if function.name == 'reversed' and len(sequence.args) == 1:
+ sequence = sequence.args[0]
+ elif function.name == 'enumerate' and len(sequence.args) == 1:
+ if target.is_sequence_constructor and len(target.args) == 2:
+ iterator = sequence.args[0]
+ if iterator.is_name:
+ iterator_type = iterator.infer_type(self.env)
+ if iterator_type.is_builtin_type:
+ # assume that builtin types have a length within Py_ssize_t
+ self.mark_assignment(
+ target.args[0],
+ ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
+ type=PyrexTypes.c_py_ssize_t_type))
+ target = target.args[1]
+ sequence = sequence.args[0]
+ if isinstance(sequence, ExprNodes.SimpleCallNode):
+ function = sequence.function
+ if sequence.self is None and function.is_name:
+ entry = self.env.lookup(function.name)
+ if not entry or entry.is_builtin:
+ if function.name in ('range', 'xrange'):
+ is_special = True
+ for arg in sequence.args[:2]:
+ self.mark_assignment(target, arg)
+ if len(sequence.args) > 2:
self.mark_assignment(target, self.constant_folder(
- ExprNodes.binop_node(node.pos,
- '+',
- sequence.args[0],
+ ExprNodes.binop_node(node.pos,
+ '+',
+ sequence.args[0],
sequence.args[2])))
-
- if not is_special:
- # A for-loop basically translates to subsequent calls to
- # __getitem__(), so using an IndexNode here allows us to
- # naturally infer the base type of pointers, C arrays,
- # Python strings, etc., while correctly falling back to an
- # object type when the base type cannot be handled.
-
- self.mark_assignment(target, node.item)
-
+
+ if not is_special:
+ # A for-loop basically translates to subsequent calls to
+ # __getitem__(), so using an IndexNode here allows us to
+ # naturally infer the base type of pointers, C arrays,
+ # Python strings, etc., while correctly falling back to an
+ # object type when the base type cannot be handled.
+
+ self.mark_assignment(target, node.item)
+
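A rough illustration of the two paths in mark_forloop_target (Cython user code; the inferred types are a sketch, and char_ptr is a hypothetical char* variable):

    for i in range(start, stop, step):
        ...
    # special case: 'i' is marked as assigned 'start', 'stop' and the folded
    # 'start + step', so it is inferred from the bounds instead of as object

    for c in char_ptr:
        ...
    # generic path: 'c' is marked with node.item (an IndexNode), so the base
    # type of a pointer, C array or Python string can be inferred for it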
def visit_AsyncForStatNode(self, node):
return self.visit_ForInStatNode(node)
- def visit_ForInStatNode(self, node):
- condition_block = self.flow.nextblock()
- next_block = self.flow.newblock()
- # Condition with iterator
- self.flow.loops.append(LoopDescr(next_block, condition_block))
- self._visit(node.iterator)
- # Target assignment
- self.flow.nextblock()
-
- if isinstance(node, Nodes.ForInStatNode):
- self.mark_forloop_target(node)
+ def visit_ForInStatNode(self, node):
+ condition_block = self.flow.nextblock()
+ next_block = self.flow.newblock()
+ # Condition with iterator
+ self.flow.loops.append(LoopDescr(next_block, condition_block))
+ self._visit(node.iterator)
+ # Target assignment
+ self.flow.nextblock()
+
+ if isinstance(node, Nodes.ForInStatNode):
+ self.mark_forloop_target(node)
elif isinstance(node, Nodes.AsyncForStatNode):
# not entirely correct, but good enough for now
self.mark_assignment(node.target, node.item)
- else: # Parallel
- self.mark_assignment(node.target)
-
- # Body block
- if isinstance(node, Nodes.ParallelRangeNode):
-            # In case of an invalid loop, the privates are marked as deleted up front
- self._delete_privates(node, exclude=node.target.entry)
-
- self.flow.nextblock()
- self._visit(node.body)
- self.flow.loops.pop()
-
- # Loop it
- if self.flow.block:
- self.flow.block.add_child(condition_block)
- # Else clause
- if node.else_clause:
- self.flow.nextblock(parent=condition_block)
- self._visit(node.else_clause)
- if self.flow.block:
- self.flow.block.add_child(next_block)
- else:
- condition_block.add_child(next_block)
-
- if next_block.parents:
- self.flow.block = next_block
- else:
- self.flow.block = None
- return node
-
- def _delete_privates(self, node, exclude=None):
- for private_node in node.assigned_nodes:
- if not exclude or private_node.entry is not exclude:
- self.flow.mark_deletion(private_node, private_node.entry)
-
- def visit_ParallelRangeNode(self, node):
- reductions = self.reductions
-
- # if node.target is None or not a NameNode, an error will have
- # been previously issued
- if hasattr(node.target, 'entry'):
- self.reductions = set(reductions)
-
- for private_node in node.assigned_nodes:
- private_node.entry.error_on_uninitialized = True
- pos, reduction = node.assignments[private_node.entry]
- if reduction:
- self.reductions.add(private_node.entry)
-
- node = self.visit_ForInStatNode(node)
-
- self.reductions = reductions
- return node
-
- def visit_ParallelWithBlockNode(self, node):
- for private_node in node.assigned_nodes:
- private_node.entry.error_on_uninitialized = True
-
- self._delete_privates(node)
- self.visitchildren(node)
- self._delete_privates(node)
-
- return node
-
- def visit_ForFromStatNode(self, node):
- condition_block = self.flow.nextblock()
- next_block = self.flow.newblock()
- # Condition with iterator
- self.flow.loops.append(LoopDescr(next_block, condition_block))
- self._visit(node.bound1)
- self._visit(node.bound2)
- if node.step is not None:
- self._visit(node.step)
- # Target assignment
- self.flow.nextblock()
- self.mark_assignment(node.target, node.bound1)
- if node.step is not None:
+ else: # Parallel
+ self.mark_assignment(node.target)
+
+ # Body block
+ if isinstance(node, Nodes.ParallelRangeNode):
+            # In case of an invalid loop, the privates are marked as deleted up front
+ self._delete_privates(node, exclude=node.target.entry)
+
+ self.flow.nextblock()
+ self._visit(node.body)
+ self.flow.loops.pop()
+
+ # Loop it
+ if self.flow.block:
+ self.flow.block.add_child(condition_block)
+ # Else clause
+ if node.else_clause:
+ self.flow.nextblock(parent=condition_block)
+ self._visit(node.else_clause)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+ else:
+ condition_block.add_child(next_block)
+
+ if next_block.parents:
+ self.flow.block = next_block
+ else:
+ self.flow.block = None
+ return node
+
+ def _delete_privates(self, node, exclude=None):
+ for private_node in node.assigned_nodes:
+ if not exclude or private_node.entry is not exclude:
+ self.flow.mark_deletion(private_node, private_node.entry)
+
+ def visit_ParallelRangeNode(self, node):
+ reductions = self.reductions
+
+ # if node.target is None or not a NameNode, an error will have
+ # been previously issued
+ if hasattr(node.target, 'entry'):
+ self.reductions = set(reductions)
+
+ for private_node in node.assigned_nodes:
+ private_node.entry.error_on_uninitialized = True
+ pos, reduction = node.assignments[private_node.entry]
+ if reduction:
+ self.reductions.add(private_node.entry)
+
+ node = self.visit_ForInStatNode(node)
+
+ self.reductions = reductions
+ return node
+
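The reduction entries recorded here feed the check in visit_NameNode above. A minimal sketch of the user code involved (assumes cython.parallel.prange; names are placeholders):

    from cython.parallel import prange

    cdef double total = 0
    for i in prange(n, nogil=True):
        total += data[i]   # in-place op: 'total' is recorded as a reduction
        # reading 'total' here outside an in-place op would trigger the
        # "Cannot read reduction variable in loop body" error above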
+ def visit_ParallelWithBlockNode(self, node):
+ for private_node in node.assigned_nodes:
+ private_node.entry.error_on_uninitialized = True
+
+ self._delete_privates(node)
+ self.visitchildren(node)
+ self._delete_privates(node)
+
+ return node
+
+ def visit_ForFromStatNode(self, node):
+ condition_block = self.flow.nextblock()
+ next_block = self.flow.newblock()
+ # Condition with iterator
+ self.flow.loops.append(LoopDescr(next_block, condition_block))
+ self._visit(node.bound1)
+ self._visit(node.bound2)
+ if node.step is not None:
+ self._visit(node.step)
+ # Target assignment
+ self.flow.nextblock()
+ self.mark_assignment(node.target, node.bound1)
+ if node.step is not None:
self.mark_assignment(node.target, self.constant_folder(
ExprNodes.binop_node(node.pos, '+', node.bound1, node.step)))
- # Body block
- self.flow.nextblock()
- self._visit(node.body)
- self.flow.loops.pop()
- # Loop it
- if self.flow.block:
- self.flow.block.add_child(condition_block)
- # Else clause
- if node.else_clause:
- self.flow.nextblock(parent=condition_block)
- self._visit(node.else_clause)
- if self.flow.block:
- self.flow.block.add_child(next_block)
- else:
- condition_block.add_child(next_block)
-
- if next_block.parents:
- self.flow.block = next_block
- else:
- self.flow.block = None
- return node
-
- def visit_LoopNode(self, node):
- raise InternalError("Generic loops are not supported")
-
- def visit_WithTargetAssignmentStatNode(self, node):
- self.mark_assignment(node.lhs, node.with_node.enter_call)
- return node
-
- def visit_WithStatNode(self, node):
- self._visit(node.manager)
- self._visit(node.enter_call)
- self._visit(node.body)
- return node
-
- def visit_TryExceptStatNode(self, node):
- # After exception handling
- next_block = self.flow.newblock()
- # Body block
- self.flow.newblock()
- # Exception entry point
- entry_point = self.flow.newblock()
- self.flow.exceptions.append(ExceptionDescr(entry_point))
- self.flow.nextblock()
- ## XXX: links to exception handling point should be added by
- ## XXX: children nodes
- self.flow.block.add_child(entry_point)
- self.flow.nextblock()
- self._visit(node.body)
- self.flow.exceptions.pop()
-
- # After exception
- if self.flow.block:
- if node.else_clause:
- self.flow.nextblock()
- self._visit(node.else_clause)
- if self.flow.block:
- self.flow.block.add_child(next_block)
-
- for clause in node.except_clauses:
- self.flow.block = entry_point
- if clause.pattern:
- for pattern in clause.pattern:
- self._visit(pattern)
- else:
- # TODO: handle * pattern
- pass
- entry_point = self.flow.newblock(parent=self.flow.block)
- self.flow.nextblock()
- if clause.target:
- self.mark_assignment(clause.target)
- self._visit(clause.body)
- if self.flow.block:
- self.flow.block.add_child(next_block)
-
- if self.flow.exceptions:
- entry_point.add_child(self.flow.exceptions[-1].entry_point)
-
- if next_block.parents:
- self.flow.block = next_block
- else:
- self.flow.block = None
- return node
-
- def visit_TryFinallyStatNode(self, node):
- body_block = self.flow.nextblock()
-
- # Exception entry point
- entry_point = self.flow.newblock()
- self.flow.block = entry_point
+ # Body block
+ self.flow.nextblock()
+ self._visit(node.body)
+ self.flow.loops.pop()
+ # Loop it
+ if self.flow.block:
+ self.flow.block.add_child(condition_block)
+ # Else clause
+ if node.else_clause:
+ self.flow.nextblock(parent=condition_block)
+ self._visit(node.else_clause)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+ else:
+ condition_block.add_child(next_block)
+
+ if next_block.parents:
+ self.flow.block = next_block
+ else:
+ self.flow.block = None
+ return node
+
+ def visit_LoopNode(self, node):
+ raise InternalError("Generic loops are not supported")
+
+ def visit_WithTargetAssignmentStatNode(self, node):
+ self.mark_assignment(node.lhs, node.with_node.enter_call)
+ return node
+
+ def visit_WithStatNode(self, node):
+ self._visit(node.manager)
+ self._visit(node.enter_call)
+ self._visit(node.body)
+ return node
+
+ def visit_TryExceptStatNode(self, node):
+ # After exception handling
+ next_block = self.flow.newblock()
+ # Body block
+ self.flow.newblock()
+ # Exception entry point
+ entry_point = self.flow.newblock()
+ self.flow.exceptions.append(ExceptionDescr(entry_point))
+ self.flow.nextblock()
+ ## XXX: links to exception handling point should be added by
+ ## XXX: children nodes
+ self.flow.block.add_child(entry_point)
+ self.flow.nextblock()
+ self._visit(node.body)
+ self.flow.exceptions.pop()
+
+ # After exception
+ if self.flow.block:
+ if node.else_clause:
+ self.flow.nextblock()
+ self._visit(node.else_clause)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+
+ for clause in node.except_clauses:
+ self.flow.block = entry_point
+ if clause.pattern:
+ for pattern in clause.pattern:
+ self._visit(pattern)
+ else:
+ # TODO: handle * pattern
+ pass
+ entry_point = self.flow.newblock(parent=self.flow.block)
+ self.flow.nextblock()
+ if clause.target:
+ self.mark_assignment(clause.target)
+ self._visit(clause.body)
+ if self.flow.block:
+ self.flow.block.add_child(next_block)
+
+ if self.flow.exceptions:
+ entry_point.add_child(self.flow.exceptions[-1].entry_point)
+
+ if next_block.parents:
+ self.flow.block = next_block
+ else:
+ self.flow.block = None
+ return node
+
+ def visit_TryFinallyStatNode(self, node):
+ body_block = self.flow.nextblock()
+
+ # Exception entry point
+ entry_point = self.flow.newblock()
+ self.flow.block = entry_point
self._visit(node.finally_except_clause)
-
- if self.flow.block and self.flow.exceptions:
- self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
-
- # Normal execution
- finally_enter = self.flow.newblock()
- self.flow.block = finally_enter
- self._visit(node.finally_clause)
- finally_exit = self.flow.block
-
- descr = ExceptionDescr(entry_point, finally_enter, finally_exit)
- self.flow.exceptions.append(descr)
- if self.flow.loops:
- self.flow.loops[-1].exceptions.append(descr)
- self.flow.block = body_block
- body_block.add_child(entry_point)
- self.flow.nextblock()
- self._visit(node.body)
- self.flow.exceptions.pop()
- if self.flow.loops:
- self.flow.loops[-1].exceptions.pop()
-
- if self.flow.block:
- self.flow.block.add_child(finally_enter)
- if finally_exit:
- self.flow.block = self.flow.nextblock(parent=finally_exit)
- else:
- self.flow.block = None
- return node
-
- def visit_RaiseStatNode(self, node):
- self.mark_position(node)
- self.visitchildren(node)
- if self.flow.exceptions:
- self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
- self.flow.block = None
- return node
-
- def visit_ReraiseStatNode(self, node):
- self.mark_position(node)
- if self.flow.exceptions:
- self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
- self.flow.block = None
- return node
-
- def visit_ReturnStatNode(self, node):
- self.mark_position(node)
- self.visitchildren(node)
-
+
+ if self.flow.block and self.flow.exceptions:
+ self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
+
+ # Normal execution
+ finally_enter = self.flow.newblock()
+ self.flow.block = finally_enter
+ self._visit(node.finally_clause)
+ finally_exit = self.flow.block
+
+ descr = ExceptionDescr(entry_point, finally_enter, finally_exit)
+ self.flow.exceptions.append(descr)
+ if self.flow.loops:
+ self.flow.loops[-1].exceptions.append(descr)
+ self.flow.block = body_block
+ body_block.add_child(entry_point)
+ self.flow.nextblock()
+ self._visit(node.body)
+ self.flow.exceptions.pop()
+ if self.flow.loops:
+ self.flow.loops[-1].exceptions.pop()
+
+ if self.flow.block:
+ self.flow.block.add_child(finally_enter)
+ if finally_exit:
+ self.flow.block = self.flow.nextblock(parent=finally_exit)
+ else:
+ self.flow.block = None
+ return node
+
+ def visit_RaiseStatNode(self, node):
+ self.mark_position(node)
+ self.visitchildren(node)
+ if self.flow.exceptions:
+ self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
+ self.flow.block = None
+ return node
+
+ def visit_ReraiseStatNode(self, node):
+ self.mark_position(node)
+ if self.flow.exceptions:
+ self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
+ self.flow.block = None
+ return node
+
+ def visit_ReturnStatNode(self, node):
+ self.mark_position(node)
+ self.visitchildren(node)
+
outer_exception_handlers = iter(self.flow.exceptions[::-1])
for handler in outer_exception_handlers:
if handler.finally_enter:
@@ -1244,82 +1244,82 @@ class ControlFlowAnalysis(CythonTransform):
exit_point = next_handler.finally_enter
break
handler.finally_exit.add_child(exit_point)
- break
- else:
- if self.flow.block:
- self.flow.block.add_child(self.flow.exit_point)
- self.flow.block = None
- return node
-
- def visit_BreakStatNode(self, node):
- if not self.flow.loops:
- #error(node.pos, "break statement not inside loop")
- return node
- loop = self.flow.loops[-1]
- self.mark_position(node)
- for exception in loop.exceptions[::-1]:
- if exception.finally_enter:
- self.flow.block.add_child(exception.finally_enter)
- if exception.finally_exit:
- exception.finally_exit.add_child(loop.next_block)
- break
- else:
- self.flow.block.add_child(loop.next_block)
- self.flow.block = None
- return node
-
- def visit_ContinueStatNode(self, node):
- if not self.flow.loops:
- #error(node.pos, "continue statement not inside loop")
- return node
- loop = self.flow.loops[-1]
- self.mark_position(node)
- for exception in loop.exceptions[::-1]:
- if exception.finally_enter:
- self.flow.block.add_child(exception.finally_enter)
- if exception.finally_exit:
- exception.finally_exit.add_child(loop.loop_block)
- break
- else:
- self.flow.block.add_child(loop.loop_block)
- self.flow.block = None
- return node
-
- def visit_ComprehensionNode(self, node):
- if node.expr_scope:
- self.env_stack.append(self.env)
- self.env = node.expr_scope
- # Skip append node here
- self._visit(node.loop)
- if node.expr_scope:
- self.env = self.env_stack.pop()
- return node
-
- def visit_ScopedExprNode(self, node):
- if node.expr_scope:
- self.env_stack.append(self.env)
- self.env = node.expr_scope
- self.visitchildren(node)
- if node.expr_scope:
- self.env = self.env_stack.pop()
- return node
-
- def visit_PyClassDefNode(self, node):
- self.visitchildren(node, ('dict', 'metaclass',
- 'mkw', 'bases', 'class_result'))
- self.flow.mark_assignment(node.target, node.classobj,
- self.env.lookup(node.name))
- self.env_stack.append(self.env)
- self.env = node.scope
- self.flow.nextblock()
- self.visitchildren(node, ('body',))
- self.flow.nextblock()
- self.env = self.env_stack.pop()
- return node
-
- def visit_AmpersandNode(self, node):
- if node.operand.is_name:
- # Fake assignment to silence warning
- self.mark_assignment(node.operand, fake_rhs_expr)
- self.visitchildren(node)
- return node
+ break
+ else:
+ if self.flow.block:
+ self.flow.block.add_child(self.flow.exit_point)
+ self.flow.block = None
+ return node
+
+ def visit_BreakStatNode(self, node):
+ if not self.flow.loops:
+ #error(node.pos, "break statement not inside loop")
+ return node
+ loop = self.flow.loops[-1]
+ self.mark_position(node)
+ for exception in loop.exceptions[::-1]:
+ if exception.finally_enter:
+ self.flow.block.add_child(exception.finally_enter)
+ if exception.finally_exit:
+ exception.finally_exit.add_child(loop.next_block)
+ break
+ else:
+ self.flow.block.add_child(loop.next_block)
+ self.flow.block = None
+ return node
+
+ def visit_ContinueStatNode(self, node):
+ if not self.flow.loops:
+ #error(node.pos, "continue statement not inside loop")
+ return node
+ loop = self.flow.loops[-1]
+ self.mark_position(node)
+ for exception in loop.exceptions[::-1]:
+ if exception.finally_enter:
+ self.flow.block.add_child(exception.finally_enter)
+ if exception.finally_exit:
+ exception.finally_exit.add_child(loop.loop_block)
+ break
+ else:
+ self.flow.block.add_child(loop.loop_block)
+ self.flow.block = None
+ return node
+
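The finally_enter/finally_exit edges used above correspond to user code of the following shape (a sketch; continue is wired the same way, but to loop_block instead of next_block):

    while work_left():           # hypothetical loop condition
        try:
            if done():
                break            # edge: current block -> finally_enter,
        finally:                 # then finally_exit -> the loop's next_block
            cleanup()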
+ def visit_ComprehensionNode(self, node):
+ if node.expr_scope:
+ self.env_stack.append(self.env)
+ self.env = node.expr_scope
+ # Skip append node here
+ self._visit(node.loop)
+ if node.expr_scope:
+ self.env = self.env_stack.pop()
+ return node
+
+ def visit_ScopedExprNode(self, node):
+ if node.expr_scope:
+ self.env_stack.append(self.env)
+ self.env = node.expr_scope
+ self.visitchildren(node)
+ if node.expr_scope:
+ self.env = self.env_stack.pop()
+ return node
+
+ def visit_PyClassDefNode(self, node):
+ self.visitchildren(node, ('dict', 'metaclass',
+ 'mkw', 'bases', 'class_result'))
+ self.flow.mark_assignment(node.target, node.classobj,
+ self.env.lookup(node.name))
+ self.env_stack.append(self.env)
+ self.env = node.scope
+ self.flow.nextblock()
+ self.visitchildren(node, ('body',))
+ self.flow.nextblock()
+ self.env = self.env_stack.pop()
+ return node
+
+ def visit_AmpersandNode(self, node):
+ if node.operand.is_name:
+ # Fake assignment to silence warning
+ self.mark_assignment(node.operand, fake_rhs_expr)
+ self.visitchildren(node)
+ return node
diff --git a/contrib/tools/cython/Cython/Compiler/FusedNode.py b/contrib/tools/cython/Cython/Compiler/FusedNode.py
index 23144aeca3..26d6ffd3d6 100644
--- a/contrib/tools/cython/Cython/Compiler/FusedNode.py
+++ b/contrib/tools/cython/Cython/Compiler/FusedNode.py
@@ -1,150 +1,150 @@
-from __future__ import absolute_import
-
-import copy
-
-from . import (ExprNodes, PyrexTypes, MemoryView,
- ParseTreeTransforms, StringEncoding, Errors)
-from .ExprNodes import CloneNode, ProxyNode, TupleNode
-from .Nodes import FuncDefNode, CFuncDefNode, StatListNode, DefNode
+from __future__ import absolute_import
+
+import copy
+
+from . import (ExprNodes, PyrexTypes, MemoryView,
+ ParseTreeTransforms, StringEncoding, Errors)
+from .ExprNodes import CloneNode, ProxyNode, TupleNode
+from .Nodes import FuncDefNode, CFuncDefNode, StatListNode, DefNode
from ..Utils import OrderedSet
-
-
-class FusedCFuncDefNode(StatListNode):
- """
- This node replaces a function with fused arguments. It deep-copies the
- function for every permutation of fused types, and allocates a new local
- scope for it. It keeps track of the original function in self.node, and
- the entry of the original function in the symbol table is given the
- 'fused_cfunction' attribute which points back to us.
- Then when a function lookup occurs (to e.g. call it), the call can be
- dispatched to the right function.
-
- node FuncDefNode the original function
- nodes [FuncDefNode] list of copies of node with different specific types
- py_func DefNode the fused python function subscriptable from
- Python space
- __signatures__ A DictNode mapping signature specialization strings
- to PyCFunction nodes
- resulting_fused_function PyCFunction for the fused DefNode that delegates
- to specializations
- fused_func_assignment Assignment of the fused function to the function name
- defaults_tuple TupleNode of defaults (letting PyCFunctionNode build
- defaults would result in many different tuples)
- specialized_pycfuncs List of synthesized pycfunction nodes for the
- specializations
- code_object CodeObjectNode shared by all specializations and the
- fused function
-
- fused_compound_types All fused (compound) types (e.g. floating[:])
- """
-
- __signatures__ = None
- resulting_fused_function = None
- fused_func_assignment = None
- defaults_tuple = None
- decorators = None
-
- child_attrs = StatListNode.child_attrs + [
- '__signatures__', 'resulting_fused_function', 'fused_func_assignment']
-
- def __init__(self, node, env):
- super(FusedCFuncDefNode, self).__init__(node.pos)
-
- self.nodes = []
- self.node = node
-
- is_def = isinstance(self.node, DefNode)
- if is_def:
- # self.node.decorators = []
- self.copy_def(env)
- else:
- self.copy_cdef(env)
-
- # Perform some sanity checks. If anything fails, it's a bug
- for n in self.nodes:
- assert not n.entry.type.is_fused
- assert not n.local_scope.return_type.is_fused
- if node.return_type.is_fused:
- assert not n.return_type.is_fused
-
- if not is_def and n.cfunc_declarator.optional_arg_count:
- assert n.type.op_arg_struct
-
- node.entry.fused_cfunction = self
- # Copy the nodes as AnalyseDeclarationsTransform will prepend
- # self.py_func to self.stats, as we only want specialized
- # CFuncDefNodes in self.nodes
- self.stats = self.nodes[:]
-
- def copy_def(self, env):
- """
- Create a copy of the original def or lambda function for specialized
- versions.
- """
- fused_compound_types = PyrexTypes.unique(
- [arg.type for arg in self.node.args if arg.type.is_fused])
- fused_types = self._get_fused_base_types(fused_compound_types)
- permutations = PyrexTypes.get_all_specialized_permutations(fused_types)
-
- self.fused_compound_types = fused_compound_types
-
- if self.node.entry in env.pyfunc_entries:
- env.pyfunc_entries.remove(self.node.entry)
-
- for cname, fused_to_specific in permutations:
- copied_node = copy.deepcopy(self.node)
+
+
+class FusedCFuncDefNode(StatListNode):
+ """
+ This node replaces a function with fused arguments. It deep-copies the
+ function for every permutation of fused types, and allocates a new local
+ scope for it. It keeps track of the original function in self.node, and
+ the entry of the original function in the symbol table is given the
+ 'fused_cfunction' attribute which points back to us.
+ Then when a function lookup occurs (to e.g. call it), the call can be
+ dispatched to the right function.
+
+ node FuncDefNode the original function
+ nodes [FuncDefNode] list of copies of node with different specific types
+ py_func DefNode the fused python function subscriptable from
+ Python space
+ __signatures__ A DictNode mapping signature specialization strings
+ to PyCFunction nodes
+ resulting_fused_function PyCFunction for the fused DefNode that delegates
+ to specializations
+ fused_func_assignment Assignment of the fused function to the function name
+ defaults_tuple TupleNode of defaults (letting PyCFunctionNode build
+ defaults would result in many different tuples)
+ specialized_pycfuncs List of synthesized pycfunction nodes for the
+ specializations
+ code_object CodeObjectNode shared by all specializations and the
+ fused function
+
+ fused_compound_types All fused (compound) types (e.g. floating[:])
+ """
+
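A minimal illustration of the input this node handles (Cython user code; names are arbitrary):

    ctypedef fused floating:
        float
        double

    def scale(floating x, floating factor):
        return x * factor

    # FusedCFuncDefNode keeps the original 'scale' in self.node and one deep
    # copy per specialization (here float and double) in self.nodes; calls are
    # dispatched to the matching copy through the __signatures__ mapping above.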
+ __signatures__ = None
+ resulting_fused_function = None
+ fused_func_assignment = None
+ defaults_tuple = None
+ decorators = None
+
+ child_attrs = StatListNode.child_attrs + [
+ '__signatures__', 'resulting_fused_function', 'fused_func_assignment']
+
+ def __init__(self, node, env):
+ super(FusedCFuncDefNode, self).__init__(node.pos)
+
+ self.nodes = []
+ self.node = node
+
+ is_def = isinstance(self.node, DefNode)
+ if is_def:
+ # self.node.decorators = []
+ self.copy_def(env)
+ else:
+ self.copy_cdef(env)
+
+ # Perform some sanity checks. If anything fails, it's a bug
+ for n in self.nodes:
+ assert not n.entry.type.is_fused
+ assert not n.local_scope.return_type.is_fused
+ if node.return_type.is_fused:
+ assert not n.return_type.is_fused
+
+ if not is_def and n.cfunc_declarator.optional_arg_count:
+ assert n.type.op_arg_struct
+
+ node.entry.fused_cfunction = self
+ # Copy the nodes as AnalyseDeclarationsTransform will prepend
+ # self.py_func to self.stats, as we only want specialized
+ # CFuncDefNodes in self.nodes
+ self.stats = self.nodes[:]
+
+ def copy_def(self, env):
+ """
+ Create a copy of the original def or lambda function for specialized
+ versions.
+ """
+ fused_compound_types = PyrexTypes.unique(
+ [arg.type for arg in self.node.args if arg.type.is_fused])
+ fused_types = self._get_fused_base_types(fused_compound_types)
+ permutations = PyrexTypes.get_all_specialized_permutations(fused_types)
+
+ self.fused_compound_types = fused_compound_types
+
+ if self.node.entry in env.pyfunc_entries:
+ env.pyfunc_entries.remove(self.node.entry)
+
+ for cname, fused_to_specific in permutations:
+ copied_node = copy.deepcopy(self.node)
# keep signature object identity for special casing in DefNode.analyse_declarations()
copied_node.entry.signature = self.node.entry.signature
-
- self._specialize_function_args(copied_node.args, fused_to_specific)
- copied_node.return_type = self.node.return_type.specialize(
- fused_to_specific)
-
- copied_node.analyse_declarations(env)
- # copied_node.is_staticmethod = self.node.is_staticmethod
- # copied_node.is_classmethod = self.node.is_classmethod
- self.create_new_local_scope(copied_node, env, fused_to_specific)
- self.specialize_copied_def(copied_node, cname, self.node.entry,
- fused_to_specific, fused_compound_types)
-
- PyrexTypes.specialize_entry(copied_node.entry, cname)
- copied_node.entry.used = True
- env.entries[copied_node.entry.name] = copied_node.entry
-
- if not self.replace_fused_typechecks(copied_node):
- break
-
- self.orig_py_func = self.node
- self.py_func = self.make_fused_cpdef(self.node, env, is_def=True)
-
- def copy_cdef(self, env):
- """
- Create a copy of the original c(p)def function for all specialized
- versions.
- """
- permutations = self.node.type.get_all_specialized_permutations()
- # print 'Node %s has %d specializations:' % (self.node.entry.name,
- # len(permutations))
- # import pprint; pprint.pprint([d for cname, d in permutations])
-
- # Prevent copying of the python function
- self.orig_py_func = orig_py_func = self.node.py_func
- self.node.py_func = None
- if orig_py_func:
- env.pyfunc_entries.remove(orig_py_func.entry)
-
- fused_types = self.node.type.get_fused_types()
- self.fused_compound_types = fused_types
-
+
+ self._specialize_function_args(copied_node.args, fused_to_specific)
+ copied_node.return_type = self.node.return_type.specialize(
+ fused_to_specific)
+
+ copied_node.analyse_declarations(env)
+ # copied_node.is_staticmethod = self.node.is_staticmethod
+ # copied_node.is_classmethod = self.node.is_classmethod
+ self.create_new_local_scope(copied_node, env, fused_to_specific)
+ self.specialize_copied_def(copied_node, cname, self.node.entry,
+ fused_to_specific, fused_compound_types)
+
+ PyrexTypes.specialize_entry(copied_node.entry, cname)
+ copied_node.entry.used = True
+ env.entries[copied_node.entry.name] = copied_node.entry
+
+ if not self.replace_fused_typechecks(copied_node):
+ break
+
+ self.orig_py_func = self.node
+ self.py_func = self.make_fused_cpdef(self.node, env, is_def=True)
+
+ def copy_cdef(self, env):
+ """
+ Create a copy of the original c(p)def function for all specialized
+ versions.
+ """
+ permutations = self.node.type.get_all_specialized_permutations()
+ # print 'Node %s has %d specializations:' % (self.node.entry.name,
+ # len(permutations))
+ # import pprint; pprint.pprint([d for cname, d in permutations])
+
+ # Prevent copying of the python function
+ self.orig_py_func = orig_py_func = self.node.py_func
+ self.node.py_func = None
+ if orig_py_func:
+ env.pyfunc_entries.remove(orig_py_func.entry)
+
+ fused_types = self.node.type.get_fused_types()
+ self.fused_compound_types = fused_types
+
new_cfunc_entries = []
- for cname, fused_to_specific in permutations:
- copied_node = copy.deepcopy(self.node)
-
+ for cname, fused_to_specific in permutations:
+ copied_node = copy.deepcopy(self.node)
+
# Make the types in our CFuncType specific.
- type = copied_node.type.specialize(fused_to_specific)
- entry = copied_node.entry
+ type = copied_node.type.specialize(fused_to_specific)
+ entry = copied_node.entry
type.specialize_entry(entry, cname)
-
+
# Reuse existing Entries (e.g. from .pxd files).
for i, orig_entry in enumerate(env.cfunc_entries):
if entry.cname == orig_entry.cname and type.same_as_resolved_type(orig_entry.type):
@@ -157,38 +157,38 @@ class FusedCFuncDefNode(StatListNode):
else:
new_cfunc_entries.append(entry)
- copied_node.type = type
- entry.type, type.entry = type, entry
-
- entry.used = (entry.used or
- self.node.entry.defined_in_pxd or
- env.is_c_class_scope or
- entry.is_cmethod)
-
- if self.node.cfunc_declarator.optional_arg_count:
- self.node.cfunc_declarator.declare_optional_arg_struct(
- type, env, fused_cname=cname)
-
- copied_node.return_type = type.return_type
- self.create_new_local_scope(copied_node, env, fused_to_specific)
-
- # Make the argument types in the CFuncDeclarator specific
- self._specialize_function_args(copied_node.cfunc_declarator.args,
- fused_to_specific)
-
- # If a cpdef, declare all specialized cpdefs (this
- # also calls analyse_declarations)
- copied_node.declare_cpdef_wrapper(env)
- if copied_node.py_func:
- env.pyfunc_entries.remove(copied_node.py_func.entry)
-
- self.specialize_copied_def(
- copied_node.py_func, cname, self.node.entry.as_variable,
- fused_to_specific, fused_types)
-
- if not self.replace_fused_typechecks(copied_node):
- break
-
+ copied_node.type = type
+ entry.type, type.entry = type, entry
+
+ entry.used = (entry.used or
+ self.node.entry.defined_in_pxd or
+ env.is_c_class_scope or
+ entry.is_cmethod)
+
+ if self.node.cfunc_declarator.optional_arg_count:
+ self.node.cfunc_declarator.declare_optional_arg_struct(
+ type, env, fused_cname=cname)
+
+ copied_node.return_type = type.return_type
+ self.create_new_local_scope(copied_node, env, fused_to_specific)
+
+ # Make the argument types in the CFuncDeclarator specific
+ self._specialize_function_args(copied_node.cfunc_declarator.args,
+ fused_to_specific)
+
+ # If a cpdef, declare all specialized cpdefs (this
+ # also calls analyse_declarations)
+ copied_node.declare_cpdef_wrapper(env)
+ if copied_node.py_func:
+ env.pyfunc_entries.remove(copied_node.py_func.entry)
+
+ self.specialize_copied_def(
+ copied_node.py_func, cname, self.node.entry.as_variable,
+ fused_to_specific, fused_types)
+
+ if not self.replace_fused_typechecks(copied_node):
+ break
+
# replace old entry with new entries
try:
cindex = env.cfunc_entries.index(self.node.entry)
@@ -197,227 +197,227 @@ class FusedCFuncDefNode(StatListNode):
else:
env.cfunc_entries[cindex:cindex+1] = new_cfunc_entries
- if orig_py_func:
- self.py_func = self.make_fused_cpdef(orig_py_func, env,
- is_def=False)
- else:
- self.py_func = orig_py_func
-
- def _get_fused_base_types(self, fused_compound_types):
- """
- Get a list of unique basic fused types, from a list of
- (possibly) compound fused types.
- """
- base_types = []
- seen = set()
- for fused_type in fused_compound_types:
- fused_type.get_fused_types(result=base_types, seen=seen)
- return base_types
-
- def _specialize_function_args(self, args, fused_to_specific):
- for arg in args:
- if arg.type.is_fused:
- arg.type = arg.type.specialize(fused_to_specific)
- if arg.type.is_memoryviewslice:
+ if orig_py_func:
+ self.py_func = self.make_fused_cpdef(orig_py_func, env,
+ is_def=False)
+ else:
+ self.py_func = orig_py_func
+
+ def _get_fused_base_types(self, fused_compound_types):
+ """
+ Get a list of unique basic fused types, from a list of
+ (possibly) compound fused types.
+ """
+ base_types = []
+ seen = set()
+ for fused_type in fused_compound_types:
+ fused_type.get_fused_types(result=base_types, seen=seen)
+ return base_types
+
+ def _specialize_function_args(self, args, fused_to_specific):
+ for arg in args:
+ if arg.type.is_fused:
+ arg.type = arg.type.specialize(fused_to_specific)
+ if arg.type.is_memoryviewslice:
arg.type.validate_memslice_dtype(arg.pos)
-
- def create_new_local_scope(self, node, env, f2s):
- """
- Create a new local scope for the copied node and append it to
- self.nodes. A new local scope is needed because the arguments with the
+
+ def create_new_local_scope(self, node, env, f2s):
+ """
+ Create a new local scope for the copied node and append it to
+ self.nodes. A new local scope is needed because the arguments with the
fused types are already in the local scope, and we need the specialized
- entries created after analyse_declarations on each specialized version
- of the (CFunc)DefNode.
- f2s is a dict mapping each fused type to its specialized version
- """
- node.create_local_scope(env)
- node.local_scope.fused_to_specific = f2s
-
- # This is copied from the original function, set it to false to
- # stop recursion
- node.has_fused_arguments = False
- self.nodes.append(node)
-
- def specialize_copied_def(self, node, cname, py_entry, f2s, fused_compound_types):
- """Specialize the copy of a DefNode given the copied node,
- the specialization cname and the original DefNode entry"""
- fused_types = self._get_fused_base_types(fused_compound_types)
- type_strings = [
- PyrexTypes.specialization_signature_string(fused_type, f2s)
- for fused_type in fused_types
- ]
-
- node.specialized_signature_string = '|'.join(type_strings)
-
- node.entry.pymethdef_cname = PyrexTypes.get_fused_cname(
- cname, node.entry.pymethdef_cname)
- node.entry.doc = py_entry.doc
- node.entry.doc_cname = py_entry.doc_cname
-
- def replace_fused_typechecks(self, copied_node):
- """
- Branch-prune fused type checks like
-
- if fused_t is int:
- ...
-
-        Returns whether an error was issued and whether we should stop
-        in order to prevent a flood of errors.
- """
- num_errors = Errors.num_errors
- transform = ParseTreeTransforms.ReplaceFusedTypeChecks(
- copied_node.local_scope)
- transform(copied_node)
-
- if Errors.num_errors > num_errors:
- return False
-
- return True
-
- def _fused_instance_checks(self, normal_types, pyx_code, env):
- """
+ entries created after analyse_declarations on each specialized version
+ of the (CFunc)DefNode.
+ f2s is a dict mapping each fused type to its specialized version
+ """
+ node.create_local_scope(env)
+ node.local_scope.fused_to_specific = f2s
+
+ # This is copied from the original function, set it to false to
+ # stop recursion
+ node.has_fused_arguments = False
+ self.nodes.append(node)
+
+ def specialize_copied_def(self, node, cname, py_entry, f2s, fused_compound_types):
+ """Specialize the copy of a DefNode given the copied node,
+ the specialization cname and the original DefNode entry"""
+ fused_types = self._get_fused_base_types(fused_compound_types)
+ type_strings = [
+ PyrexTypes.specialization_signature_string(fused_type, f2s)
+ for fused_type in fused_types
+ ]
+
+ node.specialized_signature_string = '|'.join(type_strings)
+
+ node.entry.pymethdef_cname = PyrexTypes.get_fused_cname(
+ cname, node.entry.pymethdef_cname)
+ node.entry.doc = py_entry.doc
+ node.entry.doc_cname = py_entry.doc_cname
+
+ def replace_fused_typechecks(self, copied_node):
+ """
+ Branch-prune fused type checks like
+
+ if fused_t is int:
+ ...
+
+        Returns whether an error was issued and whether we should stop
+        in order to prevent a flood of errors.
+ """
+ num_errors = Errors.num_errors
+ transform = ParseTreeTransforms.ReplaceFusedTypeChecks(
+ copied_node.local_scope)
+ transform(copied_node)
+
+ if Errors.num_errors > num_errors:
+ return False
+
+ return True
+
+ def _fused_instance_checks(self, normal_types, pyx_code, env):
+ """
Generate Cython code for instance checks, matching an object to
- specialized types.
- """
- for specialized_type in normal_types:
- # all_numeric = all_numeric and specialized_type.is_numeric
+ specialized types.
+ """
+ for specialized_type in normal_types:
+ # all_numeric = all_numeric and specialized_type.is_numeric
pyx_code.context.update(
py_type_name=specialized_type.py_type_name(),
specialized_type_name=specialized_type.specialization_string,
)
- pyx_code.put_chunk(
- u"""
+ pyx_code.put_chunk(
+ u"""
if isinstance(arg, {{py_type_name}}):
dest_sig[{{dest_sig_idx}}] = '{{specialized_type_name}}'; break
- """)
-
- def _dtype_name(self, dtype):
- if dtype.is_typedef:
- return '___pyx_%s' % dtype
- return str(dtype).replace(' ', '_')
-
- def _dtype_type(self, dtype):
- if dtype.is_typedef:
- return self._dtype_name(dtype)
- return str(dtype)
-
- def _sizeof_dtype(self, dtype):
- if dtype.is_pyobject:
- return 'sizeof(void *)'
- else:
- return "sizeof(%s)" % self._dtype_type(dtype)
-
- def _buffer_check_numpy_dtype_setup_cases(self, pyx_code):
-        "Set up some common cases to match dtypes against specializations"
- if pyx_code.indenter("if kind in b'iu':"):
- pyx_code.putln("pass")
- pyx_code.named_insertion_point("dtype_int")
- pyx_code.dedent()
-
- if pyx_code.indenter("elif kind == b'f':"):
- pyx_code.putln("pass")
- pyx_code.named_insertion_point("dtype_float")
- pyx_code.dedent()
-
- if pyx_code.indenter("elif kind == b'c':"):
- pyx_code.putln("pass")
- pyx_code.named_insertion_point("dtype_complex")
- pyx_code.dedent()
-
- if pyx_code.indenter("elif kind == b'O':"):
- pyx_code.putln("pass")
- pyx_code.named_insertion_point("dtype_object")
- pyx_code.dedent()
-
- match = "dest_sig[{{dest_sig_idx}}] = '{{specialized_type_name}}'"
- no_match = "dest_sig[{{dest_sig_idx}}] = None"
+ """)
+
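With, say, int and double as the normal (non-buffer) specializations, the chunk above expands to roughly the following generated dispatch code (a sketch; the exact names come from py_type_name() and specialization_string):

    if isinstance(arg, int):
        dest_sig[0] = 'int'; break
    if isinstance(arg, float):       # py_type_name() of C double is 'float'
        dest_sig[0] = 'double'; break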
+ def _dtype_name(self, dtype):
+ if dtype.is_typedef:
+ return '___pyx_%s' % dtype
+ return str(dtype).replace(' ', '_')
+
+ def _dtype_type(self, dtype):
+ if dtype.is_typedef:
+ return self._dtype_name(dtype)
+ return str(dtype)
+
+ def _sizeof_dtype(self, dtype):
+ if dtype.is_pyobject:
+ return 'sizeof(void *)'
+ else:
+ return "sizeof(%s)" % self._dtype_type(dtype)
+
+ def _buffer_check_numpy_dtype_setup_cases(self, pyx_code):
+        "Set up some common cases to match dtypes against specializations"
+ if pyx_code.indenter("if kind in b'iu':"):
+ pyx_code.putln("pass")
+ pyx_code.named_insertion_point("dtype_int")
+ pyx_code.dedent()
+
+ if pyx_code.indenter("elif kind == b'f':"):
+ pyx_code.putln("pass")
+ pyx_code.named_insertion_point("dtype_float")
+ pyx_code.dedent()
+
+ if pyx_code.indenter("elif kind == b'c':"):
+ pyx_code.putln("pass")
+ pyx_code.named_insertion_point("dtype_complex")
+ pyx_code.dedent()
+
+ if pyx_code.indenter("elif kind == b'O':"):
+ pyx_code.putln("pass")
+ pyx_code.named_insertion_point("dtype_object")
+ pyx_code.dedent()
+
+ match = "dest_sig[{{dest_sig_idx}}] = '{{specialized_type_name}}'"
+ no_match = "dest_sig[{{dest_sig_idx}}] = None"
def _buffer_check_numpy_dtype(self, pyx_code, specialized_buffer_types, pythran_types):
- """
- Match a numpy dtype object to the individual specializations.
- """
- self._buffer_check_numpy_dtype_setup_cases(pyx_code)
-
+ """
+ Match a numpy dtype object to the individual specializations.
+ """
+ self._buffer_check_numpy_dtype_setup_cases(pyx_code)
+
for specialized_type in pythran_types+specialized_buffer_types:
final_type = specialized_type
if specialized_type.is_pythran_expr:
specialized_type = specialized_type.org_buffer
- dtype = specialized_type.dtype
- pyx_code.context.update(
- itemsize_match=self._sizeof_dtype(dtype) + " == itemsize",
- signed_match="not (%s_is_signed ^ dtype_signed)" % self._dtype_name(dtype),
- dtype=dtype,
+ dtype = specialized_type.dtype
+ pyx_code.context.update(
+ itemsize_match=self._sizeof_dtype(dtype) + " == itemsize",
+ signed_match="not (%s_is_signed ^ dtype_signed)" % self._dtype_name(dtype),
+ dtype=dtype,
specialized_type_name=final_type.specialization_string)
-
- dtypes = [
- (dtype.is_int, pyx_code.dtype_int),
- (dtype.is_float, pyx_code.dtype_float),
- (dtype.is_complex, pyx_code.dtype_complex)
- ]
-
- for dtype_category, codewriter in dtypes:
- if dtype_category:
- cond = '{{itemsize_match}} and (<Py_ssize_t>arg.ndim) == %d' % (
- specialized_type.ndim,)
- if dtype.is_int:
- cond += ' and {{signed_match}}'
-
+
+ dtypes = [
+ (dtype.is_int, pyx_code.dtype_int),
+ (dtype.is_float, pyx_code.dtype_float),
+ (dtype.is_complex, pyx_code.dtype_complex)
+ ]
+
+ for dtype_category, codewriter in dtypes:
+ if dtype_category:
+ cond = '{{itemsize_match}} and (<Py_ssize_t>arg.ndim) == %d' % (
+ specialized_type.ndim,)
+ if dtype.is_int:
+ cond += ' and {{signed_match}}'
+
if final_type.is_pythran_expr:
cond += ' and arg_is_pythran_compatible'
- if codewriter.indenter("if %s:" % cond):
+ if codewriter.indenter("if %s:" % cond):
#codewriter.putln("print 'buffer match found based on numpy dtype'")
- codewriter.putln(self.match)
- codewriter.putln("break")
- codewriter.dedent()
-
- def _buffer_parse_format_string_check(self, pyx_code, decl_code,
- specialized_type, env):
- """
- For each specialized type, try to coerce the object to a memoryview
- slice of that type. This means obtaining a buffer and parsing the
- format string.
- TODO: separate buffer acquisition from format parsing
- """
- dtype = specialized_type.dtype
- if specialized_type.is_buffer:
- axes = [('direct', 'strided')] * specialized_type.ndim
- else:
- axes = specialized_type.axes
-
- memslice_type = PyrexTypes.MemoryViewSliceType(dtype, axes)
- memslice_type.create_from_py_utility_code(env)
- pyx_code.context.update(
- coerce_from_py_func=memslice_type.from_py_function,
- dtype=dtype)
- decl_code.putln(
+ codewriter.putln(self.match)
+ codewriter.putln("break")
+ codewriter.dedent()
+
+ def _buffer_parse_format_string_check(self, pyx_code, decl_code,
+ specialized_type, env):
+ """
+ For each specialized type, try to coerce the object to a memoryview
+ slice of that type. This means obtaining a buffer and parsing the
+ format string.
+ TODO: separate buffer acquisition from format parsing
+ """
+ dtype = specialized_type.dtype
+ if specialized_type.is_buffer:
+ axes = [('direct', 'strided')] * specialized_type.ndim
+ else:
+ axes = specialized_type.axes
+
+ memslice_type = PyrexTypes.MemoryViewSliceType(dtype, axes)
+ memslice_type.create_from_py_utility_code(env)
+ pyx_code.context.update(
+ coerce_from_py_func=memslice_type.from_py_function,
+ dtype=dtype)
+ decl_code.putln(
"{{memviewslice_cname}} {{coerce_from_py_func}}(object, int)")
-
- pyx_code.context.update(
- specialized_type_name=specialized_type.specialization_string,
- sizeof_dtype=self._sizeof_dtype(dtype))
-
- pyx_code.put_chunk(
- u"""
- # try {{dtype}}
- if itemsize == -1 or itemsize == {{sizeof_dtype}}:
+
+ pyx_code.context.update(
+ specialized_type_name=specialized_type.specialization_string,
+ sizeof_dtype=self._sizeof_dtype(dtype))
+
+ pyx_code.put_chunk(
+ u"""
+ # try {{dtype}}
+ if itemsize == -1 or itemsize == {{sizeof_dtype}}:
memslice = {{coerce_from_py_func}}(arg, 0)
- if memslice.memview:
- __PYX_XDEC_MEMVIEW(&memslice, 1)
- # print 'found a match for the buffer through format parsing'
- %s
- break
- else:
- __pyx_PyErr_Clear()
- """ % self.match)
-
+ if memslice.memview:
+ __PYX_XDEC_MEMVIEW(&memslice, 1)
+ # print 'found a match for the buffer through format parsing'
+ %s
+ break
+ else:
+ __pyx_PyErr_Clear()
+ """ % self.match)
+
def _buffer_checks(self, buffer_types, pythran_types, pyx_code, decl_code, env):
- """
- Generate Cython code to match objects to buffer specializations.
- First try to get a numpy dtype object and match it against the individual
- specializations. If that fails, try naively to coerce the object
- to each specialization, which obtains the buffer each time and tries
- to match the format string.
- """
+ """
+ Generate Cython code to match objects to buffer specializations.
+ First try to get a numpy dtype object and match it against the individual
+ specializations. If that fails, try naively to coerce the object
+ to each specialization, which obtains the buffer each time and tries
+ to match the format string.
+ """
# The first thing to find a match in this loop breaks out of the loop
pyx_code.put_chunk(
u"""
@@ -434,7 +434,7 @@ class FusedCFuncDefNode(StatListNode):
dtype = None
else:
dtype = None
-
+
itemsize = -1
if dtype is not None:
itemsize = dtype.itemsize
@@ -466,199 +466,199 @@ class FusedCFuncDefNode(StatListNode):
pyx_code.named_insertion_point("numpy_dtype_checks")
self._buffer_check_numpy_dtype(pyx_code, buffer_types, pythran_types)
pyx_code.dedent(2)
-
+
for specialized_type in buffer_types:
self._buffer_parse_format_string_check(
pyx_code, decl_code, specialized_type, env)
-
+
def _buffer_declarations(self, pyx_code, decl_code, all_buffer_types, pythran_types):
- """
- If we have any buffer specializations, write out some variable
- declarations and imports.
- """
- decl_code.put_chunk(
- u"""
- ctypedef struct {{memviewslice_cname}}:
- void *memview
-
- void __PYX_XDEC_MEMVIEW({{memviewslice_cname}} *, int have_gil)
- bint __pyx_memoryview_check(object)
- """)
-
- pyx_code.local_variable_declarations.put_chunk(
- u"""
- cdef {{memviewslice_cname}} memslice
- cdef Py_ssize_t itemsize
- cdef bint dtype_signed
- cdef char kind
-
- itemsize = -1
- """)
-
+ """
+ If we have any buffer specializations, write out some variable
+ declarations and imports.
+ """
+ decl_code.put_chunk(
+ u"""
+ ctypedef struct {{memviewslice_cname}}:
+ void *memview
+
+ void __PYX_XDEC_MEMVIEW({{memviewslice_cname}} *, int have_gil)
+ bint __pyx_memoryview_check(object)
+ """)
+
+ pyx_code.local_variable_declarations.put_chunk(
+ u"""
+ cdef {{memviewslice_cname}} memslice
+ cdef Py_ssize_t itemsize
+ cdef bint dtype_signed
+ cdef char kind
+
+ itemsize = -1
+ """)
+
if pythran_types:
pyx_code.local_variable_declarations.put_chunk(u"""
cdef bint arg_is_pythran_compatible
cdef Py_ssize_t cur_stride
""")
- pyx_code.imports.put_chunk(
- u"""
- cdef type ndarray
+ pyx_code.imports.put_chunk(
+ u"""
+ cdef type ndarray
ndarray = __Pyx_ImportNumPyArrayTypeIfAvailable()
- """)
-
+ """)
+
seen_typedefs = set()
- seen_int_dtypes = set()
- for buffer_type in all_buffer_types:
- dtype = buffer_type.dtype
+ seen_int_dtypes = set()
+ for buffer_type in all_buffer_types:
+ dtype = buffer_type.dtype
dtype_name = self._dtype_name(dtype)
- if dtype.is_typedef:
+ if dtype.is_typedef:
if dtype_name not in seen_typedefs:
seen_typedefs.add(dtype_name)
decl_code.putln(
'ctypedef %s %s "%s"' % (dtype.resolve(), dtype_name,
dtype.empty_declaration_code()))
-
- if buffer_type.dtype.is_int:
- if str(dtype) not in seen_int_dtypes:
- seen_int_dtypes.add(str(dtype))
+
+ if buffer_type.dtype.is_int:
+ if str(dtype) not in seen_int_dtypes:
+ seen_int_dtypes.add(str(dtype))
pyx_code.context.update(dtype_name=dtype_name,
- dtype_type=self._dtype_type(dtype))
- pyx_code.local_variable_declarations.put_chunk(
- u"""
- cdef bint {{dtype_name}}_is_signed
+ dtype_type=self._dtype_type(dtype))
+ pyx_code.local_variable_declarations.put_chunk(
+ u"""
+ cdef bint {{dtype_name}}_is_signed
{{dtype_name}}_is_signed = not (<{{dtype_type}}> -1 > 0)
- """)
-
- def _split_fused_types(self, arg):
- """
- Specialize fused types and split into normal types and buffer types.
- """
- specialized_types = PyrexTypes.get_specialized_types(arg.type)
+ """)
+
+ def _split_fused_types(self, arg):
+ """
+ Specialize fused types and split into normal types and buffer types.
+ """
+ specialized_types = PyrexTypes.get_specialized_types(arg.type)
# Prefer long over int, etc by sorting (see type classes in PyrexTypes.py)
specialized_types.sort()
- seen_py_type_names = set()
+ seen_py_type_names = set()
normal_types, buffer_types, pythran_types = [], [], []
has_object_fallback = False
- for specialized_type in specialized_types:
- py_type_name = specialized_type.py_type_name()
- if py_type_name:
- if py_type_name in seen_py_type_names:
- continue
- seen_py_type_names.add(py_type_name)
+ for specialized_type in specialized_types:
+ py_type_name = specialized_type.py_type_name()
+ if py_type_name:
+ if py_type_name in seen_py_type_names:
+ continue
+ seen_py_type_names.add(py_type_name)
if py_type_name == 'object':
has_object_fallback = True
else:
normal_types.append(specialized_type)
elif specialized_type.is_pythran_expr:
pythran_types.append(specialized_type)
- elif specialized_type.is_buffer or specialized_type.is_memoryviewslice:
- buffer_types.append(specialized_type)
-
+ elif specialized_type.is_buffer or specialized_type.is_memoryviewslice:
+ buffer_types.append(specialized_type)
+
return normal_types, buffer_types, pythran_types, has_object_fallback
-
- def _unpack_argument(self, pyx_code):
- pyx_code.put_chunk(
- u"""
- # PROCESSING ARGUMENT {{arg_tuple_idx}}
- if {{arg_tuple_idx}} < len(<tuple>args):
- arg = (<tuple>args)[{{arg_tuple_idx}}]
+
+ def _unpack_argument(self, pyx_code):
+ pyx_code.put_chunk(
+ u"""
+ # PROCESSING ARGUMENT {{arg_tuple_idx}}
+ if {{arg_tuple_idx}} < len(<tuple>args):
+ arg = (<tuple>args)[{{arg_tuple_idx}}]
elif kwargs is not None and '{{arg.name}}' in <dict>kwargs:
- arg = (<dict>kwargs)['{{arg.name}}']
- else:
- {{if arg.default}}
- arg = (<tuple>defaults)[{{default_idx}}]
- {{else}}
+ arg = (<dict>kwargs)['{{arg.name}}']
+ else:
+ {{if arg.default}}
+ arg = (<tuple>defaults)[{{default_idx}}]
+ {{else}}
{{if arg_tuple_idx < min_positional_args}}
raise TypeError("Expected at least %d argument%s, got %d" % (
{{min_positional_args}}, {{'"s"' if min_positional_args != 1 else '""'}}, len(<tuple>args)))
{{else}}
raise TypeError("Missing keyword-only argument: '%s'" % "{{arg.default}}")
{{endif}}
- {{endif}}
- """)
-
- def make_fused_cpdef(self, orig_py_func, env, is_def):
- """
- This creates the function that is indexable from Python and does
- runtime dispatch based on the argument types. The function gets the
- arg tuple and kwargs dict (or None) and the defaults tuple
- as arguments from the Binding Fused Function's tp_call.
- """
- from . import TreeFragment, Code, UtilityCode
-
- fused_types = self._get_fused_base_types([
- arg.type for arg in self.node.args if arg.type.is_fused])
-
- context = {
- 'memviewslice_cname': MemoryView.memviewslice_cname,
- 'func_args': self.node.args,
- 'n_fused': len(fused_types),
+ {{endif}}
+ """)
+
+ def make_fused_cpdef(self, orig_py_func, env, is_def):
+ """
+ This creates the function that is indexable from Python and does
+ runtime dispatch based on the argument types. The function gets the
+ arg tuple and kwargs dict (or None) and the defaults tuple
+ as arguments from the Binding Fused Function's tp_call.
+ """
+ from . import TreeFragment, Code, UtilityCode
+
+ fused_types = self._get_fused_base_types([
+ arg.type for arg in self.node.args if arg.type.is_fused])
+
+ context = {
+ 'memviewslice_cname': MemoryView.memviewslice_cname,
+ 'func_args': self.node.args,
+ 'n_fused': len(fused_types),
'min_positional_args':
self.node.num_required_args - self.node.num_required_kw_args
if is_def else
sum(1 for arg in self.node.args if arg.default is None),
- 'name': orig_py_func.entry.name,
- }
-
- pyx_code = Code.PyxCodeWriter(context=context)
- decl_code = Code.PyxCodeWriter(context=context)
- decl_code.put_chunk(
- u"""
- cdef extern from *:
- void __pyx_PyErr_Clear "PyErr_Clear" ()
+ 'name': orig_py_func.entry.name,
+ }
+
+ pyx_code = Code.PyxCodeWriter(context=context)
+ decl_code = Code.PyxCodeWriter(context=context)
+ decl_code.put_chunk(
+ u"""
+ cdef extern from *:
+ void __pyx_PyErr_Clear "PyErr_Clear" ()
type __Pyx_ImportNumPyArrayTypeIfAvailable()
int __Pyx_Is_Little_Endian()
- """)
- decl_code.indent()
-
- pyx_code.put_chunk(
- u"""
- def __pyx_fused_cpdef(signatures, args, kwargs, defaults):
- # FIXME: use a typed signature - currently fails badly because
- # default arguments inherit the types we specify here!
-
+ """)
+ decl_code.indent()
+
+ pyx_code.put_chunk(
+ u"""
+ def __pyx_fused_cpdef(signatures, args, kwargs, defaults):
+ # FIXME: use a typed signature - currently fails badly because
+ # default arguments inherit the types we specify here!
+
dest_sig = [None] * {{n_fused}}
-
+
if kwargs is not None and not kwargs:
kwargs = None
-
- cdef Py_ssize_t i
-
- # instance check body
- """)
-
- pyx_code.indent() # indent following code to function body
- pyx_code.named_insertion_point("imports")
+
+ cdef Py_ssize_t i
+
+ # instance check body
+ """)
+
+ pyx_code.indent() # indent following code to function body
+ pyx_code.named_insertion_point("imports")
pyx_code.named_insertion_point("func_defs")
- pyx_code.named_insertion_point("local_variable_declarations")
-
- fused_index = 0
- default_idx = 0
+ pyx_code.named_insertion_point("local_variable_declarations")
+
+ fused_index = 0
+ default_idx = 0
all_buffer_types = OrderedSet()
- seen_fused_types = set()
- for i, arg in enumerate(self.node.args):
- if arg.type.is_fused:
- arg_fused_types = arg.type.get_fused_types()
- if len(arg_fused_types) > 1:
- raise NotImplementedError("Determination of more than one fused base "
- "type per argument is not implemented.")
- fused_type = arg_fused_types[0]
-
- if arg.type.is_fused and fused_type not in seen_fused_types:
- seen_fused_types.add(fused_type)
-
- context.update(
- arg_tuple_idx=i,
- arg=arg,
- dest_sig_idx=fused_index,
- default_idx=default_idx,
- )
-
+ seen_fused_types = set()
+ for i, arg in enumerate(self.node.args):
+ if arg.type.is_fused:
+ arg_fused_types = arg.type.get_fused_types()
+ if len(arg_fused_types) > 1:
+ raise NotImplementedError("Determination of more than one fused base "
+ "type per argument is not implemented.")
+ fused_type = arg_fused_types[0]
+
+ if arg.type.is_fused and fused_type not in seen_fused_types:
+ seen_fused_types.add(fused_type)
+
+ context.update(
+ arg_tuple_idx=i,
+ arg=arg,
+ dest_sig_idx=fused_index,
+ default_idx=default_idx,
+ )
+
normal_types, buffer_types, pythran_types, has_object_fallback = self._split_fused_types(arg)
- self._unpack_argument(pyx_code)
+ self._unpack_argument(pyx_code)
# 'unrolled' loop, first match breaks out of it
if pyx_code.indenter("while 1:"):
@@ -675,227 +675,227 @@ class FusedCFuncDefNode(StatListNode):
pyx_code.putln("break")
pyx_code.dedent()
- fused_index += 1
- all_buffer_types.update(buffer_types)
+ fused_index += 1
+ all_buffer_types.update(buffer_types)
all_buffer_types.update(ty.org_buffer for ty in pythran_types)
-
- if arg.default:
- default_idx += 1
-
- if all_buffer_types:
+
+ if arg.default:
+ default_idx += 1
+
+ if all_buffer_types:
self._buffer_declarations(pyx_code, decl_code, all_buffer_types, pythran_types)
- env.use_utility_code(Code.UtilityCode.load_cached("Import", "ImportExport.c"))
+ env.use_utility_code(Code.UtilityCode.load_cached("Import", "ImportExport.c"))
env.use_utility_code(Code.UtilityCode.load_cached("ImportNumPyArray", "ImportExport.c"))
-
- pyx_code.put_chunk(
- u"""
- candidates = []
- for sig in <dict>signatures:
- match_found = False
+
+ pyx_code.put_chunk(
+ u"""
+ candidates = []
+ for sig in <dict>signatures:
+ match_found = False
src_sig = sig.strip('()').split('|')
for i in range(len(dest_sig)):
dst_type = dest_sig[i]
- if dst_type is not None:
+ if dst_type is not None:
if src_sig[i] == dst_type:
- match_found = True
- else:
- match_found = False
- break
-
- if match_found:
- candidates.append(sig)
-
- if not candidates:
- raise TypeError("No matching signature found")
- elif len(candidates) > 1:
- raise TypeError("Function call with ambiguous argument types")
- else:
- return (<dict>signatures)[candidates[0]]
- """)
-
- fragment_code = pyx_code.getvalue()
- # print decl_code.getvalue()
- # print fragment_code
+ match_found = True
+ else:
+ match_found = False
+ break
+
+ if match_found:
+ candidates.append(sig)
+
+ if not candidates:
+ raise TypeError("No matching signature found")
+ elif len(candidates) > 1:
+ raise TypeError("Function call with ambiguous argument types")
+ else:
+ return (<dict>signatures)[candidates[0]]
+ """)
+
+ fragment_code = pyx_code.getvalue()
+ # print decl_code.getvalue()
+ # print fragment_code
from .Optimize import ConstantFolding
fragment = TreeFragment.TreeFragment(
fragment_code, level='module', pipeline=[ConstantFolding()])
- ast = TreeFragment.SetPosTransform(self.node.pos)(fragment.root)
- UtilityCode.declare_declarations_in_scope(
- decl_code.getvalue(), env.global_scope())
- ast.scope = env
+ ast = TreeFragment.SetPosTransform(self.node.pos)(fragment.root)
+ UtilityCode.declare_declarations_in_scope(
+ decl_code.getvalue(), env.global_scope())
+ ast.scope = env
# FIXME: for static methods of cdef classes, we build the wrong signature here: first arg becomes 'self'
- ast.analyse_declarations(env)
- py_func = ast.stats[-1] # the DefNode
- self.fragment_scope = ast.scope
-
- if isinstance(self.node, DefNode):
- py_func.specialized_cpdefs = self.nodes[:]
- else:
- py_func.specialized_cpdefs = [n.py_func for n in self.nodes]
-
- return py_func
-
- def update_fused_defnode_entry(self, env):
- copy_attributes = (
- 'name', 'pos', 'cname', 'func_cname', 'pyfunc_cname',
- 'pymethdef_cname', 'doc', 'doc_cname', 'is_member',
- 'scope'
- )
-
- entry = self.py_func.entry
-
- for attr in copy_attributes:
- setattr(entry, attr,
- getattr(self.orig_py_func.entry, attr))
-
- self.py_func.name = self.orig_py_func.name
- self.py_func.doc = self.orig_py_func.doc
-
- env.entries.pop('__pyx_fused_cpdef', None)
- if isinstance(self.node, DefNode):
- env.entries[entry.name] = entry
- else:
- env.entries[entry.name].as_variable = entry
-
- env.pyfunc_entries.append(entry)
-
- self.py_func.entry.fused_cfunction = self
- for node in self.nodes:
- if isinstance(self.node, DefNode):
- node.fused_py_func = self.py_func
- else:
- node.py_func.fused_py_func = self.py_func
- node.entry.as_variable = entry
-
- self.synthesize_defnodes()
- self.stats.append(self.__signatures__)
-
- def analyse_expressions(self, env):
- """
- Analyse the expressions. Take care to only evaluate default arguments
- once and clone the result for all specializations
- """
- for fused_compound_type in self.fused_compound_types:
- for fused_type in fused_compound_type.get_fused_types():
- for specialization_type in fused_type.types:
- if specialization_type.is_complex:
- specialization_type.create_declaration_utility_code(env)
-
- if self.py_func:
- self.__signatures__ = self.__signatures__.analyse_expressions(env)
- self.py_func = self.py_func.analyse_expressions(env)
- self.resulting_fused_function = self.resulting_fused_function.analyse_expressions(env)
- self.fused_func_assignment = self.fused_func_assignment.analyse_expressions(env)
-
- self.defaults = defaults = []
-
- for arg in self.node.args:
- if arg.default:
- arg.default = arg.default.analyse_expressions(env)
- defaults.append(ProxyNode(arg.default))
- else:
- defaults.append(None)
-
- for i, stat in enumerate(self.stats):
- stat = self.stats[i] = stat.analyse_expressions(env)
- if isinstance(stat, FuncDefNode):
- for arg, default in zip(stat.args, defaults):
- if default is not None:
- arg.default = CloneNode(default).coerce_to(arg.type, env)
-
- if self.py_func:
- args = [CloneNode(default) for default in defaults if default]
- self.defaults_tuple = TupleNode(self.pos, args=args)
+ ast.analyse_declarations(env)
+ py_func = ast.stats[-1] # the DefNode
+ self.fragment_scope = ast.scope
+
+ if isinstance(self.node, DefNode):
+ py_func.specialized_cpdefs = self.nodes[:]
+ else:
+ py_func.specialized_cpdefs = [n.py_func for n in self.nodes]
+
+ return py_func
+
+ def update_fused_defnode_entry(self, env):
+ copy_attributes = (
+ 'name', 'pos', 'cname', 'func_cname', 'pyfunc_cname',
+ 'pymethdef_cname', 'doc', 'doc_cname', 'is_member',
+ 'scope'
+ )
+
+ entry = self.py_func.entry
+
+ for attr in copy_attributes:
+ setattr(entry, attr,
+ getattr(self.orig_py_func.entry, attr))
+
+ self.py_func.name = self.orig_py_func.name
+ self.py_func.doc = self.orig_py_func.doc
+
+ env.entries.pop('__pyx_fused_cpdef', None)
+ if isinstance(self.node, DefNode):
+ env.entries[entry.name] = entry
+ else:
+ env.entries[entry.name].as_variable = entry
+
+ env.pyfunc_entries.append(entry)
+
+ self.py_func.entry.fused_cfunction = self
+ for node in self.nodes:
+ if isinstance(self.node, DefNode):
+ node.fused_py_func = self.py_func
+ else:
+ node.py_func.fused_py_func = self.py_func
+ node.entry.as_variable = entry
+
+ self.synthesize_defnodes()
+ self.stats.append(self.__signatures__)
+
+ def analyse_expressions(self, env):
+ """
+ Analyse the expressions. Take care to only evaluate default arguments
+ once and clone the result for all specializations
+ """
+ for fused_compound_type in self.fused_compound_types:
+ for fused_type in fused_compound_type.get_fused_types():
+ for specialization_type in fused_type.types:
+ if specialization_type.is_complex:
+ specialization_type.create_declaration_utility_code(env)
+
+ if self.py_func:
+ self.__signatures__ = self.__signatures__.analyse_expressions(env)
+ self.py_func = self.py_func.analyse_expressions(env)
+ self.resulting_fused_function = self.resulting_fused_function.analyse_expressions(env)
+ self.fused_func_assignment = self.fused_func_assignment.analyse_expressions(env)
+
+ self.defaults = defaults = []
+
+ for arg in self.node.args:
+ if arg.default:
+ arg.default = arg.default.analyse_expressions(env)
+ defaults.append(ProxyNode(arg.default))
+ else:
+ defaults.append(None)
+
+ for i, stat in enumerate(self.stats):
+ stat = self.stats[i] = stat.analyse_expressions(env)
+ if isinstance(stat, FuncDefNode):
+ for arg, default in zip(stat.args, defaults):
+ if default is not None:
+ arg.default = CloneNode(default).coerce_to(arg.type, env)
+
+ if self.py_func:
+ args = [CloneNode(default) for default in defaults if default]
+ self.defaults_tuple = TupleNode(self.pos, args=args)
self.defaults_tuple = self.defaults_tuple.analyse_types(env, skip_children=True).coerce_to_pyobject(env)
- self.defaults_tuple = ProxyNode(self.defaults_tuple)
- self.code_object = ProxyNode(self.specialized_pycfuncs[0].code_object)
-
- fused_func = self.resulting_fused_function.arg
- fused_func.defaults_tuple = CloneNode(self.defaults_tuple)
- fused_func.code_object = CloneNode(self.code_object)
-
- for i, pycfunc in enumerate(self.specialized_pycfuncs):
- pycfunc.code_object = CloneNode(self.code_object)
- pycfunc = self.specialized_pycfuncs[i] = pycfunc.analyse_types(env)
- pycfunc.defaults_tuple = CloneNode(self.defaults_tuple)
- return self
-
- def synthesize_defnodes(self):
- """
- Create the __signatures__ dict of PyCFunctionNode specializations.
- """
- if isinstance(self.nodes[0], CFuncDefNode):
- nodes = [node.py_func for node in self.nodes]
- else:
- nodes = self.nodes
-
+ self.defaults_tuple = ProxyNode(self.defaults_tuple)
+ self.code_object = ProxyNode(self.specialized_pycfuncs[0].code_object)
+
+ fused_func = self.resulting_fused_function.arg
+ fused_func.defaults_tuple = CloneNode(self.defaults_tuple)
+ fused_func.code_object = CloneNode(self.code_object)
+
+ for i, pycfunc in enumerate(self.specialized_pycfuncs):
+ pycfunc.code_object = CloneNode(self.code_object)
+ pycfunc = self.specialized_pycfuncs[i] = pycfunc.analyse_types(env)
+ pycfunc.defaults_tuple = CloneNode(self.defaults_tuple)
+ return self
+
+ def synthesize_defnodes(self):
+ """
+ Create the __signatures__ dict of PyCFunctionNode specializations.
+ """
+ if isinstance(self.nodes[0], CFuncDefNode):
+ nodes = [node.py_func for node in self.nodes]
+ else:
+ nodes = self.nodes
+
signatures = [StringEncoding.EncodedString(node.specialized_signature_string)
for node in nodes]
- keys = [ExprNodes.StringNode(node.pos, value=sig)
+ keys = [ExprNodes.StringNode(node.pos, value=sig)
for node, sig in zip(nodes, signatures)]
values = [ExprNodes.PyCFunctionNode.from_defnode(node, binding=True)
for node in nodes]
-
+
self.__signatures__ = ExprNodes.DictNode.from_pairs(self.pos, zip(keys, values))
- self.specialized_pycfuncs = values
- for pycfuncnode in values:
- pycfuncnode.is_specialization = True
-
- def generate_function_definitions(self, env, code):
- if self.py_func:
- self.py_func.pymethdef_required = True
- self.fused_func_assignment.generate_function_definitions(env, code)
-
- for stat in self.stats:
- if isinstance(stat, FuncDefNode) and stat.entry.used:
- code.mark_pos(stat.pos)
- stat.generate_function_definitions(env, code)
-
- def generate_execution_code(self, code):
- # Note: all def function specialization are wrapped in PyCFunction
- # nodes in the self.__signatures__ dictnode.
- for default in self.defaults:
- if default is not None:
- default.generate_evaluation_code(code)
-
- if self.py_func:
- self.defaults_tuple.generate_evaluation_code(code)
- self.code_object.generate_evaluation_code(code)
-
- for stat in self.stats:
- code.mark_pos(stat.pos)
- if isinstance(stat, ExprNodes.ExprNode):
- stat.generate_evaluation_code(code)
- else:
- stat.generate_execution_code(code)
-
- if self.__signatures__:
- self.resulting_fused_function.generate_evaluation_code(code)
-
- code.putln(
- "((__pyx_FusedFunctionObject *) %s)->__signatures__ = %s;" %
- (self.resulting_fused_function.result(),
- self.__signatures__.result()))
- code.put_giveref(self.__signatures__.result())
+ self.specialized_pycfuncs = values
+ for pycfuncnode in values:
+ pycfuncnode.is_specialization = True
+
+ def generate_function_definitions(self, env, code):
+ if self.py_func:
+ self.py_func.pymethdef_required = True
+ self.fused_func_assignment.generate_function_definitions(env, code)
+
+ for stat in self.stats:
+ if isinstance(stat, FuncDefNode) and stat.entry.used:
+ code.mark_pos(stat.pos)
+ stat.generate_function_definitions(env, code)
+
+ def generate_execution_code(self, code):
+        # Note: all def function specializations are wrapped in PyCFunction
+ # nodes in the self.__signatures__ dictnode.
+ for default in self.defaults:
+ if default is not None:
+ default.generate_evaluation_code(code)
+
+ if self.py_func:
+ self.defaults_tuple.generate_evaluation_code(code)
+ self.code_object.generate_evaluation_code(code)
+
+ for stat in self.stats:
+ code.mark_pos(stat.pos)
+ if isinstance(stat, ExprNodes.ExprNode):
+ stat.generate_evaluation_code(code)
+ else:
+ stat.generate_execution_code(code)
+
+ if self.__signatures__:
+ self.resulting_fused_function.generate_evaluation_code(code)
+
+ code.putln(
+ "((__pyx_FusedFunctionObject *) %s)->__signatures__ = %s;" %
+ (self.resulting_fused_function.result(),
+ self.__signatures__.result()))
+ code.put_giveref(self.__signatures__.result())
self.__signatures__.generate_post_assignment_code(code)
self.__signatures__.free_temps(code)
-
- self.fused_func_assignment.generate_execution_code(code)
-
- # Dispose of results
- self.resulting_fused_function.generate_disposal_code(code)
+
+ self.fused_func_assignment.generate_execution_code(code)
+
+ # Dispose of results
+ self.resulting_fused_function.generate_disposal_code(code)
self.resulting_fused_function.free_temps(code)
- self.defaults_tuple.generate_disposal_code(code)
+ self.defaults_tuple.generate_disposal_code(code)
self.defaults_tuple.free_temps(code)
- self.code_object.generate_disposal_code(code)
+ self.code_object.generate_disposal_code(code)
self.code_object.free_temps(code)
-
- for default in self.defaults:
- if default is not None:
- default.generate_disposal_code(code)
+
+ for default in self.defaults:
+ if default is not None:
+ default.generate_disposal_code(code)
default.free_temps(code)
-
- def annotate(self, code):
- for stat in self.stats:
- stat.annotate(code)
+
+ def annotate(self, code):
+ for stat in self.stats:
+ stat.annotate(code)
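
Editor's note: the chunk emitted above by make_fused_cpdef is what performs the runtime dispatch: it fills dest_sig with one matched type name (or None) per fused argument and then requires exactly one entry of the __signatures__ dict to agree with it. Below is a minimal pure-Python sketch of that matching step, assuming plain string inputs; the helper name pick_specialization and the sample data are illustrative only, not part of the generated code.

# Hedged sketch of the signature-matching loop that the generated dispatcher runs.
def pick_specialization(signatures, dest_sig):
    # signatures: dict mapping strings like "int|float" to specialized functions.
    # dest_sig: matched type name per fused argument, or None for "no constraint".
    candidates = []
    for sig in signatures:
        src_sig = sig.strip('()').split('|')
        match_found = False
        for dst_type, src_type in zip(dest_sig, src_sig):
            if dst_type is not None:
                if src_type == dst_type:
                    match_found = True
                else:
                    match_found = False
                    break
        if match_found:
            candidates.append(sig)
    if not candidates:
        raise TypeError("No matching signature found")
    if len(candidates) > 1:
        raise TypeError("Function call with ambiguous argument types")
    return signatures[candidates[0]]

# Hypothetical usage: one fused argument with two specializations.
specs = {"int": "f[int]", "double": "f[double]"}
assert pick_specialization(specs, ["double"]) == "f[double]"
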
diff --git a/contrib/tools/cython/Cython/Compiler/Future.py b/contrib/tools/cython/Cython/Compiler/Future.py
index a4350f99ec..848792e00b 100644
--- a/contrib/tools/cython/Cython/Compiler/Future.py
+++ b/contrib/tools/cython/Cython/Compiler/Future.py
@@ -1,15 +1,15 @@
-def _get_feature(name):
- import __future__
- # fall back to a unique fake object for earlier Python versions or Python 3
- return getattr(__future__, name, object())
-
-unicode_literals = _get_feature("unicode_literals")
+def _get_feature(name):
+ import __future__
+ # fall back to a unique fake object for earlier Python versions or Python 3
+ return getattr(__future__, name, object())
+
+unicode_literals = _get_feature("unicode_literals")
with_statement = _get_feature("with_statement") # dummy
-division = _get_feature("division")
-print_function = _get_feature("print_function")
-absolute_import = _get_feature("absolute_import")
-nested_scopes = _get_feature("nested_scopes") # dummy
-generators = _get_feature("generators") # dummy
+division = _get_feature("division")
+print_function = _get_feature("print_function")
+absolute_import = _get_feature("absolute_import")
+nested_scopes = _get_feature("nested_scopes") # dummy
+generators = _get_feature("generators") # dummy
generator_stop = _get_feature("generator_stop")
-
-del _get_feature
+
+del _get_feature
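
Editor's note: the point of the object() fallback in _get_feature above is that the returned sentinel is unique and unequal to any real __future__ feature, so membership tests on a set of enabled directives simply stay False on interpreters that lack the feature. A small self-contained illustration follows (not part of the patch; the feature name "braces_everywhere" is made up):

import __future__

def get_feature(name):
    # Same pattern as _get_feature: fall back to a fresh sentinel object.
    return getattr(__future__, name, object())

print_function = get_feature("print_function")   # real feature
missing = get_feature("braces_everywhere")        # hypothetical, never defined

enabled = {print_function}
assert print_function in enabled
assert missing not in enabled
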
diff --git a/contrib/tools/cython/Cython/Compiler/Interpreter.py b/contrib/tools/cython/Cython/Compiler/Interpreter.py
index 3d91db2e75..9ec391f2a0 100644
--- a/contrib/tools/cython/Cython/Compiler/Interpreter.py
+++ b/contrib/tools/cython/Cython/Compiler/Interpreter.py
@@ -1,64 +1,64 @@
-"""
-This module deals with interpreting the parse tree as Python
-would have done, in the compiler.
-
-For now this only covers parse tree to value conversion of
-compile-time values.
-"""
-
-from __future__ import absolute_import
-
-from .Nodes import *
-from .ExprNodes import *
-from .Errors import CompileError
-
-
-class EmptyScope(object):
- def lookup(self, name):
- return None
-
-empty_scope = EmptyScope()
-
-def interpret_compiletime_options(optlist, optdict, type_env=None, type_args=()):
- """
- Tries to interpret a list of compile time option nodes.
- The result will be a tuple (optlist, optdict) but where
- all expression nodes have been interpreted. The result is
- in the form of tuples (value, pos).
-
- optlist is a list of nodes, while optdict is a DictNode (the
- result optdict is a dict)
-
- If type_env is set, all type nodes will be analysed and the resulting
- type set. Otherwise only interpretateable ExprNodes
- are allowed, other nodes raises errors.
-
- A CompileError will be raised if there are problems.
- """
-
- def interpret(node, ix):
- if ix in type_args:
- if type_env:
- type = node.analyse_as_type(type_env)
- if not type:
- raise CompileError(node.pos, "Invalid type.")
- return (type, node.pos)
- else:
- raise CompileError(node.pos, "Type not allowed here.")
- else:
- if (sys.version_info[0] >=3 and
- isinstance(node, StringNode) and
- node.unicode_value is not None):
- return (node.unicode_value, node.pos)
- return (node.compile_time_value(empty_scope), node.pos)
-
- if optlist:
- optlist = [interpret(x, ix) for ix, x in enumerate(optlist)]
- if optdict:
- assert isinstance(optdict, DictNode)
- new_optdict = {}
- for item in optdict.key_value_pairs:
- new_key, dummy = interpret(item.key, None)
- new_optdict[new_key] = interpret(item.value, item.key.value)
- optdict = new_optdict
- return (optlist, new_optdict)
+"""
+This module deals with interpreting the parse tree as Python
+would have done, in the compiler.
+
+For now this only covers parse tree to value conversion of
+compile-time values.
+"""
+
+from __future__ import absolute_import
+
+from .Nodes import *
+from .ExprNodes import *
+from .Errors import CompileError
+
+
+class EmptyScope(object):
+ def lookup(self, name):
+ return None
+
+empty_scope = EmptyScope()
+
+def interpret_compiletime_options(optlist, optdict, type_env=None, type_args=()):
+ """
+ Tries to interpret a list of compile time option nodes.
+    The result will be a tuple (optlist, optdict) in which
+    all expression nodes have been interpreted. The results are
+    in the form of (value, pos) tuples.
+
+ optlist is a list of nodes, while optdict is a DictNode (the
+ result optdict is a dict)
+
+ If type_env is set, all type nodes will be analysed and the resulting
+    type set. Otherwise only interpretable ExprNodes
+    are allowed; other nodes raise errors.
+
+ A CompileError will be raised if there are problems.
+ """
+
+ def interpret(node, ix):
+ if ix in type_args:
+ if type_env:
+ type = node.analyse_as_type(type_env)
+ if not type:
+ raise CompileError(node.pos, "Invalid type.")
+ return (type, node.pos)
+ else:
+ raise CompileError(node.pos, "Type not allowed here.")
+ else:
+ if (sys.version_info[0] >=3 and
+ isinstance(node, StringNode) and
+ node.unicode_value is not None):
+ return (node.unicode_value, node.pos)
+ return (node.compile_time_value(empty_scope), node.pos)
+
+ if optlist:
+ optlist = [interpret(x, ix) for ix, x in enumerate(optlist)]
+ if optdict:
+ assert isinstance(optdict, DictNode)
+ new_optdict = {}
+ for item in optdict.key_value_pairs:
+ new_key, dummy = interpret(item.key, None)
+ new_optdict[new_key] = interpret(item.value, item.key.value)
+ optdict = new_optdict
+    return (optlist, optdict)
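
Editor's note: the practical effect of interpret_compiletime_options above is that every option node collapses to a (value, pos) pair, and a DictNode of options becomes a plain dict keyed by the interpreted key values. The sketch below mimics only that result shape with trivial stand-in nodes; FakeNode and interpret_list are invented for illustration and are not Cython APIs.

class FakeNode:
    """Stand-in for an ExprNode that already knows its compile-time value."""
    def __init__(self, value, pos):
        self.value = value
        self.pos = pos

    def compile_time_value(self, scope):
        return self.value

def interpret_list(optlist):
    # Mirrors the non-type branch of interpret(): each node -> (value, pos).
    return [(node.compile_time_value(None), node.pos) for node in optlist]

nodes = [FakeNode(True, ("mod.pyx", 3, 0)), FakeNode("c", ("mod.pyx", 3, 10))]
assert interpret_list(nodes) == [(True, ("mod.pyx", 3, 0)), ("c", ("mod.pyx", 3, 10))]
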
diff --git a/contrib/tools/cython/Cython/Compiler/Lexicon.py b/contrib/tools/cython/Cython/Compiler/Lexicon.py
index 3b79d869c5..72c9ceaefd 100644
--- a/contrib/tools/cython/Cython/Compiler/Lexicon.py
+++ b/contrib/tools/cython/Cython/Compiler/Lexicon.py
@@ -1,138 +1,138 @@
-# cython: language_level=3, py2_import=True
-#
-# Cython Scanner - Lexical Definitions
-#
-
+# cython: language_level=3, py2_import=True
+#
+# Cython Scanner - Lexical Definitions
+#
+
from __future__ import absolute_import, unicode_literals
-
-raw_prefixes = "rR"
-bytes_prefixes = "bB"
+
+raw_prefixes = "rR"
+bytes_prefixes = "bB"
string_prefixes = "fFuU" + bytes_prefixes
-char_prefixes = "cC"
-any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
-IDENT = 'IDENT'
-
-
-def make_lexicon():
- from ..Plex import \
- Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \
- TEXT, IGNORE, State, Lexicon
- from .Scanning import Method
-
- letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
- digit = Any("0123456789")
- bindigit = Any("01")
- octdigit = Any("01234567")
- hexdigit = Any("0123456789ABCDEFabcdef")
- indentation = Bol + Rep(Any(" \t"))
-
+char_prefixes = "cC"
+any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
+IDENT = 'IDENT'
+
+
+def make_lexicon():
+ from ..Plex import \
+ Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \
+ TEXT, IGNORE, State, Lexicon
+ from .Scanning import Method
+
+ letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+ digit = Any("0123456789")
+ bindigit = Any("01")
+ octdigit = Any("01234567")
+ hexdigit = Any("0123456789ABCDEFabcdef")
+ indentation = Bol + Rep(Any(" \t"))
+
def underscore_digits(d):
return Rep1(d) + Rep(Str("_") + Rep1(d))
decimal = underscore_digits(digit)
- dot = Str(".")
- exponent = Any("Ee") + Opt(Any("+-")) + decimal
- decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
-
- name = letter + Rep(letter | digit)
+ dot = Str(".")
+ exponent = Any("Ee") + Opt(Any("+-")) + decimal
+ decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
+
+ name = letter + Rep(letter | digit)
intconst = decimal | (Str("0") + ((Any("Xx") + underscore_digits(hexdigit)) |
(Any("Oo") + underscore_digits(octdigit)) |
(Any("Bb") + underscore_digits(bindigit)) ))
- intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
- intliteral = intconst + intsuffix
- fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
- imagconst = (intconst | fltconst) + Any("jJ")
-
+ intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
+ intliteral = intconst + intsuffix
+ fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
+ imagconst = (intconst | fltconst) + Any("jJ")
+
# invalid combinations of prefixes are caught in p_string_literal
beginstring = Opt(Rep(Any(string_prefixes + raw_prefixes)) |
- Any(char_prefixes)
- ) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
- two_oct = octdigit + octdigit
- three_oct = octdigit + octdigit + octdigit
- two_hex = hexdigit + hexdigit
- four_hex = two_hex + two_hex
- escapeseq = Str("\\") + (two_oct | three_oct |
- Str('N{') + Rep(AnyBut('}')) + Str('}') |
- Str('u') + four_hex | Str('x') + two_hex |
- Str('U') + four_hex + four_hex | AnyChar)
-
- bra = Any("([{")
- ket = Any(")]}")
- punct = Any(":,;+-*/|&<>=.%`~^?!@")
- diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
- "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
- "<<=", ">>=", "**=", "//=", "->", "@=")
- spaces = Rep1(Any(" \t\f"))
- escaped_newline = Str("\\\n")
- lineterm = Eol + Opt(Str("\n"))
-
- comment = Str("#") + Rep(AnyBut("\n"))
-
- return Lexicon([
- (name, IDENT),
+ Any(char_prefixes)
+ ) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
+ two_oct = octdigit + octdigit
+ three_oct = octdigit + octdigit + octdigit
+ two_hex = hexdigit + hexdigit
+ four_hex = two_hex + two_hex
+ escapeseq = Str("\\") + (two_oct | three_oct |
+ Str('N{') + Rep(AnyBut('}')) + Str('}') |
+ Str('u') + four_hex | Str('x') + two_hex |
+ Str('U') + four_hex + four_hex | AnyChar)
+
+ bra = Any("([{")
+ ket = Any(")]}")
+ punct = Any(":,;+-*/|&<>=.%`~^?!@")
+ diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//",
+ "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=",
+ "<<=", ">>=", "**=", "//=", "->", "@=")
+ spaces = Rep1(Any(" \t\f"))
+ escaped_newline = Str("\\\n")
+ lineterm = Eol + Opt(Str("\n"))
+
+ comment = Str("#") + Rep(AnyBut("\n"))
+
+ return Lexicon([
+ (name, IDENT),
(intliteral, Method('strip_underscores', symbol='INT')),
(fltconst, Method('strip_underscores', symbol='FLOAT')),
(imagconst, Method('strip_underscores', symbol='IMAG')),
- (punct | diphthong, TEXT),
-
- (bra, Method('open_bracket_action')),
- (ket, Method('close_bracket_action')),
- (lineterm, Method('newline_action')),
-
- (beginstring, Method('begin_string_action')),
-
- (comment, IGNORE),
- (spaces, IGNORE),
- (escaped_newline, IGNORE),
-
- State('INDENT', [
- (comment + lineterm, Method('commentline')),
- (Opt(spaces) + Opt(comment) + lineterm, IGNORE),
- (indentation, Method('indentation_action')),
- (Eof, Method('eof_action'))
- ]),
-
- State('SQ_STRING', [
- (escapeseq, 'ESCAPE'),
- (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
- (Str('"'), 'CHARS'),
- (Str("\n"), Method('unclosed_string_action')),
- (Str("'"), Method('end_string_action')),
- (Eof, 'EOF')
- ]),
-
- State('DQ_STRING', [
- (escapeseq, 'ESCAPE'),
- (Rep1(AnyBut('"\n\\')), 'CHARS'),
- (Str("'"), 'CHARS'),
- (Str("\n"), Method('unclosed_string_action')),
- (Str('"'), Method('end_string_action')),
- (Eof, 'EOF')
- ]),
-
- State('TSQ_STRING', [
- (escapeseq, 'ESCAPE'),
- (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
- (Any("'\""), 'CHARS'),
- (Str("\n"), 'NEWLINE'),
- (Str("'''"), Method('end_string_action')),
- (Eof, 'EOF')
- ]),
-
- State('TDQ_STRING', [
- (escapeseq, 'ESCAPE'),
- (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
- (Any("'\""), 'CHARS'),
- (Str("\n"), 'NEWLINE'),
- (Str('"""'), Method('end_string_action')),
- (Eof, 'EOF')
- ]),
-
- (Eof, Method('eof_action'))
- ],
-
- # FIXME: Plex 1.9 needs different args here from Plex 1.1.4
- #debug_flags = scanner_debug_flags,
- #debug_file = scanner_dump_file
- )
-
+ (punct | diphthong, TEXT),
+
+ (bra, Method('open_bracket_action')),
+ (ket, Method('close_bracket_action')),
+ (lineterm, Method('newline_action')),
+
+ (beginstring, Method('begin_string_action')),
+
+ (comment, IGNORE),
+ (spaces, IGNORE),
+ (escaped_newline, IGNORE),
+
+ State('INDENT', [
+ (comment + lineterm, Method('commentline')),
+ (Opt(spaces) + Opt(comment) + lineterm, IGNORE),
+ (indentation, Method('indentation_action')),
+ (Eof, Method('eof_action'))
+ ]),
+
+ State('SQ_STRING', [
+ (escapeseq, 'ESCAPE'),
+ (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
+ (Str('"'), 'CHARS'),
+ (Str("\n"), Method('unclosed_string_action')),
+ (Str("'"), Method('end_string_action')),
+ (Eof, 'EOF')
+ ]),
+
+ State('DQ_STRING', [
+ (escapeseq, 'ESCAPE'),
+ (Rep1(AnyBut('"\n\\')), 'CHARS'),
+ (Str("'"), 'CHARS'),
+ (Str("\n"), Method('unclosed_string_action')),
+ (Str('"'), Method('end_string_action')),
+ (Eof, 'EOF')
+ ]),
+
+ State('TSQ_STRING', [
+ (escapeseq, 'ESCAPE'),
+ (Rep1(AnyBut("'\"\n\\")), 'CHARS'),
+ (Any("'\""), 'CHARS'),
+ (Str("\n"), 'NEWLINE'),
+ (Str("'''"), Method('end_string_action')),
+ (Eof, 'EOF')
+ ]),
+
+ State('TDQ_STRING', [
+ (escapeseq, 'ESCAPE'),
+ (Rep1(AnyBut('"\'\n\\')), 'CHARS'),
+ (Any("'\""), 'CHARS'),
+ (Str("\n"), 'NEWLINE'),
+ (Str('"""'), Method('end_string_action')),
+ (Eof, 'EOF')
+ ]),
+
+ (Eof, Method('eof_action'))
+ ],
+
+ # FIXME: Plex 1.9 needs different args here from Plex 1.1.4
+ #debug_flags = scanner_debug_flags,
+ #debug_file = scanner_dump_file
+ )
+
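
Editor's note: the lexicon above is assembled from Plex combinators (Str, Any, Rep, Opt, ...), which read much like regular expressions. As a reading aid only, here is an approximate translation of the underscore-separated integer and float shapes into the standard re module; this is not how Cython actually scans, and the variable names simply echo the combinator definitions above.

import re

digit = r"[0-9]"
underscore_digits = rf"{digit}+(?:_{digit}+)*"      # underscore_digits(digit)
decimal = underscore_digits
exponent = rf"[Ee][+-]?{decimal}"                   # Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = rf"(?:{decimal}\.(?:{decimal})?|\.{decimal})"
fltconst = rf"(?:{decimal_fract}(?:{exponent})?|{decimal}{exponent})"

assert re.fullmatch(decimal, "12_345")
assert not re.fullmatch(decimal, "_123")
assert re.fullmatch(fltconst, "1_000.5e-3")
assert re.fullmatch(fltconst, ".5")
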
diff --git a/contrib/tools/cython/Cython/Compiler/Main.py b/contrib/tools/cython/Cython/Compiler/Main.py
index b7aac5ff5e..af873843b5 100644
--- a/contrib/tools/cython/Cython/Compiler/Main.py
+++ b/contrib/tools/cython/Cython/Compiler/Main.py
@@ -1,101 +1,101 @@
-#
-# Cython Top Level
-#
-
-from __future__ import absolute_import
-
-import os
-import re
-import sys
+#
+# Cython Top Level
+#
+
+from __future__ import absolute_import
+
+import os
+import re
+import sys
import io
-
+
if sys.version_info[:2] < (2, 6) or (3, 0) <= sys.version_info[:2] < (3, 3):
sys.stderr.write("Sorry, Cython requires Python 2.6+ or 3.3+, found %d.%d\n" % tuple(sys.version_info[:2]))
- sys.exit(1)
-
+ sys.exit(1)
+
try:
from __builtin__ import basestring
except ImportError:
basestring = str
-# Do not import Parsing here, import it when needed, because Parsing imports
-# Nodes, which globally needs debug command line options initialized to set a
-# conditional metaclass. These options are processed by CmdLine called from
-# main() in this file.
-# import Parsing
+# Do not import Parsing here, import it when needed, because Parsing imports
+# Nodes, which globally needs debug command line options initialized to set a
+# conditional metaclass. These options are processed by CmdLine called from
+# main() in this file.
+# import Parsing
from . import Errors
from .StringEncoding import EncodedString
-from .Scanning import PyrexScanner, FileSourceDescriptor
-from .Errors import PyrexError, CompileError, error, warning
-from .Symtab import ModuleScope
-from .. import Utils
-from . import Options
-
+from .Scanning import PyrexScanner, FileSourceDescriptor
+from .Errors import PyrexError, CompileError, error, warning
+from .Symtab import ModuleScope
+from .. import Utils
+from . import Options
+
from . import Version # legacy import needed by old PyTables versions
version = Version.version # legacy attribute - use "Cython.__version__" instead
-module_name_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$")
-
-verbose = 0
-
+module_name_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$")
+
+verbose = 0
+
standard_include_path = os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir, 'Includes'))
-class CompilationData(object):
- # Bundles the information that is passed from transform to transform.
- # (For now, this is only)
-
- # While Context contains every pxd ever loaded, path information etc.,
- # this only contains the data related to a single compilation pass
- #
- # pyx ModuleNode Main code tree of this compilation.
- # pxds {string : ModuleNode} Trees for the pxds used in the pyx.
- # codewriter CCodeWriter Where to output final code.
- # options CompilationOptions
- # result CompilationResult
- pass
-
-
-class Context(object):
- # This class encapsulates the context needed for compiling
- # one or more Cython implementation files along with their
- # associated and imported declaration files. It includes
- # the root of the module import namespace and the list
- # of directories to search for include files.
- #
- # modules {string : ModuleScope}
- # include_directories [string]
- # future_directives [object]
- # language_level int currently 2 or 3 for Python 2/3
-
- cython_scope = None
+class CompilationData(object):
+ # Bundles the information that is passed from transform to transform.
+ # (For now, this is only)
+
+ # While Context contains every pxd ever loaded, path information etc.,
+ # this only contains the data related to a single compilation pass
+ #
+ # pyx ModuleNode Main code tree of this compilation.
+ # pxds {string : ModuleNode} Trees for the pxds used in the pyx.
+ # codewriter CCodeWriter Where to output final code.
+ # options CompilationOptions
+ # result CompilationResult
+ pass
+
+
+class Context(object):
+ # This class encapsulates the context needed for compiling
+ # one or more Cython implementation files along with their
+ # associated and imported declaration files. It includes
+ # the root of the module import namespace and the list
+ # of directories to search for include files.
+ #
+ # modules {string : ModuleScope}
+ # include_directories [string]
+ # future_directives [object]
+ # language_level int currently 2 or 3 for Python 2/3
+
+ cython_scope = None
language_level = None # warn when not set but default to Py2
-
- def __init__(self, include_directories, compiler_directives, cpp=False,
+
+ def __init__(self, include_directories, compiler_directives, cpp=False,
language_level=None, options=None):
- # cython_scope is a hack, set to False by subclasses, in order to break
- # an infinite loop.
- # Better code organization would fix it.
-
- from . import Builtin, CythonScope
- self.modules = {"__builtin__" : Builtin.builtin_scope}
- self.cython_scope = CythonScope.create_cython_scope(self)
- self.modules["cython"] = self.cython_scope
- self.include_directories = include_directories
- self.future_directives = set()
- self.compiler_directives = compiler_directives
- self.cpp = cpp
- self.options = options
-
+ # cython_scope is a hack, set to False by subclasses, in order to break
+ # an infinite loop.
+ # Better code organization would fix it.
+
+ from . import Builtin, CythonScope
+ self.modules = {"__builtin__" : Builtin.builtin_scope}
+ self.cython_scope = CythonScope.create_cython_scope(self)
+ self.modules["cython"] = self.cython_scope
+ self.include_directories = include_directories
+ self.future_directives = set()
+ self.compiler_directives = compiler_directives
+ self.cpp = cpp
+ self.options = options
+
self.pxds = {} # full name -> node tree
self._interned = {} # (type(value), value, *key_args) -> interned_value
-
+
if language_level is not None:
self.set_language_level(language_level)
-
- self.gdb_debug_outputwriter = None
-
- def set_language_level(self, level):
+
+ self.gdb_debug_outputwriter = None
+
+ def set_language_level(self, level):
from .Future import print_function, unicode_literals, absolute_import, division
future_directives = set()
if level == '3str':
@@ -106,11 +106,11 @@ class Context(object):
future_directives.add(unicode_literals)
if level >= 3:
future_directives.update([print_function, absolute_import, division])
- self.language_level = level
+ self.language_level = level
self.future_directives = future_directives
- if level >= 3:
- self.modules['builtins'] = self.modules['__builtin__']
-
+ if level >= 3:
+ self.modules['builtins'] = self.modules['__builtin__']
+
def intern_ustring(self, value, encoding=None):
key = (EncodedString, value, encoding)
try:
@@ -132,40 +132,40 @@ class Context(object):
self._interned[key] = value
return value
- # pipeline creation functions can now be found in Pipeline.py
-
- def process_pxd(self, source_desc, scope, module_name):
- from . import Pipeline
- if isinstance(source_desc, FileSourceDescriptor) and source_desc._file_type == 'pyx':
- source = CompilationSource(source_desc, module_name, os.getcwd())
- result_sink = create_default_resultobj(source, self.options)
- pipeline = Pipeline.create_pyx_as_pxd_pipeline(self, result_sink)
- result = Pipeline.run_pipeline(pipeline, source)
- else:
- pipeline = Pipeline.create_pxd_pipeline(self, scope, module_name)
- result = Pipeline.run_pipeline(pipeline, source_desc)
- return result
-
- def nonfatal_error(self, exc):
- return Errors.report_error(exc)
-
+ # pipeline creation functions can now be found in Pipeline.py
+
+ def process_pxd(self, source_desc, scope, module_name):
+ from . import Pipeline
+ if isinstance(source_desc, FileSourceDescriptor) and source_desc._file_type == 'pyx':
+ source = CompilationSource(source_desc, module_name, os.getcwd())
+ result_sink = create_default_resultobj(source, self.options)
+ pipeline = Pipeline.create_pyx_as_pxd_pipeline(self, result_sink)
+ result = Pipeline.run_pipeline(pipeline, source)
+ else:
+ pipeline = Pipeline.create_pxd_pipeline(self, scope, module_name)
+ result = Pipeline.run_pipeline(pipeline, source_desc)
+ return result
+
+ def nonfatal_error(self, exc):
+ return Errors.report_error(exc)
+
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=1,
absolute_fallback=True):
- # Finds and returns the module scope corresponding to
- # the given relative or absolute module name. If this
- # is the first time the module has been requested, finds
- # the corresponding .pxd file and process it.
- # If relative_to is not None, it must be a module scope,
- # and the module will first be searched for relative to
- # that module, provided its name is not a dotted name.
- debug_find_module = 0
- if debug_find_module:
- print("Context.find_module: module_name = %s, relative_to = %s, pos = %s, need_pxd = %s" % (
+ # Finds and returns the module scope corresponding to
+ # the given relative or absolute module name. If this
+ # is the first time the module has been requested, finds
+        # the corresponding .pxd file and processes it.
+ # If relative_to is not None, it must be a module scope,
+ # and the module will first be searched for relative to
+ # that module, provided its name is not a dotted name.
+ debug_find_module = 0
+ if debug_find_module:
+ print("Context.find_module: module_name = %s, relative_to = %s, pos = %s, need_pxd = %s" % (
module_name, relative_to, pos, need_pxd))
-
- scope = None
- pxd_pathname = None
- if relative_to:
+
+ scope = None
+ pxd_pathname = None
+ if relative_to:
if module_name:
# from .module import ...
qualified_name = relative_to.qualify_name(module_name)
@@ -182,111 +182,111 @@ class Context(object):
"'%s' is not a valid module name" % module_name)
if relative_to:
- if debug_find_module:
- print("...trying relative import")
- scope = relative_to.lookup_submodule(module_name)
- if not scope:
- pxd_pathname = self.find_pxd_file(qualified_name, pos)
- if pxd_pathname:
- scope = relative_to.find_submodule(module_name)
- if not scope:
- if debug_find_module:
- print("...trying absolute import")
+ if debug_find_module:
+ print("...trying relative import")
+ scope = relative_to.lookup_submodule(module_name)
+ if not scope:
+ pxd_pathname = self.find_pxd_file(qualified_name, pos)
+ if pxd_pathname:
+ scope = relative_to.find_submodule(module_name)
+ if not scope:
+ if debug_find_module:
+ print("...trying absolute import")
if absolute_fallback:
qualified_name = module_name
- scope = self
+ scope = self
for name in qualified_name.split("."):
- scope = scope.find_submodule(name)
-
- if debug_find_module:
- print("...scope = %s" % scope)
- if not scope.pxd_file_loaded:
- if debug_find_module:
- print("...pxd not loaded")
- if not pxd_pathname:
- if debug_find_module:
- print("...looking for pxd file")
+ scope = scope.find_submodule(name)
+
+ if debug_find_module:
+ print("...scope = %s" % scope)
+ if not scope.pxd_file_loaded:
+ if debug_find_module:
+ print("...pxd not loaded")
+ if not pxd_pathname:
+ if debug_find_module:
+ print("...looking for pxd file")
pxd_pathname = self.find_pxd_file(qualified_name, pos)
- if debug_find_module:
- print("......found %s" % pxd_pathname)
- if not pxd_pathname and need_pxd:
+ if debug_find_module:
+ print("......found %s" % pxd_pathname)
+ if not pxd_pathname and need_pxd:
# Set pxd_file_loaded such that we don't need to
# look for the non-existing pxd file next time.
scope.pxd_file_loaded = True
package_pathname = self.search_include_directories(qualified_name, ".py", pos)
- if package_pathname and package_pathname.endswith('__init__.py'):
- pass
- else:
+ if package_pathname and package_pathname.endswith('__init__.py'):
+ pass
+ else:
error(pos, "'%s.pxd' not found" % qualified_name.replace('.', os.sep))
- if pxd_pathname:
+ if pxd_pathname:
scope.pxd_file_loaded = True
- try:
- if debug_find_module:
- print("Context.find_module: Parsing %s" % pxd_pathname)
- rel_path = module_name.replace('.', os.sep) + os.path.splitext(pxd_pathname)[1]
- if not pxd_pathname.endswith(rel_path):
- rel_path = pxd_pathname # safety measure to prevent printing incorrect paths
+ try:
+ if debug_find_module:
+ print("Context.find_module: Parsing %s" % pxd_pathname)
+ rel_path = module_name.replace('.', os.sep) + os.path.splitext(pxd_pathname)[1]
+ if not pxd_pathname.endswith(rel_path):
+ rel_path = pxd_pathname # safety measure to prevent printing incorrect paths
if Options.source_root:
rel_path = os.path.relpath(pxd_pathname, Options.source_root)
- source_desc = FileSourceDescriptor(pxd_pathname, rel_path)
+ source_desc = FileSourceDescriptor(pxd_pathname, rel_path)
err, result = self.process_pxd(source_desc, scope, qualified_name)
- if err:
- raise err
- (pxd_codenodes, pxd_scope) = result
- self.pxds[module_name] = (pxd_codenodes, pxd_scope)
- except CompileError:
- pass
- return scope
-
+ if err:
+ raise err
+ (pxd_codenodes, pxd_scope) = result
+ self.pxds[module_name] = (pxd_codenodes, pxd_scope)
+ except CompileError:
+ pass
+ return scope
+
def find_pxd_file(self, qualified_name, pos, sys_path=False):
# Search include path (and sys.path if sys_path is True) for
# the .pxd file corresponding to the given fully-qualified
# module name.
- # Will find either a dotted filename or a file in a
- # package directory. If a source file position is given,
- # the directory containing the source file is searched first
- # for a dotted filename, and its containing package root
- # directory is searched first for a non-dotted filename.
+ # Will find either a dotted filename or a file in a
+ # package directory. If a source file position is given,
+ # the directory containing the source file is searched first
+ # for a dotted filename, and its containing package root
+ # directory is searched first for a non-dotted filename.
pxd = self.search_include_directories(qualified_name, ".pxd", pos, sys_path=sys_path)
- if pxd is None: # XXX Keep this until Includes/Deprecated is removed
- if (qualified_name.startswith('python') or
+ if pxd is None: # XXX Keep this until Includes/Deprecated is removed
+ if (qualified_name.startswith('python') or
qualified_name in ('stdlib', 'stdio', 'stl')):
- standard_include_path = os.path.abspath(os.path.normpath(
- os.path.join(os.path.dirname(__file__), os.path.pardir, 'Includes')))
- deprecated_include_path = os.path.join(standard_include_path, 'Deprecated')
- self.include_directories.append(deprecated_include_path)
- try:
- pxd = self.search_include_directories(qualified_name, ".pxd", pos)
- finally:
- self.include_directories.pop()
- if pxd:
- name = qualified_name
- if name.startswith('python'):
- warning(pos, "'%s' is deprecated, use 'cpython'" % name, 1)
- elif name in ('stdlib', 'stdio'):
- warning(pos, "'%s' is deprecated, use 'libc.%s'" % (name, name), 1)
- elif name in ('stl'):
- warning(pos, "'%s' is deprecated, use 'libcpp.*.*'" % name, 1)
- if pxd is None and Options.cimport_from_pyx:
- return self.find_pyx_file(qualified_name, pos)
- return pxd
-
- def find_pyx_file(self, qualified_name, pos):
- # Search include path for the .pyx file corresponding to the
- # given fully-qualified module name, as for find_pxd_file().
- return self.search_include_directories(qualified_name, ".pyx", pos)
-
- def find_include_file(self, filename, pos):
- # Search list of include directories for filename.
- # Reports an error and returns None if not found.
- path = self.search_include_directories(filename, "", pos,
- include=True)
- if not path:
- error(pos, "'%s' not found" % filename)
- return path
-
- def search_include_directories(self, qualified_name, suffix, pos,
- include=False, sys_path=False):
+ standard_include_path = os.path.abspath(os.path.normpath(
+ os.path.join(os.path.dirname(__file__), os.path.pardir, 'Includes')))
+ deprecated_include_path = os.path.join(standard_include_path, 'Deprecated')
+ self.include_directories.append(deprecated_include_path)
+ try:
+ pxd = self.search_include_directories(qualified_name, ".pxd", pos)
+ finally:
+ self.include_directories.pop()
+ if pxd:
+ name = qualified_name
+ if name.startswith('python'):
+ warning(pos, "'%s' is deprecated, use 'cpython'" % name, 1)
+ elif name in ('stdlib', 'stdio'):
+ warning(pos, "'%s' is deprecated, use 'libc.%s'" % (name, name), 1)
+                    elif name == 'stl':
+ warning(pos, "'%s' is deprecated, use 'libcpp.*.*'" % name, 1)
+ if pxd is None and Options.cimport_from_pyx:
+ return self.find_pyx_file(qualified_name, pos)
+ return pxd
+
+ def find_pyx_file(self, qualified_name, pos):
+ # Search include path for the .pyx file corresponding to the
+ # given fully-qualified module name, as for find_pxd_file().
+ return self.search_include_directories(qualified_name, ".pyx", pos)
+
+ def find_include_file(self, filename, pos):
+ # Search list of include directories for filename.
+ # Reports an error and returns None if not found.
+ path = self.search_include_directories(filename, "", pos,
+ include=True)
+ if not path:
+ error(pos, "'%s' not found" % filename)
+ return path
+
+ def search_include_directories(self, qualified_name, suffix, pos,
+ include=False, sys_path=False):
include_dirs = self.include_directories
if sys_path:
include_dirs = include_dirs + sys.path
@@ -294,79 +294,79 @@ class Context(object):
include_dirs = tuple(include_dirs + [standard_include_path])
return search_include_directories(include_dirs, qualified_name,
suffix, pos, include)
-
- def find_root_package_dir(self, file_path):
- return Utils.find_root_package_dir(file_path)
-
- def check_package_dir(self, dir, package_names):
- return Utils.check_package_dir(dir, tuple(package_names))
-
+
+ def find_root_package_dir(self, file_path):
+ return Utils.find_root_package_dir(file_path)
+
+ def check_package_dir(self, dir, package_names):
+ return Utils.check_package_dir(dir, tuple(package_names))
+
def c_file_out_of_date(self, source_path, output_path):
if not os.path.exists(output_path):
- return 1
+ return 1
c_time = Utils.modification_time(output_path)
- if Utils.file_newer_than(source_path, c_time):
- return 1
- pos = [source_path]
- pxd_path = Utils.replace_suffix(source_path, ".pxd")
- if os.path.exists(pxd_path) and Utils.file_newer_than(pxd_path, c_time):
- return 1
- for kind, name in self.read_dependency_file(source_path):
- if kind == "cimport":
- dep_path = self.find_pxd_file(name, pos)
- elif kind == "include":
- dep_path = self.search_include_directories(name, pos)
- else:
- continue
- if dep_path and Utils.file_newer_than(dep_path, c_time):
- return 1
- return 0
-
- def find_cimported_module_names(self, source_path):
- return [ name for kind, name in self.read_dependency_file(source_path)
- if kind == "cimport" ]
-
- def is_package_dir(self, dir_path):
- return Utils.is_package_dir(dir_path)
-
- def read_dependency_file(self, source_path):
- dep_path = Utils.replace_suffix(source_path, ".dep")
- if os.path.exists(dep_path):
- f = open(dep_path, "rU")
- chunks = [ line.strip().split(" ", 1)
- for line in f.readlines()
- if " " in line.strip() ]
- f.close()
- return chunks
- else:
- return ()
-
- def lookup_submodule(self, name):
- # Look up a top-level module. Returns None if not found.
- return self.modules.get(name, None)
-
- def find_submodule(self, name):
- # Find a top-level module, creating a new one if needed.
- scope = self.lookup_submodule(name)
- if not scope:
- scope = ModuleScope(name,
- parent_module = None, context = self)
- self.modules[name] = scope
- return scope
-
- def parse(self, source_desc, scope, pxd, full_module_name):
- if not isinstance(source_desc, FileSourceDescriptor):
- raise RuntimeError("Only file sources for code supported")
- source_filename = source_desc.filename
- scope.cpp = self.cpp
- # Parse the given source file and return a parse tree.
- num_errors = Errors.num_errors
- try:
+ if Utils.file_newer_than(source_path, c_time):
+ return 1
+ pos = [source_path]
+ pxd_path = Utils.replace_suffix(source_path, ".pxd")
+ if os.path.exists(pxd_path) and Utils.file_newer_than(pxd_path, c_time):
+ return 1
+ for kind, name in self.read_dependency_file(source_path):
+ if kind == "cimport":
+ dep_path = self.find_pxd_file(name, pos)
+ elif kind == "include":
+ dep_path = self.search_include_directories(name, pos)
+ else:
+ continue
+ if dep_path and Utils.file_newer_than(dep_path, c_time):
+ return 1
+ return 0
+
+ def find_cimported_module_names(self, source_path):
+ return [ name for kind, name in self.read_dependency_file(source_path)
+ if kind == "cimport" ]
+
+ def is_package_dir(self, dir_path):
+ return Utils.is_package_dir(dir_path)
+
+ def read_dependency_file(self, source_path):
+ dep_path = Utils.replace_suffix(source_path, ".dep")
+ if os.path.exists(dep_path):
+ f = open(dep_path, "rU")
+ chunks = [ line.strip().split(" ", 1)
+ for line in f.readlines()
+ if " " in line.strip() ]
+ f.close()
+ return chunks
+ else:
+ return ()
+
+ def lookup_submodule(self, name):
+ # Look up a top-level module. Returns None if not found.
+ return self.modules.get(name, None)
+
+ def find_submodule(self, name):
+ # Find a top-level module, creating a new one if needed.
+ scope = self.lookup_submodule(name)
+ if not scope:
+ scope = ModuleScope(name,
+ parent_module = None, context = self)
+ self.modules[name] = scope
+ return scope
+
+ def parse(self, source_desc, scope, pxd, full_module_name):
+ if not isinstance(source_desc, FileSourceDescriptor):
+ raise RuntimeError("Only file sources for code supported")
+ source_filename = source_desc.filename
+ scope.cpp = self.cpp
+ # Parse the given source file and return a parse tree.
+ num_errors = Errors.num_errors
+ try:
with Utils.open_source_file(source_filename) as f:
- from . import Parsing
- s = PyrexScanner(f, source_desc, source_encoding = f.encoding,
- scope = scope, context = self)
- tree = Parsing.p_module(s, pxd, full_module_name)
+ from . import Parsing
+ s = PyrexScanner(f, source_desc, source_encoding = f.encoding,
+ scope = scope, context = self)
+ tree = Parsing.p_module(s, pxd, full_module_name)
if self.options.formal_grammar:
try:
from ..Parser import ConcreteSyntaxTree
@@ -375,14 +375,14 @@ class Context(object):
"Formal grammar can only be used with compiled Cython with an available pgen.")
ConcreteSyntaxTree.p_module(source_filename)
except UnicodeDecodeError as e:
- #import traceback
- #traceback.print_exc()
+ #import traceback
+ #traceback.print_exc()
raise self._report_decode_error(source_desc, e)
-
+
if Errors.num_errors > num_errors:
raise CompileError()
return tree
-
+
def _report_decode_error(self, source_desc, exc):
msg = exc.args[-1]
position = exc.args[2]
@@ -395,53 +395,53 @@ class Context(object):
idx += len(data)
if idx >= position:
column = position - (idx - len(data)) + 1
- break
-
+ break
+
return error((source_desc, line, column),
"Decoding error, missing or incorrect coding=<encoding-name> "
"at top of source (cannot decode with encoding %r: %s)" % (encoding, msg))
-
- def extract_module_name(self, path, options):
- # Find fully_qualified module name from the full pathname
- # of a source file.
- dir, filename = os.path.split(path)
- module_name, _ = os.path.splitext(filename)
- if "." in module_name:
- return module_name
- names = [module_name]
- while self.is_package_dir(dir):
- parent, package_name = os.path.split(dir)
- if parent == dir:
- break
- names.append(package_name)
- dir = parent
- names.reverse()
- return ".".join(names)
-
- def setup_errors(self, options, result):
+
+ def extract_module_name(self, path, options):
+ # Find fully_qualified module name from the full pathname
+ # of a source file.
+ dir, filename = os.path.split(path)
+ module_name, _ = os.path.splitext(filename)
+ if "." in module_name:
+ return module_name
+ names = [module_name]
+ while self.is_package_dir(dir):
+ parent, package_name = os.path.split(dir)
+ if parent == dir:
+ break
+ names.append(package_name)
+ dir = parent
+ names.reverse()
+ return ".".join(names)
+
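For reference, a simplified, hypothetical model of what extract_module_name() above computes (the real method also short-circuits on dotted names and stops once the parent directory no longer changes):

import os

def module_name_for(path, is_package_dir):
    # Walk up while the enclosing directories look like packages,
    # then join the collected parts into a dotted module name.
    directory, filename = os.path.split(path)
    names = [os.path.splitext(filename)[0]]
    while is_package_dir(directory):
        directory, package = os.path.split(directory)
        names.append(package)
    return ".".join(reversed(names))

# e.g. module_name_for("/src/pkg/sub/mod.pyx", is_package_dir) == "pkg.sub.mod"
# assuming /src/pkg and /src/pkg/sub are package directories (hypothetical paths).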
+ def setup_errors(self, options, result):
Errors.reset() # clear any remaining error state
- if options.use_listing_file:
+ if options.use_listing_file:
path = result.listing_file = Utils.replace_suffix(result.main_source_file, ".lis")
- else:
- path = None
- Errors.open_listing_file(path=path,
- echo_to_stderr=options.errors_to_stderr)
-
- def teardown_errors(self, err, options, result):
- source_desc = result.compilation_source.source_desc
- if not isinstance(source_desc, FileSourceDescriptor):
- raise RuntimeError("Only file sources for code supported")
- Errors.close_listing_file()
- result.num_errors = Errors.num_errors
- if result.num_errors > 0:
- err = True
- if err and result.c_file:
- try:
- Utils.castrate_file(result.c_file, os.stat(source_desc.filename))
- except EnvironmentError:
- pass
- result.c_file = None
-
+ else:
+ path = None
+ Errors.open_listing_file(path=path,
+ echo_to_stderr=options.errors_to_stderr)
+
+ def teardown_errors(self, err, options, result):
+ source_desc = result.compilation_source.source_desc
+ if not isinstance(source_desc, FileSourceDescriptor):
+ raise RuntimeError("Only file sources for code supported")
+ Errors.close_listing_file()
+ result.num_errors = Errors.num_errors
+ if result.num_errors > 0:
+ err = True
+ if err and result.c_file:
+ try:
+ Utils.castrate_file(result.c_file, os.stat(source_desc.filename))
+ except EnvironmentError:
+ pass
+ result.c_file = None
+
def get_output_filename(source_filename, cwd, options):
if options.cplus:
@@ -459,111 +459,111 @@ def get_output_filename(source_filename, cwd, options):
return suggested_file_name
-def create_default_resultobj(compilation_source, options):
- result = CompilationResult()
- result.main_source_file = compilation_source.source_desc.filename
- result.compilation_source = compilation_source
- source_desc = compilation_source.source_desc
+def create_default_resultobj(compilation_source, options):
+ result = CompilationResult()
+ result.main_source_file = compilation_source.source_desc.filename
+ result.compilation_source = compilation_source
+ source_desc = compilation_source.source_desc
result.c_file = get_output_filename(source_desc.filename,
compilation_source.cwd, options)
result.embedded_metadata = options.embedded_metadata
- return result
-
-
-def run_pipeline(source, options, full_module_name=None, context=None):
- from . import Pipeline
-
- source_ext = os.path.splitext(source)[1]
- options.configure_language_defaults(source_ext[1:]) # py/pyx
- if context is None:
- context = options.create_context()
-
- # Set up source object
- cwd = os.getcwd()
- abs_path = os.path.abspath(source)
+ return result
+
+
+def run_pipeline(source, options, full_module_name=None, context=None):
+ from . import Pipeline
+
+ source_ext = os.path.splitext(source)[1]
+ options.configure_language_defaults(source_ext[1:]) # py/pyx
+ if context is None:
+ context = options.create_context()
+
+ # Set up source object
+ cwd = os.getcwd()
+ abs_path = os.path.abspath(source)
full_module_name = full_module_name or options.module_name or context.extract_module_name(source, options)
-
+
Utils.raise_error_if_module_name_forbidden(full_module_name)
- if options.relative_path_in_code_position_comments:
- rel_path = full_module_name.replace('.', os.sep) + source_ext
- if not abs_path.endswith(rel_path):
- rel_path = source # safety measure to prevent printing incorrect paths
- else:
- rel_path = abs_path
+ if options.relative_path_in_code_position_comments:
+ rel_path = full_module_name.replace('.', os.sep) + source_ext
+ if not abs_path.endswith(rel_path):
+ rel_path = source # safety measure to prevent printing incorrect paths
+ else:
+ rel_path = abs_path
if Options.source_root:
rel_path = os.path.relpath(abs_path, Options.source_root)
- source_desc = FileSourceDescriptor(abs_path, rel_path)
- source = CompilationSource(source_desc, full_module_name, cwd)
-
- # Set up result object
- result = create_default_resultobj(source, options)
-
- if options.annotate is None:
- # By default, decide based on whether an html file already exists.
- html_filename = os.path.splitext(result.c_file)[0] + ".html"
- if os.path.exists(html_filename):
+ source_desc = FileSourceDescriptor(abs_path, rel_path)
+ source = CompilationSource(source_desc, full_module_name, cwd)
+
+ # Set up result object
+ result = create_default_resultobj(source, options)
+
+ if options.annotate is None:
+ # By default, decide based on whether an html file already exists.
+ html_filename = os.path.splitext(result.c_file)[0] + ".html"
+ if os.path.exists(html_filename):
with io.open(html_filename, "r", encoding="UTF-8") as html_file:
if u'<!-- Generated by Cython' in html_file.read(100):
options.annotate = True
-
- # Get pipeline
- if source_ext.lower() == '.py' or not source_ext:
- pipeline = Pipeline.create_py_pipeline(context, options, result)
- else:
- pipeline = Pipeline.create_pyx_pipeline(context, options, result)
-
- context.setup_errors(options, result)
- err, enddata = Pipeline.run_pipeline(pipeline, source)
- context.teardown_errors(err, options, result)
- return result
-
-
+
+ # Get pipeline
+ if source_ext.lower() == '.py' or not source_ext:
+ pipeline = Pipeline.create_py_pipeline(context, options, result)
+ else:
+ pipeline = Pipeline.create_pyx_pipeline(context, options, result)
+
+ context.setup_errors(options, result)
+ err, enddata = Pipeline.run_pipeline(pipeline, source)
+ context.teardown_errors(err, options, result)
+ return result
+
+
# ------------------------------------------------------------------------
-#
-# Main Python entry points
-#
+#
+# Main Python entry points
+#
# ------------------------------------------------------------------------
-
-class CompilationSource(object):
- """
+
+class CompilationSource(object):
+ """
Contains the data necessary to start up a compilation pipeline for
- a single compilation unit.
- """
- def __init__(self, source_desc, full_module_name, cwd):
- self.source_desc = source_desc
- self.full_module_name = full_module_name
- self.cwd = cwd
-
-
-class CompilationOptions(object):
+ a single compilation unit.
+ """
+ def __init__(self, source_desc, full_module_name, cwd):
+ self.source_desc = source_desc
+ self.full_module_name = full_module_name
+ self.cwd = cwd
+
+
+class CompilationOptions(object):
r"""
See default_options at the end of this module for a list of all possible
options and CmdLine.usage and CmdLine.parse_command_line() for their
meaning.
- """
- def __init__(self, defaults=None, **kw):
- self.include_path = []
- if defaults:
- if isinstance(defaults, CompilationOptions):
- defaults = defaults.__dict__
- else:
- defaults = default_options
-
- options = dict(defaults)
- options.update(kw)
-
- # let's assume 'default_options' contains a value for most known compiler options
- # and validate against them
- unknown_options = set(options) - set(default_options)
- # ignore valid options that are not in the defaults
- unknown_options.difference_update(['include_path'])
- if unknown_options:
+ """
+ def __init__(self, defaults=None, **kw):
+ self.include_path = []
+ if defaults:
+ if isinstance(defaults, CompilationOptions):
+ defaults = defaults.__dict__
+ else:
+ defaults = default_options
+
+ options = dict(defaults)
+ options.update(kw)
+
+ # let's assume 'default_options' contains a value for most known compiler options
+ # and validate against them
+ unknown_options = set(options) - set(default_options)
+ # ignore valid options that are not in the defaults
+ unknown_options.difference_update(['include_path'])
+ if unknown_options:
message = "got unknown compilation option%s, please remove: %s" % (
- 's' if len(unknown_options) > 1 else '',
+ 's' if len(unknown_options) > 1 else '',
', '.join(unknown_options))
raise ValueError(message)
-
+
directive_defaults = Options.get_directive_defaults()
directives = dict(options['compiler_directives']) # copy mutable field
# check for invalid directives
@@ -573,12 +573,12 @@ class CompilationOptions(object):
's' if len(unknown_directives) > 1 else '',
', '.join(unknown_directives))
raise ValueError(message)
- options['compiler_directives'] = directives
+ options['compiler_directives'] = directives
if directives.get('np_pythran', False) and not options['cplus']:
import warnings
warnings.warn("C++ mode forced when in Pythran mode!")
options['cplus'] = True
- if 'language_level' in directives and 'language_level' not in kw:
+ if 'language_level' in directives and 'language_level' not in kw:
options['language_level'] = directives['language_level']
elif not options.get('language_level'):
options['language_level'] = directive_defaults.get('language_level')
@@ -586,25 +586,25 @@ class CompilationOptions(object):
options['formal_grammar'] = directives['formal_grammar']
if options['cache'] is True:
options['cache'] = os.path.join(Utils.get_cython_cache_dir(), 'compiler')
-
- self.__dict__.update(options)
-
- def configure_language_defaults(self, source_extension):
- if source_extension == 'py':
- if self.compiler_directives.get('binding') is None:
- self.compiler_directives['binding'] = True
-
- def create_context(self):
- return Context(self.include_path, self.compiler_directives,
- self.cplus, self.language_level, options=self)
-
+
+ self.__dict__.update(options)
+
+ def configure_language_defaults(self, source_extension):
+ if source_extension == 'py':
+ if self.compiler_directives.get('binding') is None:
+ self.compiler_directives['binding'] = True
+
+ def create_context(self):
+ return Context(self.include_path, self.compiler_directives,
+ self.cplus, self.language_level, options=self)
+
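As the constructor above shows, keyword options are validated against default_options; a minimal usage sketch, assuming the usual Cython.Compiler.Main module path of this tree:

from Cython.Compiler.Main import CompilationOptions, default_options

CompilationOptions(default_options, annotate=True)      # known option: accepted
try:
    CompilationOptions(default_options, anotate=True)    # misspelled: rejected
except ValueError as exc:
    print(exc)  # "got unknown compilation option, please remove: anotate"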
def get_fingerprint(self):
r"""
Return a string that contains all the options that are relevant for cache invalidation.
"""
# Collect only the data that can affect the generated file(s).
data = {}
-
+
for key, value in self.__dict__.items():
if key in ['show_version', 'errors_to_stderr', 'verbose', 'quiet']:
# verbosity flags have no influence on the compilation result
@@ -676,111 +676,111 @@ class CompilationOptions(object):
return to_fingerprint(data)
-class CompilationResult(object):
- """
- Results from the Cython compiler:
-
- c_file string or None The generated C source file
- h_file string or None The generated C header file
- i_file string or None The generated .pxi file
- api_file string or None The generated C API .h file
- listing_file string or None File of error messages
- object_file string or None Result of compiling the C file
- extension_file string or None Result of linking the object file
- num_errors integer Number of compilation errors
- compilation_source CompilationSource
- """
-
- def __init__(self):
- self.c_file = None
- self.h_file = None
- self.i_file = None
- self.api_file = None
- self.listing_file = None
- self.object_file = None
- self.extension_file = None
- self.main_source_file = None
-
-
-class CompilationResultSet(dict):
- """
- Results from compiling multiple Pyrex source files. A mapping
- from source file paths to CompilationResult instances. Also
- has the following attributes:
-
- num_errors integer Total number of compilation errors
- """
-
- num_errors = 0
-
- def add(self, source, result):
- self[source] = result
- self.num_errors += result.num_errors
-
-
-def compile_single(source, options, full_module_name = None):
- """
- compile_single(source, options, full_module_name)
-
- Compile the given Pyrex implementation file and return a CompilationResult.
- Always compiles a single file; does not perform timestamp checking or
- recursion.
- """
- return run_pipeline(source, options, full_module_name)
-
-
-def compile_multiple(sources, options):
- """
- compile_multiple(sources, options)
-
- Compiles the given sequence of Pyrex implementation files and returns
- a CompilationResultSet. Performs timestamp checking and/or recursion
- if these are specified in the options.
- """
- # run_pipeline creates the context
- # context = options.create_context()
- sources = [os.path.abspath(source) for source in sources]
- processed = set()
- results = CompilationResultSet()
- timestamps = options.timestamps
- verbose = options.verbose
- context = None
+class CompilationResult(object):
+ """
+ Results from the Cython compiler:
+
+ c_file string or None The generated C source file
+ h_file string or None The generated C header file
+ i_file string or None The generated .pxi file
+ api_file string or None The generated C API .h file
+ listing_file string or None File of error messages
+ object_file string or None Result of compiling the C file
+ extension_file string or None Result of linking the object file
+ num_errors integer Number of compilation errors
+ compilation_source CompilationSource
+ """
+
+ def __init__(self):
+ self.c_file = None
+ self.h_file = None
+ self.i_file = None
+ self.api_file = None
+ self.listing_file = None
+ self.object_file = None
+ self.extension_file = None
+ self.main_source_file = None
+
+
+class CompilationResultSet(dict):
+ """
+ Results from compiling multiple Pyrex source files. A mapping
+ from source file paths to CompilationResult instances. Also
+ has the following attributes:
+
+ num_errors integer Total number of compilation errors
+ """
+
+ num_errors = 0
+
+ def add(self, source, result):
+ self[source] = result
+ self.num_errors += result.num_errors
+
+
+def compile_single(source, options, full_module_name = None):
+ """
+ compile_single(source, options, full_module_name)
+
+ Compile the given Pyrex implementation file and return a CompilationResult.
+ Always compiles a single file; does not perform timestamp checking or
+ recursion.
+ """
+ return run_pipeline(source, options, full_module_name)
+
+
+def compile_multiple(sources, options):
+ """
+ compile_multiple(sources, options)
+
+ Compiles the given sequence of Pyrex implementation files and returns
+ a CompilationResultSet. Performs timestamp checking and/or recursion
+ if these are specified in the options.
+ """
+ # run_pipeline creates the context
+ # context = options.create_context()
+ sources = [os.path.abspath(source) for source in sources]
+ processed = set()
+ results = CompilationResultSet()
+ timestamps = options.timestamps
+ verbose = options.verbose
+ context = None
cwd = os.getcwd()
- for source in sources:
- if source not in processed:
- if context is None:
- context = options.create_context()
+ for source in sources:
+ if source not in processed:
+ if context is None:
+ context = options.create_context()
output_filename = get_output_filename(source, cwd, options)
out_of_date = context.c_file_out_of_date(source, output_filename)
if (not timestamps) or out_of_date:
- if verbose:
- sys.stderr.write("Compiling %s\n" % source)
-
- result = run_pipeline(source, options, context=context)
- results.add(source, result)
- # Compiling multiple sources in one context doesn't quite
- # work properly yet.
- context = None
- processed.add(source)
- return results
-
-
-def compile(source, options = None, full_module_name = None, **kwds):
- """
- compile(source [, options], [, <option> = <value>]...)
-
- Compile one or more Pyrex implementation files, with optional timestamp
+ if verbose:
+ sys.stderr.write("Compiling %s\n" % source)
+
+ result = run_pipeline(source, options, context=context)
+ results.add(source, result)
+ # Compiling multiple sources in one context doesn't quite
+ # work properly yet.
+ context = None
+ processed.add(source)
+ return results
+
+
+def compile(source, options = None, full_module_name = None, **kwds):
+ """
+ compile(source [, options], [, <option> = <value>]...)
+
+ Compile one or more Pyrex implementation files, with optional timestamp
checking and recursing on dependencies. The source argument may be a string
or a sequence of strings. If it is a string and no recursion or timestamp
- checking is requested, a CompilationResult is returned, otherwise a
- CompilationResultSet is returned.
- """
- options = CompilationOptions(defaults = options, **kwds)
- if isinstance(source, basestring) and not options.timestamps:
- return compile_single(source, options, full_module_name)
- else:
- return compile_multiple(source, options)
-
+ checking is requested, a CompilationResult is returned, otherwise a
+ CompilationResultSet is returned.
+ """
+ options = CompilationOptions(defaults = options, **kwds)
+ if isinstance(source, basestring) and not options.timestamps:
+ return compile_single(source, options, full_module_name)
+ else:
+ return compile_multiple(source, options)
+
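A minimal sketch of driving the entry points above programmatically (the source filename is hypothetical; passing a list of sources, or timestamps=True, returns a CompilationResultSet instead, per the docstring):

from Cython.Compiler.Main import compile, CompilationOptions, default_options

options = CompilationOptions(default_options)
result = compile("example.pyx", options)   # single source, no timestamp checking
if result.num_errors == 0:
    print("wrote", result.c_file)          # output path chosen by get_output_filename()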
@Utils.cached_function
def search_include_directories(dirs, qualified_name, suffix, pos, include=False):
@@ -848,76 +848,76 @@ def search_include_directories(dirs, qualified_name, suffix, pos, include=False)
# ------------------------------------------------------------------------
-#
-# Main command-line entry point
-#
+#
+# Main command-line entry point
+#
# ------------------------------------------------------------------------
-def setuptools_main():
- return main(command_line = 1)
-
-
-def main(command_line = 0):
- args = sys.argv[1:]
- any_failures = 0
- if command_line:
- from .CmdLine import parse_command_line
- options, sources = parse_command_line(args)
- else:
- options = CompilationOptions(default_options)
- sources = args
-
- if options.show_version:
- sys.stderr.write("Cython version %s\n" % version)
- if options.working_path!="":
- os.chdir(options.working_path)
- try:
- result = compile(sources, options)
- if result.num_errors > 0:
- any_failures = 1
+def setuptools_main():
+ return main(command_line = 1)
+
+
+def main(command_line = 0):
+ args = sys.argv[1:]
+ any_failures = 0
+ if command_line:
+ from .CmdLine import parse_command_line
+ options, sources = parse_command_line(args)
+ else:
+ options = CompilationOptions(default_options)
+ sources = args
+
+ if options.show_version:
+ sys.stderr.write("Cython version %s\n" % version)
+ if options.working_path!="":
+ os.chdir(options.working_path)
+ try:
+ result = compile(sources, options)
+ if result.num_errors > 0:
+ any_failures = 1
except (EnvironmentError, PyrexError) as e:
- sys.stderr.write(str(e) + '\n')
- any_failures = 1
- if any_failures:
- sys.exit(1)
-
-
+ sys.stderr.write(str(e) + '\n')
+ any_failures = 1
+ if any_failures:
+ sys.exit(1)
+
+
# ------------------------------------------------------------------------
-#
-# Set the default options depending on the platform
-#
+#
+# Set the default options depending on the platform
+#
# ------------------------------------------------------------------------
-
-default_options = dict(
- show_version = 0,
- use_listing_file = 0,
- errors_to_stderr = 1,
- cplus = 0,
- output_file = None,
- annotate = None,
+
+default_options = dict(
+ show_version = 0,
+ use_listing_file = 0,
+ errors_to_stderr = 1,
+ cplus = 0,
+ output_file = None,
+ annotate = None,
annotate_coverage_xml = None,
- generate_pxi = 0,
- capi_reexport_cincludes = 0,
- working_path = "",
- timestamps = None,
- verbose = 0,
- quiet = 0,
- compiler_directives = {},
+ generate_pxi = 0,
+ capi_reexport_cincludes = 0,
+ working_path = "",
+ timestamps = None,
+ verbose = 0,
+ quiet = 0,
+ compiler_directives = {},
embedded_metadata = {},
- evaluate_tree_assertions = False,
- emit_linenums = False,
- relative_path_in_code_position_comments = True,
- c_line_in_traceback = True,
+ evaluate_tree_assertions = False,
+ emit_linenums = False,
+ relative_path_in_code_position_comments = True,
+ c_line_in_traceback = True,
language_level = None, # warn but default to 2
formal_grammar = False,
- gdb_debug = False,
+ gdb_debug = False,
module_name = None,
init_suffix = None,
- compile_time_env = None,
- common_utility_include_dir = None,
- output_dir=None,
- build_dir=None,
+ compile_time_env = None,
+ common_utility_include_dir = None,
+ output_dir=None,
+ build_dir=None,
cache=None,
create_extension=None,
np_pythran=False
-)
+)
diff --git a/contrib/tools/cython/Cython/Compiler/MemoryView.py b/contrib/tools/cython/Cython/Compiler/MemoryView.py
index 28b67e29b1..0406d6c716 100644
--- a/contrib/tools/cython/Cython/Compiler/MemoryView.py
+++ b/contrib/tools/cython/Cython/Compiler/MemoryView.py
@@ -1,272 +1,272 @@
-from __future__ import absolute_import
-
-from .Errors import CompileError, error
-from . import ExprNodes
-from .ExprNodes import IntNode, NameNode, AttributeNode
-from . import Options
-from .Code import UtilityCode, TempitaUtilityCode
-from .UtilityCode import CythonUtilityCode
-from . import Buffer
-from . import PyrexTypes
-from . import ModuleNode
-
-START_ERR = "Start must not be given."
-STOP_ERR = "Axis specification only allowed in the 'step' slot."
-STEP_ERR = "Step must be omitted, 1, or a valid specifier."
-BOTH_CF_ERR = "Cannot specify an array that is both C and Fortran contiguous."
-INVALID_ERR = "Invalid axis specification."
-NOT_CIMPORTED_ERR = "Variable was not cimported from cython.view"
-EXPR_ERR = "no expressions allowed in axis spec, only names and literals."
-CF_ERR = "Invalid axis specification for a C/Fortran contiguous array."
-ERR_UNINITIALIZED = ("Cannot check if memoryview %s is initialized without the "
- "GIL, consider using initializedcheck(False)")
-
-
-def concat_flags(*flags):
- return "(%s)" % "|".join(flags)
-
-
-format_flag = "PyBUF_FORMAT"
-
+from __future__ import absolute_import
+
+from .Errors import CompileError, error
+from . import ExprNodes
+from .ExprNodes import IntNode, NameNode, AttributeNode
+from . import Options
+from .Code import UtilityCode, TempitaUtilityCode
+from .UtilityCode import CythonUtilityCode
+from . import Buffer
+from . import PyrexTypes
+from . import ModuleNode
+
+START_ERR = "Start must not be given."
+STOP_ERR = "Axis specification only allowed in the 'step' slot."
+STEP_ERR = "Step must be omitted, 1, or a valid specifier."
+BOTH_CF_ERR = "Cannot specify an array that is both C and Fortran contiguous."
+INVALID_ERR = "Invalid axis specification."
+NOT_CIMPORTED_ERR = "Variable was not cimported from cython.view"
+EXPR_ERR = "no expressions allowed in axis spec, only names and literals."
+CF_ERR = "Invalid axis specification for a C/Fortran contiguous array."
+ERR_UNINITIALIZED = ("Cannot check if memoryview %s is initialized without the "
+ "GIL, consider using initializedcheck(False)")
+
+
+def concat_flags(*flags):
+ return "(%s)" % "|".join(flags)
+
+
+format_flag = "PyBUF_FORMAT"
+
memview_c_contiguous = "(PyBUF_C_CONTIGUOUS | PyBUF_FORMAT)"
memview_f_contiguous = "(PyBUF_F_CONTIGUOUS | PyBUF_FORMAT)"
memview_any_contiguous = "(PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT)"
memview_full_access = "PyBUF_FULL_RO"
#memview_strided_access = "PyBUF_STRIDED_RO"
memview_strided_access = "PyBUF_RECORDS_RO"
-
-MEMVIEW_DIRECT = '__Pyx_MEMVIEW_DIRECT'
-MEMVIEW_PTR = '__Pyx_MEMVIEW_PTR'
-MEMVIEW_FULL = '__Pyx_MEMVIEW_FULL'
-MEMVIEW_CONTIG = '__Pyx_MEMVIEW_CONTIG'
-MEMVIEW_STRIDED= '__Pyx_MEMVIEW_STRIDED'
-MEMVIEW_FOLLOW = '__Pyx_MEMVIEW_FOLLOW'
-
-_spec_to_const = {
- 'direct' : MEMVIEW_DIRECT,
- 'ptr' : MEMVIEW_PTR,
- 'full' : MEMVIEW_FULL,
- 'contig' : MEMVIEW_CONTIG,
- 'strided': MEMVIEW_STRIDED,
- 'follow' : MEMVIEW_FOLLOW,
- }
-
-_spec_to_abbrev = {
- 'direct' : 'd',
- 'ptr' : 'p',
- 'full' : 'f',
- 'contig' : 'c',
- 'strided' : 's',
- 'follow' : '_',
-}
-
-memslice_entry_init = "{ 0, 0, { 0 }, { 0 }, { 0 } }"
-
-memview_name = u'memoryview'
-memview_typeptr_cname = '__pyx_memoryview_type'
-memview_objstruct_cname = '__pyx_memoryview_obj'
-memviewslice_cname = u'__Pyx_memviewslice'
-
-
-def put_init_entry(mv_cname, code):
- code.putln("%s.data = NULL;" % mv_cname)
- code.putln("%s.memview = NULL;" % mv_cname)
-
-
-#def axes_to_str(axes):
-# return "".join([access[0].upper()+packing[0] for (access, packing) in axes])
-
-
-def put_acquire_memoryviewslice(lhs_cname, lhs_type, lhs_pos, rhs, code,
- have_gil=False, first_assignment=True):
- "We can avoid decreffing the lhs if we know it is the first assignment"
- assert rhs.type.is_memoryviewslice
-
- pretty_rhs = rhs.result_in_temp() or rhs.is_simple()
- if pretty_rhs:
- rhstmp = rhs.result()
- else:
- rhstmp = code.funcstate.allocate_temp(lhs_type, manage_ref=False)
- code.putln("%s = %s;" % (rhstmp, rhs.result_as(lhs_type)))
-
- # Allow uninitialized assignment
- #code.putln(code.put_error_if_unbound(lhs_pos, rhs.entry))
- put_assign_to_memviewslice(lhs_cname, rhs, rhstmp, lhs_type, code,
- have_gil=have_gil, first_assignment=first_assignment)
-
- if not pretty_rhs:
- code.funcstate.release_temp(rhstmp)
-
-
-def put_assign_to_memviewslice(lhs_cname, rhs, rhs_cname, memviewslicetype, code,
- have_gil=False, first_assignment=False):
- if not first_assignment:
- code.put_xdecref_memoryviewslice(lhs_cname, have_gil=have_gil)
-
- if not rhs.result_in_temp():
- rhs.make_owned_memoryviewslice(code)
-
- code.putln("%s = %s;" % (lhs_cname, rhs_cname))
-
-
-def get_buf_flags(specs):
- is_c_contig, is_f_contig = is_cf_contig(specs)
-
- if is_c_contig:
- return memview_c_contiguous
- elif is_f_contig:
- return memview_f_contiguous
-
- access, packing = zip(*specs)
-
- if 'full' in access or 'ptr' in access:
- return memview_full_access
- else:
- return memview_strided_access
-
-
-def insert_newaxes(memoryviewtype, n):
- axes = [('direct', 'strided')] * n
- axes.extend(memoryviewtype.axes)
- return PyrexTypes.MemoryViewSliceType(memoryviewtype.dtype, axes)
-
-
-def broadcast_types(src, dst):
- n = abs(src.ndim - dst.ndim)
- if src.ndim < dst.ndim:
- return insert_newaxes(src, n), dst
- else:
- return src, insert_newaxes(dst, n)
-
-
-def valid_memslice_dtype(dtype, i=0):
- """
- Return whether type dtype can be used as the base type of a
- memoryview slice.
-
- We support structs, numeric types and objects
- """
- if dtype.is_complex and dtype.real_type.is_int:
- return False
-
- if dtype is PyrexTypes.c_bint_type:
- return False
-
- if dtype.is_struct and dtype.kind == 'struct':
- for member in dtype.scope.var_entries:
- if not valid_memslice_dtype(member.type):
- return False
-
- return True
-
- return (
- dtype.is_error or
- # Pointers are not valid (yet)
- # (dtype.is_ptr and valid_memslice_dtype(dtype.base_type)) or
- (dtype.is_array and i < 8 and
- valid_memslice_dtype(dtype.base_type, i + 1)) or
- dtype.is_numeric or
- dtype.is_pyobject or
- dtype.is_fused or # accept this as it will be replaced by specializations later
- (dtype.is_typedef and valid_memslice_dtype(dtype.typedef_base_type))
- )
-
-
-class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
+
+MEMVIEW_DIRECT = '__Pyx_MEMVIEW_DIRECT'
+MEMVIEW_PTR = '__Pyx_MEMVIEW_PTR'
+MEMVIEW_FULL = '__Pyx_MEMVIEW_FULL'
+MEMVIEW_CONTIG = '__Pyx_MEMVIEW_CONTIG'
+MEMVIEW_STRIDED= '__Pyx_MEMVIEW_STRIDED'
+MEMVIEW_FOLLOW = '__Pyx_MEMVIEW_FOLLOW'
+
+_spec_to_const = {
+ 'direct' : MEMVIEW_DIRECT,
+ 'ptr' : MEMVIEW_PTR,
+ 'full' : MEMVIEW_FULL,
+ 'contig' : MEMVIEW_CONTIG,
+ 'strided': MEMVIEW_STRIDED,
+ 'follow' : MEMVIEW_FOLLOW,
+ }
+
+_spec_to_abbrev = {
+ 'direct' : 'd',
+ 'ptr' : 'p',
+ 'full' : 'f',
+ 'contig' : 'c',
+ 'strided' : 's',
+ 'follow' : '_',
+}
+
+memslice_entry_init = "{ 0, 0, { 0 }, { 0 }, { 0 } }"
+
+memview_name = u'memoryview'
+memview_typeptr_cname = '__pyx_memoryview_type'
+memview_objstruct_cname = '__pyx_memoryview_obj'
+memviewslice_cname = u'__Pyx_memviewslice'
+
+
+def put_init_entry(mv_cname, code):
+ code.putln("%s.data = NULL;" % mv_cname)
+ code.putln("%s.memview = NULL;" % mv_cname)
+
+
+#def axes_to_str(axes):
+# return "".join([access[0].upper()+packing[0] for (access, packing) in axes])
+
+
+def put_acquire_memoryviewslice(lhs_cname, lhs_type, lhs_pos, rhs, code,
+ have_gil=False, first_assignment=True):
+ "We can avoid decreffing the lhs if we know it is the first assignment"
+ assert rhs.type.is_memoryviewslice
+
+ pretty_rhs = rhs.result_in_temp() or rhs.is_simple()
+ if pretty_rhs:
+ rhstmp = rhs.result()
+ else:
+ rhstmp = code.funcstate.allocate_temp(lhs_type, manage_ref=False)
+ code.putln("%s = %s;" % (rhstmp, rhs.result_as(lhs_type)))
+
+ # Allow uninitialized assignment
+ #code.putln(code.put_error_if_unbound(lhs_pos, rhs.entry))
+ put_assign_to_memviewslice(lhs_cname, rhs, rhstmp, lhs_type, code,
+ have_gil=have_gil, first_assignment=first_assignment)
+
+ if not pretty_rhs:
+ code.funcstate.release_temp(rhstmp)
+
+
+def put_assign_to_memviewslice(lhs_cname, rhs, rhs_cname, memviewslicetype, code,
+ have_gil=False, first_assignment=False):
+ if not first_assignment:
+ code.put_xdecref_memoryviewslice(lhs_cname, have_gil=have_gil)
+
+ if not rhs.result_in_temp():
+ rhs.make_owned_memoryviewslice(code)
+
+ code.putln("%s = %s;" % (lhs_cname, rhs_cname))
+
+
+def get_buf_flags(specs):
+ is_c_contig, is_f_contig = is_cf_contig(specs)
+
+ if is_c_contig:
+ return memview_c_contiguous
+ elif is_f_contig:
+ return memview_f_contiguous
+
+ access, packing = zip(*specs)
+
+ if 'full' in access or 'ptr' in access:
+ return memview_full_access
+ else:
+ return memview_strided_access
+
+
+def insert_newaxes(memoryviewtype, n):
+ axes = [('direct', 'strided')] * n
+ axes.extend(memoryviewtype.axes)
+ return PyrexTypes.MemoryViewSliceType(memoryviewtype.dtype, axes)
+
+
+def broadcast_types(src, dst):
+ n = abs(src.ndim - dst.ndim)
+ if src.ndim < dst.ndim:
+ return insert_newaxes(src, n), dst
+ else:
+ return src, insert_newaxes(dst, n)
+
+
+def valid_memslice_dtype(dtype, i=0):
+ """
+ Return whether type dtype can be used as the base type of a
+ memoryview slice.
+
+ We support structs, numeric types and objects
+ """
+ if dtype.is_complex and dtype.real_type.is_int:
+ return False
+
+ if dtype is PyrexTypes.c_bint_type:
+ return False
+
+ if dtype.is_struct and dtype.kind == 'struct':
+ for member in dtype.scope.var_entries:
+ if not valid_memslice_dtype(member.type):
+ return False
+
+ return True
+
+ return (
+ dtype.is_error or
+ # Pointers are not valid (yet)
+ # (dtype.is_ptr and valid_memslice_dtype(dtype.base_type)) or
+ (dtype.is_array and i < 8 and
+ valid_memslice_dtype(dtype.base_type, i + 1)) or
+ dtype.is_numeric or
+ dtype.is_pyobject or
+ dtype.is_fused or # accept this as it will be replaced by specializations later
+ (dtype.is_typedef and valid_memslice_dtype(dtype.typedef_base_type))
+ )
+
+
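A quick, illustrative check of the rule above, assuming these names are importable from this tree's Cython.Compiler package:

from Cython.Compiler import PyrexTypes
from Cython.Compiler.MemoryView import valid_memslice_dtype

assert valid_memslice_dtype(PyrexTypes.c_double_type)    # plain numeric types are allowed
assert not valid_memslice_dtype(PyrexTypes.c_bint_type)  # bint is explicitly rejected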
+class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
"""
May be used during code generation time to be queried for
shape/strides/suboffsets attributes, or to perform indexing or slicing.
"""
- def __init__(self, entry):
- self.entry = entry
- self.type = entry.type
- self.cname = entry.cname
-
- self.buf_ptr = "%s.data" % self.cname
-
- dtype = self.entry.type.dtype
+ def __init__(self, entry):
+ self.entry = entry
+ self.type = entry.type
+ self.cname = entry.cname
+
+ self.buf_ptr = "%s.data" % self.cname
+
+ dtype = self.entry.type.dtype
self.buf_ptr_type = PyrexTypes.CPtrType(dtype)
self.init_attributes()
-
- def get_buf_suboffsetvars(self):
- return self._for_all_ndim("%s.suboffsets[%d]")
-
- def get_buf_stridevars(self):
- return self._for_all_ndim("%s.strides[%d]")
-
- def get_buf_shapevars(self):
- return self._for_all_ndim("%s.shape[%d]")
-
- def generate_buffer_lookup_code(self, code, index_cnames):
- axes = [(dim, index_cnames[dim], access, packing)
- for dim, (access, packing) in enumerate(self.type.axes)]
- return self._generate_buffer_lookup_code(code, axes)
-
- def _generate_buffer_lookup_code(self, code, axes, cast_result=True):
+
+ def get_buf_suboffsetvars(self):
+ return self._for_all_ndim("%s.suboffsets[%d]")
+
+ def get_buf_stridevars(self):
+ return self._for_all_ndim("%s.strides[%d]")
+
+ def get_buf_shapevars(self):
+ return self._for_all_ndim("%s.shape[%d]")
+
+ def generate_buffer_lookup_code(self, code, index_cnames):
+ axes = [(dim, index_cnames[dim], access, packing)
+ for dim, (access, packing) in enumerate(self.type.axes)]
+ return self._generate_buffer_lookup_code(code, axes)
+
+ def _generate_buffer_lookup_code(self, code, axes, cast_result=True):
"""
Generate a single expression that indexes the memory view slice
in each dimension.
"""
- bufp = self.buf_ptr
+ bufp = self.buf_ptr
type_decl = self.type.dtype.empty_declaration_code()
-
- for dim, index, access, packing in axes:
- shape = "%s.shape[%d]" % (self.cname, dim)
- stride = "%s.strides[%d]" % (self.cname, dim)
- suboffset = "%s.suboffsets[%d]" % (self.cname, dim)
-
- flag = get_memoryview_flag(access, packing)
-
- if flag in ("generic", "generic_contiguous"):
- # Note: we cannot do cast tricks to avoid stride multiplication
- # for generic_contiguous, as we may have to do (dtype *)
- # or (dtype **) arithmetic, we won't know which unless
- # we check suboffsets
- code.globalstate.use_utility_code(memviewslice_index_helpers)
- bufp = ('__pyx_memviewslice_index_full(%s, %s, %s, %s)' %
- (bufp, index, stride, suboffset))
-
- elif flag == "indirect":
- bufp = "(%s + %s * %s)" % (bufp, index, stride)
- bufp = ("(*((char **) %s) + %s)" % (bufp, suboffset))
-
- elif flag == "indirect_contiguous":
- # Note: we do char ** arithmetic
- bufp = "(*((char **) %s + %s) + %s)" % (bufp, index, suboffset)
-
- elif flag == "strided":
- bufp = "(%s + %s * %s)" % (bufp, index, stride)
-
- else:
- assert flag == 'contiguous', flag
- bufp = '((char *) (((%s *) %s) + %s))' % (type_decl, bufp, index)
-
- bufp = '( /* dim=%d */ %s )' % (dim, bufp)
-
- if cast_result:
- return "((%s *) %s)" % (type_decl, bufp)
-
- return bufp
-
- def generate_buffer_slice_code(self, code, indices, dst, have_gil,
- have_slices, directives):
- """
- Slice a memoryviewslice.
-
- indices - list of index nodes. If not a SliceNode, or NoneNode,
- then it must be coercible to Py_ssize_t
-
- Simply call __pyx_memoryview_slice_memviewslice with the right
+
+ for dim, index, access, packing in axes:
+ shape = "%s.shape[%d]" % (self.cname, dim)
+ stride = "%s.strides[%d]" % (self.cname, dim)
+ suboffset = "%s.suboffsets[%d]" % (self.cname, dim)
+
+ flag = get_memoryview_flag(access, packing)
+
+ if flag in ("generic", "generic_contiguous"):
+ # Note: we cannot do cast tricks to avoid stride multiplication
+ # for generic_contiguous, as we may have to do (dtype *)
+ # or (dtype **) arithmetic, we won't know which unless
+ # we check suboffsets
+ code.globalstate.use_utility_code(memviewslice_index_helpers)
+ bufp = ('__pyx_memviewslice_index_full(%s, %s, %s, %s)' %
+ (bufp, index, stride, suboffset))
+
+ elif flag == "indirect":
+ bufp = "(%s + %s * %s)" % (bufp, index, stride)
+ bufp = ("(*((char **) %s) + %s)" % (bufp, suboffset))
+
+ elif flag == "indirect_contiguous":
+ # Note: we do char ** arithmetic
+ bufp = "(*((char **) %s + %s) + %s)" % (bufp, index, suboffset)
+
+ elif flag == "strided":
+ bufp = "(%s + %s * %s)" % (bufp, index, stride)
+
+ else:
+ assert flag == 'contiguous', flag
+ bufp = '((char *) (((%s *) %s) + %s))' % (type_decl, bufp, index)
+
+ bufp = '( /* dim=%d */ %s )' % (dim, bufp)
+
+ if cast_result:
+ return "((%s *) %s)" % (type_decl, bufp)
+
+ return bufp
+
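Roughly, for a two-dimensional 'direct'/'strided' slice the method above nests one stride multiplication per dimension; a small sketch that rebuilds the same string outside the compiler (the slice and temporary names are hypothetical):

bufp = "__pyx_v_a.data"
for dim, index in enumerate(("__pyx_t_1", "__pyx_t_2")):
    stride = "__pyx_v_a.strides[%d]" % dim
    bufp = "(%s + %s * %s)" % (bufp, index, stride)   # the 'strided' branch above
    bufp = "( /* dim=%d */ %s )" % (dim, bufp)
print("((double *) %s)" % bufp)                       # cast_result=True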
+ def generate_buffer_slice_code(self, code, indices, dst, have_gil,
+ have_slices, directives):
+ """
+ Slice a memoryviewslice.
+
+ indices - list of index nodes. If not a SliceNode, or NoneNode,
+ then it must be coercible to Py_ssize_t
+
+ Simply call __pyx_memoryview_slice_memviewslice with the right
arguments, unless the dimension is omitted or a bare ':', in which
case we copy over the shape/strides/suboffsets attributes directly
for that dimension.
- """
- src = self.cname
-
- code.putln("%(dst)s.data = %(src)s.data;" % locals())
- code.putln("%(dst)s.memview = %(src)s.memview;" % locals())
- code.put_incref_memoryviewslice(dst)
-
+ """
+ src = self.cname
+
+ code.putln("%(dst)s.data = %(src)s.data;" % locals())
+ code.putln("%(dst)s.memview = %(src)s.memview;" % locals())
+ code.put_incref_memoryviewslice(dst)
+
all_dimensions_direct = all(access == 'direct' for access, packing in self.type.axes)
suboffset_dim_temp = []
@@ -278,9 +278,9 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
suboffset_dim_temp.append(suboffset_dim)
return suboffset_dim_temp[0]
- dim = -1
+ dim = -1
new_ndim = 0
- for index in indices:
+ for index in indices:
if index.is_none:
# newaxis
for attrib, value in [('shape', 1), ('strides', 0), ('suboffsets', -1)]:
@@ -291,37 +291,37 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
dim += 1
access, packing = self.type.axes[dim]
-
- if isinstance(index, ExprNodes.SliceNode):
- # slice, unspecified dimension, or part of ellipsis
+
+ if isinstance(index, ExprNodes.SliceNode):
+ # slice, unspecified dimension, or part of ellipsis
d = dict(locals())
- for s in "start stop step".split():
- idx = getattr(index, s)
- have_idx = d['have_' + s] = not idx.is_none
+ for s in "start stop step".split():
+ idx = getattr(index, s)
+ have_idx = d['have_' + s] = not idx.is_none
d[s] = idx.result() if have_idx else "0"
-
+
if not (d['have_start'] or d['have_stop'] or d['have_step']):
- # full slice (:), simply copy over the extent, stride
- # and suboffset. Also update suboffset_dim if needed
- d['access'] = access
+ # full slice (:), simply copy over the extent, stride
+ # and suboffset. Also update suboffset_dim if needed
+ d['access'] = access
util_name = "SimpleSlice"
- else:
+ else:
util_name = "ToughSlice"
d['error_goto'] = code.error_goto(index.pos)
-
- new_ndim += 1
- else:
- # normal index
- idx = index.result()
-
+
+ new_ndim += 1
+ else:
+ # normal index
+ idx = index.result()
+
indirect = access != 'direct'
if indirect:
generic = access == 'full'
- if new_ndim != 0:
- return error(index.pos,
- "All preceding dimensions must be "
- "indexed and not sliced")
-
+ if new_ndim != 0:
+ return error(index.pos,
+ "All preceding dimensions must be "
+ "indexed and not sliced")
+
d = dict(
locals(),
wraparound=int(directives['wraparound']),
@@ -330,529 +330,529 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry):
if d['boundscheck']:
d['error_goto'] = code.error_goto(index.pos)
util_name = "SliceIndex"
-
+
_, impl = TempitaUtilityCode.load_as_string(util_name, "MemoryView_C.c", context=d)
code.put(impl)
-
+
if suboffset_dim_temp:
code.funcstate.release_temp(suboffset_dim_temp[0])
-
-def empty_slice(pos):
- none = ExprNodes.NoneNode(pos)
- return ExprNodes.SliceNode(pos, start=none,
- stop=none, step=none)
-
+
+def empty_slice(pos):
+ none = ExprNodes.NoneNode(pos)
+ return ExprNodes.SliceNode(pos, start=none,
+ stop=none, step=none)
+
def unellipsify(indices, ndim):
- result = []
- seen_ellipsis = False
- have_slices = False
-
+ result = []
+ seen_ellipsis = False
+ have_slices = False
+
newaxes = [newaxis for newaxis in indices if newaxis.is_none]
- n_indices = len(indices) - len(newaxes)
-
- for index in indices:
- if isinstance(index, ExprNodes.EllipsisNode):
- have_slices = True
- full_slice = empty_slice(index.pos)
-
- if seen_ellipsis:
- result.append(full_slice)
- else:
- nslices = ndim - n_indices + 1
- result.extend([full_slice] * nslices)
- seen_ellipsis = True
- else:
+ n_indices = len(indices) - len(newaxes)
+
+ for index in indices:
+ if isinstance(index, ExprNodes.EllipsisNode):
+ have_slices = True
+ full_slice = empty_slice(index.pos)
+
+ if seen_ellipsis:
+ result.append(full_slice)
+ else:
+ nslices = ndim - n_indices + 1
+ result.extend([full_slice] * nslices)
+ seen_ellipsis = True
+ else:
have_slices = have_slices or index.is_slice or index.is_none
- result.append(index)
-
- result_length = len(result) - len(newaxes)
- if result_length < ndim:
- have_slices = True
- nslices = ndim - result_length
- result.extend([empty_slice(indices[-1].pos)] * nslices)
-
+ result.append(index)
+
+ result_length = len(result) - len(newaxes)
+ if result_length < ndim:
+ have_slices = True
+ nslices = ndim - result_length
+ result.extend([empty_slice(indices[-1].pos)] * nslices)
+
return have_slices, result, newaxes
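In plain terms, unellipsify() expands an Ellipsis into enough full slices to cover every dimension and pads missing trailing dimensions; a simplified stand-alone model (it works on plain Python objects rather than Cython ExprNodes, handles only a single Ellipsis, and ignores newaxis handling):

def expand_ellipsis(indices, ndim):
    full = slice(None)
    out = []
    for index in indices:
        if index is Ellipsis:
            out.extend([full] * (ndim - len(indices) + 1))
        else:
            out.append(index)
    if len(out) < ndim:                      # pad missing trailing dimensions
        out.extend([full] * (ndim - len(out)))
    return out

print(expand_ellipsis([0, Ellipsis, slice(None)], ndim=4))
# -> [0, slice(None, None, None), slice(None, None, None), slice(None, None, None)]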
-
-
-def get_memoryview_flag(access, packing):
- if access == 'full' and packing in ('strided', 'follow'):
- return 'generic'
- elif access == 'full' and packing == 'contig':
- return 'generic_contiguous'
- elif access == 'ptr' and packing in ('strided', 'follow'):
- return 'indirect'
- elif access == 'ptr' and packing == 'contig':
- return 'indirect_contiguous'
- elif access == 'direct' and packing in ('strided', 'follow'):
- return 'strided'
- else:
- assert (access, packing) == ('direct', 'contig'), (access, packing)
- return 'contiguous'
-
+
+
+def get_memoryview_flag(access, packing):
+ if access == 'full' and packing in ('strided', 'follow'):
+ return 'generic'
+ elif access == 'full' and packing == 'contig':
+ return 'generic_contiguous'
+ elif access == 'ptr' and packing in ('strided', 'follow'):
+ return 'indirect'
+ elif access == 'ptr' and packing == 'contig':
+ return 'indirect_contiguous'
+ elif access == 'direct' and packing in ('strided', 'follow'):
+ return 'strided'
+ else:
+ assert (access, packing) == ('direct', 'contig'), (access, packing)
+ return 'contiguous'
+
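A sanity check of the mapping implemented just above, assuming the module is importable as Cython.Compiler.MemoryView:

from Cython.Compiler.MemoryView import get_memoryview_flag

assert get_memoryview_flag('direct', 'contig') == 'contiguous'
assert get_memoryview_flag('direct', 'strided') == 'strided'
assert get_memoryview_flag('ptr', 'contig') == 'indirect_contiguous'
assert get_memoryview_flag('full', 'follow') == 'generic'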
def get_is_contig_func_name(contig_type, ndim):
assert contig_type in ('C', 'F')
return "__pyx_memviewslice_is_contig_%s%d" % (contig_type, ndim)
-
+
def get_is_contig_utility(contig_type, ndim):
assert contig_type in ('C', 'F')
C = dict(context, ndim=ndim, contig_type=contig_type)
utility = load_memview_c_utility("MemviewSliceCheckContig", C, requires=[is_contig_utility])
- return utility
-
+ return utility
+
def slice_iter(slice_type, slice_result, ndim, code):
if slice_type.is_c_contig or slice_type.is_f_contig:
return ContigSliceIter(slice_type, slice_result, ndim, code)
- else:
+ else:
return StridedSliceIter(slice_type, slice_result, ndim, code)
-
-
-class SliceIter(object):
+
+
+class SliceIter(object):
def __init__(self, slice_type, slice_result, ndim, code):
- self.slice_type = slice_type
+ self.slice_type = slice_type
self.slice_result = slice_result
- self.code = code
- self.ndim = ndim
-
-
-class ContigSliceIter(SliceIter):
- def start_loops(self):
- code = self.code
- code.begin_block()
-
+ self.code = code
+ self.ndim = ndim
+
+
+class ContigSliceIter(SliceIter):
+ def start_loops(self):
+ code = self.code
+ code.begin_block()
+
type_decl = self.slice_type.dtype.empty_declaration_code()
-
+
total_size = ' * '.join("%s.shape[%d]" % (self.slice_result, i)
for i in range(self.ndim))
- code.putln("Py_ssize_t __pyx_temp_extent = %s;" % total_size)
- code.putln("Py_ssize_t __pyx_temp_idx;")
- code.putln("%s *__pyx_temp_pointer = (%s *) %s.data;" % (
+ code.putln("Py_ssize_t __pyx_temp_extent = %s;" % total_size)
+ code.putln("Py_ssize_t __pyx_temp_idx;")
+ code.putln("%s *__pyx_temp_pointer = (%s *) %s.data;" % (
type_decl, type_decl, self.slice_result))
- code.putln("for (__pyx_temp_idx = 0; "
- "__pyx_temp_idx < __pyx_temp_extent; "
- "__pyx_temp_idx++) {")
-
- return "__pyx_temp_pointer"
-
- def end_loops(self):
- self.code.putln("__pyx_temp_pointer += 1;")
- self.code.putln("}")
- self.code.end_block()
-
-
-class StridedSliceIter(SliceIter):
- def start_loops(self):
- code = self.code
- code.begin_block()
-
- for i in range(self.ndim):
+ code.putln("for (__pyx_temp_idx = 0; "
+ "__pyx_temp_idx < __pyx_temp_extent; "
+ "__pyx_temp_idx++) {")
+
+ return "__pyx_temp_pointer"
+
+ def end_loops(self):
+ self.code.putln("__pyx_temp_pointer += 1;")
+ self.code.putln("}")
+ self.code.end_block()
+
+
+class StridedSliceIter(SliceIter):
+ def start_loops(self):
+ code = self.code
+ code.begin_block()
+
+ for i in range(self.ndim):
t = i, self.slice_result, i
- code.putln("Py_ssize_t __pyx_temp_extent_%d = %s.shape[%d];" % t)
- code.putln("Py_ssize_t __pyx_temp_stride_%d = %s.strides[%d];" % t)
- code.putln("char *__pyx_temp_pointer_%d;" % i)
- code.putln("Py_ssize_t __pyx_temp_idx_%d;" % i)
-
+ code.putln("Py_ssize_t __pyx_temp_extent_%d = %s.shape[%d];" % t)
+ code.putln("Py_ssize_t __pyx_temp_stride_%d = %s.strides[%d];" % t)
+ code.putln("char *__pyx_temp_pointer_%d;" % i)
+ code.putln("Py_ssize_t __pyx_temp_idx_%d;" % i)
+
code.putln("__pyx_temp_pointer_0 = %s.data;" % self.slice_result)
-
- for i in range(self.ndim):
- if i > 0:
- code.putln("__pyx_temp_pointer_%d = __pyx_temp_pointer_%d;" % (i, i - 1))
-
- code.putln("for (__pyx_temp_idx_%d = 0; "
- "__pyx_temp_idx_%d < __pyx_temp_extent_%d; "
- "__pyx_temp_idx_%d++) {" % (i, i, i, i))
-
- return "__pyx_temp_pointer_%d" % (self.ndim - 1)
-
- def end_loops(self):
- code = self.code
- for i in range(self.ndim - 1, -1, -1):
- code.putln("__pyx_temp_pointer_%d += __pyx_temp_stride_%d;" % (i, i))
- code.putln("}")
-
- code.end_block()
-
-
-def copy_c_or_fortran_cname(memview):
- if memview.is_c_contig:
- c_or_f = 'c'
- else:
- c_or_f = 'f'
-
- return "__pyx_memoryview_copy_slice_%s_%s" % (
- memview.specialization_suffix(), c_or_f)
-
-
-def get_copy_new_utility(pos, from_memview, to_memview):
+
+ for i in range(self.ndim):
+ if i > 0:
+ code.putln("__pyx_temp_pointer_%d = __pyx_temp_pointer_%d;" % (i, i - 1))
+
+ code.putln("for (__pyx_temp_idx_%d = 0; "
+ "__pyx_temp_idx_%d < __pyx_temp_extent_%d; "
+ "__pyx_temp_idx_%d++) {" % (i, i, i, i))
+
+ return "__pyx_temp_pointer_%d" % (self.ndim - 1)
+
+ def end_loops(self):
+ code = self.code
+ for i in range(self.ndim - 1, -1, -1):
+ code.putln("__pyx_temp_pointer_%d += __pyx_temp_stride_%d;" % (i, i))
+ code.putln("}")
+
+ code.end_block()
+
+
+def copy_c_or_fortran_cname(memview):
+ if memview.is_c_contig:
+ c_or_f = 'c'
+ else:
+ c_or_f = 'f'
+
+ return "__pyx_memoryview_copy_slice_%s_%s" % (
+ memview.specialization_suffix(), c_or_f)
+
+
+def get_copy_new_utility(pos, from_memview, to_memview):
if (from_memview.dtype != to_memview.dtype and
not (from_memview.dtype.is_const and from_memview.dtype.const_base_type == to_memview.dtype)):
error(pos, "dtypes must be the same!")
return
- if len(from_memview.axes) != len(to_memview.axes):
+ if len(from_memview.axes) != len(to_memview.axes):
error(pos, "number of dimensions must be same")
return
- if not (to_memview.is_c_contig or to_memview.is_f_contig):
+ if not (to_memview.is_c_contig or to_memview.is_f_contig):
error(pos, "to_memview must be c or f contiguous.")
return
-
- for (access, packing) in from_memview.axes:
- if access != 'direct':
+
+ for (access, packing) in from_memview.axes:
+ if access != 'direct':
error(pos, "cannot handle 'full' or 'ptr' access at this time.")
return
-
- if to_memview.is_c_contig:
- mode = 'c'
- contig_flag = memview_c_contiguous
- elif to_memview.is_f_contig:
- mode = 'fortran'
- contig_flag = memview_f_contiguous
-
- return load_memview_c_utility(
- "CopyContentsUtility",
- context=dict(
- context,
- mode=mode,
+
+ if to_memview.is_c_contig:
+ mode = 'c'
+ contig_flag = memview_c_contiguous
+ elif to_memview.is_f_contig:
+ mode = 'fortran'
+ contig_flag = memview_f_contiguous
+
+ return load_memview_c_utility(
+ "CopyContentsUtility",
+ context=dict(
+ context,
+ mode=mode,
dtype_decl=to_memview.dtype.empty_declaration_code(),
- contig_flag=contig_flag,
- ndim=to_memview.ndim,
- func_cname=copy_c_or_fortran_cname(to_memview),
- dtype_is_object=int(to_memview.dtype.is_pyobject)),
- requires=[copy_contents_new_utility])
-
-
-def get_axes_specs(env, axes):
- '''
- get_axes_specs(env, axes) -> list of (access, packing) specs for each axis.
- access is one of 'full', 'ptr' or 'direct'
- packing is one of 'contig', 'strided' or 'follow'
- '''
-
- cythonscope = env.global_scope().context.cython_scope
- cythonscope.load_cythonscope()
- viewscope = cythonscope.viewscope
-
- access_specs = tuple([viewscope.lookup(name)
- for name in ('full', 'direct', 'ptr')])
- packing_specs = tuple([viewscope.lookup(name)
- for name in ('contig', 'strided', 'follow')])
-
- is_f_contig, is_c_contig = False, False
- default_access, default_packing = 'direct', 'strided'
- cf_access, cf_packing = default_access, 'follow'
-
- axes_specs = []
- # analyse all axes.
- for idx, axis in enumerate(axes):
- if not axis.start.is_none:
- raise CompileError(axis.start.pos, START_ERR)
-
- if not axis.stop.is_none:
- raise CompileError(axis.stop.pos, STOP_ERR)
-
- if axis.step.is_none:
- axes_specs.append((default_access, default_packing))
-
- elif isinstance(axis.step, IntNode):
- # the packing for the ::1 axis is contiguous,
- # all others are cf_packing.
- if axis.step.compile_time_value(env) != 1:
- raise CompileError(axis.step.pos, STEP_ERR)
-
- axes_specs.append((cf_access, 'cfcontig'))
-
- elif isinstance(axis.step, (NameNode, AttributeNode)):
- entry = _get_resolved_spec(env, axis.step)
- if entry.name in view_constant_to_access_packing:
- axes_specs.append(view_constant_to_access_packing[entry.name])
- else:
+ contig_flag=contig_flag,
+ ndim=to_memview.ndim,
+ func_cname=copy_c_or_fortran_cname(to_memview),
+ dtype_is_object=int(to_memview.dtype.is_pyobject)),
+ requires=[copy_contents_new_utility])
+
+
+def get_axes_specs(env, axes):
+ '''
+ get_axes_specs(env, axes) -> list of (access, packing) specs for each axis.
+ access is one of 'full', 'ptr' or 'direct'
+ packing is one of 'contig', 'strided' or 'follow'
+ '''
+
+ cythonscope = env.global_scope().context.cython_scope
+ cythonscope.load_cythonscope()
+ viewscope = cythonscope.viewscope
+
+ access_specs = tuple([viewscope.lookup(name)
+ for name in ('full', 'direct', 'ptr')])
+ packing_specs = tuple([viewscope.lookup(name)
+ for name in ('contig', 'strided', 'follow')])
+
+ is_f_contig, is_c_contig = False, False
+ default_access, default_packing = 'direct', 'strided'
+ cf_access, cf_packing = default_access, 'follow'
+
+ axes_specs = []
+ # analyse all axes.
+ for idx, axis in enumerate(axes):
+ if not axis.start.is_none:
+ raise CompileError(axis.start.pos, START_ERR)
+
+ if not axis.stop.is_none:
+ raise CompileError(axis.stop.pos, STOP_ERR)
+
+ if axis.step.is_none:
+ axes_specs.append((default_access, default_packing))
+
+ elif isinstance(axis.step, IntNode):
+ # the packing for the ::1 axis is contiguous,
+ # all others are cf_packing.
+ if axis.step.compile_time_value(env) != 1:
+ raise CompileError(axis.step.pos, STEP_ERR)
+
+ axes_specs.append((cf_access, 'cfcontig'))
+
+ elif isinstance(axis.step, (NameNode, AttributeNode)):
+ entry = _get_resolved_spec(env, axis.step)
+ if entry.name in view_constant_to_access_packing:
+ axes_specs.append(view_constant_to_access_packing[entry.name])
+ else:
raise CompileError(axis.step.pos, INVALID_ERR)
-
- else:
- raise CompileError(axis.step.pos, INVALID_ERR)
-
- # First, find out if we have a ::1 somewhere
- contig_dim = 0
- is_contig = False
- for idx, (access, packing) in enumerate(axes_specs):
- if packing == 'cfcontig':
- if is_contig:
- raise CompileError(axis.step.pos, BOTH_CF_ERR)
-
- contig_dim = idx
- axes_specs[idx] = (access, 'contig')
- is_contig = True
-
- if is_contig:
- # We have a ::1 somewhere, see if we're C or Fortran contiguous
- if contig_dim == len(axes) - 1:
- is_c_contig = True
- else:
- is_f_contig = True
-
- if contig_dim and not axes_specs[contig_dim - 1][0] in ('full', 'ptr'):
- raise CompileError(axes[contig_dim].pos,
- "Fortran contiguous specifier must follow an indirect dimension")
-
- if is_c_contig:
- # Contiguous in the last dimension, find the last indirect dimension
- contig_dim = -1
- for idx, (access, packing) in enumerate(reversed(axes_specs)):
- if access in ('ptr', 'full'):
- contig_dim = len(axes) - idx - 1
-
- # Replace 'strided' with 'follow' for any dimension following the last
- # indirect dimension, the first dimension or the dimension following
- # the ::1.
- # int[::indirect, ::1, :, :]
- # ^ ^
- # int[::indirect, :, :, ::1]
- # ^ ^
- start = contig_dim + 1
- stop = len(axes) - is_c_contig
- for idx, (access, packing) in enumerate(axes_specs[start:stop]):
- idx = contig_dim + 1 + idx
- if access != 'direct':
- raise CompileError(axes[idx].pos,
- "Indirect dimension may not follow "
- "Fortran contiguous dimension")
- if packing == 'contig':
- raise CompileError(axes[idx].pos,
- "Dimension may not be contiguous")
- axes_specs[idx] = (access, cf_packing)
-
- if is_c_contig:
- # For C contiguity, we need to fix the 'contig' dimension
- # after the loop
- a, p = axes_specs[-1]
- axes_specs[-1] = a, 'contig'
-
- validate_axes_specs([axis.start.pos for axis in axes],
- axes_specs,
- is_c_contig,
- is_f_contig)
-
- return axes_specs
-
-
-def validate_axes(pos, axes):
- if len(axes) >= Options.buffer_max_dims:
- error(pos, "More dimensions than the maximum number"
- " of buffer dimensions were used.")
- return False
-
- return True
-
-
-def is_cf_contig(specs):
- is_c_contig = is_f_contig = False
-
- if len(specs) == 1 and specs == [('direct', 'contig')]:
- is_c_contig = True
-
- elif (specs[-1] == ('direct','contig') and
- all(axis == ('direct','follow') for axis in specs[:-1])):
- # c_contiguous: 'follow', 'follow', ..., 'follow', 'contig'
- is_c_contig = True
-
- elif (len(specs) > 1 and
- specs[0] == ('direct','contig') and
- all(axis == ('direct','follow') for axis in specs[1:])):
- # f_contiguous: 'contig', 'follow', 'follow', ..., 'follow'
- is_f_contig = True
-
- return is_c_contig, is_f_contig
-
-
-def get_mode(specs):
- is_c_contig, is_f_contig = is_cf_contig(specs)
-
- if is_c_contig:
- return 'c'
- elif is_f_contig:
- return 'fortran'
-
- for access, packing in specs:
- if access in ('ptr', 'full'):
- return 'full'
-
- return 'strided'
-
-view_constant_to_access_packing = {
- 'generic': ('full', 'strided'),
- 'strided': ('direct', 'strided'),
- 'indirect': ('ptr', 'strided'),
- 'generic_contiguous': ('full', 'contig'),
- 'contiguous': ('direct', 'contig'),
- 'indirect_contiguous': ('ptr', 'contig'),
-}
-
-def validate_axes_specs(positions, specs, is_c_contig, is_f_contig):
-
- packing_specs = ('contig', 'strided', 'follow')
- access_specs = ('direct', 'ptr', 'full')
-
- # is_c_contig, is_f_contig = is_cf_contig(specs)
-
- has_contig = has_follow = has_strided = has_generic_contig = False
-
- last_indirect_dimension = -1
- for idx, (access, packing) in enumerate(specs):
- if access == 'ptr':
- last_indirect_dimension = idx
-
+
+ else:
+ raise CompileError(axis.step.pos, INVALID_ERR)
+
+ # First, find out if we have a ::1 somewhere
+ contig_dim = 0
+ is_contig = False
+ for idx, (access, packing) in enumerate(axes_specs):
+ if packing == 'cfcontig':
+ if is_contig:
+ raise CompileError(axis.step.pos, BOTH_CF_ERR)
+
+ contig_dim = idx
+ axes_specs[idx] = (access, 'contig')
+ is_contig = True
+
+ if is_contig:
+ # We have a ::1 somewhere, see if we're C or Fortran contiguous
+ if contig_dim == len(axes) - 1:
+ is_c_contig = True
+ else:
+ is_f_contig = True
+
+ if contig_dim and not axes_specs[contig_dim - 1][0] in ('full', 'ptr'):
+ raise CompileError(axes[contig_dim].pos,
+ "Fortran contiguous specifier must follow an indirect dimension")
+
+ if is_c_contig:
+ # Contiguous in the last dimension, find the last indirect dimension
+ contig_dim = -1
+ for idx, (access, packing) in enumerate(reversed(axes_specs)):
+ if access in ('ptr', 'full'):
+ contig_dim = len(axes) - idx - 1
+
+ # Replace 'strided' with 'follow' for any dimension following the last
+ # indirect dimension, the first dimension or the dimension following
+ # the ::1.
+ # int[::indirect, ::1, :, :]
+ # ^ ^
+ # int[::indirect, :, :, ::1]
+ # ^ ^
+ start = contig_dim + 1
+ stop = len(axes) - is_c_contig
+ for idx, (access, packing) in enumerate(axes_specs[start:stop]):
+ idx = contig_dim + 1 + idx
+ if access != 'direct':
+ raise CompileError(axes[idx].pos,
+ "Indirect dimension may not follow "
+ "Fortran contiguous dimension")
+ if packing == 'contig':
+ raise CompileError(axes[idx].pos,
+ "Dimension may not be contiguous")
+ axes_specs[idx] = (access, cf_packing)
+
+ if is_c_contig:
+ # For C contiguity, we need to fix the 'contig' dimension
+ # after the loop
+ a, p = axes_specs[-1]
+ axes_specs[-1] = a, 'contig'
+
+ validate_axes_specs([axis.start.pos for axis in axes],
+ axes_specs,
+ is_c_contig,
+ is_f_contig)
+
+ return axes_specs
+
+
+def validate_axes(pos, axes):
+ if len(axes) >= Options.buffer_max_dims:
+ error(pos, "More dimensions than the maximum number"
+ " of buffer dimensions were used.")
+ return False
+
+ return True
+
+
+def is_cf_contig(specs):
+ is_c_contig = is_f_contig = False
+
+ if len(specs) == 1 and specs == [('direct', 'contig')]:
+ is_c_contig = True
+
+ elif (specs[-1] == ('direct','contig') and
+ all(axis == ('direct','follow') for axis in specs[:-1])):
+ # c_contiguous: 'follow', 'follow', ..., 'follow', 'contig'
+ is_c_contig = True
+
+ elif (len(specs) > 1 and
+ specs[0] == ('direct','contig') and
+ all(axis == ('direct','follow') for axis in specs[1:])):
+ # f_contiguous: 'contig', 'follow', 'follow', ..., 'follow'
+ is_f_contig = True
+
+ return is_c_contig, is_f_contig
+
+
+def get_mode(specs):
+ is_c_contig, is_f_contig = is_cf_contig(specs)
+
+ if is_c_contig:
+ return 'c'
+ elif is_f_contig:
+ return 'fortran'
+
+ for access, packing in specs:
+ if access in ('ptr', 'full'):
+ return 'full'
+
+ return 'strided'
+
+view_constant_to_access_packing = {
+ 'generic': ('full', 'strided'),
+ 'strided': ('direct', 'strided'),
+ 'indirect': ('ptr', 'strided'),
+ 'generic_contiguous': ('full', 'contig'),
+ 'contiguous': ('direct', 'contig'),
+ 'indirect_contiguous': ('ptr', 'contig'),
+}
+
+def validate_axes_specs(positions, specs, is_c_contig, is_f_contig):
+
+ packing_specs = ('contig', 'strided', 'follow')
+ access_specs = ('direct', 'ptr', 'full')
+
+ # is_c_contig, is_f_contig = is_cf_contig(specs)
+
+ has_contig = has_follow = has_strided = has_generic_contig = False
+
+ last_indirect_dimension = -1
+ for idx, (access, packing) in enumerate(specs):
+ if access == 'ptr':
+ last_indirect_dimension = idx
+
for idx, (pos, (access, packing)) in enumerate(zip(positions, specs)):
-
- if not (access in access_specs and
- packing in packing_specs):
- raise CompileError(pos, "Invalid axes specification.")
-
- if packing == 'strided':
- has_strided = True
- elif packing == 'contig':
- if has_contig:
- raise CompileError(pos, "Only one direct contiguous "
- "axis may be specified.")
-
- valid_contig_dims = last_indirect_dimension + 1, len(specs) - 1
- if idx not in valid_contig_dims and access != 'ptr':
- if last_indirect_dimension + 1 != len(specs) - 1:
- dims = "dimensions %d and %d" % valid_contig_dims
- else:
- dims = "dimension %d" % valid_contig_dims[0]
-
- raise CompileError(pos, "Only %s may be contiguous and direct" % dims)
-
- has_contig = access != 'ptr'
- elif packing == 'follow':
- if has_strided:
- raise CompileError(pos, "A memoryview cannot have both follow and strided axis specifiers.")
- if not (is_c_contig or is_f_contig):
- raise CompileError(pos, "Invalid use of the follow specifier.")
-
- if access in ('ptr', 'full'):
- has_strided = False
-
-def _get_resolved_spec(env, spec):
- # spec must be a NameNode or an AttributeNode
- if isinstance(spec, NameNode):
- return _resolve_NameNode(env, spec)
- elif isinstance(spec, AttributeNode):
- return _resolve_AttributeNode(env, spec)
- else:
- raise CompileError(spec.pos, INVALID_ERR)
-
-def _resolve_NameNode(env, node):
- try:
- resolved_name = env.lookup(node.name).name
- except AttributeError:
- raise CompileError(node.pos, INVALID_ERR)
-
- viewscope = env.global_scope().context.cython_scope.viewscope
- entry = viewscope.lookup(resolved_name)
- if entry is None:
- raise CompileError(node.pos, NOT_CIMPORTED_ERR)
-
- return entry
-
-def _resolve_AttributeNode(env, node):
- path = []
- while isinstance(node, AttributeNode):
- path.insert(0, node.attribute)
- node = node.obj
- if isinstance(node, NameNode):
- path.insert(0, node.name)
- else:
- raise CompileError(node.pos, EXPR_ERR)
- modnames = path[:-1]
- # must be at least 1 module name, o/w not an AttributeNode.
- assert modnames
-
- scope = env
- for modname in modnames:
- mod = scope.lookup(modname)
- if not mod or not mod.as_module:
- raise CompileError(
- node.pos, "undeclared name not builtin: %s" % modname)
- scope = mod.as_module
-
- entry = scope.lookup(path[-1])
- if not entry:
- raise CompileError(node.pos, "No such attribute '%s'" % path[-1])
-
- return entry
-
-#
-### Utility loading
-#
-
-def load_memview_cy_utility(util_code_name, context=None, **kwargs):
- return CythonUtilityCode.load(util_code_name, "MemoryView.pyx",
- context=context, **kwargs)
-
-def load_memview_c_utility(util_code_name, context=None, **kwargs):
- if context is None:
- return UtilityCode.load(util_code_name, "MemoryView_C.c", **kwargs)
- else:
- return TempitaUtilityCode.load(util_code_name, "MemoryView_C.c",
- context=context, **kwargs)
-
-def use_cython_array_utility_code(env):
- cython_scope = env.global_scope().context.cython_scope
- cython_scope.load_cythonscope()
- cython_scope.viewscope.lookup('array_cwrapper').used = True
-
-context = {
- 'memview_struct_name': memview_objstruct_cname,
- 'max_dims': Options.buffer_max_dims,
- 'memviewslice_name': memviewslice_cname,
- 'memslice_init': memslice_entry_init,
-}
-memviewslice_declare_code = load_memview_c_utility(
- "MemviewSliceStruct",
- context=context,
- requires=[])
-
+
+ if not (access in access_specs and
+ packing in packing_specs):
+ raise CompileError(pos, "Invalid axes specification.")
+
+ if packing == 'strided':
+ has_strided = True
+ elif packing == 'contig':
+ if has_contig:
+ raise CompileError(pos, "Only one direct contiguous "
+ "axis may be specified.")
+
+ valid_contig_dims = last_indirect_dimension + 1, len(specs) - 1
+ if idx not in valid_contig_dims and access != 'ptr':
+ if last_indirect_dimension + 1 != len(specs) - 1:
+ dims = "dimensions %d and %d" % valid_contig_dims
+ else:
+ dims = "dimension %d" % valid_contig_dims[0]
+
+ raise CompileError(pos, "Only %s may be contiguous and direct" % dims)
+
+ has_contig = access != 'ptr'
+ elif packing == 'follow':
+ if has_strided:
+ raise CompileError(pos, "A memoryview cannot have both follow and strided axis specifiers.")
+ if not (is_c_contig or is_f_contig):
+ raise CompileError(pos, "Invalid use of the follow specifier.")
+
+ if access in ('ptr', 'full'):
+ has_strided = False
+
+def _get_resolved_spec(env, spec):
+ # spec must be a NameNode or an AttributeNode
+ if isinstance(spec, NameNode):
+ return _resolve_NameNode(env, spec)
+ elif isinstance(spec, AttributeNode):
+ return _resolve_AttributeNode(env, spec)
+ else:
+ raise CompileError(spec.pos, INVALID_ERR)
+
+def _resolve_NameNode(env, node):
+ try:
+ resolved_name = env.lookup(node.name).name
+ except AttributeError:
+ raise CompileError(node.pos, INVALID_ERR)
+
+ viewscope = env.global_scope().context.cython_scope.viewscope
+ entry = viewscope.lookup(resolved_name)
+ if entry is None:
+ raise CompileError(node.pos, NOT_CIMPORTED_ERR)
+
+ return entry
+
+def _resolve_AttributeNode(env, node):
+ path = []
+ while isinstance(node, AttributeNode):
+ path.insert(0, node.attribute)
+ node = node.obj
+ if isinstance(node, NameNode):
+ path.insert(0, node.name)
+ else:
+ raise CompileError(node.pos, EXPR_ERR)
+ modnames = path[:-1]
+ # must be at least 1 module name, o/w not an AttributeNode.
+ assert modnames
+
+ scope = env
+ for modname in modnames:
+ mod = scope.lookup(modname)
+ if not mod or not mod.as_module:
+ raise CompileError(
+ node.pos, "undeclared name not builtin: %s" % modname)
+ scope = mod.as_module
+
+ entry = scope.lookup(path[-1])
+ if not entry:
+ raise CompileError(node.pos, "No such attribute '%s'" % path[-1])
+
+ return entry
+
+#
+### Utility loading
+#
+
+def load_memview_cy_utility(util_code_name, context=None, **kwargs):
+ return CythonUtilityCode.load(util_code_name, "MemoryView.pyx",
+ context=context, **kwargs)
+
+def load_memview_c_utility(util_code_name, context=None, **kwargs):
+ if context is None:
+ return UtilityCode.load(util_code_name, "MemoryView_C.c", **kwargs)
+ else:
+ return TempitaUtilityCode.load(util_code_name, "MemoryView_C.c",
+ context=context, **kwargs)
+
+def use_cython_array_utility_code(env):
+ cython_scope = env.global_scope().context.cython_scope
+ cython_scope.load_cythonscope()
+ cython_scope.viewscope.lookup('array_cwrapper').used = True
+
+context = {
+ 'memview_struct_name': memview_objstruct_cname,
+ 'max_dims': Options.buffer_max_dims,
+ 'memviewslice_name': memviewslice_cname,
+ 'memslice_init': memslice_entry_init,
+}
+memviewslice_declare_code = load_memview_c_utility(
+ "MemviewSliceStruct",
+ context=context,
+ requires=[])
+
atomic_utility = load_memview_c_utility("Atomics", context)
-
-memviewslice_init_code = load_memview_c_utility(
- "MemviewSliceInit",
- context=dict(context, BUF_MAX_NDIMS=Options.buffer_max_dims),
- requires=[memviewslice_declare_code,
- atomic_utility],
-)
-
-memviewslice_index_helpers = load_memview_c_utility("MemviewSliceIndex")
-
-typeinfo_to_format_code = load_memview_cy_utility(
- "BufferFormatFromTypeInfo", requires=[Buffer._typeinfo_to_format_code])
-
-is_contig_utility = load_memview_c_utility("MemviewSliceIsContig", context)
-overlapping_utility = load_memview_c_utility("OverlappingSlices", context)
-copy_contents_new_utility = load_memview_c_utility(
- "MemviewSliceCopyTemplate",
- context,
- requires=[], # require cython_array_utility_code
-)
-
-view_utility_code = load_memview_cy_utility(
- "View.MemoryView",
- context=context,
- requires=[Buffer.GetAndReleaseBufferUtilityCode(),
- Buffer.buffer_struct_declare_code,
+
+memviewslice_init_code = load_memview_c_utility(
+ "MemviewSliceInit",
+ context=dict(context, BUF_MAX_NDIMS=Options.buffer_max_dims),
+ requires=[memviewslice_declare_code,
+ atomic_utility],
+)
+
+memviewslice_index_helpers = load_memview_c_utility("MemviewSliceIndex")
+
+typeinfo_to_format_code = load_memview_cy_utility(
+ "BufferFormatFromTypeInfo", requires=[Buffer._typeinfo_to_format_code])
+
+is_contig_utility = load_memview_c_utility("MemviewSliceIsContig", context)
+overlapping_utility = load_memview_c_utility("OverlappingSlices", context)
+copy_contents_new_utility = load_memview_c_utility(
+ "MemviewSliceCopyTemplate",
+ context,
+ requires=[], # require cython_array_utility_code
+)
+
+view_utility_code = load_memview_cy_utility(
+ "View.MemoryView",
+ context=context,
+ requires=[Buffer.GetAndReleaseBufferUtilityCode(),
+ Buffer.buffer_struct_declare_code,
Buffer.buffer_formats_declare_code,
- memviewslice_init_code,
- is_contig_utility,
- overlapping_utility,
- copy_contents_new_utility,
- ModuleNode.capsule_utility_code],
-)
-view_utility_whitelist = ('array', 'memoryview', 'array_cwrapper',
- 'generic', 'strided', 'indirect', 'contiguous',
- 'indirect_contiguous')
-
-memviewslice_declare_code.requires.append(view_utility_code)
-copy_contents_new_utility.requires.append(view_utility_code)
+ memviewslice_init_code,
+ is_contig_utility,
+ overlapping_utility,
+ copy_contents_new_utility,
+ ModuleNode.capsule_utility_code],
+)
+view_utility_whitelist = ('array', 'memoryview', 'array_cwrapper',
+ 'generic', 'strided', 'indirect', 'contiguous',
+ 'indirect_contiguous')
+
+memviewslice_declare_code.requires.append(view_utility_code)
+copy_contents_new_utility.requires.append(view_utility_code)
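
Editor's note (not part of the patch): the MemoryView.py hunk above ends with the axis-spec machinery — the view_constant_to_access_packing table plus is_cf_contig and get_mode — that turns each memoryview axis declaration into an (access, packing) pair and then into a buffer mode. The standalone sketch below condenses that mapping so it can be run outside the compiler; the dictionary is copied verbatim from the hunk, the two helpers are simplified re-statements of the functions shown above, and the example spec lists in the comments are illustrative assumptions rather than output taken from the patch.

    # Minimal sketch of the spec -> mode mapping from the MemoryView.py hunk above.
    # access is one of 'full', 'ptr', 'direct'; packing is 'contig', 'strided', 'follow'.
    view_constant_to_access_packing = {
        'generic': ('full', 'strided'),
        'strided': ('direct', 'strided'),
        'indirect': ('ptr', 'strided'),
        'generic_contiguous': ('full', 'contig'),
        'contiguous': ('direct', 'contig'),
        'indirect_contiguous': ('ptr', 'contig'),
    }

    def is_cf_contig(specs):
        # C-contiguous: trailing ('direct', 'contig'), every earlier axis ('direct', 'follow').
        # A single ('direct', 'contig') axis also counts as C-contiguous.
        is_c = (specs[-1] == ('direct', 'contig') and
                all(axis == ('direct', 'follow') for axis in specs[:-1]))
        # Fortran-contiguous: leading ('direct', 'contig'), every later axis ('direct', 'follow').
        is_f = (len(specs) > 1 and specs[0] == ('direct', 'contig') and
                all(axis == ('direct', 'follow') for axis in specs[1:]))
        return is_c, is_f

    def get_mode(specs):
        # Mirrors get_mode() from the hunk: contiguity first, then indirect access, else strided.
        is_c, is_f = is_cf_contig(specs)
        if is_c:
            return 'c'
        if is_f:
            return 'fortran'
        if any(access in ('ptr', 'full') for access, _ in specs):
            return 'full'
        return 'strided'

    # Hypothetical examples (the spec lists are assumed, not taken from the diff):
    # a declaration like int[:, ::1] would yield [('direct', 'follow'), ('direct', 'contig')]
    print(get_mode([('direct', 'follow'), ('direct', 'contig')]))  # -> 'c'
    print(get_mode([('direct', 'contig'), ('direct', 'follow')]))  # -> 'fortran'
    print(get_mode([('ptr', 'strided'), ('direct', 'strided')]))   # -> 'full'
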
diff --git a/contrib/tools/cython/Cython/Compiler/ModuleNode.py b/contrib/tools/cython/Cython/Compiler/ModuleNode.py
index 5b69ca34af..cd7166408e 100644
--- a/contrib/tools/cython/Cython/Compiler/ModuleNode.py
+++ b/contrib/tools/cython/Cython/Compiler/ModuleNode.py
@@ -1,48 +1,48 @@
-#
-# Module parse tree node
-#
-
-from __future__ import absolute_import
-
-import cython
-cython.declare(Naming=object, Options=object, PyrexTypes=object, TypeSlots=object,
- error=object, warning=object, py_object_type=object, UtilityCode=object,
+#
+# Module parse tree node
+#
+
+from __future__ import absolute_import
+
+import cython
+cython.declare(Naming=object, Options=object, PyrexTypes=object, TypeSlots=object,
+ error=object, warning=object, py_object_type=object, UtilityCode=object,
EncodedString=object, re=object)
-
+
from collections import defaultdict
import json
import operator
-import os
+import os
import re
-from .PyrexTypes import CPtrType
-from . import Future
-from . import Annotate
-from . import Code
-from . import Naming
-from . import Nodes
-from . import Options
-from . import TypeSlots
-from . import PyrexTypes
+from .PyrexTypes import CPtrType
+from . import Future
+from . import Annotate
+from . import Code
+from . import Naming
+from . import Nodes
+from . import Options
+from . import TypeSlots
+from . import PyrexTypes
from . import Pythran
-
-from .Errors import error, warning
-from .PyrexTypes import py_object_type
+
+from .Errors import error, warning
+from .PyrexTypes import py_object_type
from ..Utils import open_new_file, replace_suffix, decode_filename, build_hex_version
from .Code import UtilityCode, IncludeCode
-from .StringEncoding import EncodedString
+from .StringEncoding import EncodedString
from .Pythran import has_np_pythran
-
-def check_c_declarations_pxd(module_node):
- module_node.scope.check_c_classes_pxd()
- return module_node
-
-
-def check_c_declarations(module_node):
- module_node.scope.check_c_classes()
- module_node.scope.check_c_functions()
- return module_node
-
+
+def check_c_declarations_pxd(module_node):
+ module_node.scope.check_c_classes_pxd()
+ return module_node
+
+
+def check_c_declarations(module_node):
+ module_node.scope.check_c_classes()
+ module_node.scope.check_c_functions()
+ return module_node
+
def generate_c_code_config(env, options):
if Options.annotate or options.annotate:
@@ -56,73 +56,73 @@ def generate_c_code_config(env, options):
c_line_in_traceback=options.c_line_in_traceback)
-class ModuleNode(Nodes.Node, Nodes.BlockNode):
- # doc string or None
- # body StatListNode
- #
- # referenced_modules [ModuleScope]
- # full_module_name string
- #
- # scope The module scope.
- # compilation_source A CompilationSource (see Main)
- # directives Top-level compiler directives
-
- child_attrs = ["body"]
- directives = None
-
- def merge_in(self, tree, scope, merge_scope=False):
- # Merges in the contents of another tree, and possibly scope. With the
- # current implementation below, this must be done right prior
- # to code generation.
- #
- # Note: This way of doing it seems strange -- I believe the
- # right concept is to split ModuleNode into a ModuleNode and a
- # CodeGenerator, and tell that CodeGenerator to generate code
- # from multiple sources.
- assert isinstance(self.body, Nodes.StatListNode)
- if isinstance(tree, Nodes.StatListNode):
- self.body.stats.extend(tree.stats)
- else:
- self.body.stats.append(tree)
-
- self.scope.utility_code_list.extend(scope.utility_code_list)
-
+class ModuleNode(Nodes.Node, Nodes.BlockNode):
+ # doc string or None
+ # body StatListNode
+ #
+ # referenced_modules [ModuleScope]
+ # full_module_name string
+ #
+ # scope The module scope.
+ # compilation_source A CompilationSource (see Main)
+ # directives Top-level compiler directives
+
+ child_attrs = ["body"]
+ directives = None
+
+ def merge_in(self, tree, scope, merge_scope=False):
+ # Merges in the contents of another tree, and possibly scope. With the
+ # current implementation below, this must be done right prior
+ # to code generation.
+ #
+ # Note: This way of doing it seems strange -- I believe the
+ # right concept is to split ModuleNode into a ModuleNode and a
+ # CodeGenerator, and tell that CodeGenerator to generate code
+ # from multiple sources.
+ assert isinstance(self.body, Nodes.StatListNode)
+ if isinstance(tree, Nodes.StatListNode):
+ self.body.stats.extend(tree.stats)
+ else:
+ self.body.stats.append(tree)
+
+ self.scope.utility_code_list.extend(scope.utility_code_list)
+
for inc in scope.c_includes.values():
self.scope.process_include(inc)
- def extend_if_not_in(L1, L2):
- for x in L2:
- if x not in L1:
- L1.append(x)
-
- extend_if_not_in(self.scope.included_files, scope.included_files)
-
- if merge_scope:
- # Ensure that we don't generate import code for these entries!
- for entry in scope.c_class_entries:
- entry.type.module_name = self.full_module_name
- entry.type.scope.directives["internal"] = True
-
- self.scope.merge_in(scope)
-
- def analyse_declarations(self, env):
+ def extend_if_not_in(L1, L2):
+ for x in L2:
+ if x not in L1:
+ L1.append(x)
+
+ extend_if_not_in(self.scope.included_files, scope.included_files)
+
+ if merge_scope:
+ # Ensure that we don't generate import code for these entries!
+ for entry in scope.c_class_entries:
+ entry.type.module_name = self.full_module_name
+ entry.type.scope.directives["internal"] = True
+
+ self.scope.merge_in(scope)
+
+ def analyse_declarations(self, env):
if has_np_pythran(env):
Pythran.include_pythran_generic(env)
if self.directives:
env.old_style_globals = self.directives['old_style_globals']
- if not Options.docstrings:
- env.doc = self.doc = None
- elif Options.embed_pos_in_docstring:
- env.doc = EncodedString(u'File: %s (starting at line %s)' % Nodes.relative_position(self.pos))
+ if not Options.docstrings:
+ env.doc = self.doc = None
+ elif Options.embed_pos_in_docstring:
+ env.doc = EncodedString(u'File: %s (starting at line %s)' % Nodes.relative_position(self.pos))
if self.doc is not None:
- env.doc = EncodedString(env.doc + u'\n' + self.doc)
- env.doc.encoding = self.doc.encoding
- else:
- env.doc = self.doc
- env.directives = self.directives
-
- self.body.analyse_declarations(env)
-
+ env.doc = EncodedString(env.doc + u'\n' + self.doc)
+ env.doc.encoding = self.doc.encoding
+ else:
+ env.doc = self.doc
+ env.directives = self.directives
+
+ self.body.analyse_declarations(env)
+
def prepare_utility_code(self):
# prepare any utility code that must be created before code generation
# specifically: CythonUtilityCode
@@ -134,217 +134,217 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
and entry.is_type and entry.type.is_enum):
entry.type.create_type_wrapper(env)
- def process_implementation(self, options, result):
- env = self.scope
- env.return_type = PyrexTypes.c_void_type
- self.referenced_modules = []
- self.find_referenced_modules(env, self.referenced_modules, {})
- self.sort_cdef_classes(env)
- self.generate_c_code(env, options, result)
- self.generate_h_code(env, options, result)
+ def process_implementation(self, options, result):
+ env = self.scope
+ env.return_type = PyrexTypes.c_void_type
+ self.referenced_modules = []
+ self.find_referenced_modules(env, self.referenced_modules, {})
+ self.sort_cdef_classes(env)
+ self.generate_c_code(env, options, result)
+ self.generate_h_code(env, options, result)
self.generate_api_code(env, options, result)
-
- def has_imported_c_functions(self):
- for module in self.referenced_modules:
- for entry in module.cfunc_entries:
- if entry.defined_in_pxd:
- return 1
- return 0
-
- def generate_h_code(self, env, options, result):
- def h_entries(entries, api=0, pxd=0):
- return [entry for entry in entries
- if ((entry.visibility == 'public') or
- (api and entry.api) or
- (pxd and entry.defined_in_pxd))]
- h_types = h_entries(env.type_entries, api=1)
- h_vars = h_entries(env.var_entries)
- h_funcs = h_entries(env.cfunc_entries)
- h_extension_types = h_entries(env.c_class_entries)
+
+ def has_imported_c_functions(self):
+ for module in self.referenced_modules:
+ for entry in module.cfunc_entries:
+ if entry.defined_in_pxd:
+ return 1
+ return 0
+
+ def generate_h_code(self, env, options, result):
+ def h_entries(entries, api=0, pxd=0):
+ return [entry for entry in entries
+ if ((entry.visibility == 'public') or
+ (api and entry.api) or
+ (pxd and entry.defined_in_pxd))]
+ h_types = h_entries(env.type_entries, api=1)
+ h_vars = h_entries(env.var_entries)
+ h_funcs = h_entries(env.cfunc_entries)
+ h_extension_types = h_entries(env.c_class_entries)
if h_types or h_vars or h_funcs or h_extension_types:
- result.h_file = replace_suffix(result.c_file, ".h")
- h_code = Code.CCodeWriter()
+ result.h_file = replace_suffix(result.c_file, ".h")
+ h_code = Code.CCodeWriter()
c_code_config = generate_c_code_config(env, options)
Code.GlobalState(h_code, self, c_code_config)
- if options.generate_pxi:
- result.i_file = replace_suffix(result.c_file, ".pxi")
- i_code = Code.PyrexCodeWriter(result.i_file)
- else:
- i_code = None
-
+ if options.generate_pxi:
+ result.i_file = replace_suffix(result.c_file, ".pxi")
+ i_code = Code.PyrexCodeWriter(result.i_file)
+ else:
+ i_code = None
+
h_code.put_generated_by()
- h_guard = Naming.h_guard_prefix + self.api_name(env)
- h_code.put_h_guard(h_guard)
- h_code.putln("")
+ h_guard = Naming.h_guard_prefix + self.api_name(env)
+ h_code.put_h_guard(h_guard)
+ h_code.putln("")
h_code.putln('#include "Python.h"')
- self.generate_type_header_code(h_types, h_code)
- if options.capi_reexport_cincludes:
- self.generate_includes(env, [], h_code)
- h_code.putln("")
- api_guard = Naming.api_guard_prefix + self.api_name(env)
- h_code.putln("#ifndef %s" % api_guard)
- h_code.putln("")
- self.generate_extern_c_macro_definition(h_code)
+ self.generate_type_header_code(h_types, h_code)
+ if options.capi_reexport_cincludes:
+ self.generate_includes(env, [], h_code)
+ h_code.putln("")
+ api_guard = Naming.api_guard_prefix + self.api_name(env)
+ h_code.putln("#ifndef %s" % api_guard)
+ h_code.putln("")
+ self.generate_extern_c_macro_definition(h_code)
h_code.putln("")
self.generate_dl_import_macro(h_code)
- if h_extension_types:
- h_code.putln("")
- for entry in h_extension_types:
- self.generate_cclass_header_code(entry.type, h_code)
- if i_code:
- self.generate_cclass_include_code(entry.type, i_code)
- if h_funcs:
- h_code.putln("")
- for entry in h_funcs:
- self.generate_public_declaration(entry, h_code, i_code)
- if h_vars:
- h_code.putln("")
- for entry in h_vars:
- self.generate_public_declaration(entry, h_code, i_code)
- h_code.putln("")
- h_code.putln("#endif /* !%s */" % api_guard)
- h_code.putln("")
+ if h_extension_types:
+ h_code.putln("")
+ for entry in h_extension_types:
+ self.generate_cclass_header_code(entry.type, h_code)
+ if i_code:
+ self.generate_cclass_include_code(entry.type, i_code)
+ if h_funcs:
+ h_code.putln("")
+ for entry in h_funcs:
+ self.generate_public_declaration(entry, h_code, i_code)
+ if h_vars:
+ h_code.putln("")
+ for entry in h_vars:
+ self.generate_public_declaration(entry, h_code, i_code)
+ h_code.putln("")
+ h_code.putln("#endif /* !%s */" % api_guard)
+ h_code.putln("")
h_code.putln("/* WARNING: the interface of the module init function changed in CPython 3.5. */")
h_code.putln("/* It now returns a PyModuleDef instance instead of a PyModule instance. */")
h_code.putln("")
- h_code.putln("#if PY_MAJOR_VERSION < 3")
+ h_code.putln("#if PY_MAJOR_VERSION < 3")
init_name = 'init' + (options.init_suffix or env.module_name)
h_code.putln("PyMODINIT_FUNC %s(void);" % init_name)
- h_code.putln("#else")
+ h_code.putln("#else")
h_code.putln("PyMODINIT_FUNC %s(void);" % self.mod_init_func_cname('PyInit', env, options))
- h_code.putln("#endif")
- h_code.putln("")
- h_code.putln("#endif /* !%s */" % h_guard)
-
- f = open_new_file(result.h_file)
- try:
- h_code.copyto(f)
- finally:
- f.close()
-
- def generate_public_declaration(self, entry, h_code, i_code):
- h_code.putln("%s %s;" % (
- Naming.extern_c_macro,
+ h_code.putln("#endif")
+ h_code.putln("")
+ h_code.putln("#endif /* !%s */" % h_guard)
+
+ f = open_new_file(result.h_file)
+ try:
+ h_code.copyto(f)
+ finally:
+ f.close()
+
+ def generate_public_declaration(self, entry, h_code, i_code):
+ h_code.putln("%s %s;" % (
+ Naming.extern_c_macro,
entry.type.declaration_code(entry.cname)))
- if i_code:
+ if i_code:
i_code.putln("cdef extern %s" % (
entry.type.declaration_code(entry.cname, pyrex=1)))
-
- def api_name(self, env):
- return env.qualified_name.replace(".", "__")
-
+
+ def api_name(self, env):
+ return env.qualified_name.replace(".", "__")
+
def generate_api_code(self, env, options, result):
- def api_entries(entries, pxd=0):
- return [entry for entry in entries
- if entry.api or (pxd and entry.defined_in_pxd)]
- api_vars = api_entries(env.var_entries)
- api_funcs = api_entries(env.cfunc_entries)
- api_extension_types = api_entries(env.c_class_entries)
- if api_vars or api_funcs or api_extension_types:
- result.api_file = replace_suffix(result.c_file, "_api.h")
- h_code = Code.CCodeWriter()
+ def api_entries(entries, pxd=0):
+ return [entry for entry in entries
+ if entry.api or (pxd and entry.defined_in_pxd)]
+ api_vars = api_entries(env.var_entries)
+ api_funcs = api_entries(env.cfunc_entries)
+ api_extension_types = api_entries(env.c_class_entries)
+ if api_vars or api_funcs or api_extension_types:
+ result.api_file = replace_suffix(result.c_file, "_api.h")
+ h_code = Code.CCodeWriter()
c_code_config = generate_c_code_config(env, options)
Code.GlobalState(h_code, self, c_code_config)
h_code.put_generated_by()
- api_guard = Naming.api_guard_prefix + self.api_name(env)
- h_code.put_h_guard(api_guard)
+ api_guard = Naming.api_guard_prefix + self.api_name(env)
+ h_code.put_h_guard(api_guard)
# Work around https://bugs.python.org/issue4709
h_code.putln('#ifdef __MINGW64__')
h_code.putln('#define MS_WIN64')
h_code.putln('#endif')
- h_code.putln('#include "Python.h"')
- if result.h_file:
- h_code.putln('#include "%s"' % os.path.basename(result.h_file))
- if api_extension_types:
- h_code.putln("")
- for entry in api_extension_types:
- type = entry.type
- h_code.putln("static PyTypeObject *%s = 0;" % type.typeptr_cname)
- h_code.putln("#define %s (*%s)" % (
- type.typeobj_cname, type.typeptr_cname))
- if api_funcs:
- h_code.putln("")
- for entry in api_funcs:
- type = CPtrType(entry.type)
+ h_code.putln('#include "Python.h"')
+ if result.h_file:
+ h_code.putln('#include "%s"' % os.path.basename(result.h_file))
+ if api_extension_types:
+ h_code.putln("")
+ for entry in api_extension_types:
+ type = entry.type
+ h_code.putln("static PyTypeObject *%s = 0;" % type.typeptr_cname)
+ h_code.putln("#define %s (*%s)" % (
+ type.typeobj_cname, type.typeptr_cname))
+ if api_funcs:
+ h_code.putln("")
+ for entry in api_funcs:
+ type = CPtrType(entry.type)
cname = env.mangle(Naming.func_prefix_api, entry.name)
- h_code.putln("static %s = 0;" % type.declaration_code(cname))
- h_code.putln("#define %s %s" % (entry.name, cname))
- if api_vars:
- h_code.putln("")
- for entry in api_vars:
- type = CPtrType(entry.type)
+ h_code.putln("static %s = 0;" % type.declaration_code(cname))
+ h_code.putln("#define %s %s" % (entry.name, cname))
+ if api_vars:
+ h_code.putln("")
+ for entry in api_vars:
+ type = CPtrType(entry.type)
cname = env.mangle(Naming.varptr_prefix_api, entry.name)
- h_code.putln("static %s = 0;" % type.declaration_code(cname))
- h_code.putln("#define %s (*%s)" % (entry.name, cname))
- h_code.put(UtilityCode.load_as_string("PyIdentifierFromString", "ImportExport.c")[0])
- if api_vars:
- h_code.put(UtilityCode.load_as_string("VoidPtrImport", "ImportExport.c")[1])
- if api_funcs:
- h_code.put(UtilityCode.load_as_string("FunctionImport", "ImportExport.c")[1])
- if api_extension_types:
+ h_code.putln("static %s = 0;" % type.declaration_code(cname))
+ h_code.putln("#define %s (*%s)" % (entry.name, cname))
+ h_code.put(UtilityCode.load_as_string("PyIdentifierFromString", "ImportExport.c")[0])
+ if api_vars:
+ h_code.put(UtilityCode.load_as_string("VoidPtrImport", "ImportExport.c")[1])
+ if api_funcs:
+ h_code.put(UtilityCode.load_as_string("FunctionImport", "ImportExport.c")[1])
+ if api_extension_types:
h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[0])
- h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[1])
- h_code.putln("")
- h_code.putln("static int import_%s(void) {" % self.api_name(env))
- h_code.putln("PyObject *module = 0;")
+ h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[1])
+ h_code.putln("")
+ h_code.putln("static int import_%s(void) {" % self.api_name(env))
+ h_code.putln("PyObject *module = 0;")
h_code.putln('module = PyImport_ImportModule("%s");' % env.qualified_name)
- h_code.putln("if (!module) goto bad;")
- for entry in api_funcs:
+ h_code.putln("if (!module) goto bad;")
+ for entry in api_funcs:
cname = env.mangle(Naming.func_prefix_api, entry.name)
- sig = entry.type.signature_string()
- h_code.putln(
- 'if (__Pyx_ImportFunction(module, "%s", (void (**)(void))&%s, "%s") < 0) goto bad;'
- % (entry.name, cname, sig))
- for entry in api_vars:
+ sig = entry.type.signature_string()
+ h_code.putln(
+ 'if (__Pyx_ImportFunction(module, "%s", (void (**)(void))&%s, "%s") < 0) goto bad;'
+ % (entry.name, cname, sig))
+ for entry in api_vars:
cname = env.mangle(Naming.varptr_prefix_api, entry.name)
sig = entry.type.empty_declaration_code()
- h_code.putln(
- 'if (__Pyx_ImportVoidPtr(module, "%s", (void **)&%s, "%s") < 0) goto bad;'
- % (entry.name, cname, sig))
+ h_code.putln(
+ 'if (__Pyx_ImportVoidPtr(module, "%s", (void **)&%s, "%s") < 0) goto bad;'
+ % (entry.name, cname, sig))
with ModuleImportGenerator(h_code, imported_modules={env.qualified_name: 'module'}) as import_generator:
for entry in api_extension_types:
self.generate_type_import_call(entry.type, h_code, import_generator, error_code="goto bad;")
- h_code.putln("Py_DECREF(module); module = 0;")
- h_code.putln("return 0;")
- h_code.putln("bad:")
- h_code.putln("Py_XDECREF(module);")
- h_code.putln("return -1;")
- h_code.putln("}")
- h_code.putln("")
- h_code.putln("#endif /* !%s */" % api_guard)
-
- f = open_new_file(result.api_file)
- try:
- h_code.copyto(f)
- finally:
- f.close()
-
- def generate_cclass_header_code(self, type, h_code):
- h_code.putln("%s %s %s;" % (
- Naming.extern_c_macro,
- PyrexTypes.public_decl("PyTypeObject", "DL_IMPORT"),
- type.typeobj_cname))
-
- def generate_cclass_include_code(self, type, i_code):
- i_code.putln("cdef extern class %s.%s:" % (
- type.module_name, type.name))
- i_code.indent()
- var_entries = type.scope.var_entries
- if var_entries:
- for entry in var_entries:
+ h_code.putln("Py_DECREF(module); module = 0;")
+ h_code.putln("return 0;")
+ h_code.putln("bad:")
+ h_code.putln("Py_XDECREF(module);")
+ h_code.putln("return -1;")
+ h_code.putln("}")
+ h_code.putln("")
+ h_code.putln("#endif /* !%s */" % api_guard)
+
+ f = open_new_file(result.api_file)
+ try:
+ h_code.copyto(f)
+ finally:
+ f.close()
+
+ def generate_cclass_header_code(self, type, h_code):
+ h_code.putln("%s %s %s;" % (
+ Naming.extern_c_macro,
+ PyrexTypes.public_decl("PyTypeObject", "DL_IMPORT"),
+ type.typeobj_cname))
+
+ def generate_cclass_include_code(self, type, i_code):
+ i_code.putln("cdef extern class %s.%s:" % (
+ type.module_name, type.name))
+ i_code.indent()
+ var_entries = type.scope.var_entries
+ if var_entries:
+ for entry in var_entries:
i_code.putln("cdef %s" % (
entry.type.declaration_code(entry.cname, pyrex=1)))
- else:
- i_code.putln("pass")
- i_code.dedent()
-
- def generate_c_code(self, env, options, result):
- modules = self.referenced_modules
-
- if Options.annotate or options.annotate:
- rootwriter = Annotate.AnnotationCCodeWriter()
- else:
+ else:
+ i_code.putln("pass")
+ i_code.dedent()
+
+ def generate_c_code(self, env, options, result):
+ modules = self.referenced_modules
+
+ if Options.annotate or options.annotate:
+ rootwriter = Annotate.AnnotationCCodeWriter()
+ else:
rootwriter = Code.CCodeWriter()
c_code_config = generate_c_code_config(env, options)
@@ -354,75 +354,75 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code_config=c_code_config,
common_utility_include_dir=options.common_utility_include_dir,
)
- globalstate.initialize_main_c_code()
- h_code = globalstate['h_code']
-
+ globalstate.initialize_main_c_code()
+ h_code = globalstate['h_code']
+
self.generate_module_preamble(env, options, modules, result.embedded_metadata, h_code)
-
- globalstate.module_pos = self.pos
- globalstate.directives = self.directives
-
- globalstate.use_utility_code(refnanny_utility_code)
-
- code = globalstate['before_global_var']
- code.putln('#define __Pyx_MODULE_NAME "%s"' % self.full_module_name)
+
+ globalstate.module_pos = self.pos
+ globalstate.directives = self.directives
+
+ globalstate.use_utility_code(refnanny_utility_code)
+
+ code = globalstate['before_global_var']
+ code.putln('#define __Pyx_MODULE_NAME "%s"' % self.full_module_name)
module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))
code.putln("extern int %s;" % module_is_main)
code.putln("int %s = 0;" % module_is_main)
- code.putln("")
- code.putln("/* Implementation of '%s' */" % env.qualified_name)
-
+ code.putln("")
+ code.putln("/* Implementation of '%s' */" % env.qualified_name)
+
code = globalstate['late_includes']
code.putln("/* Late includes */")
self.generate_includes(env, modules, code, early=False)
- code = globalstate['all_the_rest']
-
- self.generate_cached_builtins_decls(env, code)
- self.generate_lambda_definitions(env, code)
- # generate normal variable and function definitions
- self.generate_variable_definitions(env, code)
+ code = globalstate['all_the_rest']
- self.body.generate_function_definitions(env, code)
+ self.generate_cached_builtins_decls(env, code)
+ self.generate_lambda_definitions(env, code)
+ # generate normal variable and function definitions
+ self.generate_variable_definitions(env, code)
- code.mark_pos(None)
- self.generate_typeobj_definitions(env, code)
- self.generate_method_table(env, code)
- if env.has_import_star:
- self.generate_import_star(env, code)
+ self.body.generate_function_definitions(env, code)
+
+ code.mark_pos(None)
+ self.generate_typeobj_definitions(env, code)
+ self.generate_method_table(env, code)
+ if env.has_import_star:
+ self.generate_import_star(env, code)
self.generate_pymoduledef_struct(env, options, code)
-
+
# initialise the macro to reduce the code size of one-time functionality
code.putln(UtilityCode.load_as_string("SmallCodeConfig", "ModuleSetupCode.c")[0].strip())
- # init_globals is inserted before this
+ # init_globals is inserted before this
self.generate_module_init_func(modules[:-1], env, options, globalstate['init_module'])
- self.generate_module_cleanup_func(env, globalstate['cleanup_module'])
- if Options.embed:
- self.generate_main_method(env, globalstate['main_method'])
- self.generate_filename_table(globalstate['filename_table'])
-
- self.generate_declarations_for_modules(env, modules, globalstate)
- h_code.write('\n')
-
- for utilcode in env.utility_code_list[:]:
- globalstate.use_utility_code(utilcode)
- globalstate.finalize_main_c_code()
-
- f = open_new_file(result.c_file)
- try:
- rootwriter.copyto(f)
- finally:
- f.close()
- result.c_file_generated = 1
- if options.gdb_debug:
- self._serialize_lineno_map(env, rootwriter)
- if Options.annotate or options.annotate:
+ self.generate_module_cleanup_func(env, globalstate['cleanup_module'])
+ if Options.embed:
+ self.generate_main_method(env, globalstate['main_method'])
+ self.generate_filename_table(globalstate['filename_table'])
+
+ self.generate_declarations_for_modules(env, modules, globalstate)
+ h_code.write('\n')
+
+ for utilcode in env.utility_code_list[:]:
+ globalstate.use_utility_code(utilcode)
+ globalstate.finalize_main_c_code()
+
+ f = open_new_file(result.c_file)
+ try:
+ rootwriter.copyto(f)
+ finally:
+ f.close()
+ result.c_file_generated = 1
+ if options.gdb_debug:
+ self._serialize_lineno_map(env, rootwriter)
+ if Options.annotate or options.annotate:
self._generate_annotations(rootwriter, result, options)
-
+
def _generate_annotations(self, rootwriter, result, options):
- self.annotate(rootwriter)
-
+ self.annotate(rootwriter)
+
coverage_xml_filename = Options.annotate_coverage_xml or options.annotate_coverage_xml
if coverage_xml_filename and os.path.exists(coverage_xml_filename):
try:
@@ -441,190 +441,190 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
rootwriter.save_annotation(result.main_source_file, result.c_file, coverage_xml=coverage_xml)
- # if we included files, additionally generate one annotation file for each
- if not self.scope.included_files:
- return
-
- search_include_file = self.scope.context.search_include_directories
- target_dir = os.path.abspath(os.path.dirname(result.c_file))
- for included_file in self.scope.included_files:
- target_file = os.path.abspath(os.path.join(target_dir, included_file))
- target_file_dir = os.path.dirname(target_file)
- if not target_file_dir.startswith(target_dir):
- # any other directories may not be writable => avoid trying
- continue
- source_file = search_include_file(included_file, "", self.pos, include=True)
- if not source_file:
- continue
- if target_file_dir != target_dir and not os.path.exists(target_file_dir):
- try:
- os.makedirs(target_file_dir)
+ # if we included files, additionally generate one annotation file for each
+ if not self.scope.included_files:
+ return
+
+ search_include_file = self.scope.context.search_include_directories
+ target_dir = os.path.abspath(os.path.dirname(result.c_file))
+ for included_file in self.scope.included_files:
+ target_file = os.path.abspath(os.path.join(target_dir, included_file))
+ target_file_dir = os.path.dirname(target_file)
+ if not target_file_dir.startswith(target_dir):
+ # any other directories may not be writable => avoid trying
+ continue
+ source_file = search_include_file(included_file, "", self.pos, include=True)
+ if not source_file:
+ continue
+ if target_file_dir != target_dir and not os.path.exists(target_file_dir):
+ try:
+ os.makedirs(target_file_dir)
except OSError as e:
- import errno
- if e.errno != errno.EEXIST:
- raise
+ import errno
+ if e.errno != errno.EEXIST:
+ raise
rootwriter.save_annotation(source_file, target_file, coverage_xml=coverage_xml)
-
- def _serialize_lineno_map(self, env, ccodewriter):
- tb = env.context.gdb_debug_outputwriter
- markers = ccodewriter.buffer.allmarkers()
-
+
+ def _serialize_lineno_map(self, env, ccodewriter):
+ tb = env.context.gdb_debug_outputwriter
+ markers = ccodewriter.buffer.allmarkers()
+
d = defaultdict(list)
- for c_lineno, cython_lineno in enumerate(markers):
- if cython_lineno > 0:
+ for c_lineno, cython_lineno in enumerate(markers):
+ if cython_lineno > 0:
d[cython_lineno].append(c_lineno + 1)
-
- tb.start('LineNumberMapping')
+
+ tb.start('LineNumberMapping')
for cython_lineno, c_linenos in sorted(d.items()):
tb.add_entry(
'LineNumber',
c_linenos=' '.join(map(str, c_linenos)),
cython_lineno=str(cython_lineno),
)
- tb.end('LineNumberMapping')
- tb.serialize()
-
- def find_referenced_modules(self, env, module_list, modules_seen):
- if env not in modules_seen:
- modules_seen[env] = 1
- for imported_module in env.cimported_modules:
- self.find_referenced_modules(imported_module, module_list, modules_seen)
- module_list.append(env)
-
- def sort_types_by_inheritance(self, type_dict, type_order, getkey):
- # copy the types into a list moving each parent type before
- # its first child
- type_list = []
- for i, key in enumerate(type_order):
- new_entry = type_dict[key]
-
- # collect all base classes to check for children
- hierarchy = set()
- base = new_entry
- while base:
- base_type = base.type.base_type
- if not base_type:
- break
- base_key = getkey(base_type)
- hierarchy.add(base_key)
- base = type_dict.get(base_key)
- new_entry.base_keys = hierarchy
-
- # find the first (sub-)subclass and insert before that
- for j in range(i):
- entry = type_list[j]
- if key in entry.base_keys:
- type_list.insert(j, new_entry)
- break
- else:
- type_list.append(new_entry)
- return type_list
-
- def sort_type_hierarchy(self, module_list, env):
- # poor developer's OrderedDict
- vtab_dict, vtab_dict_order = {}, []
- vtabslot_dict, vtabslot_dict_order = {}, []
-
- for module in module_list:
- for entry in module.c_class_entries:
- if entry.used and not entry.in_cinclude:
- type = entry.type
- key = type.vtabstruct_cname
- if not key:
- continue
- if key in vtab_dict:
- # FIXME: this should *never* happen, but apparently it does
- # for Cython generated utility code
- from .UtilityCode import NonManglingModuleScope
- assert isinstance(entry.scope, NonManglingModuleScope), str(entry.scope)
- assert isinstance(vtab_dict[key].scope, NonManglingModuleScope), str(vtab_dict[key].scope)
- else:
- vtab_dict[key] = entry
- vtab_dict_order.append(key)
- all_defined_here = module is env
- for entry in module.type_entries:
- if entry.used and (all_defined_here or entry.defined_in_pxd):
- type = entry.type
- if type.is_extension_type and not entry.in_cinclude:
- type = entry.type
- key = type.objstruct_cname
- assert key not in vtabslot_dict, key
- vtabslot_dict[key] = entry
- vtabslot_dict_order.append(key)
-
- def vtabstruct_cname(entry_type):
- return entry_type.vtabstruct_cname
- vtab_list = self.sort_types_by_inheritance(
- vtab_dict, vtab_dict_order, vtabstruct_cname)
-
- def objstruct_cname(entry_type):
- return entry_type.objstruct_cname
- vtabslot_list = self.sort_types_by_inheritance(
- vtabslot_dict, vtabslot_dict_order, objstruct_cname)
-
- return (vtab_list, vtabslot_list)
-
- def sort_cdef_classes(self, env):
- key_func = operator.attrgetter('objstruct_cname')
- entry_dict, entry_order = {}, []
- for entry in env.c_class_entries:
- key = key_func(entry.type)
- assert key not in entry_dict, key
- entry_dict[key] = entry
- entry_order.append(key)
- env.c_class_entries[:] = self.sort_types_by_inheritance(
- entry_dict, entry_order, key_func)
-
- def generate_type_definitions(self, env, modules, vtab_list, vtabslot_list, code):
- # TODO: Why are these separated out?
- for entry in vtabslot_list:
- self.generate_objstruct_predeclaration(entry.type, code)
- vtabslot_entries = set(vtabslot_list)
+ tb.end('LineNumberMapping')
+ tb.serialize()
+
+ def find_referenced_modules(self, env, module_list, modules_seen):
+ if env not in modules_seen:
+ modules_seen[env] = 1
+ for imported_module in env.cimported_modules:
+ self.find_referenced_modules(imported_module, module_list, modules_seen)
+ module_list.append(env)
+
+ def sort_types_by_inheritance(self, type_dict, type_order, getkey):
+ # copy the types into a list moving each parent type before
+ # its first child
+ type_list = []
+ for i, key in enumerate(type_order):
+ new_entry = type_dict[key]
+
+ # collect all base classes to check for children
+ hierarchy = set()
+ base = new_entry
+ while base:
+ base_type = base.type.base_type
+ if not base_type:
+ break
+ base_key = getkey(base_type)
+ hierarchy.add(base_key)
+ base = type_dict.get(base_key)
+ new_entry.base_keys = hierarchy
+
+ # find the first (sub-)subclass and insert before that
+ for j in range(i):
+ entry = type_list[j]
+ if key in entry.base_keys:
+ type_list.insert(j, new_entry)
+ break
+ else:
+ type_list.append(new_entry)
+ return type_list
+
+ def sort_type_hierarchy(self, module_list, env):
+ # poor developer's OrderedDict
+ vtab_dict, vtab_dict_order = {}, []
+ vtabslot_dict, vtabslot_dict_order = {}, []
+
+ for module in module_list:
+ for entry in module.c_class_entries:
+ if entry.used and not entry.in_cinclude:
+ type = entry.type
+ key = type.vtabstruct_cname
+ if not key:
+ continue
+ if key in vtab_dict:
+ # FIXME: this should *never* happen, but apparently it does
+ # for Cython generated utility code
+ from .UtilityCode import NonManglingModuleScope
+ assert isinstance(entry.scope, NonManglingModuleScope), str(entry.scope)
+ assert isinstance(vtab_dict[key].scope, NonManglingModuleScope), str(vtab_dict[key].scope)
+ else:
+ vtab_dict[key] = entry
+ vtab_dict_order.append(key)
+ all_defined_here = module is env
+ for entry in module.type_entries:
+ if entry.used and (all_defined_here or entry.defined_in_pxd):
+ type = entry.type
+ if type.is_extension_type and not entry.in_cinclude:
+ type = entry.type
+ key = type.objstruct_cname
+ assert key not in vtabslot_dict, key
+ vtabslot_dict[key] = entry
+ vtabslot_dict_order.append(key)
+
+ def vtabstruct_cname(entry_type):
+ return entry_type.vtabstruct_cname
+ vtab_list = self.sort_types_by_inheritance(
+ vtab_dict, vtab_dict_order, vtabstruct_cname)
+
+ def objstruct_cname(entry_type):
+ return entry_type.objstruct_cname
+ vtabslot_list = self.sort_types_by_inheritance(
+ vtabslot_dict, vtabslot_dict_order, objstruct_cname)
+
+ return (vtab_list, vtabslot_list)
+
+ def sort_cdef_classes(self, env):
+ key_func = operator.attrgetter('objstruct_cname')
+ entry_dict, entry_order = {}, []
+ for entry in env.c_class_entries:
+ key = key_func(entry.type)
+ assert key not in entry_dict, key
+ entry_dict[key] = entry
+ entry_order.append(key)
+ env.c_class_entries[:] = self.sort_types_by_inheritance(
+ entry_dict, entry_order, key_func)
+
+ def generate_type_definitions(self, env, modules, vtab_list, vtabslot_list, code):
+ # TODO: Why are these separated out?
+ for entry in vtabslot_list:
+ self.generate_objstruct_predeclaration(entry.type, code)
+ vtabslot_entries = set(vtabslot_list)
ctuple_names = set()
- for module in modules:
- definition = module is env
+ for module in modules:
+ definition = module is env
type_entries = []
for entry in module.type_entries:
if entry.type.is_ctuple and entry.used:
if entry.name not in ctuple_names:
ctuple_names.add(entry.name)
- type_entries.append(entry)
+ type_entries.append(entry)
elif definition or entry.defined_in_pxd:
type_entries.append(entry)
- type_entries = [t for t in type_entries if t not in vtabslot_entries]
- self.generate_type_header_code(type_entries, code)
- for entry in vtabslot_list:
- self.generate_objstruct_definition(entry.type, code)
- self.generate_typeobj_predeclaration(entry, code)
- for entry in vtab_list:
- self.generate_typeobj_predeclaration(entry, code)
- self.generate_exttype_vtable_struct(entry, code)
- self.generate_exttype_vtabptr_declaration(entry, code)
- self.generate_exttype_final_methods_declaration(entry, code)
-
- def generate_declarations_for_modules(self, env, modules, globalstate):
- typecode = globalstate['type_declarations']
- typecode.putln("")
- typecode.putln("/*--- Type declarations ---*/")
- # This is to work around the fact that array.h isn't part of the C-API,
- # but we need to declare it earlier than utility code.
- if 'cpython.array' in [m.qualified_name for m in modules]:
- typecode.putln('#ifndef _ARRAYARRAY_H')
- typecode.putln('struct arrayobject;')
- typecode.putln('typedef struct arrayobject arrayobject;')
- typecode.putln('#endif')
- vtab_list, vtabslot_list = self.sort_type_hierarchy(modules, env)
- self.generate_type_definitions(
- env, modules, vtab_list, vtabslot_list, typecode)
- modulecode = globalstate['module_declarations']
- for module in modules:
- defined_here = module is env
- modulecode.putln("")
- modulecode.putln("/* Module declarations from '%s' */" % module.qualified_name)
- self.generate_c_class_declarations(module, modulecode, defined_here)
- self.generate_cvariable_declarations(module, modulecode, defined_here)
- self.generate_cfunction_declarations(module, modulecode, defined_here)
-
+ type_entries = [t for t in type_entries if t not in vtabslot_entries]
+ self.generate_type_header_code(type_entries, code)
+ for entry in vtabslot_list:
+ self.generate_objstruct_definition(entry.type, code)
+ self.generate_typeobj_predeclaration(entry, code)
+ for entry in vtab_list:
+ self.generate_typeobj_predeclaration(entry, code)
+ self.generate_exttype_vtable_struct(entry, code)
+ self.generate_exttype_vtabptr_declaration(entry, code)
+ self.generate_exttype_final_methods_declaration(entry, code)
+
+ def generate_declarations_for_modules(self, env, modules, globalstate):
+ typecode = globalstate['type_declarations']
+ typecode.putln("")
+ typecode.putln("/*--- Type declarations ---*/")
+ # This is to work around the fact that array.h isn't part of the C-API,
+ # but we need to declare it earlier than utility code.
+ if 'cpython.array' in [m.qualified_name for m in modules]:
+ typecode.putln('#ifndef _ARRAYARRAY_H')
+ typecode.putln('struct arrayobject;')
+ typecode.putln('typedef struct arrayobject arrayobject;')
+ typecode.putln('#endif')
+ vtab_list, vtabslot_list = self.sort_type_hierarchy(modules, env)
+ self.generate_type_definitions(
+ env, modules, vtab_list, vtabslot_list, typecode)
+ modulecode = globalstate['module_declarations']
+ for module in modules:
+ defined_here = module is env
+ modulecode.putln("")
+ modulecode.putln("/* Module declarations from '%s' */" % module.qualified_name)
+ self.generate_c_class_declarations(module, modulecode, defined_here)
+ self.generate_cvariable_declarations(module, modulecode, defined_here)
+ self.generate_cfunction_declarations(module, modulecode, defined_here)
+
def _put_setup_code(self, code, name):
code.put(UtilityCode.load_as_string(name, "ModuleSetupCode.c")[1])
@@ -637,27 +637,27 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("")
code.putln("#ifndef PY_SSIZE_T_CLEAN")
- code.putln("#define PY_SSIZE_T_CLEAN")
+ code.putln("#define PY_SSIZE_T_CLEAN")
code.putln("#endif /* PY_SSIZE_T_CLEAN */")
-
+
for inc in sorted(env.c_includes.values(), key=IncludeCode.sortkey):
if inc.location == inc.INITIAL:
inc.write(code)
- code.putln("#ifndef Py_PYTHON_H")
+ code.putln("#ifndef Py_PYTHON_H")
code.putln(" #error Python headers needed to compile C extensions, "
"please install development version of Python.")
code.putln("#elif PY_VERSION_HEX < 0x02060000 || "
"(0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)")
code.putln(" #error Cython requires Python 2.6+ or Python 3.3+.")
- code.putln("#else")
- code.globalstate["end"].putln("#endif /* Py_PYTHON_H */")
-
- from .. import __version__
- code.putln('#define CYTHON_ABI "%s"' % __version__.replace('.', '_'))
+ code.putln("#else")
+ code.globalstate["end"].putln("#endif /* Py_PYTHON_H */")
+
+ from .. import __version__
+ code.putln('#define CYTHON_ABI "%s"' % __version__.replace('.', '_'))
code.putln('#define CYTHON_HEX_VERSION %s' % build_hex_version(__version__))
code.putln("#define CYTHON_FUTURE_DIVISION %d" % (
Future.division in env.context.future_directives))
-
+
self._put_setup_code(code, "CModulePreamble")
if env.context.options.cplus:
self._put_setup_code(code, "CppInitCode")
@@ -665,7 +665,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
self._put_setup_code(code, "CInitCode")
self._put_setup_code(code, "PythonCompatibility")
self._put_setup_code(code, "MathInitCode")
-
+
# Using "(void)cname" to prevent "unused" warnings.
if options.c_line_in_traceback:
cinfo = "%s = %s; (void)%s; " % (Naming.clineno_cname, Naming.line_c_macro, Naming.clineno_cname)
@@ -680,92 +680,92 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("#define __PYX_ERR(f_index, lineno, Ln_error) \\")
code.putln(" { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }")
- code.putln("")
- self.generate_extern_c_macro_definition(code)
- code.putln("")
-
- code.putln("#define %s" % Naming.h_guard_prefix + self.api_name(env))
- code.putln("#define %s" % Naming.api_guard_prefix + self.api_name(env))
+ code.putln("")
+ self.generate_extern_c_macro_definition(code)
+ code.putln("")
+
+ code.putln("#define %s" % Naming.h_guard_prefix + self.api_name(env))
+ code.putln("#define %s" % Naming.api_guard_prefix + self.api_name(env))
code.putln("/* Early includes */")
self.generate_includes(env, cimported_modules, code, late=False)
- code.putln("")
+ code.putln("")
code.putln("#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)")
- code.putln("#define CYTHON_WITHOUT_ASSERTIONS")
- code.putln("#endif")
- code.putln("")
-
- if env.directives['ccomplex']:
- code.putln("")
- code.putln("#if !defined(CYTHON_CCOMPLEX)")
- code.putln("#define CYTHON_CCOMPLEX 1")
- code.putln("#endif")
- code.putln("")
- code.put(UtilityCode.load_as_string("UtilityFunctionPredeclarations", "ModuleSetupCode.c")[0])
-
- c_string_type = env.directives['c_string_type']
- c_string_encoding = env.directives['c_string_encoding']
- if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding:
- error(self.pos, "a default encoding must be provided if c_string_type is not a byte type")
- code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
+ code.putln("#define CYTHON_WITHOUT_ASSERTIONS")
+ code.putln("#endif")
+ code.putln("")
+
+ if env.directives['ccomplex']:
+ code.putln("")
+ code.putln("#if !defined(CYTHON_CCOMPLEX)")
+ code.putln("#define CYTHON_CCOMPLEX 1")
+ code.putln("#endif")
+ code.putln("")
+ code.put(UtilityCode.load_as_string("UtilityFunctionPredeclarations", "ModuleSetupCode.c")[0])
+
+ c_string_type = env.directives['c_string_type']
+ c_string_encoding = env.directives['c_string_encoding']
+ if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding:
+ error(self.pos, "a default encoding must be provided if c_string_type is not a byte type")
+ code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 %s' %
int(c_string_encoding.replace('-', '').lower() == 'utf8'))
- if c_string_encoding == 'default':
- code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1')
- else:
+ if c_string_encoding == 'default':
+ code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1')
+ else:
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT '
'(PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)')
- code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
- if c_string_type == 'bytearray':
- c_string_func_name = 'ByteArray'
- else:
- c_string_func_name = c_string_type.title()
- code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name)
- code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name)
- code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0])
-
- # These utility functions are assumed to exist and used elsewhere.
- PyrexTypes.c_long_type.create_to_py_utility_code(env)
- PyrexTypes.c_long_type.create_from_py_utility_code(env)
- PyrexTypes.c_int_type.create_from_py_utility_code(env)
-
- code.put(Nodes.branch_prediction_macros)
+ code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
+ if c_string_type == 'bytearray':
+ c_string_func_name = 'ByteArray'
+ else:
+ c_string_func_name = c_string_type.title()
+ code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name)
+ code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name)
+ code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0])
+
+ # These utility functions are assumed to exist and used elsewhere.
+ PyrexTypes.c_long_type.create_to_py_utility_code(env)
+ PyrexTypes.c_long_type.create_from_py_utility_code(env)
+ PyrexTypes.c_int_type.create_from_py_utility_code(env)
+
+ code.put(Nodes.branch_prediction_macros)
code.putln('static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }')
- code.putln('')
+ code.putln('')
code.putln('static PyObject *%s = NULL;' % env.module_cname)
- code.putln('static PyObject *%s;' % env.module_dict_cname)
- code.putln('static PyObject *%s;' % Naming.builtins_cname)
+ code.putln('static PyObject *%s;' % env.module_dict_cname)
+ code.putln('static PyObject *%s;' % Naming.builtins_cname)
code.putln('static PyObject *%s = NULL;' % Naming.cython_runtime_cname)
- code.putln('static PyObject *%s;' % Naming.empty_tuple)
- code.putln('static PyObject *%s;' % Naming.empty_bytes)
+ code.putln('static PyObject *%s;' % Naming.empty_tuple)
+ code.putln('static PyObject *%s;' % Naming.empty_bytes)
code.putln('static PyObject *%s;' % Naming.empty_unicode)
- if Options.pre_import is not None:
- code.putln('static PyObject *%s;' % Naming.preimport_cname)
- code.putln('static int %s;' % Naming.lineno_cname)
- code.putln('static int %s = 0;' % Naming.clineno_cname)
- code.putln('static const char * %s= %s;' % (Naming.cfilenm_cname, Naming.file_c_macro))
- code.putln('static const char *%s;' % Naming.filename_cname)
-
+ if Options.pre_import is not None:
+ code.putln('static PyObject *%s;' % Naming.preimport_cname)
+ code.putln('static int %s;' % Naming.lineno_cname)
+ code.putln('static int %s = 0;' % Naming.clineno_cname)
+ code.putln('static const char * %s= %s;' % (Naming.cfilenm_cname, Naming.file_c_macro))
+ code.putln('static const char *%s;' % Naming.filename_cname)
+
env.use_utility_code(UtilityCode.load_cached("FastTypeChecks", "ModuleSetupCode.c"))
if has_np_pythran(env):
env.use_utility_code(UtilityCode.load_cached("PythranConversion", "CppSupport.cpp"))
- def generate_extern_c_macro_definition(self, code):
- name = Naming.extern_c_macro
- code.putln("#ifndef %s" % name)
- code.putln(" #ifdef __cplusplus")
- code.putln(' #define %s extern "C"' % name)
- code.putln(" #else")
- code.putln(" #define %s extern" % name)
- code.putln(" #endif")
- code.putln("#endif")
-
+ def generate_extern_c_macro_definition(self, code):
+ name = Naming.extern_c_macro
+ code.putln("#ifndef %s" % name)
+ code.putln(" #ifdef __cplusplus")
+ code.putln(' #define %s extern "C"' % name)
+ code.putln(" #else")
+ code.putln(" #define %s extern" % name)
+ code.putln(" #endif")
+ code.putln("#endif")
+
def generate_dl_import_macro(self, code):
code.putln("#ifndef DL_IMPORT")
code.putln(" #define DL_IMPORT(_T) _T")
code.putln("#endif")
def generate_includes(self, env, cimported_modules, code, early=True, late=True):
- includes = []
+ includes = []
for inc in sorted(env.c_includes.values(), key=IncludeCode.sortkey):
if inc.location == inc.EARLY:
if early:
@@ -775,13 +775,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
inc.write(code)
if early:
code.putln_openmp("#include <omp.h>")
-
- def generate_filename_table(self, code):
+
+ def generate_filename_table(self, code):
from os.path import isabs, basename
- code.putln("")
- code.putln("static const char *%s[] = {" % Naming.filetable_cname)
- if code.globalstate.filename_list:
- for source_desc in code.globalstate.filename_list:
+ code.putln("")
+ code.putln("static const char *%s[] = {" % Naming.filetable_cname)
+ if code.globalstate.filename_list:
+ for source_desc in code.globalstate.filename_list:
file_path = source_desc.get_filenametable_entry()
if Options.source_root:
# If source root specified, dump description - it's source root relative filename
@@ -789,160 +789,160 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if isabs(file_path):
file_path = basename(file_path) # never include absolute paths
escaped_filename = file_path.replace("\\", "\\\\").replace('"', r'\"')
- code.putln('"%s",' % escaped_filename)
- else:
- # Some C compilers don't like an empty array
- code.putln("0")
- code.putln("};")
-
- def generate_type_predeclarations(self, env, code):
- pass
-
- def generate_type_header_code(self, type_entries, code):
- # Generate definitions of structs/unions/enums/typedefs/objstructs.
- #self.generate_gcc33_hack(env, code) # Is this still needed?
- # Forward declarations
- for entry in type_entries:
- if not entry.in_cinclude:
- #print "generate_type_header_code:", entry.name, repr(entry.type) ###
- type = entry.type
- if type.is_typedef: # Must test this first!
- pass
- elif type.is_struct_or_union or type.is_cpp_class:
- self.generate_struct_union_predeclaration(entry, code)
+ code.putln('"%s",' % escaped_filename)
+ else:
+ # Some C compilers don't like an empty array
+ code.putln("0")
+ code.putln("};")
+
+ def generate_type_predeclarations(self, env, code):
+ pass
+
+ def generate_type_header_code(self, type_entries, code):
+ # Generate definitions of structs/unions/enums/typedefs/objstructs.
+ #self.generate_gcc33_hack(env, code) # Is this still needed?
+ # Forward declarations
+ for entry in type_entries:
+ if not entry.in_cinclude:
+ #print "generate_type_header_code:", entry.name, repr(entry.type) ###
+ type = entry.type
+ if type.is_typedef: # Must test this first!
+ pass
+ elif type.is_struct_or_union or type.is_cpp_class:
+ self.generate_struct_union_predeclaration(entry, code)
elif type.is_ctuple and entry.used:
self.generate_struct_union_predeclaration(entry.type.struct_entry, code)
- elif type.is_extension_type:
- self.generate_objstruct_predeclaration(type, code)
- # Actual declarations
- for entry in type_entries:
- if not entry.in_cinclude:
- #print "generate_type_header_code:", entry.name, repr(entry.type) ###
- type = entry.type
- if type.is_typedef: # Must test this first!
- self.generate_typedef(entry, code)
- elif type.is_enum:
- self.generate_enum_definition(entry, code)
- elif type.is_struct_or_union:
- self.generate_struct_union_definition(entry, code)
+ elif type.is_extension_type:
+ self.generate_objstruct_predeclaration(type, code)
+ # Actual declarations
+ for entry in type_entries:
+ if not entry.in_cinclude:
+ #print "generate_type_header_code:", entry.name, repr(entry.type) ###
+ type = entry.type
+ if type.is_typedef: # Must test this first!
+ self.generate_typedef(entry, code)
+ elif type.is_enum:
+ self.generate_enum_definition(entry, code)
+ elif type.is_struct_or_union:
+ self.generate_struct_union_definition(entry, code)
elif type.is_ctuple and entry.used:
self.generate_struct_union_definition(entry.type.struct_entry, code)
- elif type.is_cpp_class:
- self.generate_cpp_class_definition(entry, code)
- elif type.is_extension_type:
- self.generate_objstruct_definition(type, code)
-
- def generate_gcc33_hack(self, env, code):
- # Workaround for spurious warning generation in gcc 3.3
- code.putln("")
- for entry in env.c_class_entries:
- type = entry.type
- if not type.typedef_flag:
- name = type.objstruct_cname
- if name.startswith("__pyx_"):
- tail = name[6:]
- else:
- tail = name
- code.putln("typedef struct %s __pyx_gcc33_%s;" % (
- name, tail))
-
- def generate_typedef(self, entry, code):
- base_type = entry.type.typedef_base_type
- if base_type.is_numeric:
- try:
- writer = code.globalstate['numeric_typedefs']
- except KeyError:
- writer = code
- else:
- writer = code
- writer.mark_pos(entry.pos)
- writer.putln("typedef %s;" % base_type.declaration_code(entry.cname))
-
- def sue_predeclaration(self, type, kind, name):
- if type.typedef_flag:
- return "%s %s;\ntypedef %s %s %s;" % (
- kind, name,
- kind, name, name)
- else:
- return "%s %s;" % (kind, name)
-
- def generate_struct_union_predeclaration(self, entry, code):
- type = entry.type
- if type.is_cpp_class and type.templates:
+ elif type.is_cpp_class:
+ self.generate_cpp_class_definition(entry, code)
+ elif type.is_extension_type:
+ self.generate_objstruct_definition(type, code)
+
+ def generate_gcc33_hack(self, env, code):
+ # Workaround for spurious warning generation in gcc 3.3
+ code.putln("")
+ for entry in env.c_class_entries:
+ type = entry.type
+ if not type.typedef_flag:
+ name = type.objstruct_cname
+ if name.startswith("__pyx_"):
+ tail = name[6:]
+ else:
+ tail = name
+ code.putln("typedef struct %s __pyx_gcc33_%s;" % (
+ name, tail))
+
+ def generate_typedef(self, entry, code):
+ base_type = entry.type.typedef_base_type
+ if base_type.is_numeric:
+ try:
+ writer = code.globalstate['numeric_typedefs']
+ except KeyError:
+ writer = code
+ else:
+ writer = code
+ writer.mark_pos(entry.pos)
+ writer.putln("typedef %s;" % base_type.declaration_code(entry.cname))
+
+ def sue_predeclaration(self, type, kind, name):
+ if type.typedef_flag:
+ return "%s %s;\ntypedef %s %s %s;" % (
+ kind, name,
+ kind, name, name)
+ else:
+ return "%s %s;" % (kind, name)
+
+ def generate_struct_union_predeclaration(self, entry, code):
+ type = entry.type
+ if type.is_cpp_class and type.templates:
code.putln("template <typename %s>" % ", typename ".join(
[T.empty_declaration_code() for T in type.templates]))
- code.putln(self.sue_predeclaration(type, type.kind, type.cname))
-
- def sue_header_footer(self, type, kind, name):
- header = "%s %s {" % (kind, name)
- footer = "};"
- return header, footer
-
- def generate_struct_union_definition(self, entry, code):
- code.mark_pos(entry.pos)
- type = entry.type
- scope = type.scope
- if scope:
- kind = type.kind
- packed = type.is_struct and type.packed
- if packed:
- kind = "%s %s" % (type.kind, "__Pyx_PACKED")
- code.globalstate.use_utility_code(packed_struct_utility_code)
- header, footer = \
- self.sue_header_footer(type, kind, type.cname)
- if packed:
- code.putln("#if defined(__SUNPRO_C)")
- code.putln(" #pragma pack(1)")
- code.putln("#elif !defined(__GNUC__)")
- code.putln(" #pragma pack(push, 1)")
- code.putln("#endif")
- code.putln(header)
- var_entries = scope.var_entries
- if not var_entries:
+ code.putln(self.sue_predeclaration(type, type.kind, type.cname))
+
+ def sue_header_footer(self, type, kind, name):
+ header = "%s %s {" % (kind, name)
+ footer = "};"
+ return header, footer
+
+ def generate_struct_union_definition(self, entry, code):
+ code.mark_pos(entry.pos)
+ type = entry.type
+ scope = type.scope
+ if scope:
+ kind = type.kind
+ packed = type.is_struct and type.packed
+ if packed:
+ kind = "%s %s" % (type.kind, "__Pyx_PACKED")
+ code.globalstate.use_utility_code(packed_struct_utility_code)
+ header, footer = \
+ self.sue_header_footer(type, kind, type.cname)
+ if packed:
+ code.putln("#if defined(__SUNPRO_C)")
+ code.putln(" #pragma pack(1)")
+ code.putln("#elif !defined(__GNUC__)")
+ code.putln(" #pragma pack(push, 1)")
+ code.putln("#endif")
+ code.putln(header)
+ var_entries = scope.var_entries
+ if not var_entries:
error(entry.pos, "Empty struct or union definition not allowed outside a 'cdef extern from' block")
- for attr in var_entries:
- code.putln(
+ for attr in var_entries:
+ code.putln(
"%s;" % attr.type.declaration_code(attr.cname))
- code.putln(footer)
- if packed:
- code.putln("#if defined(__SUNPRO_C)")
- code.putln(" #pragma pack()")
- code.putln("#elif !defined(__GNUC__)")
- code.putln(" #pragma pack(pop)")
- code.putln("#endif")
-
- def generate_cpp_class_definition(self, entry, code):
- code.mark_pos(entry.pos)
- type = entry.type
- scope = type.scope
- if scope:
- if type.templates:
+ code.putln(footer)
+ if packed:
+ code.putln("#if defined(__SUNPRO_C)")
+ code.putln(" #pragma pack()")
+ code.putln("#elif !defined(__GNUC__)")
+ code.putln(" #pragma pack(pop)")
+ code.putln("#endif")
+
+ def generate_cpp_class_definition(self, entry, code):
+ code.mark_pos(entry.pos)
+ type = entry.type
+ scope = type.scope
+ if scope:
+ if type.templates:
code.putln("template <class %s>" % ", class ".join(
[T.empty_declaration_code() for T in type.templates]))
- # Just let everything be public.
- code.put("struct %s" % type.cname)
- if type.base_classes:
- base_class_decl = ", public ".join(
+ # Just let everything be public.
+ code.put("struct %s" % type.cname)
+ if type.base_classes:
+ base_class_decl = ", public ".join(
[base_class.empty_declaration_code() for base_class in type.base_classes])
- code.put(" : public %s" % base_class_decl)
- code.putln(" {")
+ code.put(" : public %s" % base_class_decl)
+ code.putln(" {")
py_attrs = [e for e in scope.entries.values()
if e.type.is_pyobject and not e.is_inherited]
- has_virtual_methods = False
+ has_virtual_methods = False
constructor = None
destructor = None
- for attr in scope.var_entries:
+ for attr in scope.var_entries:
if attr.type.is_cfunction:
code.put("inline ")
- if attr.type.is_cfunction and attr.type.is_static_method:
- code.put("static ")
+ if attr.type.is_cfunction and attr.type.is_static_method:
+ code.put("static ")
elif attr.name == "<init>":
constructor = attr
elif attr.name == "<del>":
destructor = attr
elif attr.type.is_cfunction:
- code.put("virtual ")
- has_virtual_methods = True
+ code.put("virtual ")
+ has_virtual_methods = True
code.putln("%s;" % attr.type.declaration_code(attr.cname))
is_implementing = 'init_module' in code.globalstate.parts
if constructor or py_attrs:
@@ -1016,831 +1016,831 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
else:
code.putln("%s(const %s& __Pyx_other);" % (type.cname, type.cname))
code.putln("%s& operator=(const %s& __Pyx_other);" % (type.cname, type.cname))
- code.putln("};")
-
- def generate_enum_definition(self, entry, code):
- code.mark_pos(entry.pos)
- type = entry.type
- name = entry.cname or entry.name or ""
+ code.putln("};")
+
+ def generate_enum_definition(self, entry, code):
+ code.mark_pos(entry.pos)
+ type = entry.type
+ name = entry.cname or entry.name or ""
header, footer = self.sue_header_footer(type, "enum", name)
- code.putln(header)
- enum_values = entry.enum_values
- if not enum_values:
+ code.putln(header)
+ enum_values = entry.enum_values
+ if not enum_values:
error(entry.pos, "Empty enum definition not allowed outside a 'cdef extern from' block")
- else:
- last_entry = enum_values[-1]
- # this does not really generate code, just builds the result value
- for value_entry in enum_values:
- if value_entry.value_node is not None:
- value_entry.value_node.generate_evaluation_code(code)
-
- for value_entry in enum_values:
- if value_entry.value_node is None:
- value_code = value_entry.cname
- else:
- value_code = ("%s = %s" % (
- value_entry.cname,
- value_entry.value_node.result()))
- if value_entry is not last_entry:
- value_code += ","
- code.putln(value_code)
- code.putln(footer)
- if entry.type.typedef_flag:
- # Not pre-declared.
- code.putln("typedef enum %s %s;" % (name, name))
-
- def generate_typeobj_predeclaration(self, entry, code):
- code.putln("")
- name = entry.type.typeobj_cname
- if name:
- if entry.visibility == 'extern' and not entry.in_cinclude:
- code.putln("%s %s %s;" % (
- Naming.extern_c_macro,
- PyrexTypes.public_decl("PyTypeObject", "DL_IMPORT"),
- name))
- elif entry.visibility == 'public':
- code.putln("%s %s %s;" % (
- Naming.extern_c_macro,
- PyrexTypes.public_decl("PyTypeObject", "DL_EXPORT"),
- name))
- # ??? Do we really need the rest of this? ???
- #else:
- # code.putln("static PyTypeObject %s;" % name)
-
- def generate_exttype_vtable_struct(self, entry, code):
- if not entry.used:
- return
-
- code.mark_pos(entry.pos)
- # Generate struct declaration for an extension type's vtable.
- type = entry.type
- scope = type.scope
-
- self.specialize_fused_types(scope)
-
- if type.vtabstruct_cname:
- code.putln("")
+ else:
+ last_entry = enum_values[-1]
+ # this does not really generate code, just builds the result value
+ for value_entry in enum_values:
+ if value_entry.value_node is not None:
+ value_entry.value_node.generate_evaluation_code(code)
+
+ for value_entry in enum_values:
+ if value_entry.value_node is None:
+ value_code = value_entry.cname
+ else:
+ value_code = ("%s = %s" % (
+ value_entry.cname,
+ value_entry.value_node.result()))
+ if value_entry is not last_entry:
+ value_code += ","
+ code.putln(value_code)
+ code.putln(footer)
+ if entry.type.typedef_flag:
+ # Not pre-declared.
+ code.putln("typedef enum %s %s;" % (name, name))
+
+ def generate_typeobj_predeclaration(self, entry, code):
+ code.putln("")
+ name = entry.type.typeobj_cname
+ if name:
+ if entry.visibility == 'extern' and not entry.in_cinclude:
+ code.putln("%s %s %s;" % (
+ Naming.extern_c_macro,
+ PyrexTypes.public_decl("PyTypeObject", "DL_IMPORT"),
+ name))
+ elif entry.visibility == 'public':
+ code.putln("%s %s %s;" % (
+ Naming.extern_c_macro,
+ PyrexTypes.public_decl("PyTypeObject", "DL_EXPORT"),
+ name))
+ # ??? Do we really need the rest of this? ???
+ #else:
+ # code.putln("static PyTypeObject %s;" % name)
+
+ def generate_exttype_vtable_struct(self, entry, code):
+ if not entry.used:
+ return
+
+ code.mark_pos(entry.pos)
+ # Generate struct declaration for an extension type's vtable.
+ type = entry.type
+ scope = type.scope
+
+ self.specialize_fused_types(scope)
+
+ if type.vtabstruct_cname:
+ code.putln("")
code.putln("struct %s {" % type.vtabstruct_cname)
- if type.base_type and type.base_type.vtabstruct_cname:
- code.putln("struct %s %s;" % (
- type.base_type.vtabstruct_cname,
- Naming.obj_base_cname))
- for method_entry in scope.cfunc_entries:
- if not method_entry.is_inherited:
+ if type.base_type and type.base_type.vtabstruct_cname:
+ code.putln("struct %s %s;" % (
+ type.base_type.vtabstruct_cname,
+ Naming.obj_base_cname))
+ for method_entry in scope.cfunc_entries:
+ if not method_entry.is_inherited:
code.putln("%s;" % method_entry.type.declaration_code("(*%s)" % method_entry.cname))
code.putln("};")
-
- def generate_exttype_vtabptr_declaration(self, entry, code):
- if not entry.used:
- return
-
- code.mark_pos(entry.pos)
- # Generate declaration of pointer to an extension type's vtable.
- type = entry.type
- if type.vtabptr_cname:
- code.putln("static struct %s *%s;" % (
- type.vtabstruct_cname,
- type.vtabptr_cname))
-
- def generate_exttype_final_methods_declaration(self, entry, code):
- if not entry.used:
- return
-
- code.mark_pos(entry.pos)
- # Generate final methods prototypes
- type = entry.type
- for method_entry in entry.type.scope.cfunc_entries:
- if not method_entry.is_inherited and method_entry.final_func_cname:
- declaration = method_entry.type.declaration_code(
- method_entry.final_func_cname)
- modifiers = code.build_function_modifiers(method_entry.func_modifiers)
- code.putln("static %s%s;" % (modifiers, declaration))
-
- def generate_objstruct_predeclaration(self, type, code):
- if not type.scope:
- return
- code.putln(self.sue_predeclaration(type, "struct", type.objstruct_cname))
-
- def generate_objstruct_definition(self, type, code):
- code.mark_pos(type.pos)
- # Generate object struct definition for an
- # extension type.
- if not type.scope:
- return # Forward declared but never defined
- header, footer = \
- self.sue_header_footer(type, "struct", type.objstruct_cname)
- code.putln(header)
- base_type = type.base_type
- if base_type:
- basestruct_cname = base_type.objstruct_cname
- if basestruct_cname == "PyTypeObject":
- # User-defined subclasses of type are heap allocated.
- basestruct_cname = "PyHeapTypeObject"
- code.putln(
- "%s%s %s;" % (
- ("struct ", "")[base_type.typedef_flag],
- basestruct_cname,
- Naming.obj_base_cname))
- else:
- code.putln(
- "PyObject_HEAD")
- if type.vtabslot_cname and not (type.base_type and type.base_type.vtabslot_cname):
- code.putln(
- "struct %s *%s;" % (
- type.vtabstruct_cname,
- type.vtabslot_cname))
- for attr in type.scope.var_entries:
- if attr.is_declared_generic:
- attr_type = py_object_type
- else:
- attr_type = attr.type
- code.putln(
+
+ def generate_exttype_vtabptr_declaration(self, entry, code):
+ if not entry.used:
+ return
+
+ code.mark_pos(entry.pos)
+ # Generate declaration of pointer to an extension type's vtable.
+ type = entry.type
+ if type.vtabptr_cname:
+ code.putln("static struct %s *%s;" % (
+ type.vtabstruct_cname,
+ type.vtabptr_cname))
+
+ def generate_exttype_final_methods_declaration(self, entry, code):
+ if not entry.used:
+ return
+
+ code.mark_pos(entry.pos)
+ # Generate final methods prototypes
+ type = entry.type
+ for method_entry in entry.type.scope.cfunc_entries:
+ if not method_entry.is_inherited and method_entry.final_func_cname:
+ declaration = method_entry.type.declaration_code(
+ method_entry.final_func_cname)
+ modifiers = code.build_function_modifiers(method_entry.func_modifiers)
+ code.putln("static %s%s;" % (modifiers, declaration))
+
+ def generate_objstruct_predeclaration(self, type, code):
+ if not type.scope:
+ return
+ code.putln(self.sue_predeclaration(type, "struct", type.objstruct_cname))
+
+ def generate_objstruct_definition(self, type, code):
+ code.mark_pos(type.pos)
+ # Generate object struct definition for an
+ # extension type.
+ if not type.scope:
+ return # Forward declared but never defined
+ header, footer = \
+ self.sue_header_footer(type, "struct", type.objstruct_cname)
+ code.putln(header)
+ base_type = type.base_type
+ if base_type:
+ basestruct_cname = base_type.objstruct_cname
+ if basestruct_cname == "PyTypeObject":
+ # User-defined subclasses of type are heap allocated.
+ basestruct_cname = "PyHeapTypeObject"
+ code.putln(
+ "%s%s %s;" % (
+ ("struct ", "")[base_type.typedef_flag],
+ basestruct_cname,
+ Naming.obj_base_cname))
+ else:
+ code.putln(
+ "PyObject_HEAD")
+ if type.vtabslot_cname and not (type.base_type and type.base_type.vtabslot_cname):
+ code.putln(
+ "struct %s *%s;" % (
+ type.vtabstruct_cname,
+ type.vtabslot_cname))
+ for attr in type.scope.var_entries:
+ if attr.is_declared_generic:
+ attr_type = py_object_type
+ else:
+ attr_type = attr.type
+ code.putln(
"%s;" % attr_type.declaration_code(attr.cname))
- code.putln(footer)
- if type.objtypedef_cname is not None:
- # Only for exposing public typedef name.
- code.putln("typedef struct %s %s;" % (type.objstruct_cname, type.objtypedef_cname))
-
- def generate_c_class_declarations(self, env, code, definition):
- for entry in env.c_class_entries:
- if definition or entry.defined_in_pxd:
+ code.putln(footer)
+ if type.objtypedef_cname is not None:
+ # Only for exposing public typedef name.
+ code.putln("typedef struct %s %s;" % (type.objstruct_cname, type.objtypedef_cname))
+
+ def generate_c_class_declarations(self, env, code, definition):
+ for entry in env.c_class_entries:
+ if definition or entry.defined_in_pxd:
code.putln("static PyTypeObject *%s = 0;" % (
entry.type.typeptr_cname))
-
- def generate_cvariable_declarations(self, env, code, definition):
- if env.is_cython_builtin:
- return
- for entry in env.var_entries:
- if (entry.in_cinclude or entry.in_closure or
+
+ def generate_cvariable_declarations(self, env, code, definition):
+ if env.is_cython_builtin:
+ return
+ for entry in env.var_entries:
+ if (entry.in_cinclude or entry.in_closure or
(entry.visibility == 'private' and not (entry.defined_in_pxd or entry.used))):
- continue
-
- storage_class = None
- dll_linkage = None
- init = None
-
- if entry.visibility == 'extern':
- storage_class = Naming.extern_c_macro
- dll_linkage = "DL_IMPORT"
- elif entry.visibility == 'public':
- storage_class = Naming.extern_c_macro
- if definition:
- dll_linkage = "DL_EXPORT"
- else:
- dll_linkage = "DL_IMPORT"
- elif entry.visibility == 'private':
- storage_class = "static"
- dll_linkage = None
- if entry.init is not None:
+ continue
+
+ storage_class = None
+ dll_linkage = None
+ init = None
+
+ if entry.visibility == 'extern':
+ storage_class = Naming.extern_c_macro
+ dll_linkage = "DL_IMPORT"
+ elif entry.visibility == 'public':
+ storage_class = Naming.extern_c_macro
+ if definition:
+ dll_linkage = "DL_EXPORT"
+ else:
+ dll_linkage = "DL_IMPORT"
+ elif entry.visibility == 'private':
+ storage_class = "static"
+ dll_linkage = None
+ if entry.init is not None:
init = entry.type.literal_code(entry.init)
- type = entry.type
- cname = entry.cname
-
- if entry.defined_in_pxd and not definition:
- storage_class = "static"
- dll_linkage = None
- type = CPtrType(type)
- cname = env.mangle(Naming.varptr_prefix, entry.name)
- init = 0
-
- if storage_class:
- code.put("%s " % storage_class)
- code.put(type.declaration_code(
+ type = entry.type
+ cname = entry.cname
+
+ if entry.defined_in_pxd and not definition:
+ storage_class = "static"
+ dll_linkage = None
+ type = CPtrType(type)
+ cname = env.mangle(Naming.varptr_prefix, entry.name)
+ init = 0
+
+ if storage_class:
+ code.put("%s " % storage_class)
+ code.put(type.declaration_code(
cname, dll_linkage=dll_linkage))
- if init is not None:
- code.put_safe(" = %s" % init)
- code.putln(";")
- if entry.cname != cname:
- code.putln("#define %s (*%s)" % (entry.cname, cname))
-
- def generate_cfunction_declarations(self, env, code, definition):
- for entry in env.cfunc_entries:
- if entry.used or (entry.visibility == 'public' or entry.api):
- generate_cfunction_declaration(entry, env, code, definition)
-
- def generate_variable_definitions(self, env, code):
- for entry in env.var_entries:
+ if init is not None:
+ code.put_safe(" = %s" % init)
+ code.putln(";")
+ if entry.cname != cname:
+ code.putln("#define %s (*%s)" % (entry.cname, cname))
+
+ def generate_cfunction_declarations(self, env, code, definition):
+ for entry in env.cfunc_entries:
+ if entry.used or (entry.visibility == 'public' or entry.api):
+ generate_cfunction_declaration(entry, env, code, definition)
+
+ def generate_variable_definitions(self, env, code):
+ for entry in env.var_entries:
if not entry.in_cinclude and entry.visibility == "public":
- code.put(entry.type.declaration_code(entry.cname))
- if entry.init is not None:
+ code.put(entry.type.declaration_code(entry.cname))
+ if entry.init is not None:
init = entry.type.literal_code(entry.init)
- code.put_safe(" = %s" % init)
- code.putln(";")
-
- def generate_typeobj_definitions(self, env, code):
- full_module_name = env.qualified_name
- for entry in env.c_class_entries:
- #print "generate_typeobj_definitions:", entry.name
- #print "...visibility =", entry.visibility
- if entry.visibility != 'extern':
- type = entry.type
- scope = type.scope
- if scope: # could be None if there was an error
+ code.put_safe(" = %s" % init)
+ code.putln(";")
+
+ def generate_typeobj_definitions(self, env, code):
+ full_module_name = env.qualified_name
+ for entry in env.c_class_entries:
+ #print "generate_typeobj_definitions:", entry.name
+ #print "...visibility =", entry.visibility
+ if entry.visibility != 'extern':
+ type = entry.type
+ scope = type.scope
+ if scope: # could be None if there was an error
if not scope.directives['c_api_binop_methods']:
error(self.pos,
"The 'c_api_binop_methods' directive is only supported for forward compatibility"
" and must be True.")
- self.generate_exttype_vtable(scope, code)
- self.generate_new_function(scope, code, entry)
- self.generate_dealloc_function(scope, code)
- if scope.needs_gc():
- self.generate_traverse_function(scope, code, entry)
- if scope.needs_tp_clear():
- self.generate_clear_function(scope, code, entry)
+ self.generate_exttype_vtable(scope, code)
+ self.generate_new_function(scope, code, entry)
+ self.generate_dealloc_function(scope, code)
+ if scope.needs_gc():
+ self.generate_traverse_function(scope, code, entry)
+ if scope.needs_tp_clear():
+ self.generate_clear_function(scope, code, entry)
if scope.defines_any_special(["__getitem__"]):
- self.generate_getitem_int_function(scope, code)
+ self.generate_getitem_int_function(scope, code)
if scope.defines_any_special(["__setitem__", "__delitem__"]):
- self.generate_ass_subscript_function(scope, code)
+ self.generate_ass_subscript_function(scope, code)
if scope.defines_any_special(["__getslice__", "__setslice__", "__delslice__"]):
warning(self.pos,
"__getslice__, __setslice__, and __delslice__ are not supported by Python 3, "
"use __getitem__, __setitem__, and __delitem__ instead", 1)
- code.putln("#if PY_MAJOR_VERSION >= 3")
- code.putln("#error __getslice__, __setslice__, and __delslice__ not supported in Python 3.")
- code.putln("#endif")
+ code.putln("#if PY_MAJOR_VERSION >= 3")
+ code.putln("#error __getslice__, __setslice__, and __delslice__ not supported in Python 3.")
+ code.putln("#endif")
if scope.defines_any_special(["__setslice__", "__delslice__"]):
- self.generate_ass_slice_function(scope, code)
+ self.generate_ass_slice_function(scope, code)
if scope.defines_any_special(["__getattr__", "__getattribute__"]):
- self.generate_getattro_function(scope, code)
+ self.generate_getattro_function(scope, code)
if scope.defines_any_special(["__setattr__", "__delattr__"]):
- self.generate_setattro_function(scope, code)
+ self.generate_setattro_function(scope, code)
if scope.defines_any_special(["__get__"]):
- self.generate_descr_get_function(scope, code)
+ self.generate_descr_get_function(scope, code)
if scope.defines_any_special(["__set__", "__delete__"]):
- self.generate_descr_set_function(scope, code)
+ self.generate_descr_set_function(scope, code)
if not scope.is_closure_class_scope and scope.defines_any(["__dict__"]):
self.generate_dict_getter_function(scope, code)
if scope.defines_any_special(TypeSlots.richcmp_special_methods):
self.generate_richcmp_function(scope, code)
- self.generate_property_accessors(scope, code)
- self.generate_method_table(scope, code)
- self.generate_getset_table(scope, code)
- self.generate_typeobj_definition(full_module_name, entry, code)
-
- def generate_exttype_vtable(self, scope, code):
- # Generate the definition of an extension type's vtable.
- type = scope.parent_type
- if type.vtable_cname:
- code.putln("static struct %s %s;" % (
- type.vtabstruct_cname,
- type.vtable_cname))
-
- def generate_self_cast(self, scope, code):
- type = scope.parent_type
- code.putln(
- "%s = (%s)o;" % (
- type.declaration_code("p"),
+ self.generate_property_accessors(scope, code)
+ self.generate_method_table(scope, code)
+ self.generate_getset_table(scope, code)
+ self.generate_typeobj_definition(full_module_name, entry, code)
+
+ def generate_exttype_vtable(self, scope, code):
+ # Generate the definition of an extension type's vtable.
+ type = scope.parent_type
+ if type.vtable_cname:
+ code.putln("static struct %s %s;" % (
+ type.vtabstruct_cname,
+ type.vtable_cname))
+
+ def generate_self_cast(self, scope, code):
+ type = scope.parent_type
+ code.putln(
+ "%s = (%s)o;" % (
+ type.declaration_code("p"),
type.empty_declaration_code()))
-
- def generate_new_function(self, scope, code, cclass_entry):
- tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
- slot_func = scope.mangle_internal("tp_new")
- type = scope.parent_type
- base_type = type.base_type
-
- have_entries, (py_attrs, py_buffers, memoryview_slices) = \
- scope.get_refcounted_entries()
- is_final_type = scope.parent_type.is_final_type
- if scope.is_internal:
- # internal classes (should) never need None inits, normal zeroing will do
- py_attrs = []
- cpp_class_attrs = [entry for entry in scope.var_entries
- if entry.type.is_cpp_class]
-
- new_func_entry = scope.lookup_here("__new__")
- if base_type or (new_func_entry and new_func_entry.is_special
- and not new_func_entry.trivial_signature):
- unused_marker = ''
- else:
- unused_marker = 'CYTHON_UNUSED '
-
- if base_type:
- freelist_size = 0 # not currently supported
- else:
- freelist_size = scope.directives.get('freelist', 0)
- freelist_name = scope.mangle_internal(Naming.freelist_name)
- freecount_name = scope.mangle_internal(Naming.freecount_name)
-
- decls = code.globalstate['decls']
- decls.putln("static PyObject *%s(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/" %
- slot_func)
- code.putln("")
- if freelist_size:
- code.putln("static %s[%d];" % (
- scope.parent_type.declaration_code(freelist_name),
- freelist_size))
- code.putln("static int %s = 0;" % freecount_name)
- code.putln("")
- code.putln(
+
+ def generate_new_function(self, scope, code, cclass_entry):
+ tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
+ slot_func = scope.mangle_internal("tp_new")
+ type = scope.parent_type
+ base_type = type.base_type
+
+ have_entries, (py_attrs, py_buffers, memoryview_slices) = \
+ scope.get_refcounted_entries()
+ is_final_type = scope.parent_type.is_final_type
+ if scope.is_internal:
+ # internal classes (should) never need None inits, normal zeroing will do
+ py_attrs = []
+ cpp_class_attrs = [entry for entry in scope.var_entries
+ if entry.type.is_cpp_class]
+
+ new_func_entry = scope.lookup_here("__new__")
+ if base_type or (new_func_entry and new_func_entry.is_special
+ and not new_func_entry.trivial_signature):
+ unused_marker = ''
+ else:
+ unused_marker = 'CYTHON_UNUSED '
+
+ if base_type:
+ freelist_size = 0 # not currently supported
+ else:
+ freelist_size = scope.directives.get('freelist', 0)
+ freelist_name = scope.mangle_internal(Naming.freelist_name)
+ freecount_name = scope.mangle_internal(Naming.freecount_name)
+
+ decls = code.globalstate['decls']
+ decls.putln("static PyObject *%s(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/" %
+ slot_func)
+ code.putln("")
+ if freelist_size:
+ code.putln("static %s[%d];" % (
+ scope.parent_type.declaration_code(freelist_name),
+ freelist_size))
+ code.putln("static int %s = 0;" % freecount_name)
+ code.putln("")
+ code.putln(
"static PyObject *%s(PyTypeObject *t, %sPyObject *a, %sPyObject *k) {" % (
slot_func, unused_marker, unused_marker))
-
- need_self_cast = (type.vtabslot_cname or
- (py_buffers or memoryview_slices or py_attrs) or
- cpp_class_attrs)
- if need_self_cast:
- code.putln("%s;" % scope.parent_type.declaration_code("p"))
- if base_type:
- tp_new = TypeSlots.get_base_slot_function(scope, tp_slot)
- if tp_new is None:
- tp_new = "%s->tp_new" % base_type.typeptr_cname
- code.putln("PyObject *o = %s(t, a, k);" % tp_new)
- else:
- code.putln("PyObject *o;")
- if freelist_size:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
- if is_final_type:
- type_safety_check = ''
- else:
- type_safety_check = ' & ((t->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)) == 0)'
- obj_struct = type.declaration_code("", deref=True)
+
+ need_self_cast = (type.vtabslot_cname or
+ (py_buffers or memoryview_slices or py_attrs) or
+ cpp_class_attrs)
+ if need_self_cast:
+ code.putln("%s;" % scope.parent_type.declaration_code("p"))
+ if base_type:
+ tp_new = TypeSlots.get_base_slot_function(scope, tp_slot)
+ if tp_new is None:
+ tp_new = "%s->tp_new" % base_type.typeptr_cname
+ code.putln("PyObject *o = %s(t, a, k);" % tp_new)
+ else:
+ code.putln("PyObject *o;")
+ if freelist_size:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
+ if is_final_type:
+ type_safety_check = ''
+ else:
+ type_safety_check = ' & ((t->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)) == 0)'
+ obj_struct = type.declaration_code("", deref=True)
code.putln(
"if (CYTHON_COMPILING_IN_CPYTHON && likely((%s > 0) & (t->tp_basicsize == sizeof(%s))%s)) {" % (
freecount_name, obj_struct, type_safety_check))
- code.putln("o = (PyObject*)%s[--%s];" % (
- freelist_name, freecount_name))
- code.putln("memset(o, 0, sizeof(%s));" % obj_struct)
- code.putln("(void) PyObject_INIT(o, t);")
- if scope.needs_gc():
- code.putln("PyObject_GC_Track(o);")
- code.putln("} else {")
- if not is_final_type:
- code.putln("if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {")
- code.putln("o = (*t->tp_alloc)(t, 0);")
- if not is_final_type:
- code.putln("} else {")
- code.putln("o = (PyObject *) PyBaseObject_Type.tp_new(t, %s, 0);" % Naming.empty_tuple)
- code.putln("}")
- code.putln("if (unlikely(!o)) return 0;")
- if freelist_size and not base_type:
- code.putln('}')
- if need_self_cast:
- code.putln("p = %s;" % type.cast_code("o"))
- #if need_self_cast:
- # self.generate_self_cast(scope, code)
+ code.putln("o = (PyObject*)%s[--%s];" % (
+ freelist_name, freecount_name))
+ code.putln("memset(o, 0, sizeof(%s));" % obj_struct)
+ code.putln("(void) PyObject_INIT(o, t);")
+ if scope.needs_gc():
+ code.putln("PyObject_GC_Track(o);")
+ code.putln("} else {")
+ if not is_final_type:
+ code.putln("if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {")
+ code.putln("o = (*t->tp_alloc)(t, 0);")
+ if not is_final_type:
+ code.putln("} else {")
+ code.putln("o = (PyObject *) PyBaseObject_Type.tp_new(t, %s, 0);" % Naming.empty_tuple)
+ code.putln("}")
+ code.putln("if (unlikely(!o)) return 0;")
+ if freelist_size and not base_type:
+ code.putln('}')
+ if need_self_cast:
+ code.putln("p = %s;" % type.cast_code("o"))
+ #if need_self_cast:
+ # self.generate_self_cast(scope, code)
# from this point on, ensure DECREF(o) on failure
needs_error_cleanup = False
- if type.vtabslot_cname:
- vtab_base_type = type
- while vtab_base_type.base_type and vtab_base_type.base_type.vtabstruct_cname:
- vtab_base_type = vtab_base_type.base_type
- if vtab_base_type is not type:
- struct_type_cast = "(struct %s*)" % vtab_base_type.vtabstruct_cname
- else:
- struct_type_cast = ""
- code.putln("p->%s = %s%s;" % (
- type.vtabslot_cname,
- struct_type_cast, type.vtabptr_cname))
-
- for entry in cpp_class_attrs:
+ if type.vtabslot_cname:
+ vtab_base_type = type
+ while vtab_base_type.base_type and vtab_base_type.base_type.vtabstruct_cname:
+ vtab_base_type = vtab_base_type.base_type
+ if vtab_base_type is not type:
+ struct_type_cast = "(struct %s*)" % vtab_base_type.vtabstruct_cname
+ else:
+ struct_type_cast = ""
+ code.putln("p->%s = %s%s;" % (
+ type.vtabslot_cname,
+ struct_type_cast, type.vtabptr_cname))
+
+ for entry in cpp_class_attrs:
code.putln("new((void*)&(p->%s)) %s();" % (
entry.cname, entry.type.empty_declaration_code()))
-
- for entry in py_attrs:
+
+ for entry in py_attrs:
if entry.name == "__dict__":
needs_error_cleanup = True
code.put("p->%s = PyDict_New(); if (unlikely(!p->%s)) goto bad;" % (
entry.cname, entry.cname))
else:
code.put_init_var_to_py_none(entry, "p->%s", nanny=False)
-
- for entry in memoryview_slices:
- code.putln("p->%s.data = NULL;" % entry.cname)
- code.putln("p->%s.memview = NULL;" % entry.cname)
-
- for entry in py_buffers:
- code.putln("p->%s.obj = NULL;" % entry.cname)
-
- if cclass_entry.cname == '__pyx_memoryviewslice':
- code.putln("p->from_slice.memview = NULL;")
-
- if new_func_entry and new_func_entry.is_special:
- if new_func_entry.trivial_signature:
- cinit_args = "o, %s, NULL" % Naming.empty_tuple
- else:
- cinit_args = "o, a, k"
+
+ for entry in memoryview_slices:
+ code.putln("p->%s.data = NULL;" % entry.cname)
+ code.putln("p->%s.memview = NULL;" % entry.cname)
+
+ for entry in py_buffers:
+ code.putln("p->%s.obj = NULL;" % entry.cname)
+
+ if cclass_entry.cname == '__pyx_memoryviewslice':
+ code.putln("p->from_slice.memview = NULL;")
+
+ if new_func_entry and new_func_entry.is_special:
+ if new_func_entry.trivial_signature:
+ cinit_args = "o, %s, NULL" % Naming.empty_tuple
+ else:
+ cinit_args = "o, a, k"
needs_error_cleanup = True
code.putln("if (unlikely(%s(%s) < 0)) goto bad;" % (
new_func_entry.func_cname, cinit_args))
- code.putln(
- "return o;")
+ code.putln(
+ "return o;")
if needs_error_cleanup:
code.putln("bad:")
code.put_decref_clear("o", py_object_type, nanny=False)
code.putln("return NULL;")
- code.putln(
- "}")
-
- def generate_dealloc_function(self, scope, code):
- tp_slot = TypeSlots.ConstructorSlot("tp_dealloc", '__dealloc__')
- slot_func = scope.mangle_internal("tp_dealloc")
- base_type = scope.parent_type.base_type
- if tp_slot.slot_code(scope) != slot_func:
- return # never used
-
- slot_func_cname = scope.mangle_internal("tp_dealloc")
- code.putln("")
- code.putln(
- "static void %s(PyObject *o) {" % slot_func_cname)
-
- is_final_type = scope.parent_type.is_final_type
- needs_gc = scope.needs_gc()
-
+ code.putln(
+ "}")
+
+ def generate_dealloc_function(self, scope, code):
+ tp_slot = TypeSlots.ConstructorSlot("tp_dealloc", '__dealloc__')
+ slot_func = scope.mangle_internal("tp_dealloc")
+ base_type = scope.parent_type.base_type
+ if tp_slot.slot_code(scope) != slot_func:
+ return # never used
+
+ slot_func_cname = scope.mangle_internal("tp_dealloc")
+ code.putln("")
+ code.putln(
+ "static void %s(PyObject *o) {" % slot_func_cname)
+
+ is_final_type = scope.parent_type.is_final_type
+ needs_gc = scope.needs_gc()
+
weakref_slot = scope.lookup_here("__weakref__") if not scope.is_closure_class_scope else None
- if weakref_slot not in scope.var_entries:
- weakref_slot = None
-
+ if weakref_slot not in scope.var_entries:
+ weakref_slot = None
+
dict_slot = scope.lookup_here("__dict__") if not scope.is_closure_class_scope else None
if dict_slot not in scope.var_entries:
dict_slot = None
- _, (py_attrs, _, memoryview_slices) = scope.get_refcounted_entries()
- cpp_class_attrs = [entry for entry in scope.var_entries
- if entry.type.is_cpp_class]
-
+ _, (py_attrs, _, memoryview_slices) = scope.get_refcounted_entries()
+ cpp_class_attrs = [entry for entry in scope.var_entries
+ if entry.type.is_cpp_class]
+
if py_attrs or cpp_class_attrs or memoryview_slices or weakref_slot or dict_slot:
- self.generate_self_cast(scope, code)
-
- if not is_final_type:
- # in Py3.4+, call tp_finalize() as early as possible
+ self.generate_self_cast(scope, code)
+
+ if not is_final_type:
+ # in Py3.4+, call tp_finalize() as early as possible
code.putln("#if CYTHON_USE_TP_FINALIZE")
- if needs_gc:
- finalised_check = '!_PyGC_FINALIZED(o)'
- else:
- finalised_check = (
- '(!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))')
+ if needs_gc:
+ finalised_check = '!_PyGC_FINALIZED(o)'
+ else:
+ finalised_check = (
+ '(!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))')
code.putln(
"if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)"
" && Py_TYPE(o)->tp_finalize) && %s) {" % finalised_check)
- # if instance was resurrected by finaliser, return
- code.putln("if (PyObject_CallFinalizerFromDealloc(o)) return;")
- code.putln("}")
- code.putln("#endif")
-
- if needs_gc:
- # We must mark this object as (gc) untracked while tearing
- # it down, lest the garbage collection is invoked while
- # running this destructor.
- code.putln("PyObject_GC_UnTrack(o);")
-
- # call the user's __dealloc__
- self.generate_usr_dealloc_call(scope, code)
-
- if weakref_slot:
- code.putln("if (p->__weakref__) PyObject_ClearWeakRefs(o);")
-
+ # if instance was resurrected by finaliser, return
+ code.putln("if (PyObject_CallFinalizerFromDealloc(o)) return;")
+ code.putln("}")
+ code.putln("#endif")
+
+ if needs_gc:
+ # We must mark this object as (gc) untracked while tearing
+ # it down, lest the garbage collection is invoked while
+ # running this destructor.
+ code.putln("PyObject_GC_UnTrack(o);")
+
+ # call the user's __dealloc__
+ self.generate_usr_dealloc_call(scope, code)
+
+ if weakref_slot:
+ code.putln("if (p->__weakref__) PyObject_ClearWeakRefs(o);")
+
if dict_slot:
code.putln("if (p->__dict__) PyDict_Clear(p->__dict__);")
- for entry in cpp_class_attrs:
+ for entry in cpp_class_attrs:
code.putln("__Pyx_call_destructor(p->%s);" % entry.cname)
-
- for entry in py_attrs:
- code.put_xdecref_clear("p->%s" % entry.cname, entry.type, nanny=False,
- clear_before_decref=True)
-
- for entry in memoryview_slices:
- code.put_xdecref_memoryviewslice("p->%s" % entry.cname,
- have_gil=True)
-
- if base_type:
- if needs_gc:
- # The base class deallocator probably expects this to be tracked,
- # so undo the untracking above.
- if base_type.scope and base_type.scope.needs_gc():
- code.putln("PyObject_GC_Track(o);")
- else:
+
+ for entry in py_attrs:
+ code.put_xdecref_clear("p->%s" % entry.cname, entry.type, nanny=False,
+ clear_before_decref=True)
+
+ for entry in memoryview_slices:
+ code.put_xdecref_memoryviewslice("p->%s" % entry.cname,
+ have_gil=True)
+
+ if base_type:
+ if needs_gc:
+ # The base class deallocator probably expects this to be tracked,
+ # so undo the untracking above.
+ if base_type.scope and base_type.scope.needs_gc():
+ code.putln("PyObject_GC_Track(o);")
+ else:
code.putln("#if CYTHON_USE_TYPE_SLOTS")
- code.putln("if (PyType_IS_GC(Py_TYPE(o)->tp_base))")
- code.putln("#endif")
- code.putln("PyObject_GC_Track(o);")
-
- tp_dealloc = TypeSlots.get_base_slot_function(scope, tp_slot)
- if tp_dealloc is not None:
- code.putln("%s(o);" % tp_dealloc)
- elif base_type.is_builtin_type:
- code.putln("%s->tp_dealloc(o);" % base_type.typeptr_cname)
- else:
- # This is an externally defined type. Calling through the
- # cimported base type pointer directly interacts badly with
- # the module cleanup, which may already have cleared it.
- # In that case, fall back to traversing the type hierarchy.
- base_cname = base_type.typeptr_cname
- code.putln("if (likely(%s)) %s->tp_dealloc(o); "
- "else __Pyx_call_next_tp_dealloc(o, %s);" % (
- base_cname, base_cname, slot_func_cname))
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("CallNextTpDealloc", "ExtensionTypes.c"))
- else:
- freelist_size = scope.directives.get('freelist', 0)
- if freelist_size:
- freelist_name = scope.mangle_internal(Naming.freelist_name)
- freecount_name = scope.mangle_internal(Naming.freecount_name)
-
- if is_final_type:
- type_safety_check = ''
- else:
- type_safety_check = (
- ' & ((Py_TYPE(o)->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)) == 0)')
-
- type = scope.parent_type
+ code.putln("if (PyType_IS_GC(Py_TYPE(o)->tp_base))")
+ code.putln("#endif")
+ code.putln("PyObject_GC_Track(o);")
+
+ tp_dealloc = TypeSlots.get_base_slot_function(scope, tp_slot)
+ if tp_dealloc is not None:
+ code.putln("%s(o);" % tp_dealloc)
+ elif base_type.is_builtin_type:
+ code.putln("%s->tp_dealloc(o);" % base_type.typeptr_cname)
+ else:
+ # This is an externally defined type. Calling through the
+ # cimported base type pointer directly interacts badly with
+ # the module cleanup, which may already have cleared it.
+ # In that case, fall back to traversing the type hierarchy.
+ base_cname = base_type.typeptr_cname
+ code.putln("if (likely(%s)) %s->tp_dealloc(o); "
+ "else __Pyx_call_next_tp_dealloc(o, %s);" % (
+ base_cname, base_cname, slot_func_cname))
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("CallNextTpDealloc", "ExtensionTypes.c"))
+ else:
+ freelist_size = scope.directives.get('freelist', 0)
+ if freelist_size:
+ freelist_name = scope.mangle_internal(Naming.freelist_name)
+ freecount_name = scope.mangle_internal(Naming.freecount_name)
+
+ if is_final_type:
+ type_safety_check = ''
+ else:
+ type_safety_check = (
+ ' & ((Py_TYPE(o)->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)) == 0)')
+
+ type = scope.parent_type
code.putln(
"if (CYTHON_COMPILING_IN_CPYTHON && ((%s < %d) & (Py_TYPE(o)->tp_basicsize == sizeof(%s))%s)) {" % (
freecount_name,
freelist_size,
type.declaration_code("", deref=True),
type_safety_check))
- code.putln("%s[%s++] = %s;" % (
- freelist_name, freecount_name, type.cast_code("o")))
- code.putln("} else {")
- code.putln("(*Py_TYPE(o)->tp_free)(o);")
- if freelist_size:
- code.putln("}")
- code.putln(
- "}")
-
- def generate_usr_dealloc_call(self, scope, code):
- entry = scope.lookup_here("__dealloc__")
- if not entry:
- return
-
- code.putln("{")
- code.putln("PyObject *etype, *eval, *etb;")
- code.putln("PyErr_Fetch(&etype, &eval, &etb);")
+ code.putln("%s[%s++] = %s;" % (
+ freelist_name, freecount_name, type.cast_code("o")))
+ code.putln("} else {")
+ code.putln("(*Py_TYPE(o)->tp_free)(o);")
+ if freelist_size:
+ code.putln("}")
+ code.putln(
+ "}")
+
+ def generate_usr_dealloc_call(self, scope, code):
+ entry = scope.lookup_here("__dealloc__")
+ if not entry:
+ return
+
+ code.putln("{")
+ code.putln("PyObject *etype, *eval, *etb;")
+ code.putln("PyErr_Fetch(&etype, &eval, &etb);")
# increase the refcount while we are calling into user code
# to prevent recursive deallocation
code.putln("__Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1);")
- code.putln("%s(o);" % entry.func_cname)
+ code.putln("%s(o);" % entry.func_cname)
code.putln("__Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1);")
- code.putln("PyErr_Restore(etype, eval, etb);")
- code.putln("}")
-
- def generate_traverse_function(self, scope, code, cclass_entry):
- tp_slot = TypeSlots.GCDependentSlot("tp_traverse")
- slot_func = scope.mangle_internal("tp_traverse")
- base_type = scope.parent_type.base_type
- if tp_slot.slot_code(scope) != slot_func:
+ code.putln("PyErr_Restore(etype, eval, etb);")
+ code.putln("}")
+
+ def generate_traverse_function(self, scope, code, cclass_entry):
+ tp_slot = TypeSlots.GCDependentSlot("tp_traverse")
+ slot_func = scope.mangle_internal("tp_traverse")
+ base_type = scope.parent_type.base_type
+ if tp_slot.slot_code(scope) != slot_func:
return # never used
- code.putln("")
- code.putln(
+ code.putln("")
+ code.putln(
"static int %s(PyObject *o, visitproc v, void *a) {" % slot_func)
-
- have_entries, (py_attrs, py_buffers, memoryview_slices) = (
- scope.get_refcounted_entries(include_gc_simple=False))
-
- if base_type or py_attrs:
- code.putln("int e;")
-
- if py_attrs or py_buffers:
- self.generate_self_cast(scope, code)
-
- if base_type:
- # want to call it explicitly if possible so inlining can be performed
- static_call = TypeSlots.get_base_slot_function(scope, tp_slot)
- if static_call:
- code.putln("e = %s(o, v, a); if (e) return e;" % static_call)
- elif base_type.is_builtin_type:
- base_cname = base_type.typeptr_cname
- code.putln("if (!%s->tp_traverse); else { e = %s->tp_traverse(o,v,a); if (e) return e; }" % (
- base_cname, base_cname))
- else:
- # This is an externally defined type. Calling through the
- # cimported base type pointer directly interacts badly with
- # the module cleanup, which may already have cleared it.
- # In that case, fall back to traversing the type hierarchy.
- base_cname = base_type.typeptr_cname
+
+ have_entries, (py_attrs, py_buffers, memoryview_slices) = (
+ scope.get_refcounted_entries(include_gc_simple=False))
+
+ if base_type or py_attrs:
+ code.putln("int e;")
+
+ if py_attrs or py_buffers:
+ self.generate_self_cast(scope, code)
+
+ if base_type:
+ # want to call it explicitly if possible so inlining can be performed
+ static_call = TypeSlots.get_base_slot_function(scope, tp_slot)
+ if static_call:
+ code.putln("e = %s(o, v, a); if (e) return e;" % static_call)
+ elif base_type.is_builtin_type:
+ base_cname = base_type.typeptr_cname
+ code.putln("if (!%s->tp_traverse); else { e = %s->tp_traverse(o,v,a); if (e) return e; }" % (
+ base_cname, base_cname))
+ else:
+ # This is an externally defined type. Calling through the
+ # cimported base type pointer directly interacts badly with
+ # the module cleanup, which may already have cleared it.
+ # In that case, fall back to traversing the type hierarchy.
+ base_cname = base_type.typeptr_cname
code.putln(
"e = ((likely(%s)) ? ((%s->tp_traverse) ? %s->tp_traverse(o, v, a) : 0) : "
"__Pyx_call_next_tp_traverse(o, v, a, %s)); if (e) return e;" % (
base_cname, base_cname, base_cname, slot_func))
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("CallNextTpTraverse", "ExtensionTypes.c"))
-
- for entry in py_attrs:
- var_code = "p->%s" % entry.cname
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("CallNextTpTraverse", "ExtensionTypes.c"))
+
+ for entry in py_attrs:
+ var_code = "p->%s" % entry.cname
var_as_pyobject = PyrexTypes.typecast(py_object_type, entry.type, var_code)
code.putln("if (%s) {" % var_code)
code.putln("e = (*v)(%s, a); if (e) return e;" % var_as_pyobject)
code.putln("}")
-
- # Traverse buffer exporting objects.
- # Note: not traversing memoryview attributes of memoryview slices!
- # When triggered by the GC, it would cause multiple visits (gc_refs
- # subtractions which is not matched by its reference count!)
- for entry in py_buffers:
- cname = entry.cname + ".obj"
- code.putln("if (p->%s) {" % cname)
+
+ # Traverse buffer exporting objects.
+ # Note: not traversing memoryview attributes of memoryview slices!
+ # When triggered by the GC, it would cause multiple visits (gc_refs
+ # subtractions which is not matched by its reference count!)
+ for entry in py_buffers:
+ cname = entry.cname + ".obj"
+ code.putln("if (p->%s) {" % cname)
code.putln("e = (*v)(p->%s, a); if (e) return e;" % cname)
- code.putln("}")
-
+ code.putln("}")
+
code.putln("return 0;")
code.putln("}")
-
- def generate_clear_function(self, scope, code, cclass_entry):
+
+ def generate_clear_function(self, scope, code, cclass_entry):
tp_slot = TypeSlots.get_slot_by_name("tp_clear")
- slot_func = scope.mangle_internal("tp_clear")
- base_type = scope.parent_type.base_type
- if tp_slot.slot_code(scope) != slot_func:
- return # never used
-
- have_entries, (py_attrs, py_buffers, memoryview_slices) = (
- scope.get_refcounted_entries(include_gc_simple=False))
-
- if py_attrs or py_buffers or base_type:
- unused = ''
- else:
- unused = 'CYTHON_UNUSED '
-
- code.putln("")
- code.putln("static int %s(%sPyObject *o) {" % (slot_func, unused))
-
- if py_attrs and Options.clear_to_none:
- code.putln("PyObject* tmp;")
-
- if py_attrs or py_buffers:
- self.generate_self_cast(scope, code)
-
- if base_type:
- # want to call it explicitly if possible so inlining can be performed
- static_call = TypeSlots.get_base_slot_function(scope, tp_slot)
- if static_call:
- code.putln("%s(o);" % static_call)
- elif base_type.is_builtin_type:
- base_cname = base_type.typeptr_cname
- code.putln("if (!%s->tp_clear); else %s->tp_clear(o);" % (
- base_cname, base_cname))
- else:
- # This is an externally defined type. Calling through the
- # cimported base type pointer directly interacts badly with
- # the module cleanup, which may already have cleared it.
- # In that case, fall back to traversing the type hierarchy.
- base_cname = base_type.typeptr_cname
+ slot_func = scope.mangle_internal("tp_clear")
+ base_type = scope.parent_type.base_type
+ if tp_slot.slot_code(scope) != slot_func:
+ return # never used
+
+ have_entries, (py_attrs, py_buffers, memoryview_slices) = (
+ scope.get_refcounted_entries(include_gc_simple=False))
+
+ if py_attrs or py_buffers or base_type:
+ unused = ''
+ else:
+ unused = 'CYTHON_UNUSED '
+
+ code.putln("")
+ code.putln("static int %s(%sPyObject *o) {" % (slot_func, unused))
+
+ if py_attrs and Options.clear_to_none:
+ code.putln("PyObject* tmp;")
+
+ if py_attrs or py_buffers:
+ self.generate_self_cast(scope, code)
+
+ if base_type:
+ # want to call it explicitly if possible so inlining can be performed
+ static_call = TypeSlots.get_base_slot_function(scope, tp_slot)
+ if static_call:
+ code.putln("%s(o);" % static_call)
+ elif base_type.is_builtin_type:
+ base_cname = base_type.typeptr_cname
+ code.putln("if (!%s->tp_clear); else %s->tp_clear(o);" % (
+ base_cname, base_cname))
+ else:
+ # This is an externally defined type. Calling through the
+ # cimported base type pointer directly interacts badly with
+ # the module cleanup, which may already have cleared it.
+ # In that case, fall back to traversing the type hierarchy.
+ base_cname = base_type.typeptr_cname
code.putln(
"if (likely(%s)) { if (%s->tp_clear) %s->tp_clear(o); } else __Pyx_call_next_tp_clear(o, %s);" % (
base_cname, base_cname, base_cname, slot_func))
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("CallNextTpClear", "ExtensionTypes.c"))
-
- if Options.clear_to_none:
- for entry in py_attrs:
- name = "p->%s" % entry.cname
- code.putln("tmp = ((PyObject*)%s);" % name)
- if entry.is_declared_generic:
- code.put_init_to_py_none(name, py_object_type, nanny=False)
- else:
- code.put_init_to_py_none(name, entry.type, nanny=False)
- code.putln("Py_XDECREF(tmp);")
- else:
- for entry in py_attrs:
- code.putln("Py_CLEAR(p->%s);" % entry.cname)
-
- for entry in py_buffers:
- # Note: shouldn't this call __Pyx_ReleaseBuffer ??
- code.putln("Py_CLEAR(p->%s.obj);" % entry.cname)
-
- if cclass_entry.cname == '__pyx_memoryviewslice':
- code.putln("__PYX_XDEC_MEMVIEW(&p->from_slice, 1);")
-
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("CallNextTpClear", "ExtensionTypes.c"))
+
+ if Options.clear_to_none:
+ for entry in py_attrs:
+ name = "p->%s" % entry.cname
+ code.putln("tmp = ((PyObject*)%s);" % name)
+ if entry.is_declared_generic:
+ code.put_init_to_py_none(name, py_object_type, nanny=False)
+ else:
+ code.put_init_to_py_none(name, entry.type, nanny=False)
+ code.putln("Py_XDECREF(tmp);")
+ else:
+ for entry in py_attrs:
+ code.putln("Py_CLEAR(p->%s);" % entry.cname)
+
+ for entry in py_buffers:
+ # Note: shouldn't this call __Pyx_ReleaseBuffer ??
+ code.putln("Py_CLEAR(p->%s.obj);" % entry.cname)
+
+ if cclass_entry.cname == '__pyx_memoryviewslice':
+ code.putln("__PYX_XDEC_MEMVIEW(&p->from_slice, 1);")
+
code.putln("return 0;")
code.putln("}")
-
- def generate_getitem_int_function(self, scope, code):
- # This function is put into the sq_item slot when
- # a __getitem__ method is present. It converts its
- # argument to a Python integer and calls mp_subscript.
- code.putln(
+
+ def generate_getitem_int_function(self, scope, code):
+ # This function is put into the sq_item slot when
+ # a __getitem__ method is present. It converts its
+ # argument to a Python integer and calls mp_subscript.
+ code.putln(
"static PyObject *%s(PyObject *o, Py_ssize_t i) {" % (
scope.mangle_internal("sq_item")))
- code.putln(
+ code.putln(
"PyObject *r;")
- code.putln(
+ code.putln(
"PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0;")
- code.putln(
+ code.putln(
"r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x);")
- code.putln(
+ code.putln(
"Py_DECREF(x);")
- code.putln(
+ code.putln(
"return r;")
- code.putln(
- "}")
-
- def generate_ass_subscript_function(self, scope, code):
- # Setting and deleting an item are both done through
- # the ass_subscript method, so we dispatch to user's __setitem__
- # or __delitem__, or raise an exception.
- base_type = scope.parent_type.base_type
- set_entry = scope.lookup_here("__setitem__")
- del_entry = scope.lookup_here("__delitem__")
- code.putln("")
- code.putln(
+ code.putln(
+ "}")
+
+ def generate_ass_subscript_function(self, scope, code):
+ # Setting and deleting an item are both done through
+ # the ass_subscript method, so we dispatch to user's __setitem__
+ # or __delitem__, or raise an exception.
+ base_type = scope.parent_type.base_type
+ set_entry = scope.lookup_here("__setitem__")
+ del_entry = scope.lookup_here("__delitem__")
+ code.putln("")
+ code.putln(
"static int %s(PyObject *o, PyObject *i, PyObject *v) {" % (
scope.mangle_internal("mp_ass_subscript")))
- code.putln(
+ code.putln(
"if (v) {")
- if set_entry:
+ if set_entry:
code.putln("return %s(o, i, v);" % set_entry.func_cname)
- else:
- self.generate_guarded_basetype_call(
- base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code)
+ code.putln(
"PyErr_Format(PyExc_NotImplementedError,")
- code.putln(
+ code.putln(
' "Subscript assignment not supported by %.200s", Py_TYPE(o)->tp_name);')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
+ code.putln(
"else {")
- if del_entry:
- code.putln(
+ if del_entry:
+ code.putln(
"return %s(o, i);" % (
del_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code)
+ code.putln(
"PyErr_Format(PyExc_NotImplementedError,")
- code.putln(
+ code.putln(
' "Subscript deletion not supported by %.200s", Py_TYPE(o)->tp_name);')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
- "}")
-
- def generate_guarded_basetype_call(
- self, base_type, substructure, slot, args, code):
- if base_type:
- base_tpname = base_type.typeptr_cname
- if substructure:
- code.putln(
- "if (%s->%s && %s->%s->%s)" % (
- base_tpname, substructure, base_tpname, substructure, slot))
- code.putln(
- " return %s->%s->%s(%s);" % (
- base_tpname, substructure, slot, args))
- else:
- code.putln(
- "if (%s->%s)" % (
- base_tpname, slot))
- code.putln(
- " return %s->%s(%s);" % (
- base_tpname, slot, args))
-
- def generate_ass_slice_function(self, scope, code):
- # Setting and deleting a slice are both done through
- # the ass_slice method, so we dispatch to user's __setslice__
- # or __delslice__, or raise an exception.
- base_type = scope.parent_type.base_type
- set_entry = scope.lookup_here("__setslice__")
- del_entry = scope.lookup_here("__delslice__")
- code.putln("")
- code.putln(
+ code.putln(
+ "}")
+
+ def generate_guarded_basetype_call(
+ self, base_type, substructure, slot, args, code):
+ if base_type:
+ base_tpname = base_type.typeptr_cname
+ if substructure:
+ code.putln(
+ "if (%s->%s && %s->%s->%s)" % (
+ base_tpname, substructure, base_tpname, substructure, slot))
+ code.putln(
+ " return %s->%s->%s(%s);" % (
+ base_tpname, substructure, slot, args))
+ else:
+ code.putln(
+ "if (%s->%s)" % (
+ base_tpname, slot))
+ code.putln(
+ " return %s->%s(%s);" % (
+ base_tpname, slot, args))
+
+ def generate_ass_slice_function(self, scope, code):
+ # Setting and deleting a slice are both done through
+ # the ass_slice method, so we dispatch to user's __setslice__
+ # or __delslice__, or raise an exception.
+ base_type = scope.parent_type.base_type
+ set_entry = scope.lookup_here("__setslice__")
+ del_entry = scope.lookup_here("__delslice__")
+ code.putln("")
+ code.putln(
"static int %s(PyObject *o, Py_ssize_t i, Py_ssize_t j, PyObject *v) {" % (
scope.mangle_internal("sq_ass_slice")))
- code.putln(
+ code.putln(
"if (v) {")
- if set_entry:
- code.putln(
+ if set_entry:
+ code.putln(
"return %s(o, i, j, v);" % (
set_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code)
+ code.putln(
"PyErr_Format(PyExc_NotImplementedError,")
- code.putln(
+ code.putln(
' "2-element slice assignment not supported by %.200s", Py_TYPE(o)->tp_name);')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
+ code.putln(
"else {")
- if del_entry:
- code.putln(
+ if del_entry:
+ code.putln(
"return %s(o, i, j);" % (
del_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code)
+ code.putln(
"PyErr_Format(PyExc_NotImplementedError,")
- code.putln(
+ code.putln(
' "2-element slice deletion not supported by %.200s", Py_TYPE(o)->tp_name);')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
- "}")
-
+ code.putln(
+ "}")
+
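The subscript and slice assignment slots generated above fold setting and deleting into a single C entry point and branch on whether a value pointer was supplied, falling back to the base type's slot or raising NotImplementedError. A rough Python-level analogue of that dispatch, purely illustrative and not generated code:

    def mp_ass_subscript(obj, index, value=None):
        # "value is None" stands in for the C-level "v == NULL" deletion case.
        if value is not None:
            setter = getattr(type(obj), "__setitem__", None)
            if setter is None:
                raise NotImplementedError(
                    "Subscript assignment not supported by %.200s" % type(obj).__name__)
            return setter(obj, index, value)
        deleter = getattr(type(obj), "__delitem__", None)
        if deleter is None:
            raise NotImplementedError(
                "Subscript deletion not supported by %.200s" % type(obj).__name__)
        return deleter(obj, index)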
def generate_richcmp_function(self, scope, code):
if scope.lookup_here("__richcmp__"):
# user implemented, nothing to do
@@ -1911,15 +1911,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("}") # switch
code.putln("}")
- def generate_getattro_function(self, scope, code):
- # First try to get the attribute using __getattribute__, if defined, or
- # PyObject_GenericGetAttr.
- #
- # If that raises an AttributeError, call the __getattr__ if defined.
- #
- # In both cases, defined can be in this class, or any base class.
+ def generate_getattro_function(self, scope, code):
+ # First try to get the attribute using __getattribute__, if defined, or
+ # PyObject_GenericGetAttr.
+ #
+ # If that raises an AttributeError, call the __getattr__ if defined.
+ #
+ # In both cases, defined can be in this class, or any base class.
def lookup_here_or_base(n, tp=None, extern_return=None):
- # Recursive lookup
+ # Recursive lookup
if tp is None:
tp = scope.parent_type
r = tp.scope.lookup_here(n)
@@ -1931,17 +1931,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
return r
has_instance_dict = lookup_here_or_base("__dict__", extern_return="extern")
- getattr_entry = lookup_here_or_base("__getattr__")
- getattribute_entry = lookup_here_or_base("__getattribute__")
- code.putln("")
- code.putln(
+ getattr_entry = lookup_here_or_base("__getattr__")
+ getattribute_entry = lookup_here_or_base("__getattribute__")
+ code.putln("")
+ code.putln(
"static PyObject *%s(PyObject *o, PyObject *n) {" % (
scope.mangle_internal("tp_getattro")))
- if getattribute_entry is not None:
- code.putln(
+ if getattribute_entry is not None:
+ code.putln(
"PyObject *v = %s(o, n);" % (
getattribute_entry.func_cname))
- else:
+ else:
if not has_instance_dict and scope.parent_type.is_final_type:
# Final with no dict => use faster type attribute lookup.
code.globalstate.use_utility_code(
@@ -1954,246 +1954,246 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
generic_getattr_cfunc = "__Pyx_PyObject_GenericGetAttr"
else:
generic_getattr_cfunc = "PyObject_GenericGetAttr"
- code.putln(
+ code.putln(
"PyObject *v = %s(o, n);" % generic_getattr_cfunc)
- if getattr_entry is not None:
- code.putln(
- "if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) {")
- code.putln(
- "PyErr_Clear();")
- code.putln(
+ if getattr_entry is not None:
+ code.putln(
+ "if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) {")
+ code.putln(
+ "PyErr_Clear();")
+ code.putln(
"v = %s(o, n);" % (
getattr_entry.func_cname))
- code.putln(
- "}")
- code.putln(
- "return v;")
- code.putln(
- "}")
-
- def generate_setattro_function(self, scope, code):
- # Setting and deleting an attribute are both done through
- # the setattro method, so we dispatch to user's __setattr__
- # or __delattr__ or fall back on PyObject_GenericSetAttr.
- base_type = scope.parent_type.base_type
- set_entry = scope.lookup_here("__setattr__")
- del_entry = scope.lookup_here("__delattr__")
- code.putln("")
- code.putln(
+ code.putln(
+ "}")
+ code.putln(
+ "return v;")
+ code.putln(
+ "}")
+
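generate_getattro_function, just above, wires the usual CPython fallback chain: generic attribute lookup first, and __getattr__ only once that lookup has raised AttributeError. A rough Python analogue of the control flow in the emitted slot, for illustration only:

    def tp_getattro(obj, name):
        try:
            # stands in for PyObject_GenericGetAttr or a user __getattribute__
            return object.__getattribute__(obj, name)
        except AttributeError:
            getattr_hook = getattr(type(obj), "__getattr__", None)
            if getattr_hook is None:
                raise
            return getattr_hook(obj, name)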
+ def generate_setattro_function(self, scope, code):
+ # Setting and deleting an attribute are both done through
+ # the setattro method, so we dispatch to user's __setattr__
+ # or __delattr__ or fall back on PyObject_GenericSetAttr.
+ base_type = scope.parent_type.base_type
+ set_entry = scope.lookup_here("__setattr__")
+ del_entry = scope.lookup_here("__delattr__")
+ code.putln("")
+ code.putln(
"static int %s(PyObject *o, PyObject *n, PyObject *v) {" % (
scope.mangle_internal("tp_setattro")))
- code.putln(
+ code.putln(
"if (v) {")
- if set_entry:
- code.putln(
+ if set_entry:
+ code.putln(
"return %s(o, n, v);" % (
set_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, None, "tp_setattro", "o, n, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, None, "tp_setattro", "o, n, v", code)
+ code.putln(
"return PyObject_GenericSetAttr(o, n, v);")
- code.putln(
+ code.putln(
"}")
- code.putln(
+ code.putln(
"else {")
- if del_entry:
- code.putln(
+ if del_entry:
+ code.putln(
"return %s(o, n);" % (
del_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, None, "tp_setattro", "o, n, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, None, "tp_setattro", "o, n, v", code)
+ code.putln(
"return PyObject_GenericSetAttr(o, n, 0);")
- code.putln(
+ code.putln(
"}")
- code.putln(
- "}")
-
- def generate_descr_get_function(self, scope, code):
- # The __get__ function of a descriptor object can be
- # called with NULL for the second or third arguments
- # under some circumstances, so we replace them with
- # None in that case.
- user_get_entry = scope.lookup_here("__get__")
- code.putln("")
- code.putln(
+ code.putln(
+ "}")
+
+ def generate_descr_get_function(self, scope, code):
+ # The __get__ function of a descriptor object can be
+ # called with NULL for the second or third arguments
+ # under some circumstances, so we replace them with
+ # None in that case.
+ user_get_entry = scope.lookup_here("__get__")
+ code.putln("")
+ code.putln(
"static PyObject *%s(PyObject *o, PyObject *i, PyObject *c) {" % (
scope.mangle_internal("tp_descr_get")))
- code.putln(
- "PyObject *r = 0;")
- code.putln(
- "if (!i) i = Py_None;")
- code.putln(
- "if (!c) c = Py_None;")
- #code.put_incref("i", py_object_type)
- #code.put_incref("c", py_object_type)
- code.putln(
+ code.putln(
+ "PyObject *r = 0;")
+ code.putln(
+ "if (!i) i = Py_None;")
+ code.putln(
+ "if (!c) c = Py_None;")
+ #code.put_incref("i", py_object_type)
+ #code.put_incref("c", py_object_type)
+ code.putln(
"r = %s(o, i, c);" % (
user_get_entry.func_cname))
- #code.put_decref("i", py_object_type)
- #code.put_decref("c", py_object_type)
- code.putln(
- "return r;")
- code.putln(
- "}")
-
- def generate_descr_set_function(self, scope, code):
- # Setting and deleting are both done through the __set__
- # method of a descriptor, so we dispatch to user's __set__
- # or __delete__ or raise an exception.
- base_type = scope.parent_type.base_type
- user_set_entry = scope.lookup_here("__set__")
- user_del_entry = scope.lookup_here("__delete__")
- code.putln("")
- code.putln(
+ #code.put_decref("i", py_object_type)
+ #code.put_decref("c", py_object_type)
+ code.putln(
+ "return r;")
+ code.putln(
+ "}")
+
+ def generate_descr_set_function(self, scope, code):
+ # Setting and deleting are both done through the __set__
+ # method of a descriptor, so we dispatch to user's __set__
+ # or __delete__ or raise an exception.
+ base_type = scope.parent_type.base_type
+ user_set_entry = scope.lookup_here("__set__")
+ user_del_entry = scope.lookup_here("__delete__")
+ code.putln("")
+ code.putln(
"static int %s(PyObject *o, PyObject *i, PyObject *v) {" % (
scope.mangle_internal("tp_descr_set")))
- code.putln(
+ code.putln(
"if (v) {")
- if user_set_entry:
- code.putln(
+ if user_set_entry:
+ code.putln(
"return %s(o, i, v);" % (
user_set_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, None, "tp_descr_set", "o, i, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, None, "tp_descr_set", "o, i, v", code)
+ code.putln(
'PyErr_SetString(PyExc_NotImplementedError, "__set__");')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
+ code.putln(
"else {")
- if user_del_entry:
- code.putln(
+ if user_del_entry:
+ code.putln(
"return %s(o, i);" % (
user_del_entry.func_cname))
- else:
- self.generate_guarded_basetype_call(
- base_type, None, "tp_descr_set", "o, i, v", code)
- code.putln(
+ else:
+ self.generate_guarded_basetype_call(
+ base_type, None, "tp_descr_set", "o, i, v", code)
+ code.putln(
'PyErr_SetString(PyExc_NotImplementedError, "__delete__");')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
- "}")
-
- def generate_property_accessors(self, cclass_scope, code):
- for entry in cclass_scope.property_entries:
- property_scope = entry.scope
- if property_scope.defines_any(["__get__"]):
- self.generate_property_get_function(entry, code)
- if property_scope.defines_any(["__set__", "__del__"]):
- self.generate_property_set_function(entry, code)
-
- def generate_property_get_function(self, property_entry, code):
- property_scope = property_entry.scope
- property_entry.getter_cname = property_scope.parent_scope.mangle(
- Naming.prop_get_prefix, property_entry.name)
- get_entry = property_scope.lookup_here("__get__")
- code.putln("")
- code.putln(
+ code.putln(
+ "}")
+
+ def generate_property_accessors(self, cclass_scope, code):
+ for entry in cclass_scope.property_entries:
+ property_scope = entry.scope
+ if property_scope.defines_any(["__get__"]):
+ self.generate_property_get_function(entry, code)
+ if property_scope.defines_any(["__set__", "__del__"]):
+ self.generate_property_set_function(entry, code)
+
+ def generate_property_get_function(self, property_entry, code):
+ property_scope = property_entry.scope
+ property_entry.getter_cname = property_scope.parent_scope.mangle(
+ Naming.prop_get_prefix, property_entry.name)
+ get_entry = property_scope.lookup_here("__get__")
+ code.putln("")
+ code.putln(
"static PyObject *%s(PyObject *o, CYTHON_UNUSED void *x) {" % (
property_entry.getter_cname))
- code.putln(
+ code.putln(
"return %s(o);" % (
get_entry.func_cname))
- code.putln(
- "}")
-
- def generate_property_set_function(self, property_entry, code):
- property_scope = property_entry.scope
- property_entry.setter_cname = property_scope.parent_scope.mangle(
- Naming.prop_set_prefix, property_entry.name)
- set_entry = property_scope.lookup_here("__set__")
- del_entry = property_scope.lookup_here("__del__")
- code.putln("")
- code.putln(
+ code.putln(
+ "}")
+
+ def generate_property_set_function(self, property_entry, code):
+ property_scope = property_entry.scope
+ property_entry.setter_cname = property_scope.parent_scope.mangle(
+ Naming.prop_set_prefix, property_entry.name)
+ set_entry = property_scope.lookup_here("__set__")
+ del_entry = property_scope.lookup_here("__del__")
+ code.putln("")
+ code.putln(
"static int %s(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) {" % (
property_entry.setter_cname))
- code.putln(
+ code.putln(
"if (v) {")
- if set_entry:
- code.putln(
+ if set_entry:
+ code.putln(
"return %s(o, v);" % (
set_entry.func_cname))
- else:
- code.putln(
+ else:
+ code.putln(
'PyErr_SetString(PyExc_NotImplementedError, "__set__");')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
"}")
- code.putln(
+ code.putln(
"else {")
- if del_entry:
- code.putln(
+ if del_entry:
+ code.putln(
"return %s(o);" % (
del_entry.func_cname))
- else:
- code.putln(
+ else:
+ code.putln(
'PyErr_SetString(PyExc_NotImplementedError, "__del__");')
- code.putln(
+ code.putln(
"return -1;")
- code.putln(
+ code.putln(
+ "}")
+ code.putln(
"}")
- code.putln(
- "}")
-
- def generate_typeobj_definition(self, modname, entry, code):
- type = entry.type
- scope = type.scope
- for suite in TypeSlots.substructures:
- suite.generate_substructure(scope, code)
- code.putln("")
- if entry.visibility == 'public':
- header = "DL_EXPORT(PyTypeObject) %s = {"
- else:
- header = "static PyTypeObject %s = {"
- #code.putln(header % scope.parent_type.typeobj_cname)
- code.putln(header % type.typeobj_cname)
- code.putln(
- "PyVarObject_HEAD_INIT(0, 0)")
- code.putln(
- '"%s.%s", /*tp_name*/' % (
- self.full_module_name, scope.class_name))
- if type.typedef_flag:
- objstruct = type.objstruct_cname
- else:
- objstruct = "struct %s" % type.objstruct_cname
- code.putln(
+
+ def generate_typeobj_definition(self, modname, entry, code):
+ type = entry.type
+ scope = type.scope
+ for suite in TypeSlots.substructures:
+ suite.generate_substructure(scope, code)
+ code.putln("")
+ if entry.visibility == 'public':
+ header = "DL_EXPORT(PyTypeObject) %s = {"
+ else:
+ header = "static PyTypeObject %s = {"
+ #code.putln(header % scope.parent_type.typeobj_cname)
+ code.putln(header % type.typeobj_cname)
+ code.putln(
+ "PyVarObject_HEAD_INIT(0, 0)")
+ code.putln(
+ '"%s.%s", /*tp_name*/' % (
+ self.full_module_name, scope.class_name))
+ if type.typedef_flag:
+ objstruct = type.objstruct_cname
+ else:
+ objstruct = "struct %s" % type.objstruct_cname
+ code.putln(
"sizeof(%s), /*tp_basicsize*/" % objstruct)
- code.putln(
- "0, /*tp_itemsize*/")
- for slot in TypeSlots.slot_table:
- slot.generate(scope, code)
- code.putln(
- "};")
-
- def generate_method_table(self, env, code):
- if env.is_c_class_scope and not env.pyfunc_entries:
- return
+ code.putln(
+ "0, /*tp_itemsize*/")
+ for slot in TypeSlots.slot_table:
+ slot.generate(scope, code)
+ code.putln(
+ "};")
+
+ def generate_method_table(self, env, code):
+ if env.is_c_class_scope and not env.pyfunc_entries:
+ return
binding = env.directives['binding']
- code.putln("")
+ code.putln("")
wrapper_code_writer = code.insertion_point()
- code.putln(
+ code.putln(
"static PyMethodDef %s[] = {" % (
env.method_table_cname))
- for entry in env.pyfunc_entries:
+ for entry in env.pyfunc_entries:
if not entry.fused_cfunction and not (binding and entry.is_overridable):
code.put_pymethoddef(entry, ",", wrapper_code_writer=wrapper_code_writer)
- code.putln(
+ code.putln(
"{0, 0, 0, 0}")
- code.putln(
- "};")
-
+ code.putln(
+ "};")
+
if wrapper_code_writer.getvalue():
wrapper_code_writer.putln("")
@@ -2213,31 +2213,31 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("return p->%s;" % dict_name)
code.putln("}")
- def generate_getset_table(self, env, code):
- if env.property_entries:
- code.putln("")
- code.putln(
- "static struct PyGetSetDef %s[] = {" %
+ def generate_getset_table(self, env, code):
+ if env.property_entries:
+ code.putln("")
+ code.putln(
+ "static struct PyGetSetDef %s[] = {" %
env.getset_table_cname)
- for entry in env.property_entries:
+ for entry in env.property_entries:
doc = entry.doc
if doc:
if doc.is_unicode:
doc = doc.as_utf8_string()
doc_code = doc.as_c_string_literal()
- else:
- doc_code = "0"
- code.putln(
+ else:
+ doc_code = "0"
+ code.putln(
'{(char *)"%s", %s, %s, (char *)%s, 0},' % (
- entry.name,
- entry.getter_cname or "0",
- entry.setter_cname or "0",
- doc_code))
- code.putln(
+ entry.name,
+ entry.getter_cname or "0",
+ entry.setter_cname or "0",
+ doc_code))
+ code.putln(
"{0, 0, 0, 0, 0}")
- code.putln(
- "};")
-
+ code.putln(
+ "};")
+
def create_import_star_conversion_utility_code(self, env):
# Create all conversion helpers that are needed for "import *" assignments.
# Must be done before code generation to support CythonUtilityCode.
@@ -2246,29 +2246,29 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if not entry.type.is_pyobject:
entry.type.create_from_py_utility_code(env)
- def generate_import_star(self, env, code):
+ def generate_import_star(self, env, code):
env.use_utility_code(UtilityCode.load_cached("CStringEquals", "StringTools.c"))
- code.putln()
+ code.putln()
code.enter_cfunc_scope() # as we need labels
code.putln("static int %s(PyObject *o, PyObject* py_name, char *name) {" % Naming.import_star_set)
code.putln("static const char* internal_type_names[] = {")
- for name, entry in sorted(env.entries.items()):
- if entry.is_type:
- code.putln('"%s",' % name)
- code.putln("0")
- code.putln("};")
+ for name, entry in sorted(env.entries.items()):
+ if entry.is_type:
+ code.putln('"%s",' % name)
+ code.putln("0")
+ code.putln("};")
code.putln("const char** type_name = internal_type_names;")
- code.putln("while (*type_name) {")
- code.putln("if (__Pyx_StrEq(name, *type_name)) {")
- code.putln('PyErr_Format(PyExc_TypeError, "Cannot overwrite C type %s", name);')
- code.putln('goto bad;')
- code.putln("}")
- code.putln("type_name++;")
- code.putln("}")
-
- old_error_label = code.new_error_label()
+ code.putln("while (*type_name) {")
+ code.putln("if (__Pyx_StrEq(name, *type_name)) {")
+ code.putln('PyErr_Format(PyExc_TypeError, "Cannot overwrite C type %s", name);')
+ code.putln('goto bad;')
+ code.putln("}")
+ code.putln("type_name++;")
+ code.putln("}")
+
+ old_error_label = code.new_error_label()
code.putln("if (0);") # so the first one can be "else if"
msvc_count = 0
for name, entry in sorted(env.entries.items()):
@@ -2278,62 +2278,62 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("#ifdef _MSC_VER")
code.putln("if (0); /* Workaround for MSVC C1061. */")
code.putln("#endif")
- code.putln('else if (__Pyx_StrEq(name, "%s")) {' % name)
- if entry.type.is_pyobject:
- if entry.type.is_extension_type or entry.type.is_builtin_type:
- code.putln("if (!(%s)) %s;" % (
- entry.type.type_test_code("o"),
- code.error_goto(entry.pos)))
- code.putln("Py_INCREF(o);")
- code.put_decref(entry.cname, entry.type, nanny=False)
- code.putln("%s = %s;" % (
- entry.cname,
- PyrexTypes.typecast(entry.type, py_object_type, "o")))
+ code.putln('else if (__Pyx_StrEq(name, "%s")) {' % name)
+ if entry.type.is_pyobject:
+ if entry.type.is_extension_type or entry.type.is_builtin_type:
+ code.putln("if (!(%s)) %s;" % (
+ entry.type.type_test_code("o"),
+ code.error_goto(entry.pos)))
+ code.putln("Py_INCREF(o);")
+ code.put_decref(entry.cname, entry.type, nanny=False)
+ code.putln("%s = %s;" % (
+ entry.cname,
+ PyrexTypes.typecast(entry.type, py_object_type, "o")))
elif entry.type.create_from_py_utility_code(env):
# if available, utility code was already created in self.prepare_utility_code()
code.putln(entry.type.from_py_call_code(
'o', entry.cname, entry.pos, code))
- else:
+ else:
code.putln('PyErr_Format(PyExc_TypeError, "Cannot convert Python object %s to %s");' % (
name, entry.type))
- code.putln(code.error_goto(entry.pos))
- code.putln("}")
- code.putln("else {")
- code.putln("if (PyObject_SetAttr(%s, py_name, o) < 0) goto bad;" % Naming.module_cname)
- code.putln("}")
- code.putln("return 0;")
- if code.label_used(code.error_label):
- code.put_label(code.error_label)
- # This helps locate the offending name.
- code.put_add_traceback(self.full_module_name)
- code.error_label = old_error_label
- code.putln("bad:")
- code.putln("return -1;")
- code.putln("}")
+ code.putln(code.error_goto(entry.pos))
+ code.putln("}")
+ code.putln("else {")
+ code.putln("if (PyObject_SetAttr(%s, py_name, o) < 0) goto bad;" % Naming.module_cname)
+ code.putln("}")
+ code.putln("return 0;")
+ if code.label_used(code.error_label):
+ code.put_label(code.error_label)
+ # This helps locate the offending name.
+ code.put_add_traceback(self.full_module_name)
+ code.error_label = old_error_label
+ code.putln("bad:")
+ code.putln("return -1;")
+ code.putln("}")
code.putln("")
code.putln(UtilityCode.load_as_string("ImportStar", "ImportExport.c")[1])
code.exit_cfunc_scope() # done with labels
-
+
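The import-star helper emitted above protects the module's typed C globals during a `from module import *`: names that are C types cannot be overwritten, typed globals are converted in place with a type test or from_py conversion, and everything else becomes an ordinary module attribute. A condensed Python analogue, with the converters mapping invented for illustration:

    def import_star_set(module, internal_type_names, converters, name, value):
        if name in internal_type_names:
            raise TypeError("Cannot overwrite C type %s" % name)
        if name in converters:
            converters[name](value)     # typed C global: convert and store in place
            return 0
        setattr(module, name, value)    # plain Python attribute on the module
        return 0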
def generate_module_init_func(self, imported_modules, env, options, code):
subfunction = self.mod_init_subfunction(self.pos, self.scope, code)
code.enter_cfunc_scope(self.scope)
- code.putln("")
+ code.putln("")
code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0])
init_name = 'init' + (options.init_suffix or env.module_name)
header2 = "__Pyx_PyMODINIT_FUNC %s(void)" % init_name
header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env, options)
- code.putln("#if PY_MAJOR_VERSION < 3")
+ code.putln("#if PY_MAJOR_VERSION < 3")
# Optimise for small code size as the module init function is only executed once.
code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2)
- code.putln(header2)
- code.putln("#else")
+ code.putln(header2)
+ code.putln("#else")
code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header3)
- code.putln(header3)
+ code.putln(header3)
# CPython 3.5+ supports multi-phase module initialisation (gives access to __spec__, __file__, etc.)
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
- code.putln("{")
+ code.putln("{")
code.putln("return PyModuleDef_Init(&%s);" % Naming.pymoduledef_cname)
code.putln("}")
@@ -2352,14 +2352,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
# start of module init/exec function (pre/post PEP 489)
code.putln("{")
- tempdecl_code = code.insertion_point()
-
+ tempdecl_code = code.insertion_point()
+
profile = code.globalstate.directives['profile']
linetrace = code.globalstate.directives['linetrace']
if profile or linetrace:
code.globalstate.use_utility_code(UtilityCode.load_cached("Profile", "Profile.c"))
- code.put_declare_refcount_context()
+ code.put_declare_refcount_context()
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
# Most extension modules simply can't deal with it, and Cython isn't ready either.
# See issues listed here: https://docs.python.org/3/c-api/init.html#sub-interpreter-support
@@ -2388,11 +2388,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
refnanny_import_code = UtilityCode.load_as_string("ImportRefnannyAPI", "ModuleSetupCode.c")[1]
code.putln(refnanny_import_code.rstrip())
- code.put_setup_refcount_context(header3)
-
- env.use_utility_code(UtilityCode.load("CheckBinaryVersion", "ModuleSetupCode.c"))
+ code.put_setup_refcount_context(header3)
+
+ env.use_utility_code(UtilityCode.load("CheckBinaryVersion", "ModuleSetupCode.c"))
code.put_error_if_neg(self.pos, "__Pyx_check_binary_version()")
-
+
code.putln("#ifdef __Pxy_PyFrame_Initialize_Offsets")
code.putln("__Pxy_PyFrame_Initialize_Offsets();")
code.putln("#endif")
@@ -2402,80 +2402,80 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
Naming.empty_bytes, code.error_goto_if_null(Naming.empty_bytes, self.pos)))
code.putln("%s = PyUnicode_FromStringAndSize(\"\", 0); %s" % (
Naming.empty_unicode, code.error_goto_if_null(Naming.empty_unicode, self.pos)))
-
+
for ext_type in ('CyFunction', 'FusedFunction', 'Coroutine', 'Generator', 'AsyncGen', 'StopAsyncIteration'):
code.putln("#ifdef __Pyx_%s_USED" % ext_type)
code.put_error_if_neg(self.pos, "__pyx_%s_init()" % ext_type)
code.putln("#endif")
-
- code.putln("/*--- Library function declarations ---*/")
+
+ code.putln("/*--- Library function declarations ---*/")
if env.directives['np_pythran']:
code.put_error_if_neg(self.pos, "_import_array()")
-
- code.putln("/*--- Threads initialization code ---*/")
+
+ code.putln("/*--- Threads initialization code ---*/")
code.putln("#if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 "
"&& defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS")
- code.putln("PyEval_InitThreads();")
- code.putln("#endif")
-
- code.putln("/*--- Module creation code ---*/")
+ code.putln("PyEval_InitThreads();")
+ code.putln("#endif")
+
+ code.putln("/*--- Module creation code ---*/")
self.generate_module_creation_code(env, options, code)
-
- code.putln("/*--- Initialize various global constants etc. ---*/")
+
+ code.putln("/*--- Initialize various global constants etc. ---*/")
code.put_error_if_neg(self.pos, "__Pyx_InitGlobals()")
-
+
code.putln("#if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || "
"__PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)")
code.put_error_if_neg(self.pos, "__Pyx_init_sys_getdefaultencoding_params()")
- code.putln("#endif")
-
- code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__')))
+ code.putln("#endif")
+
+ code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__')))
code.put_error_if_neg(self.pos, 'PyObject_SetAttr(%s, %s, %s)' % (
env.module_cname,
code.intern_identifier(EncodedString("__name__")),
code.intern_identifier(EncodedString("__main__"))))
- code.putln("}")
-
- # set up __file__ and __path__, then add the module to sys.modules
- self.generate_module_import_setup(env, code)
-
- if Options.cache_builtins:
- code.putln("/*--- Builtin init code ---*/")
+ code.putln("}")
+
+ # set up __file__ and __path__, then add the module to sys.modules
+ self.generate_module_import_setup(env, code)
+
+ if Options.cache_builtins:
+ code.putln("/*--- Builtin init code ---*/")
code.put_error_if_neg(self.pos, "__Pyx_InitCachedBuiltins()")
-
- code.putln("/*--- Constants init code ---*/")
+
+ code.putln("/*--- Constants init code ---*/")
code.put_error_if_neg(self.pos, "__Pyx_InitCachedConstants()")
-
+
code.putln("/*--- Global type/function init code ---*/")
-
+
with subfunction("Global init code") as inner_code:
self.generate_global_init_code(env, inner_code)
-
+
with subfunction("Variable export code") as inner_code:
self.generate_c_variable_export_code(env, inner_code)
-
+
with subfunction("Function export code") as inner_code:
self.generate_c_function_export_code(env, inner_code)
-
+
with subfunction("Type init code") as inner_code:
self.generate_type_init_code(env, inner_code)
-
+
with subfunction("Type import code") as inner_code:
for module in imported_modules:
self.generate_type_import_code_for_module(module, env, inner_code)
-
+
with subfunction("Variable import code") as inner_code:
for module in imported_modules:
self.generate_c_variable_import_code_for_module(module, env, inner_code)
-
+
with subfunction("Function import code") as inner_code:
for module in imported_modules:
self.specialize_fused_types(module)
self.generate_c_function_import_code_for_module(module, env, inner_code)
- code.putln("/*--- Execution code ---*/")
- code.mark_pos(None)
-
+ code.putln("/*--- Execution code ---*/")
+ code.mark_pos(None)
+
code.putln("#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED)")
code.put_error_if_neg(self.pos, "__Pyx_patch_abc()")
code.putln("#endif")
@@ -2484,57 +2484,57 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.put_trace_call(header3, self.pos, nogil=not code.funcstate.gil_owned)
code.funcstate.can_trace = True
- self.body.generate_execution_code(code)
-
+ self.body.generate_execution_code(code)
+
if profile or linetrace:
code.funcstate.can_trace = False
code.put_trace_return("Py_None", nogil=not code.funcstate.gil_owned)
- code.putln()
- code.putln("/*--- Wrapped vars code ---*/")
- self.generate_wrapped_entries_code(env, code)
- code.putln()
-
- if Options.generate_cleanup_code:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("RegisterModuleCleanup", "ModuleSetupCode.c"))
- code.putln("if (__Pyx_RegisterCleanup()) %s;" % code.error_goto(self.pos))
-
- code.put_goto(code.return_label)
- code.put_label(code.error_label)
- for cname, type in code.funcstate.all_managed_temps():
- code.put_xdecref(cname, type)
- code.putln('if (%s) {' % env.module_cname)
- code.putln('if (%s) {' % env.module_dict_cname)
+ code.putln()
+ code.putln("/*--- Wrapped vars code ---*/")
+ self.generate_wrapped_entries_code(env, code)
+ code.putln()
+
+ if Options.generate_cleanup_code:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("RegisterModuleCleanup", "ModuleSetupCode.c"))
+ code.putln("if (__Pyx_RegisterCleanup()) %s;" % code.error_goto(self.pos))
+
+ code.put_goto(code.return_label)
+ code.put_label(code.error_label)
+ for cname, type in code.funcstate.all_managed_temps():
+ code.put_xdecref(cname, type)
+ code.putln('if (%s) {' % env.module_cname)
+ code.putln('if (%s) {' % env.module_dict_cname)
code.put_add_traceback("init %s" % env.qualified_name)
- code.globalstate.use_utility_code(Nodes.traceback_utility_code)
+ code.globalstate.use_utility_code(Nodes.traceback_utility_code)
# Module reference and module dict are in global variables which might still be needed
# for cleanup, atexit code, etc., so leaking is better than crashing.
# At least clearing the module dict here might be a good idea, but could still break
# user code in atexit or other global registries.
##code.put_decref_clear(env.module_dict_cname, py_object_type, nanny=False)
- code.putln('}')
+ code.putln('}')
code.put_decref_clear(env.module_cname, py_object_type, nanny=False, clear_before_decref=True)
- code.putln('} else if (!PyErr_Occurred()) {')
- code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % env.qualified_name)
- code.putln('}')
- code.put_label(code.return_label)
-
- code.put_finish_refcount_context()
-
+ code.putln('} else if (!PyErr_Occurred()) {')
+ code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % env.qualified_name)
+ code.putln('}')
+ code.put_label(code.return_label)
+
+ code.put_finish_refcount_context()
+
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
code.putln("return (%s != NULL) ? 0 : -1;" % env.module_cname)
code.putln("#elif PY_MAJOR_VERSION >= 3")
code.putln("return %s;" % env.module_cname)
code.putln("#else")
- code.putln("return;")
- code.putln("#endif")
- code.putln('}')
-
- tempdecl_code.put_temp_declarations(code.funcstate)
-
- code.exit_cfunc_scope()
-
+ code.putln("return;")
+ code.putln("#endif")
+ code.putln('}')
+
+ tempdecl_code.put_temp_declarations(code.funcstate)
+
+ code.exit_cfunc_scope()
+
def mod_init_subfunction(self, pos, scope, orig_code):
"""
Return a context manager that allows deviating the module init code generation
@@ -2599,172 +2599,172 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
return ModInitSubfunction
- def generate_module_import_setup(self, env, code):
- module_path = env.directives['set_initial_path']
- if module_path == 'SOURCEFILE':
- module_path = self.pos[0].filename
-
- if module_path:
+ def generate_module_import_setup(self, env, code):
+ module_path = env.directives['set_initial_path']
+ if module_path == 'SOURCEFILE':
+ module_path = self.pos[0].filename
+
+ if module_path:
code.putln('if (!CYTHON_PEP489_MULTI_PHASE_INIT) {')
- code.putln('if (PyObject_SetAttrString(%s, "__file__", %s) < 0) %s;' % (
- env.module_cname,
- code.globalstate.get_py_string_const(
- EncodedString(decode_filename(module_path))).cname,
- code.error_goto(self.pos)))
+ code.putln('if (PyObject_SetAttrString(%s, "__file__", %s) < 0) %s;' % (
+ env.module_cname,
+ code.globalstate.get_py_string_const(
+ EncodedString(decode_filename(module_path))).cname,
+ code.error_goto(self.pos)))
code.putln("}")
-
- if env.is_package:
- # set __path__ to mark the module as package
+
+ if env.is_package:
+ # set __path__ to mark the module as package
code.putln('if (!CYTHON_PEP489_MULTI_PHASE_INIT) {')
- temp = code.funcstate.allocate_temp(py_object_type, True)
- code.putln('%s = Py_BuildValue("[O]", %s); %s' % (
- temp,
- code.globalstate.get_py_string_const(
- EncodedString(decode_filename(
- os.path.dirname(module_path)))).cname,
- code.error_goto_if_null(temp, self.pos)))
- code.put_gotref(temp)
- code.putln(
- 'if (PyObject_SetAttrString(%s, "__path__", %s) < 0) %s;' % (
- env.module_cname, temp, code.error_goto(self.pos)))
- code.put_decref_clear(temp, py_object_type)
- code.funcstate.release_temp(temp)
+ temp = code.funcstate.allocate_temp(py_object_type, True)
+ code.putln('%s = Py_BuildValue("[O]", %s); %s' % (
+ temp,
+ code.globalstate.get_py_string_const(
+ EncodedString(decode_filename(
+ os.path.dirname(module_path)))).cname,
+ code.error_goto_if_null(temp, self.pos)))
+ code.put_gotref(temp)
+ code.putln(
+ 'if (PyObject_SetAttrString(%s, "__path__", %s) < 0) %s;' % (
+ env.module_cname, temp, code.error_goto(self.pos)))
+ code.put_decref_clear(temp, py_object_type)
+ code.funcstate.release_temp(temp)
code.putln("}")
-
- elif env.is_package:
- # packages require __path__, so all we can do is try to figure
- # out the module path at runtime by rerunning the import lookup
+
+ elif env.is_package:
+ # packages require __path__, so all we can do is try to figure
+ # out the module path at runtime by rerunning the import lookup
code.putln("if (!CYTHON_PEP489_MULTI_PHASE_INIT) {")
- package_name, _ = self.full_module_name.rsplit('.', 1)
- if '.' in package_name:
- parent_name = '"%s"' % (package_name.rsplit('.', 1)[0],)
- else:
- parent_name = 'NULL'
- code.globalstate.use_utility_code(UtilityCode.load(
- "SetPackagePathFromImportLib", "ImportExport.c"))
- code.putln(code.error_goto_if_neg(
- '__Pyx_SetPackagePathFromImportLib(%s, %s)' % (
- parent_name,
- code.globalstate.get_py_string_const(
- EncodedString(env.module_name)).cname),
- self.pos))
+ package_name, _ = self.full_module_name.rsplit('.', 1)
+ if '.' in package_name:
+ parent_name = '"%s"' % (package_name.rsplit('.', 1)[0],)
+ else:
+ parent_name = 'NULL'
+ code.globalstate.use_utility_code(UtilityCode.load(
+ "SetPackagePathFromImportLib", "ImportExport.c"))
+ code.putln(code.error_goto_if_neg(
+ '__Pyx_SetPackagePathFromImportLib(%s, %s)' % (
+ parent_name,
+ code.globalstate.get_py_string_const(
+ EncodedString(env.module_name)).cname),
+ self.pos))
code.putln("}")
-
- # CPython may not have put us into sys.modules yet, but relative imports and reimports require it
- fq_module_name = self.full_module_name
- if fq_module_name.endswith('.__init__'):
- fq_module_name = fq_module_name[:-len('.__init__')]
- code.putln("#if PY_MAJOR_VERSION >= 3")
- code.putln("{")
- code.putln("PyObject *modules = PyImport_GetModuleDict(); %s" %
- code.error_goto_if_null("modules", self.pos))
- code.putln('if (!PyDict_GetItemString(modules, "%s")) {' % fq_module_name)
- code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, "%s", %s)' % (
- fq_module_name, env.module_cname), self.pos))
- code.putln("}")
- code.putln("}")
- code.putln("#endif")
-
- def generate_module_cleanup_func(self, env, code):
- if not Options.generate_cleanup_code:
- return
-
- code.putln('static void %s(CYTHON_UNUSED PyObject *self) {' %
- Naming.cleanup_cname)
+
+ # CPython may not have put us into sys.modules yet, but relative imports and reimports require it
+ fq_module_name = self.full_module_name
+ if fq_module_name.endswith('.__init__'):
+ fq_module_name = fq_module_name[:-len('.__init__')]
+ code.putln("#if PY_MAJOR_VERSION >= 3")
+ code.putln("{")
+ code.putln("PyObject *modules = PyImport_GetModuleDict(); %s" %
+ code.error_goto_if_null("modules", self.pos))
+ code.putln('if (!PyDict_GetItemString(modules, "%s")) {' % fq_module_name)
+ code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, "%s", %s)' % (
+ fq_module_name, env.module_cname), self.pos))
+ code.putln("}")
+ code.putln("}")
+ code.putln("#endif")
+
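The import setup just above fills in __file__ (and __path__ for packages) and makes sure the module is registered in sys.modules, since relative imports and re-imports rely on that entry. A rough Python restatement of the single-phase-init behaviour, with placeholder arguments:

    import os
    import sys

    def module_import_setup(module, source_path, is_package, fq_name):
        if source_path:
            module.__file__ = source_path
            if is_package:
                module.__path__ = [os.path.dirname(source_path)]
        # CPython may not have added the module to sys.modules yet
        sys.modules.setdefault(fq_name, module)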
+ def generate_module_cleanup_func(self, env, code):
+ if not Options.generate_cleanup_code:
+ return
+
+ code.putln('static void %s(CYTHON_UNUSED PyObject *self) {' %
+ Naming.cleanup_cname)
code.enter_cfunc_scope(env)
- if Options.generate_cleanup_code >= 2:
- code.putln("/*--- Global cleanup code ---*/")
- rev_entries = list(env.var_entries)
- rev_entries.reverse()
- for entry in rev_entries:
- if entry.visibility != 'extern':
- if entry.type.is_pyobject and entry.used:
- code.put_xdecref_clear(
- entry.cname, entry.type,
- clear_before_decref=True,
- nanny=False)
- code.putln("__Pyx_CleanupGlobals();")
- if Options.generate_cleanup_code >= 3:
- code.putln("/*--- Type import cleanup code ---*/")
- for ext_type in sorted(env.types_imported, key=operator.attrgetter('typeptr_cname')):
- code.put_xdecref_clear(
- ext_type.typeptr_cname, ext_type,
- clear_before_decref=True,
- nanny=False)
- if Options.cache_builtins:
- code.putln("/*--- Builtin cleanup code ---*/")
- for entry in env.cached_builtins:
- code.put_xdecref_clear(
- entry.cname, PyrexTypes.py_object_type,
- clear_before_decref=True,
- nanny=False)
- code.putln("/*--- Intern cleanup code ---*/")
- code.put_decref_clear(Naming.empty_tuple,
- PyrexTypes.py_object_type,
- clear_before_decref=True,
- nanny=False)
- for entry in env.c_class_entries:
- cclass_type = entry.type
- if cclass_type.is_external or cclass_type.base_type:
- continue
- if cclass_type.scope.directives.get('freelist', 0):
- scope = cclass_type.scope
- freelist_name = scope.mangle_internal(Naming.freelist_name)
- freecount_name = scope.mangle_internal(Naming.freecount_name)
- code.putln("while (%s > 0) {" % freecount_name)
- code.putln("PyObject* o = (PyObject*)%s[--%s];" % (
- freelist_name, freecount_name))
- code.putln("(*Py_TYPE(o)->tp_free)(o);")
- code.putln("}")
-# for entry in env.pynum_entries:
-# code.put_decref_clear(entry.cname,
-# PyrexTypes.py_object_type,
-# nanny=False)
-# for entry in env.all_pystring_entries:
-# if entry.is_interned:
-# code.put_decref_clear(entry.pystring_cname,
-# PyrexTypes.py_object_type,
-# nanny=False)
-# for entry in env.default_entries:
-# if entry.type.is_pyobject and entry.used:
-# code.putln("Py_DECREF(%s); %s = 0;" % (
-# code.entry_as_pyobject(entry), entry.cname))
+ if Options.generate_cleanup_code >= 2:
+ code.putln("/*--- Global cleanup code ---*/")
+ rev_entries = list(env.var_entries)
+ rev_entries.reverse()
+ for entry in rev_entries:
+ if entry.visibility != 'extern':
+ if entry.type.is_pyobject and entry.used:
+ code.put_xdecref_clear(
+ entry.cname, entry.type,
+ clear_before_decref=True,
+ nanny=False)
+ code.putln("__Pyx_CleanupGlobals();")
+ if Options.generate_cleanup_code >= 3:
+ code.putln("/*--- Type import cleanup code ---*/")
+ for ext_type in sorted(env.types_imported, key=operator.attrgetter('typeptr_cname')):
+ code.put_xdecref_clear(
+ ext_type.typeptr_cname, ext_type,
+ clear_before_decref=True,
+ nanny=False)
+ if Options.cache_builtins:
+ code.putln("/*--- Builtin cleanup code ---*/")
+ for entry in env.cached_builtins:
+ code.put_xdecref_clear(
+ entry.cname, PyrexTypes.py_object_type,
+ clear_before_decref=True,
+ nanny=False)
+ code.putln("/*--- Intern cleanup code ---*/")
+ code.put_decref_clear(Naming.empty_tuple,
+ PyrexTypes.py_object_type,
+ clear_before_decref=True,
+ nanny=False)
+ for entry in env.c_class_entries:
+ cclass_type = entry.type
+ if cclass_type.is_external or cclass_type.base_type:
+ continue
+ if cclass_type.scope.directives.get('freelist', 0):
+ scope = cclass_type.scope
+ freelist_name = scope.mangle_internal(Naming.freelist_name)
+ freecount_name = scope.mangle_internal(Naming.freecount_name)
+ code.putln("while (%s > 0) {" % freecount_name)
+ code.putln("PyObject* o = (PyObject*)%s[--%s];" % (
+ freelist_name, freecount_name))
+ code.putln("(*Py_TYPE(o)->tp_free)(o);")
+ code.putln("}")
+# for entry in env.pynum_entries:
+# code.put_decref_clear(entry.cname,
+# PyrexTypes.py_object_type,
+# nanny=False)
+# for entry in env.all_pystring_entries:
+# if entry.is_interned:
+# code.put_decref_clear(entry.pystring_cname,
+# PyrexTypes.py_object_type,
+# nanny=False)
+# for entry in env.default_entries:
+# if entry.type.is_pyobject and entry.used:
+# code.putln("Py_DECREF(%s); %s = 0;" % (
+# code.entry_as_pyobject(entry), entry.cname))
if Options.pre_import is not None:
code.put_decref_clear(Naming.preimport_cname, py_object_type,
nanny=False, clear_before_decref=True)
for cname in [env.module_dict_cname, Naming.cython_runtime_cname, Naming.builtins_cname]:
code.put_decref_clear(cname, py_object_type, nanny=False, clear_before_decref=True)
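The cleanup function above also drains any per-type freelists created by the freelist directive, handing each cached instance back to the type's tp_free. A tiny sketch of that loop, with both arguments standing in for the generated C names:

    def drain_freelist(freelist, tp_free):
        # freelist: the module-level cache of spare instances;
        # tp_free: the type's low-level deallocator, called once per entry.
        while freelist:
            tp_free(freelist.pop())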
-
- def generate_main_method(self, env, code):
- module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))
- if Options.embed == "main":
- wmain = "wmain"
- else:
- wmain = Options.embed
+
+ def generate_main_method(self, env, code):
+ module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))
+ if Options.embed == "main":
+ wmain = "wmain"
+ else:
+ wmain = Options.embed
main_method = UtilityCode.load_cached("MainFunction", "Embed.c")
- code.globalstate.use_utility_code(
- main_method.specialize(
+ code.globalstate.use_utility_code(
+ main_method.specialize(
module_name=env.module_name,
module_is_main=module_is_main,
main_method=Options.embed,
wmain_method=wmain))
-
+
def mod_init_func_cname(self, prefix, env, options=None):
return '%s_%s' % (prefix, options and options.init_suffix or env.module_name)
def generate_pymoduledef_struct(self, env, options, code):
- if env.doc:
- doc = "%s" % code.get_string_const(env.doc)
- else:
- doc = "0"
- if Options.generate_cleanup_code:
- cleanup_func = "(freefunc)%s" % Naming.cleanup_cname
- else:
- cleanup_func = 'NULL'
-
- code.putln("")
- code.putln("#if PY_MAJOR_VERSION >= 3")
+ if env.doc:
+ doc = "%s" % code.get_string_const(env.doc)
+ else:
+ doc = "0"
+ if Options.generate_cleanup_code:
+ cleanup_func = "(freefunc)%s" % Naming.cleanup_cname
+ else:
+ cleanup_func = 'NULL'
+
+ code.putln("")
+ code.putln("#if PY_MAJOR_VERSION >= 3")
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
exec_func_cname = self.mod_init_func_cname(Naming.pymodule_exec_func_cname, env)
code.putln("static PyObject* %s(PyObject *spec, PyModuleDef *def); /*proto*/" %
@@ -2779,34 +2779,34 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("#endif")
code.putln("")
- code.putln("static struct PyModuleDef %s = {" % Naming.pymoduledef_cname)
- code.putln(" PyModuleDef_HEAD_INIT,")
+ code.putln("static struct PyModuleDef %s = {" % Naming.pymoduledef_cname)
+ code.putln(" PyModuleDef_HEAD_INIT,")
code.putln(' "%s",' % (options.module_name or env.module_name))
- code.putln(" %s, /* m_doc */" % doc)
+ code.putln(" %s, /* m_doc */" % doc)
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
code.putln(" 0, /* m_size */")
code.putln("#else")
- code.putln(" -1, /* m_size */")
+ code.putln(" -1, /* m_size */")
code.putln("#endif")
- code.putln(" %s /* m_methods */," % env.method_table_cname)
+ code.putln(" %s /* m_methods */," % env.method_table_cname)
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
code.putln(" %s, /* m_slots */" % Naming.pymoduledef_slots_cname)
code.putln("#else")
- code.putln(" NULL, /* m_reload */")
+ code.putln(" NULL, /* m_reload */")
+ code.putln("#endif")
+ code.putln(" NULL, /* m_traverse */")
+ code.putln(" NULL, /* m_clear */")
+ code.putln(" %s /* m_free */" % cleanup_func)
+ code.putln("};")
code.putln("#endif")
- code.putln(" NULL, /* m_traverse */")
- code.putln(" NULL, /* m_clear */")
- code.putln(" %s /* m_free */" % cleanup_func)
- code.putln("};")
- code.putln("#endif")
-
+
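As a reading aid, the PyModuleDef initializer written above fills the CPython fields in this order; the values noted are what this generator supplies:

    PYMODULEDEF_FIELDS = [
        "PyModuleDef_HEAD_INIT",
        "m_name",      # the module name string
        "m_doc",       # docstring constant, or 0
        "m_size",      # 0 under multi-phase init, -1 otherwise
        "m_methods",   # the generated PyMethodDef table
        "m_slots",     # PEP 489 slots, or the NULL m_reload field otherwise
        "m_traverse",  # NULL
        "m_clear",     # NULL
        "m_free",      # module cleanup function, or NULL
    ]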
def generate_module_creation_code(self, env, options, code):
- # Generate code to create the module object and
- # install the builtins.
- if env.doc:
- doc = "%s" % code.get_string_const(env.doc)
- else:
- doc = "0"
+ # Generate code to create the module object and
+ # install the builtins.
+ if env.doc:
+ doc = "%s" % code.get_string_const(env.doc)
+ else:
+ doc = "0"
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
code.putln("%s = %s;" % (
@@ -2814,199 +2814,199 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
Naming.pymodinit_module_arg))
code.put_incref(env.module_cname, py_object_type, nanny=False)
code.putln("#else")
- code.putln("#if PY_MAJOR_VERSION < 3")
- code.putln(
- '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % (
- env.module_cname,
+ code.putln("#if PY_MAJOR_VERSION < 3")
+ code.putln(
+ '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % (
+ env.module_cname,
options.module_name or env.module_name,
- env.method_table_cname,
- doc,
- env.module_cname))
- code.putln("#else")
- code.putln(
- "%s = PyModule_Create(&%s);" % (
- env.module_cname,
- Naming.pymoduledef_cname))
- code.putln("#endif")
- code.putln(code.error_goto_if_null(env.module_cname, self.pos))
+ env.method_table_cname,
+ doc,
+ env.module_cname))
+ code.putln("#else")
+ code.putln(
+ "%s = PyModule_Create(&%s);" % (
+ env.module_cname,
+ Naming.pymoduledef_cname))
+ code.putln("#endif")
+ code.putln(code.error_goto_if_null(env.module_cname, self.pos))
code.putln("#endif") # CYTHON_PEP489_MULTI_PHASE_INIT
- code.putln(
- "%s = PyModule_GetDict(%s); %s" % (
- env.module_dict_cname, env.module_cname,
- code.error_goto_if_null(env.module_dict_cname, self.pos)))
- code.put_incref(env.module_dict_cname, py_object_type, nanny=False)
-
- code.putln(
- '%s = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); %s' % (
- Naming.builtins_cname,
- code.error_goto_if_null(Naming.builtins_cname, self.pos)))
+ code.putln(
+ "%s = PyModule_GetDict(%s); %s" % (
+ env.module_dict_cname, env.module_cname,
+ code.error_goto_if_null(env.module_dict_cname, self.pos)))
+ code.put_incref(env.module_dict_cname, py_object_type, nanny=False)
+
+ code.putln(
+ '%s = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); %s' % (
+ Naming.builtins_cname,
+ code.error_goto_if_null(Naming.builtins_cname, self.pos)))
code.put_incref(Naming.builtins_cname, py_object_type, nanny=False)
code.putln(
'%s = PyImport_AddModule((char *) "cython_runtime"); %s' % (
Naming.cython_runtime_cname,
code.error_goto_if_null(Naming.cython_runtime_cname, self.pos)))
code.put_incref(Naming.cython_runtime_cname, py_object_type, nanny=False)
- code.putln(
- 'if (PyObject_SetAttrString(%s, "__builtins__", %s) < 0) %s;' % (
- env.module_cname,
- Naming.builtins_cname,
- code.error_goto(self.pos)))
- if Options.pre_import is not None:
- code.putln(
- '%s = PyImport_AddModule("%s"); %s' % (
- Naming.preimport_cname,
- Options.pre_import,
- code.error_goto_if_null(Naming.preimport_cname, self.pos)))
+ code.putln(
+ 'if (PyObject_SetAttrString(%s, "__builtins__", %s) < 0) %s;' % (
+ env.module_cname,
+ Naming.builtins_cname,
+ code.error_goto(self.pos)))
+ if Options.pre_import is not None:
+ code.putln(
+ '%s = PyImport_AddModule("%s"); %s' % (
+ Naming.preimport_cname,
+ Options.pre_import,
+ code.error_goto_if_null(Naming.preimport_cname, self.pos)))
code.put_incref(Naming.preimport_cname, py_object_type, nanny=False)
-
- def generate_global_init_code(self, env, code):
- # Generate code to initialise global PyObject *
- # variables to None.
- for entry in env.var_entries:
- if entry.visibility != 'extern':
- if entry.used:
- entry.type.global_init_code(entry, code)
-
- def generate_wrapped_entries_code(self, env, code):
+
+ def generate_global_init_code(self, env, code):
+ # Generate code to initialise global PyObject *
+ # variables to None.
+ for entry in env.var_entries:
+ if entry.visibility != 'extern':
+ if entry.used:
+ entry.type.global_init_code(entry, code)
+
+ def generate_wrapped_entries_code(self, env, code):
for name, entry in sorted(env.entries.items()):
- if (entry.create_wrapper
- and not entry.is_type
- and entry.scope is env):
- if not entry.type.create_to_py_utility_code(env):
- error(entry.pos, "Cannot convert '%s' to Python object" % entry.type)
- code.putln("{")
- code.putln("PyObject* wrapped = %s(%s);" % (
- entry.type.to_py_function,
- entry.cname))
- code.putln(code.error_goto_if_null("wrapped", entry.pos))
- code.putln(
- 'if (PyObject_SetAttrString(%s, "%s", wrapped) < 0) %s;' % (
- env.module_cname,
- name,
- code.error_goto(entry.pos)))
- code.putln("}")
-
- def generate_c_variable_export_code(self, env, code):
- # Generate code to create PyCFunction wrappers for exported C functions.
- entries = []
- for entry in env.var_entries:
- if (entry.api
+ if (entry.create_wrapper
+ and not entry.is_type
+ and entry.scope is env):
+ if not entry.type.create_to_py_utility_code(env):
+ error(entry.pos, "Cannot convert '%s' to Python object" % entry.type)
+ code.putln("{")
+ code.putln("PyObject* wrapped = %s(%s);" % (
+ entry.type.to_py_function,
+ entry.cname))
+ code.putln(code.error_goto_if_null("wrapped", entry.pos))
+ code.putln(
+ 'if (PyObject_SetAttrString(%s, "%s", wrapped) < 0) %s;' % (
+ env.module_cname,
+ name,
+ code.error_goto(entry.pos)))
+ code.putln("}")
+
+ def generate_c_variable_export_code(self, env, code):
+ # Generate code to create PyCFunction wrappers for exported C functions.
+ entries = []
+ for entry in env.var_entries:
+ if (entry.api
or entry.defined_in_pxd
or (Options.cimport_from_pyx and not entry.visibility == 'extern')):
- entries.append(entry)
- if entries:
- env.use_utility_code(UtilityCode.load_cached("VoidPtrExport", "ImportExport.c"))
- for entry in entries:
+ entries.append(entry)
+ if entries:
+ env.use_utility_code(UtilityCode.load_cached("VoidPtrExport", "ImportExport.c"))
+ for entry in entries:
signature = entry.type.empty_declaration_code()
- name = code.intern_identifier(entry.name)
- code.putln('if (__Pyx_ExportVoidPtr(%s, (void *)&%s, "%s") < 0) %s' % (
- name, entry.cname, signature,
- code.error_goto(self.pos)))
-
- def generate_c_function_export_code(self, env, code):
- # Generate code to create PyCFunction wrappers for exported C functions.
- entries = []
- for entry in env.cfunc_entries:
- if (entry.api
+ name = code.intern_identifier(entry.name)
+ code.putln('if (__Pyx_ExportVoidPtr(%s, (void *)&%s, "%s") < 0) %s' % (
+ name, entry.cname, signature,
+ code.error_goto(self.pos)))
+
+ def generate_c_function_export_code(self, env, code):
+ # Generate code to create PyCFunction wrappers for exported C functions.
+ entries = []
+ for entry in env.cfunc_entries:
+ if (entry.api
or entry.defined_in_pxd
or (Options.cimport_from_pyx and not entry.visibility == 'extern')):
- entries.append(entry)
- if entries:
- env.use_utility_code(
- UtilityCode.load_cached("FunctionExport", "ImportExport.c"))
+ entries.append(entry)
+ if entries:
+ env.use_utility_code(
+ UtilityCode.load_cached("FunctionExport", "ImportExport.c"))
# Note: while this looks like it could be more cheaply stored and read from a struct array,
# investigation shows that the resulting binary is smaller with repeated functions calls.
- for entry in entries:
- signature = entry.type.signature_string()
- code.putln('if (__Pyx_ExportFunction("%s", (void (*)(void))%s, "%s") < 0) %s' % (
- entry.name,
- entry.cname,
- signature,
- code.error_goto(self.pos)))
-
- def generate_type_import_code_for_module(self, module, env, code):
- # Generate type import code for all exported extension types in
- # an imported module.
- #if module.c_class_entries:
+ for entry in entries:
+ signature = entry.type.signature_string()
+ code.putln('if (__Pyx_ExportFunction("%s", (void (*)(void))%s, "%s") < 0) %s' % (
+ entry.name,
+ entry.cname,
+ signature,
+ code.error_goto(self.pos)))
+
+ def generate_type_import_code_for_module(self, module, env, code):
+ # Generate type import code for all exported extension types in
+ # an imported module.
+ #if module.c_class_entries:
with ModuleImportGenerator(code) as import_generator:
for entry in module.c_class_entries:
if entry.defined_in_pxd:
self.generate_type_import_code(env, entry.type, entry.pos, code, import_generator)
-
- def specialize_fused_types(self, pxd_env):
- """
- If fused c(p)def functions are defined in an imported pxd, but not
- used in this implementation file, we still have fused entries and
- not specialized ones. This method replaces any fused entries with their
- specialized ones.
- """
- for entry in pxd_env.cfunc_entries[:]:
- if entry.type.is_fused:
- # This call modifies the cfunc_entries in-place
- entry.type.get_all_specialized_function_types()
-
- def generate_c_variable_import_code_for_module(self, module, env, code):
-        # Generate import code for all exported C variables in a cimported module.
- entries = []
- for entry in module.var_entries:
- if entry.defined_in_pxd:
- entries.append(entry)
- if entries:
- env.use_utility_code(
- UtilityCode.load_cached("VoidPtrImport", "ImportExport.c"))
- temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- code.putln(
+
+ def specialize_fused_types(self, pxd_env):
+ """
+ If fused c(p)def functions are defined in an imported pxd, but not
+ used in this implementation file, we still have fused entries and
+ not specialized ones. This method replaces any fused entries with their
+ specialized ones.
+ """
+ for entry in pxd_env.cfunc_entries[:]:
+ if entry.type.is_fused:
+ # This call modifies the cfunc_entries in-place
+ entry.type.get_all_specialized_function_types()
+
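specialize_fused_types iterates over a copy of cfunc_entries because get_all_specialized_function_types() grows that same list as a side effect. A toy illustration of the pattern (the dict layout is made up, not the real Entry type):

entries = [
    {"name": "foo", "is_fused": True, "specs": ["foo_int", "foo_double"]},
    {"name": "bar", "is_fused": False, "specs": []},
]

for entry in entries[:]:                  # iterate over a copy while the list grows
    if entry["is_fused"]:
        entries.extend({"name": s, "is_fused": False, "specs": []}
                       for s in entry["specs"])

print([e["name"] for e in entries])       # ['foo', 'bar', 'foo_int', 'foo_double']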
+ def generate_c_variable_import_code_for_module(self, module, env, code):
+        # Generate import code for all exported C variables in a cimported module.
+ entries = []
+ for entry in module.var_entries:
+ if entry.defined_in_pxd:
+ entries.append(entry)
+ if entries:
+ env.use_utility_code(
+ UtilityCode.load_cached("VoidPtrImport", "ImportExport.c"))
+ temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ code.putln(
'%s = PyImport_ImportModule("%s"); if (!%s) %s' % (
- temp,
- module.qualified_name,
- temp,
- code.error_goto(self.pos)))
+ temp,
+ module.qualified_name,
+ temp,
+ code.error_goto(self.pos)))
code.put_gotref(temp)
- for entry in entries:
- if env is module:
- cname = entry.cname
- else:
- cname = module.mangle(Naming.varptr_prefix, entry.name)
+ for entry in entries:
+ if env is module:
+ cname = entry.cname
+ else:
+ cname = module.mangle(Naming.varptr_prefix, entry.name)
signature = entry.type.empty_declaration_code()
- code.putln(
- 'if (__Pyx_ImportVoidPtr(%s, "%s", (void **)&%s, "%s") < 0) %s' % (
- temp, entry.name, cname, signature,
- code.error_goto(self.pos)))
+ code.putln(
+ 'if (__Pyx_ImportVoidPtr(%s, "%s", (void **)&%s, "%s") < 0) %s' % (
+ temp, entry.name, cname, signature,
+ code.error_goto(self.pos)))
code.put_decref_clear(temp, py_object_type)
code.funcstate.release_temp(temp)
-
- def generate_c_function_import_code_for_module(self, module, env, code):
- # Generate import code for all exported C functions in a cimported module.
- entries = []
- for entry in module.cfunc_entries:
- if entry.defined_in_pxd and entry.used:
- entries.append(entry)
- if entries:
- env.use_utility_code(
- UtilityCode.load_cached("FunctionImport", "ImportExport.c"))
- temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- code.putln(
+
+ def generate_c_function_import_code_for_module(self, module, env, code):
+ # Generate import code for all exported C functions in a cimported module.
+ entries = []
+ for entry in module.cfunc_entries:
+ if entry.defined_in_pxd and entry.used:
+ entries.append(entry)
+ if entries:
+ env.use_utility_code(
+ UtilityCode.load_cached("FunctionImport", "ImportExport.c"))
+ temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ code.putln(
'%s = PyImport_ImportModule("%s"); if (!%s) %s' % (
- temp,
- module.qualified_name,
- temp,
- code.error_goto(self.pos)))
+ temp,
+ module.qualified_name,
+ temp,
+ code.error_goto(self.pos)))
code.put_gotref(temp)
- for entry in entries:
- code.putln(
- 'if (__Pyx_ImportFunction(%s, "%s", (void (**)(void))&%s, "%s") < 0) %s' % (
- temp,
- entry.name,
- entry.cname,
- entry.type.signature_string(),
- code.error_goto(self.pos)))
+ for entry in entries:
+ code.putln(
+ 'if (__Pyx_ImportFunction(%s, "%s", (void (**)(void))&%s, "%s") < 0) %s' % (
+ temp,
+ entry.name,
+ entry.cname,
+ entry.type.signature_string(),
+ code.error_goto(self.pos)))
code.put_decref_clear(temp, py_object_type)
code.funcstate.release_temp(temp)
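The export and import helpers above agree on a (name, signature) contract so that a C-level signature mismatch fails at import time instead of crashing at call time; roughly, the real helpers keep PyCapsule entries in the exporting module's __pyx_capi__ dict. A plain-Python sketch of that contract, with a dict standing in for the capsule table:

exported = {}   # name -> (address, signature); stands in for __pyx_capi__

def export_function(name, address, signature):
    exported[name] = (address, signature)

def import_function(name, expected_signature):
    address, signature = exported[name]
    if signature != expected_signature:   # the check __Pyx_ImportFunction performs
        raise ImportError("%s has signature %r, expected %r"
                          % (name, signature, expected_signature))
    return address

export_function("spam", 0xDEADBEEF, "int (long)")
print(import_function("spam", "int (long)"))   # 3735928559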
-
- def generate_type_init_code(self, env, code):
- # Generate type import code for extern extension types
- # and type ready code for non-extern ones.
+
+ def generate_type_init_code(self, env, code):
+ # Generate type import code for extern extension types
+ # and type ready code for non-extern ones.
with ModuleImportGenerator(code) as import_generator:
for entry in env.c_class_entries:
if entry.visibility == 'extern' and not entry.utility_code_definition:
@@ -3016,88 +3016,88 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
self.generate_exttype_vtable_init_code(entry, code)
if entry.type.early_init:
self.generate_type_ready_code(entry, code)
-
+
def generate_base_type_import_code(self, env, entry, code, import_generator):
- base_type = entry.type.base_type
- if (base_type and base_type.module_name != env.qualified_name and not
+ base_type = entry.type.base_type
+ if (base_type and base_type.module_name != env.qualified_name and not
base_type.is_builtin_type and not entry.utility_code_definition):
self.generate_type_import_code(env, base_type, self.pos, code, import_generator)
-
+
def generate_type_import_code(self, env, type, pos, code, import_generator):
- # If not already done, generate code to import the typeobject of an
- # extension type defined in another module, and extract its C method
- # table pointer if any.
- if type in env.types_imported:
- return
+ # If not already done, generate code to import the typeobject of an
+ # extension type defined in another module, and extract its C method
+ # table pointer if any.
+ if type in env.types_imported:
+ return
if type.name not in Code.ctypedef_builtins_map:
# see corresponding condition in generate_type_import_call() below!
code.globalstate.use_utility_code(
UtilityCode.load_cached("TypeImport", "ImportExport.c"))
self.generate_type_import_call(type, code, import_generator, error_pos=pos)
- if type.vtabptr_cname:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached('GetVTable', 'ImportExport.c'))
- code.putln("%s = (struct %s*)__Pyx_GetVtable(%s->tp_dict); %s" % (
- type.vtabptr_cname,
- type.vtabstruct_cname,
- type.typeptr_cname,
- code.error_goto_if_null(type.vtabptr_cname, pos)))
- env.types_imported.add(type)
-
+ if type.vtabptr_cname:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached('GetVTable', 'ImportExport.c'))
+ code.putln("%s = (struct %s*)__Pyx_GetVtable(%s->tp_dict); %s" % (
+ type.vtabptr_cname,
+ type.vtabstruct_cname,
+ type.typeptr_cname,
+ code.error_goto_if_null(type.vtabptr_cname, pos)))
+ env.types_imported.add(type)
+
def generate_type_import_call(self, type, code, import_generator, error_code=None, error_pos=None):
- if type.typedef_flag:
- objstruct = type.objstruct_cname
- else:
- objstruct = "struct %s" % type.objstruct_cname
- sizeof_objstruct = objstruct
- module_name = type.module_name
- condition = replacement = None
- if module_name not in ('__builtin__', 'builtins'):
- module_name = '"%s"' % module_name
+ if type.typedef_flag:
+ objstruct = type.objstruct_cname
+ else:
+ objstruct = "struct %s" % type.objstruct_cname
+ sizeof_objstruct = objstruct
+ module_name = type.module_name
+ condition = replacement = None
+ if module_name not in ('__builtin__', 'builtins'):
+ module_name = '"%s"' % module_name
elif type.name in Code.ctypedef_builtins_map:
# Fast path for special builtins, don't actually import
ctypename = Code.ctypedef_builtins_map[type.name]
code.putln('%s = %s;' % (type.typeptr_cname, ctypename))
return
- else:
- module_name = '__Pyx_BUILTIN_MODULE_NAME'
- if type.name in Code.non_portable_builtins_map:
- condition, replacement = Code.non_portable_builtins_map[type.name]
- if objstruct in Code.basicsize_builtins_map:
- # Some builtin types have a tp_basicsize which differs from sizeof(...):
- sizeof_objstruct = Code.basicsize_builtins_map[objstruct]
-
+ else:
+ module_name = '__Pyx_BUILTIN_MODULE_NAME'
+ if type.name in Code.non_portable_builtins_map:
+ condition, replacement = Code.non_portable_builtins_map[type.name]
+ if objstruct in Code.basicsize_builtins_map:
+ # Some builtin types have a tp_basicsize which differs from sizeof(...):
+ sizeof_objstruct = Code.basicsize_builtins_map[objstruct]
+
if not error_code:
assert error_pos is not None
error_code = code.error_goto(error_pos)
module = import_generator.imported_module(module_name, error_code)
code.put('%s = __Pyx_ImportType(%s, %s,' % (
- type.typeptr_cname,
+ type.typeptr_cname,
module,
- module_name))
-
- if condition and replacement:
- code.putln("") # start in new line
- code.putln("#if %s" % condition)
- code.putln('"%s",' % replacement)
- code.putln("#else")
- code.putln('"%s",' % type.name)
- code.putln("#endif")
- else:
- code.put(' "%s", ' % type.name)
-
- if sizeof_objstruct != objstruct:
- if not condition:
- code.putln("") # start in new line
+ module_name))
+
+ if condition and replacement:
+ code.putln("") # start in new line
+ code.putln("#if %s" % condition)
+ code.putln('"%s",' % replacement)
+ code.putln("#else")
+ code.putln('"%s",' % type.name)
+ code.putln("#endif")
+ else:
+ code.put(' "%s", ' % type.name)
+
+ if sizeof_objstruct != objstruct:
+ if not condition:
+ code.putln("") # start in new line
code.putln("#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000")
- code.putln('sizeof(%s),' % objstruct)
- code.putln("#else")
- code.putln('sizeof(%s),' % sizeof_objstruct)
- code.putln("#endif")
- else:
- code.put('sizeof(%s), ' % objstruct)
-
+ code.putln('sizeof(%s),' % objstruct)
+ code.putln("#else")
+ code.putln('sizeof(%s),' % sizeof_objstruct)
+ code.putln("#endif")
+ else:
+ code.put('sizeof(%s), ' % objstruct)
+
# check_size
if type.check_size and type.check_size in ('error', 'warn', 'ignore'):
check_size = type.check_size
@@ -3107,41 +3107,41 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
raise RuntimeError("invalid value for check_size '%s' when compiling %s.%s" % (
type.check_size, module_name, type.name))
code.putln('__Pyx_ImportType_CheckSize_%s);' % check_size.title())
-
+
code.putln(' if (!%s) %s' % (type.typeptr_cname, error_code))
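generate_type_import_call emits C that imports the defining module, looks the extension type up by name and sanity-checks its object size before using it. A rough Python-level analogue, using a stdlib type purely as an example:

import importlib

mod = importlib.import_module("collections")
tp = getattr(mod, "OrderedDict")
if not isinstance(tp, type):
    raise TypeError("OrderedDict is not a type object")
# loose analogue of the tp_basicsize comparison done by __Pyx_ImportType
print(tp.__name__, tp.__basicsize__)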
def generate_type_ready_code(self, entry, code):
Nodes.CClassDefNode.generate_type_ready_code(entry, code)
-
- def generate_exttype_vtable_init_code(self, entry, code):
- # Generate code to initialise the C method table of an
- # extension type.
- type = entry.type
- if type.vtable_cname:
- code.putln(
- "%s = &%s;" % (
- type.vtabptr_cname,
- type.vtable_cname))
- if type.base_type and type.base_type.vtabptr_cname:
- code.putln(
- "%s.%s = *%s;" % (
- type.vtable_cname,
- Naming.obj_base_cname,
- type.base_type.vtabptr_cname))
-
- c_method_entries = [
- entry for entry in type.scope.cfunc_entries
+
+ def generate_exttype_vtable_init_code(self, entry, code):
+ # Generate code to initialise the C method table of an
+ # extension type.
+ type = entry.type
+ if type.vtable_cname:
+ code.putln(
+ "%s = &%s;" % (
+ type.vtabptr_cname,
+ type.vtable_cname))
+ if type.base_type and type.base_type.vtabptr_cname:
+ code.putln(
+ "%s.%s = *%s;" % (
+ type.vtable_cname,
+ Naming.obj_base_cname,
+ type.base_type.vtabptr_cname))
+
+ c_method_entries = [
+ entry for entry in type.scope.cfunc_entries
if entry.func_cname]
- if c_method_entries:
- for meth_entry in c_method_entries:
- cast = meth_entry.type.signature_cast_string()
- code.putln(
- "%s.%s = %s%s;" % (
- type.vtable_cname,
- meth_entry.cname,
- cast,
- meth_entry.func_cname))
-
+ if c_method_entries:
+ for meth_entry in c_method_entries:
+ cast = meth_entry.type.signature_cast_string()
+ code.putln(
+ "%s.%s = %s%s;" % (
+ type.vtable_cname,
+ meth_entry.cname,
+ cast,
+ meth_entry.func_cname))
+
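generate_exttype_vtable_init_code fills a struct of C function pointers: first copy the base type's vtable wholesale, then overwrite the slots this type defines itself. The same shape, sketched with dicts (names invented):

base_vtable = {"area": "Base_area", "perimeter": "Base_perimeter"}

vtable = dict(base_vtable)           # "%s.%s = *%s;"  -- copy the base vtable
vtable["area"] = "Circle_area"       # "%s.%s = %s%s;" -- override own methods

print(vtable)   # {'area': 'Circle_area', 'perimeter': 'Base_perimeter'}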
class ModuleImportGenerator(object):
"""
@@ -3179,51 +3179,51 @@ class ModuleImportGenerator(object):
code.funcstate.release_temp(temp)
-def generate_cfunction_declaration(entry, env, code, definition):
- from_cy_utility = entry.used and entry.utility_code_definition
+def generate_cfunction_declaration(entry, env, code, definition):
+ from_cy_utility = entry.used and entry.utility_code_definition
if entry.used and entry.inline_func_in_pxd or (not entry.in_cinclude and (
definition or entry.defined_in_pxd or entry.visibility == 'extern' or from_cy_utility)):
- if entry.visibility == 'extern':
- storage_class = Naming.extern_c_macro
- dll_linkage = "DL_IMPORT"
- elif entry.visibility == 'public':
- storage_class = Naming.extern_c_macro
+ if entry.visibility == 'extern':
+ storage_class = Naming.extern_c_macro
+ dll_linkage = "DL_IMPORT"
+ elif entry.visibility == 'public':
+ storage_class = Naming.extern_c_macro
dll_linkage = None
- elif entry.visibility == 'private':
- storage_class = "static"
- dll_linkage = None
- else:
- storage_class = "static"
- dll_linkage = None
- type = entry.type
-
- if entry.defined_in_pxd and not definition:
- storage_class = "static"
- dll_linkage = None
- type = CPtrType(type)
-
- header = type.declaration_code(
+ elif entry.visibility == 'private':
+ storage_class = "static"
+ dll_linkage = None
+ else:
+ storage_class = "static"
+ dll_linkage = None
+ type = entry.type
+
+ if entry.defined_in_pxd and not definition:
+ storage_class = "static"
+ dll_linkage = None
+ type = CPtrType(type)
+
+ header = type.declaration_code(
entry.cname, dll_linkage=dll_linkage)
- modifiers = code.build_function_modifiers(entry.func_modifiers)
- code.putln("%s %s%s; /*proto*/" % (
- storage_class,
- modifiers,
- header))
-
-#------------------------------------------------------------------------------------
-#
-# Runtime support code
-#
-#------------------------------------------------------------------------------------
-
+ modifiers = code.build_function_modifiers(entry.func_modifiers)
+ code.putln("%s %s%s; /*proto*/" % (
+ storage_class,
+ modifiers,
+ header))
+
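generate_cfunction_declaration boils down to a small decision table: the entry's visibility selects the storage class and DLL linkage, and a pxd declaration compiled without its definition is demoted to a static pointer. Just the table, restated as a sketch (the macro name follows extern_c_macro from Naming.py below):

def storage_for(visibility, extern_c_macro="__PYX_EXTERN_C"):
    if visibility == 'extern':
        return extern_c_macro, "DL_IMPORT"
    if visibility == 'public':
        return extern_c_macro, None
    return "static", None             # 'private' and anything else

print(storage_for('extern'))    # ('__PYX_EXTERN_C', 'DL_IMPORT')
print(storage_for('private'))   # ('static', None)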
+#------------------------------------------------------------------------------------
+#
+# Runtime support code
+#
+#------------------------------------------------------------------------------------
+
refnanny_utility_code = UtilityCode.load("Refnanny", "ModuleSetupCode.c")
-
-packed_struct_utility_code = UtilityCode(proto="""
-#if defined(__GNUC__)
-#define __Pyx_PACKED __attribute__((__packed__))
-#else
-#define __Pyx_PACKED
-#endif
-""", impl="", proto_block='utility_code_proto_before_types')
-
-capsule_utility_code = UtilityCode.load("Capsule")
+
+packed_struct_utility_code = UtilityCode(proto="""
+#if defined(__GNUC__)
+#define __Pyx_PACKED __attribute__((__packed__))
+#else
+#define __Pyx_PACKED
+#endif
+""", impl="", proto_block='utility_code_proto_before_types')
+
+capsule_utility_code = UtilityCode.load("Capsule")
diff --git a/contrib/tools/cython/Cython/Compiler/Naming.py b/contrib/tools/cython/Cython/Compiler/Naming.py
index c1201ca743..2c9b620788 100644
--- a/contrib/tools/cython/Cython/Compiler/Naming.py
+++ b/contrib/tools/cython/Cython/Compiler/Naming.py
@@ -1,162 +1,162 @@
-#
-# C naming conventions
-#
-#
-# Prefixes for generating C names.
-# Collected here to facilitate ensuring uniqueness.
-#
-
-pyrex_prefix = "__pyx_"
-
-
-codewriter_temp_prefix = pyrex_prefix + "t_"
-
-temp_prefix = u"__cyt_"
-
-builtin_prefix = pyrex_prefix + "builtin_"
-arg_prefix = pyrex_prefix + "arg_"
-funcdoc_prefix = pyrex_prefix + "doc_"
-enum_prefix = pyrex_prefix + "e_"
-func_prefix = pyrex_prefix + "f_"
+#
+# C naming conventions
+#
+#
+# Prefixes for generating C names.
+# Collected here to facilitate ensuring uniqueness.
+#
+
+pyrex_prefix = "__pyx_"
+
+
+codewriter_temp_prefix = pyrex_prefix + "t_"
+
+temp_prefix = u"__cyt_"
+
+builtin_prefix = pyrex_prefix + "builtin_"
+arg_prefix = pyrex_prefix + "arg_"
+funcdoc_prefix = pyrex_prefix + "doc_"
+enum_prefix = pyrex_prefix + "e_"
+func_prefix = pyrex_prefix + "f_"
func_prefix_api = pyrex_prefix + "api_f_"
-pyfunc_prefix = pyrex_prefix + "pf_"
-pywrap_prefix = pyrex_prefix + "pw_"
-genbody_prefix = pyrex_prefix + "gb_"
-gstab_prefix = pyrex_prefix + "getsets_"
-prop_get_prefix = pyrex_prefix + "getprop_"
-const_prefix = pyrex_prefix + "k_"
-py_const_prefix = pyrex_prefix + "kp_"
-label_prefix = pyrex_prefix + "L"
-pymethdef_prefix = pyrex_prefix + "mdef_"
+pyfunc_prefix = pyrex_prefix + "pf_"
+pywrap_prefix = pyrex_prefix + "pw_"
+genbody_prefix = pyrex_prefix + "gb_"
+gstab_prefix = pyrex_prefix + "getsets_"
+prop_get_prefix = pyrex_prefix + "getprop_"
+const_prefix = pyrex_prefix + "k_"
+py_const_prefix = pyrex_prefix + "kp_"
+label_prefix = pyrex_prefix + "L"
+pymethdef_prefix = pyrex_prefix + "mdef_"
method_wrapper_prefix = pyrex_prefix + "specialmethod_"
-methtab_prefix = pyrex_prefix + "methods_"
-memtab_prefix = pyrex_prefix + "members_"
-objstruct_prefix = pyrex_prefix + "obj_"
-typeptr_prefix = pyrex_prefix + "ptype_"
-prop_set_prefix = pyrex_prefix + "setprop_"
-type_prefix = pyrex_prefix + "t_"
-typeobj_prefix = pyrex_prefix + "type_"
-var_prefix = pyrex_prefix + "v_"
-varptr_prefix = pyrex_prefix + "vp_"
+methtab_prefix = pyrex_prefix + "methods_"
+memtab_prefix = pyrex_prefix + "members_"
+objstruct_prefix = pyrex_prefix + "obj_"
+typeptr_prefix = pyrex_prefix + "ptype_"
+prop_set_prefix = pyrex_prefix + "setprop_"
+type_prefix = pyrex_prefix + "t_"
+typeobj_prefix = pyrex_prefix + "type_"
+var_prefix = pyrex_prefix + "v_"
+varptr_prefix = pyrex_prefix + "vp_"
varptr_prefix_api = pyrex_prefix + "api_vp_"
-wrapperbase_prefix= pyrex_prefix + "wrapperbase_"
-pybuffernd_prefix = pyrex_prefix + "pybuffernd_"
-pybufferstruct_prefix = pyrex_prefix + "pybuffer_"
-vtable_prefix = pyrex_prefix + "vtable_"
-vtabptr_prefix = pyrex_prefix + "vtabptr_"
-vtabstruct_prefix = pyrex_prefix + "vtabstruct_"
-opt_arg_prefix = pyrex_prefix + "opt_args_"
-convert_func_prefix = pyrex_prefix + "convert_"
-closure_scope_prefix = pyrex_prefix + "scope_"
-closure_class_prefix = pyrex_prefix + "scope_struct_"
-lambda_func_prefix = pyrex_prefix + "lambda_"
-module_is_main = pyrex_prefix + "module_is_main_"
-defaults_struct_prefix = pyrex_prefix + "defaults"
-dynamic_args_cname = pyrex_prefix + "dynamic_args"
-
-interned_prefixes = {
- 'str': pyrex_prefix + "n_",
- 'int': pyrex_prefix + "int_",
- 'float': pyrex_prefix + "float_",
- 'tuple': pyrex_prefix + "tuple_",
- 'codeobj': pyrex_prefix + "codeobj_",
- 'slice': pyrex_prefix + "slice_",
- 'ustring': pyrex_prefix + "ustring_",
+wrapperbase_prefix= pyrex_prefix + "wrapperbase_"
+pybuffernd_prefix = pyrex_prefix + "pybuffernd_"
+pybufferstruct_prefix = pyrex_prefix + "pybuffer_"
+vtable_prefix = pyrex_prefix + "vtable_"
+vtabptr_prefix = pyrex_prefix + "vtabptr_"
+vtabstruct_prefix = pyrex_prefix + "vtabstruct_"
+opt_arg_prefix = pyrex_prefix + "opt_args_"
+convert_func_prefix = pyrex_prefix + "convert_"
+closure_scope_prefix = pyrex_prefix + "scope_"
+closure_class_prefix = pyrex_prefix + "scope_struct_"
+lambda_func_prefix = pyrex_prefix + "lambda_"
+module_is_main = pyrex_prefix + "module_is_main_"
+defaults_struct_prefix = pyrex_prefix + "defaults"
+dynamic_args_cname = pyrex_prefix + "dynamic_args"
+
+interned_prefixes = {
+ 'str': pyrex_prefix + "n_",
+ 'int': pyrex_prefix + "int_",
+ 'float': pyrex_prefix + "float_",
+ 'tuple': pyrex_prefix + "tuple_",
+ 'codeobj': pyrex_prefix + "codeobj_",
+ 'slice': pyrex_prefix + "slice_",
+ 'ustring': pyrex_prefix + "ustring_",
'umethod': pyrex_prefix + "umethod_",
-}
-
+}
+
ctuple_type_prefix = pyrex_prefix + "ctuple_"
-args_cname = pyrex_prefix + "args"
-generator_cname = pyrex_prefix + "generator"
-sent_value_cname = pyrex_prefix + "sent_value"
-pykwdlist_cname = pyrex_prefix + "pyargnames"
-obj_base_cname = pyrex_prefix + "base"
-builtins_cname = pyrex_prefix + "b"
-preimport_cname = pyrex_prefix + "i"
-moddict_cname = pyrex_prefix + "d"
-dummy_cname = pyrex_prefix + "dummy"
-filename_cname = pyrex_prefix + "filename"
-modulename_cname = pyrex_prefix + "modulename"
-filetable_cname = pyrex_prefix + "f"
-intern_tab_cname = pyrex_prefix + "intern_tab"
-kwds_cname = pyrex_prefix + "kwds"
-lineno_cname = pyrex_prefix + "lineno"
-clineno_cname = pyrex_prefix + "clineno"
-cfilenm_cname = pyrex_prefix + "cfilenm"
+args_cname = pyrex_prefix + "args"
+generator_cname = pyrex_prefix + "generator"
+sent_value_cname = pyrex_prefix + "sent_value"
+pykwdlist_cname = pyrex_prefix + "pyargnames"
+obj_base_cname = pyrex_prefix + "base"
+builtins_cname = pyrex_prefix + "b"
+preimport_cname = pyrex_prefix + "i"
+moddict_cname = pyrex_prefix + "d"
+dummy_cname = pyrex_prefix + "dummy"
+filename_cname = pyrex_prefix + "filename"
+modulename_cname = pyrex_prefix + "modulename"
+filetable_cname = pyrex_prefix + "f"
+intern_tab_cname = pyrex_prefix + "intern_tab"
+kwds_cname = pyrex_prefix + "kwds"
+lineno_cname = pyrex_prefix + "lineno"
+clineno_cname = pyrex_prefix + "clineno"
+cfilenm_cname = pyrex_prefix + "cfilenm"
local_tstate_cname = pyrex_prefix + "tstate"
-module_cname = pyrex_prefix + "m"
-moddoc_cname = pyrex_prefix + "mdoc"
-methtable_cname = pyrex_prefix + "methods"
-retval_cname = pyrex_prefix + "r"
-reqd_kwds_cname = pyrex_prefix + "reqd_kwds"
-self_cname = pyrex_prefix + "self"
-stringtab_cname = pyrex_prefix + "string_tab"
-vtabslot_cname = pyrex_prefix + "vtab"
-c_api_tab_cname = pyrex_prefix + "c_api_tab"
-gilstate_cname = pyrex_prefix + "state"
-skip_dispatch_cname = pyrex_prefix + "skip_dispatch"
-empty_tuple = pyrex_prefix + "empty_tuple"
-empty_bytes = pyrex_prefix + "empty_bytes"
+module_cname = pyrex_prefix + "m"
+moddoc_cname = pyrex_prefix + "mdoc"
+methtable_cname = pyrex_prefix + "methods"
+retval_cname = pyrex_prefix + "r"
+reqd_kwds_cname = pyrex_prefix + "reqd_kwds"
+self_cname = pyrex_prefix + "self"
+stringtab_cname = pyrex_prefix + "string_tab"
+vtabslot_cname = pyrex_prefix + "vtab"
+c_api_tab_cname = pyrex_prefix + "c_api_tab"
+gilstate_cname = pyrex_prefix + "state"
+skip_dispatch_cname = pyrex_prefix + "skip_dispatch"
+empty_tuple = pyrex_prefix + "empty_tuple"
+empty_bytes = pyrex_prefix + "empty_bytes"
empty_unicode = pyrex_prefix + "empty_unicode"
-print_function = pyrex_prefix + "print"
-print_function_kwargs = pyrex_prefix + "print_kwargs"
-cleanup_cname = pyrex_prefix + "module_cleanup"
-pymoduledef_cname = pyrex_prefix + "moduledef"
+print_function = pyrex_prefix + "print"
+print_function_kwargs = pyrex_prefix + "print_kwargs"
+cleanup_cname = pyrex_prefix + "module_cleanup"
+pymoduledef_cname = pyrex_prefix + "moduledef"
pymoduledef_slots_cname = pyrex_prefix + "moduledef_slots"
pymodinit_module_arg = pyrex_prefix + "pyinit_module"
pymodule_create_func_cname = pyrex_prefix + "pymod_create"
pymodule_exec_func_cname = pyrex_prefix + "pymod_exec"
-optional_args_cname = pyrex_prefix + "optional_args"
-import_star = pyrex_prefix + "import_star"
-import_star_set = pyrex_prefix + "import_star_set"
-outer_scope_cname= pyrex_prefix + "outer_scope"
-cur_scope_cname = pyrex_prefix + "cur_scope"
-enc_scope_cname = pyrex_prefix + "enc_scope"
-frame_cname = pyrex_prefix + "frame"
-frame_code_cname = pyrex_prefix + "frame_code"
-binding_cfunc = pyrex_prefix + "binding_PyCFunctionType"
-fused_func_prefix = pyrex_prefix + 'fuse_'
-quick_temp_cname = pyrex_prefix + "temp" # temp variable for quick'n'dirty temping
+optional_args_cname = pyrex_prefix + "optional_args"
+import_star = pyrex_prefix + "import_star"
+import_star_set = pyrex_prefix + "import_star_set"
+outer_scope_cname= pyrex_prefix + "outer_scope"
+cur_scope_cname = pyrex_prefix + "cur_scope"
+enc_scope_cname = pyrex_prefix + "enc_scope"
+frame_cname = pyrex_prefix + "frame"
+frame_code_cname = pyrex_prefix + "frame_code"
+binding_cfunc = pyrex_prefix + "binding_PyCFunctionType"
+fused_func_prefix = pyrex_prefix + 'fuse_'
+quick_temp_cname = pyrex_prefix + "temp" # temp variable for quick'n'dirty temping
tp_dict_version_temp = pyrex_prefix + "tp_dict_version"
obj_dict_version_temp = pyrex_prefix + "obj_dict_version"
type_dict_guard_temp = pyrex_prefix + "type_dict_guard"
cython_runtime_cname = pyrex_prefix + "cython_runtime"
-
-global_code_object_cache_find = pyrex_prefix + 'find_code_object'
-global_code_object_cache_insert = pyrex_prefix + 'insert_code_object'
-
-genexpr_id_ref = 'genexpr'
-freelist_name = 'freelist'
-freecount_name = 'freecount'
-
-line_c_macro = "__LINE__"
-
-file_c_macro = "__FILE__"
-
-extern_c_macro = pyrex_prefix.upper() + "EXTERN_C"
-
-exc_type_name = pyrex_prefix + "exc_type"
-exc_value_name = pyrex_prefix + "exc_value"
-exc_tb_name = pyrex_prefix + "exc_tb"
-exc_lineno_name = pyrex_prefix + "exc_lineno"
-
-parallel_exc_type = pyrex_prefix + "parallel_exc_type"
-parallel_exc_value = pyrex_prefix + "parallel_exc_value"
-parallel_exc_tb = pyrex_prefix + "parallel_exc_tb"
-parallel_filename = pyrex_prefix + "parallel_filename"
-parallel_lineno = pyrex_prefix + "parallel_lineno"
-parallel_clineno = pyrex_prefix + "parallel_clineno"
-parallel_why = pyrex_prefix + "parallel_why"
-
-exc_vars = (exc_type_name, exc_value_name, exc_tb_name)
-
-api_name = pyrex_prefix + "capi__"
-
-h_guard_prefix = "__PYX_HAVE__"
-api_guard_prefix = "__PYX_HAVE_API__"
-api_func_guard = "__PYX_HAVE_API_FUNC_"
-
-PYX_NAN = "__PYX_NAN()"
-
-def py_version_hex(major, minor=0, micro=0, release_level=0, release_serial=0):
- return (major << 24) | (minor << 16) | (micro << 8) | (release_level << 4) | (release_serial)
+
+global_code_object_cache_find = pyrex_prefix + 'find_code_object'
+global_code_object_cache_insert = pyrex_prefix + 'insert_code_object'
+
+genexpr_id_ref = 'genexpr'
+freelist_name = 'freelist'
+freecount_name = 'freecount'
+
+line_c_macro = "__LINE__"
+
+file_c_macro = "__FILE__"
+
+extern_c_macro = pyrex_prefix.upper() + "EXTERN_C"
+
+exc_type_name = pyrex_prefix + "exc_type"
+exc_value_name = pyrex_prefix + "exc_value"
+exc_tb_name = pyrex_prefix + "exc_tb"
+exc_lineno_name = pyrex_prefix + "exc_lineno"
+
+parallel_exc_type = pyrex_prefix + "parallel_exc_type"
+parallel_exc_value = pyrex_prefix + "parallel_exc_value"
+parallel_exc_tb = pyrex_prefix + "parallel_exc_tb"
+parallel_filename = pyrex_prefix + "parallel_filename"
+parallel_lineno = pyrex_prefix + "parallel_lineno"
+parallel_clineno = pyrex_prefix + "parallel_clineno"
+parallel_why = pyrex_prefix + "parallel_why"
+
+exc_vars = (exc_type_name, exc_value_name, exc_tb_name)
+
+api_name = pyrex_prefix + "capi__"
+
+h_guard_prefix = "__PYX_HAVE__"
+api_guard_prefix = "__PYX_HAVE_API__"
+api_func_guard = "__PYX_HAVE_API_FUNC_"
+
+PYX_NAN = "__PYX_NAN()"
+
+def py_version_hex(major, minor=0, micro=0, release_level=0, release_serial=0):
+ return (major << 24) | (minor << 16) | (micro << 8) | (release_level << 4) | (release_serial)
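py_version_hex packs the components into the same layout as CPython's PY_VERSION_HEX: one byte each for major, minor and micro, then a nibble each for release level and serial, so the results compare correctly as plain integers. A quick check:

def py_version_hex(major, minor=0, micro=0, release_level=0, release_serial=0):
    return (major << 24) | (minor << 16) | (micro << 8) | (release_level << 4) | (release_serial)

print(hex(py_version_hex(3, 7, 4)))                        # 0x3070400
print(py_version_hex(3, 7, 4) >= py_version_hex(3, 5))     # True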
diff --git a/contrib/tools/cython/Cython/Compiler/Nodes.py b/contrib/tools/cython/Cython/Compiler/Nodes.py
index c54707ec40..6436c5002d 100644
--- a/contrib/tools/cython/Cython/Compiler/Nodes.py
+++ b/contrib/tools/cython/Cython/Compiler/Nodes.py
@@ -1,73 +1,73 @@
-#
-# Parse tree nodes
-#
-
-from __future__ import absolute_import
-
-import cython
-cython.declare(sys=object, os=object, copy=object,
- Builtin=object, error=object, warning=object, Naming=object, PyrexTypes=object,
- py_object_type=object, ModuleScope=object, LocalScope=object, ClosureScope=object,
- StructOrUnionScope=object, PyClassScope=object,
- CppClassScope=object, UtilityCode=object, EncodedString=object,
+#
+# Parse tree nodes
+#
+
+from __future__ import absolute_import
+
+import cython
+cython.declare(sys=object, os=object, copy=object,
+ Builtin=object, error=object, warning=object, Naming=object, PyrexTypes=object,
+ py_object_type=object, ModuleScope=object, LocalScope=object, ClosureScope=object,
+ StructOrUnionScope=object, PyClassScope=object,
+ CppClassScope=object, UtilityCode=object, EncodedString=object,
error_type=object, _py_int_types=object)
-
-import sys, os, copy
-from itertools import chain
-
-from . import Builtin
-from .Errors import error, warning, InternalError, CompileError
-from . import Naming
-from . import PyrexTypes
-from . import TypeSlots
-from .PyrexTypes import py_object_type, error_type
-from .Symtab import (ModuleScope, LocalScope, ClosureScope,
+
+import sys, os, copy
+from itertools import chain
+
+from . import Builtin
+from .Errors import error, warning, InternalError, CompileError
+from . import Naming
+from . import PyrexTypes
+from . import TypeSlots
+from .PyrexTypes import py_object_type, error_type
+from .Symtab import (ModuleScope, LocalScope, ClosureScope,
StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope)
-from .Code import UtilityCode
+from .Code import UtilityCode
from .StringEncoding import EncodedString
from . import Future
-from . import Options
-from . import DebugFlags
+from . import Options
+from . import DebugFlags
from .Pythran import has_np_pythran, pythran_type, is_pythran_buffer
from ..Utils import add_metaclass
-
-
+
+
if sys.version_info[0] >= 3:
_py_int_types = int
else:
_py_int_types = (int, long)
-
-def relative_position(pos):
+
+def relative_position(pos):
return (pos[0].get_filenametable_entry(), pos[1])
-
-
-def embed_position(pos, docstring):
- if not Options.embed_pos_in_docstring:
- return docstring
- pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
- if docstring is None:
- # unicode string
- return EncodedString(pos_line)
-
- # make sure we can encode the filename in the docstring encoding
- # otherwise make the docstring a unicode string
- encoding = docstring.encoding
- if encoding is not None:
- try:
- pos_line.encode(encoding)
- except UnicodeEncodeError:
- encoding = None
-
- if not docstring:
- # reuse the string encoding of the original docstring
- doc = EncodedString(pos_line)
- else:
- doc = EncodedString(pos_line + u'\n' + docstring)
- doc.encoding = encoding
- return doc
-
-
+
+
+def embed_position(pos, docstring):
+ if not Options.embed_pos_in_docstring:
+ return docstring
+ pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
+ if docstring is None:
+ # unicode string
+ return EncodedString(pos_line)
+
+ # make sure we can encode the filename in the docstring encoding
+ # otherwise make the docstring a unicode string
+ encoding = docstring.encoding
+ if encoding is not None:
+ try:
+ pos_line.encode(encoding)
+ except UnicodeEncodeError:
+ encoding = None
+
+ if not docstring:
+ # reuse the string encoding of the original docstring
+ doc = EncodedString(pos_line)
+ else:
+ doc = EncodedString(pos_line + u'\n' + docstring)
+ doc.encoding = encoding
+ return doc
+
+
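embed_position prepends a 'File: ... (starting at line ...)' header to the docstring and falls back to a plain unicode docstring when the filename cannot be encoded in the docstring's encoding. The resulting string looks like this (file name and position invented):

pos_line = u'File: %s (starting at line %s)' % ("spam.pyx", 42)
docstring = u"Frobnicate the widget."
print(pos_line + u'\n' + docstring)
# File: spam.pyx (starting at line 42)
# Frobnicate the widget.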
def analyse_type_annotation(annotation, env, assigned_value=None):
base_type = None
is_ambiguous = False
@@ -115,72 +115,72 @@ def analyse_type_annotation(annotation, env, assigned_value=None):
return base_type, arg_type
-def write_func_call(func, codewriter_class):
- def f(*args, **kwds):
- if len(args) > 1 and isinstance(args[1], codewriter_class):
- # here we annotate the code with this function call
- # but only if new code is generated
- node, code = args[:2]
- marker = ' /* %s -> %s.%s %s */' % (
+def write_func_call(func, codewriter_class):
+ def f(*args, **kwds):
+ if len(args) > 1 and isinstance(args[1], codewriter_class):
+ # here we annotate the code with this function call
+ # but only if new code is generated
+ node, code = args[:2]
+ marker = ' /* %s -> %s.%s %s */' % (
' ' * code.call_level,
node.__class__.__name__,
func.__name__,
node.pos[1:])
- pristine = code.buffer.stream.tell()
- code.putln(marker)
- start = code.buffer.stream.tell()
- code.call_level += 4
- res = func(*args, **kwds)
- code.call_level -= 4
- if start == code.buffer.stream.tell():
+ pristine = code.buffer.stream.tell()
+ code.putln(marker)
+ start = code.buffer.stream.tell()
+ code.call_level += 4
+ res = func(*args, **kwds)
+ code.call_level -= 4
+ if start == code.buffer.stream.tell():
# no code written => undo writing marker
code.buffer.stream.truncate(pristine)
- else:
+ else:
marker = marker.replace('->', '<-', 1)
- code.putln(marker)
- return res
- else:
- return func(*args, **kwds)
- return f
-
-
-class VerboseCodeWriter(type):
- # Set this as a metaclass to trace function calls in code.
- # This slows down code generation and makes much larger files.
- def __new__(cls, name, bases, attrs):
- from types import FunctionType
- from .Code import CCodeWriter
- attrs = dict(attrs)
- for mname, m in attrs.items():
- if isinstance(m, FunctionType):
- attrs[mname] = write_func_call(m, CCodeWriter)
- return super(VerboseCodeWriter, cls).__new__(cls, name, bases, attrs)
-
-
-class CheckAnalysers(type):
- """Metaclass to check that type analysis functions return a node.
- """
- methods = set(['analyse_types',
- 'analyse_expressions',
- 'analyse_target_types'])
-
- def __new__(cls, name, bases, attrs):
- from types import FunctionType
- def check(name, func):
- def call(*args, **kwargs):
- retval = func(*args, **kwargs)
- if retval is None:
+ code.putln(marker)
+ return res
+ else:
+ return func(*args, **kwds)
+ return f
+
+
+class VerboseCodeWriter(type):
+ # Set this as a metaclass to trace function calls in code.
+ # This slows down code generation and makes much larger files.
+ def __new__(cls, name, bases, attrs):
+ from types import FunctionType
+ from .Code import CCodeWriter
+ attrs = dict(attrs)
+ for mname, m in attrs.items():
+ if isinstance(m, FunctionType):
+ attrs[mname] = write_func_call(m, CCodeWriter)
+ return super(VerboseCodeWriter, cls).__new__(cls, name, bases, attrs)
+
+
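write_func_call and VerboseCodeWriter implement a call-tracing pattern: a metaclass rewraps every plain function on the class so that entering and leaving it emits a marker (the real version also rewinds the output buffer when nothing was written). A stripped-down sketch of the wrapping half:

from types import FunctionType

def traced(func):
    def wrapper(*args, **kwargs):
        print("-> %s" % func.__name__)
        result = func(*args, **kwargs)
        print("<- %s" % func.__name__)
        return result
    return wrapper

class Traced(type):
    # wrap every plain function defined on the class, as VerboseCodeWriter does
    def __new__(cls, name, bases, attrs):
        attrs = {k: (traced(v) if isinstance(v, FunctionType) else v)
                 for k, v in attrs.items()}
        return super(Traced, cls).__new__(cls, name, bases, attrs)

Demo = Traced("Demo", (object,), {"work": lambda self: 42})
print(Demo().work())   # -> <lambda> / <- <lambda> / 42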
+class CheckAnalysers(type):
+ """Metaclass to check that type analysis functions return a node.
+ """
+ methods = set(['analyse_types',
+ 'analyse_expressions',
+ 'analyse_target_types'])
+
+ def __new__(cls, name, bases, attrs):
+ from types import FunctionType
+ def check(name, func):
+ def call(*args, **kwargs):
+ retval = func(*args, **kwargs)
+ if retval is None:
print('%s %s %s' % (name, args, kwargs))
- return retval
- return call
-
- attrs = dict(attrs)
- for mname, m in attrs.items():
- if isinstance(m, FunctionType) and mname in cls.methods:
- attrs[mname] = check(mname, m)
- return super(CheckAnalysers, cls).__new__(cls, name, bases, attrs)
-
-
+ return retval
+ return call
+
+ attrs = dict(attrs)
+ for mname, m in attrs.items():
+ if isinstance(m, FunctionType) and mname in cls.methods:
+ attrs[mname] = check(mname, m)
+ return super(CheckAnalysers, cls).__new__(cls, name, bases, attrs)
+
+
def _with_metaclass(cls):
if DebugFlags.debug_trace_code_generation:
return add_metaclass(VerboseCodeWriter)(cls)
@@ -189,303 +189,303 @@ def _with_metaclass(cls):
@_with_metaclass
-class Node(object):
- # pos (string, int, int) Source file position
- # is_name boolean Is a NameNode
- # is_literal boolean Is a ConstNode
-
- is_name = 0
- is_none = 0
- is_nonecheck = 0
- is_literal = 0
- is_terminator = 0
+class Node(object):
+ # pos (string, int, int) Source file position
+ # is_name boolean Is a NameNode
+ # is_literal boolean Is a ConstNode
+
+ is_name = 0
+ is_none = 0
+ is_nonecheck = 0
+ is_literal = 0
+ is_terminator = 0
is_wrapper = False # is a DefNode wrapper for a C function
- temps = None
-
- # All descendants should set child_attrs to a list of the attributes
- # containing nodes considered "children" in the tree. Each such attribute
- # can either contain a single node or a list of nodes. See Visitor.py.
- child_attrs = None
-
+ temps = None
+
+ # All descendants should set child_attrs to a list of the attributes
+ # containing nodes considered "children" in the tree. Each such attribute
+ # can either contain a single node or a list of nodes. See Visitor.py.
+ child_attrs = None
+
# Subset of attributes that are evaluated in the outer scope (e.g. function default arguments).
outer_attrs = None
- cf_state = None
-
- # This may be an additional (or 'actual') type that will be checked when
- # this node is coerced to another type. This could be useful to set when
- # the actual type to which it can coerce is known, but you want to leave
- # the type a py_object_type
- coercion_type = None
-
- def __init__(self, pos, **kw):
- self.pos = pos
- self.__dict__.update(kw)
-
- gil_message = "Operation"
-
- nogil_check = None
+ cf_state = None
+
+ # This may be an additional (or 'actual') type that will be checked when
+ # this node is coerced to another type. This could be useful to set when
+ # the actual type to which it can coerce is known, but you want to leave
+ # the type a py_object_type
+ coercion_type = None
+
+ def __init__(self, pos, **kw):
+ self.pos = pos
+ self.__dict__.update(kw)
+
+ gil_message = "Operation"
+
+ nogil_check = None
in_nogil_context = False # For use only during code generation.
-
- def gil_error(self, env=None):
- error(self.pos, "%s not allowed without gil" % self.gil_message)
-
- cpp_message = "Operation"
-
- def cpp_check(self, env):
- if not env.is_cpp():
- self.cpp_error()
-
- def cpp_error(self):
- error(self.pos, "%s only allowed in c++" % self.cpp_message)
-
- def clone_node(self):
- """Clone the node. This is defined as a shallow copy, except for member lists
- amongst the child attributes (from get_child_accessors) which are also
- copied. Lists containing child nodes are thus seen as a way for the node
- to hold multiple children directly; the list is not treated as a separate
- level in the tree."""
- result = copy.copy(self)
- for attrname in result.child_attrs:
- value = getattr(result, attrname)
- if isinstance(value, list):
- setattr(result, attrname, [x for x in value])
- return result
-
-
- #
- # There are 3 phases of parse tree processing, applied in order to
- # all the statements in a given scope-block:
- #
- # (0) analyse_declarations
- # Make symbol table entries for all declarations at the current
- # level, both explicit (def, cdef, etc.) and implicit (assignment
- # to an otherwise undeclared name).
- #
- # (1) analyse_expressions
- # Determine the result types of expressions and fill in the
- # 'type' attribute of each ExprNode. Insert coercion nodes into the
- # tree where needed to convert to and from Python objects.
- # Allocate temporary locals for intermediate results. Fill
- # in the 'result_code' attribute of each ExprNode with a C code
- # fragment.
- #
- # (2) generate_code
- # Emit C code for all declarations, statements and expressions.
- # Recursively applies the 3 processing phases to the bodies of
- # functions.
- #
-
- def analyse_declarations(self, env):
- pass
-
- def analyse_expressions(self, env):
- raise InternalError("analyse_expressions not implemented for %s" % \
- self.__class__.__name__)
-
- def generate_code(self, code):
- raise InternalError("generate_code not implemented for %s" % \
- self.__class__.__name__)
-
- def annotate(self, code):
- # mro does the wrong thing
- if isinstance(self, BlockNode):
- self.body.annotate(code)
-
- def end_pos(self):
- try:
- return self._end_pos
- except AttributeError:
- pos = self.pos
- if not self.child_attrs:
- self._end_pos = pos
- return pos
- for attr in self.child_attrs:
- child = getattr(self, attr)
- # Sometimes lists, sometimes nodes
- if child is None:
- pass
- elif isinstance(child, list):
- for c in child:
- pos = max(pos, c.end_pos())
- else:
- pos = max(pos, child.end_pos())
- self._end_pos = pos
- return pos
-
- def dump(self, level=0, filter_out=("pos",), cutoff=100, encountered=None):
- """Debug helper method that returns a recursive string representation of this node.
- """
- if cutoff == 0:
- return "<...nesting level cutoff...>"
- if encountered is None:
- encountered = set()
- if id(self) in encountered:
- return "<%s (0x%x) -- already output>" % (self.__class__.__name__, id(self))
- encountered.add(id(self))
-
- def dump_child(x, level):
- if isinstance(x, Node):
- return x.dump(level, filter_out, cutoff-1, encountered)
- elif isinstance(x, list):
- return "[%s]" % ", ".join([dump_child(item, level) for item in x])
- else:
- return repr(x)
-
- attrs = [(key, value) for key, value in self.__dict__.items() if key not in filter_out]
- if len(attrs) == 0:
- return "<%s (0x%x)>" % (self.__class__.__name__, id(self))
- else:
- indent = " " * level
- res = "<%s (0x%x)\n" % (self.__class__.__name__, id(self))
- for key, value in attrs:
- res += "%s %s: %s\n" % (indent, key, dump_child(value, level + 1))
- res += "%s>" % indent
- return res
-
- def dump_pos(self, mark_column=False, marker='(#)'):
- """Debug helper method that returns the source code context of this node as a string.
- """
- if not self.pos:
- return u''
- source_desc, line, col = self.pos
+
+ def gil_error(self, env=None):
+ error(self.pos, "%s not allowed without gil" % self.gil_message)
+
+ cpp_message = "Operation"
+
+ def cpp_check(self, env):
+ if not env.is_cpp():
+ self.cpp_error()
+
+ def cpp_error(self):
+ error(self.pos, "%s only allowed in c++" % self.cpp_message)
+
+ def clone_node(self):
+ """Clone the node. This is defined as a shallow copy, except for member lists
+ amongst the child attributes (from get_child_accessors) which are also
+ copied. Lists containing child nodes are thus seen as a way for the node
+ to hold multiple children directly; the list is not treated as a separate
+ level in the tree."""
+ result = copy.copy(self)
+ for attrname in result.child_attrs:
+ value = getattr(result, attrname)
+ if isinstance(value, list):
+ setattr(result, attrname, [x for x in value])
+ return result
+
+
+ #
+ # There are 3 phases of parse tree processing, applied in order to
+ # all the statements in a given scope-block:
+ #
+ # (0) analyse_declarations
+ # Make symbol table entries for all declarations at the current
+ # level, both explicit (def, cdef, etc.) and implicit (assignment
+ # to an otherwise undeclared name).
+ #
+ # (1) analyse_expressions
+ # Determine the result types of expressions and fill in the
+ # 'type' attribute of each ExprNode. Insert coercion nodes into the
+ # tree where needed to convert to and from Python objects.
+ # Allocate temporary locals for intermediate results. Fill
+ # in the 'result_code' attribute of each ExprNode with a C code
+ # fragment.
+ #
+ # (2) generate_code
+ # Emit C code for all declarations, statements and expressions.
+ # Recursively applies the 3 processing phases to the bodies of
+ # functions.
+ #
+
+ def analyse_declarations(self, env):
+ pass
+
+ def analyse_expressions(self, env):
+ raise InternalError("analyse_expressions not implemented for %s" % \
+ self.__class__.__name__)
+
+ def generate_code(self, code):
+ raise InternalError("generate_code not implemented for %s" % \
+ self.__class__.__name__)
+
+ def annotate(self, code):
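The comment block above lists the three passes applied to every scope block. As an illustration only (not compiler code), a driver over a list of statement nodes would apply them in this order, keeping the nodes returned by analyse_expressions since that pass may replace them:

def process_block(statements, env, code):
    for stat in statements:                        # (0) declarations
        stat.analyse_declarations(env)
    statements = [stat.analyse_expressions(env)    # (1) types, coercions, temporaries
                  for stat in statements]
    for stat in statements:                        # (2) C code generation
        stat.generate_code(code)
    return statements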
+ # mro does the wrong thing
+ if isinstance(self, BlockNode):
+ self.body.annotate(code)
+
+ def end_pos(self):
+ try:
+ return self._end_pos
+ except AttributeError:
+ pos = self.pos
+ if not self.child_attrs:
+ self._end_pos = pos
+ return pos
+ for attr in self.child_attrs:
+ child = getattr(self, attr)
+ # Sometimes lists, sometimes nodes
+ if child is None:
+ pass
+ elif isinstance(child, list):
+ for c in child:
+ pos = max(pos, c.end_pos())
+ else:
+ pos = max(pos, child.end_pos())
+ self._end_pos = pos
+ return pos
+
+ def dump(self, level=0, filter_out=("pos",), cutoff=100, encountered=None):
+ """Debug helper method that returns a recursive string representation of this node.
+ """
+ if cutoff == 0:
+ return "<...nesting level cutoff...>"
+ if encountered is None:
+ encountered = set()
+ if id(self) in encountered:
+ return "<%s (0x%x) -- already output>" % (self.__class__.__name__, id(self))
+ encountered.add(id(self))
+
+ def dump_child(x, level):
+ if isinstance(x, Node):
+ return x.dump(level, filter_out, cutoff-1, encountered)
+ elif isinstance(x, list):
+ return "[%s]" % ", ".join([dump_child(item, level) for item in x])
+ else:
+ return repr(x)
+
+ attrs = [(key, value) for key, value in self.__dict__.items() if key not in filter_out]
+ if len(attrs) == 0:
+ return "<%s (0x%x)>" % (self.__class__.__name__, id(self))
+ else:
+ indent = " " * level
+ res = "<%s (0x%x)\n" % (self.__class__.__name__, id(self))
+ for key, value in attrs:
+ res += "%s %s: %s\n" % (indent, key, dump_child(value, level + 1))
+ res += "%s>" % indent
+ return res
+
+ def dump_pos(self, mark_column=False, marker='(#)'):
+ """Debug helper method that returns the source code context of this node as a string.
+ """
+ if not self.pos:
+ return u''
+ source_desc, line, col = self.pos
contents = source_desc.get_lines(encoding='ASCII', error_handling='ignore')
- # line numbers start at 1
+ # line numbers start at 1
lines = contents[max(0, line-3):line]
- current = lines[-1]
- if mark_column:
- current = current[:col] + marker + current[col:]
- lines[-1] = current.rstrip() + u' # <<<<<<<<<<<<<<\n'
- lines += contents[line:line+2]
- return u'"%s":%d:%d\n%s\n' % (
- source_desc.get_escaped_description(), line, col, u''.join(lines))
-
-class CompilerDirectivesNode(Node):
- """
- Sets compiler directives for the children nodes
- """
- # directives {string:value} A dictionary holding the right value for
- # *all* possible directives.
- # body Node
- child_attrs = ["body"]
-
- def analyse_declarations(self, env):
- old = env.directives
- env.directives = self.directives
- self.body.analyse_declarations(env)
- env.directives = old
-
- def analyse_expressions(self, env):
- old = env.directives
- env.directives = self.directives
- self.body = self.body.analyse_expressions(env)
- env.directives = old
- return self
-
- def generate_function_definitions(self, env, code):
- env_old = env.directives
- code_old = code.globalstate.directives
- code.globalstate.directives = self.directives
- self.body.generate_function_definitions(env, code)
- env.directives = env_old
- code.globalstate.directives = code_old
-
- def generate_execution_code(self, code):
- old = code.globalstate.directives
- code.globalstate.directives = self.directives
- self.body.generate_execution_code(code)
- code.globalstate.directives = old
-
- def annotate(self, code):
- old = code.globalstate.directives
- code.globalstate.directives = self.directives
- self.body.annotate(code)
- code.globalstate.directives = old
-
-class BlockNode(object):
- # Mixin class for nodes representing a declaration block.
-
- def generate_cached_builtins_decls(self, env, code):
- entries = env.global_scope().undeclared_cached_builtins
- for entry in entries:
- code.globalstate.add_cached_builtin_decl(entry)
- del entries[:]
-
- def generate_lambda_definitions(self, env, code):
- for node in env.lambda_defs:
- node.generate_function_definitions(env, code)
-
-class StatListNode(Node):
- # stats a list of StatNode
-
- child_attrs = ["stats"]
-
+ current = lines[-1]
+ if mark_column:
+ current = current[:col] + marker + current[col:]
+ lines[-1] = current.rstrip() + u' # <<<<<<<<<<<<<<\n'
+ lines += contents[line:line+2]
+ return u'"%s":%d:%d\n%s\n' % (
+ source_desc.get_escaped_description(), line, col, u''.join(lines))
+
+class CompilerDirectivesNode(Node):
+ """
+ Sets compiler directives for the children nodes
+ """
+ # directives {string:value} A dictionary holding the right value for
+ # *all* possible directives.
+ # body Node
+ child_attrs = ["body"]
+
+ def analyse_declarations(self, env):
+ old = env.directives
+ env.directives = self.directives
+ self.body.analyse_declarations(env)
+ env.directives = old
+
+ def analyse_expressions(self, env):
+ old = env.directives
+ env.directives = self.directives
+ self.body = self.body.analyse_expressions(env)
+ env.directives = old
+ return self
+
+ def generate_function_definitions(self, env, code):
+ env_old = env.directives
+ code_old = code.globalstate.directives
+ code.globalstate.directives = self.directives
+ self.body.generate_function_definitions(env, code)
+ env.directives = env_old
+ code.globalstate.directives = code_old
+
+ def generate_execution_code(self, code):
+ old = code.globalstate.directives
+ code.globalstate.directives = self.directives
+ self.body.generate_execution_code(code)
+ code.globalstate.directives = old
+
+ def annotate(self, code):
+ old = code.globalstate.directives
+ code.globalstate.directives = self.directives
+ self.body.annotate(code)
+ code.globalstate.directives = old
+
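Every method of CompilerDirectivesNode repeats the same save/set/restore dance around env.directives or code.globalstate.directives. The pattern on its own, written as a context manager (a sketch of the idea, not how the class is actually implemented):

from contextlib import contextmanager

@contextmanager
def override_directives(holder, new_directives):
    old = holder.directives
    holder.directives = new_directives
    try:
        yield
    finally:
        holder.directives = old

# usage sketch:
# with override_directives(env, self.directives):
#     self.body.analyse_declarations(env)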
+class BlockNode(object):
+ # Mixin class for nodes representing a declaration block.
+
+ def generate_cached_builtins_decls(self, env, code):
+ entries = env.global_scope().undeclared_cached_builtins
+ for entry in entries:
+ code.globalstate.add_cached_builtin_decl(entry)
+ del entries[:]
+
+ def generate_lambda_definitions(self, env, code):
+ for node in env.lambda_defs:
+ node.generate_function_definitions(env, code)
+
+class StatListNode(Node):
+ # stats a list of StatNode
+
+ child_attrs = ["stats"]
+
@staticmethod
- def create_analysed(pos, env, *args, **kw):
- node = StatListNode(pos, *args, **kw)
+ def create_analysed(pos, env, *args, **kw):
+ node = StatListNode(pos, *args, **kw)
return node # No node-specific analysis needed
-
- def analyse_declarations(self, env):
- #print "StatListNode.analyse_declarations" ###
- for stat in self.stats:
- stat.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- #print "StatListNode.analyse_expressions" ###
+
+ def analyse_declarations(self, env):
+ #print "StatListNode.analyse_declarations" ###
+ for stat in self.stats:
+ stat.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ #print "StatListNode.analyse_expressions" ###
self.stats = [stat.analyse_expressions(env)
for stat in self.stats]
- return self
-
- def generate_function_definitions(self, env, code):
- #print "StatListNode.generate_function_definitions" ###
- for stat in self.stats:
- stat.generate_function_definitions(env, code)
-
- def generate_execution_code(self, code):
- #print "StatListNode.generate_execution_code" ###
- for stat in self.stats:
- code.mark_pos(stat.pos)
- stat.generate_execution_code(code)
-
- def annotate(self, code):
- for stat in self.stats:
- stat.annotate(code)
-
-
-class StatNode(Node):
- #
- # Code generation for statements is split into the following subphases:
- #
- # (1) generate_function_definitions
- # Emit C code for the definitions of any structs,
- # unions, enums and functions defined in the current
- # scope-block.
- #
- # (2) generate_execution_code
- # Emit C code for executable statements.
- #
-
- def generate_function_definitions(self, env, code):
- pass
-
- def generate_execution_code(self, code):
- raise InternalError("generate_execution_code not implemented for %s" % \
- self.__class__.__name__)
-
-
-class CDefExternNode(StatNode):
+ return self
+
+ def generate_function_definitions(self, env, code):
+ #print "StatListNode.generate_function_definitions" ###
+ for stat in self.stats:
+ stat.generate_function_definitions(env, code)
+
+ def generate_execution_code(self, code):
+ #print "StatListNode.generate_execution_code" ###
+ for stat in self.stats:
+ code.mark_pos(stat.pos)
+ stat.generate_execution_code(code)
+
+ def annotate(self, code):
+ for stat in self.stats:
+ stat.annotate(code)
+
+
+class StatNode(Node):
+ #
+ # Code generation for statements is split into the following subphases:
+ #
+ # (1) generate_function_definitions
+ # Emit C code for the definitions of any structs,
+ # unions, enums and functions defined in the current
+ # scope-block.
+ #
+ # (2) generate_execution_code
+ # Emit C code for executable statements.
+ #
+
+ def generate_function_definitions(self, env, code):
+ pass
+
+ def generate_execution_code(self, code):
+ raise InternalError("generate_execution_code not implemented for %s" % \
+ self.__class__.__name__)
+
+
+class CDefExternNode(StatNode):
# include_file string or None
# verbatim_include string or None
# body StatListNode
-
- child_attrs = ["body"]
-
- def analyse_declarations(self, env):
- old_cinclude_flag = env.in_cinclude
- env.in_cinclude = 1
- self.body.analyse_declarations(env)
- env.in_cinclude = old_cinclude_flag
-
+
+ child_attrs = ["body"]
+
+ def analyse_declarations(self, env):
+ old_cinclude_flag = env.in_cinclude
+ env.in_cinclude = 1
+ self.body.analyse_declarations(env)
+ env.in_cinclude = old_cinclude_flag
+
if self.include_file or self.verbatim_include:
# Determine whether include should be late
stats = self.body.stats
@@ -498,231 +498,231 @@ class CDefExternNode(StatNode):
late = all(isinstance(node, CVarDefNode) for node in stats)
env.add_include_file(self.include_file, self.verbatim_include, late)
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
- def annotate(self, code):
- self.body.annotate(code)
-
-
-class CDeclaratorNode(Node):
- # Part of a C declaration.
- #
- # Processing during analyse_declarations phase:
- #
- # analyse
- # Returns (name, type) pair where name is the
- # CNameDeclaratorNode of the name being declared
- # and type is the type it is being declared as.
- #
- # calling_convention string Calling convention of CFuncDeclaratorNode
- # for which this is a base
-
- child_attrs = []
-
- calling_convention = ""
-
- def analyse_templates(self):
- # Only C++ functions have templates.
- return None
-
-
-class CNameDeclaratorNode(CDeclaratorNode):
- # name string The Cython name being declared
- # cname string or None C name, if specified
- # default ExprNode or None the value assigned on declaration
-
- child_attrs = ['default']
-
- default = None
-
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+ def annotate(self, code):
+ self.body.annotate(code)
+
+
+class CDeclaratorNode(Node):
+ # Part of a C declaration.
+ #
+ # Processing during analyse_declarations phase:
+ #
+ # analyse
+ # Returns (name, type) pair where name is the
+ # CNameDeclaratorNode of the name being declared
+ # and type is the type it is being declared as.
+ #
+ # calling_convention string Calling convention of CFuncDeclaratorNode
+ # for which this is a base
+
+ child_attrs = []
+
+ calling_convention = ""
+
+ def analyse_templates(self):
+ # Only C++ functions have templates.
+ return None
+
+
+class CNameDeclaratorNode(CDeclaratorNode):
+ # name string The Cython name being declared
+ # cname string or None C name, if specified
+ # default ExprNode or None the value assigned on declaration
+
+ child_attrs = ['default']
+
+ default = None
+
def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False):
- if nonempty and self.name == '':
- # May have mistaken the name for the type.
- if base_type.is_ptr or base_type.is_array or base_type.is_buffer:
- error(self.pos, "Missing argument name")
- elif base_type.is_void:
- error(self.pos, "Use spam() rather than spam(void) to declare a function with no arguments.")
- else:
- self.name = base_type.declaration_code("", for_display=1, pyrex=1)
- base_type = py_object_type
-
- if base_type.is_fused and env.fused_to_specific:
- base_type = base_type.specialize(env.fused_to_specific)
-
- self.type = base_type
- return self, base_type
-
-
-class CPtrDeclaratorNode(CDeclaratorNode):
- # base CDeclaratorNode
-
- child_attrs = ["base"]
-
+ if nonempty and self.name == '':
+ # May have mistaken the name for the type.
+ if base_type.is_ptr or base_type.is_array or base_type.is_buffer:
+ error(self.pos, "Missing argument name")
+ elif base_type.is_void:
+ error(self.pos, "Use spam() rather than spam(void) to declare a function with no arguments.")
+ else:
+ self.name = base_type.declaration_code("", for_display=1, pyrex=1)
+ base_type = py_object_type
+
+ if base_type.is_fused and env.fused_to_specific:
+ base_type = base_type.specialize(env.fused_to_specific)
+
+ self.type = base_type
+ return self, base_type
+
+
+class CPtrDeclaratorNode(CDeclaratorNode):
+ # base CDeclaratorNode
+
+ child_attrs = ["base"]
+
def analyse_templates(self):
return self.base.analyse_templates()
def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False):
- if base_type.is_pyobject:
+ if base_type.is_pyobject:
error(self.pos, "Pointer base type cannot be a Python object")
- ptr_type = PyrexTypes.c_ptr_type(base_type)
+ ptr_type = PyrexTypes.c_ptr_type(base_type)
return self.base.analyse(ptr_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd)
-
-class CReferenceDeclaratorNode(CDeclaratorNode):
- # base CDeclaratorNode
-
- child_attrs = ["base"]
-
+
+class CReferenceDeclaratorNode(CDeclaratorNode):
+ # base CDeclaratorNode
+
+ child_attrs = ["base"]
+
def analyse_templates(self):
return self.base.analyse_templates()
def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False):
- if base_type.is_pyobject:
+ if base_type.is_pyobject:
error(self.pos, "Reference base type cannot be a Python object")
- ref_type = PyrexTypes.c_ref_type(base_type)
+ ref_type = PyrexTypes.c_ref_type(base_type)
return self.base.analyse(ref_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd)
-
-
-class CArrayDeclaratorNode(CDeclaratorNode):
- # base CDeclaratorNode
- # dimension ExprNode
-
- child_attrs = ["base", "dimension"]
-
+
+
+class CArrayDeclaratorNode(CDeclaratorNode):
+ # base CDeclaratorNode
+ # dimension ExprNode
+
+ child_attrs = ["base", "dimension"]
+
def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False):
if (base_type.is_cpp_class and base_type.is_template_type()) or base_type.is_cfunction:
- from .ExprNodes import TupleNode
- if isinstance(self.dimension, TupleNode):
- args = self.dimension.args
- else:
- args = self.dimension,
- values = [v.analyse_as_type(env) for v in args]
- if None in values:
- ix = values.index(None)
- error(args[ix].pos, "Template parameter not a type")
- base_type = error_type
- else:
- base_type = base_type.specialize_here(self.pos, values)
+ from .ExprNodes import TupleNode
+ if isinstance(self.dimension, TupleNode):
+ args = self.dimension.args
+ else:
+ args = self.dimension,
+ values = [v.analyse_as_type(env) for v in args]
+ if None in values:
+ ix = values.index(None)
+ error(args[ix].pos, "Template parameter not a type")
+ base_type = error_type
+ else:
+ base_type = base_type.specialize_here(self.pos, values)
return self.base.analyse(base_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd)
- if self.dimension:
- self.dimension = self.dimension.analyse_const_expression(env)
- if not self.dimension.type.is_int:
- error(self.dimension.pos, "Array dimension not integer")
- size = self.dimension.get_constant_c_result_code()
- if size is not None:
- try:
- size = int(size)
- except ValueError:
- # runtime constant?
- pass
- else:
- size = None
- if not base_type.is_complete():
+ if self.dimension:
+ self.dimension = self.dimension.analyse_const_expression(env)
+ if not self.dimension.type.is_int:
+ error(self.dimension.pos, "Array dimension not integer")
+ size = self.dimension.get_constant_c_result_code()
+ if size is not None:
+ try:
+ size = int(size)
+ except ValueError:
+ # runtime constant?
+ pass
+ else:
+ size = None
+ if not base_type.is_complete():
error(self.pos, "Array element type '%s' is incomplete" % base_type)
- if base_type.is_pyobject:
+ if base_type.is_pyobject:
error(self.pos, "Array element cannot be a Python object")
- if base_type.is_cfunction:
+ if base_type.is_cfunction:
error(self.pos, "Array element cannot be a function")
- array_type = PyrexTypes.c_array_type(base_type, size)
+ array_type = PyrexTypes.c_array_type(base_type, size)
return self.base.analyse(array_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd)
-
-
-class CFuncDeclaratorNode(CDeclaratorNode):
- # base CDeclaratorNode
- # args [CArgDeclNode]
- # templates [TemplatePlaceholderType]
- # has_varargs boolean
- # exception_value ConstNode
- # exception_check boolean True if PyErr_Occurred check needed
- # nogil boolean Can be called without gil
- # with_gil boolean Acquire gil around function body
- # is_const_method boolean Whether this is a const method
-
- child_attrs = ["base", "args", "exception_value"]
-
- overridable = 0
- optional_arg_count = 0
- is_const_method = 0
- templates = None
-
- def analyse_templates(self):
- if isinstance(self.base, CArrayDeclaratorNode):
- from .ExprNodes import TupleNode, NameNode
- template_node = self.base.dimension
- if isinstance(template_node, TupleNode):
- template_nodes = template_node.args
- elif isinstance(template_node, NameNode):
- template_nodes = [template_node]
- else:
- error(template_node.pos, "Template arguments must be a list of names")
- return None
- self.templates = []
- for template in template_nodes:
- if isinstance(template, NameNode):
- self.templates.append(PyrexTypes.TemplatePlaceholderType(template.name))
- else:
- error(template.pos, "Template arguments must be a list of names")
- self.base = self.base.base
- return self.templates
- else:
- return None
-
+
+
+class CFuncDeclaratorNode(CDeclaratorNode):
+ # base CDeclaratorNode
+ # args [CArgDeclNode]
+ # templates [TemplatePlaceholderType]
+ # has_varargs boolean
+ # exception_value ConstNode
+ # exception_check boolean True if PyErr_Occurred check needed
+ # nogil boolean Can be called without gil
+ # with_gil boolean Acquire gil around function body
+ # is_const_method boolean Whether this is a const method
+
+ child_attrs = ["base", "args", "exception_value"]
+
+ overridable = 0
+ optional_arg_count = 0
+ is_const_method = 0
+ templates = None
+
+ def analyse_templates(self):
+ if isinstance(self.base, CArrayDeclaratorNode):
+ from .ExprNodes import TupleNode, NameNode
+ template_node = self.base.dimension
+ if isinstance(template_node, TupleNode):
+ template_nodes = template_node.args
+ elif isinstance(template_node, NameNode):
+ template_nodes = [template_node]
+ else:
+ error(template_node.pos, "Template arguments must be a list of names")
+ return None
+ self.templates = []
+ for template in template_nodes:
+ if isinstance(template, NameNode):
+ self.templates.append(PyrexTypes.TemplatePlaceholderType(template.name))
+ else:
+ error(template.pos, "Template arguments must be a list of names")
+ self.base = self.base.base
+ return self.templates
+ else:
+ return None
+
def analyse(self, return_type, env, nonempty=0, directive_locals=None, visibility=None, in_pxd=False):
if directive_locals is None:
directive_locals = {}
- if nonempty:
- nonempty -= 1
- func_type_args = []
- for i, arg_node in enumerate(self.args):
- name_declarator, type = arg_node.analyse(
+ if nonempty:
+ nonempty -= 1
+ func_type_args = []
+ for i, arg_node in enumerate(self.args):
+ name_declarator, type = arg_node.analyse(
env, nonempty=nonempty,
is_self_arg=(i == 0 and env.is_c_class_scope and 'staticmethod' not in env.directives))
- name = name_declarator.name
- if name in directive_locals:
- type_node = directive_locals[name]
- other_type = type_node.analyse_as_type(env)
- if other_type is None:
- error(type_node.pos, "Not a type")
- elif (type is not PyrexTypes.py_object_type
- and not type.same_as(other_type)):
- error(self.base.pos, "Signature does not agree with previous declaration")
- error(type_node.pos, "Previous declaration here")
- else:
- type = other_type
- if name_declarator.cname:
+ name = name_declarator.name
+ if name in directive_locals:
+ type_node = directive_locals[name]
+ other_type = type_node.analyse_as_type(env)
+ if other_type is None:
+ error(type_node.pos, "Not a type")
+ elif (type is not PyrexTypes.py_object_type
+ and not type.same_as(other_type)):
+ error(self.base.pos, "Signature does not agree with previous declaration")
+ error(type_node.pos, "Previous declaration here")
+ else:
+ type = other_type
+ if name_declarator.cname:
error(self.pos, "Function argument cannot have C name specification")
if i == 0 and env.is_c_class_scope and type.is_unspecified:
- # fix the type of self
- type = env.parent_type
- # Turn *[] argument into **
- if type.is_array:
- type = PyrexTypes.c_ptr_type(type.base_type)
- # Catch attempted C-style func(void) decl
- if type.is_void:
- error(arg_node.pos, "Use spam() rather than spam(void) to declare a function with no arguments.")
- func_type_args.append(
- PyrexTypes.CFuncTypeArg(name, type, arg_node.pos))
- if arg_node.default:
- self.optional_arg_count += 1
- elif self.optional_arg_count:
- error(self.pos, "Non-default argument follows default argument")
-
- exc_val = None
- exc_check = 0
- if self.exception_check == '+':
- env.add_include_file('ios') # for std::ios_base::failure
- env.add_include_file('new') # for std::bad_alloc
- env.add_include_file('stdexcept')
- env.add_include_file('typeinfo') # for std::bad_cast
- if (return_type.is_pyobject
- and (self.exception_value or self.exception_check)
- and self.exception_check != '+'):
+ # fix the type of self
+ type = env.parent_type
+ # Turn *[] argument into **
+ if type.is_array:
+ type = PyrexTypes.c_ptr_type(type.base_type)
+ # Catch attempted C-style func(void) decl
+ if type.is_void:
+ error(arg_node.pos, "Use spam() rather than spam(void) to declare a function with no arguments.")
+ func_type_args.append(
+ PyrexTypes.CFuncTypeArg(name, type, arg_node.pos))
+ if arg_node.default:
+ self.optional_arg_count += 1
+ elif self.optional_arg_count:
+ error(self.pos, "Non-default argument follows default argument")
+
+ exc_val = None
+ exc_check = 0
+ if self.exception_check == '+':
+ env.add_include_file('ios') # for std::ios_base::failure
+ env.add_include_file('new') # for std::bad_alloc
+ env.add_include_file('stdexcept')
+ env.add_include_file('typeinfo') # for std::bad_cast
+ if (return_type.is_pyobject
+ and (self.exception_value or self.exception_check)
+ and self.exception_check != '+'):
error(self.pos, "Exception clause not allowed for function returning Python object")
- else:
+ else:
if self.exception_value is None and self.exception_check and self.exception_check != '+':
# Use an explicit exception return value to speed up exception checks.
# Even if it is not declared, we can use the default exception value of the return type,
@@ -733,489 +733,489 @@ class CFuncDeclaratorNode(CDeclaratorNode):
from .ExprNodes import ConstNode
self.exception_value = ConstNode(
self.pos, value=return_type.exception_value, type=return_type)
- if self.exception_value:
- self.exception_value = self.exception_value.analyse_const_expression(env)
- if self.exception_check == '+':
- exc_val_type = self.exception_value.type
- if (not exc_val_type.is_error
- and not exc_val_type.is_pyobject
- and not (exc_val_type.is_cfunction
- and not exc_val_type.return_type.is_pyobject
+ if self.exception_value:
+ self.exception_value = self.exception_value.analyse_const_expression(env)
+ if self.exception_check == '+':
+ exc_val_type = self.exception_value.type
+ if (not exc_val_type.is_error
+ and not exc_val_type.is_pyobject
+ and not (exc_val_type.is_cfunction
+ and not exc_val_type.return_type.is_pyobject
and not exc_val_type.args)
and not (exc_val_type == PyrexTypes.c_char_type
and self.exception_value.value == '*')):
- error(self.exception_value.pos,
+ error(self.exception_value.pos,
"Exception value must be a Python exception or cdef function with no arguments or *.")
- exc_val = self.exception_value
- else:
- self.exception_value = self.exception_value.coerce_to(
- return_type, env).analyse_const_expression(env)
- exc_val = self.exception_value.get_constant_c_result_code()
- if exc_val is None:
- raise InternalError(
- "get_constant_c_result_code not implemented for %s" %
- self.exception_value.__class__.__name__)
- if not return_type.assignable_from(self.exception_value.type):
- error(self.exception_value.pos,
- "Exception value incompatible with function return type")
- exc_check = self.exception_check
- if return_type.is_cfunction:
+ exc_val = self.exception_value
+ else:
+ self.exception_value = self.exception_value.coerce_to(
+ return_type, env).analyse_const_expression(env)
+ exc_val = self.exception_value.get_constant_c_result_code()
+ if exc_val is None:
+ raise InternalError(
+ "get_constant_c_result_code not implemented for %s" %
+ self.exception_value.__class__.__name__)
+ if not return_type.assignable_from(self.exception_value.type):
+ error(self.exception_value.pos,
+ "Exception value incompatible with function return type")
+ exc_check = self.exception_check
+ if return_type.is_cfunction:
error(self.pos, "Function cannot return a function")
- func_type = PyrexTypes.CFuncType(
- return_type, func_type_args, self.has_varargs,
+ func_type = PyrexTypes.CFuncType(
+ return_type, func_type_args, self.has_varargs,
optional_arg_count=self.optional_arg_count,
exception_value=exc_val, exception_check=exc_check,
calling_convention=self.base.calling_convention,
nogil=self.nogil, with_gil=self.with_gil, is_overridable=self.overridable,
is_const_method=self.is_const_method,
templates=self.templates)
-
- if self.optional_arg_count:
- if func_type.is_fused:
- # This is a bit of a hack... When we need to create specialized CFuncTypes
- # on the fly because the cdef is defined in a pxd, we need to declare the specialized optional arg
- # struct
- def declare_opt_arg_struct(func_type, fused_cname):
- self.declare_optional_arg_struct(func_type, env, fused_cname)
-
- func_type.declare_opt_arg_struct = declare_opt_arg_struct
- else:
- self.declare_optional_arg_struct(func_type, env)
-
- callspec = env.directives['callspec']
- if callspec:
- current = func_type.calling_convention
- if current and current != callspec:
- error(self.pos, "cannot have both '%s' and '%s' "
- "calling conventions" % (current, callspec))
- func_type.calling_convention = callspec
+
+ if self.optional_arg_count:
+ if func_type.is_fused:
+ # This is a bit of a hack... When we need to create specialized CFuncTypes
+ # on the fly because the cdef is defined in a pxd, we need to declare the specialized optional arg
+ # struct
+ def declare_opt_arg_struct(func_type, fused_cname):
+ self.declare_optional_arg_struct(func_type, env, fused_cname)
+
+ func_type.declare_opt_arg_struct = declare_opt_arg_struct
+ else:
+ self.declare_optional_arg_struct(func_type, env)
+
+ callspec = env.directives['callspec']
+ if callspec:
+ current = func_type.calling_convention
+ if current and current != callspec:
+ error(self.pos, "cannot have both '%s' and '%s' "
+ "calling conventions" % (current, callspec))
+ func_type.calling_convention = callspec
return self.base.analyse(func_type, env, visibility=visibility, in_pxd=in_pxd)
-
- def declare_optional_arg_struct(self, func_type, env, fused_cname=None):
- """
- Declares the optional argument struct (the struct used to hold the
- values for optional arguments). For fused cdef functions, this is
- deferred as analyse_declarations is called only once (on the fused
- cdef function).
- """
- scope = StructOrUnionScope()
- arg_count_member = '%sn' % Naming.pyrex_prefix
- scope.declare_var(arg_count_member, PyrexTypes.c_int_type, self.pos)
-
+
+ def declare_optional_arg_struct(self, func_type, env, fused_cname=None):
+ """
+ Declares the optional argument struct (the struct used to hold the
+ values for optional arguments). For fused cdef functions, this is
+ deferred as analyse_declarations is called only once (on the fused
+ cdef function).
+ """
+ scope = StructOrUnionScope()
+ arg_count_member = '%sn' % Naming.pyrex_prefix
+ scope.declare_var(arg_count_member, PyrexTypes.c_int_type, self.pos)
+
for arg in func_type.args[len(func_type.args) - self.optional_arg_count:]:
scope.declare_var(arg.name, arg.type, arg.pos, allow_pyobject=True, allow_memoryview=True)
-
- struct_cname = env.mangle(Naming.opt_arg_prefix, self.base.name)
-
- if fused_cname is not None:
- struct_cname = PyrexTypes.get_fused_cname(fused_cname, struct_cname)
-
- op_args_struct = env.global_scope().declare_struct_or_union(
+
+ struct_cname = env.mangle(Naming.opt_arg_prefix, self.base.name)
+
+ if fused_cname is not None:
+ struct_cname = PyrexTypes.get_fused_cname(fused_cname, struct_cname)
+
+ op_args_struct = env.global_scope().declare_struct_or_union(
name=struct_cname,
kind='struct',
scope=scope,
typedef_flag=0,
pos=self.pos,
cname=struct_cname)
-
- op_args_struct.defined_in_pxd = 1
- op_args_struct.used = 1
-
- func_type.op_arg_struct = PyrexTypes.c_ptr_type(op_args_struct.type)
-
-
-class CConstDeclaratorNode(CDeclaratorNode):
- # base CDeclaratorNode
-
- child_attrs = ["base"]
-
+
+ op_args_struct.defined_in_pxd = 1
+ op_args_struct.used = 1
+
+ func_type.op_arg_struct = PyrexTypes.c_ptr_type(op_args_struct.type)
+
+
+class CConstDeclaratorNode(CDeclaratorNode):
+ # base CDeclaratorNode
+
+ child_attrs = ["base"]
+
def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False):
- if base_type.is_pyobject:
- error(self.pos,
- "Const base type cannot be a Python object")
- const = PyrexTypes.c_const_type(base_type)
+ if base_type.is_pyobject:
+ error(self.pos,
+ "Const base type cannot be a Python object")
+ const = PyrexTypes.c_const_type(base_type)
return self.base.analyse(const, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd)
-
-
-class CArgDeclNode(Node):
- # Item in a function declaration argument list.
- #
- # base_type CBaseTypeNode
- # declarator CDeclaratorNode
- # not_none boolean Tagged with 'not None'
- # or_none boolean Tagged with 'or None'
- # accept_none boolean Resolved boolean for not_none/or_none
- # default ExprNode or None
- # default_value PyObjectConst constant for default value
- # annotation ExprNode or None Py3 function arg annotation
- # is_self_arg boolean Is the "self" arg of an extension type method
- # is_type_arg boolean Is the "class" arg of an extension type classmethod
- # is_kw_only boolean Is a keyword-only argument
- # is_dynamic boolean Non-literal arg stored inside CyFunction
-
- child_attrs = ["base_type", "declarator", "default", "annotation"]
+
+
+class CArgDeclNode(Node):
+ # Item in a function declaration argument list.
+ #
+ # base_type CBaseTypeNode
+ # declarator CDeclaratorNode
+ # not_none boolean Tagged with 'not None'
+ # or_none boolean Tagged with 'or None'
+ # accept_none boolean Resolved boolean for not_none/or_none
+ # default ExprNode or None
+ # default_value PyObjectConst constant for default value
+ # annotation ExprNode or None Py3 function arg annotation
+ # is_self_arg boolean Is the "self" arg of an extension type method
+ # is_type_arg boolean Is the "class" arg of an extension type classmethod
+ # is_kw_only boolean Is a keyword-only argument
+ # is_dynamic boolean Non-literal arg stored inside CyFunction
+
+ child_attrs = ["base_type", "declarator", "default", "annotation"]
outer_attrs = ["default", "annotation"]
-
- is_self_arg = 0
- is_type_arg = 0
- is_generic = 1
- kw_only = 0
- not_none = 0
- or_none = 0
- type = None
- name_declarator = None
- default_value = None
- annotation = None
- is_dynamic = 0
-
+
+ is_self_arg = 0
+ is_type_arg = 0
+ is_generic = 1
+ kw_only = 0
+ not_none = 0
+ or_none = 0
+ type = None
+ name_declarator = None
+ default_value = None
+ annotation = None
+ is_dynamic = 0
+
def analyse(self, env, nonempty=0, is_self_arg=False):
- if is_self_arg:
- self.base_type.is_self_arg = self.is_self_arg = True
- if self.type is None:
- # The parser may misinterpret names as types. We fix that here.
- if isinstance(self.declarator, CNameDeclaratorNode) and self.declarator.name == '':
- if nonempty:
- if self.base_type.is_basic_c_type:
- # char, short, long called "int"
- type = self.base_type.analyse(env, could_be_name=True)
+ if is_self_arg:
+ self.base_type.is_self_arg = self.is_self_arg = True
+ if self.type is None:
+ # The parser may misinterpret names as types. We fix that here.
+ if isinstance(self.declarator, CNameDeclaratorNode) and self.declarator.name == '':
+ if nonempty:
+ if self.base_type.is_basic_c_type:
+ # char, short, long called "int"
+ type = self.base_type.analyse(env, could_be_name=True)
arg_name = type.empty_declaration_code()
- else:
- arg_name = self.base_type.name
- self.declarator.name = EncodedString(arg_name)
- self.base_type.name = None
- self.base_type.is_basic_c_type = False
- could_be_name = True
- else:
- could_be_name = False
- self.base_type.is_arg = True
- base_type = self.base_type.analyse(env, could_be_name=could_be_name)
- if hasattr(self.base_type, 'arg_name') and self.base_type.arg_name:
- self.declarator.name = self.base_type.arg_name
-
- # The parser is unable to resolve the ambiguity of [] as part of the
- # type (e.g. in buffers) or empty declarator (as with arrays).
-        # This only arises for empty multi-dimensional arrays.
- if (base_type.is_array
- and isinstance(self.base_type, TemplatedTypeNode)
- and isinstance(self.declarator, CArrayDeclaratorNode)):
- declarator = self.declarator
- while isinstance(declarator.base, CArrayDeclaratorNode):
- declarator = declarator.base
- declarator.base = self.base_type.array_declarator
- base_type = base_type.base_type
-
- # inject type declaration from annotations
+ else:
+ arg_name = self.base_type.name
+ self.declarator.name = EncodedString(arg_name)
+ self.base_type.name = None
+ self.base_type.is_basic_c_type = False
+ could_be_name = True
+ else:
+ could_be_name = False
+ self.base_type.is_arg = True
+ base_type = self.base_type.analyse(env, could_be_name=could_be_name)
+ if hasattr(self.base_type, 'arg_name') and self.base_type.arg_name:
+ self.declarator.name = self.base_type.arg_name
+
+ # The parser is unable to resolve the ambiguity of [] as part of the
+ # type (e.g. in buffers) or empty declarator (as with arrays).
+        # This only arises for empty multi-dimensional arrays.
+ if (base_type.is_array
+ and isinstance(self.base_type, TemplatedTypeNode)
+ and isinstance(self.declarator, CArrayDeclaratorNode)):
+ declarator = self.declarator
+ while isinstance(declarator.base, CArrayDeclaratorNode):
+ declarator = declarator.base
+ declarator.base = self.base_type.array_declarator
+ base_type = base_type.base_type
+
+ # inject type declaration from annotations
# this is called without 'env' by AdjustDefByDirectives transform before declaration analysis
if self.annotation and env and env.directives['annotation_typing'] and self.base_type.name is None:
- arg_type = self.inject_type_from_annotations(env)
- if arg_type is not None:
- base_type = arg_type
- return self.declarator.analyse(base_type, env, nonempty=nonempty)
- else:
- return self.name_declarator, self.type
-
- def inject_type_from_annotations(self, env):
- annotation = self.annotation
- if not annotation:
- return None
+ arg_type = self.inject_type_from_annotations(env)
+ if arg_type is not None:
+ base_type = arg_type
+ return self.declarator.analyse(base_type, env, nonempty=nonempty)
+ else:
+ return self.name_declarator, self.type
+
+ def inject_type_from_annotations(self, env):
+ annotation = self.annotation
+ if not annotation:
+ return None
base_type, arg_type = analyse_type_annotation(annotation, env, assigned_value=self.default)
if base_type is not None:
self.base_type = base_type
- return arg_type
-
- def calculate_default_value_code(self, code):
- if self.default_value is None:
- if self.default:
- if self.default.is_literal:
- # will not output any code, just assign the result_code
- self.default.generate_evaluation_code(code)
- return self.type.cast_code(self.default.result())
- self.default_value = code.get_argument_default_const(self.type)
- return self.default_value
-
- def annotate(self, code):
- if self.default:
- self.default.annotate(code)
-
+ return arg_type
+
+ def calculate_default_value_code(self, code):
+ if self.default_value is None:
+ if self.default:
+ if self.default.is_literal:
+ # will not output any code, just assign the result_code
+ self.default.generate_evaluation_code(code)
+ return self.type.cast_code(self.default.result())
+ self.default_value = code.get_argument_default_const(self.type)
+ return self.default_value
+
+ def annotate(self, code):
+ if self.default:
+ self.default.annotate(code)
+
def generate_assignment_code(self, code, target=None, overloaded_assignment=False):
- default = self.default
- if default is None or default.is_literal:
- return
- if target is None:
- target = self.calculate_default_value_code(code)
- default.generate_evaluation_code(code)
- default.make_owned_reference(code)
+ default = self.default
+ if default is None or default.is_literal:
+ return
+ if target is None:
+ target = self.calculate_default_value_code(code)
+ default.generate_evaluation_code(code)
+ default.make_owned_reference(code)
result = default.result() if overloaded_assignment else default.result_as(self.type)
- code.putln("%s = %s;" % (target, result))
- if self.type.is_pyobject:
- code.put_giveref(default.result())
- default.generate_post_assignment_code(code)
- default.free_temps(code)
-
-
-class CBaseTypeNode(Node):
- # Abstract base class for C base type nodes.
- #
- # Processing during analyse_declarations phase:
- #
- # analyse
- # Returns the type.
-
- def analyse_as_type(self, env):
- return self.analyse(env)
-
-
-class CAnalysedBaseTypeNode(Node):
- # type type
-
- child_attrs = []
-
+ code.putln("%s = %s;" % (target, result))
+ if self.type.is_pyobject:
+ code.put_giveref(default.result())
+ default.generate_post_assignment_code(code)
+ default.free_temps(code)
+
+
+class CBaseTypeNode(Node):
+ # Abstract base class for C base type nodes.
+ #
+ # Processing during analyse_declarations phase:
+ #
+ # analyse
+ # Returns the type.
+
+ def analyse_as_type(self, env):
+ return self.analyse(env)
+
+
+class CAnalysedBaseTypeNode(Node):
+ # type type
+
+ child_attrs = []
+
def analyse(self, env, could_be_name=False):
- return self.type
-
-
-class CSimpleBaseTypeNode(CBaseTypeNode):
- # name string
- # module_path [string] Qualifying name components
- # is_basic_c_type boolean
- # signed boolean
- # longness integer
- # complex boolean
- # is_self_arg boolean Is self argument of C method
- # ##is_type_arg boolean Is type argument of class method
-
- child_attrs = []
- arg_name = None # in case the argument name was interpreted as a type
- module_path = []
- is_basic_c_type = False
- complex = False
-
+ return self.type
+
+
+class CSimpleBaseTypeNode(CBaseTypeNode):
+ # name string
+ # module_path [string] Qualifying name components
+ # is_basic_c_type boolean
+ # signed boolean
+ # longness integer
+ # complex boolean
+ # is_self_arg boolean Is self argument of C method
+ # ##is_type_arg boolean Is type argument of class method
+
+ child_attrs = []
+ arg_name = None # in case the argument name was interpreted as a type
+ module_path = []
+ is_basic_c_type = False
+ complex = False
+
def analyse(self, env, could_be_name=False):
- # Return type descriptor.
- #print "CSimpleBaseTypeNode.analyse: is_self_arg =", self.is_self_arg ###
- type = None
- if self.is_basic_c_type:
- type = PyrexTypes.simple_c_type(self.signed, self.longness, self.name)
- if not type:
- error(self.pos, "Unrecognised type modifier combination")
- elif self.name == "object" and not self.module_path:
- type = py_object_type
- elif self.name is None:
- if self.is_self_arg and env.is_c_class_scope:
- #print "CSimpleBaseTypeNode.analyse: defaulting to parent type" ###
- type = env.parent_type
- ## elif self.is_type_arg and env.is_c_class_scope:
- ## type = Builtin.type_type
- else:
- type = py_object_type
- else:
- if self.module_path:
- # Maybe it's a nested C++ class.
- scope = env
- for item in self.module_path:
- entry = scope.lookup(item)
- if entry is not None and entry.is_cpp_class:
- scope = entry.type.scope
- else:
- scope = None
- break
-
- if scope is None:
- # Maybe it's a cimport.
- scope = env.find_imported_module(self.module_path, self.pos)
- else:
- scope = env
-
- if scope:
- if scope.is_c_class_scope:
- scope = scope.global_scope()
-
- type = scope.lookup_type(self.name)
- if type is not None:
- pass
- elif could_be_name:
- if self.is_self_arg and env.is_c_class_scope:
- type = env.parent_type
- ## elif self.is_type_arg and env.is_c_class_scope:
- ## type = Builtin.type_type
- else:
- type = py_object_type
- self.arg_name = EncodedString(self.name)
- else:
- if self.templates:
- if not self.name in self.templates:
- error(self.pos, "'%s' is not a type identifier" % self.name)
- type = PyrexTypes.TemplatePlaceholderType(self.name)
- else:
- error(self.pos, "'%s' is not a type identifier" % self.name)
+ # Return type descriptor.
+ #print "CSimpleBaseTypeNode.analyse: is_self_arg =", self.is_self_arg ###
+ type = None
+ if self.is_basic_c_type:
+ type = PyrexTypes.simple_c_type(self.signed, self.longness, self.name)
+ if not type:
+ error(self.pos, "Unrecognised type modifier combination")
+ elif self.name == "object" and not self.module_path:
+ type = py_object_type
+ elif self.name is None:
+ if self.is_self_arg and env.is_c_class_scope:
+ #print "CSimpleBaseTypeNode.analyse: defaulting to parent type" ###
+ type = env.parent_type
+ ## elif self.is_type_arg and env.is_c_class_scope:
+ ## type = Builtin.type_type
+ else:
+ type = py_object_type
+ else:
+ if self.module_path:
+ # Maybe it's a nested C++ class.
+ scope = env
+ for item in self.module_path:
+ entry = scope.lookup(item)
+ if entry is not None and entry.is_cpp_class:
+ scope = entry.type.scope
+ else:
+ scope = None
+ break
+
+ if scope is None:
+ # Maybe it's a cimport.
+ scope = env.find_imported_module(self.module_path, self.pos)
+ else:
+ scope = env
+
+ if scope:
+ if scope.is_c_class_scope:
+ scope = scope.global_scope()
+
+ type = scope.lookup_type(self.name)
+ if type is not None:
+ pass
+ elif could_be_name:
+ if self.is_self_arg and env.is_c_class_scope:
+ type = env.parent_type
+ ## elif self.is_type_arg and env.is_c_class_scope:
+ ## type = Builtin.type_type
+ else:
+ type = py_object_type
+ self.arg_name = EncodedString(self.name)
+ else:
+ if self.templates:
+ if not self.name in self.templates:
+ error(self.pos, "'%s' is not a type identifier" % self.name)
+ type = PyrexTypes.TemplatePlaceholderType(self.name)
+ else:
+ error(self.pos, "'%s' is not a type identifier" % self.name)
if type and type.is_fused and env.fused_to_specific:
type = type.specialize(env.fused_to_specific)
- if self.complex:
- if not type.is_numeric or type.is_complex:
- error(self.pos, "can only complexify c numeric types")
- type = PyrexTypes.CComplexType(type)
- type.create_declaration_utility_code(env)
- elif type is Builtin.complex_type:
- # Special case: optimise builtin complex type into C's
- # double complex. The parser cannot do this (as for the
- # normal scalar types) as the user may have redeclared the
- # 'complex' type. Testing for the exact type here works.
- type = PyrexTypes.c_double_complex_type
- type.create_declaration_utility_code(env)
- self.complex = True
- if type:
- return type
- else:
- return PyrexTypes.error_type
-
-class MemoryViewSliceTypeNode(CBaseTypeNode):
-
- name = 'memoryview'
- child_attrs = ['base_type_node', 'axes']
-
+ if self.complex:
+ if not type.is_numeric or type.is_complex:
+ error(self.pos, "can only complexify c numeric types")
+ type = PyrexTypes.CComplexType(type)
+ type.create_declaration_utility_code(env)
+ elif type is Builtin.complex_type:
+ # Special case: optimise builtin complex type into C's
+ # double complex. The parser cannot do this (as for the
+ # normal scalar types) as the user may have redeclared the
+ # 'complex' type. Testing for the exact type here works.
+ type = PyrexTypes.c_double_complex_type
+ type.create_declaration_utility_code(env)
+ self.complex = True
+ if type:
+ return type
+ else:
+ return PyrexTypes.error_type
+
+class MemoryViewSliceTypeNode(CBaseTypeNode):
+
+ name = 'memoryview'
+ child_attrs = ['base_type_node', 'axes']
+
def analyse(self, env, could_be_name=False):
-
- base_type = self.base_type_node.analyse(env)
- if base_type.is_error: return base_type
-
- from . import MemoryView
-
- try:
- axes_specs = MemoryView.get_axes_specs(env, self.axes)
+
+ base_type = self.base_type_node.analyse(env)
+ if base_type.is_error: return base_type
+
+ from . import MemoryView
+
+ try:
+ axes_specs = MemoryView.get_axes_specs(env, self.axes)
except CompileError as e:
- error(e.position, e.message_only)
- self.type = PyrexTypes.ErrorType()
- return self.type
-
- if not MemoryView.validate_axes(self.pos, axes_specs):
- self.type = error_type
- else:
- self.type = PyrexTypes.MemoryViewSliceType(base_type, axes_specs)
+ error(e.position, e.message_only)
+ self.type = PyrexTypes.ErrorType()
+ return self.type
+
+ if not MemoryView.validate_axes(self.pos, axes_specs):
+ self.type = error_type
+ else:
+ self.type = PyrexTypes.MemoryViewSliceType(base_type, axes_specs)
self.type.validate_memslice_dtype(self.pos)
- self.use_memview_utilities(env)
-
- return self.type
-
- def use_memview_utilities(self, env):
- from . import MemoryView
- env.use_utility_code(MemoryView.view_utility_code)
-
-
-class CNestedBaseTypeNode(CBaseTypeNode):
- # For C++ classes that live inside other C++ classes.
-
- # name string
- # base_type CBaseTypeNode
-
- child_attrs = ['base_type']
-
+ self.use_memview_utilities(env)
+
+ return self.type
+
+ def use_memview_utilities(self, env):
+ from . import MemoryView
+ env.use_utility_code(MemoryView.view_utility_code)
+
+
+class CNestedBaseTypeNode(CBaseTypeNode):
+ # For C++ classes that live inside other C++ classes.
+
+ # name string
+ # base_type CBaseTypeNode
+
+ child_attrs = ['base_type']
+
def analyse(self, env, could_be_name=None):
- base_type = self.base_type.analyse(env)
- if base_type is PyrexTypes.error_type:
- return PyrexTypes.error_type
- if not base_type.is_cpp_class:
- error(self.pos, "'%s' is not a valid type scope" % base_type)
- return PyrexTypes.error_type
- type_entry = base_type.scope.lookup_here(self.name)
- if not type_entry or not type_entry.is_type:
- error(self.pos, "'%s.%s' is not a type identifier" % (base_type, self.name))
- return PyrexTypes.error_type
- return type_entry.type
-
-
-class TemplatedTypeNode(CBaseTypeNode):
- # After parsing:
- # positional_args [ExprNode] List of positional arguments
- # keyword_args DictNode Keyword arguments
- # base_type_node CBaseTypeNode
-
- # After analysis:
- # type PyrexTypes.BufferType or PyrexTypes.CppClassType ...containing the right options
-
- child_attrs = ["base_type_node", "positional_args",
- "keyword_args", "dtype_node"]
-
- dtype_node = None
-
- name = None
-
+ base_type = self.base_type.analyse(env)
+ if base_type is PyrexTypes.error_type:
+ return PyrexTypes.error_type
+ if not base_type.is_cpp_class:
+ error(self.pos, "'%s' is not a valid type scope" % base_type)
+ return PyrexTypes.error_type
+ type_entry = base_type.scope.lookup_here(self.name)
+ if not type_entry or not type_entry.is_type:
+ error(self.pos, "'%s.%s' is not a type identifier" % (base_type, self.name))
+ return PyrexTypes.error_type
+ return type_entry.type
+
+
+class TemplatedTypeNode(CBaseTypeNode):
+ # After parsing:
+ # positional_args [ExprNode] List of positional arguments
+ # keyword_args DictNode Keyword arguments
+ # base_type_node CBaseTypeNode
+
+ # After analysis:
+ # type PyrexTypes.BufferType or PyrexTypes.CppClassType ...containing the right options
+
+ child_attrs = ["base_type_node", "positional_args",
+ "keyword_args", "dtype_node"]
+
+ dtype_node = None
+
+ name = None
+
def analyse(self, env, could_be_name=False, base_type=None):
- if base_type is None:
- base_type = self.base_type_node.analyse(env)
- if base_type.is_error: return base_type
-
+ if base_type is None:
+ base_type = self.base_type_node.analyse(env)
+ if base_type.is_error: return base_type
+
if base_type.is_cpp_class and base_type.is_template_type():
- # Templated class
- if self.keyword_args and self.keyword_args.key_value_pairs:
- error(self.pos, "c++ templates cannot take keyword arguments")
- self.type = PyrexTypes.error_type
- else:
- template_types = []
- for template_node in self.positional_args:
- type = template_node.analyse_as_type(env)
- if type is None:
- error(template_node.pos, "unknown type in template argument")
+ # Templated class
+ if self.keyword_args and self.keyword_args.key_value_pairs:
+ error(self.pos, "c++ templates cannot take keyword arguments")
+ self.type = PyrexTypes.error_type
+ else:
+ template_types = []
+ for template_node in self.positional_args:
+ type = template_node.analyse_as_type(env)
+ if type is None:
+ error(template_node.pos, "unknown type in template argument")
type = error_type
- template_types.append(type)
- self.type = base_type.specialize_here(self.pos, template_types)
-
- elif base_type.is_pyobject:
- # Buffer
- from . import Buffer
-
- options = Buffer.analyse_buffer_options(
- self.pos,
- env,
- self.positional_args,
- self.keyword_args,
- base_type.buffer_defaults)
-
- if sys.version_info[0] < 3:
- # Py 2.x enforces byte strings as keyword arguments ...
+ template_types.append(type)
+ self.type = base_type.specialize_here(self.pos, template_types)
+
+ elif base_type.is_pyobject:
+ # Buffer
+ from . import Buffer
+
+ options = Buffer.analyse_buffer_options(
+ self.pos,
+ env,
+ self.positional_args,
+ self.keyword_args,
+ base_type.buffer_defaults)
+
+ if sys.version_info[0] < 3:
+ # Py 2.x enforces byte strings as keyword arguments ...
options = dict([(name.encode('ASCII'), value)
for name, value in options.items()])
-
- self.type = PyrexTypes.BufferType(base_type, **options)
+
+ self.type = PyrexTypes.BufferType(base_type, **options)
if has_np_pythran(env) and is_pythran_buffer(self.type):
self.type = PyrexTypes.PythranExpr(pythran_type(self.type), self.type)
-
- else:
- # Array
- empty_declarator = CNameDeclaratorNode(self.pos, name="", cname=None)
- if len(self.positional_args) > 1 or self.keyword_args.key_value_pairs:
- error(self.pos, "invalid array declaration")
- self.type = PyrexTypes.error_type
- else:
- # It would be nice to merge this class with CArrayDeclaratorNode,
- # but arrays are part of the declaration, not the type...
- if not self.positional_args:
- dimension = None
- else:
- dimension = self.positional_args[0]
+
+ else:
+ # Array
+ empty_declarator = CNameDeclaratorNode(self.pos, name="", cname=None)
+ if len(self.positional_args) > 1 or self.keyword_args.key_value_pairs:
+ error(self.pos, "invalid array declaration")
+ self.type = PyrexTypes.error_type
+ else:
+ # It would be nice to merge this class with CArrayDeclaratorNode,
+ # but arrays are part of the declaration, not the type...
+ if not self.positional_args:
+ dimension = None
+ else:
+ dimension = self.positional_args[0]
self.array_declarator = CArrayDeclaratorNode(
self.pos,
base=empty_declarator,
dimension=dimension)
- self.type = self.array_declarator.analyse(base_type, env)[1]
-
- if self.type.is_fused and env.fused_to_specific:
- self.type = self.type.specialize(env.fused_to_specific)
-
- return self.type
-
-
-class CComplexBaseTypeNode(CBaseTypeNode):
- # base_type CBaseTypeNode
- # declarator CDeclaratorNode
-
- child_attrs = ["base_type", "declarator"]
-
+ self.type = self.array_declarator.analyse(base_type, env)[1]
+
+ if self.type.is_fused and env.fused_to_specific:
+ self.type = self.type.specialize(env.fused_to_specific)
+
+ return self.type
+
+
+class CComplexBaseTypeNode(CBaseTypeNode):
+ # base_type CBaseTypeNode
+ # declarator CDeclaratorNode
+
+ child_attrs = ["base_type", "declarator"]
+
def analyse(self, env, could_be_name=False):
- base = self.base_type.analyse(env, could_be_name)
- _, type = self.declarator.analyse(base, env)
- return type
-
-
+ base = self.base_type.analyse(env, could_be_name)
+ _, type = self.declarator.analyse(base, env)
+ return type
+
+
class CTupleBaseTypeNode(CBaseTypeNode):
# components [CBaseTypeNode]
@@ -1234,456 +1234,456 @@ class CTupleBaseTypeNode(CBaseTypeNode):
return entry.type
-class FusedTypeNode(CBaseTypeNode):
- """
- Represents a fused type in a ctypedef statement:
-
- ctypedef cython.fused_type(int, long, long long) integral
-
- name str name of this fused type
- types [CSimpleBaseTypeNode] is the list of types to be fused
- """
-
- child_attrs = []
-
- def analyse_declarations(self, env):
- type = self.analyse(env)
- entry = env.declare_typedef(self.name, type, self.pos)
-
- # Omit the typedef declaration that self.declarator would produce
- entry.in_cinclude = True
-
+class FusedTypeNode(CBaseTypeNode):
+ """
+ Represents a fused type in a ctypedef statement:
+
+ ctypedef cython.fused_type(int, long, long long) integral
+
+ name str name of this fused type
+ types [CSimpleBaseTypeNode] is the list of types to be fused
+ """
+
+ child_attrs = []
+
+ def analyse_declarations(self, env):
+ type = self.analyse(env)
+ entry = env.declare_typedef(self.name, type, self.pos)
+
+ # Omit the typedef declaration that self.declarator would produce
+ entry.in_cinclude = True
+
def analyse(self, env, could_be_name=False):
- types = []
- for type_node in self.types:
- type = type_node.analyse_as_type(env)
-
- if not type:
- error(type_node.pos, "Not a type")
- continue
-
- if type in types:
- error(type_node.pos, "Type specified multiple times")
- else:
- types.append(type)
-
- # if len(self.types) == 1:
- # return types[0]
-
- return PyrexTypes.FusedType(types, name=self.name)
-
-
-class CConstTypeNode(CBaseTypeNode):
- # base_type CBaseTypeNode
-
- child_attrs = ["base_type"]
-
+ types = []
+ for type_node in self.types:
+ type = type_node.analyse_as_type(env)
+
+ if not type:
+ error(type_node.pos, "Not a type")
+ continue
+
+ if type in types:
+ error(type_node.pos, "Type specified multiple times")
+ else:
+ types.append(type)
+
+ # if len(self.types) == 1:
+ # return types[0]
+
+ return PyrexTypes.FusedType(types, name=self.name)
+
+
+class CConstTypeNode(CBaseTypeNode):
+ # base_type CBaseTypeNode
+
+ child_attrs = ["base_type"]
+
def analyse(self, env, could_be_name=False):
- base = self.base_type.analyse(env, could_be_name)
- if base.is_pyobject:
- error(self.pos,
- "Const base type cannot be a Python object")
- return PyrexTypes.c_const_type(base)
-
-
-class CVarDefNode(StatNode):
- # C variable definition or forward/extern function declaration.
- #
- # visibility 'private' or 'public' or 'extern'
- # base_type CBaseTypeNode
- # declarators [CDeclaratorNode]
- # in_pxd boolean
- # api boolean
- # overridable boolean whether it is a cpdef
- # modifiers ['inline']
-
- # decorators [cython.locals(...)] or None
- # directive_locals { string : NameNode } locals defined by cython.locals(...)
-
- child_attrs = ["base_type", "declarators"]
-
- decorators = None
- directive_locals = None
-
+ base = self.base_type.analyse(env, could_be_name)
+ if base.is_pyobject:
+ error(self.pos,
+ "Const base type cannot be a Python object")
+ return PyrexTypes.c_const_type(base)
+
+
+class CVarDefNode(StatNode):
+ # C variable definition or forward/extern function declaration.
+ #
+ # visibility 'private' or 'public' or 'extern'
+ # base_type CBaseTypeNode
+ # declarators [CDeclaratorNode]
+ # in_pxd boolean
+ # api boolean
+ # overridable boolean whether it is a cpdef
+ # modifiers ['inline']
+
+ # decorators [cython.locals(...)] or None
+ # directive_locals { string : NameNode } locals defined by cython.locals(...)
+
+ child_attrs = ["base_type", "declarators"]
+
+ decorators = None
+ directive_locals = None
+
def analyse_declarations(self, env, dest_scope=None):
- if self.directive_locals is None:
- self.directive_locals = {}
- if not dest_scope:
- dest_scope = env
- self.dest_scope = dest_scope
-
- if self.declarators:
- templates = self.declarators[0].analyse_templates()
- else:
- templates = None
- if templates is not None:
- if self.visibility != 'extern':
- error(self.pos, "Only extern functions allowed")
- if len(self.declarators) > 1:
- error(self.declarators[1].pos, "Can't multiply declare template types")
- env = TemplateScope('func_template', env)
- env.directives = env.outer_scope.directives
- for template_param in templates:
- env.declare_type(template_param.name, template_param, self.pos)
-
- base_type = self.base_type.analyse(env)
-
- if base_type.is_fused and not self.in_pxd and (env.is_c_class_scope or
- env.is_module_scope):
- error(self.pos, "Fused types not allowed here")
- return error_type
-
- self.entry = None
- visibility = self.visibility
-
- for declarator in self.declarators:
-
- if (len(self.declarators) > 1
+ if self.directive_locals is None:
+ self.directive_locals = {}
+ if not dest_scope:
+ dest_scope = env
+ self.dest_scope = dest_scope
+
+ if self.declarators:
+ templates = self.declarators[0].analyse_templates()
+ else:
+ templates = None
+ if templates is not None:
+ if self.visibility != 'extern':
+ error(self.pos, "Only extern functions allowed")
+ if len(self.declarators) > 1:
+ error(self.declarators[1].pos, "Can't multiply declare template types")
+ env = TemplateScope('func_template', env)
+ env.directives = env.outer_scope.directives
+ for template_param in templates:
+ env.declare_type(template_param.name, template_param, self.pos)
+
+ base_type = self.base_type.analyse(env)
+
+ if base_type.is_fused and not self.in_pxd and (env.is_c_class_scope or
+ env.is_module_scope):
+ error(self.pos, "Fused types not allowed here")
+ return error_type
+
+ self.entry = None
+ visibility = self.visibility
+
+ for declarator in self.declarators:
+
+ if (len(self.declarators) > 1
and not isinstance(declarator, CNameDeclaratorNode)
and env.directives['warn.multiple_declarators']):
warning(
declarator.pos,
"Non-trivial type declarators in shared declaration (e.g. mix of pointers and values). "
- "Each pointer declaration should be on its own line.", 1)
-
+ "Each pointer declaration should be on its own line.", 1)
+
create_extern_wrapper = (self.overridable
and self.visibility == 'extern'
and env.is_module_scope)
if create_extern_wrapper:
declarator.overridable = False
- if isinstance(declarator, CFuncDeclaratorNode):
+ if isinstance(declarator, CFuncDeclaratorNode):
name_declarator, type = declarator.analyse(
base_type, env, directive_locals=self.directive_locals, visibility=visibility, in_pxd=self.in_pxd)
- else:
+ else:
name_declarator, type = declarator.analyse(
base_type, env, visibility=visibility, in_pxd=self.in_pxd)
- if not type.is_complete():
- if not (self.visibility == 'extern' and type.is_array or type.is_memoryviewslice):
+ if not type.is_complete():
+ if not (self.visibility == 'extern' and type.is_array or type.is_memoryviewslice):
error(declarator.pos, "Variable type '%s' is incomplete" % type)
- if self.visibility == 'extern' and type.is_pyobject:
+ if self.visibility == 'extern' and type.is_pyobject:
error(declarator.pos, "Python object cannot be declared extern")
- name = name_declarator.name
- cname = name_declarator.cname
- if name == '':
- error(declarator.pos, "Missing name in declaration.")
- return
+ name = name_declarator.name
+ cname = name_declarator.cname
+ if name == '':
+ error(declarator.pos, "Missing name in declaration.")
+ return
if type.is_reference and self.visibility != 'extern':
error(declarator.pos, "C++ references cannot be declared; use a pointer instead")
- if type.is_cfunction:
+ if type.is_cfunction:
if 'staticmethod' in env.directives:
type.is_static_method = True
self.entry = dest_scope.declare_cfunction(
name, type, declarator.pos,
cname=cname, visibility=self.visibility, in_pxd=self.in_pxd,
api=self.api, modifiers=self.modifiers, overridable=self.overridable)
- if self.entry is not None:
- self.entry.directive_locals = copy.copy(self.directive_locals)
+ if self.entry is not None:
+ self.entry.directive_locals = copy.copy(self.directive_locals)
if create_extern_wrapper:
self.entry.type.create_to_py_utility_code(env)
self.entry.create_wrapper = True
- else:
+ else:
if self.overridable:
warning(self.pos, "cpdef variables will not be supported in Cython 3; "
"currently they are no different from cdef variables", 2)
- if self.directive_locals:
- error(self.pos, "Decorators can only be followed by functions")
+ if self.directive_locals:
+ error(self.pos, "Decorators can only be followed by functions")
self.entry = dest_scope.declare_var(
name, type, declarator.pos,
cname=cname, visibility=visibility, in_pxd=self.in_pxd,
api=self.api, is_cdef=1)
- if Options.docstrings:
- self.entry.doc = embed_position(self.pos, self.doc)
-
-
-class CStructOrUnionDefNode(StatNode):
- # name string
- # cname string or None
- # kind "struct" or "union"
- # typedef_flag boolean
- # visibility "public" or "private"
- # api boolean
- # in_pxd boolean
- # attributes [CVarDefNode] or None
- # entry Entry
- # packed boolean
-
- child_attrs = ["attributes"]
-
- def declare(self, env, scope=None):
- self.entry = env.declare_struct_or_union(
- self.name, self.kind, scope, self.typedef_flag, self.pos,
+ if Options.docstrings:
+ self.entry.doc = embed_position(self.pos, self.doc)
+
+
+class CStructOrUnionDefNode(StatNode):
+ # name string
+ # cname string or None
+ # kind "struct" or "union"
+ # typedef_flag boolean
+ # visibility "public" or "private"
+ # api boolean
+ # in_pxd boolean
+ # attributes [CVarDefNode] or None
+ # entry Entry
+ # packed boolean
+
+ child_attrs = ["attributes"]
+
+ def declare(self, env, scope=None):
+ self.entry = env.declare_struct_or_union(
+ self.name, self.kind, scope, self.typedef_flag, self.pos,
self.cname, visibility=self.visibility, api=self.api,
packed=self.packed)
-
- def analyse_declarations(self, env):
- scope = None
- if self.attributes is not None:
- scope = StructOrUnionScope(self.name)
- self.declare(env, scope)
- if self.attributes is not None:
- if self.in_pxd and not env.in_cinclude:
- self.entry.defined_in_pxd = 1
- for attr in self.attributes:
- attr.analyse_declarations(env, scope)
- if self.visibility != 'extern':
- for attr in scope.var_entries:
- type = attr.type
- while type.is_array:
- type = type.base_type
- if type == self.entry.type:
- error(attr.pos, "Struct cannot contain itself as a member.")
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class CppClassNode(CStructOrUnionDefNode, BlockNode):
-
- # name string
- # cname string or None
- # visibility "extern"
- # in_pxd boolean
- # attributes [CVarDefNode] or None
- # entry Entry
- # base_classes [CBaseTypeNode]
+
+ def analyse_declarations(self, env):
+ scope = None
+ if self.attributes is not None:
+ scope = StructOrUnionScope(self.name)
+ self.declare(env, scope)
+ if self.attributes is not None:
+ if self.in_pxd and not env.in_cinclude:
+ self.entry.defined_in_pxd = 1
+ for attr in self.attributes:
+ attr.analyse_declarations(env, scope)
+ if self.visibility != 'extern':
+ for attr in scope.var_entries:
+ type = attr.type
+ while type.is_array:
+ type = type.base_type
+ if type == self.entry.type:
+ error(attr.pos, "Struct cannot contain itself as a member.")
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+
+class CppClassNode(CStructOrUnionDefNode, BlockNode):
+
+ # name string
+ # cname string or None
+ # visibility "extern"
+ # in_pxd boolean
+ # attributes [CVarDefNode] or None
+ # entry Entry
+ # base_classes [CBaseTypeNode]
# templates [(string, bool)] or None
- # decorators [DecoratorNode] or None
-
- decorators = None
-
- def declare(self, env):
- if self.templates is None:
- template_types = None
- else:
+ # decorators [DecoratorNode] or None
+
+ decorators = None
+
+ def declare(self, env):
+ if self.templates is None:
+ template_types = None
+ else:
template_types = [PyrexTypes.TemplatePlaceholderType(template_name, not required)
for template_name, required in self.templates]
num_optional_templates = sum(not required for _, required in self.templates)
if num_optional_templates and not all(required for _, required in self.templates[:-num_optional_templates]):
error(self.pos, "Required template parameters must precede optional template parameters.")
- self.entry = env.declare_cpp_class(
+ self.entry = env.declare_cpp_class(
self.name, None, self.pos, self.cname,
base_classes=[], visibility=self.visibility, templates=template_types)
-
- def analyse_declarations(self, env):
+
+ def analyse_declarations(self, env):
if self.templates is None:
template_types = template_names = None
else:
template_names = [template_name for template_name, _ in self.templates]
template_types = [PyrexTypes.TemplatePlaceholderType(template_name, not required)
for template_name, required in self.templates]
- scope = None
- if self.attributes is not None:
+ scope = None
+ if self.attributes is not None:
scope = CppClassScope(self.name, env, templates=template_names)
- def base_ok(base_class):
- if base_class.is_cpp_class or base_class.is_struct:
- return True
- else:
- error(self.pos, "Base class '%s' not a struct or class." % base_class)
- base_class_types = filter(base_ok, [b.analyse(scope or env) for b in self.base_classes])
- self.entry = env.declare_cpp_class(
- self.name, scope, self.pos,
+ def base_ok(base_class):
+ if base_class.is_cpp_class or base_class.is_struct:
+ return True
+ else:
+ error(self.pos, "Base class '%s' not a struct or class." % base_class)
+ base_class_types = filter(base_ok, [b.analyse(scope or env) for b in self.base_classes])
+ self.entry = env.declare_cpp_class(
+ self.name, scope, self.pos,
self.cname, base_class_types, visibility=self.visibility, templates=template_types)
- if self.entry is None:
- return
- self.entry.is_cpp_class = 1
- if scope is not None:
- scope.type = self.entry.type
- defined_funcs = []
- def func_attributes(attributes):
- for attr in attributes:
- if isinstance(attr, CFuncDefNode):
- yield attr
- elif isinstance(attr, CompilerDirectivesNode):
- for sub_attr in func_attributes(attr.body.stats):
- yield sub_attr
- if self.attributes is not None:
- if self.in_pxd and not env.in_cinclude:
- self.entry.defined_in_pxd = 1
- for attr in self.attributes:
+ if self.entry is None:
+ return
+ self.entry.is_cpp_class = 1
+ if scope is not None:
+ scope.type = self.entry.type
+ defined_funcs = []
+ def func_attributes(attributes):
+ for attr in attributes:
+ if isinstance(attr, CFuncDefNode):
+ yield attr
+ elif isinstance(attr, CompilerDirectivesNode):
+ for sub_attr in func_attributes(attr.body.stats):
+ yield sub_attr
+ if self.attributes is not None:
+ if self.in_pxd and not env.in_cinclude:
+ self.entry.defined_in_pxd = 1
+ for attr in self.attributes:
declare = getattr(attr, 'declare', None)
if declare:
attr.declare(scope)
- attr.analyse_declarations(scope)
- for func in func_attributes(self.attributes):
- defined_funcs.append(func)
- if self.templates is not None:
+ attr.analyse_declarations(scope)
+ for func in func_attributes(self.attributes):
+ defined_funcs.append(func)
+ if self.templates is not None:
func.template_declaration = "template <typename %s>" % ", typename ".join(template_names)
- self.body = StatListNode(self.pos, stats=defined_funcs)
- self.scope = scope
-
- def analyse_expressions(self, env):
- self.body = self.body.analyse_expressions(self.entry.type.scope)
- return self
-
- def generate_function_definitions(self, env, code):
- self.body.generate_function_definitions(self.entry.type.scope, code)
-
- def generate_execution_code(self, code):
- self.body.generate_execution_code(code)
-
- def annotate(self, code):
- self.body.annotate(code)
-
-
-class CEnumDefNode(StatNode):
- # name string or None
- # cname string or None
- # items [CEnumDefItemNode]
- # typedef_flag boolean
- # visibility "public" or "private" or "extern"
- # api boolean
- # in_pxd boolean
- # create_wrapper boolean
- # entry Entry
-
- child_attrs = ["items"]
-
- def declare(self, env):
+ self.body = StatListNode(self.pos, stats=defined_funcs)
+ self.scope = scope
+
+ def analyse_expressions(self, env):
+ self.body = self.body.analyse_expressions(self.entry.type.scope)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ self.body.generate_function_definitions(self.entry.type.scope, code)
+
+ def generate_execution_code(self, code):
+ self.body.generate_execution_code(code)
+
+ def annotate(self, code):
+ self.body.annotate(code)
+
+
+class CEnumDefNode(StatNode):
+ # name string or None
+ # cname string or None
+ # items [CEnumDefItemNode]
+ # typedef_flag boolean
+ # visibility "public" or "private" or "extern"
+ # api boolean
+ # in_pxd boolean
+ # create_wrapper boolean
+ # entry Entry
+
+ child_attrs = ["items"]
+
+ def declare(self, env):
self.entry = env.declare_enum(
self.name, self.pos,
cname=self.cname, typedef_flag=self.typedef_flag,
visibility=self.visibility, api=self.api,
create_wrapper=self.create_wrapper)
-
- def analyse_declarations(self, env):
- if self.items is not None:
- if self.in_pxd and not env.in_cinclude:
- self.entry.defined_in_pxd = 1
- for item in self.items:
- item.analyse_declarations(env, self.entry)
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- if self.visibility == 'public' or self.api:
+
+ def analyse_declarations(self, env):
+ if self.items is not None:
+ if self.in_pxd and not env.in_cinclude:
+ self.entry.defined_in_pxd = 1
+ for item in self.items:
+ item.analyse_declarations(env, self.entry)
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ if self.visibility == 'public' or self.api:
code.mark_pos(self.pos)
- temp = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True)
- for item in self.entry.enum_values:
- code.putln("%s = PyInt_FromLong(%s); %s" % (
+ temp = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True)
+ for item in self.entry.enum_values:
+ code.putln("%s = PyInt_FromLong(%s); %s" % (
temp,
item.cname,
code.error_goto_if_null(temp, item.pos)))
- code.put_gotref(temp)
- code.putln('if (PyDict_SetItemString(%s, "%s", %s) < 0) %s' % (
+ code.put_gotref(temp)
+ code.putln('if (PyDict_SetItemString(%s, "%s", %s) < 0) %s' % (
Naming.moddict_cname,
item.name,
temp,
code.error_goto(item.pos)))
- code.put_decref_clear(temp, PyrexTypes.py_object_type)
- code.funcstate.release_temp(temp)
-
-
-class CEnumDefItemNode(StatNode):
- # name string
- # cname string or None
- # value ExprNode or None
-
- child_attrs = ["value"]
-
- def analyse_declarations(self, env, enum_entry):
- if self.value:
- self.value = self.value.analyse_const_expression(env)
- if not self.value.type.is_int:
- self.value = self.value.coerce_to(PyrexTypes.c_int_type, env)
- self.value = self.value.analyse_const_expression(env)
+ code.put_decref_clear(temp, PyrexTypes.py_object_type)
+ code.funcstate.release_temp(temp)
+
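
For a public or api enum, the C code generated above registers each enum member in the module dictionary as a Python integer during module initialisation. A rough Python-level sketch of that effect (the helper name and argument shape are made up for illustration):

import sys

def expose_enum_values(module_name, values):
    # values: mapping of member name -> integer value, i.e. what the generated
    # code builds with PyInt_FromLong and stores via PyDict_SetItemString.
    module_dict = sys.modules[module_name].__dict__
    for name, value in values.items():
        module_dict[name] = int(value)

expose_enum_values(__name__, {"RED": 0, "GREEN": 1, "BLUE": 2})
print(RED, GREEN, BLUE)  # 0 1 2
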
+
+class CEnumDefItemNode(StatNode):
+ # name string
+ # cname string or None
+ # value ExprNode or None
+
+ child_attrs = ["value"]
+
+ def analyse_declarations(self, env, enum_entry):
+ if self.value:
+ self.value = self.value.analyse_const_expression(env)
+ if not self.value.type.is_int:
+ self.value = self.value.coerce_to(PyrexTypes.c_int_type, env)
+ self.value = self.value.analyse_const_expression(env)
entry = env.declare_const(
self.name, enum_entry.type,
self.value, self.pos, cname=self.cname,
visibility=enum_entry.visibility, api=enum_entry.api,
create_wrapper=enum_entry.create_wrapper and enum_entry.name is None)
- enum_entry.enum_values.append(entry)
+ enum_entry.enum_values.append(entry)
if enum_entry.name:
enum_entry.type.values.append(entry.name)
-
-
-class CTypeDefNode(StatNode):
- # base_type CBaseTypeNode
- # declarator CDeclaratorNode
- # visibility "public" or "private"
- # api boolean
- # in_pxd boolean
-
- child_attrs = ["base_type", "declarator"]
-
- def analyse_declarations(self, env):
- base = self.base_type.analyse(env)
+
+
+class CTypeDefNode(StatNode):
+ # base_type CBaseTypeNode
+ # declarator CDeclaratorNode
+ # visibility "public" or "private"
+ # api boolean
+ # in_pxd boolean
+
+ child_attrs = ["base_type", "declarator"]
+
+ def analyse_declarations(self, env):
+ base = self.base_type.analyse(env)
name_declarator, type = self.declarator.analyse(
base, env, visibility=self.visibility, in_pxd=self.in_pxd)
- name = name_declarator.name
- cname = name_declarator.cname
-
+ name = name_declarator.name
+ cname = name_declarator.cname
+
entry = env.declare_typedef(
name, type, self.pos,
cname=cname, visibility=self.visibility, api=self.api)
-
- if type.is_fused:
- entry.in_cinclude = True
-
- if self.in_pxd and not env.in_cinclude:
- entry.defined_in_pxd = 1
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class FuncDefNode(StatNode, BlockNode):
- # Base class for function definition nodes.
- #
- # return_type PyrexType
- # #filename string C name of filename string const
- # entry Symtab.Entry
- # needs_closure boolean Whether or not this function has inner functions/classes/yield
- # needs_outer_scope boolean Whether or not this function requires outer scope
- # pymethdef_required boolean Force Python method struct generation
- # directive_locals { string : ExprNode } locals defined by cython.locals(...)
- # directive_returns [ExprNode] type defined by cython.returns(...)
+
+ if type.is_fused:
+ entry.in_cinclude = True
+
+ if self.in_pxd and not env.in_cinclude:
+ entry.defined_in_pxd = 1
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+
+class FuncDefNode(StatNode, BlockNode):
+ # Base class for function definition nodes.
+ #
+ # return_type PyrexType
+ # #filename string C name of filename string const
+ # entry Symtab.Entry
+ # needs_closure boolean Whether or not this function has inner functions/classes/yield
+ # needs_outer_scope boolean Whether or not this function requires outer scope
+ # pymethdef_required boolean Force Python method struct generation
+ # directive_locals { string : ExprNode } locals defined by cython.locals(...)
+ # directive_returns [ExprNode] type defined by cython.returns(...)
# star_arg PyArgDeclNode or None * argument
# starstar_arg PyArgDeclNode or None ** argument
#
# is_async_def boolean is a Coroutine function
#
- # has_fused_arguments boolean
- # Whether this cdef function has fused parameters. This is needed
- # by AnalyseDeclarationsTransform, so it can replace CFuncDefNodes
- # with fused argument types with a FusedCFuncDefNode
-
- py_func = None
- needs_closure = False
- needs_outer_scope = False
- pymethdef_required = False
- is_generator = False
- is_generator_body = False
+ # has_fused_arguments boolean
+ # Whether this cdef function has fused parameters. This is needed
+ # by AnalyseDeclarationsTransform, so it can replace CFuncDefNodes
+ # with fused argument types with a FusedCFuncDefNode
+
+ py_func = None
+ needs_closure = False
+ needs_outer_scope = False
+ pymethdef_required = False
+ is_generator = False
+ is_generator_body = False
is_async_def = False
- modifiers = []
- has_fused_arguments = False
- star_arg = None
- starstar_arg = None
- is_cyfunction = False
+ modifiers = []
+ has_fused_arguments = False
+ star_arg = None
+ starstar_arg = None
+ is_cyfunction = False
code_object = None
-
- def analyse_default_values(self, env):
- default_seen = 0
- for arg in self.args:
- if arg.default:
- default_seen = 1
- if arg.is_generic:
- arg.default = arg.default.analyse_types(env)
- arg.default = arg.default.coerce_to(arg.type, env)
- else:
+
+ def analyse_default_values(self, env):
+ default_seen = 0
+ for arg in self.args:
+ if arg.default:
+ default_seen = 1
+ if arg.is_generic:
+ arg.default = arg.default.analyse_types(env)
+ arg.default = arg.default.coerce_to(arg.type, env)
+ else:
error(arg.pos, "This argument cannot have a default value")
- arg.default = None
- elif arg.kw_only:
- default_seen = 1
- elif default_seen:
- error(arg.pos, "Non-default argument following default argument")
-
+ arg.default = None
+ elif arg.kw_only:
+ default_seen = 1
+ elif default_seen:
+ error(arg.pos, "Non-default argument following default argument")
+
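
analyse_default_values above enforces the familiar ordering rule: once a positional argument has a default (or a keyword-only argument has been seen), every later positional argument must also have a default. A minimal standalone sketch of that check, with a simplified argument representation:

def check_default_order(args):
    # args: list of (name, has_default, kw_only) tuples.
    default_seen = False
    errors = []
    for name, has_default, kw_only in args:
        if has_default or kw_only:
            default_seen = True
        elif default_seen:
            errors.append("Non-default argument '%s' following default argument" % name)
    return errors

print(check_default_order([("a", False, False), ("b", True, False), ("c", False, False)]))
# ["Non-default argument 'c' following default argument"]
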
def analyse_annotation(self, env, annotation):
        # Annotations can contain not only valid Python expressions but also arbitrary type references.
if annotation is None:
@@ -1697,177 +1697,177 @@ class FuncDefNode(StatNode, BlockNode):
if arg.annotation:
arg.annotation = self.analyse_annotation(env, arg.annotation)
- def align_argument_type(self, env, arg):
- # @cython.locals()
- directive_locals = self.directive_locals
- orig_type = arg.type
- if arg.name in directive_locals:
- type_node = directive_locals[arg.name]
- other_type = type_node.analyse_as_type(env)
+ def align_argument_type(self, env, arg):
+ # @cython.locals()
+ directive_locals = self.directive_locals
+ orig_type = arg.type
+ if arg.name in directive_locals:
+ type_node = directive_locals[arg.name]
+ other_type = type_node.analyse_as_type(env)
elif isinstance(arg, CArgDeclNode) and arg.annotation and env.directives['annotation_typing']:
- type_node = arg.annotation
- other_type = arg.inject_type_from_annotations(env)
- if other_type is None:
- return arg
- else:
- return arg
- if other_type is None:
- error(type_node.pos, "Not a type")
+ type_node = arg.annotation
+ other_type = arg.inject_type_from_annotations(env)
+ if other_type is None:
+ return arg
+ else:
+ return arg
+ if other_type is None:
+ error(type_node.pos, "Not a type")
elif orig_type is not py_object_type and not orig_type.same_as(other_type):
- error(arg.base_type.pos, "Signature does not agree with previous declaration")
- error(type_node.pos, "Previous declaration here")
- else:
- arg.type = other_type
- return arg
-
- def need_gil_acquisition(self, lenv):
- return 0
-
- def create_local_scope(self, env):
- genv = env
- while genv.is_py_class_scope or genv.is_c_class_scope:
- genv = genv.outer_scope
- if self.needs_closure:
- lenv = ClosureScope(name=self.entry.name,
+ error(arg.base_type.pos, "Signature does not agree with previous declaration")
+ error(type_node.pos, "Previous declaration here")
+ else:
+ arg.type = other_type
+ return arg
+
+ def need_gil_acquisition(self, lenv):
+ return 0
+
+ def create_local_scope(self, env):
+ genv = env
+ while genv.is_py_class_scope or genv.is_c_class_scope:
+ genv = genv.outer_scope
+ if self.needs_closure:
+ lenv = ClosureScope(name=self.entry.name,
outer_scope=genv,
parent_scope=env,
- scope_name=self.entry.cname)
- else:
- lenv = LocalScope(name=self.entry.name,
- outer_scope=genv,
- parent_scope=env)
- lenv.return_type = self.return_type
- type = self.entry.type
- if type.is_cfunction:
- lenv.nogil = type.nogil and not type.with_gil
- self.local_scope = lenv
- lenv.directives = env.directives
- return lenv
-
- def generate_function_body(self, env, code):
- self.body.generate_execution_code(code)
-
- def generate_function_definitions(self, env, code):
- from . import Buffer
- if self.return_type.is_memoryviewslice:
- from . import MemoryView
-
- lenv = self.local_scope
- if lenv.is_closure_scope and not lenv.is_passthrough:
- outer_scope_cname = "%s->%s" % (Naming.cur_scope_cname,
- Naming.outer_scope_cname)
- else:
- outer_scope_cname = Naming.outer_scope_cname
- lenv.mangle_closure_cnames(outer_scope_cname)
- # Generate closure function definitions
- self.body.generate_function_definitions(lenv, code)
- # generate lambda function definitions
- self.generate_lambda_definitions(lenv, code)
-
- is_getbuffer_slot = (self.entry.name == "__getbuffer__" and
- self.entry.scope.is_c_class_scope)
- is_releasebuffer_slot = (self.entry.name == "__releasebuffer__" and
- self.entry.scope.is_c_class_scope)
- is_buffer_slot = is_getbuffer_slot or is_releasebuffer_slot
- if is_buffer_slot:
- if 'cython_unused' not in self.modifiers:
- self.modifiers = self.modifiers + ['cython_unused']
-
- preprocessor_guard = self.get_preprocessor_guard()
-
- profile = code.globalstate.directives['profile']
- linetrace = code.globalstate.directives['linetrace']
- if profile or linetrace:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("Profile", "Profile.c"))
-
- # Generate C code for header and body of function
+ scope_name=self.entry.cname)
+ else:
+ lenv = LocalScope(name=self.entry.name,
+ outer_scope=genv,
+ parent_scope=env)
+ lenv.return_type = self.return_type
+ type = self.entry.type
+ if type.is_cfunction:
+ lenv.nogil = type.nogil and not type.with_gil
+ self.local_scope = lenv
+ lenv.directives = env.directives
+ return lenv
+
+ def generate_function_body(self, env, code):
+ self.body.generate_execution_code(code)
+
+ def generate_function_definitions(self, env, code):
+ from . import Buffer
+ if self.return_type.is_memoryviewslice:
+ from . import MemoryView
+
+ lenv = self.local_scope
+ if lenv.is_closure_scope and not lenv.is_passthrough:
+ outer_scope_cname = "%s->%s" % (Naming.cur_scope_cname,
+ Naming.outer_scope_cname)
+ else:
+ outer_scope_cname = Naming.outer_scope_cname
+ lenv.mangle_closure_cnames(outer_scope_cname)
+ # Generate closure function definitions
+ self.body.generate_function_definitions(lenv, code)
+ # generate lambda function definitions
+ self.generate_lambda_definitions(lenv, code)
+
+ is_getbuffer_slot = (self.entry.name == "__getbuffer__" and
+ self.entry.scope.is_c_class_scope)
+ is_releasebuffer_slot = (self.entry.name == "__releasebuffer__" and
+ self.entry.scope.is_c_class_scope)
+ is_buffer_slot = is_getbuffer_slot or is_releasebuffer_slot
+ if is_buffer_slot:
+ if 'cython_unused' not in self.modifiers:
+ self.modifiers = self.modifiers + ['cython_unused']
+
+ preprocessor_guard = self.get_preprocessor_guard()
+
+ profile = code.globalstate.directives['profile']
+ linetrace = code.globalstate.directives['linetrace']
+ if profile or linetrace:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("Profile", "Profile.c"))
+
+ # Generate C code for header and body of function
code.enter_cfunc_scope(lenv)
- code.return_from_error_cleanup_label = code.new_label()
+ code.return_from_error_cleanup_label = code.new_label()
code.funcstate.gil_owned = not lenv.nogil
-
- # ----- Top-level constants used by this function
- code.mark_pos(self.pos)
- self.generate_cached_builtins_decls(lenv, code)
- # ----- Function header
- code.putln("")
-
- if preprocessor_guard:
- code.putln(preprocessor_guard)
-
- with_pymethdef = (self.needs_assignment_synthesis(env, code) or
- self.pymethdef_required)
- if self.py_func:
+
+ # ----- Top-level constants used by this function
+ code.mark_pos(self.pos)
+ self.generate_cached_builtins_decls(lenv, code)
+ # ----- Function header
+ code.putln("")
+
+ if preprocessor_guard:
+ code.putln(preprocessor_guard)
+
+ with_pymethdef = (self.needs_assignment_synthesis(env, code) or
+ self.pymethdef_required)
+ if self.py_func:
self.py_func.generate_function_header(
code, with_pymethdef=with_pymethdef, proto_only=True)
self.generate_function_header(code, with_pymethdef=with_pymethdef)
- # ----- Local variable declarations
- # Find function scope
- cenv = env
- while cenv.is_py_class_scope or cenv.is_c_class_scope:
- cenv = cenv.outer_scope
- if self.needs_closure:
- code.put(lenv.scope_class.type.declaration_code(Naming.cur_scope_cname))
- code.putln(";")
- elif self.needs_outer_scope:
- if lenv.is_passthrough:
- code.put(lenv.scope_class.type.declaration_code(Naming.cur_scope_cname))
- code.putln(";")
- code.put(cenv.scope_class.type.declaration_code(Naming.outer_scope_cname))
- code.putln(";")
- self.generate_argument_declarations(lenv, code)
-
- for entry in lenv.var_entries:
- if not (entry.in_closure or entry.is_arg):
- code.put_var_declaration(entry)
-
- # Initialize the return variable __pyx_r
- init = ""
- if not self.return_type.is_void:
- if self.return_type.is_pyobject:
- init = " = NULL"
- elif self.return_type.is_memoryviewslice:
- init = ' = ' + MemoryView.memslice_entry_init
-
+ # ----- Local variable declarations
+ # Find function scope
+ cenv = env
+ while cenv.is_py_class_scope or cenv.is_c_class_scope:
+ cenv = cenv.outer_scope
+ if self.needs_closure:
+ code.put(lenv.scope_class.type.declaration_code(Naming.cur_scope_cname))
+ code.putln(";")
+ elif self.needs_outer_scope:
+ if lenv.is_passthrough:
+ code.put(lenv.scope_class.type.declaration_code(Naming.cur_scope_cname))
+ code.putln(";")
+ code.put(cenv.scope_class.type.declaration_code(Naming.outer_scope_cname))
+ code.putln(";")
+ self.generate_argument_declarations(lenv, code)
+
+ for entry in lenv.var_entries:
+ if not (entry.in_closure or entry.is_arg):
+ code.put_var_declaration(entry)
+
+ # Initialize the return variable __pyx_r
+ init = ""
+ if not self.return_type.is_void:
+ if self.return_type.is_pyobject:
+ init = " = NULL"
+ elif self.return_type.is_memoryviewslice:
+ init = ' = ' + MemoryView.memslice_entry_init
+
code.putln("%s%s;" % (
self.return_type.declaration_code(Naming.retval_cname),
init))
-
- tempvardecl_code = code.insertion_point()
- self.generate_keyword_list(code)
-
- # ----- GIL acquisition
- acquire_gil = self.acquire_gil
-
- # See if we need to acquire the GIL for variable declarations, or for
- # refnanny only
-
+
+ tempvardecl_code = code.insertion_point()
+ self.generate_keyword_list(code)
+
+ # ----- GIL acquisition
+ acquire_gil = self.acquire_gil
+
+ # See if we need to acquire the GIL for variable declarations, or for
+ # refnanny only
+
# Closures are not currently possible for cdef nogil functions,
# but check them anyway
have_object_args = self.needs_closure or self.needs_outer_scope
- for arg in lenv.arg_entries:
- if arg.type.is_pyobject:
- have_object_args = True
- break
-
+ for arg in lenv.arg_entries:
+ if arg.type.is_pyobject:
+ have_object_args = True
+ break
+
used_buffer_entries = [entry for entry in lenv.buffer_entries if entry.used]
- acquire_gil_for_var_decls_only = (
+ acquire_gil_for_var_decls_only = (
lenv.nogil and lenv.has_with_gil_block and
(have_object_args or used_buffer_entries))
-
- acquire_gil_for_refnanny_only = (
+
+ acquire_gil_for_refnanny_only = (
lenv.nogil and lenv.has_with_gil_block and not
acquire_gil_for_var_decls_only)
-
- use_refnanny = not lenv.nogil or lenv.has_with_gil_block
-
- if acquire_gil or acquire_gil_for_var_decls_only:
- code.put_ensure_gil()
+
+ use_refnanny = not lenv.nogil or lenv.has_with_gil_block
+
+ if acquire_gil or acquire_gil_for_var_decls_only:
+ code.put_ensure_gil()
code.funcstate.gil_owned = True
- elif lenv.nogil and lenv.has_with_gil_block:
- code.declare_gilstate()
-
+ elif lenv.nogil and lenv.has_with_gil_block:
+ code.declare_gilstate()
+
if profile or linetrace:
if not self.is_generator:
# generators are traced when iterated, not at creation
@@ -1879,28 +1879,28 @@ class FuncDefNode(StatNode, BlockNode):
if is_getbuffer_slot:
self.getbuffer_check(code)
- # ----- set up refnanny
- if use_refnanny:
- tempvardecl_code.put_declare_refcount_context()
- code.put_setup_refcount_context(
- self.entry.name, acquire_gil=acquire_gil_for_refnanny_only)
-
- # ----- Automatic lead-ins for certain special functions
- if is_getbuffer_slot:
- self.getbuffer_init(code)
- # ----- Create closure scope object
- if self.needs_closure:
- tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
- slot_func_cname = TypeSlots.get_slot_function(lenv.scope_class.type.scope, tp_slot)
- if not slot_func_cname:
- slot_func_cname = '%s->tp_new' % lenv.scope_class.type.typeptr_cname
- code.putln("%s = (%s)%s(%s, %s, NULL);" % (
- Naming.cur_scope_cname,
+ # ----- set up refnanny
+ if use_refnanny:
+ tempvardecl_code.put_declare_refcount_context()
+ code.put_setup_refcount_context(
+ self.entry.name, acquire_gil=acquire_gil_for_refnanny_only)
+
+ # ----- Automatic lead-ins for certain special functions
+ if is_getbuffer_slot:
+ self.getbuffer_init(code)
+ # ----- Create closure scope object
+ if self.needs_closure:
+ tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
+ slot_func_cname = TypeSlots.get_slot_function(lenv.scope_class.type.scope, tp_slot)
+ if not slot_func_cname:
+ slot_func_cname = '%s->tp_new' % lenv.scope_class.type.typeptr_cname
+ code.putln("%s = (%s)%s(%s, %s, NULL);" % (
+ Naming.cur_scope_cname,
lenv.scope_class.type.empty_declaration_code(),
- slot_func_cname,
- lenv.scope_class.type.typeptr_cname,
- Naming.empty_tuple))
- code.putln("if (unlikely(!%s)) {" % Naming.cur_scope_cname)
+ slot_func_cname,
+ lenv.scope_class.type.typeptr_cname,
+ Naming.empty_tuple))
+ code.putln("if (unlikely(!%s)) {" % Naming.cur_scope_cname)
# Scope unconditionally DECREFed on return.
code.putln("%s = %s;" % (
Naming.cur_scope_cname,
@@ -1909,29 +1909,29 @@ class FuncDefNode(StatNode, BlockNode):
code.putln(code.error_goto(self.pos))
code.putln("} else {")
code.put_gotref(Naming.cur_scope_cname)
- code.putln("}")
- # Note that it is unsafe to decref the scope at this point.
- if self.needs_outer_scope:
- if self.is_cyfunction:
- code.putln("%s = (%s) __Pyx_CyFunction_GetClosure(%s);" % (
- outer_scope_cname,
+ code.putln("}")
+ # Note that it is unsafe to decref the scope at this point.
+ if self.needs_outer_scope:
+ if self.is_cyfunction:
+ code.putln("%s = (%s) __Pyx_CyFunction_GetClosure(%s);" % (
+ outer_scope_cname,
cenv.scope_class.type.empty_declaration_code(),
- Naming.self_cname))
- else:
- code.putln("%s = (%s) %s;" % (
- outer_scope_cname,
+ Naming.self_cname))
+ else:
+ code.putln("%s = (%s) %s;" % (
+ outer_scope_cname,
cenv.scope_class.type.empty_declaration_code(),
- Naming.self_cname))
- if lenv.is_passthrough:
- code.putln("%s = %s;" % (Naming.cur_scope_cname, outer_scope_cname))
- elif self.needs_closure:
- # inner closures own a reference to their outer parent
- code.put_incref(outer_scope_cname, cenv.scope_class.type)
- code.put_giveref(outer_scope_cname)
- # ----- Trace function call
- if profile or linetrace:
- # this looks a bit late, but if we don't get here due to a
- # fatal error beforehand, it's not really worth tracing
+ Naming.self_cname))
+ if lenv.is_passthrough:
+ code.putln("%s = %s;" % (Naming.cur_scope_cname, outer_scope_cname))
+ elif self.needs_closure:
+ # inner closures own a reference to their outer parent
+ code.put_incref(outer_scope_cname, cenv.scope_class.type)
+ code.put_giveref(outer_scope_cname)
+ # ----- Trace function call
+ if profile or linetrace:
+ # this looks a bit late, but if we don't get here due to a
+ # fatal error beforehand, it's not really worth tracing
if not self.is_generator:
# generators are traced when iterated, not at creation
if self.is_wrapper:
@@ -1940,207 +1940,207 @@ class FuncDefNode(StatNode, BlockNode):
trace_name = self.entry.name
code.put_trace_call(
trace_name, self.pos, nogil=not code.funcstate.gil_owned)
- code.funcstate.can_trace = True
- # ----- Fetch arguments
- self.generate_argument_parsing_code(env, code)
- # If an argument is assigned to in the body, we must
- # incref it to properly keep track of refcounts.
- is_cdef = isinstance(self, CFuncDefNode)
- for entry in lenv.arg_entries:
- if entry.type.is_pyobject:
+ code.funcstate.can_trace = True
+ # ----- Fetch arguments
+ self.generate_argument_parsing_code(env, code)
+ # If an argument is assigned to in the body, we must
+ # incref it to properly keep track of refcounts.
+ is_cdef = isinstance(self, CFuncDefNode)
+ for entry in lenv.arg_entries:
+ if entry.type.is_pyobject:
if (acquire_gil or len(entry.cf_assignments) > 1) and not entry.in_closure:
- code.put_var_incref(entry)
-
- # Note: defaults are always incref-ed. For def functions, we
+ code.put_var_incref(entry)
+
+ # Note: defaults are always incref-ed. For def functions, we
# acquire arguments from object conversion, so we have
- # new references. If we are a cdef function, we need to
- # incref our arguments
+ # new references. If we are a cdef function, we need to
+ # incref our arguments
elif is_cdef and entry.type.is_memoryviewslice and len(entry.cf_assignments) > 1:
code.put_incref_memoryviewslice(entry.cname, have_gil=code.funcstate.gil_owned)
- for entry in lenv.var_entries:
+ for entry in lenv.var_entries:
if entry.is_arg and len(entry.cf_assignments) > 1 and not entry.in_closure:
if entry.xdecref_cleanup:
code.put_var_xincref(entry)
else:
code.put_var_incref(entry)
-
- # ----- Initialise local buffer auxiliary variables
- for entry in lenv.var_entries + lenv.arg_entries:
- if entry.type.is_buffer and entry.buffer_aux.buflocal_nd_var.used:
- Buffer.put_init_vars(entry, code)
-
- # ----- Check and convert arguments
- self.generate_argument_type_tests(code)
- # ----- Acquire buffer arguments
- for entry in lenv.arg_entries:
- if entry.type.is_buffer:
- Buffer.put_acquire_arg_buffer(entry, code, self.pos)
-
- if acquire_gil_for_var_decls_only:
- code.put_release_ensured_gil()
+
+ # ----- Initialise local buffer auxiliary variables
+ for entry in lenv.var_entries + lenv.arg_entries:
+ if entry.type.is_buffer and entry.buffer_aux.buflocal_nd_var.used:
+ Buffer.put_init_vars(entry, code)
+
+ # ----- Check and convert arguments
+ self.generate_argument_type_tests(code)
+ # ----- Acquire buffer arguments
+ for entry in lenv.arg_entries:
+ if entry.type.is_buffer:
+ Buffer.put_acquire_arg_buffer(entry, code, self.pos)
+
+ if acquire_gil_for_var_decls_only:
+ code.put_release_ensured_gil()
code.funcstate.gil_owned = False
-
- # -------------------------
- # ----- Function body -----
- # -------------------------
- self.generate_function_body(env, code)
-
+
+ # -------------------------
+ # ----- Function body -----
+ # -------------------------
+ self.generate_function_body(env, code)
+
code.mark_pos(self.pos, trace=False)
- code.putln("")
- code.putln("/* function exit code */")
-
- # ----- Default return value
- if not self.body.is_terminator:
- if self.return_type.is_pyobject:
- #if self.return_type.is_extension_type:
- # lhs = "(PyObject *)%s" % Naming.retval_cname
- #else:
- lhs = Naming.retval_cname
- code.put_init_to_py_none(lhs, self.return_type)
- else:
- val = self.return_type.default_value
- if val:
- code.putln("%s = %s;" % (Naming.retval_cname, val))
+ code.putln("")
+ code.putln("/* function exit code */")
+
+ # ----- Default return value
+ if not self.body.is_terminator:
+ if self.return_type.is_pyobject:
+ #if self.return_type.is_extension_type:
+ # lhs = "(PyObject *)%s" % Naming.retval_cname
+ #else:
+ lhs = Naming.retval_cname
+ code.put_init_to_py_none(lhs, self.return_type)
+ else:
+ val = self.return_type.default_value
+ if val:
+ code.putln("%s = %s;" % (Naming.retval_cname, val))
elif not self.return_type.is_void:
code.putln("__Pyx_pretend_to_initialize(&%s);" % Naming.retval_cname)
- # ----- Error cleanup
- if code.error_label in code.labels_used:
- if not self.body.is_terminator:
- code.put_goto(code.return_label)
- code.put_label(code.error_label)
- for cname, type in code.funcstate.all_managed_temps():
- code.put_xdecref(cname, type, have_gil=not lenv.nogil)
-
- # Clean up buffers -- this calls a Python function
- # so need to save and restore error state
+ # ----- Error cleanup
+ if code.error_label in code.labels_used:
+ if not self.body.is_terminator:
+ code.put_goto(code.return_label)
+ code.put_label(code.error_label)
+ for cname, type in code.funcstate.all_managed_temps():
+ code.put_xdecref(cname, type, have_gil=not lenv.nogil)
+
+ # Clean up buffers -- this calls a Python function
+ # so need to save and restore error state
buffers_present = len(used_buffer_entries) > 0
#memslice_entries = [e for e in lenv.entries.values() if e.type.is_memoryviewslice]
- if buffers_present:
- code.globalstate.use_utility_code(restore_exception_utility_code)
- code.putln("{ PyObject *__pyx_type, *__pyx_value, *__pyx_tb;")
+ if buffers_present:
+ code.globalstate.use_utility_code(restore_exception_utility_code)
+ code.putln("{ PyObject *__pyx_type, *__pyx_value, *__pyx_tb;")
code.putln("__Pyx_PyThreadState_declare")
code.putln("__Pyx_PyThreadState_assign")
- code.putln("__Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);")
+ code.putln("__Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);")
for entry in used_buffer_entries:
Buffer.put_release_buffer_code(code, entry)
- #code.putln("%s = 0;" % entry.cname)
- code.putln("__Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}")
-
- if self.return_type.is_memoryviewslice:
- MemoryView.put_init_entry(Naming.retval_cname, code)
- err_val = Naming.retval_cname
- else:
- err_val = self.error_value()
-
- exc_check = self.caller_will_check_exceptions()
- if err_val is not None or exc_check:
- # TODO: Fix exception tracing (though currently unused by cProfile).
- # code.globalstate.use_utility_code(get_exception_tuple_utility_code)
- # code.put_trace_exception()
-
- if lenv.nogil and not lenv.has_with_gil_block:
- code.putln("{")
- code.put_ensure_gil()
-
- code.put_add_traceback(self.entry.qualified_name)
-
- if lenv.nogil and not lenv.has_with_gil_block:
- code.put_release_ensured_gil()
- code.putln("}")
- else:
- warning(self.entry.pos,
- "Unraisable exception in function '%s'." %
- self.entry.qualified_name, 0)
+ #code.putln("%s = 0;" % entry.cname)
+ code.putln("__Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}")
+
+ if self.return_type.is_memoryviewslice:
+ MemoryView.put_init_entry(Naming.retval_cname, code)
+ err_val = Naming.retval_cname
+ else:
+ err_val = self.error_value()
+
+ exc_check = self.caller_will_check_exceptions()
+ if err_val is not None or exc_check:
+ # TODO: Fix exception tracing (though currently unused by cProfile).
+ # code.globalstate.use_utility_code(get_exception_tuple_utility_code)
+ # code.put_trace_exception()
+
+ if lenv.nogil and not lenv.has_with_gil_block:
+ code.putln("{")
+ code.put_ensure_gil()
+
+ code.put_add_traceback(self.entry.qualified_name)
+
+ if lenv.nogil and not lenv.has_with_gil_block:
+ code.put_release_ensured_gil()
+ code.putln("}")
+ else:
+ warning(self.entry.pos,
+ "Unraisable exception in function '%s'." %
+ self.entry.qualified_name, 0)
code.put_unraisable(self.entry.qualified_name, lenv.nogil)
- default_retval = self.return_type.default_value
- if err_val is None and default_retval:
- err_val = default_retval
- if err_val is not None:
+ default_retval = self.return_type.default_value
+ if err_val is None and default_retval:
+ err_val = default_retval
+ if err_val is not None:
if err_val != Naming.retval_cname:
code.putln("%s = %s;" % (Naming.retval_cname, err_val))
elif not self.return_type.is_void:
code.putln("__Pyx_pretend_to_initialize(&%s);" % Naming.retval_cname)
-
- if is_getbuffer_slot:
- self.getbuffer_error_cleanup(code)
-
- # If we are using the non-error cleanup section we should
- # jump past it if we have an error. The if-test below determines
- # whether this section is used.
- if buffers_present or is_getbuffer_slot or self.return_type.is_memoryviewslice:
- code.put_goto(code.return_from_error_cleanup_label)
-
- # ----- Non-error return cleanup
- code.put_label(code.return_label)
+
+ if is_getbuffer_slot:
+ self.getbuffer_error_cleanup(code)
+
+ # If we are using the non-error cleanup section we should
+ # jump past it if we have an error. The if-test below determines
+ # whether this section is used.
+ if buffers_present or is_getbuffer_slot or self.return_type.is_memoryviewslice:
+ code.put_goto(code.return_from_error_cleanup_label)
+
+ # ----- Non-error return cleanup
+ code.put_label(code.return_label)
for entry in used_buffer_entries:
Buffer.put_release_buffer_code(code, entry)
- if is_getbuffer_slot:
- self.getbuffer_normal_cleanup(code)
-
- if self.return_type.is_memoryviewslice:
- # See if our return value is uninitialized on non-error return
- # from . import MemoryView
- # MemoryView.err_if_nogil_initialized_check(self.pos, env)
+ if is_getbuffer_slot:
+ self.getbuffer_normal_cleanup(code)
+
+ if self.return_type.is_memoryviewslice:
+ # See if our return value is uninitialized on non-error return
+ # from . import MemoryView
+ # MemoryView.err_if_nogil_initialized_check(self.pos, env)
cond = code.unlikely(self.return_type.error_condition(Naming.retval_cname))
- code.putln(
- 'if (%s) {' % cond)
- if env.nogil:
- code.put_ensure_gil()
- code.putln(
+ code.putln(
+ 'if (%s) {' % cond)
+ if env.nogil:
+ code.put_ensure_gil()
+ code.putln(
'PyErr_SetString(PyExc_TypeError, "Memoryview return value is not initialized");')
- if env.nogil:
- code.put_release_ensured_gil()
- code.putln(
- '}')
-
- # ----- Return cleanup for both error and no-error return
- code.put_label(code.return_from_error_cleanup_label)
-
- for entry in lenv.var_entries:
- if not entry.used or entry.in_closure:
- continue
-
- if entry.type.is_memoryviewslice:
+ if env.nogil:
+ code.put_release_ensured_gil()
+ code.putln(
+ '}')
+
+ # ----- Return cleanup for both error and no-error return
+ code.put_label(code.return_from_error_cleanup_label)
+
+ for entry in lenv.var_entries:
+ if not entry.used or entry.in_closure:
+ continue
+
+ if entry.type.is_memoryviewslice:
code.put_xdecref_memoryviewslice(entry.cname, have_gil=not lenv.nogil)
- elif entry.type.is_pyobject:
- if not entry.is_arg or len(entry.cf_assignments) > 1:
+ elif entry.type.is_pyobject:
+ if not entry.is_arg or len(entry.cf_assignments) > 1:
if entry.xdecref_cleanup:
code.put_var_xdecref(entry)
else:
code.put_var_decref(entry)
-
- # Decref any increfed args
- for entry in lenv.arg_entries:
- if entry.type.is_pyobject:
+
+ # Decref any increfed args
+ for entry in lenv.arg_entries:
+ if entry.type.is_pyobject:
if (acquire_gil or len(entry.cf_assignments) > 1) and not entry.in_closure:
- code.put_var_decref(entry)
- elif (entry.type.is_memoryviewslice and
- (not is_cdef or len(entry.cf_assignments) > 1)):
- # decref slices of def functions and acquired slices from cdef
- # functions, but not borrowed slices from cdef functions.
- code.put_xdecref_memoryviewslice(entry.cname,
- have_gil=not lenv.nogil)
- if self.needs_closure:
- code.put_decref(Naming.cur_scope_cname, lenv.scope_class.type)
-
- # ----- Return
- # This code is duplicated in ModuleNode.generate_module_init_func
- if not lenv.nogil:
- default_retval = self.return_type.default_value
- err_val = self.error_value()
- if err_val is None and default_retval:
- err_val = default_retval # FIXME: why is err_val not used?
- if self.return_type.is_pyobject:
- code.put_xgiveref(self.return_type.as_pyobject(Naming.retval_cname))
-
- if self.entry.is_special and self.entry.name == "__hash__":
- # Returning -1 for __hash__ is supposed to signal an error
- # We do as Python instances do and coerce -1 into -2.
- code.putln("if (unlikely(%s == -1) && !PyErr_Occurred()) %s = -2;" % (
+ code.put_var_decref(entry)
+ elif (entry.type.is_memoryviewslice and
+ (not is_cdef or len(entry.cf_assignments) > 1)):
+ # decref slices of def functions and acquired slices from cdef
+ # functions, but not borrowed slices from cdef functions.
+ code.put_xdecref_memoryviewslice(entry.cname,
+ have_gil=not lenv.nogil)
+ if self.needs_closure:
+ code.put_decref(Naming.cur_scope_cname, lenv.scope_class.type)
+
+ # ----- Return
+ # This code is duplicated in ModuleNode.generate_module_init_func
+ if not lenv.nogil:
+ default_retval = self.return_type.default_value
+ err_val = self.error_value()
+ if err_val is None and default_retval:
+ err_val = default_retval # FIXME: why is err_val not used?
+ if self.return_type.is_pyobject:
+ code.put_xgiveref(self.return_type.as_pyobject(Naming.retval_cname))
+
+ if self.entry.is_special and self.entry.name == "__hash__":
+ # Returning -1 for __hash__ is supposed to signal an error
+ # We do as Python instances do and coerce -1 into -2.
+ code.putln("if (unlikely(%s == -1) && !PyErr_Occurred()) %s = -2;" % (
Naming.retval_cname, Naming.retval_cname))
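
The coercion above mirrors CPython's convention for tp_hash: a return value of -1 signals an error, so a legitimate hash of -1 must be reported as -2. The same behaviour can be observed from pure Python:

print(hash(-1))   # -2: -1 is reserved as the error return of tp_hash
print(hash(-2))   # -2: so -1 and -2 intentionally collide

class AlwaysMinusOne:
    def __hash__(self):
        return -1

print(hash(AlwaysMinusOne()))  # -2: CPython coerces the returned -1 to -2
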
-
- if profile or linetrace:
- code.funcstate.can_trace = False
+
+ if profile or linetrace:
+ code.funcstate.can_trace = False
if not self.is_generator:
# generators are traced when iterated, not at creation
if self.return_type.is_pyobject:
@@ -2149,87 +2149,87 @@ class FuncDefNode(StatNode, BlockNode):
else:
code.put_trace_return(
"Py_None", nogil=not code.funcstate.gil_owned)
-
- if not lenv.nogil:
- # GIL holding function
- code.put_finish_refcount_context()
-
- if acquire_gil or (lenv.nogil and lenv.has_with_gil_block):
- # release the GIL (note that with-gil blocks acquire it on exit in their EnsureGILNode)
- code.put_release_ensured_gil()
+
+ if not lenv.nogil:
+ # GIL holding function
+ code.put_finish_refcount_context()
+
+ if acquire_gil or (lenv.nogil and lenv.has_with_gil_block):
+ # release the GIL (note that with-gil blocks acquire it on exit in their EnsureGILNode)
+ code.put_release_ensured_gil()
code.funcstate.gil_owned = False
-
- if not self.return_type.is_void:
- code.putln("return %s;" % Naming.retval_cname)
-
- code.putln("}")
-
- if preprocessor_guard:
- code.putln("#endif /*!(%s)*/" % preprocessor_guard)
-
- # ----- Go back and insert temp variable declarations
- tempvardecl_code.put_temp_declarations(code.funcstate)
-
- # ----- Python version
- code.exit_cfunc_scope()
- if self.py_func:
- self.py_func.generate_function_definitions(env, code)
- self.generate_wrapper_functions(code)
-
- def declare_argument(self, env, arg):
- if arg.type.is_void:
- error(arg.pos, "Invalid use of 'void'")
- elif not arg.type.is_complete() and not (arg.type.is_array or arg.type.is_memoryviewslice):
+
+ if not self.return_type.is_void:
+ code.putln("return %s;" % Naming.retval_cname)
+
+ code.putln("}")
+
+ if preprocessor_guard:
+ code.putln("#endif /*!(%s)*/" % preprocessor_guard)
+
+ # ----- Go back and insert temp variable declarations
+ tempvardecl_code.put_temp_declarations(code.funcstate)
+
+ # ----- Python version
+ code.exit_cfunc_scope()
+ if self.py_func:
+ self.py_func.generate_function_definitions(env, code)
+ self.generate_wrapper_functions(code)
+
+ def declare_argument(self, env, arg):
+ if arg.type.is_void:
+ error(arg.pos, "Invalid use of 'void'")
+ elif not arg.type.is_complete() and not (arg.type.is_array or arg.type.is_memoryviewslice):
error(arg.pos, "Argument type '%s' is incomplete" % arg.type)
entry = env.declare_arg(arg.name, arg.type, arg.pos)
if arg.annotation:
entry.annotation = arg.annotation
return entry
-
- def generate_arg_type_test(self, arg, code):
- # Generate type test for one argument.
- if arg.type.typeobj_is_available():
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("ArgTypeTest", "FunctionArguments.c"))
- typeptr_cname = arg.type.typeptr_cname
- arg_code = "((PyObject *)%s)" % arg.entry.cname
- code.putln(
- 'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, "%s", %s))) %s' % (
- arg_code,
- typeptr_cname,
- arg.accept_none,
- arg.name,
+
+ def generate_arg_type_test(self, arg, code):
+ # Generate type test for one argument.
+ if arg.type.typeobj_is_available():
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("ArgTypeTest", "FunctionArguments.c"))
+ typeptr_cname = arg.type.typeptr_cname
+ arg_code = "((PyObject *)%s)" % arg.entry.cname
+ code.putln(
+ 'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, "%s", %s))) %s' % (
+ arg_code,
+ typeptr_cname,
+ arg.accept_none,
+ arg.name,
arg.type.is_builtin_type and arg.type.require_exact,
- code.error_goto(arg.pos)))
- else:
+ code.error_goto(arg.pos)))
+ else:
error(arg.pos, "Cannot test type of extern C class without type object name specification")
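
generate_arg_type_test above emits a call to the __Pyx_ArgTypeTest utility, which raises TypeError unless the argument is an instance of the expected type, optionally allowing None and optionally requiring the exact type. A hypothetical Python-level analogue of that check (names and signature invented for illustration):

def arg_type_test(value, expected_type, accept_none, name, exact=False):
    if value is None:
        if accept_none:
            return
        raise TypeError("Argument '%s' must not be None" % name)
    ok = type(value) is expected_type if exact else isinstance(value, expected_type)
    if not ok:
        raise TypeError("Argument '%s' has incorrect type (expected %s, got %s)" % (
            name, expected_type.__name__, type(value).__name__))

arg_type_test([1, 2], list, accept_none=False, name="items")  # passes silently
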
-
- def generate_arg_none_check(self, arg, code):
- # Generate None check for one argument.
- if arg.type.is_memoryviewslice:
- cname = "%s.memview" % arg.entry.cname
- else:
- cname = arg.entry.cname
-
- code.putln('if (unlikely(((PyObject *)%s) == Py_None)) {' % cname)
- code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", "%s"); %s''' % (
- max(200, len(arg.name)), arg.name,
- code.error_goto(arg.pos)))
- code.putln('}')
-
- def generate_wrapper_functions(self, code):
- pass
-
- def generate_execution_code(self, code):
+
+ def generate_arg_none_check(self, arg, code):
+ # Generate None check for one argument.
+ if arg.type.is_memoryviewslice:
+ cname = "%s.memview" % arg.entry.cname
+ else:
+ cname = arg.entry.cname
+
+ code.putln('if (unlikely(((PyObject *)%s) == Py_None)) {' % cname)
+ code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", "%s"); %s''' % (
+ max(200, len(arg.name)), arg.name,
+ code.error_goto(arg.pos)))
+ code.putln('}')
+
+ def generate_wrapper_functions(self, code):
+ pass
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- # Evaluate and store argument default values
- for arg in self.args:
- if not arg.is_dynamic:
- arg.generate_assignment_code(code)
-
- #
- # Special code for the __getbuffer__ function
- #
+ # Evaluate and store argument default values
+ for arg in self.args:
+ if not arg.is_dynamic:
+ arg.generate_assignment_code(code)
+
+ #
+ # Special code for the __getbuffer__ function
+ #
def _get_py_buffer_info(self):
py_buffer = self.local_scope.arg_entries[1]
try:
@@ -2253,8 +2253,8 @@ class FuncDefNode(StatNode, BlockNode):
code.putln("PyErr_SetString(PyExc_BufferError, "
"\"PyObject_GetBuffer: view==NULL argument is obsolete\");")
code.putln("return -1;")
- code.putln("}")
-
+ code.putln("}")
+
def getbuffer_init(self, code):
py_buffer, obj_type = self._get_py_buffer_info()
view = py_buffer.cname
@@ -2264,7 +2264,7 @@ class FuncDefNode(StatNode, BlockNode):
else:
code.putln("%s->obj = NULL;" % view)
- def getbuffer_error_cleanup(self, code):
+ def getbuffer_error_cleanup(self, code):
py_buffer, obj_type = self._get_py_buffer_info()
view = py_buffer.cname
if obj_type and obj_type.is_pyobject:
@@ -2274,8 +2274,8 @@ class FuncDefNode(StatNode, BlockNode):
code.putln("}")
else:
code.putln("Py_CLEAR(%s->obj);" % view)
-
- def getbuffer_normal_cleanup(self, code):
+
+ def getbuffer_normal_cleanup(self, code):
py_buffer, obj_type = self._get_py_buffer_info()
view = py_buffer.cname
if obj_type and obj_type.is_pyobject:
@@ -2283,170 +2283,170 @@ class FuncDefNode(StatNode, BlockNode):
code.put_gotref("%s->obj" % view)
code.put_decref_clear("%s->obj" % view, obj_type)
code.putln("}")
-
- def get_preprocessor_guard(self):
- if not self.entry.is_special:
- return None
- name = self.entry.name
- slot = TypeSlots.method_name_to_slot.get(name)
- if not slot:
- return None
- if name == '__long__' and not self.entry.scope.lookup_here('__int__'):
- return None
- if name in ("__getbuffer__", "__releasebuffer__") and self.entry.scope.is_c_class_scope:
- return None
- return slot.preprocessor_guard_code()
-
-
-class CFuncDefNode(FuncDefNode):
- # C function definition.
- #
- # modifiers ['inline']
- # visibility 'private' or 'public' or 'extern'
- # base_type CBaseTypeNode
- # declarator CDeclaratorNode
- # cfunc_declarator the CFuncDeclarator of this function
- # (this is also available through declarator or a
- # base thereof)
- # body StatListNode
- # api boolean
- # decorators [DecoratorNode] list of decorators
- #
- # with_gil boolean Acquire GIL around body
- # type CFuncType
- # py_func wrapper for calling from Python
- # overridable whether or not this is a cpdef function
- # inline_in_pxd whether this is an inline function in a pxd file
- # template_declaration String or None Used for c++ class methods
- # is_const_method whether this is a const method
- # is_static_method whether this is a static method
- # is_c_class_method whether this is a cclass method
-
- child_attrs = ["base_type", "declarator", "body", "py_func_stat"]
-
- inline_in_pxd = False
- decorators = None
- directive_locals = None
- directive_returns = None
- override = None
- template_declaration = None
- is_const_method = False
- py_func_stat = None
-
- def unqualified_name(self):
- return self.entry.name
-
+
+ def get_preprocessor_guard(self):
+ if not self.entry.is_special:
+ return None
+ name = self.entry.name
+ slot = TypeSlots.method_name_to_slot.get(name)
+ if not slot:
+ return None
+ if name == '__long__' and not self.entry.scope.lookup_here('__int__'):
+ return None
+ if name in ("__getbuffer__", "__releasebuffer__") and self.entry.scope.is_c_class_scope:
+ return None
+ return slot.preprocessor_guard_code()
+
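
get_preprocessor_guard above wraps special methods whose type slot only exists under certain C preprocessor conditions. A simplified sketch of the idea, with an illustrative (not exhaustive or authoritative) slot-to-guard table:

SLOT_GUARDS = {
    # Illustrative values only: slots that exist solely on Python 2 get a
    # version guard so the generated C compiles under both major versions.
    "__long__": "#if PY_MAJOR_VERSION < 3",
    "__cmp__": "#if PY_MAJOR_VERSION < 3",
}

def preprocessor_guard(name, is_special):
    if not is_special:
        return None
    return SLOT_GUARDS.get(name)

print(preprocessor_guard("__long__", True))       # '#if PY_MAJOR_VERSION < 3'
print(preprocessor_guard("frobnicate", False))    # None
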
+
+class CFuncDefNode(FuncDefNode):
+ # C function definition.
+ #
+ # modifiers ['inline']
+ # visibility 'private' or 'public' or 'extern'
+ # base_type CBaseTypeNode
+ # declarator CDeclaratorNode
+ # cfunc_declarator the CFuncDeclarator of this function
+ # (this is also available through declarator or a
+ # base thereof)
+ # body StatListNode
+ # api boolean
+ # decorators [DecoratorNode] list of decorators
+ #
+ # with_gil boolean Acquire GIL around body
+ # type CFuncType
+ # py_func wrapper for calling from Python
+ # overridable whether or not this is a cpdef function
+ # inline_in_pxd whether this is an inline function in a pxd file
+ # template_declaration String or None Used for c++ class methods
+ # is_const_method whether this is a const method
+ # is_static_method whether this is a static method
+ # is_c_class_method whether this is a cclass method
+
+ child_attrs = ["base_type", "declarator", "body", "py_func_stat"]
+
+ inline_in_pxd = False
+ decorators = None
+ directive_locals = None
+ directive_returns = None
+ override = None
+ template_declaration = None
+ is_const_method = False
+ py_func_stat = None
+
+ def unqualified_name(self):
+ return self.entry.name
+
@property
def code_object(self):
# share the CodeObject with the cpdef wrapper (if available)
return self.py_func.code_object if self.py_func else None
- def analyse_declarations(self, env):
- self.is_c_class_method = env.is_c_class_scope
- if self.directive_locals is None:
- self.directive_locals = {}
+ def analyse_declarations(self, env):
+ self.is_c_class_method = env.is_c_class_scope
+ if self.directive_locals is None:
+ self.directive_locals = {}
self.directive_locals.update(env.directives.get('locals', {}))
- if self.directive_returns is not None:
- base_type = self.directive_returns.analyse_as_type(env)
- if base_type is None:
- error(self.directive_returns.pos, "Not a type")
- base_type = PyrexTypes.error_type
- else:
- base_type = self.base_type.analyse(env)
- self.is_static_method = 'staticmethod' in env.directives and not env.lookup_here('staticmethod')
- # The 2 here is because we need both function and argument names.
- if isinstance(self.declarator, CFuncDeclaratorNode):
+ if self.directive_returns is not None:
+ base_type = self.directive_returns.analyse_as_type(env)
+ if base_type is None:
+ error(self.directive_returns.pos, "Not a type")
+ base_type = PyrexTypes.error_type
+ else:
+ base_type = self.base_type.analyse(env)
+ self.is_static_method = 'staticmethod' in env.directives and not env.lookup_here('staticmethod')
+ # The 2 here is because we need both function and argument names.
+ if isinstance(self.declarator, CFuncDeclaratorNode):
name_declarator, type = self.declarator.analyse(
base_type, env, nonempty=2 * (self.body is not None),
directive_locals=self.directive_locals, visibility=self.visibility)
- else:
+ else:
name_declarator, type = self.declarator.analyse(
base_type, env, nonempty=2 * (self.body is not None), visibility=self.visibility)
- if not type.is_cfunction:
+ if not type.is_cfunction:
error(self.pos, "Suite attached to non-function declaration")
- # Remember the actual type according to the function header
- # written here, because the type in the symbol table entry
- # may be different if we're overriding a C method inherited
- # from the base type of an extension type.
- self.type = type
- type.is_overridable = self.overridable
- declarator = self.declarator
- while not hasattr(declarator, 'args'):
- declarator = declarator.base
-
- self.cfunc_declarator = declarator
- self.args = declarator.args
-
- opt_arg_count = self.cfunc_declarator.optional_arg_count
- if (self.visibility == 'public' or self.api) and opt_arg_count:
- error(self.cfunc_declarator.pos,
+ # Remember the actual type according to the function header
+ # written here, because the type in the symbol table entry
+ # may be different if we're overriding a C method inherited
+ # from the base type of an extension type.
+ self.type = type
+ type.is_overridable = self.overridable
+ declarator = self.declarator
+ while not hasattr(declarator, 'args'):
+ declarator = declarator.base
+
+ self.cfunc_declarator = declarator
+ self.args = declarator.args
+
+ opt_arg_count = self.cfunc_declarator.optional_arg_count
+ if (self.visibility == 'public' or self.api) and opt_arg_count:
+ error(self.cfunc_declarator.pos,
"Function with optional arguments may not be declared public or api")
-
+
if type.exception_check == '+' and self.visibility != 'extern':
- warning(self.cfunc_declarator.pos,
- "Only extern functions can throw C++ exceptions.")
-
- for formal_arg, type_arg in zip(self.args, type.args):
- self.align_argument_type(env, type_arg)
- formal_arg.type = type_arg.type
- formal_arg.name = type_arg.name
- formal_arg.cname = type_arg.cname
-
- self._validate_type_visibility(type_arg.type, type_arg.pos, env)
-
- if type_arg.type.is_fused:
- self.has_fused_arguments = True
-
- if type_arg.type.is_buffer and 'inline' in self.modifiers:
- warning(formal_arg.pos, "Buffer unpacking not optimized away.", 1)
-
+ warning(self.cfunc_declarator.pos,
+ "Only extern functions can throw C++ exceptions.")
+
+ for formal_arg, type_arg in zip(self.args, type.args):
+ self.align_argument_type(env, type_arg)
+ formal_arg.type = type_arg.type
+ formal_arg.name = type_arg.name
+ formal_arg.cname = type_arg.cname
+
+ self._validate_type_visibility(type_arg.type, type_arg.pos, env)
+
+ if type_arg.type.is_fused:
+ self.has_fused_arguments = True
+
+ if type_arg.type.is_buffer and 'inline' in self.modifiers:
+ warning(formal_arg.pos, "Buffer unpacking not optimized away.", 1)
+
if type_arg.type.is_buffer or type_arg.type.is_pythran_expr:
- if self.type.nogil:
- error(formal_arg.pos,
+ if self.type.nogil:
+ error(formal_arg.pos,
"Buffer may not be acquired without the GIL. Consider using memoryview slices instead.")
- elif 'inline' in self.modifiers:
- warning(formal_arg.pos, "Buffer unpacking not optimized away.", 1)
-
- self._validate_type_visibility(type.return_type, self.pos, env)
-
- name = name_declarator.name
- cname = name_declarator.cname
-
- type.is_const_method = self.is_const_method
- type.is_static_method = self.is_static_method
- self.entry = env.declare_cfunction(
- name, type, self.pos,
+ elif 'inline' in self.modifiers:
+ warning(formal_arg.pos, "Buffer unpacking not optimized away.", 1)
+
+ self._validate_type_visibility(type.return_type, self.pos, env)
+
+ name = name_declarator.name
+ cname = name_declarator.cname
+
+ type.is_const_method = self.is_const_method
+ type.is_static_method = self.is_static_method
+ self.entry = env.declare_cfunction(
+ name, type, self.pos,
cname=cname, visibility=self.visibility, api=self.api,
defining=self.body is not None, modifiers=self.modifiers,
overridable=self.overridable)
- self.entry.inline_func_in_pxd = self.inline_in_pxd
- self.return_type = type.return_type
- if self.return_type.is_array and self.visibility != 'extern':
+ self.entry.inline_func_in_pxd = self.inline_in_pxd
+ self.return_type = type.return_type
+ if self.return_type.is_array and self.visibility != 'extern':
error(self.pos, "Function cannot return an array")
- if self.return_type.is_cpp_class:
- self.return_type.check_nullary_constructor(self.pos, "used as a return value")
-
- if self.overridable and not env.is_module_scope and not self.is_static_method:
- if len(self.args) < 1 or not self.args[0].type.is_pyobject:
- # An error will be produced in the cdef function
- self.overridable = False
-
- self.declare_cpdef_wrapper(env)
- self.create_local_scope(env)
-
- def declare_cpdef_wrapper(self, env):
- if self.overridable:
- if self.is_static_method:
- # TODO(robertwb): Finish this up, perhaps via more function refactoring.
- error(self.pos, "static cpdef methods not yet supported")
- name = self.entry.name
+ if self.return_type.is_cpp_class:
+ self.return_type.check_nullary_constructor(self.pos, "used as a return value")
+
+ if self.overridable and not env.is_module_scope and not self.is_static_method:
+ if len(self.args) < 1 or not self.args[0].type.is_pyobject:
+ # An error will be produced in the cdef function
+ self.overridable = False
+
+ self.declare_cpdef_wrapper(env)
+ self.create_local_scope(env)
+
+ def declare_cpdef_wrapper(self, env):
+ if self.overridable:
+ if self.is_static_method:
+ # TODO(robertwb): Finish this up, perhaps via more function refactoring.
+ error(self.pos, "static cpdef methods not yet supported")
+ name = self.entry.name
py_func_body = self.call_self_node(is_module_scope=env.is_module_scope)
- if self.is_static_method:
- from .ExprNodes import NameNode
- decorators = [DecoratorNode(self.pos, decorator=NameNode(self.pos, name='staticmethod'))]
- decorators[0].decorator.analyse_types(env)
- else:
- decorators = []
+ if self.is_static_method:
+ from .ExprNodes import NameNode
+ decorators = [DecoratorNode(self.pos, decorator=NameNode(self.pos, name='staticmethod'))]
+ decorators[0].decorator.analyse_types(env)
+ else:
+ decorators = []
self.py_func = DefNode(pos=self.pos,
name=self.entry.name,
args=self.args,
@@ -2456,353 +2456,353 @@ class CFuncDefNode(FuncDefNode):
body=py_func_body,
decorators=decorators,
is_wrapper=1)
- self.py_func.is_module_scope = env.is_module_scope
- self.py_func.analyse_declarations(env)
+ self.py_func.is_module_scope = env.is_module_scope
+ self.py_func.analyse_declarations(env)
self.py_func.entry.is_overridable = True
self.py_func_stat = StatListNode(self.pos, stats=[self.py_func])
- self.py_func.type = PyrexTypes.py_object_type
- self.entry.as_variable = self.py_func.entry
- self.entry.used = self.entry.as_variable.used = True
- # Reset scope entry to the above cfunction
- env.entries[name] = self.entry
- if (not self.entry.is_final_cmethod and
+ self.py_func.type = PyrexTypes.py_object_type
+ self.entry.as_variable = self.py_func.entry
+ self.entry.used = self.entry.as_variable.used = True
+ # Reset scope entry to the above cfunction
+ env.entries[name] = self.entry
+ if (not self.entry.is_final_cmethod and
(not env.is_module_scope or Options.lookup_module_cpdef)):
self.override = OverrideCheckNode(self.pos, py_func=self.py_func)
- self.body = StatListNode(self.pos, stats=[self.override, self.body])
-
- def _validate_type_visibility(self, type, pos, env):
- """
- Ensure that types used in cdef functions are public or api, or
- defined in a C header.
- """
- public_or_api = (self.visibility == 'public' or self.api)
- entry = getattr(type, 'entry', None)
- if public_or_api and entry and env.is_module_scope:
- if not (entry.visibility in ('public', 'extern') or
- entry.api or entry.in_cinclude):
+ self.body = StatListNode(self.pos, stats=[self.override, self.body])
+
+ def _validate_type_visibility(self, type, pos, env):
+ """
+ Ensure that types used in cdef functions are public or api, or
+ defined in a C header.
+ """
+ public_or_api = (self.visibility == 'public' or self.api)
+ entry = getattr(type, 'entry', None)
+ if public_or_api and entry and env.is_module_scope:
+ if not (entry.visibility in ('public', 'extern') or
+ entry.api or entry.in_cinclude):
error(pos, "Function declared public or api may not have private types")
-
- def call_self_node(self, omit_optional_args=0, is_module_scope=0):
- from . import ExprNodes
- args = self.type.args
- if omit_optional_args:
- args = args[:len(args) - self.type.optional_arg_count]
- arg_names = [arg.name for arg in args]
- if is_module_scope:
- cfunc = ExprNodes.NameNode(self.pos, name=self.entry.name)
- call_arg_names = arg_names
- skip_dispatch = Options.lookup_module_cpdef
- elif self.type.is_static_method:
- class_entry = self.entry.scope.parent_type.entry
- class_node = ExprNodes.NameNode(self.pos, name=class_entry.name)
- class_node.entry = class_entry
- cfunc = ExprNodes.AttributeNode(self.pos, obj=class_node, attribute=self.entry.name)
- # Calling static c(p)def methods on an instance is disallowed.
- # TODO(robertwb): Support by passing self to check for override?
- skip_dispatch = True
- else:
- type_entry = self.type.args[0].type.entry
- type_arg = ExprNodes.NameNode(self.pos, name=type_entry.name)
- type_arg.entry = type_entry
- cfunc = ExprNodes.AttributeNode(self.pos, obj=type_arg, attribute=self.entry.name)
- skip_dispatch = not is_module_scope or Options.lookup_module_cpdef
- c_call = ExprNodes.SimpleCallNode(
- self.pos,
- function=cfunc,
- args=[ExprNodes.NameNode(self.pos, name=n) for n in arg_names],
- wrapper_call=skip_dispatch)
- return ReturnStatNode(pos=self.pos, return_type=PyrexTypes.py_object_type, value=c_call)
-
- def declare_arguments(self, env):
- for arg in self.type.args:
- if not arg.name:
- error(arg.pos, "Missing argument name")
- self.declare_argument(env, arg)
-
- def need_gil_acquisition(self, lenv):
- return self.type.with_gil
-
- def nogil_check(self, env):
- type = self.type
- with_gil = type.with_gil
- if type.nogil and not with_gil:
- if type.return_type.is_pyobject:
- error(self.pos,
- "Function with Python return type cannot be declared nogil")
- for entry in self.local_scope.var_entries:
- if entry.type.is_pyobject and not entry.in_with_gil_block:
- error(self.pos, "Function declared nogil has Python locals or temporaries")
-
- def analyse_expressions(self, env):
- self.local_scope.directives = env.directives
+
+ def call_self_node(self, omit_optional_args=0, is_module_scope=0):
+ from . import ExprNodes
+ args = self.type.args
+ if omit_optional_args:
+ args = args[:len(args) - self.type.optional_arg_count]
+ arg_names = [arg.name for arg in args]
+ if is_module_scope:
+ cfunc = ExprNodes.NameNode(self.pos, name=self.entry.name)
+ call_arg_names = arg_names
+ skip_dispatch = Options.lookup_module_cpdef
+ elif self.type.is_static_method:
+ class_entry = self.entry.scope.parent_type.entry
+ class_node = ExprNodes.NameNode(self.pos, name=class_entry.name)
+ class_node.entry = class_entry
+ cfunc = ExprNodes.AttributeNode(self.pos, obj=class_node, attribute=self.entry.name)
+ # Calling static c(p)def methods on an instance is disallowed.
+ # TODO(robertwb): Support by passing self to check for override?
+ skip_dispatch = True
+ else:
+ type_entry = self.type.args[0].type.entry
+ type_arg = ExprNodes.NameNode(self.pos, name=type_entry.name)
+ type_arg.entry = type_entry
+ cfunc = ExprNodes.AttributeNode(self.pos, obj=type_arg, attribute=self.entry.name)
+ skip_dispatch = not is_module_scope or Options.lookup_module_cpdef
+ c_call = ExprNodes.SimpleCallNode(
+ self.pos,
+ function=cfunc,
+ args=[ExprNodes.NameNode(self.pos, name=n) for n in arg_names],
+ wrapper_call=skip_dispatch)
+ return ReturnStatNode(pos=self.pos, return_type=PyrexTypes.py_object_type, value=c_call)
+
+ def declare_arguments(self, env):
+ for arg in self.type.args:
+ if not arg.name:
+ error(arg.pos, "Missing argument name")
+ self.declare_argument(env, arg)
+
+ def need_gil_acquisition(self, lenv):
+ return self.type.with_gil
+
+ def nogil_check(self, env):
+ type = self.type
+ with_gil = type.with_gil
+ if type.nogil and not with_gil:
+ if type.return_type.is_pyobject:
+ error(self.pos,
+ "Function with Python return type cannot be declared nogil")
+ for entry in self.local_scope.var_entries:
+ if entry.type.is_pyobject and not entry.in_with_gil_block:
+ error(self.pos, "Function declared nogil has Python locals or temporaries")
+
+ def analyse_expressions(self, env):
+ self.local_scope.directives = env.directives
if self.py_func_stat is not None:
# this will also analyse the default values and the function name assignment
self.py_func_stat = self.py_func_stat.analyse_expressions(env)
elif self.py_func is not None:
- # this will also analyse the default values
- self.py_func = self.py_func.analyse_expressions(env)
- else:
- self.analyse_default_values(env)
+ # this will also analyse the default values
+ self.py_func = self.py_func.analyse_expressions(env)
+ else:
+ self.analyse_default_values(env)
self.analyse_annotations(env)
- self.acquire_gil = self.need_gil_acquisition(self.local_scope)
- return self
-
- def needs_assignment_synthesis(self, env, code=None):
- return False
-
+ self.acquire_gil = self.need_gil_acquisition(self.local_scope)
+ return self
+
+ def needs_assignment_synthesis(self, env, code=None):
+ return False
+
def generate_function_header(self, code, with_pymethdef, with_opt_args=1, with_dispatch=1, cname=None):
- scope = self.local_scope
- arg_decls = []
- type = self.type
- for arg in type.args[:len(type.args)-type.optional_arg_count]:
- arg_decl = arg.declaration_code()
- entry = scope.lookup(arg.name)
- if not entry.cf_used:
- arg_decl = 'CYTHON_UNUSED %s' % arg_decl
- arg_decls.append(arg_decl)
- if with_dispatch and self.overridable:
- dispatch_arg = PyrexTypes.c_int_type.declaration_code(
- Naming.skip_dispatch_cname)
- if self.override:
- arg_decls.append(dispatch_arg)
- else:
- arg_decls.append('CYTHON_UNUSED %s' % dispatch_arg)
- if type.optional_arg_count and with_opt_args:
- arg_decls.append(type.op_arg_struct.declaration_code(Naming.optional_args_cname))
- if type.has_varargs:
- arg_decls.append("...")
- if not arg_decls:
- arg_decls = ["void"]
- if cname is None:
- cname = self.entry.func_cname
- entity = type.function_header_code(cname, ', '.join(arg_decls))
- if self.entry.visibility == 'private' and '::' not in cname:
- storage_class = "static "
- else:
- storage_class = ""
- dll_linkage = None
- modifiers = code.build_function_modifiers(self.entry.func_modifiers)
-
- header = self.return_type.declaration_code(entity, dll_linkage=dll_linkage)
- #print (storage_class, modifiers, header)
- needs_proto = self.is_c_class_method
- if self.template_declaration:
- if needs_proto:
- code.globalstate.parts['module_declarations'].putln(self.template_declaration)
- code.putln(self.template_declaration)
- if needs_proto:
+ scope = self.local_scope
+ arg_decls = []
+ type = self.type
+ for arg in type.args[:len(type.args)-type.optional_arg_count]:
+ arg_decl = arg.declaration_code()
+ entry = scope.lookup(arg.name)
+ if not entry.cf_used:
+ arg_decl = 'CYTHON_UNUSED %s' % arg_decl
+ arg_decls.append(arg_decl)
+ if with_dispatch and self.overridable:
+ dispatch_arg = PyrexTypes.c_int_type.declaration_code(
+ Naming.skip_dispatch_cname)
+ if self.override:
+ arg_decls.append(dispatch_arg)
+ else:
+ arg_decls.append('CYTHON_UNUSED %s' % dispatch_arg)
+ if type.optional_arg_count and with_opt_args:
+ arg_decls.append(type.op_arg_struct.declaration_code(Naming.optional_args_cname))
+ if type.has_varargs:
+ arg_decls.append("...")
+ if not arg_decls:
+ arg_decls = ["void"]
+ if cname is None:
+ cname = self.entry.func_cname
+ entity = type.function_header_code(cname, ', '.join(arg_decls))
+ if self.entry.visibility == 'private' and '::' not in cname:
+ storage_class = "static "
+ else:
+ storage_class = ""
+ dll_linkage = None
+ modifiers = code.build_function_modifiers(self.entry.func_modifiers)
+
+ header = self.return_type.declaration_code(entity, dll_linkage=dll_linkage)
+ #print (storage_class, modifiers, header)
+ needs_proto = self.is_c_class_method
+ if self.template_declaration:
+ if needs_proto:
+ code.globalstate.parts['module_declarations'].putln(self.template_declaration)
+ code.putln(self.template_declaration)
+ if needs_proto:
code.globalstate.parts['module_declarations'].putln(
"%s%s%s; /* proto*/" % (storage_class, modifiers, header))
- code.putln("%s%s%s {" % (storage_class, modifiers, header))
-
- def generate_argument_declarations(self, env, code):
- scope = self.local_scope
- for arg in self.args:
- if arg.default:
- entry = scope.lookup(arg.name)
- if self.override or entry.cf_used:
- result = arg.calculate_default_value_code(code)
- code.putln('%s = %s;' % (
- arg.type.declaration_code(arg.cname), result))
-
- def generate_keyword_list(self, code):
- pass
-
- def generate_argument_parsing_code(self, env, code):
- i = 0
- used = 0
- scope = self.local_scope
- if self.type.optional_arg_count:
- code.putln('if (%s) {' % Naming.optional_args_cname)
- for arg in self.args:
- if arg.default:
- entry = scope.lookup(arg.name)
- if self.override or entry.cf_used:
- code.putln('if (%s->%sn > %s) {' %
- (Naming.optional_args_cname,
- Naming.pyrex_prefix, i))
- declarator = arg.declarator
- while not hasattr(declarator, 'name'):
- declarator = declarator.base
- code.putln('%s = %s->%s;' %
- (arg.cname, Naming.optional_args_cname,
- self.type.opt_arg_cname(declarator.name)))
- used += 1
- i += 1
- for _ in range(used):
- code.putln('}')
- code.putln('}')
-
- # Move arguments into closure if required
- def put_into_closure(entry):
- if entry.in_closure and not arg.default:
- code.putln('%s = %s;' % (entry.cname, entry.original_cname))
- code.put_var_incref(entry)
- code.put_var_giveref(entry)
- for arg in self.args:
- put_into_closure(scope.lookup_here(arg.name))
-
-
- def generate_argument_conversion_code(self, code):
- pass
-
- def generate_argument_type_tests(self, code):
- # Generate type tests for args whose type in a parent
- # class is a supertype of the declared type.
- for arg in self.type.args:
- if arg.needs_type_test:
- self.generate_arg_type_test(arg, code)
- elif arg.type.is_pyobject and not arg.accept_none:
- self.generate_arg_none_check(arg, code)
-
- def generate_execution_code(self, code):
+ code.putln("%s%s%s {" % (storage_class, modifiers, header))
+
+ def generate_argument_declarations(self, env, code):
+ scope = self.local_scope
+ for arg in self.args:
+ if arg.default:
+ entry = scope.lookup(arg.name)
+ if self.override or entry.cf_used:
+ result = arg.calculate_default_value_code(code)
+ code.putln('%s = %s;' % (
+ arg.type.declaration_code(arg.cname), result))
+
+ def generate_keyword_list(self, code):
+ pass
+
+ def generate_argument_parsing_code(self, env, code):
+ i = 0
+ used = 0
+ scope = self.local_scope
+ if self.type.optional_arg_count:
+ code.putln('if (%s) {' % Naming.optional_args_cname)
+ for arg in self.args:
+ if arg.default:
+ entry = scope.lookup(arg.name)
+ if self.override or entry.cf_used:
+ code.putln('if (%s->%sn > %s) {' %
+ (Naming.optional_args_cname,
+ Naming.pyrex_prefix, i))
+ declarator = arg.declarator
+ while not hasattr(declarator, 'name'):
+ declarator = declarator.base
+ code.putln('%s = %s->%s;' %
+ (arg.cname, Naming.optional_args_cname,
+ self.type.opt_arg_cname(declarator.name)))
+ used += 1
+ i += 1
+ for _ in range(used):
+ code.putln('}')
+ code.putln('}')
+
+ # Move arguments into closure if required
+ def put_into_closure(entry):
+ if entry.in_closure and not arg.default:
+ code.putln('%s = %s;' % (entry.cname, entry.original_cname))
+ code.put_var_incref(entry)
+ code.put_var_giveref(entry)
+ for arg in self.args:
+ put_into_closure(scope.lookup_here(arg.name))
+
+
+ def generate_argument_conversion_code(self, code):
+ pass
+
+ def generate_argument_type_tests(self, code):
+ # Generate type tests for args whose type in a parent
+ # class is a supertype of the declared type.
+ for arg in self.type.args:
+ if arg.needs_type_test:
+ self.generate_arg_type_test(arg, code)
+ elif arg.type.is_pyobject and not arg.accept_none:
+ self.generate_arg_none_check(arg, code)
+
+ def generate_execution_code(self, code):
if code.globalstate.directives['linetrace']:
code.mark_pos(self.pos)
code.putln("") # generate line tracing code
- super(CFuncDefNode, self).generate_execution_code(code)
- if self.py_func_stat:
- self.py_func_stat.generate_execution_code(code)
-
- def error_value(self):
- if self.return_type.is_pyobject:
- return "0"
- else:
- #return None
- return self.entry.type.exception_value
-
- def caller_will_check_exceptions(self):
- return self.entry.type.exception_check
-
- def generate_wrapper_functions(self, code):
- # If the C signature of a function has changed, we need to generate
- # wrappers to put in the slots here.
- k = 0
- entry = self.entry
- func_type = entry.type
- while entry.prev_entry is not None:
- k += 1
- entry = entry.prev_entry
- entry.func_cname = "%s%swrap_%s" % (self.entry.func_cname, Naming.pyrex_prefix, k)
- code.putln()
+ super(CFuncDefNode, self).generate_execution_code(code)
+ if self.py_func_stat:
+ self.py_func_stat.generate_execution_code(code)
+
+ def error_value(self):
+ if self.return_type.is_pyobject:
+ return "0"
+ else:
+ #return None
+ return self.entry.type.exception_value
+
+ def caller_will_check_exceptions(self):
+ return self.entry.type.exception_check
+
+ def generate_wrapper_functions(self, code):
+ # If the C signature of a function has changed, we need to generate
+ # wrappers to put in the slots here.
+ k = 0
+ entry = self.entry
+ func_type = entry.type
+ while entry.prev_entry is not None:
+ k += 1
+ entry = entry.prev_entry
+ entry.func_cname = "%s%swrap_%s" % (self.entry.func_cname, Naming.pyrex_prefix, k)
+ code.putln()
self.generate_function_header(
code, 0,
with_dispatch=entry.type.is_overridable,
with_opt_args=entry.type.optional_arg_count,
cname=entry.func_cname)
- if not self.return_type.is_void:
- code.put('return ')
- args = self.type.args
- arglist = [arg.cname for arg in args[:len(args)-self.type.optional_arg_count]]
- if entry.type.is_overridable:
- arglist.append(Naming.skip_dispatch_cname)
- elif func_type.is_overridable:
- arglist.append('0')
- if entry.type.optional_arg_count:
- arglist.append(Naming.optional_args_cname)
- elif func_type.optional_arg_count:
- arglist.append('NULL')
- code.putln('%s(%s);' % (self.entry.func_cname, ', '.join(arglist)))
- code.putln('}')
-
-
-class PyArgDeclNode(Node):
- # Argument which must be a Python object (used
- # for * and ** arguments).
- #
- # name string
- # entry Symtab.Entry
- # annotation ExprNode or None Py3 argument annotation
- child_attrs = []
- is_self_arg = False
- is_type_arg = False
-
- def generate_function_definitions(self, env, code):
- self.entry.generate_function_definitions(env, code)
-
-
-class DecoratorNode(Node):
- # A decorator
- #
- # decorator NameNode or CallNode or AttributeNode
- child_attrs = ['decorator']
-
-
-class DefNode(FuncDefNode):
- # A Python function definition.
- #
- # name string the Python name of the function
- # lambda_name string the internal name of a lambda 'function'
- # decorators [DecoratorNode] list of decorators
- # args [CArgDeclNode] formal arguments
- # doc EncodedString or None
- # body StatListNode
- # return_type_annotation
- # ExprNode or None the Py3 return type annotation
- #
- # The following subnode is constructed internally
- # when the def statement is inside a Python class definition.
- #
- # fused_py_func DefNode The original fused cpdef DefNode
- # (in case this is a specialization)
- # specialized_cpdefs [DefNode] list of specialized cpdef DefNodes
- # py_cfunc_node PyCFunctionNode/InnerFunctionNode The PyCFunction to create and assign
- #
- # decorator_indirection IndirectionNode Used to remove __Pyx_Method_ClassMethod for fused functions
-
+ if not self.return_type.is_void:
+ code.put('return ')
+ args = self.type.args
+ arglist = [arg.cname for arg in args[:len(args)-self.type.optional_arg_count]]
+ if entry.type.is_overridable:
+ arglist.append(Naming.skip_dispatch_cname)
+ elif func_type.is_overridable:
+ arglist.append('0')
+ if entry.type.optional_arg_count:
+ arglist.append(Naming.optional_args_cname)
+ elif func_type.optional_arg_count:
+ arglist.append('NULL')
+ code.putln('%s(%s);' % (self.entry.func_cname, ', '.join(arglist)))
+ code.putln('}')
+
+
+class PyArgDeclNode(Node):
+ # Argument which must be a Python object (used
+ # for * and ** arguments).
+ #
+ # name string
+ # entry Symtab.Entry
+ # annotation ExprNode or None Py3 argument annotation
+ child_attrs = []
+ is_self_arg = False
+ is_type_arg = False
+
+ def generate_function_definitions(self, env, code):
+ self.entry.generate_function_definitions(env, code)
+
+
+class DecoratorNode(Node):
+ # A decorator
+ #
+ # decorator NameNode or CallNode or AttributeNode
+ child_attrs = ['decorator']
+
+
+class DefNode(FuncDefNode):
+ # A Python function definition.
+ #
+ # name string the Python name of the function
+ # lambda_name string the internal name of a lambda 'function'
+ # decorators [DecoratorNode] list of decorators
+ # args [CArgDeclNode] formal arguments
+ # doc EncodedString or None
+ # body StatListNode
+ # return_type_annotation
+ # ExprNode or None the Py3 return type annotation
+ #
+ # The following subnode is constructed internally
+ # when the def statement is inside a Python class definition.
+ #
+ # fused_py_func DefNode The original fused cpdef DefNode
+ # (in case this is a specialization)
+ # specialized_cpdefs [DefNode] list of specialized cpdef DefNodes
+ # py_cfunc_node PyCFunctionNode/InnerFunctionNode The PyCFunction to create and assign
+ #
+ # decorator_indirection IndirectionNode Used to remove __Pyx_Method_ClassMethod for fused functions
+
child_attrs = ["args", "star_arg", "starstar_arg", "body", "decorators", "return_type_annotation"]
outer_attrs = ["decorators", "return_type_annotation"]
-
+
is_staticmethod = False
is_classmethod = False
- lambda_name = None
- reqd_kw_flags_cname = "0"
- is_wrapper = 0
- no_assignment_synthesis = 0
- decorators = None
- return_type_annotation = None
- entry = None
- acquire_gil = 0
- self_in_stararg = 0
- py_cfunc_node = None
- requires_classobj = False
-    defaults_struct = None # Dynamic kwds structure name
- doc = None
-
- fused_py_func = False
- specialized_cpdefs = None
- py_wrapper = None
- py_wrapper_required = True
- func_cname = None
-
- defaults_getter = None
-
- def __init__(self, pos, **kwds):
- FuncDefNode.__init__(self, pos, **kwds)
- k = rk = r = 0
- for arg in self.args:
- if arg.kw_only:
- k += 1
- if not arg.default:
- rk += 1
- if not arg.default:
- r += 1
- self.num_kwonly_args = k
- self.num_required_kw_args = rk
- self.num_required_args = r
-
+ lambda_name = None
+ reqd_kw_flags_cname = "0"
+ is_wrapper = 0
+ no_assignment_synthesis = 0
+ decorators = None
+ return_type_annotation = None
+ entry = None
+ acquire_gil = 0
+ self_in_stararg = 0
+ py_cfunc_node = None
+ requires_classobj = False
+    defaults_struct = None # Dynamic kwds structure name
+ doc = None
+
+ fused_py_func = False
+ specialized_cpdefs = None
+ py_wrapper = None
+ py_wrapper_required = True
+ func_cname = None
+
+ defaults_getter = None
+
+ def __init__(self, pos, **kwds):
+ FuncDefNode.__init__(self, pos, **kwds)
+ k = rk = r = 0
+ for arg in self.args:
+ if arg.kw_only:
+ k += 1
+ if not arg.default:
+ rk += 1
+ if not arg.default:
+ r += 1
+ self.num_kwonly_args = k
+ self.num_required_kw_args = rk
+ self.num_required_args = r
+
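    # For illustration (a hedged reading of the loop above): given a signature
    # like `def f(a, b=1, *args, c, d=2, **kw)`, and assuming the parser marks
    # c and d as kw_only, the counts come out as num_kwonly_args == 2 (c, d),
    # num_required_kw_args == 1 (c) and num_required_args == 2 (a, c).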
def as_cfunction(self, cfunc=None, scope=None, overridable=True, returns=None, except_val=None, modifiers=None,
nogil=False, with_gil=False):
- if self.star_arg:
- error(self.star_arg.pos, "cdef function cannot have star argument")
- if self.starstar_arg:
- error(self.starstar_arg.pos, "cdef function cannot have starstar argument")
+ if self.star_arg:
+ error(self.star_arg.pos, "cdef function cannot have star argument")
+ if self.starstar_arg:
+ error(self.starstar_arg.pos, "cdef function cannot have starstar argument")
exception_value, exception_check = except_val or (None, False)
- if cfunc is None:
- cfunc_args = []
- for formal_arg in self.args:
- name_declarator, type = formal_arg.analyse(scope, nonempty=1)
+ if cfunc is None:
+ cfunc_args = []
+ for formal_arg in self.args:
+ name_declarator, type = formal_arg.analyse(scope, nonempty=1)
cfunc_args.append(PyrexTypes.CFuncTypeArg(name=name_declarator.name,
cname=None,
annotation=formal_arg.annotation,
@@ -2816,26 +2816,26 @@ class DefNode(FuncDefNode):
nogil=nogil,
with_gil=with_gil,
is_overridable=overridable)
- cfunc = CVarDefNode(self.pos, type=cfunc_type)
- else:
- if scope is None:
- scope = cfunc.scope
- cfunc_type = cfunc.type
- if len(self.args) != len(cfunc_type.args) or cfunc_type.has_varargs:
- error(self.pos, "wrong number of arguments")
- error(cfunc.pos, "previous declaration here")
- for i, (formal_arg, type_arg) in enumerate(zip(self.args, cfunc_type.args)):
- name_declarator, type = formal_arg.analyse(scope, nonempty=1,
+ cfunc = CVarDefNode(self.pos, type=cfunc_type)
+ else:
+ if scope is None:
+ scope = cfunc.scope
+ cfunc_type = cfunc.type
+ if len(self.args) != len(cfunc_type.args) or cfunc_type.has_varargs:
+ error(self.pos, "wrong number of arguments")
+ error(cfunc.pos, "previous declaration here")
+ for i, (formal_arg, type_arg) in enumerate(zip(self.args, cfunc_type.args)):
+ name_declarator, type = formal_arg.analyse(scope, nonempty=1,
is_self_arg=(i == 0 and scope.is_c_class_scope))
- if type is None or type is PyrexTypes.py_object_type:
- formal_arg.type = type_arg.type
- formal_arg.name_declarator = name_declarator
+ if type is None or type is PyrexTypes.py_object_type:
+ formal_arg.type = type_arg.type
+ formal_arg.name_declarator = name_declarator
if exception_value is None and cfunc_type.exception_value is not None:
from .ExprNodes import ConstNode
exception_value = ConstNode(
self.pos, value=cfunc_type.exception_value, type=cfunc_type.return_type)
- declarator = CFuncDeclaratorNode(self.pos,
+ declarator = CFuncDeclaratorNode(self.pos,
base=CNameDeclaratorNode(self.pos, name=self.name, cname=None),
args=self.args,
has_varargs=False,
@@ -2843,7 +2843,7 @@ class DefNode(FuncDefNode):
exception_value=exception_value,
with_gil=cfunc_type.with_gil,
nogil=cfunc_type.nogil)
- return CFuncDefNode(self.pos,
+ return CFuncDefNode(self.pos,
modifiers=modifiers or [],
base_type=CAnalysedBaseTypeNode(self.pos, type=cfunc_type.return_type),
declarator=declarator,
@@ -2857,44 +2857,44 @@ class DefNode(FuncDefNode):
api=False,
directive_locals=getattr(cfunc, 'directive_locals', {}),
directive_returns=returns)
-
- def is_cdef_func_compatible(self):
- """Determines if the function's signature is compatible with a
- cdef function. This can be used before calling
- .as_cfunction() to see if that will be successful.
- """
- if self.needs_closure:
- return False
- if self.star_arg or self.starstar_arg:
- return False
- return True
-
- def analyse_declarations(self, env):
- if self.decorators:
- for decorator in self.decorators:
- func = decorator.decorator
- if func.is_name:
- self.is_classmethod |= func.name == 'classmethod'
- self.is_staticmethod |= func.name == 'staticmethod'
-
- if self.is_classmethod and env.lookup_here('classmethod'):
- # classmethod() was overridden - not much we can do here ...
- self.is_classmethod = False
- if self.is_staticmethod and env.lookup_here('staticmethod'):
- # staticmethod() was overridden - not much we can do here ...
- self.is_staticmethod = False
-
- if self.name == '__new__' and env.is_py_class_scope:
- self.is_staticmethod = 1
-
- self.analyse_argument_types(env)
- if self.name == '<lambda>':
- self.declare_lambda_function(env)
- else:
- self.declare_pyfunction(env)
-
- self.analyse_signature(env)
- self.return_type = self.entry.signature.return_type()
+
+ def is_cdef_func_compatible(self):
+ """Determines if the function's signature is compatible with a
+ cdef function. This can be used before calling
+ .as_cfunction() to see if that will be successful.
+ """
+ if self.needs_closure:
+ return False
+ if self.star_arg or self.starstar_arg:
+ return False
+ return True
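        # Illustrative reading (not an exhaustive rule): a plain
        # `def f(x): return x + 1` is compatible and can be converted with
        # .as_cfunction(), while a def taking *args/**kwargs or one that needs
        # a closure (e.g. it returns a nested function) is not.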
+
+ def analyse_declarations(self, env):
+ if self.decorators:
+ for decorator in self.decorators:
+ func = decorator.decorator
+ if func.is_name:
+ self.is_classmethod |= func.name == 'classmethod'
+ self.is_staticmethod |= func.name == 'staticmethod'
+
+ if self.is_classmethod and env.lookup_here('classmethod'):
+ # classmethod() was overridden - not much we can do here ...
+ self.is_classmethod = False
+ if self.is_staticmethod and env.lookup_here('staticmethod'):
+ # staticmethod() was overridden - not much we can do here ...
+ self.is_staticmethod = False
+
+ if self.name == '__new__' and env.is_py_class_scope:
+ self.is_staticmethod = 1
+
+ self.analyse_argument_types(env)
+ if self.name == '<lambda>':
+ self.declare_lambda_function(env)
+ else:
+ self.declare_pyfunction(env)
+
+ self.analyse_signature(env)
+ self.return_type = self.entry.signature.return_type()
# if a signature annotation provides a more specific return object type, use it
if self.return_type is py_object_type and self.return_type_annotation:
if env.directives['annotation_typing'] and not self.entry.is_special:
@@ -2902,30 +2902,30 @@ class DefNode(FuncDefNode):
if return_type and return_type.is_pyobject:
self.return_type = return_type
- self.create_local_scope(env)
-
- self.py_wrapper = DefNodeWrapper(
- self.pos,
- target=self,
- name=self.entry.name,
- args=self.args,
- star_arg=self.star_arg,
- starstar_arg=self.starstar_arg,
- return_type=self.return_type)
- self.py_wrapper.analyse_declarations(env)
-
- def analyse_argument_types(self, env):
+ self.create_local_scope(env)
+
+ self.py_wrapper = DefNodeWrapper(
+ self.pos,
+ target=self,
+ name=self.entry.name,
+ args=self.args,
+ star_arg=self.star_arg,
+ starstar_arg=self.starstar_arg,
+ return_type=self.return_type)
+ self.py_wrapper.analyse_declarations(env)
+
+ def analyse_argument_types(self, env):
self.directive_locals = env.directives.get('locals', {})
- allow_none_for_extension_args = env.directives['allow_none_for_extension_args']
-
- f2s = env.fused_to_specific
- env.fused_to_specific = None
-
- for arg in self.args:
- if hasattr(arg, 'name'):
- name_declarator = None
- else:
- base_type = arg.base_type.analyse(env)
+ allow_none_for_extension_args = env.directives['allow_none_for_extension_args']
+
+ f2s = env.fused_to_specific
+ env.fused_to_specific = None
+
+ for arg in self.args:
+ if hasattr(arg, 'name'):
+ name_declarator = None
+ else:
+ base_type = arg.base_type.analyse(env)
            # If we are in Pythran mode and we get a buffer supported by
            # Pythran, we change this node to a fused type
if has_np_pythran(env) and base_type.is_pythran_expr:
@@ -2933,308 +2933,308 @@ class DefNode(FuncDefNode):
base_type,
#PyrexTypes.PythranExpr(pythran_type(self.type, "numpy_texpr")),
base_type.org_buffer])
- name_declarator, type = \
- arg.declarator.analyse(base_type, env)
- arg.name = name_declarator.name
- arg.type = type
-
- if type.is_fused:
- self.has_fused_arguments = True
-
- self.align_argument_type(env, arg)
- if name_declarator and name_declarator.cname:
+ name_declarator, type = \
+ arg.declarator.analyse(base_type, env)
+ arg.name = name_declarator.name
+ arg.type = type
+
+ if type.is_fused:
+ self.has_fused_arguments = True
+
+ self.align_argument_type(env, arg)
+ if name_declarator and name_declarator.cname:
error(self.pos, "Python function argument cannot have C name specification")
- arg.type = arg.type.as_argument_type()
- arg.hdr_type = None
- arg.needs_conversion = 0
- arg.needs_type_test = 0
- arg.is_generic = 1
- if arg.type.is_pyobject or arg.type.is_buffer or arg.type.is_memoryviewslice:
- if arg.or_none:
- arg.accept_none = True
- elif arg.not_none:
- arg.accept_none = False
- elif (arg.type.is_extension_type or arg.type.is_builtin_type
+ arg.type = arg.type.as_argument_type()
+ arg.hdr_type = None
+ arg.needs_conversion = 0
+ arg.needs_type_test = 0
+ arg.is_generic = 1
+ if arg.type.is_pyobject or arg.type.is_buffer or arg.type.is_memoryviewslice:
+ if arg.or_none:
+ arg.accept_none = True
+ elif arg.not_none:
+ arg.accept_none = False
+ elif (arg.type.is_extension_type or arg.type.is_builtin_type
or arg.type.is_buffer or arg.type.is_memoryviewslice):
- if arg.default and arg.default.constant_result is None:
- # special case: def func(MyType obj = None)
- arg.accept_none = True
- else:
- # default depends on compiler directive
- arg.accept_none = allow_none_for_extension_args
- else:
- # probably just a plain 'object'
- arg.accept_none = True
- else:
- arg.accept_none = True # won't be used, but must be there
- if arg.not_none:
- error(arg.pos, "Only Python type arguments can have 'not None'")
- if arg.or_none:
- error(arg.pos, "Only Python type arguments can have 'or None'")
- env.fused_to_specific = f2s
-
+ if arg.default and arg.default.constant_result is None:
+ # special case: def func(MyType obj = None)
+ arg.accept_none = True
+ else:
+ # default depends on compiler directive
+ arg.accept_none = allow_none_for_extension_args
+ else:
+ # probably just a plain 'object'
+ arg.accept_none = True
+ else:
+ arg.accept_none = True # won't be used, but must be there
+ if arg.not_none:
+ error(arg.pos, "Only Python type arguments can have 'not None'")
+ if arg.or_none:
+ error(arg.pos, "Only Python type arguments can have 'or None'")
+ env.fused_to_specific = f2s
+
if has_np_pythran(env):
self.np_args_idx = [i for i,a in enumerate(self.args) if a.type.is_numpy_buffer]
else:
self.np_args_idx = []
- def analyse_signature(self, env):
- if self.entry.is_special:
- if self.decorators:
- error(self.pos, "special functions of cdef classes cannot have decorators")
- self.entry.trivial_signature = len(self.args) == 1 and not (self.star_arg or self.starstar_arg)
- elif not env.directives['always_allow_keywords'] and not (self.star_arg or self.starstar_arg):
- # Use the simpler calling signature for zero- and one-argument functions.
- if self.entry.signature is TypeSlots.pyfunction_signature:
- if len(self.args) == 0:
- self.entry.signature = TypeSlots.pyfunction_noargs
- elif len(self.args) == 1:
- if self.args[0].default is None and not self.args[0].kw_only:
- self.entry.signature = TypeSlots.pyfunction_onearg
- elif self.entry.signature is TypeSlots.pymethod_signature:
- if len(self.args) == 1:
- self.entry.signature = TypeSlots.unaryfunc
- elif len(self.args) == 2:
- if self.args[1].default is None and not self.args[1].kw_only:
- self.entry.signature = TypeSlots.ibinaryfunc
-
- sig = self.entry.signature
- nfixed = sig.num_fixed_args()
+ def analyse_signature(self, env):
+ if self.entry.is_special:
+ if self.decorators:
+ error(self.pos, "special functions of cdef classes cannot have decorators")
+ self.entry.trivial_signature = len(self.args) == 1 and not (self.star_arg or self.starstar_arg)
+ elif not env.directives['always_allow_keywords'] and not (self.star_arg or self.starstar_arg):
+ # Use the simpler calling signature for zero- and one-argument functions.
+ if self.entry.signature is TypeSlots.pyfunction_signature:
+ if len(self.args) == 0:
+ self.entry.signature = TypeSlots.pyfunction_noargs
+ elif len(self.args) == 1:
+ if self.args[0].default is None and not self.args[0].kw_only:
+ self.entry.signature = TypeSlots.pyfunction_onearg
+ elif self.entry.signature is TypeSlots.pymethod_signature:
+ if len(self.args) == 1:
+ self.entry.signature = TypeSlots.unaryfunc
+ elif len(self.args) == 2:
+ if self.args[1].default is None and not self.args[1].kw_only:
+ self.entry.signature = TypeSlots.ibinaryfunc
+
+ sig = self.entry.signature
+ nfixed = sig.num_fixed_args()
if (sig is TypeSlots.pymethod_signature and nfixed == 1
and len(self.args) == 0 and self.star_arg):
- # this is the only case where a diverging number of
- # arguments is not an error - when we have no explicit
- # 'self' parameter as in method(*args)
- sig = self.entry.signature = TypeSlots.pyfunction_signature # self is not 'really' used
- self.self_in_stararg = 1
- nfixed = 0
-
- if self.is_staticmethod and env.is_c_class_scope:
- nfixed = 0
- self.self_in_stararg = True # FIXME: why for staticmethods?
-
- self.entry.signature = sig = copy.copy(sig)
- sig.fixed_arg_format = "*"
- sig.is_staticmethod = True
- sig.has_generic_args = True
-
- if ((self.is_classmethod or self.is_staticmethod) and
+ # this is the only case where a diverging number of
+ # arguments is not an error - when we have no explicit
+ # 'self' parameter as in method(*args)
+ sig = self.entry.signature = TypeSlots.pyfunction_signature # self is not 'really' used
+ self.self_in_stararg = 1
+ nfixed = 0
+
+ if self.is_staticmethod and env.is_c_class_scope:
+ nfixed = 0
+ self.self_in_stararg = True # FIXME: why for staticmethods?
+
+ self.entry.signature = sig = copy.copy(sig)
+ sig.fixed_arg_format = "*"
+ sig.is_staticmethod = True
+ sig.has_generic_args = True
+
+ if ((self.is_classmethod or self.is_staticmethod) and
self.has_fused_arguments and env.is_c_class_scope):
- del self.decorator_indirection.stats[:]
-
- for i in range(min(nfixed, len(self.args))):
- arg = self.args[i]
- arg.is_generic = 0
- if sig.is_self_arg(i) and not self.is_staticmethod:
- if self.is_classmethod:
- arg.is_type_arg = 1
- arg.hdr_type = arg.type = Builtin.type_type
- else:
- arg.is_self_arg = 1
- arg.hdr_type = arg.type = env.parent_type
- arg.needs_conversion = 0
- else:
- arg.hdr_type = sig.fixed_arg_type(i)
- if not arg.type.same_as(arg.hdr_type):
- if arg.hdr_type.is_pyobject and arg.type.is_pyobject:
- arg.needs_type_test = 1
- else:
- arg.needs_conversion = 1
- if arg.needs_conversion:
- arg.hdr_cname = Naming.arg_prefix + arg.name
- else:
- arg.hdr_cname = Naming.var_prefix + arg.name
-
- if nfixed > len(self.args):
- self.bad_signature()
- return
- elif nfixed < len(self.args):
- if not sig.has_generic_args:
- self.bad_signature()
- for arg in self.args:
+ del self.decorator_indirection.stats[:]
+
+ for i in range(min(nfixed, len(self.args))):
+ arg = self.args[i]
+ arg.is_generic = 0
+ if sig.is_self_arg(i) and not self.is_staticmethod:
+ if self.is_classmethod:
+ arg.is_type_arg = 1
+ arg.hdr_type = arg.type = Builtin.type_type
+ else:
+ arg.is_self_arg = 1
+ arg.hdr_type = arg.type = env.parent_type
+ arg.needs_conversion = 0
+ else:
+ arg.hdr_type = sig.fixed_arg_type(i)
+ if not arg.type.same_as(arg.hdr_type):
+ if arg.hdr_type.is_pyobject and arg.type.is_pyobject:
+ arg.needs_type_test = 1
+ else:
+ arg.needs_conversion = 1
+ if arg.needs_conversion:
+ arg.hdr_cname = Naming.arg_prefix + arg.name
+ else:
+ arg.hdr_cname = Naming.var_prefix + arg.name
+
+ if nfixed > len(self.args):
+ self.bad_signature()
+ return
+ elif nfixed < len(self.args):
+ if not sig.has_generic_args:
+ self.bad_signature()
+ for arg in self.args:
if arg.is_generic and (arg.type.is_extension_type or arg.type.is_builtin_type):
- arg.needs_type_test = 1
-
- def bad_signature(self):
- sig = self.entry.signature
- expected_str = "%d" % sig.num_fixed_args()
- if sig.has_generic_args:
- expected_str += " or more"
- name = self.name
- if name.startswith("__") and name.endswith("__"):
- desc = "Special method"
- else:
- desc = "Method"
+ arg.needs_type_test = 1
+
+ def bad_signature(self):
+ sig = self.entry.signature
+ expected_str = "%d" % sig.num_fixed_args()
+ if sig.has_generic_args:
+ expected_str += " or more"
+ name = self.name
+ if name.startswith("__") and name.endswith("__"):
+ desc = "Special method"
+ else:
+ desc = "Method"
error(self.pos, "%s %s has wrong number of arguments (%d declared, %s expected)" % (
desc, self.name, len(self.args), expected_str))
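        # e.g. a `__getitem__` written with three arguments, where the slot
        # signature fixes two, would be reported as: "Special method __getitem__
        # has wrong number of arguments (3 declared, 2 expected)"
        # (the concrete numbers here are only illustrative).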
-
- def declare_pyfunction(self, env):
- #print "DefNode.declare_pyfunction:", self.name, "in", env ###
- name = self.name
- entry = env.lookup_here(name)
- if entry:
- if entry.is_final_cmethod and not env.parent_type.is_final_type:
- error(self.pos, "Only final types can have final Python (def/cpdef) methods")
+
+ def declare_pyfunction(self, env):
+ #print "DefNode.declare_pyfunction:", self.name, "in", env ###
+ name = self.name
+ entry = env.lookup_here(name)
+ if entry:
+ if entry.is_final_cmethod and not env.parent_type.is_final_type:
+ error(self.pos, "Only final types can have final Python (def/cpdef) methods")
if entry.type.is_cfunction and not entry.is_builtin_cmethod and not self.is_wrapper:
- warning(self.pos, "Overriding cdef method with def method.", 5)
- entry = env.declare_pyfunction(name, self.pos, allow_redefine=not self.is_wrapper)
- self.entry = entry
- prefix = env.next_id(env.scope_prefix)
- self.entry.pyfunc_cname = Naming.pyfunc_prefix + prefix + name
- if Options.docstrings:
- entry.doc = embed_position(self.pos, self.doc)
- entry.doc_cname = Naming.funcdoc_prefix + prefix + name
- if entry.is_special:
+ warning(self.pos, "Overriding cdef method with def method.", 5)
+ entry = env.declare_pyfunction(name, self.pos, allow_redefine=not self.is_wrapper)
+ self.entry = entry
+ prefix = env.next_id(env.scope_prefix)
+ self.entry.pyfunc_cname = Naming.pyfunc_prefix + prefix + name
+ if Options.docstrings:
+ entry.doc = embed_position(self.pos, self.doc)
+ entry.doc_cname = Naming.funcdoc_prefix + prefix + name
+ if entry.is_special:
if entry.name in TypeSlots.invisible or not entry.doc or (
entry.name in '__getattr__' and env.directives['fast_getattr']):
- entry.wrapperbase_cname = None
- else:
- entry.wrapperbase_cname = Naming.wrapperbase_prefix + prefix + name
- else:
- entry.doc = None
-
- def declare_lambda_function(self, env):
- entry = env.declare_lambda_function(self.lambda_name, self.pos)
- entry.doc = None
- self.entry = entry
- self.entry.pyfunc_cname = entry.cname
-
- def declare_arguments(self, env):
- for arg in self.args:
- if not arg.name:
- error(arg.pos, "Missing argument name")
- if arg.needs_conversion:
- arg.entry = env.declare_var(arg.name, arg.type, arg.pos)
- if arg.type.is_pyobject:
- arg.entry.init = "0"
- else:
- arg.entry = self.declare_argument(env, arg)
- arg.entry.is_arg = 1
- arg.entry.used = 1
- arg.entry.is_self_arg = arg.is_self_arg
- self.declare_python_arg(env, self.star_arg)
- self.declare_python_arg(env, self.starstar_arg)
-
- def declare_python_arg(self, env, arg):
- if arg:
- if env.directives['infer_types'] != False:
- type = PyrexTypes.unspecified_type
- else:
- type = py_object_type
- entry = env.declare_var(arg.name, type, arg.pos)
- entry.is_arg = 1
- entry.used = 1
- entry.init = "0"
- entry.xdecref_cleanup = 1
- arg.entry = entry
-
- def analyse_expressions(self, env):
- self.local_scope.directives = env.directives
- self.analyse_default_values(env)
+ entry.wrapperbase_cname = None
+ else:
+ entry.wrapperbase_cname = Naming.wrapperbase_prefix + prefix + name
+ else:
+ entry.doc = None
+
+ def declare_lambda_function(self, env):
+ entry = env.declare_lambda_function(self.lambda_name, self.pos)
+ entry.doc = None
+ self.entry = entry
+ self.entry.pyfunc_cname = entry.cname
+
+ def declare_arguments(self, env):
+ for arg in self.args:
+ if not arg.name:
+ error(arg.pos, "Missing argument name")
+ if arg.needs_conversion:
+ arg.entry = env.declare_var(arg.name, arg.type, arg.pos)
+ if arg.type.is_pyobject:
+ arg.entry.init = "0"
+ else:
+ arg.entry = self.declare_argument(env, arg)
+ arg.entry.is_arg = 1
+ arg.entry.used = 1
+ arg.entry.is_self_arg = arg.is_self_arg
+ self.declare_python_arg(env, self.star_arg)
+ self.declare_python_arg(env, self.starstar_arg)
+
+ def declare_python_arg(self, env, arg):
+ if arg:
+ if env.directives['infer_types'] != False:
+ type = PyrexTypes.unspecified_type
+ else:
+ type = py_object_type
+ entry = env.declare_var(arg.name, type, arg.pos)
+ entry.is_arg = 1
+ entry.used = 1
+ entry.init = "0"
+ entry.xdecref_cleanup = 1
+ arg.entry = entry
+
+ def analyse_expressions(self, env):
+ self.local_scope.directives = env.directives
+ self.analyse_default_values(env)
self.analyse_annotations(env)
if self.return_type_annotation:
self.return_type_annotation = self.analyse_annotation(env, self.return_type_annotation)
-
- if not self.needs_assignment_synthesis(env) and self.decorators:
- for decorator in self.decorators[::-1]:
- decorator.decorator = decorator.decorator.analyse_expressions(env)
-
- self.py_wrapper.prepare_argument_coercion(env)
- return self
-
- def needs_assignment_synthesis(self, env, code=None):
- if self.is_staticmethod:
- return True
+
+ if not self.needs_assignment_synthesis(env) and self.decorators:
+ for decorator in self.decorators[::-1]:
+ decorator.decorator = decorator.decorator.analyse_expressions(env)
+
+ self.py_wrapper.prepare_argument_coercion(env)
+ return self
+
+ def needs_assignment_synthesis(self, env, code=None):
+ if self.is_staticmethod:
+ return True
if self.specialized_cpdefs or self.entry.is_fused_specialized:
- return False
- if self.no_assignment_synthesis:
- return False
+ return False
+ if self.no_assignment_synthesis:
+ return False
if self.entry.is_special:
return False
- if self.entry.is_anonymous:
- return True
+ if self.entry.is_anonymous:
+ return True
if env.is_module_scope or env.is_c_class_scope:
- if code is None:
+ if code is None:
return self.local_scope.directives['binding']
- else:
- return code.globalstate.directives['binding']
- return env.is_py_class_scope or env.is_closure_scope
-
- def error_value(self):
- return self.entry.signature.error_value
-
- def caller_will_check_exceptions(self):
- return self.entry.signature.exception_check
-
- def generate_function_definitions(self, env, code):
- if self.defaults_getter:
+ else:
+ return code.globalstate.directives['binding']
+ return env.is_py_class_scope or env.is_closure_scope
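        # Rough summary (illustrative; the checks above apply in order): static
        # methods and lambdas need the assignment synthesised, special methods
        # and fused specialisations do not, and at module or cdef-class scope
        # the answer follows the `binding` directive.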
+
+ def error_value(self):
+ return self.entry.signature.error_value
+
+ def caller_will_check_exceptions(self):
+ return self.entry.signature.exception_check
+
+ def generate_function_definitions(self, env, code):
+ if self.defaults_getter:
# defaults getter must never live in class scopes, it's always a module function
self.defaults_getter.generate_function_definitions(env.global_scope(), code)
-
- # Before closure cnames are mangled
- if self.py_wrapper_required:
- # func_cname might be modified by @cname
- self.py_wrapper.func_cname = self.entry.func_cname
- self.py_wrapper.generate_function_definitions(env, code)
- FuncDefNode.generate_function_definitions(self, env, code)
-
- def generate_function_header(self, code, with_pymethdef, proto_only=0):
- if proto_only:
- if self.py_wrapper_required:
- self.py_wrapper.generate_function_header(
- code, with_pymethdef, True)
- return
- arg_code_list = []
- if self.entry.signature.has_dummy_arg:
- self_arg = 'PyObject *%s' % Naming.self_cname
- if not self.needs_outer_scope:
- self_arg = 'CYTHON_UNUSED ' + self_arg
- arg_code_list.append(self_arg)
-
- def arg_decl_code(arg):
- entry = arg.entry
- if entry.in_closure:
- cname = entry.original_cname
- else:
- cname = entry.cname
- decl = entry.type.declaration_code(cname)
- if not entry.cf_used:
- decl = 'CYTHON_UNUSED ' + decl
- return decl
-
- for arg in self.args:
- arg_code_list.append(arg_decl_code(arg))
- if self.star_arg:
- arg_code_list.append(arg_decl_code(self.star_arg))
- if self.starstar_arg:
- arg_code_list.append(arg_decl_code(self.starstar_arg))
+
+ # Before closure cnames are mangled
+ if self.py_wrapper_required:
+ # func_cname might be modified by @cname
+ self.py_wrapper.func_cname = self.entry.func_cname
+ self.py_wrapper.generate_function_definitions(env, code)
+ FuncDefNode.generate_function_definitions(self, env, code)
+
+ def generate_function_header(self, code, with_pymethdef, proto_only=0):
+ if proto_only:
+ if self.py_wrapper_required:
+ self.py_wrapper.generate_function_header(
+ code, with_pymethdef, True)
+ return
+ arg_code_list = []
+ if self.entry.signature.has_dummy_arg:
+ self_arg = 'PyObject *%s' % Naming.self_cname
+ if not self.needs_outer_scope:
+ self_arg = 'CYTHON_UNUSED ' + self_arg
+ arg_code_list.append(self_arg)
+
+ def arg_decl_code(arg):
+ entry = arg.entry
+ if entry.in_closure:
+ cname = entry.original_cname
+ else:
+ cname = entry.cname
+ decl = entry.type.declaration_code(cname)
+ if not entry.cf_used:
+ decl = 'CYTHON_UNUSED ' + decl
+ return decl
+
+ for arg in self.args:
+ arg_code_list.append(arg_decl_code(arg))
+ if self.star_arg:
+ arg_code_list.append(arg_decl_code(self.star_arg))
+ if self.starstar_arg:
+ arg_code_list.append(arg_decl_code(self.starstar_arg))
if arg_code_list:
arg_code = ', '.join(arg_code_list)
else:
arg_code = 'void' # No arguments
- dc = self.return_type.declaration_code(self.entry.pyfunc_cname)
-
- decls_code = code.globalstate['decls']
- preprocessor_guard = self.get_preprocessor_guard()
- if preprocessor_guard:
- decls_code.putln(preprocessor_guard)
- decls_code.putln(
- "static %s(%s); /* proto */" % (dc, arg_code))
- if preprocessor_guard:
- decls_code.putln("#endif")
- code.putln("static %s(%s) {" % (dc, arg_code))
-
- def generate_argument_declarations(self, env, code):
- pass
-
- def generate_keyword_list(self, code):
- pass
-
- def generate_argument_parsing_code(self, env, code):
- # Move arguments into closure if required
- def put_into_closure(entry):
- if entry.in_closure:
- code.putln('%s = %s;' % (entry.cname, entry.original_cname))
+ dc = self.return_type.declaration_code(self.entry.pyfunc_cname)
+
+ decls_code = code.globalstate['decls']
+ preprocessor_guard = self.get_preprocessor_guard()
+ if preprocessor_guard:
+ decls_code.putln(preprocessor_guard)
+ decls_code.putln(
+ "static %s(%s); /* proto */" % (dc, arg_code))
+ if preprocessor_guard:
+ decls_code.putln("#endif")
+ code.putln("static %s(%s) {" % (dc, arg_code))
+
+ def generate_argument_declarations(self, env, code):
+ pass
+
+ def generate_keyword_list(self, code):
+ pass
+
+ def generate_argument_parsing_code(self, env, code):
+ # Move arguments into closure if required
+ def put_into_closure(entry):
+ if entry.in_closure:
+ code.putln('%s = %s;' % (entry.cname, entry.original_cname))
if entry.xdecref_cleanup:
# mostly applies to the starstar arg - this can sometimes be NULL
# so must be xincrefed instead
@@ -3243,54 +3243,54 @@ class DefNode(FuncDefNode):
else:
code.put_var_incref(entry)
code.put_var_giveref(entry)
- for arg in self.args:
- put_into_closure(arg.entry)
- for arg in self.star_arg, self.starstar_arg:
- if arg:
- put_into_closure(arg.entry)
-
- def generate_argument_type_tests(self, code):
- pass
-
-
-class DefNodeWrapper(FuncDefNode):
- # DefNode python wrapper code generator
-
- defnode = None
- target = None # Target DefNode
-
- def __init__(self, *args, **kwargs):
- FuncDefNode.__init__(self, *args, **kwargs)
- self.num_kwonly_args = self.target.num_kwonly_args
- self.num_required_kw_args = self.target.num_required_kw_args
- self.num_required_args = self.target.num_required_args
- self.self_in_stararg = self.target.self_in_stararg
- self.signature = None
-
- def analyse_declarations(self, env):
- target_entry = self.target.entry
- name = self.name
- prefix = env.next_id(env.scope_prefix)
- target_entry.func_cname = Naming.pywrap_prefix + prefix + name
- target_entry.pymethdef_cname = Naming.pymethdef_prefix + prefix + name
-
- self.signature = target_entry.signature
-
+ for arg in self.args:
+ put_into_closure(arg.entry)
+ for arg in self.star_arg, self.starstar_arg:
+ if arg:
+ put_into_closure(arg.entry)
+
+ def generate_argument_type_tests(self, code):
+ pass
+
+
+class DefNodeWrapper(FuncDefNode):
+ # DefNode python wrapper code generator
+
+ defnode = None
+ target = None # Target DefNode
+
+ def __init__(self, *args, **kwargs):
+ FuncDefNode.__init__(self, *args, **kwargs)
+ self.num_kwonly_args = self.target.num_kwonly_args
+ self.num_required_kw_args = self.target.num_required_kw_args
+ self.num_required_args = self.target.num_required_args
+ self.self_in_stararg = self.target.self_in_stararg
+ self.signature = None
+
+ def analyse_declarations(self, env):
+ target_entry = self.target.entry
+ name = self.name
+ prefix = env.next_id(env.scope_prefix)
+ target_entry.func_cname = Naming.pywrap_prefix + prefix + name
+ target_entry.pymethdef_cname = Naming.pymethdef_prefix + prefix + name
+
+ self.signature = target_entry.signature
+
self.np_args_idx = self.target.np_args_idx
- def prepare_argument_coercion(self, env):
- # This is only really required for Cython utility code at this time,
- # everything else can be done during code generation. But we expand
- # all utility code here, simply because we cannot easily distinguish
- # different code types.
- for arg in self.args:
- if not arg.type.is_pyobject:
- if not arg.type.create_from_py_utility_code(env):
- pass # will fail later
- elif arg.hdr_type and not arg.hdr_type.is_pyobject:
- if not arg.hdr_type.create_to_py_utility_code(env):
- pass # will fail later
-
+ def prepare_argument_coercion(self, env):
+ # This is only really required for Cython utility code at this time,
+ # everything else can be done during code generation. But we expand
+ # all utility code here, simply because we cannot easily distinguish
+ # different code types.
+ for arg in self.args:
+ if not arg.type.is_pyobject:
+ if not arg.type.create_from_py_utility_code(env):
+ pass # will fail later
+ elif arg.hdr_type and not arg.hdr_type.is_pyobject:
+ if not arg.hdr_type.create_to_py_utility_code(env):
+ pass # will fail later
+
if self.starstar_arg and not self.starstar_arg.entry.cf_used:
# we will set the kwargs argument to NULL instead of a new dict
# and must therefore correct the control flow state
@@ -3300,268 +3300,268 @@ class DefNodeWrapper(FuncDefNode):
if not ass.is_arg and ass.lhs.is_name:
ass.lhs.cf_maybe_null = True
- def signature_has_nongeneric_args(self):
- argcount = len(self.args)
- if argcount == 0 or (
- argcount == 1 and (self.args[0].is_self_arg or
- self.args[0].is_type_arg)):
- return 0
- return 1
-
- def signature_has_generic_args(self):
- return self.signature.has_generic_args
-
- def generate_function_body(self, code):
- args = []
- if self.signature.has_dummy_arg:
- args.append(Naming.self_cname)
- for arg in self.args:
- if arg.hdr_type and not (arg.type.is_memoryviewslice or
- arg.type.is_struct or
- arg.type.is_complex):
- args.append(arg.type.cast_code(arg.entry.cname))
- else:
- args.append(arg.entry.cname)
- if self.star_arg:
- args.append(self.star_arg.entry.cname)
- if self.starstar_arg:
- args.append(self.starstar_arg.entry.cname)
- args = ', '.join(args)
- if not self.return_type.is_void:
- code.put('%s = ' % Naming.retval_cname)
- code.putln('%s(%s);' % (
- self.target.entry.pyfunc_cname, args))
-
- def generate_function_definitions(self, env, code):
- lenv = self.target.local_scope
- # Generate C code for header and body of function
- code.mark_pos(self.pos)
- code.putln("")
- code.putln("/* Python wrapper */")
- preprocessor_guard = self.target.get_preprocessor_guard()
- if preprocessor_guard:
- code.putln(preprocessor_guard)
-
+ def signature_has_nongeneric_args(self):
+ argcount = len(self.args)
+ if argcount == 0 or (
+ argcount == 1 and (self.args[0].is_self_arg or
+ self.args[0].is_type_arg)):
+ return 0
+ return 1
+
+ def signature_has_generic_args(self):
+ return self.signature.has_generic_args
+
+ def generate_function_body(self, code):
+ args = []
+ if self.signature.has_dummy_arg:
+ args.append(Naming.self_cname)
+ for arg in self.args:
+ if arg.hdr_type and not (arg.type.is_memoryviewslice or
+ arg.type.is_struct or
+ arg.type.is_complex):
+ args.append(arg.type.cast_code(arg.entry.cname))
+ else:
+ args.append(arg.entry.cname)
+ if self.star_arg:
+ args.append(self.star_arg.entry.cname)
+ if self.starstar_arg:
+ args.append(self.starstar_arg.entry.cname)
+ args = ', '.join(args)
+ if not self.return_type.is_void:
+ code.put('%s = ' % Naming.retval_cname)
+ code.putln('%s(%s);' % (
+ self.target.entry.pyfunc_cname, args))
+
+ def generate_function_definitions(self, env, code):
+ lenv = self.target.local_scope
+ # Generate C code for header and body of function
+ code.mark_pos(self.pos)
+ code.putln("")
+ code.putln("/* Python wrapper */")
+ preprocessor_guard = self.target.get_preprocessor_guard()
+ if preprocessor_guard:
+ code.putln(preprocessor_guard)
+
code.enter_cfunc_scope(lenv)
- code.return_from_error_cleanup_label = code.new_label()
-
- with_pymethdef = (self.target.needs_assignment_synthesis(env, code) or
- self.target.pymethdef_required)
- self.generate_function_header(code, with_pymethdef)
- self.generate_argument_declarations(lenv, code)
- tempvardecl_code = code.insertion_point()
-
- if self.return_type.is_pyobject:
- retval_init = ' = 0'
- else:
- retval_init = ''
- if not self.return_type.is_void:
- code.putln('%s%s;' % (
- self.return_type.declaration_code(Naming.retval_cname),
- retval_init))
- code.put_declare_refcount_context()
- code.put_setup_refcount_context('%s (wrapper)' % self.name)
-
- self.generate_argument_parsing_code(lenv, code)
- self.generate_argument_type_tests(code)
- self.generate_function_body(code)
-
- # ----- Go back and insert temp variable declarations
- tempvardecl_code.put_temp_declarations(code.funcstate)
-
- code.mark_pos(self.pos)
- code.putln("")
- code.putln("/* function exit code */")
-
- # ----- Error cleanup
- if code.error_label in code.labels_used:
- code.put_goto(code.return_label)
- code.put_label(code.error_label)
- for cname, type in code.funcstate.all_managed_temps():
- code.put_xdecref(cname, type)
- err_val = self.error_value()
- if err_val is not None:
- code.putln("%s = %s;" % (Naming.retval_cname, err_val))
-
- # ----- Non-error return cleanup
- code.put_label(code.return_label)
- for entry in lenv.var_entries:
- if entry.is_arg and entry.type.is_pyobject:
- code.put_var_decref(entry)
-
- code.put_finish_refcount_context()
- if not self.return_type.is_void:
- code.putln("return %s;" % Naming.retval_cname)
- code.putln('}')
- code.exit_cfunc_scope()
- if preprocessor_guard:
- code.putln("#endif /*!(%s)*/" % preprocessor_guard)
-
- def generate_function_header(self, code, with_pymethdef, proto_only=0):
- arg_code_list = []
- sig = self.signature
-
- if sig.has_dummy_arg or self.self_in_stararg:
- arg_code = "PyObject *%s" % Naming.self_cname
- if not sig.has_dummy_arg:
- arg_code = 'CYTHON_UNUSED ' + arg_code
- arg_code_list.append(arg_code)
-
- for arg in self.args:
- if not arg.is_generic:
- if arg.is_self_arg or arg.is_type_arg:
- arg_code_list.append("PyObject *%s" % arg.hdr_cname)
- else:
- arg_code_list.append(
- arg.hdr_type.declaration_code(arg.hdr_cname))
- entry = self.target.entry
- if not entry.is_special and sig.method_flags() == [TypeSlots.method_noargs]:
- arg_code_list.append("CYTHON_UNUSED PyObject *unused")
- if entry.scope.is_c_class_scope and entry.name == "__ipow__":
- arg_code_list.append("CYTHON_UNUSED PyObject *unused")
- if sig.has_generic_args:
- arg_code_list.append(
+ code.return_from_error_cleanup_label = code.new_label()
+
+ with_pymethdef = (self.target.needs_assignment_synthesis(env, code) or
+ self.target.pymethdef_required)
+ self.generate_function_header(code, with_pymethdef)
+ self.generate_argument_declarations(lenv, code)
+ tempvardecl_code = code.insertion_point()
+
+ if self.return_type.is_pyobject:
+ retval_init = ' = 0'
+ else:
+ retval_init = ''
+ if not self.return_type.is_void:
+ code.putln('%s%s;' % (
+ self.return_type.declaration_code(Naming.retval_cname),
+ retval_init))
+ code.put_declare_refcount_context()
+ code.put_setup_refcount_context('%s (wrapper)' % self.name)
+
+ self.generate_argument_parsing_code(lenv, code)
+ self.generate_argument_type_tests(code)
+ self.generate_function_body(code)
+
+ # ----- Go back and insert temp variable declarations
+ tempvardecl_code.put_temp_declarations(code.funcstate)
+
+ code.mark_pos(self.pos)
+ code.putln("")
+ code.putln("/* function exit code */")
+
+ # ----- Error cleanup
+ if code.error_label in code.labels_used:
+ code.put_goto(code.return_label)
+ code.put_label(code.error_label)
+ for cname, type in code.funcstate.all_managed_temps():
+ code.put_xdecref(cname, type)
+ err_val = self.error_value()
+ if err_val is not None:
+ code.putln("%s = %s;" % (Naming.retval_cname, err_val))
+
+ # ----- Non-error return cleanup
+ code.put_label(code.return_label)
+ for entry in lenv.var_entries:
+ if entry.is_arg and entry.type.is_pyobject:
+ code.put_var_decref(entry)
+
+ code.put_finish_refcount_context()
+ if not self.return_type.is_void:
+ code.putln("return %s;" % Naming.retval_cname)
+ code.putln('}')
+ code.exit_cfunc_scope()
+ if preprocessor_guard:
+ code.putln("#endif /*!(%s)*/" % preprocessor_guard)
+
+ def generate_function_header(self, code, with_pymethdef, proto_only=0):
+ arg_code_list = []
+ sig = self.signature
+
+ if sig.has_dummy_arg or self.self_in_stararg:
+ arg_code = "PyObject *%s" % Naming.self_cname
+ if not sig.has_dummy_arg:
+ arg_code = 'CYTHON_UNUSED ' + arg_code
+ arg_code_list.append(arg_code)
+
+ for arg in self.args:
+ if not arg.is_generic:
+ if arg.is_self_arg or arg.is_type_arg:
+ arg_code_list.append("PyObject *%s" % arg.hdr_cname)
+ else:
+ arg_code_list.append(
+ arg.hdr_type.declaration_code(arg.hdr_cname))
+ entry = self.target.entry
+ if not entry.is_special and sig.method_flags() == [TypeSlots.method_noargs]:
+ arg_code_list.append("CYTHON_UNUSED PyObject *unused")
+ if entry.scope.is_c_class_scope and entry.name == "__ipow__":
+ arg_code_list.append("CYTHON_UNUSED PyObject *unused")
+ if sig.has_generic_args:
+ arg_code_list.append(
"PyObject *%s, PyObject *%s" % (
Naming.args_cname, Naming.kwds_cname))
- arg_code = ", ".join(arg_code_list)
-
- # Prevent warning: unused function '__pyx_pw_5numpy_7ndarray_1__getbuffer__'
- mf = ""
- if (entry.name in ("__getbuffer__", "__releasebuffer__")
+ arg_code = ", ".join(arg_code_list)
+
+ # Prevent warning: unused function '__pyx_pw_5numpy_7ndarray_1__getbuffer__'
+ mf = ""
+ if (entry.name in ("__getbuffer__", "__releasebuffer__")
and entry.scope.is_c_class_scope):
- mf = "CYTHON_UNUSED "
- with_pymethdef = False
-
- dc = self.return_type.declaration_code(entry.func_cname)
- header = "static %s%s(%s)" % (mf, dc, arg_code)
- code.putln("%s; /*proto*/" % header)
-
- if proto_only:
- if self.target.fused_py_func:
- # If we are the specialized version of the cpdef, we still
- # want the prototype for the "fused cpdef", in case we're
- # checking to see if our method was overridden in Python
- self.target.fused_py_func.generate_function_header(
+ mf = "CYTHON_UNUSED "
+ with_pymethdef = False
+
+ dc = self.return_type.declaration_code(entry.func_cname)
+ header = "static %s%s(%s)" % (mf, dc, arg_code)
+ code.putln("%s; /*proto*/" % header)
+
+ if proto_only:
+ if self.target.fused_py_func:
+ # If we are the specialized version of the cpdef, we still
+ # want the prototype for the "fused cpdef", in case we're
+ # checking to see if our method was overridden in Python
+ self.target.fused_py_func.generate_function_header(
code, with_pymethdef, proto_only=True)
- return
-
- if (Options.docstrings and entry.doc and
- not self.target.fused_py_func and
- not entry.scope.is_property_scope and
- (not entry.is_special or entry.wrapperbase_cname)):
- # h_code = code.globalstate['h_code']
- docstr = entry.doc
-
- if docstr.is_unicode:
+ return
+
+ if (Options.docstrings and entry.doc and
+ not self.target.fused_py_func and
+ not entry.scope.is_property_scope and
+ (not entry.is_special or entry.wrapperbase_cname)):
+ # h_code = code.globalstate['h_code']
+ docstr = entry.doc
+
+ if docstr.is_unicode:
docstr = docstr.as_utf8_string()
-
+
if not (entry.is_special and entry.name in ('__getbuffer__', '__releasebuffer__')):
code.putln('static char %s[] = %s;' % (
- entry.doc_cname,
+ entry.doc_cname,
docstr.as_c_string_literal()))
-
- if entry.is_special:
- code.putln('#if CYTHON_COMPILING_IN_CPYTHON')
- code.putln(
- "struct wrapperbase %s;" % entry.wrapperbase_cname)
- code.putln('#endif')
-
- if with_pymethdef or self.target.fused_py_func:
- code.put(
+
+ if entry.is_special:
+ code.putln('#if CYTHON_COMPILING_IN_CPYTHON')
+ code.putln(
+ "struct wrapperbase %s;" % entry.wrapperbase_cname)
+ code.putln('#endif')
+
+ if with_pymethdef or self.target.fused_py_func:
+ code.put(
"static PyMethodDef %s = " % entry.pymethdef_cname)
- code.put_pymethoddef(self.target.entry, ";", allow_skip=False)
- code.putln("%s {" % header)
-
- def generate_argument_declarations(self, env, code):
- for arg in self.args:
- if arg.is_generic:
- if arg.needs_conversion:
- code.putln("PyObject *%s = 0;" % arg.hdr_cname)
- else:
- code.put_var_declaration(arg.entry)
- for entry in env.var_entries:
- if entry.is_arg:
- code.put_var_declaration(entry)
-
- def generate_argument_parsing_code(self, env, code):
- # Generate fast equivalent of PyArg_ParseTuple call for
- # generic arguments, if any, including args/kwargs
- old_error_label = code.new_error_label()
- our_error_label = code.error_label
- end_label = code.new_label("argument_unpacking_done")
-
- has_kwonly_args = self.num_kwonly_args > 0
- has_star_or_kw_args = self.star_arg is not None \
- or self.starstar_arg is not None or has_kwonly_args
-
- for arg in self.args:
- if not arg.type.is_pyobject:
- if not arg.type.create_from_py_utility_code(env):
+ code.put_pymethoddef(self.target.entry, ";", allow_skip=False)
+ code.putln("%s {" % header)
+
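The header construction above amounts to joining per-argument C declarations and emitting a static prototype, optionally marked CYTHON_UNUSED. A minimal standalone sketch of that assembly (build_wrapper_header and its arguments are illustrative, not Cython API):

def build_wrapper_header(func_cname, return_decl, arg_decls, mark_unused=False):
    # arg_decls is a list of C declarations such as "PyObject *__pyx_self";
    # mark_unused mirrors the CYTHON_UNUSED prefix used for __getbuffer__ etc.
    modifier = "CYTHON_UNUSED " if mark_unused else ""
    return "static %s%s %s(%s)" % (modifier, return_decl, func_cname, ", ".join(arg_decls))

# e.g. static PyObject * __pyx_pw_4demo_1f(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds)
print(build_wrapper_header("__pyx_pw_4demo_1f", "PyObject *",
                           ["PyObject *__pyx_self", "PyObject *__pyx_args", "PyObject *__pyx_kwds"]))
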
+ def generate_argument_declarations(self, env, code):
+ for arg in self.args:
+ if arg.is_generic:
+ if arg.needs_conversion:
+ code.putln("PyObject *%s = 0;" % arg.hdr_cname)
+ else:
+ code.put_var_declaration(arg.entry)
+ for entry in env.var_entries:
+ if entry.is_arg:
+ code.put_var_declaration(entry)
+
+ def generate_argument_parsing_code(self, env, code):
+ # Generate fast equivalent of PyArg_ParseTuple call for
+ # generic arguments, if any, including args/kwargs
+ old_error_label = code.new_error_label()
+ our_error_label = code.error_label
+ end_label = code.new_label("argument_unpacking_done")
+
+ has_kwonly_args = self.num_kwonly_args > 0
+ has_star_or_kw_args = self.star_arg is not None \
+ or self.starstar_arg is not None or has_kwonly_args
+
+ for arg in self.args:
+ if not arg.type.is_pyobject:
+ if not arg.type.create_from_py_utility_code(env):
pass # will fail later
-
- if not self.signature_has_generic_args():
- if has_star_or_kw_args:
- error(self.pos, "This method cannot have * or keyword arguments")
- self.generate_argument_conversion_code(code)
-
- elif not self.signature_has_nongeneric_args():
- # func(*args) or func(**kw) or func(*args, **kw)
- self.generate_stararg_copy_code(code)
-
- else:
- self.generate_tuple_and_keyword_parsing_code(self.args, end_label, code)
-
- code.error_label = old_error_label
- if code.label_used(our_error_label):
- if not code.label_used(end_label):
- code.put_goto(end_label)
- code.put_label(our_error_label)
- if has_star_or_kw_args:
- self.generate_arg_decref(self.star_arg, code)
- if self.starstar_arg:
- if self.starstar_arg.entry.xdecref_cleanup:
- code.put_var_xdecref_clear(self.starstar_arg.entry)
- else:
- code.put_var_decref_clear(self.starstar_arg.entry)
- code.put_add_traceback(self.target.entry.qualified_name)
- code.put_finish_refcount_context()
- code.putln("return %s;" % self.error_value())
- if code.label_used(end_label):
- code.put_label(end_label)
-
- def generate_arg_xdecref(self, arg, code):
- if arg:
- code.put_var_xdecref_clear(arg.entry)
-
- def generate_arg_decref(self, arg, code):
- if arg:
- code.put_var_decref_clear(arg.entry)
-
- def generate_stararg_copy_code(self, code):
- if not self.star_arg:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
- code.putln("if (unlikely(PyTuple_GET_SIZE(%s) > 0)) {" %
- Naming.args_cname)
- code.put('__Pyx_RaiseArgtupleInvalid("%s", 1, 0, 0, PyTuple_GET_SIZE(%s)); return %s;' % (
+
+ if not self.signature_has_generic_args():
+ if has_star_or_kw_args:
+ error(self.pos, "This method cannot have * or keyword arguments")
+ self.generate_argument_conversion_code(code)
+
+ elif not self.signature_has_nongeneric_args():
+ # func(*args) or func(**kw) or func(*args, **kw)
+ self.generate_stararg_copy_code(code)
+
+ else:
+ self.generate_tuple_and_keyword_parsing_code(self.args, end_label, code)
+
+ code.error_label = old_error_label
+ if code.label_used(our_error_label):
+ if not code.label_used(end_label):
+ code.put_goto(end_label)
+ code.put_label(our_error_label)
+ if has_star_or_kw_args:
+ self.generate_arg_decref(self.star_arg, code)
+ if self.starstar_arg:
+ if self.starstar_arg.entry.xdecref_cleanup:
+ code.put_var_xdecref_clear(self.starstar_arg.entry)
+ else:
+ code.put_var_decref_clear(self.starstar_arg.entry)
+ code.put_add_traceback(self.target.entry.qualified_name)
+ code.put_finish_refcount_context()
+ code.putln("return %s;" % self.error_value())
+ if code.label_used(end_label):
+ code.put_label(end_label)
+
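generate_argument_parsing_code selects one of three unpacking strategies depending on the signature. A rough pure-Python model of that decision (the function name and flags are illustrative only):

def pick_parsing_strategy(has_generic_args, has_nongeneric_args, has_star_or_kw_args):
    # mirrors the three branches above
    if not has_generic_args:
        if has_star_or_kw_args:
            raise TypeError("This method cannot have * or keyword arguments")
        return "convert signature args only"
    if not has_nongeneric_args:
        return "fast */** copy"          # func(*args), func(**kw) or both
    return "full tuple-and-keyword unpacking"
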
+ def generate_arg_xdecref(self, arg, code):
+ if arg:
+ code.put_var_xdecref_clear(arg.entry)
+
+ def generate_arg_decref(self, arg, code):
+ if arg:
+ code.put_var_decref_clear(arg.entry)
+
+ def generate_stararg_copy_code(self, code):
+ if not self.star_arg:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
+ code.putln("if (unlikely(PyTuple_GET_SIZE(%s) > 0)) {" %
+ Naming.args_cname)
+ code.put('__Pyx_RaiseArgtupleInvalid("%s", 1, 0, 0, PyTuple_GET_SIZE(%s)); return %s;' % (
self.name, Naming.args_cname, self.error_value()))
- code.putln("}")
-
- if self.starstar_arg:
+ code.putln("}")
+
+ if self.starstar_arg:
if self.star_arg or not self.starstar_arg.entry.cf_used:
- kwarg_check = "unlikely(%s)" % Naming.kwds_cname
- else:
- kwarg_check = "%s" % Naming.kwds_cname
- else:
- kwarg_check = "unlikely(%s) && unlikely(PyDict_Size(%s) > 0)" % (
- Naming.kwds_cname, Naming.kwds_cname)
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("KeywordStringCheck", "FunctionArguments.c"))
- code.putln(
- "if (%s && unlikely(!__Pyx_CheckKeywordStrings(%s, \"%s\", %d))) return %s;" % (
- kwarg_check, Naming.kwds_cname, self.name,
- bool(self.starstar_arg), self.error_value()))
-
+ kwarg_check = "unlikely(%s)" % Naming.kwds_cname
+ else:
+ kwarg_check = "%s" % Naming.kwds_cname
+ else:
+ kwarg_check = "unlikely(%s) && unlikely(PyDict_Size(%s) > 0)" % (
+ Naming.kwds_cname, Naming.kwds_cname)
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("KeywordStringCheck", "FunctionArguments.c"))
+ code.putln(
+ "if (%s && unlikely(!__Pyx_CheckKeywordStrings(%s, \"%s\", %d))) return %s;" % (
+ kwarg_check, Naming.kwds_cname, self.name,
+ bool(self.starstar_arg), self.error_value()))
+
if self.starstar_arg and self.starstar_arg.entry.cf_used:
if all(ref.node.allow_null for ref in self.starstar_arg.entry.cf_references):
code.putln("if (%s) {" % kwarg_check)
@@ -3584,376 +3584,376 @@ class DefNodeWrapper(FuncDefNode):
self.starstar_arg.entry.cname, self.error_value()))
self.starstar_arg.entry.xdecref_cleanup = 0
code.put_gotref(self.starstar_arg.entry.cname)
-
- if self.self_in_stararg and not self.target.is_staticmethod:
- # need to create a new tuple with 'self' inserted as first item
- code.put("%s = PyTuple_New(PyTuple_GET_SIZE(%s)+1); if (unlikely(!%s)) " % (
+
+ if self.self_in_stararg and not self.target.is_staticmethod:
+ # need to create a new tuple with 'self' inserted as first item
+ code.put("%s = PyTuple_New(PyTuple_GET_SIZE(%s)+1); if (unlikely(!%s)) " % (
self.star_arg.entry.cname,
Naming.args_cname,
self.star_arg.entry.cname))
if self.starstar_arg and self.starstar_arg.entry.cf_used:
- code.putln("{")
+ code.putln("{")
code.put_xdecref_clear(self.starstar_arg.entry.cname, py_object_type)
- code.putln("return %s;" % self.error_value())
- code.putln("}")
- else:
- code.putln("return %s;" % self.error_value())
- code.put_gotref(self.star_arg.entry.cname)
- code.put_incref(Naming.self_cname, py_object_type)
- code.put_giveref(Naming.self_cname)
- code.putln("PyTuple_SET_ITEM(%s, 0, %s);" % (
- self.star_arg.entry.cname, Naming.self_cname))
- temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
- code.putln("for (%s=0; %s < PyTuple_GET_SIZE(%s); %s++) {" % (
- temp, temp, Naming.args_cname, temp))
- code.putln("PyObject* item = PyTuple_GET_ITEM(%s, %s);" % (
- Naming.args_cname, temp))
- code.put_incref("item", py_object_type)
- code.put_giveref("item")
- code.putln("PyTuple_SET_ITEM(%s, %s+1, item);" % (
- self.star_arg.entry.cname, temp))
- code.putln("}")
- code.funcstate.release_temp(temp)
- self.star_arg.entry.xdecref_cleanup = 0
- elif self.star_arg:
- code.put_incref(Naming.args_cname, py_object_type)
- code.putln("%s = %s;" % (
+ code.putln("return %s;" % self.error_value())
+ code.putln("}")
+ else:
+ code.putln("return %s;" % self.error_value())
+ code.put_gotref(self.star_arg.entry.cname)
+ code.put_incref(Naming.self_cname, py_object_type)
+ code.put_giveref(Naming.self_cname)
+ code.putln("PyTuple_SET_ITEM(%s, 0, %s);" % (
+ self.star_arg.entry.cname, Naming.self_cname))
+ temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+ code.putln("for (%s=0; %s < PyTuple_GET_SIZE(%s); %s++) {" % (
+ temp, temp, Naming.args_cname, temp))
+ code.putln("PyObject* item = PyTuple_GET_ITEM(%s, %s);" % (
+ Naming.args_cname, temp))
+ code.put_incref("item", py_object_type)
+ code.put_giveref("item")
+ code.putln("PyTuple_SET_ITEM(%s, %s+1, item);" % (
+ self.star_arg.entry.cname, temp))
+ code.putln("}")
+ code.funcstate.release_temp(temp)
+ self.star_arg.entry.xdecref_cleanup = 0
+ elif self.star_arg:
+ code.put_incref(Naming.args_cname, py_object_type)
+ code.putln("%s = %s;" % (
self.star_arg.entry.cname,
Naming.args_cname))
- self.star_arg.entry.xdecref_cleanup = 0
-
- def generate_tuple_and_keyword_parsing_code(self, args, success_label, code):
- argtuple_error_label = code.new_label("argtuple_error")
-
- positional_args = []
- required_kw_only_args = []
- optional_kw_only_args = []
- for arg in args:
- if arg.is_generic:
- if arg.default:
- if not arg.is_self_arg and not arg.is_type_arg:
- if arg.kw_only:
- optional_kw_only_args.append(arg)
- else:
- positional_args.append(arg)
- elif arg.kw_only:
- required_kw_only_args.append(arg)
- elif not arg.is_self_arg and not arg.is_type_arg:
- positional_args.append(arg)
-
- # sort required kw-only args before optional ones to avoid special
- # cases in the unpacking code
- kw_only_args = required_kw_only_args + optional_kw_only_args
-
- min_positional_args = self.num_required_args - self.num_required_kw_args
- if len(args) > 0 and (args[0].is_self_arg or args[0].is_type_arg):
- min_positional_args -= 1
- max_positional_args = len(positional_args)
- has_fixed_positional_count = not self.star_arg and \
- min_positional_args == max_positional_args
- has_kw_only_args = bool(kw_only_args)
-
- if self.starstar_arg or self.star_arg:
- self.generate_stararg_init_code(max_positional_args, code)
-
- code.putln('{')
- all_args = tuple(positional_args) + tuple(kw_only_args)
- code.putln("static PyObject **%s[] = {%s,0};" % (
- Naming.pykwdlist_cname,
+ self.star_arg.entry.xdecref_cleanup = 0
+
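When 'self' has to travel inside *args (self_in_stararg), the generated C builds a fresh tuple one slot longer and copies every original item up by one. The same operation in plain Python, for orientation (prepend_self is an illustrative name):

def prepend_self(self_obj, args):
    # PyTuple_New(len(args) + 1): self at index 0, then each original item
    # stored (and incref'd in the C version) one position to the right
    new_args = [None] * (len(args) + 1)
    new_args[0] = self_obj
    for i, item in enumerate(args):
        new_args[i + 1] = item
    return tuple(new_args)

assert prepend_self("obj", (1, 2)) == ("obj", 1, 2)
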
+ def generate_tuple_and_keyword_parsing_code(self, args, success_label, code):
+ argtuple_error_label = code.new_label("argtuple_error")
+
+ positional_args = []
+ required_kw_only_args = []
+ optional_kw_only_args = []
+ for arg in args:
+ if arg.is_generic:
+ if arg.default:
+ if not arg.is_self_arg and not arg.is_type_arg:
+ if arg.kw_only:
+ optional_kw_only_args.append(arg)
+ else:
+ positional_args.append(arg)
+ elif arg.kw_only:
+ required_kw_only_args.append(arg)
+ elif not arg.is_self_arg and not arg.is_type_arg:
+ positional_args.append(arg)
+
+ # sort required kw-only args before optional ones to avoid special
+ # cases in the unpacking code
+ kw_only_args = required_kw_only_args + optional_kw_only_args
+
+ min_positional_args = self.num_required_args - self.num_required_kw_args
+ if len(args) > 0 and (args[0].is_self_arg or args[0].is_type_arg):
+ min_positional_args -= 1
+ max_positional_args = len(positional_args)
+ has_fixed_positional_count = not self.star_arg and \
+ min_positional_args == max_positional_args
+ has_kw_only_args = bool(kw_only_args)
+
+ if self.starstar_arg or self.star_arg:
+ self.generate_stararg_init_code(max_positional_args, code)
+
+ code.putln('{')
+ all_args = tuple(positional_args) + tuple(kw_only_args)
+ code.putln("static PyObject **%s[] = {%s,0};" % (
+ Naming.pykwdlist_cname,
','.join(['&%s' % code.intern_identifier(arg.name)
for arg in all_args])))
-
- # Before being converted and assigned to the target variables,
- # borrowed references to all unpacked argument values are
- # collected into a local PyObject* array called "values",
-        # regardless of whether they were taken from default arguments,
- # positional arguments or keyword arguments. Note that
- # C-typed default arguments are handled at conversion time,
- # so their array value is NULL in the end if no argument
- # was passed for them.
- self.generate_argument_values_setup_code(all_args, code)
-
- # --- optimised code when we receive keyword arguments
- code.putln("if (%s(%s)) {" % (
- (self.num_required_kw_args > 0) and "likely" or "unlikely",
- Naming.kwds_cname))
- self.generate_keyword_unpacking_code(
- min_positional_args, max_positional_args,
- has_fixed_positional_count, has_kw_only_args,
- all_args, argtuple_error_label, code)
-
- # --- optimised code when we do not receive any keyword arguments
- if (self.num_required_kw_args and min_positional_args > 0) or min_positional_args == max_positional_args:
- # Python raises arg tuple related errors first, so we must
- # check the length here
- if min_positional_args == max_positional_args and not self.star_arg:
- compare = '!='
- else:
- compare = '<'
- code.putln('} else if (PyTuple_GET_SIZE(%s) %s %d) {' % (
+
+ # Before being converted and assigned to the target variables,
+ # borrowed references to all unpacked argument values are
+ # collected into a local PyObject* array called "values",
+        # regardless of whether they were taken from default arguments,
+ # positional arguments or keyword arguments. Note that
+ # C-typed default arguments are handled at conversion time,
+ # so their array value is NULL in the end if no argument
+ # was passed for them.
+ self.generate_argument_values_setup_code(all_args, code)
+
+ # --- optimised code when we receive keyword arguments
+ code.putln("if (%s(%s)) {" % (
+ (self.num_required_kw_args > 0) and "likely" or "unlikely",
+ Naming.kwds_cname))
+ self.generate_keyword_unpacking_code(
+ min_positional_args, max_positional_args,
+ has_fixed_positional_count, has_kw_only_args,
+ all_args, argtuple_error_label, code)
+
+ # --- optimised code when we do not receive any keyword arguments
+ if (self.num_required_kw_args and min_positional_args > 0) or min_positional_args == max_positional_args:
+ # Python raises arg tuple related errors first, so we must
+ # check the length here
+ if min_positional_args == max_positional_args and not self.star_arg:
+ compare = '!='
+ else:
+ compare = '<'
+ code.putln('} else if (PyTuple_GET_SIZE(%s) %s %d) {' % (
Naming.args_cname, compare, min_positional_args))
- code.put_goto(argtuple_error_label)
-
- if self.num_required_kw_args:
- # pure error case: keywords required but not passed
- if max_positional_args > min_positional_args and not self.star_arg:
- code.putln('} else if (PyTuple_GET_SIZE(%s) > %d) {' % (
+ code.put_goto(argtuple_error_label)
+
+ if self.num_required_kw_args:
+ # pure error case: keywords required but not passed
+ if max_positional_args > min_positional_args and not self.star_arg:
+ code.putln('} else if (PyTuple_GET_SIZE(%s) > %d) {' % (
Naming.args_cname, max_positional_args))
- code.put_goto(argtuple_error_label)
- code.putln('} else {')
- for i, arg in enumerate(kw_only_args):
- if not arg.default:
- pystring_cname = code.intern_identifier(arg.name)
- # required keyword-only argument missing
+ code.put_goto(argtuple_error_label)
+ code.putln('} else {')
+ for i, arg in enumerate(kw_only_args):
+ if not arg.default:
+ pystring_cname = code.intern_identifier(arg.name)
+ # required keyword-only argument missing
code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c"))
- code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % (
+ code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % (
self.name,
pystring_cname))
- code.putln(code.error_goto(self.pos))
- break
-
- else:
- # optimised tuple unpacking code
- code.putln('} else {')
- if min_positional_args == max_positional_args:
- # parse the exact number of positional arguments from
- # the args tuple
- for i, arg in enumerate(positional_args):
- code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (i, Naming.args_cname, i))
- else:
- # parse the positional arguments from the variable length
- # args tuple and reject illegal argument tuple sizes
- code.putln('switch (PyTuple_GET_SIZE(%s)) {' % Naming.args_cname)
- if self.star_arg:
- code.putln('default:')
- reversed_args = list(enumerate(positional_args))[::-1]
- for i, arg in reversed_args:
- if i >= min_positional_args-1:
+ code.putln(code.error_goto(self.pos))
+ break
+
+ else:
+ # optimised tuple unpacking code
+ code.putln('} else {')
+ if min_positional_args == max_positional_args:
+ # parse the exact number of positional arguments from
+ # the args tuple
+ for i, arg in enumerate(positional_args):
+ code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (i, Naming.args_cname, i))
+ else:
+ # parse the positional arguments from the variable length
+ # args tuple and reject illegal argument tuple sizes
+ code.putln('switch (PyTuple_GET_SIZE(%s)) {' % Naming.args_cname)
+ if self.star_arg:
+ code.putln('default:')
+ reversed_args = list(enumerate(positional_args))[::-1]
+ for i, arg in reversed_args:
+ if i >= min_positional_args-1:
if i != reversed_args[0][0]:
code.putln('CYTHON_FALLTHROUGH;')
- code.put('case %2d: ' % (i+1))
- code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (i, Naming.args_cname, i))
- if min_positional_args == 0:
+ code.put('case %2d: ' % (i+1))
+ code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (i, Naming.args_cname, i))
+ if min_positional_args == 0:
code.putln('CYTHON_FALLTHROUGH;')
- code.put('case 0: ')
- code.putln('break;')
- if self.star_arg:
- if min_positional_args:
- for i in range(min_positional_args-1, -1, -1):
- code.putln('case %2d:' % i)
- code.put_goto(argtuple_error_label)
- else:
- code.put('default: ')
- code.put_goto(argtuple_error_label)
- code.putln('}')
-
- code.putln('}') # end of the conditional unpacking blocks
-
- # Convert arg values to their final type and assign them.
-        # Also inject non-Python default arguments, which cannot
- # live in the values[] array.
- for i, arg in enumerate(all_args):
- self.generate_arg_assignment(arg, "values[%d]" % i, code)
-
- code.putln('}') # end of the whole argument unpacking block
-
- if code.label_used(argtuple_error_label):
- code.put_goto(success_label)
- code.put_label(argtuple_error_label)
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
- code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, PyTuple_GET_SIZE(%s)); ' % (
+ code.put('case 0: ')
+ code.putln('break;')
+ if self.star_arg:
+ if min_positional_args:
+ for i in range(min_positional_args-1, -1, -1):
+ code.putln('case %2d:' % i)
+ code.put_goto(argtuple_error_label)
+ else:
+ code.put('default: ')
+ code.put_goto(argtuple_error_label)
+ code.putln('}')
+
+ code.putln('}') # end of the conditional unpacking blocks
+
+ # Convert arg values to their final type and assign them.
+        # Also inject non-Python default arguments, which cannot
+ # live in the values[] array.
+ for i, arg in enumerate(all_args):
+ self.generate_arg_assignment(arg, "values[%d]" % i, code)
+
+ code.putln('}') # end of the whole argument unpacking block
+
+ if code.label_used(argtuple_error_label):
+ code.put_goto(success_label)
+ code.put_label(argtuple_error_label)
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
+ code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, PyTuple_GET_SIZE(%s)); ' % (
self.name, has_fixed_positional_count,
min_positional_args, max_positional_args,
Naming.args_cname))
- code.putln(code.error_goto(self.pos))
-
- def generate_arg_assignment(self, arg, item, code):
- if arg.type.is_pyobject:
- # Python default arguments were already stored in 'item' at the very beginning
- if arg.is_generic:
- item = PyrexTypes.typecast(arg.type, PyrexTypes.py_object_type, item)
- entry = arg.entry
- code.putln("%s = %s;" % (entry.cname, item))
- else:
+ code.putln(code.error_goto(self.pos))
+
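The switch statements generated above copy positional items from the args tuple into the values[] array and reject illegal tuple sizes. A compact pure-Python model of that step, under the simplifying assumption that values is pre-filled with defaults or None (unpack_positional is illustrative):

def unpack_positional(args, values, min_positional, max_positional, has_star_arg=False):
    if len(args) < min_positional:
        raise TypeError("not enough positional arguments")
    if len(args) > max_positional and not has_star_arg:
        raise TypeError("too many positional arguments")   # the argtuple_error path
    for i, value in enumerate(args[:max_positional]):
        values[i] = value                                   # borrowed reference in the C version
    return values
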
+ def generate_arg_assignment(self, arg, item, code):
+ if arg.type.is_pyobject:
+ # Python default arguments were already stored in 'item' at the very beginning
+ if arg.is_generic:
+ item = PyrexTypes.typecast(arg.type, PyrexTypes.py_object_type, item)
+ entry = arg.entry
+ code.putln("%s = %s;" % (entry.cname, item))
+ else:
if arg.type.from_py_function:
- if arg.default:
- # C-typed default arguments must be handled here
- code.putln('if (%s) {' % item)
+ if arg.default:
+ # C-typed default arguments must be handled here
+ code.putln('if (%s) {' % item)
code.putln(arg.type.from_py_call_code(
item, arg.entry.cname, arg.pos, code))
- if arg.default:
- code.putln('} else {')
+ if arg.default:
+ code.putln('} else {')
code.putln("%s = %s;" % (
arg.entry.cname,
arg.calculate_default_value_code(code)))
- if arg.type.is_memoryviewslice:
- code.put_incref_memoryviewslice(arg.entry.cname,
- have_gil=True)
- code.putln('}')
- else:
- error(arg.pos, "Cannot convert Python object argument to type '%s'" % arg.type)
-
- def generate_stararg_init_code(self, max_positional_args, code):
- if self.starstar_arg:
- self.starstar_arg.entry.xdecref_cleanup = 0
- code.putln('%s = PyDict_New(); if (unlikely(!%s)) return %s;' % (
+ if arg.type.is_memoryviewslice:
+ code.put_incref_memoryviewslice(arg.entry.cname,
+ have_gil=True)
+ code.putln('}')
+ else:
+ error(arg.pos, "Cannot convert Python object argument to type '%s'" % arg.type)
+
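generate_arg_assignment converts each collected value to its target C type and falls back to the C-typed default when the values[] slot stayed NULL. In effect (assign_arg and to_c are illustrative stand-ins for the generated conversion call):

def assign_arg(value, to_c, c_default):
    # the generated 'if (values[i]) { convert } else { use default }' pattern
    if value is not None:
        return to_c(value)
    return c_default

assert assign_arg("42", int, 0) == 42
assert assign_arg(None, int, 7) == 7
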
+ def generate_stararg_init_code(self, max_positional_args, code):
+ if self.starstar_arg:
+ self.starstar_arg.entry.xdecref_cleanup = 0
+ code.putln('%s = PyDict_New(); if (unlikely(!%s)) return %s;' % (
self.starstar_arg.entry.cname,
self.starstar_arg.entry.cname,
self.error_value()))
- code.put_gotref(self.starstar_arg.entry.cname)
- if self.star_arg:
- self.star_arg.entry.xdecref_cleanup = 0
- code.putln('if (PyTuple_GET_SIZE(%s) > %d) {' % (
+ code.put_gotref(self.starstar_arg.entry.cname)
+ if self.star_arg:
+ self.star_arg.entry.xdecref_cleanup = 0
+ code.putln('if (PyTuple_GET_SIZE(%s) > %d) {' % (
Naming.args_cname,
max_positional_args))
- code.putln('%s = PyTuple_GetSlice(%s, %d, PyTuple_GET_SIZE(%s));' % (
+ code.putln('%s = PyTuple_GetSlice(%s, %d, PyTuple_GET_SIZE(%s));' % (
self.star_arg.entry.cname, Naming.args_cname,
max_positional_args, Naming.args_cname))
- code.putln("if (unlikely(!%s)) {" % self.star_arg.entry.cname)
- if self.starstar_arg:
- code.put_decref_clear(self.starstar_arg.entry.cname, py_object_type)
- code.put_finish_refcount_context()
- code.putln('return %s;' % self.error_value())
- code.putln('}')
- code.put_gotref(self.star_arg.entry.cname)
- code.putln('} else {')
- code.put("%s = %s; " % (self.star_arg.entry.cname, Naming.empty_tuple))
- code.put_incref(Naming.empty_tuple, py_object_type)
- code.putln('}')
-
- def generate_argument_values_setup_code(self, args, code):
- max_args = len(args)
- # the 'values' array collects borrowed references to arguments
- # before doing any type coercion etc.
- code.putln("PyObject* values[%d] = {%s};" % (
- max_args, ','.join('0'*max_args)))
-
- if self.target.defaults_struct:
- code.putln('%s *%s = __Pyx_CyFunction_Defaults(%s, %s);' % (
- self.target.defaults_struct, Naming.dynamic_args_cname,
- self.target.defaults_struct, Naming.self_cname))
-
- # assign borrowed Python default values to the values array,
- # so that they can be overwritten by received arguments below
- for i, arg in enumerate(args):
- if arg.default and arg.type.is_pyobject:
- default_value = arg.calculate_default_value_code(code)
- code.putln('values[%d] = %s;' % (i, arg.type.as_pyobject(default_value)))
-
- def generate_keyword_unpacking_code(self, min_positional_args, max_positional_args,
- has_fixed_positional_count, has_kw_only_args,
- all_args, argtuple_error_label, code):
- code.putln('Py_ssize_t kw_args;')
- code.putln('const Py_ssize_t pos_args = PyTuple_GET_SIZE(%s);' % Naming.args_cname)
- # copy the values from the args tuple and check that it's not too long
- code.putln('switch (pos_args) {')
- if self.star_arg:
- code.putln('default:')
- for i in range(max_positional_args-1, -1, -1):
- code.put('case %2d: ' % (i+1))
- code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (
+ code.putln("if (unlikely(!%s)) {" % self.star_arg.entry.cname)
+ if self.starstar_arg:
+ code.put_decref_clear(self.starstar_arg.entry.cname, py_object_type)
+ code.put_finish_refcount_context()
+ code.putln('return %s;' % self.error_value())
+ code.putln('}')
+ code.put_gotref(self.star_arg.entry.cname)
+ code.putln('} else {')
+ code.put("%s = %s; " % (self.star_arg.entry.cname, Naming.empty_tuple))
+ code.put_incref(Naming.empty_tuple, py_object_type)
+ code.putln('}')
+
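generate_stararg_init_code gives **kwargs a fresh dict and makes *args either a slice of the leftover positional arguments or the shared empty tuple. Roughly, in Python terms (init_star_args is an illustrative name):

def init_star_args(args, max_positional):
    extra_kwargs = {}                                  # PyDict_New()
    if len(args) > max_positional:
        star_args = args[max_positional:]              # PyTuple_GetSlice(...)
    else:
        star_args = ()                                 # reuse the shared empty tuple
    return star_args, extra_kwargs

assert init_star_args((1, 2, 3), 1) == ((2, 3), {})
assert init_star_args((1,), 2) == ((), {})
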
+ def generate_argument_values_setup_code(self, args, code):
+ max_args = len(args)
+ # the 'values' array collects borrowed references to arguments
+ # before doing any type coercion etc.
+ code.putln("PyObject* values[%d] = {%s};" % (
+ max_args, ','.join('0'*max_args)))
+
+ if self.target.defaults_struct:
+ code.putln('%s *%s = __Pyx_CyFunction_Defaults(%s, %s);' % (
+ self.target.defaults_struct, Naming.dynamic_args_cname,
+ self.target.defaults_struct, Naming.self_cname))
+
+ # assign borrowed Python default values to the values array,
+ # so that they can be overwritten by received arguments below
+ for i, arg in enumerate(args):
+ if arg.default and arg.type.is_pyobject:
+ default_value = arg.calculate_default_value_code(code)
+ code.putln('values[%d] = %s;' % (i, arg.type.as_pyobject(default_value)))
+
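The setup step above pre-loads Python-object defaults into values[] so that later unpacking can simply overwrite the slots that actually receive arguments. A small illustrative equivalent (setup_values and the _MISSING sentinel are assumptions of this sketch):

_MISSING = object()

def setup_values(arg_defaults):
    # arg_defaults: one entry per argument, _MISSING for required or C-typed ones
    values = [None] * len(arg_defaults)
    for i, default in enumerate(arg_defaults):
        if default is not _MISSING:
            values[i] = default          # borrowed default, may be overwritten below
    return values
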
+ def generate_keyword_unpacking_code(self, min_positional_args, max_positional_args,
+ has_fixed_positional_count, has_kw_only_args,
+ all_args, argtuple_error_label, code):
+ code.putln('Py_ssize_t kw_args;')
+ code.putln('const Py_ssize_t pos_args = PyTuple_GET_SIZE(%s);' % Naming.args_cname)
+ # copy the values from the args tuple and check that it's not too long
+ code.putln('switch (pos_args) {')
+ if self.star_arg:
+ code.putln('default:')
+ for i in range(max_positional_args-1, -1, -1):
+ code.put('case %2d: ' % (i+1))
+ code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (
i, Naming.args_cname, i))
code.putln('CYTHON_FALLTHROUGH;')
- code.putln('case 0: break;')
- if not self.star_arg:
- code.put('default: ') # more arguments than allowed
- code.put_goto(argtuple_error_label)
- code.putln('}')
-
- # The code above is very often (but not always) the same as
- # the optimised non-kwargs tuple unpacking code, so we keep
- # the code block above at the very top, before the following
- # 'external' PyDict_Size() call, to make it easy for the C
- # compiler to merge the two separate tuple unpacking
- # implementations into one when they turn out to be identical.
-
- # If we received kwargs, fill up the positional/required
- # arguments with values from the kw dict
- code.putln('kw_args = PyDict_Size(%s);' % Naming.kwds_cname)
- if self.num_required_args or max_positional_args > 0:
- last_required_arg = -1
- for i, arg in enumerate(all_args):
- if not arg.default:
- last_required_arg = i
- if last_required_arg < max_positional_args:
- last_required_arg = max_positional_args-1
- if max_positional_args > 0:
- code.putln('switch (pos_args) {')
- for i, arg in enumerate(all_args[:last_required_arg+1]):
- if max_positional_args > 0 and i <= max_positional_args:
+ code.putln('case 0: break;')
+ if not self.star_arg:
+ code.put('default: ') # more arguments than allowed
+ code.put_goto(argtuple_error_label)
+ code.putln('}')
+
+ # The code above is very often (but not always) the same as
+ # the optimised non-kwargs tuple unpacking code, so we keep
+ # the code block above at the very top, before the following
+ # 'external' PyDict_Size() call, to make it easy for the C
+ # compiler to merge the two separate tuple unpacking
+ # implementations into one when they turn out to be identical.
+
+ # If we received kwargs, fill up the positional/required
+ # arguments with values from the kw dict
+ code.putln('kw_args = PyDict_Size(%s);' % Naming.kwds_cname)
+ if self.num_required_args or max_positional_args > 0:
+ last_required_arg = -1
+ for i, arg in enumerate(all_args):
+ if not arg.default:
+ last_required_arg = i
+ if last_required_arg < max_positional_args:
+ last_required_arg = max_positional_args-1
+ if max_positional_args > 0:
+ code.putln('switch (pos_args) {')
+ for i, arg in enumerate(all_args[:last_required_arg+1]):
+ if max_positional_args > 0 and i <= max_positional_args:
if i != 0:
code.putln('CYTHON_FALLTHROUGH;')
- if self.star_arg and i == max_positional_args:
- code.putln('default:')
- else:
- code.putln('case %2d:' % i)
- pystring_cname = code.intern_identifier(arg.name)
- if arg.default:
- if arg.kw_only:
- # optional kw-only args are handled separately below
- continue
- code.putln('if (kw_args > 0) {')
- # don't overwrite default argument
+ if self.star_arg and i == max_positional_args:
+ code.putln('default:')
+ else:
+ code.putln('case %2d:' % i)
+ pystring_cname = code.intern_identifier(arg.name)
+ if arg.default:
+ if arg.kw_only:
+ # optional kw-only args are handled separately below
+ continue
+ code.putln('if (kw_args > 0) {')
+ # don't overwrite default argument
code.putln('PyObject* value = __Pyx_PyDict_GetItemStr(%s, %s);' % (
- Naming.kwds_cname, pystring_cname))
- code.putln('if (value) { values[%d] = value; kw_args--; }' % i)
- code.putln('}')
- else:
+ Naming.kwds_cname, pystring_cname))
+ code.putln('if (value) { values[%d] = value; kw_args--; }' % i)
+ code.putln('}')
+ else:
code.putln('if (likely((values[%d] = __Pyx_PyDict_GetItemStr(%s, %s)) != 0)) kw_args--;' % (
- i, Naming.kwds_cname, pystring_cname))
- if i < min_positional_args:
- if i == 0:
- # special case: we know arg 0 is missing
- code.put('else ')
- code.put_goto(argtuple_error_label)
- else:
- # print the correct number of values (args or
- # kwargs) that were passed into positional
- # arguments up to this point
- code.putln('else {')
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
- code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %d); ' % (
+ i, Naming.kwds_cname, pystring_cname))
+ if i < min_positional_args:
+ if i == 0:
+ # special case: we know arg 0 is missing
+ code.put('else ')
+ code.put_goto(argtuple_error_label)
+ else:
+ # print the correct number of values (args or
+ # kwargs) that were passed into positional
+ # arguments up to this point
+ code.putln('else {')
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
+ code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %d); ' % (
self.name, has_fixed_positional_count,
min_positional_args, max_positional_args, i))
- code.putln(code.error_goto(self.pos))
- code.putln('}')
- elif arg.kw_only:
- code.putln('else {')
+ code.putln(code.error_goto(self.pos))
+ code.putln('}')
+ elif arg.kw_only:
+ code.putln('else {')
code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c"))
code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % (
self.name, pystring_cname))
- code.putln(code.error_goto(self.pos))
- code.putln('}')
- if max_positional_args > 0:
- code.putln('}')
-
- if has_kw_only_args:
- # unpack optional keyword-only arguments separately because
- # checking for interned strings in a dict is faster than iterating
- self.generate_optional_kwonly_args_unpacking_code(all_args, code)
-
- code.putln('if (unlikely(kw_args > 0)) {')
- # non-positional/-required kw args left in dict: default args,
- # kw-only args, **kwargs or error
- #
- # This is sort of a catch-all: except for checking required
- # arguments, this will always do the right thing for unpacking
- # keyword arguments, so that we can concentrate on optimising
- # common cases above.
- if max_positional_args == 0:
- pos_arg_count = "0"
- elif self.star_arg:
- code.putln("const Py_ssize_t used_pos_args = (pos_args < %d) ? pos_args : %d;" % (
+ code.putln(code.error_goto(self.pos))
+ code.putln('}')
+ if max_positional_args > 0:
+ code.putln('}')
+
+ if has_kw_only_args:
+ # unpack optional keyword-only arguments separately because
+ # checking for interned strings in a dict is faster than iterating
+ self.generate_optional_kwonly_args_unpacking_code(all_args, code)
+
+ code.putln('if (unlikely(kw_args > 0)) {')
+ # non-positional/-required kw args left in dict: default args,
+ # kw-only args, **kwargs or error
+ #
+ # This is sort of a catch-all: except for checking required
+ # arguments, this will always do the right thing for unpacking
+ # keyword arguments, so that we can concentrate on optimising
+ # common cases above.
+ if max_positional_args == 0:
+ pos_arg_count = "0"
+ elif self.star_arg:
+ code.putln("const Py_ssize_t used_pos_args = (pos_args < %d) ? pos_args : %d;" % (
max_positional_args, max_positional_args))
- pos_arg_count = "used_pos_args"
- else:
- pos_arg_count = "pos_args"
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c"))
+ pos_arg_count = "used_pos_args"
+ else:
+ pos_arg_count = "pos_args"
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c"))
code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, values, %s, "%s") < 0)) %s' % (
Naming.kwds_cname,
Naming.pykwdlist_cname,
@@ -3961,69 +3961,69 @@ class DefNodeWrapper(FuncDefNode):
pos_arg_count,
self.name,
code.error_goto(self.pos)))
- code.putln('}')
-
- def generate_optional_kwonly_args_unpacking_code(self, all_args, code):
- optional_args = []
- first_optional_arg = -1
- for i, arg in enumerate(all_args):
- if not arg.kw_only or not arg.default:
- continue
- if not optional_args:
- first_optional_arg = i
- optional_args.append(arg.name)
- if optional_args:
- if len(optional_args) > 1:
- # if we receive more than the named kwargs, we either have **kwargs
- # (in which case we must iterate anyway) or it's an error (which we
- # also handle during iteration) => skip this part if there are more
- code.putln('if (kw_args > 0 && %s(kw_args <= %d)) {' % (
- not self.starstar_arg and 'likely' or '',
- len(optional_args)))
- code.putln('Py_ssize_t index;')
- # not unrolling the loop here reduces the C code overhead
- code.putln('for (index = %d; index < %d && kw_args > 0; index++) {' % (
- first_optional_arg, first_optional_arg + len(optional_args)))
- else:
- code.putln('if (kw_args == 1) {')
- code.putln('const Py_ssize_t index = %d;' % first_optional_arg)
+ code.putln('}')
+
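After the positional copy, the generated code walks the named arguments, fetching each still-empty slot from the keyword dict and decrementing kw_args for every hit; whatever remains is either a keyword-passed default, a kw-only argument, material for **kwargs, or an error. A simplified pure-Python model (unpack_keywords, arg_names and required are illustrative):

def unpack_keywords(values, kwds, arg_names, required):
    kw_args = len(kwds)
    for i, name in enumerate(arg_names):
        if values[i] is None and name in kwds:
            values[i] = kwds[name]
            kw_args -= 1
    missing = [name for i, name in enumerate(arg_names)
               if name in required and values[i] is None]
    if missing:
        raise TypeError("missing required argument '%s'" % missing[0])
    return kw_args    # > 0 means defaults, **kwargs entries or unexpected names
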
+ def generate_optional_kwonly_args_unpacking_code(self, all_args, code):
+ optional_args = []
+ first_optional_arg = -1
+ for i, arg in enumerate(all_args):
+ if not arg.kw_only or not arg.default:
+ continue
+ if not optional_args:
+ first_optional_arg = i
+ optional_args.append(arg.name)
+ if optional_args:
+ if len(optional_args) > 1:
+ # if we receive more than the named kwargs, we either have **kwargs
+ # (in which case we must iterate anyway) or it's an error (which we
+ # also handle during iteration) => skip this part if there are more
+ code.putln('if (kw_args > 0 && %s(kw_args <= %d)) {' % (
+ not self.starstar_arg and 'likely' or '',
+ len(optional_args)))
+ code.putln('Py_ssize_t index;')
+ # not unrolling the loop here reduces the C code overhead
+ code.putln('for (index = %d; index < %d && kw_args > 0; index++) {' % (
+ first_optional_arg, first_optional_arg + len(optional_args)))
+ else:
+ code.putln('if (kw_args == 1) {')
+ code.putln('const Py_ssize_t index = %d;' % first_optional_arg)
code.putln('PyObject* value = __Pyx_PyDict_GetItemStr(%s, *%s[index]);' % (
- Naming.kwds_cname, Naming.pykwdlist_cname))
- code.putln('if (value) { values[index] = value; kw_args--; }')
- if len(optional_args) > 1:
- code.putln('}')
- code.putln('}')
-
- def generate_argument_conversion_code(self, code):
- # Generate code to convert arguments from signature type to
- # declared type, if needed. Also copies signature arguments
- # into closure fields.
- for arg in self.args:
- if arg.needs_conversion:
- self.generate_arg_conversion(arg, code)
-
- def generate_arg_conversion(self, arg, code):
- # Generate conversion code for one argument.
- old_type = arg.hdr_type
- new_type = arg.type
- if old_type.is_pyobject:
- if arg.default:
- code.putln("if (%s) {" % arg.hdr_cname)
- else:
- code.putln("assert(%s); {" % arg.hdr_cname)
- self.generate_arg_conversion_from_pyobject(arg, code)
- code.putln("}")
- elif new_type.is_pyobject:
- self.generate_arg_conversion_to_pyobject(arg, code)
- else:
- if new_type.assignable_from(old_type):
+ Naming.kwds_cname, Naming.pykwdlist_cname))
+ code.putln('if (value) { values[index] = value; kw_args--; }')
+ if len(optional_args) > 1:
+ code.putln('}')
+ code.putln('}')
+
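The optional keyword-only path avoids iterating the whole keyword dict: since the names are interned, a handful of direct lookups is cheaper. Sketch (fill_optional_kwonly and its parameters are illustrative):

def fill_optional_kwonly(values, kwds, kw_args, first_index, optional_names):
    # kw_args: number of keyword entries not yet consumed by earlier unpacking
    for offset, name in enumerate(optional_names):
        if kw_args <= 0:
            break
        value = kwds.get(name)            # direct lookup of an interned name
        if value is not None:
            values[first_index + offset] = value
            kw_args -= 1
    return kw_args
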
+ def generate_argument_conversion_code(self, code):
+ # Generate code to convert arguments from signature type to
+ # declared type, if needed. Also copies signature arguments
+ # into closure fields.
+ for arg in self.args:
+ if arg.needs_conversion:
+ self.generate_arg_conversion(arg, code)
+
+ def generate_arg_conversion(self, arg, code):
+ # Generate conversion code for one argument.
+ old_type = arg.hdr_type
+ new_type = arg.type
+ if old_type.is_pyobject:
+ if arg.default:
+ code.putln("if (%s) {" % arg.hdr_cname)
+ else:
+ code.putln("assert(%s); {" % arg.hdr_cname)
+ self.generate_arg_conversion_from_pyobject(arg, code)
+ code.putln("}")
+ elif new_type.is_pyobject:
+ self.generate_arg_conversion_to_pyobject(arg, code)
+ else:
+ if new_type.assignable_from(old_type):
code.putln("%s = %s;" % (arg.entry.cname, arg.hdr_cname))
- else:
+ else:
error(arg.pos, "Cannot convert 1 argument from '%s' to '%s'" % (old_type, new_type))
-
- def generate_arg_conversion_from_pyobject(self, arg, code):
- new_type = arg.type
- # copied from CoerceFromPyTypeNode
+
+ def generate_arg_conversion_from_pyobject(self, arg, code):
+ new_type = arg.type
+ # copied from CoerceFromPyTypeNode
if new_type.from_py_function:
code.putln(new_type.from_py_call_code(
arg.hdr_cname,
@@ -4031,94 +4031,94 @@ class DefNodeWrapper(FuncDefNode):
arg.pos,
code,
))
- else:
+ else:
error(arg.pos, "Cannot convert Python object argument to type '%s'" % new_type)
-
- def generate_arg_conversion_to_pyobject(self, arg, code):
- old_type = arg.hdr_type
- func = old_type.to_py_function
- if func:
- code.putln("%s = %s(%s); %s" % (
- arg.entry.cname,
- func,
- arg.hdr_cname,
- code.error_goto_if_null(arg.entry.cname, arg.pos)))
- code.put_var_gotref(arg.entry)
- else:
+
+ def generate_arg_conversion_to_pyobject(self, arg, code):
+ old_type = arg.hdr_type
+ func = old_type.to_py_function
+ if func:
+ code.putln("%s = %s(%s); %s" % (
+ arg.entry.cname,
+ func,
+ arg.hdr_cname,
+ code.error_goto_if_null(arg.entry.cname, arg.pos)))
+ code.put_var_gotref(arg.entry)
+ else:
error(arg.pos, "Cannot convert argument of type '%s' to Python object" % old_type)
-
- def generate_argument_type_tests(self, code):
- # Generate type tests for args whose signature
- # type is PyObject * and whose declared type is
- # a subtype thereof.
- for arg in self.args:
- if arg.needs_type_test:
- self.generate_arg_type_test(arg, code)
- elif not arg.accept_none and (arg.type.is_pyobject or
- arg.type.is_buffer or
- arg.type.is_memoryviewslice):
- self.generate_arg_none_check(arg, code)
-
- def error_value(self):
- return self.signature.error_value
-
-
-class GeneratorDefNode(DefNode):
- # Generator function node that creates a new generator instance when called.
- #
- # gbody GeneratorBodyDefNode the function implementing the generator
- #
-
- is_generator = True
+
+ def generate_argument_type_tests(self, code):
+ # Generate type tests for args whose signature
+ # type is PyObject * and whose declared type is
+ # a subtype thereof.
+ for arg in self.args:
+ if arg.needs_type_test:
+ self.generate_arg_type_test(arg, code)
+ elif not arg.accept_none and (arg.type.is_pyobject or
+ arg.type.is_buffer or
+ arg.type.is_memoryviewslice):
+ self.generate_arg_none_check(arg, code)
+
+ def error_value(self):
+ return self.signature.error_value
+
+
+class GeneratorDefNode(DefNode):
+ # Generator function node that creates a new generator instance when called.
+ #
+ # gbody GeneratorBodyDefNode the function implementing the generator
+ #
+
+ is_generator = True
is_coroutine = False
is_iterable_coroutine = False
is_asyncgen = False
gen_type_name = 'Generator'
- needs_closure = True
-
- child_attrs = DefNode.child_attrs + ["gbody"]
-
- def __init__(self, pos, **kwargs):
-        # XXX: doesn't actually need a body
- kwargs['body'] = StatListNode(pos, stats=[], is_terminator=True)
- super(GeneratorDefNode, self).__init__(pos, **kwargs)
-
- def analyse_declarations(self, env):
- super(GeneratorDefNode, self).analyse_declarations(env)
- self.gbody.local_scope = self.local_scope
- self.gbody.analyse_declarations(env)
-
- def generate_function_body(self, env, code):
- body_cname = self.gbody.entry.func_cname
- name = code.intern_identifier(self.name)
- qualname = code.intern_identifier(self.qualname)
+ needs_closure = True
+
+ child_attrs = DefNode.child_attrs + ["gbody"]
+
+ def __init__(self, pos, **kwargs):
+        # XXX: doesn't actually need a body
+ kwargs['body'] = StatListNode(pos, stats=[], is_terminator=True)
+ super(GeneratorDefNode, self).__init__(pos, **kwargs)
+
+ def analyse_declarations(self, env):
+ super(GeneratorDefNode, self).analyse_declarations(env)
+ self.gbody.local_scope = self.local_scope
+ self.gbody.analyse_declarations(env)
+
+ def generate_function_body(self, env, code):
+ body_cname = self.gbody.entry.func_cname
+ name = code.intern_identifier(self.name)
+ qualname = code.intern_identifier(self.qualname)
module_name = code.intern_identifier(self.module_name)
-
- code.putln('{')
+
+ code.putln('{')
code.putln('__pyx_CoroutineObject *gen = __Pyx_%s_New('
'(__pyx_coroutine_body_t) %s, %s, (PyObject *) %s, %s, %s, %s); %s' % (
self.gen_type_name,
body_cname, self.code_object.calculate_result_code(code) if self.code_object else 'NULL',
Naming.cur_scope_cname, name, qualname, module_name,
- code.error_goto_if_null('gen', self.pos)))
- code.put_decref(Naming.cur_scope_cname, py_object_type)
- if self.requires_classobj:
- classobj_cname = 'gen->classobj'
- code.putln('%s = __Pyx_CyFunction_GetClassObj(%s);' % (
- classobj_cname, Naming.self_cname))
- code.put_incref(classobj_cname, py_object_type)
- code.put_giveref(classobj_cname)
- code.put_finish_refcount_context()
- code.putln('return (PyObject *) gen;')
- code.putln('}')
-
- def generate_function_definitions(self, env, code):
+ code.error_goto_if_null('gen', self.pos)))
+ code.put_decref(Naming.cur_scope_cname, py_object_type)
+ if self.requires_classobj:
+ classobj_cname = 'gen->classobj'
+ code.putln('%s = __Pyx_CyFunction_GetClassObj(%s);' % (
+ classobj_cname, Naming.self_cname))
+ code.put_incref(classobj_cname, py_object_type)
+ code.put_giveref(classobj_cname)
+ code.put_finish_refcount_context()
+ code.putln('return (PyObject *) gen;')
+ code.putln('}')
+
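Calling a generator def therefore never runs the body: the wrapper constructs a coroutine object around the body function and the current closure and returns it immediately. A toy illustration of that shape (the _Gen class is purely a stand-in for __pyx_CoroutineObject):

class _Gen(object):
    # stand-in for __pyx_CoroutineObject: body, closure and bookkeeping names
    def __init__(self, body, closure, name, qualname, module_name):
        self.body = body
        self.closure = closure
        self.__name__ = name
        self.__qualname__ = qualname
        self.__module__ = module_name
        self.resume_label = 0            # 0 == not started yet

def make_generator(body, closure, name, qualname, module_name):
    return _Gen(body, closure, name, qualname, module_name)
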
+ def generate_function_definitions(self, env, code):
env.use_utility_code(UtilityCode.load_cached(self.gen_type_name, "Coroutine.c"))
- self.gbody.generate_function_header(code, proto=True)
- super(GeneratorDefNode, self).generate_function_definitions(env, code)
- self.gbody.generate_function_definitions(env, code)
-
-
+ self.gbody.generate_function_header(code, proto=True)
+ super(GeneratorDefNode, self).generate_function_definitions(env, code)
+ self.gbody.generate_function_definitions(env, code)
+
+
class AsyncDefNode(GeneratorDefNode):
gen_type_name = 'Coroutine'
is_coroutine = True
@@ -4134,71 +4134,71 @@ class AsyncGenNode(AsyncDefNode):
is_asyncgen = True
-class GeneratorBodyDefNode(DefNode):
- # Main code body of a generator implemented as a DefNode.
- #
-
- is_generator_body = True
+class GeneratorBodyDefNode(DefNode):
+ # Main code body of a generator implemented as a DefNode.
+ #
+
+ is_generator_body = True
is_inlined = False
is_async_gen_body = False
inlined_comprehension_type = None # container type for inlined comprehensions
-
+
def __init__(self, pos=None, name=None, body=None, is_async_gen_body=False):
- super(GeneratorBodyDefNode, self).__init__(
+ super(GeneratorBodyDefNode, self).__init__(
pos=pos, body=body, name=name, is_async_gen_body=is_async_gen_body,
doc=None, args=[], star_arg=None, starstar_arg=None)
-
- def declare_generator_body(self, env):
- prefix = env.next_id(env.scope_prefix)
- name = env.next_id('generator')
- cname = Naming.genbody_prefix + prefix + name
- entry = env.declare_var(None, py_object_type, self.pos,
- cname=cname, visibility='private')
- entry.func_cname = cname
- entry.qualified_name = EncodedString(self.name)
+
+ def declare_generator_body(self, env):
+ prefix = env.next_id(env.scope_prefix)
+ name = env.next_id('generator')
+ cname = Naming.genbody_prefix + prefix + name
+ entry = env.declare_var(None, py_object_type, self.pos,
+ cname=cname, visibility='private')
+ entry.func_cname = cname
+ entry.qualified_name = EncodedString(self.name)
# Work-around for https://github.com/cython/cython/issues/1699
# We don't currently determine whether the generator entry is used or not,
# so mark it as used to avoid false warnings.
entry.used = True
- self.entry = entry
-
- def analyse_declarations(self, env):
- self.analyse_argument_types(env)
- self.declare_generator_body(env)
-
- def generate_function_header(self, code, proto=False):
+ self.entry = entry
+
+ def analyse_declarations(self, env):
+ self.analyse_argument_types(env)
+ self.declare_generator_body(env)
+
+ def generate_function_header(self, code, proto=False):
header = "static PyObject *%s(PyObject *%s_obj, CYTHON_UNUSED PyThreadState *%s, PyObject *%s)" % (
- self.entry.func_cname,
- Naming.generator_cname,
+ self.entry.func_cname,
+ Naming.generator_cname,
Naming.local_tstate_cname,
- Naming.sent_value_cname)
- if proto:
- code.putln('%s; /* proto */' % header)
- else:
- code.putln('%s /* generator body */\n{' % header)
-
- def generate_function_definitions(self, env, code):
- lenv = self.local_scope
-
- # Generate closure function definitions
- self.body.generate_function_definitions(lenv, code)
-
- # Generate C code for header and body of function
+ Naming.sent_value_cname)
+ if proto:
+ code.putln('%s; /* proto */' % header)
+ else:
+ code.putln('%s /* generator body */\n{' % header)
+
+ def generate_function_definitions(self, env, code):
+ lenv = self.local_scope
+
+ # Generate closure function definitions
+ self.body.generate_function_definitions(lenv, code)
+
+ # Generate C code for header and body of function
code.enter_cfunc_scope(lenv)
- code.return_from_error_cleanup_label = code.new_label()
-
- # ----- Top-level constants used by this function
- code.mark_pos(self.pos)
- self.generate_cached_builtins_decls(lenv, code)
- # ----- Function header
- code.putln("")
- self.generate_function_header(code)
+ code.return_from_error_cleanup_label = code.new_label()
+
+ # ----- Top-level constants used by this function
+ code.mark_pos(self.pos)
+ self.generate_cached_builtins_decls(lenv, code)
+ # ----- Function header
+ code.putln("")
+ self.generate_function_header(code)
code.putln("__pyx_CoroutineObject *%s = (__pyx_CoroutineObject *)%s_obj;" % (Naming.generator_cname, Naming.generator_cname))
- closure_init_code = code.insertion_point()
- # ----- Local variables
- code.putln("PyObject *%s = NULL;" % Naming.retval_cname)
- tempvardecl_code = code.insertion_point()
- code.put_declare_refcount_context()
+ closure_init_code = code.insertion_point()
+ # ----- Local variables
+ code.putln("PyObject *%s = NULL;" % Naming.retval_cname)
+ tempvardecl_code = code.insertion_point()
+ code.put_declare_refcount_context()
code.put_setup_refcount_context(self.entry.name or self.entry.qualified_name)
profile = code.globalstate.directives['profile']
linetrace = code.globalstate.directives['linetrace']
@@ -4207,16 +4207,16 @@ class GeneratorBodyDefNode(DefNode):
code.funcstate.can_trace = True
code_object = self.code_object.calculate_result_code(code) if self.code_object else None
code.put_trace_frame_init(code_object)
-
- # ----- Resume switch point.
- code.funcstate.init_closure_temps(lenv.scope_class.type.scope)
- resume_code = code.insertion_point()
- first_run_label = code.new_label('first_run')
- code.use_label(first_run_label)
- code.put_label(first_run_label)
- code.putln('%s' %
- (code.error_goto_if_null(Naming.sent_value_cname, self.pos)))
-
+
+ # ----- Resume switch point.
+ code.funcstate.init_closure_temps(lenv.scope_class.type.scope)
+ resume_code = code.insertion_point()
+ first_run_label = code.new_label('first_run')
+ code.use_label(first_run_label)
+ code.put_label(first_run_label)
+ code.putln('%s' %
+ (code.error_goto_if_null(Naming.sent_value_cname, self.pos)))
+
# ----- prepare target container for inlined comprehension
if self.is_inlined and self.inlined_comprehension_type is not None:
target_type = self.inlined_comprehension_type
@@ -4234,49 +4234,49 @@ class GeneratorBodyDefNode(DefNode):
code.error_goto_if_null(Naming.retval_cname, self.pos)))
code.put_gotref(Naming.retval_cname)
- # ----- Function body
- self.generate_function_body(env, code)
- # ----- Closure initialization
+ # ----- Function body
+ self.generate_function_body(env, code)
+ # ----- Closure initialization
if lenv.scope_class.type.scope.var_entries:
- closure_init_code.putln('%s = %s;' % (
- lenv.scope_class.type.declaration_code(Naming.cur_scope_cname),
- lenv.scope_class.type.cast_code('%s->closure' %
- Naming.generator_cname)))
+ closure_init_code.putln('%s = %s;' % (
+ lenv.scope_class.type.declaration_code(Naming.cur_scope_cname),
+ lenv.scope_class.type.cast_code('%s->closure' %
+ Naming.generator_cname)))
# FIXME: this silences a potential "unused" warning => try to avoid unused closures in more cases
code.putln("CYTHON_MAYBE_UNUSED_VAR(%s);" % Naming.cur_scope_cname)
-
+
if profile or linetrace:
code.funcstate.can_trace = False
- code.mark_pos(self.pos)
- code.putln("")
- code.putln("/* function exit code */")
-
- # on normal generator termination, we do not take the exception propagation
- # path: no traceback info is required and not creating it is much faster
+ code.mark_pos(self.pos)
+ code.putln("")
+ code.putln("/* function exit code */")
+
+ # on normal generator termination, we do not take the exception propagation
+ # path: no traceback info is required and not creating it is much faster
if not self.is_inlined and not self.body.is_terminator:
if self.is_async_gen_body:
code.globalstate.use_utility_code(
UtilityCode.load_cached("StopAsyncIteration", "Coroutine.c"))
code.putln('PyErr_SetNone(%s);' % (
'__Pyx_PyExc_StopAsyncIteration' if self.is_async_gen_body else 'PyExc_StopIteration'))
- # ----- Error cleanup
+ # ----- Error cleanup
if code.label_used(code.error_label):
- if not self.body.is_terminator:
- code.put_goto(code.return_label)
- code.put_label(code.error_label)
+ if not self.body.is_terminator:
+ code.put_goto(code.return_label)
+ code.put_label(code.error_label)
if self.is_inlined and self.inlined_comprehension_type is not None:
code.put_xdecref_clear(Naming.retval_cname, py_object_type)
if Future.generator_stop in env.global_scope().context.future_directives:
# PEP 479: turn accidental StopIteration exceptions into a RuntimeError
code.globalstate.use_utility_code(UtilityCode.load_cached("pep479", "Coroutine.c"))
code.putln("__Pyx_Generator_Replace_StopIteration(%d);" % bool(self.is_async_gen_body))
- for cname, type in code.funcstate.all_managed_temps():
- code.put_xdecref(cname, type)
- code.put_add_traceback(self.entry.qualified_name)
-
- # ----- Non-error return cleanup
- code.put_label(code.return_label)
+ for cname, type in code.funcstate.all_managed_temps():
+ code.put_xdecref(cname, type)
+ code.put_add_traceback(self.entry.qualified_name)
+
+ # ----- Non-error return cleanup
+ code.put_label(code.return_label)
if self.is_inlined:
code.put_xgiveref(Naming.retval_cname)
else:
@@ -4285,64 +4285,64 @@ class GeneratorBodyDefNode(DefNode):
code.putln("#if !CYTHON_USE_EXC_INFO_STACK")
code.putln("__Pyx_Coroutine_ResetAndClearException(%s);" % Naming.generator_cname)
code.putln("#endif")
- code.putln('%s->resume_label = -1;' % Naming.generator_cname)
-        # clean up as early as possible to help break any reference cycles
+ code.putln('%s->resume_label = -1;' % Naming.generator_cname)
+        # clean up as early as possible to help break any reference cycles
code.putln('__Pyx_Coroutine_clear((PyObject*)%s);' % Naming.generator_cname)
if profile or linetrace:
code.put_trace_return(Naming.retval_cname,
nogil=not code.funcstate.gil_owned)
- code.put_finish_refcount_context()
+ code.put_finish_refcount_context()
code.putln("return %s;" % Naming.retval_cname)
- code.putln("}")
-
- # ----- Go back and insert temp variable declarations
- tempvardecl_code.put_temp_declarations(code.funcstate)
- # ----- Generator resume code
+ code.putln("}")
+
+ # ----- Go back and insert temp variable declarations
+ tempvardecl_code.put_temp_declarations(code.funcstate)
+ # ----- Generator resume code
if profile or linetrace:
resume_code.put_trace_call(self.entry.qualified_name, self.pos,
nogil=not code.funcstate.gil_owned)
- resume_code.putln("switch (%s->resume_label) {" % (
- Naming.generator_cname))
-
- resume_code.putln("case 0: goto %s;" % first_run_label)
-
- for i, label in code.yield_labels:
- resume_code.putln("case %d: goto %s;" % (i, label))
- resume_code.putln("default: /* CPython raises the right error here */")
+ resume_code.putln("switch (%s->resume_label) {" % (
+ Naming.generator_cname))
+
+ resume_code.putln("case 0: goto %s;" % first_run_label)
+
+ for i, label in code.yield_labels:
+ resume_code.putln("case %d: goto %s;" % (i, label))
+ resume_code.putln("default: /* CPython raises the right error here */")
if profile or linetrace:
resume_code.put_trace_return("Py_None",
nogil=not code.funcstate.gil_owned)
- resume_code.put_finish_refcount_context()
- resume_code.putln("return NULL;")
- resume_code.putln("}")
-
- code.exit_cfunc_scope()
-
-
-class OverrideCheckNode(StatNode):
- # A Node for dispatching to the def method if it
+ resume_code.put_finish_refcount_context()
+ resume_code.putln("return NULL;")
+ resume_code.putln("}")
+
+ code.exit_cfunc_scope()
+
+
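The resume code emitted above is a plain switch on gen->resume_label: label 0 jumps to the first-run label, each later value jumps back to its yield point, and anything else falls through to the error path where CPython raises. A pure-Python model of that dispatch (resume_generator and yield_labels are illustrative):

def resume_generator(resume_label, yield_labels):
    # yield_labels: mapping of label number -> name of the yield point to re-enter
    if resume_label == 0:
        return "first_run"
    try:
        return yield_labels[resume_label]
    except KeyError:
        # the generated default: case returns NULL and lets CPython raise
        raise RuntimeError("generator already finished or currently executing")
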
+class OverrideCheckNode(StatNode):
+ # A Node for dispatching to the def method if it
# is overridden.
- #
- # py_func
- #
- # args
- # func_temp
- # body
-
- child_attrs = ['body']
-
- body = None
-
- def analyse_expressions(self, env):
- self.args = env.arg_entries
- if self.py_func.is_module_scope:
- first_arg = 0
- else:
- first_arg = 1
- from . import ExprNodes
- self.func_node = ExprNodes.RawCNameExprNode(self.pos, py_object_type)
- call_node = ExprNodes.SimpleCallNode(
- self.pos, function=self.func_node,
+ #
+ # py_func
+ #
+ # args
+ # func_temp
+ # body
+
+ child_attrs = ['body']
+
+ body = None
+
+ def analyse_expressions(self, env):
+ self.args = env.arg_entries
+ if self.py_func.is_module_scope:
+ first_arg = 0
+ else:
+ first_arg = 1
+ from . import ExprNodes
+ self.func_node = ExprNodes.RawCNameExprNode(self.pos, py_object_type)
+ call_node = ExprNodes.SimpleCallNode(
+ self.pos, function=self.func_node,
args=[ExprNodes.NameNode(self.pos, name=arg.name)
for arg in self.args[first_arg:]])
if env.return_type.is_void or env.return_type.is_returncode:
@@ -4351,22 +4351,22 @@ class OverrideCheckNode(StatNode):
ReturnStatNode(self.pos, value=None)])
else:
self.body = ReturnStatNode(self.pos, value=call_node)
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_execution_code(self, code):
- interned_attr_cname = code.intern_identifier(self.py_func.entry.name)
- # Check to see if we are an extension type
- if self.py_func.is_module_scope:
- self_arg = "((PyObject *)%s)" % Naming.module_cname
- else:
- self_arg = "((PyObject *)%s)" % self.args[0].cname
- code.putln("/* Check if called by wrapper */")
- code.putln("if (unlikely(%s)) ;" % Naming.skip_dispatch_cname)
- code.putln("/* Check if overridden in Python */")
- if self.py_func.is_module_scope:
- code.putln("else {")
- else:
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_execution_code(self, code):
+ interned_attr_cname = code.intern_identifier(self.py_func.entry.name)
+ # Check to see if we are an extension type
+ if self.py_func.is_module_scope:
+ self_arg = "((PyObject *)%s)" % Naming.module_cname
+ else:
+ self_arg = "((PyObject *)%s)" % self.args[0].cname
+ code.putln("/* Check if called by wrapper */")
+ code.putln("if (unlikely(%s)) ;" % Naming.skip_dispatch_cname)
+ code.putln("/* Check if overridden in Python */")
+ if self.py_func.is_module_scope:
+ code.putln("else {")
+ else:
code.putln("else if (unlikely((Py_TYPE(%s)->tp_dictoffset != 0)"
" || (Py_TYPE(%s)->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) {" % (
self_arg, self_arg))
@@ -4386,22 +4386,22 @@ class OverrideCheckNode(StatNode):
Naming.type_dict_guard_temp, self_arg))
code.putln("#endif")
- func_node_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- self.func_node.set_cname(func_node_temp)
- # need to get attribute manually--scope would return cdef method
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectGetAttrStr", "ObjectHandling.c"))
- err = code.error_goto_if_null(func_node_temp, self.pos)
- code.putln("%s = __Pyx_PyObject_GetAttrStr(%s, %s); %s" % (
- func_node_temp, self_arg, interned_attr_cname, err))
- code.put_gotref(func_node_temp)
-
- is_builtin_function_or_method = "PyCFunction_Check(%s)" % func_node_temp
+ func_node_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ self.func_node.set_cname(func_node_temp)
+ # need to get attribute manually--scope would return cdef method
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectGetAttrStr", "ObjectHandling.c"))
+ err = code.error_goto_if_null(func_node_temp, self.pos)
+ code.putln("%s = __Pyx_PyObject_GetAttrStr(%s, %s); %s" % (
+ func_node_temp, self_arg, interned_attr_cname, err))
+ code.put_gotref(func_node_temp)
+
+ is_builtin_function_or_method = "PyCFunction_Check(%s)" % func_node_temp
is_overridden = "(PyCFunction_GET_FUNCTION(%s) != (PyCFunction)(void*)%s)" % (
- func_node_temp, self.py_func.entry.func_cname)
- code.putln("if (!%s || %s) {" % (is_builtin_function_or_method, is_overridden))
- self.body.generate_execution_code(code)
- code.putln("}")
+ func_node_temp, self.py_func.entry.func_cname)
+ code.putln("if (!%s || %s) {" % (is_builtin_function_or_method, is_overridden))
+ self.body.generate_execution_code(code)
+ code.putln("}")
# NOTE: it's not 100% sure that we catch the exact versions here that were used for the lookup,
# but it is very unlikely that the versions change during lookup, and the type dict safe guard
@@ -4423,63 +4423,63 @@ class OverrideCheckNode(StatNode):
code.putln("}")
code.putln("#endif")
- code.put_decref_clear(func_node_temp, PyrexTypes.py_object_type)
- code.funcstate.release_temp(func_node_temp)
+ code.put_decref_clear(func_node_temp, PyrexTypes.py_object_type)
+ code.funcstate.release_temp(func_node_temp)
code.putln("#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS")
- code.putln("}")
+ code.putln("}")
code.putln("#endif")
-
+
code.putln("}")
-class ClassDefNode(StatNode, BlockNode):
- pass
-
-
-class PyClassDefNode(ClassDefNode):
- # A Python class definition.
- #
- # name EncodedString Name of the class
- # doc string or None
- # body StatNode Attribute definition code
- # entry Symtab.Entry
- # scope PyClassScope
- # decorators [DecoratorNode] list of decorators or None
- #
- # The following subnodes are constructed internally:
- #
- # dict DictNode Class dictionary or Py3 namespace
- # classobj ClassNode Class object
- # target NameNode Variable to assign class object to
-
- child_attrs = ["body", "dict", "metaclass", "mkw", "bases", "class_result",
- "target", "class_cell", "decorators"]
- decorators = None
- class_result = None
- is_py3_style_class = False # Python3 style class (kwargs)
- metaclass = None
- mkw = None
-
- def __init__(self, pos, name, bases, doc, body, decorators=None,
+class ClassDefNode(StatNode, BlockNode):
+ pass
+
+
+class PyClassDefNode(ClassDefNode):
+ # A Python class definition.
+ #
+ # name EncodedString Name of the class
+ # doc string or None
+ # body StatNode Attribute definition code
+ # entry Symtab.Entry
+ # scope PyClassScope
+ # decorators [DecoratorNode] list of decorators or None
+ #
+ # The following subnodes are constructed internally:
+ #
+ # dict DictNode Class dictionary or Py3 namespace
+ # classobj ClassNode Class object
+ # target NameNode Variable to assign class object to
+
+ child_attrs = ["body", "dict", "metaclass", "mkw", "bases", "class_result",
+ "target", "class_cell", "decorators"]
+ decorators = None
+ class_result = None
+ is_py3_style_class = False # Python3 style class (kwargs)
+ metaclass = None
+ mkw = None
+
+ def __init__(self, pos, name, bases, doc, body, decorators=None,
keyword_args=None, force_py3_semantics=False):
- StatNode.__init__(self, pos)
- self.name = name
- self.doc = doc
- self.body = body
- self.decorators = decorators
- self.bases = bases
- from . import ExprNodes
- if self.doc and Options.docstrings:
- doc = embed_position(self.pos, self.doc)
- doc_node = ExprNodes.StringNode(pos, value=doc)
- else:
- doc_node = None
-
- allow_py2_metaclass = not force_py3_semantics
+ StatNode.__init__(self, pos)
+ self.name = name
+ self.doc = doc
+ self.body = body
+ self.decorators = decorators
+ self.bases = bases
+ from . import ExprNodes
+ if self.doc and Options.docstrings:
+ doc = embed_position(self.pos, self.doc)
+ doc_node = ExprNodes.StringNode(pos, value=doc)
+ else:
+ doc_node = None
+
+ allow_py2_metaclass = not force_py3_semantics
if keyword_args:
- allow_py2_metaclass = False
- self.is_py3_style_class = True
+ allow_py2_metaclass = False
+ self.is_py3_style_class = True
if keyword_args.is_dict_literal:
if keyword_args.key_value_pairs:
for i, item in list(enumerate(keyword_args.key_value_pairs))[::-1]:
@@ -4494,53 +4494,53 @@ class PyClassDefNode(ClassDefNode):
self.mkw = keyword_args
else:
assert self.metaclass is not None
- else:
+ else:
# MergedDictNode
self.mkw = ExprNodes.ProxyNode(keyword_args)
-
- if force_py3_semantics or self.bases or self.mkw or self.metaclass:
- if self.metaclass is None:
+
+ if force_py3_semantics or self.bases or self.mkw or self.metaclass:
+ if self.metaclass is None:
if keyword_args and not keyword_args.is_dict_literal:
- # **kwargs may contain 'metaclass' arg
- mkdict = self.mkw
- else:
- mkdict = None
- if (not mkdict and
- self.bases.is_sequence_constructor and
- not self.bases.args):
- pass # no base classes => no inherited metaclass
- else:
- self.metaclass = ExprNodes.PyClassMetaclassNode(
+ # **kwargs may contain 'metaclass' arg
+ mkdict = self.mkw
+ else:
+ mkdict = None
+ if (not mkdict and
+ self.bases.is_sequence_constructor and
+ not self.bases.args):
+ pass # no base classes => no inherited metaclass
+ else:
+ self.metaclass = ExprNodes.PyClassMetaclassNode(
pos, class_def_node=self)
- needs_metaclass_calculation = False
- else:
- needs_metaclass_calculation = True
-
- self.dict = ExprNodes.PyClassNamespaceNode(
+ needs_metaclass_calculation = False
+ else:
+ needs_metaclass_calculation = True
+
+ self.dict = ExprNodes.PyClassNamespaceNode(
pos, name=name, doc=doc_node, class_def_node=self)
- self.classobj = ExprNodes.Py3ClassNode(
+ self.classobj = ExprNodes.Py3ClassNode(
pos, name=name, class_def_node=self, doc=doc_node,
- calculate_metaclass=needs_metaclass_calculation,
- allow_py2_metaclass=allow_py2_metaclass)
- else:
- # no bases, no metaclass => old style class creation
- self.dict = ExprNodes.DictNode(pos, key_value_pairs=[])
- self.classobj = ExprNodes.ClassNode(
+ calculate_metaclass=needs_metaclass_calculation,
+ allow_py2_metaclass=allow_py2_metaclass)
+ else:
+ # no bases, no metaclass => old style class creation
+ self.dict = ExprNodes.DictNode(pos, key_value_pairs=[])
+ self.classobj = ExprNodes.ClassNode(
pos, name=name, class_def_node=self, doc=doc_node)
-
- self.target = ExprNodes.NameNode(pos, name=name)
- self.class_cell = ExprNodes.ClassCellInjectorNode(self.pos)
-
- def as_cclass(self):
- """
- Return this node as if it were declared as an extension class
- """
- if self.is_py3_style_class:
- error(self.classobj.pos, "Python3 style class could not be represented as C class")
- return
-
+
+ self.target = ExprNodes.NameNode(pos, name=name)
+ self.class_cell = ExprNodes.ClassCellInjectorNode(self.pos)
+
+ def as_cclass(self):
+ """
+ Return this node as if it were declared as an extension class
+ """
+ if self.is_py3_style_class:
+ error(self.classobj.pos, "Python3 style class could not be represented as C class")
+ return
+
from . import ExprNodes
- return CClassDefNode(self.pos,
+ return CClassDefNode(self.pos,
visibility='private',
module_name=None,
class_name=self.name,
@@ -4549,67 +4549,67 @@ class PyClassDefNode(ClassDefNode):
body=self.body,
in_pxd=False,
doc=self.doc)
-
- def create_scope(self, env):
- genv = env
- while genv.is_py_class_scope or genv.is_c_class_scope:
- genv = genv.outer_scope
+
+ def create_scope(self, env):
+ genv = env
+ while genv.is_py_class_scope or genv.is_c_class_scope:
+ genv = genv.outer_scope
cenv = self.scope = PyClassScope(name=self.name, outer_scope=genv)
- return cenv
-
- def analyse_declarations(self, env):
- class_result = self.classobj
- if self.decorators:
- from .ExprNodes import SimpleCallNode
- for decorator in self.decorators[::-1]:
- class_result = SimpleCallNode(
- decorator.pos,
+ return cenv
+
+ def analyse_declarations(self, env):
+ class_result = self.classobj
+ if self.decorators:
+ from .ExprNodes import SimpleCallNode
+ for decorator in self.decorators[::-1]:
+ class_result = SimpleCallNode(
+ decorator.pos,
function=decorator.decorator,
args=[class_result])
- self.decorators = None
- self.class_result = class_result
+ self.decorators = None
+ self.class_result = class_result
if self.bases:
self.bases.analyse_declarations(env)
if self.mkw:
self.mkw.analyse_declarations(env)
- self.class_result.analyse_declarations(env)
- self.target.analyse_target_declaration(env)
- cenv = self.create_scope(env)
- cenv.directives = env.directives
- cenv.class_obj_cname = self.target.entry.cname
- self.body.analyse_declarations(cenv)
-
- def analyse_expressions(self, env):
- if self.bases:
- self.bases = self.bases.analyse_expressions(env)
+ self.class_result.analyse_declarations(env)
+ self.target.analyse_target_declaration(env)
+ cenv = self.create_scope(env)
+ cenv.directives = env.directives
+ cenv.class_obj_cname = self.target.entry.cname
+ self.body.analyse_declarations(cenv)
+
+ def analyse_expressions(self, env):
+ if self.bases:
+ self.bases = self.bases.analyse_expressions(env)
if self.mkw:
self.mkw = self.mkw.analyse_expressions(env)
- if self.metaclass:
- self.metaclass = self.metaclass.analyse_expressions(env)
- self.dict = self.dict.analyse_expressions(env)
- self.class_result = self.class_result.analyse_expressions(env)
- cenv = self.scope
- self.body = self.body.analyse_expressions(cenv)
- self.target.analyse_target_expression(env, self.classobj)
- self.class_cell = self.class_cell.analyse_expressions(cenv)
- return self
-
- def generate_function_definitions(self, env, code):
- self.generate_lambda_definitions(self.scope, code)
- self.body.generate_function_definitions(self.scope, code)
-
- def generate_execution_code(self, code):
+ if self.metaclass:
+ self.metaclass = self.metaclass.analyse_expressions(env)
+ self.dict = self.dict.analyse_expressions(env)
+ self.class_result = self.class_result.analyse_expressions(env)
+ cenv = self.scope
+ self.body = self.body.analyse_expressions(cenv)
+ self.target.analyse_target_expression(env, self.classobj)
+ self.class_cell = self.class_cell.analyse_expressions(cenv)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ self.generate_lambda_definitions(self.scope, code)
+ self.body.generate_function_definitions(self.scope, code)
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- code.pyclass_stack.append(self)
- cenv = self.scope
- if self.bases:
- self.bases.generate_evaluation_code(code)
- if self.mkw:
- self.mkw.generate_evaluation_code(code)
- if self.metaclass:
- self.metaclass.generate_evaluation_code(code)
- self.dict.generate_evaluation_code(code)
- cenv.namespace_cname = cenv.class_obj_cname = self.dict.result()
+ code.pyclass_stack.append(self)
+ cenv = self.scope
+ if self.bases:
+ self.bases.generate_evaluation_code(code)
+ if self.mkw:
+ self.mkw.generate_evaluation_code(code)
+ if self.metaclass:
+ self.metaclass.generate_evaluation_code(code)
+ self.dict.generate_evaluation_code(code)
+ cenv.namespace_cname = cenv.class_obj_cname = self.dict.result()
class_cell = self.class_cell
if class_cell is not None and not class_cell.is_active:
@@ -4617,8 +4617,8 @@ class PyClassDefNode(ClassDefNode):
if class_cell is not None:
class_cell.generate_evaluation_code(code)
- self.body.generate_execution_code(code)
- self.class_result.generate_evaluation_code(code)
+ self.body.generate_execution_code(code)
+ self.class_result.generate_evaluation_code(code)
if class_cell is not None:
class_cell.generate_injection_code(
code, self.class_result.result())
@@ -4626,77 +4626,77 @@ class PyClassDefNode(ClassDefNode):
class_cell.generate_disposal_code(code)
class_cell.free_temps(code)
- cenv.namespace_cname = cenv.class_obj_cname = self.classobj.result()
- self.target.generate_assignment_code(self.class_result, code)
- self.dict.generate_disposal_code(code)
- self.dict.free_temps(code)
- if self.metaclass:
- self.metaclass.generate_disposal_code(code)
- self.metaclass.free_temps(code)
- if self.mkw:
- self.mkw.generate_disposal_code(code)
- self.mkw.free_temps(code)
- if self.bases:
- self.bases.generate_disposal_code(code)
- self.bases.free_temps(code)
- code.pyclass_stack.pop()
-
-
-class CClassDefNode(ClassDefNode):
- # An extension type definition.
- #
- # visibility 'private' or 'public' or 'extern'
- # typedef_flag boolean
- # api boolean
- # module_name string or None For import of extern type objects
- # class_name string Unqualified name of class
- # as_name string or None Name to declare as in this scope
+ cenv.namespace_cname = cenv.class_obj_cname = self.classobj.result()
+ self.target.generate_assignment_code(self.class_result, code)
+ self.dict.generate_disposal_code(code)
+ self.dict.free_temps(code)
+ if self.metaclass:
+ self.metaclass.generate_disposal_code(code)
+ self.metaclass.free_temps(code)
+ if self.mkw:
+ self.mkw.generate_disposal_code(code)
+ self.mkw.free_temps(code)
+ if self.bases:
+ self.bases.generate_disposal_code(code)
+ self.bases.free_temps(code)
+ code.pyclass_stack.pop()
+
+
+class CClassDefNode(ClassDefNode):
+ # An extension type definition.
+ #
+ # visibility 'private' or 'public' or 'extern'
+ # typedef_flag boolean
+ # api boolean
+ # module_name string or None For import of extern type objects
+ # class_name string Unqualified name of class
+ # as_name string or None Name to declare as in this scope
# bases TupleNode Base class(es)
- # objstruct_name string or None Specified C name of object struct
- # typeobj_name string or None Specified C name of type object
+ # objstruct_name string or None Specified C name of object struct
+ # typeobj_name string or None Specified C name of type object
# check_size 'warn', 'error', 'ignore' What to do if tp_basicsize does not match
- # in_pxd boolean Is in a .pxd file
- # decorators [DecoratorNode] list of decorators or None
- # doc string or None
- # body StatNode or None
- # entry Symtab.Entry
- # base_type PyExtensionType or None
- # buffer_defaults_node DictNode or None Declares defaults for a buffer
- # buffer_defaults_pos
-
- child_attrs = ["body"]
- buffer_defaults_node = None
- buffer_defaults_pos = None
- typedef_flag = False
- api = False
- objstruct_name = None
- typeobj_name = None
+ # in_pxd boolean Is in a .pxd file
+ # decorators [DecoratorNode] list of decorators or None
+ # doc string or None
+ # body StatNode or None
+ # entry Symtab.Entry
+ # base_type PyExtensionType or None
+ # buffer_defaults_node DictNode or None Declares defaults for a buffer
+ # buffer_defaults_pos
+
+ child_attrs = ["body"]
+ buffer_defaults_node = None
+ buffer_defaults_pos = None
+ typedef_flag = False
+ api = False
+ objstruct_name = None
+ typeobj_name = None
check_size = None
- decorators = None
- shadow = False
-
- def buffer_defaults(self, env):
- if not hasattr(self, '_buffer_defaults'):
- from . import Buffer
- if self.buffer_defaults_node:
- self._buffer_defaults = Buffer.analyse_buffer_options(
- self.buffer_defaults_pos,
- env, [], self.buffer_defaults_node,
- need_complete=False)
- else:
- self._buffer_defaults = None
- return self._buffer_defaults
-
- def declare(self, env):
- if self.module_name and self.visibility != 'extern':
- module_path = self.module_name.split(".")
- home_scope = env.find_imported_module(module_path, self.pos)
- if not home_scope:
- return None
- else:
- home_scope = env
-
- self.entry = home_scope.declare_c_class(
+ decorators = None
+ shadow = False
+
+ def buffer_defaults(self, env):
+ if not hasattr(self, '_buffer_defaults'):
+ from . import Buffer
+ if self.buffer_defaults_node:
+ self._buffer_defaults = Buffer.analyse_buffer_options(
+ self.buffer_defaults_pos,
+ env, [], self.buffer_defaults_node,
+ need_complete=False)
+ else:
+ self._buffer_defaults = None
+ return self._buffer_defaults
+
+ def declare(self, env):
+ if self.module_name and self.visibility != 'extern':
+ module_path = self.module_name.split(".")
+ home_scope = env.find_imported_module(module_path, self.pos)
+ if not home_scope:
+ return None
+ else:
+ home_scope = env
+
+ self.entry = home_scope.declare_c_class(
name=self.class_name,
pos=self.pos,
defining=0,
@@ -4711,29 +4711,29 @@ class CClassDefNode(ClassDefNode):
api=self.api,
buffer_defaults=self.buffer_defaults(env),
shadow=self.shadow)
-
- def analyse_declarations(self, env):
- #print "CClassDefNode.analyse_declarations:", self.class_name
- #print "...visibility =", self.visibility
- #print "...module_name =", self.module_name
-
- if env.in_cinclude and not self.objstruct_name:
+
+ def analyse_declarations(self, env):
+ #print "CClassDefNode.analyse_declarations:", self.class_name
+ #print "...visibility =", self.visibility
+ #print "...module_name =", self.module_name
+
+ if env.in_cinclude and not self.objstruct_name:
error(self.pos, "Object struct name specification required for C class defined in 'extern from' block")
- if self.decorators:
+ if self.decorators:
error(self.pos, "Decorators not allowed on cdef classes (used on type '%s')" % self.class_name)
- self.base_type = None
- # Now that module imports are cached, we need to
- # import the modules for extern classes.
- if self.module_name:
- self.module = None
- for module in env.cimported_modules:
- if module.name == self.module_name:
- self.module = module
- if self.module is None:
- self.module = ModuleScope(self.module_name, None, env.context)
- self.module.has_extern_class = 1
- env.add_imported_module(self.module)
-
+ self.base_type = None
+ # Now that module imports are cached, we need to
+ # import the modules for extern classes.
+ if self.module_name:
+ self.module = None
+ for module in env.cimported_modules:
+ if module.name == self.module_name:
+ self.module = module
+ if self.module is None:
+ self.module = ModuleScope(self.module_name, None, env.context)
+ self.module.has_extern_class = 1
+ env.add_imported_module(self.module)
+
if self.bases.args:
base = self.bases.args[0]
base_type = base.analyse_as_type(env)
@@ -4758,33 +4758,33 @@ class CClassDefNode(ClassDefNode):
base_type.name in ('tuple', 'str', 'bytes'):
error(base.pos, "inheritance from PyVarObject types like '%s' is not currently supported"
% base_type.name)
- else:
+ else:
self.base_type = base_type
if env.directives.get('freelist', 0) > 0 and base_type != PyrexTypes.py_object_type:
warning(self.pos, "freelists cannot be used on subtypes, only the base class can manage them", 1)
-
- has_body = self.body is not None
- if has_body and self.base_type and not self.base_type.scope:
- # To properly initialize inherited attributes, the base type must
- # be analysed before this type.
- self.base_type.defered_declarations.append(lambda : self.analyse_declarations(env))
- return
-
- if self.module_name and self.visibility != 'extern':
- module_path = self.module_name.split(".")
- home_scope = env.find_imported_module(module_path, self.pos)
- if not home_scope:
- return
- else:
- home_scope = env
-
- if self.visibility == 'extern':
- if (self.module_name == '__builtin__' and
+
+ has_body = self.body is not None
+ if has_body and self.base_type and not self.base_type.scope:
+ # To properly initialize inherited attributes, the base type must
+ # be analysed before this type.
+ self.base_type.defered_declarations.append(lambda : self.analyse_declarations(env))
+ return
+
+ if self.module_name and self.visibility != 'extern':
+ module_path = self.module_name.split(".")
+ home_scope = env.find_imported_module(module_path, self.pos)
+ if not home_scope:
+ return
+ else:
+ home_scope = env
+
+ if self.visibility == 'extern':
+ if (self.module_name == '__builtin__' and
self.class_name in Builtin.builtin_types and
env.qualified_name[:8] != 'cpython.'): # allow overloaded names for cimporting from cpython
- warning(self.pos, "%s already a builtin Cython type" % self.class_name, 1)
-
- self.entry = home_scope.declare_c_class(
+ warning(self.pos, "%s already a builtin Cython type" % self.class_name, 1)
+
+ self.entry = home_scope.declare_c_class(
name=self.class_name,
pos=self.pos,
defining=has_body and self.in_pxd,
@@ -4799,28 +4799,28 @@ class CClassDefNode(ClassDefNode):
api=self.api,
buffer_defaults=self.buffer_defaults(env),
shadow=self.shadow)
-
- if self.shadow:
- home_scope.lookup(self.class_name).as_variable = self.entry
- if home_scope is not env and self.visibility == 'extern':
- env.add_imported_entry(self.class_name, self.entry, self.pos)
- self.scope = scope = self.entry.type.scope
- if scope is not None:
- scope.directives = env.directives
-
- if self.doc and Options.docstrings:
- scope.doc = embed_position(self.pos, self.doc)
-
- if has_body:
- self.body.analyse_declarations(scope)
+
+ if self.shadow:
+ home_scope.lookup(self.class_name).as_variable = self.entry
+ if home_scope is not env and self.visibility == 'extern':
+ env.add_imported_entry(self.class_name, self.entry, self.pos)
+ self.scope = scope = self.entry.type.scope
+ if scope is not None:
+ scope.directives = env.directives
+
+ if self.doc and Options.docstrings:
+ scope.doc = embed_position(self.pos, self.doc)
+
+ if has_body:
+ self.body.analyse_declarations(scope)
dict_entry = self.scope.lookup_here("__dict__")
if dict_entry and dict_entry.is_variable and (not scope.defined and not scope.implemented):
dict_entry.getter_cname = self.scope.mangle_internal("__dict__getter")
self.scope.declare_property("__dict__", dict_entry.doc, dict_entry.pos)
- if self.in_pxd:
- scope.defined = 1
- else:
- scope.implemented = 1
+ if self.in_pxd:
+ scope.defined = 1
+ else:
+ scope.implemented = 1
if len(self.bases.args) > 1:
if not has_body or self.in_pxd:
@@ -4844,30 +4844,30 @@ class CClassDefNode(ClassDefNode):
self.entry.type.early_init = 1
self.type_init_args = None
- env.allocate_vtable_names(self.entry)
-
- for thunk in self.entry.type.defered_declarations:
- thunk()
-
- def analyse_expressions(self, env):
- if self.body:
- scope = self.entry.type.scope
- self.body = self.body.analyse_expressions(scope)
+ env.allocate_vtable_names(self.entry)
+
+ for thunk in self.entry.type.defered_declarations:
+ thunk()
+
+ def analyse_expressions(self, env):
+ if self.body:
+ scope = self.entry.type.scope
+ self.body = self.body.analyse_expressions(scope)
if self.type_init_args:
self.type_init_args.analyse_expressions(env)
- return self
-
- def generate_function_definitions(self, env, code):
- if self.body:
- self.generate_lambda_definitions(self.scope, code)
- self.body.generate_function_definitions(self.scope, code)
-
- def generate_execution_code(self, code):
- # This is needed to generate evaluation code for
- # default values of method arguments.
+ return self
+
+ def generate_function_definitions(self, env, code):
+ if self.body:
+ self.generate_lambda_definitions(self.scope, code)
+ self.body.generate_function_definitions(self.scope, code)
+
+ def generate_execution_code(self, code):
+ # This is needed to generate evaluation code for
+ # default values of method arguments.
code.mark_pos(self.pos)
- if self.body:
- self.body.generate_execution_code(code)
+ if self.body:
+ self.body.generate_execution_code(code)
if not self.entry.type.early_init:
if self.type_init_args:
self.type_init_args.generate_evaluation_code(code)
@@ -4893,7 +4893,7 @@ class CClassDefNode(ClassDefNode):
code.put_decref_clear(trial_type, PyrexTypes.py_object_type)
self.type_init_args.generate_disposal_code(code)
self.type_init_args.free_temps(code)
-
+
self.generate_type_ready_code(self.entry, code, True)
# Also called from ModuleNode for early init types.
@@ -5034,101 +5034,101 @@ class CClassDefNode(ClassDefNode):
"%s = &%s;" % (
type.typeptr_cname, type.typeobj_cname))
- def annotate(self, code):
+ def annotate(self, code):
if self.type_init_args:
self.type_init_args.annotate(code)
- if self.body:
- self.body.annotate(code)
-
-
-class PropertyNode(StatNode):
- # Definition of a property in an extension type.
- #
- # name string
- # doc EncodedString or None Doc string
- # entry Symtab.Entry
- # body StatListNode
-
- child_attrs = ["body"]
-
- def analyse_declarations(self, env):
- self.entry = env.declare_property(self.name, self.doc, self.pos)
- self.entry.scope.directives = env.directives
- self.body.analyse_declarations(self.entry.scope)
-
- def analyse_expressions(self, env):
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_function_definitions(self, env, code):
- self.body.generate_function_definitions(env, code)
-
- def generate_execution_code(self, code):
- pass
-
- def annotate(self, code):
- self.body.annotate(code)
-
-
-class GlobalNode(StatNode):
- # Global variable declaration.
- #
- # names [string]
-
- child_attrs = []
-
- def analyse_declarations(self, env):
- for name in self.names:
- env.declare_global(name, self.pos)
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class NonlocalNode(StatNode):
- # Nonlocal variable declaration via the 'nonlocal' keyword.
- #
- # names [string]
-
- child_attrs = []
-
- def analyse_declarations(self, env):
- for name in self.names:
- env.declare_nonlocal(name, self.pos)
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class ExprStatNode(StatNode):
- # Expression used as a statement.
- #
- # expr ExprNode
-
- child_attrs = ["expr"]
-
- def analyse_declarations(self, env):
- from . import ExprNodes
+ if self.body:
+ self.body.annotate(code)
+
+
+class PropertyNode(StatNode):
+ # Definition of a property in an extension type.
+ #
+ # name string
+ # doc EncodedString or None Doc string
+ # entry Symtab.Entry
+ # body StatListNode
+
+ child_attrs = ["body"]
+
+ def analyse_declarations(self, env):
+ self.entry = env.declare_property(self.name, self.doc, self.pos)
+ self.entry.scope.directives = env.directives
+ self.body.analyse_declarations(self.entry.scope)
+
+ def analyse_expressions(self, env):
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ self.body.generate_function_definitions(env, code)
+
+ def generate_execution_code(self, code):
+ pass
+
+ def annotate(self, code):
+ self.body.annotate(code)
+
+
+class GlobalNode(StatNode):
+ # Global variable declaration.
+ #
+ # names [string]
+
+ child_attrs = []
+
+ def analyse_declarations(self, env):
+ for name in self.names:
+ env.declare_global(name, self.pos)
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+
+class NonlocalNode(StatNode):
+ # Nonlocal variable declaration via the 'nonlocal' keyword.
+ #
+ # names [string]
+
+ child_attrs = []
+
+ def analyse_declarations(self, env):
+ for name in self.names:
+ env.declare_nonlocal(name, self.pos)
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+
+class ExprStatNode(StatNode):
+ # Expression used as a statement.
+ #
+ # expr ExprNode
+
+ child_attrs = ["expr"]
+
+ def analyse_declarations(self, env):
+ from . import ExprNodes
expr = self.expr
if isinstance(expr, ExprNodes.GeneralCallNode):
func = expr.function.as_cython_attribute()
- if func == u'declare':
+ if func == u'declare':
args, kwds = expr.explicit_args_kwds()
- if len(args):
+ if len(args):
error(expr.pos, "Variable names must be specified.")
- for var, type_node in kwds.key_value_pairs:
- type = type_node.analyse_as_type(env)
- if type is None:
- error(type_node.pos, "Unknown type")
- else:
+ for var, type_node in kwds.key_value_pairs:
+ type = type_node.analyse_as_type(env)
+ if type is None:
+ error(type_node.pos, "Unknown type")
+ else:
env.declare_var(var.value, type, var.pos, is_cdef=True)
- self.__class__ = PassStatNode
+ self.__class__ = PassStatNode
elif getattr(expr, 'annotation', None) is not None:
if expr.is_name:
# non-code variable annotation, e.g. "name: type"
@@ -5137,94 +5137,94 @@ class ExprStatNode(StatNode):
elif expr.is_attribute or expr.is_subscript:
# unused expression with annotation, e.g. "a[0]: type" or "a.xyz : type"
self.__class__ = PassStatNode
-
- def analyse_expressions(self, env):
+
+ def analyse_expressions(self, env):
self.expr.result_is_used = False # hint that .result() may safely be left empty
- self.expr = self.expr.analyse_expressions(env)
+ self.expr = self.expr.analyse_expressions(env)
# Repeat in case of node replacement.
self.expr.result_is_used = False # hint that .result() may safely be left empty
- return self
-
- def nogil_check(self, env):
- if self.expr.type.is_pyobject and self.expr.is_temp:
- self.gil_error()
-
- gil_message = "Discarding owned Python object"
-
- def generate_execution_code(self, code):
+ return self
+
+ def nogil_check(self, env):
+ if self.expr.type.is_pyobject and self.expr.is_temp:
+ self.gil_error()
+
+ gil_message = "Discarding owned Python object"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
self.expr.result_is_used = False # hint that .result() may safely be left empty
- self.expr.generate_evaluation_code(code)
- if not self.expr.is_temp and self.expr.result():
+ self.expr.generate_evaluation_code(code)
+ if not self.expr.is_temp and self.expr.result():
result = self.expr.result()
if not self.expr.type.is_void:
result = "(void)(%s)" % result
code.putln("%s;" % result)
- self.expr.generate_disposal_code(code)
- self.expr.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- self.expr.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.expr.annotate(code)
-
-
-class AssignmentNode(StatNode):
- # Abstract base class for assignment nodes.
- #
- # The analyse_expressions and generate_execution_code
- # phases of assignments are split into two sub-phases
- # each, to enable all the right hand sides of a
- # parallel assignment to be evaluated before assigning
- # to any of the left hand sides.
-
- def analyse_expressions(self, env):
- node = self.analyse_types(env)
+ self.expr.generate_disposal_code(code)
+ self.expr.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ self.expr.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.expr.annotate(code)
+
+
+class AssignmentNode(StatNode):
+ # Abstract base class for assignment nodes.
+ #
+ # The analyse_expressions and generate_execution_code
+ # phases of assignments are split into two sub-phases
+ # each, to enable all the right hand sides of a
+ # parallel assignment to be evaluated before assigning
+ # to any of the left hand sides.
+
+ def analyse_expressions(self, env):
+ node = self.analyse_types(env)
if isinstance(node, AssignmentNode) and not isinstance(node, ParallelAssignmentNode):
- if node.rhs.type.is_ptr and node.rhs.is_ephemeral():
- error(self.pos, "Storing unsafe C derivative of temporary Python reference")
- return node
-
-# def analyse_expressions(self, env):
-# self.analyse_expressions_1(env)
-# self.analyse_expressions_2(env)
-
- def generate_execution_code(self, code):
+ if node.rhs.type.is_ptr and node.rhs.is_ephemeral():
+ error(self.pos, "Storing unsafe C derivative of temporary Python reference")
+ return node
+
+# def analyse_expressions(self, env):
+# self.analyse_expressions_1(env)
+# self.analyse_expressions_2(env)
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- self.generate_rhs_evaluation_code(code)
- self.generate_assignment_code(code)
-
-
-class SingleAssignmentNode(AssignmentNode):
- # The simplest case:
- #
- # a = b
- #
+ self.generate_rhs_evaluation_code(code)
+ self.generate_assignment_code(code)
+
+
+class SingleAssignmentNode(AssignmentNode):
+ # The simplest case:
+ #
+ # a = b
+ #
# lhs ExprNode Left hand side
# rhs ExprNode Right hand side
# first bool Is this guaranteed the first assignment to lhs?
# is_overloaded_assignment bool Is this assignment done via an overloaded operator=
# exception_check
# exception_value
-
- child_attrs = ["lhs", "rhs"]
- first = False
+
+ child_attrs = ["lhs", "rhs"]
+ first = False
is_overloaded_assignment = False
- declaration_only = False
-
- def analyse_declarations(self, env):
- from . import ExprNodes
-
- # handle declarations of the form x = cython.foo()
- if isinstance(self.rhs, ExprNodes.CallNode):
- func_name = self.rhs.function.as_cython_attribute()
- if func_name:
- args, kwds = self.rhs.explicit_args_kwds()
- if func_name in ['declare', 'typedef']:
+ declaration_only = False
+
+ def analyse_declarations(self, env):
+ from . import ExprNodes
+
+ # handle declarations of the form x = cython.foo()
+ if isinstance(self.rhs, ExprNodes.CallNode):
+ func_name = self.rhs.function.as_cython_attribute()
+ if func_name:
+ args, kwds = self.rhs.explicit_args_kwds()
+ if func_name in ['declare', 'typedef']:
if len(args) > 2:
error(args[2].pos, "Invalid positional argument.")
- return
+ return
if kwds is not None:
kwdict = kwds.compile_time_value(None)
if func_name == 'typedef' or 'visibility' not in kwdict:
@@ -5233,85 +5233,85 @@ class SingleAssignmentNode(AssignmentNode):
visibility = kwdict['visibility']
else:
visibility = 'private'
- type = args[0].analyse_as_type(env)
- if type is None:
- error(args[0].pos, "Unknown type")
- return
- lhs = self.lhs
- if func_name == 'declare':
- if isinstance(lhs, ExprNodes.NameNode):
- vars = [(lhs.name, lhs.pos)]
- elif isinstance(lhs, ExprNodes.TupleNode):
- vars = [(var.name, var.pos) for var in lhs.args]
- else:
- error(lhs.pos, "Invalid declaration")
- return
- for var, pos in vars:
+ type = args[0].analyse_as_type(env)
+ if type is None:
+ error(args[0].pos, "Unknown type")
+ return
+ lhs = self.lhs
+ if func_name == 'declare':
+ if isinstance(lhs, ExprNodes.NameNode):
+ vars = [(lhs.name, lhs.pos)]
+ elif isinstance(lhs, ExprNodes.TupleNode):
+ vars = [(var.name, var.pos) for var in lhs.args]
+ else:
+ error(lhs.pos, "Invalid declaration")
+ return
+ for var, pos in vars:
env.declare_var(var, type, pos, is_cdef=True, visibility=visibility)
- if len(args) == 2:
- # we have a value
- self.rhs = args[1]
- else:
- self.declaration_only = True
- else:
- self.declaration_only = True
- if not isinstance(lhs, ExprNodes.NameNode):
- error(lhs.pos, "Invalid declaration.")
- env.declare_typedef(lhs.name, type, self.pos, visibility='private')
-
- elif func_name in ['struct', 'union']:
- self.declaration_only = True
- if len(args) > 0 or kwds is None:
- error(self.rhs.pos, "Struct or union members must be given by name.")
- return
- members = []
- for member, type_node in kwds.key_value_pairs:
- type = type_node.analyse_as_type(env)
- if type is None:
- error(type_node.pos, "Unknown type")
- else:
- members.append((member.value, type, member.pos))
- if len(members) < len(kwds.key_value_pairs):
- return
- if not isinstance(self.lhs, ExprNodes.NameNode):
- error(self.lhs.pos, "Invalid declaration.")
- name = self.lhs.name
- scope = StructOrUnionScope(name)
- env.declare_struct_or_union(name, func_name, scope, False, self.rhs.pos)
- for member, type, pos in members:
- scope.declare_var(member, type, pos)
-
- elif func_name == 'fused_type':
- # dtype = cython.fused_type(...)
- self.declaration_only = True
- if kwds:
- error(self.rhs.function.pos,
- "fused_type does not take keyword arguments")
-
- fusednode = FusedTypeNode(self.rhs.pos,
+ if len(args) == 2:
+ # we have a value
+ self.rhs = args[1]
+ else:
+ self.declaration_only = True
+ else:
+ self.declaration_only = True
+ if not isinstance(lhs, ExprNodes.NameNode):
+ error(lhs.pos, "Invalid declaration.")
+ env.declare_typedef(lhs.name, type, self.pos, visibility='private')
+
+ elif func_name in ['struct', 'union']:
+ self.declaration_only = True
+ if len(args) > 0 or kwds is None:
+ error(self.rhs.pos, "Struct or union members must be given by name.")
+ return
+ members = []
+ for member, type_node in kwds.key_value_pairs:
+ type = type_node.analyse_as_type(env)
+ if type is None:
+ error(type_node.pos, "Unknown type")
+ else:
+ members.append((member.value, type, member.pos))
+ if len(members) < len(kwds.key_value_pairs):
+ return
+ if not isinstance(self.lhs, ExprNodes.NameNode):
+ error(self.lhs.pos, "Invalid declaration.")
+ name = self.lhs.name
+ scope = StructOrUnionScope(name)
+ env.declare_struct_or_union(name, func_name, scope, False, self.rhs.pos)
+ for member, type, pos in members:
+ scope.declare_var(member, type, pos)
+
+ elif func_name == 'fused_type':
+ # dtype = cython.fused_type(...)
+ self.declaration_only = True
+ if kwds:
+ error(self.rhs.function.pos,
+ "fused_type does not take keyword arguments")
+
+ fusednode = FusedTypeNode(self.rhs.pos,
name=self.lhs.name, types=args)
- fusednode.analyse_declarations(env)
-
- if self.declaration_only:
- return
- else:
- self.lhs.analyse_target_declaration(env)
-
+ fusednode.analyse_declarations(env)
+
+ if self.declaration_only:
+ return
+ else:
+ self.lhs.analyse_target_declaration(env)
+
def analyse_types(self, env, use_temp=0):
- from . import ExprNodes
-
- self.rhs = self.rhs.analyse_types(env)
+ from . import ExprNodes
+
+ self.rhs = self.rhs.analyse_types(env)
unrolled_assignment = self.unroll_rhs(env)
if unrolled_assignment:
return unrolled_assignment
- self.lhs = self.lhs.analyse_target_types(env)
- self.lhs.gil_assignment_check(env)
+ self.lhs = self.lhs.analyse_target_types(env)
+ self.lhs.gil_assignment_check(env)
unrolled_assignment = self.unroll_lhs(env)
if unrolled_assignment:
return unrolled_assignment
-
+
if isinstance(self.lhs, ExprNodes.MemoryViewIndexNode):
self.lhs.analyse_broadcast_operation(self.rhs)
self.lhs = self.lhs.analyse_as_memview_scalar_assignment(self.rhs)
@@ -5320,7 +5320,7 @@ class SingleAssignmentNode(AssignmentNode):
# cannot assign to C array, only to its full slice
self.lhs = ExprNodes.SliceIndexNode(self.lhs.pos, base=self.lhs, start=None, stop=None)
self.lhs = self.lhs.analyse_target_types(env)
-
+
if self.lhs.type.is_cpp_class:
op = env.lookup_operator_for_types(self.pos, '=', [self.lhs.type, self.rhs.type])
if op:
@@ -5335,16 +5335,16 @@ class SingleAssignmentNode(AssignmentNode):
else:
rhs = self.rhs.coerce_to(self.lhs.type, env)
- if use_temp or rhs.is_attribute or (
- not rhs.is_name and not rhs.is_literal and
- rhs.type.is_pyobject):
- # things like (cdef) attribute access are not safe (traverses pointers)
- rhs = rhs.coerce_to_temp(env)
- elif rhs.type.is_pyobject:
- rhs = rhs.coerce_to_simple(env)
- self.rhs = rhs
- return self
-
+ if use_temp or rhs.is_attribute or (
+ not rhs.is_name and not rhs.is_literal and
+ rhs.type.is_pyobject):
+ # things like (cdef) attribute access are not safe (traverses pointers)
+ rhs = rhs.coerce_to_temp(env)
+ elif rhs.type.is_pyobject:
+ rhs = rhs.coerce_to_simple(env)
+ self.rhs = rhs
+ return self
+
def unroll(self, node, target_size, env):
from . import ExprNodes, UtilNodes
@@ -5478,9 +5478,9 @@ class SingleAssignmentNode(AssignmentNode):
check_node, refs, lhs = unrolled
return self.unroll_assignments(refs, check_node, lhs, self.rhs.args, env)
- def generate_rhs_evaluation_code(self, code):
- self.rhs.generate_evaluation_code(code)
-
+ def generate_rhs_evaluation_code(self, code):
+ self.rhs.generate_evaluation_code(code)
+
def generate_assignment_code(self, code, overloaded_assignment=False):
if self.is_overloaded_assignment:
self.lhs.generate_assignment_code(
@@ -5491,51 +5491,51 @@ class SingleAssignmentNode(AssignmentNode):
exception_value=self.exception_value)
else:
self.lhs.generate_assignment_code(self.rhs, code)
-
- def generate_function_definitions(self, env, code):
- self.rhs.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.lhs.annotate(code)
- self.rhs.annotate(code)
-
-
-class CascadedAssignmentNode(AssignmentNode):
- # An assignment with multiple left hand sides:
- #
- # a = b = c
- #
- # lhs_list [ExprNode] Left hand sides
- # rhs ExprNode Right hand sides
- #
- # Used internally:
- #
+
+ def generate_function_definitions(self, env, code):
+ self.rhs.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.lhs.annotate(code)
+ self.rhs.annotate(code)
+
+
+class CascadedAssignmentNode(AssignmentNode):
+ # An assignment with multiple left hand sides:
+ #
+ # a = b = c
+ #
+ # lhs_list [ExprNode] Left hand sides
+ # rhs ExprNode Right hand sides
+ #
+ # Used internally:
+ #
# coerced_values [ExprNode] RHS coerced to all distinct LHS types
# cloned_values [ExprNode] cloned RHS value for each LHS
# assignment_overloads [Bool] If each assignment uses a C++ operator=
-
+
child_attrs = ["lhs_list", "rhs", "coerced_values", "cloned_values"]
cloned_values = None
- coerced_values = None
+ coerced_values = None
assignment_overloads = None
-
- def analyse_declarations(self, env):
- for lhs in self.lhs_list:
- lhs.analyse_target_declaration(env)
-
- def analyse_types(self, env, use_temp=0):
- from .ExprNodes import CloneNode, ProxyNode
-
+
+ def analyse_declarations(self, env):
+ for lhs in self.lhs_list:
+ lhs.analyse_target_declaration(env)
+
+ def analyse_types(self, env, use_temp=0):
+ from .ExprNodes import CloneNode, ProxyNode
+
# collect distinct types used on the LHS
- lhs_types = set()
+ lhs_types = set()
for i, lhs in enumerate(self.lhs_list):
lhs = self.lhs_list[i] = lhs.analyse_target_types(env)
- lhs.gil_assignment_check(env)
- lhs_types.add(lhs.type)
-
- rhs = self.rhs.analyse_types(env)
+ lhs.gil_assignment_check(env)
+ lhs_types.add(lhs.type)
+
+ rhs = self.rhs.analyse_types(env)
# common special case: only one type needed on the LHS => coerce only once
- if len(lhs_types) == 1:
+ if len(lhs_types) == 1:
# Avoid coercion for overloaded assignment operators.
if next(iter(lhs_types)).is_cpp_class:
op = env.lookup_operator('=', [lhs, self.rhs])
@@ -5543,462 +5543,462 @@ class CascadedAssignmentNode(AssignmentNode):
rhs = rhs.coerce_to(lhs_types.pop(), env)
else:
rhs = rhs.coerce_to(lhs_types.pop(), env)
-
- if not rhs.is_name and not rhs.is_literal and (
- use_temp or rhs.is_attribute or rhs.type.is_pyobject):
- rhs = rhs.coerce_to_temp(env)
- else:
- rhs = rhs.coerce_to_simple(env)
- self.rhs = ProxyNode(rhs) if rhs.is_temp else rhs
-
+
+ if not rhs.is_name and not rhs.is_literal and (
+ use_temp or rhs.is_attribute or rhs.type.is_pyobject):
+ rhs = rhs.coerce_to_temp(env)
+ else:
+ rhs = rhs.coerce_to_simple(env)
+ self.rhs = ProxyNode(rhs) if rhs.is_temp else rhs
+
# clone RHS and coerce it to all distinct LHS types
- self.coerced_values = []
- coerced_values = {}
+ self.coerced_values = []
+ coerced_values = {}
self.assignment_overloads = []
- for lhs in self.lhs_list:
+ for lhs in self.lhs_list:
overloaded = lhs.type.is_cpp_class and env.lookup_operator('=', [lhs, self.rhs])
self.assignment_overloads.append(overloaded)
- if lhs.type not in coerced_values and lhs.type != rhs.type:
+ if lhs.type not in coerced_values and lhs.type != rhs.type:
rhs = CloneNode(self.rhs)
if not overloaded:
rhs = rhs.coerce_to(lhs.type, env)
- self.coerced_values.append(rhs)
- coerced_values[lhs.type] = rhs
-
+ self.coerced_values.append(rhs)
+ coerced_values[lhs.type] = rhs
+
# clone coerced values for all LHS assignments
self.cloned_values = []
- for lhs in self.lhs_list:
- rhs = coerced_values.get(lhs.type, self.rhs)
+ for lhs in self.lhs_list:
+ rhs = coerced_values.get(lhs.type, self.rhs)
self.cloned_values.append(CloneNode(rhs))
- return self
-
- def generate_rhs_evaluation_code(self, code):
- self.rhs.generate_evaluation_code(code)
-
+ return self
+
+ def generate_rhs_evaluation_code(self, code):
+ self.rhs.generate_evaluation_code(code)
+
def generate_assignment_code(self, code, overloaded_assignment=False):
# prepare all coercions
- for rhs in self.coerced_values:
- rhs.generate_evaluation_code(code)
+ for rhs in self.coerced_values:
+ rhs.generate_evaluation_code(code)
# assign clones to LHS
for lhs, rhs, overload in zip(self.lhs_list, self.cloned_values, self.assignment_overloads):
- rhs.generate_evaluation_code(code)
+ rhs.generate_evaluation_code(code)
lhs.generate_assignment_code(rhs, code, overloaded_assignment=overload)
# dispose of coerced values and original RHS
for rhs_value in self.coerced_values:
rhs_value.generate_disposal_code(code)
rhs_value.free_temps(code)
- self.rhs.generate_disposal_code(code)
- self.rhs.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- self.rhs.generate_function_definitions(env, code)
-
- def annotate(self, code):
- for rhs in self.coerced_values:
- rhs.annotate(code)
+ self.rhs.generate_disposal_code(code)
+ self.rhs.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ self.rhs.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ for rhs in self.coerced_values:
+ rhs.annotate(code)
for lhs, rhs in zip(self.lhs_list, self.cloned_values):
- lhs.annotate(code)
- rhs.annotate(code)
- self.rhs.annotate(code)
-
-
-class ParallelAssignmentNode(AssignmentNode):
- # A combined packing/unpacking assignment:
- #
- # a, b, c = d, e, f
- #
- # This has been rearranged by the parser into
- #
- # a = d ; b = e ; c = f
- #
- # but we must evaluate all the right hand sides
- # before assigning to any of the left hand sides.
- #
- # stats [AssignmentNode] The constituent assignments
-
- child_attrs = ["stats"]
-
- def analyse_declarations(self, env):
- for stat in self.stats:
- stat.analyse_declarations(env)
-
- def analyse_expressions(self, env):
+ lhs.annotate(code)
+ rhs.annotate(code)
+ self.rhs.annotate(code)
+
+
+class ParallelAssignmentNode(AssignmentNode):
+ # A combined packing/unpacking assignment:
+ #
+ # a, b, c = d, e, f
+ #
+ # This has been rearranged by the parser into
+ #
+ # a = d ; b = e ; c = f
+ #
+ # but we must evaluate all the right hand sides
+ # before assigning to any of the left hand sides.
+ #
+ # stats [AssignmentNode] The constituent assignments
+
+ child_attrs = ["stats"]
+
+ def analyse_declarations(self, env):
+ for stat in self.stats:
+ stat.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
self.stats = [stat.analyse_types(env, use_temp=1)
for stat in self.stats]
- return self
-
-# def analyse_expressions(self, env):
-# for stat in self.stats:
+ return self
+
+# def analyse_expressions(self, env):
+# for stat in self.stats:
# stat.analyse_expressions_1(env, use_temp=1)
-# for stat in self.stats:
-# stat.analyse_expressions_2(env)
-
- def generate_execution_code(self, code):
+# for stat in self.stats:
+# stat.analyse_expressions_2(env)
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- for stat in self.stats:
- stat.generate_rhs_evaluation_code(code)
- for stat in self.stats:
- stat.generate_assignment_code(code)
-
- def generate_function_definitions(self, env, code):
- for stat in self.stats:
- stat.generate_function_definitions(env, code)
-
- def annotate(self, code):
- for stat in self.stats:
- stat.annotate(code)
-
-
-class InPlaceAssignmentNode(AssignmentNode):
- # An in place arithmetic operand:
- #
- # a += b
- # a -= b
- # ...
- #
- # lhs ExprNode Left hand side
- # rhs ExprNode Right hand side
- # operator char one of "+-*/%^&|"
- #
- # This code is a bit tricky because in order to obey Python
- # semantics the sub-expressions (e.g. indices) of the lhs must
- # not be evaluated twice. So we must re-use the values calculated
- # in evaluation phase for the assignment phase as well.
- # Fortunately, the type of the lhs node is fairly constrained
- # (it must be a NameNode, AttributeNode, or IndexNode).
-
- child_attrs = ["lhs", "rhs"]
-
- def analyse_declarations(self, env):
- self.lhs.analyse_target_declaration(env)
-
- def analyse_types(self, env):
- self.rhs = self.rhs.analyse_types(env)
- self.lhs = self.lhs.analyse_target_types(env)
-
- # When assigning to a fully indexed buffer or memoryview, coerce the rhs
+ for stat in self.stats:
+ stat.generate_rhs_evaluation_code(code)
+ for stat in self.stats:
+ stat.generate_assignment_code(code)
+
+ def generate_function_definitions(self, env, code):
+ for stat in self.stats:
+ stat.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ for stat in self.stats:
+ stat.annotate(code)
+
+
+class InPlaceAssignmentNode(AssignmentNode):
+ # An in place arithmetic operand:
+ #
+ # a += b
+ # a -= b
+ # ...
+ #
+ # lhs ExprNode Left hand side
+ # rhs ExprNode Right hand side
+ # operator char one of "+-*/%^&|"
+ #
+ # This code is a bit tricky because in order to obey Python
+ # semantics the sub-expressions (e.g. indices) of the lhs must
+ # not be evaluated twice. So we must re-use the values calculated
+ # in evaluation phase for the assignment phase as well.
+ # Fortunately, the type of the lhs node is fairly constrained
+ # (it must be a NameNode, AttributeNode, or IndexNode).
+
+ child_attrs = ["lhs", "rhs"]
+
+ def analyse_declarations(self, env):
+ self.lhs.analyse_target_declaration(env)
+
+ def analyse_types(self, env):
+ self.rhs = self.rhs.analyse_types(env)
+ self.lhs = self.lhs.analyse_target_types(env)
+
+ # When assigning to a fully indexed buffer or memoryview, coerce the rhs
if self.lhs.is_memview_index or self.lhs.is_buffer_access:
- self.rhs = self.rhs.coerce_to(self.lhs.type, env)
- elif self.lhs.type.is_string and self.operator in '+-':
- # use pointer arithmetic for char* LHS instead of string concat
- self.rhs = self.rhs.coerce_to(PyrexTypes.c_py_ssize_t_type, env)
- return self
-
- def generate_execution_code(self, code):
+ self.rhs = self.rhs.coerce_to(self.lhs.type, env)
+ elif self.lhs.type.is_string and self.operator in '+-':
+ # use pointer arithmetic for char* LHS instead of string concat
+ self.rhs = self.rhs.coerce_to(PyrexTypes.c_py_ssize_t_type, env)
+ return self
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
lhs, rhs = self.lhs, self.rhs
rhs.generate_evaluation_code(code)
lhs.generate_subexpr_evaluation_code(code)
- c_op = self.operator
- if c_op == "//":
- c_op = "/"
- elif c_op == "**":
- error(self.pos, "No C inplace power operator")
+ c_op = self.operator
+ if c_op == "//":
+ c_op = "/"
+ elif c_op == "**":
+ error(self.pos, "No C inplace power operator")
if lhs.is_buffer_access or lhs.is_memview_index:
if lhs.type.is_pyobject:
- error(self.pos, "In-place operators not allowed on object buffers in this release.")
+ error(self.pos, "In-place operators not allowed on object buffers in this release.")
if c_op in ('/', '%') and lhs.type.is_int and not code.globalstate.directives['cdivision']:
- error(self.pos, "In-place non-c divide operators not allowed on int buffers.")
+ error(self.pos, "In-place non-c divide operators not allowed on int buffers.")
lhs.generate_buffer_setitem_code(rhs, code, c_op)
elif lhs.is_memview_slice:
error(self.pos, "Inplace operators not supported on memoryview slices")
- else:
- # C++
- # TODO: make sure overload is declared
+ else:
+ # C++
+ # TODO: make sure overload is declared
code.putln("%s %s= %s;" % (lhs.result(), c_op, rhs.result()))
lhs.generate_subexpr_disposal_code(code)
lhs.free_subexpr_temps(code)
rhs.generate_disposal_code(code)
rhs.free_temps(code)
-
- def annotate(self, code):
- self.lhs.annotate(code)
- self.rhs.annotate(code)
-
- def create_binop_node(self):
- from . import ExprNodes
- return ExprNodes.binop_node(self.pos, self.operator, self.lhs, self.rhs)
-
-
-class PrintStatNode(StatNode):
- # print statement
- #
- # arg_tuple TupleNode
- # stream ExprNode or None (stdout)
- # append_newline boolean
-
- child_attrs = ["arg_tuple", "stream"]
-
- def analyse_expressions(self, env):
- if self.stream:
- stream = self.stream.analyse_expressions(env)
- self.stream = stream.coerce_to_pyobject(env)
- arg_tuple = self.arg_tuple.analyse_expressions(env)
- self.arg_tuple = arg_tuple.coerce_to_pyobject(env)
- env.use_utility_code(printing_utility_code)
- if len(self.arg_tuple.args) == 1 and self.append_newline:
- env.use_utility_code(printing_one_utility_code)
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Python print statement"
-
- def generate_execution_code(self, code):
+
+ def annotate(self, code):
+ self.lhs.annotate(code)
+ self.rhs.annotate(code)
+
+ def create_binop_node(self):
+ from . import ExprNodes
+ return ExprNodes.binop_node(self.pos, self.operator, self.lhs, self.rhs)
+
+
+class PrintStatNode(StatNode):
+ # print statement
+ #
+ # arg_tuple TupleNode
+ # stream ExprNode or None (stdout)
+ # append_newline boolean
+
+ child_attrs = ["arg_tuple", "stream"]
+
+ def analyse_expressions(self, env):
+ if self.stream:
+ stream = self.stream.analyse_expressions(env)
+ self.stream = stream.coerce_to_pyobject(env)
+ arg_tuple = self.arg_tuple.analyse_expressions(env)
+ self.arg_tuple = arg_tuple.coerce_to_pyobject(env)
+ env.use_utility_code(printing_utility_code)
+ if len(self.arg_tuple.args) == 1 and self.append_newline:
+ env.use_utility_code(printing_one_utility_code)
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Python print statement"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- if self.stream:
- self.stream.generate_evaluation_code(code)
- stream_result = self.stream.py_result()
- else:
- stream_result = '0'
- if len(self.arg_tuple.args) == 1 and self.append_newline:
- arg = self.arg_tuple.args[0]
- arg.generate_evaluation_code(code)
-
- code.putln(
- "if (__Pyx_PrintOne(%s, %s) < 0) %s" % (
- stream_result,
- arg.py_result(),
- code.error_goto(self.pos)))
- arg.generate_disposal_code(code)
- arg.free_temps(code)
- else:
- self.arg_tuple.generate_evaluation_code(code)
- code.putln(
- "if (__Pyx_Print(%s, %s, %d) < 0) %s" % (
- stream_result,
- self.arg_tuple.py_result(),
- self.append_newline,
- code.error_goto(self.pos)))
- self.arg_tuple.generate_disposal_code(code)
- self.arg_tuple.free_temps(code)
-
- if self.stream:
- self.stream.generate_disposal_code(code)
- self.stream.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- if self.stream:
- self.stream.generate_function_definitions(env, code)
- self.arg_tuple.generate_function_definitions(env, code)
-
- def annotate(self, code):
- if self.stream:
- self.stream.annotate(code)
- self.arg_tuple.annotate(code)
-
-
-class ExecStatNode(StatNode):
- # exec statement
- #
- # args [ExprNode]
-
- child_attrs = ["args"]
-
- def analyse_expressions(self, env):
- for i, arg in enumerate(self.args):
- arg = arg.analyse_expressions(env)
- arg = arg.coerce_to_pyobject(env)
- self.args[i] = arg
- env.use_utility_code(Builtin.pyexec_utility_code)
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Python exec statement"
-
- def generate_execution_code(self, code):
+ if self.stream:
+ self.stream.generate_evaluation_code(code)
+ stream_result = self.stream.py_result()
+ else:
+ stream_result = '0'
+ if len(self.arg_tuple.args) == 1 and self.append_newline:
+ arg = self.arg_tuple.args[0]
+ arg.generate_evaluation_code(code)
+
+ code.putln(
+ "if (__Pyx_PrintOne(%s, %s) < 0) %s" % (
+ stream_result,
+ arg.py_result(),
+ code.error_goto(self.pos)))
+ arg.generate_disposal_code(code)
+ arg.free_temps(code)
+ else:
+ self.arg_tuple.generate_evaluation_code(code)
+ code.putln(
+ "if (__Pyx_Print(%s, %s, %d) < 0) %s" % (
+ stream_result,
+ self.arg_tuple.py_result(),
+ self.append_newline,
+ code.error_goto(self.pos)))
+ self.arg_tuple.generate_disposal_code(code)
+ self.arg_tuple.free_temps(code)
+
+ if self.stream:
+ self.stream.generate_disposal_code(code)
+ self.stream.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ if self.stream:
+ self.stream.generate_function_definitions(env, code)
+ self.arg_tuple.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ if self.stream:
+ self.stream.annotate(code)
+ self.arg_tuple.annotate(code)
+
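A minimal sketch of the C statements the two print paths above produce, rebuilt from the same format strings used in generate_execution_code(); the temporary name __pyx_t_1 and the error label are placeholders, not actual generated compiler output.

# Sketch: single-argument fast path (__Pyx_PrintOne) vs. general path (__Pyx_Print).
stream_result = '0'                      # stdout, as in the code above
arg_result = '__pyx_t_1'                 # placeholder temporary
error_goto = 'goto __pyx_L1_error;'      # placeholder error label
print("if (__Pyx_PrintOne(%s, %s) < 0) %s" % (stream_result, arg_result, error_goto))
print("if (__Pyx_Print(%s, %s, %d) < 0) %s" % (stream_result, arg_result, 1, error_goto))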
+
+class ExecStatNode(StatNode):
+ # exec statement
+ #
+ # args [ExprNode]
+
+ child_attrs = ["args"]
+
+ def analyse_expressions(self, env):
+ for i, arg in enumerate(self.args):
+ arg = arg.analyse_expressions(env)
+ arg = arg.coerce_to_pyobject(env)
+ self.args[i] = arg
+ env.use_utility_code(Builtin.pyexec_utility_code)
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Python exec statement"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- args = []
- for arg in self.args:
- arg.generate_evaluation_code(code)
+ args = []
+ for arg in self.args:
+ arg.generate_evaluation_code(code)
args.append(arg.py_result())
- args = tuple(args + ['0', '0'][:3-len(args)])
- temp_result = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True)
+ args = tuple(args + ['0', '0'][:3-len(args)])
+ temp_result = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True)
code.putln("%s = __Pyx_PyExec3(%s, %s, %s);" % ((temp_result,) + args))
- for arg in self.args:
- arg.generate_disposal_code(code)
- arg.free_temps(code)
- code.putln(
- code.error_goto_if_null(temp_result, self.pos))
- code.put_gotref(temp_result)
- code.put_decref_clear(temp_result, py_object_type)
- code.funcstate.release_temp(temp_result)
-
- def annotate(self, code):
- for arg in self.args:
- arg.annotate(code)
-
-
-class DelStatNode(StatNode):
- # del statement
- #
- # args [ExprNode]
-
- child_attrs = ["args"]
- ignore_nonexisting = False
-
- def analyse_declarations(self, env):
- for arg in self.args:
- arg.analyse_target_declaration(env)
-
- def analyse_expressions(self, env):
- for i, arg in enumerate(self.args):
- arg = self.args[i] = arg.analyse_target_expression(env, None)
+ for arg in self.args:
+ arg.generate_disposal_code(code)
+ arg.free_temps(code)
+ code.putln(
+ code.error_goto_if_null(temp_result, self.pos))
+ code.put_gotref(temp_result)
+ code.put_decref_clear(temp_result, py_object_type)
+ code.funcstate.release_temp(temp_result)
+
+ def annotate(self, code):
+ for arg in self.args:
+ arg.annotate(code)
+
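The argument-padding idiom in ExecStatNode.generate_execution_code() above can be checked in isolation: exec may carry one to three arguments, and the missing globals/locals slots are padded with '0' (a C NULL) so that __Pyx_PyExec3 always receives exactly three. A small standalone sketch:

def pad_exec_args(args):
    # same expression as above: pad to exactly three C argument strings
    return tuple(args + ['0', '0'][:3 - len(args)])

assert pad_exec_args(['code']) == ('code', '0', '0')
assert pad_exec_args(['code', 'globals']) == ('code', 'globals', '0')
assert pad_exec_args(['code', 'globals', 'locals']) == ('code', 'globals', 'locals')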
+
+class DelStatNode(StatNode):
+ # del statement
+ #
+ # args [ExprNode]
+
+ child_attrs = ["args"]
+ ignore_nonexisting = False
+
+ def analyse_declarations(self, env):
+ for arg in self.args:
+ arg.analyse_target_declaration(env)
+
+ def analyse_expressions(self, env):
+ for i, arg in enumerate(self.args):
+ arg = self.args[i] = arg.analyse_target_expression(env, None)
if arg.type.is_pyobject or (arg.is_name and arg.type.is_memoryviewslice):
- if arg.is_name and arg.entry.is_cglobal:
- error(arg.pos, "Deletion of global C variable")
- elif arg.type.is_ptr and arg.type.base_type.is_cpp_class:
- self.cpp_check(env)
- elif arg.type.is_cpp_class:
- error(arg.pos, "Deletion of non-heap C++ object")
- elif arg.is_subscript and arg.base.type is Builtin.bytearray_type:
- pass # del ba[i]
- else:
- error(arg.pos, "Deletion of non-Python, non-C++ object")
- #arg.release_target_temp(env)
- return self
-
- def nogil_check(self, env):
- for arg in self.args:
- if arg.type.is_pyobject:
- self.gil_error()
-
- gil_message = "Deleting Python object"
-
- def generate_execution_code(self, code):
+ if arg.is_name and arg.entry.is_cglobal:
+ error(arg.pos, "Deletion of global C variable")
+ elif arg.type.is_ptr and arg.type.base_type.is_cpp_class:
+ self.cpp_check(env)
+ elif arg.type.is_cpp_class:
+ error(arg.pos, "Deletion of non-heap C++ object")
+ elif arg.is_subscript and arg.base.type is Builtin.bytearray_type:
+ pass # del ba[i]
+ else:
+ error(arg.pos, "Deletion of non-Python, non-C++ object")
+ #arg.release_target_temp(env)
+ return self
+
+ def nogil_check(self, env):
+ for arg in self.args:
+ if arg.type.is_pyobject:
+ self.gil_error()
+
+ gil_message = "Deleting Python object"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- for arg in self.args:
- if (arg.type.is_pyobject or
- arg.type.is_memoryviewslice or
- arg.is_subscript and arg.base.type is Builtin.bytearray_type):
- arg.generate_deletion_code(
- code, ignore_nonexisting=self.ignore_nonexisting)
- elif arg.type.is_ptr and arg.type.base_type.is_cpp_class:
+ for arg in self.args:
+ if (arg.type.is_pyobject or
+ arg.type.is_memoryviewslice or
+ arg.is_subscript and arg.base.type is Builtin.bytearray_type):
+ arg.generate_deletion_code(
+ code, ignore_nonexisting=self.ignore_nonexisting)
+ elif arg.type.is_ptr and arg.type.base_type.is_cpp_class:
arg.generate_evaluation_code(code)
- code.putln("delete %s;" % arg.result())
+ code.putln("delete %s;" % arg.result())
arg.generate_disposal_code(code)
arg.free_temps(code)
- # else error reported earlier
-
- def annotate(self, code):
- for arg in self.args:
- arg.annotate(code)
-
-
-class PassStatNode(StatNode):
- # pass statement
-
- child_attrs = []
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class IndirectionNode(StatListNode):
- """
- This adds an indirection so that the node can be shared and a subtree can
- be removed at any time by clearing self.stats.
- """
-
- def __init__(self, stats):
- super(IndirectionNode, self).__init__(stats[0].pos, stats=stats)
-
-
-class BreakStatNode(StatNode):
-
- child_attrs = []
- is_terminator = True
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
+ # else error reported earlier
+
+ def annotate(self, code):
+ for arg in self.args:
+ arg.annotate(code)
+
+
+class PassStatNode(StatNode):
+ # pass statement
+
+ child_attrs = []
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+
+class IndirectionNode(StatListNode):
+ """
+ This adds an indirection so that the node can be shared and a subtree can
+ be removed at any time by clearing self.stats.
+ """
+
+ def __init__(self, stats):
+ super(IndirectionNode, self).__init__(stats[0].pos, stats=stats)
+
+
+class BreakStatNode(StatNode):
+
+ child_attrs = []
+ is_terminator = True
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- if not code.break_label:
- error(self.pos, "break statement not inside loop")
- else:
- code.put_goto(code.break_label)
-
-
-class ContinueStatNode(StatNode):
-
- child_attrs = []
- is_terminator = True
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
+ if not code.break_label:
+ error(self.pos, "break statement not inside loop")
+ else:
+ code.put_goto(code.break_label)
+
+
+class ContinueStatNode(StatNode):
+
+ child_attrs = []
+ is_terminator = True
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
if not code.continue_label:
error(self.pos, "continue statement not inside loop")
return
code.mark_pos(self.pos)
code.put_goto(code.continue_label)
-
-
-class ReturnStatNode(StatNode):
- # return statement
- #
- # value ExprNode or None
- # return_type PyrexType
- # in_generator return inside of generator => raise StopIteration
+
+
+class ReturnStatNode(StatNode):
+ # return statement
+ #
+ # value ExprNode or None
+ # return_type PyrexType
+ # in_generator return inside of generator => raise StopIteration
# in_async_gen return inside of async generator
-
- child_attrs = ["value"]
- is_terminator = True
- in_generator = False
+
+ child_attrs = ["value"]
+ is_terminator = True
+ in_generator = False
in_async_gen = False
-
- # Whether we are in a parallel section
- in_parallel = False
-
- def analyse_expressions(self, env):
- return_type = env.return_type
- self.return_type = return_type
- if not return_type:
- error(self.pos, "Return not inside a function body")
- return self
- if self.value:
+
+ # Whether we are in a parallel section
+ in_parallel = False
+
+ def analyse_expressions(self, env):
+ return_type = env.return_type
+ self.return_type = return_type
+ if not return_type:
+ error(self.pos, "Return not inside a function body")
+ return self
+ if self.value:
if self.in_async_gen:
error(self.pos, "Return with value in async generator")
- self.value = self.value.analyse_types(env)
- if return_type.is_void or return_type.is_returncode:
+ self.value = self.value.analyse_types(env)
+ if return_type.is_void or return_type.is_returncode:
error(self.value.pos, "Return with value in void function")
- else:
- self.value = self.value.coerce_to(env.return_type, env)
- else:
- if (not return_type.is_void
+ else:
+ self.value = self.value.coerce_to(env.return_type, env)
+ else:
+ if (not return_type.is_void
and not return_type.is_pyobject
and not return_type.is_returncode):
error(self.pos, "Return value required")
- return self
-
- def nogil_check(self, env):
- if self.return_type.is_pyobject:
- self.gil_error()
-
- gil_message = "Returning Python object"
-
- def generate_execution_code(self, code):
- code.mark_pos(self.pos)
- if not self.return_type:
- # error reported earlier
- return
+ return self
+
+ def nogil_check(self, env):
+ if self.return_type.is_pyobject:
+ self.gil_error()
+
+ gil_message = "Returning Python object"
+
+ def generate_execution_code(self, code):
+ code.mark_pos(self.pos)
+ if not self.return_type:
+ # error reported earlier
+ return
value = self.value
- if self.return_type.is_pyobject:
+ if self.return_type.is_pyobject:
code.put_xdecref(Naming.retval_cname, self.return_type)
if value and value.is_none:
# Use specialised default handling for "return None".
value = None
-
+
if value:
value.generate_evaluation_code(code)
- if self.return_type.is_memoryviewslice:
- from . import MemoryView
- MemoryView.put_acquire_memoryviewslice(
+ if self.return_type.is_memoryviewslice:
+ from . import MemoryView
+ MemoryView.put_acquire_memoryviewslice(
lhs_cname=Naming.retval_cname,
lhs_type=self.return_type,
lhs_pos=value.pos,
@@ -6006,23 +6006,23 @@ class ReturnStatNode(StatNode):
code=code,
have_gil=self.in_nogil_context)
value.generate_post_assignment_code(code)
- elif self.in_generator:
- # return value == raise StopIteration(value), but uncatchable
+ elif self.in_generator:
+ # return value == raise StopIteration(value), but uncatchable
code.globalstate.use_utility_code(
UtilityCode.load_cached("ReturnWithStopIteration", "Coroutine.c"))
code.putln("%s = NULL; __Pyx_ReturnWithStopIteration(%s);" % (
Naming.retval_cname,
value.py_result()))
value.generate_disposal_code(code)
- else:
+ else:
value.make_owned_reference(code)
code.putln("%s = %s;" % (
Naming.retval_cname,
value.result_as(self.return_type)))
value.generate_post_assignment_code(code)
value.free_temps(code)
- else:
- if self.return_type.is_pyobject:
+ else:
+ if self.return_type.is_pyobject:
if self.in_generator:
if self.in_async_gen:
code.globalstate.use_utility_code(
@@ -6031,246 +6031,246 @@ class ReturnStatNode(StatNode):
code.putln("%s = NULL;" % Naming.retval_cname)
else:
code.put_init_to_py_none(Naming.retval_cname, self.return_type)
- elif self.return_type.is_returncode:
- self.put_return(code, self.return_type.default_value)
-
- for cname, type in code.funcstate.temps_holding_reference():
- code.put_decref_clear(cname, type)
-
- code.put_goto(code.return_label)
-
- def put_return(self, code, value):
- if self.in_parallel:
- code.putln_openmp("#pragma omp critical(__pyx_returning)")
- code.putln("%s = %s;" % (Naming.retval_cname, value))
-
- def generate_function_definitions(self, env, code):
- if self.value is not None:
- self.value.generate_function_definitions(env, code)
-
- def annotate(self, code):
- if self.value:
- self.value.annotate(code)
-
-
-class RaiseStatNode(StatNode):
- # raise statement
- #
- # exc_type ExprNode or None
- # exc_value ExprNode or None
- # exc_tb ExprNode or None
- # cause ExprNode or None
-
- child_attrs = ["exc_type", "exc_value", "exc_tb", "cause"]
- is_terminator = True
-
- def analyse_expressions(self, env):
- if self.exc_type:
- exc_type = self.exc_type.analyse_types(env)
- self.exc_type = exc_type.coerce_to_pyobject(env)
- if self.exc_value:
- exc_value = self.exc_value.analyse_types(env)
- self.exc_value = exc_value.coerce_to_pyobject(env)
- if self.exc_tb:
- exc_tb = self.exc_tb.analyse_types(env)
- self.exc_tb = exc_tb.coerce_to_pyobject(env)
- if self.cause:
- cause = self.cause.analyse_types(env)
- self.cause = cause.coerce_to_pyobject(env)
- # special cases for builtin exceptions
- self.builtin_exc_name = None
- if self.exc_type and not self.exc_value and not self.exc_tb:
- exc = self.exc_type
- from . import ExprNodes
- if (isinstance(exc, ExprNodes.SimpleCallNode) and
+ elif self.return_type.is_returncode:
+ self.put_return(code, self.return_type.default_value)
+
+ for cname, type in code.funcstate.temps_holding_reference():
+ code.put_decref_clear(cname, type)
+
+ code.put_goto(code.return_label)
+
+ def put_return(self, code, value):
+ if self.in_parallel:
+ code.putln_openmp("#pragma omp critical(__pyx_returning)")
+ code.putln("%s = %s;" % (Naming.retval_cname, value))
+
+ def generate_function_definitions(self, env, code):
+ if self.value is not None:
+ self.value.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ if self.value:
+ self.value.annotate(code)
+
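The in_generator branch above notes that a generator's "return value" is equivalent to raising StopIteration(value), made uncatchable inside the frame by __Pyx_ReturnWithStopIteration. The plain-Python semantics it mirrors, as a standalone sketch:

def gen():
    yield 1
    return 42          # surfaces to the caller as StopIteration(42)

g = gen()
next(g)
try:
    next(g)
except StopIteration as exc:
    assert exc.value == 42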
+
+class RaiseStatNode(StatNode):
+ # raise statement
+ #
+ # exc_type ExprNode or None
+ # exc_value ExprNode or None
+ # exc_tb ExprNode or None
+ # cause ExprNode or None
+
+ child_attrs = ["exc_type", "exc_value", "exc_tb", "cause"]
+ is_terminator = True
+
+ def analyse_expressions(self, env):
+ if self.exc_type:
+ exc_type = self.exc_type.analyse_types(env)
+ self.exc_type = exc_type.coerce_to_pyobject(env)
+ if self.exc_value:
+ exc_value = self.exc_value.analyse_types(env)
+ self.exc_value = exc_value.coerce_to_pyobject(env)
+ if self.exc_tb:
+ exc_tb = self.exc_tb.analyse_types(env)
+ self.exc_tb = exc_tb.coerce_to_pyobject(env)
+ if self.cause:
+ cause = self.cause.analyse_types(env)
+ self.cause = cause.coerce_to_pyobject(env)
+ # special cases for builtin exceptions
+ self.builtin_exc_name = None
+ if self.exc_type and not self.exc_value and not self.exc_tb:
+ exc = self.exc_type
+ from . import ExprNodes
+ if (isinstance(exc, ExprNodes.SimpleCallNode) and
not (exc.args or (exc.arg_tuple is not None and exc.arg_tuple.args))):
exc = exc.function # extract the exception type
- if exc.is_name and exc.entry.is_builtin:
- self.builtin_exc_name = exc.name
- if self.builtin_exc_name == 'MemoryError':
- self.exc_type = None # has a separate implementation
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Raising exception"
-
- def generate_execution_code(self, code):
+ if exc.is_name and exc.entry.is_builtin:
+ self.builtin_exc_name = exc.name
+ if self.builtin_exc_name == 'MemoryError':
+ self.exc_type = None # has a separate implementation
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Raising exception"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- if self.builtin_exc_name == 'MemoryError':
- code.putln('PyErr_NoMemory(); %s' % code.error_goto(self.pos))
- return
-
- if self.exc_type:
- self.exc_type.generate_evaluation_code(code)
- type_code = self.exc_type.py_result()
+ if self.builtin_exc_name == 'MemoryError':
+ code.putln('PyErr_NoMemory(); %s' % code.error_goto(self.pos))
+ return
+
+ if self.exc_type:
+ self.exc_type.generate_evaluation_code(code)
+ type_code = self.exc_type.py_result()
if self.exc_type.is_name:
code.globalstate.use_entry_utility_code(self.exc_type.entry)
- else:
- type_code = "0"
- if self.exc_value:
- self.exc_value.generate_evaluation_code(code)
- value_code = self.exc_value.py_result()
- else:
- value_code = "0"
- if self.exc_tb:
- self.exc_tb.generate_evaluation_code(code)
- tb_code = self.exc_tb.py_result()
- else:
- tb_code = "0"
- if self.cause:
- self.cause.generate_evaluation_code(code)
- cause_code = self.cause.py_result()
- else:
- cause_code = "0"
- code.globalstate.use_utility_code(raise_utility_code)
- code.putln(
- "__Pyx_Raise(%s, %s, %s, %s);" % (
- type_code,
- value_code,
- tb_code,
- cause_code))
- for obj in (self.exc_type, self.exc_value, self.exc_tb, self.cause):
- if obj:
- obj.generate_disposal_code(code)
- obj.free_temps(code)
- code.putln(
- code.error_goto(self.pos))
-
- def generate_function_definitions(self, env, code):
- if self.exc_type is not None:
- self.exc_type.generate_function_definitions(env, code)
- if self.exc_value is not None:
- self.exc_value.generate_function_definitions(env, code)
- if self.exc_tb is not None:
- self.exc_tb.generate_function_definitions(env, code)
- if self.cause is not None:
- self.cause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- if self.exc_type:
- self.exc_type.annotate(code)
- if self.exc_value:
- self.exc_value.annotate(code)
- if self.exc_tb:
- self.exc_tb.annotate(code)
- if self.cause:
- self.cause.annotate(code)
-
-
-class ReraiseStatNode(StatNode):
-
- child_attrs = []
- is_terminator = True
-
- def analyse_expressions(self, env):
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Raising exception"
-
- def generate_execution_code(self, code):
+ else:
+ type_code = "0"
+ if self.exc_value:
+ self.exc_value.generate_evaluation_code(code)
+ value_code = self.exc_value.py_result()
+ else:
+ value_code = "0"
+ if self.exc_tb:
+ self.exc_tb.generate_evaluation_code(code)
+ tb_code = self.exc_tb.py_result()
+ else:
+ tb_code = "0"
+ if self.cause:
+ self.cause.generate_evaluation_code(code)
+ cause_code = self.cause.py_result()
+ else:
+ cause_code = "0"
+ code.globalstate.use_utility_code(raise_utility_code)
+ code.putln(
+ "__Pyx_Raise(%s, %s, %s, %s);" % (
+ type_code,
+ value_code,
+ tb_code,
+ cause_code))
+ for obj in (self.exc_type, self.exc_value, self.exc_tb, self.cause):
+ if obj:
+ obj.generate_disposal_code(code)
+ obj.free_temps(code)
+ code.putln(
+ code.error_goto(self.pos))
+
+ def generate_function_definitions(self, env, code):
+ if self.exc_type is not None:
+ self.exc_type.generate_function_definitions(env, code)
+ if self.exc_value is not None:
+ self.exc_value.generate_function_definitions(env, code)
+ if self.exc_tb is not None:
+ self.exc_tb.generate_function_definitions(env, code)
+ if self.cause is not None:
+ self.cause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ if self.exc_type:
+ self.exc_type.annotate(code)
+ if self.exc_value:
+ self.exc_value.annotate(code)
+ if self.exc_tb:
+ self.exc_tb.annotate(code)
+ if self.cause:
+ self.cause.annotate(code)
+
+
+class ReraiseStatNode(StatNode):
+
+ child_attrs = []
+ is_terminator = True
+
+ def analyse_expressions(self, env):
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Raising exception"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- vars = code.funcstate.exc_vars
- if vars:
- code.globalstate.use_utility_code(restore_exception_utility_code)
- code.put_giveref(vars[0])
- code.put_giveref(vars[1])
- # fresh exceptions may not have a traceback yet (-> finally!)
- code.put_xgiveref(vars[2])
+ vars = code.funcstate.exc_vars
+ if vars:
+ code.globalstate.use_utility_code(restore_exception_utility_code)
+ code.put_giveref(vars[0])
+ code.put_giveref(vars[1])
+ # fresh exceptions may not have a traceback yet (-> finally!)
+ code.put_xgiveref(vars[2])
code.putln("__Pyx_ErrRestoreWithState(%s, %s, %s);" % tuple(vars))
- for varname in vars:
- code.put("%s = 0; " % varname)
- code.putln()
- code.putln(code.error_goto(self.pos))
- else:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("ReRaiseException", "Exceptions.c"))
- code.putln("__Pyx_ReraiseException(); %s" % code.error_goto(self.pos))
-
-class AssertStatNode(StatNode):
- # assert statement
- #
- # cond ExprNode
- # value ExprNode or None
-
- child_attrs = ["cond", "value"]
-
- def analyse_expressions(self, env):
- self.cond = self.cond.analyse_boolean_expression(env)
- if self.value:
- value = self.value.analyse_types(env)
- if value.type is Builtin.tuple_type or not value.type.is_builtin_type:
- # prevent tuple values from being interpreted as argument value tuples
- from .ExprNodes import TupleNode
- value = TupleNode(value.pos, args=[value], slow=True)
+ for varname in vars:
+ code.put("%s = 0; " % varname)
+ code.putln()
+ code.putln(code.error_goto(self.pos))
+ else:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("ReRaiseException", "Exceptions.c"))
+ code.putln("__Pyx_ReraiseException(); %s" % code.error_goto(self.pos))
+
+class AssertStatNode(StatNode):
+ # assert statement
+ #
+ # cond ExprNode
+ # value ExprNode or None
+
+ child_attrs = ["cond", "value"]
+
+ def analyse_expressions(self, env):
+ self.cond = self.cond.analyse_boolean_expression(env)
+ if self.value:
+ value = self.value.analyse_types(env)
+ if value.type is Builtin.tuple_type or not value.type.is_builtin_type:
+ # prevent tuple values from being interpreted as argument value tuples
+ from .ExprNodes import TupleNode
+ value = TupleNode(value.pos, args=[value], slow=True)
self.value = value.analyse_types(env, skip_children=True).coerce_to_pyobject(env)
- else:
- self.value = value.coerce_to_pyobject(env)
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Raising exception"
-
- def generate_execution_code(self, code):
- code.putln("#ifndef CYTHON_WITHOUT_ASSERTIONS")
- code.putln("if (unlikely(!Py_OptimizeFlag)) {")
+ else:
+ self.value = value.coerce_to_pyobject(env)
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Raising exception"
+
+ def generate_execution_code(self, code):
+ code.putln("#ifndef CYTHON_WITHOUT_ASSERTIONS")
+ code.putln("if (unlikely(!Py_OptimizeFlag)) {")
code.mark_pos(self.pos)
- self.cond.generate_evaluation_code(code)
- code.putln(
+ self.cond.generate_evaluation_code(code)
+ code.putln(
"if (unlikely(!%s)) {" % self.cond.result())
- if self.value:
- self.value.generate_evaluation_code(code)
- code.putln(
+ if self.value:
+ self.value.generate_evaluation_code(code)
+ code.putln(
"PyErr_SetObject(PyExc_AssertionError, %s);" % self.value.py_result())
- self.value.generate_disposal_code(code)
- self.value.free_temps(code)
- else:
- code.putln(
- "PyErr_SetNone(PyExc_AssertionError);")
- code.putln(
+ self.value.generate_disposal_code(code)
+ self.value.free_temps(code)
+ else:
+ code.putln(
+ "PyErr_SetNone(PyExc_AssertionError);")
+ code.putln(
code.error_goto(self.pos))
- code.putln(
- "}")
- self.cond.generate_disposal_code(code)
- self.cond.free_temps(code)
- code.putln(
- "}")
- code.putln("#endif")
-
- def generate_function_definitions(self, env, code):
- self.cond.generate_function_definitions(env, code)
- if self.value is not None:
- self.value.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.cond.annotate(code)
- if self.value:
- self.value.annotate(code)
-
-
-class IfStatNode(StatNode):
- # if statement
- #
- # if_clauses [IfClauseNode]
- # else_clause StatNode or None
-
- child_attrs = ["if_clauses", "else_clause"]
-
- def analyse_declarations(self, env):
- for if_clause in self.if_clauses:
- if_clause.analyse_declarations(env)
- if self.else_clause:
- self.else_clause.analyse_declarations(env)
-
- def analyse_expressions(self, env):
+ code.putln(
+ "}")
+ self.cond.generate_disposal_code(code)
+ self.cond.free_temps(code)
+ code.putln(
+ "}")
+ code.putln("#endif")
+
+ def generate_function_definitions(self, env, code):
+ self.cond.generate_function_definitions(env, code)
+ if self.value is not None:
+ self.value.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.cond.annotate(code)
+ if self.value:
+ self.value.annotate(code)
+
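The Py_OptimizeFlag guard above corresponds to running CPython with -O, where assert statements are skipped entirely; a rough pure-Python equivalent of that guard (sys.flags.optimize is the interpreter-level counterpart of the C flag checked above):

import sys

def checked_assert(cond, value=None):
    if not sys.flags.optimize:   # mirrors "if (unlikely(!Py_OptimizeFlag))"
        if not cond:
            raise AssertionError(value)

checked_assert(2 + 2 == 4)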
+
+class IfStatNode(StatNode):
+ # if statement
+ #
+ # if_clauses [IfClauseNode]
+ # else_clause StatNode or None
+
+ child_attrs = ["if_clauses", "else_clause"]
+
+ def analyse_declarations(self, env):
+ for if_clause in self.if_clauses:
+ if_clause.analyse_declarations(env)
+ if self.else_clause:
+ self.else_clause.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
self.if_clauses = [if_clause.analyse_expressions(env) for if_clause in self.if_clauses]
- if self.else_clause:
- self.else_clause = self.else_clause.analyse_expressions(env)
- return self
-
- def generate_execution_code(self, code):
- code.mark_pos(self.pos)
- end_label = code.new_label()
+ if self.else_clause:
+ self.else_clause = self.else_clause.analyse_expressions(env)
+ return self
+
+ def generate_execution_code(self, code):
+ code.mark_pos(self.pos)
+ end_label = code.new_label()
last = len(self.if_clauses)
if self.else_clause:
# If the 'else' clause is 'unlikely', then set the preceding 'if' clause to 'likely' to reflect that.
@@ -6280,13 +6280,13 @@ class IfStatNode(StatNode):
for i, if_clause in enumerate(self.if_clauses):
self._set_branch_hint(if_clause, if_clause.body)
if_clause.generate_execution_code(code, end_label, is_last=i == last)
- if self.else_clause:
+ if self.else_clause:
code.mark_pos(self.else_clause.pos)
- code.putln("/*else*/ {")
- self.else_clause.generate_execution_code(code)
- code.putln("}")
- code.put_label(end_label)
-
+ code.putln("/*else*/ {")
+ self.else_clause.generate_execution_code(code)
+ code.putln("}")
+ code.put_label(end_label)
+
def _set_branch_hint(self, clause, statements_node, inverse=False):
if not statements_node.is_terminator:
return
@@ -6302,223 +6302,223 @@ class IfStatNode(StatNode):
return
clause.branch_hint = 'likely' if inverse else 'unlikely'
- def generate_function_definitions(self, env, code):
- for clause in self.if_clauses:
- clause.generate_function_definitions(env, code)
- if self.else_clause is not None:
- self.else_clause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- for if_clause in self.if_clauses:
- if_clause.annotate(code)
- if self.else_clause:
- self.else_clause.annotate(code)
-
-
-class IfClauseNode(Node):
- # if or elif clause in an if statement
- #
- # condition ExprNode
- # body StatNode
-
- child_attrs = ["condition", "body"]
+ def generate_function_definitions(self, env, code):
+ for clause in self.if_clauses:
+ clause.generate_function_definitions(env, code)
+ if self.else_clause is not None:
+ self.else_clause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ for if_clause in self.if_clauses:
+ if_clause.annotate(code)
+ if self.else_clause:
+ self.else_clause.annotate(code)
+
+
+class IfClauseNode(Node):
+ # if or elif clause in an if statement
+ #
+ # condition ExprNode
+ # body StatNode
+
+ child_attrs = ["condition", "body"]
branch_hint = None
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
-
- def analyse_expressions(self, env):
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
self.condition = self.condition.analyse_temp_boolean_expression(env)
- self.body = self.body.analyse_expressions(env)
- return self
-
+ self.body = self.body.analyse_expressions(env)
+ return self
+
def generate_execution_code(self, code, end_label, is_last):
- self.condition.generate_evaluation_code(code)
+ self.condition.generate_evaluation_code(code)
code.mark_pos(self.pos)
condition = self.condition.result()
if self.branch_hint:
condition = '%s(%s)' % (self.branch_hint, condition)
code.putln("if (%s) {" % condition)
- self.condition.generate_disposal_code(code)
- self.condition.free_temps(code)
- self.body.generate_execution_code(code)
+ self.condition.generate_disposal_code(code)
+ self.condition.free_temps(code)
+ self.body.generate_execution_code(code)
code.mark_pos(self.pos, trace=False)
if not (is_last or self.body.is_terminator):
- code.put_goto(end_label)
- code.putln("}")
-
- def generate_function_definitions(self, env, code):
- self.condition.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.condition.annotate(code)
- self.body.annotate(code)
-
-
-class SwitchCaseNode(StatNode):
- # Generated in the optimization of an if-elif-else node
- #
- # conditions [ExprNode]
- # body StatNode
-
- child_attrs = ['conditions', 'body']
-
+ code.put_goto(end_label)
+ code.putln("}")
+
+ def generate_function_definitions(self, env, code):
+ self.condition.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.condition.annotate(code)
+ self.body.annotate(code)
+
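A small sketch of how the branch_hint set by IfStatNode._set_branch_hint() ends up wrapping the C condition in IfClauseNode.generate_execution_code() above; the temporary name is a placeholder:

branch_hint = 'unlikely'
condition = '__pyx_t_1'              # placeholder C temporary
if branch_hint:
    condition = '%s(%s)' % (branch_hint, condition)
print("if (%s) {" % condition)       # -> if (unlikely(__pyx_t_1)) {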
+
+class SwitchCaseNode(StatNode):
+ # Generated in the optimization of an if-elif-else node
+ #
+ # conditions [ExprNode]
+ # body StatNode
+
+ child_attrs = ['conditions', 'body']
+
def generate_condition_evaluation_code(self, code):
- for cond in self.conditions:
- cond.generate_evaluation_code(code)
+ for cond in self.conditions:
+ cond.generate_evaluation_code(code)
def generate_execution_code(self, code):
num_conditions = len(self.conditions)
line_tracing_enabled = code.globalstate.directives['linetrace']
for i, cond in enumerate(self.conditions, 1):
- code.putln("case %s:" % cond.result())
+ code.putln("case %s:" % cond.result())
code.mark_pos(cond.pos) # Tracing code must appear *after* the 'case' statement.
if line_tracing_enabled and i < num_conditions:
# Allow fall-through after the line tracing code.
code.putln('CYTHON_FALLTHROUGH;')
- self.body.generate_execution_code(code)
+ self.body.generate_execution_code(code)
code.mark_pos(self.pos, trace=False)
- code.putln("break;")
-
- def generate_function_definitions(self, env, code):
- for cond in self.conditions:
- cond.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
-
- def annotate(self, code):
- for cond in self.conditions:
- cond.annotate(code)
- self.body.annotate(code)
-
-
-class SwitchStatNode(StatNode):
- # Generated in the optimization of an if-elif-else node
- #
- # test ExprNode
- # cases [SwitchCaseNode]
- # else_clause StatNode or None
-
- child_attrs = ['test', 'cases', 'else_clause']
-
- def generate_execution_code(self, code):
- self.test.generate_evaluation_code(code)
+ code.putln("break;")
+
+ def generate_function_definitions(self, env, code):
+ for cond in self.conditions:
+ cond.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ for cond in self.conditions:
+ cond.annotate(code)
+ self.body.annotate(code)
+
+
+class SwitchStatNode(StatNode):
+ # Generated in the optimization of an if-elif-else node
+ #
+ # test ExprNode
+ # cases [SwitchCaseNode]
+ # else_clause StatNode or None
+
+ child_attrs = ['test', 'cases', 'else_clause']
+
+ def generate_execution_code(self, code):
+ self.test.generate_evaluation_code(code)
# Make sure all conditions are evaluated before going into the switch() statement.
# This is required in order to prevent any execution code from leaking into the space between the cases.
for case in self.cases:
case.generate_condition_evaluation_code(code)
code.mark_pos(self.pos)
- code.putln("switch (%s) {" % self.test.result())
- for case in self.cases:
- case.generate_execution_code(code)
- if self.else_clause is not None:
- code.putln("default:")
- self.else_clause.generate_execution_code(code)
- code.putln("break;")
- else:
- # Always generate a default clause to prevent C compiler warnings
- # about unmatched enum values (it was not the user who decided to
- # generate the switch statement, so shouldn't be bothered).
- code.putln("default: break;")
- code.putln("}")
+ code.putln("switch (%s) {" % self.test.result())
+ for case in self.cases:
+ case.generate_execution_code(code)
+ if self.else_clause is not None:
+ code.putln("default:")
+ self.else_clause.generate_execution_code(code)
+ code.putln("break;")
+ else:
+ # Always generate a default clause to prevent C compiler warnings
+ # about unmatched enum values (it was not the user who decided to
+ # generate the switch statement, so shouldn't be bothered).
+ code.putln("default: break;")
+ code.putln("}")
self.test.generate_disposal_code(code)
self.test.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- self.test.generate_function_definitions(env, code)
- for case in self.cases:
- case.generate_function_definitions(env, code)
- if self.else_clause is not None:
- self.else_clause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.test.annotate(code)
- for case in self.cases:
- case.annotate(code)
- if self.else_clause is not None:
- self.else_clause.annotate(code)
-
-
-class LoopNode(object):
- pass
-
-
-class WhileStatNode(LoopNode, StatNode):
- # while statement
- #
- # condition ExprNode
- # body StatNode
- # else_clause StatNode
-
- child_attrs = ["condition", "body", "else_clause"]
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
- if self.else_clause:
- self.else_clause.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- if self.condition:
- self.condition = self.condition.analyse_temp_boolean_expression(env)
- self.body = self.body.analyse_expressions(env)
- if self.else_clause:
- self.else_clause = self.else_clause.analyse_expressions(env)
- return self
-
- def generate_execution_code(self, code):
+
+ def generate_function_definitions(self, env, code):
+ self.test.generate_function_definitions(env, code)
+ for case in self.cases:
+ case.generate_function_definitions(env, code)
+ if self.else_clause is not None:
+ self.else_clause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.test.annotate(code)
+ for case in self.cases:
+ case.annotate(code)
+ if self.else_clause is not None:
+ self.else_clause.annotate(code)
+
+
+class LoopNode(object):
+ pass
+
+
+class WhileStatNode(LoopNode, StatNode):
+ # while statement
+ #
+ # condition ExprNode
+ # body StatNode
+ # else_clause StatNode
+
+ child_attrs = ["condition", "body", "else_clause"]
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
+ if self.else_clause:
+ self.else_clause.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ if self.condition:
+ self.condition = self.condition.analyse_temp_boolean_expression(env)
+ self.body = self.body.analyse_expressions(env)
+ if self.else_clause:
+ self.else_clause = self.else_clause.analyse_expressions(env)
+ return self
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- old_loop_labels = code.new_loop_labels()
- code.putln(
- "while (1) {")
- if self.condition:
- self.condition.generate_evaluation_code(code)
- self.condition.generate_disposal_code(code)
- code.putln(
+ old_loop_labels = code.new_loop_labels()
+ code.putln(
+ "while (1) {")
+ if self.condition:
+ self.condition.generate_evaluation_code(code)
+ self.condition.generate_disposal_code(code)
+ code.putln(
"if (!%s) break;" % self.condition.result())
- self.condition.free_temps(code)
- self.body.generate_execution_code(code)
- code.put_label(code.continue_label)
- code.putln("}")
- break_label = code.break_label
- code.set_loop_labels(old_loop_labels)
- if self.else_clause:
- code.mark_pos(self.else_clause.pos)
- code.putln("/*else*/ {")
- self.else_clause.generate_execution_code(code)
- code.putln("}")
- code.put_label(break_label)
-
- def generate_function_definitions(self, env, code):
- if self.condition:
- self.condition.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
- if self.else_clause is not None:
- self.else_clause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- if self.condition:
- self.condition.annotate(code)
- self.body.annotate(code)
- if self.else_clause:
- self.else_clause.annotate(code)
-
-
-class DictIterationNextNode(Node):
- # Helper node for calling PyDict_Next() inside of a WhileStatNode
- # and checking the dictionary size for changes. Created in
- # Optimize.py.
- child_attrs = ['dict_obj', 'expected_size', 'pos_index_var',
- 'coerced_key_var', 'coerced_value_var', 'coerced_tuple_var',
- 'key_target', 'value_target', 'tuple_target', 'is_dict_flag']
-
- coerced_key_var = key_ref = None
- coerced_value_var = value_ref = None
- coerced_tuple_var = tuple_ref = None
-
- def __init__(self, dict_obj, expected_size, pos_index_var,
- key_target, value_target, tuple_target, is_dict_flag):
- Node.__init__(
- self, dict_obj.pos,
+ self.condition.free_temps(code)
+ self.body.generate_execution_code(code)
+ code.put_label(code.continue_label)
+ code.putln("}")
+ break_label = code.break_label
+ code.set_loop_labels(old_loop_labels)
+ if self.else_clause:
+ code.mark_pos(self.else_clause.pos)
+ code.putln("/*else*/ {")
+ self.else_clause.generate_execution_code(code)
+ code.putln("}")
+ code.put_label(break_label)
+
+ def generate_function_definitions(self, env, code):
+ if self.condition:
+ self.condition.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+ if self.else_clause is not None:
+ self.else_clause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ if self.condition:
+ self.condition.annotate(code)
+ self.body.annotate(code)
+ if self.else_clause:
+ self.else_clause.annotate(code)
+
+
+class DictIterationNextNode(Node):
+ # Helper node for calling PyDict_Next() inside of a WhileStatNode
+ # and checking the dictionary size for changes. Created in
+ # Optimize.py.
+ child_attrs = ['dict_obj', 'expected_size', 'pos_index_var',
+ 'coerced_key_var', 'coerced_value_var', 'coerced_tuple_var',
+ 'key_target', 'value_target', 'tuple_target', 'is_dict_flag']
+
+ coerced_key_var = key_ref = None
+ coerced_value_var = value_ref = None
+ coerced_tuple_var = tuple_ref = None
+
+ def __init__(self, dict_obj, expected_size, pos_index_var,
+ key_target, value_target, tuple_target, is_dict_flag):
+ Node.__init__(
+ self, dict_obj.pos,
dict_obj=dict_obj,
expected_size=expected_size,
pos_index_var=pos_index_var,
@@ -6528,72 +6528,72 @@ class DictIterationNextNode(Node):
is_dict_flag=is_dict_flag,
is_temp=True,
type=PyrexTypes.c_bint_type)
-
- def analyse_expressions(self, env):
- from . import ExprNodes
- self.dict_obj = self.dict_obj.analyse_types(env)
- self.expected_size = self.expected_size.analyse_types(env)
- if self.pos_index_var:
- self.pos_index_var = self.pos_index_var.analyse_types(env)
- if self.key_target:
- self.key_target = self.key_target.analyse_target_types(env)
- self.key_ref = ExprNodes.TempNode(self.key_target.pos, PyrexTypes.py_object_type)
- self.coerced_key_var = self.key_ref.coerce_to(self.key_target.type, env)
- if self.value_target:
- self.value_target = self.value_target.analyse_target_types(env)
- self.value_ref = ExprNodes.TempNode(self.value_target.pos, type=PyrexTypes.py_object_type)
- self.coerced_value_var = self.value_ref.coerce_to(self.value_target.type, env)
- if self.tuple_target:
- self.tuple_target = self.tuple_target.analyse_target_types(env)
- self.tuple_ref = ExprNodes.TempNode(self.tuple_target.pos, PyrexTypes.py_object_type)
- self.coerced_tuple_var = self.tuple_ref.coerce_to(self.tuple_target.type, env)
- self.is_dict_flag = self.is_dict_flag.analyse_types(env)
- return self
-
- def generate_function_definitions(self, env, code):
- self.dict_obj.generate_function_definitions(env, code)
-
- def generate_execution_code(self, code):
- code.globalstate.use_utility_code(UtilityCode.load_cached("dict_iter", "Optimize.c"))
- self.dict_obj.generate_evaluation_code(code)
-
- assignments = []
- temp_addresses = []
- for var, result, target in [(self.key_ref, self.coerced_key_var, self.key_target),
- (self.value_ref, self.coerced_value_var, self.value_target),
- (self.tuple_ref, self.coerced_tuple_var, self.tuple_target)]:
- if target is None:
- addr = 'NULL'
- else:
- assignments.append((var, result, target))
- var.allocate(code)
- addr = '&%s' % var.result()
- temp_addresses.append(addr)
-
- result_temp = code.funcstate.allocate_temp(PyrexTypes.c_int_type, False)
- code.putln("%s = __Pyx_dict_iter_next(%s, %s, &%s, %s, %s, %s, %s);" % (
- result_temp,
- self.dict_obj.py_result(),
- self.expected_size.result(),
- self.pos_index_var.result(),
- temp_addresses[0],
- temp_addresses[1],
- temp_addresses[2],
- self.is_dict_flag.result()
- ))
- code.putln("if (unlikely(%s == 0)) break;" % result_temp)
- code.putln(code.error_goto_if("%s == -1" % result_temp, self.pos))
- code.funcstate.release_temp(result_temp)
-
- # evaluate all coercions before the assignments
- for var, result, target in assignments:
- code.put_gotref(var.result())
- for var, result, target in assignments:
- result.generate_evaluation_code(code)
- for var, result, target in assignments:
- target.generate_assignment_code(result, code)
- var.release(code)
-
+
+ def analyse_expressions(self, env):
+ from . import ExprNodes
+ self.dict_obj = self.dict_obj.analyse_types(env)
+ self.expected_size = self.expected_size.analyse_types(env)
+ if self.pos_index_var:
+ self.pos_index_var = self.pos_index_var.analyse_types(env)
+ if self.key_target:
+ self.key_target = self.key_target.analyse_target_types(env)
+ self.key_ref = ExprNodes.TempNode(self.key_target.pos, PyrexTypes.py_object_type)
+ self.coerced_key_var = self.key_ref.coerce_to(self.key_target.type, env)
+ if self.value_target:
+ self.value_target = self.value_target.analyse_target_types(env)
+ self.value_ref = ExprNodes.TempNode(self.value_target.pos, type=PyrexTypes.py_object_type)
+ self.coerced_value_var = self.value_ref.coerce_to(self.value_target.type, env)
+ if self.tuple_target:
+ self.tuple_target = self.tuple_target.analyse_target_types(env)
+ self.tuple_ref = ExprNodes.TempNode(self.tuple_target.pos, PyrexTypes.py_object_type)
+ self.coerced_tuple_var = self.tuple_ref.coerce_to(self.tuple_target.type, env)
+ self.is_dict_flag = self.is_dict_flag.analyse_types(env)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ self.dict_obj.generate_function_definitions(env, code)
+
+ def generate_execution_code(self, code):
+ code.globalstate.use_utility_code(UtilityCode.load_cached("dict_iter", "Optimize.c"))
+ self.dict_obj.generate_evaluation_code(code)
+
+ assignments = []
+ temp_addresses = []
+ for var, result, target in [(self.key_ref, self.coerced_key_var, self.key_target),
+ (self.value_ref, self.coerced_value_var, self.value_target),
+ (self.tuple_ref, self.coerced_tuple_var, self.tuple_target)]:
+ if target is None:
+ addr = 'NULL'
+ else:
+ assignments.append((var, result, target))
+ var.allocate(code)
+ addr = '&%s' % var.result()
+ temp_addresses.append(addr)
+
+ result_temp = code.funcstate.allocate_temp(PyrexTypes.c_int_type, False)
+ code.putln("%s = __Pyx_dict_iter_next(%s, %s, &%s, %s, %s, %s, %s);" % (
+ result_temp,
+ self.dict_obj.py_result(),
+ self.expected_size.result(),
+ self.pos_index_var.result(),
+ temp_addresses[0],
+ temp_addresses[1],
+ temp_addresses[2],
+ self.is_dict_flag.result()
+ ))
+ code.putln("if (unlikely(%s == 0)) break;" % result_temp)
+ code.putln(code.error_goto_if("%s == -1" % result_temp, self.pos))
+ code.funcstate.release_temp(result_temp)
+
+ # evaluate all coercions before the assignments
+ for var, result, target in assignments:
+ code.put_gotref(var.result())
+ for var, result, target in assignments:
+ result.generate_evaluation_code(code)
+ for var, result, target in assignments:
+ target.generate_assignment_code(result, code)
+ var.release(code)
+
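The expected_size argument passed to __Pyx_dict_iter_next above guards against the dictionary being resized while it is being iterated, which plain Python reports as a RuntimeError; a standalone illustration of the behaviour being preserved:

d = {'a': 1, 'b': 2}
try:
    for key in d:
        d['c'] = 3          # grows the dict mid-iteration
except RuntimeError as exc:
    print(exc)              # dictionary changed size during iteration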
class SetIterationNextNode(Node):
# Helper node for calling _PySet_NextEntry() inside of a WhileStatNode
@@ -6655,113 +6655,113 @@ class SetIterationNextNode(Node):
value_ref.release(code)
-def ForStatNode(pos, **kw):
- if 'iterator' in kw:
+def ForStatNode(pos, **kw):
+ if 'iterator' in kw:
if kw['iterator'].is_async:
return AsyncForStatNode(pos, **kw)
else:
return ForInStatNode(pos, **kw)
- else:
- return ForFromStatNode(pos, **kw)
-
+ else:
+ return ForFromStatNode(pos, **kw)
+
class _ForInStatNode(LoopNode, StatNode):
# Base class of 'for-in' statements.
- #
- # target ExprNode
+ #
+ # target ExprNode
# iterator IteratorNode | AIterAwaitExprNode(AsyncIteratorNode)
- # body StatNode
- # else_clause StatNode
+ # body StatNode
+ # else_clause StatNode
# item NextNode | AwaitExprNode(AsyncNextNode)
# is_async boolean true for 'async for' statements
-
+
child_attrs = ["target", "item", "iterator", "body", "else_clause"]
- item = None
+ item = None
is_async = False
-
+
def _create_item_node(self):
raise NotImplementedError("must be implemented by subclasses")
- def analyse_declarations(self, env):
- self.target.analyse_target_declaration(env)
- self.body.analyse_declarations(env)
- if self.else_clause:
- self.else_clause.analyse_declarations(env)
+ def analyse_declarations(self, env):
+ self.target.analyse_target_declaration(env)
+ self.body.analyse_declarations(env)
+ if self.else_clause:
+ self.else_clause.analyse_declarations(env)
self._create_item_node()
-
- def analyse_expressions(self, env):
- self.target = self.target.analyse_target_types(env)
- self.iterator = self.iterator.analyse_expressions(env)
+
+ def analyse_expressions(self, env):
+ self.target = self.target.analyse_target_types(env)
+ self.iterator = self.iterator.analyse_expressions(env)
self._create_item_node() # must rewrap self.item after analysis
- self.item = self.item.analyse_expressions(env)
+ self.item = self.item.analyse_expressions(env)
if (not self.is_async and
(self.iterator.type.is_ptr or self.iterator.type.is_array) and
self.target.type.assignable_from(self.iterator.type)):
- # C array slice optimization.
- pass
- else:
- self.item = self.item.coerce_to(self.target.type, env)
- self.body = self.body.analyse_expressions(env)
- if self.else_clause:
- self.else_clause = self.else_clause.analyse_expressions(env)
- return self
-
- def generate_execution_code(self, code):
+ # C array slice optimization.
+ pass
+ else:
+ self.item = self.item.coerce_to(self.target.type, env)
+ self.body = self.body.analyse_expressions(env)
+ if self.else_clause:
+ self.else_clause = self.else_clause.analyse_expressions(env)
+ return self
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- old_loop_labels = code.new_loop_labels()
- self.iterator.generate_evaluation_code(code)
- code.putln("for (;;) {")
- self.item.generate_evaluation_code(code)
- self.target.generate_assignment_code(self.item, code)
- self.body.generate_execution_code(code)
- code.mark_pos(self.pos)
- code.put_label(code.continue_label)
- code.putln("}")
- break_label = code.break_label
- code.set_loop_labels(old_loop_labels)
-
- if self.else_clause:
- # in nested loops, the 'else' block can contain a
- # 'continue' statement for the outer loop, but we may need
- # to generate cleanup code before taking that path, so we
- # intercept it here
- orig_continue_label = code.continue_label
- code.continue_label = code.new_label('outer_continue')
-
- code.putln("/*else*/ {")
- self.else_clause.generate_execution_code(code)
- code.putln("}")
-
- if code.label_used(code.continue_label):
- code.put_goto(break_label)
- code.mark_pos(self.pos)
- code.put_label(code.continue_label)
- self.iterator.generate_disposal_code(code)
- code.put_goto(orig_continue_label)
- code.set_loop_labels(old_loop_labels)
-
- code.mark_pos(self.pos)
- if code.label_used(break_label):
- code.put_label(break_label)
- self.iterator.generate_disposal_code(code)
- self.iterator.free_temps(code)
-
- def generate_function_definitions(self, env, code):
- self.target.generate_function_definitions(env, code)
- self.iterator.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
- if self.else_clause is not None:
- self.else_clause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.target.annotate(code)
- self.iterator.annotate(code)
- self.body.annotate(code)
- if self.else_clause:
- self.else_clause.annotate(code)
- self.item.annotate(code)
-
-
+ old_loop_labels = code.new_loop_labels()
+ self.iterator.generate_evaluation_code(code)
+ code.putln("for (;;) {")
+ self.item.generate_evaluation_code(code)
+ self.target.generate_assignment_code(self.item, code)
+ self.body.generate_execution_code(code)
+ code.mark_pos(self.pos)
+ code.put_label(code.continue_label)
+ code.putln("}")
+ break_label = code.break_label
+ code.set_loop_labels(old_loop_labels)
+
+ if self.else_clause:
+ # in nested loops, the 'else' block can contain a
+ # 'continue' statement for the outer loop, but we may need
+ # to generate cleanup code before taking that path, so we
+ # intercept it here
+ orig_continue_label = code.continue_label
+ code.continue_label = code.new_label('outer_continue')
+
+ code.putln("/*else*/ {")
+ self.else_clause.generate_execution_code(code)
+ code.putln("}")
+
+ if code.label_used(code.continue_label):
+ code.put_goto(break_label)
+ code.mark_pos(self.pos)
+ code.put_label(code.continue_label)
+ self.iterator.generate_disposal_code(code)
+ code.put_goto(orig_continue_label)
+ code.set_loop_labels(old_loop_labels)
+
+ code.mark_pos(self.pos)
+ if code.label_used(break_label):
+ code.put_label(break_label)
+ self.iterator.generate_disposal_code(code)
+ self.iterator.free_temps(code)
+
+ def generate_function_definitions(self, env, code):
+ self.target.generate_function_definitions(env, code)
+ self.iterator.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+ if self.else_clause is not None:
+ self.else_clause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.target.annotate(code)
+ self.iterator.annotate(code)
+ self.body.annotate(code)
+ if self.else_clause:
+ self.else_clause.annotate(code)
+ self.item.annotate(code)
+
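The label juggling around the else clause above implements ordinary for/else semantics while still disposing of the iterator before a 'continue' that belongs to an outer loop; the Python-level behaviour being preserved, as a standalone sketch:

def search(items, needle):
    for item in items:
        if item == needle:
            break
    else:                    # runs only if the loop was not broken out of
        return 'not found'
    return 'found'

assert search([1, 2, 3], 2) == 'found'
assert search([1, 2, 3], 9) == 'not found'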
+
class ForInStatNode(_ForInStatNode):
# 'for' statement
@@ -6792,55 +6792,55 @@ class AsyncForStatNode(_ForInStatNode):
self.item.arg = ExprNodes.AsyncNextNode(self.iterator)
-class ForFromStatNode(LoopNode, StatNode):
- # for name from expr rel name rel expr
- #
- # target NameNode
- # bound1 ExprNode
- # relation1 string
- # relation2 string
- # bound2 ExprNode
- # step ExprNode or None
- # body StatNode
- # else_clause StatNode or None
- #
- # Used internally:
- #
- # from_range bool
- # is_py_target bool
- # loopvar_node ExprNode (usually a NameNode or temp node)
- # py_loopvar_node PyTempNode or None
- child_attrs = ["target", "bound1", "bound2", "step", "body", "else_clause"]
-
- is_py_target = False
- loopvar_node = None
- py_loopvar_node = None
- from_range = False
-
- gil_message = "For-loop using object bounds or target"
-
- def nogil_check(self, env):
- for x in (self.target, self.bound1, self.bound2):
- if x.type.is_pyobject:
- self.gil_error()
-
- def analyse_declarations(self, env):
- self.target.analyse_target_declaration(env)
- self.body.analyse_declarations(env)
- if self.else_clause:
- self.else_clause.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- from . import ExprNodes
- self.target = self.target.analyse_target_types(env)
- self.bound1 = self.bound1.analyse_types(env)
- self.bound2 = self.bound2.analyse_types(env)
- if self.step is not None:
- if isinstance(self.step, ExprNodes.UnaryMinusNode):
+class ForFromStatNode(LoopNode, StatNode):
+ # for name from expr rel name rel expr
+ #
+ # target NameNode
+ # bound1 ExprNode
+ # relation1 string
+ # relation2 string
+ # bound2 ExprNode
+ # step ExprNode or None
+ # body StatNode
+ # else_clause StatNode or None
+ #
+ # Used internally:
+ #
+ # from_range bool
+ # is_py_target bool
+ # loopvar_node ExprNode (usually a NameNode or temp node)
+ # py_loopvar_node PyTempNode or None
+ child_attrs = ["target", "bound1", "bound2", "step", "body", "else_clause"]
+
+ is_py_target = False
+ loopvar_node = None
+ py_loopvar_node = None
+ from_range = False
+
+ gil_message = "For-loop using object bounds or target"
+
+ def nogil_check(self, env):
+ for x in (self.target, self.bound1, self.bound2):
+ if x.type.is_pyobject:
+ self.gil_error()
+
+ def analyse_declarations(self, env):
+ self.target.analyse_target_declaration(env)
+ self.body.analyse_declarations(env)
+ if self.else_clause:
+ self.else_clause.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ from . import ExprNodes
+ self.target = self.target.analyse_target_types(env)
+ self.bound1 = self.bound1.analyse_types(env)
+ self.bound2 = self.bound2.analyse_types(env)
+ if self.step is not None:
+ if isinstance(self.step, ExprNodes.UnaryMinusNode):
warning(self.step.pos, "Probable infinite loop in for-from-by statement. "
"Consider switching the directions of the relations.", 2)
- self.step = self.step.analyse_types(env)
-
+ self.step = self.step.analyse_types(env)
+
self.set_up_loop(env)
target_type = self.target.type
if not (target_type.is_pyobject or target_type.is_numeric):
@@ -6857,64 +6857,64 @@ class ForFromStatNode(LoopNode, StatNode):
target_type = self.target.type
if target_type.is_numeric:
loop_type = target_type
- else:
+ else:
if target_type.is_enum:
warning(self.target.pos,
"Integer loops over enum values are fragile. Please cast to a safe integer type instead.")
loop_type = PyrexTypes.c_long_type if target_type.is_pyobject else PyrexTypes.c_int_type
- if not self.bound1.type.is_pyobject:
- loop_type = PyrexTypes.widest_numeric_type(loop_type, self.bound1.type)
- if not self.bound2.type.is_pyobject:
- loop_type = PyrexTypes.widest_numeric_type(loop_type, self.bound2.type)
- if self.step is not None and not self.step.type.is_pyobject:
- loop_type = PyrexTypes.widest_numeric_type(loop_type, self.step.type)
- self.bound1 = self.bound1.coerce_to(loop_type, env)
- self.bound2 = self.bound2.coerce_to(loop_type, env)
- if not self.bound2.is_literal:
- self.bound2 = self.bound2.coerce_to_temp(env)
- if self.step is not None:
- self.step = self.step.coerce_to(loop_type, env)
- if not self.step.is_literal:
- self.step = self.step.coerce_to_temp(env)
-
+ if not self.bound1.type.is_pyobject:
+ loop_type = PyrexTypes.widest_numeric_type(loop_type, self.bound1.type)
+ if not self.bound2.type.is_pyobject:
+ loop_type = PyrexTypes.widest_numeric_type(loop_type, self.bound2.type)
+ if self.step is not None and not self.step.type.is_pyobject:
+ loop_type = PyrexTypes.widest_numeric_type(loop_type, self.step.type)
+ self.bound1 = self.bound1.coerce_to(loop_type, env)
+ self.bound2 = self.bound2.coerce_to(loop_type, env)
+ if not self.bound2.is_literal:
+ self.bound2 = self.bound2.coerce_to_temp(env)
+ if self.step is not None:
+ self.step = self.step.coerce_to(loop_type, env)
+ if not self.step.is_literal:
+ self.step = self.step.coerce_to_temp(env)
+
if target_type.is_numeric or target_type.is_enum:
- self.is_py_target = False
+ self.is_py_target = False
if isinstance(self.target, ExprNodes.BufferIndexNode):
raise error(self.pos, "Buffer or memoryview slicing/indexing not allowed as for-loop target.")
- self.loopvar_node = self.target
- self.py_loopvar_node = None
- else:
- self.is_py_target = True
- c_loopvar_node = ExprNodes.TempNode(self.pos, loop_type, env)
- self.loopvar_node = c_loopvar_node
+ self.loopvar_node = self.target
+ self.py_loopvar_node = None
+ else:
+ self.is_py_target = True
+ c_loopvar_node = ExprNodes.TempNode(self.pos, loop_type, env)
+ self.loopvar_node = c_loopvar_node
self.py_loopvar_node = ExprNodes.CloneNode(c_loopvar_node).coerce_to_pyobject(env)
-
- def generate_execution_code(self, code):
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- old_loop_labels = code.new_loop_labels()
- from_range = self.from_range
- self.bound1.generate_evaluation_code(code)
- self.bound2.generate_evaluation_code(code)
- offset, incop = self.relation_table[self.relation1]
- if self.step is not None:
- self.step.generate_evaluation_code(code)
- step = self.step.result()
+ old_loop_labels = code.new_loop_labels()
+ from_range = self.from_range
+ self.bound1.generate_evaluation_code(code)
+ self.bound2.generate_evaluation_code(code)
+ offset, incop = self.relation_table[self.relation1]
+ if self.step is not None:
+ self.step.generate_evaluation_code(code)
+ step = self.step.result()
incop = "%s=%s" % (incop[0], step) # e.g. '++' => '+= STEP'
else:
step = '1'
- from . import ExprNodes
- if isinstance(self.loopvar_node, ExprNodes.TempNode):
- self.loopvar_node.allocate(code)
- if isinstance(self.py_loopvar_node, ExprNodes.TempNode):
- self.py_loopvar_node.allocate(code)
+ from . import ExprNodes
+ if isinstance(self.loopvar_node, ExprNodes.TempNode):
+ self.loopvar_node.allocate(code)
+ if isinstance(self.py_loopvar_node, ExprNodes.TempNode):
+ self.py_loopvar_node.allocate(code)
loopvar_type = PyrexTypes.c_long_type if self.target.type.is_enum else self.target.type
if from_range and not self.is_py_target:
loopvar_name = code.funcstate.allocate_temp(loopvar_type, False)
- else:
- loopvar_name = self.loopvar_node.result()
+ else:
+ loopvar_name = self.loopvar_node.result()
if loopvar_type.is_int and not loopvar_type.signed and self.relation2[0] == '>':
# Handle the case where the endpoint of an unsigned int iteration
# is within step of 0.
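The comment above refers to a real hazard of C unsigned arithmetic: decrementing an unsigned counter past 0 wraps around to a huge value instead of going negative, so a descending loop whose endpoint lies within one step of 0 could otherwise never fail its termination test. A tiny stand-alone Python illustration of the wraparound (ctypes is used only to emulate 32-bit unsigned C arithmetic; this is not Cython code):

    import ctypes

    i = ctypes.c_uint32(0)
    i.value -= 1       # what an unsigned C "i--" does at zero
    print(i.value)     # 4294967295, not -1, so a test like "i > 0" stays true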
@@ -6937,454 +6937,454 @@ class ForFromStatNode(LoopNode, StatNode):
coerced_loopvar_node.generate_evaluation_code(code)
self.target.generate_assignment_code(coerced_loopvar_node, code)
- self.body.generate_execution_code(code)
- code.put_label(code.continue_label)
+ self.body.generate_execution_code(code)
+ code.put_label(code.continue_label)
if not from_range and self.py_loopvar_node:
- # This mess is to make for..from loops with python targets behave
- # exactly like those with C targets with regards to re-assignment
- # of the loop variable.
- if self.target.entry.is_pyglobal:
- # We know target is a NameNode, this is the only ugly case.
- target_node = ExprNodes.PyTempNode(self.target.pos, None)
- target_node.allocate(code)
- interned_cname = code.intern_identifier(self.target.entry.name)
- if self.target.entry.scope.is_module_scope:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
+ # This mess is to make for..from loops with python targets behave
+ # exactly like those with C targets with regards to re-assignment
+ # of the loop variable.
+ if self.target.entry.is_pyglobal:
+ # We know target is a NameNode, this is the only ugly case.
+ target_node = ExprNodes.PyTempNode(self.target.pos, None)
+ target_node.allocate(code)
+ interned_cname = code.intern_identifier(self.target.entry.name)
+ if self.target.entry.scope.is_module_scope:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
lookup_func = '__Pyx_GetModuleGlobalName(%s, %s); %s'
- else:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("GetNameInClass", "ObjectHandling.c"))
+ else:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("GetNameInClass", "ObjectHandling.c"))
lookup_func = '__Pyx_GetNameInClass(%s, {}, %s); %s'.format(
- self.target.entry.scope.namespace_cname)
+ self.target.entry.scope.namespace_cname)
code.putln(lookup_func % (
- target_node.result(),
+ target_node.result(),
interned_cname,
- code.error_goto_if_null(target_node.result(), self.target.pos)))
- code.put_gotref(target_node.result())
- else:
- target_node = self.target
- from_py_node = ExprNodes.CoerceFromPyTypeNode(
- self.loopvar_node.type, target_node, self.target.entry.scope)
- from_py_node.temp_code = loopvar_name
- from_py_node.generate_result_code(code)
- if self.target.entry.is_pyglobal:
- code.put_decref(target_node.result(), target_node.type)
- target_node.release(code)
-
- code.putln("}")
+ code.error_goto_if_null(target_node.result(), self.target.pos)))
+ code.put_gotref(target_node.result())
+ else:
+ target_node = self.target
+ from_py_node = ExprNodes.CoerceFromPyTypeNode(
+ self.loopvar_node.type, target_node, self.target.entry.scope)
+ from_py_node.temp_code = loopvar_name
+ from_py_node.generate_result_code(code)
+ if self.target.entry.is_pyglobal:
+ code.put_decref(target_node.result(), target_node.type)
+ target_node.release(code)
+
+ code.putln("}")
if not from_range and self.py_loopvar_node:
- # This is potentially wasteful, but we don't want the semantics to
- # depend on whether or not the loop is a python type.
- self.py_loopvar_node.generate_evaluation_code(code)
- self.target.generate_assignment_code(self.py_loopvar_node, code)
+ # This is potentially wasteful, but we don't want the semantics to
+ # depend on whether or not the loop is a python type.
+ self.py_loopvar_node.generate_evaluation_code(code)
+ self.target.generate_assignment_code(self.py_loopvar_node, code)
if from_range and not self.is_py_target:
- code.funcstate.release_temp(loopvar_name)
-
- break_label = code.break_label
- code.set_loop_labels(old_loop_labels)
- if self.else_clause:
- code.putln("/*else*/ {")
- self.else_clause.generate_execution_code(code)
- code.putln("}")
- code.put_label(break_label)
- self.bound1.generate_disposal_code(code)
- self.bound1.free_temps(code)
- self.bound2.generate_disposal_code(code)
- self.bound2.free_temps(code)
- if isinstance(self.loopvar_node, ExprNodes.TempNode):
- self.loopvar_node.release(code)
- if isinstance(self.py_loopvar_node, ExprNodes.TempNode):
- self.py_loopvar_node.release(code)
- if self.step is not None:
- self.step.generate_disposal_code(code)
- self.step.free_temps(code)
-
- relation_table = {
- # {relop : (initial offset, increment op)}
- '<=': ("", "++"),
- '<' : ("+1", "++"),
- '>=': ("", "--"),
+ code.funcstate.release_temp(loopvar_name)
+
+ break_label = code.break_label
+ code.set_loop_labels(old_loop_labels)
+ if self.else_clause:
+ code.putln("/*else*/ {")
+ self.else_clause.generate_execution_code(code)
+ code.putln("}")
+ code.put_label(break_label)
+ self.bound1.generate_disposal_code(code)
+ self.bound1.free_temps(code)
+ self.bound2.generate_disposal_code(code)
+ self.bound2.free_temps(code)
+ if isinstance(self.loopvar_node, ExprNodes.TempNode):
+ self.loopvar_node.release(code)
+ if isinstance(self.py_loopvar_node, ExprNodes.TempNode):
+ self.py_loopvar_node.release(code)
+ if self.step is not None:
+ self.step.generate_disposal_code(code)
+ self.step.free_temps(code)
+
+ relation_table = {
+ # {relop : (initial offset, increment op)}
+ '<=': ("", "++"),
+ '<' : ("+1", "++"),
+ '>=': ("", "--"),
'>' : ("-1", "--"),
- }
-
- def generate_function_definitions(self, env, code):
- self.target.generate_function_definitions(env, code)
- self.bound1.generate_function_definitions(env, code)
- self.bound2.generate_function_definitions(env, code)
- if self.step is not None:
- self.step.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
- if self.else_clause is not None:
- self.else_clause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.target.annotate(code)
- self.bound1.annotate(code)
- self.bound2.annotate(code)
- if self.step:
- self.step.annotate(code)
- self.body.annotate(code)
- if self.else_clause:
- self.else_clause.annotate(code)
-
-
-class WithStatNode(StatNode):
- """
- Represents a Python with statement.
-
- Implemented by the WithTransform as follows:
-
- MGR = EXPR
- EXIT = MGR.__exit__
- VALUE = MGR.__enter__()
- EXC = True
- try:
- try:
- TARGET = VALUE # optional
- BODY
- except:
- EXC = False
- if not EXIT(*EXCINFO):
- raise
- finally:
- if EXC:
- EXIT(None, None, None)
- MGR = EXIT = VALUE = None
- """
- # manager The with statement manager object
- # target ExprNode the target lhs of the __enter__() call
- # body StatNode
- # enter_call ExprNode the call to the __enter__() method
- # exit_var String the cname of the __exit__() method reference
-
- child_attrs = ["manager", "enter_call", "target", "body"]
-
- enter_call = None
- target_temp = None
-
- def analyse_declarations(self, env):
- self.manager.analyse_declarations(env)
- self.enter_call.analyse_declarations(env)
- self.body.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- self.manager = self.manager.analyse_types(env)
- self.enter_call = self.enter_call.analyse_types(env)
- if self.target:
- # set up target_temp before descending into body (which uses it)
- from .ExprNodes import TempNode
- self.target_temp = TempNode(self.enter_call.pos, self.enter_call.type)
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_function_definitions(self, env, code):
- self.manager.generate_function_definitions(env, code)
- self.enter_call.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
-
- def generate_execution_code(self, code):
+ }
+
+ def generate_function_definitions(self, env, code):
+ self.target.generate_function_definitions(env, code)
+ self.bound1.generate_function_definitions(env, code)
+ self.bound2.generate_function_definitions(env, code)
+ if self.step is not None:
+ self.step.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+ if self.else_clause is not None:
+ self.else_clause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.target.annotate(code)
+ self.bound1.annotate(code)
+ self.bound2.annotate(code)
+ if self.step:
+ self.step.annotate(code)
+ self.body.annotate(code)
+ if self.else_clause:
+ self.else_clause.annotate(code)
+
+
+class WithStatNode(StatNode):
+ """
+ Represents a Python with statement.
+
+ Implemented by the WithTransform as follows:
+
+ MGR = EXPR
+ EXIT = MGR.__exit__
+ VALUE = MGR.__enter__()
+ EXC = True
+ try:
+ try:
+ TARGET = VALUE # optional
+ BODY
+ except:
+ EXC = False
+ if not EXIT(*EXCINFO):
+ raise
+ finally:
+ if EXC:
+ EXIT(None, None, None)
+ MGR = EXIT = VALUE = None
+ """
+ # manager The with statement manager object
+ # target ExprNode the target lhs of the __enter__() call
+ # body StatNode
+ # enter_call ExprNode the call to the __enter__() method
+ # exit_var String the cname of the __exit__() method reference
+
+ child_attrs = ["manager", "enter_call", "target", "body"]
+
+ enter_call = None
+ target_temp = None
+
+ def analyse_declarations(self, env):
+ self.manager.analyse_declarations(env)
+ self.enter_call.analyse_declarations(env)
+ self.body.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ self.manager = self.manager.analyse_types(env)
+ self.enter_call = self.enter_call.analyse_types(env)
+ if self.target:
+ # set up target_temp before descending into body (which uses it)
+ from .ExprNodes import TempNode
+ self.target_temp = TempNode(self.enter_call.pos, self.enter_call.type)
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ self.manager.generate_function_definitions(env, code)
+ self.enter_call.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- code.putln("/*with:*/ {")
- self.manager.generate_evaluation_code(code)
- self.exit_var = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("PyObjectLookupSpecial", "ObjectHandling.c"))
- code.putln("%s = __Pyx_PyObject_LookupSpecial(%s, %s); %s" % (
- self.exit_var,
- self.manager.py_result(),
+ code.putln("/*with:*/ {")
+ self.manager.generate_evaluation_code(code)
+ self.exit_var = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("PyObjectLookupSpecial", "ObjectHandling.c"))
+ code.putln("%s = __Pyx_PyObject_LookupSpecial(%s, %s); %s" % (
+ self.exit_var,
+ self.manager.py_result(),
code.intern_identifier(EncodedString('__aexit__' if self.is_async else '__exit__')),
- code.error_goto_if_null(self.exit_var, self.pos),
- ))
- code.put_gotref(self.exit_var)
-
- # need to free exit_var in the face of exceptions during setup
- old_error_label = code.new_error_label()
- intermediate_error_label = code.error_label
-
- self.enter_call.generate_evaluation_code(code)
- if self.target:
- # The temp result will be cleaned up by the WithTargetAssignmentStatNode
- # after assigning its result to the target of the 'with' statement.
- self.target_temp.allocate(code)
- self.enter_call.make_owned_reference(code)
- code.putln("%s = %s;" % (self.target_temp.result(), self.enter_call.result()))
- self.enter_call.generate_post_assignment_code(code)
- else:
- self.enter_call.generate_disposal_code(code)
- self.enter_call.free_temps(code)
-
- self.manager.generate_disposal_code(code)
- self.manager.free_temps(code)
-
- code.error_label = old_error_label
- self.body.generate_execution_code(code)
-
- if code.label_used(intermediate_error_label):
- step_over_label = code.new_label()
- code.put_goto(step_over_label)
- code.put_label(intermediate_error_label)
- code.put_decref_clear(self.exit_var, py_object_type)
- code.put_goto(old_error_label)
- code.put_label(step_over_label)
-
- code.funcstate.release_temp(self.exit_var)
- code.putln('}')
-
-
-class WithTargetAssignmentStatNode(AssignmentNode):
- # The target assignment of the 'with' statement value (return
- # value of the __enter__() call).
- #
- # This is a special cased assignment that properly cleans up the RHS.
- #
- # lhs ExprNode the assignment target
- # rhs ExprNode a (coerced) TempNode for the rhs (from WithStatNode)
- # with_node WithStatNode the surrounding with-statement
-
- child_attrs = ["rhs", "lhs"]
- with_node = None
- rhs = None
-
- def analyse_declarations(self, env):
- self.lhs.analyse_target_declaration(env)
-
- def analyse_expressions(self, env):
- self.lhs = self.lhs.analyse_target_types(env)
- self.lhs.gil_assignment_check(env)
- self.rhs = self.with_node.target_temp.coerce_to(self.lhs.type, env)
- return self
-
- def generate_execution_code(self, code):
- self.rhs.generate_evaluation_code(code)
- self.lhs.generate_assignment_code(self.rhs, code)
- self.with_node.target_temp.release(code)
-
- def annotate(self, code):
- self.lhs.annotate(code)
- self.rhs.annotate(code)
-
-
-class TryExceptStatNode(StatNode):
- # try .. except statement
- #
- # body StatNode
- # except_clauses [ExceptClauseNode]
- # else_clause StatNode or None
-
- child_attrs = ["body", "except_clauses", "else_clause"]
+ code.error_goto_if_null(self.exit_var, self.pos),
+ ))
+ code.put_gotref(self.exit_var)
+
+ # need to free exit_var in the face of exceptions during setup
+ old_error_label = code.new_error_label()
+ intermediate_error_label = code.error_label
+
+ self.enter_call.generate_evaluation_code(code)
+ if self.target:
+ # The temp result will be cleaned up by the WithTargetAssignmentStatNode
+ # after assigning its result to the target of the 'with' statement.
+ self.target_temp.allocate(code)
+ self.enter_call.make_owned_reference(code)
+ code.putln("%s = %s;" % (self.target_temp.result(), self.enter_call.result()))
+ self.enter_call.generate_post_assignment_code(code)
+ else:
+ self.enter_call.generate_disposal_code(code)
+ self.enter_call.free_temps(code)
+
+ self.manager.generate_disposal_code(code)
+ self.manager.free_temps(code)
+
+ code.error_label = old_error_label
+ self.body.generate_execution_code(code)
+
+ if code.label_used(intermediate_error_label):
+ step_over_label = code.new_label()
+ code.put_goto(step_over_label)
+ code.put_label(intermediate_error_label)
+ code.put_decref_clear(self.exit_var, py_object_type)
+ code.put_goto(old_error_label)
+ code.put_label(step_over_label)
+
+ code.funcstate.release_temp(self.exit_var)
+ code.putln('}')
+
+
+class WithTargetAssignmentStatNode(AssignmentNode):
+ # The target assignment of the 'with' statement value (return
+ # value of the __enter__() call).
+ #
+ # This is a special cased assignment that properly cleans up the RHS.
+ #
+ # lhs ExprNode the assignment target
+ # rhs ExprNode a (coerced) TempNode for the rhs (from WithStatNode)
+ # with_node WithStatNode the surrounding with-statement
+
+ child_attrs = ["rhs", "lhs"]
+ with_node = None
+ rhs = None
+
+ def analyse_declarations(self, env):
+ self.lhs.analyse_target_declaration(env)
+
+ def analyse_expressions(self, env):
+ self.lhs = self.lhs.analyse_target_types(env)
+ self.lhs.gil_assignment_check(env)
+ self.rhs = self.with_node.target_temp.coerce_to(self.lhs.type, env)
+ return self
+
+ def generate_execution_code(self, code):
+ self.rhs.generate_evaluation_code(code)
+ self.lhs.generate_assignment_code(self.rhs, code)
+ self.with_node.target_temp.release(code)
+
+ def annotate(self, code):
+ self.lhs.annotate(code)
+ self.rhs.annotate(code)
+
+
+class TryExceptStatNode(StatNode):
+ # try .. except statement
+ #
+ # body StatNode
+ # except_clauses [ExceptClauseNode]
+ # else_clause StatNode or None
+
+ child_attrs = ["body", "except_clauses", "else_clause"]
in_generator = False
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
- for except_clause in self.except_clauses:
- except_clause.analyse_declarations(env)
- if self.else_clause:
- self.else_clause.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- self.body = self.body.analyse_expressions(env)
- default_clause_seen = 0
- for i, except_clause in enumerate(self.except_clauses):
- except_clause = self.except_clauses[i] = except_clause.analyse_expressions(env)
- if default_clause_seen:
- error(except_clause.pos, "default 'except:' must be last")
- if not except_clause.pattern:
- default_clause_seen = 1
- self.has_default_clause = default_clause_seen
- if self.else_clause:
- self.else_clause = self.else_clause.analyse_expressions(env)
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Try-except statement"
-
- def generate_execution_code(self, code):
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
+ for except_clause in self.except_clauses:
+ except_clause.analyse_declarations(env)
+ if self.else_clause:
+ self.else_clause.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ self.body = self.body.analyse_expressions(env)
+ default_clause_seen = 0
+ for i, except_clause in enumerate(self.except_clauses):
+ except_clause = self.except_clauses[i] = except_clause.analyse_expressions(env)
+ if default_clause_seen:
+ error(except_clause.pos, "default 'except:' must be last")
+ if not except_clause.pattern:
+ default_clause_seen = 1
+ self.has_default_clause = default_clause_seen
+ if self.else_clause:
+ self.else_clause = self.else_clause.analyse_expressions(env)
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Try-except statement"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos) # before changing the error label, in case of tracing errors
code.putln("{")
- old_return_label = code.return_label
- old_break_label = code.break_label
- old_continue_label = code.continue_label
- old_error_label = code.new_error_label()
- our_error_label = code.error_label
- except_end_label = code.new_label('exception_handled')
- except_error_label = code.new_label('except_error')
- except_return_label = code.new_label('except_return')
- try_return_label = code.new_label('try_return')
+ old_return_label = code.return_label
+ old_break_label = code.break_label
+ old_continue_label = code.continue_label
+ old_error_label = code.new_error_label()
+ our_error_label = code.error_label
+ except_end_label = code.new_label('exception_handled')
+ except_error_label = code.new_label('except_error')
+ except_return_label = code.new_label('except_return')
+ try_return_label = code.new_label('try_return')
try_break_label = code.new_label('try_break') if old_break_label else None
try_continue_label = code.new_label('try_continue') if old_continue_label else None
- try_end_label = code.new_label('try_end')
-
- exc_save_vars = [code.funcstate.allocate_temp(py_object_type, False)
+ try_end_label = code.new_label('try_end')
+
+ exc_save_vars = [code.funcstate.allocate_temp(py_object_type, False)
for _ in range(3)]
- save_exc = code.insertion_point()
- code.putln(
- "/*try:*/ {")
- code.return_label = try_return_label
- code.break_label = try_break_label
- code.continue_label = try_continue_label
- self.body.generate_execution_code(code)
+ save_exc = code.insertion_point()
+ code.putln(
+ "/*try:*/ {")
+ code.return_label = try_return_label
+ code.break_label = try_break_label
+ code.continue_label = try_continue_label
+ self.body.generate_execution_code(code)
code.mark_pos(self.pos, trace=False)
- code.putln(
- "}")
- temps_to_clean_up = code.funcstate.all_free_managed_temps()
- can_raise = code.label_used(our_error_label)
-
- if can_raise:
- # inject code before the try block to save away the exception state
- code.globalstate.use_utility_code(reset_exception_utility_code)
+ code.putln(
+ "}")
+ temps_to_clean_up = code.funcstate.all_free_managed_temps()
+ can_raise = code.label_used(our_error_label)
+
+ if can_raise:
+ # inject code before the try block to save away the exception state
+ code.globalstate.use_utility_code(reset_exception_utility_code)
if not self.in_generator:
save_exc.putln("__Pyx_PyThreadState_declare")
save_exc.putln("__Pyx_PyThreadState_assign")
save_exc.putln("__Pyx_ExceptionSave(%s);" % (
', '.join(['&%s' % var for var in exc_save_vars])))
- for var in exc_save_vars:
- save_exc.put_xgotref(var)
-
- def restore_saved_exception():
- for name in exc_save_vars:
- code.put_xgiveref(name)
- code.putln("__Pyx_ExceptionReset(%s);" %
- ', '.join(exc_save_vars))
- else:
- # try block cannot raise exceptions, but we had to allocate the temps above,
- # so just keep the C compiler from complaining about them being unused
+ for var in exc_save_vars:
+ save_exc.put_xgotref(var)
+
+ def restore_saved_exception():
+ for name in exc_save_vars:
+ code.put_xgiveref(name)
+ code.putln("__Pyx_ExceptionReset(%s);" %
+ ', '.join(exc_save_vars))
+ else:
+ # try block cannot raise exceptions, but we had to allocate the temps above,
+ # so just keep the C compiler from complaining about them being unused
mark_vars_used = ["(void)%s;" % var for var in exc_save_vars]
save_exc.putln("%s /* mark used */" % ' '.join(mark_vars_used))
-
- def restore_saved_exception():
- pass
-
- code.error_label = except_error_label
- code.return_label = except_return_label
- normal_case_terminates = self.body.is_terminator
- if self.else_clause:
+
+ def restore_saved_exception():
+ pass
+
+ code.error_label = except_error_label
+ code.return_label = except_return_label
+ normal_case_terminates = self.body.is_terminator
+ if self.else_clause:
code.mark_pos(self.else_clause.pos)
- code.putln(
- "/*else:*/ {")
- self.else_clause.generate_execution_code(code)
- code.putln(
- "}")
- if not normal_case_terminates:
- normal_case_terminates = self.else_clause.is_terminator
-
- if can_raise:
- if not normal_case_terminates:
- for var in exc_save_vars:
- code.put_xdecref_clear(var, py_object_type)
- code.put_goto(try_end_label)
- code.put_label(our_error_label)
- for temp_name, temp_type in temps_to_clean_up:
- code.put_xdecref_clear(temp_name, temp_type)
+ code.putln(
+ "/*else:*/ {")
+ self.else_clause.generate_execution_code(code)
+ code.putln(
+ "}")
+ if not normal_case_terminates:
+ normal_case_terminates = self.else_clause.is_terminator
+
+ if can_raise:
+ if not normal_case_terminates:
+ for var in exc_save_vars:
+ code.put_xdecref_clear(var, py_object_type)
+ code.put_goto(try_end_label)
+ code.put_label(our_error_label)
+ for temp_name, temp_type in temps_to_clean_up:
+ code.put_xdecref_clear(temp_name, temp_type)
outer_except = code.funcstate.current_except
# Currently points to self, but the ExceptClauseNode would also be ok. Change if needed.
code.funcstate.current_except = self
- for except_clause in self.except_clauses:
- except_clause.generate_handling_code(code, except_end_label)
+ for except_clause in self.except_clauses:
+ except_clause.generate_handling_code(code, except_end_label)
code.funcstate.current_except = outer_except
- if not self.has_default_clause:
- code.put_goto(except_error_label)
-
- for exit_label, old_label in [(except_error_label, old_error_label),
- (try_break_label, old_break_label),
- (try_continue_label, old_continue_label),
- (try_return_label, old_return_label),
- (except_return_label, old_return_label)]:
- if code.label_used(exit_label):
- if not normal_case_terminates and not code.label_used(try_end_label):
- code.put_goto(try_end_label)
- code.put_label(exit_label)
+ if not self.has_default_clause:
+ code.put_goto(except_error_label)
+
+ for exit_label, old_label in [(except_error_label, old_error_label),
+ (try_break_label, old_break_label),
+ (try_continue_label, old_continue_label),
+ (try_return_label, old_return_label),
+ (except_return_label, old_return_label)]:
+ if code.label_used(exit_label):
+ if not normal_case_terminates and not code.label_used(try_end_label):
+ code.put_goto(try_end_label)
+ code.put_label(exit_label)
code.mark_pos(self.pos, trace=False)
if can_raise:
restore_saved_exception()
- code.put_goto(old_label)
-
- if code.label_used(except_end_label):
- if not normal_case_terminates and not code.label_used(try_end_label):
- code.put_goto(try_end_label)
- code.put_label(except_end_label)
+ code.put_goto(old_label)
+
+ if code.label_used(except_end_label):
+ if not normal_case_terminates and not code.label_used(try_end_label):
+ code.put_goto(try_end_label)
+ code.put_label(except_end_label)
if can_raise:
restore_saved_exception()
- if code.label_used(try_end_label):
- code.put_label(try_end_label)
- code.putln("}")
-
- for cname in exc_save_vars:
- code.funcstate.release_temp(cname)
-
- code.return_label = old_return_label
- code.break_label = old_break_label
- code.continue_label = old_continue_label
- code.error_label = old_error_label
-
- def generate_function_definitions(self, env, code):
- self.body.generate_function_definitions(env, code)
- for except_clause in self.except_clauses:
- except_clause.generate_function_definitions(env, code)
- if self.else_clause is not None:
- self.else_clause.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.body.annotate(code)
- for except_node in self.except_clauses:
- except_node.annotate(code)
- if self.else_clause:
- self.else_clause.annotate(code)
-
-
-class ExceptClauseNode(Node):
- # Part of try ... except statement.
- #
- # pattern [ExprNode]
- # target ExprNode or None
- # body StatNode
- # excinfo_target TupleNode(3*ResultRefNode) or None optional target for exception info (not owned here!)
- # match_flag string result of exception match
- # exc_value ExcValueNode used internally
- # function_name string qualified name of enclosing function
- # exc_vars (string * 3) local exception variables
- # is_except_as bool Py3-style "except ... as xyz"
-
- # excinfo_target is never set by the parser, but can be set by a transform
- # in order to extract more extensive information about the exception as a
- # sys.exc_info()-style tuple into a target variable
-
- child_attrs = ["pattern", "target", "body", "exc_value"]
-
- exc_value = None
- excinfo_target = None
- is_except_as = False
-
- def analyse_declarations(self, env):
- if self.target:
- self.target.analyse_target_declaration(env)
- self.body.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- self.function_name = env.qualified_name
- if self.pattern:
- # normalise/unpack self.pattern into a list
- for i, pattern in enumerate(self.pattern):
- pattern = pattern.analyse_expressions(env)
- self.pattern[i] = pattern.coerce_to_pyobject(env)
-
- if self.target:
- from . import ExprNodes
- self.exc_value = ExprNodes.ExcValueNode(self.pos)
- self.target = self.target.analyse_target_expression(env, self.exc_value)
-
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_handling_code(self, code, end_label):
- code.mark_pos(self.pos)
-
- if self.pattern:
+ if code.label_used(try_end_label):
+ code.put_label(try_end_label)
+ code.putln("}")
+
+ for cname in exc_save_vars:
+ code.funcstate.release_temp(cname)
+
+ code.return_label = old_return_label
+ code.break_label = old_break_label
+ code.continue_label = old_continue_label
+ code.error_label = old_error_label
+
+ def generate_function_definitions(self, env, code):
+ self.body.generate_function_definitions(env, code)
+ for except_clause in self.except_clauses:
+ except_clause.generate_function_definitions(env, code)
+ if self.else_clause is not None:
+ self.else_clause.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.body.annotate(code)
+ for except_node in self.except_clauses:
+ except_node.annotate(code)
+ if self.else_clause:
+ self.else_clause.annotate(code)
+
+
+class ExceptClauseNode(Node):
+ # Part of try ... except statement.
+ #
+ # pattern [ExprNode]
+ # target ExprNode or None
+ # body StatNode
+ # excinfo_target TupleNode(3*ResultRefNode) or None optional target for exception info (not owned here!)
+ # match_flag string result of exception match
+ # exc_value ExcValueNode used internally
+ # function_name string qualified name of enclosing function
+ # exc_vars (string * 3) local exception variables
+ # is_except_as bool Py3-style "except ... as xyz"
+
+ # excinfo_target is never set by the parser, but can be set by a transform
+ # in order to extract more extensive information about the exception as a
+ # sys.exc_info()-style tuple into a target variable
+
+ child_attrs = ["pattern", "target", "body", "exc_value"]
+
+ exc_value = None
+ excinfo_target = None
+ is_except_as = False
+
+ def analyse_declarations(self, env):
+ if self.target:
+ self.target.analyse_target_declaration(env)
+ self.body.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ self.function_name = env.qualified_name
+ if self.pattern:
+ # normalise/unpack self.pattern into a list
+ for i, pattern in enumerate(self.pattern):
+ pattern = pattern.analyse_expressions(env)
+ self.pattern[i] = pattern.coerce_to_pyobject(env)
+
+ if self.target:
+ from . import ExprNodes
+ self.exc_value = ExprNodes.ExcValueNode(self.pos)
+ self.target = self.target.analyse_target_expression(env, self.exc_value)
+
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_handling_code(self, code, end_label):
+ code.mark_pos(self.pos)
+
+ if self.pattern:
has_non_literals = not all(
pattern.is_literal or pattern.is_simple() and not pattern.is_temp
for pattern in self.pattern)
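To make the relation_table in the hunk above concrete: relation1 chooses the start offset applied to bound1, relation2 becomes the loop test against bound2, and a step rewrites '++'/'--' into '+= STEP'/'-= STEP'. The snippet below is a rough, self-contained sketch of that string assembly, not Cython's actual emitter (which, among other differences, places the increment inside the loop body to cope with the unsigned endpoint case):

    relation_table = {
        # {relop : (initial offset, increment op)}
        '<=': ("", "++"),
        '<' : ("+1", "++"),
        '>=': ("", "--"),
        '>' : ("-1", "--"),
    }

    def c_loop_header(var, bound1, relation1, relation2, bound2, step=None):
        offset, incop = relation_table[relation1]
        if step is not None:
            incop = "%s=%s" % (incop[0], step)   # e.g. '++' => '+= STEP'
        return "for (%s = %s%s; %s %s %s; %s%s)" % (
            var, bound1, offset, var, relation2, bound2, var, incop)

    print(c_loop_header("i", "0", "<=", "<", "n"))            # for (i = 0; i < n; i++)
    print(c_loop_header("i", "10", ">", ">=", "0", step="2")) # for (i = 10-1; i >= 0; i-=2)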
@@ -7402,16 +7402,16 @@ class ExceptClauseNode(Node):
code.globalstate.use_utility_code(UtilityCode.load_cached("PyErrExceptionMatches", "Exceptions.c"))
exc_test_func = "__Pyx_PyErr_ExceptionMatches(%s)"
- exc_tests = []
- for pattern in self.pattern:
- pattern.generate_evaluation_code(code)
+ exc_tests = []
+ for pattern in self.pattern:
+ pattern.generate_evaluation_code(code)
exc_tests.append(exc_test_func % pattern.py_result())
-
+
match_flag = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
code.putln("%s = %s;" % (match_flag, ' || '.join(exc_tests)))
- for pattern in self.pattern:
- pattern.generate_disposal_code(code)
- pattern.free_temps(code)
+ for pattern in self.pattern:
+ pattern.generate_disposal_code(code)
+ pattern.free_temps(code)
if has_non_literals:
code.putln("__Pyx_ErrRestore(%s, %s, %s);" % tuple(exc_vars))
@@ -7419,247 +7419,247 @@ class ExceptClauseNode(Node):
for temp in exc_vars:
code.funcstate.release_temp(temp)
- code.putln(
- "if (%s) {" %
- match_flag)
- code.funcstate.release_temp(match_flag)
- else:
- code.putln("/*except:*/ {")
-
- if (not getattr(self.body, 'stats', True)
- and self.excinfo_target is None
- and self.target is None):
- # most simple case: no exception variable, empty body (pass)
- # => reset the exception state, done
+ code.putln(
+ "if (%s) {" %
+ match_flag)
+ code.funcstate.release_temp(match_flag)
+ else:
+ code.putln("/*except:*/ {")
+
+ if (not getattr(self.body, 'stats', True)
+ and self.excinfo_target is None
+ and self.target is None):
+ # most simple case: no exception variable, empty body (pass)
+ # => reset the exception state, done
code.globalstate.use_utility_code(UtilityCode.load_cached("PyErrFetchRestore", "Exceptions.c"))
code.putln("__Pyx_ErrRestore(0,0,0);")
- code.put_goto(end_label)
- code.putln("}")
- return
-
+ code.put_goto(end_label)
+ code.putln("}")
+ return
+
exc_vars = [code.funcstate.allocate_temp(py_object_type, manage_ref=True)
for _ in range(3)]
- code.put_add_traceback(self.function_name)
- # We always have to fetch the exception value even if
- # there is no target, because this also normalises the
- # exception and stores it in the thread state.
- code.globalstate.use_utility_code(get_exception_utility_code)
- exc_args = "&%s, &%s, &%s" % tuple(exc_vars)
+ code.put_add_traceback(self.function_name)
+ # We always have to fetch the exception value even if
+ # there is no target, because this also normalises the
+ # exception and stores it in the thread state.
+ code.globalstate.use_utility_code(get_exception_utility_code)
+ exc_args = "&%s, &%s, &%s" % tuple(exc_vars)
code.putln("if (__Pyx_GetException(%s) < 0) %s" % (
exc_args, code.error_goto(self.pos)))
for var in exc_vars:
code.put_gotref(var)
- if self.target:
- self.exc_value.set_var(exc_vars[1])
- self.exc_value.generate_evaluation_code(code)
- self.target.generate_assignment_code(self.exc_value, code)
- if self.excinfo_target is not None:
- for tempvar, node in zip(exc_vars, self.excinfo_target.args):
- node.set_var(tempvar)
-
- old_break_label, old_continue_label = code.break_label, code.continue_label
- code.break_label = code.new_label('except_break')
- code.continue_label = code.new_label('except_continue')
-
- old_exc_vars = code.funcstate.exc_vars
- code.funcstate.exc_vars = exc_vars
- self.body.generate_execution_code(code)
- code.funcstate.exc_vars = old_exc_vars
-
- if not self.body.is_terminator:
- for var in exc_vars:
+ if self.target:
+ self.exc_value.set_var(exc_vars[1])
+ self.exc_value.generate_evaluation_code(code)
+ self.target.generate_assignment_code(self.exc_value, code)
+ if self.excinfo_target is not None:
+ for tempvar, node in zip(exc_vars, self.excinfo_target.args):
+ node.set_var(tempvar)
+
+ old_break_label, old_continue_label = code.break_label, code.continue_label
+ code.break_label = code.new_label('except_break')
+ code.continue_label = code.new_label('except_continue')
+
+ old_exc_vars = code.funcstate.exc_vars
+ code.funcstate.exc_vars = exc_vars
+ self.body.generate_execution_code(code)
+ code.funcstate.exc_vars = old_exc_vars
+
+ if not self.body.is_terminator:
+ for var in exc_vars:
# FIXME: XDECREF() is needed to allow re-raising (which clears the exc_vars),
# but I don't think it's the right solution.
code.put_xdecref_clear(var, py_object_type)
- code.put_goto(end_label)
-
- for new_label, old_label in [(code.break_label, old_break_label),
- (code.continue_label, old_continue_label)]:
- if code.label_used(new_label):
- code.put_label(new_label)
- for var in exc_vars:
- code.put_decref_clear(var, py_object_type)
- code.put_goto(old_label)
- code.break_label = old_break_label
- code.continue_label = old_continue_label
-
- for temp in exc_vars:
- code.funcstate.release_temp(temp)
-
- code.putln(
- "}")
-
- def generate_function_definitions(self, env, code):
- if self.target is not None:
- self.target.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
-
- def annotate(self, code):
- if self.pattern:
- for pattern in self.pattern:
- pattern.annotate(code)
- if self.target:
- self.target.annotate(code)
- self.body.annotate(code)
-
-
-class TryFinallyStatNode(StatNode):
- # try ... finally statement
- #
- # body StatNode
- # finally_clause StatNode
+ code.put_goto(end_label)
+
+ for new_label, old_label in [(code.break_label, old_break_label),
+ (code.continue_label, old_continue_label)]:
+ if code.label_used(new_label):
+ code.put_label(new_label)
+ for var in exc_vars:
+ code.put_decref_clear(var, py_object_type)
+ code.put_goto(old_label)
+ code.break_label = old_break_label
+ code.continue_label = old_continue_label
+
+ for temp in exc_vars:
+ code.funcstate.release_temp(temp)
+
+ code.putln(
+ "}")
+
+ def generate_function_definitions(self, env, code):
+ if self.target is not None:
+ self.target.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ if self.pattern:
+ for pattern in self.pattern:
+ pattern.annotate(code)
+ if self.target:
+ self.target.annotate(code)
+ self.body.annotate(code)
+
+
+class TryFinallyStatNode(StatNode):
+ # try ... finally statement
+ #
+ # body StatNode
+ # finally_clause StatNode
# finally_except_clause deep-copy of finally_clause for exception case
# in_generator inside of generator => must store away current exception also in return case
- #
+ #
# Each of the continue, break, return and error gotos runs
# into its own deep-copy of the finally block code.
- # In addition, if we're doing an error, we save the
- # exception on entry to the finally block and restore
- # it on exit.
-
+ # In addition, if we're doing an error, we save the
+ # exception on entry to the finally block and restore
+ # it on exit.
+
child_attrs = ["body", "finally_clause", "finally_except_clause"]
-
- preserve_exception = 1
-
- # handle exception case, in addition to return/break/continue
- handle_error_case = True
- func_return_type = None
+
+ preserve_exception = 1
+
+ # handle exception case, in addition to return/break/continue
+ handle_error_case = True
+ func_return_type = None
finally_except_clause = None
-
- is_try_finally_in_nogil = False
+
+ is_try_finally_in_nogil = False
in_generator = False
-
+
@staticmethod
- def create_analysed(pos, env, body, finally_clause):
- node = TryFinallyStatNode(pos, body=body, finally_clause=finally_clause)
- return node
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
+ def create_analysed(pos, env, body, finally_clause):
+ node = TryFinallyStatNode(pos, body=body, finally_clause=finally_clause)
+ return node
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
self.finally_except_clause = copy.deepcopy(self.finally_clause)
self.finally_except_clause.analyse_declarations(env)
- self.finally_clause.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- self.body = self.body.analyse_expressions(env)
- self.finally_clause = self.finally_clause.analyse_expressions(env)
+ self.finally_clause.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ self.body = self.body.analyse_expressions(env)
+ self.finally_clause = self.finally_clause.analyse_expressions(env)
self.finally_except_clause = self.finally_except_clause.analyse_expressions(env)
- if env.return_type and not env.return_type.is_void:
- self.func_return_type = env.return_type
- return self
-
- nogil_check = Node.gil_error
- gil_message = "Try-finally statement"
-
- def generate_execution_code(self, code):
+ if env.return_type and not env.return_type.is_void:
+ self.func_return_type = env.return_type
+ return self
+
+ nogil_check = Node.gil_error
+ gil_message = "Try-finally statement"
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos) # before changing the error label, in case of tracing errors
code.putln("/*try:*/ {")
- old_error_label = code.error_label
- old_labels = code.all_new_labels()
- new_labels = code.get_all_labels()
- new_error_label = code.error_label
- if not self.handle_error_case:
- code.error_label = old_error_label
- catch_label = code.new_label()
-
+ old_error_label = code.error_label
+ old_labels = code.all_new_labels()
+ new_labels = code.get_all_labels()
+ new_error_label = code.error_label
+ if not self.handle_error_case:
+ code.error_label = old_error_label
+ catch_label = code.new_label()
+
was_in_try_finally = code.funcstate.in_try_finally
code.funcstate.in_try_finally = 1
-
- self.body.generate_execution_code(code)
-
+
+ self.body.generate_execution_code(code)
+
code.funcstate.in_try_finally = was_in_try_finally
- code.putln("}")
-
- temps_to_clean_up = code.funcstate.all_free_managed_temps()
- code.mark_pos(self.finally_clause.pos)
- code.putln("/*finally:*/ {")
-
+ code.putln("}")
+
+ temps_to_clean_up = code.funcstate.all_free_managed_temps()
+ code.mark_pos(self.finally_clause.pos)
+ code.putln("/*finally:*/ {")
+
# Reset labels only after writing out a potential line trace call for correct nogil error handling.
code.set_all_labels(old_labels)
- def fresh_finally_clause(_next=[self.finally_clause]):
- # generate the original subtree once and always keep a fresh copy
- node = _next[0]
- node_copy = copy.deepcopy(node)
- if node is self.finally_clause:
- _next[0] = node_copy
- else:
- node = node_copy
- return node
-
- preserve_error = self.preserve_exception and code.label_used(new_error_label)
- needs_success_cleanup = not self.finally_clause.is_terminator
-
- if not self.body.is_terminator:
- code.putln('/*normal exit:*/{')
- fresh_finally_clause().generate_execution_code(code)
- if not self.finally_clause.is_terminator:
- code.put_goto(catch_label)
- code.putln('}')
-
- if preserve_error:
+ def fresh_finally_clause(_next=[self.finally_clause]):
+ # generate the original subtree once and always keep a fresh copy
+ node = _next[0]
+ node_copy = copy.deepcopy(node)
+ if node is self.finally_clause:
+ _next[0] = node_copy
+ else:
+ node = node_copy
+ return node
+
+ preserve_error = self.preserve_exception and code.label_used(new_error_label)
+ needs_success_cleanup = not self.finally_clause.is_terminator
+
+ if not self.body.is_terminator:
+ code.putln('/*normal exit:*/{')
+ fresh_finally_clause().generate_execution_code(code)
+ if not self.finally_clause.is_terminator:
+ code.put_goto(catch_label)
+ code.putln('}')
+
+ if preserve_error:
code.put_label(new_error_label)
- code.putln('/*exception exit:*/{')
+ code.putln('/*exception exit:*/{')
if not self.in_generator:
code.putln("__Pyx_PyThreadState_declare")
- if self.is_try_finally_in_nogil:
- code.declare_gilstate()
- if needs_success_cleanup:
- exc_lineno_cnames = tuple([
- code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
- for _ in range(2)])
- exc_filename_cname = code.funcstate.allocate_temp(
- PyrexTypes.CPtrType(PyrexTypes.c_const_type(PyrexTypes.c_char_type)),
- manage_ref=False)
- else:
- exc_lineno_cnames = exc_filename_cname = None
- exc_vars = tuple([
- code.funcstate.allocate_temp(py_object_type, manage_ref=False)
- for _ in range(6)])
- self.put_error_catcher(
- code, temps_to_clean_up, exc_vars, exc_lineno_cnames, exc_filename_cname)
- finally_old_labels = code.all_new_labels()
-
- code.putln('{')
- old_exc_vars = code.funcstate.exc_vars
- code.funcstate.exc_vars = exc_vars[:3]
+ if self.is_try_finally_in_nogil:
+ code.declare_gilstate()
+ if needs_success_cleanup:
+ exc_lineno_cnames = tuple([
+ code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
+ for _ in range(2)])
+ exc_filename_cname = code.funcstate.allocate_temp(
+ PyrexTypes.CPtrType(PyrexTypes.c_const_type(PyrexTypes.c_char_type)),
+ manage_ref=False)
+ else:
+ exc_lineno_cnames = exc_filename_cname = None
+ exc_vars = tuple([
+ code.funcstate.allocate_temp(py_object_type, manage_ref=False)
+ for _ in range(6)])
+ self.put_error_catcher(
+ code, temps_to_clean_up, exc_vars, exc_lineno_cnames, exc_filename_cname)
+ finally_old_labels = code.all_new_labels()
+
+ code.putln('{')
+ old_exc_vars = code.funcstate.exc_vars
+ code.funcstate.exc_vars = exc_vars[:3]
self.finally_except_clause.generate_execution_code(code)
- code.funcstate.exc_vars = old_exc_vars
- code.putln('}')
-
- if needs_success_cleanup:
- self.put_error_uncatcher(code, exc_vars, exc_lineno_cnames, exc_filename_cname)
- if exc_lineno_cnames:
- for cname in exc_lineno_cnames:
- code.funcstate.release_temp(cname)
- if exc_filename_cname:
- code.funcstate.release_temp(exc_filename_cname)
- code.put_goto(old_error_label)
-
- for new_label, old_label in zip(code.get_all_labels(), finally_old_labels):
- if not code.label_used(new_label):
- continue
- code.put_label(new_label)
- self.put_error_cleaner(code, exc_vars)
- code.put_goto(old_label)
-
- for cname in exc_vars:
- code.funcstate.release_temp(cname)
- code.putln('}')
-
- code.set_all_labels(old_labels)
- return_label = code.return_label
+ code.funcstate.exc_vars = old_exc_vars
+ code.putln('}')
+
+ if needs_success_cleanup:
+ self.put_error_uncatcher(code, exc_vars, exc_lineno_cnames, exc_filename_cname)
+ if exc_lineno_cnames:
+ for cname in exc_lineno_cnames:
+ code.funcstate.release_temp(cname)
+ if exc_filename_cname:
+ code.funcstate.release_temp(exc_filename_cname)
+ code.put_goto(old_error_label)
+
+ for new_label, old_label in zip(code.get_all_labels(), finally_old_labels):
+ if not code.label_used(new_label):
+ continue
+ code.put_label(new_label)
+ self.put_error_cleaner(code, exc_vars)
+ code.put_goto(old_label)
+
+ for cname in exc_vars:
+ code.funcstate.release_temp(cname)
+ code.putln('}')
+
+ code.set_all_labels(old_labels)
+ return_label = code.return_label
exc_vars = ()
- for i, (new_label, old_label) in enumerate(zip(new_labels, old_labels)):
- if not code.label_used(new_label):
- continue
- if new_label == new_error_label and preserve_error:
- continue # handled above
-
+ for i, (new_label, old_label) in enumerate(zip(new_labels, old_labels)):
+ if not code.label_used(new_label):
+ continue
+ if new_label == new_error_label and preserve_error:
+ continue # handled above
+
code.putln('%s: {' % new_label)
- ret_temp = None
+ ret_temp = None
if old_label == return_label:
# return actually raises an (uncatchable) exception in generators that we must preserve
if self.in_generator:
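As the comments in the hunk above explain, each exit path out of the try block (normal completion, error, return, break, continue) runs code generated from its own deep copy of the finally clause, handed out by fresh_finally_clause(). Below is a stand-alone sketch of that "original once, fresh copies afterwards" pattern; the names are invented for illustration, and the real helper operates on parse-tree nodes rather than lists:

    import copy

    def make_fresh_supplier(original):
        holder = [original]
        def fresh():
            node = holder[0]
            node_copy = copy.deepcopy(node)
            if node is original:
                holder[0] = node_copy   # keep an untouched copy for later calls
            else:
                node = node_copy        # later calls always return a new copy
            return node
        return fresh

    fresh = make_fresh_supplier(["finally", "body"])
    first, second = fresh(), fresh()
    print(first is second)              # False: every exit path gets its own tree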
@@ -7683,7 +7683,7 @@ class TryFinallyStatNode(StatNode):
if old_label == return_label:
if ret_temp:
code.putln("%s = %s;" % (Naming.retval_cname, ret_temp))
- if self.func_return_type.is_pyobject:
+ if self.func_return_type.is_pyobject:
code.putln("%s = 0;" % ret_temp)
code.funcstate.release_temp(ret_temp)
if self.in_generator:
@@ -7691,232 +7691,232 @@ class TryFinallyStatNode(StatNode):
for cname in exc_vars:
code.funcstate.release_temp(cname)
- if not self.finally_clause.is_terminator:
- code.put_goto(old_label)
- code.putln('}')
-
- # End finally
- code.put_label(catch_label)
- code.putln(
- "}")
-
- def generate_function_definitions(self, env, code):
- self.body.generate_function_definitions(env, code)
- self.finally_clause.generate_function_definitions(env, code)
-
- def put_error_catcher(self, code, temps_to_clean_up, exc_vars,
+ if not self.finally_clause.is_terminator:
+ code.put_goto(old_label)
+ code.putln('}')
+
+ # End finally
+ code.put_label(catch_label)
+ code.putln(
+ "}")
+
+ def generate_function_definitions(self, env, code):
+ self.body.generate_function_definitions(env, code)
+ self.finally_clause.generate_function_definitions(env, code)
+
+ def put_error_catcher(self, code, temps_to_clean_up, exc_vars,
exc_lineno_cnames=None, exc_filename_cname=None):
- code.globalstate.use_utility_code(restore_exception_utility_code)
- code.globalstate.use_utility_code(get_exception_utility_code)
- code.globalstate.use_utility_code(swap_exception_utility_code)
-
- if self.is_try_finally_in_nogil:
- code.put_ensure_gil(declare_gilstate=False)
+ code.globalstate.use_utility_code(restore_exception_utility_code)
+ code.globalstate.use_utility_code(get_exception_utility_code)
+ code.globalstate.use_utility_code(swap_exception_utility_code)
+
+ if self.is_try_finally_in_nogil:
+ code.put_ensure_gil(declare_gilstate=False)
code.putln("__Pyx_PyThreadState_assign")
-
+
code.putln(' '.join(["%s = 0;" % var for var in exc_vars]))
- for temp_name, type in temps_to_clean_up:
- code.put_xdecref_clear(temp_name, type)
-
- # not using preprocessor here to avoid warnings about
- # unused utility functions and/or temps
- code.putln("if (PY_MAJOR_VERSION >= 3)"
- " __Pyx_ExceptionSwap(&%s, &%s, &%s);" % exc_vars[3:])
- code.putln("if ((PY_MAJOR_VERSION < 3) ||"
- # if __Pyx_GetException() fails in Py3,
- # store the newly raised exception instead
- " unlikely(__Pyx_GetException(&%s, &%s, &%s) < 0)) "
- "__Pyx_ErrFetch(&%s, &%s, &%s);" % (exc_vars[:3] * 2))
- for var in exc_vars:
- code.put_xgotref(var)
- if exc_lineno_cnames:
- code.putln("%s = %s; %s = %s; %s = %s;" % (
- exc_lineno_cnames[0], Naming.lineno_cname,
- exc_lineno_cnames[1], Naming.clineno_cname,
- exc_filename_cname, Naming.filename_cname))
-
- if self.is_try_finally_in_nogil:
- code.put_release_ensured_gil()
-
+ for temp_name, type in temps_to_clean_up:
+ code.put_xdecref_clear(temp_name, type)
+
+ # not using preprocessor here to avoid warnings about
+ # unused utility functions and/or temps
+ code.putln("if (PY_MAJOR_VERSION >= 3)"
+ " __Pyx_ExceptionSwap(&%s, &%s, &%s);" % exc_vars[3:])
+ code.putln("if ((PY_MAJOR_VERSION < 3) ||"
+ # if __Pyx_GetException() fails in Py3,
+ # store the newly raised exception instead
+ " unlikely(__Pyx_GetException(&%s, &%s, &%s) < 0)) "
+ "__Pyx_ErrFetch(&%s, &%s, &%s);" % (exc_vars[:3] * 2))
+ for var in exc_vars:
+ code.put_xgotref(var)
+ if exc_lineno_cnames:
+ code.putln("%s = %s; %s = %s; %s = %s;" % (
+ exc_lineno_cnames[0], Naming.lineno_cname,
+ exc_lineno_cnames[1], Naming.clineno_cname,
+ exc_filename_cname, Naming.filename_cname))
+
+ if self.is_try_finally_in_nogil:
+ code.put_release_ensured_gil()
+
def put_error_uncatcher(self, code, exc_vars, exc_lineno_cnames=None, exc_filename_cname=None):
- code.globalstate.use_utility_code(restore_exception_utility_code)
- code.globalstate.use_utility_code(reset_exception_utility_code)
-
- if self.is_try_finally_in_nogil:
- code.put_ensure_gil(declare_gilstate=False)
-
- # not using preprocessor here to avoid warnings about
- # unused utility functions and/or temps
- code.putln("if (PY_MAJOR_VERSION >= 3) {")
- for var in exc_vars[3:]:
- code.put_xgiveref(var)
- code.putln("__Pyx_ExceptionReset(%s, %s, %s);" % exc_vars[3:])
- code.putln("}")
- for var in exc_vars[:3]:
- code.put_xgiveref(var)
- code.putln("__Pyx_ErrRestore(%s, %s, %s);" % exc_vars[:3])
-
- if self.is_try_finally_in_nogil:
- code.put_release_ensured_gil()
-
+ code.globalstate.use_utility_code(restore_exception_utility_code)
+ code.globalstate.use_utility_code(reset_exception_utility_code)
+
+ if self.is_try_finally_in_nogil:
+ code.put_ensure_gil(declare_gilstate=False)
+
+ # not using preprocessor here to avoid warnings about
+ # unused utility functions and/or temps
+ code.putln("if (PY_MAJOR_VERSION >= 3) {")
+ for var in exc_vars[3:]:
+ code.put_xgiveref(var)
+ code.putln("__Pyx_ExceptionReset(%s, %s, %s);" % exc_vars[3:])
+ code.putln("}")
+ for var in exc_vars[:3]:
+ code.put_xgiveref(var)
+ code.putln("__Pyx_ErrRestore(%s, %s, %s);" % exc_vars[:3])
+
+ if self.is_try_finally_in_nogil:
+ code.put_release_ensured_gil()
+
code.putln(' '.join(["%s = 0;" % var for var in exc_vars]))
- if exc_lineno_cnames:
- code.putln("%s = %s; %s = %s; %s = %s;" % (
- Naming.lineno_cname, exc_lineno_cnames[0],
- Naming.clineno_cname, exc_lineno_cnames[1],
- Naming.filename_cname, exc_filename_cname))
-
- def put_error_cleaner(self, code, exc_vars):
- code.globalstate.use_utility_code(reset_exception_utility_code)
- if self.is_try_finally_in_nogil:
- code.put_ensure_gil(declare_gilstate=False)
-
- # not using preprocessor here to avoid warnings about
- # unused utility functions and/or temps
- code.putln("if (PY_MAJOR_VERSION >= 3) {")
- for var in exc_vars[3:]:
- code.put_xgiveref(var)
- code.putln("__Pyx_ExceptionReset(%s, %s, %s);" % exc_vars[3:])
- code.putln("}")
- for var in exc_vars[:3]:
- code.put_xdecref_clear(var, py_object_type)
- if self.is_try_finally_in_nogil:
- code.put_release_ensured_gil()
- code.putln(' '.join(["%s = 0;"]*3) % exc_vars[3:])
-
- def annotate(self, code):
- self.body.annotate(code)
- self.finally_clause.annotate(code)
-
-
-class NogilTryFinallyStatNode(TryFinallyStatNode):
- """
- A try/finally statement that may be used in nogil code sections.
- """
-
- preserve_exception = False
- nogil_check = None
-
-
-class GILStatNode(NogilTryFinallyStatNode):
- # 'with gil' or 'with nogil' statement
- #
- # state string 'gil' or 'nogil'
-
- state_temp = None
-
- def __init__(self, pos, state, body):
- self.state = state
- self.create_state_temp_if_needed(pos, state, body)
+ if exc_lineno_cnames:
+ code.putln("%s = %s; %s = %s; %s = %s;" % (
+ Naming.lineno_cname, exc_lineno_cnames[0],
+ Naming.clineno_cname, exc_lineno_cnames[1],
+ Naming.filename_cname, exc_filename_cname))
+
+ def put_error_cleaner(self, code, exc_vars):
+ code.globalstate.use_utility_code(reset_exception_utility_code)
+ if self.is_try_finally_in_nogil:
+ code.put_ensure_gil(declare_gilstate=False)
+
+ # not using preprocessor here to avoid warnings about
+ # unused utility functions and/or temps
+ code.putln("if (PY_MAJOR_VERSION >= 3) {")
+ for var in exc_vars[3:]:
+ code.put_xgiveref(var)
+ code.putln("__Pyx_ExceptionReset(%s, %s, %s);" % exc_vars[3:])
+ code.putln("}")
+ for var in exc_vars[:3]:
+ code.put_xdecref_clear(var, py_object_type)
+ if self.is_try_finally_in_nogil:
+ code.put_release_ensured_gil()
+ code.putln(' '.join(["%s = 0;"]*3) % exc_vars[3:])
+
+ def annotate(self, code):
+ self.body.annotate(code)
+ self.finally_clause.annotate(code)
+
+
+class NogilTryFinallyStatNode(TryFinallyStatNode):
+ """
+ A try/finally statement that may be used in nogil code sections.
+ """
+
+ preserve_exception = False
+ nogil_check = None
+
+
+class GILStatNode(NogilTryFinallyStatNode):
+ # 'with gil' or 'with nogil' statement
+ #
+ # state string 'gil' or 'nogil'
+
+ state_temp = None
+
+ def __init__(self, pos, state, body):
+ self.state = state
+ self.create_state_temp_if_needed(pos, state, body)
TryFinallyStatNode.__init__(
self, pos,
- body=body,
- finally_clause=GILExitNode(
- pos, state=state, state_temp=self.state_temp))
-
- def create_state_temp_if_needed(self, pos, state, body):
- from .ParseTreeTransforms import YieldNodeCollector
- collector = YieldNodeCollector()
- collector.visitchildren(body)
+ body=body,
+ finally_clause=GILExitNode(
+ pos, state=state, state_temp=self.state_temp))
+
+ def create_state_temp_if_needed(self, pos, state, body):
+ from .ParseTreeTransforms import YieldNodeCollector
+ collector = YieldNodeCollector()
+ collector.visitchildren(body)
if not collector.yields:
- return
-
- if state == 'gil':
- temp_type = PyrexTypes.c_gilstate_type
- else:
- temp_type = PyrexTypes.c_threadstate_ptr_type
- from . import ExprNodes
- self.state_temp = ExprNodes.TempNode(pos, temp_type)
-
- def analyse_declarations(self, env):
- env._in_with_gil_block = (self.state == 'gil')
- if self.state == 'gil':
- env.has_with_gil_block = True
-
- return super(GILStatNode, self).analyse_declarations(env)
-
- def analyse_expressions(self, env):
- env.use_utility_code(
- UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
- was_nogil = env.nogil
- env.nogil = self.state == 'nogil'
- node = TryFinallyStatNode.analyse_expressions(self, env)
- env.nogil = was_nogil
- return node
-
- def generate_execution_code(self, code):
- code.mark_pos(self.pos)
- code.begin_block()
- if self.state_temp:
- self.state_temp.allocate(code)
- variable = self.state_temp.result()
- else:
- variable = None
-
+ return
+
+ if state == 'gil':
+ temp_type = PyrexTypes.c_gilstate_type
+ else:
+ temp_type = PyrexTypes.c_threadstate_ptr_type
+ from . import ExprNodes
+ self.state_temp = ExprNodes.TempNode(pos, temp_type)
+
+ def analyse_declarations(self, env):
+ env._in_with_gil_block = (self.state == 'gil')
+ if self.state == 'gil':
+ env.has_with_gil_block = True
+
+ return super(GILStatNode, self).analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ env.use_utility_code(
+ UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
+ was_nogil = env.nogil
+ env.nogil = self.state == 'nogil'
+ node = TryFinallyStatNode.analyse_expressions(self, env)
+ env.nogil = was_nogil
+ return node
+
+ def generate_execution_code(self, code):
+ code.mark_pos(self.pos)
+ code.begin_block()
+ if self.state_temp:
+ self.state_temp.allocate(code)
+ variable = self.state_temp.result()
+ else:
+ variable = None
+
old_gil_config = code.funcstate.gil_owned
- if self.state == 'gil':
- code.put_ensure_gil(variable=variable)
+ if self.state == 'gil':
+ code.put_ensure_gil(variable=variable)
code.funcstate.gil_owned = True
- else:
- code.put_release_gil(variable=variable)
+ else:
+ code.put_release_gil(variable=variable)
code.funcstate.gil_owned = False
-
- TryFinallyStatNode.generate_execution_code(self, code)
-
- if self.state_temp:
- self.state_temp.release(code)
-
+
+ TryFinallyStatNode.generate_execution_code(self, code)
+
+ if self.state_temp:
+ self.state_temp.release(code)
+
code.funcstate.gil_owned = old_gil_config
- code.end_block()
-
-
-class GILExitNode(StatNode):
- """
- Used as the 'finally' block in a GILStatNode
-
- state string 'gil' or 'nogil'
- """
-
- child_attrs = []
- state_temp = None
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- if self.state_temp:
- variable = self.state_temp.result()
- else:
- variable = None
-
- if self.state == 'gil':
- code.put_release_ensured_gil(variable)
- else:
- code.put_acquire_gil(variable)
-
-
-class EnsureGILNode(GILExitNode):
- """
- Ensure the GIL in nogil functions for cleanup before returning.
- """
-
- def generate_execution_code(self, code):
- code.put_ensure_gil(declare_gilstate=False)
-
+ code.end_block()
+
+
+class GILExitNode(StatNode):
+ """
+ Used as the 'finally' block in a GILStatNode
+
+ state string 'gil' or 'nogil'
+ """
+
+ child_attrs = []
+ state_temp = None
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ if self.state_temp:
+ variable = self.state_temp.result()
+ else:
+ variable = None
+
+ if self.state == 'gil':
+ code.put_release_ensured_gil(variable)
+ else:
+ code.put_acquire_gil(variable)
+
+
+class EnsureGILNode(GILExitNode):
+ """
+ Ensure the GIL in nogil functions for cleanup before returning.
+ """
+
+ def generate_execution_code(self, code):
+ code.put_ensure_gil(declare_gilstate=False)
+
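# Editor's note: a minimal Cython usage sketch (illustration only, not part of this
# patch) of the constructs the GIL nodes above compile. GILStatNode handles the
# 'with gil:' / 'with nogil:' statements; GILExitNode emits the matching
# release/acquire in the generated 'finally' section.
#
#     cdef double square(double x) nogil:
#         with gil:                      # GILStatNode(state='gil') -> PyGILState_Ensure()
#             print(x)                   # Python-level work is safe while the GIL is held
#         return x * x                   # the 'finally' (GILExitNode) releases the GIL again
#
#     def run():
#         with nogil:                    # GILStatNode(state='nogil') -> releases the GIL
#             square(3.0)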
def cython_view_utility_code():
from . import MemoryView
return MemoryView.view_utility_code
-utility_code_for_cimports = {
- # utility code (or inlining c) in a pxd (or pyx) file.
- # TODO: Consider a generic user-level mechanism for importing
+utility_code_for_cimports = {
+ # utility code (or inlining c) in a pxd (or pyx) file.
+ # TODO: Consider a generic user-level mechanism for importing
'cpython.array' : lambda : UtilityCode.load_cached("ArrayAPI", "arrayarray.h"),
'cpython.array.array' : lambda : UtilityCode.load_cached("ArrayAPI", "arrayarray.h"),
'cython.view' : cython_view_utility_code,
-}
-
+}
+
utility_code_for_imports = {
# utility code used when special modules are imported.
# TODO: Consider a generic user-level mechanism for importing
@@ -7925,1522 +7925,1522 @@ utility_code_for_imports = {
}
-class CImportStatNode(StatNode):
- # cimport statement
- #
- # module_name string Qualified name of module being imported
- # as_name string or None Name specified in "as" clause, if any
+class CImportStatNode(StatNode):
+ # cimport statement
+ #
+ # module_name string Qualified name of module being imported
+ # as_name string or None Name specified in "as" clause, if any
# is_absolute bool True for absolute imports, False otherwise
-
- child_attrs = []
+
+ child_attrs = []
is_absolute = False
-
- def analyse_declarations(self, env):
- if not env.is_module_scope:
- error(self.pos, "cimport only allowed at module level")
- return
+
+ def analyse_declarations(self, env):
+ if not env.is_module_scope:
+ error(self.pos, "cimport only allowed at module level")
+ return
module_scope = env.find_module(
self.module_name, self.pos, relative_level=0 if self.is_absolute else -1)
- if "." in self.module_name:
- names = [EncodedString(name) for name in self.module_name.split(".")]
- top_name = names[0]
- top_module_scope = env.context.find_submodule(top_name)
- module_scope = top_module_scope
- for name in names[1:]:
- submodule_scope = module_scope.find_submodule(name)
- module_scope.declare_module(name, submodule_scope, self.pos)
- module_scope = submodule_scope
- if self.as_name:
- env.declare_module(self.as_name, module_scope, self.pos)
- else:
- env.add_imported_module(module_scope)
- env.declare_module(top_name, top_module_scope, self.pos)
- else:
- name = self.as_name or self.module_name
- env.declare_module(name, module_scope, self.pos)
- if self.module_name in utility_code_for_cimports:
+ if "." in self.module_name:
+ names = [EncodedString(name) for name in self.module_name.split(".")]
+ top_name = names[0]
+ top_module_scope = env.context.find_submodule(top_name)
+ module_scope = top_module_scope
+ for name in names[1:]:
+ submodule_scope = module_scope.find_submodule(name)
+ module_scope.declare_module(name, submodule_scope, self.pos)
+ module_scope = submodule_scope
+ if self.as_name:
+ env.declare_module(self.as_name, module_scope, self.pos)
+ else:
+ env.add_imported_module(module_scope)
+ env.declare_module(top_name, top_module_scope, self.pos)
+ else:
+ name = self.as_name or self.module_name
+ env.declare_module(name, module_scope, self.pos)
+ if self.module_name in utility_code_for_cimports:
env.use_utility_code(utility_code_for_cimports[self.module_name]())
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class FromCImportStatNode(StatNode):
- # from ... cimport statement
- #
- # module_name string Qualified name of module
- # relative_level int or None Relative import: number of dots before module_name
- # imported_names [(pos, name, as_name, kind)] Names to be imported
-
- child_attrs = []
- module_name = None
- relative_level = None
- imported_names = None
-
- def analyse_declarations(self, env):
- if not env.is_module_scope:
- error(self.pos, "cimport only allowed at module level")
- return
- if self.relative_level and self.relative_level > env.qualified_name.count('.'):
- error(self.pos, "relative cimport beyond main package is not allowed")
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
+
+class FromCImportStatNode(StatNode):
+ # from ... cimport statement
+ #
+ # module_name string Qualified name of module
+ # relative_level int or None Relative import: number of dots before module_name
+ # imported_names [(pos, name, as_name, kind)] Names to be imported
+
+ child_attrs = []
+ module_name = None
+ relative_level = None
+ imported_names = None
+
+ def analyse_declarations(self, env):
+ if not env.is_module_scope:
+ error(self.pos, "cimport only allowed at module level")
return
- module_scope = env.find_module(self.module_name, self.pos, relative_level=self.relative_level)
- module_name = module_scope.qualified_name
- env.add_imported_module(module_scope)
- for pos, name, as_name, kind in self.imported_names:
- if name == "*":
+ if self.relative_level and self.relative_level > env.qualified_name.count('.'):
+ error(self.pos, "relative cimport beyond main package is not allowed")
+ return
+ module_scope = env.find_module(self.module_name, self.pos, relative_level=self.relative_level)
+ module_name = module_scope.qualified_name
+ env.add_imported_module(module_scope)
+ for pos, name, as_name, kind in self.imported_names:
+ if name == "*":
for local_name, entry in list(module_scope.entries.items()):
- env.add_imported_entry(local_name, entry, pos)
- else:
- entry = module_scope.lookup(name)
- if entry:
- if kind and not self.declaration_matches(entry, kind):
- entry.redeclared(pos)
- entry.used = 1
- else:
- if kind == 'struct' or kind == 'union':
- entry = module_scope.declare_struct_or_union(
- name, kind=kind, scope=None, typedef_flag=0, pos=pos)
- elif kind == 'class':
- entry = module_scope.declare_c_class(name, pos=pos, module_name=module_name)
- else:
+ env.add_imported_entry(local_name, entry, pos)
+ else:
+ entry = module_scope.lookup(name)
+ if entry:
+ if kind and not self.declaration_matches(entry, kind):
+ entry.redeclared(pos)
+ entry.used = 1
+ else:
+ if kind == 'struct' or kind == 'union':
+ entry = module_scope.declare_struct_or_union(
+ name, kind=kind, scope=None, typedef_flag=0, pos=pos)
+ elif kind == 'class':
+ entry = module_scope.declare_c_class(name, pos=pos, module_name=module_name)
+ else:
submodule_scope = env.context.find_module(
name, relative_to=module_scope, pos=self.pos, absolute_fallback=False)
- if submodule_scope.parent_module is module_scope:
- env.declare_module(as_name or name, submodule_scope, self.pos)
- else:
- error(pos, "Name '%s' not declared in module '%s'" % (name, module_name))
-
- if entry:
- local_name = as_name or name
- env.add_imported_entry(local_name, entry, pos)
-
+ if submodule_scope.parent_module is module_scope:
+ env.declare_module(as_name or name, submodule_scope, self.pos)
+ else:
+ error(pos, "Name '%s' not declared in module '%s'" % (name, module_name))
+
+ if entry:
+ local_name = as_name or name
+ env.add_imported_entry(local_name, entry, pos)
+
if module_name.startswith('cpython') or module_name.startswith('cython'): # enough for now
- if module_name in utility_code_for_cimports:
+ if module_name in utility_code_for_cimports:
env.use_utility_code(utility_code_for_cimports[module_name]())
- for _, name, _, _ in self.imported_names:
- fqname = '%s.%s' % (module_name, name)
- if fqname in utility_code_for_cimports:
+ for _, name, _, _ in self.imported_names:
+ fqname = '%s.%s' % (module_name, name)
+ if fqname in utility_code_for_cimports:
env.use_utility_code(utility_code_for_cimports[fqname]())
-
- def declaration_matches(self, entry, kind):
- if not entry.is_type:
- return 0
- type = entry.type
- if kind == 'class':
- if not type.is_extension_type:
- return 0
- else:
- if not type.is_struct_or_union:
- return 0
- if kind != type.kind:
- return 0
- return 1
-
- def analyse_expressions(self, env):
- return self
-
- def generate_execution_code(self, code):
- pass
-
-
-class FromImportStatNode(StatNode):
- # from ... import statement
- #
- # module ImportNode
- # items [(string, NameNode)]
- # interned_items [(string, NameNode, ExprNode)]
- # item PyTempNode used internally
- # import_star boolean used internally
-
- child_attrs = ["module"]
- import_star = 0
-
- def analyse_declarations(self, env):
- for name, target in self.items:
- if name == "*":
- if not env.is_module_scope:
- error(self.pos, "import * only allowed at module level")
- return
- env.has_import_star = 1
- self.import_star = 1
- else:
- target.analyse_target_declaration(env)
-
- def analyse_expressions(self, env):
- from . import ExprNodes
- self.module = self.module.analyse_expressions(env)
- self.item = ExprNodes.RawCNameExprNode(self.pos, py_object_type)
- self.interned_items = []
- for name, target in self.items:
- if name == '*':
- for _, entry in env.entries.items():
- if not entry.is_type and entry.type.is_extension_type:
- env.use_utility_code(UtilityCode.load_cached("ExtTypeTest", "ObjectHandling.c"))
- break
- else:
- entry = env.lookup(target.name)
- # check whether or not entry is already cimported
- if (entry.is_type and entry.type.name == name
- and hasattr(entry.type, 'module_name')):
- if entry.type.module_name == self.module.module_name.value:
- # cimported with absolute name
- continue
- try:
- # cimported with relative name
- module = env.find_module(self.module.module_name.value, pos=self.pos,
- relative_level=self.module.level)
- if entry.type.module_name == module.qualified_name:
- continue
- except AttributeError:
- pass
- target = target.analyse_target_expression(env, None) # FIXME?
- if target.type is py_object_type:
- coerced_item = None
- else:
- coerced_item = self.item.coerce_to(target.type, env)
- self.interned_items.append((name, target, coerced_item))
- return self
-
- def generate_execution_code(self, code):
+
+ def declaration_matches(self, entry, kind):
+ if not entry.is_type:
+ return 0
+ type = entry.type
+ if kind == 'class':
+ if not type.is_extension_type:
+ return 0
+ else:
+ if not type.is_struct_or_union:
+ return 0
+ if kind != type.kind:
+ return 0
+ return 1
+
+ def analyse_expressions(self, env):
+ return self
+
+ def generate_execution_code(self, code):
+ pass
+
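# Editor's note: for orientation, the Cython statement forms these import nodes
# compile (illustration only, not part of this patch):
#
#     cimport numpy                          # CImportStatNode
#     cimport numpy as np                    # CImportStatNode with as_name
#     from libc.math cimport sqrt, sin       # FromCImportStatNode
#     from cpython.array cimport array       # also pulls in the ArrayAPI utility code above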
+
+class FromImportStatNode(StatNode):
+ # from ... import statement
+ #
+ # module ImportNode
+ # items [(string, NameNode)]
+ # interned_items [(string, NameNode, ExprNode)]
+ # item PyTempNode used internally
+ # import_star boolean used internally
+
+ child_attrs = ["module"]
+ import_star = 0
+
+ def analyse_declarations(self, env):
+ for name, target in self.items:
+ if name == "*":
+ if not env.is_module_scope:
+ error(self.pos, "import * only allowed at module level")
+ return
+ env.has_import_star = 1
+ self.import_star = 1
+ else:
+ target.analyse_target_declaration(env)
+
+ def analyse_expressions(self, env):
+ from . import ExprNodes
+ self.module = self.module.analyse_expressions(env)
+ self.item = ExprNodes.RawCNameExprNode(self.pos, py_object_type)
+ self.interned_items = []
+ for name, target in self.items:
+ if name == '*':
+ for _, entry in env.entries.items():
+ if not entry.is_type and entry.type.is_extension_type:
+ env.use_utility_code(UtilityCode.load_cached("ExtTypeTest", "ObjectHandling.c"))
+ break
+ else:
+ entry = env.lookup(target.name)
+ # check whether or not entry is already cimported
+ if (entry.is_type and entry.type.name == name
+ and hasattr(entry.type, 'module_name')):
+ if entry.type.module_name == self.module.module_name.value:
+ # cimported with absolute name
+ continue
+ try:
+ # cimported with relative name
+ module = env.find_module(self.module.module_name.value, pos=self.pos,
+ relative_level=self.module.level)
+ if entry.type.module_name == module.qualified_name:
+ continue
+ except AttributeError:
+ pass
+ target = target.analyse_target_expression(env, None) # FIXME?
+ if target.type is py_object_type:
+ coerced_item = None
+ else:
+ coerced_item = self.item.coerce_to(target.type, env)
+ self.interned_items.append((name, target, coerced_item))
+ return self
+
+ def generate_execution_code(self, code):
code.mark_pos(self.pos)
- self.module.generate_evaluation_code(code)
- if self.import_star:
- code.putln(
- 'if (%s(%s) < 0) %s;' % (
- Naming.import_star,
- self.module.py_result(),
- code.error_goto(self.pos)))
- item_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
- self.item.set_cname(item_temp)
- if self.interned_items:
- code.globalstate.use_utility_code(
- UtilityCode.load_cached("ImportFrom", "ImportExport.c"))
- for name, target, coerced_item in self.interned_items:
- code.putln(
- '%s = __Pyx_ImportFrom(%s, %s); %s' % (
- item_temp,
- self.module.py_result(),
- code.intern_identifier(name),
- code.error_goto_if_null(item_temp, self.pos)))
- code.put_gotref(item_temp)
- if coerced_item is None:
- target.generate_assignment_code(self.item, code)
- else:
- coerced_item.allocate_temp_result(code)
- coerced_item.generate_result_code(code)
- target.generate_assignment_code(coerced_item, code)
- code.put_decref_clear(item_temp, py_object_type)
- code.funcstate.release_temp(item_temp)
- self.module.generate_disposal_code(code)
- self.module.free_temps(code)
-
-
-class ParallelNode(Node):
- """
- Base class for cython.parallel constructs.
- """
-
- nogil_check = None
-
-
-class ParallelStatNode(StatNode, ParallelNode):
- """
- Base class for 'with cython.parallel.parallel():' and 'for i in prange():'.
-
- assignments { Entry(var) : (var.pos, inplace_operator_or_None) }
- assignments to variables in this parallel section
-
- parent parent ParallelStatNode or None
- is_parallel indicates whether this node is OpenMP parallel
- (true for #pragma omp parallel for and
- #pragma omp parallel)
-
- is_parallel is true for:
-
- #pragma omp parallel
- #pragma omp parallel for
-
- sections, but NOT for
-
- #pragma omp for
-
- We need this to determine the sharing attributes.
-
- privatization_insertion_point a code insertion point used to make temps
- private (esp. the "nsteps" temp)
-
- args tuple the arguments passed to the parallel construct
- kwargs DictNode the keyword arguments passed to the parallel
- construct (replaced by its compile time value)
- """
-
- child_attrs = ['body', 'num_threads']
-
- body = None
-
- is_prange = False
- is_nested_prange = False
-
- error_label_used = False
-
- num_threads = None
- chunksize = None
-
- parallel_exc = (
- Naming.parallel_exc_type,
- Naming.parallel_exc_value,
- Naming.parallel_exc_tb,
- )
-
- parallel_pos_info = (
- Naming.parallel_filename,
- Naming.parallel_lineno,
- Naming.parallel_clineno,
- )
-
- pos_info = (
- Naming.filename_cname,
- Naming.lineno_cname,
- Naming.clineno_cname,
- )
-
- critical_section_counter = 0
-
- def __init__(self, pos, **kwargs):
- super(ParallelStatNode, self).__init__(pos, **kwargs)
-
- # All assignments in this scope
- self.assignments = kwargs.get('assignments') or {}
-
- # All seen closure cnames and their temporary cnames
- self.seen_closure_vars = set()
-
- # Dict of variables that should be declared (first|last|)private or
- # reduction { Entry: (op, lastprivate) }.
- # If op is not None, it's a reduction.
- self.privates = {}
-
- # [NameNode]
- self.assigned_nodes = []
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
-
- self.num_threads = None
-
- if self.kwargs:
- # Try to find num_threads and chunksize keyword arguments
- pairs = []
+ self.module.generate_evaluation_code(code)
+ if self.import_star:
+ code.putln(
+ 'if (%s(%s) < 0) %s;' % (
+ Naming.import_star,
+ self.module.py_result(),
+ code.error_goto(self.pos)))
+ item_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+ self.item.set_cname(item_temp)
+ if self.interned_items:
+ code.globalstate.use_utility_code(
+ UtilityCode.load_cached("ImportFrom", "ImportExport.c"))
+ for name, target, coerced_item in self.interned_items:
+ code.putln(
+ '%s = __Pyx_ImportFrom(%s, %s); %s' % (
+ item_temp,
+ self.module.py_result(),
+ code.intern_identifier(name),
+ code.error_goto_if_null(item_temp, self.pos)))
+ code.put_gotref(item_temp)
+ if coerced_item is None:
+ target.generate_assignment_code(self.item, code)
+ else:
+ coerced_item.allocate_temp_result(code)
+ coerced_item.generate_result_code(code)
+ target.generate_assignment_code(coerced_item, code)
+ code.put_decref_clear(item_temp, py_object_type)
+ code.funcstate.release_temp(item_temp)
+ self.module.generate_disposal_code(code)
+ self.module.free_temps(code)
+
+
+class ParallelNode(Node):
+ """
+ Base class for cython.parallel constructs.
+ """
+
+ nogil_check = None
+
+
+class ParallelStatNode(StatNode, ParallelNode):
+ """
+ Base class for 'with cython.parallel.parallel():' and 'for i in prange():'.
+
+ assignments { Entry(var) : (var.pos, inplace_operator_or_None) }
+ assignments to variables in this parallel section
+
+ parent parent ParallelStatNode or None
+ is_parallel indicates whether this node is OpenMP parallel
+ (true for #pragma omp parallel for and
+ #pragma omp parallel)
+
+ is_parallel is true for:
+
+ #pragma omp parallel
+ #pragma omp parallel for
+
+ sections, but NOT for
+
+ #pragma omp for
+
+ We need this to determine the sharing attributes.
+
+ privatization_insertion_point a code insertion point used to make temps
+ private (esp. the "nsteps" temp)
+
+ args tuple the arguments passed to the parallel construct
+ kwargs DictNode the keyword arguments passed to the parallel
+ construct (replaced by its compile time value)
+ """
+
+ child_attrs = ['body', 'num_threads']
+
+ body = None
+
+ is_prange = False
+ is_nested_prange = False
+
+ error_label_used = False
+
+ num_threads = None
+ chunksize = None
+
+ parallel_exc = (
+ Naming.parallel_exc_type,
+ Naming.parallel_exc_value,
+ Naming.parallel_exc_tb,
+ )
+
+ parallel_pos_info = (
+ Naming.parallel_filename,
+ Naming.parallel_lineno,
+ Naming.parallel_clineno,
+ )
+
+ pos_info = (
+ Naming.filename_cname,
+ Naming.lineno_cname,
+ Naming.clineno_cname,
+ )
+
+ critical_section_counter = 0
+
+ def __init__(self, pos, **kwargs):
+ super(ParallelStatNode, self).__init__(pos, **kwargs)
+
+ # All assignments in this scope
+ self.assignments = kwargs.get('assignments') or {}
+
+ # All seen closure cnames and their temporary cnames
+ self.seen_closure_vars = set()
+
+ # Dict of variables that should be declared (first|last|)private or
+ # reduction { Entry: (op, lastprivate) }.
+ # If op is not None, it's a reduction.
+ self.privates = {}
+
+ # [NameNode]
+ self.assigned_nodes = []
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
+
+ self.num_threads = None
+
+ if self.kwargs:
+ # Try to find num_threads and chunksize keyword arguments
+ pairs = []
seen = set()
- for dictitem in self.kwargs.key_value_pairs:
+ for dictitem in self.kwargs.key_value_pairs:
if dictitem.key.value in seen:
error(self.pos, "Duplicate keyword argument found: %s" % dictitem.key.value)
seen.add(dictitem.key.value)
- if dictitem.key.value == 'num_threads':
+ if dictitem.key.value == 'num_threads':
if not dictitem.value.is_none:
self.num_threads = dictitem.value
- elif self.is_prange and dictitem.key.value == 'chunksize':
+ elif self.is_prange and dictitem.key.value == 'chunksize':
if not dictitem.value.is_none:
self.chunksize = dictitem.value
- else:
- pairs.append(dictitem)
-
- self.kwargs.key_value_pairs = pairs
-
- try:
- self.kwargs = self.kwargs.compile_time_value(env)
+ else:
+ pairs.append(dictitem)
+
+ self.kwargs.key_value_pairs = pairs
+
+ try:
+ self.kwargs = self.kwargs.compile_time_value(env)
except Exception as e:
- error(self.kwargs.pos, "Only compile-time values may be "
- "supplied as keyword arguments")
- else:
- self.kwargs = {}
-
+ error(self.kwargs.pos, "Only compile-time values may be "
+ "supplied as keyword arguments")
+ else:
+ self.kwargs = {}
+
for kw, val in self.kwargs.items():
- if kw not in self.valid_keyword_arguments:
- error(self.pos, "Invalid keyword argument: %s" % kw)
- else:
- setattr(self, kw, val)
-
- def analyse_expressions(self, env):
- if self.num_threads:
- self.num_threads = self.num_threads.analyse_expressions(env)
-
- if self.chunksize:
- self.chunksize = self.chunksize.analyse_expressions(env)
-
- self.body = self.body.analyse_expressions(env)
- self.analyse_sharing_attributes(env)
-
- if self.num_threads is not None:
+ if kw not in self.valid_keyword_arguments:
+ error(self.pos, "Invalid keyword argument: %s" % kw)
+ else:
+ setattr(self, kw, val)
+
+ def analyse_expressions(self, env):
+ if self.num_threads:
+ self.num_threads = self.num_threads.analyse_expressions(env)
+
+ if self.chunksize:
+ self.chunksize = self.chunksize.analyse_expressions(env)
+
+ self.body = self.body.analyse_expressions(env)
+ self.analyse_sharing_attributes(env)
+
+ if self.num_threads is not None:
if self.parent and self.parent.num_threads is not None and not self.parent.is_prange:
error(self.pos, "num_threads already declared in outer section")
- elif self.parent and not self.parent.is_prange:
+ elif self.parent and not self.parent.is_prange:
error(self.pos, "num_threads must be declared in the parent parallel section")
- elif (self.num_threads.type.is_int and
+ elif (self.num_threads.type.is_int and
self.num_threads.is_literal and
self.num_threads.compile_time_value(env) <= 0):
error(self.pos, "argument to num_threads must be greater than 0")
-
+
if not self.num_threads.is_simple() or self.num_threads.type.is_pyobject:
- self.num_threads = self.num_threads.coerce_to(
- PyrexTypes.c_int_type, env).coerce_to_temp(env)
- return self
-
- def analyse_sharing_attributes(self, env):
- """
- Analyse the privates for this block and set them in self.privates.
- This should be called in a post-order fashion during the
- analyse_expressions phase
- """
+ self.num_threads = self.num_threads.coerce_to(
+ PyrexTypes.c_int_type, env).coerce_to_temp(env)
+ return self
+
+ def analyse_sharing_attributes(self, env):
+ """
+ Analyse the privates for this block and set them in self.privates.
+ This should be called in a post-order fashion during the
+ analyse_expressions phase
+ """
for entry, (pos, op) in self.assignments.items():
-
- if self.is_prange and not self.is_parallel:
- # closely nested prange in a with parallel block, disallow
- # assigning to privates in the with parallel block (we
- # consider it too implicit and magicky for users)
- if entry in self.parent.assignments:
+
+ if self.is_prange and not self.is_parallel:
+ # closely nested prange in a with parallel block, disallow
+ # assigning to privates in the with parallel block (we
+ # consider it too implicit and magicky for users)
+ if entry in self.parent.assignments:
error(pos, "Cannot assign to private of outer parallel block")
- continue
-
- if not self.is_prange and op:
-                # Again possible, but considered too magicky
- error(pos, "Reductions not allowed for parallel blocks")
- continue
-
- # By default all variables should have the same values as if
- # executed sequentially
- lastprivate = True
- self.propagate_var_privatization(entry, pos, op, lastprivate)
-
- def propagate_var_privatization(self, entry, pos, op, lastprivate):
- """
- Propagate the sharing attributes of a variable. If the privatization is
-        determined by a parent scope, don't propagate further.
-
- If we are a prange, we propagate our sharing attributes outwards to
-        other pranges. If we are a prange in a parallel block and the parallel
- block does not determine the variable private, we propagate to the
- parent of the parent. Recursion stops at parallel blocks, as they have
- no concept of lastprivate or reduction.
-
- So the following cases propagate:
-
- sum is a reduction for all loops:
-
- for i in prange(n):
- for j in prange(n):
- for k in prange(n):
- sum += i * j * k
-
- sum is a reduction for both loops, local_var is private to the
- parallel with block:
-
- for i in prange(n):
- with parallel:
- local_var = ... # private to the parallel
- for j in prange(n):
- sum += i * j
-
- Nested with parallel blocks are disallowed, because they wouldn't
- allow you to propagate lastprivates or reductions:
-
- #pragma omp parallel for lastprivate(i)
- for i in prange(n):
-
- sum = 0
-
- #pragma omp parallel private(j, sum)
- with parallel:
-
- #pragma omp parallel
- with parallel:
-
- #pragma omp for lastprivate(j) reduction(+:sum)
- for j in prange(n):
- sum += i
-
- # sum and j are well-defined here
-
- # sum and j are undefined here
-
- # sum and j are undefined here
- """
- self.privates[entry] = (op, lastprivate)
-
- if entry.type.is_memoryviewslice:
- error(pos, "Memoryview slices can only be shared in parallel sections")
- return
-
- if self.is_prange:
- if not self.is_parallel and entry not in self.parent.assignments:
- # Parent is a parallel with block
- parent = self.parent.parent
- else:
- parent = self.parent
-
- # We don't need to propagate privates, only reductions and
- # lastprivates
- if parent and (op or lastprivate):
- parent.propagate_var_privatization(entry, pos, op, lastprivate)
-
- def _allocate_closure_temp(self, code, entry):
- """
-        Helper function that allocates a temporary for a closure variable that
- is assigned to.
- """
- if self.parent:
- return self.parent._allocate_closure_temp(code, entry)
-
- if entry.cname in self.seen_closure_vars:
- return entry.cname
-
- cname = code.funcstate.allocate_temp(entry.type, True)
-
- # Add both the actual cname and the temp cname, as the actual cname
- # will be replaced with the temp cname on the entry
- self.seen_closure_vars.add(entry.cname)
- self.seen_closure_vars.add(cname)
-
- self.modified_entries.append((entry, entry.cname))
- code.putln("%s = %s;" % (cname, entry.cname))
- entry.cname = cname
-
- def initialize_privates_to_nan(self, code, exclude=None):
- first = True
-
+ continue
+
+ if not self.is_prange and op:
+                # Again possible, but considered too magicky
+ error(pos, "Reductions not allowed for parallel blocks")
+ continue
+
+ # By default all variables should have the same values as if
+ # executed sequentially
+ lastprivate = True
+ self.propagate_var_privatization(entry, pos, op, lastprivate)
+
+ def propagate_var_privatization(self, entry, pos, op, lastprivate):
+ """
+ Propagate the sharing attributes of a variable. If the privatization is
+        determined by a parent scope, don't propagate further.
+
+ If we are a prange, we propagate our sharing attributes outwards to
+        other pranges. If we are a prange in a parallel block and the parallel
+ block does not determine the variable private, we propagate to the
+ parent of the parent. Recursion stops at parallel blocks, as they have
+ no concept of lastprivate or reduction.
+
+ So the following cases propagate:
+
+ sum is a reduction for all loops:
+
+ for i in prange(n):
+ for j in prange(n):
+ for k in prange(n):
+ sum += i * j * k
+
+ sum is a reduction for both loops, local_var is private to the
+ parallel with block:
+
+ for i in prange(n):
+ with parallel:
+ local_var = ... # private to the parallel
+ for j in prange(n):
+ sum += i * j
+
+ Nested with parallel blocks are disallowed, because they wouldn't
+ allow you to propagate lastprivates or reductions:
+
+ #pragma omp parallel for lastprivate(i)
+ for i in prange(n):
+
+ sum = 0
+
+ #pragma omp parallel private(j, sum)
+ with parallel:
+
+ #pragma omp parallel
+ with parallel:
+
+ #pragma omp for lastprivate(j) reduction(+:sum)
+ for j in prange(n):
+ sum += i
+
+ # sum and j are well-defined here
+
+ # sum and j are undefined here
+
+ # sum and j are undefined here
+ """
+ self.privates[entry] = (op, lastprivate)
+
+ if entry.type.is_memoryviewslice:
+ error(pos, "Memoryview slices can only be shared in parallel sections")
+ return
+
+ if self.is_prange:
+ if not self.is_parallel and entry not in self.parent.assignments:
+ # Parent is a parallel with block
+ parent = self.parent.parent
+ else:
+ parent = self.parent
+
+ # We don't need to propagate privates, only reductions and
+ # lastprivates
+ if parent and (op or lastprivate):
+ parent.propagate_var_privatization(entry, pos, op, lastprivate)
+
+ def _allocate_closure_temp(self, code, entry):
+ """
+        Helper function that allocates a temporary for a closure variable that
+ is assigned to.
+ """
+ if self.parent:
+ return self.parent._allocate_closure_temp(code, entry)
+
+ if entry.cname in self.seen_closure_vars:
+ return entry.cname
+
+ cname = code.funcstate.allocate_temp(entry.type, True)
+
+ # Add both the actual cname and the temp cname, as the actual cname
+ # will be replaced with the temp cname on the entry
+ self.seen_closure_vars.add(entry.cname)
+ self.seen_closure_vars.add(cname)
+
+ self.modified_entries.append((entry, entry.cname))
+ code.putln("%s = %s;" % (cname, entry.cname))
+ entry.cname = cname
+
+ def initialize_privates_to_nan(self, code, exclude=None):
+ first = True
+
for entry, (op, lastprivate) in sorted(self.privates.items()):
- if not op and (not exclude or entry != exclude):
- invalid_value = entry.type.invalid_value()
-
- if invalid_value:
- if first:
- code.putln("/* Initialize private variables to "
- "invalid values */")
- first = False
- code.putln("%s = %s;" % (entry.cname,
- entry.type.cast_code(invalid_value)))
-
- def evaluate_before_block(self, code, expr):
- c = self.begin_of_parallel_control_block_point_after_decls
- # we need to set the owner to ourselves temporarily, as
- # allocate_temp may generate a comment in the middle of our pragma
- # otherwise when DebugFlags.debug_temp_code_comments is in effect
- owner = c.funcstate.owner
- c.funcstate.owner = c
- expr.generate_evaluation_code(c)
- c.funcstate.owner = owner
-
- return expr.result()
-
- def put_num_threads(self, code):
- """
-        Write self.num_threads, if set, as the num_threads() clause of the OpenMP directive
- """
- if self.num_threads is not None:
+ if not op and (not exclude or entry != exclude):
+ invalid_value = entry.type.invalid_value()
+
+ if invalid_value:
+ if first:
+ code.putln("/* Initialize private variables to "
+ "invalid values */")
+ first = False
+ code.putln("%s = %s;" % (entry.cname,
+ entry.type.cast_code(invalid_value)))
+
+ def evaluate_before_block(self, code, expr):
+ c = self.begin_of_parallel_control_block_point_after_decls
+ # we need to set the owner to ourselves temporarily, as
+ # allocate_temp may generate a comment in the middle of our pragma
+ # otherwise when DebugFlags.debug_temp_code_comments is in effect
+ owner = c.funcstate.owner
+ c.funcstate.owner = c
+ expr.generate_evaluation_code(c)
+ c.funcstate.owner = owner
+
+ return expr.result()
+
+ def put_num_threads(self, code):
+ """
+        Write self.num_threads, if set, as the num_threads() clause of the OpenMP directive
+ """
+ if self.num_threads is not None:
code.put(" num_threads(%s)" % self.evaluate_before_block(code, self.num_threads))
-
-
- def declare_closure_privates(self, code):
- """
- If a variable is in a scope object, we need to allocate a temp and
- assign the value from the temp to the variable in the scope object
- after the parallel section. This kind of copying should be done only
- in the outermost parallel section.
- """
- self.modified_entries = []
-
+
+
+ def declare_closure_privates(self, code):
+ """
+ If a variable is in a scope object, we need to allocate a temp and
+ assign the value from the temp to the variable in the scope object
+ after the parallel section. This kind of copying should be done only
+ in the outermost parallel section.
+ """
+ self.modified_entries = []
+
for entry in sorted(self.assignments):
- if entry.from_closure or entry.in_closure:
- self._allocate_closure_temp(code, entry)
-
- def release_closure_privates(self, code):
- """
- Release any temps used for variables in scope objects. As this is the
- outermost parallel block, we don't need to delete the cnames from
- self.seen_closure_vars.
- """
- for entry, original_cname in self.modified_entries:
- code.putln("%s = %s;" % (original_cname, entry.cname))
- code.funcstate.release_temp(entry.cname)
- entry.cname = original_cname
-
- def privatize_temps(self, code, exclude_temps=()):
- """
- Make any used temporaries private. Before the relevant code block
- code.start_collecting_temps() should have been called.
- """
+ if entry.from_closure or entry.in_closure:
+ self._allocate_closure_temp(code, entry)
+
+ def release_closure_privates(self, code):
+ """
+ Release any temps used for variables in scope objects. As this is the
+ outermost parallel block, we don't need to delete the cnames from
+ self.seen_closure_vars.
+ """
+ for entry, original_cname in self.modified_entries:
+ code.putln("%s = %s;" % (original_cname, entry.cname))
+ code.funcstate.release_temp(entry.cname)
+ entry.cname = original_cname
+
+ def privatize_temps(self, code, exclude_temps=()):
+ """
+ Make any used temporaries private. Before the relevant code block
+ code.start_collecting_temps() should have been called.
+ """
c = self.privatization_insertion_point
self.privatization_insertion_point = None
- if self.is_parallel:
- self.temps = temps = code.funcstate.stop_collecting_temps()
- privates, firstprivates = [], []
+ if self.is_parallel:
+ self.temps = temps = code.funcstate.stop_collecting_temps()
+ privates, firstprivates = [], []
for temp, type in sorted(temps):
- if type.is_pyobject or type.is_memoryviewslice:
- firstprivates.append(temp)
- else:
- privates.append(temp)
-
- if privates:
- c.put(" private(%s)" % ", ".join(privates))
- if firstprivates:
- c.put(" firstprivate(%s)" % ", ".join(firstprivates))
-
- if self.breaking_label_used:
- shared_vars = [Naming.parallel_why]
- if self.error_label_used:
- shared_vars.extend(self.parallel_exc)
- c.put(" private(%s, %s, %s)" % self.pos_info)
-
- c.put(" shared(%s)" % ', '.join(shared_vars))
-
- def cleanup_temps(self, code):
- # Now clean up any memoryview slice and object temporaries
- if self.is_parallel and not self.is_nested_prange:
- code.putln("/* Clean up any temporaries */")
+ if type.is_pyobject or type.is_memoryviewslice:
+ firstprivates.append(temp)
+ else:
+ privates.append(temp)
+
+ if privates:
+ c.put(" private(%s)" % ", ".join(privates))
+ if firstprivates:
+ c.put(" firstprivate(%s)" % ", ".join(firstprivates))
+
+ if self.breaking_label_used:
+ shared_vars = [Naming.parallel_why]
+ if self.error_label_used:
+ shared_vars.extend(self.parallel_exc)
+ c.put(" private(%s, %s, %s)" % self.pos_info)
+
+ c.put(" shared(%s)" % ', '.join(shared_vars))
+
+ def cleanup_temps(self, code):
+ # Now clean up any memoryview slice and object temporaries
+ if self.is_parallel and not self.is_nested_prange:
+ code.putln("/* Clean up any temporaries */")
for temp, type in sorted(self.temps):
- if type.is_memoryviewslice:
- code.put_xdecref_memoryviewslice(temp, have_gil=False)
- elif type.is_pyobject:
- code.put_xdecref(temp, type)
- code.putln("%s = NULL;" % temp)
-
- def setup_parallel_control_flow_block(self, code):
- """
- Sets up a block that surrounds the parallel block to determine
- how the parallel section was exited. Any kind of return is
- trapped (break, continue, return, exceptions). This is the idea:
-
- {
- int why = 0;
-
- #pragma omp parallel
- {
- return # -> goto new_return_label;
- goto end_parallel;
-
- new_return_label:
- why = 3;
- goto end_parallel;
-
- end_parallel:;
- #pragma omp flush(why) # we need to flush for every iteration
- }
-
- if (why == 3)
- goto old_return_label;
- }
- """
- self.old_loop_labels = code.new_loop_labels()
- self.old_error_label = code.new_error_label()
- self.old_return_label = code.return_label
- code.return_label = code.new_label(name="return")
-
- code.begin_block() # parallel control flow block
- self.begin_of_parallel_control_block_point = code.insertion_point()
- self.begin_of_parallel_control_block_point_after_decls = code.insertion_point()
-
- self.undef_builtin_expect_apple_gcc_bug(code)
-
- def begin_parallel_block(self, code):
- """
- Each OpenMP thread in a parallel section that contains a with gil block
- must have the thread-state initialized. The call to
-        PyGILState_Release() then deallocates our threadstate. If we didn't
-        do this, each with gil block would allocate and deallocate one,
-        losing exception information before it can be saved ahead of leaving the
- parallel section.
- """
- self.begin_of_parallel_block = code.insertion_point()
-
- def end_parallel_block(self, code):
- """
- To ensure all OpenMP threads have thread states, we ensure the GIL
- in each thread (which creates a thread state if it doesn't exist),
- after which we release the GIL.
- On exit, reacquire the GIL and release the thread state.
-
- If compiled without OpenMP support (at the C level), then we still have
- to acquire the GIL to decref any object temporaries.
- """
+ if type.is_memoryviewslice:
+ code.put_xdecref_memoryviewslice(temp, have_gil=False)
+ elif type.is_pyobject:
+ code.put_xdecref(temp, type)
+ code.putln("%s = NULL;" % temp)
+
+ def setup_parallel_control_flow_block(self, code):
+ """
+ Sets up a block that surrounds the parallel block to determine
+ how the parallel section was exited. Any kind of return is
+ trapped (break, continue, return, exceptions). This is the idea:
+
+ {
+ int why = 0;
+
+ #pragma omp parallel
+ {
+ return # -> goto new_return_label;
+ goto end_parallel;
+
+ new_return_label:
+ why = 3;
+ goto end_parallel;
+
+ end_parallel:;
+ #pragma omp flush(why) # we need to flush for every iteration
+ }
+
+ if (why == 3)
+ goto old_return_label;
+ }
+ """
+ self.old_loop_labels = code.new_loop_labels()
+ self.old_error_label = code.new_error_label()
+ self.old_return_label = code.return_label
+ code.return_label = code.new_label(name="return")
+
+ code.begin_block() # parallel control flow block
+ self.begin_of_parallel_control_block_point = code.insertion_point()
+ self.begin_of_parallel_control_block_point_after_decls = code.insertion_point()
+
+ self.undef_builtin_expect_apple_gcc_bug(code)
+
+ def begin_parallel_block(self, code):
+ """
+ Each OpenMP thread in a parallel section that contains a with gil block
+ must have the thread-state initialized. The call to
+        PyGILState_Release() then deallocates our threadstate. If we didn't
+        do this, each with gil block would allocate and deallocate one,
+        losing exception information before it can be saved ahead of leaving the
+ parallel section.
+ """
+ self.begin_of_parallel_block = code.insertion_point()
+
+ def end_parallel_block(self, code):
+ """
+ To ensure all OpenMP threads have thread states, we ensure the GIL
+ in each thread (which creates a thread state if it doesn't exist),
+ after which we release the GIL.
+ On exit, reacquire the GIL and release the thread state.
+
+ If compiled without OpenMP support (at the C level), then we still have
+ to acquire the GIL to decref any object temporaries.
+ """
begin_code = self.begin_of_parallel_block
self.begin_of_parallel_block = None
- if self.error_label_used:
- end_code = code
-
- begin_code.putln("#ifdef _OPENMP")
- begin_code.put_ensure_gil(declare_gilstate=True)
- begin_code.putln("Py_BEGIN_ALLOW_THREADS")
- begin_code.putln("#endif /* _OPENMP */")
-
- end_code.putln("#ifdef _OPENMP")
- end_code.putln("Py_END_ALLOW_THREADS")
- end_code.putln("#else")
- end_code.put_safe("{\n")
- end_code.put_ensure_gil()
- end_code.putln("#endif /* _OPENMP */")
- self.cleanup_temps(end_code)
- end_code.put_release_ensured_gil()
- end_code.putln("#ifndef _OPENMP")
- end_code.put_safe("}\n")
- end_code.putln("#endif /* _OPENMP */")
-
- def trap_parallel_exit(self, code, should_flush=False):
- """
- Trap any kind of return inside a parallel construct. 'should_flush'
- indicates whether the variable should be flushed, which is needed by
- prange to skip the loop. It also indicates whether we need to register
- a continue (we need this for parallel blocks, but not for prange
- loops, as it is a direct jump there).
-
- It uses the same mechanism as try/finally:
- 1 continue
- 2 break
- 3 return
- 4 error
- """
- save_lastprivates_label = code.new_label()
- dont_return_label = code.new_label()
-
- self.any_label_used = False
- self.breaking_label_used = False
- self.error_label_used = False
-
- self.parallel_private_temps = []
-
- all_labels = code.get_all_labels()
-
- # Figure this out before starting to generate any code
- for label in all_labels:
- if code.label_used(label):
- self.breaking_label_used = (self.breaking_label_used or
- label != code.continue_label)
- self.any_label_used = True
-
- if self.any_label_used:
- code.put_goto(dont_return_label)
-
- for i, label in enumerate(all_labels):
- if not code.label_used(label):
- continue
-
- is_continue_label = label == code.continue_label
-
- code.put_label(label)
-
- if not (should_flush and is_continue_label):
- if label == code.error_label:
- self.error_label_used = True
- self.fetch_parallel_exception(code)
-
- code.putln("%s = %d;" % (Naming.parallel_why, i + 1))
-
- if (self.breaking_label_used and self.is_prange and not
- is_continue_label):
- code.put_goto(save_lastprivates_label)
- else:
- code.put_goto(dont_return_label)
-
- if self.any_label_used:
- if self.is_prange and self.breaking_label_used:
- # Don't rely on lastprivate, save our lastprivates
- code.put_label(save_lastprivates_label)
- self.save_parallel_vars(code)
-
- code.put_label(dont_return_label)
-
- if should_flush and self.breaking_label_used:
- code.putln_openmp("#pragma omp flush(%s)" % Naming.parallel_why)
-
- def save_parallel_vars(self, code):
- """
- The following shenanigans are instated when we break, return or
- propagate errors from a prange. In this case we cannot rely on
- lastprivate() to do its job, as no iterations may have executed yet
- in the last thread, leaving the values undefined. It is most likely
- that the breaking thread has well-defined values of the lastprivate
- variables, so we keep those values.
- """
+ if self.error_label_used:
+ end_code = code
+
+ begin_code.putln("#ifdef _OPENMP")
+ begin_code.put_ensure_gil(declare_gilstate=True)
+ begin_code.putln("Py_BEGIN_ALLOW_THREADS")
+ begin_code.putln("#endif /* _OPENMP */")
+
+ end_code.putln("#ifdef _OPENMP")
+ end_code.putln("Py_END_ALLOW_THREADS")
+ end_code.putln("#else")
+ end_code.put_safe("{\n")
+ end_code.put_ensure_gil()
+ end_code.putln("#endif /* _OPENMP */")
+ self.cleanup_temps(end_code)
+ end_code.put_release_ensured_gil()
+ end_code.putln("#ifndef _OPENMP")
+ end_code.put_safe("}\n")
+ end_code.putln("#endif /* _OPENMP */")
+
+ def trap_parallel_exit(self, code, should_flush=False):
+ """
+ Trap any kind of return inside a parallel construct. 'should_flush'
+ indicates whether the variable should be flushed, which is needed by
+ prange to skip the loop. It also indicates whether we need to register
+ a continue (we need this for parallel blocks, but not for prange
+ loops, as it is a direct jump there).
+
+ It uses the same mechanism as try/finally:
+ 1 continue
+ 2 break
+ 3 return
+ 4 error
+ """
+ save_lastprivates_label = code.new_label()
+ dont_return_label = code.new_label()
+
+ self.any_label_used = False
+ self.breaking_label_used = False
+ self.error_label_used = False
+
+ self.parallel_private_temps = []
+
+ all_labels = code.get_all_labels()
+
+ # Figure this out before starting to generate any code
+ for label in all_labels:
+ if code.label_used(label):
+ self.breaking_label_used = (self.breaking_label_used or
+ label != code.continue_label)
+ self.any_label_used = True
+
+ if self.any_label_used:
+ code.put_goto(dont_return_label)
+
+ for i, label in enumerate(all_labels):
+ if not code.label_used(label):
+ continue
+
+ is_continue_label = label == code.continue_label
+
+ code.put_label(label)
+
+ if not (should_flush and is_continue_label):
+ if label == code.error_label:
+ self.error_label_used = True
+ self.fetch_parallel_exception(code)
+
+ code.putln("%s = %d;" % (Naming.parallel_why, i + 1))
+
+ if (self.breaking_label_used and self.is_prange and not
+ is_continue_label):
+ code.put_goto(save_lastprivates_label)
+ else:
+ code.put_goto(dont_return_label)
+
+ if self.any_label_used:
+ if self.is_prange and self.breaking_label_used:
+ # Don't rely on lastprivate, save our lastprivates
+ code.put_label(save_lastprivates_label)
+ self.save_parallel_vars(code)
+
+ code.put_label(dont_return_label)
+
+ if should_flush and self.breaking_label_used:
+ code.putln_openmp("#pragma omp flush(%s)" % Naming.parallel_why)
+
+ def save_parallel_vars(self, code):
+ """
+ The following shenanigans are instated when we break, return or
+ propagate errors from a prange. In this case we cannot rely on
+ lastprivate() to do its job, as no iterations may have executed yet
+ in the last thread, leaving the values undefined. It is most likely
+ that the breaking thread has well-defined values of the lastprivate
+ variables, so we keep those values.
+ """
section_name = "__pyx_parallel_lastprivates%d" % self.critical_section_counter
- code.putln_openmp("#pragma omp critical(%s)" % section_name)
- ParallelStatNode.critical_section_counter += 1
-
- code.begin_block() # begin critical section
-
- c = self.begin_of_parallel_control_block_point
-
- temp_count = 0
+ code.putln_openmp("#pragma omp critical(%s)" % section_name)
+ ParallelStatNode.critical_section_counter += 1
+
+ code.begin_block() # begin critical section
+
+ c = self.begin_of_parallel_control_block_point
+
+ temp_count = 0
for entry, (op, lastprivate) in sorted(self.privates.items()):
- if not lastprivate or entry.type.is_pyobject:
- continue
-
+ if not lastprivate or entry.type.is_pyobject:
+ continue
+
type_decl = entry.type.empty_declaration_code()
- temp_cname = "__pyx_parallel_temp%d" % temp_count
- private_cname = entry.cname
-
- temp_count += 1
-
- invalid_value = entry.type.invalid_value()
- if invalid_value:
+ temp_cname = "__pyx_parallel_temp%d" % temp_count
+ private_cname = entry.cname
+
+ temp_count += 1
+
+ invalid_value = entry.type.invalid_value()
+ if invalid_value:
init = ' = ' + entry.type.cast_code(invalid_value)
- else:
- init = ''
- # Declare the parallel private in the outer block
- c.putln("%s %s%s;" % (type_decl, temp_cname, init))
-
- # Initialize before escaping
- code.putln("%s = %s;" % (temp_cname, private_cname))
-
- self.parallel_private_temps.append((temp_cname, private_cname))
-
- code.end_block() # end critical section
-
- def fetch_parallel_exception(self, code):
- """
- As each OpenMP thread may raise an exception, we need to fetch that
- exception from the threadstate and save it for after the parallel
- section where it can be re-raised in the master thread.
-
- Although it would seem that __pyx_filename, __pyx_lineno and
- __pyx_clineno are only assigned to under exception conditions (i.e.,
- when we have the GIL), and thus should be allowed to be shared without
- any race condition, they are in fact subject to the same race
-        conditions as before, when they were global variables
- and functions were allowed to release the GIL:
-
- thread A thread B
- acquire
- set lineno
- release
- acquire
- set lineno
- release
- acquire
- fetch exception
- release
- skip the fetch
-
- deallocate threadstate deallocate threadstate
- """
- code.begin_block()
- code.put_ensure_gil(declare_gilstate=True)
-
- code.putln_openmp("#pragma omp flush(%s)" % Naming.parallel_exc_type)
- code.putln(
- "if (!%s) {" % Naming.parallel_exc_type)
-
+ else:
+ init = ''
+ # Declare the parallel private in the outer block
+ c.putln("%s %s%s;" % (type_decl, temp_cname, init))
+
+ # Initialize before escaping
+ code.putln("%s = %s;" % (temp_cname, private_cname))
+
+ self.parallel_private_temps.append((temp_cname, private_cname))
+
+ code.end_block() # end critical section
+
+ def fetch_parallel_exception(self, code):
+ """
+ As each OpenMP thread may raise an exception, we need to fetch that
+ exception from the threadstate and save it for after the parallel
+ section where it can be re-raised in the master thread.
+
+ Although it would seem that __pyx_filename, __pyx_lineno and
+ __pyx_clineno are only assigned to under exception conditions (i.e.,
+ when we have the GIL), and thus should be allowed to be shared without
+ any race condition, they are in fact subject to the same race
+        conditions as before, when they were global variables
+ and functions were allowed to release the GIL:
+
+ thread A thread B
+ acquire
+ set lineno
+ release
+ acquire
+ set lineno
+ release
+ acquire
+ fetch exception
+ release
+ skip the fetch
+
+ deallocate threadstate deallocate threadstate
+ """
+ code.begin_block()
+ code.put_ensure_gil(declare_gilstate=True)
+
+ code.putln_openmp("#pragma omp flush(%s)" % Naming.parallel_exc_type)
+ code.putln(
+ "if (!%s) {" % Naming.parallel_exc_type)
+
code.putln("__Pyx_ErrFetchWithState(&%s, &%s, &%s);" % self.parallel_exc)
- pos_info = chain(*zip(self.parallel_pos_info, self.pos_info))
- code.funcstate.uses_error_indicator = True
- code.putln("%s = %s; %s = %s; %s = %s;" % tuple(pos_info))
- code.put_gotref(Naming.parallel_exc_type)
-
- code.putln(
- "}")
-
- code.put_release_ensured_gil()
- code.end_block()
-
- def restore_parallel_exception(self, code):
- "Re-raise a parallel exception"
- code.begin_block()
- code.put_ensure_gil(declare_gilstate=True)
-
- code.put_giveref(Naming.parallel_exc_type)
+ pos_info = chain(*zip(self.parallel_pos_info, self.pos_info))
+ code.funcstate.uses_error_indicator = True
+ code.putln("%s = %s; %s = %s; %s = %s;" % tuple(pos_info))
+ code.put_gotref(Naming.parallel_exc_type)
+
+ code.putln(
+ "}")
+
+ code.put_release_ensured_gil()
+ code.end_block()
+
+ def restore_parallel_exception(self, code):
+ "Re-raise a parallel exception"
+ code.begin_block()
+ code.put_ensure_gil(declare_gilstate=True)
+
+ code.put_giveref(Naming.parallel_exc_type)
code.putln("__Pyx_ErrRestoreWithState(%s, %s, %s);" % self.parallel_exc)
- pos_info = chain(*zip(self.pos_info, self.parallel_pos_info))
- code.putln("%s = %s; %s = %s; %s = %s;" % tuple(pos_info))
-
- code.put_release_ensured_gil()
- code.end_block()
-
- def restore_labels(self, code):
- """
-        Restore all old labels. Call this before the 'else' clause of for
- loops and always before ending the parallel control flow block.
- """
- code.set_all_labels(self.old_loop_labels + (self.old_return_label,
- self.old_error_label))
-
+ pos_info = chain(*zip(self.pos_info, self.parallel_pos_info))
+ code.putln("%s = %s; %s = %s; %s = %s;" % tuple(pos_info))
+
+ code.put_release_ensured_gil()
+ code.end_block()
+
+ def restore_labels(self, code):
+ """
+        Restore all old labels. Call this before the 'else' clause of for
+ loops and always before ending the parallel control flow block.
+ """
+ code.set_all_labels(self.old_loop_labels + (self.old_return_label,
+ self.old_error_label))
+
def end_parallel_control_flow_block(
self, code, break_=False, continue_=False, return_=False):
- """
-        This ends the parallel control flow block and, based on how the parallel
-        section was exited, takes the corresponding action. The break_ and
- continue_ parameters indicate whether these should be propagated
- outwards:
-
- for i in prange(...):
- with cython.parallel.parallel():
- continue
-
-        Here the continue should be trapped in the parallel block, and propagated to
- the for loop.
- """
- c = self.begin_of_parallel_control_block_point
+ """
+        This ends the parallel control flow block and, based on how the parallel
+        section was exited, takes the corresponding action. The break_ and
+ continue_ parameters indicate whether these should be propagated
+ outwards:
+
+ for i in prange(...):
+ with cython.parallel.parallel():
+ continue
+
+        Here the continue should be trapped in the parallel block, and propagated to
+ the for loop.
+ """
+ c = self.begin_of_parallel_control_block_point
self.begin_of_parallel_control_block_point = None
self.begin_of_parallel_control_block_point_after_decls = None
-
+
if self.num_threads is not None:
# FIXME: is it the right place? should not normally produce code.
self.num_threads.generate_disposal_code(code)
self.num_threads.free_temps(code)
- # Firstly, always prefer errors over returning, continue or break
- if self.error_label_used:
+ # Firstly, always prefer errors over returning, continue or break
+ if self.error_label_used:
c.putln("const char *%s = NULL; int %s = 0, %s = 0;" % self.parallel_pos_info)
c.putln("PyObject *%s = NULL, *%s = NULL, *%s = NULL;" % self.parallel_exc)
-
- code.putln(
- "if (%s) {" % Naming.parallel_exc_type)
- code.putln("/* This may have been overridden by a continue, "
- "break or return in another thread. Prefer the error. */")
- code.putln("%s = 4;" % Naming.parallel_why)
- code.putln(
- "}")
-
- if continue_:
- any_label_used = self.any_label_used
- else:
- any_label_used = self.breaking_label_used
-
- if any_label_used:
- # __pyx_parallel_why is used, declare and initialize
- c.putln("int %s;" % Naming.parallel_why)
- c.putln("%s = 0;" % Naming.parallel_why)
-
- code.putln(
- "if (%s) {" % Naming.parallel_why)
-
- for temp_cname, private_cname in self.parallel_private_temps:
- code.putln("%s = %s;" % (private_cname, temp_cname))
-
- code.putln("switch (%s) {" % Naming.parallel_why)
- if continue_:
- code.put(" case 1: ")
- code.put_goto(code.continue_label)
-
- if break_:
- code.put(" case 2: ")
- code.put_goto(code.break_label)
-
+
+ code.putln(
+ "if (%s) {" % Naming.parallel_exc_type)
+ code.putln("/* This may have been overridden by a continue, "
+ "break or return in another thread. Prefer the error. */")
+ code.putln("%s = 4;" % Naming.parallel_why)
+ code.putln(
+ "}")
+
+ if continue_:
+ any_label_used = self.any_label_used
+ else:
+ any_label_used = self.breaking_label_used
+
+ if any_label_used:
+ # __pyx_parallel_why is used, declare and initialize
+ c.putln("int %s;" % Naming.parallel_why)
+ c.putln("%s = 0;" % Naming.parallel_why)
+
+ code.putln(
+ "if (%s) {" % Naming.parallel_why)
+
+ for temp_cname, private_cname in self.parallel_private_temps:
+ code.putln("%s = %s;" % (private_cname, temp_cname))
+
+ code.putln("switch (%s) {" % Naming.parallel_why)
+ if continue_:
+ code.put(" case 1: ")
+ code.put_goto(code.continue_label)
+
+ if break_:
+ code.put(" case 2: ")
+ code.put_goto(code.break_label)
+
if return_:
code.put(" case 3: ")
code.put_goto(code.return_label)
-
- if self.error_label_used:
- code.globalstate.use_utility_code(restore_exception_utility_code)
- code.putln(" case 4:")
- self.restore_parallel_exception(code)
- code.put_goto(code.error_label)
-
- code.putln("}") # end switch
- code.putln(
- "}") # end if
-
- code.end_block() # end parallel control flow block
- self.redef_builtin_expect_apple_gcc_bug(code)
-
- # FIXME: improve with version number for OS X Lion
- buggy_platform_macro_condition = "(defined(__APPLE__) || defined(__OSX__))"
- have_expect_condition = "(defined(__GNUC__) && " \
- "(__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))))"
- redef_condition = "(%s && %s)" % (buggy_platform_macro_condition, have_expect_condition)
-
- def undef_builtin_expect_apple_gcc_bug(self, code):
- """
- A bug on OS X Lion disallows __builtin_expect macros. This code avoids them
- """
- if not self.parent:
- code.undef_builtin_expect(self.redef_condition)
-
- def redef_builtin_expect_apple_gcc_bug(self, code):
- if not self.parent:
- code.redef_builtin_expect(self.redef_condition)
-
-
-class ParallelWithBlockNode(ParallelStatNode):
- """
- This node represents a 'with cython.parallel.parallel():' block
- """
-
- valid_keyword_arguments = ['num_threads']
-
- num_threads = None
-
- def analyse_declarations(self, env):
- super(ParallelWithBlockNode, self).analyse_declarations(env)
- if self.args:
- error(self.pos, "cython.parallel.parallel() does not take "
- "positional arguments")
-
- def generate_execution_code(self, code):
- self.declare_closure_privates(code)
- self.setup_parallel_control_flow_block(code)
-
- code.putln("#ifdef _OPENMP")
- code.put("#pragma omp parallel ")
-
- if self.privates:
- privates = [e.cname for e in self.privates
+
+ if self.error_label_used:
+ code.globalstate.use_utility_code(restore_exception_utility_code)
+ code.putln(" case 4:")
+ self.restore_parallel_exception(code)
+ code.put_goto(code.error_label)
+
+ code.putln("}") # end switch
+ code.putln(
+ "}") # end if
+
+ code.end_block() # end parallel control flow block
+ self.redef_builtin_expect_apple_gcc_bug(code)
+
+ # FIXME: improve with version number for OS X Lion
+ buggy_platform_macro_condition = "(defined(__APPLE__) || defined(__OSX__))"
+ have_expect_condition = "(defined(__GNUC__) && " \
+ "(__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))))"
+ redef_condition = "(%s && %s)" % (buggy_platform_macro_condition, have_expect_condition)
+
+ def undef_builtin_expect_apple_gcc_bug(self, code):
+ """
+ A bug on OS X Lion disallows __builtin_expect macros. This code avoids them
+ """
+ if not self.parent:
+ code.undef_builtin_expect(self.redef_condition)
+
+ def redef_builtin_expect_apple_gcc_bug(self, code):
+ if not self.parent:
+ code.redef_builtin_expect(self.redef_condition)
+
+
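
The switch generated above encodes how a thread leaves the parallel section: __pyx_parallel_why is 1 for continue, 2 for break, 3 for return, and 4 for an error, and a pending exception set in any thread takes precedence over the loop-control cases. A minimal pure-Python sketch of that dispatch, for illustration only; the names why and pending_exception are editorial, not part of the generated code:

# Editorial sketch (not generated code): how the parallel 'why' flag is resolved.
CONTINUE, BREAK, RETURN, ERROR = 1, 2, 3, 4

def resolve_parallel_exit(why, pending_exception):
    # An exception raised in any thread overrides a plain continue/break/return
    # recorded by another thread ("Prefer the error" in the comment above).
    if pending_exception:
        why = ERROR
    if why == CONTINUE:
        return 'goto continue_label'
    elif why == BREAK:
        return 'goto break_label'
    elif why == RETURN:
        return 'goto return_label'
    elif why == ERROR:
        return 'restore exception, goto error_label'
    return 'fall through'

assert resolve_parallel_exit(BREAK, pending_exception=True) == 'restore exception, goto error_label'
assert resolve_parallel_exit(CONTINUE, pending_exception=False) == 'goto continue_label'
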
+class ParallelWithBlockNode(ParallelStatNode):
+ """
+ This node represents a 'with cython.parallel.parallel():' block
+ """
+
+ valid_keyword_arguments = ['num_threads']
+
+ num_threads = None
+
+ def analyse_declarations(self, env):
+ super(ParallelWithBlockNode, self).analyse_declarations(env)
+ if self.args:
+ error(self.pos, "cython.parallel.parallel() does not take "
+ "positional arguments")
+
+ def generate_execution_code(self, code):
+ self.declare_closure_privates(code)
+ self.setup_parallel_control_flow_block(code)
+
+ code.putln("#ifdef _OPENMP")
+ code.put("#pragma omp parallel ")
+
+ if self.privates:
+ privates = [e.cname for e in self.privates
if not e.type.is_pyobject]
code.put('private(%s)' % ', '.join(sorted(privates)))
-
- self.privatization_insertion_point = code.insertion_point()
- self.put_num_threads(code)
- code.putln("")
-
- code.putln("#endif /* _OPENMP */")
-
+
+ self.privatization_insertion_point = code.insertion_point()
+ self.put_num_threads(code)
+ code.putln("")
+
+ code.putln("#endif /* _OPENMP */")
+
code.begin_block() # parallel block
- self.begin_parallel_block(code)
- self.initialize_privates_to_nan(code)
- code.funcstate.start_collecting_temps()
- self.body.generate_execution_code(code)
- self.trap_parallel_exit(code)
- self.privatize_temps(code)
- self.end_parallel_block(code)
+ self.begin_parallel_block(code)
+ self.initialize_privates_to_nan(code)
+ code.funcstate.start_collecting_temps()
+ self.body.generate_execution_code(code)
+ self.trap_parallel_exit(code)
+ self.privatize_temps(code)
+ self.end_parallel_block(code)
code.end_block() # end parallel block
-
- continue_ = code.label_used(code.continue_label)
- break_ = code.label_used(code.break_label)
+
+ continue_ = code.label_used(code.continue_label)
+ break_ = code.label_used(code.break_label)
return_ = code.label_used(code.return_label)
-
- self.restore_labels(code)
- self.end_parallel_control_flow_block(code, break_=break_,
+
+ self.restore_labels(code)
+ self.end_parallel_control_flow_block(code, break_=break_,
continue_=continue_,
return_=return_)
- self.release_closure_privates(code)
-
-
-class ParallelRangeNode(ParallelStatNode):
- """
- This node represents a 'for i in cython.parallel.prange():' construct.
-
- target NameNode the target iteration variable
- else_clause Node or None the else clause of this loop
- """
-
- child_attrs = ['body', 'target', 'else_clause', 'args', 'num_threads',
- 'chunksize']
-
- body = target = else_clause = args = None
-
- start = stop = step = None
-
- is_prange = True
-
- nogil = None
- schedule = None
-
- valid_keyword_arguments = ['schedule', 'nogil', 'num_threads', 'chunksize']
-
- def __init__(self, pos, **kwds):
- super(ParallelRangeNode, self).__init__(pos, **kwds)
- # Pretend to be a ForInStatNode for control flow analysis
- self.iterator = PassStatNode(pos)
-
- def analyse_declarations(self, env):
- super(ParallelRangeNode, self).analyse_declarations(env)
- self.target.analyse_target_declaration(env)
- if self.else_clause is not None:
- self.else_clause.analyse_declarations(env)
-
- if not self.args or len(self.args) > 3:
- error(self.pos, "Invalid number of positional arguments to prange")
- return
-
- if len(self.args) == 1:
- self.stop, = self.args
- elif len(self.args) == 2:
- self.start, self.stop = self.args
- else:
- self.start, self.stop, self.step = self.args
-
- if hasattr(self.schedule, 'decode'):
- self.schedule = self.schedule.decode('ascii')
-
+ self.release_closure_privates(code)
+
+
+class ParallelRangeNode(ParallelStatNode):
+ """
+ This node represents a 'for i in cython.parallel.prange():' construct.
+
+ target NameNode the target iteration variable
+ else_clause Node or None the else clause of this loop
+ """
+
+ child_attrs = ['body', 'target', 'else_clause', 'args', 'num_threads',
+ 'chunksize']
+
+ body = target = else_clause = args = None
+
+ start = stop = step = None
+
+ is_prange = True
+
+ nogil = None
+ schedule = None
+
+ valid_keyword_arguments = ['schedule', 'nogil', 'num_threads', 'chunksize']
+
+ def __init__(self, pos, **kwds):
+ super(ParallelRangeNode, self).__init__(pos, **kwds)
+ # Pretend to be a ForInStatNode for control flow analysis
+ self.iterator = PassStatNode(pos)
+
+ def analyse_declarations(self, env):
+ super(ParallelRangeNode, self).analyse_declarations(env)
+ self.target.analyse_target_declaration(env)
+ if self.else_clause is not None:
+ self.else_clause.analyse_declarations(env)
+
+ if not self.args or len(self.args) > 3:
+ error(self.pos, "Invalid number of positional arguments to prange")
+ return
+
+ if len(self.args) == 1:
+ self.stop, = self.args
+ elif len(self.args) == 2:
+ self.start, self.stop = self.args
+ else:
+ self.start, self.stop, self.step = self.args
+
+ if hasattr(self.schedule, 'decode'):
+ self.schedule = self.schedule.decode('ascii')
+
if self.schedule not in (None, 'static', 'dynamic', 'guided', 'runtime'):
error(self.pos, "Invalid schedule argument to prange: %s" % (self.schedule,))
-
- def analyse_expressions(self, env):
- was_nogil = env.nogil
- if self.nogil:
- env.nogil = True
-
- if self.target is None:
- error(self.pos, "prange() can only be used as part of a for loop")
- return self
-
- self.target = self.target.analyse_target_types(env)
-
- if not self.target.type.is_numeric:
- # Not a valid type, assume one for now anyway
-
- if not self.target.type.is_pyobject:
- # nogil_check will catch the is_pyobject case
- error(self.target.pos,
- "Must be of numeric type, not %s" % self.target.type)
-
- self.index_type = PyrexTypes.c_py_ssize_t_type
- else:
- self.index_type = self.target.type
-
- # Setup start, stop and step, allocating temps if needed
- self.names = 'start', 'stop', 'step'
- start_stop_step = self.start, self.stop, self.step
-
- for node, name in zip(start_stop_step, self.names):
- if node is not None:
- node.analyse_types(env)
- if not node.type.is_numeric:
- error(node.pos, "%s argument must be numeric" % name)
- continue
-
- if not node.is_literal:
- node = node.coerce_to_temp(env)
- setattr(self, name, node)
-
- # As we range from 0 to nsteps, computing the index along the
- # way, we need a fitting type for 'i' and 'nsteps'
- self.index_type = PyrexTypes.widest_numeric_type(
+
+ def analyse_expressions(self, env):
+ was_nogil = env.nogil
+ if self.nogil:
+ env.nogil = True
+
+ if self.target is None:
+ error(self.pos, "prange() can only be used as part of a for loop")
+ return self
+
+ self.target = self.target.analyse_target_types(env)
+
+ if not self.target.type.is_numeric:
+ # Not a valid type, assume one for now anyway
+
+ if not self.target.type.is_pyobject:
+ # nogil_check will catch the is_pyobject case
+ error(self.target.pos,
+ "Must be of numeric type, not %s" % self.target.type)
+
+ self.index_type = PyrexTypes.c_py_ssize_t_type
+ else:
+ self.index_type = self.target.type
+
+ # Setup start, stop and step, allocating temps if needed
+ self.names = 'start', 'stop', 'step'
+ start_stop_step = self.start, self.stop, self.step
+
+ for node, name in zip(start_stop_step, self.names):
+ if node is not None:
+ node.analyse_types(env)
+ if not node.type.is_numeric:
+ error(node.pos, "%s argument must be numeric" % name)
+ continue
+
+ if not node.is_literal:
+ node = node.coerce_to_temp(env)
+ setattr(self, name, node)
+
+ # As we range from 0 to nsteps, computing the index along the
+ # way, we need a fitting type for 'i' and 'nsteps'
+ self.index_type = PyrexTypes.widest_numeric_type(
self.index_type, node.type)
-
- if self.else_clause is not None:
- self.else_clause = self.else_clause.analyse_expressions(env)
-
- # Although not actually an assignment in this scope, it should be
- # treated as such to ensure it is unpacked if a closure temp, and to
- # ensure lastprivate behaviour and propagation. If the target index is
- # not a NameNode, it won't have an entry, and an error was issued by
- # ParallelRangeTransform
- if hasattr(self.target, 'entry'):
- self.assignments[self.target.entry] = self.target.pos, None
-
- node = super(ParallelRangeNode, self).analyse_expressions(env)
-
- if node.chunksize:
- if not node.schedule:
- error(node.chunksize.pos,
- "Must provide schedule with chunksize")
- elif node.schedule == 'runtime':
- error(node.chunksize.pos,
- "Chunksize not valid for the schedule runtime")
- elif (node.chunksize.type.is_int and
- node.chunksize.is_literal and
- node.chunksize.compile_time_value(env) <= 0):
- error(node.chunksize.pos, "Chunksize must not be negative")
-
- node.chunksize = node.chunksize.coerce_to(
- PyrexTypes.c_int_type, env).coerce_to_temp(env)
-
- if node.nogil:
- env.nogil = was_nogil
-
- node.is_nested_prange = node.parent and node.parent.is_prange
- if node.is_nested_prange:
- parent = node
- while parent.parent and parent.parent.is_prange:
- parent = parent.parent
-
- parent.assignments.update(node.assignments)
- parent.privates.update(node.privates)
- parent.assigned_nodes.extend(node.assigned_nodes)
- return node
-
- def nogil_check(self, env):
- names = 'start', 'stop', 'step', 'target'
- nodes = self.start, self.stop, self.step, self.target
- for name, node in zip(names, nodes):
- if node is not None and node.type.is_pyobject:
- error(node.pos, "%s may not be a Python object "
- "as we don't have the GIL" % name)
-
- def generate_execution_code(self, code):
- """
- Generate code in the following steps
-
- 1) copy any closure variables determined thread-private
- into temporaries
-
- 2) allocate temps for start, stop and step
-
- 3) generate a loop that calculates the total number of steps,
- which then computes the target iteration variable for every step:
-
- for i in prange(start, stop, step):
- ...
-
- becomes
-
- nsteps = (stop - start) / step;
- i = start;
-
- #pragma omp parallel for lastprivate(i)
- for (temp = 0; temp < nsteps; temp++) {
- i = start + step * temp;
- ...
- }
-
- Note that accumulation of 'i' would have a data dependency
- between iterations.
-
- Also, you can't do this
-
- for (i = start; i < stop; i += step)
- ...
-
- as the '<' operator should become '>' for descending loops.
- 'for i from x < i < y:' does not suffer from this problem
- as the relational operator is known at compile time!
-
- 4) release our temps and write back any private closure variables
- """
- self.declare_closure_privates(code)
-
- # This can only be a NameNode
- target_index_cname = self.target.entry.cname
-
- # This will be used as the dict to format our code strings, holding
- # the start, stop , step, temps and target cnames
- fmt_dict = {
- 'target': target_index_cname,
+
+ if self.else_clause is not None:
+ self.else_clause = self.else_clause.analyse_expressions(env)
+
+ # Although not actually an assignment in this scope, it should be
+ # treated as such to ensure it is unpacked if a closure temp, and to
+ # ensure lastprivate behaviour and propagation. If the target index is
+ # not a NameNode, it won't have an entry, and an error was issued by
+ # ParallelRangeTransform
+ if hasattr(self.target, 'entry'):
+ self.assignments[self.target.entry] = self.target.pos, None
+
+ node = super(ParallelRangeNode, self).analyse_expressions(env)
+
+ if node.chunksize:
+ if not node.schedule:
+ error(node.chunksize.pos,
+ "Must provide schedule with chunksize")
+ elif node.schedule == 'runtime':
+ error(node.chunksize.pos,
+ "Chunksize not valid for the schedule runtime")
+ elif (node.chunksize.type.is_int and
+ node.chunksize.is_literal and
+ node.chunksize.compile_time_value(env) <= 0):
+ error(node.chunksize.pos, "Chunksize must not be negative")
+
+ node.chunksize = node.chunksize.coerce_to(
+ PyrexTypes.c_int_type, env).coerce_to_temp(env)
+
+ if node.nogil:
+ env.nogil = was_nogil
+
+ node.is_nested_prange = node.parent and node.parent.is_prange
+ if node.is_nested_prange:
+ parent = node
+ while parent.parent and parent.parent.is_prange:
+ parent = parent.parent
+
+ parent.assignments.update(node.assignments)
+ parent.privates.update(node.privates)
+ parent.assigned_nodes.extend(node.assigned_nodes)
+ return node
+
+ def nogil_check(self, env):
+ names = 'start', 'stop', 'step', 'target'
+ nodes = self.start, self.stop, self.step, self.target
+ for name, node in zip(names, nodes):
+ if node is not None and node.type.is_pyobject:
+ error(node.pos, "%s may not be a Python object "
+ "as we don't have the GIL" % name)
+
+ def generate_execution_code(self, code):
+ """
+ Generate code in the following steps
+
+ 1) copy any closure variables determined thread-private
+ into temporaries
+
+ 2) allocate temps for start, stop and step
+
+ 3) generate a loop that calculates the total number of steps,
+ which then computes the target iteration variable for every step:
+
+ for i in prange(start, stop, step):
+ ...
+
+ becomes
+
+ nsteps = (stop - start) / step;
+ i = start;
+
+ #pragma omp parallel for lastprivate(i)
+ for (temp = 0; temp < nsteps; temp++) {
+ i = start + step * temp;
+ ...
+ }
+
+ Note that accumulation of 'i' would have a data dependency
+ between iterations.
+
+ Also, you can't do this
+
+ for (i = start; i < stop; i += step)
+ ...
+
+ as the '<' operator should become '>' for descending loops.
+ 'for i from x < i < y:' does not suffer from this problem
+ as the relational operator is known at compile time!
+
+ 4) release our temps and write back any private closure variables
+ """
+ self.declare_closure_privates(code)
+
+ # This can only be a NameNode
+ target_index_cname = self.target.entry.cname
+
+ # This will be used as the dict to format our code strings, holding
+ # the start, stop , step, temps and target cnames
+ fmt_dict = {
+ 'target': target_index_cname,
'target_type': self.target.type.empty_declaration_code()
- }
-
- # Setup start, stop and step, allocating temps if needed
- start_stop_step = self.start, self.stop, self.step
- defaults = '0', '0', '1'
- for node, name, default in zip(start_stop_step, self.names, defaults):
- if node is None:
- result = default
- elif node.is_literal:
- result = node.get_constant_c_result_code()
- else:
- node.generate_evaluation_code(code)
- result = node.result()
-
- fmt_dict[name] = result
-
- fmt_dict['i'] = code.funcstate.allocate_temp(self.index_type, False)
- fmt_dict['nsteps'] = code.funcstate.allocate_temp(self.index_type, False)
-
- # TODO: check if the step is 0 and if so, raise an exception in a
- # 'with gil' block. For now, just abort
+ }
+
+ # Setup start, stop and step, allocating temps if needed
+ start_stop_step = self.start, self.stop, self.step
+ defaults = '0', '0', '1'
+ for node, name, default in zip(start_stop_step, self.names, defaults):
+ if node is None:
+ result = default
+ elif node.is_literal:
+ result = node.get_constant_c_result_code()
+ else:
+ node.generate_evaluation_code(code)
+ result = node.result()
+
+ fmt_dict[name] = result
+
+ fmt_dict['i'] = code.funcstate.allocate_temp(self.index_type, False)
+ fmt_dict['nsteps'] = code.funcstate.allocate_temp(self.index_type, False)
+
+ # TODO: check if the step is 0 and if so, raise an exception in a
+ # 'with gil' block. For now, just abort
code.putln("if ((%(step)s == 0)) abort();" % fmt_dict)
-
- self.setup_parallel_control_flow_block(code) # parallel control flow block
-
- # Note: nsteps is private in an outer scope if present
+
+ self.setup_parallel_control_flow_block(code) # parallel control flow block
+
+ # Note: nsteps is private in an outer scope if present
code.putln("%(nsteps)s = (%(stop)s - %(start)s + %(step)s - %(step)s/abs(%(step)s)) / %(step)s;" % fmt_dict)
-
- # The target iteration variable might not be initialized, do it only if
- # we are executing at least 1 iteration, otherwise we should leave the
- # target unaffected. The target iteration variable is firstprivate to
- # shut up compiler warnings caused by lastprivate, as the compiler
- # erroneously believes that nsteps may be <= 0, leaving the private
- # target index uninitialized
- code.putln("if (%(nsteps)s > 0)" % fmt_dict)
- code.begin_block() # if block
- self.generate_loop(code, fmt_dict)
- code.end_block() # end if block
-
- self.restore_labels(code)
-
- if self.else_clause:
- if self.breaking_label_used:
- code.put("if (%s < 2)" % Naming.parallel_why)
-
- code.begin_block() # else block
- code.putln("/* else */")
- self.else_clause.generate_execution_code(code)
- code.end_block() # end else block
-
- # ------ cleanup ------
- self.end_parallel_control_flow_block(code) # end parallel control flow block
-
- # And finally, release our privates and write back any closure
- # variables
+
+ # The target iteration variable might not be initialized, do it only if
+ # we are executing at least 1 iteration, otherwise we should leave the
+ # target unaffected. The target iteration variable is firstprivate to
+ # shut up compiler warnings caused by lastprivate, as the compiler
+ # erroneously believes that nsteps may be <= 0, leaving the private
+ # target index uninitialized
+ code.putln("if (%(nsteps)s > 0)" % fmt_dict)
+ code.begin_block() # if block
+ self.generate_loop(code, fmt_dict)
+ code.end_block() # end if block
+
+ self.restore_labels(code)
+
+ if self.else_clause:
+ if self.breaking_label_used:
+ code.put("if (%s < 2)" % Naming.parallel_why)
+
+ code.begin_block() # else block
+ code.putln("/* else */")
+ self.else_clause.generate_execution_code(code)
+ code.end_block() # end else block
+
+ # ------ cleanup ------
+ self.end_parallel_control_flow_block(code) # end parallel control flow block
+
+ # And finally, release our privates and write back any closure
+ # variables
for temp in start_stop_step + (self.chunksize,):
- if temp is not None:
- temp.generate_disposal_code(code)
- temp.free_temps(code)
-
- code.funcstate.release_temp(fmt_dict['i'])
- code.funcstate.release_temp(fmt_dict['nsteps'])
-
- self.release_closure_privates(code)
-
- def generate_loop(self, code, fmt_dict):
- if self.is_nested_prange:
- code.putln("#if 0")
- else:
- code.putln("#ifdef _OPENMP")
-
- if not self.is_parallel:
- code.put("#pragma omp for")
- self.privatization_insertion_point = code.insertion_point()
- reduction_codepoint = self.parent.privatization_insertion_point
- else:
- code.put("#pragma omp parallel")
- self.privatization_insertion_point = code.insertion_point()
- reduction_codepoint = self.privatization_insertion_point
- code.putln("")
- code.putln("#endif /* _OPENMP */")
-
- code.begin_block() # pragma omp parallel begin block
-
- # Initialize the GIL if needed for this thread
- self.begin_parallel_block(code)
-
- if self.is_nested_prange:
- code.putln("#if 0")
- else:
- code.putln("#ifdef _OPENMP")
- code.put("#pragma omp for")
-
+ if temp is not None:
+ temp.generate_disposal_code(code)
+ temp.free_temps(code)
+
+ code.funcstate.release_temp(fmt_dict['i'])
+ code.funcstate.release_temp(fmt_dict['nsteps'])
+
+ self.release_closure_privates(code)
+
+ def generate_loop(self, code, fmt_dict):
+ if self.is_nested_prange:
+ code.putln("#if 0")
+ else:
+ code.putln("#ifdef _OPENMP")
+
+ if not self.is_parallel:
+ code.put("#pragma omp for")
+ self.privatization_insertion_point = code.insertion_point()
+ reduction_codepoint = self.parent.privatization_insertion_point
+ else:
+ code.put("#pragma omp parallel")
+ self.privatization_insertion_point = code.insertion_point()
+ reduction_codepoint = self.privatization_insertion_point
+ code.putln("")
+ code.putln("#endif /* _OPENMP */")
+
+ code.begin_block() # pragma omp parallel begin block
+
+ # Initialize the GIL if needed for this thread
+ self.begin_parallel_block(code)
+
+ if self.is_nested_prange:
+ code.putln("#if 0")
+ else:
+ code.putln("#ifdef _OPENMP")
+ code.put("#pragma omp for")
+
for entry, (op, lastprivate) in sorted(self.privates.items()):
- # Don't declare the index variable as a reduction
- if op and op in "+*-&^|" and entry != self.target.entry:
- if entry.type.is_pyobject:
- error(self.pos, "Python objects cannot be reductions")
- else:
- #code.put(" reduction(%s:%s)" % (op, entry.cname))
- # This is the only way reductions + nesting works in gcc4.5
- reduction_codepoint.put(
- " reduction(%s:%s)" % (op, entry.cname))
- else:
- if entry == self.target.entry:
- code.put(" firstprivate(%s)" % entry.cname)
- code.put(" lastprivate(%s)" % entry.cname)
- continue
-
- if not entry.type.is_pyobject:
- if lastprivate:
- private = 'lastprivate'
- else:
- private = 'private'
-
- code.put(" %s(%s)" % (private, entry.cname))
-
- if self.schedule:
- if self.chunksize:
+ # Don't declare the index variable as a reduction
+ if op and op in "+*-&^|" and entry != self.target.entry:
+ if entry.type.is_pyobject:
+ error(self.pos, "Python objects cannot be reductions")
+ else:
+ #code.put(" reduction(%s:%s)" % (op, entry.cname))
+ # This is the only way reductions + nesting works in gcc4.5
+ reduction_codepoint.put(
+ " reduction(%s:%s)" % (op, entry.cname))
+ else:
+ if entry == self.target.entry:
+ code.put(" firstprivate(%s)" % entry.cname)
+ code.put(" lastprivate(%s)" % entry.cname)
+ continue
+
+ if not entry.type.is_pyobject:
+ if lastprivate:
+ private = 'lastprivate'
+ else:
+ private = 'private'
+
+ code.put(" %s(%s)" % (private, entry.cname))
+
+ if self.schedule:
+ if self.chunksize:
chunksize = ", %s" % self.evaluate_before_block(code, self.chunksize)
- else:
- chunksize = ""
-
- code.put(" schedule(%s%s)" % (self.schedule, chunksize))
-
- self.put_num_threads(reduction_codepoint)
-
- code.putln("")
- code.putln("#endif /* _OPENMP */")
-
- code.put("for (%(i)s = 0; %(i)s < %(nsteps)s; %(i)s++)" % fmt_dict)
+ else:
+ chunksize = ""
+
+ code.put(" schedule(%s%s)" % (self.schedule, chunksize))
+
+ self.put_num_threads(reduction_codepoint)
+
+ code.putln("")
+ code.putln("#endif /* _OPENMP */")
+
+ code.put("for (%(i)s = 0; %(i)s < %(nsteps)s; %(i)s++)" % fmt_dict)
code.begin_block() # for loop block
-
- guard_around_body_codepoint = code.insertion_point()
-
- # Start if guard block around the body. This may be unnecessary, but
- # at least it doesn't spoil indentation
- code.begin_block()
-
+
+ guard_around_body_codepoint = code.insertion_point()
+
+ # Start if guard block around the body. This may be unnecessary, but
+ # at least it doesn't spoil indentation
+ code.begin_block()
+
code.putln("%(target)s = (%(target_type)s)(%(start)s + %(step)s * %(i)s);" % fmt_dict)
- self.initialize_privates_to_nan(code, exclude=self.target.entry)
-
+ self.initialize_privates_to_nan(code, exclude=self.target.entry)
+
if self.is_parallel and not self.is_nested_prange:
# nested pranges are not omp'ified, temps go to outer loops
- code.funcstate.start_collecting_temps()
-
- self.body.generate_execution_code(code)
- self.trap_parallel_exit(code, should_flush=True)
+ code.funcstate.start_collecting_temps()
+
+ self.body.generate_execution_code(code)
+ self.trap_parallel_exit(code, should_flush=True)
if self.is_parallel and not self.is_nested_prange:
# nested pranges are not omp'ified, temps go to outer loops
self.privatize_temps(code)
-
- if self.breaking_label_used:
- # Put a guard around the loop body in case return, break or
- # exceptions might be used
- guard_around_body_codepoint.putln("if (%s < 2)" % Naming.parallel_why)
-
+
+ if self.breaking_label_used:
+ # Put a guard around the loop body in case return, break or
+ # exceptions might be used
+ guard_around_body_codepoint.putln("if (%s < 2)" % Naming.parallel_why)
+
code.end_block() # end guard around loop body
code.end_block() # end for loop block
-
- if self.is_parallel:
- # Release the GIL and deallocate the thread state
- self.end_parallel_block(code)
+
+ if self.is_parallel:
+ # Release the GIL and deallocate the thread state
+ self.end_parallel_block(code)
code.end_block() # pragma omp parallel end block
-
-
-class CnameDecoratorNode(StatNode):
- """
- This node is for the cname decorator in CythonUtilityCode:
-
- @cname('the_cname')
- cdef func(...):
- ...
-
- In case of a cdef class the cname specifies the objstruct_cname.
-
- node the node to which the cname decorator is applied
- cname the cname the node should get
- """
-
- child_attrs = ['node']
-
- def analyse_declarations(self, env):
- self.node.analyse_declarations(env)
-
- node = self.node
- if isinstance(node, CompilerDirectivesNode):
- node = node.body.stats[0]
-
- self.is_function = isinstance(node, FuncDefNode)
+
+
+class CnameDecoratorNode(StatNode):
+ """
+ This node is for the cname decorator in CythonUtilityCode:
+
+ @cname('the_cname')
+ cdef func(...):
+ ...
+
+ In case of a cdef class the cname specifies the objstruct_cname.
+
+ node the node to which the cname decorator is applied
+ cname the cname the node should get
+ """
+
+ child_attrs = ['node']
+
+ def analyse_declarations(self, env):
+ self.node.analyse_declarations(env)
+
+ node = self.node
+ if isinstance(node, CompilerDirectivesNode):
+ node = node.body.stats[0]
+
+ self.is_function = isinstance(node, FuncDefNode)
is_struct_or_enum = isinstance(node, (CStructOrUnionDefNode, CEnumDefNode))
- e = node.entry
-
- if self.is_function:
- e.cname = self.cname
- e.func_cname = self.cname
- e.used = True
- if e.pyfunc_cname and '.' in e.pyfunc_cname:
- e.pyfunc_cname = self.mangle(e.pyfunc_cname)
- elif is_struct_or_enum:
- e.cname = e.type.cname = self.cname
- else:
- scope = node.scope
-
- e.cname = self.cname
- e.type.objstruct_cname = self.cname + '_obj'
- e.type.typeobj_cname = Naming.typeobj_prefix + self.cname
- e.type.typeptr_cname = self.cname + '_type'
- e.type.scope.namespace_cname = e.type.typeptr_cname
-
+ e = node.entry
+
+ if self.is_function:
+ e.cname = self.cname
+ e.func_cname = self.cname
+ e.used = True
+ if e.pyfunc_cname and '.' in e.pyfunc_cname:
+ e.pyfunc_cname = self.mangle(e.pyfunc_cname)
+ elif is_struct_or_enum:
+ e.cname = e.type.cname = self.cname
+ else:
+ scope = node.scope
+
+ e.cname = self.cname
+ e.type.objstruct_cname = self.cname + '_obj'
+ e.type.typeobj_cname = Naming.typeobj_prefix + self.cname
+ e.type.typeptr_cname = self.cname + '_type'
+ e.type.scope.namespace_cname = e.type.typeptr_cname
+
e.as_variable.cname = e.type.typeptr_cname
-
- scope.scope_prefix = self.cname + "_"
-
+
+ scope.scope_prefix = self.cname + "_"
+
for name, entry in scope.entries.items():
- if entry.func_cname:
- entry.func_cname = self.mangle(entry.cname)
- if entry.pyfunc_cname:
- entry.pyfunc_cname = self.mangle(entry.pyfunc_cname)
-
- def mangle(self, cname):
- if '.' in cname:
- # remove __pyx_base from func_cname
- cname = cname.split('.')[-1]
- return '%s_%s' % (self.cname, cname)
-
- def analyse_expressions(self, env):
- self.node = self.node.analyse_expressions(env)
- return self
-
- def generate_function_definitions(self, env, code):
- "Ensure a prototype for every @cname method in the right place"
- if self.is_function and env.is_c_class_scope:
- # method in cdef class, generate a prototype in the header
- h_code = code.globalstate['utility_code_proto']
-
- if isinstance(self.node, DefNode):
- self.node.generate_function_header(
+ if entry.func_cname:
+ entry.func_cname = self.mangle(entry.cname)
+ if entry.pyfunc_cname:
+ entry.pyfunc_cname = self.mangle(entry.pyfunc_cname)
+
+ def mangle(self, cname):
+ if '.' in cname:
+ # remove __pyx_base from func_cname
+ cname = cname.split('.')[-1]
+ return '%s_%s' % (self.cname, cname)
+
+ def analyse_expressions(self, env):
+ self.node = self.node.analyse_expressions(env)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ "Ensure a prototype for every @cname method in the right place"
+ if self.is_function and env.is_c_class_scope:
+ # method in cdef class, generate a prototype in the header
+ h_code = code.globalstate['utility_code_proto']
+
+ if isinstance(self.node, DefNode):
+ self.node.generate_function_header(
h_code, with_pymethdef=False, proto_only=True)
- else:
- from . import ModuleNode
- entry = self.node.entry
- cname = entry.cname
- entry.cname = entry.func_cname
-
- ModuleNode.generate_cfunction_declaration(
+ else:
+ from . import ModuleNode
+ entry = self.node.entry
+ cname = entry.cname
+ entry.cname = entry.func_cname
+
+ ModuleNode.generate_cfunction_declaration(
entry,
env.global_scope(),
h_code,
definition=True)
-
- entry.cname = cname
-
- self.node.generate_function_definitions(env, code)
-
- def generate_execution_code(self, code):
- self.node.generate_execution_code(code)
-
-
-#------------------------------------------------------------------------------------
-#
-# Runtime support code
-#
-#------------------------------------------------------------------------------------
-
-if Options.gcc_branch_hints:
- branch_prediction_macros = """
-/* Test for GCC > 2.95 */
-#if defined(__GNUC__) \
- && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
- #define likely(x) __builtin_expect(!!(x), 1)
- #define unlikely(x) __builtin_expect(!!(x), 0)
-#else /* !__GNUC__ or GCC < 2.95 */
- #define likely(x) (x)
- #define unlikely(x) (x)
-#endif /* __GNUC__ */
-"""
-else:
- branch_prediction_macros = """
-#define likely(x) (x)
-#define unlikely(x) (x)
-"""
-
-#------------------------------------------------------------------------------------
-
-printing_utility_code = UtilityCode.load_cached("Print", "Printing.c")
-printing_one_utility_code = UtilityCode.load_cached("PrintOne", "Printing.c")
-
-#------------------------------------------------------------------------------------
-
-# Exception raising code
-#
-# Exceptions are raised by __Pyx_Raise() and stored as plain
-# type/value/tb in PyThreadState->curexc_*. When being caught by an
-# 'except' statement, curexc_* is moved over to exc_* by
-# __Pyx_GetException()
-
-restore_exception_utility_code = UtilityCode.load_cached("PyErrFetchRestore", "Exceptions.c")
-raise_utility_code = UtilityCode.load_cached("RaiseException", "Exceptions.c")
-get_exception_utility_code = UtilityCode.load_cached("GetException", "Exceptions.c")
-swap_exception_utility_code = UtilityCode.load_cached("SwapException", "Exceptions.c")
-reset_exception_utility_code = UtilityCode.load_cached("SaveResetException", "Exceptions.c")
-traceback_utility_code = UtilityCode.load_cached("AddTraceback", "Exceptions.c")
-
-#------------------------------------------------------------------------------------
-
+
+ entry.cname = cname
+
+ self.node.generate_function_definitions(env, code)
+
+ def generate_execution_code(self, code):
+ self.node.generate_execution_code(code)
+
+
+#------------------------------------------------------------------------------------
+#
+# Runtime support code
+#
+#------------------------------------------------------------------------------------
+
+if Options.gcc_branch_hints:
+ branch_prediction_macros = """
+/* Test for GCC > 2.95 */
+#if defined(__GNUC__) \
+ && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
+ #define likely(x) __builtin_expect(!!(x), 1)
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* !__GNUC__ or GCC < 2.95 */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+#endif /* __GNUC__ */
+"""
+else:
+ branch_prediction_macros = """
+#define likely(x) (x)
+#define unlikely(x) (x)
+"""
+
+#------------------------------------------------------------------------------------
+
+printing_utility_code = UtilityCode.load_cached("Print", "Printing.c")
+printing_one_utility_code = UtilityCode.load_cached("PrintOne", "Printing.c")
+
+#------------------------------------------------------------------------------------
+
+# Exception raising code
+#
+# Exceptions are raised by __Pyx_Raise() and stored as plain
+# type/value/tb in PyThreadState->curexc_*. When being caught by an
+# 'except' statement, curexc_* is moved over to exc_* by
+# __Pyx_GetException()
+
+restore_exception_utility_code = UtilityCode.load_cached("PyErrFetchRestore", "Exceptions.c")
+raise_utility_code = UtilityCode.load_cached("RaiseException", "Exceptions.c")
+get_exception_utility_code = UtilityCode.load_cached("GetException", "Exceptions.c")
+swap_exception_utility_code = UtilityCode.load_cached("SwapException", "Exceptions.c")
+reset_exception_utility_code = UtilityCode.load_cached("SaveResetException", "Exceptions.c")
+traceback_utility_code = UtilityCode.load_cached("AddTraceback", "Exceptions.c")
+
+#------------------------------------------------------------------------------------
+
get_exception_tuple_utility_code = UtilityCode(
proto="""
static PyObject *__Pyx_GetExceptionTuple(PyThreadState *__pyx_tstate); /*proto*/
-""",
+""",
# I doubt that calling __Pyx_GetException() here is correct as it moves
# the exception from tstate->curexc_* to tstate->exc_*, which prevents
# exception handlers later on from receiving it.
# NOTE: "__pyx_tstate" may be used by __Pyx_GetException() macro
impl = """
static PyObject *__Pyx_GetExceptionTuple(CYTHON_UNUSED PyThreadState *__pyx_tstate) {
- PyObject *type = NULL, *value = NULL, *tb = NULL;
- if (__Pyx_GetException(&type, &value, &tb) == 0) {
- PyObject* exc_info = PyTuple_New(3);
- if (exc_info) {
- Py_INCREF(type);
- Py_INCREF(value);
- Py_INCREF(tb);
- PyTuple_SET_ITEM(exc_info, 0, type);
- PyTuple_SET_ITEM(exc_info, 1, value);
- PyTuple_SET_ITEM(exc_info, 2, tb);
- return exc_info;
- }
- }
- return NULL;
-}
-""",
+ PyObject *type = NULL, *value = NULL, *tb = NULL;
+ if (__Pyx_GetException(&type, &value, &tb) == 0) {
+ PyObject* exc_info = PyTuple_New(3);
+ if (exc_info) {
+ Py_INCREF(type);
+ Py_INCREF(value);
+ Py_INCREF(tb);
+ PyTuple_SET_ITEM(exc_info, 0, type);
+ PyTuple_SET_ITEM(exc_info, 1, value);
+ PyTuple_SET_ITEM(exc_info, 2, tb);
+ return exc_info;
+ }
+ }
+ return NULL;
+}
+""",
requires=[get_exception_utility_code])
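
The generate_execution_code docstring of ParallelRangeNode above spells out the transformation: the loop runs over a step count nsteps and recomputes the target index on every iteration, so no data dependency on the index survives between iterations. A minimal sketch of that index arithmetic, assuming a positive step; the real generated code additionally guards the loop with if (nsteps > 0) and marks the target firstprivate/lastprivate:

# Editorial sketch (positive step only) of the prange index arithmetic.
def prange_indices(start, stop, step):
    # Mirrors the generated C:  nsteps = (stop - start + step - step/abs(step)) / step
    # For step > 0 this is a round-up division, i.e. the number of iterations.
    nsteps = (stop - start + step - step // abs(step)) // step
    # Each iteration recomputes the target from the step count, so there is no
    # cross-iteration dependency on 'i' and the iterations can run in parallel.
    return [start + step * t for t in range(max(nsteps, 0))]

assert prange_indices(0, 10, 3) == [0, 3, 6, 9]   # nsteps = (10 + 3 - 1) // 3 = 4
assert prange_indices(5, 5, 1) == []              # empty range, loop body never runs
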
diff --git a/contrib/tools/cython/Cython/Compiler/Optimize.py b/contrib/tools/cython/Cython/Compiler/Optimize.py
index fa479644cd..3cb77efe2c 100644
--- a/contrib/tools/cython/Cython/Compiler/Optimize.py
+++ b/contrib/tools/cython/Cython/Compiler/Optimize.py
@@ -1,18 +1,18 @@
-from __future__ import absolute_import
-
+from __future__ import absolute_import
+
import re
import sys
import copy
import codecs
import itertools
-from . import TypeSlots
-from .ExprNodes import not_a_constant
-import cython
+from . import TypeSlots
+from .ExprNodes import not_a_constant
+import cython
cython.declare(UtilityCode=object, EncodedString=object, bytes_literal=object, encoded_string=object,
- Nodes=object, ExprNodes=object, PyrexTypes=object, Builtin=object,
+ Nodes=object, ExprNodes=object, PyrexTypes=object, Builtin=object,
UtilNodes=object, _py_int_types=object)
-
+
if sys.version_info[0] >= 3:
_py_int_types = int
_py_string_types = (bytes, str)
@@ -20,61 +20,61 @@ else:
_py_int_types = (int, long)
_py_string_types = (bytes, unicode)
-from . import Nodes
-from . import ExprNodes
-from . import PyrexTypes
-from . import Visitor
-from . import Builtin
-from . import UtilNodes
-from . import Options
-
+from . import Nodes
+from . import ExprNodes
+from . import PyrexTypes
+from . import Visitor
+from . import Builtin
+from . import UtilNodes
+from . import Options
+
from .Code import UtilityCode, TempitaUtilityCode
from .StringEncoding import EncodedString, bytes_literal, encoded_string
from .Errors import error, warning
-from .ParseTreeTransforms import SkipDeclarations
-
-try:
- from __builtin__ import reduce
-except ImportError:
- from functools import reduce
-
-try:
- from __builtin__ import basestring
-except ImportError:
- basestring = str # Python 3
-
-
-def load_c_utility(name):
- return UtilityCode.load_cached(name, "Optimize.c")
-
-
-def unwrap_coerced_node(node, coercion_nodes=(ExprNodes.CoerceToPyTypeNode, ExprNodes.CoerceFromPyTypeNode)):
- if isinstance(node, coercion_nodes):
- return node.arg
- return node
-
-
-def unwrap_node(node):
- while isinstance(node, UtilNodes.ResultRefNode):
- node = node.expression
- return node
-
-
-def is_common_value(a, b):
- a = unwrap_node(a)
- b = unwrap_node(b)
- if isinstance(a, ExprNodes.NameNode) and isinstance(b, ExprNodes.NameNode):
- return a.name == b.name
- if isinstance(a, ExprNodes.AttributeNode) and isinstance(b, ExprNodes.AttributeNode):
- return not a.is_py_attr and is_common_value(a.obj, b.obj) and a.attribute == b.attribute
- return False
-
-
-def filter_none_node(node):
- if node is not None and node.constant_result is None:
- return None
- return node
-
+from .ParseTreeTransforms import SkipDeclarations
+
+try:
+ from __builtin__ import reduce
+except ImportError:
+ from functools import reduce
+
+try:
+ from __builtin__ import basestring
+except ImportError:
+ basestring = str # Python 3
+
+
+def load_c_utility(name):
+ return UtilityCode.load_cached(name, "Optimize.c")
+
+
+def unwrap_coerced_node(node, coercion_nodes=(ExprNodes.CoerceToPyTypeNode, ExprNodes.CoerceFromPyTypeNode)):
+ if isinstance(node, coercion_nodes):
+ return node.arg
+ return node
+
+
+def unwrap_node(node):
+ while isinstance(node, UtilNodes.ResultRefNode):
+ node = node.expression
+ return node
+
+
+def is_common_value(a, b):
+ a = unwrap_node(a)
+ b = unwrap_node(b)
+ if isinstance(a, ExprNodes.NameNode) and isinstance(b, ExprNodes.NameNode):
+ return a.name == b.name
+ if isinstance(a, ExprNodes.AttributeNode) and isinstance(b, ExprNodes.AttributeNode):
+ return not a.is_py_attr and is_common_value(a.obj, b.obj) and a.attribute == b.attribute
+ return False
+
+
+def filter_none_node(node):
+ if node is not None and node.constant_result is None:
+ return None
+ return node
+
class _YieldNodeCollector(Visitor.TreeVisitor):
"""
@@ -129,66 +129,66 @@ def _find_yield_statements(node):
return yield_statements
-class IterationTransform(Visitor.EnvTransform):
- """Transform some common for-in loop patterns into efficient C loops:
-
- - for-in-dict loop becomes a while loop calling PyDict_Next()
- - for-in-enumerate is replaced by an external counter variable
- - for-in-range loop becomes a plain C for loop
- """
- def visit_PrimaryCmpNode(self, node):
- if node.is_ptr_contains():
-
- # for t in operand2:
- # if operand1 == t:
- # res = True
- # break
- # else:
- # res = False
-
- pos = node.pos
- result_ref = UtilNodes.ResultRefNode(node)
+class IterationTransform(Visitor.EnvTransform):
+ """Transform some common for-in loop patterns into efficient C loops:
+
+ - for-in-dict loop becomes a while loop calling PyDict_Next()
+ - for-in-enumerate is replaced by an external counter variable
+ - for-in-range loop becomes a plain C for loop
+ """
+ def visit_PrimaryCmpNode(self, node):
+ if node.is_ptr_contains():
+
+ # for t in operand2:
+ # if operand1 == t:
+ # res = True
+ # break
+ # else:
+ # res = False
+
+ pos = node.pos
+ result_ref = UtilNodes.ResultRefNode(node)
if node.operand2.is_subscript:
- base_type = node.operand2.base.type.base_type
- else:
- base_type = node.operand2.type.base_type
- target_handle = UtilNodes.TempHandle(base_type)
- target = target_handle.ref(pos)
- cmp_node = ExprNodes.PrimaryCmpNode(
- pos, operator=u'==', operand1=node.operand1, operand2=target)
- if_body = Nodes.StatListNode(
- pos,
- stats = [Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=1)),
- Nodes.BreakStatNode(pos)])
- if_node = Nodes.IfStatNode(
- pos,
- if_clauses=[Nodes.IfClauseNode(pos, condition=cmp_node, body=if_body)],
- else_clause=None)
- for_loop = UtilNodes.TempsBlockNode(
- pos,
- temps = [target_handle],
- body = Nodes.ForInStatNode(
- pos,
- target=target,
- iterator=ExprNodes.IteratorNode(node.operand2.pos, sequence=node.operand2),
- body=if_node,
- else_clause=Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=0))))
- for_loop = for_loop.analyse_expressions(self.current_env())
- for_loop = self.visit(for_loop)
- new_node = UtilNodes.TempResultFromStatNode(result_ref, for_loop)
-
- if node.operator == 'not_in':
- new_node = ExprNodes.NotNode(pos, operand=new_node)
- return new_node
-
- else:
- self.visitchildren(node)
- return node
-
- def visit_ForInStatNode(self, node):
- self.visitchildren(node)
- return self._optimise_for_loop(node, node.iterator.sequence)
-
+ base_type = node.operand2.base.type.base_type
+ else:
+ base_type = node.operand2.type.base_type
+ target_handle = UtilNodes.TempHandle(base_type)
+ target = target_handle.ref(pos)
+ cmp_node = ExprNodes.PrimaryCmpNode(
+ pos, operator=u'==', operand1=node.operand1, operand2=target)
+ if_body = Nodes.StatListNode(
+ pos,
+ stats = [Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=1)),
+ Nodes.BreakStatNode(pos)])
+ if_node = Nodes.IfStatNode(
+ pos,
+ if_clauses=[Nodes.IfClauseNode(pos, condition=cmp_node, body=if_body)],
+ else_clause=None)
+ for_loop = UtilNodes.TempsBlockNode(
+ pos,
+ temps = [target_handle],
+ body = Nodes.ForInStatNode(
+ pos,
+ target=target,
+ iterator=ExprNodes.IteratorNode(node.operand2.pos, sequence=node.operand2),
+ body=if_node,
+ else_clause=Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=0))))
+ for_loop = for_loop.analyse_expressions(self.current_env())
+ for_loop = self.visit(for_loop)
+ new_node = UtilNodes.TempResultFromStatNode(result_ref, for_loop)
+
+ if node.operator == 'not_in':
+ new_node = ExprNodes.NotNode(pos, operand=new_node)
+ return new_node
+
+ else:
+ self.visitchildren(node)
+ return node
+
+ def visit_ForInStatNode(self, node):
+ self.visitchildren(node)
+ return self._optimise_for_loop(node, node.iterator.sequence)
+
def _optimise_for_loop(self, node, iterable, reversed=False):
annotation_type = None
if (iterable.is_name or iterable.is_attribute) and iterable.entry and iterable.entry.annotation:
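
visit_PrimaryCmpNode above rewrites a pointer/array containment test into the loop sketched in its comment, wrapping the result in a NotNode for the not_in operator. A small pure-Python rendering of that rewrite, with an ordinary list standing in for the C array:

# Editorial sketch of the rewrite described in visit_PrimaryCmpNode's comment:
#   operand1 in operand2  -->  search loop with an early break, else sets False
def ptr_contains(operand1, operand2):
    for t in operand2:          # iterate the C array / slice
        if operand1 == t:
            res = True
            break
    else:
        res = False
    return res

assert ptr_contains(3, [1, 2, 3]) is True
assert ptr_contains(9, [1, 2, 3]) is False
# 'operand1 not_in operand2' simply wraps the same loop in a NotNode.
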
@@ -207,13 +207,13 @@ class IterationTransform(Visitor.EnvTransform):
annotation_type = Builtin.set_type
if Builtin.dict_type in (iterable.type, annotation_type):
- # like iterating over dict.keys()
- if reversed:
- # CPython raises an error here: not a sequence
- return node
- return self._transform_dict_iteration(
+ # like iterating over dict.keys()
+ if reversed:
+ # CPython raises an error here: not a sequence
+ return node
+ return self._transform_dict_iteration(
node, dict_obj=iterable, method=None, keys=True, values=False)
-
+
if (Builtin.set_type in (iterable.type, annotation_type) or
Builtin.frozenset_type in (iterable.type, annotation_type)):
if reversed:
@@ -221,70 +221,70 @@ class IterationTransform(Visitor.EnvTransform):
return node
return self._transform_set_iteration(node, iterable)
- # C array (slice) iteration?
+ # C array (slice) iteration?
if iterable.type.is_ptr or iterable.type.is_array:
return self._transform_carray_iteration(node, iterable, reversed=reversed)
if iterable.type is Builtin.bytes_type:
return self._transform_bytes_iteration(node, iterable, reversed=reversed)
if iterable.type is Builtin.unicode_type:
return self._transform_unicode_iteration(node, iterable, reversed=reversed)
-
- # the rest is based on function calls
+
+ # the rest is based on function calls
if not isinstance(iterable, ExprNodes.SimpleCallNode):
- return node
-
+ return node
+
if iterable.args is None:
arg_count = iterable.arg_tuple and len(iterable.arg_tuple.args) or 0
- else:
+ else:
arg_count = len(iterable.args)
if arg_count and iterable.self is not None:
- arg_count -= 1
-
+ arg_count -= 1
+
function = iterable.function
- # dict iteration?
- if function.is_attribute and not reversed and not arg_count:
+ # dict iteration?
+ if function.is_attribute and not reversed and not arg_count:
base_obj = iterable.self or function.obj
- method = function.attribute
- # in Py3, items() is equivalent to Py2's iteritems()
- is_safe_iter = self.global_scope().context.language_level >= 3
-
- if not is_safe_iter and method in ('keys', 'values', 'items'):
- # try to reduce this to the corresponding .iter*() methods
+ method = function.attribute
+ # in Py3, items() is equivalent to Py2's iteritems()
+ is_safe_iter = self.global_scope().context.language_level >= 3
+
+ if not is_safe_iter and method in ('keys', 'values', 'items'):
+ # try to reduce this to the corresponding .iter*() methods
if isinstance(base_obj, ExprNodes.CallNode):
- inner_function = base_obj.function
- if (inner_function.is_name and inner_function.name == 'dict'
- and inner_function.entry
- and inner_function.entry.is_builtin):
- # e.g. dict(something).items() => safe to use .iter*()
- is_safe_iter = True
-
- keys = values = False
- if method == 'iterkeys' or (is_safe_iter and method == 'keys'):
- keys = True
- elif method == 'itervalues' or (is_safe_iter and method == 'values'):
- values = True
- elif method == 'iteritems' or (is_safe_iter and method == 'items'):
- keys = values = True
-
- if keys or values:
- return self._transform_dict_iteration(
- node, base_obj, method, keys, values)
-
- # enumerate/reversed ?
+ inner_function = base_obj.function
+ if (inner_function.is_name and inner_function.name == 'dict'
+ and inner_function.entry
+ and inner_function.entry.is_builtin):
+ # e.g. dict(something).items() => safe to use .iter*()
+ is_safe_iter = True
+
+ keys = values = False
+ if method == 'iterkeys' or (is_safe_iter and method == 'keys'):
+ keys = True
+ elif method == 'itervalues' or (is_safe_iter and method == 'values'):
+ values = True
+ elif method == 'iteritems' or (is_safe_iter and method == 'items'):
+ keys = values = True
+
+ if keys or values:
+ return self._transform_dict_iteration(
+ node, base_obj, method, keys, values)
+
+ # enumerate/reversed ?
if iterable.self is None and function.is_name and \
- function.entry and function.entry.is_builtin:
- if function.name == 'enumerate':
- if reversed:
- # CPython raises an error here: not a sequence
- return node
+ function.entry and function.entry.is_builtin:
+ if function.name == 'enumerate':
+ if reversed:
+ # CPython raises an error here: not a sequence
+ return node
return self._transform_enumerate_iteration(node, iterable)
- elif function.name == 'reversed':
- if reversed:
- # CPython raises an error here: not a sequence
- return node
+ elif function.name == 'reversed':
+ if reversed:
+ # CPython raises an error here: not a sequence
+ return node
return self._transform_reversed_iteration(node, iterable)
-
- # range() iteration?
+
+ # range() iteration?
if Options.convert_range and 1 <= arg_count <= 3 and (
iterable.self is None and
function.is_name and function.name in ('range', 'xrange') and
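
Taken together, this hunk and the two before it pick a C-level strategy purely from the shape of the iterable, as summarised in the IterationTransform docstring. An editorial, non-exhaustive summary of the source-level patterns and the strategy each one maps to:

# Editorial summary of loop shapes the transform recognizes (strategies per the code above).
d = {'a': 1}
s = {1, 2}
for k in d:                      # dict -> while loop over PyDict_Next() (keys only)
    pass
for k, v in d.items():           # iteritems()/items() when known safe -> PyDict_Next(), keys+values
    pass
for x in s:                      # set/frozenset -> _transform_set_iteration (not when reversed)
    pass
for i, x in enumerate("ab"):     # enumerate -> plain loop plus an external counter variable
    pass
for i in range(10):              # range/xrange with 1-3 args -> plain C for loop
    pass
for x in reversed([1, 2, 3]):    # reversed(list/tuple) -> iterate the sequence backwards
    pass
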
@@ -300,466 +300,466 @@ class IterationTransform(Visitor.EnvTransform):
break
else:
return self._transform_range_iteration(node, iterable, reversed=reversed)
-
- return node
-
- def _transform_reversed_iteration(self, node, reversed_function):
- args = reversed_function.arg_tuple.args
- if len(args) == 0:
- error(reversed_function.pos,
- "reversed() requires an iterable argument")
- return node
- elif len(args) > 1:
- error(reversed_function.pos,
- "reversed() takes exactly 1 argument")
- return node
- arg = args[0]
-
- # reversed(list/tuple) ?
- if arg.type in (Builtin.tuple_type, Builtin.list_type):
- node.iterator.sequence = arg.as_none_safe_node("'NoneType' object is not iterable")
- node.iterator.reversed = True
- return node
-
- return self._optimise_for_loop(node, arg, reversed=True)
-
- PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_char_ptr_type, [
- PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
- ])
-
- PyBytes_GET_SIZE_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_py_ssize_t_type, [
- PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
- ])
-
- def _transform_bytes_iteration(self, node, slice_node, reversed=False):
- target_type = node.target.type
- if not target_type.is_int and target_type is not Builtin.bytes_type:
- # bytes iteration returns bytes objects in Py2, but
- # integers in Py3
- return node
-
- unpack_temp_node = UtilNodes.LetRefNode(
- slice_node.as_none_safe_node("'NoneType' is not iterable"))
-
- slice_base_node = ExprNodes.PythonCapiCallNode(
- slice_node.pos, "PyBytes_AS_STRING",
- self.PyBytes_AS_STRING_func_type,
- args = [unpack_temp_node],
- is_temp = 0,
- )
- len_node = ExprNodes.PythonCapiCallNode(
- slice_node.pos, "PyBytes_GET_SIZE",
- self.PyBytes_GET_SIZE_func_type,
- args = [unpack_temp_node],
- is_temp = 0,
- )
-
- return UtilNodes.LetNode(
- unpack_temp_node,
- self._transform_carray_iteration(
- node,
- ExprNodes.SliceIndexNode(
- slice_node.pos,
- base = slice_base_node,
- start = None,
- step = None,
- stop = len_node,
- type = slice_base_node.type,
- is_temp = 1,
- ),
- reversed = reversed))
-
- PyUnicode_READ_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_py_ucs4_type, [
- PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_type, None),
- PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_type, None),
- PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None)
- ])
-
- init_unicode_iteration_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_int_type, [
- PyrexTypes.CFuncTypeArg("s", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("length", PyrexTypes.c_py_ssize_t_ptr_type, None),
- PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_ptr_type, None),
- PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_ptr_type, None)
- ],
- exception_value = '-1')
-
- def _transform_unicode_iteration(self, node, slice_node, reversed=False):
- if slice_node.is_literal:
- # try to reduce to byte iteration for plain Latin-1 strings
- try:
+
+ return node
+
+ def _transform_reversed_iteration(self, node, reversed_function):
+ args = reversed_function.arg_tuple.args
+ if len(args) == 0:
+ error(reversed_function.pos,
+ "reversed() requires an iterable argument")
+ return node
+ elif len(args) > 1:
+ error(reversed_function.pos,
+ "reversed() takes exactly 1 argument")
+ return node
+ arg = args[0]
+
+ # reversed(list/tuple) ?
+ if arg.type in (Builtin.tuple_type, Builtin.list_type):
+ node.iterator.sequence = arg.as_none_safe_node("'NoneType' object is not iterable")
+ node.iterator.reversed = True
+ return node
+
+ return self._optimise_for_loop(node, arg, reversed=True)
+
+ PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_char_ptr_type, [
+ PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
+ ])
+
+ PyBytes_GET_SIZE_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_py_ssize_t_type, [
+ PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
+ ])
+
+ def _transform_bytes_iteration(self, node, slice_node, reversed=False):
+ target_type = node.target.type
+ if not target_type.is_int and target_type is not Builtin.bytes_type:
+ # bytes iteration returns bytes objects in Py2, but
+ # integers in Py3
+ return node
+
+ unpack_temp_node = UtilNodes.LetRefNode(
+ slice_node.as_none_safe_node("'NoneType' is not iterable"))
+
+ slice_base_node = ExprNodes.PythonCapiCallNode(
+ slice_node.pos, "PyBytes_AS_STRING",
+ self.PyBytes_AS_STRING_func_type,
+ args = [unpack_temp_node],
+ is_temp = 0,
+ )
+ len_node = ExprNodes.PythonCapiCallNode(
+ slice_node.pos, "PyBytes_GET_SIZE",
+ self.PyBytes_GET_SIZE_func_type,
+ args = [unpack_temp_node],
+ is_temp = 0,
+ )
+
+ return UtilNodes.LetNode(
+ unpack_temp_node,
+ self._transform_carray_iteration(
+ node,
+ ExprNodes.SliceIndexNode(
+ slice_node.pos,
+ base = slice_base_node,
+ start = None,
+ step = None,
+ stop = len_node,
+ type = slice_base_node.type,
+ is_temp = 1,
+ ),
+ reversed = reversed))
+
+ PyUnicode_READ_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_py_ucs4_type, [
+ PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_type, None),
+ PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_type, None),
+ PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None)
+ ])
+
+ init_unicode_iteration_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_int_type, [
+ PyrexTypes.CFuncTypeArg("s", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("length", PyrexTypes.c_py_ssize_t_ptr_type, None),
+ PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_ptr_type, None),
+ PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_ptr_type, None)
+ ],
+ exception_value = '-1')
+
+ def _transform_unicode_iteration(self, node, slice_node, reversed=False):
+ if slice_node.is_literal:
+ # try to reduce to byte iteration for plain Latin-1 strings
+ try:
bytes_value = bytes_literal(slice_node.value.encode('latin1'), 'iso8859-1')
- except UnicodeEncodeError:
- pass
- else:
- bytes_slice = ExprNodes.SliceIndexNode(
- slice_node.pos,
- base=ExprNodes.BytesNode(
- slice_node.pos, value=bytes_value,
- constant_result=bytes_value,
+ except UnicodeEncodeError:
+ pass
+ else:
+ bytes_slice = ExprNodes.SliceIndexNode(
+ slice_node.pos,
+ base=ExprNodes.BytesNode(
+ slice_node.pos, value=bytes_value,
+ constant_result=bytes_value,
type=PyrexTypes.c_const_char_ptr_type).coerce_to(
PyrexTypes.c_const_uchar_ptr_type, self.current_env()),
- start=None,
- stop=ExprNodes.IntNode(
- slice_node.pos, value=str(len(bytes_value)),
- constant_result=len(bytes_value),
- type=PyrexTypes.c_py_ssize_t_type),
- type=Builtin.unicode_type, # hint for Python conversion
- )
- return self._transform_carray_iteration(node, bytes_slice, reversed)
-
- unpack_temp_node = UtilNodes.LetRefNode(
- slice_node.as_none_safe_node("'NoneType' is not iterable"))
-
- start_node = ExprNodes.IntNode(
- node.pos, value='0', constant_result=0, type=PyrexTypes.c_py_ssize_t_type)
- length_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
- end_node = length_temp.ref(node.pos)
- if reversed:
- relation1, relation2 = '>', '>='
- start_node, end_node = end_node, start_node
- else:
- relation1, relation2 = '<=', '<'
-
- kind_temp = UtilNodes.TempHandle(PyrexTypes.c_int_type)
- data_temp = UtilNodes.TempHandle(PyrexTypes.c_void_ptr_type)
- counter_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
-
- target_value = ExprNodes.PythonCapiCallNode(
- slice_node.pos, "__Pyx_PyUnicode_READ",
- self.PyUnicode_READ_func_type,
- args = [kind_temp.ref(slice_node.pos),
- data_temp.ref(slice_node.pos),
- counter_temp.ref(node.target.pos)],
- is_temp = False,
- )
- if target_value.type != node.target.type:
- target_value = target_value.coerce_to(node.target.type,
- self.current_env())
- target_assign = Nodes.SingleAssignmentNode(
- pos = node.target.pos,
- lhs = node.target,
- rhs = target_value)
- body = Nodes.StatListNode(
- node.pos,
- stats = [target_assign, node.body])
-
- loop_node = Nodes.ForFromStatNode(
- node.pos,
- bound1=start_node, relation1=relation1,
- target=counter_temp.ref(node.target.pos),
- relation2=relation2, bound2=end_node,
- step=None, body=body,
- else_clause=node.else_clause,
- from_range=True)
-
- setup_node = Nodes.ExprStatNode(
- node.pos,
- expr = ExprNodes.PythonCapiCallNode(
- slice_node.pos, "__Pyx_init_unicode_iteration",
- self.init_unicode_iteration_func_type,
- args = [unpack_temp_node,
- ExprNodes.AmpersandNode(slice_node.pos, operand=length_temp.ref(slice_node.pos),
- type=PyrexTypes.c_py_ssize_t_ptr_type),
- ExprNodes.AmpersandNode(slice_node.pos, operand=data_temp.ref(slice_node.pos),
- type=PyrexTypes.c_void_ptr_ptr_type),
- ExprNodes.AmpersandNode(slice_node.pos, operand=kind_temp.ref(slice_node.pos),
- type=PyrexTypes.c_int_ptr_type),
- ],
- is_temp = True,
- result_is_used = False,
- utility_code=UtilityCode.load_cached("unicode_iter", "Optimize.c"),
- ))
- return UtilNodes.LetNode(
- unpack_temp_node,
- UtilNodes.TempsBlockNode(
- node.pos, temps=[counter_temp, length_temp, data_temp, kind_temp],
- body=Nodes.StatListNode(node.pos, stats=[setup_node, loop_node])))
-
- def _transform_carray_iteration(self, node, slice_node, reversed=False):
- neg_step = False
- if isinstance(slice_node, ExprNodes.SliceIndexNode):
- slice_base = slice_node.base
- start = filter_none_node(slice_node.start)
- stop = filter_none_node(slice_node.stop)
- step = None
- if not stop:
- if not slice_base.type.is_pyobject:
- error(slice_node.pos, "C array iteration requires known end index")
- return node
-
+ start=None,
+ stop=ExprNodes.IntNode(
+ slice_node.pos, value=str(len(bytes_value)),
+ constant_result=len(bytes_value),
+ type=PyrexTypes.c_py_ssize_t_type),
+ type=Builtin.unicode_type, # hint for Python conversion
+ )
+ return self._transform_carray_iteration(node, bytes_slice, reversed)
+
+ unpack_temp_node = UtilNodes.LetRefNode(
+ slice_node.as_none_safe_node("'NoneType' is not iterable"))
+
+ start_node = ExprNodes.IntNode(
+ node.pos, value='0', constant_result=0, type=PyrexTypes.c_py_ssize_t_type)
+ length_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+ end_node = length_temp.ref(node.pos)
+ if reversed:
+ relation1, relation2 = '>', '>='
+ start_node, end_node = end_node, start_node
+ else:
+ relation1, relation2 = '<=', '<'
+
+ kind_temp = UtilNodes.TempHandle(PyrexTypes.c_int_type)
+ data_temp = UtilNodes.TempHandle(PyrexTypes.c_void_ptr_type)
+ counter_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+
+ target_value = ExprNodes.PythonCapiCallNode(
+ slice_node.pos, "__Pyx_PyUnicode_READ",
+ self.PyUnicode_READ_func_type,
+ args = [kind_temp.ref(slice_node.pos),
+ data_temp.ref(slice_node.pos),
+ counter_temp.ref(node.target.pos)],
+ is_temp = False,
+ )
+ if target_value.type != node.target.type:
+ target_value = target_value.coerce_to(node.target.type,
+ self.current_env())
+ target_assign = Nodes.SingleAssignmentNode(
+ pos = node.target.pos,
+ lhs = node.target,
+ rhs = target_value)
+ body = Nodes.StatListNode(
+ node.pos,
+ stats = [target_assign, node.body])
+
+ loop_node = Nodes.ForFromStatNode(
+ node.pos,
+ bound1=start_node, relation1=relation1,
+ target=counter_temp.ref(node.target.pos),
+ relation2=relation2, bound2=end_node,
+ step=None, body=body,
+ else_clause=node.else_clause,
+ from_range=True)
+
+ setup_node = Nodes.ExprStatNode(
+ node.pos,
+ expr = ExprNodes.PythonCapiCallNode(
+ slice_node.pos, "__Pyx_init_unicode_iteration",
+ self.init_unicode_iteration_func_type,
+ args = [unpack_temp_node,
+ ExprNodes.AmpersandNode(slice_node.pos, operand=length_temp.ref(slice_node.pos),
+ type=PyrexTypes.c_py_ssize_t_ptr_type),
+ ExprNodes.AmpersandNode(slice_node.pos, operand=data_temp.ref(slice_node.pos),
+ type=PyrexTypes.c_void_ptr_ptr_type),
+ ExprNodes.AmpersandNode(slice_node.pos, operand=kind_temp.ref(slice_node.pos),
+ type=PyrexTypes.c_int_ptr_type),
+ ],
+ is_temp = True,
+ result_is_used = False,
+ utility_code=UtilityCode.load_cached("unicode_iter", "Optimize.c"),
+ ))
+ return UtilNodes.LetNode(
+ unpack_temp_node,
+ UtilNodes.TempsBlockNode(
+ node.pos, temps=[counter_temp, length_temp, data_temp, kind_temp],
+ body=Nodes.StatListNode(node.pos, stats=[setup_node, loop_node])))
+
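Illustrative sketch only (plain Python, not part of the diff): the block above rewrites iteration over a unicode string into an index-based loop — `__Pyx_init_unicode_iteration` fills the kind/data/length temps and the ForFromStatNode reads each code point with `__Pyx_PyUnicode_READ`. Conceptually it behaves like:

    s = u"example"
    length = len(s)            # filled in by __Pyx_init_unicode_iteration in the real code
    count = 0
    i = 0
    while i < length:          # ForFromStatNode with the '<=' / '<' relations (swapped for reversed())
        ch = s[i]              # stands in for __Pyx_PyUnicode_READ(kind, data, i)
        count += 1
        i += 1
    assert count == len(s)
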
+ def _transform_carray_iteration(self, node, slice_node, reversed=False):
+ neg_step = False
+ if isinstance(slice_node, ExprNodes.SliceIndexNode):
+ slice_base = slice_node.base
+ start = filter_none_node(slice_node.start)
+ stop = filter_none_node(slice_node.stop)
+ step = None
+ if not stop:
+ if not slice_base.type.is_pyobject:
+ error(slice_node.pos, "C array iteration requires known end index")
+ return node
+
elif slice_node.is_subscript:
- assert isinstance(slice_node.index, ExprNodes.SliceNode)
- slice_base = slice_node.base
- index = slice_node.index
- start = filter_none_node(index.start)
- stop = filter_none_node(index.stop)
- step = filter_none_node(index.step)
- if step:
+ assert isinstance(slice_node.index, ExprNodes.SliceNode)
+ slice_base = slice_node.base
+ index = slice_node.index
+ start = filter_none_node(index.start)
+ stop = filter_none_node(index.stop)
+ step = filter_none_node(index.step)
+ if step:
if not isinstance(step.constant_result, _py_int_types) \
- or step.constant_result == 0 \
- or step.constant_result > 0 and not stop \
- or step.constant_result < 0 and not start:
- if not slice_base.type.is_pyobject:
- error(step.pos, "C array iteration requires known step size and end index")
- return node
- else:
- # step sign is handled internally by ForFromStatNode
- step_value = step.constant_result
- if reversed:
- step_value = -step_value
- neg_step = step_value < 0
- step = ExprNodes.IntNode(step.pos, type=PyrexTypes.c_py_ssize_t_type,
- value=str(abs(step_value)),
- constant_result=abs(step_value))
-
- elif slice_node.type.is_array:
- if slice_node.type.size is None:
- error(slice_node.pos, "C array iteration requires known end index")
- return node
- slice_base = slice_node
- start = None
- stop = ExprNodes.IntNode(
- slice_node.pos, value=str(slice_node.type.size),
- type=PyrexTypes.c_py_ssize_t_type, constant_result=slice_node.type.size)
- step = None
-
- else:
- if not slice_node.type.is_pyobject:
- error(slice_node.pos, "C array iteration requires known end index")
- return node
-
- if start:
- start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
- if stop:
- stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
- if stop is None:
- if neg_step:
- stop = ExprNodes.IntNode(
- slice_node.pos, value='-1', type=PyrexTypes.c_py_ssize_t_type, constant_result=-1)
- else:
- error(slice_node.pos, "C array iteration requires known step size and end index")
- return node
-
- if reversed:
- if not start:
- start = ExprNodes.IntNode(slice_node.pos, value="0", constant_result=0,
- type=PyrexTypes.c_py_ssize_t_type)
- # if step was provided, it was already negated above
- start, stop = stop, start
-
- ptr_type = slice_base.type
- if ptr_type.is_array:
- ptr_type = ptr_type.element_ptr_type()
- carray_ptr = slice_base.coerce_to_simple(self.current_env())
-
- if start and start.constant_result != 0:
- start_ptr_node = ExprNodes.AddNode(
- start.pos,
- operand1=carray_ptr,
- operator='+',
- operand2=start,
- type=ptr_type)
- else:
- start_ptr_node = carray_ptr
-
- if stop and stop.constant_result != 0:
- stop_ptr_node = ExprNodes.AddNode(
- stop.pos,
- operand1=ExprNodes.CloneNode(carray_ptr),
- operator='+',
- operand2=stop,
- type=ptr_type
- ).coerce_to_simple(self.current_env())
- else:
- stop_ptr_node = ExprNodes.CloneNode(carray_ptr)
-
- counter = UtilNodes.TempHandle(ptr_type)
- counter_temp = counter.ref(node.target.pos)
-
- if slice_base.type.is_string and node.target.type.is_pyobject:
- # special case: char* -> bytes/unicode
- if slice_node.type is Builtin.unicode_type:
- target_value = ExprNodes.CastNode(
- ExprNodes.DereferenceNode(
- node.target.pos, operand=counter_temp,
- type=ptr_type.base_type),
- PyrexTypes.c_py_ucs4_type).coerce_to(
- node.target.type, self.current_env())
- else:
- # char* -> bytes coercion requires slicing, not indexing
- target_value = ExprNodes.SliceIndexNode(
- node.target.pos,
- start=ExprNodes.IntNode(node.target.pos, value='0',
- constant_result=0,
- type=PyrexTypes.c_int_type),
- stop=ExprNodes.IntNode(node.target.pos, value='1',
- constant_result=1,
- type=PyrexTypes.c_int_type),
- base=counter_temp,
- type=Builtin.bytes_type,
- is_temp=1)
- elif node.target.type.is_ptr and not node.target.type.assignable_from(ptr_type.base_type):
- # Allow iteration with pointer target to avoid copy.
- target_value = counter_temp
- else:
- # TODO: can this safely be replaced with DereferenceNode() as above?
- target_value = ExprNodes.IndexNode(
- node.target.pos,
- index=ExprNodes.IntNode(node.target.pos, value='0',
- constant_result=0,
- type=PyrexTypes.c_int_type),
- base=counter_temp,
- type=ptr_type.base_type)
-
- if target_value.type != node.target.type:
- target_value = target_value.coerce_to(node.target.type,
- self.current_env())
-
- target_assign = Nodes.SingleAssignmentNode(
- pos = node.target.pos,
- lhs = node.target,
- rhs = target_value)
-
- body = Nodes.StatListNode(
- node.pos,
- stats = [target_assign, node.body])
-
- relation1, relation2 = self._find_for_from_node_relations(neg_step, reversed)
-
- for_node = Nodes.ForFromStatNode(
- node.pos,
- bound1=start_ptr_node, relation1=relation1,
- target=counter_temp,
- relation2=relation2, bound2=stop_ptr_node,
- step=step, body=body,
- else_clause=node.else_clause,
- from_range=True)
-
- return UtilNodes.TempsBlockNode(
- node.pos, temps=[counter],
- body=for_node)
-
- def _transform_enumerate_iteration(self, node, enumerate_function):
- args = enumerate_function.arg_tuple.args
- if len(args) == 0:
- error(enumerate_function.pos,
- "enumerate() requires an iterable argument")
- return node
- elif len(args) > 2:
- error(enumerate_function.pos,
- "enumerate() takes at most 2 arguments")
- return node
-
- if not node.target.is_sequence_constructor:
- # leave this untouched for now
- return node
- targets = node.target.args
- if len(targets) != 2:
- # leave this untouched for now
- return node
-
- enumerate_target, iterable_target = targets
- counter_type = enumerate_target.type
-
- if not counter_type.is_pyobject and not counter_type.is_int:
- # nothing we can do here, I guess
- return node
-
- if len(args) == 2:
- start = unwrap_coerced_node(args[1]).coerce_to(counter_type, self.current_env())
- else:
- start = ExprNodes.IntNode(enumerate_function.pos,
- value='0',
- type=counter_type,
- constant_result=0)
- temp = UtilNodes.LetRefNode(start)
-
- inc_expression = ExprNodes.AddNode(
- enumerate_function.pos,
- operand1 = temp,
- operand2 = ExprNodes.IntNode(node.pos, value='1',
- type=counter_type,
- constant_result=1),
- operator = '+',
- type = counter_type,
- #inplace = True, # not worth using in-place operation for Py ints
- is_temp = counter_type.is_pyobject
- )
-
- loop_body = [
- Nodes.SingleAssignmentNode(
- pos = enumerate_target.pos,
- lhs = enumerate_target,
- rhs = temp),
- Nodes.SingleAssignmentNode(
- pos = enumerate_target.pos,
- lhs = temp,
- rhs = inc_expression)
- ]
-
- if isinstance(node.body, Nodes.StatListNode):
- node.body.stats = loop_body + node.body.stats
- else:
- loop_body.append(node.body)
- node.body = Nodes.StatListNode(
- node.body.pos,
- stats = loop_body)
-
- node.target = iterable_target
- node.item = node.item.coerce_to(iterable_target.type, self.current_env())
- node.iterator.sequence = args[0]
-
- # recurse into loop to check for further optimisations
- return UtilNodes.LetNode(temp, self._optimise_for_loop(node, node.iterator.sequence))
-
- def _find_for_from_node_relations(self, neg_step_value, reversed):
- if reversed:
- if neg_step_value:
- return '<', '<='
- else:
- return '>', '>='
- else:
- if neg_step_value:
- return '>=', '>'
- else:
- return '<=', '<'
-
- def _transform_range_iteration(self, node, range_function, reversed=False):
- args = range_function.arg_tuple.args
- if len(args) < 3:
- step_pos = range_function.pos
- step_value = 1
+ or step.constant_result == 0 \
+ or step.constant_result > 0 and not stop \
+ or step.constant_result < 0 and not start:
+ if not slice_base.type.is_pyobject:
+ error(step.pos, "C array iteration requires known step size and end index")
+ return node
+ else:
+ # step sign is handled internally by ForFromStatNode
+ step_value = step.constant_result
+ if reversed:
+ step_value = -step_value
+ neg_step = step_value < 0
+ step = ExprNodes.IntNode(step.pos, type=PyrexTypes.c_py_ssize_t_type,
+ value=str(abs(step_value)),
+ constant_result=abs(step_value))
+
+ elif slice_node.type.is_array:
+ if slice_node.type.size is None:
+ error(slice_node.pos, "C array iteration requires known end index")
+ return node
+ slice_base = slice_node
+ start = None
+ stop = ExprNodes.IntNode(
+ slice_node.pos, value=str(slice_node.type.size),
+ type=PyrexTypes.c_py_ssize_t_type, constant_result=slice_node.type.size)
+ step = None
+
+ else:
+ if not slice_node.type.is_pyobject:
+ error(slice_node.pos, "C array iteration requires known end index")
+ return node
+
+ if start:
+ start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+ if stop:
+ stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+ if stop is None:
+ if neg_step:
+ stop = ExprNodes.IntNode(
+ slice_node.pos, value='-1', type=PyrexTypes.c_py_ssize_t_type, constant_result=-1)
+ else:
+ error(slice_node.pos, "C array iteration requires known step size and end index")
+ return node
+
+ if reversed:
+ if not start:
+ start = ExprNodes.IntNode(slice_node.pos, value="0", constant_result=0,
+ type=PyrexTypes.c_py_ssize_t_type)
+ # if step was provided, it was already negated above
+ start, stop = stop, start
+
+ ptr_type = slice_base.type
+ if ptr_type.is_array:
+ ptr_type = ptr_type.element_ptr_type()
+ carray_ptr = slice_base.coerce_to_simple(self.current_env())
+
+ if start and start.constant_result != 0:
+ start_ptr_node = ExprNodes.AddNode(
+ start.pos,
+ operand1=carray_ptr,
+ operator='+',
+ operand2=start,
+ type=ptr_type)
+ else:
+ start_ptr_node = carray_ptr
+
+ if stop and stop.constant_result != 0:
+ stop_ptr_node = ExprNodes.AddNode(
+ stop.pos,
+ operand1=ExprNodes.CloneNode(carray_ptr),
+ operator='+',
+ operand2=stop,
+ type=ptr_type
+ ).coerce_to_simple(self.current_env())
+ else:
+ stop_ptr_node = ExprNodes.CloneNode(carray_ptr)
+
+ counter = UtilNodes.TempHandle(ptr_type)
+ counter_temp = counter.ref(node.target.pos)
+
+ if slice_base.type.is_string and node.target.type.is_pyobject:
+ # special case: char* -> bytes/unicode
+ if slice_node.type is Builtin.unicode_type:
+ target_value = ExprNodes.CastNode(
+ ExprNodes.DereferenceNode(
+ node.target.pos, operand=counter_temp,
+ type=ptr_type.base_type),
+ PyrexTypes.c_py_ucs4_type).coerce_to(
+ node.target.type, self.current_env())
+ else:
+ # char* -> bytes coercion requires slicing, not indexing
+ target_value = ExprNodes.SliceIndexNode(
+ node.target.pos,
+ start=ExprNodes.IntNode(node.target.pos, value='0',
+ constant_result=0,
+ type=PyrexTypes.c_int_type),
+ stop=ExprNodes.IntNode(node.target.pos, value='1',
+ constant_result=1,
+ type=PyrexTypes.c_int_type),
+ base=counter_temp,
+ type=Builtin.bytes_type,
+ is_temp=1)
+ elif node.target.type.is_ptr and not node.target.type.assignable_from(ptr_type.base_type):
+ # Allow iteration with pointer target to avoid copy.
+ target_value = counter_temp
+ else:
+ # TODO: can this safely be replaced with DereferenceNode() as above?
+ target_value = ExprNodes.IndexNode(
+ node.target.pos,
+ index=ExprNodes.IntNode(node.target.pos, value='0',
+ constant_result=0,
+ type=PyrexTypes.c_int_type),
+ base=counter_temp,
+ type=ptr_type.base_type)
+
+ if target_value.type != node.target.type:
+ target_value = target_value.coerce_to(node.target.type,
+ self.current_env())
+
+ target_assign = Nodes.SingleAssignmentNode(
+ pos = node.target.pos,
+ lhs = node.target,
+ rhs = target_value)
+
+ body = Nodes.StatListNode(
+ node.pos,
+ stats = [target_assign, node.body])
+
+ relation1, relation2 = self._find_for_from_node_relations(neg_step, reversed)
+
+ for_node = Nodes.ForFromStatNode(
+ node.pos,
+ bound1=start_ptr_node, relation1=relation1,
+ target=counter_temp,
+ relation2=relation2, bound2=stop_ptr_node,
+ step=step, body=body,
+ else_clause=node.else_clause,
+ from_range=True)
+
+ return UtilNodes.TempsBlockNode(
+ node.pos, temps=[counter],
+ body=for_node)
+
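Illustrative sketch only: `_transform_carray_iteration` above turns iteration over a C array or pointer slice into a pointer-walking ForFromStatNode, with the bounds built as `carray_ptr + start` and `carray_ptr + stop`. A rough Python-level analogue of iterating `arr[1:4]`, with indices standing in for the pointer arithmetic:

    arr = [10, 20, 30, 40, 50]     # think: cdef int arr[5]
    start, stop = 1, 4             # become arr + 1 and arr + 4 in the real transform
    p = start
    picked = []
    while p < stop:                # relations chosen by _find_for_from_node_relations
        picked.append(arr[p])      # dereference of the pointer counter
        p += 1
    assert picked == [20, 30, 40]
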
+ def _transform_enumerate_iteration(self, node, enumerate_function):
+ args = enumerate_function.arg_tuple.args
+ if len(args) == 0:
+ error(enumerate_function.pos,
+ "enumerate() requires an iterable argument")
+ return node
+ elif len(args) > 2:
+ error(enumerate_function.pos,
+ "enumerate() takes at most 2 arguments")
+ return node
+
+ if not node.target.is_sequence_constructor:
+ # leave this untouched for now
+ return node
+ targets = node.target.args
+ if len(targets) != 2:
+ # leave this untouched for now
+ return node
+
+ enumerate_target, iterable_target = targets
+ counter_type = enumerate_target.type
+
+ if not counter_type.is_pyobject and not counter_type.is_int:
+ # nothing we can do here, I guess
+ return node
+
+ if len(args) == 2:
+ start = unwrap_coerced_node(args[1]).coerce_to(counter_type, self.current_env())
+ else:
+ start = ExprNodes.IntNode(enumerate_function.pos,
+ value='0',
+ type=counter_type,
+ constant_result=0)
+ temp = UtilNodes.LetRefNode(start)
+
+ inc_expression = ExprNodes.AddNode(
+ enumerate_function.pos,
+ operand1 = temp,
+ operand2 = ExprNodes.IntNode(node.pos, value='1',
+ type=counter_type,
+ constant_result=1),
+ operator = '+',
+ type = counter_type,
+ #inplace = True, # not worth using in-place operation for Py ints
+ is_temp = counter_type.is_pyobject
+ )
+
+ loop_body = [
+ Nodes.SingleAssignmentNode(
+ pos = enumerate_target.pos,
+ lhs = enumerate_target,
+ rhs = temp),
+ Nodes.SingleAssignmentNode(
+ pos = enumerate_target.pos,
+ lhs = temp,
+ rhs = inc_expression)
+ ]
+
+ if isinstance(node.body, Nodes.StatListNode):
+ node.body.stats = loop_body + node.body.stats
+ else:
+ loop_body.append(node.body)
+ node.body = Nodes.StatListNode(
+ node.body.pos,
+ stats = loop_body)
+
+ node.target = iterable_target
+ node.item = node.item.coerce_to(iterable_target.type, self.current_env())
+ node.iterator.sequence = args[0]
+
+ # recurse into loop to check for further optimisations
+ return UtilNodes.LetNode(temp, self._optimise_for_loop(node, node.iterator.sequence))
+
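Illustrative sketch only: `_transform_enumerate_iteration` above drops the enumerate() call and keeps a plain counter that is assigned to the index target and then incremented at the top of each loop body, roughly:

    items = ["a", "b", "c"]
    counter = 5                    # start value, e.g. from enumerate(items, 5)
    seen = []
    for item in items:             # the loop now iterates the underlying sequence directly
        i = counter                # index target gets the current counter value
        counter = counter + 1      # increment inserted right after the assignment
        seen.append((i, item))
    assert seen == [(5, "a"), (6, "b"), (7, "c")]
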
+ def _find_for_from_node_relations(self, neg_step_value, reversed):
+ if reversed:
+ if neg_step_value:
+ return '<', '<='
+ else:
+ return '>', '>='
+ else:
+ if neg_step_value:
+ return '>=', '>'
+ else:
+ return '<=', '<'
+
+ def _transform_range_iteration(self, node, range_function, reversed=False):
+ args = range_function.arg_tuple.args
+ if len(args) < 3:
+ step_pos = range_function.pos
+ step_value = 1
step = ExprNodes.IntNode(step_pos, value='1', constant_result=1)
- else:
- step = args[2]
- step_pos = step.pos
+ else:
+ step = args[2]
+ step_pos = step.pos
if not isinstance(step.constant_result, _py_int_types):
- # cannot determine step direction
- return node
- step_value = step.constant_result
- if step_value == 0:
- # will lead to an error elsewhere
- return node
+ # cannot determine step direction
+ return node
+ step_value = step.constant_result
+ if step_value == 0:
+ # will lead to an error elsewhere
+ return node
step = ExprNodes.IntNode(step_pos, value=str(step_value),
constant_result=step_value)
-
- if len(args) == 1:
- bound1 = ExprNodes.IntNode(range_function.pos, value='0',
- constant_result=0)
- bound2 = args[0].coerce_to_integer(self.current_env())
- else:
- bound1 = args[0].coerce_to_integer(self.current_env())
- bound2 = args[1].coerce_to_integer(self.current_env())
-
- relation1, relation2 = self._find_for_from_node_relations(step_value < 0, reversed)
-
+
+ if len(args) == 1:
+ bound1 = ExprNodes.IntNode(range_function.pos, value='0',
+ constant_result=0)
+ bound2 = args[0].coerce_to_integer(self.current_env())
+ else:
+ bound1 = args[0].coerce_to_integer(self.current_env())
+ bound2 = args[1].coerce_to_integer(self.current_env())
+
+ relation1, relation2 = self._find_for_from_node_relations(step_value < 0, reversed)
+
bound2_ref_node = None
- if reversed:
- bound1, bound2 = bound2, bound1
+ if reversed:
+ bound1, bound2 = bound2, bound1
abs_step = abs(step_value)
if abs_step != 1:
if (isinstance(bound1.constant_result, _py_int_types) and
@@ -773,7 +773,7 @@ class IterationTransform(Visitor.EnvTransform):
begin_value = bound1.constant_result
end_value = bound2.constant_result
bound1_value = end_value + abs_step * ((begin_value - end_value - 1) // abs_step) + 1
-
+
bound1 = ExprNodes.IntNode(
bound1.pos, value=str(bound1_value), constant_result=bound1_value,
type=PyrexTypes.spanning_type(bound1.type, bound2.type))
@@ -785,32 +785,32 @@ class IterationTransform(Visitor.EnvTransform):
if step_value < 0:
step_value = -step_value
- step.value = str(step_value)
- step.constant_result = step_value
- step = step.coerce_to_integer(self.current_env())
-
- if not bound2.is_literal:
- # stop bound must be immutable => keep it in a temp var
- bound2_is_temp = True
+ step.value = str(step_value)
+ step.constant_result = step_value
+ step = step.coerce_to_integer(self.current_env())
+
+ if not bound2.is_literal:
+ # stop bound must be immutable => keep it in a temp var
+ bound2_is_temp = True
bound2 = bound2_ref_node or UtilNodes.LetRefNode(bound2)
- else:
- bound2_is_temp = False
-
- for_node = Nodes.ForFromStatNode(
- node.pos,
- target=node.target,
- bound1=bound1, relation1=relation1,
- relation2=relation2, bound2=bound2,
- step=step, body=node.body,
- else_clause=node.else_clause,
- from_range=True)
+ else:
+ bound2_is_temp = False
+
+ for_node = Nodes.ForFromStatNode(
+ node.pos,
+ target=node.target,
+ bound1=bound1, relation1=relation1,
+ relation2=relation2, bound2=bound2,
+ step=step, body=node.body,
+ else_clause=node.else_clause,
+ from_range=True)
for_node.set_up_loop(self.current_env())
-
- if bound2_is_temp:
- for_node = UtilNodes.LetNode(bound2, for_node)
-
- return for_node
-
+
+ if bound2_is_temp:
+ for_node = UtilNodes.LetNode(bound2, for_node)
+
+ return for_node
+
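Illustrative sketch only: `_transform_range_iteration` above only fires when the step is a compile-time integer constant, so the range() loop can become a ForFromStatNode, i.e. a plain counting loop. Conceptually, for `range(2, 10, 3)`:

    total = 0
    i = 2
    while i < 10:                  # '<' chosen by _find_for_from_node_relations for a positive step
        total += i
        i += 3
    assert total == 2 + 5 + 8
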
def _build_range_step_calculation(self, bound1, bound2_ref_node, step, step_value):
abs_step = abs(step_value)
spanning_type = PyrexTypes.spanning_type(bound1.type, bound2_ref_node.type)
@@ -875,114 +875,114 @@ class IterationTransform(Visitor.EnvTransform):
type=spanning_type)
return step_calculation_node
- def _transform_dict_iteration(self, node, dict_obj, method, keys, values):
- temps = []
- temp = UtilNodes.TempHandle(PyrexTypes.py_object_type)
- temps.append(temp)
- dict_temp = temp.ref(dict_obj.pos)
- temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
- temps.append(temp)
- pos_temp = temp.ref(node.pos)
-
- key_target = value_target = tuple_target = None
- if keys and values:
- if node.target.is_sequence_constructor:
- if len(node.target.args) == 2:
- key_target, value_target = node.target.args
- else:
- # unusual case that may or may not lead to an error
- return node
- else:
- tuple_target = node.target
- elif keys:
- key_target = node.target
- else:
- value_target = node.target
-
- if isinstance(node.body, Nodes.StatListNode):
- body = node.body
- else:
- body = Nodes.StatListNode(pos = node.body.pos,
- stats = [node.body])
-
- # keep original length to guard against dict modification
- dict_len_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
- temps.append(dict_len_temp)
- dict_len_temp_addr = ExprNodes.AmpersandNode(
- node.pos, operand=dict_len_temp.ref(dict_obj.pos),
- type=PyrexTypes.c_ptr_type(dict_len_temp.type))
- temp = UtilNodes.TempHandle(PyrexTypes.c_int_type)
- temps.append(temp)
- is_dict_temp = temp.ref(node.pos)
- is_dict_temp_addr = ExprNodes.AmpersandNode(
- node.pos, operand=is_dict_temp,
- type=PyrexTypes.c_ptr_type(temp.type))
-
- iter_next_node = Nodes.DictIterationNextNode(
- dict_temp, dict_len_temp.ref(dict_obj.pos), pos_temp,
- key_target, value_target, tuple_target,
- is_dict_temp)
- iter_next_node = iter_next_node.analyse_expressions(self.current_env())
- body.stats[0:0] = [iter_next_node]
-
- if method:
- method_node = ExprNodes.StringNode(
- dict_obj.pos, is_identifier=True, value=method)
- dict_obj = dict_obj.as_none_safe_node(
+ def _transform_dict_iteration(self, node, dict_obj, method, keys, values):
+ temps = []
+ temp = UtilNodes.TempHandle(PyrexTypes.py_object_type)
+ temps.append(temp)
+ dict_temp = temp.ref(dict_obj.pos)
+ temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+ temps.append(temp)
+ pos_temp = temp.ref(node.pos)
+
+ key_target = value_target = tuple_target = None
+ if keys and values:
+ if node.target.is_sequence_constructor:
+ if len(node.target.args) == 2:
+ key_target, value_target = node.target.args
+ else:
+ # unusual case that may or may not lead to an error
+ return node
+ else:
+ tuple_target = node.target
+ elif keys:
+ key_target = node.target
+ else:
+ value_target = node.target
+
+ if isinstance(node.body, Nodes.StatListNode):
+ body = node.body
+ else:
+ body = Nodes.StatListNode(pos = node.body.pos,
+ stats = [node.body])
+
+ # keep original length to guard against dict modification
+ dict_len_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+ temps.append(dict_len_temp)
+ dict_len_temp_addr = ExprNodes.AmpersandNode(
+ node.pos, operand=dict_len_temp.ref(dict_obj.pos),
+ type=PyrexTypes.c_ptr_type(dict_len_temp.type))
+ temp = UtilNodes.TempHandle(PyrexTypes.c_int_type)
+ temps.append(temp)
+ is_dict_temp = temp.ref(node.pos)
+ is_dict_temp_addr = ExprNodes.AmpersandNode(
+ node.pos, operand=is_dict_temp,
+ type=PyrexTypes.c_ptr_type(temp.type))
+
+ iter_next_node = Nodes.DictIterationNextNode(
+ dict_temp, dict_len_temp.ref(dict_obj.pos), pos_temp,
+ key_target, value_target, tuple_target,
+ is_dict_temp)
+ iter_next_node = iter_next_node.analyse_expressions(self.current_env())
+ body.stats[0:0] = [iter_next_node]
+
+ if method:
+ method_node = ExprNodes.StringNode(
+ dict_obj.pos, is_identifier=True, value=method)
+ dict_obj = dict_obj.as_none_safe_node(
"'NoneType' object has no attribute '%{0}s'".format('.30' if len(method) <= 30 else ''),
- error = "PyExc_AttributeError",
- format_args = [method])
- else:
- method_node = ExprNodes.NullNode(dict_obj.pos)
- dict_obj = dict_obj.as_none_safe_node("'NoneType' object is not iterable")
-
- def flag_node(value):
- value = value and 1 or 0
- return ExprNodes.IntNode(node.pos, value=str(value), constant_result=value)
-
- result_code = [
- Nodes.SingleAssignmentNode(
- node.pos,
- lhs = pos_temp,
- rhs = ExprNodes.IntNode(node.pos, value='0',
- constant_result=0)),
- Nodes.SingleAssignmentNode(
- dict_obj.pos,
- lhs = dict_temp,
- rhs = ExprNodes.PythonCapiCallNode(
- dict_obj.pos,
- "__Pyx_dict_iterator",
- self.PyDict_Iterator_func_type,
- utility_code = UtilityCode.load_cached("dict_iter", "Optimize.c"),
- args = [dict_obj, flag_node(dict_obj.type is Builtin.dict_type),
- method_node, dict_len_temp_addr, is_dict_temp_addr,
- ],
- is_temp=True,
- )),
- Nodes.WhileStatNode(
- node.pos,
- condition = None,
- body = body,
- else_clause = node.else_clause
- )
- ]
-
- return UtilNodes.TempsBlockNode(
- node.pos, temps=temps,
- body=Nodes.StatListNode(
- node.pos,
- stats = result_code
- ))
-
- PyDict_Iterator_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("is_dict", PyrexTypes.c_int_type, None),
- PyrexTypes.CFuncTypeArg("method_name", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("p_orig_length", PyrexTypes.c_py_ssize_t_ptr_type, None),
- PyrexTypes.CFuncTypeArg("p_is_dict", PyrexTypes.c_int_ptr_type, None),
- ])
-
+ error = "PyExc_AttributeError",
+ format_args = [method])
+ else:
+ method_node = ExprNodes.NullNode(dict_obj.pos)
+ dict_obj = dict_obj.as_none_safe_node("'NoneType' object is not iterable")
+
+ def flag_node(value):
+ value = value and 1 or 0
+ return ExprNodes.IntNode(node.pos, value=str(value), constant_result=value)
+
+ result_code = [
+ Nodes.SingleAssignmentNode(
+ node.pos,
+ lhs = pos_temp,
+ rhs = ExprNodes.IntNode(node.pos, value='0',
+ constant_result=0)),
+ Nodes.SingleAssignmentNode(
+ dict_obj.pos,
+ lhs = dict_temp,
+ rhs = ExprNodes.PythonCapiCallNode(
+ dict_obj.pos,
+ "__Pyx_dict_iterator",
+ self.PyDict_Iterator_func_type,
+ utility_code = UtilityCode.load_cached("dict_iter", "Optimize.c"),
+ args = [dict_obj, flag_node(dict_obj.type is Builtin.dict_type),
+ method_node, dict_len_temp_addr, is_dict_temp_addr,
+ ],
+ is_temp=True,
+ )),
+ Nodes.WhileStatNode(
+ node.pos,
+ condition = None,
+ body = body,
+ else_clause = node.else_clause
+ )
+ ]
+
+ return UtilNodes.TempsBlockNode(
+ node.pos, temps=temps,
+ body=Nodes.StatListNode(
+ node.pos,
+ stats = result_code
+ ))
+
+ PyDict_Iterator_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("is_dict", PyrexTypes.c_int_type, None),
+ PyrexTypes.CFuncTypeArg("method_name", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("p_orig_length", PyrexTypes.c_py_ssize_t_ptr_type, None),
+ PyrexTypes.CFuncTypeArg("p_is_dict", PyrexTypes.c_int_ptr_type, None),
+ ])
+
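Illustrative sketch only: `_transform_dict_iteration` above drives the loop through `__Pyx_dict_iterator` / `DictIterationNextNode` and keeps the dict's original length in a temp (`dict_len_temp`) so that modification during iteration can be detected. The guard is roughly equivalent to:

    d = {"a": 1, "b": 2}
    orig_len = len(d)              # dict_len_temp in the code above
    for key, value in d.items():
        # the generated iteration step compares against the stored length
        # each time around instead of silently continuing on a resized dict
        assert len(d) == orig_len
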
PySet_Iterator_func_type = PyrexTypes.CFuncType(
PyrexTypes.py_object_type, [
PyrexTypes.CFuncTypeArg("set", PyrexTypes.py_object_type, None),
@@ -990,7 +990,7 @@ class IterationTransform(Visitor.EnvTransform):
PyrexTypes.CFuncTypeArg("p_orig_length", PyrexTypes.c_py_ssize_t_ptr_type, None),
PyrexTypes.CFuncTypeArg("p_is_set", PyrexTypes.c_int_ptr_type, None),
])
-
+
def _transform_set_iteration(self, node, set_obj):
temps = []
temp = UtilNodes.TempHandle(PyrexTypes.py_object_type)
@@ -1063,561 +1063,561 @@ class IterationTransform(Visitor.EnvTransform):
))
-class SwitchTransform(Visitor.EnvTransform):
- """
- This transformation tries to turn long if statements into C switch statements.
- The requirement is that every clause be an (or of) var == value, where the var
- is common among all clauses and both var and value are ints.
- """
- NO_MATCH = (None, None, None)
-
- def extract_conditions(self, cond, allow_not_in):
- while True:
- if isinstance(cond, (ExprNodes.CoerceToTempNode,
- ExprNodes.CoerceToBooleanNode)):
- cond = cond.arg
- elif isinstance(cond, ExprNodes.BoolBinopResultNode):
- cond = cond.arg.arg
- elif isinstance(cond, UtilNodes.EvalWithTempExprNode):
- # this is what we get from the FlattenInListTransform
- cond = cond.subexpression
- elif isinstance(cond, ExprNodes.TypecastNode):
- cond = cond.operand
- else:
- break
-
- if isinstance(cond, ExprNodes.PrimaryCmpNode):
- if cond.cascade is not None:
- return self.NO_MATCH
- elif cond.is_c_string_contains() and \
- isinstance(cond.operand2, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)):
- not_in = cond.operator == 'not_in'
- if not_in and not allow_not_in:
- return self.NO_MATCH
- if isinstance(cond.operand2, ExprNodes.UnicodeNode) and \
- cond.operand2.contains_surrogates():
- # dealing with surrogates leads to different
- # behaviour on wide and narrow Unicode
- # platforms => refuse to optimise this case
- return self.NO_MATCH
- return not_in, cond.operand1, self.extract_in_string_conditions(cond.operand2)
- elif not cond.is_python_comparison():
- if cond.operator == '==':
- not_in = False
- elif allow_not_in and cond.operator == '!=':
- not_in = True
- else:
- return self.NO_MATCH
- # this looks somewhat silly, but it does the right
- # checks for NameNode and AttributeNode
- if is_common_value(cond.operand1, cond.operand1):
- if cond.operand2.is_literal:
- return not_in, cond.operand1, [cond.operand2]
- elif getattr(cond.operand2, 'entry', None) \
- and cond.operand2.entry.is_const:
- return not_in, cond.operand1, [cond.operand2]
- if is_common_value(cond.operand2, cond.operand2):
- if cond.operand1.is_literal:
- return not_in, cond.operand2, [cond.operand1]
- elif getattr(cond.operand1, 'entry', None) \
- and cond.operand1.entry.is_const:
- return not_in, cond.operand2, [cond.operand1]
- elif isinstance(cond, ExprNodes.BoolBinopNode):
- if cond.operator == 'or' or (allow_not_in and cond.operator == 'and'):
- allow_not_in = (cond.operator == 'and')
- not_in_1, t1, c1 = self.extract_conditions(cond.operand1, allow_not_in)
- not_in_2, t2, c2 = self.extract_conditions(cond.operand2, allow_not_in)
- if t1 is not None and not_in_1 == not_in_2 and is_common_value(t1, t2):
- if (not not_in_1) or allow_not_in:
- return not_in_1, t1, c1+c2
- return self.NO_MATCH
-
- def extract_in_string_conditions(self, string_literal):
- if isinstance(string_literal, ExprNodes.UnicodeNode):
- charvals = list(map(ord, set(string_literal.value)))
- charvals.sort()
- return [ ExprNodes.IntNode(string_literal.pos, value=str(charval),
- constant_result=charval)
- for charval in charvals ]
- else:
- # this is a bit tricky as Py3's bytes type returns
- # integers on iteration, whereas Py2 returns 1-char byte
- # strings
- characters = string_literal.value
- characters = list(set([ characters[i:i+1] for i in range(len(characters)) ]))
- characters.sort()
- return [ ExprNodes.CharNode(string_literal.pos, value=charval,
- constant_result=charval)
- for charval in characters ]
-
- def extract_common_conditions(self, common_var, condition, allow_not_in):
- not_in, var, conditions = self.extract_conditions(condition, allow_not_in)
- if var is None:
- return self.NO_MATCH
- elif common_var is not None and not is_common_value(var, common_var):
- return self.NO_MATCH
- elif not (var.type.is_int or var.type.is_enum) or sum([not (cond.type.is_int or cond.type.is_enum) for cond in conditions]):
- return self.NO_MATCH
- return not_in, var, conditions
-
- def has_duplicate_values(self, condition_values):
- # duplicated values don't work in a switch statement
- seen = set()
- for value in condition_values:
- if value.has_constant_result():
- if value.constant_result in seen:
- return True
- seen.add(value.constant_result)
- else:
- # this isn't completely safe as we don't know the
- # final C value, but this is about the best we can do
- try:
- if value.entry.cname in seen:
- return True
- except AttributeError:
- return True # play safe
- seen.add(value.entry.cname)
- return False
-
- def visit_IfStatNode(self, node):
- if not self.current_directives.get('optimize.use_switch'):
- self.visitchildren(node)
- return node
-
- common_var = None
- cases = []
- for if_clause in node.if_clauses:
- _, common_var, conditions = self.extract_common_conditions(
- common_var, if_clause.condition, False)
- if common_var is None:
- self.visitchildren(node)
- return node
+class SwitchTransform(Visitor.EnvTransform):
+ """
+ This transformation tries to turn long if statements into C switch statements.
+ The requirement is that every clause be an (or of) var == value, where the var
+ is common among all clauses and both var and value are ints.
+ """
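Illustrative sketch only: the kind of chain this transform targets — every clause tests the same variable against integer constants, so the whole statement can be emitted as one C switch:

    def classify(x):
        if x == 1 or x == 2:       # one case with two condition values
            return "small"
        elif x == 10:
            return "ten"
        else:                      # becomes the switch's default branch
            return "other"

    assert classify(2) == "small" and classify(10) == "ten" and classify(7) == "other"
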
+ NO_MATCH = (None, None, None)
+
+ def extract_conditions(self, cond, allow_not_in):
+ while True:
+ if isinstance(cond, (ExprNodes.CoerceToTempNode,
+ ExprNodes.CoerceToBooleanNode)):
+ cond = cond.arg
+ elif isinstance(cond, ExprNodes.BoolBinopResultNode):
+ cond = cond.arg.arg
+ elif isinstance(cond, UtilNodes.EvalWithTempExprNode):
+ # this is what we get from the FlattenInListTransform
+ cond = cond.subexpression
+ elif isinstance(cond, ExprNodes.TypecastNode):
+ cond = cond.operand
+ else:
+ break
+
+ if isinstance(cond, ExprNodes.PrimaryCmpNode):
+ if cond.cascade is not None:
+ return self.NO_MATCH
+ elif cond.is_c_string_contains() and \
+ isinstance(cond.operand2, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)):
+ not_in = cond.operator == 'not_in'
+ if not_in and not allow_not_in:
+ return self.NO_MATCH
+ if isinstance(cond.operand2, ExprNodes.UnicodeNode) and \
+ cond.operand2.contains_surrogates():
+ # dealing with surrogates leads to different
+ # behaviour on wide and narrow Unicode
+ # platforms => refuse to optimise this case
+ return self.NO_MATCH
+ return not_in, cond.operand1, self.extract_in_string_conditions(cond.operand2)
+ elif not cond.is_python_comparison():
+ if cond.operator == '==':
+ not_in = False
+ elif allow_not_in and cond.operator == '!=':
+ not_in = True
+ else:
+ return self.NO_MATCH
+ # this looks somewhat silly, but it does the right
+ # checks for NameNode and AttributeNode
+ if is_common_value(cond.operand1, cond.operand1):
+ if cond.operand2.is_literal:
+ return not_in, cond.operand1, [cond.operand2]
+ elif getattr(cond.operand2, 'entry', None) \
+ and cond.operand2.entry.is_const:
+ return not_in, cond.operand1, [cond.operand2]
+ if is_common_value(cond.operand2, cond.operand2):
+ if cond.operand1.is_literal:
+ return not_in, cond.operand2, [cond.operand1]
+ elif getattr(cond.operand1, 'entry', None) \
+ and cond.operand1.entry.is_const:
+ return not_in, cond.operand2, [cond.operand1]
+ elif isinstance(cond, ExprNodes.BoolBinopNode):
+ if cond.operator == 'or' or (allow_not_in and cond.operator == 'and'):
+ allow_not_in = (cond.operator == 'and')
+ not_in_1, t1, c1 = self.extract_conditions(cond.operand1, allow_not_in)
+ not_in_2, t2, c2 = self.extract_conditions(cond.operand2, allow_not_in)
+ if t1 is not None and not_in_1 == not_in_2 and is_common_value(t1, t2):
+ if (not not_in_1) or allow_not_in:
+ return not_in_1, t1, c1+c2
+ return self.NO_MATCH
+
+ def extract_in_string_conditions(self, string_literal):
+ if isinstance(string_literal, ExprNodes.UnicodeNode):
+ charvals = list(map(ord, set(string_literal.value)))
+ charvals.sort()
+ return [ ExprNodes.IntNode(string_literal.pos, value=str(charval),
+ constant_result=charval)
+ for charval in charvals ]
+ else:
+ # this is a bit tricky as Py3's bytes type returns
+ # integers on iteration, whereas Py2 returns 1-char byte
+ # strings
+ characters = string_literal.value
+ characters = list(set([ characters[i:i+1] for i in range(len(characters)) ]))
+ characters.sort()
+ return [ ExprNodes.CharNode(string_literal.pos, value=charval,
+ constant_result=charval)
+ for charval in characters ]
+
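Illustrative sketch only: how `extract_in_string_conditions` above derives the switch case values from a string literal — code point integers for unicode, 1-byte slices for bytes so the same logic works on both Py2 and Py3:

    unicode_cases = sorted(set(map(ord, u"aba")))
    assert unicode_cases == [ord(u'a'), ord(u'b')]

    data = b"aba"
    bytes_cases = sorted(set(data[i:i + 1] for i in range(len(data))))
    assert bytes_cases == [b'a', b'b']
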
+ def extract_common_conditions(self, common_var, condition, allow_not_in):
+ not_in, var, conditions = self.extract_conditions(condition, allow_not_in)
+ if var is None:
+ return self.NO_MATCH
+ elif common_var is not None and not is_common_value(var, common_var):
+ return self.NO_MATCH
+ elif not (var.type.is_int or var.type.is_enum) or sum([not (cond.type.is_int or cond.type.is_enum) for cond in conditions]):
+ return self.NO_MATCH
+ return not_in, var, conditions
+
+ def has_duplicate_values(self, condition_values):
+ # duplicated values don't work in a switch statement
+ seen = set()
+ for value in condition_values:
+ if value.has_constant_result():
+ if value.constant_result in seen:
+ return True
+ seen.add(value.constant_result)
+ else:
+ # this isn't completely safe as we don't know the
+ # final C value, but this is about the best we can do
+ try:
+ if value.entry.cname in seen:
+ return True
+ except AttributeError:
+ return True # play safe
+ seen.add(value.entry.cname)
+ return False
+
+ def visit_IfStatNode(self, node):
+ if not self.current_directives.get('optimize.use_switch'):
+ self.visitchildren(node)
+ return node
+
+ common_var = None
+ cases = []
+ for if_clause in node.if_clauses:
+ _, common_var, conditions = self.extract_common_conditions(
+ common_var, if_clause.condition, False)
+ if common_var is None:
+ self.visitchildren(node)
+ return node
cases.append(Nodes.SwitchCaseNode(pos=if_clause.pos,
conditions=conditions,
body=if_clause.body))
-
- condition_values = [
- cond for case in cases for cond in case.conditions]
- if len(condition_values) < 2:
- self.visitchildren(node)
- return node
- if self.has_duplicate_values(condition_values):
- self.visitchildren(node)
- return node
-
+
+ condition_values = [
+ cond for case in cases for cond in case.conditions]
+ if len(condition_values) < 2:
+ self.visitchildren(node)
+ return node
+ if self.has_duplicate_values(condition_values):
+ self.visitchildren(node)
+ return node
+
# Recurse into body subtrees that we left untouched so far.
self.visitchildren(node, 'else_clause')
for case in cases:
self.visitchildren(case, 'body')
- common_var = unwrap_node(common_var)
+ common_var = unwrap_node(common_var)
switch_node = Nodes.SwitchStatNode(pos=node.pos,
test=common_var,
cases=cases,
else_clause=node.else_clause)
- return switch_node
-
- def visit_CondExprNode(self, node):
- if not self.current_directives.get('optimize.use_switch'):
- self.visitchildren(node)
- return node
-
- not_in, common_var, conditions = self.extract_common_conditions(
- None, node.test, True)
- if common_var is None \
+ return switch_node
+
+ def visit_CondExprNode(self, node):
+ if not self.current_directives.get('optimize.use_switch'):
+ self.visitchildren(node)
+ return node
+
+ not_in, common_var, conditions = self.extract_common_conditions(
+ None, node.test, True)
+ if common_var is None \
or len(conditions) < 2 \
or self.has_duplicate_values(conditions):
- self.visitchildren(node)
- return node
-
- return self.build_simple_switch_statement(
- node, common_var, conditions, not_in,
- node.true_val, node.false_val)
-
- def visit_BoolBinopNode(self, node):
- if not self.current_directives.get('optimize.use_switch'):
- self.visitchildren(node)
- return node
-
- not_in, common_var, conditions = self.extract_common_conditions(
- None, node, True)
- if common_var is None \
+ self.visitchildren(node)
+ return node
+
+ return self.build_simple_switch_statement(
+ node, common_var, conditions, not_in,
+ node.true_val, node.false_val)
+
+ def visit_BoolBinopNode(self, node):
+ if not self.current_directives.get('optimize.use_switch'):
+ self.visitchildren(node)
+ return node
+
+ not_in, common_var, conditions = self.extract_common_conditions(
+ None, node, True)
+ if common_var is None \
or len(conditions) < 2 \
or self.has_duplicate_values(conditions):
- self.visitchildren(node)
- node.wrap_operands(self.current_env()) # in case we changed the operands
- return node
-
- return self.build_simple_switch_statement(
- node, common_var, conditions, not_in,
- ExprNodes.BoolNode(node.pos, value=True, constant_result=True),
- ExprNodes.BoolNode(node.pos, value=False, constant_result=False))
-
- def visit_PrimaryCmpNode(self, node):
- if not self.current_directives.get('optimize.use_switch'):
- self.visitchildren(node)
- return node
-
- not_in, common_var, conditions = self.extract_common_conditions(
- None, node, True)
- if common_var is None \
+ self.visitchildren(node)
+ node.wrap_operands(self.current_env()) # in case we changed the operands
+ return node
+
+ return self.build_simple_switch_statement(
+ node, common_var, conditions, not_in,
+ ExprNodes.BoolNode(node.pos, value=True, constant_result=True),
+ ExprNodes.BoolNode(node.pos, value=False, constant_result=False))
+
+ def visit_PrimaryCmpNode(self, node):
+ if not self.current_directives.get('optimize.use_switch'):
+ self.visitchildren(node)
+ return node
+
+ not_in, common_var, conditions = self.extract_common_conditions(
+ None, node, True)
+ if common_var is None \
or len(conditions) < 2 \
or self.has_duplicate_values(conditions):
- self.visitchildren(node)
- return node
-
- return self.build_simple_switch_statement(
- node, common_var, conditions, not_in,
- ExprNodes.BoolNode(node.pos, value=True, constant_result=True),
- ExprNodes.BoolNode(node.pos, value=False, constant_result=False))
-
- def build_simple_switch_statement(self, node, common_var, conditions,
- not_in, true_val, false_val):
- result_ref = UtilNodes.ResultRefNode(node)
- true_body = Nodes.SingleAssignmentNode(
- node.pos,
- lhs=result_ref,
- rhs=true_val.coerce_to(node.type, self.current_env()),
- first=True)
- false_body = Nodes.SingleAssignmentNode(
- node.pos,
- lhs=result_ref,
- rhs=false_val.coerce_to(node.type, self.current_env()),
- first=True)
-
- if not_in:
- true_body, false_body = false_body, true_body
-
- cases = [Nodes.SwitchCaseNode(pos = node.pos,
- conditions = conditions,
- body = true_body)]
-
- common_var = unwrap_node(common_var)
- switch_node = Nodes.SwitchStatNode(pos = node.pos,
- test = common_var,
- cases = cases,
- else_clause = false_body)
- replacement = UtilNodes.TempResultFromStatNode(result_ref, switch_node)
- return replacement
-
- def visit_EvalWithTempExprNode(self, node):
- if not self.current_directives.get('optimize.use_switch'):
- self.visitchildren(node)
- return node
-
- # drop unused expression temp from FlattenInListTransform
- orig_expr = node.subexpression
- temp_ref = node.lazy_temp
- self.visitchildren(node)
- if node.subexpression is not orig_expr:
- # node was restructured => check if temp is still used
- if not Visitor.tree_contains(node.subexpression, temp_ref):
- return node.subexpression
- return node
-
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
-
-class FlattenInListTransform(Visitor.VisitorTransform, SkipDeclarations):
- """
- This transformation flattens "x in [val1, ..., valn]" into a sequential list
- of comparisons.
- """
-
- def visit_PrimaryCmpNode(self, node):
- self.visitchildren(node)
- if node.cascade is not None:
- return node
- elif node.operator == 'in':
- conjunction = 'or'
- eq_or_neq = '=='
- elif node.operator == 'not_in':
- conjunction = 'and'
- eq_or_neq = '!='
- else:
- return node
-
- if not isinstance(node.operand2, (ExprNodes.TupleNode,
- ExprNodes.ListNode,
- ExprNodes.SetNode)):
- return node
-
- args = node.operand2.args
- if len(args) == 0:
- # note: lhs may have side effects
- return node
-
+ self.visitchildren(node)
+ return node
+
+ return self.build_simple_switch_statement(
+ node, common_var, conditions, not_in,
+ ExprNodes.BoolNode(node.pos, value=True, constant_result=True),
+ ExprNodes.BoolNode(node.pos, value=False, constant_result=False))
+
+ def build_simple_switch_statement(self, node, common_var, conditions,
+ not_in, true_val, false_val):
+ result_ref = UtilNodes.ResultRefNode(node)
+ true_body = Nodes.SingleAssignmentNode(
+ node.pos,
+ lhs=result_ref,
+ rhs=true_val.coerce_to(node.type, self.current_env()),
+ first=True)
+ false_body = Nodes.SingleAssignmentNode(
+ node.pos,
+ lhs=result_ref,
+ rhs=false_val.coerce_to(node.type, self.current_env()),
+ first=True)
+
+ if not_in:
+ true_body, false_body = false_body, true_body
+
+ cases = [Nodes.SwitchCaseNode(pos = node.pos,
+ conditions = conditions,
+ body = true_body)]
+
+ common_var = unwrap_node(common_var)
+ switch_node = Nodes.SwitchStatNode(pos = node.pos,
+ test = common_var,
+ cases = cases,
+ else_clause = false_body)
+ replacement = UtilNodes.TempResultFromStatNode(result_ref, switch_node)
+ return replacement
+
+ def visit_EvalWithTempExprNode(self, node):
+ if not self.current_directives.get('optimize.use_switch'):
+ self.visitchildren(node)
+ return node
+
+ # drop unused expression temp from FlattenInListTransform
+ orig_expr = node.subexpression
+ temp_ref = node.lazy_temp
+ self.visitchildren(node)
+ if node.subexpression is not orig_expr:
+ # node was restructured => check if temp is still used
+ if not Visitor.tree_contains(node.subexpression, temp_ref):
+ return node.subexpression
+ return node
+
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+
+class FlattenInListTransform(Visitor.VisitorTransform, SkipDeclarations):
+ """
+ This transformation flattens "x in [val1, ..., valn]" into a sequential list
+ of comparisons.
+ """
+
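Illustrative sketch only: what the flattening described above produces for an `in` test against a literal sequence; the left-hand side is held in a temp (ResultRefNode) so it is evaluated once:

    x = 2
    # 'x in (1, 2, 3)' becomes a chain of '==' comparisons joined with 'or':
    flattened = (x == 1) or (x == 2) or (x == 3)
    assert flattened == (x in (1, 2, 3))
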
+ def visit_PrimaryCmpNode(self, node):
+ self.visitchildren(node)
+ if node.cascade is not None:
+ return node
+ elif node.operator == 'in':
+ conjunction = 'or'
+ eq_or_neq = '=='
+ elif node.operator == 'not_in':
+ conjunction = 'and'
+ eq_or_neq = '!='
+ else:
+ return node
+
+ if not isinstance(node.operand2, (ExprNodes.TupleNode,
+ ExprNodes.ListNode,
+ ExprNodes.SetNode)):
+ return node
+
+ args = node.operand2.args
+ if len(args) == 0:
+ # note: lhs may have side effects
+ return node
+
if any([arg.is_starred for arg in args]):
# Starred arguments do not directly translate to comparisons or "in" tests.
return node
- lhs = UtilNodes.ResultRefNode(node.operand1)
-
- conds = []
- temps = []
- for arg in args:
- try:
- # Trial optimisation to avoid redundant temp
- # assignments. However, since is_simple() is meant to
- # be called after type analysis, we ignore any errors
- # and just play safe in that case.
- is_simple_arg = arg.is_simple()
- except Exception:
- is_simple_arg = False
- if not is_simple_arg:
- # must evaluate all non-simple RHS before doing the comparisons
- arg = UtilNodes.LetRefNode(arg)
- temps.append(arg)
- cond = ExprNodes.PrimaryCmpNode(
- pos = node.pos,
- operand1 = lhs,
- operator = eq_or_neq,
- operand2 = arg,
- cascade = None)
- conds.append(ExprNodes.TypecastNode(
- pos = node.pos,
- operand = cond,
- type = PyrexTypes.c_bint_type))
- def concat(left, right):
- return ExprNodes.BoolBinopNode(
- pos = node.pos,
- operator = conjunction,
- operand1 = left,
- operand2 = right)
-
- condition = reduce(concat, conds)
- new_node = UtilNodes.EvalWithTempExprNode(lhs, condition)
- for temp in temps[::-1]:
- new_node = UtilNodes.EvalWithTempExprNode(temp, new_node)
- return new_node
-
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
-
-class DropRefcountingTransform(Visitor.VisitorTransform):
- """Drop ref-counting in safe places.
- """
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
- def visit_ParallelAssignmentNode(self, node):
- """
- Parallel swap assignments like 'a,b = b,a' are safe.
- """
- left_names, right_names = [], []
- left_indices, right_indices = [], []
- temps = []
-
- for stat in node.stats:
- if isinstance(stat, Nodes.SingleAssignmentNode):
- if not self._extract_operand(stat.lhs, left_names,
- left_indices, temps):
- return node
- if not self._extract_operand(stat.rhs, right_names,
- right_indices, temps):
- return node
- elif isinstance(stat, Nodes.CascadedAssignmentNode):
- # FIXME
- return node
- else:
- return node
-
- if left_names or right_names:
- # lhs/rhs names must be a non-redundant permutation
- lnames = [ path for path, n in left_names ]
- rnames = [ path for path, n in right_names ]
- if set(lnames) != set(rnames):
- return node
- if len(set(lnames)) != len(right_names):
- return node
-
- if left_indices or right_indices:
- # base name and index of index nodes must be a
- # non-redundant permutation
- lindices = []
- for lhs_node in left_indices:
- index_id = self._extract_index_id(lhs_node)
- if not index_id:
- return node
- lindices.append(index_id)
- rindices = []
- for rhs_node in right_indices:
- index_id = self._extract_index_id(rhs_node)
- if not index_id:
- return node
- rindices.append(index_id)
-
- if set(lindices) != set(rindices):
- return node
- if len(set(lindices)) != len(right_indices):
- return node
-
- # really supporting IndexNode requires support in
- # __Pyx_GetItemInt(), so let's stop short for now
- return node
-
- temp_args = [t.arg for t in temps]
- for temp in temps:
- temp.use_managed_ref = False
-
- for _, name_node in left_names + right_names:
- if name_node not in temp_args:
- name_node.use_managed_ref = False
-
- for index_node in left_indices + right_indices:
- index_node.use_managed_ref = False
-
- return node
-
- def _extract_operand(self, node, names, indices, temps):
- node = unwrap_node(node)
- if not node.type.is_pyobject:
- return False
- if isinstance(node, ExprNodes.CoerceToTempNode):
- temps.append(node)
- node = node.arg
- name_path = []
- obj_node = node
+ lhs = UtilNodes.ResultRefNode(node.operand1)
+
+ conds = []
+ temps = []
+ for arg in args:
+ try:
+ # Trial optimisation to avoid redundant temp
+ # assignments. However, since is_simple() is meant to
+ # be called after type analysis, we ignore any errors
+ # and just play safe in that case.
+ is_simple_arg = arg.is_simple()
+ except Exception:
+ is_simple_arg = False
+ if not is_simple_arg:
+ # must evaluate all non-simple RHS before doing the comparisons
+ arg = UtilNodes.LetRefNode(arg)
+ temps.append(arg)
+ cond = ExprNodes.PrimaryCmpNode(
+ pos = node.pos,
+ operand1 = lhs,
+ operator = eq_or_neq,
+ operand2 = arg,
+ cascade = None)
+ conds.append(ExprNodes.TypecastNode(
+ pos = node.pos,
+ operand = cond,
+ type = PyrexTypes.c_bint_type))
+ def concat(left, right):
+ return ExprNodes.BoolBinopNode(
+ pos = node.pos,
+ operator = conjunction,
+ operand1 = left,
+ operand2 = right)
+
+ condition = reduce(concat, conds)
+ new_node = UtilNodes.EvalWithTempExprNode(lhs, condition)
+ for temp in temps[::-1]:
+ new_node = UtilNodes.EvalWithTempExprNode(temp, new_node)
+ return new_node
+
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+
+class DropRefcountingTransform(Visitor.VisitorTransform):
+ """Drop ref-counting in safe places.
+ """
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+ def visit_ParallelAssignmentNode(self, node):
+ """
+ Parallel swap assignments like 'a,b = b,a' are safe.
+ """
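Illustrative sketch only: the swap pattern the docstring above refers to; when both sides are a pure permutation of the same names, the transform clears `use_managed_ref` so no ref-counting traffic is emitted for it:

    a, b = 1, 2
    a, b = b, a                    # pure permutation of names: safe without incref/decref pairs
    assert (a, b) == (2, 1)
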
+ left_names, right_names = [], []
+ left_indices, right_indices = [], []
+ temps = []
+
+ for stat in node.stats:
+ if isinstance(stat, Nodes.SingleAssignmentNode):
+ if not self._extract_operand(stat.lhs, left_names,
+ left_indices, temps):
+ return node
+ if not self._extract_operand(stat.rhs, right_names,
+ right_indices, temps):
+ return node
+ elif isinstance(stat, Nodes.CascadedAssignmentNode):
+ # FIXME
+ return node
+ else:
+ return node
+
+ if left_names or right_names:
+ # lhs/rhs names must be a non-redundant permutation
+ lnames = [ path for path, n in left_names ]
+ rnames = [ path for path, n in right_names ]
+ if set(lnames) != set(rnames):
+ return node
+ if len(set(lnames)) != len(right_names):
+ return node
+
+ if left_indices or right_indices:
+ # base name and index of index nodes must be a
+ # non-redundant permutation
+ lindices = []
+ for lhs_node in left_indices:
+ index_id = self._extract_index_id(lhs_node)
+ if not index_id:
+ return node
+ lindices.append(index_id)
+ rindices = []
+ for rhs_node in right_indices:
+ index_id = self._extract_index_id(rhs_node)
+ if not index_id:
+ return node
+ rindices.append(index_id)
+
+ if set(lindices) != set(rindices):
+ return node
+ if len(set(lindices)) != len(right_indices):
+ return node
+
+ # really supporting IndexNode requires support in
+ # __Pyx_GetItemInt(), so let's stop short for now
+ return node
+
+ temp_args = [t.arg for t in temps]
+ for temp in temps:
+ temp.use_managed_ref = False
+
+ for _, name_node in left_names + right_names:
+ if name_node not in temp_args:
+ name_node.use_managed_ref = False
+
+ for index_node in left_indices + right_indices:
+ index_node.use_managed_ref = False
+
+ return node
+
+ def _extract_operand(self, node, names, indices, temps):
+ node = unwrap_node(node)
+ if not node.type.is_pyobject:
+ return False
+ if isinstance(node, ExprNodes.CoerceToTempNode):
+ temps.append(node)
+ node = node.arg
+ name_path = []
+ obj_node = node
while obj_node.is_attribute:
- if obj_node.is_py_attr:
- return False
- name_path.append(obj_node.member)
- obj_node = obj_node.obj
+ if obj_node.is_py_attr:
+ return False
+ name_path.append(obj_node.member)
+ obj_node = obj_node.obj
if obj_node.is_name:
- name_path.append(obj_node.name)
- names.append( ('.'.join(name_path[::-1]), node) )
+ name_path.append(obj_node.name)
+ names.append( ('.'.join(name_path[::-1]), node) )
elif node.is_subscript:
- if node.base.type != Builtin.list_type:
- return False
- if not node.index.type.is_int:
- return False
+ if node.base.type != Builtin.list_type:
+ return False
+ if not node.index.type.is_int:
+ return False
if not node.base.is_name:
- return False
- indices.append(node)
- else:
- return False
- return True
-
- def _extract_index_id(self, index_node):
- base = index_node.base
- index = index_node.index
- if isinstance(index, ExprNodes.NameNode):
- index_val = index.name
- elif isinstance(index, ExprNodes.ConstNode):
- # FIXME:
- return None
- else:
- return None
- return (base.name, index_val)
-
-
-class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
- """Optimize some common calls to builtin types *before* the type
- analysis phase and *after* the declarations analysis phase.
-
- This transform cannot make use of any argument types, but it can
- restructure the tree in a way that the type analysis phase can
- respond to.
-
- Introducing C function calls here may not be a good idea. Move
- them to the OptimizeBuiltinCalls transform instead, which runs
- after type analysis.
- """
- # only intercept on call nodes
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
- def visit_SimpleCallNode(self, node):
- self.visitchildren(node)
- function = node.function
- if not self._function_is_builtin_name(function):
- return node
- return self._dispatch_to_handler(node, function, node.args)
-
- def visit_GeneralCallNode(self, node):
- self.visitchildren(node)
- function = node.function
- if not self._function_is_builtin_name(function):
- return node
- arg_tuple = node.positional_args
- if not isinstance(arg_tuple, ExprNodes.TupleNode):
- return node
- args = arg_tuple.args
- return self._dispatch_to_handler(
- node, function, args, node.keyword_args)
-
- def _function_is_builtin_name(self, function):
- if not function.is_name:
- return False
- env = self.current_env()
- entry = env.lookup(function.name)
- if entry is not env.builtin_scope().lookup_here(function.name):
- return False
- # if entry is None, it's at least an undeclared name, so likely builtin
- return True
-
- def _dispatch_to_handler(self, node, function, args, kwargs=None):
- if kwargs is None:
- handler_name = '_handle_simple_function_%s' % function.name
- else:
- handler_name = '_handle_general_function_%s' % function.name
- handle_call = getattr(self, handler_name, None)
- if handle_call is not None:
- if kwargs is None:
- return handle_call(node, args)
- else:
- return handle_call(node, args, kwargs)
- return node
-
- def _inject_capi_function(self, node, cname, func_type, utility_code=None):
- node.function = ExprNodes.PythonCapiFunctionNode(
- node.function.pos, node.function.name, cname, func_type,
- utility_code = utility_code)
-
- def _error_wrong_arg_count(self, function_name, node, args, expected=None):
- if not expected: # None or 0
- arg_str = ''
- elif isinstance(expected, basestring) or expected > 1:
- arg_str = '...'
- elif expected == 1:
- arg_str = 'x'
- else:
- arg_str = ''
- if expected is not None:
- expected_str = 'expected %s, ' % expected
- else:
- expected_str = ''
- error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % (
- function_name, arg_str, expected_str, len(args)))
-
- # specific handlers for simple call nodes
-
- def _handle_simple_function_float(self, node, pos_args):
- if not pos_args:
- return ExprNodes.FloatNode(node.pos, value='0.0')
- if len(pos_args) > 1:
- self._error_wrong_arg_count('float', node, pos_args, 1)
- arg_type = getattr(pos_args[0], 'type', None)
- if arg_type in (PyrexTypes.c_double_type, Builtin.float_type):
- return pos_args[0]
- return node
-
- def _handle_simple_function_slice(self, node, pos_args):
- arg_count = len(pos_args)
- start = step = None
- if arg_count == 1:
- stop, = pos_args
- elif arg_count == 2:
- start, stop = pos_args
- elif arg_count == 3:
- start, stop, step = pos_args
- else:
- self._error_wrong_arg_count('slice', node, pos_args)
- return node
- return ExprNodes.SliceNode(
- node.pos,
- start=start or ExprNodes.NoneNode(node.pos),
- stop=stop,
- step=step or ExprNodes.NoneNode(node.pos))
-
+ return False
+ indices.append(node)
+ else:
+ return False
+ return True
+
+ def _extract_index_id(self, index_node):
+ base = index_node.base
+ index = index_node.index
+ if isinstance(index, ExprNodes.NameNode):
+ index_val = index.name
+ elif isinstance(index, ExprNodes.ConstNode):
+ # FIXME:
+ return None
+ else:
+ return None
+ return (base.name, index_val)
+
+
+class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
+ """Optimize some common calls to builtin types *before* the type
+ analysis phase and *after* the declarations analysis phase.
+
+ This transform cannot make use of any argument types, but it can
+ restructure the tree in a way that the type analysis phase can
+ respond to.
+
+ Introducing C function calls here may not be a good idea. Move
+ them to the OptimizeBuiltinCalls transform instead, which runs
+ after type analysis.
+ """
+ # only intercept on call nodes
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+ def visit_SimpleCallNode(self, node):
+ self.visitchildren(node)
+ function = node.function
+ if not self._function_is_builtin_name(function):
+ return node
+ return self._dispatch_to_handler(node, function, node.args)
+
+ def visit_GeneralCallNode(self, node):
+ self.visitchildren(node)
+ function = node.function
+ if not self._function_is_builtin_name(function):
+ return node
+ arg_tuple = node.positional_args
+ if not isinstance(arg_tuple, ExprNodes.TupleNode):
+ return node
+ args = arg_tuple.args
+ return self._dispatch_to_handler(
+ node, function, args, node.keyword_args)
+
+ def _function_is_builtin_name(self, function):
+ if not function.is_name:
+ return False
+ env = self.current_env()
+ entry = env.lookup(function.name)
+ if entry is not env.builtin_scope().lookup_here(function.name):
+ return False
+ # if entry is None, it's at least an undeclared name, so likely builtin
+ return True
+
+ def _dispatch_to_handler(self, node, function, args, kwargs=None):
+ if kwargs is None:
+ handler_name = '_handle_simple_function_%s' % function.name
+ else:
+ handler_name = '_handle_general_function_%s' % function.name
+ handle_call = getattr(self, handler_name, None)
+ if handle_call is not None:
+ if kwargs is None:
+ return handle_call(node, args)
+ else:
+ return handle_call(node, args, kwargs)
+ return node
+
+ def _inject_capi_function(self, node, cname, func_type, utility_code=None):
+ node.function = ExprNodes.PythonCapiFunctionNode(
+ node.function.pos, node.function.name, cname, func_type,
+ utility_code = utility_code)
+
+ def _error_wrong_arg_count(self, function_name, node, args, expected=None):
+ if not expected: # None or 0
+ arg_str = ''
+ elif isinstance(expected, basestring) or expected > 1:
+ arg_str = '...'
+ elif expected == 1:
+ arg_str = 'x'
+ else:
+ arg_str = ''
+ if expected is not None:
+ expected_str = 'expected %s, ' % expected
+ else:
+ expected_str = ''
+ error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % (
+ function_name, arg_str, expected_str, len(args)))
+
+ # specific handlers for simple call nodes
+
+ def _handle_simple_function_float(self, node, pos_args):
+ if not pos_args:
+ return ExprNodes.FloatNode(node.pos, value='0.0')
+ if len(pos_args) > 1:
+ self._error_wrong_arg_count('float', node, pos_args, 1)
+ arg_type = getattr(pos_args[0], 'type', None)
+ if arg_type in (PyrexTypes.c_double_type, Builtin.float_type):
+ return pos_args[0]
+ return node
+
+ def _handle_simple_function_slice(self, node, pos_args):
+ arg_count = len(pos_args)
+ start = step = None
+ if arg_count == 1:
+ stop, = pos_args
+ elif arg_count == 2:
+ start, stop = pos_args
+ elif arg_count == 3:
+ start, stop, step = pos_args
+ else:
+ self._error_wrong_arg_count('slice', node, pos_args)
+ return node
+ return ExprNodes.SliceNode(
+ node.pos,
+ start=start or ExprNodes.NoneNode(node.pos),
+ stop=stop,
+ step=step or ExprNodes.NoneNode(node.pos))
+
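Illustration (editor's sketch, not part of the diff): the slice() rewrite above fills in None for omitted slice components, which matches plain Python behaviour.

# Hypothetical checks, assuming only standard slice() semantics:
assert slice(10) == slice(None, 10, None)
assert slice(1, 10) == slice(1, 10, None)
assert slice(1, 10, 2) == slice(1, 10, 2)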
def _handle_simple_function_ord(self, node, pos_args):
"""Unpack ord('X').
"""
@@ -1640,59 +1640,59 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
constant_result=ord(arg.unicode_value)
)
return node
-
+
# sequence processing
-
- def _handle_simple_function_all(self, node, pos_args):
- """Transform
-
+
+ def _handle_simple_function_all(self, node, pos_args):
+ """Transform
+
_result = all(p(x) for L in LL for x in L)
-
- into
-
- for L in LL:
- for x in L:
+
+ into
+
+ for L in LL:
+ for x in L:
if not p(x):
return False
- else:
+ else:
return True
- """
- return self._transform_any_all(node, pos_args, False)
-
- def _handle_simple_function_any(self, node, pos_args):
- """Transform
-
+ """
+ return self._transform_any_all(node, pos_args, False)
+
+ def _handle_simple_function_any(self, node, pos_args):
+ """Transform
+
_result = any(p(x) for L in LL for x in L)
-
- into
-
- for L in LL:
- for x in L:
+
+ into
+
+ for L in LL:
+ for x in L:
if p(x):
return True
- else:
+ else:
return False
- """
- return self._transform_any_all(node, pos_args, True)
-
- def _transform_any_all(self, node, pos_args, is_any):
- if len(pos_args) != 1:
- return node
- if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
- return node
- gen_expr_node = pos_args[0]
+ """
+ return self._transform_any_all(node, pos_args, True)
+
+ def _transform_any_all(self, node, pos_args, is_any):
+ if len(pos_args) != 1:
+ return node
+ if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
+ return node
+ gen_expr_node = pos_args[0]
generator_body = gen_expr_node.def_node.gbody
loop_node = generator_body.body
yield_expression, yield_stat_node = _find_single_yield_expression(loop_node)
- if yield_expression is None:
- return node
-
- if is_any:
- condition = yield_expression
- else:
+ if yield_expression is None:
+ return node
+
+ if is_any:
+ condition = yield_expression
+ else:
condition = ExprNodes.NotNode(yield_expression.pos, operand=yield_expression)
-
- test_node = Nodes.IfStatNode(
+
+ test_node = Nodes.IfStatNode(
yield_expression.pos, else_clause=None, if_clauses=[
Nodes.IfClauseNode(
yield_expression.pos,
@@ -1703,27 +1703,27 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
)]
)
loop_node.else_clause = Nodes.ReturnStatNode(
- node.pos,
+ node.pos,
value=ExprNodes.BoolNode(yield_expression.pos, value=not is_any, constant_result=not is_any))
-
+
Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, test_node)
-
- return ExprNodes.InlinedGeneratorExpressionNode(
+
+ return ExprNodes.InlinedGeneratorExpressionNode(
gen_expr_node.pos, gen=gen_expr_node, orig_func='any' if is_any else 'all')
-
+
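Illustration (editor's sketch, not part of the diff): the loop that _transform_any_all generates for the all() case in the docstring above, written as plain Python; the helper name is made up for this example.

def _all_expanded(LL, p):
    # equivalent of: all(p(x) for L in LL for x in L)
    for L in LL:
        for x in L:
            if not p(x):
                return False
    return True

assert _all_expanded([[1, 2], [3]], lambda x: x > 0) is True
assert _all_expanded([[1, -2]], lambda x: x > 0) is False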
PySequence_List_func_type = PyrexTypes.CFuncType(
Builtin.list_type,
[PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)])
- def _handle_simple_function_sorted(self, node, pos_args):
- """Transform sorted(genexpr) and sorted([listcomp]) into
- [listcomp].sort(). CPython just reads the iterable into a
- list and calls .sort() on it. Expanding the iterable in a
- listcomp is still faster and the result can be sorted in
- place.
- """
- if len(pos_args) != 1:
- return node
+ def _handle_simple_function_sorted(self, node, pos_args):
+ """Transform sorted(genexpr) and sorted([listcomp]) into
+ [listcomp].sort(). CPython just reads the iterable into a
+ list and calls .sort() on it. Expanding the iterable in a
+ listcomp is still faster and the result can be sorted in
+ place.
+ """
+ if len(pos_args) != 1:
+ return node
arg = pos_args[0]
if isinstance(arg, ExprNodes.ComprehensionNode) and arg.type is Builtin.list_type:
@@ -1732,11 +1732,11 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
elif isinstance(arg, ExprNodes.GeneratorExpressionNode):
gen_expr_node = arg
- loop_node = gen_expr_node.loop
+ loop_node = gen_expr_node.loop
yield_statements = _find_yield_statements(loop_node)
if not yield_statements:
- return node
-
+ return node
+
list_node = ExprNodes.InlinedGeneratorExpressionNode(
node.pos, gen_expr_node, orig_func='sorted',
comprehension_type=Builtin.list_type)
@@ -1747,187 +1747,187 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
expr=yield_expression,
target=list_node.target)
Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
-
+
elif arg.is_sequence_constructor:
# sorted([a, b, c]) or sorted((a, b, c)). The result is always a list,
# so starting off with a fresh one is more efficient.
list_node = loop_node = arg.as_list()
- else:
+ else:
# Interestingly, PySequence_List works on a lot of non-sequence
# things as well.
list_node = loop_node = ExprNodes.PythonCapiCallNode(
node.pos, "PySequence_List", self.PySequence_List_func_type,
args=pos_args, is_temp=True)
-
- result_node = UtilNodes.ResultRefNode(
+
+ result_node = UtilNodes.ResultRefNode(
pos=loop_node.pos, type=Builtin.list_type, may_hold_none=False)
list_assign_node = Nodes.SingleAssignmentNode(
node.pos, lhs=result_node, rhs=list_node, first=True)
-
- sort_method = ExprNodes.AttributeNode(
+
+ sort_method = ExprNodes.AttributeNode(
node.pos, obj=result_node, attribute=EncodedString('sort'),
- # entry ? type ?
+ # entry ? type ?
needs_none_check=False)
- sort_node = Nodes.ExprStatNode(
+ sort_node = Nodes.ExprStatNode(
node.pos, expr=ExprNodes.SimpleCallNode(
node.pos, function=sort_method, args=[]))
-
- sort_node.analyse_declarations(self.current_env())
-
- return UtilNodes.TempResultFromStatNode(
- result_node,
+
+ sort_node.analyse_declarations(self.current_env())
+
+ return UtilNodes.TempResultFromStatNode(
+ result_node,
Nodes.StatListNode(node.pos, stats=[list_assign_node, sort_node]))
-
+
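Illustration (editor's sketch, not part of the diff): the sorted(genexpr) rewrite described above, in plain Python terms - expand the iterable into a list, then sort it in place.

data = [3, 1, 2]
result = [x * x for x in data]   # inlined comprehension instead of the genexpr
result.sort()                    # in-place sort on the fresh list
assert result == sorted(x * x for x in data)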
def __handle_simple_function_sum(self, node, pos_args):
- """Transform sum(genexpr) into an equivalent inlined aggregation loop.
- """
- if len(pos_args) not in (1,2):
- return node
- if not isinstance(pos_args[0], (ExprNodes.GeneratorExpressionNode,
- ExprNodes.ComprehensionNode)):
- return node
- gen_expr_node = pos_args[0]
- loop_node = gen_expr_node.loop
-
- if isinstance(gen_expr_node, ExprNodes.GeneratorExpressionNode):
+ """Transform sum(genexpr) into an equivalent inlined aggregation loop.
+ """
+ if len(pos_args) not in (1,2):
+ return node
+ if not isinstance(pos_args[0], (ExprNodes.GeneratorExpressionNode,
+ ExprNodes.ComprehensionNode)):
+ return node
+ gen_expr_node = pos_args[0]
+ loop_node = gen_expr_node.loop
+
+ if isinstance(gen_expr_node, ExprNodes.GeneratorExpressionNode):
yield_expression, yield_stat_node = _find_single_yield_expression(loop_node)
# FIXME: currently nonfunctional
yield_expression = None
- if yield_expression is None:
- return node
+ if yield_expression is None:
+ return node
else: # ComprehensionNode
- yield_stat_node = gen_expr_node.append
- yield_expression = yield_stat_node.expr
- try:
- if not yield_expression.is_literal or not yield_expression.type.is_int:
- return node
- except AttributeError:
- return node # in case we don't have a type yet
- # special case: old Py2 backwards compatible "sum([int_const for ...])"
- # can safely be unpacked into a genexpr
-
- if len(pos_args) == 1:
- start = ExprNodes.IntNode(node.pos, value='0', constant_result=0)
- else:
- start = pos_args[1]
-
- result_ref = UtilNodes.ResultRefNode(pos=node.pos, type=PyrexTypes.py_object_type)
- add_node = Nodes.SingleAssignmentNode(
- yield_expression.pos,
- lhs = result_ref,
- rhs = ExprNodes.binop_node(node.pos, '+', result_ref, yield_expression)
- )
-
+ yield_stat_node = gen_expr_node.append
+ yield_expression = yield_stat_node.expr
+ try:
+ if not yield_expression.is_literal or not yield_expression.type.is_int:
+ return node
+ except AttributeError:
+ return node # in case we don't have a type yet
+ # special case: old Py2 backwards compatible "sum([int_const for ...])"
+ # can safely be unpacked into a genexpr
+
+ if len(pos_args) == 1:
+ start = ExprNodes.IntNode(node.pos, value='0', constant_result=0)
+ else:
+ start = pos_args[1]
+
+ result_ref = UtilNodes.ResultRefNode(pos=node.pos, type=PyrexTypes.py_object_type)
+ add_node = Nodes.SingleAssignmentNode(
+ yield_expression.pos,
+ lhs = result_ref,
+ rhs = ExprNodes.binop_node(node.pos, '+', result_ref, yield_expression)
+ )
+
Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, add_node)
-
- exec_code = Nodes.StatListNode(
- node.pos,
- stats = [
- Nodes.SingleAssignmentNode(
- start.pos,
- lhs = UtilNodes.ResultRefNode(pos=node.pos, expression=result_ref),
- rhs = start,
- first = True),
- loop_node
- ])
-
- return ExprNodes.InlinedGeneratorExpressionNode(
- gen_expr_node.pos, loop = exec_code, result_node = result_ref,
- expr_scope = gen_expr_node.expr_scope, orig_func = 'sum',
- has_local_scope = gen_expr_node.has_local_scope)
-
- def _handle_simple_function_min(self, node, pos_args):
- return self._optimise_min_max(node, pos_args, '<')
-
- def _handle_simple_function_max(self, node, pos_args):
- return self._optimise_min_max(node, pos_args, '>')
-
- def _optimise_min_max(self, node, args, operator):
- """Replace min(a,b,...) and max(a,b,...) by explicit comparison code.
- """
- if len(args) <= 1:
- if len(args) == 1 and args[0].is_sequence_constructor:
- args = args[0].args
+
+ exec_code = Nodes.StatListNode(
+ node.pos,
+ stats = [
+ Nodes.SingleAssignmentNode(
+ start.pos,
+ lhs = UtilNodes.ResultRefNode(pos=node.pos, expression=result_ref),
+ rhs = start,
+ first = True),
+ loop_node
+ ])
+
+ return ExprNodes.InlinedGeneratorExpressionNode(
+ gen_expr_node.pos, loop = exec_code, result_node = result_ref,
+ expr_scope = gen_expr_node.expr_scope, orig_func = 'sum',
+ has_local_scope = gen_expr_node.has_local_scope)
+
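Illustration (editor's sketch, not part of the diff): the inlined aggregation loop that __handle_simple_function_sum would emit for sum(expr for x in seq, start), written as plain Python; the helper name is invented for this sketch.

def _sum_expanded(seq, start=0):
    result = start           # the optional second argument, defaulting to 0
    for x in seq:
        result = result + x  # the per-item assignment built by the transform
    return result

assert _sum_expanded([1, 2, 3], 10) == sum([1, 2, 3], 10)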
+ def _handle_simple_function_min(self, node, pos_args):
+ return self._optimise_min_max(node, pos_args, '<')
+
+ def _handle_simple_function_max(self, node, pos_args):
+ return self._optimise_min_max(node, pos_args, '>')
+
+ def _optimise_min_max(self, node, args, operator):
+ """Replace min(a,b,...) and max(a,b,...) by explicit comparison code.
+ """
+ if len(args) <= 1:
+ if len(args) == 1 and args[0].is_sequence_constructor:
+ args = args[0].args
if len(args) <= 1:
- # leave this to Python
- return node
-
- cascaded_nodes = list(map(UtilNodes.ResultRefNode, args[1:]))
-
- last_result = args[0]
- for arg_node in cascaded_nodes:
- result_ref = UtilNodes.ResultRefNode(last_result)
- last_result = ExprNodes.CondExprNode(
- arg_node.pos,
- true_val = arg_node,
- false_val = result_ref,
- test = ExprNodes.PrimaryCmpNode(
- arg_node.pos,
- operand1 = arg_node,
- operator = operator,
- operand2 = result_ref,
- )
- )
- last_result = UtilNodes.EvalWithTempExprNode(result_ref, last_result)
-
- for ref_node in cascaded_nodes[::-1]:
- last_result = UtilNodes.EvalWithTempExprNode(ref_node, last_result)
-
- return last_result
-
+ # leave this to Python
+ return node
+
+ cascaded_nodes = list(map(UtilNodes.ResultRefNode, args[1:]))
+
+ last_result = args[0]
+ for arg_node in cascaded_nodes:
+ result_ref = UtilNodes.ResultRefNode(last_result)
+ last_result = ExprNodes.CondExprNode(
+ arg_node.pos,
+ true_val = arg_node,
+ false_val = result_ref,
+ test = ExprNodes.PrimaryCmpNode(
+ arg_node.pos,
+ operand1 = arg_node,
+ operator = operator,
+ operand2 = result_ref,
+ )
+ )
+ last_result = UtilNodes.EvalWithTempExprNode(result_ref, last_result)
+
+ for ref_node in cascaded_nodes[::-1]:
+ last_result = UtilNodes.EvalWithTempExprNode(ref_node, last_result)
+
+ return last_result
+
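Illustration (editor's sketch, not part of the diff): _optimise_min_max cascades conditional expressions over temporaries; for min(a, b, c) the generated code is equivalent to the following plain-Python helper (name invented here).

def _min3(a, b, c):
    t1 = b if b < a else a      # first comparison: min(a, b)
    t2 = c if c < t1 else t1    # compare next arg against the running result
    return t2

assert _min3(3, 1, 2) == 1 == min(3, 1, 2)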
# builtin type creation
- def _DISABLED_handle_simple_function_tuple(self, node, pos_args):
- if not pos_args:
- return ExprNodes.TupleNode(node.pos, args=[], constant_result=())
- # This is a bit special - for iterables (including genexps),
- # Python actually overallocates and resizes a newly created
- # tuple incrementally while reading items, which we can't
- # easily do without explicit node support. Instead, we read
- # the items into a list and then copy them into a tuple of the
- # final size. This takes up to twice as much memory, but will
- # have to do until we have real support for genexps.
- result = self._transform_list_set_genexpr(node, pos_args, Builtin.list_type)
- if result is not node:
- return ExprNodes.AsTupleNode(node.pos, arg=result)
- return node
-
- def _handle_simple_function_frozenset(self, node, pos_args):
- """Replace frozenset([...]) by frozenset((...)) as tuples are more efficient.
- """
- if len(pos_args) != 1:
- return node
- if pos_args[0].is_sequence_constructor and not pos_args[0].args:
- del pos_args[0]
- elif isinstance(pos_args[0], ExprNodes.ListNode):
- pos_args[0] = pos_args[0].as_tuple()
- return node
-
- def _handle_simple_function_list(self, node, pos_args):
- if not pos_args:
- return ExprNodes.ListNode(node.pos, args=[], constant_result=[])
- return self._transform_list_set_genexpr(node, pos_args, Builtin.list_type)
-
- def _handle_simple_function_set(self, node, pos_args):
- if not pos_args:
- return ExprNodes.SetNode(node.pos, args=[], constant_result=set())
- return self._transform_list_set_genexpr(node, pos_args, Builtin.set_type)
-
- def _transform_list_set_genexpr(self, node, pos_args, target_type):
+ def _DISABLED_handle_simple_function_tuple(self, node, pos_args):
+ if not pos_args:
+ return ExprNodes.TupleNode(node.pos, args=[], constant_result=())
+ # This is a bit special - for iterables (including genexps),
+ # Python actually overallocates and resizes a newly created
+ # tuple incrementally while reading items, which we can't
+ # easily do without explicit node support. Instead, we read
+ # the items into a list and then copy them into a tuple of the
+ # final size. This takes up to twice as much memory, but will
+ # have to do until we have real support for genexps.
+ result = self._transform_list_set_genexpr(node, pos_args, Builtin.list_type)
+ if result is not node:
+ return ExprNodes.AsTupleNode(node.pos, arg=result)
+ return node
+
+ def _handle_simple_function_frozenset(self, node, pos_args):
+ """Replace frozenset([...]) by frozenset((...)) as tuples are more efficient.
+ """
+ if len(pos_args) != 1:
+ return node
+ if pos_args[0].is_sequence_constructor and not pos_args[0].args:
+ del pos_args[0]
+ elif isinstance(pos_args[0], ExprNodes.ListNode):
+ pos_args[0] = pos_args[0].as_tuple()
+ return node
+
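Illustration (editor's sketch, not part of the diff): the early frozenset() rewrite only swaps the argument's container type, so the result is unchanged.

assert frozenset([1, 2, 3]) == frozenset((1, 2, 3))  # list literal -> tuple literal
assert frozenset([]) == frozenset()                  # empty sequence arg is dropped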
+ def _handle_simple_function_list(self, node, pos_args):
+ if not pos_args:
+ return ExprNodes.ListNode(node.pos, args=[], constant_result=[])
+ return self._transform_list_set_genexpr(node, pos_args, Builtin.list_type)
+
+ def _handle_simple_function_set(self, node, pos_args):
+ if not pos_args:
+ return ExprNodes.SetNode(node.pos, args=[], constant_result=set())
+ return self._transform_list_set_genexpr(node, pos_args, Builtin.set_type)
+
+ def _transform_list_set_genexpr(self, node, pos_args, target_type):
"""Replace set(genexpr) and list(genexpr) by an inlined comprehension.
- """
- if len(pos_args) > 1:
- return node
- if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
- return node
- gen_expr_node = pos_args[0]
- loop_node = gen_expr_node.loop
-
+ """
+ if len(pos_args) > 1:
+ return node
+ if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
+ return node
+ gen_expr_node = pos_args[0]
+ loop_node = gen_expr_node.loop
+
yield_statements = _find_yield_statements(loop_node)
if not yield_statements:
- return node
-
+ return node
+
result_node = ExprNodes.InlinedGeneratorExpressionNode(
node.pos, gen_expr_node,
orig_func='set' if target_type is Builtin.set_type else 'list',
@@ -1939,31 +1939,31 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
expr=yield_expression,
target=result_node.target)
Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
-
+
return result_node
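Illustration (editor's sketch, not part of the diff): the genexpr arguments handled by _transform_list_set_genexpr become the corresponding comprehensions.

it = range(4)
assert list(x + 1 for x in it) == [x + 1 for x in it]
assert set(x % 2 for x in it) == {x % 2 for x in it}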
-
- def _handle_simple_function_dict(self, node, pos_args):
+
+ def _handle_simple_function_dict(self, node, pos_args):
"""Replace dict( (a,b) for ... ) by an inlined { a:b for ... }
- """
- if len(pos_args) == 0:
- return ExprNodes.DictNode(node.pos, key_value_pairs=[], constant_result={})
- if len(pos_args) > 1:
- return node
- if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
- return node
- gen_expr_node = pos_args[0]
- loop_node = gen_expr_node.loop
-
+ """
+ if len(pos_args) == 0:
+ return ExprNodes.DictNode(node.pos, key_value_pairs=[], constant_result={})
+ if len(pos_args) > 1:
+ return node
+ if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
+ return node
+ gen_expr_node = pos_args[0]
+ loop_node = gen_expr_node.loop
+
yield_statements = _find_yield_statements(loop_node)
if not yield_statements:
- return node
-
+ return node
+
for yield_expression, _ in yield_statements:
if not isinstance(yield_expression, ExprNodes.TupleNode):
return node
if len(yield_expression.args) != 2:
return node
-
+
result_node = ExprNodes.InlinedGeneratorExpressionNode(
node.pos, gen_expr_node, orig_func='dict',
comprehension_type=Builtin.dict_type)
@@ -1975,90 +1975,90 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
value_expr=yield_expression.args[1],
target=result_node.target)
Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
-
+
return result_node
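Illustration (editor's sketch, not part of the diff): dict() over a genexpr of 2-tuples becomes an inlined dict comprehension.

pairs = [("a", 1), ("b", 2)]
assert dict((k, v) for k, v in pairs) == {k: v for k, v in pairs}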
-
- # specific handlers for general call nodes
-
- def _handle_general_function_dict(self, node, pos_args, kwargs):
- """Replace dict(a=b,c=d,...) by the underlying keyword dict
- construction which is done anyway.
- """
- if len(pos_args) > 0:
- return node
- if not isinstance(kwargs, ExprNodes.DictNode):
- return node
- return kwargs
-
-
-class InlineDefNodeCalls(Visitor.NodeRefCleanupMixin, Visitor.EnvTransform):
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
- def get_constant_value_node(self, name_node):
- if name_node.cf_state is None:
- return None
- if name_node.cf_state.cf_is_null:
- return None
- entry = self.current_env().lookup(name_node.name)
- if not entry or (not entry.cf_assignments
- or len(entry.cf_assignments) != 1):
- # not just a single assignment in all closures
- return None
- return entry.cf_assignments[0].rhs
-
- def visit_SimpleCallNode(self, node):
- self.visitchildren(node)
- if not self.current_directives.get('optimize.inline_defnode_calls'):
- return node
- function_name = node.function
- if not function_name.is_name:
- return node
- function = self.get_constant_value_node(function_name)
- if not isinstance(function, ExprNodes.PyCFunctionNode):
- return node
- inlined = ExprNodes.InlinedDefNodeCallNode(
- node.pos, function_name=function_name,
- function=function, args=node.args)
- if inlined.can_be_inlined():
- return self.replace(node, inlined)
- return node
-
-
+
+ # specific handlers for general call nodes
+
+ def _handle_general_function_dict(self, node, pos_args, kwargs):
+ """Replace dict(a=b,c=d,...) by the underlying keyword dict
+ construction which is done anyway.
+ """
+ if len(pos_args) > 0:
+ return node
+ if not isinstance(kwargs, ExprNodes.DictNode):
+ return node
+ return kwargs
+
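Illustration (editor's sketch, not part of the diff): dict(a=..., b=...) is replaced by the keyword dict that the call would have built anyway.

assert dict(a=1, b=2) == {"a": 1, "b": 2}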
+
+class InlineDefNodeCalls(Visitor.NodeRefCleanupMixin, Visitor.EnvTransform):
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+ def get_constant_value_node(self, name_node):
+ if name_node.cf_state is None:
+ return None
+ if name_node.cf_state.cf_is_null:
+ return None
+ entry = self.current_env().lookup(name_node.name)
+ if not entry or (not entry.cf_assignments
+ or len(entry.cf_assignments) != 1):
+ # not just a single assignment in all closures
+ return None
+ return entry.cf_assignments[0].rhs
+
+ def visit_SimpleCallNode(self, node):
+ self.visitchildren(node)
+ if not self.current_directives.get('optimize.inline_defnode_calls'):
+ return node
+ function_name = node.function
+ if not function_name.is_name:
+ return node
+ function = self.get_constant_value_node(function_name)
+ if not isinstance(function, ExprNodes.PyCFunctionNode):
+ return node
+ inlined = ExprNodes.InlinedDefNodeCallNode(
+ node.pos, function_name=function_name,
+ function=function, args=node.args)
+ if inlined.can_be_inlined():
+ return self.replace(node, inlined)
+ return node
+
+
class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
Visitor.MethodDispatcherTransform):
- """Optimize some common methods calls and instantiation patterns
- for builtin types *after* the type analysis phase.
-
- Running after type analysis, this transform can only perform
- function replacements that do not alter the function return type
- in a way that was not anticipated by the type analysis.
- """
- ### cleanup to avoid redundant coercions to/from Python types
-
+ """Optimize some common methods calls and instantiation patterns
+ for builtin types *after* the type analysis phase.
+
+ Running after type analysis, this transform can only perform
+ function replacements that do not alter the function return type
+ in a way that was not anticipated by the type analysis.
+ """
+ ### cleanup to avoid redundant coercions to/from Python types
+
def visit_PyTypeTestNode(self, node):
- """Flatten redundant type checks after tree changes.
- """
- self.visitchildren(node)
+ """Flatten redundant type checks after tree changes.
+ """
+ self.visitchildren(node)
return node.reanalyse()
-
- def _visit_TypecastNode(self, node):
- # disabled - the user may have had a reason to put a type
- # cast, even if it looks redundant to Cython
- """
- Drop redundant type casts.
- """
- self.visitchildren(node)
- if node.type == node.operand.type:
- return node.operand
- return node
-
- def visit_ExprStatNode(self, node):
- """
+
+ def _visit_TypecastNode(self, node):
+ # disabled - the user may have had a reason to put a type
+ # cast, even if it looks redundant to Cython
+ """
+ Drop redundant type casts.
+ """
+ self.visitchildren(node)
+ if node.type == node.operand.type:
+ return node.operand
+ return node
+
+ def visit_ExprStatNode(self, node):
+ """
Drop dead code and useless coercions.
- """
- self.visitchildren(node)
- if isinstance(node.expr, ExprNodes.CoerceToPyTypeNode):
- node.expr = node.expr.arg
+ """
+ self.visitchildren(node)
+ if isinstance(node.expr, ExprNodes.CoerceToPyTypeNode):
+ node.expr = node.expr.arg
expr = node.expr
if expr is None or expr.is_none or expr.is_literal:
# Expression was removed or is dead code => remove ExprStatNode as well.
@@ -2066,20 +2066,20 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
if expr.is_name and expr.entry and (expr.entry.is_local or expr.entry.is_arg):
# Ignore dead references to local variables etc.
return None
- return node
-
- def visit_CoerceToBooleanNode(self, node):
- """Drop redundant conversion nodes after tree changes.
- """
- self.visitchildren(node)
- arg = node.arg
- if isinstance(arg, ExprNodes.PyTypeTestNode):
- arg = arg.arg
- if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
- if arg.type in (PyrexTypes.py_object_type, Builtin.bool_type):
- return arg.arg.coerce_to_boolean(self.current_env())
- return node
-
+ return node
+
+ def visit_CoerceToBooleanNode(self, node):
+ """Drop redundant conversion nodes after tree changes.
+ """
+ self.visitchildren(node)
+ arg = node.arg
+ if isinstance(arg, ExprNodes.PyTypeTestNode):
+ arg = arg.arg
+ if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+ if arg.type in (PyrexTypes.py_object_type, Builtin.bool_type):
+ return arg.arg.coerce_to_boolean(self.current_env())
+ return node
+
PyNumber_Float_func_type = PyrexTypes.CFuncType(
PyrexTypes.py_object_type, [
PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None)
@@ -2107,80 +2107,80 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
).coerce_to(node.type, self.current_env())
return node
- def visit_CoerceFromPyTypeNode(self, node):
- """Drop redundant conversion nodes after tree changes.
-
- Also, optimise away calls to Python's builtin int() and
- float() if the result is going to be coerced back into a C
- type anyway.
- """
- self.visitchildren(node)
- arg = node.arg
- if not arg.type.is_pyobject:
- # no Python conversion left at all, just do a C coercion instead
+ def visit_CoerceFromPyTypeNode(self, node):
+ """Drop redundant conversion nodes after tree changes.
+
+ Also, optimise away calls to Python's builtin int() and
+ float() if the result is going to be coerced back into a C
+ type anyway.
+ """
+ self.visitchildren(node)
+ arg = node.arg
+ if not arg.type.is_pyobject:
+ # no Python conversion left at all, just do a C coercion instead
if node.type != arg.type:
arg = arg.coerce_to(node.type, self.current_env())
return arg
- if isinstance(arg, ExprNodes.PyTypeTestNode):
- arg = arg.arg
- if arg.is_literal:
- if (node.type.is_int and isinstance(arg, ExprNodes.IntNode) or
- node.type.is_float and isinstance(arg, ExprNodes.FloatNode) or
- node.type.is_int and isinstance(arg, ExprNodes.BoolNode)):
- return arg.coerce_to(node.type, self.current_env())
- elif isinstance(arg, ExprNodes.CoerceToPyTypeNode):
- if arg.type is PyrexTypes.py_object_type:
- if node.type.assignable_from(arg.arg.type):
- # completely redundant C->Py->C coercion
- return arg.arg.coerce_to(node.type, self.current_env())
+ if isinstance(arg, ExprNodes.PyTypeTestNode):
+ arg = arg.arg
+ if arg.is_literal:
+ if (node.type.is_int and isinstance(arg, ExprNodes.IntNode) or
+ node.type.is_float and isinstance(arg, ExprNodes.FloatNode) or
+ node.type.is_int and isinstance(arg, ExprNodes.BoolNode)):
+ return arg.coerce_to(node.type, self.current_env())
+ elif isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+ if arg.type is PyrexTypes.py_object_type:
+ if node.type.assignable_from(arg.arg.type):
+ # completely redundant C->Py->C coercion
+ return arg.arg.coerce_to(node.type, self.current_env())
elif arg.type is Builtin.unicode_type:
if arg.arg.type.is_unicode_char and node.type.is_unicode_char:
return arg.arg.coerce_to(node.type, self.current_env())
- elif isinstance(arg, ExprNodes.SimpleCallNode):
- if node.type.is_int or node.type.is_float:
- return self._optimise_numeric_cast_call(node, arg)
+ elif isinstance(arg, ExprNodes.SimpleCallNode):
+ if node.type.is_int or node.type.is_float:
+ return self._optimise_numeric_cast_call(node, arg)
elif arg.is_subscript:
- index_node = arg.index
- if isinstance(index_node, ExprNodes.CoerceToPyTypeNode):
- index_node = index_node.arg
- if index_node.type.is_int:
- return self._optimise_int_indexing(node, arg, index_node)
- return node
-
- PyBytes_GetItemInt_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_char_type, [
- PyrexTypes.CFuncTypeArg("bytes", Builtin.bytes_type, None),
- PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("check_bounds", PyrexTypes.c_int_type, None),
- ],
- exception_value = "((char)-1)",
- exception_check = True)
-
- def _optimise_int_indexing(self, coerce_node, arg, index_node):
- env = self.current_env()
- bound_check_bool = env.directives['boundscheck'] and 1 or 0
- if arg.base.type is Builtin.bytes_type:
- if coerce_node.type in (PyrexTypes.c_char_type, PyrexTypes.c_uchar_type):
- # bytes[index] -> char
- bound_check_node = ExprNodes.IntNode(
- coerce_node.pos, value=str(bound_check_bool),
- constant_result=bound_check_bool)
- node = ExprNodes.PythonCapiCallNode(
- coerce_node.pos, "__Pyx_PyBytes_GetItemInt",
- self.PyBytes_GetItemInt_func_type,
- args=[
- arg.base.as_none_safe_node("'NoneType' object is not subscriptable"),
- index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
- bound_check_node,
- ],
- is_temp=True,
- utility_code=UtilityCode.load_cached(
- 'bytes_index', 'StringTools.c'))
- if coerce_node.type is not PyrexTypes.c_char_type:
- node = node.coerce_to(coerce_node.type, env)
- return node
- return coerce_node
-
+ index_node = arg.index
+ if isinstance(index_node, ExprNodes.CoerceToPyTypeNode):
+ index_node = index_node.arg
+ if index_node.type.is_int:
+ return self._optimise_int_indexing(node, arg, index_node)
+ return node
+
+ PyBytes_GetItemInt_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_char_type, [
+ PyrexTypes.CFuncTypeArg("bytes", Builtin.bytes_type, None),
+ PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("check_bounds", PyrexTypes.c_int_type, None),
+ ],
+ exception_value = "((char)-1)",
+ exception_check = True)
+
+ def _optimise_int_indexing(self, coerce_node, arg, index_node):
+ env = self.current_env()
+ bound_check_bool = env.directives['boundscheck'] and 1 or 0
+ if arg.base.type is Builtin.bytes_type:
+ if coerce_node.type in (PyrexTypes.c_char_type, PyrexTypes.c_uchar_type):
+ # bytes[index] -> char
+ bound_check_node = ExprNodes.IntNode(
+ coerce_node.pos, value=str(bound_check_bool),
+ constant_result=bound_check_bool)
+ node = ExprNodes.PythonCapiCallNode(
+ coerce_node.pos, "__Pyx_PyBytes_GetItemInt",
+ self.PyBytes_GetItemInt_func_type,
+ args=[
+ arg.base.as_none_safe_node("'NoneType' object is not subscriptable"),
+ index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
+ bound_check_node,
+ ],
+ is_temp=True,
+ utility_code=UtilityCode.load_cached(
+ 'bytes_index', 'StringTools.c'))
+ if coerce_node.type is not PyrexTypes.c_char_type:
+ node = node.coerce_to(coerce_node.type, env)
+ return node
+ return coerce_node
+
float_float_func_types = dict(
(float_type, PyrexTypes.CFuncType(
float_type, [
@@ -2188,8 +2188,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
]))
for float_type in (PyrexTypes.c_float_type, PyrexTypes.c_double_type, PyrexTypes.c_longdouble_type))
- def _optimise_numeric_cast_call(self, node, arg):
- function = arg.function
+ def _optimise_numeric_cast_call(self, node, arg):
+ function = arg.function
args = None
if isinstance(arg, ExprNodes.PythonCapiCallNode):
args = arg.args
@@ -2198,19 +2198,19 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
args = arg.arg_tuple.args
if args is None or len(args) != 1:
- return node
- func_arg = args[0]
- if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
- func_arg = func_arg.arg
- elif func_arg.type.is_pyobject:
+ return node
+ func_arg = args[0]
+ if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
+ func_arg = func_arg.arg
+ elif func_arg.type.is_pyobject:
# play it safe: Python conversion might work on all sorts of things
- return node
+ return node
- if function.name == 'int':
- if func_arg.type.is_int or node.type.is_int:
- if func_arg.type == node.type:
- return func_arg
- elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float:
+ if function.name == 'int':
+ if func_arg.type.is_int or node.type.is_int:
+ if func_arg.type == node.type:
+ return func_arg
+ elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float:
return ExprNodes.TypecastNode(node.pos, operand=func_arg, type=node.type)
elif func_arg.type.is_float and node.type.is_numeric:
if func_arg.type.math_h_modifier == 'l':
@@ -2226,80 +2226,80 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
is_temp=node.is_temp,
result_is_used=node.result_is_used,
).coerce_to(node.type, self.current_env())
- elif function.name == 'float':
- if func_arg.type.is_float or node.type.is_float:
- if func_arg.type == node.type:
- return func_arg
- elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float:
- return ExprNodes.TypecastNode(
- node.pos, operand=func_arg, type=node.type)
- return node
-
- def _error_wrong_arg_count(self, function_name, node, args, expected=None):
- if not expected: # None or 0
- arg_str = ''
- elif isinstance(expected, basestring) or expected > 1:
- arg_str = '...'
- elif expected == 1:
- arg_str = 'x'
- else:
- arg_str = ''
- if expected is not None:
- expected_str = 'expected %s, ' % expected
- else:
- expected_str = ''
- error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % (
- function_name, arg_str, expected_str, len(args)))
-
- ### generic fallbacks
-
- def _handle_function(self, node, function_name, function, arg_list, kwargs):
- return node
-
- def _handle_method(self, node, type_name, attr_name, function,
- arg_list, is_unbound_method, kwargs):
- """
- Try to inject C-API calls for unbound method calls to builtin types.
- While the method declarations in Builtin.py already handle this, we
- can additionally resolve bound and unbound methods here that were
- assigned to variables ahead of time.
- """
- if kwargs:
- return node
- if not function or not function.is_attribute or not function.obj.is_name:
- # cannot track unbound method calls over more than one indirection as
- # the names might have been reassigned in the meantime
- return node
- type_entry = self.current_env().lookup(type_name)
- if not type_entry:
- return node
- method = ExprNodes.AttributeNode(
- node.function.pos,
- obj=ExprNodes.NameNode(
- function.pos,
- name=type_name,
- entry=type_entry,
- type=type_entry.type),
- attribute=attr_name,
+ elif function.name == 'float':
+ if func_arg.type.is_float or node.type.is_float:
+ if func_arg.type == node.type:
+ return func_arg
+ elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float:
+ return ExprNodes.TypecastNode(
+ node.pos, operand=func_arg, type=node.type)
+ return node
+
+ def _error_wrong_arg_count(self, function_name, node, args, expected=None):
+ if not expected: # None or 0
+ arg_str = ''
+ elif isinstance(expected, basestring) or expected > 1:
+ arg_str = '...'
+ elif expected == 1:
+ arg_str = 'x'
+ else:
+ arg_str = ''
+ if expected is not None:
+ expected_str = 'expected %s, ' % expected
+ else:
+ expected_str = ''
+ error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % (
+ function_name, arg_str, expected_str, len(args)))
+
+ ### generic fallbacks
+
+ def _handle_function(self, node, function_name, function, arg_list, kwargs):
+ return node
+
+ def _handle_method(self, node, type_name, attr_name, function,
+ arg_list, is_unbound_method, kwargs):
+ """
+ Try to inject C-API calls for unbound method calls to builtin types.
+ While the method declarations in Builtin.py already handle this, we
+ can additionally resolve bound and unbound methods here that were
+ assigned to variables ahead of time.
+ """
+ if kwargs:
+ return node
+ if not function or not function.is_attribute or not function.obj.is_name:
+ # cannot track unbound method calls over more than one indirection as
+ # the names might have been reassigned in the meantime
+ return node
+ type_entry = self.current_env().lookup(type_name)
+ if not type_entry:
+ return node
+ method = ExprNodes.AttributeNode(
+ node.function.pos,
+ obj=ExprNodes.NameNode(
+ function.pos,
+ name=type_name,
+ entry=type_entry,
+ type=type_entry.type),
+ attribute=attr_name,
is_called=True).analyse_as_type_attribute(self.current_env())
- if method is None:
+ if method is None:
return self._optimise_generic_builtin_method_call(
node, attr_name, function, arg_list, is_unbound_method)
- args = node.args
- if args is None and node.arg_tuple:
- args = node.arg_tuple.args
- call_node = ExprNodes.SimpleCallNode(
- node.pos,
- function=method,
- args=args)
- if not is_unbound_method:
- call_node.self = function.obj
- call_node.analyse_c_function_call(self.current_env())
- call_node.analysed = True
- return call_node.coerce_to(node.type, self.current_env())
-
- ### builtin types
-
+ args = node.args
+ if args is None and node.arg_tuple:
+ args = node.arg_tuple.args
+ call_node = ExprNodes.SimpleCallNode(
+ node.pos,
+ function=method,
+ args=args)
+ if not is_unbound_method:
+ call_node.self = function.obj
+ call_node.analyse_c_function_call(self.current_env())
+ call_node.analysed = True
+ return call_node.coerce_to(node.type, self.current_env())
+
+ ### builtin types
+
def _optimise_generic_builtin_method_call(self, node, attr_name, function, arg_list, is_unbound_method):
"""
Try to inject an unbound method call for a call to a method of a known builtin type.
@@ -2356,26 +2356,26 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
return self._handle_simple_function_unicode(node, None, [node.value])
return node
- PyDict_Copy_func_type = PyrexTypes.CFuncType(
- Builtin.dict_type, [
- PyrexTypes.CFuncTypeArg("dict", Builtin.dict_type, None)
- ])
-
- def _handle_simple_function_dict(self, node, function, pos_args):
- """Replace dict(some_dict) by PyDict_Copy(some_dict).
- """
- if len(pos_args) != 1:
- return node
- arg = pos_args[0]
- if arg.type is Builtin.dict_type:
- arg = arg.as_none_safe_node("'NoneType' is not iterable")
- return ExprNodes.PythonCapiCallNode(
- node.pos, "PyDict_Copy", self.PyDict_Copy_func_type,
- args = [arg],
- is_temp = node.is_temp
- )
- return node
-
+ PyDict_Copy_func_type = PyrexTypes.CFuncType(
+ Builtin.dict_type, [
+ PyrexTypes.CFuncTypeArg("dict", Builtin.dict_type, None)
+ ])
+
+ def _handle_simple_function_dict(self, node, function, pos_args):
+ """Replace dict(some_dict) by PyDict_Copy(some_dict).
+ """
+ if len(pos_args) != 1:
+ return node
+ arg = pos_args[0]
+ if arg.type is Builtin.dict_type:
+ arg = arg.as_none_safe_node("'NoneType' is not iterable")
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "PyDict_Copy", self.PyDict_Copy_func_type,
+ args = [arg],
+ is_temp = node.is_temp
+ )
+ return node
+
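Illustration (editor's sketch, not part of the diff): dict(d) for an argument already typed as dict becomes a C-level copy; the observable behaviour is that of a shallow copy.

d = {"a": 1}
c = dict(d)            # optimised to PyDict_Copy(d) when d is typed as dict
assert c == d and c is not d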
PySequence_List_func_type = PyrexTypes.CFuncType(
Builtin.list_type,
[PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)])
@@ -2390,141 +2390,141 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
node.pos, "PySequence_List", self.PySequence_List_func_type,
args=pos_args, is_temp=node.is_temp)
- PyList_AsTuple_func_type = PyrexTypes.CFuncType(
- Builtin.tuple_type, [
- PyrexTypes.CFuncTypeArg("list", Builtin.list_type, None)
- ])
-
- def _handle_simple_function_tuple(self, node, function, pos_args):
+ PyList_AsTuple_func_type = PyrexTypes.CFuncType(
+ Builtin.tuple_type, [
+ PyrexTypes.CFuncTypeArg("list", Builtin.list_type, None)
+ ])
+
+ def _handle_simple_function_tuple(self, node, function, pos_args):
"""Replace tuple([...]) by PyList_AsTuple or PySequence_Tuple.
- """
+ """
if len(pos_args) != 1 or not node.is_temp:
- return node
- arg = pos_args[0]
- if arg.type is Builtin.tuple_type and not arg.may_be_none():
- return arg
+ return node
+ arg = pos_args[0]
+ if arg.type is Builtin.tuple_type and not arg.may_be_none():
+ return arg
if arg.type is Builtin.list_type:
pos_args[0] = arg.as_none_safe_node(
"'NoneType' object is not iterable")
-
+
return ExprNodes.PythonCapiCallNode(
node.pos, "PyList_AsTuple", self.PyList_AsTuple_func_type,
args=pos_args, is_temp=node.is_temp)
else:
return ExprNodes.AsTupleNode(node.pos, arg=arg, type=Builtin.tuple_type)
-
- PySet_New_func_type = PyrexTypes.CFuncType(
- Builtin.set_type, [
- PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)
- ])
-
- def _handle_simple_function_set(self, node, function, pos_args):
- if len(pos_args) != 1:
- return node
- if pos_args[0].is_sequence_constructor:
- # We can optimise set([x,y,z]) safely into a set literal,
- # but only if we create all items before adding them -
- # adding an item may raise an exception if it is not
- # hashable, but creating the later items may have
- # side-effects.
- args = []
- temps = []
- for arg in pos_args[0].args:
- if not arg.is_simple():
- arg = UtilNodes.LetRefNode(arg)
- temps.append(arg)
- args.append(arg)
- result = ExprNodes.SetNode(node.pos, is_temp=1, args=args)
+
+ PySet_New_func_type = PyrexTypes.CFuncType(
+ Builtin.set_type, [
+ PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)
+ ])
+
+ def _handle_simple_function_set(self, node, function, pos_args):
+ if len(pos_args) != 1:
+ return node
+ if pos_args[0].is_sequence_constructor:
+ # We can optimise set([x,y,z]) safely into a set literal,
+ # but only if we create all items before adding them -
+ # adding an item may raise an exception if it is not
+ # hashable, but creating the later items may have
+ # side-effects.
+ args = []
+ temps = []
+ for arg in pos_args[0].args:
+ if not arg.is_simple():
+ arg = UtilNodes.LetRefNode(arg)
+ temps.append(arg)
+ args.append(arg)
+ result = ExprNodes.SetNode(node.pos, is_temp=1, args=args)
self.replace(node, result)
- for temp in temps[::-1]:
- result = UtilNodes.EvalWithTempExprNode(temp, result)
- return result
- else:
- # PySet_New(it) is better than a generic Python call to set(it)
+ for temp in temps[::-1]:
+ result = UtilNodes.EvalWithTempExprNode(temp, result)
+ return result
+ else:
+ # PySet_New(it) is better than a generic Python call to set(it)
return self.replace(node, ExprNodes.PythonCapiCallNode(
- node.pos, "PySet_New",
- self.PySet_New_func_type,
- args=pos_args,
- is_temp=node.is_temp,
+ node.pos, "PySet_New",
+ self.PySet_New_func_type,
+ args=pos_args,
+ is_temp=node.is_temp,
py_name="set"))
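Illustration (editor's sketch, not part of the diff): set([x, y, z]) becomes a set literal; the temporaries in the code above ensure every item is evaluated before any of them is hashed and added.

x, y, z = 1, 2, 2
assert set([x, y, z]) == {x, y, z} == {1, 2}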
-
- PyFrozenSet_New_func_type = PyrexTypes.CFuncType(
- Builtin.frozenset_type, [
- PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)
- ])
-
- def _handle_simple_function_frozenset(self, node, function, pos_args):
- if not pos_args:
- pos_args = [ExprNodes.NullNode(node.pos)]
- elif len(pos_args) > 1:
- return node
- elif pos_args[0].type is Builtin.frozenset_type and not pos_args[0].may_be_none():
- return pos_args[0]
- # PyFrozenSet_New(it) is better than a generic Python call to frozenset(it)
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_PyFrozenSet_New",
- self.PyFrozenSet_New_func_type,
- args=pos_args,
- is_temp=node.is_temp,
- utility_code=UtilityCode.load_cached('pyfrozenset_new', 'Builtins.c'),
- py_name="frozenset")
-
- PyObject_AsDouble_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_double_type, [
- PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
- ],
- exception_value = "((double)-1)",
- exception_check = True)
-
- def _handle_simple_function_float(self, node, function, pos_args):
- """Transform float() into either a C type cast or a faster C
- function call.
- """
- # Note: this requires the float() function to be typed as
- # returning a C 'double'
- if len(pos_args) == 0:
- return ExprNodes.FloatNode(
- node, value="0.0", constant_result=0.0
- ).coerce_to(Builtin.float_type, self.current_env())
- elif len(pos_args) != 1:
- self._error_wrong_arg_count('float', node, pos_args, '0 or 1')
- return node
- func_arg = pos_args[0]
- if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
- func_arg = func_arg.arg
- if func_arg.type is PyrexTypes.c_double_type:
- return func_arg
- elif node.type.assignable_from(func_arg.type) or func_arg.type.is_numeric:
- return ExprNodes.TypecastNode(
- node.pos, operand=func_arg, type=node.type)
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_PyObject_AsDouble",
- self.PyObject_AsDouble_func_type,
- args = pos_args,
- is_temp = node.is_temp,
- utility_code = load_c_utility('pyobject_as_double'),
- py_name = "float")
-
- PyNumber_Int_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None)
- ])
-
+
+ PyFrozenSet_New_func_type = PyrexTypes.CFuncType(
+ Builtin.frozenset_type, [
+ PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)
+ ])
+
+ def _handle_simple_function_frozenset(self, node, function, pos_args):
+ if not pos_args:
+ pos_args = [ExprNodes.NullNode(node.pos)]
+ elif len(pos_args) > 1:
+ return node
+ elif pos_args[0].type is Builtin.frozenset_type and not pos_args[0].may_be_none():
+ return pos_args[0]
+ # PyFrozenSet_New(it) is better than a generic Python call to frozenset(it)
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_PyFrozenSet_New",
+ self.PyFrozenSet_New_func_type,
+ args=pos_args,
+ is_temp=node.is_temp,
+ utility_code=UtilityCode.load_cached('pyfrozenset_new', 'Builtins.c'),
+ py_name="frozenset")
+
+ PyObject_AsDouble_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_double_type, [
+ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+ ],
+ exception_value = "((double)-1)",
+ exception_check = True)
+
+ def _handle_simple_function_float(self, node, function, pos_args):
+ """Transform float() into either a C type cast or a faster C
+ function call.
+ """
+ # Note: this requires the float() function to be typed as
+ # returning a C 'double'
+ if len(pos_args) == 0:
+ return ExprNodes.FloatNode(
+ node, value="0.0", constant_result=0.0
+ ).coerce_to(Builtin.float_type, self.current_env())
+ elif len(pos_args) != 1:
+ self._error_wrong_arg_count('float', node, pos_args, '0 or 1')
+ return node
+ func_arg = pos_args[0]
+ if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
+ func_arg = func_arg.arg
+ if func_arg.type is PyrexTypes.c_double_type:
+ return func_arg
+ elif node.type.assignable_from(func_arg.type) or func_arg.type.is_numeric:
+ return ExprNodes.TypecastNode(
+ node.pos, operand=func_arg, type=node.type)
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_PyObject_AsDouble",
+ self.PyObject_AsDouble_func_type,
+ args = pos_args,
+ is_temp = node.is_temp,
+ utility_code = load_c_utility('pyobject_as_double'),
+ py_name = "float")
+
+ PyNumber_Int_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None)
+ ])
+
PyInt_FromDouble_func_type = PyrexTypes.CFuncType(
PyrexTypes.py_object_type, [
PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_double_type, None)
])
- def _handle_simple_function_int(self, node, function, pos_args):
- """Transform int() into a faster C function call.
- """
- if len(pos_args) == 0:
- return ExprNodes.IntNode(node.pos, value="0", constant_result=0,
- type=PyrexTypes.py_object_type)
- elif len(pos_args) != 1:
- return node # int(x, base)
- func_arg = pos_args[0]
- if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
+ def _handle_simple_function_int(self, node, function, pos_args):
+ """Transform int() into a faster C function call.
+ """
+ if len(pos_args) == 0:
+ return ExprNodes.IntNode(node.pos, value="0", constant_result=0,
+ type=PyrexTypes.py_object_type)
+ elif len(pos_args) != 1:
+ return node # int(x, base)
+ func_arg = pos_args[0]
+ if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
if func_arg.arg.type.is_float:
return ExprNodes.PythonCapiCallNode(
node.pos, "__Pyx_PyInt_FromDouble", self.PyInt_FromDouble_func_type,
@@ -2532,49 +2532,49 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
utility_code=UtilityCode.load_cached("PyIntFromDouble", "TypeConversion.c"))
else:
return node # handled in visit_CoerceFromPyTypeNode()
- if func_arg.type.is_pyobject and node.type.is_pyobject:
- return ExprNodes.PythonCapiCallNode(
+ if func_arg.type.is_pyobject and node.type.is_pyobject:
+ return ExprNodes.PythonCapiCallNode(
node.pos, "__Pyx_PyNumber_Int", self.PyNumber_Int_func_type,
args=pos_args, is_temp=True, py_name='int')
- return node
-
- def _handle_simple_function_bool(self, node, function, pos_args):
- """Transform bool(x) into a type coercion to a boolean.
- """
- if len(pos_args) == 0:
- return ExprNodes.BoolNode(
- node.pos, value=False, constant_result=False
- ).coerce_to(Builtin.bool_type, self.current_env())
- elif len(pos_args) != 1:
- self._error_wrong_arg_count('bool', node, pos_args, '0 or 1')
- return node
- else:
- # => !!<bint>(x) to make sure it's exactly 0 or 1
- operand = pos_args[0].coerce_to_boolean(self.current_env())
- operand = ExprNodes.NotNode(node.pos, operand = operand)
- operand = ExprNodes.NotNode(node.pos, operand = operand)
- # coerce back to Python object as that's the result we are expecting
- return operand.coerce_to_pyobject(self.current_env())
-
- ### builtin functions
-
- Pyx_strlen_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_size_t_type, [
+ return node
+
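Illustration (editor's sketch, not part of the diff): the int() rewrites above are meant to preserve Python semantics - int() with no argument is 0, and converting a float truncates toward zero exactly like the builtin.

assert int() == 0
assert int(2.9) == 2 and int(-2.9) == -2   # truncation toward zero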
+ def _handle_simple_function_bool(self, node, function, pos_args):
+ """Transform bool(x) into a type coercion to a boolean.
+ """
+ if len(pos_args) == 0:
+ return ExprNodes.BoolNode(
+ node.pos, value=False, constant_result=False
+ ).coerce_to(Builtin.bool_type, self.current_env())
+ elif len(pos_args) != 1:
+ self._error_wrong_arg_count('bool', node, pos_args, '0 or 1')
+ return node
+ else:
+ # => !!<bint>(x) to make sure it's exactly 0 or 1
+ operand = pos_args[0].coerce_to_boolean(self.current_env())
+ operand = ExprNodes.NotNode(node.pos, operand = operand)
+ operand = ExprNodes.NotNode(node.pos, operand = operand)
+ # coerce back to Python object as that's the result we are expecting
+ return operand.coerce_to_pyobject(self.current_env())
+
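Illustration (editor's sketch, not part of the diff): the bool(x) rewrite is a double negation of the truth value ("!!<bint>(x)"), coerced back to a Python bool; in plain Python terms:

for x in (0, 1, "", "abc", [], [None]):
    assert bool(x) == (not not x)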
+ ### builtin functions
+
+ Pyx_strlen_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None)
])
-
- Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_size_t_type, [
+
+ Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_const_py_unicode_ptr_type, None)
])
-
- PyObject_Size_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_py_ssize_t_type, [
- PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
+
+ PyObject_Size_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_py_ssize_t_type, [
+ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
],
- exception_value="-1")
-
- _map_to_capi_len_function = {
+ exception_value="-1")
+
+ _map_to_capi_len_function = {
Builtin.unicode_type: "__Pyx_PyUnicode_GET_LENGTH",
Builtin.bytes_type: "PyBytes_GET_SIZE",
Builtin.bytearray_type: 'PyByteArray_GET_SIZE',
@@ -2584,31 +2584,31 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
Builtin.frozenset_type: "PySet_GET_SIZE",
Builtin.dict_type: "PyDict_Size",
}.get
-
- _ext_types_with_pysize = set(["cpython.array.array"])
-
- def _handle_simple_function_len(self, node, function, pos_args):
- """Replace len(char*) by the equivalent call to strlen(),
- len(Py_UNICODE) by the equivalent Py_UNICODE_strlen() and
- len(known_builtin_type) by an equivalent C-API call.
- """
- if len(pos_args) != 1:
- self._error_wrong_arg_count('len', node, pos_args, 1)
- return node
- arg = pos_args[0]
- if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
- arg = arg.arg
- if arg.type.is_string:
- new_node = ExprNodes.PythonCapiCallNode(
- node.pos, "strlen", self.Pyx_strlen_func_type,
- args = [arg],
- is_temp = node.is_temp,
- utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
- elif arg.type.is_pyunicode_ptr:
- new_node = ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_Py_UNICODE_strlen", self.Pyx_Py_UNICODE_strlen_func_type,
- args = [arg],
- is_temp = node.is_temp)
+
+ _ext_types_with_pysize = set(["cpython.array.array"])
+
+ def _handle_simple_function_len(self, node, function, pos_args):
+ """Replace len(char*) by the equivalent call to strlen(),
+ len(Py_UNICODE) by the equivalent Py_UNICODE_strlen() and
+ len(known_builtin_type) by an equivalent C-API call.
+ """
+ if len(pos_args) != 1:
+ self._error_wrong_arg_count('len', node, pos_args, 1)
+ return node
+ arg = pos_args[0]
+ if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+ arg = arg.arg
+ if arg.type.is_string:
+ new_node = ExprNodes.PythonCapiCallNode(
+ node.pos, "strlen", self.Pyx_strlen_func_type,
+ args = [arg],
+ is_temp = node.is_temp,
+ utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
+ elif arg.type.is_pyunicode_ptr:
+ new_node = ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_Py_UNICODE_strlen", self.Pyx_Py_UNICODE_strlen_func_type,
+ args = [arg],
+ is_temp = node.is_temp)
elif arg.type.is_memoryviewslice:
func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
@@ -2617,261 +2617,261 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
new_node = ExprNodes.PythonCapiCallNode(
node.pos, "__Pyx_MemoryView_Len", func_type,
args=[arg], is_temp=node.is_temp)
- elif arg.type.is_pyobject:
- cfunc_name = self._map_to_capi_len_function(arg.type)
- if cfunc_name is None:
- arg_type = arg.type
- if ((arg_type.is_extension_type or arg_type.is_builtin_type)
- and arg_type.entry.qualified_name in self._ext_types_with_pysize):
- cfunc_name = 'Py_SIZE'
- else:
- return node
- arg = arg.as_none_safe_node(
- "object of type 'NoneType' has no len()")
- new_node = ExprNodes.PythonCapiCallNode(
- node.pos, cfunc_name, self.PyObject_Size_func_type,
+ elif arg.type.is_pyobject:
+ cfunc_name = self._map_to_capi_len_function(arg.type)
+ if cfunc_name is None:
+ arg_type = arg.type
+ if ((arg_type.is_extension_type or arg_type.is_builtin_type)
+ and arg_type.entry.qualified_name in self._ext_types_with_pysize):
+ cfunc_name = 'Py_SIZE'
+ else:
+ return node
+ arg = arg.as_none_safe_node(
+ "object of type 'NoneType' has no len()")
+ new_node = ExprNodes.PythonCapiCallNode(
+ node.pos, cfunc_name, self.PyObject_Size_func_type,
args=[arg], is_temp=node.is_temp)
- elif arg.type.is_unicode_char:
- return ExprNodes.IntNode(node.pos, value='1', constant_result=1,
- type=node.type)
- else:
- return node
- if node.type not in (PyrexTypes.c_size_t_type, PyrexTypes.c_py_ssize_t_type):
- new_node = new_node.coerce_to(node.type, self.current_env())
- return new_node
-
- Pyx_Type_func_type = PyrexTypes.CFuncType(
- Builtin.type_type, [
- PyrexTypes.CFuncTypeArg("object", PyrexTypes.py_object_type, None)
- ])
-
- def _handle_simple_function_type(self, node, function, pos_args):
- """Replace type(o) by a macro call to Py_TYPE(o).
- """
- if len(pos_args) != 1:
- return node
- node = ExprNodes.PythonCapiCallNode(
- node.pos, "Py_TYPE", self.Pyx_Type_func_type,
- args = pos_args,
- is_temp = False)
- return ExprNodes.CastNode(node, PyrexTypes.py_object_type)
-
- Py_type_check_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_bint_type, [
- PyrexTypes.CFuncTypeArg("arg", PyrexTypes.py_object_type, None)
- ])
-
- def _handle_simple_function_isinstance(self, node, function, pos_args):
- """Replace isinstance() checks against builtin types by the
- corresponding C-API call.
- """
- if len(pos_args) != 2:
- return node
- arg, types = pos_args
+ elif arg.type.is_unicode_char:
+ return ExprNodes.IntNode(node.pos, value='1', constant_result=1,
+ type=node.type)
+ else:
+ return node
+ if node.type not in (PyrexTypes.c_size_t_type, PyrexTypes.c_py_ssize_t_type):
+ new_node = new_node.coerce_to(node.type, self.current_env())
+ return new_node
+
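The len() handler above dispatches purely on the static type of the argument: C strings become strlen(), Py_UNICODE pointers use the __Pyx_Py_UNICODE_strlen helper, known builtin containers map to their C-API size call, and a single unicode character folds to the constant 1. A rough plain-Python model of that dispatch follows (illustration only, not part of the change being restored; the string type names are stand-ins for the compiler's type objects):

    def rewrite_len(arg_static_type):
        # Stand-in for the optimizer's type-based dispatch.
        if arg_static_type == 'char *':
            return 'strlen(arg)'
        if arg_static_type == 'Py_UNICODE *':
            return '__Pyx_Py_UNICODE_strlen(arg)'
        if arg_static_type in ('frozenset', 'dict'):
            capi = {'frozenset': 'PySet_GET_SIZE', 'dict': 'PyDict_Size'}[arg_static_type]
            return '%s(arg)' % capi
        if arg_static_type == 'Py_UCS4':      # a single unicode character
            return '1'
        return 'len(arg)'                     # leave the generic call in place

    print(rewrite_len('char *'), rewrite_len('dict'), rewrite_len('object'))
    # strlen(arg) PyDict_Size(arg) len(arg)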
+ Pyx_Type_func_type = PyrexTypes.CFuncType(
+ Builtin.type_type, [
+ PyrexTypes.CFuncTypeArg("object", PyrexTypes.py_object_type, None)
+ ])
+
+ def _handle_simple_function_type(self, node, function, pos_args):
+ """Replace type(o) by a macro call to Py_TYPE(o).
+ """
+ if len(pos_args) != 1:
+ return node
+ node = ExprNodes.PythonCapiCallNode(
+ node.pos, "Py_TYPE", self.Pyx_Type_func_type,
+ args = pos_args,
+ is_temp = False)
+ return ExprNodes.CastNode(node, PyrexTypes.py_object_type)
+
+ Py_type_check_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_bint_type, [
+ PyrexTypes.CFuncTypeArg("arg", PyrexTypes.py_object_type, None)
+ ])
+
+ def _handle_simple_function_isinstance(self, node, function, pos_args):
+ """Replace isinstance() checks against builtin types by the
+ corresponding C-API call.
+ """
+ if len(pos_args) != 2:
+ return node
+ arg, types = pos_args
temps = []
- if isinstance(types, ExprNodes.TupleNode):
- types = types.args
+ if isinstance(types, ExprNodes.TupleNode):
+ types = types.args
if len(types) == 1 and not types[0].type is Builtin.type_type:
return node # nothing to improve here
- if arg.is_attribute or not arg.is_simple():
+ if arg.is_attribute or not arg.is_simple():
arg = UtilNodes.ResultRefNode(arg)
temps.append(arg)
- elif types.type is Builtin.type_type:
- types = [types]
- else:
- return node
-
- tests = []
- test_nodes = []
- env = self.current_env()
- for test_type_node in types:
- builtin_type = None
- if test_type_node.is_name:
- if test_type_node.entry:
- entry = env.lookup(test_type_node.entry.name)
- if entry and entry.type and entry.type.is_builtin_type:
- builtin_type = entry.type
- if builtin_type is Builtin.type_type:
- # all types have type "type", but there's only one 'type'
- if entry.name != 'type' or not (
- entry.scope and entry.scope.is_builtin_scope):
- builtin_type = None
- if builtin_type is not None:
- type_check_function = entry.type.type_check_function(exact=False)
- if type_check_function in tests:
- continue
- tests.append(type_check_function)
- type_check_args = [arg]
- elif test_type_node.type is Builtin.type_type:
- type_check_function = '__Pyx_TypeCheck'
- type_check_args = [arg, test_type_node]
- else:
+ elif types.type is Builtin.type_type:
+ types = [types]
+ else:
+ return node
+
+ tests = []
+ test_nodes = []
+ env = self.current_env()
+ for test_type_node in types:
+ builtin_type = None
+ if test_type_node.is_name:
+ if test_type_node.entry:
+ entry = env.lookup(test_type_node.entry.name)
+ if entry and entry.type and entry.type.is_builtin_type:
+ builtin_type = entry.type
+ if builtin_type is Builtin.type_type:
+ # all types have type "type", but there's only one 'type'
+ if entry.name != 'type' or not (
+ entry.scope and entry.scope.is_builtin_scope):
+ builtin_type = None
+ if builtin_type is not None:
+ type_check_function = entry.type.type_check_function(exact=False)
+ if type_check_function in tests:
+ continue
+ tests.append(type_check_function)
+ type_check_args = [arg]
+ elif test_type_node.type is Builtin.type_type:
+ type_check_function = '__Pyx_TypeCheck'
+ type_check_args = [arg, test_type_node]
+ else:
if not test_type_node.is_literal:
test_type_node = UtilNodes.ResultRefNode(test_type_node)
temps.append(test_type_node)
type_check_function = 'PyObject_IsInstance'
type_check_args = [arg, test_type_node]
- test_nodes.append(
- ExprNodes.PythonCapiCallNode(
- test_type_node.pos, type_check_function, self.Py_type_check_func_type,
+ test_nodes.append(
+ ExprNodes.PythonCapiCallNode(
+ test_type_node.pos, type_check_function, self.Py_type_check_func_type,
args=type_check_args,
is_temp=True,
))
-
- def join_with_or(a, b, make_binop_node=ExprNodes.binop_node):
- or_node = make_binop_node(node.pos, 'or', a, b)
- or_node.type = PyrexTypes.c_bint_type
- or_node.wrap_operands(env)
- return or_node
-
- test_node = reduce(join_with_or, test_nodes).coerce_to(node.type, env)
+
+ def join_with_or(a, b, make_binop_node=ExprNodes.binop_node):
+ or_node = make_binop_node(node.pos, 'or', a, b)
+ or_node.type = PyrexTypes.c_bint_type
+ or_node.wrap_operands(env)
+ return or_node
+
+ test_node = reduce(join_with_or, test_nodes).coerce_to(node.type, env)
for temp in temps[::-1]:
- test_node = UtilNodes.EvalWithTempExprNode(temp, test_node)
- return test_node
-
- def _handle_simple_function_ord(self, node, function, pos_args):
- """Unpack ord(Py_UNICODE) and ord('X').
- """
- if len(pos_args) != 1:
- return node
- arg = pos_args[0]
- if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
- if arg.arg.type.is_unicode_char:
- return ExprNodes.TypecastNode(
+ test_node = UtilNodes.EvalWithTempExprNode(temp, test_node)
+ return test_node
+
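The per-type checks collected above are folded into one boolean expression by reduce()-ing them with 'or' nodes. The following plain-Python analogue (ordinary predicates instead of compiler nodes, illustration only) shows the same folding shape:

    from functools import reduce

    def is_any_of(value, predicates):
        # Mirrors reduce(join_with_or, test_nodes): chain the checks with 'or'.
        return reduce(lambda acc, check: acc or check(value), predicates, False)

    checks = [lambda v: isinstance(v, int), lambda v: isinstance(v, str)]
    print(is_any_of(3, checks), is_any_of(3.5, checks))   # True False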
+ def _handle_simple_function_ord(self, node, function, pos_args):
+ """Unpack ord(Py_UNICODE) and ord('X').
+ """
+ if len(pos_args) != 1:
+ return node
+ arg = pos_args[0]
+ if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+ if arg.arg.type.is_unicode_char:
+ return ExprNodes.TypecastNode(
arg.pos, operand=arg.arg, type=PyrexTypes.c_long_type
- ).coerce_to(node.type, self.current_env())
- elif isinstance(arg, ExprNodes.UnicodeNode):
- if len(arg.value) == 1:
- return ExprNodes.IntNode(
- arg.pos, type=PyrexTypes.c_int_type,
- value=str(ord(arg.value)),
- constant_result=ord(arg.value)
- ).coerce_to(node.type, self.current_env())
- elif isinstance(arg, ExprNodes.StringNode):
- if arg.unicode_value and len(arg.unicode_value) == 1 \
- and ord(arg.unicode_value) <= 255: # Py2/3 portability
- return ExprNodes.IntNode(
- arg.pos, type=PyrexTypes.c_int_type,
- value=str(ord(arg.unicode_value)),
- constant_result=ord(arg.unicode_value)
- ).coerce_to(node.type, self.current_env())
- return node
-
- ### special methods
-
- Pyx_tp_new_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("type", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("args", Builtin.tuple_type, None),
- ])
-
- Pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("type", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("args", Builtin.tuple_type, None),
- PyrexTypes.CFuncTypeArg("kwargs", Builtin.dict_type, None),
- ])
-
- def _handle_any_slot__new__(self, node, function, args,
- is_unbound_method, kwargs=None):
- """Replace 'exttype.__new__(exttype, ...)' by a call to exttype->tp_new()
- """
- obj = function.obj
- if not is_unbound_method or len(args) < 1:
- return node
- type_arg = args[0]
- if not obj.is_name or not type_arg.is_name:
- # play safe
- return node
- if obj.type != Builtin.type_type or type_arg.type != Builtin.type_type:
- # not a known type, play safe
- return node
- if not type_arg.type_entry or not obj.type_entry:
- if obj.name != type_arg.name:
- return node
- # otherwise, we know it's a type and we know it's the same
- # type for both - that should do
- elif type_arg.type_entry != obj.type_entry:
- # different types - may or may not lead to an error at runtime
- return node
-
- args_tuple = ExprNodes.TupleNode(node.pos, args=args[1:])
- args_tuple = args_tuple.analyse_types(
- self.current_env(), skip_children=True)
-
- if type_arg.type_entry:
- ext_type = type_arg.type_entry.type
- if (ext_type.is_extension_type and ext_type.typeobj_cname and
- ext_type.scope.global_scope() == self.current_env().global_scope()):
- # known type in current module
- tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
- slot_func_cname = TypeSlots.get_slot_function(ext_type.scope, tp_slot)
- if slot_func_cname:
- cython_scope = self.context.cython_scope
- PyTypeObjectPtr = PyrexTypes.CPtrType(
- cython_scope.lookup('PyTypeObject').type)
- pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType(
+ ).coerce_to(node.type, self.current_env())
+ elif isinstance(arg, ExprNodes.UnicodeNode):
+ if len(arg.value) == 1:
+ return ExprNodes.IntNode(
+ arg.pos, type=PyrexTypes.c_int_type,
+ value=str(ord(arg.value)),
+ constant_result=ord(arg.value)
+ ).coerce_to(node.type, self.current_env())
+ elif isinstance(arg, ExprNodes.StringNode):
+ if arg.unicode_value and len(arg.unicode_value) == 1 \
+ and ord(arg.unicode_value) <= 255: # Py2/3 portability
+ return ExprNodes.IntNode(
+ arg.pos, type=PyrexTypes.c_int_type,
+ value=str(ord(arg.unicode_value)),
+ constant_result=ord(arg.unicode_value)
+ ).coerce_to(node.type, self.current_env())
+ return node
+
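ord() is only folded at compile time for single-character literals, and byte-string literals are additionally required to stay at or below 255 so the result is the same on Python 2 and 3. A stand-alone sketch of that rule (illustrative, not compiler code):

    def fold_ord(literal, from_bytes_literal=False):
        # None means: leave the ord() call for runtime.
        if len(literal) != 1:
            return None
        value = ord(literal)
        if from_bytes_literal and value > 255:
            return None
        return value

    print(fold_ord("A"), fold_ord("ab"), fold_ord("\u20ac", from_bytes_literal=True))
    # 65 None None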
+ ### special methods
+
+ Pyx_tp_new_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("type", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("args", Builtin.tuple_type, None),
+ ])
+
+ Pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("type", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("args", Builtin.tuple_type, None),
+ PyrexTypes.CFuncTypeArg("kwargs", Builtin.dict_type, None),
+ ])
+
+ def _handle_any_slot__new__(self, node, function, args,
+ is_unbound_method, kwargs=None):
+ """Replace 'exttype.__new__(exttype, ...)' by a call to exttype->tp_new()
+ """
+ obj = function.obj
+ if not is_unbound_method or len(args) < 1:
+ return node
+ type_arg = args[0]
+ if not obj.is_name or not type_arg.is_name:
+ # play safe
+ return node
+ if obj.type != Builtin.type_type or type_arg.type != Builtin.type_type:
+ # not a known type, play safe
+ return node
+ if not type_arg.type_entry or not obj.type_entry:
+ if obj.name != type_arg.name:
+ return node
+ # otherwise, we know it's a type and we know it's the same
+ # type for both - that should do
+ elif type_arg.type_entry != obj.type_entry:
+ # different types - may or may not lead to an error at runtime
+ return node
+
+ args_tuple = ExprNodes.TupleNode(node.pos, args=args[1:])
+ args_tuple = args_tuple.analyse_types(
+ self.current_env(), skip_children=True)
+
+ if type_arg.type_entry:
+ ext_type = type_arg.type_entry.type
+ if (ext_type.is_extension_type and ext_type.typeobj_cname and
+ ext_type.scope.global_scope() == self.current_env().global_scope()):
+ # known type in current module
+ tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
+ slot_func_cname = TypeSlots.get_slot_function(ext_type.scope, tp_slot)
+ if slot_func_cname:
+ cython_scope = self.context.cython_scope
+ PyTypeObjectPtr = PyrexTypes.CPtrType(
+ cython_scope.lookup('PyTypeObject').type)
+ pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType(
ext_type, [
- PyrexTypes.CFuncTypeArg("type", PyTypeObjectPtr, None),
- PyrexTypes.CFuncTypeArg("args", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("kwargs", PyrexTypes.py_object_type, None),
- ])
-
- type_arg = ExprNodes.CastNode(type_arg, PyTypeObjectPtr)
- if not kwargs:
- kwargs = ExprNodes.NullNode(node.pos, type=PyrexTypes.py_object_type) # hack?
- return ExprNodes.PythonCapiCallNode(
- node.pos, slot_func_cname,
- pyx_tp_new_kwargs_func_type,
- args=[type_arg, args_tuple, kwargs],
+ PyrexTypes.CFuncTypeArg("type", PyTypeObjectPtr, None),
+ PyrexTypes.CFuncTypeArg("args", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("kwargs", PyrexTypes.py_object_type, None),
+ ])
+
+ type_arg = ExprNodes.CastNode(type_arg, PyTypeObjectPtr)
+ if not kwargs:
+ kwargs = ExprNodes.NullNode(node.pos, type=PyrexTypes.py_object_type) # hack?
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, slot_func_cname,
+ pyx_tp_new_kwargs_func_type,
+ args=[type_arg, args_tuple, kwargs],
may_return_none=False,
- is_temp=True)
- else:
- # arbitrary variable, needs a None check for safety
- type_arg = type_arg.as_none_safe_node(
- "object.__new__(X): X is not a type object (NoneType)")
-
- utility_code = UtilityCode.load_cached('tp_new', 'ObjectHandling.c')
- if kwargs:
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_tp_new_kwargs", self.Pyx_tp_new_kwargs_func_type,
- args=[type_arg, args_tuple, kwargs],
- utility_code=utility_code,
- is_temp=node.is_temp
- )
- else:
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_tp_new", self.Pyx_tp_new_func_type,
- args=[type_arg, args_tuple],
- utility_code=utility_code,
- is_temp=node.is_temp
- )
-
- ### methods of builtin types
-
- PyObject_Append_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_returncode_type, [
- PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("item", PyrexTypes.py_object_type, None),
- ],
- exception_value="-1")
-
- def _handle_simple_method_object_append(self, node, function, args, is_unbound_method):
- """Optimistic optimisation as X.append() is almost always
- referring to a list.
- """
- if len(args) != 2 or node.result_is_used:
- return node
-
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_PyObject_Append", self.PyObject_Append_func_type,
- args=args,
- may_return_none=False,
- is_temp=node.is_temp,
- result_is_used=False,
- utility_code=load_c_utility('append')
- )
-
+ is_temp=True)
+ else:
+ # arbitrary variable, needs a None check for safety
+ type_arg = type_arg.as_none_safe_node(
+ "object.__new__(X): X is not a type object (NoneType)")
+
+ utility_code = UtilityCode.load_cached('tp_new', 'ObjectHandling.c')
+ if kwargs:
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_tp_new_kwargs", self.Pyx_tp_new_kwargs_func_type,
+ args=[type_arg, args_tuple, kwargs],
+ utility_code=utility_code,
+ is_temp=node.is_temp
+ )
+ else:
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_tp_new", self.Pyx_tp_new_func_type,
+ args=[type_arg, args_tuple],
+ utility_code=utility_code,
+ is_temp=node.is_temp
+ )
+
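The pattern being rewritten is the unbound form exttype.__new__(exttype, ...), where both names must resolve to the same type before the call can be turned into a direct tp_new slot call. In plain Python the same call shape looks like this (illustration only; the slot substitution itself only happens in compiled code):

    class Point(object):
        def __init__(self, x=0, y=0):
            self.x, self.y = x, y

    p = Point.__new__(Point)        # allocation only, __init__ is not run
    print(isinstance(p, Point), hasattr(p, "x"))   # True False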
+ ### methods of builtin types
+
+ PyObject_Append_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_returncode_type, [
+ PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("item", PyrexTypes.py_object_type, None),
+ ],
+ exception_value="-1")
+
+ def _handle_simple_method_object_append(self, node, function, args, is_unbound_method):
+ """Optimistic optimisation as X.append() is almost always
+ referring to a list.
+ """
+ if len(args) != 2 or node.result_is_used:
+ return node
+
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_PyObject_Append", self.PyObject_Append_func_type,
+ args=args,
+ may_return_none=False,
+ is_temp=node.is_temp,
+ result_is_used=False,
+ utility_code=load_c_utility('append')
+ )
+
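The append() rewrite above only applies when the call result is discarded: list.append() returns None, while the substituted helper is declared with a C return code, so a consumed result would change the expression's value. A quick illustration of a call site that must therefore keep the normal call:

    items = []
    result = items.append(1)    # result is consumed, so the optimizer leaves this alone
    print(result, items)        # None [1]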
def _handle_simple_method_list_extend(self, node, function, args, is_unbound_method):
"""Replace list.extend([...]) for short sequence literals values by sequential appends
to avoid creating an intermediate sequence argument.
@@ -2935,211 +2935,211 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
new_node.result_is_used = node.result_is_used
return new_node
- PyByteArray_Append_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_returncode_type, [
- PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_int_type, None),
- ],
- exception_value="-1")
-
- PyByteArray_AppendObject_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_returncode_type, [
- PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("value", PyrexTypes.py_object_type, None),
- ],
- exception_value="-1")
-
- def _handle_simple_method_bytearray_append(self, node, function, args, is_unbound_method):
- if len(args) != 2:
- return node
- func_name = "__Pyx_PyByteArray_Append"
- func_type = self.PyByteArray_Append_func_type
-
- value = unwrap_coerced_node(args[1])
- if value.type.is_int or isinstance(value, ExprNodes.IntNode):
- value = value.coerce_to(PyrexTypes.c_int_type, self.current_env())
- utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c")
- elif value.is_string_literal:
- if not value.can_coerce_to_char_literal():
- return node
- value = value.coerce_to(PyrexTypes.c_char_type, self.current_env())
- utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c")
- elif value.type.is_pyobject:
- func_name = "__Pyx_PyByteArray_AppendObject"
- func_type = self.PyByteArray_AppendObject_func_type
- utility_code = UtilityCode.load_cached("ByteArrayAppendObject", "StringTools.c")
- else:
- return node
-
- new_node = ExprNodes.PythonCapiCallNode(
- node.pos, func_name, func_type,
- args=[args[0], value],
- may_return_none=False,
- is_temp=node.is_temp,
- utility_code=utility_code,
- )
- if node.result_is_used:
- new_node = new_node.coerce_to(node.type, self.current_env())
- return new_node
-
- PyObject_Pop_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
- ])
-
- PyObject_PopIndex_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
+ PyByteArray_Append_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_returncode_type, [
+ PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_int_type, None),
+ ],
+ exception_value="-1")
+
+ PyByteArray_AppendObject_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_returncode_type, [
+ PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("value", PyrexTypes.py_object_type, None),
+ ],
+ exception_value="-1")
+
+ def _handle_simple_method_bytearray_append(self, node, function, args, is_unbound_method):
+ if len(args) != 2:
+ return node
+ func_name = "__Pyx_PyByteArray_Append"
+ func_type = self.PyByteArray_Append_func_type
+
+ value = unwrap_coerced_node(args[1])
+ if value.type.is_int or isinstance(value, ExprNodes.IntNode):
+ value = value.coerce_to(PyrexTypes.c_int_type, self.current_env())
+ utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c")
+ elif value.is_string_literal:
+ if not value.can_coerce_to_char_literal():
+ return node
+ value = value.coerce_to(PyrexTypes.c_char_type, self.current_env())
+ utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c")
+ elif value.type.is_pyobject:
+ func_name = "__Pyx_PyByteArray_AppendObject"
+ func_type = self.PyByteArray_AppendObject_func_type
+ utility_code = UtilityCode.load_cached("ByteArrayAppendObject", "StringTools.c")
+ else:
+ return node
+
+ new_node = ExprNodes.PythonCapiCallNode(
+ node.pos, func_name, func_type,
+ args=[args[0], value],
+ may_return_none=False,
+ is_temp=node.is_temp,
+ utility_code=utility_code,
+ )
+ if node.result_is_used:
+ new_node = new_node.coerce_to(node.type, self.current_env())
+ return new_node
+
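bytearray.append() is specialised above by the type of the appended value: integer values take the int path, single-character byte literals are coerced to a char, and arbitrary objects go through the object-based helper. A plain-Python classification mirroring those branches (a sketch, not the compiler's type analysis):

    def classify_bytearray_append(value):
        if isinstance(value, int):
            return '__Pyx_PyByteArray_Append (int path)'
        if isinstance(value, bytes) and len(value) == 1:
            return '__Pyx_PyByteArray_Append (char path)'
        return '__Pyx_PyByteArray_AppendObject'

    print(classify_bytearray_append(0x61))   # int path
    print(classify_bytearray_append(b"a"))   # char path
    print(classify_bytearray_append("a"))    # object path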
+ PyObject_Pop_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
+ ])
+
+ PyObject_PopIndex_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
PyrexTypes.CFuncTypeArg("py_index", PyrexTypes.py_object_type, None),
PyrexTypes.CFuncTypeArg("c_index", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("is_signed", PyrexTypes.c_int_type, None),
- ],
- has_varargs=True) # to fake the additional macro args that lack a proper C type
-
- def _handle_simple_method_list_pop(self, node, function, args, is_unbound_method):
- return self._handle_simple_method_object_pop(
- node, function, args, is_unbound_method, is_list=True)
-
- def _handle_simple_method_object_pop(self, node, function, args, is_unbound_method, is_list=False):
- """Optimistic optimisation as X.pop([n]) is almost always
- referring to a list.
- """
- if not args:
- return node
- obj = args[0]
- if is_list:
- type_name = 'List'
- obj = obj.as_none_safe_node(
+ PyrexTypes.CFuncTypeArg("is_signed", PyrexTypes.c_int_type, None),
+ ],
+ has_varargs=True) # to fake the additional macro args that lack a proper C type
+
+ def _handle_simple_method_list_pop(self, node, function, args, is_unbound_method):
+ return self._handle_simple_method_object_pop(
+ node, function, args, is_unbound_method, is_list=True)
+
+ def _handle_simple_method_object_pop(self, node, function, args, is_unbound_method, is_list=False):
+ """Optimistic optimisation as X.pop([n]) is almost always
+ referring to a list.
+ """
+ if not args:
+ return node
+ obj = args[0]
+ if is_list:
+ type_name = 'List'
+ obj = obj.as_none_safe_node(
"'NoneType' object has no attribute '%.30s'",
- error="PyExc_AttributeError",
- format_args=['pop'])
- else:
- type_name = 'Object'
- if len(args) == 1:
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_Py%s_Pop" % type_name,
- self.PyObject_Pop_func_type,
- args=[obj],
- may_return_none=True,
- is_temp=node.is_temp,
- utility_code=load_c_utility('pop'),
- )
- elif len(args) == 2:
- index = unwrap_coerced_node(args[1])
+ error="PyExc_AttributeError",
+ format_args=['pop'])
+ else:
+ type_name = 'Object'
+ if len(args) == 1:
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_Py%s_Pop" % type_name,
+ self.PyObject_Pop_func_type,
+ args=[obj],
+ may_return_none=True,
+ is_temp=node.is_temp,
+ utility_code=load_c_utility('pop'),
+ )
+ elif len(args) == 2:
+ index = unwrap_coerced_node(args[1])
py_index = ExprNodes.NoneNode(index.pos)
- orig_index_type = index.type
- if not index.type.is_int:
+ orig_index_type = index.type
+ if not index.type.is_int:
if isinstance(index, ExprNodes.IntNode):
py_index = index.coerce_to_pyobject(self.current_env())
- index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+ index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
elif is_list:
if index.type.is_pyobject:
py_index = index.coerce_to_simple(self.current_env())
index = ExprNodes.CloneNode(py_index)
index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
- else:
- return node
- elif not PyrexTypes.numeric_type_fits(index.type, PyrexTypes.c_py_ssize_t_type):
- return node
+ else:
+ return node
+ elif not PyrexTypes.numeric_type_fits(index.type, PyrexTypes.c_py_ssize_t_type):
+ return node
elif isinstance(index, ExprNodes.IntNode):
py_index = index.coerce_to_pyobject(self.current_env())
- # real type might still be larger at runtime
- if not orig_index_type.is_int:
- orig_index_type = index.type
- if not orig_index_type.create_to_py_utility_code(self.current_env()):
- return node
- convert_func = orig_index_type.to_py_function
- conversion_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [PyrexTypes.CFuncTypeArg("intval", orig_index_type, None)])
- return ExprNodes.PythonCapiCallNode(
- node.pos, "__Pyx_Py%s_PopIndex" % type_name,
- self.PyObject_PopIndex_func_type,
+ # real type might still be larger at runtime
+ if not orig_index_type.is_int:
+ orig_index_type = index.type
+ if not orig_index_type.create_to_py_utility_code(self.current_env()):
+ return node
+ convert_func = orig_index_type.to_py_function
+ conversion_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [PyrexTypes.CFuncTypeArg("intval", orig_index_type, None)])
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, "__Pyx_Py%s_PopIndex" % type_name,
+ self.PyObject_PopIndex_func_type,
args=[obj, py_index, index,
- ExprNodes.IntNode(index.pos, value=str(orig_index_type.signed and 1 or 0),
- constant_result=orig_index_type.signed and 1 or 0,
- type=PyrexTypes.c_int_type),
- ExprNodes.RawCNameExprNode(index.pos, PyrexTypes.c_void_type,
+ ExprNodes.IntNode(index.pos, value=str(orig_index_type.signed and 1 or 0),
+ constant_result=orig_index_type.signed and 1 or 0,
+ type=PyrexTypes.c_int_type),
+ ExprNodes.RawCNameExprNode(index.pos, PyrexTypes.c_void_type,
orig_index_type.empty_declaration_code()),
- ExprNodes.RawCNameExprNode(index.pos, conversion_type, convert_func)],
- may_return_none=True,
- is_temp=node.is_temp,
- utility_code=load_c_utility("pop_index"),
- )
-
- return node
-
- single_param_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_returncode_type, [
- PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
- ],
- exception_value = "-1")
-
- def _handle_simple_method_list_sort(self, node, function, args, is_unbound_method):
- """Call PyList_Sort() instead of the 0-argument l.sort().
- """
- if len(args) != 1:
- return node
- return self._substitute_method_call(
- node, function, "PyList_Sort", self.single_param_func_type,
- 'sort', is_unbound_method, args).coerce_to(node.type, self.current_env)
-
- Pyx_PyDict_GetItem_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
- ])
-
- def _handle_simple_method_dict_get(self, node, function, args, is_unbound_method):
- """Replace dict.get() by a call to PyDict_GetItem().
- """
- if len(args) == 2:
- args.append(ExprNodes.NoneNode(node.pos))
- elif len(args) != 3:
- self._error_wrong_arg_count('dict.get', node, args, "2 or 3")
- return node
-
- return self._substitute_method_call(
- node, function,
- "__Pyx_PyDict_GetItemDefault", self.Pyx_PyDict_GetItem_func_type,
- 'get', is_unbound_method, args,
- may_return_none = True,
- utility_code = load_c_utility("dict_getitem_default"))
-
- Pyx_PyDict_SetDefault_func_type = PyrexTypes.CFuncType(
- PyrexTypes.py_object_type, [
- PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("is_safe_type", PyrexTypes.c_int_type, None),
- ])
-
- def _handle_simple_method_dict_setdefault(self, node, function, args, is_unbound_method):
- """Replace dict.setdefault() by calls to PyDict_GetItem() and PyDict_SetItem().
- """
- if len(args) == 2:
- args.append(ExprNodes.NoneNode(node.pos))
- elif len(args) != 3:
- self._error_wrong_arg_count('dict.setdefault', node, args, "2 or 3")
- return node
- key_type = args[1].type
- if key_type.is_builtin_type:
- is_safe_type = int(key_type.name in
- 'str bytes unicode float int long bool')
- elif key_type is PyrexTypes.py_object_type:
- is_safe_type = -1 # don't know
- else:
- is_safe_type = 0 # definitely not
- args.append(ExprNodes.IntNode(
- node.pos, value=str(is_safe_type), constant_result=is_safe_type))
-
- return self._substitute_method_call(
- node, function,
- "__Pyx_PyDict_SetDefault", self.Pyx_PyDict_SetDefault_func_type,
- 'setdefault', is_unbound_method, args,
- may_return_none=True,
- utility_code=load_c_utility('dict_setdefault'))
-
+ ExprNodes.RawCNameExprNode(index.pos, conversion_type, convert_func)],
+ may_return_none=True,
+ is_temp=node.is_temp,
+ utility_code=load_c_utility("pop_index"),
+ )
+
+ return node
+
+ single_param_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_returncode_type, [
+ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+ ],
+ exception_value = "-1")
+
+ def _handle_simple_method_list_sort(self, node, function, args, is_unbound_method):
+ """Call PyList_Sort() instead of the 0-argument l.sort().
+ """
+ if len(args) != 1:
+ return node
+ return self._substitute_method_call(
+ node, function, "PyList_Sort", self.single_param_func_type,
+ 'sort', is_unbound_method, args).coerce_to(node.type, self.current_env)
+
+ Pyx_PyDict_GetItem_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
+ ])
+
+ def _handle_simple_method_dict_get(self, node, function, args, is_unbound_method):
+ """Replace dict.get() by a call to PyDict_GetItem().
+ """
+ if len(args) == 2:
+ args.append(ExprNodes.NoneNode(node.pos))
+ elif len(args) != 3:
+ self._error_wrong_arg_count('dict.get', node, args, "2 or 3")
+ return node
+
+ return self._substitute_method_call(
+ node, function,
+ "__Pyx_PyDict_GetItemDefault", self.Pyx_PyDict_GetItem_func_type,
+ 'get', is_unbound_method, args,
+ may_return_none = True,
+ utility_code = load_c_utility("dict_getitem_default"))
+
+ Pyx_PyDict_SetDefault_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.py_object_type, [
+ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("is_safe_type", PyrexTypes.c_int_type, None),
+ ])
+
+ def _handle_simple_method_dict_setdefault(self, node, function, args, is_unbound_method):
+ """Replace dict.setdefault() by calls to PyDict_GetItem() and PyDict_SetItem().
+ """
+ if len(args) == 2:
+ args.append(ExprNodes.NoneNode(node.pos))
+ elif len(args) != 3:
+ self._error_wrong_arg_count('dict.setdefault', node, args, "2 or 3")
+ return node
+ key_type = args[1].type
+ if key_type.is_builtin_type:
+ is_safe_type = int(key_type.name in
+ 'str bytes unicode float int long bool')
+ elif key_type is PyrexTypes.py_object_type:
+ is_safe_type = -1 # don't know
+ else:
+ is_safe_type = 0 # definitely not
+ args.append(ExprNodes.IntNode(
+ node.pos, value=str(is_safe_type), constant_result=is_safe_type))
+
+ return self._substitute_method_call(
+ node, function,
+ "__Pyx_PyDict_SetDefault", self.Pyx_PyDict_SetDefault_func_type,
+ 'setdefault', is_unbound_method, args,
+ may_return_none=True,
+ utility_code=load_c_utility('dict_setdefault'))
+
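The extra is_safe_type flag appended above classifies the key's static type: 1 for the listed builtin types, -1 when the key is a plain object (unknown), and 0 otherwise. The classification in isolation, reusing the same substring test as the handler (illustration only):

    def classify_key_type(type_name, is_builtin, is_plain_object):
        if is_builtin:
            return int(type_name in 'str bytes unicode float int long bool')
        if is_plain_object:
            return -1   # don't know
        return 0        # definitely not

    print(classify_key_type('int', True, False),     # 1
          classify_key_type('object', False, True),  # -1
          classify_key_type('MyKey', False, False))  # 0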
PyDict_Pop_func_type = PyrexTypes.CFuncType(
PyrexTypes.py_object_type, [
PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
@@ -3175,7 +3175,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
for ctype in (PyrexTypes.c_long_type, PyrexTypes.c_double_type)
for ret_type in (PyrexTypes.py_object_type, PyrexTypes.c_bint_type)
)
-
+
def _handle_simple_method_object___add__(self, node, function, args, is_unbound_method):
return self._optimise_num_binop('Add', node, function, args, is_unbound_method)
@@ -3341,121 +3341,121 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
call_node = ExprNodes.CoerceToPyTypeNode(call_node, self.current_env(), node.type)
return call_node
- ### unicode type methods
-
- PyUnicode_uchar_predicate_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_bint_type, [
- PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
- ])
-
- def _inject_unicode_predicate(self, node, function, args, is_unbound_method):
- if is_unbound_method or len(args) != 1:
- return node
- ustring = args[0]
- if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \
- not ustring.arg.type.is_unicode_char:
- return node
- uchar = ustring.arg
- method_name = function.attribute
- if method_name == 'istitle':
- # istitle() doesn't directly map to Py_UNICODE_ISTITLE()
- utility_code = UtilityCode.load_cached(
- "py_unicode_istitle", "StringTools.c")
- function_name = '__Pyx_Py_UNICODE_ISTITLE'
- else:
- utility_code = None
- function_name = 'Py_UNICODE_%s' % method_name.upper()
- func_call = self._substitute_method_call(
- node, function,
- function_name, self.PyUnicode_uchar_predicate_func_type,
- method_name, is_unbound_method, [uchar],
- utility_code = utility_code)
- if node.type.is_pyobject:
- func_call = func_call.coerce_to_pyobject(self.current_env)
- return func_call
-
- _handle_simple_method_unicode_isalnum = _inject_unicode_predicate
- _handle_simple_method_unicode_isalpha = _inject_unicode_predicate
- _handle_simple_method_unicode_isdecimal = _inject_unicode_predicate
- _handle_simple_method_unicode_isdigit = _inject_unicode_predicate
- _handle_simple_method_unicode_islower = _inject_unicode_predicate
- _handle_simple_method_unicode_isnumeric = _inject_unicode_predicate
- _handle_simple_method_unicode_isspace = _inject_unicode_predicate
- _handle_simple_method_unicode_istitle = _inject_unicode_predicate
- _handle_simple_method_unicode_isupper = _inject_unicode_predicate
-
- PyUnicode_uchar_conversion_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_py_ucs4_type, [
- PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
- ])
-
- def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
- if is_unbound_method or len(args) != 1:
- return node
- ustring = args[0]
- if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \
- not ustring.arg.type.is_unicode_char:
- return node
- uchar = ustring.arg
- method_name = function.attribute
- function_name = 'Py_UNICODE_TO%s' % method_name.upper()
- func_call = self._substitute_method_call(
- node, function,
- function_name, self.PyUnicode_uchar_conversion_func_type,
- method_name, is_unbound_method, [uchar])
- if node.type.is_pyobject:
- func_call = func_call.coerce_to_pyobject(self.current_env)
- return func_call
-
- _handle_simple_method_unicode_lower = _inject_unicode_character_conversion
- _handle_simple_method_unicode_upper = _inject_unicode_character_conversion
- _handle_simple_method_unicode_title = _inject_unicode_character_conversion
-
- PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
- Builtin.list_type, [
- PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
- PyrexTypes.CFuncTypeArg("keepends", PyrexTypes.c_bint_type, None),
- ])
-
- def _handle_simple_method_unicode_splitlines(self, node, function, args, is_unbound_method):
- """Replace unicode.splitlines(...) by a direct call to the
- corresponding C-API function.
- """
- if len(args) not in (1,2):
- self._error_wrong_arg_count('unicode.splitlines', node, args, "1 or 2")
- return node
- self._inject_bint_default_argument(node, args, 1, False)
-
- return self._substitute_method_call(
- node, function,
- "PyUnicode_Splitlines", self.PyUnicode_Splitlines_func_type,
- 'splitlines', is_unbound_method, args)
-
- PyUnicode_Split_func_type = PyrexTypes.CFuncType(
- Builtin.list_type, [
- PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
- PyrexTypes.CFuncTypeArg("sep", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("maxsplit", PyrexTypes.c_py_ssize_t_type, None),
- ]
- )
-
- def _handle_simple_method_unicode_split(self, node, function, args, is_unbound_method):
- """Replace unicode.split(...) by a direct call to the
- corresponding C-API function.
- """
- if len(args) not in (1,2,3):
- self._error_wrong_arg_count('unicode.split', node, args, "1-3")
- return node
- if len(args) < 2:
- args.append(ExprNodes.NullNode(node.pos))
- self._inject_int_default_argument(
- node, args, 2, PyrexTypes.c_py_ssize_t_type, "-1")
-
- return self._substitute_method_call(
- node, function,
- "PyUnicode_Split", self.PyUnicode_Split_func_type,
- 'split', is_unbound_method, args)
-
+ ### unicode type methods
+
+ PyUnicode_uchar_predicate_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_bint_type, [
+ PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
+ ])
+
+ def _inject_unicode_predicate(self, node, function, args, is_unbound_method):
+ if is_unbound_method or len(args) != 1:
+ return node
+ ustring = args[0]
+ if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \
+ not ustring.arg.type.is_unicode_char:
+ return node
+ uchar = ustring.arg
+ method_name = function.attribute
+ if method_name == 'istitle':
+ # istitle() doesn't directly map to Py_UNICODE_ISTITLE()
+ utility_code = UtilityCode.load_cached(
+ "py_unicode_istitle", "StringTools.c")
+ function_name = '__Pyx_Py_UNICODE_ISTITLE'
+ else:
+ utility_code = None
+ function_name = 'Py_UNICODE_%s' % method_name.upper()
+ func_call = self._substitute_method_call(
+ node, function,
+ function_name, self.PyUnicode_uchar_predicate_func_type,
+ method_name, is_unbound_method, [uchar],
+ utility_code = utility_code)
+ if node.type.is_pyobject:
+ func_call = func_call.coerce_to_pyobject(self.current_env)
+ return func_call
+
+ _handle_simple_method_unicode_isalnum = _inject_unicode_predicate
+ _handle_simple_method_unicode_isalpha = _inject_unicode_predicate
+ _handle_simple_method_unicode_isdecimal = _inject_unicode_predicate
+ _handle_simple_method_unicode_isdigit = _inject_unicode_predicate
+ _handle_simple_method_unicode_islower = _inject_unicode_predicate
+ _handle_simple_method_unicode_isnumeric = _inject_unicode_predicate
+ _handle_simple_method_unicode_isspace = _inject_unicode_predicate
+ _handle_simple_method_unicode_istitle = _inject_unicode_predicate
+ _handle_simple_method_unicode_isupper = _inject_unicode_predicate
+
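The predicate rewrite above derives the C-API name mechanically from the Python method name, with istitle() special-cased because Py_UNICODE_ISTITLE() does not directly match str.istitle(). The naming rule on its own (illustration only):

    def predicate_c_function(method_name):
        if method_name == 'istitle':
            return '__Pyx_Py_UNICODE_ISTITLE'    # helper loaded from StringTools.c
        return 'Py_UNICODE_%s' % method_name.upper()

    print(predicate_c_function('isdigit'))   # Py_UNICODE_ISDIGIT
    print(predicate_c_function('istitle'))   # __Pyx_Py_UNICODE_ISTITLE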
+ PyUnicode_uchar_conversion_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_py_ucs4_type, [
+ PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
+ ])
+
+ def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
+ if is_unbound_method or len(args) != 1:
+ return node
+ ustring = args[0]
+ if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \
+ not ustring.arg.type.is_unicode_char:
+ return node
+ uchar = ustring.arg
+ method_name = function.attribute
+ function_name = 'Py_UNICODE_TO%s' % method_name.upper()
+ func_call = self._substitute_method_call(
+ node, function,
+ function_name, self.PyUnicode_uchar_conversion_func_type,
+ method_name, is_unbound_method, [uchar])
+ if node.type.is_pyobject:
+ func_call = func_call.coerce_to_pyobject(self.current_env)
+ return func_call
+
+ _handle_simple_method_unicode_lower = _inject_unicode_character_conversion
+ _handle_simple_method_unicode_upper = _inject_unicode_character_conversion
+ _handle_simple_method_unicode_title = _inject_unicode_character_conversion
+
+ PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
+ Builtin.list_type, [
+ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+ PyrexTypes.CFuncTypeArg("keepends", PyrexTypes.c_bint_type, None),
+ ])
+
+ def _handle_simple_method_unicode_splitlines(self, node, function, args, is_unbound_method):
+ """Replace unicode.splitlines(...) by a direct call to the
+ corresponding C-API function.
+ """
+ if len(args) not in (1,2):
+ self._error_wrong_arg_count('unicode.splitlines', node, args, "1 or 2")
+ return node
+ self._inject_bint_default_argument(node, args, 1, False)
+
+ return self._substitute_method_call(
+ node, function,
+ "PyUnicode_Splitlines", self.PyUnicode_Splitlines_func_type,
+ 'splitlines', is_unbound_method, args)
+
+ PyUnicode_Split_func_type = PyrexTypes.CFuncType(
+ Builtin.list_type, [
+ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+ PyrexTypes.CFuncTypeArg("sep", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("maxsplit", PyrexTypes.c_py_ssize_t_type, None),
+ ]
+ )
+
+ def _handle_simple_method_unicode_split(self, node, function, args, is_unbound_method):
+ """Replace unicode.split(...) by a direct call to the
+ corresponding C-API function.
+ """
+ if len(args) not in (1,2,3):
+ self._error_wrong_arg_count('unicode.split', node, args, "1-3")
+ return node
+ if len(args) < 2:
+ args.append(ExprNodes.NullNode(node.pos))
+ self._inject_int_default_argument(
+ node, args, 2, PyrexTypes.c_py_ssize_t_type, "-1")
+
+ return self._substitute_method_call(
+ node, function,
+ "PyUnicode_Split", self.PyUnicode_Split_func_type,
+ 'split', is_unbound_method, args)
+
PyUnicode_Join_func_type = PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
@@ -3494,461 +3494,461 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
"PyUnicode_Join", self.PyUnicode_Join_func_type,
'join', is_unbound_method, args)
- PyString_Tailmatch_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_bint_type, [
- PyrexTypes.CFuncTypeArg("str", PyrexTypes.py_object_type, None), # bytes/str/unicode
- PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
- ],
- exception_value = '-1')
-
- def _handle_simple_method_unicode_endswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'unicode', 'endswith',
- unicode_tailmatch_utility_code, +1)
-
- def _handle_simple_method_unicode_startswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'unicode', 'startswith',
- unicode_tailmatch_utility_code, -1)
-
- def _inject_tailmatch(self, node, function, args, is_unbound_method, type_name,
- method_name, utility_code, direction):
- """Replace unicode.startswith(...) and unicode.endswith(...)
- by a direct call to the corresponding C-API function.
- """
- if len(args) not in (2,3,4):
- self._error_wrong_arg_count('%s.%s' % (type_name, method_name), node, args, "2-4")
- return node
- self._inject_int_default_argument(
- node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
- self._inject_int_default_argument(
- node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
- args.append(ExprNodes.IntNode(
- node.pos, value=str(direction), type=PyrexTypes.c_int_type))
-
- method_call = self._substitute_method_call(
- node, function,
- "__Pyx_Py%s_Tailmatch" % type_name.capitalize(),
- self.PyString_Tailmatch_func_type,
- method_name, is_unbound_method, args,
- utility_code = utility_code)
- return method_call.coerce_to(Builtin.bool_type, self.current_env())
-
- PyUnicode_Find_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_py_ssize_t_type, [
- PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
- PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
- ],
- exception_value = '-2')
-
- def _handle_simple_method_unicode_find(self, node, function, args, is_unbound_method):
- return self._inject_unicode_find(
- node, function, args, is_unbound_method, 'find', +1)
-
- def _handle_simple_method_unicode_rfind(self, node, function, args, is_unbound_method):
- return self._inject_unicode_find(
- node, function, args, is_unbound_method, 'rfind', -1)
-
- def _inject_unicode_find(self, node, function, args, is_unbound_method,
- method_name, direction):
- """Replace unicode.find(...) and unicode.rfind(...) by a
- direct call to the corresponding C-API function.
- """
- if len(args) not in (2,3,4):
- self._error_wrong_arg_count('unicode.%s' % method_name, node, args, "2-4")
- return node
- self._inject_int_default_argument(
- node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
- self._inject_int_default_argument(
- node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
- args.append(ExprNodes.IntNode(
- node.pos, value=str(direction), type=PyrexTypes.c_int_type))
-
- method_call = self._substitute_method_call(
- node, function, "PyUnicode_Find", self.PyUnicode_Find_func_type,
- method_name, is_unbound_method, args)
- return method_call.coerce_to_pyobject(self.current_env())
-
- PyUnicode_Count_func_type = PyrexTypes.CFuncType(
- PyrexTypes.c_py_ssize_t_type, [
- PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
- PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
- ],
- exception_value = '-1')
-
- def _handle_simple_method_unicode_count(self, node, function, args, is_unbound_method):
- """Replace unicode.count(...) by a direct call to the
- corresponding C-API function.
- """
- if len(args) not in (2,3,4):
- self._error_wrong_arg_count('unicode.count', node, args, "2-4")
- return node
- self._inject_int_default_argument(
- node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
- self._inject_int_default_argument(
- node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
-
- method_call = self._substitute_method_call(
- node, function, "PyUnicode_Count", self.PyUnicode_Count_func_type,
- 'count', is_unbound_method, args)
- return method_call.coerce_to_pyobject(self.current_env())
-
- PyUnicode_Replace_func_type = PyrexTypes.CFuncType(
- Builtin.unicode_type, [
- PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
- PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("replstr", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("maxcount", PyrexTypes.c_py_ssize_t_type, None),
- ])
-
- def _handle_simple_method_unicode_replace(self, node, function, args, is_unbound_method):
- """Replace unicode.replace(...) by a direct call to the
- corresponding C-API function.
- """
- if len(args) not in (3,4):
- self._error_wrong_arg_count('unicode.replace', node, args, "3-4")
- return node
- self._inject_int_default_argument(
- node, args, 3, PyrexTypes.c_py_ssize_t_type, "-1")
-
- return self._substitute_method_call(
- node, function, "PyUnicode_Replace", self.PyUnicode_Replace_func_type,
- 'replace', is_unbound_method, args)
-
- PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
- Builtin.bytes_type, [
- PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
+ PyString_Tailmatch_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_bint_type, [
+ PyrexTypes.CFuncTypeArg("str", PyrexTypes.py_object_type, None), # bytes/str/unicode
+ PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
+ ],
+ exception_value = '-1')
+
+ def _handle_simple_method_unicode_endswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'unicode', 'endswith',
+ unicode_tailmatch_utility_code, +1)
+
+ def _handle_simple_method_unicode_startswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'unicode', 'startswith',
+ unicode_tailmatch_utility_code, -1)
+
+ def _inject_tailmatch(self, node, function, args, is_unbound_method, type_name,
+ method_name, utility_code, direction):
+ """Replace unicode.startswith(...) and unicode.endswith(...)
+ by a direct call to the corresponding C-API function.
+ """
+ if len(args) not in (2,3,4):
+ self._error_wrong_arg_count('%s.%s' % (type_name, method_name), node, args, "2-4")
+ return node
+ self._inject_int_default_argument(
+ node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
+ self._inject_int_default_argument(
+ node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
+ args.append(ExprNodes.IntNode(
+ node.pos, value=str(direction), type=PyrexTypes.c_int_type))
+
+ method_call = self._substitute_method_call(
+ node, function,
+ "__Pyx_Py%s_Tailmatch" % type_name.capitalize(),
+ self.PyString_Tailmatch_func_type,
+ method_name, is_unbound_method, args,
+ utility_code = utility_code)
+ return method_call.coerce_to(Builtin.bool_type, self.current_env())
+
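startswith() and endswith() are funnelled through a single tailmatch helper above, distinguished only by the injected direction argument (+1 for endswith, -1 for startswith) and by the default start/end values 0 and PY_SSIZE_T_MAX. A plain-Python model of that calling convention (sys.maxsize stands in for PY_SSIZE_T_MAX here; illustration only):

    import sys

    def tailmatch(s, substring, start=0, end=sys.maxsize, direction=+1):
        s = s[start:end]
        return s.endswith(substring) if direction > 0 else s.startswith(substring)

    print(tailmatch("module.pyx", ".pyx", direction=+1))     # True, like endswith()
    print(tailmatch("module.pyx", "module", direction=-1))   # True, like startswith()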
+ PyUnicode_Find_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_py_ssize_t_type, [
+ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+ PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
+ ],
+ exception_value = '-2')
+
+ def _handle_simple_method_unicode_find(self, node, function, args, is_unbound_method):
+ return self._inject_unicode_find(
+ node, function, args, is_unbound_method, 'find', +1)
+
+ def _handle_simple_method_unicode_rfind(self, node, function, args, is_unbound_method):
+ return self._inject_unicode_find(
+ node, function, args, is_unbound_method, 'rfind', -1)
+
+ def _inject_unicode_find(self, node, function, args, is_unbound_method,
+ method_name, direction):
+ """Replace unicode.find(...) and unicode.rfind(...) by a
+ direct call to the corresponding C-API function.
+ """
+ if len(args) not in (2,3,4):
+ self._error_wrong_arg_count('unicode.%s' % method_name, node, args, "2-4")
+ return node
+ self._inject_int_default_argument(
+ node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
+ self._inject_int_default_argument(
+ node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
+ args.append(ExprNodes.IntNode(
+ node.pos, value=str(direction), type=PyrexTypes.c_int_type))
+
+ method_call = self._substitute_method_call(
+ node, function, "PyUnicode_Find", self.PyUnicode_Find_func_type,
+ method_name, is_unbound_method, args)
+ return method_call.coerce_to_pyobject(self.current_env())
+
+ PyUnicode_Count_func_type = PyrexTypes.CFuncType(
+ PyrexTypes.c_py_ssize_t_type, [
+ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+ PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
+ ],
+ exception_value = '-1')
+
+ def _handle_simple_method_unicode_count(self, node, function, args, is_unbound_method):
+ """Replace unicode.count(...) by a direct call to the
+ corresponding C-API function.
+ """
+ if len(args) not in (2,3,4):
+ self._error_wrong_arg_count('unicode.count', node, args, "2-4")
+ return node
+ self._inject_int_default_argument(
+ node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
+ self._inject_int_default_argument(
+ node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
+
+ method_call = self._substitute_method_call(
+ node, function, "PyUnicode_Count", self.PyUnicode_Count_func_type,
+ 'count', is_unbound_method, args)
+ return method_call.coerce_to_pyobject(self.current_env())
+
+ PyUnicode_Replace_func_type = PyrexTypes.CFuncType(
+ Builtin.unicode_type, [
+ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+ PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("replstr", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("maxcount", PyrexTypes.c_py_ssize_t_type, None),
+ ])
+
+ def _handle_simple_method_unicode_replace(self, node, function, args, is_unbound_method):
+ """Replace unicode.replace(...) by a direct call to the
+ corresponding C-API function.
+ """
+ if len(args) not in (3,4):
+ self._error_wrong_arg_count('unicode.replace', node, args, "3-4")
+ return node
+ self._inject_int_default_argument(
+ node, args, 3, PyrexTypes.c_py_ssize_t_type, "-1")
+
+ return self._substitute_method_call(
+ node, function, "PyUnicode_Replace", self.PyUnicode_Replace_func_type,
+ 'replace', is_unbound_method, args)
+
+ PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
+ Builtin.bytes_type, [
+ PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
- ])
-
- PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
- Builtin.bytes_type, [
- PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
- ])
-
+ ])
+
+ PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
+ Builtin.bytes_type, [
+ PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
+ ])
+
_special_encodings = ['UTF8', 'UTF16', 'UTF-16LE', 'UTF-16BE', 'Latin1', 'ASCII',
- 'unicode_escape', 'raw_unicode_escape']
-
- _special_codecs = [ (name, codecs.getencoder(name))
- for name in _special_encodings ]
-
- def _handle_simple_method_unicode_encode(self, node, function, args, is_unbound_method):
- """Replace unicode.encode(...) by a direct C-API call to the
- corresponding codec.
- """
- if len(args) < 1 or len(args) > 3:
- self._error_wrong_arg_count('unicode.encode', node, args, '1-3')
- return node
-
- string_node = args[0]
-
- if len(args) == 1:
- null_node = ExprNodes.NullNode(node.pos)
- return self._substitute_method_call(
- node, function, "PyUnicode_AsEncodedString",
- self.PyUnicode_AsEncodedString_func_type,
- 'encode', is_unbound_method, [string_node, null_node, null_node])
-
- parameters = self._unpack_encoding_and_error_mode(node.pos, args)
- if parameters is None:
- return node
- encoding, encoding_node, error_handling, error_handling_node = parameters
-
- if encoding and isinstance(string_node, ExprNodes.UnicodeNode):
- # constant, so try to do the encoding at compile time
- try:
- value = string_node.value.encode(encoding, error_handling)
- except:
- # well, looks like we can't
- pass
- else:
+ 'unicode_escape', 'raw_unicode_escape']
+
+ _special_codecs = [ (name, codecs.getencoder(name))
+ for name in _special_encodings ]
+
+ def _handle_simple_method_unicode_encode(self, node, function, args, is_unbound_method):
+ """Replace unicode.encode(...) by a direct C-API call to the
+ corresponding codec.
+ """
+ if len(args) < 1 or len(args) > 3:
+ self._error_wrong_arg_count('unicode.encode', node, args, '1-3')
+ return node
+
+ string_node = args[0]
+
+ if len(args) == 1:
+ null_node = ExprNodes.NullNode(node.pos)
+ return self._substitute_method_call(
+ node, function, "PyUnicode_AsEncodedString",
+ self.PyUnicode_AsEncodedString_func_type,
+ 'encode', is_unbound_method, [string_node, null_node, null_node])
+
+ parameters = self._unpack_encoding_and_error_mode(node.pos, args)
+ if parameters is None:
+ return node
+ encoding, encoding_node, error_handling, error_handling_node = parameters
+
+ if encoding and isinstance(string_node, ExprNodes.UnicodeNode):
+ # constant, so try to do the encoding at compile time
+ try:
+ value = string_node.value.encode(encoding, error_handling)
+ except:
+ # well, looks like we can't
+ pass
+ else:
value = bytes_literal(value, encoding)
return ExprNodes.BytesNode(string_node.pos, value=value, type=Builtin.bytes_type)
-
- if encoding and error_handling == 'strict':
- # try to find a specific encoder function
- codec_name = self._find_special_codec_name(encoding)
+
+ if encoding and error_handling == 'strict':
+ # try to find a specific encoder function
+ codec_name = self._find_special_codec_name(encoding)
if codec_name is not None and '-' not in codec_name:
- encode_function = "PyUnicode_As%sString" % codec_name
- return self._substitute_method_call(
- node, function, encode_function,
- self.PyUnicode_AsXyzString_func_type,
- 'encode', is_unbound_method, [string_node])
-
- return self._substitute_method_call(
- node, function, "PyUnicode_AsEncodedString",
- self.PyUnicode_AsEncodedString_func_type,
- 'encode', is_unbound_method,
- [string_node, encoding_node, error_handling_node])
-
- PyUnicode_DecodeXyz_func_ptr_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
- Builtin.unicode_type, [
+ encode_function = "PyUnicode_As%sString" % codec_name
+ return self._substitute_method_call(
+ node, function, encode_function,
+ self.PyUnicode_AsXyzString_func_type,
+ 'encode', is_unbound_method, [string_node])
+
+ return self._substitute_method_call(
+ node, function, "PyUnicode_AsEncodedString",
+ self.PyUnicode_AsEncodedString_func_type,
+ 'encode', is_unbound_method,
+ [string_node, encoding_node, error_handling_node])
+
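For a constant unicode literal with a known encoding, the handler above simply attempts the encoding at compile time and falls back to the runtime call on any failure; with 'strict' error handling it also tries to pick a codec-specific PyUnicode_As*String function. The compile-time attempt, modelled in plain Python (sketch only):

    def try_fold_encode(value, encoding, errors):
        try:
            return value.encode(encoding, errors)
        except Exception:
            return None   # cannot fold; leave it to PyUnicode_AsEncodedString at runtime

    print(try_fold_encode("abc", "ascii", "strict"))      # b'abc'
    print(try_fold_encode("\u20ac", "ascii", "strict"))   # None -> runtime call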
+ PyUnicode_DecodeXyz_func_ptr_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
+ Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
- PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
]))
-
- _decode_c_string_func_type = PyrexTypes.CFuncType(
- Builtin.unicode_type, [
+
+ _decode_c_string_func_type = PyrexTypes.CFuncType(
+ Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
- PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
- PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
+ PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
])
-
- _decode_bytes_func_type = PyrexTypes.CFuncType(
- Builtin.unicode_type, [
- PyrexTypes.CFuncTypeArg("string", PyrexTypes.py_object_type, None),
- PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
+
+ _decode_bytes_func_type = PyrexTypes.CFuncType(
+ Builtin.unicode_type, [
+ PyrexTypes.CFuncTypeArg("string", PyrexTypes.py_object_type, None),
+ PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
- PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
+ PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
])
-
- _decode_cpp_string_func_type = None # lazy init
-
- def _handle_simple_method_bytes_decode(self, node, function, args, is_unbound_method):
- """Replace char*.decode() by a direct C-API call to the
- corresponding codec, possibly resolving a slice on the char*.
- """
- if not (1 <= len(args) <= 3):
- self._error_wrong_arg_count('bytes.decode', node, args, '1-3')
- return node
-
- # normalise input nodes
- string_node = args[0]
- start = stop = None
- if isinstance(string_node, ExprNodes.SliceIndexNode):
- index_node = string_node
- string_node = index_node.base
- start, stop = index_node.start, index_node.stop
- if not start or start.constant_result == 0:
- start = None
- if isinstance(string_node, ExprNodes.CoerceToPyTypeNode):
- string_node = string_node.arg
-
- string_type = string_node.type
- if string_type in (Builtin.bytes_type, Builtin.bytearray_type):
- if is_unbound_method:
- string_node = string_node.as_none_safe_node(
- "descriptor '%s' requires a '%s' object but received a 'NoneType'",
- format_args=['decode', string_type.name])
- else:
- string_node = string_node.as_none_safe_node(
+
+ _decode_cpp_string_func_type = None # lazy init
+
+ def _handle_simple_method_bytes_decode(self, node, function, args, is_unbound_method):
+ """Replace char*.decode() by a direct C-API call to the
+ corresponding codec, possibly resolving a slice on the char*.
+ """
+ if not (1 <= len(args) <= 3):
+ self._error_wrong_arg_count('bytes.decode', node, args, '1-3')
+ return node
+
+ # normalise input nodes
+ string_node = args[0]
+ start = stop = None
+ if isinstance(string_node, ExprNodes.SliceIndexNode):
+ index_node = string_node
+ string_node = index_node.base
+ start, stop = index_node.start, index_node.stop
+ if not start or start.constant_result == 0:
+ start = None
+ if isinstance(string_node, ExprNodes.CoerceToPyTypeNode):
+ string_node = string_node.arg
+
+ string_type = string_node.type
+ if string_type in (Builtin.bytes_type, Builtin.bytearray_type):
+ if is_unbound_method:
+ string_node = string_node.as_none_safe_node(
+ "descriptor '%s' requires a '%s' object but received a 'NoneType'",
+ format_args=['decode', string_type.name])
+ else:
+ string_node = string_node.as_none_safe_node(
"'NoneType' object has no attribute '%.30s'",
- error="PyExc_AttributeError",
- format_args=['decode'])
- elif not string_type.is_string and not string_type.is_cpp_string:
- # nothing to optimise here
- return node
-
- parameters = self._unpack_encoding_and_error_mode(node.pos, args)
- if parameters is None:
- return node
- encoding, encoding_node, error_handling, error_handling_node = parameters
-
- if not start:
- start = ExprNodes.IntNode(node.pos, value='0', constant_result=0)
- elif not start.type.is_int:
- start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
- if stop and not stop.type.is_int:
- stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
-
- # try to find a specific encoder function
- codec_name = None
- if encoding is not None:
- codec_name = self._find_special_codec_name(encoding)
- if codec_name is not None:
+ error="PyExc_AttributeError",
+ format_args=['decode'])
+ elif not string_type.is_string and not string_type.is_cpp_string:
+ # nothing to optimise here
+ return node
+
+ parameters = self._unpack_encoding_and_error_mode(node.pos, args)
+ if parameters is None:
+ return node
+ encoding, encoding_node, error_handling, error_handling_node = parameters
+
+ if not start:
+ start = ExprNodes.IntNode(node.pos, value='0', constant_result=0)
+ elif not start.type.is_int:
+ start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+ if stop and not stop.type.is_int:
+ stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+
+ # try to find a specific encoder function
+ codec_name = None
+ if encoding is not None:
+ codec_name = self._find_special_codec_name(encoding)
+ if codec_name is not None:
if codec_name in ('UTF16', 'UTF-16LE', 'UTF-16BE'):
codec_cname = "__Pyx_PyUnicode_Decode%s" % codec_name.replace('-', '')
else:
codec_cname = "PyUnicode_Decode%s" % codec_name
- decode_function = ExprNodes.RawCNameExprNode(
+ decode_function = ExprNodes.RawCNameExprNode(
node.pos, type=self.PyUnicode_DecodeXyz_func_ptr_type, cname=codec_cname)
- encoding_node = ExprNodes.NullNode(node.pos)
- else:
- decode_function = ExprNodes.NullNode(node.pos)
-
- # build the helper function call
- temps = []
- if string_type.is_string:
- # C string
- if not stop:
- # use strlen() to find the string length, just as CPython would
- if not string_node.is_name:
- string_node = UtilNodes.LetRefNode(string_node) # used twice
- temps.append(string_node)
- stop = ExprNodes.PythonCapiCallNode(
- string_node.pos, "strlen", self.Pyx_strlen_func_type,
- args=[string_node],
- is_temp=False,
- utility_code=UtilityCode.load_cached("IncludeStringH", "StringTools.c"),
- ).coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
- helper_func_type = self._decode_c_string_func_type
- utility_code_name = 'decode_c_string'
- elif string_type.is_cpp_string:
- # C++ std::string
- if not stop:
- stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
- constant_result=ExprNodes.not_a_constant)
- if self._decode_cpp_string_func_type is None:
- # lazy init to reuse the C++ string type
- self._decode_cpp_string_func_type = PyrexTypes.CFuncType(
- Builtin.unicode_type, [
- PyrexTypes.CFuncTypeArg("string", string_type, None),
- PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
- PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
+ encoding_node = ExprNodes.NullNode(node.pos)
+ else:
+ decode_function = ExprNodes.NullNode(node.pos)
+
+ # build the helper function call
+ temps = []
+ if string_type.is_string:
+ # C string
+ if not stop:
+ # use strlen() to find the string length, just as CPython would
+ if not string_node.is_name:
+ string_node = UtilNodes.LetRefNode(string_node) # used twice
+ temps.append(string_node)
+ stop = ExprNodes.PythonCapiCallNode(
+ string_node.pos, "strlen", self.Pyx_strlen_func_type,
+ args=[string_node],
+ is_temp=False,
+ utility_code=UtilityCode.load_cached("IncludeStringH", "StringTools.c"),
+ ).coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+ helper_func_type = self._decode_c_string_func_type
+ utility_code_name = 'decode_c_string'
+ elif string_type.is_cpp_string:
+ # C++ std::string
+ if not stop:
+ stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
+ constant_result=ExprNodes.not_a_constant)
+ if self._decode_cpp_string_func_type is None:
+ # lazy init to reuse the C++ string type
+ self._decode_cpp_string_func_type = PyrexTypes.CFuncType(
+ Builtin.unicode_type, [
+ PyrexTypes.CFuncTypeArg("string", string_type, None),
+ PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+ PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
- PyrexTypes.CFuncTypeArg("decode_func", self.PyUnicode_DecodeXyz_func_ptr_type, None),
- ])
- helper_func_type = self._decode_cpp_string_func_type
- utility_code_name = 'decode_cpp_string'
- else:
- # Python bytes/bytearray object
- if not stop:
- stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
- constant_result=ExprNodes.not_a_constant)
- helper_func_type = self._decode_bytes_func_type
- if string_type is Builtin.bytes_type:
- utility_code_name = 'decode_bytes'
- else:
- utility_code_name = 'decode_bytearray'
-
- node = ExprNodes.PythonCapiCallNode(
- node.pos, '__Pyx_%s' % utility_code_name, helper_func_type,
- args=[string_node, start, stop, encoding_node, error_handling_node, decode_function],
- is_temp=node.is_temp,
- utility_code=UtilityCode.load_cached(utility_code_name, 'StringTools.c'),
- )
-
- for temp in temps[::-1]:
- node = UtilNodes.EvalWithTempExprNode(temp, node)
- return node
-
- _handle_simple_method_bytearray_decode = _handle_simple_method_bytes_decode
-
- def _find_special_codec_name(self, encoding):
- try:
- requested_codec = codecs.getencoder(encoding)
- except LookupError:
- return None
- for name, codec in self._special_codecs:
- if codec == requested_codec:
- if '_' in name:
- name = ''.join([s.capitalize()
- for s in name.split('_')])
- return name
- return None
-
- def _unpack_encoding_and_error_mode(self, pos, args):
- null_node = ExprNodes.NullNode(pos)
-
- if len(args) >= 2:
- encoding, encoding_node = self._unpack_string_and_cstring_node(args[1])
- if encoding_node is None:
- return None
- else:
- encoding = None
- encoding_node = null_node
-
- if len(args) == 3:
- error_handling, error_handling_node = self._unpack_string_and_cstring_node(args[2])
- if error_handling_node is None:
- return None
- if error_handling == 'strict':
- error_handling_node = null_node
- else:
- error_handling = 'strict'
- error_handling_node = null_node
-
- return (encoding, encoding_node, error_handling, error_handling_node)
-
- def _unpack_string_and_cstring_node(self, node):
- if isinstance(node, ExprNodes.CoerceToPyTypeNode):
- node = node.arg
- if isinstance(node, ExprNodes.UnicodeNode):
- encoding = node.value
- node = ExprNodes.BytesNode(
+ PyrexTypes.CFuncTypeArg("decode_func", self.PyUnicode_DecodeXyz_func_ptr_type, None),
+ ])
+ helper_func_type = self._decode_cpp_string_func_type
+ utility_code_name = 'decode_cpp_string'
+ else:
+ # Python bytes/bytearray object
+ if not stop:
+ stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
+ constant_result=ExprNodes.not_a_constant)
+ helper_func_type = self._decode_bytes_func_type
+ if string_type is Builtin.bytes_type:
+ utility_code_name = 'decode_bytes'
+ else:
+ utility_code_name = 'decode_bytearray'
+
+ node = ExprNodes.PythonCapiCallNode(
+ node.pos, '__Pyx_%s' % utility_code_name, helper_func_type,
+ args=[string_node, start, stop, encoding_node, error_handling_node, decode_function],
+ is_temp=node.is_temp,
+ utility_code=UtilityCode.load_cached(utility_code_name, 'StringTools.c'),
+ )
+
+ for temp in temps[::-1]:
+ node = UtilNodes.EvalWithTempExprNode(temp, node)
+ return node
+
+ _handle_simple_method_bytearray_decode = _handle_simple_method_bytes_decode
+
+ def _find_special_codec_name(self, encoding):
+ try:
+ requested_codec = codecs.getencoder(encoding)
+ except LookupError:
+ return None
+ for name, codec in self._special_codecs:
+ if codec == requested_codec:
+ if '_' in name:
+ name = ''.join([s.capitalize()
+ for s in name.split('_')])
+ return name
+ return None
+
+ def _unpack_encoding_and_error_mode(self, pos, args):
+ null_node = ExprNodes.NullNode(pos)
+
+ if len(args) >= 2:
+ encoding, encoding_node = self._unpack_string_and_cstring_node(args[1])
+ if encoding_node is None:
+ return None
+ else:
+ encoding = None
+ encoding_node = null_node
+
+ if len(args) == 3:
+ error_handling, error_handling_node = self._unpack_string_and_cstring_node(args[2])
+ if error_handling_node is None:
+ return None
+ if error_handling == 'strict':
+ error_handling_node = null_node
+ else:
+ error_handling = 'strict'
+ error_handling_node = null_node
+
+ return (encoding, encoding_node, error_handling, error_handling_node)
+
+ def _unpack_string_and_cstring_node(self, node):
+ if isinstance(node, ExprNodes.CoerceToPyTypeNode):
+ node = node.arg
+ if isinstance(node, ExprNodes.UnicodeNode):
+ encoding = node.value
+ node = ExprNodes.BytesNode(
node.pos, value=encoding.as_utf8_string(), type=PyrexTypes.c_const_char_ptr_type)
- elif isinstance(node, (ExprNodes.StringNode, ExprNodes.BytesNode)):
- encoding = node.value.decode('ISO-8859-1')
- node = ExprNodes.BytesNode(
+ elif isinstance(node, (ExprNodes.StringNode, ExprNodes.BytesNode)):
+ encoding = node.value.decode('ISO-8859-1')
+ node = ExprNodes.BytesNode(
node.pos, value=node.value, type=PyrexTypes.c_const_char_ptr_type)
- elif node.type is Builtin.bytes_type:
- encoding = None
+ elif node.type is Builtin.bytes_type:
+ encoding = None
node = node.coerce_to(PyrexTypes.c_const_char_ptr_type, self.current_env())
- elif node.type.is_string:
- encoding = None
- else:
- encoding = node = None
- return encoding, node
-
- def _handle_simple_method_str_endswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'str', 'endswith',
- str_tailmatch_utility_code, +1)
-
- def _handle_simple_method_str_startswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'str', 'startswith',
- str_tailmatch_utility_code, -1)
-
- def _handle_simple_method_bytes_endswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'bytes', 'endswith',
- bytes_tailmatch_utility_code, +1)
-
- def _handle_simple_method_bytes_startswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'bytes', 'startswith',
- bytes_tailmatch_utility_code, -1)
-
- ''' # disabled for now, enable when we consider it worth it (see StringTools.c)
- def _handle_simple_method_bytearray_endswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'bytearray', 'endswith',
- bytes_tailmatch_utility_code, +1)
-
- def _handle_simple_method_bytearray_startswith(self, node, function, args, is_unbound_method):
- return self._inject_tailmatch(
- node, function, args, is_unbound_method, 'bytearray', 'startswith',
- bytes_tailmatch_utility_code, -1)
- '''
-
- ### helpers
-
- def _substitute_method_call(self, node, function, name, func_type,
- attr_name, is_unbound_method, args=(),
- utility_code=None, is_temp=None,
+ elif node.type.is_string:
+ encoding = None
+ else:
+ encoding = node = None
+ return encoding, node
+
+ def _handle_simple_method_str_endswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'str', 'endswith',
+ str_tailmatch_utility_code, +1)
+
+ def _handle_simple_method_str_startswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'str', 'startswith',
+ str_tailmatch_utility_code, -1)
+
+ def _handle_simple_method_bytes_endswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'bytes', 'endswith',
+ bytes_tailmatch_utility_code, +1)
+
+ def _handle_simple_method_bytes_startswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'bytes', 'startswith',
+ bytes_tailmatch_utility_code, -1)
+
+ ''' # disabled for now, enable when we consider it worth it (see StringTools.c)
+ def _handle_simple_method_bytearray_endswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'bytearray', 'endswith',
+ bytes_tailmatch_utility_code, +1)
+
+ def _handle_simple_method_bytearray_startswith(self, node, function, args, is_unbound_method):
+ return self._inject_tailmatch(
+ node, function, args, is_unbound_method, 'bytearray', 'startswith',
+ bytes_tailmatch_utility_code, -1)
+ '''
+
+ ### helpers
+
+ def _substitute_method_call(self, node, function, name, func_type,
+ attr_name, is_unbound_method, args=(),
+ utility_code=None, is_temp=None,
may_return_none=ExprNodes.PythonCapiCallNode.may_return_none,
with_none_check=True):
- args = list(args)
+ args = list(args)
if with_none_check and args:
args[0] = self._wrap_self_arg(args[0], function, is_unbound_method, attr_name)
- if is_temp is None:
- is_temp = node.is_temp
- return ExprNodes.PythonCapiCallNode(
- node.pos, name, func_type,
- args = args,
- is_temp = is_temp,
- utility_code = utility_code,
- may_return_none = may_return_none,
- result_is_used = node.result_is_used,
- )
-
+ if is_temp is None:
+ is_temp = node.is_temp
+ return ExprNodes.PythonCapiCallNode(
+ node.pos, name, func_type,
+ args = args,
+ is_temp = is_temp,
+ utility_code = utility_code,
+ may_return_none = may_return_none,
+ result_is_used = node.result_is_used,
+ )
+
def _wrap_self_arg(self, self_arg, function, is_unbound_method, attr_name):
if self_arg.is_literal:
return self_arg
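
Aside, not part of the diff: the hunk above shows how _handle_simple_method_unicode_encode constant-folds unicode.encode(). When both the string and the encoding are compile-time constants, it attempts the encoding while compiling and quietly falls back to the generic PyUnicode_AsEncodedString call if the codec refuses. A minimal stand-alone sketch of that decision in plain Python; fold_constant_encode is a hypothetical helper for illustration, not a Cython API:

def fold_constant_encode(value, encoding, errors='strict'):
    """Return the encoded bytes if the encoding succeeds at compile time, else None."""
    try:
        # mirrors string_node.value.encode(encoding, error_handling) in the hunk above
        return value.encode(encoding, errors)
    except (UnicodeError, LookupError):
        # unknown codec or unencodable data: leave the .encode() call for runtime
        return None

# fold_constant_encode(u'abc', 'ascii')      -> b'abc'  (emitted as a bytes literal)
# fold_constant_encode(u'h\xe9llo', 'ascii') -> None    (kept as a runtime call)
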
@@ -3963,280 +3963,280 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
format_args=[attr_name])
return self_arg
- def _inject_int_default_argument(self, node, args, arg_index, type, default_value):
- assert len(args) >= arg_index
- if len(args) == arg_index:
- args.append(ExprNodes.IntNode(node.pos, value=str(default_value),
- type=type, constant_result=default_value))
- else:
- args[arg_index] = args[arg_index].coerce_to(type, self.current_env())
-
- def _inject_bint_default_argument(self, node, args, arg_index, default_value):
- assert len(args) >= arg_index
- if len(args) == arg_index:
- default_value = bool(default_value)
- args.append(ExprNodes.BoolNode(node.pos, value=default_value,
- constant_result=default_value))
- else:
- args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env())
-
-
-unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c')
-bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c')
-str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c')
-
-
-class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
- """Calculate the result of constant expressions to store it in
- ``expr_node.constant_result``, and replace trivial cases by their
- constant result.
-
- General rules:
-
- - We calculate float constants to make them available to the
- compiler, but we do not aggregate them into a single literal
- node to prevent any loss of precision.
-
- - We recursively calculate constants from non-literal nodes to
- make them available to the compiler, but we only aggregate
- literal nodes at each step. Non-literal nodes are never merged
- into a single node.
- """
-
- def __init__(self, reevaluate=False):
- """
- The reevaluate argument specifies whether constant values that were
- previously computed should be recomputed.
- """
- super(ConstantFolding, self).__init__()
- self.reevaluate = reevaluate
-
- def _calculate_const(self, node):
- if (not self.reevaluate and
- node.constant_result is not ExprNodes.constant_value_not_set):
- return
-
- # make sure we always set the value
- not_a_constant = ExprNodes.not_a_constant
- node.constant_result = not_a_constant
-
- # check if all children are constant
- children = self.visitchildren(node)
- for child_result in children.values():
- if type(child_result) is list:
- for child in child_result:
- if getattr(child, 'constant_result', not_a_constant) is not_a_constant:
- return
- elif getattr(child_result, 'constant_result', not_a_constant) is not_a_constant:
- return
-
- # now try to calculate the real constant value
- try:
- node.calculate_constant_result()
-# if node.constant_result is not ExprNodes.not_a_constant:
-# print node.__class__.__name__, node.constant_result
- except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError):
- # ignore all 'normal' errors here => no constant result
- pass
- except Exception:
- # this looks like a real error
- import traceback, sys
- traceback.print_exc(file=sys.stdout)
-
- NODE_TYPE_ORDER = [ExprNodes.BoolNode, ExprNodes.CharNode,
- ExprNodes.IntNode, ExprNodes.FloatNode]
-
- def _widest_node_class(self, *nodes):
- try:
- return self.NODE_TYPE_ORDER[
- max(map(self.NODE_TYPE_ORDER.index, map(type, nodes)))]
- except ValueError:
- return None
-
- def _bool_node(self, node, value):
- value = bool(value)
- return ExprNodes.BoolNode(node.pos, value=value, constant_result=value)
-
- def visit_ExprNode(self, node):
- self._calculate_const(node)
- return node
-
- def visit_UnopNode(self, node):
- self._calculate_const(node)
- if not node.has_constant_result():
- if node.operator == '!':
- return self._handle_NotNode(node)
- return node
- if not node.operand.is_literal:
- return node
- if node.operator == '!':
- return self._bool_node(node, node.constant_result)
- elif isinstance(node.operand, ExprNodes.BoolNode):
- return ExprNodes.IntNode(node.pos, value=str(int(node.constant_result)),
- type=PyrexTypes.c_int_type,
- constant_result=int(node.constant_result))
- elif node.operator == '+':
- return self._handle_UnaryPlusNode(node)
- elif node.operator == '-':
- return self._handle_UnaryMinusNode(node)
- return node
-
- _negate_operator = {
- 'in': 'not_in',
- 'not_in': 'in',
- 'is': 'is_not',
- 'is_not': 'is'
- }.get
-
- def _handle_NotNode(self, node):
- operand = node.operand
- if isinstance(operand, ExprNodes.PrimaryCmpNode):
- operator = self._negate_operator(operand.operator)
- if operator:
- node = copy.copy(operand)
- node.operator = operator
- node = self.visit_PrimaryCmpNode(node)
- return node
-
- def _handle_UnaryMinusNode(self, node):
- def _negate(value):
- if value.startswith('-'):
- value = value[1:]
- else:
- value = '-' + value
- return value
-
- node_type = node.operand.type
- if isinstance(node.operand, ExprNodes.FloatNode):
- # this is a safe operation
- return ExprNodes.FloatNode(node.pos, value=_negate(node.operand.value),
- type=node_type,
- constant_result=node.constant_result)
- if node_type.is_int and node_type.signed or \
- isinstance(node.operand, ExprNodes.IntNode) and node_type.is_pyobject:
- return ExprNodes.IntNode(node.pos, value=_negate(node.operand.value),
- type=node_type,
- longness=node.operand.longness,
- constant_result=node.constant_result)
- return node
-
- def _handle_UnaryPlusNode(self, node):
- if (node.operand.has_constant_result() and
- node.constant_result == node.operand.constant_result):
- return node.operand
- return node
-
- def visit_BoolBinopNode(self, node):
- self._calculate_const(node)
- if not node.operand1.has_constant_result():
- return node
- if node.operand1.constant_result:
- if node.operator == 'and':
- return node.operand2
- else:
- return node.operand1
- else:
- if node.operator == 'and':
- return node.operand1
- else:
- return node.operand2
-
- def visit_BinopNode(self, node):
- self._calculate_const(node)
- if node.constant_result is ExprNodes.not_a_constant:
- return node
- if isinstance(node.constant_result, float):
- return node
- operand1, operand2 = node.operand1, node.operand2
- if not operand1.is_literal or not operand2.is_literal:
- return node
-
- # now inject a new constant node with the calculated value
- try:
- type1, type2 = operand1.type, operand2.type
- if type1 is None or type2 is None:
- return node
- except AttributeError:
- return node
-
- if type1.is_numeric and type2.is_numeric:
- widest_type = PyrexTypes.widest_numeric_type(type1, type2)
- else:
- widest_type = PyrexTypes.py_object_type
-
- target_class = self._widest_node_class(operand1, operand2)
- if target_class is None:
- return node
- elif target_class is ExprNodes.BoolNode and node.operator in '+-//<<%**>>':
- # C arithmetic results in at least an int type
- target_class = ExprNodes.IntNode
- elif target_class is ExprNodes.CharNode and node.operator in '+-//<<%**>>&|^':
- # C arithmetic results in at least an int type
- target_class = ExprNodes.IntNode
-
- if target_class is ExprNodes.IntNode:
- unsigned = getattr(operand1, 'unsigned', '') and \
- getattr(operand2, 'unsigned', '')
- longness = "LL"[:max(len(getattr(operand1, 'longness', '')),
- len(getattr(operand2, 'longness', '')))]
- new_node = ExprNodes.IntNode(pos=node.pos,
- unsigned=unsigned, longness=longness,
- value=str(int(node.constant_result)),
- constant_result=int(node.constant_result))
- # IntNode is smart about the type it chooses, so we just
- # make sure we were not smarter this time
- if widest_type.is_pyobject or new_node.type.is_pyobject:
- new_node.type = PyrexTypes.py_object_type
- else:
- new_node.type = PyrexTypes.widest_numeric_type(widest_type, new_node.type)
- else:
- if target_class is ExprNodes.BoolNode:
- node_value = node.constant_result
- else:
- node_value = str(node.constant_result)
- new_node = target_class(pos=node.pos, type = widest_type,
- value = node_value,
- constant_result = node.constant_result)
- return new_node
-
- def visit_AddNode(self, node):
- self._calculate_const(node)
- if node.constant_result is ExprNodes.not_a_constant:
- return node
- if node.operand1.is_string_literal and node.operand2.is_string_literal:
- # some people combine string literals with a '+'
- str1, str2 = node.operand1, node.operand2
- if isinstance(str1, ExprNodes.UnicodeNode) and isinstance(str2, ExprNodes.UnicodeNode):
- bytes_value = None
- if str1.bytes_value is not None and str2.bytes_value is not None:
- if str1.bytes_value.encoding == str2.bytes_value.encoding:
+ def _inject_int_default_argument(self, node, args, arg_index, type, default_value):
+ assert len(args) >= arg_index
+ if len(args) == arg_index:
+ args.append(ExprNodes.IntNode(node.pos, value=str(default_value),
+ type=type, constant_result=default_value))
+ else:
+ args[arg_index] = args[arg_index].coerce_to(type, self.current_env())
+
+ def _inject_bint_default_argument(self, node, args, arg_index, default_value):
+ assert len(args) >= arg_index
+ if len(args) == arg_index:
+ default_value = bool(default_value)
+ args.append(ExprNodes.BoolNode(node.pos, value=default_value,
+ constant_result=default_value))
+ else:
+ args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env())
+
+
+unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c')
+bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c')
+str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c')
+
+
+class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
+ """Calculate the result of constant expressions to store it in
+ ``expr_node.constant_result``, and replace trivial cases by their
+ constant result.
+
+ General rules:
+
+ - We calculate float constants to make them available to the
+ compiler, but we do not aggregate them into a single literal
+ node to prevent any loss of precision.
+
+ - We recursively calculate constants from non-literal nodes to
+ make them available to the compiler, but we only aggregate
+ literal nodes at each step. Non-literal nodes are never merged
+ into a single node.
+ """
+
+ def __init__(self, reevaluate=False):
+ """
+ The reevaluate argument specifies whether constant values that were
+ previously computed should be recomputed.
+ """
+ super(ConstantFolding, self).__init__()
+ self.reevaluate = reevaluate
+
+ def _calculate_const(self, node):
+ if (not self.reevaluate and
+ node.constant_result is not ExprNodes.constant_value_not_set):
+ return
+
+ # make sure we always set the value
+ not_a_constant = ExprNodes.not_a_constant
+ node.constant_result = not_a_constant
+
+ # check if all children are constant
+ children = self.visitchildren(node)
+ for child_result in children.values():
+ if type(child_result) is list:
+ for child in child_result:
+ if getattr(child, 'constant_result', not_a_constant) is not_a_constant:
+ return
+ elif getattr(child_result, 'constant_result', not_a_constant) is not_a_constant:
+ return
+
+ # now try to calculate the real constant value
+ try:
+ node.calculate_constant_result()
+# if node.constant_result is not ExprNodes.not_a_constant:
+# print node.__class__.__name__, node.constant_result
+ except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError):
+ # ignore all 'normal' errors here => no constant result
+ pass
+ except Exception:
+ # this looks like a real error
+ import traceback, sys
+ traceback.print_exc(file=sys.stdout)
+
+ NODE_TYPE_ORDER = [ExprNodes.BoolNode, ExprNodes.CharNode,
+ ExprNodes.IntNode, ExprNodes.FloatNode]
+
+ def _widest_node_class(self, *nodes):
+ try:
+ return self.NODE_TYPE_ORDER[
+ max(map(self.NODE_TYPE_ORDER.index, map(type, nodes)))]
+ except ValueError:
+ return None
+
+ def _bool_node(self, node, value):
+ value = bool(value)
+ return ExprNodes.BoolNode(node.pos, value=value, constant_result=value)
+
+ def visit_ExprNode(self, node):
+ self._calculate_const(node)
+ return node
+
+ def visit_UnopNode(self, node):
+ self._calculate_const(node)
+ if not node.has_constant_result():
+ if node.operator == '!':
+ return self._handle_NotNode(node)
+ return node
+ if not node.operand.is_literal:
+ return node
+ if node.operator == '!':
+ return self._bool_node(node, node.constant_result)
+ elif isinstance(node.operand, ExprNodes.BoolNode):
+ return ExprNodes.IntNode(node.pos, value=str(int(node.constant_result)),
+ type=PyrexTypes.c_int_type,
+ constant_result=int(node.constant_result))
+ elif node.operator == '+':
+ return self._handle_UnaryPlusNode(node)
+ elif node.operator == '-':
+ return self._handle_UnaryMinusNode(node)
+ return node
+
+ _negate_operator = {
+ 'in': 'not_in',
+ 'not_in': 'in',
+ 'is': 'is_not',
+ 'is_not': 'is'
+ }.get
+
+ def _handle_NotNode(self, node):
+ operand = node.operand
+ if isinstance(operand, ExprNodes.PrimaryCmpNode):
+ operator = self._negate_operator(operand.operator)
+ if operator:
+ node = copy.copy(operand)
+ node.operator = operator
+ node = self.visit_PrimaryCmpNode(node)
+ return node
+
+ def _handle_UnaryMinusNode(self, node):
+ def _negate(value):
+ if value.startswith('-'):
+ value = value[1:]
+ else:
+ value = '-' + value
+ return value
+
+ node_type = node.operand.type
+ if isinstance(node.operand, ExprNodes.FloatNode):
+ # this is a safe operation
+ return ExprNodes.FloatNode(node.pos, value=_negate(node.operand.value),
+ type=node_type,
+ constant_result=node.constant_result)
+ if node_type.is_int and node_type.signed or \
+ isinstance(node.operand, ExprNodes.IntNode) and node_type.is_pyobject:
+ return ExprNodes.IntNode(node.pos, value=_negate(node.operand.value),
+ type=node_type,
+ longness=node.operand.longness,
+ constant_result=node.constant_result)
+ return node
+
+ def _handle_UnaryPlusNode(self, node):
+ if (node.operand.has_constant_result() and
+ node.constant_result == node.operand.constant_result):
+ return node.operand
+ return node
+
+ def visit_BoolBinopNode(self, node):
+ self._calculate_const(node)
+ if not node.operand1.has_constant_result():
+ return node
+ if node.operand1.constant_result:
+ if node.operator == 'and':
+ return node.operand2
+ else:
+ return node.operand1
+ else:
+ if node.operator == 'and':
+ return node.operand1
+ else:
+ return node.operand2
+
+ def visit_BinopNode(self, node):
+ self._calculate_const(node)
+ if node.constant_result is ExprNodes.not_a_constant:
+ return node
+ if isinstance(node.constant_result, float):
+ return node
+ operand1, operand2 = node.operand1, node.operand2
+ if not operand1.is_literal or not operand2.is_literal:
+ return node
+
+ # now inject a new constant node with the calculated value
+ try:
+ type1, type2 = operand1.type, operand2.type
+ if type1 is None or type2 is None:
+ return node
+ except AttributeError:
+ return node
+
+ if type1.is_numeric and type2.is_numeric:
+ widest_type = PyrexTypes.widest_numeric_type(type1, type2)
+ else:
+ widest_type = PyrexTypes.py_object_type
+
+ target_class = self._widest_node_class(operand1, operand2)
+ if target_class is None:
+ return node
+ elif target_class is ExprNodes.BoolNode and node.operator in '+-//<<%**>>':
+ # C arithmetic results in at least an int type
+ target_class = ExprNodes.IntNode
+ elif target_class is ExprNodes.CharNode and node.operator in '+-//<<%**>>&|^':
+ # C arithmetic results in at least an int type
+ target_class = ExprNodes.IntNode
+
+ if target_class is ExprNodes.IntNode:
+ unsigned = getattr(operand1, 'unsigned', '') and \
+ getattr(operand2, 'unsigned', '')
+ longness = "LL"[:max(len(getattr(operand1, 'longness', '')),
+ len(getattr(operand2, 'longness', '')))]
+ new_node = ExprNodes.IntNode(pos=node.pos,
+ unsigned=unsigned, longness=longness,
+ value=str(int(node.constant_result)),
+ constant_result=int(node.constant_result))
+ # IntNode is smart about the type it chooses, so we just
+ # make sure we were not smarter this time
+ if widest_type.is_pyobject or new_node.type.is_pyobject:
+ new_node.type = PyrexTypes.py_object_type
+ else:
+ new_node.type = PyrexTypes.widest_numeric_type(widest_type, new_node.type)
+ else:
+ if target_class is ExprNodes.BoolNode:
+ node_value = node.constant_result
+ else:
+ node_value = str(node.constant_result)
+ new_node = target_class(pos=node.pos, type = widest_type,
+ value = node_value,
+ constant_result = node.constant_result)
+ return new_node
+
+ def visit_AddNode(self, node):
+ self._calculate_const(node)
+ if node.constant_result is ExprNodes.not_a_constant:
+ return node
+ if node.operand1.is_string_literal and node.operand2.is_string_literal:
+ # some people combine string literals with a '+'
+ str1, str2 = node.operand1, node.operand2
+ if isinstance(str1, ExprNodes.UnicodeNode) and isinstance(str2, ExprNodes.UnicodeNode):
+ bytes_value = None
+ if str1.bytes_value is not None and str2.bytes_value is not None:
+ if str1.bytes_value.encoding == str2.bytes_value.encoding:
bytes_value = bytes_literal(
str1.bytes_value + str2.bytes_value,
str1.bytes_value.encoding)
- string_value = EncodedString(node.constant_result)
- return ExprNodes.UnicodeNode(
- str1.pos, value=string_value, constant_result=node.constant_result, bytes_value=bytes_value)
- elif isinstance(str1, ExprNodes.BytesNode) and isinstance(str2, ExprNodes.BytesNode):
- if str1.value.encoding == str2.value.encoding:
+ string_value = EncodedString(node.constant_result)
+ return ExprNodes.UnicodeNode(
+ str1.pos, value=string_value, constant_result=node.constant_result, bytes_value=bytes_value)
+ elif isinstance(str1, ExprNodes.BytesNode) and isinstance(str2, ExprNodes.BytesNode):
+ if str1.value.encoding == str2.value.encoding:
bytes_value = bytes_literal(node.constant_result, str1.value.encoding)
- return ExprNodes.BytesNode(str1.pos, value=bytes_value, constant_result=node.constant_result)
- # all other combinations are rather complicated
- # to get right in Py2/3: encodings, unicode escapes, ...
- return self.visit_BinopNode(node)
-
- def visit_MulNode(self, node):
- self._calculate_const(node)
- if node.operand1.is_sequence_constructor:
- return self._calculate_constant_seq(node, node.operand1, node.operand2)
- if isinstance(node.operand1, ExprNodes.IntNode) and \
- node.operand2.is_sequence_constructor:
- return self._calculate_constant_seq(node, node.operand2, node.operand1)
+ return ExprNodes.BytesNode(str1.pos, value=bytes_value, constant_result=node.constant_result)
+ # all other combinations are rather complicated
+ # to get right in Py2/3: encodings, unicode escapes, ...
+ return self.visit_BinopNode(node)
+
+ def visit_MulNode(self, node):
+ self._calculate_const(node)
+ if node.operand1.is_sequence_constructor:
+ return self._calculate_constant_seq(node, node.operand1, node.operand2)
+ if isinstance(node.operand1, ExprNodes.IntNode) and \
+ node.operand2.is_sequence_constructor:
+ return self._calculate_constant_seq(node, node.operand2, node.operand1)
if node.operand1.is_string_literal:
return self._multiply_string(node, node.operand1, node.operand2)
elif node.operand2.is_string_literal:
return self._multiply_string(node, node.operand2, node.operand1)
- return self.visit_BinopNode(node)
-
+ return self.visit_BinopNode(node)
+
def _multiply_string(self, node, string_node, multiplier_node):
multiplier = multiplier_node.constant_result
if not isinstance(multiplier, _py_int_types):
@@ -4273,25 +4273,25 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
string_node.constant_result = string_node.value
return string_node
- def _calculate_constant_seq(self, node, sequence_node, factor):
- if factor.constant_result != 1 and sequence_node.args:
+ def _calculate_constant_seq(self, node, sequence_node, factor):
+ if factor.constant_result != 1 and sequence_node.args:
if isinstance(factor.constant_result, _py_int_types) and factor.constant_result <= 0:
- del sequence_node.args[:]
- sequence_node.mult_factor = None
- elif sequence_node.mult_factor is not None:
+ del sequence_node.args[:]
+ sequence_node.mult_factor = None
+ elif sequence_node.mult_factor is not None:
if (isinstance(factor.constant_result, _py_int_types) and
isinstance(sequence_node.mult_factor.constant_result, _py_int_types)):
- value = sequence_node.mult_factor.constant_result * factor.constant_result
- sequence_node.mult_factor = ExprNodes.IntNode(
- sequence_node.mult_factor.pos,
- value=str(value), constant_result=value)
- else:
- # don't know if we can combine the factors, so don't
- return self.visit_BinopNode(node)
- else:
- sequence_node.mult_factor = factor
- return sequence_node
-
+ value = sequence_node.mult_factor.constant_result * factor.constant_result
+ sequence_node.mult_factor = ExprNodes.IntNode(
+ sequence_node.mult_factor.pos,
+ value=str(value), constant_result=value)
+ else:
+ # don't know if we can combine the factors, so don't
+ return self.visit_BinopNode(node)
+ else:
+ sequence_node.mult_factor = factor
+ return sequence_node
+
def visit_ModNode(self, node):
self.visitchildren(node)
if isinstance(node.operand1, ExprNodes.UnicodeNode) and isinstance(node.operand2, ExprNodes.TupleNode):
@@ -4520,298 +4520,298 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
self._calculate_const(node)
return node
- def visit_PrimaryCmpNode(self, node):
- # calculate constant partial results in the comparison cascade
- self.visitchildren(node, ['operand1'])
- left_node = node.operand1
- cmp_node = node
- while cmp_node is not None:
- self.visitchildren(cmp_node, ['operand2'])
- right_node = cmp_node.operand2
- cmp_node.constant_result = not_a_constant
- if left_node.has_constant_result() and right_node.has_constant_result():
- try:
- cmp_node.calculate_cascaded_constant_result(left_node.constant_result)
- except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError):
- pass # ignore all 'normal' errors here => no constant result
- left_node = right_node
- cmp_node = cmp_node.cascade
-
- if not node.cascade:
- if node.has_constant_result():
- return self._bool_node(node, node.constant_result)
- return node
-
- # collect partial cascades: [[value, CmpNode...], [value, CmpNode, ...], ...]
- cascades = [[node.operand1]]
- final_false_result = []
-
- def split_cascades(cmp_node):
- if cmp_node.has_constant_result():
- if not cmp_node.constant_result:
- # False => short-circuit
- final_false_result.append(self._bool_node(cmp_node, False))
- return
- else:
- # True => discard and start new cascade
- cascades.append([cmp_node.operand2])
- else:
- # not constant => append to current cascade
- cascades[-1].append(cmp_node)
- if cmp_node.cascade:
- split_cascades(cmp_node.cascade)
-
- split_cascades(node)
-
- cmp_nodes = []
- for cascade in cascades:
- if len(cascade) < 2:
- continue
- cmp_node = cascade[1]
- pcmp_node = ExprNodes.PrimaryCmpNode(
- cmp_node.pos,
- operand1=cascade[0],
- operator=cmp_node.operator,
- operand2=cmp_node.operand2,
- constant_result=not_a_constant)
- cmp_nodes.append(pcmp_node)
-
- last_cmp_node = pcmp_node
- for cmp_node in cascade[2:]:
- last_cmp_node.cascade = cmp_node
- last_cmp_node = cmp_node
- last_cmp_node.cascade = None
-
- if final_false_result:
- # last cascade was constant False
- cmp_nodes.append(final_false_result[0])
- elif not cmp_nodes:
- # only constants, but no False result
- return self._bool_node(node, True)
- node = cmp_nodes[0]
- if len(cmp_nodes) == 1:
- if node.has_constant_result():
- return self._bool_node(node, node.constant_result)
- else:
- for cmp_node in cmp_nodes[1:]:
- node = ExprNodes.BoolBinopNode(
- node.pos,
- operand1=node,
- operator='and',
- operand2=cmp_node,
- constant_result=not_a_constant)
- return node
-
- def visit_CondExprNode(self, node):
- self._calculate_const(node)
- if not node.test.has_constant_result():
- return node
- if node.test.constant_result:
- return node.true_val
- else:
- return node.false_val
-
- def visit_IfStatNode(self, node):
- self.visitchildren(node)
- # eliminate dead code based on constant condition results
- if_clauses = []
- for if_clause in node.if_clauses:
- condition = if_clause.condition
- if condition.has_constant_result():
- if condition.constant_result:
- # always true => subsequent clauses can safely be dropped
- node.else_clause = if_clause.body
- break
- # else: false => drop clause
- else:
- # unknown result => normal runtime evaluation
- if_clauses.append(if_clause)
- if if_clauses:
- node.if_clauses = if_clauses
- return node
- elif node.else_clause:
- return node.else_clause
- else:
- return Nodes.StatListNode(node.pos, stats=[])
-
- def visit_SliceIndexNode(self, node):
- self._calculate_const(node)
- # normalise start/stop values
- if node.start is None or node.start.constant_result is None:
- start = node.start = None
- else:
- start = node.start.constant_result
- if node.stop is None or node.stop.constant_result is None:
- stop = node.stop = None
- else:
- stop = node.stop.constant_result
- # cut down sliced constant sequences
- if node.constant_result is not not_a_constant:
- base = node.base
- if base.is_sequence_constructor and base.mult_factor is None:
- base.args = base.args[start:stop]
- return base
- elif base.is_string_literal:
- base = base.as_sliced_node(start, stop)
- if base is not None:
- return base
- return node
-
- def visit_ComprehensionNode(self, node):
- self.visitchildren(node)
- if isinstance(node.loop, Nodes.StatListNode) and not node.loop.stats:
- # loop was pruned already => transform into literal
- if node.type is Builtin.list_type:
- return ExprNodes.ListNode(
- node.pos, args=[], constant_result=[])
- elif node.type is Builtin.set_type:
- return ExprNodes.SetNode(
- node.pos, args=[], constant_result=set())
- elif node.type is Builtin.dict_type:
- return ExprNodes.DictNode(
- node.pos, key_value_pairs=[], constant_result={})
- return node
-
- def visit_ForInStatNode(self, node):
- self.visitchildren(node)
- sequence = node.iterator.sequence
- if isinstance(sequence, ExprNodes.SequenceNode):
- if not sequence.args:
- if node.else_clause:
- return node.else_clause
- else:
- # don't break list comprehensions
- return Nodes.StatListNode(node.pos, stats=[])
- # iterating over a list literal? => tuples are more efficient
- if isinstance(sequence, ExprNodes.ListNode):
- node.iterator.sequence = sequence.as_tuple()
- return node
-
- def visit_WhileStatNode(self, node):
- self.visitchildren(node)
- if node.condition and node.condition.has_constant_result():
- if node.condition.constant_result:
- node.condition = None
- node.else_clause = None
- else:
- return node.else_clause
- return node
-
- def visit_ExprStatNode(self, node):
- self.visitchildren(node)
- if not isinstance(node.expr, ExprNodes.ExprNode):
- # ParallelRangeTransform does this ...
- return node
- # drop unused constant expressions
- if node.expr.has_constant_result():
- return None
- return node
-
- # in the future, other nodes can have their own handler method here
- # that can replace them with a constant result node
-
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
-
+ def visit_PrimaryCmpNode(self, node):
+ # calculate constant partial results in the comparison cascade
+ self.visitchildren(node, ['operand1'])
+ left_node = node.operand1
+ cmp_node = node
+ while cmp_node is not None:
+ self.visitchildren(cmp_node, ['operand2'])
+ right_node = cmp_node.operand2
+ cmp_node.constant_result = not_a_constant
+ if left_node.has_constant_result() and right_node.has_constant_result():
+ try:
+ cmp_node.calculate_cascaded_constant_result(left_node.constant_result)
+ except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError):
+ pass # ignore all 'normal' errors here => no constant result
+ left_node = right_node
+ cmp_node = cmp_node.cascade
+
+ if not node.cascade:
+ if node.has_constant_result():
+ return self._bool_node(node, node.constant_result)
+ return node
+
+ # collect partial cascades: [[value, CmpNode...], [value, CmpNode, ...], ...]
+ cascades = [[node.operand1]]
+ final_false_result = []
+
+ def split_cascades(cmp_node):
+ if cmp_node.has_constant_result():
+ if not cmp_node.constant_result:
+ # False => short-circuit
+ final_false_result.append(self._bool_node(cmp_node, False))
+ return
+ else:
+ # True => discard and start new cascade
+ cascades.append([cmp_node.operand2])
+ else:
+ # not constant => append to current cascade
+ cascades[-1].append(cmp_node)
+ if cmp_node.cascade:
+ split_cascades(cmp_node.cascade)
+
+ split_cascades(node)
+
+ cmp_nodes = []
+ for cascade in cascades:
+ if len(cascade) < 2:
+ continue
+ cmp_node = cascade[1]
+ pcmp_node = ExprNodes.PrimaryCmpNode(
+ cmp_node.pos,
+ operand1=cascade[0],
+ operator=cmp_node.operator,
+ operand2=cmp_node.operand2,
+ constant_result=not_a_constant)
+ cmp_nodes.append(pcmp_node)
+
+ last_cmp_node = pcmp_node
+ for cmp_node in cascade[2:]:
+ last_cmp_node.cascade = cmp_node
+ last_cmp_node = cmp_node
+ last_cmp_node.cascade = None
+
+ if final_false_result:
+ # last cascade was constant False
+ cmp_nodes.append(final_false_result[0])
+ elif not cmp_nodes:
+ # only constants, but no False result
+ return self._bool_node(node, True)
+ node = cmp_nodes[0]
+ if len(cmp_nodes) == 1:
+ if node.has_constant_result():
+ return self._bool_node(node, node.constant_result)
+ else:
+ for cmp_node in cmp_nodes[1:]:
+ node = ExprNodes.BoolBinopNode(
+ node.pos,
+ operand1=node,
+ operator='and',
+ operand2=cmp_node,
+ constant_result=not_a_constant)
+ return node
+
+ def visit_CondExprNode(self, node):
+ self._calculate_const(node)
+ if not node.test.has_constant_result():
+ return node
+ if node.test.constant_result:
+ return node.true_val
+ else:
+ return node.false_val
+
+ def visit_IfStatNode(self, node):
+ self.visitchildren(node)
+ # eliminate dead code based on constant condition results
+ if_clauses = []
+ for if_clause in node.if_clauses:
+ condition = if_clause.condition
+ if condition.has_constant_result():
+ if condition.constant_result:
+ # always true => subsequent clauses can safely be dropped
+ node.else_clause = if_clause.body
+ break
+ # else: false => drop clause
+ else:
+ # unknown result => normal runtime evaluation
+ if_clauses.append(if_clause)
+ if if_clauses:
+ node.if_clauses = if_clauses
+ return node
+ elif node.else_clause:
+ return node.else_clause
+ else:
+ return Nodes.StatListNode(node.pos, stats=[])
+
+ def visit_SliceIndexNode(self, node):
+ self._calculate_const(node)
+ # normalise start/stop values
+ if node.start is None or node.start.constant_result is None:
+ start = node.start = None
+ else:
+ start = node.start.constant_result
+ if node.stop is None or node.stop.constant_result is None:
+ stop = node.stop = None
+ else:
+ stop = node.stop.constant_result
+ # cut down sliced constant sequences
+ if node.constant_result is not not_a_constant:
+ base = node.base
+ if base.is_sequence_constructor and base.mult_factor is None:
+ base.args = base.args[start:stop]
+ return base
+ elif base.is_string_literal:
+ base = base.as_sliced_node(start, stop)
+ if base is not None:
+ return base
+ return node
+
+ def visit_ComprehensionNode(self, node):
+ self.visitchildren(node)
+ if isinstance(node.loop, Nodes.StatListNode) and not node.loop.stats:
+ # loop was pruned already => transform into literal
+ if node.type is Builtin.list_type:
+ return ExprNodes.ListNode(
+ node.pos, args=[], constant_result=[])
+ elif node.type is Builtin.set_type:
+ return ExprNodes.SetNode(
+ node.pos, args=[], constant_result=set())
+ elif node.type is Builtin.dict_type:
+ return ExprNodes.DictNode(
+ node.pos, key_value_pairs=[], constant_result={})
+ return node
+
+ def visit_ForInStatNode(self, node):
+ self.visitchildren(node)
+ sequence = node.iterator.sequence
+ if isinstance(sequence, ExprNodes.SequenceNode):
+ if not sequence.args:
+ if node.else_clause:
+ return node.else_clause
+ else:
+ # don't break list comprehensions
+ return Nodes.StatListNode(node.pos, stats=[])
+ # iterating over a list literal? => tuples are more efficient
+ if isinstance(sequence, ExprNodes.ListNode):
+ node.iterator.sequence = sequence.as_tuple()
+ return node
+
+ def visit_WhileStatNode(self, node):
+ self.visitchildren(node)
+ if node.condition and node.condition.has_constant_result():
+ if node.condition.constant_result:
+ node.condition = None
+ node.else_clause = None
+ else:
+ return node.else_clause
+ return node
+
+ def visit_ExprStatNode(self, node):
+ self.visitchildren(node)
+ if not isinstance(node.expr, ExprNodes.ExprNode):
+ # ParallelRangeTransform does this ...
+ return node
+ # drop unused constant expressions
+ if node.expr.has_constant_result():
+ return None
+ return node
+
+ # in the future, other nodes can have their own handler method here
+ # that can replace them with a constant result node
+
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+
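
Aside, not part of the diff: the ConstantFolding transform above computes constant_result bottom-up and only merges nodes whose operands are all literals. The same idea, sketched on CPython's stdlib ast module rather than Cython's ExprNodes (illustration only; ast.unparse needs Python 3.9+):

import ast

class FoldAdd(ast.NodeTransformer):
    """Replace 'literal + literal' with the computed constant, bottom-up."""
    def visit_BinOp(self, node):
        self.generic_visit(node)                    # fold the children first
        if (isinstance(node.op, ast.Add)
                and isinstance(node.left, ast.Constant)
                and isinstance(node.right, ast.Constant)):
            return ast.copy_location(
                ast.Constant(node.left.value + node.right.value), node)
        return node                                 # non-literal operands stay untouched

tree = FoldAdd().visit(ast.parse("x = 1 + 2 + y"))
print(ast.unparse(tree))                            # -> x = 3 + y
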
class FinalOptimizePhase(Visitor.EnvTransform, Visitor.NodeRefCleanupMixin):
- """
- This visitor handles several commuting optimizations, and is run
- just before the C code generation phase.
-
- The optimizations currently implemented in this class are:
- - eliminate None assignment and refcounting for first assignment.
- - isinstance -> typecheck for cdef types
- - eliminate checks for None and/or types that became redundant after tree changes
+ """
+ This visitor handles several commuting optimizations, and is run
+ just before the C code generation phase.
+
+ The optimizations currently implemented in this class are:
+ - eliminate None assignment and refcounting for first assignment.
+ - isinstance -> typecheck for cdef types
+ - eliminate checks for None and/or types that became redundant after tree changes
- eliminate useless string formatting steps
- - replace Python function calls that look like method calls by a faster PyMethodCallNode
- """
+ - replace Python function calls that look like method calls by a faster PyMethodCallNode
+ """
in_loop = False
- def visit_SingleAssignmentNode(self, node):
- """Avoid redundant initialisation of local variables before their
- first assignment.
- """
- self.visitchildren(node)
- if node.first:
- lhs = node.lhs
- lhs.lhs_of_first_assignment = True
- return node
-
- def visit_SimpleCallNode(self, node):
- """
- Replace generic calls to isinstance(x, type) by a more efficient type check.
- Replace likely Python method calls by a specialised PyMethodCallNode.
- """
- self.visitchildren(node)
- function = node.function
- if function.type.is_cfunction and function.is_name:
- if function.name == 'isinstance' and len(node.args) == 2:
- type_arg = node.args[1]
- if type_arg.type.is_builtin_type and type_arg.type.name == 'type':
- cython_scope = self.context.cython_scope
- function.entry = cython_scope.lookup('PyObject_TypeCheck')
- function.type = function.entry.type
- PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
- node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
+ def visit_SingleAssignmentNode(self, node):
+ """Avoid redundant initialisation of local variables before their
+ first assignment.
+ """
+ self.visitchildren(node)
+ if node.first:
+ lhs = node.lhs
+ lhs.lhs_of_first_assignment = True
+ return node
+
+ def visit_SimpleCallNode(self, node):
+ """
+ Replace generic calls to isinstance(x, type) by a more efficient type check.
+ Replace likely Python method calls by a specialised PyMethodCallNode.
+ """
+ self.visitchildren(node)
+ function = node.function
+ if function.type.is_cfunction and function.is_name:
+ if function.name == 'isinstance' and len(node.args) == 2:
+ type_arg = node.args[1]
+ if type_arg.type.is_builtin_type and type_arg.type.name == 'type':
+ cython_scope = self.context.cython_scope
+ function.entry = cython_scope.lookup('PyObject_TypeCheck')
+ function.type = function.entry.type
+ PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
+ node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
elif (node.is_temp and function.type.is_pyobject and self.current_directives.get(
"optimize.unpack_method_calls_in_pyinit"
if not self.in_loop and self.current_env().is_module_scope
else "optimize.unpack_method_calls")):
- # optimise simple Python methods calls
- if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
+ # optimise simple Python methods calls
+ if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
node.arg_tuple.mult_factor or (node.arg_tuple.is_literal and len(node.arg_tuple.args) > 1)):
- # simple call, now exclude calls to objects that are definitely not methods
- may_be_a_method = True
- if function.type is Builtin.type_type:
- may_be_a_method = False
+ # simple call, now exclude calls to objects that are definitely not methods
+ may_be_a_method = True
+ if function.type is Builtin.type_type:
+ may_be_a_method = False
elif function.is_attribute:
if function.entry and function.entry.type.is_cfunction:
# optimised builtin method
may_be_a_method = False
- elif function.is_name:
+ elif function.is_name:
entry = function.entry
if entry.is_builtin or entry.type.is_cfunction:
- may_be_a_method = False
+ may_be_a_method = False
elif entry.cf_assignments:
- # local functions/classes are definitely not methods
- non_method_nodes = (ExprNodes.PyCFunctionNode, ExprNodes.ClassNode, ExprNodes.Py3ClassNode)
- may_be_a_method = any(
- assignment.rhs and not isinstance(assignment.rhs, non_method_nodes)
+ # local functions/classes are definitely not methods
+ non_method_nodes = (ExprNodes.PyCFunctionNode, ExprNodes.ClassNode, ExprNodes.Py3ClassNode)
+ may_be_a_method = any(
+ assignment.rhs and not isinstance(assignment.rhs, non_method_nodes)
for assignment in entry.cf_assignments)
- if may_be_a_method:
+ if may_be_a_method:
if (node.self and function.is_attribute and
isinstance(function.obj, ExprNodes.CloneNode) and function.obj.arg is node.self):
# function self object was moved into a CloneNode => undo
function.obj = function.obj.arg
- node = self.replace(node, ExprNodes.PyMethodCallNode.from_node(
- node, function=function, arg_tuple=node.arg_tuple, type=node.type))
- return node
-
+ node = self.replace(node, ExprNodes.PyMethodCallNode.from_node(
+ node, function=function, arg_tuple=node.arg_tuple, type=node.type))
+ return node
+
def visit_NumPyMethodCallNode(self, node):
# Exclude from replacement above.
self.visitchildren(node)
return node
- def visit_PyTypeTestNode(self, node):
- """Remove tests for alternatively allowed None values from
- type tests when we know that the argument cannot be None
- anyway.
- """
- self.visitchildren(node)
- if not node.notnone:
- if not node.arg.may_be_none():
- node.notnone = True
- return node
-
- def visit_NoneCheckNode(self, node):
- """Remove None checks from expressions that definitely do not
- carry a None value.
- """
- self.visitchildren(node)
- if not node.arg.may_be_none():
- return node.arg
- return node
-
+ def visit_PyTypeTestNode(self, node):
+ """Remove tests for alternatively allowed None values from
+ type tests when we know that the argument cannot be None
+ anyway.
+ """
+ self.visitchildren(node)
+ if not node.notnone:
+ if not node.arg.may_be_none():
+ node.notnone = True
+ return node
+
+ def visit_NoneCheckNode(self, node):
+ """Remove None checks from expressions that definitely do not
+ carry a None value.
+ """
+ self.visitchildren(node)
+ if not node.arg.may_be_none():
+ return node.arg
+ return node
+
def visit_LoopNode(self, node):
"""Remember when we enter a loop as some expensive optimisations might still be worth it there.
"""
@@ -4822,36 +4822,36 @@ class FinalOptimizePhase(Visitor.EnvTransform, Visitor.NodeRefCleanupMixin):
return node
-class ConsolidateOverflowCheck(Visitor.CythonTransform):
- """
- This class facilitates the sharing of overflow checking among all nodes
- of a nested arithmetic expression. For example, given the expression
- a*b + c, where a, b, and c are all possibly overflowing ints, the entire
- sequence will be evaluated and the overflow bit checked only at the end.
- """
- overflow_bit_node = None
-
- def visit_Node(self, node):
- if self.overflow_bit_node is not None:
- saved = self.overflow_bit_node
- self.overflow_bit_node = None
- self.visitchildren(node)
- self.overflow_bit_node = saved
- else:
- self.visitchildren(node)
- return node
-
- def visit_NumBinopNode(self, node):
- if node.overflow_check and node.overflow_fold:
- top_level_overflow = self.overflow_bit_node is None
- if top_level_overflow:
- self.overflow_bit_node = node
- else:
- node.overflow_bit_node = self.overflow_bit_node
- node.overflow_check = False
- self.visitchildren(node)
- if top_level_overflow:
- self.overflow_bit_node = None
- else:
- self.visitchildren(node)
- return node
+class ConsolidateOverflowCheck(Visitor.CythonTransform):
+ """
+ This class facilitates the sharing of overflow checking among all nodes
+ of a nested arithmetic expression. For example, given the expression
+ a*b + c, where a, b, and c are all possibly overflowing ints, the entire
+ sequence will be evaluated and the overflow bit checked only at the end.
+ """
+ overflow_bit_node = None
+
+ def visit_Node(self, node):
+ if self.overflow_bit_node is not None:
+ saved = self.overflow_bit_node
+ self.overflow_bit_node = None
+ self.visitchildren(node)
+ self.overflow_bit_node = saved
+ else:
+ self.visitchildren(node)
+ return node
+
+ def visit_NumBinopNode(self, node):
+ if node.overflow_check and node.overflow_fold:
+ top_level_overflow = self.overflow_bit_node is None
+ if top_level_overflow:
+ self.overflow_bit_node = node
+ else:
+ node.overflow_bit_node = self.overflow_bit_node
+ node.overflow_check = False
+ self.visitchildren(node)
+ if top_level_overflow:
+ self.overflow_bit_node = None
+ else:
+ self.visitchildren(node)
+ return node
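
The ConsolidateOverflowCheck docstring above describes folding the overflow flags of a nested arithmetic expression so that the bit is tested only once, at the end. The following is a minimal pure-Python sketch of that idea, using hypothetical 32-bit wrapping helpers; it is an illustration of the technique, not Cython's generated code.

INT32_MIN, INT32_MAX = -2**31, 2**31 - 1

def wrap32(x):
    # wrap an arbitrary Python int into signed 32-bit range, as C int overflow would
    return ((x - INT32_MIN) % 2**32) + INT32_MIN

def a_mul_b_plus_c_checked(a, b, c):
    overflow = False                 # the single, shared "overflow bit"
    t = wrap32(a * b)
    overflow |= (t != a * b)         # record overflow, but do not branch yet
    r = wrap32(t + c)
    overflow |= (r != t + c)
    if overflow:                     # one consolidated check for the whole expression
        raise OverflowError("value too large to perform arithmetic")
    return r

print(a_mul_b_plus_c_checked(3, 4, 5))   # 17
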
diff --git a/contrib/tools/cython/Cython/Compiler/Options.py b/contrib/tools/cython/Cython/Compiler/Options.py
index dbd0308bf9..b3ffbcd927 100644
--- a/contrib/tools/cython/Cython/Compiler/Options.py
+++ b/contrib/tools/cython/Cython/Compiler/Options.py
@@ -1,14 +1,14 @@
-#
-# Cython - Compilation-wide options and pragma declarations
-#
-
-from __future__ import absolute_import
-
+#
+# Cython - Compilation-wide options and pragma declarations
+#
+
+from __future__ import absolute_import
+
class ShouldBeFromDirective(object):
-
+
known_directives = []
-
+
def __init__(self, options_name, directive_name=None, disallow=False):
self.options_name = options_name
self.directive_name = directive_name or options_name
@@ -43,8 +43,8 @@ the members in both this file and the .rst file.
#: Whether or not to include docstring in the Python extension. If False, the binary size
#: will be smaller, but the ``__doc__`` attribute of any class or function will be an
#: empty string.
-docstrings = True
-
+docstrings = True
+
#: Embed the source code position in the docstrings of functions and classes.
embed_pos_in_docstring = False
@@ -64,38 +64,38 @@ pre_import = None
#: variables or types may no longer be safe when enabling the respective level since
#: there is no guaranteed order in which the (reference counted) objects will
#: be cleaned up. The order can change due to live references and reference cycles.
-generate_cleanup_code = False
-
+generate_cleanup_code = False
+
#: Should tp_clear() set object fields to None instead of clearing them to NULL?
clear_to_none = True
#: Generate an annotated HTML version of the input source files for debugging and optimisation purposes.
#: This has the same effect as the ``annotate`` argument in :func:`cythonize`.
-annotate = False
-
+annotate = False
+
# When annotating source files in HTML, include coverage information from
# this file.
annotate_coverage_xml = None
#: This will abort the compilation on the first error occurred rather than trying
#: to keep going and printing further error messages.
-fast_fail = False
-
+fast_fail = False
+
#: Turn all warnings into errors.
-warning_errors = False
-
+warning_errors = False
+
#: Make unknown names an error. Python raises a NameError when
#: encountering unknown names at runtime, whereas this option makes
#: them a compile time error. If you want full Python compatibility,
#: you should disable this option and also 'cache_builtins'.
-error_on_unknown_names = True
-
+error_on_unknown_names = True
+
#: Make uninitialized local variable reference a compile time error.
#: Python raises UnboundLocalError at runtime, whereas this option makes
#: them a compile time error. Note that this option affects only variables
#: of "python object" type.
-error_on_uninitialized = True
-
+error_on_uninitialized = True
+
#: This will convert statements of the form ``for i in range(...)``
#: to ``for i from ...`` when ``i`` is a C integer type, and the direction
#: (i.e. sign of step) can be determined.
@@ -103,8 +103,8 @@ error_on_uninitialized = True
#: i to overflow. Specifically, if this option is set, an error will be
#: raised before the loop is entered, whereas without this option the loop
#: will execute until an overflowing value is encountered.
-convert_range = True
-
+convert_range = True
+
#: Perform lookups on builtin names only once, at module initialisation
#: time. This will prevent the module from getting imported if a
#: builtin name that it uses cannot be found during initialisation.
@@ -122,8 +122,8 @@ gcc_branch_hints = True
#: definition if the cpdef function foo, at the cost of an extra dictionary
#: lookup on every call.
#: If this is false it generates only the Python wrapper and no override check.
-lookup_module_cpdef = False
-
+lookup_module_cpdef = False
+
#: Whether or not to embed the Python interpreter, for use in making a
#: standalone executable or calling from external libraries.
#: This will provide a C function which initialises the interpreter and
@@ -133,26 +133,26 @@ lookup_module_cpdef = False
#: If true, the initialisation function is the C main() function, but
#: this option can also be set to a non-empty string to provide a function name explicitly.
#: Default is False.
-embed = None
-
-# In previous iterations of Cython, globals() gave the first non-Cython module
-# globals in the call stack. Sage relies on this behavior for variable injection.
+embed = None
+
+# In previous iterations of Cython, globals() gave the first non-Cython module
+# globals in the call stack. Sage relies on this behavior for variable injection.
old_style_globals = ShouldBeFromDirective('old_style_globals')
-
+
#: Allows cimporting from a pyx file without a pxd file.
-cimport_from_pyx = False
-
+cimport_from_pyx = False
+
#: Maximum number of dimensions for buffers -- set lower than number of
#: dimensions in numpy, as
#: slices are passed by value and involve a lot of copying.
-buffer_max_dims = 8
-
+buffer_max_dims = 8
+
#: Number of function closure instances to keep in a freelist (0: no freelists)
-closure_freelist_size = 8
-
+closure_freelist_size = 8
+
# Arcadia specific
source_root = None
-
+
def get_directive_defaults():
# To add an item to this list, all accesses should be changed to use the new
@@ -170,192 +170,192 @@ def get_directive_defaults():
# Warn?
_directive_defaults[old_option.directive_name] = value
return _directive_defaults
-
-# Declare compiler directives
+
+# Declare compiler directives
_directive_defaults = {
- 'boundscheck' : True,
- 'nonecheck' : False,
- 'initializedcheck' : True,
- 'embedsignature' : False,
- 'auto_cpdef': False,
+ 'boundscheck' : True,
+ 'nonecheck' : False,
+ 'initializedcheck' : True,
+ 'embedsignature' : False,
+ 'auto_cpdef': False,
'auto_pickle': None,
'cdivision': False, # was True before 0.12
- 'cdivision_warnings': False,
+ 'cdivision_warnings': False,
'c_api_binop_methods': True,
- 'overflowcheck': False,
- 'overflowcheck.fold': True,
- 'always_allow_keywords': False,
- 'allow_none_for_extension_args': True,
- 'wraparound' : True,
+ 'overflowcheck': False,
+ 'overflowcheck.fold': True,
+ 'always_allow_keywords': False,
+ 'allow_none_for_extension_args': True,
+ 'wraparound' : True,
'ccomplex' : False, # use C99/C++ for complex types and arith
- 'callspec' : "",
+ 'callspec' : "",
'nogil' : False,
- 'profile': False,
- 'linetrace': False,
+ 'profile': False,
+ 'linetrace': False,
'emit_code_comments': True, # copy original source code into C code comments
'annotation_typing': True, # read type declarations from Python function annotations
- 'infer_types': None,
- 'infer_types.verbose': False,
- 'autotestdict': True,
- 'autotestdict.cdef': False,
- 'autotestdict.all': False,
+ 'infer_types': None,
+ 'infer_types.verbose': False,
+ 'autotestdict': True,
+ 'autotestdict.cdef': False,
+ 'autotestdict.all': False,
'language_level': None,
'fast_getattr': False, # Undocumented until we come up with a better way to handle this everywhere.
'py2_import': False, # For backward compatibility of Cython's source code in Py3 source mode
'preliminary_late_includes_cy28': False, # Temporary directive in 0.28, to be removed in a later version (see GH#2079).
'iterable_coroutine': False, # Make async coroutines backwards compatible with the old asyncio yield-from syntax.
- 'c_string_type': 'bytes',
- 'c_string_encoding': '',
+ 'c_string_type': 'bytes',
+ 'c_string_encoding': '',
'type_version_tag': True, # enables Py_TPFLAGS_HAVE_VERSION_TAG on extension types
'unraisable_tracebacks': True,
'old_style_globals': False,
'np_pythran': False,
'fast_gil': False,
-
- # set __file__ and/or __path__ to known source/target path at import time (instead of not having them available)
- 'set_initial_path' : None, # SOURCEFILE or "/full/path/to/module"
-
- 'warn': None,
- 'warn.undeclared': False,
- 'warn.unreachable': True,
- 'warn.maybe_uninitialized': False,
- 'warn.unused': False,
- 'warn.unused_arg': False,
- 'warn.unused_result': False,
- 'warn.multiple_declarators': True,
-
-# optimizations
- 'optimize.inline_defnode_calls': True,
+
+ # set __file__ and/or __path__ to known source/target path at import time (instead of not having them available)
+ 'set_initial_path' : None, # SOURCEFILE or "/full/path/to/module"
+
+ 'warn': None,
+ 'warn.undeclared': False,
+ 'warn.unreachable': True,
+ 'warn.maybe_uninitialized': False,
+ 'warn.unused': False,
+ 'warn.unused_arg': False,
+ 'warn.unused_result': False,
+ 'warn.multiple_declarators': True,
+
+# optimizations
+ 'optimize.inline_defnode_calls': True,
'optimize.unpack_method_calls': True, # increases code size when True
'optimize.unpack_method_calls_in_pyinit': False, # uselessly increases code size when True
- 'optimize.use_switch': True,
-
-# remove unreachable code
- 'remove_unreachable': True,
-
-# control flow debug directives
+ 'optimize.use_switch': True,
+
+# remove unreachable code
+ 'remove_unreachable': True,
+
+# control flow debug directives
'control_flow.dot_output': "", # Graphviz output filename
'control_flow.dot_annotate_defs': False, # Annotate definitions
-
-# test support
- 'test_assert_path_exists' : [],
- 'test_fail_if_path_exists' : [],
-
-# experimental, subject to change
- 'binding': None,
+
+# test support
+ 'test_assert_path_exists' : [],
+ 'test_fail_if_path_exists' : [],
+
+# experimental, subject to change
+ 'binding': None,
'formal_grammar': False,
-}
-
-# Extra warning directives
-extra_warnings = {
- 'warn.maybe_uninitialized': True,
- 'warn.unreachable': True,
- 'warn.unused': True,
-}
-
-def one_of(*args):
- def validate(name, value):
- if value not in args:
- raise ValueError("%s directive must be one of %s, got '%s'" % (
- name, args, value))
- else:
- return value
- return validate
-
-
-def normalise_encoding_name(option_name, encoding):
- """
- >>> normalise_encoding_name('c_string_encoding', 'ascii')
- 'ascii'
- >>> normalise_encoding_name('c_string_encoding', 'AsCIi')
- 'ascii'
- >>> normalise_encoding_name('c_string_encoding', 'us-ascii')
- 'ascii'
- >>> normalise_encoding_name('c_string_encoding', 'utF8')
- 'utf8'
- >>> normalise_encoding_name('c_string_encoding', 'utF-8')
- 'utf8'
- >>> normalise_encoding_name('c_string_encoding', 'deFAuLT')
- 'default'
- >>> normalise_encoding_name('c_string_encoding', 'default')
- 'default'
- >>> normalise_encoding_name('c_string_encoding', 'SeriousLyNoSuch--Encoding')
- 'SeriousLyNoSuch--Encoding'
- """
- if not encoding:
- return ''
- if encoding.lower() in ('default', 'ascii', 'utf8'):
- return encoding.lower()
- import codecs
- try:
- decoder = codecs.getdecoder(encoding)
- except LookupError:
- return encoding # may exist at runtime ...
- for name in ('ascii', 'utf8'):
- if codecs.getdecoder(name) == decoder:
- return name
- return encoding
-
-
-# Override types possibilities above, if needed
-directive_types = {
+}
+
+# Extra warning directives
+extra_warnings = {
+ 'warn.maybe_uninitialized': True,
+ 'warn.unreachable': True,
+ 'warn.unused': True,
+}
+
+def one_of(*args):
+ def validate(name, value):
+ if value not in args:
+ raise ValueError("%s directive must be one of %s, got '%s'" % (
+ name, args, value))
+ else:
+ return value
+ return validate
+
+
+def normalise_encoding_name(option_name, encoding):
+ """
+ >>> normalise_encoding_name('c_string_encoding', 'ascii')
+ 'ascii'
+ >>> normalise_encoding_name('c_string_encoding', 'AsCIi')
+ 'ascii'
+ >>> normalise_encoding_name('c_string_encoding', 'us-ascii')
+ 'ascii'
+ >>> normalise_encoding_name('c_string_encoding', 'utF8')
+ 'utf8'
+ >>> normalise_encoding_name('c_string_encoding', 'utF-8')
+ 'utf8'
+ >>> normalise_encoding_name('c_string_encoding', 'deFAuLT')
+ 'default'
+ >>> normalise_encoding_name('c_string_encoding', 'default')
+ 'default'
+ >>> normalise_encoding_name('c_string_encoding', 'SeriousLyNoSuch--Encoding')
+ 'SeriousLyNoSuch--Encoding'
+ """
+ if not encoding:
+ return ''
+ if encoding.lower() in ('default', 'ascii', 'utf8'):
+ return encoding.lower()
+ import codecs
+ try:
+ decoder = codecs.getdecoder(encoding)
+ except LookupError:
+ return encoding # may exist at runtime ...
+ for name in ('ascii', 'utf8'):
+ if codecs.getdecoder(name) == decoder:
+ return name
+ return encoding
+
+
+# Override types possibilities above, if needed
+directive_types = {
'language_level': str, # values can be None/2/3/'3str', where None == 2+warning
'auto_pickle': bool,
'locals': dict,
- 'final' : bool, # final cdef classes and methods
+ 'final' : bool, # final cdef classes and methods
'nogil' : bool,
- 'internal' : bool, # cdef class visibility in the module dict
+ 'internal' : bool, # cdef class visibility in the module dict
'infer_types' : bool, # values can be True/None/False
- 'binding' : bool,
+ 'binding' : bool,
'cfunc' : None, # decorators do not take directive value
- 'ccall' : None,
- 'inline' : None,
- 'staticmethod' : None,
- 'cclass' : None,
+ 'ccall' : None,
+ 'inline' : None,
+ 'staticmethod' : None,
+ 'cclass' : None,
'no_gc_clear' : bool,
'no_gc' : bool,
- 'returns' : type,
+ 'returns' : type,
'exceptval': type, # actually (type, check=True/False), but has its own parser
- 'set_initial_path': str,
- 'freelist': int,
- 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
- 'c_string_encoding': normalise_encoding_name,
-}
-
+ 'set_initial_path': str,
+ 'freelist': int,
+ 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
+ 'c_string_encoding': normalise_encoding_name,
+}
+
for key, val in _directive_defaults.items():
- if key not in directive_types:
- directive_types[key] = type(val)
-
+ if key not in directive_types:
+ directive_types[key] = type(val)
+
directive_scopes = { # defaults to available everywhere
- # 'module', 'function', 'class', 'with statement'
+ # 'module', 'function', 'class', 'with statement'
'auto_pickle': ('module', 'cclass'),
- 'final' : ('cclass', 'function'),
+ 'final' : ('cclass', 'function'),
'nogil' : ('function', 'with statement'),
- 'inline' : ('function',),
+ 'inline' : ('function',),
'cfunc' : ('function', 'with statement'),
'ccall' : ('function', 'with statement'),
'returns' : ('function',),
'exceptval' : ('function',),
'locals' : ('function',),
- 'staticmethod' : ('function',), # FIXME: analysis currently lacks more specific function scope
- 'no_gc_clear' : ('cclass',),
+ 'staticmethod' : ('function',), # FIXME: analysis currently lacks more specific function scope
+ 'no_gc_clear' : ('cclass',),
'no_gc' : ('cclass',),
- 'internal' : ('cclass',),
+ 'internal' : ('cclass',),
'cclass' : ('class', 'cclass', 'with statement'),
- 'autotestdict' : ('module',),
- 'autotestdict.all' : ('module',),
- 'autotestdict.cdef' : ('module',),
- 'set_initial_path' : ('module',),
- 'test_assert_path_exists' : ('function', 'class', 'cclass'),
- 'test_fail_if_path_exists' : ('function', 'class', 'cclass'),
- 'freelist': ('cclass',),
+ 'autotestdict' : ('module',),
+ 'autotestdict.all' : ('module',),
+ 'autotestdict.cdef' : ('module',),
+ 'set_initial_path' : ('module',),
+ 'test_assert_path_exists' : ('function', 'class', 'cclass'),
+ 'test_fail_if_path_exists' : ('function', 'class', 'cclass'),
+ 'freelist': ('cclass',),
'emit_code_comments': ('module',),
- 'annotation_typing': ('module',), # FIXME: analysis currently lacks more specific function scope
- # Avoid scope-specific to/from_py_functions for c_string.
- 'c_string_type': ('module',),
- 'c_string_encoding': ('module',),
- 'type_version_tag': ('module', 'cclass'),
+ 'annotation_typing': ('module',), # FIXME: analysis currently lacks more specific function scope
+ # Avoid scope-specific to/from_py_functions for c_string.
+ 'c_string_type': ('module',),
+ 'c_string_encoding': ('module',),
+ 'type_version_tag': ('module', 'cclass'),
'language_level': ('module',),
# globals() could conceivably be controlled at a finer granularity,
# but that would complicate the implementation
@@ -363,124 +363,124 @@ directive_scopes = { # defaults to available everywhere
'np_pythran': ('module',),
'fast_gil': ('module',),
'iterable_coroutine': ('module', 'function'),
-}
-
-
-def parse_directive_value(name, value, relaxed_bool=False):
- """
- Parses value as an option value for the given name and returns
- the interpreted value. None is returned if the option does not exist.
-
+}
+
+
+def parse_directive_value(name, value, relaxed_bool=False):
+ """
+ Parses value as an option value for the given name and returns
+ the interpreted value. None is returned if the option does not exist.
+
>>> print(parse_directive_value('nonexisting', 'asdf asdfd'))
- None
- >>> parse_directive_value('boundscheck', 'True')
- True
- >>> parse_directive_value('boundscheck', 'true')
- Traceback (most recent call last):
- ...
- ValueError: boundscheck directive must be set to True or False, got 'true'
-
- >>> parse_directive_value('c_string_encoding', 'us-ascii')
- 'ascii'
- >>> parse_directive_value('c_string_type', 'str')
- 'str'
- >>> parse_directive_value('c_string_type', 'bytes')
- 'bytes'
- >>> parse_directive_value('c_string_type', 'bytearray')
- 'bytearray'
- >>> parse_directive_value('c_string_type', 'unicode')
- 'unicode'
- >>> parse_directive_value('c_string_type', 'unnicode')
- Traceback (most recent call last):
- ValueError: c_string_type directive must be one of ('bytes', 'bytearray', 'str', 'unicode'), got 'unnicode'
- """
- type = directive_types.get(name)
+ None
+ >>> parse_directive_value('boundscheck', 'True')
+ True
+ >>> parse_directive_value('boundscheck', 'true')
+ Traceback (most recent call last):
+ ...
+ ValueError: boundscheck directive must be set to True or False, got 'true'
+
+ >>> parse_directive_value('c_string_encoding', 'us-ascii')
+ 'ascii'
+ >>> parse_directive_value('c_string_type', 'str')
+ 'str'
+ >>> parse_directive_value('c_string_type', 'bytes')
+ 'bytes'
+ >>> parse_directive_value('c_string_type', 'bytearray')
+ 'bytearray'
+ >>> parse_directive_value('c_string_type', 'unicode')
+ 'unicode'
+ >>> parse_directive_value('c_string_type', 'unnicode')
+ Traceback (most recent call last):
+ ValueError: c_string_type directive must be one of ('bytes', 'bytearray', 'str', 'unicode'), got 'unnicode'
+ """
+ type = directive_types.get(name)
if not type:
return None
- orig_value = value
- if type is bool:
- value = str(value)
+ orig_value = value
+ if type is bool:
+ value = str(value)
if value == 'True':
return True
if value == 'False':
return False
- if relaxed_bool:
- value = value.lower()
+ if relaxed_bool:
+ value = value.lower()
if value in ("true", "yes"):
return True
elif value in ("false", "no"):
return False
- raise ValueError("%s directive must be set to True or False, got '%s'" % (
- name, orig_value))
- elif type is int:
- try:
- return int(value)
- except ValueError:
- raise ValueError("%s directive must be set to an integer, got '%s'" % (
- name, orig_value))
- elif type is str:
- return str(value)
- elif callable(type):
- return type(name, value)
- else:
- assert False
-
-
-def parse_directive_list(s, relaxed_bool=False, ignore_unknown=False,
- current_settings=None):
- """
- Parses a comma-separated list of pragma options. Whitespace
- is not considered.
-
- >>> parse_directive_list(' ')
- {}
- >>> (parse_directive_list('boundscheck=True') ==
- ... {'boundscheck': True})
- True
- >>> parse_directive_list(' asdf')
- Traceback (most recent call last):
- ...
- ValueError: Expected "=" in option "asdf"
- >>> parse_directive_list('boundscheck=hey')
- Traceback (most recent call last):
- ...
- ValueError: boundscheck directive must be set to True or False, got 'hey'
- >>> parse_directive_list('unknown=True')
- Traceback (most recent call last):
- ...
- ValueError: Unknown option: "unknown"
- >>> warnings = parse_directive_list('warn.all=True')
- >>> len(warnings) > 1
- True
- >>> sum(warnings.values()) == len(warnings) # all true.
- True
- """
- if current_settings is None:
- result = {}
- else:
- result = current_settings
- for item in s.split(','):
- item = item.strip()
+ raise ValueError("%s directive must be set to True or False, got '%s'" % (
+ name, orig_value))
+ elif type is int:
+ try:
+ return int(value)
+ except ValueError:
+ raise ValueError("%s directive must be set to an integer, got '%s'" % (
+ name, orig_value))
+ elif type is str:
+ return str(value)
+ elif callable(type):
+ return type(name, value)
+ else:
+ assert False
+
+
+def parse_directive_list(s, relaxed_bool=False, ignore_unknown=False,
+ current_settings=None):
+ """
+ Parses a comma-separated list of pragma options. Whitespace
+ is not considered.
+
+ >>> parse_directive_list(' ')
+ {}
+ >>> (parse_directive_list('boundscheck=True') ==
+ ... {'boundscheck': True})
+ True
+ >>> parse_directive_list(' asdf')
+ Traceback (most recent call last):
+ ...
+ ValueError: Expected "=" in option "asdf"
+ >>> parse_directive_list('boundscheck=hey')
+ Traceback (most recent call last):
+ ...
+ ValueError: boundscheck directive must be set to True or False, got 'hey'
+ >>> parse_directive_list('unknown=True')
+ Traceback (most recent call last):
+ ...
+ ValueError: Unknown option: "unknown"
+ >>> warnings = parse_directive_list('warn.all=True')
+ >>> len(warnings) > 1
+ True
+ >>> sum(warnings.values()) == len(warnings) # all true.
+ True
+ """
+ if current_settings is None:
+ result = {}
+ else:
+ result = current_settings
+ for item in s.split(','):
+ item = item.strip()
if not item:
continue
if '=' not in item:
raise ValueError('Expected "=" in option "%s"' % item)
name, value = [s.strip() for s in item.strip().split('=', 1)]
if name not in _directive_defaults:
- found = False
- if name.endswith('.all'):
- prefix = name[:-3]
+ found = False
+ if name.endswith('.all'):
+ prefix = name[:-3]
for directive in _directive_defaults:
- if directive.startswith(prefix):
- found = True
- parsed_value = parse_directive_value(directive, value, relaxed_bool=relaxed_bool)
- result[directive] = parsed_value
- if not found and not ignore_unknown:
- raise ValueError('Unknown option: "%s"' % name)
- else:
- parsed_value = parse_directive_value(name, value, relaxed_bool=relaxed_bool)
- result[name] = parsed_value
- return result
+ if directive.startswith(prefix):
+ found = True
+ parsed_value = parse_directive_value(directive, value, relaxed_bool=relaxed_bool)
+ result[directive] = parsed_value
+ if not found and not ignore_unknown:
+ raise ValueError('Unknown option: "%s"' % name)
+ else:
+ parsed_value = parse_directive_value(name, value, relaxed_bool=relaxed_bool)
+ result[name] = parsed_value
+ return result
def parse_variable_value(value):
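
The directive parser added above can be exercised directly; this short usage sketch mirrors its doctests and assumes a Cython installation that exposes Cython.Compiler.Options.

from Cython.Compiler.Options import parse_directive_list, parse_directive_value

print(parse_directive_value('boundscheck', 'True'))               # True
print(parse_directive_list('boundscheck=False, wraparound=False'))
# {'boundscheck': False, 'wraparound': False}

# A name ending in ".all" expands to every directive sharing that prefix.
warnings = parse_directive_list('warn.all=True')
print(len(warnings) > 1, all(warnings.values()))                  # True True
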
diff --git a/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.pxd b/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.pxd
index ae222b5f7b..2c17901fa4 100644
--- a/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.pxd
+++ b/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.pxd
@@ -1,82 +1,82 @@
-
-from __future__ import absolute_import
-
-cimport cython
-
-from .Visitor cimport (
- CythonTransform, VisitorTransform, TreeVisitor,
- ScopeTrackingTransform, EnvTransform)
-
-cdef class SkipDeclarations: # (object):
- pass
-
-cdef class NormalizeTree(CythonTransform):
- cdef bint is_in_statlist
- cdef bint is_in_expr
- cpdef visit_StatNode(self, node, is_listcontainer=*)
-
-cdef class PostParse(ScopeTrackingTransform):
- cdef dict specialattribute_handlers
- cdef size_t lambda_counter
- cdef size_t genexpr_counter
- cdef _visit_assignment_node(self, node, list expr_list)
-
-
-#def eliminate_rhs_duplicates(list expr_list_list, list ref_node_sequence)
-#def sort_common_subsequences(list items)
-@cython.locals(starred_targets=Py_ssize_t, lhs_size=Py_ssize_t, rhs_size=Py_ssize_t)
-cdef flatten_parallel_assignments(list input, list output)
-cdef map_starred_assignment(list lhs_targets, list starred_assignments, list lhs_args, list rhs_args)
-
-#class PxdPostParse(CythonTransform, SkipDeclarations):
-#class InterpretCompilerDirectives(CythonTransform, SkipDeclarations):
-#class WithTransform(CythonTransform, SkipDeclarations):
-#class DecoratorTransform(CythonTransform, SkipDeclarations):
-
-#class AnalyseDeclarationsTransform(EnvTransform):
-
-cdef class AnalyseExpressionsTransform(CythonTransform):
- pass
-
-cdef class ExpandInplaceOperators(EnvTransform):
- pass
-
-cdef class AlignFunctionDefinitions(CythonTransform):
- cdef dict directives
+
+from __future__ import absolute_import
+
+cimport cython
+
+from .Visitor cimport (
+ CythonTransform, VisitorTransform, TreeVisitor,
+ ScopeTrackingTransform, EnvTransform)
+
+cdef class SkipDeclarations: # (object):
+ pass
+
+cdef class NormalizeTree(CythonTransform):
+ cdef bint is_in_statlist
+ cdef bint is_in_expr
+ cpdef visit_StatNode(self, node, is_listcontainer=*)
+
+cdef class PostParse(ScopeTrackingTransform):
+ cdef dict specialattribute_handlers
+ cdef size_t lambda_counter
+ cdef size_t genexpr_counter
+ cdef _visit_assignment_node(self, node, list expr_list)
+
+
+#def eliminate_rhs_duplicates(list expr_list_list, list ref_node_sequence)
+#def sort_common_subsequences(list items)
+@cython.locals(starred_targets=Py_ssize_t, lhs_size=Py_ssize_t, rhs_size=Py_ssize_t)
+cdef flatten_parallel_assignments(list input, list output)
+cdef map_starred_assignment(list lhs_targets, list starred_assignments, list lhs_args, list rhs_args)
+
+#class PxdPostParse(CythonTransform, SkipDeclarations):
+#class InterpretCompilerDirectives(CythonTransform, SkipDeclarations):
+#class WithTransform(CythonTransform, SkipDeclarations):
+#class DecoratorTransform(CythonTransform, SkipDeclarations):
+
+#class AnalyseDeclarationsTransform(EnvTransform):
+
+cdef class AnalyseExpressionsTransform(CythonTransform):
+ pass
+
+cdef class ExpandInplaceOperators(EnvTransform):
+ pass
+
+cdef class AlignFunctionDefinitions(CythonTransform):
+ cdef dict directives
cdef set imported_names
cdef object scope
-
+
@cython.final
-cdef class YieldNodeCollector(TreeVisitor):
- cdef public list yields
- cdef public list returns
+cdef class YieldNodeCollector(TreeVisitor):
+ cdef public list yields
+ cdef public list returns
cdef public list finallys
cdef public list excepts
- cdef public bint has_return_value
+ cdef public bint has_return_value
cdef public bint has_yield
cdef public bint has_await
-
+
@cython.final
-cdef class MarkClosureVisitor(CythonTransform):
- cdef bint needs_closure
-
+cdef class MarkClosureVisitor(CythonTransform):
+ cdef bint needs_closure
+
@cython.final
-cdef class CreateClosureClasses(CythonTransform):
- cdef list path
- cdef bint in_lambda
- cdef module_scope
- cdef generator_class
-
+cdef class CreateClosureClasses(CythonTransform):
+ cdef list path
+ cdef bint in_lambda
+ cdef module_scope
+ cdef generator_class
+
cdef create_class_from_scope(self, node, target_module_scope, inner_node=*)
cdef find_entries_used_in_closures(self, node)
#cdef class InjectGilHandling(VisitorTransform, SkipDeclarations):
# cdef bint nogil
-cdef class GilCheck(VisitorTransform):
- cdef list env_stack
- cdef bint nogil
- cdef bint nogil_declarator_only
-
-cdef class TransformBuiltinMethods(EnvTransform):
- cdef visit_cython_attribute(self, node)
+cdef class GilCheck(VisitorTransform):
+ cdef list env_stack
+ cdef bint nogil
+ cdef bint nogil_declarator_only
+
+cdef class TransformBuiltinMethods(EnvTransform):
+ cdef visit_cython_attribute(self, node)
diff --git a/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.py b/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.py
index e6a6cfbae4..0da3670cae 100644
--- a/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.py
+++ b/contrib/tools/cython/Cython/Compiler/ParseTreeTransforms.py
@@ -1,210 +1,210 @@
-from __future__ import absolute_import
-
-import cython
-cython.declare(PyrexTypes=object, Naming=object, ExprNodes=object, Nodes=object,
- Options=object, UtilNodes=object, LetNode=object,
- LetRefNode=object, TreeFragment=object, EncodedString=object,
+from __future__ import absolute_import
+
+import cython
+cython.declare(PyrexTypes=object, Naming=object, ExprNodes=object, Nodes=object,
+ Options=object, UtilNodes=object, LetNode=object,
+ LetRefNode=object, TreeFragment=object, EncodedString=object,
error=object, warning=object, copy=object, _unicode=object)
-
+
import copy
import hashlib
-from . import PyrexTypes
-from . import Naming
-from . import ExprNodes
-from . import Nodes
-from . import Options
-from . import Builtin
+from . import PyrexTypes
+from . import Naming
+from . import ExprNodes
+from . import Nodes
+from . import Options
+from . import Builtin
from . import Errors
-
-from .Visitor import VisitorTransform, TreeVisitor
-from .Visitor import CythonTransform, EnvTransform, ScopeTrackingTransform
+
+from .Visitor import VisitorTransform, TreeVisitor
+from .Visitor import CythonTransform, EnvTransform, ScopeTrackingTransform
from .UtilNodes import LetNode, LetRefNode
-from .TreeFragment import TreeFragment
+from .TreeFragment import TreeFragment
from .StringEncoding import EncodedString, _unicode
-from .Errors import error, warning, CompileError, InternalError
-from .Code import UtilityCode
-
-
-class SkipDeclarations(object):
- """
- Variable and function declarations can often have a deep tree structure,
- and yet most transformations don't need to descend to this depth.
-
- Declaration nodes are removed after AnalyseDeclarationsTransform, so there
- is no need to use this for transformations after that point.
- """
- def visit_CTypeDefNode(self, node):
- return node
-
- def visit_CVarDefNode(self, node):
- return node
-
- def visit_CDeclaratorNode(self, node):
- return node
-
- def visit_CBaseTypeNode(self, node):
- return node
-
- def visit_CEnumDefNode(self, node):
- return node
-
- def visit_CStructOrUnionDefNode(self, node):
- return node
-
-
-class NormalizeTree(CythonTransform):
- """
- This transform fixes up a few things after parsing
- in order to make the parse tree more suitable for
- transforms.
-
- a) After parsing, blocks with only one statement will
- be represented by that statement, not by a StatListNode.
- When doing transforms this is annoying and inconsistent,
- as one cannot in general remove a statement in a consistent
- way and so on. This transform wraps any single statements
- in a StatListNode containing a single statement.
-
- b) The PassStatNode is a noop and serves no purpose beyond
- plugging such one-statement blocks; i.e., once parsed a
- "pass" can just as well be represented using an empty
- StatListNode. This means fewer special cases to worry about
- in subsequent transforms (one always checks to see if a
- StatListNode has no children to see if the block is empty).
- """
-
- def __init__(self, context):
- super(NormalizeTree, self).__init__(context)
- self.is_in_statlist = False
- self.is_in_expr = False
-
- def visit_ExprNode(self, node):
- stacktmp = self.is_in_expr
- self.is_in_expr = True
- self.visitchildren(node)
- self.is_in_expr = stacktmp
- return node
-
- def visit_StatNode(self, node, is_listcontainer=False):
- stacktmp = self.is_in_statlist
- self.is_in_statlist = is_listcontainer
- self.visitchildren(node)
- self.is_in_statlist = stacktmp
- if not self.is_in_statlist and not self.is_in_expr:
- return Nodes.StatListNode(pos=node.pos, stats=[node])
- else:
- return node
-
- def visit_StatListNode(self, node):
- self.is_in_statlist = True
- self.visitchildren(node)
- self.is_in_statlist = False
- return node
-
- def visit_ParallelAssignmentNode(self, node):
- return self.visit_StatNode(node, True)
-
- def visit_CEnumDefNode(self, node):
- return self.visit_StatNode(node, True)
-
- def visit_CStructOrUnionDefNode(self, node):
- return self.visit_StatNode(node, True)
-
- def visit_PassStatNode(self, node):
- """Eliminate PassStatNode"""
- if not self.is_in_statlist:
- return Nodes.StatListNode(pos=node.pos, stats=[])
- else:
- return []
-
- def visit_ExprStatNode(self, node):
- """Eliminate useless string literals"""
- if node.expr.is_string_literal:
- return self.visit_PassStatNode(node)
- else:
- return self.visit_StatNode(node)
-
- def visit_CDeclaratorNode(self, node):
- return node
-
-
-class PostParseError(CompileError): pass
-
-# error strings checked by unit tests, so define them
-ERR_CDEF_INCLASS = 'Cannot assign default value to fields in cdef classes, structs or unions'
-ERR_BUF_DEFAULTS = 'Invalid buffer defaults specification (see docs)'
-ERR_INVALID_SPECIALATTR_TYPE = 'Special attributes must not have a type declared'
-class PostParse(ScopeTrackingTransform):
- """
- Basic interpretation of the parse tree, as well as validity
- checking that can be done on a very basic level on the parse
- tree (while still not being a problem with the basic syntax,
- as such).
-
- Specifically:
- - Default values to cdef assignments are turned into single
- assignments following the declaration (everywhere but in class
- bodies, where they raise a compile error)
-
- - Interpret some node structures into Python runtime values.
- Some nodes take compile-time arguments (currently:
- TemplatedTypeNode[args] and __cythonbufferdefaults__ = {args}),
- which should be interpreted. This happens in a general way
- and other steps should be taken to ensure validity.
-
- Type arguments cannot be interpreted in this way.
-
- - For __cythonbufferdefaults__ the arguments are checked for
- validity.
-
- TemplatedTypeNode has its directives interpreted:
- Any first positional argument goes into the "dtype" attribute,
- any "ndim" keyword argument goes into the "ndim" attribute and
- so on. Also it is checked that the directive combination is valid.
- - __cythonbufferdefaults__ attributes are parsed and put into the
- type information.
-
- Note: Currently Parsing.py does a lot of interpretation and
- reorganization that can be refactored into this transform
- if a more pure Abstract Syntax Tree is wanted.
- """
-
- def __init__(self, context):
- super(PostParse, self).__init__(context)
- self.specialattribute_handlers = {
- '__cythonbufferdefaults__' : self.handle_bufferdefaults
- }
-
- def visit_LambdaNode(self, node):
- # unpack a lambda expression into the corresponding DefNode
- collector = YieldNodeCollector()
- collector.visitchildren(node.result_expr)
+from .Errors import error, warning, CompileError, InternalError
+from .Code import UtilityCode
+
+
+class SkipDeclarations(object):
+ """
+ Variable and function declarations can often have a deep tree structure,
+ and yet most transformations don't need to descend to this depth.
+
+ Declaration nodes are removed after AnalyseDeclarationsTransform, so there
+ is no need to use this for transformations after that point.
+ """
+ def visit_CTypeDefNode(self, node):
+ return node
+
+ def visit_CVarDefNode(self, node):
+ return node
+
+ def visit_CDeclaratorNode(self, node):
+ return node
+
+ def visit_CBaseTypeNode(self, node):
+ return node
+
+ def visit_CEnumDefNode(self, node):
+ return node
+
+ def visit_CStructOrUnionDefNode(self, node):
+ return node
+
+
+class NormalizeTree(CythonTransform):
+ """
+ This transform fixes up a few things after parsing
+ in order to make the parse tree more suitable for
+ transforms.
+
+ a) After parsing, blocks with only one statement will
+ be represented by that statement, not by a StatListNode.
+ When doing transforms this is annoying and inconsistent,
+ as one cannot in general remove a statement in a consistent
+ way and so on. This transform wraps any single statements
+ in a StatListNode containing a single statement.
+
+ b) The PassStatNode is a noop and serves no purpose beyond
+ plugging such one-statement blocks; i.e., once parsed a
+ "pass" can just as well be represented using an empty
+ StatListNode. This means fewer special cases to worry about
+ in subsequent transforms (one always checks to see if a
+ StatListNode has no children to see if the block is empty).
+ """
+
+ def __init__(self, context):
+ super(NormalizeTree, self).__init__(context)
+ self.is_in_statlist = False
+ self.is_in_expr = False
+
+ def visit_ExprNode(self, node):
+ stacktmp = self.is_in_expr
+ self.is_in_expr = True
+ self.visitchildren(node)
+ self.is_in_expr = stacktmp
+ return node
+
+ def visit_StatNode(self, node, is_listcontainer=False):
+ stacktmp = self.is_in_statlist
+ self.is_in_statlist = is_listcontainer
+ self.visitchildren(node)
+ self.is_in_statlist = stacktmp
+ if not self.is_in_statlist and not self.is_in_expr:
+ return Nodes.StatListNode(pos=node.pos, stats=[node])
+ else:
+ return node
+
+ def visit_StatListNode(self, node):
+ self.is_in_statlist = True
+ self.visitchildren(node)
+ self.is_in_statlist = False
+ return node
+
+ def visit_ParallelAssignmentNode(self, node):
+ return self.visit_StatNode(node, True)
+
+ def visit_CEnumDefNode(self, node):
+ return self.visit_StatNode(node, True)
+
+ def visit_CStructOrUnionDefNode(self, node):
+ return self.visit_StatNode(node, True)
+
+ def visit_PassStatNode(self, node):
+ """Eliminate PassStatNode"""
+ if not self.is_in_statlist:
+ return Nodes.StatListNode(pos=node.pos, stats=[])
+ else:
+ return []
+
+ def visit_ExprStatNode(self, node):
+ """Eliminate useless string literals"""
+ if node.expr.is_string_literal:
+ return self.visit_PassStatNode(node)
+ else:
+ return self.visit_StatNode(node)
+
+ def visit_CDeclaratorNode(self, node):
+ return node
+
+
+class PostParseError(CompileError): pass
+
+# error strings checked by unit tests, so define them
+ERR_CDEF_INCLASS = 'Cannot assign default value to fields in cdef classes, structs or unions'
+ERR_BUF_DEFAULTS = 'Invalid buffer defaults specification (see docs)'
+ERR_INVALID_SPECIALATTR_TYPE = 'Special attributes must not have a type declared'
+class PostParse(ScopeTrackingTransform):
+ """
+ Basic interpretation of the parse tree, as well as validity
+ checking that can be done on a very basic level on the parse
+ tree (while still not being a problem with the basic syntax,
+ as such).
+
+ Specifically:
+ - Default values to cdef assignments are turned into single
+ assignments following the declaration (everywhere but in class
+ bodies, where they raise a compile error)
+
+ - Interpret some node structures into Python runtime values.
+ Some nodes take compile-time arguments (currently:
+ TemplatedTypeNode[args] and __cythonbufferdefaults__ = {args}),
+ which should be interpreted. This happens in a general way
+ and other steps should be taken to ensure validity.
+
+ Type arguments cannot be interpreted in this way.
+
+ - For __cythonbufferdefaults__ the arguments are checked for
+ validity.
+
+ TemplatedTypeNode has its directives interpreted:
+ Any first positional argument goes into the "dtype" attribute,
+ any "ndim" keyword argument goes into the "ndim" attribute and
+ so on. Also it is checked that the directive combination is valid.
+ - __cythonbufferdefaults__ attributes are parsed and put into the
+ type information.
+
+ Note: Currently Parsing.py does a lot of interpretation and
+ reorganization that can be refactored into this transform
+ if a more pure Abstract Syntax Tree is wanted.
+ """
+
+ def __init__(self, context):
+ super(PostParse, self).__init__(context)
+ self.specialattribute_handlers = {
+ '__cythonbufferdefaults__' : self.handle_bufferdefaults
+ }
+
+ def visit_LambdaNode(self, node):
+ # unpack a lambda expression into the corresponding DefNode
+ collector = YieldNodeCollector()
+ collector.visitchildren(node.result_expr)
if collector.has_yield or collector.has_await or isinstance(node.result_expr, ExprNodes.YieldExprNode):
- body = Nodes.ExprStatNode(
- node.result_expr.pos, expr=node.result_expr)
- else:
- body = Nodes.ReturnStatNode(
- node.result_expr.pos, value=node.result_expr)
- node.def_node = Nodes.DefNode(
+ body = Nodes.ExprStatNode(
+ node.result_expr.pos, expr=node.result_expr)
+ else:
+ body = Nodes.ReturnStatNode(
+ node.result_expr.pos, value=node.result_expr)
+ node.def_node = Nodes.DefNode(
node.pos, name=node.name,
- args=node.args, star_arg=node.star_arg,
- starstar_arg=node.starstar_arg,
- body=body, doc=None)
- self.visitchildren(node)
- return node
-
- def visit_GeneratorExpressionNode(self, node):
- # unpack a generator expression into the corresponding DefNode
+ args=node.args, star_arg=node.star_arg,
+ starstar_arg=node.starstar_arg,
+ body=body, doc=None)
+ self.visitchildren(node)
+ return node
+
+ def visit_GeneratorExpressionNode(self, node):
+ # unpack a generator expression into the corresponding DefNode
collector = YieldNodeCollector()
collector.visitchildren(node.loop)
node.def_node = Nodes.DefNode(
node.pos, name=node.name, doc=None,
args=[], star_arg=None, starstar_arg=None,
body=node.loop, is_async_def=collector.has_await)
- self.visitchildren(node)
- return node
-
+ self.visitchildren(node)
+ return node
+
def visit_ComprehensionNode(self, node):
# enforce local scope also in Py2 for async generators (seriously, that's a Py3.6 feature...)
if not node.has_local_scope:
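
NormalizeTree's docstring above explains that, after parsing, a lone statement gets wrapped in a one-element statement list and a bare "pass" becomes an empty one. Below is a self-contained sketch of that rule using hypothetical stand-in classes, not Cython's actual node types.

class Stat(object):                # stand-in for a single statement node
    def __init__(self, source):
        self.source = source

class StatList(object):            # stand-in for Nodes.StatListNode
    def __init__(self, stats):
        self.stats = stats

def normalize_block(stmt):
    if stmt.source == "pass":      # "pass" only plugs the block -> empty list
        return StatList([])
    return StatList([stmt])        # wrap the single statement in a list

print(len(normalize_block(Stat("pass")).stats))    # 0
print(len(normalize_block(Stat("x = 1")).stats))   # 1
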
@@ -215,390 +215,390 @@ class PostParse(ScopeTrackingTransform):
self.visitchildren(node)
return node
- # cdef variables
- def handle_bufferdefaults(self, decl):
- if not isinstance(decl.default, ExprNodes.DictNode):
- raise PostParseError(decl.pos, ERR_BUF_DEFAULTS)
- self.scope_node.buffer_defaults_node = decl.default
- self.scope_node.buffer_defaults_pos = decl.pos
-
- def visit_CVarDefNode(self, node):
- # This assumes only plain names and pointers are assignable on
- # declaration. Also, it makes use of the fact that a cdef decl
- # must appear before the first use, so we don't have to deal with
- # "i = 3; cdef int i = i" and can simply move the nodes around.
- try:
- self.visitchildren(node)
- stats = [node]
- newdecls = []
- for decl in node.declarators:
- declbase = decl
- while isinstance(declbase, Nodes.CPtrDeclaratorNode):
- declbase = declbase.base
- if isinstance(declbase, Nodes.CNameDeclaratorNode):
- if declbase.default is not None:
- if self.scope_type in ('cclass', 'pyclass', 'struct'):
- if isinstance(self.scope_node, Nodes.CClassDefNode):
- handler = self.specialattribute_handlers.get(decl.name)
- if handler:
- if decl is not declbase:
- raise PostParseError(decl.pos, ERR_INVALID_SPECIALATTR_TYPE)
- handler(decl)
- continue # Remove declaration
- raise PostParseError(decl.pos, ERR_CDEF_INCLASS)
- first_assignment = self.scope_type != 'module'
- stats.append(Nodes.SingleAssignmentNode(node.pos,
- lhs=ExprNodes.NameNode(node.pos, name=declbase.name),
- rhs=declbase.default, first=first_assignment))
- declbase.default = None
- newdecls.append(decl)
- node.declarators = newdecls
- return stats
+ # cdef variables
+ def handle_bufferdefaults(self, decl):
+ if not isinstance(decl.default, ExprNodes.DictNode):
+ raise PostParseError(decl.pos, ERR_BUF_DEFAULTS)
+ self.scope_node.buffer_defaults_node = decl.default
+ self.scope_node.buffer_defaults_pos = decl.pos
+
+ def visit_CVarDefNode(self, node):
+ # This assumes only plain names and pointers are assignable on
+ # declaration. Also, it makes use of the fact that a cdef decl
+ # must appear before the first use, so we don't have to deal with
+ # "i = 3; cdef int i = i" and can simply move the nodes around.
+ try:
+ self.visitchildren(node)
+ stats = [node]
+ newdecls = []
+ for decl in node.declarators:
+ declbase = decl
+ while isinstance(declbase, Nodes.CPtrDeclaratorNode):
+ declbase = declbase.base
+ if isinstance(declbase, Nodes.CNameDeclaratorNode):
+ if declbase.default is not None:
+ if self.scope_type in ('cclass', 'pyclass', 'struct'):
+ if isinstance(self.scope_node, Nodes.CClassDefNode):
+ handler = self.specialattribute_handlers.get(decl.name)
+ if handler:
+ if decl is not declbase:
+ raise PostParseError(decl.pos, ERR_INVALID_SPECIALATTR_TYPE)
+ handler(decl)
+ continue # Remove declaration
+ raise PostParseError(decl.pos, ERR_CDEF_INCLASS)
+ first_assignment = self.scope_type != 'module'
+ stats.append(Nodes.SingleAssignmentNode(node.pos,
+ lhs=ExprNodes.NameNode(node.pos, name=declbase.name),
+ rhs=declbase.default, first=first_assignment))
+ declbase.default = None
+ newdecls.append(decl)
+ node.declarators = newdecls
+ return stats
except PostParseError as e:
- # An error in a cdef clause is ok, simply remove the declaration
- # and try to move on to report more errors
- self.context.nonfatal_error(e)
- return None
-
- # Split parallel assignments (a,b = b,a) into separate partial
- # assignments that are executed rhs-first using temps. This
- # restructuring must be applied before type analysis so that known
- # types on rhs and lhs can be matched directly. It is required in
- # the case that the types cannot be coerced to a Python type in
- # order to assign from a tuple.
-
- def visit_SingleAssignmentNode(self, node):
- self.visitchildren(node)
- return self._visit_assignment_node(node, [node.lhs, node.rhs])
-
- def visit_CascadedAssignmentNode(self, node):
- self.visitchildren(node)
- return self._visit_assignment_node(node, node.lhs_list + [node.rhs])
-
- def _visit_assignment_node(self, node, expr_list):
- """Flatten parallel assignments into separate single
- assignments or cascaded assignments.
- """
- if sum([ 1 for expr in expr_list
- if expr.is_sequence_constructor or expr.is_string_literal ]) < 2:
- # no parallel assignments => nothing to do
- return node
-
- expr_list_list = []
- flatten_parallel_assignments(expr_list, expr_list_list)
- temp_refs = []
- eliminate_rhs_duplicates(expr_list_list, temp_refs)
-
- nodes = []
- for expr_list in expr_list_list:
- lhs_list = expr_list[:-1]
- rhs = expr_list[-1]
- if len(lhs_list) == 1:
- node = Nodes.SingleAssignmentNode(rhs.pos,
- lhs = lhs_list[0], rhs = rhs)
- else:
- node = Nodes.CascadedAssignmentNode(rhs.pos,
- lhs_list = lhs_list, rhs = rhs)
- nodes.append(node)
-
- if len(nodes) == 1:
- assign_node = nodes[0]
- else:
- assign_node = Nodes.ParallelAssignmentNode(nodes[0].pos, stats = nodes)
-
- if temp_refs:
- duplicates_and_temps = [ (temp.expression, temp)
- for temp in temp_refs ]
- sort_common_subsequences(duplicates_and_temps)
- for _, temp_ref in duplicates_and_temps[::-1]:
- assign_node = LetNode(temp_ref, assign_node)
-
- return assign_node
-
- def _flatten_sequence(self, seq, result):
- for arg in seq.args:
- if arg.is_sequence_constructor:
- self._flatten_sequence(arg, result)
- else:
- result.append(arg)
- return result
-
- def visit_DelStatNode(self, node):
- self.visitchildren(node)
- node.args = self._flatten_sequence(node, [])
- return node
-
- def visit_ExceptClauseNode(self, node):
- if node.is_except_as:
- # except-as must delete NameNode target at the end
- del_target = Nodes.DelStatNode(
- node.pos,
- args=[ExprNodes.NameNode(
- node.target.pos, name=node.target.name)],
- ignore_nonexisting=True)
- node.body = Nodes.StatListNode(
- node.pos,
- stats=[Nodes.TryFinallyStatNode(
- node.pos,
- body=node.body,
- finally_clause=Nodes.StatListNode(
- node.pos,
- stats=[del_target]))])
- self.visitchildren(node)
- return node
-
-
-def eliminate_rhs_duplicates(expr_list_list, ref_node_sequence):
- """Replace rhs items by LetRefNodes if they appear more than once.
- Creates a sequence of LetRefNodes that set up the required temps
- and appends them to ref_node_sequence. The input list is modified
- in-place.
- """
- seen_nodes = set()
- ref_nodes = {}
- def find_duplicates(node):
- if node.is_literal or node.is_name:
- # no need to replace those; can't include attributes here
- # as their access is not necessarily side-effect free
- return
- if node in seen_nodes:
- if node not in ref_nodes:
- ref_node = LetRefNode(node)
- ref_nodes[node] = ref_node
- ref_node_sequence.append(ref_node)
- else:
- seen_nodes.add(node)
- if node.is_sequence_constructor:
- for item in node.args:
- find_duplicates(item)
-
- for expr_list in expr_list_list:
- rhs = expr_list[-1]
- find_duplicates(rhs)
- if not ref_nodes:
- return
-
- def substitute_nodes(node):
- if node in ref_nodes:
- return ref_nodes[node]
- elif node.is_sequence_constructor:
- node.args = list(map(substitute_nodes, node.args))
- return node
-
- # replace nodes inside of the common subexpressions
- for node in ref_nodes:
- if node.is_sequence_constructor:
- node.args = list(map(substitute_nodes, node.args))
-
- # replace common subexpressions on all rhs items
- for expr_list in expr_list_list:
- expr_list[-1] = substitute_nodes(expr_list[-1])
-
-def sort_common_subsequences(items):
- """Sort items/subsequences so that all items and subsequences that
- an item contains appear before the item itself. This is needed
- because each rhs item must only be evaluated once, so its value
- must be evaluated first and then reused when packing sequences
- that contain it.
-
- This implies a partial order, and the sort must be stable to
- preserve the original order as much as possible, so we use a
- simple insertion sort (which is very fast for short sequences, the
- normal case in practice).
- """
- def contains(seq, x):
- for item in seq:
- if item is x:
- return True
- elif item.is_sequence_constructor and contains(item.args, x):
- return True
- return False
- def lower_than(a,b):
- return b.is_sequence_constructor and contains(b.args, a)
-
- for pos, item in enumerate(items):
- key = item[1] # the ResultRefNode which has already been injected into the sequences
- new_pos = pos
+ # An error in a cdef clause is ok, simply remove the declaration
+ # and try to move on to report more errors
+ self.context.nonfatal_error(e)
+ return None
+
+ # Split parallel assignments (a,b = b,a) into separate partial
+ # assignments that are executed rhs-first using temps. This
+ # restructuring must be applied before type analysis so that known
+ # types on rhs and lhs can be matched directly. It is required in
+ # the case that the types cannot be coerced to a Python type in
+ # order to assign from a tuple.
+
+ def visit_SingleAssignmentNode(self, node):
+ self.visitchildren(node)
+ return self._visit_assignment_node(node, [node.lhs, node.rhs])
+
+ def visit_CascadedAssignmentNode(self, node):
+ self.visitchildren(node)
+ return self._visit_assignment_node(node, node.lhs_list + [node.rhs])
+
+ def _visit_assignment_node(self, node, expr_list):
+ """Flatten parallel assignments into separate single
+ assignments or cascaded assignments.
+ """
+ if sum([ 1 for expr in expr_list
+ if expr.is_sequence_constructor or expr.is_string_literal ]) < 2:
+ # no parallel assignments => nothing to do
+ return node
+
+ expr_list_list = []
+ flatten_parallel_assignments(expr_list, expr_list_list)
+ temp_refs = []
+ eliminate_rhs_duplicates(expr_list_list, temp_refs)
+
+ nodes = []
+ for expr_list in expr_list_list:
+ lhs_list = expr_list[:-1]
+ rhs = expr_list[-1]
+ if len(lhs_list) == 1:
+ node = Nodes.SingleAssignmentNode(rhs.pos,
+ lhs = lhs_list[0], rhs = rhs)
+ else:
+ node = Nodes.CascadedAssignmentNode(rhs.pos,
+ lhs_list = lhs_list, rhs = rhs)
+ nodes.append(node)
+
+ if len(nodes) == 1:
+ assign_node = nodes[0]
+ else:
+ assign_node = Nodes.ParallelAssignmentNode(nodes[0].pos, stats = nodes)
+
+ if temp_refs:
+ duplicates_and_temps = [ (temp.expression, temp)
+ for temp in temp_refs ]
+ sort_common_subsequences(duplicates_and_temps)
+ for _, temp_ref in duplicates_and_temps[::-1]:
+ assign_node = LetNode(temp_ref, assign_node)
+
+ return assign_node
+
+ def _flatten_sequence(self, seq, result):
+ for arg in seq.args:
+ if arg.is_sequence_constructor:
+ self._flatten_sequence(arg, result)
+ else:
+ result.append(arg)
+ return result
+
+ def visit_DelStatNode(self, node):
+ self.visitchildren(node)
+ node.args = self._flatten_sequence(node, [])
+ return node
+
+ def visit_ExceptClauseNode(self, node):
+ if node.is_except_as:
+ # except-as must delete NameNode target at the end
+ del_target = Nodes.DelStatNode(
+ node.pos,
+ args=[ExprNodes.NameNode(
+ node.target.pos, name=node.target.name)],
+ ignore_nonexisting=True)
+ node.body = Nodes.StatListNode(
+ node.pos,
+ stats=[Nodes.TryFinallyStatNode(
+ node.pos,
+ body=node.body,
+ finally_clause=Nodes.StatListNode(
+ node.pos,
+ stats=[del_target]))])
+ self.visitchildren(node)
+ return node
+
+
+def eliminate_rhs_duplicates(expr_list_list, ref_node_sequence):
+ """Replace rhs items by LetRefNodes if they appear more than once.
+ Creates a sequence of LetRefNodes that set up the required temps
+ and appends them to ref_node_sequence. The input list is modified
+ in-place.
+ """
+ seen_nodes = set()
+ ref_nodes = {}
+ def find_duplicates(node):
+ if node.is_literal or node.is_name:
+ # no need to replace those; can't include attributes here
+ # as their access is not necessarily side-effect free
+ return
+ if node in seen_nodes:
+ if node not in ref_nodes:
+ ref_node = LetRefNode(node)
+ ref_nodes[node] = ref_node
+ ref_node_sequence.append(ref_node)
+ else:
+ seen_nodes.add(node)
+ if node.is_sequence_constructor:
+ for item in node.args:
+ find_duplicates(item)
+
+ for expr_list in expr_list_list:
+ rhs = expr_list[-1]
+ find_duplicates(rhs)
+ if not ref_nodes:
+ return
+
+ def substitute_nodes(node):
+ if node in ref_nodes:
+ return ref_nodes[node]
+ elif node.is_sequence_constructor:
+ node.args = list(map(substitute_nodes, node.args))
+ return node
+
+ # replace nodes inside of the common subexpressions
+ for node in ref_nodes:
+ if node.is_sequence_constructor:
+ node.args = list(map(substitute_nodes, node.args))
+
+ # replace common subexpressions on all rhs items
+ for expr_list in expr_list_list:
+ expr_list[-1] = substitute_nodes(expr_list[-1])
+
+def sort_common_subsequences(items):
+ """Sort items/subsequences so that all items and subsequences that
+ an item contains appear before the item itself. This is needed
+ because each rhs item must only be evaluated once, so its value
+ must be evaluated first and then reused when packing sequences
+ that contain it.
+
+ This implies a partial order, and the sort must be stable to
+ preserve the original order as much as possible, so we use a
+ simple insertion sort (which is very fast for short sequences, the
+ normal case in practice).
+ """
+ def contains(seq, x):
+ for item in seq:
+ if item is x:
+ return True
+ elif item.is_sequence_constructor and contains(item.args, x):
+ return True
+ return False
+ def lower_than(a,b):
+ return b.is_sequence_constructor and contains(b.args, a)
+
+ for pos, item in enumerate(items):
+ key = item[1] # the ResultRefNode which has already been injected into the sequences
+ new_pos = pos
for i in range(pos-1, -1, -1):
- if lower_than(key, items[i][0]):
- new_pos = i
- if new_pos != pos:
+ if lower_than(key, items[i][0]):
+ new_pos = i
+ if new_pos != pos:
for i in range(pos, new_pos, -1):
- items[i] = items[i-1]
- items[new_pos] = item
-
-def unpack_string_to_character_literals(literal):
- chars = []
- pos = literal.pos
- stype = literal.__class__
- sval = literal.value
- sval_type = sval.__class__
- for char in sval:
- cval = sval_type(char)
- chars.append(stype(pos, value=cval, constant_result=cval))
- return chars
-
-def flatten_parallel_assignments(input, output):
- # The input is a list of expression nodes, representing the LHSs
- # and RHS of one (possibly cascaded) assignment statement. For
- # sequence constructors, rearranges the matching parts of both
- # sides into a list of equivalent assignments between the
- # individual elements. This transformation is applied
- # recursively, so that nested structures get matched as well.
- rhs = input[-1]
- if (not (rhs.is_sequence_constructor or isinstance(rhs, ExprNodes.UnicodeNode))
- or not sum([lhs.is_sequence_constructor for lhs in input[:-1]])):
- output.append(input)
- return
-
- complete_assignments = []
-
- if rhs.is_sequence_constructor:
- rhs_args = rhs.args
- elif rhs.is_string_literal:
- rhs_args = unpack_string_to_character_literals(rhs)
-
- rhs_size = len(rhs_args)
+ items[i] = items[i-1]
+ items[new_pos] = item
+
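The loop above is an insertion sort under the partial "is contained in" order. A self-contained sketch of the same idea on plain tuples (illustration only):

    def contains(seq, x):
        # True if x occurs anywhere inside the (possibly nested) tuple seq.
        return any(item is x or (isinstance(item, tuple) and contains(item, x))
                   for item in seq)

    def partial_sort(items):
        # Stable insertion sort: move every item in front of any tuple that
        # contains it, mirroring lower_than()/contains() above.
        items = list(items)
        for pos, item in enumerate(items):
            new_pos = pos
            for i in range(pos - 1, -1, -1):
                if isinstance(items[i], tuple) and contains(items[i], item):
                    new_pos = i
            if new_pos != pos:
                del items[pos]
                items.insert(new_pos, item)
        return items

    inner = ("x", "y")
    print(partial_sort([(inner, "z"), inner]))   # the inner tuple sorts first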
+def unpack_string_to_character_literals(literal):
+ chars = []
+ pos = literal.pos
+ stype = literal.__class__
+ sval = literal.value
+ sval_type = sval.__class__
+ for char in sval:
+ cval = sval_type(char)
+ chars.append(stype(pos, value=cval, constant_result=cval))
+ return chars
+
+def flatten_parallel_assignments(input, output):
+ # The input is a list of expression nodes, representing the LHSs
+ # and RHS of one (possibly cascaded) assignment statement. For
+ # sequence constructors, rearranges the matching parts of both
+ # sides into a list of equivalent assignments between the
+ # individual elements. This transformation is applied
+ # recursively, so that nested structures get matched as well.
+ rhs = input[-1]
+ if (not (rhs.is_sequence_constructor or isinstance(rhs, ExprNodes.UnicodeNode))
+ or not sum([lhs.is_sequence_constructor for lhs in input[:-1]])):
+ output.append(input)
+ return
+
+ complete_assignments = []
+
+ if rhs.is_sequence_constructor:
+ rhs_args = rhs.args
+ elif rhs.is_string_literal:
+ rhs_args = unpack_string_to_character_literals(rhs)
+
+ rhs_size = len(rhs_args)
lhs_targets = [[] for _ in range(rhs_size)]
- starred_assignments = []
- for lhs in input[:-1]:
- if not lhs.is_sequence_constructor:
- if lhs.is_starred:
- error(lhs.pos, "starred assignment target must be in a list or tuple")
- complete_assignments.append(lhs)
- continue
- lhs_size = len(lhs.args)
- starred_targets = sum([1 for expr in lhs.args if expr.is_starred])
- if starred_targets > 1:
- error(lhs.pos, "more than 1 starred expression in assignment")
- output.append([lhs,rhs])
- continue
- elif lhs_size - starred_targets > rhs_size:
- error(lhs.pos, "need more than %d value%s to unpack"
- % (rhs_size, (rhs_size != 1) and 's' or ''))
- output.append([lhs,rhs])
- continue
- elif starred_targets:
- map_starred_assignment(lhs_targets, starred_assignments,
- lhs.args, rhs_args)
- elif lhs_size < rhs_size:
- error(lhs.pos, "too many values to unpack (expected %d, got %d)"
- % (lhs_size, rhs_size))
- output.append([lhs,rhs])
- continue
- else:
- for targets, expr in zip(lhs_targets, lhs.args):
- targets.append(expr)
-
- if complete_assignments:
- complete_assignments.append(rhs)
- output.append(complete_assignments)
-
- # recursively flatten partial assignments
- for cascade, rhs in zip(lhs_targets, rhs_args):
- if cascade:
- cascade.append(rhs)
- flatten_parallel_assignments(cascade, output)
-
- # recursively flatten starred assignments
- for cascade in starred_assignments:
- if cascade[0].is_sequence_constructor:
- flatten_parallel_assignments(cascade, output)
- else:
- output.append(cascade)
-
-def map_starred_assignment(lhs_targets, starred_assignments, lhs_args, rhs_args):
-    # Appends the fixed-position LHS targets that appear left and right
-    # of the starred argument to the per-position target lists.
- #
- # The starred_assignments list receives a new tuple
- # (lhs_target, rhs_values_list) that maps the remaining arguments
- # (those that match the starred target) to a list.
-
- # left side of the starred target
- for i, (targets, expr) in enumerate(zip(lhs_targets, lhs_args)):
- if expr.is_starred:
- starred = i
- lhs_remaining = len(lhs_args) - i - 1
- break
- targets.append(expr)
- else:
- raise InternalError("no starred arg found when splitting starred assignment")
-
- # right side of the starred target
- for i, (targets, expr) in enumerate(zip(lhs_targets[-lhs_remaining:],
- lhs_args[starred + 1:])):
- targets.append(expr)
-
-    # the starred target itself must be assigned a (potentially empty) list
- target = lhs_args[starred].target # unpack starred node
- starred_rhs = rhs_args[starred:]
- if lhs_remaining:
- starred_rhs = starred_rhs[:-lhs_remaining]
- if starred_rhs:
- pos = starred_rhs[0].pos
- else:
- pos = target.pos
- starred_assignments.append([
- target, ExprNodes.ListNode(pos=pos, args=starred_rhs)])
-
-
-class PxdPostParse(CythonTransform, SkipDeclarations):
- """
- Basic interpretation/validity checking that should only be
- done on pxd trees.
-
- A lot of this checking currently happens in the parser; but
- what is listed below happens here.
-
- - "def" functions are let through only if they fill the
- getbuffer/releasebuffer slots
-
- - cdef functions are let through only if they are on the
- top level and are declared "inline"
- """
- ERR_INLINE_ONLY = "function definition in pxd file must be declared 'cdef inline'"
- ERR_NOGO_WITH_INLINE = "inline function definition in pxd file cannot be '%s'"
-
- def __call__(self, node):
- self.scope_type = 'pxd'
- return super(PxdPostParse, self).__call__(node)
-
- def visit_CClassDefNode(self, node):
- old = self.scope_type
- self.scope_type = 'cclass'
- self.visitchildren(node)
- self.scope_type = old
- return node
-
- def visit_FuncDefNode(self, node):
-        # FuncDefNodes always come with an implementation (without
-        # an implementation they are CVarDefNodes...)
- err = self.ERR_INLINE_ONLY
-
- if (isinstance(node, Nodes.DefNode) and self.scope_type == 'cclass'
- and node.name in ('__getbuffer__', '__releasebuffer__')):
- err = None # allow these slots
-
- if isinstance(node, Nodes.CFuncDefNode):
- if (u'inline' in node.modifiers and
- self.scope_type in ('pxd', 'cclass')):
- node.inline_in_pxd = True
- if node.visibility != 'private':
- err = self.ERR_NOGO_WITH_INLINE % node.visibility
- elif node.api:
- err = self.ERR_NOGO_WITH_INLINE % 'api'
- else:
- err = None # allow inline function
- else:
- err = self.ERR_INLINE_ONLY
-
- if err:
- self.context.nonfatal_error(PostParseError(node.pos, err))
- return None
- else:
- return node
-
+ starred_assignments = []
+ for lhs in input[:-1]:
+ if not lhs.is_sequence_constructor:
+ if lhs.is_starred:
+ error(lhs.pos, "starred assignment target must be in a list or tuple")
+ complete_assignments.append(lhs)
+ continue
+ lhs_size = len(lhs.args)
+ starred_targets = sum([1 for expr in lhs.args if expr.is_starred])
+ if starred_targets > 1:
+ error(lhs.pos, "more than 1 starred expression in assignment")
+ output.append([lhs,rhs])
+ continue
+ elif lhs_size - starred_targets > rhs_size:
+ error(lhs.pos, "need more than %d value%s to unpack"
+ % (rhs_size, (rhs_size != 1) and 's' or ''))
+ output.append([lhs,rhs])
+ continue
+ elif starred_targets:
+ map_starred_assignment(lhs_targets, starred_assignments,
+ lhs.args, rhs_args)
+ elif lhs_size < rhs_size:
+ error(lhs.pos, "too many values to unpack (expected %d, got %d)"
+ % (lhs_size, rhs_size))
+ output.append([lhs,rhs])
+ continue
+ else:
+ for targets, expr in zip(lhs_targets, lhs.args):
+ targets.append(expr)
+
+ if complete_assignments:
+ complete_assignments.append(rhs)
+ output.append(complete_assignments)
+
+ # recursively flatten partial assignments
+ for cascade, rhs in zip(lhs_targets, rhs_args):
+ if cascade:
+ cascade.append(rhs)
+ flatten_parallel_assignments(cascade, output)
+
+ # recursively flatten starred assignments
+ for cascade in starred_assignments:
+ if cascade[0].is_sequence_constructor:
+ flatten_parallel_assignments(cascade, output)
+ else:
+ output.append(cascade)
+
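As a reminder of the intended equivalence, the flattening above decomposes one sequence assignment into element-wise assignments, recursing into nested sequences (plain-Python illustration):

    # "a, (b, c) = 1, (2, 3)" is rewritten into the equivalent element-wise form:
    a = 1
    b = 2
    c = 3
    assert (a, (b, c)) == (1, (2, 3))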
+def map_starred_assignment(lhs_targets, starred_assignments, lhs_args, rhs_args):
+    # Appends the fixed-position LHS targets that appear left and right
+    # of the starred argument to the per-position target lists.
+ #
+ # The starred_assignments list receives a new tuple
+ # (lhs_target, rhs_values_list) that maps the remaining arguments
+ # (those that match the starred target) to a list.
+
+ # left side of the starred target
+ for i, (targets, expr) in enumerate(zip(lhs_targets, lhs_args)):
+ if expr.is_starred:
+ starred = i
+ lhs_remaining = len(lhs_args) - i - 1
+ break
+ targets.append(expr)
+ else:
+ raise InternalError("no starred arg found when splitting starred assignment")
+
+ # right side of the starred target
+ for i, (targets, expr) in enumerate(zip(lhs_targets[-lhs_remaining:],
+ lhs_args[starred + 1:])):
+ targets.append(expr)
+
+    # the starred target itself must be assigned a (potentially empty) list
+ target = lhs_args[starred].target # unpack starred node
+ starred_rhs = rhs_args[starred:]
+ if lhs_remaining:
+ starred_rhs = starred_rhs[:-lhs_remaining]
+ if starred_rhs:
+ pos = starred_rhs[0].pos
+ else:
+ pos = target.pos
+ starred_assignments.append([
+ target, ExprNodes.ListNode(pos=pos, args=starred_rhs)])
+
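The bookkeeping above (the star's index plus the number of trailing fixed targets) matches ordinary starred unpacking; the slice arithmetic in plain Python (illustration only):

    rhs = [1, 2, 3, 4, 5]
    a, *b, c = rhs
    assert (a, b, c) == (1, [2, 3, 4], 5)

    # the same split, done by hand: star at index 1, one fixed target after it
    starred, lhs_remaining = 1, 1
    assert rhs[starred:len(rhs) - lhs_remaining] == b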
+
+class PxdPostParse(CythonTransform, SkipDeclarations):
+ """
+ Basic interpretation/validity checking that should only be
+ done on pxd trees.
+
+ A lot of this checking currently happens in the parser; but
+ what is listed below happens here.
+
+ - "def" functions are let through only if they fill the
+ getbuffer/releasebuffer slots
+
+ - cdef functions are let through only if they are on the
+ top level and are declared "inline"
+ """
+ ERR_INLINE_ONLY = "function definition in pxd file must be declared 'cdef inline'"
+ ERR_NOGO_WITH_INLINE = "inline function definition in pxd file cannot be '%s'"
+
+ def __call__(self, node):
+ self.scope_type = 'pxd'
+ return super(PxdPostParse, self).__call__(node)
+
+ def visit_CClassDefNode(self, node):
+ old = self.scope_type
+ self.scope_type = 'cclass'
+ self.visitchildren(node)
+ self.scope_type = old
+ return node
+
+ def visit_FuncDefNode(self, node):
+        # FuncDefNodes always come with an implementation (without
+        # an implementation they are CVarDefNodes...)
+ err = self.ERR_INLINE_ONLY
+
+ if (isinstance(node, Nodes.DefNode) and self.scope_type == 'cclass'
+ and node.name in ('__getbuffer__', '__releasebuffer__')):
+ err = None # allow these slots
+
+ if isinstance(node, Nodes.CFuncDefNode):
+ if (u'inline' in node.modifiers and
+ self.scope_type in ('pxd', 'cclass')):
+ node.inline_in_pxd = True
+ if node.visibility != 'private':
+ err = self.ERR_NOGO_WITH_INLINE % node.visibility
+ elif node.api:
+ err = self.ERR_NOGO_WITH_INLINE % 'api'
+ else:
+ err = None # allow inline function
+ else:
+ err = self.ERR_INLINE_ONLY
+
+ if err:
+ self.context.nonfatal_error(PostParseError(node.pos, err))
+ return None
+ else:
+ return node
+
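To make the rule concrete: a hypothetical .pxd snippet that PxdPostParse lets through, whereas a plain "def" or a non-inline "cdef" function body in the same file would be reported as an error:

    # example.pxd (illustration only)
    cdef inline int twice(int x):
        return 2 * x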
class TrackNumpyAttributes(VisitorTransform, SkipDeclarations):
# TODO: Make name handling as good as in InterpretCompilerDirectives() below - probably best to merge the two.
@@ -622,280 +622,280 @@ class TrackNumpyAttributes(VisitorTransform, SkipDeclarations):
class InterpretCompilerDirectives(CythonTransform):
- """
- After parsing, directives can be stored in a number of places:
- - #cython-comments at the top of the file (stored in ModuleNode)
- - Command-line arguments overriding these
- - @cython.directivename decorators
- - with cython.directivename: statements
-
- This transform is responsible for interpreting these various sources
-    and storing the directives in two ways:
- - Set the directives attribute of the ModuleNode for global directives.
- - Use a CompilerDirectivesNode to override directives for a subtree.
-
-    (The first one is primarily to not have to modify the tree
-    structure, so that ModuleNode stays on top.)
-
- The directives are stored in dictionaries from name to value in effect.
- Each such dictionary is always filled in for all possible directives,
- using default values where no value is given by the user.
-
- The available directives are controlled in Options.py.
-
- Note that we have to run this prior to analysis, and so some minor
-    duplication of functionality has to occur: we manually track cimports
-    and the names under which the "cython" module may have been imported.
- """
- unop_method_nodes = {
- 'typeof': ExprNodes.TypeofNode,
-
- 'operator.address': ExprNodes.AmpersandNode,
- 'operator.dereference': ExprNodes.DereferenceNode,
- 'operator.preincrement' : ExprNodes.inc_dec_constructor(True, '++'),
- 'operator.predecrement' : ExprNodes.inc_dec_constructor(True, '--'),
- 'operator.postincrement': ExprNodes.inc_dec_constructor(False, '++'),
- 'operator.postdecrement': ExprNodes.inc_dec_constructor(False, '--'),
+ """
+ After parsing, directives can be stored in a number of places:
+ - #cython-comments at the top of the file (stored in ModuleNode)
+ - Command-line arguments overriding these
+ - @cython.directivename decorators
+ - with cython.directivename: statements
+
+ This transform is responsible for interpreting these various sources
+    and storing the directives in two ways:
+ - Set the directives attribute of the ModuleNode for global directives.
+ - Use a CompilerDirectivesNode to override directives for a subtree.
+
+    (The first one is primarily to not have to modify the tree
+    structure, so that ModuleNode stays on top.)
+
+ The directives are stored in dictionaries from name to value in effect.
+ Each such dictionary is always filled in for all possible directives,
+ using default values where no value is given by the user.
+
+ The available directives are controlled in Options.py.
+
+ Note that we have to run this prior to analysis, and so some minor
+    duplication of functionality has to occur: we manually track cimports
+    and the names under which the "cython" module may have been imported.
+ """
+ unop_method_nodes = {
+ 'typeof': ExprNodes.TypeofNode,
+
+ 'operator.address': ExprNodes.AmpersandNode,
+ 'operator.dereference': ExprNodes.DereferenceNode,
+ 'operator.preincrement' : ExprNodes.inc_dec_constructor(True, '++'),
+ 'operator.predecrement' : ExprNodes.inc_dec_constructor(True, '--'),
+ 'operator.postincrement': ExprNodes.inc_dec_constructor(False, '++'),
+ 'operator.postdecrement': ExprNodes.inc_dec_constructor(False, '--'),
'operator.typeid' : ExprNodes.TypeidNode,
-
+
# For backwards compatibility.
- 'address': ExprNodes.AmpersandNode,
- }
-
- binop_method_nodes = {
- 'operator.comma' : ExprNodes.c_binop_constructor(','),
- }
-
- special_methods = set(['declare', 'union', 'struct', 'typedef',
- 'sizeof', 'cast', 'pointer', 'compiled',
- 'NULL', 'fused_type', 'parallel'])
+ 'address': ExprNodes.AmpersandNode,
+ }
+
+ binop_method_nodes = {
+ 'operator.comma' : ExprNodes.c_binop_constructor(','),
+ }
+
+ special_methods = set(['declare', 'union', 'struct', 'typedef',
+ 'sizeof', 'cast', 'pointer', 'compiled',
+ 'NULL', 'fused_type', 'parallel'])
special_methods.update(unop_method_nodes)
-
- valid_parallel_directives = set([
- "parallel",
- "prange",
- "threadid",
+
+ valid_parallel_directives = set([
+ "parallel",
+ "prange",
+ "threadid",
#"threadsavailable",
- ])
-
- def __init__(self, context, compilation_directive_defaults):
- super(InterpretCompilerDirectives, self).__init__(context)
- self.cython_module_names = set()
- self.directive_names = {'staticmethod': 'staticmethod'}
- self.parallel_directives = {}
+ ])
+
+ def __init__(self, context, compilation_directive_defaults):
+ super(InterpretCompilerDirectives, self).__init__(context)
+ self.cython_module_names = set()
+ self.directive_names = {'staticmethod': 'staticmethod'}
+ self.parallel_directives = {}
directives = copy.deepcopy(Options.get_directive_defaults())
- for key, value in compilation_directive_defaults.items():
+ for key, value in compilation_directive_defaults.items():
directives[_unicode(key)] = copy.deepcopy(value)
- self.directives = directives
-
- def check_directive_scope(self, pos, directive, scope):
- legal_scopes = Options.directive_scopes.get(directive, None)
- if legal_scopes and scope not in legal_scopes:
- self.context.nonfatal_error(PostParseError(pos, 'The %s compiler directive '
- 'is not allowed in %s scope' % (directive, scope)))
- return False
- else:
+ self.directives = directives
+
+ def check_directive_scope(self, pos, directive, scope):
+ legal_scopes = Options.directive_scopes.get(directive, None)
+ if legal_scopes and scope not in legal_scopes:
+ self.context.nonfatal_error(PostParseError(pos, 'The %s compiler directive '
+ 'is not allowed in %s scope' % (directive, scope)))
+ return False
+ else:
if directive not in Options.directive_types:
- error(pos, "Invalid directive: '%s'." % (directive,))
- return True
-
- # Set up processing and handle the cython: comments.
- def visit_ModuleNode(self, node):
+ error(pos, "Invalid directive: '%s'." % (directive,))
+ return True
+
+ # Set up processing and handle the cython: comments.
+ def visit_ModuleNode(self, node):
for key in sorted(node.directive_comments):
- if not self.check_directive_scope(node.pos, key, 'module'):
- self.wrong_scope_error(node.pos, key, 'module')
- del node.directive_comments[key]
-
- self.module_scope = node.scope
-
- self.directives.update(node.directive_comments)
- node.directives = self.directives
- node.parallel_directives = self.parallel_directives
- self.visitchildren(node)
- node.cython_module_names = self.cython_module_names
- return node
-
- # The following four functions track imports and cimports that
- # begin with "cython"
- def is_cython_directive(self, name):
- return (name in Options.directive_types or
- name in self.special_methods or
- PyrexTypes.parse_basic_type(name))
-
- def is_parallel_directive(self, full_name, pos):
- """
-        Checks to see if full_name (e.g. cython.parallel.prange) is a valid
-        parallel directive. If it is a star import, it also updates the
- parallel_directives.
- """
- result = (full_name + ".").startswith("cython.parallel.")
-
- if result:
- directive = full_name.split('.')
- if full_name == u"cython.parallel":
- self.parallel_directives[u"parallel"] = u"cython.parallel"
- elif full_name == u"cython.parallel.*":
- for name in self.valid_parallel_directives:
- self.parallel_directives[name] = u"cython.parallel.%s" % name
- elif (len(directive) != 3 or
- directive[-1] not in self.valid_parallel_directives):
- error(pos, "No such directive: %s" % full_name)
-
- self.module_scope.use_utility_code(
- UtilityCode.load_cached("InitThreads", "ModuleSetupCode.c"))
-
- return result
-
- def visit_CImportStatNode(self, node):
- if node.module_name == u"cython":
- self.cython_module_names.add(node.as_name or u"cython")
- elif node.module_name.startswith(u"cython."):
- if node.module_name.startswith(u"cython.parallel."):
- error(node.pos, node.module_name + " is not a module")
- if node.module_name == u"cython.parallel":
- if node.as_name and node.as_name != u"cython":
- self.parallel_directives[node.as_name] = node.module_name
- else:
- self.cython_module_names.add(u"cython")
- self.parallel_directives[
- u"cython.parallel"] = node.module_name
- self.module_scope.use_utility_code(
- UtilityCode.load_cached("InitThreads", "ModuleSetupCode.c"))
- elif node.as_name:
- self.directive_names[node.as_name] = node.module_name[7:]
- else:
- self.cython_module_names.add(u"cython")
- # if this cimport was a compiler directive, we don't
- # want to leave the cimport node sitting in the tree
- return None
- return node
-
- def visit_FromCImportStatNode(self, node):
- if not node.relative_level and (
- node.module_name == u"cython" or node.module_name.startswith(u"cython.")):
- submodule = (node.module_name + u".")[7:]
- newimp = []
-
- for pos, name, as_name, kind in node.imported_names:
- full_name = submodule + name
- qualified_name = u"cython." + full_name
-
- if self.is_parallel_directive(qualified_name, node.pos):
- # from cython cimport parallel, or
- # from cython.parallel cimport parallel, prange, ...
- self.parallel_directives[as_name or name] = qualified_name
- elif self.is_cython_directive(full_name):
- self.directive_names[as_name or name] = full_name
- if kind is not None:
- self.context.nonfatal_error(PostParseError(pos,
- "Compiler directive imports must be plain imports"))
- else:
- newimp.append((pos, name, as_name, kind))
-
- if not newimp:
- return None
-
- node.imported_names = newimp
- return node
-
- def visit_FromImportStatNode(self, node):
- if (node.module.module_name.value == u"cython") or \
- node.module.module_name.value.startswith(u"cython."):
- submodule = (node.module.module_name.value + u".")[7:]
- newimp = []
- for name, name_node in node.items:
- full_name = submodule + name
- qualified_name = u"cython." + full_name
- if self.is_parallel_directive(qualified_name, node.pos):
- self.parallel_directives[name_node.name] = qualified_name
- elif self.is_cython_directive(full_name):
- self.directive_names[name_node.name] = full_name
- else:
- newimp.append((name, name_node))
- if not newimp:
- return None
- node.items = newimp
- return node
-
- def visit_SingleAssignmentNode(self, node):
- if isinstance(node.rhs, ExprNodes.ImportNode):
- module_name = node.rhs.module_name.value
- is_parallel = (module_name + u".").startswith(u"cython.parallel.")
-
- if module_name != u"cython" and not is_parallel:
- return node
-
- module_name = node.rhs.module_name.value
- as_name = node.lhs.name
-
- node = Nodes.CImportStatNode(node.pos,
- module_name = module_name,
- as_name = as_name)
- node = self.visit_CImportStatNode(node)
- else:
- self.visitchildren(node)
-
- return node
-
- def visit_NameNode(self, node):
- if node.name in self.cython_module_names:
- node.is_cython_module = True
- else:
+ if not self.check_directive_scope(node.pos, key, 'module'):
+ self.wrong_scope_error(node.pos, key, 'module')
+ del node.directive_comments[key]
+
+ self.module_scope = node.scope
+
+ self.directives.update(node.directive_comments)
+ node.directives = self.directives
+ node.parallel_directives = self.parallel_directives
+ self.visitchildren(node)
+ node.cython_module_names = self.cython_module_names
+ return node
+
+ # The following four functions track imports and cimports that
+ # begin with "cython"
+ def is_cython_directive(self, name):
+ return (name in Options.directive_types or
+ name in self.special_methods or
+ PyrexTypes.parse_basic_type(name))
+
+ def is_parallel_directive(self, full_name, pos):
+ """
+        Checks to see if full_name (e.g. cython.parallel.prange) is a valid
+        parallel directive. If it is a star import, it also updates the
+ parallel_directives.
+ """
+ result = (full_name + ".").startswith("cython.parallel.")
+
+ if result:
+ directive = full_name.split('.')
+ if full_name == u"cython.parallel":
+ self.parallel_directives[u"parallel"] = u"cython.parallel"
+ elif full_name == u"cython.parallel.*":
+ for name in self.valid_parallel_directives:
+ self.parallel_directives[name] = u"cython.parallel.%s" % name
+ elif (len(directive) != 3 or
+ directive[-1] not in self.valid_parallel_directives):
+ error(pos, "No such directive: %s" % full_name)
+
+ self.module_scope.use_utility_code(
+ UtilityCode.load_cached("InitThreads", "ModuleSetupCode.c"))
+
+ return result
+
+ def visit_CImportStatNode(self, node):
+ if node.module_name == u"cython":
+ self.cython_module_names.add(node.as_name or u"cython")
+ elif node.module_name.startswith(u"cython."):
+ if node.module_name.startswith(u"cython.parallel."):
+ error(node.pos, node.module_name + " is not a module")
+ if node.module_name == u"cython.parallel":
+ if node.as_name and node.as_name != u"cython":
+ self.parallel_directives[node.as_name] = node.module_name
+ else:
+ self.cython_module_names.add(u"cython")
+ self.parallel_directives[
+ u"cython.parallel"] = node.module_name
+ self.module_scope.use_utility_code(
+ UtilityCode.load_cached("InitThreads", "ModuleSetupCode.c"))
+ elif node.as_name:
+ self.directive_names[node.as_name] = node.module_name[7:]
+ else:
+ self.cython_module_names.add(u"cython")
+ # if this cimport was a compiler directive, we don't
+ # want to leave the cimport node sitting in the tree
+ return None
+ return node
+
+ def visit_FromCImportStatNode(self, node):
+ if not node.relative_level and (
+ node.module_name == u"cython" or node.module_name.startswith(u"cython.")):
+ submodule = (node.module_name + u".")[7:]
+ newimp = []
+
+ for pos, name, as_name, kind in node.imported_names:
+ full_name = submodule + name
+ qualified_name = u"cython." + full_name
+
+ if self.is_parallel_directive(qualified_name, node.pos):
+ # from cython cimport parallel, or
+ # from cython.parallel cimport parallel, prange, ...
+ self.parallel_directives[as_name or name] = qualified_name
+ elif self.is_cython_directive(full_name):
+ self.directive_names[as_name or name] = full_name
+ if kind is not None:
+ self.context.nonfatal_error(PostParseError(pos,
+ "Compiler directive imports must be plain imports"))
+ else:
+ newimp.append((pos, name, as_name, kind))
+
+ if not newimp:
+ return None
+
+ node.imported_names = newimp
+ return node
+
+ def visit_FromImportStatNode(self, node):
+ if (node.module.module_name.value == u"cython") or \
+ node.module.module_name.value.startswith(u"cython."):
+ submodule = (node.module.module_name.value + u".")[7:]
+ newimp = []
+ for name, name_node in node.items:
+ full_name = submodule + name
+ qualified_name = u"cython." + full_name
+ if self.is_parallel_directive(qualified_name, node.pos):
+ self.parallel_directives[name_node.name] = qualified_name
+ elif self.is_cython_directive(full_name):
+ self.directive_names[name_node.name] = full_name
+ else:
+ newimp.append((name, name_node))
+ if not newimp:
+ return None
+ node.items = newimp
+ return node
+
+ def visit_SingleAssignmentNode(self, node):
+ if isinstance(node.rhs, ExprNodes.ImportNode):
+ module_name = node.rhs.module_name.value
+ is_parallel = (module_name + u".").startswith(u"cython.parallel.")
+
+ if module_name != u"cython" and not is_parallel:
+ return node
+
+ module_name = node.rhs.module_name.value
+ as_name = node.lhs.name
+
+ node = Nodes.CImportStatNode(node.pos,
+ module_name = module_name,
+ as_name = as_name)
+ node = self.visit_CImportStatNode(node)
+ else:
+ self.visitchildren(node)
+
+ return node
+
+ def visit_NameNode(self, node):
+ if node.name in self.cython_module_names:
+ node.is_cython_module = True
+ else:
directive = self.directive_names.get(node.name)
if directive is not None:
node.cython_attribute = directive
- return node
-
+ return node
+
def visit_NewExprNode(self, node):
self.visit(node.cppclass)
self.visitchildren(node)
return node
- def try_to_parse_directives(self, node):
-        # If node is the contents of a directive (in a with statement or
- # decorator), returns a list of (directivename, value) pairs.
- # Otherwise, returns None
- if isinstance(node, ExprNodes.CallNode):
- self.visit(node.function)
- optname = node.function.as_cython_attribute()
- if optname:
- directivetype = Options.directive_types.get(optname)
- if directivetype:
- args, kwds = node.explicit_args_kwds()
- directives = []
- key_value_pairs = []
- if kwds is not None and directivetype is not dict:
- for keyvalue in kwds.key_value_pairs:
- key, value = keyvalue
- sub_optname = "%s.%s" % (optname, key.value)
- if Options.directive_types.get(sub_optname):
- directives.append(self.try_to_parse_directive(sub_optname, [value], None, keyvalue.pos))
- else:
- key_value_pairs.append(keyvalue)
- if not key_value_pairs:
- kwds = None
- else:
- kwds.key_value_pairs = key_value_pairs
- if directives and not kwds and not args:
- return directives
- directives.append(self.try_to_parse_directive(optname, args, kwds, node.function.pos))
- return directives
- elif isinstance(node, (ExprNodes.AttributeNode, ExprNodes.NameNode)):
- self.visit(node)
- optname = node.as_cython_attribute()
- if optname:
- directivetype = Options.directive_types.get(optname)
- if directivetype is bool:
+ def try_to_parse_directives(self, node):
+        # If node is the contents of a directive (in a with statement or
+ # decorator), returns a list of (directivename, value) pairs.
+ # Otherwise, returns None
+ if isinstance(node, ExprNodes.CallNode):
+ self.visit(node.function)
+ optname = node.function.as_cython_attribute()
+ if optname:
+ directivetype = Options.directive_types.get(optname)
+ if directivetype:
+ args, kwds = node.explicit_args_kwds()
+ directives = []
+ key_value_pairs = []
+ if kwds is not None and directivetype is not dict:
+ for keyvalue in kwds.key_value_pairs:
+ key, value = keyvalue
+ sub_optname = "%s.%s" % (optname, key.value)
+ if Options.directive_types.get(sub_optname):
+ directives.append(self.try_to_parse_directive(sub_optname, [value], None, keyvalue.pos))
+ else:
+ key_value_pairs.append(keyvalue)
+ if not key_value_pairs:
+ kwds = None
+ else:
+ kwds.key_value_pairs = key_value_pairs
+ if directives and not kwds and not args:
+ return directives
+ directives.append(self.try_to_parse_directive(optname, args, kwds, node.function.pos))
+ return directives
+ elif isinstance(node, (ExprNodes.AttributeNode, ExprNodes.NameNode)):
+ self.visit(node)
+ optname = node.as_cython_attribute()
+ if optname:
+ directivetype = Options.directive_types.get(optname)
+ if directivetype is bool:
arg = ExprNodes.BoolNode(node.pos, value=True)
return [self.try_to_parse_directive(optname, [arg], None, node.pos)]
- elif directivetype is None:
- return [(optname, None)]
- else:
- raise PostParseError(
- node.pos, "The '%s' directive should be used as a function call." % optname)
- return None
-
- def try_to_parse_directive(self, optname, args, kwds, pos):
+ elif directivetype is None:
+ return [(optname, None)]
+ else:
+ raise PostParseError(
+ node.pos, "The '%s' directive should be used as a function call." % optname)
+ return None
+
+ def try_to_parse_directive(self, optname, args, kwds, pos):
if optname == 'np_pythran' and not self.context.cpp:
raise PostParseError(pos, 'The %s directive can only be used in C++ mode.' % optname)
elif optname == 'exceptval':
@@ -915,53 +915,53 @@ class InterpretCompilerDirectives(CythonTransform):
pos, 'The exceptval directive takes 0 or 1 positional arguments and the boolean keyword "check"')
return ('exceptval', (args[0] if args else None, check))
- directivetype = Options.directive_types.get(optname)
- if len(args) == 1 and isinstance(args[0], ExprNodes.NoneNode):
+ directivetype = Options.directive_types.get(optname)
+ if len(args) == 1 and isinstance(args[0], ExprNodes.NoneNode):
return optname, Options.get_directive_defaults()[optname]
- elif directivetype is bool:
- if kwds is not None or len(args) != 1 or not isinstance(args[0], ExprNodes.BoolNode):
- raise PostParseError(pos,
- 'The %s directive takes one compile-time boolean argument' % optname)
- return (optname, args[0].value)
- elif directivetype is int:
- if kwds is not None or len(args) != 1 or not isinstance(args[0], ExprNodes.IntNode):
- raise PostParseError(pos,
- 'The %s directive takes one compile-time integer argument' % optname)
- return (optname, int(args[0].value))
- elif directivetype is str:
- if kwds is not None or len(args) != 1 or not isinstance(
- args[0], (ExprNodes.StringNode, ExprNodes.UnicodeNode)):
- raise PostParseError(pos,
- 'The %s directive takes one compile-time string argument' % optname)
- return (optname, str(args[0].value))
- elif directivetype is type:
- if kwds is not None or len(args) != 1:
- raise PostParseError(pos,
- 'The %s directive takes one type argument' % optname)
- return (optname, args[0])
- elif directivetype is dict:
- if len(args) != 0:
- raise PostParseError(pos,
-                'The %s directive takes no positional arguments' % optname)
- return optname, dict([(key.value, value) for key, value in kwds.key_value_pairs])
- elif directivetype is list:
+ elif directivetype is bool:
+ if kwds is not None or len(args) != 1 or not isinstance(args[0], ExprNodes.BoolNode):
+ raise PostParseError(pos,
+ 'The %s directive takes one compile-time boolean argument' % optname)
+ return (optname, args[0].value)
+ elif directivetype is int:
+ if kwds is not None or len(args) != 1 or not isinstance(args[0], ExprNodes.IntNode):
+ raise PostParseError(pos,
+ 'The %s directive takes one compile-time integer argument' % optname)
+ return (optname, int(args[0].value))
+ elif directivetype is str:
+ if kwds is not None or len(args) != 1 or not isinstance(
+ args[0], (ExprNodes.StringNode, ExprNodes.UnicodeNode)):
+ raise PostParseError(pos,
+ 'The %s directive takes one compile-time string argument' % optname)
+ return (optname, str(args[0].value))
+ elif directivetype is type:
+ if kwds is not None or len(args) != 1:
+ raise PostParseError(pos,
+ 'The %s directive takes one type argument' % optname)
+ return (optname, args[0])
+ elif directivetype is dict:
+ if len(args) != 0:
+ raise PostParseError(pos,
+                'The %s directive takes no positional arguments' % optname)
+ return optname, dict([(key.value, value) for key, value in kwds.key_value_pairs])
+ elif directivetype is list:
if kwds and len(kwds.key_value_pairs) != 0:
- raise PostParseError(pos,
- 'The %s directive takes no keyword arguments' % optname)
- return optname, [ str(arg.value) for arg in args ]
- elif callable(directivetype):
- if kwds is not None or len(args) != 1 or not isinstance(
- args[0], (ExprNodes.StringNode, ExprNodes.UnicodeNode)):
- raise PostParseError(pos,
- 'The %s directive takes one compile-time string argument' % optname)
- return (optname, directivetype(optname, str(args[0].value)))
- else:
- assert False
-
+ raise PostParseError(pos,
+ 'The %s directive takes no keyword arguments' % optname)
+ return optname, [ str(arg.value) for arg in args ]
+ elif callable(directivetype):
+ if kwds is not None or len(args) != 1 or not isinstance(
+ args[0], (ExprNodes.StringNode, ExprNodes.UnicodeNode)):
+ raise PostParseError(pos,
+ 'The %s directive takes one compile-time string argument' % optname)
+ return (optname, directivetype(optname, str(args[0].value)))
+ else:
+ assert False
+
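For reference, the directive spellings parsed above correspond to these user-facing forms (standard Cython usage, shown only as an illustration):

    # cython: boundscheck=False        <- header comment, module scope

    import cython

    @cython.boundscheck(False)         # decorator form
    def f(xs):
        with cython.nogil:             # with-statement form ("with nogil" block)
            pass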
def visit_with_directives(self, node, directives):
if not directives:
return self.visit_Node(node)
-
+
old_directives = self.directives
new_directives = dict(old_directives)
new_directives.update(directives)
@@ -978,353 +978,353 @@ class InterpretCompilerDirectives(CythonTransform):
return Nodes.CompilerDirectivesNode(
pos=retbody.pos, body=retbody, directives=new_directives)
- # Handle decorators
- def visit_FuncDefNode(self, node):
- directives = self._extract_directives(node, 'function')
+ # Handle decorators
+ def visit_FuncDefNode(self, node):
+ directives = self._extract_directives(node, 'function')
return self.visit_with_directives(node, directives)
-
- def visit_CVarDefNode(self, node):
- directives = self._extract_directives(node, 'function')
+
+ def visit_CVarDefNode(self, node):
+ directives = self._extract_directives(node, 'function')
for name, value in directives.items():
- if name == 'locals':
- node.directive_locals = value
- elif name not in ('final', 'staticmethod'):
- self.context.nonfatal_error(PostParseError(
- node.pos,
- "Cdef functions can only take cython.locals(), "
- "staticmethod, or final decorators, got %s." % name))
+ if name == 'locals':
+ node.directive_locals = value
+ elif name not in ('final', 'staticmethod'):
+ self.context.nonfatal_error(PostParseError(
+ node.pos,
+ "Cdef functions can only take cython.locals(), "
+ "staticmethod, or final decorators, got %s." % name))
return self.visit_with_directives(node, directives)
-
- def visit_CClassDefNode(self, node):
- directives = self._extract_directives(node, 'cclass')
+
+ def visit_CClassDefNode(self, node):
+ directives = self._extract_directives(node, 'cclass')
return self.visit_with_directives(node, directives)
-
- def visit_CppClassNode(self, node):
- directives = self._extract_directives(node, 'cppclass')
+
+ def visit_CppClassNode(self, node):
+ directives = self._extract_directives(node, 'cppclass')
return self.visit_with_directives(node, directives)
-
- def visit_PyClassDefNode(self, node):
- directives = self._extract_directives(node, 'class')
+
+ def visit_PyClassDefNode(self, node):
+ directives = self._extract_directives(node, 'class')
return self.visit_with_directives(node, directives)
-
- def _extract_directives(self, node, scope_name):
- if not node.decorators:
- return {}
- # Split the decorators into two lists -- real decorators and directives
- directives = []
- realdecs = []
- both = []
+
+ def _extract_directives(self, node, scope_name):
+ if not node.decorators:
+ return {}
+ # Split the decorators into two lists -- real decorators and directives
+ directives = []
+ realdecs = []
+ both = []
# Decorators coming first take precedence.
for dec in node.decorators[::-1]:
- new_directives = self.try_to_parse_directives(dec.decorator)
- if new_directives is not None:
- for directive in new_directives:
- if self.check_directive_scope(node.pos, directive[0], scope_name):
- name, value = directive
- if self.directives.get(name, object()) != value:
- directives.append(directive)
- if directive[0] == 'staticmethod':
- both.append(dec)
+ new_directives = self.try_to_parse_directives(dec.decorator)
+ if new_directives is not None:
+ for directive in new_directives:
+ if self.check_directive_scope(node.pos, directive[0], scope_name):
+ name, value = directive
+ if self.directives.get(name, object()) != value:
+ directives.append(directive)
+ if directive[0] == 'staticmethod':
+ both.append(dec)
# Adapt scope type based on decorators that change it.
if directive[0] == 'cclass' and scope_name == 'class':
scope_name = 'cclass'
- else:
- realdecs.append(dec)
+ else:
+ realdecs.append(dec)
if realdecs and (scope_name == 'cclass' or
isinstance(node, (Nodes.CFuncDefNode, Nodes.CClassDefNode, Nodes.CVarDefNode))):
- raise PostParseError(realdecs[0].pos, "Cdef functions/classes cannot take arbitrary decorators.")
+ raise PostParseError(realdecs[0].pos, "Cdef functions/classes cannot take arbitrary decorators.")
node.decorators = realdecs[::-1] + both[::-1]
- # merge or override repeated directives
- optdict = {}
- for directive in directives:
- name, value = directive
- if name in optdict:
- old_value = optdict[name]
- # keywords and arg lists can be merged, everything
- # else overrides completely
- if isinstance(old_value, dict):
- old_value.update(value)
- elif isinstance(old_value, list):
- old_value.extend(value)
- else:
- optdict[name] = value
- else:
- optdict[name] = value
- return optdict
-
+ # merge or override repeated directives
+ optdict = {}
+ for directive in directives:
+ name, value = directive
+ if name in optdict:
+ old_value = optdict[name]
+ # keywords and arg lists can be merged, everything
+ # else overrides completely
+ if isinstance(old_value, dict):
+ old_value.update(value)
+ elif isinstance(old_value, list):
+ old_value.extend(value)
+ else:
+ optdict[name] = value
+ else:
+ optdict[name] = value
+ return optdict
+
# Handle with-statements
- def visit_WithStatNode(self, node):
- directive_dict = {}
- for directive in self.try_to_parse_directives(node.manager) or []:
- if directive is not None:
- if node.target is not None:
- self.context.nonfatal_error(
- PostParseError(node.pos, "Compiler directive with statements cannot contain 'as'"))
- else:
- name, value = directive
- if name in ('nogil', 'gil'):
- # special case: in pure mode, "with nogil" spells "with cython.nogil"
- node = Nodes.GILStatNode(node.pos, state = name, body = node.body)
- return self.visit_Node(node)
- if self.check_directive_scope(node.pos, name, 'with statement'):
- directive_dict[name] = value
- if directive_dict:
- return self.visit_with_directives(node.body, directive_dict)
- return self.visit_Node(node)
-
-
-class ParallelRangeTransform(CythonTransform, SkipDeclarations):
- """
-    Transform cython.parallel constructs. The parallel_directives come from the
- module node, set there by InterpretCompilerDirectives.
-
- x = cython.parallel.threadavailable() -> ParallelThreadAvailableNode
- with nogil, cython.parallel.parallel(): -> ParallelWithBlockNode
- print cython.parallel.threadid() -> ParallelThreadIdNode
- for i in cython.parallel.prange(...): -> ParallelRangeNode
- ...
- """
-
- # a list of names, maps 'cython.parallel.prange' in the code to
- # ['cython', 'parallel', 'prange']
- parallel_directive = None
-
- # Indicates whether a namenode in an expression is the cython module
- namenode_is_cython_module = False
-
- # Keep track of whether we are the context manager of a 'with' statement
- in_context_manager_section = False
-
- # One of 'prange' or 'with parallel'. This is used to disallow closely
- # nested 'with parallel:' blocks
- state = None
-
- directive_to_node = {
- u"cython.parallel.parallel": Nodes.ParallelWithBlockNode,
- # u"cython.parallel.threadsavailable": ExprNodes.ParallelThreadsAvailableNode,
- u"cython.parallel.threadid": ExprNodes.ParallelThreadIdNode,
- u"cython.parallel.prange": Nodes.ParallelRangeNode,
- }
-
- def node_is_parallel_directive(self, node):
- return node.name in self.parallel_directives or node.is_cython_module
-
- def get_directive_class_node(self, node):
- """
- Figure out which parallel directive was used and return the associated
- Node class.
-
- E.g. for a cython.parallel.prange() call we return ParallelRangeNode
- """
- if self.namenode_is_cython_module:
- directive = '.'.join(self.parallel_directive)
- else:
- directive = self.parallel_directives[self.parallel_directive[0]]
- directive = '%s.%s' % (directive,
- '.'.join(self.parallel_directive[1:]))
- directive = directive.rstrip('.')
-
- cls = self.directive_to_node.get(directive)
- if cls is None and not (self.namenode_is_cython_module and
- self.parallel_directive[0] != 'parallel'):
- error(node.pos, "Invalid directive: %s" % directive)
-
- self.namenode_is_cython_module = False
- self.parallel_directive = None
-
- return cls
-
- def visit_ModuleNode(self, node):
- """
- If any parallel directives were imported, copy them over and visit
- the AST
- """
- if node.parallel_directives:
- self.parallel_directives = node.parallel_directives
- return self.visit_Node(node)
-
- # No parallel directives were imported, so they can't be used :)
- return node
-
- def visit_NameNode(self, node):
- if self.node_is_parallel_directive(node):
- self.parallel_directive = [node.name]
- self.namenode_is_cython_module = node.is_cython_module
- return node
-
- def visit_AttributeNode(self, node):
- self.visitchildren(node)
- if self.parallel_directive:
- self.parallel_directive.append(node.attribute)
- return node
-
- def visit_CallNode(self, node):
- self.visit(node.function)
- if not self.parallel_directive:
+ def visit_WithStatNode(self, node):
+ directive_dict = {}
+ for directive in self.try_to_parse_directives(node.manager) or []:
+ if directive is not None:
+ if node.target is not None:
+ self.context.nonfatal_error(
+ PostParseError(node.pos, "Compiler directive with statements cannot contain 'as'"))
+ else:
+ name, value = directive
+ if name in ('nogil', 'gil'):
+ # special case: in pure mode, "with nogil" spells "with cython.nogil"
+ node = Nodes.GILStatNode(node.pos, state = name, body = node.body)
+ return self.visit_Node(node)
+ if self.check_directive_scope(node.pos, name, 'with statement'):
+ directive_dict[name] = value
+ if directive_dict:
+ return self.visit_with_directives(node.body, directive_dict)
+ return self.visit_Node(node)
+
+
+class ParallelRangeTransform(CythonTransform, SkipDeclarations):
+ """
+    Transform cython.parallel constructs. The parallel_directives come from the
+ module node, set there by InterpretCompilerDirectives.
+
+ x = cython.parallel.threadavailable() -> ParallelThreadAvailableNode
+ with nogil, cython.parallel.parallel(): -> ParallelWithBlockNode
+ print cython.parallel.threadid() -> ParallelThreadIdNode
+ for i in cython.parallel.prange(...): -> ParallelRangeNode
+ ...
+ """
+
+ # a list of names, maps 'cython.parallel.prange' in the code to
+ # ['cython', 'parallel', 'prange']
+ parallel_directive = None
+
+ # Indicates whether a namenode in an expression is the cython module
+ namenode_is_cython_module = False
+
+ # Keep track of whether we are the context manager of a 'with' statement
+ in_context_manager_section = False
+
+ # One of 'prange' or 'with parallel'. This is used to disallow closely
+ # nested 'with parallel:' blocks
+ state = None
+
+ directive_to_node = {
+ u"cython.parallel.parallel": Nodes.ParallelWithBlockNode,
+ # u"cython.parallel.threadsavailable": ExprNodes.ParallelThreadsAvailableNode,
+ u"cython.parallel.threadid": ExprNodes.ParallelThreadIdNode,
+ u"cython.parallel.prange": Nodes.ParallelRangeNode,
+ }
+
+ def node_is_parallel_directive(self, node):
+ return node.name in self.parallel_directives or node.is_cython_module
+
+ def get_directive_class_node(self, node):
+ """
+ Figure out which parallel directive was used and return the associated
+ Node class.
+
+ E.g. for a cython.parallel.prange() call we return ParallelRangeNode
+ """
+ if self.namenode_is_cython_module:
+ directive = '.'.join(self.parallel_directive)
+ else:
+ directive = self.parallel_directives[self.parallel_directive[0]]
+ directive = '%s.%s' % (directive,
+ '.'.join(self.parallel_directive[1:]))
+ directive = directive.rstrip('.')
+
+ cls = self.directive_to_node.get(directive)
+ if cls is None and not (self.namenode_is_cython_module and
+ self.parallel_directive[0] != 'parallel'):
+ error(node.pos, "Invalid directive: %s" % directive)
+
+ self.namenode_is_cython_module = False
+ self.parallel_directive = None
+
+ return cls
+
+ def visit_ModuleNode(self, node):
+ """
+ If any parallel directives were imported, copy them over and visit
+ the AST
+ """
+ if node.parallel_directives:
+ self.parallel_directives = node.parallel_directives
+ return self.visit_Node(node)
+
+ # No parallel directives were imported, so they can't be used :)
+ return node
+
+ def visit_NameNode(self, node):
+ if self.node_is_parallel_directive(node):
+ self.parallel_directive = [node.name]
+ self.namenode_is_cython_module = node.is_cython_module
+ return node
+
+ def visit_AttributeNode(self, node):
+ self.visitchildren(node)
+ if self.parallel_directive:
+ self.parallel_directive.append(node.attribute)
+ return node
+
+ def visit_CallNode(self, node):
+ self.visit(node.function)
+ if not self.parallel_directive:
self.visitchildren(node, exclude=('function',))
- return node
-
- # We are a parallel directive, replace this node with the
- # corresponding ParallelSomethingSomething node
-
- if isinstance(node, ExprNodes.GeneralCallNode):
- args = node.positional_args.args
- kwargs = node.keyword_args
- else:
- args = node.args
- kwargs = {}
-
- parallel_directive_class = self.get_directive_class_node(node)
- if parallel_directive_class:
- # Note: in case of a parallel() the body is set by
- # visit_WithStatNode
- node = parallel_directive_class(node.pos, args=args, kwargs=kwargs)
-
- return node
-
- def visit_WithStatNode(self, node):
- "Rewrite with cython.parallel.parallel() blocks"
- newnode = self.visit(node.manager)
-
- if isinstance(newnode, Nodes.ParallelWithBlockNode):
- if self.state == 'parallel with':
- error(node.manager.pos,
- "Nested parallel with blocks are disallowed")
-
- self.state = 'parallel with'
- body = self.visit(node.body)
- self.state = None
-
- newnode.body = body
- return newnode
- elif self.parallel_directive:
- parallel_directive_class = self.get_directive_class_node(node)
-
- if not parallel_directive_class:
- # There was an error, stop here and now
- return None
-
- if parallel_directive_class is Nodes.ParallelWithBlockNode:
- error(node.pos, "The parallel directive must be called")
- return None
-
- node.body = self.visit(node.body)
- return node
-
- def visit_ForInStatNode(self, node):
- "Rewrite 'for i in cython.parallel.prange(...):'"
- self.visit(node.iterator)
- self.visit(node.target)
-
- in_prange = isinstance(node.iterator.sequence,
- Nodes.ParallelRangeNode)
- previous_state = self.state
-
- if in_prange:
- # This will replace the entire ForInStatNode, so copy the
- # attributes
- parallel_range_node = node.iterator.sequence
-
- parallel_range_node.target = node.target
- parallel_range_node.body = node.body
- parallel_range_node.else_clause = node.else_clause
-
- node = parallel_range_node
-
- if not isinstance(node.target, ExprNodes.NameNode):
- error(node.target.pos,
- "Can only iterate over an iteration variable")
-
- self.state = 'prange'
-
- self.visit(node.body)
- self.state = previous_state
- self.visit(node.else_clause)
- return node
-
- def visit(self, node):
- "Visit a node that may be None"
- if node is not None:
- return super(ParallelRangeTransform, self).visit(node)
-
-
-class WithTransform(CythonTransform, SkipDeclarations):
- def visit_WithStatNode(self, node):
- self.visitchildren(node, 'body')
- pos = node.pos
+ return node
+
+ # We are a parallel directive, replace this node with the
+ # corresponding ParallelSomethingSomething node
+
+ if isinstance(node, ExprNodes.GeneralCallNode):
+ args = node.positional_args.args
+ kwargs = node.keyword_args
+ else:
+ args = node.args
+ kwargs = {}
+
+ parallel_directive_class = self.get_directive_class_node(node)
+ if parallel_directive_class:
+ # Note: in case of a parallel() the body is set by
+ # visit_WithStatNode
+ node = parallel_directive_class(node.pos, args=args, kwargs=kwargs)
+
+ return node
+
+ def visit_WithStatNode(self, node):
+ "Rewrite with cython.parallel.parallel() blocks"
+ newnode = self.visit(node.manager)
+
+ if isinstance(newnode, Nodes.ParallelWithBlockNode):
+ if self.state == 'parallel with':
+ error(node.manager.pos,
+ "Nested parallel with blocks are disallowed")
+
+ self.state = 'parallel with'
+ body = self.visit(node.body)
+ self.state = None
+
+ newnode.body = body
+ return newnode
+ elif self.parallel_directive:
+ parallel_directive_class = self.get_directive_class_node(node)
+
+ if not parallel_directive_class:
+ # There was an error, stop here and now
+ return None
+
+ if parallel_directive_class is Nodes.ParallelWithBlockNode:
+ error(node.pos, "The parallel directive must be called")
+ return None
+
+ node.body = self.visit(node.body)
+ return node
+
+ def visit_ForInStatNode(self, node):
+ "Rewrite 'for i in cython.parallel.prange(...):'"
+ self.visit(node.iterator)
+ self.visit(node.target)
+
+ in_prange = isinstance(node.iterator.sequence,
+ Nodes.ParallelRangeNode)
+ previous_state = self.state
+
+ if in_prange:
+ # This will replace the entire ForInStatNode, so copy the
+ # attributes
+ parallel_range_node = node.iterator.sequence
+
+ parallel_range_node.target = node.target
+ parallel_range_node.body = node.body
+ parallel_range_node.else_clause = node.else_clause
+
+ node = parallel_range_node
+
+ if not isinstance(node.target, ExprNodes.NameNode):
+ error(node.target.pos,
+ "Can only iterate over an iteration variable")
+
+ self.state = 'prange'
+
+ self.visit(node.body)
+ self.state = previous_state
+ self.visit(node.else_clause)
+ return node
+
+ def visit(self, node):
+ "Visit a node that may be None"
+ if node is not None:
+ return super(ParallelRangeTransform, self).visit(node)
+
+
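The node classes in directive_to_node correspond to user code of roughly this shape (a schematic .pyx sketch, illustration only):

    from cython.parallel import prange, parallel, threadid

    def demo():
        cdef int i, tid, total = 0
        for i in prange(100, nogil=True):   # rewritten to a ParallelRangeNode
            total += 1
        with nogil, parallel():             # rewritten to a ParallelWithBlockNode
            tid = threadid()                # rewritten to a ParallelThreadIdNode
        return total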
+class WithTransform(CythonTransform, SkipDeclarations):
+ def visit_WithStatNode(self, node):
+ self.visitchildren(node, 'body')
+ pos = node.pos
is_async = node.is_async
- body, target, manager = node.body, node.target, node.manager
- node.enter_call = ExprNodes.SimpleCallNode(
- pos, function=ExprNodes.AttributeNode(
- pos, obj=ExprNodes.CloneNode(manager),
+ body, target, manager = node.body, node.target, node.manager
+ node.enter_call = ExprNodes.SimpleCallNode(
+ pos, function=ExprNodes.AttributeNode(
+ pos, obj=ExprNodes.CloneNode(manager),
attribute=EncodedString('__aenter__' if is_async else '__enter__'),
- is_special_lookup=True),
- args=[],
- is_temp=True)
-
+ is_special_lookup=True),
+ args=[],
+ is_temp=True)
+
if is_async:
node.enter_call = ExprNodes.AwaitExprNode(pos, arg=node.enter_call)
- if target is not None:
- body = Nodes.StatListNode(
- pos, stats=[
- Nodes.WithTargetAssignmentStatNode(
- pos, lhs=target, with_node=node),
- body])
-
- excinfo_target = ExprNodes.TupleNode(pos, slow=True, args=[
- ExprNodes.ExcValueNode(pos) for _ in range(3)])
- except_clause = Nodes.ExceptClauseNode(
- pos, body=Nodes.IfStatNode(
- pos, if_clauses=[
- Nodes.IfClauseNode(
- pos, condition=ExprNodes.NotNode(
- pos, operand=ExprNodes.WithExitCallNode(
- pos, with_stat=node,
- test_if_run=False,
+ if target is not None:
+ body = Nodes.StatListNode(
+ pos, stats=[
+ Nodes.WithTargetAssignmentStatNode(
+ pos, lhs=target, with_node=node),
+ body])
+
+ excinfo_target = ExprNodes.TupleNode(pos, slow=True, args=[
+ ExprNodes.ExcValueNode(pos) for _ in range(3)])
+ except_clause = Nodes.ExceptClauseNode(
+ pos, body=Nodes.IfStatNode(
+ pos, if_clauses=[
+ Nodes.IfClauseNode(
+ pos, condition=ExprNodes.NotNode(
+ pos, operand=ExprNodes.WithExitCallNode(
+ pos, with_stat=node,
+ test_if_run=False,
args=excinfo_target,
await_expr=ExprNodes.AwaitExprNode(pos, arg=None) if is_async else None)),
- body=Nodes.ReraiseStatNode(pos),
- ),
- ],
- else_clause=None),
- pattern=None,
- target=None,
- excinfo_target=excinfo_target,
- )
-
- node.body = Nodes.TryFinallyStatNode(
- pos, body=Nodes.TryExceptStatNode(
- pos, body=body,
- except_clauses=[except_clause],
- else_clause=None,
- ),
- finally_clause=Nodes.ExprStatNode(
- pos, expr=ExprNodes.WithExitCallNode(
- pos, with_stat=node,
- test_if_run=True,
- args=ExprNodes.TupleNode(
+ body=Nodes.ReraiseStatNode(pos),
+ ),
+ ],
+ else_clause=None),
+ pattern=None,
+ target=None,
+ excinfo_target=excinfo_target,
+ )
+
+ node.body = Nodes.TryFinallyStatNode(
+ pos, body=Nodes.TryExceptStatNode(
+ pos, body=body,
+ except_clauses=[except_clause],
+ else_clause=None,
+ ),
+ finally_clause=Nodes.ExprStatNode(
+ pos, expr=ExprNodes.WithExitCallNode(
+ pos, with_stat=node,
+ test_if_run=True,
+ args=ExprNodes.TupleNode(
pos, args=[ExprNodes.NoneNode(pos) for _ in range(3)]),
await_expr=ExprNodes.AwaitExprNode(pos, arg=None) if is_async else None)),
- handle_error_case=False,
- )
- return node
-
- def visit_ExprNode(self, node):
- # With statements are never inside expressions.
- return node
-
-
-class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations):
+ handle_error_case=False,
+ )
+ return node
+
+ def visit_ExprNode(self, node):
+ # With statements are never inside expressions.
+ return node
+
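The tree built above follows the standard context-manager protocol; in rough plain-Python terms the rewrite corresponds to this expansion (an illustration of the equivalence, not the generated code):

    import sys

    class Manager:
        def __enter__(self):
            return self
        def __exit__(self, exc_type, exc_value, tb):
            return False            # do not swallow exceptions

    # "with Manager() as m: <body>" behaves roughly like:
    mgr = Manager()
    m = mgr.__enter__()
    try:
        print("body", m)            # <body>
    except BaseException:
        if not mgr.__exit__(*sys.exc_info()):
            raise
    else:
        mgr.__exit__(None, None, None)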
+
+class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations):
"""
Transforms method decorators in cdef classes into nested calls or properties.
-
+
Python-style decorator properties are transformed into a PropertyNode
with up to the three getter, setter and deleter DefNodes.
The functional style isn't supported yet.
- """
+ """
_properties = None
-
+
_map_property_attribute = {
'getter': '__get__',
'setter': '__set__',
@@ -1346,11 +1346,11 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations):
return node
def visit_DefNode(self, node):
- scope_type = self.scope_type
+ scope_type = self.scope_type
node = self.visit_FuncDefNode(node)
if scope_type != 'cclass' or not node.decorators:
return node
-
+
# transform @property decorators
properties = self._properties[-1]
for decorator_node in node.decorators[::-1]:
@@ -1432,105 +1432,105 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations):
The IndirectionNode allows DefNode to override the decorator.
"""
decorator_result = ExprNodes.NameNode(node.pos, name=name)
- for decorator in decorators[::-1]:
- decorator_result = ExprNodes.SimpleCallNode(
- decorator.pos,
+ for decorator in decorators[::-1]:
+ decorator_result = ExprNodes.SimpleCallNode(
+ decorator.pos,
function=decorator.decorator,
args=[decorator_result])
-
+
name_node = ExprNodes.NameNode(node.pos, name=name)
- reassignment = Nodes.SingleAssignmentNode(
- node.pos,
+ reassignment = Nodes.SingleAssignmentNode(
+ node.pos,
lhs=name_node,
rhs=decorator_result)
-
- reassignment = Nodes.IndirectionNode([reassignment])
- node.decorator_indirection = reassignment
- return [node, reassignment]
-
-
-class CnameDirectivesTransform(CythonTransform, SkipDeclarations):
- """
- Only part of the CythonUtilityCode pipeline. Must be run before
- DecoratorTransform in case this is a decorator for a cdef class.
- It filters out @cname('my_cname') decorators and rewrites them to
- CnameDecoratorNodes.
- """
-
- def handle_function(self, node):
- if not getattr(node, 'decorators', None):
- return self.visit_Node(node)
-
- for i, decorator in enumerate(node.decorators):
- decorator = decorator.decorator
-
- if (isinstance(decorator, ExprNodes.CallNode) and
- decorator.function.is_name and
- decorator.function.name == 'cname'):
- args, kwargs = decorator.explicit_args_kwds()
-
- if kwargs:
- raise AssertionError(
- "cname decorator does not take keyword arguments")
-
- if len(args) != 1:
- raise AssertionError(
- "cname decorator takes exactly one argument")
-
- if not (args[0].is_literal and
- args[0].type == Builtin.str_type):
- raise AssertionError(
- "argument to cname decorator must be a string literal")
-
+
+ reassignment = Nodes.IndirectionNode([reassignment])
+ node.decorator_indirection = reassignment
+ return [node, reassignment]
+
+
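In plain-Python terms the loop above builds ordinary decorator chaining, sketched here with hypothetical decorator names:

    def shout(f):
        return lambda *a: f(*a).upper()

    def exclaim(f):
        return lambda *a: f(*a) + "!"

    @shout
    @exclaim
    def greet(name):
        return "hello " + name

    # chain_decorators emits the equivalent reassignment:
    #     greet = shout(exclaim(greet))
    assert greet("ada") == "HELLO ADA!"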
+class CnameDirectivesTransform(CythonTransform, SkipDeclarations):
+ """
+ Only part of the CythonUtilityCode pipeline. Must be run before
+ DecoratorTransform in case this is a decorator for a cdef class.
+ It filters out @cname('my_cname') decorators and rewrites them to
+ CnameDecoratorNodes.
+ """
+
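A hedged sketch of the utility-code pattern this filters out; the class name and C name below are illustrative:

    @cname('__pyx_ExampleHelper')
    cdef class ExampleHelper:
        cdef int value

    # The decorator (which must carry exactly one string literal argument) is
    # removed from the decorator list and the definition is wrapped in a
    # CnameDecoratorNode carrying the literal C name.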
+ def handle_function(self, node):
+ if not getattr(node, 'decorators', None):
+ return self.visit_Node(node)
+
+ for i, decorator in enumerate(node.decorators):
+ decorator = decorator.decorator
+
+ if (isinstance(decorator, ExprNodes.CallNode) and
+ decorator.function.is_name and
+ decorator.function.name == 'cname'):
+ args, kwargs = decorator.explicit_args_kwds()
+
+ if kwargs:
+ raise AssertionError(
+ "cname decorator does not take keyword arguments")
+
+ if len(args) != 1:
+ raise AssertionError(
+ "cname decorator takes exactly one argument")
+
+ if not (args[0].is_literal and
+ args[0].type == Builtin.str_type):
+ raise AssertionError(
+ "argument to cname decorator must be a string literal")
+
cname = args[0].compile_time_value(None)
- del node.decorators[i]
- node = Nodes.CnameDecoratorNode(pos=node.pos, node=node,
- cname=cname)
- break
-
- return self.visit_Node(node)
-
- visit_FuncDefNode = handle_function
- visit_CClassDefNode = handle_function
- visit_CEnumDefNode = handle_function
- visit_CStructOrUnionDefNode = handle_function
-
-
-class ForwardDeclareTypes(CythonTransform):
-
- def visit_CompilerDirectivesNode(self, node):
- env = self.module_scope
- old = env.directives
- env.directives = node.directives
- self.visitchildren(node)
- env.directives = old
- return node
-
- def visit_ModuleNode(self, node):
- self.module_scope = node.scope
- self.module_scope.directives = node.directives
- self.visitchildren(node)
- return node
-
- def visit_CDefExternNode(self, node):
- old_cinclude_flag = self.module_scope.in_cinclude
- self.module_scope.in_cinclude = 1
- self.visitchildren(node)
- self.module_scope.in_cinclude = old_cinclude_flag
- return node
-
- def visit_CEnumDefNode(self, node):
- node.declare(self.module_scope)
- return node
-
- def visit_CStructOrUnionDefNode(self, node):
- if node.name not in self.module_scope.entries:
- node.declare(self.module_scope)
- return node
-
- def visit_CClassDefNode(self, node):
- if node.class_name not in self.module_scope.entries:
- node.declare(self.module_scope)
+ del node.decorators[i]
+ node = Nodes.CnameDecoratorNode(pos=node.pos, node=node,
+ cname=cname)
+ break
+
+ return self.visit_Node(node)
+
+ visit_FuncDefNode = handle_function
+ visit_CClassDefNode = handle_function
+ visit_CEnumDefNode = handle_function
+ visit_CStructOrUnionDefNode = handle_function
+
+
+class ForwardDeclareTypes(CythonTransform):
+
+ def visit_CompilerDirectivesNode(self, node):
+ env = self.module_scope
+ old = env.directives
+ env.directives = node.directives
+ self.visitchildren(node)
+ env.directives = old
+ return node
+
+ def visit_ModuleNode(self, node):
+ self.module_scope = node.scope
+ self.module_scope.directives = node.directives
+ self.visitchildren(node)
+ return node
+
+ def visit_CDefExternNode(self, node):
+ old_cinclude_flag = self.module_scope.in_cinclude
+ self.module_scope.in_cinclude = 1
+ self.visitchildren(node)
+ self.module_scope.in_cinclude = old_cinclude_flag
+ return node
+
+ def visit_CEnumDefNode(self, node):
+ node.declare(self.module_scope)
+ return node
+
+ def visit_CStructOrUnionDefNode(self, node):
+ if node.name not in self.module_scope.entries:
+ node.declare(self.module_scope)
+ return node
+
+ def visit_CClassDefNode(self, node):
+ if node.class_name not in self.module_scope.entries:
+ node.declare(self.module_scope)
# Expand fused methods of .pxd declared types to construct the final vtable order.
type = self.module_scope.entries[node.class_name].type
if type is not None and type.is_extension_type and not type.is_builtin_type and type.scope:
@@ -1538,104 +1538,104 @@ class ForwardDeclareTypes(CythonTransform):
for entry in scope.cfunc_entries:
if entry.type and entry.type.is_fused:
entry.type.get_all_specialized_function_types()
- return node
-
-
-class AnalyseDeclarationsTransform(EnvTransform):
-
- basic_property = TreeFragment(u"""
-property NAME:
- def __get__(self):
- return ATTR
- def __set__(self, value):
- ATTR = value
- """, level='c_class', pipeline=[NormalizeTree(None)])
- basic_pyobject_property = TreeFragment(u"""
-property NAME:
- def __get__(self):
- return ATTR
- def __set__(self, value):
- ATTR = value
- def __del__(self):
- ATTR = None
- """, level='c_class', pipeline=[NormalizeTree(None)])
- basic_property_ro = TreeFragment(u"""
-property NAME:
- def __get__(self):
- return ATTR
- """, level='c_class', pipeline=[NormalizeTree(None)])
-
- struct_or_union_wrapper = TreeFragment(u"""
-cdef class NAME:
- cdef TYPE value
- def __init__(self, MEMBER=None):
- cdef int count
- count = 0
- INIT_ASSIGNMENTS
- if IS_UNION and count > 1:
- raise ValueError, "At most one union member should be specified."
- def __str__(self):
- return STR_FORMAT % MEMBER_TUPLE
- def __repr__(self):
- return REPR_FORMAT % MEMBER_TUPLE
- """, pipeline=[NormalizeTree(None)])
-
- init_assignment = TreeFragment(u"""
-if VALUE is not None:
- ATTR = VALUE
- count += 1
- """, pipeline=[NormalizeTree(None)])
-
- fused_function = None
- in_lambda = 0
-
- def __call__(self, root):
- # needed to determine if a cdef var is declared after it's used.
- self.seen_vars_stack = []
- self.fused_error_funcs = set()
- super_class = super(AnalyseDeclarationsTransform, self)
- self._super_visit_FuncDefNode = super_class.visit_FuncDefNode
- return super_class.__call__(root)
-
- def visit_NameNode(self, node):
- self.seen_vars_stack[-1].add(node.name)
- return node
-
- def visit_ModuleNode(self, node):
+ return node
+
+
+class AnalyseDeclarationsTransform(EnvTransform):
+
+ basic_property = TreeFragment(u"""
+property NAME:
+ def __get__(self):
+ return ATTR
+ def __set__(self, value):
+ ATTR = value
+ """, level='c_class', pipeline=[NormalizeTree(None)])
+ basic_pyobject_property = TreeFragment(u"""
+property NAME:
+ def __get__(self):
+ return ATTR
+ def __set__(self, value):
+ ATTR = value
+ def __del__(self):
+ ATTR = None
+ """, level='c_class', pipeline=[NormalizeTree(None)])
+ basic_property_ro = TreeFragment(u"""
+property NAME:
+ def __get__(self):
+ return ATTR
+ """, level='c_class', pipeline=[NormalizeTree(None)])
+
+ struct_or_union_wrapper = TreeFragment(u"""
+cdef class NAME:
+ cdef TYPE value
+ def __init__(self, MEMBER=None):
+ cdef int count
+ count = 0
+ INIT_ASSIGNMENTS
+ if IS_UNION and count > 1:
+ raise ValueError, "At most one union member should be specified."
+ def __str__(self):
+ return STR_FORMAT % MEMBER_TUPLE
+ def __repr__(self):
+ return REPR_FORMAT % MEMBER_TUPLE
+ """, pipeline=[NormalizeTree(None)])
+
+ init_assignment = TreeFragment(u"""
+if VALUE is not None:
+ ATTR = VALUE
+ count += 1
+ """, pipeline=[NormalizeTree(None)])
+
+ fused_function = None
+ in_lambda = 0
+
+ def __call__(self, root):
+ # needed to determine if a cdef var is declared after it's used.
+ self.seen_vars_stack = []
+ self.fused_error_funcs = set()
+ super_class = super(AnalyseDeclarationsTransform, self)
+ self._super_visit_FuncDefNode = super_class.visit_FuncDefNode
+ return super_class.__call__(root)
+
+ def visit_NameNode(self, node):
+ self.seen_vars_stack[-1].add(node.name)
+ return node
+
+ def visit_ModuleNode(self, node):
# Pickling support requires injecting module-level nodes.
self.extra_module_declarations = []
- self.seen_vars_stack.append(set())
- node.analyse_declarations(self.current_env())
- self.visitchildren(node)
- self.seen_vars_stack.pop()
+ self.seen_vars_stack.append(set())
+ node.analyse_declarations(self.current_env())
+ self.visitchildren(node)
+ self.seen_vars_stack.pop()
node.body.stats.extend(self.extra_module_declarations)
- return node
-
- def visit_LambdaNode(self, node):
- self.in_lambda += 1
- node.analyse_declarations(self.current_env())
- self.visitchildren(node)
- self.in_lambda -= 1
- return node
-
- def visit_CClassDefNode(self, node):
- node = self.visit_ClassDefNode(node)
- if node.scope and node.scope.implemented and node.body:
- stats = []
- for entry in node.scope.var_entries:
- if entry.needs_property:
- property = self.create_Property(entry)
- property.analyse_declarations(node.scope)
- self.visit(property)
- stats.append(property)
- if stats:
- node.body.stats += stats
+ return node
+
+ def visit_LambdaNode(self, node):
+ self.in_lambda += 1
+ node.analyse_declarations(self.current_env())
+ self.visitchildren(node)
+ self.in_lambda -= 1
+ return node
+
+ def visit_CClassDefNode(self, node):
+ node = self.visit_ClassDefNode(node)
+ if node.scope and node.scope.implemented and node.body:
+ stats = []
+ for entry in node.scope.var_entries:
+ if entry.needs_property:
+ property = self.create_Property(entry)
+ property.analyse_declarations(node.scope)
+ self.visit(property)
+ stats.append(property)
+ if stats:
+ node.body.stats += stats
if (node.visibility != 'extern'
and not node.scope.lookup('__reduce__')
and not node.scope.lookup('__reduce_ex__')):
self._inject_pickle_methods(node)
- return node
-
+ return node
+
def _inject_pickle_methods(self, node):
env = self.current_env()
if node.scope.directives['auto_pickle'] is False: # None means attempt it.
@@ -1769,409 +1769,409 @@ if VALUE is not None:
self.exit_scope()
node.body.stats.append(pickle_func)
- def _handle_fused_def_decorators(self, old_decorators, env, node):
- """
- Create function calls to the decorators and reassignments to
- the function.
- """
- # Delete staticmethod and classmethod decorators, this is
- # handled directly by the fused function object.
- decorators = []
- for decorator in old_decorators:
- func = decorator.decorator
- if (not func.is_name or
- func.name not in ('staticmethod', 'classmethod') or
- env.lookup_here(func.name)):
- # not a static or classmethod
- decorators.append(decorator)
-
- if decorators:
- transform = DecoratorTransform(self.context)
- def_node = node.node
+ def _handle_fused_def_decorators(self, old_decorators, env, node):
+ """
+ Create function calls to the decorators and reassignments to
+ the function.
+ """
+ # Delete staticmethod and classmethod decorators, this is
+ # handled directly by the fused function object.
+ decorators = []
+ for decorator in old_decorators:
+ func = decorator.decorator
+ if (not func.is_name or
+ func.name not in ('staticmethod', 'classmethod') or
+ env.lookup_here(func.name)):
+ # not a static or classmethod
+ decorators.append(decorator)
+
+ if decorators:
+ transform = DecoratorTransform(self.context)
+ def_node = node.node
_, reassignments = transform.chain_decorators(
- def_node, decorators, def_node.name)
- reassignments.analyse_declarations(env)
- node = [node, reassignments]
-
- return node
-
- def _handle_def(self, decorators, env, node):
- "Handle def or cpdef fused functions"
- # Create PyCFunction nodes for each specialization
- node.stats.insert(0, node.py_func)
- node.py_func = self.visit(node.py_func)
- node.update_fused_defnode_entry(env)
+ def_node, decorators, def_node.name)
+ reassignments.analyse_declarations(env)
+ node = [node, reassignments]
+
+ return node
+
+ def _handle_def(self, decorators, env, node):
+ "Handle def or cpdef fused functions"
+ # Create PyCFunction nodes for each specialization
+ node.stats.insert(0, node.py_func)
+ node.py_func = self.visit(node.py_func)
+ node.update_fused_defnode_entry(env)
pycfunc = ExprNodes.PyCFunctionNode.from_defnode(node.py_func, binding=True)
- pycfunc = ExprNodes.ProxyNode(pycfunc.coerce_to_temp(env))
- node.resulting_fused_function = pycfunc
- # Create assignment node for our def function
- node.fused_func_assignment = self._create_assignment(
- node.py_func, ExprNodes.CloneNode(pycfunc), env)
-
- if decorators:
- node = self._handle_fused_def_decorators(decorators, env, node)
-
- return node
-
- def _create_fused_function(self, env, node):
- "Create a fused function for a DefNode with fused arguments"
- from . import FusedNode
-
- if self.fused_function or self.in_lambda:
- if self.fused_function not in self.fused_error_funcs:
- if self.in_lambda:
- error(node.pos, "Fused lambdas not allowed")
- else:
- error(node.pos, "Cannot nest fused functions")
-
- self.fused_error_funcs.add(self.fused_function)
-
- node.body = Nodes.PassStatNode(node.pos)
- for arg in node.args:
- if arg.type.is_fused:
- arg.type = arg.type.get_fused_types()[0]
-
- return node
-
- decorators = getattr(node, 'decorators', None)
- node = FusedNode.FusedCFuncDefNode(node, env)
- self.fused_function = node
- self.visitchildren(node)
- self.fused_function = None
- if node.py_func:
- node = self._handle_def(decorators, env, node)
-
- return node
-
- def _handle_nogil_cleanup(self, lenv, node):
- "Handle cleanup for 'with gil' blocks in nogil functions."
- if lenv.nogil and lenv.has_with_gil_block:
- # Acquire the GIL for cleanup in 'nogil' functions, by wrapping
- # the entire function body in try/finally.
- # The corresponding release will be taken care of by
- # Nodes.FuncDefNode.generate_function_definitions()
- node.body = Nodes.NogilTryFinallyStatNode(
- node.body.pos,
- body=node.body,
+ pycfunc = ExprNodes.ProxyNode(pycfunc.coerce_to_temp(env))
+ node.resulting_fused_function = pycfunc
+ # Create assignment node for our def function
+ node.fused_func_assignment = self._create_assignment(
+ node.py_func, ExprNodes.CloneNode(pycfunc), env)
+
+ if decorators:
+ node = self._handle_fused_def_decorators(decorators, env, node)
+
+ return node
+
+ def _create_fused_function(self, env, node):
+ "Create a fused function for a DefNode with fused arguments"
+ from . import FusedNode
+
+ if self.fused_function or self.in_lambda:
+ if self.fused_function not in self.fused_error_funcs:
+ if self.in_lambda:
+ error(node.pos, "Fused lambdas not allowed")
+ else:
+ error(node.pos, "Cannot nest fused functions")
+
+ self.fused_error_funcs.add(self.fused_function)
+
+ node.body = Nodes.PassStatNode(node.pos)
+ for arg in node.args:
+ if arg.type.is_fused:
+ arg.type = arg.type.get_fused_types()[0]
+
+ return node
+
+ decorators = getattr(node, 'decorators', None)
+ node = FusedNode.FusedCFuncDefNode(node, env)
+ self.fused_function = node
+ self.visitchildren(node)
+ self.fused_function = None
+ if node.py_func:
+ node = self._handle_def(decorators, env, node)
+
+ return node
+
+ def _handle_nogil_cleanup(self, lenv, node):
+ "Handle cleanup for 'with gil' blocks in nogil functions."
+ if lenv.nogil and lenv.has_with_gil_block:
+ # Acquire the GIL for cleanup in 'nogil' functions, by wrapping
+ # the entire function body in try/finally.
+ # The corresponding release will be taken care of by
+ # Nodes.FuncDefNode.generate_function_definitions()
+ node.body = Nodes.NogilTryFinallyStatNode(
+ node.body.pos,
+ body=node.body,
finally_clause=Nodes.EnsureGILNode(node.body.pos),
finally_except_clause=Nodes.EnsureGILNode(node.body.pos))
-
- def _handle_fused(self, node):
- if node.is_generator and node.has_fused_arguments:
- node.has_fused_arguments = False
- error(node.pos, "Fused generators not supported")
- node.gbody = Nodes.StatListNode(node.pos,
- stats=[],
- body=Nodes.PassStatNode(node.pos))
-
- return node.has_fused_arguments
-
- def visit_FuncDefNode(self, node):
- """
+
+ def _handle_fused(self, node):
+ if node.is_generator and node.has_fused_arguments:
+ node.has_fused_arguments = False
+ error(node.pos, "Fused generators not supported")
+ node.gbody = Nodes.StatListNode(node.pos,
+ stats=[],
+ body=Nodes.PassStatNode(node.pos))
+
+ return node.has_fused_arguments
+
+ def visit_FuncDefNode(self, node):
+ """
Analyse a function and its body, as that hasn't happened yet. Also
- analyse the directive_locals set by @cython.locals().
-
- Then, if we are a function with fused arguments, replace the function
- (after it has declared itself in the symbol table!) with a
- FusedCFuncDefNode, and analyse its children (which are in turn normal
- functions). If we're a normal function, just analyse the body of the
- function.
- """
- env = self.current_env()
-
- self.seen_vars_stack.append(set())
- lenv = node.local_scope
- node.declare_arguments(lenv)
-
- # @cython.locals(...)
- for var, type_node in node.directive_locals.items():
- if not lenv.lookup_here(var): # don't redeclare args
- type = type_node.analyse_as_type(lenv)
- if type:
- lenv.declare_var(var, type, type_node.pos)
- else:
- error(type_node.pos, "Not a type")
-
- if self._handle_fused(node):
- node = self._create_fused_function(env, node)
- else:
- node.body.analyse_declarations(lenv)
- self._handle_nogil_cleanup(lenv, node)
- self._super_visit_FuncDefNode(node)
-
- self.seen_vars_stack.pop()
- return node
-
- def visit_DefNode(self, node):
- node = self.visit_FuncDefNode(node)
- env = self.current_env()
+ analyse the directive_locals set by @cython.locals().
+
+ Then, if we are a function with fused arguments, replace the function
+ (after it has declared itself in the symbol table!) with a
+ FusedCFuncDefNode, and analyse its children (which are in turn normal
+ functions). If we're a normal function, just analyse the body of the
+ function.
+ """
+ env = self.current_env()
+
+ self.seen_vars_stack.append(set())
+ lenv = node.local_scope
+ node.declare_arguments(lenv)
+
+ # @cython.locals(...)
+ for var, type_node in node.directive_locals.items():
+ if not lenv.lookup_here(var): # don't redeclare args
+ type = type_node.analyse_as_type(lenv)
+ if type:
+ lenv.declare_var(var, type, type_node.pos)
+ else:
+ error(type_node.pos, "Not a type")
+
+ if self._handle_fused(node):
+ node = self._create_fused_function(env, node)
+ else:
+ node.body.analyse_declarations(lenv)
+ self._handle_nogil_cleanup(lenv, node)
+ self._super_visit_FuncDefNode(node)
+
+ self.seen_vars_stack.pop()
+ return node
+
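The '@cython.locals(...)' handling above refers to declarations like the following pure-Python-mode sketch; the function and variable names are illustrative:

    import cython

    @cython.locals(total=cython.double, i=cython.int)
    def mean(values):
        total = 0.0
        for i in range(len(values)):
            total += values[i]
        return total / len(values)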
+ def visit_DefNode(self, node):
+ node = self.visit_FuncDefNode(node)
+ env = self.current_env()
if isinstance(node, Nodes.DefNode) and node.is_wrapper:
env = env.parent_scope
- if (not isinstance(node, Nodes.DefNode) or
- node.fused_py_func or node.is_generator_body or
- not node.needs_assignment_synthesis(env)):
- return node
- return [node, self._synthesize_assignment(node, env)]
-
- def visit_GeneratorBodyDefNode(self, node):
- return self.visit_FuncDefNode(node)
-
- def _synthesize_assignment(self, node, env):
- # Synthesize assignment node and put it right after defnode
- genv = env
- while genv.is_py_class_scope or genv.is_c_class_scope:
- genv = genv.outer_scope
-
- if genv.is_closure_scope:
- rhs = node.py_cfunc_node = ExprNodes.InnerFunctionNode(
- node.pos, def_node=node,
- pymethdef_cname=node.entry.pymethdef_cname,
- code_object=ExprNodes.CodeObjectNode(node))
- else:
- binding = self.current_directives.get('binding')
- rhs = ExprNodes.PyCFunctionNode.from_defnode(node, binding)
+ if (not isinstance(node, Nodes.DefNode) or
+ node.fused_py_func or node.is_generator_body or
+ not node.needs_assignment_synthesis(env)):
+ return node
+ return [node, self._synthesize_assignment(node, env)]
+
+ def visit_GeneratorBodyDefNode(self, node):
+ return self.visit_FuncDefNode(node)
+
+ def _synthesize_assignment(self, node, env):
+ # Synthesize assignment node and put it right after defnode
+ genv = env
+ while genv.is_py_class_scope or genv.is_c_class_scope:
+ genv = genv.outer_scope
+
+ if genv.is_closure_scope:
+ rhs = node.py_cfunc_node = ExprNodes.InnerFunctionNode(
+ node.pos, def_node=node,
+ pymethdef_cname=node.entry.pymethdef_cname,
+ code_object=ExprNodes.CodeObjectNode(node))
+ else:
+ binding = self.current_directives.get('binding')
+ rhs = ExprNodes.PyCFunctionNode.from_defnode(node, binding)
node.code_object = rhs.code_object
if node.is_generator:
node.gbody.code_object = node.code_object
-
- if env.is_py_class_scope:
- rhs.binding = True
-
- node.is_cyfunction = rhs.binding
- return self._create_assignment(node, rhs, env)
-
- def _create_assignment(self, def_node, rhs, env):
- if def_node.decorators:
- for decorator in def_node.decorators[::-1]:
- rhs = ExprNodes.SimpleCallNode(
- decorator.pos,
- function = decorator.decorator,
- args = [rhs])
- def_node.decorators = None
-
- assmt = Nodes.SingleAssignmentNode(
- def_node.pos,
- lhs=ExprNodes.NameNode(def_node.pos, name=def_node.name),
- rhs=rhs)
- assmt.analyse_declarations(env)
- return assmt
-
- def visit_ScopedExprNode(self, node):
- env = self.current_env()
- node.analyse_declarations(env)
- # the node may or may not have a local scope
- if node.has_local_scope:
- self.seen_vars_stack.append(set(self.seen_vars_stack[-1]))
- self.enter_scope(node, node.expr_scope)
- node.analyse_scoped_declarations(node.expr_scope)
- self.visitchildren(node)
- self.exit_scope()
- self.seen_vars_stack.pop()
- else:
- node.analyse_scoped_declarations(env)
- self.visitchildren(node)
- return node
-
- def visit_TempResultFromStatNode(self, node):
- self.visitchildren(node)
- node.analyse_declarations(self.current_env())
- return node
-
- def visit_CppClassNode(self, node):
- if node.visibility == 'extern':
- return None
- else:
- return self.visit_ClassDefNode(node)
-
- def visit_CStructOrUnionDefNode(self, node):
- # Create a wrapper node if needed.
- # We want to use the struct type information (so it can't happen
- # before this phase) but also create new objects to be declared
- # (so it can't happen later).
- # Note that we don't return the original node, as it is
- # never used after this phase.
- if True: # private (default)
- return None
-
- self_value = ExprNodes.AttributeNode(
- pos = node.pos,
- obj = ExprNodes.NameNode(pos=node.pos, name=u"self"),
- attribute = EncodedString(u"value"))
- var_entries = node.entry.type.scope.var_entries
- attributes = []
- for entry in var_entries:
- attributes.append(ExprNodes.AttributeNode(pos = entry.pos,
- obj = self_value,
- attribute = entry.name))
- # __init__ assignments
- init_assignments = []
- for entry, attr in zip(var_entries, attributes):
- # TODO: branch on visibility
- init_assignments.append(self.init_assignment.substitute({
- u"VALUE": ExprNodes.NameNode(entry.pos, name = entry.name),
- u"ATTR": attr,
- }, pos = entry.pos))
-
- # create the class
- str_format = u"%s(%s)" % (node.entry.type.name, ("%s, " * len(attributes))[:-2])
- wrapper_class = self.struct_or_union_wrapper.substitute({
- u"INIT_ASSIGNMENTS": Nodes.StatListNode(node.pos, stats = init_assignments),
- u"IS_UNION": ExprNodes.BoolNode(node.pos, value = not node.entry.type.is_struct),
- u"MEMBER_TUPLE": ExprNodes.TupleNode(node.pos, args=attributes),
- u"STR_FORMAT": ExprNodes.StringNode(node.pos, value = EncodedString(str_format)),
- u"REPR_FORMAT": ExprNodes.StringNode(node.pos, value = EncodedString(str_format.replace("%s", "%r"))),
- }, pos = node.pos).stats[0]
- wrapper_class.class_name = node.name
- wrapper_class.shadow = True
- class_body = wrapper_class.body.stats
-
- # fix value type
- assert isinstance(class_body[0].base_type, Nodes.CSimpleBaseTypeNode)
- class_body[0].base_type.name = node.name
-
- # fix __init__ arguments
- init_method = class_body[1]
- assert isinstance(init_method, Nodes.DefNode) and init_method.name == '__init__'
- arg_template = init_method.args[1]
- if not node.entry.type.is_struct:
- arg_template.kw_only = True
- del init_method.args[1]
- for entry, attr in zip(var_entries, attributes):
- arg = copy.deepcopy(arg_template)
- arg.declarator.name = entry.name
- init_method.args.append(arg)
-
- # setters/getters
- for entry, attr in zip(var_entries, attributes):
- # TODO: branch on visibility
- if entry.type.is_pyobject:
- template = self.basic_pyobject_property
- else:
- template = self.basic_property
- property = template.substitute({
- u"ATTR": attr,
- }, pos = entry.pos).stats[0]
- property.name = entry.name
- wrapper_class.body.stats.append(property)
-
- wrapper_class.analyse_declarations(self.current_env())
- return self.visit_CClassDefNode(wrapper_class)
-
- # Some nodes are no longer needed after declaration
- # analysis and can be dropped. The analysis was performed
+
+ if env.is_py_class_scope:
+ rhs.binding = True
+
+ node.is_cyfunction = rhs.binding
+ return self._create_assignment(node, rhs, env)
+
+ def _create_assignment(self, def_node, rhs, env):
+ if def_node.decorators:
+ for decorator in def_node.decorators[::-1]:
+ rhs = ExprNodes.SimpleCallNode(
+ decorator.pos,
+ function = decorator.decorator,
+ args = [rhs])
+ def_node.decorators = None
+
+ assmt = Nodes.SingleAssignmentNode(
+ def_node.pos,
+ lhs=ExprNodes.NameNode(def_node.pos, name=def_node.name),
+ rhs=rhs)
+ assmt.analyse_declarations(env)
+ return assmt
+
+ def visit_ScopedExprNode(self, node):
+ env = self.current_env()
+ node.analyse_declarations(env)
+ # the node may or may not have a local scope
+ if node.has_local_scope:
+ self.seen_vars_stack.append(set(self.seen_vars_stack[-1]))
+ self.enter_scope(node, node.expr_scope)
+ node.analyse_scoped_declarations(node.expr_scope)
+ self.visitchildren(node)
+ self.exit_scope()
+ self.seen_vars_stack.pop()
+ else:
+ node.analyse_scoped_declarations(env)
+ self.visitchildren(node)
+ return node
+
+ def visit_TempResultFromStatNode(self, node):
+ self.visitchildren(node)
+ node.analyse_declarations(self.current_env())
+ return node
+
+ def visit_CppClassNode(self, node):
+ if node.visibility == 'extern':
+ return None
+ else:
+ return self.visit_ClassDefNode(node)
+
+ def visit_CStructOrUnionDefNode(self, node):
+ # Create a wrapper node if needed.
+ # We want to use the struct type information (so it can't happen
+ # before this phase) but also create new objects to be declared
+ # (so it can't happen later).
+ # Note that we don't return the original node, as it is
+ # never used after this phase.
+ if True: # private (default)
+ return None
+
+ self_value = ExprNodes.AttributeNode(
+ pos = node.pos,
+ obj = ExprNodes.NameNode(pos=node.pos, name=u"self"),
+ attribute = EncodedString(u"value"))
+ var_entries = node.entry.type.scope.var_entries
+ attributes = []
+ for entry in var_entries:
+ attributes.append(ExprNodes.AttributeNode(pos = entry.pos,
+ obj = self_value,
+ attribute = entry.name))
+ # __init__ assignments
+ init_assignments = []
+ for entry, attr in zip(var_entries, attributes):
+ # TODO: branch on visibility
+ init_assignments.append(self.init_assignment.substitute({
+ u"VALUE": ExprNodes.NameNode(entry.pos, name = entry.name),
+ u"ATTR": attr,
+ }, pos = entry.pos))
+
+ # create the class
+ str_format = u"%s(%s)" % (node.entry.type.name, ("%s, " * len(attributes))[:-2])
+ wrapper_class = self.struct_or_union_wrapper.substitute({
+ u"INIT_ASSIGNMENTS": Nodes.StatListNode(node.pos, stats = init_assignments),
+ u"IS_UNION": ExprNodes.BoolNode(node.pos, value = not node.entry.type.is_struct),
+ u"MEMBER_TUPLE": ExprNodes.TupleNode(node.pos, args=attributes),
+ u"STR_FORMAT": ExprNodes.StringNode(node.pos, value = EncodedString(str_format)),
+ u"REPR_FORMAT": ExprNodes.StringNode(node.pos, value = EncodedString(str_format.replace("%s", "%r"))),
+ }, pos = node.pos).stats[0]
+ wrapper_class.class_name = node.name
+ wrapper_class.shadow = True
+ class_body = wrapper_class.body.stats
+
+ # fix value type
+ assert isinstance(class_body[0].base_type, Nodes.CSimpleBaseTypeNode)
+ class_body[0].base_type.name = node.name
+
+ # fix __init__ arguments
+ init_method = class_body[1]
+ assert isinstance(init_method, Nodes.DefNode) and init_method.name == '__init__'
+ arg_template = init_method.args[1]
+ if not node.entry.type.is_struct:
+ arg_template.kw_only = True
+ del init_method.args[1]
+ for entry, attr in zip(var_entries, attributes):
+ arg = copy.deepcopy(arg_template)
+ arg.declarator.name = entry.name
+ init_method.args.append(arg)
+
+ # setters/getters
+ for entry, attr in zip(var_entries, attributes):
+ # TODO: branch on visibility
+ if entry.type.is_pyobject:
+ template = self.basic_pyobject_property
+ else:
+ template = self.basic_property
+ property = template.substitute({
+ u"ATTR": attr,
+ }, pos = entry.pos).stats[0]
+ property.name = entry.name
+ wrapper_class.body.stats.append(property)
+
+ wrapper_class.analyse_declarations(self.current_env())
+ return self.visit_CClassDefNode(wrapper_class)
+
+ # Some nodes are no longer needed after declaration
+ # analysis and can be dropped. The analysis was performed
# on these nodes in a separate recursive process from the
- # enclosing function or module, so we can simply drop them.
- def visit_CDeclaratorNode(self, node):
- # necessary to ensure that all CNameDeclaratorNodes are visited.
- self.visitchildren(node)
- return node
-
- def visit_CTypeDefNode(self, node):
- return node
-
- def visit_CBaseTypeNode(self, node):
- return None
-
- def visit_CEnumDefNode(self, node):
- if node.visibility == 'public':
- return node
- else:
- return None
-
- def visit_CNameDeclaratorNode(self, node):
- if node.name in self.seen_vars_stack[-1]:
- entry = self.current_env().lookup(node.name)
- if (entry is None or entry.visibility != 'extern'
- and not entry.scope.is_c_class_scope):
- warning(node.pos, "cdef variable '%s' declared after it is used" % node.name, 2)
- self.visitchildren(node)
- return node
-
- def visit_CVarDefNode(self, node):
- # to ensure all CNameDeclaratorNodes are visited.
- self.visitchildren(node)
- return None
-
- def visit_CnameDecoratorNode(self, node):
- child_node = self.visit(node.node)
- if not child_node:
- return None
- if type(child_node) is list: # Assignment synthesized
- node.child_node = child_node[0]
- return [node] + child_node[1:]
- node.node = child_node
- return node
-
- def create_Property(self, entry):
- if entry.visibility == 'public':
- if entry.type.is_pyobject:
- template = self.basic_pyobject_property
- else:
- template = self.basic_property
- elif entry.visibility == 'readonly':
- template = self.basic_property_ro
- property = template.substitute({
- u"ATTR": ExprNodes.AttributeNode(pos=entry.pos,
- obj=ExprNodes.NameNode(pos=entry.pos, name="self"),
- attribute=entry.name),
- }, pos=entry.pos).stats[0]
- property.name = entry.name
- property.doc = entry.doc
- return property
-
-
-class CalculateQualifiedNamesTransform(EnvTransform):
- """
- Calculate and store the '__qualname__' and the global
- module name on some nodes.
- """
- def visit_ModuleNode(self, node):
- self.module_name = self.global_scope().qualified_name
- self.qualified_name = []
- _super = super(CalculateQualifiedNamesTransform, self)
- self._super_visit_FuncDefNode = _super.visit_FuncDefNode
- self._super_visit_ClassDefNode = _super.visit_ClassDefNode
- self.visitchildren(node)
- return node
-
- def _set_qualname(self, node, name=None):
- if name:
- qualname = self.qualified_name[:]
- qualname.append(name)
- else:
- qualname = self.qualified_name
- node.qualname = EncodedString('.'.join(qualname))
- node.module_name = self.module_name
-
- def _append_entry(self, entry):
- if entry.is_pyglobal and not entry.is_pyclass_attr:
- self.qualified_name = [entry.name]
- else:
- self.qualified_name.append(entry.name)
-
- def visit_ClassNode(self, node):
- self._set_qualname(node, node.name)
- self.visitchildren(node)
- return node
-
- def visit_PyClassNamespaceNode(self, node):
- # class name was already added by parent node
- self._set_qualname(node)
- self.visitchildren(node)
- return node
-
- def visit_PyCFunctionNode(self, node):
+ # enclosing function or module, so we can simply drop them.
+ def visit_CDeclaratorNode(self, node):
+ # necessary to ensure that all CNameDeclaratorNodes are visited.
+ self.visitchildren(node)
+ return node
+
+ def visit_CTypeDefNode(self, node):
+ return node
+
+ def visit_CBaseTypeNode(self, node):
+ return None
+
+ def visit_CEnumDefNode(self, node):
+ if node.visibility == 'public':
+ return node
+ else:
+ return None
+
+ def visit_CNameDeclaratorNode(self, node):
+ if node.name in self.seen_vars_stack[-1]:
+ entry = self.current_env().lookup(node.name)
+ if (entry is None or entry.visibility != 'extern'
+ and not entry.scope.is_c_class_scope):
+ warning(node.pos, "cdef variable '%s' declared after it is used" % node.name, 2)
+ self.visitchildren(node)
+ return node
+
+ def visit_CVarDefNode(self, node):
+ # to ensure all CNameDeclaratorNodes are visited.
+ self.visitchildren(node)
+ return None
+
+ def visit_CnameDecoratorNode(self, node):
+ child_node = self.visit(node.node)
+ if not child_node:
+ return None
+ if type(child_node) is list: # Assignment synthesized
+ node.child_node = child_node[0]
+ return [node] + child_node[1:]
+ node.node = child_node
+ return node
+
+ def create_Property(self, entry):
+ if entry.visibility == 'public':
+ if entry.type.is_pyobject:
+ template = self.basic_pyobject_property
+ else:
+ template = self.basic_property
+ elif entry.visibility == 'readonly':
+ template = self.basic_property_ro
+ property = template.substitute({
+ u"ATTR": ExprNodes.AttributeNode(pos=entry.pos,
+ obj=ExprNodes.NameNode(pos=entry.pos, name="self"),
+ attribute=entry.name),
+ }, pos=entry.pos).stats[0]
+ property.name = entry.name
+ property.doc = entry.doc
+ return property
+
+
+class CalculateQualifiedNamesTransform(EnvTransform):
+ """
+ Calculate and store the '__qualname__' and the global
+ module name on some nodes.
+ """
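A plain-Python reminder of the naming scheme being reproduced here (PEP 3155 qualified names); the function and class names are illustrative:

    def outer():
        class Inner:
            def method(self):
                pass
        return Inner

    # CPython reports Inner.method.__qualname__ as 'outer.<locals>.Inner.method';
    # this transform records the same dotted path on the generated nodes.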
+ def visit_ModuleNode(self, node):
+ self.module_name = self.global_scope().qualified_name
+ self.qualified_name = []
+ _super = super(CalculateQualifiedNamesTransform, self)
+ self._super_visit_FuncDefNode = _super.visit_FuncDefNode
+ self._super_visit_ClassDefNode = _super.visit_ClassDefNode
+ self.visitchildren(node)
+ return node
+
+ def _set_qualname(self, node, name=None):
+ if name:
+ qualname = self.qualified_name[:]
+ qualname.append(name)
+ else:
+ qualname = self.qualified_name
+ node.qualname = EncodedString('.'.join(qualname))
+ node.module_name = self.module_name
+
+ def _append_entry(self, entry):
+ if entry.is_pyglobal and not entry.is_pyclass_attr:
+ self.qualified_name = [entry.name]
+ else:
+ self.qualified_name.append(entry.name)
+
+ def visit_ClassNode(self, node):
+ self._set_qualname(node, node.name)
+ self.visitchildren(node)
+ return node
+
+ def visit_PyClassNamespaceNode(self, node):
+ # class name was already added by parent node
+ self._set_qualname(node)
+ self.visitchildren(node)
+ return node
+
+ def visit_PyCFunctionNode(self, node):
orig_qualified_name = self.qualified_name[:]
if node.def_node.is_wrapper and self.qualified_name and self.qualified_name[-1] == '<locals>':
self.qualified_name.pop()
self._set_qualname(node)
else:
self._set_qualname(node, node.def_node.name)
- self.visitchildren(node)
+ self.visitchildren(node)
self.qualified_name = orig_qualified_name
- return node
-
- def visit_DefNode(self, node):
+ return node
+
+ def visit_DefNode(self, node):
if node.is_wrapper and self.qualified_name:
assert self.qualified_name[-1] == '<locals>', self.qualified_name
orig_qualified_name = self.qualified_name[:]
@@ -2183,176 +2183,176 @@ class CalculateQualifiedNamesTransform(EnvTransform):
self._set_qualname(node, node.name)
self.visit_FuncDefNode(node)
return node
-
- def visit_FuncDefNode(self, node):
- orig_qualified_name = self.qualified_name[:]
- if getattr(node, 'name', None) == '<lambda>':
- self.qualified_name.append('<lambda>')
- else:
- self._append_entry(node.entry)
- self.qualified_name.append('<locals>')
- self._super_visit_FuncDefNode(node)
- self.qualified_name = orig_qualified_name
- return node
-
- def visit_ClassDefNode(self, node):
- orig_qualified_name = self.qualified_name[:]
- entry = (getattr(node, 'entry', None) or # PyClass
- self.current_env().lookup_here(node.name)) # CClass
- self._append_entry(entry)
- self._super_visit_ClassDefNode(node)
- self.qualified_name = orig_qualified_name
- return node
-
-
-class AnalyseExpressionsTransform(CythonTransform):
-
- def visit_ModuleNode(self, node):
- node.scope.infer_types()
- node.body = node.body.analyse_expressions(node.scope)
- self.visitchildren(node)
- return node
-
- def visit_FuncDefNode(self, node):
- node.local_scope.infer_types()
- node.body = node.body.analyse_expressions(node.local_scope)
- self.visitchildren(node)
- return node
-
- def visit_ScopedExprNode(self, node):
- if node.has_local_scope:
- node.expr_scope.infer_types()
- node = node.analyse_scoped_expressions(node.expr_scope)
- self.visitchildren(node)
- return node
-
- def visit_IndexNode(self, node):
- """
- Replace index nodes used to specialize cdef functions with fused
- argument types with the Attribute- or NameNode referring to the
- function. We then need to copy over the specialization properties to
- the attribute or name node.
-
- Because the indexing might be a Python indexing operation on a fused
- function, or (usually) a Cython indexing operation, we need to
- re-analyse the types.
- """
- self.visit_Node(node)
- if node.is_fused_index and not node.type.is_error:
- node = node.base
- return node
-
-
-class FindInvalidUseOfFusedTypes(CythonTransform):
-
- def visit_FuncDefNode(self, node):
- # Errors related to use in functions with fused args will already
- # have been detected
- if not node.has_fused_arguments:
- if not node.is_generator_body and node.return_type.is_fused:
- error(node.pos, "Return type is not specified as argument type")
- else:
- self.visitchildren(node)
-
- return node
-
- def visit_ExprNode(self, node):
- if node.type and node.type.is_fused:
- error(node.pos, "Invalid use of fused types, type cannot be specialized")
- else:
- self.visitchildren(node)
-
- return node
-
-
-class ExpandInplaceOperators(EnvTransform):
-
- def visit_InPlaceAssignmentNode(self, node):
- lhs = node.lhs
- rhs = node.rhs
- if lhs.type.is_cpp_class:
- # No getting around this exact operator here.
- return node
+
+ def visit_FuncDefNode(self, node):
+ orig_qualified_name = self.qualified_name[:]
+ if getattr(node, 'name', None) == '<lambda>':
+ self.qualified_name.append('<lambda>')
+ else:
+ self._append_entry(node.entry)
+ self.qualified_name.append('<locals>')
+ self._super_visit_FuncDefNode(node)
+ self.qualified_name = orig_qualified_name
+ return node
+
+ def visit_ClassDefNode(self, node):
+ orig_qualified_name = self.qualified_name[:]
+ entry = (getattr(node, 'entry', None) or # PyClass
+ self.current_env().lookup_here(node.name)) # CClass
+ self._append_entry(entry)
+ self._super_visit_ClassDefNode(node)
+ self.qualified_name = orig_qualified_name
+ return node
+
+
+class AnalyseExpressionsTransform(CythonTransform):
+
+ def visit_ModuleNode(self, node):
+ node.scope.infer_types()
+ node.body = node.body.analyse_expressions(node.scope)
+ self.visitchildren(node)
+ return node
+
+ def visit_FuncDefNode(self, node):
+ node.local_scope.infer_types()
+ node.body = node.body.analyse_expressions(node.local_scope)
+ self.visitchildren(node)
+ return node
+
+ def visit_ScopedExprNode(self, node):
+ if node.has_local_scope:
+ node.expr_scope.infer_types()
+ node = node.analyse_scoped_expressions(node.expr_scope)
+ self.visitchildren(node)
+ return node
+
+ def visit_IndexNode(self, node):
+ """
+ Replace index nodes used to specialize cdef functions with fused
+ argument types with the Attribute- or NameNode referring to the
+ function. We then need to copy over the specialization properties to
+ the attribute or name node.
+
+ Because the indexing might be a Python indexing operation on a fused
+ function, or (usually) a Cython indexing operation, we need to
+ re-analyse the types.
+ """
+ self.visit_Node(node)
+ if node.is_fused_index and not node.type.is_error:
+ node = node.base
+ return node
+
+
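A hedged sketch of the indexing that visit_IndexNode unwraps, using a hypothetical fused function 'twice':

    ctypedef fused number:
        int
        double

    cpdef number twice(number x):
        return 2 * x

    # twice[int](21) and twice[double](21.0) select specializations explicitly;
    # the index node is replaced by a reference to the chosen specialization
    # once the types have been re-analysed.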
+class FindInvalidUseOfFusedTypes(CythonTransform):
+
+ def visit_FuncDefNode(self, node):
+ # Errors related to use in functions with fused args will already
+ # have been detected
+ if not node.has_fused_arguments:
+ if not node.is_generator_body and node.return_type.is_fused:
+ error(node.pos, "Return type is not specified as argument type")
+ else:
+ self.visitchildren(node)
+
+ return node
+
+ def visit_ExprNode(self, node):
+ if node.type and node.type.is_fused:
+ error(node.pos, "Invalid use of fused types, type cannot be specialized")
+ else:
+ self.visitchildren(node)
+
+ return node
+
+
+class ExpandInplaceOperators(EnvTransform):
+
+ def visit_InPlaceAssignmentNode(self, node):
+ lhs = node.lhs
+ rhs = node.rhs
+ if lhs.type.is_cpp_class:
+ # No getting around this exact operator here.
+ return node
if isinstance(lhs, ExprNodes.BufferIndexNode):
# There is code to handle this case in InPlaceAssignmentNode
- return node
-
- env = self.current_env()
- def side_effect_free_reference(node, setting=False):
+ return node
+
+ env = self.current_env()
+ def side_effect_free_reference(node, setting=False):
if node.is_name:
- return node, []
- elif node.type.is_pyobject and not setting:
- node = LetRefNode(node)
- return node, [node]
+ return node, []
+ elif node.type.is_pyobject and not setting:
+ node = LetRefNode(node)
+ return node, [node]
elif node.is_subscript:
- base, temps = side_effect_free_reference(node.base)
- index = LetRefNode(node.index)
- return ExprNodes.IndexNode(node.pos, base=base, index=index), temps + [index]
+ base, temps = side_effect_free_reference(node.base)
+ index = LetRefNode(node.index)
+ return ExprNodes.IndexNode(node.pos, base=base, index=index), temps + [index]
elif node.is_attribute:
- obj, temps = side_effect_free_reference(node.obj)
- return ExprNodes.AttributeNode(node.pos, obj=obj, attribute=node.attribute), temps
+ obj, temps = side_effect_free_reference(node.obj)
+ return ExprNodes.AttributeNode(node.pos, obj=obj, attribute=node.attribute), temps
elif isinstance(node, ExprNodes.BufferIndexNode):
raise ValueError("Don't allow things like attributes of buffer indexing operations")
- else:
- node = LetRefNode(node)
- return node, [node]
- try:
- lhs, let_ref_nodes = side_effect_free_reference(lhs, setting=True)
- except ValueError:
- return node
- dup = lhs.__class__(**lhs.__dict__)
- binop = ExprNodes.binop_node(node.pos,
- operator = node.operator,
- operand1 = dup,
- operand2 = rhs,
- inplace=True)
- # Manually analyse types for new node.
- lhs.analyse_target_types(env)
- dup.analyse_types(env)
- binop.analyse_operation(env)
- node = Nodes.SingleAssignmentNode(
- node.pos,
- lhs = lhs,
- rhs=binop.coerce_to(lhs.type, env))
- # Use LetRefNode to avoid side effects.
- let_ref_nodes.reverse()
- for t in let_ref_nodes:
- node = LetNode(t, node)
- return node
-
- def visit_ExprNode(self, node):
- # In-place assignments can't happen within an expression.
- return node
-
-
-class AdjustDefByDirectives(CythonTransform, SkipDeclarations):
- """
- Adjust function and class definitions by the decorator directives:
-
- @cython.cfunc
- @cython.cclass
- @cython.ccall
- @cython.inline
+ else:
+ node = LetRefNode(node)
+ return node, [node]
+ try:
+ lhs, let_ref_nodes = side_effect_free_reference(lhs, setting=True)
+ except ValueError:
+ return node
+ dup = lhs.__class__(**lhs.__dict__)
+ binop = ExprNodes.binop_node(node.pos,
+ operator = node.operator,
+ operand1 = dup,
+ operand2 = rhs,
+ inplace=True)
+ # Manually analyse types for new node.
+ lhs.analyse_target_types(env)
+ dup.analyse_types(env)
+ binop.analyse_operation(env)
+ node = Nodes.SingleAssignmentNode(
+ node.pos,
+ lhs = lhs,
+ rhs=binop.coerce_to(lhs.type, env))
+ # Use LetRefNode to avoid side effects.
+ let_ref_nodes.reverse()
+ for t in let_ref_nodes:
+ node = LetNode(t, node)
+ return node
+
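A plain-Python sketch of the expansion performed above; get_index is an assumed helper used only to show that the subscript is evaluated a single time:

    data = [10]
    calls = []

    def get_index():
        calls.append(1)          # records how often the index is computed
        return 0

    # data[get_index()] += 5 is expanded into roughly:
    i = get_index()              # captured once, as a LetRefNode would do
    data[i] = data[i] + 5

    assert data == [15] and len(calls) == 1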
+ def visit_ExprNode(self, node):
+ # In-place assignments can't happen within an expression.
+ return node
+
+
+class AdjustDefByDirectives(CythonTransform, SkipDeclarations):
+ """
+ Adjust function and class definitions by the decorator directives:
+
+ @cython.cfunc
+ @cython.cclass
+ @cython.ccall
+ @cython.inline
@cython.nogil
- """
-
- def visit_ModuleNode(self, node):
- self.directives = node.directives
- self.in_py_class = False
- self.visitchildren(node)
- return node
-
- def visit_CompilerDirectivesNode(self, node):
- old_directives = self.directives
- self.directives = node.directives
- self.visitchildren(node)
- self.directives = old_directives
- return node
-
- def visit_DefNode(self, node):
- modifiers = []
- if 'inline' in self.directives:
- modifiers.append('inline')
+ """
+
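A hedged pure-Python-mode sketch of two of the directives listed above; the function names are illustrative:

    import cython

    @cython.cfunc
    def _square(x: cython.double) -> cython.double:   # becomes a cdef function
        return x * x

    @cython.ccall
    def square(x: cython.double) -> cython.double:    # becomes a cpdef function
        return _square(x)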
+ def visit_ModuleNode(self, node):
+ self.directives = node.directives
+ self.in_py_class = False
+ self.visitchildren(node)
+ return node
+
+ def visit_CompilerDirectivesNode(self, node):
+ old_directives = self.directives
+ self.directives = node.directives
+ self.visitchildren(node)
+ self.directives = old_directives
+ return node
+
+ def visit_DefNode(self, node):
+ modifiers = []
+ if 'inline' in self.directives:
+ modifiers.append('inline')
nogil = self.directives.get('nogil')
except_val = self.directives.get('exceptval')
return_type_node = self.directives.get('returns')
@@ -2364,198 +2364,198 @@ class AdjustDefByDirectives(CythonTransform, SkipDeclarations):
elif except_val is None:
# backward compatible default: no exception check
except_val = (None, False)
- if 'ccall' in self.directives:
- node = node.as_cfunction(
+ if 'ccall' in self.directives:
+ node = node.as_cfunction(
overridable=True, modifiers=modifiers, nogil=nogil,
returns=return_type_node, except_val=except_val)
- return self.visit(node)
- if 'cfunc' in self.directives:
- if self.in_py_class:
- error(node.pos, "cfunc directive is not allowed here")
- else:
- node = node.as_cfunction(
+ return self.visit(node)
+ if 'cfunc' in self.directives:
+ if self.in_py_class:
+ error(node.pos, "cfunc directive is not allowed here")
+ else:
+ node = node.as_cfunction(
overridable=False, modifiers=modifiers, nogil=nogil,
returns=return_type_node, except_val=except_val)
- return self.visit(node)
- if 'inline' in modifiers:
- error(node.pos, "Python functions cannot be declared 'inline'")
+ return self.visit(node)
+ if 'inline' in modifiers:
+ error(node.pos, "Python functions cannot be declared 'inline'")
if nogil:
# TODO: turn this into a "with gil" declaration.
error(node.pos, "Python functions cannot be declared 'nogil'")
- self.visitchildren(node)
- return node
-
+ self.visitchildren(node)
+ return node
+
def visit_LambdaNode(self, node):
        # No directives should modify lambdas or generator expressions (nor anything inside them).
return node
- def visit_PyClassDefNode(self, node):
- if 'cclass' in self.directives:
- node = node.as_cclass()
- return self.visit(node)
- else:
- old_in_pyclass = self.in_py_class
- self.in_py_class = True
- self.visitchildren(node)
- self.in_py_class = old_in_pyclass
- return node
-
- def visit_CClassDefNode(self, node):
- old_in_pyclass = self.in_py_class
- self.in_py_class = False
- self.visitchildren(node)
- self.in_py_class = old_in_pyclass
- return node
-
-
-class AlignFunctionDefinitions(CythonTransform):
- """
- This class takes the signatures from a .pxd file and applies them to
- the def methods in a .py file.
- """
-
- def visit_ModuleNode(self, node):
- self.scope = node.scope
- self.directives = node.directives
- self.imported_names = set() # hack, see visit_FromImportStatNode()
- self.visitchildren(node)
- return node
-
- def visit_PyClassDefNode(self, node):
- pxd_def = self.scope.lookup(node.name)
- if pxd_def:
- if pxd_def.is_cclass:
- return self.visit_CClassDefNode(node.as_cclass(), pxd_def)
- elif not pxd_def.scope or not pxd_def.scope.is_builtin_scope:
- error(node.pos, "'%s' redeclared" % node.name)
- if pxd_def.pos:
- error(pxd_def.pos, "previous declaration here")
- return None
- return node
-
- def visit_CClassDefNode(self, node, pxd_def=None):
- if pxd_def is None:
- pxd_def = self.scope.lookup(node.class_name)
- if pxd_def:
+ def visit_PyClassDefNode(self, node):
+ if 'cclass' in self.directives:
+ node = node.as_cclass()
+ return self.visit(node)
+ else:
+ old_in_pyclass = self.in_py_class
+ self.in_py_class = True
+ self.visitchildren(node)
+ self.in_py_class = old_in_pyclass
+ return node
+
+ def visit_CClassDefNode(self, node):
+ old_in_pyclass = self.in_py_class
+ self.in_py_class = False
+ self.visitchildren(node)
+ self.in_py_class = old_in_pyclass
+ return node
+
+
+class AlignFunctionDefinitions(CythonTransform):
+ """
+ This class takes the signatures from a .pxd file and applies them to
+ the def methods in a .py file.
+ """
+
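A hedged sketch of the .pxd/.py pairing this transform aligns; the module and function names are hypothetical:

    # mymod.pxd
    cpdef double scale(double x)

    # mymod.py
    def scale(x):
        return 2.0 * x

    # The def in mymod.py picks up the cpdef signature declared in mymod.pxd.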
+ def visit_ModuleNode(self, node):
+ self.scope = node.scope
+ self.directives = node.directives
+ self.imported_names = set() # hack, see visit_FromImportStatNode()
+ self.visitchildren(node)
+ return node
+
+ def visit_PyClassDefNode(self, node):
+ pxd_def = self.scope.lookup(node.name)
+ if pxd_def:
+ if pxd_def.is_cclass:
+ return self.visit_CClassDefNode(node.as_cclass(), pxd_def)
+ elif not pxd_def.scope or not pxd_def.scope.is_builtin_scope:
+ error(node.pos, "'%s' redeclared" % node.name)
+ if pxd_def.pos:
+ error(pxd_def.pos, "previous declaration here")
+ return None
+ return node
+
+ def visit_CClassDefNode(self, node, pxd_def=None):
+ if pxd_def is None:
+ pxd_def = self.scope.lookup(node.class_name)
+ if pxd_def:
if not pxd_def.defined_in_pxd:
return node
- outer_scope = self.scope
- self.scope = pxd_def.type.scope
- self.visitchildren(node)
- if pxd_def:
- self.scope = outer_scope
- return node
-
- def visit_DefNode(self, node):
- pxd_def = self.scope.lookup(node.name)
- if pxd_def and (not pxd_def.scope or not pxd_def.scope.is_builtin_scope):
- if not pxd_def.is_cfunction:
- error(node.pos, "'%s' redeclared" % node.name)
- if pxd_def.pos:
- error(pxd_def.pos, "previous declaration here")
- return None
- node = node.as_cfunction(pxd_def)
- elif (self.scope.is_module_scope and self.directives['auto_cpdef']
- and not node.name in self.imported_names
- and node.is_cdef_func_compatible()):
- # FIXME: cpdef-ing should be done in analyse_declarations()
- node = node.as_cfunction(scope=self.scope)
- # Enable this when nested cdef functions are allowed.
- # self.visitchildren(node)
- return node
-
- def visit_FromImportStatNode(self, node):
- # hack to prevent conditional import fallback functions from
- # being cdpef-ed (global Python variables currently conflict
- # with imports)
- if self.scope.is_module_scope:
- for name, _ in node.items:
- self.imported_names.add(name)
- return node
-
- def visit_ExprNode(self, node):
- # ignore lambdas and everything else that appears in expressions
- return node
-
-
-class RemoveUnreachableCode(CythonTransform):
- def visit_StatListNode(self, node):
- if not self.current_directives['remove_unreachable']:
- return node
- self.visitchildren(node)
- for idx, stat in enumerate(node.stats):
- idx += 1
- if stat.is_terminator:
- if idx < len(node.stats):
- if self.current_directives['warn.unreachable']:
- warning(node.stats[idx].pos, "Unreachable code", 2)
- node.stats = node.stats[:idx]
- node.is_terminator = True
- break
- return node
-
- def visit_IfClauseNode(self, node):
- self.visitchildren(node)
- if node.body.is_terminator:
- node.is_terminator = True
- return node
-
- def visit_IfStatNode(self, node):
- self.visitchildren(node)
- if node.else_clause and node.else_clause.is_terminator:
- for clause in node.if_clauses:
- if not clause.is_terminator:
- break
- else:
- node.is_terminator = True
- return node
-
- def visit_TryExceptStatNode(self, node):
- self.visitchildren(node)
- if node.body.is_terminator and node.else_clause:
- if self.current_directives['warn.unreachable']:
- warning(node.else_clause.pos, "Unreachable code", 2)
- node.else_clause = None
- return node
-
+ outer_scope = self.scope
+ self.scope = pxd_def.type.scope
+ self.visitchildren(node)
+ if pxd_def:
+ self.scope = outer_scope
+ return node
+
+ def visit_DefNode(self, node):
+ pxd_def = self.scope.lookup(node.name)
+ if pxd_def and (not pxd_def.scope or not pxd_def.scope.is_builtin_scope):
+ if not pxd_def.is_cfunction:
+ error(node.pos, "'%s' redeclared" % node.name)
+ if pxd_def.pos:
+ error(pxd_def.pos, "previous declaration here")
+ return None
+ node = node.as_cfunction(pxd_def)
+ elif (self.scope.is_module_scope and self.directives['auto_cpdef']
+ and not node.name in self.imported_names
+ and node.is_cdef_func_compatible()):
+ # FIXME: cpdef-ing should be done in analyse_declarations()
+ node = node.as_cfunction(scope=self.scope)
+ # Enable this when nested cdef functions are allowed.
+ # self.visitchildren(node)
+ return node
+
+ def visit_FromImportStatNode(self, node):
+ # hack to prevent conditional import fallback functions from
+        # being cpdef-ed (global Python variables currently conflict
+ # with imports)
+ if self.scope.is_module_scope:
+ for name, _ in node.items:
+ self.imported_names.add(name)
+ return node
+
+ def visit_ExprNode(self, node):
+ # ignore lambdas and everything else that appears in expressions
+ return node
+
+
+class RemoveUnreachableCode(CythonTransform):
+ def visit_StatListNode(self, node):
+ if not self.current_directives['remove_unreachable']:
+ return node
+ self.visitchildren(node)
+ for idx, stat in enumerate(node.stats):
+ idx += 1
+ if stat.is_terminator:
+ if idx < len(node.stats):
+ if self.current_directives['warn.unreachable']:
+ warning(node.stats[idx].pos, "Unreachable code", 2)
+ node.stats = node.stats[:idx]
+ node.is_terminator = True
+ break
+ return node
+
+ def visit_IfClauseNode(self, node):
+ self.visitchildren(node)
+ if node.body.is_terminator:
+ node.is_terminator = True
+ return node
+
+ def visit_IfStatNode(self, node):
+ self.visitchildren(node)
+ if node.else_clause and node.else_clause.is_terminator:
+ for clause in node.if_clauses:
+ if not clause.is_terminator:
+ break
+ else:
+ node.is_terminator = True
+ return node
+
+ def visit_TryExceptStatNode(self, node):
+ self.visitchildren(node)
+ if node.body.is_terminator and node.else_clause:
+ if self.current_directives['warn.unreachable']:
+ warning(node.else_clause.pos, "Unreachable code", 2)
+ node.else_clause = None
+ return node
+
def visit_TryFinallyStatNode(self, node):
self.visitchildren(node)
if node.finally_clause.is_terminator:
node.is_terminator = True
return node
-
-class YieldNodeCollector(TreeVisitor):
-
- def __init__(self):
- super(YieldNodeCollector, self).__init__()
- self.yields = []
- self.returns = []
+
+class YieldNodeCollector(TreeVisitor):
+
+ def __init__(self):
+ super(YieldNodeCollector, self).__init__()
+ self.yields = []
+ self.returns = []
self.finallys = []
self.excepts = []
- self.has_return_value = False
+ self.has_return_value = False
self.has_yield = False
self.has_await = False
-
- def visit_Node(self, node):
- self.visitchildren(node)
-
- def visit_YieldExprNode(self, node):
- self.yields.append(node)
+
+ def visit_Node(self, node):
+ self.visitchildren(node)
+
+ def visit_YieldExprNode(self, node):
+ self.yields.append(node)
self.has_yield = True
- self.visitchildren(node)
-
+ self.visitchildren(node)
+
def visit_AwaitExprNode(self, node):
self.yields.append(node)
self.has_await = True
self.visitchildren(node)
- def visit_ReturnStatNode(self, node):
- self.visitchildren(node)
- if node.value:
- self.has_return_value = True
- self.returns.append(node)
-
+ def visit_ReturnStatNode(self, node):
+ self.visitchildren(node)
+ if node.value:
+ self.has_return_value = True
+ self.returns.append(node)
+
def visit_TryFinallyStatNode(self, node):
self.visitchildren(node)
self.finallys.append(node)
@@ -2564,40 +2564,40 @@ class YieldNodeCollector(TreeVisitor):
self.visitchildren(node)
self.excepts.append(node)
- def visit_ClassDefNode(self, node):
- pass
-
- def visit_FuncDefNode(self, node):
- pass
-
- def visit_LambdaNode(self, node):
- pass
-
- def visit_GeneratorExpressionNode(self, node):
- pass
-
+ def visit_ClassDefNode(self, node):
+ pass
+
+ def visit_FuncDefNode(self, node):
+ pass
+
+ def visit_LambdaNode(self, node):
+ pass
+
+ def visit_GeneratorExpressionNode(self, node):
+ pass
+
def visit_CArgDeclNode(self, node):
# do not look into annotations
# FIXME: support (yield) in default arguments (currently crashes)
pass
-
-
-class MarkClosureVisitor(CythonTransform):
-
- def visit_ModuleNode(self, node):
- self.needs_closure = False
- self.visitchildren(node)
- return node
-
- def visit_FuncDefNode(self, node):
- self.needs_closure = False
- self.visitchildren(node)
- node.needs_closure = self.needs_closure
- self.needs_closure = True
-
- collector = YieldNodeCollector()
- collector.visitchildren(node)
-
+
+
+class MarkClosureVisitor(CythonTransform):
+
+ def visit_ModuleNode(self, node):
+ self.needs_closure = False
+ self.visitchildren(node)
+ return node
+
+ def visit_FuncDefNode(self, node):
+ self.needs_closure = False
+ self.visitchildren(node)
+ node.needs_closure = self.needs_closure
+ self.needs_closure = True
+
+ collector = YieldNodeCollector()
+ collector.visitchildren(node)
+
if node.is_async_def:
coroutine_type = Nodes.AsyncDefNode
if collector.has_yield:
@@ -2614,12 +2614,12 @@ class MarkClosureVisitor(CythonTransform):
coroutine_type = Nodes.GeneratorDefNode
else:
return node
-
+
for i, yield_expr in enumerate(collector.yields, 1):
yield_expr.label_num = i
for retnode in collector.returns + collector.finallys + collector.excepts:
retnode.in_generator = True
-
+
gbody = Nodes.GeneratorBodyDefNode(
pos=node.pos, name=node.name, body=node.body,
is_async_gen_body=node.is_async_def and collector.has_yield)
@@ -2631,45 +2631,45 @@ class MarkClosureVisitor(CythonTransform):
return_type_annotation=node.return_type_annotation)
return coroutine
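Sketch of the effect of the generator handling above (conceptual, not an exact AST dump; the function name is invented):

    def count_up(n):
        i = 0
        while i < n:
            yield i      # gets label_num 1 so the generator body can resume here
            i += 1

    # MarkClosureVisitor rewrites this def into (roughly)
    #   GeneratorDefNode(name='count_up',
    #                    gbody=GeneratorBodyDefNode(body=<while loop>))
    # and marks any return/finally/except inside it with in_generator=True.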
- def visit_CFuncDefNode(self, node):
+ def visit_CFuncDefNode(self, node):
self.needs_closure = False
self.visitchildren(node)
node.needs_closure = self.needs_closure
self.needs_closure = True
- if node.needs_closure and node.overridable:
- error(node.pos, "closures inside cpdef functions not yet supported")
- return node
-
- def visit_LambdaNode(self, node):
- self.needs_closure = False
- self.visitchildren(node)
- node.needs_closure = self.needs_closure
- self.needs_closure = True
- return node
-
- def visit_ClassDefNode(self, node):
- self.visitchildren(node)
- self.needs_closure = True
- return node
-
-
-class CreateClosureClasses(CythonTransform):
- # Output closure classes in module scope for all functions
- # that really need it.
-
- def __init__(self, context):
- super(CreateClosureClasses, self).__init__(context)
- self.path = []
- self.in_lambda = False
-
- def visit_ModuleNode(self, node):
- self.module_scope = node.scope
- self.visitchildren(node)
- return node
-
- def find_entries_used_in_closures(self, node):
- from_closure = []
- in_closure = []
+ if node.needs_closure and node.overridable:
+ error(node.pos, "closures inside cpdef functions not yet supported")
+ return node
+
+ def visit_LambdaNode(self, node):
+ self.needs_closure = False
+ self.visitchildren(node)
+ node.needs_closure = self.needs_closure
+ self.needs_closure = True
+ return node
+
+ def visit_ClassDefNode(self, node):
+ self.visitchildren(node)
+ self.needs_closure = True
+ return node
+
+
+class CreateClosureClasses(CythonTransform):
+ # Output closure classes in module scope for all functions
+ # that really need it.
+
+ def __init__(self, context):
+ super(CreateClosureClasses, self).__init__(context)
+ self.path = []
+ self.in_lambda = False
+
+ def visit_ModuleNode(self, node):
+ self.module_scope = node.scope
+ self.visitchildren(node)
+ return node
+
+ def find_entries_used_in_closures(self, node):
+ from_closure = []
+ in_closure = []
for scope in node.local_scope.iter_local_scopes():
for name, entry in scope.entries.items():
if not name:
@@ -2678,124 +2678,124 @@ class CreateClosureClasses(CythonTransform):
from_closure.append((name, entry))
elif entry.in_closure:
in_closure.append((name, entry))
- return from_closure, in_closure
-
- def create_class_from_scope(self, node, target_module_scope, inner_node=None):
- # move local variables into closure
- if node.is_generator:
+ return from_closure, in_closure
+
+ def create_class_from_scope(self, node, target_module_scope, inner_node=None):
+ # move local variables into closure
+ if node.is_generator:
for scope in node.local_scope.iter_local_scopes():
for entry in scope.entries.values():
if not (entry.from_closure or entry.is_pyglobal or entry.is_cglobal):
entry.in_closure = True
-
- from_closure, in_closure = self.find_entries_used_in_closures(node)
- in_closure.sort()
-
+
+ from_closure, in_closure = self.find_entries_used_in_closures(node)
+ in_closure.sort()
+
# Now from the beginning
- node.needs_closure = False
- node.needs_outer_scope = False
-
- func_scope = node.local_scope
- cscope = node.entry.scope
- while cscope.is_py_class_scope or cscope.is_c_class_scope:
- cscope = cscope.outer_scope
-
- if not from_closure and (self.path or inner_node):
- if not inner_node:
- if not node.py_cfunc_node:
- raise InternalError("DefNode does not have assignment node")
- inner_node = node.py_cfunc_node
- inner_node.needs_self_code = False
- node.needs_outer_scope = False
-
- if node.is_generator:
- pass
- elif not in_closure and not from_closure:
- return
- elif not in_closure:
- func_scope.is_passthrough = True
- func_scope.scope_class = cscope.scope_class
- node.needs_outer_scope = True
- return
-
+ node.needs_closure = False
+ node.needs_outer_scope = False
+
+ func_scope = node.local_scope
+ cscope = node.entry.scope
+ while cscope.is_py_class_scope or cscope.is_c_class_scope:
+ cscope = cscope.outer_scope
+
+ if not from_closure and (self.path or inner_node):
+ if not inner_node:
+ if not node.py_cfunc_node:
+ raise InternalError("DefNode does not have assignment node")
+ inner_node = node.py_cfunc_node
+ inner_node.needs_self_code = False
+ node.needs_outer_scope = False
+
+ if node.is_generator:
+ pass
+ elif not in_closure and not from_closure:
+ return
+ elif not in_closure:
+ func_scope.is_passthrough = True
+ func_scope.scope_class = cscope.scope_class
+ node.needs_outer_scope = True
+ return
+
# entry.cname can contain periods (eg. a derived C method of a class).
# We want to use the cname as part of a C struct name, so we replace
# periods with double underscores.
- as_name = '%s_%s' % (
- target_module_scope.next_id(Naming.closure_class_prefix),
+ as_name = '%s_%s' % (
+ target_module_scope.next_id(Naming.closure_class_prefix),
node.entry.cname.replace('.','__'))
-
- entry = target_module_scope.declare_c_class(
- name=as_name, pos=node.pos, defining=True,
- implementing=True)
- entry.type.is_final_type = True
-
- func_scope.scope_class = entry
- class_scope = entry.type.scope
- class_scope.is_internal = True
+
+ entry = target_module_scope.declare_c_class(
+ name=as_name, pos=node.pos, defining=True,
+ implementing=True)
+ entry.type.is_final_type = True
+
+ func_scope.scope_class = entry
+ class_scope = entry.type.scope
+ class_scope.is_internal = True
class_scope.is_closure_class_scope = True
if node.is_async_def or node.is_generator:
# Generators need their closure intact during cleanup as they resume to handle GeneratorExit
class_scope.directives['no_gc_clear'] = True
- if Options.closure_freelist_size:
- class_scope.directives['freelist'] = Options.closure_freelist_size
-
- if from_closure:
- assert cscope.is_closure_scope
- class_scope.declare_var(pos=node.pos,
- name=Naming.outer_scope_cname,
- cname=Naming.outer_scope_cname,
- type=cscope.scope_class.type,
- is_cdef=True)
- node.needs_outer_scope = True
- for name, entry in in_closure:
+ if Options.closure_freelist_size:
+ class_scope.directives['freelist'] = Options.closure_freelist_size
+
+ if from_closure:
+ assert cscope.is_closure_scope
+ class_scope.declare_var(pos=node.pos,
+ name=Naming.outer_scope_cname,
+ cname=Naming.outer_scope_cname,
+ type=cscope.scope_class.type,
+ is_cdef=True)
+ node.needs_outer_scope = True
+ for name, entry in in_closure:
closure_entry = class_scope.declare_var(
pos=entry.pos,
name=entry.name if not entry.in_subscope else None,
cname=entry.cname,
type=entry.type,
is_cdef=True)
- if entry.is_declared_generic:
- closure_entry.is_declared_generic = 1
- node.needs_closure = True
- # Do it here because other classes are already checked
- target_module_scope.check_c_class(func_scope.scope_class)
-
- def visit_LambdaNode(self, node):
- if not isinstance(node.def_node, Nodes.DefNode):
- # fused function, an error has been previously issued
- return node
-
- was_in_lambda = self.in_lambda
- self.in_lambda = True
- self.create_class_from_scope(node.def_node, self.module_scope, node)
- self.visitchildren(node)
- self.in_lambda = was_in_lambda
- return node
-
- def visit_FuncDefNode(self, node):
- if self.in_lambda:
- self.visitchildren(node)
- return node
- if node.needs_closure or self.path:
- self.create_class_from_scope(node, self.module_scope)
- self.path.append(node)
- self.visitchildren(node)
- self.path.pop()
- return node
-
- def visit_GeneratorBodyDefNode(self, node):
- self.visitchildren(node)
- return node
-
- def visit_CFuncDefNode(self, node):
- if not node.overridable:
- return self.visit_FuncDefNode(node)
- else:
- self.visitchildren(node)
- return node
-
-
+ if entry.is_declared_generic:
+ closure_entry.is_declared_generic = 1
+ node.needs_closure = True
+ # Do it here because other classes are already checked
+ target_module_scope.check_c_class(func_scope.scope_class)
+
+ def visit_LambdaNode(self, node):
+ if not isinstance(node.def_node, Nodes.DefNode):
+ # fused function, an error has been previously issued
+ return node
+
+ was_in_lambda = self.in_lambda
+ self.in_lambda = True
+ self.create_class_from_scope(node.def_node, self.module_scope, node)
+ self.visitchildren(node)
+ self.in_lambda = was_in_lambda
+ return node
+
+ def visit_FuncDefNode(self, node):
+ if self.in_lambda:
+ self.visitchildren(node)
+ return node
+ if node.needs_closure or self.path:
+ self.create_class_from_scope(node, self.module_scope)
+ self.path.append(node)
+ self.visitchildren(node)
+ self.path.pop()
+ return node
+
+ def visit_GeneratorBodyDefNode(self, node):
+ self.visitchildren(node)
+ return node
+
+ def visit_CFuncDefNode(self, node):
+ if not node.overridable:
+ return self.visit_FuncDefNode(node)
+ else:
+ self.visitchildren(node)
+ return node
+
+
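Conceptually, a captured local becomes a field of a generated closure class (illustrative sketch; names are invented, and the generated type's name is built from Naming.closure_class_prefix plus a per-module id):

    def outer():
        x = 1                  # used from the inner scope -> entry.in_closure
        def inner():
            return x           # resolved via the outer scope -> entry.from_closure
        return inner

    # CreateClosureClasses declares an internal extension type for outer()'s scope
    # with a cdef field for 'x'; inner() keeps a reference to that scope object
    # (plus an outer-scope pointer when it is itself nested further).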
class InjectGilHandling(VisitorTransform, SkipDeclarations):
"""
Allow certain Python operations inside of nogil blocks by implicitly acquiring the GIL.
@@ -2850,25 +2850,25 @@ class InjectGilHandling(VisitorTransform, SkipDeclarations):
visit_Node = VisitorTransform.recurse_to_children
-class GilCheck(VisitorTransform):
- """
- Call `node.gil_check(env)` on each node to make sure we hold the
- GIL when we need it. Raise an error when on Python operations
- inside a `nogil` environment.
-
- Additionally, raise exceptions for closely nested with gil or with nogil
- statements. The latter would abort Python.
- """
-
- def __call__(self, root):
- self.env_stack = [root.scope]
- self.nogil = False
-
- # True for 'cdef func() nogil:' functions, as the GIL may be held while
- # calling this function (thus contained 'nogil' blocks may be valid).
- self.nogil_declarator_only = False
- return super(GilCheck, self).__call__(root)
-
+class GilCheck(VisitorTransform):
+ """
+ Call `node.gil_check(env)` on each node to make sure we hold the
+ GIL when we need it. Raise an error when Python operations are used
+ inside a `nogil` environment.
+
+ Additionally, raise exceptions for closely nested with gil or with nogil
+ statements. The latter would abort Python.
+ """
+
+ def __call__(self, root):
+ self.env_stack = [root.scope]
+ self.nogil = False
+
+ # True for 'cdef func() nogil:' functions, as the GIL may be held while
+ # calling this function (thus contained 'nogil' blocks may be valid).
+ self.nogil_declarator_only = False
+ return super(GilCheck, self).__call__(root)
+
def _visit_scoped_children(self, node, gil_state):
was_nogil = self.nogil
outer_attrs = node.outer_attrs
@@ -2880,315 +2880,315 @@ class GilCheck(VisitorTransform):
self.visitchildren(node, attrs=None, exclude=outer_attrs)
self.nogil = was_nogil
- def visit_FuncDefNode(self, node):
- self.env_stack.append(node.local_scope)
+ def visit_FuncDefNode(self, node):
+ self.env_stack.append(node.local_scope)
inner_nogil = node.local_scope.nogil
-
+
if inner_nogil:
- self.nogil_declarator_only = True
-
+ self.nogil_declarator_only = True
+
if inner_nogil and node.nogil_check:
- node.nogil_check(node.local_scope)
-
+ node.nogil_check(node.local_scope)
+
self._visit_scoped_children(node, inner_nogil)
-
- # This cannot be nested, so it doesn't need backup/restore
- self.nogil_declarator_only = False
-
- self.env_stack.pop()
- return node
-
- def visit_GILStatNode(self, node):
- if self.nogil and node.nogil_check:
- node.nogil_check()
-
- was_nogil = self.nogil
+
+ # This cannot be nested, so it doesn't need backup/restore
+ self.nogil_declarator_only = False
+
+ self.env_stack.pop()
+ return node
+
+ def visit_GILStatNode(self, node):
+ if self.nogil and node.nogil_check:
+ node.nogil_check()
+
+ was_nogil = self.nogil
is_nogil = (node.state == 'nogil')
-
+
if was_nogil == is_nogil and not self.nogil_declarator_only:
- if not was_nogil:
- error(node.pos, "Trying to acquire the GIL while it is "
- "already held.")
- else:
- error(node.pos, "Trying to release the GIL while it was "
- "previously released.")
-
- if isinstance(node.finally_clause, Nodes.StatListNode):
- # The finally clause of the GILStatNode is a GILExitNode,
- # which is wrapped in a StatListNode. Just unpack that.
- node.finally_clause, = node.finally_clause.stats
-
+ if not was_nogil:
+ error(node.pos, "Trying to acquire the GIL while it is "
+ "already held.")
+ else:
+ error(node.pos, "Trying to release the GIL while it was "
+ "previously released.")
+
+ if isinstance(node.finally_clause, Nodes.StatListNode):
+ # The finally clause of the GILStatNode is a GILExitNode,
+ # which is wrapped in a StatListNode. Just unpack that.
+ node.finally_clause, = node.finally_clause.stats
+
self._visit_scoped_children(node, is_nogil)
- return node
-
- def visit_ParallelRangeNode(self, node):
- if node.nogil:
- node.nogil = False
- node = Nodes.GILStatNode(node.pos, state='nogil', body=node)
- return self.visit_GILStatNode(node)
-
- if not self.nogil:
- error(node.pos, "prange() can only be used without the GIL")
- # Forget about any GIL-related errors that may occur in the body
- return None
-
- node.nogil_check(self.env_stack[-1])
- self.visitchildren(node)
- return node
-
- def visit_ParallelWithBlockNode(self, node):
- if not self.nogil:
- error(node.pos, "The parallel section may only be used without "
- "the GIL")
- return None
-
- if node.nogil_check:
- # It does not currently implement this, but test for it anyway to
- # avoid potential future surprises
- node.nogil_check(self.env_stack[-1])
-
- self.visitchildren(node)
- return node
-
- def visit_TryFinallyStatNode(self, node):
- """
- Take care of try/finally statements in nogil code sections.
- """
- if not self.nogil or isinstance(node, Nodes.GILStatNode):
- return self.visit_Node(node)
-
- node.nogil_check = None
- node.is_try_finally_in_nogil = True
- self.visitchildren(node)
- return node
-
- def visit_Node(self, node):
- if self.env_stack and self.nogil and node.nogil_check:
- node.nogil_check(self.env_stack[-1])
+ return node
+
+ def visit_ParallelRangeNode(self, node):
+ if node.nogil:
+ node.nogil = False
+ node = Nodes.GILStatNode(node.pos, state='nogil', body=node)
+ return self.visit_GILStatNode(node)
+
+ if not self.nogil:
+ error(node.pos, "prange() can only be used without the GIL")
+ # Forget about any GIL-related errors that may occur in the body
+ return None
+
+ node.nogil_check(self.env_stack[-1])
+ self.visitchildren(node)
+ return node
+
+ def visit_ParallelWithBlockNode(self, node):
+ if not self.nogil:
+ error(node.pos, "The parallel section may only be used without "
+ "the GIL")
+ return None
+
+ if node.nogil_check:
+ # It does not currently implement this, but test for it anyway to
+ # avoid potential future surprises
+ node.nogil_check(self.env_stack[-1])
+
+ self.visitchildren(node)
+ return node
+
+ def visit_TryFinallyStatNode(self, node):
+ """
+ Take care of try/finally statements in nogil code sections.
+ """
+ if not self.nogil or isinstance(node, Nodes.GILStatNode):
+ return self.visit_Node(node)
+
+ node.nogil_check = None
+ node.is_try_finally_in_nogil = True
+ self.visitchildren(node)
+ return node
+
+ def visit_Node(self, node):
+ if self.env_stack and self.nogil and node.nogil_check:
+ node.nogil_check(self.env_stack[-1])
if node.outer_attrs:
self._visit_scoped_children(node, self.nogil)
else:
self.visitchildren(node)
if self.nogil:
node.in_nogil_context = True
- return node
-
-
-class TransformBuiltinMethods(EnvTransform):
+ return node
+
+
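Two of the diagnostics this check produces, in sketch form (invented function names):

    def f():
        with gil:        # the GIL is already held in a regular def function
            pass         # -> "Trying to acquire the GIL while it is already held."

    cdef void g() nogil:
        with nogil:      # tolerated: g() may be entered with the GIL held, so
            pass         # nogil_declarator_only suppresses the redundant-release error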
+class TransformBuiltinMethods(EnvTransform):
"""
Replace Cython's own cython.* builtins by the corresponding tree nodes.
"""
-
- def visit_SingleAssignmentNode(self, node):
- if node.declaration_only:
- return None
- else:
- self.visitchildren(node)
- return node
-
- def visit_AttributeNode(self, node):
- self.visitchildren(node)
- return self.visit_cython_attribute(node)
-
- def visit_NameNode(self, node):
- return self.visit_cython_attribute(node)
-
- def visit_cython_attribute(self, node):
- attribute = node.as_cython_attribute()
- if attribute:
- if attribute == u'compiled':
- node = ExprNodes.BoolNode(node.pos, value=True)
- elif attribute == u'__version__':
- from .. import __version__ as version
- node = ExprNodes.StringNode(node.pos, value=EncodedString(version))
- elif attribute == u'NULL':
- node = ExprNodes.NullNode(node.pos)
- elif attribute in (u'set', u'frozenset', u'staticmethod'):
- node = ExprNodes.NameNode(node.pos, name=EncodedString(attribute),
- entry=self.current_env().builtin_scope().lookup_here(attribute))
- elif PyrexTypes.parse_basic_type(attribute):
- pass
- elif self.context.cython_scope.lookup_qualified_name(attribute):
- pass
- else:
- error(node.pos, u"'%s' not a valid cython attribute or is being used incorrectly" % attribute)
- return node
-
- def visit_ExecStatNode(self, node):
- lenv = self.current_env()
- self.visitchildren(node)
- if len(node.args) == 1:
- node.args.append(ExprNodes.GlobalsExprNode(node.pos))
- if not lenv.is_module_scope:
- node.args.append(
- ExprNodes.LocalsExprNode(
- node.pos, self.current_scope_node(), lenv))
- return node
-
- def _inject_locals(self, node, func_name):
- # locals()/dir()/vars() builtins
- lenv = self.current_env()
- entry = lenv.lookup_here(func_name)
- if entry:
- # not the builtin
- return node
- pos = node.pos
- if func_name in ('locals', 'vars'):
- if func_name == 'locals' and len(node.args) > 0:
- error(self.pos, "Builtin 'locals()' called with wrong number of args, expected 0, got %d"
- % len(node.args))
- return node
- elif func_name == 'vars':
- if len(node.args) > 1:
- error(self.pos, "Builtin 'vars()' called with wrong number of args, expected 0-1, got %d"
- % len(node.args))
- if len(node.args) > 0:
- return node # nothing to do
- return ExprNodes.LocalsExprNode(pos, self.current_scope_node(), lenv)
- else: # dir()
- if len(node.args) > 1:
- error(self.pos, "Builtin 'dir()' called with wrong number of args, expected 0-1, got %d"
- % len(node.args))
- if len(node.args) > 0:
- # optimised in Builtin.py
- return node
- if lenv.is_py_class_scope or lenv.is_module_scope:
- if lenv.is_py_class_scope:
- pyclass = self.current_scope_node()
- locals_dict = ExprNodes.CloneNode(pyclass.dict)
- else:
- locals_dict = ExprNodes.GlobalsExprNode(pos)
- return ExprNodes.SortedDictKeysNode(locals_dict)
- local_names = sorted(var.name for var in lenv.entries.values() if var.name)
- items = [ExprNodes.IdentifierStringNode(pos, value=var)
- for var in local_names]
- return ExprNodes.ListNode(pos, args=items)
-
- def visit_PrimaryCmpNode(self, node):
- # special case: for in/not-in test, we do not need to sort locals()
- self.visitchildren(node)
- if node.operator in 'not_in': # in/not_in
- if isinstance(node.operand2, ExprNodes.SortedDictKeysNode):
- arg = node.operand2.arg
- if isinstance(arg, ExprNodes.NoneCheckNode):
- arg = arg.arg
- node.operand2 = arg
- return node
-
- def visit_CascadedCmpNode(self, node):
- return self.visit_PrimaryCmpNode(node)
-
- def _inject_eval(self, node, func_name):
- lenv = self.current_env()
- entry = lenv.lookup_here(func_name)
- if entry or len(node.args) != 1:
- return node
- # Inject globals and locals
- node.args.append(ExprNodes.GlobalsExprNode(node.pos))
- if not lenv.is_module_scope:
- node.args.append(
- ExprNodes.LocalsExprNode(
- node.pos, self.current_scope_node(), lenv))
- return node
-
- def _inject_super(self, node, func_name):
- lenv = self.current_env()
- entry = lenv.lookup_here(func_name)
- if entry or node.args:
- return node
- # Inject no-args super
- def_node = self.current_scope_node()
- if (not isinstance(def_node, Nodes.DefNode) or not def_node.args or
- len(self.env_stack) < 2):
- return node
- class_node, class_scope = self.env_stack[-2]
- if class_scope.is_py_class_scope:
- def_node.requires_classobj = True
- class_node.class_cell.is_active = True
- node.args = [
- ExprNodes.ClassCellNode(
- node.pos, is_generator=def_node.is_generator),
- ExprNodes.NameNode(node.pos, name=def_node.args[0].name)
- ]
- elif class_scope.is_c_class_scope:
- node.args = [
- ExprNodes.NameNode(
- node.pos, name=class_node.scope.name,
- entry=class_node.entry),
- ExprNodes.NameNode(node.pos, name=def_node.args[0].name)
- ]
- return node
-
- def visit_SimpleCallNode(self, node):
- # cython.foo
- function = node.function.as_cython_attribute()
- if function:
- if function in InterpretCompilerDirectives.unop_method_nodes:
- if len(node.args) != 1:
- error(node.function.pos, u"%s() takes exactly one argument" % function)
- else:
- node = InterpretCompilerDirectives.unop_method_nodes[function](
- node.function.pos, operand=node.args[0])
- elif function in InterpretCompilerDirectives.binop_method_nodes:
- if len(node.args) != 2:
- error(node.function.pos, u"%s() takes exactly two arguments" % function)
- else:
- node = InterpretCompilerDirectives.binop_method_nodes[function](
- node.function.pos, operand1=node.args[0], operand2=node.args[1])
- elif function == u'cast':
- if len(node.args) != 2:
+
+ def visit_SingleAssignmentNode(self, node):
+ if node.declaration_only:
+ return None
+ else:
+ self.visitchildren(node)
+ return node
+
+ def visit_AttributeNode(self, node):
+ self.visitchildren(node)
+ return self.visit_cython_attribute(node)
+
+ def visit_NameNode(self, node):
+ return self.visit_cython_attribute(node)
+
+ def visit_cython_attribute(self, node):
+ attribute = node.as_cython_attribute()
+ if attribute:
+ if attribute == u'compiled':
+ node = ExprNodes.BoolNode(node.pos, value=True)
+ elif attribute == u'__version__':
+ from .. import __version__ as version
+ node = ExprNodes.StringNode(node.pos, value=EncodedString(version))
+ elif attribute == u'NULL':
+ node = ExprNodes.NullNode(node.pos)
+ elif attribute in (u'set', u'frozenset', u'staticmethod'):
+ node = ExprNodes.NameNode(node.pos, name=EncodedString(attribute),
+ entry=self.current_env().builtin_scope().lookup_here(attribute))
+ elif PyrexTypes.parse_basic_type(attribute):
+ pass
+ elif self.context.cython_scope.lookup_qualified_name(attribute):
+ pass
+ else:
+ error(node.pos, u"'%s' not a valid cython attribute or is being used incorrectly" % attribute)
+ return node
+
+ def visit_ExecStatNode(self, node):
+ lenv = self.current_env()
+ self.visitchildren(node)
+ if len(node.args) == 1:
+ node.args.append(ExprNodes.GlobalsExprNode(node.pos))
+ if not lenv.is_module_scope:
+ node.args.append(
+ ExprNodes.LocalsExprNode(
+ node.pos, self.current_scope_node(), lenv))
+ return node
+
+ def _inject_locals(self, node, func_name):
+ # locals()/dir()/vars() builtins
+ lenv = self.current_env()
+ entry = lenv.lookup_here(func_name)
+ if entry:
+ # not the builtin
+ return node
+ pos = node.pos
+ if func_name in ('locals', 'vars'):
+ if func_name == 'locals' and len(node.args) > 0:
+ error(self.pos, "Builtin 'locals()' called with wrong number of args, expected 0, got %d"
+ % len(node.args))
+ return node
+ elif func_name == 'vars':
+ if len(node.args) > 1:
+ error(self.pos, "Builtin 'vars()' called with wrong number of args, expected 0-1, got %d"
+ % len(node.args))
+ if len(node.args) > 0:
+ return node # nothing to do
+ return ExprNodes.LocalsExprNode(pos, self.current_scope_node(), lenv)
+ else: # dir()
+ if len(node.args) > 1:
+ error(self.pos, "Builtin 'dir()' called with wrong number of args, expected 0-1, got %d"
+ % len(node.args))
+ if len(node.args) > 0:
+ # optimised in Builtin.py
+ return node
+ if lenv.is_py_class_scope or lenv.is_module_scope:
+ if lenv.is_py_class_scope:
+ pyclass = self.current_scope_node()
+ locals_dict = ExprNodes.CloneNode(pyclass.dict)
+ else:
+ locals_dict = ExprNodes.GlobalsExprNode(pos)
+ return ExprNodes.SortedDictKeysNode(locals_dict)
+ local_names = sorted(var.name for var in lenv.entries.values() if var.name)
+ items = [ExprNodes.IdentifierStringNode(pos, value=var)
+ for var in local_names]
+ return ExprNodes.ListNode(pos, args=items)
+
+ def visit_PrimaryCmpNode(self, node):
+ # special case: for in/not-in test, we do not need to sort locals()
+ self.visitchildren(node)
+ if node.operator in 'not_in': # in/not_in
+ if isinstance(node.operand2, ExprNodes.SortedDictKeysNode):
+ arg = node.operand2.arg
+ if isinstance(arg, ExprNodes.NoneCheckNode):
+ arg = arg.arg
+ node.operand2 = arg
+ return node
+
+ def visit_CascadedCmpNode(self, node):
+ return self.visit_PrimaryCmpNode(node)
+
+ def _inject_eval(self, node, func_name):
+ lenv = self.current_env()
+ entry = lenv.lookup_here(func_name)
+ if entry or len(node.args) != 1:
+ return node
+ # Inject globals and locals
+ node.args.append(ExprNodes.GlobalsExprNode(node.pos))
+ if not lenv.is_module_scope:
+ node.args.append(
+ ExprNodes.LocalsExprNode(
+ node.pos, self.current_scope_node(), lenv))
+ return node
+
+ def _inject_super(self, node, func_name):
+ lenv = self.current_env()
+ entry = lenv.lookup_here(func_name)
+ if entry or node.args:
+ return node
+ # Inject no-args super
+ def_node = self.current_scope_node()
+ if (not isinstance(def_node, Nodes.DefNode) or not def_node.args or
+ len(self.env_stack) < 2):
+ return node
+ class_node, class_scope = self.env_stack[-2]
+ if class_scope.is_py_class_scope:
+ def_node.requires_classobj = True
+ class_node.class_cell.is_active = True
+ node.args = [
+ ExprNodes.ClassCellNode(
+ node.pos, is_generator=def_node.is_generator),
+ ExprNodes.NameNode(node.pos, name=def_node.args[0].name)
+ ]
+ elif class_scope.is_c_class_scope:
+ node.args = [
+ ExprNodes.NameNode(
+ node.pos, name=class_node.scope.name,
+ entry=class_node.entry),
+ ExprNodes.NameNode(node.pos, name=def_node.args[0].name)
+ ]
+ return node
+
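Illustration of the no-argument super() rewrite above (sketch; it applies only when the call has no arguments, is not shadowed by a local name 'super', and appears directly inside a method; class names are invented):

    class A(B):
        def __init__(self, n):
            super().__init__(n)
            # treated roughly as super(<class cell of A>, self).__init__(n):
            # the transform appends the active class cell and the method's first
            # positional argument to the empty argument list (for cdef classes,
            # the extension type itself is passed instead of a class cell).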
+ def visit_SimpleCallNode(self, node):
+ # cython.foo
+ function = node.function.as_cython_attribute()
+ if function:
+ if function in InterpretCompilerDirectives.unop_method_nodes:
+ if len(node.args) != 1:
+ error(node.function.pos, u"%s() takes exactly one argument" % function)
+ else:
+ node = InterpretCompilerDirectives.unop_method_nodes[function](
+ node.function.pos, operand=node.args[0])
+ elif function in InterpretCompilerDirectives.binop_method_nodes:
+ if len(node.args) != 2:
+ error(node.function.pos, u"%s() takes exactly two arguments" % function)
+ else:
+ node = InterpretCompilerDirectives.binop_method_nodes[function](
+ node.function.pos, operand1=node.args[0], operand2=node.args[1])
+ elif function == u'cast':
+ if len(node.args) != 2:
error(node.function.pos,
u"cast() takes exactly two arguments and an optional typecheck keyword")
- else:
- type = node.args[0].analyse_as_type(self.current_env())
- if type:
+ else:
+ type = node.args[0].analyse_as_type(self.current_env())
+ if type:
node = ExprNodes.TypecastNode(
node.function.pos, type=type, operand=node.args[1], typecheck=False)
- else:
- error(node.args[0].pos, "Not a type")
- elif function == u'sizeof':
- if len(node.args) != 1:
- error(node.function.pos, u"sizeof() takes exactly one argument")
- else:
- type = node.args[0].analyse_as_type(self.current_env())
- if type:
- node = ExprNodes.SizeofTypeNode(node.function.pos, arg_type=type)
- else:
- node = ExprNodes.SizeofVarNode(node.function.pos, operand=node.args[0])
- elif function == 'cmod':
- if len(node.args) != 2:
- error(node.function.pos, u"cmod() takes exactly two arguments")
- else:
- node = ExprNodes.binop_node(node.function.pos, '%', node.args[0], node.args[1])
- node.cdivision = True
- elif function == 'cdiv':
- if len(node.args) != 2:
- error(node.function.pos, u"cdiv() takes exactly two arguments")
- else:
- node = ExprNodes.binop_node(node.function.pos, '/', node.args[0], node.args[1])
- node.cdivision = True
- elif function == u'set':
- node.function = ExprNodes.NameNode(node.pos, name=EncodedString('set'))
- elif function == u'staticmethod':
- node.function = ExprNodes.NameNode(node.pos, name=EncodedString('staticmethod'))
- elif self.context.cython_scope.lookup_qualified_name(function):
- pass
- else:
- error(node.function.pos,
- u"'%s' not a valid cython language construct" % function)
-
- self.visitchildren(node)
-
- if isinstance(node, ExprNodes.SimpleCallNode) and node.function.is_name:
- func_name = node.function.name
- if func_name in ('dir', 'locals', 'vars'):
- return self._inject_locals(node, func_name)
- if func_name == 'eval':
- return self._inject_eval(node, func_name)
- if func_name == 'super':
- return self._inject_super(node, func_name)
- return node
-
+ else:
+ error(node.args[0].pos, "Not a type")
+ elif function == u'sizeof':
+ if len(node.args) != 1:
+ error(node.function.pos, u"sizeof() takes exactly one argument")
+ else:
+ type = node.args[0].analyse_as_type(self.current_env())
+ if type:
+ node = ExprNodes.SizeofTypeNode(node.function.pos, arg_type=type)
+ else:
+ node = ExprNodes.SizeofVarNode(node.function.pos, operand=node.args[0])
+ elif function == 'cmod':
+ if len(node.args) != 2:
+ error(node.function.pos, u"cmod() takes exactly two arguments")
+ else:
+ node = ExprNodes.binop_node(node.function.pos, '%', node.args[0], node.args[1])
+ node.cdivision = True
+ elif function == 'cdiv':
+ if len(node.args) != 2:
+ error(node.function.pos, u"cdiv() takes exactly two arguments")
+ else:
+ node = ExprNodes.binop_node(node.function.pos, '/', node.args[0], node.args[1])
+ node.cdivision = True
+ elif function == u'set':
+ node.function = ExprNodes.NameNode(node.pos, name=EncodedString('set'))
+ elif function == u'staticmethod':
+ node.function = ExprNodes.NameNode(node.pos, name=EncodedString('staticmethod'))
+ elif self.context.cython_scope.lookup_qualified_name(function):
+ pass
+ else:
+ error(node.function.pos,
+ u"'%s' not a valid cython language construct" % function)
+
+ self.visitchildren(node)
+
+ if isinstance(node, ExprNodes.SimpleCallNode) and node.function.is_name:
+ func_name = node.function.name
+ if func_name in ('dir', 'locals', 'vars'):
+ return self._inject_locals(node, func_name)
+ if func_name == 'eval':
+ return self._inject_eval(node, func_name)
+ if func_name == 'super':
+ return self._inject_super(node, func_name)
+ return node
+
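A few of the cython.* calls handled above, as they would appear in user code (sketch; variable names are invented):

    import cython

    y = cython.cast(cython.int, obj)   # becomes a TypecastNode, not a Python call
    n = cython.sizeof(cython.long)     # SizeofTypeNode for a type argument
    q = cython.cdiv(a, b)              # C '/' with cdivision semantics
    r = cython.cmod(a, b)              # C '%' with cdivision semantics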
def visit_GeneralCallNode(self, node):
function = node.function.as_cython_attribute()
if function == u'cast':
@@ -3207,308 +3207,308 @@ class TransformBuiltinMethods(EnvTransform):
node.function.pos, type=type, operand=args[1], typecheck=typecheck)
else:
error(args[0].pos, "Not a type")
-
- self.visitchildren(node)
- return node
-
-
-class ReplaceFusedTypeChecks(VisitorTransform):
- """
- This is not a transform in the pipeline. It is invoked on the specific
- versions of a cdef function with fused argument types. It filters out any
- type branches that don't match. e.g.
-
- if fused_t is mytype:
- ...
- elif fused_t in other_fused_type:
- ...
- """
- def __init__(self, local_scope):
- super(ReplaceFusedTypeChecks, self).__init__()
- self.local_scope = local_scope
- # defer the import until now to avoid circular import time dependencies
- from .Optimize import ConstantFolding
- self.transform = ConstantFolding(reevaluate=True)
-
- def visit_IfStatNode(self, node):
- """
- Filters out any if clauses with false compile time type check
- expression.
- """
- self.visitchildren(node)
- return self.transform(node)
-
- def visit_PrimaryCmpNode(self, node):
+
+ self.visitchildren(node)
+ return node
+
+
+class ReplaceFusedTypeChecks(VisitorTransform):
+ """
+ This is not a transform in the pipeline. It is invoked on the specific
+ versions of a cdef function with fused argument types. It filters out any
+ type branches that don't match. e.g.
+
+ if fused_t is mytype:
+ ...
+ elif fused_t in other_fused_type:
+ ...
+ """
+ def __init__(self, local_scope):
+ super(ReplaceFusedTypeChecks, self).__init__()
+ self.local_scope = local_scope
+ # defer the import until now to avoid circular import time dependencies
+ from .Optimize import ConstantFolding
+ self.transform = ConstantFolding(reevaluate=True)
+
+ def visit_IfStatNode(self, node):
+ """
+ Filters out any if clauses with false compile time type check
+ expression.
+ """
+ self.visitchildren(node)
+ return self.transform(node)
+
+ def visit_PrimaryCmpNode(self, node):
with Errors.local_errors(ignore=True):
type1 = node.operand1.analyse_as_type(self.local_scope)
type2 = node.operand2.analyse_as_type(self.local_scope)
-
- if type1 and type2:
- false_node = ExprNodes.BoolNode(node.pos, value=False)
- true_node = ExprNodes.BoolNode(node.pos, value=True)
-
- type1 = self.specialize_type(type1, node.operand1.pos)
- op = node.operator
-
- if op in ('is', 'is_not', '==', '!='):
- type2 = self.specialize_type(type2, node.operand2.pos)
-
- is_same = type1.same_as(type2)
- eq = op in ('is', '==')
-
- if (is_same and eq) or (not is_same and not eq):
- return true_node
-
- elif op in ('in', 'not_in'):
- # We have to do an instance check directly, as operand2
- # needs to be a fused type and not a type with a subtype
- # that is fused. First unpack the typedef
- if isinstance(type2, PyrexTypes.CTypedefType):
- type2 = type2.typedef_base_type
-
- if type1.is_fused:
- error(node.operand1.pos, "Type is fused")
- elif not type2.is_fused:
- error(node.operand2.pos,
- "Can only use 'in' or 'not in' on a fused type")
- else:
- types = PyrexTypes.get_specialized_types(type2)
-
- for specialized_type in types:
- if type1.same_as(specialized_type):
- if op == 'in':
- return true_node
- else:
- return false_node
-
- if op == 'not_in':
- return true_node
-
- return false_node
-
- return node
-
- def specialize_type(self, type, pos):
- try:
- return type.specialize(self.local_scope.fused_to_specific)
- except KeyError:
- error(pos, "Type is not specific")
- return type
-
- def visit_Node(self, node):
- self.visitchildren(node)
- return node
-
-
-class DebugTransform(CythonTransform):
- """
- Write debug information for this Cython module.
- """
-
- def __init__(self, context, options, result):
- super(DebugTransform, self).__init__(context)
- self.visited = set()
- # our treebuilder and debug output writer
- # (see Cython.Debugger.debug_output.CythonDebugWriter)
- self.tb = self.context.gdb_debug_outputwriter
- #self.c_output_file = options.output_file
- self.c_output_file = result.c_file
-
- # Closure support, basically treat nested functions as if the AST were
- # never nested
- self.nested_funcdefs = []
-
- # tells visit_NameNode whether it should register step-into functions
- self.register_stepinto = False
-
- def visit_ModuleNode(self, node):
- self.tb.module_name = node.full_module_name
- attrs = dict(
- module_name=node.full_module_name,
- filename=node.pos[0].filename,
- c_filename=self.c_output_file)
-
- self.tb.start('Module', attrs)
-
- # serialize functions
- self.tb.start('Functions')
- # First, serialize functions normally...
- self.visitchildren(node)
-
- # ... then, serialize nested functions
- for nested_funcdef in self.nested_funcdefs:
- self.visit_FuncDefNode(nested_funcdef)
-
- self.register_stepinto = True
- self.serialize_modulenode_as_function(node)
- self.register_stepinto = False
- self.tb.end('Functions')
-
- # 2.3 compatibility. Serialize global variables
- self.tb.start('Globals')
- entries = {}
-
+
+ if type1 and type2:
+ false_node = ExprNodes.BoolNode(node.pos, value=False)
+ true_node = ExprNodes.BoolNode(node.pos, value=True)
+
+ type1 = self.specialize_type(type1, node.operand1.pos)
+ op = node.operator
+
+ if op in ('is', 'is_not', '==', '!='):
+ type2 = self.specialize_type(type2, node.operand2.pos)
+
+ is_same = type1.same_as(type2)
+ eq = op in ('is', '==')
+
+ if (is_same and eq) or (not is_same and not eq):
+ return true_node
+
+ elif op in ('in', 'not_in'):
+ # We have to do an instance check directly, as operand2
+ # needs to be a fused type and not a type with a subtype
+ # that is fused. First unpack the typedef
+ if isinstance(type2, PyrexTypes.CTypedefType):
+ type2 = type2.typedef_base_type
+
+ if type1.is_fused:
+ error(node.operand1.pos, "Type is fused")
+ elif not type2.is_fused:
+ error(node.operand2.pos,
+ "Can only use 'in' or 'not in' on a fused type")
+ else:
+ types = PyrexTypes.get_specialized_types(type2)
+
+ for specialized_type in types:
+ if type1.same_as(specialized_type):
+ if op == 'in':
+ return true_node
+ else:
+ return false_node
+
+ if op == 'not_in':
+ return true_node
+
+ return false_node
+
+ return node
+
+ def specialize_type(self, type, pos):
+ try:
+ return type.specialize(self.local_scope.fused_to_specific)
+ except KeyError:
+ error(pos, "Type is not specific")
+ return type
+
+ def visit_Node(self, node):
+ self.visitchildren(node)
+ return node
+
+
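Sketch of the branch pruning performed at specialization time (mirrors the docstring's example; handle_int and handle_double are invented helper names):

    ctypedef fused fused_t:
        int
        double

    cdef process(fused_t x):
        if fused_t is int:          # folds to a constant in each specialization...
            return handle_int(x)
        elif fused_t is double:     # ...so the non-matching clause is removed
            return handle_double(x)

    # In the 'int' specialization the first test is replaced by BoolNode(True)
    # and ConstantFolding drops the 'double' branch entirely.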
+class DebugTransform(CythonTransform):
+ """
+ Write debug information for this Cython module.
+ """
+
+ def __init__(self, context, options, result):
+ super(DebugTransform, self).__init__(context)
+ self.visited = set()
+ # our treebuilder and debug output writer
+ # (see Cython.Debugger.debug_output.CythonDebugWriter)
+ self.tb = self.context.gdb_debug_outputwriter
+ #self.c_output_file = options.output_file
+ self.c_output_file = result.c_file
+
+ # Closure support, basically treat nested functions as if the AST were
+ # never nested
+ self.nested_funcdefs = []
+
+ # tells visit_NameNode whether it should register step-into functions
+ self.register_stepinto = False
+
+ def visit_ModuleNode(self, node):
+ self.tb.module_name = node.full_module_name
+ attrs = dict(
+ module_name=node.full_module_name,
+ filename=node.pos[0].filename,
+ c_filename=self.c_output_file)
+
+ self.tb.start('Module', attrs)
+
+ # serialize functions
+ self.tb.start('Functions')
+ # First, serialize functions normally...
+ self.visitchildren(node)
+
+ # ... then, serialize nested functions
+ for nested_funcdef in self.nested_funcdefs:
+ self.visit_FuncDefNode(nested_funcdef)
+
+ self.register_stepinto = True
+ self.serialize_modulenode_as_function(node)
+ self.register_stepinto = False
+ self.tb.end('Functions')
+
+ # 2.3 compatibility. Serialize global variables
+ self.tb.start('Globals')
+ entries = {}
+
for k, v in node.scope.entries.items():
- if (v.qualified_name not in self.visited and not
+ if (v.qualified_name not in self.visited and not
v.name.startswith('__pyx_') and not
v.type.is_cfunction and not
v.type.is_extension_type):
- entries[k]= v
-
- self.serialize_local_variables(entries)
- self.tb.end('Globals')
- # self.tb.end('Module') # end Module after the line number mapping in
- # Cython.Compiler.ModuleNode.ModuleNode._serialize_lineno_map
- return node
-
- def visit_FuncDefNode(self, node):
- self.visited.add(node.local_scope.qualified_name)
-
- if getattr(node, 'is_wrapper', False):
- return node
-
- if self.register_stepinto:
- self.nested_funcdefs.append(node)
- return node
-
- # node.entry.visibility = 'extern'
- if node.py_func is None:
- pf_cname = ''
- else:
- pf_cname = node.py_func.entry.func_cname
-
- attrs = dict(
- name=node.entry.name or getattr(node, 'name', '<unknown>'),
- cname=node.entry.func_cname,
- pf_cname=pf_cname,
- qualified_name=node.local_scope.qualified_name,
- lineno=str(node.pos[1]))
-
- self.tb.start('Function', attrs=attrs)
-
- self.tb.start('Locals')
- self.serialize_local_variables(node.local_scope.entries)
- self.tb.end('Locals')
-
- self.tb.start('Arguments')
- for arg in node.local_scope.arg_entries:
- self.tb.start(arg.name)
- self.tb.end(arg.name)
- self.tb.end('Arguments')
-
- self.tb.start('StepIntoFunctions')
- self.register_stepinto = True
- self.visitchildren(node)
- self.register_stepinto = False
- self.tb.end('StepIntoFunctions')
- self.tb.end('Function')
-
- return node
-
- def visit_NameNode(self, node):
- if (self.register_stepinto and
+ entries[k] = v
+
+ self.serialize_local_variables(entries)
+ self.tb.end('Globals')
+ # self.tb.end('Module') # end Module after the line number mapping in
+ # Cython.Compiler.ModuleNode.ModuleNode._serialize_lineno_map
+ return node
+
+ def visit_FuncDefNode(self, node):
+ self.visited.add(node.local_scope.qualified_name)
+
+ if getattr(node, 'is_wrapper', False):
+ return node
+
+ if self.register_stepinto:
+ self.nested_funcdefs.append(node)
+ return node
+
+ # node.entry.visibility = 'extern'
+ if node.py_func is None:
+ pf_cname = ''
+ else:
+ pf_cname = node.py_func.entry.func_cname
+
+ attrs = dict(
+ name=node.entry.name or getattr(node, 'name', '<unknown>'),
+ cname=node.entry.func_cname,
+ pf_cname=pf_cname,
+ qualified_name=node.local_scope.qualified_name,
+ lineno=str(node.pos[1]))
+
+ self.tb.start('Function', attrs=attrs)
+
+ self.tb.start('Locals')
+ self.serialize_local_variables(node.local_scope.entries)
+ self.tb.end('Locals')
+
+ self.tb.start('Arguments')
+ for arg in node.local_scope.arg_entries:
+ self.tb.start(arg.name)
+ self.tb.end(arg.name)
+ self.tb.end('Arguments')
+
+ self.tb.start('StepIntoFunctions')
+ self.register_stepinto = True
+ self.visitchildren(node)
+ self.register_stepinto = False
+ self.tb.end('StepIntoFunctions')
+ self.tb.end('Function')
+
+ return node
+
+ def visit_NameNode(self, node):
+ if (self.register_stepinto and
node.type is not None and
- node.type.is_cfunction and
- getattr(node, 'is_called', False) and
- node.entry.func_cname is not None):
- # don't check node.entry.in_cinclude, as 'cdef extern: ...'
- # declared functions are not 'in_cinclude'.
- # This means we will list called 'cdef' functions as
- # "step into functions", but this is not an issue as they will be
- # recognized as Cython functions anyway.
- attrs = dict(name=node.entry.func_cname)
- self.tb.start('StepIntoFunction', attrs=attrs)
- self.tb.end('StepIntoFunction')
-
- self.visitchildren(node)
- return node
-
- def serialize_modulenode_as_function(self, node):
- """
- Serialize the module-level code as a function so the debugger will know
- it's a "relevant frame" and it will know where to set the breakpoint
- for 'break modulename'.
- """
- name = node.full_module_name.rpartition('.')[-1]
-
- cname_py2 = 'init' + name
- cname_py3 = 'PyInit_' + name
-
- py2_attrs = dict(
- name=name,
- cname=cname_py2,
- pf_cname='',
- # Ignore the qualified_name, breakpoints should be set using
- # `cy break modulename:lineno` for module-level breakpoints.
- qualified_name='',
- lineno='1',
- is_initmodule_function="True",
- )
-
- py3_attrs = dict(py2_attrs, cname=cname_py3)
-
- self._serialize_modulenode_as_function(node, py2_attrs)
- self._serialize_modulenode_as_function(node, py3_attrs)
-
- def _serialize_modulenode_as_function(self, node, attrs):
- self.tb.start('Function', attrs=attrs)
-
- self.tb.start('Locals')
- self.serialize_local_variables(node.scope.entries)
- self.tb.end('Locals')
-
- self.tb.start('Arguments')
- self.tb.end('Arguments')
-
- self.tb.start('StepIntoFunctions')
- self.register_stepinto = True
- self.visitchildren(node)
- self.register_stepinto = False
- self.tb.end('StepIntoFunctions')
-
- self.tb.end('Function')
-
- def serialize_local_variables(self, entries):
- for entry in entries.values():
- if not entry.cname:
- # not a local variable
- continue
- if entry.type.is_pyobject:
- vartype = 'PythonObject'
- else:
- vartype = 'CObject'
-
- if entry.from_closure:
- # We're dealing with a closure where a variable from an outer
- # scope is accessed, get it from the scope object.
- cname = '%s->%s' % (Naming.cur_scope_cname,
- entry.outer_entry.cname)
-
- qname = '%s.%s.%s' % (entry.scope.outer_scope.qualified_name,
- entry.scope.name,
- entry.name)
- elif entry.in_closure:
- cname = '%s->%s' % (Naming.cur_scope_cname,
- entry.cname)
- qname = entry.qualified_name
- else:
- cname = entry.cname
- qname = entry.qualified_name
-
- if not entry.pos:
- # this happens for variables that are not in the user's code,
- # e.g. for the global __builtins__, __doc__, etc. We can just
- # set the lineno to 0 for those.
- lineno = '0'
- else:
- lineno = str(entry.pos[1])
-
- attrs = dict(
- name=entry.name,
- cname=cname,
- qualified_name=qname,
- type=vartype,
- lineno=lineno)
-
- self.tb.start('LocalVar', attrs)
- self.tb.end('LocalVar')
+ node.type.is_cfunction and
+ getattr(node, 'is_called', False) and
+ node.entry.func_cname is not None):
+ # don't check node.entry.in_cinclude, as 'cdef extern: ...'
+ # declared functions are not 'in_cinclude'.
+ # This means we will list called 'cdef' functions as
+ # "step into functions", but this is not an issue as they will be
+ # recognized as Cython functions anyway.
+ attrs = dict(name=node.entry.func_cname)
+ self.tb.start('StepIntoFunction', attrs=attrs)
+ self.tb.end('StepIntoFunction')
+
+ self.visitchildren(node)
+ return node
+
+ def serialize_modulenode_as_function(self, node):
+ """
+ Serialize the module-level code as a function so the debugger will know
+ it's a "relevant frame" and it will know where to set the breakpoint
+ for 'break modulename'.
+ """
+ name = node.full_module_name.rpartition('.')[-1]
+
+ cname_py2 = 'init' + name
+ cname_py3 = 'PyInit_' + name
+
+ py2_attrs = dict(
+ name=name,
+ cname=cname_py2,
+ pf_cname='',
+ # Ignore the qualified_name, breakpoints should be set using
+ # `cy break modulename:lineno` for module-level breakpoints.
+ qualified_name='',
+ lineno='1',
+ is_initmodule_function="True",
+ )
+
+ py3_attrs = dict(py2_attrs, cname=cname_py3)
+
+ self._serialize_modulenode_as_function(node, py2_attrs)
+ self._serialize_modulenode_as_function(node, py3_attrs)
+
+ def _serialize_modulenode_as_function(self, node, attrs):
+ self.tb.start('Function', attrs=attrs)
+
+ self.tb.start('Locals')
+ self.serialize_local_variables(node.scope.entries)
+ self.tb.end('Locals')
+
+ self.tb.start('Arguments')
+ self.tb.end('Arguments')
+
+ self.tb.start('StepIntoFunctions')
+ self.register_stepinto = True
+ self.visitchildren(node)
+ self.register_stepinto = False
+ self.tb.end('StepIntoFunctions')
+
+ self.tb.end('Function')
+
+ def serialize_local_variables(self, entries):
+ for entry in entries.values():
+ if not entry.cname:
+ # not a local variable
+ continue
+ if entry.type.is_pyobject:
+ vartype = 'PythonObject'
+ else:
+ vartype = 'CObject'
+
+ if entry.from_closure:
+ # We're dealing with a closure where a variable from an outer
+ # scope is accessed, get it from the scope object.
+ cname = '%s->%s' % (Naming.cur_scope_cname,
+ entry.outer_entry.cname)
+
+ qname = '%s.%s.%s' % (entry.scope.outer_scope.qualified_name,
+ entry.scope.name,
+ entry.name)
+ elif entry.in_closure:
+ cname = '%s->%s' % (Naming.cur_scope_cname,
+ entry.cname)
+ qname = entry.qualified_name
+ else:
+ cname = entry.cname
+ qname = entry.qualified_name
+
+ if not entry.pos:
+ # this happens for variables that are not in the user's code,
+ # e.g. for the global __builtins__, __doc__, etc. We can just
+ # set the lineno to 0 for those.
+ lineno = '0'
+ else:
+ lineno = str(entry.pos[1])
+
+ attrs = dict(
+ name=entry.name,
+ cname=cname,
+ qualified_name=qname,
+ type=vartype,
+ lineno=lineno)
+
+ self.tb.start('LocalVar', attrs)
+ self.tb.end('LocalVar')
diff --git a/contrib/tools/cython/Cython/Compiler/Parsing.pxd b/contrib/tools/cython/Cython/Compiler/Parsing.pxd
index 2911c5d554..25453b39ab 100644
--- a/contrib/tools/cython/Cython/Compiler/Parsing.pxd
+++ b/contrib/tools/cython/Cython/Compiler/Parsing.pxd
@@ -1,199 +1,199 @@
-# We declare all of these here to type the first argument.
-
-from __future__ import absolute_import
-
-cimport cython
-from .Scanning cimport PyrexScanner
-
-ctypedef object (*p_sub_expr_func)(PyrexScanner obj)
-
-# entry points
-
-cpdef p_module(PyrexScanner s, pxd, full_module_name, ctx=*)
-cpdef p_code(PyrexScanner s, level= *, ctx=*)
-
-# internal parser states
-
-cdef p_ident(PyrexScanner s, message =*)
-cdef p_ident_list(PyrexScanner s)
-
-cdef tuple p_binop_operator(PyrexScanner s)
-cdef p_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_sub_expr)
-cdef p_lambdef(PyrexScanner s, bint allow_conditional=*)
-cdef p_lambdef_nocond(PyrexScanner s)
-cdef p_test(PyrexScanner s)
-cdef p_test_nocond(PyrexScanner s)
-cdef p_or_test(PyrexScanner s)
-cdef p_rassoc_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_subexpr)
-cdef p_and_test(PyrexScanner s)
-cdef p_not_test(PyrexScanner s)
-cdef p_comparison(PyrexScanner s)
-cdef p_test_or_starred_expr(PyrexScanner s)
-cdef p_starred_expr(PyrexScanner s)
-cdef p_cascaded_cmp(PyrexScanner s)
-cdef p_cmp_op(PyrexScanner s)
-cdef p_bit_expr(PyrexScanner s)
-cdef p_xor_expr(PyrexScanner s)
-cdef p_and_expr(PyrexScanner s)
-cdef p_shift_expr(PyrexScanner s)
-cdef p_arith_expr(PyrexScanner s)
-cdef p_term(PyrexScanner s)
-cdef p_factor(PyrexScanner s)
-cdef _p_factor(PyrexScanner s)
-cdef p_typecast(PyrexScanner s)
-cdef p_sizeof(PyrexScanner s)
-cdef p_yield_expression(PyrexScanner s)
-cdef p_yield_statement(PyrexScanner s)
+# We declare all of these here to type the first argument.
+
+from __future__ import absolute_import
+
+cimport cython
+from .Scanning cimport PyrexScanner
+
+ctypedef object (*p_sub_expr_func)(PyrexScanner obj)
+
+# entry points
+
+cpdef p_module(PyrexScanner s, pxd, full_module_name, ctx=*)
+cpdef p_code(PyrexScanner s, level= *, ctx=*)
+
+# internal parser states
+
+cdef p_ident(PyrexScanner s, message =*)
+cdef p_ident_list(PyrexScanner s)
+
+cdef tuple p_binop_operator(PyrexScanner s)
+cdef p_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_sub_expr)
+cdef p_lambdef(PyrexScanner s, bint allow_conditional=*)
+cdef p_lambdef_nocond(PyrexScanner s)
+cdef p_test(PyrexScanner s)
+cdef p_test_nocond(PyrexScanner s)
+cdef p_or_test(PyrexScanner s)
+cdef p_rassoc_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_subexpr)
+cdef p_and_test(PyrexScanner s)
+cdef p_not_test(PyrexScanner s)
+cdef p_comparison(PyrexScanner s)
+cdef p_test_or_starred_expr(PyrexScanner s)
+cdef p_starred_expr(PyrexScanner s)
+cdef p_cascaded_cmp(PyrexScanner s)
+cdef p_cmp_op(PyrexScanner s)
+cdef p_bit_expr(PyrexScanner s)
+cdef p_xor_expr(PyrexScanner s)
+cdef p_and_expr(PyrexScanner s)
+cdef p_shift_expr(PyrexScanner s)
+cdef p_arith_expr(PyrexScanner s)
+cdef p_term(PyrexScanner s)
+cdef p_factor(PyrexScanner s)
+cdef _p_factor(PyrexScanner s)
+cdef p_typecast(PyrexScanner s)
+cdef p_sizeof(PyrexScanner s)
+cdef p_yield_expression(PyrexScanner s)
+cdef p_yield_statement(PyrexScanner s)
cdef p_async_statement(PyrexScanner s, ctx, decorators)
-cdef p_power(PyrexScanner s)
-cdef p_new_expr(PyrexScanner s)
-cdef p_trailer(PyrexScanner s, node1)
-cdef p_call_parse_args(PyrexScanner s, bint allow_genexp = *)
+cdef p_power(PyrexScanner s)
+cdef p_new_expr(PyrexScanner s)
+cdef p_trailer(PyrexScanner s, node1)
+cdef p_call_parse_args(PyrexScanner s, bint allow_genexp = *)
cdef p_call_build_packed_args(pos, positional_args, keyword_args)
-cdef p_call(PyrexScanner s, function)
-cdef p_index(PyrexScanner s, base)
-cdef tuple p_subscript_list(PyrexScanner s)
-cdef p_subscript(PyrexScanner s)
-cdef p_slice_element(PyrexScanner s, follow_set)
-cdef expect_ellipsis(PyrexScanner s)
-cdef make_slice_nodes(pos, subscripts)
-cpdef make_slice_node(pos, start, stop = *, step = *)
-cdef p_atom(PyrexScanner s)
-@cython.locals(value=unicode)
-cdef p_int_literal(PyrexScanner s)
-cdef p_name(PyrexScanner s, name)
-cdef wrap_compile_time_constant(pos, value)
-cdef p_cat_string_literal(PyrexScanner s)
-cdef p_opt_string_literal(PyrexScanner s, required_type=*)
-cdef bint check_for_non_ascii_characters(unicode string)
-@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
-cdef p_string_literal(PyrexScanner s, kind_override=*)
+cdef p_call(PyrexScanner s, function)
+cdef p_index(PyrexScanner s, base)
+cdef tuple p_subscript_list(PyrexScanner s)
+cdef p_subscript(PyrexScanner s)
+cdef p_slice_element(PyrexScanner s, follow_set)
+cdef expect_ellipsis(PyrexScanner s)
+cdef make_slice_nodes(pos, subscripts)
+cpdef make_slice_node(pos, start, stop = *, step = *)
+cdef p_atom(PyrexScanner s)
+@cython.locals(value=unicode)
+cdef p_int_literal(PyrexScanner s)
+cdef p_name(PyrexScanner s, name)
+cdef wrap_compile_time_constant(pos, value)
+cdef p_cat_string_literal(PyrexScanner s)
+cdef p_opt_string_literal(PyrexScanner s, required_type=*)
+cdef bint check_for_non_ascii_characters(unicode string)
+@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
+cdef p_string_literal(PyrexScanner s, kind_override=*)
cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s)
cdef tuple _f_string_error_pos(pos, string, Py_ssize_t i)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, next_start=Py_ssize_t)
cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4)
cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw)
-cdef p_list_maker(PyrexScanner s)
-cdef p_comp_iter(PyrexScanner s, body)
-cdef p_comp_for(PyrexScanner s, body)
-cdef p_comp_if(PyrexScanner s, body)
-cdef p_dict_or_set_maker(PyrexScanner s)
-cdef p_backquote_expr(PyrexScanner s)
-cdef p_simple_expr_list(PyrexScanner s, expr=*)
-cdef p_test_or_starred_expr_list(PyrexScanner s, expr=*)
-cdef p_testlist(PyrexScanner s)
-cdef p_testlist_star_expr(PyrexScanner s)
-cdef p_testlist_comp(PyrexScanner s)
-cdef p_genexp(PyrexScanner s, expr)
-
-#-------------------------------------------------------
-#
-# Statements
-#
-#-------------------------------------------------------
-
-cdef p_global_statement(PyrexScanner s)
-cdef p_nonlocal_statement(PyrexScanner s)
-cdef p_expression_or_assignment(PyrexScanner s)
-cdef p_print_statement(PyrexScanner s)
-cdef p_exec_statement(PyrexScanner s)
-cdef p_del_statement(PyrexScanner s)
-cdef p_pass_statement(PyrexScanner s, bint with_newline = *)
-cdef p_break_statement(PyrexScanner s)
-cdef p_continue_statement(PyrexScanner s)
-cdef p_return_statement(PyrexScanner s)
-cdef p_raise_statement(PyrexScanner s)
-cdef p_import_statement(PyrexScanner s)
-cdef p_from_import_statement(PyrexScanner s, bint first_statement = *)
-cdef p_imported_name(PyrexScanner s, bint is_cimport)
-cdef p_dotted_name(PyrexScanner s, bint as_allowed)
-cdef p_as_name(PyrexScanner s)
-cdef p_assert_statement(PyrexScanner s)
-cdef p_if_statement(PyrexScanner s)
-cdef p_if_clause(PyrexScanner s)
-cdef p_else_clause(PyrexScanner s)
-cdef p_while_statement(PyrexScanner s)
+cdef p_list_maker(PyrexScanner s)
+cdef p_comp_iter(PyrexScanner s, body)
+cdef p_comp_for(PyrexScanner s, body)
+cdef p_comp_if(PyrexScanner s, body)
+cdef p_dict_or_set_maker(PyrexScanner s)
+cdef p_backquote_expr(PyrexScanner s)
+cdef p_simple_expr_list(PyrexScanner s, expr=*)
+cdef p_test_or_starred_expr_list(PyrexScanner s, expr=*)
+cdef p_testlist(PyrexScanner s)
+cdef p_testlist_star_expr(PyrexScanner s)
+cdef p_testlist_comp(PyrexScanner s)
+cdef p_genexp(PyrexScanner s, expr)
+
+#-------------------------------------------------------
+#
+# Statements
+#
+#-------------------------------------------------------
+
+cdef p_global_statement(PyrexScanner s)
+cdef p_nonlocal_statement(PyrexScanner s)
+cdef p_expression_or_assignment(PyrexScanner s)
+cdef p_print_statement(PyrexScanner s)
+cdef p_exec_statement(PyrexScanner s)
+cdef p_del_statement(PyrexScanner s)
+cdef p_pass_statement(PyrexScanner s, bint with_newline = *)
+cdef p_break_statement(PyrexScanner s)
+cdef p_continue_statement(PyrexScanner s)
+cdef p_return_statement(PyrexScanner s)
+cdef p_raise_statement(PyrexScanner s)
+cdef p_import_statement(PyrexScanner s)
+cdef p_from_import_statement(PyrexScanner s, bint first_statement = *)
+cdef p_imported_name(PyrexScanner s, bint is_cimport)
+cdef p_dotted_name(PyrexScanner s, bint as_allowed)
+cdef p_as_name(PyrexScanner s)
+cdef p_assert_statement(PyrexScanner s)
+cdef p_if_statement(PyrexScanner s)
+cdef p_if_clause(PyrexScanner s)
+cdef p_else_clause(PyrexScanner s)
+cdef p_while_statement(PyrexScanner s)
cdef p_for_statement(PyrexScanner s, bint is_async=*)
cdef dict p_for_bounds(PyrexScanner s, bint allow_testlist=*, bint is_async=*)
-cdef p_for_from_relation(PyrexScanner s)
-cdef p_for_from_step(PyrexScanner s)
-cdef p_target(PyrexScanner s, terminator)
-cdef p_for_target(PyrexScanner s)
+cdef p_for_from_relation(PyrexScanner s)
+cdef p_for_from_step(PyrexScanner s)
+cdef p_target(PyrexScanner s, terminator)
+cdef p_for_target(PyrexScanner s)
cdef p_for_iterator(PyrexScanner s, bint allow_testlist=*, bint is_async=*)
-cdef p_try_statement(PyrexScanner s)
-cdef p_except_clause(PyrexScanner s)
-cdef p_include_statement(PyrexScanner s, ctx)
-cdef p_with_statement(PyrexScanner s)
+cdef p_try_statement(PyrexScanner s)
+cdef p_except_clause(PyrexScanner s)
+cdef p_include_statement(PyrexScanner s, ctx)
+cdef p_with_statement(PyrexScanner s)
cdef p_with_items(PyrexScanner s, bint is_async=*)
-cdef p_with_template(PyrexScanner s)
-cdef p_simple_statement(PyrexScanner s, bint first_statement = *)
-cdef p_simple_statement_list(PyrexScanner s, ctx, bint first_statement = *)
-cdef p_compile_time_expr(PyrexScanner s)
-cdef p_DEF_statement(PyrexScanner s)
-cdef p_IF_statement(PyrexScanner s, ctx)
-cdef p_statement(PyrexScanner s, ctx, bint first_statement = *)
-cdef p_statement_list(PyrexScanner s, ctx, bint first_statement = *)
-cdef p_suite(PyrexScanner s, ctx = *)
+cdef p_with_template(PyrexScanner s)
+cdef p_simple_statement(PyrexScanner s, bint first_statement = *)
+cdef p_simple_statement_list(PyrexScanner s, ctx, bint first_statement = *)
+cdef p_compile_time_expr(PyrexScanner s)
+cdef p_DEF_statement(PyrexScanner s)
+cdef p_IF_statement(PyrexScanner s, ctx)
+cdef p_statement(PyrexScanner s, ctx, bint first_statement = *)
+cdef p_statement_list(PyrexScanner s, ctx, bint first_statement = *)
+cdef p_suite(PyrexScanner s, ctx = *)
cdef tuple p_suite_with_docstring(PyrexScanner s, ctx, bint with_doc_only=*)
-cdef tuple _extract_docstring(node)
-cdef p_positional_and_keyword_args(PyrexScanner s, end_sy_set, templates = *)
-
-cpdef p_c_base_type(PyrexScanner s, bint self_flag = *, bint nonempty = *, templates = *)
-cdef p_calling_convention(PyrexScanner s)
-cdef p_c_complex_base_type(PyrexScanner s, templates = *)
-cdef p_c_simple_base_type(PyrexScanner s, bint self_flag, bint nonempty, templates = *)
-cdef p_buffer_or_template(PyrexScanner s, base_type_node, templates)
-cdef p_bracketed_base_type(PyrexScanner s, base_type_node, nonempty, empty)
-cdef is_memoryviewslice_access(PyrexScanner s)
-cdef p_memoryviewslice_access(PyrexScanner s, base_type_node)
-cdef bint looking_at_name(PyrexScanner s) except -2
-cdef object looking_at_expr(PyrexScanner s)# except -2
-cdef bint looking_at_base_type(PyrexScanner s) except -2
-cdef bint looking_at_dotted_name(PyrexScanner s) except -2
-cdef bint looking_at_call(PyrexScanner s) except -2
-cdef p_sign_and_longness(PyrexScanner s)
-cdef p_opt_cname(PyrexScanner s)
-cpdef p_c_declarator(PyrexScanner s, ctx = *, bint empty = *, bint is_type = *, bint cmethod_flag = *,
- bint assignable = *, bint nonempty = *,
- bint calling_convention_allowed = *)
-cdef p_c_array_declarator(PyrexScanner s, base)
-cdef p_c_func_declarator(PyrexScanner s, pos, ctx, base, bint cmethod_flag)
-cdef p_c_simple_declarator(PyrexScanner s, ctx, bint empty, bint is_type, bint cmethod_flag,
- bint assignable, bint nonempty)
-cdef p_nogil(PyrexScanner s)
-cdef p_with_gil(PyrexScanner s)
-cdef p_exception_value_clause(PyrexScanner s)
-cpdef p_c_arg_list(PyrexScanner s, ctx = *, bint in_pyfunc = *, bint cmethod_flag = *,
- bint nonempty_declarators = *, bint kw_only = *, bint annotated = *)
-cdef p_optional_ellipsis(PyrexScanner s)
-cdef p_c_arg_decl(PyrexScanner s, ctx, in_pyfunc, bint cmethod_flag = *, bint nonempty = *, bint kw_only = *, bint annotated = *)
-cdef p_api(PyrexScanner s)
-cdef p_cdef_statement(PyrexScanner s, ctx)
-cdef p_cdef_block(PyrexScanner s, ctx)
-cdef p_cdef_extern_block(PyrexScanner s, pos, ctx)
-cdef p_c_enum_definition(PyrexScanner s, pos, ctx)
-cdef p_c_enum_line(PyrexScanner s, ctx, list items)
-cdef p_c_enum_item(PyrexScanner s, ctx, list items)
-cdef p_c_struct_or_union_definition(PyrexScanner s, pos, ctx)
-cdef p_fused_definition(PyrexScanner s, pos, ctx)
-cdef p_struct_enum(PyrexScanner s, pos, ctx)
-cdef p_visibility(PyrexScanner s, prev_visibility)
-cdef p_c_modifiers(PyrexScanner s)
-cdef p_c_func_or_var_declaration(PyrexScanner s, pos, ctx)
-cdef p_ctypedef_statement(PyrexScanner s, ctx)
-cdef p_decorators(PyrexScanner s)
+cdef tuple _extract_docstring(node)
+cdef p_positional_and_keyword_args(PyrexScanner s, end_sy_set, templates = *)
+
+cpdef p_c_base_type(PyrexScanner s, bint self_flag = *, bint nonempty = *, templates = *)
+cdef p_calling_convention(PyrexScanner s)
+cdef p_c_complex_base_type(PyrexScanner s, templates = *)
+cdef p_c_simple_base_type(PyrexScanner s, bint self_flag, bint nonempty, templates = *)
+cdef p_buffer_or_template(PyrexScanner s, base_type_node, templates)
+cdef p_bracketed_base_type(PyrexScanner s, base_type_node, nonempty, empty)
+cdef is_memoryviewslice_access(PyrexScanner s)
+cdef p_memoryviewslice_access(PyrexScanner s, base_type_node)
+cdef bint looking_at_name(PyrexScanner s) except -2
+cdef object looking_at_expr(PyrexScanner s)# except -2
+cdef bint looking_at_base_type(PyrexScanner s) except -2
+cdef bint looking_at_dotted_name(PyrexScanner s) except -2
+cdef bint looking_at_call(PyrexScanner s) except -2
+cdef p_sign_and_longness(PyrexScanner s)
+cdef p_opt_cname(PyrexScanner s)
+cpdef p_c_declarator(PyrexScanner s, ctx = *, bint empty = *, bint is_type = *, bint cmethod_flag = *,
+ bint assignable = *, bint nonempty = *,
+ bint calling_convention_allowed = *)
+cdef p_c_array_declarator(PyrexScanner s, base)
+cdef p_c_func_declarator(PyrexScanner s, pos, ctx, base, bint cmethod_flag)
+cdef p_c_simple_declarator(PyrexScanner s, ctx, bint empty, bint is_type, bint cmethod_flag,
+ bint assignable, bint nonempty)
+cdef p_nogil(PyrexScanner s)
+cdef p_with_gil(PyrexScanner s)
+cdef p_exception_value_clause(PyrexScanner s)
+cpdef p_c_arg_list(PyrexScanner s, ctx = *, bint in_pyfunc = *, bint cmethod_flag = *,
+ bint nonempty_declarators = *, bint kw_only = *, bint annotated = *)
+cdef p_optional_ellipsis(PyrexScanner s)
+cdef p_c_arg_decl(PyrexScanner s, ctx, in_pyfunc, bint cmethod_flag = *, bint nonempty = *, bint kw_only = *, bint annotated = *)
+cdef p_api(PyrexScanner s)
+cdef p_cdef_statement(PyrexScanner s, ctx)
+cdef p_cdef_block(PyrexScanner s, ctx)
+cdef p_cdef_extern_block(PyrexScanner s, pos, ctx)
+cdef p_c_enum_definition(PyrexScanner s, pos, ctx)
+cdef p_c_enum_line(PyrexScanner s, ctx, list items)
+cdef p_c_enum_item(PyrexScanner s, ctx, list items)
+cdef p_c_struct_or_union_definition(PyrexScanner s, pos, ctx)
+cdef p_fused_definition(PyrexScanner s, pos, ctx)
+cdef p_struct_enum(PyrexScanner s, pos, ctx)
+cdef p_visibility(PyrexScanner s, prev_visibility)
+cdef p_c_modifiers(PyrexScanner s)
+cdef p_c_func_or_var_declaration(PyrexScanner s, pos, ctx)
+cdef p_ctypedef_statement(PyrexScanner s, ctx)
+cdef p_decorators(PyrexScanner s)
cdef _reject_cdef_modifier_in_py(PyrexScanner s, name)
cdef p_def_statement(PyrexScanner s, list decorators=*, bint is_async_def=*)
-cdef p_varargslist(PyrexScanner s, terminator=*, bint annotated = *)
-cdef p_py_arg_decl(PyrexScanner s, bint annotated = *)
-cdef p_class_statement(PyrexScanner s, decorators)
-cdef p_c_class_definition(PyrexScanner s, pos, ctx)
+cdef p_varargslist(PyrexScanner s, terminator=*, bint annotated = *)
+cdef p_py_arg_decl(PyrexScanner s, bint annotated = *)
+cdef p_class_statement(PyrexScanner s, decorators)
+cdef p_c_class_definition(PyrexScanner s, pos, ctx)
cdef tuple p_c_class_options(PyrexScanner s)
-cdef p_property_decl(PyrexScanner s)
-cdef p_doc_string(PyrexScanner s)
-cdef p_ignorable_statement(PyrexScanner s)
+cdef p_property_decl(PyrexScanner s)
+cdef p_doc_string(PyrexScanner s)
+cdef p_ignorable_statement(PyrexScanner s)
cdef dict p_compiler_directive_comments(PyrexScanner s)
cdef p_template_definition(PyrexScanner s)
-cdef p_cpp_class_definition(PyrexScanner s, pos, ctx)
-cdef p_cpp_class_attribute(PyrexScanner s, ctx)
+cdef p_cpp_class_definition(PyrexScanner s, pos, ctx)
+cdef p_cpp_class_attribute(PyrexScanner s, ctx)
diff --git a/contrib/tools/cython/Cython/Compiler/Parsing.py b/contrib/tools/cython/Cython/Compiler/Parsing.py
index ac68216e0c..4d2f12a24a 100644
--- a/contrib/tools/cython/Cython/Compiler/Parsing.py
+++ b/contrib/tools/cython/Cython/Compiler/Parsing.py
@@ -1,392 +1,392 @@
-# cython: auto_cpdef=True, infer_types=True, language_level=3, py2_import=True
-#
-# Parser
-#
-
-from __future__ import absolute_import
-
-# This should be done automatically
-import cython
-cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
+# cython: auto_cpdef=True, infer_types=True, language_level=3, py2_import=True
+#
+# Parser
+#
+
+from __future__ import absolute_import
+
+# This should be done automatically
+import cython
+cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
bytes_literal=object, StringEncoding=object,
FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object,
- Future=object, Options=object, error=object, warning=object,
+ Future=object, Options=object, error=object, warning=object,
Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object,
re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object,
partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint,
_CDEF_MODIFIERS=tuple)
-
+
from io import StringIO
-import re
+import re
import sys
from unicodedata import lookup as lookup_unicodechar, category as unicode_category
from functools import partial, reduce
-
+
from .Scanning import PyrexScanner, FileSourceDescriptor, StringSourceDescriptor
-from . import Nodes
-from . import ExprNodes
-from . import Builtin
-from . import StringEncoding
+from . import Nodes
+from . import ExprNodes
+from . import Builtin
+from . import StringEncoding
from .StringEncoding import EncodedString, bytes_literal, _unicode, _bytes
-from .ModuleNode import ModuleNode
-from .Errors import error, warning
-from .. import Utils
-from . import Future
-from . import Options
-
+from .ModuleNode import ModuleNode
+from .Errors import error, warning
+from .. import Utils
+from . import Future
+from . import Options
+
_IS_PY3 = sys.version_info[0] >= 3
_IS_2BYTE_UNICODE = sys.maxunicode == 0xffff
_CDEF_MODIFIERS = ('inline', 'nogil', 'api')
-
-
-class Ctx(object):
- # Parsing context
- level = 'other'
- visibility = 'private'
- cdef_flag = 0
- typedef_flag = 0
- api = 0
- overridable = 0
- nogil = 0
- namespace = None
- templates = None
- allow_struct_enum_decorator = False
-
- def __init__(self, **kwds):
- self.__dict__.update(kwds)
-
- def __call__(self, **kwds):
- ctx = Ctx()
- d = ctx.__dict__
- d.update(self.__dict__)
- d.update(kwds)
- return ctx
-
+
+
+class Ctx(object):
+ # Parsing context
+ level = 'other'
+ visibility = 'private'
+ cdef_flag = 0
+ typedef_flag = 0
+ api = 0
+ overridable = 0
+ nogil = 0
+ namespace = None
+ templates = None
+ allow_struct_enum_decorator = False
+
+ def __init__(self, **kwds):
+ self.__dict__.update(kwds)
+
+ def __call__(self, **kwds):
+ ctx = Ctx()
+ d = ctx.__dict__
+ d.update(self.__dict__)
+ d.update(kwds)
+ return ctx
+
def p_ident(s, message="Expected an identifier"):
- if s.sy == 'IDENT':
- name = s.systring
- s.next()
- return name
- else:
- s.error(message)
-
-def p_ident_list(s):
- names = []
- while s.sy == 'IDENT':
- names.append(s.systring)
- s.next()
- if s.sy != ',':
- break
- s.next()
- return names
-
-#------------------------------------------
-#
-# Expressions
-#
-#------------------------------------------
-
-def p_binop_operator(s):
- pos = s.position()
- op = s.sy
- s.next()
- return op, pos
-
-def p_binop_expr(s, ops, p_sub_expr):
- n1 = p_sub_expr(s)
- while s.sy in ops:
- op, pos = p_binop_operator(s)
- n2 = p_sub_expr(s)
- n1 = ExprNodes.binop_node(pos, op, n1, n2)
- if op == '/':
- if Future.division in s.context.future_directives:
- n1.truedivision = True
- else:
- n1.truedivision = None # unknown
- return n1
-
-#lambdef: 'lambda' [varargslist] ':' test
-
-def p_lambdef(s, allow_conditional=True):
- # s.sy == 'lambda'
- pos = s.position()
- s.next()
- if s.sy == ':':
- args = []
- star_arg = starstar_arg = None
- else:
- args, star_arg, starstar_arg = p_varargslist(
- s, terminator=':', annotated=False)
- s.expect(':')
- if allow_conditional:
- expr = p_test(s)
- else:
- expr = p_test_nocond(s)
- return ExprNodes.LambdaNode(
- pos, args = args,
- star_arg = star_arg, starstar_arg = starstar_arg,
- result_expr = expr)
-
-#lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
-
-def p_lambdef_nocond(s):
- return p_lambdef(s, allow_conditional=False)
-
-#test: or_test ['if' or_test 'else' test] | lambdef
-
-def p_test(s):
- if s.sy == 'lambda':
- return p_lambdef(s)
- pos = s.position()
- expr = p_or_test(s)
- if s.sy == 'if':
- s.next()
- test = p_or_test(s)
- s.expect('else')
- other = p_test(s)
- return ExprNodes.CondExprNode(pos, test=test, true_val=expr, false_val=other)
- else:
- return expr
-
-#test_nocond: or_test | lambdef_nocond
-
-def p_test_nocond(s):
- if s.sy == 'lambda':
- return p_lambdef_nocond(s)
- else:
- return p_or_test(s)
-
-#or_test: and_test ('or' and_test)*
-
-def p_or_test(s):
- return p_rassoc_binop_expr(s, ('or',), p_and_test)
-
-def p_rassoc_binop_expr(s, ops, p_subexpr):
- n1 = p_subexpr(s)
- if s.sy in ops:
- pos = s.position()
- op = s.sy
- s.next()
- n2 = p_rassoc_binop_expr(s, ops, p_subexpr)
- n1 = ExprNodes.binop_node(pos, op, n1, n2)
- return n1
-
-#and_test: not_test ('and' not_test)*
-
-def p_and_test(s):
- #return p_binop_expr(s, ('and',), p_not_test)
- return p_rassoc_binop_expr(s, ('and',), p_not_test)
-
-#not_test: 'not' not_test | comparison
-
-def p_not_test(s):
- if s.sy == 'not':
- pos = s.position()
- s.next()
- return ExprNodes.NotNode(pos, operand = p_not_test(s))
- else:
- return p_comparison(s)
-
-#comparison: expr (comp_op expr)*
-#comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-
-def p_comparison(s):
- n1 = p_starred_expr(s)
- if s.sy in comparison_ops:
- pos = s.position()
- op = p_cmp_op(s)
- n2 = p_starred_expr(s)
- n1 = ExprNodes.PrimaryCmpNode(pos,
- operator = op, operand1 = n1, operand2 = n2)
- if s.sy in comparison_ops:
- n1.cascade = p_cascaded_cmp(s)
- return n1
-
-def p_test_or_starred_expr(s):
- if s.sy == '*':
- return p_starred_expr(s)
- else:
- return p_test(s)
-
-def p_starred_expr(s):
- pos = s.position()
- if s.sy == '*':
- starred = True
- s.next()
- else:
- starred = False
- expr = p_bit_expr(s)
- if starred:
+ if s.sy == 'IDENT':
+ name = s.systring
+ s.next()
+ return name
+ else:
+ s.error(message)
+
+def p_ident_list(s):
+ names = []
+ while s.sy == 'IDENT':
+ names.append(s.systring)
+ s.next()
+ if s.sy != ',':
+ break
+ s.next()
+ return names
+
+#------------------------------------------
+#
+# Expressions
+#
+#------------------------------------------
+
+def p_binop_operator(s):
+ pos = s.position()
+ op = s.sy
+ s.next()
+ return op, pos
+
+def p_binop_expr(s, ops, p_sub_expr):
+ n1 = p_sub_expr(s)
+ while s.sy in ops:
+ op, pos = p_binop_operator(s)
+ n2 = p_sub_expr(s)
+ n1 = ExprNodes.binop_node(pos, op, n1, n2)
+ if op == '/':
+ if Future.division in s.context.future_directives:
+ n1.truedivision = True
+ else:
+ n1.truedivision = None # unknown
+ return n1
+
+#lambdef: 'lambda' [varargslist] ':' test
+
+def p_lambdef(s, allow_conditional=True):
+ # s.sy == 'lambda'
+ pos = s.position()
+ s.next()
+ if s.sy == ':':
+ args = []
+ star_arg = starstar_arg = None
+ else:
+ args, star_arg, starstar_arg = p_varargslist(
+ s, terminator=':', annotated=False)
+ s.expect(':')
+ if allow_conditional:
+ expr = p_test(s)
+ else:
+ expr = p_test_nocond(s)
+ return ExprNodes.LambdaNode(
+ pos, args = args,
+ star_arg = star_arg, starstar_arg = starstar_arg,
+ result_expr = expr)
+
+#lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+
+def p_lambdef_nocond(s):
+ return p_lambdef(s, allow_conditional=False)
+
+#test: or_test ['if' or_test 'else' test] | lambdef
+
+def p_test(s):
+ if s.sy == 'lambda':
+ return p_lambdef(s)
+ pos = s.position()
+ expr = p_or_test(s)
+ if s.sy == 'if':
+ s.next()
+ test = p_or_test(s)
+ s.expect('else')
+ other = p_test(s)
+ return ExprNodes.CondExprNode(pos, test=test, true_val=expr, false_val=other)
+ else:
+ return expr
+
+#test_nocond: or_test | lambdef_nocond
+
+def p_test_nocond(s):
+ if s.sy == 'lambda':
+ return p_lambdef_nocond(s)
+ else:
+ return p_or_test(s)
+
+#or_test: and_test ('or' and_test)*
+
+def p_or_test(s):
+ return p_rassoc_binop_expr(s, ('or',), p_and_test)
+
+def p_rassoc_binop_expr(s, ops, p_subexpr):
+ n1 = p_subexpr(s)
+ if s.sy in ops:
+ pos = s.position()
+ op = s.sy
+ s.next()
+ n2 = p_rassoc_binop_expr(s, ops, p_subexpr)
+ n1 = ExprNodes.binop_node(pos, op, n1, n2)
+ return n1
+
+#and_test: not_test ('and' not_test)*
+
+def p_and_test(s):
+ #return p_binop_expr(s, ('and',), p_not_test)
+ return p_rassoc_binop_expr(s, ('and',), p_not_test)
+
+#not_test: 'not' not_test | comparison
+
+def p_not_test(s):
+ if s.sy == 'not':
+ pos = s.position()
+ s.next()
+ return ExprNodes.NotNode(pos, operand = p_not_test(s))
+ else:
+ return p_comparison(s)
+
+#comparison: expr (comp_op expr)*
+#comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+
+def p_comparison(s):
+ n1 = p_starred_expr(s)
+ if s.sy in comparison_ops:
+ pos = s.position()
+ op = p_cmp_op(s)
+ n2 = p_starred_expr(s)
+ n1 = ExprNodes.PrimaryCmpNode(pos,
+ operator = op, operand1 = n1, operand2 = n2)
+ if s.sy in comparison_ops:
+ n1.cascade = p_cascaded_cmp(s)
+ return n1
+
+def p_test_or_starred_expr(s):
+ if s.sy == '*':
+ return p_starred_expr(s)
+ else:
+ return p_test(s)
+
+def p_starred_expr(s):
+ pos = s.position()
+ if s.sy == '*':
+ starred = True
+ s.next()
+ else:
+ starred = False
+ expr = p_bit_expr(s)
+ if starred:
expr = ExprNodes.StarredUnpackingNode(pos, expr)
- return expr
-
-def p_cascaded_cmp(s):
- pos = s.position()
- op = p_cmp_op(s)
- n2 = p_starred_expr(s)
- result = ExprNodes.CascadedCmpNode(pos,
- operator = op, operand2 = n2)
- if s.sy in comparison_ops:
- result.cascade = p_cascaded_cmp(s)
- return result
-
-def p_cmp_op(s):
- if s.sy == 'not':
- s.next()
- s.expect('in')
- op = 'not_in'
- elif s.sy == 'is':
- s.next()
- if s.sy == 'not':
- s.next()
- op = 'is_not'
- else:
- op = 'is'
- else:
- op = s.sy
- s.next()
- if op == '<>':
- op = '!='
- return op
-
-comparison_ops = cython.declare(set, set([
- '<', '>', '==', '>=', '<=', '<>', '!=',
- 'in', 'is', 'not'
-]))
-
-#expr: xor_expr ('|' xor_expr)*
-
-def p_bit_expr(s):
- return p_binop_expr(s, ('|',), p_xor_expr)
-
-#xor_expr: and_expr ('^' and_expr)*
-
-def p_xor_expr(s):
- return p_binop_expr(s, ('^',), p_and_expr)
-
-#and_expr: shift_expr ('&' shift_expr)*
-
-def p_and_expr(s):
- return p_binop_expr(s, ('&',), p_shift_expr)
-
-#shift_expr: arith_expr (('<<'|'>>') arith_expr)*
-
-def p_shift_expr(s):
- return p_binop_expr(s, ('<<', '>>'), p_arith_expr)
-
-#arith_expr: term (('+'|'-') term)*
-
-def p_arith_expr(s):
- return p_binop_expr(s, ('+', '-'), p_term)
-
-#term: factor (('*'|'@'|'/'|'%'|'//') factor)*
-
-def p_term(s):
- return p_binop_expr(s, ('*', '@', '/', '%', '//'), p_factor)
-
-#factor: ('+'|'-'|'~'|'&'|typecast|sizeof) factor | power
-
-def p_factor(s):
- # little indirection for C-ification purposes
- return _p_factor(s)
-
-def _p_factor(s):
- sy = s.sy
- if sy in ('+', '-', '~'):
- op = s.sy
- pos = s.position()
- s.next()
- return ExprNodes.unop_node(pos, op, p_factor(s))
- elif not s.in_python_file:
- if sy == '&':
- pos = s.position()
- s.next()
- arg = p_factor(s)
- return ExprNodes.AmpersandNode(pos, operand = arg)
- elif sy == "<":
- return p_typecast(s)
- elif sy == 'IDENT' and s.systring == "sizeof":
- return p_sizeof(s)
- return p_power(s)
-
-def p_typecast(s):
- # s.sy == "<"
- pos = s.position()
- s.next()
- base_type = p_c_base_type(s)
- is_memslice = isinstance(base_type, Nodes.MemoryViewSliceTypeNode)
- is_template = isinstance(base_type, Nodes.TemplatedTypeNode)
- is_const = isinstance(base_type, Nodes.CConstTypeNode)
- if (not is_memslice and not is_template and not is_const
- and base_type.name is None):
- s.error("Unknown type")
- declarator = p_c_declarator(s, empty = 1)
- if s.sy == '?':
- s.next()
- typecheck = 1
- else:
- typecheck = 0
- s.expect(">")
- operand = p_factor(s)
- if is_memslice:
- return ExprNodes.CythonArrayNode(pos, base_type_node=base_type,
- operand=operand)
-
- return ExprNodes.TypecastNode(pos,
- base_type = base_type,
- declarator = declarator,
- operand = operand,
- typecheck = typecheck)
-
-def p_sizeof(s):
- # s.sy == ident "sizeof"
- pos = s.position()
- s.next()
- s.expect('(')
- # Here we decide if we are looking at an expression or type
- # If it is actually a type, but parsable as an expression,
- # we treat it as an expression here.
- if looking_at_expr(s):
- operand = p_test(s)
- node = ExprNodes.SizeofVarNode(pos, operand = operand)
- else:
- base_type = p_c_base_type(s)
- declarator = p_c_declarator(s, empty = 1)
- node = ExprNodes.SizeofTypeNode(pos,
- base_type = base_type, declarator = declarator)
- s.expect(')')
- return node
-
-
-def p_yield_expression(s):
- # s.sy == "yield"
- pos = s.position()
- s.next()
- is_yield_from = False
- if s.sy == 'from':
- is_yield_from = True
- s.next()
- if s.sy != ')' and s.sy not in statement_terminators:
+ return expr
+
+def p_cascaded_cmp(s):
+ pos = s.position()
+ op = p_cmp_op(s)
+ n2 = p_starred_expr(s)
+ result = ExprNodes.CascadedCmpNode(pos,
+ operator = op, operand2 = n2)
+ if s.sy in comparison_ops:
+ result.cascade = p_cascaded_cmp(s)
+ return result
+
+def p_cmp_op(s):
+ if s.sy == 'not':
+ s.next()
+ s.expect('in')
+ op = 'not_in'
+ elif s.sy == 'is':
+ s.next()
+ if s.sy == 'not':
+ s.next()
+ op = 'is_not'
+ else:
+ op = 'is'
+ else:
+ op = s.sy
+ s.next()
+ if op == '<>':
+ op = '!='
+ return op
+
+comparison_ops = cython.declare(set, set([
+ '<', '>', '==', '>=', '<=', '<>', '!=',
+ 'in', 'is', 'not'
+]))
+
+#expr: xor_expr ('|' xor_expr)*
+
+def p_bit_expr(s):
+ return p_binop_expr(s, ('|',), p_xor_expr)
+
+#xor_expr: and_expr ('^' and_expr)*
+
+def p_xor_expr(s):
+ return p_binop_expr(s, ('^',), p_and_expr)
+
+#and_expr: shift_expr ('&' shift_expr)*
+
+def p_and_expr(s):
+ return p_binop_expr(s, ('&',), p_shift_expr)
+
+#shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+
+def p_shift_expr(s):
+ return p_binop_expr(s, ('<<', '>>'), p_arith_expr)
+
+#arith_expr: term (('+'|'-') term)*
+
+def p_arith_expr(s):
+ return p_binop_expr(s, ('+', '-'), p_term)
+
+#term: factor (('*'|'@'|'/'|'%'|'//') factor)*
+
+def p_term(s):
+ return p_binop_expr(s, ('*', '@', '/', '%', '//'), p_factor)
+
+#factor: ('+'|'-'|'~'|'&'|typecast|sizeof) factor | power
+
+def p_factor(s):
+ # little indirection for C-ification purposes
+ return _p_factor(s)
+
+def _p_factor(s):
+ sy = s.sy
+ if sy in ('+', '-', '~'):
+ op = s.sy
+ pos = s.position()
+ s.next()
+ return ExprNodes.unop_node(pos, op, p_factor(s))
+ elif not s.in_python_file:
+ if sy == '&':
+ pos = s.position()
+ s.next()
+ arg = p_factor(s)
+ return ExprNodes.AmpersandNode(pos, operand = arg)
+ elif sy == "<":
+ return p_typecast(s)
+ elif sy == 'IDENT' and s.systring == "sizeof":
+ return p_sizeof(s)
+ return p_power(s)
+
+def p_typecast(s):
+ # s.sy == "<"
+ pos = s.position()
+ s.next()
+ base_type = p_c_base_type(s)
+ is_memslice = isinstance(base_type, Nodes.MemoryViewSliceTypeNode)
+ is_template = isinstance(base_type, Nodes.TemplatedTypeNode)
+ is_const = isinstance(base_type, Nodes.CConstTypeNode)
+ if (not is_memslice and not is_template and not is_const
+ and base_type.name is None):
+ s.error("Unknown type")
+ declarator = p_c_declarator(s, empty = 1)
+ if s.sy == '?':
+ s.next()
+ typecheck = 1
+ else:
+ typecheck = 0
+ s.expect(">")
+ operand = p_factor(s)
+ if is_memslice:
+ return ExprNodes.CythonArrayNode(pos, base_type_node=base_type,
+ operand=operand)
+
+ return ExprNodes.TypecastNode(pos,
+ base_type = base_type,
+ declarator = declarator,
+ operand = operand,
+ typecheck = typecheck)
+
+def p_sizeof(s):
+ # s.sy == ident "sizeof"
+ pos = s.position()
+ s.next()
+ s.expect('(')
+ # Here we decide if we are looking at an expression or type
+ # If it is actually a type, but parsable as an expression,
+ # we treat it as an expression here.
+ if looking_at_expr(s):
+ operand = p_test(s)
+ node = ExprNodes.SizeofVarNode(pos, operand = operand)
+ else:
+ base_type = p_c_base_type(s)
+ declarator = p_c_declarator(s, empty = 1)
+ node = ExprNodes.SizeofTypeNode(pos,
+ base_type = base_type, declarator = declarator)
+ s.expect(')')
+ return node
+
+
+def p_yield_expression(s):
+ # s.sy == "yield"
+ pos = s.position()
+ s.next()
+ is_yield_from = False
+ if s.sy == 'from':
+ is_yield_from = True
+ s.next()
+ if s.sy != ')' and s.sy not in statement_terminators:
# "yield from" does not support implicit tuples, but "yield" does ("yield 1,2")
arg = p_test(s) if is_yield_from else p_testlist(s)
- else:
- if is_yield_from:
- s.error("'yield from' requires a source argument",
- pos=pos, fatal=False)
- arg = None
- if is_yield_from:
- return ExprNodes.YieldFromExprNode(pos, arg=arg)
- else:
- return ExprNodes.YieldExprNode(pos, arg=arg)
-
-
-def p_yield_statement(s):
- # s.sy == "yield"
- yield_expr = p_yield_expression(s)
- return Nodes.ExprStatNode(yield_expr.pos, expr=yield_expr)
-
-
+ else:
+ if is_yield_from:
+ s.error("'yield from' requires a source argument",
+ pos=pos, fatal=False)
+ arg = None
+ if is_yield_from:
+ return ExprNodes.YieldFromExprNode(pos, arg=arg)
+ else:
+ return ExprNodes.YieldExprNode(pos, arg=arg)
+
+
+def p_yield_statement(s):
+ # s.sy == "yield"
+ yield_expr = p_yield_expression(s)
+ return Nodes.ExprStatNode(yield_expr.pos, expr=yield_expr)
+
+
def p_async_statement(s, ctx, decorators):
# s.sy >> 'async' ...
if s.sy == 'def':
@@ -409,51 +409,51 @@ def p_async_statement(s, ctx, decorators):
#power: atom_expr ('**' factor)*
#atom_expr: ['await'] atom trailer*
-def p_power(s):
- if s.systring == 'new' and s.peek()[0] == 'IDENT':
- return p_new_expr(s)
+def p_power(s):
+ if s.systring == 'new' and s.peek()[0] == 'IDENT':
+ return p_new_expr(s)
await_pos = None
if s.sy == 'await':
await_pos = s.position()
s.next()
- n1 = p_atom(s)
- while s.sy in ('(', '[', '.'):
- n1 = p_trailer(s, n1)
+ n1 = p_atom(s)
+ while s.sy in ('(', '[', '.'):
+ n1 = p_trailer(s, n1)
if await_pos:
n1 = ExprNodes.AwaitExprNode(await_pos, arg=n1)
- if s.sy == '**':
- pos = s.position()
- s.next()
- n2 = p_factor(s)
- n1 = ExprNodes.binop_node(pos, '**', n1, n2)
- return n1
-
-
-def p_new_expr(s):
- # s.systring == 'new'.
- pos = s.position()
- s.next()
- cppclass = p_c_base_type(s)
- return p_call(s, ExprNodes.NewExprNode(pos, cppclass = cppclass))
-
-#trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
-
-def p_trailer(s, node1):
- pos = s.position()
- if s.sy == '(':
- return p_call(s, node1)
- elif s.sy == '[':
- return p_index(s, node1)
- else: # s.sy == '.'
- s.next()
+ if s.sy == '**':
+ pos = s.position()
+ s.next()
+ n2 = p_factor(s)
+ n1 = ExprNodes.binop_node(pos, '**', n1, n2)
+ return n1
+
+
+def p_new_expr(s):
+ # s.systring == 'new'.
+ pos = s.position()
+ s.next()
+ cppclass = p_c_base_type(s)
+ return p_call(s, ExprNodes.NewExprNode(pos, cppclass = cppclass))
+
+#trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+
+def p_trailer(s, node1):
+ pos = s.position()
+ if s.sy == '(':
+ return p_call(s, node1)
+ elif s.sy == '[':
+ return p_index(s, node1)
+ else: # s.sy == '.'
+ s.next()
name = p_ident(s)
- return ExprNodes.AttributeNode(pos,
+ return ExprNodes.AttributeNode(pos,
obj=node1, attribute=name)
-
-# arglist: argument (',' argument)* [',']
-# argument: [test '='] test # Really [keyword '='] test
-
+
+# arglist: argument (',' argument)* [',']
+# argument: [test '='] test # Really [keyword '='] test
+
# since PEP 448:
# argument: ( test [comp_for] |
# test '=' test |
@@ -461,58 +461,58 @@ def p_trailer(s, node1):
# star_expr )
def p_call_parse_args(s, allow_genexp=True):
- # s.sy == '('
- pos = s.position()
- s.next()
- positional_args = []
- keyword_args = []
+ # s.sy == '('
+ pos = s.position()
+ s.next()
+ positional_args = []
+ keyword_args = []
starstar_seen = False
last_was_tuple_unpack = False
while s.sy != ')':
- if s.sy == '*':
+ if s.sy == '*':
if starstar_seen:
s.error("Non-keyword arg following keyword arg", pos=s.position())
- s.next()
+ s.next()
positional_args.append(p_test(s))
last_was_tuple_unpack = True
elif s.sy == '**':
s.next()
keyword_args.append(p_test(s))
starstar_seen = True
- else:
- arg = p_test(s)
- if s.sy == '=':
- s.next()
- if not arg.is_name:
- s.error("Expected an identifier before '='",
- pos=arg.pos)
+ else:
+ arg = p_test(s)
+ if s.sy == '=':
+ s.next()
+ if not arg.is_name:
+ s.error("Expected an identifier before '='",
+ pos=arg.pos)
encoded_name = s.context.intern_ustring(arg.name)
- keyword = ExprNodes.IdentifierStringNode(
- arg.pos, value=encoded_name)
- arg = p_test(s)
- keyword_args.append((keyword, arg))
- else:
- if keyword_args:
+ keyword = ExprNodes.IdentifierStringNode(
+ arg.pos, value=encoded_name)
+ arg = p_test(s)
+ keyword_args.append((keyword, arg))
+ else:
+ if keyword_args:
s.error("Non-keyword arg following keyword arg", pos=arg.pos)
if positional_args and not last_was_tuple_unpack:
positional_args[-1].append(arg)
else:
positional_args.append([arg])
last_was_tuple_unpack = False
- if s.sy != ',':
- break
- s.next()
-
+ if s.sy != ',':
+ break
+ s.next()
+
if s.sy in ('for', 'async'):
if not keyword_args and not last_was_tuple_unpack:
if len(positional_args) == 1 and len(positional_args[0]) == 1:
positional_args = [[p_genexp(s, positional_args[0][0])]]
- s.expect(')')
+ s.expect(')')
return positional_args or [[]], keyword_args
-
+
def p_call_build_packed_args(pos, positional_args, keyword_args):
- keyword_dict = None
+ keyword_dict = None
subtuples = [
ExprNodes.TupleNode(pos, args=arg) if isinstance(arg, list) else ExprNodes.AsTupleNode(pos, arg=arg)
@@ -550,229 +550,229 @@ def p_call_build_packed_args(pos, positional_args, keyword_args):
# at least one **kwargs
keyword_dict = ExprNodes.MergedDictNode(pos, keyword_args=kwargs)
- return arg_tuple, keyword_dict
-
+ return arg_tuple, keyword_dict
+
-def p_call(s, function):
- # s.sy == '('
- pos = s.position()
+def p_call(s, function):
+ # s.sy == '('
+ pos = s.position()
positional_args, keyword_args = p_call_parse_args(s)
-
+
if not keyword_args and len(positional_args) == 1 and isinstance(positional_args[0], list):
return ExprNodes.SimpleCallNode(pos, function=function, args=positional_args[0])
- else:
+ else:
arg_tuple, keyword_dict = p_call_build_packed_args(pos, positional_args, keyword_args)
return ExprNodes.GeneralCallNode(
pos, function=function, positional_args=arg_tuple, keyword_args=keyword_dict)
-
-
-#lambdef: 'lambda' [varargslist] ':' test
-
-#subscriptlist: subscript (',' subscript)* [',']
-
-def p_index(s, base):
- # s.sy == '['
- pos = s.position()
- s.next()
- subscripts, is_single_value = p_subscript_list(s)
- if is_single_value and len(subscripts[0]) == 2:
- start, stop = subscripts[0]
- result = ExprNodes.SliceIndexNode(pos,
- base = base, start = start, stop = stop)
- else:
- indexes = make_slice_nodes(pos, subscripts)
- if is_single_value:
- index = indexes[0]
- else:
- index = ExprNodes.TupleNode(pos, args = indexes)
- result = ExprNodes.IndexNode(pos,
- base = base, index = index)
- s.expect(']')
- return result
-
-def p_subscript_list(s):
- is_single_value = True
- items = [p_subscript(s)]
- while s.sy == ',':
- is_single_value = False
- s.next()
- if s.sy == ']':
- break
- items.append(p_subscript(s))
- return items, is_single_value
-
-#subscript: '.' '.' '.' | test | [test] ':' [test] [':' [test]]
-
-def p_subscript(s):
- # Parse a subscript and return a list of
- # 1, 2 or 3 ExprNodes, depending on how
- # many slice elements were encountered.
- pos = s.position()
- start = p_slice_element(s, (':',))
- if s.sy != ':':
- return [start]
- s.next()
- stop = p_slice_element(s, (':', ',', ']'))
- if s.sy != ':':
- return [start, stop]
- s.next()
- step = p_slice_element(s, (':', ',', ']'))
- return [start, stop, step]
-
-def p_slice_element(s, follow_set):
- # Simple expression which may be missing iff
- # it is followed by something in follow_set.
- if s.sy not in follow_set:
- return p_test(s)
- else:
- return None
-
-def expect_ellipsis(s):
- s.expect('.')
- s.expect('.')
- s.expect('.')
-
-def make_slice_nodes(pos, subscripts):
- # Convert a list of subscripts as returned
- # by p_subscript_list into a list of ExprNodes,
- # creating SliceNodes for elements with 2 or
- # more components.
- result = []
- for subscript in subscripts:
- if len(subscript) == 1:
- result.append(subscript[0])
- else:
- result.append(make_slice_node(pos, *subscript))
- return result
-
-def make_slice_node(pos, start, stop = None, step = None):
- if not start:
- start = ExprNodes.NoneNode(pos)
- if not stop:
- stop = ExprNodes.NoneNode(pos)
- if not step:
- step = ExprNodes.NoneNode(pos)
- return ExprNodes.SliceNode(pos,
- start = start, stop = stop, step = step)
-
-#atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']' | '{' [dict_or_set_maker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
-
-def p_atom(s):
- pos = s.position()
- sy = s.sy
- if sy == '(':
- s.next()
- if s.sy == ')':
- result = ExprNodes.TupleNode(pos, args = [])
- elif s.sy == 'yield':
- result = p_yield_expression(s)
- else:
- result = p_testlist_comp(s)
- s.expect(')')
- return result
- elif sy == '[':
- return p_list_maker(s)
- elif sy == '{':
- return p_dict_or_set_maker(s)
- elif sy == '`':
- return p_backquote_expr(s)
- elif sy == '.':
- expect_ellipsis(s)
- return ExprNodes.EllipsisNode(pos)
- elif sy == 'INT':
- return p_int_literal(s)
- elif sy == 'FLOAT':
- value = s.systring
- s.next()
- return ExprNodes.FloatNode(pos, value = value)
- elif sy == 'IMAG':
- value = s.systring[:-1]
- s.next()
- return ExprNodes.ImagNode(pos, value = value)
- elif sy == 'BEGIN_STRING':
- kind, bytes_value, unicode_value = p_cat_string_literal(s)
- if kind == 'c':
- return ExprNodes.CharNode(pos, value = bytes_value)
- elif kind == 'u':
- return ExprNodes.UnicodeNode(pos, value = unicode_value, bytes_value = bytes_value)
- elif kind == 'b':
- return ExprNodes.BytesNode(pos, value = bytes_value)
+
+
+#lambdef: 'lambda' [varargslist] ':' test
+
+#subscriptlist: subscript (',' subscript)* [',']
+
+def p_index(s, base):
+ # s.sy == '['
+ pos = s.position()
+ s.next()
+ subscripts, is_single_value = p_subscript_list(s)
+ if is_single_value and len(subscripts[0]) == 2:
+ start, stop = subscripts[0]
+ result = ExprNodes.SliceIndexNode(pos,
+ base = base, start = start, stop = stop)
+ else:
+ indexes = make_slice_nodes(pos, subscripts)
+ if is_single_value:
+ index = indexes[0]
+ else:
+ index = ExprNodes.TupleNode(pos, args = indexes)
+ result = ExprNodes.IndexNode(pos,
+ base = base, index = index)
+ s.expect(']')
+ return result
+
+def p_subscript_list(s):
+ is_single_value = True
+ items = [p_subscript(s)]
+ while s.sy == ',':
+ is_single_value = False
+ s.next()
+ if s.sy == ']':
+ break
+ items.append(p_subscript(s))
+ return items, is_single_value
+
+#subscript: '.' '.' '.' | test | [test] ':' [test] [':' [test]]
+
+def p_subscript(s):
+ # Parse a subscript and return a list of
+ # 1, 2 or 3 ExprNodes, depending on how
+ # many slice elements were encountered.
+ pos = s.position()
+ start = p_slice_element(s, (':',))
+ if s.sy != ':':
+ return [start]
+ s.next()
+ stop = p_slice_element(s, (':', ',', ']'))
+ if s.sy != ':':
+ return [start, stop]
+ s.next()
+ step = p_slice_element(s, (':', ',', ']'))
+ return [start, stop, step]
+
+def p_slice_element(s, follow_set):
+ # Simple expression which may be missing iff
+ # it is followed by something in follow_set.
+ if s.sy not in follow_set:
+ return p_test(s)
+ else:
+ return None
+
+def expect_ellipsis(s):
+ s.expect('.')
+ s.expect('.')
+ s.expect('.')
+
+def make_slice_nodes(pos, subscripts):
+ # Convert a list of subscripts as returned
+ # by p_subscript_list into a list of ExprNodes,
+ # creating SliceNodes for elements with 2 or
+ # more components.
+ result = []
+ for subscript in subscripts:
+ if len(subscript) == 1:
+ result.append(subscript[0])
+ else:
+ result.append(make_slice_node(pos, *subscript))
+ return result
+
+def make_slice_node(pos, start, stop = None, step = None):
+ if not start:
+ start = ExprNodes.NoneNode(pos)
+ if not stop:
+ stop = ExprNodes.NoneNode(pos)
+ if not step:
+ step = ExprNodes.NoneNode(pos)
+ return ExprNodes.SliceNode(pos,
+ start = start, stop = stop, step = step)
+
+#atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']' | '{' [dict_or_set_maker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
+
+def p_atom(s):
+ pos = s.position()
+ sy = s.sy
+ if sy == '(':
+ s.next()
+ if s.sy == ')':
+ result = ExprNodes.TupleNode(pos, args = [])
+ elif s.sy == 'yield':
+ result = p_yield_expression(s)
+ else:
+ result = p_testlist_comp(s)
+ s.expect(')')
+ return result
+ elif sy == '[':
+ return p_list_maker(s)
+ elif sy == '{':
+ return p_dict_or_set_maker(s)
+ elif sy == '`':
+ return p_backquote_expr(s)
+ elif sy == '.':
+ expect_ellipsis(s)
+ return ExprNodes.EllipsisNode(pos)
+ elif sy == 'INT':
+ return p_int_literal(s)
+ elif sy == 'FLOAT':
+ value = s.systring
+ s.next()
+ return ExprNodes.FloatNode(pos, value = value)
+ elif sy == 'IMAG':
+ value = s.systring[:-1]
+ s.next()
+ return ExprNodes.ImagNode(pos, value = value)
+ elif sy == 'BEGIN_STRING':
+ kind, bytes_value, unicode_value = p_cat_string_literal(s)
+ if kind == 'c':
+ return ExprNodes.CharNode(pos, value = bytes_value)
+ elif kind == 'u':
+ return ExprNodes.UnicodeNode(pos, value = unicode_value, bytes_value = bytes_value)
+ elif kind == 'b':
+ return ExprNodes.BytesNode(pos, value = bytes_value)
elif kind == 'f':
return ExprNodes.JoinedStrNode(pos, values = unicode_value)
elif kind == '':
return ExprNodes.StringNode(pos, value = bytes_value, unicode_value = unicode_value)
- else:
+ else:
s.error("invalid string kind '%s'" % kind)
- elif sy == 'IDENT':
+ elif sy == 'IDENT':
name = s.systring
- if name == "None":
+ if name == "None":
result = ExprNodes.NoneNode(pos)
- elif name == "True":
+ elif name == "True":
result = ExprNodes.BoolNode(pos, value=True)
- elif name == "False":
+ elif name == "False":
result = ExprNodes.BoolNode(pos, value=False)
- elif name == "NULL" and not s.in_python_file:
+ elif name == "NULL" and not s.in_python_file:
result = ExprNodes.NullNode(pos)
- else:
+ else:
result = p_name(s, name)
s.next()
return result
- else:
- s.error("Expected an identifier or literal")
-
-def p_int_literal(s):
- pos = s.position()
- value = s.systring
- s.next()
- unsigned = ""
- longness = ""
- while value[-1] in u"UuLl":
- if value[-1] in u"Ll":
- longness += "L"
- else:
- unsigned += "U"
- value = value[:-1]
- # '3L' is ambiguous in Py2 but not in Py3. '3U' and '3LL' are
- # illegal in Py2 Python files. All suffixes are illegal in Py3
- # Python files.
- is_c_literal = None
- if unsigned:
- is_c_literal = True
- elif longness:
- if longness == 'LL' or s.context.language_level >= 3:
- is_c_literal = True
- if s.in_python_file:
- if is_c_literal:
- error(pos, "illegal integer literal syntax in Python source file")
- is_c_literal = False
- return ExprNodes.IntNode(pos,
- is_c_literal = is_c_literal,
- value = value,
- unsigned = unsigned,
- longness = longness)
-
-
-def p_name(s, name):
- pos = s.position()
- if not s.compile_time_expr and name in s.compile_time_env:
- value = s.compile_time_env.lookup_here(name)
- node = wrap_compile_time_constant(pos, value)
- if node is not None:
- return node
- return ExprNodes.NameNode(pos, name=name)
-
-
-def wrap_compile_time_constant(pos, value):
- rep = repr(value)
- if value is None:
- return ExprNodes.NoneNode(pos)
- elif value is Ellipsis:
- return ExprNodes.EllipsisNode(pos)
- elif isinstance(value, bool):
- return ExprNodes.BoolNode(pos, value=value)
- elif isinstance(value, int):
+ else:
+ s.error("Expected an identifier or literal")
+
+def p_int_literal(s):
+ pos = s.position()
+ value = s.systring
+ s.next()
+ unsigned = ""
+ longness = ""
+ while value[-1] in u"UuLl":
+ if value[-1] in u"Ll":
+ longness += "L"
+ else:
+ unsigned += "U"
+ value = value[:-1]
+ # '3L' is ambiguous in Py2 but not in Py3. '3U' and '3LL' are
+ # illegal in Py2 Python files. All suffixes are illegal in Py3
+ # Python files.
+ is_c_literal = None
+ if unsigned:
+ is_c_literal = True
+ elif longness:
+ if longness == 'LL' or s.context.language_level >= 3:
+ is_c_literal = True
+ if s.in_python_file:
+ if is_c_literal:
+ error(pos, "illegal integer literal syntax in Python source file")
+ is_c_literal = False
+ return ExprNodes.IntNode(pos,
+ is_c_literal = is_c_literal,
+ value = value,
+ unsigned = unsigned,
+ longness = longness)
+
+
+def p_name(s, name):
+ pos = s.position()
+ if not s.compile_time_expr and name in s.compile_time_env:
+ value = s.compile_time_env.lookup_here(name)
+ node = wrap_compile_time_constant(pos, value)
+ if node is not None:
+ return node
+ return ExprNodes.NameNode(pos, name=name)
+
+
+def wrap_compile_time_constant(pos, value):
+ rep = repr(value)
+ if value is None:
+ return ExprNodes.NoneNode(pos)
+ elif value is Ellipsis:
+ return ExprNodes.EllipsisNode(pos)
+ elif isinstance(value, bool):
+ return ExprNodes.BoolNode(pos, value=value)
+ elif isinstance(value, int):
return ExprNodes.IntNode(pos, value=rep, constant_result=value)
- elif isinstance(value, float):
+ elif isinstance(value, float):
return ExprNodes.FloatNode(pos, value=rep, constant_result=value)
elif isinstance(value, complex):
node = ExprNodes.ImagNode(pos, value=repr(value.imag), constant_result=complex(0.0, value.imag))
@@ -783,43 +783,43 @@ def wrap_compile_time_constant(pos, value):
pos, '+', ExprNodes.FloatNode(pos, value=repr(value.real), constant_result=value.real), node,
constant_result=value)
return node
- elif isinstance(value, _unicode):
- return ExprNodes.UnicodeNode(pos, value=EncodedString(value))
- elif isinstance(value, _bytes):
+ elif isinstance(value, _unicode):
+ return ExprNodes.UnicodeNode(pos, value=EncodedString(value))
+ elif isinstance(value, _bytes):
bvalue = bytes_literal(value, 'ascii') # actually: unknown encoding, but BytesLiteral requires one
return ExprNodes.BytesNode(pos, value=bvalue, constant_result=value)
- elif isinstance(value, tuple):
- args = [wrap_compile_time_constant(pos, arg)
- for arg in value]
- if None not in args:
- return ExprNodes.TupleNode(pos, args=args)
- else:
- # error already reported
- return None
+ elif isinstance(value, tuple):
+ args = [wrap_compile_time_constant(pos, arg)
+ for arg in value]
+ if None not in args:
+ return ExprNodes.TupleNode(pos, args=args)
+ else:
+ # error already reported
+ return None
elif not _IS_PY3 and isinstance(value, long):
return ExprNodes.IntNode(pos, value=rep.rstrip('L'), constant_result=value)
- error(pos, "Invalid type for compile-time constant: %r (type %s)"
- % (value, value.__class__.__name__))
- return None
-
-
-def p_cat_string_literal(s):
- # A sequence of one or more adjacent string literals.
- # Returns (kind, bytes_value, unicode_value)
+ error(pos, "Invalid type for compile-time constant: %r (type %s)"
+ % (value, value.__class__.__name__))
+ return None
+
+
+def p_cat_string_literal(s):
+ # A sequence of one or more adjacent string literals.
+ # Returns (kind, bytes_value, unicode_value)
# where kind in ('b', 'c', 'u', 'f', '')
pos = s.position()
- kind, bytes_value, unicode_value = p_string_literal(s)
- if kind == 'c' or s.sy != 'BEGIN_STRING':
- return kind, bytes_value, unicode_value
+ kind, bytes_value, unicode_value = p_string_literal(s)
+ if kind == 'c' or s.sy != 'BEGIN_STRING':
+ return kind, bytes_value, unicode_value
bstrings, ustrings, positions = [bytes_value], [unicode_value], [pos]
- bytes_value = unicode_value = None
- while s.sy == 'BEGIN_STRING':
- pos = s.position()
- next_kind, next_bytes_value, next_unicode_value = p_string_literal(s)
- if next_kind == 'c':
- error(pos, "Cannot concatenate char literal with another string or char literal")
+ bytes_value = unicode_value = None
+ while s.sy == 'BEGIN_STRING':
+ pos = s.position()
+ next_kind, next_bytes_value, next_unicode_value = p_string_literal(s)
+ if next_kind == 'c':
+ error(pos, "Cannot concatenate char literal with another string or char literal")
continue
- elif next_kind != kind:
+ elif next_kind != kind:
# concatenating f strings and normal strings is allowed and leads to an f string
if set([kind, next_kind]) in (set(['f', 'u']), set(['f', ''])):
kind = 'f'
@@ -830,11 +830,11 @@ def p_cat_string_literal(s):
bstrings.append(next_bytes_value)
ustrings.append(next_unicode_value)
positions.append(pos)
- # join and rewrap the partial literals
- if kind in ('b', 'c', '') or kind == 'u' and None not in bstrings:
- # Py3 enforced unicode literals are parsed as bytes/unicode combination
+ # join and rewrap the partial literals
+ if kind in ('b', 'c', '') or kind == 'u' and None not in bstrings:
+ # Py3 enforced unicode literals are parsed as bytes/unicode combination
bytes_value = bytes_literal(StringEncoding.join_bytes(bstrings), s.source_encoding)
- if kind in ('u', ''):
+ if kind in ('u', ''):
unicode_value = EncodedString(u''.join([u for u in ustrings if u is not None]))
if kind == 'f':
unicode_value = []
@@ -844,10 +844,10 @@ def p_cat_string_literal(s):
else:
# non-f-string concatenated into the f-string
unicode_value.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u)))
- return kind, bytes_value, unicode_value
-
+ return kind, bytes_value, unicode_value
+
-def p_opt_string_literal(s, required_type='u'):
+def p_opt_string_literal(s, required_type='u'):
if s.sy != 'BEGIN_STRING':
return None
pos = s.position()
@@ -858,29 +858,29 @@ def p_opt_string_literal(s, required_type='u'):
return unicode_value
elif required_type == 'b':
return bytes_value
- else:
+ else:
s.error("internal parser configuration error")
-
-def check_for_non_ascii_characters(string):
- for c in string:
- if c >= u'\x80':
- return True
- return False
-
-def p_string_literal(s, kind_override=None):
- # A single string or char literal. Returns (kind, bvalue, uvalue)
+def check_for_non_ascii_characters(string):
+ for c in string:
+ if c >= u'\x80':
+ return True
+ return False
+
+
+def p_string_literal(s, kind_override=None):
+ # A single string or char literal. Returns (kind, bvalue, uvalue)
# where kind in ('b', 'c', 'u', 'f', ''). The 'bvalue' is the source
- # code byte sequence of the string literal, 'uvalue' is the
- # decoded Unicode string. Either of the two may be None depending
- # on the 'kind' of string, only unprefixed strings have both
+ # code byte sequence of the string literal, 'uvalue' is the
+ # decoded Unicode string. Either of the two may be None depending
+ # on the 'kind' of string, only unprefixed strings have both
# representations. In f-strings, the uvalue is a list of the Unicode
# strings and f-string expressions that make up the f-string.
-
- # s.sy == 'BEGIN_STRING'
- pos = s.position()
- is_python3_source = s.context.language_level >= 3
+
+ # s.sy == 'BEGIN_STRING'
+ pos = s.position()
+ is_python3_source = s.context.language_level >= 3
has_non_ascii_literal_characters = False
string_start_pos = (pos[0], pos[1], pos[2] + len(s.systring))
kind_string = s.systring.rstrip('"\'').lower()
@@ -910,66 +910,66 @@ def p_string_literal(s, kind_override=None):
elif 'u' in kind_string:
kind = 'u'
else:
- kind = ''
+ kind = ''
- if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives:
- chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
- kind = 'u'
- else:
- if kind_override is not None and kind_override in 'ub':
- kind = kind_override
+ if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives:
+ chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
+ kind = 'u'
+ else:
+ if kind_override is not None and kind_override in 'ub':
+ kind = kind_override
if kind in ('u', 'f'): # f-strings are scanned exactly like Unicode literals, but are parsed further later
- chars = StringEncoding.UnicodeLiteralBuilder()
- elif kind == '':
- chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
- else:
- chars = StringEncoding.BytesLiteralBuilder(s.source_encoding)
-
- while 1:
- s.next()
- sy = s.sy
- systr = s.systring
+ chars = StringEncoding.UnicodeLiteralBuilder()
+ elif kind == '':
+ chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
+ else:
+ chars = StringEncoding.BytesLiteralBuilder(s.source_encoding)
+
+ while 1:
+ s.next()
+ sy = s.sy
+ systr = s.systring
# print "p_string_literal: sy =", sy, repr(s.systring) ###
- if sy == 'CHARS':
- chars.append(systr)
+ if sy == 'CHARS':
+ chars.append(systr)
if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
has_non_ascii_literal_characters = True
- elif sy == 'ESCAPE':
+ elif sy == 'ESCAPE':
# in Py2, 'ur' raw unicode strings resolve unicode escapes but nothing else
if is_raw and (is_python3_source or kind != 'u' or systr[1] not in u'Uu'):
- chars.append(systr)
+ chars.append(systr)
if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
has_non_ascii_literal_characters = True
- else:
+ else:
_append_escape_sequence(kind, chars, systr, s)
- elif sy == 'NEWLINE':
- chars.append(u'\n')
- elif sy == 'END_STRING':
- break
- elif sy == 'EOF':
- s.error("Unclosed string literal", pos=pos)
- else:
+ elif sy == 'NEWLINE':
+ chars.append(u'\n')
+ elif sy == 'END_STRING':
+ break
+ elif sy == 'EOF':
+ s.error("Unclosed string literal", pos=pos)
+ else:
s.error("Unexpected token %r:%r in string literal" % (
sy, s.systring))
-
- if kind == 'c':
- unicode_value = None
- bytes_value = chars.getchar()
- if len(bytes_value) != 1:
- error(pos, u"invalid character literal: %r" % bytes_value)
- else:
- bytes_value, unicode_value = chars.getstrings()
+
+ if kind == 'c':
+ unicode_value = None
+ bytes_value = chars.getchar()
+ if len(bytes_value) != 1:
+ error(pos, u"invalid character literal: %r" % bytes_value)
+ else:
+ bytes_value, unicode_value = chars.getstrings()
if (has_non_ascii_literal_characters
and is_python3_source and Future.unicode_literals in s.context.future_directives):
- # Python 3 forbids literal non-ASCII characters in byte strings
+ # Python 3 forbids literal non-ASCII characters in byte strings
if kind == 'b':
s.error("bytes can only contain ASCII literal characters.", pos=pos)
- bytes_value = None
+ bytes_value = None
if kind == 'f':
unicode_value = p_f_string(s, unicode_value, string_start_pos, is_raw='r' in kind_string)
- s.next()
- return (kind, bytes_value, unicode_value)
-
+ s.next()
+ return (kind, bytes_value, unicode_value)
+
def _append_escape_sequence(kind, builder, escape_sequence, s):
c = escape_sequence[1]
@@ -1215,50 +1215,50 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
# since PEP 448:
# list_display ::= "[" [listmaker] "]"
# listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-# comp_iter ::= comp_for | comp_if
+# comp_iter ::= comp_for | comp_if
# comp_for ::= ["async"] "for" expression_list "in" testlist [comp_iter]
# comp_if ::= "if" test [comp_iter]
-
-def p_list_maker(s):
- # s.sy == '['
- pos = s.position()
- s.next()
- if s.sy == ']':
- s.expect(']')
+
+def p_list_maker(s):
+ # s.sy == '['
+ pos = s.position()
+ s.next()
+ if s.sy == ']':
+ s.expect(']')
return ExprNodes.ListNode(pos, args=[])
expr = p_test_or_starred_expr(s)
if s.sy in ('for', 'async'):
if expr.is_starred:
s.error("iterable unpacking cannot be used in comprehension")
- append = ExprNodes.ComprehensionAppendNode(pos, expr=expr)
- loop = p_comp_for(s, append)
- s.expect(']')
- return ExprNodes.ComprehensionNode(
+ append = ExprNodes.ComprehensionAppendNode(pos, expr=expr)
+ loop = p_comp_for(s, append)
+ s.expect(']')
+ return ExprNodes.ComprehensionNode(
pos, loop=loop, append=append, type=Builtin.list_type,
- # list comprehensions leak their loop variable in Py2
+ # list comprehensions leak their loop variable in Py2
has_local_scope=s.context.language_level >= 3)
# (merged) list literal
if s.sy == ',':
s.next()
exprs = p_test_or_starred_expr_list(s, expr)
- else:
+ else:
exprs = [expr]
s.expect(']')
return ExprNodes.ListNode(pos, args=exprs)
-
-def p_comp_iter(s, body):
+
+def p_comp_iter(s, body):
if s.sy in ('for', 'async'):
- return p_comp_for(s, body)
- elif s.sy == 'if':
- return p_comp_if(s, body)
- else:
- # insert the 'append' operation into the loop
- return body
-
-def p_comp_for(s, body):
+ return p_comp_for(s, body)
+ elif s.sy == 'if':
+ return p_comp_if(s, body)
+ else:
+ # insert the 'append' operation into the loop
+ return body
+
+def p_comp_for(s, body):
pos = s.position()
# [async] for ...
is_async = False
@@ -1266,35 +1266,35 @@ def p_comp_for(s, body):
is_async = True
s.next()
- # s.sy == 'for'
+ # s.sy == 'for'
s.expect('for')
kw = p_for_bounds(s, allow_testlist=False, is_async=is_async)
kw.update(else_clause=None, body=p_comp_iter(s, body), is_async=is_async)
- return Nodes.ForStatNode(pos, **kw)
-
-def p_comp_if(s, body):
- # s.sy == 'if'
- pos = s.position()
- s.next()
- test = p_test_nocond(s)
- return Nodes.IfStatNode(pos,
- if_clauses = [Nodes.IfClauseNode(pos, condition = test,
- body = p_comp_iter(s, body))],
- else_clause = None )
-
-
+ return Nodes.ForStatNode(pos, **kw)
+
+def p_comp_if(s, body):
+ # s.sy == 'if'
+ pos = s.position()
+ s.next()
+ test = p_test_nocond(s)
+ return Nodes.IfStatNode(pos,
+ if_clauses = [Nodes.IfClauseNode(pos, condition = test,
+ body = p_comp_iter(s, body))],
+ else_clause = None )
+
+
# since PEP 448:
#dictorsetmaker: ( ((test ':' test | '**' expr)
# (comp_for | (',' (test ':' test | '**' expr))* [','])) |
# ((test | star_expr)
# (comp_for | (',' (test | star_expr))* [','])) )
-def p_dict_or_set_maker(s):
- # s.sy == '{'
- pos = s.position()
- s.next()
- if s.sy == '}':
- s.next()
+def p_dict_or_set_maker(s):
+ # s.sy == '{'
+ pos = s.position()
+ s.next()
+ if s.sy == '}':
+ s.next()
return ExprNodes.DictNode(pos, key_value_pairs=[])
parts = []
@@ -1308,7 +1308,7 @@ def p_dict_or_set_maker(s):
elif target_type != len(s.sy):
s.error("unexpected %sitem found in %s literal" % (
s.sy, 'set' if target_type == 1 else 'dict'))
- s.next()
+ s.next()
if s.sy == '*':
s.error("expected expression, found '*'")
item = p_starred_expr(s)
@@ -1332,8 +1332,8 @@ def p_dict_or_set_maker(s):
if s.sy == ',':
s.next()
- if s.sy == '}':
- break
+ if s.sy == '}':
+ break
else:
break
@@ -1349,10 +1349,10 @@ def p_dict_or_set_maker(s):
else:
comprehension_type = Builtin.set_type
append = ExprNodes.ComprehensionAppendNode(item.pos, expr=item)
- loop = p_comp_for(s, append)
- s.expect('}')
+ loop = p_comp_for(s, append)
+ s.expect('}')
return ExprNodes.ComprehensionNode(pos, loop=loop, append=append, type=comprehension_type)
- else:
+ else:
# syntax error, try to find a good error message
if len(parts) == 1 and not isinstance(parts[0], list):
s.error("iterable unpacking cannot be used in comprehension")
@@ -1379,7 +1379,7 @@ def p_dict_or_set_maker(s):
if len(items) == 1 and items[0].is_set_literal:
return items[0]
return ExprNodes.MergedSequenceNode(pos, args=items, type=Builtin.set_type)
- else:
+ else:
# (merged) dict literal
items = []
dict_items = []
@@ -1396,305 +1396,305 @@ def p_dict_or_set_maker(s):
if len(items) == 1 and items[0].is_dict_literal:
return items[0]
return ExprNodes.MergedDictNode(pos, keyword_args=items, reject_duplicates=False)
-
-
-# NOTE: no longer in Py3 :)
-def p_backquote_expr(s):
- # s.sy == '`'
- pos = s.position()
- s.next()
- args = [p_test(s)]
- while s.sy == ',':
- s.next()
- args.append(p_test(s))
- s.expect('`')
- if len(args) == 1:
- arg = args[0]
- else:
- arg = ExprNodes.TupleNode(pos, args = args)
- return ExprNodes.BackquoteNode(pos, arg = arg)
-
-def p_simple_expr_list(s, expr=None):
- exprs = expr is not None and [expr] or []
- while s.sy not in expr_terminators:
- exprs.append( p_test(s) )
- if s.sy != ',':
- break
- s.next()
- return exprs
-
-
-def p_test_or_starred_expr_list(s, expr=None):
- exprs = expr is not None and [expr] or []
- while s.sy not in expr_terminators:
+
+
+# NOTE: no longer in Py3 :)
+def p_backquote_expr(s):
+ # s.sy == '`'
+ pos = s.position()
+ s.next()
+ args = [p_test(s)]
+ while s.sy == ',':
+ s.next()
+ args.append(p_test(s))
+ s.expect('`')
+ if len(args) == 1:
+ arg = args[0]
+ else:
+ arg = ExprNodes.TupleNode(pos, args = args)
+ return ExprNodes.BackquoteNode(pos, arg = arg)
+
+def p_simple_expr_list(s, expr=None):
+ exprs = expr is not None and [expr] or []
+ while s.sy not in expr_terminators:
+ exprs.append( p_test(s) )
+ if s.sy != ',':
+ break
+ s.next()
+ return exprs
+
+
+def p_test_or_starred_expr_list(s, expr=None):
+ exprs = expr is not None and [expr] or []
+ while s.sy not in expr_terminators:
exprs.append(p_test_or_starred_expr(s))
- if s.sy != ',':
- break
- s.next()
- return exprs
-
-
-#testlist: test (',' test)* [',']
-
-def p_testlist(s):
- pos = s.position()
- expr = p_test(s)
- if s.sy == ',':
- s.next()
- exprs = p_simple_expr_list(s, expr)
- return ExprNodes.TupleNode(pos, args = exprs)
- else:
- return expr
-
-# testlist_star_expr: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-
-def p_testlist_star_expr(s):
- pos = s.position()
- expr = p_test_or_starred_expr(s)
- if s.sy == ',':
- s.next()
- exprs = p_test_or_starred_expr_list(s, expr)
- return ExprNodes.TupleNode(pos, args = exprs)
- else:
- return expr
-
-# testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-
-def p_testlist_comp(s):
- pos = s.position()
- expr = p_test_or_starred_expr(s)
- if s.sy == ',':
- s.next()
- exprs = p_test_or_starred_expr_list(s, expr)
- return ExprNodes.TupleNode(pos, args = exprs)
+ if s.sy != ',':
+ break
+ s.next()
+ return exprs
+
+
+#testlist: test (',' test)* [',']
+
+def p_testlist(s):
+ pos = s.position()
+ expr = p_test(s)
+ if s.sy == ',':
+ s.next()
+ exprs = p_simple_expr_list(s, expr)
+ return ExprNodes.TupleNode(pos, args = exprs)
+ else:
+ return expr
+
+# testlist_star_expr: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+
+def p_testlist_star_expr(s):
+ pos = s.position()
+ expr = p_test_or_starred_expr(s)
+ if s.sy == ',':
+ s.next()
+ exprs = p_test_or_starred_expr_list(s, expr)
+ return ExprNodes.TupleNode(pos, args = exprs)
+ else:
+ return expr
+
+# testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+
+def p_testlist_comp(s):
+ pos = s.position()
+ expr = p_test_or_starred_expr(s)
+ if s.sy == ',':
+ s.next()
+ exprs = p_test_or_starred_expr_list(s, expr)
+ return ExprNodes.TupleNode(pos, args = exprs)
elif s.sy in ('for', 'async'):
- return p_genexp(s, expr)
- else:
- return expr
-
-def p_genexp(s, expr):
+ return p_genexp(s, expr)
+ else:
+ return expr
+
+def p_genexp(s, expr):
# s.sy == 'async' | 'for'
- loop = p_comp_for(s, Nodes.ExprStatNode(
- expr.pos, expr = ExprNodes.YieldExprNode(expr.pos, arg=expr)))
- return ExprNodes.GeneratorExpressionNode(expr.pos, loop=loop)
-
-expr_terminators = cython.declare(set, set([
- ')', ']', '}', ':', '=', 'NEWLINE']))
-
-
-#-------------------------------------------------------
-#
-# Statements
-#
-#-------------------------------------------------------
-
-def p_global_statement(s):
- # assume s.sy == 'global'
- pos = s.position()
- s.next()
- names = p_ident_list(s)
- return Nodes.GlobalNode(pos, names = names)
-
-
-def p_nonlocal_statement(s):
- pos = s.position()
- s.next()
- names = p_ident_list(s)
- return Nodes.NonlocalNode(pos, names = names)
-
-
-def p_expression_or_assignment(s):
+ loop = p_comp_for(s, Nodes.ExprStatNode(
+ expr.pos, expr = ExprNodes.YieldExprNode(expr.pos, arg=expr)))
+ return ExprNodes.GeneratorExpressionNode(expr.pos, loop=loop)
+
+expr_terminators = cython.declare(set, set([
+ ')', ']', '}', ':', '=', 'NEWLINE']))
+
+
+#-------------------------------------------------------
+#
+# Statements
+#
+#-------------------------------------------------------
+
+def p_global_statement(s):
+ # assume s.sy == 'global'
+ pos = s.position()
+ s.next()
+ names = p_ident_list(s)
+ return Nodes.GlobalNode(pos, names = names)
+
+
+def p_nonlocal_statement(s):
+ pos = s.position()
+ s.next()
+ names = p_ident_list(s)
+ return Nodes.NonlocalNode(pos, names = names)
+
+
+def p_expression_or_assignment(s):
expr = p_testlist_star_expr(s)
if s.sy == ':' and (expr.is_name or expr.is_subscript or expr.is_attribute):
s.next()
expr.annotation = p_test(s)
if s.sy == '=' and expr.is_starred:
- # This is a common enough error to make when learning Cython to let
- # it fail as early as possible and give a very clear error message.
- s.error("a starred assignment target must be in a list or tuple"
- " - maybe you meant to use an index assignment: var[0] = ...",
+ # This is a common enough error to make when learning Cython to let
+ # it fail as early as possible and give a very clear error message.
+ s.error("a starred assignment target must be in a list or tuple"
+ " - maybe you meant to use an index assignment: var[0] = ...",
pos=expr.pos)
expr_list = [expr]
- while s.sy == '=':
- s.next()
- if s.sy == 'yield':
- expr = p_yield_expression(s)
- else:
- expr = p_testlist_star_expr(s)
- expr_list.append(expr)
- if len(expr_list) == 1:
+ while s.sy == '=':
+ s.next()
+ if s.sy == 'yield':
+ expr = p_yield_expression(s)
+ else:
+ expr = p_testlist_star_expr(s)
+ expr_list.append(expr)
+ if len(expr_list) == 1:
if re.match(r"([-+*/%^&|]|<<|>>|\*\*|//|@)=", s.sy):
- lhs = expr_list[0]
- if isinstance(lhs, ExprNodes.SliceIndexNode):
- # implementation requires IndexNode
- lhs = ExprNodes.IndexNode(
- lhs.pos,
- base=lhs.base,
- index=make_slice_node(lhs.pos, lhs.start, lhs.stop))
+ lhs = expr_list[0]
+ if isinstance(lhs, ExprNodes.SliceIndexNode):
+ # implementation requires IndexNode
+ lhs = ExprNodes.IndexNode(
+ lhs.pos,
+ base=lhs.base,
+ index=make_slice_node(lhs.pos, lhs.start, lhs.stop))
elif not isinstance(lhs, (ExprNodes.AttributeNode, ExprNodes.IndexNode, ExprNodes.NameNode)):
- error(lhs.pos, "Illegal operand for inplace operation.")
- operator = s.sy[:-1]
- s.next()
- if s.sy == 'yield':
- rhs = p_yield_expression(s)
- else:
- rhs = p_testlist(s)
+ error(lhs.pos, "Illegal operand for inplace operation.")
+ operator = s.sy[:-1]
+ s.next()
+ if s.sy == 'yield':
+ rhs = p_yield_expression(s)
+ else:
+ rhs = p_testlist(s)
return Nodes.InPlaceAssignmentNode(lhs.pos, operator=operator, lhs=lhs, rhs=rhs)
- expr = expr_list[0]
- return Nodes.ExprStatNode(expr.pos, expr=expr)
-
- rhs = expr_list[-1]
- if len(expr_list) == 2:
+ expr = expr_list[0]
+ return Nodes.ExprStatNode(expr.pos, expr=expr)
+
+ rhs = expr_list[-1]
+ if len(expr_list) == 2:
return Nodes.SingleAssignmentNode(rhs.pos, lhs=expr_list[0], rhs=rhs)
- else:
+ else:
return Nodes.CascadedAssignmentNode(rhs.pos, lhs_list=expr_list[:-1], rhs=rhs)
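# A minimal sketch of the statement shapes p_expression_or_assignment
# distinguishes (illustrative examples, assuming ordinary Cython/Python code):
#
#     total += step            # in-place assignment -> InPlaceAssignmentNode
#     x = compute()            # single assignment   -> SingleAssignmentNode
#     a = b = []               # cascaded assignment -> CascadedAssignmentNode
#     count: int = 0           # annotated target parsed via expr.annotation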
-
-
-def p_print_statement(s):
- # s.sy == 'print'
- pos = s.position()
- ends_with_comma = 0
- s.next()
- if s.sy == '>>':
- s.next()
- stream = p_test(s)
- if s.sy == ',':
- s.next()
- ends_with_comma = s.sy in ('NEWLINE', 'EOF')
- else:
- stream = None
- args = []
- if s.sy not in ('NEWLINE', 'EOF'):
- args.append(p_test(s))
- while s.sy == ',':
- s.next()
- if s.sy in ('NEWLINE', 'EOF'):
- ends_with_comma = 1
- break
- args.append(p_test(s))
+
+
+def p_print_statement(s):
+ # s.sy == 'print'
+ pos = s.position()
+ ends_with_comma = 0
+ s.next()
+ if s.sy == '>>':
+ s.next()
+ stream = p_test(s)
+ if s.sy == ',':
+ s.next()
+ ends_with_comma = s.sy in ('NEWLINE', 'EOF')
+ else:
+ stream = None
+ args = []
+ if s.sy not in ('NEWLINE', 'EOF'):
+ args.append(p_test(s))
+ while s.sy == ',':
+ s.next()
+ if s.sy in ('NEWLINE', 'EOF'):
+ ends_with_comma = 1
+ break
+ args.append(p_test(s))
arg_tuple = ExprNodes.TupleNode(pos, args=args)
- return Nodes.PrintStatNode(pos,
+ return Nodes.PrintStatNode(pos,
arg_tuple=arg_tuple, stream=stream,
append_newline=not ends_with_comma)
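# A minimal sketch of the Python 2 print forms handled above (illustrative,
# assuming language_level=2 sources):
#
#     print "hello"                      # plain print, newline appended
#     print "partial",                   # trailing comma -> no newline
#     print >> sys.stderr, "warning"     # '>>' redirects output to a stream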
-
-
-def p_exec_statement(s):
- # s.sy == 'exec'
- pos = s.position()
- s.next()
- code = p_bit_expr(s)
- if isinstance(code, ExprNodes.TupleNode):
- # Py3 compatibility syntax
- tuple_variant = True
- args = code.args
- if len(args) not in (2, 3):
- s.error("expected tuple of length 2 or 3, got length %d" % len(args),
- pos=pos, fatal=False)
- args = [code]
- else:
- tuple_variant = False
- args = [code]
- if s.sy == 'in':
- if tuple_variant:
- s.error("tuple variant of exec does not support additional 'in' arguments",
- fatal=False)
- s.next()
- args.append(p_test(s))
- if s.sy == ',':
- s.next()
- args.append(p_test(s))
- return Nodes.ExecStatNode(pos, args=args)
-
-def p_del_statement(s):
- # s.sy == 'del'
- pos = s.position()
- s.next()
- # FIXME: 'exprlist' in Python
- args = p_simple_expr_list(s)
- return Nodes.DelStatNode(pos, args = args)
-
-def p_pass_statement(s, with_newline = 0):
- pos = s.position()
- s.expect('pass')
- if with_newline:
- s.expect_newline("Expected a newline", ignore_semicolon=True)
- return Nodes.PassStatNode(pos)
-
-def p_break_statement(s):
- # s.sy == 'break'
- pos = s.position()
- s.next()
- return Nodes.BreakStatNode(pos)
-
-def p_continue_statement(s):
- # s.sy == 'continue'
- pos = s.position()
- s.next()
- return Nodes.ContinueStatNode(pos)
-
-def p_return_statement(s):
- # s.sy == 'return'
- pos = s.position()
- s.next()
- if s.sy not in statement_terminators:
- value = p_testlist(s)
- else:
- value = None
- return Nodes.ReturnStatNode(pos, value = value)
-
-def p_raise_statement(s):
- # s.sy == 'raise'
- pos = s.position()
- s.next()
- exc_type = None
- exc_value = None
- exc_tb = None
- cause = None
- if s.sy not in statement_terminators:
- exc_type = p_test(s)
- if s.sy == ',':
- s.next()
- exc_value = p_test(s)
- if s.sy == ',':
- s.next()
- exc_tb = p_test(s)
- elif s.sy == 'from':
- s.next()
- cause = p_test(s)
- if exc_type or exc_value or exc_tb:
- return Nodes.RaiseStatNode(pos,
- exc_type = exc_type,
- exc_value = exc_value,
- exc_tb = exc_tb,
- cause = cause)
- else:
- return Nodes.ReraiseStatNode(pos)
-
-
-def p_import_statement(s):
- # s.sy in ('import', 'cimport')
- pos = s.position()
- kind = s.sy
- s.next()
+
+
+def p_exec_statement(s):
+ # s.sy == 'exec'
+ pos = s.position()
+ s.next()
+ code = p_bit_expr(s)
+ if isinstance(code, ExprNodes.TupleNode):
+ # Py3 compatibility syntax
+ tuple_variant = True
+ args = code.args
+ if len(args) not in (2, 3):
+ s.error("expected tuple of length 2 or 3, got length %d" % len(args),
+ pos=pos, fatal=False)
+ args = [code]
+ else:
+ tuple_variant = False
+ args = [code]
+ if s.sy == 'in':
+ if tuple_variant:
+ s.error("tuple variant of exec does not support additional 'in' arguments",
+ fatal=False)
+ s.next()
+ args.append(p_test(s))
+ if s.sy == ',':
+ s.next()
+ args.append(p_test(s))
+ return Nodes.ExecStatNode(pos, args=args)
+
+def p_del_statement(s):
+ # s.sy == 'del'
+ pos = s.position()
+ s.next()
+ # FIXME: 'exprlist' in Python
+ args = p_simple_expr_list(s)
+ return Nodes.DelStatNode(pos, args = args)
+
+def p_pass_statement(s, with_newline = 0):
+ pos = s.position()
+ s.expect('pass')
+ if with_newline:
+ s.expect_newline("Expected a newline", ignore_semicolon=True)
+ return Nodes.PassStatNode(pos)
+
+def p_break_statement(s):
+ # s.sy == 'break'
+ pos = s.position()
+ s.next()
+ return Nodes.BreakStatNode(pos)
+
+def p_continue_statement(s):
+ # s.sy == 'continue'
+ pos = s.position()
+ s.next()
+ return Nodes.ContinueStatNode(pos)
+
+def p_return_statement(s):
+ # s.sy == 'return'
+ pos = s.position()
+ s.next()
+ if s.sy not in statement_terminators:
+ value = p_testlist(s)
+ else:
+ value = None
+ return Nodes.ReturnStatNode(pos, value = value)
+
+def p_raise_statement(s):
+ # s.sy == 'raise'
+ pos = s.position()
+ s.next()
+ exc_type = None
+ exc_value = None
+ exc_tb = None
+ cause = None
+ if s.sy not in statement_terminators:
+ exc_type = p_test(s)
+ if s.sy == ',':
+ s.next()
+ exc_value = p_test(s)
+ if s.sy == ',':
+ s.next()
+ exc_tb = p_test(s)
+ elif s.sy == 'from':
+ s.next()
+ cause = p_test(s)
+ if exc_type or exc_value or exc_tb:
+ return Nodes.RaiseStatNode(pos,
+ exc_type = exc_type,
+ exc_value = exc_value,
+ exc_tb = exc_tb,
+ cause = cause)
+ else:
+ return Nodes.ReraiseStatNode(pos)
+
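# A minimal sketch of the raise forms distinguished above (illustrative only):
#
#     raise                               # bare re-raise  -> ReraiseStatNode
#     raise ValueError("bad input")       # simple raise   -> RaiseStatNode
#     raise ExcType, value, traceback     # legacy 3-part form (language_level=2)
#     raise RuntimeError("x") from err    # 'from' clause sets the cause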
+
+def p_import_statement(s):
+ # s.sy in ('import', 'cimport')
+ pos = s.position()
+ kind = s.sy
+ s.next()
items = [p_dotted_name(s, as_allowed=1)]
- while s.sy == ',':
- s.next()
+ while s.sy == ',':
+ s.next()
items.append(p_dotted_name(s, as_allowed=1))
- stats = []
+ stats = []
is_absolute = Future.absolute_import in s.context.future_directives
- for pos, target_name, dotted_name, as_name in items:
- if kind == 'cimport':
+ for pos, target_name, dotted_name, as_name in items:
+ if kind == 'cimport':
stat = Nodes.CImportStatNode(
pos,
module_name=dotted_name,
as_name=as_name,
is_absolute=is_absolute)
- else:
- if as_name and "." in dotted_name:
+ else:
+ if as_name and "." in dotted_name:
name_list = ExprNodes.ListNode(pos, args=[
ExprNodes.IdentifierStringNode(pos, value=s.context.intern_ustring("*"))])
- else:
- name_list = None
+ else:
+ name_list = None
stat = Nodes.SingleAssignmentNode(
pos,
lhs=ExprNodes.NameNode(pos, name=as_name or target_name),
@@ -1703,346 +1703,346 @@ def p_import_statement(s):
module_name=ExprNodes.IdentifierStringNode(pos, value=dotted_name),
level=0 if is_absolute else None,
name_list=name_list))
- stats.append(stat)
+ stats.append(stat)
return Nodes.StatListNode(pos, stats=stats)
-
-
-def p_from_import_statement(s, first_statement = 0):
- # s.sy == 'from'
- pos = s.position()
- s.next()
- if s.sy == '.':
- # count relative import level
- level = 0
- while s.sy == '.':
- level += 1
- s.next()
- else:
- level = None
- if level is not None and s.sy in ('import', 'cimport'):
- # we are dealing with "from .. import foo, bar"
+
+
+def p_from_import_statement(s, first_statement = 0):
+ # s.sy == 'from'
+ pos = s.position()
+ s.next()
+ if s.sy == '.':
+ # count relative import level
+ level = 0
+ while s.sy == '.':
+ level += 1
+ s.next()
+ else:
+ level = None
+ if level is not None and s.sy in ('import', 'cimport'):
+ # we are dealing with "from .. import foo, bar"
dotted_name_pos, dotted_name = s.position(), s.context.intern_ustring('')
- else:
- if level is None and Future.absolute_import in s.context.future_directives:
- level = 0
- (dotted_name_pos, _, dotted_name, _) = p_dotted_name(s, as_allowed=False)
- if s.sy not in ('import', 'cimport'):
- s.error("Expected 'import' or 'cimport'")
- kind = s.sy
- s.next()
-
- is_cimport = kind == 'cimport'
- is_parenthesized = False
- if s.sy == '*':
+ else:
+ if level is None and Future.absolute_import in s.context.future_directives:
+ level = 0
+ (dotted_name_pos, _, dotted_name, _) = p_dotted_name(s, as_allowed=False)
+ if s.sy not in ('import', 'cimport'):
+ s.error("Expected 'import' or 'cimport'")
+ kind = s.sy
+ s.next()
+
+ is_cimport = kind == 'cimport'
+ is_parenthesized = False
+ if s.sy == '*':
imported_names = [(s.position(), s.context.intern_ustring("*"), None, None)]
- s.next()
- else:
- if s.sy == '(':
- is_parenthesized = True
- s.next()
- imported_names = [p_imported_name(s, is_cimport)]
- while s.sy == ',':
- s.next()
- if is_parenthesized and s.sy == ')':
- break
- imported_names.append(p_imported_name(s, is_cimport))
- if is_parenthesized:
- s.expect(')')
- if dotted_name == '__future__':
- if not first_statement:
- s.error("from __future__ imports must occur at the beginning of the file")
- elif level:
- s.error("invalid syntax")
- else:
- for (name_pos, name, as_name, kind) in imported_names:
- if name == "braces":
- s.error("not a chance", name_pos)
- break
- try:
- directive = getattr(Future, name)
- except AttributeError:
- s.error("future feature %s is not defined" % name, name_pos)
- break
- s.context.future_directives.add(directive)
- return Nodes.PassStatNode(pos)
- elif kind == 'cimport':
- return Nodes.FromCImportStatNode(
- pos, module_name=dotted_name,
- relative_level=level,
- imported_names=imported_names)
- else:
- imported_name_strings = []
- items = []
- for (name_pos, name, as_name, kind) in imported_names:
- imported_name_strings.append(
+ s.next()
+ else:
+ if s.sy == '(':
+ is_parenthesized = True
+ s.next()
+ imported_names = [p_imported_name(s, is_cimport)]
+ while s.sy == ',':
+ s.next()
+ if is_parenthesized and s.sy == ')':
+ break
+ imported_names.append(p_imported_name(s, is_cimport))
+ if is_parenthesized:
+ s.expect(')')
+ if dotted_name == '__future__':
+ if not first_statement:
+ s.error("from __future__ imports must occur at the beginning of the file")
+ elif level:
+ s.error("invalid syntax")
+ else:
+ for (name_pos, name, as_name, kind) in imported_names:
+ if name == "braces":
+ s.error("not a chance", name_pos)
+ break
+ try:
+ directive = getattr(Future, name)
+ except AttributeError:
+ s.error("future feature %s is not defined" % name, name_pos)
+ break
+ s.context.future_directives.add(directive)
+ return Nodes.PassStatNode(pos)
+ elif kind == 'cimport':
+ return Nodes.FromCImportStatNode(
+ pos, module_name=dotted_name,
+ relative_level=level,
+ imported_names=imported_names)
+ else:
+ imported_name_strings = []
+ items = []
+ for (name_pos, name, as_name, kind) in imported_names:
+ imported_name_strings.append(
ExprNodes.IdentifierStringNode(name_pos, value=name))
- items.append(
+ items.append(
(name, ExprNodes.NameNode(name_pos, name=as_name or name)))
- import_list = ExprNodes.ListNode(
+ import_list = ExprNodes.ListNode(
imported_names[0][0], args=imported_name_strings)
- return Nodes.FromImportStatNode(pos,
- module = ExprNodes.ImportNode(dotted_name_pos,
- module_name = ExprNodes.IdentifierStringNode(pos, value = dotted_name),
- level = level,
- name_list = import_list),
- items = items)
-
-
+ return Nodes.FromImportStatNode(pos,
+ module = ExprNodes.ImportNode(dotted_name_pos,
+ module_name = ExprNodes.IdentifierStringNode(pos, value = dotted_name),
+ level = level,
+ name_list = import_list),
+ items = items)
+
+
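# A minimal sketch of the import forms this routine dispatches on
# (illustrative module and name spellings only):
#
#     from __future__ import absolute_import     # recorded as a future directive
#     from . import sibling                       # relative level counted from the dots
#     from pkg.mod cimport CType as Alias         # cimport -> FromCImportStatNode
#     from pkg.mod import (one, two)              # parenthesised name list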
imported_name_kinds = cython.declare(set, set(['class', 'struct', 'union']))
-def p_imported_name(s, is_cimport):
- pos = s.position()
- kind = None
- if is_cimport and s.systring in imported_name_kinds:
- kind = s.systring
- s.next()
- name = p_ident(s)
- as_name = p_as_name(s)
- return (pos, name, as_name, kind)
-
-
-def p_dotted_name(s, as_allowed):
- pos = s.position()
- target_name = p_ident(s)
- as_name = None
- names = [target_name]
- while s.sy == '.':
- s.next()
- names.append(p_ident(s))
- if as_allowed:
- as_name = p_as_name(s)
+def p_imported_name(s, is_cimport):
+ pos = s.position()
+ kind = None
+ if is_cimport and s.systring in imported_name_kinds:
+ kind = s.systring
+ s.next()
+ name = p_ident(s)
+ as_name = p_as_name(s)
+ return (pos, name, as_name, kind)
+
+
+def p_dotted_name(s, as_allowed):
+ pos = s.position()
+ target_name = p_ident(s)
+ as_name = None
+ names = [target_name]
+ while s.sy == '.':
+ s.next()
+ names.append(p_ident(s))
+ if as_allowed:
+ as_name = p_as_name(s)
return (pos, target_name, s.context.intern_ustring(u'.'.join(names)), as_name)
-
-
-def p_as_name(s):
- if s.sy == 'IDENT' and s.systring == 'as':
- s.next()
- return p_ident(s)
- else:
- return None
-
-
-def p_assert_statement(s):
- # s.sy == 'assert'
- pos = s.position()
- s.next()
- cond = p_test(s)
- if s.sy == ',':
- s.next()
- value = p_test(s)
- else:
- value = None
- return Nodes.AssertStatNode(pos, cond = cond, value = value)
-
-
-statement_terminators = cython.declare(set, set([';', 'NEWLINE', 'EOF']))
-
-def p_if_statement(s):
- # s.sy == 'if'
- pos = s.position()
- s.next()
- if_clauses = [p_if_clause(s)]
- while s.sy == 'elif':
- s.next()
- if_clauses.append(p_if_clause(s))
- else_clause = p_else_clause(s)
- return Nodes.IfStatNode(pos,
- if_clauses = if_clauses, else_clause = else_clause)
-
-def p_if_clause(s):
- pos = s.position()
- test = p_test(s)
- body = p_suite(s)
- return Nodes.IfClauseNode(pos,
- condition = test, body = body)
-
-def p_else_clause(s):
- if s.sy == 'else':
- s.next()
- return p_suite(s)
- else:
- return None
-
-def p_while_statement(s):
- # s.sy == 'while'
- pos = s.position()
- s.next()
- test = p_test(s)
- body = p_suite(s)
- else_clause = p_else_clause(s)
- return Nodes.WhileStatNode(pos,
- condition = test, body = body,
- else_clause = else_clause)
-
+
+
+def p_as_name(s):
+ if s.sy == 'IDENT' and s.systring == 'as':
+ s.next()
+ return p_ident(s)
+ else:
+ return None
+
+
+def p_assert_statement(s):
+ # s.sy == 'assert'
+ pos = s.position()
+ s.next()
+ cond = p_test(s)
+ if s.sy == ',':
+ s.next()
+ value = p_test(s)
+ else:
+ value = None
+ return Nodes.AssertStatNode(pos, cond = cond, value = value)
+
+
+statement_terminators = cython.declare(set, set([';', 'NEWLINE', 'EOF']))
+
+def p_if_statement(s):
+ # s.sy == 'if'
+ pos = s.position()
+ s.next()
+ if_clauses = [p_if_clause(s)]
+ while s.sy == 'elif':
+ s.next()
+ if_clauses.append(p_if_clause(s))
+ else_clause = p_else_clause(s)
+ return Nodes.IfStatNode(pos,
+ if_clauses = if_clauses, else_clause = else_clause)
+
+def p_if_clause(s):
+ pos = s.position()
+ test = p_test(s)
+ body = p_suite(s)
+ return Nodes.IfClauseNode(pos,
+ condition = test, body = body)
+
+def p_else_clause(s):
+ if s.sy == 'else':
+ s.next()
+ return p_suite(s)
+ else:
+ return None
+
+def p_while_statement(s):
+ # s.sy == 'while'
+ pos = s.position()
+ s.next()
+ test = p_test(s)
+ body = p_suite(s)
+ else_clause = p_else_clause(s)
+ return Nodes.WhileStatNode(pos,
+ condition = test, body = body,
+ else_clause = else_clause)
+
def p_for_statement(s, is_async=False):
- # s.sy == 'for'
- pos = s.position()
- s.next()
+ # s.sy == 'for'
+ pos = s.position()
+ s.next()
kw = p_for_bounds(s, allow_testlist=True, is_async=is_async)
- body = p_suite(s)
- else_clause = p_else_clause(s)
+ body = p_suite(s)
+ else_clause = p_else_clause(s)
kw.update(body=body, else_clause=else_clause, is_async=is_async)
- return Nodes.ForStatNode(pos, **kw)
-
+ return Nodes.ForStatNode(pos, **kw)
+
def p_for_bounds(s, allow_testlist=True, is_async=False):
- target = p_for_target(s)
- if s.sy == 'in':
- s.next()
+ target = p_for_target(s)
+ if s.sy == 'in':
+ s.next()
iterator = p_for_iterator(s, allow_testlist, is_async=is_async)
return dict(target=target, iterator=iterator)
elif not s.in_python_file and not is_async:
- if s.sy == 'from':
- s.next()
- bound1 = p_bit_expr(s)
- else:
- # Support shorter "for a <= x < b" syntax
- bound1, target = target, None
- rel1 = p_for_from_relation(s)
- name2_pos = s.position()
- name2 = p_ident(s)
- rel2_pos = s.position()
- rel2 = p_for_from_relation(s)
- bound2 = p_bit_expr(s)
- step = p_for_from_step(s)
- if target is None:
- target = ExprNodes.NameNode(name2_pos, name = name2)
- else:
- if not target.is_name:
- error(target.pos,
- "Target of for-from statement must be a variable name")
- elif name2 != target.name:
- error(name2_pos,
- "Variable name in for-from range does not match target")
- if rel1[0] != rel2[0]:
- error(rel2_pos,
- "Relation directions in for-from do not match")
- return dict(target = target,
- bound1 = bound1,
- relation1 = rel1,
- relation2 = rel2,
- bound2 = bound2,
- step = step,
- )
- else:
- s.expect('in')
- return {}
-
-def p_for_from_relation(s):
- if s.sy in inequality_relations:
- op = s.sy
- s.next()
- return op
- else:
-        s.error("Expected one of '<', '<=', '>', '>='")
-
-def p_for_from_step(s):
- if s.sy == 'IDENT' and s.systring == 'by':
- s.next()
- step = p_bit_expr(s)
- return step
- else:
- return None
-
-inequality_relations = cython.declare(set, set(['<', '<=', '>', '>=']))
-
-def p_target(s, terminator):
- pos = s.position()
- expr = p_starred_expr(s)
- if s.sy == ',':
- s.next()
- exprs = [expr]
- while s.sy != terminator:
- exprs.append(p_starred_expr(s))
- if s.sy != ',':
- break
- s.next()
- return ExprNodes.TupleNode(pos, args = exprs)
- else:
- return expr
-
-
-def p_for_target(s):
- return p_target(s, 'in')
-
+ if s.sy == 'from':
+ s.next()
+ bound1 = p_bit_expr(s)
+ else:
+ # Support shorter "for a <= x < b" syntax
+ bound1, target = target, None
+ rel1 = p_for_from_relation(s)
+ name2_pos = s.position()
+ name2 = p_ident(s)
+ rel2_pos = s.position()
+ rel2 = p_for_from_relation(s)
+ bound2 = p_bit_expr(s)
+ step = p_for_from_step(s)
+ if target is None:
+ target = ExprNodes.NameNode(name2_pos, name = name2)
+ else:
+ if not target.is_name:
+ error(target.pos,
+ "Target of for-from statement must be a variable name")
+ elif name2 != target.name:
+ error(name2_pos,
+ "Variable name in for-from range does not match target")
+ if rel1[0] != rel2[0]:
+ error(rel2_pos,
+ "Relation directions in for-from do not match")
+ return dict(target = target,
+ bound1 = bound1,
+ relation1 = rel1,
+ relation2 = rel2,
+ bound2 = bound2,
+ step = step,
+ )
+ else:
+ s.expect('in')
+ return {}
+
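# A minimal sketch of the loop headers p_for_bounds accepts in .pyx code
# (illustrative only; the 'for ... from' form is Cython-specific):
#
#     for item in sequence: ...            # ordinary Python iteration
#     for i from 0 <= i < n by 2: ...      # bounded integer loop with a step
#     for 0 <= i < n: ...                  # shorter form, target taken from the name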
+def p_for_from_relation(s):
+ if s.sy in inequality_relations:
+ op = s.sy
+ s.next()
+ return op
+ else:
+        s.error("Expected one of '<', '<=', '>', '>='")
+
+def p_for_from_step(s):
+ if s.sy == 'IDENT' and s.systring == 'by':
+ s.next()
+ step = p_bit_expr(s)
+ return step
+ else:
+ return None
+
+inequality_relations = cython.declare(set, set(['<', '<=', '>', '>=']))
+
+def p_target(s, terminator):
+ pos = s.position()
+ expr = p_starred_expr(s)
+ if s.sy == ',':
+ s.next()
+ exprs = [expr]
+ while s.sy != terminator:
+ exprs.append(p_starred_expr(s))
+ if s.sy != ',':
+ break
+ s.next()
+ return ExprNodes.TupleNode(pos, args = exprs)
+ else:
+ return expr
+
+
+def p_for_target(s):
+ return p_target(s, 'in')
+
def p_for_iterator(s, allow_testlist=True, is_async=False):
- pos = s.position()
- if allow_testlist:
- expr = p_testlist(s)
- else:
- expr = p_or_test(s)
+ pos = s.position()
+ if allow_testlist:
+ expr = p_testlist(s)
+ else:
+ expr = p_or_test(s)
return (ExprNodes.AsyncIteratorNode if is_async else ExprNodes.IteratorNode)(pos, sequence=expr)
-
-
-def p_try_statement(s):
- # s.sy == 'try'
- pos = s.position()
- s.next()
- body = p_suite(s)
- except_clauses = []
- else_clause = None
- if s.sy in ('except', 'else'):
- while s.sy == 'except':
- except_clauses.append(p_except_clause(s))
- if s.sy == 'else':
- s.next()
- else_clause = p_suite(s)
- body = Nodes.TryExceptStatNode(pos,
- body = body, except_clauses = except_clauses,
- else_clause = else_clause)
- if s.sy != 'finally':
- return body
- # try-except-finally is equivalent to nested try-except/try-finally
- if s.sy == 'finally':
- s.next()
- finally_clause = p_suite(s)
- return Nodes.TryFinallyStatNode(pos,
- body = body, finally_clause = finally_clause)
- else:
- s.error("Expected 'except' or 'finally'")
-
-def p_except_clause(s):
- # s.sy == 'except'
- pos = s.position()
- s.next()
- exc_type = None
- exc_value = None
- is_except_as = False
- if s.sy != ':':
- exc_type = p_test(s)
- # normalise into list of single exception tests
- if isinstance(exc_type, ExprNodes.TupleNode):
- exc_type = exc_type.args
- else:
- exc_type = [exc_type]
- if s.sy == ',' or (s.sy == 'IDENT' and s.systring == 'as'
- and s.context.language_level == 2):
- s.next()
- exc_value = p_test(s)
- elif s.sy == 'IDENT' and s.systring == 'as':
- # Py3 syntax requires a name here
- s.next()
- pos2 = s.position()
- name = p_ident(s)
- exc_value = ExprNodes.NameNode(pos2, name = name)
- is_except_as = True
- body = p_suite(s)
- return Nodes.ExceptClauseNode(pos,
- pattern = exc_type, target = exc_value,
- body = body, is_except_as=is_except_as)
-
-def p_include_statement(s, ctx):
- pos = s.position()
- s.next() # 'include'
- unicode_include_file_name = p_string_literal(s, 'u')[2]
- s.expect_newline("Syntax error in include statement")
- if s.compile_time_eval:
- include_file_name = unicode_include_file_name
- include_file_path = s.context.find_include_file(include_file_name, pos)
- if include_file_path:
- s.included_files.append(include_file_name)
+
+
+def p_try_statement(s):
+ # s.sy == 'try'
+ pos = s.position()
+ s.next()
+ body = p_suite(s)
+ except_clauses = []
+ else_clause = None
+ if s.sy in ('except', 'else'):
+ while s.sy == 'except':
+ except_clauses.append(p_except_clause(s))
+ if s.sy == 'else':
+ s.next()
+ else_clause = p_suite(s)
+ body = Nodes.TryExceptStatNode(pos,
+ body = body, except_clauses = except_clauses,
+ else_clause = else_clause)
+ if s.sy != 'finally':
+ return body
+ # try-except-finally is equivalent to nested try-except/try-finally
+ if s.sy == 'finally':
+ s.next()
+ finally_clause = p_suite(s)
+ return Nodes.TryFinallyStatNode(pos,
+ body = body, finally_clause = finally_clause)
+ else:
+ s.error("Expected 'except' or 'finally'")
+
+def p_except_clause(s):
+ # s.sy == 'except'
+ pos = s.position()
+ s.next()
+ exc_type = None
+ exc_value = None
+ is_except_as = False
+ if s.sy != ':':
+ exc_type = p_test(s)
+ # normalise into list of single exception tests
+ if isinstance(exc_type, ExprNodes.TupleNode):
+ exc_type = exc_type.args
+ else:
+ exc_type = [exc_type]
+ if s.sy == ',' or (s.sy == 'IDENT' and s.systring == 'as'
+ and s.context.language_level == 2):
+ s.next()
+ exc_value = p_test(s)
+ elif s.sy == 'IDENT' and s.systring == 'as':
+ # Py3 syntax requires a name here
+ s.next()
+ pos2 = s.position()
+ name = p_ident(s)
+ exc_value = ExprNodes.NameNode(pos2, name = name)
+ is_except_as = True
+ body = p_suite(s)
+ return Nodes.ExceptClauseNode(pos,
+ pattern = exc_type, target = exc_value,
+ body = body, is_except_as=is_except_as)
+
+def p_include_statement(s, ctx):
+ pos = s.position()
+ s.next() # 'include'
+ unicode_include_file_name = p_string_literal(s, 'u')[2]
+ s.expect_newline("Syntax error in include statement")
+ if s.compile_time_eval:
+ include_file_name = unicode_include_file_name
+ include_file_path = s.context.find_include_file(include_file_name, pos)
+ if include_file_path:
+ s.included_files.append(include_file_name)
with Utils.open_source_file(include_file_path) as f:
if Options.source_root:
import os
@@ -2051,281 +2051,281 @@ def p_include_statement(s, ctx):
rel_path = None
source_desc = FileSourceDescriptor(include_file_path, rel_path)
s2 = PyrexScanner(f, source_desc, s, source_encoding=f.encoding, parse_comments=s.parse_comments)
- tree = p_statement_list(s2, ctx)
- return tree
- else:
- return None
- else:
- return Nodes.PassStatNode(pos)
-
-
-def p_with_statement(s):
+ tree = p_statement_list(s2, ctx)
+ return tree
+ else:
+ return None
+ else:
+ return Nodes.PassStatNode(pos)
+
+
+def p_with_statement(s):
s.next() # 'with'
- if s.systring == 'template' and not s.in_python_file:
- node = p_with_template(s)
- else:
- node = p_with_items(s)
- return node
-
+ if s.systring == 'template' and not s.in_python_file:
+ node = p_with_template(s)
+ else:
+ node = p_with_items(s)
+ return node
+
def p_with_items(s, is_async=False):
- pos = s.position()
- if not s.in_python_file and s.sy == 'IDENT' and s.systring in ('nogil', 'gil'):
+ pos = s.position()
+ if not s.in_python_file and s.sy == 'IDENT' and s.systring in ('nogil', 'gil'):
if is_async:
s.error("with gil/nogil cannot be async")
- state = s.systring
- s.next()
- if s.sy == ',':
- s.next()
- body = p_with_items(s)
- else:
- body = p_suite(s)
+ state = s.systring
+ s.next()
+ if s.sy == ',':
+ s.next()
+ body = p_with_items(s)
+ else:
+ body = p_suite(s)
return Nodes.GILStatNode(pos, state=state, body=body)
- else:
- manager = p_test(s)
- target = None
- if s.sy == 'IDENT' and s.systring == 'as':
- s.next()
- target = p_starred_expr(s)
- if s.sy == ',':
- s.next()
+ else:
+ manager = p_test(s)
+ target = None
+ if s.sy == 'IDENT' and s.systring == 'as':
+ s.next()
+ target = p_starred_expr(s)
+ if s.sy == ',':
+ s.next()
body = p_with_items(s, is_async=is_async)
- else:
- body = p_suite(s)
+ else:
+ body = p_suite(s)
return Nodes.WithStatNode(pos, manager=manager, target=target, body=body, is_async=is_async)
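# A minimal sketch of the with-forms handled above (illustrative names only):
#
#     with nogil:                      # GIL state block -> GILStatNode
#         c_work(data)
#     with open(path) as f, lock:      # chained managers -> nested WithStatNodes
#         process(f)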
-
-
-def p_with_template(s):
- pos = s.position()
- templates = []
- s.next()
- s.expect('[')
- templates.append(s.systring)
- s.next()
- while s.systring == ',':
- s.next()
- templates.append(s.systring)
- s.next()
- s.expect(']')
- if s.sy == ':':
- s.next()
- s.expect_newline("Syntax error in template function declaration")
- s.expect_indent()
- body_ctx = Ctx()
- body_ctx.templates = templates
- func_or_var = p_c_func_or_var_declaration(s, pos, body_ctx)
- s.expect_dedent()
- return func_or_var
- else:
- error(pos, "Syntax error in template function declaration")
-
-def p_simple_statement(s, first_statement = 0):
- #print "p_simple_statement:", s.sy, s.systring ###
- if s.sy == 'global':
- node = p_global_statement(s)
- elif s.sy == 'nonlocal':
- node = p_nonlocal_statement(s)
- elif s.sy == 'print':
- node = p_print_statement(s)
- elif s.sy == 'exec':
- node = p_exec_statement(s)
- elif s.sy == 'del':
- node = p_del_statement(s)
- elif s.sy == 'break':
- node = p_break_statement(s)
- elif s.sy == 'continue':
- node = p_continue_statement(s)
- elif s.sy == 'return':
- node = p_return_statement(s)
- elif s.sy == 'raise':
- node = p_raise_statement(s)
- elif s.sy in ('import', 'cimport'):
- node = p_import_statement(s)
- elif s.sy == 'from':
- node = p_from_import_statement(s, first_statement = first_statement)
- elif s.sy == 'yield':
- node = p_yield_statement(s)
- elif s.sy == 'assert':
- node = p_assert_statement(s)
- elif s.sy == 'pass':
- node = p_pass_statement(s)
- else:
- node = p_expression_or_assignment(s)
- return node
-
-def p_simple_statement_list(s, ctx, first_statement = 0):
- # Parse a series of simple statements on one line
- # separated by semicolons.
- stat = p_simple_statement(s, first_statement = first_statement)
- pos = stat.pos
- stats = []
- if not isinstance(stat, Nodes.PassStatNode):
- stats.append(stat)
- while s.sy == ';':
- #print "p_simple_statement_list: maybe more to follow" ###
- s.next()
- if s.sy in ('NEWLINE', 'EOF'):
- break
- stat = p_simple_statement(s, first_statement = first_statement)
- if isinstance(stat, Nodes.PassStatNode):
- continue
- stats.append(stat)
- first_statement = False
-
- if not stats:
- stat = Nodes.PassStatNode(pos)
- elif len(stats) == 1:
- stat = stats[0]
- else:
- stat = Nodes.StatListNode(pos, stats = stats)
+
+
+def p_with_template(s):
+ pos = s.position()
+ templates = []
+ s.next()
+ s.expect('[')
+ templates.append(s.systring)
+ s.next()
+ while s.systring == ',':
+ s.next()
+ templates.append(s.systring)
+ s.next()
+ s.expect(']')
+ if s.sy == ':':
+ s.next()
+ s.expect_newline("Syntax error in template function declaration")
+ s.expect_indent()
+ body_ctx = Ctx()
+ body_ctx.templates = templates
+ func_or_var = p_c_func_or_var_declaration(s, pos, body_ctx)
+ s.expect_dedent()
+ return func_or_var
+ else:
+ error(pos, "Syntax error in template function declaration")
+
+def p_simple_statement(s, first_statement = 0):
+ #print "p_simple_statement:", s.sy, s.systring ###
+ if s.sy == 'global':
+ node = p_global_statement(s)
+ elif s.sy == 'nonlocal':
+ node = p_nonlocal_statement(s)
+ elif s.sy == 'print':
+ node = p_print_statement(s)
+ elif s.sy == 'exec':
+ node = p_exec_statement(s)
+ elif s.sy == 'del':
+ node = p_del_statement(s)
+ elif s.sy == 'break':
+ node = p_break_statement(s)
+ elif s.sy == 'continue':
+ node = p_continue_statement(s)
+ elif s.sy == 'return':
+ node = p_return_statement(s)
+ elif s.sy == 'raise':
+ node = p_raise_statement(s)
+ elif s.sy in ('import', 'cimport'):
+ node = p_import_statement(s)
+ elif s.sy == 'from':
+ node = p_from_import_statement(s, first_statement = first_statement)
+ elif s.sy == 'yield':
+ node = p_yield_statement(s)
+ elif s.sy == 'assert':
+ node = p_assert_statement(s)
+ elif s.sy == 'pass':
+ node = p_pass_statement(s)
+ else:
+ node = p_expression_or_assignment(s)
+ return node
+
+def p_simple_statement_list(s, ctx, first_statement = 0):
+ # Parse a series of simple statements on one line
+ # separated by semicolons.
+ stat = p_simple_statement(s, first_statement = first_statement)
+ pos = stat.pos
+ stats = []
+ if not isinstance(stat, Nodes.PassStatNode):
+ stats.append(stat)
+ while s.sy == ';':
+ #print "p_simple_statement_list: maybe more to follow" ###
+ s.next()
+ if s.sy in ('NEWLINE', 'EOF'):
+ break
+ stat = p_simple_statement(s, first_statement = first_statement)
+ if isinstance(stat, Nodes.PassStatNode):
+ continue
+ stats.append(stat)
+ first_statement = False
+
+ if not stats:
+ stat = Nodes.PassStatNode(pos)
+ elif len(stats) == 1:
+ stat = stats[0]
+ else:
+ stat = Nodes.StatListNode(pos, stats = stats)
if s.sy not in ('NEWLINE', 'EOF'):
# provide a better error message for users who accidentally write Cython code in .py files
if isinstance(stat, Nodes.ExprStatNode):
if stat.expr.is_name and stat.expr.name == 'cdef':
s.error("The 'cdef' keyword is only allowed in Cython files (pyx/pxi/pxd)", pos)
- s.expect_newline("Syntax error in simple statement list")
-
- return stat
-
-def p_compile_time_expr(s):
- old = s.compile_time_expr
- s.compile_time_expr = 1
- expr = p_testlist(s)
- s.compile_time_expr = old
- return expr
-
-def p_DEF_statement(s):
- pos = s.position()
- denv = s.compile_time_env
- s.next() # 'DEF'
- name = p_ident(s)
- s.expect('=')
- expr = p_compile_time_expr(s)
+ s.expect_newline("Syntax error in simple statement list")
+
+ return stat
+
+def p_compile_time_expr(s):
+ old = s.compile_time_expr
+ s.compile_time_expr = 1
+ expr = p_testlist(s)
+ s.compile_time_expr = old
+ return expr
+
+def p_DEF_statement(s):
+ pos = s.position()
+ denv = s.compile_time_env
+ s.next() # 'DEF'
+ name = p_ident(s)
+ s.expect('=')
+ expr = p_compile_time_expr(s)
if s.compile_time_eval:
value = expr.compile_time_value(denv)
#print "p_DEF_statement: %s = %r" % (name, value) ###
denv.declare(name, value)
- s.expect_newline("Expected a newline", ignore_semicolon=True)
- return Nodes.PassStatNode(pos)
-
-def p_IF_statement(s, ctx):
- pos = s.position()
- saved_eval = s.compile_time_eval
- current_eval = saved_eval
- denv = s.compile_time_env
- result = None
- while 1:
- s.next() # 'IF' or 'ELIF'
- expr = p_compile_time_expr(s)
- s.compile_time_eval = current_eval and bool(expr.compile_time_value(denv))
- body = p_suite(s, ctx)
- if s.compile_time_eval:
- result = body
- current_eval = 0
- if s.sy != 'ELIF':
- break
- if s.sy == 'ELSE':
- s.next()
- s.compile_time_eval = current_eval
- body = p_suite(s, ctx)
- if current_eval:
- result = body
- if not result:
- result = Nodes.PassStatNode(pos)
- s.compile_time_eval = saved_eval
- return result
-
-def p_statement(s, ctx, first_statement = 0):
- cdef_flag = ctx.cdef_flag
- decorators = None
- if s.sy == 'ctypedef':
- if ctx.level not in ('module', 'module_pxd'):
- s.error("ctypedef statement not allowed here")
- #if ctx.api:
- # error(s.position(), "'api' not allowed with 'ctypedef'")
- return p_ctypedef_statement(s, ctx)
- elif s.sy == 'DEF':
- return p_DEF_statement(s)
- elif s.sy == 'IF':
- return p_IF_statement(s, ctx)
- elif s.sy == '@':
- if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd', 'other'):
- s.error('decorator not allowed here')
- s.level = ctx.level
- decorators = p_decorators(s)
+ s.expect_newline("Expected a newline", ignore_semicolon=True)
+ return Nodes.PassStatNode(pos)
+
+def p_IF_statement(s, ctx):
+ pos = s.position()
+ saved_eval = s.compile_time_eval
+ current_eval = saved_eval
+ denv = s.compile_time_env
+ result = None
+ while 1:
+ s.next() # 'IF' or 'ELIF'
+ expr = p_compile_time_expr(s)
+ s.compile_time_eval = current_eval and bool(expr.compile_time_value(denv))
+ body = p_suite(s, ctx)
+ if s.compile_time_eval:
+ result = body
+ current_eval = 0
+ if s.sy != 'ELIF':
+ break
+ if s.sy == 'ELSE':
+ s.next()
+ s.compile_time_eval = current_eval
+ body = p_suite(s, ctx)
+ if current_eval:
+ result = body
+ if not result:
+ result = Nodes.PassStatNode(pos)
+ s.compile_time_eval = saved_eval
+ return result
+
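# A minimal sketch of the compile-time constructs handled by p_DEF_statement
# and p_IF_statement (illustrative only):
#
#     DEF BUFFER_SIZE = 1024            # compile-time constant, no runtime node
#     IF UNAME_SYSNAME == "Linux":      # only the selected branch is compiled
#         include "linux_impl.pxi"
#     ELSE:
#         include "generic_impl.pxi"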
+def p_statement(s, ctx, first_statement = 0):
+ cdef_flag = ctx.cdef_flag
+ decorators = None
+ if s.sy == 'ctypedef':
+ if ctx.level not in ('module', 'module_pxd'):
+ s.error("ctypedef statement not allowed here")
+ #if ctx.api:
+ # error(s.position(), "'api' not allowed with 'ctypedef'")
+ return p_ctypedef_statement(s, ctx)
+ elif s.sy == 'DEF':
+ return p_DEF_statement(s)
+ elif s.sy == 'IF':
+ return p_IF_statement(s, ctx)
+ elif s.sy == '@':
+ if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd', 'other'):
+ s.error('decorator not allowed here')
+ s.level = ctx.level
+ decorators = p_decorators(s)
if not ctx.allow_struct_enum_decorator and s.sy not in ('def', 'cdef', 'cpdef', 'class', 'async'):
if s.sy == 'IDENT' and s.systring == 'async':
pass # handled below
else:
s.error("Decorators can only be followed by functions or classes")
- elif s.sy == 'pass' and cdef_flag:
- # empty cdef block
+ elif s.sy == 'pass' and cdef_flag:
+ # empty cdef block
return p_pass_statement(s, with_newline=1)
-
- overridable = 0
- if s.sy == 'cdef':
- cdef_flag = 1
- s.next()
- elif s.sy == 'cpdef':
- cdef_flag = 1
- overridable = 1
- s.next()
- if cdef_flag:
- if ctx.level not in ('module', 'module_pxd', 'function', 'c_class', 'c_class_pxd'):
- s.error('cdef statement not allowed here')
- s.level = ctx.level
+
+ overridable = 0
+ if s.sy == 'cdef':
+ cdef_flag = 1
+ s.next()
+ elif s.sy == 'cpdef':
+ cdef_flag = 1
+ overridable = 1
+ s.next()
+ if cdef_flag:
+ if ctx.level not in ('module', 'module_pxd', 'function', 'c_class', 'c_class_pxd'):
+ s.error('cdef statement not allowed here')
+ s.level = ctx.level
node = p_cdef_statement(s, ctx(overridable=overridable))
- if decorators is not None:
+ if decorators is not None:
tup = (Nodes.CFuncDefNode, Nodes.CVarDefNode, Nodes.CClassDefNode)
- if ctx.allow_struct_enum_decorator:
+ if ctx.allow_struct_enum_decorator:
tup += (Nodes.CStructOrUnionDefNode, Nodes.CEnumDefNode)
- if not isinstance(node, tup):
- s.error("Decorators can only be followed by functions or classes")
- node.decorators = decorators
- return node
- else:
- if ctx.api:
- s.error("'api' not allowed with this statement", fatal=False)
- elif s.sy == 'def':
- # def statements aren't allowed in pxd files, except
- # as part of a cdef class
- if ('pxd' in ctx.level) and (ctx.level != 'c_class_pxd'):
- s.error('def statement not allowed here')
- s.level = ctx.level
- return p_def_statement(s, decorators)
- elif s.sy == 'class':
- if ctx.level not in ('module', 'function', 'class', 'other'):
- s.error("class definition not allowed here")
- return p_class_statement(s, decorators)
- elif s.sy == 'include':
- if ctx.level not in ('module', 'module_pxd'):
- s.error("include statement not allowed here")
- return p_include_statement(s, ctx)
- elif ctx.level == 'c_class' and s.sy == 'IDENT' and s.systring == 'property':
- return p_property_decl(s)
- elif s.sy == 'pass' and ctx.level != 'property':
- return p_pass_statement(s, with_newline=True)
- else:
- if ctx.level in ('c_class_pxd', 'property'):
- node = p_ignorable_statement(s)
- if node is not None:
- return node
- s.error("Executable statement not allowed here")
- if s.sy == 'if':
- return p_if_statement(s)
- elif s.sy == 'while':
- return p_while_statement(s)
- elif s.sy == 'for':
- return p_for_statement(s)
- elif s.sy == 'try':
- return p_try_statement(s)
- elif s.sy == 'with':
- return p_with_statement(s)
+ if not isinstance(node, tup):
+ s.error("Decorators can only be followed by functions or classes")
+ node.decorators = decorators
+ return node
+ else:
+ if ctx.api:
+ s.error("'api' not allowed with this statement", fatal=False)
+ elif s.sy == 'def':
+ # def statements aren't allowed in pxd files, except
+ # as part of a cdef class
+ if ('pxd' in ctx.level) and (ctx.level != 'c_class_pxd'):
+ s.error('def statement not allowed here')
+ s.level = ctx.level
+ return p_def_statement(s, decorators)
+ elif s.sy == 'class':
+ if ctx.level not in ('module', 'function', 'class', 'other'):
+ s.error("class definition not allowed here")
+ return p_class_statement(s, decorators)
+ elif s.sy == 'include':
+ if ctx.level not in ('module', 'module_pxd'):
+ s.error("include statement not allowed here")
+ return p_include_statement(s, ctx)
+ elif ctx.level == 'c_class' and s.sy == 'IDENT' and s.systring == 'property':
+ return p_property_decl(s)
+ elif s.sy == 'pass' and ctx.level != 'property':
+ return p_pass_statement(s, with_newline=True)
+ else:
+ if ctx.level in ('c_class_pxd', 'property'):
+ node = p_ignorable_statement(s)
+ if node is not None:
+ return node
+ s.error("Executable statement not allowed here")
+ if s.sy == 'if':
+ return p_if_statement(s)
+ elif s.sy == 'while':
+ return p_while_statement(s)
+ elif s.sy == 'for':
+ return p_for_statement(s)
+ elif s.sy == 'try':
+ return p_try_statement(s)
+ elif s.sy == 'with':
+ return p_with_statement(s)
elif s.sy == 'async':
s.next()
return p_async_statement(s, ctx, decorators)
- else:
+ else:
if s.sy == 'IDENT' and s.systring == 'async':
ident_name = s.systring
# PEP 492 enables the async/await keywords when it spots "async def ..."
@@ -2336,134 +2336,134 @@ def p_statement(s, ctx, first_statement = 0):
s.error("Decorators can only be followed by functions or classes")
s.put_back('IDENT', ident_name) # re-insert original token
return p_simple_statement_list(s, ctx, first_statement=first_statement)
-
-
-def p_statement_list(s, ctx, first_statement = 0):
- # Parse a series of statements separated by newlines.
- pos = s.position()
- stats = []
- while s.sy not in ('DEDENT', 'EOF'):
- stat = p_statement(s, ctx, first_statement = first_statement)
- if isinstance(stat, Nodes.PassStatNode):
- continue
- stats.append(stat)
- first_statement = False
- if not stats:
- return Nodes.PassStatNode(pos)
- elif len(stats) == 1:
- return stats[0]
- else:
- return Nodes.StatListNode(pos, stats = stats)
-
-
-def p_suite(s, ctx=Ctx()):
- return p_suite_with_docstring(s, ctx, with_doc_only=False)[1]
-
-
-def p_suite_with_docstring(s, ctx, with_doc_only=False):
- s.expect(':')
- doc = None
- if s.sy == 'NEWLINE':
- s.next()
- s.expect_indent()
- if with_doc_only:
- doc = p_doc_string(s)
- body = p_statement_list(s, ctx)
- s.expect_dedent()
- else:
- if ctx.api:
- s.error("'api' not allowed with this statement", fatal=False)
- if ctx.level in ('module', 'class', 'function', 'other'):
- body = p_simple_statement_list(s, ctx)
- else:
- body = p_pass_statement(s)
- s.expect_newline("Syntax error in declarations", ignore_semicolon=True)
- if not with_doc_only:
- doc, body = _extract_docstring(body)
- return doc, body
-
-
-def p_positional_and_keyword_args(s, end_sy_set, templates = None):
- """
- Parses positional and keyword arguments. end_sy_set
- should contain any s.sy that terminate the argument list.
- Argument expansion (* and **) are not allowed.
-
- Returns: (positional_args, keyword_args)
- """
- positional_args = []
- keyword_args = []
- pos_idx = 0
-
- while s.sy not in end_sy_set:
- if s.sy == '*' or s.sy == '**':
- s.error('Argument expansion not allowed here.', fatal=False)
-
- parsed_type = False
- if s.sy == 'IDENT' and s.peek()[0] == '=':
- ident = s.systring
- s.next() # s.sy is '='
- s.next()
- if looking_at_expr(s):
- arg = p_test(s)
- else:
- base_type = p_c_base_type(s, templates = templates)
- declarator = p_c_declarator(s, empty = 1)
- arg = Nodes.CComplexBaseTypeNode(base_type.pos,
- base_type = base_type, declarator = declarator)
- parsed_type = True
+
+
+def p_statement_list(s, ctx, first_statement = 0):
+ # Parse a series of statements separated by newlines.
+ pos = s.position()
+ stats = []
+ while s.sy not in ('DEDENT', 'EOF'):
+ stat = p_statement(s, ctx, first_statement = first_statement)
+ if isinstance(stat, Nodes.PassStatNode):
+ continue
+ stats.append(stat)
+ first_statement = False
+ if not stats:
+ return Nodes.PassStatNode(pos)
+ elif len(stats) == 1:
+ return stats[0]
+ else:
+ return Nodes.StatListNode(pos, stats = stats)
+
+
+def p_suite(s, ctx=Ctx()):
+ return p_suite_with_docstring(s, ctx, with_doc_only=False)[1]
+
+
+def p_suite_with_docstring(s, ctx, with_doc_only=False):
+ s.expect(':')
+ doc = None
+ if s.sy == 'NEWLINE':
+ s.next()
+ s.expect_indent()
+ if with_doc_only:
+ doc = p_doc_string(s)
+ body = p_statement_list(s, ctx)
+ s.expect_dedent()
+ else:
+ if ctx.api:
+ s.error("'api' not allowed with this statement", fatal=False)
+ if ctx.level in ('module', 'class', 'function', 'other'):
+ body = p_simple_statement_list(s, ctx)
+ else:
+ body = p_pass_statement(s)
+ s.expect_newline("Syntax error in declarations", ignore_semicolon=True)
+ if not with_doc_only:
+ doc, body = _extract_docstring(body)
+ return doc, body
+
+
+def p_positional_and_keyword_args(s, end_sy_set, templates = None):
+ """
+ Parses positional and keyword arguments. end_sy_set
+ should contain any s.sy that terminate the argument list.
+ Argument expansion (* and **) are not allowed.
+
+ Returns: (positional_args, keyword_args)
+ """
+ positional_args = []
+ keyword_args = []
+ pos_idx = 0
+
+ while s.sy not in end_sy_set:
+ if s.sy == '*' or s.sy == '**':
+ s.error('Argument expansion not allowed here.', fatal=False)
+
+ parsed_type = False
+ if s.sy == 'IDENT' and s.peek()[0] == '=':
+ ident = s.systring
+ s.next() # s.sy is '='
+ s.next()
+ if looking_at_expr(s):
+ arg = p_test(s)
+ else:
+ base_type = p_c_base_type(s, templates = templates)
+ declarator = p_c_declarator(s, empty = 1)
+ arg = Nodes.CComplexBaseTypeNode(base_type.pos,
+ base_type = base_type, declarator = declarator)
+ parsed_type = True
keyword_node = ExprNodes.IdentifierStringNode(arg.pos, value=ident)
- keyword_args.append((keyword_node, arg))
- was_keyword = True
-
- else:
- if looking_at_expr(s):
- arg = p_test(s)
- else:
- base_type = p_c_base_type(s, templates = templates)
- declarator = p_c_declarator(s, empty = 1)
- arg = Nodes.CComplexBaseTypeNode(base_type.pos,
- base_type = base_type, declarator = declarator)
- parsed_type = True
- positional_args.append(arg)
- pos_idx += 1
- if len(keyword_args) > 0:
- s.error("Non-keyword arg following keyword arg",
- pos=arg.pos)
-
- if s.sy != ',':
- if s.sy not in end_sy_set:
- if parsed_type:
- s.error("Unmatched %s" % " or ".join(end_sy_set))
- break
- s.next()
- return positional_args, keyword_args
-
-def p_c_base_type(s, self_flag = 0, nonempty = 0, templates = None):
- # If self_flag is true, this is the base type for the
- # self argument of a C method of an extension type.
- if s.sy == '(':
- return p_c_complex_base_type(s, templates = templates)
- else:
- return p_c_simple_base_type(s, self_flag, nonempty = nonempty, templates = templates)
-
-def p_calling_convention(s):
- if s.sy == 'IDENT' and s.systring in calling_convention_words:
- result = s.systring
- s.next()
- return result
- else:
- return ""
-
-
-calling_convention_words = cython.declare(
- set, set(["__stdcall", "__cdecl", "__fastcall"]))
-
-
-def p_c_complex_base_type(s, templates = None):
- # s.sy == '('
- pos = s.position()
- s.next()
+ keyword_args.append((keyword_node, arg))
+ was_keyword = True
+
+ else:
+ if looking_at_expr(s):
+ arg = p_test(s)
+ else:
+ base_type = p_c_base_type(s, templates = templates)
+ declarator = p_c_declarator(s, empty = 1)
+ arg = Nodes.CComplexBaseTypeNode(base_type.pos,
+ base_type = base_type, declarator = declarator)
+ parsed_type = True
+ positional_args.append(arg)
+ pos_idx += 1
+ if len(keyword_args) > 0:
+ s.error("Non-keyword arg following keyword arg",
+ pos=arg.pos)
+
+ if s.sy != ',':
+ if s.sy not in end_sy_set:
+ if parsed_type:
+ s.error("Unmatched %s" % " or ".join(end_sy_set))
+ break
+ s.next()
+ return positional_args, keyword_args
+
+def p_c_base_type(s, self_flag = 0, nonempty = 0, templates = None):
+ # If self_flag is true, this is the base type for the
+ # self argument of a C method of an extension type.
+ if s.sy == '(':
+ return p_c_complex_base_type(s, templates = templates)
+ else:
+ return p_c_simple_base_type(s, self_flag, nonempty = nonempty, templates = templates)
+
+def p_calling_convention(s):
+ if s.sy == 'IDENT' and s.systring in calling_convention_words:
+ result = s.systring
+ s.next()
+ return result
+ else:
+ return ""
+
+
+calling_convention_words = cython.declare(
+ set, set(["__stdcall", "__cdecl", "__fastcall"]))
+
+
+def p_c_complex_base_type(s, templates = None):
+ # s.sy == '('
+ pos = s.position()
+ s.next()
base_type = p_c_base_type(s, templates=templates)
declarator = p_c_declarator(s, empty=True)
type_node = Nodes.CComplexBaseTypeNode(
@@ -2480,439 +2480,439 @@ def p_c_complex_base_type(s, templates = None):
pos, base_type=base_type, declarator=declarator))
type_node = Nodes.CTupleBaseTypeNode(pos, components = components)
- s.expect(')')
- if s.sy == '[':
- if is_memoryviewslice_access(s):
- type_node = p_memoryviewslice_access(s, type_node)
- else:
- type_node = p_buffer_or_template(s, type_node, templates)
- return type_node
-
-
-def p_c_simple_base_type(s, self_flag, nonempty, templates = None):
- #print "p_c_simple_base_type: self_flag =", self_flag, nonempty
- is_basic = 0
- signed = 1
- longness = 0
- complex = 0
- module_path = []
- pos = s.position()
- if not s.sy == 'IDENT':
- error(pos, "Expected an identifier, found '%s'" % s.sy)
- if s.systring == 'const':
- s.next()
+ s.expect(')')
+ if s.sy == '[':
+ if is_memoryviewslice_access(s):
+ type_node = p_memoryviewslice_access(s, type_node)
+ else:
+ type_node = p_buffer_or_template(s, type_node, templates)
+ return type_node
+
+
+def p_c_simple_base_type(s, self_flag, nonempty, templates = None):
+ #print "p_c_simple_base_type: self_flag =", self_flag, nonempty
+ is_basic = 0
+ signed = 1
+ longness = 0
+ complex = 0
+ module_path = []
+ pos = s.position()
+ if not s.sy == 'IDENT':
+ error(pos, "Expected an identifier, found '%s'" % s.sy)
+ if s.systring == 'const':
+ s.next()
base_type = p_c_base_type(s, self_flag=self_flag, nonempty=nonempty, templates=templates)
if isinstance(base_type, Nodes.MemoryViewSliceTypeNode):
# reverse order to avoid having to write "(const int)[:]"
base_type.base_type_node = Nodes.CConstTypeNode(pos, base_type=base_type.base_type_node)
return base_type
return Nodes.CConstTypeNode(pos, base_type=base_type)
- if looking_at_base_type(s):
- #print "p_c_simple_base_type: looking_at_base_type at", s.position()
- is_basic = 1
- if s.sy == 'IDENT' and s.systring in special_basic_c_types:
- signed, longness = special_basic_c_types[s.systring]
- name = s.systring
- s.next()
- else:
- signed, longness = p_sign_and_longness(s)
- if s.sy == 'IDENT' and s.systring in basic_c_type_names:
- name = s.systring
- s.next()
- else:
- name = 'int' # long [int], short [int], long [int] complex, etc.
- if s.sy == 'IDENT' and s.systring == 'complex':
- complex = 1
- s.next()
- elif looking_at_dotted_name(s):
- #print "p_c_simple_base_type: looking_at_type_name at", s.position()
- name = s.systring
- s.next()
- while s.sy == '.':
- module_path.append(name)
- s.next()
- name = p_ident(s)
- else:
- name = s.systring
- s.next()
- if nonempty and s.sy != 'IDENT':
- # Make sure this is not a declaration of a variable or function.
- if s.sy == '(':
- s.next()
- if (s.sy == '*' or s.sy == '**' or s.sy == '&'
- or (s.sy == 'IDENT' and s.systring in calling_convention_words)):
- s.put_back('(', '(')
- else:
- s.put_back('(', '(')
- s.put_back('IDENT', name)
- name = None
- elif s.sy not in ('*', '**', '[', '&'):
- s.put_back('IDENT', name)
- name = None
-
- type_node = Nodes.CSimpleBaseTypeNode(pos,
- name = name, module_path = module_path,
- is_basic_c_type = is_basic, signed = signed,
- complex = complex, longness = longness,
- is_self_arg = self_flag, templates = templates)
-
- # declarations here.
- if s.sy == '[':
- if is_memoryviewslice_access(s):
- type_node = p_memoryviewslice_access(s, type_node)
- else:
- type_node = p_buffer_or_template(s, type_node, templates)
-
- if s.sy == '.':
- s.next()
- name = p_ident(s)
- type_node = Nodes.CNestedBaseTypeNode(pos, base_type = type_node, name = name)
-
- return type_node
-
-def p_buffer_or_template(s, base_type_node, templates):
- # s.sy == '['
- pos = s.position()
- s.next()
- # Note that buffer_positional_options_count=1, so the only positional argument is dtype.
- # For templated types, all parameters are types.
- positional_args, keyword_args = (
- p_positional_and_keyword_args(s, (']',), templates)
- )
- s.expect(']')
-
- if s.sy == '[':
- base_type_node = p_buffer_or_template(s, base_type_node, templates)
-
- keyword_dict = ExprNodes.DictNode(pos,
- key_value_pairs = [
- ExprNodes.DictItemNode(pos=key.pos, key=key, value=value)
- for key, value in keyword_args
- ])
- result = Nodes.TemplatedTypeNode(pos,
- positional_args = positional_args,
- keyword_args = keyword_dict,
- base_type_node = base_type_node)
- return result
-
-def p_bracketed_base_type(s, base_type_node, nonempty, empty):
- # s.sy == '['
- if empty and not nonempty:
- # sizeof-like thing. Only anonymous C arrays allowed (int[SIZE]).
- return base_type_node
- elif not empty and nonempty:
- # declaration of either memoryview slice or buffer.
- if is_memoryviewslice_access(s):
- return p_memoryviewslice_access(s, base_type_node)
- else:
- return p_buffer_or_template(s, base_type_node, None)
- # return p_buffer_access(s, base_type_node)
- elif not empty and not nonempty:
- # only anonymous C arrays and memoryview slice arrays here. We
- # disallow buffer declarations for now, due to ambiguity with anonymous
- # C arrays.
- if is_memoryviewslice_access(s):
- return p_memoryviewslice_access(s, base_type_node)
- else:
- return base_type_node
-
-def is_memoryviewslice_access(s):
- # s.sy == '['
- # a memoryview slice declaration is distinguishable from a buffer access
- # declaration by the first entry in the bracketed list. The buffer will
- # not have an unnested colon in the first entry; the memoryview slice will.
- saved = [(s.sy, s.systring)]
- s.next()
- retval = False
- if s.systring == ':':
- retval = True
- elif s.sy == 'INT':
- saved.append((s.sy, s.systring))
- s.next()
- if s.sy == ':':
- retval = True
-
- for sv in saved[::-1]:
- s.put_back(*sv)
-
- return retval
-
-def p_memoryviewslice_access(s, base_type_node):
- # s.sy == '['
- pos = s.position()
- s.next()
- subscripts, _ = p_subscript_list(s)
- # make sure each entry in subscripts is a slice
- for subscript in subscripts:
- if len(subscript) < 2:
- s.error("An axis specification in memoryview declaration does not have a ':'.")
- s.expect(']')
- indexes = make_slice_nodes(pos, subscripts)
- result = Nodes.MemoryViewSliceTypeNode(pos,
- base_type_node = base_type_node,
- axes = indexes)
- return result
-
-def looking_at_name(s):
- return s.sy == 'IDENT' and not s.systring in calling_convention_words
-
-def looking_at_expr(s):
- if s.systring in base_type_start_words:
- return False
- elif s.sy == 'IDENT':
- is_type = False
- name = s.systring
- dotted_path = []
- s.next()
-
- while s.sy == '.':
- s.next()
- dotted_path.append(s.systring)
- s.expect('IDENT')
-
- saved = s.sy, s.systring
- if s.sy == 'IDENT':
- is_type = True
- elif s.sy == '*' or s.sy == '**':
- s.next()
- is_type = s.sy in (')', ']')
- s.put_back(*saved)
- elif s.sy == '(':
- s.next()
- is_type = s.sy == '*'
- s.put_back(*saved)
- elif s.sy == '[':
- s.next()
+ if looking_at_base_type(s):
+ #print "p_c_simple_base_type: looking_at_base_type at", s.position()
+ is_basic = 1
+ if s.sy == 'IDENT' and s.systring in special_basic_c_types:
+ signed, longness = special_basic_c_types[s.systring]
+ name = s.systring
+ s.next()
+ else:
+ signed, longness = p_sign_and_longness(s)
+ if s.sy == 'IDENT' and s.systring in basic_c_type_names:
+ name = s.systring
+ s.next()
+ else:
+ name = 'int' # long [int], short [int], long [int] complex, etc.
+ if s.sy == 'IDENT' and s.systring == 'complex':
+ complex = 1
+ s.next()
+ elif looking_at_dotted_name(s):
+ #print "p_c_simple_base_type: looking_at_type_name at", s.position()
+ name = s.systring
+ s.next()
+ while s.sy == '.':
+ module_path.append(name)
+ s.next()
+ name = p_ident(s)
+ else:
+ name = s.systring
+ s.next()
+ if nonempty and s.sy != 'IDENT':
+ # Make sure this is not a declaration of a variable or function.
+ if s.sy == '(':
+ s.next()
+ if (s.sy == '*' or s.sy == '**' or s.sy == '&'
+ or (s.sy == 'IDENT' and s.systring in calling_convention_words)):
+ s.put_back('(', '(')
+ else:
+ s.put_back('(', '(')
+ s.put_back('IDENT', name)
+ name = None
+ elif s.sy not in ('*', '**', '[', '&'):
+ s.put_back('IDENT', name)
+ name = None
+
+ type_node = Nodes.CSimpleBaseTypeNode(pos,
+ name = name, module_path = module_path,
+ is_basic_c_type = is_basic, signed = signed,
+ complex = complex, longness = longness,
+ is_self_arg = self_flag, templates = templates)
+
+ # declarations here.
+ if s.sy == '[':
+ if is_memoryviewslice_access(s):
+ type_node = p_memoryviewslice_access(s, type_node)
+ else:
+ type_node = p_buffer_or_template(s, type_node, templates)
+
+ if s.sy == '.':
+ s.next()
+ name = p_ident(s)
+ type_node = Nodes.CNestedBaseTypeNode(pos, base_type = type_node, name = name)
+
+ return type_node
+
+def p_buffer_or_template(s, base_type_node, templates):
+ # s.sy == '['
+ pos = s.position()
+ s.next()
+ # Note that buffer_positional_options_count=1, so the only positional argument is dtype.
+ # For templated types, all parameters are types.
+ positional_args, keyword_args = (
+ p_positional_and_keyword_args(s, (']',), templates)
+ )
+ s.expect(']')
+
+ if s.sy == '[':
+ base_type_node = p_buffer_or_template(s, base_type_node, templates)
+
+ keyword_dict = ExprNodes.DictNode(pos,
+ key_value_pairs = [
+ ExprNodes.DictItemNode(pos=key.pos, key=key, value=value)
+ for key, value in keyword_args
+ ])
+ result = Nodes.TemplatedTypeNode(pos,
+ positional_args = positional_args,
+ keyword_args = keyword_dict,
+ base_type_node = base_type_node)
+ return result
+
+def p_bracketed_base_type(s, base_type_node, nonempty, empty):
+ # s.sy == '['
+ if empty and not nonempty:
+ # sizeof-like thing. Only anonymous C arrays allowed (int[SIZE]).
+ return base_type_node
+ elif not empty and nonempty:
+ # declaration of either memoryview slice or buffer.
+ if is_memoryviewslice_access(s):
+ return p_memoryviewslice_access(s, base_type_node)
+ else:
+ return p_buffer_or_template(s, base_type_node, None)
+ # return p_buffer_access(s, base_type_node)
+ elif not empty and not nonempty:
+ # only anonymous C arrays and memoryview slice arrays here. We
+ # disallow buffer declarations for now, due to ambiguity with anonymous
+ # C arrays.
+ if is_memoryviewslice_access(s):
+ return p_memoryviewslice_access(s, base_type_node)
+ else:
+ return base_type_node
+
+def is_memoryviewslice_access(s):
+ # s.sy == '['
+ # a memoryview slice declaration is distinguishable from a buffer access
+ # declaration by the first entry in the bracketed list. The buffer will
+ # not have an unnested colon in the first entry; the memoryview slice will.
+ saved = [(s.sy, s.systring)]
+ s.next()
+ retval = False
+ if s.systring == ':':
+ retval = True
+ elif s.sy == 'INT':
+ saved.append((s.sy, s.systring))
+ s.next()
+ if s.sy == ':':
+ retval = True
+
+ for sv in saved[::-1]:
+ s.put_back(*sv)
+
+ return retval
+
+def p_memoryviewslice_access(s, base_type_node):
+ # s.sy == '['
+ pos = s.position()
+ s.next()
+ subscripts, _ = p_subscript_list(s)
+ # make sure each entry in subscripts is a slice
+ for subscript in subscripts:
+ if len(subscript) < 2:
+ s.error("An axis specification in memoryview declaration does not have a ':'.")
+ s.expect(']')
+ indexes = make_slice_nodes(pos, subscripts)
+ result = Nodes.MemoryViewSliceTypeNode(pos,
+ base_type_node = base_type_node,
+ axes = indexes)
+ return result
+
+def looking_at_name(s):
+ return s.sy == 'IDENT' and not s.systring in calling_convention_words
+
+def looking_at_expr(s):
+ if s.systring in base_type_start_words:
+ return False
+ elif s.sy == 'IDENT':
+ is_type = False
+ name = s.systring
+ dotted_path = []
+ s.next()
+
+ while s.sy == '.':
+ s.next()
+ dotted_path.append(s.systring)
+ s.expect('IDENT')
+
+ saved = s.sy, s.systring
+ if s.sy == 'IDENT':
+ is_type = True
+ elif s.sy == '*' or s.sy == '**':
+ s.next()
+ is_type = s.sy in (')', ']')
+ s.put_back(*saved)
+ elif s.sy == '(':
+ s.next()
+ is_type = s.sy == '*'
+ s.put_back(*saved)
+ elif s.sy == '[':
+ s.next()
is_type = s.sy == ']' or not looking_at_expr(s) # could be a nested template type
- s.put_back(*saved)
-
- dotted_path.reverse()
- for p in dotted_path:
- s.put_back('IDENT', p)
- s.put_back('.', '.')
-
- s.put_back('IDENT', name)
- return not is_type and saved[0]
- else:
- return True
-
-def looking_at_base_type(s):
- #print "looking_at_base_type?", s.sy, s.systring, s.position()
- return s.sy == 'IDENT' and s.systring in base_type_start_words
-
-def looking_at_dotted_name(s):
- if s.sy == 'IDENT':
- name = s.systring
- s.next()
- result = s.sy == '.'
- s.put_back('IDENT', name)
- return result
- else:
- return 0
-
-def looking_at_call(s):
- "See if we're looking at a.b.c("
- # Don't mess up the original position, so save and restore it.
- # Unfortunately there's no good way to handle this, as a subsequent call
- # to next() will not advance the position until it reads a new token.
- position = s.start_line, s.start_col
- result = looking_at_expr(s) == u'('
- if not result:
- s.start_line, s.start_col = position
- return result
-
-basic_c_type_names = cython.declare(
- set, set(["void", "char", "int", "float", "double", "bint"]))
-
-special_basic_c_types = cython.declare(dict, {
- # name : (signed, longness)
- "Py_UNICODE" : (0, 0),
- "Py_UCS4" : (0, 0),
+ s.put_back(*saved)
+
+ dotted_path.reverse()
+ for p in dotted_path:
+ s.put_back('IDENT', p)
+ s.put_back('.', '.')
+
+ s.put_back('IDENT', name)
+ return not is_type and saved[0]
+ else:
+ return True
+
+def looking_at_base_type(s):
+ #print "looking_at_base_type?", s.sy, s.systring, s.position()
+ return s.sy == 'IDENT' and s.systring in base_type_start_words
+
+def looking_at_dotted_name(s):
+ if s.sy == 'IDENT':
+ name = s.systring
+ s.next()
+ result = s.sy == '.'
+ s.put_back('IDENT', name)
+ return result
+ else:
+ return 0
+
+def looking_at_call(s):
+ "See if we're looking at a.b.c("
+ # Don't mess up the original position, so save and restore it.
+ # Unfortunately there's no good way to handle this, as a subsequent call
+ # to next() will not advance the position until it reads a new token.
+ position = s.start_line, s.start_col
+ result = looking_at_expr(s) == u'('
+ if not result:
+ s.start_line, s.start_col = position
+ return result
+
+basic_c_type_names = cython.declare(
+ set, set(["void", "char", "int", "float", "double", "bint"]))
+
+special_basic_c_types = cython.declare(dict, {
+ # name : (signed, longness)
+ "Py_UNICODE" : (0, 0),
+ "Py_UCS4" : (0, 0),
"Py_hash_t" : (2, 0),
- "Py_ssize_t" : (2, 0),
- "ssize_t" : (2, 0),
- "size_t" : (0, 0),
- "ptrdiff_t" : (2, 0),
+ "Py_ssize_t" : (2, 0),
+ "ssize_t" : (2, 0),
+ "size_t" : (0, 0),
+ "ptrdiff_t" : (2, 0),
"Py_tss_t" : (1, 0),
-})
-
-sign_and_longness_words = cython.declare(
- set, set(["short", "long", "signed", "unsigned"]))
-
-base_type_start_words = cython.declare(
- set,
- basic_c_type_names
- | sign_and_longness_words
- | set(special_basic_c_types))
-
-struct_enum_union = cython.declare(
- set, set(["struct", "union", "enum", "packed"]))
-
-def p_sign_and_longness(s):
- signed = 1
- longness = 0
- while s.sy == 'IDENT' and s.systring in sign_and_longness_words:
- if s.systring == 'unsigned':
- signed = 0
- elif s.systring == 'signed':
- signed = 2
- elif s.systring == 'short':
- longness = -1
- elif s.systring == 'long':
- longness += 1
- s.next()
- return signed, longness
-
-def p_opt_cname(s):
- literal = p_opt_string_literal(s, 'u')
- if literal is not None:
- cname = EncodedString(literal)
- cname.encoding = s.source_encoding
- else:
- cname = None
- return cname
-
-def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0,
- assignable = 0, nonempty = 0,
- calling_convention_allowed = 0):
- # If empty is true, the declarator must be empty. If nonempty is true,
- # the declarator must be nonempty. Otherwise we don't care.
- # If cmethod_flag is true, then if this declarator declares
- # a function, it's a C method of an extension type.
- pos = s.position()
- if s.sy == '(':
- s.next()
- if s.sy == ')' or looking_at_name(s):
+})
+
+sign_and_longness_words = cython.declare(
+ set, set(["short", "long", "signed", "unsigned"]))
+
+base_type_start_words = cython.declare(
+ set,
+ basic_c_type_names
+ | sign_and_longness_words
+ | set(special_basic_c_types))
+
+struct_enum_union = cython.declare(
+ set, set(["struct", "union", "enum", "packed"]))
+
+def p_sign_and_longness(s):
+ signed = 1
+ longness = 0
+ while s.sy == 'IDENT' and s.systring in sign_and_longness_words:
+ if s.systring == 'unsigned':
+ signed = 0
+ elif s.systring == 'signed':
+ signed = 2
+ elif s.systring == 'short':
+ longness = -1
+ elif s.systring == 'long':
+ longness += 1
+ s.next()
+ return signed, longness
+
+def p_opt_cname(s):
+ literal = p_opt_string_literal(s, 'u')
+ if literal is not None:
+ cname = EncodedString(literal)
+ cname.encoding = s.source_encoding
+ else:
+ cname = None
+ return cname
+
+def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0,
+ assignable = 0, nonempty = 0,
+ calling_convention_allowed = 0):
+ # If empty is true, the declarator must be empty. If nonempty is true,
+ # the declarator must be nonempty. Otherwise we don't care.
+ # If cmethod_flag is true, then if this declarator declares
+ # a function, it's a C method of an extension type.
+ pos = s.position()
+ if s.sy == '(':
+ s.next()
+ if s.sy == ')' or looking_at_name(s):
base = Nodes.CNameDeclaratorNode(pos, name=s.context.intern_ustring(u""), cname=None)
- result = p_c_func_declarator(s, pos, ctx, base, cmethod_flag)
- else:
- result = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
- cmethod_flag = cmethod_flag,
- nonempty = nonempty,
- calling_convention_allowed = 1)
- s.expect(')')
- else:
- result = p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
- assignable, nonempty)
- if not calling_convention_allowed and result.calling_convention and s.sy != '(':
- error(s.position(), "%s on something that is not a function"
- % result.calling_convention)
- while s.sy in ('[', '('):
- pos = s.position()
- if s.sy == '[':
- result = p_c_array_declarator(s, result)
- else: # sy == '('
- s.next()
- result = p_c_func_declarator(s, pos, ctx, result, cmethod_flag)
- cmethod_flag = 0
- return result
-
-def p_c_array_declarator(s, base):
- pos = s.position()
- s.next() # '['
- if s.sy != ']':
- dim = p_testlist(s)
- else:
- dim = None
- s.expect(']')
- return Nodes.CArrayDeclaratorNode(pos, base = base, dimension = dim)
-
-def p_c_func_declarator(s, pos, ctx, base, cmethod_flag):
- # Opening paren has already been skipped
- args = p_c_arg_list(s, ctx, cmethod_flag = cmethod_flag,
- nonempty_declarators = 0)
- ellipsis = p_optional_ellipsis(s)
- s.expect(')')
- nogil = p_nogil(s)
- exc_val, exc_check = p_exception_value_clause(s)
- with_gil = p_with_gil(s)
- return Nodes.CFuncDeclaratorNode(pos,
- base = base, args = args, has_varargs = ellipsis,
- exception_value = exc_val, exception_check = exc_check,
- nogil = nogil or ctx.nogil or with_gil, with_gil = with_gil)
-
-supported_overloaded_operators = cython.declare(set, set([
- '+', '-', '*', '/', '%',
- '++', '--', '~', '|', '&', '^', '<<', '>>', ',',
- '==', '!=', '>=', '>', '<=', '<',
+ result = p_c_func_declarator(s, pos, ctx, base, cmethod_flag)
+ else:
+ result = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
+ cmethod_flag = cmethod_flag,
+ nonempty = nonempty,
+ calling_convention_allowed = 1)
+ s.expect(')')
+ else:
+ result = p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
+ assignable, nonempty)
+ if not calling_convention_allowed and result.calling_convention and s.sy != '(':
+ error(s.position(), "%s on something that is not a function"
+ % result.calling_convention)
+ while s.sy in ('[', '('):
+ pos = s.position()
+ if s.sy == '[':
+ result = p_c_array_declarator(s, result)
+ else: # sy == '('
+ s.next()
+ result = p_c_func_declarator(s, pos, ctx, result, cmethod_flag)
+ cmethod_flag = 0
+ return result
+
+def p_c_array_declarator(s, base):
+ pos = s.position()
+ s.next() # '['
+ if s.sy != ']':
+ dim = p_testlist(s)
+ else:
+ dim = None
+ s.expect(']')
+ return Nodes.CArrayDeclaratorNode(pos, base = base, dimension = dim)
+
+def p_c_func_declarator(s, pos, ctx, base, cmethod_flag):
+ # Opening paren has already been skipped
+ args = p_c_arg_list(s, ctx, cmethod_flag = cmethod_flag,
+ nonempty_declarators = 0)
+ ellipsis = p_optional_ellipsis(s)
+ s.expect(')')
+ nogil = p_nogil(s)
+ exc_val, exc_check = p_exception_value_clause(s)
+ with_gil = p_with_gil(s)
+ return Nodes.CFuncDeclaratorNode(pos,
+ base = base, args = args, has_varargs = ellipsis,
+ exception_value = exc_val, exception_check = exc_check,
+ nogil = nogil or ctx.nogil or with_gil, with_gil = with_gil)
+
+supported_overloaded_operators = cython.declare(set, set([
+ '+', '-', '*', '/', '%',
+ '++', '--', '~', '|', '&', '^', '<<', '>>', ',',
+ '==', '!=', '>=', '>', '<=', '<',
'[]', '()', '!', '=',
'bool',
-]))
-
-def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
- assignable, nonempty):
- pos = s.position()
- calling_convention = p_calling_convention(s)
- if s.sy == '*':
- s.next()
- if s.systring == 'const':
- const_pos = s.position()
- s.next()
- const_base = p_c_declarator(s, ctx, empty = empty,
- is_type = is_type,
- cmethod_flag = cmethod_flag,
- assignable = assignable,
- nonempty = nonempty)
- base = Nodes.CConstDeclaratorNode(const_pos, base = const_base)
- else:
- base = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
- cmethod_flag = cmethod_flag,
- assignable = assignable, nonempty = nonempty)
- result = Nodes.CPtrDeclaratorNode(pos,
- base = base)
- elif s.sy == '**': # scanner returns this as a single token
- s.next()
- base = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
- cmethod_flag = cmethod_flag,
- assignable = assignable, nonempty = nonempty)
- result = Nodes.CPtrDeclaratorNode(pos,
- base = Nodes.CPtrDeclaratorNode(pos,
- base = base))
- elif s.sy == '&':
- s.next()
- base = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
- cmethod_flag = cmethod_flag,
- assignable = assignable, nonempty = nonempty)
- result = Nodes.CReferenceDeclaratorNode(pos, base = base)
- else:
- rhs = None
- if s.sy == 'IDENT':
+]))
+
+def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
+ assignable, nonempty):
+ pos = s.position()
+ calling_convention = p_calling_convention(s)
+ if s.sy == '*':
+ s.next()
+ if s.systring == 'const':
+ const_pos = s.position()
+ s.next()
+ const_base = p_c_declarator(s, ctx, empty = empty,
+ is_type = is_type,
+ cmethod_flag = cmethod_flag,
+ assignable = assignable,
+ nonempty = nonempty)
+ base = Nodes.CConstDeclaratorNode(const_pos, base = const_base)
+ else:
+ base = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
+ cmethod_flag = cmethod_flag,
+ assignable = assignable, nonempty = nonempty)
+ result = Nodes.CPtrDeclaratorNode(pos,
+ base = base)
+ elif s.sy == '**': # scanner returns this as a single token
+ s.next()
+ base = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
+ cmethod_flag = cmethod_flag,
+ assignable = assignable, nonempty = nonempty)
+ result = Nodes.CPtrDeclaratorNode(pos,
+ base = Nodes.CPtrDeclaratorNode(pos,
+ base = base))
+ elif s.sy == '&':
+ s.next()
+ base = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
+ cmethod_flag = cmethod_flag,
+ assignable = assignable, nonempty = nonempty)
+ result = Nodes.CReferenceDeclaratorNode(pos, base = base)
+ else:
+ rhs = None
+ if s.sy == 'IDENT':
name = s.systring
- if empty:
- error(s.position(), "Declarator should be empty")
- s.next()
- cname = p_opt_cname(s)
- if name != 'operator' and s.sy == '=' and assignable:
- s.next()
- rhs = p_test(s)
- else:
- if nonempty:
- error(s.position(), "Empty declarator")
- name = ""
- cname = None
- if cname is None and ctx.namespace is not None and nonempty:
- cname = ctx.namespace + "::" + name
- if name == 'operator' and ctx.visibility == 'extern' and nonempty:
- op = s.sy
- if [1 for c in op if c in '+-*/<=>!%&|([^~,']:
- s.next()
- # Handle diphthong operators.
- if op == '(':
- s.expect(')')
- op = '()'
- elif op == '[':
- s.expect(']')
- op = '[]'
- elif op in ('-', '+', '|', '&') and s.sy == op:
- op *= 2 # ++, --, ...
- s.next()
- elif s.sy == '=':
- op += s.sy # +=, -=, ...
- s.next()
- if op not in supported_overloaded_operators:
- s.error("Overloading operator '%s' not yet supported." % op,
- fatal=False)
- name += op
+ if empty:
+ error(s.position(), "Declarator should be empty")
+ s.next()
+ cname = p_opt_cname(s)
+ if name != 'operator' and s.sy == '=' and assignable:
+ s.next()
+ rhs = p_test(s)
+ else:
+ if nonempty:
+ error(s.position(), "Empty declarator")
+ name = ""
+ cname = None
+ if cname is None and ctx.namespace is not None and nonempty:
+ cname = ctx.namespace + "::" + name
+ if name == 'operator' and ctx.visibility == 'extern' and nonempty:
+ op = s.sy
+ if [1 for c in op if c in '+-*/<=>!%&|([^~,']:
+ s.next()
+ # Handle diphthong operators.
+ if op == '(':
+ s.expect(')')
+ op = '()'
+ elif op == '[':
+ s.expect(']')
+ op = '[]'
+ elif op in ('-', '+', '|', '&') and s.sy == op:
+ op *= 2 # ++, --, ...
+ s.next()
+ elif s.sy == '=':
+ op += s.sy # +=, -=, ...
+ s.next()
+ if op not in supported_overloaded_operators:
+ s.error("Overloading operator '%s' not yet supported." % op,
+ fatal=False)
+ name += op
elif op == 'IDENT':
op = s.systring;
if op not in supported_overloaded_operators:
@@ -2920,109 +2920,109 @@ def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
fatal=False)
name = name + ' ' + op
s.next()
- result = Nodes.CNameDeclaratorNode(pos,
- name = name, cname = cname, default = rhs)
- result.calling_convention = calling_convention
- return result
-
-def p_nogil(s):
- if s.sy == 'IDENT' and s.systring == 'nogil':
- s.next()
- return 1
- else:
- return 0
-
-def p_with_gil(s):
- if s.sy == 'with':
- s.next()
- s.expect_keyword('gil')
- return 1
- else:
- return 0
-
-def p_exception_value_clause(s):
- exc_val = None
- exc_check = 0
- if s.sy == 'except':
- s.next()
- if s.sy == '*':
- exc_check = 1
- s.next()
- elif s.sy == '+':
- exc_check = '+'
- s.next()
- if s.sy == 'IDENT':
- name = s.systring
- s.next()
- exc_val = p_name(s, name)
+ result = Nodes.CNameDeclaratorNode(pos,
+ name = name, cname = cname, default = rhs)
+ result.calling_convention = calling_convention
+ return result
+
+def p_nogil(s):
+ if s.sy == 'IDENT' and s.systring == 'nogil':
+ s.next()
+ return 1
+ else:
+ return 0
+
+def p_with_gil(s):
+ if s.sy == 'with':
+ s.next()
+ s.expect_keyword('gil')
+ return 1
+ else:
+ return 0
+
+def p_exception_value_clause(s):
+ exc_val = None
+ exc_check = 0
+ if s.sy == 'except':
+ s.next()
+ if s.sy == '*':
+ exc_check = 1
+ s.next()
+ elif s.sy == '+':
+ exc_check = '+'
+ s.next()
+ if s.sy == 'IDENT':
+ name = s.systring
+ s.next()
+ exc_val = p_name(s, name)
elif s.sy == '*':
exc_val = ExprNodes.CharNode(s.position(), value=u'*')
s.next()
- else:
- if s.sy == '?':
- exc_check = 1
- s.next()
- exc_val = p_test(s)
- return exc_val, exc_check
-
+ else:
+ if s.sy == '?':
+ exc_check = 1
+ s.next()
+ exc_val = p_test(s)
+ return exc_val, exc_check
+
c_arg_list_terminators = cython.declare(set, set(['*', '**', '.', ')', ':']))
-
-def p_c_arg_list(s, ctx = Ctx(), in_pyfunc = 0, cmethod_flag = 0,
- nonempty_declarators = 0, kw_only = 0, annotated = 1):
- # Comma-separated list of C argument declarations, possibly empty.
- # May have a trailing comma.
- args = []
- is_self_arg = cmethod_flag
- while s.sy not in c_arg_list_terminators:
- args.append(p_c_arg_decl(s, ctx, in_pyfunc, is_self_arg,
- nonempty = nonempty_declarators, kw_only = kw_only,
- annotated = annotated))
- if s.sy != ',':
- break
- s.next()
- is_self_arg = 0
- return args
-
-def p_optional_ellipsis(s):
- if s.sy == '.':
- expect_ellipsis(s)
- return 1
- else:
- return 0
-
-def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0,
- kw_only = 0, annotated = 1):
- pos = s.position()
- not_none = or_none = 0
- default = None
- annotation = None
- if s.in_python_file:
- # empty type declaration
- base_type = Nodes.CSimpleBaseTypeNode(pos,
- name = None, module_path = [],
- is_basic_c_type = 0, signed = 0,
- complex = 0, longness = 0,
- is_self_arg = cmethod_flag, templates = None)
- else:
- base_type = p_c_base_type(s, cmethod_flag, nonempty = nonempty)
- declarator = p_c_declarator(s, ctx, nonempty = nonempty)
- if s.sy in ('not', 'or') and not s.in_python_file:
- kind = s.sy
- s.next()
- if s.sy == 'IDENT' and s.systring == 'None':
- s.next()
- else:
- s.error("Expected 'None'")
- if not in_pyfunc:
- error(pos, "'%s None' only allowed in Python functions" % kind)
- or_none = kind == 'or'
- not_none = kind == 'not'
- if annotated and s.sy == ':':
- s.next()
- annotation = p_test(s)
- if s.sy == '=':
- s.next()
- if 'pxd' in ctx.level:
+
+def p_c_arg_list(s, ctx = Ctx(), in_pyfunc = 0, cmethod_flag = 0,
+ nonempty_declarators = 0, kw_only = 0, annotated = 1):
+ # Comma-separated list of C argument declarations, possibly empty.
+ # May have a trailing comma.
+ args = []
+ is_self_arg = cmethod_flag
+ while s.sy not in c_arg_list_terminators:
+ args.append(p_c_arg_decl(s, ctx, in_pyfunc, is_self_arg,
+ nonempty = nonempty_declarators, kw_only = kw_only,
+ annotated = annotated))
+ if s.sy != ',':
+ break
+ s.next()
+ is_self_arg = 0
+ return args
+
+def p_optional_ellipsis(s):
+ if s.sy == '.':
+ expect_ellipsis(s)
+ return 1
+ else:
+ return 0
+
+def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0,
+ kw_only = 0, annotated = 1):
+ pos = s.position()
+ not_none = or_none = 0
+ default = None
+ annotation = None
+ if s.in_python_file:
+ # empty type declaration
+ base_type = Nodes.CSimpleBaseTypeNode(pos,
+ name = None, module_path = [],
+ is_basic_c_type = 0, signed = 0,
+ complex = 0, longness = 0,
+ is_self_arg = cmethod_flag, templates = None)
+ else:
+ base_type = p_c_base_type(s, cmethod_flag, nonempty = nonempty)
+ declarator = p_c_declarator(s, ctx, nonempty = nonempty)
+ if s.sy in ('not', 'or') and not s.in_python_file:
+ kind = s.sy
+ s.next()
+ if s.sy == 'IDENT' and s.systring == 'None':
+ s.next()
+ else:
+ s.error("Expected 'None'")
+ if not in_pyfunc:
+ error(pos, "'%s None' only allowed in Python functions" % kind)
+ or_none = kind == 'or'
+ not_none = kind == 'not'
+ if annotated and s.sy == ':':
+ s.next()
+ annotation = p_test(s)
+ if s.sy == '=':
+ s.next()
+ if 'pxd' in ctx.level:
if s.sy in ['*', '?']:
# TODO(github/1736): Make this an error for inline declarations.
default = ExprNodes.NoneNode(pos)
@@ -3030,254 +3030,254 @@ def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0,
elif 'inline' in ctx.modifiers:
default = p_test(s)
else:
- error(pos, "default values cannot be specified in pxd files, use ? or *")
- else:
- default = p_test(s)
- return Nodes.CArgDeclNode(pos,
- base_type = base_type,
- declarator = declarator,
- not_none = not_none,
- or_none = or_none,
- default = default,
- annotation = annotation,
- kw_only = kw_only)
-
-def p_api(s):
- if s.sy == 'IDENT' and s.systring == 'api':
- s.next()
- return 1
- else:
- return 0
-
-def p_cdef_statement(s, ctx):
- pos = s.position()
- ctx.visibility = p_visibility(s, ctx.visibility)
- ctx.api = ctx.api or p_api(s)
- if ctx.api:
- if ctx.visibility not in ('private', 'public'):
- error(pos, "Cannot combine 'api' with '%s'" % ctx.visibility)
- if (ctx.visibility == 'extern') and s.sy == 'from':
- return p_cdef_extern_block(s, pos, ctx)
- elif s.sy == 'import':
- s.next()
- return p_cdef_extern_block(s, pos, ctx)
- elif p_nogil(s):
- ctx.nogil = 1
- if ctx.overridable:
- error(pos, "cdef blocks cannot be declared cpdef")
- return p_cdef_block(s, ctx)
- elif s.sy == ':':
- if ctx.overridable:
- error(pos, "cdef blocks cannot be declared cpdef")
- return p_cdef_block(s, ctx)
- elif s.sy == 'class':
- if ctx.level not in ('module', 'module_pxd'):
- error(pos, "Extension type definition not allowed here")
- if ctx.overridable:
- error(pos, "Extension types cannot be declared cpdef")
- return p_c_class_definition(s, pos, ctx)
- elif s.sy == 'IDENT' and s.systring == 'cppclass':
- return p_cpp_class_definition(s, pos, ctx)
- elif s.sy == 'IDENT' and s.systring in struct_enum_union:
- if ctx.level not in ('module', 'module_pxd'):
- error(pos, "C struct/union/enum definition not allowed here")
- if ctx.overridable:
- if s.systring != 'enum':
- error(pos, "C struct/union cannot be declared cpdef")
- return p_struct_enum(s, pos, ctx)
- elif s.sy == 'IDENT' and s.systring == 'fused':
- return p_fused_definition(s, pos, ctx)
- else:
- return p_c_func_or_var_declaration(s, pos, ctx)
-
-def p_cdef_block(s, ctx):
- return p_suite(s, ctx(cdef_flag = 1))
-
-def p_cdef_extern_block(s, pos, ctx):
- if ctx.overridable:
- error(pos, "cdef extern blocks cannot be declared cpdef")
- include_file = None
- s.expect('from')
- if s.sy == '*':
- s.next()
- else:
- include_file = p_string_literal(s, 'u')[2]
- ctx = ctx(cdef_flag = 1, visibility = 'extern')
- if s.systring == "namespace":
- s.next()
- ctx.namespace = p_string_literal(s, 'u')[2]
- if p_nogil(s):
- ctx.nogil = 1
+ error(pos, "default values cannot be specified in pxd files, use ? or *")
+ else:
+ default = p_test(s)
+ return Nodes.CArgDeclNode(pos,
+ base_type = base_type,
+ declarator = declarator,
+ not_none = not_none,
+ or_none = or_none,
+ default = default,
+ annotation = annotation,
+ kw_only = kw_only)
+
+def p_api(s):
+ if s.sy == 'IDENT' and s.systring == 'api':
+ s.next()
+ return 1
+ else:
+ return 0
+
+def p_cdef_statement(s, ctx):
+ pos = s.position()
+ ctx.visibility = p_visibility(s, ctx.visibility)
+ ctx.api = ctx.api or p_api(s)
+ if ctx.api:
+ if ctx.visibility not in ('private', 'public'):
+ error(pos, "Cannot combine 'api' with '%s'" % ctx.visibility)
+ if (ctx.visibility == 'extern') and s.sy == 'from':
+ return p_cdef_extern_block(s, pos, ctx)
+ elif s.sy == 'import':
+ s.next()
+ return p_cdef_extern_block(s, pos, ctx)
+ elif p_nogil(s):
+ ctx.nogil = 1
+ if ctx.overridable:
+ error(pos, "cdef blocks cannot be declared cpdef")
+ return p_cdef_block(s, ctx)
+ elif s.sy == ':':
+ if ctx.overridable:
+ error(pos, "cdef blocks cannot be declared cpdef")
+ return p_cdef_block(s, ctx)
+ elif s.sy == 'class':
+ if ctx.level not in ('module', 'module_pxd'):
+ error(pos, "Extension type definition not allowed here")
+ if ctx.overridable:
+ error(pos, "Extension types cannot be declared cpdef")
+ return p_c_class_definition(s, pos, ctx)
+ elif s.sy == 'IDENT' and s.systring == 'cppclass':
+ return p_cpp_class_definition(s, pos, ctx)
+ elif s.sy == 'IDENT' and s.systring in struct_enum_union:
+ if ctx.level not in ('module', 'module_pxd'):
+ error(pos, "C struct/union/enum definition not allowed here")
+ if ctx.overridable:
+ if s.systring != 'enum':
+ error(pos, "C struct/union cannot be declared cpdef")
+ return p_struct_enum(s, pos, ctx)
+ elif s.sy == 'IDENT' and s.systring == 'fused':
+ return p_fused_definition(s, pos, ctx)
+ else:
+ return p_c_func_or_var_declaration(s, pos, ctx)
+
+def p_cdef_block(s, ctx):
+ return p_suite(s, ctx(cdef_flag = 1))
+
+def p_cdef_extern_block(s, pos, ctx):
+ if ctx.overridable:
+ error(pos, "cdef extern blocks cannot be declared cpdef")
+ include_file = None
+ s.expect('from')
+ if s.sy == '*':
+ s.next()
+ else:
+ include_file = p_string_literal(s, 'u')[2]
+ ctx = ctx(cdef_flag = 1, visibility = 'extern')
+ if s.systring == "namespace":
+ s.next()
+ ctx.namespace = p_string_literal(s, 'u')[2]
+ if p_nogil(s):
+ ctx.nogil = 1
# Use "docstring" as verbatim string to include
verbatim_include, body = p_suite_with_docstring(s, ctx, True)
- return Nodes.CDefExternNode(pos,
- include_file = include_file,
+ return Nodes.CDefExternNode(pos,
+ include_file = include_file,
verbatim_include = verbatim_include,
- body = body,
- namespace = ctx.namespace)
-
-def p_c_enum_definition(s, pos, ctx):
- # s.sy == ident 'enum'
- s.next()
- if s.sy == 'IDENT':
- name = s.systring
- s.next()
- cname = p_opt_cname(s)
- if cname is None and ctx.namespace is not None:
- cname = ctx.namespace + "::" + name
- else:
- name = None
- cname = None
- items = None
- s.expect(':')
- items = []
- if s.sy != 'NEWLINE':
- p_c_enum_line(s, ctx, items)
- else:
- s.next() # 'NEWLINE'
- s.expect_indent()
- while s.sy not in ('DEDENT', 'EOF'):
- p_c_enum_line(s, ctx, items)
- s.expect_dedent()
- return Nodes.CEnumDefNode(
- pos, name = name, cname = cname, items = items,
- typedef_flag = ctx.typedef_flag, visibility = ctx.visibility,
- create_wrapper = ctx.overridable,
- api = ctx.api, in_pxd = ctx.level == 'module_pxd')
-
-def p_c_enum_line(s, ctx, items):
- if s.sy != 'pass':
- p_c_enum_item(s, ctx, items)
- while s.sy == ',':
- s.next()
- if s.sy in ('NEWLINE', 'EOF'):
- break
- p_c_enum_item(s, ctx, items)
- else:
- s.next()
- s.expect_newline("Syntax error in enum item list")
-
-def p_c_enum_item(s, ctx, items):
- pos = s.position()
- name = p_ident(s)
- cname = p_opt_cname(s)
- if cname is None and ctx.namespace is not None:
- cname = ctx.namespace + "::" + name
- value = None
- if s.sy == '=':
- s.next()
- value = p_test(s)
- items.append(Nodes.CEnumDefItemNode(pos,
- name = name, cname = cname, value = value))
-
-def p_c_struct_or_union_definition(s, pos, ctx):
- packed = False
- if s.systring == 'packed':
- packed = True
- s.next()
- if s.sy != 'IDENT' or s.systring != 'struct':
- s.expected('struct')
- # s.sy == ident 'struct' or 'union'
- kind = s.systring
- s.next()
- name = p_ident(s)
- cname = p_opt_cname(s)
- if cname is None and ctx.namespace is not None:
- cname = ctx.namespace + "::" + name
- attributes = None
- if s.sy == ':':
- s.next()
- s.expect('NEWLINE')
- s.expect_indent()
- attributes = []
- body_ctx = Ctx()
- while s.sy != 'DEDENT':
- if s.sy != 'pass':
- attributes.append(
- p_c_func_or_var_declaration(s, s.position(), body_ctx))
- else:
- s.next()
- s.expect_newline("Expected a newline")
- s.expect_dedent()
- else:
- s.expect_newline("Syntax error in struct or union definition")
- return Nodes.CStructOrUnionDefNode(pos,
- name = name, cname = cname, kind = kind, attributes = attributes,
- typedef_flag = ctx.typedef_flag, visibility = ctx.visibility,
- api = ctx.api, in_pxd = ctx.level == 'module_pxd', packed = packed)
-
-def p_fused_definition(s, pos, ctx):
- """
- c(type)def fused my_fused_type:
- ...
- """
- # s.systring == 'fused'
-
- if ctx.level not in ('module', 'module_pxd'):
- error(pos, "Fused type definition not allowed here")
-
- s.next()
- name = p_ident(s)
-
- s.expect(":")
- s.expect_newline()
- s.expect_indent()
-
- types = []
- while s.sy != 'DEDENT':
- if s.sy != 'pass':
- #types.append(p_c_declarator(s))
- types.append(p_c_base_type(s)) #, nonempty=1))
- else:
- s.next()
-
- s.expect_newline()
-
- s.expect_dedent()
-
- if not types:
- error(pos, "Need at least one type")
-
- return Nodes.FusedTypeNode(pos, name=name, types=types)
-
-def p_struct_enum(s, pos, ctx):
- if s.systring == 'enum':
- return p_c_enum_definition(s, pos, ctx)
- else:
- return p_c_struct_or_union_definition(s, pos, ctx)
-
-def p_visibility(s, prev_visibility):
- pos = s.position()
- visibility = prev_visibility
- if s.sy == 'IDENT' and s.systring in ('extern', 'public', 'readonly'):
- visibility = s.systring
- if prev_visibility != 'private' and visibility != prev_visibility:
- s.error("Conflicting visibility options '%s' and '%s'"
- % (prev_visibility, visibility), fatal=False)
- s.next()
- return visibility
-
-def p_c_modifiers(s):
- if s.sy == 'IDENT' and s.systring in ('inline',):
- modifier = s.systring
- s.next()
- return [modifier] + p_c_modifiers(s)
- return []
-
-def p_c_func_or_var_declaration(s, pos, ctx):
- cmethod_flag = ctx.level in ('c_class', 'c_class_pxd')
- modifiers = p_c_modifiers(s)
- base_type = p_c_base_type(s, nonempty = 1, templates = ctx.templates)
+ body = body,
+ namespace = ctx.namespace)
+
+def p_c_enum_definition(s, pos, ctx):
+ # s.sy == ident 'enum'
+ s.next()
+ if s.sy == 'IDENT':
+ name = s.systring
+ s.next()
+ cname = p_opt_cname(s)
+ if cname is None and ctx.namespace is not None:
+ cname = ctx.namespace + "::" + name
+ else:
+ name = None
+ cname = None
+ items = None
+ s.expect(':')
+ items = []
+ if s.sy != 'NEWLINE':
+ p_c_enum_line(s, ctx, items)
+ else:
+ s.next() # 'NEWLINE'
+ s.expect_indent()
+ while s.sy not in ('DEDENT', 'EOF'):
+ p_c_enum_line(s, ctx, items)
+ s.expect_dedent()
+ return Nodes.CEnumDefNode(
+ pos, name = name, cname = cname, items = items,
+ typedef_flag = ctx.typedef_flag, visibility = ctx.visibility,
+ create_wrapper = ctx.overridable,
+ api = ctx.api, in_pxd = ctx.level == 'module_pxd')
+
+def p_c_enum_line(s, ctx, items):
+ if s.sy != 'pass':
+ p_c_enum_item(s, ctx, items)
+ while s.sy == ',':
+ s.next()
+ if s.sy in ('NEWLINE', 'EOF'):
+ break
+ p_c_enum_item(s, ctx, items)
+ else:
+ s.next()
+ s.expect_newline("Syntax error in enum item list")
+
+def p_c_enum_item(s, ctx, items):
+ pos = s.position()
+ name = p_ident(s)
+ cname = p_opt_cname(s)
+ if cname is None and ctx.namespace is not None:
+ cname = ctx.namespace + "::" + name
+ value = None
+ if s.sy == '=':
+ s.next()
+ value = p_test(s)
+ items.append(Nodes.CEnumDefItemNode(pos,
+ name = name, cname = cname, value = value))
+
+def p_c_struct_or_union_definition(s, pos, ctx):
+ packed = False
+ if s.systring == 'packed':
+ packed = True
+ s.next()
+ if s.sy != 'IDENT' or s.systring != 'struct':
+ s.expected('struct')
+ # s.sy == ident 'struct' or 'union'
+ kind = s.systring
+ s.next()
+ name = p_ident(s)
+ cname = p_opt_cname(s)
+ if cname is None and ctx.namespace is not None:
+ cname = ctx.namespace + "::" + name
+ attributes = None
+ if s.sy == ':':
+ s.next()
+ s.expect('NEWLINE')
+ s.expect_indent()
+ attributes = []
+ body_ctx = Ctx()
+ while s.sy != 'DEDENT':
+ if s.sy != 'pass':
+ attributes.append(
+ p_c_func_or_var_declaration(s, s.position(), body_ctx))
+ else:
+ s.next()
+ s.expect_newline("Expected a newline")
+ s.expect_dedent()
+ else:
+ s.expect_newline("Syntax error in struct or union definition")
+ return Nodes.CStructOrUnionDefNode(pos,
+ name = name, cname = cname, kind = kind, attributes = attributes,
+ typedef_flag = ctx.typedef_flag, visibility = ctx.visibility,
+ api = ctx.api, in_pxd = ctx.level == 'module_pxd', packed = packed)
+
+def p_fused_definition(s, pos, ctx):
+ """
+ c(type)def fused my_fused_type:
+ ...
+ """
+ # s.systring == 'fused'
+
+ if ctx.level not in ('module', 'module_pxd'):
+ error(pos, "Fused type definition not allowed here")
+
+ s.next()
+ name = p_ident(s)
+
+ s.expect(":")
+ s.expect_newline()
+ s.expect_indent()
+
+ types = []
+ while s.sy != 'DEDENT':
+ if s.sy != 'pass':
+ #types.append(p_c_declarator(s))
+ types.append(p_c_base_type(s)) #, nonempty=1))
+ else:
+ s.next()
+
+ s.expect_newline()
+
+ s.expect_dedent()
+
+ if not types:
+ error(pos, "Need at least one type")
+
+ return Nodes.FusedTypeNode(pos, name=name, types=types)
+
+def p_struct_enum(s, pos, ctx):
+ if s.systring == 'enum':
+ return p_c_enum_definition(s, pos, ctx)
+ else:
+ return p_c_struct_or_union_definition(s, pos, ctx)
+
+def p_visibility(s, prev_visibility):
+ pos = s.position()
+ visibility = prev_visibility
+ if s.sy == 'IDENT' and s.systring in ('extern', 'public', 'readonly'):
+ visibility = s.systring
+ if prev_visibility != 'private' and visibility != prev_visibility:
+ s.error("Conflicting visibility options '%s' and '%s'"
+ % (prev_visibility, visibility), fatal=False)
+ s.next()
+ return visibility
+
+def p_c_modifiers(s):
+ if s.sy == 'IDENT' and s.systring in ('inline',):
+ modifier = s.systring
+ s.next()
+ return [modifier] + p_c_modifiers(s)
+ return []
+
+def p_c_func_or_var_declaration(s, pos, ctx):
+ cmethod_flag = ctx.level in ('c_class', 'c_class_pxd')
+ modifiers = p_c_modifiers(s)
+ base_type = p_c_base_type(s, nonempty = 1, templates = ctx.templates)
declarator = p_c_declarator(s, ctx(modifiers=modifiers), cmethod_flag = cmethod_flag,
- assignable = 1, nonempty = 1)
- declarator.overridable = ctx.overridable
- if s.sy == 'IDENT' and s.systring == 'const' and ctx.level == 'cpp_class':
- s.next()
- is_const_method = 1
- else:
- is_const_method = 0
+ assignable = 1, nonempty = 1)
+ declarator.overridable = ctx.overridable
+ if s.sy == 'IDENT' and s.systring == 'const' and ctx.level == 'cpp_class':
+ s.next()
+ is_const_method = 1
+ else:
+ is_const_method = 0
if s.sy == '->':
# Special enough to give a better error message and keep going.
s.error(
@@ -3286,92 +3286,92 @@ def p_c_func_or_var_declaration(s, pos, ctx):
fatal=False)
s.next()
p_test(s) # Keep going, but ignore result.
- if s.sy == ':':
- if ctx.level not in ('module', 'c_class', 'module_pxd', 'c_class_pxd', 'cpp_class') and not ctx.templates:
- s.error("C function definition not allowed here")
- doc, suite = p_suite_with_docstring(s, Ctx(level='function'))
- result = Nodes.CFuncDefNode(pos,
- visibility = ctx.visibility,
- base_type = base_type,
- declarator = declarator,
- body = suite,
- doc = doc,
- modifiers = modifiers,
- api = ctx.api,
- overridable = ctx.overridable,
- is_const_method = is_const_method)
- else:
- #if api:
- # s.error("'api' not allowed with variable declaration")
- if is_const_method:
- declarator.is_const_method = is_const_method
- declarators = [declarator]
- while s.sy == ',':
- s.next()
- if s.sy == 'NEWLINE':
- break
- declarator = p_c_declarator(s, ctx, cmethod_flag = cmethod_flag,
- assignable = 1, nonempty = 1)
- declarators.append(declarator)
- doc_line = s.start_line + 1
- s.expect_newline("Syntax error in C variable declaration", ignore_semicolon=True)
- if ctx.level in ('c_class', 'c_class_pxd') and s.start_line == doc_line:
- doc = p_doc_string(s)
- else:
- doc = None
- result = Nodes.CVarDefNode(pos,
- visibility = ctx.visibility,
- base_type = base_type,
- declarators = declarators,
- in_pxd = ctx.level in ('module_pxd', 'c_class_pxd'),
- doc = doc,
- api = ctx.api,
- modifiers = modifiers,
- overridable = ctx.overridable)
- return result
-
-def p_ctypedef_statement(s, ctx):
- # s.sy == 'ctypedef'
- pos = s.position()
- s.next()
- visibility = p_visibility(s, ctx.visibility)
- api = p_api(s)
- ctx = ctx(typedef_flag = 1, visibility = visibility)
- if api:
- ctx.api = 1
- if s.sy == 'class':
- return p_c_class_definition(s, pos, ctx)
- elif s.sy == 'IDENT' and s.systring in struct_enum_union:
- return p_struct_enum(s, pos, ctx)
- elif s.sy == 'IDENT' and s.systring == 'fused':
- return p_fused_definition(s, pos, ctx)
- else:
- base_type = p_c_base_type(s, nonempty = 1)
- declarator = p_c_declarator(s, ctx, is_type = 1, nonempty = 1)
- s.expect_newline("Syntax error in ctypedef statement", ignore_semicolon=True)
- return Nodes.CTypeDefNode(
- pos, base_type = base_type,
- declarator = declarator,
- visibility = visibility, api = api,
- in_pxd = ctx.level == 'module_pxd')
-
-def p_decorators(s):
- decorators = []
- while s.sy == '@':
- pos = s.position()
- s.next()
- decstring = p_dotted_name(s, as_allowed=0)[2]
- names = decstring.split('.')
+ if s.sy == ':':
+ if ctx.level not in ('module', 'c_class', 'module_pxd', 'c_class_pxd', 'cpp_class') and not ctx.templates:
+ s.error("C function definition not allowed here")
+ doc, suite = p_suite_with_docstring(s, Ctx(level='function'))
+ result = Nodes.CFuncDefNode(pos,
+ visibility = ctx.visibility,
+ base_type = base_type,
+ declarator = declarator,
+ body = suite,
+ doc = doc,
+ modifiers = modifiers,
+ api = ctx.api,
+ overridable = ctx.overridable,
+ is_const_method = is_const_method)
+ else:
+ #if api:
+ # s.error("'api' not allowed with variable declaration")
+ if is_const_method:
+ declarator.is_const_method = is_const_method
+ declarators = [declarator]
+ while s.sy == ',':
+ s.next()
+ if s.sy == 'NEWLINE':
+ break
+ declarator = p_c_declarator(s, ctx, cmethod_flag = cmethod_flag,
+ assignable = 1, nonempty = 1)
+ declarators.append(declarator)
+ doc_line = s.start_line + 1
+ s.expect_newline("Syntax error in C variable declaration", ignore_semicolon=True)
+ if ctx.level in ('c_class', 'c_class_pxd') and s.start_line == doc_line:
+ doc = p_doc_string(s)
+ else:
+ doc = None
+ result = Nodes.CVarDefNode(pos,
+ visibility = ctx.visibility,
+ base_type = base_type,
+ declarators = declarators,
+ in_pxd = ctx.level in ('module_pxd', 'c_class_pxd'),
+ doc = doc,
+ api = ctx.api,
+ modifiers = modifiers,
+ overridable = ctx.overridable)
+ return result
+
+def p_ctypedef_statement(s, ctx):
+ # s.sy == 'ctypedef'
+ pos = s.position()
+ s.next()
+ visibility = p_visibility(s, ctx.visibility)
+ api = p_api(s)
+ ctx = ctx(typedef_flag = 1, visibility = visibility)
+ if api:
+ ctx.api = 1
+ if s.sy == 'class':
+ return p_c_class_definition(s, pos, ctx)
+ elif s.sy == 'IDENT' and s.systring in struct_enum_union:
+ return p_struct_enum(s, pos, ctx)
+ elif s.sy == 'IDENT' and s.systring == 'fused':
+ return p_fused_definition(s, pos, ctx)
+ else:
+ base_type = p_c_base_type(s, nonempty = 1)
+ declarator = p_c_declarator(s, ctx, is_type = 1, nonempty = 1)
+ s.expect_newline("Syntax error in ctypedef statement", ignore_semicolon=True)
+ return Nodes.CTypeDefNode(
+ pos, base_type = base_type,
+ declarator = declarator,
+ visibility = visibility, api = api,
+ in_pxd = ctx.level == 'module_pxd')
+
+def p_decorators(s):
+ decorators = []
+ while s.sy == '@':
+ pos = s.position()
+ s.next()
+ decstring = p_dotted_name(s, as_allowed=0)[2]
+ names = decstring.split('.')
decorator = ExprNodes.NameNode(pos, name=s.context.intern_ustring(names[0]))
- for name in names[1:]:
+ for name in names[1:]:
decorator = ExprNodes.AttributeNode(
pos, attribute=s.context.intern_ustring(name), obj=decorator)
- if s.sy == '(':
- decorator = p_call(s, decorator)
- decorators.append(Nodes.DecoratorNode(pos, decorator=decorator))
- s.expect_newline("Expected a newline after decorator")
- return decorators
-
+ if s.sy == '(':
+ decorator = p_call(s, decorator)
+ decorators.append(Nodes.DecoratorNode(pos, decorator=decorator))
+ s.expect_newline("Expected a newline after decorator")
+ return decorators
+
def _reject_cdef_modifier_in_py(s, name):
"""Step over incorrectly placed cdef modifiers (@see _CDEF_MODIFIERS) to provide a good error message for them.
@@ -3384,113 +3384,113 @@ def _reject_cdef_modifier_in_py(s, name):
def p_def_statement(s, decorators=None, is_async_def=False):
- # s.sy == 'def'
- pos = s.position()
+ # s.sy == 'def'
+ pos = s.position()
# PEP 492 switches the async/await keywords on in "async def" functions
if is_async_def:
s.enter_async()
- s.next()
+ s.next()
name = _reject_cdef_modifier_in_py(s, p_ident(s))
s.expect(
'(',
"Expected '(', found '%s'. Did you use cdef syntax in a Python declaration? "
"Use decorators and Python type annotations instead." % (
s.systring if s.sy == 'IDENT' else s.sy))
- args, star_arg, starstar_arg = p_varargslist(s, terminator=')')
- s.expect(')')
+ args, star_arg, starstar_arg = p_varargslist(s, terminator=')')
+ s.expect(')')
_reject_cdef_modifier_in_py(s, s.systring)
- return_type_annotation = None
- if s.sy == '->':
- s.next()
- return_type_annotation = p_test(s)
+ return_type_annotation = None
+ if s.sy == '->':
+ s.next()
+ return_type_annotation = p_test(s)
_reject_cdef_modifier_in_py(s, s.systring)
- doc, body = p_suite_with_docstring(s, Ctx(level='function'))
+ doc, body = p_suite_with_docstring(s, Ctx(level='function'))
if is_async_def:
s.exit_async()
-
+
return Nodes.DefNode(
pos, name=name, args=args, star_arg=star_arg, starstar_arg=starstar_arg,
doc=doc, body=body, decorators=decorators, is_async_def=is_async_def,
return_type_annotation=return_type_annotation)
-def p_varargslist(s, terminator=')', annotated=1):
- args = p_c_arg_list(s, in_pyfunc = 1, nonempty_declarators = 1,
- annotated = annotated)
- star_arg = None
- starstar_arg = None
- if s.sy == '*':
- s.next()
- if s.sy == 'IDENT':
- star_arg = p_py_arg_decl(s, annotated=annotated)
- if s.sy == ',':
- s.next()
- args.extend(p_c_arg_list(s, in_pyfunc = 1,
- nonempty_declarators = 1, kw_only = 1, annotated = annotated))
- elif s.sy != terminator:
- s.error("Syntax error in Python function argument list")
- if s.sy == '**':
- s.next()
- starstar_arg = p_py_arg_decl(s, annotated=annotated)
+def p_varargslist(s, terminator=')', annotated=1):
+ args = p_c_arg_list(s, in_pyfunc = 1, nonempty_declarators = 1,
+ annotated = annotated)
+ star_arg = None
+ starstar_arg = None
+ if s.sy == '*':
+ s.next()
+ if s.sy == 'IDENT':
+ star_arg = p_py_arg_decl(s, annotated=annotated)
+ if s.sy == ',':
+ s.next()
+ args.extend(p_c_arg_list(s, in_pyfunc = 1,
+ nonempty_declarators = 1, kw_only = 1, annotated = annotated))
+ elif s.sy != terminator:
+ s.error("Syntax error in Python function argument list")
+ if s.sy == '**':
+ s.next()
+ starstar_arg = p_py_arg_decl(s, annotated=annotated)
if s.sy == ',':
s.next()
- return (args, star_arg, starstar_arg)
-
-def p_py_arg_decl(s, annotated = 1):
- pos = s.position()
- name = p_ident(s)
- annotation = None
- if annotated and s.sy == ':':
- s.next()
- annotation = p_test(s)
- return Nodes.PyArgDeclNode(pos, name = name, annotation = annotation)
-
-
-def p_class_statement(s, decorators):
- # s.sy == 'class'
- pos = s.position()
- s.next()
+ return (args, star_arg, starstar_arg)
+
+def p_py_arg_decl(s, annotated = 1):
+ pos = s.position()
+ name = p_ident(s)
+ annotation = None
+ if annotated and s.sy == ':':
+ s.next()
+ annotation = p_test(s)
+ return Nodes.PyArgDeclNode(pos, name = name, annotation = annotation)
+
+
+def p_class_statement(s, decorators):
+ # s.sy == 'class'
+ pos = s.position()
+ s.next()
class_name = EncodedString(p_ident(s))
class_name.encoding = s.source_encoding # FIXME: why is this needed?
- arg_tuple = None
- keyword_dict = None
- if s.sy == '(':
+ arg_tuple = None
+ keyword_dict = None
+ if s.sy == '(':
positional_args, keyword_args = p_call_parse_args(s, allow_genexp=False)
arg_tuple, keyword_dict = p_call_build_packed_args(pos, positional_args, keyword_args)
- if arg_tuple is None:
- # XXX: empty arg_tuple
- arg_tuple = ExprNodes.TupleNode(pos, args=[])
- doc, body = p_suite_with_docstring(s, Ctx(level='class'))
- return Nodes.PyClassDefNode(
- pos, name=class_name,
- bases=arg_tuple,
- keyword_args=keyword_dict,
- doc=doc, body=body, decorators=decorators,
- force_py3_semantics=s.context.language_level >= 3)
-
-
-def p_c_class_definition(s, pos, ctx):
- # s.sy == 'class'
- s.next()
- module_path = []
- class_name = p_ident(s)
- while s.sy == '.':
- s.next()
- module_path.append(class_name)
- class_name = p_ident(s)
- if module_path and ctx.visibility != 'extern':
- error(pos, "Qualified class name only allowed for 'extern' C class")
- if module_path and s.sy == 'IDENT' and s.systring == 'as':
- s.next()
- as_name = p_ident(s)
- else:
- as_name = class_name
- objstruct_name = None
- typeobj_name = None
+ if arg_tuple is None:
+ # XXX: empty arg_tuple
+ arg_tuple = ExprNodes.TupleNode(pos, args=[])
+ doc, body = p_suite_with_docstring(s, Ctx(level='class'))
+ return Nodes.PyClassDefNode(
+ pos, name=class_name,
+ bases=arg_tuple,
+ keyword_args=keyword_dict,
+ doc=doc, body=body, decorators=decorators,
+ force_py3_semantics=s.context.language_level >= 3)
+
+
+def p_c_class_definition(s, pos, ctx):
+ # s.sy == 'class'
+ s.next()
+ module_path = []
+ class_name = p_ident(s)
+ while s.sy == '.':
+ s.next()
+ module_path.append(class_name)
+ class_name = p_ident(s)
+ if module_path and ctx.visibility != 'extern':
+ error(pos, "Qualified class name only allowed for 'extern' C class")
+ if module_path and s.sy == 'IDENT' and s.systring == 'as':
+ s.next()
+ as_name = p_ident(s)
+ else:
+ as_name = class_name
+ objstruct_name = None
+ typeobj_name = None
bases = None
check_size = None
- if s.sy == '(':
+ if s.sy == '(':
positional_args, keyword_args = p_call_parse_args(s, allow_genexp=False)
if keyword_args:
s.error("C classes cannot take keyword bases.")
@@ -3498,172 +3498,172 @@ def p_c_class_definition(s, pos, ctx):
if bases is None:
bases = ExprNodes.TupleNode(pos, args=[])
- if s.sy == '[':
- if ctx.visibility not in ('public', 'extern') and not ctx.api:
- error(s.position(), "Name options only allowed for 'public', 'api', or 'extern' C class")
+ if s.sy == '[':
+ if ctx.visibility not in ('public', 'extern') and not ctx.api:
+ error(s.position(), "Name options only allowed for 'public', 'api', or 'extern' C class")
objstruct_name, typeobj_name, check_size = p_c_class_options(s)
- if s.sy == ':':
- if ctx.level == 'module_pxd':
- body_level = 'c_class_pxd'
- else:
- body_level = 'c_class'
- doc, body = p_suite_with_docstring(s, Ctx(level=body_level))
- else:
- s.expect_newline("Syntax error in C class definition")
- doc = None
- body = None
- if ctx.visibility == 'extern':
- if not module_path:
- error(pos, "Module name required for 'extern' C class")
- if typeobj_name:
- error(pos, "Type object name specification not allowed for 'extern' C class")
- elif ctx.visibility == 'public':
- if not objstruct_name:
- error(pos, "Object struct name specification required for 'public' C class")
- if not typeobj_name:
- error(pos, "Type object name specification required for 'public' C class")
- elif ctx.visibility == 'private':
- if ctx.api:
- if not objstruct_name:
- error(pos, "Object struct name specification required for 'api' C class")
- if not typeobj_name:
- error(pos, "Type object name specification required for 'api' C class")
- else:
- error(pos, "Invalid class visibility '%s'" % ctx.visibility)
- return Nodes.CClassDefNode(pos,
- visibility = ctx.visibility,
- typedef_flag = ctx.typedef_flag,
- api = ctx.api,
- module_name = ".".join(module_path),
- class_name = class_name,
- as_name = as_name,
+ if s.sy == ':':
+ if ctx.level == 'module_pxd':
+ body_level = 'c_class_pxd'
+ else:
+ body_level = 'c_class'
+ doc, body = p_suite_with_docstring(s, Ctx(level=body_level))
+ else:
+ s.expect_newline("Syntax error in C class definition")
+ doc = None
+ body = None
+ if ctx.visibility == 'extern':
+ if not module_path:
+ error(pos, "Module name required for 'extern' C class")
+ if typeobj_name:
+ error(pos, "Type object name specification not allowed for 'extern' C class")
+ elif ctx.visibility == 'public':
+ if not objstruct_name:
+ error(pos, "Object struct name specification required for 'public' C class")
+ if not typeobj_name:
+ error(pos, "Type object name specification required for 'public' C class")
+ elif ctx.visibility == 'private':
+ if ctx.api:
+ if not objstruct_name:
+ error(pos, "Object struct name specification required for 'api' C class")
+ if not typeobj_name:
+ error(pos, "Type object name specification required for 'api' C class")
+ else:
+ error(pos, "Invalid class visibility '%s'" % ctx.visibility)
+ return Nodes.CClassDefNode(pos,
+ visibility = ctx.visibility,
+ typedef_flag = ctx.typedef_flag,
+ api = ctx.api,
+ module_name = ".".join(module_path),
+ class_name = class_name,
+ as_name = as_name,
bases = bases,
- objstruct_name = objstruct_name,
- typeobj_name = typeobj_name,
+ objstruct_name = objstruct_name,
+ typeobj_name = typeobj_name,
check_size = check_size,
- in_pxd = ctx.level == 'module_pxd',
- doc = doc,
- body = body)
-
-
-def p_c_class_options(s):
- objstruct_name = None
- typeobj_name = None
+ in_pxd = ctx.level == 'module_pxd',
+ doc = doc,
+ body = body)
+
+
+def p_c_class_options(s):
+ objstruct_name = None
+ typeobj_name = None
check_size = None
- s.expect('[')
- while 1:
- if s.sy != 'IDENT':
- break
- if s.systring == 'object':
- s.next()
- objstruct_name = p_ident(s)
- elif s.systring == 'type':
- s.next()
- typeobj_name = p_ident(s)
+ s.expect('[')
+ while 1:
+ if s.sy != 'IDENT':
+ break
+ if s.systring == 'object':
+ s.next()
+ objstruct_name = p_ident(s)
+ elif s.systring == 'type':
+ s.next()
+ typeobj_name = p_ident(s)
elif s.systring == 'check_size':
s.next()
check_size = p_ident(s)
if check_size not in ('ignore', 'warn', 'error'):
s.error("Expected one of ignore, warn or error, found %r" % check_size)
- if s.sy != ',':
- break
- s.next()
+ if s.sy != ',':
+ break
+ s.next()
s.expect(']', "Expected 'object', 'type' or 'check_size'")
return objstruct_name, typeobj_name, check_size
-
-
-def p_property_decl(s):
- pos = s.position()
- s.next() # 'property'
- name = p_ident(s)
- doc, body = p_suite_with_docstring(
- s, Ctx(level='property'), with_doc_only=True)
- return Nodes.PropertyNode(pos, name=name, doc=doc, body=body)
-
-
-def p_ignorable_statement(s):
- """
- Parses any kind of ignorable statement that is allowed in .pxd files.
- """
- if s.sy == 'BEGIN_STRING':
- pos = s.position()
- string_node = p_atom(s)
- s.expect_newline("Syntax error in string", ignore_semicolon=True)
- return Nodes.ExprStatNode(pos, expr=string_node)
- return None
-
-
-def p_doc_string(s):
- if s.sy == 'BEGIN_STRING':
- pos = s.position()
- kind, bytes_result, unicode_result = p_cat_string_literal(s)
- s.expect_newline("Syntax error in doc string", ignore_semicolon=True)
- if kind in ('u', ''):
- return unicode_result
- warning(pos, "Python 3 requires docstrings to be unicode strings")
- return bytes_result
- else:
- return None
-
-
-def _extract_docstring(node):
- """
- Extract a docstring from a statement or from the first statement
- in a list. Remove the statement if found. Return a tuple
- (plain-docstring or None, node).
- """
- doc_node = None
- if node is None:
- pass
- elif isinstance(node, Nodes.ExprStatNode):
- if node.expr.is_string_literal:
- doc_node = node.expr
- node = Nodes.StatListNode(node.pos, stats=[])
- elif isinstance(node, Nodes.StatListNode) and node.stats:
- stats = node.stats
- if isinstance(stats[0], Nodes.ExprStatNode):
- if stats[0].expr.is_string_literal:
- doc_node = stats[0].expr
- del stats[0]
-
- if doc_node is None:
- doc = None
- elif isinstance(doc_node, ExprNodes.BytesNode):
- warning(node.pos,
- "Python 3 requires docstrings to be unicode strings")
- doc = doc_node.value
- elif isinstance(doc_node, ExprNodes.StringNode):
- doc = doc_node.unicode_value
- if doc is None:
- doc = doc_node.value
- else:
- doc = doc_node.value
- return doc, node
-
-
-def p_code(s, level=None, ctx=Ctx):
- body = p_statement_list(s, ctx(level = level), first_statement = 1)
- if s.sy != 'EOF':
- s.error("Syntax error in statement [%s,%s]" % (
- repr(s.sy), repr(s.systring)))
- return body
-
-
-_match_compiler_directive_comment = cython.declare(object, re.compile(
- r"^#\s*cython\s*:\s*((\w|[.])+\s*=.*)$").match)
-
-
-def p_compiler_directive_comments(s):
- result = {}
- while s.sy == 'commentline':
+
+
+def p_property_decl(s):
+ pos = s.position()
+ s.next() # 'property'
+ name = p_ident(s)
+ doc, body = p_suite_with_docstring(
+ s, Ctx(level='property'), with_doc_only=True)
+ return Nodes.PropertyNode(pos, name=name, doc=doc, body=body)
+
+
+def p_ignorable_statement(s):
+ """
+ Parses any kind of ignorable statement that is allowed in .pxd files.
+ """
+ if s.sy == 'BEGIN_STRING':
pos = s.position()
- m = _match_compiler_directive_comment(s.systring)
- if m:
+ string_node = p_atom(s)
+ s.expect_newline("Syntax error in string", ignore_semicolon=True)
+ return Nodes.ExprStatNode(pos, expr=string_node)
+ return None
+
+
+def p_doc_string(s):
+ if s.sy == 'BEGIN_STRING':
+ pos = s.position()
+ kind, bytes_result, unicode_result = p_cat_string_literal(s)
+ s.expect_newline("Syntax error in doc string", ignore_semicolon=True)
+ if kind in ('u', ''):
+ return unicode_result
+ warning(pos, "Python 3 requires docstrings to be unicode strings")
+ return bytes_result
+ else:
+ return None
+
+
+def _extract_docstring(node):
+ """
+ Extract a docstring from a statement or from the first statement
+ in a list. Remove the statement if found. Return a tuple
+ (plain-docstring or None, node).
+ """
+ doc_node = None
+ if node is None:
+ pass
+ elif isinstance(node, Nodes.ExprStatNode):
+ if node.expr.is_string_literal:
+ doc_node = node.expr
+ node = Nodes.StatListNode(node.pos, stats=[])
+ elif isinstance(node, Nodes.StatListNode) and node.stats:
+ stats = node.stats
+ if isinstance(stats[0], Nodes.ExprStatNode):
+ if stats[0].expr.is_string_literal:
+ doc_node = stats[0].expr
+ del stats[0]
+
+ if doc_node is None:
+ doc = None
+ elif isinstance(doc_node, ExprNodes.BytesNode):
+ warning(node.pos,
+ "Python 3 requires docstrings to be unicode strings")
+ doc = doc_node.value
+ elif isinstance(doc_node, ExprNodes.StringNode):
+ doc = doc_node.unicode_value
+ if doc is None:
+ doc = doc_node.value
+ else:
+ doc = doc_node.value
+ return doc, node
+
+
+def p_code(s, level=None, ctx=Ctx):
+ body = p_statement_list(s, ctx(level = level), first_statement = 1)
+ if s.sy != 'EOF':
+ s.error("Syntax error in statement [%s,%s]" % (
+ repr(s.sy), repr(s.systring)))
+ return body
+
+
+_match_compiler_directive_comment = cython.declare(object, re.compile(
+ r"^#\s*cython\s*:\s*((\w|[.])+\s*=.*)$").match)
+
+
+def p_compiler_directive_comments(s):
+ result = {}
+ while s.sy == 'commentline':
+ pos = s.position()
+ m = _match_compiler_directive_comment(s.systring)
+ if m:
directives_string = m.group(1).strip()
- try:
+ try:
new_directives = Options.parse_directive_list(directives_string, ignore_unknown=True)
except ValueError as e:
- s.error(e.args[0], fatal=False)
+ s.error(e.args[0], fatal=False)
s.next()
continue
@@ -3682,19 +3682,19 @@ def p_compiler_directive_comments(s):
result.update(new_directives)
- s.next()
- return result
-
+ s.next()
+ return result
+
+
+def p_module(s, pxd, full_module_name, ctx=Ctx):
+ pos = s.position()
+
+ directive_comments = p_compiler_directive_comments(s)
+ s.parse_comments = False
-def p_module(s, pxd, full_module_name, ctx=Ctx):
- pos = s.position()
-
- directive_comments = p_compiler_directive_comments(s)
- s.parse_comments = False
-
if s.context.language_level is None:
s.context.set_language_level(2) # Arcadia default.
-
+
if s.context.language_level is None:
s.context.set_language_level(2)
if pos[0].filename:
@@ -3706,20 +3706,20 @@ def p_module(s, pxd, full_module_name, ctx=Ctx):
stacklevel=1 if cython.compiled else 2,
)
- doc = p_doc_string(s)
- if pxd:
- level = 'module_pxd'
- else:
- level = 'module'
-
- body = p_statement_list(s, ctx(level=level), first_statement = 1)
- if s.sy != 'EOF':
- s.error("Syntax error in statement [%s,%s]" % (
- repr(s.sy), repr(s.systring)))
- return ModuleNode(pos, doc = doc, body = body,
- full_module_name = full_module_name,
- directive_comments = directive_comments)
-
+ doc = p_doc_string(s)
+ if pxd:
+ level = 'module_pxd'
+ else:
+ level = 'module'
+
+ body = p_statement_list(s, ctx(level=level), first_statement = 1)
+ if s.sy != 'EOF':
+ s.error("Syntax error in statement [%s,%s]" % (
+ repr(s.sy), repr(s.systring)))
+ return ModuleNode(pos, doc = doc, body = body,
+ full_module_name = full_module_name,
+ directive_comments = directive_comments)
+
def p_template_definition(s):
name = p_ident(s)
if s.sy == '=':
@@ -3730,71 +3730,71 @@ def p_template_definition(s):
required = True
return name, required
-def p_cpp_class_definition(s, pos, ctx):
- # s.sy == 'cppclass'
- s.next()
- module_path = []
- class_name = p_ident(s)
- cname = p_opt_cname(s)
- if cname is None and ctx.namespace is not None:
- cname = ctx.namespace + "::" + class_name
- if s.sy == '.':
- error(pos, "Qualified class name not allowed C++ class")
- if s.sy == '[':
- s.next()
+def p_cpp_class_definition(s, pos, ctx):
+ # s.sy == 'cppclass'
+ s.next()
+ module_path = []
+ class_name = p_ident(s)
+ cname = p_opt_cname(s)
+ if cname is None and ctx.namespace is not None:
+ cname = ctx.namespace + "::" + class_name
+ if s.sy == '.':
+ error(pos, "Qualified class name not allowed C++ class")
+ if s.sy == '[':
+ s.next()
templates = [p_template_definition(s)]
- while s.sy == ',':
- s.next()
+ while s.sy == ',':
+ s.next()
templates.append(p_template_definition(s))
- s.expect(']')
+ s.expect(']')
template_names = [name for name, required in templates]
- else:
- templates = None
+ else:
+ templates = None
template_names = None
- if s.sy == '(':
- s.next()
+ if s.sy == '(':
+ s.next()
base_classes = [p_c_base_type(s, templates = template_names)]
- while s.sy == ',':
- s.next()
+ while s.sy == ',':
+ s.next()
base_classes.append(p_c_base_type(s, templates = template_names))
- s.expect(')')
- else:
- base_classes = []
- if s.sy == '[':
- error(s.position(), "Name options not allowed for C++ class")
- nogil = p_nogil(s)
- if s.sy == ':':
- s.next()
- s.expect('NEWLINE')
- s.expect_indent()
- attributes = []
- body_ctx = Ctx(visibility = ctx.visibility, level='cpp_class', nogil=nogil or ctx.nogil)
+ s.expect(')')
+ else:
+ base_classes = []
+ if s.sy == '[':
+ error(s.position(), "Name options not allowed for C++ class")
+ nogil = p_nogil(s)
+ if s.sy == ':':
+ s.next()
+ s.expect('NEWLINE')
+ s.expect_indent()
+ attributes = []
+ body_ctx = Ctx(visibility = ctx.visibility, level='cpp_class', nogil=nogil or ctx.nogil)
body_ctx.templates = template_names
- while s.sy != 'DEDENT':
- if s.sy != 'pass':
- attributes.append(p_cpp_class_attribute(s, body_ctx))
- else:
- s.next()
- s.expect_newline("Expected a newline")
- s.expect_dedent()
- else:
- attributes = None
- s.expect_newline("Syntax error in C++ class definition")
- return Nodes.CppClassNode(pos,
- name = class_name,
- cname = cname,
- base_classes = base_classes,
- visibility = ctx.visibility,
- in_pxd = ctx.level == 'module_pxd',
- attributes = attributes,
- templates = templates)
-
-def p_cpp_class_attribute(s, ctx):
- decorators = None
- if s.sy == '@':
- decorators = p_decorators(s)
- if s.systring == 'cppclass':
- return p_cpp_class_definition(s, s.position(), ctx)
+ while s.sy != 'DEDENT':
+ if s.sy != 'pass':
+ attributes.append(p_cpp_class_attribute(s, body_ctx))
+ else:
+ s.next()
+ s.expect_newline("Expected a newline")
+ s.expect_dedent()
+ else:
+ attributes = None
+ s.expect_newline("Syntax error in C++ class definition")
+ return Nodes.CppClassNode(pos,
+ name = class_name,
+ cname = cname,
+ base_classes = base_classes,
+ visibility = ctx.visibility,
+ in_pxd = ctx.level == 'module_pxd',
+ attributes = attributes,
+ templates = templates)
+
+def p_cpp_class_attribute(s, ctx):
+ decorators = None
+ if s.sy == '@':
+ decorators = p_decorators(s)
+ if s.systring == 'cppclass':
+ return p_cpp_class_definition(s, s.position(), ctx)
elif s.systring == 'ctypedef':
return p_ctypedef_statement(s, ctx)
elif s.sy == 'IDENT' and s.systring in struct_enum_union:
@@ -3802,51 +3802,51 @@ def p_cpp_class_attribute(s, ctx):
return p_cpp_class_definition(s, s.position(), ctx)
else:
return p_struct_enum(s, s.position(), ctx)
- else:
- node = p_c_func_or_var_declaration(s, s.position(), ctx)
- if decorators is not None:
- tup = Nodes.CFuncDefNode, Nodes.CVarDefNode, Nodes.CClassDefNode
- if ctx.allow_struct_enum_decorator:
- tup += Nodes.CStructOrUnionDefNode, Nodes.CEnumDefNode
- if not isinstance(node, tup):
- s.error("Decorators can only be followed by functions or classes")
- node.decorators = decorators
- return node
-
-
-#----------------------------------------------
-#
-# Debugging
-#
-#----------------------------------------------
-
-def print_parse_tree(f, node, level, key = None):
- ind = " " * level
- if node:
- f.write(ind)
- if key:
- f.write("%s: " % key)
- t = type(node)
- if t is tuple:
- f.write("(%s @ %s\n" % (node[0], node[1]))
+ else:
+ node = p_c_func_or_var_declaration(s, s.position(), ctx)
+ if decorators is not None:
+ tup = Nodes.CFuncDefNode, Nodes.CVarDefNode, Nodes.CClassDefNode
+ if ctx.allow_struct_enum_decorator:
+ tup += Nodes.CStructOrUnionDefNode, Nodes.CEnumDefNode
+ if not isinstance(node, tup):
+ s.error("Decorators can only be followed by functions or classes")
+ node.decorators = decorators
+ return node
+
+
+#----------------------------------------------
+#
+# Debugging
+#
+#----------------------------------------------
+
+def print_parse_tree(f, node, level, key = None):
+ ind = " " * level
+ if node:
+ f.write(ind)
+ if key:
+ f.write("%s: " % key)
+ t = type(node)
+ if t is tuple:
+ f.write("(%s @ %s\n" % (node[0], node[1]))
for i in range(2, len(node)):
- print_parse_tree(f, node[i], level+1)
- f.write("%s)\n" % ind)
- return
- elif isinstance(node, Nodes.Node):
- try:
- tag = node.tag
- except AttributeError:
- tag = node.__class__.__name__
- f.write("%s @ %s\n" % (tag, node.pos))
- for name, value in node.__dict__.items():
- if name != 'tag' and name != 'pos':
- print_parse_tree(f, value, level+1, name)
- return
- elif t is list:
- f.write("[\n")
+ print_parse_tree(f, node[i], level+1)
+ f.write("%s)\n" % ind)
+ return
+ elif isinstance(node, Nodes.Node):
+ try:
+ tag = node.tag
+ except AttributeError:
+ tag = node.__class__.__name__
+ f.write("%s @ %s\n" % (tag, node.pos))
+ for name, value in node.__dict__.items():
+ if name != 'tag' and name != 'pos':
+ print_parse_tree(f, value, level+1, name)
+ return
+ elif t is list:
+ f.write("[\n")
for i in range(len(node)):
- print_parse_tree(f, node[i], level+1)
- f.write("%s]\n" % ind)
- return
- f.write("%s%s\n" % (ind, node))
+ print_parse_tree(f, node[i], level+1)
+ f.write("%s]\n" % ind)
+ return
+ f.write("%s%s\n" % (ind, node))
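A minimal illustrative sketch, not part of the patch above: the `_match_compiler_directive_comment` regex restored in Parsing.py is used to pick `# cython: ...` directive comments off the top of a module. The example strings below are hypothetical inputs, shown only to make the matching behaviour concrete.

    # Sketch of the directive-comment regex from p_compiler_directive_comments.
    import re

    _match = re.compile(r"^#\s*cython\s*:\s*((\w|[.])+\s*=.*)$").match

    m = _match("# cython: language_level=3")          # hypothetical directive line
    assert m is not None
    assert m.group(1).strip() == "language_level=3"   # this is what gets parsed
    assert _match("# an ordinary comment") is None    # non-directive comments are ignored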
diff --git a/contrib/tools/cython/Cython/Compiler/Pipeline.py b/contrib/tools/cython/Cython/Compiler/Pipeline.py
index 91196417da..5194c3e49b 100644
--- a/contrib/tools/cython/Cython/Compiler/Pipeline.py
+++ b/contrib/tools/cython/Cython/Compiler/Pipeline.py
@@ -1,84 +1,84 @@
-from __future__ import absolute_import
-
-import itertools
-from time import time
-
-from . import Errors
-from . import DebugFlags
-from . import Options
-from .Errors import CompileError, InternalError, AbortError
-from . import Naming
-
-#
-# Really small pipeline stages
-#
-def dumptree(t):
- # For quick debugging in pipelines
+from __future__ import absolute_import
+
+import itertools
+from time import time
+
+from . import Errors
+from . import DebugFlags
+from . import Options
+from .Errors import CompileError, InternalError, AbortError
+from . import Naming
+
+#
+# Really small pipeline stages
+#
+def dumptree(t):
+ # For quick debugging in pipelines
print(t.dump())
- return t
-
-def abort_on_errors(node):
- # Stop the pipeline if there are any errors.
- if Errors.num_errors != 0:
- raise AbortError("pipeline break")
- return node
-
-def parse_stage_factory(context):
- def parse(compsrc):
- source_desc = compsrc.source_desc
- full_module_name = compsrc.full_module_name
- initial_pos = (source_desc, 1, 0)
- saved_cimport_from_pyx, Options.cimport_from_pyx = Options.cimport_from_pyx, False
+ return t
+
+def abort_on_errors(node):
+ # Stop the pipeline if there are any errors.
+ if Errors.num_errors != 0:
+ raise AbortError("pipeline break")
+ return node
+
+def parse_stage_factory(context):
+ def parse(compsrc):
+ source_desc = compsrc.source_desc
+ full_module_name = compsrc.full_module_name
+ initial_pos = (source_desc, 1, 0)
+ saved_cimport_from_pyx, Options.cimport_from_pyx = Options.cimport_from_pyx, False
scope = context.find_module(full_module_name, pos = initial_pos, need_pxd = 0)
- Options.cimport_from_pyx = saved_cimport_from_pyx
- tree = context.parse(source_desc, scope, pxd = 0, full_module_name = full_module_name)
- tree.compilation_source = compsrc
- tree.scope = scope
- tree.is_pxd = False
- return tree
- return parse
-
-def parse_pxd_stage_factory(context, scope, module_name):
- def parse(source_desc):
- tree = context.parse(source_desc, scope, pxd=True,
- full_module_name=module_name)
- tree.scope = scope
- tree.is_pxd = True
- return tree
- return parse
-
-def generate_pyx_code_stage_factory(options, result):
- def generate_pyx_code_stage(module_node):
- module_node.process_implementation(options, result)
- result.compilation_source = module_node.compilation_source
- return result
- return generate_pyx_code_stage
-
-
-def inject_pxd_code_stage_factory(context):
- def inject_pxd_code_stage(module_node):
+ Options.cimport_from_pyx = saved_cimport_from_pyx
+ tree = context.parse(source_desc, scope, pxd = 0, full_module_name = full_module_name)
+ tree.compilation_source = compsrc
+ tree.scope = scope
+ tree.is_pxd = False
+ return tree
+ return parse
+
+def parse_pxd_stage_factory(context, scope, module_name):
+ def parse(source_desc):
+ tree = context.parse(source_desc, scope, pxd=True,
+ full_module_name=module_name)
+ tree.scope = scope
+ tree.is_pxd = True
+ return tree
+ return parse
+
+def generate_pyx_code_stage_factory(options, result):
+ def generate_pyx_code_stage(module_node):
+ module_node.process_implementation(options, result)
+ result.compilation_source = module_node.compilation_source
+ return result
+ return generate_pyx_code_stage
+
+
+def inject_pxd_code_stage_factory(context):
+ def inject_pxd_code_stage(module_node):
for name, (statlistnode, scope) in context.pxds.items():
- module_node.merge_in(statlistnode, scope)
- return module_node
- return inject_pxd_code_stage
-
-
-def use_utility_code_definitions(scope, target, seen=None):
- if seen is None:
- seen = set()
-
+ module_node.merge_in(statlistnode, scope)
+ return module_node
+ return inject_pxd_code_stage
+
+
+def use_utility_code_definitions(scope, target, seen=None):
+ if seen is None:
+ seen = set()
+
for entry in scope.entries.values():
- if entry in seen:
- continue
-
- seen.add(entry)
- if entry.used and entry.utility_code_definition:
- target.use_utility_code(entry.utility_code_definition)
- for required_utility in entry.utility_code_definition.requires:
- target.use_utility_code(required_utility)
- elif entry.as_module:
- use_utility_code_definitions(entry.as_module, target, seen)
-
+ if entry in seen:
+ continue
+
+ seen.add(entry)
+ if entry.used and entry.utility_code_definition:
+ target.use_utility_code(entry.utility_code_definition)
+ for required_utility in entry.utility_code_definition.requires:
+ target.use_utility_code(required_utility)
+ elif entry.as_module:
+ use_utility_code_definitions(entry.as_module, target, seen)
+
def sort_utility_codes(utilcodes):
ranks = {}
@@ -109,241 +109,241 @@ def normalize_deps(utilcodes):
utilcode.requires = [unify_dep(dep) for dep in utilcode.requires or ()]
-def inject_utility_code_stage_factory(context):
- def inject_utility_code_stage(module_node):
+def inject_utility_code_stage_factory(context):
+ def inject_utility_code_stage(module_node):
module_node.prepare_utility_code()
- use_utility_code_definitions(context.cython_scope, module_node.scope)
+ use_utility_code_definitions(context.cython_scope, module_node.scope)
module_node.scope.utility_code_list = sort_utility_codes(module_node.scope.utility_code_list)
normalize_deps(module_node.scope.utility_code_list)
- added = []
- # Note: the list might be extended inside the loop (if some utility code
- # pulls in other utility code, explicitly or implicitly)
- for utilcode in module_node.scope.utility_code_list:
+ added = []
+ # Note: the list might be extended inside the loop (if some utility code
+ # pulls in other utility code, explicitly or implicitly)
+ for utilcode in module_node.scope.utility_code_list:
if utilcode in added:
continue
- added.append(utilcode)
- if utilcode.requires:
- for dep in utilcode.requires:
+ added.append(utilcode)
+ if utilcode.requires:
+ for dep in utilcode.requires:
if dep not in added and dep not in module_node.scope.utility_code_list:
- module_node.scope.utility_code_list.append(dep)
+ module_node.scope.utility_code_list.append(dep)
tree = utilcode.get_tree(cython_scope=context.cython_scope)
- if tree:
- module_node.merge_in(tree.body, tree.scope, merge_scope=True)
- return module_node
- return inject_utility_code_stage
-
-
-#
-# Pipeline factories
-#
-
-def create_pipeline(context, mode, exclude_classes=()):
- assert mode in ('pyx', 'py', 'pxd')
- from .Visitor import PrintTree
- from .ParseTreeTransforms import WithTransform, NormalizeTree, PostParse, PxdPostParse
+ if tree:
+ module_node.merge_in(tree.body, tree.scope, merge_scope=True)
+ return module_node
+ return inject_utility_code_stage
+
+
+#
+# Pipeline factories
+#
+
+def create_pipeline(context, mode, exclude_classes=()):
+ assert mode in ('pyx', 'py', 'pxd')
+ from .Visitor import PrintTree
+ from .ParseTreeTransforms import WithTransform, NormalizeTree, PostParse, PxdPostParse
from .ParseTreeTransforms import ForwardDeclareTypes, InjectGilHandling, AnalyseDeclarationsTransform
- from .ParseTreeTransforms import AnalyseExpressionsTransform, FindInvalidUseOfFusedTypes
- from .ParseTreeTransforms import CreateClosureClasses, MarkClosureVisitor, DecoratorTransform
+ from .ParseTreeTransforms import AnalyseExpressionsTransform, FindInvalidUseOfFusedTypes
+ from .ParseTreeTransforms import CreateClosureClasses, MarkClosureVisitor, DecoratorTransform
from .ParseTreeTransforms import TrackNumpyAttributes, InterpretCompilerDirectives, TransformBuiltinMethods
- from .ParseTreeTransforms import ExpandInplaceOperators, ParallelRangeTransform
- from .ParseTreeTransforms import CalculateQualifiedNamesTransform
- from .TypeInference import MarkParallelAssignments, MarkOverflowingArithmetic
- from .ParseTreeTransforms import AdjustDefByDirectives, AlignFunctionDefinitions
- from .ParseTreeTransforms import RemoveUnreachableCode, GilCheck
- from .FlowControl import ControlFlowAnalysis
- from .AnalysedTreeTransforms import AutoTestDictTransform
- from .AutoDocTransforms import EmbedSignature
- from .Optimize import FlattenInListTransform, SwitchTransform, IterationTransform
- from .Optimize import EarlyReplaceBuiltinCalls, OptimizeBuiltinCalls
- from .Optimize import InlineDefNodeCalls
- from .Optimize import ConstantFolding, FinalOptimizePhase
- from .Optimize import DropRefcountingTransform
- from .Optimize import ConsolidateOverflowCheck
- from .Buffer import IntroduceBufferAuxiliaryVars
- from .ModuleNode import check_c_declarations, check_c_declarations_pxd
-
-
- if mode == 'pxd':
- _check_c_declarations = check_c_declarations_pxd
- _specific_post_parse = PxdPostParse(context)
- else:
- _check_c_declarations = check_c_declarations
- _specific_post_parse = None
-
- if mode == 'py':
- _align_function_definitions = AlignFunctionDefinitions(context)
- else:
- _align_function_definitions = None
-
- # NOTE: This is the "common" parts of the pipeline, which is also
- # code in pxd files. So it will be run multiple times in a
- # compilation stage.
- stages = [
- NormalizeTree(context),
- PostParse(context),
- _specific_post_parse,
+ from .ParseTreeTransforms import ExpandInplaceOperators, ParallelRangeTransform
+ from .ParseTreeTransforms import CalculateQualifiedNamesTransform
+ from .TypeInference import MarkParallelAssignments, MarkOverflowingArithmetic
+ from .ParseTreeTransforms import AdjustDefByDirectives, AlignFunctionDefinitions
+ from .ParseTreeTransforms import RemoveUnreachableCode, GilCheck
+ from .FlowControl import ControlFlowAnalysis
+ from .AnalysedTreeTransforms import AutoTestDictTransform
+ from .AutoDocTransforms import EmbedSignature
+ from .Optimize import FlattenInListTransform, SwitchTransform, IterationTransform
+ from .Optimize import EarlyReplaceBuiltinCalls, OptimizeBuiltinCalls
+ from .Optimize import InlineDefNodeCalls
+ from .Optimize import ConstantFolding, FinalOptimizePhase
+ from .Optimize import DropRefcountingTransform
+ from .Optimize import ConsolidateOverflowCheck
+ from .Buffer import IntroduceBufferAuxiliaryVars
+ from .ModuleNode import check_c_declarations, check_c_declarations_pxd
+
+
+ if mode == 'pxd':
+ _check_c_declarations = check_c_declarations_pxd
+ _specific_post_parse = PxdPostParse(context)
+ else:
+ _check_c_declarations = check_c_declarations
+ _specific_post_parse = None
+
+ if mode == 'py':
+ _align_function_definitions = AlignFunctionDefinitions(context)
+ else:
+ _align_function_definitions = None
+
+ # NOTE: This is the "common" parts of the pipeline, which is also
+ # code in pxd files. So it will be run multiple times in a
+ # compilation stage.
+ stages = [
+ NormalizeTree(context),
+ PostParse(context),
+ _specific_post_parse,
TrackNumpyAttributes(),
- InterpretCompilerDirectives(context, context.compiler_directives),
- ParallelRangeTransform(context),
- AdjustDefByDirectives(context),
+ InterpretCompilerDirectives(context, context.compiler_directives),
+ ParallelRangeTransform(context),
+ AdjustDefByDirectives(context),
WithTransform(context),
- MarkClosureVisitor(context),
- _align_function_definitions,
- RemoveUnreachableCode(context),
- ConstantFolding(),
- FlattenInListTransform(),
- DecoratorTransform(context),
- ForwardDeclareTypes(context),
+ MarkClosureVisitor(context),
+ _align_function_definitions,
+ RemoveUnreachableCode(context),
+ ConstantFolding(),
+ FlattenInListTransform(),
+ DecoratorTransform(context),
+ ForwardDeclareTypes(context),
InjectGilHandling(),
- AnalyseDeclarationsTransform(context),
- AutoTestDictTransform(context),
- EmbedSignature(context),
- EarlyReplaceBuiltinCalls(context), ## Necessary?
+ AnalyseDeclarationsTransform(context),
+ AutoTestDictTransform(context),
+ EmbedSignature(context),
+ EarlyReplaceBuiltinCalls(context), ## Necessary?
TransformBuiltinMethods(context),
- MarkParallelAssignments(context),
- ControlFlowAnalysis(context),
- RemoveUnreachableCode(context),
- # MarkParallelAssignments(context),
- MarkOverflowingArithmetic(context),
- IntroduceBufferAuxiliaryVars(context),
- _check_c_declarations,
- InlineDefNodeCalls(context),
- AnalyseExpressionsTransform(context),
- FindInvalidUseOfFusedTypes(context),
- ExpandInplaceOperators(context),
+ MarkParallelAssignments(context),
+ ControlFlowAnalysis(context),
+ RemoveUnreachableCode(context),
+ # MarkParallelAssignments(context),
+ MarkOverflowingArithmetic(context),
+ IntroduceBufferAuxiliaryVars(context),
+ _check_c_declarations,
+ InlineDefNodeCalls(context),
+ AnalyseExpressionsTransform(context),
+ FindInvalidUseOfFusedTypes(context),
+ ExpandInplaceOperators(context),
IterationTransform(context),
SwitchTransform(context),
- OptimizeBuiltinCalls(context), ## Necessary?
- CreateClosureClasses(context), ## After all lookups and type inference
- CalculateQualifiedNamesTransform(context),
- ConsolidateOverflowCheck(context),
- DropRefcountingTransform(),
- FinalOptimizePhase(context),
- GilCheck(),
- ]
- filtered_stages = []
- for s in stages:
- if s.__class__ not in exclude_classes:
- filtered_stages.append(s)
- return filtered_stages
-
-def create_pyx_pipeline(context, options, result, py=False, exclude_classes=()):
- if py:
- mode = 'py'
- else:
- mode = 'pyx'
- test_support = []
- if options.evaluate_tree_assertions:
- from ..TestUtils import TreeAssertVisitor
- test_support.append(TreeAssertVisitor())
-
- if options.gdb_debug:
- from ..Debugger import DebugWriter # requires Py2.5+
- from .ParseTreeTransforms import DebugTransform
- context.gdb_debug_outputwriter = DebugWriter.CythonDebugWriter(
- options.output_dir)
- debug_transform = [DebugTransform(context, options, result)]
- else:
- debug_transform = []
-
- return list(itertools.chain(
- [parse_stage_factory(context)],
- create_pipeline(context, mode, exclude_classes=exclude_classes),
- test_support,
- [inject_pxd_code_stage_factory(context),
- inject_utility_code_stage_factory(context),
- abort_on_errors],
- debug_transform,
- [generate_pyx_code_stage_factory(options, result)]))
-
-def create_pxd_pipeline(context, scope, module_name):
- from .CodeGeneration import ExtractPxdCode
-
- # The pxd pipeline ends up with a CCodeWriter containing the
- # code of the pxd, as well as a pxd scope.
- return [
- parse_pxd_stage_factory(context, scope, module_name)
- ] + create_pipeline(context, 'pxd') + [
- ExtractPxdCode()
- ]
-
-def create_py_pipeline(context, options, result):
- return create_pyx_pipeline(context, options, result, py=True)
-
-def create_pyx_as_pxd_pipeline(context, result):
- from .ParseTreeTransforms import AlignFunctionDefinitions, \
- MarkClosureVisitor, WithTransform, AnalyseDeclarationsTransform
- from .Optimize import ConstantFolding, FlattenInListTransform
- from .Nodes import StatListNode
- pipeline = []
- pyx_pipeline = create_pyx_pipeline(context, context.options, result,
- exclude_classes=[
- AlignFunctionDefinitions,
- MarkClosureVisitor,
- ConstantFolding,
- FlattenInListTransform,
- WithTransform
- ])
- for stage in pyx_pipeline:
- pipeline.append(stage)
- if isinstance(stage, AnalyseDeclarationsTransform):
- # This is the last stage we need.
- break
- def fake_pxd(root):
- for entry in root.scope.entries.values():
- if not entry.in_cinclude:
- entry.defined_in_pxd = 1
- if entry.name == entry.cname and entry.visibility != 'extern':
- # Always mangle non-extern cimported entries.
- entry.cname = entry.scope.mangle(Naming.func_prefix, entry.name)
- return StatListNode(root.pos, stats=[]), root.scope
- pipeline.append(fake_pxd)
- return pipeline
-
-def insert_into_pipeline(pipeline, transform, before=None, after=None):
- """
- Insert a new transform into the pipeline after or before an instance of
- the given class. e.g.
-
- pipeline = insert_into_pipeline(pipeline, transform,
- after=AnalyseDeclarationsTransform)
- """
- assert before or after
-
- cls = before or after
- for i, t in enumerate(pipeline):
- if isinstance(t, cls):
- break
-
- if after:
- i += 1
-
- return pipeline[:i] + [transform] + pipeline[i:]
-
-#
-# Running a pipeline
-#
-
+ OptimizeBuiltinCalls(context), ## Necessary?
+ CreateClosureClasses(context), ## After all lookups and type inference
+ CalculateQualifiedNamesTransform(context),
+ ConsolidateOverflowCheck(context),
+ DropRefcountingTransform(),
+ FinalOptimizePhase(context),
+ GilCheck(),
+ ]
+ filtered_stages = []
+ for s in stages:
+ if s.__class__ not in exclude_classes:
+ filtered_stages.append(s)
+ return filtered_stages
+
+def create_pyx_pipeline(context, options, result, py=False, exclude_classes=()):
+ if py:
+ mode = 'py'
+ else:
+ mode = 'pyx'
+ test_support = []
+ if options.evaluate_tree_assertions:
+ from ..TestUtils import TreeAssertVisitor
+ test_support.append(TreeAssertVisitor())
+
+ if options.gdb_debug:
+ from ..Debugger import DebugWriter # requires Py2.5+
+ from .ParseTreeTransforms import DebugTransform
+ context.gdb_debug_outputwriter = DebugWriter.CythonDebugWriter(
+ options.output_dir)
+ debug_transform = [DebugTransform(context, options, result)]
+ else:
+ debug_transform = []
+
+ return list(itertools.chain(
+ [parse_stage_factory(context)],
+ create_pipeline(context, mode, exclude_classes=exclude_classes),
+ test_support,
+ [inject_pxd_code_stage_factory(context),
+ inject_utility_code_stage_factory(context),
+ abort_on_errors],
+ debug_transform,
+ [generate_pyx_code_stage_factory(options, result)]))
+
+def create_pxd_pipeline(context, scope, module_name):
+ from .CodeGeneration import ExtractPxdCode
+
+ # The pxd pipeline ends up with a CCodeWriter containing the
+ # code of the pxd, as well as a pxd scope.
+ return [
+ parse_pxd_stage_factory(context, scope, module_name)
+ ] + create_pipeline(context, 'pxd') + [
+ ExtractPxdCode()
+ ]
+
+def create_py_pipeline(context, options, result):
+ return create_pyx_pipeline(context, options, result, py=True)
+
+def create_pyx_as_pxd_pipeline(context, result):
+ from .ParseTreeTransforms import AlignFunctionDefinitions, \
+ MarkClosureVisitor, WithTransform, AnalyseDeclarationsTransform
+ from .Optimize import ConstantFolding, FlattenInListTransform
+ from .Nodes import StatListNode
+ pipeline = []
+ pyx_pipeline = create_pyx_pipeline(context, context.options, result,
+ exclude_classes=[
+ AlignFunctionDefinitions,
+ MarkClosureVisitor,
+ ConstantFolding,
+ FlattenInListTransform,
+ WithTransform
+ ])
+ for stage in pyx_pipeline:
+ pipeline.append(stage)
+ if isinstance(stage, AnalyseDeclarationsTransform):
+ # This is the last stage we need.
+ break
+ def fake_pxd(root):
+ for entry in root.scope.entries.values():
+ if not entry.in_cinclude:
+ entry.defined_in_pxd = 1
+ if entry.name == entry.cname and entry.visibility != 'extern':
+ # Always mangle non-extern cimported entries.
+ entry.cname = entry.scope.mangle(Naming.func_prefix, entry.name)
+ return StatListNode(root.pos, stats=[]), root.scope
+ pipeline.append(fake_pxd)
+ return pipeline
+
+def insert_into_pipeline(pipeline, transform, before=None, after=None):
+ """
+ Insert a new transform into the pipeline after or before an instance of
+ the given class. e.g.
+
+ pipeline = insert_into_pipeline(pipeline, transform,
+ after=AnalyseDeclarationsTransform)
+ """
+ assert before or after
+
+ cls = before or after
+ for i, t in enumerate(pipeline):
+ if isinstance(t, cls):
+ break
+
+ if after:
+ i += 1
+
+ return pipeline[:i] + [transform] + pipeline[i:]
+
+#
+# Running a pipeline
+#
+
_pipeline_entry_points = {}
-def run_pipeline(pipeline, source, printtree=True):
- from .Visitor import PrintTree
+def run_pipeline(pipeline, source, printtree=True):
+ from .Visitor import PrintTree
exec_ns = globals().copy() if DebugFlags.debug_verbose_pipeline else None
-
+
def run(phase, data):
return phase(data)
- error = None
- data = source
- try:
- try:
- for phase in pipeline:
- if phase is not None:
+ error = None
+ data = source
+ try:
+ try:
+ for phase in pipeline:
+ if phase is not None:
if not printtree and isinstance(phase, PrintTree):
continue
- if DebugFlags.debug_verbose_pipeline:
- t = time()
+ if DebugFlags.debug_verbose_pipeline:
+ t = time()
print("Entering pipeline phase %r" % phase)
# create a new wrapper for each step to show the name in profiles
phase_name = getattr(phase, '__name__', type(phase).__name__)
@@ -353,17 +353,17 @@ def run_pipeline(pipeline, source, printtree=True):
exec("def %s(phase, data): return phase(data)" % phase_name, exec_ns)
run = _pipeline_entry_points[phase_name] = exec_ns[phase_name]
data = run(phase, data)
- if DebugFlags.debug_verbose_pipeline:
+ if DebugFlags.debug_verbose_pipeline:
print(" %.3f seconds" % (time() - t))
except CompileError as err:
- # err is set
+ # err is set
Errors.report_error(err, use_stack=False)
- error = err
+ error = err
except InternalError as err:
- # Only raise if there was not an earlier error
- if Errors.num_errors == 0:
- raise
- error = err
+ # Only raise if there was not an earlier error
+ if Errors.num_errors == 0:
+ raise
+ error = err
except AbortError as err:
- error = err
- return (error, data)
+ error = err
+ return (error, data)
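A minimal sketch, again not part of the patch, of the pattern that `run_pipeline` above relies on: every stage is a callable that receives the previous stage's output, `None` entries (such as an unused `_specific_post_parse`) are skipped, and the final value is returned. The stage list here is hypothetical and only illustrates the data flow.

    # Sketch of the stage-threading pattern used by run_pipeline.
    def run_pipeline_sketch(pipeline, source):
        data = source
        for phase in pipeline:
            if phase is not None:      # some pipeline slots may legitimately be None
                data = phase(data)
        return data

    # Hypothetical stages, for illustration only.
    stages = [str.strip, str.upper, None, lambda s: s + "!"]
    assert run_pipeline_sketch(stages, "  hello ") == "HELLO!"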
diff --git a/contrib/tools/cython/Cython/Compiler/PyrexTypes.py b/contrib/tools/cython/Cython/Compiler/PyrexTypes.py
index 6c98b05da5..3d4931cea6 100644
--- a/contrib/tools/cython/Cython/Compiler/PyrexTypes.py
+++ b/contrib/tools/cython/Cython/Compiler/PyrexTypes.py
@@ -1,39 +1,39 @@
-#
-# Cython/Python language types
-#
-
-from __future__ import absolute_import
-
-import copy
+#
+# Cython/Python language types
+#
+
+from __future__ import absolute_import
+
+import copy
import hashlib
import re
-
+
try:
reduce
except NameError:
from functools import reduce
from Cython.Utils import cached_function
-from .Code import UtilityCode, LazyUtilityCode, TempitaUtilityCode
-from . import StringEncoding
-from . import Naming
-
+from .Code import UtilityCode, LazyUtilityCode, TempitaUtilityCode
+from . import StringEncoding
+from . import Naming
+
from .Errors import error, warning
-
-
-class BaseType(object):
- #
- # Base class for all Cython types including pseudo-types.
-
- # List of attribute names of any subtypes
- subtypes = []
+
+
+class BaseType(object):
+ #
+ # Base class for all Cython types including pseudo-types.
+
+ # List of attribute names of any subtypes
+ subtypes = []
_empty_declaration = None
_specialization_name = None
default_format_spec = None
-
- def can_coerce_to_pyobject(self, env):
- return False
-
+
+ def can_coerce_to_pyobject(self, env):
+ return False
+
def can_coerce_from_pyobject(self, env):
return False
@@ -43,15 +43,15 @@ class BaseType(object):
def convert_to_pystring(self, cvalue, code, format_spec=None):
raise NotImplementedError("C types that support string formatting must override this method")
- def cast_code(self, expr_code):
+ def cast_code(self, expr_code):
return "((%s)%s)" % (self.empty_declaration_code(), expr_code)
-
+
def empty_declaration_code(self):
if self._empty_declaration is None:
self._empty_declaration = self.declaration_code('')
return self._empty_declaration
- def specialization_name(self):
+ def specialization_name(self):
if self._specialization_name is None:
# This is not entirely robust.
common_subs = (self.empty_declaration_code()
@@ -61,261 +61,261 @@ class BaseType(object):
self._specialization_name = re.sub(
'[^a-zA-Z0-9_]', lambda x: '_%x_' % ord(x.group(0)), common_subs)
return self._specialization_name
-
- def base_declaration_code(self, base_code, entity_code):
- if entity_code:
- return "%s %s" % (base_code, entity_code)
- else:
- return base_code
-
- def __deepcopy__(self, memo):
- """
- Types never need to be copied, if we do copy, Unfortunate Things
- Will Happen!
- """
- return self
-
- def get_fused_types(self, result=None, seen=None, subtypes=None):
- subtypes = subtypes or self.subtypes
- if not subtypes:
- return None
-
- if result is None:
- result = []
- seen = set()
-
- for attr in subtypes:
- list_or_subtype = getattr(self, attr)
- if list_or_subtype:
- if isinstance(list_or_subtype, BaseType):
- list_or_subtype.get_fused_types(result, seen)
- else:
- for subtype in list_or_subtype:
- subtype.get_fused_types(result, seen)
-
- return result
-
- def specialize_fused(self, env):
- if env.fused_to_specific:
- return self.specialize(env.fused_to_specific)
-
- return self
-
- @property
- def is_fused(self):
- """
- Whether this type or any of its subtypes is a fused type
- """
- # Add this indirection for the is_fused property to allow overriding
- # get_fused_types in subclasses.
- return self.get_fused_types()
-
- def deduce_template_params(self, actual):
- """
- Deduce any template params in this (argument) type given the actual
- argument type.
-
- http://en.cppreference.com/w/cpp/language/function_template#Template_argument_deduction
- """
+
+ def base_declaration_code(self, base_code, entity_code):
+ if entity_code:
+ return "%s %s" % (base_code, entity_code)
+ else:
+ return base_code
+
+ def __deepcopy__(self, memo):
+ """
+ Types never need to be copied, if we do copy, Unfortunate Things
+ Will Happen!
+ """
+ return self
+
+ def get_fused_types(self, result=None, seen=None, subtypes=None):
+ subtypes = subtypes or self.subtypes
+ if not subtypes:
+ return None
+
+ if result is None:
+ result = []
+ seen = set()
+
+ for attr in subtypes:
+ list_or_subtype = getattr(self, attr)
+ if list_or_subtype:
+ if isinstance(list_or_subtype, BaseType):
+ list_or_subtype.get_fused_types(result, seen)
+ else:
+ for subtype in list_or_subtype:
+ subtype.get_fused_types(result, seen)
+
+ return result
+
+ def specialize_fused(self, env):
+ if env.fused_to_specific:
+ return self.specialize(env.fused_to_specific)
+
+ return self
+
+ @property
+ def is_fused(self):
+ """
+ Whether this type or any of its subtypes is a fused type
+ """
+ # Add this indirection for the is_fused property to allow overriding
+ # get_fused_types in subclasses.
+ return self.get_fused_types()
+
+ def deduce_template_params(self, actual):
+ """
+ Deduce any template params in this (argument) type given the actual
+ argument type.
+
+ http://en.cppreference.com/w/cpp/language/function_template#Template_argument_deduction
+ """
return {}
-
- def __lt__(self, other):
- """
- For sorting. The sorting order should correspond to the preference of
- conversion from Python types.
-
- Override to provide something sensible. This is only implemented so that
- python 3 doesn't trip
- """
- return id(type(self)) < id(type(other))
-
- def py_type_name(self):
- """
- Return the name of the Python type that can coerce to this type.
- """
-
- def typeof_name(self):
- """
- Return the string with which fused python functions can be indexed.
- """
- if self.is_builtin_type or self.py_type_name() == 'object':
- index_name = self.py_type_name()
- else:
- index_name = str(self)
-
- return index_name
-
- def check_for_null_code(self, cname):
- """
- Return the code for a NULL-check in case an UnboundLocalError should
- be raised if an entry of this type is referenced before assignment.
- Returns None if no check should be performed.
- """
- return None
-
- def invalid_value(self):
- """
- Returns the most invalid value an object of this type can assume as a
- C expression string. Returns None if no such value exists.
- """
-
-
-class PyrexType(BaseType):
- #
- # Base class for all Cython types
- #
- # is_pyobject boolean Is a Python object type
- # is_extension_type boolean Is a Python extension type
- # is_final_type boolean Is a final extension type
- # is_numeric boolean Is a C numeric type
- # is_int boolean Is a C integer type
- # is_float boolean Is a C floating point type
- # is_complex boolean Is a C complex type
- # is_void boolean Is the C void type
- # is_array boolean Is a C array type
- # is_ptr boolean Is a C pointer type
- # is_null_ptr boolean Is the type of NULL
- # is_reference boolean Is a C reference type
- # is_const boolean Is a C const type.
- # is_cfunction boolean Is a C function type
- # is_struct_or_union boolean Is a C struct or union type
- # is_struct boolean Is a C struct type
- # is_enum boolean Is a C enum type
- # is_typedef boolean Is a typedef type
- # is_string boolean Is a C char * type
- # is_pyunicode_ptr boolean Is a C PyUNICODE * type
- # is_cpp_string boolean Is a C++ std::string type
- # is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE
- # is_returncode boolean Is used only to signal exceptions
- # is_error boolean Is the dummy error type
- # is_buffer boolean Is buffer access type
+
+ def __lt__(self, other):
+ """
+ For sorting. The sorting order should correspond to the preference of
+ conversion from Python types.
+
+ Override to provide something sensible. This is only implemented so that
+ python 3 doesn't trip
+ """
+ return id(type(self)) < id(type(other))
+
+ def py_type_name(self):
+ """
+ Return the name of the Python type that can coerce to this type.
+ """
+
+ def typeof_name(self):
+ """
+ Return the string with which fused python functions can be indexed.
+ """
+ if self.is_builtin_type or self.py_type_name() == 'object':
+ index_name = self.py_type_name()
+ else:
+ index_name = str(self)
+
+ return index_name
+
+ def check_for_null_code(self, cname):
+ """
+ Return the code for a NULL-check in case an UnboundLocalError should
+ be raised if an entry of this type is referenced before assignment.
+ Returns None if no check should be performed.
+ """
+ return None
+
+ def invalid_value(self):
+ """
+ Returns the most invalid value an object of this type can assume as a
+ C expression string. Returns None if no such value exists.
+ """
+
+
+class PyrexType(BaseType):
+ #
+ # Base class for all Cython types
+ #
+ # is_pyobject boolean Is a Python object type
+ # is_extension_type boolean Is a Python extension type
+ # is_final_type boolean Is a final extension type
+ # is_numeric boolean Is a C numeric type
+ # is_int boolean Is a C integer type
+ # is_float boolean Is a C floating point type
+ # is_complex boolean Is a C complex type
+ # is_void boolean Is the C void type
+ # is_array boolean Is a C array type
+ # is_ptr boolean Is a C pointer type
+ # is_null_ptr boolean Is the type of NULL
+ # is_reference boolean Is a C reference type
+ # is_const boolean Is a C const type.
+ # is_cfunction boolean Is a C function type
+ # is_struct_or_union boolean Is a C struct or union type
+ # is_struct boolean Is a C struct type
+ # is_enum boolean Is a C enum type
+ # is_typedef boolean Is a typedef type
+ # is_string boolean Is a C char * type
+ # is_pyunicode_ptr boolean Is a C PyUNICODE * type
+ # is_cpp_string boolean Is a C++ std::string type
+ # is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE
+ # is_returncode boolean Is used only to signal exceptions
+ # is_error boolean Is the dummy error type
+ # is_buffer boolean Is buffer access type
# is_pythran_expr boolean Is Pythran expr
# is_numpy_buffer boolean Is Numpy array buffer
- # has_attributes boolean Has C dot-selectable attributes
+ # has_attributes boolean Has C dot-selectable attributes
# default_value string Initial value that can be assigned before first user assignment.
# declaration_value string The value statically assigned on declaration (if any).
- # entry Entry The Entry for this type
- #
- # declaration_code(entity_code,
- # for_display = 0, dll_linkage = None, pyrex = 0)
- # Returns a code fragment for the declaration of an entity
- # of this type, given a code fragment for the entity.
- # * If for_display, this is for reading by a human in an error
- # message; otherwise it must be valid C code.
- # * If dll_linkage is not None, it must be 'DL_EXPORT' or
- # 'DL_IMPORT', and will be added to the base type part of
- # the declaration.
- # * If pyrex = 1, this is for use in a 'cdef extern'
- # statement of a Cython include file.
- #
- # assignable_from(src_type)
- # Tests whether a variable of this type can be
- # assigned a value of type src_type.
- #
- # same_as(other_type)
- # Tests whether this type represents the same type
- # as other_type.
- #
- # as_argument_type():
- # Coerces array and C function types into pointer type for use as
- # a formal argument type.
- #
-
- is_pyobject = 0
- is_unspecified = 0
- is_extension_type = 0
- is_final_type = 0
- is_builtin_type = 0
- is_numeric = 0
- is_int = 0
- is_float = 0
- is_complex = 0
- is_void = 0
- is_array = 0
- is_ptr = 0
- is_null_ptr = 0
- is_reference = 0
- is_const = 0
- is_cfunction = 0
- is_struct_or_union = 0
- is_cpp_class = 0
- is_cpp_string = 0
- is_struct = 0
- is_enum = 0
- is_typedef = 0
- is_string = 0
- is_pyunicode_ptr = 0
- is_unicode_char = 0
- is_returncode = 0
- is_error = 0
- is_buffer = 0
+ # entry Entry The Entry for this type
+ #
+ # declaration_code(entity_code,
+ # for_display = 0, dll_linkage = None, pyrex = 0)
+ # Returns a code fragment for the declaration of an entity
+ # of this type, given a code fragment for the entity.
+ # * If for_display, this is for reading by a human in an error
+ # message; otherwise it must be valid C code.
+ # * If dll_linkage is not None, it must be 'DL_EXPORT' or
+ # 'DL_IMPORT', and will be added to the base type part of
+ # the declaration.
+ # * If pyrex = 1, this is for use in a 'cdef extern'
+ # statement of a Cython include file.
+ #
+ # assignable_from(src_type)
+ # Tests whether a variable of this type can be
+ # assigned a value of type src_type.
+ #
+ # same_as(other_type)
+ # Tests whether this type represents the same type
+ # as other_type.
+ #
+ # as_argument_type():
+ # Coerces array and C function types into pointer type for use as
+ # a formal argument type.
+ #
+
+ is_pyobject = 0
+ is_unspecified = 0
+ is_extension_type = 0
+ is_final_type = 0
+ is_builtin_type = 0
+ is_numeric = 0
+ is_int = 0
+ is_float = 0
+ is_complex = 0
+ is_void = 0
+ is_array = 0
+ is_ptr = 0
+ is_null_ptr = 0
+ is_reference = 0
+ is_const = 0
+ is_cfunction = 0
+ is_struct_or_union = 0
+ is_cpp_class = 0
+ is_cpp_string = 0
+ is_struct = 0
+ is_enum = 0
+ is_typedef = 0
+ is_string = 0
+ is_pyunicode_ptr = 0
+ is_unicode_char = 0
+ is_returncode = 0
+ is_error = 0
+ is_buffer = 0
is_ctuple = 0
- is_memoryviewslice = 0
+ is_memoryviewslice = 0
is_pythran_expr = 0
is_numpy_buffer = 0
- has_attributes = 0
- default_value = ""
+ has_attributes = 0
+ default_value = ""
declaration_value = ""
-
- def resolve(self):
- # If a typedef, returns the base type.
- return self
-
- def specialize(self, values):
- # TODO(danilo): Override wherever it makes sense.
- return self
-
- def literal_code(self, value):
- # Returns a C code fragment representing a literal
- # value of this type.
- return str(value)
-
- def __str__(self):
- return self.declaration_code("", for_display = 1).strip()
-
- def same_as(self, other_type, **kwds):
- return self.same_as_resolved_type(other_type.resolve(), **kwds)
-
- def same_as_resolved_type(self, other_type):
- return self == other_type or other_type is error_type
-
- def subtype_of(self, other_type):
- return self.subtype_of_resolved_type(other_type.resolve())
-
- def subtype_of_resolved_type(self, other_type):
- return self.same_as(other_type)
-
- def assignable_from(self, src_type):
- return self.assignable_from_resolved_type(src_type.resolve())
-
- def assignable_from_resolved_type(self, src_type):
- return self.same_as(src_type)
-
- def as_argument_type(self):
- return self
-
- def is_complete(self):
- # A type is incomplete if it is an unsized array,
- # a struct whose attributes are not defined, etc.
- return 1
-
- def is_simple_buffer_dtype(self):
- return (self.is_int or self.is_float or self.is_complex or self.is_pyobject or
- self.is_extension_type or self.is_ptr)
-
- def struct_nesting_depth(self):
- # Returns the number levels of nested structs. This is
- # used for constructing a stack for walking the run-time
- # type information of the struct.
- return 1
-
- def global_init_code(self, entry, code):
- # abstract
- pass
-
- def needs_nonecheck(self):
- return 0
-
+
+ def resolve(self):
+ # If a typedef, returns the base type.
+ return self
+
+ def specialize(self, values):
+ # TODO(danilo): Override wherever it makes sense.
+ return self
+
+ def literal_code(self, value):
+ # Returns a C code fragment representing a literal
+ # value of this type.
+ return str(value)
+
+ def __str__(self):
+ return self.declaration_code("", for_display = 1).strip()
+
+ def same_as(self, other_type, **kwds):
+ return self.same_as_resolved_type(other_type.resolve(), **kwds)
+
+ def same_as_resolved_type(self, other_type):
+ return self == other_type or other_type is error_type
+
+ def subtype_of(self, other_type):
+ return self.subtype_of_resolved_type(other_type.resolve())
+
+ def subtype_of_resolved_type(self, other_type):
+ return self.same_as(other_type)
+
+ def assignable_from(self, src_type):
+ return self.assignable_from_resolved_type(src_type.resolve())
+
+ def assignable_from_resolved_type(self, src_type):
+ return self.same_as(src_type)
+
+ def as_argument_type(self):
+ return self
+
+ def is_complete(self):
+ # A type is incomplete if it is an unsized array,
+ # a struct whose attributes are not defined, etc.
+ return 1
+
+ def is_simple_buffer_dtype(self):
+ return (self.is_int or self.is_float or self.is_complex or self.is_pyobject or
+ self.is_extension_type or self.is_ptr)
+
+ def struct_nesting_depth(self):
+ # Returns the number levels of nested structs. This is
+ # used for constructing a stack for walking the run-time
+ # type information of the struct.
+ return 1
+
+ def global_init_code(self, entry, code):
+ # abstract
+ pass
+
+ def needs_nonecheck(self):
+ return 0
+
def _assign_from_py_code(self, source_code, result_code, error_pos, code,
from_py_function=None, error_condition=None, extra_args=None):
args = ', ' + ', '.join('%s' % arg for arg in extra_args) if extra_args else ''
@@ -330,87 +330,87 @@ class PyrexType(BaseType):
result_code,
convert_call,
code.error_goto_if(error_condition or self.error_condition(result_code), error_pos))
-
-def public_decl(base_code, dll_linkage):
- if dll_linkage:
+
+def public_decl(base_code, dll_linkage):
+ if dll_linkage:
return "%s(%s)" % (dll_linkage, base_code.replace(',', ' __PYX_COMMA '))
- else:
- return base_code
-
+ else:
+ return base_code
+
def create_typedef_type(name, base_type, cname, is_external=0, namespace=None):
- is_fused = base_type.is_fused
- if base_type.is_complex or is_fused:
- if is_external:
- if is_fused:
- msg = "Fused"
- else:
- msg = "Complex"
-
- raise ValueError("%s external typedefs not supported" % msg)
-
- return base_type
- else:
+ is_fused = base_type.is_fused
+ if base_type.is_complex or is_fused:
+ if is_external:
+ if is_fused:
+ msg = "Fused"
+ else:
+ msg = "Complex"
+
+ raise ValueError("%s external typedefs not supported" % msg)
+
+ return base_type
+ else:
return CTypedefType(name, base_type, cname, is_external, namespace)
-
-
-class CTypedefType(BaseType):
- #
- # Pseudo-type defined with a ctypedef statement in a
- # 'cdef extern from' block.
- # Delegates most attribute lookups to the base type.
- # (Anything not defined here or in the BaseType is delegated.)
- #
- # qualified_name string
- # typedef_name string
- # typedef_cname string
- # typedef_base_type PyrexType
- # typedef_is_external bool
-
- is_typedef = 1
- typedef_is_external = 0
-
- to_py_utility_code = None
- from_py_utility_code = None
-
- subtypes = ['typedef_base_type']
-
+
+
+class CTypedefType(BaseType):
+ #
+ # Pseudo-type defined with a ctypedef statement in a
+ # 'cdef extern from' block.
+ # Delegates most attribute lookups to the base type.
+ # (Anything not defined here or in the BaseType is delegated.)
+ #
+ # qualified_name string
+ # typedef_name string
+ # typedef_cname string
+ # typedef_base_type PyrexType
+ # typedef_is_external bool
+
+ is_typedef = 1
+ typedef_is_external = 0
+
+ to_py_utility_code = None
+ from_py_utility_code = None
+
+ subtypes = ['typedef_base_type']
+
def __init__(self, name, base_type, cname, is_external=0, namespace=None):
- assert not base_type.is_complex
- self.typedef_name = name
- self.typedef_cname = cname
- self.typedef_base_type = base_type
- self.typedef_is_external = is_external
+ assert not base_type.is_complex
+ self.typedef_name = name
+ self.typedef_cname = cname
+ self.typedef_base_type = base_type
+ self.typedef_is_external = is_external
self.typedef_namespace = namespace
-
- def invalid_value(self):
- return self.typedef_base_type.invalid_value()
-
- def resolve(self):
- return self.typedef_base_type.resolve()
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- base_code = self.typedef_name
- else:
- base_code = public_decl(self.typedef_cname, dll_linkage)
+
+ def invalid_value(self):
+ return self.typedef_base_type.invalid_value()
+
+ def resolve(self):
+ return self.typedef_base_type.resolve()
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ base_code = self.typedef_name
+ else:
+ base_code = public_decl(self.typedef_cname, dll_linkage)
if self.typedef_namespace is not None and not pyrex:
base_code = "%s::%s" % (self.typedef_namespace.empty_declaration_code(), base_code)
- return self.base_declaration_code(base_code, entity_code)
-
- def as_argument_type(self):
- return self
-
- def cast_code(self, expr_code):
- # If self is really an array (rather than pointer), we can't cast.
- # For example, the gmp mpz_t.
- if self.typedef_base_type.is_array:
- base_type = self.typedef_base_type.base_type
- return CPtrType(base_type).cast_code(expr_code)
- else:
- return BaseType.cast_code(self, expr_code)
-
+ return self.base_declaration_code(base_code, entity_code)
+
+ def as_argument_type(self):
+ return self
+
+ def cast_code(self, expr_code):
+ # If self is really an array (rather than pointer), we can't cast.
+ # For example, the gmp mpz_t.
+ if self.typedef_base_type.is_array:
+ base_type = self.typedef_base_type.base_type
+ return CPtrType(base_type).cast_code(expr_code)
+ else:
+ return BaseType.cast_code(self, expr_code)
+
def specialize(self, values):
base_type = self.typedef_base_type.specialize(values)
namespace = self.typedef_namespace.specialize(values) if self.typedef_namespace else None
@@ -420,37 +420,37 @@ class CTypedefType(BaseType):
return create_typedef_type(self.typedef_name, base_type, self.typedef_cname,
0, namespace)
- def __repr__(self):
- return "<CTypedefType %s>" % self.typedef_cname
-
- def __str__(self):
- return self.typedef_name
-
- def _create_utility_code(self, template_utility_code,
- template_function_name):
+ def __repr__(self):
+ return "<CTypedefType %s>" % self.typedef_cname
+
+ def __str__(self):
+ return self.typedef_name
+
+ def _create_utility_code(self, template_utility_code,
+ template_function_name):
type_name = type_identifier(self.typedef_cname)
- utility_code = template_utility_code.specialize(
- type = self.typedef_cname,
- TypeName = type_name)
- function_name = template_function_name % type_name
- return utility_code, function_name
-
- def create_to_py_utility_code(self, env):
- if self.typedef_is_external:
- if not self.to_py_utility_code:
- base_type = self.typedef_base_type
- if type(base_type) is CIntType:
- self.to_py_function = "__Pyx_PyInt_From_" + self.specialization_name()
+ utility_code = template_utility_code.specialize(
+ type = self.typedef_cname,
+ TypeName = type_name)
+ function_name = template_function_name % type_name
+ return utility_code, function_name
+
+ def create_to_py_utility_code(self, env):
+ if self.typedef_is_external:
+ if not self.to_py_utility_code:
+ base_type = self.typedef_base_type
+ if type(base_type) is CIntType:
+ self.to_py_function = "__Pyx_PyInt_From_" + self.specialization_name()
env.use_utility_code(TempitaUtilityCode.load_cached(
- "CIntToPy", "TypeConversion.c",
+ "CIntToPy", "TypeConversion.c",
context={"TYPE": self.empty_declaration_code(),
- "TO_PY_FUNCTION": self.to_py_function}))
- return True
- elif base_type.is_float:
- pass # XXX implement!
- elif base_type.is_complex:
- pass # XXX implement!
- pass
+ "TO_PY_FUNCTION": self.to_py_function}))
+ return True
+ elif base_type.is_float:
+ pass # XXX implement!
+ elif base_type.is_complex:
+ pass # XXX implement!
+ pass
elif base_type.is_cpp_string:
cname = "__pyx_convert_PyObject_string_to_py_%s" % type_identifier(self)
context = {
@@ -462,27 +462,27 @@ class CTypedefType(BaseType):
"string.to_py", "CppConvert.pyx", context=context))
self.to_py_function = cname
return True
- if self.to_py_utility_code:
- env.use_utility_code(self.to_py_utility_code)
- return True
- # delegation
- return self.typedef_base_type.create_to_py_utility_code(env)
-
- def create_from_py_utility_code(self, env):
- if self.typedef_is_external:
- if not self.from_py_utility_code:
- base_type = self.typedef_base_type
- if type(base_type) is CIntType:
- self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name()
+ if self.to_py_utility_code:
+ env.use_utility_code(self.to_py_utility_code)
+ return True
+ # delegation
+ return self.typedef_base_type.create_to_py_utility_code(env)
+
+ def create_from_py_utility_code(self, env):
+ if self.typedef_is_external:
+ if not self.from_py_utility_code:
+ base_type = self.typedef_base_type
+ if type(base_type) is CIntType:
+ self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name()
env.use_utility_code(TempitaUtilityCode.load_cached(
- "CIntFromPy", "TypeConversion.c",
+ "CIntFromPy", "TypeConversion.c",
context={"TYPE": self.empty_declaration_code(),
- "FROM_PY_FUNCTION": self.from_py_function}))
- return True
- elif base_type.is_float:
- pass # XXX implement!
- elif base_type.is_complex:
- pass # XXX implement!
+ "FROM_PY_FUNCTION": self.from_py_function}))
+ return True
+ elif base_type.is_float:
+ pass # XXX implement!
+ elif base_type.is_complex:
+ pass # XXX implement!
elif base_type.is_cpp_string:
cname = '__pyx_convert_string_from_py_%s' % type_identifier(self)
context = {
@@ -494,12 +494,12 @@ class CTypedefType(BaseType):
"string.from_py", "CppConvert.pyx", context=context))
self.from_py_function = cname
return True
- if self.from_py_utility_code:
- env.use_utility_code(self.from_py_utility_code)
- return True
- # delegation
- return self.typedef_base_type.create_from_py_utility_code(env)
-
+ if self.from_py_utility_code:
+ env.use_utility_code(self.from_py_utility_code)
+ return True
+ # delegation
+ return self.typedef_base_type.create_from_py_utility_code(env)
+
def to_py_call_code(self, source_code, result_code, result_type, to_py_function=None):
if to_py_function is None:
to_py_function = self.to_py_function
@@ -514,116 +514,116 @@ class CTypedefType(BaseType):
error_condition or self.error_condition(result_code)
)
- def overflow_check_binop(self, binop, env, const_rhs=False):
- env.use_utility_code(UtilityCode.load("Common", "Overflow.c"))
+ def overflow_check_binop(self, binop, env, const_rhs=False):
+ env.use_utility_code(UtilityCode.load("Common", "Overflow.c"))
type = self.empty_declaration_code()
- name = self.specialization_name()
- if binop == "lshift":
+ name = self.specialization_name()
+ if binop == "lshift":
env.use_utility_code(TempitaUtilityCode.load_cached(
- "LeftShift", "Overflow.c",
- context={'TYPE': type, 'NAME': name, 'SIGNED': self.signed}))
- else:
- if const_rhs:
- binop += "_const"
- _load_overflow_base(env)
+ "LeftShift", "Overflow.c",
+ context={'TYPE': type, 'NAME': name, 'SIGNED': self.signed}))
+ else:
+ if const_rhs:
+ binop += "_const"
+ _load_overflow_base(env)
env.use_utility_code(TempitaUtilityCode.load_cached(
- "SizeCheck", "Overflow.c",
- context={'TYPE': type, 'NAME': name}))
+ "SizeCheck", "Overflow.c",
+ context={'TYPE': type, 'NAME': name}))
env.use_utility_code(TempitaUtilityCode.load_cached(
- "Binop", "Overflow.c",
- context={'TYPE': type, 'NAME': name, 'BINOP': binop}))
- return "__Pyx_%s_%s_checking_overflow" % (binop, name)
-
- def error_condition(self, result_code):
- if self.typedef_is_external:
- if self.exception_value:
+ "Binop", "Overflow.c",
+ context={'TYPE': type, 'NAME': name, 'BINOP': binop}))
+ return "__Pyx_%s_%s_checking_overflow" % (binop, name)
+
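
As an aside, a standalone sketch of the helper-name scheme used by overflow_check_binop above; "add" and "long" are assumed example values for the binop and the specialization name, not taken from this diff.

    binop, name = "add", "long"
    print("__Pyx_%s_%s_checking_overflow" % (binop, name))
    # prints: __Pyx_add_long_checking_overflow
    # With const_rhs=True the non-shift branch above appends '_const' to the binop first:
    print("__Pyx_%s_%s_checking_overflow" % (binop + "_const", name))
    # prints: __Pyx_add_const_long_checking_overflow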
+ def error_condition(self, result_code):
+ if self.typedef_is_external:
+ if self.exception_value:
condition = "(%s == %s)" % (
result_code, self.cast_code(self.exception_value))
- if self.exception_check:
- condition += " && PyErr_Occurred()"
- return condition
- # delegation
- return self.typedef_base_type.error_condition(result_code)
-
- def __getattr__(self, name):
- return getattr(self.typedef_base_type, name)
-
- def py_type_name(self):
- return self.typedef_base_type.py_type_name()
-
- def can_coerce_to_pyobject(self, env):
- return self.typedef_base_type.can_coerce_to_pyobject(env)
-
+ if self.exception_check:
+ condition += " && PyErr_Occurred()"
+ return condition
+ # delegation
+ return self.typedef_base_type.error_condition(result_code)
+
+ def __getattr__(self, name):
+ return getattr(self.typedef_base_type, name)
+
+ def py_type_name(self):
+ return self.typedef_base_type.py_type_name()
+
+ def can_coerce_to_pyobject(self, env):
+ return self.typedef_base_type.can_coerce_to_pyobject(env)
+
def can_coerce_from_pyobject(self, env):
return self.typedef_base_type.can_coerce_from_pyobject(env)
-
-
-class MemoryViewSliceType(PyrexType):
-
- is_memoryviewslice = 1
-
- has_attributes = 1
- scope = None
-
- # These are special cased in Defnode
- from_py_function = None
- to_py_function = None
-
- exception_value = None
- exception_check = True
-
- subtypes = ['dtype']
-
- def __init__(self, base_dtype, axes):
- """
- MemoryViewSliceType(base, axes)
-
- Base is the C base type; axes is a list of (access, packing) strings,
- where access is one of 'full', 'direct' or 'ptr' and packing is one of
- 'contig', 'strided' or 'follow'. There is one (access, packing) tuple
- for each dimension.
-
- the access specifiers determine whether the array data contains
- pointers that need to be dereferenced along that axis when
- retrieving/setting:
-
- 'direct' -- No pointers stored in this dimension.
- 'ptr' -- Pointer stored in this dimension.
- 'full' -- Check along this dimension, don't assume either.
-
- the packing specifiers specify how the array elements are laid out
- in memory.
-
+
+
+class MemoryViewSliceType(PyrexType):
+
+ is_memoryviewslice = 1
+
+ has_attributes = 1
+ scope = None
+
+ # These are special cased in Defnode
+ from_py_function = None
+ to_py_function = None
+
+ exception_value = None
+ exception_check = True
+
+ subtypes = ['dtype']
+
+ def __init__(self, base_dtype, axes):
+ """
+ MemoryViewSliceType(base, axes)
+
+ Base is the C base type; axes is a list of (access, packing) strings,
+ where access is one of 'full', 'direct' or 'ptr' and packing is one of
+ 'contig', 'strided' or 'follow'. There is one (access, packing) tuple
+ for each dimension.
+
+ the access specifiers determine whether the array data contains
+ pointers that need to be dereferenced along that axis when
+ retrieving/setting:
+
+ 'direct' -- No pointers stored in this dimension.
+ 'ptr' -- Pointer stored in this dimension.
+ 'full' -- Check along this dimension, don't assume either.
+
+ the packing specifiers specify how the array elements are laid out
+ in memory.
+
'contig' -- The data is contiguous in memory along this dimension.
- At most one dimension may be specified as 'contig'.
+ At most one dimension may be specified as 'contig'.
'strided' -- The data isn't contiguous along this dimension.
- 'follow' -- Used for C/Fortran contiguous arrays, a 'follow' dimension
- has its stride automatically computed from extents of the other
- dimensions to ensure C or Fortran memory layout.
-
- C-contiguous memory has 'direct' as the access spec, 'contig' as the
- *last* axis' packing spec and 'follow' for all other packing specs.
-
- Fortran-contiguous memory has 'direct' as the access spec, 'contig' as
- the *first* axis' packing spec and 'follow' for all other packing
- specs.
- """
+ 'follow' -- Used for C/Fortran contiguous arrays, a 'follow' dimension
+ has its stride automatically computed from extents of the other
+ dimensions to ensure C or Fortran memory layout.
+
+ C-contiguous memory has 'direct' as the access spec, 'contig' as the
+ *last* axis' packing spec and 'follow' for all other packing specs.
+
+ Fortran-contiguous memory has 'direct' as the access spec, 'contig' as
+ the *first* axis' packing spec and 'follow' for all other packing
+ specs.
+ """
from . import Buffer, MemoryView
-
- self.dtype = base_dtype
- self.axes = axes
- self.ndim = len(axes)
- self.flags = MemoryView.get_buf_flags(self.axes)
-
- self.is_c_contig, self.is_f_contig = MemoryView.is_cf_contig(self.axes)
- assert not (self.is_c_contig and self.is_f_contig)
-
- self.mode = MemoryView.get_mode(axes)
- self.writable_needed = False
-
- if not self.dtype.is_fused:
+
+ self.dtype = base_dtype
+ self.axes = axes
+ self.ndim = len(axes)
+ self.flags = MemoryView.get_buf_flags(self.axes)
+
+ self.is_c_contig, self.is_f_contig = MemoryView.is_cf_contig(self.axes)
+ assert not (self.is_c_contig and self.is_f_contig)
+
+ self.mode = MemoryView.get_mode(axes)
+ self.writable_needed = False
+
+ if not self.dtype.is_fused:
self.dtype_name = Buffer.mangle_dtype_name(self.dtype)
-
+
def __hash__(self):
return hash(self.__class__) ^ hash(self.dtype) ^ hash(tuple(self.axes))
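
As an aside, the (access, packing) convention documented in the MemoryViewSliceType docstring above can be written out directly. This is a standalone Python sketch, not Cython itself, and the 2-D shapes are assumed examples.

    # C-contiguous 2-D slice (e.g. double[:, ::1]): 'direct' access everywhere,
    # 'contig' only on the last axis, 'follow' on the rest.
    c_contig_2d = [('direct', 'follow'), ('direct', 'contig')]
    # Fortran-contiguous 2-D slice (e.g. double[::1, :]): 'contig' on the first axis.
    f_contig_2d = [('direct', 'contig'), ('direct', 'follow')]
    # A fully generic slice uses 'full' access and 'strided' packing on every axis.
    generic_2d = [('full', 'strided'), ('full', 'strided')]
    print(c_contig_2d, f_contig_2d, generic_2d)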
@@ -633,131 +633,131 @@ class MemoryViewSliceType(PyrexType):
else:
return False
- def same_as_resolved_type(self, other_type):
- return ((other_type.is_memoryviewslice and
+ def same_as_resolved_type(self, other_type):
+ return ((other_type.is_memoryviewslice and
#self.writable_needed == other_type.writable_needed and # FIXME: should be only uni-directional
- self.dtype.same_as(other_type.dtype) and
- self.axes == other_type.axes) or
- other_type is error_type)
-
- def needs_nonecheck(self):
- return True
-
- def is_complete(self):
- # incomplete since the underlying struct doesn't have a cython.memoryview object.
- return 0
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- # XXX: we put these guards in for now...
- assert not pyrex
- assert not dll_linkage
- from . import MemoryView
+ self.dtype.same_as(other_type.dtype) and
+ self.axes == other_type.axes) or
+ other_type is error_type)
+
+ def needs_nonecheck(self):
+ return True
+
+ def is_complete(self):
+ # incomplete since the underlying struct doesn't have a cython.memoryview object.
+ return 0
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ # XXX: we put these guards in for now...
+ assert not pyrex
+ assert not dll_linkage
+ from . import MemoryView
base_code = str(self) if for_display else MemoryView.memviewslice_cname
- return self.base_declaration_code(
+ return self.base_declaration_code(
base_code,
- entity_code)
-
- def attributes_known(self):
- if self.scope is None:
- from . import Symtab
-
- self.scope = scope = Symtab.CClassScope(
- 'mvs_class_'+self.specialization_suffix(),
- None,
- visibility='extern')
-
- scope.parent_type = self
- scope.directives = {}
-
- scope.declare_var('_data', c_char_ptr_type, None,
- cname='data', is_cdef=1)
-
- return True
-
- def declare_attribute(self, attribute, env, pos):
- from . import MemoryView, Options
-
- scope = self.scope
-
- if attribute == 'shape':
- scope.declare_var('shape',
- c_array_type(c_py_ssize_t_type,
- Options.buffer_max_dims),
- pos,
- cname='shape',
- is_cdef=1)
-
- elif attribute == 'strides':
- scope.declare_var('strides',
- c_array_type(c_py_ssize_t_type,
- Options.buffer_max_dims),
- pos,
- cname='strides',
- is_cdef=1)
-
- elif attribute == 'suboffsets':
- scope.declare_var('suboffsets',
- c_array_type(c_py_ssize_t_type,
- Options.buffer_max_dims),
- pos,
- cname='suboffsets',
- is_cdef=1)
-
- elif attribute in ("copy", "copy_fortran"):
- ndim = len(self.axes)
-
+ entity_code)
+
+ def attributes_known(self):
+ if self.scope is None:
+ from . import Symtab
+
+ self.scope = scope = Symtab.CClassScope(
+ 'mvs_class_'+self.specialization_suffix(),
+ None,
+ visibility='extern')
+
+ scope.parent_type = self
+ scope.directives = {}
+
+ scope.declare_var('_data', c_char_ptr_type, None,
+ cname='data', is_cdef=1)
+
+ return True
+
+ def declare_attribute(self, attribute, env, pos):
+ from . import MemoryView, Options
+
+ scope = self.scope
+
+ if attribute == 'shape':
+ scope.declare_var('shape',
+ c_array_type(c_py_ssize_t_type,
+ Options.buffer_max_dims),
+ pos,
+ cname='shape',
+ is_cdef=1)
+
+ elif attribute == 'strides':
+ scope.declare_var('strides',
+ c_array_type(c_py_ssize_t_type,
+ Options.buffer_max_dims),
+ pos,
+ cname='strides',
+ is_cdef=1)
+
+ elif attribute == 'suboffsets':
+ scope.declare_var('suboffsets',
+ c_array_type(c_py_ssize_t_type,
+ Options.buffer_max_dims),
+ pos,
+ cname='suboffsets',
+ is_cdef=1)
+
+ elif attribute in ("copy", "copy_fortran"):
+ ndim = len(self.axes)
+
follow_dim = [('direct', 'follow')]
contig_dim = [('direct', 'contig')]
to_axes_c = follow_dim * (ndim - 1) + contig_dim
to_axes_f = contig_dim + follow_dim * (ndim -1)
-
+
dtype = self.dtype
if dtype.is_const:
dtype = dtype.const_base_type
-
+
to_memview_c = MemoryViewSliceType(dtype, to_axes_c)
to_memview_f = MemoryViewSliceType(dtype, to_axes_f)
- for to_memview, cython_name in [(to_memview_c, "copy"),
- (to_memview_f, "copy_fortran")]:
+ for to_memview, cython_name in [(to_memview_c, "copy"),
+ (to_memview_f, "copy_fortran")]:
copy_func_type = CFuncType(
to_memview,
[CFuncTypeArg("memviewslice", self, None)])
copy_cname = MemoryView.copy_c_or_fortran_cname(to_memview)
-
+
entry = scope.declare_cfunction(
cython_name,
copy_func_type, pos=pos, defining=1,
cname=copy_cname)
-
+
utility = MemoryView.get_copy_new_utility(pos, self, to_memview)
env.use_utility_code(utility)
- MemoryView.use_cython_array_utility_code(env)
-
- elif attribute in ("is_c_contig", "is_f_contig"):
- # is_c_contig and is_f_contig functions
+ MemoryView.use_cython_array_utility_code(env)
+
+ elif attribute in ("is_c_contig", "is_f_contig"):
+ # is_c_contig and is_f_contig functions
for (c_or_f, cython_name) in (('C', 'is_c_contig'), ('F', 'is_f_contig')):
-
+
is_contig_name = MemoryView.get_is_contig_func_name(c_or_f, self.ndim)
-
- cfunctype = CFuncType(
- return_type=c_bint_type,
- args=[CFuncTypeArg("memviewslice", self, None)],
- exception_value="-1",
- )
-
- entry = scope.declare_cfunction(cython_name,
- cfunctype,
- pos=pos,
- defining=1,
- cname=is_contig_name)
-
+
+ cfunctype = CFuncType(
+ return_type=c_bint_type,
+ args=[CFuncTypeArg("memviewslice", self, None)],
+ exception_value="-1",
+ )
+
+ entry = scope.declare_cfunction(cython_name,
+ cfunctype,
+ pos=pos,
+ defining=1,
+ cname=is_contig_name)
+
entry.utility_code_definition = MemoryView.get_is_contig_utility(c_or_f, self.ndim)
-
- return True
-
+
+ return True
+
def get_entry(self, node, cname=None, type=None):
from . import MemoryView, Symtab
@@ -869,54 +869,54 @@ class MemoryViewSliceType(PyrexType):
super(MemoryViewSliceType,self).specialization_name(),
self.specialization_suffix())
- def specialization_suffix(self):
- return "%s_%s" % (self.axes_to_name(), self.dtype_name)
-
- def can_coerce_to_pyobject(self, env):
- return True
-
+ def specialization_suffix(self):
+ return "%s_%s" % (self.axes_to_name(), self.dtype_name)
+
+ def can_coerce_to_pyobject(self, env):
+ return True
+
def can_coerce_from_pyobject(self, env):
return True
- def check_for_null_code(self, cname):
- return cname + '.memview'
-
- def create_from_py_utility_code(self, env):
- from . import MemoryView, Buffer
-
- # We don't have 'code', so use a LazyUtilityCode with a callback.
- def lazy_utility_callback(code):
+ def check_for_null_code(self, cname):
+ return cname + '.memview'
+
+ def create_from_py_utility_code(self, env):
+ from . import MemoryView, Buffer
+
+ # We don't have 'code', so use a LazyUtilityCode with a callback.
+ def lazy_utility_callback(code):
context['dtype_typeinfo'] = Buffer.get_type_information_cname(code, self.dtype)
- return TempitaUtilityCode.load(
+ return TempitaUtilityCode.load(
"ObjectToMemviewSlice", "MemoryView_C.c", context=context)
-
- env.use_utility_code(MemoryView.memviewslice_init_code)
- env.use_utility_code(LazyUtilityCode(lazy_utility_callback))
-
- if self.is_c_contig:
- c_or_f_flag = "__Pyx_IS_C_CONTIG"
- elif self.is_f_contig:
- c_or_f_flag = "__Pyx_IS_F_CONTIG"
- else:
- c_or_f_flag = "0"
-
- suffix = self.specialization_suffix()
- funcname = "__Pyx_PyObject_to_MemoryviewSlice_" + suffix
-
- context = dict(
- MemoryView.context,
- buf_flag = self.flags,
- ndim = self.ndim,
- axes_specs = ', '.join(self.axes_to_code()),
+
+ env.use_utility_code(MemoryView.memviewslice_init_code)
+ env.use_utility_code(LazyUtilityCode(lazy_utility_callback))
+
+ if self.is_c_contig:
+ c_or_f_flag = "__Pyx_IS_C_CONTIG"
+ elif self.is_f_contig:
+ c_or_f_flag = "__Pyx_IS_F_CONTIG"
+ else:
+ c_or_f_flag = "0"
+
+ suffix = self.specialization_suffix()
+ funcname = "__Pyx_PyObject_to_MemoryviewSlice_" + suffix
+
+ context = dict(
+ MemoryView.context,
+ buf_flag = self.flags,
+ ndim = self.ndim,
+ axes_specs = ', '.join(self.axes_to_code()),
dtype_typedecl = self.dtype.empty_declaration_code(),
- struct_nesting_depth = self.dtype.struct_nesting_depth(),
- c_or_f_flag = c_or_f_flag,
- funcname = funcname,
- )
-
- self.from_py_function = funcname
- return True
-
+ struct_nesting_depth = self.dtype.struct_nesting_depth(),
+ c_or_f_flag = c_or_f_flag,
+ funcname = funcname,
+ )
+
+ self.from_py_function = funcname
+ return True
+
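
A standalone sketch of the converter name assembled by create_from_py_utility_code above; "dc_double" is a hypothetical specialization suffix (axes abbreviation plus dtype name), not a value taken from this diff.

    suffix = "dc_double"   # hypothetical specialization_suffix() value
    funcname = "__Pyx_PyObject_to_MemoryviewSlice_" + suffix
    print(funcname)
    # prints: __Pyx_PyObject_to_MemoryviewSlice_dc_double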
def from_py_call_code(self, source_code, result_code, error_pos, code,
from_py_function=None, error_condition=None):
# NOTE: auto-detection of readonly buffers is disabled:
@@ -926,32 +926,32 @@ class MemoryViewSliceType(PyrexType):
source_code, result_code, error_pos, code, from_py_function, error_condition,
extra_args=['PyBUF_WRITABLE' if writable else '0'])
- def create_to_py_utility_code(self, env):
+ def create_to_py_utility_code(self, env):
self._dtype_to_py_func, self._dtype_from_py_func = self.dtype_object_conversion_funcs(env)
- return True
-
+ return True
+
def to_py_call_code(self, source_code, result_code, result_type, to_py_function=None):
assert self._dtype_to_py_func
assert self._dtype_from_py_func
-
+
to_py_func = "(PyObject *(*)(char *)) " + self._dtype_to_py_func
from_py_func = "(int (*)(char *, PyObject *)) " + self._dtype_from_py_func
-
+
tup = (result_code, source_code, self.ndim, to_py_func, from_py_func, self.dtype.is_pyobject)
return "%s = __pyx_memoryview_fromslice(%s, %s, %s, %s, %d);" % tup
- def dtype_object_conversion_funcs(self, env):
- get_function = "__pyx_memview_get_%s" % self.dtype_name
- set_function = "__pyx_memview_set_%s" % self.dtype_name
-
- context = dict(
- get_function = get_function,
- set_function = set_function,
- )
-
- if self.dtype.is_pyobject:
- utility_name = "MemviewObjectToObject"
- else:
+ def dtype_object_conversion_funcs(self, env):
+ get_function = "__pyx_memview_get_%s" % self.dtype_name
+ set_function = "__pyx_memview_set_%s" % self.dtype_name
+
+ context = dict(
+ get_function = get_function,
+ set_function = set_function,
+ )
+
+ if self.dtype.is_pyobject:
+ utility_name = "MemviewObjectToObject"
+ else:
self.dtype.create_to_py_utility_code(env)
to_py_function = self.dtype.to_py_function
@@ -961,539 +961,539 @@ class MemoryViewSliceType(PyrexType):
from_py_function = self.dtype.from_py_function
if not (to_py_function or from_py_function):
- return "NULL", "NULL"
+ return "NULL", "NULL"
if not to_py_function:
- get_function = "NULL"
+ get_function = "NULL"
if not from_py_function:
- set_function = "NULL"
-
- utility_name = "MemviewDtypeToObject"
- error_condition = (self.dtype.error_condition('value') or
- 'PyErr_Occurred()')
- context.update(
+ set_function = "NULL"
+
+ utility_name = "MemviewDtypeToObject"
+ error_condition = (self.dtype.error_condition('value') or
+ 'PyErr_Occurred()')
+ context.update(
to_py_function=to_py_function,
from_py_function=from_py_function,
dtype=self.dtype.empty_declaration_code(),
error_condition=error_condition,
- )
-
+ )
+
utility = TempitaUtilityCode.load_cached(
utility_name, "MemoryView_C.c", context=context)
- env.use_utility_code(utility)
- return get_function, set_function
-
- def axes_to_code(self):
- """Return a list of code constants for each axis"""
- from . import MemoryView
- d = MemoryView._spec_to_const
- return ["(%s | %s)" % (d[a], d[p]) for a, p in self.axes]
-
- def axes_to_name(self):
- """Return an abbreviated name for our axes"""
- from . import MemoryView
- d = MemoryView._spec_to_abbrev
- return "".join(["%s%s" % (d[a], d[p]) for a, p in self.axes])
-
- def error_condition(self, result_code):
- return "!%s.memview" % result_code
-
- def __str__(self):
- from . import MemoryView
-
- axes_code_list = []
- for idx, (access, packing) in enumerate(self.axes):
- flag = MemoryView.get_memoryview_flag(access, packing)
- if flag == "strided":
- axes_code_list.append(":")
- else:
- if flag == 'contiguous':
- have_follow = [p for a, p in self.axes[idx - 1:idx + 2]
- if p == 'follow']
- if have_follow or self.ndim == 1:
- flag = '1'
-
- axes_code_list.append("::" + flag)
-
- if self.dtype.is_pyobject:
- dtype_name = self.dtype.name
- else:
- dtype_name = self.dtype
-
- return "%s[%s]" % (dtype_name, ", ".join(axes_code_list))
-
- def specialize(self, values):
- """This does not validate the base type!!"""
- dtype = self.dtype.specialize(values)
- if dtype is not self.dtype:
- return MemoryViewSliceType(dtype, self.axes)
-
- return self
-
- def cast_code(self, expr_code):
- return expr_code
-
-
-class BufferType(BaseType):
- #
- # Delegates most attribute lookups to the base type.
- # (Anything not defined here or in the BaseType is delegated.)
- #
- # dtype PyrexType
- # ndim int
- # mode str
- # negative_indices bool
- # cast bool
- # is_buffer bool
- # writable bool
-
- is_buffer = 1
- writable = True
-
- subtypes = ['dtype']
-
- def __init__(self, base, dtype, ndim, mode, negative_indices, cast):
- self.base = base
- self.dtype = dtype
- self.ndim = ndim
- self.buffer_ptr_type = CPtrType(dtype)
- self.mode = mode
- self.negative_indices = negative_indices
- self.cast = cast
+ env.use_utility_code(utility)
+ return get_function, set_function
+
+ def axes_to_code(self):
+ """Return a list of code constants for each axis"""
+ from . import MemoryView
+ d = MemoryView._spec_to_const
+ return ["(%s | %s)" % (d[a], d[p]) for a, p in self.axes]
+
+ def axes_to_name(self):
+ """Return an abbreviated name for our axes"""
+ from . import MemoryView
+ d = MemoryView._spec_to_abbrev
+ return "".join(["%s%s" % (d[a], d[p]) for a, p in self.axes])
+
+ def error_condition(self, result_code):
+ return "!%s.memview" % result_code
+
+ def __str__(self):
+ from . import MemoryView
+
+ axes_code_list = []
+ for idx, (access, packing) in enumerate(self.axes):
+ flag = MemoryView.get_memoryview_flag(access, packing)
+ if flag == "strided":
+ axes_code_list.append(":")
+ else:
+ if flag == 'contiguous':
+ have_follow = [p for a, p in self.axes[idx - 1:idx + 2]
+ if p == 'follow']
+ if have_follow or self.ndim == 1:
+ flag = '1'
+
+ axes_code_list.append("::" + flag)
+
+ if self.dtype.is_pyobject:
+ dtype_name = self.dtype.name
+ else:
+ dtype_name = self.dtype
+
+ return "%s[%s]" % (dtype_name, ", ".join(axes_code_list))
+
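
A standalone sketch of the display form produced by __str__ above for a 2-D C-contiguous slice; the "double" dtype and the per-axis flag strings are assumed example values.

    dtype_name = "double"
    axes_code_list = [":", "::1"]   # strided axis -> ":", contiguous last axis -> "::1"
    print("%s[%s]" % (dtype_name, ", ".join(axes_code_list)))
    # prints: double[:, ::1]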
+ def specialize(self, values):
+ """This does not validate the base type!!"""
+ dtype = self.dtype.specialize(values)
+ if dtype is not self.dtype:
+ return MemoryViewSliceType(dtype, self.axes)
+
+ return self
+
+ def cast_code(self, expr_code):
+ return expr_code
+
+
+class BufferType(BaseType):
+ #
+ # Delegates most attribute lookups to the base type.
+ # (Anything not defined here or in the BaseType is delegated.)
+ #
+ # dtype PyrexType
+ # ndim int
+ # mode str
+ # negative_indices bool
+ # cast bool
+ # is_buffer bool
+ # writable bool
+
+ is_buffer = 1
+ writable = True
+
+ subtypes = ['dtype']
+
+ def __init__(self, base, dtype, ndim, mode, negative_indices, cast):
+ self.base = base
+ self.dtype = dtype
+ self.ndim = ndim
+ self.buffer_ptr_type = CPtrType(dtype)
+ self.mode = mode
+ self.negative_indices = negative_indices
+ self.cast = cast
self.is_numpy_buffer = self.base.name == "ndarray"
-
+
def can_coerce_to_pyobject(self,env):
return True
def can_coerce_from_pyobject(self,env):
return True
- def as_argument_type(self):
- return self
-
- def specialize(self, values):
- dtype = self.dtype.specialize(values)
- if dtype is not self.dtype:
- return BufferType(self.base, dtype, self.ndim, self.mode,
- self.negative_indices, self.cast)
- return self
-
+ def as_argument_type(self):
+ return self
+
+ def specialize(self, values):
+ dtype = self.dtype.specialize(values)
+ if dtype is not self.dtype:
+ return BufferType(self.base, dtype, self.ndim, self.mode,
+ self.negative_indices, self.cast)
+ return self
+
def get_entry(self, node):
from . import Buffer
assert node.is_name
return Buffer.BufferEntry(node.entry)
- def __getattr__(self, name):
- return getattr(self.base, name)
-
- def __repr__(self):
- return "<BufferType %r>" % self.base
-
- def __str__(self):
- # avoid ', ', as fused functions split the signature string on ', '
- cast_str = ''
- if self.cast:
- cast_str = ',cast=True'
-
- return "%s[%s,ndim=%d%s]" % (self.base, self.dtype, self.ndim,
- cast_str)
-
- def assignable_from(self, other_type):
- if other_type.is_buffer:
- return (self.same_as(other_type, compare_base=False) and
- self.base.assignable_from(other_type.base))
-
- return self.base.assignable_from(other_type)
-
- def same_as(self, other_type, compare_base=True):
- if not other_type.is_buffer:
- return other_type.same_as(self.base)
-
- return (self.dtype.same_as(other_type.dtype) and
- self.ndim == other_type.ndim and
- self.mode == other_type.mode and
- self.cast == other_type.cast and
- (not compare_base or self.base.same_as(other_type.base)))
-
-
-class PyObjectType(PyrexType):
- #
- # Base class for all Python object types (reference-counted).
- #
- # buffer_defaults dict or None Default options for buffer
-
- name = "object"
- is_pyobject = 1
- default_value = "0"
+ def __getattr__(self, name):
+ return getattr(self.base, name)
+
+ def __repr__(self):
+ return "<BufferType %r>" % self.base
+
+ def __str__(self):
+ # avoid ', ', as fused functions split the signature string on ', '
+ cast_str = ''
+ if self.cast:
+ cast_str = ',cast=True'
+
+ return "%s[%s,ndim=%d%s]" % (self.base, self.dtype, self.ndim,
+ cast_str)
+
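
A standalone sketch of the buffer signature string built by BufferType.__str__ above; "ndarray" and "double" are assumed example values.

    base, dtype, ndim, cast = "ndarray", "double", 2, False
    cast_str = ',cast=True' if cast else ''
    print("%s[%s,ndim=%d%s]" % (base, dtype, ndim, cast_str))
    # prints: ndarray[double,ndim=2]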
+ def assignable_from(self, other_type):
+ if other_type.is_buffer:
+ return (self.same_as(other_type, compare_base=False) and
+ self.base.assignable_from(other_type.base))
+
+ return self.base.assignable_from(other_type)
+
+ def same_as(self, other_type, compare_base=True):
+ if not other_type.is_buffer:
+ return other_type.same_as(self.base)
+
+ return (self.dtype.same_as(other_type.dtype) and
+ self.ndim == other_type.ndim and
+ self.mode == other_type.mode and
+ self.cast == other_type.cast and
+ (not compare_base or self.base.same_as(other_type.base)))
+
+
+class PyObjectType(PyrexType):
+ #
+ # Base class for all Python object types (reference-counted).
+ #
+ # buffer_defaults dict or None Default options for buffer
+
+ name = "object"
+ is_pyobject = 1
+ default_value = "0"
declaration_value = "0"
- buffer_defaults = None
- is_extern = False
- is_subclassed = False
- is_gc_simple = False
-
- def __str__(self):
- return "Python object"
-
- def __repr__(self):
- return "<PyObjectType>"
-
- def can_coerce_to_pyobject(self, env):
- return True
-
+ buffer_defaults = None
+ is_extern = False
+ is_subclassed = False
+ is_gc_simple = False
+
+ def __str__(self):
+ return "Python object"
+
+ def __repr__(self):
+ return "<PyObjectType>"
+
+ def can_coerce_to_pyobject(self, env):
+ return True
+
def can_coerce_from_pyobject(self, env):
return True
- def default_coerced_ctype(self):
- """The default C type that this Python type coerces to, or None."""
- return None
-
- def assignable_from(self, src_type):
- # except for pointers, conversion will be attempted
- return not src_type.is_ptr or src_type.is_string or src_type.is_pyunicode_ptr
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- base_code = "object"
- else:
- base_code = public_decl("PyObject", dll_linkage)
- entity_code = "*%s" % entity_code
- return self.base_declaration_code(base_code, entity_code)
-
- def as_pyobject(self, cname):
- if (not self.is_complete()) or self.is_extension_type:
- return "(PyObject *)" + cname
- else:
- return cname
-
- def py_type_name(self):
- return "object"
-
- def __lt__(self, other):
- """
- Make sure we sort highest, as instance checking on py_type_name
- ('object') is always true
- """
- return False
-
- def global_init_code(self, entry, code):
- code.put_init_var_to_py_none(entry, nanny=False)
-
- def check_for_null_code(self, cname):
- return cname
-
-
-builtin_types_that_cannot_create_refcycles = set([
- 'bool', 'int', 'long', 'float', 'complex',
- 'bytearray', 'bytes', 'unicode', 'str', 'basestring'
-])
-
-
-class BuiltinObjectType(PyObjectType):
- # objstruct_cname string Name of PyObject struct
-
- is_builtin_type = 1
- has_attributes = 1
- base_type = None
- module_name = '__builtin__'
+ def default_coerced_ctype(self):
+ """The default C type that this Python type coerces to, or None."""
+ return None
+
+ def assignable_from(self, src_type):
+ # except for pointers, conversion will be attempted
+ return not src_type.is_ptr or src_type.is_string or src_type.is_pyunicode_ptr
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ base_code = "object"
+ else:
+ base_code = public_decl("PyObject", dll_linkage)
+ entity_code = "*%s" % entity_code
+ return self.base_declaration_code(base_code, entity_code)
+
+ def as_pyobject(self, cname):
+ if (not self.is_complete()) or self.is_extension_type:
+ return "(PyObject *)" + cname
+ else:
+ return cname
+
+ def py_type_name(self):
+ return "object"
+
+ def __lt__(self, other):
+ """
+ Make sure we sort highest, as instance checking on py_type_name
+ ('object') is always true
+ """
+ return False
+
+ def global_init_code(self, entry, code):
+ code.put_init_var_to_py_none(entry, nanny=False)
+
+ def check_for_null_code(self, cname):
+ return cname
+
+
+builtin_types_that_cannot_create_refcycles = set([
+ 'bool', 'int', 'long', 'float', 'complex',
+ 'bytearray', 'bytes', 'unicode', 'str', 'basestring'
+])
+
+
+class BuiltinObjectType(PyObjectType):
+ # objstruct_cname string Name of PyObject struct
+
+ is_builtin_type = 1
+ has_attributes = 1
+ base_type = None
+ module_name = '__builtin__'
require_exact = 1
-
- # fields that let it look like an extension type
- vtabslot_cname = None
- vtabstruct_cname = None
- vtabptr_cname = None
- typedef_flag = True
- is_external = True
+
+ # fields that let it look like an extension type
+ vtabslot_cname = None
+ vtabstruct_cname = None
+ vtabptr_cname = None
+ typedef_flag = True
+ is_external = True
decl_type = 'PyObject'
-
- def __init__(self, name, cname, objstruct_cname=None):
- self.name = name
- self.cname = cname
- self.typeptr_cname = "(&%s)" % cname
- self.objstruct_cname = objstruct_cname
- self.is_gc_simple = name in builtin_types_that_cannot_create_refcycles
+
+ def __init__(self, name, cname, objstruct_cname=None):
+ self.name = name
+ self.cname = cname
+ self.typeptr_cname = "(&%s)" % cname
+ self.objstruct_cname = objstruct_cname
+ self.is_gc_simple = name in builtin_types_that_cannot_create_refcycles
if name == 'type':
# Special case the type type, as many C API calls (and other
# libraries) actually expect a PyTypeObject* for type arguments.
self.decl_type = objstruct_cname
if name == 'Exception':
self.require_exact = 0
-
- def set_scope(self, scope):
- self.scope = scope
- if scope:
- scope.parent_type = self
-
- def __str__(self):
- return "%s object" % self.name
-
- def __repr__(self):
- return "<%s>"% self.cname
-
- def default_coerced_ctype(self):
- if self.name in ('bytes', 'bytearray'):
- return c_char_ptr_type
- elif self.name == 'bool':
- return c_bint_type
- elif self.name == 'float':
- return c_double_type
- return None
-
- def assignable_from(self, src_type):
- if isinstance(src_type, BuiltinObjectType):
- if self.name == 'basestring':
- return src_type.name in ('str', 'unicode', 'basestring')
- else:
- return src_type.name == self.name
- elif src_type.is_extension_type:
- # FIXME: This is an ugly special case that we currently
- # keep supporting. It allows users to specify builtin
- # types as external extension types, while keeping them
- # compatible with the real builtin types. We already
- # generate a warning for it. Big TODO: remove!
- return (src_type.module_name == '__builtin__' and
- src_type.name == self.name)
- else:
- return True
-
- def typeobj_is_available(self):
- return True
-
- def attributes_known(self):
- return True
-
- def subtype_of(self, type):
- return type.is_pyobject and type.assignable_from(self)
-
- def type_check_function(self, exact=True):
- type_name = self.name
- if type_name == 'str':
- type_check = 'PyString_Check'
- elif type_name == 'basestring':
- type_check = '__Pyx_PyBaseString_Check'
+
+ def set_scope(self, scope):
+ self.scope = scope
+ if scope:
+ scope.parent_type = self
+
+ def __str__(self):
+ return "%s object" % self.name
+
+ def __repr__(self):
+ return "<%s>"% self.cname
+
+ def default_coerced_ctype(self):
+ if self.name in ('bytes', 'bytearray'):
+ return c_char_ptr_type
+ elif self.name == 'bool':
+ return c_bint_type
+ elif self.name == 'float':
+ return c_double_type
+ return None
+
+ def assignable_from(self, src_type):
+ if isinstance(src_type, BuiltinObjectType):
+ if self.name == 'basestring':
+ return src_type.name in ('str', 'unicode', 'basestring')
+ else:
+ return src_type.name == self.name
+ elif src_type.is_extension_type:
+ # FIXME: This is an ugly special case that we currently
+ # keep supporting. It allows users to specify builtin
+ # types as external extension types, while keeping them
+ # compatible with the real builtin types. We already
+ # generate a warning for it. Big TODO: remove!
+ return (src_type.module_name == '__builtin__' and
+ src_type.name == self.name)
+ else:
+ return True
+
+ def typeobj_is_available(self):
+ return True
+
+ def attributes_known(self):
+ return True
+
+ def subtype_of(self, type):
+ return type.is_pyobject and type.assignable_from(self)
+
+ def type_check_function(self, exact=True):
+ type_name = self.name
+ if type_name == 'str':
+ type_check = 'PyString_Check'
+ elif type_name == 'basestring':
+ type_check = '__Pyx_PyBaseString_Check'
elif type_name == 'Exception':
type_check = '__Pyx_PyException_Check'
- elif type_name == 'bytearray':
- type_check = 'PyByteArray_Check'
- elif type_name == 'frozenset':
- type_check = 'PyFrozenSet_Check'
- else:
- type_check = 'Py%s_Check' % type_name.capitalize()
+ elif type_name == 'bytearray':
+ type_check = 'PyByteArray_Check'
+ elif type_name == 'frozenset':
+ type_check = 'PyFrozenSet_Check'
+ else:
+ type_check = 'Py%s_Check' % type_name.capitalize()
if exact and type_name not in ('bool', 'slice', 'Exception'):
- type_check += 'Exact'
- return type_check
-
- def isinstance_code(self, arg):
- return '%s(%s)' % (self.type_check_function(exact=False), arg)
-
- def type_test_code(self, arg, notnone=False, exact=True):
- type_check = self.type_check_function(exact=exact)
- check = 'likely(%s(%s))' % (type_check, arg)
- if not notnone:
- check += '||((%s) == Py_None)' % arg
- if self.name == 'basestring':
- name = '(PY_MAJOR_VERSION < 3 ? "basestring" : "str")'
- space_for_name = 16
- else:
- name = '"%s"' % self.name
- # avoid wasting too much space but limit number of different format strings
- space_for_name = (len(self.name) // 16 + 1) * 16
- error = '(PyErr_Format(PyExc_TypeError, "Expected %%.%ds, got %%.200s", %s, Py_TYPE(%s)->tp_name), 0)' % (
- space_for_name, name, arg)
- return check + '||' + error
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- base_code = self.name
- else:
+ type_check += 'Exact'
+ return type_check
+
+ def isinstance_code(self, arg):
+ return '%s(%s)' % (self.type_check_function(exact=False), arg)
+
+ def type_test_code(self, arg, notnone=False, exact=True):
+ type_check = self.type_check_function(exact=exact)
+ check = 'likely(%s(%s))' % (type_check, arg)
+ if not notnone:
+ check += '||((%s) == Py_None)' % arg
+ if self.name == 'basestring':
+ name = '(PY_MAJOR_VERSION < 3 ? "basestring" : "str")'
+ space_for_name = 16
+ else:
+ name = '"%s"' % self.name
+ # avoid wasting too much space but limit number of different format strings
+ space_for_name = (len(self.name) // 16 + 1) * 16
+ error = '(PyErr_Format(PyExc_TypeError, "Expected %%.%ds, got %%.200s", %s, Py_TYPE(%s)->tp_name), 0)' % (
+ space_for_name, name, arg)
+ return check + '||' + error
+
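
A standalone sketch of the pieces type_test_code assembles above; the argument name "__pyx_v_x" and the "bytearray" type are assumed examples.

    type_name, arg = "bytearray", "__pyx_v_x"
    type_check = 'PyByteArray_Check' + 'Exact'   # exact=True appends 'Exact' above
    check = 'likely(%s(%s))' % (type_check, arg)
    check += '||((%s) == Py_None)' % arg         # notnone=False keeps the None escape
    space_for_name = (len(type_name) // 16 + 1) * 16   # %.Ns width, rounded up to a multiple of 16
    print(check)             # likely(PyByteArray_CheckExact(__pyx_v_x))||((__pyx_v_x) == Py_None)
    print(space_for_name)    # 16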
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ base_code = self.name
+ else:
base_code = public_decl(self.decl_type, dll_linkage)
- entity_code = "*%s" % entity_code
- return self.base_declaration_code(base_code, entity_code)
-
+ entity_code = "*%s" % entity_code
+ return self.base_declaration_code(base_code, entity_code)
+
def as_pyobject(self, cname):
if self.decl_type == 'PyObject':
return cname
else:
return "(PyObject *)" + cname
- def cast_code(self, expr_code, to_object_struct = False):
- return "((%s*)%s)" % (
+ def cast_code(self, expr_code, to_object_struct = False):
+ return "((%s*)%s)" % (
to_object_struct and self.objstruct_cname or self.decl_type, # self.objstruct_cname may be None
- expr_code)
-
- def py_type_name(self):
- return self.name
-
-
-
-class PyExtensionType(PyObjectType):
- #
- # A Python extension type.
- #
- # name string
- # scope CClassScope Attribute namespace
- # visibility string
- # typedef_flag boolean
- # base_type PyExtensionType or None
- # module_name string or None Qualified name of defining module
- # objstruct_cname string Name of PyObject struct
- # objtypedef_cname string Name of PyObject struct typedef
- # typeobj_cname string or None C code fragment referring to type object
- # typeptr_cname string or None Name of pointer to external type object
- # vtabslot_cname string Name of C method table member
- # vtabstruct_cname string Name of C method table struct
- # vtabptr_cname string Name of pointer to C method table
- # vtable_cname string Name of C method table definition
+ expr_code)
+
+ def py_type_name(self):
+ return self.name
+
+
+
+class PyExtensionType(PyObjectType):
+ #
+ # A Python extension type.
+ #
+ # name string
+ # scope CClassScope Attribute namespace
+ # visibility string
+ # typedef_flag boolean
+ # base_type PyExtensionType or None
+ # module_name string or None Qualified name of defining module
+ # objstruct_cname string Name of PyObject struct
+ # objtypedef_cname string Name of PyObject struct typedef
+ # typeobj_cname string or None C code fragment referring to type object
+ # typeptr_cname string or None Name of pointer to external type object
+ # vtabslot_cname string Name of C method table member
+ # vtabstruct_cname string Name of C method table struct
+ # vtabptr_cname string Name of pointer to C method table
+ # vtable_cname string Name of C method table definition
# early_init boolean Whether to initialize early (as opposed to during module execution).
- # defered_declarations [thunk] Used to declare class hierarchies in order
+ # defered_declarations [thunk] Used to declare class hierarchies in order
# check_size 'warn', 'error', 'ignore' What to do if tp_basicsize does not match
-
- is_extension_type = 1
- has_attributes = 1
+
+ is_extension_type = 1
+ has_attributes = 1
early_init = 1
-
- objtypedef_cname = None
-
+
+ objtypedef_cname = None
+
def __init__(self, name, typedef_flag, base_type, is_external=0, check_size=None):
- self.name = name
- self.scope = None
- self.typedef_flag = typedef_flag
- if base_type is not None:
- base_type.is_subclassed = True
- self.base_type = base_type
- self.module_name = None
- self.objstruct_cname = None
- self.typeobj_cname = None
- self.typeptr_cname = None
- self.vtabslot_cname = None
- self.vtabstruct_cname = None
- self.vtabptr_cname = None
- self.vtable_cname = None
- self.is_external = is_external
+ self.name = name
+ self.scope = None
+ self.typedef_flag = typedef_flag
+ if base_type is not None:
+ base_type.is_subclassed = True
+ self.base_type = base_type
+ self.module_name = None
+ self.objstruct_cname = None
+ self.typeobj_cname = None
+ self.typeptr_cname = None
+ self.vtabslot_cname = None
+ self.vtabstruct_cname = None
+ self.vtabptr_cname = None
+ self.vtable_cname = None
+ self.is_external = is_external
self.check_size = check_size or 'warn'
- self.defered_declarations = []
-
- def set_scope(self, scope):
- self.scope = scope
- if scope:
- scope.parent_type = self
-
- def needs_nonecheck(self):
- return True
-
- def subtype_of_resolved_type(self, other_type):
- if other_type.is_extension_type or other_type.is_builtin_type:
- return self is other_type or (
- self.base_type and self.base_type.subtype_of(other_type))
- else:
- return other_type is py_object_type
-
- def typeobj_is_available(self):
- # Do we have a pointer to the type object?
- return self.typeptr_cname
-
- def typeobj_is_imported(self):
- # If we don't know the C name of the type object but we do
- # know which module it's defined in, it will be imported.
- return self.typeobj_cname is None and self.module_name is not None
-
- def assignable_from(self, src_type):
- if self == src_type:
- return True
- if isinstance(src_type, PyExtensionType):
- if src_type.base_type is not None:
- return self.assignable_from(src_type.base_type)
- if isinstance(src_type, BuiltinObjectType):
- # FIXME: This is an ugly special case that we currently
- # keep supporting. It allows users to specify builtin
- # types as external extension types, while keeping them
- # compatible with the real builtin types. We already
- # generate a warning for it. Big TODO: remove!
- return (self.module_name == '__builtin__' and
- self.name == src_type.name)
- return False
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0, deref = 0):
- if pyrex or for_display:
- base_code = self.name
- else:
- if self.typedef_flag:
- objstruct = self.objstruct_cname
- else:
- objstruct = "struct %s" % self.objstruct_cname
- base_code = public_decl(objstruct, dll_linkage)
- if deref:
- assert not entity_code
- else:
- entity_code = "*%s" % entity_code
- return self.base_declaration_code(base_code, entity_code)
-
- def type_test_code(self, py_arg, notnone=False):
-
- none_check = "((%s) == Py_None)" % py_arg
- type_check = "likely(__Pyx_TypeTest(%s, %s))" % (
- py_arg, self.typeptr_cname)
- if notnone:
- return type_check
- else:
- return "likely(%s || %s)" % (none_check, type_check)
-
- def attributes_known(self):
- return self.scope is not None
-
- def __str__(self):
- return self.name
-
- def __repr__(self):
- return "<PyExtensionType %s%s>" % (self.scope.class_name,
- ("", " typedef")[self.typedef_flag])
-
- def py_type_name(self):
- if not self.module_name:
- return self.name
-
- return "__import__(%r, None, None, ['']).%s" % (self.module_name,
- self.name)
-
-class CType(PyrexType):
- #
- # Base class for all C types (non-reference-counted).
- #
- # to_py_function string C function for converting to Python object
- # from_py_function string C function for constructing from Python object
- #
-
- to_py_function = None
- from_py_function = None
- exception_value = None
- exception_check = 1
-
- def create_to_py_utility_code(self, env):
- return self.to_py_function is not None
-
- def create_from_py_utility_code(self, env):
- return self.from_py_function is not None
-
- def can_coerce_to_pyobject(self, env):
- return self.create_to_py_utility_code(env)
-
+ self.defered_declarations = []
+
+ def set_scope(self, scope):
+ self.scope = scope
+ if scope:
+ scope.parent_type = self
+
+ def needs_nonecheck(self):
+ return True
+
+ def subtype_of_resolved_type(self, other_type):
+ if other_type.is_extension_type or other_type.is_builtin_type:
+ return self is other_type or (
+ self.base_type and self.base_type.subtype_of(other_type))
+ else:
+ return other_type is py_object_type
+
+ def typeobj_is_available(self):
+ # Do we have a pointer to the type object?
+ return self.typeptr_cname
+
+ def typeobj_is_imported(self):
+ # If we don't know the C name of the type object but we do
+ # know which module it's defined in, it will be imported.
+ return self.typeobj_cname is None and self.module_name is not None
+
+ def assignable_from(self, src_type):
+ if self == src_type:
+ return True
+ if isinstance(src_type, PyExtensionType):
+ if src_type.base_type is not None:
+ return self.assignable_from(src_type.base_type)
+ if isinstance(src_type, BuiltinObjectType):
+ # FIXME: This is an ugly special case that we currently
+ # keep supporting. It allows users to specify builtin
+ # types as external extension types, while keeping them
+ # compatible with the real builtin types. We already
+ # generate a warning for it. Big TODO: remove!
+ return (self.module_name == '__builtin__' and
+ self.name == src_type.name)
+ return False
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0, deref = 0):
+ if pyrex or for_display:
+ base_code = self.name
+ else:
+ if self.typedef_flag:
+ objstruct = self.objstruct_cname
+ else:
+ objstruct = "struct %s" % self.objstruct_cname
+ base_code = public_decl(objstruct, dll_linkage)
+ if deref:
+ assert not entity_code
+ else:
+ entity_code = "*%s" % entity_code
+ return self.base_declaration_code(base_code, entity_code)
+
+ def type_test_code(self, py_arg, notnone=False):
+
+ none_check = "((%s) == Py_None)" % py_arg
+ type_check = "likely(__Pyx_TypeTest(%s, %s))" % (
+ py_arg, self.typeptr_cname)
+ if notnone:
+ return type_check
+ else:
+ return "likely(%s || %s)" % (none_check, type_check)
+
+ def attributes_known(self):
+ return self.scope is not None
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return "<PyExtensionType %s%s>" % (self.scope.class_name,
+ ("", " typedef")[self.typedef_flag])
+
+ def py_type_name(self):
+ if not self.module_name:
+ return self.name
+
+ return "__import__(%r, None, None, ['']).%s" % (self.module_name,
+ self.name)
+
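
A standalone sketch of the runtime lookup expression returned by py_type_name above; the module and class names are hypothetical.

    module_name, name = "pkg.mod", "Tree"
    print("__import__(%r, None, None, ['']).%s" % (module_name, name))
    # prints: __import__('pkg.mod', None, None, ['']).Tree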
+class CType(PyrexType):
+ #
+ # Base class for all C types (non-reference-counted).
+ #
+ # to_py_function string C function for converting to Python object
+ # from_py_function string C function for constructing from Python object
+ #
+
+ to_py_function = None
+ from_py_function = None
+ exception_value = None
+ exception_check = 1
+
+ def create_to_py_utility_code(self, env):
+ return self.to_py_function is not None
+
+ def create_from_py_utility_code(self, env):
+ return self.from_py_function is not None
+
+ def can_coerce_to_pyobject(self, env):
+ return self.create_to_py_utility_code(env)
+
def can_coerce_from_pyobject(self, env):
return self.create_from_py_utility_code(env)
- def error_condition(self, result_code):
- conds = []
- if self.is_string or self.is_pyunicode_ptr:
- conds.append("(!%s)" % result_code)
- elif self.exception_value is not None:
- conds.append("(%s == (%s)%s)" % (result_code, self.sign_and_name(), self.exception_value))
- if self.exception_check:
- conds.append("PyErr_Occurred()")
- if len(conds) > 0:
- return " && ".join(conds)
- else:
- return 0
-
+ def error_condition(self, result_code):
+ conds = []
+ if self.is_string or self.is_pyunicode_ptr:
+ conds.append("(!%s)" % result_code)
+ elif self.exception_value is not None:
+ conds.append("(%s == (%s)%s)" % (result_code, self.sign_and_name(), self.exception_value))
+ if self.exception_check:
+ conds.append("PyErr_Occurred()")
+ if len(conds) > 0:
+ return " && ".join(conds)
+ else:
+ return 0
+
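
A standalone sketch of the condition string error_condition above would join for a converter whose exception value is -1 and which also checks PyErr_Occurred(); the temporary name and the "int" type stand in for sign_and_name() and are assumed examples.

    result_code, type_decl, exception_value = "__pyx_t_1", "int", "-1"
    conds = ["(%s == (%s)%s)" % (result_code, type_decl, exception_value),
             "PyErr_Occurred()"]
    print(" && ".join(conds))
    # prints: (__pyx_t_1 == (int)-1) && PyErr_Occurred()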
def to_py_call_code(self, source_code, result_code, result_type, to_py_function=None):
func = self.to_py_function if to_py_function is None else to_py_function
assert func
@@ -1508,7 +1508,7 @@ class CType(PyrexType):
result_code,
func,
source_code or 'NULL')
-
+
def from_py_call_code(self, source_code, result_code, error_pos, code,
from_py_function=None, error_condition=None):
return self._assign_from_py_code(
@@ -1558,76 +1558,76 @@ class PythranExpr(CType):
return hash(self.pythran_type)
-class CConstType(BaseType):
-
- is_const = 1
-
- def __init__(self, const_base_type):
- self.const_base_type = const_base_type
- if const_base_type.has_attributes and const_base_type.scope is not None:
- from . import Symtab
- self.scope = Symtab.CConstScope(const_base_type.scope)
-
- def __repr__(self):
- return "<CConstType %s>" % repr(self.const_base_type)
-
- def __str__(self):
- return self.declaration_code("", for_display=1)
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
+class CConstType(BaseType):
+
+ is_const = 1
+
+ def __init__(self, const_base_type):
+ self.const_base_type = const_base_type
+ if const_base_type.has_attributes and const_base_type.scope is not None:
+ from . import Symtab
+ self.scope = Symtab.CConstScope(const_base_type.scope)
+
+ def __repr__(self):
+ return "<CConstType %s>" % repr(self.const_base_type)
+
+ def __str__(self):
+ return self.declaration_code("", for_display=1)
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
if for_display or pyrex:
return "const " + self.const_base_type.declaration_code(entity_code, for_display, dll_linkage, pyrex)
else:
return self.const_base_type.declaration_code("const %s" % entity_code, for_display, dll_linkage, pyrex)
-
- def specialize(self, values):
- base_type = self.const_base_type.specialize(values)
- if base_type == self.const_base_type:
- return self
- else:
- return CConstType(base_type)
-
- def deduce_template_params(self, actual):
- return self.const_base_type.deduce_template_params(actual)
-
+
+ def specialize(self, values):
+ base_type = self.const_base_type.specialize(values)
+ if base_type == self.const_base_type:
+ return self
+ else:
+ return CConstType(base_type)
+
+ def deduce_template_params(self, actual):
+ return self.const_base_type.deduce_template_params(actual)
+
def can_coerce_to_pyobject(self, env):
return self.const_base_type.can_coerce_to_pyobject(env)
def can_coerce_from_pyobject(self, env):
return self.const_base_type.can_coerce_from_pyobject(env)
- def create_to_py_utility_code(self, env):
- if self.const_base_type.create_to_py_utility_code(env):
- self.to_py_function = self.const_base_type.to_py_function
- return True
-
+ def create_to_py_utility_code(self, env):
+ if self.const_base_type.create_to_py_utility_code(env):
+ self.to_py_function = self.const_base_type.to_py_function
+ return True
+
def same_as_resolved_type(self, other_type):
if other_type.is_const:
return self.const_base_type.same_as_resolved_type(other_type.const_base_type)
# Accept const LHS <- non-const RHS.
return self.const_base_type.same_as_resolved_type(other_type)
- def __getattr__(self, name):
- return getattr(self.const_base_type, name)
-
-
-class FusedType(CType):
- """
- Represents a Fused Type. All it needs to do is keep track of the types
- it aggregates, as it will be replaced with its specific version wherever
- needed.
-
- See http://wiki.cython.org/enhancements/fusedtypes
-
- types [PyrexType] is the list of types to be fused
- name str the name of the ctypedef
- """
-
- is_fused = 1
- exception_check = 0
-
- def __init__(self, types, name=None):
+ def __getattr__(self, name):
+ return getattr(self.const_base_type, name)
+
+
+class FusedType(CType):
+ """
+ Represents a Fused Type. All it needs to do is keep track of the types
+ it aggregates, as it will be replaced with its specific version wherever
+ needed.
+
+ See http://wiki.cython.org/enhancements/fusedtypes
+
+ types [PyrexType] is the list of types to be fused
+ name str the name of the ctypedef
+ """
+
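
As an aside, the kind of declaration this class models is a "ctypedef fused" block in Cython source; the sketch below only holds such source as a string and is purely illustrative.

    fused_decl = (
        "ctypedef fused number:\n"
        "    int\n"
        "    double\n"
    )
    print(fused_decl)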
+ is_fused = 1
+ exception_check = 0
+
+ def __init__(self, types, name=None):
# Use list rather than set to preserve order (list should be short).
flattened_types = []
for t in types:
@@ -1639,153 +1639,153 @@ class FusedType(CType):
elif t not in flattened_types:
flattened_types.append(t)
self.types = flattened_types
- self.name = name
-
- def declaration_code(self, entity_code, for_display = 0,
- dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- return self.name
-
- raise Exception("This may never happen, please report a bug")
-
- def __repr__(self):
- return 'FusedType(name=%r)' % self.name
-
- def specialize(self, values):
- return values[self]
-
- def get_fused_types(self, result=None, seen=None):
- if result is None:
- return [self]
-
- if self not in seen:
- result.append(self)
- seen.add(self)
-
-
-class CVoidType(CType):
- #
- # C "void" type
- #
-
- is_void = 1
+ self.name = name
+
+ def declaration_code(self, entity_code, for_display = 0,
+ dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ return self.name
+
+ raise Exception("This may never happen, please report a bug")
+
+ def __repr__(self):
+ return 'FusedType(name=%r)' % self.name
+
+ def specialize(self, values):
+ return values[self]
+
+ def get_fused_types(self, result=None, seen=None):
+ if result is None:
+ return [self]
+
+ if self not in seen:
+ result.append(self)
+ seen.add(self)
+
+
+class CVoidType(CType):
+ #
+ # C "void" type
+ #
+
+ is_void = 1
to_py_function = "__Pyx_void_to_None"
-
- def __repr__(self):
- return "<CVoidType>"
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- base_code = "void"
- else:
- base_code = public_decl("void", dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
- def is_complete(self):
- return 0
-
-class InvisibleVoidType(CVoidType):
- #
- # For use with C++ constructors and destructors return types.
- # Acts like void, but does not print out a declaration.
- #
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- base_code = "[void]"
- else:
- base_code = public_decl("", dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
-
-class CNumericType(CType):
- #
- # Base class for all C numeric types.
- #
- # rank integer Relative size
- # signed integer 0 = unsigned, 1 = unspecified, 2 = explicitly signed
- #
-
- is_numeric = 1
- default_value = "0"
- has_attributes = True
- scope = None
-
- sign_words = ("unsigned ", "", "signed ")
-
- def __init__(self, rank, signed = 1):
- self.rank = rank
+
+ def __repr__(self):
+ return "<CVoidType>"
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ base_code = "void"
+ else:
+ base_code = public_decl("void", dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+ def is_complete(self):
+ return 0
+
+class InvisibleVoidType(CVoidType):
+ #
+ # For use with C++ constructors and destructors return types.
+ # Acts like void, but does not print out a declaration.
+ #
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ base_code = "[void]"
+ else:
+ base_code = public_decl("", dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+
+class CNumericType(CType):
+ #
+ # Base class for all C numeric types.
+ #
+ # rank integer Relative size
+ # signed integer 0 = unsigned, 1 = unspecified, 2 = explicitly signed
+ #
+
+ is_numeric = 1
+ default_value = "0"
+ has_attributes = True
+ scope = None
+
+ sign_words = ("unsigned ", "", "signed ")
+
+ def __init__(self, rank, signed = 1):
+ self.rank = rank
if rank > 0 and signed == SIGNED:
# Signed is meaningless for anything but char, and complicates
# type promotion.
signed = 1
- self.signed = signed
-
- def sign_and_name(self):
- s = self.sign_words[self.signed]
- n = rank_to_type_name[self.rank]
- return s + n
-
- def __repr__(self):
- return "<CNumericType %s>" % self.sign_and_name()
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- type_name = self.sign_and_name()
- if pyrex or for_display:
- base_code = type_name.replace('PY_LONG_LONG', 'long long')
- else:
- base_code = public_decl(type_name, dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
- def attributes_known(self):
- if self.scope is None:
- from . import Symtab
- self.scope = scope = Symtab.CClassScope(
- '',
- None,
- visibility="extern")
- scope.parent_type = self
- scope.directives = {}
- scope.declare_cfunction(
- "conjugate",
- CFuncType(self, [CFuncTypeArg("self", self, None)], nogil=True),
- pos=None,
- defining=1,
- cname=" ")
- return True
-
- def __lt__(self, other):
- """Sort based on rank, preferring signed over unsigned"""
- if other.is_numeric:
- return self.rank > other.rank and self.signed >= other.signed
-
- # Prefer numeric types over others
- return True
-
- def py_type_name(self):
- if self.rank <= 4:
- return "(int, long)"
- return "float"
-
-
-class ForbidUseClass:
- def __repr__(self):
- raise RuntimeError()
- def __str__(self):
- raise RuntimeError()
-ForbidUse = ForbidUseClass()
-
-
+ self.signed = signed
+
+ def sign_and_name(self):
+ s = self.sign_words[self.signed]
+ n = rank_to_type_name[self.rank]
+ return s + n
+
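
A standalone sketch of the spelling produced by sign_and_name above; the type names are literals here because rank_to_type_name is not shown in this hunk.

    sign_words = ("unsigned ", "", "signed ")
    for signed, n in ((0, "int"), (1, "long"), (2, "char")):
        print(sign_words[signed] + n)
    # prints: unsigned int / long / signed char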
+ def __repr__(self):
+ return "<CNumericType %s>" % self.sign_and_name()
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ type_name = self.sign_and_name()
+ if pyrex or for_display:
+ base_code = type_name.replace('PY_LONG_LONG', 'long long')
+ else:
+ base_code = public_decl(type_name, dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+ def attributes_known(self):
+ if self.scope is None:
+ from . import Symtab
+ self.scope = scope = Symtab.CClassScope(
+ '',
+ None,
+ visibility="extern")
+ scope.parent_type = self
+ scope.directives = {}
+ scope.declare_cfunction(
+ "conjugate",
+ CFuncType(self, [CFuncTypeArg("self", self, None)], nogil=True),
+ pos=None,
+ defining=1,
+ cname=" ")
+ return True
+
+ def __lt__(self, other):
+ """Sort based on rank, preferring signed over unsigned"""
+ if other.is_numeric:
+ return self.rank > other.rank and self.signed >= other.signed
+
+ # Prefer numeric types over others
+ return True
+
+ def py_type_name(self):
+ if self.rank <= 4:
+ return "(int, long)"
+ return "float"
+
+
+class ForbidUseClass:
+ def __repr__(self):
+ raise RuntimeError()
+ def __str__(self):
+ raise RuntimeError()
+ForbidUse = ForbidUseClass()
+
+
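As a hypothetical illustration (not part of the commit) of the rank/signed encoding documented above, assuming the module-level rank_to_type_name table used by sign_and_name():

    int_rank = list(rank_to_type_name).index('int')
    CNumericType(int_rank, signed=0).sign_and_name()   # "unsigned int"
    CNumericType(int_rank, signed=1).sign_and_name()   # "int" (signedness unspecified)
    CNumericType(int_rank, signed=2).sign_and_name()   # also "int": explicit 'signed' is only kept for char
    CNumericType(0, signed=2).sign_and_name()          # "signed char" (rank 0 is char)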
class CIntLike(object):
"""Mixin for shared behaviour of C integers and enums.
"""
- to_py_function = None
- from_py_function = None
+ to_py_function = None
+ from_py_function = None
to_pyunicode_utility = None
default_format_spec = 'd'
-
+
def can_coerce_to_pyobject(self, env):
return True
@@ -1852,117 +1852,117 @@ class CIntLike(object):
format_type, width, padding_char = self._parse_format(format_spec)
return "%s(%s, %d, '%s', '%s')" % (utility_code_name, cvalue, width, padding_char, format_type)
-
+
class CIntType(CIntLike, CNumericType):
-
+
is_int = 1
typedef_flag = 0
exception_value = -1
- def get_to_py_type_conversion(self):
- if self.rank < list(rank_to_type_name).index('int'):
- # This assumes sizeof(short) < sizeof(int)
- return "PyInt_FromLong"
- else:
- # Py{Int|Long}_From[Unsigned]Long[Long]
- Prefix = "Int"
- SignWord = ""
- TypeName = "Long"
- if not self.signed:
- Prefix = "Long"
- SignWord = "Unsigned"
- if self.rank >= list(rank_to_type_name).index('PY_LONG_LONG'):
- Prefix = "Long"
- TypeName = "LongLong"
- return "Py%s_From%s%s" % (Prefix, SignWord, TypeName)
-
- def assignable_from_resolved_type(self, src_type):
- return src_type.is_int or src_type.is_enum or src_type is error_type
-
- def invalid_value(self):
- if rank_to_type_name[int(self.rank)] == 'char':
- return "'?'"
- else:
- # We do not really know the size of the type, so return
- # a 32-bit literal and rely on casting to final type. It will
- # be negative for signed ints, which is good.
- return "0xbad0bad0"
-
- def overflow_check_binop(self, binop, env, const_rhs=False):
- env.use_utility_code(UtilityCode.load("Common", "Overflow.c"))
+ def get_to_py_type_conversion(self):
+ if self.rank < list(rank_to_type_name).index('int'):
+ # This assumes sizeof(short) < sizeof(int)
+ return "PyInt_FromLong"
+ else:
+ # Py{Int|Long}_From[Unsigned]Long[Long]
+ Prefix = "Int"
+ SignWord = ""
+ TypeName = "Long"
+ if not self.signed:
+ Prefix = "Long"
+ SignWord = "Unsigned"
+ if self.rank >= list(rank_to_type_name).index('PY_LONG_LONG'):
+ Prefix = "Long"
+ TypeName = "LongLong"
+ return "Py%s_From%s%s" % (Prefix, SignWord, TypeName)
+
+ def assignable_from_resolved_type(self, src_type):
+ return src_type.is_int or src_type.is_enum or src_type is error_type
+
+ def invalid_value(self):
+ if rank_to_type_name[int(self.rank)] == 'char':
+ return "'?'"
+ else:
+ # We do not really know the size of the type, so return
+ # a 32-bit literal and rely on casting to final type. It will
+ # be negative for signed ints, which is good.
+ return "0xbad0bad0"
+
+ def overflow_check_binop(self, binop, env, const_rhs=False):
+ env.use_utility_code(UtilityCode.load("Common", "Overflow.c"))
type = self.empty_declaration_code()
- name = self.specialization_name()
- if binop == "lshift":
+ name = self.specialization_name()
+ if binop == "lshift":
env.use_utility_code(TempitaUtilityCode.load_cached(
- "LeftShift", "Overflow.c",
- context={'TYPE': type, 'NAME': name, 'SIGNED': self.signed}))
- else:
- if const_rhs:
- binop += "_const"
- if type in ('int', 'long', 'long long'):
+ "LeftShift", "Overflow.c",
+ context={'TYPE': type, 'NAME': name, 'SIGNED': self.signed}))
+ else:
+ if const_rhs:
+ binop += "_const"
+ if type in ('int', 'long', 'long long'):
env.use_utility_code(TempitaUtilityCode.load_cached(
- "BaseCaseSigned", "Overflow.c",
- context={'INT': type, 'NAME': name}))
- elif type in ('unsigned int', 'unsigned long', 'unsigned long long'):
+ "BaseCaseSigned", "Overflow.c",
+ context={'INT': type, 'NAME': name}))
+ elif type in ('unsigned int', 'unsigned long', 'unsigned long long'):
env.use_utility_code(TempitaUtilityCode.load_cached(
- "BaseCaseUnsigned", "Overflow.c",
- context={'UINT': type, 'NAME': name}))
- elif self.rank <= 1:
- # sizeof(short) < sizeof(int)
- return "__Pyx_%s_%s_no_overflow" % (binop, name)
- else:
- _load_overflow_base(env)
+ "BaseCaseUnsigned", "Overflow.c",
+ context={'UINT': type, 'NAME': name}))
+ elif self.rank <= 1:
+ # sizeof(short) < sizeof(int)
+ return "__Pyx_%s_%s_no_overflow" % (binop, name)
+ else:
+ _load_overflow_base(env)
env.use_utility_code(TempitaUtilityCode.load_cached(
- "SizeCheck", "Overflow.c",
- context={'TYPE': type, 'NAME': name}))
+ "SizeCheck", "Overflow.c",
+ context={'TYPE': type, 'NAME': name}))
env.use_utility_code(TempitaUtilityCode.load_cached(
- "Binop", "Overflow.c",
- context={'TYPE': type, 'NAME': name, 'BINOP': binop}))
- return "__Pyx_%s_%s_checking_overflow" % (binop, name)
-
-
-def _load_overflow_base(env):
- env.use_utility_code(UtilityCode.load("Common", "Overflow.c"))
- for type in ('int', 'long', 'long long'):
+ "Binop", "Overflow.c",
+ context={'TYPE': type, 'NAME': name, 'BINOP': binop}))
+ return "__Pyx_%s_%s_checking_overflow" % (binop, name)
+
+
+def _load_overflow_base(env):
+ env.use_utility_code(UtilityCode.load("Common", "Overflow.c"))
+ for type in ('int', 'long', 'long long'):
env.use_utility_code(TempitaUtilityCode.load_cached(
- "BaseCaseSigned", "Overflow.c",
- context={'INT': type, 'NAME': type.replace(' ', '_')}))
- for type in ('unsigned int', 'unsigned long', 'unsigned long long'):
+ "BaseCaseSigned", "Overflow.c",
+ context={'INT': type, 'NAME': type.replace(' ', '_')}))
+ for type in ('unsigned int', 'unsigned long', 'unsigned long long'):
env.use_utility_code(TempitaUtilityCode.load_cached(
- "BaseCaseUnsigned", "Overflow.c",
- context={'UINT': type, 'NAME': type.replace(' ', '_')}))
-
-
-class CAnonEnumType(CIntType):
-
- is_enum = 1
-
- def sign_and_name(self):
- return 'int'
-
-
-class CReturnCodeType(CIntType):
-
- to_py_function = "__Pyx_Owned_Py_None"
-
- is_returncode = True
- exception_check = False
+ "BaseCaseUnsigned", "Overflow.c",
+ context={'UINT': type, 'NAME': type.replace(' ', '_')}))
+
+
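Concretely, the Py{Int|Long}_From[Unsigned]Long[Long] naming scheme in get_to_py_type_conversion() composes names like these (hypothetical sketch, not part of the commit):

    int_rank = list(rank_to_type_name).index('int')
    longlong_rank = list(rank_to_type_name).index('PY_LONG_LONG')
    CIntType(int_rank, 1).get_to_py_type_conversion()        # "PyInt_FromLong"
    CIntType(int_rank, 0).get_to_py_type_conversion()        # "PyLong_FromUnsignedLong"
    CIntType(longlong_rank, 1).get_to_py_type_conversion()   # "PyLong_FromLongLong"
    CIntType(longlong_rank, 0).get_to_py_type_conversion()   # "PyLong_FromUnsignedLongLong"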
+class CAnonEnumType(CIntType):
+
+ is_enum = 1
+
+ def sign_and_name(self):
+ return 'int'
+
+
+class CReturnCodeType(CIntType):
+
+ to_py_function = "__Pyx_Owned_Py_None"
+
+ is_returncode = True
+ exception_check = False
default_format_spec = ''
-
+
def can_coerce_to_pystring(self, env, format_spec=None):
return not format_spec
-
+
def convert_to_pystring(self, cvalue, code, format_spec=None):
return "__Pyx_NewRef(%s)" % code.globalstate.get_py_string_const(StringEncoding.EncodedString("None")).cname
-class CBIntType(CIntType):
-
- to_py_function = "__Pyx_PyBool_FromLong"
- from_py_function = "__Pyx_PyObject_IsTrue"
+class CBIntType(CIntType):
+
+ to_py_function = "__Pyx_PyBool_FromLong"
+ from_py_function = "__Pyx_PyObject_IsTrue"
exception_check = 1 # for C++ bool
default_format_spec = ''
-
+
def can_coerce_to_pystring(self, env, format_spec=None):
return not format_spec or super(CBIntType, self).can_coerce_to_pystring(env, format_spec)
@@ -1980,227 +1980,227 @@ class CBIntType(CIntType):
code.globalstate.use_utility_code(to_pyunicode_utility)
return "%s(%s)" % (utility_code_name, cvalue)
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
if for_display:
- base_code = 'bool'
+ base_code = 'bool'
elif pyrex:
base_code = 'bint'
- else:
- base_code = public_decl('int', dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
- def __repr__(self):
- return "<CNumericType bint>"
-
- def __str__(self):
- return 'bint'
-
- def py_type_name(self):
- return "bool"
-
-
-class CPyUCS4IntType(CIntType):
- # Py_UCS4
-
- is_unicode_char = True
-
- # Py_UCS4 coerces from and to single character unicode strings (or
- # at most two characters on 16bit Unicode builds), but we also
- # allow Python integers as input. The value range for Py_UCS4
- # is 0..1114111, which is checked when converting from an integer
- # value.
-
- to_py_function = "PyUnicode_FromOrdinal"
- from_py_function = "__Pyx_PyObject_AsPy_UCS4"
-
+ else:
+ base_code = public_decl('int', dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+ def __repr__(self):
+ return "<CNumericType bint>"
+
+ def __str__(self):
+ return 'bint'
+
+ def py_type_name(self):
+ return "bool"
+
+
+class CPyUCS4IntType(CIntType):
+ # Py_UCS4
+
+ is_unicode_char = True
+
+ # Py_UCS4 coerces from and to single character unicode strings (or
+ # at most two characters on 16-bit Unicode builds), but we also
+ # allow Python integers as input. The value range for Py_UCS4
+ # is 0..1114111, which is checked when converting from an integer
+ # value.
+
+ to_py_function = "PyUnicode_FromOrdinal"
+ from_py_function = "__Pyx_PyObject_AsPy_UCS4"
+
def can_coerce_to_pystring(self, env, format_spec=None):
return False # does the right thing anyway
- def create_from_py_utility_code(self, env):
- env.use_utility_code(UtilityCode.load_cached("ObjectAsUCS4", "TypeConversion.c"))
- return True
-
- def sign_and_name(self):
- return "Py_UCS4"
-
-
-class CPyUnicodeIntType(CIntType):
- # Py_UNICODE
-
- is_unicode_char = True
-
- # Py_UNICODE coerces from and to single character unicode strings,
- # but we also allow Python integers as input. The value range for
- # Py_UNICODE is 0..1114111, which is checked when converting from
- # an integer value.
-
- to_py_function = "PyUnicode_FromOrdinal"
- from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
-
+ def create_from_py_utility_code(self, env):
+ env.use_utility_code(UtilityCode.load_cached("ObjectAsUCS4", "TypeConversion.c"))
+ return True
+
+ def sign_and_name(self):
+ return "Py_UCS4"
+
+
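The 0..1114111 range mentioned above is simply the Unicode code point space (0x10FFFF); a rough pure-Python equivalent of the integer-input check performed by the __Pyx_PyObject_AsPy_UCS4 helper (hypothetical sketch, not the generated C):

    def as_py_ucs4(obj):
        # Accept a single-character unicode string, or an int in the code point range.
        if isinstance(obj, str) and len(obj) == 1:
            return ord(obj)
        value = int(obj)
        if not 0 <= value <= 0x10FFFF:   # 1114111, the largest Unicode code point
            raise OverflowError("value out of range for Py_UCS4")
        return value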
+class CPyUnicodeIntType(CIntType):
+ # Py_UNICODE
+
+ is_unicode_char = True
+
+ # Py_UNICODE coerces from and to single character unicode strings,
+ # but we also allow Python integers as input. The value range for
+ # Py_UNICODE is 0..1114111, which is checked when converting from
+ # an integer value.
+
+ to_py_function = "PyUnicode_FromOrdinal"
+ from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
+
def can_coerce_to_pystring(self, env, format_spec=None):
return False # does the right thing anyway
- def create_from_py_utility_code(self, env):
- env.use_utility_code(UtilityCode.load_cached("ObjectAsPyUnicode", "TypeConversion.c"))
- return True
-
- def sign_and_name(self):
- return "Py_UNICODE"
-
-
-class CPyHashTType(CIntType):
-
- to_py_function = "__Pyx_PyInt_FromHash_t"
- from_py_function = "__Pyx_PyInt_AsHash_t"
-
- def sign_and_name(self):
- return "Py_hash_t"
-
-class CPySSizeTType(CIntType):
-
- to_py_function = "PyInt_FromSsize_t"
- from_py_function = "__Pyx_PyIndex_AsSsize_t"
-
- def sign_and_name(self):
- return "Py_ssize_t"
-
-class CSSizeTType(CIntType):
-
- to_py_function = "PyInt_FromSsize_t"
- from_py_function = "PyInt_AsSsize_t"
-
- def sign_and_name(self):
- return "Py_ssize_t"
-
-class CSizeTType(CIntType):
-
- to_py_function = "__Pyx_PyInt_FromSize_t"
-
- def sign_and_name(self):
- return "size_t"
-
-class CPtrdiffTType(CIntType):
-
- def sign_and_name(self):
- return "ptrdiff_t"
-
-
-class CFloatType(CNumericType):
-
- is_float = 1
- to_py_function = "PyFloat_FromDouble"
- from_py_function = "__pyx_PyFloat_AsDouble"
-
- exception_value = -1
-
- def __init__(self, rank, math_h_modifier = ''):
- CNumericType.__init__(self, rank, 1)
- self.math_h_modifier = math_h_modifier
- if rank == RANK_FLOAT:
- self.from_py_function = "__pyx_PyFloat_AsFloat"
-
- def assignable_from_resolved_type(self, src_type):
- return (src_type.is_numeric and not src_type.is_complex) or src_type is error_type
-
- def invalid_value(self):
- return Naming.PYX_NAN
-
-class CComplexType(CNumericType):
-
- is_complex = 1
- to_py_function = "__pyx_PyComplex_FromComplex"
- has_attributes = 1
- scope = None
-
- def __init__(self, real_type):
- while real_type.is_typedef and not real_type.typedef_is_external:
- real_type = real_type.typedef_base_type
+ def create_from_py_utility_code(self, env):
+ env.use_utility_code(UtilityCode.load_cached("ObjectAsPyUnicode", "TypeConversion.c"))
+ return True
+
+ def sign_and_name(self):
+ return "Py_UNICODE"
+
+
+class CPyHashTType(CIntType):
+
+ to_py_function = "__Pyx_PyInt_FromHash_t"
+ from_py_function = "__Pyx_PyInt_AsHash_t"
+
+ def sign_and_name(self):
+ return "Py_hash_t"
+
+class CPySSizeTType(CIntType):
+
+ to_py_function = "PyInt_FromSsize_t"
+ from_py_function = "__Pyx_PyIndex_AsSsize_t"
+
+ def sign_and_name(self):
+ return "Py_ssize_t"
+
+class CSSizeTType(CIntType):
+
+ to_py_function = "PyInt_FromSsize_t"
+ from_py_function = "PyInt_AsSsize_t"
+
+ def sign_and_name(self):
+ return "Py_ssize_t"
+
+class CSizeTType(CIntType):
+
+ to_py_function = "__Pyx_PyInt_FromSize_t"
+
+ def sign_and_name(self):
+ return "size_t"
+
+class CPtrdiffTType(CIntType):
+
+ def sign_and_name(self):
+ return "ptrdiff_t"
+
+
+class CFloatType(CNumericType):
+
+ is_float = 1
+ to_py_function = "PyFloat_FromDouble"
+ from_py_function = "__pyx_PyFloat_AsDouble"
+
+ exception_value = -1
+
+ def __init__(self, rank, math_h_modifier = ''):
+ CNumericType.__init__(self, rank, 1)
+ self.math_h_modifier = math_h_modifier
+ if rank == RANK_FLOAT:
+ self.from_py_function = "__pyx_PyFloat_AsFloat"
+
+ def assignable_from_resolved_type(self, src_type):
+ return (src_type.is_numeric and not src_type.is_complex) or src_type is error_type
+
+ def invalid_value(self):
+ return Naming.PYX_NAN
+
+class CComplexType(CNumericType):
+
+ is_complex = 1
+ to_py_function = "__pyx_PyComplex_FromComplex"
+ has_attributes = 1
+ scope = None
+
+ def __init__(self, real_type):
+ while real_type.is_typedef and not real_type.typedef_is_external:
+ real_type = real_type.typedef_base_type
self.funcsuffix = "_%s" % real_type.specialization_name()
if real_type.is_float:
self.math_h_modifier = real_type.math_h_modifier
- else:
+ else:
self.math_h_modifier = "_UNUSED"
-
- self.real_type = real_type
- CNumericType.__init__(self, real_type.rank + 0.5, real_type.signed)
- self.binops = {}
- self.from_parts = "%s_from_parts" % self.specialization_name()
- self.default_value = "%s(0, 0)" % self.from_parts
-
- def __eq__(self, other):
- if isinstance(self, CComplexType) and isinstance(other, CComplexType):
- return self.real_type == other.real_type
- else:
- return False
-
- def __ne__(self, other):
- if isinstance(self, CComplexType) and isinstance(other, CComplexType):
- return self.real_type != other.real_type
- else:
- return True
-
- def __lt__(self, other):
- if isinstance(self, CComplexType) and isinstance(other, CComplexType):
- return self.real_type < other.real_type
- else:
- # this is arbitrary, but it makes sure we always have
- # *some* kind of order
- return False
-
- def __hash__(self):
- return ~hash(self.real_type)
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- real_code = self.real_type.declaration_code("", for_display, dll_linkage, pyrex)
- base_code = "%s complex" % real_code
- else:
- base_code = public_decl(self.sign_and_name(), dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
- def sign_and_name(self):
- real_type_name = self.real_type.specialization_name()
- real_type_name = real_type_name.replace('long__double','long_double')
- real_type_name = real_type_name.replace('PY_LONG_LONG','long_long')
- return Naming.type_prefix + real_type_name + "_complex"
-
- def assignable_from(self, src_type):
- # Temporary hack/feature disabling, see #441
- if (not src_type.is_complex and src_type.is_numeric and src_type.is_typedef
- and src_type.typedef_is_external):
- return False
+
+ self.real_type = real_type
+ CNumericType.__init__(self, real_type.rank + 0.5, real_type.signed)
+ self.binops = {}
+ self.from_parts = "%s_from_parts" % self.specialization_name()
+ self.default_value = "%s(0, 0)" % self.from_parts
+
+ def __eq__(self, other):
+ if isinstance(self, CComplexType) and isinstance(other, CComplexType):
+ return self.real_type == other.real_type
+ else:
+ return False
+
+ def __ne__(self, other):
+ if isinstance(self, CComplexType) and isinstance(other, CComplexType):
+ return self.real_type != other.real_type
+ else:
+ return True
+
+ def __lt__(self, other):
+ if isinstance(self, CComplexType) and isinstance(other, CComplexType):
+ return self.real_type < other.real_type
+ else:
+ # this is arbitrary, but it makes sure we always have
+ # *some* kind of order
+ return False
+
+ def __hash__(self):
+ return ~hash(self.real_type)
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ real_code = self.real_type.declaration_code("", for_display, dll_linkage, pyrex)
+ base_code = "%s complex" % real_code
+ else:
+ base_code = public_decl(self.sign_and_name(), dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+ def sign_and_name(self):
+ real_type_name = self.real_type.specialization_name()
+ real_type_name = real_type_name.replace('long__double','long_double')
+ real_type_name = real_type_name.replace('PY_LONG_LONG','long_long')
+ return Naming.type_prefix + real_type_name + "_complex"
+
+ def assignable_from(self, src_type):
+ # Temporary hack/feature disabling, see #441
+ if (not src_type.is_complex and src_type.is_numeric and src_type.is_typedef
+ and src_type.typedef_is_external):
+ return False
elif src_type.is_pyobject:
return True
- else:
- return super(CComplexType, self).assignable_from(src_type)
-
- def assignable_from_resolved_type(self, src_type):
- return (src_type.is_complex and self.real_type.assignable_from_resolved_type(src_type.real_type)
- or src_type.is_numeric and self.real_type.assignable_from_resolved_type(src_type)
- or src_type is error_type)
-
- def attributes_known(self):
- if self.scope is None:
- from . import Symtab
- self.scope = scope = Symtab.CClassScope(
- '',
- None,
- visibility="extern")
- scope.parent_type = self
- scope.directives = {}
- scope.declare_var("real", self.real_type, None, cname="real", is_cdef=True)
- scope.declare_var("imag", self.real_type, None, cname="imag", is_cdef=True)
- scope.declare_cfunction(
- "conjugate",
- CFuncType(self, [CFuncTypeArg("self", self, None)], nogil=True),
- pos=None,
- defining=1,
- cname="__Pyx_c_conj%s" % self.funcsuffix)
-
- return True
-
+ else:
+ return super(CComplexType, self).assignable_from(src_type)
+
+ def assignable_from_resolved_type(self, src_type):
+ return (src_type.is_complex and self.real_type.assignable_from_resolved_type(src_type.real_type)
+ or src_type.is_numeric and self.real_type.assignable_from_resolved_type(src_type)
+ or src_type is error_type)
+
+ def attributes_known(self):
+ if self.scope is None:
+ from . import Symtab
+ self.scope = scope = Symtab.CClassScope(
+ '',
+ None,
+ visibility="extern")
+ scope.parent_type = self
+ scope.directives = {}
+ scope.declare_var("real", self.real_type, None, cname="real", is_cdef=True)
+ scope.declare_var("imag", self.real_type, None, cname="imag", is_cdef=True)
+ scope.declare_cfunction(
+ "conjugate",
+ CFuncType(self, [CFuncTypeArg("self", self, None)], nogil=True),
+ pos=None,
+ defining=1,
+ cname="__Pyx_c_conj%s" % self.funcsuffix)
+
+ return True
+
def _utility_code_context(self):
return {
'type': self.empty_declaration_code(),
@@ -2211,69 +2211,69 @@ class CComplexType(CNumericType):
'is_float': int(self.real_type.is_float)
}
- def create_declaration_utility_code(self, env):
- # This must always be run, because a single CComplexType instance can be shared
- # across multiple compilations (the one created in the module scope)
+ def create_declaration_utility_code(self, env):
+ # This must always be run, because a single CComplexType instance can be shared
+ # across multiple compilations (the one created in the module scope)
env.use_utility_code(UtilityCode.load_cached('Header', 'Complex.c'))
env.use_utility_code(UtilityCode.load_cached('RealImag', 'Complex.c'))
env.use_utility_code(TempitaUtilityCode.load_cached(
'Declarations', 'Complex.c', self._utility_code_context()))
env.use_utility_code(TempitaUtilityCode.load_cached(
'Arithmetic', 'Complex.c', self._utility_code_context()))
- return True
-
+ return True
+
def can_coerce_to_pyobject(self, env):
return True
def can_coerce_from_pyobject(self, env):
return True
- def create_to_py_utility_code(self, env):
+ def create_to_py_utility_code(self, env):
env.use_utility_code(UtilityCode.load_cached('ToPy', 'Complex.c'))
- return True
-
- def create_from_py_utility_code(self, env):
+ return True
+
+ def create_from_py_utility_code(self, env):
env.use_utility_code(TempitaUtilityCode.load_cached(
'FromPy', 'Complex.c', self._utility_code_context()))
- self.from_py_function = "__Pyx_PyComplex_As_" + self.specialization_name()
- return True
-
- def lookup_op(self, nargs, op):
- try:
- return self.binops[nargs, op]
- except KeyError:
- pass
- try:
- op_name = complex_ops[nargs, op]
- self.binops[nargs, op] = func_name = "__Pyx_c_%s%s" % (op_name, self.funcsuffix)
- return func_name
- except KeyError:
- return None
-
- def unary_op(self, op):
- return self.lookup_op(1, op)
-
- def binary_op(self, op):
- return self.lookup_op(2, op)
-
- def py_type_name(self):
- return "complex"
-
- def cast_code(self, expr_code):
- return expr_code
-
-complex_ops = {
- (1, '-'): 'neg',
- (1, 'zero'): 'is_zero',
- (2, '+'): 'sum',
- (2, '-'): 'diff',
- (2, '*'): 'prod',
- (2, '/'): 'quot',
+ self.from_py_function = "__Pyx_PyComplex_As_" + self.specialization_name()
+ return True
+
+ def lookup_op(self, nargs, op):
+ try:
+ return self.binops[nargs, op]
+ except KeyError:
+ pass
+ try:
+ op_name = complex_ops[nargs, op]
+ self.binops[nargs, op] = func_name = "__Pyx_c_%s%s" % (op_name, self.funcsuffix)
+ return func_name
+ except KeyError:
+ return None
+
+ def unary_op(self, op):
+ return self.lookup_op(1, op)
+
+ def binary_op(self, op):
+ return self.lookup_op(2, op)
+
+ def py_type_name(self):
+ return "complex"
+
+ def cast_code(self, expr_code):
+ return expr_code
+
+complex_ops = {
+ (1, '-'): 'neg',
+ (1, 'zero'): 'is_zero',
+ (2, '+'): 'sum',
+ (2, '-'): 'diff',
+ (2, '*'): 'prod',
+ (2, '/'): 'quot',
(2, '**'): 'pow',
- (2, '=='): 'eq',
-}
-
-
+ (2, '=='): 'eq',
+}
+
+
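A hypothetical sketch (not part of the commit) of how lookup_op() maps an operator onto the generated helper name via complex_ops and funcsuffix; it assumes the module-level c_double_type singleton, whose specialization name is 'double':

    c = CComplexType(c_double_type)   # funcsuffix == "_double"
    c.binary_op('+')                  # "__Pyx_c_sum_double"
    c.binary_op('==')                 # "__Pyx_c_eq_double"
    c.unary_op('-')                   # "__Pyx_c_neg_double"
    c.binary_op('%')                  # None: '%' has no entry in complex_ops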
class CPyTSSTType(CType):
#
# PEP-539 "Py_tss_t" type
@@ -2293,133 +2293,133 @@ class CPyTSSTType(CType):
return self.base_declaration_code(base_code, entity_code)
-class CPointerBaseType(CType):
- # common base type for pointer/array types
- #
- # base_type CType Reference type
-
- subtypes = ['base_type']
-
- def __init__(self, base_type):
- self.base_type = base_type
+class CPointerBaseType(CType):
+ # common base type for pointer/array types
+ #
+ # base_type CType Reference type
+
+ subtypes = ['base_type']
+
+ def __init__(self, base_type):
+ self.base_type = base_type
if base_type.is_const:
base_type = base_type.const_base_type
- for char_type in (c_char_type, c_uchar_type, c_schar_type):
- if base_type.same_as(char_type):
- self.is_string = 1
- break
- else:
- if base_type.same_as(c_py_unicode_type):
- self.is_pyunicode_ptr = 1
-
- if self.is_string and not base_type.is_error:
+ for char_type in (c_char_type, c_uchar_type, c_schar_type):
+ if base_type.same_as(char_type):
+ self.is_string = 1
+ break
+ else:
+ if base_type.same_as(c_py_unicode_type):
+ self.is_pyunicode_ptr = 1
+
+ if self.is_string and not base_type.is_error:
if base_type.signed == 2:
self.to_py_function = "__Pyx_PyObject_FromCString"
if self.is_ptr:
self.from_py_function = "__Pyx_PyObject_As%sSString"
elif base_type.signed:
- self.to_py_function = "__Pyx_PyObject_FromString"
- if self.is_ptr:
+ self.to_py_function = "__Pyx_PyObject_FromString"
+ if self.is_ptr:
self.from_py_function = "__Pyx_PyObject_As%sString"
- else:
+ else:
self.to_py_function = "__Pyx_PyObject_FromCString"
- if self.is_ptr:
+ if self.is_ptr:
self.from_py_function = "__Pyx_PyObject_As%sUString"
if self.is_ptr:
self.from_py_function %= '' if self.base_type.is_const else 'Writable'
- self.exception_value = "NULL"
- elif self.is_pyunicode_ptr and not base_type.is_error:
- self.to_py_function = "__Pyx_PyUnicode_FromUnicode"
- if self.is_ptr:
- self.from_py_function = "__Pyx_PyUnicode_AsUnicode"
- self.exception_value = "NULL"
-
- def py_type_name(self):
- if self.is_string:
- return "bytes"
- elif self.is_pyunicode_ptr:
- return "unicode"
- else:
- return super(CPointerBaseType, self).py_type_name()
-
- def literal_code(self, value):
- if self.is_string:
- assert isinstance(value, str)
- return '"%s"' % StringEncoding.escape_byte_string(value)
-
-
-class CArrayType(CPointerBaseType):
- # base_type CType Element type
- # size integer or None Number of elements
-
- is_array = 1
+ self.exception_value = "NULL"
+ elif self.is_pyunicode_ptr and not base_type.is_error:
+ self.to_py_function = "__Pyx_PyUnicode_FromUnicode"
+ if self.is_ptr:
+ self.from_py_function = "__Pyx_PyUnicode_AsUnicode"
+ self.exception_value = "NULL"
+
+ def py_type_name(self):
+ if self.is_string:
+ return "bytes"
+ elif self.is_pyunicode_ptr:
+ return "unicode"
+ else:
+ return super(CPointerBaseType, self).py_type_name()
+
+ def literal_code(self, value):
+ if self.is_string:
+ assert isinstance(value, str)
+ return '"%s"' % StringEncoding.escape_byte_string(value)
+
+
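In other words, the constructor above only flags a pointer or array as a C string when its element type is one of the three char flavours; a hypothetical sketch (not part of the commit), using the module-level char and Py_UNICODE type singletons referenced in the code:

    p = CPtrType(c_char_type)
    p.is_string                        # 1: element type matched a char flavour
    p.py_type_name()                   # "bytes"
    p.to_py_function                   # "__Pyx_PyObject_FromString" for plain (sign-unspecified) char
    CPtrType(c_py_unicode_type).is_pyunicode_ptr   # 1: coerces via unicode instead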
+class CArrayType(CPointerBaseType):
+ # base_type CType Element type
+ # size integer or None Number of elements
+
+ is_array = 1
to_tuple_function = None
-
- def __init__(self, base_type, size):
- super(CArrayType, self).__init__(base_type)
- self.size = size
-
- def __eq__(self, other):
- if isinstance(other, CType) and other.is_array and self.size == other.size:
- return self.base_type.same_as(other.base_type)
- return False
-
- def __hash__(self):
- return hash(self.base_type) + 28 # arbitrarily chosen offset
-
- def __repr__(self):
- return "<CArrayType %s %s>" % (self.size, repr(self.base_type))
-
- def same_as_resolved_type(self, other_type):
- return ((other_type.is_array and
- self.base_type.same_as(other_type.base_type))
- or other_type is error_type)
-
- def assignable_from_resolved_type(self, src_type):
+
+ def __init__(self, base_type, size):
+ super(CArrayType, self).__init__(base_type)
+ self.size = size
+
+ def __eq__(self, other):
+ if isinstance(other, CType) and other.is_array and self.size == other.size:
+ return self.base_type.same_as(other.base_type)
+ return False
+
+ def __hash__(self):
+ return hash(self.base_type) + 28 # arbitrarily chosen offset
+
+ def __repr__(self):
+ return "<CArrayType %s %s>" % (self.size, repr(self.base_type))
+
+ def same_as_resolved_type(self, other_type):
+ return ((other_type.is_array and
+ self.base_type.same_as(other_type.base_type))
+ or other_type is error_type)
+
+ def assignable_from_resolved_type(self, src_type):
# C arrays are assigned by value, either Python containers or C arrays/pointers
if src_type.is_pyobject:
return True
if src_type.is_ptr or src_type.is_array:
return self.base_type.assignable_from(src_type.base_type)
return False
-
- def element_ptr_type(self):
- return c_ptr_type(self.base_type)
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if self.size is not None:
- dimension_code = self.size
- else:
- dimension_code = ""
- if entity_code.startswith("*"):
- entity_code = "(%s)" % entity_code
- return self.base_type.declaration_code(
- "%s[%s]" % (entity_code, dimension_code),
- for_display, dll_linkage, pyrex)
-
- def as_argument_type(self):
- return c_ptr_type(self.base_type)
-
- def is_complete(self):
- return self.size is not None
-
- def specialize(self, values):
- base_type = self.base_type.specialize(values)
- if base_type == self.base_type:
- return self
- else:
+
+ def element_ptr_type(self):
+ return c_ptr_type(self.base_type)
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if self.size is not None:
+ dimension_code = self.size
+ else:
+ dimension_code = ""
+ if entity_code.startswith("*"):
+ entity_code = "(%s)" % entity_code
+ return self.base_type.declaration_code(
+ "%s[%s]" % (entity_code, dimension_code),
+ for_display, dll_linkage, pyrex)
+
+ def as_argument_type(self):
+ return c_ptr_type(self.base_type)
+
+ def is_complete(self):
+ return self.size is not None
+
+ def specialize(self, values):
+ base_type = self.base_type.specialize(values)
+ if base_type == self.base_type:
+ return self
+ else:
return CArrayType(base_type, self.size)
-
- def deduce_template_params(self, actual):
- if isinstance(actual, CArrayType):
- return self.base_type.deduce_template_params(actual.base_type)
- else:
+
+ def deduce_template_params(self, actual):
+ if isinstance(actual, CArrayType):
+ return self.base_type.deduce_template_params(actual.base_type)
+ else:
return {}
-
+
def can_coerce_to_pyobject(self, env):
return self.base_type.can_coerce_to_pyobject(env)
-
+
def can_coerce_from_pyobject(self, env):
return self.base_type.can_coerce_from_pyobject(env)
@@ -2490,121 +2490,121 @@ class CArrayType(CPointerBaseType):
return code.error_goto_if_neg(call_code, error_pos)
-class CPtrType(CPointerBaseType):
- # base_type CType Reference type
-
- is_ptr = 1
- default_value = "0"
-
- def __hash__(self):
- return hash(self.base_type) + 27 # arbitrarily chosen offset
-
- def __eq__(self, other):
- if isinstance(other, CType) and other.is_ptr:
- return self.base_type.same_as(other.base_type)
- return False
-
- def __ne__(self, other):
- return not (self == other)
-
- def __repr__(self):
- return "<CPtrType %s>" % repr(self.base_type)
-
- def same_as_resolved_type(self, other_type):
- return ((other_type.is_ptr and
- self.base_type.same_as(other_type.base_type))
- or other_type is error_type)
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- #print "CPtrType.declaration_code: pointer to", self.base_type ###
- return self.base_type.declaration_code(
- "*%s" % entity_code,
- for_display, dll_linkage, pyrex)
-
- def assignable_from_resolved_type(self, other_type):
- if other_type is error_type:
- return 1
- if other_type.is_null_ptr:
- return 1
- if self.base_type.is_const:
- self = CPtrType(self.base_type.const_base_type)
- if self.base_type.is_cfunction:
- if other_type.is_ptr:
- other_type = other_type.base_type.resolve()
- if other_type.is_cfunction:
- return self.base_type.pointer_assignable_from_resolved_type(other_type)
- else:
- return 0
- if (self.base_type.is_cpp_class and other_type.is_ptr
- and other_type.base_type.is_cpp_class and other_type.base_type.is_subclass(self.base_type)):
- return 1
- if other_type.is_array or other_type.is_ptr:
- return self.base_type.is_void or self.base_type.same_as(other_type.base_type)
- return 0
-
- def specialize(self, values):
- base_type = self.base_type.specialize(values)
- if base_type == self.base_type:
- return self
- else:
- return CPtrType(base_type)
-
- def deduce_template_params(self, actual):
- if isinstance(actual, CPtrType):
- return self.base_type.deduce_template_params(actual.base_type)
- else:
+class CPtrType(CPointerBaseType):
+ # base_type CType Reference type
+
+ is_ptr = 1
+ default_value = "0"
+
+ def __hash__(self):
+ return hash(self.base_type) + 27 # arbitrarily chosen offset
+
+ def __eq__(self, other):
+ if isinstance(other, CType) and other.is_ptr:
+ return self.base_type.same_as(other.base_type)
+ return False
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return "<CPtrType %s>" % repr(self.base_type)
+
+ def same_as_resolved_type(self, other_type):
+ return ((other_type.is_ptr and
+ self.base_type.same_as(other_type.base_type))
+ or other_type is error_type)
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ #print "CPtrType.declaration_code: pointer to", self.base_type ###
+ return self.base_type.declaration_code(
+ "*%s" % entity_code,
+ for_display, dll_linkage, pyrex)
+
+ def assignable_from_resolved_type(self, other_type):
+ if other_type is error_type:
+ return 1
+ if other_type.is_null_ptr:
+ return 1
+ if self.base_type.is_const:
+ self = CPtrType(self.base_type.const_base_type)
+ if self.base_type.is_cfunction:
+ if other_type.is_ptr:
+ other_type = other_type.base_type.resolve()
+ if other_type.is_cfunction:
+ return self.base_type.pointer_assignable_from_resolved_type(other_type)
+ else:
+ return 0
+ if (self.base_type.is_cpp_class and other_type.is_ptr
+ and other_type.base_type.is_cpp_class and other_type.base_type.is_subclass(self.base_type)):
+ return 1
+ if other_type.is_array or other_type.is_ptr:
+ return self.base_type.is_void or self.base_type.same_as(other_type.base_type)
+ return 0
+
+ def specialize(self, values):
+ base_type = self.base_type.specialize(values)
+ if base_type == self.base_type:
+ return self
+ else:
+ return CPtrType(base_type)
+
+ def deduce_template_params(self, actual):
+ if isinstance(actual, CPtrType):
+ return self.base_type.deduce_template_params(actual.base_type)
+ else:
return {}
-
- def invalid_value(self):
- return "1"
-
- def find_cpp_operation_type(self, operator, operand_type=None):
- if self.base_type.is_cpp_class:
- return self.base_type.find_cpp_operation_type(operator, operand_type)
- return None
-
-
-class CNullPtrType(CPtrType):
-
- is_null_ptr = 1
-
-
-class CReferenceType(BaseType):
-
- is_reference = 1
+
+ def invalid_value(self):
+ return "1"
+
+ def find_cpp_operation_type(self, operator, operand_type=None):
+ if self.base_type.is_cpp_class:
+ return self.base_type.find_cpp_operation_type(operator, operand_type)
+ return None
+
+
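Because each declaration_code() wraps the entity string and delegates to its base type, nested declarators come out in proper inside-out C syntax; a hypothetical sketch (not part of the commit), assuming the module-level c_int_type singleton:

    arr = CArrayType(c_int_type, 10)             # array of 10 ints
    ptr_to_arr = CPtrType(arr)                    # pointer to that array
    CPtrType(c_int_type).declaration_code("x")    # "int *x"
    arr.declaration_code("x")                     # "int x[10]"
    ptr_to_arr.declaration_code("x")              # "int (*x)[10]" -- parenthesised because the entity starts with '*'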
+class CNullPtrType(CPtrType):
+
+ is_null_ptr = 1
+
+
+class CReferenceType(BaseType):
+
+ is_reference = 1
is_fake_reference = 0
-
- def __init__(self, base_type):
- self.ref_base_type = base_type
-
- def __repr__(self):
- return "<CReferenceType %s>" % repr(self.ref_base_type)
-
- def __str__(self):
- return "%s &" % self.ref_base_type
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- #print "CReferenceType.declaration_code: pointer to", self.base_type ###
- return self.ref_base_type.declaration_code(
- "&%s" % entity_code,
- for_display, dll_linkage, pyrex)
-
- def specialize(self, values):
- base_type = self.ref_base_type.specialize(values)
- if base_type == self.ref_base_type:
- return self
- else:
+
+ def __init__(self, base_type):
+ self.ref_base_type = base_type
+
+ def __repr__(self):
+ return "<CReferenceType %s>" % repr(self.ref_base_type)
+
+ def __str__(self):
+ return "%s &" % self.ref_base_type
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ #print "CReferenceType.declaration_code: reference to", self.ref_base_type ###
+ return self.ref_base_type.declaration_code(
+ "&%s" % entity_code,
+ for_display, dll_linkage, pyrex)
+
+ def specialize(self, values):
+ base_type = self.ref_base_type.specialize(values)
+ if base_type == self.ref_base_type:
+ return self
+ else:
return type(self)(base_type)
-
- def deduce_template_params(self, actual):
- return self.ref_base_type.deduce_template_params(actual)
-
- def __getattr__(self, name):
- return getattr(self.ref_base_type, name)
-
-
+
+ def deduce_template_params(self, actual):
+ return self.ref_base_type.deduce_template_params(actual)
+
+ def __getattr__(self, name):
+ return getattr(self.ref_base_type, name)
+
+
class CFakeReferenceType(CReferenceType):
is_fake_reference = 1
@@ -2621,68 +2621,68 @@ class CFakeReferenceType(CReferenceType):
return "__Pyx_FakeReference<%s> %s" % (self.ref_base_type.empty_declaration_code(), entity_code)
-class CFuncType(CType):
- # return_type CType
- # args [CFuncTypeArg]
- # has_varargs boolean
- # exception_value string
- # exception_check boolean True if PyErr_Occurred check needed
- # calling_convention string Function calling convention
- # nogil boolean Can be called without gil
- # with_gil boolean Acquire gil around function body
- # templates [string] or None
- # cached_specialized_types [CFuncType] cached specialized versions of the CFuncType if defined in a pxd
- # from_fused boolean Indicates whether this is a specialized
- # C function
- # is_strict_signature boolean function refuses to accept coerced arguments
- # (used for optimisation overrides)
- # is_const_method boolean
- # is_static_method boolean
-
- is_cfunction = 1
- original_sig = None
- cached_specialized_types = None
- from_fused = False
- is_const_method = False
-
- subtypes = ['return_type', 'args']
-
- def __init__(self, return_type, args, has_varargs = 0,
- exception_value = None, exception_check = 0, calling_convention = "",
- nogil = 0, with_gil = 0, is_overridable = 0, optional_arg_count = 0,
- is_const_method = False, is_static_method=False,
- templates = None, is_strict_signature = False):
- self.return_type = return_type
- self.args = args
- self.has_varargs = has_varargs
- self.optional_arg_count = optional_arg_count
- self.exception_value = exception_value
- self.exception_check = exception_check
- self.calling_convention = calling_convention
- self.nogil = nogil
- self.with_gil = with_gil
- self.is_overridable = is_overridable
- self.is_const_method = is_const_method
- self.is_static_method = is_static_method
- self.templates = templates
- self.is_strict_signature = is_strict_signature
-
- def __repr__(self):
+class CFuncType(CType):
+ # return_type CType
+ # args [CFuncTypeArg]
+ # has_varargs boolean
+ # exception_value string
+ # exception_check boolean True if PyErr_Occurred check needed
+ # calling_convention string Function calling convention
+ # nogil boolean Can be called without gil
+ # with_gil boolean Acquire gil around function body
+ # templates [string] or None
+ # cached_specialized_types [CFuncType] cached specialized versions of the CFuncType if defined in a pxd
+ # from_fused boolean Indicates whether this is a specialized
+ # C function
+ # is_strict_signature boolean function refuses to accept coerced arguments
+ # (used for optimisation overrides)
+ # is_const_method boolean
+ # is_static_method boolean
+
+ is_cfunction = 1
+ original_sig = None
+ cached_specialized_types = None
+ from_fused = False
+ is_const_method = False
+
+ subtypes = ['return_type', 'args']
+
+ def __init__(self, return_type, args, has_varargs = 0,
+ exception_value = None, exception_check = 0, calling_convention = "",
+ nogil = 0, with_gil = 0, is_overridable = 0, optional_arg_count = 0,
+ is_const_method = False, is_static_method=False,
+ templates = None, is_strict_signature = False):
+ self.return_type = return_type
+ self.args = args
+ self.has_varargs = has_varargs
+ self.optional_arg_count = optional_arg_count
+ self.exception_value = exception_value
+ self.exception_check = exception_check
+ self.calling_convention = calling_convention
+ self.nogil = nogil
+ self.with_gil = with_gil
+ self.is_overridable = is_overridable
+ self.is_const_method = is_const_method
+ self.is_static_method = is_static_method
+ self.templates = templates
+ self.is_strict_signature = is_strict_signature
+
+ def __repr__(self):
arg_reprs = list(map(repr, self.args))
- if self.has_varargs:
- arg_reprs.append("...")
- if self.exception_value:
- except_clause = " %r" % self.exception_value
- else:
- except_clause = ""
- if self.exception_check:
- except_clause += "?"
- return "<CFuncType %s %s[%s]%s>" % (
- repr(self.return_type),
- self.calling_convention_prefix(),
- ",".join(arg_reprs),
- except_clause)
-
+ if self.has_varargs:
+ arg_reprs.append("...")
+ if self.exception_value:
+ except_clause = " %r" % self.exception_value
+ else:
+ except_clause = ""
+ if self.exception_check:
+ except_clause += "?"
+ return "<CFuncType %s %s[%s]%s>" % (
+ repr(self.return_type),
+ self.calling_convention_prefix(),
+ ",".join(arg_reprs),
+ except_clause)
+
def with_with_gil(self, with_gil):
if with_gil == self.with_gil:
return self
@@ -2696,46 +2696,46 @@ class CFuncType(CType):
self.is_const_method, self.is_static_method,
self.templates, self.is_strict_signature)
- def calling_convention_prefix(self):
- cc = self.calling_convention
- if cc:
- return cc + " "
- else:
- return ""
-
- def as_argument_type(self):
- return c_ptr_type(self)
-
- def same_c_signature_as(self, other_type, as_cmethod = 0):
- return self.same_c_signature_as_resolved_type(
- other_type.resolve(), as_cmethod)
-
+ def calling_convention_prefix(self):
+ cc = self.calling_convention
+ if cc:
+ return cc + " "
+ else:
+ return ""
+
+ def as_argument_type(self):
+ return c_ptr_type(self)
+
+ def same_c_signature_as(self, other_type, as_cmethod = 0):
+ return self.same_c_signature_as_resolved_type(
+ other_type.resolve(), as_cmethod)
+
def same_c_signature_as_resolved_type(self, other_type, as_cmethod=False, as_pxd_definition=False,
exact_semantics=True):
# If 'exact_semantics' is false, allow any equivalent C signatures
# if the Cython semantics are compatible, i.e. the same or wider for 'other_type'.
- #print "CFuncType.same_c_signature_as_resolved_type:", \
- # self, other_type, "as_cmethod =", as_cmethod ###
- if other_type is error_type:
- return 1
- if not other_type.is_cfunction:
- return 0
- if self.is_overridable != other_type.is_overridable:
- return 0
- nargs = len(self.args)
- if nargs != len(other_type.args):
- return 0
- # When comparing C method signatures, the first argument
- # is exempt from compatibility checking (the proper check
- # is performed elsewhere).
- for i in range(as_cmethod, nargs):
+ #print "CFuncType.same_c_signature_as_resolved_type:", \
+ # self, other_type, "as_cmethod =", as_cmethod ###
+ if other_type is error_type:
+ return 1
+ if not other_type.is_cfunction:
+ return 0
+ if self.is_overridable != other_type.is_overridable:
+ return 0
+ nargs = len(self.args)
+ if nargs != len(other_type.args):
+ return 0
+ # When comparing C method signatures, the first argument
+ # is exempt from compatibility checking (the proper check
+ # is performed elsewhere).
+ for i in range(as_cmethod, nargs):
if not self.args[i].type.same_as(other_type.args[i].type):
return 0
- if self.has_varargs != other_type.has_varargs:
- return 0
- if self.optional_arg_count != other_type.optional_arg_count:
- return 0
+ if self.has_varargs != other_type.has_varargs:
+ return 0
+ if self.optional_arg_count != other_type.optional_arg_count:
+ return 0
if as_pxd_definition:
# A narrowing of the return type declared in the pxd is allowed.
if not self.return_type.subtype_of_resolved_type(other_type.return_type):
@@ -2743,8 +2743,8 @@ class CFuncType(CType):
else:
if not self.return_type.same_as(other_type.return_type):
return 0
- if not self.same_calling_convention_as(other_type):
- return 0
+ if not self.same_calling_convention_as(other_type):
+ return 0
if exact_semantics:
if self.exception_check != other_type.exception_check:
return 0
@@ -2752,8 +2752,8 @@ class CFuncType(CType):
return 0
elif not self._is_exception_compatible_with(other_type):
return 0
- return 1
-
+ return 1
+
def _same_exception_value(self, other_exc_value):
if self.exception_value == other_exc_value:
return 1
@@ -2770,43 +2770,43 @@ class CFuncType(CType):
return 0
return 1
- def compatible_signature_with(self, other_type, as_cmethod = 0):
- return self.compatible_signature_with_resolved_type(other_type.resolve(), as_cmethod)
-
- def compatible_signature_with_resolved_type(self, other_type, as_cmethod):
- #print "CFuncType.same_c_signature_as_resolved_type:", \
- # self, other_type, "as_cmethod =", as_cmethod ###
- if other_type is error_type:
- return 1
- if not other_type.is_cfunction:
- return 0
- if not self.is_overridable and other_type.is_overridable:
- return 0
- nargs = len(self.args)
- if nargs - self.optional_arg_count != len(other_type.args) - other_type.optional_arg_count:
- return 0
- if self.optional_arg_count < other_type.optional_arg_count:
- return 0
- # When comparing C method signatures, the first argument
- # is exempt from compatibility checking (the proper check
- # is performed elsewhere).
- for i in range(as_cmethod, len(other_type.args)):
- if not self.args[i].type.same_as(
- other_type.args[i].type):
- return 0
- if self.has_varargs != other_type.has_varargs:
- return 0
- if not self.return_type.subtype_of_resolved_type(other_type.return_type):
- return 0
- if not self.same_calling_convention_as(other_type):
- return 0
- if self.nogil != other_type.nogil:
- return 0
+ def compatible_signature_with(self, other_type, as_cmethod = 0):
+ return self.compatible_signature_with_resolved_type(other_type.resolve(), as_cmethod)
+
+ def compatible_signature_with_resolved_type(self, other_type, as_cmethod):
+ #print "CFuncType.same_c_signature_as_resolved_type:", \
+ # self, other_type, "as_cmethod =", as_cmethod ###
+ if other_type is error_type:
+ return 1
+ if not other_type.is_cfunction:
+ return 0
+ if not self.is_overridable and other_type.is_overridable:
+ return 0
+ nargs = len(self.args)
+ if nargs - self.optional_arg_count != len(other_type.args) - other_type.optional_arg_count:
+ return 0
+ if self.optional_arg_count < other_type.optional_arg_count:
+ return 0
+ # When comparing C method signatures, the first argument
+ # is exempt from compatibility checking (the proper check
+ # is performed elsewhere).
+ for i in range(as_cmethod, len(other_type.args)):
+ if not self.args[i].type.same_as(
+ other_type.args[i].type):
+ return 0
+ if self.has_varargs != other_type.has_varargs:
+ return 0
+ if not self.return_type.subtype_of_resolved_type(other_type.return_type):
+ return 0
+ if not self.same_calling_convention_as(other_type):
+ return 0
+ if self.nogil != other_type.nogil:
+ return 0
if not self._is_exception_compatible_with(other_type):
return 0
- self.original_sig = other_type.original_sig or other_type
- return 1
-
+ self.original_sig = other_type.original_sig or other_type
+ return 1
+
def _is_exception_compatible_with(self, other_type):
# narrower exception checks are ok, but prevent mismatches
if self.exception_check == '+' and other_type.exception_check != '+':
@@ -2821,54 +2821,54 @@ class CFuncType(CType):
return 0
return 1
- def narrower_c_signature_than(self, other_type, as_cmethod = 0):
- return self.narrower_c_signature_than_resolved_type(other_type.resolve(), as_cmethod)
-
- def narrower_c_signature_than_resolved_type(self, other_type, as_cmethod):
- if other_type is error_type:
- return 1
- if not other_type.is_cfunction:
- return 0
- nargs = len(self.args)
- if nargs != len(other_type.args):
- return 0
- for i in range(as_cmethod, nargs):
- if not self.args[i].type.subtype_of_resolved_type(other_type.args[i].type):
- return 0
- else:
- self.args[i].needs_type_test = other_type.args[i].needs_type_test \
- or not self.args[i].type.same_as(other_type.args[i].type)
- if self.has_varargs != other_type.has_varargs:
- return 0
- if self.optional_arg_count != other_type.optional_arg_count:
- return 0
- if not self.return_type.subtype_of_resolved_type(other_type.return_type):
- return 0
+ def narrower_c_signature_than(self, other_type, as_cmethod = 0):
+ return self.narrower_c_signature_than_resolved_type(other_type.resolve(), as_cmethod)
+
+ def narrower_c_signature_than_resolved_type(self, other_type, as_cmethod):
+ if other_type is error_type:
+ return 1
+ if not other_type.is_cfunction:
+ return 0
+ nargs = len(self.args)
+ if nargs != len(other_type.args):
+ return 0
+ for i in range(as_cmethod, nargs):
+ if not self.args[i].type.subtype_of_resolved_type(other_type.args[i].type):
+ return 0
+ else:
+ self.args[i].needs_type_test = other_type.args[i].needs_type_test \
+ or not self.args[i].type.same_as(other_type.args[i].type)
+ if self.has_varargs != other_type.has_varargs:
+ return 0
+ if self.optional_arg_count != other_type.optional_arg_count:
+ return 0
+ if not self.return_type.subtype_of_resolved_type(other_type.return_type):
+ return 0
if not self.exception_check and other_type.exception_check:
# a redundant exception check doesn't make functions incompatible, but a missing one does
return 0
if not self._same_exception_value(other_type.exception_value):
return 0
- return 1
-
- def same_calling_convention_as(self, other):
- ## XXX Under discussion ...
- ## callspec_words = ("__stdcall", "__cdecl", "__fastcall")
- ## cs1 = self.calling_convention
- ## cs2 = other.calling_convention
- ## if (cs1 in callspec_words or
- ## cs2 in callspec_words):
- ## return cs1 == cs2
- ## else:
- ## return True
- sc1 = self.calling_convention == '__stdcall'
- sc2 = other.calling_convention == '__stdcall'
- return sc1 == sc2
-
+ return 1
+
+ def same_calling_convention_as(self, other):
+ ## XXX Under discussion ...
+ ## callspec_words = ("__stdcall", "__cdecl", "__fastcall")
+ ## cs1 = self.calling_convention
+ ## cs2 = other.calling_convention
+ ## if (cs1 in callspec_words or
+ ## cs2 in callspec_words):
+ ## return cs1 == cs2
+ ## else:
+ ## return True
+ sc1 = self.calling_convention == '__stdcall'
+ sc2 = other.calling_convention == '__stdcall'
+ return sc1 == sc2
+
def same_as_resolved_type(self, other_type, as_cmethod=False):
return self.same_c_signature_as_resolved_type(other_type, as_cmethod=as_cmethod) \
- and self.nogil == other_type.nogil
-
+ and self.nogil == other_type.nogil
+
def pointer_assignable_from_resolved_type(self, rhs_type):
# Accept compatible exception/nogil declarations for the RHS.
if rhs_type is error_type:
@@ -2877,137 +2877,137 @@ class CFuncType(CType):
return 0
return rhs_type.same_c_signature_as_resolved_type(self, exact_semantics=False) \
and not (self.nogil and not rhs_type.nogil)
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0,
- with_calling_convention = 1):
- arg_decl_list = []
- for arg in self.args[:len(self.args)-self.optional_arg_count]:
- arg_decl_list.append(
- arg.type.declaration_code("", for_display, pyrex = pyrex))
- if self.is_overridable:
- arg_decl_list.append("int %s" % Naming.skip_dispatch_cname)
- if self.optional_arg_count:
- arg_decl_list.append(self.op_arg_struct.declaration_code(Naming.optional_args_cname))
- if self.has_varargs:
- arg_decl_list.append("...")
- arg_decl_code = ", ".join(arg_decl_list)
- if not arg_decl_code and not pyrex:
- arg_decl_code = "void"
- trailer = ""
- if (pyrex or for_display) and not self.return_type.is_pyobject:
- if self.exception_value and self.exception_check:
- trailer = " except? %s" % self.exception_value
- elif self.exception_value:
- trailer = " except %s" % self.exception_value
- elif self.exception_check == '+':
- trailer = " except +"
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0,
+ with_calling_convention = 1):
+ arg_decl_list = []
+ for arg in self.args[:len(self.args)-self.optional_arg_count]:
+ arg_decl_list.append(
+ arg.type.declaration_code("", for_display, pyrex = pyrex))
+ if self.is_overridable:
+ arg_decl_list.append("int %s" % Naming.skip_dispatch_cname)
+ if self.optional_arg_count:
+ arg_decl_list.append(self.op_arg_struct.declaration_code(Naming.optional_args_cname))
+ if self.has_varargs:
+ arg_decl_list.append("...")
+ arg_decl_code = ", ".join(arg_decl_list)
+ if not arg_decl_code and not pyrex:
+ arg_decl_code = "void"
+ trailer = ""
+ if (pyrex or for_display) and not self.return_type.is_pyobject:
+ if self.exception_value and self.exception_check:
+ trailer = " except? %s" % self.exception_value
+ elif self.exception_value:
+ trailer = " except %s" % self.exception_value
+ elif self.exception_check == '+':
+ trailer = " except +"
elif self.exception_check and for_display:
# not spelled out by default, unless for human eyes
trailer = " except *"
- if self.nogil:
- trailer += " nogil"
- if not with_calling_convention:
- cc = ''
- else:
- cc = self.calling_convention_prefix()
- if (not entity_code and cc) or entity_code.startswith("*"):
- entity_code = "(%s%s)" % (cc, entity_code)
- cc = ""
- if self.is_const_method:
- trailer += " const"
- return self.return_type.declaration_code(
- "%s%s(%s)%s" % (cc, entity_code, arg_decl_code, trailer),
- for_display, dll_linkage, pyrex)
-
- def function_header_code(self, func_name, arg_code):
- if self.is_const_method:
- trailer = " const"
- else:
- trailer = ""
- return "%s%s(%s)%s" % (self.calling_convention_prefix(),
- func_name, arg_code, trailer)
-
- def signature_string(self):
+ if self.nogil:
+ trailer += " nogil"
+ if not with_calling_convention:
+ cc = ''
+ else:
+ cc = self.calling_convention_prefix()
+ if (not entity_code and cc) or entity_code.startswith("*"):
+ entity_code = "(%s%s)" % (cc, entity_code)
+ cc = ""
+ if self.is_const_method:
+ trailer += " const"
+ return self.return_type.declaration_code(
+ "%s%s(%s)%s" % (cc, entity_code, arg_decl_code, trailer),
+ for_display, dll_linkage, pyrex)
+
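For display purposes the trailer logic above reproduces the Cython-level annotations on the signature; a hypothetical sketch (not part of the commit), assuming the module-level c_int_type singleton:

    func = CFuncType(c_int_type, [CFuncTypeArg("x", c_int_type, None)],
                     exception_value=-1, exception_check=1, nogil=1)
    func.declaration_code("f", for_display=1)
    # roughly "int f(int) except? -1 nogil" -- argument names are not shown, and
    # "except? -1" appears because both exception_value and exception_check are set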
+ def function_header_code(self, func_name, arg_code):
+ if self.is_const_method:
+ trailer = " const"
+ else:
+ trailer = ""
+ return "%s%s(%s)%s" % (self.calling_convention_prefix(),
+ func_name, arg_code, trailer)
+
+ def signature_string(self):
s = self.empty_declaration_code()
- return s
-
- def signature_cast_string(self):
- s = self.declaration_code("(*)", with_calling_convention=False)
- return '(%s)' % s
-
- def specialize(self, values):
- result = CFuncType(self.return_type.specialize(values),
- [arg.specialize(values) for arg in self.args],
- has_varargs = self.has_varargs,
- exception_value = self.exception_value,
- exception_check = self.exception_check,
- calling_convention = self.calling_convention,
- nogil = self.nogil,
- with_gil = self.with_gil,
- is_overridable = self.is_overridable,
- optional_arg_count = self.optional_arg_count,
- is_const_method = self.is_const_method,
- is_static_method = self.is_static_method,
- templates = self.templates)
-
- result.from_fused = self.is_fused
- return result
-
- def opt_arg_cname(self, arg_name):
- return self.op_arg_struct.base_type.scope.lookup(arg_name).cname
-
- # Methods that deal with Fused Types
- # All but map_with_specific_entries should be called only on functions
- # with fused types (and not on their corresponding specific versions).
-
- def get_all_specialized_permutations(self, fused_types=None):
- """
- Permute all the types. For every specific instance of a fused type, we
- want all other specific instances of all other fused types.
-
- It returns an iterable of two-tuples of the cname that should prefix
- the cname of the function, and a dict mapping any fused types to their
- respective specific types.
- """
- assert self.is_fused
-
- if fused_types is None:
- fused_types = self.get_fused_types()
-
- return get_all_specialized_permutations(fused_types)
-
- def get_all_specialized_function_types(self):
- """
- Get all the specific function types of this one.
- """
- assert self.is_fused
-
- if self.entry.fused_cfunction:
- return [n.type for n in self.entry.fused_cfunction.nodes]
- elif self.cached_specialized_types is not None:
- return self.cached_specialized_types
-
- result = []
- permutations = self.get_all_specialized_permutations()
-
+ return s
+
+ def signature_cast_string(self):
+ s = self.declaration_code("(*)", with_calling_convention=False)
+ return '(%s)' % s
+
+ def specialize(self, values):
+ result = CFuncType(self.return_type.specialize(values),
+ [arg.specialize(values) for arg in self.args],
+ has_varargs = self.has_varargs,
+ exception_value = self.exception_value,
+ exception_check = self.exception_check,
+ calling_convention = self.calling_convention,
+ nogil = self.nogil,
+ with_gil = self.with_gil,
+ is_overridable = self.is_overridable,
+ optional_arg_count = self.optional_arg_count,
+ is_const_method = self.is_const_method,
+ is_static_method = self.is_static_method,
+ templates = self.templates)
+
+ result.from_fused = self.is_fused
+ return result
+
+ def opt_arg_cname(self, arg_name):
+ return self.op_arg_struct.base_type.scope.lookup(arg_name).cname
+
+ # Methods that deal with Fused Types
+ # All but map_with_specific_entries should be called only on functions
+ # with fused types (and not on their corresponding specific versions).
+
+ def get_all_specialized_permutations(self, fused_types=None):
+ """
+ Permute all the types. For every specific instance of a fused type, we
+ want all other specific instances of all other fused types.
+
+ It returns an iterable of two-tuples of the cname that should prefix
+ the cname of the function, and a dict mapping any fused types to their
+ respective specific types.
+ """
+ assert self.is_fused
+
+ if fused_types is None:
+ fused_types = self.get_fused_types()
+
+ return get_all_specialized_permutations(fused_types)
+
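Conceptually, the permutations described in the docstring are the cross product of each fused type's specific types, each paired with a cname prefix; a purely illustrative sketch (not the module's implementation, and the cname mangling shown here is made up):

    import itertools

    def sketch_permutations(fused_to_specifics):
        names = list(fused_to_specifics)
        for combo in itertools.product(*(fused_to_specifics[n] for n in names)):
            cname = "__".join(combo)              # illustrative mangling only
            yield cname, dict(zip(names, combo))

    list(sketch_permutations({"number_t": ["int", "double"]}))
    # [("int", {"number_t": "int"}), ("double", {"number_t": "double"})]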
+ def get_all_specialized_function_types(self):
+ """
+ Get all the specific function types of this one.
+ """
+ assert self.is_fused
+
+ if self.entry.fused_cfunction:
+ return [n.type for n in self.entry.fused_cfunction.nodes]
+ elif self.cached_specialized_types is not None:
+ return self.cached_specialized_types
+
+ result = []
+ permutations = self.get_all_specialized_permutations()
+
new_cfunc_entries = []
- for cname, fused_to_specific in permutations:
- new_func_type = self.entry.type.specialize(fused_to_specific)
-
- if self.optional_arg_count:
- # Remember, this method is set by CFuncDeclaratorNode
- self.declare_opt_arg_struct(new_func_type, cname)
-
- new_entry = copy.deepcopy(self.entry)
- new_func_type.specialize_entry(new_entry, cname)
-
- new_entry.type = new_func_type
- new_func_type.entry = new_entry
- result.append(new_func_type)
-
+ for cname, fused_to_specific in permutations:
+ new_func_type = self.entry.type.specialize(fused_to_specific)
+
+ if self.optional_arg_count:
+ # Remember, this method is set by CFuncDeclaratorNode
+ self.declare_opt_arg_struct(new_func_type, cname)
+
+ new_entry = copy.deepcopy(self.entry)
+ new_func_type.specialize_entry(new_entry, cname)
+
+ new_entry.type = new_func_type
+ new_func_type.entry = new_entry
+ result.append(new_func_type)
+
new_cfunc_entries.append(new_entry)
-
+
cfunc_entries = self.entry.scope.cfunc_entries
try:
cindex = cfunc_entries.index(self.entry)
@@ -3016,19 +3016,19 @@ class CFuncType(CType):
else:
cfunc_entries[cindex:cindex+1] = new_cfunc_entries
- self.cached_specialized_types = result
-
- return result
-
- def get_fused_types(self, result=None, seen=None, subtypes=None):
- """Return fused types in the order they appear as parameter types"""
- return super(CFuncType, self).get_fused_types(result, seen,
- subtypes=['args'])
-
- def specialize_entry(self, entry, cname):
- assert not self.is_fused
- specialize_entry(entry, cname)
-
+ self.cached_specialized_types = result
+
+ return result
+
+ def get_fused_types(self, result=None, seen=None, subtypes=None):
+ """Return fused types in the order they appear as parameter types"""
+ return super(CFuncType, self).get_fused_types(result, seen,
+ subtypes=['args'])
+
+ def specialize_entry(self, entry, cname):
+ assert not self.is_fused
+ specialize_entry(entry, cname)
+
def can_coerce_to_pyobject(self, env):
# duplicating the decisions from create_to_py_utility_code() here avoids writing out unused code
if self.has_varargs or self.optional_arg_count:
@@ -3041,7 +3041,7 @@ class CFuncType(CType):
if not self.return_type.is_pyobject and not self.return_type.can_coerce_to_pyobject(env):
return False
return True
-
+
def create_to_py_utility_code(self, env):
# FIXME: it seems we're trying to coerce in more cases than we should
if self.to_py_function is not None:
@@ -3109,256 +3109,256 @@ class CFuncType(CType):
return True
-def specialize_entry(entry, cname):
- """
- Specialize an entry of a copied fused function or method
- """
- entry.is_fused_specialized = True
- entry.name = get_fused_cname(cname, entry.name)
-
- if entry.is_cmethod:
- entry.cname = entry.name
- if entry.is_inherited:
- entry.cname = StringEncoding.EncodedString(
- "%s.%s" % (Naming.obj_base_cname, entry.cname))
- else:
- entry.cname = get_fused_cname(cname, entry.cname)
-
- if entry.func_cname:
- entry.func_cname = get_fused_cname(cname, entry.func_cname)
-
-def get_fused_cname(fused_cname, orig_cname):
- """
- Given the fused cname id and an original cname, return a specialized cname
- """
- assert fused_cname and orig_cname
- return StringEncoding.EncodedString('%s%s%s' % (Naming.fused_func_prefix,
- fused_cname, orig_cname))
-
-def unique(somelist):
- seen = set()
- result = []
- for obj in somelist:
- if obj not in seen:
- result.append(obj)
- seen.add(obj)
-
- return result
-
-def get_all_specialized_permutations(fused_types):
- return _get_all_specialized_permutations(unique(fused_types))
-
-def _get_all_specialized_permutations(fused_types, id="", f2s=()):
- fused_type, = fused_types[0].get_fused_types()
- result = []
-
- for newid, specific_type in enumerate(fused_type.types):
- # f2s = dict(f2s, **{ fused_type: specific_type })
- f2s = dict(f2s)
- f2s.update({ fused_type: specific_type })
-
- if id:
- cname = '%s_%s' % (id, newid)
- else:
- cname = str(newid)
-
- if len(fused_types) > 1:
- result.extend(_get_all_specialized_permutations(
- fused_types[1:], cname, f2s))
- else:
- result.append((cname, f2s))
-
- return result
-
-def specialization_signature_string(fused_compound_type, fused_to_specific):
- """
- Return the signature for a specialization of a fused type. e.g.
-
- floating[:] ->
- 'float' or 'double'
-
- cdef fused ft:
- float[:]
- double[:]
-
- ft ->
- 'float[:]' or 'double[:]'
-
- integral func(floating) ->
- 'int (*func)(float)' or ...
- """
- fused_types = fused_compound_type.get_fused_types()
- if len(fused_types) == 1:
- fused_type = fused_types[0]
- else:
- fused_type = fused_compound_type
-
- return fused_type.specialize(fused_to_specific).typeof_name()
-
-
-def get_specialized_types(type):
- """
+def specialize_entry(entry, cname):
+ """
+ Specialize an entry of a copied fused function or method
+ """
+ entry.is_fused_specialized = True
+ entry.name = get_fused_cname(cname, entry.name)
+
+ if entry.is_cmethod:
+ entry.cname = entry.name
+ if entry.is_inherited:
+ entry.cname = StringEncoding.EncodedString(
+ "%s.%s" % (Naming.obj_base_cname, entry.cname))
+ else:
+ entry.cname = get_fused_cname(cname, entry.cname)
+
+ if entry.func_cname:
+ entry.func_cname = get_fused_cname(cname, entry.func_cname)
+
+def get_fused_cname(fused_cname, orig_cname):
+ """
+ Given the fused cname id and an original cname, return a specialized cname
+ """
+ assert fused_cname and orig_cname
+ return StringEncoding.EncodedString('%s%s%s' % (Naming.fused_func_prefix,
+ fused_cname, orig_cname))
+
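Editorial aside (minimal sketch, not from the commit): get_fused_cname() simply concatenates the fused prefix, the permutation id and the original cname. Assuming Naming.fused_func_prefix is "__pyx_fuse_" (an assumption made here purely for illustration):

    # Hypothetical values, for illustration only.
    FUSED_FUNC_PREFIX = "__pyx_fuse_"  # assumed value of Naming.fused_func_prefix

    def sketch_fused_cname(fused_id, orig_cname):
        return "%s%s%s" % (FUSED_FUNC_PREFIX, fused_id, orig_cname)

    # sketch_fused_cname("0", "__pyx_f_5mymod_func")
    # -> "__pyx_fuse_0__pyx_f_5mymod_func"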
+def unique(somelist):
+ seen = set()
+ result = []
+ for obj in somelist:
+ if obj not in seen:
+ result.append(obj)
+ seen.add(obj)
+
+ return result
+
+def get_all_specialized_permutations(fused_types):
+ return _get_all_specialized_permutations(unique(fused_types))
+
+def _get_all_specialized_permutations(fused_types, id="", f2s=()):
+ fused_type, = fused_types[0].get_fused_types()
+ result = []
+
+ for newid, specific_type in enumerate(fused_type.types):
+ # f2s = dict(f2s, **{ fused_type: specific_type })
+ f2s = dict(f2s)
+ f2s.update({ fused_type: specific_type })
+
+ if id:
+ cname = '%s_%s' % (id, newid)
+ else:
+ cname = str(newid)
+
+ if len(fused_types) > 1:
+ result.extend(_get_all_specialized_permutations(
+ fused_types[1:], cname, f2s))
+ else:
+ result.append((cname, f2s))
+
+ return result
+
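Editorial aside (self-contained sketch, not from the commit): _get_all_specialized_permutations() recurses over the fused types, building a numeric cname id and a fused-to-specific mapping for every combination. An equivalent stand-alone analogue using plain (name, specifics) pairs instead of Cython type objects:

    # Stand-alone analogue of the recursion above; the "fused types" here are
    # hypothetical (name, [specific names]) pairs, not Cython type objects.
    def sketch_permutations(fused, id="", f2s=()):
        name, specifics = fused[0]
        result = []
        for newid, specific in enumerate(specifics):
            mapping = dict(f2s)
            mapping[name] = specific
            cname = '%s_%s' % (id, newid) if id else str(newid)
            if len(fused) > 1:
                result.extend(sketch_permutations(fused[1:], cname, mapping))
            else:
                result.append((cname, mapping))
        return result

    # sketch_permutations([("floating", ["float", "double"]),
    #                      ("integral", ["int", "long"])])
    # -> [('0_0', {'floating': 'float', 'integral': 'int'}),
    #     ('0_1', {'floating': 'float', 'integral': 'long'}),
    #     ('1_0', {'floating': 'double', 'integral': 'int'}),
    #     ('1_1', {'floating': 'double', 'integral': 'long'})]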
+def specialization_signature_string(fused_compound_type, fused_to_specific):
+ """
+ Return the signature for a specialization of a fused type. e.g.
+
+ floating[:] ->
+ 'float' or 'double'
+
+ cdef fused ft:
+ float[:]
+ double[:]
+
+ ft ->
+ 'float[:]' or 'double[:]'
+
+ integral func(floating) ->
+ 'int (*func)(float)' or ...
+ """
+ fused_types = fused_compound_type.get_fused_types()
+ if len(fused_types) == 1:
+ fused_type = fused_types[0]
+ else:
+ fused_type = fused_compound_type
+
+ return fused_type.specialize(fused_to_specific).typeof_name()
+
+
+def get_specialized_types(type):
+ """
Return a list of specialized types in their declared order.
- """
- assert type.is_fused
-
- if isinstance(type, FusedType):
+ """
+ assert type.is_fused
+
+ if isinstance(type, FusedType):
result = list(type.types)
- for specialized_type in result:
- specialized_type.specialization_string = specialized_type.typeof_name()
- else:
- result = []
- for cname, f2s in get_all_specialized_permutations(type.get_fused_types()):
- specialized_type = type.specialize(f2s)
- specialized_type.specialization_string = (
- specialization_signature_string(type, f2s))
- result.append(specialized_type)
-
+ for specialized_type in result:
+ specialized_type.specialization_string = specialized_type.typeof_name()
+ else:
+ result = []
+ for cname, f2s in get_all_specialized_permutations(type.get_fused_types()):
+ specialized_type = type.specialize(f2s)
+ specialized_type.specialization_string = (
+ specialization_signature_string(type, f2s))
+ result.append(specialized_type)
+
return result
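# Editorial aside (illustrative, not from the commit): conceptually,
# get_specialized_types() yields one concrete type per permutation and tags
# each one with a readable signature, e.g. (type names shown symbolically):
#   fused "floating" {float, double}     -> [float, double], tagged 'float' / 'double'
#   function type "integral f(floating)" -> one CFuncType per combination,
#                                           tagged e.g. 'int (*func)(float)'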
-
-
-class CFuncTypeArg(BaseType):
- # name string
- # cname string
- # type PyrexType
- # pos source file position
-
- # FIXME: is this the right setup? should None be allowed here?
- not_none = False
- or_none = False
- accept_none = True
- accept_builtin_subtypes = False
+
+
+class CFuncTypeArg(BaseType):
+ # name string
+ # cname string
+ # type PyrexType
+ # pos source file position
+
+ # FIXME: is this the right setup? should None be allowed here?
+ not_none = False
+ or_none = False
+ accept_none = True
+ accept_builtin_subtypes = False
annotation = None
-
- subtypes = ['type']
-
+
+ subtypes = ['type']
+
def __init__(self, name, type, pos, cname=None, annotation=None):
- self.name = name
- if cname is not None:
- self.cname = cname
- else:
- self.cname = Naming.var_prefix + name
+ self.name = name
+ if cname is not None:
+ self.cname = cname
+ else:
+ self.cname = Naming.var_prefix + name
if annotation is not None:
self.annotation = annotation
- self.type = type
- self.pos = pos
- self.needs_type_test = False # TODO: should these defaults be set in analyse_types()?
-
- def __repr__(self):
- return "%s:%s" % (self.name, repr(self.type))
-
- def declaration_code(self, for_display = 0):
- return self.type.declaration_code(self.cname, for_display)
-
- def specialize(self, values):
- return CFuncTypeArg(self.name, self.type.specialize(values), self.pos, self.cname)
-
-
-class ToPyStructUtilityCode(object):
-
- requires = None
-
+ self.type = type
+ self.pos = pos
+ self.needs_type_test = False # TODO: should these defaults be set in analyse_types()?
+
+ def __repr__(self):
+ return "%s:%s" % (self.name, repr(self.type))
+
+ def declaration_code(self, for_display = 0):
+ return self.type.declaration_code(self.cname, for_display)
+
+ def specialize(self, values):
+ return CFuncTypeArg(self.name, self.type.specialize(values), self.pos, self.cname)
+
+
+class ToPyStructUtilityCode(object):
+
+ requires = None
+
def __init__(self, type, forward_decl, env):
- self.type = type
- self.header = "static PyObject* %s(%s)" % (type.to_py_function,
- type.declaration_code('s'))
- self.forward_decl = forward_decl
+ self.type = type
+ self.header = "static PyObject* %s(%s)" % (type.to_py_function,
+ type.declaration_code('s'))
+ self.forward_decl = forward_decl
self.env = env
-
- def __eq__(self, other):
- return isinstance(other, ToPyStructUtilityCode) and self.header == other.header
-
- def __hash__(self):
- return hash(self.header)
-
+
+ def __eq__(self, other):
+ return isinstance(other, ToPyStructUtilityCode) and self.header == other.header
+
+ def __hash__(self):
+ return hash(self.header)
+
def get_tree(self, **kwargs):
- pass
-
- def put_code(self, output):
- code = output['utility_code_def']
- proto = output['utility_code_proto']
-
- code.putln("%s {" % self.header)
- code.putln("PyObject* res;")
- code.putln("PyObject* member;")
+ pass
+
+ def put_code(self, output):
+ code = output['utility_code_def']
+ proto = output['utility_code_proto']
+
+ code.putln("%s {" % self.header)
+ code.putln("PyObject* res;")
+ code.putln("PyObject* member;")
code.putln("res = __Pyx_PyDict_NewPresized(%d); if (unlikely(!res)) return NULL;" %
len(self.type.scope.var_entries))
- for member in self.type.scope.var_entries:
- nameconst_cname = code.get_py_string_const(member.name, identifier=True)
+ for member in self.type.scope.var_entries:
+ nameconst_cname = code.get_py_string_const(member.name, identifier=True)
code.putln("%s; if (unlikely(!member)) goto bad;" % (
member.type.to_py_call_code('s.%s' % member.cname, 'member', member.type)))
code.putln("if (unlikely(PyDict_SetItem(res, %s, member) < 0)) goto bad;" % nameconst_cname)
- code.putln("Py_DECREF(member);")
- code.putln("return res;")
- code.putln("bad:")
- code.putln("Py_XDECREF(member);")
- code.putln("Py_DECREF(res);")
- code.putln("return NULL;")
- code.putln("}")
-
- # This is a bit of a hack, we need a forward declaration
- # due to the way things are ordered in the module...
- if self.forward_decl:
+ code.putln("Py_DECREF(member);")
+ code.putln("return res;")
+ code.putln("bad:")
+ code.putln("Py_XDECREF(member);")
+ code.putln("Py_DECREF(res);")
+ code.putln("return NULL;")
+ code.putln("}")
+
+ # This is a bit of a hack, we need a forward declaration
+ # due to the way things are ordered in the module...
+ if self.forward_decl:
proto.putln(self.type.empty_declaration_code() + ';')
- proto.putln(self.header + ";")
-
- def inject_tree_and_scope_into(self, module_node):
- pass
-
-
-class CStructOrUnionType(CType):
- # name string
- # cname string
- # kind string "struct" or "union"
- # scope StructOrUnionScope, or None if incomplete
- # typedef_flag boolean
- # packed boolean
-
- # entry Entry
-
- is_struct_or_union = 1
- has_attributes = 1
- exception_check = True
-
- def __init__(self, name, kind, scope, typedef_flag, cname, packed=False):
- self.name = name
- self.cname = cname
- self.kind = kind
- self.scope = scope
- self.typedef_flag = typedef_flag
- self.is_struct = kind == 'struct'
+ proto.putln(self.header + ";")
+
+ def inject_tree_and_scope_into(self, module_node):
+ pass
+
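Editorial aside (rough sketch, not from the commit): put_code() above emits a C helper that packs every struct member into a Python dict. For a hypothetical struct Point { int x; int y; }, the emitted helper is shaped roughly as below (the helper name and the member conversions are placeholders, not generated output):

    # Reconstructed shape of the generated helper, shown as a string purely
    # for illustration; "Point" and the member conversions are hypothetical.
    EXAMPLE_TO_PY_HELPER = """
    static PyObject* __pyx_convert_to_py_Point(Point s) {
        PyObject* res;
        PyObject* member;
        res = __Pyx_PyDict_NewPresized(2); if (unlikely(!res)) return NULL;
        member = /* to-py conversion of s.x */; if (unlikely(!member)) goto bad;
        if (unlikely(PyDict_SetItem(res, /* interned "x" */, member) < 0)) goto bad;
        Py_DECREF(member);
        /* ... same pattern for s.y ... */
        return res;
    bad:
        Py_XDECREF(member);
        Py_DECREF(res);
        return NULL;
    }
    """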
+
+class CStructOrUnionType(CType):
+ # name string
+ # cname string
+ # kind string "struct" or "union"
+ # scope StructOrUnionScope, or None if incomplete
+ # typedef_flag boolean
+ # packed boolean
+
+ # entry Entry
+
+ is_struct_or_union = 1
+ has_attributes = 1
+ exception_check = True
+
+ def __init__(self, name, kind, scope, typedef_flag, cname, packed=False):
+ self.name = name
+ self.cname = cname
+ self.kind = kind
+ self.scope = scope
+ self.typedef_flag = typedef_flag
+ self.is_struct = kind == 'struct'
self.to_py_function = "%s_to_py_%s" % (
Naming.convert_func_prefix, self.specialization_name())
self.from_py_function = "%s_from_py_%s" % (
Naming.convert_func_prefix, self.specialization_name())
- self.exception_check = True
- self._convert_to_py_code = None
- self._convert_from_py_code = None
- self.packed = packed
-
+ self.exception_check = True
+ self._convert_to_py_code = None
+ self._convert_from_py_code = None
+ self.packed = packed
+
def can_coerce_to_pyobject(self, env):
if self._convert_to_py_code is False:
return None # tri-state-ish
- if env.outer_scope is None:
- return False
-
- if self._convert_to_py_code is None:
+ if env.outer_scope is None:
+ return False
+
+ if self._convert_to_py_code is None:
is_union = not self.is_struct
unsafe_union_types = set()
safe_union_types = set()
- for member in self.scope.var_entries:
+ for member in self.scope.var_entries:
member_type = member.type
if not member_type.can_coerce_to_pyobject(env):
- self.to_py_function = None
- self._convert_to_py_code = False
- return False
+ self.to_py_function = None
+ self._convert_to_py_code = False
+ return False
if is_union:
if member_type.is_ptr or member_type.is_cpp_class:
unsafe_union_types.add(member_type)
else:
safe_union_types.add(member_type)
-
+
if unsafe_union_types and (safe_union_types or len(unsafe_union_types) > 1):
# unsafe mix of safe and unsafe to convert types
self.from_py_function = None
@@ -3377,9 +3377,9 @@ class CStructOrUnionType(CType):
forward_decl = self.entry.visibility != 'extern' and not self.typedef_flag
self._convert_to_py_code = ToPyStructUtilityCode(self, forward_decl, env)
- env.use_utility_code(self._convert_to_py_code)
- return True
-
+ env.use_utility_code(self._convert_to_py_code)
+ return True
+
def can_coerce_from_pyobject(self, env):
if env.outer_scope is None or self._convert_from_py_code is False:
return False
@@ -3388,102 +3388,102 @@ class CStructOrUnionType(CType):
return False
return True
- def create_from_py_utility_code(self, env):
- if env.outer_scope is None:
- return False
-
- if self._convert_from_py_code is False:
- return None # tri-state-ish
-
- if self._convert_from_py_code is None:
+ def create_from_py_utility_code(self, env):
+ if env.outer_scope is None:
+ return False
+
+ if self._convert_from_py_code is False:
+ return None # tri-state-ish
+
+ if self._convert_from_py_code is None:
if not self.scope.var_entries:
# There are obviously missing fields; don't allow instantiation
# where absolutely no content is provided.
return False
- for member in self.scope.var_entries:
- if not member.type.create_from_py_utility_code(env):
- self.from_py_function = None
- self._convert_from_py_code = False
- return False
-
- context = dict(
+ for member in self.scope.var_entries:
+ if not member.type.create_from_py_utility_code(env):
+ self.from_py_function = None
+ self._convert_from_py_code = False
+ return False
+
+ context = dict(
struct_type=self,
- var_entries=self.scope.var_entries,
- funcname=self.from_py_function,
- )
+ var_entries=self.scope.var_entries,
+ funcname=self.from_py_function,
+ )
from .UtilityCode import CythonUtilityCode
self._convert_from_py_code = CythonUtilityCode.load(
"FromPyStructUtility" if self.is_struct else "FromPyUnionUtility",
"CConvert.pyx",
outer_module_scope=env.global_scope(), # need access to types declared in module
context=context)
-
- env.use_utility_code(self._convert_from_py_code)
- return True
-
- def __repr__(self):
- return "<CStructOrUnionType %s %s%s>" % (
- self.name, self.cname,
- ("", " typedef")[self.typedef_flag])
-
- def declaration_code(self, entity_code,
- for_display=0, dll_linkage=None, pyrex=0):
- if pyrex or for_display:
- base_code = self.name
- else:
- if self.typedef_flag:
- base_code = self.cname
- else:
- base_code = "%s %s" % (self.kind, self.cname)
- base_code = public_decl(base_code, dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
- def __eq__(self, other):
- try:
- return (isinstance(other, CStructOrUnionType) and
- self.name == other.name)
- except AttributeError:
- return False
-
- def __lt__(self, other):
- try:
- return self.name < other.name
- except AttributeError:
- # this is arbitrary, but it makes sure we always have
- # *some* kind of order
- return False
-
- def __hash__(self):
- return hash(self.cname) ^ hash(self.kind)
-
- def is_complete(self):
- return self.scope is not None
-
- def attributes_known(self):
- return self.is_complete()
-
- def can_be_complex(self):
- # Does the struct consist of exactly two identical floats?
- fields = self.scope.var_entries
- if len(fields) != 2: return False
- a, b = fields
- return (a.type.is_float and b.type.is_float and
+
+ env.use_utility_code(self._convert_from_py_code)
+ return True
+
+ def __repr__(self):
+ return "<CStructOrUnionType %s %s%s>" % (
+ self.name, self.cname,
+ ("", " typedef")[self.typedef_flag])
+
+ def declaration_code(self, entity_code,
+ for_display=0, dll_linkage=None, pyrex=0):
+ if pyrex or for_display:
+ base_code = self.name
+ else:
+ if self.typedef_flag:
+ base_code = self.cname
+ else:
+ base_code = "%s %s" % (self.kind, self.cname)
+ base_code = public_decl(base_code, dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+ def __eq__(self, other):
+ try:
+ return (isinstance(other, CStructOrUnionType) and
+ self.name == other.name)
+ except AttributeError:
+ return False
+
+ def __lt__(self, other):
+ try:
+ return self.name < other.name
+ except AttributeError:
+ # this is arbitrary, but it makes sure we always have
+ # *some* kind of order
+ return False
+
+ def __hash__(self):
+ return hash(self.cname) ^ hash(self.kind)
+
+ def is_complete(self):
+ return self.scope is not None
+
+ def attributes_known(self):
+ return self.is_complete()
+
+ def can_be_complex(self):
+ # Does the struct consist of exactly two identical floats?
+ fields = self.scope.var_entries
+ if len(fields) != 2: return False
+ a, b = fields
+ return (a.type.is_float and b.type.is_float and
a.type.empty_declaration_code() ==
b.type.empty_declaration_code())
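    # Editorial aside (illustrative, not from the commit): conceptual results of
    # can_be_complex() for a few hypothetical struct layouts:
    #   struct { double real; double imag; }      -> True  (exactly two identical float fields)
    #   struct { float re; double im; }           -> False (field types differ)
    #   struct { double x; double y; double z; }  -> False (not exactly two fields)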
-
- def struct_nesting_depth(self):
- child_depths = [x.type.struct_nesting_depth()
- for x in self.scope.var_entries]
- return max(child_depths) + 1
-
- def cast_code(self, expr_code):
- if self.is_struct:
- return expr_code
- return super(CStructOrUnionType, self).cast_code(expr_code)
-
+
+ def struct_nesting_depth(self):
+ child_depths = [x.type.struct_nesting_depth()
+ for x in self.scope.var_entries]
+ return max(child_depths) + 1
+
+ def cast_code(self, expr_code):
+ if self.is_struct:
+ return expr_code
+ return super(CStructOrUnionType, self).cast_code(expr_code)
+
cpp_string_conversions = ("std::string", "TString", "TStringBuf")
-
+
builtin_cpp_conversions = {
# type element template params
"std::pair": 2,
@@ -3500,49 +3500,49 @@ builtin_cpp_conversions = {
"THashMap": 2,
"TMap": 2,
}
-
-class CppClassType(CType):
- # name string
- # cname string
- # scope CppClassScope
- # templates [string] or None
-
- is_cpp_class = 1
- has_attributes = 1
- exception_check = True
- namespace = None
-
- # For struct-like declaration.
- kind = "struct"
- packed = False
- typedef_flag = False
-
- subtypes = ['templates']
-
+
+class CppClassType(CType):
+ # name string
+ # cname string
+ # scope CppClassScope
+ # templates [string] or None
+
+ is_cpp_class = 1
+ has_attributes = 1
+ exception_check = True
+ namespace = None
+
+ # For struct-like declaration.
+ kind = "struct"
+ packed = False
+ typedef_flag = False
+
+ subtypes = ['templates']
+
def __init__(self, name, scope, cname, base_classes, templates=None, template_type=None):
- self.name = name
- self.cname = cname
- self.scope = scope
- self.base_classes = base_classes
- self.operators = []
- self.templates = templates
- self.template_type = template_type
+ self.name = name
+ self.cname = cname
+ self.scope = scope
+ self.base_classes = base_classes
+ self.operators = []
+ self.templates = templates
+ self.template_type = template_type
self.num_optional_templates = sum(is_optional_template_param(T) for T in templates or ())
if templates and False: # https://github.com/cython/cython/issues/1868
self.specializations = {tuple(zip(templates, templates)): self}
else:
self.specializations = {}
- self.is_cpp_string = cname in cpp_string_conversions
-
- def use_conversion_utility(self, from_or_to):
- pass
-
- def maybe_unordered(self):
- if 'unordered' in self.cname:
- return 'unordered_'
- else:
- return ''
-
+ self.is_cpp_string = cname in cpp_string_conversions
+
+ def use_conversion_utility(self, from_or_to):
+ pass
+
+ def maybe_unordered(self):
+ if 'unordered' in self.cname:
+ return 'unordered_'
+ else:
+ return ''
+
def can_coerce_from_pyobject(self, env):
if self.cname in builtin_cpp_conversions:
template_count = builtin_cpp_conversions[self.cname]
@@ -3556,41 +3556,41 @@ class CppClassType(CType):
return True
return False
- def create_from_py_utility_code(self, env):
- if self.from_py_function is not None:
- return True
+ def create_from_py_utility_code(self, env):
+ if self.from_py_function is not None:
+ return True
if self.cname in builtin_cpp_conversions or self.cname in cpp_string_conversions:
- X = "XYZABC"
- tags = []
+ X = "XYZABC"
+ tags = []
context = {}
- for ix, T in enumerate(self.templates or []):
+ for ix, T in enumerate(self.templates or []):
if ix >= builtin_cpp_conversions[self.cname]:
break
- if T.is_pyobject or not T.create_from_py_utility_code(env):
- return False
- tags.append(T.specialization_name())
+ if T.is_pyobject or not T.create_from_py_utility_code(env):
+ return False
+ tags.append(T.specialization_name())
context[X[ix]] = T
- if self.cname in cpp_string_conversions:
- cls = 'string'
+ if self.cname in cpp_string_conversions:
+ cls = 'string'
tags = type_identifier(self),
elif self.cname.startswith('std::'):
cls = self.cname[5:]
- else:
+ else:
cls = 'arcadia_' + self.cname
cname = '__pyx_convert_%s_from_py_%s' % (cls, '__and_'.join(tags))
context.update({
- 'cname': cname,
- 'maybe_unordered': self.maybe_unordered(),
- 'type': self.cname,
+ 'cname': cname,
+ 'maybe_unordered': self.maybe_unordered(),
+ 'type': self.cname,
})
- from .UtilityCode import CythonUtilityCode
+ from .UtilityCode import CythonUtilityCode
env.use_utility_code(CythonUtilityCode.load(
cls.replace('unordered_', '') + ".from_py", "CppConvert.pyx",
context=context, compiler_directives=env.directives))
- self.from_py_function = cname
- return True
-
+ self.from_py_function = cname
+ return True
+
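Editorial aside (minimal sketch, not from the commit): the converter name follows the format string used above; for a hypothetical std::vector<int> it composes as follows ('vector' and 'int' are example values):

    # Sketch of the cname composition in create_from_py_utility_code().
    cls = 'vector'            # self.cname[5:] for a std:: container
    tags = ['int']            # element specialization names
    cname = '__pyx_convert_%s_from_py_%s' % (cls, '__and_'.join(tags))
    # -> '__pyx_convert_vector_from_py_int'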
def can_coerce_to_pyobject(self, env):
if self.cname in builtin_cpp_conversions or self.cname in cpp_string_conversions:
for ix, T in enumerate(self.templates or []):
@@ -3600,44 +3600,44 @@ class CppClassType(CType):
return False
return True
- def create_to_py_utility_code(self, env):
- if self.to_py_function is not None:
- return True
+ def create_to_py_utility_code(self, env):
+ if self.to_py_function is not None:
+ return True
if self.cname in builtin_cpp_conversions or self.cname in cpp_string_conversions:
- X = "XYZABC"
- tags = []
+ X = "XYZABC"
+ tags = []
context = {}
- for ix, T in enumerate(self.templates or []):
+ for ix, T in enumerate(self.templates or []):
if ix >= builtin_cpp_conversions[self.cname]:
break
- if not T.create_to_py_utility_code(env):
- return False
- tags.append(T.specialization_name())
+ if not T.create_to_py_utility_code(env):
+ return False
+ tags.append(T.specialization_name())
context[X[ix]] = T
- if self.cname in cpp_string_conversions:
- cls = 'string'
+ if self.cname in cpp_string_conversions:
+ cls = 'string'
prefix = 'PyObject_' # gets specialised by explicit type casts in CoerceToPyTypeNode
tags = type_identifier(self),
elif self.cname.startswith('std::'):
- cls = self.cname[5:]
+ cls = self.cname[5:]
prefix = ''
else:
cls = 'arcadia_' + self.cname
prefix = ''
cname = "__pyx_convert_%s%s_to_py_%s" % (prefix, cls, "____".join(tags))
context.update({
- 'cname': cname,
- 'maybe_unordered': self.maybe_unordered(),
- 'type': self.cname,
+ 'cname': cname,
+ 'maybe_unordered': self.maybe_unordered(),
+ 'type': self.cname,
})
- from .UtilityCode import CythonUtilityCode
+ from .UtilityCode import CythonUtilityCode
env.use_utility_code(CythonUtilityCode.load(
cls.replace('unordered_', '') + ".to_py", "CppConvert.pyx",
context=context, compiler_directives=env.directives))
- self.to_py_function = cname
- return True
-
+ self.to_py_function = cname
+ return True
+
def is_template_type(self):
return self.templates is not None and self.template_type is None
@@ -3654,8 +3654,8 @@ class CppClassType(CType):
def specialize_here(self, pos, template_values=None):
if not self.is_template_type():
- error(pos, "'%s' type is not a template" % self)
- return error_type
+ error(pos, "'%s' type is not a template" % self)
+ return error_type
if len(self.templates) - self.num_optional_templates <= len(template_values) < len(self.templates):
num_defaults = len(self.templates) - len(template_values)
partial_specialization = self.declaration_code('', template_params=template_values)
@@ -3671,36 +3671,36 @@ class CppClassType(CType):
TemplatePlaceholderType(
"%s::%s" % (partial_specialization, param.name), True)
for param in self.templates[-num_defaults:]]
- if len(self.templates) != len(template_values):
- error(pos, "%s templated type receives %d arguments, got %d" %
- (self.name, len(self.templates), len(template_values)))
- return error_type
- has_object_template_param = False
- for value in template_values:
- if value.is_pyobject:
- has_object_template_param = True
- error(pos,
- "Python object type '%s' cannot be used as a template argument" % value)
- if has_object_template_param:
- return error_type
- return self.specialize(dict(zip(self.templates, template_values)))
-
- def specialize(self, values):
- if not self.templates and not self.namespace:
- return self
- if self.templates is None:
- self.templates = []
- key = tuple(values.items())
- if key in self.specializations:
- return self.specializations[key]
- template_values = [t.specialize(values) for t in self.templates]
- specialized = self.specializations[key] = \
- CppClassType(self.name, None, self.cname, [], template_values, template_type=self)
- # Need to do these *after* self.specializations[key] is set
- # to avoid infinite recursion on circular references.
- specialized.base_classes = [b.specialize(values) for b in self.base_classes]
- if self.namespace is not None:
- specialized.namespace = self.namespace.specialize(values)
+ if len(self.templates) != len(template_values):
+ error(pos, "%s templated type receives %d arguments, got %d" %
+ (self.name, len(self.templates), len(template_values)))
+ return error_type
+ has_object_template_param = False
+ for value in template_values:
+ if value.is_pyobject:
+ has_object_template_param = True
+ error(pos,
+ "Python object type '%s' cannot be used as a template argument" % value)
+ if has_object_template_param:
+ return error_type
+ return self.specialize(dict(zip(self.templates, template_values)))
+
+ def specialize(self, values):
+ if not self.templates and not self.namespace:
+ return self
+ if self.templates is None:
+ self.templates = []
+ key = tuple(values.items())
+ if key in self.specializations:
+ return self.specializations[key]
+ template_values = [t.specialize(values) for t in self.templates]
+ specialized = self.specializations[key] = \
+ CppClassType(self.name, None, self.cname, [], template_values, template_type=self)
+ # Need to do these *after* self.specializations[key] is set
+ # to avoid infinite recursion on circular references.
+ specialized.base_classes = [b.specialize(values) for b in self.base_classes]
+ if self.namespace is not None:
+ specialized.namespace = self.namespace.specialize(values)
specialized.scope = self.scope.specialize(values, specialized)
if self.cname == 'std::vector':
# vector<bool> is special cased in the C++ standard, and its
@@ -3715,15 +3715,15 @@ class CppClassType(CType):
for bit_ref_returner in ('at', 'back', 'front'):
if bit_ref_returner in specialized.scope.entries:
specialized.scope.entries[bit_ref_returner].type.return_type = T
- return specialized
-
- def deduce_template_params(self, actual):
+ return specialized
+
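Editorial aside (generic analogue, not from the commit): specialize() above memoizes each instantiation keyed on the (template, value) pairs, and registers the new type in the cache before filling in bases and namespace so that circular references resolve to the same object. A minimal stand-in:

    # Generic memoisation analogue; TemplateLike is a stand-in, not Cython API.
    class TemplateLike(object):
        def __init__(self, name):
            self.name = name
            self.specializations = {}

        def specialize(self, values):
            key = tuple(values.items())
            if key in self.specializations:
                return self.specializations[key]
            # Register before any recursive work so self-references terminate.
            specialized = self.specializations[key] = TemplateLike(self.name)
            return specialized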
+ def deduce_template_params(self, actual):
if actual.is_const:
actual = actual.const_base_type
if actual.is_reference:
actual = actual.ref_base_type
- if self == actual:
- return {}
+ if self == actual:
+ return {}
elif actual.is_cpp_class:
self_template_type = self
while getattr(self_template_type, 'template_type', None):
@@ -3745,42 +3745,42 @@ class CppClassType(CType):
for (formal_param, actual_param)
in zip(self.templates, actual_base.templates)],
{})
- else:
+ else:
return {}
-
- def declaration_code(self, entity_code,
+
+ def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0,
template_params = None):
if template_params is None:
template_params = self.templates
- if self.templates:
- template_strings = [param.declaration_code('', for_display, None, pyrex)
+ if self.templates:
+ template_strings = [param.declaration_code('', for_display, None, pyrex)
for param in template_params
if not is_optional_template_param(param) and not param.is_fused]
- if for_display:
- brackets = "[%s]"
- else:
- brackets = "<%s> "
- templates = brackets % ",".join(template_strings)
- else:
- templates = ""
- if pyrex or for_display:
- base_code = "%s%s" % (self.name, templates)
- else:
- base_code = "%s%s" % (self.cname, templates)
- if self.namespace is not None:
+ if for_display:
+ brackets = "[%s]"
+ else:
+ brackets = "<%s> "
+ templates = brackets % ",".join(template_strings)
+ else:
+ templates = ""
+ if pyrex or for_display:
+ base_code = "%s%s" % (self.name, templates)
+ else:
+ base_code = "%s%s" % (self.cname, templates)
+ if self.namespace is not None:
base_code = "%s::%s" % (self.namespace.empty_declaration_code(), base_code)
- base_code = public_decl(base_code, dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
- def is_subclass(self, other_type):
- if self.same_as_resolved_type(other_type):
- return 1
- for base_class in self.base_classes:
- if base_class.is_subclass(other_type):
- return 1
- return 0
-
+ base_code = public_decl(base_code, dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
+ def is_subclass(self, other_type):
+ if self.same_as_resolved_type(other_type):
+ return 1
+ for base_class in self.base_classes:
+ if base_class.is_subclass(other_type):
+ return 1
+ return 0
+
def subclass_dist(self, super_type):
if self.same_as_resolved_type(super_type):
return 0
@@ -3789,50 +3789,50 @@ class CppClassType(CType):
else:
return 1 + min(b.subclass_dist(super_type) for b in self.base_classes)
- def same_as_resolved_type(self, other_type):
- if other_type.is_cpp_class:
- if self == other_type:
- return 1
+ def same_as_resolved_type(self, other_type):
+ if other_type.is_cpp_class:
+ if self == other_type:
+ return 1
# This messy logic is needed due to GH Issue #1852.
- elif (self.cname == other_type.cname and
+ elif (self.cname == other_type.cname and
(self.template_type and other_type.template_type
or self.templates
or other_type.templates)):
- if self.templates == other_type.templates:
- return 1
- for t1, t2 in zip(self.templates, other_type.templates):
+ if self.templates == other_type.templates:
+ return 1
+ for t1, t2 in zip(self.templates, other_type.templates):
if is_optional_template_param(t1) and is_optional_template_param(t2):
break
- if not t1.same_as_resolved_type(t2):
- return 0
- return 1
- return 0
-
- def assignable_from_resolved_type(self, other_type):
- # TODO: handle operator=(...) here?
- if other_type is error_type:
- return True
+ if not t1.same_as_resolved_type(t2):
+ return 0
+ return 1
+ return 0
+
+ def assignable_from_resolved_type(self, other_type):
+ # TODO: handle operator=(...) here?
+ if other_type is error_type:
+ return True
elif other_type.is_cpp_class:
return other_type.is_subclass(self)
elif other_type.is_string and self.cname in cpp_string_conversions:
return True
-
- def attributes_known(self):
- return self.scope is not None
-
- def find_cpp_operation_type(self, operator, operand_type=None):
- operands = [self]
- if operand_type is not None:
- operands.append(operand_type)
- # pos == None => no errors
- operator_entry = self.scope.lookup_operator_for_types(None, operator, operands)
- if not operator_entry:
- return None
- func_type = operator_entry.type
- if func_type.is_ptr:
- func_type = func_type.base_type
- return func_type.return_type
-
+
+ def attributes_known(self):
+ return self.scope is not None
+
+ def find_cpp_operation_type(self, operator, operand_type=None):
+ operands = [self]
+ if operand_type is not None:
+ operands.append(operand_type)
+ # pos == None => no errors
+ operator_entry = self.scope.lookup_operator_for_types(None, operator, operands)
+ if not operator_entry:
+ return None
+ func_type = operator_entry.type
+ if func_type.is_ptr:
+ func_type = func_type.base_type
+ return func_type.return_type
+
def get_constructor(self, pos):
constructor = self.scope.lookup('<init>')
if constructor is not None:
@@ -3850,99 +3850,99 @@ class CppClassType(CType):
func_type = CFuncType(self, [], exception_check='+', nogil=nogil)
return self.scope.declare_cfunction(u'<init>', func_type, pos)
- def check_nullary_constructor(self, pos, msg="stack allocated"):
- constructor = self.scope.lookup(u'<init>')
- if constructor is not None and best_match([], constructor.all_alternatives()) is None:
- error(pos, "C++ class must have a nullary constructor to be %s" % msg)
-
-
-class TemplatePlaceholderType(CType):
-
+ def check_nullary_constructor(self, pos, msg="stack allocated"):
+ constructor = self.scope.lookup(u'<init>')
+ if constructor is not None and best_match([], constructor.all_alternatives()) is None:
+ error(pos, "C++ class must have a nullary constructor to be %s" % msg)
+
+
+class TemplatePlaceholderType(CType):
+
def __init__(self, name, optional=False):
- self.name = name
+ self.name = name
self.optional = optional
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if entity_code:
- return self.name + " " + entity_code
- else:
- return self.name
-
- def specialize(self, values):
- if self in values:
- return values[self]
- else:
- return self
-
- def deduce_template_params(self, actual):
- return {self: actual}
-
- def same_as_resolved_type(self, other_type):
- if isinstance(other_type, TemplatePlaceholderType):
- return self.name == other_type.name
- else:
- return 0
-
- def __hash__(self):
- return hash(self.name)
-
- def __cmp__(self, other):
- if isinstance(other, TemplatePlaceholderType):
- return cmp(self.name, other.name)
- else:
- return cmp(type(self), type(other))
-
- def __eq__(self, other):
- if isinstance(other, TemplatePlaceholderType):
- return self.name == other.name
- else:
- return False
-
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if entity_code:
+ return self.name + " " + entity_code
+ else:
+ return self.name
+
+ def specialize(self, values):
+ if self in values:
+ return values[self]
+ else:
+ return self
+
+ def deduce_template_params(self, actual):
+ return {self: actual}
+
+ def same_as_resolved_type(self, other_type):
+ if isinstance(other_type, TemplatePlaceholderType):
+ return self.name == other_type.name
+ else:
+ return 0
+
+ def __hash__(self):
+ return hash(self.name)
+
+ def __cmp__(self, other):
+ if isinstance(other, TemplatePlaceholderType):
+ return cmp(self.name, other.name)
+ else:
+ return cmp(type(self), type(other))
+
+ def __eq__(self, other):
+ if isinstance(other, TemplatePlaceholderType):
+ return self.name == other.name
+ else:
+ return False
+
def is_optional_template_param(type):
return isinstance(type, TemplatePlaceholderType) and type.optional
class CEnumType(CIntLike, CType):
- # name string
- # cname string or None
- # typedef_flag boolean
+ # name string
+ # cname string or None
+ # typedef_flag boolean
# values [string], populated during declaration analysis
-
- is_enum = 1
- signed = 1
- rank = -1 # Ranks below any integer type
-
+
+ is_enum = 1
+ signed = 1
+ rank = -1 # Ranks below any integer type
+
def __init__(self, name, cname, typedef_flag, namespace=None):
- self.name = name
- self.cname = cname
- self.values = []
- self.typedef_flag = typedef_flag
+ self.name = name
+ self.cname = cname
+ self.values = []
+ self.typedef_flag = typedef_flag
self.namespace = namespace
self.default_value = "(%s) 0" % self.empty_declaration_code()
-
- def __str__(self):
- return self.name
-
- def __repr__(self):
- return "<CEnumType %s %s%s>" % (self.name, self.cname,
- ("", " typedef")[self.typedef_flag])
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- if pyrex or for_display:
- base_code = self.name
- else:
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return "<CEnumType %s %s%s>" % (self.name, self.cname,
+ ("", " typedef")[self.typedef_flag])
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ if pyrex or for_display:
+ base_code = self.name
+ else:
if self.namespace:
base_code = "%s::%s" % (
self.namespace.empty_declaration_code(), self.cname)
elif self.typedef_flag:
- base_code = self.cname
- else:
- base_code = "enum %s" % self.cname
- base_code = public_decl(base_code, dll_linkage)
- return self.base_declaration_code(base_code, entity_code)
-
+ base_code = self.cname
+ else:
+ base_code = "enum %s" % self.cname
+ base_code = public_decl(base_code, dll_linkage)
+ return self.base_declaration_code(base_code, entity_code)
+
def specialize(self, values):
if self.namespace:
namespace = self.namespace.specialize(values)
@@ -4054,325 +4054,325 @@ def c_tuple_type(components):
return tuple_type
-class UnspecifiedType(PyrexType):
- # Used as a placeholder until the type can be determined.
-
- is_unspecified = 1
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- return "<unspecified>"
-
- def same_as_resolved_type(self, other_type):
- return False
-
-
-class ErrorType(PyrexType):
- # Used to prevent propagation of error messages.
-
- is_error = 1
- exception_value = "0"
- exception_check = 0
- to_py_function = "dummy"
- from_py_function = "dummy"
-
- def create_to_py_utility_code(self, env):
- return True
-
- def create_from_py_utility_code(self, env):
- return True
-
- def declaration_code(self, entity_code,
- for_display = 0, dll_linkage = None, pyrex = 0):
- return "<error>"
-
- def same_as_resolved_type(self, other_type):
- return 1
-
- def error_condition(self, result_code):
- return "dummy"
-
-
-rank_to_type_name = (
- "char", # 0
- "short", # 1
- "int", # 2
- "long", # 3
- "PY_LONG_LONG", # 4
- "float", # 5
- "double", # 6
- "long double", # 7
-)
-
-_rank_to_type_name = list(rank_to_type_name)
-RANK_INT = _rank_to_type_name.index('int')
-RANK_LONG = _rank_to_type_name.index('long')
-RANK_FLOAT = _rank_to_type_name.index('float')
-UNSIGNED = 0
-SIGNED = 2
-
-error_type = ErrorType()
-unspecified_type = UnspecifiedType()
-
-py_object_type = PyObjectType()
-
-c_void_type = CVoidType()
-
-c_uchar_type = CIntType(0, UNSIGNED)
-c_ushort_type = CIntType(1, UNSIGNED)
-c_uint_type = CIntType(2, UNSIGNED)
-c_ulong_type = CIntType(3, UNSIGNED)
-c_ulonglong_type = CIntType(4, UNSIGNED)
-
-c_char_type = CIntType(0)
-c_short_type = CIntType(1)
-c_int_type = CIntType(2)
-c_long_type = CIntType(3)
-c_longlong_type = CIntType(4)
-
-c_schar_type = CIntType(0, SIGNED)
-c_sshort_type = CIntType(1, SIGNED)
-c_sint_type = CIntType(2, SIGNED)
-c_slong_type = CIntType(3, SIGNED)
-c_slonglong_type = CIntType(4, SIGNED)
-
-c_float_type = CFloatType(5, math_h_modifier='f')
-c_double_type = CFloatType(6)
-c_longdouble_type = CFloatType(7, math_h_modifier='l')
-
-c_float_complex_type = CComplexType(c_float_type)
-c_double_complex_type = CComplexType(c_double_type)
-c_longdouble_complex_type = CComplexType(c_longdouble_type)
-
-c_anon_enum_type = CAnonEnumType(-1)
-c_returncode_type = CReturnCodeType(RANK_INT)
-c_bint_type = CBIntType(RANK_INT)
-c_py_unicode_type = CPyUnicodeIntType(RANK_INT-0.5, UNSIGNED)
-c_py_ucs4_type = CPyUCS4IntType(RANK_LONG-0.5, UNSIGNED)
-c_py_hash_t_type = CPyHashTType(RANK_LONG+0.5, SIGNED)
-c_py_ssize_t_type = CPySSizeTType(RANK_LONG+0.5, SIGNED)
-c_ssize_t_type = CSSizeTType(RANK_LONG+0.5, SIGNED)
-c_size_t_type = CSizeTType(RANK_LONG+0.5, UNSIGNED)
-c_ptrdiff_t_type = CPtrdiffTType(RANK_LONG+0.75, SIGNED)
-
-c_null_ptr_type = CNullPtrType(c_void_type)
-c_void_ptr_type = CPtrType(c_void_type)
-c_void_ptr_ptr_type = CPtrType(c_void_ptr_type)
-c_char_ptr_type = CPtrType(c_char_type)
+class UnspecifiedType(PyrexType):
+ # Used as a placeholder until the type can be determined.
+
+ is_unspecified = 1
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ return "<unspecified>"
+
+ def same_as_resolved_type(self, other_type):
+ return False
+
+
+class ErrorType(PyrexType):
+ # Used to prevent propagation of error messages.
+
+ is_error = 1
+ exception_value = "0"
+ exception_check = 0
+ to_py_function = "dummy"
+ from_py_function = "dummy"
+
+ def create_to_py_utility_code(self, env):
+ return True
+
+ def create_from_py_utility_code(self, env):
+ return True
+
+ def declaration_code(self, entity_code,
+ for_display = 0, dll_linkage = None, pyrex = 0):
+ return "<error>"
+
+ def same_as_resolved_type(self, other_type):
+ return 1
+
+ def error_condition(self, result_code):
+ return "dummy"
+
+
+rank_to_type_name = (
+ "char", # 0
+ "short", # 1
+ "int", # 2
+ "long", # 3
+ "PY_LONG_LONG", # 4
+ "float", # 5
+ "double", # 6
+ "long double", # 7
+)
+
+_rank_to_type_name = list(rank_to_type_name)
+RANK_INT = _rank_to_type_name.index('int')
+RANK_LONG = _rank_to_type_name.index('long')
+RANK_FLOAT = _rank_to_type_name.index('float')
+UNSIGNED = 0
+SIGNED = 2
+
+error_type = ErrorType()
+unspecified_type = UnspecifiedType()
+
+py_object_type = PyObjectType()
+
+c_void_type = CVoidType()
+
+c_uchar_type = CIntType(0, UNSIGNED)
+c_ushort_type = CIntType(1, UNSIGNED)
+c_uint_type = CIntType(2, UNSIGNED)
+c_ulong_type = CIntType(3, UNSIGNED)
+c_ulonglong_type = CIntType(4, UNSIGNED)
+
+c_char_type = CIntType(0)
+c_short_type = CIntType(1)
+c_int_type = CIntType(2)
+c_long_type = CIntType(3)
+c_longlong_type = CIntType(4)
+
+c_schar_type = CIntType(0, SIGNED)
+c_sshort_type = CIntType(1, SIGNED)
+c_sint_type = CIntType(2, SIGNED)
+c_slong_type = CIntType(3, SIGNED)
+c_slonglong_type = CIntType(4, SIGNED)
+
+c_float_type = CFloatType(5, math_h_modifier='f')
+c_double_type = CFloatType(6)
+c_longdouble_type = CFloatType(7, math_h_modifier='l')
+
+c_float_complex_type = CComplexType(c_float_type)
+c_double_complex_type = CComplexType(c_double_type)
+c_longdouble_complex_type = CComplexType(c_longdouble_type)
+
+c_anon_enum_type = CAnonEnumType(-1)
+c_returncode_type = CReturnCodeType(RANK_INT)
+c_bint_type = CBIntType(RANK_INT)
+c_py_unicode_type = CPyUnicodeIntType(RANK_INT-0.5, UNSIGNED)
+c_py_ucs4_type = CPyUCS4IntType(RANK_LONG-0.5, UNSIGNED)
+c_py_hash_t_type = CPyHashTType(RANK_LONG+0.5, SIGNED)
+c_py_ssize_t_type = CPySSizeTType(RANK_LONG+0.5, SIGNED)
+c_ssize_t_type = CSSizeTType(RANK_LONG+0.5, SIGNED)
+c_size_t_type = CSizeTType(RANK_LONG+0.5, UNSIGNED)
+c_ptrdiff_t_type = CPtrdiffTType(RANK_LONG+0.75, SIGNED)
+
+c_null_ptr_type = CNullPtrType(c_void_type)
+c_void_ptr_type = CPtrType(c_void_type)
+c_void_ptr_ptr_type = CPtrType(c_void_ptr_type)
+c_char_ptr_type = CPtrType(c_char_type)
c_const_char_ptr_type = CPtrType(CConstType(c_char_type))
-c_uchar_ptr_type = CPtrType(c_uchar_type)
+c_uchar_ptr_type = CPtrType(c_uchar_type)
c_const_uchar_ptr_type = CPtrType(CConstType(c_uchar_type))
-c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
-c_int_ptr_type = CPtrType(c_int_type)
-c_py_unicode_ptr_type = CPtrType(c_py_unicode_type)
+c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
+c_int_ptr_type = CPtrType(c_int_type)
+c_py_unicode_ptr_type = CPtrType(c_py_unicode_type)
c_const_py_unicode_ptr_type = CPtrType(CConstType(c_py_unicode_type))
-c_py_ssize_t_ptr_type = CPtrType(c_py_ssize_t_type)
-c_ssize_t_ptr_type = CPtrType(c_ssize_t_type)
-c_size_t_ptr_type = CPtrType(c_size_t_type)
-
-# GIL state
-c_gilstate_type = CEnumType("PyGILState_STATE", "PyGILState_STATE", True)
-c_threadstate_type = CStructOrUnionType("PyThreadState", "struct", None, 1, "PyThreadState")
-c_threadstate_ptr_type = CPtrType(c_threadstate_type)
-
+c_py_ssize_t_ptr_type = CPtrType(c_py_ssize_t_type)
+c_ssize_t_ptr_type = CPtrType(c_ssize_t_type)
+c_size_t_ptr_type = CPtrType(c_size_t_type)
+
+# GIL state
+c_gilstate_type = CEnumType("PyGILState_STATE", "PyGILState_STATE", True)
+c_threadstate_type = CStructOrUnionType("PyThreadState", "struct", None, 1, "PyThreadState")
+c_threadstate_ptr_type = CPtrType(c_threadstate_type)
+
# PEP-539 "Py_tss_t" type
c_pytss_t_type = CPyTSSTType()
-# the Py_buffer type is defined in Builtin.py
-c_py_buffer_type = CStructOrUnionType("Py_buffer", "struct", None, 1, "Py_buffer")
-c_py_buffer_ptr_type = CPtrType(c_py_buffer_type)
-
-# Not sure whether the unsigned versions and 'long long' should be in there
-# long long requires C99 and might be slow, and would always get preferred
-# when specialization happens through calling and not indexing
-cy_integral_type = FusedType([c_short_type, c_int_type, c_long_type],
- name="integral")
-# Omitting long double as it might be slow
-cy_floating_type = FusedType([c_float_type, c_double_type], name="floating")
-cy_numeric_type = FusedType([c_short_type,
- c_int_type,
- c_long_type,
- c_float_type,
- c_double_type,
- c_float_complex_type,
- c_double_complex_type], name="numeric")
-
-# buffer-related structs
-c_buf_diminfo_type = CStructOrUnionType("__Pyx_Buf_DimInfo", "struct",
- None, 1, "__Pyx_Buf_DimInfo")
-c_pyx_buffer_type = CStructOrUnionType("__Pyx_Buffer", "struct", None, 1, "__Pyx_Buffer")
-c_pyx_buffer_ptr_type = CPtrType(c_pyx_buffer_type)
-c_pyx_buffer_nd_type = CStructOrUnionType("__Pyx_LocalBuf_ND", "struct",
- None, 1, "__Pyx_LocalBuf_ND")
-
-cython_memoryview_type = CStructOrUnionType("__pyx_memoryview_obj", "struct",
- None, 0, "__pyx_memoryview_obj")
-
-memoryviewslice_type = CStructOrUnionType("memoryviewslice", "struct",
- None, 1, "__Pyx_memviewslice")
-
-modifiers_and_name_to_type = {
- #(signed, longness, name) : type
- (0, 0, "char"): c_uchar_type,
- (1, 0, "char"): c_char_type,
- (2, 0, "char"): c_schar_type,
-
- (0, -1, "int"): c_ushort_type,
- (0, 0, "int"): c_uint_type,
- (0, 1, "int"): c_ulong_type,
- (0, 2, "int"): c_ulonglong_type,
-
- (1, -1, "int"): c_short_type,
- (1, 0, "int"): c_int_type,
- (1, 1, "int"): c_long_type,
- (1, 2, "int"): c_longlong_type,
-
- (2, -1, "int"): c_sshort_type,
- (2, 0, "int"): c_sint_type,
- (2, 1, "int"): c_slong_type,
- (2, 2, "int"): c_slonglong_type,
-
- (1, 0, "float"): c_float_type,
- (1, 0, "double"): c_double_type,
- (1, 1, "double"): c_longdouble_type,
-
- (1, 0, "complex"): c_double_complex_type, # C: float, Python: double => Python wins
- (1, 0, "floatcomplex"): c_float_complex_type,
- (1, 0, "doublecomplex"): c_double_complex_type,
- (1, 1, "doublecomplex"): c_longdouble_complex_type,
-
- #
- (1, 0, "void"): c_void_type,
+# the Py_buffer type is defined in Builtin.py
+c_py_buffer_type = CStructOrUnionType("Py_buffer", "struct", None, 1, "Py_buffer")
+c_py_buffer_ptr_type = CPtrType(c_py_buffer_type)
+
+# Not sure whether the unsigned versions and 'long long' should be in there
+# long long requires C99 and might be slow, and would always get preferred
+# when specialization happens through calling and not indexing
+cy_integral_type = FusedType([c_short_type, c_int_type, c_long_type],
+ name="integral")
+# Omitting long double as it might be slow
+cy_floating_type = FusedType([c_float_type, c_double_type], name="floating")
+cy_numeric_type = FusedType([c_short_type,
+ c_int_type,
+ c_long_type,
+ c_float_type,
+ c_double_type,
+ c_float_complex_type,
+ c_double_complex_type], name="numeric")
+
+# buffer-related structs
+c_buf_diminfo_type = CStructOrUnionType("__Pyx_Buf_DimInfo", "struct",
+ None, 1, "__Pyx_Buf_DimInfo")
+c_pyx_buffer_type = CStructOrUnionType("__Pyx_Buffer", "struct", None, 1, "__Pyx_Buffer")
+c_pyx_buffer_ptr_type = CPtrType(c_pyx_buffer_type)
+c_pyx_buffer_nd_type = CStructOrUnionType("__Pyx_LocalBuf_ND", "struct",
+ None, 1, "__Pyx_LocalBuf_ND")
+
+cython_memoryview_type = CStructOrUnionType("__pyx_memoryview_obj", "struct",
+ None, 0, "__pyx_memoryview_obj")
+
+memoryviewslice_type = CStructOrUnionType("memoryviewslice", "struct",
+ None, 1, "__Pyx_memviewslice")
+
+modifiers_and_name_to_type = {
+ #(signed, longness, name) : type
+ (0, 0, "char"): c_uchar_type,
+ (1, 0, "char"): c_char_type,
+ (2, 0, "char"): c_schar_type,
+
+ (0, -1, "int"): c_ushort_type,
+ (0, 0, "int"): c_uint_type,
+ (0, 1, "int"): c_ulong_type,
+ (0, 2, "int"): c_ulonglong_type,
+
+ (1, -1, "int"): c_short_type,
+ (1, 0, "int"): c_int_type,
+ (1, 1, "int"): c_long_type,
+ (1, 2, "int"): c_longlong_type,
+
+ (2, -1, "int"): c_sshort_type,
+ (2, 0, "int"): c_sint_type,
+ (2, 1, "int"): c_slong_type,
+ (2, 2, "int"): c_slonglong_type,
+
+ (1, 0, "float"): c_float_type,
+ (1, 0, "double"): c_double_type,
+ (1, 1, "double"): c_longdouble_type,
+
+ (1, 0, "complex"): c_double_complex_type, # C: float, Python: double => Python wins
+ (1, 0, "floatcomplex"): c_float_complex_type,
+ (1, 0, "doublecomplex"): c_double_complex_type,
+ (1, 1, "doublecomplex"): c_longdouble_complex_type,
+
+ #
+ (1, 0, "void"): c_void_type,
(1, 0, "Py_tss_t"): c_pytss_t_type,
-
- (1, 0, "bint"): c_bint_type,
- (0, 0, "Py_UNICODE"): c_py_unicode_type,
- (0, 0, "Py_UCS4"): c_py_ucs4_type,
- (2, 0, "Py_hash_t"): c_py_hash_t_type,
- (2, 0, "Py_ssize_t"): c_py_ssize_t_type,
- (2, 0, "ssize_t") : c_ssize_t_type,
- (0, 0, "size_t") : c_size_t_type,
- (2, 0, "ptrdiff_t") : c_ptrdiff_t_type,
-
- (1, 0, "object"): py_object_type,
-}
-
-def is_promotion(src_type, dst_type):
- # It's hard to find a hard definition of promotion, but empirical
- # evidence suggests that the below is all that's allowed.
- if src_type.is_numeric:
- if dst_type.same_as(c_int_type):
- unsigned = (not src_type.signed)
- return (src_type.is_enum or
- (src_type.is_int and
- unsigned + src_type.rank < dst_type.rank))
- elif dst_type.same_as(c_double_type):
- return src_type.is_float and src_type.rank <= dst_type.rank
- return False
-
+
+ (1, 0, "bint"): c_bint_type,
+ (0, 0, "Py_UNICODE"): c_py_unicode_type,
+ (0, 0, "Py_UCS4"): c_py_ucs4_type,
+ (2, 0, "Py_hash_t"): c_py_hash_t_type,
+ (2, 0, "Py_ssize_t"): c_py_ssize_t_type,
+ (2, 0, "ssize_t") : c_ssize_t_type,
+ (0, 0, "size_t") : c_size_t_type,
+ (2, 0, "ptrdiff_t") : c_ptrdiff_t_type,
+
+ (1, 0, "object"): py_object_type,
+}
+
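# Editorial aside (illustrative, not from the commit): roughly how a few
# declarations resolve through the (signed, longness, name) key above:
#   "unsigned long" -> (0, 1, "int")     -> c_ulong_type
#   "signed char"   -> (2, 0, "char")    -> c_schar_type
#   "short"         -> (1, -1, "int")    -> c_short_type
#   "long double"   -> (1, 1, "double")  -> c_longdouble_type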
+def is_promotion(src_type, dst_type):
+ # It's hard to find a hard definition of promotion, but empirical
+ # evidence suggests that the below is all that's allowed.
+ if src_type.is_numeric:
+ if dst_type.same_as(c_int_type):
+ unsigned = (not src_type.signed)
+ return (src_type.is_enum or
+ (src_type.is_int and
+ unsigned + src_type.rank < dst_type.rank))
+ elif dst_type.same_as(c_double_type):
+ return src_type.is_float and src_type.rank <= dst_type.rank
+ return False
+
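# Editorial aside (illustrative, not from the commit): conceptual examples
# of is_promotion() under the rules above (source -> destination):
#   signed char  -> int    : True  (lower-rank signed integer widening to int)
#   enum         -> int    : True
#   float        -> double : True  (float rank <= double rank)
#   unsigned int -> int    : False (the unsigned + rank test fails)
#   double       -> float  : False (only widening to double counts)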
def best_match(arg_types, functions, pos=None, env=None, args=None):
- """
- Given a list args of arguments and a list of functions, choose one
- to call which seems to be the "best" fit for this list of arguments.
- This function is used, e.g., when deciding which overloaded method
- to dispatch for C++ classes.
-
- We first eliminate functions based on arity, and if only one
- function has the correct arity, we return it. Otherwise, we weight
- functions based on how much work must be done to convert the
- arguments, with the following priorities:
- * identical types or pointers to identical types
- * promotions
- * non-Python types
- That is, we prefer functions where no arguments need converted,
- and failing that, functions where only promotions are required, and
- so on.
-
- If no function is deemed a good fit, or if two or more functions have
- the same weight, we return None (as there is no best match). If pos
- is not None, we also generate an error.
- """
- # TODO: args should be a list of types, not a list of Nodes.
+ """
+ Given a list args of arguments and a list of functions, choose one
+ to call which seems to be the "best" fit for this list of arguments.
+ This function is used, e.g., when deciding which overloaded method
+ to dispatch for C++ classes.
+
+ We first eliminate functions based on arity, and if only one
+ function has the correct arity, we return it. Otherwise, we weight
+ functions based on how much work must be done to convert the
+ arguments, with the following priorities:
+ * identical types or pointers to identical types
+ * promotions
+ * non-Python types
+ That is, we prefer functions where no arguments need converted,
+ and failing that, functions where only promotions are required, and
+ so on.
+
+ If no function is deemed a good fit, or if two or more functions have
+ the same weight, we return None (as there is no best match). If pos
+ is not None, we also generate an error.
+ """
+ # TODO: args should be a list of types, not a list of Nodes.
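    # Editorial aside (illustrative, not from the commit): under the priorities
    # stated in the docstring, a call f(<short> x) with candidates f(int) and
    # f(double) picks f(int): short -> int is a promotion, while
    # short -> double is an ordinary conversion and therefore scores worse.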
actual_nargs = len(arg_types)
-
- candidates = []
- errors = []
- for func in functions:
- error_mesg = ""
- func_type = func.type
- if func_type.is_ptr:
- func_type = func_type.base_type
- # Check function type
- if not func_type.is_cfunction:
- if not func_type.is_error and pos is not None:
- error_mesg = "Calling non-function type '%s'" % func_type
- errors.append((func, error_mesg))
- continue
- # Check no. of args
- max_nargs = len(func_type.args)
- min_nargs = max_nargs - func_type.optional_arg_count
- if actual_nargs < min_nargs or \
- (not func_type.has_varargs and actual_nargs > max_nargs):
- if max_nargs == min_nargs and not func_type.has_varargs:
- expectation = max_nargs
- elif actual_nargs < min_nargs:
- expectation = "at least %s" % min_nargs
- else:
- expectation = "at most %s" % max_nargs
- error_mesg = "Call with wrong number of arguments (expected %s, got %s)" \
- % (expectation, actual_nargs)
- errors.append((func, error_mesg))
- continue
- if func_type.templates:
- deductions = reduce(
- merge_template_deductions,
- [pattern.type.deduce_template_params(actual) for (pattern, actual) in zip(func_type.args, arg_types)],
- {})
- if deductions is None:
+
+ candidates = []
+ errors = []
+ for func in functions:
+ error_mesg = ""
+ func_type = func.type
+ if func_type.is_ptr:
+ func_type = func_type.base_type
+ # Check function type
+ if not func_type.is_cfunction:
+ if not func_type.is_error and pos is not None:
+ error_mesg = "Calling non-function type '%s'" % func_type
+ errors.append((func, error_mesg))
+ continue
+ # Check no. of args
+ max_nargs = len(func_type.args)
+ min_nargs = max_nargs - func_type.optional_arg_count
+ if actual_nargs < min_nargs or \
+ (not func_type.has_varargs and actual_nargs > max_nargs):
+ if max_nargs == min_nargs and not func_type.has_varargs:
+ expectation = max_nargs
+ elif actual_nargs < min_nargs:
+ expectation = "at least %s" % min_nargs
+ else:
+ expectation = "at most %s" % max_nargs
+ error_mesg = "Call with wrong number of arguments (expected %s, got %s)" \
+ % (expectation, actual_nargs)
+ errors.append((func, error_mesg))
+ continue
+ if func_type.templates:
+ deductions = reduce(
+ merge_template_deductions,
+ [pattern.type.deduce_template_params(actual) for (pattern, actual) in zip(func_type.args, arg_types)],
+ {})
+ if deductions is None:
errors.append((func, "Unable to deduce type parameters for %s given (%s)" % (func_type, ', '.join(map(str, arg_types)))))
- elif len(deductions) < len(func_type.templates):
- errors.append((func, "Unable to deduce type parameter %s" % (
- ", ".join([param.name for param in set(func_type.templates) - set(deductions.keys())]))))
- else:
- type_list = [deductions[param] for param in func_type.templates]
- from .Symtab import Entry
- specialization = Entry(
- name = func.name + "[%s]" % ",".join([str(t) for t in type_list]),
+ elif len(deductions) < len(func_type.templates):
+ errors.append((func, "Unable to deduce type parameter %s" % (
+ ", ".join([param.name for param in set(func_type.templates) - set(deductions.keys())]))))
+ else:
+ type_list = [deductions[param] for param in func_type.templates]
+ from .Symtab import Entry
+ specialization = Entry(
+ name = func.name + "[%s]" % ",".join([str(t) for t in type_list]),
cname = func.cname + "<%s>" % ",".join([t.empty_declaration_code() for t in type_list]),
- type = func_type.specialize(deductions),
- pos = func.pos)
- candidates.append((specialization, specialization.type))
- else:
- candidates.append((func, func_type))
-
- # Optimize the most common case of no overloading...
- if len(candidates) == 1:
- return candidates[0][0]
- elif len(candidates) == 0:
- if pos is not None:
- func, errmsg = errors[0]
- if len(errors) == 1 or [1 for func, e in errors if e == errmsg]:
- error(pos, errmsg)
- else:
- error(pos, "no suitable method found")
- return None
-
- possibilities = []
- bad_types = []
- needed_coercions = {}
-
- for index, (func, func_type) in enumerate(candidates):
+ type = func_type.specialize(deductions),
+ pos = func.pos)
+ candidates.append((specialization, specialization.type))
+ else:
+ candidates.append((func, func_type))
+
+ # Optimize the most common case of no overloading...
+ if len(candidates) == 1:
+ return candidates[0][0]
+ elif len(candidates) == 0:
+ if pos is not None:
+ func, errmsg = errors[0]
+ if len(errors) == 1 or [1 for func, e in errors if e == errmsg]:
+ error(pos, errmsg)
+ else:
+ error(pos, "no suitable method found")
+ return None
+
+ possibilities = []
+ bad_types = []
+ needed_coercions = {}
+
+ for index, (func, func_type) in enumerate(candidates):
score = [0,0,0,0,0,0,0]
for i in range(min(actual_nargs, len(func_type.args))):
src_type = arg_types[i]
- dst_type = func_type.args[i].type
-
- assignable = dst_type.assignable_from(src_type)
-
+ dst_type = func_type.args[i].type
+
+ assignable = dst_type.assignable_from(src_type)
+
# Now take care of unprefixed string literals. So when you call a cdef
- # function that takes a char *, the coercion will mean that the
- # type will simply become bytes. We need to do this coercion
- # manually for overloaded and fused functions
+ # function that takes a char *, the coercion will mean that the
+ # type will simply become bytes. We need to do this coercion
+ # manually for overloaded and fused functions
if not assignable:
c_src_type = None
if src_type.is_pyobject:
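[Editorial note, not part of the diff: the hunk above restores best_match's candidate filtering, which rejects overloads by arity before any conversion scoring. A minimal standalone sketch of that arity rule, using a hypothetical helper name rather than Cython's own classes:]

def arity_matches(actual_nargs, max_nargs, optional_arg_count, has_varargs):
    # A call is admissible if it supplies at least the required arguments and,
    # unless the signature is variadic, no more than the declared maximum.
    min_nargs = max_nargs - optional_arg_count
    if actual_nargs < min_nargs:
        return False
    if not has_varargs and actual_nargs > max_nargs:
        return False
    return True

# Example: a signature f(a, b, c=0) has max_nargs=3 and one optional argument.
assert arity_matches(2, 3, 1, False)       # f(a, b) is fine
assert not arity_matches(4, 3, 1, False)   # too many arguments
assert arity_matches(4, 3, 1, True)        # ... unless the signature is variadic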
@@ -4382,24 +4382,24 @@ def best_match(arg_types, functions, pos=None, env=None, args=None):
c_src_type = src_type.default_coerced_ctype()
elif src_type.is_pythran_expr:
c_src_type = src_type.org_buffer
-
+
if c_src_type is not None:
- assignable = dst_type.assignable_from(c_src_type)
- if assignable:
- src_type = c_src_type
+ assignable = dst_type.assignable_from(c_src_type)
+ if assignable:
+ src_type = c_src_type
needed_coercions[func] = (i, dst_type)
-
- if assignable:
- if src_type == dst_type or dst_type.same_as(src_type):
+
+ if assignable:
+ if src_type == dst_type or dst_type.same_as(src_type):
pass # score 0
- elif func_type.is_strict_signature:
+ elif func_type.is_strict_signature:
break # exact match requested but not found
- elif is_promotion(src_type, dst_type):
- score[2] += 1
- elif ((src_type.is_int and dst_type.is_int) or
- (src_type.is_float and dst_type.is_float)):
- score[2] += abs(dst_type.rank + (not dst_type.signed) -
- (src_type.rank + (not src_type.signed))) + 1
+ elif is_promotion(src_type, dst_type):
+ score[2] += 1
+ elif ((src_type.is_int and dst_type.is_int) or
+ (src_type.is_float and dst_type.is_float)):
+ score[2] += abs(dst_type.rank + (not dst_type.signed) -
+ (src_type.rank + (not src_type.signed))) + 1
elif dst_type.is_ptr and src_type.is_ptr:
if dst_type.base_type == c_void_type:
score[4] += 1
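[Editorial note, not part of the diff: the scoring restored above charges int-to-int and float-to-float conversions by the distance between the two types' ranks, with a signedness mismatch adding to that distance, so the closest numeric overload wins. A small sketch of that cost rule; the (rank, signed) pairs are hypothetical stand-ins for Cython's real type objects:]

def numeric_conversion_cost(src, dst):
    # Cost grows with the rank distance between source and destination;
    # `not signed` contributes an extra step when signedness differs.
    src_rank, src_signed = src
    dst_rank, dst_signed = dst
    return abs(dst_rank + (not dst_signed) - (src_rank + (not src_signed))) + 1

c_short, c_int, c_ulong = (1, True), (2, True), (4, False)
# Converting short -> int is cheaper than short -> unsigned long,
# so an int overload would be preferred for a short argument.
assert numeric_conversion_cost(c_short, c_int) < numeric_conversion_cost(c_short, c_ulong)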
@@ -4407,57 +4407,57 @@ def best_match(arg_types, functions, pos=None, env=None, args=None):
score[6] += src_type.base_type.subclass_dist(dst_type.base_type)
else:
score[5] += 1
- elif not src_type.is_pyobject:
- score[1] += 1
- else:
- score[0] += 1
- else:
+ elif not src_type.is_pyobject:
+ score[1] += 1
+ else:
+ score[0] += 1
+ else:
error_mesg = "Invalid conversion from '%s' to '%s'" % (src_type, dst_type)
- bad_types.append((func, error_mesg))
- break
- else:
+ bad_types.append((func, error_mesg))
+ break
+ else:
possibilities.append((score, index, func)) # so we can sort it
-
- if possibilities:
- possibilities.sort()
- if len(possibilities) > 1:
- score1 = possibilities[0][0]
- score2 = possibilities[1][0]
- if score1 == score2:
- if pos is not None:
- error(pos, "ambiguous overloaded method")
- return None
-
- function = possibilities[0][-1]
-
- if function in needed_coercions and env:
- arg_i, coerce_to_type = needed_coercions[function]
- args[arg_i] = args[arg_i].coerce_to(coerce_to_type, env)
-
- return function
-
- if pos is not None:
- if len(bad_types) == 1:
- error(pos, bad_types[0][1])
- else:
- error(pos, "no suitable method found")
-
- return None
-
-def merge_template_deductions(a, b):
- if a is None or b is None:
- return None
- all = a
+
+ if possibilities:
+ possibilities.sort()
+ if len(possibilities) > 1:
+ score1 = possibilities[0][0]
+ score2 = possibilities[1][0]
+ if score1 == score2:
+ if pos is not None:
+ error(pos, "ambiguous overloaded method")
+ return None
+
+ function = possibilities[0][-1]
+
+ if function in needed_coercions and env:
+ arg_i, coerce_to_type = needed_coercions[function]
+ args[arg_i] = args[arg_i].coerce_to(coerce_to_type, env)
+
+ return function
+
+ if pos is not None:
+ if len(bad_types) == 1:
+ error(pos, bad_types[0][1])
+ else:
+ error(pos, "no suitable method found")
+
+ return None
+
+def merge_template_deductions(a, b):
+ if a is None or b is None:
+ return None
+ all = a
for param, value in b.items():
- if param in all:
- if a[param] != b[param]:
- return None
- else:
- all[param] = value
- return all
-
-
-def widest_numeric_type(type1, type2):
+ if param in all:
+ if a[param] != b[param]:
+ return None
+ else:
+ all[param] = value
+ return all
+
+
+def widest_numeric_type(type1, type2):
"""Given two numeric types, return the narrowest type encompassing both of them.
"""
if type1.is_reference:
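[Editorial note, not part of the diff: merge_template_deductions, restored in the hunk above, folds the per-argument template deductions together and signals a conflict by returning None. A small illustrative sketch of the same merge rule with hypothetical inputs:]

from functools import reduce

def merge_deductions(a, b):
    # Combine two {template parameter: deduced type} mappings; a parameter
    # deduced to two different types makes the whole deduction fail (None).
    if a is None or b is None:
        return None
    merged = dict(a)
    for param, value in b.items():
        if param in merged and merged[param] != value:
            return None
        merged[param] = value
    return merged

per_argument = [{'T': 'int'}, {'U': 'double'}, {'T': 'int'}]
assert reduce(merge_deductions, per_argument, {}) == {'T': 'int', 'U': 'double'}
assert reduce(merge_deductions, [{'T': 'int'}, {'T': 'float'}], {}) is None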
@@ -4468,124 +4468,124 @@ def widest_numeric_type(type1, type2):
type1 = type1.const_base_type
if type2.is_const:
type2 = type2.const_base_type
- if type1 == type2:
- widest_type = type1
- elif type1.is_complex or type2.is_complex:
- def real_type(ntype):
- if ntype.is_complex:
- return ntype.real_type
- return ntype
- widest_type = CComplexType(
- widest_numeric_type(
- real_type(type1),
- real_type(type2)))
- elif type1.is_enum and type2.is_enum:
- widest_type = c_int_type
- elif type1.rank < type2.rank:
- widest_type = type2
- elif type1.rank > type2.rank:
- widest_type = type1
- elif type1.signed < type2.signed:
- widest_type = type1
+ if type1 == type2:
+ widest_type = type1
+ elif type1.is_complex or type2.is_complex:
+ def real_type(ntype):
+ if ntype.is_complex:
+ return ntype.real_type
+ return ntype
+ widest_type = CComplexType(
+ widest_numeric_type(
+ real_type(type1),
+ real_type(type2)))
+ elif type1.is_enum and type2.is_enum:
+ widest_type = c_int_type
+ elif type1.rank < type2.rank:
+ widest_type = type2
+ elif type1.rank > type2.rank:
+ widest_type = type1
+ elif type1.signed < type2.signed:
+ widest_type = type1
elif type1.signed > type2.signed:
widest_type = type2
elif type1.is_typedef > type2.is_typedef:
widest_type = type1
- else:
- widest_type = type2
- return widest_type
-
-
-def numeric_type_fits(small_type, large_type):
- return widest_numeric_type(small_type, large_type) == large_type
-
-
-def independent_spanning_type(type1, type2):
- # Return a type assignable independently from both type1 and
- # type2, but do not require any interoperability between the two.
- # For example, in "True * 2", it is safe to assume an integer
- # result type (so spanning_type() will do the right thing),
- # whereas "x = True or 2" must evaluate to a type that can hold
- # both a boolean value and an integer, so this function works
- # better.
+ else:
+ widest_type = type2
+ return widest_type
+
+
+def numeric_type_fits(small_type, large_type):
+ return widest_numeric_type(small_type, large_type) == large_type
+
+
+def independent_spanning_type(type1, type2):
+ # Return a type assignable independently from both type1 and
+ # type2, but do not require any interoperability between the two.
+ # For example, in "True * 2", it is safe to assume an integer
+ # result type (so spanning_type() will do the right thing),
+ # whereas "x = True or 2" must evaluate to a type that can hold
+ # both a boolean value and an integer, so this function works
+ # better.
if type1.is_reference ^ type2.is_reference:
if type1.is_reference:
type1 = type1.ref_base_type
else:
type2 = type2.ref_base_type
- if type1 == type2:
- return type1
- elif (type1 is c_bint_type or type2 is c_bint_type) and (type1.is_numeric and type2.is_numeric):
- # special case: if one of the results is a bint and the other
- # is another C integer, we must prevent returning a numeric
- # type so that we do not lose the ability to coerce to a
- # Python bool if we have to.
- return py_object_type
- span_type = _spanning_type(type1, type2)
- if span_type is None:
- return error_type
- return span_type
-
-def spanning_type(type1, type2):
- # Return a type assignable from both type1 and type2, or
- # py_object_type if no better type is found. Assumes that the
- # code that calls this will try a coercion afterwards, which will
- # fail if the types cannot actually coerce to a py_object_type.
- if type1 == type2:
- return type1
- elif type1 is py_object_type or type2 is py_object_type:
- return py_object_type
- elif type1 is c_py_unicode_type or type2 is c_py_unicode_type:
- # Py_UNICODE behaves more like a string than an int
- return py_object_type
- span_type = _spanning_type(type1, type2)
- if span_type is None:
- return py_object_type
- return span_type
-
-def _spanning_type(type1, type2):
- if type1.is_numeric and type2.is_numeric:
- return widest_numeric_type(type1, type2)
- elif type1.is_builtin_type and type1.name == 'float' and type2.is_numeric:
- return widest_numeric_type(c_double_type, type2)
- elif type2.is_builtin_type and type2.name == 'float' and type1.is_numeric:
- return widest_numeric_type(type1, c_double_type)
- elif type1.is_extension_type and type2.is_extension_type:
- return widest_extension_type(type1, type2)
- elif type1.is_pyobject or type2.is_pyobject:
- return py_object_type
- elif type1.assignable_from(type2):
- if type1.is_extension_type and type1.typeobj_is_imported():
- # external types are unsafe, so we use PyObject instead
- return py_object_type
- return type1
- elif type2.assignable_from(type1):
- if type2.is_extension_type and type2.typeobj_is_imported():
- # external types are unsafe, so we use PyObject instead
- return py_object_type
- return type2
- elif type1.is_ptr and type2.is_ptr:
+ if type1 == type2:
+ return type1
+ elif (type1 is c_bint_type or type2 is c_bint_type) and (type1.is_numeric and type2.is_numeric):
+ # special case: if one of the results is a bint and the other
+ # is another C integer, we must prevent returning a numeric
+ # type so that we do not lose the ability to coerce to a
+ # Python bool if we have to.
+ return py_object_type
+ span_type = _spanning_type(type1, type2)
+ if span_type is None:
+ return error_type
+ return span_type
+
+def spanning_type(type1, type2):
+ # Return a type assignable from both type1 and type2, or
+ # py_object_type if no better type is found. Assumes that the
+ # code that calls this will try a coercion afterwards, which will
+ # fail if the types cannot actually coerce to a py_object_type.
+ if type1 == type2:
+ return type1
+ elif type1 is py_object_type or type2 is py_object_type:
+ return py_object_type
+ elif type1 is c_py_unicode_type or type2 is c_py_unicode_type:
+ # Py_UNICODE behaves more like a string than an int
+ return py_object_type
+ span_type = _spanning_type(type1, type2)
+ if span_type is None:
+ return py_object_type
+ return span_type
+
+def _spanning_type(type1, type2):
+ if type1.is_numeric and type2.is_numeric:
+ return widest_numeric_type(type1, type2)
+ elif type1.is_builtin_type and type1.name == 'float' and type2.is_numeric:
+ return widest_numeric_type(c_double_type, type2)
+ elif type2.is_builtin_type and type2.name == 'float' and type1.is_numeric:
+ return widest_numeric_type(type1, c_double_type)
+ elif type1.is_extension_type and type2.is_extension_type:
+ return widest_extension_type(type1, type2)
+ elif type1.is_pyobject or type2.is_pyobject:
+ return py_object_type
+ elif type1.assignable_from(type2):
+ if type1.is_extension_type and type1.typeobj_is_imported():
+ # external types are unsafe, so we use PyObject instead
+ return py_object_type
+ return type1
+ elif type2.assignable_from(type1):
+ if type2.is_extension_type and type2.typeobj_is_imported():
+ # external types are unsafe, so we use PyObject instead
+ return py_object_type
+ return type2
+ elif type1.is_ptr and type2.is_ptr:
if type1.base_type.is_cpp_class and type2.base_type.is_cpp_class:
common_base = widest_cpp_type(type1.base_type, type2.base_type)
if common_base:
return CPtrType(common_base)
- # incompatible pointers, void* will do as a result
- return c_void_ptr_type
- else:
- return None
-
-def widest_extension_type(type1, type2):
- if type1.typeobj_is_imported() or type2.typeobj_is_imported():
- return py_object_type
- while True:
- if type1.subtype_of(type2):
- return type2
- elif type2.subtype_of(type1):
- return type1
- type1, type2 = type1.base_type, type2.base_type
- if type1 is None or type2 is None:
- return py_object_type
-
+ # incompatible pointers, void* will do as a result
+ return c_void_ptr_type
+ else:
+ return None
+
+def widest_extension_type(type1, type2):
+ if type1.typeobj_is_imported() or type2.typeobj_is_imported():
+ return py_object_type
+ while True:
+ if type1.subtype_of(type2):
+ return type2
+ elif type2.subtype_of(type1):
+ return type1
+ type1, type2 = type1.base_type, type2.base_type
+ if type1 is None or type2 is None:
+ return py_object_type
+
def widest_cpp_type(type1, type2):
@cached_function
def bases(type):
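[Editorial note, not part of the diff: widest_numeric_type, restored above, mostly decides by rank, and on equal rank prefers the less-signed type. A toy model of that core rule only (it ignores the complex, enum and typedef branches); the (rank, signed) pairs are hypothetical stand-ins for Cython's type objects:]

def widest_by_rank(type1, type2):
    rank1, signed1 = type1
    rank2, signed2 = type2
    if rank1 != rank2:
        # Higher rank wins outright.
        return type1 if rank1 > rank2 else type2
    # Same rank: the type with the smaller `signed` flag is considered widest,
    # matching the `type1.signed < type2.signed` branch above.
    return type1 if signed1 < signed2 else type2

c_int, c_uint, c_long = (2, True), (2, False), (3, True)
assert widest_by_rank(c_int, c_long) == c_long
assert widest_by_rank(c_int, c_uint) == c_uint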
@@ -4604,109 +4604,109 @@ def widest_cpp_type(type1, type2):
return None
-def simple_c_type(signed, longness, name):
- # Find type descriptor for simple type given name and modifiers.
- # Returns None if arguments don't make sense.
- return modifiers_and_name_to_type.get((signed, longness, name))
-
-def parse_basic_type(name):
- base = None
- if name.startswith('p_'):
- base = parse_basic_type(name[2:])
- elif name.startswith('p'):
- base = parse_basic_type(name[1:])
- elif name.endswith('*'):
- base = parse_basic_type(name[:-1])
- if base:
- return CPtrType(base)
- #
- basic_type = simple_c_type(1, 0, name)
- if basic_type:
- return basic_type
- #
- signed = 1
- longness = 0
- if name == 'Py_UNICODE':
- signed = 0
- elif name == 'Py_UCS4':
- signed = 0
- elif name == 'Py_hash_t':
- signed = 2
- elif name == 'Py_ssize_t':
- signed = 2
- elif name == 'ssize_t':
- signed = 2
- elif name == 'size_t':
- signed = 0
- else:
- if name.startswith('u'):
- name = name[1:]
- signed = 0
- elif (name.startswith('s') and
- not name.startswith('short')):
- name = name[1:]
- signed = 2
- longness = 0
- while name.startswith('short'):
- name = name.replace('short', '', 1).strip()
- longness -= 1
- while name.startswith('long'):
- name = name.replace('long', '', 1).strip()
- longness += 1
- if longness != 0 and not name:
- name = 'int'
- return simple_c_type(signed, longness, name)
-
-def c_array_type(base_type, size):
- # Construct a C array type.
- if base_type is error_type:
- return error_type
- else:
- return CArrayType(base_type, size)
-
-def c_ptr_type(base_type):
- # Construct a C pointer type.
- if base_type is error_type:
- return error_type
+def simple_c_type(signed, longness, name):
+ # Find type descriptor for simple type given name and modifiers.
+ # Returns None if arguments don't make sense.
+ return modifiers_and_name_to_type.get((signed, longness, name))
+
+def parse_basic_type(name):
+ base = None
+ if name.startswith('p_'):
+ base = parse_basic_type(name[2:])
+ elif name.startswith('p'):
+ base = parse_basic_type(name[1:])
+ elif name.endswith('*'):
+ base = parse_basic_type(name[:-1])
+ if base:
+ return CPtrType(base)
+ #
+ basic_type = simple_c_type(1, 0, name)
+ if basic_type:
+ return basic_type
+ #
+ signed = 1
+ longness = 0
+ if name == 'Py_UNICODE':
+ signed = 0
+ elif name == 'Py_UCS4':
+ signed = 0
+ elif name == 'Py_hash_t':
+ signed = 2
+ elif name == 'Py_ssize_t':
+ signed = 2
+ elif name == 'ssize_t':
+ signed = 2
+ elif name == 'size_t':
+ signed = 0
+ else:
+ if name.startswith('u'):
+ name = name[1:]
+ signed = 0
+ elif (name.startswith('s') and
+ not name.startswith('short')):
+ name = name[1:]
+ signed = 2
+ longness = 0
+ while name.startswith('short'):
+ name = name.replace('short', '', 1).strip()
+ longness -= 1
+ while name.startswith('long'):
+ name = name.replace('long', '', 1).strip()
+ longness += 1
+ if longness != 0 and not name:
+ name = 'int'
+ return simple_c_type(signed, longness, name)
+
+def c_array_type(base_type, size):
+ # Construct a C array type.
+ if base_type is error_type:
+ return error_type
+ else:
+ return CArrayType(base_type, size)
+
+def c_ptr_type(base_type):
+ # Construct a C pointer type.
+ if base_type is error_type:
+ return error_type
elif base_type.is_reference:
return CPtrType(base_type.ref_base_type)
- else:
- return CPtrType(base_type)
-
-def c_ref_type(base_type):
- # Construct a C reference type
- if base_type is error_type:
- return error_type
- else:
- return CReferenceType(base_type)
-
-def c_const_type(base_type):
- # Construct a C const type.
- if base_type is error_type:
- return error_type
- else:
- return CConstType(base_type)
-
-def same_type(type1, type2):
- return type1.same_as(type2)
-
-def assignable_from(type1, type2):
- return type1.assignable_from(type2)
-
-def typecast(to_type, from_type, expr_code):
- # Return expr_code cast to a C type which can be
- # assigned to to_type, assuming its existing C type
- # is from_type.
- if (to_type is from_type or
- (not to_type.is_pyobject and assignable_from(to_type, from_type))):
- return expr_code
- elif (to_type is py_object_type and from_type and
- from_type.is_builtin_type and from_type.name != 'type'):
- # no cast needed, builtins are PyObject* already
- return expr_code
- else:
- #print "typecast: to", to_type, "from", from_type ###
- return to_type.cast_code(expr_code)
+ else:
+ return CPtrType(base_type)
+
+def c_ref_type(base_type):
+ # Construct a C reference type
+ if base_type is error_type:
+ return error_type
+ else:
+ return CReferenceType(base_type)
+
+def c_const_type(base_type):
+ # Construct a C const type.
+ if base_type is error_type:
+ return error_type
+ else:
+ return CConstType(base_type)
+
+def same_type(type1, type2):
+ return type1.same_as(type2)
+
+def assignable_from(type1, type2):
+ return type1.assignable_from(type2)
+
+def typecast(to_type, from_type, expr_code):
+ # Return expr_code cast to a C type which can be
+ # assigned to to_type, assuming its existing C type
+ # is from_type.
+ if (to_type is from_type or
+ (not to_type.is_pyobject and assignable_from(to_type, from_type))):
+ return expr_code
+ elif (to_type is py_object_type and from_type and
+ from_type.is_builtin_type and from_type.name != 'type'):
+ # no cast needed, builtins are PyObject* already
+ return expr_code
+ else:
+ #print "typecast: to", to_type, "from", from_type ###
+ return to_type.cast_code(expr_code)
def type_list_identifier(types):
return cap_length('__and_'.join(type_identifier(type) for type in types))
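[Editorial note, not part of the diff: parse_basic_type, restored above, strips 'u'/'s' prefixes and repeated 'short'/'long' words to recover a (signed, longness, name) triple before looking the type up. A self-contained sketch of just that modifier stripping, under a hypothetical helper name:]

def parse_int_modifiers(name):
    # 'u' prefix -> unsigned (signed=0); a leading 's' not starting 'short'
    # -> explicitly signed (signed=2); each 'short' lowers longness by one,
    # each 'long' raises it by one; a bare modifier defaults to 'int'.
    signed, longness = 1, 0
    if name.startswith('u'):
        name, signed = name[1:], 0
    elif name.startswith('s') and not name.startswith('short'):
        name, signed = name[1:], 2
    while name.startswith('short'):
        name, longness = name.replace('short', '', 1).strip(), longness - 1
    while name.startswith('long'):
        name, longness = name.replace('long', '', 1).strip(), longness + 1
    if longness != 0 and not name:
        name = 'int'
    return signed, longness, name

assert parse_int_modifiers('ulonglong') == (0, 2, 'int')
assert parse_int_modifiers('short int') == (1, -1, 'int')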
diff --git a/contrib/tools/cython/Cython/Compiler/Scanning.pxd b/contrib/tools/cython/Cython/Compiler/Scanning.pxd
index 4803025e5b..59593f88a2 100644
--- a/contrib/tools/cython/Cython/Compiler/Scanning.pxd
+++ b/contrib/tools/cython/Cython/Compiler/Scanning.pxd
@@ -1,62 +1,62 @@
-from __future__ import absolute_import
-
-import cython
-
-from ..Plex.Scanners cimport Scanner
-
+from __future__ import absolute_import
+
+import cython
+
+from ..Plex.Scanners cimport Scanner
+
cdef unicode any_string_prefix, IDENT
cdef get_lexicon()
cdef initial_compile_time_env()
-cdef class Method:
- cdef object name
+cdef class Method:
+ cdef object name
cdef dict kwargs
cdef readonly object __name__ # for tracing the scanner
-
+
## methods commented with '##' out are used by Parsing.py when compiled.
@cython.final
-cdef class CompileTimeScope:
- cdef public dict entries
- cdef public CompileTimeScope outer
+cdef class CompileTimeScope:
+ cdef public dict entries
+ cdef public CompileTimeScope outer
##cdef declare(self, name, value)
##cdef lookup_here(self, name)
##cpdef lookup(self, name)
-
+
@cython.final
-cdef class PyrexScanner(Scanner):
- cdef public context
- cdef public list included_files
- cdef public CompileTimeScope compile_time_env
- cdef public bint compile_time_eval
- cdef public bint compile_time_expr
- cdef public bint parse_comments
- cdef public bint in_python_file
- cdef public source_encoding
- cdef set keywords
- cdef public list indentation_stack
- cdef public indentation_char
- cdef public int bracket_nesting_level
+cdef class PyrexScanner(Scanner):
+ cdef public context
+ cdef public list included_files
+ cdef public CompileTimeScope compile_time_env
+ cdef public bint compile_time_eval
+ cdef public bint compile_time_expr
+ cdef public bint parse_comments
+ cdef public bint in_python_file
+ cdef public source_encoding
+ cdef set keywords
+ cdef public list indentation_stack
+ cdef public indentation_char
+ cdef public int bracket_nesting_level
cdef readonly bint async_enabled
- cdef public sy
- cdef public systring
-
- cdef long current_level(self)
- #cpdef commentline(self, text)
- #cpdef open_bracket_action(self, text)
- #cpdef close_bracket_action(self, text)
- #cpdef newline_action(self, text)
- #cpdef begin_string_action(self, text)
- #cpdef end_string_action(self, text)
- #cpdef unclosed_string_action(self, text)
- @cython.locals(current_level=cython.long, new_level=cython.long)
- cpdef indentation_action(self, text)
- #cpdef eof_action(self, text)
+ cdef public sy
+ cdef public systring
+
+ cdef long current_level(self)
+ #cpdef commentline(self, text)
+ #cpdef open_bracket_action(self, text)
+ #cpdef close_bracket_action(self, text)
+ #cpdef newline_action(self, text)
+ #cpdef begin_string_action(self, text)
+ #cpdef end_string_action(self, text)
+ #cpdef unclosed_string_action(self, text)
+ @cython.locals(current_level=cython.long, new_level=cython.long)
+ cpdef indentation_action(self, text)
+ #cpdef eof_action(self, text)
##cdef next(self)
##cdef peek(self)
- #cpdef put_back(self, sy, systring)
- #cdef unread(self, token, value)
+ #cpdef put_back(self, sy, systring)
+ #cdef unread(self, token, value)
##cdef bint expect(self, what, message = *) except -2
##cdef expect_keyword(self, what, message = *)
##cdef expected(self, what, message = *)
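[Editorial note, not part of the diff: Scanning.pxd above supplies C-level declarations (@cython.final classes, typed attributes, cython.locals) that augment Scanning.py when it is compiled; in Cython's pure-Python mode the same decorators are importable and the function still runs as ordinary Python when uncompiled. A minimal sketch of that pattern, assuming the cython package is installed; the function name and logic are illustrative only:]

import cython

@cython.locals(level=cython.long, new_level=cython.long)
def dedent_count(stack, new_level):
    # How many DEDENT tokens closing an indentation block would need;
    # when compiled, cython.locals makes `level` and `new_level` C longs.
    count = 0
    for level in reversed(stack):
        if level <= new_level:
            break
        count += 1
    return count

print(dedent_count([0, 4, 8], 0))  # -> 2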
diff --git a/contrib/tools/cython/Cython/Compiler/Scanning.py b/contrib/tools/cython/Cython/Compiler/Scanning.py
index 9eaa91522b..c721bba69b 100644
--- a/contrib/tools/cython/Cython/Compiler/Scanning.py
+++ b/contrib/tools/cython/Cython/Compiler/Scanning.py
@@ -1,68 +1,68 @@
# cython: infer_types=True, language_level=3, py2_import=True, auto_pickle=False
-#
-# Cython Scanner
-#
-
-from __future__ import absolute_import
-
+#
+# Cython Scanner
+#
+
+from __future__ import absolute_import
+
import cython
cython.declare(make_lexicon=object, lexicon=object,
print_function=object, error=object, warning=object,
os=object, platform=object)
-import os
-import platform
-
-from .. import Utils
-from ..Plex.Scanners import Scanner
-from ..Plex.Errors import UnrecognizedInput
-from .Errors import error, warning
-from .Lexicon import any_string_prefix, make_lexicon, IDENT
-from .Future import print_function
-
-debug_scanner = 0
-trace_scanner = 0
-scanner_debug_flags = 0
-scanner_dump_file = None
-
-lexicon = None
-
-
-def get_lexicon():
- global lexicon
- if not lexicon:
- lexicon = make_lexicon()
- return lexicon
-
-
-#------------------------------------------------------------------
-
-py_reserved_words = [
- "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
- "continue", "return", "raise", "import", "exec", "try",
- "except", "finally", "while", "if", "elif", "else", "for",
+import os
+import platform
+
+from .. import Utils
+from ..Plex.Scanners import Scanner
+from ..Plex.Errors import UnrecognizedInput
+from .Errors import error, warning
+from .Lexicon import any_string_prefix, make_lexicon, IDENT
+from .Future import print_function
+
+debug_scanner = 0
+trace_scanner = 0
+scanner_debug_flags = 0
+scanner_dump_file = None
+
+lexicon = None
+
+
+def get_lexicon():
+ global lexicon
+ if not lexicon:
+ lexicon = make_lexicon()
+ return lexicon
+
+
+#------------------------------------------------------------------
+
+py_reserved_words = [
+ "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
+ "continue", "return", "raise", "import", "exec", "try",
+ "except", "finally", "while", "if", "elif", "else", "for",
"in", "assert", "and", "or", "not", "is", "lambda",
"from", "yield", "with",
-]
-
-pyx_reserved_words = py_reserved_words + [
- "include", "ctypedef", "cdef", "cpdef",
- "cimport", "DEF", "IF", "ELIF", "ELSE"
-]
-
-
-class Method(object):
-
+]
+
+pyx_reserved_words = py_reserved_words + [
+ "include", "ctypedef", "cdef", "cpdef",
+ "cimport", "DEF", "IF", "ELIF", "ELSE"
+]
+
+
+class Method(object):
+
def __init__(self, name, **kwargs):
- self.name = name
+ self.name = name
self.kwargs = kwargs or None
self.__name__ = name # for Plex tracing
-
- def __call__(self, stream, text):
+
+ def __call__(self, stream, text):
method = getattr(stream, self.name)
# self.kwargs is almost always unused => avoid call overhead
return method(text, **self.kwargs) if self.kwargs is not None else method(text)
-
+
def __copy__(self):
return self # immutable, no need to copy
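[Editorial note, not part of the diff: the Method class restored above is a named, immutable callable that the lexicon uses as a Plex action; when the lexer fires it, it looks the stored method name up on the scanner instance and forwards the matched text. A hypothetical stand-in showing the same dispatch pattern and how it is used:]

class NamedMethodAction(object):
    # Remembers a method name plus fixed keyword arguments; calling the action
    # dispatches to that method on whatever scanner instance is passed in.
    def __init__(self, name, **kwargs):
        self.name = name
        self.kwargs = kwargs or None
        self.__name__ = name  # handy when tracing which action ran

    def __call__(self, stream, text):
        method = getattr(stream, self.name)
        return method(text, **self.kwargs) if self.kwargs is not None else method(text)

class DemoScanner(object):
    def produce(self, text, symbol=None):
        return (symbol or 'TEXT', text)

action = NamedMethodAction('produce', symbol='NUMBER')
assert action(DemoScanner(), '42') == ('NUMBER', '42')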
@@ -70,47 +70,47 @@ class Method(object):
return self # immutable, no need to copy
-#------------------------------------------------------------------
-
-class CompileTimeScope(object):
-
+#------------------------------------------------------------------
+
+class CompileTimeScope(object):
+
def __init__(self, outer=None):
- self.entries = {}
- self.outer = outer
-
- def declare(self, name, value):
- self.entries[name] = value
-
- def update(self, other):
- self.entries.update(other)
-
- def lookup_here(self, name):
- return self.entries[name]
-
- def __contains__(self, name):
- return name in self.entries
-
- def lookup(self, name):
- try:
- return self.lookup_here(name)
- except KeyError:
- outer = self.outer
- if outer:
- return outer.lookup(name)
- else:
- raise
-
-
-def initial_compile_time_env():
- benv = CompileTimeScope()
+ self.entries = {}
+ self.outer = outer
+
+ def declare(self, name, value):
+ self.entries[name] = value
+
+ def update(self, other):
+ self.entries.update(other)
+
+ def lookup_here(self, name):
+ return self.entries[name]
+
+ def __contains__(self, name):
+ return name in self.entries
+
+ def lookup(self, name):
+ try:
+ return self.lookup_here(name)
+ except KeyError:
+ outer = self.outer
+ if outer:
+ return outer.lookup(name)
+ else:
+ raise
+
+
+def initial_compile_time_env():
+ benv = CompileTimeScope()
names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
- for name, value in zip(names, platform.uname()):
- benv.declare(name, value)
- try:
- import __builtin__ as builtins
- except ImportError:
- import builtins
-
+ for name, value in zip(names, platform.uname()):
+ benv.declare(name, value)
+ try:
+ import __builtin__ as builtins
+ except ImportError:
+ import builtins
+
names = (
'False', 'True',
'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
@@ -122,13 +122,13 @@ def initial_compile_time_env():
### defined below in a platform independent way
# 'long', 'unicode', 'reduce', 'xrange'
)
-
- for name in names:
- try:
- benv.declare(name, getattr(builtins, name))
- except AttributeError:
- # ignore, likely Py3
- pass
+
+ for name in names:
+ try:
+ benv.declare(name, getattr(builtins, name))
+ except AttributeError:
+ # ignore, likely Py3
+ pass
# Py2/3 adaptations
from functools import reduce
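[Editorial note, not part of the diff: CompileTimeScope, restored above, resolves DEF/IF names by searching the innermost scope first and falling back to the outer scope, re-raising KeyError only when no enclosing scope defines the name. A reduced sketch of that chained lookup with hypothetical scope contents:]

class ChainedScope(object):
    def __init__(self, outer=None):
        self.entries = {}
        self.outer = outer

    def lookup(self, name):
        # Innermost scope first, then delegate outwards; only the outermost
        # scope lets the KeyError escape.
        try:
            return self.entries[name]
        except KeyError:
            if self.outer is not None:
                return self.outer.lookup(name)
            raise

builtins_scope = ChainedScope()
builtins_scope.entries['UNAME_SYSNAME'] = 'Linux'
module_scope = ChainedScope(builtins_scope)
module_scope.entries['DEBUG'] = 1
assert module_scope.lookup('UNAME_SYSNAME') == 'Linux'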
@@ -137,65 +137,65 @@ def initial_compile_time_env():
benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int')))
benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range')))
- denv = CompileTimeScope(benv)
- return denv
-
+ denv = CompileTimeScope(benv)
+ return denv
+
+
+#------------------------------------------------------------------
-#------------------------------------------------------------------
-
-class SourceDescriptor(object):
- """
- A SourceDescriptor should be considered immutable.
- """
+class SourceDescriptor(object):
+ """
+ A SourceDescriptor should be considered immutable.
+ """
filename = None
- _file_type = 'pyx'
-
- _escaped_description = None
- _cmp_name = ''
- def __str__(self):
- assert False # To catch all places where a descriptor is used directly as a filename
-
- def set_file_type_from_name(self, filename):
- name, ext = os.path.splitext(filename)
- self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
-
- def is_cython_file(self):
- return self._file_type in ('pyx', 'pxd')
-
- def is_python_file(self):
- return self._file_type == 'py'
-
- def get_escaped_description(self):
- if self._escaped_description is None:
+ _file_type = 'pyx'
+
+ _escaped_description = None
+ _cmp_name = ''
+ def __str__(self):
+ assert False # To catch all places where a descriptor is used directly as a filename
+
+ def set_file_type_from_name(self, filename):
+ name, ext = os.path.splitext(filename)
+ self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
+
+ def is_cython_file(self):
+ return self._file_type in ('pyx', 'pxd')
+
+ def is_python_file(self):
+ return self._file_type == 'py'
+
+ def get_escaped_description(self):
+ if self._escaped_description is None:
esc_desc = \
- self.get_description().encode('ASCII', 'replace').decode("ASCII")
+ self.get_description().encode('ASCII', 'replace').decode("ASCII")
# Use forward slashes on Windows since these paths
# will be used in the #line directives in the C/C++ files.
self._escaped_description = esc_desc.replace('\\', '/')
- return self._escaped_description
-
- def __gt__(self, other):
- # this is only used to provide some sort of order
- try:
- return self._cmp_name > other._cmp_name
- except AttributeError:
- return False
-
- def __lt__(self, other):
- # this is only used to provide some sort of order
- try:
- return self._cmp_name < other._cmp_name
- except AttributeError:
- return False
-
- def __le__(self, other):
- # this is only used to provide some sort of order
- try:
- return self._cmp_name <= other._cmp_name
- except AttributeError:
- return False
-
+ return self._escaped_description
+
+ def __gt__(self, other):
+ # this is only used to provide some sort of order
+ try:
+ return self._cmp_name > other._cmp_name
+ except AttributeError:
+ return False
+
+ def __lt__(self, other):
+ # this is only used to provide some sort of order
+ try:
+ return self._cmp_name < other._cmp_name
+ except AttributeError:
+ return False
+
+ def __le__(self, other):
+ # this is only used to provide some sort of order
+ try:
+ return self._cmp_name <= other._cmp_name
+ except AttributeError:
+ return False
+
def __copy__(self):
return self # immutable, no need to copy
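[Editorial note, not part of the diff: SourceDescriptor's comparison methods above all defer to a private _cmp_name string, so descriptors sort deterministically, and comparing against an unrelated object quietly reports False instead of raising. A small sketch of that ordering protocol with a hypothetical class:]

class ComparableByName(object):
    def __init__(self, cmp_name):
        self._cmp_name = cmp_name

    def __lt__(self, other):
        # Fall back to False when the other object has no _cmp_name,
        # mirroring the AttributeError handling above.
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

names = [ComparableByName('b.pyx'), ComparableByName('a.pxd')]
print([d._cmp_name for d in sorted(names)])  # -> ['a.pxd', 'b.pyx']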
@@ -203,127 +203,127 @@ class SourceDescriptor(object):
return self # immutable, no need to copy
-class FileSourceDescriptor(SourceDescriptor):
- """
- Represents a code source. A code source is a more generic abstraction
- for a "filename" (as sometimes the code doesn't come from a file).
- Instances of code sources are passed to Scanner.__init__ as the
- optional name argument and will be passed back when asking for
- the position()-tuple.
- """
- def __init__(self, filename, path_description=None):
- filename = Utils.decode_filename(filename)
- self.path_description = path_description or filename
- self.filename = filename
+class FileSourceDescriptor(SourceDescriptor):
+ """
+ Represents a code source. A code source is a more generic abstraction
+ for a "filename" (as sometimes the code doesn't come from a file).
+ Instances of code sources are passed to Scanner.__init__ as the
+ optional name argument and will be passed back when asking for
+ the position()-tuple.
+ """
+ def __init__(self, filename, path_description=None):
+ filename = Utils.decode_filename(filename)
+ self.path_description = path_description or filename
+ self.filename = filename
# Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
workdir = os.path.abspath('.') + os.sep
self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename
- self.set_file_type_from_name(filename)
- self._cmp_name = filename
- self._lines = {}
-
- def get_lines(self, encoding=None, error_handling=None):
- # we cache the lines only the second time this is called, in
- # order to save memory when they are only used once
- key = (encoding, error_handling)
- try:
- lines = self._lines[key]
- if lines is not None:
- return lines
- except KeyError:
- pass
+ self.set_file_type_from_name(filename)
+ self._cmp_name = filename
+ self._lines = {}
+
+ def get_lines(self, encoding=None, error_handling=None):
+ # we cache the lines only the second time this is called, in
+ # order to save memory when they are only used once
+ key = (encoding, error_handling)
+ try:
+ lines = self._lines[key]
+ if lines is not None:
+ return lines
+ except KeyError:
+ pass
with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
- lines = list(f)
-
- if key in self._lines:
- self._lines[key] = lines
- else:
- # do not cache the first access, but remember that we
- # already read it once
- self._lines[key] = None
- return lines
-
- def get_description(self):
+ lines = list(f)
+
+ if key in self._lines:
+ self._lines[key] = lines
+ else:
+ # do not cache the first access, but remember that we
+ # already read it once
+ self._lines[key] = None
+ return lines
+
+ def get_description(self):
# Dump path_description, it's already arcadia root relative (required for proper file matching in coverage)
return self.path_description
- try:
- return os.path.relpath(self.path_description)
- except ValueError:
- # path not under current directory => use complete file path
- return self.path_description
-
- def get_error_description(self):
- path = self.filename
- cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
- if path.startswith(cwd):
- return path[len(cwd):]
- return path
-
- def get_filenametable_entry(self):
+ try:
+ return os.path.relpath(self.path_description)
+ except ValueError:
+ # path not under current directory => use complete file path
+ return self.path_description
+
+ def get_error_description(self):
+ path = self.filename
+ cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
+ if path.startswith(cwd):
+ return path[len(cwd):]
+ return path
+
+ def get_filenametable_entry(self):
return self.file_path
-
- def __eq__(self, other):
- return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
-
- def __hash__(self):
- return hash(self.filename)
-
- def __repr__(self):
- return "<FileSourceDescriptor:%s>" % self.filename
-
-
-class StringSourceDescriptor(SourceDescriptor):
- """
- Instances of this class can be used instead of a filenames if the
- code originates from a string object.
- """
- def __init__(self, name, code):
- self.name = name
- #self.set_file_type_from_name(name)
- self.codelines = [x + "\n" for x in code.split("\n")]
- self._cmp_name = name
-
- def get_lines(self, encoding=None, error_handling=None):
- if not encoding:
- return self.codelines
- else:
+
+ def __eq__(self, other):
+ return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
+
+ def __hash__(self):
+ return hash(self.filename)
+
+ def __repr__(self):
+ return "<FileSourceDescriptor:%s>" % self.filename
+
+
+class StringSourceDescriptor(SourceDescriptor):
+ """
+ Instances of this class can be used instead of a filenames if the
+ code originates from a string object.
+ """
+ def __init__(self, name, code):
+ self.name = name
+ #self.set_file_type_from_name(name)
+ self.codelines = [x + "\n" for x in code.split("\n")]
+ self._cmp_name = name
+
+ def get_lines(self, encoding=None, error_handling=None):
+ if not encoding:
+ return self.codelines
+ else:
return [line.encode(encoding, error_handling).decode(encoding)
for line in self.codelines]
-
- def get_description(self):
- return self.name
-
- get_error_description = get_description
-
- def get_filenametable_entry(self):
- return "stringsource"
-
- def __hash__(self):
- return id(self)
- # Do not hash on the name, an identical string source should be the
- # same object (name is often defaulted in other places)
- # return hash(self.name)
-
- def __eq__(self, other):
- return isinstance(other, StringSourceDescriptor) and self.name == other.name
-
- def __repr__(self):
- return "<StringSourceDescriptor:%s>" % self.name
-
-
-#------------------------------------------------------------------
-
-class PyrexScanner(Scanner):
- # context Context Compilation context
- # included_files [string] Files included with 'include' statement
- # compile_time_env dict Environment for conditional compilation
- # compile_time_eval boolean In a true conditional compilation context
- # compile_time_expr boolean In a compile-time expression context
-
+
+ def get_description(self):
+ return self.name
+
+ get_error_description = get_description
+
+ def get_filenametable_entry(self):
+ return "stringsource"
+
+ def __hash__(self):
+ return id(self)
+ # Do not hash on the name, an identical string source should be the
+ # same object (name is often defaulted in other places)
+ # return hash(self.name)
+
+ def __eq__(self, other):
+ return isinstance(other, StringSourceDescriptor) and self.name == other.name
+
+ def __repr__(self):
+ return "<StringSourceDescriptor:%s>" % self.name
+
+
+#------------------------------------------------------------------
+
+class PyrexScanner(Scanner):
+ # context Context Compilation context
+ # included_files [string] Files included with 'include' statement
+ # compile_time_env dict Environment for conditional compilation
+ # compile_time_eval boolean In a true conditional compilation context
+ # compile_time_expr boolean In a compile-time expression context
+
def __init__(self, file, filename, parent_scanner=None,
scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
- Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
+ Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
if filename.is_python_file():
self.in_python_file = True
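[Editorial note, not part of the diff: FileSourceDescriptor.get_lines, restored above, only caches the file's lines on the second access; the first access merely records that the file has been read, so sources scanned once do not stay in memory. A standalone sketch of that cache-on-second-read pattern with a hypothetical helper:]

def read_lines_cached(cache, key, read):
    if key in cache and cache[key] is not None:
        return cache[key]
    lines = read()
    # First access: remember only that we read it (None); second access:
    # keep the real lines.
    cache[key] = lines if key in cache else None
    return lines

cache = {}
read_lines_cached(cache, ('utf-8', None), lambda: ['line\n'])  # first read: not cached
assert cache[('utf-8', None)] is None
read_lines_cached(cache, ('utf-8', None), lambda: ['line\n'])  # second read: cached
assert cache[('utf-8', None)] == ['line\n']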
@@ -334,208 +334,208 @@ class PyrexScanner(Scanner):
self.async_enabled = 0
- if parent_scanner:
- self.context = parent_scanner.context
- self.included_files = parent_scanner.included_files
- self.compile_time_env = parent_scanner.compile_time_env
- self.compile_time_eval = parent_scanner.compile_time_eval
- self.compile_time_expr = parent_scanner.compile_time_expr
+ if parent_scanner:
+ self.context = parent_scanner.context
+ self.included_files = parent_scanner.included_files
+ self.compile_time_env = parent_scanner.compile_time_env
+ self.compile_time_eval = parent_scanner.compile_time_eval
+ self.compile_time_expr = parent_scanner.compile_time_expr
if parent_scanner.async_enabled:
self.enter_async()
- else:
- self.context = context
- self.included_files = scope.included_files
- self.compile_time_env = initial_compile_time_env()
- self.compile_time_eval = 1
- self.compile_time_expr = 0
+ else:
+ self.context = context
+ self.included_files = scope.included_files
+ self.compile_time_env = initial_compile_time_env()
+ self.compile_time_eval = 1
+ self.compile_time_expr = 0
if getattr(context.options, 'compile_time_env', None):
- self.compile_time_env.update(context.options.compile_time_env)
- self.parse_comments = parse_comments
- self.source_encoding = source_encoding
- self.trace = trace_scanner
- self.indentation_stack = [0]
- self.indentation_char = None
- self.bracket_nesting_level = 0
-
- self.begin('INDENT')
- self.sy = ''
- self.next()
-
- def commentline(self, text):
- if self.parse_comments:
- self.produce('commentline', text)
-
+ self.compile_time_env.update(context.options.compile_time_env)
+ self.parse_comments = parse_comments
+ self.source_encoding = source_encoding
+ self.trace = trace_scanner
+ self.indentation_stack = [0]
+ self.indentation_char = None
+ self.bracket_nesting_level = 0
+
+ self.begin('INDENT')
+ self.sy = ''
+ self.next()
+
+ def commentline(self, text):
+ if self.parse_comments:
+ self.produce('commentline', text)
+
def strip_underscores(self, text, symbol):
self.produce(symbol, text.replace('_', ''))
- def current_level(self):
- return self.indentation_stack[-1]
-
- def open_bracket_action(self, text):
+ def current_level(self):
+ return self.indentation_stack[-1]
+
+ def open_bracket_action(self, text):
self.bracket_nesting_level += 1
- return text
-
- def close_bracket_action(self, text):
+ return text
+
+ def close_bracket_action(self, text):
self.bracket_nesting_level -= 1
- return text
-
- def newline_action(self, text):
- if self.bracket_nesting_level == 0:
- self.begin('INDENT')
- self.produce('NEWLINE', '')
-
- string_states = {
- "'": 'SQ_STRING',
- '"': 'DQ_STRING',
- "'''": 'TSQ_STRING',
- '"""': 'TDQ_STRING'
- }
-
- def begin_string_action(self, text):
- while text[:1] in any_string_prefix:
- text = text[1:]
- self.begin(self.string_states[text])
- self.produce('BEGIN_STRING')
-
- def end_string_action(self, text):
- self.begin('')
- self.produce('END_STRING')
-
- def unclosed_string_action(self, text):
- self.end_string_action(text)
- self.error("Unclosed string literal")
-
- def indentation_action(self, text):
- self.begin('')
- # Indentation within brackets should be ignored.
- #if self.bracket_nesting_level > 0:
- # return
- # Check that tabs and spaces are being used consistently.
- if text:
- c = text[0]
- #print "Scanner.indentation_action: indent with", repr(c) ###
- if self.indentation_char is None:
- self.indentation_char = c
- #print "Scanner.indentation_action: setting indent_char to", repr(c)
- else:
- if self.indentation_char != c:
- self.error("Mixed use of tabs and spaces")
- if text.replace(c, "") != "":
- self.error("Mixed use of tabs and spaces")
- # Figure out how many indents/dedents to do
- current_level = self.current_level()
- new_level = len(text)
- #print "Changing indent level from", current_level, "to", new_level ###
- if new_level == current_level:
- return
- elif new_level > current_level:
- #print "...pushing level", new_level ###
- self.indentation_stack.append(new_level)
- self.produce('INDENT', '')
- else:
- while new_level < self.current_level():
- #print "...popping level", self.indentation_stack[-1] ###
- self.indentation_stack.pop()
- self.produce('DEDENT', '')
- #print "...current level now", self.current_level() ###
- if new_level != self.current_level():
- self.error("Inconsistent indentation")
-
- def eof_action(self, text):
- while len(self.indentation_stack) > 1:
- self.produce('DEDENT', '')
- self.indentation_stack.pop()
- self.produce('EOF', '')
-
- def next(self):
- try:
- sy, systring = self.read()
- except UnrecognizedInput:
- self.error("Unrecognized character")
+ return text
+
+ def newline_action(self, text):
+ if self.bracket_nesting_level == 0:
+ self.begin('INDENT')
+ self.produce('NEWLINE', '')
+
+ string_states = {
+ "'": 'SQ_STRING',
+ '"': 'DQ_STRING',
+ "'''": 'TSQ_STRING',
+ '"""': 'TDQ_STRING'
+ }
+
+ def begin_string_action(self, text):
+ while text[:1] in any_string_prefix:
+ text = text[1:]
+ self.begin(self.string_states[text])
+ self.produce('BEGIN_STRING')
+
+ def end_string_action(self, text):
+ self.begin('')
+ self.produce('END_STRING')
+
+ def unclosed_string_action(self, text):
+ self.end_string_action(text)
+ self.error("Unclosed string literal")
+
+ def indentation_action(self, text):
+ self.begin('')
+ # Indentation within brackets should be ignored.
+ #if self.bracket_nesting_level > 0:
+ # return
+ # Check that tabs and spaces are being used consistently.
+ if text:
+ c = text[0]
+ #print "Scanner.indentation_action: indent with", repr(c) ###
+ if self.indentation_char is None:
+ self.indentation_char = c
+ #print "Scanner.indentation_action: setting indent_char to", repr(c)
+ else:
+ if self.indentation_char != c:
+ self.error("Mixed use of tabs and spaces")
+ if text.replace(c, "") != "":
+ self.error("Mixed use of tabs and spaces")
+ # Figure out how many indents/dedents to do
+ current_level = self.current_level()
+ new_level = len(text)
+ #print "Changing indent level from", current_level, "to", new_level ###
+ if new_level == current_level:
+ return
+ elif new_level > current_level:
+ #print "...pushing level", new_level ###
+ self.indentation_stack.append(new_level)
+ self.produce('INDENT', '')
+ else:
+ while new_level < self.current_level():
+ #print "...popping level", self.indentation_stack[-1] ###
+ self.indentation_stack.pop()
+ self.produce('DEDENT', '')
+ #print "...current level now", self.current_level() ###
+ if new_level != self.current_level():
+ self.error("Inconsistent indentation")
+
+ def eof_action(self, text):
+ while len(self.indentation_stack) > 1:
+ self.produce('DEDENT', '')
+ self.indentation_stack.pop()
+ self.produce('EOF', '')
+
+ def next(self):
+ try:
+ sy, systring = self.read()
+ except UnrecognizedInput:
+ self.error("Unrecognized character")
return # just a marker, error() always raises
- if sy == IDENT:
- if systring in self.keywords:
- if systring == u'print' and print_function in self.context.future_directives:
- self.keywords.discard('print')
- elif systring == u'exec' and self.context.language_level >= 3:
- self.keywords.discard('exec')
- else:
- sy = systring
+ if sy == IDENT:
+ if systring in self.keywords:
+ if systring == u'print' and print_function in self.context.future_directives:
+ self.keywords.discard('print')
+ elif systring == u'exec' and self.context.language_level >= 3:
+ self.keywords.discard('exec')
+ else:
+ sy = systring
systring = self.context.intern_ustring(systring)
- self.sy = sy
- self.systring = systring
- if False: # debug_scanner:
- _, line, col = self.position()
- if not self.systring or self.sy == self.systring:
- t = self.sy
- else:
- t = "%s %s" % (self.sy, self.systring)
- print("--- %3d %2d %s" % (line, col, t))
-
- def peek(self):
- saved = self.sy, self.systring
- self.next()
- next = self.sy, self.systring
- self.unread(*next)
- self.sy, self.systring = saved
- return next
-
- def put_back(self, sy, systring):
- self.unread(self.sy, self.systring)
- self.sy = sy
- self.systring = systring
-
- def unread(self, token, value):
- # This method should be added to Plex
- self.queue.insert(0, (token, value))
-
+ self.sy = sy
+ self.systring = systring
+ if False: # debug_scanner:
+ _, line, col = self.position()
+ if not self.systring or self.sy == self.systring:
+ t = self.sy
+ else:
+ t = "%s %s" % (self.sy, self.systring)
+ print("--- %3d %2d %s" % (line, col, t))
+
+ def peek(self):
+ saved = self.sy, self.systring
+ self.next()
+ next = self.sy, self.systring
+ self.unread(*next)
+ self.sy, self.systring = saved
+ return next
+
+ def put_back(self, sy, systring):
+ self.unread(self.sy, self.systring)
+ self.sy = sy
+ self.systring = systring
+
+ def unread(self, token, value):
+ # This method should be added to Plex
+ self.queue.insert(0, (token, value))
+
def error(self, message, pos=None, fatal=True):
- if pos is None:
- pos = self.position()
- if self.sy == 'INDENT':
+ if pos is None:
+ pos = self.position()
+ if self.sy == 'INDENT':
error(pos, "Possible inconsistent indentation")
- err = error(pos, message)
- if fatal: raise err
-
+ err = error(pos, message)
+ if fatal: raise err
+
def expect(self, what, message=None):
- if self.sy == what:
- self.next()
- else:
- self.expected(what, message)
-
+ if self.sy == what:
+ self.next()
+ else:
+ self.expected(what, message)
+
def expect_keyword(self, what, message=None):
- if self.sy == IDENT and self.systring == what:
- self.next()
- else:
- self.expected(what, message)
-
+ if self.sy == IDENT and self.systring == what:
+ self.next()
+ else:
+ self.expected(what, message)
+
def expected(self, what, message=None):
- if message:
- self.error(message)
- else:
- if self.sy == IDENT:
- found = self.systring
- else:
- found = self.sy
- self.error("Expected '%s', found '%s'" % (what, found))
-
- def expect_indent(self):
+ if message:
+ self.error(message)
+ else:
+ if self.sy == IDENT:
+ found = self.systring
+ else:
+ found = self.sy
+ self.error("Expected '%s', found '%s'" % (what, found))
+
+ def expect_indent(self):
self.expect('INDENT', "Expected an increase in indentation level")
-
- def expect_dedent(self):
+
+ def expect_dedent(self):
self.expect('DEDENT', "Expected a decrease in indentation level")
-
- def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
- # Expect either a newline or end of file
- useless_trailing_semicolon = None
- if ignore_semicolon and self.sy == ';':
- useless_trailing_semicolon = self.position()
- self.next()
- if self.sy != 'EOF':
- self.expect('NEWLINE', message)
- if useless_trailing_semicolon is not None:
- warning(useless_trailing_semicolon, "useless trailing semicolon")
+
+ def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
+ # Expect either a newline or end of file
+ useless_trailing_semicolon = None
+ if ignore_semicolon and self.sy == ';':
+ useless_trailing_semicolon = self.position()
+ self.next()
+ if self.sy != 'EOF':
+ self.expect('NEWLINE', message)
+ if useless_trailing_semicolon is not None:
+ warning(useless_trailing_semicolon, "useless trailing semicolon")
def enter_async(self):
self.async_enabled += 1
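[Editorial note, not part of the diff: PyrexScanner.indentation_action, restored above, compares each line's indentation width with the top of an indentation stack and emits INDENT or a run of DEDENT tokens, flagging a width that does not land back on a previous level. A condensed model of that logic, with hypothetical names and error handling:]

def indent_tokens(stack, new_level):
    tokens = []
    if new_level > stack[-1]:
        stack.append(new_level)
        tokens.append('INDENT')
    else:
        # Pop levels until we are back at (or below) the new width.
        while new_level < stack[-1]:
            stack.pop()
            tokens.append('DEDENT')
        if new_level != stack[-1]:
            raise ValueError("Inconsistent indentation")
    return tokens

stack = [0]
assert indent_tokens(stack, 4) == ['INDENT']
assert indent_tokens(stack, 8) == ['INDENT']
assert indent_tokens(stack, 0) == ['DEDENT', 'DEDENT']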
diff --git a/contrib/tools/cython/Cython/Compiler/StringEncoding.py b/contrib/tools/cython/Cython/Compiler/StringEncoding.py
index d9993c6615..c37e8aab79 100644
--- a/contrib/tools/cython/Cython/Compiler/StringEncoding.py
+++ b/contrib/tools/cython/Cython/Compiler/StringEncoding.py
@@ -1,159 +1,159 @@
-#
-# Cython -- encoding related tools
-#
-
-from __future__ import absolute_import
-
-import re
-import sys
-
-if sys.version_info[0] >= 3:
+#
+# Cython -- encoding related tools
+#
+
+from __future__ import absolute_import
+
+import re
+import sys
+
+if sys.version_info[0] >= 3:
_unicode, _str, _bytes, _unichr = str, str, bytes, chr
- IS_PYTHON3 = True
-else:
+ IS_PYTHON3 = True
+else:
_unicode, _str, _bytes, _unichr = unicode, str, str, unichr
- IS_PYTHON3 = False
-
-empty_bytes = _bytes()
-empty_unicode = _unicode()
-
-join_bytes = empty_bytes.join
-
-
-class UnicodeLiteralBuilder(object):
- """Assemble a unicode string.
- """
- def __init__(self):
- self.chars = []
-
- def append(self, characters):
- if isinstance(characters, _bytes):
- # this came from a Py2 string literal in the parser code
- characters = characters.decode("ASCII")
- assert isinstance(characters, _unicode), str(type(characters))
- self.chars.append(characters)
-
- if sys.maxunicode == 65535:
- def append_charval(self, char_number):
- if char_number > 65535:
- # wide Unicode character on narrow platform => replace
- # by surrogate pair
- char_number -= 0x10000
+ IS_PYTHON3 = False
+
+empty_bytes = _bytes()
+empty_unicode = _unicode()
+
+join_bytes = empty_bytes.join
+
+
+class UnicodeLiteralBuilder(object):
+ """Assemble a unicode string.
+ """
+ def __init__(self):
+ self.chars = []
+
+ def append(self, characters):
+ if isinstance(characters, _bytes):
+ # this came from a Py2 string literal in the parser code
+ characters = characters.decode("ASCII")
+ assert isinstance(characters, _unicode), str(type(characters))
+ self.chars.append(characters)
+
+ if sys.maxunicode == 65535:
+ def append_charval(self, char_number):
+ if char_number > 65535:
+ # wide Unicode character on narrow platform => replace
+ # by surrogate pair
+ char_number -= 0x10000
self.chars.append( _unichr((char_number // 1024) + 0xD800) )
self.chars.append( _unichr((char_number % 1024) + 0xDC00) )
- else:
+ else:
self.chars.append( _unichr(char_number) )
- else:
- def append_charval(self, char_number):
+ else:
+ def append_charval(self, char_number):
self.chars.append( _unichr(char_number) )
-
- def append_uescape(self, char_number, escape_string):
- self.append_charval(char_number)
-
- def getstring(self):
- return EncodedString(u''.join(self.chars))
-
- def getstrings(self):
- return (None, self.getstring())
-
-
-class BytesLiteralBuilder(object):
- """Assemble a byte string or char value.
- """
- def __init__(self, target_encoding):
- self.chars = []
- self.target_encoding = target_encoding
-
- def append(self, characters):
- if isinstance(characters, _unicode):
- characters = characters.encode(self.target_encoding)
- assert isinstance(characters, _bytes), str(type(characters))
- self.chars.append(characters)
-
- def append_charval(self, char_number):
+
+ def append_uescape(self, char_number, escape_string):
+ self.append_charval(char_number)
+
+ def getstring(self):
+ return EncodedString(u''.join(self.chars))
+
+ def getstrings(self):
+ return (None, self.getstring())
+
+
+class BytesLiteralBuilder(object):
+ """Assemble a byte string or char value.
+ """
+ def __init__(self, target_encoding):
+ self.chars = []
+ self.target_encoding = target_encoding
+
+ def append(self, characters):
+ if isinstance(characters, _unicode):
+ characters = characters.encode(self.target_encoding)
+ assert isinstance(characters, _bytes), str(type(characters))
+ self.chars.append(characters)
+
+ def append_charval(self, char_number):
self.chars.append( _unichr(char_number).encode('ISO-8859-1') )
-
- def append_uescape(self, char_number, escape_string):
- self.append(escape_string)
-
- def getstring(self):
- # this *must* return a byte string!
+
+ def append_uescape(self, char_number, escape_string):
+ self.append(escape_string)
+
+ def getstring(self):
+ # this *must* return a byte string!
return bytes_literal(join_bytes(self.chars), self.target_encoding)
-
- def getchar(self):
- # this *must* return a byte string!
- return self.getstring()
-
- def getstrings(self):
- return (self.getstring(), None)
-
-
-class StrLiteralBuilder(object):
- """Assemble both a bytes and a unicode representation of a string.
- """
- def __init__(self, target_encoding):
- self._bytes = BytesLiteralBuilder(target_encoding)
- self._unicode = UnicodeLiteralBuilder()
-
- def append(self, characters):
- self._bytes.append(characters)
- self._unicode.append(characters)
-
- def append_charval(self, char_number):
- self._bytes.append_charval(char_number)
- self._unicode.append_charval(char_number)
-
- def append_uescape(self, char_number, escape_string):
- self._bytes.append(escape_string)
- self._unicode.append_charval(char_number)
-
- def getstrings(self):
- return (self._bytes.getstring(), self._unicode.getstring())
-
-
-class EncodedString(_unicode):
- # unicode string subclass to keep track of the original encoding.
- # 'encoding' is None for unicode strings and the source encoding
- # otherwise
- encoding = None
-
- def __deepcopy__(self, memo):
- return self
-
- def byteencode(self):
- assert self.encoding is not None
- return self.encode(self.encoding)
-
- def utf8encode(self):
- assert self.encoding is None
- return self.encode("UTF-8")
-
- @property
- def is_unicode(self):
- return self.encoding is None
-
- def contains_surrogates(self):
- return string_contains_surrogates(self)
-
+
+ def getchar(self):
+ # this *must* return a byte string!
+ return self.getstring()
+
+ def getstrings(self):
+ return (self.getstring(), None)
+
+
+class StrLiteralBuilder(object):
+ """Assemble both a bytes and a unicode representation of a string.
+ """
+ def __init__(self, target_encoding):
+ self._bytes = BytesLiteralBuilder(target_encoding)
+ self._unicode = UnicodeLiteralBuilder()
+
+ def append(self, characters):
+ self._bytes.append(characters)
+ self._unicode.append(characters)
+
+ def append_charval(self, char_number):
+ self._bytes.append_charval(char_number)
+ self._unicode.append_charval(char_number)
+
+ def append_uescape(self, char_number, escape_string):
+ self._bytes.append(escape_string)
+ self._unicode.append_charval(char_number)
+
+ def getstrings(self):
+ return (self._bytes.getstring(), self._unicode.getstring())
+
+
+class EncodedString(_unicode):
+ # unicode string subclass to keep track of the original encoding.
+ # 'encoding' is None for unicode strings and the source encoding
+ # otherwise
+ encoding = None
+
+ def __deepcopy__(self, memo):
+ return self
+
+ def byteencode(self):
+ assert self.encoding is not None
+ return self.encode(self.encoding)
+
+ def utf8encode(self):
+ assert self.encoding is None
+ return self.encode("UTF-8")
+
+ @property
+ def is_unicode(self):
+ return self.encoding is None
+
+ def contains_surrogates(self):
+ return string_contains_surrogates(self)
+
def as_utf8_string(self):
return bytes_literal(self.utf8encode(), 'utf8')
-
-
-def string_contains_surrogates(ustring):
- """
- Check if the unicode string contains surrogate code points
- on a CPython platform with wide (UCS-4) or narrow (UTF-16)
- Unicode, i.e. characters that would be spelled as two
- separate code units on a narrow platform.
- """
- for c in map(ord, ustring):
- if c > 65535: # can only happen on wide platforms
- return True
- if 0xD800 <= c <= 0xDFFF:
- return True
- return False
-
-
+
+
+def string_contains_surrogates(ustring):
+ """
+ Check if the unicode string contains surrogate code points
+ on a CPython platform with wide (UCS-4) or narrow (UTF-16)
+ Unicode, i.e. characters that would be spelled as two
+ separate code units on a narrow platform.
+ """
+ for c in map(ord, ustring):
+ if c > 65535: # can only happen on wide platforms
+ return True
+ if 0xD800 <= c <= 0xDFFF:
+ return True
+ return False
+
+
def string_contains_lone_surrogates(ustring):
"""
Check if the unicode string contains lone surrogate code points
@@ -182,35 +182,35 @@ def string_contains_lone_surrogates(ustring):
return last_was_start
-class BytesLiteral(_bytes):
- # bytes subclass that is compatible with EncodedString
- encoding = None
-
- def __deepcopy__(self, memo):
- return self
-
- def byteencode(self):
- if IS_PYTHON3:
- return _bytes(self)
- else:
- # fake-recode the string to make it a plain bytes object
- return self.decode('ISO-8859-1').encode('ISO-8859-1')
-
- def utf8encode(self):
- assert False, "this is not a unicode string: %r" % self
-
- def __str__(self):
- """Fake-decode the byte string to unicode to support %
- formatting of unicode strings.
- """
- return self.decode('ISO-8859-1')
-
- is_unicode = False
-
+class BytesLiteral(_bytes):
+ # bytes subclass that is compatible with EncodedString
+ encoding = None
+
+ def __deepcopy__(self, memo):
+ return self
+
+ def byteencode(self):
+ if IS_PYTHON3:
+ return _bytes(self)
+ else:
+ # fake-recode the string to make it a plain bytes object
+ return self.decode('ISO-8859-1').encode('ISO-8859-1')
+
+ def utf8encode(self):
+ assert False, "this is not a unicode string: %r" % self
+
+ def __str__(self):
+ """Fake-decode the byte string to unicode to support %
+ formatting of unicode strings.
+ """
+ return self.decode('ISO-8859-1')
+
+ is_unicode = False
+
def as_c_string_literal(self):
value = split_string_literal(escape_byte_string(self))
return '"%s"' % value
-
+
def bytes_literal(s, encoding):
assert isinstance(s, bytes)
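
BytesLiteral above relies on the fact that ISO-8859-1 (Latin-1) maps every byte value 0-255 to the Unicode code point of the same number, so a decode/encode round trip through it is lossless; that is what makes the "fake-decode" in __str__ and the "fake-recode" in byteencode safe. A minimal standalone sketch of that property (Python, not part of the patch):

# Latin-1 is a 1:1 byte <-> code point mapping, so the round trip is lossless.
all_bytes = bytes(bytearray(range(256)))
text = all_bytes.decode('ISO-8859-1')            # byte N becomes code point N
assert text.encode('ISO-8859-1') == all_bytes    # ...and encodes back unchanged
assert ord(text[0xA9]) == 0xA9                   # e.g. byte 0xA9 -> U+00A9
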
@@ -227,137 +227,137 @@ def encoded_string(s, encoding):
return s
-char_from_escape_sequence = {
- r'\a' : u'\a',
- r'\b' : u'\b',
- r'\f' : u'\f',
- r'\n' : u'\n',
- r'\r' : u'\r',
- r'\t' : u'\t',
- r'\v' : u'\v',
- }.get
-
-_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
-
-
-def _to_escape_sequence(s):
- if s in '\n\r\t':
- return repr(s)[1:-1]
- elif s == '"':
- return r'\"'
- elif s == '\\':
- return r'\\'
- else:
- # within a character sequence, oct passes much better than hex
- return ''.join(['\\%03o' % ord(c) for c in s])
-
-
-def _build_specials_replacer():
- subexps = []
- replacements = {}
- for special in _c_special:
- regexp = ''.join(['[%s]' % c.replace('\\', '\\\\') for c in special])
- subexps.append(regexp)
- replacements[special.encode('ASCII')] = _to_escape_sequence(special).encode('ASCII')
- sub = re.compile(('(%s)' % '|'.join(subexps)).encode('ASCII')).sub
- def replace_specials(m):
- return replacements[m.group(1)]
- def replace(s):
- return sub(replace_specials, s)
- return replace
-
-_replace_specials = _build_specials_replacer()
-
-
-def escape_char(c):
- if IS_PYTHON3:
- c = c.decode('ISO-8859-1')
- if c in '\n\r\t\\':
- return repr(c)[1:-1]
- elif c == "'":
- return "\\'"
- n = ord(c)
- if n < 32 or n > 127:
- # hex works well for characters
- return "\\x%02X" % n
- else:
- return c
-
-def escape_byte_string(s):
- """Escape a byte string so that it can be written into C code.
- Note that this returns a Unicode string instead which, when
- encoded as ISO-8859-1, will result in the correct byte sequence
- being written.
- """
- s = _replace_specials(s)
- try:
- return s.decode("ASCII") # trial decoding: plain ASCII => done
- except UnicodeDecodeError:
- pass
- if IS_PYTHON3:
- s_new = bytearray()
- append, extend = s_new.append, s_new.extend
- for b in s:
- if b >= 128:
- extend(('\\%3o' % b).encode('ASCII'))
- else:
- append(b)
- return s_new.decode('ISO-8859-1')
- else:
- l = []
- append = l.append
- for c in s:
- o = ord(c)
- if o >= 128:
- append('\\%3o' % o)
- else:
- append(c)
- return join_bytes(l).decode('ISO-8859-1')
-
-def split_string_literal(s, limit=2000):
- # MSVC can't handle long string literals.
- if len(s) < limit:
- return s
- else:
- start = 0
- chunks = []
- while start < len(s):
- end = start + limit
- if len(s) > end-4 and '\\' in s[end-4:end]:
- end -= 4 - s[end-4:end].find('\\') # just before the backslash
- while s[end-1] == '\\':
- end -= 1
- if end == start:
- # must have been a long line of backslashes
- end = start + limit - (limit % 2) - 4
- break
- chunks.append(s[start:end])
- start = end
- return '""'.join(chunks)
-
-def encode_pyunicode_string(s):
- """Create Py_UNICODE[] representation of a given unicode string.
- """
+char_from_escape_sequence = {
+ r'\a' : u'\a',
+ r'\b' : u'\b',
+ r'\f' : u'\f',
+ r'\n' : u'\n',
+ r'\r' : u'\r',
+ r'\t' : u'\t',
+ r'\v' : u'\v',
+ }.get
+
+_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
+
+
+def _to_escape_sequence(s):
+ if s in '\n\r\t':
+ return repr(s)[1:-1]
+ elif s == '"':
+ return r'\"'
+ elif s == '\\':
+ return r'\\'
+ else:
+ # within a character sequence, oct passes much better than hex
+ return ''.join(['\\%03o' % ord(c) for c in s])
+
+
+def _build_specials_replacer():
+ subexps = []
+ replacements = {}
+ for special in _c_special:
+ regexp = ''.join(['[%s]' % c.replace('\\', '\\\\') for c in special])
+ subexps.append(regexp)
+ replacements[special.encode('ASCII')] = _to_escape_sequence(special).encode('ASCII')
+ sub = re.compile(('(%s)' % '|'.join(subexps)).encode('ASCII')).sub
+ def replace_specials(m):
+ return replacements[m.group(1)]
+ def replace(s):
+ return sub(replace_specials, s)
+ return replace
+
+_replace_specials = _build_specials_replacer()
+
+
+def escape_char(c):
+ if IS_PYTHON3:
+ c = c.decode('ISO-8859-1')
+ if c in '\n\r\t\\':
+ return repr(c)[1:-1]
+ elif c == "'":
+ return "\\'"
+ n = ord(c)
+ if n < 32 or n > 127:
+ # hex works well for characters
+ return "\\x%02X" % n
+ else:
+ return c
+
+def escape_byte_string(s):
+ """Escape a byte string so that it can be written into C code.
+ Note that this returns a Unicode string instead which, when
+ encoded as ISO-8859-1, will result in the correct byte sequence
+ being written.
+ """
+ s = _replace_specials(s)
+ try:
+ return s.decode("ASCII") # trial decoding: plain ASCII => done
+ except UnicodeDecodeError:
+ pass
+ if IS_PYTHON3:
+ s_new = bytearray()
+ append, extend = s_new.append, s_new.extend
+ for b in s:
+ if b >= 128:
+ extend(('\\%3o' % b).encode('ASCII'))
+ else:
+ append(b)
+ return s_new.decode('ISO-8859-1')
+ else:
+ l = []
+ append = l.append
+ for c in s:
+ o = ord(c)
+ if o >= 128:
+ append('\\%3o' % o)
+ else:
+ append(c)
+ return join_bytes(l).decode('ISO-8859-1')
+
+def split_string_literal(s, limit=2000):
+ # MSVC can't handle long string literals.
+ if len(s) < limit:
+ return s
+ else:
+ start = 0
+ chunks = []
+ while start < len(s):
+ end = start + limit
+ if len(s) > end-4 and '\\' in s[end-4:end]:
+ end -= 4 - s[end-4:end].find('\\') # just before the backslash
+ while s[end-1] == '\\':
+ end -= 1
+ if end == start:
+ # must have been a long line of backslashes
+ end = start + limit - (limit % 2) - 4
+ break
+ chunks.append(s[start:end])
+ start = end
+ return '""'.join(chunks)
+
+def encode_pyunicode_string(s):
+ """Create Py_UNICODE[] representation of a given unicode string.
+ """
s = list(map(ord, s)) + [0]
-
- if sys.maxunicode >= 0x10000: # Wide build or Py3.3
- utf16, utf32 = [], s
- for code_point in s:
- if code_point >= 0x10000: # outside of BMP
- high, low = divmod(code_point - 0x10000, 1024)
- utf16.append(high + 0xD800)
- utf16.append(low + 0xDC00)
- else:
- utf16.append(code_point)
- else:
- utf16, utf32 = s, []
- for code_unit in s:
- if 0xDC00 <= code_unit <= 0xDFFF and utf32 and 0xD800 <= utf32[-1] <= 0xDBFF:
- high, low = utf32[-1], code_unit
- utf32[-1] = ((high & 0x3FF) << 10) + (low & 0x3FF) + 0x10000
- else:
- utf32.append(code_unit)
-
- if utf16 == utf32:
- utf16 = []
+
+ if sys.maxunicode >= 0x10000: # Wide build or Py3.3
+ utf16, utf32 = [], s
+ for code_point in s:
+ if code_point >= 0x10000: # outside of BMP
+ high, low = divmod(code_point - 0x10000, 1024)
+ utf16.append(high + 0xD800)
+ utf16.append(low + 0xDC00)
+ else:
+ utf16.append(code_point)
+ else:
+ utf16, utf32 = s, []
+ for code_unit in s:
+ if 0xDC00 <= code_unit <= 0xDFFF and utf32 and 0xD800 <= utf32[-1] <= 0xDBFF:
+ high, low = utf32[-1], code_unit
+ utf32[-1] = ((high & 0x3FF) << 10) + (low & 0x3FF) + 0x10000
+ else:
+ utf32.append(code_unit)
+
+ if utf16 == utf32:
+ utf16 = []
return ",".join(map(_unicode, utf16)), ",".join(map(_unicode, utf32))
diff --git a/contrib/tools/cython/Cython/Compiler/Symtab.py b/contrib/tools/cython/Cython/Compiler/Symtab.py
index 037b5d2d9a..7361a55aea 100644
--- a/contrib/tools/cython/Cython/Compiler/Symtab.py
+++ b/contrib/tools/cython/Cython/Compiler/Symtab.py
@@ -1,11 +1,11 @@
-#
-# Symbol Table
-#
-
-from __future__ import absolute_import
-
+#
+# Symbol Table
+#
+
+from __future__ import absolute_import
+
import re
-import copy
+import copy
import operator
try:
@@ -13,440 +13,440 @@ try:
except ImportError: # Py3
import builtins
-from .Errors import warning, error, InternalError
-from .StringEncoding import EncodedString
-from . import Options, Naming
-from . import PyrexTypes
-from .PyrexTypes import py_object_type, unspecified_type
+from .Errors import warning, error, InternalError
+from .StringEncoding import EncodedString
+from . import Options, Naming
+from . import PyrexTypes
+from .PyrexTypes import py_object_type, unspecified_type
from .TypeSlots import (
pyfunction_signature, pymethod_signature, richcmp_special_methods,
get_special_method_signature, get_property_accessor_signature)
from . import Future
-from . import Code
-
-iso_c99_keywords = set(
-['auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
- 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if',
- 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof',
- 'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void',
- 'volatile', 'while',
- '_Bool', '_Complex'', _Imaginary', 'inline', 'restrict'])
-
-
-def c_safe_identifier(cname):
- # There are some C limitations on struct entry names.
+from . import Code
+
+iso_c99_keywords = set(
+['auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
+ 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if',
+ 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof',
+ 'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void',
+ 'volatile', 'while',
+ '_Bool', '_Complex', '_Imaginary', 'inline', 'restrict'])
+
+
+def c_safe_identifier(cname):
+ # There are some C limitations on struct entry names.
if ((cname[:2] == '__' and not (cname.startswith(Naming.pyrex_prefix)
or cname in ('__weakref__', '__dict__')))
or cname in iso_c99_keywords):
- cname = Naming.pyrex_prefix + cname
- return cname
-
-
-class BufferAux(object):
- writable_needed = False
-
- def __init__(self, buflocal_nd_var, rcbuf_var):
- self.buflocal_nd_var = buflocal_nd_var
- self.rcbuf_var = rcbuf_var
-
- def __repr__(self):
- return "<BufferAux %r>" % self.__dict__
-
-
-class Entry(object):
- # A symbol table entry in a Scope or ModuleNamespace.
- #
- # name string Python name of entity
- # cname string C name of entity
- # type PyrexType Type of entity
- # doc string Doc string
+ cname = Naming.pyrex_prefix + cname
+ return cname
+
+
+class BufferAux(object):
+ writable_needed = False
+
+ def __init__(self, buflocal_nd_var, rcbuf_var):
+ self.buflocal_nd_var = buflocal_nd_var
+ self.rcbuf_var = rcbuf_var
+
+ def __repr__(self):
+ return "<BufferAux %r>" % self.__dict__
+
+
+class Entry(object):
+ # A symbol table entry in a Scope or ModuleNamespace.
+ #
+ # name string Python name of entity
+ # cname string C name of entity
+ # type PyrexType Type of entity
+ # doc string Doc string
# annotation ExprNode PEP 484/526 annotation
- # init string Initial value
- # visibility 'private' or 'public' or 'extern'
- # is_builtin boolean Is an entry in the Python builtins dict
- # is_cglobal boolean Is a C global variable
- # is_pyglobal boolean Is a Python module-level variable
- # or class attribute during
- # class construction
- # is_member boolean Is an assigned class member
- # is_pyclass_attr boolean Is a name in a Python class namespace
- # is_variable boolean Is a variable
- # is_cfunction boolean Is a C function
- # is_cmethod boolean Is a C method of an extension type
- # is_builtin_cmethod boolean Is a C method of a builtin type (implies is_cmethod)
- # is_unbound_cmethod boolean Is an unbound C method of an extension type
- # is_final_cmethod boolean Is non-overridable C method
- # is_inline_cmethod boolean Is inlined C method
- # is_anonymous boolean Is a anonymous pyfunction entry
- # is_type boolean Is a type definition
- # is_cclass boolean Is an extension class
- # is_cpp_class boolean Is a C++ class
- # is_const boolean Is a constant
- # is_property boolean Is a property of an extension type:
- # doc_cname string or None C const holding the docstring
- # getter_cname string C func for getting property
- # setter_cname string C func for setting or deleting property
- # is_self_arg boolean Is the "self" arg of an exttype method
- # is_arg boolean Is the arg of a method
- # is_local boolean Is a local variable
- # in_closure boolean Is referenced in an inner scope
+ # init string Initial value
+ # visibility 'private' or 'public' or 'extern'
+ # is_builtin boolean Is an entry in the Python builtins dict
+ # is_cglobal boolean Is a C global variable
+ # is_pyglobal boolean Is a Python module-level variable
+ # or class attribute during
+ # class construction
+ # is_member boolean Is an assigned class member
+ # is_pyclass_attr boolean Is a name in a Python class namespace
+ # is_variable boolean Is a variable
+ # is_cfunction boolean Is a C function
+ # is_cmethod boolean Is a C method of an extension type
+ # is_builtin_cmethod boolean Is a C method of a builtin type (implies is_cmethod)
+ # is_unbound_cmethod boolean Is an unbound C method of an extension type
+ # is_final_cmethod boolean Is non-overridable C method
+ # is_inline_cmethod boolean Is inlined C method
+ # is_anonymous boolean Is an anonymous pyfunction entry
+ # is_type boolean Is a type definition
+ # is_cclass boolean Is an extension class
+ # is_cpp_class boolean Is a C++ class
+ # is_const boolean Is a constant
+ # is_property boolean Is a property of an extension type:
+ # doc_cname string or None C const holding the docstring
+ # getter_cname string C func for getting property
+ # setter_cname string C func for setting or deleting property
+ # is_self_arg boolean Is the "self" arg of an exttype method
+ # is_arg boolean Is the arg of a method
+ # is_local boolean Is a local variable
+ # in_closure boolean Is referenced in an inner scope
# in_subscope boolean Belongs to a generator expression scope
- # is_readonly boolean Can't be assigned to
- # func_cname string C func implementing Python func
- # func_modifiers [string] C function modifiers ('inline')
- # pos position Source position where declared
- # namespace_cname string If is_pyglobal, the C variable
- # holding its home namespace
- # pymethdef_cname string PyMethodDef structure
- # signature Signature Arg & return types for Python func
- # as_variable Entry Alternative interpretation of extension
- # type name or builtin C function as a variable
- # xdecref_cleanup boolean Use Py_XDECREF for error cleanup
- # in_cinclude boolean Suppress C declaration code
- # enum_values [Entry] For enum types, list of values
- # qualified_name string "modname.funcname" or "modname.classname"
- # or "modname.classname.funcname"
- # is_declared_generic boolean Is declared as PyObject * even though its
- # type is an extension type
- # as_module None Module scope, if a cimported module
- # is_inherited boolean Is an inherited attribute of an extension type
- # pystring_cname string C name of Python version of string literal
- # is_interned boolean For string const entries, value is interned
- # is_identifier boolean For string const entries, value is an identifier
- # used boolean
- # is_special boolean Is a special method or property accessor
- # of an extension type
- # defined_in_pxd boolean Is defined in a .pxd file (not just declared)
- # api boolean Generate C API for C class or function
- # utility_code string Utility code needed when this entry is used
- #
- # buffer_aux BufferAux or None Extra information needed for buffer variables
- # inline_func_in_pxd boolean Hacky special case for inline function in pxd file.
+ # is_readonly boolean Can't be assigned to
+ # func_cname string C func implementing Python func
+ # func_modifiers [string] C function modifiers ('inline')
+ # pos position Source position where declared
+ # namespace_cname string If is_pyglobal, the C variable
+ # holding its home namespace
+ # pymethdef_cname string PyMethodDef structure
+ # signature Signature Arg & return types for Python func
+ # as_variable Entry Alternative interpretation of extension
+ # type name or builtin C function as a variable
+ # xdecref_cleanup boolean Use Py_XDECREF for error cleanup
+ # in_cinclude boolean Suppress C declaration code
+ # enum_values [Entry] For enum types, list of values
+ # qualified_name string "modname.funcname" or "modname.classname"
+ # or "modname.classname.funcname"
+ # is_declared_generic boolean Is declared as PyObject * even though its
+ # type is an extension type
+ # as_module None Module scope, if a cimported module
+ # is_inherited boolean Is an inherited attribute of an extension type
+ # pystring_cname string C name of Python version of string literal
+ # is_interned boolean For string const entries, value is interned
+ # is_identifier boolean For string const entries, value is an identifier
+ # used boolean
+ # is_special boolean Is a special method or property accessor
+ # of an extension type
+ # defined_in_pxd boolean Is defined in a .pxd file (not just declared)
+ # api boolean Generate C API for C class or function
+ # utility_code string Utility code needed when this entry is used
+ #
+ # buffer_aux BufferAux or None Extra information needed for buffer variables
+ # inline_func_in_pxd boolean Hacky special case for inline function in pxd file.
# Ideally this should not be necessary.
- # might_overflow boolean In an arithmetic expression that could cause
- # overflow (used for type inference).
- # utility_code_definition For some Cython builtins, the utility code
- # which contains the definition of the entry.
- # Currently only supported for CythonScope entries.
- # error_on_uninitialized Have Control Flow issue an error when this entry is
- # used uninitialized
- # cf_used boolean Entry is used
- # is_fused_specialized boolean Whether this entry of a cdef or def function
- # is a specialization
-
- # TODO: utility_code and utility_code_definition serves the same purpose...
-
- inline_func_in_pxd = False
- borrowed = 0
- init = ""
+ # might_overflow boolean In an arithmetic expression that could cause
+ # overflow (used for type inference).
+ # utility_code_definition For some Cython builtins, the utility code
+ # which contains the definition of the entry.
+ # Currently only supported for CythonScope entries.
+ # error_on_uninitialized Have Control Flow issue an error when this entry is
+ # used uninitialized
+ # cf_used boolean Entry is used
+ # is_fused_specialized boolean Whether this entry of a cdef or def function
+ # is a specialization
+
+ # TODO: utility_code and utility_code_definition serve the same purpose...
+
+ inline_func_in_pxd = False
+ borrowed = 0
+ init = ""
annotation = None
- visibility = 'private'
- is_builtin = 0
- is_cglobal = 0
- is_pyglobal = 0
- is_member = 0
- is_pyclass_attr = 0
- is_variable = 0
- is_cfunction = 0
- is_cmethod = 0
- is_builtin_cmethod = False
- is_unbound_cmethod = 0
- is_final_cmethod = 0
- is_inline_cmethod = 0
- is_anonymous = 0
- is_type = 0
- is_cclass = 0
- is_cpp_class = 0
- is_const = 0
- is_property = 0
- doc_cname = None
- getter_cname = None
- setter_cname = None
- is_self_arg = 0
- is_arg = 0
- is_local = 0
- in_closure = 0
- from_closure = 0
+ visibility = 'private'
+ is_builtin = 0
+ is_cglobal = 0
+ is_pyglobal = 0
+ is_member = 0
+ is_pyclass_attr = 0
+ is_variable = 0
+ is_cfunction = 0
+ is_cmethod = 0
+ is_builtin_cmethod = False
+ is_unbound_cmethod = 0
+ is_final_cmethod = 0
+ is_inline_cmethod = 0
+ is_anonymous = 0
+ is_type = 0
+ is_cclass = 0
+ is_cpp_class = 0
+ is_const = 0
+ is_property = 0
+ doc_cname = None
+ getter_cname = None
+ setter_cname = None
+ is_self_arg = 0
+ is_arg = 0
+ is_local = 0
+ in_closure = 0
+ from_closure = 0
in_subscope = 0
- is_declared_generic = 0
- is_readonly = 0
- pyfunc_cname = None
- func_cname = None
- func_modifiers = []
- final_func_cname = None
- doc = None
- as_variable = None
- xdecref_cleanup = 0
- in_cinclude = 0
- as_module = None
- is_inherited = 0
- pystring_cname = None
- is_identifier = 0
- is_interned = 0
- used = 0
- is_special = 0
- defined_in_pxd = 0
- is_implemented = 0
- api = 0
- utility_code = None
- is_overridable = 0
- buffer_aux = None
- prev_entry = None
- might_overflow = 0
- fused_cfunction = None
- is_fused_specialized = False
- utility_code_definition = None
- needs_property = False
- in_with_gil_block = 0
- from_cython_utility_code = None
- error_on_uninitialized = False
- cf_used = True
- outer_entry = None
-
- def __init__(self, name, cname, type, pos = None, init = None):
- self.name = name
- self.cname = cname
- self.type = type
- self.pos = pos
- self.init = init
- self.overloaded_alternatives = []
- self.cf_assignments = []
- self.cf_references = []
- self.inner_entries = []
- self.defining_entry = self
-
- def __repr__(self):
- return "%s(<%x>, name=%s, type=%s)" % (type(self).__name__, id(self), self.name, self.type)
-
+ is_declared_generic = 0
+ is_readonly = 0
+ pyfunc_cname = None
+ func_cname = None
+ func_modifiers = []
+ final_func_cname = None
+ doc = None
+ as_variable = None
+ xdecref_cleanup = 0
+ in_cinclude = 0
+ as_module = None
+ is_inherited = 0
+ pystring_cname = None
+ is_identifier = 0
+ is_interned = 0
+ used = 0
+ is_special = 0
+ defined_in_pxd = 0
+ is_implemented = 0
+ api = 0
+ utility_code = None
+ is_overridable = 0
+ buffer_aux = None
+ prev_entry = None
+ might_overflow = 0
+ fused_cfunction = None
+ is_fused_specialized = False
+ utility_code_definition = None
+ needs_property = False
+ in_with_gil_block = 0
+ from_cython_utility_code = None
+ error_on_uninitialized = False
+ cf_used = True
+ outer_entry = None
+
+ def __init__(self, name, cname, type, pos = None, init = None):
+ self.name = name
+ self.cname = cname
+ self.type = type
+ self.pos = pos
+ self.init = init
+ self.overloaded_alternatives = []
+ self.cf_assignments = []
+ self.cf_references = []
+ self.inner_entries = []
+ self.defining_entry = self
+
+ def __repr__(self):
+ return "%s(<%x>, name=%s, type=%s)" % (type(self).__name__, id(self), self.name, self.type)
+
def already_declared_here(self):
error(self.pos, "Previous declaration is here")
- def redeclared(self, pos):
- error(pos, "'%s' does not match previous declaration" % self.name)
+ def redeclared(self, pos):
+ error(pos, "'%s' does not match previous declaration" % self.name)
self.already_declared_here()
-
- def all_alternatives(self):
- return [self] + self.overloaded_alternatives
-
- def all_entries(self):
- return [self] + self.inner_entries
-
+
+ def all_alternatives(self):
+ return [self] + self.overloaded_alternatives
+
+ def all_entries(self):
+ return [self] + self.inner_entries
+
def __lt__(left, right):
if isinstance(left, Entry) and isinstance(right, Entry):
return (left.name, left.cname) < (right.name, right.cname)
else:
return NotImplemented
-
-
-class InnerEntry(Entry):
- """
- An entry in a closure scope that represents the real outer Entry.
- """
- from_closure = True
-
- def __init__(self, outer_entry, scope):
- Entry.__init__(self, outer_entry.name,
- outer_entry.cname,
- outer_entry.type,
- outer_entry.pos)
- self.outer_entry = outer_entry
- self.scope = scope
-
- # share state with (outermost) defining entry
- outermost_entry = outer_entry
- while outermost_entry.outer_entry:
- outermost_entry = outermost_entry.outer_entry
- self.defining_entry = outermost_entry
- self.inner_entries = outermost_entry.inner_entries
- self.cf_assignments = outermost_entry.cf_assignments
- self.cf_references = outermost_entry.cf_references
- self.overloaded_alternatives = outermost_entry.overloaded_alternatives
- self.inner_entries.append(self)
-
- def __getattr__(self, name):
- if name.startswith('__'):
- # we wouldn't have been called if it was there
- raise AttributeError(name)
- return getattr(self.defining_entry, name)
-
- def all_entries(self):
- return self.defining_entry.all_entries()
-
-
-class Scope(object):
- # name string Unqualified name
- # outer_scope Scope or None Enclosing scope
- # entries {string : Entry} Python name to entry, non-types
- # const_entries [Entry] Constant entries
- # type_entries [Entry] Struct/union/enum/typedef/exttype entries
- # sue_entries [Entry] Struct/union/enum entries
- # arg_entries [Entry] Function argument entries
- # var_entries [Entry] User-defined variable entries
- # pyfunc_entries [Entry] Python function entries
- # cfunc_entries [Entry] C function entries
- # c_class_entries [Entry] All extension type entries
- # cname_to_entry {string : Entry} Temp cname to entry mapping
- # return_type PyrexType or None Return type of function owning scope
- # is_builtin_scope boolean Is the builtin scope of Python/Cython
- # is_py_class_scope boolean Is a Python class scope
- # is_c_class_scope boolean Is an extension type scope
- # is_closure_scope boolean Is a closure scope
- # is_passthrough boolean Outer scope is passed directly
- # is_cpp_class_scope boolean Is a C++ class scope
- # is_property_scope boolean Is a extension type property scope
- # scope_prefix string Disambiguator for C names
- # in_cinclude boolean Suppress C declaration code
- # qualified_name string "modname" or "modname.classname"
- # Python strings in this scope
- # nogil boolean In a nogil section
- # directives dict Helper variable for the recursive
- # analysis, contains directive values.
- # is_internal boolean Is only used internally (simpler setup)
-
- is_builtin_scope = 0
- is_py_class_scope = 0
- is_c_class_scope = 0
- is_closure_scope = 0
+
+
+class InnerEntry(Entry):
+ """
+ An entry in a closure scope that represents the real outer Entry.
+ """
+ from_closure = True
+
+ def __init__(self, outer_entry, scope):
+ Entry.__init__(self, outer_entry.name,
+ outer_entry.cname,
+ outer_entry.type,
+ outer_entry.pos)
+ self.outer_entry = outer_entry
+ self.scope = scope
+
+ # share state with (outermost) defining entry
+ outermost_entry = outer_entry
+ while outermost_entry.outer_entry:
+ outermost_entry = outermost_entry.outer_entry
+ self.defining_entry = outermost_entry
+ self.inner_entries = outermost_entry.inner_entries
+ self.cf_assignments = outermost_entry.cf_assignments
+ self.cf_references = outermost_entry.cf_references
+ self.overloaded_alternatives = outermost_entry.overloaded_alternatives
+ self.inner_entries.append(self)
+
+ def __getattr__(self, name):
+ if name.startswith('__'):
+ # we wouldn't have been called if it was there
+ raise AttributeError(name)
+ return getattr(self.defining_entry, name)
+
+ def all_entries(self):
+ return self.defining_entry.all_entries()
+
+
+class Scope(object):
+ # name string Unqualified name
+ # outer_scope Scope or None Enclosing scope
+ # entries {string : Entry} Python name to entry, non-types
+ # const_entries [Entry] Constant entries
+ # type_entries [Entry] Struct/union/enum/typedef/exttype entries
+ # sue_entries [Entry] Struct/union/enum entries
+ # arg_entries [Entry] Function argument entries
+ # var_entries [Entry] User-defined variable entries
+ # pyfunc_entries [Entry] Python function entries
+ # cfunc_entries [Entry] C function entries
+ # c_class_entries [Entry] All extension type entries
+ # cname_to_entry {string : Entry} Temp cname to entry mapping
+ # return_type PyrexType or None Return type of function owning scope
+ # is_builtin_scope boolean Is the builtin scope of Python/Cython
+ # is_py_class_scope boolean Is a Python class scope
+ # is_c_class_scope boolean Is an extension type scope
+ # is_closure_scope boolean Is a closure scope
+ # is_passthrough boolean Outer scope is passed directly
+ # is_cpp_class_scope boolean Is a C++ class scope
+ # is_property_scope boolean Is an extension type property scope
+ # scope_prefix string Disambiguator for C names
+ # in_cinclude boolean Suppress C declaration code
+ # qualified_name string "modname" or "modname.classname"
+ # Python strings in this scope
+ # nogil boolean In a nogil section
+ # directives dict Helper variable for the recursive
+ # analysis, contains directive values.
+ # is_internal boolean Is only used internally (simpler setup)
+
+ is_builtin_scope = 0
+ is_py_class_scope = 0
+ is_c_class_scope = 0
+ is_closure_scope = 0
is_genexpr_scope = 0
- is_passthrough = 0
- is_cpp_class_scope = 0
- is_property_scope = 0
- is_module_scope = 0
- is_internal = 0
- scope_prefix = ""
- in_cinclude = 0
- nogil = 0
- fused_to_specific = None
+ is_passthrough = 0
+ is_cpp_class_scope = 0
+ is_property_scope = 0
+ is_module_scope = 0
+ is_internal = 0
+ scope_prefix = ""
+ in_cinclude = 0
+ nogil = 0
+ fused_to_specific = None
return_type = None
-
- def __init__(self, name, outer_scope, parent_scope):
- # The outer_scope is the next scope in the lookup chain.
- # The parent_scope is used to derive the qualified name of this scope.
- self.name = name
- self.outer_scope = outer_scope
- self.parent_scope = parent_scope
+
+ def __init__(self, name, outer_scope, parent_scope):
+ # The outer_scope is the next scope in the lookup chain.
+ # The parent_scope is used to derive the qualified name of this scope.
+ self.name = name
+ self.outer_scope = outer_scope
+ self.parent_scope = parent_scope
mangled_name = "%d%s_" % (len(name), name.replace('.', '_dot_'))
- qual_scope = self.qualifying_scope()
- if qual_scope:
- self.qualified_name = qual_scope.qualify_name(name)
- self.scope_prefix = qual_scope.scope_prefix + mangled_name
- else:
- self.qualified_name = EncodedString(name)
- self.scope_prefix = mangled_name
- self.entries = {}
+ qual_scope = self.qualifying_scope()
+ if qual_scope:
+ self.qualified_name = qual_scope.qualify_name(name)
+ self.scope_prefix = qual_scope.scope_prefix + mangled_name
+ else:
+ self.qualified_name = EncodedString(name)
+ self.scope_prefix = mangled_name
+ self.entries = {}
self.subscopes = set()
- self.const_entries = []
- self.type_entries = []
- self.sue_entries = []
- self.arg_entries = []
- self.var_entries = []
- self.pyfunc_entries = []
- self.cfunc_entries = []
- self.c_class_entries = []
- self.defined_c_classes = []
- self.imported_c_classes = {}
- self.cname_to_entry = {}
- self.string_to_entry = {}
- self.identifier_to_entry = {}
- self.num_to_entry = {}
- self.obj_to_entry = {}
- self.buffer_entries = []
- self.lambda_defs = []
- self.id_counters = {}
-
- def __deepcopy__(self, memo):
- return self
-
- def merge_in(self, other, merge_unused=True, whitelist=None):
- # Use with care...
- entries = []
+ self.const_entries = []
+ self.type_entries = []
+ self.sue_entries = []
+ self.arg_entries = []
+ self.var_entries = []
+ self.pyfunc_entries = []
+ self.cfunc_entries = []
+ self.c_class_entries = []
+ self.defined_c_classes = []
+ self.imported_c_classes = {}
+ self.cname_to_entry = {}
+ self.string_to_entry = {}
+ self.identifier_to_entry = {}
+ self.num_to_entry = {}
+ self.obj_to_entry = {}
+ self.buffer_entries = []
+ self.lambda_defs = []
+ self.id_counters = {}
+
+ def __deepcopy__(self, memo):
+ return self
+
+ def merge_in(self, other, merge_unused=True, whitelist=None):
+ # Use with care...
+ entries = []
for name, entry in other.entries.items():
- if not whitelist or name in whitelist:
- if entry.used or merge_unused:
- entries.append((name, entry))
-
- self.entries.update(entries)
-
- for attr in ('const_entries',
- 'type_entries',
- 'sue_entries',
- 'arg_entries',
- 'var_entries',
- 'pyfunc_entries',
- 'cfunc_entries',
- 'c_class_entries'):
- self_entries = getattr(self, attr)
- names = set(e.name for e in self_entries)
- for entry in getattr(other, attr):
- if (entry.used or merge_unused) and entry.name not in names:
- self_entries.append(entry)
-
- def __str__(self):
- return "<%s %s>" % (self.__class__.__name__, self.qualified_name)
-
- def qualifying_scope(self):
- return self.parent_scope
-
- def mangle(self, prefix, name = None):
- if name:
- return "%s%s%s" % (prefix, self.scope_prefix, name)
- else:
- return self.parent_scope.mangle(prefix, self.name)
-
- def mangle_internal(self, name):
- # Mangle an internal name so as not to clash with any
- # user-defined name in this scope.
- prefix = "%s%s_" % (Naming.pyrex_prefix, name)
- return self.mangle(prefix)
- #return self.parent_scope.mangle(prefix, self.name)
-
- def mangle_class_private_name(self, name):
- if self.parent_scope:
- return self.parent_scope.mangle_class_private_name(name)
- return name
-
- def next_id(self, name=None):
- # Return a cname fragment that is unique for this module
- counters = self.global_scope().id_counters
- try:
- count = counters[name] + 1
- except KeyError:
- count = 0
- counters[name] = count
- if name:
- if not count:
- # unique names don't need a suffix, reoccurrences will get one
- return name
- return '%s%d' % (name, count)
- else:
- return '%d' % count
-
- def global_scope(self):
- """ Return the module-level scope containing this scope. """
- return self.outer_scope.global_scope()
-
- def builtin_scope(self):
- """ Return the module-level scope containing this scope. """
- return self.outer_scope.builtin_scope()
-
+ if not whitelist or name in whitelist:
+ if entry.used or merge_unused:
+ entries.append((name, entry))
+
+ self.entries.update(entries)
+
+ for attr in ('const_entries',
+ 'type_entries',
+ 'sue_entries',
+ 'arg_entries',
+ 'var_entries',
+ 'pyfunc_entries',
+ 'cfunc_entries',
+ 'c_class_entries'):
+ self_entries = getattr(self, attr)
+ names = set(e.name for e in self_entries)
+ for entry in getattr(other, attr):
+ if (entry.used or merge_unused) and entry.name not in names:
+ self_entries.append(entry)
+
+ def __str__(self):
+ return "<%s %s>" % (self.__class__.__name__, self.qualified_name)
+
+ def qualifying_scope(self):
+ return self.parent_scope
+
+ def mangle(self, prefix, name = None):
+ if name:
+ return "%s%s%s" % (prefix, self.scope_prefix, name)
+ else:
+ return self.parent_scope.mangle(prefix, self.name)
+
+ def mangle_internal(self, name):
+ # Mangle an internal name so as not to clash with any
+ # user-defined name in this scope.
+ prefix = "%s%s_" % (Naming.pyrex_prefix, name)
+ return self.mangle(prefix)
+ #return self.parent_scope.mangle(prefix, self.name)
+
+ def mangle_class_private_name(self, name):
+ if self.parent_scope:
+ return self.parent_scope.mangle_class_private_name(name)
+ return name
+
+ def next_id(self, name=None):
+ # Return a cname fragment that is unique for this module
+ counters = self.global_scope().id_counters
+ try:
+ count = counters[name] + 1
+ except KeyError:
+ count = 0
+ counters[name] = count
+ if name:
+ if not count:
+ # unique names don't need a suffix, reoccurrences will get one
+ return name
+ return '%s%d' % (name, count)
+ else:
+ return '%d' % count
+
+ def global_scope(self):
+ """ Return the module-level scope containing this scope. """
+ return self.outer_scope.global_scope()
+
+ def builtin_scope(self):
+ """ Return the module-level scope containing this scope. """
+ return self.outer_scope.builtin_scope()
+
def iter_local_scopes(self):
yield self
if self.subscopes:
for scope in sorted(self.subscopes, key=operator.attrgetter('scope_prefix')):
yield scope
- def declare(self, name, cname, type, pos, visibility, shadow = 0, is_type = 0, create_wrapper = 0):
- # Create new entry, and add to dictionary if
- # name is not None. Reports a warning if already
- # declared.
- if type.is_buffer and not isinstance(self, LocalScope): # and not is_type:
- error(pos, 'Buffer types only allowed as function local variables')
- if not self.in_cinclude and cname and re.match("^_[_A-Z]+$", cname):
- # See http://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names
- warning(pos, "'%s' is a reserved name in C." % cname, -1)
- entries = self.entries
- if name and name in entries and not shadow:
+ def declare(self, name, cname, type, pos, visibility, shadow = 0, is_type = 0, create_wrapper = 0):
+ # Create new entry, and add to dictionary if
+ # name is not None. Reports a warning if already
+ # declared.
+ if type.is_buffer and not isinstance(self, LocalScope): # and not is_type:
+ error(pos, 'Buffer types only allowed as function local variables')
+ if not self.in_cinclude and cname and re.match("^_[_A-Z]+$", cname):
+ # See http://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names
+ warning(pos, "'%s' is a reserved name in C." % cname, -1)
+ entries = self.entries
+ if name and name in entries and not shadow:
old_entry = entries[name]
# Reject redeclared C++ functions only if they have the same type signature.
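
Scope.__init__ and Scope.mangle in the hunk above derive C names by chaining a per-scope prefix of the form "<len><name>_" (with '.' spelled as '_dot_') and prepending a fixed prefix taken from Naming. A simplified standalone sketch of that scheme; the scope names and the '__pyx_v_' prefix below are only illustrative placeholders:

def scope_prefix(names):
    # mirrors: mangled_name = "%d%s_" % (len(name), name.replace('.', '_dot_'))
    return ''.join('%d%s_' % (len(n), n.replace('.', '_dot_')) for n in names)

def mangle(prefix, names, name):
    # mirrors Scope.mangle: prefix + chained scope prefix + name
    return '%s%s%s' % (prefix, scope_prefix(names), name)

print(mangle('__pyx_v_', ['spam', 'Ham'], 'eggs'))   # -> __pyx_v_4spam_3Ham_eggs
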
@@ -471,296 +471,296 @@ class Scope(object):
# Silenced outside of "cdef extern" blocks, until we have a safe way to
# prevent pxd-defined cpdef functions from ending up here.
warning(pos, "'%s' redeclared " % name, 1 if self.in_cinclude else 0)
- elif visibility != 'ignore':
- error(pos, "'%s' redeclared " % name)
+ elif visibility != 'ignore':
+ error(pos, "'%s' redeclared " % name)
entries[name].already_declared_here()
- entry = Entry(name, cname, type, pos = pos)
- entry.in_cinclude = self.in_cinclude
- entry.create_wrapper = create_wrapper
- if name:
- entry.qualified_name = self.qualify_name(name)
-# if name in entries and self.is_cpp():
-# entries[name].overloaded_alternatives.append(entry)
-# else:
-# entries[name] = entry
- if not shadow:
- entries[name] = entry
-
- if type.is_memoryviewslice:
- from . import MemoryView
- entry.init = MemoryView.memslice_entry_init
-
- entry.scope = self
- entry.visibility = visibility
- return entry
-
- def qualify_name(self, name):
- return EncodedString("%s.%s" % (self.qualified_name, name))
-
- def declare_const(self, name, type, value, pos, cname = None, visibility = 'private', api = 0, create_wrapper = 0):
- # Add an entry for a named constant.
- if not cname:
- if self.in_cinclude or (visibility == 'public' or api):
- cname = name
- else:
- cname = self.mangle(Naming.enum_prefix, name)
- entry = self.declare(name, cname, type, pos, visibility, create_wrapper = create_wrapper)
- entry.is_const = 1
- entry.value_node = value
- return entry
-
- def declare_type(self, name, type, pos,
- cname = None, visibility = 'private', api = 0, defining = 1,
- shadow = 0, template = 0):
- # Add an entry for a type definition.
- if not cname:
- cname = name
- entry = self.declare(name, cname, type, pos, visibility, shadow,
- is_type=True)
- entry.is_type = 1
- entry.api = api
- if defining:
- self.type_entries.append(entry)
-
- if not template:
- type.entry = entry
-
- # here we would set as_variable to an object representing this type
- return entry
-
- def declare_typedef(self, name, base_type, pos, cname = None,
- visibility = 'private', api = 0):
- if not cname:
+ entry = Entry(name, cname, type, pos = pos)
+ entry.in_cinclude = self.in_cinclude
+ entry.create_wrapper = create_wrapper
+ if name:
+ entry.qualified_name = self.qualify_name(name)
+# if name in entries and self.is_cpp():
+# entries[name].overloaded_alternatives.append(entry)
+# else:
+# entries[name] = entry
+ if not shadow:
+ entries[name] = entry
+
+ if type.is_memoryviewslice:
+ from . import MemoryView
+ entry.init = MemoryView.memslice_entry_init
+
+ entry.scope = self
+ entry.visibility = visibility
+ return entry
+
+ def qualify_name(self, name):
+ return EncodedString("%s.%s" % (self.qualified_name, name))
+
+ def declare_const(self, name, type, value, pos, cname = None, visibility = 'private', api = 0, create_wrapper = 0):
+ # Add an entry for a named constant.
+ if not cname:
+ if self.in_cinclude or (visibility == 'public' or api):
+ cname = name
+ else:
+ cname = self.mangle(Naming.enum_prefix, name)
+ entry = self.declare(name, cname, type, pos, visibility, create_wrapper = create_wrapper)
+ entry.is_const = 1
+ entry.value_node = value
+ return entry
+
+ def declare_type(self, name, type, pos,
+ cname = None, visibility = 'private', api = 0, defining = 1,
+ shadow = 0, template = 0):
+ # Add an entry for a type definition.
+ if not cname:
+ cname = name
+ entry = self.declare(name, cname, type, pos, visibility, shadow,
+ is_type=True)
+ entry.is_type = 1
+ entry.api = api
+ if defining:
+ self.type_entries.append(entry)
+
+ if not template:
+ type.entry = entry
+
+ # here we would set as_variable to an object representing this type
+ return entry
+
+ def declare_typedef(self, name, base_type, pos, cname = None,
+ visibility = 'private', api = 0):
+ if not cname:
if self.in_cinclude or (visibility != 'private' or api):
- cname = name
- else:
- cname = self.mangle(Naming.type_prefix, name)
- try:
+ cname = name
+ else:
+ cname = self.mangle(Naming.type_prefix, name)
+ try:
if self.is_cpp_class_scope:
namespace = self.outer_scope.lookup(self.name).type
else:
namespace = None
- type = PyrexTypes.create_typedef_type(name, base_type, cname,
+ type = PyrexTypes.create_typedef_type(name, base_type, cname,
(visibility == 'extern'),
namespace)
except ValueError as e:
- error(pos, e.args[0])
- type = PyrexTypes.error_type
- entry = self.declare_type(name, type, pos, cname,
- visibility = visibility, api = api)
- type.qualified_name = entry.qualified_name
- return entry
-
- def declare_struct_or_union(self, name, kind, scope,
- typedef_flag, pos, cname = None,
- visibility = 'private', api = 0,
- packed = False):
- # Add an entry for a struct or union definition.
- if not cname:
- if self.in_cinclude or (visibility == 'public' or api):
- cname = name
- else:
- cname = self.mangle(Naming.type_prefix, name)
- entry = self.lookup_here(name)
- if not entry:
- type = PyrexTypes.CStructOrUnionType(
- name, kind, scope, typedef_flag, cname, packed)
- entry = self.declare_type(name, type, pos, cname,
- visibility = visibility, api = api,
- defining = scope is not None)
- self.sue_entries.append(entry)
- type.entry = entry
- else:
- if not (entry.is_type and entry.type.is_struct_or_union
- and entry.type.kind == kind):
- warning(pos, "'%s' redeclared " % name, 0)
- elif scope and entry.type.scope:
- warning(pos, "'%s' already defined (ignoring second definition)" % name, 0)
- else:
- self.check_previous_typedef_flag(entry, typedef_flag, pos)
- self.check_previous_visibility(entry, visibility, pos)
- if scope:
- entry.type.scope = scope
- self.type_entries.append(entry)
+ error(pos, e.args[0])
+ type = PyrexTypes.error_type
+ entry = self.declare_type(name, type, pos, cname,
+ visibility = visibility, api = api)
+ type.qualified_name = entry.qualified_name
+ return entry
+
+ def declare_struct_or_union(self, name, kind, scope,
+ typedef_flag, pos, cname = None,
+ visibility = 'private', api = 0,
+ packed = False):
+ # Add an entry for a struct or union definition.
+ if not cname:
+ if self.in_cinclude or (visibility == 'public' or api):
+ cname = name
+ else:
+ cname = self.mangle(Naming.type_prefix, name)
+ entry = self.lookup_here(name)
+ if not entry:
+ type = PyrexTypes.CStructOrUnionType(
+ name, kind, scope, typedef_flag, cname, packed)
+ entry = self.declare_type(name, type, pos, cname,
+ visibility = visibility, api = api,
+ defining = scope is not None)
+ self.sue_entries.append(entry)
+ type.entry = entry
+ else:
+ if not (entry.is_type and entry.type.is_struct_or_union
+ and entry.type.kind == kind):
+ warning(pos, "'%s' redeclared " % name, 0)
+ elif scope and entry.type.scope:
+ warning(pos, "'%s' already defined (ignoring second definition)" % name, 0)
+ else:
+ self.check_previous_typedef_flag(entry, typedef_flag, pos)
+ self.check_previous_visibility(entry, visibility, pos)
+ if scope:
+ entry.type.scope = scope
+ self.type_entries.append(entry)
if self.is_cpp_class_scope:
entry.type.namespace = self.outer_scope.lookup(self.name).type
- return entry
-
- def declare_cpp_class(self, name, scope,
- pos, cname = None, base_classes = (),
- visibility = 'extern', templates = None):
- if cname is None:
- if self.in_cinclude or (visibility != 'private'):
- cname = name
- else:
- cname = self.mangle(Naming.type_prefix, name)
- base_classes = list(base_classes)
- entry = self.lookup_here(name)
- if not entry:
- type = PyrexTypes.CppClassType(
- name, scope, cname, base_classes, templates = templates)
- entry = self.declare_type(name, type, pos, cname,
- visibility = visibility, defining = scope is not None)
- self.sue_entries.append(entry)
- else:
- if not (entry.is_type and entry.type.is_cpp_class):
- error(pos, "'%s' redeclared " % name)
+ return entry
+
+ def declare_cpp_class(self, name, scope,
+ pos, cname = None, base_classes = (),
+ visibility = 'extern', templates = None):
+ if cname is None:
+ if self.in_cinclude or (visibility != 'private'):
+ cname = name
+ else:
+ cname = self.mangle(Naming.type_prefix, name)
+ base_classes = list(base_classes)
+ entry = self.lookup_here(name)
+ if not entry:
+ type = PyrexTypes.CppClassType(
+ name, scope, cname, base_classes, templates = templates)
+ entry = self.declare_type(name, type, pos, cname,
+ visibility = visibility, defining = scope is not None)
+ self.sue_entries.append(entry)
+ else:
+ if not (entry.is_type and entry.type.is_cpp_class):
+ error(pos, "'%s' redeclared " % name)
entry.already_declared_here()
- return None
- elif scope and entry.type.scope:
- warning(pos, "'%s' already defined (ignoring second definition)" % name, 0)
- else:
- if scope:
- entry.type.scope = scope
- self.type_entries.append(entry)
- if base_classes:
- if entry.type.base_classes and entry.type.base_classes != base_classes:
- error(pos, "Base type does not match previous declaration")
+ return None
+ elif scope and entry.type.scope:
+ warning(pos, "'%s' already defined (ignoring second definition)" % name, 0)
+ else:
+ if scope:
+ entry.type.scope = scope
+ self.type_entries.append(entry)
+ if base_classes:
+ if entry.type.base_classes and entry.type.base_classes != base_classes:
+ error(pos, "Base type does not match previous declaration")
entry.already_declared_here()
- else:
- entry.type.base_classes = base_classes
- if templates or entry.type.templates:
- if templates != entry.type.templates:
- error(pos, "Template parameters do not match previous declaration")
+ else:
+ entry.type.base_classes = base_classes
+ if templates or entry.type.templates:
+ if templates != entry.type.templates:
+ error(pos, "Template parameters do not match previous declaration")
entry.already_declared_here()
-
- def declare_inherited_attributes(entry, base_classes):
- for base_class in base_classes:
- if base_class is PyrexTypes.error_type:
- continue
- if base_class.scope is None:
- error(pos, "Cannot inherit from incomplete type")
- else:
- declare_inherited_attributes(entry, base_class.base_classes)
+
+ def declare_inherited_attributes(entry, base_classes):
+ for base_class in base_classes:
+ if base_class is PyrexTypes.error_type:
+ continue
+ if base_class.scope is None:
+ error(pos, "Cannot inherit from incomplete type")
+ else:
+ declare_inherited_attributes(entry, base_class.base_classes)
entry.type.scope.declare_inherited_cpp_attributes(base_class)
if scope:
- declare_inherited_attributes(entry, base_classes)
- scope.declare_var(name="this", cname="this", type=PyrexTypes.CPtrType(entry.type), pos=entry.pos)
- if self.is_cpp_class_scope:
- entry.type.namespace = self.outer_scope.lookup(self.name).type
- return entry
-
- def check_previous_typedef_flag(self, entry, typedef_flag, pos):
- if typedef_flag != entry.type.typedef_flag:
- error(pos, "'%s' previously declared using '%s'" % (
- entry.name, ("cdef", "ctypedef")[entry.type.typedef_flag]))
-
- def check_previous_visibility(self, entry, visibility, pos):
- if entry.visibility != visibility:
- error(pos, "'%s' previously declared as '%s'" % (
- entry.name, entry.visibility))
-
- def declare_enum(self, name, pos, cname, typedef_flag,
- visibility = 'private', api = 0, create_wrapper = 0):
- if name:
- if not cname:
+ declare_inherited_attributes(entry, base_classes)
+ scope.declare_var(name="this", cname="this", type=PyrexTypes.CPtrType(entry.type), pos=entry.pos)
+ if self.is_cpp_class_scope:
+ entry.type.namespace = self.outer_scope.lookup(self.name).type
+ return entry
+
+ def check_previous_typedef_flag(self, entry, typedef_flag, pos):
+ if typedef_flag != entry.type.typedef_flag:
+ error(pos, "'%s' previously declared using '%s'" % (
+ entry.name, ("cdef", "ctypedef")[entry.type.typedef_flag]))
+
+ def check_previous_visibility(self, entry, visibility, pos):
+ if entry.visibility != visibility:
+ error(pos, "'%s' previously declared as '%s'" % (
+ entry.name, entry.visibility))
+
+ def declare_enum(self, name, pos, cname, typedef_flag,
+ visibility = 'private', api = 0, create_wrapper = 0):
+ if name:
+ if not cname:
if (self.in_cinclude or visibility == 'public'
or visibility == 'extern' or api):
- cname = name
- else:
- cname = self.mangle(Naming.type_prefix, name)
+ cname = name
+ else:
+ cname = self.mangle(Naming.type_prefix, name)
if self.is_cpp_class_scope:
namespace = self.outer_scope.lookup(self.name).type
else:
namespace = None
type = PyrexTypes.CEnumType(name, cname, typedef_flag, namespace)
- else:
- type = PyrexTypes.c_anon_enum_type
- entry = self.declare_type(name, type, pos, cname = cname,
- visibility = visibility, api = api)
- entry.create_wrapper = create_wrapper
- entry.enum_values = []
- self.sue_entries.append(entry)
- return entry
-
+ else:
+ type = PyrexTypes.c_anon_enum_type
+ entry = self.declare_type(name, type, pos, cname = cname,
+ visibility = visibility, api = api)
+ entry.create_wrapper = create_wrapper
+ entry.enum_values = []
+ self.sue_entries.append(entry)
+ return entry
+
def declare_tuple_type(self, pos, components):
return self.outer_scope.declare_tuple_type(pos, components)
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = 0):
- # Add an entry for a variable.
- if not cname:
- if visibility != 'private' or api:
- cname = name
- else:
- cname = self.mangle(Naming.var_prefix, name)
- if type.is_cpp_class and visibility != 'extern':
- type.check_nullary_constructor(pos)
- entry = self.declare(name, cname, type, pos, visibility)
- entry.is_variable = 1
- if in_pxd and visibility != 'extern':
- entry.defined_in_pxd = 1
- entry.used = 1
- if api:
- entry.api = 1
- entry.used = 1
- return entry
-
- def declare_builtin(self, name, pos):
- return self.outer_scope.declare_builtin(name, pos)
-
- def _declare_pyfunction(self, name, pos, visibility='extern', entry=None):
- if entry and not entry.type.is_cfunction:
- error(pos, "'%s' already declared" % name)
- error(entry.pos, "Previous declaration is here")
- entry = self.declare_var(name, py_object_type, pos, visibility=visibility)
- entry.signature = pyfunction_signature
- self.pyfunc_entries.append(entry)
- return entry
-
- def declare_pyfunction(self, name, pos, allow_redefine=False, visibility='extern'):
- # Add an entry for a Python function.
- entry = self.lookup_here(name)
- if not allow_redefine:
- return self._declare_pyfunction(name, pos, visibility=visibility, entry=entry)
- if entry:
- if entry.type.is_unspecified:
- entry.type = py_object_type
- elif entry.type is not py_object_type:
- return self._declare_pyfunction(name, pos, visibility=visibility, entry=entry)
- else: # declare entry stub
- self.declare_var(name, py_object_type, pos, visibility=visibility)
- entry = self.declare_var(None, py_object_type, pos,
- cname=name, visibility='private')
- entry.name = EncodedString(name)
- entry.qualified_name = self.qualify_name(name)
- entry.signature = pyfunction_signature
- entry.is_anonymous = True
- return entry
-
- def declare_lambda_function(self, lambda_name, pos):
- # Add an entry for an anonymous Python function.
- func_cname = self.mangle(Naming.lambda_func_prefix + u'funcdef_', lambda_name)
- pymethdef_cname = self.mangle(Naming.lambda_func_prefix + u'methdef_', lambda_name)
- qualified_name = self.qualify_name(lambda_name)
-
- entry = self.declare(None, func_cname, py_object_type, pos, 'private')
- entry.name = lambda_name
- entry.qualified_name = qualified_name
- entry.pymethdef_cname = pymethdef_cname
- entry.func_cname = func_cname
- entry.signature = pyfunction_signature
- entry.is_anonymous = True
- return entry
-
- def add_lambda_def(self, def_node):
- self.lambda_defs.append(def_node)
-
- def register_pyfunction(self, entry):
- self.pyfunc_entries.append(entry)
-
- def declare_cfunction(self, name, type, pos,
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = 0):
+ # Add an entry for a variable.
+ if not cname:
+ if visibility != 'private' or api:
+ cname = name
+ else:
+ cname = self.mangle(Naming.var_prefix, name)
+ if type.is_cpp_class and visibility != 'extern':
+ type.check_nullary_constructor(pos)
+ entry = self.declare(name, cname, type, pos, visibility)
+ entry.is_variable = 1
+ if in_pxd and visibility != 'extern':
+ entry.defined_in_pxd = 1
+ entry.used = 1
+ if api:
+ entry.api = 1
+ entry.used = 1
+ return entry
+
+ def declare_builtin(self, name, pos):
+ return self.outer_scope.declare_builtin(name, pos)
+
+ def _declare_pyfunction(self, name, pos, visibility='extern', entry=None):
+ if entry and not entry.type.is_cfunction:
+ error(pos, "'%s' already declared" % name)
+ error(entry.pos, "Previous declaration is here")
+ entry = self.declare_var(name, py_object_type, pos, visibility=visibility)
+ entry.signature = pyfunction_signature
+ self.pyfunc_entries.append(entry)
+ return entry
+
+ def declare_pyfunction(self, name, pos, allow_redefine=False, visibility='extern'):
+ # Add an entry for a Python function.
+ entry = self.lookup_here(name)
+ if not allow_redefine:
+ return self._declare_pyfunction(name, pos, visibility=visibility, entry=entry)
+ if entry:
+ if entry.type.is_unspecified:
+ entry.type = py_object_type
+ elif entry.type is not py_object_type:
+ return self._declare_pyfunction(name, pos, visibility=visibility, entry=entry)
+ else: # declare entry stub
+ self.declare_var(name, py_object_type, pos, visibility=visibility)
+ entry = self.declare_var(None, py_object_type, pos,
+ cname=name, visibility='private')
+ entry.name = EncodedString(name)
+ entry.qualified_name = self.qualify_name(name)
+ entry.signature = pyfunction_signature
+ entry.is_anonymous = True
+ return entry
+
+ def declare_lambda_function(self, lambda_name, pos):
+ # Add an entry for an anonymous Python function.
+ func_cname = self.mangle(Naming.lambda_func_prefix + u'funcdef_', lambda_name)
+ pymethdef_cname = self.mangle(Naming.lambda_func_prefix + u'methdef_', lambda_name)
+ qualified_name = self.qualify_name(lambda_name)
+
+ entry = self.declare(None, func_cname, py_object_type, pos, 'private')
+ entry.name = lambda_name
+ entry.qualified_name = qualified_name
+ entry.pymethdef_cname = pymethdef_cname
+ entry.func_cname = func_cname
+ entry.signature = pyfunction_signature
+ entry.is_anonymous = True
+ return entry
+
+ def add_lambda_def(self, def_node):
+ self.lambda_defs.append(def_node)
+
+ def register_pyfunction(self, entry):
+ self.pyfunc_entries.append(entry)
+
+ def declare_cfunction(self, name, type, pos,
cname=None, visibility='private', api=0, in_pxd=0,
defining=0, modifiers=(), utility_code=None, overridable=False):
- # Add an entry for a C function.
- if not cname:
- if visibility != 'private' or api:
- cname = name
- else:
- cname = self.mangle(Naming.func_prefix, name)
- entry = self.lookup_here(name)
- if entry:
+ # Add an entry for a C function.
+ if not cname:
+ if visibility != 'private' or api:
+ cname = name
+ else:
+ cname = self.mangle(Naming.func_prefix, name)
+ entry = self.lookup_here(name)
+ if entry:
if not in_pxd and visibility != entry.visibility and visibility == 'extern':
# Previously declared, but now extern => treat this
# as implementing the function, using the new cname
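Reading note for this hunk: the C-name selection shown in declare_var and declare_cfunction above follows one simple rule, which the sketch below restates. This is an illustrative toy, not Cython's actual implementation; the "__pyx_v_" prefix stands in for Naming.var_prefix and the helper is a simplified stand-in for Scope.mangle (ModuleScope refines the rule further down in the diff).

# Sketch only: public/extern/api entries keep their Python name as the C name,
# private entries get a mangled, prefix-based C name to avoid collisions.
VAR_PREFIX = "__pyx_v_"   # assumed placeholder for Naming.var_prefix

def pick_cname(name, visibility="private", api=False, prefix=VAR_PREFIX):
    """Return the C-level name used for a declared symbol."""
    if visibility != "private" or api:
        return name              # must stay linkable under its own name
    return prefix + name         # private: mangle away from the C namespace

assert pick_cname("spam") == "__pyx_v_spam"
assert pick_cname("spam", visibility="public") == "spam"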
@@ -768,7 +768,7 @@ class Scope(object):
visibility = entry.visibility
entry.cname = cname
entry.func_cname = cname
- if visibility != 'private' and visibility != entry.visibility:
+ if visibility != 'private' and visibility != entry.visibility:
warning(pos, "Function '%s' previously declared as '%s', now as '%s'" % (name, entry.visibility, visibility), 1)
if overridable != entry.is_overridable:
warning(pos, "Function '%s' previously declared as '%s'" % (
@@ -777,48 +777,48 @@ class Scope(object):
# Fix with_gil vs nogil.
entry.type = entry.type.with_with_gil(type.with_gil)
else:
- if visibility == 'extern' and entry.visibility == 'extern':
- can_override = False
- if self.is_cpp():
- can_override = True
- elif cname:
- # if all alternatives have different cnames,
- # it's safe to allow signature overrides
- for alt_entry in entry.all_alternatives():
- if not alt_entry.cname or cname == alt_entry.cname:
- break # cname not unique!
- else:
- can_override = True
- if can_override:
- temp = self.add_cfunction(name, type, pos, cname, visibility, modifiers)
- temp.overloaded_alternatives = entry.all_alternatives()
- entry = temp
- else:
- warning(pos, "Function signature does not match previous declaration", 1)
- entry.type = type
+ if visibility == 'extern' and entry.visibility == 'extern':
+ can_override = False
+ if self.is_cpp():
+ can_override = True
+ elif cname:
+ # if all alternatives have different cnames,
+ # it's safe to allow signature overrides
+ for alt_entry in entry.all_alternatives():
+ if not alt_entry.cname or cname == alt_entry.cname:
+ break # cname not unique!
+ else:
+ can_override = True
+ if can_override:
+ temp = self.add_cfunction(name, type, pos, cname, visibility, modifiers)
+ temp.overloaded_alternatives = entry.all_alternatives()
+ entry = temp
+ else:
+ warning(pos, "Function signature does not match previous declaration", 1)
+ entry.type = type
elif not in_pxd and entry.defined_in_pxd and type.compatible_signature_with(entry.type):
# TODO: check that this was done by a signature optimisation and not a user error.
#warning(pos, "Function signature does not match previous declaration", 1)
entry.type = type
- else:
- error(pos, "Function signature does not match previous declaration")
- else:
- entry = self.add_cfunction(name, type, pos, cname, visibility, modifiers)
- entry.func_cname = cname
+ else:
+ error(pos, "Function signature does not match previous declaration")
+ else:
+ entry = self.add_cfunction(name, type, pos, cname, visibility, modifiers)
+ entry.func_cname = cname
entry.is_overridable = overridable
- if in_pxd and visibility != 'extern':
- entry.defined_in_pxd = 1
- if api:
- entry.api = 1
- if not defining and not in_pxd and visibility != 'extern':
- error(pos, "Non-extern C function '%s' declared but not defined" % name)
- if defining:
- entry.is_implemented = True
- if modifiers:
- entry.func_modifiers = modifiers
- if utility_code:
- assert not entry.utility_code, "duplicate utility code definition in entry %s (%s)" % (name, cname)
- entry.utility_code = utility_code
+ if in_pxd and visibility != 'extern':
+ entry.defined_in_pxd = 1
+ if api:
+ entry.api = 1
+ if not defining and not in_pxd and visibility != 'extern':
+ error(pos, "Non-extern C function '%s' declared but not defined" % name)
+ if defining:
+ entry.is_implemented = True
+ if modifiers:
+ entry.func_modifiers = modifiers
+ if utility_code:
+ assert not entry.utility_code, "duplicate utility code definition in entry %s (%s)" % (name, cname)
+ entry.utility_code = utility_code
if overridable:
# names of cpdef functions can be used as variables and can be assigned to
var_entry = Entry(name, cname, py_object_type) # FIXME: cname?
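The redeclaration branch in declare_cfunction above encodes a small overload rule: a second extern declaration of an already-declared extern function is accepted as an overload only in C++ mode, or when every previously registered alternative carries its own distinct, non-empty cname. A hedged sketch of that decision, using plain data stand-ins rather than the real Entry/Scope machinery:

def can_override(is_cpp, new_cname, existing_cnames):
    # C++ allows genuine signature overloads unconditionally
    if is_cpp:
        return True
    if not new_cname:
        return False
    # every alternative must have a cname, and none may clash with the new one
    return all(c and c != new_cname for c in existing_cnames)

assert can_override(True, None, ["f"]) is True
assert can_override(False, "f2", ["f1"]) is True
assert can_override(False, "f1", ["f1"]) is False   # cname not unique
assert can_override(False, "f2", [None]) is False   # an alternative has no cname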
@@ -827,15 +827,15 @@ class Scope(object):
var_entry.is_pyglobal = 1
var_entry.scope = entry.scope
entry.as_variable = var_entry
- type.entry = entry
- return entry
-
+ type.entry = entry
+ return entry
+
def add_cfunction(self, name, type, pos, cname, visibility, modifiers, inherited=False):
- # Add a C function entry without giving it a func_cname.
- entry = self.declare(name, cname, type, pos, visibility)
- entry.is_cfunction = 1
- if modifiers:
- entry.func_modifiers = modifiers
+ # Add a C function entry without giving it a func_cname.
+ entry = self.declare(name, cname, type, pos, visibility)
+ entry.is_cfunction = 1
+ if modifiers:
+ entry.func_modifiers = modifiers
if inherited or type.is_fused:
self.cfunc_entries.append(entry)
else:
@@ -847,68 +847,68 @@ class Scope(object):
break
i -= 1
self.cfunc_entries.insert(i, entry)
- return entry
-
- def find(self, name, pos):
- # Look up name, report error if not found.
- entry = self.lookup(name)
- if entry:
- return entry
- else:
- error(pos, "'%s' is not declared" % name)
-
- def find_imported_module(self, path, pos):
- # Look up qualified name, must be a module, report error if not found.
- # Path is a list of names.
- scope = self
- for name in path:
- entry = scope.find(name, pos)
- if not entry:
- return None
- if entry.as_module:
- scope = entry.as_module
- else:
- error(pos, "'%s' is not a cimported module" % '.'.join(path))
- return None
- return scope
-
- def lookup(self, name):
- # Look up name in this scope or an enclosing one.
- # Return None if not found.
- return (self.lookup_here(name)
- or (self.outer_scope and self.outer_scope.lookup(name))
- or None)
-
- def lookup_here(self, name):
- # Look up in this scope only, return None if not found.
- return self.entries.get(name, None)
-
- def lookup_target(self, name):
- # Look up name in this scope only. Declare as Python
- # variable if not found.
- entry = self.lookup_here(name)
- if not entry:
- entry = self.declare_var(name, py_object_type, None)
- return entry
-
- def lookup_type(self, name):
- entry = self.lookup(name)
- if entry and entry.is_type:
- if entry.type.is_fused and self.fused_to_specific:
- return entry.type.specialize(self.fused_to_specific)
- return entry.type
-
- def lookup_operator(self, operator, operands):
- if operands[0].type.is_cpp_class:
- obj_type = operands[0].type
- method = obj_type.scope.lookup("operator%s" % operator)
- if method is not None:
+ return entry
+
+ def find(self, name, pos):
+ # Look up name, report error if not found.
+ entry = self.lookup(name)
+ if entry:
+ return entry
+ else:
+ error(pos, "'%s' is not declared" % name)
+
+ def find_imported_module(self, path, pos):
+ # Look up qualified name, must be a module, report error if not found.
+ # Path is a list of names.
+ scope = self
+ for name in path:
+ entry = scope.find(name, pos)
+ if not entry:
+ return None
+ if entry.as_module:
+ scope = entry.as_module
+ else:
+ error(pos, "'%s' is not a cimported module" % '.'.join(path))
+ return None
+ return scope
+
+ def lookup(self, name):
+ # Look up name in this scope or an enclosing one.
+ # Return None if not found.
+ return (self.lookup_here(name)
+ or (self.outer_scope and self.outer_scope.lookup(name))
+ or None)
+
+ def lookup_here(self, name):
+ # Look up in this scope only, return None if not found.
+ return self.entries.get(name, None)
+
+ def lookup_target(self, name):
+ # Look up name in this scope only. Declare as Python
+ # variable if not found.
+ entry = self.lookup_here(name)
+ if not entry:
+ entry = self.declare_var(name, py_object_type, None)
+ return entry
+
+ def lookup_type(self, name):
+ entry = self.lookup(name)
+ if entry and entry.is_type:
+ if entry.type.is_fused and self.fused_to_specific:
+ return entry.type.specialize(self.fused_to_specific)
+ return entry.type
+
+ def lookup_operator(self, operator, operands):
+ if operands[0].type.is_cpp_class:
+ obj_type = operands[0].type
+ method = obj_type.scope.lookup("operator%s" % operator)
+ if method is not None:
arg_types = [arg.type for arg in operands[1:]]
res = PyrexTypes.best_match([arg.type for arg in operands[1:]],
method.all_alternatives())
- if res is not None:
- return res
- function = self.lookup("operator%s" % operator)
+ if res is not None:
+ return res
+ function = self.lookup("operator%s" % operator)
function_alternatives = []
if function is not None:
function_alternatives = function.all_alternatives()
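The lookup methods in the hunk above resolve names innermost-first: lookup_here consults only this scope's entries dict, and lookup falls back to the enclosing scope. A minimal, self-contained sketch of that chain, assuming only a dict-backed entries attribute and an optional outer_scope (toy classes, not the real ones):

class ToyScope:
    def __init__(self, entries=None, outer_scope=None):
        self.entries = entries or {}
        self.outer_scope = outer_scope

    def lookup_here(self, name):
        # this scope only; None when absent
        return self.entries.get(name)

    def lookup(self, name):
        # innermost scope wins, then walk outward
        return (self.lookup_here(name)
                or (self.outer_scope and self.outer_scope.lookup(name))
                or None)

builtins_scope = ToyScope({"len": "<builtin len>"})
module_scope = ToyScope({"spam": "<module-level spam>"}, outer_scope=builtins_scope)
assert module_scope.lookup("spam") == "<module-level spam>"
assert module_scope.lookup("len") == "<builtin len>"
assert module_scope.lookup("missing") is None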
@@ -924,34 +924,34 @@ class Scope(object):
method_alternatives += method.all_alternatives()
if (not method_alternatives) and (not function_alternatives):
- return None
+ return None
# select the unique alternatives
all_alternatives = list(set(method_alternatives + function_alternatives))
return PyrexTypes.best_match([arg.type for arg in operands],
all_alternatives)
-
- def lookup_operator_for_types(self, pos, operator, types):
- from .Nodes import Node
- class FakeOperand(Node):
- pass
- operands = [FakeOperand(pos, type=type) for type in types]
- return self.lookup_operator(operator, operands)
-
- def use_utility_code(self, new_code):
- self.global_scope().use_utility_code(new_code)
-
+
+ def lookup_operator_for_types(self, pos, operator, types):
+ from .Nodes import Node
+ class FakeOperand(Node):
+ pass
+ operands = [FakeOperand(pos, type=type) for type in types]
+ return self.lookup_operator(operator, operands)
+
+ def use_utility_code(self, new_code):
+ self.global_scope().use_utility_code(new_code)
+
def use_entry_utility_code(self, entry):
self.global_scope().use_entry_utility_code(entry)
- def defines_any(self, names):
+ def defines_any(self, names):
# Test whether any of the given names are defined in this scope.
- for name in names:
- if name in self.entries:
- return 1
- return 0
-
+ for name in names:
+ if name in self.entries:
+ return 1
+ return 0
+
def defines_any_special(self, names):
# Test whether any of the given names are defined as special methods in this scope.
for name in names:
@@ -959,240 +959,240 @@ class Scope(object):
return 1
return 0
- def infer_types(self):
- from .TypeInference import get_type_inferer
- get_type_inferer().infer_types(self)
-
- def is_cpp(self):
- outer = self.outer_scope
- if outer is None:
- return False
- else:
- return outer.is_cpp()
-
+ def infer_types(self):
+ from .TypeInference import get_type_inferer
+ get_type_inferer().infer_types(self)
+
+ def is_cpp(self):
+ outer = self.outer_scope
+ if outer is None:
+ return False
+ else:
+ return outer.is_cpp()
+
def add_include_file(self, filename, verbatim_include=None, late=False):
self.outer_scope.add_include_file(filename, verbatim_include, late)
-
-
-class PreImportScope(Scope):
-
- namespace_cname = Naming.preimport_cname
-
- def __init__(self):
- Scope.__init__(self, Options.pre_import, None, None)
-
- def declare_builtin(self, name, pos):
- entry = self.declare(name, name, py_object_type, pos, 'private')
- entry.is_variable = True
- entry.is_pyglobal = True
- return entry
-
-
-class BuiltinScope(Scope):
- # The builtin namespace.
-
- is_builtin_scope = True
-
- def __init__(self):
- if Options.pre_import is None:
- Scope.__init__(self, "__builtin__", None, None)
- else:
- Scope.__init__(self, "__builtin__", PreImportScope(), None)
- self.type_names = {}
-
+
+
+class PreImportScope(Scope):
+
+ namespace_cname = Naming.preimport_cname
+
+ def __init__(self):
+ Scope.__init__(self, Options.pre_import, None, None)
+
+ def declare_builtin(self, name, pos):
+ entry = self.declare(name, name, py_object_type, pos, 'private')
+ entry.is_variable = True
+ entry.is_pyglobal = True
+ return entry
+
+
+class BuiltinScope(Scope):
+ # The builtin namespace.
+
+ is_builtin_scope = True
+
+ def __init__(self):
+ if Options.pre_import is None:
+ Scope.__init__(self, "__builtin__", None, None)
+ else:
+ Scope.__init__(self, "__builtin__", PreImportScope(), None)
+ self.type_names = {}
+
for name, definition in sorted(self.builtin_entries.items()):
- cname, type = definition
- self.declare_var(name, type, None, cname)
-
+ cname, type = definition
+ self.declare_var(name, type, None, cname)
+
def lookup(self, name, language_level=None, str_is_str=None):
# 'language_level' and 'str_is_str' are passed by ModuleScope
if name == 'str':
if str_is_str is None:
str_is_str = language_level in (None, 2)
if not str_is_str:
- name = 'unicode'
- return Scope.lookup(self, name)
-
- def declare_builtin(self, name, pos):
- if not hasattr(builtins, name):
- if self.outer_scope is not None:
- return self.outer_scope.declare_builtin(name, pos)
- else:
- if Options.error_on_unknown_names:
- error(pos, "undeclared name not builtin: %s" % name)
- else:
- warning(pos, "undeclared name not builtin: %s" % name, 2)
-
+ name = 'unicode'
+ return Scope.lookup(self, name)
+
+ def declare_builtin(self, name, pos):
+ if not hasattr(builtins, name):
+ if self.outer_scope is not None:
+ return self.outer_scope.declare_builtin(name, pos)
+ else:
+ if Options.error_on_unknown_names:
+ error(pos, "undeclared name not builtin: %s" % name)
+ else:
+ warning(pos, "undeclared name not builtin: %s" % name, 2)
+
def declare_builtin_cfunction(self, name, type, cname, python_equiv=None, utility_code=None):
- # If python_equiv == "*", the Python equivalent has the same name
- # as the entry, otherwise it has the name specified by python_equiv.
- name = EncodedString(name)
- entry = self.declare_cfunction(name, type, None, cname, visibility='extern',
+ # If python_equiv == "*", the Python equivalent has the same name
+ # as the entry, otherwise it has the name specified by python_equiv.
+ name = EncodedString(name)
+ entry = self.declare_cfunction(name, type, None, cname, visibility='extern',
utility_code=utility_code)
- if python_equiv:
- if python_equiv == "*":
- python_equiv = name
- else:
- python_equiv = EncodedString(python_equiv)
- var_entry = Entry(python_equiv, python_equiv, py_object_type)
+ if python_equiv:
+ if python_equiv == "*":
+ python_equiv = name
+ else:
+ python_equiv = EncodedString(python_equiv)
+ var_entry = Entry(python_equiv, python_equiv, py_object_type)
var_entry.qualified_name = self.qualify_name(name)
- var_entry.is_variable = 1
- var_entry.is_builtin = 1
- var_entry.utility_code = utility_code
- var_entry.scope = entry.scope
- entry.as_variable = var_entry
- return entry
-
- def declare_builtin_type(self, name, cname, utility_code = None, objstruct_cname = None):
- name = EncodedString(name)
- type = PyrexTypes.BuiltinObjectType(name, cname, objstruct_cname)
- scope = CClassScope(name, outer_scope=None, visibility='extern')
- scope.directives = {}
- if name == 'bool':
- type.is_final_type = True
- type.set_scope(scope)
- self.type_names[name] = 1
- entry = self.declare_type(name, type, None, visibility='extern')
- entry.utility_code = utility_code
-
- var_entry = Entry(name = entry.name,
- type = self.lookup('type').type, # make sure "type" is the first type declared...
- pos = entry.pos,
+ var_entry.is_variable = 1
+ var_entry.is_builtin = 1
+ var_entry.utility_code = utility_code
+ var_entry.scope = entry.scope
+ entry.as_variable = var_entry
+ return entry
+
+ def declare_builtin_type(self, name, cname, utility_code = None, objstruct_cname = None):
+ name = EncodedString(name)
+ type = PyrexTypes.BuiltinObjectType(name, cname, objstruct_cname)
+ scope = CClassScope(name, outer_scope=None, visibility='extern')
+ scope.directives = {}
+ if name == 'bool':
+ type.is_final_type = True
+ type.set_scope(scope)
+ self.type_names[name] = 1
+ entry = self.declare_type(name, type, None, visibility='extern')
+ entry.utility_code = utility_code
+
+ var_entry = Entry(name = entry.name,
+ type = self.lookup('type').type, # make sure "type" is the first type declared...
+ pos = entry.pos,
cname = entry.type.typeptr_cname)
var_entry.qualified_name = self.qualify_name(name)
- var_entry.is_variable = 1
- var_entry.is_cglobal = 1
- var_entry.is_readonly = 1
- var_entry.is_builtin = 1
- var_entry.utility_code = utility_code
+ var_entry.is_variable = 1
+ var_entry.is_cglobal = 1
+ var_entry.is_readonly = 1
+ var_entry.is_builtin = 1
+ var_entry.utility_code = utility_code
var_entry.scope = self
- if Options.cache_builtins:
- var_entry.is_const = True
- entry.as_variable = var_entry
-
- return type
-
- def builtin_scope(self):
- return self
-
- builtin_entries = {
-
- "type": ["((PyObject*)&PyType_Type)", py_object_type],
-
- "bool": ["((PyObject*)&PyBool_Type)", py_object_type],
- "int": ["((PyObject*)&PyInt_Type)", py_object_type],
- "long": ["((PyObject*)&PyLong_Type)", py_object_type],
- "float": ["((PyObject*)&PyFloat_Type)", py_object_type],
- "complex":["((PyObject*)&PyComplex_Type)", py_object_type],
-
- "bytes": ["((PyObject*)&PyBytes_Type)", py_object_type],
- "bytearray": ["((PyObject*)&PyByteArray_Type)", py_object_type],
- "str": ["((PyObject*)&PyString_Type)", py_object_type],
- "unicode":["((PyObject*)&PyUnicode_Type)", py_object_type],
-
- "tuple": ["((PyObject*)&PyTuple_Type)", py_object_type],
- "list": ["((PyObject*)&PyList_Type)", py_object_type],
- "dict": ["((PyObject*)&PyDict_Type)", py_object_type],
- "set": ["((PyObject*)&PySet_Type)", py_object_type],
- "frozenset": ["((PyObject*)&PyFrozenSet_Type)", py_object_type],
-
- "slice": ["((PyObject*)&PySlice_Type)", py_object_type],
-# "file": ["((PyObject*)&PyFile_Type)", py_object_type], # not in Py3
-
- "None": ["Py_None", py_object_type],
- "False": ["Py_False", py_object_type],
- "True": ["Py_True", py_object_type],
- }
-
-const_counter = 1 # As a temporary solution for compiling code in pxds
-
-class ModuleScope(Scope):
- # module_name string Python name of the module
- # module_cname string C name of Python module object
- # #module_dict_cname string C name of module dict object
- # method_table_cname string C name of method table
- # doc string Module doc string
- # doc_cname string C name of module doc string
- # utility_code_list [UtilityCode] Queuing utility codes for forwarding to Code.py
+ if Options.cache_builtins:
+ var_entry.is_const = True
+ entry.as_variable = var_entry
+
+ return type
+
+ def builtin_scope(self):
+ return self
+
+ builtin_entries = {
+
+ "type": ["((PyObject*)&PyType_Type)", py_object_type],
+
+ "bool": ["((PyObject*)&PyBool_Type)", py_object_type],
+ "int": ["((PyObject*)&PyInt_Type)", py_object_type],
+ "long": ["((PyObject*)&PyLong_Type)", py_object_type],
+ "float": ["((PyObject*)&PyFloat_Type)", py_object_type],
+ "complex":["((PyObject*)&PyComplex_Type)", py_object_type],
+
+ "bytes": ["((PyObject*)&PyBytes_Type)", py_object_type],
+ "bytearray": ["((PyObject*)&PyByteArray_Type)", py_object_type],
+ "str": ["((PyObject*)&PyString_Type)", py_object_type],
+ "unicode":["((PyObject*)&PyUnicode_Type)", py_object_type],
+
+ "tuple": ["((PyObject*)&PyTuple_Type)", py_object_type],
+ "list": ["((PyObject*)&PyList_Type)", py_object_type],
+ "dict": ["((PyObject*)&PyDict_Type)", py_object_type],
+ "set": ["((PyObject*)&PySet_Type)", py_object_type],
+ "frozenset": ["((PyObject*)&PyFrozenSet_Type)", py_object_type],
+
+ "slice": ["((PyObject*)&PySlice_Type)", py_object_type],
+# "file": ["((PyObject*)&PyFile_Type)", py_object_type], # not in Py3
+
+ "None": ["Py_None", py_object_type],
+ "False": ["Py_False", py_object_type],
+ "True": ["Py_True", py_object_type],
+ }
+
+const_counter = 1 # As a temporary solution for compiling code in pxds
+
+class ModuleScope(Scope):
+ # module_name string Python name of the module
+ # module_cname string C name of Python module object
+ # #module_dict_cname string C name of module dict object
+ # method_table_cname string C name of method table
+ # doc string Module doc string
+ # doc_cname string C name of module doc string
+ # utility_code_list [UtilityCode] Queuing utility codes for forwarding to Code.py
# c_includes {key: IncludeCode} C headers or verbatim code to be generated
# See process_include() for more documentation
- # string_to_entry {string : Entry} Map string const to entry
- # identifier_to_entry {string : Entry} Map identifier string const to entry
- # context Context
- # parent_module Scope Parent in the import namespace
- # module_entries {string : Entry} For cimport statements
- # type_names {string : 1} Set of type names (used during parsing)
- # included_files [string] Cython sources included with 'include'
- # pxd_file_loaded boolean Corresponding .pxd file has been processed
- # cimported_modules [ModuleScope] Modules imported with cimport
- # types_imported {PyrexType} Set of types for which import code generated
- # has_import_star boolean Module contains import *
- # cpp boolean Compiling a C++ file
- # is_cython_builtin boolean Is this the Cython builtin scope (or a child scope)
- # is_package boolean Is this a package module? (__init__)
-
- is_module_scope = 1
- has_import_star = 0
- is_cython_builtin = 0
+ # string_to_entry {string : Entry} Map string const to entry
+ # identifier_to_entry {string : Entry} Map identifier string const to entry
+ # context Context
+ # parent_module Scope Parent in the import namespace
+ # module_entries {string : Entry} For cimport statements
+ # type_names {string : 1} Set of type names (used during parsing)
+ # included_files [string] Cython sources included with 'include'
+ # pxd_file_loaded boolean Corresponding .pxd file has been processed
+ # cimported_modules [ModuleScope] Modules imported with cimport
+ # types_imported {PyrexType} Set of types for which import code generated
+ # has_import_star boolean Module contains import *
+ # cpp boolean Compiling a C++ file
+ # is_cython_builtin boolean Is this the Cython builtin scope (or a child scope)
+ # is_package boolean Is this a package module? (__init__)
+
+ is_module_scope = 1
+ has_import_star = 0
+ is_cython_builtin = 0
old_style_globals = 0
-
- def __init__(self, name, parent_module, context):
- from . import Builtin
- self.parent_module = parent_module
- outer_scope = Builtin.builtin_scope
- Scope.__init__(self, name, outer_scope, parent_module)
- if name == "__init__":
- # Treat Spam/__init__.pyx specially, so that when Python loads
- # Spam/__init__.so, initSpam() is defined.
- self.module_name = parent_module.module_name
- self.is_package = True
- else:
- self.module_name = name
- self.is_package = False
- self.module_name = EncodedString(self.module_name)
- self.context = context
- self.module_cname = Naming.module_cname
- self.module_dict_cname = Naming.moddict_cname
- self.method_table_cname = Naming.methtable_cname
- self.doc = ""
- self.doc_cname = Naming.moddoc_cname
- self.utility_code_list = []
- self.module_entries = {}
+
+ def __init__(self, name, parent_module, context):
+ from . import Builtin
+ self.parent_module = parent_module
+ outer_scope = Builtin.builtin_scope
+ Scope.__init__(self, name, outer_scope, parent_module)
+ if name == "__init__":
+ # Treat Spam/__init__.pyx specially, so that when Python loads
+ # Spam/__init__.so, initSpam() is defined.
+ self.module_name = parent_module.module_name
+ self.is_package = True
+ else:
+ self.module_name = name
+ self.is_package = False
+ self.module_name = EncodedString(self.module_name)
+ self.context = context
+ self.module_cname = Naming.module_cname
+ self.module_dict_cname = Naming.moddict_cname
+ self.method_table_cname = Naming.methtable_cname
+ self.doc = ""
+ self.doc_cname = Naming.moddoc_cname
+ self.utility_code_list = []
+ self.module_entries = {}
self.c_includes = {}
- self.type_names = dict(outer_scope.type_names)
- self.pxd_file_loaded = 0
- self.cimported_modules = []
- self.types_imported = set()
- self.included_files = []
- self.has_extern_class = 0
- self.cached_builtins = []
- self.undeclared_cached_builtins = []
- self.namespace_cname = self.module_cname
+ self.type_names = dict(outer_scope.type_names)
+ self.pxd_file_loaded = 0
+ self.cimported_modules = []
+ self.types_imported = set()
+ self.included_files = []
+ self.has_extern_class = 0
+ self.cached_builtins = []
+ self.undeclared_cached_builtins = []
+ self.namespace_cname = self.module_cname
self._cached_tuple_types = {}
for var_name in ['__builtins__', '__name__', '__file__', '__doc__', '__path__',
'__spec__', '__loader__', '__package__', '__cached__']:
- self.declare_var(EncodedString(var_name), py_object_type, None)
+ self.declare_var(EncodedString(var_name), py_object_type, None)
self.process_include(Code.IncludeCode("Python.h", initial=True))
-
- def qualifying_scope(self):
- return self.parent_module
-
- def global_scope(self):
- return self
-
+
+ def qualifying_scope(self):
+ return self.parent_module
+
+ def global_scope(self):
+ return self
+
def lookup(self, name, language_level=None, str_is_str=None):
- entry = self.lookup_here(name)
- if entry is not None:
- return entry
-
+ entry = self.lookup_here(name)
+ if entry is not None:
+ return entry
+
if language_level is None:
language_level = self.context.language_level if self.context is not None else 3
if str_is_str is None:
str_is_str = language_level == 2 or (
self.context is not None and Future.unicode_literals not in self.context.future_directives)
-
+
return self.outer_scope.lookup(name, language_level=language_level, str_is_str=str_is_str)
-
+
def declare_tuple_type(self, pos, components):
components = tuple(components)
try:
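BuiltinScope.lookup and ModuleScope.lookup above together decide how the name 'str' is resolved: under Python 2 semantics it stays 'str', otherwise it is treated as 'unicode'. The helper below is a deliberately simplified sketch of that policy; the real ModuleScope.lookup also consults the module's future directives, and the level-3 default only mirrors its fallback when no context is attached.

def resolve_builtin_name(name, language_level=None, str_is_str=None):
    if language_level is None:
        language_level = 3           # assumed default when no context is available
    if name == "str":
        if str_is_str is None:
            str_is_str = language_level == 2
        if not str_is_str:
            return "unicode"
    return name

assert resolve_builtin_name("str", language_level=2) == "str"
assert resolve_builtin_name("str", language_level=3) == "unicode"
assert resolve_builtin_name("list") == "list"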
@@ -1213,48 +1213,48 @@ class ModuleScope(Scope):
ttype.entry = entry
return entry
- def declare_builtin(self, name, pos):
- if not hasattr(builtins, name) \
- and name not in Code.non_portable_builtins_map \
- and name not in Code.uncachable_builtins:
- if self.has_import_star:
- entry = self.declare_var(name, py_object_type, pos)
- return entry
- else:
- if Options.error_on_unknown_names:
- error(pos, "undeclared name not builtin: %s" % name)
- else:
- warning(pos, "undeclared name not builtin: %s" % name, 2)
- # unknown - assume it's builtin and look it up at runtime
- entry = self.declare(name, None, py_object_type, pos, 'private')
- entry.is_builtin = 1
- return entry
- if Options.cache_builtins:
- for entry in self.cached_builtins:
- if entry.name == name:
- return entry
+ def declare_builtin(self, name, pos):
+ if not hasattr(builtins, name) \
+ and name not in Code.non_portable_builtins_map \
+ and name not in Code.uncachable_builtins:
+ if self.has_import_star:
+ entry = self.declare_var(name, py_object_type, pos)
+ return entry
+ else:
+ if Options.error_on_unknown_names:
+ error(pos, "undeclared name not builtin: %s" % name)
+ else:
+ warning(pos, "undeclared name not builtin: %s" % name, 2)
+ # unknown - assume it's builtin and look it up at runtime
+ entry = self.declare(name, None, py_object_type, pos, 'private')
+ entry.is_builtin = 1
+ return entry
+ if Options.cache_builtins:
+ for entry in self.cached_builtins:
+ if entry.name == name:
+ return entry
if name == 'globals' and not self.old_style_globals:
return self.outer_scope.lookup('__Pyx_Globals')
else:
entry = self.declare(None, None, py_object_type, pos, 'private')
- if Options.cache_builtins and name not in Code.uncachable_builtins:
- entry.is_builtin = 1
- entry.is_const = 1 # cached
- entry.name = name
- entry.cname = Naming.builtin_prefix + name
- self.cached_builtins.append(entry)
- self.undeclared_cached_builtins.append(entry)
- else:
- entry.is_builtin = 1
- entry.name = name
+ if Options.cache_builtins and name not in Code.uncachable_builtins:
+ entry.is_builtin = 1
+ entry.is_const = 1 # cached
+ entry.name = name
+ entry.cname = Naming.builtin_prefix + name
+ self.cached_builtins.append(entry)
+ self.undeclared_cached_builtins.append(entry)
+ else:
+ entry.is_builtin = 1
+ entry.name = name
entry.qualified_name = self.builtin_scope().qualify_name(name)
- return entry
-
- def find_module(self, module_name, pos, relative_level=-1):
- # Find a module in the import namespace, interpreting
- # relative imports relative to this module's parent.
- # Finds and parses the module's .pxd file if the module
- # has not been referenced before.
+ return entry
+
+ def find_module(self, module_name, pos, relative_level=-1):
+ # Find a module in the import namespace, interpreting
+ # relative imports relative to this module's parent.
+ # Finds and parses the module's .pxd file if the module
+ # has not been referenced before.
relative_to = None
absolute_fallback = False
if relative_level is not None and relative_level > 0:
@@ -1269,27 +1269,27 @@ class ModuleScope(Scope):
relative_to = self.parent_module
absolute_fallback = True
- module_scope = self.global_scope()
- return module_scope.context.find_module(
+ module_scope = self.global_scope()
+ return module_scope.context.find_module(
module_name, relative_to=relative_to, pos=pos, absolute_fallback=absolute_fallback)
-
- def find_submodule(self, name):
- # Find and return scope for a submodule of this module,
- # creating a new empty one if necessary. Doesn't parse .pxd.
+
+ def find_submodule(self, name):
+ # Find and return scope for a submodule of this module,
+ # creating a new empty one if necessary. Doesn't parse .pxd.
if '.' in name:
name, submodule = name.split('.', 1)
else:
submodule = None
- scope = self.lookup_submodule(name)
- if not scope:
+ scope = self.lookup_submodule(name)
+ if not scope:
scope = ModuleScope(name, parent_module=self, context=self.context)
- self.module_entries[name] = scope
+ self.module_entries[name] = scope
if submodule:
scope = scope.find_submodule(submodule)
- return scope
-
- def lookup_submodule(self, name):
- # Return scope for submodule of this module, or None.
+ return scope
+
+ def lookup_submodule(self, name):
+ # Return scope for submodule of this module, or None.
if '.' in name:
name, submodule = name.split('.', 1)
else:
@@ -1298,7 +1298,7 @@ class ModuleScope(Scope):
if submodule and module is not None:
module = module.lookup_submodule(submodule)
return module
-
+
def add_include_file(self, filename, verbatim_include=None, late=False):
"""
Add `filename` as include file. Add `verbatim_include` as
@@ -1307,7 +1307,7 @@ class ModuleScope(Scope):
"""
inc = Code.IncludeCode(filename, verbatim_include, late=late)
self.process_include(inc)
-
+
def process_include(self, inc):
"""
Add `inc`, which is an instance of `IncludeCode`, to this
@@ -1339,142 +1339,142 @@ class ModuleScope(Scope):
inc.dict_update(self.c_includes, key)
inc = self.c_includes[key]
- def add_imported_module(self, scope):
- if scope not in self.cimported_modules:
+ def add_imported_module(self, scope):
+ if scope not in self.cimported_modules:
for inc in scope.c_includes.values():
self.process_include(inc)
- self.cimported_modules.append(scope)
- for m in scope.cimported_modules:
- self.add_imported_module(m)
-
- def add_imported_entry(self, name, entry, pos):
+ self.cimported_modules.append(scope)
+ for m in scope.cimported_modules:
+ self.add_imported_module(m)
+
+ def add_imported_entry(self, name, entry, pos):
if entry.is_pyglobal:
# Allow cimports to follow imports.
entry.is_variable = True
- if entry not in self.entries:
- self.entries[name] = entry
- else:
- warning(pos, "'%s' redeclared " % name, 0)
-
- def declare_module(self, name, scope, pos):
- # Declare a cimported module. This is represented as a
- # Python module-level variable entry with a module
- # scope attached to it. Reports an error and returns
- # None if previously declared as something else.
- entry = self.lookup_here(name)
- if entry:
- if entry.is_pyglobal and entry.as_module is scope:
- return entry # Already declared as the same module
- if not (entry.is_pyglobal and not entry.as_module):
- # SAGE -- I put this here so Pyrex
- # cimport's work across directories.
- # Currently it tries to multiply define
- # every module appearing in an import list.
- # It shouldn't be an error for a module
- # name to appear again, and indeed the generated
- # code compiles fine.
- return entry
- else:
- entry = self.declare_var(name, py_object_type, pos)
+ if entry not in self.entries:
+ self.entries[name] = entry
+ else:
+ warning(pos, "'%s' redeclared " % name, 0)
+
+ def declare_module(self, name, scope, pos):
+ # Declare a cimported module. This is represented as a
+ # Python module-level variable entry with a module
+ # scope attached to it. Reports an error and returns
+ # None if previously declared as something else.
+ entry = self.lookup_here(name)
+ if entry:
+ if entry.is_pyglobal and entry.as_module is scope:
+ return entry # Already declared as the same module
+ if not (entry.is_pyglobal and not entry.as_module):
+ # SAGE -- I put this here so Pyrex
+ # cimport's work across directories.
+ # Currently it tries to multiply define
+ # every module appearing in an import list.
+ # It shouldn't be an error for a module
+ # name to appear again, and indeed the generated
+ # code compiles fine.
+ return entry
+ else:
+ entry = self.declare_var(name, py_object_type, pos)
entry.is_variable = 0
- entry.as_module = scope
- self.add_imported_module(scope)
- return entry
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = 0):
- # Add an entry for a global variable. If it is a Python
- # object type, and not declared with cdef, it will live
- # in the module dictionary, otherwise it will be a C
- # global variable.
- if not visibility in ('private', 'public', 'extern'):
- error(pos, "Module-level variable cannot be declared %s" % visibility)
- if not is_cdef:
- if type is unspecified_type:
- type = py_object_type
- if not (type.is_pyobject and not type.is_extension_type):
- raise InternalError(
- "Non-cdef global variable is not a generic Python object")
-
- if not cname:
- defining = not in_pxd
- if visibility == 'extern' or (visibility == 'public' and defining):
- cname = name
- else:
- cname = self.mangle(Naming.var_prefix, name)
-
- entry = self.lookup_here(name)
- if entry and entry.defined_in_pxd:
- #if visibility != 'private' and visibility != entry.visibility:
- # warning(pos, "Variable '%s' previously declared as '%s'" % (name, entry.visibility), 1)
- if not entry.type.same_as(type):
- if visibility == 'extern' and entry.visibility == 'extern':
- warning(pos, "Variable '%s' type does not match previous declaration" % name, 1)
- entry.type = type
- #else:
- # error(pos, "Variable '%s' type does not match previous declaration" % name)
- if entry.visibility != "private":
- mangled_cname = self.mangle(Naming.var_prefix, name)
- if entry.cname == mangled_cname:
- cname = name
- entry.cname = name
- if not entry.is_implemented:
- entry.is_implemented = True
- return entry
-
- entry = Scope.declare_var(self, name, type, pos,
- cname=cname, visibility=visibility,
- api=api, in_pxd=in_pxd, is_cdef=is_cdef)
- if is_cdef:
- entry.is_cglobal = 1
+ entry.as_module = scope
+ self.add_imported_module(scope)
+ return entry
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = 0):
+ # Add an entry for a global variable. If it is a Python
+ # object type, and not declared with cdef, it will live
+ # in the module dictionary, otherwise it will be a C
+ # global variable.
+ if not visibility in ('private', 'public', 'extern'):
+ error(pos, "Module-level variable cannot be declared %s" % visibility)
+ if not is_cdef:
+ if type is unspecified_type:
+ type = py_object_type
+ if not (type.is_pyobject and not type.is_extension_type):
+ raise InternalError(
+ "Non-cdef global variable is not a generic Python object")
+
+ if not cname:
+ defining = not in_pxd
+ if visibility == 'extern' or (visibility == 'public' and defining):
+ cname = name
+ else:
+ cname = self.mangle(Naming.var_prefix, name)
+
+ entry = self.lookup_here(name)
+ if entry and entry.defined_in_pxd:
+ #if visibility != 'private' and visibility != entry.visibility:
+ # warning(pos, "Variable '%s' previously declared as '%s'" % (name, entry.visibility), 1)
+ if not entry.type.same_as(type):
+ if visibility == 'extern' and entry.visibility == 'extern':
+ warning(pos, "Variable '%s' type does not match previous declaration" % name, 1)
+ entry.type = type
+ #else:
+ # error(pos, "Variable '%s' type does not match previous declaration" % name)
+ if entry.visibility != "private":
+ mangled_cname = self.mangle(Naming.var_prefix, name)
+ if entry.cname == mangled_cname:
+ cname = name
+ entry.cname = name
+ if not entry.is_implemented:
+ entry.is_implemented = True
+ return entry
+
+ entry = Scope.declare_var(self, name, type, pos,
+ cname=cname, visibility=visibility,
+ api=api, in_pxd=in_pxd, is_cdef=is_cdef)
+ if is_cdef:
+ entry.is_cglobal = 1
if entry.type.declaration_value:
entry.init = entry.type.declaration_value
- self.var_entries.append(entry)
- else:
- entry.is_pyglobal = 1
- if Options.cimport_from_pyx:
- entry.used = 1
- return entry
-
- def declare_cfunction(self, name, type, pos,
+ self.var_entries.append(entry)
+ else:
+ entry.is_pyglobal = 1
+ if Options.cimport_from_pyx:
+ entry.used = 1
+ return entry
+
+ def declare_cfunction(self, name, type, pos,
cname=None, visibility='private', api=0, in_pxd=0,
defining=0, modifiers=(), utility_code=None, overridable=False):
if not defining and 'inline' in modifiers:
# TODO(github/1736): Make this an error.
warning(pos, "Declarations should not be declared inline.", 1)
- # Add an entry for a C function.
- if not cname:
- if visibility == 'extern' or (visibility == 'public' and defining):
- cname = name
- else:
- cname = self.mangle(Naming.func_prefix, name)
+ # Add an entry for a C function.
+ if not cname:
+ if visibility == 'extern' or (visibility == 'public' and defining):
+ cname = name
+ else:
+ cname = self.mangle(Naming.func_prefix, name)
if visibility == 'extern' and type.optional_arg_count:
error(pos, "Extern functions cannot have default arguments values.")
- entry = self.lookup_here(name)
- if entry and entry.defined_in_pxd:
- if entry.visibility != "private":
- mangled_cname = self.mangle(Naming.var_prefix, name)
- if entry.cname == mangled_cname:
- cname = name
- entry.cname = cname
- entry.func_cname = cname
- entry = Scope.declare_cfunction(
- self, name, type, pos,
+ entry = self.lookup_here(name)
+ if entry and entry.defined_in_pxd:
+ if entry.visibility != "private":
+ mangled_cname = self.mangle(Naming.var_prefix, name)
+ if entry.cname == mangled_cname:
+ cname = name
+ entry.cname = cname
+ entry.func_cname = cname
+ entry = Scope.declare_cfunction(
+ self, name, type, pos,
cname=cname, visibility=visibility, api=api, in_pxd=in_pxd,
defining=defining, modifiers=modifiers, utility_code=utility_code,
overridable=overridable)
- return entry
-
- def declare_global(self, name, pos):
- entry = self.lookup_here(name)
- if not entry:
- self.declare_var(name, py_object_type, pos)
-
- def use_utility_code(self, new_code):
- if new_code is not None:
- self.utility_code_list.append(new_code)
-
+ return entry
+
+ def declare_global(self, name, pos):
+ entry = self.lookup_here(name)
+ if not entry:
+ self.declare_var(name, py_object_type, pos)
+
+ def use_utility_code(self, new_code):
+ if new_code is not None:
+ self.utility_code_list.append(new_code)
+
def use_entry_utility_code(self, entry):
if entry is None:
return
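The use_utility_code paths above form a simple funnel: any scope forwards the request to its global (module) scope, and ModuleScope keeps a flat utility_code_list of non-None codes for the code generator to emit later. A rough sketch of that shape with toy classes; the names mirror the diff but nothing here is the real implementation:

class ToyModuleScope:
    def __init__(self):
        self.utility_code_list = []

    def global_scope(self):
        return self

    def use_utility_code(self, new_code):
        if new_code is not None:         # "no helper needed" is silently ignored
            self.utility_code_list.append(new_code)

class ToyInnerScope:
    def __init__(self, module):
        self._module = module

    def global_scope(self):
        return self._module

    def use_utility_code(self, new_code):
        # inner scopes never store utility code themselves
        self.global_scope().use_utility_code(new_code)

mod = ToyModuleScope()
ToyInnerScope(mod).use_utility_code("checked_int_add_helper")   # hypothetical code name
assert mod.utility_code_list == ["checked_int_add_helper"]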
@@ -1488,329 +1488,329 @@ class ModuleScope(Scope):
typeobj_cname=None, typeptr_cname=None, visibility='private',
typedef_flag=0, api=0, check_size=None,
buffer_defaults=None, shadow=0):
- # If this is a non-extern typedef class, expose the typedef, but use
- # the non-typedef struct internally to avoid needing forward
- # declarations for anonymous structs.
- if typedef_flag and visibility != 'extern':
- if not (visibility == 'public' or api):
- warning(pos, "ctypedef only valid for 'extern' , 'public', and 'api'", 2)
- objtypedef_cname = objstruct_cname
- typedef_flag = 0
- else:
- objtypedef_cname = None
- #
- # Look for previous declaration as a type
- #
- entry = self.lookup_here(name)
- if entry and not shadow:
- type = entry.type
- if not (entry.is_type and type.is_extension_type):
- entry = None # Will cause redeclaration and produce an error
- else:
- scope = type.scope
- if typedef_flag and (not scope or scope.defined):
- self.check_previous_typedef_flag(entry, typedef_flag, pos)
- if (scope and scope.defined) or (base_type and type.base_type):
- if base_type and base_type is not type.base_type:
- error(pos, "Base type does not match previous declaration")
- if base_type and not type.base_type:
- type.base_type = base_type
- #
- # Make a new entry if needed
- #
- if not entry or shadow:
+ # If this is a non-extern typedef class, expose the typedef, but use
+ # the non-typedef struct internally to avoid needing forward
+ # declarations for anonymous structs.
+ if typedef_flag and visibility != 'extern':
+ if not (visibility == 'public' or api):
+ warning(pos, "ctypedef only valid for 'extern' , 'public', and 'api'", 2)
+ objtypedef_cname = objstruct_cname
+ typedef_flag = 0
+ else:
+ objtypedef_cname = None
+ #
+ # Look for previous declaration as a type
+ #
+ entry = self.lookup_here(name)
+ if entry and not shadow:
+ type = entry.type
+ if not (entry.is_type and type.is_extension_type):
+ entry = None # Will cause redeclaration and produce an error
+ else:
+ scope = type.scope
+ if typedef_flag and (not scope or scope.defined):
+ self.check_previous_typedef_flag(entry, typedef_flag, pos)
+ if (scope and scope.defined) or (base_type and type.base_type):
+ if base_type and base_type is not type.base_type:
+ error(pos, "Base type does not match previous declaration")
+ if base_type and not type.base_type:
+ type.base_type = base_type
+ #
+ # Make a new entry if needed
+ #
+ if not entry or shadow:
type = PyrexTypes.PyExtensionType(
name, typedef_flag, base_type, visibility == 'extern', check_size=check_size)
- type.pos = pos
- type.buffer_defaults = buffer_defaults
- if objtypedef_cname is not None:
- type.objtypedef_cname = objtypedef_cname
- if visibility == 'extern':
- type.module_name = module_name
- else:
- type.module_name = self.qualified_name
- if typeptr_cname:
- type.typeptr_cname = typeptr_cname
- else:
- type.typeptr_cname = self.mangle(Naming.typeptr_prefix, name)
- entry = self.declare_type(name, type, pos, visibility = visibility,
- defining = 0, shadow = shadow)
- entry.is_cclass = True
- if objstruct_cname:
- type.objstruct_cname = objstruct_cname
- elif not entry.in_cinclude:
- type.objstruct_cname = self.mangle(Naming.objstruct_prefix, name)
- else:
- error(entry.pos,
- "Object name required for 'public' or 'extern' C class")
- self.attach_var_entry_to_c_class(entry)
- self.c_class_entries.append(entry)
- #
- # Check for re-definition and create scope if needed
- #
- if not type.scope:
- if defining or implementing:
- scope = CClassScope(name = name, outer_scope = self,
- visibility = visibility)
- scope.directives = self.directives.copy()
- if base_type and base_type.scope:
- scope.declare_inherited_c_attributes(base_type.scope)
- type.set_scope(scope)
- self.type_entries.append(entry)
- else:
- if defining and type.scope.defined:
- error(pos, "C class '%s' already defined" % name)
- elif implementing and type.scope.implemented:
- error(pos, "C class '%s' already implemented" % name)
- #
- # Fill in options, checking for compatibility with any previous declaration
- #
- if defining:
- entry.defined_in_pxd = 1
- if implementing: # So that filenames in runtime exceptions refer to
- entry.pos = pos # the .pyx file and not the .pxd file
- if visibility != 'private' and entry.visibility != visibility:
- error(pos, "Class '%s' previously declared as '%s'"
- % (name, entry.visibility))
- if api:
- entry.api = 1
- if objstruct_cname:
- if type.objstruct_cname and type.objstruct_cname != objstruct_cname:
- error(pos, "Object struct name differs from previous declaration")
- type.objstruct_cname = objstruct_cname
- if typeobj_cname:
- if type.typeobj_cname and type.typeobj_cname != typeobj_cname:
- error(pos, "Type object name differs from previous declaration")
- type.typeobj_cname = typeobj_cname
-
- if self.directives.get('final'):
- entry.type.is_final_type = True
-
- # cdef classes are always exported, but we need to set it to
- # distinguish between unused Cython utility code extension classes
- entry.used = True
-
- #
- # Return new or existing entry
- #
- return entry
-
- def allocate_vtable_names(self, entry):
- # If extension type has a vtable, allocate vtable struct and
- # slot names for it.
- type = entry.type
- if type.base_type and type.base_type.vtabslot_cname:
- #print "...allocating vtabslot_cname because base type has one" ###
- type.vtabslot_cname = "%s.%s" % (
- Naming.obj_base_cname, type.base_type.vtabslot_cname)
- elif type.scope and type.scope.cfunc_entries:
- # one special case here: when inheriting from builtin
- # types, the methods may also be built-in, in which
- # case they won't need a vtable
- entry_count = len(type.scope.cfunc_entries)
- base_type = type.base_type
- while base_type:
- # FIXME: this will break if we ever get non-inherited C methods
- if not base_type.scope or entry_count > len(base_type.scope.cfunc_entries):
- break
- if base_type.is_builtin_type:
- # builtin base type defines all methods => no vtable needed
- return
- base_type = base_type.base_type
- #print "...allocating vtabslot_cname because there are C methods" ###
- type.vtabslot_cname = Naming.vtabslot_cname
- if type.vtabslot_cname:
- #print "...allocating other vtable related cnames" ###
- type.vtabstruct_cname = self.mangle(Naming.vtabstruct_prefix, entry.name)
- type.vtabptr_cname = self.mangle(Naming.vtabptr_prefix, entry.name)
-
- def check_c_classes_pxd(self):
- # Performs post-analysis checking and finishing up of extension types
- # being implemented in this module. This is called only for the .pxd.
- #
- # Checks all extension types declared in this scope to
- # make sure that:
- #
- # * The extension type is fully declared
- #
- # Also allocates a name for the vtable if needed.
- #
- for entry in self.c_class_entries:
- # Check defined
- if not entry.type.scope:
- error(entry.pos, "C class '%s' is declared but not defined" % entry.name)
-
- def check_c_class(self, entry):
- type = entry.type
- name = entry.name
- visibility = entry.visibility
- # Check defined
- if not type.scope:
- error(entry.pos, "C class '%s' is declared but not defined" % name)
- # Generate typeobj_cname
- if visibility != 'extern' and not type.typeobj_cname:
- type.typeobj_cname = self.mangle(Naming.typeobj_prefix, name)
- ## Generate typeptr_cname
- #type.typeptr_cname = self.mangle(Naming.typeptr_prefix, name)
- # Check C methods defined
- if type.scope:
- for method_entry in type.scope.cfunc_entries:
- if not method_entry.is_inherited and not method_entry.func_cname:
- error(method_entry.pos, "C method '%s' is declared but not defined" %
- method_entry.name)
- # Allocate vtable name if necessary
- if type.vtabslot_cname:
- #print "ModuleScope.check_c_classes: allocating vtable cname for", self ###
- type.vtable_cname = self.mangle(Naming.vtable_prefix, entry.name)
-
- def check_c_classes(self):
- # Performs post-analysis checking and finishing up of extension types
- # being implemented in this module. This is called only for the main
- # .pyx file scope, not for cimported .pxd scopes.
- #
- # Checks all extension types declared in this scope to
- # make sure that:
- #
- # * The extension type is implemented
- # * All required object and type names have been specified or generated
- # * All non-inherited C methods are implemented
- #
- # Also allocates a name for the vtable if needed.
- #
- debug_check_c_classes = 0
- if debug_check_c_classes:
- print("Scope.check_c_classes: checking scope " + self.qualified_name)
- for entry in self.c_class_entries:
- if debug_check_c_classes:
- print("...entry %s %s" % (entry.name, entry))
- print("......type = ", entry.type)
- print("......visibility = ", entry.visibility)
- self.check_c_class(entry)
-
- def check_c_functions(self):
- # Performs post-analysis checking making sure all
- # defined c functions are actually implemented.
- for name, entry in self.entries.items():
- if entry.is_cfunction:
- if (entry.defined_in_pxd
- and entry.scope is self
- and entry.visibility != 'extern'
- and not entry.in_cinclude
- and not entry.is_implemented):
- error(entry.pos, "Non-extern C function '%s' declared but not defined" % name)
-
- def attach_var_entry_to_c_class(self, entry):
- # The name of an extension class has to serve as both a type
- # name and a variable name holding the type object. It is
- # represented in the symbol table by a type entry with a
- # variable entry attached to it. For the variable entry,
- # we use a read-only C global variable whose name is an
- # expression that refers to the type object.
- from . import Builtin
- var_entry = Entry(name = entry.name,
- type = Builtin.type_type,
- pos = entry.pos,
+ type.pos = pos
+ type.buffer_defaults = buffer_defaults
+ if objtypedef_cname is not None:
+ type.objtypedef_cname = objtypedef_cname
+ if visibility == 'extern':
+ type.module_name = module_name
+ else:
+ type.module_name = self.qualified_name
+ if typeptr_cname:
+ type.typeptr_cname = typeptr_cname
+ else:
+ type.typeptr_cname = self.mangle(Naming.typeptr_prefix, name)
+ entry = self.declare_type(name, type, pos, visibility = visibility,
+ defining = 0, shadow = shadow)
+ entry.is_cclass = True
+ if objstruct_cname:
+ type.objstruct_cname = objstruct_cname
+ elif not entry.in_cinclude:
+ type.objstruct_cname = self.mangle(Naming.objstruct_prefix, name)
+ else:
+ error(entry.pos,
+ "Object name required for 'public' or 'extern' C class")
+ self.attach_var_entry_to_c_class(entry)
+ self.c_class_entries.append(entry)
+ #
+ # Check for re-definition and create scope if needed
+ #
+ if not type.scope:
+ if defining or implementing:
+ scope = CClassScope(name = name, outer_scope = self,
+ visibility = visibility)
+ scope.directives = self.directives.copy()
+ if base_type and base_type.scope:
+ scope.declare_inherited_c_attributes(base_type.scope)
+ type.set_scope(scope)
+ self.type_entries.append(entry)
+ else:
+ if defining and type.scope.defined:
+ error(pos, "C class '%s' already defined" % name)
+ elif implementing and type.scope.implemented:
+ error(pos, "C class '%s' already implemented" % name)
+ #
+ # Fill in options, checking for compatibility with any previous declaration
+ #
+ if defining:
+ entry.defined_in_pxd = 1
+ if implementing: # So that filenames in runtime exceptions refer to
+ entry.pos = pos # the .pyx file and not the .pxd file
+ if visibility != 'private' and entry.visibility != visibility:
+ error(pos, "Class '%s' previously declared as '%s'"
+ % (name, entry.visibility))
+ if api:
+ entry.api = 1
+ if objstruct_cname:
+ if type.objstruct_cname and type.objstruct_cname != objstruct_cname:
+ error(pos, "Object struct name differs from previous declaration")
+ type.objstruct_cname = objstruct_cname
+ if typeobj_cname:
+ if type.typeobj_cname and type.typeobj_cname != typeobj_cname:
+ error(pos, "Type object name differs from previous declaration")
+ type.typeobj_cname = typeobj_cname
+
+ if self.directives.get('final'):
+ entry.type.is_final_type = True
+
+ # cdef classes are always exported, but we need to set it to
+ # distinguish between unused Cython utility code extension classes
+ entry.used = True
+
+ #
+ # Return new or existing entry
+ #
+ return entry
+
+ def allocate_vtable_names(self, entry):
+ # If extension type has a vtable, allocate vtable struct and
+ # slot names for it.
+ type = entry.type
+ if type.base_type and type.base_type.vtabslot_cname:
+ #print "...allocating vtabslot_cname because base type has one" ###
+ type.vtabslot_cname = "%s.%s" % (
+ Naming.obj_base_cname, type.base_type.vtabslot_cname)
+ elif type.scope and type.scope.cfunc_entries:
+ # one special case here: when inheriting from builtin
+ # types, the methods may also be built-in, in which
+ # case they won't need a vtable
+ entry_count = len(type.scope.cfunc_entries)
+ base_type = type.base_type
+ while base_type:
+ # FIXME: this will break if we ever get non-inherited C methods
+ if not base_type.scope or entry_count > len(base_type.scope.cfunc_entries):
+ break
+ if base_type.is_builtin_type:
+ # builtin base type defines all methods => no vtable needed
+ return
+ base_type = base_type.base_type
+ #print "...allocating vtabslot_cname because there are C methods" ###
+ type.vtabslot_cname = Naming.vtabslot_cname
+ if type.vtabslot_cname:
+ #print "...allocating other vtable related cnames" ###
+ type.vtabstruct_cname = self.mangle(Naming.vtabstruct_prefix, entry.name)
+ type.vtabptr_cname = self.mangle(Naming.vtabptr_prefix, entry.name)
+
+ def check_c_classes_pxd(self):
+ # Performs post-analysis checking and finishing up of extension types
+ # being implemented in this module. This is called only for the .pxd.
+ #
+ # Checks all extension types declared in this scope to
+ # make sure that:
+ #
+ # * The extension type is fully declared
+ #
+ # Also allocates a name for the vtable if needed.
+ #
+ for entry in self.c_class_entries:
+ # Check defined
+ if not entry.type.scope:
+ error(entry.pos, "C class '%s' is declared but not defined" % entry.name)
+
+ def check_c_class(self, entry):
+ type = entry.type
+ name = entry.name
+ visibility = entry.visibility
+ # Check defined
+ if not type.scope:
+ error(entry.pos, "C class '%s' is declared but not defined" % name)
+ # Generate typeobj_cname
+ if visibility != 'extern' and not type.typeobj_cname:
+ type.typeobj_cname = self.mangle(Naming.typeobj_prefix, name)
+ ## Generate typeptr_cname
+ #type.typeptr_cname = self.mangle(Naming.typeptr_prefix, name)
+ # Check C methods defined
+ if type.scope:
+ for method_entry in type.scope.cfunc_entries:
+ if not method_entry.is_inherited and not method_entry.func_cname:
+ error(method_entry.pos, "C method '%s' is declared but not defined" %
+ method_entry.name)
+ # Allocate vtable name if necessary
+ if type.vtabslot_cname:
+ #print "ModuleScope.check_c_classes: allocating vtable cname for", self ###
+ type.vtable_cname = self.mangle(Naming.vtable_prefix, entry.name)
+
+ def check_c_classes(self):
+ # Performs post-analysis checking and finishing up of extension types
+ # being implemented in this module. This is called only for the main
+ # .pyx file scope, not for cimported .pxd scopes.
+ #
+ # Checks all extension types declared in this scope to
+ # make sure that:
+ #
+ # * The extension type is implemented
+ # * All required object and type names have been specified or generated
+ # * All non-inherited C methods are implemented
+ #
+ # Also allocates a name for the vtable if needed.
+ #
+ debug_check_c_classes = 0
+ if debug_check_c_classes:
+ print("Scope.check_c_classes: checking scope " + self.qualified_name)
+ for entry in self.c_class_entries:
+ if debug_check_c_classes:
+ print("...entry %s %s" % (entry.name, entry))
+ print("......type = ", entry.type)
+ print("......visibility = ", entry.visibility)
+ self.check_c_class(entry)
+
+ def check_c_functions(self):
+ # Performs post-analysis checking making sure all
+ # defined c functions are actually implemented.
+ for name, entry in self.entries.items():
+ if entry.is_cfunction:
+ if (entry.defined_in_pxd
+ and entry.scope is self
+ and entry.visibility != 'extern'
+ and not entry.in_cinclude
+ and not entry.is_implemented):
+ error(entry.pos, "Non-extern C function '%s' declared but not defined" % name)
+
+ def attach_var_entry_to_c_class(self, entry):
+ # The name of an extension class has to serve as both a type
+ # name and a variable name holding the type object. It is
+ # represented in the symbol table by a type entry with a
+ # variable entry attached to it. For the variable entry,
+ # we use a read-only C global variable whose name is an
+ # expression that refers to the type object.
+ from . import Builtin
+ var_entry = Entry(name = entry.name,
+ type = Builtin.type_type,
+ pos = entry.pos,
cname = entry.type.typeptr_cname)
var_entry.qualified_name = entry.qualified_name
- var_entry.is_variable = 1
- var_entry.is_cglobal = 1
- var_entry.is_readonly = 1
+ var_entry.is_variable = 1
+ var_entry.is_cglobal = 1
+ var_entry.is_readonly = 1
var_entry.scope = entry.scope
- entry.as_variable = var_entry
-
- def is_cpp(self):
- return self.cpp
-
- def infer_types(self):
- from .TypeInference import PyObjectTypeInferer
- PyObjectTypeInferer().infer_types(self)
-
-
-class LocalScope(Scope):
-
- # Does the function have a 'with gil:' block?
- has_with_gil_block = False
-
- # Transient attribute, used for symbol table variable declarations
- _in_with_gil_block = False
-
- def __init__(self, name, outer_scope, parent_scope = None):
- if parent_scope is None:
- parent_scope = outer_scope
- Scope.__init__(self, name, outer_scope, parent_scope)
-
- def mangle(self, prefix, name):
- return prefix + name
-
- def declare_arg(self, name, type, pos):
- # Add an entry for an argument of a function.
- cname = self.mangle(Naming.var_prefix, name)
- entry = self.declare(name, cname, type, pos, 'private')
- entry.is_variable = 1
- if type.is_pyobject:
- entry.init = "0"
- entry.is_arg = 1
- #entry.borrowed = 1 # Not using borrowed arg refs for now
- self.arg_entries.append(entry)
- return entry
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = 0):
- # Add an entry for a local variable.
- if visibility in ('public', 'readonly'):
- error(pos, "Local variable cannot be declared %s" % visibility)
- entry = Scope.declare_var(self, name, type, pos,
- cname=cname, visibility=visibility,
- api=api, in_pxd=in_pxd, is_cdef=is_cdef)
+ entry.as_variable = var_entry
+
+ def is_cpp(self):
+ return self.cpp
+
+ def infer_types(self):
+ from .TypeInference import PyObjectTypeInferer
+ PyObjectTypeInferer().infer_types(self)
+
+
+class LocalScope(Scope):
+
+ # Does the function have a 'with gil:' block?
+ has_with_gil_block = False
+
+ # Transient attribute, used for symbol table variable declarations
+ _in_with_gil_block = False
+
+ def __init__(self, name, outer_scope, parent_scope = None):
+ if parent_scope is None:
+ parent_scope = outer_scope
+ Scope.__init__(self, name, outer_scope, parent_scope)
+
+ def mangle(self, prefix, name):
+ return prefix + name
+
+ def declare_arg(self, name, type, pos):
+ # Add an entry for an argument of a function.
+ cname = self.mangle(Naming.var_prefix, name)
+ entry = self.declare(name, cname, type, pos, 'private')
+ entry.is_variable = 1
+ if type.is_pyobject:
+ entry.init = "0"
+ entry.is_arg = 1
+ #entry.borrowed = 1 # Not using borrowed arg refs for now
+ self.arg_entries.append(entry)
+ return entry
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = 0):
+ # Add an entry for a local variable.
+ if visibility in ('public', 'readonly'):
+ error(pos, "Local variable cannot be declared %s" % visibility)
+ entry = Scope.declare_var(self, name, type, pos,
+ cname=cname, visibility=visibility,
+ api=api, in_pxd=in_pxd, is_cdef=is_cdef)
if entry.type.declaration_value:
entry.init = entry.type.declaration_value
- entry.is_local = 1
-
- entry.in_with_gil_block = self._in_with_gil_block
- self.var_entries.append(entry)
- return entry
-
- def declare_global(self, name, pos):
- # Pull entry from global scope into local scope.
- if self.lookup_here(name):
-            warning(pos, "'%s' redeclared " % name, 0)
- else:
- entry = self.global_scope().lookup_target(name)
- self.entries[name] = entry
-
- def declare_nonlocal(self, name, pos):
- # Pull entry from outer scope into local scope
- orig_entry = self.lookup_here(name)
- if orig_entry and orig_entry.scope is self and not orig_entry.from_closure:
- error(pos, "'%s' redeclared as nonlocal" % name)
+ entry.is_local = 1
+
+ entry.in_with_gil_block = self._in_with_gil_block
+ self.var_entries.append(entry)
+ return entry
+
+ def declare_global(self, name, pos):
+ # Pull entry from global scope into local scope.
+ if self.lookup_here(name):
+            warning(pos, "'%s' redeclared " % name, 0)
+ else:
+ entry = self.global_scope().lookup_target(name)
+ self.entries[name] = entry
+
+ def declare_nonlocal(self, name, pos):
+ # Pull entry from outer scope into local scope
+ orig_entry = self.lookup_here(name)
+ if orig_entry and orig_entry.scope is self and not orig_entry.from_closure:
+ error(pos, "'%s' redeclared as nonlocal" % name)
orig_entry.already_declared_here()
- else:
- entry = self.lookup(name)
- if entry is None or not entry.from_closure:
- error(pos, "no binding for nonlocal '%s' found" % name)
-
- def lookup(self, name):
- # Look up name in this scope or an enclosing one.
- # Return None if not found.
- entry = Scope.lookup(self, name)
- if entry is not None:
+ else:
+ entry = self.lookup(name)
+ if entry is None or not entry.from_closure:
+ error(pos, "no binding for nonlocal '%s' found" % name)
+
+ def lookup(self, name):
+ # Look up name in this scope or an enclosing one.
+ # Return None if not found.
+ entry = Scope.lookup(self, name)
+ if entry is not None:
entry_scope = entry.scope
while entry_scope.is_genexpr_scope:
entry_scope = entry_scope.outer_scope
if entry_scope is not self and entry_scope.is_closure_scope:
- if hasattr(entry.scope, "scope_class"):
- raise InternalError("lookup() after scope class created.")
- # The actual c fragment for the different scopes differs
- # on the outside and inside, so we make a new entry
- entry.in_closure = True
- inner_entry = InnerEntry(entry, self)
- inner_entry.is_variable = True
- self.entries[name] = inner_entry
- return inner_entry
- return entry
-
- def mangle_closure_cnames(self, outer_scope_cname):
+ if hasattr(entry.scope, "scope_class"):
+ raise InternalError("lookup() after scope class created.")
+ # The actual c fragment for the different scopes differs
+ # on the outside and inside, so we make a new entry
+ entry.in_closure = True
+ inner_entry = InnerEntry(entry, self)
+ inner_entry.is_variable = True
+ self.entries[name] = inner_entry
+ return inner_entry
+ return entry
+
+ def mangle_closure_cnames(self, outer_scope_cname):
for scope in self.iter_local_scopes():
for entry in scope.entries.values():
if entry.from_closure:
@@ -1824,25 +1824,25 @@ class LocalScope(Scope):
elif entry.in_closure:
entry.original_cname = entry.cname
entry.cname = "%s->%s" % (Naming.cur_scope_cname, entry.cname)
-
-
-class GeneratorExpressionScope(Scope):
- """Scope for generator expressions and comprehensions. As opposed
- to generators, these can be easily inlined in some cases, so all
- we really need is a scope that holds the loop variable(s).
- """
+
+
+class GeneratorExpressionScope(Scope):
+ """Scope for generator expressions and comprehensions. As opposed
+ to generators, these can be easily inlined in some cases, so all
+ we really need is a scope that holds the loop variable(s).
+ """
is_genexpr_scope = True
- def __init__(self, outer_scope):
+ def __init__(self, outer_scope):
parent_scope = outer_scope
# TODO: also ignore class scopes?
while parent_scope.is_genexpr_scope:
parent_scope = parent_scope.parent_scope
name = parent_scope.global_scope().next_id(Naming.genexpr_id_ref)
Scope.__init__(self, name, outer_scope, parent_scope)
- self.directives = outer_scope.directives
- self.genexp_prefix = "%s%d%s" % (Naming.pyrex_prefix, len(name), name)
-
+ self.directives = outer_scope.directives
+ self.genexp_prefix = "%s%d%s" % (Naming.pyrex_prefix, len(name), name)
+
# Class/ExtType scopes are filled at class creation time, i.e. from the
# module init function or surrounding function.
while outer_scope.is_genexpr_scope or outer_scope.is_c_class_scope or outer_scope.is_py_class_scope:
@@ -1850,333 +1850,333 @@ class GeneratorExpressionScope(Scope):
self.var_entries = outer_scope.var_entries # keep declarations outside
outer_scope.subscopes.add(self)
- def mangle(self, prefix, name):
- return '%s%s' % (self.genexp_prefix, self.parent_scope.mangle(prefix, name))
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = True):
- if type is unspecified_type:
- # if the outer scope defines a type for this variable, inherit it
- outer_entry = self.outer_scope.lookup(name)
- if outer_entry and outer_entry.is_variable:
- type = outer_entry.type # may still be 'unspecified_type' !
- # the parent scope needs to generate code for the variable, but
- # this scope must hold its name exclusively
- cname = '%s%s' % (self.genexp_prefix, self.parent_scope.mangle(Naming.var_prefix, name or self.next_id()))
- entry = self.declare(name, cname, type, pos, visibility)
+ def mangle(self, prefix, name):
+ return '%s%s' % (self.genexp_prefix, self.parent_scope.mangle(prefix, name))
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = True):
+ if type is unspecified_type:
+ # if the outer scope defines a type for this variable, inherit it
+ outer_entry = self.outer_scope.lookup(name)
+ if outer_entry and outer_entry.is_variable:
+ type = outer_entry.type # may still be 'unspecified_type' !
+ # the parent scope needs to generate code for the variable, but
+ # this scope must hold its name exclusively
+ cname = '%s%s' % (self.genexp_prefix, self.parent_scope.mangle(Naming.var_prefix, name or self.next_id()))
+ entry = self.declare(name, cname, type, pos, visibility)
entry.is_variable = True
if self.parent_scope.is_module_scope:
entry.is_cglobal = True
else:
entry.is_local = True
entry.in_subscope = True
- self.var_entries.append(entry)
- self.entries[name] = entry
- return entry
-
- def declare_pyfunction(self, name, pos, allow_redefine=False):
- return self.outer_scope.declare_pyfunction(
- name, pos, allow_redefine)
-
- def declare_lambda_function(self, func_cname, pos):
- return self.outer_scope.declare_lambda_function(func_cname, pos)
-
- def add_lambda_def(self, def_node):
- return self.outer_scope.add_lambda_def(def_node)
-
-
-class ClosureScope(LocalScope):
-
- is_closure_scope = True
-
- def __init__(self, name, scope_name, outer_scope, parent_scope=None):
- LocalScope.__init__(self, name, outer_scope, parent_scope)
- self.closure_cname = "%s%s" % (Naming.closure_scope_prefix, scope_name)
-
-# def mangle_closure_cnames(self, scope_var):
-# for entry in self.entries.values() + self.temp_entries:
-# entry.in_closure = 1
-# LocalScope.mangle_closure_cnames(self, scope_var)
-
-# def mangle(self, prefix, name):
-# return "%s->%s" % (self.cur_scope_cname, name)
-# return "%s->%s" % (self.closure_cname, name)
-
- def declare_pyfunction(self, name, pos, allow_redefine=False):
- return LocalScope.declare_pyfunction(self, name, pos, allow_redefine, visibility='private')
-
-
-class StructOrUnionScope(Scope):
- # Namespace of a C struct or union.
-
- def __init__(self, name="?"):
- Scope.__init__(self, name, None, None)
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = 0,
+ self.var_entries.append(entry)
+ self.entries[name] = entry
+ return entry
+
+ def declare_pyfunction(self, name, pos, allow_redefine=False):
+ return self.outer_scope.declare_pyfunction(
+ name, pos, allow_redefine)
+
+ def declare_lambda_function(self, func_cname, pos):
+ return self.outer_scope.declare_lambda_function(func_cname, pos)
+
+ def add_lambda_def(self, def_node):
+ return self.outer_scope.add_lambda_def(def_node)
+
+
+class ClosureScope(LocalScope):
+
+ is_closure_scope = True
+
+ def __init__(self, name, scope_name, outer_scope, parent_scope=None):
+ LocalScope.__init__(self, name, outer_scope, parent_scope)
+ self.closure_cname = "%s%s" % (Naming.closure_scope_prefix, scope_name)
+
+# def mangle_closure_cnames(self, scope_var):
+# for entry in self.entries.values() + self.temp_entries:
+# entry.in_closure = 1
+# LocalScope.mangle_closure_cnames(self, scope_var)
+
+# def mangle(self, prefix, name):
+# return "%s->%s" % (self.cur_scope_cname, name)
+# return "%s->%s" % (self.closure_cname, name)
+
+ def declare_pyfunction(self, name, pos, allow_redefine=False):
+ return LocalScope.declare_pyfunction(self, name, pos, allow_redefine, visibility='private')
+
+
+class StructOrUnionScope(Scope):
+ # Namespace of a C struct or union.
+
+ def __init__(self, name="?"):
+ Scope.__init__(self, name, None, None)
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = 0,
allow_pyobject=False, allow_memoryview=False):
- # Add an entry for an attribute.
- if not cname:
- cname = name
- if visibility == 'private':
- cname = c_safe_identifier(cname)
- if type.is_cfunction:
- type = PyrexTypes.CPtrType(type)
- entry = self.declare(name, cname, type, pos, visibility)
- entry.is_variable = 1
- self.var_entries.append(entry)
- if type.is_pyobject and not allow_pyobject:
+ # Add an entry for an attribute.
+ if not cname:
+ cname = name
+ if visibility == 'private':
+ cname = c_safe_identifier(cname)
+ if type.is_cfunction:
+ type = PyrexTypes.CPtrType(type)
+ entry = self.declare(name, cname, type, pos, visibility)
+ entry.is_variable = 1
+ self.var_entries.append(entry)
+ if type.is_pyobject and not allow_pyobject:
error(pos, "C struct/union member cannot be a Python object")
elif type.is_memoryviewslice and not allow_memoryview:
# Memory views wrap their buffer owner as a Python object.
error(pos, "C struct/union member cannot be a memory view")
- if visibility != 'private':
+ if visibility != 'private':
error(pos, "C struct/union member cannot be declared %s" % visibility)
- return entry
-
- def declare_cfunction(self, name, type, pos,
+ return entry
+
+ def declare_cfunction(self, name, type, pos,
cname=None, visibility='private', api=0, in_pxd=0,
defining=0, modifiers=(), overridable=False): # currently no utility code ...
if overridable:
error(pos, "C struct/union member cannot be declared 'cpdef'")
- return self.declare_var(name, type, pos,
- cname=cname, visibility=visibility)
-
-
-class ClassScope(Scope):
- # Abstract base class for namespace of
- # Python class or extension type.
- #
- # class_name string Python name of the class
- # scope_prefix string Additional prefix for names
- # declared in the class
- # doc string or None Doc string
-
- def __init__(self, name, outer_scope):
- Scope.__init__(self, name, outer_scope, outer_scope)
- self.class_name = name
- self.doc = None
-
- def lookup(self, name):
- entry = Scope.lookup(self, name)
- if entry:
- return entry
- if name == "classmethod":
- # We don't want to use the builtin classmethod here 'cause it won't do the
- # right thing in this scope (as the class members aren't still functions).
- # Don't want to add a cfunction to this scope 'cause that would mess with
- # the type definition, so we just return the right entry.
- entry = Entry(
- "classmethod",
- "__Pyx_Method_ClassMethod",
- PyrexTypes.CFuncType(
- py_object_type,
- [PyrexTypes.CFuncTypeArg("", py_object_type, None)], 0, 0))
- entry.utility_code_definition = Code.UtilityCode.load_cached("ClassMethod", "CythonFunction.c")
+ return self.declare_var(name, type, pos,
+ cname=cname, visibility=visibility)
+
+
+class ClassScope(Scope):
+ # Abstract base class for namespace of
+ # Python class or extension type.
+ #
+ # class_name string Python name of the class
+ # scope_prefix string Additional prefix for names
+ # declared in the class
+ # doc string or None Doc string
+
+ def __init__(self, name, outer_scope):
+ Scope.__init__(self, name, outer_scope, outer_scope)
+ self.class_name = name
+ self.doc = None
+
+ def lookup(self, name):
+ entry = Scope.lookup(self, name)
+ if entry:
+ return entry
+ if name == "classmethod":
+ # We don't want to use the builtin classmethod here 'cause it won't do the
+ # right thing in this scope (as the class members aren't still functions).
+ # Don't want to add a cfunction to this scope 'cause that would mess with
+ # the type definition, so we just return the right entry.
+ entry = Entry(
+ "classmethod",
+ "__Pyx_Method_ClassMethod",
+ PyrexTypes.CFuncType(
+ py_object_type,
+ [PyrexTypes.CFuncTypeArg("", py_object_type, None)], 0, 0))
+ entry.utility_code_definition = Code.UtilityCode.load_cached("ClassMethod", "CythonFunction.c")
self.use_entry_utility_code(entry)
- entry.is_cfunction = 1
- return entry
-
-
-class PyClassScope(ClassScope):
- # Namespace of a Python class.
- #
- # class_obj_cname string C variable holding class object
-
- is_py_class_scope = 1
-
- def mangle_class_private_name(self, name):
- return self.mangle_special_name(name)
-
- def mangle_special_name(self, name):
- if name and name.startswith('__') and not name.endswith('__'):
- name = EncodedString('_%s%s' % (self.class_name.lstrip('_'), name))
- return name
-
- def lookup_here(self, name):
- name = self.mangle_special_name(name)
- return ClassScope.lookup_here(self, name)
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = 0):
- name = self.mangle_special_name(name)
- if type is unspecified_type:
- type = py_object_type
- # Add an entry for a class attribute.
- entry = Scope.declare_var(self, name, type, pos,
- cname=cname, visibility=visibility,
- api=api, in_pxd=in_pxd, is_cdef=is_cdef)
- entry.is_pyglobal = 1
- entry.is_pyclass_attr = 1
- return entry
-
- def declare_nonlocal(self, name, pos):
- # Pull entry from outer scope into local scope
- orig_entry = self.lookup_here(name)
- if orig_entry and orig_entry.scope is self and not orig_entry.from_closure:
- error(pos, "'%s' redeclared as nonlocal" % name)
+ entry.is_cfunction = 1
+ return entry
+
+
+class PyClassScope(ClassScope):
+ # Namespace of a Python class.
+ #
+ # class_obj_cname string C variable holding class object
+
+ is_py_class_scope = 1
+
+ def mangle_class_private_name(self, name):
+ return self.mangle_special_name(name)
+
+ def mangle_special_name(self, name):
+ if name and name.startswith('__') and not name.endswith('__'):
+ name = EncodedString('_%s%s' % (self.class_name.lstrip('_'), name))
+ return name
+
+ def lookup_here(self, name):
+ name = self.mangle_special_name(name)
+ return ClassScope.lookup_here(self, name)
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = 0):
+ name = self.mangle_special_name(name)
+ if type is unspecified_type:
+ type = py_object_type
+ # Add an entry for a class attribute.
+ entry = Scope.declare_var(self, name, type, pos,
+ cname=cname, visibility=visibility,
+ api=api, in_pxd=in_pxd, is_cdef=is_cdef)
+ entry.is_pyglobal = 1
+ entry.is_pyclass_attr = 1
+ return entry
+
+ def declare_nonlocal(self, name, pos):
+ # Pull entry from outer scope into local scope
+ orig_entry = self.lookup_here(name)
+ if orig_entry and orig_entry.scope is self and not orig_entry.from_closure:
+ error(pos, "'%s' redeclared as nonlocal" % name)
orig_entry.already_declared_here()
- else:
- entry = self.lookup(name)
- if entry is None:
- error(pos, "no binding for nonlocal '%s' found" % name)
- else:
- # FIXME: this works, but it's unclear if it's the
- # right thing to do
- self.entries[name] = entry
-
- def declare_global(self, name, pos):
- # Pull entry from global scope into local scope.
- if self.lookup_here(name):
-            warning(pos, "'%s' redeclared " % name, 0)
- else:
- entry = self.global_scope().lookup_target(name)
- self.entries[name] = entry
-
- def add_default_value(self, type):
- return self.outer_scope.add_default_value(type)
-
-
-class CClassScope(ClassScope):
- # Namespace of an extension type.
- #
- # parent_type CClassType
- # #typeobj_cname string or None
- # #objstruct_cname string
- # method_table_cname string
- # getset_table_cname string
- # has_pyobject_attrs boolean Any PyObject attributes?
- # has_memoryview_attrs boolean Any memory view attributes?
+ else:
+ entry = self.lookup(name)
+ if entry is None:
+ error(pos, "no binding for nonlocal '%s' found" % name)
+ else:
+ # FIXME: this works, but it's unclear if it's the
+ # right thing to do
+ self.entries[name] = entry
+
+ def declare_global(self, name, pos):
+ # Pull entry from global scope into local scope.
+ if self.lookup_here(name):
+            warning(pos, "'%s' redeclared " % name, 0)
+ else:
+ entry = self.global_scope().lookup_target(name)
+ self.entries[name] = entry
+
+ def add_default_value(self, type):
+ return self.outer_scope.add_default_value(type)
+
+
+class CClassScope(ClassScope):
+ # Namespace of an extension type.
+ #
+ # parent_type CClassType
+ # #typeobj_cname string or None
+ # #objstruct_cname string
+ # method_table_cname string
+ # getset_table_cname string
+ # has_pyobject_attrs boolean Any PyObject attributes?
+ # has_memoryview_attrs boolean Any memory view attributes?
# has_cpp_class_attrs boolean Any (non-pointer) C++ attributes?
- # has_cyclic_pyobject_attrs boolean Any PyObject attributes that may need GC?
- # property_entries [Entry]
- # defined boolean Defined in .pxd file
- # implemented boolean Defined in .pyx file
- # inherited_var_entries [Entry] Adapted var entries from base class
-
- is_c_class_scope = 1
+ # has_cyclic_pyobject_attrs boolean Any PyObject attributes that may need GC?
+ # property_entries [Entry]
+ # defined boolean Defined in .pxd file
+ # implemented boolean Defined in .pyx file
+ # inherited_var_entries [Entry] Adapted var entries from base class
+
+ is_c_class_scope = 1
is_closure_class_scope = False
-
- has_pyobject_attrs = False
- has_memoryview_attrs = False
+
+ has_pyobject_attrs = False
+ has_memoryview_attrs = False
has_cpp_class_attrs = False
- has_cyclic_pyobject_attrs = False
- defined = False
- implemented = False
-
- def __init__(self, name, outer_scope, visibility):
- ClassScope.__init__(self, name, outer_scope)
- if visibility != 'extern':
- self.method_table_cname = outer_scope.mangle(Naming.methtab_prefix, name)
- self.getset_table_cname = outer_scope.mangle(Naming.gstab_prefix, name)
- self.property_entries = []
- self.inherited_var_entries = []
-
- def needs_gc(self):
- # If the type or any of its base types have Python-valued
- # C attributes, then it needs to participate in GC.
+ has_cyclic_pyobject_attrs = False
+ defined = False
+ implemented = False
+
+ def __init__(self, name, outer_scope, visibility):
+ ClassScope.__init__(self, name, outer_scope)
+ if visibility != 'extern':
+ self.method_table_cname = outer_scope.mangle(Naming.methtab_prefix, name)
+ self.getset_table_cname = outer_scope.mangle(Naming.gstab_prefix, name)
+ self.property_entries = []
+ self.inherited_var_entries = []
+
+ def needs_gc(self):
+ # If the type or any of its base types have Python-valued
+ # C attributes, then it needs to participate in GC.
if self.has_cyclic_pyobject_attrs and not self.directives.get('no_gc', False):
- return True
- base_type = self.parent_type.base_type
- if base_type and base_type.scope is not None:
- return base_type.scope.needs_gc()
- elif self.parent_type.is_builtin_type:
- return not self.parent_type.is_gc_simple
- return False
-
- def needs_tp_clear(self):
- """
- Do we need to generate an implementation for the tp_clear slot? Can
- be disabled to keep references for the __dealloc__ cleanup function.
- """
- return self.needs_gc() and not self.directives.get('no_gc_clear', False)
-
- def get_refcounted_entries(self, include_weakref=False,
- include_gc_simple=True):
- py_attrs = []
- py_buffers = []
- memoryview_slices = []
-
- for entry in self.var_entries:
- if entry.type.is_pyobject:
+ return True
+ base_type = self.parent_type.base_type
+ if base_type and base_type.scope is not None:
+ return base_type.scope.needs_gc()
+ elif self.parent_type.is_builtin_type:
+ return not self.parent_type.is_gc_simple
+ return False
+
+ def needs_tp_clear(self):
+ """
+ Do we need to generate an implementation for the tp_clear slot? Can
+ be disabled to keep references for the __dealloc__ cleanup function.
+ """
+ return self.needs_gc() and not self.directives.get('no_gc_clear', False)
+
+ def get_refcounted_entries(self, include_weakref=False,
+ include_gc_simple=True):
+ py_attrs = []
+ py_buffers = []
+ memoryview_slices = []
+
+ for entry in self.var_entries:
+ if entry.type.is_pyobject:
if include_weakref or (self.is_closure_class_scope or entry.name != "__weakref__"):
- if include_gc_simple or not entry.type.is_gc_simple:
- py_attrs.append(entry)
- elif entry.type == PyrexTypes.c_py_buffer_type:
- py_buffers.append(entry)
- elif entry.type.is_memoryviewslice:
- memoryview_slices.append(entry)
-
- have_entries = py_attrs or py_buffers or memoryview_slices
- return have_entries, (py_attrs, py_buffers, memoryview_slices)
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'private',
- api = 0, in_pxd = 0, is_cdef = 0):
- if is_cdef:
- # Add an entry for an attribute.
- if self.defined:
- error(pos,
- "C attributes cannot be added in implementation part of"
- " extension type defined in a pxd")
+ if include_gc_simple or not entry.type.is_gc_simple:
+ py_attrs.append(entry)
+ elif entry.type == PyrexTypes.c_py_buffer_type:
+ py_buffers.append(entry)
+ elif entry.type.is_memoryviewslice:
+ memoryview_slices.append(entry)
+
+ have_entries = py_attrs or py_buffers or memoryview_slices
+ return have_entries, (py_attrs, py_buffers, memoryview_slices)
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'private',
+ api = 0, in_pxd = 0, is_cdef = 0):
+ if is_cdef:
+ # Add an entry for an attribute.
+ if self.defined:
+ error(pos,
+ "C attributes cannot be added in implementation part of"
+ " extension type defined in a pxd")
if not self.is_closure_class_scope and get_special_method_signature(name):
- error(pos,
- "The name '%s' is reserved for a special method."
- % name)
- if not cname:
- cname = name
- if visibility == 'private':
- cname = c_safe_identifier(cname)
- if type.is_cpp_class and visibility != 'extern':
- type.check_nullary_constructor(pos)
- self.use_utility_code(Code.UtilityCode("#include <new>"))
- entry = self.declare(name, cname, type, pos, visibility)
- entry.is_variable = 1
- self.var_entries.append(entry)
- if type.is_memoryviewslice:
- self.has_memoryview_attrs = True
+ error(pos,
+ "The name '%s' is reserved for a special method."
+ % name)
+ if not cname:
+ cname = name
+ if visibility == 'private':
+ cname = c_safe_identifier(cname)
+ if type.is_cpp_class and visibility != 'extern':
+ type.check_nullary_constructor(pos)
+ self.use_utility_code(Code.UtilityCode("#include <new>"))
+ entry = self.declare(name, cname, type, pos, visibility)
+ entry.is_variable = 1
+ self.var_entries.append(entry)
+ if type.is_memoryviewslice:
+ self.has_memoryview_attrs = True
elif type.is_cpp_class:
self.has_cpp_class_attrs = True
elif type.is_pyobject and (self.is_closure_class_scope or name != '__weakref__'):
- self.has_pyobject_attrs = True
- if (not type.is_builtin_type
- or not type.scope or type.scope.needs_gc()):
- self.has_cyclic_pyobject_attrs = True
- if visibility not in ('private', 'public', 'readonly'):
- error(pos,
- "Attribute of extension type cannot be declared %s" % visibility)
- if visibility in ('public', 'readonly'):
- # If the field is an external typedef, we cannot be sure about the type,
-                # so do the conversion ourselves rather than rely on the CPython mechanism (through
- # a property; made in AnalyseDeclarationsTransform).
- entry.needs_property = True
+ self.has_pyobject_attrs = True
+ if (not type.is_builtin_type
+ or not type.scope or type.scope.needs_gc()):
+ self.has_cyclic_pyobject_attrs = True
+ if visibility not in ('private', 'public', 'readonly'):
+ error(pos,
+ "Attribute of extension type cannot be declared %s" % visibility)
+ if visibility in ('public', 'readonly'):
+ # If the field is an external typedef, we cannot be sure about the type,
+                # so do the conversion ourselves rather than rely on the CPython mechanism (through
+ # a property; made in AnalyseDeclarationsTransform).
+ entry.needs_property = True
if not self.is_closure_class_scope and name == "__weakref__":
- error(pos, "Special attribute __weakref__ cannot be exposed to Python")
+ error(pos, "Special attribute __weakref__ cannot be exposed to Python")
if not (type.is_pyobject or type.can_coerce_to_pyobject(self)):
# we're not testing for coercion *from* Python here - that would fail later
error(pos, "C attribute of type '%s' cannot be accessed from Python" % type)
- else:
- entry.needs_property = False
- return entry
- else:
- if type is unspecified_type:
- type = py_object_type
- # Add an entry for a class attribute.
- entry = Scope.declare_var(self, name, type, pos,
- cname=cname, visibility=visibility,
- api=api, in_pxd=in_pxd, is_cdef=is_cdef)
- entry.is_member = 1
- entry.is_pyglobal = 1 # xxx: is_pyglobal changes behaviour in so many places that
- # I keep it in for now. is_member should be enough
- # later on
- self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname
- return entry
-
- def declare_pyfunction(self, name, pos, allow_redefine=False):
- # Add an entry for a method.
+ else:
+ entry.needs_property = False
+ return entry
+ else:
+ if type is unspecified_type:
+ type = py_object_type
+ # Add an entry for a class attribute.
+ entry = Scope.declare_var(self, name, type, pos,
+ cname=cname, visibility=visibility,
+ api=api, in_pxd=in_pxd, is_cdef=is_cdef)
+ entry.is_member = 1
+ entry.is_pyglobal = 1 # xxx: is_pyglobal changes behaviour in so many places that
+ # I keep it in for now. is_member should be enough
+ # later on
+ self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname
+ return entry
+
+ def declare_pyfunction(self, name, pos, allow_redefine=False):
+ # Add an entry for a method.
if name in richcmp_special_methods:
if self.lookup_here('__richcmp__'):
                 error(pos, "Cannot define both %s and __richcmp__" % name)
@@ -2184,67 +2184,67 @@ class CClassScope(ClassScope):
for n in richcmp_special_methods:
if self.lookup_here(n):
                     error(pos, "Cannot define both %s and __richcmp__" % n)
- if name == "__new__":
- error(pos, "__new__ method of extension type will change semantics "
- "in a future version of Pyrex and Cython. Use __cinit__ instead.")
- entry = self.declare_var(name, py_object_type, pos,
- visibility='extern')
- special_sig = get_special_method_signature(name)
- if special_sig:
- # Special methods get put in the method table with a particular
- # signature declared in advance.
- entry.signature = special_sig
- entry.is_special = 1
- else:
- entry.signature = pymethod_signature
- entry.is_special = 0
-
- self.pyfunc_entries.append(entry)
- return entry
-
- def lookup_here(self, name):
+ if name == "__new__":
+ error(pos, "__new__ method of extension type will change semantics "
+ "in a future version of Pyrex and Cython. Use __cinit__ instead.")
+ entry = self.declare_var(name, py_object_type, pos,
+ visibility='extern')
+ special_sig = get_special_method_signature(name)
+ if special_sig:
+ # Special methods get put in the method table with a particular
+ # signature declared in advance.
+ entry.signature = special_sig
+ entry.is_special = 1
+ else:
+ entry.signature = pymethod_signature
+ entry.is_special = 0
+
+ self.pyfunc_entries.append(entry)
+ return entry
+
+ def lookup_here(self, name):
if not self.is_closure_class_scope and name == "__new__":
- name = EncodedString("__cinit__")
- entry = ClassScope.lookup_here(self, name)
- if entry and entry.is_builtin_cmethod:
- if not self.parent_type.is_builtin_type:
- # For subtypes of builtin types, we can only return
-                # optimised C methods if the type is final.
- # Otherwise, subtypes may choose to override the
- # method, but the optimisation would prevent the
- # subtype method from being called.
- if not self.parent_type.is_final_type:
- return None
- return entry
-
- def declare_cfunction(self, name, type, pos,
+ name = EncodedString("__cinit__")
+ entry = ClassScope.lookup_here(self, name)
+ if entry and entry.is_builtin_cmethod:
+ if not self.parent_type.is_builtin_type:
+ # For subtypes of builtin types, we can only return
+                # optimised C methods if the type is final.
+ # Otherwise, subtypes may choose to override the
+ # method, but the optimisation would prevent the
+ # subtype method from being called.
+ if not self.parent_type.is_final_type:
+ return None
+ return entry
+
+ def declare_cfunction(self, name, type, pos,
cname=None, visibility='private', api=0, in_pxd=0,
defining=0, modifiers=(), utility_code=None, overridable=False):
- if get_special_method_signature(name) and not self.parent_type.is_builtin_type:
- error(pos, "Special methods must be declared with 'def', not 'cdef'")
- args = type.args
- if not type.is_static_method:
- if not args:
- error(pos, "C method has no self argument")
- elif not self.parent_type.assignable_from(args[0].type):
- error(pos, "Self argument (%s) of C method '%s' does not match parent type (%s)" %
- (args[0].type, name, self.parent_type))
- entry = self.lookup_here(name)
- if cname is None:
- cname = c_safe_identifier(name)
- if entry:
- if not entry.is_cfunction:
- warning(pos, "'%s' redeclared " % name, 0)
- else:
- if defining and entry.func_cname:
- error(pos, "'%s' already defined" % name)
- #print "CClassScope.declare_cfunction: checking signature" ###
- if entry.is_final_cmethod and entry.is_inherited:
- error(pos, "Overriding final methods is not allowed")
- elif type.same_c_signature_as(entry.type, as_cmethod = 1) and type.nogil == entry.type.nogil:
+ if get_special_method_signature(name) and not self.parent_type.is_builtin_type:
+ error(pos, "Special methods must be declared with 'def', not 'cdef'")
+ args = type.args
+ if not type.is_static_method:
+ if not args:
+ error(pos, "C method has no self argument")
+ elif not self.parent_type.assignable_from(args[0].type):
+ error(pos, "Self argument (%s) of C method '%s' does not match parent type (%s)" %
+ (args[0].type, name, self.parent_type))
+ entry = self.lookup_here(name)
+ if cname is None:
+ cname = c_safe_identifier(name)
+ if entry:
+ if not entry.is_cfunction:
+ warning(pos, "'%s' redeclared " % name, 0)
+ else:
+ if defining and entry.func_cname:
+ error(pos, "'%s' already defined" % name)
+ #print "CClassScope.declare_cfunction: checking signature" ###
+ if entry.is_final_cmethod and entry.is_inherited:
+ error(pos, "Overriding final methods is not allowed")
+ elif type.same_c_signature_as(entry.type, as_cmethod = 1) and type.nogil == entry.type.nogil:
# Fix with_gil vs nogil.
entry.type = entry.type.with_with_gil(type.with_gil)
- elif type.compatible_signature_with(entry.type, as_cmethod = 1) and type.nogil == entry.type.nogil:
+ elif type.compatible_signature_with(entry.type, as_cmethod = 1) and type.nogil == entry.type.nogil:
if (self.defined and not in_pxd
and not type.same_c_signature_as_resolved_type(entry.type, as_cmethod = 1, as_pxd_definition = 1)):
# TODO(robertwb): Make this an error.
@@ -2254,162 +2254,162 @@ class CClassScope(ClassScope):
"This may cause incorrect vtables to be generated." % (
name, self.class_name), 2)
warning(entry.pos, "Previous declaration is here", 2)
- entry = self.add_cfunction(name, type, pos, cname, visibility='ignore', modifiers=modifiers)
- else:
- error(pos, "Signature not compatible with previous declaration")
- error(entry.pos, "Previous declaration is here")
- else:
- if self.defined:
- error(pos,
- "C method '%s' not previously declared in definition part of"
+ entry = self.add_cfunction(name, type, pos, cname, visibility='ignore', modifiers=modifiers)
+ else:
+ error(pos, "Signature not compatible with previous declaration")
+ error(entry.pos, "Previous declaration is here")
+ else:
+ if self.defined:
+ error(pos,
+ "C method '%s' not previously declared in definition part of"
" extension type '%s'" % (name, self.class_name))
entry = self.add_cfunction(name, type, pos, cname, visibility, modifiers)
- if defining:
- entry.func_cname = self.mangle(Naming.func_prefix, name)
- entry.utility_code = utility_code
- type.entry = entry
-
- if u'inline' in modifiers:
- entry.is_inline_cmethod = True
-
- if (self.parent_type.is_final_type or entry.is_inline_cmethod or
- self.directives.get('final')):
- entry.is_final_cmethod = True
- entry.final_func_cname = entry.func_cname
-
- return entry
-
+ if defining:
+ entry.func_cname = self.mangle(Naming.func_prefix, name)
+ entry.utility_code = utility_code
+ type.entry = entry
+
+ if u'inline' in modifiers:
+ entry.is_inline_cmethod = True
+
+ if (self.parent_type.is_final_type or entry.is_inline_cmethod or
+ self.directives.get('final')):
+ entry.is_final_cmethod = True
+ entry.final_func_cname = entry.func_cname
+
+ return entry
+
def add_cfunction(self, name, type, pos, cname, visibility, modifiers, inherited=False):
- # Add a cfunction entry without giving it a func_cname.
- prev_entry = self.lookup_here(name)
- entry = ClassScope.add_cfunction(self, name, type, pos, cname,
+ # Add a cfunction entry without giving it a func_cname.
+ prev_entry = self.lookup_here(name)
+ entry = ClassScope.add_cfunction(self, name, type, pos, cname,
visibility, modifiers, inherited=inherited)
- entry.is_cmethod = 1
- entry.prev_entry = prev_entry
- return entry
-
- def declare_builtin_cfunction(self, name, type, cname, utility_code = None):
- # overridden methods of builtin types still have their Python
- # equivalent that must be accessible to support bound methods
- name = EncodedString(name)
- entry = self.declare_cfunction(name, type, None, cname, visibility='extern',
+ entry.is_cmethod = 1
+ entry.prev_entry = prev_entry
+ return entry
+
+ def declare_builtin_cfunction(self, name, type, cname, utility_code = None):
+ # overridden methods of builtin types still have their Python
+ # equivalent that must be accessible to support bound methods
+ name = EncodedString(name)
+ entry = self.declare_cfunction(name, type, None, cname, visibility='extern',
utility_code=utility_code)
- var_entry = Entry(name, name, py_object_type)
+ var_entry = Entry(name, name, py_object_type)
var_entry.qualified_name = name
- var_entry.is_variable = 1
- var_entry.is_builtin = 1
- var_entry.utility_code = utility_code
+ var_entry.is_variable = 1
+ var_entry.is_builtin = 1
+ var_entry.utility_code = utility_code
var_entry.scope = entry.scope
- entry.as_variable = var_entry
- return entry
-
- def declare_property(self, name, doc, pos):
- entry = self.lookup_here(name)
- if entry is None:
- entry = self.declare(name, name, py_object_type, pos, 'private')
- entry.is_property = 1
- entry.doc = doc
- entry.scope = PropertyScope(name,
- outer_scope = self.global_scope(), parent_scope = self)
- entry.scope.parent_type = self.parent_type
- self.property_entries.append(entry)
- return entry
-
- def declare_inherited_c_attributes(self, base_scope):
- # Declare entries for all the C attributes of an
- # inherited type, with cnames modified appropriately
- # to work with this type.
- def adapt(cname):
- return "%s.%s" % (Naming.obj_base_cname, base_entry.cname)
-
- entries = base_scope.inherited_var_entries + base_scope.var_entries
- for base_entry in entries:
- entry = self.declare(
- base_entry.name, adapt(base_entry.cname),
- base_entry.type, None, 'private')
- entry.is_variable = 1
- self.inherited_var_entries.append(entry)
-
-        # If the class is defined in a pxd, specific entries have not been added.
- # Ensure now that the parent (base) scope has specific entries
- # Iterate over a copy as get_all_specialized_function_types() will mutate
- for base_entry in base_scope.cfunc_entries[:]:
- if base_entry.type.is_fused:
- base_entry.type.get_all_specialized_function_types()
-
- for base_entry in base_scope.cfunc_entries:
- cname = base_entry.cname
- var_entry = base_entry.as_variable
- is_builtin = var_entry and var_entry.is_builtin
- if not is_builtin:
- cname = adapt(cname)
- entry = self.add_cfunction(base_entry.name, base_entry.type,
- base_entry.pos, cname,
+ entry.as_variable = var_entry
+ return entry
+
+ def declare_property(self, name, doc, pos):
+ entry = self.lookup_here(name)
+ if entry is None:
+ entry = self.declare(name, name, py_object_type, pos, 'private')
+ entry.is_property = 1
+ entry.doc = doc
+ entry.scope = PropertyScope(name,
+ outer_scope = self.global_scope(), parent_scope = self)
+ entry.scope.parent_type = self.parent_type
+ self.property_entries.append(entry)
+ return entry
+
+ def declare_inherited_c_attributes(self, base_scope):
+ # Declare entries for all the C attributes of an
+ # inherited type, with cnames modified appropriately
+ # to work with this type.
+ def adapt(cname):
+ return "%s.%s" % (Naming.obj_base_cname, base_entry.cname)
+
+ entries = base_scope.inherited_var_entries + base_scope.var_entries
+ for base_entry in entries:
+ entry = self.declare(
+ base_entry.name, adapt(base_entry.cname),
+ base_entry.type, None, 'private')
+ entry.is_variable = 1
+ self.inherited_var_entries.append(entry)
+
+        # If the class is defined in a pxd, specific entries have not been added.
+ # Ensure now that the parent (base) scope has specific entries
+ # Iterate over a copy as get_all_specialized_function_types() will mutate
+ for base_entry in base_scope.cfunc_entries[:]:
+ if base_entry.type.is_fused:
+ base_entry.type.get_all_specialized_function_types()
+
+ for base_entry in base_scope.cfunc_entries:
+ cname = base_entry.cname
+ var_entry = base_entry.as_variable
+ is_builtin = var_entry and var_entry.is_builtin
+ if not is_builtin:
+ cname = adapt(cname)
+ entry = self.add_cfunction(base_entry.name, base_entry.type,
+ base_entry.pos, cname,
base_entry.visibility, base_entry.func_modifiers, inherited=True)
- entry.is_inherited = 1
- if base_entry.is_final_cmethod:
- entry.is_final_cmethod = True
- entry.is_inline_cmethod = base_entry.is_inline_cmethod
- if (self.parent_scope == base_scope.parent_scope or
- entry.is_inline_cmethod):
- entry.final_func_cname = base_entry.final_func_cname
- if is_builtin:
- entry.is_builtin_cmethod = True
- entry.as_variable = var_entry
- if base_entry.utility_code:
- entry.utility_code = base_entry.utility_code
-
-
-class CppClassScope(Scope):
- # Namespace of a C++ class.
-
- is_cpp_class_scope = 1
-
- default_constructor = None
- type = None
-
- def __init__(self, name, outer_scope, templates=None):
- Scope.__init__(self, name, outer_scope, None)
- self.directives = outer_scope.directives
- self.inherited_var_entries = []
- if templates is not None:
- for T in templates:
- template_entry = self.declare(
- T, T, PyrexTypes.TemplatePlaceholderType(T), None, 'extern')
- template_entry.is_type = 1
-
- def declare_var(self, name, type, pos,
- cname = None, visibility = 'extern',
+ entry.is_inherited = 1
+ if base_entry.is_final_cmethod:
+ entry.is_final_cmethod = True
+ entry.is_inline_cmethod = base_entry.is_inline_cmethod
+ if (self.parent_scope == base_scope.parent_scope or
+ entry.is_inline_cmethod):
+ entry.final_func_cname = base_entry.final_func_cname
+ if is_builtin:
+ entry.is_builtin_cmethod = True
+ entry.as_variable = var_entry
+ if base_entry.utility_code:
+ entry.utility_code = base_entry.utility_code
+
+
+class CppClassScope(Scope):
+ # Namespace of a C++ class.
+
+ is_cpp_class_scope = 1
+
+ default_constructor = None
+ type = None
+
+ def __init__(self, name, outer_scope, templates=None):
+ Scope.__init__(self, name, outer_scope, None)
+ self.directives = outer_scope.directives
+ self.inherited_var_entries = []
+ if templates is not None:
+ for T in templates:
+ template_entry = self.declare(
+ T, T, PyrexTypes.TemplatePlaceholderType(T), None, 'extern')
+ template_entry.is_type = 1
+
+ def declare_var(self, name, type, pos,
+ cname = None, visibility = 'extern',
api = 0, in_pxd = 0, is_cdef = 0, defining = 0):
- # Add an entry for an attribute.
- if not cname:
- cname = name
- entry = self.lookup_here(name)
- if defining and entry is not None:
+ # Add an entry for an attribute.
+ if not cname:
+ cname = name
+ entry = self.lookup_here(name)
+ if defining and entry is not None:
if entry.type.same_as(type):
# Fix with_gil vs nogil.
entry.type = entry.type.with_with_gil(type.with_gil)
elif type.is_cfunction and type.compatible_signature_with(entry.type):
entry.type = type
else:
- error(pos, "Function signature does not match previous declaration")
- else:
- entry = self.declare(name, cname, type, pos, visibility)
- entry.is_variable = 1
- if type.is_cfunction and self.type:
+ error(pos, "Function signature does not match previous declaration")
+ else:
+ entry = self.declare(name, cname, type, pos, visibility)
+ entry.is_variable = 1
+ if type.is_cfunction and self.type:
if not self.type.get_fused_types():
entry.func_cname = "%s::%s" % (self.type.empty_declaration_code(), cname)
- if name != "this" and (defining or name != "<init>"):
- self.var_entries.append(entry)
- return entry
-
- def declare_cfunction(self, name, type, pos,
+ if name != "this" and (defining or name != "<init>"):
+ self.var_entries.append(entry)
+ return entry
+
+ def declare_cfunction(self, name, type, pos,
cname=None, visibility='extern', api=0, in_pxd=0,
defining=0, modifiers=(), utility_code=None, overridable=False):
class_name = self.name.split('::')[-1]
if name in (class_name, '__init__') and cname is None:
cname = "%s__init__%s" % (Naming.func_prefix, class_name)
- name = '<init>'
+ name = '<init>'
type.return_type = PyrexTypes.CVoidType()
# This is called by the actual constructor, but need to support
         # arguments that cannot be passed by value.
@@ -2421,9 +2421,9 @@ class CppClassScope(Scope):
else:
return arg
type.args = [maybe_ref(arg) for arg in type.args]
- elif name == '__dealloc__' and cname is None:
+ elif name == '__dealloc__' and cname is None:
cname = "%s__dealloc__%s" % (Naming.func_prefix, class_name)
- name = '<del>'
+ name = '<del>'
type.return_type = PyrexTypes.CVoidType()
if name in ('<init>', '<del>') and type.nogil:
for base in self.type.base_classes:
@@ -2431,16 +2431,16 @@ class CppClassScope(Scope):
if base_entry and not base_entry.type.nogil:
error(pos, "Constructor cannot be called without GIL unless all base constructors can also be called without GIL")
error(base_entry.pos, "Base constructor defined here.")
- prev_entry = self.lookup_here(name)
- entry = self.declare_var(name, type, pos,
- defining=defining,
- cname=cname, visibility=visibility)
- if prev_entry and not defining:
- entry.overloaded_alternatives = prev_entry.all_alternatives()
- entry.utility_code = utility_code
- type.entry = entry
- return entry
-
+ prev_entry = self.lookup_here(name)
+ entry = self.declare_var(name, type, pos,
+ defining=defining,
+ cname=cname, visibility=visibility)
+ if prev_entry and not defining:
+ entry.overloaded_alternatives = prev_entry.all_alternatives()
+ entry.utility_code = utility_code
+ type.entry = entry
+ return entry
+
def declare_inherited_cpp_attributes(self, base_class):
base_scope = base_class.scope
template_type = base_class
@@ -2450,103 +2450,103 @@ class CppClassScope(Scope):
base_templates = [T.name for T in template_type.templates]
else:
base_templates = ()
- # Declare entries for all the C++ attributes of an
- # inherited type, with cnames modified appropriately
- # to work with this type.
- for base_entry in \
- base_scope.inherited_var_entries + base_scope.var_entries:
+ # Declare entries for all the C++ attributes of an
+ # inherited type, with cnames modified appropriately
+ # to work with this type.
+ for base_entry in \
+ base_scope.inherited_var_entries + base_scope.var_entries:
#constructor/destructor is not inherited
if base_entry.name in ("<init>", "<del>"):
- continue
- #print base_entry.name, self.entries
- if base_entry.name in self.entries:
- base_entry.name # FIXME: is there anything to do in this case?
- entry = self.declare(base_entry.name, base_entry.cname,
- base_entry.type, None, 'extern')
- entry.is_variable = 1
+ continue
+ #print base_entry.name, self.entries
+ if base_entry.name in self.entries:
+ base_entry.name # FIXME: is there anything to do in this case?
+ entry = self.declare(base_entry.name, base_entry.cname,
+ base_entry.type, None, 'extern')
+ entry.is_variable = 1
entry.is_inherited = 1
- self.inherited_var_entries.append(entry)
- for base_entry in base_scope.cfunc_entries:
- entry = self.declare_cfunction(base_entry.name, base_entry.type,
- base_entry.pos, base_entry.cname,
+ self.inherited_var_entries.append(entry)
+ for base_entry in base_scope.cfunc_entries:
+ entry = self.declare_cfunction(base_entry.name, base_entry.type,
+ base_entry.pos, base_entry.cname,
base_entry.visibility, api=0,
modifiers=base_entry.func_modifiers,
utility_code=base_entry.utility_code)
- entry.is_inherited = 1
+ entry.is_inherited = 1
for base_entry in base_scope.type_entries:
if base_entry.name not in base_templates:
entry = self.declare_type(base_entry.name, base_entry.type,
base_entry.pos, base_entry.cname,
base_entry.visibility)
entry.is_inherited = 1
-
- def specialize(self, values, type_entry):
- scope = CppClassScope(self.name, self.outer_scope)
- scope.type = type_entry
- for entry in self.entries.values():
- if entry.is_type:
- scope.declare_type(entry.name,
- entry.type.specialize(values),
- entry.pos,
- entry.cname,
- template=1)
- elif entry.type.is_cfunction:
- for e in entry.all_alternatives():
- scope.declare_cfunction(e.name,
- e.type.specialize(values),
- e.pos,
- e.cname,
+
+ def specialize(self, values, type_entry):
+ scope = CppClassScope(self.name, self.outer_scope)
+ scope.type = type_entry
+ for entry in self.entries.values():
+ if entry.is_type:
+ scope.declare_type(entry.name,
+ entry.type.specialize(values),
+ entry.pos,
+ entry.cname,
+ template=1)
+ elif entry.type.is_cfunction:
+ for e in entry.all_alternatives():
+ scope.declare_cfunction(e.name,
+ e.type.specialize(values),
+ e.pos,
+ e.cname,
utility_code=e.utility_code)
- else:
- scope.declare_var(entry.name,
- entry.type.specialize(values),
- entry.pos,
- entry.cname,
- entry.visibility)
-
- return scope
-
-
-class PropertyScope(Scope):
- # Scope holding the __get__, __set__ and __del__ methods for
- # a property of an extension type.
- #
- # parent_type PyExtensionType The type to which the property belongs
-
- is_property_scope = 1
-
- def declare_pyfunction(self, name, pos, allow_redefine=False):
- # Add an entry for a method.
- signature = get_property_accessor_signature(name)
- if signature:
- entry = self.declare(name, name, py_object_type, pos, 'private')
- entry.is_special = 1
- entry.signature = signature
- return entry
- else:
- error(pos, "Only __get__, __set__ and __del__ methods allowed "
- "in a property declaration")
- return None
-
-
-class CConstScope(Scope):
-
- def __init__(self, const_base_type_scope):
- Scope.__init__(
- self,
- 'const_' + const_base_type_scope.name,
- const_base_type_scope.outer_scope,
- const_base_type_scope.parent_scope)
- self.const_base_type_scope = const_base_type_scope
-
- def lookup_here(self, name):
- entry = self.const_base_type_scope.lookup_here(name)
- if entry is not None:
- entry = copy.copy(entry)
- entry.type = PyrexTypes.c_const_type(entry.type)
- return entry
-
-class TemplateScope(Scope):
- def __init__(self, name, outer_scope):
- Scope.__init__(self, name, outer_scope, None)
- self.directives = outer_scope.directives
+ else:
+ scope.declare_var(entry.name,
+ entry.type.specialize(values),
+ entry.pos,
+ entry.cname,
+ entry.visibility)
+
+ return scope
+
+
+class PropertyScope(Scope):
+ # Scope holding the __get__, __set__ and __del__ methods for
+ # a property of an extension type.
+ #
+ # parent_type PyExtensionType The type to which the property belongs
+
+ is_property_scope = 1
+
+ def declare_pyfunction(self, name, pos, allow_redefine=False):
+ # Add an entry for a method.
+ signature = get_property_accessor_signature(name)
+ if signature:
+ entry = self.declare(name, name, py_object_type, pos, 'private')
+ entry.is_special = 1
+ entry.signature = signature
+ return entry
+ else:
+ error(pos, "Only __get__, __set__ and __del__ methods allowed "
+ "in a property declaration")
+ return None
+
+
+class CConstScope(Scope):
+
+ def __init__(self, const_base_type_scope):
+ Scope.__init__(
+ self,
+ 'const_' + const_base_type_scope.name,
+ const_base_type_scope.outer_scope,
+ const_base_type_scope.parent_scope)
+ self.const_base_type_scope = const_base_type_scope
+
+ def lookup_here(self, name):
+ entry = self.const_base_type_scope.lookup_here(name)
+ if entry is not None:
+ entry = copy.copy(entry)
+ entry.type = PyrexTypes.c_const_type(entry.type)
+ return entry
+
+class TemplateScope(Scope):
+ def __init__(self, name, outer_scope):
+ Scope.__init__(self, name, outer_scope, None)
+ self.directives = outer_scope.directives
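
The Symtab.py scopes above derive C identifiers by composing a prefix with the user-level name: LocalScope.mangle simply returns prefix + name, and GeneratorExpressionScope.mangle additionally prepends a per-scope genexp_prefix built as "%s%d%s" % (Naming.pyrex_prefix, len(name), name). Below is a minimal, runnable sketch of that composition; the prefix values and function names are invented stand-ins for illustration, not Cython's actual Naming constants or Scope API.

    # mangle_sketch.py -- illustrative sketch only; prefix values are assumptions.
    VAR_PREFIX = "__pyx_v_"    # assumed stand-in for Naming.var_prefix
    PYREX_PREFIX = "__pyx_"    # assumed stand-in for Naming.pyrex_prefix

    def mangle(prefix, name):
        # Mirrors LocalScope.mangle: plain concatenation of prefix and name.
        return prefix + name

    def genexpr_mangle(scope_name, prefix, name):
        # Mirrors the GeneratorExpressionScope pattern: a per-scope prefix of the
        # form "<pyrex prefix><len(scope_name)><scope_name>" precedes the parent
        # scope's mangling of the variable name.
        genexp_prefix = "%s%d%s" % (PYREX_PREFIX, len(scope_name), scope_name)
        return genexp_prefix + mangle(prefix, name)

    if __name__ == "__main__":
        print(mangle(VAR_PREFIX, "x"))                       # -> __pyx_v_x
        print(genexpr_mangle("genexpr7", VAR_PREFIX, "i"))   # -> __pyx_8genexpr7__pyx_v_i
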
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestBuffer.py b/contrib/tools/cython/Cython/Compiler/Tests/TestBuffer.py
index 571191b5ea..1f69d96524 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestBuffer.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestBuffer.py
@@ -1,105 +1,105 @@
-from Cython.TestUtils import CythonTest
-import Cython.Compiler.Errors as Errors
-from Cython.Compiler.Nodes import *
-from Cython.Compiler.ParseTreeTransforms import *
-from Cython.Compiler.Buffer import *
-
-
-class TestBufferParsing(CythonTest):
- # First, we only test the raw parser, i.e.
- # the number and contents of arguments are NOT checked.
- # However "dtype"/the first positional argument is special-cased
- # to parse a type argument rather than an expression
-
- def parse(self, s):
- return self.should_not_fail(lambda: self.fragment(s)).root
-
- def not_parseable(self, expected_error, s):
- e = self.should_fail(lambda: self.fragment(s), Errors.CompileError)
- self.assertEqual(expected_error, e.message_only)
-
- def test_basic(self):
- t = self.parse(u"cdef object[float, 4, ndim=2, foo=foo] x")
- bufnode = t.stats[0].base_type
+from Cython.TestUtils import CythonTest
+import Cython.Compiler.Errors as Errors
+from Cython.Compiler.Nodes import *
+from Cython.Compiler.ParseTreeTransforms import *
+from Cython.Compiler.Buffer import *
+
+
+class TestBufferParsing(CythonTest):
+ # First, we only test the raw parser, i.e.
+ # the number and contents of arguments are NOT checked.
+ # However "dtype"/the first positional argument is special-cased
+ # to parse a type argument rather than an expression
+
+ def parse(self, s):
+ return self.should_not_fail(lambda: self.fragment(s)).root
+
+ def not_parseable(self, expected_error, s):
+ e = self.should_fail(lambda: self.fragment(s), Errors.CompileError)
+ self.assertEqual(expected_error, e.message_only)
+
+ def test_basic(self):
+ t = self.parse(u"cdef object[float, 4, ndim=2, foo=foo] x")
+ bufnode = t.stats[0].base_type
self.assertTrue(isinstance(bufnode, TemplatedTypeNode))
- self.assertEqual(2, len(bufnode.positional_args))
-# print bufnode.dump()
- # should put more here...
-
- def test_type_pos(self):
- self.parse(u"cdef object[short unsigned int, 3] x")
-
- def test_type_keyword(self):
- self.parse(u"cdef object[foo=foo, dtype=short unsigned int] x")
-
- def test_pos_after_key(self):
- self.not_parseable("Non-keyword arg following keyword arg",
- u"cdef object[foo=1, 2] x")
-
-
-# See also tests/error/e_bufaccess.pyx and tests/run/bufaccess.pyx
-# THESE TESTS ARE NOW DISABLED, the code they test was pretty much
-# refactored away
-class TestBufferOptions(CythonTest):
- # Tests the full parsing of the options within the brackets
-
- def nonfatal_error(self, error):
- # We're passing self as context to transform to trap this
- self.error = error
+ self.assertEqual(2, len(bufnode.positional_args))
+# print bufnode.dump()
+ # should put more here...
+
+ def test_type_pos(self):
+ self.parse(u"cdef object[short unsigned int, 3] x")
+
+ def test_type_keyword(self):
+ self.parse(u"cdef object[foo=foo, dtype=short unsigned int] x")
+
+ def test_pos_after_key(self):
+ self.not_parseable("Non-keyword arg following keyword arg",
+ u"cdef object[foo=1, 2] x")
+
+
+# See also tests/error/e_bufaccess.pyx and tests/run/bufaccess.pyx
+# THESE TESTS ARE NOW DISABLED, the code they test was pretty much
+# refactored away
+class TestBufferOptions(CythonTest):
+ # Tests the full parsing of the options within the brackets
+
+ def nonfatal_error(self, error):
+ # We're passing self as context to transform to trap this
+ self.error = error
self.assertTrue(self.expect_error)
-
- def parse_opts(self, opts, expect_error=False):
- assert opts != ""
- s = u"def f():\n cdef object[%s] x" % opts
- self.expect_error = expect_error
- root = self.fragment(s, pipeline=[NormalizeTree(self), PostParse(self)]).root
- if not expect_error:
- vardef = root.stats[0].body.stats[0]
- assert isinstance(vardef, CVarDefNode) # use normal assert as this is to validate the test code
- buftype = vardef.base_type
+
+ def parse_opts(self, opts, expect_error=False):
+ assert opts != ""
+ s = u"def f():\n cdef object[%s] x" % opts
+ self.expect_error = expect_error
+ root = self.fragment(s, pipeline=[NormalizeTree(self), PostParse(self)]).root
+ if not expect_error:
+ vardef = root.stats[0].body.stats[0]
+ assert isinstance(vardef, CVarDefNode) # use normal assert as this is to validate the test code
+ buftype = vardef.base_type
self.assertTrue(isinstance(buftype, TemplatedTypeNode))
self.assertTrue(isinstance(buftype.base_type_node, CSimpleBaseTypeNode))
- self.assertEqual(u"object", buftype.base_type_node.name)
- return buftype
- else:
+ self.assertEqual(u"object", buftype.base_type_node.name)
+ return buftype
+ else:
self.assertTrue(len(root.stats[0].body.stats) == 0)
-
- def non_parse(self, expected_err, opts):
- self.parse_opts(opts, expect_error=True)
-# e = self.should_fail(lambda: self.parse_opts(opts))
- self.assertEqual(expected_err, self.error.message_only)
-
- def __test_basic(self):
- buf = self.parse_opts(u"unsigned short int, 3")
+
+ def non_parse(self, expected_err, opts):
+ self.parse_opts(opts, expect_error=True)
+# e = self.should_fail(lambda: self.parse_opts(opts))
+ self.assertEqual(expected_err, self.error.message_only)
+
+ def __test_basic(self):
+ buf = self.parse_opts(u"unsigned short int, 3")
self.assertTrue(isinstance(buf.dtype_node, CSimpleBaseTypeNode))
self.assertTrue(buf.dtype_node.signed == 0 and buf.dtype_node.longness == -1)
- self.assertEqual(3, buf.ndim)
-
- def __test_dict(self):
- buf = self.parse_opts(u"ndim=3, dtype=unsigned short int")
+ self.assertEqual(3, buf.ndim)
+
+ def __test_dict(self):
+ buf = self.parse_opts(u"ndim=3, dtype=unsigned short int")
self.assertTrue(isinstance(buf.dtype_node, CSimpleBaseTypeNode))
self.assertTrue(buf.dtype_node.signed == 0 and buf.dtype_node.longness == -1)
- self.assertEqual(3, buf.ndim)
-
- def __test_ndim(self):
- self.parse_opts(u"int, 2")
- self.non_parse(ERR_BUF_NDIM, u"int, 'a'")
- self.non_parse(ERR_BUF_NDIM, u"int, -34")
-
- def __test_use_DEF(self):
- t = self.fragment(u"""
- DEF ndim = 3
- def f():
- cdef object[int, ndim] x
- cdef object[ndim=ndim, dtype=int] y
- """, pipeline=[NormalizeTree(self), PostParse(self)]).root
- stats = t.stats[0].body.stats
+ self.assertEqual(3, buf.ndim)
+
+ def __test_ndim(self):
+ self.parse_opts(u"int, 2")
+ self.non_parse(ERR_BUF_NDIM, u"int, 'a'")
+ self.non_parse(ERR_BUF_NDIM, u"int, -34")
+
+ def __test_use_DEF(self):
+ t = self.fragment(u"""
+ DEF ndim = 3
+ def f():
+ cdef object[int, ndim] x
+ cdef object[ndim=ndim, dtype=int] y
+ """, pipeline=[NormalizeTree(self), PostParse(self)]).root
+ stats = t.stats[0].body.stats
self.assertTrue(stats[0].base_type.ndim == 3)
self.assertTrue(stats[1].base_type.ndim == 3)
-
- # add exotic and impossible combinations as they come along...
-
-if __name__ == '__main__':
- import unittest
- unittest.main()
-
+
+ # add exotic and impossible combinations as they come along...
+
+if __name__ == '__main__':
+ import unittest
+ unittest.main()
+
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestMemView.py b/contrib/tools/cython/Cython/Compiler/Tests/TestMemView.py
index 15785f418d..3792f26e99 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestMemView.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestMemView.py
@@ -1,71 +1,71 @@
-from Cython.TestUtils import CythonTest
-import Cython.Compiler.Errors as Errors
-from Cython.Compiler.Nodes import *
-from Cython.Compiler.ParseTreeTransforms import *
-from Cython.Compiler.Buffer import *
-
-
-class TestMemviewParsing(CythonTest):
-
- def parse(self, s):
- return self.should_not_fail(lambda: self.fragment(s)).root
-
- def not_parseable(self, expected_error, s):
- e = self.should_fail(lambda: self.fragment(s), Errors.CompileError)
- self.assertEqual(expected_error, e.message_only)
-
- def test_default_1dim(self):
- self.parse(u"cdef int[:] x")
- self.parse(u"cdef short int[:] x")
-
- def test_default_ndim(self):
- self.parse(u"cdef int[:,:,:,:,:] x")
- self.parse(u"cdef unsigned long int[:,:,:,:,:] x")
- self.parse(u"cdef unsigned int[:,:,:,:,:] x")
-
- def test_zero_offset(self):
- self.parse(u"cdef long double[0:] x")
- self.parse(u"cdef int[0:] x")
-
- def test_zero_offset_ndim(self):
- self.parse(u"cdef int[0:,0:,0:,0:] x")
-
- def test_def_arg(self):
- self.parse(u"def foo(int[:,:] x): pass")
-
- def test_cdef_arg(self):
- self.parse(u"cdef foo(int[:,:] x): pass")
-
- def test_general_slice(self):
- self.parse(u'cdef float[::ptr, ::direct & contig, 0::full & strided] x')
-
- def test_non_slice_memview(self):
- self.not_parseable(u"An axis specification in memoryview declaration does not have a ':'.",
- u"cdef double[:foo, bar] x")
- self.not_parseable(u"An axis specification in memoryview declaration does not have a ':'.",
- u"cdef double[0:foo, bar] x")
-
- def test_basic(self):
- t = self.parse(u"cdef int[:] x")
- memv_node = t.stats[0].base_type
+from Cython.TestUtils import CythonTest
+import Cython.Compiler.Errors as Errors
+from Cython.Compiler.Nodes import *
+from Cython.Compiler.ParseTreeTransforms import *
+from Cython.Compiler.Buffer import *
+
+
+class TestMemviewParsing(CythonTest):
+
+ def parse(self, s):
+ return self.should_not_fail(lambda: self.fragment(s)).root
+
+ def not_parseable(self, expected_error, s):
+ e = self.should_fail(lambda: self.fragment(s), Errors.CompileError)
+ self.assertEqual(expected_error, e.message_only)
+
+ def test_default_1dim(self):
+ self.parse(u"cdef int[:] x")
+ self.parse(u"cdef short int[:] x")
+
+ def test_default_ndim(self):
+ self.parse(u"cdef int[:,:,:,:,:] x")
+ self.parse(u"cdef unsigned long int[:,:,:,:,:] x")
+ self.parse(u"cdef unsigned int[:,:,:,:,:] x")
+
+ def test_zero_offset(self):
+ self.parse(u"cdef long double[0:] x")
+ self.parse(u"cdef int[0:] x")
+
+ def test_zero_offset_ndim(self):
+ self.parse(u"cdef int[0:,0:,0:,0:] x")
+
+ def test_def_arg(self):
+ self.parse(u"def foo(int[:,:] x): pass")
+
+ def test_cdef_arg(self):
+ self.parse(u"cdef foo(int[:,:] x): pass")
+
+ def test_general_slice(self):
+ self.parse(u'cdef float[::ptr, ::direct & contig, 0::full & strided] x')
+
+ def test_non_slice_memview(self):
+ self.not_parseable(u"An axis specification in memoryview declaration does not have a ':'.",
+ u"cdef double[:foo, bar] x")
+ self.not_parseable(u"An axis specification in memoryview declaration does not have a ':'.",
+ u"cdef double[0:foo, bar] x")
+
+ def test_basic(self):
+ t = self.parse(u"cdef int[:] x")
+ memv_node = t.stats[0].base_type
self.assertTrue(isinstance(memv_node, MemoryViewSliceTypeNode))
-
- # we also test other similar declarations (buffers, anonymous C arrays)
- # since the parsing has to distinguish between them.
-
- def disable_test_no_buf_arg(self): # TODO
- self.not_parseable(u"Expected ']'",
- u"cdef extern foo(object[int, ndim=2])")
-
- def disable_test_parse_sizeof(self): # TODO
- self.parse(u"sizeof(int[NN])")
- self.parse(u"sizeof(int[])")
- self.parse(u"sizeof(int[][NN])")
- self.not_parseable(u"Expected an identifier or literal",
- u"sizeof(int[:NN])")
- self.not_parseable(u"Expected ']'",
- u"sizeof(foo[dtype=bar]")
-
-if __name__ == '__main__':
- import unittest
- unittest.main()
+
+ # we also test other similar declarations (buffers, anonymous C arrays)
+ # since the parsing has to distinguish between them.
+
+ def disable_test_no_buf_arg(self): # TODO
+ self.not_parseable(u"Expected ']'",
+ u"cdef extern foo(object[int, ndim=2])")
+
+ def disable_test_parse_sizeof(self): # TODO
+ self.parse(u"sizeof(int[NN])")
+ self.parse(u"sizeof(int[])")
+ self.parse(u"sizeof(int[][NN])")
+ self.not_parseable(u"Expected an identifier or literal",
+ u"sizeof(int[:NN])")
+ self.not_parseable(u"Expected ']'",
+ u"sizeof(foo[dtype=bar]")
+
+if __name__ == '__main__':
+ import unittest
+ unittest.main()
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestParseTreeTransforms.py b/contrib/tools/cython/Cython/Compiler/Tests/TestParseTreeTransforms.py
index 1249c8db07..234b45db5b 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestParseTreeTransforms.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestParseTreeTransforms.py
@@ -1,282 +1,282 @@
-import os
-
-from Cython.TestUtils import TransformTest
-from Cython.Compiler.ParseTreeTransforms import *
-from Cython.Compiler.Nodes import *
-from Cython.Compiler import Main, Symtab
-
-
-class TestNormalizeTree(TransformTest):
- def test_parserbehaviour_is_what_we_coded_for(self):
- t = self.fragment(u"if x: y").root
- self.assertLines(u"""
-(root): StatListNode
- stats[0]: IfStatNode
- if_clauses[0]: IfClauseNode
- condition: NameNode
- body: ExprStatNode
- expr: NameNode
-""", self.treetypes(t))
-
- def test_wrap_singlestat(self):
- t = self.run_pipeline([NormalizeTree(None)], u"if x: y")
- self.assertLines(u"""
-(root): StatListNode
- stats[0]: IfStatNode
- if_clauses[0]: IfClauseNode
- condition: NameNode
- body: StatListNode
- stats[0]: ExprStatNode
- expr: NameNode
-""", self.treetypes(t))
-
- def test_wrap_multistat(self):
- t = self.run_pipeline([NormalizeTree(None)], u"""
- if z:
- x
- y
- """)
- self.assertLines(u"""
-(root): StatListNode
- stats[0]: IfStatNode
- if_clauses[0]: IfClauseNode
- condition: NameNode
- body: StatListNode
- stats[0]: ExprStatNode
- expr: NameNode
- stats[1]: ExprStatNode
- expr: NameNode
-""", self.treetypes(t))
-
- def test_statinexpr(self):
- t = self.run_pipeline([NormalizeTree(None)], u"""
- a, b = x, y
- """)
- self.assertLines(u"""
-(root): StatListNode
- stats[0]: SingleAssignmentNode
- lhs: TupleNode
- args[0]: NameNode
- args[1]: NameNode
- rhs: TupleNode
- args[0]: NameNode
- args[1]: NameNode
-""", self.treetypes(t))
-
- def test_wrap_offagain(self):
- t = self.run_pipeline([NormalizeTree(None)], u"""
- x
- y
- if z:
- x
- """)
- self.assertLines(u"""
-(root): StatListNode
- stats[0]: ExprStatNode
- expr: NameNode
- stats[1]: ExprStatNode
- expr: NameNode
- stats[2]: IfStatNode
- if_clauses[0]: IfClauseNode
- condition: NameNode
- body: StatListNode
- stats[0]: ExprStatNode
- expr: NameNode
-""", self.treetypes(t))
-
-
- def test_pass_eliminated(self):
- t = self.run_pipeline([NormalizeTree(None)], u"pass")
+import os
+
+from Cython.TestUtils import TransformTest
+from Cython.Compiler.ParseTreeTransforms import *
+from Cython.Compiler.Nodes import *
+from Cython.Compiler import Main, Symtab
+
+
+class TestNormalizeTree(TransformTest):
+ def test_parserbehaviour_is_what_we_coded_for(self):
+ t = self.fragment(u"if x: y").root
+ self.assertLines(u"""
+(root): StatListNode
+ stats[0]: IfStatNode
+ if_clauses[0]: IfClauseNode
+ condition: NameNode
+ body: ExprStatNode
+ expr: NameNode
+""", self.treetypes(t))
+
+ def test_wrap_singlestat(self):
+ t = self.run_pipeline([NormalizeTree(None)], u"if x: y")
+ self.assertLines(u"""
+(root): StatListNode
+ stats[0]: IfStatNode
+ if_clauses[0]: IfClauseNode
+ condition: NameNode
+ body: StatListNode
+ stats[0]: ExprStatNode
+ expr: NameNode
+""", self.treetypes(t))
+
+ def test_wrap_multistat(self):
+ t = self.run_pipeline([NormalizeTree(None)], u"""
+ if z:
+ x
+ y
+ """)
+ self.assertLines(u"""
+(root): StatListNode
+ stats[0]: IfStatNode
+ if_clauses[0]: IfClauseNode
+ condition: NameNode
+ body: StatListNode
+ stats[0]: ExprStatNode
+ expr: NameNode
+ stats[1]: ExprStatNode
+ expr: NameNode
+""", self.treetypes(t))
+
+ def test_statinexpr(self):
+ t = self.run_pipeline([NormalizeTree(None)], u"""
+ a, b = x, y
+ """)
+ self.assertLines(u"""
+(root): StatListNode
+ stats[0]: SingleAssignmentNode
+ lhs: TupleNode
+ args[0]: NameNode
+ args[1]: NameNode
+ rhs: TupleNode
+ args[0]: NameNode
+ args[1]: NameNode
+""", self.treetypes(t))
+
+ def test_wrap_offagain(self):
+ t = self.run_pipeline([NormalizeTree(None)], u"""
+ x
+ y
+ if z:
+ x
+ """)
+ self.assertLines(u"""
+(root): StatListNode
+ stats[0]: ExprStatNode
+ expr: NameNode
+ stats[1]: ExprStatNode
+ expr: NameNode
+ stats[2]: IfStatNode
+ if_clauses[0]: IfClauseNode
+ condition: NameNode
+ body: StatListNode
+ stats[0]: ExprStatNode
+ expr: NameNode
+""", self.treetypes(t))
+
+
+ def test_pass_eliminated(self):
+ t = self.run_pipeline([NormalizeTree(None)], u"pass")
self.assertTrue(len(t.stats) == 0)
-
-class TestWithTransform(object): # (TransformTest): # Disabled!
-
- def test_simplified(self):
- t = self.run_pipeline([WithTransform(None)], u"""
- with x:
- y = z ** 3
- """)
-
- self.assertCode(u"""
-
- $0_0 = x
- $0_2 = $0_0.__exit__
- $0_0.__enter__()
- $0_1 = True
- try:
- try:
- $1_0 = None
- y = z ** 3
- except:
- $0_1 = False
- if (not $0_2($1_0)):
- raise
- finally:
- if $0_1:
- $0_2(None, None, None)
-
- """, t)
-
- def test_basic(self):
- t = self.run_pipeline([WithTransform(None)], u"""
- with x as y:
- y = z ** 3
- """)
- self.assertCode(u"""
-
- $0_0 = x
- $0_2 = $0_0.__exit__
- $0_3 = $0_0.__enter__()
- $0_1 = True
- try:
- try:
- $1_0 = None
- y = $0_3
- y = z ** 3
- except:
- $0_1 = False
- if (not $0_2($1_0)):
- raise
- finally:
- if $0_1:
- $0_2(None, None, None)
-
- """, t)
-
-
-class TestInterpretCompilerDirectives(TransformTest):
- """
- This class tests the parallel directives AST-rewriting and importing.
- """
-
- # Test the parallel directives (c)importing
-
- import_code = u"""
- cimport cython.parallel
- cimport cython.parallel as par
- from cython cimport parallel as par2
- from cython cimport parallel
-
- from cython.parallel cimport threadid as tid
- from cython.parallel cimport threadavailable as tavail
- from cython.parallel cimport prange
- """
-
- expected_directives_dict = {
- u'cython.parallel': u'cython.parallel',
- u'par': u'cython.parallel',
- u'par2': u'cython.parallel',
- u'parallel': u'cython.parallel',
-
- u"tid": u"cython.parallel.threadid",
- u"tavail": u"cython.parallel.threadavailable",
- u"prange": u"cython.parallel.prange",
- }
-
-
- def setUp(self):
- super(TestInterpretCompilerDirectives, self).setUp()
-
- compilation_options = Main.CompilationOptions(Main.default_options)
- ctx = compilation_options.create_context()
-
- transform = InterpretCompilerDirectives(ctx, ctx.compiler_directives)
- transform.module_scope = Symtab.ModuleScope('__main__', None, ctx)
- self.pipeline = [transform]
-
- self.debug_exception_on_error = DebugFlags.debug_exception_on_error
-
- def tearDown(self):
- DebugFlags.debug_exception_on_error = self.debug_exception_on_error
-
- def test_parallel_directives_cimports(self):
- self.run_pipeline(self.pipeline, self.import_code)
- parallel_directives = self.pipeline[0].parallel_directives
- self.assertEqual(parallel_directives, self.expected_directives_dict)
-
- def test_parallel_directives_imports(self):
- self.run_pipeline(self.pipeline,
- self.import_code.replace(u'cimport', u'import'))
- parallel_directives = self.pipeline[0].parallel_directives
- self.assertEqual(parallel_directives, self.expected_directives_dict)
-
-
-# TODO: Re-enable once they're more robust.
-if False:
- from Cython.Debugger import DebugWriter
- from Cython.Debugger.Tests.TestLibCython import DebuggerTestCase
-else:
- # skip test, don't let it inherit unittest.TestCase
- DebuggerTestCase = object
-
-
-class TestDebugTransform(DebuggerTestCase):
-
- def elem_hasattrs(self, elem, attrs):
- return all(attr in elem.attrib for attr in attrs)
-
- def test_debug_info(self):
- try:
- assert os.path.exists(self.debug_dest)
-
- t = DebugWriter.etree.parse(self.debug_dest)
- # the xpath of the standard ElementTree is primitive, don't use
- # anything fancy
- L = list(t.find('/Module/Globals'))
- assert L
- xml_globals = dict((e.attrib['name'], e.attrib['type']) for e in L)
- self.assertEqual(len(L), len(xml_globals))
-
- L = list(t.find('/Module/Functions'))
- assert L
- xml_funcs = dict((e.attrib['qualified_name'], e) for e in L)
- self.assertEqual(len(L), len(xml_funcs))
-
- # test globals
- self.assertEqual('CObject', xml_globals.get('c_var'))
- self.assertEqual('PythonObject', xml_globals.get('python_var'))
-
- # test functions
- funcnames = ('codefile.spam', 'codefile.ham', 'codefile.eggs',
- 'codefile.closure', 'codefile.inner')
- required_xml_attrs = 'name', 'cname', 'qualified_name'
- assert all(f in xml_funcs for f in funcnames)
- spam, ham, eggs = [xml_funcs[funcname] for funcname in funcnames]
-
- self.assertEqual(spam.attrib['name'], 'spam')
- self.assertNotEqual('spam', spam.attrib['cname'])
- assert self.elem_hasattrs(spam, required_xml_attrs)
-
- # test locals of functions
- spam_locals = list(spam.find('Locals'))
- assert spam_locals
- spam_locals.sort(key=lambda e: e.attrib['name'])
- names = [e.attrib['name'] for e in spam_locals]
- self.assertEqual(list('abcd'), names)
- assert self.elem_hasattrs(spam_locals[0], required_xml_attrs)
-
- # test arguments of functions
- spam_arguments = list(spam.find('Arguments'))
- assert spam_arguments
- self.assertEqual(1, len(list(spam_arguments)))
-
- # test step-into functions
- step_into = spam.find('StepIntoFunctions')
- spam_stepinto = [x.attrib['name'] for x in step_into]
- assert spam_stepinto
- self.assertEqual(2, len(spam_stepinto))
- assert 'puts' in spam_stepinto
- assert 'some_c_function' in spam_stepinto
- except:
- f = open(self.debug_dest)
- try:
- print(f.read())
- finally:
- f.close()
- raise
-
-
-
-if __name__ == "__main__":
- import unittest
- unittest.main()
+
+class TestWithTransform(object): # (TransformTest): # Disabled!
+
+ def test_simplified(self):
+ t = self.run_pipeline([WithTransform(None)], u"""
+ with x:
+ y = z ** 3
+ """)
+
+ self.assertCode(u"""
+
+ $0_0 = x
+ $0_2 = $0_0.__exit__
+ $0_0.__enter__()
+ $0_1 = True
+ try:
+ try:
+ $1_0 = None
+ y = z ** 3
+ except:
+ $0_1 = False
+ if (not $0_2($1_0)):
+ raise
+ finally:
+ if $0_1:
+ $0_2(None, None, None)
+
+ """, t)
+
+ def test_basic(self):
+ t = self.run_pipeline([WithTransform(None)], u"""
+ with x as y:
+ y = z ** 3
+ """)
+ self.assertCode(u"""
+
+ $0_0 = x
+ $0_2 = $0_0.__exit__
+ $0_3 = $0_0.__enter__()
+ $0_1 = True
+ try:
+ try:
+ $1_0 = None
+ y = $0_3
+ y = z ** 3
+ except:
+ $0_1 = False
+ if (not $0_2($1_0)):
+ raise
+ finally:
+ if $0_1:
+ $0_2(None, None, None)
+
+ """, t)
+
+
+class TestInterpretCompilerDirectives(TransformTest):
+ """
+ This class tests the parallel directives AST-rewriting and importing.
+ """
+
+ # Test the parallel directives (c)importing
+
+ import_code = u"""
+ cimport cython.parallel
+ cimport cython.parallel as par
+ from cython cimport parallel as par2
+ from cython cimport parallel
+
+ from cython.parallel cimport threadid as tid
+ from cython.parallel cimport threadavailable as tavail
+ from cython.parallel cimport prange
+ """
+
+ expected_directives_dict = {
+ u'cython.parallel': u'cython.parallel',
+ u'par': u'cython.parallel',
+ u'par2': u'cython.parallel',
+ u'parallel': u'cython.parallel',
+
+ u"tid": u"cython.parallel.threadid",
+ u"tavail": u"cython.parallel.threadavailable",
+ u"prange": u"cython.parallel.prange",
+ }
+
+
+ def setUp(self):
+ super(TestInterpretCompilerDirectives, self).setUp()
+
+ compilation_options = Main.CompilationOptions(Main.default_options)
+ ctx = compilation_options.create_context()
+
+ transform = InterpretCompilerDirectives(ctx, ctx.compiler_directives)
+ transform.module_scope = Symtab.ModuleScope('__main__', None, ctx)
+ self.pipeline = [transform]
+
+ self.debug_exception_on_error = DebugFlags.debug_exception_on_error
+
+ def tearDown(self):
+ DebugFlags.debug_exception_on_error = self.debug_exception_on_error
+
+ def test_parallel_directives_cimports(self):
+ self.run_pipeline(self.pipeline, self.import_code)
+ parallel_directives = self.pipeline[0].parallel_directives
+ self.assertEqual(parallel_directives, self.expected_directives_dict)
+
+ def test_parallel_directives_imports(self):
+ self.run_pipeline(self.pipeline,
+ self.import_code.replace(u'cimport', u'import'))
+ parallel_directives = self.pipeline[0].parallel_directives
+ self.assertEqual(parallel_directives, self.expected_directives_dict)
+
+
+# TODO: Re-enable once they're more robust.
+if False:
+ from Cython.Debugger import DebugWriter
+ from Cython.Debugger.Tests.TestLibCython import DebuggerTestCase
+else:
+ # skip test, don't let it inherit unittest.TestCase
+ DebuggerTestCase = object
+
+
+class TestDebugTransform(DebuggerTestCase):
+
+ def elem_hasattrs(self, elem, attrs):
+ return all(attr in elem.attrib for attr in attrs)
+
+ def test_debug_info(self):
+ try:
+ assert os.path.exists(self.debug_dest)
+
+ t = DebugWriter.etree.parse(self.debug_dest)
+ # the xpath of the standard ElementTree is primitive, don't use
+ # anything fancy
+ L = list(t.find('/Module/Globals'))
+ assert L
+ xml_globals = dict((e.attrib['name'], e.attrib['type']) for e in L)
+ self.assertEqual(len(L), len(xml_globals))
+
+ L = list(t.find('/Module/Functions'))
+ assert L
+ xml_funcs = dict((e.attrib['qualified_name'], e) for e in L)
+ self.assertEqual(len(L), len(xml_funcs))
+
+ # test globals
+ self.assertEqual('CObject', xml_globals.get('c_var'))
+ self.assertEqual('PythonObject', xml_globals.get('python_var'))
+
+ # test functions
+ funcnames = ('codefile.spam', 'codefile.ham', 'codefile.eggs',
+ 'codefile.closure', 'codefile.inner')
+ required_xml_attrs = 'name', 'cname', 'qualified_name'
+ assert all(f in xml_funcs for f in funcnames)
+ spam, ham, eggs = [xml_funcs[funcname] for funcname in funcnames]
+
+ self.assertEqual(spam.attrib['name'], 'spam')
+ self.assertNotEqual('spam', spam.attrib['cname'])
+ assert self.elem_hasattrs(spam, required_xml_attrs)
+
+ # test locals of functions
+ spam_locals = list(spam.find('Locals'))
+ assert spam_locals
+ spam_locals.sort(key=lambda e: e.attrib['name'])
+ names = [e.attrib['name'] for e in spam_locals]
+ self.assertEqual(list('abcd'), names)
+ assert self.elem_hasattrs(spam_locals[0], required_xml_attrs)
+
+ # test arguments of functions
+ spam_arguments = list(spam.find('Arguments'))
+ assert spam_arguments
+ self.assertEqual(1, len(list(spam_arguments)))
+
+ # test step-into functions
+ step_into = spam.find('StepIntoFunctions')
+ spam_stepinto = [x.attrib['name'] for x in step_into]
+ assert spam_stepinto
+ self.assertEqual(2, len(spam_stepinto))
+ assert 'puts' in spam_stepinto
+ assert 'some_c_function' in spam_stepinto
+ except:
+ f = open(self.debug_dest)
+ try:
+ print(f.read())
+ finally:
+ f.close()
+ raise
+
+
+
+if __name__ == "__main__":
+ import unittest
+ unittest.main()
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestSignatureMatching.py b/contrib/tools/cython/Cython/Compiler/Tests/TestSignatureMatching.py
index 21214c50c2..166bb225b9 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestSignatureMatching.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestSignatureMatching.py
@@ -1,73 +1,73 @@
-import unittest
-
-from Cython.Compiler import PyrexTypes as pt
-from Cython.Compiler.ExprNodes import NameNode
-from Cython.Compiler.PyrexTypes import CFuncTypeArg
-
-def cfunctype(*arg_types):
- return pt.CFuncType(pt.c_int_type,
- [ CFuncTypeArg("name", arg_type, None) for arg_type in arg_types ])
-
-def cppclasstype(name, base_classes):
- return pt.CppClassType(name, None, 'CPP_'+name, base_classes)
-
-class SignatureMatcherTest(unittest.TestCase):
- """
- Test the signature matching algorithm for overloaded signatures.
- """
- def assertMatches(self, expected_type, arg_types, functions):
+import unittest
+
+from Cython.Compiler import PyrexTypes as pt
+from Cython.Compiler.ExprNodes import NameNode
+from Cython.Compiler.PyrexTypes import CFuncTypeArg
+
+def cfunctype(*arg_types):
+ return pt.CFuncType(pt.c_int_type,
+ [ CFuncTypeArg("name", arg_type, None) for arg_type in arg_types ])
+
+def cppclasstype(name, base_classes):
+ return pt.CppClassType(name, None, 'CPP_'+name, base_classes)
+
+class SignatureMatcherTest(unittest.TestCase):
+ """
+ Test the signature matching algorithm for overloaded signatures.
+ """
+ def assertMatches(self, expected_type, arg_types, functions):
match = pt.best_match(arg_types, functions)
- if expected_type is not None:
- self.assertNotEqual(None, match)
- self.assertEqual(expected_type, match.type)
-
- def test_cpp_reference_single_arg(self):
- function_types = [
- cfunctype(pt.CReferenceType(pt.c_int_type)),
- cfunctype(pt.CReferenceType(pt.c_long_type)),
- cfunctype(pt.CReferenceType(pt.c_double_type)),
- ]
-
- functions = [ NameNode(None, type=t) for t in function_types ]
- self.assertMatches(function_types[0], [pt.c_int_type], functions)
- self.assertMatches(function_types[1], [pt.c_long_type], functions)
- self.assertMatches(function_types[2], [pt.c_double_type], functions)
-
- def test_cpp_reference_two_args(self):
- function_types = [
- cfunctype(
- pt.CReferenceType(pt.c_int_type), pt.CReferenceType(pt.c_long_type)),
- cfunctype(
- pt.CReferenceType(pt.c_long_type), pt.CReferenceType(pt.c_long_type)),
- ]
-
- functions = [ NameNode(None, type=t) for t in function_types ]
- self.assertMatches(function_types[0], [pt.c_int_type, pt.c_long_type], functions)
- self.assertMatches(function_types[1], [pt.c_long_type, pt.c_long_type], functions)
- self.assertMatches(function_types[1], [pt.c_long_type, pt.c_int_type], functions)
-
- def test_cpp_reference_cpp_class(self):
- classes = [ cppclasstype("Test%d"%i, []) for i in range(2) ]
- function_types = [
- cfunctype(pt.CReferenceType(classes[0])),
- cfunctype(pt.CReferenceType(classes[1])),
- ]
-
- functions = [ NameNode(None, type=t) for t in function_types ]
- self.assertMatches(function_types[0], [classes[0]], functions)
- self.assertMatches(function_types[1], [classes[1]], functions)
-
- def test_cpp_reference_cpp_class_and_int(self):
- classes = [ cppclasstype("Test%d"%i, []) for i in range(2) ]
- function_types = [
- cfunctype(pt.CReferenceType(classes[0]), pt.c_int_type),
- cfunctype(pt.CReferenceType(classes[0]), pt.c_long_type),
- cfunctype(pt.CReferenceType(classes[1]), pt.c_int_type),
- cfunctype(pt.CReferenceType(classes[1]), pt.c_long_type),
- ]
-
- functions = [ NameNode(None, type=t) for t in function_types ]
- self.assertMatches(function_types[0], [classes[0], pt.c_int_type], functions)
- self.assertMatches(function_types[1], [classes[0], pt.c_long_type], functions)
- self.assertMatches(function_types[2], [classes[1], pt.c_int_type], functions)
- self.assertMatches(function_types[3], [classes[1], pt.c_long_type], functions)
+ if expected_type is not None:
+ self.assertNotEqual(None, match)
+ self.assertEqual(expected_type, match.type)
+
+ def test_cpp_reference_single_arg(self):
+ function_types = [
+ cfunctype(pt.CReferenceType(pt.c_int_type)),
+ cfunctype(pt.CReferenceType(pt.c_long_type)),
+ cfunctype(pt.CReferenceType(pt.c_double_type)),
+ ]
+
+ functions = [ NameNode(None, type=t) for t in function_types ]
+ self.assertMatches(function_types[0], [pt.c_int_type], functions)
+ self.assertMatches(function_types[1], [pt.c_long_type], functions)
+ self.assertMatches(function_types[2], [pt.c_double_type], functions)
+
+ def test_cpp_reference_two_args(self):
+ function_types = [
+ cfunctype(
+ pt.CReferenceType(pt.c_int_type), pt.CReferenceType(pt.c_long_type)),
+ cfunctype(
+ pt.CReferenceType(pt.c_long_type), pt.CReferenceType(pt.c_long_type)),
+ ]
+
+ functions = [ NameNode(None, type=t) for t in function_types ]
+ self.assertMatches(function_types[0], [pt.c_int_type, pt.c_long_type], functions)
+ self.assertMatches(function_types[1], [pt.c_long_type, pt.c_long_type], functions)
+ self.assertMatches(function_types[1], [pt.c_long_type, pt.c_int_type], functions)
+
+ def test_cpp_reference_cpp_class(self):
+ classes = [ cppclasstype("Test%d"%i, []) for i in range(2) ]
+ function_types = [
+ cfunctype(pt.CReferenceType(classes[0])),
+ cfunctype(pt.CReferenceType(classes[1])),
+ ]
+
+ functions = [ NameNode(None, type=t) for t in function_types ]
+ self.assertMatches(function_types[0], [classes[0]], functions)
+ self.assertMatches(function_types[1], [classes[1]], functions)
+
+ def test_cpp_reference_cpp_class_and_int(self):
+ classes = [ cppclasstype("Test%d"%i, []) for i in range(2) ]
+ function_types = [
+ cfunctype(pt.CReferenceType(classes[0]), pt.c_int_type),
+ cfunctype(pt.CReferenceType(classes[0]), pt.c_long_type),
+ cfunctype(pt.CReferenceType(classes[1]), pt.c_int_type),
+ cfunctype(pt.CReferenceType(classes[1]), pt.c_long_type),
+ ]
+
+ functions = [ NameNode(None, type=t) for t in function_types ]
+ self.assertMatches(function_types[0], [classes[0], pt.c_int_type], functions)
+ self.assertMatches(function_types[1], [classes[0], pt.c_long_type], functions)
+ self.assertMatches(function_types[2], [classes[1], pt.c_int_type], functions)
+ self.assertMatches(function_types[3], [classes[1], pt.c_long_type], functions)
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestTreeFragment.py b/contrib/tools/cython/Cython/Compiler/Tests/TestTreeFragment.py
index 5256ed93d7..9ee8da5478 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestTreeFragment.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestTreeFragment.py
@@ -1,64 +1,64 @@
-from Cython.TestUtils import CythonTest
-from Cython.Compiler.TreeFragment import *
-from Cython.Compiler.Nodes import *
-from Cython.Compiler.UtilNodes import *
-import Cython.Compiler.Naming as Naming
-
-class TestTreeFragments(CythonTest):
-
- def test_basic(self):
- F = self.fragment(u"x = 4")
- T = F.copy()
- self.assertCode(u"x = 4", T)
-
- def test_copy_is_taken(self):
- F = self.fragment(u"if True: x = 4")
- T1 = F.root
- T2 = F.copy()
- self.assertEqual("x", T2.stats[0].if_clauses[0].body.lhs.name)
- T2.stats[0].if_clauses[0].body.lhs.name = "other"
- self.assertEqual("x", T1.stats[0].if_clauses[0].body.lhs.name)
-
- def test_substitutions_are_copied(self):
- T = self.fragment(u"y + y").substitute({"y": NameNode(pos=None, name="x")})
- self.assertEqual("x", T.stats[0].expr.operand1.name)
- self.assertEqual("x", T.stats[0].expr.operand2.name)
+from Cython.TestUtils import CythonTest
+from Cython.Compiler.TreeFragment import *
+from Cython.Compiler.Nodes import *
+from Cython.Compiler.UtilNodes import *
+import Cython.Compiler.Naming as Naming
+
+class TestTreeFragments(CythonTest):
+
+ def test_basic(self):
+ F = self.fragment(u"x = 4")
+ T = F.copy()
+ self.assertCode(u"x = 4", T)
+
+ def test_copy_is_taken(self):
+ F = self.fragment(u"if True: x = 4")
+ T1 = F.root
+ T2 = F.copy()
+ self.assertEqual("x", T2.stats[0].if_clauses[0].body.lhs.name)
+ T2.stats[0].if_clauses[0].body.lhs.name = "other"
+ self.assertEqual("x", T1.stats[0].if_clauses[0].body.lhs.name)
+
+ def test_substitutions_are_copied(self):
+ T = self.fragment(u"y + y").substitute({"y": NameNode(pos=None, name="x")})
+ self.assertEqual("x", T.stats[0].expr.operand1.name)
+ self.assertEqual("x", T.stats[0].expr.operand2.name)
self.assertTrue(T.stats[0].expr.operand1 is not T.stats[0].expr.operand2)
-
- def test_substitution(self):
- F = self.fragment(u"x = 4")
- y = NameNode(pos=None, name=u"y")
- T = F.substitute({"x" : y})
- self.assertCode(u"y = 4", T)
-
- def test_exprstat(self):
- F = self.fragment(u"PASS")
- pass_stat = PassStatNode(pos=None)
- T = F.substitute({"PASS" : pass_stat})
+
+ def test_substitution(self):
+ F = self.fragment(u"x = 4")
+ y = NameNode(pos=None, name=u"y")
+ T = F.substitute({"x" : y})
+ self.assertCode(u"y = 4", T)
+
+ def test_exprstat(self):
+ F = self.fragment(u"PASS")
+ pass_stat = PassStatNode(pos=None)
+ T = F.substitute({"PASS" : pass_stat})
self.assertTrue(isinstance(T.stats[0], PassStatNode), T)
-
- def test_pos_is_transferred(self):
- F = self.fragment(u"""
- x = y
- x = u * v ** w
- """)
- T = F.substitute({"v" : NameNode(pos=None, name="a")})
- v = F.root.stats[1].rhs.operand2.operand1
- a = T.stats[1].rhs.operand2.operand1
+
+ def test_pos_is_transferred(self):
+ F = self.fragment(u"""
+ x = y
+ x = u * v ** w
+ """)
+ T = F.substitute({"v" : NameNode(pos=None, name="a")})
+ v = F.root.stats[1].rhs.operand2.operand1
+ a = T.stats[1].rhs.operand2.operand1
self.assertEqual(v.pos, a.pos)
-
- def test_temps(self):
- TemplateTransform.temp_name_counter = 0
- F = self.fragment(u"""
- TMP
- x = TMP
- """)
- T = F.substitute(temps=[u"TMP"])
- s = T.body.stats
+
+ def test_temps(self):
+ TemplateTransform.temp_name_counter = 0
+ F = self.fragment(u"""
+ TMP
+ x = TMP
+ """)
+ T = F.substitute(temps=[u"TMP"])
+ s = T.body.stats
self.assertTrue(isinstance(s[0].expr, TempRefNode))
self.assertTrue(isinstance(s[1].rhs, TempRefNode))
self.assertTrue(s[0].expr.handle is s[1].rhs.handle)
-
-if __name__ == "__main__":
- import unittest
- unittest.main()
+
+if __name__ == "__main__":
+ import unittest
+ unittest.main()
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestTreePath.py b/contrib/tools/cython/Cython/Compiler/Tests/TestTreePath.py
index eb8e2389ba..bee53b3d2b 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestTreePath.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestTreePath.py
@@ -1,94 +1,94 @@
-import unittest
-from Cython.Compiler.Visitor import PrintTree
-from Cython.TestUtils import TransformTest
-from Cython.Compiler.TreePath import find_first, find_all
-from Cython.Compiler import Nodes, ExprNodes
-
-class TestTreePath(TransformTest):
- _tree = None
-
- def _build_tree(self):
- if self._tree is None:
- self._tree = self.run_pipeline([], u"""
- def decorator(fun): # DefNode
- return fun # ReturnStatNode, NameNode
- @decorator # NameNode
- def decorated(): # DefNode
- pass
- """)
- return self._tree
-
- def test_node_path(self):
- t = self._build_tree()
+import unittest
+from Cython.Compiler.Visitor import PrintTree
+from Cython.TestUtils import TransformTest
+from Cython.Compiler.TreePath import find_first, find_all
+from Cython.Compiler import Nodes, ExprNodes
+
+class TestTreePath(TransformTest):
+ _tree = None
+
+ def _build_tree(self):
+ if self._tree is None:
+ self._tree = self.run_pipeline([], u"""
+ def decorator(fun): # DefNode
+ return fun # ReturnStatNode, NameNode
+ @decorator # NameNode
+ def decorated(): # DefNode
+ pass
+ """)
+ return self._tree
+
+ def test_node_path(self):
+ t = self._build_tree()
self.assertEqual(2, len(find_all(t, "//DefNode")))
self.assertEqual(2, len(find_all(t, "//NameNode")))
self.assertEqual(1, len(find_all(t, "//ReturnStatNode")))
self.assertEqual(1, len(find_all(t, "//DefNode//ReturnStatNode")))
-
- def test_node_path_star(self):
- t = self._build_tree()
+
+ def test_node_path_star(self):
+ t = self._build_tree()
self.assertEqual(10, len(find_all(t, "//*")))
self.assertEqual(8, len(find_all(t, "//DefNode//*")))
self.assertEqual(0, len(find_all(t, "//NameNode//*")))
-
- def test_node_path_attribute(self):
- t = self._build_tree()
+
+ def test_node_path_attribute(self):
+ t = self._build_tree()
self.assertEqual(2, len(find_all(t, "//NameNode/@name")))
self.assertEqual(['fun', 'decorator'], find_all(t, "//NameNode/@name"))
-
- def test_node_path_attribute_dotted(self):
- t = self._build_tree()
+
+ def test_node_path_attribute_dotted(self):
+ t = self._build_tree()
self.assertEqual(1, len(find_all(t, "//ReturnStatNode/@value.name")))
self.assertEqual(['fun'], find_all(t, "//ReturnStatNode/@value.name"))
-
- def test_node_path_child(self):
- t = self._build_tree()
+
+ def test_node_path_child(self):
+ t = self._build_tree()
self.assertEqual(1, len(find_all(t, "//DefNode/ReturnStatNode/NameNode")))
self.assertEqual(1, len(find_all(t, "//ReturnStatNode/NameNode")))
-
- def test_node_path_node_predicate(self):
- t = self._build_tree()
+
+ def test_node_path_node_predicate(self):
+ t = self._build_tree()
self.assertEqual(0, len(find_all(t, "//DefNode[.//ForInStatNode]")))
self.assertEqual(2, len(find_all(t, "//DefNode[.//NameNode]")))
self.assertEqual(1, len(find_all(t, "//ReturnStatNode[./NameNode]")))
self.assertEqual(Nodes.ReturnStatNode,
type(find_first(t, "//ReturnStatNode[./NameNode]")))
-
- def test_node_path_node_predicate_step(self):
- t = self._build_tree()
+
+ def test_node_path_node_predicate_step(self):
+ t = self._build_tree()
self.assertEqual(2, len(find_all(t, "//DefNode[.//NameNode]")))
self.assertEqual(8, len(find_all(t, "//DefNode[.//NameNode]//*")))
self.assertEqual(1, len(find_all(t, "//DefNode[.//NameNode]//ReturnStatNode")))
self.assertEqual(Nodes.ReturnStatNode,
type(find_first(t, "//DefNode[.//NameNode]//ReturnStatNode")))
-
- def test_node_path_attribute_exists(self):
- t = self._build_tree()
+
+ def test_node_path_attribute_exists(self):
+ t = self._build_tree()
self.assertEqual(2, len(find_all(t, "//NameNode[@name]")))
self.assertEqual(ExprNodes.NameNode,
type(find_first(t, "//NameNode[@name]")))
-
- def test_node_path_attribute_exists_not(self):
- t = self._build_tree()
+
+ def test_node_path_attribute_exists_not(self):
+ t = self._build_tree()
self.assertEqual(0, len(find_all(t, "//NameNode[not(@name)]")))
self.assertEqual(2, len(find_all(t, "//NameNode[not(@honking)]")))
-
- def test_node_path_and(self):
- t = self._build_tree()
+
+ def test_node_path_and(self):
+ t = self._build_tree()
self.assertEqual(1, len(find_all(t, "//DefNode[.//ReturnStatNode and .//NameNode]")))
self.assertEqual(0, len(find_all(t, "//NameNode[@honking and @name]")))
self.assertEqual(0, len(find_all(t, "//NameNode[@name and @honking]")))
self.assertEqual(2, len(find_all(t, "//DefNode[.//NameNode[@name] and @name]")))
-
- def test_node_path_attribute_string_predicate(self):
- t = self._build_tree()
+
+ def test_node_path_attribute_string_predicate(self):
+ t = self._build_tree()
self.assertEqual(1, len(find_all(t, "//NameNode[@name = 'decorator']")))
-
- def test_node_path_recursive_predicate(self):
- t = self._build_tree()
+
+ def test_node_path_recursive_predicate(self):
+ t = self._build_tree()
self.assertEqual(2, len(find_all(t, "//DefNode[.//NameNode[@name]]")))
self.assertEqual(1, len(find_all(t, "//DefNode[.//NameNode[@name = 'decorator']]")))
self.assertEqual(1, len(find_all(t, "//DefNode[.//ReturnStatNode[./NameNode[@name = 'fun']]/NameNode]")))
-
-if __name__ == '__main__':
- unittest.main()
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestUtilityLoad.py b/contrib/tools/cython/Cython/Compiler/Tests/TestUtilityLoad.py
index 82528913df..3d1906ca0b 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestUtilityLoad.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestUtilityLoad.py
@@ -1,101 +1,101 @@
-import unittest
-
-from Cython.Compiler import Code, UtilityCode
-
-
-def strip_2tup(tup):
- return tup[0] and tup[0].strip(), tup[1] and tup[1].strip()
-
-class TestUtilityLoader(unittest.TestCase):
- """
- Test loading UtilityCodes
- """
-
- expected = "test {{loader}} prototype", "test {{loader}} impl"
-
- required = "req {{loader}} proto", "req {{loader}} impl"
-
- context = dict(loader='Loader')
-
- name = "TestUtilityLoader"
- filename = "TestUtilityLoader.c"
- cls = Code.UtilityCode
-
- def test_load_as_string(self):
- got = strip_2tup(self.cls.load_as_string(self.name))
+import unittest
+
+from Cython.Compiler import Code, UtilityCode
+
+
+def strip_2tup(tup):
+ return tup[0] and tup[0].strip(), tup[1] and tup[1].strip()
+
+class TestUtilityLoader(unittest.TestCase):
+ """
+ Test loading UtilityCodes
+ """
+
+ expected = "test {{loader}} prototype", "test {{loader}} impl"
+
+ required = "req {{loader}} proto", "req {{loader}} impl"
+
+ context = dict(loader='Loader')
+
+ name = "TestUtilityLoader"
+ filename = "TestUtilityLoader.c"
+ cls = Code.UtilityCode
+
+ def test_load_as_string(self):
+ got = strip_2tup(self.cls.load_as_string(self.name))
self.assertEqual(got, self.expected)
-
- got = strip_2tup(self.cls.load_as_string(self.name, self.filename))
+
+ got = strip_2tup(self.cls.load_as_string(self.name, self.filename))
self.assertEqual(got, self.expected)
-
- def test_load(self):
- utility = self.cls.load(self.name)
- got = strip_2tup((utility.proto, utility.impl))
+
+ def test_load(self):
+ utility = self.cls.load(self.name)
+ got = strip_2tup((utility.proto, utility.impl))
self.assertEqual(got, self.expected)
-
- required, = utility.requires
- got = strip_2tup((required.proto, required.impl))
+
+ required, = utility.requires
+ got = strip_2tup((required.proto, required.impl))
self.assertEqual(got, self.required)
-
- utility = self.cls.load(self.name, from_file=self.filename)
- got = strip_2tup((utility.proto, utility.impl))
+
+ utility = self.cls.load(self.name, from_file=self.filename)
+ got = strip_2tup((utility.proto, utility.impl))
self.assertEqual(got, self.expected)
-
- utility = self.cls.load_cached(self.name, from_file=self.filename)
- got = strip_2tup((utility.proto, utility.impl))
+
+ utility = self.cls.load_cached(self.name, from_file=self.filename)
+ got = strip_2tup((utility.proto, utility.impl))
self.assertEqual(got, self.expected)
-
-
-class TestTempitaUtilityLoader(TestUtilityLoader):
- """
- Test loading UtilityCodes with Tempita substitution
- """
- expected_tempita = (TestUtilityLoader.expected[0].replace('{{loader}}', 'Loader'),
- TestUtilityLoader.expected[1].replace('{{loader}}', 'Loader'))
-
- required_tempita = (TestUtilityLoader.required[0].replace('{{loader}}', 'Loader'),
- TestUtilityLoader.required[1].replace('{{loader}}', 'Loader'))
-
- cls = Code.TempitaUtilityCode
-
- def test_load_as_string(self):
- got = strip_2tup(self.cls.load_as_string(self.name, context=self.context))
+
+
+class TestTempitaUtilityLoader(TestUtilityLoader):
+ """
+ Test loading UtilityCodes with Tempita substitution
+ """
+ expected_tempita = (TestUtilityLoader.expected[0].replace('{{loader}}', 'Loader'),
+ TestUtilityLoader.expected[1].replace('{{loader}}', 'Loader'))
+
+ required_tempita = (TestUtilityLoader.required[0].replace('{{loader}}', 'Loader'),
+ TestUtilityLoader.required[1].replace('{{loader}}', 'Loader'))
+
+ cls = Code.TempitaUtilityCode
+
+ def test_load_as_string(self):
+ got = strip_2tup(self.cls.load_as_string(self.name, context=self.context))
self.assertEqual(got, self.expected_tempita)
-
- def test_load(self):
- utility = self.cls.load(self.name, context=self.context)
- got = strip_2tup((utility.proto, utility.impl))
+
+ def test_load(self):
+ utility = self.cls.load(self.name, context=self.context)
+ got = strip_2tup((utility.proto, utility.impl))
self.assertEqual(got, self.expected_tempita)
-
- required, = utility.requires
- got = strip_2tup((required.proto, required.impl))
+
+ required, = utility.requires
+ got = strip_2tup((required.proto, required.impl))
self.assertEqual(got, self.required_tempita)
-
- utility = self.cls.load(self.name, from_file=self.filename, context=self.context)
- got = strip_2tup((utility.proto, utility.impl))
+
+ utility = self.cls.load(self.name, from_file=self.filename, context=self.context)
+ got = strip_2tup((utility.proto, utility.impl))
self.assertEqual(got, self.expected_tempita)
-
-
-class TestCythonUtilityLoader(TestTempitaUtilityLoader):
- """
- Test loading CythonUtilityCodes
- """
-
- # Just change the attributes and run the same tests
- expected = None, "test {{cy_loader}} impl"
- expected_tempita = None, "test CyLoader impl"
-
- required = None, "req {{cy_loader}} impl"
- required_tempita = None, "req CyLoader impl"
-
- context = dict(cy_loader='CyLoader')
-
- name = "TestCyUtilityLoader"
- filename = "TestCyUtilityLoader.pyx"
- cls = UtilityCode.CythonUtilityCode
-
- # Small hack to pass our tests above
- cls.proto = None
-
- test_load = TestUtilityLoader.test_load
- test_load_tempita = TestTempitaUtilityLoader.test_load
+
+
+class TestCythonUtilityLoader(TestTempitaUtilityLoader):
+ """
+ Test loading CythonUtilityCodes
+ """
+
+ # Just change the attributes and run the same tests
+ expected = None, "test {{cy_loader}} impl"
+ expected_tempita = None, "test CyLoader impl"
+
+ required = None, "req {{cy_loader}} impl"
+ required_tempita = None, "req CyLoader impl"
+
+ context = dict(cy_loader='CyLoader')
+
+ name = "TestCyUtilityLoader"
+ filename = "TestCyUtilityLoader.pyx"
+ cls = UtilityCode.CythonUtilityCode
+
+ # Small hack to pass our tests above
+ cls.proto = None
+
+ test_load = TestUtilityLoader.test_load
+ test_load_tempita = TestTempitaUtilityLoader.test_load
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestVisitor.py b/contrib/tools/cython/Cython/Compiler/Tests/TestVisitor.py
index ee0fcc199f..dbc8e0c03a 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/TestVisitor.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestVisitor.py
@@ -1,61 +1,61 @@
-from Cython.Compiler.ModuleNode import ModuleNode
-from Cython.Compiler.Symtab import ModuleScope
-from Cython.TestUtils import TransformTest
-from Cython.Compiler.Visitor import MethodDispatcherTransform
-from Cython.Compiler.ParseTreeTransforms import (
- NormalizeTree, AnalyseDeclarationsTransform,
- AnalyseExpressionsTransform, InterpretCompilerDirectives)
-
-
-class TestMethodDispatcherTransform(TransformTest):
- _tree = None
-
- def _build_tree(self):
- if self._tree is None:
- context = None
-
- def fake_module(node):
- scope = ModuleScope('test', None, None)
- return ModuleNode(node.pos, doc=None, body=node,
- scope=scope, full_module_name='test',
- directive_comments={})
- pipeline = [
- fake_module,
- NormalizeTree(context),
- InterpretCompilerDirectives(context, {}),
- AnalyseDeclarationsTransform(context),
- AnalyseExpressionsTransform(context),
- ]
- self._tree = self.run_pipeline(pipeline, u"""
- cdef bytes s = b'asdfg'
- cdef dict d = {1:2}
- x = s * 3
- d.get('test')
- """)
- return self._tree
-
- def test_builtin_method(self):
- calls = [0]
- class Test(MethodDispatcherTransform):
- def _handle_simple_method_dict_get(self, node, func, args, unbound):
- calls[0] += 1
- return node
-
- tree = self._build_tree()
- Test(None)(tree)
- self.assertEqual(1, calls[0])
-
- def test_binop_method(self):
- calls = {'bytes': 0, 'object': 0}
- class Test(MethodDispatcherTransform):
- def _handle_simple_method_bytes___mul__(self, node, func, args, unbound):
- calls['bytes'] += 1
- return node
- def _handle_simple_method_object___mul__(self, node, func, args, unbound):
- calls['object'] += 1
- return node
-
- tree = self._build_tree()
- Test(None)(tree)
- self.assertEqual(1, calls['bytes'])
- self.assertEqual(0, calls['object'])
+from Cython.Compiler.ModuleNode import ModuleNode
+from Cython.Compiler.Symtab import ModuleScope
+from Cython.TestUtils import TransformTest
+from Cython.Compiler.Visitor import MethodDispatcherTransform
+from Cython.Compiler.ParseTreeTransforms import (
+ NormalizeTree, AnalyseDeclarationsTransform,
+ AnalyseExpressionsTransform, InterpretCompilerDirectives)
+
+
+class TestMethodDispatcherTransform(TransformTest):
+ _tree = None
+
+ def _build_tree(self):
+ if self._tree is None:
+ context = None
+
+ def fake_module(node):
+ scope = ModuleScope('test', None, None)
+ return ModuleNode(node.pos, doc=None, body=node,
+ scope=scope, full_module_name='test',
+ directive_comments={})
+ pipeline = [
+ fake_module,
+ NormalizeTree(context),
+ InterpretCompilerDirectives(context, {}),
+ AnalyseDeclarationsTransform(context),
+ AnalyseExpressionsTransform(context),
+ ]
+ self._tree = self.run_pipeline(pipeline, u"""
+ cdef bytes s = b'asdfg'
+ cdef dict d = {1:2}
+ x = s * 3
+ d.get('test')
+ """)
+ return self._tree
+
+ def test_builtin_method(self):
+ calls = [0]
+ class Test(MethodDispatcherTransform):
+ def _handle_simple_method_dict_get(self, node, func, args, unbound):
+ calls[0] += 1
+ return node
+
+ tree = self._build_tree()
+ Test(None)(tree)
+ self.assertEqual(1, calls[0])
+
+ def test_binop_method(self):
+ calls = {'bytes': 0, 'object': 0}
+ class Test(MethodDispatcherTransform):
+ def _handle_simple_method_bytes___mul__(self, node, func, args, unbound):
+ calls['bytes'] += 1
+ return node
+ def _handle_simple_method_object___mul__(self, node, func, args, unbound):
+ calls['object'] += 1
+ return node
+
+ tree = self._build_tree()
+ Test(None)(tree)
+ self.assertEqual(1, calls['bytes'])
+ self.assertEqual(0, calls['object'])
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/__init__.py b/contrib/tools/cython/Cython/Compiler/Tests/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Compiler/Tests/__init__.py
+++ b/contrib/tools/cython/Cython/Compiler/Tests/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Compiler/TreeFragment.py b/contrib/tools/cython/Cython/Compiler/TreeFragment.py
index 5bd7fc39e0..b85da8191a 100644
--- a/contrib/tools/cython/Cython/Compiler/TreeFragment.py
+++ b/contrib/tools/cython/Cython/Compiler/TreeFragment.py
@@ -1,29 +1,29 @@
-#
-# TreeFragments - parsing of strings to trees
-#
-
-"""
-Support for parsing strings into code trees.
-"""
-
-from __future__ import absolute_import
-
-import re
+#
+# TreeFragments - parsing of strings to trees
+#
+
+"""
+Support for parsing strings into code trees.
+"""
+
+from __future__ import absolute_import
+
+import re
from io import StringIO
-
-from .Scanning import PyrexScanner, StringSourceDescriptor
-from .Symtab import ModuleScope
-from . import PyrexTypes
-from .Visitor import VisitorTransform
-from .Nodes import Node, StatListNode
-from .ExprNodes import NameNode
+
+from .Scanning import PyrexScanner, StringSourceDescriptor
+from .Symtab import ModuleScope
+from . import PyrexTypes
+from .Visitor import VisitorTransform
+from .Nodes import Node, StatListNode
+from .ExprNodes import NameNode
from .StringEncoding import _unicode
-from . import Parsing
-from . import Main
-from . import UtilNodes
-
-
-class StringParseContext(Main.Context):
+from . import Parsing
+from . import Main
+from . import UtilNodes
+
+
+class StringParseContext(Main.Context):
def __init__(self, name, include_directories=None, compiler_directives=None, cpp=False):
if include_directories is None:
include_directories = []
@@ -31,191 +31,191 @@ class StringParseContext(Main.Context):
compiler_directives = {}
# TODO: see if "language_level=3" also works for our internal code here.
Main.Context.__init__(self, include_directories, compiler_directives, cpp=cpp, language_level=2)
- self.module_name = name
-
+ self.module_name = name
+
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=1, absolute_fallback=True):
- if module_name not in (self.module_name, 'cython'):
- raise AssertionError("Not yet supporting any cimports/includes from string code snippets")
+ if module_name not in (self.module_name, 'cython'):
+ raise AssertionError("Not yet supporting any cimports/includes from string code snippets")
return ModuleScope(module_name, parent_module=None, context=self)
-
-
+
+
def parse_from_strings(name, code, pxds=None, level=None, initial_pos=None,
- context=None, allow_struct_enum_decorator=False):
- """
- Utility method to parse a (unicode) string of code. This is mostly
- used for internal Cython compiler purposes (creating code snippets
- that transforms should emit, as well as unit testing).
-
- code - a unicode string containing Cython (module-level) code
- name - a descriptive name for the code source (to use in error messages etc.)
-
- RETURNS
-
- The tree, i.e. a ModuleNode. The ModuleNode's scope attribute is
- set to the scope used when parsing.
- """
- if context is None:
- context = StringParseContext(name)
- # Since source files carry an encoding, it makes sense in this context
- # to use a unicode string so that code fragments don't have to bother
- # with encoding. This means that test code passed in should not have an
- # encoding header.
+ context=None, allow_struct_enum_decorator=False):
+ """
+ Utility method to parse a (unicode) string of code. This is mostly
+ used for internal Cython compiler purposes (creating code snippets
+ that transforms should emit, as well as unit testing).
+
+ code - a unicode string containing Cython (module-level) code
+ name - a descriptive name for the code source (to use in error messages etc.)
+
+ RETURNS
+
+ The tree, i.e. a ModuleNode. The ModuleNode's scope attribute is
+ set to the scope used when parsing.
+ """
+ if context is None:
+ context = StringParseContext(name)
+ # Since source files carry an encoding, it makes sense in this context
+ # to use a unicode string so that code fragments don't have to bother
+ # with encoding. This means that test code passed in should not have an
+ # encoding header.
assert isinstance(code, _unicode), "unicode code snippets only please"
- encoding = "UTF-8"
-
- module_name = name
- if initial_pos is None:
- initial_pos = (name, 1, 0)
- code_source = StringSourceDescriptor(name, code)
-
+ encoding = "UTF-8"
+
+ module_name = name
+ if initial_pos is None:
+ initial_pos = (name, 1, 0)
+ code_source = StringSourceDescriptor(name, code)
+
scope = context.find_module(module_name, pos=initial_pos, need_pxd=False)
-
- buf = StringIO(code)
-
- scanner = PyrexScanner(buf, code_source, source_encoding = encoding,
- scope = scope, context = context, initial_pos = initial_pos)
- ctx = Parsing.Ctx(allow_struct_enum_decorator=allow_struct_enum_decorator)
-
- if level is None:
- tree = Parsing.p_module(scanner, 0, module_name, ctx=ctx)
- tree.scope = scope
- tree.is_pxd = False
- else:
- tree = Parsing.p_code(scanner, level=level, ctx=ctx)
-
- tree.scope = scope
- return tree
-
-
-class TreeCopier(VisitorTransform):
- def visit_Node(self, node):
- if node is None:
- return node
- else:
- c = node.clone_node()
- self.visitchildren(c)
- return c
-
-
-class ApplyPositionAndCopy(TreeCopier):
- def __init__(self, pos):
- super(ApplyPositionAndCopy, self).__init__()
- self.pos = pos
-
- def visit_Node(self, node):
- copy = super(ApplyPositionAndCopy, self).visit_Node(node)
- copy.pos = self.pos
- return copy
-
-
-class TemplateTransform(VisitorTransform):
- """
- Makes a copy of a template tree while doing substitutions.
-
- A dictionary "substitutions" should be passed in when calling
- the transform; mapping names to replacement nodes. Then replacement
- happens like this:
- - If an ExprStatNode contains a single NameNode, whose name is
- a key in the substitutions dictionary, the ExprStatNode is
- replaced with a copy of the tree given in the dictionary.
- It is the responsibility of the caller that the replacement
- node is a valid statement.
- - If a single NameNode is otherwise encountered, it is replaced
- if its name is listed in the substitutions dictionary in the
- same way. It is the responsibility of the caller to make sure
- that the replacement node is a valid expression.
-
- Also a list "temps" should be passed. Any names listed will
- be transformed into anonymous, temporary names.
-
- Currently supported for tempnames is:
- NameNode
- (various function and class definition nodes etc. should be added to this)
-
- Each replacement node gets the position of the substituted node
- recursively applied to every member node.
- """
-
- temp_name_counter = 0
-
- def __call__(self, node, substitutions, temps, pos):
- self.substitutions = substitutions
- self.pos = pos
- tempmap = {}
- temphandles = []
- for temp in temps:
- TemplateTransform.temp_name_counter += 1
- handle = UtilNodes.TempHandle(PyrexTypes.py_object_type)
- tempmap[temp] = handle
- temphandles.append(handle)
- self.tempmap = tempmap
- result = super(TemplateTransform, self).__call__(node)
- if temps:
- result = UtilNodes.TempsBlockNode(self.get_pos(node),
- temps=temphandles,
- body=result)
- return result
-
- def get_pos(self, node):
- if self.pos:
- return self.pos
- else:
- return node.pos
-
- def visit_Node(self, node):
- if node is None:
- return None
- else:
- c = node.clone_node()
- if self.pos is not None:
- c.pos = self.pos
- self.visitchildren(c)
- return c
-
- def try_substitution(self, node, key):
- sub = self.substitutions.get(key)
- if sub is not None:
- pos = self.pos
- if pos is None: pos = node.pos
- return ApplyPositionAndCopy(pos)(sub)
- else:
- return self.visit_Node(node) # make copy as usual
-
- def visit_NameNode(self, node):
- temphandle = self.tempmap.get(node.name)
- if temphandle:
- # Replace name with temporary
- return temphandle.ref(self.get_pos(node))
- else:
- return self.try_substitution(node, node.name)
-
- def visit_ExprStatNode(self, node):
- # If an expression-as-statement consists of only a replaceable
- # NameNode, we replace the entire statement, not only the NameNode
- if isinstance(node.expr, NameNode):
- return self.try_substitution(node, node.expr.name)
- else:
- return self.visit_Node(node)
-
-
-def copy_code_tree(node):
- return TreeCopier()(node)
-
+
+ buf = StringIO(code)
+
+ scanner = PyrexScanner(buf, code_source, source_encoding = encoding,
+ scope = scope, context = context, initial_pos = initial_pos)
+ ctx = Parsing.Ctx(allow_struct_enum_decorator=allow_struct_enum_decorator)
+
+ if level is None:
+ tree = Parsing.p_module(scanner, 0, module_name, ctx=ctx)
+ tree.scope = scope
+ tree.is_pxd = False
+ else:
+ tree = Parsing.p_code(scanner, level=level, ctx=ctx)
+
+ tree.scope = scope
+ return tree
+
+
+class TreeCopier(VisitorTransform):
+ def visit_Node(self, node):
+ if node is None:
+ return node
+ else:
+ c = node.clone_node()
+ self.visitchildren(c)
+ return c
+
+
+class ApplyPositionAndCopy(TreeCopier):
+ def __init__(self, pos):
+ super(ApplyPositionAndCopy, self).__init__()
+ self.pos = pos
+
+ def visit_Node(self, node):
+ copy = super(ApplyPositionAndCopy, self).visit_Node(node)
+ copy.pos = self.pos
+ return copy
+
+
+class TemplateTransform(VisitorTransform):
+ """
+ Makes a copy of a template tree while doing substitutions.
+
+ A dictionary "substitutions" should be passed in when calling
+ the transform; mapping names to replacement nodes. Then replacement
+ happens like this:
+ - If an ExprStatNode contains a single NameNode, whose name is
+ a key in the substitutions dictionary, the ExprStatNode is
+ replaced with a copy of the tree given in the dictionary.
+ It is the responsibility of the caller that the replacement
+ node is a valid statement.
+ - If a single NameNode is otherwise encountered, it is replaced
+ if its name is listed in the substitutions dictionary in the
+ same way. It is the responsibility of the caller to make sure
+      that the replacement node is a valid expression.
+
+ Also a list "temps" should be passed. Any names listed will
+ be transformed into anonymous, temporary names.
+
+ Currently supported for tempnames is:
+ NameNode
+ (various function and class definition nodes etc. should be added to this)
+
+ Each replacement node gets the position of the substituted node
+ recursively applied to every member node.
+ """
+
+ temp_name_counter = 0
+
+ def __call__(self, node, substitutions, temps, pos):
+ self.substitutions = substitutions
+ self.pos = pos
+ tempmap = {}
+ temphandles = []
+ for temp in temps:
+ TemplateTransform.temp_name_counter += 1
+ handle = UtilNodes.TempHandle(PyrexTypes.py_object_type)
+ tempmap[temp] = handle
+ temphandles.append(handle)
+ self.tempmap = tempmap
+ result = super(TemplateTransform, self).__call__(node)
+ if temps:
+ result = UtilNodes.TempsBlockNode(self.get_pos(node),
+ temps=temphandles,
+ body=result)
+ return result
+
+ def get_pos(self, node):
+ if self.pos:
+ return self.pos
+ else:
+ return node.pos
+
+ def visit_Node(self, node):
+ if node is None:
+ return None
+ else:
+ c = node.clone_node()
+ if self.pos is not None:
+ c.pos = self.pos
+ self.visitchildren(c)
+ return c
+
+ def try_substitution(self, node, key):
+ sub = self.substitutions.get(key)
+ if sub is not None:
+ pos = self.pos
+ if pos is None: pos = node.pos
+ return ApplyPositionAndCopy(pos)(sub)
+ else:
+ return self.visit_Node(node) # make copy as usual
+
+ def visit_NameNode(self, node):
+ temphandle = self.tempmap.get(node.name)
+ if temphandle:
+ # Replace name with temporary
+ return temphandle.ref(self.get_pos(node))
+ else:
+ return self.try_substitution(node, node.name)
+
+ def visit_ExprStatNode(self, node):
+ # If an expression-as-statement consists of only a replaceable
+ # NameNode, we replace the entire statement, not only the NameNode
+ if isinstance(node.expr, NameNode):
+ return self.try_substitution(node, node.expr.name)
+ else:
+ return self.visit_Node(node)
+
+
+def copy_code_tree(node):
+ return TreeCopier()(node)
+
_match_indent = re.compile(u"^ *").match
-def strip_common_indent(lines):
+def strip_common_indent(lines):
"""Strips empty lines and common indentation from the list of strings given in lines"""
- # TODO: Facilitate textwrap.indent instead
- lines = [x for x in lines if x.strip() != u""]
+ # TODO: Facilitate textwrap.indent instead
+ lines = [x for x in lines if x.strip() != u""]
if lines:
minindent = min([len(_match_indent(x).group(0)) for x in lines])
lines = [x[minindent:] for x in lines]
- return lines
-
+ return lines
+
-class TreeFragment(object):
+class TreeFragment(object):
def __init__(self, code, name=None, pxds=None, temps=None, pipeline=None, level=None, initial_pos=None):
if pxds is None:
pxds = {}
@@ -227,49 +227,49 @@ class TreeFragment(object):
name = "(tree fragment)"
if isinstance(code, _unicode):
- def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n")))
-
- fmt_code = fmt(code)
- fmt_pxds = {}
+ def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n")))
+
+ fmt_code = fmt(code)
+ fmt_pxds = {}
for key, value in pxds.items():
- fmt_pxds[key] = fmt(value)
- mod = t = parse_from_strings(name, fmt_code, fmt_pxds, level=level, initial_pos=initial_pos)
- if level is None:
- t = t.body # Make sure a StatListNode is at the top
- if not isinstance(t, StatListNode):
- t = StatListNode(pos=mod.pos, stats=[t])
- for transform in pipeline:
- if transform is None:
- continue
- t = transform(t)
- self.root = t
- elif isinstance(code, Node):
+ fmt_pxds[key] = fmt(value)
+ mod = t = parse_from_strings(name, fmt_code, fmt_pxds, level=level, initial_pos=initial_pos)
+ if level is None:
+ t = t.body # Make sure a StatListNode is at the top
+ if not isinstance(t, StatListNode):
+ t = StatListNode(pos=mod.pos, stats=[t])
+ for transform in pipeline:
+ if transform is None:
+ continue
+ t = transform(t)
+ self.root = t
+ elif isinstance(code, Node):
if pxds:
raise NotImplementedError()
- self.root = code
- else:
- raise ValueError("Unrecognized code format (accepts unicode and Node)")
- self.temps = temps
-
- def copy(self):
- return copy_code_tree(self.root)
-
+ self.root = code
+ else:
+ raise ValueError("Unrecognized code format (accepts unicode and Node)")
+ self.temps = temps
+
+ def copy(self):
+ return copy_code_tree(self.root)
+
def substitute(self, nodes=None, temps=None, pos = None):
if nodes is None:
nodes = {}
if temps is None:
temps = []
- return TemplateTransform()(self.root,
- substitutions = nodes,
- temps = self.temps + temps, pos = pos)
-
-
-class SetPosTransform(VisitorTransform):
- def __init__(self, pos):
- super(SetPosTransform, self).__init__()
- self.pos = pos
-
- def visit_Node(self, node):
- node.pos = self.pos
- self.visitchildren(node)
- return node
+ return TemplateTransform()(self.root,
+ substitutions = nodes,
+ temps = self.temps + temps, pos = pos)
+
+
+class SetPosTransform(VisitorTransform):
+ def __init__(self, pos):
+ super(SetPosTransform, self).__init__()
+ self.pos = pos
+
+ def visit_Node(self, node):
+ node.pos = self.pos
+ self.visitchildren(node)
+ return node
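
TreeFragment and TemplateTransform together form a small templating facility for
compiler trees: a code string is parsed once, and substitute() later stamps out
copies in which placeholder NameNodes are swapped for real nodes and any names
listed in "temps" become anonymous temporaries. A minimal usage sketch, assuming
a running compiler context in which obj_node is an existing ExprNodes.NameNode
(the placeholder name OBJ is purely illustrative):

    from Cython.Compiler.TreeFragment import TreeFragment

    # Parse the template once; placeholders are ordinary names in the code.
    fragment = TreeFragment(u"""
        if OBJ is None:
            raise TypeError("expected a value")
    """)

    # Stamp out a copy: OBJ is replaced by obj_node (assumed to exist), and
    # every copied node receives obj_node.pos, as ApplyPositionAndCopy does above.
    stats = fragment.substitute(nodes={u'OBJ': obj_node}, pos=obj_node.pos)
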
diff --git a/contrib/tools/cython/Cython/Compiler/TreePath.py b/contrib/tools/cython/Cython/Compiler/TreePath.py
index 2ad4545d0b..8585905557 100644
--- a/contrib/tools/cython/Cython/Compiler/TreePath.py
+++ b/contrib/tools/cython/Cython/Compiler/TreePath.py
@@ -1,23 +1,23 @@
-"""
-A simple XPath-like language for tree traversal.
-
-This works by creating a filter chain of generator functions. Each
-function selects a part of the expression, e.g. a child node, a
-specific descendant or a node that holds an attribute.
-"""
-
-from __future__ import absolute_import
-
-import re
-import operator
+"""
+A simple XPath-like language for tree traversal.
+
+This works by creating a filter chain of generator functions. Each
+function selects a part of the expression, e.g. a child node, a
+specific descendant or a node that holds an attribute.
+"""
+
+from __future__ import absolute_import
+
+import re
+import operator
import sys
-
+
if sys.version_info[0] >= 3:
_unicode = str
else:
_unicode = unicode
-path_tokenizer = re.compile(
+path_tokenizer = re.compile(
r"("
r"'[^']*'|\"[^\"]*\"|"
r"//?|"
@@ -26,271 +26,271 @@ path_tokenizer = re.compile(
r"[/.*\[\]()@])|"
r"([^/\[\]()@=\s]+)|"
r"\s+"
- ).findall
-
-def iterchildren(node, attr_name):
- # returns an iterable of all child nodes of that name
- child = getattr(node, attr_name)
- if child is not None:
- if type(child) is list:
- return child
- else:
- return [child]
- else:
- return ()
-
-def _get_first_or_none(it):
- try:
- try:
- _next = it.next
- except AttributeError:
- return next(it)
- else:
- return _next()
- except StopIteration:
- return None
-
-def type_name(node):
- return node.__class__.__name__.split('.')[-1]
-
-def parse_func(next, token):
- name = token[1]
- token = next()
- if token[0] != '(':
- raise ValueError("Expected '(' after function name '%s'" % name)
- predicate = handle_predicate(next, token)
- return name, predicate
-
-def handle_func_not(next, token):
- """
- not(...)
- """
- name, predicate = parse_func(next, token)
-
- def select(result):
- for node in result:
- if _get_first_or_none(predicate([node])) is None:
- yield node
- return select
-
-def handle_name(next, token):
- """
- /NodeName/
- or
- func(...)
- """
- name = token[1]
- if name in functions:
- return functions[name](next, token)
- def select(result):
- for node in result:
- for attr_name in node.child_attrs:
- for child in iterchildren(node, attr_name):
- if type_name(child) == name:
- yield child
- return select
-
-def handle_star(next, token):
- """
- /*/
- """
- def select(result):
- for node in result:
- for name in node.child_attrs:
- for child in iterchildren(node, name):
- yield child
- return select
-
-def handle_dot(next, token):
- """
- /./
- """
- def select(result):
- return result
- return select
-
-def handle_descendants(next, token):
- """
- //...
- """
- token = next()
- if token[0] == "*":
- def iter_recursive(node):
- for name in node.child_attrs:
- for child in iterchildren(node, name):
- yield child
- for c in iter_recursive(child):
- yield c
- elif not token[0]:
- node_name = token[1]
- def iter_recursive(node):
- for name in node.child_attrs:
- for child in iterchildren(node, name):
- if type_name(child) == node_name:
- yield child
- for c in iter_recursive(child):
- yield c
- else:
- raise ValueError("Expected node name after '//'")
-
- def select(result):
- for node in result:
- for child in iter_recursive(node):
- yield child
-
- return select
-
-
-def handle_attribute(next, token):
- token = next()
- if token[0]:
- raise ValueError("Expected attribute name")
- name = token[1]
- value = None
- try:
- token = next()
- except StopIteration:
- pass
- else:
- if token[0] == '=':
- value = parse_path_value(next)
- readattr = operator.attrgetter(name)
- if value is None:
- def select(result):
- for node in result:
- try:
- attr_value = readattr(node)
- except AttributeError:
- continue
- if attr_value is not None:
- yield attr_value
- else:
- def select(result):
- for node in result:
- try:
- attr_value = readattr(node)
- except AttributeError:
- continue
- if attr_value == value:
- yield attr_value
+ ).findall
+
+def iterchildren(node, attr_name):
+ # returns an iterable of all child nodes of that name
+ child = getattr(node, attr_name)
+ if child is not None:
+ if type(child) is list:
+ return child
+ else:
+ return [child]
+ else:
+ return ()
+
+def _get_first_or_none(it):
+ try:
+ try:
+ _next = it.next
+ except AttributeError:
+ return next(it)
+ else:
+ return _next()
+ except StopIteration:
+ return None
+
+def type_name(node):
+ return node.__class__.__name__.split('.')[-1]
+
+def parse_func(next, token):
+ name = token[1]
+ token = next()
+ if token[0] != '(':
+ raise ValueError("Expected '(' after function name '%s'" % name)
+ predicate = handle_predicate(next, token)
+ return name, predicate
+
+def handle_func_not(next, token):
+ """
+ not(...)
+ """
+ name, predicate = parse_func(next, token)
+
+ def select(result):
+ for node in result:
+ if _get_first_or_none(predicate([node])) is None:
+ yield node
+ return select
+
+def handle_name(next, token):
+ """
+ /NodeName/
+ or
+ func(...)
+ """
+ name = token[1]
+ if name in functions:
+ return functions[name](next, token)
+ def select(result):
+ for node in result:
+ for attr_name in node.child_attrs:
+ for child in iterchildren(node, attr_name):
+ if type_name(child) == name:
+ yield child
+ return select
+
+def handle_star(next, token):
+ """
+ /*/
+ """
+ def select(result):
+ for node in result:
+ for name in node.child_attrs:
+ for child in iterchildren(node, name):
+ yield child
+ return select
+
+def handle_dot(next, token):
+ """
+ /./
+ """
+ def select(result):
+ return result
+ return select
+
+def handle_descendants(next, token):
+ """
+ //...
+ """
+ token = next()
+ if token[0] == "*":
+ def iter_recursive(node):
+ for name in node.child_attrs:
+ for child in iterchildren(node, name):
+ yield child
+ for c in iter_recursive(child):
+ yield c
+ elif not token[0]:
+ node_name = token[1]
+ def iter_recursive(node):
+ for name in node.child_attrs:
+ for child in iterchildren(node, name):
+ if type_name(child) == node_name:
+ yield child
+ for c in iter_recursive(child):
+ yield c
+ else:
+ raise ValueError("Expected node name after '//'")
+
+ def select(result):
+ for node in result:
+ for child in iter_recursive(node):
+ yield child
+
+ return select
+
+
+def handle_attribute(next, token):
+ token = next()
+ if token[0]:
+ raise ValueError("Expected attribute name")
+ name = token[1]
+ value = None
+ try:
+ token = next()
+ except StopIteration:
+ pass
+ else:
+ if token[0] == '=':
+ value = parse_path_value(next)
+ readattr = operator.attrgetter(name)
+ if value is None:
+ def select(result):
+ for node in result:
+ try:
+ attr_value = readattr(node)
+ except AttributeError:
+ continue
+ if attr_value is not None:
+ yield attr_value
+ else:
+ def select(result):
+ for node in result:
+ try:
+ attr_value = readattr(node)
+ except AttributeError:
+ continue
+ if attr_value == value:
+ yield attr_value
elif (isinstance(attr_value, bytes) and isinstance(value, _unicode) and
attr_value == value.encode()):
# allow a bytes-to-string comparison too
yield attr_value
- return select
-
-
-def parse_path_value(next):
- token = next()
- value = token[0]
- if value:
- if value[:1] == "'" or value[:1] == '"':
- return value[1:-1]
- try:
- return int(value)
- except ValueError:
- pass
+ return select
+
+
+def parse_path_value(next):
+ token = next()
+ value = token[0]
+ if value:
+ if value[:1] == "'" or value[:1] == '"':
+ return value[1:-1]
+ try:
+ return int(value)
+ except ValueError:
+ pass
elif token[1].isdigit():
return int(token[1])
- else:
- name = token[1].lower()
- if name == 'true':
- return True
- elif name == 'false':
- return False
- raise ValueError("Invalid attribute predicate: '%s'" % value)
-
-def handle_predicate(next, token):
- token = next()
- selector = []
- while token[0] != ']':
- selector.append( operations[token[0]](next, token) )
- try:
- token = next()
- except StopIteration:
- break
- else:
- if token[0] == "/":
- token = next()
-
- if not token[0] and token[1] == 'and':
- return logical_and(selector, handle_predicate(next, token))
-
- def select(result):
- for node in result:
- subresult = iter((node,))
- for select in selector:
- subresult = select(subresult)
- predicate_result = _get_first_or_none(subresult)
- if predicate_result is not None:
- yield node
- return select
-
-def logical_and(lhs_selects, rhs_select):
- def select(result):
- for node in result:
- subresult = iter((node,))
- for select in lhs_selects:
- subresult = select(subresult)
- predicate_result = _get_first_or_none(subresult)
- subresult = iter((node,))
- if predicate_result is not None:
- for result_node in rhs_select(subresult):
- yield node
- return select
-
-
-operations = {
- "@": handle_attribute,
- "": handle_name,
- "*": handle_star,
- ".": handle_dot,
- "//": handle_descendants,
- "[": handle_predicate,
- }
-
-functions = {
- 'not' : handle_func_not
- }
-
-def _build_path_iterator(path):
- # parse pattern
- stream = iter([ (special,text)
- for (special,text) in path_tokenizer(path)
- if special or text ])
- try:
- _next = stream.next
- except AttributeError:
- # Python 3
- def _next():
- return next(stream)
- token = _next()
- selector = []
- while 1:
- try:
- selector.append(operations[token[0]](_next, token))
- except StopIteration:
- raise ValueError("invalid path")
- try:
- token = _next()
- if token[0] == "/":
- token = _next()
- except StopIteration:
- break
- return selector
-
-# main module API
-
-def iterfind(node, path):
- selector_chain = _build_path_iterator(path)
- result = iter((node,))
- for select in selector_chain:
- result = select(result)
- return result
-
-def find_first(node, path):
- return _get_first_or_none(iterfind(node, path))
-
-def find_all(node, path):
- return list(iterfind(node, path))
+ else:
+ name = token[1].lower()
+ if name == 'true':
+ return True
+ elif name == 'false':
+ return False
+ raise ValueError("Invalid attribute predicate: '%s'" % value)
+
+def handle_predicate(next, token):
+ token = next()
+ selector = []
+ while token[0] != ']':
+ selector.append( operations[token[0]](next, token) )
+ try:
+ token = next()
+ except StopIteration:
+ break
+ else:
+ if token[0] == "/":
+ token = next()
+
+ if not token[0] and token[1] == 'and':
+ return logical_and(selector, handle_predicate(next, token))
+
+ def select(result):
+ for node in result:
+ subresult = iter((node,))
+ for select in selector:
+ subresult = select(subresult)
+ predicate_result = _get_first_or_none(subresult)
+ if predicate_result is not None:
+ yield node
+ return select
+
+def logical_and(lhs_selects, rhs_select):
+ def select(result):
+ for node in result:
+ subresult = iter((node,))
+ for select in lhs_selects:
+ subresult = select(subresult)
+ predicate_result = _get_first_or_none(subresult)
+ subresult = iter((node,))
+ if predicate_result is not None:
+ for result_node in rhs_select(subresult):
+ yield node
+ return select
+
+
+operations = {
+ "@": handle_attribute,
+ "": handle_name,
+ "*": handle_star,
+ ".": handle_dot,
+ "//": handle_descendants,
+ "[": handle_predicate,
+ }
+
+functions = {
+ 'not' : handle_func_not
+ }
+
+def _build_path_iterator(path):
+ # parse pattern
+ stream = iter([ (special,text)
+ for (special,text) in path_tokenizer(path)
+ if special or text ])
+ try:
+ _next = stream.next
+ except AttributeError:
+ # Python 3
+ def _next():
+ return next(stream)
+ token = _next()
+ selector = []
+ while 1:
+ try:
+ selector.append(operations[token[0]](_next, token))
+ except StopIteration:
+ raise ValueError("invalid path")
+ try:
+ token = _next()
+ if token[0] == "/":
+ token = _next()
+ except StopIteration:
+ break
+ return selector
+
+# main module API
+
+def iterfind(node, path):
+ selector_chain = _build_path_iterator(path)
+ result = iter((node,))
+ for select in selector_chain:
+ result = select(result)
+ return result
+
+def find_first(node, path):
+ return _get_first_or_none(iterfind(node, path))
+
+def find_all(node, path):
+ return list(iterfind(node, path))
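
TreePath gives the compiler a tiny XPath-like query language over these trees:
a step names a node class, '*' matches any child, '//' descends recursively,
'@attr' reads an attribute, and '[...]' applies a predicate (including not(...)
and 'and'). A short sketch of the module-level helpers, assuming module_node is
any parsed Cython tree (for instance the root produced by TreeFragment above):

    from Cython.Compiler import TreePath

    # All function definitions anywhere below the root node.
    defs = TreePath.find_all(module_node, "//DefNode")

    # First reference to the name 'self' inside any return statement, or None.
    ret_self = TreePath.find_first(
        module_node, "//ReturnStatNode//NameNode[@name = 'self']")

    # iterfind() is the lazy variant used by both helpers above.
    for name_node in TreePath.iterfind(module_node, "//NameNode"):
        print(name_node.name)
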
diff --git a/contrib/tools/cython/Cython/Compiler/TypeInference.py b/contrib/tools/cython/Cython/Compiler/TypeInference.py
index 2e179633d9..c7ffee7d24 100644
--- a/contrib/tools/cython/Cython/Compiler/TypeInference.py
+++ b/contrib/tools/cython/Cython/Compiler/TypeInference.py
@@ -1,311 +1,311 @@
-from __future__ import absolute_import
-
-from .Errors import error, message
-from . import ExprNodes
-from . import Nodes
-from . import Builtin
-from . import PyrexTypes
-from .. import Utils
-from .PyrexTypes import py_object_type, unspecified_type
-from .Visitor import CythonTransform, EnvTransform
-
+from __future__ import absolute_import
+
+from .Errors import error, message
+from . import ExprNodes
+from . import Nodes
+from . import Builtin
+from . import PyrexTypes
+from .. import Utils
+from .PyrexTypes import py_object_type, unspecified_type
+from .Visitor import CythonTransform, EnvTransform
+
try:
reduce
except NameError:
from functools import reduce
-
-class TypedExprNode(ExprNodes.ExprNode):
- # Used for declaring assignments of a specified type without a known entry.
+
+class TypedExprNode(ExprNodes.ExprNode):
+ # Used for declaring assignments of a specified type without a known entry.
subexprs = []
-
+
def __init__(self, type, pos=None):
super(TypedExprNode, self).__init__(pos, type=type)
-object_expr = TypedExprNode(py_object_type)
-
-
-class MarkParallelAssignments(EnvTransform):
- # Collects assignments inside parallel blocks prange, with parallel.
- # Perhaps it's better to move it to ControlFlowAnalysis.
-
- # tells us whether we're in a normal loop
- in_loop = False
-
- parallel_errors = False
-
- def __init__(self, context):
- # Track the parallel block scopes (with parallel, for i in prange())
- self.parallel_block_stack = []
- super(MarkParallelAssignments, self).__init__(context)
-
- def mark_assignment(self, lhs, rhs, inplace_op=None):
- if isinstance(lhs, (ExprNodes.NameNode, Nodes.PyArgDeclNode)):
- if lhs.entry is None:
- # TODO: This shouldn't happen...
- return
-
- if self.parallel_block_stack:
- parallel_node = self.parallel_block_stack[-1]
- previous_assignment = parallel_node.assignments.get(lhs.entry)
-
- # If there was a previous assignment to the variable, keep the
- # previous assignment position
- if previous_assignment:
- pos, previous_inplace_op = previous_assignment
-
- if (inplace_op and previous_inplace_op and
- inplace_op != previous_inplace_op):
- # x += y; x *= y
- t = (inplace_op, previous_inplace_op)
- error(lhs.pos,
- "Reduction operator '%s' is inconsistent "
- "with previous reduction operator '%s'" % t)
- else:
- pos = lhs.pos
-
- parallel_node.assignments[lhs.entry] = (pos, inplace_op)
- parallel_node.assigned_nodes.append(lhs)
-
- elif isinstance(lhs, ExprNodes.SequenceNode):
+object_expr = TypedExprNode(py_object_type)
+
+
+class MarkParallelAssignments(EnvTransform):
+ # Collects assignments inside parallel blocks prange, with parallel.
+ # Perhaps it's better to move it to ControlFlowAnalysis.
+
+ # tells us whether we're in a normal loop
+ in_loop = False
+
+ parallel_errors = False
+
+ def __init__(self, context):
+ # Track the parallel block scopes (with parallel, for i in prange())
+ self.parallel_block_stack = []
+ super(MarkParallelAssignments, self).__init__(context)
+
+ def mark_assignment(self, lhs, rhs, inplace_op=None):
+ if isinstance(lhs, (ExprNodes.NameNode, Nodes.PyArgDeclNode)):
+ if lhs.entry is None:
+ # TODO: This shouldn't happen...
+ return
+
+ if self.parallel_block_stack:
+ parallel_node = self.parallel_block_stack[-1]
+ previous_assignment = parallel_node.assignments.get(lhs.entry)
+
+ # If there was a previous assignment to the variable, keep the
+ # previous assignment position
+ if previous_assignment:
+ pos, previous_inplace_op = previous_assignment
+
+ if (inplace_op and previous_inplace_op and
+ inplace_op != previous_inplace_op):
+ # x += y; x *= y
+ t = (inplace_op, previous_inplace_op)
+ error(lhs.pos,
+ "Reduction operator '%s' is inconsistent "
+ "with previous reduction operator '%s'" % t)
+ else:
+ pos = lhs.pos
+
+ parallel_node.assignments[lhs.entry] = (pos, inplace_op)
+ parallel_node.assigned_nodes.append(lhs)
+
+ elif isinstance(lhs, ExprNodes.SequenceNode):
for i, arg in enumerate(lhs.args):
if not rhs or arg.is_starred:
item_node = None
else:
item_node = rhs.inferable_item_node(i)
self.mark_assignment(arg, item_node)
- else:
- # Could use this info to infer cdef class attributes...
- pass
-
- def visit_WithTargetAssignmentStatNode(self, node):
- self.mark_assignment(node.lhs, node.with_node.enter_call)
- self.visitchildren(node)
- return node
-
- def visit_SingleAssignmentNode(self, node):
- self.mark_assignment(node.lhs, node.rhs)
- self.visitchildren(node)
- return node
-
- def visit_CascadedAssignmentNode(self, node):
- for lhs in node.lhs_list:
- self.mark_assignment(lhs, node.rhs)
- self.visitchildren(node)
- return node
-
- def visit_InPlaceAssignmentNode(self, node):
- self.mark_assignment(node.lhs, node.create_binop_node(), node.operator)
- self.visitchildren(node)
- return node
-
- def visit_ForInStatNode(self, node):
- # TODO: Remove redundancy with range optimization...
- is_special = False
- sequence = node.iterator.sequence
- target = node.target
- if isinstance(sequence, ExprNodes.SimpleCallNode):
- function = sequence.function
- if sequence.self is None and function.is_name:
- entry = self.current_env().lookup(function.name)
- if not entry or entry.is_builtin:
- if function.name == 'reversed' and len(sequence.args) == 1:
- sequence = sequence.args[0]
- elif function.name == 'enumerate' and len(sequence.args) == 1:
- if target.is_sequence_constructor and len(target.args) == 2:
- iterator = sequence.args[0]
- if iterator.is_name:
- iterator_type = iterator.infer_type(self.current_env())
- if iterator_type.is_builtin_type:
- # assume that builtin types have a length within Py_ssize_t
- self.mark_assignment(
- target.args[0],
- ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
- type=PyrexTypes.c_py_ssize_t_type))
- target = target.args[1]
- sequence = sequence.args[0]
- if isinstance(sequence, ExprNodes.SimpleCallNode):
- function = sequence.function
- if sequence.self is None and function.is_name:
- entry = self.current_env().lookup(function.name)
- if not entry or entry.is_builtin:
- if function.name in ('range', 'xrange'):
- is_special = True
- for arg in sequence.args[:2]:
- self.mark_assignment(target, arg)
- if len(sequence.args) > 2:
- self.mark_assignment(
- target,
- ExprNodes.binop_node(node.pos,
- '+',
- sequence.args[0],
- sequence.args[2]))
-
- if not is_special:
- # A for-loop basically translates to subsequent calls to
- # __getitem__(), so using an IndexNode here allows us to
- # naturally infer the base type of pointers, C arrays,
- # Python strings, etc., while correctly falling back to an
- # object type when the base type cannot be handled.
- self.mark_assignment(target, ExprNodes.IndexNode(
- node.pos,
- base=sequence,
- index=ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
- type=PyrexTypes.c_py_ssize_t_type)))
-
- self.visitchildren(node)
- return node
-
- def visit_ForFromStatNode(self, node):
- self.mark_assignment(node.target, node.bound1)
- if node.step is not None:
- self.mark_assignment(node.target,
- ExprNodes.binop_node(node.pos,
- '+',
- node.bound1,
- node.step))
- self.visitchildren(node)
- return node
-
- def visit_WhileStatNode(self, node):
- self.visitchildren(node)
- return node
-
- def visit_ExceptClauseNode(self, node):
- if node.target is not None:
- self.mark_assignment(node.target, object_expr)
- self.visitchildren(node)
- return node
-
- def visit_FromCImportStatNode(self, node):
- pass # Can't be assigned to...
-
- def visit_FromImportStatNode(self, node):
- for name, target in node.items:
- if name != "*":
- self.mark_assignment(target, object_expr)
- self.visitchildren(node)
- return node
-
- def visit_DefNode(self, node):
- # use fake expressions with the right result type
- if node.star_arg:
- self.mark_assignment(
+ else:
+ # Could use this info to infer cdef class attributes...
+ pass
+
+ def visit_WithTargetAssignmentStatNode(self, node):
+ self.mark_assignment(node.lhs, node.with_node.enter_call)
+ self.visitchildren(node)
+ return node
+
+ def visit_SingleAssignmentNode(self, node):
+ self.mark_assignment(node.lhs, node.rhs)
+ self.visitchildren(node)
+ return node
+
+ def visit_CascadedAssignmentNode(self, node):
+ for lhs in node.lhs_list:
+ self.mark_assignment(lhs, node.rhs)
+ self.visitchildren(node)
+ return node
+
+ def visit_InPlaceAssignmentNode(self, node):
+ self.mark_assignment(node.lhs, node.create_binop_node(), node.operator)
+ self.visitchildren(node)
+ return node
+
+ def visit_ForInStatNode(self, node):
+ # TODO: Remove redundancy with range optimization...
+ is_special = False
+ sequence = node.iterator.sequence
+ target = node.target
+ if isinstance(sequence, ExprNodes.SimpleCallNode):
+ function = sequence.function
+ if sequence.self is None and function.is_name:
+ entry = self.current_env().lookup(function.name)
+ if not entry or entry.is_builtin:
+ if function.name == 'reversed' and len(sequence.args) == 1:
+ sequence = sequence.args[0]
+ elif function.name == 'enumerate' and len(sequence.args) == 1:
+ if target.is_sequence_constructor and len(target.args) == 2:
+ iterator = sequence.args[0]
+ if iterator.is_name:
+ iterator_type = iterator.infer_type(self.current_env())
+ if iterator_type.is_builtin_type:
+ # assume that builtin types have a length within Py_ssize_t
+ self.mark_assignment(
+ target.args[0],
+ ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
+ type=PyrexTypes.c_py_ssize_t_type))
+ target = target.args[1]
+ sequence = sequence.args[0]
+ if isinstance(sequence, ExprNodes.SimpleCallNode):
+ function = sequence.function
+ if sequence.self is None and function.is_name:
+ entry = self.current_env().lookup(function.name)
+ if not entry or entry.is_builtin:
+ if function.name in ('range', 'xrange'):
+ is_special = True
+ for arg in sequence.args[:2]:
+ self.mark_assignment(target, arg)
+ if len(sequence.args) > 2:
+ self.mark_assignment(
+ target,
+ ExprNodes.binop_node(node.pos,
+ '+',
+ sequence.args[0],
+ sequence.args[2]))
+
+ if not is_special:
+ # A for-loop basically translates to subsequent calls to
+ # __getitem__(), so using an IndexNode here allows us to
+ # naturally infer the base type of pointers, C arrays,
+ # Python strings, etc., while correctly falling back to an
+ # object type when the base type cannot be handled.
+ self.mark_assignment(target, ExprNodes.IndexNode(
+ node.pos,
+ base=sequence,
+ index=ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
+ type=PyrexTypes.c_py_ssize_t_type)))
+
+ self.visitchildren(node)
+ return node
+
+ def visit_ForFromStatNode(self, node):
+ self.mark_assignment(node.target, node.bound1)
+ if node.step is not None:
+ self.mark_assignment(node.target,
+ ExprNodes.binop_node(node.pos,
+ '+',
+ node.bound1,
+ node.step))
+ self.visitchildren(node)
+ return node
+
+ def visit_WhileStatNode(self, node):
+ self.visitchildren(node)
+ return node
+
+ def visit_ExceptClauseNode(self, node):
+ if node.target is not None:
+ self.mark_assignment(node.target, object_expr)
+ self.visitchildren(node)
+ return node
+
+ def visit_FromCImportStatNode(self, node):
+ pass # Can't be assigned to...
+
+ def visit_FromImportStatNode(self, node):
+ for name, target in node.items:
+ if name != "*":
+ self.mark_assignment(target, object_expr)
+ self.visitchildren(node)
+ return node
+
+ def visit_DefNode(self, node):
+ # use fake expressions with the right result type
+ if node.star_arg:
+ self.mark_assignment(
node.star_arg, TypedExprNode(Builtin.tuple_type, node.pos))
- if node.starstar_arg:
- self.mark_assignment(
+ if node.starstar_arg:
+ self.mark_assignment(
node.starstar_arg, TypedExprNode(Builtin.dict_type, node.pos))
- EnvTransform.visit_FuncDefNode(self, node)
- return node
-
- def visit_DelStatNode(self, node):
- for arg in node.args:
- self.mark_assignment(arg, arg)
- self.visitchildren(node)
- return node
-
- def visit_ParallelStatNode(self, node):
- if self.parallel_block_stack:
- node.parent = self.parallel_block_stack[-1]
- else:
- node.parent = None
-
- nested = False
- if node.is_prange:
- if not node.parent:
- node.is_parallel = True
- else:
- node.is_parallel = (node.parent.is_prange or not
- node.parent.is_parallel)
- nested = node.parent.is_prange
- else:
- node.is_parallel = True
- # Note: nested with parallel() blocks are handled by
- # ParallelRangeTransform!
- # nested = node.parent
- nested = node.parent and node.parent.is_prange
-
- self.parallel_block_stack.append(node)
-
- nested = nested or len(self.parallel_block_stack) > 2
- if not self.parallel_errors and nested and not node.is_prange:
- error(node.pos, "Only prange() may be nested")
- self.parallel_errors = True
-
- if node.is_prange:
- child_attrs = node.child_attrs
- node.child_attrs = ['body', 'target', 'args']
- self.visitchildren(node)
- node.child_attrs = child_attrs
-
- self.parallel_block_stack.pop()
- if node.else_clause:
- node.else_clause = self.visit(node.else_clause)
- else:
- self.visitchildren(node)
- self.parallel_block_stack.pop()
-
- self.parallel_errors = False
- return node
-
- def visit_YieldExprNode(self, node):
- if self.parallel_block_stack:
+ EnvTransform.visit_FuncDefNode(self, node)
+ return node
+
+ def visit_DelStatNode(self, node):
+ for arg in node.args:
+ self.mark_assignment(arg, arg)
+ self.visitchildren(node)
+ return node
+
+ def visit_ParallelStatNode(self, node):
+ if self.parallel_block_stack:
+ node.parent = self.parallel_block_stack[-1]
+ else:
+ node.parent = None
+
+ nested = False
+ if node.is_prange:
+ if not node.parent:
+ node.is_parallel = True
+ else:
+ node.is_parallel = (node.parent.is_prange or not
+ node.parent.is_parallel)
+ nested = node.parent.is_prange
+ else:
+ node.is_parallel = True
+ # Note: nested with parallel() blocks are handled by
+ # ParallelRangeTransform!
+ # nested = node.parent
+ nested = node.parent and node.parent.is_prange
+
+ self.parallel_block_stack.append(node)
+
+ nested = nested or len(self.parallel_block_stack) > 2
+ if not self.parallel_errors and nested and not node.is_prange:
+ error(node.pos, "Only prange() may be nested")
+ self.parallel_errors = True
+
+ if node.is_prange:
+ child_attrs = node.child_attrs
+ node.child_attrs = ['body', 'target', 'args']
+ self.visitchildren(node)
+ node.child_attrs = child_attrs
+
+ self.parallel_block_stack.pop()
+ if node.else_clause:
+ node.else_clause = self.visit(node.else_clause)
+ else:
+ self.visitchildren(node)
+ self.parallel_block_stack.pop()
+
+ self.parallel_errors = False
+ return node
+
+ def visit_YieldExprNode(self, node):
+ if self.parallel_block_stack:
error(node.pos, "'%s' not allowed in parallel sections" % node.expr_keyword)
- return node
-
- def visit_ReturnStatNode(self, node):
- node.in_parallel = bool(self.parallel_block_stack)
- return node
-
-
-class MarkOverflowingArithmetic(CythonTransform):
-
- # It may be possible to integrate this with the above for
- # performance improvements (though likely not worth it).
-
- might_overflow = False
-
- def __call__(self, root):
- self.env_stack = []
- self.env = root.scope
- return super(MarkOverflowingArithmetic, self).__call__(root)
-
- def visit_safe_node(self, node):
- self.might_overflow, saved = False, self.might_overflow
- self.visitchildren(node)
- self.might_overflow = saved
- return node
-
- def visit_neutral_node(self, node):
- self.visitchildren(node)
- return node
-
- def visit_dangerous_node(self, node):
- self.might_overflow, saved = True, self.might_overflow
- self.visitchildren(node)
- self.might_overflow = saved
- return node
-
- def visit_FuncDefNode(self, node):
- self.env_stack.append(self.env)
- self.env = node.local_scope
- self.visit_safe_node(node)
- self.env = self.env_stack.pop()
- return node
-
- def visit_NameNode(self, node):
- if self.might_overflow:
- entry = node.entry or self.env.lookup(node.name)
- if entry:
- entry.might_overflow = True
- return node
-
- def visit_BinopNode(self, node):
- if node.operator in '&|^':
- return self.visit_neutral_node(node)
- else:
- return self.visit_dangerous_node(node)
-
+ return node
+
+ def visit_ReturnStatNode(self, node):
+ node.in_parallel = bool(self.parallel_block_stack)
+ return node
+
+
+class MarkOverflowingArithmetic(CythonTransform):
+
+ # It may be possible to integrate this with the above for
+ # performance improvements (though likely not worth it).
+
+ might_overflow = False
+
+ def __call__(self, root):
+ self.env_stack = []
+ self.env = root.scope
+ return super(MarkOverflowingArithmetic, self).__call__(root)
+
+ def visit_safe_node(self, node):
+ self.might_overflow, saved = False, self.might_overflow
+ self.visitchildren(node)
+ self.might_overflow = saved
+ return node
+
+ def visit_neutral_node(self, node):
+ self.visitchildren(node)
+ return node
+
+ def visit_dangerous_node(self, node):
+ self.might_overflow, saved = True, self.might_overflow
+ self.visitchildren(node)
+ self.might_overflow = saved
+ return node
+
+ def visit_FuncDefNode(self, node):
+ self.env_stack.append(self.env)
+ self.env = node.local_scope
+ self.visit_safe_node(node)
+ self.env = self.env_stack.pop()
+ return node
+
+ def visit_NameNode(self, node):
+ if self.might_overflow:
+ entry = node.entry or self.env.lookup(node.name)
+ if entry:
+ entry.might_overflow = True
+ return node
+
+ def visit_BinopNode(self, node):
+ if node.operator in '&|^':
+ return self.visit_neutral_node(node)
+ else:
+ return self.visit_dangerous_node(node)
+
def visit_SimpleCallNode(self, node):
if node.function.is_name and node.function.name == 'abs':
# Overflows for minimum value of fixed size ints.
@@ -313,108 +313,108 @@ class MarkOverflowingArithmetic(CythonTransform):
else:
return self.visit_neutral_node(node)
- visit_UnopNode = visit_neutral_node
-
- visit_UnaryMinusNode = visit_dangerous_node
-
- visit_InPlaceAssignmentNode = visit_dangerous_node
-
- visit_Node = visit_safe_node
-
- def visit_assignment(self, lhs, rhs):
- if (isinstance(rhs, ExprNodes.IntNode)
- and isinstance(lhs, ExprNodes.NameNode)
- and Utils.long_literal(rhs.value)):
- entry = lhs.entry or self.env.lookup(lhs.name)
- if entry:
- entry.might_overflow = True
-
- def visit_SingleAssignmentNode(self, node):
- self.visit_assignment(node.lhs, node.rhs)
- self.visitchildren(node)
- return node
-
- def visit_CascadedAssignmentNode(self, node):
- for lhs in node.lhs_list:
- self.visit_assignment(lhs, node.rhs)
- self.visitchildren(node)
- return node
-
-class PyObjectTypeInferer(object):
- """
- If it's not declared, it's a PyObject.
- """
- def infer_types(self, scope):
- """
- Given a dict of entries, map all unspecified types to a specified type.
- """
- for name, entry in scope.entries.items():
- if entry.type is unspecified_type:
- entry.type = py_object_type
-
-class SimpleAssignmentTypeInferer(object):
- """
- Very basic type inference.
-
- Note: in order to support cross-closure type inference, this must be
-    applied to nested scopes in top-down order.
- """
- def set_entry_type(self, entry, entry_type):
- entry.type = entry_type
- for e in entry.all_entries():
- e.type = entry_type
-
- def infer_types(self, scope):
- enabled = scope.directives['infer_types']
- verbose = scope.directives['infer_types.verbose']
-
- if enabled == True:
- spanning_type = aggressive_spanning_type
- elif enabled is None: # safe mode
- spanning_type = safe_spanning_type
- else:
- for entry in scope.entries.values():
- if entry.type is unspecified_type:
- self.set_entry_type(entry, py_object_type)
- return
-
+ visit_UnopNode = visit_neutral_node
+
+ visit_UnaryMinusNode = visit_dangerous_node
+
+ visit_InPlaceAssignmentNode = visit_dangerous_node
+
+ visit_Node = visit_safe_node
+
+ def visit_assignment(self, lhs, rhs):
+ if (isinstance(rhs, ExprNodes.IntNode)
+ and isinstance(lhs, ExprNodes.NameNode)
+ and Utils.long_literal(rhs.value)):
+ entry = lhs.entry or self.env.lookup(lhs.name)
+ if entry:
+ entry.might_overflow = True
+
+ def visit_SingleAssignmentNode(self, node):
+ self.visit_assignment(node.lhs, node.rhs)
+ self.visitchildren(node)
+ return node
+
+ def visit_CascadedAssignmentNode(self, node):
+ for lhs in node.lhs_list:
+ self.visit_assignment(lhs, node.rhs)
+ self.visitchildren(node)
+ return node
+
+class PyObjectTypeInferer(object):
+ """
+ If it's not declared, it's a PyObject.
+ """
+ def infer_types(self, scope):
+ """
+ Given a dict of entries, map all unspecified types to a specified type.
+ """
+ for name, entry in scope.entries.items():
+ if entry.type is unspecified_type:
+ entry.type = py_object_type
+
+class SimpleAssignmentTypeInferer(object):
+ """
+ Very basic type inference.
+
+ Note: in order to support cross-closure type inference, this must be
+    applied to nested scopes in top-down order.
+ """
+ def set_entry_type(self, entry, entry_type):
+ entry.type = entry_type
+ for e in entry.all_entries():
+ e.type = entry_type
+
+ def infer_types(self, scope):
+ enabled = scope.directives['infer_types']
+ verbose = scope.directives['infer_types.verbose']
+
+ if enabled == True:
+ spanning_type = aggressive_spanning_type
+ elif enabled is None: # safe mode
+ spanning_type = safe_spanning_type
+ else:
+ for entry in scope.entries.values():
+ if entry.type is unspecified_type:
+ self.set_entry_type(entry, py_object_type)
+ return
+
# Set of assignments
- assignments = set()
- assmts_resolved = set()
- dependencies = {}
- assmt_to_names = {}
-
- for name, entry in scope.entries.items():
- for assmt in entry.cf_assignments:
- names = assmt.type_dependencies()
- assmt_to_names[assmt] = names
- assmts = set()
- for node in names:
- assmts.update(node.cf_state)
- dependencies[assmt] = assmts
- if entry.type is unspecified_type:
- assignments.update(entry.cf_assignments)
- else:
- assmts_resolved.update(entry.cf_assignments)
-
- def infer_name_node_type(node):
- types = [assmt.inferred_type for assmt in node.cf_state]
- if not types:
- node_type = py_object_type
- else:
- entry = node.entry
- node_type = spanning_type(
+ assignments = set()
+ assmts_resolved = set()
+ dependencies = {}
+ assmt_to_names = {}
+
+ for name, entry in scope.entries.items():
+ for assmt in entry.cf_assignments:
+ names = assmt.type_dependencies()
+ assmt_to_names[assmt] = names
+ assmts = set()
+ for node in names:
+ assmts.update(node.cf_state)
+ dependencies[assmt] = assmts
+ if entry.type is unspecified_type:
+ assignments.update(entry.cf_assignments)
+ else:
+ assmts_resolved.update(entry.cf_assignments)
+
+ def infer_name_node_type(node):
+ types = [assmt.inferred_type for assmt in node.cf_state]
+ if not types:
+ node_type = py_object_type
+ else:
+ entry = node.entry
+ node_type = spanning_type(
types, entry.might_overflow, entry.pos, scope)
- node.inferred_type = node_type
-
- def infer_name_node_type_partial(node):
- types = [assmt.inferred_type for assmt in node.cf_state
- if assmt.inferred_type is not None]
- if not types:
- return
- entry = node.entry
+ node.inferred_type = node_type
+
+ def infer_name_node_type_partial(node):
+ types = [assmt.inferred_type for assmt in node.cf_state
+ if assmt.inferred_type is not None]
+ if not types:
+ return
+ entry = node.entry
return spanning_type(types, entry.might_overflow, entry.pos, scope)
-
+
def inferred_types(entry):
has_none = False
has_pyobjects = False
@@ -433,159 +433,159 @@ class SimpleAssignmentTypeInferer(object):
types.append(py_object_type)
return types
- def resolve_assignments(assignments):
- resolved = set()
- for assmt in assignments:
- deps = dependencies[assmt]
- # All assignments are resolved
- if assmts_resolved.issuperset(deps):
- for node in assmt_to_names[assmt]:
- infer_name_node_type(node)
- # Resolve assmt
- inferred_type = assmt.infer_type()
- assmts_resolved.add(assmt)
- resolved.add(assmt)
- assignments.difference_update(resolved)
- return resolved
-
- def partial_infer(assmt):
- partial_types = []
- for node in assmt_to_names[assmt]:
- partial_type = infer_name_node_type_partial(node)
- if partial_type is None:
- return False
- partial_types.append((node, partial_type))
- for node, partial_type in partial_types:
- node.inferred_type = partial_type
- assmt.infer_type()
- return True
-
- partial_assmts = set()
- def resolve_partial(assignments):
- # try to handle circular references
- partials = set()
- for assmt in assignments:
- if assmt in partial_assmts:
- continue
- if partial_infer(assmt):
- partials.add(assmt)
- assmts_resolved.add(assmt)
- partial_assmts.update(partials)
- return partials
-
- # Infer assignments
- while True:
- if not resolve_assignments(assignments):
- if not resolve_partial(assignments):
- break
- inferred = set()
- # First pass
- for entry in scope.entries.values():
- if entry.type is not unspecified_type:
- continue
- entry_type = py_object_type
- if assmts_resolved.issuperset(entry.cf_assignments):
+ def resolve_assignments(assignments):
+ resolved = set()
+ for assmt in assignments:
+ deps = dependencies[assmt]
+ # All assignments are resolved
+ if assmts_resolved.issuperset(deps):
+ for node in assmt_to_names[assmt]:
+ infer_name_node_type(node)
+ # Resolve assmt
+ inferred_type = assmt.infer_type()
+ assmts_resolved.add(assmt)
+ resolved.add(assmt)
+ assignments.difference_update(resolved)
+ return resolved
+
+ def partial_infer(assmt):
+ partial_types = []
+ for node in assmt_to_names[assmt]:
+ partial_type = infer_name_node_type_partial(node)
+ if partial_type is None:
+ return False
+ partial_types.append((node, partial_type))
+ for node, partial_type in partial_types:
+ node.inferred_type = partial_type
+ assmt.infer_type()
+ return True
+
+ partial_assmts = set()
+ def resolve_partial(assignments):
+ # try to handle circular references
+ partials = set()
+ for assmt in assignments:
+ if assmt in partial_assmts:
+ continue
+ if partial_infer(assmt):
+ partials.add(assmt)
+ assmts_resolved.add(assmt)
+ partial_assmts.update(partials)
+ return partials
+
+ # Infer assignments
+ while True:
+ if not resolve_assignments(assignments):
+ if not resolve_partial(assignments):
+ break
+ inferred = set()
+ # First pass
+ for entry in scope.entries.values():
+ if entry.type is not unspecified_type:
+ continue
+ entry_type = py_object_type
+ if assmts_resolved.issuperset(entry.cf_assignments):
types = inferred_types(entry)
- if types and all(types):
- entry_type = spanning_type(
+ if types and all(types):
+ entry_type = spanning_type(
types, entry.might_overflow, entry.pos, scope)
- inferred.add(entry)
- self.set_entry_type(entry, entry_type)
-
- def reinfer():
- dirty = False
- for entry in inferred:
+ inferred.add(entry)
+ self.set_entry_type(entry, entry_type)
+
+ def reinfer():
+ dirty = False
+ for entry in inferred:
for assmt in entry.cf_assignments:
assmt.infer_type()
types = inferred_types(entry)
new_type = spanning_type(types, entry.might_overflow, entry.pos, scope)
- if new_type != entry.type:
- self.set_entry_type(entry, new_type)
- dirty = True
- return dirty
-
- # types propagation
- while reinfer():
- pass
-
- if verbose:
- for entry in inferred:
- message(entry.pos, "inferred '%s' to be of type '%s'" % (
- entry.name, entry.type))
-
-
-def find_spanning_type(type1, type2):
- if type1 is type2:
- result_type = type1
- elif type1 is PyrexTypes.c_bint_type or type2 is PyrexTypes.c_bint_type:
- # type inference can break the coercion back to a Python bool
- # if it returns an arbitrary int type here
- return py_object_type
- else:
- result_type = PyrexTypes.spanning_type(type1, type2)
- if result_type in (PyrexTypes.c_double_type, PyrexTypes.c_float_type,
- Builtin.float_type):
- # Python's float type is just a C double, so it's safe to
- # use the C type instead
- return PyrexTypes.c_double_type
- return result_type
-
+ if new_type != entry.type:
+ self.set_entry_type(entry, new_type)
+ dirty = True
+ return dirty
+
+ # types propagation
+ while reinfer():
+ pass
+
+ if verbose:
+ for entry in inferred:
+ message(entry.pos, "inferred '%s' to be of type '%s'" % (
+ entry.name, entry.type))
+
+
+def find_spanning_type(type1, type2):
+ if type1 is type2:
+ result_type = type1
+ elif type1 is PyrexTypes.c_bint_type or type2 is PyrexTypes.c_bint_type:
+ # type inference can break the coercion back to a Python bool
+ # if it returns an arbitrary int type here
+ return py_object_type
+ else:
+ result_type = PyrexTypes.spanning_type(type1, type2)
+ if result_type in (PyrexTypes.c_double_type, PyrexTypes.c_float_type,
+ Builtin.float_type):
+ # Python's float type is just a C double, so it's safe to
+ # use the C type instead
+ return PyrexTypes.c_double_type
+ return result_type
+
def simply_type(result_type, pos):
- if result_type.is_reference:
- result_type = result_type.ref_base_type
- if result_type.is_const:
- result_type = result_type.const_base_type
- if result_type.is_cpp_class:
- result_type.check_nullary_constructor(pos)
+ if result_type.is_reference:
+ result_type = result_type.ref_base_type
+ if result_type.is_const:
+ result_type = result_type.const_base_type
+ if result_type.is_cpp_class:
+ result_type.check_nullary_constructor(pos)
if result_type.is_array:
result_type = PyrexTypes.c_ptr_type(result_type.base_type)
- return result_type
-
+ return result_type
+
def aggressive_spanning_type(types, might_overflow, pos, scope):
return simply_type(reduce(find_spanning_type, types), pos)
def safe_spanning_type(types, might_overflow, pos, scope):
result_type = simply_type(reduce(find_spanning_type, types), pos)
- if result_type.is_pyobject:
- # In theory, any specific Python type is always safe to
- # infer. However, inferring str can cause some existing code
- # to break, since we are also now much more strict about
- # coercion from str to char *. See trac #553.
- if result_type.name == 'str':
- return py_object_type
- else:
- return result_type
- elif result_type is PyrexTypes.c_double_type:
- # Python's float type is just a C double, so it's safe to use
- # the C type instead
- return result_type
- elif result_type is PyrexTypes.c_bint_type:
- # find_spanning_type() only returns 'bint' for clean boolean
- # operations without other int types, so this is safe, too
- return result_type
+ if result_type.is_pyobject:
+ # In theory, any specific Python type is always safe to
+ # infer. However, inferring str can cause some existing code
+ # to break, since we are also now much more strict about
+ # coercion from str to char *. See trac #553.
+ if result_type.name == 'str':
+ return py_object_type
+ else:
+ return result_type
+ elif result_type is PyrexTypes.c_double_type:
+ # Python's float type is just a C double, so it's safe to use
+ # the C type instead
+ return result_type
+ elif result_type is PyrexTypes.c_bint_type:
+ # find_spanning_type() only returns 'bint' for clean boolean
+ # operations without other int types, so this is safe, too
+ return result_type
elif result_type.is_pythran_expr:
return result_type
- elif result_type.is_ptr:
- # Any pointer except (signed|unsigned|) char* can't implicitly
- # become a PyObject, and inferring char* is now accepted, too.
- return result_type
- elif result_type.is_cpp_class:
- # These can't implicitly become Python objects either.
- return result_type
- elif result_type.is_struct:
- # Though we have struct -> object for some structs, this is uncommonly
- # used, won't arise in pure Python, and there shouldn't be side
- # effects, so I'm declaring this safe.
- return result_type
- # TODO: double complex should be OK as well, but we need
- # to make sure everything is supported.
- elif (result_type.is_int or result_type.is_enum) and not might_overflow:
- return result_type
+ elif result_type.is_ptr:
+ # Any pointer except (signed|unsigned|) char* can't implicitly
+ # become a PyObject, and inferring char* is now accepted, too.
+ return result_type
+ elif result_type.is_cpp_class:
+ # These can't implicitly become Python objects either.
+ return result_type
+ elif result_type.is_struct:
+ # Though we have struct -> object for some structs, this is uncommonly
+ # used, won't arise in pure Python, and there shouldn't be side
+ # effects, so I'm declaring this safe.
+ return result_type
+ # TODO: double complex should be OK as well, but we need
+ # to make sure everything is supported.
+ elif (result_type.is_int or result_type.is_enum) and not might_overflow:
+ return result_type
elif (not result_type.can_coerce_to_pyobject(scope)
and not result_type.is_error):
return result_type
- return py_object_type
-
-
-def get_type_inferer():
- return SimpleAssignmentTypeInferer()
+ return py_object_type
+
+
+def get_type_inferer():
+ return SimpleAssignmentTypeInferer()
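
The spanning-type helpers at the end of this module pick the single type that
can represent every assignment seen for a variable; safe_spanning_type()
additionally falls back to a Python object whenever inference could change
semantics (str, possibly overflowing ints, and so on). A small sketch, assuming
only that the Cython compiler package is importable (pos and scope are passed
as None because plain numeric types never reach the code paths that use them):

    from Cython.Compiler import PyrexTypes, TypeInference

    # long combined with double spans to a C double, since a Python float is
    # just a C double (see find_spanning_type above).
    spanned = TypeInference.find_spanning_type(
        PyrexTypes.c_long_type, PyrexTypes.c_double_type)

    # The safe variant agrees here, because c_double_type is explicitly allowed.
    safe = TypeInference.safe_spanning_type(
        [PyrexTypes.c_long_type, PyrexTypes.c_double_type],
        might_overflow=False, pos=None, scope=None)
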
diff --git a/contrib/tools/cython/Cython/Compiler/TypeSlots.py b/contrib/tools/cython/Cython/Compiler/TypeSlots.py
index 561e7f1ed3..0b4ff67042 100644
--- a/contrib/tools/cython/Cython/Compiler/TypeSlots.py
+++ b/contrib/tools/cython/Cython/Compiler/TypeSlots.py
@@ -1,410 +1,410 @@
-#
-# Tables describing slots in the CPython type object
-# and associated know-how.
-#
-
-from __future__ import absolute_import
-
-from . import Naming
-from . import PyrexTypes
+#
+# Tables describing slots in the CPython type object
+# and associated know-how.
+#
+
+from __future__ import absolute_import
+
+from . import Naming
+from . import PyrexTypes
from .Errors import error
-
-invisible = ['__cinit__', '__dealloc__', '__richcmp__',
- '__nonzero__', '__bool__']
-
+
+invisible = ['__cinit__', '__dealloc__', '__richcmp__',
+ '__nonzero__', '__bool__']
+
richcmp_special_methods = ['__eq__', '__ne__', '__lt__', '__gt__', '__le__', '__ge__']
-
-
-class Signature(object):
- # Method slot signature descriptor.
- #
- # has_dummy_arg boolean
- # has_generic_args boolean
- # fixed_arg_format string
- # ret_format string
- # error_value string
- #
- # The formats are strings made up of the following
- # characters:
- #
- # 'O' Python object
- # 'T' Python object of the type of 'self'
- # 'v' void
- # 'p' void *
- # 'P' void **
- # 'i' int
- # 'b' bint
- # 'I' int *
- # 'l' long
- # 'f' float
- # 'd' double
- # 'h' Py_hash_t
- # 'z' Py_ssize_t
- # 'Z' Py_ssize_t *
- # 's' char *
- # 'S' char **
- # 'r' int used only to signal exception
- # 'B' Py_buffer *
- # '-' dummy 'self' argument (not used)
- # '*' rest of args passed as generic Python
- # arg tuple and kw dict (must be last
- # char in format string)
-
- format_map = {
- 'O': PyrexTypes.py_object_type,
- 'v': PyrexTypes.c_void_type,
- 'p': PyrexTypes.c_void_ptr_type,
- 'P': PyrexTypes.c_void_ptr_ptr_type,
- 'i': PyrexTypes.c_int_type,
- 'b': PyrexTypes.c_bint_type,
- 'I': PyrexTypes.c_int_ptr_type,
- 'l': PyrexTypes.c_long_type,
- 'f': PyrexTypes.c_float_type,
- 'd': PyrexTypes.c_double_type,
- 'h': PyrexTypes.c_py_hash_t_type,
- 'z': PyrexTypes.c_py_ssize_t_type,
- 'Z': PyrexTypes.c_py_ssize_t_ptr_type,
- 's': PyrexTypes.c_char_ptr_type,
- 'S': PyrexTypes.c_char_ptr_ptr_type,
- 'r': PyrexTypes.c_returncode_type,
- 'B': PyrexTypes.c_py_buffer_ptr_type,
- # 'T', '-' and '*' are handled otherwise
- # and are not looked up in here
- }
-
- type_to_format_map = dict(
+
+
+class Signature(object):
+ # Method slot signature descriptor.
+ #
+ # has_dummy_arg boolean
+ # has_generic_args boolean
+ # fixed_arg_format string
+ # ret_format string
+ # error_value string
+ #
+ # The formats are strings made up of the following
+ # characters:
+ #
+ # 'O' Python object
+ # 'T' Python object of the type of 'self'
+ # 'v' void
+ # 'p' void *
+ # 'P' void **
+ # 'i' int
+ # 'b' bint
+ # 'I' int *
+ # 'l' long
+ # 'f' float
+ # 'd' double
+ # 'h' Py_hash_t
+ # 'z' Py_ssize_t
+ # 'Z' Py_ssize_t *
+ # 's' char *
+ # 'S' char **
+ # 'r' int used only to signal exception
+ # 'B' Py_buffer *
+ # '-' dummy 'self' argument (not used)
+ # '*' rest of args passed as generic Python
+ # arg tuple and kw dict (must be last
+ # char in format string)
+
+ format_map = {
+ 'O': PyrexTypes.py_object_type,
+ 'v': PyrexTypes.c_void_type,
+ 'p': PyrexTypes.c_void_ptr_type,
+ 'P': PyrexTypes.c_void_ptr_ptr_type,
+ 'i': PyrexTypes.c_int_type,
+ 'b': PyrexTypes.c_bint_type,
+ 'I': PyrexTypes.c_int_ptr_type,
+ 'l': PyrexTypes.c_long_type,
+ 'f': PyrexTypes.c_float_type,
+ 'd': PyrexTypes.c_double_type,
+ 'h': PyrexTypes.c_py_hash_t_type,
+ 'z': PyrexTypes.c_py_ssize_t_type,
+ 'Z': PyrexTypes.c_py_ssize_t_ptr_type,
+ 's': PyrexTypes.c_char_ptr_type,
+ 'S': PyrexTypes.c_char_ptr_ptr_type,
+ 'r': PyrexTypes.c_returncode_type,
+ 'B': PyrexTypes.c_py_buffer_ptr_type,
+ # 'T', '-' and '*' are handled otherwise
+ # and are not looked up in here
+ }
+
+ type_to_format_map = dict(
(type_, format_) for format_, type_ in format_map.items())
-
- error_value_map = {
- 'O': "NULL",
- 'T': "NULL",
- 'i': "-1",
- 'b': "-1",
- 'l': "-1",
- 'r': "-1",
- 'h': "-1",
- 'z': "-1",
- }
-
- def __init__(self, arg_format, ret_format):
- self.has_dummy_arg = 0
- self.has_generic_args = 0
- if arg_format[:1] == '-':
- self.has_dummy_arg = 1
- arg_format = arg_format[1:]
- if arg_format[-1:] == '*':
- self.has_generic_args = 1
- arg_format = arg_format[:-1]
- self.fixed_arg_format = arg_format
- self.ret_format = ret_format
- self.error_value = self.error_value_map.get(ret_format, None)
- self.exception_check = ret_format != 'r' and self.error_value is not None
- self.is_staticmethod = False
-
+
+ error_value_map = {
+ 'O': "NULL",
+ 'T': "NULL",
+ 'i': "-1",
+ 'b': "-1",
+ 'l': "-1",
+ 'r': "-1",
+ 'h': "-1",
+ 'z': "-1",
+ }
+
+ def __init__(self, arg_format, ret_format):
+ self.has_dummy_arg = 0
+ self.has_generic_args = 0
+ if arg_format[:1] == '-':
+ self.has_dummy_arg = 1
+ arg_format = arg_format[1:]
+ if arg_format[-1:] == '*':
+ self.has_generic_args = 1
+ arg_format = arg_format[:-1]
+ self.fixed_arg_format = arg_format
+ self.ret_format = ret_format
+ self.error_value = self.error_value_map.get(ret_format, None)
+ self.exception_check = ret_format != 'r' and self.error_value is not None
+ self.is_staticmethod = False
+
def __repr__(self):
return '<Signature[%s(%s%s)]>' % (
self.ret_format,
', '.join(self.fixed_arg_format),
'*' if self.has_generic_args else '')
- def num_fixed_args(self):
- return len(self.fixed_arg_format)
-
- def is_self_arg(self, i):
- # argument is 'self' for methods or 'class' for classmethods
- return self.fixed_arg_format[i] == 'T'
-
- def returns_self_type(self):
- # return type is same as 'self' argument type
- return self.ret_format == 'T'
-
- def fixed_arg_type(self, i):
- return self.format_map[self.fixed_arg_format[i]]
-
- def return_type(self):
- return self.format_map[self.ret_format]
-
- def format_from_type(self, arg_type):
- if arg_type.is_pyobject:
- arg_type = PyrexTypes.py_object_type
- return self.type_to_format_map[arg_type]
-
- def exception_value(self):
- return self.error_value_map.get(self.ret_format)
-
- def function_type(self, self_arg_override=None):
- # Construct a C function type descriptor for this signature
- args = []
+ def num_fixed_args(self):
+ return len(self.fixed_arg_format)
+
+ def is_self_arg(self, i):
+ # argument is 'self' for methods or 'class' for classmethods
+ return self.fixed_arg_format[i] == 'T'
+
+ def returns_self_type(self):
+ # return type is same as 'self' argument type
+ return self.ret_format == 'T'
+
+ def fixed_arg_type(self, i):
+ return self.format_map[self.fixed_arg_format[i]]
+
+ def return_type(self):
+ return self.format_map[self.ret_format]
+
+ def format_from_type(self, arg_type):
+ if arg_type.is_pyobject:
+ arg_type = PyrexTypes.py_object_type
+ return self.type_to_format_map[arg_type]
+
+ def exception_value(self):
+ return self.error_value_map.get(self.ret_format)
+
+ def function_type(self, self_arg_override=None):
+ # Construct a C function type descriptor for this signature
+ args = []
for i in range(self.num_fixed_args()):
- if self_arg_override is not None and self.is_self_arg(i):
- assert isinstance(self_arg_override, PyrexTypes.CFuncTypeArg)
- args.append(self_arg_override)
- else:
- arg_type = self.fixed_arg_type(i)
- args.append(PyrexTypes.CFuncTypeArg("", arg_type, None))
- if self_arg_override is not None and self.returns_self_type():
- ret_type = self_arg_override.type
- else:
- ret_type = self.return_type()
- exc_value = self.exception_value()
- return PyrexTypes.CFuncType(
- ret_type, args, exception_value=exc_value,
- exception_check=self.exception_check)
-
- def method_flags(self):
- if self.ret_format == "O":
- full_args = self.fixed_arg_format
- if self.has_dummy_arg:
- full_args = "O" + full_args
- if full_args in ["O", "T"]:
- if self.has_generic_args:
- return [method_varargs, method_keywords]
- else:
- return [method_noargs]
- elif full_args in ["OO", "TO"] and not self.has_generic_args:
- return [method_onearg]
-
- if self.is_staticmethod:
- return [method_varargs, method_keywords]
- return None
-
-
-class SlotDescriptor(object):
- # Abstract base class for type slot descriptors.
- #
- # slot_name string Member name of the slot in the type object
- # is_initialised_dynamically Is initialised by code in the module init function
- # is_inherited Is inherited by subtypes (see PyType_Ready())
- # py3 Indicates presence of slot in Python 3
- # py2 Indicates presence of slot in Python 2
- # ifdef Full #ifdef string that slot is wrapped in. Using this causes py3, py2 and flags to be ignored.)
-
- def __init__(self, slot_name, dynamic=False, inherited=False,
- py3=True, py2=True, ifdef=None):
- self.slot_name = slot_name
- self.is_initialised_dynamically = dynamic
- self.is_inherited = inherited
- self.ifdef = ifdef
- self.py3 = py3
- self.py2 = py2
-
- def preprocessor_guard_code(self):
- ifdef = self.ifdef
- py2 = self.py2
- py3 = self.py3
- guard = None
- if ifdef:
- guard = ("#if %s" % ifdef)
- elif not py3 or py3 == '<RESERVED>':
- guard = ("#if PY_MAJOR_VERSION < 3")
- elif not py2:
- guard = ("#if PY_MAJOR_VERSION >= 3")
- return guard
-
- def generate(self, scope, code):
+ if self_arg_override is not None and self.is_self_arg(i):
+ assert isinstance(self_arg_override, PyrexTypes.CFuncTypeArg)
+ args.append(self_arg_override)
+ else:
+ arg_type = self.fixed_arg_type(i)
+ args.append(PyrexTypes.CFuncTypeArg("", arg_type, None))
+ if self_arg_override is not None and self.returns_self_type():
+ ret_type = self_arg_override.type
+ else:
+ ret_type = self.return_type()
+ exc_value = self.exception_value()
+ return PyrexTypes.CFuncType(
+ ret_type, args, exception_value=exc_value,
+ exception_check=self.exception_check)
+
+ def method_flags(self):
+ if self.ret_format == "O":
+ full_args = self.fixed_arg_format
+ if self.has_dummy_arg:
+ full_args = "O" + full_args
+ if full_args in ["O", "T"]:
+ if self.has_generic_args:
+ return [method_varargs, method_keywords]
+ else:
+ return [method_noargs]
+ elif full_args in ["OO", "TO"] and not self.has_generic_args:
+ return [method_onearg]
+
+ if self.is_staticmethod:
+ return [method_varargs, method_keywords]
+ return None
+
+
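A minimal sketch of how the format strings documented above are used (assuming the Cython package is importable; names and values shown in comments follow from the class itself):

    from Cython.Compiler.TypeSlots import Signature

    sig = Signature("TO", "O")        # bound method: 'self' plus one object argument, object result
    sig.num_fixed_args()              # 2
    sig.is_self_arg(0)                # True - the 'T' slot stands for 'self'
    sig.method_flags()                # ['METH_O'] - exactly one positional argument, no keywords
    sig.exception_value()             # 'NULL' - error return value for object-returning slots

    Signature("-*", "O").method_flags()   # ['METH_VARARGS', 'METH_KEYWORDS']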
+class SlotDescriptor(object):
+ # Abstract base class for type slot descriptors.
+ #
+ # slot_name string Member name of the slot in the type object
+ # is_initialised_dynamically Is initialised by code in the module init function
+ # is_inherited Is inherited by subtypes (see PyType_Ready())
+ # py3 Indicates presence of slot in Python 3
+ # py2 Indicates presence of slot in Python 2
+ # ifdef Full #ifdef string that slot is wrapped in. Using this causes py3, py2 and flags to be ignored.)
+
+ def __init__(self, slot_name, dynamic=False, inherited=False,
+ py3=True, py2=True, ifdef=None):
+ self.slot_name = slot_name
+ self.is_initialised_dynamically = dynamic
+ self.is_inherited = inherited
+ self.ifdef = ifdef
+ self.py3 = py3
+ self.py2 = py2
+
+ def preprocessor_guard_code(self):
+ ifdef = self.ifdef
+ py2 = self.py2
+ py3 = self.py3
+ guard = None
+ if ifdef:
+ guard = ("#if %s" % ifdef)
+ elif not py3 or py3 == '<RESERVED>':
+ guard = ("#if PY_MAJOR_VERSION < 3")
+ elif not py2:
+ guard = ("#if PY_MAJOR_VERSION >= 3")
+ return guard
+
+ def generate(self, scope, code):
preprocessor_guard = self.preprocessor_guard_code()
if preprocessor_guard:
code.putln(preprocessor_guard)
- end_pypy_guard = False
- if self.is_initialised_dynamically:
- value = "0"
- else:
- value = self.slot_code(scope)
- if value == "0" and self.is_inherited:
- # PyPy currently has a broken PyType_Ready() that fails to
- # inherit some slots. To work around this, we explicitly
- # set inherited slots here, but only in PyPy since CPython
- # handles this better than we do.
- inherited_value = value
- current_scope = scope
- while (inherited_value == "0"
- and current_scope.parent_type
- and current_scope.parent_type.base_type
- and current_scope.parent_type.base_type.scope):
- current_scope = current_scope.parent_type.base_type.scope
- inherited_value = self.slot_code(current_scope)
- if inherited_value != "0":
- code.putln("#if CYTHON_COMPILING_IN_PYPY")
- code.putln("%s, /*%s*/" % (inherited_value, self.slot_name))
- code.putln("#else")
- end_pypy_guard = True
-
- code.putln("%s, /*%s*/" % (value, self.slot_name))
+ end_pypy_guard = False
+ if self.is_initialised_dynamically:
+ value = "0"
+ else:
+ value = self.slot_code(scope)
+ if value == "0" and self.is_inherited:
+ # PyPy currently has a broken PyType_Ready() that fails to
+ # inherit some slots. To work around this, we explicitly
+ # set inherited slots here, but only in PyPy since CPython
+ # handles this better than we do.
+ inherited_value = value
+ current_scope = scope
+ while (inherited_value == "0"
+ and current_scope.parent_type
+ and current_scope.parent_type.base_type
+ and current_scope.parent_type.base_type.scope):
+ current_scope = current_scope.parent_type.base_type.scope
+ inherited_value = self.slot_code(current_scope)
+ if inherited_value != "0":
+ code.putln("#if CYTHON_COMPILING_IN_PYPY")
+ code.putln("%s, /*%s*/" % (inherited_value, self.slot_name))
+ code.putln("#else")
+ end_pypy_guard = True
+
+ code.putln("%s, /*%s*/" % (value, self.slot_name))
if end_pypy_guard:
code.putln("#endif")
- if self.py3 == '<RESERVED>':
- code.putln("#else")
- code.putln("0, /*reserved*/")
- if preprocessor_guard:
- code.putln("#endif")
-
- # Some C implementations have trouble statically
- # initialising a global with a pointer to an extern
- # function, so we initialise some of the type slots
- # in the module init function instead.
-
- def generate_dynamic_init_code(self, scope, code):
- if self.is_initialised_dynamically:
- value = self.slot_code(scope)
- if value != "0":
- code.putln("%s.%s = %s;" % (
- scope.parent_type.typeobj_cname,
- self.slot_name,
- value
- )
- )
-
-
-class FixedSlot(SlotDescriptor):
- # Descriptor for a type slot with a fixed value.
- #
- # value string
-
- def __init__(self, slot_name, value, py3=True, py2=True, ifdef=None):
- SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2, ifdef=ifdef)
- self.value = value
-
- def slot_code(self, scope):
- return self.value
-
-
-class EmptySlot(FixedSlot):
- # Descriptor for a type slot whose value is always 0.
-
- def __init__(self, slot_name, py3=True, py2=True, ifdef=None):
- FixedSlot.__init__(self, slot_name, "0", py3=py3, py2=py2, ifdef=ifdef)
-
-
-class MethodSlot(SlotDescriptor):
- # Type slot descriptor for a user-definable method.
- #
- # signature Signature
- # method_name string The __xxx__ name of the method
- # alternatives [string] Alternative list of __xxx__ names for the method
-
- def __init__(self, signature, slot_name, method_name, fallback=None,
- py3=True, py2=True, ifdef=None, inherited=True):
- SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2,
- ifdef=ifdef, inherited=inherited)
- self.signature = signature
- self.slot_name = slot_name
- self.method_name = method_name
- self.alternatives = []
- method_name_to_slot[method_name] = self
- #
- if fallback:
- self.alternatives.append(fallback)
- for alt in (self.py2, self.py3):
- if isinstance(alt, (tuple, list)):
- slot_name, method_name = alt
- self.alternatives.append(method_name)
- method_name_to_slot[method_name] = self
-
- def slot_code(self, scope):
- entry = scope.lookup_here(self.method_name)
+ if self.py3 == '<RESERVED>':
+ code.putln("#else")
+ code.putln("0, /*reserved*/")
+ if preprocessor_guard:
+ code.putln("#endif")
+
+ # Some C implementations have trouble statically
+ # initialising a global with a pointer to an extern
+ # function, so we initialise some of the type slots
+ # in the module init function instead.
+
+ def generate_dynamic_init_code(self, scope, code):
+ if self.is_initialised_dynamically:
+ value = self.slot_code(scope)
+ if value != "0":
+ code.putln("%s.%s = %s;" % (
+ scope.parent_type.typeobj_cname,
+ self.slot_name,
+ value
+ )
+ )
+
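A minimal sketch of how the py2/py3/ifdef attributes translate into C preprocessor guards (the slot names here are hypothetical, chosen only for illustration):

    from Cython.Compiler.TypeSlots import SlotDescriptor

    SlotDescriptor("tp_foo", py2=False).preprocessor_guard_code()
    # '#if PY_MAJOR_VERSION >= 3'
    SlotDescriptor("tp_bar", py3=False).preprocessor_guard_code()
    # '#if PY_MAJOR_VERSION < 3'
    SlotDescriptor("tp_baz", ifdef="PY_VERSION_HEX >= 0x03050000").preprocessor_guard_code()
    # '#if PY_VERSION_HEX >= 0x03050000'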
+
+class FixedSlot(SlotDescriptor):
+ # Descriptor for a type slot with a fixed value.
+ #
+ # value string
+
+ def __init__(self, slot_name, value, py3=True, py2=True, ifdef=None):
+ SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2, ifdef=ifdef)
+ self.value = value
+
+ def slot_code(self, scope):
+ return self.value
+
+
+class EmptySlot(FixedSlot):
+ # Descriptor for a type slot whose value is always 0.
+
+ def __init__(self, slot_name, py3=True, py2=True, ifdef=None):
+ FixedSlot.__init__(self, slot_name, "0", py3=py3, py2=py2, ifdef=ifdef)
+
+
+class MethodSlot(SlotDescriptor):
+ # Type slot descriptor for a user-definable method.
+ #
+ # signature Signature
+ # method_name string The __xxx__ name of the method
+ # alternatives [string] Alternative list of __xxx__ names for the method
+
+ def __init__(self, signature, slot_name, method_name, fallback=None,
+ py3=True, py2=True, ifdef=None, inherited=True):
+ SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2,
+ ifdef=ifdef, inherited=inherited)
+ self.signature = signature
+ self.slot_name = slot_name
+ self.method_name = method_name
+ self.alternatives = []
+ method_name_to_slot[method_name] = self
+ #
+ if fallback:
+ self.alternatives.append(fallback)
+ for alt in (self.py2, self.py3):
+ if isinstance(alt, (tuple, list)):
+ slot_name, method_name = alt
+ self.alternatives.append(method_name)
+ method_name_to_slot[method_name] = self
+
+ def slot_code(self, scope):
+ entry = scope.lookup_here(self.method_name)
if entry and entry.is_special and entry.func_cname:
- return entry.func_cname
- for method_name in self.alternatives:
- entry = scope.lookup_here(method_name)
+ return entry.func_cname
+ for method_name in self.alternatives:
+ entry = scope.lookup_here(method_name)
if entry and entry.is_special and entry.func_cname:
- return entry.func_cname
- return "0"
-
-
-class InternalMethodSlot(SlotDescriptor):
- # Type slot descriptor for a method which is always
- # synthesized by Cython.
- #
- # slot_name string Member name of the slot in the type object
-
- def __init__(self, slot_name, **kargs):
- SlotDescriptor.__init__(self, slot_name, **kargs)
-
- def slot_code(self, scope):
- return scope.mangle_internal(self.slot_name)
-
-
-class GCDependentSlot(InternalMethodSlot):
- # Descriptor for a slot whose value depends on whether
- # the type participates in GC.
-
- def __init__(self, slot_name, **kargs):
- InternalMethodSlot.__init__(self, slot_name, **kargs)
-
- def slot_code(self, scope):
- if not scope.needs_gc():
- return "0"
- if not scope.has_cyclic_pyobject_attrs:
- # if the type does not have GC relevant object attributes, it can
- # delegate GC methods to its parent - iff the parent functions
- # are defined in the same module
- parent_type_scope = scope.parent_type.base_type.scope
- if scope.parent_scope is parent_type_scope.parent_scope:
- entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
- if entry.visibility != 'extern':
- return self.slot_code(parent_type_scope)
- return InternalMethodSlot.slot_code(self, scope)
-
-
-class GCClearReferencesSlot(GCDependentSlot):
-
- def slot_code(self, scope):
- if scope.needs_tp_clear():
- return GCDependentSlot.slot_code(self, scope)
- return "0"
-
-
-class ConstructorSlot(InternalMethodSlot):
- # Descriptor for tp_new and tp_dealloc.
-
- def __init__(self, slot_name, method, **kargs):
- InternalMethodSlot.__init__(self, slot_name, **kargs)
- self.method = method
-
- def slot_code(self, scope):
+ return entry.func_cname
+ return "0"
+
+
+class InternalMethodSlot(SlotDescriptor):
+ # Type slot descriptor for a method which is always
+ # synthesized by Cython.
+ #
+ # slot_name string Member name of the slot in the type object
+
+ def __init__(self, slot_name, **kargs):
+ SlotDescriptor.__init__(self, slot_name, **kargs)
+
+ def slot_code(self, scope):
+ return scope.mangle_internal(self.slot_name)
+
+
+class GCDependentSlot(InternalMethodSlot):
+ # Descriptor for a slot whose value depends on whether
+ # the type participates in GC.
+
+ def __init__(self, slot_name, **kargs):
+ InternalMethodSlot.__init__(self, slot_name, **kargs)
+
+ def slot_code(self, scope):
+ if not scope.needs_gc():
+ return "0"
+ if not scope.has_cyclic_pyobject_attrs:
+ # if the type does not have GC relevant object attributes, it can
+ # delegate GC methods to its parent - iff the parent functions
+ # are defined in the same module
+ parent_type_scope = scope.parent_type.base_type.scope
+ if scope.parent_scope is parent_type_scope.parent_scope:
+ entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
+ if entry.visibility != 'extern':
+ return self.slot_code(parent_type_scope)
+ return InternalMethodSlot.slot_code(self, scope)
+
+
+class GCClearReferencesSlot(GCDependentSlot):
+
+ def slot_code(self, scope):
+ if scope.needs_tp_clear():
+ return GCDependentSlot.slot_code(self, scope)
+ return "0"
+
+
+class ConstructorSlot(InternalMethodSlot):
+ # Descriptor for tp_new and tp_dealloc.
+
+ def __init__(self, slot_name, method, **kargs):
+ InternalMethodSlot.__init__(self, slot_name, **kargs)
+ self.method = method
+
+ def slot_code(self, scope):
entry = scope.lookup_here(self.method)
- if (self.slot_name != 'tp_new'
- and scope.parent_type.base_type
- and not scope.has_pyobject_attrs
- and not scope.has_memoryview_attrs
+ if (self.slot_name != 'tp_new'
+ and scope.parent_type.base_type
+ and not scope.has_pyobject_attrs
+ and not scope.has_memoryview_attrs
and not scope.has_cpp_class_attrs
and not (entry and entry.is_special)):
- # if the type does not have object attributes, it can
- # delegate GC methods to its parent - iff the parent
- # functions are defined in the same module
- parent_type_scope = scope.parent_type.base_type.scope
- if scope.parent_scope is parent_type_scope.parent_scope:
- entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
- if entry.visibility != 'extern':
- return self.slot_code(parent_type_scope)
+ # if the type does not have object attributes, it can
+ # delegate GC methods to its parent - iff the parent
+ # functions are defined in the same module
+ parent_type_scope = scope.parent_type.base_type.scope
+ if scope.parent_scope is parent_type_scope.parent_scope:
+ entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
+ if entry.visibility != 'extern':
+ return self.slot_code(parent_type_scope)
if entry and not entry.is_special:
return "0"
- return InternalMethodSlot.slot_code(self, scope)
-
-
-class SyntheticSlot(InternalMethodSlot):
- # Type slot descriptor for a synthesized method which
- # dispatches to one or more user-defined methods depending
- # on its arguments. If none of the relevant methods are
- # defined, the method will not be synthesized and an
- # alternative default value will be placed in the type
- # slot.
-
- def __init__(self, slot_name, user_methods, default_value, **kargs):
- InternalMethodSlot.__init__(self, slot_name, **kargs)
- self.user_methods = user_methods
- self.default_value = default_value
-
- def slot_code(self, scope):
+ return InternalMethodSlot.slot_code(self, scope)
+
+
+class SyntheticSlot(InternalMethodSlot):
+ # Type slot descriptor for a synthesized method which
+ # dispatches to one or more user-defined methods depending
+ # on its arguments. If none of the relevant methods are
+ # defined, the method will not be synthesized and an
+ # alternative default value will be placed in the type
+ # slot.
+
+ def __init__(self, slot_name, user_methods, default_value, **kargs):
+ InternalMethodSlot.__init__(self, slot_name, **kargs)
+ self.user_methods = user_methods
+ self.default_value = default_value
+
+ def slot_code(self, scope):
if scope.defines_any_special(self.user_methods):
- return InternalMethodSlot.slot_code(self, scope)
- else:
- return self.default_value
-
-
+ return InternalMethodSlot.slot_code(self, scope)
+ else:
+ return self.default_value
+
+
class RichcmpSlot(MethodSlot):
def slot_code(self, scope):
entry = scope.lookup_here(self.method_name)
@@ -416,121 +416,121 @@ class RichcmpSlot(MethodSlot):
return "0"
-class TypeFlagsSlot(SlotDescriptor):
- # Descriptor for the type flags slot.
-
- def slot_code(self, scope):
- value = "Py_TPFLAGS_DEFAULT"
- if scope.directives['type_version_tag']:
- # it's not in 'Py_TPFLAGS_DEFAULT' in Py2
- value += "|Py_TPFLAGS_HAVE_VERSION_TAG"
- else:
- # it's enabled in 'Py_TPFLAGS_DEFAULT' in Py3
- value = "(%s&~Py_TPFLAGS_HAVE_VERSION_TAG)" % value
- value += "|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER"
- if not scope.parent_type.is_final_type:
- value += "|Py_TPFLAGS_BASETYPE"
- if scope.needs_gc():
- value += "|Py_TPFLAGS_HAVE_GC"
- return value
-
-
-class DocStringSlot(SlotDescriptor):
- # Descriptor for the docstring slot.
-
- def slot_code(self, scope):
+class TypeFlagsSlot(SlotDescriptor):
+ # Descriptor for the type flags slot.
+
+ def slot_code(self, scope):
+ value = "Py_TPFLAGS_DEFAULT"
+ if scope.directives['type_version_tag']:
+ # it's not in 'Py_TPFLAGS_DEFAULT' in Py2
+ value += "|Py_TPFLAGS_HAVE_VERSION_TAG"
+ else:
+ # it's enabled in 'Py_TPFLAGS_DEFAULT' in Py3
+ value = "(%s&~Py_TPFLAGS_HAVE_VERSION_TAG)" % value
+ value += "|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER"
+ if not scope.parent_type.is_final_type:
+ value += "|Py_TPFLAGS_BASETYPE"
+ if scope.needs_gc():
+ value += "|Py_TPFLAGS_HAVE_GC"
+ return value
+
+
+class DocStringSlot(SlotDescriptor):
+ # Descriptor for the docstring slot.
+
+ def slot_code(self, scope):
doc = scope.doc
if doc is None:
- return "0"
+ return "0"
if doc.is_unicode:
doc = doc.as_utf8_string()
return doc.as_c_string_literal()
-
-
-class SuiteSlot(SlotDescriptor):
- # Descriptor for a substructure of the type object.
- #
- # sub_slots [SlotDescriptor]
-
+
+
+class SuiteSlot(SlotDescriptor):
+ # Descriptor for a substructure of the type object.
+ #
+ # sub_slots [SlotDescriptor]
+
def __init__(self, sub_slots, slot_type, slot_name, ifdef=None):
SlotDescriptor.__init__(self, slot_name, ifdef=ifdef)
- self.sub_slots = sub_slots
- self.slot_type = slot_type
- substructures.append(self)
-
- def is_empty(self, scope):
- for slot in self.sub_slots:
- if slot.slot_code(scope) != "0":
- return False
- return True
-
- def substructure_cname(self, scope):
- return "%s%s_%s" % (Naming.pyrex_prefix, self.slot_name, scope.class_name)
-
- def slot_code(self, scope):
- if not self.is_empty(scope):
- return "&%s" % self.substructure_cname(scope)
- return "0"
-
- def generate_substructure(self, scope, code):
- if not self.is_empty(scope):
- code.putln("")
+ self.sub_slots = sub_slots
+ self.slot_type = slot_type
+ substructures.append(self)
+
+ def is_empty(self, scope):
+ for slot in self.sub_slots:
+ if slot.slot_code(scope) != "0":
+ return False
+ return True
+
+ def substructure_cname(self, scope):
+ return "%s%s_%s" % (Naming.pyrex_prefix, self.slot_name, scope.class_name)
+
+ def slot_code(self, scope):
+ if not self.is_empty(scope):
+ return "&%s" % self.substructure_cname(scope)
+ return "0"
+
+ def generate_substructure(self, scope, code):
+ if not self.is_empty(scope):
+ code.putln("")
if self.ifdef:
code.putln("#if %s" % self.ifdef)
- code.putln(
- "static %s %s = {" % (
- self.slot_type,
- self.substructure_cname(scope)))
- for slot in self.sub_slots:
- slot.generate(scope, code)
- code.putln("};")
+ code.putln(
+ "static %s %s = {" % (
+ self.slot_type,
+ self.substructure_cname(scope)))
+ for slot in self.sub_slots:
+ slot.generate(scope, code)
+ code.putln("};")
if self.ifdef:
code.putln("#endif")
-
-substructures = [] # List of all SuiteSlot instances
-
-class MethodTableSlot(SlotDescriptor):
- # Slot descriptor for the method table.
-
- def slot_code(self, scope):
- if scope.pyfunc_entries:
- return scope.method_table_cname
- else:
- return "0"
-
-
-class MemberTableSlot(SlotDescriptor):
- # Slot descriptor for the table of Python-accessible attributes.
-
- def slot_code(self, scope):
- return "0"
-
-
-class GetSetSlot(SlotDescriptor):
- # Slot descriptor for the table of attribute get & set methods.
-
- def slot_code(self, scope):
- if scope.property_entries:
- return scope.getset_table_cname
- else:
- return "0"
-
-
-class BaseClassSlot(SlotDescriptor):
- # Slot descriptor for the base class slot.
-
- def __init__(self, name):
- SlotDescriptor.__init__(self, name, dynamic = 1)
-
- def generate_dynamic_init_code(self, scope, code):
- base_type = scope.parent_type.base_type
- if base_type:
- code.putln("%s.%s = %s;" % (
- scope.parent_type.typeobj_cname,
- self.slot_name,
- base_type.typeptr_cname))
-
-
+
+substructures = [] # List of all SuiteSlot instances
+
+class MethodTableSlot(SlotDescriptor):
+ # Slot descriptor for the method table.
+
+ def slot_code(self, scope):
+ if scope.pyfunc_entries:
+ return scope.method_table_cname
+ else:
+ return "0"
+
+
+class MemberTableSlot(SlotDescriptor):
+ # Slot descriptor for the table of Python-accessible attributes.
+
+ def slot_code(self, scope):
+ return "0"
+
+
+class GetSetSlot(SlotDescriptor):
+ # Slot descriptor for the table of attribute get & set methods.
+
+ def slot_code(self, scope):
+ if scope.property_entries:
+ return scope.getset_table_cname
+ else:
+ return "0"
+
+
+class BaseClassSlot(SlotDescriptor):
+ # Slot descriptor for the base class slot.
+
+ def __init__(self, name):
+ SlotDescriptor.__init__(self, name, dynamic = 1)
+
+ def generate_dynamic_init_code(self, scope, code):
+ base_type = scope.parent_type.base_type
+ if base_type:
+ code.putln("%s.%s = %s;" % (
+ scope.parent_type.typeobj_cname,
+ self.slot_name,
+ base_type.typeptr_cname))
+
+
class DictOffsetSlot(SlotDescriptor):
# Slot descriptor for a class' dict offset, for dynamic attributes.
@@ -552,66 +552,66 @@ class DictOffsetSlot(SlotDescriptor):
return "0"
-# The following dictionary maps __xxx__ method names to slot descriptors.
-
-method_name_to_slot = {}
-
-## The following slots are (or could be) initialised with an
-## extern function pointer.
-#
-#slots_initialised_from_extern = (
-# "tp_free",
-#)
-
-#------------------------------------------------------------------------------------------
-#
-# Utility functions for accessing slot table data structures
-#
-#------------------------------------------------------------------------------------------
-
-def get_special_method_signature(name):
- # Given a method name, if it is a special method,
- # return its signature, else return None.
- slot = method_name_to_slot.get(name)
- if slot:
- return slot.signature
+# The following dictionary maps __xxx__ method names to slot descriptors.
+
+method_name_to_slot = {}
+
+## The following slots are (or could be) initialised with an
+## extern function pointer.
+#
+#slots_initialised_from_extern = (
+# "tp_free",
+#)
+
+#------------------------------------------------------------------------------------------
+#
+# Utility functions for accessing slot table data structures
+#
+#------------------------------------------------------------------------------------------
+
+def get_special_method_signature(name):
+ # Given a method name, if it is a special method,
+ # return its signature, else return None.
+ slot = method_name_to_slot.get(name)
+ if slot:
+ return slot.signature
elif name in richcmp_special_methods:
return ibinaryfunc
- else:
- return None
-
-
-def get_property_accessor_signature(name):
- # Return signature of accessor for an extension type
- # property, else None.
- return property_accessor_signatures.get(name)
-
-
-def get_base_slot_function(scope, slot):
- # Returns the function implementing this slot in the baseclass.
- # This is useful for enabling the compiler to optimize calls
- # that recursively climb the class hierarchy.
- base_type = scope.parent_type.base_type
- if scope.parent_scope is base_type.scope.parent_scope:
- parent_slot = slot.slot_code(base_type.scope)
- if parent_slot != '0':
- entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
- if entry.visibility != 'extern':
- return parent_slot
- return None
-
-
-def get_slot_function(scope, slot):
- # Returns the function implementing this slot in the baseclass.
- # This is useful for enabling the compiler to optimize calls
- # that recursively climb the class hierarchy.
- slot_code = slot.slot_code(scope)
- if slot_code != '0':
- entry = scope.parent_scope.lookup_here(scope.parent_type.name)
- if entry.visibility != 'extern':
- return slot_code
- return None
-
+ else:
+ return None
+
+
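A short usage sketch, assuming the module has been imported so that the slot tables defined further down have been built:

    from Cython.Compiler import TypeSlots

    TypeSlots.get_special_method_signature('__len__').ret_format    # 'z' (Py_ssize_t)
    TypeSlots.get_special_method_signature('__hash__').ret_format   # 'h' (Py_hash_t)
    TypeSlots.get_special_method_signature('frobnicate') is None    # True - not a special method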
+def get_property_accessor_signature(name):
+ # Return signature of accessor for an extension type
+ # property, else None.
+ return property_accessor_signatures.get(name)
+
+
+def get_base_slot_function(scope, slot):
+ # Returns the function implementing this slot in the baseclass.
+ # This is useful for enabling the compiler to optimize calls
+ # that recursively climb the class hierarchy.
+ base_type = scope.parent_type.base_type
+ if scope.parent_scope is base_type.scope.parent_scope:
+ parent_slot = slot.slot_code(base_type.scope)
+ if parent_slot != '0':
+ entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
+ if entry.visibility != 'extern':
+ return parent_slot
+ return None
+
+
+def get_slot_function(scope, slot):
+ # Returns the function implementing this slot in the baseclass.
+ # This is useful for enabling the compiler to optimize calls
+ # that recursively climb the class hierarchy.
+ slot_code = slot.slot_code(scope)
+ if slot_code != '0':
+ entry = scope.parent_scope.lookup_here(scope.parent_type.name)
+ if entry.visibility != 'extern':
+ return slot_code
+ return None
+
def get_slot_by_name(slot_name):
# For now, only search the type struct, no referenced sub-structs.
@@ -626,189 +626,189 @@ def get_slot_code_by_name(scope, slot_name):
return slot.slot_code(scope)
-#------------------------------------------------------------------------------------------
-#
-# Signatures for generic Python functions and methods.
-#
-#------------------------------------------------------------------------------------------
-
-pyfunction_signature = Signature("-*", "O")
-pymethod_signature = Signature("T*", "O")
-
-#------------------------------------------------------------------------------------------
-#
-# Signatures for simple Python functions.
-#
-#------------------------------------------------------------------------------------------
-
-pyfunction_noargs = Signature("-", "O")
-pyfunction_onearg = Signature("-O", "O")
-
-#------------------------------------------------------------------------------------------
-#
-# Signatures for the various kinds of function that
-# can appear in the type object and its substructures.
-#
-#------------------------------------------------------------------------------------------
-
-unaryfunc = Signature("T", "O") # typedef PyObject * (*unaryfunc)(PyObject *);
-binaryfunc = Signature("OO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *);
-ibinaryfunc = Signature("TO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *);
-ternaryfunc = Signature("OOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
-iternaryfunc = Signature("TOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
-callfunc = Signature("T*", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
-inquiry = Signature("T", "i") # typedef int (*inquiry)(PyObject *);
-lenfunc = Signature("T", "z") # typedef Py_ssize_t (*lenfunc)(PyObject *);
-
- # typedef int (*coercion)(PyObject **, PyObject **);
-intargfunc = Signature("Ti", "O") # typedef PyObject *(*intargfunc)(PyObject *, int);
-ssizeargfunc = Signature("Tz", "O") # typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
-intintargfunc = Signature("Tii", "O") # typedef PyObject *(*intintargfunc)(PyObject *, int, int);
-ssizessizeargfunc = Signature("Tzz", "O") # typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t);
-intobjargproc = Signature("TiO", 'r') # typedef int(*intobjargproc)(PyObject *, int, PyObject *);
-ssizeobjargproc = Signature("TzO", 'r') # typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *);
-intintobjargproc = Signature("TiiO", 'r') # typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *);
-ssizessizeobjargproc = Signature("TzzO", 'r') # typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *);
-
-intintargproc = Signature("Tii", 'r')
-ssizessizeargproc = Signature("Tzz", 'r')
-objargfunc = Signature("TO", "O")
-objobjargproc = Signature("TOO", 'r') # typedef int (*objobjargproc)(PyObject *, PyObject *, PyObject *);
-readbufferproc = Signature("TzP", "z") # typedef Py_ssize_t (*readbufferproc)(PyObject *, Py_ssize_t, void **);
-writebufferproc = Signature("TzP", "z") # typedef Py_ssize_t (*writebufferproc)(PyObject *, Py_ssize_t, void **);
-segcountproc = Signature("TZ", "z") # typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *);
-charbufferproc = Signature("TzS", "z") # typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **);
-objargproc = Signature("TO", 'r') # typedef int (*objobjproc)(PyObject *, PyObject *);
- # typedef int (*visitproc)(PyObject *, void *);
- # typedef int (*traverseproc)(PyObject *, visitproc, void *);
-
-destructor = Signature("T", "v") # typedef void (*destructor)(PyObject *);
-# printfunc = Signature("TFi", 'r') # typedef int (*printfunc)(PyObject *, FILE *, int);
- # typedef PyObject *(*getattrfunc)(PyObject *, char *);
-getattrofunc = Signature("TO", "O") # typedef PyObject *(*getattrofunc)(PyObject *, PyObject *);
- # typedef int (*setattrfunc)(PyObject *, char *, PyObject *);
-setattrofunc = Signature("TOO", 'r') # typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *);
-delattrofunc = Signature("TO", 'r')
-cmpfunc = Signature("TO", "i") # typedef int (*cmpfunc)(PyObject *, PyObject *);
-reprfunc = Signature("T", "O") # typedef PyObject *(*reprfunc)(PyObject *);
-hashfunc = Signature("T", "h") # typedef Py_hash_t (*hashfunc)(PyObject *);
+#------------------------------------------------------------------------------------------
+#
+# Signatures for generic Python functions and methods.
+#
+#------------------------------------------------------------------------------------------
+
+pyfunction_signature = Signature("-*", "O")
+pymethod_signature = Signature("T*", "O")
+
+#------------------------------------------------------------------------------------------
+#
+# Signatures for simple Python functions.
+#
+#------------------------------------------------------------------------------------------
+
+pyfunction_noargs = Signature("-", "O")
+pyfunction_onearg = Signature("-O", "O")
+
+#------------------------------------------------------------------------------------------
+#
+# Signatures for the various kinds of function that
+# can appear in the type object and its substructures.
+#
+#------------------------------------------------------------------------------------------
+
+unaryfunc = Signature("T", "O") # typedef PyObject * (*unaryfunc)(PyObject *);
+binaryfunc = Signature("OO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *);
+ibinaryfunc = Signature("TO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *);
+ternaryfunc = Signature("OOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
+iternaryfunc = Signature("TOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
+callfunc = Signature("T*", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
+inquiry = Signature("T", "i") # typedef int (*inquiry)(PyObject *);
+lenfunc = Signature("T", "z") # typedef Py_ssize_t (*lenfunc)(PyObject *);
+
+ # typedef int (*coercion)(PyObject **, PyObject **);
+intargfunc = Signature("Ti", "O") # typedef PyObject *(*intargfunc)(PyObject *, int);
+ssizeargfunc = Signature("Tz", "O") # typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
+intintargfunc = Signature("Tii", "O") # typedef PyObject *(*intintargfunc)(PyObject *, int, int);
+ssizessizeargfunc = Signature("Tzz", "O") # typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t);
+intobjargproc = Signature("TiO", 'r') # typedef int(*intobjargproc)(PyObject *, int, PyObject *);
+ssizeobjargproc = Signature("TzO", 'r') # typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *);
+intintobjargproc = Signature("TiiO", 'r') # typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *);
+ssizessizeobjargproc = Signature("TzzO", 'r') # typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *);
+
+intintargproc = Signature("Tii", 'r')
+ssizessizeargproc = Signature("Tzz", 'r')
+objargfunc = Signature("TO", "O")
+objobjargproc = Signature("TOO", 'r') # typedef int (*objobjargproc)(PyObject *, PyObject *, PyObject *);
+readbufferproc = Signature("TzP", "z") # typedef Py_ssize_t (*readbufferproc)(PyObject *, Py_ssize_t, void **);
+writebufferproc = Signature("TzP", "z") # typedef Py_ssize_t (*writebufferproc)(PyObject *, Py_ssize_t, void **);
+segcountproc = Signature("TZ", "z") # typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *);
+charbufferproc = Signature("TzS", "z") # typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **);
+objargproc = Signature("TO", 'r') # typedef int (*objobjproc)(PyObject *, PyObject *);
+ # typedef int (*visitproc)(PyObject *, void *);
+ # typedef int (*traverseproc)(PyObject *, visitproc, void *);
+
+destructor = Signature("T", "v") # typedef void (*destructor)(PyObject *);
+# printfunc = Signature("TFi", 'r') # typedef int (*printfunc)(PyObject *, FILE *, int);
+ # typedef PyObject *(*getattrfunc)(PyObject *, char *);
+getattrofunc = Signature("TO", "O") # typedef PyObject *(*getattrofunc)(PyObject *, PyObject *);
+ # typedef int (*setattrfunc)(PyObject *, char *, PyObject *);
+setattrofunc = Signature("TOO", 'r') # typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *);
+delattrofunc = Signature("TO", 'r')
+cmpfunc = Signature("TO", "i") # typedef int (*cmpfunc)(PyObject *, PyObject *);
+reprfunc = Signature("T", "O") # typedef PyObject *(*reprfunc)(PyObject *);
+hashfunc = Signature("T", "h") # typedef Py_hash_t (*hashfunc)(PyObject *);
richcmpfunc = Signature("TOi", "O") # typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int);
-getiterfunc = Signature("T", "O") # typedef PyObject *(*getiterfunc) (PyObject *);
-iternextfunc = Signature("T", "O") # typedef PyObject *(*iternextfunc) (PyObject *);
-descrgetfunc = Signature("TOO", "O") # typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *);
-descrsetfunc = Signature("TOO", 'r') # typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *);
-descrdelfunc = Signature("TO", 'r')
-initproc = Signature("T*", 'r') # typedef int (*initproc)(PyObject *, PyObject *, PyObject *);
- # typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *);
- # typedef PyObject *(*allocfunc)(struct _typeobject *, int);
-
-getbufferproc = Signature("TBi", "r") # typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
-releasebufferproc = Signature("TB", "v") # typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
-
-
-#------------------------------------------------------------------------------------------
-#
-# Signatures for accessor methods of properties.
-#
-#------------------------------------------------------------------------------------------
-
-property_accessor_signatures = {
- '__get__': Signature("T", "O"),
- '__set__': Signature("TO", 'r'),
- '__del__': Signature("T", 'r')
-}
-
-#------------------------------------------------------------------------------------------
-#
-# Descriptor tables for the slots of the various type object
-# substructures, in the order they appear in the structure.
-#
-#------------------------------------------------------------------------------------------
-
+getiterfunc = Signature("T", "O") # typedef PyObject *(*getiterfunc) (PyObject *);
+iternextfunc = Signature("T", "O") # typedef PyObject *(*iternextfunc) (PyObject *);
+descrgetfunc = Signature("TOO", "O") # typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *);
+descrsetfunc = Signature("TOO", 'r') # typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *);
+descrdelfunc = Signature("TO", 'r')
+initproc = Signature("T*", 'r') # typedef int (*initproc)(PyObject *, PyObject *, PyObject *);
+ # typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *);
+ # typedef PyObject *(*allocfunc)(struct _typeobject *, int);
+
+getbufferproc = Signature("TBi", "r") # typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
+releasebufferproc = Signature("TB", "v") # typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
+
+
+#------------------------------------------------------------------------------------------
+#
+# Signatures for accessor methods of properties.
+#
+#------------------------------------------------------------------------------------------
+
+property_accessor_signatures = {
+ '__get__': Signature("T", "O"),
+ '__set__': Signature("TO", 'r'),
+ '__del__': Signature("T", 'r')
+}
+
+#------------------------------------------------------------------------------------------
+#
+# Descriptor tables for the slots of the various type object
+# substructures, in the order they appear in the structure.
+#
+#------------------------------------------------------------------------------------------
+
PyNumberMethods_Py3_GUARD = "PY_MAJOR_VERSION < 3 || (CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x03050000)"
-PyNumberMethods = (
- MethodSlot(binaryfunc, "nb_add", "__add__"),
- MethodSlot(binaryfunc, "nb_subtract", "__sub__"),
- MethodSlot(binaryfunc, "nb_multiply", "__mul__"),
+PyNumberMethods = (
+ MethodSlot(binaryfunc, "nb_add", "__add__"),
+ MethodSlot(binaryfunc, "nb_subtract", "__sub__"),
+ MethodSlot(binaryfunc, "nb_multiply", "__mul__"),
MethodSlot(binaryfunc, "nb_divide", "__div__", ifdef = PyNumberMethods_Py3_GUARD),
- MethodSlot(binaryfunc, "nb_remainder", "__mod__"),
- MethodSlot(binaryfunc, "nb_divmod", "__divmod__"),
- MethodSlot(ternaryfunc, "nb_power", "__pow__"),
- MethodSlot(unaryfunc, "nb_negative", "__neg__"),
- MethodSlot(unaryfunc, "nb_positive", "__pos__"),
- MethodSlot(unaryfunc, "nb_absolute", "__abs__"),
- MethodSlot(inquiry, "nb_nonzero", "__nonzero__", py3 = ("nb_bool", "__bool__")),
- MethodSlot(unaryfunc, "nb_invert", "__invert__"),
- MethodSlot(binaryfunc, "nb_lshift", "__lshift__"),
- MethodSlot(binaryfunc, "nb_rshift", "__rshift__"),
- MethodSlot(binaryfunc, "nb_and", "__and__"),
- MethodSlot(binaryfunc, "nb_xor", "__xor__"),
- MethodSlot(binaryfunc, "nb_or", "__or__"),
+ MethodSlot(binaryfunc, "nb_remainder", "__mod__"),
+ MethodSlot(binaryfunc, "nb_divmod", "__divmod__"),
+ MethodSlot(ternaryfunc, "nb_power", "__pow__"),
+ MethodSlot(unaryfunc, "nb_negative", "__neg__"),
+ MethodSlot(unaryfunc, "nb_positive", "__pos__"),
+ MethodSlot(unaryfunc, "nb_absolute", "__abs__"),
+ MethodSlot(inquiry, "nb_nonzero", "__nonzero__", py3 = ("nb_bool", "__bool__")),
+ MethodSlot(unaryfunc, "nb_invert", "__invert__"),
+ MethodSlot(binaryfunc, "nb_lshift", "__lshift__"),
+ MethodSlot(binaryfunc, "nb_rshift", "__rshift__"),
+ MethodSlot(binaryfunc, "nb_and", "__and__"),
+ MethodSlot(binaryfunc, "nb_xor", "__xor__"),
+ MethodSlot(binaryfunc, "nb_or", "__or__"),
EmptySlot("nb_coerce", ifdef = PyNumberMethods_Py3_GUARD),
- MethodSlot(unaryfunc, "nb_int", "__int__", fallback="__long__"),
- MethodSlot(unaryfunc, "nb_long", "__long__", fallback="__int__", py3 = "<RESERVED>"),
- MethodSlot(unaryfunc, "nb_float", "__float__"),
+ MethodSlot(unaryfunc, "nb_int", "__int__", fallback="__long__"),
+ MethodSlot(unaryfunc, "nb_long", "__long__", fallback="__int__", py3 = "<RESERVED>"),
+ MethodSlot(unaryfunc, "nb_float", "__float__"),
MethodSlot(unaryfunc, "nb_oct", "__oct__", ifdef = PyNumberMethods_Py3_GUARD),
MethodSlot(unaryfunc, "nb_hex", "__hex__", ifdef = PyNumberMethods_Py3_GUARD),
-
- # Added in release 2.0
- MethodSlot(ibinaryfunc, "nb_inplace_add", "__iadd__"),
- MethodSlot(ibinaryfunc, "nb_inplace_subtract", "__isub__"),
- MethodSlot(ibinaryfunc, "nb_inplace_multiply", "__imul__"),
+
+ # Added in release 2.0
+ MethodSlot(ibinaryfunc, "nb_inplace_add", "__iadd__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_subtract", "__isub__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_multiply", "__imul__"),
MethodSlot(ibinaryfunc, "nb_inplace_divide", "__idiv__", ifdef = PyNumberMethods_Py3_GUARD),
- MethodSlot(ibinaryfunc, "nb_inplace_remainder", "__imod__"),
- MethodSlot(ibinaryfunc, "nb_inplace_power", "__ipow__"), # actually ternaryfunc!!!
- MethodSlot(ibinaryfunc, "nb_inplace_lshift", "__ilshift__"),
- MethodSlot(ibinaryfunc, "nb_inplace_rshift", "__irshift__"),
- MethodSlot(ibinaryfunc, "nb_inplace_and", "__iand__"),
- MethodSlot(ibinaryfunc, "nb_inplace_xor", "__ixor__"),
- MethodSlot(ibinaryfunc, "nb_inplace_or", "__ior__"),
-
- # Added in release 2.2
- # The following require the Py_TPFLAGS_HAVE_CLASS flag
- MethodSlot(binaryfunc, "nb_floor_divide", "__floordiv__"),
- MethodSlot(binaryfunc, "nb_true_divide", "__truediv__"),
- MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__"),
- MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__"),
-
- # Added in release 2.5
- MethodSlot(unaryfunc, "nb_index", "__index__"),
-
- # Added in release 3.5
- MethodSlot(binaryfunc, "nb_matrix_multiply", "__matmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
- MethodSlot(ibinaryfunc, "nb_inplace_matrix_multiply", "__imatmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
-)
-
-PySequenceMethods = (
- MethodSlot(lenfunc, "sq_length", "__len__"),
- EmptySlot("sq_concat"), # nb_add used instead
- EmptySlot("sq_repeat"), # nb_multiply used instead
- SyntheticSlot("sq_item", ["__getitem__"], "0"), #EmptySlot("sq_item"), # mp_subscript used instead
- MethodSlot(ssizessizeargfunc, "sq_slice", "__getslice__"),
- EmptySlot("sq_ass_item"), # mp_ass_subscript used instead
- SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"),
- MethodSlot(cmpfunc, "sq_contains", "__contains__"),
- EmptySlot("sq_inplace_concat"), # nb_inplace_add used instead
- EmptySlot("sq_inplace_repeat"), # nb_inplace_multiply used instead
-)
-
-PyMappingMethods = (
- MethodSlot(lenfunc, "mp_length", "__len__"),
- MethodSlot(objargfunc, "mp_subscript", "__getitem__"),
- SyntheticSlot("mp_ass_subscript", ["__setitem__", "__delitem__"], "0"),
-)
-
-PyBufferProcs = (
- MethodSlot(readbufferproc, "bf_getreadbuffer", "__getreadbuffer__", py3 = False),
- MethodSlot(writebufferproc, "bf_getwritebuffer", "__getwritebuffer__", py3 = False),
- MethodSlot(segcountproc, "bf_getsegcount", "__getsegcount__", py3 = False),
- MethodSlot(charbufferproc, "bf_getcharbuffer", "__getcharbuffer__", py3 = False),
-
- MethodSlot(getbufferproc, "bf_getbuffer", "__getbuffer__"),
- MethodSlot(releasebufferproc, "bf_releasebuffer", "__releasebuffer__")
-)
-
+ MethodSlot(ibinaryfunc, "nb_inplace_remainder", "__imod__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_power", "__ipow__"), # actually ternaryfunc!!!
+ MethodSlot(ibinaryfunc, "nb_inplace_lshift", "__ilshift__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_rshift", "__irshift__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_and", "__iand__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_xor", "__ixor__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_or", "__ior__"),
+
+ # Added in release 2.2
+ # The following require the Py_TPFLAGS_HAVE_CLASS flag
+ MethodSlot(binaryfunc, "nb_floor_divide", "__floordiv__"),
+ MethodSlot(binaryfunc, "nb_true_divide", "__truediv__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__"),
+ MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__"),
+
+ # Added in release 2.5
+ MethodSlot(unaryfunc, "nb_index", "__index__"),
+
+ # Added in release 3.5
+ MethodSlot(binaryfunc, "nb_matrix_multiply", "__matmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
+ MethodSlot(ibinaryfunc, "nb_inplace_matrix_multiply", "__imatmul__", ifdef="PY_VERSION_HEX >= 0x03050000"),
+)
+
+PySequenceMethods = (
+ MethodSlot(lenfunc, "sq_length", "__len__"),
+ EmptySlot("sq_concat"), # nb_add used instead
+ EmptySlot("sq_repeat"), # nb_multiply used instead
+ SyntheticSlot("sq_item", ["__getitem__"], "0"), #EmptySlot("sq_item"), # mp_subscript used instead
+ MethodSlot(ssizessizeargfunc, "sq_slice", "__getslice__"),
+ EmptySlot("sq_ass_item"), # mp_ass_subscript used instead
+ SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"),
+ MethodSlot(cmpfunc, "sq_contains", "__contains__"),
+ EmptySlot("sq_inplace_concat"), # nb_inplace_add used instead
+ EmptySlot("sq_inplace_repeat"), # nb_inplace_multiply used instead
+)
+
+PyMappingMethods = (
+ MethodSlot(lenfunc, "mp_length", "__len__"),
+ MethodSlot(objargfunc, "mp_subscript", "__getitem__"),
+ SyntheticSlot("mp_ass_subscript", ["__setitem__", "__delitem__"], "0"),
+)
+
+PyBufferProcs = (
+ MethodSlot(readbufferproc, "bf_getreadbuffer", "__getreadbuffer__", py3 = False),
+ MethodSlot(writebufferproc, "bf_getwritebuffer", "__getwritebuffer__", py3 = False),
+ MethodSlot(segcountproc, "bf_getsegcount", "__getsegcount__", py3 = False),
+ MethodSlot(charbufferproc, "bf_getcharbuffer", "__getcharbuffer__", py3 = False),
+
+ MethodSlot(getbufferproc, "bf_getbuffer", "__getbuffer__"),
+ MethodSlot(releasebufferproc, "bf_releasebuffer", "__releasebuffer__")
+)
+
PyAsyncMethods = (
MethodSlot(unaryfunc, "am_await", "__await__"),
MethodSlot(unaryfunc, "am_aiter", "__aiter__"),
@@ -816,112 +816,112 @@ PyAsyncMethods = (
EmptySlot("am_send", ifdef="PY_VERSION_HEX >= 0x030A00A3"),
)
-#------------------------------------------------------------------------------------------
-#
-# The main slot table. This table contains descriptors for all the
-# top-level type slots, beginning with tp_dealloc, in the order they
-# appear in the type object.
-#
-#------------------------------------------------------------------------------------------
-
-slot_table = (
- ConstructorSlot("tp_dealloc", '__dealloc__'),
+#------------------------------------------------------------------------------------------
+#
+# The main slot table. This table contains descriptors for all the
+# top-level type slots, beginning with tp_dealloc, in the order they
+# appear in the type object.
+#
+#------------------------------------------------------------------------------------------
+
+slot_table = (
+ ConstructorSlot("tp_dealloc", '__dealloc__'),
EmptySlot("tp_print", ifdef="PY_VERSION_HEX < 0x030800b4"),
EmptySlot("tp_vectorcall_offset", ifdef="PY_VERSION_HEX >= 0x030800b4"),
- EmptySlot("tp_getattr"),
- EmptySlot("tp_setattr"),
+ EmptySlot("tp_getattr"),
+ EmptySlot("tp_setattr"),
# tp_compare (Py2) / tp_reserved (Py3<3.5) / tp_as_async (Py3.5+) is always used as tp_as_async in Py3
MethodSlot(cmpfunc, "tp_compare", "__cmp__", ifdef="PY_MAJOR_VERSION < 3"),
SuiteSlot(PyAsyncMethods, "__Pyx_PyAsyncMethodsStruct", "tp_as_async", ifdef="PY_MAJOR_VERSION >= 3"),
- MethodSlot(reprfunc, "tp_repr", "__repr__"),
-
- SuiteSlot(PyNumberMethods, "PyNumberMethods", "tp_as_number"),
- SuiteSlot(PySequenceMethods, "PySequenceMethods", "tp_as_sequence"),
- SuiteSlot(PyMappingMethods, "PyMappingMethods", "tp_as_mapping"),
-
- MethodSlot(hashfunc, "tp_hash", "__hash__", inherited=False), # Py3 checks for __richcmp__
- MethodSlot(callfunc, "tp_call", "__call__"),
- MethodSlot(reprfunc, "tp_str", "__str__"),
-
- SyntheticSlot("tp_getattro", ["__getattr__","__getattribute__"], "0"), #"PyObject_GenericGetAttr"),
- SyntheticSlot("tp_setattro", ["__setattr__", "__delattr__"], "0"), #"PyObject_GenericSetAttr"),
-
- SuiteSlot(PyBufferProcs, "PyBufferProcs", "tp_as_buffer"),
-
- TypeFlagsSlot("tp_flags"),
- DocStringSlot("tp_doc"),
-
- GCDependentSlot("tp_traverse"),
- GCClearReferencesSlot("tp_clear"),
-
+ MethodSlot(reprfunc, "tp_repr", "__repr__"),
+
+ SuiteSlot(PyNumberMethods, "PyNumberMethods", "tp_as_number"),
+ SuiteSlot(PySequenceMethods, "PySequenceMethods", "tp_as_sequence"),
+ SuiteSlot(PyMappingMethods, "PyMappingMethods", "tp_as_mapping"),
+
+ MethodSlot(hashfunc, "tp_hash", "__hash__", inherited=False), # Py3 checks for __richcmp__
+ MethodSlot(callfunc, "tp_call", "__call__"),
+ MethodSlot(reprfunc, "tp_str", "__str__"),
+
+ SyntheticSlot("tp_getattro", ["__getattr__","__getattribute__"], "0"), #"PyObject_GenericGetAttr"),
+ SyntheticSlot("tp_setattro", ["__setattr__", "__delattr__"], "0"), #"PyObject_GenericSetAttr"),
+
+ SuiteSlot(PyBufferProcs, "PyBufferProcs", "tp_as_buffer"),
+
+ TypeFlagsSlot("tp_flags"),
+ DocStringSlot("tp_doc"),
+
+ GCDependentSlot("tp_traverse"),
+ GCClearReferencesSlot("tp_clear"),
+
RichcmpSlot(richcmpfunc, "tp_richcompare", "__richcmp__", inherited=False), # Py3 checks for __hash__
-
- EmptySlot("tp_weaklistoffset"),
-
- MethodSlot(getiterfunc, "tp_iter", "__iter__"),
- MethodSlot(iternextfunc, "tp_iternext", "__next__"),
-
- MethodTableSlot("tp_methods"),
- MemberTableSlot("tp_members"),
- GetSetSlot("tp_getset"),
-
- BaseClassSlot("tp_base"), #EmptySlot("tp_base"),
- EmptySlot("tp_dict"),
-
- SyntheticSlot("tp_descr_get", ["__get__"], "0"),
- SyntheticSlot("tp_descr_set", ["__set__", "__delete__"], "0"),
-
+
+ EmptySlot("tp_weaklistoffset"),
+
+ MethodSlot(getiterfunc, "tp_iter", "__iter__"),
+ MethodSlot(iternextfunc, "tp_iternext", "__next__"),
+
+ MethodTableSlot("tp_methods"),
+ MemberTableSlot("tp_members"),
+ GetSetSlot("tp_getset"),
+
+ BaseClassSlot("tp_base"), #EmptySlot("tp_base"),
+ EmptySlot("tp_dict"),
+
+ SyntheticSlot("tp_descr_get", ["__get__"], "0"),
+ SyntheticSlot("tp_descr_set", ["__set__", "__delete__"], "0"),
+
DictOffsetSlot("tp_dictoffset"),
-
- MethodSlot(initproc, "tp_init", "__init__"),
- EmptySlot("tp_alloc"), #FixedSlot("tp_alloc", "PyType_GenericAlloc"),
- InternalMethodSlot("tp_new"),
- EmptySlot("tp_free"),
-
- EmptySlot("tp_is_gc"),
- EmptySlot("tp_bases"),
- EmptySlot("tp_mro"),
- EmptySlot("tp_cache"),
- EmptySlot("tp_subclasses"),
- EmptySlot("tp_weaklist"),
- EmptySlot("tp_del"),
- EmptySlot("tp_version_tag"),
- EmptySlot("tp_finalize", ifdef="PY_VERSION_HEX >= 0x030400a1"),
+
+ MethodSlot(initproc, "tp_init", "__init__"),
+ EmptySlot("tp_alloc"), #FixedSlot("tp_alloc", "PyType_GenericAlloc"),
+ InternalMethodSlot("tp_new"),
+ EmptySlot("tp_free"),
+
+ EmptySlot("tp_is_gc"),
+ EmptySlot("tp_bases"),
+ EmptySlot("tp_mro"),
+ EmptySlot("tp_cache"),
+ EmptySlot("tp_subclasses"),
+ EmptySlot("tp_weaklist"),
+ EmptySlot("tp_del"),
+ EmptySlot("tp_version_tag"),
+ EmptySlot("tp_finalize", ifdef="PY_VERSION_HEX >= 0x030400a1"),
EmptySlot("tp_vectorcall", ifdef="PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)"),
EmptySlot("tp_print", ifdef="PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000"),
# PyPy specific extension - only here to avoid C compiler warnings.
EmptySlot("tp_pypy_flags", ifdef="CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000"),
-)
-
-#------------------------------------------------------------------------------------------
-#
-# Descriptors for special methods which don't appear directly
-# in the type object or its substructures. These methods are
-# called from slot functions synthesized by Cython.
-#
-#------------------------------------------------------------------------------------------
-
-MethodSlot(initproc, "", "__cinit__")
-MethodSlot(destructor, "", "__dealloc__")
-MethodSlot(objobjargproc, "", "__setitem__")
-MethodSlot(objargproc, "", "__delitem__")
-MethodSlot(ssizessizeobjargproc, "", "__setslice__")
-MethodSlot(ssizessizeargproc, "", "__delslice__")
-MethodSlot(getattrofunc, "", "__getattr__")
+)
+
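A rough sketch of querying this table through the lookup helper shown earlier (assuming an importable Cython checkout; expected results in comments):

    from Cython.Compiler import TypeSlots

    slot = TypeSlots.get_slot_by_name('tp_hash')
    slot.method_name            # '__hash__'
    slot.signature.ret_format   # 'h' - the slot returns a Py_hash_t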
+#------------------------------------------------------------------------------------------
+#
+# Descriptors for special methods which don't appear directly
+# in the type object or its substructures. These methods are
+# called from slot functions synthesized by Cython.
+#
+#------------------------------------------------------------------------------------------
+
+MethodSlot(initproc, "", "__cinit__")
+MethodSlot(destructor, "", "__dealloc__")
+MethodSlot(objobjargproc, "", "__setitem__")
+MethodSlot(objargproc, "", "__delitem__")
+MethodSlot(ssizessizeobjargproc, "", "__setslice__")
+MethodSlot(ssizessizeargproc, "", "__delslice__")
+MethodSlot(getattrofunc, "", "__getattr__")
MethodSlot(getattrofunc, "", "__getattribute__")
-MethodSlot(setattrofunc, "", "__setattr__")
-MethodSlot(delattrofunc, "", "__delattr__")
-MethodSlot(descrgetfunc, "", "__get__")
-MethodSlot(descrsetfunc, "", "__set__")
-MethodSlot(descrdelfunc, "", "__delete__")
-
-
-# Method flags for python-exposed methods.
-
-method_noargs = "METH_NOARGS"
-method_onearg = "METH_O"
-method_varargs = "METH_VARARGS"
-method_keywords = "METH_KEYWORDS"
-method_coexist = "METH_COEXIST"
+MethodSlot(setattrofunc, "", "__setattr__")
+MethodSlot(delattrofunc, "", "__delattr__")
+MethodSlot(descrgetfunc, "", "__get__")
+MethodSlot(descrsetfunc, "", "__set__")
+MethodSlot(descrdelfunc, "", "__delete__")
+
+
+# Method flags for python-exposed methods.
+
+method_noargs = "METH_NOARGS"
+method_onearg = "METH_O"
+method_varargs = "METH_VARARGS"
+method_keywords = "METH_KEYWORDS"
+method_coexist = "METH_COEXIST"
diff --git a/contrib/tools/cython/Cython/Compiler/UtilNodes.py b/contrib/tools/cython/Cython/Compiler/UtilNodes.py
index 0b8db28207..c41748ace0 100644
--- a/contrib/tools/cython/Cython/Compiler/UtilNodes.py
+++ b/contrib/tools/cython/Cython/Compiler/UtilNodes.py
@@ -1,359 +1,359 @@
-#
-# Nodes used as utilities and support for transforms etc.
-# These often make up sets including both Nodes and ExprNodes
+#
+# Nodes used as utilities and support for transforms etc.
+# These often make up sets including both Nodes and ExprNodes
# so it is convenient to have them in a separate module.
-#
-
-from __future__ import absolute_import
-
-from . import Nodes
-from . import ExprNodes
-from .Nodes import Node
-from .ExprNodes import AtomicExprNode
-from .PyrexTypes import c_ptr_type
-
-
-class TempHandle(object):
- # THIS IS DEPRECATED, USE LetRefNode instead
- temp = None
- needs_xdecref = False
- def __init__(self, type, needs_cleanup=None):
- self.type = type
- if needs_cleanup is None:
- self.needs_cleanup = type.is_pyobject
- else:
- self.needs_cleanup = needs_cleanup
-
- def ref(self, pos):
- return TempRefNode(pos, handle=self, type=self.type)
-
-
-class TempRefNode(AtomicExprNode):
- # THIS IS DEPRECATED, USE LetRefNode instead
- # handle TempHandle
-
- def analyse_types(self, env):
- assert self.type == self.handle.type
- return self
-
- def analyse_target_types(self, env):
- assert self.type == self.handle.type
- return self
-
- def analyse_target_declaration(self, env):
- pass
-
- def calculate_result_code(self):
- result = self.handle.temp
- if result is None: result = "<error>" # might be called and overwritten
- return result
-
- def generate_result_code(self, code):
- pass
-
+#
+
+from __future__ import absolute_import
+
+from . import Nodes
+from . import ExprNodes
+from .Nodes import Node
+from .ExprNodes import AtomicExprNode
+from .PyrexTypes import c_ptr_type
+
+
+class TempHandle(object):
+ # THIS IS DEPRECATED, USE LetRefNode instead
+ temp = None
+ needs_xdecref = False
+ def __init__(self, type, needs_cleanup=None):
+ self.type = type
+ if needs_cleanup is None:
+ self.needs_cleanup = type.is_pyobject
+ else:
+ self.needs_cleanup = needs_cleanup
+
+ def ref(self, pos):
+ return TempRefNode(pos, handle=self, type=self.type)
+
+
+class TempRefNode(AtomicExprNode):
+ # THIS IS DEPRECATED, USE LetRefNode instead
+ # handle TempHandle
+
+ def analyse_types(self, env):
+ assert self.type == self.handle.type
+ return self
+
+ def analyse_target_types(self, env):
+ assert self.type == self.handle.type
+ return self
+
+ def analyse_target_declaration(self, env):
+ pass
+
+ def calculate_result_code(self):
+ result = self.handle.temp
+ if result is None: result = "<error>" # might be called and overwritten
+ return result
+
+ def generate_result_code(self, code):
+ pass
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
- if self.type.is_pyobject:
- rhs.make_owned_reference(code)
- # TODO: analyse control flow to see if this is necessary
- code.put_xdecref(self.result(), self.ctype())
+ if self.type.is_pyobject:
+ rhs.make_owned_reference(code)
+ # TODO: analyse control flow to see if this is necessary
+ code.put_xdecref(self.result(), self.ctype())
code.putln('%s = %s;' % (
self.result(),
rhs.result() if overloaded_assignment else rhs.result_as(self.ctype()),
))
- rhs.generate_post_assignment_code(code)
- rhs.free_temps(code)
-
-
-class TempsBlockNode(Node):
- # THIS IS DEPRECATED, USE LetNode instead
-
- """
- Creates a block which allocates temporary variables.
- This is used by transforms to output constructs that need
- to make use of a temporary variable. Simply pass the types
- of the needed temporaries to the constructor.
-
- The variables can be referred to using a TempRefNode
- (which can be constructed by calling get_ref_node).
- """
-
- # temps [TempHandle]
- # body StatNode
-
- child_attrs = ["body"]
-
- def generate_execution_code(self, code):
- for handle in self.temps:
- handle.temp = code.funcstate.allocate_temp(
- handle.type, manage_ref=handle.needs_cleanup)
- self.body.generate_execution_code(code)
- for handle in self.temps:
- if handle.needs_cleanup:
- if handle.needs_xdecref:
- code.put_xdecref_clear(handle.temp, handle.type)
- else:
- code.put_decref_clear(handle.temp, handle.type)
- code.funcstate.release_temp(handle.temp)
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_function_definitions(self, env, code):
- self.body.generate_function_definitions(env, code)
-
- def annotate(self, code):
- self.body.annotate(code)
-
-
-class ResultRefNode(AtomicExprNode):
- # A reference to the result of an expression. The result_code
- # must be set externally (usually a temp name).
-
- subexprs = []
- lhs_of_first_assignment = False
-
- def __init__(self, expression=None, pos=None, type=None, may_hold_none=True, is_temp=False):
- self.expression = expression
- self.pos = None
- self.may_hold_none = may_hold_none
- if expression is not None:
- self.pos = expression.pos
- if hasattr(expression, "type"):
- self.type = expression.type
- if pos is not None:
- self.pos = pos
- if type is not None:
- self.type = type
- if is_temp:
- self.is_temp = True
- assert self.pos is not None
-
- def clone_node(self):
- # nothing to do here
- return self
-
- def type_dependencies(self, env):
- if self.expression:
- return self.expression.type_dependencies(env)
- else:
- return ()
-
+ rhs.generate_post_assignment_code(code)
+ rhs.free_temps(code)
+
+
+class TempsBlockNode(Node):
+ # THIS IS DEPRECATED, USE LetNode instead
+
+ """
+ Creates a block which allocates temporary variables.
+ This is used by transforms to output constructs that need
+ to make use of a temporary variable. Simply pass the types
+ of the needed temporaries to the constructor.
+
+ The variables can be referred to using a TempRefNode
+ (which can be constructed by calling get_ref_node).
+ """
+
+ # temps [TempHandle]
+ # body StatNode
+
+ child_attrs = ["body"]
+
+ def generate_execution_code(self, code):
+ for handle in self.temps:
+ handle.temp = code.funcstate.allocate_temp(
+ handle.type, manage_ref=handle.needs_cleanup)
+ self.body.generate_execution_code(code)
+ for handle in self.temps:
+ if handle.needs_cleanup:
+ if handle.needs_xdecref:
+ code.put_xdecref_clear(handle.temp, handle.type)
+ else:
+ code.put_decref_clear(handle.temp, handle.type)
+ code.funcstate.release_temp(handle.temp)
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_function_definitions(self, env, code):
+ self.body.generate_function_definitions(env, code)
+
+ def annotate(self, code):
+ self.body.annotate(code)
+
+
+class ResultRefNode(AtomicExprNode):
+ # A reference to the result of an expression. The result_code
+ # must be set externally (usually a temp name).
+
+ subexprs = []
+ lhs_of_first_assignment = False
+
+ def __init__(self, expression=None, pos=None, type=None, may_hold_none=True, is_temp=False):
+ self.expression = expression
+ self.pos = None
+ self.may_hold_none = may_hold_none
+ if expression is not None:
+ self.pos = expression.pos
+ if hasattr(expression, "type"):
+ self.type = expression.type
+ if pos is not None:
+ self.pos = pos
+ if type is not None:
+ self.type = type
+ if is_temp:
+ self.is_temp = True
+ assert self.pos is not None
+
+ def clone_node(self):
+ # nothing to do here
+ return self
+
+ def type_dependencies(self, env):
+ if self.expression:
+ return self.expression.type_dependencies(env)
+ else:
+ return ()
+
def update_expression(self, expression):
self.expression = expression
if hasattr(expression, "type"):
self.type = expression.type
- def analyse_types(self, env):
- if self.expression is not None:
+ def analyse_types(self, env):
+ if self.expression is not None:
if not self.expression.type:
self.expression = self.expression.analyse_types(env)
- self.type = self.expression.type
- return self
-
- def infer_type(self, env):
- if self.type is not None:
- return self.type
- if self.expression is not None:
- if self.expression.type is not None:
- return self.expression.type
- return self.expression.infer_type(env)
- assert False, "cannot infer type of ResultRefNode"
-
- def may_be_none(self):
- if not self.type.is_pyobject:
- return False
- return self.may_hold_none
-
- def _DISABLED_may_be_none(self):
- # not sure if this is safe - the expression may not be the
- # only value that gets assigned
- if self.expression is not None:
- return self.expression.may_be_none()
- if self.type is not None:
- return self.type.is_pyobject
- return True # play safe
-
- def is_simple(self):
- return True
-
- def result(self):
- try:
- return self.result_code
- except AttributeError:
- if self.expression is not None:
- self.result_code = self.expression.result()
- return self.result_code
-
- def generate_evaluation_code(self, code):
- pass
-
- def generate_result_code(self, code):
- pass
-
- def generate_disposal_code(self, code):
- pass
-
+ self.type = self.expression.type
+ return self
+
+ def infer_type(self, env):
+ if self.type is not None:
+ return self.type
+ if self.expression is not None:
+ if self.expression.type is not None:
+ return self.expression.type
+ return self.expression.infer_type(env)
+ assert False, "cannot infer type of ResultRefNode"
+
+ def may_be_none(self):
+ if not self.type.is_pyobject:
+ return False
+ return self.may_hold_none
+
+ def _DISABLED_may_be_none(self):
+ # not sure if this is safe - the expression may not be the
+ # only value that gets assigned
+ if self.expression is not None:
+ return self.expression.may_be_none()
+ if self.type is not None:
+ return self.type.is_pyobject
+ return True # play safe
+
+ def is_simple(self):
+ return True
+
+ def result(self):
+ try:
+ return self.result_code
+ except AttributeError:
+ if self.expression is not None:
+ self.result_code = self.expression.result()
+ return self.result_code
+
+ def generate_evaluation_code(self, code):
+ pass
+
+ def generate_result_code(self, code):
+ pass
+
+ def generate_disposal_code(self, code):
+ pass
+
def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
- if self.type.is_pyobject:
- rhs.make_owned_reference(code)
- if not self.lhs_of_first_assignment:
- code.put_decref(self.result(), self.ctype())
+ if self.type.is_pyobject:
+ rhs.make_owned_reference(code)
+ if not self.lhs_of_first_assignment:
+ code.put_decref(self.result(), self.ctype())
code.putln('%s = %s;' % (
self.result(),
rhs.result() if overloaded_assignment else rhs.result_as(self.ctype()),
))
- rhs.generate_post_assignment_code(code)
- rhs.free_temps(code)
-
- def allocate_temps(self, env):
- pass
-
- def release_temp(self, env):
- pass
-
- def free_temps(self, code):
- pass
-
-
-class LetNodeMixin:
- def set_temp_expr(self, lazy_temp):
- self.lazy_temp = lazy_temp
- self.temp_expression = lazy_temp.expression
-
- def setup_temp_expr(self, code):
- self.temp_expression.generate_evaluation_code(code)
- self.temp_type = self.temp_expression.type
- if self.temp_type.is_array:
- self.temp_type = c_ptr_type(self.temp_type.base_type)
- self._result_in_temp = self.temp_expression.result_in_temp()
- if self._result_in_temp:
- self.temp = self.temp_expression.result()
- else:
- self.temp_expression.make_owned_reference(code)
- self.temp = code.funcstate.allocate_temp(
- self.temp_type, manage_ref=True)
- code.putln("%s = %s;" % (self.temp, self.temp_expression.result()))
- self.temp_expression.generate_disposal_code(code)
- self.temp_expression.free_temps(code)
- self.lazy_temp.result_code = self.temp
-
- def teardown_temp_expr(self, code):
- if self._result_in_temp:
- self.temp_expression.generate_disposal_code(code)
- self.temp_expression.free_temps(code)
- else:
- if self.temp_type.is_pyobject:
- code.put_decref_clear(self.temp, self.temp_type)
- code.funcstate.release_temp(self.temp)
-
-
-class EvalWithTempExprNode(ExprNodes.ExprNode, LetNodeMixin):
- # A wrapper around a subexpression that moves an expression into a
- # temp variable and provides it to the subexpression.
-
- subexprs = ['temp_expression', 'subexpression']
-
- def __init__(self, lazy_temp, subexpression):
- self.set_temp_expr(lazy_temp)
- self.pos = subexpression.pos
- self.subexpression = subexpression
- # if called after type analysis, we already know the type here
- self.type = self.subexpression.type
-
- def infer_type(self, env):
- return self.subexpression.infer_type(env)
-
+ rhs.generate_post_assignment_code(code)
+ rhs.free_temps(code)
+
+ def allocate_temps(self, env):
+ pass
+
+ def release_temp(self, env):
+ pass
+
+ def free_temps(self, code):
+ pass
+
+
+class LetNodeMixin:
+ def set_temp_expr(self, lazy_temp):
+ self.lazy_temp = lazy_temp
+ self.temp_expression = lazy_temp.expression
+
+ def setup_temp_expr(self, code):
+ self.temp_expression.generate_evaluation_code(code)
+ self.temp_type = self.temp_expression.type
+ if self.temp_type.is_array:
+ self.temp_type = c_ptr_type(self.temp_type.base_type)
+ self._result_in_temp = self.temp_expression.result_in_temp()
+ if self._result_in_temp:
+ self.temp = self.temp_expression.result()
+ else:
+ self.temp_expression.make_owned_reference(code)
+ self.temp = code.funcstate.allocate_temp(
+ self.temp_type, manage_ref=True)
+ code.putln("%s = %s;" % (self.temp, self.temp_expression.result()))
+ self.temp_expression.generate_disposal_code(code)
+ self.temp_expression.free_temps(code)
+ self.lazy_temp.result_code = self.temp
+
+ def teardown_temp_expr(self, code):
+ if self._result_in_temp:
+ self.temp_expression.generate_disposal_code(code)
+ self.temp_expression.free_temps(code)
+ else:
+ if self.temp_type.is_pyobject:
+ code.put_decref_clear(self.temp, self.temp_type)
+ code.funcstate.release_temp(self.temp)
+
+
+class EvalWithTempExprNode(ExprNodes.ExprNode, LetNodeMixin):
+ # A wrapper around a subexpression that moves an expression into a
+ # temp variable and provides it to the subexpression.
+
+ subexprs = ['temp_expression', 'subexpression']
+
+ def __init__(self, lazy_temp, subexpression):
+ self.set_temp_expr(lazy_temp)
+ self.pos = subexpression.pos
+ self.subexpression = subexpression
+ # if called after type analysis, we already know the type here
+ self.type = self.subexpression.type
+
+ def infer_type(self, env):
+ return self.subexpression.infer_type(env)
+
def may_be_none(self):
return self.subexpression.may_be_none()
- def result(self):
- return self.subexpression.result()
-
- def analyse_types(self, env):
- self.temp_expression = self.temp_expression.analyse_types(env)
+ def result(self):
+ return self.subexpression.result()
+
+ def analyse_types(self, env):
+ self.temp_expression = self.temp_expression.analyse_types(env)
self.lazy_temp.update_expression(self.temp_expression) # overwrite in case it changed
- self.subexpression = self.subexpression.analyse_types(env)
- self.type = self.subexpression.type
- return self
-
- def free_subexpr_temps(self, code):
- self.subexpression.free_temps(code)
-
- def generate_subexpr_disposal_code(self, code):
- self.subexpression.generate_disposal_code(code)
-
- def generate_evaluation_code(self, code):
- self.setup_temp_expr(code)
- self.subexpression.generate_evaluation_code(code)
- self.teardown_temp_expr(code)
-
-
-LetRefNode = ResultRefNode
-
-
-class LetNode(Nodes.StatNode, LetNodeMixin):
- # Implements a local temporary variable scope. Imagine this
- # syntax being present:
- # let temp = VALUE:
- # BLOCK (can modify temp)
- # if temp is an object, decref
- #
- # Usually used after analysis phase, but forwards analysis methods
- # to its children
-
- child_attrs = ['temp_expression', 'body']
-
- def __init__(self, lazy_temp, body):
- self.set_temp_expr(lazy_temp)
- self.pos = body.pos
- self.body = body
-
- def analyse_declarations(self, env):
- self.temp_expression.analyse_declarations(env)
- self.body.analyse_declarations(env)
-
- def analyse_expressions(self, env):
- self.temp_expression = self.temp_expression.analyse_expressions(env)
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_execution_code(self, code):
- self.setup_temp_expr(code)
- self.body.generate_execution_code(code)
- self.teardown_temp_expr(code)
-
- def generate_function_definitions(self, env, code):
- self.temp_expression.generate_function_definitions(env, code)
- self.body.generate_function_definitions(env, code)
-
-
-class TempResultFromStatNode(ExprNodes.ExprNode):
- # An ExprNode wrapper around a StatNode that executes the StatNode
- # body. Requires a ResultRefNode that it sets up to refer to its
- # own temp result. The StatNode must assign a value to the result
- # node, which then becomes the result of this node.
-
- subexprs = []
- child_attrs = ['body']
-
- def __init__(self, result_ref, body):
- self.result_ref = result_ref
- self.pos = body.pos
- self.body = body
- self.type = result_ref.type
- self.is_temp = 1
-
- def analyse_declarations(self, env):
- self.body.analyse_declarations(env)
-
- def analyse_types(self, env):
- self.body = self.body.analyse_expressions(env)
- return self
-
- def generate_result_code(self, code):
- self.result_ref.result_code = self.result()
- self.body.generate_execution_code(code)
+ self.subexpression = self.subexpression.analyse_types(env)
+ self.type = self.subexpression.type
+ return self
+
+ def free_subexpr_temps(self, code):
+ self.subexpression.free_temps(code)
+
+ def generate_subexpr_disposal_code(self, code):
+ self.subexpression.generate_disposal_code(code)
+
+ def generate_evaluation_code(self, code):
+ self.setup_temp_expr(code)
+ self.subexpression.generate_evaluation_code(code)
+ self.teardown_temp_expr(code)
+
+
+LetRefNode = ResultRefNode
+
+
+class LetNode(Nodes.StatNode, LetNodeMixin):
+ # Implements a local temporary variable scope. Imagine this
+ # syntax being present:
+ # let temp = VALUE:
+ # BLOCK (can modify temp)
+ # if temp is an object, decref
+ #
+ # Usually used after analysis phase, but forwards analysis methods
+ # to its children
+
+ child_attrs = ['temp_expression', 'body']
+
+ def __init__(self, lazy_temp, body):
+ self.set_temp_expr(lazy_temp)
+ self.pos = body.pos
+ self.body = body
+
+ def analyse_declarations(self, env):
+ self.temp_expression.analyse_declarations(env)
+ self.body.analyse_declarations(env)
+
+ def analyse_expressions(self, env):
+ self.temp_expression = self.temp_expression.analyse_expressions(env)
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_execution_code(self, code):
+ self.setup_temp_expr(code)
+ self.body.generate_execution_code(code)
+ self.teardown_temp_expr(code)
+
+ def generate_function_definitions(self, env, code):
+ self.temp_expression.generate_function_definitions(env, code)
+ self.body.generate_function_definitions(env, code)
+
+
+class TempResultFromStatNode(ExprNodes.ExprNode):
+ # An ExprNode wrapper around a StatNode that executes the StatNode
+ # body. Requires a ResultRefNode that it sets up to refer to its
+ # own temp result. The StatNode must assign a value to the result
+ # node, which then becomes the result of this node.
+
+ subexprs = []
+ child_attrs = ['body']
+
+ def __init__(self, result_ref, body):
+ self.result_ref = result_ref
+ self.pos = body.pos
+ self.body = body
+ self.type = result_ref.type
+ self.is_temp = 1
+
+ def analyse_declarations(self, env):
+ self.body.analyse_declarations(env)
+
+ def analyse_types(self, env):
+ self.body = self.body.analyse_expressions(env)
+ return self
+
+ def generate_result_code(self, code):
+ self.result_ref.result_code = self.result()
+ self.body.generate_execution_code(code)
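The LetRefNode/LetNode pair defined in this file is what transforms reach for when an expression must be evaluated exactly once and then reused from several places in the generated code. A minimal sketch of that pattern, assuming expr_node is an already analysed ExprNode (so it carries a pos) and build_body is a hypothetical callback that produces the statement body referencing the temp:

    from Cython.Compiler import UtilNodes

    def bind_to_temp(expr_node, build_body):
        # Evaluate expr_node once into a temp, expose it through a LetRefNode
        # (an alias of ResultRefNode), and let LetNode release the temp afterwards.
        ref = UtilNodes.LetRefNode(expr_node)
        body = build_body(ref)    # the body statements reuse ref instead of re-evaluating
        return UtilNodes.LetNode(ref, body)

In expression context, EvalWithTempExprNode(ref, subexpression) plays the same role as LetNode does for statements.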
diff --git a/contrib/tools/cython/Cython/Compiler/UtilityCode.py b/contrib/tools/cython/Cython/Compiler/UtilityCode.py
index 401422222f..98e9ab5bfb 100644
--- a/contrib/tools/cython/Cython/Compiler/UtilityCode.py
+++ b/contrib/tools/cython/Cython/Compiler/UtilityCode.py
@@ -1,99 +1,99 @@
-from __future__ import absolute_import
-
-from .TreeFragment import parse_from_strings, StringParseContext
-from . import Symtab
-from . import Naming
-from . import Code
-
-
-class NonManglingModuleScope(Symtab.ModuleScope):
-
- def __init__(self, prefix, *args, **kw):
- self.prefix = prefix
- self.cython_scope = None
+from __future__ import absolute_import
+
+from .TreeFragment import parse_from_strings, StringParseContext
+from . import Symtab
+from . import Naming
+from . import Code
+
+
+class NonManglingModuleScope(Symtab.ModuleScope):
+
+ def __init__(self, prefix, *args, **kw):
+ self.prefix = prefix
+ self.cython_scope = None
self.cpp = kw.pop('cpp', False)
- Symtab.ModuleScope.__init__(self, *args, **kw)
-
- def add_imported_entry(self, name, entry, pos):
- entry.used = True
+ Symtab.ModuleScope.__init__(self, *args, **kw)
+
+ def add_imported_entry(self, name, entry, pos):
+ entry.used = True
return super(NonManglingModuleScope, self).add_imported_entry(name, entry, pos)
-
- def mangle(self, prefix, name=None):
- if name:
- if prefix in (Naming.typeobj_prefix, Naming.func_prefix, Naming.var_prefix, Naming.pyfunc_prefix):
- # Functions, classes etc. gets a manually defined prefix easily
- # manually callable instead (the one passed to CythonUtilityCode)
- prefix = self.prefix
- return "%s%s" % (prefix, name)
- else:
- return Symtab.ModuleScope.mangle(self, prefix)
-
-
-class CythonUtilityCodeContext(StringParseContext):
- scope = None
-
+
+ def mangle(self, prefix, name=None):
+ if name:
+ if prefix in (Naming.typeobj_prefix, Naming.func_prefix, Naming.var_prefix, Naming.pyfunc_prefix):
+ # Functions, classes etc. gets a manually defined prefix easily
+ # manually callable instead (the one passed to CythonUtilityCode)
+ prefix = self.prefix
+ return "%s%s" % (prefix, name)
+ else:
+ return Symtab.ModuleScope.mangle(self, prefix)
+
+
+class CythonUtilityCodeContext(StringParseContext):
+ scope = None
+
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=True, absolute_fallback=True):
if relative_to:
raise AssertionError("Relative imports not supported in utility code.")
- if module_name != self.module_name:
- if module_name not in self.modules:
- raise AssertionError("Only the cython cimport is supported.")
- else:
- return self.modules[module_name]
-
- if self.scope is None:
+ if module_name != self.module_name:
+ if module_name not in self.modules:
+ raise AssertionError("Only the cython cimport is supported.")
+ else:
+ return self.modules[module_name]
+
+ if self.scope is None:
self.scope = NonManglingModuleScope(
self.prefix, module_name, parent_module=None, context=self, cpp=self.cpp)
-
- return self.scope
-
-
-class CythonUtilityCode(Code.UtilityCodeBase):
- """
- Utility code written in the Cython language itself.
-
-    The @cname decorator can set the cname for a function or a method of a cdef class.
- Functions decorated with @cname('c_func_name') get the given cname.
-
- For cdef classes the rules are as follows:
- obj struct -> <cname>_obj
- obj type ptr -> <cname>_type
- methods -> <class_cname>_<method_cname>
-
- For methods the cname decorator is optional, but without the decorator the
- methods will not be prototyped. See Cython.Compiler.CythonScope and
- tests/run/cythonscope.pyx for examples.
- """
-
- is_cython_utility = True
-
- def __init__(self, impl, name="__pyxutil", prefix="", requires=None,
+
+ return self.scope
+
+
+class CythonUtilityCode(Code.UtilityCodeBase):
+ """
+ Utility code written in the Cython language itself.
+
+    The @cname decorator can set the cname for a function or a method of a cdef class.
+ Functions decorated with @cname('c_func_name') get the given cname.
+
+ For cdef classes the rules are as follows:
+ obj struct -> <cname>_obj
+ obj type ptr -> <cname>_type
+ methods -> <class_cname>_<method_cname>
+
+ For methods the cname decorator is optional, but without the decorator the
+ methods will not be prototyped. See Cython.Compiler.CythonScope and
+ tests/run/cythonscope.pyx for examples.
+ """
+
+ is_cython_utility = True
+
+ def __init__(self, impl, name="__pyxutil", prefix="", requires=None,
file=None, from_scope=None, context=None, compiler_directives=None,
outer_module_scope=None):
- # 1) We need to delay the parsing/processing, so that all modules can be
- # imported without import loops
- # 2) The same utility code object can be used for multiple source files;
- # while the generated node trees can be altered in the compilation of a
- # single file.
- # Hence, delay any processing until later.
+ # 1) We need to delay the parsing/processing, so that all modules can be
+ # imported without import loops
+ # 2) The same utility code object can be used for multiple source files;
+ # while the generated node trees can be altered in the compilation of a
+ # single file.
+ # Hence, delay any processing until later.
context_types = {}
- if context is not None:
+ if context is not None:
from .PyrexTypes import BaseType
for key, value in context.items():
if isinstance(value, BaseType):
context[key] = key
context_types[key] = value
- impl = Code.sub_tempita(impl, context, file, name)
- self.impl = impl
- self.name = name
- self.file = file
- self.prefix = prefix
- self.requires = requires or []
- self.from_scope = from_scope
+ impl = Code.sub_tempita(impl, context, file, name)
+ self.impl = impl
+ self.name = name
+ self.file = file
+ self.prefix = prefix
+ self.requires = requires or []
+ self.from_scope = from_scope
self.outer_module_scope = outer_module_scope
self.compiler_directives = compiler_directives
self.context_types = context_types
-
+
def __eq__(self, other):
if isinstance(other, CythonUtilityCode):
return self._equality_params() == other._equality_params()
@@ -109,51 +109,51 @@ class CythonUtilityCode(Code.UtilityCodeBase):
def __hash__(self):
return hash(self.impl)
- def get_tree(self, entries_only=False, cython_scope=None):
- from .AnalysedTreeTransforms import AutoTestDictTransform
- # The AutoTestDictTransform creates the statement "__test__ = {}",
- # which when copied into the main ModuleNode overwrites
- # any __test__ in user code; not desired
- excludes = [AutoTestDictTransform]
-
- from . import Pipeline, ParseTreeTransforms
+ def get_tree(self, entries_only=False, cython_scope=None):
+ from .AnalysedTreeTransforms import AutoTestDictTransform
+ # The AutoTestDictTransform creates the statement "__test__ = {}",
+ # which when copied into the main ModuleNode overwrites
+ # any __test__ in user code; not desired
+ excludes = [AutoTestDictTransform]
+
+ from . import Pipeline, ParseTreeTransforms
context = CythonUtilityCodeContext(
self.name, compiler_directives=self.compiler_directives,
cpp=cython_scope.is_cpp() if cython_scope else False)
- context.prefix = self.prefix
- context.cython_scope = cython_scope
- #context = StringParseContext(self.name)
+ context.prefix = self.prefix
+ context.cython_scope = cython_scope
+ #context = StringParseContext(self.name)
tree = parse_from_strings(
self.name, self.impl, context=context, allow_struct_enum_decorator=True)
- pipeline = Pipeline.create_pipeline(context, 'pyx', exclude_classes=excludes)
-
- if entries_only:
- p = []
- for t in pipeline:
- p.append(t)
- if isinstance(p, ParseTreeTransforms.AnalyseDeclarationsTransform):
- break
-
- pipeline = p
-
- transform = ParseTreeTransforms.CnameDirectivesTransform(context)
- # InterpretCompilerDirectives already does a cdef declarator check
- #before = ParseTreeTransforms.DecoratorTransform
- before = ParseTreeTransforms.InterpretCompilerDirectives
- pipeline = Pipeline.insert_into_pipeline(pipeline, transform,
- before=before)
-
+ pipeline = Pipeline.create_pipeline(context, 'pyx', exclude_classes=excludes)
+
+ if entries_only:
+ p = []
+ for t in pipeline:
+ p.append(t)
+ if isinstance(p, ParseTreeTransforms.AnalyseDeclarationsTransform):
+ break
+
+ pipeline = p
+
+ transform = ParseTreeTransforms.CnameDirectivesTransform(context)
+ # InterpretCompilerDirectives already does a cdef declarator check
+ #before = ParseTreeTransforms.DecoratorTransform
+ before = ParseTreeTransforms.InterpretCompilerDirectives
+ pipeline = Pipeline.insert_into_pipeline(pipeline, transform,
+ before=before)
+
def merge_scope(scope):
def merge_scope_transform(module_node):
module_node.scope.merge_in(scope)
- return module_node
+ return module_node
return merge_scope_transform
-
+
if self.from_scope:
pipeline = Pipeline.insert_into_pipeline(
pipeline, merge_scope(self.from_scope),
before=ParseTreeTransforms.AnalyseDeclarationsTransform)
-
+
for dep in self.requires:
if isinstance(dep, CythonUtilityCode) and hasattr(dep, 'tree') and not cython_scope:
pipeline = Pipeline.insert_into_pipeline(
@@ -182,56 +182,56 @@ class CythonUtilityCode(Code.UtilityCodeBase):
pipeline, scope_transform,
before=ParseTreeTransforms.AnalyseDeclarationsTransform)
- (err, tree) = Pipeline.run_pipeline(pipeline, tree, printtree=False)
- assert not err, err
+ (err, tree) = Pipeline.run_pipeline(pipeline, tree, printtree=False)
+ assert not err, err
self.tree = tree
- return tree
-
- def put_code(self, output):
- pass
-
- @classmethod
- def load_as_string(cls, util_code_name, from_file=None, **kwargs):
- """
- Load a utility code as a string. Returns (proto, implementation)
- """
- util = cls.load(util_code_name, from_file, **kwargs)
- return util.proto, util.impl # keep line numbers => no lstrip()
-
- def declare_in_scope(self, dest_scope, used=False, cython_scope=None,
- whitelist=None):
- """
- Declare all entries from the utility code in dest_scope. Code will only
- be included for used entries. If module_name is given, declare the
- type entries with that name.
- """
- tree = self.get_tree(entries_only=True, cython_scope=cython_scope)
-
- entries = tree.scope.entries
- entries.pop('__name__')
- entries.pop('__file__')
- entries.pop('__builtins__')
- entries.pop('__doc__')
-
+ return tree
+
+ def put_code(self, output):
+ pass
+
+ @classmethod
+ def load_as_string(cls, util_code_name, from_file=None, **kwargs):
+ """
+ Load a utility code as a string. Returns (proto, implementation)
+ """
+ util = cls.load(util_code_name, from_file, **kwargs)
+ return util.proto, util.impl # keep line numbers => no lstrip()
+
+ def declare_in_scope(self, dest_scope, used=False, cython_scope=None,
+ whitelist=None):
+ """
+ Declare all entries from the utility code in dest_scope. Code will only
+ be included for used entries. If module_name is given, declare the
+ type entries with that name.
+ """
+ tree = self.get_tree(entries_only=True, cython_scope=cython_scope)
+
+ entries = tree.scope.entries
+ entries.pop('__name__')
+ entries.pop('__file__')
+ entries.pop('__builtins__')
+ entries.pop('__doc__')
+
for entry in entries.values():
- entry.utility_code_definition = self
- entry.used = used
-
- original_scope = tree.scope
+ entry.utility_code_definition = self
+ entry.used = used
+
+ original_scope = tree.scope
dest_scope.merge_in(original_scope, merge_unused=True, whitelist=whitelist)
- tree.scope = dest_scope
-
- for dep in self.requires:
- if dep.is_cython_utility:
+ tree.scope = dest_scope
+
+ for dep in self.requires:
+ if dep.is_cython_utility:
dep.declare_in_scope(dest_scope, cython_scope=cython_scope)
-
- return original_scope
-
-
-def declare_declarations_in_scope(declaration_string, env, private_type=True,
- *args, **kwargs):
- """
- Declare some declarations given as Cython code in declaration_string
- in scope env.
- """
- CythonUtilityCode(declaration_string, *args, **kwargs).declare_in_scope(env)
+
+ return original_scope
+
+
+def declare_declarations_in_scope(declaration_string, env, private_type=True,
+ *args, **kwargs):
+ """
+ Declare some declarations given as Cython code in declaration_string
+ in scope env.
+ """
+ CythonUtilityCode(declaration_string, *args, **kwargs).declare_in_scope(env)
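Putting the pieces of this file together: utility code is written as a Cython source string, optionally tagged with @cname, and then declared into a target scope so that only the entries that are actually used get emitted. A minimal sketch, assuming env is a module Symtab scope; the helper name and cname are made up for illustration:

    from Cython.Compiler.UtilityCode import CythonUtilityCode

    helper_impl = u"""
    @cname('__pyx_my_helper')
    cdef int my_helper(int x):
        return x + 1
    """

    def declare_helper(env):
        # Parsing is delayed; declare_in_scope() merges the snippet's entries
        # into env, and code is only generated for entries marked as used.
        util = CythonUtilityCode(helper_impl, name="my_helper_util", prefix="__pyx_")
        return util.declare_in_scope(env)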
diff --git a/contrib/tools/cython/Cython/Compiler/Version.py b/contrib/tools/cython/Cython/Compiler/Version.py
index d419b5c018..dcb561f78c 100644
--- a/contrib/tools/cython/Cython/Compiler/Version.py
+++ b/contrib/tools/cython/Cython/Compiler/Version.py
@@ -1,9 +1,9 @@
-# for backwards compatibility
-
-from __future__ import absolute_import
-
-from .. import __version__ as version
-
-# For 'generated by' header line in C files.
-
-watermark = str(version)
+# for backwards compatibility
+
+from __future__ import absolute_import
+
+from .. import __version__ as version
+
+# For 'generated by' header line in C files.
+
+watermark = str(version)
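A minimal sketch of what the watermark is used for; the exact header text Cython emits can vary slightly between releases:

    from Cython.Compiler.Version import watermark

    def generated_by_line():
        # Comment line written at the top of every generated C file.
        return "/* Generated by Cython %s */" % watermark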
diff --git a/contrib/tools/cython/Cython/Compiler/Visitor.pxd b/contrib/tools/cython/Cython/Compiler/Visitor.pxd
index 578522ed64..d5d5692aa7 100644
--- a/contrib/tools/cython/Cython/Compiler/Visitor.pxd
+++ b/contrib/tools/cython/Cython/Compiler/Visitor.pxd
@@ -1,55 +1,55 @@
-from __future__ import absolute_import
-
-cimport cython
-
-cdef class TreeVisitor:
- cdef public list access_path
- cdef dict dispatch_table
-
- cpdef visit(self, obj)
- cdef _visit(self, obj)
- cdef find_handler(self, obj)
- cdef _visitchild(self, child, parent, attrname, idx)
- cdef dict _visitchildren(self, parent, attrs)
- cpdef visitchildren(self, parent, attrs=*)
+from __future__ import absolute_import
+
+cimport cython
+
+cdef class TreeVisitor:
+ cdef public list access_path
+ cdef dict dispatch_table
+
+ cpdef visit(self, obj)
+ cdef _visit(self, obj)
+ cdef find_handler(self, obj)
+ cdef _visitchild(self, child, parent, attrname, idx)
+ cdef dict _visitchildren(self, parent, attrs)
+ cpdef visitchildren(self, parent, attrs=*)
cdef _raise_compiler_error(self, child, e)
-
-cdef class VisitorTransform(TreeVisitor):
+
+cdef class VisitorTransform(TreeVisitor):
cdef dict _process_children(self, parent, attrs=*)
cpdef visitchildren(self, parent, attrs=*, exclude=*)
cdef list _flatten_list(self, list orig_list)
cdef list _select_attrs(self, attrs, exclude)
-
-cdef class CythonTransform(VisitorTransform):
- cdef public context
- cdef public current_directives
-
-cdef class ScopeTrackingTransform(CythonTransform):
- cdef public scope_type
- cdef public scope_node
- cdef visit_scope(self, node, scope_type)
-
-cdef class EnvTransform(CythonTransform):
- cdef public list env_stack
-
-cdef class MethodDispatcherTransform(EnvTransform):
- @cython.final
- cdef _visit_binop_node(self, node)
- @cython.final
- cdef _find_handler(self, match_name, bint has_kwargs)
- @cython.final
- cdef _delegate_to_assigned_value(self, node, function, arg_list, kwargs)
- @cython.final
- cdef _dispatch_to_handler(self, node, function, arg_list, kwargs)
- @cython.final
- cdef _dispatch_to_method_handler(self, attr_name, self_arg,
- is_unbound_method, type_name,
- node, function, arg_list, kwargs)
-
-cdef class RecursiveNodeReplacer(VisitorTransform):
- cdef public orig_node
- cdef public new_node
-
-cdef class NodeFinder(TreeVisitor):
- cdef node
- cdef public bint found
+
+cdef class CythonTransform(VisitorTransform):
+ cdef public context
+ cdef public current_directives
+
+cdef class ScopeTrackingTransform(CythonTransform):
+ cdef public scope_type
+ cdef public scope_node
+ cdef visit_scope(self, node, scope_type)
+
+cdef class EnvTransform(CythonTransform):
+ cdef public list env_stack
+
+cdef class MethodDispatcherTransform(EnvTransform):
+ @cython.final
+ cdef _visit_binop_node(self, node)
+ @cython.final
+ cdef _find_handler(self, match_name, bint has_kwargs)
+ @cython.final
+ cdef _delegate_to_assigned_value(self, node, function, arg_list, kwargs)
+ @cython.final
+ cdef _dispatch_to_handler(self, node, function, arg_list, kwargs)
+ @cython.final
+ cdef _dispatch_to_method_handler(self, attr_name, self_arg,
+ is_unbound_method, type_name,
+ node, function, arg_list, kwargs)
+
+cdef class RecursiveNodeReplacer(VisitorTransform):
+ cdef public orig_node
+ cdef public new_node
+
+cdef class NodeFinder(TreeVisitor):
+ cdef node
+ cdef public bint found
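These cdef declarations mirror the Python classes defined in Visitor.py below. A minimal sketch of a concrete transform built on that contract: returning the node keeps it, returning [] removes it from its enclosing list; the transform name is hypothetical, and it assumes pass statements only occur inside statement lists:

    from Cython.Compiler.Visitor import VisitorTransform

    class DropPassStatements(VisitorTransform):
        # Remove every PassStatNode; leave the rest of the tree untouched.
        def visit_PassStatNode(self, node):
            return []    # spliced out of the enclosing statement list

        def visit_Node(self, node):
            # Generic fallback: recurse into children, then keep the node as-is.
            self.visitchildren(node)
            return node

    # Applied like any other transform: DropPassStatements()(module_node)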
diff --git a/contrib/tools/cython/Cython/Compiler/Visitor.py b/contrib/tools/cython/Cython/Compiler/Visitor.py
index 9d8c116292..a35d13e1d0 100644
--- a/contrib/tools/cython/Cython/Compiler/Visitor.py
+++ b/contrib/tools/cython/Cython/Compiler/Visitor.py
@@ -1,27 +1,27 @@
-# cython: infer_types=True
+# cython: infer_types=True
# cython: language_level=3
# cython: auto_pickle=False
-
-#
-# Tree visitor and transform framework
-#
-
+
+#
+# Tree visitor and transform framework
+#
+
from __future__ import absolute_import, print_function
-
+
import sys
-import inspect
-
-from . import TypeSlots
-from . import Builtin
-from . import Nodes
-from . import ExprNodes
-from . import Errors
-from . import DebugFlags
+import inspect
+
+from . import TypeSlots
+from . import Builtin
+from . import Nodes
+from . import ExprNodes
+from . import Errors
+from . import DebugFlags
from . import Future
-
-import cython
-
-
+
+import cython
+
+
cython.declare(_PRINTABLE=tuple)
if sys.version_info[0] >= 3:
@@ -30,220 +30,220 @@ else:
_PRINTABLE = (str, unicode, long, int, float)
-class TreeVisitor(object):
- """
- Base class for writing visitors for a Cython tree, contains utilities for
- recursing such trees using visitors. Each node is
- expected to have a child_attrs iterable containing the names of attributes
- containing child nodes or lists of child nodes. Lists are not considered
- part of the tree structure (i.e. contained nodes are considered direct
- children of the parent node).
-
- visit_children visits each of the children of a given node (see the visit_children
- documentation). When recursing the tree using visit_children, an attribute
- access_path is maintained which gives information about the current location
- in the tree as a stack of tuples: (parent_node, attrname, index), representing
- the node, attribute and optional list index that was taken in each step in the path to
- the current node.
-
- Example:
-
- >>> class SampleNode(object):
- ... child_attrs = ["head", "body"]
- ... def __init__(self, value, head=None, body=None):
- ... self.value = value
- ... self.head = head
- ... self.body = body
- ... def __repr__(self): return "SampleNode(%s)" % self.value
- ...
- >>> tree = SampleNode(0, SampleNode(1), [SampleNode(2), SampleNode(3)])
- >>> class MyVisitor(TreeVisitor):
- ... def visit_SampleNode(self, node):
+class TreeVisitor(object):
+ """
+ Base class for writing visitors for a Cython tree, contains utilities for
+ recursing such trees using visitors. Each node is
+ expected to have a child_attrs iterable containing the names of attributes
+ containing child nodes or lists of child nodes. Lists are not considered
+ part of the tree structure (i.e. contained nodes are considered direct
+ children of the parent node).
+
+ visit_children visits each of the children of a given node (see the visit_children
+ documentation). When recursing the tree using visit_children, an attribute
+ access_path is maintained which gives information about the current location
+ in the tree as a stack of tuples: (parent_node, attrname, index), representing
+ the node, attribute and optional list index that was taken in each step in the path to
+ the current node.
+
+ Example:
+
+ >>> class SampleNode(object):
+ ... child_attrs = ["head", "body"]
+ ... def __init__(self, value, head=None, body=None):
+ ... self.value = value
+ ... self.head = head
+ ... self.body = body
+ ... def __repr__(self): return "SampleNode(%s)" % self.value
+ ...
+ >>> tree = SampleNode(0, SampleNode(1), [SampleNode(2), SampleNode(3)])
+ >>> class MyVisitor(TreeVisitor):
+ ... def visit_SampleNode(self, node):
... print("in %s %s" % (node.value, self.access_path))
- ... self.visitchildren(node)
+ ... self.visitchildren(node)
... print("out %s" % node.value)
- ...
- >>> MyVisitor().visit(tree)
- in 0 []
- in 1 [(SampleNode(0), 'head', None)]
- out 1
- in 2 [(SampleNode(0), 'body', 0)]
- out 2
- in 3 [(SampleNode(0), 'body', 1)]
- out 3
- out 0
- """
- def __init__(self):
- super(TreeVisitor, self).__init__()
- self.dispatch_table = {}
- self.access_path = []
-
+ ...
+ >>> MyVisitor().visit(tree)
+ in 0 []
+ in 1 [(SampleNode(0), 'head', None)]
+ out 1
+ in 2 [(SampleNode(0), 'body', 0)]
+ out 2
+ in 3 [(SampleNode(0), 'body', 1)]
+ out 3
+ out 0
+ """
+ def __init__(self):
+ super(TreeVisitor, self).__init__()
+ self.dispatch_table = {}
+ self.access_path = []
+
def dump_node(self, node):
ignored = list(node.child_attrs or []) + [
u'child_attrs', u'pos', u'gil_message', u'cpp_message', u'subexprs']
- values = []
- pos = getattr(node, 'pos', None)
- if pos:
- source = pos[0]
- if source:
- import os.path
- source = os.path.basename(source.get_description())
- values.append(u'%s:%s:%s' % (source, pos[1], pos[2]))
- attribute_names = dir(node)
- for attr in attribute_names:
- if attr in ignored:
- continue
+ values = []
+ pos = getattr(node, 'pos', None)
+ if pos:
+ source = pos[0]
+ if source:
+ import os.path
+ source = os.path.basename(source.get_description())
+ values.append(u'%s:%s:%s' % (source, pos[1], pos[2]))
+ attribute_names = dir(node)
+ for attr in attribute_names:
+ if attr in ignored:
+ continue
if attr.startswith('_') or attr.endswith('_'):
- continue
- try:
- value = getattr(node, attr)
- except AttributeError:
- continue
- if value is None or value == 0:
- continue
- elif isinstance(value, list):
- value = u'[...]/%d' % len(value)
+ continue
+ try:
+ value = getattr(node, attr)
+ except AttributeError:
+ continue
+ if value is None or value == 0:
+ continue
+ elif isinstance(value, list):
+ value = u'[...]/%d' % len(value)
elif not isinstance(value, _PRINTABLE):
- continue
- else:
- value = repr(value)
- values.append(u'%s = %s' % (attr, value))
+ continue
+ else:
+ value = repr(value)
+ values.append(u'%s = %s' % (attr, value))
return u'%s(%s)' % (node.__class__.__name__, u',\n '.join(values))
-
- def _find_node_path(self, stacktrace):
- import os.path
- last_traceback = stacktrace
- nodes = []
- while hasattr(stacktrace, 'tb_frame'):
- frame = stacktrace.tb_frame
- node = frame.f_locals.get(u'self')
- if isinstance(node, Nodes.Node):
- code = frame.f_code
- method_name = code.co_name
- pos = (os.path.basename(code.co_filename),
- frame.f_lineno)
- nodes.append((node, method_name, pos))
- last_traceback = stacktrace
- stacktrace = stacktrace.tb_next
- return (last_traceback, nodes)
-
- def _raise_compiler_error(self, child, e):
- trace = ['']
- for parent, attribute, index in self.access_path:
- node = getattr(parent, attribute)
- if index is None:
- index = ''
- else:
- node = node[index]
- index = u'[%d]' % index
- trace.append(u'%s.%s%s = %s' % (
- parent.__class__.__name__, attribute, index,
- self.dump_node(node)))
- stacktrace, called_nodes = self._find_node_path(sys.exc_info()[2])
- last_node = child
- for node, method_name, pos in called_nodes:
- last_node = node
- trace.append(u"File '%s', line %d, in %s: %s" % (
- pos[0], pos[1], method_name, self.dump_node(node)))
- raise Errors.CompilerCrash(
- getattr(last_node, 'pos', None), self.__class__.__name__,
- u'\n'.join(trace), e, stacktrace)
-
- @cython.final
- def find_handler(self, obj):
- # to resolve, try entire hierarchy
- cls = type(obj)
- pattern = "visit_%s"
- mro = inspect.getmro(cls)
- for mro_cls in mro:
- handler_method = getattr(self, pattern % mro_cls.__name__, None)
- if handler_method is not None:
- return handler_method
+
+ def _find_node_path(self, stacktrace):
+ import os.path
+ last_traceback = stacktrace
+ nodes = []
+ while hasattr(stacktrace, 'tb_frame'):
+ frame = stacktrace.tb_frame
+ node = frame.f_locals.get(u'self')
+ if isinstance(node, Nodes.Node):
+ code = frame.f_code
+ method_name = code.co_name
+ pos = (os.path.basename(code.co_filename),
+ frame.f_lineno)
+ nodes.append((node, method_name, pos))
+ last_traceback = stacktrace
+ stacktrace = stacktrace.tb_next
+ return (last_traceback, nodes)
+
+ def _raise_compiler_error(self, child, e):
+ trace = ['']
+ for parent, attribute, index in self.access_path:
+ node = getattr(parent, attribute)
+ if index is None:
+ index = ''
+ else:
+ node = node[index]
+ index = u'[%d]' % index
+ trace.append(u'%s.%s%s = %s' % (
+ parent.__class__.__name__, attribute, index,
+ self.dump_node(node)))
+ stacktrace, called_nodes = self._find_node_path(sys.exc_info()[2])
+ last_node = child
+ for node, method_name, pos in called_nodes:
+ last_node = node
+ trace.append(u"File '%s', line %d, in %s: %s" % (
+ pos[0], pos[1], method_name, self.dump_node(node)))
+ raise Errors.CompilerCrash(
+ getattr(last_node, 'pos', None), self.__class__.__name__,
+ u'\n'.join(trace), e, stacktrace)
+
+ @cython.final
+ def find_handler(self, obj):
+ # to resolve, try entire hierarchy
+ cls = type(obj)
+ pattern = "visit_%s"
+ mro = inspect.getmro(cls)
+ for mro_cls in mro:
+ handler_method = getattr(self, pattern % mro_cls.__name__, None)
+ if handler_method is not None:
+ return handler_method
print(type(self), cls)
- if self.access_path:
+ if self.access_path:
print(self.access_path)
print(self.access_path[-1][0].pos)
print(self.access_path[-1][0].__dict__)
- raise RuntimeError("Visitor %r does not accept object: %s" % (self, obj))
-
- def visit(self, obj):
- return self._visit(obj)
-
- @cython.final
- def _visit(self, obj):
- try:
- try:
- handler_method = self.dispatch_table[type(obj)]
- except KeyError:
- handler_method = self.find_handler(obj)
- self.dispatch_table[type(obj)] = handler_method
- return handler_method(obj)
- except Errors.CompileError:
- raise
- except Errors.AbortError:
- raise
+ raise RuntimeError("Visitor %r does not accept object: %s" % (self, obj))
+
+ def visit(self, obj):
+ return self._visit(obj)
+
+ @cython.final
+ def _visit(self, obj):
+ try:
+ try:
+ handler_method = self.dispatch_table[type(obj)]
+ except KeyError:
+ handler_method = self.find_handler(obj)
+ self.dispatch_table[type(obj)] = handler_method
+ return handler_method(obj)
+ except Errors.CompileError:
+ raise
+ except Errors.AbortError:
+ raise
except Exception as e:
- if DebugFlags.debug_no_exception_intercept:
- raise
- self._raise_compiler_error(obj, e)
-
- @cython.final
- def _visitchild(self, child, parent, attrname, idx):
- self.access_path.append((parent, attrname, idx))
- result = self._visit(child)
- self.access_path.pop()
- return result
-
- def visitchildren(self, parent, attrs=None):
- return self._visitchildren(parent, attrs)
-
- @cython.final
+ if DebugFlags.debug_no_exception_intercept:
+ raise
+ self._raise_compiler_error(obj, e)
+
+ @cython.final
+ def _visitchild(self, child, parent, attrname, idx):
+ self.access_path.append((parent, attrname, idx))
+ result = self._visit(child)
+ self.access_path.pop()
+ return result
+
+ def visitchildren(self, parent, attrs=None):
+ return self._visitchildren(parent, attrs)
+
+ @cython.final
@cython.locals(idx=cython.Py_ssize_t)
- def _visitchildren(self, parent, attrs):
- """
- Visits the children of the given parent. If parent is None, returns
- immediately (returning None).
-
- The return value is a dictionary giving the results for each
- child (mapping the attribute name to either the return value
- or a list of return values (in the case of multiple children
- in an attribute)).
- """
- if parent is None: return None
- result = {}
- for attr in parent.child_attrs:
- if attrs is not None and attr not in attrs: continue
- child = getattr(parent, attr)
- if child is not None:
- if type(child) is list:
- childretval = [self._visitchild(x, parent, attr, idx) for idx, x in enumerate(child)]
- else:
- childretval = self._visitchild(child, parent, attr, None)
- assert not isinstance(childretval, list), 'Cannot insert list here: %s in %r' % (attr, parent)
- result[attr] = childretval
- return result
-
-
-class VisitorTransform(TreeVisitor):
- """
-    A tree transform is a base class for visitors that want to do stream
- processing of the structure (rather than attributes etc.) of a tree.
-
- It implements __call__ to simply visit the argument node.
-
- It requires the visitor methods to return the nodes which should take
- the place of the visited node in the result tree (which can be the same
-    or one or more replacements). Specifically, if the return value from
- a visitor method is:
-
- - [] or None; the visited node will be removed (set to None if an attribute and
- removed if in a list)
- - A single node; the visited node will be replaced by the returned node.
- - A list of nodes; the visited nodes will be replaced by all the nodes in the
- list. This will only work if the node was already a member of a list; if it
- was not, an exception will be raised. (Typically you want to ensure that you
- are within a StatListNode or similar before doing this.)
- """
+ def _visitchildren(self, parent, attrs):
+ """
+ Visits the children of the given parent. If parent is None, returns
+ immediately (returning None).
+
+ The return value is a dictionary giving the results for each
+ child (mapping the attribute name to either the return value
+ or a list of return values (in the case of multiple children
+ in an attribute)).
+ """
+ if parent is None: return None
+ result = {}
+ for attr in parent.child_attrs:
+ if attrs is not None and attr not in attrs: continue
+ child = getattr(parent, attr)
+ if child is not None:
+ if type(child) is list:
+ childretval = [self._visitchild(x, parent, attr, idx) for idx, x in enumerate(child)]
+ else:
+ childretval = self._visitchild(child, parent, attr, None)
+ assert not isinstance(childretval, list), 'Cannot insert list here: %s in %r' % (attr, parent)
+ result[attr] = childretval
+ return result
+
+
+class VisitorTransform(TreeVisitor):
+ """
+    A tree transform is a base class for visitors that want to do stream
+ processing of the structure (rather than attributes etc.) of a tree.
+
+ It implements __call__ to simply visit the argument node.
+
+ It requires the visitor methods to return the nodes which should take
+ the place of the visited node in the result tree (which can be the same
+    or one or more replacements). Specifically, if the return value from
+ a visitor method is:
+
+ - [] or None; the visited node will be removed (set to None if an attribute and
+ removed if in a list)
+ - A single node; the visited node will be replaced by the returned node.
+ - A list of nodes; the visited nodes will be replaced by all the nodes in the
+ list. This will only work if the node was already a member of a list; if it
+ was not, an exception will be raised. (Typically you want to ensure that you
+ are within a StatListNode or similar before doing this.)
+ """
def visitchildren(self, parent, attrs=None, exclude=None):
# generic def entry point for calls from Python subclasses
if exclude is not None:
@@ -257,13 +257,13 @@ class VisitorTransform(TreeVisitor):
@cython.final
def _process_children(self, parent, attrs=None):
# fast cdef entry point for calls from Cython subclasses
- result = self._visitchildren(parent, attrs)
+ result = self._visitchildren(parent, attrs)
for attr, newnode in result.items():
if type(newnode) is list:
newnode = self._flatten_list(newnode)
setattr(parent, attr, newnode)
- return result
-
+ return result
+
@cython.final
def _flatten_list(self, orig_list):
# Flatten the list one level and remove any None
@@ -276,338 +276,338 @@ class VisitorTransform(TreeVisitor):
newlist.append(x)
return newlist
- def recurse_to_children(self, node):
+ def recurse_to_children(self, node):
self._process_children(node)
- return node
-
- def __call__(self, root):
- return self._visit(root)
-
-
-class CythonTransform(VisitorTransform):
- """
- Certain common conventions and utilities for Cython transforms.
-
- - Sets up the context of the pipeline in self.context
- - Tracks directives in effect in self.current_directives
- """
- def __init__(self, context):
- super(CythonTransform, self).__init__()
- self.context = context
-
- def __call__(self, node):
- from . import ModuleNode
- if isinstance(node, ModuleNode.ModuleNode):
- self.current_directives = node.directives
- return super(CythonTransform, self).__call__(node)
-
- def visit_CompilerDirectivesNode(self, node):
- old = self.current_directives
- self.current_directives = node.directives
+ return node
+
+ def __call__(self, root):
+ return self._visit(root)
+
+
+class CythonTransform(VisitorTransform):
+ """
+ Certain common conventions and utilities for Cython transforms.
+
+ - Sets up the context of the pipeline in self.context
+ - Tracks directives in effect in self.current_directives
+ """
+ def __init__(self, context):
+ super(CythonTransform, self).__init__()
+ self.context = context
+
+ def __call__(self, node):
+ from . import ModuleNode
+ if isinstance(node, ModuleNode.ModuleNode):
+ self.current_directives = node.directives
+ return super(CythonTransform, self).__call__(node)
+
+ def visit_CompilerDirectivesNode(self, node):
+ old = self.current_directives
+ self.current_directives = node.directives
self._process_children(node)
- self.current_directives = old
- return node
-
- def visit_Node(self, node):
+ self.current_directives = old
+ return node
+
+ def visit_Node(self, node):
self._process_children(node)
- return node
-
-
-class ScopeTrackingTransform(CythonTransform):
- # Keeps track of type of scopes
- #scope_type: can be either of 'module', 'function', 'cclass', 'pyclass', 'struct'
- #scope_node: the node that owns the current scope
-
- def visit_ModuleNode(self, node):
- self.scope_type = 'module'
- self.scope_node = node
+ return node
+
+
+class ScopeTrackingTransform(CythonTransform):
+ # Keeps track of type of scopes
+ #scope_type: can be either of 'module', 'function', 'cclass', 'pyclass', 'struct'
+ #scope_node: the node that owns the current scope
+
+ def visit_ModuleNode(self, node):
+ self.scope_type = 'module'
+ self.scope_node = node
self._process_children(node)
- return node
-
- def visit_scope(self, node, scope_type):
- prev = self.scope_type, self.scope_node
- self.scope_type = scope_type
- self.scope_node = node
+ return node
+
+ def visit_scope(self, node, scope_type):
+ prev = self.scope_type, self.scope_node
+ self.scope_type = scope_type
+ self.scope_node = node
self._process_children(node)
- self.scope_type, self.scope_node = prev
- return node
-
- def visit_CClassDefNode(self, node):
- return self.visit_scope(node, 'cclass')
-
- def visit_PyClassDefNode(self, node):
- return self.visit_scope(node, 'pyclass')
-
- def visit_FuncDefNode(self, node):
- return self.visit_scope(node, 'function')
-
- def visit_CStructOrUnionDefNode(self, node):
- return self.visit_scope(node, 'struct')
-
-
-class EnvTransform(CythonTransform):
- """
- This transformation keeps a stack of the environments.
- """
- def __call__(self, root):
- self.env_stack = []
- self.enter_scope(root, root.scope)
- return super(EnvTransform, self).__call__(root)
-
- def current_env(self):
- return self.env_stack[-1][1]
-
- def current_scope_node(self):
- return self.env_stack[-1][0]
-
- def global_scope(self):
- return self.current_env().global_scope()
-
- def enter_scope(self, node, scope):
- self.env_stack.append((node, scope))
-
- def exit_scope(self):
- self.env_stack.pop()
-
- def visit_FuncDefNode(self, node):
- self.enter_scope(node, node.local_scope)
+ self.scope_type, self.scope_node = prev
+ return node
+
+ def visit_CClassDefNode(self, node):
+ return self.visit_scope(node, 'cclass')
+
+ def visit_PyClassDefNode(self, node):
+ return self.visit_scope(node, 'pyclass')
+
+ def visit_FuncDefNode(self, node):
+ return self.visit_scope(node, 'function')
+
+ def visit_CStructOrUnionDefNode(self, node):
+ return self.visit_scope(node, 'struct')
+
+
+class EnvTransform(CythonTransform):
+ """
+ This transformation keeps a stack of the environments.
+ """
+ def __call__(self, root):
+ self.env_stack = []
+ self.enter_scope(root, root.scope)
+ return super(EnvTransform, self).__call__(root)
+
+ def current_env(self):
+ return self.env_stack[-1][1]
+
+ def current_scope_node(self):
+ return self.env_stack[-1][0]
+
+ def global_scope(self):
+ return self.current_env().global_scope()
+
+ def enter_scope(self, node, scope):
+ self.env_stack.append((node, scope))
+
+ def exit_scope(self):
+ self.env_stack.pop()
+
+ def visit_FuncDefNode(self, node):
+ self.enter_scope(node, node.local_scope)
self._process_children(node)
- self.exit_scope()
- return node
-
- def visit_GeneratorBodyDefNode(self, node):
+ self.exit_scope()
+ return node
+
+ def visit_GeneratorBodyDefNode(self, node):
self._process_children(node)
- return node
-
- def visit_ClassDefNode(self, node):
- self.enter_scope(node, node.scope)
+ return node
+
+ def visit_ClassDefNode(self, node):
+ self.enter_scope(node, node.scope)
self._process_children(node)
- self.exit_scope()
- return node
-
- def visit_CStructOrUnionDefNode(self, node):
- self.enter_scope(node, node.scope)
+ self.exit_scope()
+ return node
+
+ def visit_CStructOrUnionDefNode(self, node):
+ self.enter_scope(node, node.scope)
self._process_children(node)
- self.exit_scope()
- return node
-
- def visit_ScopedExprNode(self, node):
- if node.expr_scope:
- self.enter_scope(node, node.expr_scope)
+ self.exit_scope()
+ return node
+
+ def visit_ScopedExprNode(self, node):
+ if node.expr_scope:
+ self.enter_scope(node, node.expr_scope)
self._process_children(node)
- self.exit_scope()
- else:
+ self.exit_scope()
+ else:
self._process_children(node)
- return node
-
- def visit_CArgDeclNode(self, node):
- # default arguments are evaluated in the outer scope
- if node.default:
+ return node
+
+ def visit_CArgDeclNode(self, node):
+ # default arguments are evaluated in the outer scope
+ if node.default:
attrs = [attr for attr in node.child_attrs if attr != 'default']
self._process_children(node, attrs)
- self.enter_scope(node, self.current_env().outer_scope)
- self.visitchildren(node, ('default',))
- self.exit_scope()
- else:
+ self.enter_scope(node, self.current_env().outer_scope)
+ self.visitchildren(node, ('default',))
+ self.exit_scope()
+ else:
self._process_children(node)
- return node
-
-
-class NodeRefCleanupMixin(object):
- """
- Clean up references to nodes that were replaced.
-
- NOTE: this implementation assumes that the replacement is
- done first, before hitting any further references during
- normal tree traversal. This needs to be arranged by calling
- "self.visitchildren()" at a proper place in the transform
- and by ordering the "child_attrs" of nodes appropriately.
- """
- def __init__(self, *args):
- super(NodeRefCleanupMixin, self).__init__(*args)
- self._replacements = {}
-
- def visit_CloneNode(self, node):
- arg = node.arg
- if arg not in self._replacements:
+ return node
+
+
+class NodeRefCleanupMixin(object):
+ """
+ Clean up references to nodes that were replaced.
+
+ NOTE: this implementation assumes that the replacement is
+ done first, before hitting any further references during
+ normal tree traversal. This needs to be arranged by calling
+ "self.visitchildren()" at a proper place in the transform
+ and by ordering the "child_attrs" of nodes appropriately.
+ """
+ def __init__(self, *args):
+ super(NodeRefCleanupMixin, self).__init__(*args)
+ self._replacements = {}
+
+ def visit_CloneNode(self, node):
+ arg = node.arg
+ if arg not in self._replacements:
self.visitchildren(arg)
- node.arg = self._replacements.get(arg, arg)
- return node
-
- def visit_ResultRefNode(self, node):
- expr = node.expression
- if expr is None or expr not in self._replacements:
- self.visitchildren(node)
- expr = node.expression
- if expr is not None:
- node.expression = self._replacements.get(expr, expr)
- return node
-
- def replace(self, node, replacement):
- self._replacements[node] = replacement
- return replacement
-
-
-find_special_method_for_binary_operator = {
- '<': '__lt__',
- '<=': '__le__',
- '==': '__eq__',
- '!=': '__ne__',
- '>=': '__ge__',
- '>': '__gt__',
- '+': '__add__',
- '&': '__and__',
+ node.arg = self._replacements.get(arg, arg)
+ return node
+
+ def visit_ResultRefNode(self, node):
+ expr = node.expression
+ if expr is None or expr not in self._replacements:
+ self.visitchildren(node)
+ expr = node.expression
+ if expr is not None:
+ node.expression = self._replacements.get(expr, expr)
+ return node
+
+ def replace(self, node, replacement):
+ self._replacements[node] = replacement
+ return replacement
+
+
+find_special_method_for_binary_operator = {
+ '<': '__lt__',
+ '<=': '__le__',
+ '==': '__eq__',
+ '!=': '__ne__',
+ '>=': '__ge__',
+ '>': '__gt__',
+ '+': '__add__',
+ '&': '__and__',
'/': '__div__',
- '//': '__floordiv__',
- '<<': '__lshift__',
- '%': '__mod__',
- '*': '__mul__',
- '|': '__or__',
- '**': '__pow__',
- '>>': '__rshift__',
- '-': '__sub__',
- '^': '__xor__',
- 'in': '__contains__',
-}.get
-
-
-find_special_method_for_unary_operator = {
- 'not': '__not__',
- '~': '__inv__',
- '-': '__neg__',
- '+': '__pos__',
-}.get
-
-
-class MethodDispatcherTransform(EnvTransform):
- """
- Base class for transformations that want to intercept on specific
- builtin functions or methods of builtin types, including special
- methods triggered by Python operators. Must run after declaration
- analysis when entries were assigned.
-
- Naming pattern for handler methods is as follows:
-
- * builtin functions: _handle_(general|simple|any)_function_NAME
-
- * builtin methods: _handle_(general|simple|any)_method_TYPENAME_METHODNAME
- """
- # only visit call nodes and Python operations
- def visit_GeneralCallNode(self, node):
+ '//': '__floordiv__',
+ '<<': '__lshift__',
+ '%': '__mod__',
+ '*': '__mul__',
+ '|': '__or__',
+ '**': '__pow__',
+ '>>': '__rshift__',
+ '-': '__sub__',
+ '^': '__xor__',
+ 'in': '__contains__',
+}.get
+
+
+find_special_method_for_unary_operator = {
+ 'not': '__not__',
+ '~': '__inv__',
+ '-': '__neg__',
+ '+': '__pos__',
+}.get
+
+
+class MethodDispatcherTransform(EnvTransform):
+ """
+ Base class for transformations that want to intercept on specific
+ builtin functions or methods of builtin types, including special
+ methods triggered by Python operators. Must run after declaration
+ analysis when entries were assigned.
+
+ Naming pattern for handler methods is as follows:
+
+ * builtin functions: _handle_(general|simple|any)_function_NAME
+
+ * builtin methods: _handle_(general|simple|any)_method_TYPENAME_METHODNAME
+ """
+ # only visit call nodes and Python operations
+ def visit_GeneralCallNode(self, node):
self._process_children(node)
- function = node.function
- if not function.type.is_pyobject:
- return node
- arg_tuple = node.positional_args
- if not isinstance(arg_tuple, ExprNodes.TupleNode):
- return node
- keyword_args = node.keyword_args
- if keyword_args and not isinstance(keyword_args, ExprNodes.DictNode):
- # can't handle **kwargs
- return node
- args = arg_tuple.args
- return self._dispatch_to_handler(node, function, args, keyword_args)
-
- def visit_SimpleCallNode(self, node):
+ function = node.function
+ if not function.type.is_pyobject:
+ return node
+ arg_tuple = node.positional_args
+ if not isinstance(arg_tuple, ExprNodes.TupleNode):
+ return node
+ keyword_args = node.keyword_args
+ if keyword_args and not isinstance(keyword_args, ExprNodes.DictNode):
+ # can't handle **kwargs
+ return node
+ args = arg_tuple.args
+ return self._dispatch_to_handler(node, function, args, keyword_args)
+
+ def visit_SimpleCallNode(self, node):
self._process_children(node)
- function = node.function
- if function.type.is_pyobject:
- arg_tuple = node.arg_tuple
- if not isinstance(arg_tuple, ExprNodes.TupleNode):
- return node
- args = arg_tuple.args
- else:
- args = node.args
- return self._dispatch_to_handler(node, function, args, None)
-
- def visit_PrimaryCmpNode(self, node):
- if node.cascade:
- # not currently handled below
+ function = node.function
+ if function.type.is_pyobject:
+ arg_tuple = node.arg_tuple
+ if not isinstance(arg_tuple, ExprNodes.TupleNode):
+ return node
+ args = arg_tuple.args
+ else:
+ args = node.args
+ return self._dispatch_to_handler(node, function, args, None)
+
+ def visit_PrimaryCmpNode(self, node):
+ if node.cascade:
+ # not currently handled below
self._process_children(node)
- return node
- return self._visit_binop_node(node)
-
- def visit_BinopNode(self, node):
- return self._visit_binop_node(node)
-
- def _visit_binop_node(self, node):
+ return node
+ return self._visit_binop_node(node)
+
+ def visit_BinopNode(self, node):
+ return self._visit_binop_node(node)
+
+ def _visit_binop_node(self, node):
self._process_children(node)
- # FIXME: could special case 'not_in'
- special_method_name = find_special_method_for_binary_operator(node.operator)
- if special_method_name:
- operand1, operand2 = node.operand1, node.operand2
- if special_method_name == '__contains__':
- operand1, operand2 = operand2, operand1
+ # FIXME: could special case 'not_in'
+ special_method_name = find_special_method_for_binary_operator(node.operator)
+ if special_method_name:
+ operand1, operand2 = node.operand1, node.operand2
+ if special_method_name == '__contains__':
+ operand1, operand2 = operand2, operand1
elif special_method_name == '__div__':
if Future.division in self.current_env().global_scope().context.future_directives:
special_method_name = '__truediv__'
- obj_type = operand1.type
- if obj_type.is_builtin_type:
- type_name = obj_type.name
- else:
- type_name = "object" # safety measure
- node = self._dispatch_to_method_handler(
- special_method_name, None, False, type_name,
- node, None, [operand1, operand2], None)
- return node
-
- def visit_UnopNode(self, node):
+ obj_type = operand1.type
+ if obj_type.is_builtin_type:
+ type_name = obj_type.name
+ else:
+ type_name = "object" # safety measure
+ node = self._dispatch_to_method_handler(
+ special_method_name, None, False, type_name,
+ node, None, [operand1, operand2], None)
+ return node
+
+ def visit_UnopNode(self, node):
self._process_children(node)
- special_method_name = find_special_method_for_unary_operator(node.operator)
- if special_method_name:
- operand = node.operand
- obj_type = operand.type
- if obj_type.is_builtin_type:
- type_name = obj_type.name
- else:
- type_name = "object" # safety measure
- node = self._dispatch_to_method_handler(
- special_method_name, None, False, type_name,
- node, None, [operand], None)
- return node
-
- ### dispatch to specific handlers
-
- def _find_handler(self, match_name, has_kwargs):
- call_type = has_kwargs and 'general' or 'simple'
- handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None)
- if handler is None:
- handler = getattr(self, '_handle_any_%s' % match_name, None)
- return handler
-
- def _delegate_to_assigned_value(self, node, function, arg_list, kwargs):
- assignment = function.cf_state[0]
- value = assignment.rhs
- if value.is_name:
- if not value.entry or len(value.entry.cf_assignments) > 1:
- # the variable might have been reassigned => play safe
- return node
- elif value.is_attribute and value.obj.is_name:
- if not value.obj.entry or len(value.obj.entry.cf_assignments) > 1:
- # the underlying variable might have been reassigned => play safe
- return node
- else:
- return node
- return self._dispatch_to_handler(
- node, value, arg_list, kwargs)
-
- def _dispatch_to_handler(self, node, function, arg_list, kwargs):
- if function.is_name:
- # we only consider functions that are either builtin
- # Python functions or builtins that were already replaced
- # into a C function call (defined in the builtin scope)
- if not function.entry:
- return node
+ special_method_name = find_special_method_for_unary_operator(node.operator)
+ if special_method_name:
+ operand = node.operand
+ obj_type = operand.type
+ if obj_type.is_builtin_type:
+ type_name = obj_type.name
+ else:
+ type_name = "object" # safety measure
+ node = self._dispatch_to_method_handler(
+ special_method_name, None, False, type_name,
+ node, None, [operand], None)
+ return node
+
+ ### dispatch to specific handlers
+
+ def _find_handler(self, match_name, has_kwargs):
+ call_type = has_kwargs and 'general' or 'simple'
+ handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None)
+ if handler is None:
+ handler = getattr(self, '_handle_any_%s' % match_name, None)
+ return handler
+
+ def _delegate_to_assigned_value(self, node, function, arg_list, kwargs):
+ assignment = function.cf_state[0]
+ value = assignment.rhs
+ if value.is_name:
+ if not value.entry or len(value.entry.cf_assignments) > 1:
+ # the variable might have been reassigned => play safe
+ return node
+ elif value.is_attribute and value.obj.is_name:
+ if not value.obj.entry or len(value.obj.entry.cf_assignments) > 1:
+ # the underlying variable might have been reassigned => play safe
+ return node
+ else:
+ return node
+ return self._dispatch_to_handler(
+ node, value, arg_list, kwargs)
+
+ def _dispatch_to_handler(self, node, function, arg_list, kwargs):
+ if function.is_name:
+ # we only consider functions that are either builtin
+ # Python functions or builtins that were already replaced
+ # into a C function call (defined in the builtin scope)
+ if not function.entry:
+ return node
entry = function.entry
- is_builtin = (
+ is_builtin = (
entry.is_builtin or
entry is self.current_env().builtin_scope().lookup_here(function.name))
- if not is_builtin:
- if function.cf_state and function.cf_state.is_single:
- # we know the value of the variable
- # => see if it's usable instead
- return self._delegate_to_assigned_value(
- node, function, arg_list, kwargs)
+ if not is_builtin:
+ if function.cf_state and function.cf_state.is_single:
+ # we know the value of the variable
+ # => see if it's usable instead
+ return self._delegate_to_assigned_value(
+ node, function, arg_list, kwargs)
if arg_list and entry.is_cmethod and entry.scope and entry.scope.parent_type.is_builtin_type:
if entry.scope.parent_type is arg_list[0].type:
# Optimised (unbound) method of a builtin type => try to "de-optimise".
@@ -615,17 +615,17 @@ class MethodDispatcherTransform(EnvTransform):
entry.name, self_arg=None, is_unbound_method=True,
type_name=entry.scope.parent_type.name,
node=node, function=function, arg_list=arg_list, kwargs=kwargs)
- return node
- function_handler = self._find_handler(
- "function_%s" % function.name, kwargs)
- if function_handler is None:
- return self._handle_function(node, function.name, function, arg_list, kwargs)
- if kwargs:
- return function_handler(node, function, arg_list, kwargs)
- else:
- return function_handler(node, function, arg_list)
+ return node
+ function_handler = self._find_handler(
+ "function_%s" % function.name, kwargs)
+ if function_handler is None:
+ return self._handle_function(node, function.name, function, arg_list, kwargs)
+ if kwargs:
+ return function_handler(node, function, arg_list, kwargs)
+ else:
+ return function_handler(node, function, arg_list)
elif function.is_attribute:
- attr_name = function.attribute
+ attr_name = function.attribute
if function.type.is_pyobject:
self_arg = function.obj
elif node.self and function.entry:
@@ -637,68 +637,68 @@ class MethodDispatcherTransform(EnvTransform):
arg_list = arg_list[1:] # drop CloneNode of self argument
else:
return node
- obj_type = self_arg.type
- is_unbound_method = False
- if obj_type.is_builtin_type:
+ obj_type = self_arg.type
+ is_unbound_method = False
+ if obj_type.is_builtin_type:
if obj_type is Builtin.type_type and self_arg.is_name and arg_list and arg_list[0].type.is_pyobject:
- # calling an unbound method like 'list.append(L,x)'
- # (ignoring 'type.mro()' here ...)
- type_name = self_arg.name
- self_arg = None
- is_unbound_method = True
- else:
- type_name = obj_type.name
- else:
- type_name = "object" # safety measure
- return self._dispatch_to_method_handler(
- attr_name, self_arg, is_unbound_method, type_name,
- node, function, arg_list, kwargs)
- else:
- return node
-
- def _dispatch_to_method_handler(self, attr_name, self_arg,
- is_unbound_method, type_name,
- node, function, arg_list, kwargs):
- method_handler = self._find_handler(
- "method_%s_%s" % (type_name, attr_name), kwargs)
- if method_handler is None:
- if (attr_name in TypeSlots.method_name_to_slot
- or attr_name == '__new__'):
- method_handler = self._find_handler(
- "slot%s" % attr_name, kwargs)
- if method_handler is None:
- return self._handle_method(
- node, type_name, attr_name, function,
- arg_list, is_unbound_method, kwargs)
- if self_arg is not None:
- arg_list = [self_arg] + list(arg_list)
- if kwargs:
+ # calling an unbound method like 'list.append(L,x)'
+ # (ignoring 'type.mro()' here ...)
+ type_name = self_arg.name
+ self_arg = None
+ is_unbound_method = True
+ else:
+ type_name = obj_type.name
+ else:
+ type_name = "object" # safety measure
+ return self._dispatch_to_method_handler(
+ attr_name, self_arg, is_unbound_method, type_name,
+ node, function, arg_list, kwargs)
+ else:
+ return node
+
+ def _dispatch_to_method_handler(self, attr_name, self_arg,
+ is_unbound_method, type_name,
+ node, function, arg_list, kwargs):
+ method_handler = self._find_handler(
+ "method_%s_%s" % (type_name, attr_name), kwargs)
+ if method_handler is None:
+ if (attr_name in TypeSlots.method_name_to_slot
+ or attr_name == '__new__'):
+ method_handler = self._find_handler(
+ "slot%s" % attr_name, kwargs)
+ if method_handler is None:
+ return self._handle_method(
+ node, type_name, attr_name, function,
+ arg_list, is_unbound_method, kwargs)
+ if self_arg is not None:
+ arg_list = [self_arg] + list(arg_list)
+ if kwargs:
result = method_handler(
- node, function, arg_list, is_unbound_method, kwargs)
- else:
+ node, function, arg_list, is_unbound_method, kwargs)
+ else:
result = method_handler(
- node, function, arg_list, is_unbound_method)
+ node, function, arg_list, is_unbound_method)
return result
-
- def _handle_function(self, node, function_name, function, arg_list, kwargs):
- """Fallback handler"""
- return node
-
- def _handle_method(self, node, type_name, attr_name, function,
- arg_list, is_unbound_method, kwargs):
- """Fallback handler"""
- return node
-
-
-class RecursiveNodeReplacer(VisitorTransform):
- """
- Recursively replace all occurrences of a node in a subtree by
- another node.
- """
- def __init__(self, orig_node, new_node):
- super(RecursiveNodeReplacer, self).__init__()
- self.orig_node, self.new_node = orig_node, new_node
-
+
+ def _handle_function(self, node, function_name, function, arg_list, kwargs):
+ """Fallback handler"""
+ return node
+
+ def _handle_method(self, node, type_name, attr_name, function,
+ arg_list, is_unbound_method, kwargs):
+ """Fallback handler"""
+ return node
+
+
+class RecursiveNodeReplacer(VisitorTransform):
+ """
+ Recursively replace all occurrences of a node in a subtree by
+ another node.
+ """
+ def __init__(self, orig_node, new_node):
+ super(RecursiveNodeReplacer, self).__init__()
+ self.orig_node, self.new_node = orig_node, new_node
+
def visit_CloneNode(self, node):
if node is self.orig_node:
return self.new_node
@@ -706,87 +706,87 @@ class RecursiveNodeReplacer(VisitorTransform):
node.arg = self.new_node
return node
- def visit_Node(self, node):
+ def visit_Node(self, node):
self._process_children(node)
- if node is self.orig_node:
- return self.new_node
- else:
- return node
-
-def recursively_replace_node(tree, old_node, new_node):
- replace_in = RecursiveNodeReplacer(old_node, new_node)
- replace_in(tree)
-
-
-class NodeFinder(TreeVisitor):
- """
- Find out if a node appears in a subtree.
- """
- def __init__(self, node):
- super(NodeFinder, self).__init__()
- self.node = node
- self.found = False
-
- def visit_Node(self, node):
- if self.found:
- pass # short-circuit
- elif node is self.node:
- self.found = True
- else:
- self._visitchildren(node, None)
-
-def tree_contains(tree, node):
- finder = NodeFinder(node)
- finder.visit(tree)
- return finder.found
-
-
-# Utils
-def replace_node(ptr, value):
- """Replaces a node. ptr is of the form used on the access path stack
- (parent, attrname, listidx|None)
- """
- parent, attrname, listidx = ptr
- if listidx is None:
- setattr(parent, attrname, value)
- else:
- getattr(parent, attrname)[listidx] = value
-
-
-class PrintTree(TreeVisitor):
- """Prints a representation of the tree to standard output.
- Subclass and override repr_of to provide more information
- about nodes. """
+ if node is self.orig_node:
+ return self.new_node
+ else:
+ return node
+
+def recursively_replace_node(tree, old_node, new_node):
+ replace_in = RecursiveNodeReplacer(old_node, new_node)
+ replace_in(tree)
+
+
+class NodeFinder(TreeVisitor):
+ """
+ Find out if a node appears in a subtree.
+ """
+ def __init__(self, node):
+ super(NodeFinder, self).__init__()
+ self.node = node
+ self.found = False
+
+ def visit_Node(self, node):
+ if self.found:
+ pass # short-circuit
+ elif node is self.node:
+ self.found = True
+ else:
+ self._visitchildren(node, None)
+
+def tree_contains(tree, node):
+ finder = NodeFinder(node)
+ finder.visit(tree)
+ return finder.found
+
+
+# Utils
+def replace_node(ptr, value):
+ """Replaces a node. ptr is of the form used on the access path stack
+ (parent, attrname, listidx|None)
+ """
+ parent, attrname, listidx = ptr
+ if listidx is None:
+ setattr(parent, attrname, value)
+ else:
+ getattr(parent, attrname)[listidx] = value
+
+
+class PrintTree(TreeVisitor):
+ """Prints a representation of the tree to standard output.
+ Subclass and override repr_of to provide more information
+ about nodes. """
def __init__(self, start=None, end=None):
- TreeVisitor.__init__(self)
- self._indent = ""
+ TreeVisitor.__init__(self)
+ self._indent = ""
if start is not None or end is not None:
self._line_range = (start or 0, end or 2**30)
else:
self._line_range = None
-
- def indent(self):
- self._indent += " "
-
- def unindent(self):
- self._indent = self._indent[:-2]
-
- def __call__(self, tree, phase=None):
- print("Parse tree dump at phase '%s'" % phase)
- self.visit(tree)
- return tree
-
- # Don't do anything about process_list, the defaults gives
- # nice-looking name[idx] nodes which will visually appear
- # under the parent-node, not displaying the list itself in
- # the hierarchy.
- def visit_Node(self, node):
+
+ def indent(self):
+ self._indent += " "
+
+ def unindent(self):
+ self._indent = self._indent[:-2]
+
+ def __call__(self, tree, phase=None):
+ print("Parse tree dump at phase '%s'" % phase)
+ self.visit(tree)
+ return tree
+
+ # Don't do anything about process_list, the defaults gives
+ # nice-looking name[idx] nodes which will visually appear
+ # under the parent-node, not displaying the list itself in
+ # the hierarchy.
+ def visit_Node(self, node):
self._print_node(node)
- self.indent()
- self.visitchildren(node)
- self.unindent()
- return node
-
+ self.indent()
+ self.visitchildren(node)
+ self.unindent()
+ return node
+
def visit_CloneNode(self, node):
self._print_node(node)
self.indent()
@@ -812,29 +812,29 @@ class PrintTree(TreeVisitor):
name = attr
print("%s- %s: %s" % (self._indent, name, self.repr_of(node)))
- def repr_of(self, node):
- if node is None:
- return "(none)"
- else:
- result = node.__class__.__name__
- if isinstance(node, ExprNodes.NameNode):
- result += "(type=%s, name=\"%s\")" % (repr(node.type), node.name)
- elif isinstance(node, Nodes.DefNode):
- result += "(name=\"%s\")" % node.name
- elif isinstance(node, ExprNodes.ExprNode):
- t = node.type
- result += "(type=%s)" % repr(t)
- elif node.pos:
- pos = node.pos
- path = pos[0].get_description()
- if '/' in path:
- path = path.split('/')[-1]
- if '\\' in path:
- path = path.split('\\')[-1]
- result += "(pos=(%s:%s:%s))" % (path, pos[1], pos[2])
-
- return result
-
-if __name__ == "__main__":
- import doctest
- doctest.testmod()
+ def repr_of(self, node):
+ if node is None:
+ return "(none)"
+ else:
+ result = node.__class__.__name__
+ if isinstance(node, ExprNodes.NameNode):
+ result += "(type=%s, name=\"%s\")" % (repr(node.type), node.name)
+ elif isinstance(node, Nodes.DefNode):
+ result += "(name=\"%s\")" % node.name
+ elif isinstance(node, ExprNodes.ExprNode):
+ t = node.type
+ result += "(type=%s)" % repr(t)
+ elif node.pos:
+ pos = node.pos
+ path = pos[0].get_description()
+ if '/' in path:
+ path = path.split('/')[-1]
+ if '\\' in path:
+ path = path.split('\\')[-1]
+ result += "(pos=(%s:%s:%s))" % (path, pos[1], pos[2])
+
+ return result
+
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
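
The MethodDispatcherTransform restored in the Visitor.py hunk above resolves builtin calls to handler methods purely by name: _handle_(general|simple|any)_function_NAME for builtin functions and _handle_(general|simple|any)_method_TYPENAME_METHODNAME for methods of builtin types. A minimal, hypothetical subclass sketching that convention (the handler names and bodies below are illustrative only and are not part of this commit):

    class MyBuiltinCallTransform(MethodDispatcherTransform):
        # matched for len(x) calls that carry no keyword arguments
        def _handle_simple_function_len(self, node, function, pos_args):
            # return the original node to leave the call alone,
            # or build and return a replacement expression node here
            return node

        # matched for some_list.append(x) on the builtin list type
        def _handle_simple_method_list_append(self, node, function, args, is_unbound_method):
            return node
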
diff --git a/contrib/tools/cython/Cython/Compiler/__init__.py b/contrib/tools/cython/Cython/Compiler/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Compiler/__init__.py
+++ b/contrib/tools/cython/Cython/Compiler/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Debugger/Cygdb.py b/contrib/tools/cython/Cython/Debugger/Cygdb.py
index a317020ab5..45f31ce6f7 100644
--- a/contrib/tools/cython/Cython/Debugger/Cygdb.py
+++ b/contrib/tools/cython/Cython/Debugger/Cygdb.py
@@ -1,143 +1,143 @@
-#!/usr/bin/env python
-
-"""
-The Cython debugger
-
-The current directory should contain a directory named 'cython_debug', or a
-path to the cython project directory should be given (the parent directory of
-cython_debug).
-
-Additional gdb args can be provided only if a path to the project directory is
-given.
-"""
-
-import os
-import sys
-import glob
-import tempfile
-import textwrap
-import subprocess
-import optparse
-import logging
-
-logger = logging.getLogger(__name__)
-
-def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
- if not no_import:
- pattern = os.path.join(path_to_debug_info,
- 'cython_debug',
- 'cython_debug_info_*')
- debug_files = glob.glob(pattern)
-
- if not debug_files:
- sys.exit('%s.\nNo debug files were found in %s. Aborting.' % (
- usage, os.path.abspath(path_to_debug_info)))
-
- fd, tempfilename = tempfile.mkstemp()
- f = os.fdopen(fd, 'w')
- try:
- f.write(prefix_code)
- f.write(textwrap.dedent('''\
- # This is a gdb command file
- # See https://sourceware.org/gdb/onlinedocs/gdb/Command-Files.html
-
- set breakpoint pending on
- set print pretty on
-
- python
- # Activate virtualenv, if we were launched from one
- import os
- virtualenv = os.getenv('VIRTUAL_ENV')
- if virtualenv:
- path_to_activate_this_py = os.path.join(virtualenv, 'bin', 'activate_this.py')
- print("gdb command file: Activating virtualenv: %s; path_to_activate_this_py: %s" % (
- virtualenv, path_to_activate_this_py))
- with open(path_to_activate_this_py) as f:
- exec(f.read(), dict(__file__=path_to_activate_this_py))
-
- from Cython.Debugger import libcython, libpython
- end
- '''))
-
- if no_import:
- # don't do this, this overrides file command in .gdbinit
- # f.write("file %s\n" % sys.executable)
- pass
- else:
- path = os.path.join(path_to_debug_info, "cython_debug", "interpreter")
- interpreter_file = open(path)
- try:
- interpreter = interpreter_file.read()
- finally:
- interpreter_file.close()
- f.write("file %s\n" % interpreter)
- f.write('\n'.join('cy import %s\n' % fn for fn in debug_files))
- f.write(textwrap.dedent('''\
- python
- import sys
- try:
- gdb.lookup_type('PyModuleObject')
- except RuntimeError:
- sys.stderr.write(
- 'Python was not compiled with debug symbols (or it was '
- 'stripped). Some functionality may not work (properly).\\n')
- end
-
- source .cygdbinit
- '''))
- finally:
- f.close()
-
- return tempfilename
-
-usage = "Usage: cygdb [options] [PATH [-- GDB_ARGUMENTS]]"
-
-def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
- """
- Start the Cython debugger. This tells gdb to import the Cython and Python
- extensions (libcython.py and libpython.py) and it enables gdb's pending
- breakpoints.
-
- path_to_debug_info is the path to the Cython build directory
- gdb_argv is the list of options to gdb
- no_import tells cygdb whether it should import debug information
- """
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--gdb-executable",
- dest="gdb", default='gdb',
- help="gdb executable to use [default: gdb]")
- parser.add_option("--verbose", "-v",
- dest="verbosity", action="count", default=0,
- help="Verbose mode. Multiple -v options increase the verbosity")
-
- (options, args) = parser.parse_args()
- if path_to_debug_info is None:
- if len(args) > 1:
- path_to_debug_info = args[0]
- else:
- path_to_debug_info = os.curdir
-
- if gdb_argv is None:
- gdb_argv = args[1:]
-
- if path_to_debug_info == '--':
- no_import = True
-
- logging_level = logging.WARN
- if options.verbosity == 1:
- logging_level = logging.INFO
+#!/usr/bin/env python
+
+"""
+The Cython debugger
+
+The current directory should contain a directory named 'cython_debug', or a
+path to the cython project directory should be given (the parent directory of
+cython_debug).
+
+Additional gdb args can be provided only if a path to the project directory is
+given.
+"""
+
+import os
+import sys
+import glob
+import tempfile
+import textwrap
+import subprocess
+import optparse
+import logging
+
+logger = logging.getLogger(__name__)
+
+def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
+ if not no_import:
+ pattern = os.path.join(path_to_debug_info,
+ 'cython_debug',
+ 'cython_debug_info_*')
+ debug_files = glob.glob(pattern)
+
+ if not debug_files:
+ sys.exit('%s.\nNo debug files were found in %s. Aborting.' % (
+ usage, os.path.abspath(path_to_debug_info)))
+
+ fd, tempfilename = tempfile.mkstemp()
+ f = os.fdopen(fd, 'w')
+ try:
+ f.write(prefix_code)
+ f.write(textwrap.dedent('''\
+ # This is a gdb command file
+ # See https://sourceware.org/gdb/onlinedocs/gdb/Command-Files.html
+
+ set breakpoint pending on
+ set print pretty on
+
+ python
+ # Activate virtualenv, if we were launched from one
+ import os
+ virtualenv = os.getenv('VIRTUAL_ENV')
+ if virtualenv:
+ path_to_activate_this_py = os.path.join(virtualenv, 'bin', 'activate_this.py')
+ print("gdb command file: Activating virtualenv: %s; path_to_activate_this_py: %s" % (
+ virtualenv, path_to_activate_this_py))
+ with open(path_to_activate_this_py) as f:
+ exec(f.read(), dict(__file__=path_to_activate_this_py))
+
+ from Cython.Debugger import libcython, libpython
+ end
+ '''))
+
+ if no_import:
+ # don't do this, this overrides file command in .gdbinit
+ # f.write("file %s\n" % sys.executable)
+ pass
+ else:
+ path = os.path.join(path_to_debug_info, "cython_debug", "interpreter")
+ interpreter_file = open(path)
+ try:
+ interpreter = interpreter_file.read()
+ finally:
+ interpreter_file.close()
+ f.write("file %s\n" % interpreter)
+ f.write('\n'.join('cy import %s\n' % fn for fn in debug_files))
+ f.write(textwrap.dedent('''\
+ python
+ import sys
+ try:
+ gdb.lookup_type('PyModuleObject')
+ except RuntimeError:
+ sys.stderr.write(
+ 'Python was not compiled with debug symbols (or it was '
+ 'stripped). Some functionality may not work (properly).\\n')
+ end
+
+ source .cygdbinit
+ '''))
+ finally:
+ f.close()
+
+ return tempfilename
+
+usage = "Usage: cygdb [options] [PATH [-- GDB_ARGUMENTS]]"
+
+def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
+ """
+ Start the Cython debugger. This tells gdb to import the Cython and Python
+ extensions (libcython.py and libpython.py) and it enables gdb's pending
+ breakpoints.
+
+ path_to_debug_info is the path to the Cython build directory
+ gdb_argv is the list of options to gdb
+ no_import tells cygdb whether it should import debug information
+ """
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--gdb-executable",
+ dest="gdb", default='gdb',
+ help="gdb executable to use [default: gdb]")
+ parser.add_option("--verbose", "-v",
+ dest="verbosity", action="count", default=0,
+ help="Verbose mode. Multiple -v options increase the verbosity")
+
+ (options, args) = parser.parse_args()
+ if path_to_debug_info is None:
+ if len(args) > 1:
+ path_to_debug_info = args[0]
+ else:
+ path_to_debug_info = os.curdir
+
+ if gdb_argv is None:
+ gdb_argv = args[1:]
+
+ if path_to_debug_info == '--':
+ no_import = True
+
+ logging_level = logging.WARN
+ if options.verbosity == 1:
+ logging_level = logging.INFO
if options.verbosity >= 2:
- logging_level = logging.DEBUG
- logging.basicConfig(level=logging_level)
-
- logger.info("verbosity = %r", options.verbosity)
- logger.debug("options = %r; args = %r", options, args)
- logger.debug("Done parsing command-line options. path_to_debug_info = %r, gdb_argv = %r",
- path_to_debug_info, gdb_argv)
-
- tempfilename = make_command_file(path_to_debug_info, no_import=no_import)
- logger.info("Launching %s with command file: %s and gdb_argv: %s",
- options.gdb, tempfilename, gdb_argv)
+ logging_level = logging.DEBUG
+ logging.basicConfig(level=logging_level)
+
+ logger.info("verbosity = %r", options.verbosity)
+ logger.debug("options = %r; args = %r", options, args)
+ logger.debug("Done parsing command-line options. path_to_debug_info = %r, gdb_argv = %r",
+ path_to_debug_info, gdb_argv)
+
+ tempfilename = make_command_file(path_to_debug_info, no_import=no_import)
+ logger.info("Launching %s with command file: %s and gdb_argv: %s",
+ options.gdb, tempfilename, gdb_argv)
with open(tempfilename) as tempfile:
logger.debug('Command file (%s) contains: """\n%s"""', tempfilename, tempfile.read())
logger.info("Spawning %s...", options.gdb)
@@ -153,6 +153,6 @@ def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
else:
break
logger.debug("Closing temp command file with fd: %s", tempfile.fileno())
- logger.debug("Removing temp command file: %s", tempfilename)
- os.remove(tempfilename)
- logger.debug("Removed temp command file: %s", tempfilename)
+ logger.debug("Removing temp command file: %s", tempfilename)
+ os.remove(tempfilename)
+ logger.debug("Removed temp command file: %s", tempfilename)
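
Cygdb.py above has two entry points: make_command_file() writes a temporary gdb command file that enables pending breakpoints, imports libcython/libpython and 'cy import's the cython_debug_info_* files, while main() parses the command line and launches the chosen gdb against that file. A rough usage sketch, assuming the current directory already contains a cython_debug/ directory from a gdb_debug build (paths are illustrative):

    from Cython.Debugger import Cygdb

    # Write the temporary gdb command file for the debug info found
    # under ./cython_debug; Cygdb.main() normally builds this itself
    # and then spawns the gdb given by --gdb-executable against it.
    cmdfile = Cygdb.make_command_file('.')
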
diff --git a/contrib/tools/cython/Cython/Debugger/DebugWriter.py b/contrib/tools/cython/Cython/Debugger/DebugWriter.py
index ebf99a2f64..876a3a2169 100644
--- a/contrib/tools/cython/Cython/Debugger/DebugWriter.py
+++ b/contrib/tools/cython/Cython/Debugger/DebugWriter.py
@@ -1,72 +1,72 @@
from __future__ import absolute_import
-
-import os
-import sys
-import errno
-
-try:
- from lxml import etree
- have_lxml = True
-except ImportError:
- have_lxml = False
- try:
- from xml.etree import cElementTree as etree
- except ImportError:
- try:
- from xml.etree import ElementTree as etree
- except ImportError:
+
+import os
+import sys
+import errno
+
+try:
+ from lxml import etree
+ have_lxml = True
+except ImportError:
+ have_lxml = False
+ try:
+ from xml.etree import cElementTree as etree
+ except ImportError:
+ try:
+ from xml.etree import ElementTree as etree
+ except ImportError:
etree = None
-
+
from ..Compiler import Errors
-
-
-class CythonDebugWriter(object):
- """
- Class to output debugging information for cygdb
-
- It writes debug information to cython_debug/cython_debug_info_<modulename>
- in the build directory.
- """
-
- def __init__(self, output_dir):
- if etree is None:
- raise Errors.NoElementTreeInstalledException()
-
+
+
+class CythonDebugWriter(object):
+ """
+ Class to output debugging information for cygdb
+
+ It writes debug information to cython_debug/cython_debug_info_<modulename>
+ in the build directory.
+ """
+
+ def __init__(self, output_dir):
+ if etree is None:
+ raise Errors.NoElementTreeInstalledException()
+
self.output_dir = os.path.join(output_dir or os.curdir, 'cython_debug')
- self.tb = etree.TreeBuilder()
- # set by Cython.Compiler.ParseTreeTransforms.DebugTransform
- self.module_name = None
- self.start('cython_debug', attrs=dict(version='1.0'))
-
- def start(self, name, attrs=None):
- self.tb.start(name, attrs or {})
-
- def end(self, name):
- self.tb.end(name)
-
+ self.tb = etree.TreeBuilder()
+ # set by Cython.Compiler.ParseTreeTransforms.DebugTransform
+ self.module_name = None
+ self.start('cython_debug', attrs=dict(version='1.0'))
+
+ def start(self, name, attrs=None):
+ self.tb.start(name, attrs or {})
+
+ def end(self, name):
+ self.tb.end(name)
+
def add_entry(self, name, **attrs):
self.tb.start(name, attrs)
self.tb.end(name)
- def serialize(self):
- self.tb.end('Module')
- self.tb.end('cython_debug')
- xml_root_element = self.tb.close()
-
- try:
- os.makedirs(self.output_dir)
+ def serialize(self):
+ self.tb.end('Module')
+ self.tb.end('cython_debug')
+ xml_root_element = self.tb.close()
+
+ try:
+ os.makedirs(self.output_dir)
except OSError as e:
- if e.errno != errno.EEXIST:
- raise
-
- et = etree.ElementTree(xml_root_element)
- kw = {}
- if have_lxml:
- kw['pretty_print'] = True
-
- fn = "cython_debug_info_" + self.module_name
- et.write(os.path.join(self.output_dir, fn), encoding="UTF-8", **kw)
-
- interpreter_path = os.path.join(self.output_dir, 'interpreter')
- with open(interpreter_path, 'w') as f:
- f.write(sys.executable)
+ if e.errno != errno.EEXIST:
+ raise
+
+ et = etree.ElementTree(xml_root_element)
+ kw = {}
+ if have_lxml:
+ kw['pretty_print'] = True
+
+ fn = "cython_debug_info_" + self.module_name
+ et.write(os.path.join(self.output_dir, fn), encoding="UTF-8", **kw)
+
+ interpreter_path = os.path.join(self.output_dir, 'interpreter')
+ with open(interpreter_path, 'w') as f:
+ f.write(sys.executable)
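
CythonDebugWriter above is a small ElementTree front end: start()/end()/add_entry() build the XML tree, and serialize() closes the implicit 'Module' and 'cython_debug' elements, then writes cython_debug/cython_debug_info_<module_name> plus an 'interpreter' file holding sys.executable. A minimal driving sketch (element and attribute names here are illustrative; the real ones are produced on the compiler side, see the DebugTransform reference in the comment above):

    writer = CythonDebugWriter('build')      # output lands in build/cython_debug/
    writer.module_name = 'codefile'
    writer.start('Module', attrs={'name': 'codefile'})
    writer.add_entry('Function', name='spam', qualified_name='codefile.spam')
    writer.serialize()                       # ends 'Module' and 'cython_debug', writes the files
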
diff --git a/contrib/tools/cython/Cython/Debugger/Tests/TestLibCython.py b/contrib/tools/cython/Cython/Debugger/Tests/TestLibCython.py
index 6a769cc2fc..13560646ff 100644
--- a/contrib/tools/cython/Cython/Debugger/Tests/TestLibCython.py
+++ b/contrib/tools/cython/Cython/Debugger/Tests/TestLibCython.py
@@ -1,274 +1,274 @@
-
-import os
-import re
-import sys
-import shutil
-import warnings
-import textwrap
-import unittest
-import tempfile
-import subprocess
-#import distutils.core
-#from distutils import sysconfig
-from distutils import ccompiler
-
-import runtests
-import Cython.Distutils.extension
+
+import os
+import re
+import sys
+import shutil
+import warnings
+import textwrap
+import unittest
+import tempfile
+import subprocess
+#import distutils.core
+#from distutils import sysconfig
+from distutils import ccompiler
+
+import runtests
+import Cython.Distutils.extension
import Cython.Distutils.old_build_ext as build_ext
-from Cython.Debugger import Cygdb as cygdb
-
-root = os.path.dirname(os.path.abspath(__file__))
-codefile = os.path.join(root, 'codefile')
-cfuncs_file = os.path.join(root, 'cfuncs.c')
-
-with open(codefile) as f:
- source_to_lineno = dict((line.strip(), i + 1) for i, line in enumerate(f))
-
-
-have_gdb = None
-def test_gdb():
- global have_gdb
- if have_gdb is not None:
- return have_gdb
-
+from Cython.Debugger import Cygdb as cygdb
+
+root = os.path.dirname(os.path.abspath(__file__))
+codefile = os.path.join(root, 'codefile')
+cfuncs_file = os.path.join(root, 'cfuncs.c')
+
+with open(codefile) as f:
+ source_to_lineno = dict((line.strip(), i + 1) for i, line in enumerate(f))
+
+
+have_gdb = None
+def test_gdb():
+ global have_gdb
+ if have_gdb is not None:
+ return have_gdb
+
have_gdb = False
- try:
+ try:
p = subprocess.Popen(['gdb', '-nx', '--version'], stdout=subprocess.PIPE)
- except OSError:
+ except OSError:
# gdb not found
gdb_version = None
- else:
+ else:
stdout, _ = p.communicate()
- # Based on Lib/test/test_gdb.py
+ # Based on Lib/test/test_gdb.py
regex = r"GNU gdb [^\d]*(\d+)\.(\d+)"
gdb_version = re.match(regex, stdout.decode('ascii', 'ignore'))
-
+
if gdb_version:
gdb_version_number = list(map(int, gdb_version.groups()))
- if gdb_version_number >= [7, 2]:
+ if gdb_version_number >= [7, 2]:
have_gdb = True
with tempfile.NamedTemporaryFile(mode='w+') as python_version_script:
- python_version_script.write(
- 'python import sys; print("%s %s" % sys.version_info[:2])')
- python_version_script.flush()
- p = subprocess.Popen(['gdb', '-batch', '-x', python_version_script.name],
- stdout=subprocess.PIPE)
+ python_version_script.write(
+ 'python import sys; print("%s %s" % sys.version_info[:2])')
+ python_version_script.flush()
+ p = subprocess.Popen(['gdb', '-batch', '-x', python_version_script.name],
+ stdout=subprocess.PIPE)
stdout, _ = p.communicate()
- try:
+ try:
internal_python_version = list(map(int, stdout.decode('ascii', 'ignore').split()))
if internal_python_version < [2, 6]:
have_gdb = False
- except ValueError:
- have_gdb = False
-
+ except ValueError:
+ have_gdb = False
+
if not have_gdb:
warnings.warn('Skipping gdb tests, need gdb >= 7.2 with Python >= 2.6')
-
- return have_gdb
-
-
-class DebuggerTestCase(unittest.TestCase):
-
- def setUp(self):
- """
- Run gdb and have cygdb import the debug information from the code
- defined in TestParseTreeTransforms's setUp method
- """
- if not test_gdb():
- return
-
- self.tempdir = tempfile.mkdtemp()
- self.destfile = os.path.join(self.tempdir, 'codefile.pyx')
- self.debug_dest = os.path.join(self.tempdir,
- 'cython_debug',
- 'cython_debug_info_codefile')
- self.cfuncs_destfile = os.path.join(self.tempdir, 'cfuncs')
-
- self.cwd = os.getcwd()
- try:
- os.chdir(self.tempdir)
-
- shutil.copy(codefile, self.destfile)
- shutil.copy(cfuncs_file, self.cfuncs_destfile + '.c')
+
+ return have_gdb
+
+
+class DebuggerTestCase(unittest.TestCase):
+
+ def setUp(self):
+ """
+ Run gdb and have cygdb import the debug information from the code
+ defined in TestParseTreeTransforms's setUp method
+ """
+ if not test_gdb():
+ return
+
+ self.tempdir = tempfile.mkdtemp()
+ self.destfile = os.path.join(self.tempdir, 'codefile.pyx')
+ self.debug_dest = os.path.join(self.tempdir,
+ 'cython_debug',
+ 'cython_debug_info_codefile')
+ self.cfuncs_destfile = os.path.join(self.tempdir, 'cfuncs')
+
+ self.cwd = os.getcwd()
+ try:
+ os.chdir(self.tempdir)
+
+ shutil.copy(codefile, self.destfile)
+ shutil.copy(cfuncs_file, self.cfuncs_destfile + '.c')
shutil.copy(cfuncs_file.replace('.c', '.h'),
self.cfuncs_destfile + '.h')
-
- compiler = ccompiler.new_compiler()
- compiler.compile(['cfuncs.c'], debug=True, extra_postargs=['-fPIC'])
-
- opts = dict(
- test_directory=self.tempdir,
- module='codefile',
- )
-
- optimization_disabler = build_ext.Optimization()
-
- cython_compile_testcase = runtests.CythonCompileTestCase(
- workdir=self.tempdir,
- # we clean up everything (not only compiled files)
- cleanup_workdir=False,
- tags=runtests.parse_tags(codefile),
- **opts
- )
-
-
- new_stderr = open(os.devnull, 'w')
-
- stderr = sys.stderr
- sys.stderr = new_stderr
-
- optimization_disabler.disable_optimization()
- try:
- cython_compile_testcase.run_cython(
- targetdir=self.tempdir,
- incdir=None,
- annotate=False,
- extra_compile_options={
- 'gdb_debug':True,
- 'output_dir':self.tempdir,
- },
- **opts
- )
-
- cython_compile_testcase.run_distutils(
- incdir=None,
- workdir=self.tempdir,
- extra_extension_args={'extra_objects':['cfuncs.o']},
- **opts
- )
- finally:
- optimization_disabler.restore_state()
- sys.stderr = stderr
- new_stderr.close()
-
- # ext = Cython.Distutils.extension.Extension(
- # 'codefile',
- # ['codefile.pyx'],
- # cython_gdb=True,
- # extra_objects=['cfuncs.o'])
- #
- # distutils.core.setup(
- # script_args=['build_ext', '--inplace'],
- # ext_modules=[ext],
- # cmdclass=dict(build_ext=Cython.Distutils.build_ext)
- # )
-
- except:
- os.chdir(self.cwd)
- raise
-
- def tearDown(self):
- if not test_gdb():
- return
- os.chdir(self.cwd)
- shutil.rmtree(self.tempdir)
-
-
-class GdbDebuggerTestCase(DebuggerTestCase):
-
- def setUp(self):
- if not test_gdb():
- return
-
- super(GdbDebuggerTestCase, self).setUp()
-
- prefix_code = textwrap.dedent('''\
- python
-
- import os
- import sys
- import traceback
-
- def excepthook(type, value, tb):
- traceback.print_exception(type, value, tb)
- sys.stderr.flush()
- sys.stdout.flush()
- os._exit(1)
-
- sys.excepthook = excepthook
-
- # Have tracebacks end up on sys.stderr (gdb replaces sys.stderr
- # with an object that calls gdb.write())
- sys.stderr = sys.__stderr__
-
- end
- ''')
-
- code = textwrap.dedent('''\
- python
-
- from Cython.Debugger.Tests import test_libcython_in_gdb
- test_libcython_in_gdb.main(version=%r)
-
- end
- ''' % (sys.version_info[:2],))
-
- self.gdb_command_file = cygdb.make_command_file(self.tempdir,
- prefix_code)
-
- with open(self.gdb_command_file, 'a') as f:
- f.write(code)
-
- args = ['gdb', '-batch', '-x', self.gdb_command_file, '-n', '--args',
- sys.executable, '-c', 'import codefile']
-
- paths = []
- path = os.environ.get('PYTHONPATH')
- if path:
- paths.append(path)
- paths.append(os.path.dirname(os.path.dirname(
- os.path.abspath(Cython.__file__))))
- env = dict(os.environ, PYTHONPATH=os.pathsep.join(paths))
-
- self.p = subprocess.Popen(
- args,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=env)
-
- def tearDown(self):
- if not test_gdb():
- return
-
- try:
- super(GdbDebuggerTestCase, self).tearDown()
- if self.p:
- try: self.p.stdout.close()
- except: pass
- try: self.p.stderr.close()
- except: pass
- self.p.wait()
- finally:
- os.remove(self.gdb_command_file)
-
-
-class TestAll(GdbDebuggerTestCase):
-
- def test_all(self):
- if not test_gdb():
- return
-
- out, err = self.p.communicate()
- out = out.decode('UTF-8')
- err = err.decode('UTF-8')
-
- exit_status = self.p.returncode
-
- if exit_status == 1:
- sys.stderr.write(out)
- sys.stderr.write(err)
- elif exit_status >= 2:
- border = u'*' * 30
- start = u'%s v INSIDE GDB v %s' % (border, border)
- stderr = u'%s v STDERR v %s' % (border, border)
- end = u'%s ^ INSIDE GDB ^ %s' % (border, border)
- errmsg = u'\n%s\n%s%s\n%s%s' % (start, out, stderr, err, end)
-
- sys.stderr.write(errmsg)
-
- # FIXME: re-enable this to make the test fail on internal failures
- #self.assertEqual(exit_status, 0)
-
-
-if __name__ == '__main__':
- unittest.main()
+
+ compiler = ccompiler.new_compiler()
+ compiler.compile(['cfuncs.c'], debug=True, extra_postargs=['-fPIC'])
+
+ opts = dict(
+ test_directory=self.tempdir,
+ module='codefile',
+ )
+
+ optimization_disabler = build_ext.Optimization()
+
+ cython_compile_testcase = runtests.CythonCompileTestCase(
+ workdir=self.tempdir,
+ # we clean up everything (not only compiled files)
+ cleanup_workdir=False,
+ tags=runtests.parse_tags(codefile),
+ **opts
+ )
+
+
+ new_stderr = open(os.devnull, 'w')
+
+ stderr = sys.stderr
+ sys.stderr = new_stderr
+
+ optimization_disabler.disable_optimization()
+ try:
+ cython_compile_testcase.run_cython(
+ targetdir=self.tempdir,
+ incdir=None,
+ annotate=False,
+ extra_compile_options={
+ 'gdb_debug':True,
+ 'output_dir':self.tempdir,
+ },
+ **opts
+ )
+
+ cython_compile_testcase.run_distutils(
+ incdir=None,
+ workdir=self.tempdir,
+ extra_extension_args={'extra_objects':['cfuncs.o']},
+ **opts
+ )
+ finally:
+ optimization_disabler.restore_state()
+ sys.stderr = stderr
+ new_stderr.close()
+
+ # ext = Cython.Distutils.extension.Extension(
+ # 'codefile',
+ # ['codefile.pyx'],
+ # cython_gdb=True,
+ # extra_objects=['cfuncs.o'])
+ #
+ # distutils.core.setup(
+ # script_args=['build_ext', '--inplace'],
+ # ext_modules=[ext],
+ # cmdclass=dict(build_ext=Cython.Distutils.build_ext)
+ # )
+
+ except:
+ os.chdir(self.cwd)
+ raise
+
+ def tearDown(self):
+ if not test_gdb():
+ return
+ os.chdir(self.cwd)
+ shutil.rmtree(self.tempdir)
+
+
+class GdbDebuggerTestCase(DebuggerTestCase):
+
+ def setUp(self):
+ if not test_gdb():
+ return
+
+ super(GdbDebuggerTestCase, self).setUp()
+
+ prefix_code = textwrap.dedent('''\
+ python
+
+ import os
+ import sys
+ import traceback
+
+ def excepthook(type, value, tb):
+ traceback.print_exception(type, value, tb)
+ sys.stderr.flush()
+ sys.stdout.flush()
+ os._exit(1)
+
+ sys.excepthook = excepthook
+
+ # Have tracebacks end up on sys.stderr (gdb replaces sys.stderr
+ # with an object that calls gdb.write())
+ sys.stderr = sys.__stderr__
+
+ end
+ ''')
+
+ code = textwrap.dedent('''\
+ python
+
+ from Cython.Debugger.Tests import test_libcython_in_gdb
+ test_libcython_in_gdb.main(version=%r)
+
+ end
+ ''' % (sys.version_info[:2],))
+
+ self.gdb_command_file = cygdb.make_command_file(self.tempdir,
+ prefix_code)
+
+ with open(self.gdb_command_file, 'a') as f:
+ f.write(code)
+
+ args = ['gdb', '-batch', '-x', self.gdb_command_file, '-n', '--args',
+ sys.executable, '-c', 'import codefile']
+
+ paths = []
+ path = os.environ.get('PYTHONPATH')
+ if path:
+ paths.append(path)
+ paths.append(os.path.dirname(os.path.dirname(
+ os.path.abspath(Cython.__file__))))
+ env = dict(os.environ, PYTHONPATH=os.pathsep.join(paths))
+
+ self.p = subprocess.Popen(
+ args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=env)
+
+ def tearDown(self):
+ if not test_gdb():
+ return
+
+ try:
+ super(GdbDebuggerTestCase, self).tearDown()
+ if self.p:
+ try: self.p.stdout.close()
+ except: pass
+ try: self.p.stderr.close()
+ except: pass
+ self.p.wait()
+ finally:
+ os.remove(self.gdb_command_file)
+
+
+class TestAll(GdbDebuggerTestCase):
+
+ def test_all(self):
+ if not test_gdb():
+ return
+
+ out, err = self.p.communicate()
+ out = out.decode('UTF-8')
+ err = err.decode('UTF-8')
+
+ exit_status = self.p.returncode
+
+ if exit_status == 1:
+ sys.stderr.write(out)
+ sys.stderr.write(err)
+ elif exit_status >= 2:
+ border = u'*' * 30
+ start = u'%s v INSIDE GDB v %s' % (border, border)
+ stderr = u'%s v STDERR v %s' % (border, border)
+ end = u'%s ^ INSIDE GDB ^ %s' % (border, border)
+ errmsg = u'\n%s\n%s%s\n%s%s' % (start, out, stderr, err, end)
+
+ sys.stderr.write(errmsg)
+
+ # FIXME: re-enable this to make the test fail on internal failures
+ #self.assertEqual(exit_status, 0)
+
+
+if __name__ == '__main__':
+ unittest.main()
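
The test_gdb() helper above gates every debugger test on gdb >= 7.2 built against Python >= 2.6. The version probe reduces to the following standalone check (same regex and threshold as in the code, assuming gdb is on PATH):

    import re, subprocess

    out = subprocess.check_output(['gdb', '-nx', '--version']).decode('ascii', 'ignore')
    m = re.match(r"GNU gdb [^\d]*(\d+)\.(\d+)", out)
    have_gdb = bool(m) and list(map(int, m.groups())) >= [7, 2]   # need gdb >= 7.2
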
diff --git a/contrib/tools/cython/Cython/Debugger/Tests/__init__.py b/contrib/tools/cython/Cython/Debugger/Tests/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Debugger/Tests/__init__.py
+++ b/contrib/tools/cython/Cython/Debugger/Tests/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Debugger/Tests/cfuncs.c b/contrib/tools/cython/Cython/Debugger/Tests/cfuncs.c
index 263589e77f..ccb42050bf 100644
--- a/contrib/tools/cython/Cython/Debugger/Tests/cfuncs.c
+++ b/contrib/tools/cython/Cython/Debugger/Tests/cfuncs.c
@@ -1,8 +1,8 @@
-void
-some_c_function(void)
-{
- int a, b, c;
-
- a = 1;
- b = 2;
-}
+void
+some_c_function(void)
+{
+ int a, b, c;
+
+ a = 1;
+ b = 2;
+}
diff --git a/contrib/tools/cython/Cython/Debugger/Tests/codefile b/contrib/tools/cython/Cython/Debugger/Tests/codefile
index 5c2b0c2957..6b4c6b6add 100644
--- a/contrib/tools/cython/Cython/Debugger/Tests/codefile
+++ b/contrib/tools/cython/Cython/Debugger/Tests/codefile
@@ -1,50 +1,50 @@
-cdef extern from "stdio.h":
- int puts(char *s)
-
+cdef extern from "stdio.h":
+ int puts(char *s)
+
cdef extern from "cfuncs.h":
- void some_c_function()
-
-import os
-
-cdef int c_var = 12
-python_var = 13
-
-def spam(a=0):
- cdef:
- int b, c
-
- b = c = d = 0
-
- b = 1
- c = 2
- int(10)
- puts("spam")
- os.path.join("foo", "bar")
- some_c_function()
-
-cpdef eggs():
+ void some_c_function()
+
+import os
+
+cdef int c_var = 12
+python_var = 13
+
+def spam(a=0):
+ cdef:
+ int b, c
+
+ b = c = d = 0
+
+ b = 1
+ c = 2
+ int(10)
+ puts("spam")
+ os.path.join("foo", "bar")
+ some_c_function()
+
+cpdef eggs():
pass
-cdef ham():
- pass
-
-cdef class SomeClass(object):
- def spam(self):
- pass
-
-def outer():
- cdef object a = "an object"
- def inner():
- b = 2
- # access closed over variables
- print a, b
- return inner
-
-
-outer()()
-
-spam()
-print "bye!"
-
-def use_ham():
- ham()
+cdef ham():
+ pass
+
+cdef class SomeClass(object):
+ def spam(self):
+ pass
+
+def outer():
+ cdef object a = "an object"
+ def inner():
+ b = 2
+ # access closed over variables
+ print a, b
+ return inner
+
+
+outer()()
+
+spam()
+print "bye!"
+
+def use_ham():
+ ham()
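
The gdb tests locate breakpoints in the codefile fixture above by source text rather than by line number; TestLibCython builds the lookup map once per run. A condensed restatement of that lookup:

    with open(codefile) as f:
        source_to_lineno = dict((line.strip(), i + 1) for i, line in enumerate(f))

    # e.g. break_and_run('c = 2') then executes:
    #   gdb.execute('cy break codefile:%d' % source_to_lineno['c = 2'])
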
diff --git a/contrib/tools/cython/Cython/Debugger/Tests/test_libcython_in_gdb.py b/contrib/tools/cython/Cython/Debugger/Tests/test_libcython_in_gdb.py
index c4d4ace7c0..bd7608d607 100644
--- a/contrib/tools/cython/Cython/Debugger/Tests/test_libcython_in_gdb.py
+++ b/contrib/tools/cython/Cython/Debugger/Tests/test_libcython_in_gdb.py
@@ -1,496 +1,496 @@
-"""
-Tests that run inside GDB.
-
-Note: debug information is already imported by the file generated by
-Cython.Debugger.Cygdb.make_command_file()
-"""
-
+"""
+Tests that run inside GDB.
+
+Note: debug information is already imported by the file generated by
+Cython.Debugger.Cygdb.make_command_file()
+"""
+
from __future__ import absolute_import
-import os
-import re
-import sys
-import trace
-import inspect
-import warnings
-import unittest
-import textwrap
-import tempfile
-import functools
-import traceback
-import itertools
+import os
+import re
+import sys
+import trace
+import inspect
+import warnings
+import unittest
+import textwrap
+import tempfile
+import functools
+import traceback
+import itertools
#from test import test_support
-
-import gdb
-
+
+import gdb
+
from .. import libcython
from .. import libpython
from . import TestLibCython as test_libcython
from ...Utils import add_metaclass
-
-# for some reason sys.argv is missing in gdb
-sys.argv = ['gdb']
-
-
-def print_on_call_decorator(func):
- @functools.wraps(func)
- def wrapper(self, *args, **kwargs):
- _debug(type(self).__name__, func.__name__)
-
- try:
- return func(self, *args, **kwargs)
+
+# for some reason sys.argv is missing in gdb
+sys.argv = ['gdb']
+
+
+def print_on_call_decorator(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ _debug(type(self).__name__, func.__name__)
+
+ try:
+ return func(self, *args, **kwargs)
except Exception:
_debug("An exception occurred:", traceback.format_exc())
- raise
-
- return wrapper
-
-class TraceMethodCallMeta(type):
-
- def __init__(self, name, bases, dict):
+ raise
+
+ return wrapper
+
+class TraceMethodCallMeta(type):
+
+ def __init__(self, name, bases, dict):
for func_name, func in dict.items():
- if inspect.isfunction(func):
- setattr(self, func_name, print_on_call_decorator(func))
-
-
+ if inspect.isfunction(func):
+ setattr(self, func_name, print_on_call_decorator(func))
+
+
@add_metaclass(TraceMethodCallMeta)
-class DebugTestCase(unittest.TestCase):
- """
- Base class for test cases. On teardown it kills the inferior and unsets
- all breakpoints.
- """
-
- def __init__(self, name):
- super(DebugTestCase, self).__init__(name)
- self.cy = libcython.cy
- self.module = libcython.cy.cython_namespace['codefile']
- self.spam_func, self.spam_meth = libcython.cy.functions_by_name['spam']
- self.ham_func = libcython.cy.functions_by_qualified_name[
- 'codefile.ham']
- self.eggs_func = libcython.cy.functions_by_qualified_name[
- 'codefile.eggs']
-
- def read_var(self, varname, cast_to=None):
- result = gdb.parse_and_eval('$cy_cvalue("%s")' % varname)
- if cast_to:
- result = cast_to(result)
-
- return result
-
- def local_info(self):
- return gdb.execute('info locals', to_string=True)
-
- def lineno_equals(self, source_line=None, lineno=None):
- if source_line is not None:
- lineno = test_libcython.source_to_lineno[source_line]
- frame = gdb.selected_frame()
- self.assertEqual(libcython.cython_info.lineno(frame), lineno)
-
- def break_and_run(self, source_line):
- break_lineno = test_libcython.source_to_lineno[source_line]
- gdb.execute('cy break codefile:%d' % break_lineno, to_string=True)
- gdb.execute('run', to_string=True)
-
- def tearDown(self):
- gdb.execute('delete breakpoints', to_string=True)
- try:
- gdb.execute('kill inferior 1', to_string=True)
- except RuntimeError:
- pass
-
- gdb.execute('set args -c "import codefile"')
-
-
-class TestDebugInformationClasses(DebugTestCase):
-
- def test_CythonModule(self):
- "test that debug information was parsed properly into data structures"
- self.assertEqual(self.module.name, 'codefile')
- global_vars = ('c_var', 'python_var', '__name__',
- '__builtins__', '__doc__', '__file__')
- assert set(global_vars).issubset(self.module.globals)
-
- def test_CythonVariable(self):
- module_globals = self.module.globals
- c_var = module_globals['c_var']
- python_var = module_globals['python_var']
- self.assertEqual(c_var.type, libcython.CObject)
- self.assertEqual(python_var.type, libcython.PythonObject)
- self.assertEqual(c_var.qualified_name, 'codefile.c_var')
-
- def test_CythonFunction(self):
- self.assertEqual(self.spam_func.qualified_name, 'codefile.spam')
- self.assertEqual(self.spam_meth.qualified_name,
- 'codefile.SomeClass.spam')
- self.assertEqual(self.spam_func.module, self.module)
-
- assert self.eggs_func.pf_cname, (self.eggs_func, self.eggs_func.pf_cname)
- assert not self.ham_func.pf_cname
- assert not self.spam_func.pf_cname
- assert not self.spam_meth.pf_cname
-
- self.assertEqual(self.spam_func.type, libcython.CObject)
- self.assertEqual(self.ham_func.type, libcython.CObject)
-
- self.assertEqual(self.spam_func.arguments, ['a'])
- self.assertEqual(self.spam_func.step_into_functions,
- set(['puts', 'some_c_function']))
-
- expected_lineno = test_libcython.source_to_lineno['def spam(a=0):']
- self.assertEqual(self.spam_func.lineno, expected_lineno)
- self.assertEqual(sorted(self.spam_func.locals), list('abcd'))
-
-
-class TestParameters(unittest.TestCase):
-
- def test_parameters(self):
- gdb.execute('set cy_colorize_code on')
- assert libcython.parameters.colorize_code
- gdb.execute('set cy_colorize_code off')
- assert not libcython.parameters.colorize_code
-
-
-class TestBreak(DebugTestCase):
-
- def test_break(self):
- breakpoint_amount = len(gdb.breakpoints() or ())
- gdb.execute('cy break codefile.spam')
-
- self.assertEqual(len(gdb.breakpoints()), breakpoint_amount + 1)
- bp = gdb.breakpoints()[-1]
- self.assertEqual(bp.type, gdb.BP_BREAKPOINT)
- assert self.spam_func.cname in bp.location
- assert bp.enabled
-
- def test_python_break(self):
- gdb.execute('cy break -p join')
- assert 'def join(' in gdb.execute('cy run', to_string=True)
-
- def test_break_lineno(self):
- beginline = 'import os'
- nextline = 'cdef int c_var = 12'
-
- self.break_and_run(beginline)
- self.lineno_equals(beginline)
- step_result = gdb.execute('cy step', to_string=True)
- self.lineno_equals(nextline)
- assert step_result.rstrip().endswith(nextline)
-
-
-class TestKilled(DebugTestCase):
-
- def test_abort(self):
- gdb.execute("set args -c 'import os; os.abort()'")
- output = gdb.execute('cy run', to_string=True)
- assert 'abort' in output.lower()
-
-
-class DebugStepperTestCase(DebugTestCase):
-
- def step(self, varnames_and_values, source_line=None, lineno=None):
- gdb.execute(self.command)
- for varname, value in varnames_and_values:
- self.assertEqual(self.read_var(varname), value, self.local_info())
-
- self.lineno_equals(source_line, lineno)
-
-
-class TestStep(DebugStepperTestCase):
- """
- Test stepping. Stepping happens in the code found in
- Cython/Debugger/Tests/codefile.
- """
-
- def test_cython_step(self):
- gdb.execute('cy break codefile.spam')
-
- gdb.execute('run', to_string=True)
- self.lineno_equals('def spam(a=0):')
-
- gdb.execute('cy step', to_string=True)
- self.lineno_equals('b = c = d = 0')
-
- self.command = 'cy step'
- self.step([('b', 0)], source_line='b = 1')
- self.step([('b', 1), ('c', 0)], source_line='c = 2')
- self.step([('c', 2)], source_line='int(10)')
- self.step([], source_line='puts("spam")')
-
- gdb.execute('cont', to_string=True)
- self.assertEqual(len(gdb.inferiors()), 1)
- self.assertEqual(gdb.inferiors()[0].pid, 0)
-
- def test_c_step(self):
- self.break_and_run('some_c_function()')
- gdb.execute('cy step', to_string=True)
- self.assertEqual(gdb.selected_frame().name(), 'some_c_function')
-
- def test_python_step(self):
- self.break_and_run('os.path.join("foo", "bar")')
-
- result = gdb.execute('cy step', to_string=True)
-
- curframe = gdb.selected_frame()
- self.assertEqual(curframe.name(), 'PyEval_EvalFrameEx')
-
- pyframe = libpython.Frame(curframe).get_pyop()
-        # With Python 3 inferiors, pyframe.co_name returns a PyUnicodePtr;
-        # use proxyval() so both Python 2 and Python 3 inferiors are handled.
- frame_name = pyframe.co_name.proxyval(set())
- self.assertEqual(frame_name, 'join')
- assert re.match(r'\d+ def join\(', result), result
-
-
-class TestNext(DebugStepperTestCase):
-
- def test_cython_next(self):
- self.break_and_run('c = 2')
-
- lines = (
- 'int(10)',
- 'puts("spam")',
- 'os.path.join("foo", "bar")',
- 'some_c_function()',
- )
-
- for line in lines:
- gdb.execute('cy next')
- self.lineno_equals(line)
-
-
-class TestLocalsGlobals(DebugTestCase):
-
- def test_locals(self):
- self.break_and_run('int(10)')
-
- result = gdb.execute('cy locals', to_string=True)
-        assert 'a = 0' in result, repr(result)
-        assert 'b = (int) 1' in result, repr(result)
- assert 'c = (int) 2' in result, repr(result)
-
- def test_globals(self):
- self.break_and_run('int(10)')
-
- result = gdb.execute('cy globals', to_string=True)
- assert '__name__ ' in result, repr(result)
- assert '__doc__ ' in result, repr(result)
- assert 'os ' in result, repr(result)
- assert 'c_var ' in result, repr(result)
- assert 'python_var ' in result, repr(result)
-
-
-class TestBacktrace(DebugTestCase):
-
- def test_backtrace(self):
- libcython.parameters.colorize_code.value = False
-
- self.break_and_run('os.path.join("foo", "bar")')
-
- def match_backtrace_output(result):
- assert re.search(r'\#\d+ *0x.* in spam\(\) at .*codefile\.pyx:22',
- result), result
- assert 'os.path.join("foo", "bar")' in result, result
-
- result = gdb.execute('cy bt', to_string=True)
- match_backtrace_output(result)
-
- result = gdb.execute('cy bt -a', to_string=True)
- match_backtrace_output(result)
-
- # Apparently not everyone has main()
- # assert re.search(r'\#0 *0x.* in main\(\)', result), result
-
-
-class TestFunctions(DebugTestCase):
-
- def test_functions(self):
- self.break_and_run('c = 2')
- result = gdb.execute('print $cy_cname("b")', to_string=True)
- assert re.search('__pyx_.*b', result), result
-
- result = gdb.execute('print $cy_lineno()', to_string=True)
- supposed_lineno = test_libcython.source_to_lineno['c = 2']
- assert str(supposed_lineno) in result, (supposed_lineno, result)
-
- result = gdb.execute('print $cy_cvalue("b")', to_string=True)
- assert '= 1' in result
-
-
-class TestPrint(DebugTestCase):
-
- def test_print(self):
- self.break_and_run('c = 2')
- result = gdb.execute('cy print b', to_string=True)
- self.assertEqual('b = (int) 1\n', result)
-
-
-class TestUpDown(DebugTestCase):
-
- def test_updown(self):
- self.break_and_run('os.path.join("foo", "bar")')
- gdb.execute('cy step')
- self.assertRaises(RuntimeError, gdb.execute, 'cy down')
-
- result = gdb.execute('cy up', to_string=True)
- assert 'spam()' in result
- assert 'os.path.join("foo", "bar")' in result
-
-
-class TestExec(DebugTestCase):
-
- def setUp(self):
- super(TestExec, self).setUp()
- self.fd, self.tmpfilename = tempfile.mkstemp()
- self.tmpfile = os.fdopen(self.fd, 'r+')
-
- def tearDown(self):
- super(TestExec, self).tearDown()
-
- try:
- self.tmpfile.close()
- finally:
- os.remove(self.tmpfilename)
-
- def eval_command(self, command):
- gdb.execute('cy exec open(%r, "w").write(str(%s))' %
- (self.tmpfilename, command))
- return self.tmpfile.read().strip()
-
- def test_cython_exec(self):
- self.break_and_run('os.path.join("foo", "bar")')
-
- # test normal behaviour
- self.assertEqual("[0]", self.eval_command('[a]'))
-
- # test multiline code
- result = gdb.execute(textwrap.dedent('''\
- cy exec
- pass
-
- "nothing"
- end
- '''))
- result = self.tmpfile.read().rstrip()
- self.assertEqual('', result)
-
- def test_python_exec(self):
- self.break_and_run('os.path.join("foo", "bar")')
- gdb.execute('cy step')
-
- gdb.execute('cy exec some_random_var = 14')
- self.assertEqual('14', self.eval_command('some_random_var'))
-
-
-class CySet(DebugTestCase):
-
- def test_cyset(self):
- self.break_and_run('os.path.join("foo", "bar")')
-
- gdb.execute('cy set a = $cy_eval("{None: []}")')
- stringvalue = self.read_var("a", cast_to=str)
- self.assertEqual(stringvalue, "{None: []}")
-
-
-class TestCyEval(DebugTestCase):
- "Test the $cy_eval() gdb function."
-
- def test_cy_eval(self):
- # This function leaks a few objects in the GDB python process. This
- # is no biggie
- self.break_and_run('os.path.join("foo", "bar")')
-
- result = gdb.execute('print $cy_eval("None")', to_string=True)
- assert re.match(r'\$\d+ = None\n', result), result
-
- result = gdb.execute('print $cy_eval("[a]")', to_string=True)
- assert re.match(r'\$\d+ = \[0\]', result), result
-
-
-class TestClosure(DebugTestCase):
-
- def break_and_run_func(self, funcname):
- gdb.execute('cy break ' + funcname)
- gdb.execute('cy run')
-
- def test_inner(self):
- self.break_and_run_func('inner')
- self.assertEqual('', gdb.execute('cy locals', to_string=True))
-
- # Allow the Cython-generated code to initialize the scope variable
- gdb.execute('cy step')
-
- self.assertEqual(str(self.read_var('a')), "'an object'")
- print_result = gdb.execute('cy print a', to_string=True).strip()
- self.assertEqual(print_result, "a = 'an object'")
-
- def test_outer(self):
- self.break_and_run_func('outer')
- self.assertEqual('', gdb.execute('cy locals', to_string=True))
-
- # Initialize scope with 'a' uninitialized
- gdb.execute('cy step')
- self.assertEqual('', gdb.execute('cy locals', to_string=True))
-
-        # Initialize 'a' to 'an object'
- gdb.execute('cy step')
- print_result = gdb.execute('cy print a', to_string=True).strip()
- self.assertEqual(print_result, "a = 'an object'")
-
-
-_do_debug = os.environ.get('GDB_DEBUG')
-if _do_debug:
- _debug_file = open('/dev/tty', 'w')
-
-def _debug(*messages):
- if _do_debug:
- messages = itertools.chain([sys._getframe(1).f_code.co_name, ':'],
- messages)
- _debug_file.write(' '.join(str(msg) for msg in messages) + '\n')
-
-
-def run_unittest_in_module(modulename):
- try:
- gdb.lookup_type('PyModuleObject')
- except RuntimeError:
- msg = ("Unable to run tests, Python was not compiled with "
- "debugging information. Either compile python with "
- "-g or get a debug build (configure with --with-pydebug).")
- warnings.warn(msg)
- os._exit(1)
- else:
- m = __import__(modulename, fromlist=[''])
- tests = inspect.getmembers(m, inspect.isclass)
-
- # test_support.run_unittest(tests)
-
- test_loader = unittest.TestLoader()
- suite = unittest.TestSuite(
- [test_loader.loadTestsFromTestCase(cls) for name, cls in tests])
-
- result = unittest.TextTestRunner(verbosity=1).run(suite)
- return result.wasSuccessful()
-
-def runtests():
- """
- Run the libcython and libpython tests. Ensure that an appropriate status is
- returned to the parent test process.
- """
- from Cython.Debugger.Tests import test_libpython_in_gdb
-
- success_libcython = run_unittest_in_module(__name__)
- success_libpython = run_unittest_in_module(test_libpython_in_gdb.__name__)
-
- if not success_libcython or not success_libpython:
- sys.exit(2)
-
-def main(version, trace_code=False):
- global inferior_python_version
-
- inferior_python_version = version
-
- if trace_code:
- tracer = trace.Trace(count=False, trace=True, outfile=sys.stderr,
- ignoredirs=[sys.prefix, sys.exec_prefix])
- tracer.runfunc(runtests)
- else:
- runtests()
+class DebugTestCase(unittest.TestCase):
+ """
+ Base class for test cases. On teardown it kills the inferior and unsets
+ all breakpoints.
+ """
+
+ def __init__(self, name):
+ super(DebugTestCase, self).__init__(name)
+ self.cy = libcython.cy
+ self.module = libcython.cy.cython_namespace['codefile']
+ self.spam_func, self.spam_meth = libcython.cy.functions_by_name['spam']
+ self.ham_func = libcython.cy.functions_by_qualified_name[
+ 'codefile.ham']
+ self.eggs_func = libcython.cy.functions_by_qualified_name[
+ 'codefile.eggs']
+
+ def read_var(self, varname, cast_to=None):
+ result = gdb.parse_and_eval('$cy_cvalue("%s")' % varname)
+ if cast_to:
+ result = cast_to(result)
+
+ return result
+
+ def local_info(self):
+ return gdb.execute('info locals', to_string=True)
+
+ def lineno_equals(self, source_line=None, lineno=None):
+ if source_line is not None:
+ lineno = test_libcython.source_to_lineno[source_line]
+ frame = gdb.selected_frame()
+ self.assertEqual(libcython.cython_info.lineno(frame), lineno)
+
+ def break_and_run(self, source_line):
+ break_lineno = test_libcython.source_to_lineno[source_line]
+ gdb.execute('cy break codefile:%d' % break_lineno, to_string=True)
+ gdb.execute('run', to_string=True)
+
+ def tearDown(self):
+ gdb.execute('delete breakpoints', to_string=True)
+ try:
+ gdb.execute('kill inferior 1', to_string=True)
+ except RuntimeError:
+ pass
+
+ gdb.execute('set args -c "import codefile"')
+
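The fixture above drives gdb entirely through its Python API: every command goes through gdb.execute(), and passing to_string=True makes gdb return the command's output as a string instead of printing it to the console. A minimal sketch of that pattern, runnable only inside a gdb process with Python scripting enabled:

    import gdb

    # Capture the output of a CLI command as a string.
    listing = gdb.execute('info breakpoints', to_string=True)

    # Without to_string=True the output goes straight to the gdb console.
    gdb.execute('delete breakpoints')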
+
+class TestDebugInformationClasses(DebugTestCase):
+
+ def test_CythonModule(self):
+ "test that debug information was parsed properly into data structures"
+ self.assertEqual(self.module.name, 'codefile')
+ global_vars = ('c_var', 'python_var', '__name__',
+ '__builtins__', '__doc__', '__file__')
+ assert set(global_vars).issubset(self.module.globals)
+
+ def test_CythonVariable(self):
+ module_globals = self.module.globals
+ c_var = module_globals['c_var']
+ python_var = module_globals['python_var']
+ self.assertEqual(c_var.type, libcython.CObject)
+ self.assertEqual(python_var.type, libcython.PythonObject)
+ self.assertEqual(c_var.qualified_name, 'codefile.c_var')
+
+ def test_CythonFunction(self):
+ self.assertEqual(self.spam_func.qualified_name, 'codefile.spam')
+ self.assertEqual(self.spam_meth.qualified_name,
+ 'codefile.SomeClass.spam')
+ self.assertEqual(self.spam_func.module, self.module)
+
+ assert self.eggs_func.pf_cname, (self.eggs_func, self.eggs_func.pf_cname)
+ assert not self.ham_func.pf_cname
+ assert not self.spam_func.pf_cname
+ assert not self.spam_meth.pf_cname
+
+ self.assertEqual(self.spam_func.type, libcython.CObject)
+ self.assertEqual(self.ham_func.type, libcython.CObject)
+
+ self.assertEqual(self.spam_func.arguments, ['a'])
+ self.assertEqual(self.spam_func.step_into_functions,
+ set(['puts', 'some_c_function']))
+
+ expected_lineno = test_libcython.source_to_lineno['def spam(a=0):']
+ self.assertEqual(self.spam_func.lineno, expected_lineno)
+ self.assertEqual(sorted(self.spam_func.locals), list('abcd'))
+
+
+class TestParameters(unittest.TestCase):
+
+ def test_parameters(self):
+ gdb.execute('set cy_colorize_code on')
+ assert libcython.parameters.colorize_code
+ gdb.execute('set cy_colorize_code off')
+ assert not libcython.parameters.colorize_code
+
+
+class TestBreak(DebugTestCase):
+
+ def test_break(self):
+ breakpoint_amount = len(gdb.breakpoints() or ())
+ gdb.execute('cy break codefile.spam')
+
+ self.assertEqual(len(gdb.breakpoints()), breakpoint_amount + 1)
+ bp = gdb.breakpoints()[-1]
+ self.assertEqual(bp.type, gdb.BP_BREAKPOINT)
+ assert self.spam_func.cname in bp.location
+ assert bp.enabled
+
+ def test_python_break(self):
+ gdb.execute('cy break -p join')
+ assert 'def join(' in gdb.execute('cy run', to_string=True)
+
+ def test_break_lineno(self):
+ beginline = 'import os'
+ nextline = 'cdef int c_var = 12'
+
+ self.break_and_run(beginline)
+ self.lineno_equals(beginline)
+ step_result = gdb.execute('cy step', to_string=True)
+ self.lineno_equals(nextline)
+ assert step_result.rstrip().endswith(nextline)
+
+
+class TestKilled(DebugTestCase):
+
+ def test_abort(self):
+ gdb.execute("set args -c 'import os; os.abort()'")
+ output = gdb.execute('cy run', to_string=True)
+ assert 'abort' in output.lower()
+
+
+class DebugStepperTestCase(DebugTestCase):
+
+ def step(self, varnames_and_values, source_line=None, lineno=None):
+ gdb.execute(self.command)
+ for varname, value in varnames_and_values:
+ self.assertEqual(self.read_var(varname), value, self.local_info())
+
+ self.lineno_equals(source_line, lineno)
+
+
+class TestStep(DebugStepperTestCase):
+ """
+ Test stepping. Stepping happens in the code found in
+ Cython/Debugger/Tests/codefile.
+ """
+
+ def test_cython_step(self):
+ gdb.execute('cy break codefile.spam')
+
+ gdb.execute('run', to_string=True)
+ self.lineno_equals('def spam(a=0):')
+
+ gdb.execute('cy step', to_string=True)
+ self.lineno_equals('b = c = d = 0')
+
+ self.command = 'cy step'
+ self.step([('b', 0)], source_line='b = 1')
+ self.step([('b', 1), ('c', 0)], source_line='c = 2')
+ self.step([('c', 2)], source_line='int(10)')
+ self.step([], source_line='puts("spam")')
+
+ gdb.execute('cont', to_string=True)
+ self.assertEqual(len(gdb.inferiors()), 1)
+ self.assertEqual(gdb.inferiors()[0].pid, 0)
+
+ def test_c_step(self):
+ self.break_and_run('some_c_function()')
+ gdb.execute('cy step', to_string=True)
+ self.assertEqual(gdb.selected_frame().name(), 'some_c_function')
+
+ def test_python_step(self):
+ self.break_and_run('os.path.join("foo", "bar")')
+
+ result = gdb.execute('cy step', to_string=True)
+
+ curframe = gdb.selected_frame()
+ self.assertEqual(curframe.name(), 'PyEval_EvalFrameEx')
+
+ pyframe = libpython.Frame(curframe).get_pyop()
+        # With Python 3 inferiors, pyframe.co_name returns a PyUnicodePtr;
+        # use proxyval() so both Python 2 and Python 3 inferiors are handled.
+ frame_name = pyframe.co_name.proxyval(set())
+ self.assertEqual(frame_name, 'join')
+ assert re.match(r'\d+ def join\(', result), result
+
+
+class TestNext(DebugStepperTestCase):
+
+ def test_cython_next(self):
+ self.break_and_run('c = 2')
+
+ lines = (
+ 'int(10)',
+ 'puts("spam")',
+ 'os.path.join("foo", "bar")',
+ 'some_c_function()',
+ )
+
+ for line in lines:
+ gdb.execute('cy next')
+ self.lineno_equals(line)
+
+
+class TestLocalsGlobals(DebugTestCase):
+
+ def test_locals(self):
+ self.break_and_run('int(10)')
+
+ result = gdb.execute('cy locals', to_string=True)
+        assert 'a = 0' in result, repr(result)
+        assert 'b = (int) 1' in result, repr(result)
+ assert 'c = (int) 2' in result, repr(result)
+
+ def test_globals(self):
+ self.break_and_run('int(10)')
+
+ result = gdb.execute('cy globals', to_string=True)
+ assert '__name__ ' in result, repr(result)
+ assert '__doc__ ' in result, repr(result)
+ assert 'os ' in result, repr(result)
+ assert 'c_var ' in result, repr(result)
+ assert 'python_var ' in result, repr(result)
+
+
+class TestBacktrace(DebugTestCase):
+
+ def test_backtrace(self):
+ libcython.parameters.colorize_code.value = False
+
+ self.break_and_run('os.path.join("foo", "bar")')
+
+ def match_backtrace_output(result):
+ assert re.search(r'\#\d+ *0x.* in spam\(\) at .*codefile\.pyx:22',
+ result), result
+ assert 'os.path.join("foo", "bar")' in result, result
+
+ result = gdb.execute('cy bt', to_string=True)
+ match_backtrace_output(result)
+
+ result = gdb.execute('cy bt -a', to_string=True)
+ match_backtrace_output(result)
+
+ # Apparently not everyone has main()
+ # assert re.search(r'\#0 *0x.* in main\(\)', result), result
+
+
+class TestFunctions(DebugTestCase):
+
+ def test_functions(self):
+ self.break_and_run('c = 2')
+ result = gdb.execute('print $cy_cname("b")', to_string=True)
+ assert re.search('__pyx_.*b', result), result
+
+ result = gdb.execute('print $cy_lineno()', to_string=True)
+ supposed_lineno = test_libcython.source_to_lineno['c = 2']
+ assert str(supposed_lineno) in result, (supposed_lineno, result)
+
+ result = gdb.execute('print $cy_cvalue("b")', to_string=True)
+ assert '= 1' in result
+
+
+class TestPrint(DebugTestCase):
+
+ def test_print(self):
+ self.break_and_run('c = 2')
+ result = gdb.execute('cy print b', to_string=True)
+ self.assertEqual('b = (int) 1\n', result)
+
+
+class TestUpDown(DebugTestCase):
+
+ def test_updown(self):
+ self.break_and_run('os.path.join("foo", "bar")')
+ gdb.execute('cy step')
+ self.assertRaises(RuntimeError, gdb.execute, 'cy down')
+
+ result = gdb.execute('cy up', to_string=True)
+ assert 'spam()' in result
+ assert 'os.path.join("foo", "bar")' in result
+
+
+class TestExec(DebugTestCase):
+
+ def setUp(self):
+ super(TestExec, self).setUp()
+ self.fd, self.tmpfilename = tempfile.mkstemp()
+ self.tmpfile = os.fdopen(self.fd, 'r+')
+
+ def tearDown(self):
+ super(TestExec, self).tearDown()
+
+ try:
+ self.tmpfile.close()
+ finally:
+ os.remove(self.tmpfilename)
+
+ def eval_command(self, command):
+ gdb.execute('cy exec open(%r, "w").write(str(%s))' %
+ (self.tmpfilename, command))
+ return self.tmpfile.read().strip()
+
+ def test_cython_exec(self):
+ self.break_and_run('os.path.join("foo", "bar")')
+
+ # test normal behaviour
+ self.assertEqual("[0]", self.eval_command('[a]'))
+
+ # test multiline code
+ result = gdb.execute(textwrap.dedent('''\
+ cy exec
+ pass
+
+ "nothing"
+ end
+ '''))
+ result = self.tmpfile.read().rstrip()
+ self.assertEqual('', result)
+
+ def test_python_exec(self):
+ self.break_and_run('os.path.join("foo", "bar")')
+ gdb.execute('cy step')
+
+ gdb.execute('cy exec some_random_var = 14')
+ self.assertEqual('14', self.eval_command('some_random_var'))
+
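TestExec.eval_command() above gets values out of the inferior by having 'cy exec' write into a temporary file that the test then reads back. The file handling itself is plain standard library; a small sketch of the same round trip outside gdb (the value written is illustrative only):

    import os
    import tempfile

    fd, path = tempfile.mkstemp()      # low-level descriptor plus filename
    tmpfile = os.fdopen(fd, 'r+')      # wrap the descriptor in a file object
    try:
        with open(path, 'w') as writer:    # stands in for the inferior process
            writer.write('14')
        print(tmpfile.read().strip())      # reads back '14'
    finally:
        tmpfile.close()
        os.remove(path)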
+
+class CySet(DebugTestCase):
+
+ def test_cyset(self):
+ self.break_and_run('os.path.join("foo", "bar")')
+
+ gdb.execute('cy set a = $cy_eval("{None: []}")')
+ stringvalue = self.read_var("a", cast_to=str)
+ self.assertEqual(stringvalue, "{None: []}")
+
+
+class TestCyEval(DebugTestCase):
+ "Test the $cy_eval() gdb function."
+
+ def test_cy_eval(self):
+ # This function leaks a few objects in the GDB python process. This
+ # is no biggie
+ self.break_and_run('os.path.join("foo", "bar")')
+
+ result = gdb.execute('print $cy_eval("None")', to_string=True)
+ assert re.match(r'\$\d+ = None\n', result), result
+
+ result = gdb.execute('print $cy_eval("[a]")', to_string=True)
+ assert re.match(r'\$\d+ = \[0\]', result), result
+
+
+class TestClosure(DebugTestCase):
+
+ def break_and_run_func(self, funcname):
+ gdb.execute('cy break ' + funcname)
+ gdb.execute('cy run')
+
+ def test_inner(self):
+ self.break_and_run_func('inner')
+ self.assertEqual('', gdb.execute('cy locals', to_string=True))
+
+ # Allow the Cython-generated code to initialize the scope variable
+ gdb.execute('cy step')
+
+ self.assertEqual(str(self.read_var('a')), "'an object'")
+ print_result = gdb.execute('cy print a', to_string=True).strip()
+ self.assertEqual(print_result, "a = 'an object'")
+
+ def test_outer(self):
+ self.break_and_run_func('outer')
+ self.assertEqual('', gdb.execute('cy locals', to_string=True))
+
+ # Initialize scope with 'a' uninitialized
+ gdb.execute('cy step')
+ self.assertEqual('', gdb.execute('cy locals', to_string=True))
+
+        # Initialize 'a' to 'an object'
+ gdb.execute('cy step')
+ print_result = gdb.execute('cy print a', to_string=True).strip()
+ self.assertEqual(print_result, "a = 'an object'")
+
+
+_do_debug = os.environ.get('GDB_DEBUG')
+if _do_debug:
+ _debug_file = open('/dev/tty', 'w')
+
+def _debug(*messages):
+ if _do_debug:
+ messages = itertools.chain([sys._getframe(1).f_code.co_name, ':'],
+ messages)
+ _debug_file.write(' '.join(str(msg) for msg in messages) + '\n')
+
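_debug() prefixes each message with the caller's function name via sys._getframe(1); a tiny standalone sketch of that introspection (function names here are illustrative):

    import sys

    def calling_function_name():
        # Frame 1 is the caller of this helper; f_code.co_name is its name.
        return sys._getframe(1).f_code.co_name

    def test_break():
        print(calling_function_name() + ': starting')   # prints "test_break: starting"

    test_break()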
+
+def run_unittest_in_module(modulename):
+ try:
+ gdb.lookup_type('PyModuleObject')
+ except RuntimeError:
+ msg = ("Unable to run tests, Python was not compiled with "
+ "debugging information. Either compile python with "
+ "-g or get a debug build (configure with --with-pydebug).")
+ warnings.warn(msg)
+ os._exit(1)
+ else:
+ m = __import__(modulename, fromlist=[''])
+ tests = inspect.getmembers(m, inspect.isclass)
+
+ # test_support.run_unittest(tests)
+
+ test_loader = unittest.TestLoader()
+ suite = unittest.TestSuite(
+ [test_loader.loadTestsFromTestCase(cls) for name, cls in tests])
+
+ result = unittest.TextTestRunner(verbosity=1).run(suite)
+ return result.wasSuccessful()
+
+def runtests():
+ """
+ Run the libcython and libpython tests. Ensure that an appropriate status is
+ returned to the parent test process.
+ """
+ from Cython.Debugger.Tests import test_libpython_in_gdb
+
+ success_libcython = run_unittest_in_module(__name__)
+ success_libpython = run_unittest_in_module(test_libpython_in_gdb.__name__)
+
+ if not success_libcython or not success_libpython:
+ sys.exit(2)
+
+def main(version, trace_code=False):
+ global inferior_python_version
+
+ inferior_python_version = version
+
+ if trace_code:
+ tracer = trace.Trace(count=False, trace=True, outfile=sys.stderr,
+ ignoredirs=[sys.prefix, sys.exec_prefix])
+ tracer.runfunc(runtests)
+ else:
+ runtests()
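The trace_code branch in main() above uses the standard-library trace module; a minimal sketch of the same call, assuming any argument-free callable in place of runtests:

    import sys
    import trace

    def exercise():
        return sum(range(5))

    tracer = trace.Trace(count=False, trace=True,
                         ignoredirs=[sys.prefix, sys.exec_prefix])
    tracer.runfunc(exercise)   # echoes each executed source line as it runs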
diff --git a/contrib/tools/cython/Cython/Debugger/Tests/test_libpython_in_gdb.py b/contrib/tools/cython/Cython/Debugger/Tests/test_libpython_in_gdb.py
index 8a2b83419b..6f34cee47b 100644
--- a/contrib/tools/cython/Cython/Debugger/Tests/test_libpython_in_gdb.py
+++ b/contrib/tools/cython/Cython/Debugger/Tests/test_libpython_in_gdb.py
@@ -1,115 +1,115 @@
-# -*- coding: UTF-8 -*-
-
-"""
-Test libpython.py. This is already partly tested by test_libcython_in_gdb and
-Lib/test/test_gdb.py in the Python source. These tests are run in gdb and
-called from test_libcython_in_gdb.main()
-"""
-
-import os
-import sys
-
-import gdb
-
-from Cython.Debugger import libcython
-from Cython.Debugger import libpython
-
+# -*- coding: UTF-8 -*-
+
+"""
+Test libpython.py. This is already partly tested by test_libcython_in_gdb and
+Lib/test/test_gdb.py in the Python source. These tests are run in gdb and
+called from test_libcython_in_gdb.main()
+"""
+
+import os
+import sys
+
+import gdb
+
+from Cython.Debugger import libcython
+from Cython.Debugger import libpython
+
from . import test_libcython_in_gdb
from .test_libcython_in_gdb import _debug, inferior_python_version
-
-
-class TestPrettyPrinters(test_libcython_in_gdb.DebugTestCase):
- """
- Test whether types of Python objects are correctly inferred and that
- the right libpython.PySomeTypeObjectPtr classes are instantiated.
-
- Also test whether values are appropriately formatted (don't be too
- laborious as Lib/test/test_gdb.py already covers this extensively).
-
-    Don't bother decref'ing newly allocated objects, as a new
-    interpreter is started for every test anyway.
- """
-
- def setUp(self):
- super(TestPrettyPrinters, self).setUp()
- self.break_and_run('b = c = d = 0')
-
- def get_pyobject(self, code):
- value = gdb.parse_and_eval(code)
- assert libpython.pointervalue(value) != 0
- return value
-
- def pyobject_fromcode(self, code, gdbvar=None):
- if gdbvar is not None:
- d = {'varname':gdbvar, 'code':code}
- gdb.execute('set $%(varname)s = %(code)s' % d)
- code = '$' + gdbvar
-
- return libpython.PyObjectPtr.from_pyobject_ptr(self.get_pyobject(code))
-
- def get_repr(self, pyobject):
- return pyobject.get_truncated_repr(libpython.MAX_OUTPUT_LEN)
-
- def alloc_bytestring(self, string, gdbvar=None):
- if inferior_python_version < (3, 0):
- funcname = 'PyString_FromStringAndSize'
- else:
- funcname = 'PyBytes_FromStringAndSize'
-
+
+
+class TestPrettyPrinters(test_libcython_in_gdb.DebugTestCase):
+ """
+ Test whether types of Python objects are correctly inferred and that
+ the right libpython.PySomeTypeObjectPtr classes are instantiated.
+
+ Also test whether values are appropriately formatted (don't be too
+ laborious as Lib/test/test_gdb.py already covers this extensively).
+
+    Don't bother decref'ing newly allocated objects, as a new
+    interpreter is started for every test anyway.
+ """
+
+ def setUp(self):
+ super(TestPrettyPrinters, self).setUp()
+ self.break_and_run('b = c = d = 0')
+
+ def get_pyobject(self, code):
+ value = gdb.parse_and_eval(code)
+ assert libpython.pointervalue(value) != 0
+ return value
+
+ def pyobject_fromcode(self, code, gdbvar=None):
+ if gdbvar is not None:
+ d = {'varname':gdbvar, 'code':code}
+ gdb.execute('set $%(varname)s = %(code)s' % d)
+ code = '$' + gdbvar
+
+ return libpython.PyObjectPtr.from_pyobject_ptr(self.get_pyobject(code))
+
+ def get_repr(self, pyobject):
+ return pyobject.get_truncated_repr(libpython.MAX_OUTPUT_LEN)
+
+ def alloc_bytestring(self, string, gdbvar=None):
+ if inferior_python_version < (3, 0):
+ funcname = 'PyString_FromStringAndSize'
+ else:
+ funcname = 'PyBytes_FromStringAndSize'
+
assert b'"' not in string
-
- # ensure double quotes
+
+ # ensure double quotes
code = '(PyObject *) %s("%s", %d)' % (funcname, string.decode('iso8859-1'), len(string))
- return self.pyobject_fromcode(code, gdbvar=gdbvar)
-
- def alloc_unicodestring(self, string, gdbvar=None):
- postfix = libpython.get_inferior_unicode_postfix()
+ return self.pyobject_fromcode(code, gdbvar=gdbvar)
+
+ def alloc_unicodestring(self, string, gdbvar=None):
+ postfix = libpython.get_inferior_unicode_postfix()
funcname = 'PyUnicode%s_DecodeUnicodeEscape' % (postfix,)
-
+
data = string.encode("unicode_escape").decode('iso8859-1')
- return self.pyobject_fromcode(
+ return self.pyobject_fromcode(
'(PyObject *) %s("%s", %d, "strict")' % (
funcname, data.replace('"', r'\"').replace('\\', r'\\'), len(data)),
- gdbvar=gdbvar)
-
- def test_bytestring(self):
+ gdbvar=gdbvar)
+
+ def test_bytestring(self):
bytestring = self.alloc_bytestring(b"spam")
-
- if inferior_python_version < (3, 0):
- bytestring_class = libpython.PyStringObjectPtr
+
+ if inferior_python_version < (3, 0):
+ bytestring_class = libpython.PyStringObjectPtr
expected = repr(b"spam")
- else:
- bytestring_class = libpython.PyBytesObjectPtr
- expected = "b'spam'"
-
- self.assertEqual(type(bytestring), bytestring_class)
- self.assertEqual(self.get_repr(bytestring), expected)
-
- def test_unicode(self):
- unicode_string = self.alloc_unicodestring(u"spam ἄλφα")
-
+ else:
+ bytestring_class = libpython.PyBytesObjectPtr
+ expected = "b'spam'"
+
+ self.assertEqual(type(bytestring), bytestring_class)
+ self.assertEqual(self.get_repr(bytestring), expected)
+
+ def test_unicode(self):
+ unicode_string = self.alloc_unicodestring(u"spam ἄλφα")
+
expected = u"'spam ἄλφα'"
- if inferior_python_version < (3, 0):
- expected = 'u' + expected
-
- self.assertEqual(type(unicode_string), libpython.PyUnicodeObjectPtr)
- self.assertEqual(self.get_repr(unicode_string), expected)
-
- def test_int(self):
- if inferior_python_version < (3, 0):
- intval = self.pyobject_fromcode('PyInt_FromLong(100)')
- self.assertEqual(type(intval), libpython.PyIntObjectPtr)
- self.assertEqual(self.get_repr(intval), '100')
-
- def test_long(self):
- longval = self.pyobject_fromcode('PyLong_FromLong(200)',
- gdbvar='longval')
- assert gdb.parse_and_eval('$longval->ob_type == &PyLong_Type')
-
- self.assertEqual(type(longval), libpython.PyLongObjectPtr)
- self.assertEqual(self.get_repr(longval), '200')
-
- def test_frame_type(self):
- frame = self.pyobject_fromcode('PyEval_GetFrame()')
-
- self.assertEqual(type(frame), libpython.PyFrameObjectPtr)
+ if inferior_python_version < (3, 0):
+ expected = 'u' + expected
+
+ self.assertEqual(type(unicode_string), libpython.PyUnicodeObjectPtr)
+ self.assertEqual(self.get_repr(unicode_string), expected)
+
+ def test_int(self):
+ if inferior_python_version < (3, 0):
+ intval = self.pyobject_fromcode('PyInt_FromLong(100)')
+ self.assertEqual(type(intval), libpython.PyIntObjectPtr)
+ self.assertEqual(self.get_repr(intval), '100')
+
+ def test_long(self):
+ longval = self.pyobject_fromcode('PyLong_FromLong(200)',
+ gdbvar='longval')
+ assert gdb.parse_and_eval('$longval->ob_type == &PyLong_Type')
+
+ self.assertEqual(type(longval), libpython.PyLongObjectPtr)
+ self.assertEqual(self.get_repr(longval), '200')
+
+ def test_frame_type(self):
+ frame = self.pyobject_fromcode('PyEval_GetFrame()')
+
+ self.assertEqual(type(frame), libpython.PyFrameObjectPtr)
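alloc_unicodestring() above escapes the test string with the 'unicode_escape' codec so it can be embedded between double quotes in a C-level expression; a standalone sketch of that round trip:

    text = u"spam ἄλφα"

    # Escape non-ASCII characters, then decode the bytes back to str so the
    # result can be formatted into the gdb expression.
    data = text.encode("unicode_escape").decode("iso8859-1")
    print(data)        # spam \u1f04\u03bb\u03c6\u03b1
    print(len(data))   # length of the escaped form, as passed to the decoder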
diff --git a/contrib/tools/cython/Cython/Debugger/__init__.py b/contrib/tools/cython/Cython/Debugger/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Debugger/__init__.py
+++ b/contrib/tools/cython/Cython/Debugger/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Debugger/libcython.py b/contrib/tools/cython/Cython/Debugger/libcython.py
index 2ddf43922e..23153789b6 100644
--- a/contrib/tools/cython/Cython/Debugger/libcython.py
+++ b/contrib/tools/cython/Cython/Debugger/libcython.py
@@ -1,23 +1,23 @@
-"""
-GDB extension that adds Cython support.
-"""
-
-from __future__ import print_function
-
+"""
+GDB extension that adds Cython support.
+"""
+
+from __future__ import print_function
+
try:
input = raw_input
except NameError:
pass
-import sys
-import textwrap
-import traceback
-import functools
-import itertools
-import collections
-
-import gdb
-
+import sys
+import textwrap
+import traceback
+import functools
+import itertools
+import collections
+
+import gdb
+
try: # python 2
UNICODE = unicode
BYTES = str
@@ -25,1410 +25,1410 @@ except NameError: # python 3
UNICODE = str
BYTES = bytes
-try:
- from lxml import etree
- have_lxml = True
-except ImportError:
- have_lxml = False
- try:
- # Python 2.5
- from xml.etree import cElementTree as etree
- except ImportError:
- try:
- # Python 2.5
- from xml.etree import ElementTree as etree
- except ImportError:
- try:
- # normal cElementTree install
- import cElementTree as etree
- except ImportError:
- # normal ElementTree install
- import elementtree.ElementTree as etree
-
-try:
- import pygments.lexers
- import pygments.formatters
-except ImportError:
- pygments = None
- sys.stderr.write("Install pygments for colorized source code.\n")
-
-if hasattr(gdb, 'string_to_argv'):
- from gdb import string_to_argv
-else:
- from shlex import split as string_to_argv
-
-from Cython.Debugger import libpython
-
-# C or Python type
-CObject = 'CObject'
-PythonObject = 'PythonObject'
-
-_data_types = dict(CObject=CObject, PythonObject=PythonObject)
-_filesystemencoding = sys.getfilesystemencoding() or 'UTF-8'
-
-
-# decorators
-
-def dont_suppress_errors(function):
- "*sigh*, readline"
- @functools.wraps(function)
- def wrapper(*args, **kwargs):
- try:
- return function(*args, **kwargs)
- except Exception:
- traceback.print_exc()
- raise
-
- return wrapper
-
-
-def default_selected_gdb_frame(err=True):
- def decorator(function):
- @functools.wraps(function)
- def wrapper(self, frame=None, *args, **kwargs):
- try:
- frame = frame or gdb.selected_frame()
- except RuntimeError:
- raise gdb.GdbError("No frame is currently selected.")
-
- if err and frame.name() is None:
- raise NoFunctionNameInFrameError()
-
- return function(self, frame, *args, **kwargs)
- return wrapper
- return decorator
-
-
-def require_cython_frame(function):
- @functools.wraps(function)
- @require_running_program
- def wrapper(self, *args, **kwargs):
- frame = kwargs.get('frame') or gdb.selected_frame()
- if not self.is_cython_function(frame):
- raise gdb.GdbError('Selected frame does not correspond with a '
- 'Cython function we know about.')
- return function(self, *args, **kwargs)
- return wrapper
-
-
-def dispatch_on_frame(c_command, python_command=None):
- def decorator(function):
- @functools.wraps(function)
- def wrapper(self, *args, **kwargs):
- is_cy = self.is_cython_function()
- is_py = self.is_python_function()
-
- if is_cy or (is_py and not python_command):
- function(self, *args, **kwargs)
- elif is_py:
- gdb.execute(python_command)
- elif self.is_relevant_function():
- gdb.execute(c_command)
- else:
- raise gdb.GdbError("Not a function cygdb knows about. "
- "Use the normal GDB commands instead.")
-
- return wrapper
- return decorator
-
-
-def require_running_program(function):
- @functools.wraps(function)
- def wrapper(*args, **kwargs):
- try:
- gdb.selected_frame()
- except RuntimeError:
- raise gdb.GdbError("No frame is currently selected.")
-
- return function(*args, **kwargs)
- return wrapper
-
-
-def gdb_function_value_to_unicode(function):
- @functools.wraps(function)
- def wrapper(self, string, *args, **kwargs):
- if isinstance(string, gdb.Value):
- string = string.string()
-
- return function(self, string, *args, **kwargs)
- return wrapper
-
-
-# Classes that represent the debug information
-# Don't rename the parameters of these classes, they come directly from the XML
-
-class CythonModule(object):
- def __init__(self, module_name, filename, c_filename):
- self.name = module_name
- self.filename = filename
- self.c_filename = c_filename
- self.globals = {}
- # {cython_lineno: min(c_linenos)}
- self.lineno_cy2c = {}
- # {c_lineno: cython_lineno}
- self.lineno_c2cy = {}
- self.functions = {}
-
-
-class CythonVariable(object):
-
- def __init__(self, name, cname, qualified_name, type, lineno):
- self.name = name
- self.cname = cname
- self.qualified_name = qualified_name
- self.type = type
- self.lineno = int(lineno)
-
-
-class CythonFunction(CythonVariable):
- def __init__(self,
- module,
- name,
- cname,
- pf_cname,
- qualified_name,
- lineno,
- type=CObject,
- is_initmodule_function="False"):
- super(CythonFunction, self).__init__(name,
- cname,
- qualified_name,
- type,
- lineno)
- self.module = module
- self.pf_cname = pf_cname
- self.is_initmodule_function = is_initmodule_function == "True"
- self.locals = {}
- self.arguments = []
- self.step_into_functions = set()
-
-
-# General purpose classes
-
-class CythonBase(object):
-
- @default_selected_gdb_frame(err=False)
- def is_cython_function(self, frame):
- return frame.name() in self.cy.functions_by_cname
-
- @default_selected_gdb_frame(err=False)
- def is_python_function(self, frame):
- """
- Tells if a frame is associated with a Python function.
- If we can't read the Python frame information, don't regard it as such.
- """
- if frame.name() == 'PyEval_EvalFrameEx':
- pyframe = libpython.Frame(frame).get_pyop()
- return pyframe and not pyframe.is_optimized_out()
- return False
-
- @default_selected_gdb_frame()
- def get_c_function_name(self, frame):
- return frame.name()
-
- @default_selected_gdb_frame()
- def get_c_lineno(self, frame):
- return frame.find_sal().line
-
- @default_selected_gdb_frame()
- def get_cython_function(self, frame):
- result = self.cy.functions_by_cname.get(frame.name())
- if result is None:
- raise NoCythonFunctionInFrameError()
-
- return result
-
- @default_selected_gdb_frame()
- def get_cython_lineno(self, frame):
- """
- Get the current Cython line number. Returns 0 if there is no
- correspondence between the C and Cython code.
- """
- cyfunc = self.get_cython_function(frame)
- return cyfunc.module.lineno_c2cy.get(self.get_c_lineno(frame), 0)
-
- @default_selected_gdb_frame()
- def get_source_desc(self, frame):
- filename = lineno = lexer = None
- if self.is_cython_function(frame):
- filename = self.get_cython_function(frame).module.filename
- lineno = self.get_cython_lineno(frame)
- if pygments:
- lexer = pygments.lexers.CythonLexer(stripall=False)
- elif self.is_python_function(frame):
- pyframeobject = libpython.Frame(frame).get_pyop()
-
- if not pyframeobject:
- raise gdb.GdbError(
- 'Unable to read information on python frame')
-
- filename = pyframeobject.filename()
- lineno = pyframeobject.current_line_num()
-
- if pygments:
- lexer = pygments.lexers.PythonLexer(stripall=False)
- else:
- symbol_and_line_obj = frame.find_sal()
- if not symbol_and_line_obj or not symbol_and_line_obj.symtab:
- filename = None
- lineno = 0
- else:
- filename = symbol_and_line_obj.symtab.fullname()
- lineno = symbol_and_line_obj.line
- if pygments:
- lexer = pygments.lexers.CLexer(stripall=False)
-
- return SourceFileDescriptor(filename, lexer), lineno
-
- @default_selected_gdb_frame()
- def get_source_line(self, frame):
- source_desc, lineno = self.get_source_desc()
- return source_desc.get_source(lineno)
-
- @default_selected_gdb_frame()
- def is_relevant_function(self, frame):
- """
- returns whether we care about a frame on the user-level when debugging
- Cython code
- """
- name = frame.name()
- older_frame = frame.older()
- if self.is_cython_function(frame) or self.is_python_function(frame):
- return True
- elif older_frame and self.is_cython_function(older_frame):
- # check for direct C function call from a Cython function
- cython_func = self.get_cython_function(older_frame)
- return name in cython_func.step_into_functions
-
- return False
-
- @default_selected_gdb_frame(err=False)
- def print_stackframe(self, frame, index, is_c=False):
- """
- Print a C, Cython or Python stack frame and the line of source code
- if available.
- """
- # do this to prevent the require_cython_frame decorator from
- # raising GdbError when calling self.cy.cy_cvalue.invoke()
- selected_frame = gdb.selected_frame()
- frame.select()
-
- try:
- source_desc, lineno = self.get_source_desc(frame)
- except NoFunctionNameInFrameError:
- print('#%-2d Unknown Frame (compile with -g)' % index)
- return
-
- if not is_c and self.is_python_function(frame):
- pyframe = libpython.Frame(frame).get_pyop()
- if pyframe is None or pyframe.is_optimized_out():
- # print this python function as a C function
- return self.print_stackframe(frame, index, is_c=True)
-
- func_name = pyframe.co_name
- func_cname = 'PyEval_EvalFrameEx'
- func_args = []
- elif self.is_cython_function(frame):
- cyfunc = self.get_cython_function(frame)
- f = lambda arg: self.cy.cy_cvalue.invoke(arg, frame=frame)
-
- func_name = cyfunc.name
- func_cname = cyfunc.cname
- func_args = [] # [(arg, f(arg)) for arg in cyfunc.arguments]
- else:
- source_desc, lineno = self.get_source_desc(frame)
- func_name = frame.name()
- func_cname = func_name
- func_args = []
-
- try:
- gdb_value = gdb.parse_and_eval(func_cname)
- except RuntimeError:
- func_address = 0
- else:
+try:
+ from lxml import etree
+ have_lxml = True
+except ImportError:
+ have_lxml = False
+ try:
+ # Python 2.5
+ from xml.etree import cElementTree as etree
+ except ImportError:
+ try:
+ # Python 2.5
+ from xml.etree import ElementTree as etree
+ except ImportError:
+ try:
+ # normal cElementTree install
+ import cElementTree as etree
+ except ImportError:
+ # normal ElementTree install
+ import elementtree.ElementTree as etree
+
+try:
+ import pygments.lexers
+ import pygments.formatters
+except ImportError:
+ pygments = None
+ sys.stderr.write("Install pygments for colorized source code.\n")
+
+if hasattr(gdb, 'string_to_argv'):
+ from gdb import string_to_argv
+else:
+ from shlex import split as string_to_argv
+
+from Cython.Debugger import libpython
+
+# C or Python type
+CObject = 'CObject'
+PythonObject = 'PythonObject'
+
+_data_types = dict(CObject=CObject, PythonObject=PythonObject)
+_filesystemencoding = sys.getfilesystemencoding() or 'UTF-8'
+
+
+# decorators
+
+def dont_suppress_errors(function):
+ "*sigh*, readline"
+ @functools.wraps(function)
+ def wrapper(*args, **kwargs):
+ try:
+ return function(*args, **kwargs)
+ except Exception:
+ traceback.print_exc()
+ raise
+
+ return wrapper
+
+
+def default_selected_gdb_frame(err=True):
+ def decorator(function):
+ @functools.wraps(function)
+ def wrapper(self, frame=None, *args, **kwargs):
+ try:
+ frame = frame or gdb.selected_frame()
+ except RuntimeError:
+ raise gdb.GdbError("No frame is currently selected.")
+
+ if err and frame.name() is None:
+ raise NoFunctionNameInFrameError()
+
+ return function(self, frame, *args, **kwargs)
+ return wrapper
+ return decorator
+
+
+def require_cython_frame(function):
+ @functools.wraps(function)
+ @require_running_program
+ def wrapper(self, *args, **kwargs):
+ frame = kwargs.get('frame') or gdb.selected_frame()
+ if not self.is_cython_function(frame):
+ raise gdb.GdbError('Selected frame does not correspond with a '
+ 'Cython function we know about.')
+ return function(self, *args, **kwargs)
+ return wrapper
+
+
+def dispatch_on_frame(c_command, python_command=None):
+ def decorator(function):
+ @functools.wraps(function)
+ def wrapper(self, *args, **kwargs):
+ is_cy = self.is_cython_function()
+ is_py = self.is_python_function()
+
+ if is_cy or (is_py and not python_command):
+ function(self, *args, **kwargs)
+ elif is_py:
+ gdb.execute(python_command)
+ elif self.is_relevant_function():
+ gdb.execute(c_command)
+ else:
+ raise gdb.GdbError("Not a function cygdb knows about. "
+ "Use the normal GDB commands instead.")
+
+ return wrapper
+ return decorator
+
+
+def require_running_program(function):
+ @functools.wraps(function)
+ def wrapper(*args, **kwargs):
+ try:
+ gdb.selected_frame()
+ except RuntimeError:
+ raise gdb.GdbError("No frame is currently selected.")
+
+ return function(*args, **kwargs)
+ return wrapper
+
+
+def gdb_function_value_to_unicode(function):
+ @functools.wraps(function)
+ def wrapper(self, string, *args, **kwargs):
+ if isinstance(string, gdb.Value):
+ string = string.string()
+
+ return function(self, string, *args, **kwargs)
+ return wrapper
+
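All of the decorators above wrap the decorated callable with functools.wraps so the command keeps its original name and docstring; a minimal illustration of why that matters (the decorator and function names are illustrative):

    import functools

    def logged(function):
        @functools.wraps(function)     # copies __name__, __doc__, etc. onto wrapper
        def wrapper(*args, **kwargs):
            return function(*args, **kwargs)
        return wrapper

    @logged
    def step():
        "Step into the next line."

    print(step.__name__, '-', step.__doc__)   # step - Step into the next line.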
+
+# Classes that represent the debug information
+# Don't rename the parameters of these classes, they come directly from the XML
+
+class CythonModule(object):
+ def __init__(self, module_name, filename, c_filename):
+ self.name = module_name
+ self.filename = filename
+ self.c_filename = c_filename
+ self.globals = {}
+ # {cython_lineno: min(c_linenos)}
+ self.lineno_cy2c = {}
+ # {c_lineno: cython_lineno}
+ self.lineno_c2cy = {}
+ self.functions = {}
+
+
+class CythonVariable(object):
+
+ def __init__(self, name, cname, qualified_name, type, lineno):
+ self.name = name
+ self.cname = cname
+ self.qualified_name = qualified_name
+ self.type = type
+ self.lineno = int(lineno)
+
+
+class CythonFunction(CythonVariable):
+ def __init__(self,
+ module,
+ name,
+ cname,
+ pf_cname,
+ qualified_name,
+ lineno,
+ type=CObject,
+ is_initmodule_function="False"):
+ super(CythonFunction, self).__init__(name,
+ cname,
+ qualified_name,
+ type,
+ lineno)
+ self.module = module
+ self.pf_cname = pf_cname
+ self.is_initmodule_function = is_initmodule_function == "True"
+ self.locals = {}
+ self.arguments = []
+ self.step_into_functions = set()
+
+
+# General purpose classes
+
+class CythonBase(object):
+
+ @default_selected_gdb_frame(err=False)
+ def is_cython_function(self, frame):
+ return frame.name() in self.cy.functions_by_cname
+
+ @default_selected_gdb_frame(err=False)
+ def is_python_function(self, frame):
+ """
+ Tells if a frame is associated with a Python function.
+ If we can't read the Python frame information, don't regard it as such.
+ """
+ if frame.name() == 'PyEval_EvalFrameEx':
+ pyframe = libpython.Frame(frame).get_pyop()
+ return pyframe and not pyframe.is_optimized_out()
+ return False
+
+ @default_selected_gdb_frame()
+ def get_c_function_name(self, frame):
+ return frame.name()
+
+ @default_selected_gdb_frame()
+ def get_c_lineno(self, frame):
+ return frame.find_sal().line
+
+ @default_selected_gdb_frame()
+ def get_cython_function(self, frame):
+ result = self.cy.functions_by_cname.get(frame.name())
+ if result is None:
+ raise NoCythonFunctionInFrameError()
+
+ return result
+
+ @default_selected_gdb_frame()
+ def get_cython_lineno(self, frame):
+ """
+ Get the current Cython line number. Returns 0 if there is no
+ correspondence between the C and Cython code.
+ """
+ cyfunc = self.get_cython_function(frame)
+ return cyfunc.module.lineno_c2cy.get(self.get_c_lineno(frame), 0)
+
+ @default_selected_gdb_frame()
+ def get_source_desc(self, frame):
+ filename = lineno = lexer = None
+ if self.is_cython_function(frame):
+ filename = self.get_cython_function(frame).module.filename
+ lineno = self.get_cython_lineno(frame)
+ if pygments:
+ lexer = pygments.lexers.CythonLexer(stripall=False)
+ elif self.is_python_function(frame):
+ pyframeobject = libpython.Frame(frame).get_pyop()
+
+ if not pyframeobject:
+ raise gdb.GdbError(
+ 'Unable to read information on python frame')
+
+ filename = pyframeobject.filename()
+ lineno = pyframeobject.current_line_num()
+
+ if pygments:
+ lexer = pygments.lexers.PythonLexer(stripall=False)
+ else:
+ symbol_and_line_obj = frame.find_sal()
+ if not symbol_and_line_obj or not symbol_and_line_obj.symtab:
+ filename = None
+ lineno = 0
+ else:
+ filename = symbol_and_line_obj.symtab.fullname()
+ lineno = symbol_and_line_obj.line
+ if pygments:
+ lexer = pygments.lexers.CLexer(stripall=False)
+
+ return SourceFileDescriptor(filename, lexer), lineno
+
+ @default_selected_gdb_frame()
+ def get_source_line(self, frame):
+ source_desc, lineno = self.get_source_desc()
+ return source_desc.get_source(lineno)
+
+ @default_selected_gdb_frame()
+ def is_relevant_function(self, frame):
+ """
+ returns whether we care about a frame on the user-level when debugging
+ Cython code
+ """
+ name = frame.name()
+ older_frame = frame.older()
+ if self.is_cython_function(frame) or self.is_python_function(frame):
+ return True
+ elif older_frame and self.is_cython_function(older_frame):
+ # check for direct C function call from a Cython function
+ cython_func = self.get_cython_function(older_frame)
+ return name in cython_func.step_into_functions
+
+ return False
+
+ @default_selected_gdb_frame(err=False)
+ def print_stackframe(self, frame, index, is_c=False):
+ """
+ Print a C, Cython or Python stack frame and the line of source code
+ if available.
+ """
+ # do this to prevent the require_cython_frame decorator from
+ # raising GdbError when calling self.cy.cy_cvalue.invoke()
+ selected_frame = gdb.selected_frame()
+ frame.select()
+
+ try:
+ source_desc, lineno = self.get_source_desc(frame)
+ except NoFunctionNameInFrameError:
+ print('#%-2d Unknown Frame (compile with -g)' % index)
+ return
+
+ if not is_c and self.is_python_function(frame):
+ pyframe = libpython.Frame(frame).get_pyop()
+ if pyframe is None or pyframe.is_optimized_out():
+ # print this python function as a C function
+ return self.print_stackframe(frame, index, is_c=True)
+
+ func_name = pyframe.co_name
+ func_cname = 'PyEval_EvalFrameEx'
+ func_args = []
+ elif self.is_cython_function(frame):
+ cyfunc = self.get_cython_function(frame)
+ f = lambda arg: self.cy.cy_cvalue.invoke(arg, frame=frame)
+
+ func_name = cyfunc.name
+ func_cname = cyfunc.cname
+ func_args = [] # [(arg, f(arg)) for arg in cyfunc.arguments]
+ else:
+ source_desc, lineno = self.get_source_desc(frame)
+ func_name = frame.name()
+ func_cname = func_name
+ func_args = []
+
+ try:
+ gdb_value = gdb.parse_and_eval(func_cname)
+ except RuntimeError:
+ func_address = 0
+ else:
func_address = gdb_value.address
if not isinstance(func_address, int):
# Seriously? Why is the address not an int?
if not isinstance(func_address, (str, bytes)):
func_address = str(func_address)
func_address = int(func_address.split()[0], 0)
-
- a = ', '.join('%s=%s' % (name, val) for name, val in func_args)
- sys.stdout.write('#%-2d 0x%016x in %s(%s)' % (index, func_address, func_name, a))
-
- if source_desc.filename is not None:
- sys.stdout.write(' at %s:%s' % (source_desc.filename, lineno))
-
- sys.stdout.write('\n')
-
- try:
- sys.stdout.write(' ' + source_desc.get_source(lineno))
- except gdb.GdbError:
- pass
-
- selected_frame.select()
-
- def get_remote_cython_globals_dict(self):
- m = gdb.parse_and_eval('__pyx_m')
-
- try:
- PyModuleObject = gdb.lookup_type('PyModuleObject')
- except RuntimeError:
- raise gdb.GdbError(textwrap.dedent("""\
- Unable to lookup type PyModuleObject, did you compile python
- with debugging support (-g)?"""))
-
- m = m.cast(PyModuleObject.pointer())
- return m['md_dict']
-
-
- def get_cython_globals_dict(self):
- """
- Get the Cython globals dict where the remote names are turned into
- local strings.
- """
- remote_dict = self.get_remote_cython_globals_dict()
- pyobject_dict = libpython.PyObjectPtr.from_pyobject_ptr(remote_dict)
-
- result = {}
- seen = set()
+
+ a = ', '.join('%s=%s' % (name, val) for name, val in func_args)
+ sys.stdout.write('#%-2d 0x%016x in %s(%s)' % (index, func_address, func_name, a))
+
+ if source_desc.filename is not None:
+ sys.stdout.write(' at %s:%s' % (source_desc.filename, lineno))
+
+ sys.stdout.write('\n')
+
+ try:
+ sys.stdout.write(' ' + source_desc.get_source(lineno))
+ except gdb.GdbError:
+ pass
+
+ selected_frame.select()
+
+ def get_remote_cython_globals_dict(self):
+ m = gdb.parse_and_eval('__pyx_m')
+
+ try:
+ PyModuleObject = gdb.lookup_type('PyModuleObject')
+ except RuntimeError:
+ raise gdb.GdbError(textwrap.dedent("""\
+ Unable to lookup type PyModuleObject, did you compile python
+ with debugging support (-g)?"""))
+
+ m = m.cast(PyModuleObject.pointer())
+ return m['md_dict']
+
+
+ def get_cython_globals_dict(self):
+ """
+ Get the Cython globals dict where the remote names are turned into
+ local strings.
+ """
+ remote_dict = self.get_remote_cython_globals_dict()
+ pyobject_dict = libpython.PyObjectPtr.from_pyobject_ptr(remote_dict)
+
+ result = {}
+ seen = set()
for k, v in pyobject_dict.items():
- result[k.proxyval(seen)] = v
-
- return result
-
- def print_gdb_value(self, name, value, max_name_length=None, prefix=''):
- if libpython.pretty_printer_lookup(value):
- typename = ''
- else:
- typename = '(%s) ' % (value.type,)
-
- if max_name_length is None:
- print('%s%s = %s%s' % (prefix, name, typename, value))
- else:
- print('%s%-*s = %s%s' % (prefix, max_name_length, name, typename, value))
-
- def is_initialized(self, cython_func, local_name):
- cyvar = cython_func.locals[local_name]
- cur_lineno = self.get_cython_lineno()
-
- if '->' in cyvar.cname:
- # Closed over free variable
- if cur_lineno > cython_func.lineno:
- if cyvar.type == PythonObject:
+ result[k.proxyval(seen)] = v
+
+ return result
+
+ def print_gdb_value(self, name, value, max_name_length=None, prefix=''):
+ if libpython.pretty_printer_lookup(value):
+ typename = ''
+ else:
+ typename = '(%s) ' % (value.type,)
+
+ if max_name_length is None:
+ print('%s%s = %s%s' % (prefix, name, typename, value))
+ else:
+ print('%s%-*s = %s%s' % (prefix, max_name_length, name, typename, value))
+
+ def is_initialized(self, cython_func, local_name):
+ cyvar = cython_func.locals[local_name]
+ cur_lineno = self.get_cython_lineno()
+
+ if '->' in cyvar.cname:
+ # Closed over free variable
+ if cur_lineno > cython_func.lineno:
+ if cyvar.type == PythonObject:
return int(gdb.parse_and_eval(cyvar.cname))
- return True
- return False
-
- return cur_lineno > cyvar.lineno
-
-
-class SourceFileDescriptor(object):
- def __init__(self, filename, lexer, formatter=None):
- self.filename = filename
- self.lexer = lexer
- self.formatter = formatter
-
- def valid(self):
- return self.filename is not None
-
- def lex(self, code):
- if pygments and self.lexer and parameters.colorize_code:
- bg = parameters.terminal_background.value
- if self.formatter is None:
- formatter = pygments.formatters.TerminalFormatter(bg=bg)
- else:
- formatter = self.formatter
-
- return pygments.highlight(code, self.lexer, formatter)
-
- return code
-
- def _get_source(self, start, stop, lex_source, mark_line, lex_entire):
- with open(self.filename) as f:
- # to provide "correct" colouring, the entire code needs to be
- # lexed. However, this makes a lot of things terribly slow, so
- # we decide not to. Besides, it's unlikely to matter.
-
- if lex_source and lex_entire:
- f = self.lex(f.read()).splitlines()
-
- slice = itertools.islice(f, start - 1, stop - 1)
-
- for idx, line in enumerate(slice):
- if start + idx == mark_line:
- prefix = '>'
- else:
- prefix = ' '
-
- if lex_source and not lex_entire:
- line = self.lex(line)
-
- yield '%s %4d %s' % (prefix, start + idx, line.rstrip())
-
- def get_source(self, start, stop=None, lex_source=True, mark_line=0,
- lex_entire=False):
- exc = gdb.GdbError('Unable to retrieve source code')
-
- if not self.filename:
- raise exc
-
- start = max(start, 1)
- if stop is None:
- stop = start + 1
-
- try:
- return '\n'.join(
- self._get_source(start, stop, lex_source, mark_line, lex_entire))
- except IOError:
- raise exc
-
-
-# Errors
-
-class CyGDBError(gdb.GdbError):
- """
+ return True
+ return False
+
+ return cur_lineno > cyvar.lineno
+
+
+class SourceFileDescriptor(object):
+ def __init__(self, filename, lexer, formatter=None):
+ self.filename = filename
+ self.lexer = lexer
+ self.formatter = formatter
+
+ def valid(self):
+ return self.filename is not None
+
+ def lex(self, code):
+ if pygments and self.lexer and parameters.colorize_code:
+ bg = parameters.terminal_background.value
+ if self.formatter is None:
+ formatter = pygments.formatters.TerminalFormatter(bg=bg)
+ else:
+ formatter = self.formatter
+
+ return pygments.highlight(code, self.lexer, formatter)
+
+ return code
+
+ def _get_source(self, start, stop, lex_source, mark_line, lex_entire):
+ with open(self.filename) as f:
+ # to provide "correct" colouring, the entire code needs to be
+ # lexed. However, this makes a lot of things terribly slow, so
+ # we decide not to. Besides, it's unlikely to matter.
+
+ if lex_source and lex_entire:
+ f = self.lex(f.read()).splitlines()
+
+ slice = itertools.islice(f, start - 1, stop - 1)
+
+ for idx, line in enumerate(slice):
+ if start + idx == mark_line:
+ prefix = '>'
+ else:
+ prefix = ' '
+
+ if lex_source and not lex_entire:
+ line = self.lex(line)
+
+ yield '%s %4d %s' % (prefix, start + idx, line.rstrip())
+
+ def get_source(self, start, stop=None, lex_source=True, mark_line=0,
+ lex_entire=False):
+ exc = gdb.GdbError('Unable to retrieve source code')
+
+ if not self.filename:
+ raise exc
+
+ start = max(start, 1)
+ if stop is None:
+ stop = start + 1
+
+ try:
+ return '\n'.join(
+ self._get_source(start, stop, lex_source, mark_line, lex_entire))
+ except IOError:
+ raise exc
+
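SourceFileDescriptor.lex() above colorizes a source line by passing it through pygments.highlight() with a Cython lexer and a terminal formatter; a small sketch of that call, assuming pygments is installed:

    import pygments
    import pygments.formatters
    import pygments.lexers

    code = 'cdef int c_var = 12\n'
    lexer = pygments.lexers.CythonLexer(stripall=False)
    formatter = pygments.formatters.TerminalFormatter(bg='dark')
    print(pygments.highlight(code, lexer, formatter))   # ANSI-colored source line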
+
+# Errors
+
+class CyGDBError(gdb.GdbError):
+ """
Base class for Cython-command related errors
- """
-
- def __init__(self, *args):
- args = args or (self.msg,)
- super(CyGDBError, self).__init__(*args)
-
-
-class NoCythonFunctionInFrameError(CyGDBError):
- """
- raised when the user requests the current cython function, which is
- unavailable
- """
- msg = "Current function is a function cygdb doesn't know about"
-
-
-class NoFunctionNameInFrameError(NoCythonFunctionInFrameError):
- """
- raised when the name of the C function could not be determined
- in the current C stack frame
- """
- msg = ('C function name could not be determined in the current C stack '
- 'frame')
-
-
-# Parameters
-
-class CythonParameter(gdb.Parameter):
- """
- Base class for cython parameters
- """
-
- def __init__(self, name, command_class, parameter_class, default=None):
- self.show_doc = self.set_doc = self.__class__.__doc__
- super(CythonParameter, self).__init__(name, command_class,
- parameter_class)
- if default is not None:
- self.value = default
-
- def __bool__(self):
- return bool(self.value)
-
- __nonzero__ = __bool__ # Python 2
-
-
-
-class CompleteUnqualifiedFunctionNames(CythonParameter):
- """
- Have 'cy break' complete unqualified function or method names.
- """
-
-
-class ColorizeSourceCode(CythonParameter):
- """
- Tell cygdb whether to colorize source code.
- """
-
-
-class TerminalBackground(CythonParameter):
- """
- Tell cygdb about the user's terminal background (light or dark).
- """
-
-
-class CythonParameters(object):
- """
- Simple container class that might get more functionality in the distant
- future (mostly to remind us that we're dealing with parameters).
- """
-
- def __init__(self):
- self.complete_unqualified = CompleteUnqualifiedFunctionNames(
- 'cy_complete_unqualified',
- gdb.COMMAND_BREAKPOINTS,
- gdb.PARAM_BOOLEAN,
- True)
- self.colorize_code = ColorizeSourceCode(
- 'cy_colorize_code',
- gdb.COMMAND_FILES,
- gdb.PARAM_BOOLEAN,
- True)
- self.terminal_background = TerminalBackground(
- 'cy_terminal_background_color',
- gdb.COMMAND_FILES,
- gdb.PARAM_STRING,
- "dark")
-
-parameters = CythonParameters()
-
-
-# Commands
-
-class CythonCommand(gdb.Command, CythonBase):
- """
- Base class for Cython commands
- """
-
- command_class = gdb.COMMAND_NONE
-
- @classmethod
- def _register(cls, clsname, args, kwargs):
- if not hasattr(cls, 'completer_class'):
- return cls(clsname, cls.command_class, *args, **kwargs)
- else:
- return cls(clsname, cls.command_class, cls.completer_class,
- *args, **kwargs)
-
- @classmethod
- def register(cls, *args, **kwargs):
- alias = getattr(cls, 'alias', None)
- if alias:
- cls._register(cls.alias, args, kwargs)
-
- return cls._register(cls.name, args, kwargs)
-
-
-class CyCy(CythonCommand):
- """
- Invoke a Cython command. Available commands are:
-
- cy import
- cy break
- cy step
- cy next
- cy run
- cy cont
- cy finish
- cy up
- cy down
- cy select
- cy bt / cy backtrace
- cy list
- cy print
- cy set
- cy locals
- cy globals
- cy exec
- """
-
- name = 'cy'
- command_class = gdb.COMMAND_NONE
- completer_class = gdb.COMPLETE_COMMAND
-
- def __init__(self, name, command_class, completer_class):
- # keep the signature 2.5 compatible (i.e. do not use f(*a, k=v))
- super(CythonCommand, self).__init__(name, command_class,
- completer_class, prefix=True)
-
- commands = dict(
- # GDB commands
- import_ = CyImport.register(),
- break_ = CyBreak.register(),
- step = CyStep.register(),
- next = CyNext.register(),
- run = CyRun.register(),
- cont = CyCont.register(),
- finish = CyFinish.register(),
- up = CyUp.register(),
- down = CyDown.register(),
- select = CySelect.register(),
- bt = CyBacktrace.register(),
- list = CyList.register(),
- print_ = CyPrint.register(),
- locals = CyLocals.register(),
- globals = CyGlobals.register(),
- exec_ = libpython.FixGdbCommand('cy exec', '-cy-exec'),
- _exec = CyExec.register(),
- set = CySet.register(),
-
- # GDB functions
- cy_cname = CyCName('cy_cname'),
- cy_cvalue = CyCValue('cy_cvalue'),
- cy_lineno = CyLine('cy_lineno'),
- cy_eval = CyEval('cy_eval'),
- )
-
- for command_name, command in commands.items():
- command.cy = self
- setattr(self, command_name, command)
-
- self.cy = self
-
- # Cython module namespace
- self.cython_namespace = {}
-
- # maps (unique) qualified function names (e.g.
- # cythonmodule.ClassName.method_name) to the CythonFunction object
- self.functions_by_qualified_name = {}
-
- # unique cnames of Cython functions
- self.functions_by_cname = {}
-
- # map function names like method_name to a list of all such
- # CythonFunction objects
- self.functions_by_name = collections.defaultdict(list)
-
-
-class CyImport(CythonCommand):
- """
- Import debug information outputted by the Cython compiler
- Example: cy import FILE...
- """
-
- name = 'cy import'
- command_class = gdb.COMMAND_STATUS
- completer_class = gdb.COMPLETE_FILENAME
-
- def invoke(self, args, from_tty):
+ """
+
+ def __init__(self, *args):
+ args = args or (self.msg,)
+ super(CyGDBError, self).__init__(*args)
+
+
+class NoCythonFunctionInFrameError(CyGDBError):
+ """
+ raised when the user requests the current cython function, which is
+ unavailable
+ """
+ msg = "Current function is a function cygdb doesn't know about"
+
+
+class NoFunctionNameInFrameError(NoCythonFunctionInFrameError):
+ """
+ raised when the name of the C function could not be determined
+ in the current C stack frame
+ """
+ msg = ('C function name could not be determined in the current C stack '
+ 'frame')
+
+
+# Parameters
+
+class CythonParameter(gdb.Parameter):
+ """
+ Base class for cython parameters
+ """
+
+ def __init__(self, name, command_class, parameter_class, default=None):
+ self.show_doc = self.set_doc = self.__class__.__doc__
+ super(CythonParameter, self).__init__(name, command_class,
+ parameter_class)
+ if default is not None:
+ self.value = default
+
+ def __bool__(self):
+ return bool(self.value)
+
+ __nonzero__ = __bool__ # Python 2
+
+
+
+class CompleteUnqualifiedFunctionNames(CythonParameter):
+ """
+ Have 'cy break' complete unqualified function or method names.
+ """
+
+
+class ColorizeSourceCode(CythonParameter):
+ """
+ Tell cygdb whether to colorize source code.
+ """
+
+
+class TerminalBackground(CythonParameter):
+ """
+ Tell cygdb about the user's terminal background (light or dark).
+ """
+
+
+class CythonParameters(object):
+ """
+ Simple container class that might get more functionality in the distant
+ future (mostly to remind us that we're dealing with parameters).
+ """
+
+ def __init__(self):
+ self.complete_unqualified = CompleteUnqualifiedFunctionNames(
+ 'cy_complete_unqualified',
+ gdb.COMMAND_BREAKPOINTS,
+ gdb.PARAM_BOOLEAN,
+ True)
+ self.colorize_code = ColorizeSourceCode(
+ 'cy_colorize_code',
+ gdb.COMMAND_FILES,
+ gdb.PARAM_BOOLEAN,
+ True)
+ self.terminal_background = TerminalBackground(
+ 'cy_terminal_background_color',
+ gdb.COMMAND_FILES,
+ gdb.PARAM_STRING,
+ "dark")
+
+parameters = CythonParameters()
+
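# A minimal, hypothetical sketch of how an additional cygdb parameter could be
# defined with the CythonParameter machinery above; the name
# 'cy_highlight_current_line' is invented purely for illustration.
class HighlightCurrentLine(CythonParameter):
    """
    Tell cygdb whether to highlight the current source line.
    """

highlight_current_line = HighlightCurrentLine(
    'cy_highlight_current_line',
    gdb.COMMAND_FILES,
    gdb.PARAM_BOOLEAN,
    True)
# From the gdb prompt:  set cy_highlight_current_line off
# From Python code:     bool(highlight_current_line) or
#                       gdb.parameter('cy_highlight_current_line')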
+
+# Commands
+
+class CythonCommand(gdb.Command, CythonBase):
+ """
+ Base class for Cython commands
+ """
+
+ command_class = gdb.COMMAND_NONE
+
+ @classmethod
+ def _register(cls, clsname, args, kwargs):
+ if not hasattr(cls, 'completer_class'):
+ return cls(clsname, cls.command_class, *args, **kwargs)
+ else:
+ return cls(clsname, cls.command_class, cls.completer_class,
+ *args, **kwargs)
+
+ @classmethod
+ def register(cls, *args, **kwargs):
+ alias = getattr(cls, 'alias', None)
+ if alias:
+ cls._register(cls.alias, args, kwargs)
+
+ return cls._register(cls.name, args, kwargs)
+
+
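# A minimal, hypothetical sketch of the register() pattern used by every
# command below: a subclass supplies 'name', a 'command_class', optionally a
# 'completer_class', and an invoke(); register() then instantiates it (and its
# alias, when one is defined). 'cy hello' is an invented example name.
class CyHello(CythonCommand):
    """
    Print a short greeting (invented example command).
    """
    name = 'cy hello'
    command_class = gdb.COMMAND_USER

    def invoke(self, args, from_tty):
        print('Hello from cygdb!')

# CyHello.register() would instantiate and register it, just as CyCy.__init__
# below does for the real commands (the 'cy' prefix command must exist first).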
+class CyCy(CythonCommand):
+ """
+ Invoke a Cython command. Available commands are:
+
+ cy import
+ cy break
+ cy step
+ cy next
+ cy run
+ cy cont
+ cy finish
+ cy up
+ cy down
+ cy select
+ cy bt / cy backtrace
+ cy list
+ cy print
+ cy set
+ cy locals
+ cy globals
+ cy exec
+ """
+
+ name = 'cy'
+ command_class = gdb.COMMAND_NONE
+ completer_class = gdb.COMPLETE_COMMAND
+
+ def __init__(self, name, command_class, completer_class):
+ # keep the signature 2.5 compatible (i.e. do not use f(*a, k=v))
+ super(CythonCommand, self).__init__(name, command_class,
+ completer_class, prefix=True)
+
+ commands = dict(
+ # GDB commands
+ import_ = CyImport.register(),
+ break_ = CyBreak.register(),
+ step = CyStep.register(),
+ next = CyNext.register(),
+ run = CyRun.register(),
+ cont = CyCont.register(),
+ finish = CyFinish.register(),
+ up = CyUp.register(),
+ down = CyDown.register(),
+ select = CySelect.register(),
+ bt = CyBacktrace.register(),
+ list = CyList.register(),
+ print_ = CyPrint.register(),
+ locals = CyLocals.register(),
+ globals = CyGlobals.register(),
+ exec_ = libpython.FixGdbCommand('cy exec', '-cy-exec'),
+ _exec = CyExec.register(),
+ set = CySet.register(),
+
+ # GDB functions
+ cy_cname = CyCName('cy_cname'),
+ cy_cvalue = CyCValue('cy_cvalue'),
+ cy_lineno = CyLine('cy_lineno'),
+ cy_eval = CyEval('cy_eval'),
+ )
+
+ for command_name, command in commands.items():
+ command.cy = self
+ setattr(self, command_name, command)
+
+ self.cy = self
+
+ # Cython module namespace
+ self.cython_namespace = {}
+
+ # maps (unique) qualified function names (e.g.
+ # cythonmodule.ClassName.method_name) to the CythonFunction object
+ self.functions_by_qualified_name = {}
+
+ # unique cnames of Cython functions
+ self.functions_by_cname = {}
+
+ # map function names like method_name to a list of all such
+ # CythonFunction objects
+ self.functions_by_name = collections.defaultdict(list)
+
+
+class CyImport(CythonCommand):
+ """
+ Import debug information outputted by the Cython compiler
+ Example: cy import FILE...
+ """
+
+ name = 'cy import'
+ command_class = gdb.COMMAND_STATUS
+ completer_class = gdb.COMPLETE_FILENAME
+
+ def invoke(self, args, from_tty):
if isinstance(args, BYTES):
args = args.decode(_filesystemencoding)
- for arg in string_to_argv(args):
- try:
- f = open(arg)
- except OSError as e:
- raise gdb.GdbError('Unable to open file %r: %s' % (args, e.args[1]))
-
- t = etree.parse(f)
-
- for module in t.getroot():
- cython_module = CythonModule(**module.attrib)
- self.cy.cython_namespace[cython_module.name] = cython_module
-
- for variable in module.find('Globals'):
- d = variable.attrib
- cython_module.globals[d['name']] = CythonVariable(**d)
-
- for function in module.find('Functions'):
- cython_function = CythonFunction(module=cython_module,
- **function.attrib)
-
- # update the global function mappings
- name = cython_function.name
- qname = cython_function.qualified_name
-
- self.cy.functions_by_name[name].append(cython_function)
- self.cy.functions_by_qualified_name[
- cython_function.qualified_name] = cython_function
- self.cy.functions_by_cname[
- cython_function.cname] = cython_function
-
- d = cython_module.functions[qname] = cython_function
-
- for local in function.find('Locals'):
- d = local.attrib
- cython_function.locals[d['name']] = CythonVariable(**d)
-
- for step_into_func in function.find('StepIntoFunctions'):
- d = step_into_func.attrib
- cython_function.step_into_functions.add(d['name'])
-
- cython_function.arguments.extend(
- funcarg.tag for funcarg in function.find('Arguments'))
-
- for marker in module.find('LineNumberMapping'):
- cython_lineno = int(marker.attrib['cython_lineno'])
+ for arg in string_to_argv(args):
+ try:
+ f = open(arg)
+ except OSError as e:
+ raise gdb.GdbError('Unable to open file %r: %s' % (args, e.args[1]))
+
+ t = etree.parse(f)
+
+ for module in t.getroot():
+ cython_module = CythonModule(**module.attrib)
+ self.cy.cython_namespace[cython_module.name] = cython_module
+
+ for variable in module.find('Globals'):
+ d = variable.attrib
+ cython_module.globals[d['name']] = CythonVariable(**d)
+
+ for function in module.find('Functions'):
+ cython_function = CythonFunction(module=cython_module,
+ **function.attrib)
+
+ # update the global function mappings
+ name = cython_function.name
+ qname = cython_function.qualified_name
+
+ self.cy.functions_by_name[name].append(cython_function)
+ self.cy.functions_by_qualified_name[
+ cython_function.qualified_name] = cython_function
+ self.cy.functions_by_cname[
+ cython_function.cname] = cython_function
+
+ d = cython_module.functions[qname] = cython_function
+
+ for local in function.find('Locals'):
+ d = local.attrib
+ cython_function.locals[d['name']] = CythonVariable(**d)
+
+ for step_into_func in function.find('StepIntoFunctions'):
+ d = step_into_func.attrib
+ cython_function.step_into_functions.add(d['name'])
+
+ cython_function.arguments.extend(
+ funcarg.tag for funcarg in function.find('Arguments'))
+
+ for marker in module.find('LineNumberMapping'):
+ cython_lineno = int(marker.attrib['cython_lineno'])
c_linenos = list(map(int, marker.attrib['c_linenos'].split()))
- cython_module.lineno_cy2c[cython_lineno] = min(c_linenos)
- for c_lineno in c_linenos:
- cython_module.lineno_c2cy[c_lineno] = cython_lineno
-
-
-class CyBreak(CythonCommand):
- """
- Set a breakpoint for Cython code using Cython qualified name notation, e.g.:
-
- cy break cython_modulename.ClassName.method_name...
-
- or normal notation:
-
- cy break function_or_method_name...
-
- or for a line number:
-
- cy break cython_module:lineno...
-
- Set a Python breakpoint:
- Break on any function or method named 'func' in module 'modname'
-
- cy break -p modname.func...
-
- Break on any function or method named 'func'
-
- cy break -p func...
- """
-
- name = 'cy break'
- command_class = gdb.COMMAND_BREAKPOINTS
-
- def _break_pyx(self, name):
- modulename, _, lineno = name.partition(':')
- lineno = int(lineno)
- if modulename:
- cython_module = self.cy.cython_namespace[modulename]
- else:
- cython_module = self.get_cython_function().module
-
- if lineno in cython_module.lineno_cy2c:
- c_lineno = cython_module.lineno_cy2c[lineno]
- breakpoint = '%s:%s' % (cython_module.c_filename, c_lineno)
- gdb.execute('break ' + breakpoint)
- else:
- raise gdb.GdbError("Not a valid line number. "
- "Does it contain actual code?")
-
- def _break_funcname(self, funcname):
- func = self.cy.functions_by_qualified_name.get(funcname)
-
- if func and func.is_initmodule_function:
- func = None
-
- break_funcs = [func]
-
- if not func:
- funcs = self.cy.functions_by_name.get(funcname) or []
- funcs = [f for f in funcs if not f.is_initmodule_function]
-
- if not funcs:
- gdb.execute('break ' + funcname)
- return
-
- if len(funcs) > 1:
- # multiple functions, let the user pick one
- print('There are multiple such functions:')
- for idx, func in enumerate(funcs):
- print('%3d) %s' % (idx, func.qualified_name))
-
- while True:
- try:
+ cython_module.lineno_cy2c[cython_lineno] = min(c_linenos)
+ for c_lineno in c_linenos:
+ cython_module.lineno_c2cy[c_lineno] = cython_lineno
+
+
+class CyBreak(CythonCommand):
+ """
+ Set a breakpoint for Cython code using Cython qualified name notation, e.g.:
+
+ cy break cython_modulename.ClassName.method_name...
+
+ or normal notation:
+
+ cy break function_or_method_name...
+
+ or for a line number:
+
+ cy break cython_module:lineno...
+
+ Set a Python breakpoint:
+ Break on any function or method named 'func' in module 'modname'
+
+ cy break -p modname.func...
+
+ Break on any function or method named 'func'
+
+ cy break -p func...
+ """
+
+ name = 'cy break'
+ command_class = gdb.COMMAND_BREAKPOINTS
+
+ def _break_pyx(self, name):
+ modulename, _, lineno = name.partition(':')
+ lineno = int(lineno)
+ if modulename:
+ cython_module = self.cy.cython_namespace[modulename]
+ else:
+ cython_module = self.get_cython_function().module
+
+ if lineno in cython_module.lineno_cy2c:
+ c_lineno = cython_module.lineno_cy2c[lineno]
+ breakpoint = '%s:%s' % (cython_module.c_filename, c_lineno)
+ gdb.execute('break ' + breakpoint)
+ else:
+ raise gdb.GdbError("Not a valid line number. "
+ "Does it contain actual code?")
+
+ def _break_funcname(self, funcname):
+ func = self.cy.functions_by_qualified_name.get(funcname)
+
+ if func and func.is_initmodule_function:
+ func = None
+
+ break_funcs = [func]
+
+ if not func:
+ funcs = self.cy.functions_by_name.get(funcname) or []
+ funcs = [f for f in funcs if not f.is_initmodule_function]
+
+ if not funcs:
+ gdb.execute('break ' + funcname)
+ return
+
+ if len(funcs) > 1:
+ # multiple functions, let the user pick one
+ print('There are multiple such functions:')
+ for idx, func in enumerate(funcs):
+ print('%3d) %s' % (idx, func.qualified_name))
+
+ while True:
+ try:
result = input(
- "Select a function, press 'a' for all "
- "functions or press 'q' or '^D' to quit: ")
- except EOFError:
- return
- else:
- if result.lower() == 'q':
- return
- elif result.lower() == 'a':
- break_funcs = funcs
- break
- elif (result.isdigit() and
- 0 <= int(result) < len(funcs)):
- break_funcs = [funcs[int(result)]]
- break
- else:
- print('Not understood...')
- else:
- break_funcs = [funcs[0]]
-
- for func in break_funcs:
- gdb.execute('break %s' % func.cname)
- if func.pf_cname:
- gdb.execute('break %s' % func.pf_cname)
-
- def invoke(self, function_names, from_tty):
+ "Select a function, press 'a' for all "
+ "functions or press 'q' or '^D' to quit: ")
+ except EOFError:
+ return
+ else:
+ if result.lower() == 'q':
+ return
+ elif result.lower() == 'a':
+ break_funcs = funcs
+ break
+ elif (result.isdigit() and
+ 0 <= int(result) < len(funcs)):
+ break_funcs = [funcs[int(result)]]
+ break
+ else:
+ print('Not understood...')
+ else:
+ break_funcs = [funcs[0]]
+
+ for func in break_funcs:
+ gdb.execute('break %s' % func.cname)
+ if func.pf_cname:
+ gdb.execute('break %s' % func.pf_cname)
+
+ def invoke(self, function_names, from_tty):
if isinstance(function_names, BYTES):
function_names = function_names.decode(_filesystemencoding)
argv = string_to_argv(function_names)
- if function_names.startswith('-p'):
- argv = argv[1:]
- python_breakpoints = True
- else:
- python_breakpoints = False
-
- for funcname in argv:
- if python_breakpoints:
- gdb.execute('py-break %s' % funcname)
- elif ':' in funcname:
- self._break_pyx(funcname)
- else:
- self._break_funcname(funcname)
-
- @dont_suppress_errors
- def complete(self, text, word):
- # Filter init-module functions (breakpoints can be set using
- # modulename:linenumber).
+ if function_names.startswith('-p'):
+ argv = argv[1:]
+ python_breakpoints = True
+ else:
+ python_breakpoints = False
+
+ for funcname in argv:
+ if python_breakpoints:
+ gdb.execute('py-break %s' % funcname)
+ elif ':' in funcname:
+ self._break_pyx(funcname)
+ else:
+ self._break_funcname(funcname)
+
+ @dont_suppress_errors
+ def complete(self, text, word):
+ # Filter init-module functions (breakpoints can be set using
+ # modulename:linenumber).
names = [n for n, L in self.cy.functions_by_name.items()
if any(not f.is_initmodule_function for f in L)]
qnames = [n for n, f in self.cy.functions_by_qualified_name.items()
if not f.is_initmodule_function]
-
- if parameters.complete_unqualified:
- all_names = itertools.chain(qnames, names)
- else:
- all_names = qnames
-
- words = text.strip().split()
- if not words or '.' not in words[-1]:
- # complete unqualified
- seen = set(text[:-len(word)].split())
- return [n for n in all_names
- if n.startswith(word) and n not in seen]
-
- # complete qualified name
- lastword = words[-1]
- compl = [n for n in qnames if n.startswith(lastword)]
-
- if len(lastword) > len(word):
- # readline sees something (e.g. a '.') as a word boundary, so don't
- # "recomplete" this prefix
- strip_prefix_length = len(lastword) - len(word)
- compl = [n[strip_prefix_length:] for n in compl]
-
- return compl
-
-
-class CythonInfo(CythonBase, libpython.PythonInfo):
- """
- Implementation of the interface dictated by libpython.LanguageInfo.
- """
-
- def lineno(self, frame):
- # Take care of the Python and Cython levels. We need to care for both
+
+ if parameters.complete_unqualified:
+ all_names = itertools.chain(qnames, names)
+ else:
+ all_names = qnames
+
+ words = text.strip().split()
+ if not words or '.' not in words[-1]:
+ # complete unqualified
+ seen = set(text[:-len(word)].split())
+ return [n for n in all_names
+ if n.startswith(word) and n not in seen]
+
+ # complete qualified name
+ lastword = words[-1]
+ compl = [n for n in qnames if n.startswith(lastword)]
+
+ if len(lastword) > len(word):
+ # readline sees something (e.g. a '.') as a word boundary, so don't
+ # "recomplete" this prefix
+ strip_prefix_length = len(lastword) - len(word)
+ compl = [n[strip_prefix_length:] for n in compl]
+
+ return compl
+
+
+class CythonInfo(CythonBase, libpython.PythonInfo):
+ """
+ Implementation of the interface dictated by libpython.LanguageInfo.
+ """
+
+ def lineno(self, frame):
+ # Take care of the Python and Cython levels. We need to care for both
# as we can't simply dispatch to 'py-step', since that would work for
- # stepping through Python code, but it would not step back into Cython-
- # related code. The C level should be dispatched to the 'step' command.
- if self.is_cython_function(frame):
- return self.get_cython_lineno(frame)
- return super(CythonInfo, self).lineno(frame)
-
- def get_source_line(self, frame):
- try:
- line = super(CythonInfo, self).get_source_line(frame)
- except gdb.GdbError:
- return None
- else:
- return line.strip() or None
-
- def exc_info(self, frame):
- if self.is_python_function:
- return super(CythonInfo, self).exc_info(frame)
-
- def runtime_break_functions(self):
- if self.is_cython_function():
- return self.get_cython_function().step_into_functions
- return ()
-
- def static_break_functions(self):
- result = ['PyEval_EvalFrameEx']
- result.extend(self.cy.functions_by_cname)
- return result
-
-
-class CythonExecutionControlCommand(CythonCommand,
- libpython.ExecutionControlCommandBase):
-
- @classmethod
- def register(cls):
- return cls(cls.name, cython_info)
-
-
-class CyStep(CythonExecutionControlCommand, libpython.PythonStepperMixin):
- "Step through Cython, Python or C code."
-
- name = 'cy -step'
- stepinto = True
-
- def invoke(self, args, from_tty):
- if self.is_python_function():
- self.python_step(self.stepinto)
- elif not self.is_cython_function():
- if self.stepinto:
- command = 'step'
- else:
- command = 'next'
-
- self.finish_executing(gdb.execute(command, to_string=True))
- else:
- self.step(stepinto=self.stepinto)
-
-
-class CyNext(CyStep):
- "Step-over Cython, Python or C code."
-
- name = 'cy -next'
- stepinto = False
-
-
-class CyRun(CythonExecutionControlCommand):
- """
- Run a Cython program. This is like the 'run' command, except that it
- displays Cython or Python source lines as well
- """
-
- name = 'cy run'
-
- invoke = CythonExecutionControlCommand.run
-
-
-class CyCont(CythonExecutionControlCommand):
- """
- Continue a Cython program. This is like the 'run' command, except that it
- displays Cython or Python source lines as well.
- """
-
- name = 'cy cont'
- invoke = CythonExecutionControlCommand.cont
-
-
-class CyFinish(CythonExecutionControlCommand):
- """
- Execute until the function returns.
- """
- name = 'cy finish'
-
- invoke = CythonExecutionControlCommand.finish
-
-
-class CyUp(CythonCommand):
- """
- Go up a Cython, Python or relevant C frame.
- """
- name = 'cy up'
- _command = 'up'
-
- def invoke(self, *args):
- try:
- gdb.execute(self._command, to_string=True)
- while not self.is_relevant_function(gdb.selected_frame()):
- gdb.execute(self._command, to_string=True)
- except RuntimeError as e:
- raise gdb.GdbError(*e.args)
-
- frame = gdb.selected_frame()
- index = 0
- while frame:
- frame = frame.older()
- index += 1
-
- self.print_stackframe(index=index - 1)
-
-
-class CyDown(CyUp):
- """
- Go down a Cython, Python or relevant C frame.
- """
-
- name = 'cy down'
- _command = 'down'
-
-
-class CySelect(CythonCommand):
- """
- Select a frame. Use frame numbers as listed in `cy backtrace`.
- This command is useful because `cy backtrace` prints a reversed backtrace.
- """
-
- name = 'cy select'
-
- def invoke(self, stackno, from_tty):
- try:
- stackno = int(stackno)
- except ValueError:
- raise gdb.GdbError("Not a valid number: %r" % (stackno,))
-
- frame = gdb.selected_frame()
- while frame.newer():
- frame = frame.newer()
-
- stackdepth = libpython.stackdepth(frame)
-
- try:
- gdb.execute('select %d' % (stackdepth - stackno - 1,))
- except RuntimeError as e:
- raise gdb.GdbError(*e.args)
-
-
-class CyBacktrace(CythonCommand):
- 'Print the Cython stack'
-
- name = 'cy bt'
- alias = 'cy backtrace'
- command_class = gdb.COMMAND_STACK
- completer_class = gdb.COMPLETE_NONE
-
- @require_running_program
- def invoke(self, args, from_tty):
- # get the first frame
- frame = gdb.selected_frame()
- while frame.older():
- frame = frame.older()
-
- print_all = args == '-a'
-
- index = 0
- while frame:
- try:
- is_relevant = self.is_relevant_function(frame)
- except CyGDBError:
- is_relevant = False
-
- if print_all or is_relevant:
- self.print_stackframe(frame, index)
-
- index += 1
- frame = frame.newer()
-
-
-class CyList(CythonCommand):
- """
- List Cython source code. To disable or customize colouring, see the cy_*
- parameters.
- """
-
- name = 'cy list'
- command_class = gdb.COMMAND_FILES
- completer_class = gdb.COMPLETE_NONE
-
- # @dispatch_on_frame(c_command='list')
- def invoke(self, _, from_tty):
- sd, lineno = self.get_source_desc()
- source = sd.get_source(lineno - 5, lineno + 5, mark_line=lineno,
- lex_entire=True)
- print(source)
-
-
-class CyPrint(CythonCommand):
- """
- Print a Cython variable using 'cy-print x' or 'cy-print module.function.x'
- """
-
- name = 'cy print'
- command_class = gdb.COMMAND_DATA
-
- def invoke(self, name, from_tty, max_name_length=None):
- if self.is_python_function():
- return gdb.execute('py-print ' + name)
- elif self.is_cython_function():
- value = self.cy.cy_cvalue.invoke(name.lstrip('*'))
- for c in name:
- if c == '*':
- value = value.dereference()
- else:
- break
-
- self.print_gdb_value(name, value, max_name_length)
- else:
- gdb.execute('print ' + name)
-
- def complete(self):
- if self.is_cython_function():
- f = self.get_cython_function()
- return list(itertools.chain(f.locals, f.globals))
- else:
- return []
-
-
-sortkey = lambda item: item[0].lower()
-
-
-class CyLocals(CythonCommand):
- """
- List the locals from the current Cython frame.
- """
-
- name = 'cy locals'
- command_class = gdb.COMMAND_STACK
- completer_class = gdb.COMPLETE_NONE
-
- @dispatch_on_frame(c_command='info locals', python_command='py-locals')
- def invoke(self, args, from_tty):
- cython_function = self.get_cython_function()
-
- if cython_function.is_initmodule_function:
- self.cy.globals.invoke(args, from_tty)
- return
-
- local_cython_vars = cython_function.locals
- max_name_length = len(max(local_cython_vars, key=len))
+ # stepping through Python code, but it would not step back into Cython-
+ # related code. The C level should be dispatched to the 'step' command.
+ if self.is_cython_function(frame):
+ return self.get_cython_lineno(frame)
+ return super(CythonInfo, self).lineno(frame)
+
+ def get_source_line(self, frame):
+ try:
+ line = super(CythonInfo, self).get_source_line(frame)
+ except gdb.GdbError:
+ return None
+ else:
+ return line.strip() or None
+
+ def exc_info(self, frame):
+ if self.is_python_function:
+ return super(CythonInfo, self).exc_info(frame)
+
+ def runtime_break_functions(self):
+ if self.is_cython_function():
+ return self.get_cython_function().step_into_functions
+ return ()
+
+ def static_break_functions(self):
+ result = ['PyEval_EvalFrameEx']
+ result.extend(self.cy.functions_by_cname)
+ return result
+
+
+class CythonExecutionControlCommand(CythonCommand,
+ libpython.ExecutionControlCommandBase):
+
+ @classmethod
+ def register(cls):
+ return cls(cls.name, cython_info)
+
+
+class CyStep(CythonExecutionControlCommand, libpython.PythonStepperMixin):
+ "Step through Cython, Python or C code."
+
+ name = 'cy -step'
+ stepinto = True
+
+ def invoke(self, args, from_tty):
+ if self.is_python_function():
+ self.python_step(self.stepinto)
+ elif not self.is_cython_function():
+ if self.stepinto:
+ command = 'step'
+ else:
+ command = 'next'
+
+ self.finish_executing(gdb.execute(command, to_string=True))
+ else:
+ self.step(stepinto=self.stepinto)
+
+
+class CyNext(CyStep):
+ "Step-over Cython, Python or C code."
+
+ name = 'cy -next'
+ stepinto = False
+
+
+class CyRun(CythonExecutionControlCommand):
+ """
+ Run a Cython program. This is like the 'run' command, except that it
+ displays Cython or Python source lines as well
+ """
+
+ name = 'cy run'
+
+ invoke = CythonExecutionControlCommand.run
+
+
+class CyCont(CythonExecutionControlCommand):
+ """
+ Continue a Cython program. This is like the 'run' command, except that it
+ displays Cython or Python source lines as well.
+ """
+
+ name = 'cy cont'
+ invoke = CythonExecutionControlCommand.cont
+
+
+class CyFinish(CythonExecutionControlCommand):
+ """
+ Execute until the function returns.
+ """
+ name = 'cy finish'
+
+ invoke = CythonExecutionControlCommand.finish
+
+
+class CyUp(CythonCommand):
+ """
+ Go up a Cython, Python or relevant C frame.
+ """
+ name = 'cy up'
+ _command = 'up'
+
+ def invoke(self, *args):
+ try:
+ gdb.execute(self._command, to_string=True)
+ while not self.is_relevant_function(gdb.selected_frame()):
+ gdb.execute(self._command, to_string=True)
+ except RuntimeError as e:
+ raise gdb.GdbError(*e.args)
+
+ frame = gdb.selected_frame()
+ index = 0
+ while frame:
+ frame = frame.older()
+ index += 1
+
+ self.print_stackframe(index=index - 1)
+
+
+class CyDown(CyUp):
+ """
+ Go down a Cython, Python or relevant C frame.
+ """
+
+ name = 'cy down'
+ _command = 'down'
+
+
+class CySelect(CythonCommand):
+ """
+ Select a frame. Use frame numbers as listed in `cy backtrace`.
+ This command is useful because `cy backtrace` prints a reversed backtrace.
+ """
+
+ name = 'cy select'
+
+ def invoke(self, stackno, from_tty):
+ try:
+ stackno = int(stackno)
+ except ValueError:
+ raise gdb.GdbError("Not a valid number: %r" % (stackno,))
+
+ frame = gdb.selected_frame()
+ while frame.newer():
+ frame = frame.newer()
+
+ stackdepth = libpython.stackdepth(frame)
+
+ try:
+ gdb.execute('select %d' % (stackdepth - stackno - 1,))
+ except RuntimeError as e:
+ raise gdb.GdbError(*e.args)
+
+
+class CyBacktrace(CythonCommand):
+ 'Print the Cython stack'
+
+ name = 'cy bt'
+ alias = 'cy backtrace'
+ command_class = gdb.COMMAND_STACK
+ completer_class = gdb.COMPLETE_NONE
+
+ @require_running_program
+ def invoke(self, args, from_tty):
+ # get the first frame
+ frame = gdb.selected_frame()
+ while frame.older():
+ frame = frame.older()
+
+ print_all = args == '-a'
+
+ index = 0
+ while frame:
+ try:
+ is_relevant = self.is_relevant_function(frame)
+ except CyGDBError:
+ is_relevant = False
+
+ if print_all or is_relevant:
+ self.print_stackframe(frame, index)
+
+ index += 1
+ frame = frame.newer()
+
+
+class CyList(CythonCommand):
+ """
+ List Cython source code. To disable or customize colouring, see the cy_*
+ parameters.
+ """
+
+ name = 'cy list'
+ command_class = gdb.COMMAND_FILES
+ completer_class = gdb.COMPLETE_NONE
+
+ # @dispatch_on_frame(c_command='list')
+ def invoke(self, _, from_tty):
+ sd, lineno = self.get_source_desc()
+ source = sd.get_source(lineno - 5, lineno + 5, mark_line=lineno,
+ lex_entire=True)
+ print(source)
+
+
+class CyPrint(CythonCommand):
+ """
+ Print a Cython variable using 'cy-print x' or 'cy-print module.function.x'
+ """
+
+ name = 'cy print'
+ command_class = gdb.COMMAND_DATA
+
+ def invoke(self, name, from_tty, max_name_length=None):
+ if self.is_python_function():
+ return gdb.execute('py-print ' + name)
+ elif self.is_cython_function():
+ value = self.cy.cy_cvalue.invoke(name.lstrip('*'))
+ for c in name:
+ if c == '*':
+ value = value.dereference()
+ else:
+ break
+
+ self.print_gdb_value(name, value, max_name_length)
+ else:
+ gdb.execute('print ' + name)
+
+ def complete(self):
+ if self.is_cython_function():
+ f = self.get_cython_function()
+ return list(itertools.chain(f.locals, f.globals))
+ else:
+ return []
+
+
+sortkey = lambda item: item[0].lower()
+
+
+class CyLocals(CythonCommand):
+ """
+ List the locals from the current Cython frame.
+ """
+
+ name = 'cy locals'
+ command_class = gdb.COMMAND_STACK
+ completer_class = gdb.COMPLETE_NONE
+
+ @dispatch_on_frame(c_command='info locals', python_command='py-locals')
+ def invoke(self, args, from_tty):
+ cython_function = self.get_cython_function()
+
+ if cython_function.is_initmodule_function:
+ self.cy.globals.invoke(args, from_tty)
+ return
+
+ local_cython_vars = cython_function.locals
+ max_name_length = len(max(local_cython_vars, key=len))
for name, cyvar in sorted(local_cython_vars.items(), key=sortkey):
- if self.is_initialized(self.get_cython_function(), cyvar.name):
- value = gdb.parse_and_eval(cyvar.cname)
- if not value.is_optimized_out:
- self.print_gdb_value(cyvar.name, value,
- max_name_length, '')
-
-
-class CyGlobals(CyLocals):
- """
- List the globals from the current Cython module.
- """
-
- name = 'cy globals'
- command_class = gdb.COMMAND_STACK
- completer_class = gdb.COMPLETE_NONE
-
- @dispatch_on_frame(c_command='info variables', python_command='py-globals')
- def invoke(self, args, from_tty):
- global_python_dict = self.get_cython_globals_dict()
- module_globals = self.get_cython_function().module.globals
-
- max_globals_len = 0
- max_globals_dict_len = 0
- if module_globals:
- max_globals_len = len(max(module_globals, key=len))
- if global_python_dict:
- max_globals_dict_len = len(max(global_python_dict))
-
- max_name_length = max(max_globals_len, max_globals_dict_len)
-
- seen = set()
- print('Python globals:')
+ if self.is_initialized(self.get_cython_function(), cyvar.name):
+ value = gdb.parse_and_eval(cyvar.cname)
+ if not value.is_optimized_out:
+ self.print_gdb_value(cyvar.name, value,
+ max_name_length, '')
+
+
+class CyGlobals(CyLocals):
+ """
+ List the globals from the current Cython module.
+ """
+
+ name = 'cy globals'
+ command_class = gdb.COMMAND_STACK
+ completer_class = gdb.COMPLETE_NONE
+
+ @dispatch_on_frame(c_command='info variables', python_command='py-globals')
+ def invoke(self, args, from_tty):
+ global_python_dict = self.get_cython_globals_dict()
+ module_globals = self.get_cython_function().module.globals
+
+ max_globals_len = 0
+ max_globals_dict_len = 0
+ if module_globals:
+ max_globals_len = len(max(module_globals, key=len))
+ if global_python_dict:
+ max_globals_dict_len = len(max(global_python_dict))
+
+ max_name_length = max(max_globals_len, max_globals_dict_len)
+
+ seen = set()
+ print('Python globals:')
for k, v in sorted(global_python_dict.items(), key=sortkey):
- v = v.get_truncated_repr(libpython.MAX_OUTPUT_LEN)
- seen.add(k)
- print(' %-*s = %s' % (max_name_length, k, v))
-
- print('C globals:')
+ v = v.get_truncated_repr(libpython.MAX_OUTPUT_LEN)
+ seen.add(k)
+ print(' %-*s = %s' % (max_name_length, k, v))
+
+ print('C globals:')
for name, cyvar in sorted(module_globals.items(), key=sortkey):
- if name not in seen:
- try:
- value = gdb.parse_and_eval(cyvar.cname)
- except RuntimeError:
- pass
- else:
- if not value.is_optimized_out:
- self.print_gdb_value(cyvar.name, value,
- max_name_length, ' ')
-
-
-class EvaluateOrExecuteCodeMixin(object):
- """
- Evaluate or execute Python code in a Cython or Python frame. The 'evalcode'
- method evaluations Python code, prints a traceback if an exception went
- method evaluates Python code, prints a traceback if an exception went
- """
-
- def _fill_locals_dict(self, executor, local_dict_pointer):
- "Fill a remotely allocated dict with values from the Cython C stack"
- cython_func = self.get_cython_function()
-
+ if name not in seen:
+ try:
+ value = gdb.parse_and_eval(cyvar.cname)
+ except RuntimeError:
+ pass
+ else:
+ if not value.is_optimized_out:
+ self.print_gdb_value(cyvar.name, value,
+ max_name_length, ' ')
+
+
+class EvaluateOrExecuteCodeMixin(object):
+ """
+ Evaluate or execute Python code in a Cython or Python frame. The 'evalcode'
+ method evaluates Python code, prints a traceback if an exception went
+ uncaught, and returns any return value as a gdb.Value (NULL on exception).
+ """
+
+ def _fill_locals_dict(self, executor, local_dict_pointer):
+ "Fill a remotely allocated dict with values from the Cython C stack"
+ cython_func = self.get_cython_function()
+
for name, cyvar in cython_func.locals.items():
if cyvar.type == PythonObject and self.is_initialized(cython_func, name):
- try:
- val = gdb.parse_and_eval(cyvar.cname)
- except RuntimeError:
- continue
- else:
- if val.is_optimized_out:
- continue
-
- pystringp = executor.alloc_pystring(name)
- code = '''
- (PyObject *) PyDict_SetItem(
- (PyObject *) %d,
- (PyObject *) %d,
- (PyObject *) %s)
- ''' % (local_dict_pointer, pystringp, cyvar.cname)
-
- try:
- if gdb.parse_and_eval(code) < 0:
- gdb.parse_and_eval('PyErr_Print()')
- raise gdb.GdbError("Unable to execute Python code.")
- finally:
- # PyDict_SetItem doesn't steal our reference
- executor.xdecref(pystringp)
-
- def _find_first_cython_or_python_frame(self):
- frame = gdb.selected_frame()
- while frame:
- if (self.is_cython_function(frame) or
- self.is_python_function(frame)):
- frame.select()
- return frame
-
- frame = frame.older()
-
- raise gdb.GdbError("There is no Cython or Python frame on the stack.")
-
- def _evalcode_cython(self, executor, code, input_type):
- with libpython.FetchAndRestoreError():
- # get the dict of Cython globals and construct a dict in the
- # inferior with Cython locals
- global_dict = gdb.parse_and_eval(
- '(PyObject *) PyModule_GetDict(__pyx_m)')
- local_dict = gdb.parse_and_eval('(PyObject *) PyDict_New()')
-
- try:
- self._fill_locals_dict(executor,
- libpython.pointervalue(local_dict))
- result = executor.evalcode(code, input_type, global_dict,
- local_dict)
- finally:
- executor.xdecref(libpython.pointervalue(local_dict))
-
- return result
-
- def evalcode(self, code, input_type):
- """
- Evaluate `code` in a Python or Cython stack frame using the given
- `input_type`.
- """
- frame = self._find_first_cython_or_python_frame()
- executor = libpython.PythonCodeExecutor()
- if self.is_python_function(frame):
- return libpython._evalcode_python(executor, code, input_type)
- return self._evalcode_cython(executor, code, input_type)
-
-
-class CyExec(CythonCommand, libpython.PyExec, EvaluateOrExecuteCodeMixin):
- """
- Execute Python code in the nearest Python or Cython frame.
- """
-
- name = '-cy-exec'
- command_class = gdb.COMMAND_STACK
- completer_class = gdb.COMPLETE_NONE
-
- def invoke(self, expr, from_tty):
- expr, input_type = self.readcode(expr)
- executor = libpython.PythonCodeExecutor()
- executor.xdecref(self.evalcode(expr, executor.Py_single_input))
-
-
-class CySet(CythonCommand):
- """
- Set a Cython variable to a certain value
-
- cy set my_cython_c_variable = 10
- cy set my_cython_py_variable = $cy_eval("{'doner': 'kebab'}")
-
- This is equivalent to
-
- set $cy_value("my_cython_variable") = 10
- """
-
- name = 'cy set'
- command_class = gdb.COMMAND_DATA
- completer_class = gdb.COMPLETE_NONE
-
- @require_cython_frame
- def invoke(self, expr, from_tty):
- name_and_expr = expr.split('=', 1)
- if len(name_and_expr) != 2:
- raise gdb.GdbError("Invalid expression. Use 'cy set var = expr'.")
-
- varname, expr = name_and_expr
- cname = self.cy.cy_cname.invoke(varname.strip())
- gdb.execute("set %s = %s" % (cname, expr))
-
-
-# Functions
-
-class CyCName(gdb.Function, CythonBase):
- """
- Get the C name of a Cython variable in the current context.
- Examples:
-
- print $cy_cname("function")
- print $cy_cname("Class.method")
- print $cy_cname("module.function")
- """
-
- @require_cython_frame
- @gdb_function_value_to_unicode
- def invoke(self, cyname, frame=None):
- frame = frame or gdb.selected_frame()
- cname = None
-
- if self.is_cython_function(frame):
- cython_function = self.get_cython_function(frame)
- if cyname in cython_function.locals:
- cname = cython_function.locals[cyname].cname
- elif cyname in cython_function.module.globals:
- cname = cython_function.module.globals[cyname].cname
- else:
- qname = '%s.%s' % (cython_function.module.name, cyname)
- if qname in cython_function.module.functions:
- cname = cython_function.module.functions[qname].cname
-
- if not cname:
- cname = self.cy.functions_by_qualified_name.get(cyname)
-
- if not cname:
- raise gdb.GdbError('No such Cython variable: %s' % cyname)
-
- return cname
-
-
-class CyCValue(CyCName):
- """
- Get the value of a Cython variable.
- """
-
- @require_cython_frame
- @gdb_function_value_to_unicode
- def invoke(self, cyname, frame=None):
- globals_dict = self.get_cython_globals_dict()
- cython_function = self.get_cython_function(frame)
-
- if self.is_initialized(cython_function, cyname):
- cname = super(CyCValue, self).invoke(cyname, frame=frame)
- return gdb.parse_and_eval(cname)
- elif cyname in globals_dict:
- return globals_dict[cyname]._gdbval
- else:
- raise gdb.GdbError("Variable %s is not initialized." % cyname)
-
-
-class CyLine(gdb.Function, CythonBase):
- """
- Get the current Cython line.
- """
-
- @require_cython_frame
- def invoke(self):
- return self.get_cython_lineno()
-
-
-class CyEval(gdb.Function, CythonBase, EvaluateOrExecuteCodeMixin):
- """
- Evaluate Python code in the nearest Python or Cython frame and return
- Evaluate Python code in the nearest Python or Cython frame and return the result.
-
- @gdb_function_value_to_unicode
- def invoke(self, python_expression):
- input_type = libpython.PythonCodeExecutor.Py_eval_input
- return self.evalcode(python_expression, input_type)
-
-
-cython_info = CythonInfo()
-cy = CyCy.register()
-cython_info.cy = cy
-
-
-def register_defines():
- libpython.source_gdb_script(textwrap.dedent("""\
- define cy step
- cy -step
- end
-
- define cy next
- cy -next
- end
-
- document cy step
- %s
- end
-
- document cy next
- %s
- end
- """) % (CyStep.__doc__, CyNext.__doc__))
-
-register_defines()
+ try:
+ val = gdb.parse_and_eval(cyvar.cname)
+ except RuntimeError:
+ continue
+ else:
+ if val.is_optimized_out:
+ continue
+
+ pystringp = executor.alloc_pystring(name)
+ code = '''
+ (PyObject *) PyDict_SetItem(
+ (PyObject *) %d,
+ (PyObject *) %d,
+ (PyObject *) %s)
+ ''' % (local_dict_pointer, pystringp, cyvar.cname)
+
+ try:
+ if gdb.parse_and_eval(code) < 0:
+ gdb.parse_and_eval('PyErr_Print()')
+ raise gdb.GdbError("Unable to execute Python code.")
+ finally:
+ # PyDict_SetItem doesn't steal our reference
+ executor.xdecref(pystringp)
+
+ def _find_first_cython_or_python_frame(self):
+ frame = gdb.selected_frame()
+ while frame:
+ if (self.is_cython_function(frame) or
+ self.is_python_function(frame)):
+ frame.select()
+ return frame
+
+ frame = frame.older()
+
+ raise gdb.GdbError("There is no Cython or Python frame on the stack.")
+
+ def _evalcode_cython(self, executor, code, input_type):
+ with libpython.FetchAndRestoreError():
+ # get the dict of Cython globals and construct a dict in the
+ # inferior with Cython locals
+ global_dict = gdb.parse_and_eval(
+ '(PyObject *) PyModule_GetDict(__pyx_m)')
+ local_dict = gdb.parse_and_eval('(PyObject *) PyDict_New()')
+
+ try:
+ self._fill_locals_dict(executor,
+ libpython.pointervalue(local_dict))
+ result = executor.evalcode(code, input_type, global_dict,
+ local_dict)
+ finally:
+ executor.xdecref(libpython.pointervalue(local_dict))
+
+ return result
+
+ def evalcode(self, code, input_type):
+ """
+ Evaluate `code` in a Python or Cython stack frame using the given
+ `input_type`.
+ """
+ frame = self._find_first_cython_or_python_frame()
+ executor = libpython.PythonCodeExecutor()
+ if self.is_python_function(frame):
+ return libpython._evalcode_python(executor, code, input_type)
+ return self._evalcode_cython(executor, code, input_type)
+
+
+class CyExec(CythonCommand, libpython.PyExec, EvaluateOrExecuteCodeMixin):
+ """
+ Execute Python code in the nearest Python or Cython frame.
+ """
+
+ name = '-cy-exec'
+ command_class = gdb.COMMAND_STACK
+ completer_class = gdb.COMPLETE_NONE
+
+ def invoke(self, expr, from_tty):
+ expr, input_type = self.readcode(expr)
+ executor = libpython.PythonCodeExecutor()
+ executor.xdecref(self.evalcode(expr, executor.Py_single_input))
+
+
+class CySet(CythonCommand):
+ """
+ Set a Cython variable to a certain value
+
+ cy set my_cython_c_variable = 10
+ cy set my_cython_py_variable = $cy_eval("{'doner': 'kebab'}")
+
+ This is equivalent to
+
+ set $cy_value("my_cython_variable") = 10
+ """
+
+ name = 'cy set'
+ command_class = gdb.COMMAND_DATA
+ completer_class = gdb.COMPLETE_NONE
+
+ @require_cython_frame
+ def invoke(self, expr, from_tty):
+ name_and_expr = expr.split('=', 1)
+ if len(name_and_expr) != 2:
+ raise gdb.GdbError("Invalid expression. Use 'cy set var = expr'.")
+
+ varname, expr = name_and_expr
+ cname = self.cy.cy_cname.invoke(varname.strip())
+ gdb.execute("set %s = %s" % (cname, expr))
+
+
+# Functions
+
+class CyCName(gdb.Function, CythonBase):
+ """
+ Get the C name of a Cython variable in the current context.
+ Examples:
+
+ print $cy_cname("function")
+ print $cy_cname("Class.method")
+ print $cy_cname("module.function")
+ """
+
+ @require_cython_frame
+ @gdb_function_value_to_unicode
+ def invoke(self, cyname, frame=None):
+ frame = frame or gdb.selected_frame()
+ cname = None
+
+ if self.is_cython_function(frame):
+ cython_function = self.get_cython_function(frame)
+ if cyname in cython_function.locals:
+ cname = cython_function.locals[cyname].cname
+ elif cyname in cython_function.module.globals:
+ cname = cython_function.module.globals[cyname].cname
+ else:
+ qname = '%s.%s' % (cython_function.module.name, cyname)
+ if qname in cython_function.module.functions:
+ cname = cython_function.module.functions[qname].cname
+
+ if not cname:
+ cname = self.cy.functions_by_qualified_name.get(cyname)
+
+ if not cname:
+ raise gdb.GdbError('No such Cython variable: %s' % cyname)
+
+ return cname
+
+
+class CyCValue(CyCName):
+ """
+ Get the value of a Cython variable.
+ """
+
+ @require_cython_frame
+ @gdb_function_value_to_unicode
+ def invoke(self, cyname, frame=None):
+ globals_dict = self.get_cython_globals_dict()
+ cython_function = self.get_cython_function(frame)
+
+ if self.is_initialized(cython_function, cyname):
+ cname = super(CyCValue, self).invoke(cyname, frame=frame)
+ return gdb.parse_and_eval(cname)
+ elif cyname in globals_dict:
+ return globals_dict[cyname]._gdbval
+ else:
+ raise gdb.GdbError("Variable %s is not initialized." % cyname)
+
+
+class CyLine(gdb.Function, CythonBase):
+ """
+ Get the current Cython line.
+ """
+
+ @require_cython_frame
+ def invoke(self):
+ return self.get_cython_lineno()
+
+
+class CyEval(gdb.Function, CythonBase, EvaluateOrExecuteCodeMixin):
+ """
+ Evaluate Python code in the nearest Python or Cython frame and return the result.
+ """
+
+ @gdb_function_value_to_unicode
+ def invoke(self, python_expression):
+ input_type = libpython.PythonCodeExecutor.Py_eval_input
+ return self.evalcode(python_expression, input_type)
+
+
+cython_info = CythonInfo()
+cy = CyCy.register()
+cython_info.cy = cy
+
+
+def register_defines():
+ libpython.source_gdb_script(textwrap.dedent("""\
+ define cy step
+ cy -step
+ end
+
+ define cy next
+ cy -next
+ end
+
+ document cy step
+ %s
+ end
+
+ document cy next
+ %s
+ end
+ """) % (CyStep.__doc__, CyNext.__doc__))
+
+register_defines()
diff --git a/contrib/tools/cython/Cython/Debugger/libpython.py b/contrib/tools/cython/Cython/Debugger/libpython.py
index 9285129026..fea626dd73 100644
--- a/contrib/tools/cython/Cython/Debugger/libpython.py
+++ b/contrib/tools/cython/Cython/Debugger/libpython.py
@@ -1,73 +1,73 @@
-#!/usr/bin/python
-
-# NOTE: this file is taken from the Python source distribution
-# It can be found under Tools/gdb/libpython.py. It is shipped with Cython
-# because it's not installed as a python module, and because changes are only
-# merged into new python versions (v3.2+).
-
-'''
-From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
-to be extended with Python code e.g. for library-specific data visualizations,
-such as for the C++ STL types. Documentation on this API can be seen at:
-http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
-
-
-This python module deals with the case when the process being debugged (the
-"inferior process" in gdb parlance) is itself python, or more specifically,
-linked against libpython. In this situation, almost every item of data is a
-(PyObject*), and having the debugger merely print their addresses is not very
-enlightening.
-
-This module embeds knowledge about the implementation details of libpython so
-that we can emit useful visualizations e.g. a string, a list, a dict, a frame
-giving file/line information and the state of local variables
-
-In particular, given a gdb.Value corresponding to a PyObject* in the inferior
-process, we can generate a "proxy value" within the gdb process. For example,
-given a PyObject* in the inferior process that is in fact a PyListObject*
+#!/usr/bin/python
+
+# NOTE: this file is taken from the Python source distribution
+# It can be found under Tools/gdb/libpython.py. It is shipped with Cython
+# because it's not installed as a python module, and because changes are only
+# merged into new python versions (v3.2+).
+
+'''
+From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
+to be extended with Python code e.g. for library-specific data visualizations,
+such as for the C++ STL types. Documentation on this API can be seen at:
+http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
+
+
+This python module deals with the case when the process being debugged (the
+"inferior process" in gdb parlance) is itself python, or more specifically,
+linked against libpython. In this situation, almost every item of data is a
+(PyObject*), and having the debugger merely print their addresses is not very
+enlightening.
+
+This module embeds knowledge about the implementation details of libpython so
+that we can emit useful visualizations e.g. a string, a list, a dict, a frame
+giving file/line information and the state of local variables
+
+In particular, given a gdb.Value corresponding to a PyObject* in the inferior
+process, we can generate a "proxy value" within the gdb process. For example,
+given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyBytesObject* instances, we can
generate a proxy value within the gdb process that is a list of bytes
instances:
[b"foo", b"bar", b"baz"]
-
-Doing so can be expensive for complicated graphs of objects, and could take
-some time, so we also have a "write_repr" method that writes a representation
-of the data to a file-like object. This allows us to stop the traversal by
-having the file-like object raise an exception if it gets too much data.
-
-With both "proxyval" and "write_repr" we keep track of the set of all addresses
-visited so far in the traversal, to avoid infinite recursion due to cycles in
-the graph of object references.
-
-We try to defer gdb.lookup_type() invocations for python types until as late as
-possible: for a dynamically linked python binary, when the process starts in
-the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
-the type names are known to the debugger
-
-The module also extends gdb with some python-specific commands.
-'''
+
+Doing so can be expensive for complicated graphs of objects, and could take
+some time, so we also have a "write_repr" method that writes a representation
+of the data to a file-like object. This allows us to stop the traversal by
+having the file-like object raise an exception if it gets too much data.
+
+With both "proxyval" and "write_repr" we keep track of the set of all addresses
+visited so far in the traversal, to avoid infinite recursion due to cycles in
+the graph of object references.
+
+We try to defer gdb.lookup_type() invocations for python types until as late as
+possible: for a dynamically linked python binary, when the process starts in
+the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
+the type names are known to the debugger
+
+The module also extends gdb with some python-specific commands.
+'''
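# A rough, hypothetical usage sketch of the proxy-value machinery described
# above. It assumes a live inferior and a (PyObject *) expression that gdb can
# evaluate; 'some_object' is an invented symbol name, and PyObjectPtr and
# MAX_OUTPUT_LEN are the helpers defined further down in this file.
def _example_proxy_dump():
    gdbval = gdb.parse_and_eval('(PyObject *) some_object')
    pyop = PyObjectPtr.from_pyobject_ptr(gdbval)          # picks a refined subclass
    truncated = pyop.get_truncated_repr(MAX_OUTPUT_LEN)   # bounded repr of the proxy
    full_proxy = pyop.proxyval(set())                     # full proxy; may be costly
    return truncated, full_proxy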
# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax
# compatible (2.6+ and 3.0+). See #19308.
from __future__ import print_function
import gdb
-import os
+import os
import locale
-import sys
-
+import sys
+
if sys.version_info[0] >= 3:
unichr = chr
- xrange = range
+ xrange = range
long = int
-
-# Look up the gdb.Type for some standard types:
+
+# Look up the gdb.Type for some standard types:
# Those need to be refreshed as types (pointer sizes) may change when
# gdb loads different executables
-
+
def _type_char_ptr():
return gdb.lookup_type('char').pointer() # char*
-
+
def _type_unsigned_char_ptr():
return gdb.lookup_type('unsigned char').pointer() # unsigned char*
@@ -88,41 +88,41 @@ def _sizeof_void_p():
# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None
-Py_TPFLAGS_HEAPTYPE = (1 << 9)
-Py_TPFLAGS_LONG_SUBCLASS = (1 << 24)
-Py_TPFLAGS_LIST_SUBCLASS = (1 << 25)
-Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26)
-Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27)
-Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28)
-Py_TPFLAGS_DICT_SUBCLASS = (1 << 29)
-Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
-Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31)
-
-
+Py_TPFLAGS_HEAPTYPE = (1 << 9)
+Py_TPFLAGS_LONG_SUBCLASS = (1 << 24)
+Py_TPFLAGS_LIST_SUBCLASS = (1 << 25)
+Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26)
+Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27)
+Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28)
+Py_TPFLAGS_DICT_SUBCLASS = (1 << 29)
+Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
+Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31)
+
+
MAX_OUTPUT_LEN=1024
-hexdigits = "0123456789abcdef"
-
-ENCODING = locale.getpreferredencoding()
-
+hexdigits = "0123456789abcdef"
+
+ENCODING = locale.getpreferredencoding()
+
EVALFRAME = '_PyEval_EvalFrameDefault'
-
-class NullPyObjectPtr(RuntimeError):
- pass
-
-
-def safety_limit(val):
+
+class NullPyObjectPtr(RuntimeError):
+ pass
+
+
+def safety_limit(val):
# Given an integer value from the process being debugged, limit it to some
- # safety threshold so that arbitrary breakage within said process doesn't
- # break the gdb process too much (e.g. sizes of iterations, sizes of lists)
- return min(val, 1000)
-
-
-def safe_range(val):
- # As per range, but don't trust the value too much: cap it to a safety
- # threshold in case the data was corrupted
+ # safety threshold so that arbitrary breakage within said process doesn't
+ # break the gdb process too much (e.g. sizes of iterations, sizes of lists)
+ return min(val, 1000)
+
+
+def safe_range(val):
+ # As per range, but don't trust the value too much: cap it to a safety
+ # threshold in case the data was corrupted
return xrange(safety_limit(int(val)))
-
+
if sys.version_info[0] >= 3:
def write_unicode(file, text):
file.write(text)
@@ -134,7 +134,7 @@ else:
if isinstance(text, unicode):
text = text.encode(ENCODING, 'backslashreplace')
file.write(text)
-
+
try:
os_fsencode = os.fsencode
except AttributeError:
@@ -154,525 +154,525 @@ except AttributeError:
byte = char.encode(encoding)
encoded.append(byte)
return ''.join(encoded)
-
-class StringTruncated(RuntimeError):
- pass
-
-class TruncatedStringIO(object):
+
+class StringTruncated(RuntimeError):
+ pass
+
+class TruncatedStringIO(object):
'''Similar to io.StringIO, but can truncate the output by raising a
- StringTruncated exception'''
- def __init__(self, maxlen=None):
- self._val = ''
- self.maxlen = maxlen
-
- def write(self, data):
- if self.maxlen:
- if len(data) + len(self._val) > self.maxlen:
- # Truncation:
- self._val += data[0:self.maxlen - len(self._val)]
- raise StringTruncated()
-
- self._val += data
-
- def getvalue(self):
- return self._val
-
-class PyObjectPtr(object):
- """
+ StringTruncated exception'''
+ def __init__(self, maxlen=None):
+ self._val = ''
+ self.maxlen = maxlen
+
+ def write(self, data):
+ if self.maxlen:
+ if len(data) + len(self._val) > self.maxlen:
+ # Truncation:
+ self._val += data[0:self.maxlen - len(self._val)]
+ raise StringTruncated()
+
+ self._val += data
+
+ def getvalue(self):
+ return self._val
+
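# A small usage sketch of TruncatedStringIO, mirroring what
# get_truncated_repr() does further down: a write() that would exceed maxlen
# keeps the truncated prefix and raises StringTruncated.
def _example_truncated_write():
    out = TruncatedStringIO(maxlen=8)
    try:
        out.write('a' * 20)                       # more data than maxlen allows
    except StringTruncated:
        return out.getvalue() + '...(truncated)'  # -> 'aaaaaaaa...(truncated)'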
+class PyObjectPtr(object):
+ """
Class wrapping a gdb.Value that's either a (PyObject*) within the
inferior process, or some subclass pointer e.g. (PyBytesObject*)
-
- There will be a subclass for every refined PyObject type that we care
- about.
-
- Note that at every stage the underlying pointer could be NULL, point
- to corrupt data, etc; this is the debugger, after all.
- """
- _typename = 'PyObject'
-
- def __init__(self, gdbval, cast_to=None):
- if cast_to:
- self._gdbval = gdbval.cast(cast_to)
- else:
- self._gdbval = gdbval
-
- def field(self, name):
- '''
- Get the gdb.Value for the given field within the PyObject, coping with
- some python 2 versus python 3 differences.
-
- Various libpython types are defined using the "PyObject_HEAD" and
- "PyObject_VAR_HEAD" macros.
-
- In Python 2, these are defined so that "ob_type" and (for a var
- object) "ob_size" are fields of the type in question.
-
- In Python 3, this is defined as an embedded PyVarObject type thus:
- PyVarObject ob_base;
- so that the "ob_size" field is located inside the "ob_base" field, and
- the "ob_type" is most easily accessed by casting back to a (PyObject*).
- '''
- if self.is_null():
- raise NullPyObjectPtr(self)
-
- if name == 'ob_type':
- pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
- return pyo_ptr.dereference()[name]
-
- if name == 'ob_size':
- pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
- return pyo_ptr.dereference()[name]
-
- # General case: look it up inside the object:
- return self._gdbval.dereference()[name]
-
- def pyop_field(self, name):
- '''
- Get a PyObjectPtr for the given PyObject* field within this PyObject,
- coping with some python 2 versus python 3 differences.
- '''
- return PyObjectPtr.from_pyobject_ptr(self.field(name))
-
- def write_field_repr(self, name, out, visited):
- '''
- Extract the PyObject* field named "name", and write its representation
- to file-like object "out"
- '''
- field_obj = self.pyop_field(name)
- field_obj.write_repr(out, visited)
-
- def get_truncated_repr(self, maxlen):
- '''
- Get a repr-like string for the data, but truncate it at "maxlen" bytes
- (ending the object graph traversal as soon as you do)
- '''
- out = TruncatedStringIO(maxlen)
- try:
- self.write_repr(out, set())
- except StringTruncated:
- # Truncation occurred:
- return out.getvalue() + '...(truncated)'
-
- # No truncation occurred:
- return out.getvalue()
-
- def type(self):
- return PyTypeObjectPtr(self.field('ob_type'))
-
- def is_null(self):
+
+ There will be a subclass for every refined PyObject type that we care
+ about.
+
+ Note that at every stage the underlying pointer could be NULL, point
+ to corrupt data, etc; this is the debugger, after all.
+ """
+ _typename = 'PyObject'
+
+ def __init__(self, gdbval, cast_to=None):
+ if cast_to:
+ self._gdbval = gdbval.cast(cast_to)
+ else:
+ self._gdbval = gdbval
+
+ def field(self, name):
+ '''
+ Get the gdb.Value for the given field within the PyObject, coping with
+ some python 2 versus python 3 differences.
+
+ Various libpython types are defined using the "PyObject_HEAD" and
+ "PyObject_VAR_HEAD" macros.
+
+ In Python 2, these are defined so that "ob_type" and (for a var
+ object) "ob_size" are fields of the type in question.
+
+ In Python 3, this is defined as an embedded PyVarObject type thus:
+ PyVarObject ob_base;
+ so that the "ob_size" field is located inside the "ob_base" field, and
+ the "ob_type" is most easily accessed by casting back to a (PyObject*).
+ '''
+ if self.is_null():
+ raise NullPyObjectPtr(self)
+
+ if name == 'ob_type':
+ pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
+ return pyo_ptr.dereference()[name]
+
+ if name == 'ob_size':
+ pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+ return pyo_ptr.dereference()[name]
+
+ # General case: look it up inside the object:
+ return self._gdbval.dereference()[name]
+
+ def pyop_field(self, name):
+ '''
+ Get a PyObjectPtr for the given PyObject* field within this PyObject,
+ coping with some python 2 versus python 3 differences.
+ '''
+ return PyObjectPtr.from_pyobject_ptr(self.field(name))
+
+ def write_field_repr(self, name, out, visited):
+ '''
+ Extract the PyObject* field named "name", and write its representation
+ to file-like object "out"
+ '''
+ field_obj = self.pyop_field(name)
+ field_obj.write_repr(out, visited)
+
+ def get_truncated_repr(self, maxlen):
+ '''
+ Get a repr-like string for the data, but truncate it at "maxlen" bytes
+ (ending the object graph traversal as soon as you do)
+ '''
+ out = TruncatedStringIO(maxlen)
+ try:
+ self.write_repr(out, set())
+ except StringTruncated:
+ # Truncation occurred:
+ return out.getvalue() + '...(truncated)'
+
+ # No truncation occurred:
+ return out.getvalue()
+
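# Illustrative sketch of the truncation behaviour above (hypothetical value):
# get_truncated_repr(16) on a wrapper for a large list writes into a
# TruncatedStringIO capped at 16 bytes; the write_repr() call that overflows
# the cap raises StringTruncated, and the caller gets back roughly
#
#     '[1, 2, 3, 4, 5, ...(truncated)'
#
# instead of the full repr. The PyObjectPtrPrinter near the end of this file
# relies on this via MAX_OUTPUT_LEN.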
+ def type(self):
+ return PyTypeObjectPtr(self.field('ob_type'))
+
+ def is_null(self):
return 0 == long(self._gdbval)
-
- def is_optimized_out(self):
- '''
- Is the value of the underlying PyObject* visible to the debugger?
-
- This can vary with the precise version of the compiler used to build
- Python, and the precise version of gdb.
-
- See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
- PyEval_EvalFrameEx's "f"
- '''
- return self._gdbval.is_optimized_out
-
- def safe_tp_name(self):
- try:
- return self.type().field('tp_name').string()
- except NullPyObjectPtr:
- # NULL tp_name?
- return 'unknown'
- except RuntimeError:
- # Can't even read the object at all?
- return 'unknown'
-
- def proxyval(self, visited):
- '''
- Scrape a value from the inferior process, and try to represent it
- within the gdb process, whilst (hopefully) avoiding crashes when
- the remote data is corrupt.
-
- Derived classes will override this.
-
- For example, a PyIntObject* with ob_ival 42 in the inferior process
- should result in an int(42) in this process.
-
- visited: a set of all gdb.Value pyobject pointers already visited
- whilst generating this value (to guard against infinite recursion when
- visiting object graphs with loops). Analogous to Py_ReprEnter and
- Py_ReprLeave
- '''
-
- class FakeRepr(object):
- """
- Class representing a non-descript PyObject* value in the inferior
- process for when we don't have a custom scraper, intended to have
- a sane repr().
- """
-
- def __init__(self, tp_name, address):
- self.tp_name = tp_name
- self.address = address
-
- def __repr__(self):
- # For the NULL pointer, we have no way of knowing a type, so
- # special-case it as per
- # http://bugs.python.org/issue8032#msg100882
- if self.address == 0:
- return '0x0'
- return '<%s at remote 0x%x>' % (self.tp_name, self.address)
-
- return FakeRepr(self.safe_tp_name(),
+
+ def is_optimized_out(self):
+ '''
+ Is the value of the underlying PyObject* visible to the debugger?
+
+ This can vary with the precise version of the compiler used to build
+ Python, and the precise version of gdb.
+
+ See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
+ PyEval_EvalFrameEx's "f"
+ '''
+ return self._gdbval.is_optimized_out
+
+ def safe_tp_name(self):
+ try:
+ return self.type().field('tp_name').string()
+ except NullPyObjectPtr:
+ # NULL tp_name?
+ return 'unknown'
+ except RuntimeError:
+ # Can't even read the object at all?
+ return 'unknown'
+
+ def proxyval(self, visited):
+ '''
+ Scrape a value from the inferior process, and try to represent it
+ within the gdb process, whilst (hopefully) avoiding crashes when
+ the remote data is corrupt.
+
+ Derived classes will override this.
+
+ For example, a PyIntObject* with ob_ival 42 in the inferior process
+ should result in an int(42) in this process.
+
+ visited: a set of all gdb.Value pyobject pointers already visited
+ whilst generating this value (to guard against infinite recursion when
+ visiting object graphs with loops). Analogous to Py_ReprEnter and
+ Py_ReprLeave
+ '''
+
+ class FakeRepr(object):
+ """
+ Class representing a non-descript PyObject* value in the inferior
+ process for when we don't have a custom scraper, intended to have
+ a sane repr().
+ """
+
+ def __init__(self, tp_name, address):
+ self.tp_name = tp_name
+ self.address = address
+
+ def __repr__(self):
+ # For the NULL pointer, we have no way of knowing a type, so
+ # special-case it as per
+ # http://bugs.python.org/issue8032#msg100882
+ if self.address == 0:
+ return '0x0'
+ return '<%s at remote 0x%x>' % (self.tp_name, self.address)
+
+ return FakeRepr(self.safe_tp_name(),
long(self._gdbval))
-
- def write_repr(self, out, visited):
- '''
- Write a string representation of the value scraped from the inferior
- process to "out", a file-like object.
- '''
- # Default implementation: generate a proxy value and write its repr
- # However, this could involve a lot of work for complicated objects,
- # so for derived classes we specialize this
- return out.write(repr(self.proxyval(visited)))
-
- @classmethod
- def subclass_from_type(cls, t):
- '''
- Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
- (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
- to use
-
- Ideally, we would look up the symbols for the global types, but that
- isn't working yet:
- (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
- Traceback (most recent call last):
- File "<string>", line 1, in <module>
- NotImplementedError: Symbol type not yet supported in Python scripts.
- Error while executing Python code.
-
- For now, we use tp_flags, after doing some string comparisons on the
- tp_name for some special-cases that don't seem to be visible through
- flags
- '''
- try:
- tp_name = t.field('tp_name').string()
- tp_flags = int(t.field('tp_flags'))
- except RuntimeError:
- # Handle any kind of error e.g. NULL ptrs by simply using the base
- # class
- return cls
-
+
+ def write_repr(self, out, visited):
+ '''
+ Write a string representation of the value scraped from the inferior
+ process to "out", a file-like object.
+ '''
+ # Default implementation: generate a proxy value and write its repr
+ # However, this could involve a lot of work for complicated objects,
+ # so for derived classes we specialize this
+ return out.write(repr(self.proxyval(visited)))
+
+ @classmethod
+ def subclass_from_type(cls, t):
+ '''
+ Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
+ (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
+ to use
+
+ Ideally, we would look up the symbols for the global types, but that
+ isn't working yet:
+ (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
+ Traceback (most recent call last):
+ File "<string>", line 1, in <module>
+ NotImplementedError: Symbol type not yet supported in Python scripts.
+ Error while executing Python code.
+
+ For now, we use tp_flags, after doing some string comparisons on the
+ tp_name for some special-cases that don't seem to be visible through
+ flags
+ '''
+ try:
+ tp_name = t.field('tp_name').string()
+ tp_flags = int(t.field('tp_flags'))
+ except RuntimeError:
+ # Handle any kind of error e.g. NULL ptrs by simply using the base
+ # class
+ return cls
+
#print('tp_flags = 0x%08x' % tp_flags)
#print('tp_name = %r' % tp_name)
-
- name_map = {'bool': PyBoolObjectPtr,
- 'classobj': PyClassObjectPtr,
- 'NoneType': PyNoneStructPtr,
- 'frame': PyFrameObjectPtr,
- 'set' : PySetObjectPtr,
- 'frozenset' : PySetObjectPtr,
- 'builtin_function_or_method' : PyCFunctionObjectPtr,
+
+ name_map = {'bool': PyBoolObjectPtr,
+ 'classobj': PyClassObjectPtr,
+ 'NoneType': PyNoneStructPtr,
+ 'frame': PyFrameObjectPtr,
+ 'set' : PySetObjectPtr,
+ 'frozenset' : PySetObjectPtr,
+ 'builtin_function_or_method' : PyCFunctionObjectPtr,
'method-wrapper': wrapperobject,
- }
- if tp_name in name_map:
- return name_map[tp_name]
-
+ }
+ if tp_name in name_map:
+ return name_map[tp_name]
+
if tp_flags & Py_TPFLAGS_HEAPTYPE:
return HeapTypeObjectPtr
-
- if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
- return PyLongObjectPtr
- if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
- return PyListObjectPtr
- if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
- return PyTupleObjectPtr
+
+ if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
+ return PyLongObjectPtr
+ if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
+ return PyListObjectPtr
+ if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
+ return PyTupleObjectPtr
if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
return PyBytesObjectPtr
- if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
- return PyUnicodeObjectPtr
- if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
- return PyDictObjectPtr
- if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
- return PyBaseExceptionObjectPtr
+ if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
+ return PyUnicodeObjectPtr
+ if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
+ return PyDictObjectPtr
+ if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
+ return PyBaseExceptionObjectPtr
#if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
# return PyTypeObjectPtr
-
- # Use the base class:
- return cls
-
- @classmethod
- def from_pyobject_ptr(cls, gdbval):
- '''
- Try to locate the appropriate derived class dynamically, and cast
- the pointer accordingly.
- '''
- try:
- p = PyObjectPtr(gdbval)
- cls = cls.subclass_from_type(p.type())
- return cls(gdbval, cast_to=cls.get_gdb_type())
+
+ # Use the base class:
+ return cls
+
+ @classmethod
+ def from_pyobject_ptr(cls, gdbval):
+ '''
+ Try to locate the appropriate derived class dynamically, and cast
+ the pointer accordingly.
+ '''
+ try:
+ p = PyObjectPtr(gdbval)
+ cls = cls.subclass_from_type(p.type())
+ return cls(gdbval, cast_to=cls.get_gdb_type())
except RuntimeError:
- # Handle any kind of error e.g. NULL ptrs by simply using the base
- # class
- pass
- return cls(gdbval)
-
- @classmethod
- def get_gdb_type(cls):
- return gdb.lookup_type(cls._typename).pointer()
-
- def as_address(self):
+ # Handle any kind of error e.g. NULL ptrs by simply using the base
+ # class
+ pass
+ return cls(gdbval)
+
+ @classmethod
+ def get_gdb_type(cls):
+ return gdb.lookup_type(cls._typename).pointer()
+
+ def as_address(self):
return long(self._gdbval)
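# Illustrative usage sketch (hypothetical gdb session): once this module is
# importable as "libpython" and "val" is a gdb.Value of type (PyObject *):
#
#     import libpython
#     pyop = libpython.PyObjectPtr.from_pyobject_ptr(val)
#     print(type(pyop).__name__)            # e.g. 'PyDictObjectPtr'
#     print(pyop.get_truncated_repr(1024))  # repr capped at 1024 bytes
#
# subclass_from_type() chooses the wrapper from tp_name/tp_flags, and the
# factory falls back to plain PyObjectPtr when the type object is unreadable.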
-
-class PyVarObjectPtr(PyObjectPtr):
- _typename = 'PyVarObject'
-
-class ProxyAlreadyVisited(object):
- '''
- Placeholder proxy to use when protecting against infinite recursion due to
- loops in the object graph.
-
- Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
- '''
- def __init__(self, rep):
- self._rep = rep
-
- def __repr__(self):
- return self._rep
-
-
-def _write_instance_repr(out, visited, name, pyop_attrdict, address):
+
+class PyVarObjectPtr(PyObjectPtr):
+ _typename = 'PyVarObject'
+
+class ProxyAlreadyVisited(object):
+ '''
+ Placeholder proxy to use when protecting against infinite recursion due to
+ loops in the object graph.
+
+ Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
+ '''
+ def __init__(self, rep):
+ self._rep = rep
+
+ def __repr__(self):
+ return self._rep
+
+
+def _write_instance_repr(out, visited, name, pyop_attrdict, address):
'''Shared code for use by all classes:
- write a representation to file-like object "out"'''
- out.write('<')
- out.write(name)
-
- # Write dictionary of instance attributes:
- if isinstance(pyop_attrdict, PyDictObjectPtr):
- out.write('(')
- first = True
+ write a representation to file-like object "out"'''
+ out.write('<')
+ out.write(name)
+
+ # Write dictionary of instance attributes:
+ if isinstance(pyop_attrdict, PyDictObjectPtr):
+ out.write('(')
+ first = True
for pyop_arg, pyop_val in pyop_attrdict.iteritems():
- if not first:
- out.write(', ')
- first = False
- out.write(pyop_arg.proxyval(visited))
- out.write('=')
- pyop_val.write_repr(out, visited)
- out.write(')')
- out.write(' at remote 0x%x>' % address)
-
-
-class InstanceProxy(object):
-
- def __init__(self, cl_name, attrdict, address):
- self.cl_name = cl_name
- self.attrdict = attrdict
- self.address = address
-
- def __repr__(self):
- if isinstance(self.attrdict, dict):
+ if not first:
+ out.write(', ')
+ first = False
+ out.write(pyop_arg.proxyval(visited))
+ out.write('=')
+ pyop_val.write_repr(out, visited)
+ out.write(')')
+ out.write(' at remote 0x%x>' % address)
+
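# Example of the output format produced above (hypothetical object): for an
# instance of a user class Point with attribute dict {'x': 1, 'y': 2} at
# address 0xdeadbeef, _write_instance_repr emits
#
#     <Point(x=1, y=2) at remote 0xdeadbeef>
#
# which is also the format InstanceProxy.__repr__ below reproduces once the
# attributes have been proxied into an ordinary dict.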
+
+class InstanceProxy(object):
+
+ def __init__(self, cl_name, attrdict, address):
+ self.cl_name = cl_name
+ self.attrdict = attrdict
+ self.address = address
+
+ def __repr__(self):
+ if isinstance(self.attrdict, dict):
kwargs = ', '.join(["%s=%r" % (arg, val)
for arg, val in self.attrdict.iteritems()])
return '<%s(%s) at remote 0x%x>' % (self.cl_name,
kwargs, self.address)
- else:
+ else:
return '<%s at remote 0x%x>' % (self.cl_name,
self.address)
-
+
def _PyObject_VAR_SIZE(typeobj, nitems):
if _PyObject_VAR_SIZE._type_size_t is None:
_PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t')
-
- return ( ( typeobj.field('tp_basicsize') +
- nitems * typeobj.field('tp_itemsize') +
+
+ return ( ( typeobj.field('tp_basicsize') +
+ nitems * typeobj.field('tp_itemsize') +
(_sizeof_void_p() - 1)
) & ~(_sizeof_void_p() - 1)
).cast(_PyObject_VAR_SIZE._type_size_t)
_PyObject_VAR_SIZE._type_size_t = None
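# Worked example of the rounding above (assuming a 64-bit inferior, so
# _sizeof_void_p() == 8): for a hypothetical type with tp_basicsize == 26,
# tp_itemsize == 4 and nitems == 3,
#
#     raw  = 26 + 3 * 4       # 38 bytes
#     size = (38 + 7) & ~7    # 40 bytes, rounded up to a multiple of 8
#
# i.e. the expression rounds the variable-size object up to the next
# pointer-aligned size, mirroring CPython's _PyObject_VAR_SIZE macro.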
-
+
class HeapTypeObjectPtr(PyObjectPtr):
_typename = 'PyObject'
-
- def get_attr_dict(self):
- '''
- Get the PyDictObject ptr representing the attribute dictionary
- (or None if there's a problem)
- '''
- try:
- typeobj = self.type()
- dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
- if dictoffset != 0:
- if dictoffset < 0:
- type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
- tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
- if tsize < 0:
- tsize = -tsize
- size = _PyObject_VAR_SIZE(typeobj, tsize)
- dictoffset += size
- assert dictoffset > 0
+
+ def get_attr_dict(self):
+ '''
+ Get the PyDictObject ptr representing the attribute dictionary
+ (or None if there's a problem)
+ '''
+ try:
+ typeobj = self.type()
+ dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
+ if dictoffset != 0:
+ if dictoffset < 0:
+ type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
+ tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
+ if tsize < 0:
+ tsize = -tsize
+ size = _PyObject_VAR_SIZE(typeobj, tsize)
+ dictoffset += size
+ assert dictoffset > 0
assert dictoffset % _sizeof_void_p() == 0
-
+
dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset
- PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
- dictptr = dictptr.cast(PyObjectPtrPtr)
- return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
- except RuntimeError:
- # Corrupt data somewhere; fail safe
- pass
-
- # Not found, or some kind of error:
- return None
-
- def proxyval(self, visited):
- '''
+ PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
+ dictptr = dictptr.cast(PyObjectPtrPtr)
+ return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
+ except RuntimeError:
+ # Corrupt data somewhere; fail safe
+ pass
+
+ # Not found, or some kind of error:
+ return None
+
+ def proxyval(self, visited):
+ '''
Support for classes.
-
- Currently we just locate the dictionary using a transliteration to
- python of _PyObject_GetDictPtr, ignoring descriptors
- '''
- # Guard against infinite loops:
- if self.as_address() in visited:
- return ProxyAlreadyVisited('<...>')
- visited.add(self.as_address())
-
- pyop_attr_dict = self.get_attr_dict()
- if pyop_attr_dict:
- attr_dict = pyop_attr_dict.proxyval(visited)
- else:
- attr_dict = {}
- tp_name = self.safe_tp_name()
-
+
+ Currently we just locate the dictionary using a transliteration to
+ python of _PyObject_GetDictPtr, ignoring descriptors
+ '''
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ return ProxyAlreadyVisited('<...>')
+ visited.add(self.as_address())
+
+ pyop_attr_dict = self.get_attr_dict()
+ if pyop_attr_dict:
+ attr_dict = pyop_attr_dict.proxyval(visited)
+ else:
+ attr_dict = {}
+ tp_name = self.safe_tp_name()
+
# Class:
return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
-
- def write_repr(self, out, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- out.write('<...>')
- return
- visited.add(self.as_address())
-
+
+ def write_repr(self, out, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ out.write('<...>')
+ return
+ visited.add(self.as_address())
+
pyop_attrdict = self.get_attr_dict()
_write_instance_repr(out, visited,
self.safe_tp_name(), pyop_attrdict, self.as_address())
-
-class ProxyException(Exception):
- def __init__(self, tp_name, args):
- self.tp_name = tp_name
- self.args = args
-
- def __repr__(self):
- return '%s%r' % (self.tp_name, self.args)
-
-class PyBaseExceptionObjectPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
- within the process being debugged.
- """
- _typename = 'PyBaseExceptionObject'
-
- def proxyval(self, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- return ProxyAlreadyVisited('(...)')
- visited.add(self.as_address())
- arg_proxy = self.pyop_field('args').proxyval(visited)
- return ProxyException(self.safe_tp_name(),
- arg_proxy)
-
- def write_repr(self, out, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- out.write('(...)')
- return
- visited.add(self.as_address())
-
- out.write(self.safe_tp_name())
- self.write_field_repr('args', out, visited)
-
-class PyClassObjectPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
- instance within the process being debugged.
- """
- _typename = 'PyClassObject'
-
-
-class BuiltInFunctionProxy(object):
- def __init__(self, ml_name):
- self.ml_name = ml_name
-
- def __repr__(self):
- return "<built-in function %s>" % self.ml_name
-
-class BuiltInMethodProxy(object):
- def __init__(self, ml_name, pyop_m_self):
- self.ml_name = ml_name
- self.pyop_m_self = pyop_m_self
-
- def __repr__(self):
+
+class ProxyException(Exception):
+ def __init__(self, tp_name, args):
+ self.tp_name = tp_name
+ self.args = args
+
+ def __repr__(self):
+ return '%s%r' % (self.tp_name, self.args)
+
+class PyBaseExceptionObjectPtr(PyObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
+ within the process being debugged.
+ """
+ _typename = 'PyBaseExceptionObject'
+
+ def proxyval(self, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ return ProxyAlreadyVisited('(...)')
+ visited.add(self.as_address())
+ arg_proxy = self.pyop_field('args').proxyval(visited)
+ return ProxyException(self.safe_tp_name(),
+ arg_proxy)
+
+ def write_repr(self, out, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ out.write('(...)')
+ return
+ visited.add(self.as_address())
+
+ out.write(self.safe_tp_name())
+ self.write_field_repr('args', out, visited)
+
+class PyClassObjectPtr(PyObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
+ instance within the process being debugged.
+ """
+ _typename = 'PyClassObject'
+
+
+class BuiltInFunctionProxy(object):
+ def __init__(self, ml_name):
+ self.ml_name = ml_name
+
+ def __repr__(self):
+ return "<built-in function %s>" % self.ml_name
+
+class BuiltInMethodProxy(object):
+ def __init__(self, ml_name, pyop_m_self):
+ self.ml_name = ml_name
+ self.pyop_m_self = pyop_m_self
+
+ def __repr__(self):
return ('<built-in method %s of %s object at remote 0x%x>'
% (self.ml_name,
self.pyop_m_self.safe_tp_name(),
self.pyop_m_self.as_address())
)
-
-class PyCFunctionObjectPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyCFunctionObject*
- (see Include/methodobject.h and Objects/methodobject.c)
- """
- _typename = 'PyCFunctionObject'
-
- def proxyval(self, visited):
- m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*)
- ml_name = m_ml['ml_name'].string()
-
- pyop_m_self = self.pyop_field('m_self')
- if pyop_m_self.is_null():
- return BuiltInFunctionProxy(ml_name)
- else:
- return BuiltInMethodProxy(ml_name, pyop_m_self)
-
-
-class PyCodeObjectPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
- within the process being debugged.
- """
- _typename = 'PyCodeObject'
-
- def addr2line(self, addrq):
- '''
- Get the line number for a given bytecode offset
-
- Analogous to PyCode_Addr2Line; translated from pseudocode in
- Objects/lnotab_notes.txt
- '''
- co_lnotab = self.pyop_field('co_lnotab').proxyval(set())
-
- # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
- # not 0, as lnotab_notes.txt has it:
- lineno = int_from_int(self.field('co_firstlineno'))
-
- addr = 0
- for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
- addr += ord(addr_incr)
- if addr > addrq:
- return lineno
- lineno += ord(line_incr)
- return lineno
-
-
-class PyDictObjectPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
- within the process being debugged.
- """
- _typename = 'PyDictObject'
-
- def iteritems(self):
- '''
- Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
+
+class PyCFunctionObjectPtr(PyObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyCFunctionObject*
+ (see Include/methodobject.h and Objects/methodobject.c)
+ """
+ _typename = 'PyCFunctionObject'
+
+ def proxyval(self, visited):
+ m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*)
+ ml_name = m_ml['ml_name'].string()
+
+ pyop_m_self = self.pyop_field('m_self')
+ if pyop_m_self.is_null():
+ return BuiltInFunctionProxy(ml_name)
+ else:
+ return BuiltInMethodProxy(ml_name, pyop_m_self)
+
+
+class PyCodeObjectPtr(PyObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
+ within the process being debugged.
+ """
+ _typename = 'PyCodeObject'
+
+ def addr2line(self, addrq):
+ '''
+ Get the line number for a given bytecode offset
+
+ Analogous to PyCode_Addr2Line; translated from pseudocode in
+ Objects/lnotab_notes.txt
+ '''
+ co_lnotab = self.pyop_field('co_lnotab').proxyval(set())
+
+ # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
+ # not 0, as lnotab_notes.txt has it:
+ lineno = int_from_int(self.field('co_firstlineno'))
+
+ addr = 0
+ for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
+ addr += ord(addr_incr)
+ if addr > addrq:
+ return lineno
+ lineno += ord(line_incr)
+ return lineno
+
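# Worked example of the lnotab walk above (hypothetical numbers): with
# co_firstlineno == 10 and co_lnotab == b'\x06\x01\x08\x02', i.e. the pairs
# (6, 1) and (8, 2), a query for bytecode offset addrq == 8 proceeds as:
#
#     lineno = 10
#     addr   = 0 + 6 = 6    # 6 <= 8, so lineno += 1 -> 11
#     addr   = 6 + 8 = 14   # 14 > 8, so return 11
#
# Offsets 0..5 therefore map to line 10 and offsets 6..13 to line 11, matching
# the lnotab format this module targets (see Objects/lnotab_notes.txt).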
+
+class PyDictObjectPtr(PyObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
+ within the process being debugged.
+ """
+ _typename = 'PyDictObject'
+
+ def iteritems(self):
+ '''
+ Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
analogous to dict.iteritems()
- '''
+ '''
keys = self.field('ma_keys')
values = self.field('ma_values')
entries, nentries = self._get_entries(keys)
@@ -682,41 +682,41 @@ class PyDictObjectPtr(PyObjectPtr):
pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
else:
pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
- if not pyop_value.is_null():
- pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
- yield (pyop_key, pyop_value)
-
- def proxyval(self, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- return ProxyAlreadyVisited('{...}')
- visited.add(self.as_address())
-
- result = {}
+ if not pyop_value.is_null():
+ pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
+ yield (pyop_key, pyop_value)
+
+ def proxyval(self, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ return ProxyAlreadyVisited('{...}')
+ visited.add(self.as_address())
+
+ result = {}
for pyop_key, pyop_value in self.iteritems():
- proxy_key = pyop_key.proxyval(visited)
- proxy_value = pyop_value.proxyval(visited)
- result[proxy_key] = proxy_value
- return result
-
- def write_repr(self, out, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- out.write('{...}')
- return
- visited.add(self.as_address())
-
- out.write('{')
- first = True
+ proxy_key = pyop_key.proxyval(visited)
+ proxy_value = pyop_value.proxyval(visited)
+ result[proxy_key] = proxy_value
+ return result
+
+ def write_repr(self, out, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ out.write('{...}')
+ return
+ visited.add(self.as_address())
+
+ out.write('{')
+ first = True
for pyop_key, pyop_value in self.iteritems():
- if not first:
- out.write(', ')
- first = False
- pyop_key.write_repr(out, visited)
- out.write(': ')
- pyop_value.write_repr(out, visited)
- out.write('}')
-
+ if not first:
+ out.write(', ')
+ first = False
+ pyop_key.write_repr(out, visited)
+ out.write(': ')
+ pyop_value.write_repr(out, visited)
+ out.write('}')
+
def _get_entries(self, keys):
dk_nentries = int(keys['dk_nentries'])
dk_size = int(keys['dk_size'])
@@ -726,7 +726,7 @@ class PyDictObjectPtr(PyObjectPtr):
except RuntimeError:
# >= Python 3.6
pass
-
+
if dk_size <= 0xFF:
offset = dk_size
elif dk_size <= 0xFFFF:
@@ -735,247 +735,247 @@ class PyDictObjectPtr(PyObjectPtr):
offset = 4 * dk_size
else:
offset = 8 * dk_size
-
+
ent_addr = keys['dk_indices']['as_1'].address
ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
ent_addr = ent_addr.cast(ent_ptr_t)
-
+
return ent_addr, dk_nentries
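# Worked example of the index-width computation above (hypothetical dict):
# dk_indices holds dk_size entries whose element width depends on dk_size, so
# the PyDictKeyEntry array begins "offset" bytes past it:
#
#     dk_size = 64      -> 1-byte indices -> offset = 64
#     dk_size = 1024    -> 2-byte indices -> offset = 2 * 1024 = 2048
#     dk_size = 2**20   -> 4-byte indices -> offset = 4 * 2**20
#
# ent_addr is the dk_indices base advanced by that many bytes and cast to
# (PyDictKeyEntry *), matching the >= Python 3.6 key table layout this branch
# handles.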
-
-
-class PyListObjectPtr(PyObjectPtr):
- _typename = 'PyListObject'
-
- def __getitem__(self, i):
- # Get the gdb.Value for the (PyObject*) with the given index:
- field_ob_item = self.field('ob_item')
- return field_ob_item[i]
-
- def proxyval(self, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- return ProxyAlreadyVisited('[...]')
- visited.add(self.as_address())
-
- result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
- for i in safe_range(int_from_int(self.field('ob_size')))]
- return result
-
- def write_repr(self, out, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- out.write('[...]')
- return
- visited.add(self.as_address())
-
- out.write('[')
- for i in safe_range(int_from_int(self.field('ob_size'))):
- if i > 0:
- out.write(', ')
- element = PyObjectPtr.from_pyobject_ptr(self[i])
- element.write_repr(out, visited)
- out.write(']')
-
-class PyLongObjectPtr(PyObjectPtr):
- _typename = 'PyLongObject'
-
- def proxyval(self, visited):
- '''
- Python's Include/longintrepr.h has this declaration:
- struct _longobject {
- PyObject_VAR_HEAD
- digit ob_digit[1];
- };
-
- with this description:
- The absolute value of a number is equal to
- SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
- Negative numbers are represented with ob_size < 0;
- zero is represented by ob_size == 0.
-
- where SHIFT can be either:
- #define PyLong_SHIFT 30
- #define PyLong_SHIFT 15
- '''
+
+
+class PyListObjectPtr(PyObjectPtr):
+ _typename = 'PyListObject'
+
+ def __getitem__(self, i):
+ # Get the gdb.Value for the (PyObject*) with the given index:
+ field_ob_item = self.field('ob_item')
+ return field_ob_item[i]
+
+ def proxyval(self, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ return ProxyAlreadyVisited('[...]')
+ visited.add(self.as_address())
+
+ result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
+ for i in safe_range(int_from_int(self.field('ob_size')))]
+ return result
+
+ def write_repr(self, out, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ out.write('[...]')
+ return
+ visited.add(self.as_address())
+
+ out.write('[')
+ for i in safe_range(int_from_int(self.field('ob_size'))):
+ if i > 0:
+ out.write(', ')
+ element = PyObjectPtr.from_pyobject_ptr(self[i])
+ element.write_repr(out, visited)
+ out.write(']')
+
+class PyLongObjectPtr(PyObjectPtr):
+ _typename = 'PyLongObject'
+
+ def proxyval(self, visited):
+ '''
+ Python's Include/longintrepr.h has this declaration:
+ struct _longobject {
+ PyObject_VAR_HEAD
+ digit ob_digit[1];
+ };
+
+ with this description:
+ The absolute value of a number is equal to
+ SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
+ Negative numbers are represented with ob_size < 0;
+ zero is represented by ob_size == 0.
+
+ where SHIFT can be either:
+ #define PyLong_SHIFT 30
+ #define PyLong_SHIFT 15
+ '''
ob_size = long(self.field('ob_size'))
- if ob_size == 0:
+ if ob_size == 0:
return 0
-
- ob_digit = self.field('ob_digit')
-
- if gdb.lookup_type('digit').sizeof == 2:
- SHIFT = 15
- else:
- SHIFT = 30
-
+
+ ob_digit = self.field('ob_digit')
+
+ if gdb.lookup_type('digit').sizeof == 2:
+ SHIFT = 15
+ else:
+ SHIFT = 30
+
digits = [long(ob_digit[i]) * 2**(SHIFT*i)
- for i in safe_range(abs(ob_size))]
- result = sum(digits)
- if ob_size < 0:
- result = -result
- return result
-
- def write_repr(self, out, visited):
- # Write this out as a Python 3 int literal, i.e. without the "L" suffix
- proxy = self.proxyval(visited)
- out.write("%s" % proxy)
-
-
-class PyBoolObjectPtr(PyLongObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
- <bool> instances (Py_True/Py_False) within the process being debugged.
- """
- def proxyval(self, visited):
+ for i in safe_range(abs(ob_size))]
+ result = sum(digits)
+ if ob_size < 0:
+ result = -result
+ return result
+
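# Worked example of the digit sum above (hypothetical values, 30-bit digits,
# i.e. SHIFT == 30): a PyLongObject with ob_size == 2 and ob_digit == [7, 3]
# represents
#
#     7 * 2**(30*0) + 3 * 2**(30*1) = 7 + 3 * 1073741824 = 3221225479
#
# ob_size == -2 with the same digits gives -3221225479, and ob_size == 0 is
# the value 0, matching the description quoted from Include/longintrepr.h.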
+ def write_repr(self, out, visited):
+ # Write this out as a Python 3 int literal, i.e. without the "L" suffix
+ proxy = self.proxyval(visited)
+ out.write("%s" % proxy)
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+ <bool> instances (Py_True/Py_False) within the process being debugged.
+ """
+ def proxyval(self, visited):
if PyLongObjectPtr.proxyval(self, visited):
return True
else:
return False
-
-class PyNoneStructPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyObject* pointing to the
- singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
- """
- _typename = 'PyObject'
-
- def proxyval(self, visited):
- return None
-
-
-class PyFrameObjectPtr(PyObjectPtr):
- _typename = 'PyFrameObject'
-
- def __init__(self, gdbval, cast_to=None):
- PyObjectPtr.__init__(self, gdbval, cast_to)
-
- if not self.is_optimized_out():
- self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
- self.co_name = self.co.pyop_field('co_name')
- self.co_filename = self.co.pyop_field('co_filename')
-
- self.f_lineno = int_from_int(self.field('f_lineno'))
- self.f_lasti = int_from_int(self.field('f_lasti'))
- self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
- self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
-
- def iter_locals(self):
- '''
- Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
- the local variables of this frame
- '''
- if self.is_optimized_out():
- return
-
- f_localsplus = self.field('f_localsplus')
- for i in safe_range(self.co_nlocals):
- pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
- if not pyop_value.is_null():
- pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
- yield (pyop_name, pyop_value)
-
- def iter_globals(self):
- '''
- Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
- the global variables of this frame
- '''
- if self.is_optimized_out():
+
+class PyNoneStructPtr(PyObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyObject* pointing to the
+ singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
+ """
+ _typename = 'PyObject'
+
+ def proxyval(self, visited):
+ return None
+
+
+class PyFrameObjectPtr(PyObjectPtr):
+ _typename = 'PyFrameObject'
+
+ def __init__(self, gdbval, cast_to=None):
+ PyObjectPtr.__init__(self, gdbval, cast_to)
+
+ if not self.is_optimized_out():
+ self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
+ self.co_name = self.co.pyop_field('co_name')
+ self.co_filename = self.co.pyop_field('co_filename')
+
+ self.f_lineno = int_from_int(self.field('f_lineno'))
+ self.f_lasti = int_from_int(self.field('f_lasti'))
+ self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
+ self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
+
+ def iter_locals(self):
+ '''
+ Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
+ the local variables of this frame
+ '''
+ if self.is_optimized_out():
+ return
+
+ f_localsplus = self.field('f_localsplus')
+ for i in safe_range(self.co_nlocals):
+ pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
+ if not pyop_value.is_null():
+ pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
+ yield (pyop_name, pyop_value)
+
+ def iter_globals(self):
+ '''
+ Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
+ the global variables of this frame
+ '''
+ if self.is_optimized_out():
return ()
-
- pyop_globals = self.pyop_field('f_globals')
+
+ pyop_globals = self.pyop_field('f_globals')
return pyop_globals.iteritems()
-
- def iter_builtins(self):
- '''
- Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
- the builtin variables
- '''
- if self.is_optimized_out():
+
+ def iter_builtins(self):
+ '''
+ Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
+ the builtin variables
+ '''
+ if self.is_optimized_out():
return ()
-
- pyop_builtins = self.pyop_field('f_builtins')
+
+ pyop_builtins = self.pyop_field('f_builtins')
return pyop_builtins.iteritems()
-
- def get_var_by_name(self, name):
- '''
- Look for the named local variable, returning a (PyObjectPtr, scope) pair
- where scope is a string 'local', 'global', 'builtin'
-
- If not found, return (None, None)
- '''
- for pyop_name, pyop_value in self.iter_locals():
- if name == pyop_name.proxyval(set()):
- return pyop_value, 'local'
- for pyop_name, pyop_value in self.iter_globals():
- if name == pyop_name.proxyval(set()):
- return pyop_value, 'global'
- for pyop_name, pyop_value in self.iter_builtins():
- if name == pyop_name.proxyval(set()):
- return pyop_value, 'builtin'
- return None, None
-
- def filename(self):
- '''Get the path of the current Python source file, as a string'''
- if self.is_optimized_out():
- return '(frame information optimized out)'
- return self.co_filename.proxyval(set())
-
- def current_line_num(self):
- '''Get current line number as an integer (1-based)
-
- Translated from PyFrame_GetLineNumber and PyCode_Addr2Line
-
- See Objects/lnotab_notes.txt
- '''
- if self.is_optimized_out():
- return None
- f_trace = self.field('f_trace')
+
+ def get_var_by_name(self, name):
+ '''
+ Look for the named local variable, returning a (PyObjectPtr, scope) pair
+ where scope is a string 'local', 'global', 'builtin'
+
+ If not found, return (None, None)
+ '''
+ for pyop_name, pyop_value in self.iter_locals():
+ if name == pyop_name.proxyval(set()):
+ return pyop_value, 'local'
+ for pyop_name, pyop_value in self.iter_globals():
+ if name == pyop_name.proxyval(set()):
+ return pyop_value, 'global'
+ for pyop_name, pyop_value in self.iter_builtins():
+ if name == pyop_name.proxyval(set()):
+ return pyop_value, 'builtin'
+ return None, None
+
+ def filename(self):
+ '''Get the path of the current Python source file, as a string'''
+ if self.is_optimized_out():
+ return '(frame information optimized out)'
+ return self.co_filename.proxyval(set())
+
+ def current_line_num(self):
+ '''Get current line number as an integer (1-based)
+
+ Translated from PyFrame_GetLineNumber and PyCode_Addr2Line
+
+ See Objects/lnotab_notes.txt
+ '''
+ if self.is_optimized_out():
+ return None
+ f_trace = self.field('f_trace')
if long(f_trace) != 0:
- # we have a non-NULL f_trace:
- return self.f_lineno
- else:
- #try:
- return self.co.addr2line(self.f_lasti)
- #except ValueError:
- # return self.f_lineno
-
- def current_line(self):
- '''Get the text of the current source line as a string, with a trailing
- newline character'''
- if self.is_optimized_out():
- return '(frame information optimized out)'
- filename = self.filename()
+ # we have a non-NULL f_trace:
+ return self.f_lineno
+ else:
+ #try:
+ return self.co.addr2line(self.f_lasti)
+ #except ValueError:
+ # return self.f_lineno
+
+ def current_line(self):
+ '''Get the text of the current source line as a string, with a trailing
+ newline character'''
+ if self.is_optimized_out():
+ return '(frame information optimized out)'
+ filename = self.filename()
try:
f = open(os_fsencode(filename), 'r')
except IOError:
return None
with f:
- all_lines = f.readlines()
- # Convert from 1-based current_line_num to 0-based list offset:
- return all_lines[self.current_line_num()-1]
-
- def write_repr(self, out, visited):
- if self.is_optimized_out():
- out.write('(frame information optimized out)')
- return
- out.write('Frame 0x%x, for file %s, line %i, in %s ('
- % (self.as_address(),
- self.co_filename.proxyval(visited),
- self.current_line_num(),
- self.co_name.proxyval(visited)))
- first = True
- for pyop_name, pyop_value in self.iter_locals():
- if not first:
- out.write(', ')
- first = False
-
- out.write(pyop_name.proxyval(visited))
- out.write('=')
- pyop_value.write_repr(out, visited)
-
- out.write(')')
-
+ all_lines = f.readlines()
+ # Convert from 1-based current_line_num to 0-based list offset:
+ return all_lines[self.current_line_num()-1]
+
+ def write_repr(self, out, visited):
+ if self.is_optimized_out():
+ out.write('(frame information optimized out)')
+ return
+ out.write('Frame 0x%x, for file %s, line %i, in %s ('
+ % (self.as_address(),
+ self.co_filename.proxyval(visited),
+ self.current_line_num(),
+ self.co_name.proxyval(visited)))
+ first = True
+ for pyop_name, pyop_value in self.iter_locals():
+ if not first:
+ out.write(', ')
+ first = False
+
+ out.write(pyop_name.proxyval(visited))
+ out.write('=')
+ pyop_value.write_repr(out, visited)
+
+ out.write(')')
+
def print_traceback(self):
if self.is_optimized_out():
sys.stdout.write(' (frame information optimized out)\n')
@@ -985,10 +985,10 @@ class PyFrameObjectPtr(PyObjectPtr):
% (self.co_filename.proxyval(visited),
self.current_line_num(),
self.co_name.proxyval(visited)))
-
-class PySetObjectPtr(PyObjectPtr):
- _typename = 'PySetObject'
-
+
+class PySetObjectPtr(PyObjectPtr):
+ _typename = 'PySetObject'
+
@classmethod
def _dummy_key(self):
return gdb.lookup_global_symbol('_PySet_Dummy').value()
@@ -1002,168 +1002,168 @@ class PySetObjectPtr(PyObjectPtr):
if key != 0 and key != dummy_ptr:
yield PyObjectPtr.from_pyobject_ptr(key)
- def proxyval(self, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
- visited.add(self.as_address())
-
+ def proxyval(self, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
+ visited.add(self.as_address())
+
members = (key.proxyval(visited) for key in self)
- if self.safe_tp_name() == 'frozenset':
- return frozenset(members)
- else:
- return set(members)
-
- def write_repr(self, out, visited):
- # Emulate Python 3's set_repr
- tp_name = self.safe_tp_name()
-
- # Guard against infinite loops:
- if self.as_address() in visited:
- out.write('(...)')
- return
- visited.add(self.as_address())
-
- # Python 3's set_repr special-cases the empty set:
- if not self.field('used'):
- out.write(tp_name)
- out.write('()')
- return
-
- # Python 3 uses {} for set literals:
- if tp_name != 'set':
- out.write(tp_name)
- out.write('(')
-
- out.write('{')
- first = True
+ if self.safe_tp_name() == 'frozenset':
+ return frozenset(members)
+ else:
+ return set(members)
+
+ def write_repr(self, out, visited):
+ # Emulate Python 3's set_repr
+ tp_name = self.safe_tp_name()
+
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ out.write('(...)')
+ return
+ visited.add(self.as_address())
+
+ # Python 3's set_repr special-cases the empty set:
+ if not self.field('used'):
+ out.write(tp_name)
+ out.write('()')
+ return
+
+ # Python 3 uses {} for set literals:
+ if tp_name != 'set':
+ out.write(tp_name)
+ out.write('(')
+
+ out.write('{')
+ first = True
for key in self:
if not first:
out.write(', ')
first = False
key.write_repr(out, visited)
- out.write('}')
-
- if tp_name != 'set':
- out.write(')')
-
-
-class PyBytesObjectPtr(PyObjectPtr):
- _typename = 'PyBytesObject'
-
- def __str__(self):
- field_ob_size = self.field('ob_size')
- field_ob_sval = self.field('ob_sval')
+ out.write('}')
+
+ if tp_name != 'set':
+ out.write(')')
+
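# Examples of the Python 3 style output produced above (hypothetical values):
# an empty set prints as "set()", a populated set as "{1, 2, 3}", and a
# frozenset as "frozenset({1, 2, 3})" - the type name is written only for the
# empty case and for non-"set" subtypes, as in the set_repr being emulated.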
+
+class PyBytesObjectPtr(PyObjectPtr):
+ _typename = 'PyBytesObject'
+
+ def __str__(self):
+ field_ob_size = self.field('ob_size')
+ field_ob_sval = self.field('ob_sval')
char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr())
return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])
-
- def proxyval(self, visited):
- return str(self)
-
+
+ def proxyval(self, visited):
+ return str(self)
+
def write_repr(self, out, visited):
- # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
-
- # Get a PyStringObject* within the Python 2 gdb process:
- proxy = self.proxyval(visited)
-
- # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
- # to Python 2 code:
- quote = "'"
- if "'" in proxy and not '"' in proxy:
- quote = '"'
+ # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
+
+ # Get a PyStringObject* within the Python 2 gdb process:
+ proxy = self.proxyval(visited)
+
+ # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
+ # to Python 2 code:
+ quote = "'"
+ if "'" in proxy and not '"' in proxy:
+ quote = '"'
out.write('b')
- out.write(quote)
- for byte in proxy:
- if byte == quote or byte == '\\':
- out.write('\\')
- out.write(byte)
- elif byte == '\t':
- out.write('\\t')
- elif byte == '\n':
- out.write('\\n')
- elif byte == '\r':
- out.write('\\r')
- elif byte < ' ' or ord(byte) >= 0x7f:
- out.write('\\x')
- out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
- out.write(hexdigits[ord(byte) & 0xf])
- else:
- out.write(byte)
- out.write(quote)
-
-
-class PyStringObjectPtr(PyBytesObjectPtr):
- _typename = 'PyStringObject'
-
-
-class PyTupleObjectPtr(PyObjectPtr):
- _typename = 'PyTupleObject'
-
- def __getitem__(self, i):
- # Get the gdb.Value for the (PyObject*) with the given index:
- field_ob_item = self.field('ob_item')
- return field_ob_item[i]
-
- def proxyval(self, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- return ProxyAlreadyVisited('(...)')
- visited.add(self.as_address())
-
+ out.write(quote)
+ for byte in proxy:
+ if byte == quote or byte == '\\':
+ out.write('\\')
+ out.write(byte)
+ elif byte == '\t':
+ out.write('\\t')
+ elif byte == '\n':
+ out.write('\\n')
+ elif byte == '\r':
+ out.write('\\r')
+ elif byte < ' ' or ord(byte) >= 0x7f:
+ out.write('\\x')
+ out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
+ out.write(hexdigits[ord(byte) & 0xf])
+ else:
+ out.write(byte)
+ out.write(quote)
+
+
+class PyStringObjectPtr(PyBytesObjectPtr):
+ _typename = 'PyStringObject'
+
+
+class PyTupleObjectPtr(PyObjectPtr):
+ _typename = 'PyTupleObject'
+
+ def __getitem__(self, i):
+ # Get the gdb.Value for the (PyObject*) with the given index:
+ field_ob_item = self.field('ob_item')
+ return field_ob_item[i]
+
+ def proxyval(self, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ return ProxyAlreadyVisited('(...)')
+ visited.add(self.as_address())
+
result = tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
for i in safe_range(int_from_int(self.field('ob_size'))))
- return result
-
- def write_repr(self, out, visited):
- # Guard against infinite loops:
- if self.as_address() in visited:
- out.write('(...)')
- return
- visited.add(self.as_address())
-
- out.write('(')
- for i in safe_range(int_from_int(self.field('ob_size'))):
- if i > 0:
- out.write(', ')
- element = PyObjectPtr.from_pyobject_ptr(self[i])
- element.write_repr(out, visited)
- if self.field('ob_size') == 1:
- out.write(',)')
- else:
- out.write(')')
-
+ return result
+
+ def write_repr(self, out, visited):
+ # Guard against infinite loops:
+ if self.as_address() in visited:
+ out.write('(...)')
+ return
+ visited.add(self.as_address())
+
+ out.write('(')
+ for i in safe_range(int_from_int(self.field('ob_size'))):
+ if i > 0:
+ out.write(', ')
+ element = PyObjectPtr.from_pyobject_ptr(self[i])
+ element.write_repr(out, visited)
+ if self.field('ob_size') == 1:
+ out.write(',)')
+ else:
+ out.write(')')
+
class PyTypeObjectPtr(PyObjectPtr):
_typename = 'PyTypeObject'
-
-
-def _unichr_is_printable(char):
- # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
- if char == u" ":
- return True
- import unicodedata
- return unicodedata.category(char) not in ("C", "Z")
-
-if sys.maxunicode >= 0x10000:
+
+
+def _unichr_is_printable(char):
+ # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
+ if char == u" ":
+ return True
+ import unicodedata
+ return unicodedata.category(char) not in ("C", "Z")
+
+if sys.maxunicode >= 0x10000:
_unichr = unichr
-else:
- # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
- def _unichr(x):
- if x < 0x10000:
- return unichr(x)
- x -= 0x10000
- ch1 = 0xD800 | (x >> 10)
- ch2 = 0xDC00 | (x & 0x3FF)
- return unichr(ch1) + unichr(ch2)
-
-
-class PyUnicodeObjectPtr(PyObjectPtr):
- _typename = 'PyUnicodeObject'
-
- def char_width(self):
- _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
- return _type_Py_UNICODE.sizeof
-
- def proxyval(self, visited):
+else:
+ # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
+ def _unichr(x):
+ if x < 0x10000:
+ return unichr(x)
+ x -= 0x10000
+ ch1 = 0xD800 | (x >> 10)
+ ch2 = 0xDC00 | (x & 0x3FF)
+ return unichr(ch1) + unichr(ch2)
+
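# Worked example of the narrow-build helper above: for the code point
# U+1F600 (0x1F600),
#
#     x   = 0x1F600 - 0x10000      # 0x0F600
#     ch1 = 0xD800 | (x >> 10)     # 0xD83D (high surrogate)
#     ch2 = 0xDC00 | (x & 0x3FF)   # 0xDE00 (low surrogate)
#
# so _unichr(0x1F600) returns the two-character string u'\ud83d\ude00' on a
# narrow (sys.maxunicode < 0x10000) Python 2 build, while a wide build takes
# the unichr(x) fast path and returns the single character directly.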
+
+class PyUnicodeObjectPtr(PyObjectPtr):
+ _typename = 'PyUnicodeObject'
+
+ def char_width(self):
+ _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+ return _type_Py_UNICODE.sizeof
+
+ def proxyval(self, visited):
global _is_pep393
if _is_pep393 is None:
fields = gdb.lookup_type('PyUnicodeObject').target().fields()
@@ -1200,160 +1200,160 @@ class PyUnicodeObjectPtr(PyObjectPtr):
field_length = long(self.field('length'))
field_str = self.field('str')
may_have_surrogates = self.char_width() == 2
-
- # Gather a list of ints from the Py_UNICODE array; these are either
+
+ # Gather a list of ints from the Py_UNICODE array; these are either
# UCS-1, UCS-2 or UCS-4 code points:
if not may_have_surrogates:
- Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
- else:
- # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
- # inferior process: we must join surrogate pairs.
- Py_UNICODEs = []
- i = 0
- limit = safety_limit(field_length)
- while i < limit:
- ucs = int(field_str[i])
- i += 1
- if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
- Py_UNICODEs.append(ucs)
- continue
- # This could be a surrogate pair.
- ucs2 = int(field_str[i])
- if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
- continue
- code = (ucs & 0x03FF) << 10
- code |= ucs2 & 0x03FF
- code += 0x00010000
- Py_UNICODEs.append(code)
- i += 1
-
- # Convert the int code points to unicode characters, and generate a
- # local unicode instance.
- # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
+ Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+ else:
+ # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
+ # inferior process: we must join surrogate pairs.
+ Py_UNICODEs = []
+ i = 0
+ limit = safety_limit(field_length)
+ while i < limit:
+ ucs = int(field_str[i])
+ i += 1
+ if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
+ Py_UNICODEs.append(ucs)
+ continue
+ # This could be a surrogate pair.
+ ucs2 = int(field_str[i])
+ if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
+ continue
+ code = (ucs & 0x03FF) << 10
+ code |= ucs2 & 0x03FF
+ code += 0x00010000
+ Py_UNICODEs.append(code)
+ i += 1
+
+ # Convert the int code points to unicode characters, and generate a
+ # local unicode instance.
+ # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
result = u''.join([
(_unichr(ucs) if ucs <= 0x10ffff else '\ufffd')
for ucs in Py_UNICODEs])
- return result
-
- def write_repr(self, out, visited):
+ return result
+
+ def write_repr(self, out, visited):
# Write this out as a Python 3 str literal, i.e. without a "u" prefix
- # Get a PyUnicodeObject* within the Python 2 gdb process:
- proxy = self.proxyval(visited)
-
- # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
- # to Python 2:
- if "'" in proxy and '"' not in proxy:
- quote = '"'
- else:
- quote = "'"
- out.write(quote)
-
- i = 0
- while i < len(proxy):
- ch = proxy[i]
- i += 1
-
- # Escape quotes and backslashes
- if ch == quote or ch == '\\':
- out.write('\\')
- out.write(ch)
-
- # Map special whitespace to '\t', '\n', '\r'
- elif ch == '\t':
- out.write('\\t')
- elif ch == '\n':
- out.write('\\n')
- elif ch == '\r':
- out.write('\\r')
-
- # Map non-printable US ASCII to '\xhh'
- elif ch < ' ' or ch == 0x7F:
- out.write('\\x')
- out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
- out.write(hexdigits[ord(ch) & 0x000F])
-
- # Copy ASCII characters as-is
- elif ord(ch) < 0x7F:
- out.write(ch)
-
- # Non-ASCII characters
- else:
- ucs = ch
- ch2 = None
- if sys.maxunicode < 0x10000:
- # If sizeof(Py_UNICODE) is 2 here (in gdb), join
- # surrogate pairs before calling _unichr_is_printable.
- if (i < len(proxy)
- and 0xD800 <= ord(ch) < 0xDC00 \
- and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
- ch2 = proxy[i]
- ucs = ch + ch2
- i += 1
-
- # Unfortunately, Python 2's unicode type doesn't seem
- # to expose the "isprintable" method
- printable = _unichr_is_printable(ucs)
- if printable:
- try:
- ucs.encode(ENCODING)
- except UnicodeEncodeError:
- printable = False
-
- # Map Unicode whitespace and control characters
- # (categories Z* and C* except ASCII space)
- if not printable:
- if ch2 is not None:
- # Match Python 3's representation of non-printable
- # wide characters.
- code = (ord(ch) & 0x03FF) << 10
- code |= ord(ch2) & 0x03FF
- code += 0x00010000
- else:
- code = ord(ucs)
-
- # Map 8-bit characters to '\\xhh'
- if code <= 0xff:
- out.write('\\x')
- out.write(hexdigits[(code >> 4) & 0x000F])
- out.write(hexdigits[code & 0x000F])
- # Map 21-bit characters to '\U00xxxxxx'
- elif code >= 0x10000:
- out.write('\\U')
- out.write(hexdigits[(code >> 28) & 0x0000000F])
- out.write(hexdigits[(code >> 24) & 0x0000000F])
- out.write(hexdigits[(code >> 20) & 0x0000000F])
- out.write(hexdigits[(code >> 16) & 0x0000000F])
- out.write(hexdigits[(code >> 12) & 0x0000000F])
- out.write(hexdigits[(code >> 8) & 0x0000000F])
- out.write(hexdigits[(code >> 4) & 0x0000000F])
- out.write(hexdigits[code & 0x0000000F])
- # Map 16-bit characters to '\uxxxx'
- else:
- out.write('\\u')
- out.write(hexdigits[(code >> 12) & 0x000F])
- out.write(hexdigits[(code >> 8) & 0x000F])
- out.write(hexdigits[(code >> 4) & 0x000F])
- out.write(hexdigits[code & 0x000F])
- else:
- # Copy characters as-is
- out.write(ch)
- if ch2 is not None:
- out.write(ch2)
-
- out.write(quote)
-
-
+ # Get a PyUnicodeObject* within the Python 2 gdb process:
+ proxy = self.proxyval(visited)
+
+ # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
+ # to Python 2:
+ if "'" in proxy and '"' not in proxy:
+ quote = '"'
+ else:
+ quote = "'"
+ out.write(quote)
+
+ i = 0
+ while i < len(proxy):
+ ch = proxy[i]
+ i += 1
+
+ # Escape quotes and backslashes
+ if ch == quote or ch == '\\':
+ out.write('\\')
+ out.write(ch)
+
+ # Map special whitespace to '\t', '\n', '\r'
+ elif ch == '\t':
+ out.write('\\t')
+ elif ch == '\n':
+ out.write('\\n')
+ elif ch == '\r':
+ out.write('\\r')
+
+ # Map non-printable US ASCII to '\xhh'
+ elif ch < ' ' or ch == 0x7F:
+ out.write('\\x')
+ out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
+ out.write(hexdigits[ord(ch) & 0x000F])
+
+ # Copy ASCII characters as-is
+ elif ord(ch) < 0x7F:
+ out.write(ch)
+
+ # Non-ASCII characters
+ else:
+ ucs = ch
+ ch2 = None
+ if sys.maxunicode < 0x10000:
+ # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+ # surrogate pairs before calling _unichr_is_printable.
+ if (i < len(proxy)
+ and 0xD800 <= ord(ch) < 0xDC00 \
+ and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+ ch2 = proxy[i]
+ ucs = ch + ch2
+ i += 1
+
+ # Unfortunately, Python 2's unicode type doesn't seem
+ # to expose the "isprintable" method
+ printable = _unichr_is_printable(ucs)
+ if printable:
+ try:
+ ucs.encode(ENCODING)
+ except UnicodeEncodeError:
+ printable = False
+
+ # Map Unicode whitespace and control characters
+ # (categories Z* and C* except ASCII space)
+ if not printable:
+ if ch2 is not None:
+ # Match Python 3's representation of non-printable
+ # wide characters.
+ code = (ord(ch) & 0x03FF) << 10
+ code |= ord(ch2) & 0x03FF
+ code += 0x00010000
+ else:
+ code = ord(ucs)
+
+ # Map 8-bit characters to '\\xhh'
+ if code <= 0xff:
+ out.write('\\x')
+ out.write(hexdigits[(code >> 4) & 0x000F])
+ out.write(hexdigits[code & 0x000F])
+ # Map 21-bit characters to '\U00xxxxxx'
+ elif code >= 0x10000:
+ out.write('\\U')
+ out.write(hexdigits[(code >> 28) & 0x0000000F])
+ out.write(hexdigits[(code >> 24) & 0x0000000F])
+ out.write(hexdigits[(code >> 20) & 0x0000000F])
+ out.write(hexdigits[(code >> 16) & 0x0000000F])
+ out.write(hexdigits[(code >> 12) & 0x0000000F])
+ out.write(hexdigits[(code >> 8) & 0x0000000F])
+ out.write(hexdigits[(code >> 4) & 0x0000000F])
+ out.write(hexdigits[code & 0x0000000F])
+ # Map 16-bit characters to '\uxxxx'
+ else:
+ out.write('\\u')
+ out.write(hexdigits[(code >> 12) & 0x000F])
+ out.write(hexdigits[(code >> 8) & 0x000F])
+ out.write(hexdigits[(code >> 4) & 0x000F])
+ out.write(hexdigits[code & 0x000F])
+ else:
+ # Copy characters as-is
+ out.write(ch)
+ if ch2 is not None:
+ out.write(ch2)
+
+ out.write(quote)
+
+
class wrapperobject(PyObjectPtr):
_typename = 'wrapperobject'
-
+
def safe_name(self):
try:
name = self.field('descr')['d_base']['name'].string()
return repr(name)
except (NullPyObjectPtr, RuntimeError):
return '<unknown name>'
-
+
def safe_tp_name(self):
try:
return self.field('self')['ob_type']['tp_name'].string()
@@ -1379,124 +1379,124 @@ class wrapperobject(PyObjectPtr):
out.write(proxy)
-def int_from_int(gdbval):
- return int(str(gdbval))
-
-
-def stringify(val):
- # TODO: repr() puts everything on one line; pformat can be nicer, but
- # can lead to very long results; this function isolates the choice
- if True:
- return repr(val)
- else:
- from pprint import pformat
- return pformat(val)
-
-
-class PyObjectPtrPrinter:
- "Prints a (PyObject*)"
-
- def __init__ (self, gdbval):
- self.gdbval = gdbval
-
- def to_string (self):
- pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
- if True:
- return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
- else:
- # Generate full proxy value then stringify it.
- # Doing so could be expensive
- proxyval = pyop.proxyval(set())
- return stringify(proxyval)
-
-def pretty_printer_lookup(gdbval):
- type = gdbval.type.unqualified()
+def int_from_int(gdbval):
+ return int(str(gdbval))
+
+
+def stringify(val):
+ # TODO: repr() puts everything on one line; pformat can be nicer, but
+ # can lead to very long results; this function isolates the choice
+ if True:
+ return repr(val)
+ else:
+ from pprint import pformat
+ return pformat(val)
+
+
+class PyObjectPtrPrinter:
+ "Prints a (PyObject*)"
+
+ def __init__ (self, gdbval):
+ self.gdbval = gdbval
+
+ def to_string (self):
+ pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
+ if True:
+ return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+ else:
+ # Generate full proxy value then stringify it.
+ # Doing so could be expensive
+ proxyval = pyop.proxyval(set())
+ return stringify(proxyval)
+
+def pretty_printer_lookup(gdbval):
+ type = gdbval.type.unqualified()
if type.code != gdb.TYPE_CODE_PTR:
return None
-
+
type = type.target().unqualified()
t = str(type)
if t in ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject"):
return PyObjectPtrPrinter(gdbval)
-"""
-During development, I've been manually invoking the code in this way:
-(gdb) python
-
-import sys
-sys.path.append('/home/david/coding/python-gdb')
-import libpython
-end
-
-then reloading it after each edit like this:
-(gdb) python reload(libpython)
-
-The following code should ensure that the pretty-printer is registered
-if the code is autoloaded by gdb when visiting libpython.so, provided
-that this Python file is installed to the same path as the library (or its
-.debug file) plus a "-gdb.py" suffix, e.g.:
- /usr/lib/libpython2.6.so.1.0-gdb.py
- /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
-"""
+"""
+During development, I've been manually invoking the code in this way:
+(gdb) python
+
+import sys
+sys.path.append('/home/david/coding/python-gdb')
+import libpython
+end
+
+then reloading it after each edit like this:
+(gdb) python reload(libpython)
+
+The following code should ensure that the prettyprinter is registered
+if the code is autoloaded by gdb when visiting libpython.so, provided
+that this python file is installed to the same path as the library (or its
+.debug file) plus a "-gdb.py" suffix, e.g:
+ /usr/lib/libpython2.6.so.1.0-gdb.py
+ /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
+"""
def register (obj):
- if obj is None:
- obj = gdb
-
- # Wire up the pretty-printer
- obj.pretty_printers.append(pretty_printer_lookup)
-
+ if obj is None:
+ obj = gdb
+
+ # Wire up the pretty-printer
+ obj.pretty_printers.append(pretty_printer_lookup)
+
register (gdb.current_objfile ())
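Once the lookup above is registered, gdb consults it for every value it prints. A minimal sketch of driving it by hand from gdb's Python prompt (the C-level variable name `obj` is hypothetical):

    val = gdb.parse_and_eval('(PyObject *) obj')
    printer = pretty_printer_lookup(val)
    if printer is not None:
        print(printer.to_string())  # repr truncated to MAX_OUTPUT_LEN characters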
-
-
-
-# Unfortunately, the exact API exposed by the gdb module varies somewhat
-# from build to build
-# See http://bugs.python.org/issue8279?#msg102276
-
-class Frame(object):
- '''
- Wrapper for gdb.Frame, adding various methods
- '''
- def __init__(self, gdbframe):
- self._gdbframe = gdbframe
-
- def older(self):
- older = self._gdbframe.older()
- if older:
- return Frame(older)
- else:
- return None
-
- def newer(self):
- newer = self._gdbframe.newer()
- if newer:
- return Frame(newer)
- else:
- return None
-
- def select(self):
- '''If supported, select this frame and return True; return False if unsupported
-
- Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
- onwards, but absent on Ubuntu buildbot'''
- if not hasattr(self._gdbframe, 'select'):
- print ('Unable to select frame: '
- 'this build of gdb does not expose a gdb.Frame.select method')
- return False
- self._gdbframe.select()
- return True
-
- def get_index(self):
- '''Calculate index of frame, starting at 0 for the newest frame within
- this thread'''
- index = 0
- # Go down until you reach the newest frame:
- iter_frame = self
- while iter_frame.newer():
- index += 1
- iter_frame = iter_frame.newer()
- return index
-
+
+
+
+# Unfortunately, the exact API exposed by the gdb module varies somewhat
+# from build to build
+# See http://bugs.python.org/issue8279?#msg102276
+
+class Frame(object):
+ '''
+ Wrapper for gdb.Frame, adding various methods
+ '''
+ def __init__(self, gdbframe):
+ self._gdbframe = gdbframe
+
+ def older(self):
+ older = self._gdbframe.older()
+ if older:
+ return Frame(older)
+ else:
+ return None
+
+ def newer(self):
+ newer = self._gdbframe.newer()
+ if newer:
+ return Frame(newer)
+ else:
+ return None
+
+ def select(self):
+ '''If supported, select this frame and return True; return False if unsupported
+
+ Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
+ onwards, but absent on Ubuntu buildbot'''
+ if not hasattr(self._gdbframe, 'select'):
+ print ('Unable to select frame: '
+ 'this build of gdb does not expose a gdb.Frame.select method')
+ return False
+ self._gdbframe.select()
+ return True
+
+ def get_index(self):
+ '''Calculate index of frame, starting at 0 for the newest frame within
+ this thread'''
+ index = 0
+ # Go down until you reach the newest frame:
+ iter_frame = self
+ while iter_frame.newer():
+ index += 1
+ iter_frame = iter_frame.newer()
+ return index
+
# We divide frames into:
# - "python frames":
# - "bytecode frames" i.e. PyEval_EvalFrameEx
@@ -1517,19 +1517,19 @@ class Frame(object):
def is_evalframe(self):
'''Is this a _PyEval_EvalFrameDefault frame?'''
if self._gdbframe.name() == EVALFRAME:
- '''
- I believe we also need to filter on the inline
- struct frame_id.inline_depth, only regarding frames with
- an inline depth of 0 as actually being this function
-
- So we reject those with type gdb.INLINE_FRAME
- '''
- if self._gdbframe.type() == gdb.NORMAL_FRAME:
+ '''
+ I believe we also need to filter on the inline
+ struct frame_id.inline_depth, only regarding frames with
+ an inline depth of 0 as actually being this function
+
+ So we reject those with type gdb.INLINE_FRAME
+ '''
+ if self._gdbframe.type() == gdb.NORMAL_FRAME:
# We have a _PyEval_EvalFrameDefault frame:
- return True
-
- return False
-
+ return True
+
+ return False
+
def is_other_python_frame(self):
'''Is this frame worth displaying in python backtraces?
Examples:
@@ -1541,10 +1541,10 @@ class Frame(object):
'''
if self.is_waiting_for_gil():
return 'Waiting for the GIL'
-
+
if self.is_gc_collect():
return 'Garbage-collecting'
-
+
# Detect invocations of PyCFunction instances:
frame = self._gdbframe
caller = frame.name()
@@ -1559,23 +1559,23 @@ class Frame(object):
# PyCFunctionObject instance
# "f" is the same value, but cast to (PyCFunctionObject*)
# "self" is the (PyObject*) of the 'self'
- try:
+ try:
# Use the prettyprinter for the func:
func = frame.read_var(arg_name)
return str(func)
except RuntimeError:
return 'PyCFunction invocation (unable to read %s)' % arg_name
-
+
if caller == 'wrapper_call':
try:
func = frame.read_var('wp')
return str(func)
except RuntimeError:
return '<wrapper_call invocation>'
-
+
# This frame isn't worth reporting:
return False
-
+
def is_waiting_for_gil(self):
'''Is this frame waiting on the GIL?'''
# This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
@@ -1587,8 +1587,8 @@ class Frame(object):
'''Is this frame "collect" within the garbage-collector?'''
return self._gdbframe.name() == 'collect'
- def get_pyop(self):
- try:
+ def get_pyop(self):
+ try:
f = self._gdbframe.read_var('f')
frame = PyFrameObjectPtr.from_pyobject_ptr(f)
if not frame.is_optimized_out():
@@ -1605,17 +1605,17 @@ class Frame(object):
return frame
return orig_frame
except ValueError:
- return None
-
- @classmethod
- def get_selected_frame(cls):
- _gdbframe = gdb.selected_frame()
- if _gdbframe:
- return Frame(_gdbframe)
- return None
-
- @classmethod
- def get_selected_python_frame(cls):
+ return None
+
+ @classmethod
+ def get_selected_frame(cls):
+ _gdbframe = gdb.selected_frame()
+ if _gdbframe:
+ return Frame(_gdbframe)
+ return None
+
+ @classmethod
+ def get_selected_python_frame(cls):
'''Try to obtain the Frame for the python-related code in the selected
frame, or None'''
try:
@@ -1636,35 +1636,35 @@ class Frame(object):
def get_selected_bytecode_frame(cls):
'''Try to obtain the Frame for the python bytecode interpreter in the
selected GDB frame, or None'''
- frame = cls.get_selected_frame()
-
- while frame:
+ frame = cls.get_selected_frame()
+
+ while frame:
if frame.is_evalframe():
- return frame
- frame = frame.older()
-
- # Not found:
- return None
-
- def print_summary(self):
+ return frame
+ frame = frame.older()
+
+ # Not found:
+ return None
+
+ def print_summary(self):
if self.is_evalframe():
- pyop = self.get_pyop()
- if pyop:
- line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
- write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
+ pyop = self.get_pyop()
+ if pyop:
+ line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+ write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
if not pyop.is_optimized_out():
line = pyop.current_line()
if line is not None:
sys.stdout.write(' %s\n' % line.strip())
- else:
- sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
- else:
+ else:
+ sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
+ else:
info = self.is_other_python_frame()
if info:
sys.stdout.write('#%i %s\n' % (self.get_index(), info))
else:
sys.stdout.write('#%i\n' % self.get_index())
-
+
def print_traceback(self):
if self.is_evalframe():
pyop = self.get_pyop()
@@ -1682,62 +1682,62 @@ class Frame(object):
sys.stdout.write(' %s\n' % info)
else:
sys.stdout.write(' (not a python frame)\n')
-
-class PyList(gdb.Command):
- '''List the current Python source code, if any
-
- Use
- py-list START
- to list at a different line number within the python source.
-        executor.xdecref(_evalcode_python(executor, expr, input_type))
- Use
- py-list START, END
- to list a specific range of lines within the python source.
- '''
-
- def __init__(self):
- gdb.Command.__init__ (self,
- "py-list",
- gdb.COMMAND_FILES,
- gdb.COMPLETE_NONE)
-
-
- def invoke(self, args, from_tty):
- import re
-
- start = None
- end = None
-
- m = re.match(r'\s*(\d+)\s*', args)
- if m:
- start = int(m.group(0))
- end = start + 10
-
- m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
- if m:
- start, end = map(int, m.groups())
-
+
+class PyList(gdb.Command):
+ '''List the current Python source code, if any
+
+ Use
+ py-list START
+ to list at a different line number within the python source.
+
+ Use
+ py-list START, END
+ to list a specific range of lines within the python source.
+ '''
+
+ def __init__(self):
+ gdb.Command.__init__ (self,
+ "py-list",
+ gdb.COMMAND_FILES,
+ gdb.COMPLETE_NONE)
+
+
+ def invoke(self, args, from_tty):
+ import re
+
+ start = None
+ end = None
+
+ m = re.match(r'\s*(\d+)\s*', args)
+ if m:
+ start = int(m.group(0))
+ end = start + 10
+
+ m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
+ if m:
+ start, end = map(int, m.groups())
+
# py-list requires an actual PyEval_EvalFrameEx frame:
frame = Frame.get_selected_bytecode_frame()
- if not frame:
+ if not frame:
print('Unable to locate gdb frame for python bytecode interpreter')
- return
-
- pyop = frame.get_pyop()
+ return
+
+ pyop = frame.get_pyop()
if not pyop or pyop.is_optimized_out():
- print('Unable to read information on python frame')
- return
-
- filename = pyop.filename()
- lineno = pyop.current_line_num()
-
- if start is None:
- start = lineno - 5
- end = lineno + 5
-
- if start<1:
- start = 1
-
+ print('Unable to read information on python frame')
+ return
+
+ filename = pyop.filename()
+ lineno = pyop.current_line_num()
+
+ if start is None:
+ start = lineno - 5
+ end = lineno + 5
+
+ if start<1:
+ start = 1
+
try:
f = open(os_fsencode(filename), 'r')
except IOError as err:
@@ -1745,79 +1745,79 @@ class PyList(gdb.Command):
% (filename, err))
return
with f:
- all_lines = f.readlines()
- # start and end are 1-based, all_lines is 0-based;
- # so [start-1:end] as a python slice gives us [start, end] as a
- # closed interval
- for i, line in enumerate(all_lines[start-1:end]):
- linestr = str(i+start)
- # Highlight current line:
- if i + start == lineno:
- linestr = '>' + linestr
- sys.stdout.write('%4s %s' % (linestr, line))
-
-
-# ...and register the command:
-PyList()
-
-def move_in_stack(move_up):
- '''Move up or down the stack (for the py-up/py-down command)'''
- frame = Frame.get_selected_python_frame()
+ all_lines = f.readlines()
+ # start and end are 1-based, all_lines is 0-based;
+ # so [start-1:end] as a python slice gives us [start, end] as a
+ # closed interval
+ for i, line in enumerate(all_lines[start-1:end]):
+ linestr = str(i+start)
+ # Highlight current line:
+ if i + start == lineno:
+ linestr = '>' + linestr
+ sys.stdout.write('%4s %s' % (linestr, line))
+
+
+# ...and register the command:
+PyList()
+
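A brief usage sketch, assuming the inferior is stopped inside the bytecode interpreter; the py-list command registered above can also be driven from gdb's Python side:

    gdb.execute('py-list')         # source lines around the current line of the selected python frame
    gdb.execute('py-list 20, 40')  # an explicit, hypothetical line range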
+def move_in_stack(move_up):
+ '''Move up or down the stack (for the py-up/py-down command)'''
+ frame = Frame.get_selected_python_frame()
if not frame:
print('Unable to locate python frame')
return
- while frame:
- if move_up:
- iter_frame = frame.older()
- else:
- iter_frame = frame.newer()
-
- if not iter_frame:
- break
-
+ while frame:
+ if move_up:
+ iter_frame = frame.older()
+ else:
+ iter_frame = frame.newer()
+
+ if not iter_frame:
+ break
+
if iter_frame.is_python_frame():
- # Result:
- if iter_frame.select():
- iter_frame.print_summary()
- return
-
- frame = iter_frame
-
- if move_up:
- print('Unable to find an older python frame')
- else:
- print('Unable to find a newer python frame')
-
-class PyUp(gdb.Command):
- 'Select and print the python stack frame that called this one (if any)'
- def __init__(self):
- gdb.Command.__init__ (self,
- "py-up",
- gdb.COMMAND_STACK,
- gdb.COMPLETE_NONE)
-
-
- def invoke(self, args, from_tty):
- move_in_stack(move_up=True)
-
-class PyDown(gdb.Command):
- 'Select and print the python stack frame called by this one (if any)'
- def __init__(self):
- gdb.Command.__init__ (self,
- "py-down",
- gdb.COMMAND_STACK,
- gdb.COMPLETE_NONE)
-
-
- def invoke(self, args, from_tty):
- move_in_stack(move_up=False)
-
-# Not all builds of gdb have gdb.Frame.select
-if hasattr(gdb.Frame, 'select'):
- PyUp()
- PyDown()
-
+ # Result:
+ if iter_frame.select():
+ iter_frame.print_summary()
+ return
+
+ frame = iter_frame
+
+ if move_up:
+ print('Unable to find an older python frame')
+ else:
+ print('Unable to find a newer python frame')
+
+class PyUp(gdb.Command):
+ 'Select and print the python stack frame that called this one (if any)'
+ def __init__(self):
+ gdb.Command.__init__ (self,
+ "py-up",
+ gdb.COMMAND_STACK,
+ gdb.COMPLETE_NONE)
+
+
+ def invoke(self, args, from_tty):
+ move_in_stack(move_up=True)
+
+class PyDown(gdb.Command):
+ 'Select and print the python stack frame called by this one (if any)'
+ def __init__(self):
+ gdb.Command.__init__ (self,
+ "py-down",
+ gdb.COMMAND_STACK,
+ gdb.COMPLETE_NONE)
+
+
+ def invoke(self, args, from_tty):
+ move_in_stack(move_up=False)
+
+# Not all builds of gdb have gdb.Frame.select
+if hasattr(gdb.Frame, 'select'):
+ PyUp()
+ PyDown()
+
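For illustration, the frame walking that py-bt-full performs below can be sketched directly from gdb's Python prompt (assuming a python frame can be located at all):

    frame = Frame.get_selected_python_frame()
    while frame:
        if frame.is_python_frame():
            frame.print_summary()
        frame = frame.older()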
class PyBacktraceFull(gdb.Command):
'Display the current python frame and all the frames within its call stack (if any)'
def __init__(self):
@@ -1825,7 +1825,7 @@ class PyBacktraceFull(gdb.Command):
"py-bt-full",
gdb.COMMAND_STACK,
gdb.COMPLETE_NONE)
-
+
def invoke(self, args, from_tty):
frame = Frame.get_selected_python_frame()
@@ -1840,101 +1840,101 @@ class PyBacktraceFull(gdb.Command):
PyBacktraceFull()
-class PyBacktrace(gdb.Command):
- 'Display the current python frame and all the frames within its call stack (if any)'
- def __init__(self):
- gdb.Command.__init__ (self,
- "py-bt",
- gdb.COMMAND_STACK,
- gdb.COMPLETE_NONE)
-
-
- def invoke(self, args, from_tty):
- frame = Frame.get_selected_python_frame()
+class PyBacktrace(gdb.Command):
+ 'Display the current python frame and all the frames within its call stack (if any)'
+ def __init__(self):
+ gdb.Command.__init__ (self,
+ "py-bt",
+ gdb.COMMAND_STACK,
+ gdb.COMPLETE_NONE)
+
+
+ def invoke(self, args, from_tty):
+ frame = Frame.get_selected_python_frame()
if not frame:
print('Unable to locate python frame')
return
sys.stdout.write('Traceback (most recent call first):\n')
- while frame:
+ while frame:
if frame.is_python_frame():
frame.print_traceback()
- frame = frame.older()
-
-PyBacktrace()
-
-class PyPrint(gdb.Command):
- 'Look up the given python variable name, and print it'
- def __init__(self):
- gdb.Command.__init__ (self,
- "py-print",
- gdb.COMMAND_DATA,
- gdb.COMPLETE_NONE)
-
-
- def invoke(self, args, from_tty):
- name = str(args)
-
- frame = Frame.get_selected_python_frame()
- if not frame:
- print('Unable to locate python frame')
- return
-
- pyop_frame = frame.get_pyop()
- if not pyop_frame:
- print('Unable to read information on python frame')
- return
-
- pyop_var, scope = pyop_frame.get_var_by_name(name)
-
- if pyop_var:
+ frame = frame.older()
+
+PyBacktrace()
+
+class PyPrint(gdb.Command):
+ 'Look up the given python variable name, and print it'
+ def __init__(self):
+ gdb.Command.__init__ (self,
+ "py-print",
+ gdb.COMMAND_DATA,
+ gdb.COMPLETE_NONE)
+
+
+ def invoke(self, args, from_tty):
+ name = str(args)
+
+ frame = Frame.get_selected_python_frame()
+ if not frame:
+ print('Unable to locate python frame')
+ return
+
+ pyop_frame = frame.get_pyop()
+ if not pyop_frame:
+ print('Unable to read information on python frame')
+ return
+
+ pyop_var, scope = pyop_frame.get_var_by_name(name)
+
+ if pyop_var:
print('%s %r = %s'
% (scope,
name,
pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))
- else:
- print('%r not found' % name)
-
-PyPrint()
-
-class PyLocals(gdb.Command):
-    'Print the local variables of the currently selected Python frame'
+ else:
+ print('%r not found' % name)
+
+PyPrint()
+
+class PyLocals(gdb.Command):
+    'Print the local variables of the currently selected Python frame'
def __init__(self, command="py-locals"):
gdb.Command.__init__ (self,
command,
gdb.COMMAND_DATA,
gdb.COMPLETE_NONE)
-
-
- def invoke(self, args, from_tty):
- name = str(args)
-
- frame = Frame.get_selected_python_frame()
- if not frame:
- print('Unable to locate python frame')
- return
-
- pyop_frame = frame.get_pyop()
- if not pyop_frame:
- print('Unable to read information on python frame')
- return
-
- namespace = self.get_namespace(pyop_frame)
- namespace = [(name.proxyval(set()), val) for name, val in namespace]
-
- if namespace:
- name, val = max(namespace, key=lambda item: len(item[0]))
- max_name_length = len(name)
-
- for name, pyop_value in namespace:
- value = pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)
- print('%-*s = %s' % (max_name_length, name, value))
-
- def get_namespace(self, pyop_frame):
- return pyop_frame.iter_locals()
-
+
+
+ def invoke(self, args, from_tty):
+ name = str(args)
+
+ frame = Frame.get_selected_python_frame()
+ if not frame:
+ print('Unable to locate python frame')
+ return
+
+ pyop_frame = frame.get_pyop()
+ if not pyop_frame:
+ print('Unable to read information on python frame')
+ return
+
+ namespace = self.get_namespace(pyop_frame)
+ namespace = [(name.proxyval(set()), val) for name, val in namespace]
+
+ if namespace:
+ name, val = max(namespace, key=lambda item: len(item[0]))
+ max_name_length = len(name)
+
+ for name, pyop_value in namespace:
+ value = pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)
+ print('%-*s = %s' % (max_name_length, name, value))
+
+ def get_namespace(self, pyop_frame):
+ return pyop_frame.iter_locals()
+
PyLocals()
-
+
##################################################################
## added, not in CPython
@@ -1946,815 +1946,815 @@ import tempfile
import textwrap
import itertools
-class PyGlobals(PyLocals):
-    'List all the globals in the currently selected Python frame'
-
- def get_namespace(self, pyop_frame):
- return pyop_frame.iter_globals()
-
-
+class PyGlobals(PyLocals):
+    'List all the globals in the currently selected Python frame'
+
+ def get_namespace(self, pyop_frame):
+ return pyop_frame.iter_globals()
+
+
PyGlobals("py-globals")
-
-
-class PyNameEquals(gdb.Function):
-
- def _get_pycurframe_attr(self, attr):
- frame = Frame(gdb.selected_frame())
-        if frame.is_evalframe():
- pyframe = frame.get_pyop()
- if pyframe is None:
- warnings.warn("Use a Python debug build, Python breakpoints "
- "won't work otherwise.")
- return None
-
- return getattr(pyframe, attr).proxyval(set())
-
- return None
-
- def invoke(self, funcname):
- attr = self._get_pycurframe_attr('co_name')
- return attr is not None and attr == funcname.string()
-
-PyNameEquals("pyname_equals")
-
-
-class PyModEquals(PyNameEquals):
-
- def invoke(self, modname):
- attr = self._get_pycurframe_attr('co_filename')
- if attr is not None:
- filename, ext = os.path.splitext(os.path.basename(attr))
- return filename == modname.string()
- return False
-
-PyModEquals("pymod_equals")
-
-
-class PyBreak(gdb.Command):
- """
- Set a Python breakpoint. Examples:
-
- Break on any function or method named 'func' in module 'modname'
-
- py-break modname.func
-
- Break on any function or method named 'func'
-
- py-break func
- """
-
- def invoke(self, funcname, from_tty):
- if '.' in funcname:
- modname, dot, funcname = funcname.rpartition('.')
- cond = '$pyname_equals("%s") && $pymod_equals("%s")' % (funcname,
- modname)
- else:
- cond = '$pyname_equals("%s")' % funcname
-
- gdb.execute('break PyEval_EvalFrameEx if ' + cond)
-
-PyBreak("py-break", gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
-
-
-class _LoggingState(object):
- """
- State that helps to provide a reentrant gdb.execute() function.
- """
-
- def __init__(self):
+
+
+class PyNameEquals(gdb.Function):
+
+ def _get_pycurframe_attr(self, attr):
+ frame = Frame(gdb.selected_frame())
+        if frame.is_evalframe():
+ pyframe = frame.get_pyop()
+ if pyframe is None:
+ warnings.warn("Use a Python debug build, Python breakpoints "
+ "won't work otherwise.")
+ return None
+
+ return getattr(pyframe, attr).proxyval(set())
+
+ return None
+
+ def invoke(self, funcname):
+ attr = self._get_pycurframe_attr('co_name')
+ return attr is not None and attr == funcname.string()
+
+PyNameEquals("pyname_equals")
+
+
+class PyModEquals(PyNameEquals):
+
+ def invoke(self, modname):
+ attr = self._get_pycurframe_attr('co_filename')
+ if attr is not None:
+ filename, ext = os.path.splitext(os.path.basename(attr))
+ return filename == modname.string()
+ return False
+
+PyModEquals("pymod_equals")
+
+
+class PyBreak(gdb.Command):
+ """
+ Set a Python breakpoint. Examples:
+
+ Break on any function or method named 'func' in module 'modname'
+
+ py-break modname.func
+
+ Break on any function or method named 'func'
+
+ py-break func
+ """
+
+ def invoke(self, funcname, from_tty):
+ if '.' in funcname:
+ modname, dot, funcname = funcname.rpartition('.')
+ cond = '$pyname_equals("%s") && $pymod_equals("%s")' % (funcname,
+ modname)
+ else:
+ cond = '$pyname_equals("%s")' % funcname
+
+ gdb.execute('break PyEval_EvalFrameEx if ' + cond)
+
+PyBreak("py-break", gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
+
+
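A sketch of what the command above expands to for a hypothetical `py-break mymod.myfunc`; it is simply a conditional breakpoint built on the two convenience functions registered earlier:

    gdb.execute('break PyEval_EvalFrameEx if '
                '$pyname_equals("myfunc") && $pymod_equals("mymod")')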
+class _LoggingState(object):
+ """
+ State that helps to provide a reentrant gdb.execute() function.
+ """
+
+ def __init__(self):
f = tempfile.NamedTemporaryFile('r+')
self.file = f
self.filename = f.name
self.fd = f.fileno()
- _execute("set logging file %s" % self.filename)
- self.file_position_stack = []
-
- def __enter__(self):
- if not self.file_position_stack:
- _execute("set logging redirect on")
- _execute("set logging on")
- _execute("set pagination off")
-
- self.file_position_stack.append(os.fstat(self.fd).st_size)
- return self
-
- def getoutput(self):
- gdb.flush()
- self.file.seek(self.file_position_stack[-1])
- result = self.file.read()
- return result
-
- def __exit__(self, exc_type, exc_val, tb):
- startpos = self.file_position_stack.pop()
- self.file.seek(startpos)
- self.file.truncate()
- if not self.file_position_stack:
- _execute("set logging off")
- _execute("set logging redirect off")
- _execute("set pagination on")
-
-
-def execute(command, from_tty=False, to_string=False):
- """
- Replace gdb.execute() with this function and have it accept a 'to_string'
- argument (new in 7.2). Have it properly capture stderr also. Ensure
- reentrancy.
- """
- if to_string:
- with _logging_state as state:
- _execute(command, from_tty)
- return state.getoutput()
- else:
- _execute(command, from_tty)
-
-
-_execute = gdb.execute
-gdb.execute = execute
-_logging_state = _LoggingState()
-
-
-def get_selected_inferior():
- """
- Return the selected inferior in gdb.
- """
- # Woooh, another bug in gdb! Is there an end in sight?
- # http://sourceware.org/bugzilla/show_bug.cgi?id=12212
- return gdb.inferiors()[0]
-
- selected_thread = gdb.selected_thread()
-
- for inferior in gdb.inferiors():
- for thread in inferior.threads():
- if thread == selected_thread:
- return inferior
-
-
-def source_gdb_script(script_contents, to_string=False):
- """
- Source a gdb script with script_contents passed as a string. This is useful
- to provide defines for py-step and py-next to make them repeatable (this is
- not possible with gdb.execute()). See
- http://sourceware.org/bugzilla/show_bug.cgi?id=12216
- """
- fd, filename = tempfile.mkstemp()
- f = os.fdopen(fd, 'w')
- f.write(script_contents)
- f.close()
- gdb.execute("source %s" % filename, to_string=to_string)
- os.remove(filename)
-
-
-def register_defines():
- source_gdb_script(textwrap.dedent("""\
- define py-step
- -py-step
- end
-
- define py-next
- -py-next
- end
-
- document py-step
- %s
- end
-
- document py-next
- %s
- end
- """) % (PyStep.__doc__, PyNext.__doc__))
-
-
-def stackdepth(frame):
- "Tells the stackdepth of a gdb frame."
- depth = 0
- while frame:
- frame = frame.older()
- depth += 1
-
- return depth
-
-
-class ExecutionControlCommandBase(gdb.Command):
- """
- Superclass for language specific execution control. Language specific
- features should be implemented by lang_info using the LanguageInfo
- interface. 'name' is the name of the command.
- """
-
- def __init__(self, name, lang_info):
- super(ExecutionControlCommandBase, self).__init__(
- name, gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
- self.lang_info = lang_info
-
- def install_breakpoints(self):
- all_locations = itertools.chain(
- self.lang_info.static_break_functions(),
- self.lang_info.runtime_break_functions())
-
- for location in all_locations:
- result = gdb.execute('break %s' % location, to_string=True)
- yield re.search(r'Breakpoint (\d+)', result).group(1)
-
- def delete_breakpoints(self, breakpoint_list):
- for bp in breakpoint_list:
- gdb.execute("delete %s" % bp)
-
- def filter_output(self, result):
- reflags = re.MULTILINE
-
- output_on_halt = [
- (r'^Program received signal .*', reflags|re.DOTALL),
- (r'.*[Ww]arning.*', 0),
- (r'^Program exited .*', reflags),
- ]
-
- output_always = [
- # output when halting on a watchpoint
- (r'^(Old|New) value = .*', reflags),
- # output from the 'display' command
- (r'^\d+: \w+ = .*', reflags),
- ]
-
- def filter_output(regexes):
- output = []
- for regex, flags in regexes:
- for match in re.finditer(regex, result, flags):
- output.append(match.group(0))
-
- return '\n'.join(output)
-
- # Filter the return value output of the 'finish' command
- match_finish = re.search(r'^Value returned is \$\d+ = (.*)', result,
- re.MULTILINE)
- if match_finish:
- finish_output = 'Value returned: %s\n' % match_finish.group(1)
- else:
- finish_output = ''
-
- return (filter_output(output_on_halt),
- finish_output + filter_output(output_always))
-
- def stopped(self):
- return get_selected_inferior().pid == 0
-
- def finish_executing(self, result):
- """
- After doing some kind of code running in the inferior, print the line
- of source code or the result of the last executed gdb command (passed
- in as the `result` argument).
- """
- output_on_halt, output_always = self.filter_output(result)
-
- if self.stopped():
- print(output_always)
- print(output_on_halt)
- else:
- frame = gdb.selected_frame()
- source_line = self.lang_info.get_source_line(frame)
- if self.lang_info.is_relevant_function(frame):
- raised_exception = self.lang_info.exc_info(frame)
- if raised_exception:
- print(raised_exception)
-
- if source_line:
- if output_always.rstrip():
- print(output_always.rstrip())
- print(source_line)
- else:
- print(result)
-
- def _finish(self):
- """
- Execute until the function returns (or until something else makes it
- stop)
- """
- if gdb.selected_frame().older() is not None:
- return gdb.execute('finish', to_string=True)
- else:
- # outermost frame, continue
- return gdb.execute('cont', to_string=True)
-
- def _finish_frame(self):
- """
- Execute until the function returns to a relevant caller.
- """
- while True:
- result = self._finish()
-
- try:
- frame = gdb.selected_frame()
- except RuntimeError:
- break
-
- hitbp = re.search(r'Breakpoint (\d+)', result)
- is_relevant = self.lang_info.is_relevant_function(frame)
- if hitbp or is_relevant or self.stopped():
- break
-
- return result
-
- def finish(self, *args):
- "Implements the finish command."
- result = self._finish_frame()
- self.finish_executing(result)
-
- def step(self, stepinto, stepover_command='next'):
- """
- Do a single step or step-over. Returns the result of the last gdb
- command that made execution stop.
-
- This implementation, for stepping, sets (conditional) breakpoints for
- all functions that are deemed relevant. It then does a step over until
- either something halts execution, or until the next line is reached.
-
- If, however, stepover_command is given, it should be a string gdb
- command that continues execution in some way. The idea is that the
- caller has set a (conditional) breakpoint or watchpoint that can work
- more efficiently than the step-over loop. For Python this means setting
- a watchpoint for f->f_lasti, which means we can then subsequently
- "finish" frames.
- We want f->f_lasti instead of f->f_lineno, because the latter only
- works properly with local trace functions, see
- PyFrameObjectPtr.current_line_num and PyFrameObjectPtr.addr2line.
- """
- if stepinto:
- breakpoint_list = list(self.install_breakpoints())
-
- beginframe = gdb.selected_frame()
-
- if self.lang_info.is_relevant_function(beginframe):
- # If we start in a relevant frame, initialize stuff properly. If
- # we don't start in a relevant frame, the loop will halt
- # immediately. So don't call self.lang_info.lineno() as it may
- # raise for irrelevant frames.
- beginline = self.lang_info.lineno(beginframe)
-
- if not stepinto:
- depth = stackdepth(beginframe)
-
- newframe = beginframe
-
- while True:
- if self.lang_info.is_relevant_function(newframe):
- result = gdb.execute(stepover_command, to_string=True)
- else:
- result = self._finish_frame()
-
- if self.stopped():
- break
-
- newframe = gdb.selected_frame()
- is_relevant_function = self.lang_info.is_relevant_function(newframe)
- try:
- framename = newframe.name()
- except RuntimeError:
- framename = None
-
- m = re.search(r'Breakpoint (\d+)', result)
- if m:
- if is_relevant_function and m.group(1) in breakpoint_list:
- # although we hit a breakpoint, we still need to check
- # that the function, in case hit by a runtime breakpoint,
- # is in the right context
- break
-
- if newframe != beginframe:
- # new function
-
- if not stepinto:
- # see if we returned to the caller
- newdepth = stackdepth(newframe)
- is_relevant_function = (newdepth < depth and
- is_relevant_function)
-
- if is_relevant_function:
- break
- else:
- # newframe equals beginframe, check for a difference in the
- # line number
- lineno = self.lang_info.lineno(newframe)
- if lineno and lineno != beginline:
- break
-
- if stepinto:
- self.delete_breakpoints(breakpoint_list)
-
- self.finish_executing(result)
-
- def run(self, args, from_tty):
- self.finish_executing(gdb.execute('run ' + args, to_string=True))
-
- def cont(self, *args):
- self.finish_executing(gdb.execute('cont', to_string=True))
-
-
-class LanguageInfo(object):
- """
- This class defines the interface that ExecutionControlCommandBase needs to
- provide language-specific execution control.
-
- Classes that implement this interface should implement:
-
- lineno(frame)
- Tells the current line number (only called for a relevant frame).
- If lineno is a false value it is not checked for a difference.
-
- is_relevant_function(frame)
- tells whether we care about frame 'frame'
-
- get_source_line(frame)
- get the line of source code for the current line (only called for a
- relevant frame). If the source code cannot be retrieved this
- function should return None
-
- exc_info(frame) -- optional
- tells whether an exception was raised, if so, it should return a
- string representation of the exception value, None otherwise.
-
- static_break_functions()
- returns an iterable of function names that are considered relevant
- and should halt step-into execution. This is needed to provide a
-        performant step-into
-
- runtime_break_functions() -- optional
- list of functions that we should break into depending on the
- context
- """
-
- def exc_info(self, frame):
- "See this class' docstring."
-
- def runtime_break_functions(self):
- """
- Implement this if the list of step-into functions depends on the
- context.
- """
- return ()
-
-
-class PythonInfo(LanguageInfo):
-
- def pyframe(self, frame):
- pyframe = Frame(frame).get_pyop()
- if pyframe:
- return pyframe
- else:
- raise gdb.RuntimeError(
- "Unable to find the Python frame, run your code with a debug "
- "build (configure with --with-pydebug or compile with -g).")
-
- def lineno(self, frame):
- return self.pyframe(frame).current_line_num()
-
- def is_relevant_function(self, frame):
-        return Frame(frame).is_evalframe()
-
- def get_source_line(self, frame):
- try:
- pyframe = self.pyframe(frame)
- return '%4d %s' % (pyframe.current_line_num(),
- pyframe.current_line().rstrip())
- except IOError:
- return None
-
- def exc_info(self, frame):
- try:
- tstate = frame.read_var('tstate').dereference()
- if gdb.parse_and_eval('tstate->frame == f'):
- # tstate local variable initialized, check for an exception
- inf_type = tstate['curexc_type']
- inf_value = tstate['curexc_value']
-
- if inf_type:
- return 'An exception was raised: %s' % (inf_value,)
- except (ValueError, RuntimeError):
-            # Could not read the variable tstate or its memory; that's ok
- pass
-
- def static_break_functions(self):
- yield 'PyEval_EvalFrameEx'
-
-
-class PythonStepperMixin(object):
- """
- Make this a mixin so CyStep can also inherit from this and use a
- CythonCodeStepper at the same time.
- """
-
- def python_step(self, stepinto):
- """
- Set a watchpoint on the Python bytecode instruction pointer and try
- to finish the frame
- """
- output = gdb.execute('watch f->f_lasti', to_string=True)
- watchpoint = int(re.search(r'[Ww]atchpoint (\d+):', output).group(1))
- self.step(stepinto=stepinto, stepover_command='finish')
- gdb.execute('delete %s' % watchpoint)
-
-
-class PyStep(ExecutionControlCommandBase, PythonStepperMixin):
- "Step through Python code."
-
- stepinto = True
-
- def invoke(self, args, from_tty):
- self.python_step(stepinto=self.stepinto)
-
-
-class PyNext(PyStep):
- "Step-over Python code."
-
- stepinto = False
-
-
-class PyFinish(ExecutionControlCommandBase):
- "Execute until function returns to a caller."
-
- invoke = ExecutionControlCommandBase.finish
-
-
-class PyRun(ExecutionControlCommandBase):
- "Run the program."
-
- invoke = ExecutionControlCommandBase.run
-
-
-class PyCont(ExecutionControlCommandBase):
-
- invoke = ExecutionControlCommandBase.cont
-
-
-def _pointervalue(gdbval):
- """
+ _execute("set logging file %s" % self.filename)
+ self.file_position_stack = []
+
+ def __enter__(self):
+ if not self.file_position_stack:
+ _execute("set logging redirect on")
+ _execute("set logging on")
+ _execute("set pagination off")
+
+ self.file_position_stack.append(os.fstat(self.fd).st_size)
+ return self
+
+ def getoutput(self):
+ gdb.flush()
+ self.file.seek(self.file_position_stack[-1])
+ result = self.file.read()
+ return result
+
+ def __exit__(self, exc_type, exc_val, tb):
+ startpos = self.file_position_stack.pop()
+ self.file.seek(startpos)
+ self.file.truncate()
+ if not self.file_position_stack:
+ _execute("set logging off")
+ _execute("set logging redirect off")
+ _execute("set pagination on")
+
+
+def execute(command, from_tty=False, to_string=False):
+ """
+ Replace gdb.execute() with this function and have it accept a 'to_string'
+ argument (new in 7.2). Have it properly capture stderr also. Ensure
+ reentrancy.
+ """
+ if to_string:
+ with _logging_state as state:
+ _execute(command, from_tty)
+ return state.getoutput()
+ else:
+ _execute(command, from_tty)
+
+
+_execute = gdb.execute
+gdb.execute = execute
+_logging_state = _LoggingState()
+
+
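A minimal usage sketch of the wrapped execute(): with to_string=True the output of a gdb command is captured (including stderr) instead of being printed:

    threads = gdb.execute('info threads', to_string=True)
    first_line = threads.splitlines()[0] if threads else '<no output>'
    print(first_line)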
+def get_selected_inferior():
+ """
+ Return the selected inferior in gdb.
+ """
+ # Woooh, another bug in gdb! Is there an end in sight?
+ # http://sourceware.org/bugzilla/show_bug.cgi?id=12212
+ return gdb.inferiors()[0]
+
+ selected_thread = gdb.selected_thread()
+
+ for inferior in gdb.inferiors():
+ for thread in inferior.threads():
+ if thread == selected_thread:
+ return inferior
+
+
+def source_gdb_script(script_contents, to_string=False):
+ """
+ Source a gdb script with script_contents passed as a string. This is useful
+ to provide defines for py-step and py-next to make them repeatable (this is
+ not possible with gdb.execute()). See
+ http://sourceware.org/bugzilla/show_bug.cgi?id=12216
+ """
+ fd, filename = tempfile.mkstemp()
+ f = os.fdopen(fd, 'w')
+ f.write(script_contents)
+ f.close()
+ gdb.execute("source %s" % filename, to_string=to_string)
+ os.remove(filename)
+
+
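A hedged usage sketch: sourcing a tiny user-defined command from a string (the command name `py-hello` is hypothetical):

    source_gdb_script(textwrap.dedent("""\
        define py-hello
        echo hello from py-hello\\n
        end
        """))
    gdb.execute('py-hello')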
+def register_defines():
+ source_gdb_script(textwrap.dedent("""\
+ define py-step
+ -py-step
+ end
+
+ define py-next
+ -py-next
+ end
+
+ document py-step
+ %s
+ end
+
+ document py-next
+ %s
+ end
+ """) % (PyStep.__doc__, PyNext.__doc__))
+
+
+def stackdepth(frame):
+ "Tells the stackdepth of a gdb frame."
+ depth = 0
+ while frame:
+ frame = frame.older()
+ depth += 1
+
+ return depth
+
+
+class ExecutionControlCommandBase(gdb.Command):
+ """
+ Superclass for language specific execution control. Language specific
+ features should be implemented by lang_info using the LanguageInfo
+ interface. 'name' is the name of the command.
+ """
+
+ def __init__(self, name, lang_info):
+ super(ExecutionControlCommandBase, self).__init__(
+ name, gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
+ self.lang_info = lang_info
+
+ def install_breakpoints(self):
+ all_locations = itertools.chain(
+ self.lang_info.static_break_functions(),
+ self.lang_info.runtime_break_functions())
+
+ for location in all_locations:
+ result = gdb.execute('break %s' % location, to_string=True)
+ yield re.search(r'Breakpoint (\d+)', result).group(1)
+
+ def delete_breakpoints(self, breakpoint_list):
+ for bp in breakpoint_list:
+ gdb.execute("delete %s" % bp)
+
+ def filter_output(self, result):
+ reflags = re.MULTILINE
+
+ output_on_halt = [
+ (r'^Program received signal .*', reflags|re.DOTALL),
+ (r'.*[Ww]arning.*', 0),
+ (r'^Program exited .*', reflags),
+ ]
+
+ output_always = [
+ # output when halting on a watchpoint
+ (r'^(Old|New) value = .*', reflags),
+ # output from the 'display' command
+ (r'^\d+: \w+ = .*', reflags),
+ ]
+
+ def filter_output(regexes):
+ output = []
+ for regex, flags in regexes:
+ for match in re.finditer(regex, result, flags):
+ output.append(match.group(0))
+
+ return '\n'.join(output)
+
+ # Filter the return value output of the 'finish' command
+ match_finish = re.search(r'^Value returned is \$\d+ = (.*)', result,
+ re.MULTILINE)
+ if match_finish:
+ finish_output = 'Value returned: %s\n' % match_finish.group(1)
+ else:
+ finish_output = ''
+
+ return (filter_output(output_on_halt),
+ finish_output + filter_output(output_always))
+
+ def stopped(self):
+ return get_selected_inferior().pid == 0
+
+ def finish_executing(self, result):
+ """
+ After doing some kind of code running in the inferior, print the line
+ of source code or the result of the last executed gdb command (passed
+ in as the `result` argument).
+ """
+ output_on_halt, output_always = self.filter_output(result)
+
+ if self.stopped():
+ print(output_always)
+ print(output_on_halt)
+ else:
+ frame = gdb.selected_frame()
+ source_line = self.lang_info.get_source_line(frame)
+ if self.lang_info.is_relevant_function(frame):
+ raised_exception = self.lang_info.exc_info(frame)
+ if raised_exception:
+ print(raised_exception)
+
+ if source_line:
+ if output_always.rstrip():
+ print(output_always.rstrip())
+ print(source_line)
+ else:
+ print(result)
+
+ def _finish(self):
+ """
+ Execute until the function returns (or until something else makes it
+ stop)
+ """
+ if gdb.selected_frame().older() is not None:
+ return gdb.execute('finish', to_string=True)
+ else:
+ # outermost frame, continue
+ return gdb.execute('cont', to_string=True)
+
+ def _finish_frame(self):
+ """
+ Execute until the function returns to a relevant caller.
+ """
+ while True:
+ result = self._finish()
+
+ try:
+ frame = gdb.selected_frame()
+ except RuntimeError:
+ break
+
+ hitbp = re.search(r'Breakpoint (\d+)', result)
+ is_relevant = self.lang_info.is_relevant_function(frame)
+ if hitbp or is_relevant or self.stopped():
+ break
+
+ return result
+
+ def finish(self, *args):
+ "Implements the finish command."
+ result = self._finish_frame()
+ self.finish_executing(result)
+
+ def step(self, stepinto, stepover_command='next'):
+ """
+ Do a single step or step-over. Returns the result of the last gdb
+ command that made execution stop.
+
+ This implementation, for stepping, sets (conditional) breakpoints for
+ all functions that are deemed relevant. It then does a step over until
+ either something halts execution, or until the next line is reached.
+
+ If, however, stepover_command is given, it should be a string gdb
+ command that continues execution in some way. The idea is that the
+ caller has set a (conditional) breakpoint or watchpoint that can work
+ more efficiently than the step-over loop. For Python this means setting
+ a watchpoint for f->f_lasti, which means we can then subsequently
+ "finish" frames.
+ We want f->f_lasti instead of f->f_lineno, because the latter only
+ works properly with local trace functions, see
+ PyFrameObjectPtr.current_line_num and PyFrameObjectPtr.addr2line.
+ """
+ if stepinto:
+ breakpoint_list = list(self.install_breakpoints())
+
+ beginframe = gdb.selected_frame()
+
+ if self.lang_info.is_relevant_function(beginframe):
+ # If we start in a relevant frame, initialize stuff properly. If
+ # we don't start in a relevant frame, the loop will halt
+ # immediately. So don't call self.lang_info.lineno() as it may
+ # raise for irrelevant frames.
+ beginline = self.lang_info.lineno(beginframe)
+
+ if not stepinto:
+ depth = stackdepth(beginframe)
+
+ newframe = beginframe
+
+ while True:
+ if self.lang_info.is_relevant_function(newframe):
+ result = gdb.execute(stepover_command, to_string=True)
+ else:
+ result = self._finish_frame()
+
+ if self.stopped():
+ break
+
+ newframe = gdb.selected_frame()
+ is_relevant_function = self.lang_info.is_relevant_function(newframe)
+ try:
+ framename = newframe.name()
+ except RuntimeError:
+ framename = None
+
+ m = re.search(r'Breakpoint (\d+)', result)
+ if m:
+ if is_relevant_function and m.group(1) in breakpoint_list:
+ # although we hit a breakpoint, we still need to check
+ # that the function, in case hit by a runtime breakpoint,
+ # is in the right context
+ break
+
+ if newframe != beginframe:
+ # new function
+
+ if not stepinto:
+ # see if we returned to the caller
+ newdepth = stackdepth(newframe)
+ is_relevant_function = (newdepth < depth and
+ is_relevant_function)
+
+ if is_relevant_function:
+ break
+ else:
+ # newframe equals beginframe, check for a difference in the
+ # line number
+ lineno = self.lang_info.lineno(newframe)
+ if lineno and lineno != beginline:
+ break
+
+ if stepinto:
+ self.delete_breakpoints(breakpoint_list)
+
+ self.finish_executing(result)
+
+ def run(self, args, from_tty):
+ self.finish_executing(gdb.execute('run ' + args, to_string=True))
+
+ def cont(self, *args):
+ self.finish_executing(gdb.execute('cont', to_string=True))
+
+
+class LanguageInfo(object):
+ """
+ This class defines the interface that ExecutionControlCommandBase needs to
+ provide language-specific execution control.
+
+ Classes that implement this interface should implement:
+
+ lineno(frame)
+ Tells the current line number (only called for a relevant frame).
+ If lineno is a false value it is not checked for a difference.
+
+ is_relevant_function(frame)
+ tells whether we care about frame 'frame'
+
+ get_source_line(frame)
+ get the line of source code for the current line (only called for a
+ relevant frame). If the source code cannot be retrieved this
+ function should return None
+
+ exc_info(frame) -- optional
+ tells whether an exception was raised, if so, it should return a
+ string representation of the exception value, None otherwise.
+
+ static_break_functions()
+ returns an iterable of function names that are considered relevant
+ and should halt step-into execution. This is needed to provide a
+        performant step-into
+
+ runtime_break_functions() -- optional
+ list of functions that we should break into depending on the
+ context
+ """
+
+ def exc_info(self, frame):
+ "See this class' docstring."
+
+ def runtime_break_functions(self):
+ """
+ Implement this if the list of step-into functions depends on the
+ context.
+ """
+ return ()
+
+
+class PythonInfo(LanguageInfo):
+
+ def pyframe(self, frame):
+ pyframe = Frame(frame).get_pyop()
+ if pyframe:
+ return pyframe
+ else:
+ raise gdb.RuntimeError(
+ "Unable to find the Python frame, run your code with a debug "
+ "build (configure with --with-pydebug or compile with -g).")
+
+ def lineno(self, frame):
+ return self.pyframe(frame).current_line_num()
+
+ def is_relevant_function(self, frame):
+        return Frame(frame).is_evalframe()
+
+ def get_source_line(self, frame):
+ try:
+ pyframe = self.pyframe(frame)
+ return '%4d %s' % (pyframe.current_line_num(),
+ pyframe.current_line().rstrip())
+ except IOError:
+ return None
+
+ def exc_info(self, frame):
+ try:
+ tstate = frame.read_var('tstate').dereference()
+ if gdb.parse_and_eval('tstate->frame == f'):
+ # tstate local variable initialized, check for an exception
+ inf_type = tstate['curexc_type']
+ inf_value = tstate['curexc_value']
+
+ if inf_type:
+ return 'An exception was raised: %s' % (inf_value,)
+ except (ValueError, RuntimeError):
+            # Could not read the variable tstate or its memory; that's ok
+ pass
+
+ def static_break_functions(self):
+ yield 'PyEval_EvalFrameEx'
+
+
+class PythonStepperMixin(object):
+ """
+ Make this a mixin so CyStep can also inherit from this and use a
+ CythonCodeStepper at the same time.
+ """
+
+ def python_step(self, stepinto):
+ """
+ Set a watchpoint on the Python bytecode instruction pointer and try
+ to finish the frame
+ """
+ output = gdb.execute('watch f->f_lasti', to_string=True)
+ watchpoint = int(re.search(r'[Ww]atchpoint (\d+):', output).group(1))
+ self.step(stepinto=stepinto, stepover_command='finish')
+ gdb.execute('delete %s' % watchpoint)
+
+
+class PyStep(ExecutionControlCommandBase, PythonStepperMixin):
+ "Step through Python code."
+
+ stepinto = True
+
+ def invoke(self, args, from_tty):
+ self.python_step(stepinto=self.stepinto)
+
+
+class PyNext(PyStep):
+ "Step-over Python code."
+
+ stepinto = False
+
+
+class PyFinish(ExecutionControlCommandBase):
+ "Execute until function returns to a caller."
+
+ invoke = ExecutionControlCommandBase.finish
+
+
+class PyRun(ExecutionControlCommandBase):
+ "Run the program."
+
+ invoke = ExecutionControlCommandBase.run
+
+
+class PyCont(ExecutionControlCommandBase):
+
+ invoke = ExecutionControlCommandBase.cont
+
+
+def _pointervalue(gdbval):
+ """
Return the value of the pointer as a Python int.
-
- gdbval.type must be a pointer type
- """
- # don't convert with int() as it will raise a RuntimeError
- if gdbval.address is not None:
+
+ gdbval.type must be a pointer type
+ """
+ # don't convert with int() as it will raise a RuntimeError
+ if gdbval.address is not None:
return int(gdbval.address)
- else:
- # the address attribute is None sometimes, in which case we can
- # still convert the pointer to an int
+ else:
+ # the address attribute is None sometimes, in which case we can
+ # still convert the pointer to an int
return int(gdbval)
-
-
-def pointervalue(gdbval):
- pointer = _pointervalue(gdbval)
- try:
- if pointer < 0:
- raise gdb.GdbError("Negative pointer value, presumably a bug "
- "in gdb, aborting.")
- except RuntimeError:
- # work around yet another bug in gdb where you get random behaviour
- # and tracebacks
- pass
-
- return pointer
-
-
-def get_inferior_unicode_postfix():
- try:
- gdb.parse_and_eval('PyUnicode_FromEncodedObject')
- except RuntimeError:
- try:
- gdb.parse_and_eval('PyUnicodeUCS2_FromEncodedObject')
- except RuntimeError:
- return 'UCS4'
- else:
- return 'UCS2'
- else:
- return ''
-
-
-class PythonCodeExecutor(object):
-
- Py_single_input = 256
- Py_file_input = 257
- Py_eval_input = 258
-
- def malloc(self, size):
- chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
-
- pointer = pointervalue(chunk)
- if pointer == 0:
- raise gdb.GdbError("No memory could be allocated in the inferior.")
-
- return pointer
-
- def alloc_string(self, string):
- pointer = self.malloc(len(string))
- get_selected_inferior().write_memory(pointer, string)
-
- return pointer
-
- def alloc_pystring(self, string):
- stringp = self.alloc_string(string)
- PyString_FromStringAndSize = 'PyString_FromStringAndSize'
-
- try:
- gdb.parse_and_eval(PyString_FromStringAndSize)
- except RuntimeError:
- # Python 3
- PyString_FromStringAndSize = ('PyUnicode%s_FromStringAndSize' %
- (get_inferior_unicode_postfix(),))
-
- try:
- result = gdb.parse_and_eval(
- '(PyObject *) %s((char *) %d, (size_t) %d)' % (
- PyString_FromStringAndSize, stringp, len(string)))
- finally:
- self.free(stringp)
-
- pointer = pointervalue(result)
- if pointer == 0:
- raise gdb.GdbError("Unable to allocate Python string in "
- "the inferior.")
-
- return pointer
-
- def free(self, pointer):
- gdb.parse_and_eval("free((void *) %d)" % pointer)
-
- def incref(self, pointer):
- "Increment the reference count of a Python object in the inferior."
- gdb.parse_and_eval('Py_IncRef((PyObject *) %d)' % pointer)
-
- def xdecref(self, pointer):
- "Decrement the reference count of a Python object in the inferior."
- # Py_DecRef is like Py_XDECREF, but a function. So we don't have
- # to check for NULL. This should also decref all our allocated
- # Python strings.
- gdb.parse_and_eval('Py_DecRef((PyObject *) %d)' % pointer)
-
- def evalcode(self, code, input_type, global_dict=None, local_dict=None):
- """
- Evaluate python code `code` given as a string in the inferior and
- return the result as a gdb.Value. Returns a new reference in the
- inferior.
-
- Of course, executing any code in the inferior may be dangerous and may
+
+
+def pointervalue(gdbval):
+ pointer = _pointervalue(gdbval)
+ try:
+ if pointer < 0:
+ raise gdb.GdbError("Negative pointer value, presumably a bug "
+ "in gdb, aborting.")
+ except RuntimeError:
+ # work around yet another bug in gdb where you get random behaviour
+ # and tracebacks
+ pass
+
+ return pointer
+
+
+def get_inferior_unicode_postfix():
+ try:
+ gdb.parse_and_eval('PyUnicode_FromEncodedObject')
+ except RuntimeError:
+ try:
+ gdb.parse_and_eval('PyUnicodeUCS2_FromEncodedObject')
+ except RuntimeError:
+ return 'UCS4'
+ else:
+ return 'UCS2'
+ else:
+ return ''
+
+
+class PythonCodeExecutor(object):
+
+ Py_single_input = 256
+ Py_file_input = 257
+ Py_eval_input = 258
+
+ def malloc(self, size):
+ chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
+
+ pointer = pointervalue(chunk)
+ if pointer == 0:
+ raise gdb.GdbError("No memory could be allocated in the inferior.")
+
+ return pointer
+
+ def alloc_string(self, string):
+ pointer = self.malloc(len(string))
+ get_selected_inferior().write_memory(pointer, string)
+
+ return pointer
+
+ def alloc_pystring(self, string):
+ stringp = self.alloc_string(string)
+ PyString_FromStringAndSize = 'PyString_FromStringAndSize'
+
+ try:
+ gdb.parse_and_eval(PyString_FromStringAndSize)
+ except RuntimeError:
+ # Python 3
+ PyString_FromStringAndSize = ('PyUnicode%s_FromStringAndSize' %
+ (get_inferior_unicode_postfix(),))
+
+ try:
+ result = gdb.parse_and_eval(
+ '(PyObject *) %s((char *) %d, (size_t) %d)' % (
+ PyString_FromStringAndSize, stringp, len(string)))
+ finally:
+ self.free(stringp)
+
+ pointer = pointervalue(result)
+ if pointer == 0:
+ raise gdb.GdbError("Unable to allocate Python string in "
+ "the inferior.")
+
+ return pointer
+
+ def free(self, pointer):
+ gdb.parse_and_eval("free((void *) %d)" % pointer)
+
+ def incref(self, pointer):
+ "Increment the reference count of a Python object in the inferior."
+ gdb.parse_and_eval('Py_IncRef((PyObject *) %d)' % pointer)
+
+ def xdecref(self, pointer):
+ "Decrement the reference count of a Python object in the inferior."
+ # Py_DecRef is like Py_XDECREF, but a function. So we don't have
+ # to check for NULL. This should also decref all our allocated
+ # Python strings.
+ gdb.parse_and_eval('Py_DecRef((PyObject *) %d)' % pointer)
+
+ def evalcode(self, code, input_type, global_dict=None, local_dict=None):
+ """
+ Evaluate python code `code` given as a string in the inferior and
+ return the result as a gdb.Value. Returns a new reference in the
+ inferior.
+
+ Of course, executing any code in the inferior may be dangerous and may
leave the debuggee in an unsafe state or terminate it altogether.
- """
- if '\0' in code:
- raise gdb.GdbError("String contains NUL byte.")
-
- code += '\0'
-
- pointer = self.alloc_string(code)
-
- globalsp = pointervalue(global_dict)
- localsp = pointervalue(local_dict)
-
- if globalsp == 0 or localsp == 0:
- raise gdb.GdbError("Unable to obtain or create locals or globals.")
-
- code = """
- PyRun_String(
- (char *) %(code)d,
- (int) %(start)d,
- (PyObject *) %(globals)s,
- (PyObject *) %(locals)d)
- """ % dict(code=pointer, start=input_type,
- globals=globalsp, locals=localsp)
-
- with FetchAndRestoreError():
- try:
- pyobject_return_value = gdb.parse_and_eval(code)
- finally:
- self.free(pointer)
-
- return pyobject_return_value
-
-
-class FetchAndRestoreError(PythonCodeExecutor):
- """
- Context manager that fetches the error indicator in the inferior and
- restores it on exit.
- """
-
- def __init__(self):
- self.sizeof_PyObjectPtr = gdb.lookup_type('PyObject').pointer().sizeof
- self.pointer = self.malloc(self.sizeof_PyObjectPtr * 3)
-
- type = self.pointer
- value = self.pointer + self.sizeof_PyObjectPtr
- traceback = self.pointer + self.sizeof_PyObjectPtr * 2
-
- self.errstate = type, value, traceback
-
- def __enter__(self):
- gdb.parse_and_eval("PyErr_Fetch(%d, %d, %d)" % self.errstate)
-
- def __exit__(self, *args):
- if gdb.parse_and_eval("(int) PyErr_Occurred()"):
- gdb.parse_and_eval("PyErr_Print()")
-
- pyerr_restore = ("PyErr_Restore("
- "(PyObject *) *%d,"
- "(PyObject *) *%d,"
- "(PyObject *) *%d)")
-
- try:
- gdb.parse_and_eval(pyerr_restore % self.errstate)
- finally:
- self.free(self.pointer)
-
-
-class FixGdbCommand(gdb.Command):
-
- def __init__(self, command, actual_command):
- super(FixGdbCommand, self).__init__(command, gdb.COMMAND_DATA,
- gdb.COMPLETE_NONE)
- self.actual_command = actual_command
-
- def fix_gdb(self):
- """
+ """
+ if '\0' in code:
+ raise gdb.GdbError("String contains NUL byte.")
+
+ code += '\0'
+
+ pointer = self.alloc_string(code)
+
+ globalsp = pointervalue(global_dict)
+ localsp = pointervalue(local_dict)
+
+ if globalsp == 0 or localsp == 0:
+ raise gdb.GdbError("Unable to obtain or create locals or globals.")
+
+ code = """
+ PyRun_String(
+ (char *) %(code)d,
+ (int) %(start)d,
+ (PyObject *) %(globals)s,
+ (PyObject *) %(locals)d)
+ """ % dict(code=pointer, start=input_type,
+ globals=globalsp, locals=localsp)
+
+ with FetchAndRestoreError():
+ try:
+ pyobject_return_value = gdb.parse_and_eval(code)
+ finally:
+ self.free(pointer)
+
+ return pyobject_return_value
+
+
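A usage sketch under the assumption that the inferior is stopped inside the interpreter: evaluate a small expression in the debuggee and drop the reference we received (the dictionaries come from PyEval_GetGlobals/PyEval_GetLocals, as _evalcode_python below does):

    executor = PythonCodeExecutor()
    global_dict = gdb.parse_and_eval('PyEval_GetGlobals()')
    local_dict = gdb.parse_and_eval('PyEval_GetLocals()')
    result = executor.evalcode('1 + 1', PythonCodeExecutor.Py_eval_input,
                               global_dict, local_dict)
    executor.xdecref(pointervalue(result))  # we own the new reference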
+class FetchAndRestoreError(PythonCodeExecutor):
+ """
+ Context manager that fetches the error indicator in the inferior and
+ restores it on exit.
+ """
+
+ def __init__(self):
+ self.sizeof_PyObjectPtr = gdb.lookup_type('PyObject').pointer().sizeof
+ self.pointer = self.malloc(self.sizeof_PyObjectPtr * 3)
+
+ type = self.pointer
+ value = self.pointer + self.sizeof_PyObjectPtr
+ traceback = self.pointer + self.sizeof_PyObjectPtr * 2
+
+ self.errstate = type, value, traceback
+
+ def __enter__(self):
+ gdb.parse_and_eval("PyErr_Fetch(%d, %d, %d)" % self.errstate)
+
+ def __exit__(self, *args):
+ if gdb.parse_and_eval("(int) PyErr_Occurred()"):
+ gdb.parse_and_eval("PyErr_Print()")
+
+ pyerr_restore = ("PyErr_Restore("
+ "(PyObject *) *%d,"
+ "(PyObject *) *%d,"
+ "(PyObject *) *%d)")
+
+ try:
+ gdb.parse_and_eval(pyerr_restore % self.errstate)
+ finally:
+ self.free(self.pointer)
+
+
+class FixGdbCommand(gdb.Command):
+
+ def __init__(self, command, actual_command):
+ super(FixGdbCommand, self).__init__(command, gdb.COMMAND_DATA,
+ gdb.COMPLETE_NONE)
+ self.actual_command = actual_command
+
+ def fix_gdb(self):
+ """
It seems that invoking either 'cy exec' and 'py-exec' work perfectly
fine, but after this gdb's python API is entirely broken.
- Maybe some uncleared exception value is still set?
- sys.exc_clear() didn't help. A demonstration:
-
- (gdb) cy exec 'hello'
- 'hello'
- (gdb) python gdb.execute('cont')
- RuntimeError: Cannot convert value to int.
- Error while executing Python code.
- (gdb) python gdb.execute('cont')
- [15148 refs]
-
- Program exited normally.
- """
- warnings.filterwarnings('ignore', r'.*', RuntimeWarning,
- re.escape(__name__))
- try:
+ Maybe some uncleared exception value is still set?
+ sys.exc_clear() didn't help. A demonstration:
+
+ (gdb) cy exec 'hello'
+ 'hello'
+ (gdb) python gdb.execute('cont')
+ RuntimeError: Cannot convert value to int.
+ Error while executing Python code.
+ (gdb) python gdb.execute('cont')
+ [15148 refs]
+
+ Program exited normally.
+ """
+ warnings.filterwarnings('ignore', r'.*', RuntimeWarning,
+ re.escape(__name__))
+ try:
int(gdb.parse_and_eval("(void *) 0")) == 0
- except RuntimeError:
- pass
- # warnings.resetwarnings()
-
- def invoke(self, args, from_tty):
- self.fix_gdb()
- try:
- gdb.execute('%s %s' % (self.actual_command, args))
- except RuntimeError as e:
- raise gdb.GdbError(str(e))
- self.fix_gdb()
-
-
-def _evalcode_python(executor, code, input_type):
- """
- Execute Python code in the most recent stack frame.
- """
- global_dict = gdb.parse_and_eval('PyEval_GetGlobals()')
- local_dict = gdb.parse_and_eval('PyEval_GetLocals()')
-
- if (pointervalue(global_dict) == 0 or pointervalue(local_dict) == 0):
- raise gdb.GdbError("Unable to find the locals or globals of the "
- "most recent Python function (relative to the "
- "selected frame).")
-
- return executor.evalcode(code, input_type, global_dict, local_dict)
-
-
-class PyExec(gdb.Command):
-
- def readcode(self, expr):
- if expr:
- return expr, PythonCodeExecutor.Py_single_input
- else:
- lines = []
- while True:
- try:
+ except RuntimeError:
+ pass
+ # warnings.resetwarnings()
+
+ def invoke(self, args, from_tty):
+ self.fix_gdb()
+ try:
+ gdb.execute('%s %s' % (self.actual_command, args))
+ except RuntimeError as e:
+ raise gdb.GdbError(str(e))
+ self.fix_gdb()
+
+
+def _evalcode_python(executor, code, input_type):
+ """
+ Execute Python code in the most recent stack frame.
+ """
+ global_dict = gdb.parse_and_eval('PyEval_GetGlobals()')
+ local_dict = gdb.parse_and_eval('PyEval_GetLocals()')
+
+ if (pointervalue(global_dict) == 0 or pointervalue(local_dict) == 0):
+ raise gdb.GdbError("Unable to find the locals or globals of the "
+ "most recent Python function (relative to the "
+ "selected frame).")
+
+ return executor.evalcode(code, input_type, global_dict, local_dict)
+
+
+class PyExec(gdb.Command):
+
+ def readcode(self, expr):
+ if expr:
+ return expr, PythonCodeExecutor.Py_single_input
+ else:
+ lines = []
+ while True:
+ try:
line = input('>')
- except EOFError:
- break
- else:
- if line.rstrip() == 'end':
- break
-
- lines.append(line)
-
- return '\n'.join(lines), PythonCodeExecutor.Py_file_input
-
- def invoke(self, expr, from_tty):
- expr, input_type = self.readcode(expr)
- executor = PythonCodeExecutor()
-        executor.xdecref(_evalcode_python(executor, expr, input_type))
-
-
-gdb.execute('set breakpoint pending on')
-
-if hasattr(gdb, 'GdbError'):
- # Wrap py-step and py-next in gdb defines to make them repeatable.
- py_step = PyStep('-py-step', PythonInfo())
- py_next = PyNext('-py-next', PythonInfo())
- register_defines()
- py_finish = PyFinish('py-finish', PythonInfo())
- py_run = PyRun('py-run', PythonInfo())
- py_cont = PyCont('py-cont', PythonInfo())
-
- py_exec = FixGdbCommand('py-exec', '-py-exec')
- _py_exec = PyExec("-py-exec", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
-else:
- warnings.warn("Use gdb 7.2 or higher to use the py-exec command.")
+ except EOFError:
+ break
+ else:
+ if line.rstrip() == 'end':
+ break
+
+ lines.append(line)
+
+ return '\n'.join(lines), PythonCodeExecutor.Py_file_input
+
+ def invoke(self, expr, from_tty):
+ expr, input_type = self.readcode(expr)
+ executor = PythonCodeExecutor()
+        executor.xdecref(_evalcode_python(executor, expr, input_type))
+
+
+gdb.execute('set breakpoint pending on')
+
+if hasattr(gdb, 'GdbError'):
+ # Wrap py-step and py-next in gdb defines to make them repeatable.
+ py_step = PyStep('-py-step', PythonInfo())
+ py_next = PyNext('-py-next', PythonInfo())
+ register_defines()
+ py_finish = PyFinish('py-finish', PythonInfo())
+ py_run = PyRun('py-run', PythonInfo())
+ py_cont = PyCont('py-cont', PythonInfo())
+
+ py_exec = FixGdbCommand('py-exec', '-py-exec')
+ _py_exec = PyExec("-py-exec", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
+else:
+ warnings.warn("Use gdb 7.2 or higher to use the py-exec command.")
diff --git a/contrib/tools/cython/Cython/Debugging.py b/contrib/tools/cython/Cython/Debugging.py
index c710552cee..edb3f4e8ca 100644
--- a/contrib/tools/cython/Cython/Debugging.py
+++ b/contrib/tools/cython/Cython/Debugging.py
@@ -1,20 +1,20 @@
-###############################################
-#
-# Odds and ends for debugging
-#
-###############################################
-
-def print_call_chain(*args):
- import sys
- print(" ".join(map(str, args)))
- f = sys._getframe(1)
- while f:
- name = f.f_code.co_name
- s = f.f_locals.get('self', None)
- if s:
- c = getattr(s, "__class__", None)
- if c:
- name = "%s.%s" % (c.__name__, name)
- print("Called from: %s %s" % (name, f.f_lineno))
- f = f.f_back
- print("-" * 70)
+###############################################
+#
+# Odds and ends for debugging
+#
+###############################################
+
+def print_call_chain(*args):
+ import sys
+ print(" ".join(map(str, args)))
+ f = sys._getframe(1)
+ while f:
+ name = f.f_code.co_name
+ s = f.f_locals.get('self', None)
+ if s:
+ c = getattr(s, "__class__", None)
+ if c:
+ name = "%s.%s" % (c.__name__, name)
+ print("Called from: %s %s" % (name, f.f_lineno))
+ f = f.f_back
+ print("-" * 70)
diff --git a/contrib/tools/cython/Cython/Distutils/__init__.py b/contrib/tools/cython/Cython/Distutils/__init__.py
index eae3ae15da..4a24001f15 100644
--- a/contrib/tools/cython/Cython/Distutils/__init__.py
+++ b/contrib/tools/cython/Cython/Distutils/__init__.py
@@ -1,2 +1,2 @@
-from Cython.Distutils.build_ext import build_ext
-from Cython.Distutils.extension import Extension
+from Cython.Distutils.build_ext import build_ext
+from Cython.Distutils.extension import Extension
diff --git a/contrib/tools/cython/Cython/Distutils/build_ext.py b/contrib/tools/cython/Cython/Distutils/build_ext.py
index 2822fa5649..598bb4a89b 100644
--- a/contrib/tools/cython/Cython/Distutils/build_ext.py
+++ b/contrib/tools/cython/Cython/Distutils/build_ext.py
@@ -1,5 +1,5 @@
-import sys
-
+import sys
+
if 'setuptools' in sys.modules:
try:
from setuptools.command.build_ext import build_ext as _build_ext
@@ -10,7 +10,7 @@ if 'setuptools' in sys.modules:
else:
from distutils.command.build_ext import build_ext as _build_ext
-
+
class new_build_ext(_build_ext, object):
def finalize_options(self):
if self.distribution.ext_modules:
@@ -20,6 +20,6 @@ class new_build_ext(_build_ext, object):
self.distribution.ext_modules[:] = cythonize(
self.distribution.ext_modules, nthreads=nthreads, force=self.force)
super(new_build_ext, self).finalize_options()
-
+
# This will become new_build_ext in the future.
from .old_build_ext import old_build_ext as build_ext
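new_build_ext above calls cythonize() on distribution.ext_modules while finalizing options, so ordinary Extension objects whose sources are .pyx files get translated to C automatically. A hedged setup script sketch (package and file names are hypothetical):

    from setuptools import setup, Extension
    from Cython.Distutils.build_ext import new_build_ext

    setup(
        name="mypkg",
        cmdclass={"build_ext": new_build_ext},
        # Plain Extension objects; new_build_ext runs cythonize() over them.
        ext_modules=[Extension("mypkg.fast", ["mypkg/fast.pyx"])],
    )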
diff --git a/contrib/tools/cython/Cython/Distutils/extension.py b/contrib/tools/cython/Cython/Distutils/extension.py
index d0555c6fe5..d8bdbf0f5b 100644
--- a/contrib/tools/cython/Cython/Distutils/extension.py
+++ b/contrib/tools/cython/Cython/Distutils/extension.py
@@ -1,128 +1,128 @@
-"""Pyrex.Distutils.extension
-
-Provides a modified Extension class, that understands how to describe
-Pyrex extension modules in setup scripts."""
-
-__revision__ = "$Id:$"
-
-import sys
-import distutils.extension as _Extension
-
-try:
- import warnings
-except ImportError:
- warnings = None
-
-
-class Extension(_Extension.Extension):
- # When adding arguments to this constructor, be sure to update
- # user_options.extend in build_ext.py.
- def __init__(self, name, sources,
- include_dirs=None,
- define_macros=None,
- undef_macros=None,
- library_dirs=None,
- libraries=None,
- runtime_library_dirs=None,
- extra_objects=None,
- extra_compile_args=None,
- extra_link_args=None,
- export_symbols=None,
- #swig_opts=None,
- depends=None,
- language=None,
- cython_include_dirs=None,
- cython_directives=None,
- cython_create_listing=False,
- cython_line_directives=False,
- cython_cplus=False,
- cython_c_in_temp=False,
- cython_gen_pxi=False,
- cython_gdb=False,
- no_c_in_traceback=False,
- cython_compile_time_env=None,
- **kw):
-
- # Translate pyrex_X to cython_X for backwards compatibility.
- had_pyrex_options = False
+"""Pyrex.Distutils.extension
+
+Provides a modified Extension class, that understands how to describe
+Pyrex extension modules in setup scripts."""
+
+__revision__ = "$Id:$"
+
+import sys
+import distutils.extension as _Extension
+
+try:
+ import warnings
+except ImportError:
+ warnings = None
+
+
+class Extension(_Extension.Extension):
+ # When adding arguments to this constructor, be sure to update
+ # user_options.extend in build_ext.py.
+ def __init__(self, name, sources,
+ include_dirs=None,
+ define_macros=None,
+ undef_macros=None,
+ library_dirs=None,
+ libraries=None,
+ runtime_library_dirs=None,
+ extra_objects=None,
+ extra_compile_args=None,
+ extra_link_args=None,
+ export_symbols=None,
+ #swig_opts=None,
+ depends=None,
+ language=None,
+ cython_include_dirs=None,
+ cython_directives=None,
+ cython_create_listing=False,
+ cython_line_directives=False,
+ cython_cplus=False,
+ cython_c_in_temp=False,
+ cython_gen_pxi=False,
+ cython_gdb=False,
+ no_c_in_traceback=False,
+ cython_compile_time_env=None,
+ **kw):
+
+ # Translate pyrex_X to cython_X for backwards compatibility.
+ had_pyrex_options = False
for key in list(kw):
- if key.startswith('pyrex_'):
- had_pyrex_options = True
- kw['cython' + key[5:]] = kw.pop(key)
- if had_pyrex_options:
- Extension.__init__(
- self, name, sources,
- include_dirs=include_dirs,
- define_macros=define_macros,
- undef_macros=undef_macros,
- library_dirs=library_dirs,
- libraries=libraries,
- runtime_library_dirs=runtime_library_dirs,
- extra_objects=extra_objects,
- extra_compile_args=extra_compile_args,
- extra_link_args=extra_link_args,
- export_symbols=export_symbols,
- #swig_opts=swig_opts,
- depends=depends,
- language=language,
- no_c_in_traceback=no_c_in_traceback,
- **kw)
- return
-
- _Extension.Extension.__init__(
- self, name, sources,
- include_dirs=include_dirs,
- define_macros=define_macros,
- undef_macros=undef_macros,
- library_dirs=library_dirs,
- libraries=libraries,
- runtime_library_dirs=runtime_library_dirs,
- extra_objects=extra_objects,
- extra_compile_args=extra_compile_args,
- extra_link_args=extra_link_args,
- export_symbols=export_symbols,
- #swig_opts=swig_opts,
- depends=depends,
- language=language,
- **kw)
-
- self.cython_include_dirs = cython_include_dirs or []
- self.cython_directives = cython_directives or {}
- self.cython_create_listing = cython_create_listing
- self.cython_line_directives = cython_line_directives
- self.cython_cplus = cython_cplus
- self.cython_c_in_temp = cython_c_in_temp
- self.cython_gen_pxi = cython_gen_pxi
- self.cython_gdb = cython_gdb
- self.no_c_in_traceback = no_c_in_traceback
- self.cython_compile_time_env = cython_compile_time_env
-
-# class Extension
-
-read_setup_file = _Extension.read_setup_file
-
-
-# reuse and extend original docstring from base class (if we can)
-if sys.version_info[0] < 3 and _Extension.Extension.__doc__:
- # -OO discards docstrings
- Extension.__doc__ = _Extension.Extension.__doc__ + """\
- cython_include_dirs : [string]
- list of directories to search for Pyrex header files (.pxd) (in
- Unix form for portability)
- cython_directives : {string:value}
- dict of compiler directives
- cython_create_listing_file : boolean
- write pyrex error messages to a listing (.lis) file.
- cython_line_directives : boolean
- emit pyx line numbers for debugging/profiling
- cython_cplus : boolean
- use the C++ compiler for compiling and linking.
- cython_c_in_temp : boolean
- put generated C files in temp directory.
- cython_gen_pxi : boolean
- generate .pxi file for public declarations
- cython_gdb : boolean
- generate Cython debug information for this extension for cygdb
- no_c_in_traceback : boolean
- emit the c file and line number from the traceback for exceptions
-"""
+ if key.startswith('pyrex_'):
+ had_pyrex_options = True
+ kw['cython' + key[5:]] = kw.pop(key)
+ if had_pyrex_options:
+ Extension.__init__(
+ self, name, sources,
+ include_dirs=include_dirs,
+ define_macros=define_macros,
+ undef_macros=undef_macros,
+ library_dirs=library_dirs,
+ libraries=libraries,
+ runtime_library_dirs=runtime_library_dirs,
+ extra_objects=extra_objects,
+ extra_compile_args=extra_compile_args,
+ extra_link_args=extra_link_args,
+ export_symbols=export_symbols,
+ #swig_opts=swig_opts,
+ depends=depends,
+ language=language,
+ no_c_in_traceback=no_c_in_traceback,
+ **kw)
+ return
+
+ _Extension.Extension.__init__(
+ self, name, sources,
+ include_dirs=include_dirs,
+ define_macros=define_macros,
+ undef_macros=undef_macros,
+ library_dirs=library_dirs,
+ libraries=libraries,
+ runtime_library_dirs=runtime_library_dirs,
+ extra_objects=extra_objects,
+ extra_compile_args=extra_compile_args,
+ extra_link_args=extra_link_args,
+ export_symbols=export_symbols,
+ #swig_opts=swig_opts,
+ depends=depends,
+ language=language,
+ **kw)
+
+ self.cython_include_dirs = cython_include_dirs or []
+ self.cython_directives = cython_directives or {}
+ self.cython_create_listing = cython_create_listing
+ self.cython_line_directives = cython_line_directives
+ self.cython_cplus = cython_cplus
+ self.cython_c_in_temp = cython_c_in_temp
+ self.cython_gen_pxi = cython_gen_pxi
+ self.cython_gdb = cython_gdb
+ self.no_c_in_traceback = no_c_in_traceback
+ self.cython_compile_time_env = cython_compile_time_env
+
+# class Extension
+
+read_setup_file = _Extension.read_setup_file
+
+
+# reuse and extend original docstring from base class (if we can)
+if sys.version_info[0] < 3 and _Extension.Extension.__doc__:
+ # -OO discards docstrings
+ Extension.__doc__ = _Extension.Extension.__doc__ + """\
+ cython_include_dirs : [string]
+ list of directories to search for Pyrex header files (.pxd) (in
+ Unix form for portability)
+ cython_directives : {string:value}
+ dict of compiler directives
+ cython_create_listing_file : boolean
+ write pyrex error messages to a listing (.lis) file.
+ cython_line_directives : boolean
+ emit pyx line numbers for debugging/profiling
+ cython_cplus : boolean
+ use the C++ compiler for compiling and linking.
+ cython_c_in_temp : boolean
+ put generated C files in temp directory.
+ cython_gen_pxi : boolean
+ generate .pxi file for public declarations
+ cython_gdb : boolean
+ generate Cython debug information for this extension for cygdb
+ no_c_in_traceback : boolean
+ emit the c file and line number from the traceback for exceptions
+"""
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python.pxd
index 4b61e4e1b7..56236e925c 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython cimport *
+from cpython cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_bool.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_bool.pxd
index 9a2ef489ee..9a6d253f45 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_bool.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_bool.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.bool cimport *
+from cpython.bool cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_buffer.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_buffer.pxd
index 21fb0931cd..2baeaae00c 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_buffer.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_buffer.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.buffer cimport *
+from cpython.buffer cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_bytes.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_bytes.pxd
index e9d0b5e328..87af662de0 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_bytes.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_bytes.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.bytes cimport *
+from cpython.bytes cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_cobject.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_cobject.pxd
index 5835605aa4..ed32c6b878 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_cobject.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_cobject.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.cobject cimport *
+from cpython.cobject cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_complex.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_complex.pxd
index 311c8197a6..0a780b3b2d 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_complex.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_complex.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.complex cimport *
+from cpython.complex cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_dict.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_dict.pxd
index 3a96b37765..05b5f4796a 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_dict.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_dict.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.dict cimport *
+from cpython.dict cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_exc.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_exc.pxd
index 4be358ce54..6eb236bccb 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_exc.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_exc.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.exc cimport *
+from cpython.exc cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_float.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_float.pxd
index e55f078a25..7e133ef9bb 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_float.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_float.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.float cimport *
+from cpython.float cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_function.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_function.pxd
index 5c05e1f02d..1461c4e635 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_function.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_function.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.function cimport *
+from cpython.function cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_getargs.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_getargs.pxd
index d9a9f9c152..3852d6a6a1 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_getargs.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_getargs.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.getargs cimport *
+from cpython.getargs cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_instance.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_instance.pxd
index b773d59ff7..99cb5a9091 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_instance.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_instance.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.instance cimport *
+from cpython.instance cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_int.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_int.pxd
index daaa8b3bef..c1fd5178d6 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_int.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_int.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.int cimport *
+from cpython.int cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_iterator.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_iterator.pxd
index 8ba3d874d7..e09aad2790 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_iterator.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_iterator.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.iterator cimport *
+from cpython.iterator cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_list.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_list.pxd
index 53ea6059bd..64febcf969 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_list.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_list.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.list cimport *
+from cpython.list cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_long.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_long.pxd
index 380a7797ed..1a24380c4c 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_long.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_long.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.long cimport *
+from cpython.long cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_mapping.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_mapping.pxd
index bbd90d0e2a..cd01bee015 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_mapping.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_mapping.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.mapping cimport *
+from cpython.mapping cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_mem.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_mem.pxd
index 8d824ff83b..d74429ea36 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_mem.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_mem.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.mem cimport *
+from cpython.mem cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_method.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_method.pxd
index 459efdd4b3..e7da5154e4 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_method.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_method.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.method cimport *
+from cpython.method cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_module.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_module.pxd
index ab80a90a37..6310c0247d 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_module.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_module.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.module cimport *
+from cpython.module cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_number.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_number.pxd
index 0059334465..ae67da1c38 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_number.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_number.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.number cimport *
+from cpython.number cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_object.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_object.pxd
index dc2ff781fd..3981bfa44e 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_object.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_object.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.object cimport *
+from cpython.object cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_oldbuffer.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_oldbuffer.pxd
index 39ee152109..e03e66a2e2 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_oldbuffer.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_oldbuffer.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.oldbuffer cimport *
+from cpython.oldbuffer cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_pycapsule.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_pycapsule.pxd
index 372ad3aef5..fe9cf8f8d9 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_pycapsule.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_pycapsule.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.pycapsule cimport *
+from cpython.pycapsule cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_ref.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_ref.pxd
index 93655dd49b..9447418198 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_ref.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_ref.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.ref cimport *
+from cpython.ref cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_sequence.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_sequence.pxd
index 2e454ec562..fdef5b63eb 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_sequence.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_sequence.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.sequence cimport *
+from cpython.sequence cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_set.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_set.pxd
index b85bc28301..a2feb93712 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_set.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_set.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.set cimport *
+from cpython.set cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_string.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_string.pxd
index 34d159b0a8..24c818338e 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_string.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_string.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.string cimport *
+from cpython.string cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_tuple.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_tuple.pxd
index 9fd5afdb2a..190713b020 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_tuple.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_tuple.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.tuple cimport *
+from cpython.tuple cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_type.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_type.pxd
index 8d69ae3ad5..3ac47d1b3f 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_type.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_type.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.type cimport *
+from cpython.type cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_unicode.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_unicode.pxd
index db8f07b70b..2b488b2dc8 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_unicode.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_unicode.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.unicode cimport *
+from cpython.unicode cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_version.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_version.pxd
index bf30d7a525..c27ca4df95 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_version.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_version.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.version cimport *
+from cpython.version cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/python_weakref.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/python_weakref.pxd
index c234efa18b..1f84f1a179 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/python_weakref.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/python_weakref.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from cpython.weakref cimport *
+from cpython.weakref cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/stdio.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/stdio.pxd
index ebc4fc8bea..41a4aebf1d 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/stdio.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/stdio.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from libc.stdio cimport *
+from libc.stdio cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/stdlib.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/stdlib.pxd
index 9f8d1c65ff..499511cde9 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/stdlib.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/stdlib.pxd
@@ -1,2 +1,2 @@
# Present for backwards compatibility
-from libc.stdlib cimport *
+from libc.stdlib cimport *
diff --git a/contrib/tools/cython/Cython/Includes/Deprecated/stl.pxd b/contrib/tools/cython/Cython/Includes/Deprecated/stl.pxd
index b387c9a6c9..22248d2655 100644
--- a/contrib/tools/cython/Cython/Includes/Deprecated/stl.pxd
+++ b/contrib/tools/cython/Cython/Includes/Deprecated/stl.pxd
@@ -1,91 +1,91 @@
-cdef extern from "<vector>" namespace std:
-
- cdef cppclass vector[TYPE]:
- #constructors
- __init__()
- __init__(vector&)
- __init__(int)
- __init__(int, TYPE&)
- __init__(iterator, iterator)
- #operators
- TYPE& __getitem__(int)
- TYPE& __setitem__(int, TYPE&)
- vector __new__(vector&)
- bool __eq__(vector&, vector&)
- bool __ne__(vector&, vector&)
- bool __lt__(vector&, vector&)
- bool __gt__(vector&, vector&)
- bool __le__(vector&, vector&)
- bool __ge__(vector&, vector&)
- #others
- void assign(int, TYPE)
- #void assign(iterator, iterator)
- TYPE& at(int)
- TYPE& back()
- iterator begin()
- int capacity()
- void clear()
- bool empty()
- iterator end()
- iterator erase(iterator)
- iterator erase(iterator, iterator)
- TYPE& front()
- iterator insert(iterator, TYPE&)
- void insert(iterator, int, TYPE&)
- void insert(iterator, iterator)
- int max_size()
- void pop_back()
- void push_back(TYPE&)
- iterator rbegin()
- iterator rend()
- void reserve(int)
- void resize(int)
- void resize(int, TYPE&) #void resize(size_type num, const TYPE& = TYPE())
- int size()
- void swap(container&)
-
-cdef extern from "<deque>" namespace std:
-
- cdef cppclass deque[TYPE]:
- #constructors
- __init__()
- __init__(deque&)
- __init__(int)
- __init__(int, TYPE&)
- __init__(iterator, iterator)
- #operators
- TYPE& operator[]( size_type index );
- const TYPE& operator[]( size_type index ) const;
- deque __new__(deque&);
- bool __eq__(deque&, deque&);
- bool __ne__(deque&, deque&);
- bool __lt__(deque&, deque&);
- bool __gt__(deque&, deque&);
- bool __le__(deque&, deque&);
- bool __ge__(deque&, deque&);
- #others
- void assign(int, TYPE&)
- void assign(iterator, iterator)
- TYPE& at(int)
- TYPE& back()
- iterator begin()
- void clear()
- bool empty()
- iterator end()
- iterator erase(iterator)
- iterator erase(iterator, iterator)
- TYPE& front()
- iterator insert(iterator, TYPE&)
- void insert(iterator, int, TYPE&)
- void insert(iterator, iterator, iterator)
- int max_size()
- void pop_back()
- void pop_front()
- void push_back(TYPE&)
- void push_front(TYPE&)
- iterator rbegin()
- iterator rend()
- void resize(int)
- void resize(int, TYPE&)
- int size()
- void swap(container&)
+cdef extern from "<vector>" namespace std:
+
+ cdef cppclass vector[TYPE]:
+ #constructors
+ __init__()
+ __init__(vector&)
+ __init__(int)
+ __init__(int, TYPE&)
+ __init__(iterator, iterator)
+ #operators
+ TYPE& __getitem__(int)
+ TYPE& __setitem__(int, TYPE&)
+ vector __new__(vector&)
+ bool __eq__(vector&, vector&)
+ bool __ne__(vector&, vector&)
+ bool __lt__(vector&, vector&)
+ bool __gt__(vector&, vector&)
+ bool __le__(vector&, vector&)
+ bool __ge__(vector&, vector&)
+ #others
+ void assign(int, TYPE)
+ #void assign(iterator, iterator)
+ TYPE& at(int)
+ TYPE& back()
+ iterator begin()
+ int capacity()
+ void clear()
+ bool empty()
+ iterator end()
+ iterator erase(iterator)
+ iterator erase(iterator, iterator)
+ TYPE& front()
+ iterator insert(iterator, TYPE&)
+ void insert(iterator, int, TYPE&)
+ void insert(iterator, iterator)
+ int max_size()
+ void pop_back()
+ void push_back(TYPE&)
+ iterator rbegin()
+ iterator rend()
+ void reserve(int)
+ void resize(int)
+ void resize(int, TYPE&) #void resize(size_type num, const TYPE& = TYPE())
+ int size()
+ void swap(container&)
+
+cdef extern from "<deque>" namespace std:
+
+ cdef cppclass deque[TYPE]:
+ #constructors
+ __init__()
+ __init__(deque&)
+ __init__(int)
+ __init__(int, TYPE&)
+ __init__(iterator, iterator)
+ #operators
+ TYPE& operator[]( size_type index );
+ const TYPE& operator[]( size_type index ) const;
+ deque __new__(deque&);
+ bool __eq__(deque&, deque&);
+ bool __ne__(deque&, deque&);
+ bool __lt__(deque&, deque&);
+ bool __gt__(deque&, deque&);
+ bool __le__(deque&, deque&);
+ bool __ge__(deque&, deque&);
+ #others
+ void assign(int, TYPE&)
+ void assign(iterator, iterator)
+ TYPE& at(int)
+ TYPE& back()
+ iterator begin()
+ void clear()
+ bool empty()
+ iterator end()
+ iterator erase(iterator)
+ iterator erase(iterator, iterator)
+ TYPE& front()
+ iterator insert(iterator, TYPE&)
+ void insert(iterator, int, TYPE&)
+ void insert(iterator, iterator, iterator)
+ int max_size()
+ void pop_back()
+ void pop_front()
+ void push_back(TYPE&)
+ void push_front(TYPE&)
+ iterator rbegin()
+ iterator rend()
+ void resize(int)
+ void resize(int, TYPE&)
+ int size()
+ void swap(container&)
diff --git a/contrib/tools/cython/Cython/Includes/cpython/__init__.pxd b/contrib/tools/cython/Cython/Includes/cpython/__init__.pxd
index c049ca39fe..c81f4e6655 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/__init__.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/__init__.pxd
@@ -1,184 +1,184 @@
-#####################################################################
-#
-# These are the Cython pxd files for (most of) the Python/C API.
-#
-# REFERENCE COUNTING:
-#
-# JUST TO SCARE YOU:
-# If you are going to use any of the Python/C API in your Cython
-# program, you might be responsible for doing reference counting.
-# Read http://docs.python.org/api/refcounts.html which is so
-# important I've copied it below.
-#
+#####################################################################
+#
+# These are the Cython pxd files for (most of) the Python/C API.
+#
+# REFERENCE COUNTING:
+#
+# JUST TO SCARE YOU:
+# If you are going to use any of the Python/C API in your Cython
+# program, you might be responsible for doing reference counting.
+# Read http://docs.python.org/api/refcounts.html which is so
+# important I've copied it below.
+#
 # For all the declarations below, whenever the Py_ function returns
-# a *new reference* to a PyObject*, the return type is "object".
-# When the function returns a borrowed reference, the return
-# type is PyObject*. When Cython sees "object" as a return type
-# it doesn't increment the reference count. When it sees PyObject*
-# in order to use the result you must explicitly cast to <object>,
+# a *new reference* to a PyObject*, the return type is "object".
+# When the function returns a borrowed reference, the return
+# type is PyObject*. When Cython sees "object" as a return type
+# it doesn't increment the reference count. When it sees PyObject*
+# in order to use the result you must explicitly cast to <object>,
# and when you do that Cython increments the reference count whether
-# you want it to or not, forcing you to an explicit DECREF (or leak memory).
-# To avoid this we make the above convention. Note, you can
-# always locally override this convention by putting something like
-#
-# cdef extern from "Python.h":
-# PyObject* PyNumber_Add(PyObject *o1, PyObject *o2)
-#
-# in your .pyx file or into a cimported .pxd file. You just have to
-# use the one from the right (pxd-)namespace then.
-#
-# Cython automatically takes care of reference counting for anything
-# of type object.
-#
-## More precisely, I think the correct convention for
-## using the Python/C API from Cython is as follows.
-##
-## (1) Declare all input arguments as type "object". This way no explicit
-## <PyObject*> casting is needed, and moreover Cython doesn't generate
-## any funny reference counting.
-## (2) Declare output as object if a new reference is returned.
-## (3) Declare output as PyObject* if a borrowed reference is returned.
-##
-## This way when you call objects, no cast is needed, and if the api
-## calls returns a new reference (which is about 95% of them), then
-## you can just assign to a variable of type object. With borrowed
-## references if you do an explicit typecast to <object>, Cython generates an
-## INCREF and DECREF so you have to be careful. However, you got a
-## borrowed reference in this case, so there's got to be another reference
-## to your object, so you're OK, as long as you realize this
-## and use the result of an explicit cast to <object> as a borrowed
-## reference (and you can call Py_INCREF if you want to turn it
-## into another reference for some reason).
-#
-# "The reference count is important because today's computers have
-# a finite (and often severely limited) memory size; it counts how
-# many different places there are that have a reference to an
-# object. Such a place could be another object, or a global (or
-# static) C variable, or a local variable in some C function. When
-# an object's reference count becomes zero, the object is
-# deallocated. If it contains references to other objects, their
-# reference count is decremented. Those other objects may be
-# deallocated in turn, if this decrement makes their reference
-# count become zero, and so on. (There's an obvious problem with
-# objects that reference each other here; for now, the solution is
-# ``don't do that.'')
-#
-# Reference counts are always manipulated explicitly. The normal
-# way is to use the macro Py_INCREF() to increment an object's
-# reference count by one, and Py_DECREF() to decrement it by
-# one. The Py_DECREF() macro is considerably more complex than the
-# incref one, since it must check whether the reference count
-# becomes zero and then cause the object's deallocator to be
-# called. The deallocator is a function pointer contained in the
-# object's type structure. The type-specific deallocator takes
-# care of decrementing the reference counts for other objects
-# contained in the object if this is a compound object type, such
-# as a list, as well as performing any additional finalization
-# that's needed. There's no chance that the reference count can
-# overflow; at least as many bits are used to hold the reference
-# count as there are distinct memory locations in virtual memory
-# (assuming sizeof(long) >= sizeof(char*)). Thus, the reference
-# count increment is a simple operation.
-#
-# It is not necessary to increment an object's reference count for
-# every local variable that contains a pointer to an object. In
-# theory, the object's reference count goes up by one when the
-# variable is made to point to it and it goes down by one when the
-# variable goes out of scope. However, these two cancel each other
-# out, so at the end the reference count hasn't changed. The only
-# real reason to use the reference count is to prevent the object
-# from being deallocated as long as our variable is pointing to
-# it. If we know that there is at least one other reference to the
-# object that lives at least as long as our variable, there is no
-# need to increment the reference count temporarily. An important
-# situation where this arises is in objects that are passed as
-# arguments to C functions in an extension module that are called
-# from Python; the call mechanism guarantees to hold a reference
-# to every argument for the duration of the call.
-#
-# However, a common pitfall is to extract an object from a list
-# and hold on to it for a while without incrementing its reference
-# count. Some other operation might conceivably remove the object
-# from the list, decrementing its reference count and possibly
-# deallocating it. The real danger is that innocent-looking
-# operations may invoke arbitrary Python code which could do this;
-# there is a code path which allows control to flow back to the
-# user from a Py_DECREF(), so almost any operation is potentially
-# dangerous.
-#
-# A safe approach is to always use the generic operations
-# (functions whose name begins with "PyObject_", "PyNumber_",
-# "PySequence_" or "PyMapping_"). These operations always
-# increment the reference count of the object they return. This
-# leaves the caller with the responsibility to call Py_DECREF()
-# when they are done with the result; this soon becomes second
-# nature.
-#
-# Now you should read http://docs.python.org/api/refcountDetails.html
-# just to be sure you understand what is going on.
-#
-#################################################################
-
-
-
-#################################################################
-# BIG FAT DEPRECATION WARNING
-#################################################################
-# Do NOT cimport any names directly from the cpython package,
-# despite the star-imports below. They will be removed at
-# some point.
-# Instead, use the correct sub-module to draw your cimports from.
-#
-# A direct cimport from the package will make your code depend on
-# all of the existing declarations. This may have side-effects
-# and reduces the portability of your code.
-#################################################################
-# START OF DEPRECATED SECTION
-#################################################################
-
-from cpython.version cimport *
-from cpython.ref cimport *
-from cpython.exc cimport *
-from cpython.module cimport *
-from cpython.mem cimport *
-from cpython.tuple cimport *
-from cpython.list cimport *
-from cpython.object cimport *
-from cpython.sequence cimport *
-from cpython.mapping cimport *
-from cpython.iterator cimport *
-from cpython.type cimport *
-from cpython.number cimport *
-from cpython.int cimport *
-from cpython.bool cimport *
-from cpython.long cimport *
-from cpython.float cimport *
-from cpython.complex cimport *
-from cpython.string cimport *
-from cpython.unicode cimport *
-from cpython.dict cimport *
-from cpython.instance cimport *
-from cpython.function cimport *
-from cpython.method cimport *
-from cpython.weakref cimport *
-from cpython.getargs cimport *
-from cpython.pythread cimport *
-from cpython.pystate cimport *
-
-# Python <= 2.x
-from cpython.cobject cimport *
-from cpython.oldbuffer cimport *
-
-# Python >= 2.4
-from cpython.set cimport *
-
-# Python >= 2.6
-from cpython.buffer cimport *
-from cpython.bytes cimport *
-
-# Python >= 3.0
-from cpython.pycapsule cimport *
-
-#################################################################
-# END OF DEPRECATED SECTION
-#################################################################
+# you want it to or not, forcing you to an explicit DECREF (or leak memory).
+# To avoid this we make the above convention. Note, you can
+# always locally override this convention by putting something like
+#
+# cdef extern from "Python.h":
+# PyObject* PyNumber_Add(PyObject *o1, PyObject *o2)
+#
+# in your .pyx file or into a cimported .pxd file. You just have to
+# use the one from the right (pxd-)namespace then.
+#
+# Cython automatically takes care of reference counting for anything
+# of type object.
+#
+## More precisely, I think the correct convention for
+## using the Python/C API from Cython is as follows.
+##
+## (1) Declare all input arguments as type "object". This way no explicit
+## <PyObject*> casting is needed, and moreover Cython doesn't generate
+## any funny reference counting.
+## (2) Declare output as object if a new reference is returned.
+## (3) Declare output as PyObject* if a borrowed reference is returned.
+##
+## This way when you call objects, no cast is needed, and if the api
+## calls returns a new reference (which is about 95% of them), then
+## you can just assign to a variable of type object. With borrowed
+## references if you do an explicit typecast to <object>, Cython generates an
+## INCREF and DECREF so you have to be careful. However, you got a
+## borrowed reference in this case, so there's got to be another reference
+## to your object, so you're OK, as long as you realize this
+## and use the result of an explicit cast to <object> as a borrowed
+## reference (and you can call Py_INCREF if you want to turn it
+## into another reference for some reason).
+#
+# "The reference count is important because today's computers have
+# a finite (and often severely limited) memory size; it counts how
+# many different places there are that have a reference to an
+# object. Such a place could be another object, or a global (or
+# static) C variable, or a local variable in some C function. When
+# an object's reference count becomes zero, the object is
+# deallocated. If it contains references to other objects, their
+# reference count is decremented. Those other objects may be
+# deallocated in turn, if this decrement makes their reference
+# count become zero, and so on. (There's an obvious problem with
+# objects that reference each other here; for now, the solution is
+# ``don't do that.'')
+#
+# Reference counts are always manipulated explicitly. The normal
+# way is to use the macro Py_INCREF() to increment an object's
+# reference count by one, and Py_DECREF() to decrement it by
+# one. The Py_DECREF() macro is considerably more complex than the
+# incref one, since it must check whether the reference count
+# becomes zero and then cause the object's deallocator to be
+# called. The deallocator is a function pointer contained in the
+# object's type structure. The type-specific deallocator takes
+# care of decrementing the reference counts for other objects
+# contained in the object if this is a compound object type, such
+# as a list, as well as performing any additional finalization
+# that's needed. There's no chance that the reference count can
+# overflow; at least as many bits are used to hold the reference
+# count as there are distinct memory locations in virtual memory
+# (assuming sizeof(long) >= sizeof(char*)). Thus, the reference
+# count increment is a simple operation.
+#
+# It is not necessary to increment an object's reference count for
+# every local variable that contains a pointer to an object. In
+# theory, the object's reference count goes up by one when the
+# variable is made to point to it and it goes down by one when the
+# variable goes out of scope. However, these two cancel each other
+# out, so at the end the reference count hasn't changed. The only
+# real reason to use the reference count is to prevent the object
+# from being deallocated as long as our variable is pointing to
+# it. If we know that there is at least one other reference to the
+# object that lives at least as long as our variable, there is no
+# need to increment the reference count temporarily. An important
+# situation where this arises is in objects that are passed as
+# arguments to C functions in an extension module that are called
+# from Python; the call mechanism guarantees to hold a reference
+# to every argument for the duration of the call.
+#
+# However, a common pitfall is to extract an object from a list
+# and hold on to it for a while without incrementing its reference
+# count. Some other operation might conceivably remove the object
+# from the list, decrementing its reference count and possibly
+# deallocating it. The real danger is that innocent-looking
+# operations may invoke arbitrary Python code which could do this;
+# there is a code path which allows control to flow back to the
+# user from a Py_DECREF(), so almost any operation is potentially
+# dangerous.
+#
+# A safe approach is to always use the generic operations
+# (functions whose name begins with "PyObject_", "PyNumber_",
+# "PySequence_" or "PyMapping_"). These operations always
+# increment the reference count of the object they return. This
+# leaves the caller with the responsibility to call Py_DECREF()
+# when they are done with the result; this soon becomes second
+# nature.
+#
+# Now you should read http://docs.python.org/api/refcountDetails.html
+# just to be sure you understand what is going on.
+#
+#################################################################
+
+
+
+#################################################################
+# BIG FAT DEPRECATION WARNING
+#################################################################
+# Do NOT cimport any names directly from the cpython package,
+# despite the star-imports below. They will be removed at
+# some point.
+# Instead, use the correct sub-module to draw your cimports from.
+#
+# A direct cimport from the package will make your code depend on
+# all of the existing declarations. This may have side-effects
+# and reduces the portability of your code.
+#################################################################
+# START OF DEPRECATED SECTION
+#################################################################
+
+from cpython.version cimport *
+from cpython.ref cimport *
+from cpython.exc cimport *
+from cpython.module cimport *
+from cpython.mem cimport *
+from cpython.tuple cimport *
+from cpython.list cimport *
+from cpython.object cimport *
+from cpython.sequence cimport *
+from cpython.mapping cimport *
+from cpython.iterator cimport *
+from cpython.type cimport *
+from cpython.number cimport *
+from cpython.int cimport *
+from cpython.bool cimport *
+from cpython.long cimport *
+from cpython.float cimport *
+from cpython.complex cimport *
+from cpython.string cimport *
+from cpython.unicode cimport *
+from cpython.dict cimport *
+from cpython.instance cimport *
+from cpython.function cimport *
+from cpython.method cimport *
+from cpython.weakref cimport *
+from cpython.getargs cimport *
+from cpython.pythread cimport *
+from cpython.pystate cimport *
+
+# Python <= 2.x
+from cpython.cobject cimport *
+from cpython.oldbuffer cimport *
+
+# Python >= 2.4
+from cpython.set cimport *
+
+# Python >= 2.6
+from cpython.buffer cimport *
+from cpython.bytes cimport *
+
+# Python >= 3.0
+from cpython.pycapsule cimport *
+
+#################################################################
+# END OF DEPRECATED SECTION
+#################################################################
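The convention described in the comments above (functions returning new references are declared with return type object, borrowed references as PyObject*, and casting a PyObject* to <object> makes Cython take its own reference) can be illustrated with a short hypothetical .pyx snippet using the cpython.list and cpython.ref declarations:

    from cpython.ref cimport PyObject
    from cpython.list cimport PyList_GetItem   # declared to return a borrowed PyObject*

    def first_item(list items):
        # Borrowed reference: assigning to a PyObject* does not INCREF.
        cdef PyObject* borrowed = PyList_GetItem(items, 0)
        # The <object> cast makes Cython increment the reference count,
        # so the returned value stays valid on its own.
        return <object> borrowed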
diff --git a/contrib/tools/cython/Cython/Includes/cpython/array.pxd b/contrib/tools/cython/Cython/Includes/cpython/array.pxd
index f5865c65e3..19230a0a82 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/array.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/array.pxd
@@ -1,163 +1,163 @@
-"""
- array.pxd
+"""
+ array.pxd
- Cython interface to Python's array.array module.
+ Cython interface to Python's array.array module.
- * 1D contiguous data view
- * tools for fast array creation, maximum C-speed and handiness
- * suitable as an all-round, lightweight auto-array within Cython code too
+ * 1D contiguous data view
+ * tools for fast array creation, maximum C-speed and handiness
+ * suitable as an all-round, lightweight auto-array within Cython code too
- Usage:
+ Usage:
+
+ >>> cimport array
- >>> cimport array
-
Usage through Cython buffer interface (Py2.3+):
- >>> def f(arg1, unsigned i, double dx)
- ... array.array[double] a = arg1
- ... a[i] += dx
+ >>> def f(arg1, unsigned i, double dx)
+ ... array.array[double] a = arg1
+ ... a[i] += dx
- Fast C-level new_array(_zeros), resize_array, copy_array, Py_SIZE(obj),
- zero_array
+ Fast C-level new_array(_zeros), resize_array, copy_array, Py_SIZE(obj),
+ zero_array
cdef array.array[double] k = array.copy(d)
- cdef array.array[double] n = array.array(d, Py_SIZE(d) * 2 )
- cdef array.array[double] m = array.zeros_like(FLOAT_TEMPLATE)
- array.resize(f, 200000)
+ cdef array.array[double] n = array.array(d, Py_SIZE(d) * 2 )
+ cdef array.array[double] m = array.zeros_like(FLOAT_TEMPLATE)
+ array.resize(f, 200000)
Zero overhead with naked data pointer views by union:
_f, _d, _i, _c, _u, ...
- => Original C array speed + Python dynamic memory management
-
- cdef array.array a = inarray
+ => Original C array speed + Python dynamic memory management
+
+ cdef array.array a = inarray
if
- a._d[2] += 0.66 # use as double array without extra casting
+ a._d[2] += 0.66 # use as double array without extra casting
- float *subview = vector._f + 10 # starting from 10th element
+ float *subview = vector._f + 10 # starting from 10th element
unsigned char *subview_buffer = vector._B + 4
Suitable as lightweight arrays intra Cython without speed penalty.
Replacement for C stack/malloc arrays; no trouble with refcounting,
- mem.leaks; seamless Python compatibility, buffer() optional
-
-
- last changes: 2009-05-15 rk
- : 2009-12-06 bp
- : 2012-05-02 andreasvc
- : (see revision control)
-"""
-from libc.string cimport strcat, strncat, \
- memset, memchr, memcmp, memcpy, memmove
-
-from cpython.object cimport Py_SIZE
-from cpython.ref cimport PyTypeObject, Py_TYPE
-from cpython.exc cimport PyErr_BadArgument
+ mem.leaks; seamless Python compatibility, buffer() optional
+
+
+ last changes: 2009-05-15 rk
+ : 2009-12-06 bp
+ : 2012-05-02 andreasvc
+ : (see revision control)
+"""
+from libc.string cimport strcat, strncat, \
+ memset, memchr, memcmp, memcpy, memmove
+
+from cpython.object cimport Py_SIZE
+from cpython.ref cimport PyTypeObject, Py_TYPE
+from cpython.exc cimport PyErr_BadArgument
from cpython.mem cimport PyObject_Malloc, PyObject_Free
-
-cdef extern from *: # Hard-coded utility code hack.
- ctypedef class array.array [object arrayobject]
- ctypedef object GETF(array a, Py_ssize_t ix)
- ctypedef object SETF(array a, Py_ssize_t ix, object o)
- ctypedef struct arraydescr: # [object arraydescr]:
+
+cdef extern from *: # Hard-coded utility code hack.
+ ctypedef class array.array [object arrayobject]
+ ctypedef object GETF(array a, Py_ssize_t ix)
+ ctypedef object SETF(array a, Py_ssize_t ix, object o)
+ ctypedef struct arraydescr: # [object arraydescr]:
char typecode
- int itemsize
- GETF getitem # PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
- SETF setitem # int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
-
- ctypedef union __data_union:
- # views of ob_item:
- float* as_floats # direct float pointer access to buffer
- double* as_doubles # double ...
- int* as_ints
- unsigned int *as_uints
- unsigned char *as_uchars
- signed char *as_schars
- char *as_chars
- unsigned long *as_ulongs
- long *as_longs
+ int itemsize
+ GETF getitem # PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
+ SETF setitem # int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
+
+ ctypedef union __data_union:
+ # views of ob_item:
+ float* as_floats # direct float pointer access to buffer
+ double* as_doubles # double ...
+ int* as_ints
+ unsigned int *as_uints
+ unsigned char *as_uchars
+ signed char *as_schars
+ char *as_chars
+ unsigned long *as_ulongs
+ long *as_longs
unsigned long long *as_ulonglongs
long long *as_longlongs
- short *as_shorts
- unsigned short *as_ushorts
- Py_UNICODE *as_pyunicodes
- void *as_voidptr
-
- ctypedef class array.array [object arrayobject]:
- cdef __cythonbufferdefaults__ = {'ndim' : 1, 'mode':'c'}
-
- cdef:
- Py_ssize_t ob_size
- arraydescr* ob_descr # struct arraydescr *ob_descr;
- __data_union data
-
- def __getbuffer__(self, Py_buffer* info, int flags):
- # This implementation of getbuffer is geared towards Cython
+ short *as_shorts
+ unsigned short *as_ushorts
+ Py_UNICODE *as_pyunicodes
+ void *as_voidptr
+
+ ctypedef class array.array [object arrayobject]:
+ cdef __cythonbufferdefaults__ = {'ndim' : 1, 'mode':'c'}
+
+ cdef:
+ Py_ssize_t ob_size
+ arraydescr* ob_descr # struct arraydescr *ob_descr;
+ __data_union data
+
+ def __getbuffer__(self, Py_buffer* info, int flags):
+ # This implementation of getbuffer is geared towards Cython
# requirements, and does not yet fulfill the PEP.
- # In particular strided access is always provided regardless
- # of flags
- item_count = Py_SIZE(self)
-
- info.suboffsets = NULL
- info.buf = self.data.as_chars
- info.readonly = 0
- info.ndim = 1
- info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float)
- info.len = info.itemsize * item_count
-
+ # In particular strided access is always provided regardless
+ # of flags
+ item_count = Py_SIZE(self)
+
+ info.suboffsets = NULL
+ info.buf = self.data.as_chars
+ info.readonly = 0
+ info.ndim = 1
+ info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float)
+ info.len = info.itemsize * item_count
+
info.shape = <Py_ssize_t*> PyObject_Malloc(sizeof(Py_ssize_t) + 2)
- if not info.shape:
- raise MemoryError()
- info.shape[0] = item_count # constant regardless of resizing
- info.strides = &info.itemsize
-
- info.format = <char*> (info.shape + 1)
- info.format[0] = self.ob_descr.typecode
- info.format[1] = 0
- info.obj = self
-
- def __releasebuffer__(self, Py_buffer* info):
+ if not info.shape:
+ raise MemoryError()
+ info.shape[0] = item_count # constant regardless of resizing
+ info.strides = &info.itemsize
+
+ info.format = <char*> (info.shape + 1)
+ info.format[0] = self.ob_descr.typecode
+ info.format[1] = 0
+ info.obj = self
+
+ def __releasebuffer__(self, Py_buffer* info):
PyObject_Free(info.shape)
-
- array newarrayobject(PyTypeObject* type, Py_ssize_t size, arraydescr *descr)
-
- # fast resize/realloc
- # not suitable for small increments; reallocation 'to the point'
- int resize(array self, Py_ssize_t n) except -1
- # efficient for small increments (not in Py2.3-)
- int resize_smart(array self, Py_ssize_t n) except -1
-
-
-cdef inline array clone(array template, Py_ssize_t length, bint zero):
- """ fast creation of a new array, given a template array.
- type will be same as template.
- if zero is true, new array will be initialized with zeroes."""
+
+ array newarrayobject(PyTypeObject* type, Py_ssize_t size, arraydescr *descr)
+
+ # fast resize/realloc
+ # not suitable for small increments; reallocation 'to the point'
+ int resize(array self, Py_ssize_t n) except -1
+ # efficient for small increments (not in Py2.3-)
+ int resize_smart(array self, Py_ssize_t n) except -1
+
+
+cdef inline array clone(array template, Py_ssize_t length, bint zero):
+ """ fast creation of a new array, given a template array.
+ type will be same as template.
+ if zero is true, new array will be initialized with zeroes."""
cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr)
- if zero and op is not None:
- memset(op.data.as_chars, 0, length * op.ob_descr.itemsize)
- return op
-
-cdef inline array copy(array self):
- """ make a copy of an array. """
+ if zero and op is not None:
+ memset(op.data.as_chars, 0, length * op.ob_descr.itemsize)
+ return op
+
+cdef inline array copy(array self):
+ """ make a copy of an array. """
cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr)
- memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize)
- return op
-
-cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1:
+ memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize)
+ return op
+
+cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1:
""" efficient appending of new stuff of same type
- (e.g. of same array type)
- n: number of elements (not number of bytes!) """
- cdef Py_ssize_t itemsize = self.ob_descr.itemsize
- cdef Py_ssize_t origsize = Py_SIZE(self)
- resize_smart(self, origsize + n)
- memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize)
- return 0
-
-cdef inline int extend(array self, array other) except -1:
- """ extend array with data from another array; types must match. """
- if self.ob_descr.typecode != other.ob_descr.typecode:
- PyErr_BadArgument()
- return extend_buffer(self, other.data.as_chars, Py_SIZE(other))
-
-cdef inline void zero(array self):
- """ set all elements of array to zero. """
- memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize)
+ (e.g. of same array type)
+ n: number of elements (not number of bytes!) """
+ cdef Py_ssize_t itemsize = self.ob_descr.itemsize
+ cdef Py_ssize_t origsize = Py_SIZE(self)
+ resize_smart(self, origsize + n)
+ memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize)
+ return 0
+
+cdef inline int extend(array self, array other) except -1:
+ """ extend array with data from another array; types must match. """
+ if self.ob_descr.typecode != other.ob_descr.typecode:
+ PyErr_BadArgument()
+ return extend_buffer(self, other.data.as_chars, Py_SIZE(other))
+
+cdef inline void zero(array self):
+ """ set all elements of array to zero. """
+ memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize)
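A minimal usage sketch for the cpython.array helpers declared above (the function name `scaled` and the sample values are illustrative only, assuming a compiled Cython module):

from cpython cimport array
import array as pyarray

def scaled(double factor):
    # clone() allocates a new array with the same typecode as the template;
    # passing False for `zero` leaves the new buffer uninitialised.
    cdef array.array template = pyarray.array('d', [1.0, 2.0, 3.0])
    cdef array.array out = array.clone(template, len(template), False)
    cdef Py_ssize_t i
    for i in range(len(template)):
        # The __data_union view gives direct typed access to the raw buffer.
        out.data.as_doubles[i] = template.data.as_doubles[i] * factor
    return out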
diff --git a/contrib/tools/cython/Cython/Includes/cpython/bool.pxd b/contrib/tools/cython/Cython/Includes/cpython/bool.pxd
index 34b13155a6..c775088ce6 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/bool.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/bool.pxd
@@ -1,38 +1,38 @@
-
-cdef extern from "Python.h":
-
- ############################################################################
- # 7.2.2 Boolean Objects
- ############################################################################
-
- ctypedef class __builtin__.bool [object PyBoolObject]:
- pass
-
- # Booleans in Python are implemented as a subclass of
- # integers. There are only two booleans, Py_False and Py_True. As
- # such, the normal creation and deletion functions don't apply to
- # booleans. The following macros are available, however.
-
- bint PyBool_Check(object o)
- # Return true if o is of type PyBool_Type.
-
- #PyObject* Py_False
- # The Python False object. This object has no methods. It needs to
- # be treated just like any other object with respect to reference
- # counts.
-
- #PyObject* Py_True
- # The Python True object. This object has no methods. It needs to
- # be treated just like any other object with respect to reference
- # counts.
-
- # Py_RETURN_FALSE
- # Return Py_False from a function, properly incrementing its reference count.
-
- # Py_RETURN_TRUE
- # Return Py_True from a function, properly incrementing its reference count.
-
- object PyBool_FromLong(long v)
- # Return value: New reference.
- # Return a new reference to Py_True or Py_False depending on the truth value of v.
-
+
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 7.2.2 Boolean Objects
+ ############################################################################
+
+ ctypedef class __builtin__.bool [object PyBoolObject]:
+ pass
+
+ # Booleans in Python are implemented as a subclass of
+ # integers. There are only two booleans, Py_False and Py_True. As
+ # such, the normal creation and deletion functions don't apply to
+ # booleans. The following macros are available, however.
+
+ bint PyBool_Check(object o)
+ # Return true if o is of type PyBool_Type.
+
+ #PyObject* Py_False
+ # The Python False object. This object has no methods. It needs to
+ # be treated just like any other object with respect to reference
+ # counts.
+
+ #PyObject* Py_True
+ # The Python True object. This object has no methods. It needs to
+ # be treated just like any other object with respect to reference
+ # counts.
+
+ # Py_RETURN_FALSE
+ # Return Py_False from a function, properly incrementing its reference count.
+
+ # Py_RETURN_TRUE
+ # Return Py_True from a function, properly incrementing its reference count.
+
+ object PyBool_FromLong(long v)
+ # Return value: New reference.
+ # Return a new reference to Py_True or Py_False depending on the truth value of v.
+
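A short hedged example of the two functions exposed by bool.pxd above (the helper name `from_flag` is made up for illustration):

from cpython.bool cimport PyBool_Check, PyBool_FromLong

def from_flag(long flag):
    # PyBool_FromLong returns a new reference to Py_True or Py_False
    # depending on the truth value of `flag`.
    result = PyBool_FromLong(flag)
    assert PyBool_Check(result)
    return result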
diff --git a/contrib/tools/cython/Cython/Includes/cpython/buffer.pxd b/contrib/tools/cython/Cython/Includes/cpython/buffer.pxd
index 82c0ced9df..3f1ada774a 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/buffer.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/buffer.pxd
@@ -1,112 +1,112 @@
-# Please see the Python header files (object.h/abstract.h) for docs
-
-cdef extern from "Python.h":
-
- cdef enum:
+# Please see the Python header files (object.h/abstract.h) for docs
+
+cdef extern from "Python.h":
+
+ cdef enum:
PyBUF_MAX_NDIM
cdef enum:
- PyBUF_SIMPLE,
- PyBUF_WRITABLE,
+ PyBUF_SIMPLE,
+ PyBUF_WRITABLE,
PyBUF_WRITEABLE, # backwards compatibility
- PyBUF_FORMAT,
- PyBUF_ND,
- PyBUF_STRIDES,
- PyBUF_C_CONTIGUOUS,
- PyBUF_F_CONTIGUOUS,
- PyBUF_ANY_CONTIGUOUS,
- PyBUF_INDIRECT,
- PyBUF_CONTIG,
- PyBUF_CONTIG_RO,
- PyBUF_STRIDED,
- PyBUF_STRIDED_RO,
- PyBUF_RECORDS,
- PyBUF_RECORDS_RO,
- PyBUF_FULL,
- PyBUF_FULL_RO,
- PyBUF_READ,
- PyBUF_WRITE,
- PyBUF_SHADOW
-
- bint PyObject_CheckBuffer(object obj)
- # Return 1 if obj supports the buffer interface otherwise 0.
-
- int PyObject_GetBuffer(object obj, Py_buffer *view, int flags) except -1
- # Export obj into a Py_buffer, view. These arguments must never be
- # NULL. The flags argument is a bit field indicating what kind of
- # buffer the caller is prepared to deal with and therefore what
- # kind of buffer the exporter is allowed to return. The buffer
- # interface allows for complicated memory sharing possibilities,
- # but some caller may not be able to handle all the complexity but
- # may want to see if the exporter will let them take a simpler
- # view to its memory.
-
- # Some exporters may not be able to share memory in every possible
- # way and may need to raise errors to signal to some consumers
- # that something is just not possible. These errors should be a
- # BufferError unless there is another error that is actually
- # causing the problem. The exporter can use flags information to
- # simplify how much of the Py_buffer structure is filled in with
- # non-default values and/or raise an error if the object can’t
- # support a simpler view of its memory.
-
- # 0 is returned on success and -1 on error.
-
- void PyBuffer_Release(Py_buffer *view)
- # Release the buffer view. This should be called when the buffer
- # is no longer being used as it may free memory from it.
-
- void* PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices)
- # ??
-
- Py_ssize_t PyBuffer_SizeFromFormat(char *) # actually const char
- # Return the implied ~Py_buffer.itemsize from the struct-stype
- # ~Py_buffer.format
-
- int PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
- # ??
-
- int PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
- # ??
-
- int PyObject_CopyToObject(object obj, void *buf, Py_ssize_t len, char fortran) except -1
- # Copy len bytes of data pointed to by the contiguous chunk of
- # memory pointed to by buf into the buffer exported by obj. The
- # buffer must of course be writable. Return 0 on success and
- # return -1 and raise an error on failure. If the object does not
- # have a writable buffer, then an error is raised. If fortran is
- # 'F', then if the object is multi-dimensional, then the data will
- # be copied into the array in Fortran-style (first dimension
- # varies the fastest). If fortran is 'C', then the data will be
- # copied into the array in C-style (last dimension varies the
- # fastest). If fortran is 'A', then it does not matter and the
- # copy will be made in whatever way is more efficient.
-
- int PyObject_CopyData(object dest, object src) except -1
- # Copy the data from the src buffer to the buffer of destination
-
- bint PyBuffer_IsContiguous(Py_buffer *view, char fort)
- # Return 1 if the memory defined by the view is C-style (fortran
- # is 'C') or Fortran-style (fortran is 'F') contiguous or either
- # one (fortran is 'A'). Return 0 otherwise.
-
- void PyBuffer_FillContiguousStrides(int ndims,
- Py_ssize_t *shape,
- Py_ssize_t *strides,
- Py_ssize_t itemsize,
- char fort)
- # Fill the strides array with byte-strides of a contiguous
- # (Fortran-style if fort is 'F' or C-style otherwise) array of the
- # given shape with the given number of bytes per element.
-
- int PyBuffer_FillInfo(Py_buffer *view, object exporter, void *buf,
- Py_ssize_t len, int readonly, int flags) except -1
- # Fill in a buffer-info structure, view, correctly for an exporter
- # that can only share a contiguous chunk of memory of “unsigned
- # bytes” of the given length. Return 0 on success and -1 (with
- # raising an error) on error.
-
- # DEPRECATED HERE: do not cimport from here, cimport from cpython.object instead
- object PyObject_Format(object obj, object format_spec)
- # Takes an arbitrary object and returns the result of calling
- # obj.__format__(format_spec).
+ PyBUF_FORMAT,
+ PyBUF_ND,
+ PyBUF_STRIDES,
+ PyBUF_C_CONTIGUOUS,
+ PyBUF_F_CONTIGUOUS,
+ PyBUF_ANY_CONTIGUOUS,
+ PyBUF_INDIRECT,
+ PyBUF_CONTIG,
+ PyBUF_CONTIG_RO,
+ PyBUF_STRIDED,
+ PyBUF_STRIDED_RO,
+ PyBUF_RECORDS,
+ PyBUF_RECORDS_RO,
+ PyBUF_FULL,
+ PyBUF_FULL_RO,
+ PyBUF_READ,
+ PyBUF_WRITE,
+ PyBUF_SHADOW
+
+ bint PyObject_CheckBuffer(object obj)
+ # Return 1 if obj supports the buffer interface otherwise 0.
+
+ int PyObject_GetBuffer(object obj, Py_buffer *view, int flags) except -1
+ # Export obj into a Py_buffer, view. These arguments must never be
+ # NULL. The flags argument is a bit field indicating what kind of
+ # buffer the caller is prepared to deal with and therefore what
+ # kind of buffer the exporter is allowed to return. The buffer
+ # interface allows for complicated memory sharing possibilities,
+ # but some caller may not be able to handle all the complexity but
+ # may want to see if the exporter will let them take a simpler
+ # view to its memory.
+
+ # Some exporters may not be able to share memory in every possible
+ # way and may need to raise errors to signal to some consumers
+ # that something is just not possible. These errors should be a
+ # BufferError unless there is another error that is actually
+ # causing the problem. The exporter can use flags information to
+ # simplify how much of the Py_buffer structure is filled in with
+ # non-default values and/or raise an error if the object can’t
+ # support a simpler view of its memory.
+
+ # 0 is returned on success and -1 on error.
+
+ void PyBuffer_Release(Py_buffer *view)
+ # Release the buffer view. This should be called when the buffer
+ # is no longer being used as it may free memory from it.
+
+ void* PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices)
+ # ??
+
+ Py_ssize_t PyBuffer_SizeFromFormat(char *) # actually const char
+    # Return the implied ~Py_buffer.itemsize from the struct-style
+ # ~Py_buffer.format
+
+ int PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
+ # ??
+
+ int PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
+ # ??
+
+ int PyObject_CopyToObject(object obj, void *buf, Py_ssize_t len, char fortran) except -1
+ # Copy len bytes of data pointed to by the contiguous chunk of
+ # memory pointed to by buf into the buffer exported by obj. The
+ # buffer must of course be writable. Return 0 on success and
+ # return -1 and raise an error on failure. If the object does not
+ # have a writable buffer, then an error is raised. If fortran is
+ # 'F', then if the object is multi-dimensional, then the data will
+ # be copied into the array in Fortran-style (first dimension
+ # varies the fastest). If fortran is 'C', then the data will be
+ # copied into the array in C-style (last dimension varies the
+ # fastest). If fortran is 'A', then it does not matter and the
+ # copy will be made in whatever way is more efficient.
+
+ int PyObject_CopyData(object dest, object src) except -1
+ # Copy the data from the src buffer to the buffer of destination
+
+ bint PyBuffer_IsContiguous(Py_buffer *view, char fort)
+ # Return 1 if the memory defined by the view is C-style (fortran
+ # is 'C') or Fortran-style (fortran is 'F') contiguous or either
+ # one (fortran is 'A'). Return 0 otherwise.
+
+ void PyBuffer_FillContiguousStrides(int ndims,
+ Py_ssize_t *shape,
+ Py_ssize_t *strides,
+ Py_ssize_t itemsize,
+ char fort)
+ # Fill the strides array with byte-strides of a contiguous
+ # (Fortran-style if fort is 'F' or C-style otherwise) array of the
+ # given shape with the given number of bytes per element.
+
+ int PyBuffer_FillInfo(Py_buffer *view, object exporter, void *buf,
+ Py_ssize_t len, int readonly, int flags) except -1
+ # Fill in a buffer-info structure, view, correctly for an exporter
+ # that can only share a contiguous chunk of memory of “unsigned
+ # bytes” of the given length. Return 0 on success and -1 (with
+ # raising an error) on error.
+
+ # DEPRECATED HERE: do not cimport from here, cimport from cpython.object instead
+ object PyObject_Format(object obj, object format_spec)
+ # Takes an arbitrary object and returns the result of calling
+ # obj.__format__(format_spec).
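To make the buffer.pxd API above concrete, a minimal sketch of acquiring and releasing a simple buffer view; it assumes only the names declared in this file plus Cython's built-in Py_buffer struct, and `buffer_length` is a hypothetical helper:

from cpython.buffer cimport (PyObject_CheckBuffer, PyObject_GetBuffer,
                             PyBuffer_Release, PyBUF_SIMPLE)

def buffer_length(obj):
    # Acquire the simplest possible contiguous view, read its length in
    # bytes, and release the view so the exporter is not kept pinned.
    cdef Py_buffer view
    if not PyObject_CheckBuffer(obj):
        raise TypeError("object does not support the buffer protocol")
    PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE)
    try:
        return view.len
    finally:
        PyBuffer_Release(&view)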
diff --git a/contrib/tools/cython/Cython/Includes/cpython/bytes.pxd b/contrib/tools/cython/Cython/Includes/cpython/bytes.pxd
index ff4c78dc5f..ea72c6aae7 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/bytes.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/bytes.pxd
@@ -1,198 +1,198 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
- ctypedef struct va_list
-
- ############################################################################
- # 7.3.1 String Objects
- ############################################################################
-
- # These functions raise TypeError when expecting a string
- # parameter and are called with a non-string parameter.
- # PyStringObject
- # This subtype of PyObject represents a Python bytes object.
- # PyTypeObject PyBytes_Type
- # This instance of PyTypeObject represents the Python bytes type;
- # it is the same object as bytes and types.BytesType in the Python
- # layer.
-
- bint PyBytes_Check(object o)
- # Return true if the object o is a string object or an instance of
- # a subtype of the string type.
-
- bint PyBytes_CheckExact(object o)
- # Return true if the object o is a string object, but not an instance of a subtype of the string type.
-
- bytes PyBytes_FromString(char *v)
- # Return value: New reference.
- # Return a new string object with the value v on success, and NULL
- # on failure. The parameter v must not be NULL; it will not be
- # checked.
-
- bytes PyBytes_FromStringAndSize(char *v, Py_ssize_t len)
- # Return value: New reference.
- # Return a new string object with the value v and length len on
- # success, and NULL on failure. If v is NULL, the contents of the
- # string are uninitialized.
-
- bytes PyBytes_FromFormat(char *format, ...)
- # Return value: New reference.
- # Take a C printf()-style format string and a variable number of
- # arguments, calculate the size of the resulting Python string and
- # return a string with the values formatted into it. The variable
- # arguments must be C types and must correspond exactly to the
- # format characters in the format string. The following format
- # characters are allowed:
- # Format Characters Type Comment
- # %% n/a The literal % character.
- # %c int A single character, represented as an C int.
- # %d int Exactly equivalent to printf("%d").
- # %u unsigned int Exactly equivalent to printf("%u").
- # %ld long Exactly equivalent to printf("%ld").
- # %lu unsigned long Exactly equivalent to printf("%lu").
- # %zd Py_ssize_t Exactly equivalent to printf("%zd").
- # %zu size_t Exactly equivalent to printf("%zu").
- # %i int Exactly equivalent to printf("%i").
- # %x int Exactly equivalent to printf("%x").
- # %s char* A null-terminated C character array.
-
- # %p void* The hex representation of a C pointer.
- # Mostly equivalent to printf("%p") except that it is guaranteed to
- # start with the literal 0x regardless of what the platform's printf
- # yields.
- # An unrecognized format character causes all the rest of the
- # format string to be copied as-is to the result string, and any
- # extra arguments discarded.
-
- bytes PyBytes_FromFormatV(char *format, va_list vargs)
- # Return value: New reference.
- # Identical to PyBytes_FromFormat() except that it takes exactly two arguments.
-
- Py_ssize_t PyBytes_Size(object string) except -1
- # Return the length of the string in string object string.
-
- Py_ssize_t PyBytes_GET_SIZE(object string)
- # Macro form of PyBytes_Size() but without error checking.
-
- char* PyBytes_AsString(object string) except NULL
- # Return a NUL-terminated representation of the contents of
- # string. The pointer refers to the internal buffer of string, not
- # a copy. The data must not be modified in any way, unless the
- # string was just created using PyBytes_FromStringAndSize(NULL,
- # size). It must not be deallocated. If string is a Unicode
- # object, this function computes the default encoding of string
- # and operates on that. If string is not a string object at all,
- # PyBytes_AsString() returns NULL and raises TypeError.
-
- char* PyBytes_AS_STRING(object string)
- # Macro form of PyBytes_AsString() but without error
- # checking. Only string objects are supported; no Unicode objects
- # should be passed.
-
- int PyBytes_AsStringAndSize(object obj, char **buffer, Py_ssize_t *length) except -1
- # Return a NULL-terminated representation of the contents of the
- # object obj through the output variables buffer and length.
- #
- # The function accepts both string and Unicode objects as
- # input. For Unicode objects it returns the default encoded
- # version of the object. If length is NULL, the resulting buffer
- # may not contain NUL characters; if it does, the function returns
- # -1 and a TypeError is raised.
-
- # The buffer refers to an internal string buffer of obj, not a
- # copy. The data must not be modified in any way, unless the
- # string was just created using PyBytes_FromStringAndSize(NULL,
- # size). It must not be deallocated. If string is a Unicode
- # object, this function computes the default encoding of string
- # and operates on that. If string is not a string object at all,
- # PyBytes_AsStringAndSize() returns -1 and raises TypeError.
-
- void PyBytes_Concat(PyObject **string, object newpart)
- # Create a new string object in *string containing the contents of
- # newpart appended to string; the caller will own the new
- # reference. The reference to the old value of string will be
- # stolen. If the new string cannot be created, the old reference
- # to string will still be discarded and the value of *string will
- # be set to NULL; the appropriate exception will be set.
-
- void PyBytes_ConcatAndDel(PyObject **string, object newpart)
- # Create a new string object in *string containing the contents of
- # newpart appended to string. This version decrements the
- # reference count of newpart.
-
- int _PyBytes_Resize(PyObject **string, Py_ssize_t newsize) except -1
- # A way to resize a string object even though it is
- # ``immutable''. Only use this to build up a brand new string
- # object; don't use this if the string may already be known in
- # other parts of the code. It is an error to call this function if
- # the refcount on the input string object is not one. Pass the
- # address of an existing string object as an lvalue (it may be
- # written into), and the new size desired. On success, *string
- # holds the resized string object and 0 is returned; the address
- # in *string may differ from its input value. If the reallocation
- # fails, the original string object at *string is deallocated,
- # *string is set to NULL, a memory exception is set, and -1 is
- # returned.
-
- bytes PyBytes_Format(object format, object args)
- # Return value: New reference. Return a new string object from
- # format and args. Analogous to format % args. The args argument
- # must be a tuple.
-
- void PyBytes_InternInPlace(PyObject **string)
- # Intern the argument *string in place. The argument must be the
- # address of a pointer variable pointing to a Python string
- # object. If there is an existing interned string that is the same
- # as *string, it sets *string to it (decrementing the reference
- # count of the old string object and incrementing the reference
- # count of the interned string object), otherwise it leaves
- # *string alone and interns it (incrementing its reference
- # count). (Clarification: even though there is a lot of talk about
- # reference counts, think of this function as
- # reference-count-neutral; you own the object after the call if
- # and only if you owned it before the call.)
-
- bytes PyBytes_InternFromString(char *v)
- # Return value: New reference.
- # A combination of PyBytes_FromString() and
- # PyBytes_InternInPlace(), returning either a new string object
- # that has been interned, or a new (``owned'') reference to an
- # earlier interned string object with the same value.
-
- object PyBytes_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
- # Return value: New reference.
- # Create an object by decoding size bytes of the encoded buffer s
- # using the codec registered for encoding. encoding and errors
- # have the same meaning as the parameters of the same name in the
- # unicode() built-in function. The codec to be used is looked up
- # using the Python codec registry. Return NULL if an exception was
- # raised by the codec.
-
- object PyBytes_AsDecodedObject(object str, char *encoding, char *errors)
- # Return value: New reference.
- # Decode a string object by passing it to the codec registered for
- # encoding and return the result as Python object. encoding and
- # errors have the same meaning as the parameters of the same name
- # in the string encode() method. The codec to be used is looked up
- # using the Python codec registry. Return NULL if an exception was
- # raised by the codec.
-
- object PyBytes_Encode(char *s, Py_ssize_t size, char *encoding, char *errors)
- # Return value: New reference.
- # Encode the char buffer of the given size by passing it to the
- # codec registered for encoding and return a Python
- # object. encoding and errors have the same meaning as the
- # parameters of the same name in the string encode() method. The
- # codec to be used is looked up using the Python codec
- # registry. Return NULL if an exception was raised by the codec.
-
- object PyBytes_AsEncodedObject(object str, char *encoding, char *errors)
- # Return value: New reference.
- # Encode a string object using the codec registered for encoding
- # and return the result as Python object. encoding and errors have
- # the same meaning as the parameters of the same name in the
- # string encode() method. The codec to be used is looked up using
- # the Python codec registry. Return NULL if an exception was
- # raised by the codec.
-
-
+
+cdef extern from "Python.h":
+ ctypedef struct va_list
+
+ ############################################################################
+ # 7.3.1 String Objects
+ ############################################################################
+
+ # These functions raise TypeError when expecting a string
+ # parameter and are called with a non-string parameter.
+ # PyStringObject
+ # This subtype of PyObject represents a Python bytes object.
+ # PyTypeObject PyBytes_Type
+ # This instance of PyTypeObject represents the Python bytes type;
+ # it is the same object as bytes and types.BytesType in the Python
+ # layer.
+
+ bint PyBytes_Check(object o)
+ # Return true if the object o is a string object or an instance of
+ # a subtype of the string type.
+
+ bint PyBytes_CheckExact(object o)
+ # Return true if the object o is a string object, but not an instance of a subtype of the string type.
+
+ bytes PyBytes_FromString(char *v)
+ # Return value: New reference.
+ # Return a new string object with the value v on success, and NULL
+ # on failure. The parameter v must not be NULL; it will not be
+ # checked.
+
+ bytes PyBytes_FromStringAndSize(char *v, Py_ssize_t len)
+ # Return value: New reference.
+ # Return a new string object with the value v and length len on
+ # success, and NULL on failure. If v is NULL, the contents of the
+ # string are uninitialized.
+
+ bytes PyBytes_FromFormat(char *format, ...)
+ # Return value: New reference.
+ # Take a C printf()-style format string and a variable number of
+ # arguments, calculate the size of the resulting Python string and
+ # return a string with the values formatted into it. The variable
+ # arguments must be C types and must correspond exactly to the
+ # format characters in the format string. The following format
+ # characters are allowed:
+ # Format Characters Type Comment
+ # %% n/a The literal % character.
+ # %c int A single character, represented as an C int.
+ # %d int Exactly equivalent to printf("%d").
+ # %u unsigned int Exactly equivalent to printf("%u").
+ # %ld long Exactly equivalent to printf("%ld").
+ # %lu unsigned long Exactly equivalent to printf("%lu").
+ # %zd Py_ssize_t Exactly equivalent to printf("%zd").
+ # %zu size_t Exactly equivalent to printf("%zu").
+ # %i int Exactly equivalent to printf("%i").
+ # %x int Exactly equivalent to printf("%x").
+ # %s char* A null-terminated C character array.
+
+ # %p void* The hex representation of a C pointer.
+ # Mostly equivalent to printf("%p") except that it is guaranteed to
+ # start with the literal 0x regardless of what the platform's printf
+ # yields.
+ # An unrecognized format character causes all the rest of the
+ # format string to be copied as-is to the result string, and any
+ # extra arguments discarded.
+
+ bytes PyBytes_FromFormatV(char *format, va_list vargs)
+ # Return value: New reference.
+ # Identical to PyBytes_FromFormat() except that it takes exactly two arguments.
+
+ Py_ssize_t PyBytes_Size(object string) except -1
+ # Return the length of the string in string object string.
+
+ Py_ssize_t PyBytes_GET_SIZE(object string)
+ # Macro form of PyBytes_Size() but without error checking.
+
+ char* PyBytes_AsString(object string) except NULL
+ # Return a NUL-terminated representation of the contents of
+ # string. The pointer refers to the internal buffer of string, not
+ # a copy. The data must not be modified in any way, unless the
+ # string was just created using PyBytes_FromStringAndSize(NULL,
+ # size). It must not be deallocated. If string is a Unicode
+ # object, this function computes the default encoding of string
+ # and operates on that. If string is not a string object at all,
+ # PyBytes_AsString() returns NULL and raises TypeError.
+
+ char* PyBytes_AS_STRING(object string)
+ # Macro form of PyBytes_AsString() but without error
+ # checking. Only string objects are supported; no Unicode objects
+ # should be passed.
+
+ int PyBytes_AsStringAndSize(object obj, char **buffer, Py_ssize_t *length) except -1
+ # Return a NULL-terminated representation of the contents of the
+ # object obj through the output variables buffer and length.
+ #
+ # The function accepts both string and Unicode objects as
+ # input. For Unicode objects it returns the default encoded
+ # version of the object. If length is NULL, the resulting buffer
+ # may not contain NUL characters; if it does, the function returns
+ # -1 and a TypeError is raised.
+
+ # The buffer refers to an internal string buffer of obj, not a
+ # copy. The data must not be modified in any way, unless the
+ # string was just created using PyBytes_FromStringAndSize(NULL,
+ # size). It must not be deallocated. If string is a Unicode
+ # object, this function computes the default encoding of string
+ # and operates on that. If string is not a string object at all,
+ # PyBytes_AsStringAndSize() returns -1 and raises TypeError.
+
+ void PyBytes_Concat(PyObject **string, object newpart)
+ # Create a new string object in *string containing the contents of
+ # newpart appended to string; the caller will own the new
+ # reference. The reference to the old value of string will be
+ # stolen. If the new string cannot be created, the old reference
+ # to string will still be discarded and the value of *string will
+ # be set to NULL; the appropriate exception will be set.
+
+ void PyBytes_ConcatAndDel(PyObject **string, object newpart)
+ # Create a new string object in *string containing the contents of
+ # newpart appended to string. This version decrements the
+ # reference count of newpart.
+
+ int _PyBytes_Resize(PyObject **string, Py_ssize_t newsize) except -1
+ # A way to resize a string object even though it is
+ # ``immutable''. Only use this to build up a brand new string
+ # object; don't use this if the string may already be known in
+ # other parts of the code. It is an error to call this function if
+ # the refcount on the input string object is not one. Pass the
+ # address of an existing string object as an lvalue (it may be
+ # written into), and the new size desired. On success, *string
+ # holds the resized string object and 0 is returned; the address
+ # in *string may differ from its input value. If the reallocation
+ # fails, the original string object at *string is deallocated,
+ # *string is set to NULL, a memory exception is set, and -1 is
+ # returned.
+
+ bytes PyBytes_Format(object format, object args)
+ # Return value: New reference. Return a new string object from
+ # format and args. Analogous to format % args. The args argument
+ # must be a tuple.
+
+ void PyBytes_InternInPlace(PyObject **string)
+ # Intern the argument *string in place. The argument must be the
+ # address of a pointer variable pointing to a Python string
+ # object. If there is an existing interned string that is the same
+ # as *string, it sets *string to it (decrementing the reference
+ # count of the old string object and incrementing the reference
+ # count of the interned string object), otherwise it leaves
+ # *string alone and interns it (incrementing its reference
+ # count). (Clarification: even though there is a lot of talk about
+ # reference counts, think of this function as
+ # reference-count-neutral; you own the object after the call if
+ # and only if you owned it before the call.)
+
+ bytes PyBytes_InternFromString(char *v)
+ # Return value: New reference.
+ # A combination of PyBytes_FromString() and
+ # PyBytes_InternInPlace(), returning either a new string object
+ # that has been interned, or a new (``owned'') reference to an
+ # earlier interned string object with the same value.
+
+ object PyBytes_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
+ # Return value: New reference.
+ # Create an object by decoding size bytes of the encoded buffer s
+ # using the codec registered for encoding. encoding and errors
+ # have the same meaning as the parameters of the same name in the
+ # unicode() built-in function. The codec to be used is looked up
+ # using the Python codec registry. Return NULL if an exception was
+ # raised by the codec.
+
+ object PyBytes_AsDecodedObject(object str, char *encoding, char *errors)
+ # Return value: New reference.
+ # Decode a string object by passing it to the codec registered for
+ # encoding and return the result as Python object. encoding and
+ # errors have the same meaning as the parameters of the same name
+ # in the string encode() method. The codec to be used is looked up
+ # using the Python codec registry. Return NULL if an exception was
+ # raised by the codec.
+
+ object PyBytes_Encode(char *s, Py_ssize_t size, char *encoding, char *errors)
+ # Return value: New reference.
+ # Encode the char buffer of the given size by passing it to the
+ # codec registered for encoding and return a Python
+ # object. encoding and errors have the same meaning as the
+ # parameters of the same name in the string encode() method. The
+ # codec to be used is looked up using the Python codec
+ # registry. Return NULL if an exception was raised by the codec.
+
+ object PyBytes_AsEncodedObject(object str, char *encoding, char *errors)
+ # Return value: New reference.
+ # Encode a string object using the codec registered for encoding
+ # and return the result as Python object. encoding and errors have
+ # the same meaning as the parameters of the same name in the
+ # string encode() method. The codec to be used is looked up using
+ # the Python codec registry. Return NULL if an exception was
+ # raised by the codec.
+
+
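A small usage sketch for the bytes.pxd declarations above (the function `roundtrip` is hypothetical); it round-trips a bytes object through the raw char* interface:

from cpython.bytes cimport PyBytes_AsStringAndSize, PyBytes_FromStringAndSize

def roundtrip(bytes data):
    # Expose the internal buffer of `data`, then build a new, independent
    # bytes object from the same memory. The input buffer is never modified.
    cdef char* buf
    cdef Py_ssize_t size
    PyBytes_AsStringAndSize(data, &buf, &size)
    return PyBytes_FromStringAndSize(buf, size)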
diff --git a/contrib/tools/cython/Cython/Includes/cpython/cobject.pxd b/contrib/tools/cython/Cython/Includes/cpython/cobject.pxd
index 0fab44b3e0..497d8a92e8 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/cobject.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/cobject.pxd
@@ -1,36 +1,36 @@
-
-cdef extern from "Python.h":
-
- ###########################################################################
- # Warning:
- #
- # The CObject API is deprecated as of Python 3.1. Please switch to
- # the new Capsules API.
- ###########################################################################
-
- int PyCObject_Check(object p)
- # Return true if its argument is a PyCObject.
-
- object PyCObject_FromVoidPtr(void* cobj, void (*destr)(void *))
- # Return value: New reference.
- #
- # Create a PyCObject from the void * cobj. The destr function will
- # be called when the object is reclaimed, unless it is NULL.
-
- object PyCObject_FromVoidPtrAndDesc(void* cobj, void* desc, void (*destr)(void *, void *))
- # Return value: New reference.
- #
- # Create a PyCObject from the void * cobj. The destr function will
- # be called when the object is reclaimed. The desc argument can be
- # used to pass extra callback data for the destructor function.
-
- void* PyCObject_AsVoidPtr(object self) except? NULL
- # Return the object void * that the PyCObject self was created with.
-
- void* PyCObject_GetDesc(object self) except? NULL
- # Return the description void * that the PyCObject self was created with.
-
- int PyCObject_SetVoidPtr(object self, void* cobj) except 0
- # Set the void pointer inside self to cobj. The PyCObject must not
- # have an associated destructor. Return true on success, false on
- # failure.
+
+cdef extern from "Python.h":
+
+ ###########################################################################
+ # Warning:
+ #
+ # The CObject API is deprecated as of Python 3.1. Please switch to
+ # the new Capsules API.
+ ###########################################################################
+
+ int PyCObject_Check(object p)
+ # Return true if its argument is a PyCObject.
+
+ object PyCObject_FromVoidPtr(void* cobj, void (*destr)(void *))
+ # Return value: New reference.
+ #
+ # Create a PyCObject from the void * cobj. The destr function will
+ # be called when the object is reclaimed, unless it is NULL.
+
+ object PyCObject_FromVoidPtrAndDesc(void* cobj, void* desc, void (*destr)(void *, void *))
+ # Return value: New reference.
+ #
+ # Create a PyCObject from the void * cobj. The destr function will
+ # be called when the object is reclaimed. The desc argument can be
+ # used to pass extra callback data for the destructor function.
+
+ void* PyCObject_AsVoidPtr(object self) except? NULL
+ # Return the object void * that the PyCObject self was created with.
+
+ void* PyCObject_GetDesc(object self) except? NULL
+ # Return the description void * that the PyCObject self was created with.
+
+ int PyCObject_SetVoidPtr(object self, void* cobj) except 0
+ # Set the void pointer inside self to cobj. The PyCObject must not
+ # have an associated destructor. Return true on success, false on
+ # failure.
diff --git a/contrib/tools/cython/Cython/Includes/cpython/complex.pxd b/contrib/tools/cython/Cython/Includes/cpython/complex.pxd
index 607d934a93..f5ba339575 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/complex.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/complex.pxd
@@ -1,50 +1,50 @@
-
-cdef extern from "Python.h":
-
- ctypedef struct Py_complex:
- double imag
- double real
-
- ############################################################################
- # 7.2.5.2 Complex Numbers as Python Objects
- ############################################################################
-
- # PyComplexObject
- # This subtype of PyObject represents a Python complex number object.
-
- ctypedef class __builtin__.complex [object PyComplexObject]:
- cdef Py_complex cval
- # not making these available to keep them read-only:
- #cdef double imag "cval.imag"
- #cdef double real "cval.real"
-
- # PyTypeObject PyComplex_Type
- # This instance of PyTypeObject represents the Python complex
- # number type. It is the same object as complex and
- # types.ComplexType.
-
- bint PyComplex_Check(object p)
- # Return true if its argument is a PyComplexObject or a subtype of
- # PyComplexObject.
-
- bint PyComplex_CheckExact(object p)
- # Return true if its argument is a PyComplexObject, but not a subtype of PyComplexObject.
-
- object PyComplex_FromCComplex(Py_complex v)
- # Return value: New reference.
- # Create a new Python complex number object from a C Py_complex value.
-
- object PyComplex_FromDoubles(double real, double imag)
- # Return value: New reference.
- # Return a new PyComplexObject object from real and imag.
-
- double PyComplex_RealAsDouble(object op) except? -1
- # Return the real part of op as a C double.
-
- double PyComplex_ImagAsDouble(object op) except? -1
- # Return the imaginary part of op as a C double.
-
- Py_complex PyComplex_AsCComplex(object op)
- # Return the Py_complex value of the complex number op.
- #
- # Returns (-1+0i) in case of an error
+
+cdef extern from "Python.h":
+
+ ctypedef struct Py_complex:
+ double imag
+ double real
+
+ ############################################################################
+ # 7.2.5.2 Complex Numbers as Python Objects
+ ############################################################################
+
+ # PyComplexObject
+ # This subtype of PyObject represents a Python complex number object.
+
+ ctypedef class __builtin__.complex [object PyComplexObject]:
+ cdef Py_complex cval
+ # not making these available to keep them read-only:
+ #cdef double imag "cval.imag"
+ #cdef double real "cval.real"
+
+ # PyTypeObject PyComplex_Type
+ # This instance of PyTypeObject represents the Python complex
+ # number type. It is the same object as complex and
+ # types.ComplexType.
+
+ bint PyComplex_Check(object p)
+ # Return true if its argument is a PyComplexObject or a subtype of
+ # PyComplexObject.
+
+ bint PyComplex_CheckExact(object p)
+ # Return true if its argument is a PyComplexObject, but not a subtype of PyComplexObject.
+
+ object PyComplex_FromCComplex(Py_complex v)
+ # Return value: New reference.
+ # Create a new Python complex number object from a C Py_complex value.
+
+ object PyComplex_FromDoubles(double real, double imag)
+ # Return value: New reference.
+ # Return a new PyComplexObject object from real and imag.
+
+ double PyComplex_RealAsDouble(object op) except? -1
+ # Return the real part of op as a C double.
+
+ double PyComplex_ImagAsDouble(object op) except? -1
+ # Return the imaginary part of op as a C double.
+
+ Py_complex PyComplex_AsCComplex(object op)
+ # Return the Py_complex value of the complex number op.
+ #
+ # Returns (-1+0i) in case of an error
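As a brief illustration of the complex.pxd API above (the helper name `conjugate` is illustrative), a sketch that builds a Python complex from C doubles and reads the parts back:

from cpython.complex cimport (PyComplex_FromDoubles,
                              PyComplex_RealAsDouble, PyComplex_ImagAsDouble)

def conjugate(double re, double im):
    # Construct a Python complex object, then return its complex conjugate
    # by negating the imaginary part read back through the C API.
    z = PyComplex_FromDoubles(re, im)
    return PyComplex_FromDoubles(PyComplex_RealAsDouble(z),
                                 -PyComplex_ImagAsDouble(z))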
diff --git a/contrib/tools/cython/Cython/Includes/cpython/datetime.pxd b/contrib/tools/cython/Cython/Includes/cpython/datetime.pxd
index 7fef60633b..cd0f90719b 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/datetime.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/datetime.pxd
@@ -1,212 +1,212 @@
from cpython.object cimport PyObject
-
-cdef extern from "Python.h":
- ctypedef struct PyTypeObject:
- pass
-
-cdef extern from "datetime.h":
-
- ctypedef extern class datetime.date[object PyDateTime_Date]:
- pass
-
- ctypedef extern class datetime.time[object PyDateTime_Time]:
- pass
-
- ctypedef extern class datetime.datetime[object PyDateTime_DateTime]:
- pass
-
- ctypedef extern class datetime.timedelta[object PyDateTime_Delta]:
- pass
-
- ctypedef extern class datetime.tzinfo[object PyDateTime_TZInfo]:
- pass
-
- ctypedef struct PyDateTime_Date:
- pass
-
- ctypedef struct PyDateTime_Time:
- char hastzinfo
- PyObject *tzinfo
-
- ctypedef struct PyDateTime_DateTime:
- char hastzinfo
- PyObject *tzinfo
-
- ctypedef struct PyDateTime_Delta:
- int days
- int seconds
- int microseconds
-
- # Define structure for C API.
- ctypedef struct PyDateTime_CAPI:
+
+cdef extern from "Python.h":
+ ctypedef struct PyTypeObject:
+ pass
+
+cdef extern from "datetime.h":
+
+ ctypedef extern class datetime.date[object PyDateTime_Date]:
+ pass
+
+ ctypedef extern class datetime.time[object PyDateTime_Time]:
+ pass
+
+ ctypedef extern class datetime.datetime[object PyDateTime_DateTime]:
+ pass
+
+ ctypedef extern class datetime.timedelta[object PyDateTime_Delta]:
+ pass
+
+ ctypedef extern class datetime.tzinfo[object PyDateTime_TZInfo]:
+ pass
+
+ ctypedef struct PyDateTime_Date:
+ pass
+
+ ctypedef struct PyDateTime_Time:
+ char hastzinfo
+ PyObject *tzinfo
+
+ ctypedef struct PyDateTime_DateTime:
+ char hastzinfo
+ PyObject *tzinfo
+
+ ctypedef struct PyDateTime_Delta:
+ int days
+ int seconds
+ int microseconds
+
+ # Define structure for C API.
+ ctypedef struct PyDateTime_CAPI:
# type objects
- PyTypeObject *DateType
- PyTypeObject *DateTimeType
- PyTypeObject *TimeType
- PyTypeObject *DeltaType
- PyTypeObject *TZInfoType
-
- # constructors
- object (*Date_FromDate)(int, int, int, PyTypeObject*)
- object (*DateTime_FromDateAndTime)(int, int, int, int, int, int, int, object, PyTypeObject*)
- object (*Time_FromTime)(int, int, int, int, object, PyTypeObject*)
- object (*Delta_FromDelta)(int, int, int, int, PyTypeObject*)
-
- # constructors for the DB API
- object (*DateTime_FromTimestamp)(object, object, object)
- object (*Date_FromTimestamp)(object, object)
-
- # Check type of the object.
- bint PyDate_Check(object op)
- bint PyDate_CheckExact(object op)
-
- bint PyDateTime_Check(object op)
- bint PyDateTime_CheckExact(object op)
-
- bint PyTime_Check(object op)
- bint PyTime_CheckExact(object op)
-
- bint PyDelta_Check(object op)
- bint PyDelta_CheckExact(object op)
-
- bint PyTZInfo_Check(object op)
- bint PyTZInfo_CheckExact(object op)
-
- # Getters for date and datetime (C macros).
- int PyDateTime_GET_YEAR(object o)
- int PyDateTime_GET_MONTH(object o)
- int PyDateTime_GET_DAY(object o)
-
- # Getters for datetime (C macros).
- int PyDateTime_DATE_GET_HOUR(object o)
- int PyDateTime_DATE_GET_MINUTE(object o)
- int PyDateTime_DATE_GET_SECOND(object o)
- int PyDateTime_DATE_GET_MICROSECOND(object o)
-
- # Getters for time (C macros).
- int PyDateTime_TIME_GET_HOUR(object o)
- int PyDateTime_TIME_GET_MINUTE(object o)
- int PyDateTime_TIME_GET_SECOND(object o)
- int PyDateTime_TIME_GET_MICROSECOND(object o)
-
- # Getters for timedelta (C macros).
+ PyTypeObject *DateType
+ PyTypeObject *DateTimeType
+ PyTypeObject *TimeType
+ PyTypeObject *DeltaType
+ PyTypeObject *TZInfoType
+
+ # constructors
+ object (*Date_FromDate)(int, int, int, PyTypeObject*)
+ object (*DateTime_FromDateAndTime)(int, int, int, int, int, int, int, object, PyTypeObject*)
+ object (*Time_FromTime)(int, int, int, int, object, PyTypeObject*)
+ object (*Delta_FromDelta)(int, int, int, int, PyTypeObject*)
+
+ # constructors for the DB API
+ object (*DateTime_FromTimestamp)(object, object, object)
+ object (*Date_FromTimestamp)(object, object)
+
+ # Check type of the object.
+ bint PyDate_Check(object op)
+ bint PyDate_CheckExact(object op)
+
+ bint PyDateTime_Check(object op)
+ bint PyDateTime_CheckExact(object op)
+
+ bint PyTime_Check(object op)
+ bint PyTime_CheckExact(object op)
+
+ bint PyDelta_Check(object op)
+ bint PyDelta_CheckExact(object op)
+
+ bint PyTZInfo_Check(object op)
+ bint PyTZInfo_CheckExact(object op)
+
+ # Getters for date and datetime (C macros).
+ int PyDateTime_GET_YEAR(object o)
+ int PyDateTime_GET_MONTH(object o)
+ int PyDateTime_GET_DAY(object o)
+
+ # Getters for datetime (C macros).
+ int PyDateTime_DATE_GET_HOUR(object o)
+ int PyDateTime_DATE_GET_MINUTE(object o)
+ int PyDateTime_DATE_GET_SECOND(object o)
+ int PyDateTime_DATE_GET_MICROSECOND(object o)
+
+ # Getters for time (C macros).
+ int PyDateTime_TIME_GET_HOUR(object o)
+ int PyDateTime_TIME_GET_MINUTE(object o)
+ int PyDateTime_TIME_GET_SECOND(object o)
+ int PyDateTime_TIME_GET_MICROSECOND(object o)
+
+ # Getters for timedelta (C macros).
int PyDateTime_DELTA_GET_DAYS(object o)
int PyDateTime_DELTA_GET_SECONDS(object o)
int PyDateTime_DELTA_GET_MICROSECONDS(object o)
-
- # PyDateTime CAPI object.
- PyDateTime_CAPI *PyDateTimeAPI
-
- void PyDateTime_IMPORT()
-
-# Datetime C API initialization function.
-# You have to call it before any usage of DateTime CAPI functions.
-cdef inline void import_datetime():
- PyDateTime_IMPORT
-
-# Create date object using DateTime CAPI factory function.
-# Note, there are no range checks for any of the arguments.
-cdef inline object date_new(int year, int month, int day):
- return PyDateTimeAPI.Date_FromDate(year, month, day, PyDateTimeAPI.DateType)
-
-# Create time object using DateTime CAPI factory function
-# Note, there are no range checks for any of the arguments.
-cdef inline object time_new(int hour, int minute, int second, int microsecond, object tz):
- return PyDateTimeAPI.Time_FromTime(hour, minute, second, microsecond, tz, PyDateTimeAPI.TimeType)
-
-# Create datetime object using DateTime CAPI factory function.
-# Note, there are no range checks for any of the arguments.
-cdef inline object datetime_new(int year, int month, int day, int hour, int minute, int second, int microsecond, object tz):
- return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, hour, minute, second, microsecond, tz, PyDateTimeAPI.DateTimeType)
-
-# Create timedelta object using DateTime CAPI factory function.
-# Note, there are no range checks for any of the arguments.
-cdef inline object timedelta_new(int days, int seconds, int useconds):
- return PyDateTimeAPI.Delta_FromDelta(days, seconds, useconds, 1, PyDateTimeAPI.DeltaType)
-
-# More recognizable getters for date/time/datetime/timedelta.
-# There are no setters because datetime.h hasn't them.
-# This is because of immutable nature of these objects by design.
+
+ # PyDateTime CAPI object.
+ PyDateTime_CAPI *PyDateTimeAPI
+
+ void PyDateTime_IMPORT()
+
+# Datetime C API initialization function.
+# You have to call it before any usage of DateTime CAPI functions.
+cdef inline void import_datetime():
+ PyDateTime_IMPORT
+
+# Create date object using DateTime CAPI factory function.
+# Note, there are no range checks for any of the arguments.
+cdef inline object date_new(int year, int month, int day):
+ return PyDateTimeAPI.Date_FromDate(year, month, day, PyDateTimeAPI.DateType)
+
+# Create time object using DateTime CAPI factory function
+# Note, there are no range checks for any of the arguments.
+cdef inline object time_new(int hour, int minute, int second, int microsecond, object tz):
+ return PyDateTimeAPI.Time_FromTime(hour, minute, second, microsecond, tz, PyDateTimeAPI.TimeType)
+
+# Create datetime object using DateTime CAPI factory function.
+# Note, there are no range checks for any of the arguments.
+cdef inline object datetime_new(int year, int month, int day, int hour, int minute, int second, int microsecond, object tz):
+ return PyDateTimeAPI.DateTime_FromDateAndTime(year, month, day, hour, minute, second, microsecond, tz, PyDateTimeAPI.DateTimeType)
+
+# Create timedelta object using DateTime CAPI factory function.
+# Note, there are no range checks for any of the arguments.
+cdef inline object timedelta_new(int days, int seconds, int useconds):
+ return PyDateTimeAPI.Delta_FromDelta(days, seconds, useconds, 1, PyDateTimeAPI.DeltaType)
+
+# More recognizable getters for date/time/datetime/timedelta.
+# There are no setters because datetime.h does not provide them.
+# This is by design: these objects are immutable.
# If you would change time/date/datetime/timedelta object you need to recreate.
-
-# Get tzinfo of time
-cdef inline object time_tzinfo(object o):
- if (<PyDateTime_Time*>o).hastzinfo:
- return <object>(<PyDateTime_Time*>o).tzinfo
- else:
- return None
-
+
+# Get tzinfo of time
+cdef inline object time_tzinfo(object o):
+ if (<PyDateTime_Time*>o).hastzinfo:
+ return <object>(<PyDateTime_Time*>o).tzinfo
+ else:
+ return None
+
# Get tzinfo of datetime
-cdef inline object datetime_tzinfo(object o):
- if (<PyDateTime_DateTime*>o).hastzinfo:
- return <object>(<PyDateTime_DateTime*>o).tzinfo
- else:
- return None
-
-# Get year of date
-cdef inline int date_year(object o):
- return PyDateTime_GET_YEAR(o)
-
-# Get month of date
-cdef inline int date_month(object o):
- return PyDateTime_GET_MONTH(o)
-
-# Get day of date
-cdef inline int date_day(object o):
- return PyDateTime_GET_DAY(o)
-
-# Get year of datetime
-cdef inline int datetime_year(object o):
- return PyDateTime_GET_YEAR(o)
-
-# Get month of datetime
-cdef inline int datetime_month(object o):
- return PyDateTime_GET_MONTH(o)
-
-# Get day of datetime
-cdef inline int datetime_day(object o):
- return PyDateTime_GET_DAY(o)
-
-# Get hour of time
-cdef inline int time_hour(object o):
- return PyDateTime_TIME_GET_HOUR(o)
-
-# Get minute of time
-cdef inline int time_minute(object o):
- return PyDateTime_TIME_GET_MINUTE(o)
-
-# Get second of time
-cdef inline int time_second(object o):
- return PyDateTime_TIME_GET_SECOND(o)
-
-# Get microsecond of time
-cdef inline int time_microsecond(object o):
- return PyDateTime_TIME_GET_MICROSECOND(o)
-
-# Get hour of datetime
-cdef inline int datetime_hour(object o):
- return PyDateTime_DATE_GET_HOUR(o)
-
-# Get minute of datetime
-cdef inline int datetime_minute(object o):
- return PyDateTime_DATE_GET_MINUTE(o)
-
-# Get second of datetime
-cdef inline int datetime_second(object o):
- return PyDateTime_DATE_GET_SECOND(o)
-
-# Get microsecond of datetime
-cdef inline int datetime_microsecond(object o):
- return PyDateTime_DATE_GET_MICROSECOND(o)
-
-# Get days of timedelta
-cdef inline int timedelta_days(object o):
- return (<PyDateTime_Delta*>o).days
-
-# Get seconds of timedelta
-cdef inline int timedelta_seconds(object o):
- return (<PyDateTime_Delta*>o).seconds
-
-# Get microseconds of timedelta
-cdef inline int timedelta_microseconds(object o):
- return (<PyDateTime_Delta*>o).microseconds
+cdef inline object datetime_tzinfo(object o):
+ if (<PyDateTime_DateTime*>o).hastzinfo:
+ return <object>(<PyDateTime_DateTime*>o).tzinfo
+ else:
+ return None
+
+# Get year of date
+cdef inline int date_year(object o):
+ return PyDateTime_GET_YEAR(o)
+
+# Get month of date
+cdef inline int date_month(object o):
+ return PyDateTime_GET_MONTH(o)
+
+# Get day of date
+cdef inline int date_day(object o):
+ return PyDateTime_GET_DAY(o)
+
+# Get year of datetime
+cdef inline int datetime_year(object o):
+ return PyDateTime_GET_YEAR(o)
+
+# Get month of datetime
+cdef inline int datetime_month(object o):
+ return PyDateTime_GET_MONTH(o)
+
+# Get day of datetime
+cdef inline int datetime_day(object o):
+ return PyDateTime_GET_DAY(o)
+
+# Get hour of time
+cdef inline int time_hour(object o):
+ return PyDateTime_TIME_GET_HOUR(o)
+
+# Get minute of time
+cdef inline int time_minute(object o):
+ return PyDateTime_TIME_GET_MINUTE(o)
+
+# Get second of time
+cdef inline int time_second(object o):
+ return PyDateTime_TIME_GET_SECOND(o)
+
+# Get microsecond of time
+cdef inline int time_microsecond(object o):
+ return PyDateTime_TIME_GET_MICROSECOND(o)
+
+# Get hour of datetime
+cdef inline int datetime_hour(object o):
+ return PyDateTime_DATE_GET_HOUR(o)
+
+# Get minute of datetime
+cdef inline int datetime_minute(object o):
+ return PyDateTime_DATE_GET_MINUTE(o)
+
+# Get second of datetime
+cdef inline int datetime_second(object o):
+ return PyDateTime_DATE_GET_SECOND(o)
+
+# Get microsecond of datetime
+cdef inline int datetime_microsecond(object o):
+ return PyDateTime_DATE_GET_MICROSECOND(o)
+
+# Get days of timedelta
+cdef inline int timedelta_days(object o):
+ return (<PyDateTime_Delta*>o).days
+
+# Get seconds of timedelta
+cdef inline int timedelta_seconds(object o):
+ return (<PyDateTime_Delta*>o).seconds
+
+# Get microseconds of timedelta
+cdef inline int timedelta_microseconds(object o):
+ return (<PyDateTime_Delta*>o).microseconds
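To show how the datetime.pxd helpers above fit together, a minimal sketch (the function name `first_of_month` is illustrative); import_datetime() must run before any other call, and the factory functions perform no range checks:

from cpython.datetime cimport import_datetime, datetime_new, datetime_year

# Initialise the PyDateTime C API table once, at module import time.
import_datetime()

def first_of_month(int year, int month):
    # datetime_new performs no range checking, so the caller must pass a
    # valid year/month combination; tzinfo is None for a naive datetime.
    dt = datetime_new(year, month, 1, 0, 0, 0, 0, None)
    assert datetime_year(dt) == year
    return dt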
diff --git a/contrib/tools/cython/Cython/Includes/cpython/dict.pxd b/contrib/tools/cython/Cython/Includes/cpython/dict.pxd
index a27df95f9f..16dd5e1458 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/dict.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/dict.pxd
@@ -1,165 +1,165 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- ############################################################################
- # 7.4.1 Dictionary Objects
- ############################################################################
-
- # PyDictObject
- #
- # This subtype of PyObject represents a Python dictionary object
- # (i.e. the 'dict' type).
-
- # PyTypeObject PyDict_Type
- #
- # This instance of PyTypeObject represents the Python dictionary
- # type. This is exposed to Python programs as dict and
- # types.DictType.
-
- bint PyDict_Check(object p)
- # Return true if p is a dict object or an instance of a subtype of
- # the dict type.
-
- bint PyDict_CheckExact(object p)
- # Return true if p is a dict object, but not an instance of a
- # subtype of the dict type.
-
- dict PyDict_New()
- # Return value: New reference.
- # Return a new empty dictionary, or NULL on failure.
-
- object PyDictProxy_New(object dict)
- # Return value: New reference.
- # Return a proxy object for a mapping which enforces read-only
- # behavior. This is normally used to create a proxy to prevent
- # modification of the dictionary for non-dynamic class types.
-
- void PyDict_Clear(object p)
- # Empty an existing dictionary of all key-value pairs.
-
- int PyDict_Contains(object p, object key) except -1
- # Determine if dictionary p contains key. If an item in p
- # matches key, return 1, otherwise return 0. On error, return
- # -1. This is equivalent to the Python expression "key in p".
-
- dict PyDict_Copy(object p)
- # Return value: New reference.
- # Return a new dictionary that contains the same key-value pairs as p.
-
- int PyDict_SetItem(object p, object key, object val) except -1
- # Insert value into the dictionary p with a key of key. key must
- # be hashable; if it isn't, TypeError will be raised. Return 0 on
- # success or -1 on failure.
-
+
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 7.4.1 Dictionary Objects
+ ############################################################################
+
+ # PyDictObject
+ #
+ # This subtype of PyObject represents a Python dictionary object
+ # (i.e. the 'dict' type).
+
+ # PyTypeObject PyDict_Type
+ #
+ # This instance of PyTypeObject represents the Python dictionary
+ # type. This is exposed to Python programs as dict and
+ # types.DictType.
+
+ bint PyDict_Check(object p)
+ # Return true if p is a dict object or an instance of a subtype of
+ # the dict type.
+
+ bint PyDict_CheckExact(object p)
+ # Return true if p is a dict object, but not an instance of a
+ # subtype of the dict type.
+
+ dict PyDict_New()
+ # Return value: New reference.
+ # Return a new empty dictionary, or NULL on failure.
+
+ object PyDictProxy_New(object dict)
+ # Return value: New reference.
+ # Return a proxy object for a mapping which enforces read-only
+ # behavior. This is normally used to create a proxy to prevent
+ # modification of the dictionary for non-dynamic class types.
+
+ void PyDict_Clear(object p)
+ # Empty an existing dictionary of all key-value pairs.
+
+ int PyDict_Contains(object p, object key) except -1
+ # Determine if dictionary p contains key. If an item in p
+ # matches key, return 1, otherwise return 0. On error, return
+ # -1. This is equivalent to the Python expression "key in p".
+
+ dict PyDict_Copy(object p)
+ # Return value: New reference.
+ # Return a new dictionary that contains the same key-value pairs as p.
+
+ int PyDict_SetItem(object p, object key, object val) except -1
+ # Insert value into the dictionary p with a key of key. key must
+ # be hashable; if it isn't, TypeError will be raised. Return 0 on
+ # success or -1 on failure.
+
int PyDict_SetItemString(object p, const char *key, object val) except -1
- # Insert value into the dictionary p using key as a key. key
- # should be a char*. The key object is created using
- # PyString_FromString(key). Return 0 on success or -1 on failure.
-
- int PyDict_DelItem(object p, object key) except -1
- # Remove the entry in dictionary p with key key. key must be
- # hashable; if it isn't, TypeError is raised. Return 0 on success
- # or -1 on failure.
-
+ # Insert value into the dictionary p using key as a key. key
+ # should be a char*. The key object is created using
+ # PyString_FromString(key). Return 0 on success or -1 on failure.
+
+ int PyDict_DelItem(object p, object key) except -1
+ # Remove the entry in dictionary p with key key. key must be
+ # hashable; if it isn't, TypeError is raised. Return 0 on success
+ # or -1 on failure.
+
int PyDict_DelItemString(object p, const char *key) except -1
- # Remove the entry in dictionary p which has a key specified by
- # the string key. Return 0 on success or -1 on failure.
-
- PyObject* PyDict_GetItem(object p, object key)
- # Return value: Borrowed reference.
- # Return the object from dictionary p which has a key key. Return
- # NULL if the key key is not present, but without setting an
- # exception.
-
+ # Remove the entry in dictionary p which has a key specified by
+ # the string key. Return 0 on success or -1 on failure.
+
+ PyObject* PyDict_GetItem(object p, object key)
+ # Return value: Borrowed reference.
+ # Return the object from dictionary p which has a key key. Return
+ # NULL if the key key is not present, but without setting an
+ # exception.
+
PyObject* PyDict_GetItemString(object p, const char *key)
- # Return value: Borrowed reference.
- # This is the same as PyDict_GetItem(), but key is specified as a
- # char*, rather than a PyObject*.
-
- list PyDict_Items(object p)
- # Return value: New reference.
- # Return a PyListObject containing all the items from the
- # dictionary, as in the dictionary method items() (see the Python
- # Library Reference).
-
- list PyDict_Keys(object p)
- # Return value: New reference.
- # Return a PyListObject containing all the keys from the
- # dictionary, as in the dictionary method keys() (see the Python
- # Library Reference).
-
- list PyDict_Values(object p)
- # Return value: New reference.
- # Return a PyListObject containing all the values from the
- # dictionary p, as in the dictionary method values() (see the
- # Python Library Reference).
-
- Py_ssize_t PyDict_Size(object p) except -1
- # Return the number of items in the dictionary. This is equivalent
- # to "len(p)" on a dictionary.
-
- int PyDict_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pvalue)
- # Iterate over all key-value pairs in the dictionary p. The int
- # referred to by ppos must be initialized to 0 prior to the first
- # call to this function to start the iteration; the function
- # returns true for each pair in the dictionary, and false once all
- # pairs have been reported. The parameters pkey and pvalue should
- # either point to PyObject* variables that will be filled in with
- # each key and value, respectively, or may be NULL. Any references
- # returned through them are borrowed. ppos should not be altered
- # during iteration. Its value represents offsets within the
- # internal dictionary structure, and since the structure is
- # sparse, the offsets are not consecutive.
- # For example:
- #
- #object key, *value;
- #int pos = 0;
- #
- #while (PyDict_Next(self->dict, &pos, &key, &value)) {
- # /* do something interesting with the values... */
- # ...
- #}
- # The dictionary p should not be mutated during iteration. It is
- # safe (since Python 2.1) to modify the values of the keys as you
- # iterate over the dictionary, but only so long as the set of keys
- # does not change. For example:
- # object key, *value;
- # int pos = 0;
- # while (PyDict_Next(self->dict, &pos, &key, &value)) {
- # int i = PyInt_AS_LONG(value) + 1;
- # object o = PyInt_FromLong(i);
- # if (o == NULL)
- # return -1;
- # if (PyDict_SetItem(self->dict, key, o) < 0) {
- # Py_DECREF(o);
- # return -1;
- # }
- # Py_DECREF(o);
- # }
-
- int PyDict_Merge(object a, object b, int override) except -1
- # Iterate over mapping object b adding key-value pairs to
- # dictionary a. b may be a dictionary, or any object supporting
- # PyMapping_Keys() and PyObject_GetItem(). If override is true,
- # existing pairs in a will be replaced if a matching key is found
- # in b, otherwise pairs will only be added if there is not a
- # matching key in a. Return 0 on success or -1 if an exception was
- # raised.
-
- int PyDict_Update(object a, object b) except -1
- # This is the same as PyDict_Merge(a, b, 1) in C, or a.update(b)
- # in Python. Return 0 on success or -1 if an exception was raised.
-
- int PyDict_MergeFromSeq2(object a, object seq2, int override) except -1
- # Update or merge into dictionary a, from the key-value pairs in
- # seq2. seq2 must be an iterable object producing iterable objects
- # of length 2, viewed as key-value pairs. In case of duplicate
- # keys, the last wins if override is true, else the first
- # wins. Return 0 on success or -1 if an exception was
- # raised. Equivalent Python (except for the return value):
- #
- #def PyDict_MergeFromSeq2(a, seq2, override):
- # for key, value in seq2:
- # if override or key not in a:
- # a[key] = value
+ # Return value: Borrowed reference.
+ # This is the same as PyDict_GetItem(), but key is specified as a
+ # char*, rather than a PyObject*.
+
+ list PyDict_Items(object p)
+ # Return value: New reference.
+ # Return a PyListObject containing all the items from the
+ # dictionary, as in the dictionary method items() (see the Python
+ # Library Reference).
+
+ list PyDict_Keys(object p)
+ # Return value: New reference.
+ # Return a PyListObject containing all the keys from the
+ # dictionary, as in the dictionary method keys() (see the Python
+ # Library Reference).
+
+ list PyDict_Values(object p)
+ # Return value: New reference.
+ # Return a PyListObject containing all the values from the
+ # dictionary p, as in the dictionary method values() (see the
+ # Python Library Reference).
+
+ Py_ssize_t PyDict_Size(object p) except -1
+ # Return the number of items in the dictionary. This is equivalent
+ # to "len(p)" on a dictionary.
+
+ int PyDict_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pvalue)
+ # Iterate over all key-value pairs in the dictionary p. The int
+ # referred to by ppos must be initialized to 0 prior to the first
+ # call to this function to start the iteration; the function
+ # returns true for each pair in the dictionary, and false once all
+ # pairs have been reported. The parameters pkey and pvalue should
+ # either point to PyObject* variables that will be filled in with
+ # each key and value, respectively, or may be NULL. Any references
+ # returned through them are borrowed. ppos should not be altered
+ # during iteration. Its value represents offsets within the
+ # internal dictionary structure, and since the structure is
+ # sparse, the offsets are not consecutive.
+ # For example:
+ #
+ #object key, *value;
+ #int pos = 0;
+ #
+ #while (PyDict_Next(self->dict, &pos, &key, &value)) {
+ # /* do something interesting with the values... */
+ # ...
+ #}
+ # The dictionary p should not be mutated during iteration. It is
+ # safe (since Python 2.1) to modify the values of the keys as you
+ # iterate over the dictionary, but only so long as the set of keys
+ # does not change. For example:
+ # object key, *value;
+ # int pos = 0;
+ # while (PyDict_Next(self->dict, &pos, &key, &value)) {
+ # int i = PyInt_AS_LONG(value) + 1;
+ # object o = PyInt_FromLong(i);
+ # if (o == NULL)
+ # return -1;
+ # if (PyDict_SetItem(self->dict, key, o) < 0) {
+ # Py_DECREF(o);
+ # return -1;
+ # }
+ # Py_DECREF(o);
+ # }
+
+ int PyDict_Merge(object a, object b, int override) except -1
+ # Iterate over mapping object b adding key-value pairs to
+ # dictionary a. b may be a dictionary, or any object supporting
+ # PyMapping_Keys() and PyObject_GetItem(). If override is true,
+ # existing pairs in a will be replaced if a matching key is found
+ # in b, otherwise pairs will only be added if there is not a
+ # matching key in a. Return 0 on success or -1 if an exception was
+ # raised.
+
+ int PyDict_Update(object a, object b) except -1
+ # This is the same as PyDict_Merge(a, b, 1) in C, or a.update(b)
+ # in Python. Return 0 on success or -1 if an exception was raised.
+
+ int PyDict_MergeFromSeq2(object a, object seq2, int override) except -1
+ # Update or merge into dictionary a, from the key-value pairs in
+ # seq2. seq2 must be an iterable object producing iterable objects
+ # of length 2, viewed as key-value pairs. In case of duplicate
+ # keys, the last wins if override is true, else the first
+ # wins. Return 0 on success or -1 if an exception was
+ # raised. Equivalent Python (except for the return value):
+ #
+ #def PyDict_MergeFromSeq2(a, seq2, override):
+ # for key, value in seq2:
+ # if override or key not in a:
+ # a[key] = value
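
The declarations above expose the CPython dict C API to Cython code, which is mainly useful for borrowed-reference lookups and C-level iteration. A rough sketch under those assumptions (the function names are illustrative, not part of the pxd):

    from cpython.dict cimport PyDict_GetItemString, PyDict_Next
    from cpython.object cimport PyObject

    def c_level_lookup(dict d, bytes key):
        # Borrowed-reference lookup: a missing key yields NULL, not a KeyError.
        cdef PyObject* item = PyDict_GetItemString(d, key)
        if item != NULL:
            return <object>item      # the cast creates a new owned reference
        return None

    def sum_values(dict d):
        # PyDict_Next hands out borrowed references; do not mutate d while iterating.
        cdef Py_ssize_t pos = 0
        cdef PyObject* key
        cdef PyObject* value
        total = 0
        while PyDict_Next(d, &pos, &key, &value):
            total += <object>value
        return total

As the comments above note, PyDict_GetItemString leaves the error indicator untouched when the key is absent, which is why the NULL check is enough here.
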
diff --git a/contrib/tools/cython/Cython/Includes/cpython/exc.pxd b/contrib/tools/cython/Cython/Includes/cpython/exc.pxd
index de0a086e11..bc57c0e571 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/exc.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/exc.pxd
@@ -1,158 +1,158 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- #####################################################################
- # 3. Exception Handling
- #####################################################################
-
- # The functions described in this chapter will let you handle and
- # raise Python exceptions. It is important to understand some of
- # the basics of Python exception handling. It works somewhat like
- # the Unix errno variable: there is a global indicator (per
- # thread) of the last error that occurred. Most functions don't
- # clear this on success, but will set it to indicate the cause of
- # the error on failure. Most functions also return an error
- # indicator, usually NULL if they are supposed to return a
- # pointer, or -1 if they return an integer (exception: the
- # PyArg_*() functions return 1 for success and 0 for failure).
-
- # When a function must fail because some function it called
- # failed, it generally doesn't set the error indicator; the
- # function it called already set it. It is responsible for either
- # handling the error and clearing the exception or returning after
- # cleaning up any resources it holds (such as object references or
- # memory allocations); it should not continue normally if it is
- # not prepared to handle the error. If returning due to an error,
- # it is important to indicate to the caller that an error has been
- # set. If the error is not handled or carefully propagated,
- # additional calls into the Python/C API may not behave as
- # intended and may fail in mysterious ways.
-
- # The error indicator consists of three Python objects
- # corresponding to the Python variables sys.exc_type,
- # sys.exc_value and sys.exc_traceback. API functions exist to
- # interact with the error indicator in various ways. There is a
- # separate error indicator for each thread.
-
- void PyErr_Print()
- # Print a standard traceback to sys.stderr and clear the error
- # indicator. Call this function only when the error indicator is
- # set. (Otherwise it will cause a fatal error!)
-
- PyObject* PyErr_Occurred()
- # Return value: Borrowed reference.
- # Test whether the error indicator is set. If set, return the
- # exception type (the first argument to the last call to one of
- # the PyErr_Set*() functions or to PyErr_Restore()). If not set,
- # return NULL. You do not own a reference to the return value, so
- # you do not need to Py_DECREF() it. Note: Do not compare the
- # return value to a specific exception; use
- # PyErr_ExceptionMatches() instead, shown below. (The comparison
- # could easily fail since the exception may be an instance instead
+
+cdef extern from "Python.h":
+
+ #####################################################################
+ # 3. Exception Handling
+ #####################################################################
+
+ # The functions described in this chapter will let you handle and
+ # raise Python exceptions. It is important to understand some of
+ # the basics of Python exception handling. It works somewhat like
+ # the Unix errno variable: there is a global indicator (per
+ # thread) of the last error that occurred. Most functions don't
+ # clear this on success, but will set it to indicate the cause of
+ # the error on failure. Most functions also return an error
+ # indicator, usually NULL if they are supposed to return a
+ # pointer, or -1 if they return an integer (exception: the
+ # PyArg_*() functions return 1 for success and 0 for failure).
+
+ # When a function must fail because some function it called
+ # failed, it generally doesn't set the error indicator; the
+ # function it called already set it. It is responsible for either
+ # handling the error and clearing the exception or returning after
+ # cleaning up any resources it holds (such as object references or
+ # memory allocations); it should not continue normally if it is
+ # not prepared to handle the error. If returning due to an error,
+ # it is important to indicate to the caller that an error has been
+ # set. If the error is not handled or carefully propagated,
+ # additional calls into the Python/C API may not behave as
+ # intended and may fail in mysterious ways.
+
+ # The error indicator consists of three Python objects
+ # corresponding to the Python variables sys.exc_type,
+ # sys.exc_value and sys.exc_traceback. API functions exist to
+ # interact with the error indicator in various ways. There is a
+ # separate error indicator for each thread.
+
+ void PyErr_Print()
+ # Print a standard traceback to sys.stderr and clear the error
+ # indicator. Call this function only when the error indicator is
+ # set. (Otherwise it will cause a fatal error!)
+
+ PyObject* PyErr_Occurred()
+ # Return value: Borrowed reference.
+ # Test whether the error indicator is set. If set, return the
+ # exception type (the first argument to the last call to one of
+ # the PyErr_Set*() functions or to PyErr_Restore()). If not set,
+ # return NULL. You do not own a reference to the return value, so
+ # you do not need to Py_DECREF() it. Note: Do not compare the
+ # return value to a specific exception; use
+ # PyErr_ExceptionMatches() instead, shown below. (The comparison
+ # could easily fail since the exception may be an instance instead
# of a class, in the case of a class exception, or it may be a
- # subclass of the expected exception.)
-
- bint PyErr_ExceptionMatches(object exc)
- # Equivalent to "PyErr_GivenExceptionMatches(PyErr_Occurred(),
- # exc)". This should only be called when an exception is actually
- # set; a memory access violation will occur if no exception has
- # been raised.
-
- bint PyErr_GivenExceptionMatches(object given, object exc)
- # Return true if the given exception matches the exception in
- # exc. If exc is a class object, this also returns true when given
- # is an instance of a subclass. If exc is a tuple, all exceptions
- # in the tuple (and recursively in subtuples) are searched for a
- # match. If given is NULL, a memory access violation will occur.
-
- void PyErr_NormalizeException(PyObject** exc, PyObject** val, PyObject** tb)
- # Under certain circumstances, the values returned by
- # PyErr_Fetch() below can be ``unnormalized'', meaning that *exc
- # is a class object but *val is not an instance of the same
- # class. This function can be used to instantiate the class in
- # that case. If the values are already normalized, nothing
- # happens. The delayed normalization is implemented to improve
- # performance.
-
- void PyErr_Clear()
- # Clear the error indicator. If the error indicator is not set, there is no effect.
-
- void PyErr_Fetch(PyObject** ptype, PyObject** pvalue, PyObject** ptraceback)
- # Retrieve the error indicator into three variables whose
- # addresses are passed. If the error indicator is not set, set all
- # three variables to NULL. If it is set, it will be cleared and
- # you own a reference to each object retrieved. The value and
- # traceback object may be NULL even when the type object is
- # not. Note: This function is normally only used by code that
- # needs to handle exceptions or by code that needs to save and
- # restore the error indicator temporarily.
-
- void PyErr_Restore(PyObject* type, PyObject* value, PyObject* traceback)
- # Set the error indicator from the three objects. If the error
- # indicator is already set, it is cleared first. If the objects
- # are NULL, the error indicator is cleared. Do not pass a NULL
- # type and non-NULL value or traceback. The exception type should
- # be a class. Do not pass an invalid exception type or
- # value. (Violating these rules will cause subtle problems later.)
- # This call takes away a reference to each object: you must own a
- # reference to each object before the call and after the call you
- # no longer own these references. (If you don't understand this,
- # don't use this function. I warned you.) Note: This function is
- # normally only used by code that needs to save and restore the
- # error indicator temporarily; use PyErr_Fetch() to save the
- # current exception state.
-
- void PyErr_SetString(object type, char *message)
- # This is the most common way to set the error indicator. The
- # first argument specifies the exception type; it is normally one
- # of the standard exceptions, e.g. PyExc_RuntimeError. You need
- # not increment its reference count. The second argument is an
- # error message; it is converted to a string object.
-
- void PyErr_SetObject(object type, object value)
- # This function is similar to PyErr_SetString() but lets you
- # specify an arbitrary Python object for the ``value'' of the
- # exception.
-
- PyObject* PyErr_Format(object exception, char *format, ...) except NULL
- # Return value: Always NULL.
- # This function sets the error indicator and returns
- # NULL. exception should be a Python exception (class, not an
- # instance). format should be a string, containing format codes,
- # similar to printf(). The width.precision before a format code is
- # parsed, but the width part is ignored.
-
- void PyErr_SetNone(object type)
- # This is a shorthand for "PyErr_SetObject(type, Py_None)".
-
- int PyErr_BadArgument() except 0
-
- # This is a shorthand for "PyErr_SetString(PyExc_TypeError,
- # message)", where message indicates that a built-in operation was
- # invoked with an illegal argument. It is mostly for internal use.
-
- PyObject* PyErr_NoMemory() except NULL
- # Return value: Always NULL.
- # This is a shorthand for "PyErr_SetNone(PyExc_MemoryError)"; it
- # returns NULL so an object allocation function can write "return
- # PyErr_NoMemory();" when it runs out of memory.
-
- PyObject* PyErr_SetFromErrno(object type) except NULL
- # Return value: Always NULL.
- # This is a convenience function to raise an exception when a C
- # library function has returned an error and set the C variable
- # errno. It constructs a tuple object whose first item is the
- # integer errno value and whose second item is the corresponding
- # error message (gotten from strerror()), and then calls
- # "PyErr_SetObject(type, object)". On Unix, when the errno value
- # is EINTR, indicating an interrupted system call, this calls
- # PyErr_CheckSignals(), and if that set the error indicator,
- # leaves it set to that. The function always returns NULL, so a
- # wrapper function around a system call can write "return
- # PyErr_SetFromErrno(type);" when the system call returns an
- # error.
-
+ # subclass of the expected exception.)
+
+ bint PyErr_ExceptionMatches(object exc)
+ # Equivalent to "PyErr_GivenExceptionMatches(PyErr_Occurred(),
+ # exc)". This should only be called when an exception is actually
+ # set; a memory access violation will occur if no exception has
+ # been raised.
+
+ bint PyErr_GivenExceptionMatches(object given, object exc)
+ # Return true if the given exception matches the exception in
+ # exc. If exc is a class object, this also returns true when given
+ # is an instance of a subclass. If exc is a tuple, all exceptions
+ # in the tuple (and recursively in subtuples) are searched for a
+ # match. If given is NULL, a memory access violation will occur.
+
+ void PyErr_NormalizeException(PyObject** exc, PyObject** val, PyObject** tb)
+ # Under certain circumstances, the values returned by
+ # PyErr_Fetch() below can be ``unnormalized'', meaning that *exc
+ # is a class object but *val is not an instance of the same
+ # class. This function can be used to instantiate the class in
+ # that case. If the values are already normalized, nothing
+ # happens. The delayed normalization is implemented to improve
+ # performance.
+
+ void PyErr_Clear()
+ # Clear the error indicator. If the error indicator is not set, there is no effect.
+
+ void PyErr_Fetch(PyObject** ptype, PyObject** pvalue, PyObject** ptraceback)
+ # Retrieve the error indicator into three variables whose
+ # addresses are passed. If the error indicator is not set, set all
+ # three variables to NULL. If it is set, it will be cleared and
+ # you own a reference to each object retrieved. The value and
+ # traceback object may be NULL even when the type object is
+ # not. Note: This function is normally only used by code that
+ # needs to handle exceptions or by code that needs to save and
+ # restore the error indicator temporarily.
+
+ void PyErr_Restore(PyObject* type, PyObject* value, PyObject* traceback)
+ # Set the error indicator from the three objects. If the error
+ # indicator is already set, it is cleared first. If the objects
+ # are NULL, the error indicator is cleared. Do not pass a NULL
+ # type and non-NULL value or traceback. The exception type should
+ # be a class. Do not pass an invalid exception type or
+ # value. (Violating these rules will cause subtle problems later.)
+ # This call takes away a reference to each object: you must own a
+ # reference to each object before the call and after the call you
+ # no longer own these references. (If you don't understand this,
+ # don't use this function. I warned you.) Note: This function is
+ # normally only used by code that needs to save and restore the
+ # error indicator temporarily; use PyErr_Fetch() to save the
+ # current exception state.
+
+ void PyErr_SetString(object type, char *message)
+ # This is the most common way to set the error indicator. The
+ # first argument specifies the exception type; it is normally one
+ # of the standard exceptions, e.g. PyExc_RuntimeError. You need
+ # not increment its reference count. The second argument is an
+ # error message; it is converted to a string object.
+
+ void PyErr_SetObject(object type, object value)
+ # This function is similar to PyErr_SetString() but lets you
+ # specify an arbitrary Python object for the ``value'' of the
+ # exception.
+
+ PyObject* PyErr_Format(object exception, char *format, ...) except NULL
+ # Return value: Always NULL.
+ # This function sets the error indicator and returns
+ # NULL. exception should be a Python exception (class, not an
+ # instance). format should be a string, containing format codes,
+ # similar to printf(). The width.precision before a format code is
+ # parsed, but the width part is ignored.
+
+ void PyErr_SetNone(object type)
+ # This is a shorthand for "PyErr_SetObject(type, Py_None)".
+
+ int PyErr_BadArgument() except 0
+
+ # This is a shorthand for "PyErr_SetString(PyExc_TypeError,
+ # message)", where message indicates that a built-in operation was
+ # invoked with an illegal argument. It is mostly for internal use.
+
+ PyObject* PyErr_NoMemory() except NULL
+ # Return value: Always NULL.
+ # This is a shorthand for "PyErr_SetNone(PyExc_MemoryError)"; it
+ # returns NULL so an object allocation function can write "return
+ # PyErr_NoMemory();" when it runs out of memory.
+
+ PyObject* PyErr_SetFromErrno(object type) except NULL
+ # Return value: Always NULL.
+ # This is a convenience function to raise an exception when a C
+ # library function has returned an error and set the C variable
+ # errno. It constructs a tuple object whose first item is the
+ # integer errno value and whose second item is the corresponding
+ # error message (gotten from strerror()), and then calls
+ # "PyErr_SetObject(type, object)". On Unix, when the errno value
+ # is EINTR, indicating an interrupted system call, this calls
+ # PyErr_CheckSignals(), and if that set the error indicator,
+ # leaves it set to that. The function always returns NULL, so a
+ # wrapper function around a system call can write "return
+ # PyErr_SetFromErrno(type);" when the system call returns an
+ # error.
+
PyObject* PyErr_SetFromErrnoWithFilenameObject(object type, object filenameObject) except NULL
# Similar to PyErr_SetFromErrno(), with the additional behavior
# that if filenameObject is not NULL, it is passed to the
@@ -160,98 +160,98 @@ cdef extern from "Python.h":
# In the case of OSError exception, this is used to define
# the filename attribute of the exception instance.
- PyObject* PyErr_SetFromErrnoWithFilename(object type, char *filename) except NULL
- # Return value: Always NULL. Similar to PyErr_SetFromErrno(),
- # with the additional behavior that if filename is not NULL, it is
- # passed to the constructor of type as a third parameter. In the
- # case of exceptions such as IOError and OSError, this is used to
- # define the filename attribute of the exception instance.
-
- PyObject* PyErr_SetFromWindowsErr(int ierr) except NULL
- # Return value: Always NULL. This is a convenience function to
- # raise WindowsError. If called with ierr of 0, the error code
- # returned by a call to GetLastError() is used instead. It calls
- # the Win32 function FormatMessage() to retrieve the Windows
- # description of error code given by ierr or GetLastError(), then
- # it constructs a tuple object whose first item is the ierr value
- # and whose second item is the corresponding error message (gotten
- # from FormatMessage()), and then calls
- # "PyErr_SetObject(PyExc_WindowsError, object)". This function
- # always returns NULL. Availability: Windows.
-
- PyObject* PyErr_SetExcFromWindowsErr(object type, int ierr) except NULL
- # Return value: Always NULL. Similar to
- # PyErr_SetFromWindowsErr(), with an additional parameter
- # specifying the exception type to be raised. Availability:
- # Windows. New in version 2.3.
-
- PyObject* PyErr_SetFromWindowsErrWithFilename(int ierr, char *filename) except NULL
- # Return value: Always NULL. Similar to
- # PyErr_SetFromWindowsErr(), with the additional behavior that if
- # filename is not NULL, it is passed to the constructor of
- # WindowsError as a third parameter. Availability: Windows.
-
- PyObject* PyErr_SetExcFromWindowsErrWithFilename(object type, int ierr, char *filename) except NULL
- # Return value: Always NULL.
- # Similar to PyErr_SetFromWindowsErrWithFilename(), with an
- # additional parameter specifying the exception type to be
- # raised. Availability: Windows.
-
- void PyErr_BadInternalCall()
- # This is a shorthand for "PyErr_SetString(PyExc_TypeError,
- # message)", where message indicates that an internal operation
- # (e.g. a Python/C API function) was invoked with an illegal
- # argument. It is mostly for internal use.
-
- int PyErr_WarnEx(object category, char *message, int stacklevel) except -1
- # Issue a warning message. The category argument is a warning
- # category (see below) or NULL; the message argument is a message
- # string. stacklevel is a positive number giving a number of stack
- # frames; the warning will be issued from the currently executing
- # line of code in that stack frame. A stacklevel of 1 is the
- # function calling PyErr_WarnEx(), 2 is the function above that,
- # and so forth.
-
- int PyErr_WarnExplicit(object category, char *message, char *filename, int lineno, char *module, object registry) except -1
- # Issue a warning message with explicit control over all warning
- # attributes. This is a straightforward wrapper around the Python
- # function warnings.warn_explicit(), see there for more
- # information. The module and registry arguments may be set to
- # NULL to get the default effect described there.
-
- int PyErr_CheckSignals() except -1
- # This function interacts with Python's signal handling. It checks
- # whether a signal has been sent to the processes and if so,
- # invokes the corresponding signal handler. If the signal module
- # is supported, this can invoke a signal handler written in
- # Python. In all cases, the default effect for SIGINT is to raise
- # the KeyboardInterrupt exception. If an exception is raised the
- # error indicator is set and the function returns 1; otherwise the
- # function returns 0. The error indicator may or may not be
- # cleared if it was previously set.
-
- void PyErr_SetInterrupt() nogil
- # This function simulates the effect of a SIGINT signal arriving
- # -- the next time PyErr_CheckSignals() is called,
- # KeyboardInterrupt will be raised. It may be called without
- # holding the interpreter lock.
-
- object PyErr_NewException(char *name, object base, object dict)
- # Return value: New reference.
- # This utility function creates and returns a new exception
- # object. The name argument must be the name of the new exception,
- # a C string of the form module.class. The base and dict arguments
- # are normally NULL. This creates a class object derived from
- # Exception (accessible in C as PyExc_Exception).
-
- void PyErr_WriteUnraisable(object obj)
- # This utility function prints a warning message to sys.stderr
- # when an exception has been set but it is impossible for the
- # interpreter to actually raise the exception. It is used, for
- # example, when an exception occurs in an __del__() method.
- #
- # The function is called with a single argument obj that
- # identifies the context in which the unraisable exception
- # occurred. The repr of obj will be printed in the warning
- # message.
-
+ PyObject* PyErr_SetFromErrnoWithFilename(object type, char *filename) except NULL
+ # Return value: Always NULL. Similar to PyErr_SetFromErrno(),
+ # with the additional behavior that if filename is not NULL, it is
+ # passed to the constructor of type as a third parameter. In the
+ # case of exceptions such as IOError and OSError, this is used to
+ # define the filename attribute of the exception instance.
+
+ PyObject* PyErr_SetFromWindowsErr(int ierr) except NULL
+ # Return value: Always NULL. This is a convenience function to
+ # raise WindowsError. If called with ierr of 0, the error code
+ # returned by a call to GetLastError() is used instead. It calls
+ # the Win32 function FormatMessage() to retrieve the Windows
+ # description of error code given by ierr or GetLastError(), then
+ # it constructs a tuple object whose first item is the ierr value
+ # and whose second item is the corresponding error message (gotten
+ # from FormatMessage()), and then calls
+ # "PyErr_SetObject(PyExc_WindowsError, object)". This function
+ # always returns NULL. Availability: Windows.
+
+ PyObject* PyErr_SetExcFromWindowsErr(object type, int ierr) except NULL
+ # Return value: Always NULL. Similar to
+ # PyErr_SetFromWindowsErr(), with an additional parameter
+ # specifying the exception type to be raised. Availability:
+ # Windows. New in version 2.3.
+
+ PyObject* PyErr_SetFromWindowsErrWithFilename(int ierr, char *filename) except NULL
+ # Return value: Always NULL. Similar to
+ # PyErr_SetFromWindowsErr(), with the additional behavior that if
+ # filename is not NULL, it is passed to the constructor of
+ # WindowsError as a third parameter. Availability: Windows.
+
+ PyObject* PyErr_SetExcFromWindowsErrWithFilename(object type, int ierr, char *filename) except NULL
+ # Return value: Always NULL.
+ # Similar to PyErr_SetFromWindowsErrWithFilename(), with an
+ # additional parameter specifying the exception type to be
+ # raised. Availability: Windows.
+
+ void PyErr_BadInternalCall()
+ # This is a shorthand for "PyErr_SetString(PyExc_TypeError,
+ # message)", where message indicates that an internal operation
+ # (e.g. a Python/C API function) was invoked with an illegal
+ # argument. It is mostly for internal use.
+
+ int PyErr_WarnEx(object category, char *message, int stacklevel) except -1
+ # Issue a warning message. The category argument is a warning
+ # category (see below) or NULL; the message argument is a message
+ # string. stacklevel is a positive number giving a number of stack
+ # frames; the warning will be issued from the currently executing
+ # line of code in that stack frame. A stacklevel of 1 is the
+ # function calling PyErr_WarnEx(), 2 is the function above that,
+ # and so forth.
+
+ int PyErr_WarnExplicit(object category, char *message, char *filename, int lineno, char *module, object registry) except -1
+ # Issue a warning message with explicit control over all warning
+ # attributes. This is a straightforward wrapper around the Python
+ # function warnings.warn_explicit(), see there for more
+ # information. The module and registry arguments may be set to
+ # NULL to get the default effect described there.
+
+ int PyErr_CheckSignals() except -1
+ # This function interacts with Python's signal handling. It checks
+ # whether a signal has been sent to the processes and if so,
+ # invokes the corresponding signal handler. If the signal module
+ # is supported, this can invoke a signal handler written in
+ # Python. In all cases, the default effect for SIGINT is to raise
+ # the KeyboardInterrupt exception. If an exception is raised the
+ # error indicator is set and the function returns -1; otherwise the
+ # function returns 0. The error indicator may or may not be
+ # cleared if it was previously set.
+
+ void PyErr_SetInterrupt() nogil
+ # This function simulates the effect of a SIGINT signal arriving
+ # -- the next time PyErr_CheckSignals() is called,
+ # KeyboardInterrupt will be raised. It may be called without
+ # holding the interpreter lock.
+
+ object PyErr_NewException(char *name, object base, object dict)
+ # Return value: New reference.
+ # This utility function creates and returns a new exception
+ # object. The name argument must be the name of the new exception,
+ # a C string of the form module.class. The base and dict arguments
+ # are normally NULL. This creates a class object derived from
+ # Exception (accessible in C as PyExc_Exception).
+
+ void PyErr_WriteUnraisable(object obj)
+ # This utility function prints a warning message to sys.stderr
+ # when an exception has been set but it is impossible for the
+ # interpreter to actually raise the exception. It is used, for
+ # example, when an exception occurs in an __del__() method.
+ #
+ # The function is called with a single argument obj that
+ # identifies the context in which the unraisable exception
+ # occurred. The repr of obj will be printed in the warning
+ # message.
+
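
A short, hedged sketch of how these declarations are typically used from Cython: checking the per-thread error indicator and issuing a warning through the C API. The function names below are illustrative only:

    from cpython.exc cimport PyErr_Occurred, PyErr_WarnEx
    from cpython.object cimport PyObject

    cdef bint error_pending():
        # PyErr_Occurred returns a borrowed reference (or NULL); never DECREF it.
        return PyErr_Occurred() != NULL

    def warn_slow_path():
        # The `except -1` declaration re-raises here if the warning is turned
        # into an error by a warnings filter.
        PyErr_WarnEx(RuntimeWarning, b"falling back to the slow path", 1)
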
diff --git a/contrib/tools/cython/Cython/Includes/cpython/float.pxd b/contrib/tools/cython/Cython/Includes/cpython/float.pxd
index 2ce7762ba2..65328f31ea 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/float.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/float.pxd
@@ -1,39 +1,39 @@
-cdef extern from "Python.h":
-
- ############################################################################
- # 7.2.3
- ############################################################################
- # PyFloatObject
- #
- # This subtype of PyObject represents a Python floating point object.
-
- # PyTypeObject PyFloat_Type
- #
- # This instance of PyTypeObject represents the Python floating
- # point type. This is the same object as float and
- # types.FloatType.
-
- bint PyFloat_Check(object p)
- # Return true if its argument is a PyFloatObject or a subtype of
- # PyFloatObject.
-
- bint PyFloat_CheckExact(object p)
- # Return true if its argument is a PyFloatObject, but not a
- # subtype of PyFloatObject.
-
- object PyFloat_FromString(object str, char **pend)
- # Return value: New reference.
- # Create a PyFloatObject object based on the string value in str,
- # or NULL on failure. The pend argument is ignored. It remains
- # only for backward compatibility.
-
- object PyFloat_FromDouble(double v)
- # Return value: New reference.
- # Create a PyFloatObject object from v, or NULL on failure.
-
- double PyFloat_AsDouble(object pyfloat) except? -1
- # Return a C double representation of the contents of pyfloat.
-
- double PyFloat_AS_DOUBLE(object pyfloat)
- # Return a C double representation of the contents of pyfloat, but
- # without error checking.
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 7.2.3
+ ############################################################################
+ # PyFloatObject
+ #
+ # This subtype of PyObject represents a Python floating point object.
+
+ # PyTypeObject PyFloat_Type
+ #
+ # This instance of PyTypeObject represents the Python floating
+ # point type. This is the same object as float and
+ # types.FloatType.
+
+ bint PyFloat_Check(object p)
+ # Return true if its argument is a PyFloatObject or a subtype of
+ # PyFloatObject.
+
+ bint PyFloat_CheckExact(object p)
+ # Return true if its argument is a PyFloatObject, but not a
+ # subtype of PyFloatObject.
+
+ object PyFloat_FromString(object str, char **pend)
+ # Return value: New reference.
+ # Create a PyFloatObject object based on the string value in str,
+ # or NULL on failure. The pend argument is ignored. It remains
+ # only for backward compatibility.
+
+ object PyFloat_FromDouble(double v)
+ # Return value: New reference.
+ # Create a PyFloatObject object from v, or NULL on failure.
+
+ double PyFloat_AsDouble(object pyfloat) except? -1
+ # Return a C double representation of the contents of pyfloat.
+
+ double PyFloat_AS_DOUBLE(object pyfloat)
+ # Return a C double representation of the contents of pyfloat, but
+ # without error checking.
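
Since PyFloat_AS_DOUBLE skips error checking, a common pattern is to guard it with PyFloat_Check, as in this illustrative sketch (the function name is not part of the pxd):

    from cpython.float cimport PyFloat_Check, PyFloat_AS_DOUBLE, PyFloat_FromDouble

    def scale(value, double factor):
        cdef double v
        if not PyFloat_Check(value):
            raise TypeError("expected a float object")
        v = PyFloat_AS_DOUBLE(value)   # unchecked fast path, guarded by PyFloat_Check above
        return PyFloat_FromDouble(v * factor)
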
diff --git a/contrib/tools/cython/Cython/Includes/cpython/function.pxd b/contrib/tools/cython/Cython/Includes/cpython/function.pxd
index 066fd2683a..0002a3f6cb 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/function.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/function.pxd
@@ -1,65 +1,65 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- ############################################################################
- # 7.5.3 Function Objects
- ############################################################################
- # There are a few functions specific to Python functions.
-
- # PyFunctionObject
- #
- # The C structure used for functions.
-
- # PyTypeObject PyFunction_Type
- #
- # This is an instance of PyTypeObject and represents the Python
- # function type. It is exposed to Python programmers as
- # types.FunctionType.
-
- bint PyFunction_Check(object o)
- # Return true if o is a function object (has type
- # PyFunction_Type). The parameter must not be NULL.
-
- object PyFunction_New(object code, object globals)
- # Return value: New reference.
- # Return a new function object associated with the code object
- # code. globals must be a dictionary with the global variables
- # accessible to the function.
- # The function's docstring, name and __module__ are retrieved from
- # the code object, the argument defaults and closure are set to
- # NULL.
-
- PyObject* PyFunction_GetCode(object op) except? NULL
- # Return value: Borrowed reference.
- # Return the code object associated with the function object op.
-
- PyObject* PyFunction_GetGlobals(object op) except? NULL
- # Return value: Borrowed reference.
- # Return the globals dictionary associated with the function object op.
-
- PyObject* PyFunction_GetModule(object op) except? NULL
- # Return value: Borrowed reference.
- # Return the __module__ attribute of the function object op. This
- # is normally a string containing the module name, but can be set
- # to any other object by Python code.
-
- PyObject* PyFunction_GetDefaults(object op) except? NULL
- # Return value: Borrowed reference.
- # Return the argument default values of the function object
- # op. This can be a tuple of arguments or NULL.
-
- int PyFunction_SetDefaults(object op, object defaults) except -1
- # Set the argument default values for the function object
- # op. defaults must be Py_None or a tuple.
- # Raises SystemError and returns -1 on failure.
-
- PyObject* PyFunction_GetClosure(object op) except? NULL
- # Return value: Borrowed reference.
- # Return the closure associated with the function object op. This
- # can be NULL or a tuple of cell objects.
-
- int PyFunction_SetClosure(object op, object closure) except -1
- # Set the closure associated with the function object op. closure
- # must be Py_None or a tuple of cell objects.
- # Raises SystemError and returns -1 on failure.
+
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 7.5.3 Function Objects
+ ############################################################################
+ # There are a few functions specific to Python functions.
+
+ # PyFunctionObject
+ #
+ # The C structure used for functions.
+
+ # PyTypeObject PyFunction_Type
+ #
+ # This is an instance of PyTypeObject and represents the Python
+ # function type. It is exposed to Python programmers as
+ # types.FunctionType.
+
+ bint PyFunction_Check(object o)
+ # Return true if o is a function object (has type
+ # PyFunction_Type). The parameter must not be NULL.
+
+ object PyFunction_New(object code, object globals)
+ # Return value: New reference.
+ # Return a new function object associated with the code object
+ # code. globals must be a dictionary with the global variables
+ # accessible to the function.
+ # The function's docstring, name and __module__ are retrieved from
+ # the code object, the argument defaults and closure are set to
+ # NULL.
+
+ PyObject* PyFunction_GetCode(object op) except? NULL
+ # Return value: Borrowed reference.
+ # Return the code object associated with the function object op.
+
+ PyObject* PyFunction_GetGlobals(object op) except? NULL
+ # Return value: Borrowed reference.
+ # Return the globals dictionary associated with the function object op.
+
+ PyObject* PyFunction_GetModule(object op) except? NULL
+ # Return value: Borrowed reference.
+ # Return the __module__ attribute of the function object op. This
+ # is normally a string containing the module name, but can be set
+ # to any other object by Python code.
+
+ PyObject* PyFunction_GetDefaults(object op) except? NULL
+ # Return value: Borrowed reference.
+ # Return the argument default values of the function object
+ # op. This can be a tuple of arguments or NULL.
+
+ int PyFunction_SetDefaults(object op, object defaults) except -1
+ # Set the argument default values for the function object
+ # op. defaults must be Py_None or a tuple.
+ # Raises SystemError and returns -1 on failure.
+
+ PyObject* PyFunction_GetClosure(object op) except? NULL
+ # Return value: Borrowed reference.
+ # Return the closure associated with the function object op. This
+ # can be NULL or a tuple of cell objects.
+
+ int PyFunction_SetClosure(object op, object closure) except -1
+ # Set the closure associated with the function object op. closure
+ # must be Py_None or a tuple of cell objects.
+ # Raises SystemError and returns -1 on failure.
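
An illustrative sketch of using the borrowed-reference accessors above from Cython; the helper name is made up for the example:

    from cpython.function cimport PyFunction_Check, PyFunction_GetDefaults
    from cpython.object cimport PyObject

    def default_count(f):
        # Count positional defaults through the borrowed-reference accessor.
        cdef PyObject* defaults
        if not PyFunction_Check(f):
            raise TypeError("expected a plain Python function")
        defaults = PyFunction_GetDefaults(f)
        if defaults == NULL:
            return 0                   # no defaults and no exception pending
        return len(<object>defaults)
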
diff --git a/contrib/tools/cython/Cython/Includes/cpython/getargs.pxd b/contrib/tools/cython/Cython/Includes/cpython/getargs.pxd
index fd3c307c6b..be6df3285a 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/getargs.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/getargs.pxd
@@ -1,12 +1,12 @@
-
-cdef extern from "Python.h":
- #####################################################################
- # 5.5 Parsing arguments and building values
- #####################################################################
- ctypedef struct va_list
- int PyArg_ParseTuple(object args, char *format, ...) except 0
- int PyArg_VaParse(object args, char *format, va_list vargs) except 0
- int PyArg_ParseTupleAndKeywords(object args, object kw, char *format, char *keywords[], ...) except 0
- int PyArg_VaParseTupleAndKeywords(object args, object kw, char *format, char *keywords[], va_list vargs) except 0
- int PyArg_Parse(object args, char *format, ...) except 0
- int PyArg_UnpackTuple(object args, char *name, Py_ssize_t min, Py_ssize_t max, ...) except 0
+
+cdef extern from "Python.h":
+ #####################################################################
+ # 5.5 Parsing arguments and building values
+ #####################################################################
+ ctypedef struct va_list
+ int PyArg_ParseTuple(object args, char *format, ...) except 0
+ int PyArg_VaParse(object args, char *format, va_list vargs) except 0
+ int PyArg_ParseTupleAndKeywords(object args, object kw, char *format, char *keywords[], ...) except 0
+ int PyArg_VaParseTupleAndKeywords(object args, object kw, char *format, char *keywords[], va_list vargs) except 0
+ int PyArg_Parse(object args, char *format, ...) except 0
+ int PyArg_UnpackTuple(object args, char *name, Py_ssize_t min, Py_ssize_t max, ...) except 0
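
These argument-parsing declarations are mostly useful when porting handwritten C extension code, but they can be called directly from Cython as well. A minimal sketch; the function name is illustrative, and the `except 0` annotation is what turns a parse failure into a raised TypeError:

    from cpython.getargs cimport PyArg_ParseTuple

    def unpack_point(*args):
        # A parse failure sets a TypeError; `except 0` re-raises it at this call site.
        cdef int x, y
        PyArg_ParseTuple(args, b"ii", &x, &y)
        return (x, y)
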
diff --git a/contrib/tools/cython/Cython/Includes/cpython/instance.pxd b/contrib/tools/cython/Cython/Includes/cpython/instance.pxd
index e009a5b8f2..aecdc0cfd7 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/instance.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/instance.pxd
@@ -1,25 +1,25 @@
-cdef extern from "Python.h":
-
- ############################################################################
- # 7.5.2 Instance Objects
- ############################################################################
-
- # PyTypeObject PyInstance_Type
- #
- # Type object for class instances.
-
- int PyInstance_Check(object obj)
- # Return true if obj is an instance.
-
- object PyInstance_New(object cls, object arg, object kw)
- # Return value: New reference.
- # Create a new instance of a specific class. The parameters arg
- # and kw are used as the positional and keyword parameters to the
- # object's constructor.
-
- object PyInstance_NewRaw(object cls, object dict)
- # Return value: New reference.
- # Create a new instance of a specific class without calling its
- # constructor. class is the class of new object. The dict
- # parameter will be used as the object's __dict__; if NULL, a new
- # dictionary will be created for the instance.
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 7.5.2 Instance Objects
+ ############################################################################
+
+ # PyTypeObject PyInstance_Type
+ #
+ # Type object for class instances.
+
+ int PyInstance_Check(object obj)
+ # Return true if obj is an instance.
+
+ object PyInstance_New(object cls, object arg, object kw)
+ # Return value: New reference.
+ # Create a new instance of a specific class. The parameters arg
+ # and kw are used as the positional and keyword parameters to the
+ # object's constructor.
+
+ object PyInstance_NewRaw(object cls, object dict)
+ # Return value: New reference.
+ # Create a new instance of a specific class without calling its
+ # constructor. class is the class of new object. The dict
+ # parameter will be used as the object's __dict__; if NULL, a new
+ # dictionary will be created for the instance.
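
These functions cover old-style (classic) class instances and are only meaningful when compiling against Python 2 headers; under Python 3 the PyInstance_* API does not exist. A deliberately tiny, hedged sketch:

    from cpython.instance cimport PyInstance_Check

    def is_old_style_instance(obj):
        # True only for instances of classic (pre new-style) Python 2 classes.
        return PyInstance_Check(obj) != 0
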
diff --git a/contrib/tools/cython/Cython/Includes/cpython/int.pxd b/contrib/tools/cython/Cython/Includes/cpython/int.pxd
index 44f64a57e8..50babff615 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/int.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/int.pxd
@@ -1,88 +1,88 @@
-cdef extern from "Python.h":
- ctypedef unsigned long long PY_LONG_LONG
-
- ############################################################################
- # Integer Objects
- ############################################################################
- # PyTypeObject PyInt_Type
- # This instance of PyTypeObject represents the Python plain
- # integer type. This is the same object as int and types.IntType.
-
- bint PyInt_Check(object o)
- # Return true if o is of type PyInt_Type or a subtype of
- # PyInt_Type.
-
- bint PyInt_CheckExact(object o)
- # Return true if o is of type PyInt_Type, but not a subtype of
- # PyInt_Type.
-
- object PyInt_FromString(char *str, char **pend, int base)
- # Return value: New reference.
- # Return a new PyIntObject or PyLongObject based on the string
- # value in str, which is interpreted according to the radix in
- # base. If pend is non-NULL, *pend will point to the first
- # character in str which follows the representation of the
- # number. If base is 0, the radix will be determined based on the
- # leading characters of str: if str starts with '0x' or '0X',
- # radix 16 will be used; if str starts with '0', radix 8 will be
- # used; otherwise radix 10 will be used. If base is not 0, it must
- # be between 2 and 36, inclusive. Leading spaces are ignored. If
- # there are no digits, ValueError will be raised. If the string
- # represents a number too large to be contained within the
- # machine's long int type and overflow warnings are being
- # suppressed, a PyLongObject will be returned. If overflow
- # warnings are not being suppressed, NULL will be returned in this
- # case.
-
- object PyInt_FromLong(long ival)
- # Return value: New reference.
- # Create a new integer object with a value of ival.
- # The current implementation keeps an array of integer objects for
- # all integers between -5 and 256, when you create an int in that
- # range you actually just get back a reference to the existing
- # object. So it should be possible to change the value of 1. I
- # suspect the behaviour of Python in this case is undefined. :-)
-
- object PyInt_FromSsize_t(Py_ssize_t ival)
- # Return value: New reference.
- # Create a new integer object with a value of ival. If the value
+cdef extern from "Python.h":
+ ctypedef unsigned long long PY_LONG_LONG
+
+ ############################################################################
+ # Integer Objects
+ ############################################################################
+ # PyTypeObject PyInt_Type
+ # This instance of PyTypeObject represents the Python plain
+ # integer type. This is the same object as int and types.IntType.
+
+ bint PyInt_Check(object o)
+ # Return true if o is of type PyInt_Type or a subtype of
+ # PyInt_Type.
+
+ bint PyInt_CheckExact(object o)
+ # Return true if o is of type PyInt_Type, but not a subtype of
+ # PyInt_Type.
+
+ object PyInt_FromString(char *str, char **pend, int base)
+ # Return value: New reference.
+ # Return a new PyIntObject or PyLongObject based on the string
+ # value in str, which is interpreted according to the radix in
+ # base. If pend is non-NULL, *pend will point to the first
+ # character in str which follows the representation of the
+ # number. If base is 0, the radix will be determined based on the
+ # leading characters of str: if str starts with '0x' or '0X',
+ # radix 16 will be used; if str starts with '0', radix 8 will be
+ # used; otherwise radix 10 will be used. If base is not 0, it must
+ # be between 2 and 36, inclusive. Leading spaces are ignored. If
+ # there are no digits, ValueError will be raised. If the string
+ # represents a number too large to be contained within the
+ # machine's long int type and overflow warnings are being
+ # suppressed, a PyLongObject will be returned. If overflow
+ # warnings are not being suppressed, NULL will be returned in this
+ # case.
+
+ object PyInt_FromLong(long ival)
+ # Return value: New reference.
+ # Create a new integer object with a value of ival.
+ # The current implementation keeps an array of integer objects for
+ # all integers between -5 and 256, when you create an int in that
+ # range you actually just get back a reference to the existing
+ # object. So it should be possible to change the value of 1. I
+ # suspect the behaviour of Python in this case is undefined. :-)
+
+ object PyInt_FromSsize_t(Py_ssize_t ival)
+ # Return value: New reference.
+ # Create a new integer object with a value of ival. If the value
# is larger than LONG_MAX or smaller than LONG_MIN, a long integer
# object is returned.
object PyInt_FromSize_t(size_t ival)
# Return value: New reference.
# Create a new integer object with a value of ival. If the value
- # exceeds LONG_MAX, a long integer object is returned.
-
- long PyInt_AsLong(object io) except? -1
- # Will first attempt to cast the object to a PyIntObject, if it is
- # not already one, and then return its value. If there is an
- # error, -1 is returned, and the caller should check
- # PyErr_Occurred() to find out whether there was an error, or
- # whether the value just happened to be -1.
-
- long PyInt_AS_LONG(object io)
- # Return the value of the object io. No error checking is performed.
-
- unsigned long PyInt_AsUnsignedLongMask(object io) except? -1
- # Will first attempt to cast the object to a PyIntObject or
- # PyLongObject, if it is not already one, and then return its
- # value as unsigned long. This function does not check for
- # overflow.
-
- PY_LONG_LONG PyInt_AsUnsignedLongLongMask(object io) except? -1
- # Will first attempt to cast the object to a PyIntObject or
- # PyLongObject, if it is not already one, and then return its
- # value as unsigned long long, without checking for overflow.
-
- Py_ssize_t PyInt_AsSsize_t(object io) except? -1
- # Will first attempt to cast the object to a PyIntObject or
- # PyLongObject, if it is not already one, and then return its
- # value as Py_ssize_t.
-
- long PyInt_GetMax()
- # Return the system's idea of the largest integer it can handle
- # (LONG_MAX, as defined in the system header files).
+ # exceeds LONG_MAX, a long integer object is returned.
+
+ long PyInt_AsLong(object io) except? -1
+ # Will first attempt to cast the object to a PyIntObject, if it is
+ # not already one, and then return its value. If there is an
+ # error, -1 is returned, and the caller should check
+ # PyErr_Occurred() to find out whether there was an error, or
+ # whether the value just happened to be -1.
+
+ long PyInt_AS_LONG(object io)
+ # Return the value of the object io. No error checking is performed.
+
+ unsigned long PyInt_AsUnsignedLongMask(object io) except? -1
+ # Will first attempt to cast the object to a PyIntObject or
+ # PyLongObject, if it is not already one, and then return its
+ # value as unsigned long. This function does not check for
+ # overflow.
+
+ PY_LONG_LONG PyInt_AsUnsignedLongLongMask(object io) except? -1
+ # Will first attempt to cast the object to a PyIntObject or
+ # PyLongObject, if it is not already one, and then return its
+ # value as unsigned long long, without checking for overflow.
+
+ Py_ssize_t PyInt_AsSsize_t(object io) except? -1
+ # Will first attempt to cast the object to a PyIntObject or
+ # PyLongObject, if it is not already one, and then return its
+ # value as Py_ssize_t.
+
+ long PyInt_GetMax()
+ # Return the system's idea of the largest integer it can handle
+ # (LONG_MAX, as defined in the system header files).
int PyInt_ClearFreeList()
# Clear the integer free list. Return the number of items that could not be freed.
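
As an illustrative sketch only (not part of the patch; module and function names are hypothetical), the PyInt_* calls declared above can be used from Cython roughly as follows. Note that the PyInt_* API exists only under Python 2, so this would not compile against Python 3.

    # int_roundtrip.pyx -- hypothetical example (Python 2 only)
    from cpython.int cimport PyInt_Check, PyInt_FromLong, PyInt_AsLong

    def roundtrip(long value):
        # Wrap a C long in a Python int, then read it back as a C long.
        obj = PyInt_FromLong(value)
        assert PyInt_Check(obj)
        cdef long back = PyInt_AsLong(obj)   # declared 'except? -1', so errors propagate
        return back
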
diff --git a/contrib/tools/cython/Cython/Includes/cpython/iterator.pxd b/contrib/tools/cython/Cython/Includes/cpython/iterator.pxd
index b0e0513e83..0e10907f7f 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/iterator.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/iterator.pxd
@@ -1,36 +1,36 @@
-cdef extern from "Python.h":
-
- ############################################################################
- # 6.5 Iterator Protocol
- ############################################################################
- bint PyIter_Check(object o)
- # Return true if the object o supports the iterator protocol.
-
- object PyIter_Next(object o)
- # Return value: New reference.
- # Return the next value from the iteration o. If the object is an
- # iterator, this retrieves the next value from the iteration, and
- # returns NULL with no exception set if there are no remaining
- # items. If the object is not an iterator, TypeError is raised, or
- # if there is an error in retrieving the item, returns NULL and
- # passes along the exception.
-
- # To write a loop which iterates over an iterator, the C code should look something like this:
- # PyObject *iterator = PyObject_GetIter(obj);
- # PyObject *item;
- # if (iterator == NULL) {
- # /* propagate error */
- # }
- # while (item = PyIter_Next(iterator)) {
- # /* do something with item */
- # ...
- # /* release reference when done */
- # Py_DECREF(item);
- # }
- # Py_DECREF(iterator);
- # if (PyErr_Occurred()) {
- # /* propagate error */
- # }
- # else {
- # /* continue doing useful work */
- # }
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 6.5 Iterator Protocol
+ ############################################################################
+ bint PyIter_Check(object o)
+ # Return true if the object o supports the iterator protocol.
+
+ object PyIter_Next(object o)
+ # Return value: New reference.
+ # Return the next value from the iteration o. If the object is an
+ # iterator, this retrieves the next value from the iteration, and
+ # returns NULL with no exception set if there are no remaining
+ # items. If the object is not an iterator, TypeError is raised, or
+ # if there is an error in retrieving the item, returns NULL and
+ # passes along the exception.
+
+ # To write a loop which iterates over an iterator, the C code should look something like this:
+ # PyObject *iterator = PyObject_GetIter(obj);
+ # PyObject *item;
+ # if (iterator == NULL) {
+ # /* propagate error */
+ # }
+ # while (item = PyIter_Next(iterator)) {
+ # /* do something with item */
+ # ...
+ # /* release reference when done */
+ # Py_DECREF(item);
+ # }
+ # Py_DECREF(iterator);
+ # if (PyErr_Occurred()) {
+ # /* propagate error */
+ # }
+ # else {
+ # /* continue doing useful work */
+ # }
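
For illustration (hypothetical function name, not from the patch), PyIter_Check can be used from Cython to distinguish iterators from plain iterables; PyIter_Next itself is easier to consume from C code as in the loop above, because it signals exhaustion by returning NULL without setting an exception.

    from cpython.iterator cimport PyIter_Check

    def is_iterator(obj):
        # iter([1, 2]) -> True; a plain list only implements __iter__ -> False.
        return PyIter_Check(obj)
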
diff --git a/contrib/tools/cython/Cython/Includes/cpython/list.pxd b/contrib/tools/cython/Cython/Includes/cpython/list.pxd
index c9689f0071..c6a29535c9 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/list.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/list.pxd
@@ -1,92 +1,92 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- ############################################################################
- # Lists
- ############################################################################
- list PyList_New(Py_ssize_t len)
- # Return a new list of length len on success, or NULL on failure.
- #
- # Note: If length is greater than zero, the returned list object's
- # items are set to NULL. Thus you cannot use abstract API
- # functions such as PySequence_SetItem() or expose the object to
- # Python code before setting all items to a real object with
- # PyList_SetItem().
-
- bint PyList_Check(object p)
- # Return true if p is a list object or an instance of a subtype of
- # the list type.
-
- bint PyList_CheckExact(object p)
- # Return true if p is a list object, but not an instance of a
- # subtype of the list type.
-
- Py_ssize_t PyList_Size(object list) except -1
- # Return the length of the list object in list; this is equivalent
- # to "len(list)" on a list object.
-
- Py_ssize_t PyList_GET_SIZE(object list)
- # Macro form of PyList_Size() without error checking.
-
- PyObject* PyList_GetItem(object list, Py_ssize_t index) except NULL
- # Return value: Borrowed reference.
- # Return the object at position pos in the list pointed to by
- # p. The position must be positive, indexing from the end of the
- # list is not supported. If pos is out of bounds, return NULL and
- # set an IndexError exception.
-
- PyObject* PyList_GET_ITEM(object list, Py_ssize_t i)
- # Return value: Borrowed reference.
- # Macro form of PyList_GetItem() without error checking.
-
- int PyList_SetItem(object list, Py_ssize_t index, object item) except -1
- # Set the item at index index in list to item. Return 0 on success
- # or -1 on failure. Note: This function ``steals'' a reference to
- # item and discards a reference to an item already in the list at
- # the affected position.
-
- void PyList_SET_ITEM(object list, Py_ssize_t i, object o)
- # Macro form of PyList_SetItem() without error checking. This is
- # normally only used to fill in new lists where there is no
- # previous content. Note: This function ``steals'' a reference to
- # item, and, unlike PyList_SetItem(), does not discard a reference
- # to any item that is being replaced; any reference in list at
- # position i will be *leaked*.
-
- int PyList_Insert(object list, Py_ssize_t index, object item) except -1
- # Insert the item item into list list in front of index
- # index. Return 0 if successful; return -1 and set an exception if
- # unsuccessful. Analogous to list.insert(index, item).
-
- int PyList_Append(object list, object item) except -1
- # Append the object item at the end of list list. Return 0 if
- # successful; return -1 and set an exception if
- # unsuccessful. Analogous to list.append(item).
-
- list PyList_GetSlice(object list, Py_ssize_t low, Py_ssize_t high)
- # Return value: New reference.
- # Return a list of the objects in list containing the objects
- # between low and high. Return NULL and set an exception if
- # unsuccessful. Analogous to list[low:high].
-
- int PyList_SetSlice(object list, Py_ssize_t low, Py_ssize_t high, object itemlist) except -1
- # Set the slice of list between low and high to the contents of
- # itemlist. Analogous to list[low:high] = itemlist. The itemlist
- # may be NULL, indicating the assignment of an empty list (slice
- # deletion). Return 0 on success, -1 on failure.
-
- int PyList_Sort(object list) except -1
- # Sort the items of list in place. Return 0 on success, -1 on
- # failure. This is equivalent to "list.sort()".
-
- int PyList_Reverse(object list) except -1
- # Reverse the items of list in place. Return 0 on success, -1 on
- # failure. This is the equivalent of "list.reverse()".
-
- tuple PyList_AsTuple(object list)
- # Return value: New reference.
- # Return a new tuple object containing the contents of list;
- # equivalent to "tuple(list)".
-
-
+
+cdef extern from "Python.h":
+
+ ############################################################################
+ # Lists
+ ############################################################################
+ list PyList_New(Py_ssize_t len)
+ # Return a new list of length len on success, or NULL on failure.
+ #
+ # Note: If length is greater than zero, the returned list object's
+ # items are set to NULL. Thus you cannot use abstract API
+ # functions such as PySequence_SetItem() or expose the object to
+ # Python code before setting all items to a real object with
+ # PyList_SetItem().
+
+ bint PyList_Check(object p)
+ # Return true if p is a list object or an instance of a subtype of
+ # the list type.
+
+ bint PyList_CheckExact(object p)
+ # Return true if p is a list object, but not an instance of a
+ # subtype of the list type.
+
+ Py_ssize_t PyList_Size(object list) except -1
+ # Return the length of the list object in list; this is equivalent
+ # to "len(list)" on a list object.
+
+ Py_ssize_t PyList_GET_SIZE(object list)
+ # Macro form of PyList_Size() without error checking.
+
+ PyObject* PyList_GetItem(object list, Py_ssize_t index) except NULL
+ # Return value: Borrowed reference.
+ # Return the object at position pos in the list pointed to by
+ # p. The position must be positive, indexing from the end of the
+ # list is not supported. If pos is out of bounds, return NULL and
+ # set an IndexError exception.
+
+ PyObject* PyList_GET_ITEM(object list, Py_ssize_t i)
+ # Return value: Borrowed reference.
+ # Macro form of PyList_GetItem() without error checking.
+
+ int PyList_SetItem(object list, Py_ssize_t index, object item) except -1
+ # Set the item at index index in list to item. Return 0 on success
+ # or -1 on failure. Note: This function ``steals'' a reference to
+ # item and discards a reference to an item already in the list at
+ # the affected position.
+
+ void PyList_SET_ITEM(object list, Py_ssize_t i, object o)
+ # Macro form of PyList_SetItem() without error checking. This is
+ # normally only used to fill in new lists where there is no
+ # previous content. Note: This function ``steals'' a reference to
+ # item, and, unlike PyList_SetItem(), does not discard a reference
+ # to any item that is being replaced; any reference in list at
+ # position i will be *leaked*.
+
+ int PyList_Insert(object list, Py_ssize_t index, object item) except -1
+ # Insert the item item into list list in front of index
+ # index. Return 0 if successful; return -1 and set an exception if
+ # unsuccessful. Analogous to list.insert(index, item).
+
+ int PyList_Append(object list, object item) except -1
+ # Append the object item at the end of list list. Return 0 if
+ # successful; return -1 and set an exception if
+ # unsuccessful. Analogous to list.append(item).
+
+ list PyList_GetSlice(object list, Py_ssize_t low, Py_ssize_t high)
+ # Return value: New reference.
+ # Return a list of the objects in list containing the objects
+ # between low and high. Return NULL and set an exception if
+ # unsuccessful. Analogous to list[low:high].
+
+ int PyList_SetSlice(object list, Py_ssize_t low, Py_ssize_t high, object itemlist) except -1
+ # Set the slice of list between low and high to the contents of
+ # itemlist. Analogous to list[low:high] = itemlist. The itemlist
+ # may be NULL, indicating the assignment of an empty list (slice
+ # deletion). Return 0 on success, -1 on failure.
+
+ int PyList_Sort(object list) except -1
+ # Sort the items of list in place. Return 0 on success, -1 on
+ # failure. This is equivalent to "list.sort()".
+
+ int PyList_Reverse(object list) except -1
+ # Reverse the items of list in place. Return 0 on success, -1 on
+ # failure. This is the equivalent of "list.reverse()".
+
+ tuple PyList_AsTuple(object list)
+ # Return value: New reference.
+ # Return a new tuple object containing the contents of list;
+ # equivalent to "tuple(list)".
+
+
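
As an aside (not part of the patch; the function name is hypothetical), PyList_New plus PyList_SET_ITEM is the usual way to fill a pre-sized list without repeated appends. Because PyList_SET_ITEM steals a reference while Cython also manages its own reference to the value, the value must be incref'd first:

    from cpython.list cimport PyList_New, PyList_SET_ITEM
    from cpython.ref cimport Py_INCREF

    def squares(Py_ssize_t n):
        # Build [0, 1, 4, ...] by filling a pre-sized list slot by slot.
        cdef list result = PyList_New(n)
        cdef Py_ssize_t i
        for i in range(n):
            value = i * i
            Py_INCREF(value)               # PyList_SET_ITEM steals this reference
            PyList_SET_ITEM(result, i, value)
        return result
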
diff --git a/contrib/tools/cython/Cython/Includes/cpython/long.pxd b/contrib/tools/cython/Cython/Includes/cpython/long.pxd
index ccfa55e7c0..eb8140d417 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/long.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/long.pxd
@@ -1,35 +1,35 @@
-
-cdef extern from "Python.h":
- ctypedef long long PY_LONG_LONG
- ctypedef unsigned long long uPY_LONG_LONG "unsigned PY_LONG_LONG"
-
- ############################################################################
- # 7.2.3 Long Integer Objects
- ############################################################################
-
- # PyLongObject
- #
- # This subtype of PyObject represents a Python long integer object.
-
- # PyTypeObject PyLong_Type
- #
- # This instance of PyTypeObject represents the Python long integer
- # type. This is the same object as long and types.LongType.
-
- bint PyLong_Check(object p)
- # Return true if its argument is a PyLongObject or a subtype of PyLongObject.
-
- bint PyLong_CheckExact(object p)
- # Return true if its argument is a PyLongObject, but not a subtype of PyLongObject.
-
- object PyLong_FromLong(long v)
- # Return value: New reference.
- # Return a new PyLongObject object from v, or NULL on failure.
-
- object PyLong_FromUnsignedLong(unsigned long v)
- # Return value: New reference.
- # Return a new PyLongObject object from a C unsigned long, or NULL on failure.
-
+
+cdef extern from "Python.h":
+ ctypedef long long PY_LONG_LONG
+ ctypedef unsigned long long uPY_LONG_LONG "unsigned PY_LONG_LONG"
+
+ ############################################################################
+ # 7.2.3 Long Integer Objects
+ ############################################################################
+
+ # PyLongObject
+ #
+ # This subtype of PyObject represents a Python long integer object.
+
+ # PyTypeObject PyLong_Type
+ #
+ # This instance of PyTypeObject represents the Python long integer
+ # type. This is the same object as long and types.LongType.
+
+ bint PyLong_Check(object p)
+ # Return true if its argument is a PyLongObject or a subtype of PyLongObject.
+
+ bint PyLong_CheckExact(object p)
+ # Return true if its argument is a PyLongObject, but not a subtype of PyLongObject.
+
+ object PyLong_FromLong(long v)
+ # Return value: New reference.
+ # Return a new PyLongObject object from v, or NULL on failure.
+
+ object PyLong_FromUnsignedLong(unsigned long v)
+ # Return value: New reference.
+ # Return a new PyLongObject object from a C unsigned long, or NULL on failure.
+
object PyLong_FromSsize_t(Py_ssize_t v)
# Return value: New reference.
    # Return a new PyLongObject object from a C Py_ssize_t, or NULL on failure.
@@ -38,57 +38,57 @@ cdef extern from "Python.h":
# Return value: New reference.
# Return a new PyLongObject object from a C size_t, or NULL on failure.
- object PyLong_FromLongLong(PY_LONG_LONG v)
- # Return value: New reference.
- # Return a new PyLongObject object from a C long long, or NULL on failure.
-
- object PyLong_FromUnsignedLongLong(uPY_LONG_LONG v)
- # Return value: New reference.
- # Return a new PyLongObject object from a C unsigned long long, or NULL on failure.
-
- object PyLong_FromDouble(double v)
- # Return value: New reference.
- # Return a new PyLongObject object from the integer part of v, or NULL on failure.
-
- object PyLong_FromString(char *str, char **pend, int base)
- # Return value: New reference.
- # Return a new PyLongObject based on the string value in str,
- # which is interpreted according to the radix in base. If pend is
- # non-NULL, *pend will point to the first character in str which
- # follows the representation of the number. If base is 0, the
- # radix will be determined based on the leading characters of str:
- # if str starts with '0x' or '0X', radix 16 will be used; if str
- # starts with '0', radix 8 will be used; otherwise radix 10 will
- # be used. If base is not 0, it must be between 2 and 36,
- # inclusive. Leading spaces are ignored. If there are no digits,
- # ValueError will be raised.
-
- object PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
- # Return value: New reference.
- # Convert a sequence of Unicode digits to a Python long integer
- # value. The first parameter, u, points to the first character of
- # the Unicode string, length gives the number of characters, and
- # base is the radix for the conversion. The radix must be in the
- # range [2, 36]; if it is out of range, ValueError will be
- # raised.
-
+ object PyLong_FromLongLong(PY_LONG_LONG v)
+ # Return value: New reference.
+ # Return a new PyLongObject object from a C long long, or NULL on failure.
+
+ object PyLong_FromUnsignedLongLong(uPY_LONG_LONG v)
+ # Return value: New reference.
+ # Return a new PyLongObject object from a C unsigned long long, or NULL on failure.
+
+ object PyLong_FromDouble(double v)
+ # Return value: New reference.
+ # Return a new PyLongObject object from the integer part of v, or NULL on failure.
+
+ object PyLong_FromString(char *str, char **pend, int base)
+ # Return value: New reference.
+ # Return a new PyLongObject based on the string value in str,
+ # which is interpreted according to the radix in base. If pend is
+ # non-NULL, *pend will point to the first character in str which
+ # follows the representation of the number. If base is 0, the
+ # radix will be determined based on the leading characters of str:
+ # if str starts with '0x' or '0X', radix 16 will be used; if str
+ # starts with '0', radix 8 will be used; otherwise radix 10 will
+ # be used. If base is not 0, it must be between 2 and 36,
+ # inclusive. Leading spaces are ignored. If there are no digits,
+ # ValueError will be raised.
+
+ object PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
+ # Return value: New reference.
+ # Convert a sequence of Unicode digits to a Python long integer
+ # value. The first parameter, u, points to the first character of
+ # the Unicode string, length gives the number of characters, and
+ # base is the radix for the conversion. The radix must be in the
+ # range [2, 36]; if it is out of range, ValueError will be
+ # raised.
+
# object PyLong_FromUnicodeObject(object u, int base)
# Convert a sequence of Unicode digits in the string u to a Python integer
# value. The Unicode string is first encoded to a byte string using
# PyUnicode_EncodeDecimal() and then converted using PyLong_FromString().
# New in version 3.3.
- object PyLong_FromVoidPtr(void *p)
- # Return value: New reference.
- # Create a Python integer or long integer from the pointer p. The
- # pointer value can be retrieved from the resulting value using
- # PyLong_AsVoidPtr(). If the integer is larger than LONG_MAX, a
- # positive long integer is returned.
-
- long PyLong_AsLong(object pylong) except? -1
- # Return a C long representation of the contents of pylong. If
- # pylong is greater than LONG_MAX, an OverflowError is raised.
-
+ object PyLong_FromVoidPtr(void *p)
+ # Return value: New reference.
+ # Create a Python integer or long integer from the pointer p. The
+ # pointer value can be retrieved from the resulting value using
+ # PyLong_AsVoidPtr(). If the integer is larger than LONG_MAX, a
+ # positive long integer is returned.
+
+ long PyLong_AsLong(object pylong) except? -1
+ # Return a C long representation of the contents of pylong. If
+ # pylong is greater than LONG_MAX, an OverflowError is raised.
+
# long PyLong_AsLongAndOverflow(object pylong, int *overflow) except? -1
# Return a C long representation of the contents of pylong. If pylong is
# greater than LONG_MAX or less than LONG_MIN, set *overflow to 1 or -1,
@@ -110,40 +110,40 @@ cdef extern from "Python.h":
# is greater than PY_SSIZE_T_MAX, an OverflowError is raised and -1 will be
# returned.
- unsigned long PyLong_AsUnsignedLong(object pylong) except? -1
- # Return a C unsigned long representation of the contents of
- # pylong. If pylong is greater than ULONG_MAX, an OverflowError is
- # raised.
-
- PY_LONG_LONG PyLong_AsLongLong(object pylong) except? -1
- # Return a C long long from a Python long integer. If pylong
- # cannot be represented as a long long, an OverflowError will be
- # raised.
-
- uPY_LONG_LONG PyLong_AsUnsignedLongLong(object pylong) except? -1
- #unsigned PY_LONG_LONG PyLong_AsUnsignedLongLong(object pylong)
- # Return a C unsigned long long from a Python long integer. If
- # pylong cannot be represented as an unsigned long long, an
- # OverflowError will be raised if the value is positive, or a
- # TypeError will be raised if the value is negative.
-
- unsigned long PyLong_AsUnsignedLongMask(object io) except? -1
- # Return a C unsigned long from a Python long integer, without
- # checking for overflow.
-
- uPY_LONG_LONG PyLong_AsUnsignedLongLongMask(object io) except? -1
- #unsigned PY_LONG_LONG PyLong_AsUnsignedLongLongMask(object io)
- # Return a C unsigned long long from a Python long integer,
- # without checking for overflow.
-
- double PyLong_AsDouble(object pylong) except? -1.0
- # Return a C double representation of the contents of pylong. If
- # pylong cannot be approximately represented as a double, an
- # OverflowError exception is raised and -1.0 will be returned.
-
- void* PyLong_AsVoidPtr(object pylong) except? NULL
- # Convert a Python integer or long integer pylong to a C void
- # pointer. If pylong cannot be converted, an OverflowError will be
- # raised. This is only assured to produce a usable void pointer
- # for values created with PyLong_FromVoidPtr(). For values outside
+ unsigned long PyLong_AsUnsignedLong(object pylong) except? -1
+ # Return a C unsigned long representation of the contents of
+ # pylong. If pylong is greater than ULONG_MAX, an OverflowError is
+ # raised.
+
+ PY_LONG_LONG PyLong_AsLongLong(object pylong) except? -1
+ # Return a C long long from a Python long integer. If pylong
+ # cannot be represented as a long long, an OverflowError will be
+ # raised.
+
+ uPY_LONG_LONG PyLong_AsUnsignedLongLong(object pylong) except? -1
+ #unsigned PY_LONG_LONG PyLong_AsUnsignedLongLong(object pylong)
+ # Return a C unsigned long long from a Python long integer. If
+ # pylong cannot be represented as an unsigned long long, an
+ # OverflowError will be raised if the value is positive, or a
+ # TypeError will be raised if the value is negative.
+
+ unsigned long PyLong_AsUnsignedLongMask(object io) except? -1
+ # Return a C unsigned long from a Python long integer, without
+ # checking for overflow.
+
+ uPY_LONG_LONG PyLong_AsUnsignedLongLongMask(object io) except? -1
+ #unsigned PY_LONG_LONG PyLong_AsUnsignedLongLongMask(object io)
+ # Return a C unsigned long long from a Python long integer,
+ # without checking for overflow.
+
+ double PyLong_AsDouble(object pylong) except? -1.0
+ # Return a C double representation of the contents of pylong. If
+ # pylong cannot be approximately represented as a double, an
+ # OverflowError exception is raised and -1.0 will be returned.
+
+ void* PyLong_AsVoidPtr(object pylong) except? NULL
+ # Convert a Python integer or long integer pylong to a C void
+ # pointer. If pylong cannot be converted, an OverflowError will be
+ # raised. This is only assured to produce a usable void pointer
+ # for values created with PyLong_FromVoidPtr(). For values outside
# 0..LONG_MAX, both signed and unsigned integers are accepted.
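
For illustration only (hypothetical function name, not from the patch), a minimal Cython sketch of a few of the conversions declared above. PyLong_AsLong raises OverflowError outside the C long range, and PyLong_AsDouble raises OverflowError when the value cannot be approximated as a double.

    from cpython.long cimport PyLong_FromLong, PyLong_AsLong, PyLong_AsDouble

    def widen(long value):
        # Wrap a C long in a Python integer, then read it back two ways.
        obj = PyLong_FromLong(value)
        cdef long as_long = PyLong_AsLong(obj)       # OverflowError if out of range
        cdef double as_double = PyLong_AsDouble(obj)
        return as_long, as_double
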
diff --git a/contrib/tools/cython/Cython/Includes/cpython/mapping.pxd b/contrib/tools/cython/Cython/Includes/cpython/mapping.pxd
index 599740d9f2..3d235b65e2 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/mapping.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/mapping.pxd
@@ -1,64 +1,64 @@
-cdef extern from "Python.h":
-
- ############################################################################
- # 6.4 Mapping Protocol
- ############################################################################
-
- bint PyMapping_Check(object o)
- # Return 1 if the object provides mapping protocol, and 0
- # otherwise. This function always succeeds.
-
- Py_ssize_t PyMapping_Length(object o) except -1
- # Returns the number of keys in object o on success, and -1 on
- # failure. For objects that do not provide mapping protocol, this
- # is equivalent to the Python expression "len(o)".
-
- int PyMapping_DelItemString(object o, char *key) except -1
- # Remove the mapping for object key from the object o. Return -1
- # on failure. This is equivalent to the Python statement "del
- # o[key]".
-
- int PyMapping_DelItem(object o, object key) except -1
- # Remove the mapping for object key from the object o. Return -1
- # on failure. This is equivalent to the Python statement "del
- # o[key]".
-
- bint PyMapping_HasKeyString(object o, char *key)
- # On success, return 1 if the mapping object has the key key and 0
- # otherwise. This is equivalent to the Python expression
- # "o.has_key(key)". This function always succeeds.
-
- bint PyMapping_HasKey(object o, object key)
- # Return 1 if the mapping object has the key key and 0
- # otherwise. This is equivalent to the Python expression
- # "o.has_key(key)". This function always succeeds.
-
- object PyMapping_Keys(object o)
- # Return value: New reference.
- # On success, return a list of the keys in object o. On failure,
- # return NULL. This is equivalent to the Python expression
- # "o.keys()".
-
- object PyMapping_Values(object o)
- # Return value: New reference.
- # On success, return a list of the values in object o. On failure,
- # return NULL. This is equivalent to the Python expression
- # "o.values()".
-
- object PyMapping_Items(object o)
- # Return value: New reference.
- # On success, return a list of the items in object o, where each
- # item is a tuple containing a key-value pair. On failure, return
- # NULL. This is equivalent to the Python expression "o.items()".
-
- object PyMapping_GetItemString(object o, char *key)
- # Return value: New reference.
- # Return element of o corresponding to the object key or NULL on
- # failure. This is the equivalent of the Python expression
- # "o[key]".
-
- int PyMapping_SetItemString(object o, char *key, object v) except -1
- # Map the object key to the value v in object o. Returns -1 on
- # failure. This is the equivalent of the Python statement "o[key]
- # = v".
-
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 6.4 Mapping Protocol
+ ############################################################################
+
+ bint PyMapping_Check(object o)
+ # Return 1 if the object provides mapping protocol, and 0
+ # otherwise. This function always succeeds.
+
+ Py_ssize_t PyMapping_Length(object o) except -1
+ # Returns the number of keys in object o on success, and -1 on
+ # failure. For objects that do not provide mapping protocol, this
+ # is equivalent to the Python expression "len(o)".
+
+ int PyMapping_DelItemString(object o, char *key) except -1
+ # Remove the mapping for object key from the object o. Return -1
+ # on failure. This is equivalent to the Python statement "del
+ # o[key]".
+
+ int PyMapping_DelItem(object o, object key) except -1
+ # Remove the mapping for object key from the object o. Return -1
+ # on failure. This is equivalent to the Python statement "del
+ # o[key]".
+
+ bint PyMapping_HasKeyString(object o, char *key)
+ # On success, return 1 if the mapping object has the key key and 0
+ # otherwise. This is equivalent to the Python expression
+ # "o.has_key(key)". This function always succeeds.
+
+ bint PyMapping_HasKey(object o, object key)
+ # Return 1 if the mapping object has the key key and 0
+ # otherwise. This is equivalent to the Python expression
+ # "o.has_key(key)". This function always succeeds.
+
+ object PyMapping_Keys(object o)
+ # Return value: New reference.
+ # On success, return a list of the keys in object o. On failure,
+ # return NULL. This is equivalent to the Python expression
+ # "o.keys()".
+
+ object PyMapping_Values(object o)
+ # Return value: New reference.
+ # On success, return a list of the values in object o. On failure,
+ # return NULL. This is equivalent to the Python expression
+ # "o.values()".
+
+ object PyMapping_Items(object o)
+ # Return value: New reference.
+ # On success, return a list of the items in object o, where each
+ # item is a tuple containing a key-value pair. On failure, return
+ # NULL. This is equivalent to the Python expression "o.items()".
+
+ object PyMapping_GetItemString(object o, char *key)
+ # Return value: New reference.
+ # Return element of o corresponding to the object key or NULL on
+ # failure. This is the equivalent of the Python expression
+ # "o[key]".
+
+ int PyMapping_SetItemString(object o, char *key, object v) except -1
+ # Map the object key to the value v in object o. Returns -1 on
+ # failure. This is the equivalent of the Python statement "o[key]
+ # = v".
+
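
A minimal sketch (not part of the patch; the function name is hypothetical) of probing the mapping protocol from Cython. PyMapping_Check never raises, so it is a cheap guard before the key/value helpers:

    from cpython.mapping cimport PyMapping_Check, PyMapping_Keys, PyMapping_Values

    def snapshot(obj):
        # Return (keys, values) lists for any mapping, e.g. a dict.
        if not PyMapping_Check(obj):
            raise TypeError("expected a mapping")
        return PyMapping_Keys(obj), PyMapping_Values(obj)
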
diff --git a/contrib/tools/cython/Cython/Includes/cpython/mem.pxd b/contrib/tools/cython/Cython/Includes/cpython/mem.pxd
index 34b4418834..af820f2ee0 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/mem.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/mem.pxd
@@ -1,81 +1,81 @@
-cdef extern from "Python.h":
-
- #####################################################################
- # 9.2 Memory Interface
- #####################################################################
- # You are definitely *supposed* to use these: "In most situations,
- # however, it is recommended to allocate memory from the Python
- # heap specifically because the latter is under control of the
- # Python memory manager. For example, this is required when the
- # interpreter is extended with new object types written in
- # C. Another reason for using the Python heap is the desire to
- # inform the Python memory manager about the memory needs of the
- # extension module. Even when the requested memory is used
- # exclusively for internal, highly-specific purposes, delegating
- # all memory requests to the Python memory manager causes the
- # interpreter to have a more accurate image of its memory
- # footprint as a whole. Consequently, under certain circumstances,
- # the Python memory manager may or may not trigger appropriate
- # actions, like garbage collection, memory compaction or other
- # preventive procedures. Note that by using the C library
- # allocator as shown in the previous example, the allocated memory
- # for the I/O buffer escapes completely the Python memory
- # manager."
-
- # The following function sets, modeled after the ANSI C standard,
- # but specifying behavior when requesting zero bytes, are
- # available for allocating and releasing memory from the Python
- # heap:
-
+cdef extern from "Python.h":
+
+ #####################################################################
+ # 9.2 Memory Interface
+ #####################################################################
+ # You are definitely *supposed* to use these: "In most situations,
+ # however, it is recommended to allocate memory from the Python
+ # heap specifically because the latter is under control of the
+ # Python memory manager. For example, this is required when the
+ # interpreter is extended with new object types written in
+ # C. Another reason for using the Python heap is the desire to
+ # inform the Python memory manager about the memory needs of the
+ # extension module. Even when the requested memory is used
+ # exclusively for internal, highly-specific purposes, delegating
+ # all memory requests to the Python memory manager causes the
+ # interpreter to have a more accurate image of its memory
+ # footprint as a whole. Consequently, under certain circumstances,
+ # the Python memory manager may or may not trigger appropriate
+ # actions, like garbage collection, memory compaction or other
+ # preventive procedures. Note that by using the C library
+ # allocator as shown in the previous example, the allocated memory
+ # for the I/O buffer escapes completely the Python memory
+ # manager."
+
+ # The following function sets, modeled after the ANSI C standard,
+ # but specifying behavior when requesting zero bytes, are
+ # available for allocating and releasing memory from the Python
+ # heap:
+
void* PyMem_RawMalloc(size_t n) nogil
- void* PyMem_Malloc(size_t n)
- # Allocates n bytes and returns a pointer of type void* to the
- # allocated memory, or NULL if the request fails. Requesting zero
- # bytes returns a distinct non-NULL pointer if possible, as if
- # PyMem_Malloc(1) had been called instead. The memory will not
- # have been initialized in any way.
-
+ void* PyMem_Malloc(size_t n)
+ # Allocates n bytes and returns a pointer of type void* to the
+ # allocated memory, or NULL if the request fails. Requesting zero
+ # bytes returns a distinct non-NULL pointer if possible, as if
+ # PyMem_Malloc(1) had been called instead. The memory will not
+ # have been initialized in any way.
+
void* PyMem_RawRealloc(void *p, size_t n) nogil
- void* PyMem_Realloc(void *p, size_t n)
- # Resizes the memory block pointed to by p to n bytes. The
- # contents will be unchanged to the minimum of the old and the new
- # sizes. If p is NULL, the call is equivalent to PyMem_Malloc(n);
- # else if n is equal to zero, the memory block is resized but is
- # not freed, and the returned pointer is non-NULL. Unless p is
- # NULL, it must have been returned by a previous call to
- # PyMem_Malloc() or PyMem_Realloc().
-
+ void* PyMem_Realloc(void *p, size_t n)
+ # Resizes the memory block pointed to by p to n bytes. The
+ # contents will be unchanged to the minimum of the old and the new
+ # sizes. If p is NULL, the call is equivalent to PyMem_Malloc(n);
+ # else if n is equal to zero, the memory block is resized but is
+ # not freed, and the returned pointer is non-NULL. Unless p is
+ # NULL, it must have been returned by a previous call to
+ # PyMem_Malloc() or PyMem_Realloc().
+
void PyMem_RawFree(void *p) nogil
- void PyMem_Free(void *p)
- # Frees the memory block pointed to by p, which must have been
- # returned by a previous call to PyMem_Malloc() or
- # PyMem_Realloc(). Otherwise, or if PyMem_Free(p) has been called
- # before, undefined behavior occurs. If p is NULL, no operation is
- # performed.
-
- # The following type-oriented macros are provided for
- # convenience. Note that TYPE refers to any C type.
-
- # TYPE* PyMem_New(TYPE, size_t n)
- # Same as PyMem_Malloc(), but allocates (n * sizeof(TYPE)) bytes
- # of memory. Returns a pointer cast to TYPE*. The memory will not
- # have been initialized in any way.
-
- # TYPE* PyMem_Resize(void *p, TYPE, size_t n)
- # Same as PyMem_Realloc(), but the memory block is resized to (n *
- # sizeof(TYPE)) bytes. Returns a pointer cast to TYPE*.
-
- void PyMem_Del(void *p)
- # Same as PyMem_Free().
-
- # In addition, the following macro sets are provided for calling
- # the Python memory allocator directly, without involving the C
- # API functions listed above. However, note that their use does
- # not preserve binary compatibility across Python versions and is
- # therefore deprecated in extension modules.
-
- # PyMem_MALLOC(), PyMem_REALLOC(), PyMem_FREE().
- # PyMem_NEW(), PyMem_RESIZE(), PyMem_DEL().
+ void PyMem_Free(void *p)
+ # Frees the memory block pointed to by p, which must have been
+ # returned by a previous call to PyMem_Malloc() or
+ # PyMem_Realloc(). Otherwise, or if PyMem_Free(p) has been called
+ # before, undefined behavior occurs. If p is NULL, no operation is
+ # performed.
+
+ # The following type-oriented macros are provided for
+ # convenience. Note that TYPE refers to any C type.
+
+ # TYPE* PyMem_New(TYPE, size_t n)
+ # Same as PyMem_Malloc(), but allocates (n * sizeof(TYPE)) bytes
+ # of memory. Returns a pointer cast to TYPE*. The memory will not
+ # have been initialized in any way.
+
+ # TYPE* PyMem_Resize(void *p, TYPE, size_t n)
+ # Same as PyMem_Realloc(), but the memory block is resized to (n *
+ # sizeof(TYPE)) bytes. Returns a pointer cast to TYPE*.
+
+ void PyMem_Del(void *p)
+ # Same as PyMem_Free().
+
+ # In addition, the following macro sets are provided for calling
+ # the Python memory allocator directly, without involving the C
+ # API functions listed above. However, note that their use does
+ # not preserve binary compatibility across Python versions and is
+ # therefore deprecated in extension modules.
+
+ # PyMem_MALLOC(), PyMem_REALLOC(), PyMem_FREE().
+ # PyMem_NEW(), PyMem_RESIZE(), PyMem_DEL().
#####################################################################
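
A minimal sketch (hypothetical function name, not from the patch) of the allocate/use/free pattern described above, keeping the temporary buffer on the Python heap so the interpreter's memory accounting stays accurate:

    from cpython.mem cimport PyMem_Malloc, PyMem_Free

    def mean(values):
        # Copy a Python sequence of numbers into a C buffer, then average it.
        cdef Py_ssize_t n = len(values)
        if n == 0:
            raise ValueError("empty sequence")
        cdef double* buf = <double*> PyMem_Malloc(n * sizeof(double))
        if buf == NULL:
            raise MemoryError()
        cdef Py_ssize_t i
        cdef double total = 0.0
        try:
            for i in range(n):
                buf[i] = values[i]
                total += buf[i]
            return total / n
        finally:
            PyMem_Free(buf)                # always release the Python-heap block
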
diff --git a/contrib/tools/cython/Cython/Includes/cpython/method.pxd b/contrib/tools/cython/Cython/Includes/cpython/method.pxd
index b403a16a00..f51ebcc7c7 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/method.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/method.pxd
@@ -1,49 +1,49 @@
from .object cimport PyObject
-cdef extern from "Python.h":
- ############################################################################
- # 7.5.4 Method Objects
- ############################################################################
-
- # There are some functions that are useful for working with method objects.
- # PyTypeObject PyMethod_Type
- # This instance of PyTypeObject represents the Python method type. This is exposed to Python programs as types.MethodType.
-
- bint PyMethod_Check(object o)
- # Return true if o is a method object (has type
- # PyMethod_Type). The parameter must not be NULL.
-
- object PyMethod_New(object func, object self, object cls)
- # Return value: New reference.
- # Return a new method object, with func being any callable object;
- # this is the function that will be called when the method is
- # called. If this method should be bound to an instance, self
- # should be the instance and class should be the class of self,
- # otherwise self should be NULL and class should be the class
- # which provides the unbound method.
-
- PyObject* PyMethod_Class(object meth) except NULL
- # Return value: Borrowed reference.
- # Return the class object from which the method meth was created;
- # if this was created from an instance, it will be the class of
- # the instance.
-
- PyObject* PyMethod_GET_CLASS(object meth)
- # Return value: Borrowed reference.
- # Macro version of PyMethod_Class() which avoids error checking.
-
- PyObject* PyMethod_Function(object meth) except NULL
- # Return value: Borrowed reference.
- # Return the function object associated with the method meth.
-
- PyObject* PyMethod_GET_FUNCTION(object meth)
- # Return value: Borrowed reference.
- # Macro version of PyMethod_Function() which avoids error checking.
-
- PyObject* PyMethod_Self(object meth) except? NULL
- # Return value: Borrowed reference.
- # Return the instance associated with the method meth if it is bound, otherwise return NULL.
-
- PyObject* PyMethod_GET_SELF(object meth)
- # Return value: Borrowed reference.
- # Macro version of PyMethod_Self() which avoids error checking.
+cdef extern from "Python.h":
+ ############################################################################
+ # 7.5.4 Method Objects
+ ############################################################################
+
+ # There are some functions that are useful for working with method objects.
+ # PyTypeObject PyMethod_Type
+ # This instance of PyTypeObject represents the Python method type. This is exposed to Python programs as types.MethodType.
+
+ bint PyMethod_Check(object o)
+ # Return true if o is a method object (has type
+ # PyMethod_Type). The parameter must not be NULL.
+
+ object PyMethod_New(object func, object self, object cls)
+ # Return value: New reference.
+ # Return a new method object, with func being any callable object;
+ # this is the function that will be called when the method is
+ # called. If this method should be bound to an instance, self
+ # should be the instance and class should be the class of self,
+ # otherwise self should be NULL and class should be the class
+ # which provides the unbound method.
+
+ PyObject* PyMethod_Class(object meth) except NULL
+ # Return value: Borrowed reference.
+ # Return the class object from which the method meth was created;
+ # if this was created from an instance, it will be the class of
+ # the instance.
+
+ PyObject* PyMethod_GET_CLASS(object meth)
+ # Return value: Borrowed reference.
+ # Macro version of PyMethod_Class() which avoids error checking.
+
+ PyObject* PyMethod_Function(object meth) except NULL
+ # Return value: Borrowed reference.
+ # Return the function object associated with the method meth.
+
+ PyObject* PyMethod_GET_FUNCTION(object meth)
+ # Return value: Borrowed reference.
+ # Macro version of PyMethod_Function() which avoids error checking.
+
+ PyObject* PyMethod_Self(object meth) except? NULL
+ # Return value: Borrowed reference.
+ # Return the instance associated with the method meth if it is bound, otherwise return NULL.
+
+ PyObject* PyMethod_GET_SELF(object meth)
+ # Return value: Borrowed reference.
+ # Macro version of PyMethod_Self() which avoids error checking.
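
For illustration (not part of the patch; class and function names are hypothetical), a minimal sketch of PyMethod_Check. Note that the three-argument PyMethod_New declared above is the Python 2 signature; under Python 3 the C function takes only func and self.

    from cpython.method cimport PyMethod_Check

    class Greeter(object):
        def hello(self):
            return "hi"

    def is_bound_method(obj):
        # Greeter().hello -> True; under Python 3 the plain function
        # Greeter.hello -> False (Python 2 also wraps it as a method).
        return PyMethod_Check(obj)
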
diff --git a/contrib/tools/cython/Cython/Includes/cpython/module.pxd b/contrib/tools/cython/Cython/Includes/cpython/module.pxd
index 2da4684e4b..8eb323b010 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/module.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/module.pxd
@@ -1,40 +1,40 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
- ctypedef struct _inittab
-
- #####################################################################
- # 5.3 Importing Modules
- #####################################################################
+
+cdef extern from "Python.h":
+ ctypedef struct _inittab
+
+ #####################################################################
+ # 5.3 Importing Modules
+ #####################################################################
object PyImport_ImportModule(const char *name)
- # Return value: New reference.
- # This is a simplified interface to PyImport_ImportModuleEx()
- # below, leaving the globals and locals arguments set to
- # NULL. When the name argument contains a dot (when it specifies a
- # submodule of a package), the fromlist argument is set to the
- # list ['*'] so that the return value is the named module rather
- # than the top-level package containing it as would otherwise be
- # the case. (Unfortunately, this has an additional side effect
- # when name in fact specifies a subpackage instead of a submodule:
- # the submodules specified in the package's __all__ variable are
- # loaded.) Return a new reference to the imported module, or NULL
- # with an exception set on failure.
-
+ # Return value: New reference.
+ # This is a simplified interface to PyImport_ImportModuleEx()
+ # below, leaving the globals and locals arguments set to
+ # NULL. When the name argument contains a dot (when it specifies a
+ # submodule of a package), the fromlist argument is set to the
+ # list ['*'] so that the return value is the named module rather
+ # than the top-level package containing it as would otherwise be
+ # the case. (Unfortunately, this has an additional side effect
+ # when name in fact specifies a subpackage instead of a submodule:
+ # the submodules specified in the package's __all__ variable are
+ # loaded.) Return a new reference to the imported module, or NULL
+ # with an exception set on failure.
+
object PyImport_ImportModuleEx(const char *name, object globals, object locals, object fromlist)
- # Return value: New reference.
-
- # Import a module. This is best described by referring to the
- # built-in Python function __import__(), as the standard
- # __import__() function calls this function directly.
-
- # The return value is a new reference to the imported module or
- # top-level package, or NULL with an exception set on failure
- # (before Python 2.4, the module may still be created in this
- # case). Like for __import__(), the return value when a submodule
- # of a package was requested is normally the top-level package,
- # unless a non-empty fromlist was given. Changed in version 2.4:
- # failing imports remove incomplete module objects.
-
+ # Return value: New reference.
+
+ # Import a module. This is best described by referring to the
+ # built-in Python function __import__(), as the standard
+ # __import__() function calls this function directly.
+
+ # The return value is a new reference to the imported module or
+ # top-level package, or NULL with an exception set on failure
+ # (before Python 2.4, the module may still be created in this
+ # case). Like for __import__(), the return value when a submodule
+ # of a package was requested is normally the top-level package,
+ # unless a non-empty fromlist was given. Changed in version 2.4:
+ # failing imports remove incomplete module objects.
+
object PyImport_ImportModuleLevel(char *name, object globals, object locals, object fromlist, int level)
# Return value: New reference.
@@ -48,141 +48,141 @@ cdef extern from "Python.h":
# was requested is normally the top-level package, unless a
# non-empty fromlist was given.
- object PyImport_Import(object name)
- # Return value: New reference.
- # This is a higher-level interface that calls the current ``import
- # hook function''. It invokes the __import__() function from the
- # __builtins__ of the current globals. This means that the import
- # is done using whatever import hooks are installed in the current
- # environment, e.g. by rexec or ihooks.
-
- object PyImport_ReloadModule(object m)
- # Return value: New reference.
- # Reload a module. This is best described by referring to the
- # built-in Python function reload(), as the standard reload()
- # function calls this function directly. Return a new reference to
- # the reloaded module, or NULL with an exception set on failure
- # (the module still exists in this case).
-
+ object PyImport_Import(object name)
+ # Return value: New reference.
+ # This is a higher-level interface that calls the current ``import
+ # hook function''. It invokes the __import__() function from the
+ # __builtins__ of the current globals. This means that the import
+ # is done using whatever import hooks are installed in the current
+ # environment, e.g. by rexec or ihooks.
+
+ object PyImport_ReloadModule(object m)
+ # Return value: New reference.
+ # Reload a module. This is best described by referring to the
+ # built-in Python function reload(), as the standard reload()
+ # function calls this function directly. Return a new reference to
+ # the reloaded module, or NULL with an exception set on failure
+ # (the module still exists in this case).
+
PyObject* PyImport_AddModule(const char *name) except NULL
- # Return value: Borrowed reference.
- # Return the module object corresponding to a module name. The
- # name argument may be of the form package.module. First check the
- # modules dictionary if there's one there, and if not, create a
- # new one and insert it in the modules dictionary. Return NULL
- # with an exception set on failure. Note: This function does not
- # load or import the module; if the module wasn't already loaded,
- # you will get an empty module object. Use PyImport_ImportModule()
- # or one of its variants to import a module. Package structures
- # implied by a dotted name for name are not created if not already
- # present.
-
- object PyImport_ExecCodeModule(char *name, object co)
- # Return value: New reference.
- # Given a module name (possibly of the form package.module) and a
- # code object read from a Python bytecode file or obtained from
- # the built-in function compile(), load the module. Return a new
- # reference to the module object, or NULL with an exception set if
- # an error occurred. Name is removed from sys.modules in error
- # cases, and even if name was already in sys.modules on entry to
- # PyImport_ExecCodeModule(). Leaving incompletely initialized
- # modules in sys.modules is dangerous, as imports of such modules
- # have no way to know that the module object is an unknown (and
- # probably damaged with respect to the module author's intents)
- # state.
- # This function will reload the module if it was already
- # imported. See PyImport_ReloadModule() for the intended way to
- # reload a module.
- # If name points to a dotted name of the form package.module, any
- # package structures not already created will still not be
- # created.
-
-
- long PyImport_GetMagicNumber()
- # Return the magic number for Python bytecode files (a.k.a. .pyc
- # and .pyo files). The magic number should be present in the first
- # four bytes of the bytecode file, in little-endian byte order.
-
- PyObject* PyImport_GetModuleDict() except NULL
- # Return value: Borrowed reference.
- # Return the dictionary used for the module administration
- # (a.k.a. sys.modules). Note that this is a per-interpreter
- # variable.
-
-
- int PyImport_ImportFrozenModule(char *name) except -1
- # Load a frozen module named name. Return 1 for success, 0 if the
- # module is not found, and -1 with an exception set if the
- # initialization failed. To access the imported module on a
- # successful load, use PyImport_ImportModule(). (Note the misnomer
- # -- this function would reload the module if it was already
- # imported.)
-
-
- int PyImport_ExtendInittab(_inittab *newtab) except -1
- # Add a collection of modules to the table of built-in
- # modules. The newtab array must end with a sentinel entry which
- # contains NULL for the name field; failure to provide the
- # sentinel value can result in a memory fault. Returns 0 on
- # success or -1 if insufficient memory could be allocated to
- # extend the internal table. In the event of failure, no modules
- # are added to the internal table. This should be called before
- # Py_Initialize().
-
- #####################################################################
- # 7.5.5 Module Objects
- #####################################################################
-
- # PyTypeObject PyModule_Type
- #
- # This instance of PyTypeObject represents the Python module
- # type. This is exposed to Python programs as types.ModuleType.
-
- bint PyModule_Check(object p)
- # Return true if p is a module object, or a subtype of a module
- # object.
-
- bint PyModule_CheckExact(object p)
- # Return true if p is a module object, but not a subtype of PyModule_Type.
-
+ # Return value: Borrowed reference.
+ # Return the module object corresponding to a module name. The
+ # name argument may be of the form package.module. First check the
+ # modules dictionary if there's one there, and if not, create a
+ # new one and insert it in the modules dictionary. Return NULL
+ # with an exception set on failure. Note: This function does not
+ # load or import the module; if the module wasn't already loaded,
+ # you will get an empty module object. Use PyImport_ImportModule()
+ # or one of its variants to import a module. Package structures
+ # implied by a dotted name for name are not created if not already
+ # present.
+
+ object PyImport_ExecCodeModule(char *name, object co)
+ # Return value: New reference.
+ # Given a module name (possibly of the form package.module) and a
+ # code object read from a Python bytecode file or obtained from
+ # the built-in function compile(), load the module. Return a new
+ # reference to the module object, or NULL with an exception set if
+ # an error occurred. Name is removed from sys.modules in error
+ # cases, and even if name was already in sys.modules on entry to
+ # PyImport_ExecCodeModule(). Leaving incompletely initialized
+ # modules in sys.modules is dangerous, as imports of such modules
+ # have no way to know that the module object is an unknown (and
+ # probably damaged with respect to the module author's intents)
+ # state.
+ # This function will reload the module if it was already
+ # imported. See PyImport_ReloadModule() for the intended way to
+ # reload a module.
+ # If name points to a dotted name of the form package.module, any
+ # package structures not already created will still not be
+ # created.
+
+
+ long PyImport_GetMagicNumber()
+ # Return the magic number for Python bytecode files (a.k.a. .pyc
+ # and .pyo files). The magic number should be present in the first
+ # four bytes of the bytecode file, in little-endian byte order.
+
+ PyObject* PyImport_GetModuleDict() except NULL
+ # Return value: Borrowed reference.
+ # Return the dictionary used for the module administration
+ # (a.k.a. sys.modules). Note that this is a per-interpreter
+ # variable.
+
+
+ int PyImport_ImportFrozenModule(char *name) except -1
+ # Load a frozen module named name. Return 1 for success, 0 if the
+ # module is not found, and -1 with an exception set if the
+ # initialization failed. To access the imported module on a
+ # successful load, use PyImport_ImportModule(). (Note the misnomer
+ # -- this function would reload the module if it was already
+ # imported.)
+
+
+ int PyImport_ExtendInittab(_inittab *newtab) except -1
+ # Add a collection of modules to the table of built-in
+ # modules. The newtab array must end with a sentinel entry which
+ # contains NULL for the name field; failure to provide the
+ # sentinel value can result in a memory fault. Returns 0 on
+ # success or -1 if insufficient memory could be allocated to
+ # extend the internal table. In the event of failure, no modules
+ # are added to the internal table. This should be called before
+ # Py_Initialize().
+
+ #####################################################################
+ # 7.5.5 Module Objects
+ #####################################################################
+
+ # PyTypeObject PyModule_Type
+ #
+ # This instance of PyTypeObject represents the Python module
+ # type. This is exposed to Python programs as types.ModuleType.
+
+ bint PyModule_Check(object p)
+ # Return true if p is a module object, or a subtype of a module
+ # object.
+
+ bint PyModule_CheckExact(object p)
+ # Return true if p is a module object, but not a subtype of PyModule_Type.
+
object PyModule_New(const char *name)
- # Return value: New reference.
- # Return a new module object with the __name__ attribute set to
- # name. Only the module's __doc__ and __name__ attributes are
- # filled in; the caller is responsible for providing a __file__
- # attribute.
-
- PyObject* PyModule_GetDict(object module) except NULL
- # Return value: Borrowed reference.
- # Return the dictionary object that implements module's namespace;
- # this object is the same as the __dict__ attribute of the module
- # object. This function never fails. It is recommended extensions
- # use other PyModule_*() and PyObject_*() functions rather than
- # directly manipulate a module's __dict__.
-
- char* PyModule_GetName(object module) except NULL
- # Return module's __name__ value. If the module does not provide
- # one, or if it is not a string, SystemError is raised and NULL is
- # returned.
-
- char* PyModule_GetFilename(object module) except NULL
- # Return the name of the file from which module was loaded using
- # module's __file__ attribute. If this is not defined, or if it is
- # not a string, raise SystemError and return NULL.
-
+ # Return value: New reference.
+ # Return a new module object with the __name__ attribute set to
+ # name. Only the module's __doc__ and __name__ attributes are
+ # filled in; the caller is responsible for providing a __file__
+ # attribute.
+
+ PyObject* PyModule_GetDict(object module) except NULL
+ # Return value: Borrowed reference.
+ # Return the dictionary object that implements module's namespace;
+ # this object is the same as the __dict__ attribute of the module
+ # object. This function never fails. It is recommended extensions
+ # use other PyModule_*() and PyObject_*() functions rather than
+ # directly manipulate a module's __dict__.
+
+ char* PyModule_GetName(object module) except NULL
+ # Return module's __name__ value. If the module does not provide
+ # one, or if it is not a string, SystemError is raised and NULL is
+ # returned.
+
+ char* PyModule_GetFilename(object module) except NULL
+ # Return the name of the file from which module was loaded using
+ # module's __file__ attribute. If this is not defined, or if it is
+ # not a string, raise SystemError and return NULL.
+
int PyModule_AddObject(object module, const char *name, object value) except -1
- # Add an object to module as name. This is a convenience function
- # which can be used from the module's initialization
- # function. This steals a reference to value. Return -1 on error,
- # 0 on success.
-
+ # Add an object to module as name. This is a convenience function
+ # which can be used from the module's initialization
+ # function. This steals a reference to value. Return -1 on error,
+ # 0 on success.
+
int PyModule_AddIntConstant(object module, const char *name, long value) except -1
- # Add an integer constant to module as name. This convenience
- # function can be used from the module's initialization
- # function. Return -1 on error, 0 on success.
-
+ # Add an integer constant to module as name. This convenience
+ # function can be used from the module's initialization
+ # function. Return -1 on error, 0 on success.
+
int PyModule_AddStringConstant(object module, const char *name, const char *value) except -1
- # Add a string constant to module as name. This convenience
- # function can be used from the module's initialization
- # function. The string value must be null-terminated. Return -1 on
- # error, 0 on success.
+ # Add a string constant to module as name. This convenience
+ # function can be used from the module's initialization
+ # function. The string value must be null-terminated. Return -1 on
+ # error, 0 on success.
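Illustrative aside, not part of the patch: the module API declared in this file is normally used by cimporting the declarations. A minimal sketch, assuming they are available under cpython.module (as in Cython's bundled Includes) and using a hypothetical helper name:

    from cpython.module cimport (PyModule_New, PyModule_AddIntConstant,
                                 PyModule_AddStringConstant)

    def build_demo_module():
        # PyModule_New fills in only __name__ and __doc__; anything else,
        # including __file__, is left to the caller.
        mod = PyModule_New(b"demo")
        # The Add* helpers return 0 on success; the "except -1" declarations
        # above turn their -1 error returns into Python exceptions.
        PyModule_AddIntConstant(mod, b"ANSWER", 42)
        PyModule_AddStringConstant(mod, b"GREETING", b"hello")
        return mod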
diff --git a/contrib/tools/cython/Cython/Includes/cpython/number.pxd b/contrib/tools/cython/Cython/Includes/cpython/number.pxd
index f3a1629d44..ded35c292a 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/number.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/number.pxd
@@ -1,32 +1,32 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- #####################################################################
- # 6.2 Number Protocol
- #####################################################################
-
- bint PyNumber_Check(object o)
- # Returns 1 if the object o provides numeric protocols, and false
- # otherwise. This function always succeeds.
-
- object PyNumber_Add(object o1, object o2)
- # Return value: New reference.
- # Returns the result of adding o1 and o2, or NULL on failure. This
- # is the equivalent of the Python expression "o1 + o2".
-
- object PyNumber_Subtract(object o1, object o2)
- # Return value: New reference.
- # Returns the result of subtracting o2 from o1, or NULL on
- # failure. This is the equivalent of the Python expression "o1 -
- # o2".
-
- object PyNumber_Multiply(object o1, object o2)
- # Return value: New reference.
- # Returns the result of multiplying o1 and o2, or NULL on
- # failure. This is the equivalent of the Python expression "o1 *
- # o2".
-
+
+cdef extern from "Python.h":
+
+ #####################################################################
+ # 6.2 Number Protocol
+ #####################################################################
+
+ bint PyNumber_Check(object o)
+ # Returns 1 if the object o provides numeric protocols, and false
+ # otherwise. This function always succeeds.
+
+ object PyNumber_Add(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of adding o1 and o2, or NULL on failure. This
+ # is the equivalent of the Python expression "o1 + o2".
+
+ object PyNumber_Subtract(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of subtracting o2 from o1, or NULL on
+ # failure. This is the equivalent of the Python expression "o1 -
+ # o2".
+
+ object PyNumber_Multiply(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of multiplying o1 and o2, or NULL on
+ # failure. This is the equivalent of the Python expression "o1 *
+ # o2".
+
object PyNumber_MatrixMultiply(object o1, object o2)
# Return value: New reference.
# Returns the result of matrix multiplication on o1 and o2, or
@@ -34,112 +34,112 @@ cdef extern from "Python.h":
# expression "o1 @ o2".
# New in version 3.5.
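Illustrative aside, not part of the patch: the binary operations above map one-to-one onto Python operators, so they compose like ordinary expressions. A minimal sketch with a hypothetical helper, assuming the declarations are cimported from cpython.number:

    from cpython.number cimport PyNumber_Add, PyNumber_Multiply

    def fused_multiply_add(a, b, c):
        # Equivalent to the Python expression "a * b + c"; each call returns a
        # new reference, which Cython manages automatically because the
        # functions are declared to return "object".
        return PyNumber_Add(PyNumber_Multiply(a, b), c)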
- object PyNumber_Divide(object o1, object o2)
- # Return value: New reference.
- # Returns the result of dividing o1 by o2, or NULL on
- # failure. This is the equivalent of the Python expression "o1 /
- # o2".
-
- object PyNumber_FloorDivide(object o1, object o2)
- # Return value: New reference.
- # Return the floor of o1 divided by o2, or NULL on failure. This
- # is equivalent to the ``classic'' division of integers.
-
- object PyNumber_TrueDivide(object o1, object o2)
- # Return value: New reference.
- # Return a reasonable approximation for the mathematical value of
- # o1 divided by o2, or NULL on failure. The return value is
- # ``approximate'' because binary floating point numbers are
- # approximate; it is not possible to represent all real numbers in
- # base two. This function can return a floating point value when
- # passed two integers.
-
- object PyNumber_Remainder(object o1, object o2)
- # Return value: New reference.
- # Returns the remainder of dividing o1 by o2, or NULL on
- # failure. This is the equivalent of the Python expression "o1 %
- # o2".
-
- object PyNumber_Divmod(object o1, object o2)
- # Return value: New reference.
- # See the built-in function divmod(). Returns NULL on
- # failure. This is the equivalent of the Python expression
- # "divmod(o1, o2)".
-
- object PyNumber_Power(object o1, object o2, object o3)
- # Return value: New reference.
- # See the built-in function pow(). Returns NULL on failure. This
- # is the equivalent of the Python expression "pow(o1, o2, o3)",
- # where o3 is optional. If o3 is to be ignored, pass Py_None in
- # its place (passing NULL for o3 would cause an illegal memory
- # access).
-
- object PyNumber_Negative(object o)
- # Return value: New reference.
- # Returns the negation of o on success, or NULL on failure. This
- # is the equivalent of the Python expression "-o".
-
- object PyNumber_Positive(object o)
- # Return value: New reference.
- # Returns o on success, or NULL on failure. This is the equivalent
- # of the Python expression "+o".
-
- object PyNumber_Absolute(object o)
- # Return value: New reference.
- # Returns the absolute value of o, or NULL on failure. This is the
- # equivalent of the Python expression "abs(o)".
-
- object PyNumber_Invert(object o)
- # Return value: New reference.
- # Returns the bitwise negation of o on success, or NULL on
- # failure. This is the equivalent of the Python expression "~o".
-
- object PyNumber_Lshift(object o1, object o2)
- # Return value: New reference.
- # Returns the result of left shifting o1 by o2 on success, or NULL
- # on failure. This is the equivalent of the Python expression "o1
- # << o2".
-
- object PyNumber_Rshift(object o1, object o2)
- # Return value: New reference.
- # Returns the result of right shifting o1 by o2 on success, or
- # NULL on failure. This is the equivalent of the Python expression
- # "o1 >> o2".
-
- object PyNumber_And(object o1, object o2)
- # Return value: New reference.
- # Returns the ``bitwise and'' of o1 and o2 on success and NULL on
- # failure. This is the equivalent of the Python expression "o1 &
- # o2".
-
- object PyNumber_Xor(object o1, object o2)
- # Return value: New reference.
- # Returns the ``bitwise exclusive or'' of o1 by o2 on success, or
- # NULL on failure. This is the equivalent of the Python expression
- # "o1 ^ o2".
-
- object PyNumber_Or(object o1, object o2)
- # Return value: New reference.
- # Returns the ``bitwise or'' of o1 and o2 on success, or NULL on failure. This is the equivalent of the Python expression "o1 | o2".
-
- object PyNumber_InPlaceAdd(object o1, object o2)
- # Return value: New reference.
- # Returns the result of adding o1 and o2, or NULL on failure. The
- # operation is done in-place when o1 supports it. This is the
- # equivalent of the Python statement "o1 += o2".
-
- object PyNumber_InPlaceSubtract(object o1, object o2)
- # Return value: New reference.
- # Returns the result of subtracting o2 from o1, or NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 -= o2".
-
- object PyNumber_InPlaceMultiply(object o1, object o2)
- # Return value: New reference.
- # Returns the result of multiplying o1 and o2, or NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 *= o2".
-
+ object PyNumber_Divide(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of dividing o1 by o2, or NULL on
+ # failure. This is the equivalent of the Python expression "o1 /
+ # o2".
+
+ object PyNumber_FloorDivide(object o1, object o2)
+ # Return value: New reference.
+ # Return the floor of o1 divided by o2, or NULL on failure. This
+ # is equivalent to the ``classic'' division of integers.
+
+ object PyNumber_TrueDivide(object o1, object o2)
+ # Return value: New reference.
+ # Return a reasonable approximation for the mathematical value of
+ # o1 divided by o2, or NULL on failure. The return value is
+ # ``approximate'' because binary floating point numbers are
+ # approximate; it is not possible to represent all real numbers in
+ # base two. This function can return a floating point value when
+ # passed two integers.
+
+ object PyNumber_Remainder(object o1, object o2)
+ # Return value: New reference.
+ # Returns the remainder of dividing o1 by o2, or NULL on
+ # failure. This is the equivalent of the Python expression "o1 %
+ # o2".
+
+ object PyNumber_Divmod(object o1, object o2)
+ # Return value: New reference.
+ # See the built-in function divmod(). Returns NULL on
+ # failure. This is the equivalent of the Python expression
+ # "divmod(o1, o2)".
+
+ object PyNumber_Power(object o1, object o2, object o3)
+ # Return value: New reference.
+ # See the built-in function pow(). Returns NULL on failure. This
+ # is the equivalent of the Python expression "pow(o1, o2, o3)",
+ # where o3 is optional. If o3 is to be ignored, pass Py_None in
+ # its place (passing NULL for o3 would cause an illegal memory
+ # access).
+
+ object PyNumber_Negative(object o)
+ # Return value: New reference.
+ # Returns the negation of o on success, or NULL on failure. This
+ # is the equivalent of the Python expression "-o".
+
+ object PyNumber_Positive(object o)
+ # Return value: New reference.
+ # Returns o on success, or NULL on failure. This is the equivalent
+ # of the Python expression "+o".
+
+ object PyNumber_Absolute(object o)
+ # Return value: New reference.
+ # Returns the absolute value of o, or NULL on failure. This is the
+ # equivalent of the Python expression "abs(o)".
+
+ object PyNumber_Invert(object o)
+ # Return value: New reference.
+ # Returns the bitwise negation of o on success, or NULL on
+ # failure. This is the equivalent of the Python expression "~o".
+
+ object PyNumber_Lshift(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of left shifting o1 by o2 on success, or NULL
+ # on failure. This is the equivalent of the Python expression "o1
+ # << o2".
+
+ object PyNumber_Rshift(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of right shifting o1 by o2 on success, or
+ # NULL on failure. This is the equivalent of the Python expression
+ # "o1 >> o2".
+
+ object PyNumber_And(object o1, object o2)
+ # Return value: New reference.
+ # Returns the ``bitwise and'' of o1 and o2 on success and NULL on
+ # failure. This is the equivalent of the Python expression "o1 &
+ # o2".
+
+ object PyNumber_Xor(object o1, object o2)
+ # Return value: New reference.
+ # Returns the ``bitwise exclusive or'' of o1 by o2 on success, or
+ # NULL on failure. This is the equivalent of the Python expression
+ # "o1 ^ o2".
+
+ object PyNumber_Or(object o1, object o2)
+ # Return value: New reference.
+    # Returns the ``bitwise or'' of o1 and o2 on success, or NULL on
+    # failure. This is the equivalent of the Python expression "o1 | o2".
+
+ object PyNumber_InPlaceAdd(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of adding o1 and o2, or NULL on failure. The
+ # operation is done in-place when o1 supports it. This is the
+ # equivalent of the Python statement "o1 += o2".
+
+ object PyNumber_InPlaceSubtract(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of subtracting o2 from o1, or NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 -= o2".
+
+ object PyNumber_InPlaceMultiply(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of multiplying o1 and o2, or NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 *= o2".
+
object PyNumber_InPlaceMatrixMultiply(object o1, object o2)
# Return value: New reference.
# Returns the result of matrix multiplication on o1 and o2, or
@@ -147,119 +147,119 @@ cdef extern from "Python.h":
# it. This is the equivalent of the Python statement "o1 @= o2".
# New in version 3.5.
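Illustrative aside, not part of the patch: the in-place variants only mutate when the left operand supports it, so the return value must always be used. A hypothetical helper sketch, assuming the declaration is cimported from cpython.number:

    from cpython.number cimport PyNumber_InPlaceAdd

    def append_all(seq, items):
        # For a list this behaves like "seq += items" and returns the same
        # object; for a tuple a new object is returned, so callers must keep
        # the result rather than rely on mutation.
        return PyNumber_InPlaceAdd(seq, items)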
- object PyNumber_InPlaceDivide(object o1, object o2)
- # Return value: New reference.
- # Returns the result of dividing o1 by o2, or NULL on failure. The
- # operation is done in-place when o1 supports it. This is the
- # equivalent of the Python statement "o1 /= o2".
-
- object PyNumber_InPlaceFloorDivide(object o1, object o2)
- # Return value: New reference.
- # Returns the mathematical floor of dividing o1 by o2, or NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 //=
- # o2".
-
- object PyNumber_InPlaceTrueDivide(object o1, object o2)
- # Return value: New reference.
- # Return a reasonable approximation for the mathematical value of
- # o1 divided by o2, or NULL on failure. The return value is
- # ``approximate'' because binary floating point numbers are
- # approximate; it is not possible to represent all real numbers in
- # base two. This function can return a floating point value when
- # passed two integers. The operation is done in-place when o1
- # supports it.
-
- object PyNumber_InPlaceRemainder(object o1, object o2)
- # Return value: New reference.
- # Returns the remainder of dividing o1 by o2, or NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 %= o2".
-
- object PyNumber_InPlacePower(object o1, object o2, object o3)
- # Return value: New reference.
- # See the built-in function pow(). Returns NULL on failure. The
- # operation is done in-place when o1 supports it. This is the
- # equivalent of the Python statement "o1 **= o2" when o3 is
- # Py_None, or an in-place variant of "pow(o1, o2, o3)"
- # otherwise. If o3 is to be ignored, pass Py_None in its place
- # (passing NULL for o3 would cause an illegal memory access).
-
- object PyNumber_InPlaceLshift(object o1, object o2)
- # Return value: New reference.
- # Returns the result of left shifting o1 by o2 on success, or NULL
- # on failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 <<= o2".
-
- object PyNumber_InPlaceRshift(object o1, object o2)
- # Return value: New reference.
- # Returns the result of right shifting o1 by o2 on success, or
- # NULL on failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 >>= o2".
-
- object PyNumber_InPlaceAnd(object o1, object o2)
- # Return value: New reference.
- # Returns the ``bitwise and'' of o1 and o2 on success and NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 &= o2".
-
- object PyNumber_InPlaceXor(object o1, object o2)
- # Return value: New reference.
- # Returns the ``bitwise exclusive or'' of o1 by o2 on success, or
- # NULL on failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 ^= o2".
-
- object PyNumber_InPlaceOr(object o1, object o2)
- # Return value: New reference.
- # Returns the ``bitwise or'' of o1 and o2 on success, or NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python statement "o1 |= o2".
-
- int PyNumber_Coerce(PyObject **p1, PyObject **p2) except -1
- # This function takes the addresses of two variables of type
- # PyObject*. If the objects pointed to by *p1 and *p2 have the
- # same type, increment their reference count and return 0
- # (success). If the objects can be converted to a common numeric
- # type, replace *p1 and *p2 by their converted value (with 'new'
- # reference counts), and return 0. If no conversion is possible,
- # or if some other error occurs, return -1 (failure) and don't
- # increment the reference counts. The call PyNumber_Coerce(&o1,
- # &o2) is equivalent to the Python statement "o1, o2 = coerce(o1,
- # o2)".
-
- object PyNumber_Int(object o)
- # Return value: New reference.
- # Returns the o converted to an integer object on success, or NULL
- # on failure. If the argument is outside the integer range a long
- # object will be returned instead. This is the equivalent of the
- # Python expression "int(o)".
-
- object PyNumber_Long(object o)
- # Return value: New reference.
- # Returns the o converted to a long integer object on success, or
- # NULL on failure. This is the equivalent of the Python expression
- # "long(o)".
-
- object PyNumber_Float(object o)
- # Return value: New reference.
- # Returns the o converted to a float object on success, or NULL on
- # failure. This is the equivalent of the Python expression
- # "float(o)".
-
- object PyNumber_Index(object o)
- # Returns the o converted to a Python int or long on success or
- # NULL with a TypeError exception raised on failure.
-
- Py_ssize_t PyNumber_AsSsize_t(object o, object exc) except? -1
- # Returns o converted to a Py_ssize_t value if o can be
- # interpreted as an integer. If o can be converted to a Python int
- # or long but the attempt to convert to a Py_ssize_t value would
- # raise an OverflowError, then the exc argument is the type of
- # exception that will be raised (usually IndexError or
- # OverflowError). If exc is NULL, then the exception is cleared
- # and the value is clipped to PY_SSIZE_T_MIN for a negative
- # integer or PY_SSIZE_T_MAX for a positive integer.
-
- bint PyIndex_Check(object)
- # Returns True if o is an index integer (has the nb_index slot of
- # the tp_as_number structure filled in).
+ object PyNumber_InPlaceDivide(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of dividing o1 by o2, or NULL on failure. The
+ # operation is done in-place when o1 supports it. This is the
+ # equivalent of the Python statement "o1 /= o2".
+
+ object PyNumber_InPlaceFloorDivide(object o1, object o2)
+ # Return value: New reference.
+ # Returns the mathematical floor of dividing o1 by o2, or NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 //=
+ # o2".
+
+ object PyNumber_InPlaceTrueDivide(object o1, object o2)
+ # Return value: New reference.
+ # Return a reasonable approximation for the mathematical value of
+ # o1 divided by o2, or NULL on failure. The return value is
+ # ``approximate'' because binary floating point numbers are
+ # approximate; it is not possible to represent all real numbers in
+ # base two. This function can return a floating point value when
+ # passed two integers. The operation is done in-place when o1
+ # supports it.
+
+ object PyNumber_InPlaceRemainder(object o1, object o2)
+ # Return value: New reference.
+ # Returns the remainder of dividing o1 by o2, or NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 %= o2".
+
+ object PyNumber_InPlacePower(object o1, object o2, object o3)
+ # Return value: New reference.
+ # See the built-in function pow(). Returns NULL on failure. The
+ # operation is done in-place when o1 supports it. This is the
+ # equivalent of the Python statement "o1 **= o2" when o3 is
+ # Py_None, or an in-place variant of "pow(o1, o2, o3)"
+ # otherwise. If o3 is to be ignored, pass Py_None in its place
+ # (passing NULL for o3 would cause an illegal memory access).
+
+ object PyNumber_InPlaceLshift(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of left shifting o1 by o2 on success, or NULL
+ # on failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 <<= o2".
+
+ object PyNumber_InPlaceRshift(object o1, object o2)
+ # Return value: New reference.
+ # Returns the result of right shifting o1 by o2 on success, or
+ # NULL on failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 >>= o2".
+
+ object PyNumber_InPlaceAnd(object o1, object o2)
+ # Return value: New reference.
+ # Returns the ``bitwise and'' of o1 and o2 on success and NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 &= o2".
+
+ object PyNumber_InPlaceXor(object o1, object o2)
+ # Return value: New reference.
+ # Returns the ``bitwise exclusive or'' of o1 by o2 on success, or
+ # NULL on failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 ^= o2".
+
+ object PyNumber_InPlaceOr(object o1, object o2)
+ # Return value: New reference.
+ # Returns the ``bitwise or'' of o1 and o2 on success, or NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python statement "o1 |= o2".
+
+ int PyNumber_Coerce(PyObject **p1, PyObject **p2) except -1
+ # This function takes the addresses of two variables of type
+ # PyObject*. If the objects pointed to by *p1 and *p2 have the
+ # same type, increment their reference count and return 0
+ # (success). If the objects can be converted to a common numeric
+ # type, replace *p1 and *p2 by their converted value (with 'new'
+ # reference counts), and return 0. If no conversion is possible,
+ # or if some other error occurs, return -1 (failure) and don't
+ # increment the reference counts. The call PyNumber_Coerce(&o1,
+ # &o2) is equivalent to the Python statement "o1, o2 = coerce(o1,
+ # o2)".
+
+ object PyNumber_Int(object o)
+ # Return value: New reference.
+ # Returns the o converted to an integer object on success, or NULL
+ # on failure. If the argument is outside the integer range a long
+ # object will be returned instead. This is the equivalent of the
+ # Python expression "int(o)".
+
+ object PyNumber_Long(object o)
+ # Return value: New reference.
+ # Returns the o converted to a long integer object on success, or
+ # NULL on failure. This is the equivalent of the Python expression
+ # "long(o)".
+
+ object PyNumber_Float(object o)
+ # Return value: New reference.
+ # Returns the o converted to a float object on success, or NULL on
+ # failure. This is the equivalent of the Python expression
+ # "float(o)".
+
+ object PyNumber_Index(object o)
+ # Returns the o converted to a Python int or long on success or
+ # NULL with a TypeError exception raised on failure.
+
+ Py_ssize_t PyNumber_AsSsize_t(object o, object exc) except? -1
+ # Returns o converted to a Py_ssize_t value if o can be
+ # interpreted as an integer. If o can be converted to a Python int
+ # or long but the attempt to convert to a Py_ssize_t value would
+ # raise an OverflowError, then the exc argument is the type of
+ # exception that will be raised (usually IndexError or
+ # OverflowError). If exc is NULL, then the exception is cleared
+ # and the value is clipped to PY_SSIZE_T_MIN for a negative
+ # integer or PY_SSIZE_T_MAX for a positive integer.
+
+ bint PyIndex_Check(object)
+ # Returns True if o is an index integer (has the nb_index slot of
+ # the tp_as_number structure filled in).
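Illustrative aside, not part of the patch: converting an index-like object with the declarations above. Because exc is declared as object, NULL cannot be passed from Cython, so the clamping behaviour described in the comment is not reachable through this declaration and an exception type has to be supplied instead; the helper name is hypothetical:

    from cpython.number cimport PyIndex_Check, PyNumber_AsSsize_t

    def as_index(obj):
        # Mirror what sequence indexing does internally: reject non-integers,
        # then convert, raising IndexError if the value does not fit in
        # Py_ssize_t.
        if not PyIndex_Check(obj):
            raise TypeError("an integer index is required")
        return PyNumber_AsSsize_t(obj, IndexError)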
diff --git a/contrib/tools/cython/Cython/Includes/cpython/object.pxd b/contrib/tools/cython/Cython/Includes/cpython/object.pxd
index 7917811c2c..5a81166393 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/object.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/object.pxd
@@ -1,8 +1,8 @@
-from libc.stdio cimport FILE
+from libc.stdio cimport FILE
cimport cpython.type
-
-cdef extern from "Python.h":
-
+
+cdef extern from "Python.h":
+
ctypedef struct PyObject # forward declaration
ctypedef object (*newfunc)(cpython.type.type, object, object) # (type, args, kwargs)
@@ -73,297 +73,297 @@ cdef extern from "Python.h":
void* PyObject_Realloc(void *, size_t)
void PyObject_Free(void *)
- #####################################################################
- # 6.1 Object Protocol
- #####################################################################
- int PyObject_Print(object o, FILE *fp, int flags) except -1
- # Print an object o, on file fp. Returns -1 on error. The flags
- # argument is used to enable certain printing options. The only
- # option currently supported is Py_PRINT_RAW; if given, the str()
- # of the object is written instead of the repr().
-
+ #####################################################################
+ # 6.1 Object Protocol
+ #####################################################################
+ int PyObject_Print(object o, FILE *fp, int flags) except -1
+ # Print an object o, on file fp. Returns -1 on error. The flags
+ # argument is used to enable certain printing options. The only
+ # option currently supported is Py_PRINT_RAW; if given, the str()
+ # of the object is written instead of the repr().
+
bint PyObject_HasAttrString(object o, const char *attr_name)
- # Returns 1 if o has the attribute attr_name, and 0
- # otherwise. This is equivalent to the Python expression
- # "hasattr(o, attr_name)". This function always succeeds.
-
+ # Returns 1 if o has the attribute attr_name, and 0
+ # otherwise. This is equivalent to the Python expression
+ # "hasattr(o, attr_name)". This function always succeeds.
+
object PyObject_GetAttrString(object o, const char *attr_name)
- # Return value: New reference. Retrieve an attribute named
- # attr_name from object o. Returns the attribute value on success,
- # or NULL on failure. This is the equivalent of the Python
- # expression "o.attr_name".
-
- bint PyObject_HasAttr(object o, object attr_name)
- # Returns 1 if o has the attribute attr_name, and 0
- # otherwise. This is equivalent to the Python expression
- # "hasattr(o, attr_name)". This function always succeeds.
-
- object PyObject_GetAttr(object o, object attr_name)
- # Return value: New reference. Retrieve an attribute named
- # attr_name from object o. Returns the attribute value on success,
- # or NULL on failure. This is the equivalent of the Python
- # expression "o.attr_name".
-
+ # Return value: New reference. Retrieve an attribute named
+ # attr_name from object o. Returns the attribute value on success,
+ # or NULL on failure. This is the equivalent of the Python
+ # expression "o.attr_name".
+
+ bint PyObject_HasAttr(object o, object attr_name)
+ # Returns 1 if o has the attribute attr_name, and 0
+ # otherwise. This is equivalent to the Python expression
+ # "hasattr(o, attr_name)". This function always succeeds.
+
+ object PyObject_GetAttr(object o, object attr_name)
+ # Return value: New reference. Retrieve an attribute named
+ # attr_name from object o. Returns the attribute value on success,
+ # or NULL on failure. This is the equivalent of the Python
+ # expression "o.attr_name".
+
object PyObject_GenericGetAttr(object o, object attr_name)
int PyObject_SetAttrString(object o, const char *attr_name, object v) except -1
- # Set the value of the attribute named attr_name, for object o, to
- # the value v. Returns -1 on failure. This is the equivalent of
- # the Python statement "o.attr_name = v".
-
- int PyObject_SetAttr(object o, object attr_name, object v) except -1
- # Set the value of the attribute named attr_name, for object o, to
- # the value v. Returns -1 on failure. This is the equivalent of
- # the Python statement "o.attr_name = v".
-
+ # Set the value of the attribute named attr_name, for object o, to
+ # the value v. Returns -1 on failure. This is the equivalent of
+ # the Python statement "o.attr_name = v".
+
+ int PyObject_SetAttr(object o, object attr_name, object v) except -1
+ # Set the value of the attribute named attr_name, for object o, to
+ # the value v. Returns -1 on failure. This is the equivalent of
+ # the Python statement "o.attr_name = v".
+
int PyObject_GenericSetAttr(object o, object attr_name, object v) except -1
int PyObject_DelAttrString(object o, const char *attr_name) except -1
- # Delete attribute named attr_name, for object o. Returns -1 on
- # failure. This is the equivalent of the Python statement: "del
- # o.attr_name".
-
- int PyObject_DelAttr(object o, object attr_name) except -1
- # Delete attribute named attr_name, for object o. Returns -1 on
- # failure. This is the equivalent of the Python statement "del
- # o.attr_name".
-
- int Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, Py_GE
-
- object PyObject_RichCompare(object o1, object o2, int opid)
- # Return value: New reference.
- # Compare the values of o1 and o2 using the operation specified by
- # opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or
- # Py_GE, corresponding to <, <=, ==, !=, >, or >=
- # respectively. This is the equivalent of the Python expression
- # "o1 op o2", where op is the operator corresponding to
- # opid. Returns the value of the comparison on success, or NULL on
- # failure.
-
- bint PyObject_RichCompareBool(object o1, object o2, int opid) except -1
- # Compare the values of o1 and o2 using the operation specified by
- # opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or
- # Py_GE, corresponding to <, <=, ==, !=, >, or >=
- # respectively. Returns -1 on error, 0 if the result is false, 1
- # otherwise. This is the equivalent of the Python expression "o1
- # op o2", where op is the operator corresponding to opid.
-
- int PyObject_Cmp(object o1, object o2, int *result) except -1
- # Compare the values of o1 and o2 using a routine provided by o1,
- # if one exists, otherwise with a routine provided by o2. The
- # result of the comparison is returned in result. Returns -1 on
- # failure. This is the equivalent of the Python statement "result
- # = cmp(o1, o2)".
-
- int PyObject_Compare(object o1, object o2) except *
- # Compare the values of o1 and o2 using a routine provided by o1,
- # if one exists, otherwise with a routine provided by o2. Returns
- # the result of the comparison on success. On error, the value
- # returned is undefined; use PyErr_Occurred() to detect an
- # error. This is equivalent to the Python expression "cmp(o1,
- # o2)".
-
- object PyObject_Repr(object o)
- # Return value: New reference.
- # Compute a string representation of object o. Returns the string
- # representation on success, NULL on failure. This is the
- # equivalent of the Python expression "repr(o)". Called by the
- # repr() built-in function and by reverse quotes.
-
- object PyObject_Str(object o)
- # Return value: New reference.
- # Compute a string representation of object o. Returns the string
- # representation on success, NULL on failure. This is the
- # equivalent of the Python expression "str(o)". Called by the
- # str() built-in function and by the print statement.
-
- object PyObject_Unicode(object o)
- # Return value: New reference.
- # Compute a Unicode string representation of object o. Returns the
- # Unicode string representation on success, NULL on failure. This
- # is the equivalent of the Python expression "unicode(o)". Called
- # by the unicode() built-in function.
-
- bint PyObject_IsInstance(object inst, object cls) except -1
- # Returns 1 if inst is an instance of the class cls or a subclass
- # of cls, or 0 if not. On error, returns -1 and sets an
- # exception. If cls is a type object rather than a class object,
- # PyObject_IsInstance() returns 1 if inst is of type cls. If cls
- # is a tuple, the check will be done against every entry in
- # cls. The result will be 1 when at least one of the checks
- # returns 1, otherwise it will be 0. If inst is not a class
- # instance and cls is neither a type object, nor a class object,
- # nor a tuple, inst must have a __class__ attribute -- the class
- # relationship of the value of that attribute with cls will be
- # used to determine the result of this function.
-
- # Subclass determination is done in a fairly straightforward way,
- # but includes a wrinkle that implementors of extensions to the
- # class system may want to be aware of. If A and B are class
- # objects, B is a subclass of A if it inherits from A either
- # directly or indirectly. If either is not a class object, a more
- # general mechanism is used to determine the class relationship of
- # the two objects. When testing if B is a subclass of A, if A is
- # B, PyObject_IsSubclass() returns true. If A and B are different
- # objects, B's __bases__ attribute is searched in a depth-first
- # fashion for A -- the presence of the __bases__ attribute is
- # considered sufficient for this determination.
-
- bint PyObject_IsSubclass(object derived, object cls) except -1
- # Returns 1 if the class derived is identical to or derived from
- # the class cls, otherwise returns 0. In case of an error, returns
- # -1. If cls is a tuple, the check will be done against every
- # entry in cls. The result will be 1 when at least one of the
- # checks returns 1, otherwise it will be 0. If either derived or
- # cls is not an actual class object (or tuple), this function uses
- # the generic algorithm described above. New in version
- # 2.1. Changed in version 2.3: Older versions of Python did not
- # support a tuple as the second argument.
-
- bint PyCallable_Check(object o)
- # Determine if the object o is callable. Return 1 if the object is
- # callable and 0 otherwise. This function always succeeds.
-
- object PyObject_Call(object callable_object, object args, object kw)
- # Return value: New reference.
- # Call a callable Python object callable_object, with arguments
- # given by the tuple args, and named arguments given by the
- # dictionary kw. If no named arguments are needed, kw may be
- # NULL. args must not be NULL, use an empty tuple if no arguments
- # are needed. Returns the result of the call on success, or NULL
- # on failure. This is the equivalent of the Python expression
- # "apply(callable_object, args, kw)" or "callable_object(*args,
- # **kw)".
-
- object PyObject_CallObject(object callable_object, object args)
- # Return value: New reference.
- # Call a callable Python object callable_object, with arguments
- # given by the tuple args. If no arguments are needed, then args
- # may be NULL. Returns the result of the call on success, or NULL
- # on failure. This is the equivalent of the Python expression
- # "apply(callable_object, args)" or "callable_object(*args)".
-
- object PyObject_CallFunction(object callable, char *format, ...)
- # Return value: New reference.
- # Call a callable Python object callable, with a variable number
- # of C arguments. The C arguments are described using a
- # Py_BuildValue() style format string. The format may be NULL,
- # indicating that no arguments are provided. Returns the result of
- # the call on success, or NULL on failure. This is the equivalent
- # of the Python expression "apply(callable, args)" or
- # "callable(*args)". Note that if you only pass object args,
- # PyObject_CallFunctionObjArgs is a faster alternative.
-
- object PyObject_CallMethod(object o, char *method, char *format, ...)
- # Return value: New reference.
- # Call the method named method of object o with a variable number
- # of C arguments. The C arguments are described by a
- # Py_BuildValue() format string that should produce a tuple. The
- # format may be NULL, indicating that no arguments are
- # provided. Returns the result of the call on success, or NULL on
- # failure. This is the equivalent of the Python expression
- # "o.method(args)". Note that if you only pass object args,
- # PyObject_CallMethodObjArgs is a faster alternative.
-
- #object PyObject_CallFunctionObjArgs(object callable, ..., NULL)
- object PyObject_CallFunctionObjArgs(object callable, ...)
- # Return value: New reference.
- # Call a callable Python object callable, with a variable number
- # of PyObject* arguments. The arguments are provided as a variable
- # number of parameters followed by NULL. Returns the result of the
- # call on success, or NULL on failure.
-
- #PyObject* PyObject_CallMethodObjArgs(object o, object name, ..., NULL)
- object PyObject_CallMethodObjArgs(object o, object name, ...)
- # Return value: New reference.
- # Calls a method of the object o, where the name of the method is
- # given as a Python string object in name. It is called with a
- # variable number of PyObject* arguments. The arguments are
- # provided as a variable number of parameters followed by
- # NULL. Returns the result of the call on success, or NULL on
- # failure.
-
- long PyObject_Hash(object o) except? -1
- # Compute and return the hash value of an object o. On failure,
- # return -1. This is the equivalent of the Python expression
- # "hash(o)".
-
- bint PyObject_IsTrue(object o) except -1
- # Returns 1 if the object o is considered to be true, and 0
- # otherwise. This is equivalent to the Python expression "not not
- # o". On failure, return -1.
-
- bint PyObject_Not(object o) except -1
- # Returns 0 if the object o is considered to be true, and 1
- # otherwise. This is equivalent to the Python expression "not
- # o". On failure, return -1.
-
- object PyObject_Type(object o)
- # Return value: New reference.
- # When o is non-NULL, returns a type object corresponding to the
- # object type of object o. On failure, raises SystemError and
- # returns NULL. This is equivalent to the Python expression
- # type(o). This function increments the reference count of the
- # return value. There's really no reason to use this function
- # instead of the common expression o->ob_type, which returns a
- # pointer of type PyTypeObject*, except when the incremented
- # reference count is needed.
-
- bint PyObject_TypeCheck(object o, PyTypeObject *type)
- # Return true if the object o is of type type or a subtype of
- # type. Both parameters must be non-NULL.
-
- Py_ssize_t PyObject_Length(object o) except -1
- Py_ssize_t PyObject_Size(object o) except -1
- # Return the length of object o. If the object o provides either
- # the sequence and mapping protocols, the sequence length is
- # returned. On error, -1 is returned. This is the equivalent to
- # the Python expression "len(o)".
-
- object PyObject_GetItem(object o, object key)
- # Return value: New reference.
- # Return element of o corresponding to the object key or NULL on
- # failure. This is the equivalent of the Python expression
- # "o[key]".
-
- int PyObject_SetItem(object o, object key, object v) except -1
- # Map the object key to the value v. Returns -1 on failure. This
- # is the equivalent of the Python statement "o[key] = v".
-
- int PyObject_DelItem(object o, object key) except -1
- # Delete the mapping for key from o. Returns -1 on failure. This
- # is the equivalent of the Python statement "del o[key]".
-
- int PyObject_AsFileDescriptor(object o) except -1
- # Derives a file-descriptor from a Python object. If the object is
- # an integer or long integer, its value is returned. If not, the
- # object's fileno() method is called if it exists; the method must
- # return an integer or long integer, which is returned as the file
- # descriptor value. Returns -1 on failure.
-
- object PyObject_Dir(object o)
- # Return value: New reference.
- # This is equivalent to the Python expression "dir(o)", returning
- # a (possibly empty) list of strings appropriate for the object
- # argument, or NULL if there was an error. If the argument is
- # NULL, this is like the Python "dir()", returning the names of
- # the current locals; in this case, if no execution frame is
- # active then NULL is returned but PyErr_Occurred() will return
- # false.
-
- object PyObject_GetIter(object o)
- # Return value: New reference.
- # This is equivalent to the Python expression "iter(o)". It
- # returns a new iterator for the object argument, or the object
- # itself if the object is already an iterator. Raises TypeError
- # and returns NULL if the object cannot be iterated.
-
- Py_ssize_t Py_SIZE(object o)
-
- object PyObject_Format(object obj, object format_spec)
- # Takes an arbitrary object and returns the result of calling
- # obj.__format__(format_spec).
- # Added in Py2.6
+ # Delete attribute named attr_name, for object o. Returns -1 on
+ # failure. This is the equivalent of the Python statement: "del
+ # o.attr_name".
+
+ int PyObject_DelAttr(object o, object attr_name) except -1
+ # Delete attribute named attr_name, for object o. Returns -1 on
+ # failure. This is the equivalent of the Python statement "del
+ # o.attr_name".
+
+ int Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, Py_GE
+
+ object PyObject_RichCompare(object o1, object o2, int opid)
+ # Return value: New reference.
+ # Compare the values of o1 and o2 using the operation specified by
+ # opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or
+ # Py_GE, corresponding to <, <=, ==, !=, >, or >=
+ # respectively. This is the equivalent of the Python expression
+ # "o1 op o2", where op is the operator corresponding to
+ # opid. Returns the value of the comparison on success, or NULL on
+ # failure.
+
+ bint PyObject_RichCompareBool(object o1, object o2, int opid) except -1
+ # Compare the values of o1 and o2 using the operation specified by
+ # opid, which must be one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or
+ # Py_GE, corresponding to <, <=, ==, !=, >, or >=
+ # respectively. Returns -1 on error, 0 if the result is false, 1
+ # otherwise. This is the equivalent of the Python expression "o1
+ # op o2", where op is the operator corresponding to opid.
+
+ int PyObject_Cmp(object o1, object o2, int *result) except -1
+ # Compare the values of o1 and o2 using a routine provided by o1,
+ # if one exists, otherwise with a routine provided by o2. The
+ # result of the comparison is returned in result. Returns -1 on
+ # failure. This is the equivalent of the Python statement "result
+ # = cmp(o1, o2)".
+
+ int PyObject_Compare(object o1, object o2) except *
+ # Compare the values of o1 and o2 using a routine provided by o1,
+ # if one exists, otherwise with a routine provided by o2. Returns
+ # the result of the comparison on success. On error, the value
+ # returned is undefined; use PyErr_Occurred() to detect an
+ # error. This is equivalent to the Python expression "cmp(o1,
+ # o2)".
+
+ object PyObject_Repr(object o)
+ # Return value: New reference.
+ # Compute a string representation of object o. Returns the string
+ # representation on success, NULL on failure. This is the
+ # equivalent of the Python expression "repr(o)". Called by the
+ # repr() built-in function and by reverse quotes.
+
+ object PyObject_Str(object o)
+ # Return value: New reference.
+ # Compute a string representation of object o. Returns the string
+ # representation on success, NULL on failure. This is the
+ # equivalent of the Python expression "str(o)". Called by the
+ # str() built-in function and by the print statement.
+
+ object PyObject_Unicode(object o)
+ # Return value: New reference.
+ # Compute a Unicode string representation of object o. Returns the
+ # Unicode string representation on success, NULL on failure. This
+ # is the equivalent of the Python expression "unicode(o)". Called
+ # by the unicode() built-in function.
+
+ bint PyObject_IsInstance(object inst, object cls) except -1
+ # Returns 1 if inst is an instance of the class cls or a subclass
+ # of cls, or 0 if not. On error, returns -1 and sets an
+ # exception. If cls is a type object rather than a class object,
+ # PyObject_IsInstance() returns 1 if inst is of type cls. If cls
+ # is a tuple, the check will be done against every entry in
+ # cls. The result will be 1 when at least one of the checks
+ # returns 1, otherwise it will be 0. If inst is not a class
+ # instance and cls is neither a type object, nor a class object,
+ # nor a tuple, inst must have a __class__ attribute -- the class
+ # relationship of the value of that attribute with cls will be
+ # used to determine the result of this function.
+
+ # Subclass determination is done in a fairly straightforward way,
+ # but includes a wrinkle that implementors of extensions to the
+ # class system may want to be aware of. If A and B are class
+ # objects, B is a subclass of A if it inherits from A either
+ # directly or indirectly. If either is not a class object, a more
+ # general mechanism is used to determine the class relationship of
+ # the two objects. When testing if B is a subclass of A, if A is
+ # B, PyObject_IsSubclass() returns true. If A and B are different
+ # objects, B's __bases__ attribute is searched in a depth-first
+ # fashion for A -- the presence of the __bases__ attribute is
+ # considered sufficient for this determination.
+
+ bint PyObject_IsSubclass(object derived, object cls) except -1
+ # Returns 1 if the class derived is identical to or derived from
+ # the class cls, otherwise returns 0. In case of an error, returns
+ # -1. If cls is a tuple, the check will be done against every
+ # entry in cls. The result will be 1 when at least one of the
+ # checks returns 1, otherwise it will be 0. If either derived or
+ # cls is not an actual class object (or tuple), this function uses
+ # the generic algorithm described above. New in version
+ # 2.1. Changed in version 2.3: Older versions of Python did not
+ # support a tuple as the second argument.
+
+ bint PyCallable_Check(object o)
+ # Determine if the object o is callable. Return 1 if the object is
+ # callable and 0 otherwise. This function always succeeds.
+
+ object PyObject_Call(object callable_object, object args, object kw)
+ # Return value: New reference.
+ # Call a callable Python object callable_object, with arguments
+ # given by the tuple args, and named arguments given by the
+ # dictionary kw. If no named arguments are needed, kw may be
+ # NULL. args must not be NULL, use an empty tuple if no arguments
+ # are needed. Returns the result of the call on success, or NULL
+ # on failure. This is the equivalent of the Python expression
+ # "apply(callable_object, args, kw)" or "callable_object(*args,
+ # **kw)".
+
+ object PyObject_CallObject(object callable_object, object args)
+ # Return value: New reference.
+ # Call a callable Python object callable_object, with arguments
+ # given by the tuple args. If no arguments are needed, then args
+ # may be NULL. Returns the result of the call on success, or NULL
+ # on failure. This is the equivalent of the Python expression
+ # "apply(callable_object, args)" or "callable_object(*args)".
+
+ object PyObject_CallFunction(object callable, char *format, ...)
+ # Return value: New reference.
+ # Call a callable Python object callable, with a variable number
+ # of C arguments. The C arguments are described using a
+ # Py_BuildValue() style format string. The format may be NULL,
+ # indicating that no arguments are provided. Returns the result of
+ # the call on success, or NULL on failure. This is the equivalent
+ # of the Python expression "apply(callable, args)" or
+ # "callable(*args)". Note that if you only pass object args,
+ # PyObject_CallFunctionObjArgs is a faster alternative.
+
+ object PyObject_CallMethod(object o, char *method, char *format, ...)
+ # Return value: New reference.
+ # Call the method named method of object o with a variable number
+ # of C arguments. The C arguments are described by a
+ # Py_BuildValue() format string that should produce a tuple. The
+ # format may be NULL, indicating that no arguments are
+ # provided. Returns the result of the call on success, or NULL on
+ # failure. This is the equivalent of the Python expression
+ # "o.method(args)". Note that if you only pass object args,
+ # PyObject_CallMethodObjArgs is a faster alternative.
+
+ #object PyObject_CallFunctionObjArgs(object callable, ..., NULL)
+ object PyObject_CallFunctionObjArgs(object callable, ...)
+ # Return value: New reference.
+ # Call a callable Python object callable, with a variable number
+ # of PyObject* arguments. The arguments are provided as a variable
+ # number of parameters followed by NULL. Returns the result of the
+ # call on success, or NULL on failure.
+
+ #PyObject* PyObject_CallMethodObjArgs(object o, object name, ..., NULL)
+ object PyObject_CallMethodObjArgs(object o, object name, ...)
+ # Return value: New reference.
+ # Calls a method of the object o, where the name of the method is
+ # given as a Python string object in name. It is called with a
+ # variable number of PyObject* arguments. The arguments are
+ # provided as a variable number of parameters followed by
+ # NULL. Returns the result of the call on success, or NULL on
+ # failure.
+
+ long PyObject_Hash(object o) except? -1
+ # Compute and return the hash value of an object o. On failure,
+ # return -1. This is the equivalent of the Python expression
+ # "hash(o)".
+
+ bint PyObject_IsTrue(object o) except -1
+ # Returns 1 if the object o is considered to be true, and 0
+ # otherwise. This is equivalent to the Python expression "not not
+ # o". On failure, return -1.
+
+ bint PyObject_Not(object o) except -1
+ # Returns 0 if the object o is considered to be true, and 1
+ # otherwise. This is equivalent to the Python expression "not
+ # o". On failure, return -1.
+
+ object PyObject_Type(object o)
+ # Return value: New reference.
+ # When o is non-NULL, returns a type object corresponding to the
+ # object type of object o. On failure, raises SystemError and
+ # returns NULL. This is equivalent to the Python expression
+ # type(o). This function increments the reference count of the
+ # return value. There's really no reason to use this function
+ # instead of the common expression o->ob_type, which returns a
+ # pointer of type PyTypeObject*, except when the incremented
+ # reference count is needed.
+
+ bint PyObject_TypeCheck(object o, PyTypeObject *type)
+ # Return true if the object o is of type type or a subtype of
+ # type. Both parameters must be non-NULL.
+
+ Py_ssize_t PyObject_Length(object o) except -1
+ Py_ssize_t PyObject_Size(object o) except -1
+ # Return the length of object o. If the object o provides either
+    # the sequence or the mapping protocol, the sequence length is
+    # returned. On error, -1 is returned. This is the equivalent of
+ # the Python expression "len(o)".
+
+ object PyObject_GetItem(object o, object key)
+ # Return value: New reference.
+ # Return element of o corresponding to the object key or NULL on
+ # failure. This is the equivalent of the Python expression
+ # "o[key]".
+
+ int PyObject_SetItem(object o, object key, object v) except -1
+ # Map the object key to the value v. Returns -1 on failure. This
+ # is the equivalent of the Python statement "o[key] = v".
+
+ int PyObject_DelItem(object o, object key) except -1
+ # Delete the mapping for key from o. Returns -1 on failure. This
+ # is the equivalent of the Python statement "del o[key]".
+
+ int PyObject_AsFileDescriptor(object o) except -1
+ # Derives a file-descriptor from a Python object. If the object is
+ # an integer or long integer, its value is returned. If not, the
+ # object's fileno() method is called if it exists; the method must
+ # return an integer or long integer, which is returned as the file
+ # descriptor value. Returns -1 on failure.
+
+ object PyObject_Dir(object o)
+ # Return value: New reference.
+ # This is equivalent to the Python expression "dir(o)", returning
+ # a (possibly empty) list of strings appropriate for the object
+ # argument, or NULL if there was an error. If the argument is
+ # NULL, this is like the Python "dir()", returning the names of
+ # the current locals; in this case, if no execution frame is
+ # active then NULL is returned but PyErr_Occurred() will return
+ # false.
+
+ object PyObject_GetIter(object o)
+ # Return value: New reference.
+ # This is equivalent to the Python expression "iter(o)". It
+ # returns a new iterator for the object argument, or the object
+ # itself if the object is already an iterator. Raises TypeError
+ # and returns NULL if the object cannot be iterated.
+
+ Py_ssize_t Py_SIZE(object o)
+
+ object PyObject_Format(object obj, object format_spec)
+ # Takes an arbitrary object and returns the result of calling
+ # obj.__format__(format_spec).
+ # Added in Py2.6
# Type flags (tp_flags of PyTypeObject)
long Py_TPFLAGS_HAVE_GETCHARBUFFER
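Illustrative aside, not part of the patch: the attribute, truth-testing and call functions above compose naturally. A minimal sketch with a hypothetical helper, assuming the declarations are cimported from cpython.object:

    from cpython.object cimport (PyObject_HasAttrString, PyObject_GetAttrString,
                                 PyObject_IsTrue, PyObject_CallObject)

    def call_if_truthy(obj, bytes attr):
        # Roughly "getattr(obj, attr)() if getattr(obj, attr, None) else None";
        # the "except" annotations above turn NULL/-1 returns into exceptions.
        if not PyObject_HasAttrString(obj, attr):
            return None
        value = PyObject_GetAttrString(obj, attr)
        if not PyObject_IsTrue(value):
            return None
        return PyObject_CallObject(value, ())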
diff --git a/contrib/tools/cython/Cython/Includes/cpython/oldbuffer.pxd b/contrib/tools/cython/Cython/Includes/cpython/oldbuffer.pxd
index c48a9de073..0222428ed4 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/oldbuffer.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/oldbuffer.pxd
@@ -1,63 +1,63 @@
-# Legacy Python 2 buffer interface.
-#
-# These functions are no longer available in Python 3, use the new
-# buffer interface instead.
-
-cdef extern from "Python.h":
- cdef enum _:
- Py_END_OF_BUFFER
- # This constant may be passed as the size parameter to
- # PyBuffer_FromObject() or PyBuffer_FromReadWriteObject(). It
- # indicates that the new PyBufferObject should refer to base object
- # from the specified offset to the end of its exported
- # buffer. Using this enables the caller to avoid querying the base
- # object for its length.
-
- bint PyBuffer_Check(object p)
- # Return true if the argument has type PyBuffer_Type.
-
- object PyBuffer_FromObject(object base, Py_ssize_t offset, Py_ssize_t size)
- # Return value: New reference.
- #
- # Return a new read-only buffer object. This raises TypeError if
- # base doesn't support the read-only buffer protocol or doesn't
- # provide exactly one buffer segment, or it raises ValueError if
- # offset is less than zero. The buffer will hold a reference to the
- # base object, and the buffer's contents will refer to the base
- # object's buffer interface, starting as position offset and
- # extending for size bytes. If size is Py_END_OF_BUFFER, then the
- # new buffer's contents extend to the length of the base object's
- # exported buffer data.
-
- object PyBuffer_FromReadWriteObject(object base, Py_ssize_t offset, Py_ssize_t size)
- # Return value: New reference.
- #
- # Return a new writable buffer object. Parameters and exceptions
- # are similar to those for PyBuffer_FromObject(). If the base
- # object does not export the writeable buffer protocol, then
- # TypeError is raised.
-
- object PyBuffer_FromMemory(void *ptr, Py_ssize_t size)
- # Return value: New reference.
- #
- # Return a new read-only buffer object that reads from a specified
- # location in memory, with a specified size. The caller is
- # responsible for ensuring that the memory buffer, passed in as
- # ptr, is not deallocated while the returned buffer object
- # exists. Raises ValueError if size is less than zero. Note that
- # Py_END_OF_BUFFER may not be passed for the size parameter;
- # ValueError will be raised in that case.
-
- object PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size)
- # Return value: New reference.
- #
- # Similar to PyBuffer_FromMemory(), but the returned buffer is
- # writable.
-
- object PyBuffer_New(Py_ssize_t size)
- # Return value: New reference.
- #
- # Return a new writable buffer object that maintains its own memory
- # buffer of size bytes. ValueError is returned if size is not zero
- # or positive. Note that the memory buffer (as returned by
- # PyObject_AsWriteBuffer()) is not specifically aligned.
+# Legacy Python 2 buffer interface.
+#
+# These functions are no longer available in Python 3, use the new
+# buffer interface instead.
+
+cdef extern from "Python.h":
+ cdef enum _:
+ Py_END_OF_BUFFER
+ # This constant may be passed as the size parameter to
+ # PyBuffer_FromObject() or PyBuffer_FromReadWriteObject(). It
+ # indicates that the new PyBufferObject should refer to base object
+ # from the specified offset to the end of its exported
+ # buffer. Using this enables the caller to avoid querying the base
+ # object for its length.
+
+ bint PyBuffer_Check(object p)
+ # Return true if the argument has type PyBuffer_Type.
+
+ object PyBuffer_FromObject(object base, Py_ssize_t offset, Py_ssize_t size)
+ # Return value: New reference.
+ #
+ # Return a new read-only buffer object. This raises TypeError if
+ # base doesn't support the read-only buffer protocol or doesn't
+ # provide exactly one buffer segment, or it raises ValueError if
+ # offset is less than zero. The buffer will hold a reference to the
+ # base object, and the buffer's contents will refer to the base
+ # object's buffer interface, starting at position offset and
+ # extending for size bytes. If size is Py_END_OF_BUFFER, then the
+ # new buffer's contents extend to the length of the base object's
+ # exported buffer data.
+
+ object PyBuffer_FromReadWriteObject(object base, Py_ssize_t offset, Py_ssize_t size)
+ # Return value: New reference.
+ #
+ # Return a new writable buffer object. Parameters and exceptions
+ # are similar to those for PyBuffer_FromObject(). If the base
+ # object does not export the writeable buffer protocol, then
+ # TypeError is raised.
+
+ object PyBuffer_FromMemory(void *ptr, Py_ssize_t size)
+ # Return value: New reference.
+ #
+ # Return a new read-only buffer object that reads from a specified
+ # location in memory, with a specified size. The caller is
+ # responsible for ensuring that the memory buffer, passed in as
+ # ptr, is not deallocated while the returned buffer object
+ # exists. Raises ValueError if size is less than zero. Note that
+ # Py_END_OF_BUFFER may not be passed for the size parameter;
+ # ValueError will be raised in that case.
+
+ object PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size)
+ # Return value: New reference.
+ #
+ # Similar to PyBuffer_FromMemory(), but the returned buffer is
+ # writable.
+
+ object PyBuffer_New(Py_ssize_t size)
+ # Return value: New reference.
+ #
+ # Return a new writable buffer object that maintains its own memory
+ # buffer of size bytes. ValueError is returned if size is not zero
+ # or positive. Note that the memory buffer (as returned by
+ # PyObject_AsWriteBuffer()) is not specifically aligned.
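The declarations above only exist on Python 2 builds, so any use is necessarily legacy code. A minimal, hypothetical Cython sketch (helper name invented) of how they would be cimported from this file:

from cpython.oldbuffer cimport PyBuffer_New, PyBuffer_FromObject, Py_END_OF_BUFFER

def scratch_and_view(obj, Py_ssize_t offset):
    # PyBuffer_New owns its own memory, so no manual lifetime management is needed.
    scratch = PyBuffer_New(256)
    # Read-only view over obj from `offset` to the end of its exported buffer.
    view = PyBuffer_FromObject(obj, offset, Py_END_OF_BUFFER)
    return scratch, view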
diff --git a/contrib/tools/cython/Cython/Includes/cpython/pycapsule.pxd b/contrib/tools/cython/Cython/Includes/cpython/pycapsule.pxd
index a8467ce19a..c3d12c7490 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/pycapsule.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/pycapsule.pxd
@@ -1,144 +1,144 @@
-
-# available since Python 3.1!
-
-
-cdef extern from "Python.h":
-
- ctypedef struct PyCapsule_Type
- # This subtype of PyObject represents an opaque value, useful for
- # C extension modules who need to pass an opaque value (as a void*
- # pointer) through Python code to other C code. It is often used
- # to make a C function pointer defined in one module available to
- # other modules, so the regular import mechanism can be used to
- # access C APIs defined in dynamically loaded modules.
-
-
- ctypedef void (*PyCapsule_Destructor)(object o)
- # The type of a destructor callback for a capsule.
- #
- # See PyCapsule_New() for the semantics of PyCapsule_Destructor
- # callbacks.
-
-
- bint PyCapsule_CheckExact(object o)
- # Return true if its argument is a PyCapsule.
-
-
+
+# available since Python 3.1!
+
+
+cdef extern from "Python.h":
+
+ ctypedef struct PyCapsule_Type
+ # This subtype of PyObject represents an opaque value, useful for
+ # C extension modules who need to pass an opaque value (as a void*
+ # pointer) through Python code to other C code. It is often used
+ # to make a C function pointer defined in one module available to
+ # other modules, so the regular import mechanism can be used to
+ # access C APIs defined in dynamically loaded modules.
+
+
+ ctypedef void (*PyCapsule_Destructor)(object o)
+ # The type of a destructor callback for a capsule.
+ #
+ # See PyCapsule_New() for the semantics of PyCapsule_Destructor
+ # callbacks.
+
+
+ bint PyCapsule_CheckExact(object o)
+ # Return true if its argument is a PyCapsule.
+
+
object PyCapsule_New(void *pointer, const char *name,
- PyCapsule_Destructor destructor)
- # Return value: New reference.
- #
- # Create a PyCapsule encapsulating the pointer. The pointer
- # argument may not be NULL.
- #
- # On failure, set an exception and return NULL.
- #
- # The name string may either be NULL or a pointer to a valid C
- # string. If non-NULL, this string must outlive the
- # capsule. (Though it is permitted to free it inside the
- # destructor.)
- #
- # If the destructor argument is not NULL, it will be called with
- # the capsule as its argument when it is destroyed.
- #
- # If this capsule will be stored as an attribute of a module, the
- # name should be specified as modulename.attributename. This will
- # enable other modules to import the capsule using
- # PyCapsule_Import().
-
-
+ PyCapsule_Destructor destructor)
+ # Return value: New reference.
+ #
+ # Create a PyCapsule encapsulating the pointer. The pointer
+ # argument may not be NULL.
+ #
+ # On failure, set an exception and return NULL.
+ #
+ # The name string may either be NULL or a pointer to a valid C
+ # string. If non-NULL, this string must outlive the
+ # capsule. (Though it is permitted to free it inside the
+ # destructor.)
+ #
+ # If the destructor argument is not NULL, it will be called with
+ # the capsule as its argument when it is destroyed.
+ #
+ # If this capsule will be stored as an attribute of a module, the
+ # name should be specified as modulename.attributename. This will
+ # enable other modules to import the capsule using
+ # PyCapsule_Import().
+
+
void* PyCapsule_GetPointer(object capsule, const char *name) except? NULL
- # Retrieve the pointer stored in the capsule. On failure, set an
- # exception and return NULL.
- #
- # The name parameter must compare exactly to the name stored in
- # the capsule. If the name stored in the capsule is NULL, the name
- # passed in must also be NULL. Python uses the C function strcmp()
- # to compare capsule names.
-
-
- PyCapsule_Destructor PyCapsule_GetDestructor(object capsule) except? NULL
- # Return the current destructor stored in the capsule. On failure,
- # set an exception and return NULL.
- #
- # It is legal for a capsule to have a NULL destructor. This makes
- # a NULL return code somewhat ambiguous; use PyCapsule_IsValid()
- # or PyErr_Occurred() to disambiguate.
-
-
+ # Retrieve the pointer stored in the capsule. On failure, set an
+ # exception and return NULL.
+ #
+ # The name parameter must compare exactly to the name stored in
+ # the capsule. If the name stored in the capsule is NULL, the name
+ # passed in must also be NULL. Python uses the C function strcmp()
+ # to compare capsule names.
+
+
+ PyCapsule_Destructor PyCapsule_GetDestructor(object capsule) except? NULL
+ # Return the current destructor stored in the capsule. On failure,
+ # set an exception and return NULL.
+ #
+ # It is legal for a capsule to have a NULL destructor. This makes
+ # a NULL return code somewhat ambiguous; use PyCapsule_IsValid()
+ # or PyErr_Occurred() to disambiguate.
+
+
const char* PyCapsule_GetName(object capsule) except? NULL
- # Return the current name stored in the capsule. On failure, set
- # an exception and return NULL.
- #
- # It is legal for a capsule to have a NULL name. This makes a NULL
- # return code somewhat ambiguous; use PyCapsule_IsValid() or
- # PyErr_Occurred() to disambiguate.
-
-
- void* PyCapsule_GetContext(object capsule) except? NULL
- # Return the current context stored in the capsule. On failure,
- # set an exception and return NULL.
- #
- # It is legal for a capsule to have a NULL context. This makes a
- # NULL return code somewhat ambiguous; use PyCapsule_IsValid() or
- # PyErr_Occurred() to disambiguate.
-
-
+ # Return the current name stored in the capsule. On failure, set
+ # an exception and return NULL.
+ #
+ # It is legal for a capsule to have a NULL name. This makes a NULL
+ # return code somewhat ambiguous; use PyCapsule_IsValid() or
+ # PyErr_Occurred() to disambiguate.
+
+
+ void* PyCapsule_GetContext(object capsule) except? NULL
+ # Return the current context stored in the capsule. On failure,
+ # set an exception and return NULL.
+ #
+ # It is legal for a capsule to have a NULL context. This makes a
+ # NULL return code somewhat ambiguous; use PyCapsule_IsValid() or
+ # PyErr_Occurred() to disambiguate.
+
+
bint PyCapsule_IsValid(object capsule, const char *name)
- # Determines whether or not capsule is a valid capsule. A valid
- # capsule is non-NULL, passes PyCapsule_CheckExact(), has a
- # non-NULL pointer stored in it, and its internal name matches the
- # name parameter. (See PyCapsule_GetPointer() for information on
- # how capsule names are compared.)
- #
- # In other words, if PyCapsule_IsValid() returns a true value,
- # calls to any of the accessors (any function starting with
- # PyCapsule_Get()) are guaranteed to succeed.
- #
- # Return a nonzero value if the object is valid and matches the
- # name passed in. Return 0 otherwise. This function will not fail.
-
-
- int PyCapsule_SetPointer(object capsule, void *pointer) except -1
- # Set the void pointer inside capsule to pointer. The pointer may
- # not be NULL.
- #
- # Return 0 on success. Return nonzero and set an exception on
- # failure.
-
-
- int PyCapsule_SetDestructor(object capsule, PyCapsule_Destructor destructor) except -1
- # Set the destructor inside capsule to destructor.
- #
- # Return 0 on success. Return nonzero and set an exception on
- # failure.
-
-
+ # Determines whether or not capsule is a valid capsule. A valid
+ # capsule is non-NULL, passes PyCapsule_CheckExact(), has a
+ # non-NULL pointer stored in it, and its internal name matches the
+ # name parameter. (See PyCapsule_GetPointer() for information on
+ # how capsule names are compared.)
+ #
+ # In other words, if PyCapsule_IsValid() returns a true value,
+ # calls to any of the accessors (any function starting with
+ # PyCapsule_Get()) are guaranteed to succeed.
+ #
+ # Return a nonzero value if the object is valid and matches the
+ # name passed in. Return 0 otherwise. This function will not fail.
+
+
+ int PyCapsule_SetPointer(object capsule, void *pointer) except -1
+ # Set the void pointer inside capsule to pointer. The pointer may
+ # not be NULL.
+ #
+ # Return 0 on success. Return nonzero and set an exception on
+ # failure.
+
+
+ int PyCapsule_SetDestructor(object capsule, PyCapsule_Destructor destructor) except -1
+ # Set the destructor inside capsule to destructor.
+ #
+ # Return 0 on success. Return nonzero and set an exception on
+ # failure.
+
+
int PyCapsule_SetName(object capsule, const char *name) except -1
- # Set the name inside capsule to name. If non-NULL, the name must
- # outlive the capsule. If the previous name stored in the capsule
- # was not NULL, no attempt is made to free it.
- #
- # Return 0 on success. Return nonzero and set an exception on
- # failure.
-
-
- int PyCapsule_SetContext(object capsule, void *context) except -1
- # Set the context pointer inside capsule to context. Return 0 on
- # success. Return nonzero and set an exception on failure.
-
-
+ # Set the name inside capsule to name. If non-NULL, the name must
+ # outlive the capsule. If the previous name stored in the capsule
+ # was not NULL, no attempt is made to free it.
+ #
+ # Return 0 on success. Return nonzero and set an exception on
+ # failure.
+
+
+ int PyCapsule_SetContext(object capsule, void *context) except -1
+ # Set the context pointer inside capsule to context. Return 0 on
+ # success. Return nonzero and set an exception on failure.
+
+
void* PyCapsule_Import(const char *name, int no_block) except? NULL
- # Import a pointer to a C object from a capsule attribute in a
- # module. The name parameter should specify the full name to the
- # attribute, as in module.attribute. The name stored in the
- # capsule must match this string exactly. If no_block is true,
- # import the module without blocking (using
- # PyImport_ImportModuleNoBlock()). If no_block is false, import
- # the module conventionally (using PyImport_ImportModule()).
- #
- # Return the capsule’s internal pointer on success. On failure,
- # set an exception and return NULL. However, if PyCapsule_Import()
- # failed to import the module, and no_block was true, no exception
- # is set.
-
+ # Import a pointer to a C object from a capsule attribute in a
+ # module. The name parameter should specify the full name to the
+ # attribute, as in module.attribute. The name stored in the
+ # capsule must match this string exactly. If no_block is true,
+ # import the module without blocking (using
+ # PyImport_ImportModuleNoBlock()). If no_block is false, import
+ # the module conventionally (using PyImport_ImportModule()).
+ #
+ # Return the capsule’s internal pointer on success. On failure,
+ # set an exception and return NULL. However, if PyCapsule_Import()
+ # failed to import the module, and no_block was true, no exception
+ # is set.
+
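A hedged sketch of the capsule export/import pattern the comments above describe; the module name "mymodule", the attribute name, and the helpers are invented for illustration:

from cpython.pycapsule cimport PyCapsule_New, PyCapsule_Import

ctypedef double (*scale_fn)(double)

cdef double _scale(double x):
    return 2.0 * x

# Exporting side: publish the function pointer under a "module.attribute" name.
# The bytes literal outlives the capsule because it is a module-level constant.
scale_capsule = PyCapsule_New(<void*>_scale, b"mymodule.scale_capsule", NULL)

def call_imported_scale(double x):
    # Importing side (typically another module): recover the pointer and call it.
    # PyCapsule_Import() returns NULL and sets an exception on failure, which
    # then propagates out of this def function.
    cdef scale_fn fn = <scale_fn>PyCapsule_Import(b"mymodule.scale_capsule", 0)
    return fn(x)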
diff --git a/contrib/tools/cython/Cython/Includes/cpython/pystate.pxd b/contrib/tools/cython/Cython/Includes/cpython/pystate.pxd
index 498841f858..1af6307931 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/pystate.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/pystate.pxd
@@ -1,92 +1,92 @@
-# Thread and interpreter state structures and their interfaces
-
+# Thread and interpreter state structures and their interfaces
+
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
+
+cdef extern from "Python.h":
+
 # We make these opaque types. If the user wants specific attributes,
- # they can be declared manually.
-
+ # they can be declared manually.
+
ctypedef long PY_INT64_T # FIXME: Py2.7+, not defined here but used here
- ctypedef struct PyInterpreterState:
- pass
-
- ctypedef struct PyThreadState:
- pass
-
- ctypedef struct PyFrameObject:
- pass
-
- # This is not actually a struct, but make sure it can never be coerced to
- # an int or used in arithmetic expressions
+ ctypedef struct PyInterpreterState:
+ pass
+
+ ctypedef struct PyThreadState:
+ pass
+
+ ctypedef struct PyFrameObject:
+ pass
+
+ # This is not actually a struct, but make sure it can never be coerced to
+ # an int or used in arithmetic expressions
ctypedef struct PyGILState_STATE:
pass
-
- # The type of the trace function registered using PyEval_SetProfile() and
- # PyEval_SetTrace().
- # Py_tracefunc return -1 when raising an exception, or 0 for success.
- ctypedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *)
-
- # The following values are used for 'what' for tracefunc functions
- enum:
- PyTrace_CALL
- PyTrace_EXCEPTION
- PyTrace_LINE
- PyTrace_RETURN
- PyTrace_C_CALL
- PyTrace_C_EXCEPTION
- PyTrace_C_RETURN
-
-
- PyInterpreterState * PyInterpreterState_New()
- void PyInterpreterState_Clear(PyInterpreterState *)
- void PyInterpreterState_Delete(PyInterpreterState *)
+
+ # The type of the trace function registered using PyEval_SetProfile() and
+ # PyEval_SetTrace().
+ # Py_tracefunc returns -1 when raising an exception, or 0 for success.
+ ctypedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *)
+
+ # The following values are used for 'what' for tracefunc functions
+ enum:
+ PyTrace_CALL
+ PyTrace_EXCEPTION
+ PyTrace_LINE
+ PyTrace_RETURN
+ PyTrace_C_CALL
+ PyTrace_C_EXCEPTION
+ PyTrace_C_RETURN
+
+
+ PyInterpreterState * PyInterpreterState_New()
+ void PyInterpreterState_Clear(PyInterpreterState *)
+ void PyInterpreterState_Delete(PyInterpreterState *)
PY_INT64_T PyInterpreterState_GetID(PyInterpreterState *)
-
- PyThreadState * PyThreadState_New(PyInterpreterState *)
- void PyThreadState_Clear(PyThreadState *)
- void PyThreadState_Delete(PyThreadState *)
-
- PyThreadState * PyThreadState_Get()
+
+ PyThreadState * PyThreadState_New(PyInterpreterState *)
+ void PyThreadState_Clear(PyThreadState *)
+ void PyThreadState_Delete(PyThreadState *)
+
+ PyThreadState * PyThreadState_Get()
PyThreadState * PyThreadState_Swap(PyThreadState *) # NOTE: DO NOT USE IN CYTHON CODE !
- PyObject * PyThreadState_GetDict()
- int PyThreadState_SetAsyncExc(long, PyObject *)
-
- # Ensure that the current thread is ready to call the Python
- # C API, regardless of the current state of Python, or of its
- # thread lock. This may be called as many times as desired
- # by a thread so long as each call is matched with a call to
- # PyGILState_Release(). In general, other thread-state APIs may
- # be used between _Ensure() and _Release() calls, so long as the
- # thread-state is restored to its previous state before the Release().
- # For example, normal use of the Py_BEGIN_ALLOW_THREADS/
- # Py_END_ALLOW_THREADS macros are acceptable.
-
- # The return value is an opaque "handle" to the thread state when
- # PyGILState_Ensure() was called, and must be passed to
- # PyGILState_Release() to ensure Python is left in the same state. Even
- # though recursive calls are allowed, these handles can *not* be shared -
- # each unique call to PyGILState_Ensure must save the handle for its
- # call to PyGILState_Release.
-
- # When the function returns, the current thread will hold the GIL.
-
- # Failure is a fatal error.
- PyGILState_STATE PyGILState_Ensure()
-
- # Release any resources previously acquired. After this call, Python's
- # state will be the same as it was prior to the corresponding
- # PyGILState_Ensure() call (but generally this state will be unknown to
- # the caller, hence the use of the GILState API.)
-
- # Every call to PyGILState_Ensure must be matched by a call to
- # PyGILState_Release on the same thread.
- void PyGILState_Release(PyGILState_STATE)
-
- # Routines for advanced debuggers, requested by David Beazley.
- # Don't use unless you know what you are doing!
- PyInterpreterState * PyInterpreterState_Head()
- PyInterpreterState * PyInterpreterState_Next(PyInterpreterState *)
- PyThreadState * PyInterpreterState_ThreadHead(PyInterpreterState *)
- PyThreadState * PyThreadState_Next(PyThreadState *)
+ PyObject * PyThreadState_GetDict()
+ int PyThreadState_SetAsyncExc(long, PyObject *)
+
+ # Ensure that the current thread is ready to call the Python
+ # C API, regardless of the current state of Python, or of its
+ # thread lock. This may be called as many times as desired
+ # by a thread so long as each call is matched with a call to
+ # PyGILState_Release(). In general, other thread-state APIs may
+ # be used between _Ensure() and _Release() calls, so long as the
+ # thread-state is restored to its previous state before the Release().
+ # For example, normal use of the Py_BEGIN_ALLOW_THREADS/
+ # Py_END_ALLOW_THREADS macros is acceptable.
+
+ # The return value is an opaque "handle" to the thread state when
+ # PyGILState_Ensure() was called, and must be passed to
+ # PyGILState_Release() to ensure Python is left in the same state. Even
+ # though recursive calls are allowed, these handles can *not* be shared -
+ # each unique call to PyGILState_Ensure must save the handle for its
+ # call to PyGILState_Release.
+
+ # When the function returns, the current thread will hold the GIL.
+
+ # Failure is a fatal error.
+ PyGILState_STATE PyGILState_Ensure()
+
+ # Release any resources previously acquired. After this call, Python's
+ # state will be the same as it was prior to the corresponding
+ # PyGILState_Ensure() call (but generally this state will be unknown to
+ # the caller, hence the use of the GILState API.)
+
+ # Every call to PyGILState_Ensure must be matched by a call to
+ # PyGILState_Release on the same thread.
+ void PyGILState_Release(PyGILState_STATE)
+
+ # Routines for advanced debuggers, requested by David Beazley.
+ # Don't use unless you know what you are doing!
+ PyInterpreterState * PyInterpreterState_Head()
+ PyInterpreterState * PyInterpreterState_Next(PyInterpreterState *)
+ PyThreadState * PyInterpreterState_ThreadHead(PyInterpreterState *)
+ PyThreadState * PyThreadState_Next(PyThreadState *)
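Note that Cython's own `with gil:` / `with nogil:` blocks already wrap the PyGILState_Ensure()/PyGILState_Release() pair, so direct use of these declarations is rarely needed. A minimal sketch (helper name invented) that only inspects the opaque thread-state handle declared above:

from cpython.pystate cimport PyThreadState, PyThreadState_Get

def thread_state_pointer():
    # Return the address of the current PyThreadState as an integer, just to
    # show how the opaque struct pointers above are passed around.
    cdef PyThreadState* tstate = PyThreadState_Get()
    return <size_t>tstate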
diff --git a/contrib/tools/cython/Cython/Includes/cpython/pythread.pxd b/contrib/tools/cython/Cython/Includes/cpython/pythread.pxd
index 67f48747c4..392bef7d64 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/pythread.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/pythread.pxd
@@ -1,45 +1,45 @@
-
-
-cdef extern from "pythread.h":
-
- ctypedef void *PyThread_type_lock
- ctypedef void *PyThread_type_sema
-
- void PyThread_init_thread()
+
+
+cdef extern from "pythread.h":
+
+ ctypedef void *PyThread_type_lock
+ ctypedef void *PyThread_type_sema
+
+ void PyThread_init_thread()
long PyThread_start_new_thread(void (*)(void *), void *) # FIXME: legacy
#unsigned long PyThread_start_new_thread(void (*)(void *), void *) # returned 'long' before Py3.7
- void PyThread_exit_thread()
+ void PyThread_exit_thread()
long PyThread_get_thread_ident() # FIXME: legacy
#unsigned long PyThread_get_thread_ident() # returned 'long' before Py3.7
-
- PyThread_type_lock PyThread_allocate_lock()
- void PyThread_free_lock(PyThread_type_lock)
- int PyThread_acquire_lock(PyThread_type_lock, int mode) nogil
- void PyThread_release_lock(PyThread_type_lock) nogil
-
- enum:
- # 'mode' in PyThread_acquire_lock()
- WAIT_LOCK # 1
- NOWAIT_LOCK # 0
-
- ctypedef enum PyLockStatus:
- # return values of PyThread_acquire_lock() in CPython 3.2+
- PY_LOCK_FAILURE = 0
- PY_LOCK_ACQUIRED = 1
- PY_LOCK_INTR
-
- size_t PyThread_get_stacksize()
- int PyThread_set_stacksize(size_t)
-
+
+ PyThread_type_lock PyThread_allocate_lock()
+ void PyThread_free_lock(PyThread_type_lock)
+ int PyThread_acquire_lock(PyThread_type_lock, int mode) nogil
+ void PyThread_release_lock(PyThread_type_lock) nogil
+
+ enum:
+ # 'mode' in PyThread_acquire_lock()
+ WAIT_LOCK # 1
+ NOWAIT_LOCK # 0
+
+ ctypedef enum PyLockStatus:
+ # return values of PyThread_acquire_lock() in CPython 3.2+
+ PY_LOCK_FAILURE = 0
+ PY_LOCK_ACQUIRED = 1
+ PY_LOCK_INTR
+
+ size_t PyThread_get_stacksize()
+ int PyThread_set_stacksize(size_t)
+
# Thread Local Storage (TLS) API deprecated in CPython 3.7+
- int PyThread_create_key()
- void PyThread_delete_key(int)
- int PyThread_set_key_value(int, void *)
- void * PyThread_get_key_value(int)
- void PyThread_delete_key_value(int key)
-
- # Cleanup after a fork
- void PyThread_ReInitTLS()
+ int PyThread_create_key()
+ void PyThread_delete_key(int)
+ int PyThread_set_key_value(int, void *)
+ void * PyThread_get_key_value(int)
+ void PyThread_delete_key_value(int key)
+
+ # Cleanup after a fork
+ void PyThread_ReInitTLS()
# Thread Specific Storage (TSS) API in CPython 3.7+ (also backported)
#ctypedef struct Py_tss_t: pass # Cython built-in type
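An assumed usage sketch of the raw interpreter lock declared above (in ordinary code a threading.Lock is preferable); the names are illustrative:

from cpython.pythread cimport (PyThread_type_lock, PyThread_allocate_lock,
                               PyThread_acquire_lock, PyThread_release_lock,
                               WAIT_LOCK)

cdef PyThread_type_lock _lock = PyThread_allocate_lock()   # NULL on failure

cdef void guarded_work() nogil:
    # Blocks until the lock becomes available, then releases it again;
    # acquire/release are declared nogil above, so this can run without the GIL.
    PyThread_acquire_lock(_lock, WAIT_LOCK)
    # ... protected work ...
    PyThread_release_lock(_lock)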
diff --git a/contrib/tools/cython/Cython/Includes/cpython/ref.pxd b/contrib/tools/cython/Cython/Includes/cpython/ref.pxd
index 84ee788bc8..4bc9a7d7c8 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/ref.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/ref.pxd
@@ -1,51 +1,51 @@
from .object cimport PyObject, PyTypeObject, Py_TYPE # legacy imports for re-export
-cdef extern from "Python.h":
- #####################################################################
- # 3. Reference Counts
- #####################################################################
- # The macros in this section are used for managing reference counts of Python objects.
- void Py_INCREF(object o)
- # Increment the reference count for object o. The object must not
- # be NULL; if you aren't sure that it isn't NULL, use
- # Py_XINCREF().
-
- void Py_XINCREF(PyObject* o)
- # Increment the reference count for object o. The object may be NULL, in which case the macro has no effect.
-
- void Py_DECREF(object o)
- # Decrement the reference count for object o. The object must not
- # be NULL; if you aren't sure that it isn't NULL, use
- # Py_XDECREF(). If the reference count reaches zero, the object's
- # type's deallocation function (which must not be NULL) is
- # invoked.
-
- # Warning: The deallocation function can cause arbitrary Python
- # code to be invoked (e.g. when a class instance with a __del__()
- # method is deallocated). While exceptions in such code are not
- # propagated, the executed code has free access to all Python
- # global variables. This means that any object that is reachable
- # from a global variable should be in a consistent state before
- # Py_DECREF() is invoked. For example, code to delete an object
- # from a list should copy a reference to the deleted object in a
- # temporary variable, update the list data structure, and then
- # call Py_DECREF() for the temporary variable.
-
- void Py_XDECREF(PyObject* o)
- # Decrement the reference count for object o. The object may be
- # NULL, in which case the macro has no effect; otherwise the
- # effect is the same as for Py_DECREF(), and the same warning
- # applies.
-
- void Py_CLEAR(PyObject* o)
- # Decrement the reference count for object o. The object may be
- # NULL, in which case the macro has no effect; otherwise the
- # effect is the same as for Py_DECREF(), except that the argument
- # is also set to NULL. The warning for Py_DECREF() does not apply
- # with respect to the object passed because the macro carefully
- # uses a temporary variable and sets the argument to NULL before
- # decrementing its reference count.
- # It is a good idea to use this macro whenever decrementing the
- # value of a variable that might be traversed during garbage
- # collection.
-
+cdef extern from "Python.h":
+ #####################################################################
+ # 3. Reference Counts
+ #####################################################################
+ # The macros in this section are used for managing reference counts of Python objects.
+ void Py_INCREF(object o)
+ # Increment the reference count for object o. The object must not
+ # be NULL; if you aren't sure that it isn't NULL, use
+ # Py_XINCREF().
+
+ void Py_XINCREF(PyObject* o)
+ # Increment the reference count for object o. The object may be NULL, in which case the macro has no effect.
+
+ void Py_DECREF(object o)
+ # Decrement the reference count for object o. The object must not
+ # be NULL; if you aren't sure that it isn't NULL, use
+ # Py_XDECREF(). If the reference count reaches zero, the object's
+ # type's deallocation function (which must not be NULL) is
+ # invoked.
+
+ # Warning: The deallocation function can cause arbitrary Python
+ # code to be invoked (e.g. when a class instance with a __del__()
+ # method is deallocated). While exceptions in such code are not
+ # propagated, the executed code has free access to all Python
+ # global variables. This means that any object that is reachable
+ # from a global variable should be in a consistent state before
+ # Py_DECREF() is invoked. For example, code to delete an object
+ # from a list should copy a reference to the deleted object in a
+ # temporary variable, update the list data structure, and then
+ # call Py_DECREF() for the temporary variable.
+
+ void Py_XDECREF(PyObject* o)
+ # Decrement the reference count for object o. The object may be
+ # NULL, in which case the macro has no effect; otherwise the
+ # effect is the same as for Py_DECREF(), and the same warning
+ # applies.
+
+ void Py_CLEAR(PyObject* o)
+ # Decrement the reference count for object o. The object may be
+ # NULL, in which case the macro has no effect; otherwise the
+ # effect is the same as for Py_DECREF(), except that the argument
+ # is also set to NULL. The warning for Py_DECREF() does not apply
+ # with respect to the object passed because the macro carefully
+ # uses a temporary variable and sets the argument to NULL before
+ # decrementing its reference count.
+ # It is a good idea to use this macro whenever decrementing the
+ # value of a variable that might be traversed during garbage
+ # collection.
+
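An illustrative sketch of the manual reference-counting pattern these comments describe: keeping an object alive as a raw PyObject* (the helper and the global are made up):

from cpython.ref cimport PyObject, Py_XINCREF, Py_XDECREF

cdef PyObject* _stashed = NULL

cdef void stash(object o):
    # Take a new reference to the incoming object first, then drop the
    # reference to whatever was stashed before, and store the raw pointer.
    Py_XINCREF(<PyObject*>o)
    Py_XDECREF(_stashed)
    _stashed = <PyObject*>o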
diff --git a/contrib/tools/cython/Cython/Includes/cpython/sequence.pxd b/contrib/tools/cython/Cython/Includes/cpython/sequence.pxd
index 14f9c8f29f..eb279968d2 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/sequence.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/sequence.pxd
@@ -1,136 +1,136 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- ############################################################################
- # 6.3 Sequence Protocol
- ############################################################################
-
- bint PySequence_Check(object o)
- # Return 1 if the object provides sequence protocol, and 0
- # otherwise. This function always succeeds.
-
- Py_ssize_t PySequence_Size(object o) except -1
- # Returns the number of objects in sequence o on success, and -1
- # on failure. For objects that do not provide sequence protocol,
- # this is equivalent to the Python expression "len(o)".
-
- Py_ssize_t PySequence_Length(object o) except -1
- # Alternate name for PySequence_Size().
-
- object PySequence_Concat(object o1, object o2)
- # Return value: New reference.
- # Return the concatenation of o1 and o2 on success, and NULL on
- # failure. This is the equivalent of the Python expression "o1 +
- # o2".
-
- object PySequence_Repeat(object o, Py_ssize_t count)
- # Return value: New reference.
- # Return the result of repeating sequence object o count times, or
- # NULL on failure. This is the equivalent of the Python expression
- # "o * count".
-
- object PySequence_InPlaceConcat(object o1, object o2)
- # Return value: New reference.
- # Return the concatenation of o1 and o2 on success, and NULL on
- # failure. The operation is done in-place when o1 supports
- # it. This is the equivalent of the Python expression "o1 += o2".
-
- object PySequence_InPlaceRepeat(object o, Py_ssize_t count)
- # Return value: New reference.
- # Return the result of repeating sequence object o count times, or
- # NULL on failure. The operation is done in-place when o supports
- # it. This is the equivalent of the Python expression "o *=
- # count".
-
- object PySequence_GetItem(object o, Py_ssize_t i)
- # Return value: New reference.
- # Return the ith element of o, or NULL on failure. This is the
- # equivalent of the Python expression "o[i]".
-
- object PySequence_GetSlice(object o, Py_ssize_t i1, Py_ssize_t i2)
- # Return value: New reference.
- # Return the slice of sequence object o between i1 and i2, or NULL
- # on failure. This is the equivalent of the Python expression
- # "o[i1:i2]".
-
- int PySequence_SetItem(object o, Py_ssize_t i, object v) except -1
- # Assign object v to the ith element of o. Returns -1 on
- # failure. This is the equivalent of the Python statement "o[i] =
- # v". This function does not steal a reference to v.
-
- int PySequence_DelItem(object o, Py_ssize_t i) except -1
- # Delete the ith element of object o. Returns -1 on failure. This
- # is the equivalent of the Python statement "del o[i]".
-
- int PySequence_SetSlice(object o, Py_ssize_t i1, Py_ssize_t i2, object v) except -1
- # Assign the sequence object v to the slice in sequence object o
- # from i1 to i2. This is the equivalent of the Python statement
- # "o[i1:i2] = v".
-
- int PySequence_DelSlice(object o, Py_ssize_t i1, Py_ssize_t i2) except -1
- # Delete the slice in sequence object o from i1 to i2. Returns -1
- # on failure. This is the equivalent of the Python statement "del
- # o[i1:i2]".
-
- int PySequence_Count(object o, object value) except -1
- # Return the number of occurrences of value in o, that is, return
- # the number of keys for which o[key] == value. On failure, return
- # -1. This is equivalent to the Python expression
- # "o.count(value)".
-
- int PySequence_Contains(object o, object value) except -1
- # Determine if o contains value. If an item in o is equal to
- # value, return 1, otherwise return 0. On error, return -1. This
- # is equivalent to the Python expression "value in o".
-
- Py_ssize_t PySequence_Index(object o, object value) except -1
- # Return the first index i for which o[i] == value. On error,
- # return -1. This is equivalent to the Python expression
- # "o.index(value)".
-
- object PySequence_List(object o)
- # Return value: New reference.
- # Return a list object with the same contents as the arbitrary
- # sequence o. The returned list is guaranteed to be new.
-
- object PySequence_Tuple(object o)
- # Return value: New reference.
- # Return a tuple object with the same contents as the arbitrary
- # sequence o or NULL on failure. If o is a tuple, a new reference
- # will be returned, otherwise a tuple will be constructed with the
- # appropriate contents. This is equivalent to the Python
- # expression "tuple(o)".
-
- object PySequence_Fast(object o, char *m)
- # Return value: New reference.
- # Returns the sequence o as a tuple, unless it is already a tuple
- # or list, in which case o is returned. Use
- # PySequence_Fast_GET_ITEM() to access the members of the
- # result. Returns NULL on failure. If the object is not a
- # sequence, raises TypeError with m as the message text.
-
- PyObject* PySequence_Fast_GET_ITEM(object o, Py_ssize_t i)
- # Return value: Borrowed reference.
- # Return the ith element of o, assuming that o was returned by
- # PySequence_Fast(), o is not NULL, and that i is within bounds.
-
- PyObject** PySequence_Fast_ITEMS(object o)
- # Return the underlying array of PyObject pointers. Assumes that o
- # was returned by PySequence_Fast() and o is not NULL.
-
- object PySequence_ITEM(object o, Py_ssize_t i)
- # Return value: New reference.
- # Return the ith element of o or NULL on failure. Macro form of
- # PySequence_GetItem() but without checking that
- # PySequence_Check(o) is true and without adjustment for negative
- # indices.
-
- Py_ssize_t PySequence_Fast_GET_SIZE(object o)
- # Returns the length of o, assuming that o was returned by
- # PySequence_Fast() and that o is not NULL. The size can also be
- # gotten by calling PySequence_Size() on o, but
- # PySequence_Fast_GET_SIZE() is faster because it can assume o is
- # a list or tuple.
-
-
+
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 6.3 Sequence Protocol
+ ############################################################################
+
+ bint PySequence_Check(object o)
+ # Return 1 if the object provides sequence protocol, and 0
+ # otherwise. This function always succeeds.
+
+ Py_ssize_t PySequence_Size(object o) except -1
+ # Returns the number of objects in sequence o on success, and -1
+ # on failure. For objects that do not provide sequence protocol,
+ # this is equivalent to the Python expression "len(o)".
+
+ Py_ssize_t PySequence_Length(object o) except -1
+ # Alternate name for PySequence_Size().
+
+ object PySequence_Concat(object o1, object o2)
+ # Return value: New reference.
+ # Return the concatenation of o1 and o2 on success, and NULL on
+ # failure. This is the equivalent of the Python expression "o1 +
+ # o2".
+
+ object PySequence_Repeat(object o, Py_ssize_t count)
+ # Return value: New reference.
+ # Return the result of repeating sequence object o count times, or
+ # NULL on failure. This is the equivalent of the Python expression
+ # "o * count".
+
+ object PySequence_InPlaceConcat(object o1, object o2)
+ # Return value: New reference.
+ # Return the concatenation of o1 and o2 on success, and NULL on
+ # failure. The operation is done in-place when o1 supports
+ # it. This is the equivalent of the Python expression "o1 += o2".
+
+ object PySequence_InPlaceRepeat(object o, Py_ssize_t count)
+ # Return value: New reference.
+ # Return the result of repeating sequence object o count times, or
+ # NULL on failure. The operation is done in-place when o supports
+ # it. This is the equivalent of the Python expression "o *=
+ # count".
+
+ object PySequence_GetItem(object o, Py_ssize_t i)
+ # Return value: New reference.
+ # Return the ith element of o, or NULL on failure. This is the
+ # equivalent of the Python expression "o[i]".
+
+ object PySequence_GetSlice(object o, Py_ssize_t i1, Py_ssize_t i2)
+ # Return value: New reference.
+ # Return the slice of sequence object o between i1 and i2, or NULL
+ # on failure. This is the equivalent of the Python expression
+ # "o[i1:i2]".
+
+ int PySequence_SetItem(object o, Py_ssize_t i, object v) except -1
+ # Assign object v to the ith element of o. Returns -1 on
+ # failure. This is the equivalent of the Python statement "o[i] =
+ # v". This function does not steal a reference to v.
+
+ int PySequence_DelItem(object o, Py_ssize_t i) except -1
+ # Delete the ith element of object o. Returns -1 on failure. This
+ # is the equivalent of the Python statement "del o[i]".
+
+ int PySequence_SetSlice(object o, Py_ssize_t i1, Py_ssize_t i2, object v) except -1
+ # Assign the sequence object v to the slice in sequence object o
+ # from i1 to i2. This is the equivalent of the Python statement
+ # "o[i1:i2] = v".
+
+ int PySequence_DelSlice(object o, Py_ssize_t i1, Py_ssize_t i2) except -1
+ # Delete the slice in sequence object o from i1 to i2. Returns -1
+ # on failure. This is the equivalent of the Python statement "del
+ # o[i1:i2]".
+
+ int PySequence_Count(object o, object value) except -1
+ # Return the number of occurrences of value in o, that is, return
+ # the number of keys for which o[key] == value. On failure, return
+ # -1. This is equivalent to the Python expression
+ # "o.count(value)".
+
+ int PySequence_Contains(object o, object value) except -1
+ # Determine if o contains value. If an item in o is equal to
+ # value, return 1, otherwise return 0. On error, return -1. This
+ # is equivalent to the Python expression "value in o".
+
+ Py_ssize_t PySequence_Index(object o, object value) except -1
+ # Return the first index i for which o[i] == value. On error,
+ # return -1. This is equivalent to the Python expression
+ # "o.index(value)".
+
+ object PySequence_List(object o)
+ # Return value: New reference.
+ # Return a list object with the same contents as the arbitrary
+ # sequence o. The returned list is guaranteed to be new.
+
+ object PySequence_Tuple(object o)
+ # Return value: New reference.
+ # Return a tuple object with the same contents as the arbitrary
+ # sequence o or NULL on failure. If o is a tuple, a new reference
+ # will be returned, otherwise a tuple will be constructed with the
+ # appropriate contents. This is equivalent to the Python
+ # expression "tuple(o)".
+
+ object PySequence_Fast(object o, char *m)
+ # Return value: New reference.
+ # Returns the sequence o as a tuple, unless it is already a tuple
+ # or list, in which case o is returned. Use
+ # PySequence_Fast_GET_ITEM() to access the members of the
+ # result. Returns NULL on failure. If the object is not a
+ # sequence, raises TypeError with m as the message text.
+
+ PyObject* PySequence_Fast_GET_ITEM(object o, Py_ssize_t i)
+ # Return value: Borrowed reference.
+ # Return the ith element of o, assuming that o was returned by
+ # PySequence_Fast(), o is not NULL, and that i is within bounds.
+
+ PyObject** PySequence_Fast_ITEMS(object o)
+ # Return the underlying array of PyObject pointers. Assumes that o
+ # was returned by PySequence_Fast() and o is not NULL.
+
+ object PySequence_ITEM(object o, Py_ssize_t i)
+ # Return value: New reference.
+ # Return the ith element of o or NULL on failure. Macro form of
+ # PySequence_GetItem() but without checking that
+ # PySequence_Check(o) is true and without adjustment for negative
+ # indices.
+
+ Py_ssize_t PySequence_Fast_GET_SIZE(object o)
+ # Returns the length of o, assuming that o was returned by
+ # PySequence_Fast() and that o is not NULL. The size can also be
+ # gotten by calling PySequence_Size() on o, but
+ # PySequence_Fast_GET_SIZE() is faster because it can assume o is
+ # a list or tuple.
+
+
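A short, hypothetical example of the PySequence_Fast() fast path described above (function name invented):

from cpython.sequence cimport (PySequence_Fast, PySequence_Fast_GET_SIZE,
                               PySequence_Fast_GET_ITEM)

def total_item_length(seq):
    # Sum len(item) over an arbitrary sequence; the bytes message becomes the
    # TypeError text if seq is not a sequence.
    cdef Py_ssize_t i, n, total = 0
    fast = PySequence_Fast(seq, b"expected a sequence")
    n = PySequence_Fast_GET_SIZE(fast)
    for i in range(n):
        # GET_ITEM returns a borrowed reference; the <object> cast takes a new one.
        total += len(<object>PySequence_Fast_GET_ITEM(fast, i))
    return total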
diff --git a/contrib/tools/cython/Cython/Includes/cpython/set.pxd b/contrib/tools/cython/Cython/Includes/cpython/set.pxd
index 2a196bec10..ae31d28ae3 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/set.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/set.pxd
@@ -1,119 +1,119 @@
-cdef extern from "Python.h":
-
- ############################################################################
- # 7.5.14 Set Objects
- ############################################################################
-
- # This section details the public API for set and frozenset
- # objects. Any functionality not listed below is best accessed
- # using the either the abstract object protocol (including
- # PyObject_CallMethod(), PyObject_RichCompareBool(),
- # PyObject_Hash(), PyObject_Repr(), PyObject_IsTrue(),
- # PyObject_Print(), and PyObject_GetIter()) or the abstract number
- # protocol (including PyNumber_Add(), PyNumber_Subtract(),
- # PyNumber_Or(), PyNumber_Xor(), PyNumber_InPlaceAdd(),
- # PyNumber_InPlaceSubtract(), PyNumber_InPlaceOr(), and
- # PyNumber_InPlaceXor()).
-
- # PySetObject
- # This subtype of PyObject is used to hold the internal data for
- # both set and frozenset objects. It is like a PyDictObject in
- # that it is a fixed size for small sets (much like tuple storage)
- # and will point to a separate, variable sized block of memory for
- # medium and large sized sets (much like list storage). None of
- # the fields of this structure should be considered public and are
- # subject to change. All access should be done through the
- # documented API rather than by manipulating the values in the
- # structure.
-
- # PyTypeObject PySet_Type
- # This is an instance of PyTypeObject representing the Python set type.
-
- # PyTypeObject PyFrozenSet_Type
- # This is an instance of PyTypeObject representing the Python frozenset type.
-
- # The following type check macros work on pointers to any Python
- # object. Likewise, the constructor functions work with any
- # iterable Python object.
-
- bint PyAnySet_Check(object p)
- # Return true if p is a set object, a frozenset object, or an
- # instance of a subtype.
-
- bint PyAnySet_CheckExact(object p)
- # Return true if p is a set object or a frozenset object but not
- # an instance of a subtype.
-
+cdef extern from "Python.h":
+
+ ############################################################################
+ # 7.5.14 Set Objects
+ ############################################################################
+
+ # This section details the public API for set and frozenset
+ # objects. Any functionality not listed below is best accessed
+ # using either the abstract object protocol (including
+ # PyObject_CallMethod(), PyObject_RichCompareBool(),
+ # PyObject_Hash(), PyObject_Repr(), PyObject_IsTrue(),
+ # PyObject_Print(), and PyObject_GetIter()) or the abstract number
+ # protocol (including PyNumber_Add(), PyNumber_Subtract(),
+ # PyNumber_Or(), PyNumber_Xor(), PyNumber_InPlaceAdd(),
+ # PyNumber_InPlaceSubtract(), PyNumber_InPlaceOr(), and
+ # PyNumber_InPlaceXor()).
+
+ # PySetObject
+ # This subtype of PyObject is used to hold the internal data for
+ # both set and frozenset objects. It is like a PyDictObject in
+ # that it is a fixed size for small sets (much like tuple storage)
+ # and will point to a separate, variable sized block of memory for
+ # medium and large sized sets (much like list storage). None of
+ # the fields of this structure should be considered public and are
+ # subject to change. All access should be done through the
+ # documented API rather than by manipulating the values in the
+ # structure.
+
+ # PyTypeObject PySet_Type
+ # This is an instance of PyTypeObject representing the Python set type.
+
+ # PyTypeObject PyFrozenSet_Type
+ # This is an instance of PyTypeObject representing the Python frozenset type.
+
+ # The following type check macros work on pointers to any Python
+ # object. Likewise, the constructor functions work with any
+ # iterable Python object.
+
+ bint PyAnySet_Check(object p)
+ # Return true if p is a set object, a frozenset object, or an
+ # instance of a subtype.
+
+ bint PyAnySet_CheckExact(object p)
+ # Return true if p is a set object or a frozenset object but not
+ # an instance of a subtype.
+
bint PyFrozenSet_Check(object p)
# Return true if p is a frozenset object or an instance of a subtype.
- bint PyFrozenSet_CheckExact(object p)
- # Return true if p is a frozenset object but not an instance of a subtype.
-
+ bint PyFrozenSet_CheckExact(object p)
+ # Return true if p is a frozenset object but not an instance of a subtype.
+
bint PySet_Check(object p)
# Return true if p is a set object or an instance of a subtype.
- object PySet_New(object iterable)
- # Return value: New reference.
- # Return a new set containing objects returned by the
- # iterable. The iterable may be NULL to create a new empty
- # set. Return the new set on success or NULL on failure. Raise
- # TypeError if iterable is not actually iterable. The constructor
- # is also useful for copying a set (c=set(s)).
-
- object PyFrozenSet_New(object iterable)
- # Return value: New reference.
- # Return a new frozenset containing objects returned by the
- # iterable. The iterable may be NULL to create a new empty
- # frozenset. Return the new set on success or NULL on
- # failure. Raise TypeError if iterable is not actually iterable.
-
-
- # The following functions and macros are available for instances
- # of set or frozenset or instances of their subtypes.
-
- Py_ssize_t PySet_Size(object anyset) except -1
- # Return the length of a set or frozenset object. Equivalent to
- # "len(anyset)". Raises a PyExc_SystemError if anyset is not a
- # set, frozenset, or an instance of a subtype.
-
- Py_ssize_t PySet_GET_SIZE(object anyset)
- # Macro form of PySet_Size() without error checking.
-
- bint PySet_Contains(object anyset, object key) except -1
- # Return 1 if found, 0 if not found, and -1 if an error is
- # encountered. Unlike the Python __contains__() method, this
- # function does not automatically convert unhashable sets into
- # temporary frozensets. Raise a TypeError if the key is
- # unhashable. Raise PyExc_SystemError if anyset is not a set,
- # frozenset, or an instance of a subtype.
-
-
- # The following functions are available for instances of set or
- # its subtypes but not for instances of frozenset or its subtypes.
-
- int PySet_Add(object set, object key) except -1
- # Add key to a set instance. Does not apply to frozenset
- # instances. Return 0 on success or -1 on failure. Raise a
- # TypeError if the key is unhashable. Raise a MemoryError if there
- # is no room to grow. Raise a SystemError if set is an not an
- # instance of set or its subtype.
-
- bint PySet_Discard(object set, object key) except -1
- # Return 1 if found and removed, 0 if not found (no action taken),
- # and -1 if an error is encountered. Does not raise KeyError for
- # missing keys. Raise a TypeError if the key is unhashable. Unlike
- # the Python discard() method, this function does not
- # automatically convert unhashable sets into temporary
- # frozensets. Raise PyExc_SystemError if set is an not an instance
- # of set or its subtype.
-
- object PySet_Pop(object set)
- # Return value: New reference.
- # Return a new reference to an arbitrary object in the set, and
- # removes the object from the set. Return NULL on failure. Raise
- # KeyError if the set is empty. Raise a SystemError if set is an
- # not an instance of set or its subtype.
-
- int PySet_Clear(object set)
- # Empty an existing set of all elements.
+ object PySet_New(object iterable)
+ # Return value: New reference.
+ # Return a new set containing objects returned by the
+ # iterable. The iterable may be NULL to create a new empty
+ # set. Return the new set on success or NULL on failure. Raise
+ # TypeError if iterable is not actually iterable. The constructor
+ # is also useful for copying a set (c=set(s)).
+
+ object PyFrozenSet_New(object iterable)
+ # Return value: New reference.
+ # Return a new frozenset containing objects returned by the
+ # iterable. The iterable may be NULL to create a new empty
+ # frozenset. Return the new set on success or NULL on
+ # failure. Raise TypeError if iterable is not actually iterable.
+
+
+ # The following functions and macros are available for instances
+ # of set or frozenset or instances of their subtypes.
+
+ Py_ssize_t PySet_Size(object anyset) except -1
+ # Return the length of a set or frozenset object. Equivalent to
+ # "len(anyset)". Raises a PyExc_SystemError if anyset is not a
+ # set, frozenset, or an instance of a subtype.
+
+ Py_ssize_t PySet_GET_SIZE(object anyset)
+ # Macro form of PySet_Size() without error checking.
+
+ bint PySet_Contains(object anyset, object key) except -1
+ # Return 1 if found, 0 if not found, and -1 if an error is
+ # encountered. Unlike the Python __contains__() method, this
+ # function does not automatically convert unhashable sets into
+ # temporary frozensets. Raise a TypeError if the key is
+ # unhashable. Raise PyExc_SystemError if anyset is not a set,
+ # frozenset, or an instance of a subtype.
+
+
+ # The following functions are available for instances of set or
+ # its subtypes but not for instances of frozenset or its subtypes.
+
+ int PySet_Add(object set, object key) except -1
+ # Add key to a set instance. Does not apply to frozenset
+ # instances. Return 0 on success or -1 on failure. Raise a
+ # TypeError if the key is unhashable. Raise a MemoryError if there
+ # is no room to grow. Raise a SystemError if set is not an
+ # instance of set or its subtype.
+
+ bint PySet_Discard(object set, object key) except -1
+ # Return 1 if found and removed, 0 if not found (no action taken),
+ # and -1 if an error is encountered. Does not raise KeyError for
+ # missing keys. Raise a TypeError if the key is unhashable. Unlike
+ # the Python discard() method, this function does not
+ # automatically convert unhashable sets into temporary
+ # frozensets. Raise PyExc_SystemError if set is not an instance
+ # of set or its subtype.
+
+ object PySet_Pop(object set)
+ # Return value: New reference.
+ # Return a new reference to an arbitrary object in the set, and
+ # removes the object from the set. Return NULL on failure. Raise
+ # KeyError if the set is empty. Raise a SystemError if set is
+ # not an instance of set or its subtype.
+
+ int PySet_Clear(object set)
+ # Empty an existing set of all elements.
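A minimal sketch, assuming an arbitrary iterable of hashable items, of the set-construction calls documented above (function name invented):

from cpython.set cimport PySet_New, PySet_Add, PyFrozenSet_New

def unique_frozen(items):
    # Build a mutable set, fill it item by item, then freeze the result;
    # PySet_Add() raises TypeError for unhashable items.
    s = PySet_New([])
    for item in items:
        PySet_Add(s, item)
    return PyFrozenSet_New(s)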
diff --git a/contrib/tools/cython/Cython/Includes/cpython/string.pxd b/contrib/tools/cython/Cython/Includes/cpython/string.pxd
index 19fcfd3452..8af78f3dde 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/string.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/string.pxd
@@ -1,198 +1,198 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
- ctypedef struct va_list
-
- ############################################################################
- # 7.3.1 String Objects
- ############################################################################
-
- # These functions raise TypeError when expecting a string
- # parameter and are called with a non-string parameter.
- # PyStringObject
- # This subtype of PyObject represents a Python string object.
- # PyTypeObject PyString_Type
- # This instance of PyTypeObject represents the Python string type;
- # it is the same object as str and types.StringType in the Python
- # layer.
-
- bint PyString_Check(object o)
- # Return true if the object o is a string object or an instance of
- # a subtype of the string type.
-
- bint PyString_CheckExact(object o)
- # Return true if the object o is a string object, but not an instance of a subtype of the string type.
-
- object PyString_FromString(char *v)
- # Return value: New reference.
- # Return a new string object with the value v on success, and NULL
- # on failure. The parameter v must not be NULL; it will not be
- # checked.
-
- object PyString_FromStringAndSize(char *v, Py_ssize_t len)
- # Return value: New reference.
- # Return a new string object with the value v and length len on
- # success, and NULL on failure. If v is NULL, the contents of the
- # string are uninitialized.
-
- object PyString_FromFormat(char *format, ...)
- # Return value: New reference.
- # Take a C printf()-style format string and a variable number of
- # arguments, calculate the size of the resulting Python string and
- # return a string with the values formatted into it. The variable
- # arguments must be C types and must correspond exactly to the
- # format characters in the format string. The following format
- # characters are allowed:
- # Format Characters Type Comment
- # %% n/a The literal % character.
- # %c int A single character, represented as an C int.
- # %d int Exactly equivalent to printf("%d").
- # %u unsigned int Exactly equivalent to printf("%u").
- # %ld long Exactly equivalent to printf("%ld").
- # %lu unsigned long Exactly equivalent to printf("%lu").
- # %zd Py_ssize_t Exactly equivalent to printf("%zd").
- # %zu size_t Exactly equivalent to printf("%zu").
- # %i int Exactly equivalent to printf("%i").
- # %x int Exactly equivalent to printf("%x").
- # %s char* A null-terminated C character array.
-
- # %p void* The hex representation of a C pointer.
- # Mostly equivalent to printf("%p") except that it is guaranteed to
- # start with the literal 0x regardless of what the platform's printf
- # yields.
- # An unrecognized format character causes all the rest of the
- # format string to be copied as-is to the result string, and any
- # extra arguments discarded.
-
- object PyString_FromFormatV(char *format, va_list vargs)
- # Return value: New reference.
- # Identical to PyString_FromFormat() except that it takes exactly two arguments.
-
- Py_ssize_t PyString_Size(object string) except -1
- # Return the length of the string in string object string.
-
- Py_ssize_t PyString_GET_SIZE(object string)
- # Macro form of PyString_Size() but without error checking.
-
- char* PyString_AsString(object string) except NULL
- # Return a NUL-terminated representation of the contents of
- # string. The pointer refers to the internal buffer of string, not
- # a copy. The data must not be modified in any way, unless the
- # string was just created using PyString_FromStringAndSize(NULL,
- # size). It must not be deallocated. If string is a Unicode
- # object, this function computes the default encoding of string
- # and operates on that. If string is not a string object at all,
- # PyString_AsString() returns NULL and raises TypeError.
-
- char* PyString_AS_STRING(object string)
- # Macro form of PyString_AsString() but without error
- # checking. Only string objects are supported; no Unicode objects
- # should be passed.
-
- int PyString_AsStringAndSize(object obj, char **buffer, Py_ssize_t *length) except -1
- # Return a NULL-terminated representation of the contents of the
- # object obj through the output variables buffer and length.
- #
- # The function accepts both string and Unicode objects as
- # input. For Unicode objects it returns the default encoded
- # version of the object. If length is NULL, the resulting buffer
- # may not contain NUL characters; if it does, the function returns
- # -1 and a TypeError is raised.
-
- # The buffer refers to an internal string buffer of obj, not a
- # copy. The data must not be modified in any way, unless the
- # string was just created using PyString_FromStringAndSize(NULL,
- # size). It must not be deallocated. If string is a Unicode
- # object, this function computes the default encoding of string
- # and operates on that. If string is not a string object at all,
- # PyString_AsStringAndSize() returns -1 and raises TypeError.
-
- void PyString_Concat(PyObject **string, object newpart)
- # Create a new string object in *string containing the contents of
- # newpart appended to string; the caller will own the new
- # reference. The reference to the old value of string will be
- # stolen. If the new string cannot be created, the old reference
- # to string will still be discarded and the value of *string will
- # be set to NULL; the appropriate exception will be set.
-
- void PyString_ConcatAndDel(PyObject **string, object newpart)
- # Create a new string object in *string containing the contents of
- # newpart appended to string. This version decrements the
- # reference count of newpart.
-
- int _PyString_Resize(PyObject **string, Py_ssize_t newsize) except -1
- # A way to resize a string object even though it is
- # ``immutable''. Only use this to build up a brand new string
- # object; don't use this if the string may already be known in
- # other parts of the code. It is an error to call this function if
- # the refcount on the input string object is not one. Pass the
- # address of an existing string object as an lvalue (it may be
- # written into), and the new size desired. On success, *string
- # holds the resized string object and 0 is returned; the address
- # in *string may differ from its input value. If the reallocation
- # fails, the original string object at *string is deallocated,
- # *string is set to NULL, a memory exception is set, and -1 is
- # returned.
-
- object PyString_Format(object format, object args)
- # Return value: New reference. Return a new string object from
- # format and args. Analogous to format % args. The args argument
- # must be a tuple.
-
- void PyString_InternInPlace(PyObject **string)
- # Intern the argument *string in place. The argument must be the
- # address of a pointer variable pointing to a Python string
- # object. If there is an existing interned string that is the same
- # as *string, it sets *string to it (decrementing the reference
- # count of the old string object and incrementing the reference
- # count of the interned string object), otherwise it leaves
- # *string alone and interns it (incrementing its reference
- # count). (Clarification: even though there is a lot of talk about
- # reference counts, think of this function as
- # reference-count-neutral; you own the object after the call if
- # and only if you owned it before the call.)
-
- object PyString_InternFromString(char *v)
- # Return value: New reference.
- # A combination of PyString_FromString() and
- # PyString_InternInPlace(), returning either a new string object
- # that has been interned, or a new (``owned'') reference to an
- # earlier interned string object with the same value.
-
- object PyString_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
- # Return value: New reference.
- # Create an object by decoding size bytes of the encoded buffer s
- # using the codec registered for encoding. encoding and errors
- # have the same meaning as the parameters of the same name in the
- # unicode() built-in function. The codec to be used is looked up
- # using the Python codec registry. Return NULL if an exception was
- # raised by the codec.
-
- object PyString_AsDecodedObject(object str, char *encoding, char *errors)
- # Return value: New reference.
- # Decode a string object by passing it to the codec registered for
- # encoding and return the result as Python object. encoding and
- # errors have the same meaning as the parameters of the same name
- # in the string encode() method. The codec to be used is looked up
- # using the Python codec registry. Return NULL if an exception was
- # raised by the codec.
-
- object PyString_Encode(char *s, Py_ssize_t size, char *encoding, char *errors)
- # Return value: New reference.
- # Encode the char buffer of the given size by passing it to the
- # codec registered for encoding and return a Python
- # object. encoding and errors have the same meaning as the
- # parameters of the same name in the string encode() method. The
- # codec to be used is looked up using the Python codec
- # registry. Return NULL if an exception was raised by the codec.
-
- object PyString_AsEncodedObject(object str, char *encoding, char *errors)
- # Return value: New reference.
- # Encode a string object using the codec registered for encoding
- # and return the result as Python object. encoding and errors have
- # the same meaning as the parameters of the same name in the
- # string encode() method. The codec to be used is looked up using
- # the Python codec registry. Return NULL if an exception was
- # raised by the codec.
-
-
+
+cdef extern from "Python.h":
+ ctypedef struct va_list
+
+ ############################################################################
+ # 7.3.1 String Objects
+ ############################################################################
+
+ # These functions raise TypeError when expecting a string
+ # parameter and are called with a non-string parameter.
+ # PyStringObject
+ # This subtype of PyObject represents a Python string object.
+ # PyTypeObject PyString_Type
+ # This instance of PyTypeObject represents the Python string type;
+ # it is the same object as str and types.StringType in the Python
+ # layer.
+
+ bint PyString_Check(object o)
+ # Return true if the object o is a string object or an instance of
+ # a subtype of the string type.
+
+ bint PyString_CheckExact(object o)
+ # Return true if the object o is a string object, but not an instance of a subtype of the string type.
+
+ object PyString_FromString(char *v)
+ # Return value: New reference.
+ # Return a new string object with the value v on success, and NULL
+ # on failure. The parameter v must not be NULL; it will not be
+ # checked.
+
+ object PyString_FromStringAndSize(char *v, Py_ssize_t len)
+ # Return value: New reference.
+ # Return a new string object with the value v and length len on
+ # success, and NULL on failure. If v is NULL, the contents of the
+ # string are uninitialized.
+
+ object PyString_FromFormat(char *format, ...)
+ # Return value: New reference.
+ # Take a C printf()-style format string and a variable number of
+ # arguments, calculate the size of the resulting Python string and
+ # return a string with the values formatted into it. The variable
+ # arguments must be C types and must correspond exactly to the
+ # format characters in the format string. The following format
+ # characters are allowed:
+ # Format Characters Type Comment
+ # %% n/a The literal % character.
+ # %c int A single character, represented as a C int.
+ # %d int Exactly equivalent to printf("%d").
+ # %u unsigned int Exactly equivalent to printf("%u").
+ # %ld long Exactly equivalent to printf("%ld").
+ # %lu unsigned long Exactly equivalent to printf("%lu").
+ # %zd Py_ssize_t Exactly equivalent to printf("%zd").
+ # %zu size_t Exactly equivalent to printf("%zu").
+ # %i int Exactly equivalent to printf("%i").
+ # %x int Exactly equivalent to printf("%x").
+ # %s char* A null-terminated C character array.
+
+ # %p void* The hex representation of a C pointer.
+ # Mostly equivalent to printf("%p") except that it is guaranteed to
+ # start with the literal 0x regardless of what the platform's printf
+ # yields.
+ # An unrecognized format character causes all the rest of the
+ # format string to be copied as-is to the result string, and any
+ # extra arguments discarded.
+
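As a rough, hypothetical illustration of the varargs contract described above (Python 2 builds only, since these are str APIs; the helper name `tag` is invented and not part of any API):

    # Cython sketch; the C arguments must match the format characters exactly.
    from cpython.string cimport PyString_FromFormat

    def tag(char *name, int n):
        return PyString_FromFormat(b"%s-%d", name, n)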
+ object PyString_FromFormatV(char *format, va_list vargs)
+ # Return value: New reference.
+ # Identical to PyString_FromFormat() except that it takes exactly two arguments.
+
+ Py_ssize_t PyString_Size(object string) except -1
+ # Return the length of the string in string object string.
+
+ Py_ssize_t PyString_GET_SIZE(object string)
+ # Macro form of PyString_Size() but without error checking.
+
+ char* PyString_AsString(object string) except NULL
+ # Return a NUL-terminated representation of the contents of
+ # string. The pointer refers to the internal buffer of string, not
+ # a copy. The data must not be modified in any way, unless the
+ # string was just created using PyString_FromStringAndSize(NULL,
+ # size). It must not be deallocated. If string is a Unicode
+ # object, this function computes the default encoding of string
+ # and operates on that. If string is not a string object at all,
+ # PyString_AsString() returns NULL and raises TypeError.
+
+ char* PyString_AS_STRING(object string)
+ # Macro form of PyString_AsString() but without error
+ # checking. Only string objects are supported; no Unicode objects
+ # should be passed.
+
+ int PyString_AsStringAndSize(object obj, char **buffer, Py_ssize_t *length) except -1
+ # Return a NULL-terminated representation of the contents of the
+ # object obj through the output variables buffer and length.
+ #
+ # The function accepts both string and Unicode objects as
+ # input. For Unicode objects it returns the default encoded
+ # version of the object. If length is NULL, the resulting buffer
+ # may not contain NUL characters; if it does, the function returns
+ # -1 and a TypeError is raised.
+
+ # The buffer refers to an internal string buffer of obj, not a
+ # copy. The data must not be modified in any way, unless the
+ # string was just created using PyString_FromStringAndSize(NULL,
+ # size). It must not be deallocated. If string is a Unicode
+ # object, this function computes the default encoding of string
+ # and operates on that. If string is not a string object at all,
+ # PyString_AsStringAndSize() returns -1 and raises TypeError.
+
+ void PyString_Concat(PyObject **string, object newpart)
+ # Create a new string object in *string containing the contents of
+ # newpart appended to string; the caller will own the new
+ # reference. The reference to the old value of string will be
+ # stolen. If the new string cannot be created, the old reference
+ # to string will still be discarded and the value of *string will
+ # be set to NULL; the appropriate exception will be set.
+
+ void PyString_ConcatAndDel(PyObject **string, object newpart)
+ # Create a new string object in *string containing the contents of
+ # newpart appended to string. This version decrements the
+ # reference count of newpart.
+
+ int _PyString_Resize(PyObject **string, Py_ssize_t newsize) except -1
+ # A way to resize a string object even though it is
+ # ``immutable''. Only use this to build up a brand new string
+ # object; don't use this if the string may already be known in
+ # other parts of the code. It is an error to call this function if
+ # the refcount on the input string object is not one. Pass the
+ # address of an existing string object as an lvalue (it may be
+ # written into), and the new size desired. On success, *string
+ # holds the resized string object and 0 is returned; the address
+ # in *string may differ from its input value. If the reallocation
+ # fails, the original string object at *string is deallocated,
+ # *string is set to NULL, a memory exception is set, and -1 is
+ # returned.
+
+ object PyString_Format(object format, object args)
+ # Return value: New reference. Return a new string object from
+ # format and args. Analogous to format % args. The args argument
+ # must be a tuple.
+
+ void PyString_InternInPlace(PyObject **string)
+ # Intern the argument *string in place. The argument must be the
+ # address of a pointer variable pointing to a Python string
+ # object. If there is an existing interned string that is the same
+ # as *string, it sets *string to it (decrementing the reference
+ # count of the old string object and incrementing the reference
+ # count of the interned string object), otherwise it leaves
+ # *string alone and interns it (incrementing its reference
+ # count). (Clarification: even though there is a lot of talk about
+ # reference counts, think of this function as
+ # reference-count-neutral; you own the object after the call if
+ # and only if you owned it before the call.)
+
+ object PyString_InternFromString(char *v)
+ # Return value: New reference.
+ # A combination of PyString_FromString() and
+ # PyString_InternInPlace(), returning either a new string object
+ # that has been interned, or a new (``owned'') reference to an
+ # earlier interned string object with the same value.
+
+ object PyString_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
+ # Return value: New reference.
+ # Create an object by decoding size bytes of the encoded buffer s
+ # using the codec registered for encoding. encoding and errors
+ # have the same meaning as the parameters of the same name in the
+ # unicode() built-in function. The codec to be used is looked up
+ # using the Python codec registry. Return NULL if an exception was
+ # raised by the codec.
+
+ object PyString_AsDecodedObject(object str, char *encoding, char *errors)
+ # Return value: New reference.
+ # Decode a string object by passing it to the codec registered for
+ # encoding and return the result as Python object. encoding and
+ # errors have the same meaning as the parameters of the same name
+ # in the string encode() method. The codec to be used is looked up
+ # using the Python codec registry. Return NULL if an exception was
+ # raised by the codec.
+
+ object PyString_Encode(char *s, Py_ssize_t size, char *encoding, char *errors)
+ # Return value: New reference.
+ # Encode the char buffer of the given size by passing it to the
+ # codec registered for encoding and return a Python
+ # object. encoding and errors have the same meaning as the
+ # parameters of the same name in the string encode() method. The
+ # codec to be used is looked up using the Python codec
+ # registry. Return NULL if an exception was raised by the codec.
+
+ object PyString_AsEncodedObject(object str, char *encoding, char *errors)
+ # Return value: New reference.
+ # Encode a string object using the codec registered for encoding
+ # and return the result as Python object. encoding and errors have
+ # the same meaning as the parameters of the same name in the
+ # string encode() method. The codec to be used is looked up using
+ # the Python codec registry. Return NULL if an exception was
+ # raised by the codec.
+
+
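A minimal sketch of how these declarations are typically used from Cython (Python 2 builds only; `join_twice` is an invented name, not part of any API):

    from cpython.string cimport PyString_FromStringAndSize, PyString_AsStringAndSize

    def join_twice(bytes data):
        cdef char *buf
        cdef Py_ssize_t n
        # Expose the internal buffer of `data`; it must not be modified.
        PyString_AsStringAndSize(data, &buf, &n)
        # FromStringAndSize copies the buffer into a brand-new str object.
        return PyString_FromStringAndSize(buf, n) + data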
diff --git a/contrib/tools/cython/Cython/Includes/cpython/tuple.pxd b/contrib/tools/cython/Cython/Includes/cpython/tuple.pxd
index 178603b691..09c46e0b4b 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/tuple.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/tuple.pxd
@@ -1,71 +1,71 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- ############################################################################
- # Tuples
- ############################################################################
-
- bint PyTuple_Check(object p)
- # Return true if p is a tuple object or an instance of a subtype
- # of the tuple type.
-
- bint PyTuple_CheckExact(object p)
- # Return true if p is a tuple object, but not an instance of a subtype of the tuple type.
-
- tuple PyTuple_New(Py_ssize_t len)
- # Return value: New reference.
- # Return a new tuple object of size len, or NULL on failure.
-
- tuple PyTuple_Pack(Py_ssize_t n, ...)
- # Return value: New reference.
- # Return a new tuple object of size n, or NULL on failure. The
- # tuple values are initialized to the subsequent n C arguments
- # pointing to Python objects. "PyTuple_Pack(2, a, b)" is
- # equivalent to "Py_BuildValue("(OO)", a, b)".
-
- Py_ssize_t PyTuple_Size(object p) except -1
- # Take a pointer to a tuple object, and return the size of that tuple.
-
- Py_ssize_t PyTuple_GET_SIZE(object p)
- # Return the size of the tuple p, which must be non-NULL and point
- # to a tuple; no error checking is performed.
-
- PyObject* PyTuple_GetItem(object p, Py_ssize_t pos) except NULL
- # Return value: Borrowed reference.
- # Return the object at position pos in the tuple pointed to by
- # p. If pos is out of bounds, return NULL and set an IndexError
- # exception.
-
- PyObject* PyTuple_GET_ITEM(object p, Py_ssize_t pos)
- # Return value: Borrowed reference.
- # Like PyTuple_GetItem(), but does no checking of its arguments.
-
- tuple PyTuple_GetSlice(object p, Py_ssize_t low, Py_ssize_t high)
- # Return value: New reference.
- # Take a slice of the tuple pointed to by p from low to high and return it as a new tuple.
-
+
+cdef extern from "Python.h":
+
+ ############################################################################
+ # Tuples
+ ############################################################################
+
+ bint PyTuple_Check(object p)
+ # Return true if p is a tuple object or an instance of a subtype
+ # of the tuple type.
+
+ bint PyTuple_CheckExact(object p)
+ # Return true if p is a tuple object, but not an instance of a subtype of the tuple type.
+
+ tuple PyTuple_New(Py_ssize_t len)
+ # Return value: New reference.
+ # Return a new tuple object of size len, or NULL on failure.
+
+ tuple PyTuple_Pack(Py_ssize_t n, ...)
+ # Return value: New reference.
+ # Return a new tuple object of size n, or NULL on failure. The
+ # tuple values are initialized to the subsequent n C arguments
+ # pointing to Python objects. "PyTuple_Pack(2, a, b)" is
+ # equivalent to "Py_BuildValue("(OO)", a, b)".
+
+ Py_ssize_t PyTuple_Size(object p) except -1
+ # Take a pointer to a tuple object, and return the size of that tuple.
+
+ Py_ssize_t PyTuple_GET_SIZE(object p)
+ # Return the size of the tuple p, which must be non-NULL and point
+ # to a tuple; no error checking is performed.
+
+ PyObject* PyTuple_GetItem(object p, Py_ssize_t pos) except NULL
+ # Return value: Borrowed reference.
+ # Return the object at position pos in the tuple pointed to by
+ # p. If pos is out of bounds, return NULL and set an IndexError
+ # exception.
+
+ PyObject* PyTuple_GET_ITEM(object p, Py_ssize_t pos)
+ # Return value: Borrowed reference.
+ # Like PyTuple_GetItem(), but does no checking of its arguments.
+
+ tuple PyTuple_GetSlice(object p, Py_ssize_t low, Py_ssize_t high)
+ # Return value: New reference.
+ # Take a slice of the tuple pointed to by p from low to high and return it as a new tuple.
+
int PyTuple_SetItem(object p, Py_ssize_t pos, object o) except -1
- # Insert a reference to object o at position pos of the tuple
- # pointed to by p. Return 0 on success. Note: This function
- # ``steals'' a reference to o.
-
- void PyTuple_SET_ITEM(object p, Py_ssize_t pos, object o)
- # Like PyTuple_SetItem(), but does no error checking, and should
- # only be used to fill in brand new tuples. Note: This function
- # ``steals'' a reference to o.
-
- int _PyTuple_Resize(PyObject **p, Py_ssize_t newsize) except -1
- # Can be used to resize a tuple. newsize will be the new length of
- # the tuple. Because tuples are supposed to be immutable, this
- # should only be used if there is only one reference to the
- # object. Do not use this if the tuple may already be known to
- # some other part of the code. The tuple will always grow or
- # shrink at the end. Think of this as destroying the old tuple and
- # creating a new one, only more efficiently. Returns 0 on
- # success. Client code should never assume that the resulting
- # value of *p will be the same as before calling this function. If
- # the object referenced by *p is replaced, the original *p is
- # destroyed. On failure, returns -1 and sets *p to NULL, and
- # raises MemoryError or SystemError.
-
+ # Insert a reference to object o at position pos of the tuple
+ # pointed to by p. Return 0 on success. Note: This function
+ # ``steals'' a reference to o.
+
+ void PyTuple_SET_ITEM(object p, Py_ssize_t pos, object o)
+ # Like PyTuple_SetItem(), but does no error checking, and should
+ # only be used to fill in brand new tuples. Note: This function
+ # ``steals'' a reference to o.
+
+ int _PyTuple_Resize(PyObject **p, Py_ssize_t newsize) except -1
+ # Can be used to resize a tuple. newsize will be the new length of
+ # the tuple. Because tuples are supposed to be immutable, this
+ # should only be used if there is only one reference to the
+ # object. Do not use this if the tuple may already be known to
+ # some other part of the code. The tuple will always grow or
+ # shrink at the end. Think of this as destroying the old tuple and
+ # creating a new one, only more efficiently. Returns 0 on
+ # success. Client code should never assume that the resulting
+ # value of *p will be the same as before calling this function. If
+ # the object referenced by *p is replaced, the original *p is
+ # destroyed. On failure, returns -1 and sets *p to NULL, and
+ # raises MemoryError or SystemError.
+
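Because PyTuple_SET_ITEM steals a reference, the usual Cython pattern is an explicit Py_INCREF before each call when filling a brand-new tuple. A minimal sketch under that assumption (`pair` is an invented name):

    from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM
    from cpython.ref cimport Py_INCREF

    def pair(a, b):
        # PyTuple_New returns a fresh, not-yet-filled tuple of length 2.
        cdef tuple t = PyTuple_New(2)
        Py_INCREF(a)              # SET_ITEM steals this reference
        PyTuple_SET_ITEM(t, 0, a)
        Py_INCREF(b)
        PyTuple_SET_ITEM(t, 1, b)
        return t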
diff --git a/contrib/tools/cython/Cython/Includes/cpython/type.pxd b/contrib/tools/cython/Cython/Includes/cpython/type.pxd
index 1a86fe779f..a1d094e37c 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/type.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/type.pxd
@@ -1,48 +1,48 @@
-
-cdef extern from "Python.h":
- # The C structure of the objects used to describe built-in types.
-
- ############################################################################
- # 7.1.1 Type Objects
- ############################################################################
-
- ctypedef class __builtin__.type [object PyTypeObject]:
- pass
-
- # PyObject* PyType_Type
- # This is the type object for type objects; it is the same object
- # as type and types.TypeType in the Python layer.
-
- bint PyType_Check(object o)
- # Return true if the object o is a type object, including
- # instances of types derived from the standard type object. Return
- # false in all other cases.
-
- bint PyType_CheckExact(object o)
- # Return true if the object o is a type object, but not a subtype
- # of the standard type object. Return false in all other
- # cases.
-
- bint PyType_HasFeature(object o, int feature)
- # Return true if the type object o sets the feature feature. Type
- # features are denoted by single bit flags.
-
- bint PyType_IS_GC(object o)
- # Return true if the type object includes support for the cycle
- # detector; this tests the type flag Py_TPFLAGS_HAVE_GC.
-
- bint PyType_IsSubtype(type a, type b)
- # Return true if a is a subtype of b.
-
- object PyType_GenericAlloc(object type, Py_ssize_t nitems)
- # Return value: New reference.
-
- object PyType_GenericNew(type type, object args, object kwds)
- # Return value: New reference.
-
- bint PyType_Ready(type type) except -1
- # Finalize a type object. This should be called on all type
- # objects to finish their initialization. This function is
- # responsible for adding inherited slots from a type's base
- # class. Return 0 on success, or return -1 and set an exception
- # on error.
+
+cdef extern from "Python.h":
+ # The C structure of the objects used to describe built-in types.
+
+ ############################################################################
+ # 7.1.1 Type Objects
+ ############################################################################
+
+ ctypedef class __builtin__.type [object PyTypeObject]:
+ pass
+
+ # PyObject* PyType_Type
+ # This is the type object for type objects; it is the same object
+ # as type and types.TypeType in the Python layer.
+
+ bint PyType_Check(object o)
+ # Return true if the object o is a type object, including
+ # instances of types derived from the standard type object. Return
+ # false in all other cases.
+
+ bint PyType_CheckExact(object o)
+ # Return true if the object o is a type object, but not a subtype
+ # of the standard type object. Return false in all other
+ # cases.
+
+ bint PyType_HasFeature(object o, int feature)
+ # Return true if the type object o sets the feature feature. Type
+ # features are denoted by single bit flags.
+
+ bint PyType_IS_GC(object o)
+ # Return true if the type object includes support for the cycle
+ # detector; this tests the type flag Py_TPFLAGS_HAVE_GC.
+
+ bint PyType_IsSubtype(type a, type b)
+ # Return true if a is a subtype of b.
+
+ object PyType_GenericAlloc(object type, Py_ssize_t nitems)
+ # Return value: New reference.
+
+ object PyType_GenericNew(type type, object args, object kwds)
+ # Return value: New reference.
+
+ bint PyType_Ready(type type) except -1
+ # Finalize a type object. This should be called on all type
+ # objects to finish their initialization. This function is
+ # responsible for adding inherited slots from a type's base
+ # class. Return 0 on success, or return -1 and set an exception
+ # on error.
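A minimal sketch of how the predicates above are combined from Cython (`is_int_subclass` is an invented name):

    from cpython.type cimport PyType_Check, PyType_IsSubtype

    def is_int_subclass(obj):
        # PyType_IsSubtype expects type objects, so guard with PyType_Check first.
        if not PyType_Check(obj):
            return False
        return bool(PyType_IsSubtype(<type>obj, int))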
diff --git a/contrib/tools/cython/Cython/Includes/cpython/unicode.pxd b/contrib/tools/cython/Cython/Includes/cpython/unicode.pxd
index 3a7d1874e1..ad01ed64df 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/unicode.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/unicode.pxd
@@ -1,21 +1,21 @@
-cdef extern from *:
- # Return true if the object o is a Unicode object or an instance
- # of a Unicode subtype. Changed in version 2.2: Allowed subtypes
- # to be accepted.
- bint PyUnicode_Check(object o)
-
- # Return true if the object o is a Unicode object, but not an
- # instance of a subtype. New in version 2.2.
- bint PyUnicode_CheckExact(object o)
-
- # Return the size of the object. o has to be a PyUnicodeObject
- # (not checked).
+cdef extern from *:
+ # Return true if the object o is a Unicode object or an instance
+ # of a Unicode subtype. Changed in version 2.2: Allowed subtypes
+ # to be accepted.
+ bint PyUnicode_Check(object o)
+
+ # Return true if the object o is a Unicode object, but not an
+ # instance of a subtype. New in version 2.2.
+ bint PyUnicode_CheckExact(object o)
+
+ # Return the size of the object. o has to be a PyUnicodeObject
+ # (not checked).
#
# Deprecated since version 3.3, will be removed in version 3.10:
# Part of the old-style Unicode API, please migrate to using
# PyUnicode_GET_LENGTH().
- Py_ssize_t PyUnicode_GET_SIZE(object o)
-
+ Py_ssize_t PyUnicode_GET_SIZE(object o)
+
# Return the length of the Unicode string, in code points. o has
# to be a Unicode object in the “canonical” representation (not
# checked).
@@ -23,128 +23,128 @@ cdef extern from *:
# New in version 3.3.
Py_ssize_t PyUnicode_GET_LENGTH(object o)
- # Return the size of the object's internal buffer in bytes. o has
- # to be a PyUnicodeObject (not checked).
- Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)
-
- # Return a pointer to the internal Py_UNICODE buffer of the
- # object. o has to be a PyUnicodeObject (not checked).
- Py_UNICODE* PyUnicode_AS_UNICODE(object o)
-
- # Return a pointer to the internal buffer of the object. o has to
- # be a PyUnicodeObject (not checked).
- char* PyUnicode_AS_DATA(object o)
-
- # Return 1 or 0 depending on whether ch is a whitespace character.
+ # Return the size of the object's internal buffer in bytes. o has
+ # to be a PyUnicodeObject (not checked).
+ Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)
+
+ # Return a pointer to the internal Py_UNICODE buffer of the
+ # object. o has to be a PyUnicodeObject (not checked).
+ Py_UNICODE* PyUnicode_AS_UNICODE(object o)
+
+ # Return a pointer to the internal buffer of the object. o has to
+ # be a PyUnicodeObject (not checked).
+ char* PyUnicode_AS_DATA(object o)
+
+ # Return 1 or 0 depending on whether ch is a whitespace character.
bint Py_UNICODE_ISSPACE(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is a lowercase character.
+
+ # Return 1 or 0 depending on whether ch is a lowercase character.
bint Py_UNICODE_ISLOWER(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is an uppercase character.
+
+ # Return 1 or 0 depending on whether ch is an uppercase character.
bint Py_UNICODE_ISUPPER(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is a titlecase character.
+
+ # Return 1 or 0 depending on whether ch is a titlecase character.
bint Py_UNICODE_ISTITLE(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is a linebreak character.
+
+ # Return 1 or 0 depending on whether ch is a linebreak character.
bint Py_UNICODE_ISLINEBREAK(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is a decimal character.
+
+ # Return 1 or 0 depending on whether ch is a decimal character.
bint Py_UNICODE_ISDECIMAL(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is a digit character.
+
+ # Return 1 or 0 depending on whether ch is a digit character.
bint Py_UNICODE_ISDIGIT(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is a numeric character.
+
+ # Return 1 or 0 depending on whether ch is a numeric character.
bint Py_UNICODE_ISNUMERIC(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is an alphabetic character.
+
+ # Return 1 or 0 depending on whether ch is an alphabetic character.
bint Py_UNICODE_ISALPHA(Py_UCS4 ch)
-
- # Return 1 or 0 depending on whether ch is an alphanumeric character.
+
+ # Return 1 or 0 depending on whether ch is an alphanumeric character.
bint Py_UNICODE_ISALNUM(Py_UCS4 ch)
-
- # Return the character ch converted to lower case.
+
+ # Return the character ch converted to lower case.
# Used to return a Py_UNICODE value before Py3.3.
Py_UCS4 Py_UNICODE_TOLOWER(Py_UCS4 ch)
-
- # Return the character ch converted to upper case.
+
+ # Return the character ch converted to upper case.
# Used to return a Py_UNICODE value before Py3.3.
Py_UCS4 Py_UNICODE_TOUPPER(Py_UCS4 ch)
-
- # Return the character ch converted to title case.
+
+ # Return the character ch converted to title case.
# Used to return a Py_UNICODE value before Py3.3.
Py_UCS4 Py_UNICODE_TOTITLE(Py_UCS4 ch)
-
- # Return the character ch converted to a decimal positive
- # integer. Return -1 if this is not possible. This macro does not
- # raise exceptions.
+
+ # Return the character ch converted to a decimal positive
+ # integer. Return -1 if this is not possible. This macro does not
+ # raise exceptions.
int Py_UNICODE_TODECIMAL(Py_UCS4 ch)
-
- # Return the character ch converted to a single digit
- # integer. Return -1 if this is not possible. This macro does not
- # raise exceptions.
+
+ # Return the character ch converted to a single digit
+ # integer. Return -1 if this is not possible. This macro does not
+ # raise exceptions.
int Py_UNICODE_TODIGIT(Py_UCS4 ch)
-
- # Return the character ch converted to a double. Return -1.0 if
- # this is not possible. This macro does not raise exceptions.
+
+ # Return the character ch converted to a double. Return -1.0 if
+ # this is not possible. This macro does not raise exceptions.
double Py_UNICODE_TONUMERIC(Py_UCS4 ch)
-
- # To create Unicode objects and access their basic sequence
- # properties, use these APIs:
-
- # Create a Unicode Object from the Py_UNICODE buffer u of the
- # given size. u may be NULL which causes the contents to be
- # undefined. It is the user's responsibility to fill in the needed
- # data. The buffer is copied into the new object. If the buffer is
- # not NULL, the return value might be a shared object. Therefore,
- # modification of the resulting Unicode object is only allowed
- # when u is NULL.
- unicode PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
-
- # Create a Unicode Object from the given Unicode code point ordinal.
- #
- # The ordinal must be in range(0x10000) on narrow Python builds
- # (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError
- # is raised in case it is not.
- unicode PyUnicode_FromOrdinal(int ordinal)
-
- # Return a read-only pointer to the Unicode object's internal
- # Py_UNICODE buffer, NULL if unicode is not a Unicode object.
- Py_UNICODE* PyUnicode_AsUnicode(object o) except NULL
-
- # Return the length of the Unicode object.
- Py_ssize_t PyUnicode_GetSize(object o) except -1
-
- # Coerce an encoded object obj to an Unicode object and return a
- # reference with incremented refcount.
- # String and other char buffer compatible objects are decoded
- # according to the given encoding and using the error handling
- # defined by errors. Both can be NULL to have the interface use
- # the default values (see the next section for details).
- # All other objects, including Unicode objects, cause a TypeError
- # to be set.
- object PyUnicode_FromEncodedObject(object o, char *encoding, char *errors)
-
- # Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict")
- # which is used throughout the interpreter whenever coercion to
- # Unicode is needed.
- object PyUnicode_FromObject(object obj)
-
- # If the platform supports wchar_t and provides a header file
- # wchar.h, Python can interface directly to this type using the
- # following functions. Support is optimized if Python's own
- # Py_UNICODE type is identical to the system's wchar_t.
-
- #ctypedef int wchar_t
-
- # Create a Unicode object from the wchar_t buffer w of the given
- # size. Return NULL on failure.
- #PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size)
-
- #Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
-
+
+ # To create Unicode objects and access their basic sequence
+ # properties, use these APIs:
+
+ # Create a Unicode Object from the Py_UNICODE buffer u of the
+ # given size. u may be NULL which causes the contents to be
+ # undefined. It is the user's responsibility to fill in the needed
+ # data. The buffer is copied into the new object. If the buffer is
+ # not NULL, the return value might be a shared object. Therefore,
+ # modification of the resulting Unicode object is only allowed
+ # when u is NULL.
+ unicode PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
+
+ # Create a Unicode Object from the given Unicode code point ordinal.
+ #
+ # The ordinal must be in range(0x10000) on narrow Python builds
+ # (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError
+ # is raised in case it is not.
+ unicode PyUnicode_FromOrdinal(int ordinal)
+
+ # Return a read-only pointer to the Unicode object's internal
+ # Py_UNICODE buffer, NULL if unicode is not a Unicode object.
+ Py_UNICODE* PyUnicode_AsUnicode(object o) except NULL
+
+ # Return the length of the Unicode object.
+ Py_ssize_t PyUnicode_GetSize(object o) except -1
+
+ # Coerce an encoded object obj to an Unicode object and return a
+ # reference with incremented refcount.
+ # String and other char buffer compatible objects are decoded
+ # according to the given encoding and using the error handling
+ # defined by errors. Both can be NULL to have the interface use
+ # the default values (see the next section for details).
+ # All other objects, including Unicode objects, cause a TypeError
+ # to be set.
+ object PyUnicode_FromEncodedObject(object o, char *encoding, char *errors)
+
+ # Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict")
+ # which is used throughout the interpreter whenever coercion to
+ # Unicode is needed.
+ object PyUnicode_FromObject(object obj)
+
+ # If the platform supports wchar_t and provides a header file
+ # wchar.h, Python can interface directly to this type using the
+ # following functions. Support is optimized if Python's own
+ # Py_UNICODE type is identical to the system's wchar_t.
+
+ #ctypedef int wchar_t
+
+ # Create a Unicode object from the wchar_t buffer w of the given
+ # size. Return NULL on failure.
+ #PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size)
+
+ #Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
+
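A minimal sketch of the character predicates and the coercion helper declared above (function names are invented; assumes the declarations are cimported from cpython.unicode):

    from cpython.unicode cimport Py_UNICODE_ISSPACE, PyUnicode_FromEncodedObject

    def count_spaces(unicode s):
        cdef Py_ssize_t i, total = 0
        cdef Py_UCS4 ch
        for i in range(len(s)):
            ch = s[i]          # single character, coerced to Py_UCS4
            if Py_UNICODE_ISSPACE(ch):
                total += 1
        return total

    def decode_strict(bytes raw, bytes encoding):
        # The char* parameters accept Python byte strings directly.
        return PyUnicode_FromEncodedObject(raw, encoding, b"strict")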
# Unicode Methods
@@ -270,267 +270,267 @@ cdef extern from *:
unicode PyUnicode_InternFromString(const char *v)
-# Codecs
-
- # Create a Unicode object by decoding size bytes of the encoded
- # string s. encoding and errors have the same meaning as the
- # parameters of the same name in the unicode() builtin
- # function. The codec to be used is looked up using the Python
- # codec registry. Return NULL if an exception was raised by the
- # codec.
- object PyUnicode_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
-
- # Encode the Py_UNICODE buffer of the given size and return a
- # Python string object. encoding and errors have the same meaning
- # as the parameters of the same name in the Unicode encode()
- # method. The codec to be used is looked up using the Python codec
- # registry. Return NULL if an exception was raised by the codec.
- object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size,
- char *encoding, char *errors)
-
- # Encode a Unicode object and return the result as Python string
- # object. encoding and errors have the same meaning as the
- # parameters of the same name in the Unicode encode() method. The
- # codec to be used is looked up using the Python codec
- # registry. Return NULL if an exception was raised by the codec.
- object PyUnicode_AsEncodedString(object unicode, char *encoding, char *errors)
-
-# These are the UTF-8 codec APIs:
-
- # Create a Unicode object by decoding size bytes of the UTF-8
- # encoded string s. Return NULL if an exception was raised by the
- # codec.
+# Codecs
+
+ # Create a Unicode object by decoding size bytes of the encoded
+ # string s. encoding and errors have the same meaning as the
+ # parameters of the same name in the unicode() builtin
+ # function. The codec to be used is looked up using the Python
+ # codec registry. Return NULL if an exception was raised by the
+ # codec.
+ object PyUnicode_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
+
+ # Encode the Py_UNICODE buffer of the given size and return a
+ # Python string object. encoding and errors have the same meaning
+ # as the parameters of the same name in the Unicode encode()
+ # method. The codec to be used is looked up using the Python codec
+ # registry. Return NULL if an exception was raised by the codec.
+ object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size,
+ char *encoding, char *errors)
+
+ # Encode a Unicode object and return the result as Python string
+ # object. encoding and errors have the same meaning as the
+ # parameters of the same name in the Unicode encode() method. The
+ # codec to be used is looked up using the Python codec
+ # registry. Return NULL if an exception was raised by the codec.
+ object PyUnicode_AsEncodedString(object unicode, char *encoding, char *errors)
+
+# These are the UTF-8 codec APIs:
+
+ # Create a Unicode object by decoding size bytes of the UTF-8
+ # encoded string s. Return NULL if an exception was raised by the
+ # codec.
unicode PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
-
- # If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
- # consumed is not NULL, trailing incomplete UTF-8 byte sequences
- # will not be treated as an error. Those bytes will not be decoded
- # and the number of bytes that have been decoded will be stored in
- # consumed. New in version 2.4.
+
+ # If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
+ # consumed is not NULL, trailing incomplete UTF-8 byte sequences
+ # will not be treated as an error. Those bytes will not be decoded
+ # and the number of bytes that have been decoded will be stored in
+ # consumed. New in version 2.4.
unicode PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
-
- # Encode the Py_UNICODE buffer of the given size using UTF-8 and
- # return a Python string object. Return NULL if an exception was
- # raised by the codec.
+
+ # Encode the Py_UNICODE buffer of the given size using UTF-8 and
+ # return a Python string object. Return NULL if an exception was
+ # raised by the codec.
bytes PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
-
- # Encode a Unicode object using UTF-8 and return the result as a Python string object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
+
+ # Encode a Unicode object using UTF-8 and return the result as a Python string object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
bytes PyUnicode_AsUTF8String(object unicode)
-
-# These are the UTF-16 codec APIs:
-
- # Decode length bytes from a UTF-16 encoded buffer string and
- # return the corresponding Unicode object. errors (if non-NULL)
- # defines the error handling. It defaults to ``strict''.
- #
- # If byteorder is non-NULL, the decoder starts decoding using the
- # given byte order:
- #
- # *byteorder == -1: little endian
- # *byteorder == 0: native order
- # *byteorder == 1: big endian
- #
- # and then switches if the first two bytes of the input data are a
- # byte order mark (BOM) and the specified byte order is native
- # order. This BOM is not copied into the resulting Unicode
- # string. After completion, *byteorder is set to the current byte
- # order at the end of the input data.
- #
- # If byteorder is NULL, the codec starts in native order mode.
+
+# These are the UTF-16 codec APIs:
+
+ # Decode length bytes from a UTF-16 encoded buffer string and
+ # return the corresponding Unicode object. errors (if non-NULL)
+ # defines the error handling. It defaults to ``strict''.
+ #
+ # If byteorder is non-NULL, the decoder starts decoding using the
+ # given byte order:
+ #
+ # *byteorder == -1: little endian
+ # *byteorder == 0: native order
+ # *byteorder == 1: big endian
+ #
+ # and then switches if the first two bytes of the input data are a
+ # byte order mark (BOM) and the specified byte order is native
+ # order. This BOM is not copied into the resulting Unicode
+ # string. After completion, *byteorder is set to the current byte
+ # order at the end of the input data.
+ #
+ # If byteorder is NULL, the codec starts in native order mode.
unicode PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
-
- # If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
- # consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
- # treat trailing incomplete UTF-16 byte sequences (such as an odd
- # number of bytes or a split surrogate pair) as an error. Those
- # bytes will not be decoded and the number of bytes that have been
- # decoded will be stored in consumed. New in version 2.4.
+
+ # If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
+ # consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
+ # treat trailing incomplete UTF-16 byte sequences (such as an odd
+ # number of bytes or a split surrogate pair) as an error. Those
+ # bytes will not be decoded and the number of bytes that have been
+ # decoded will be stored in consumed. New in version 2.4.
unicode PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
-
- # Return a Python string object holding the UTF-16 encoded value
- # of the Unicode data in s. If byteorder is not 0, output is
- # written according to the following byte order:
- #
- # byteorder == -1: little endian
- # byteorder == 0: native byte order (writes a BOM mark)
- # byteorder == 1: big endian
- #
- # If byteorder is 0, the output string will always start with the
- # Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark
- # is prepended.
- #
- # If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
- # represented as a surrogate pair. If it is not defined, each
- # Py_UNICODE value is interpreted as a UCS-2 character.
+
+ # Return a Python string object holding the UTF-16 encoded value
+ # of the Unicode data in s. If byteorder is not 0, output is
+ # written according to the following byte order:
+ #
+ # byteorder == -1: little endian
+ # byteorder == 0: native byte order (writes a BOM mark)
+ # byteorder == 1: big endian
+ #
+ # If byteorder is 0, the output string will always start with the
+ # Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark
+ # is prepended.
+ #
+ # If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
+ # represented as a surrogate pair. If it is not defined, each
+ # Py_UNICODE value is interpreted as a UCS-2 character.
bytes PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
-
- # Return a Python string using the UTF-16 encoding in native byte
- # order. The string always starts with a BOM mark. Error handling
- # is ``strict''. Return NULL if an exception was raised by the
- # codec.
+
+ # Return a Python string using the UTF-16 encoding in native byte
+ # order. The string always starts with a BOM mark. Error handling
+ # is ``strict''. Return NULL if an exception was raised by the
+ # codec.
bytes PyUnicode_AsUTF16String(object unicode)
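A minimal sketch of the byte-order handling described above (`decode_utf16_le` is an invented name): byteorder starts at -1 for little endian, and after the call it holds the byte order in effect at the end of the input.

    from cpython.unicode cimport PyUnicode_DecodeUTF16

    def decode_utf16_le(bytes data):
        # -1: little endian, 0: native (BOM-switchable), 1: big endian
        cdef int byteorder = -1
        # NULL selects the default ("strict") error handling.
        result = PyUnicode_DecodeUTF16(data, len(data), NULL, &byteorder)
        return result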
-
-# These are the ``Unicode Escape'' codec APIs:
-
- # Create a Unicode object by decoding size bytes of the
- # Unicode-Escape encoded string s. Return NULL if an exception was
- # raised by the codec.
- object PyUnicode_DecodeUnicodeEscape(char *s, Py_ssize_t size, char *errors)
-
- # Encode the Py_UNICODE buffer of the given size using
- # Unicode-Escape and return a Python string object. Return NULL if
- # an exception was raised by the codec.
- object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)
-
- # Encode a Unicode object using Unicode-Escape and return the
- # result as a Python string object. Error handling is
- # ``strict''. Return NULL if an exception was raised by the codec.
- object PyUnicode_AsUnicodeEscapeString(object unicode)
-
-# These are the ``Raw Unicode Escape'' codec APIs:
-
- # Create a Unicode object by decoding size bytes of the
- # Raw-Unicode-Escape encoded string s. Return NULL if an exception
- # was raised by the codec.
- object PyUnicode_DecodeRawUnicodeEscape(char *s, Py_ssize_t size, char *errors)
-
- # Encode the Py_UNICODE buffer of the given size using
- # Raw-Unicode-Escape and return a Python string object. Return
- # NULL if an exception was raised by the codec.
- object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char *errors)
-
- # Encode a Unicode object using Raw-Unicode-Escape and return the
- # result as a Python string object. Error handling is
- # ``strict''. Return NULL if an exception was raised by the codec.
- object PyUnicode_AsRawUnicodeEscapeString(object unicode)
-
-# These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode ordinals and only these are accepted by the codecs during encoding.
-
- # Create a Unicode object by decoding size bytes of the Latin-1
- # encoded string s. Return NULL if an exception was raised by the
- # codec.
+
+# These are the ``Unicode Escape'' codec APIs:
+
+ # Create a Unicode object by decoding size bytes of the
+ # Unicode-Escape encoded string s. Return NULL if an exception was
+ # raised by the codec.
+ object PyUnicode_DecodeUnicodeEscape(char *s, Py_ssize_t size, char *errors)
+
+ # Encode the Py_UNICODE buffer of the given size using
+ # Unicode-Escape and return a Python string object. Return NULL if
+ # an exception was raised by the codec.
+ object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)
+
+ # Encode a Unicode object using Unicode-Escape and return the
+ # result as a Python string object. Error handling is
+ # ``strict''. Return NULL if an exception was raised by the codec.
+ object PyUnicode_AsUnicodeEscapeString(object unicode)
+
+# These are the ``Raw Unicode Escape'' codec APIs:
+
+ # Create a Unicode object by decoding size bytes of the
+ # Raw-Unicode-Escape encoded string s. Return NULL if an exception
+ # was raised by the codec.
+ object PyUnicode_DecodeRawUnicodeEscape(char *s, Py_ssize_t size, char *errors)
+
+ # Encode the Py_UNICODE buffer of the given size using
+ # Raw-Unicode-Escape and return a Python string object. Return
+ # NULL if an exception was raised by the codec.
+ object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char *errors)
+
+ # Encode a Unicode object using Raw-Unicode-Escape and return the
+ # result as a Python string object. Error handling is
+ # ``strict''. Return NULL if an exception was raised by the codec.
+ object PyUnicode_AsRawUnicodeEscapeString(object unicode)
+
+# These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode ordinals and only these are accepted by the codecs during encoding.
+
+ # Create a Unicode object by decoding size bytes of the Latin-1
+ # encoded string s. Return NULL if an exception was raised by the
+ # codec.
unicode PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
-
- # Encode the Py_UNICODE buffer of the given size using Latin-1 and
+
+ # Encode the Py_UNICODE buffer of the given size using Latin-1 and
# return a Python bytes object. Return NULL if an exception was
- # raised by the codec.
+ # raised by the codec.
bytes PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
-
- # Encode a Unicode object using Latin-1 and return the result as a
+
+ # Encode a Unicode object using Latin-1 and return the result as a
# Python bytes object. Error handling is ``strict''. Return NULL
- # if an exception was raised by the codec.
+ # if an exception was raised by the codec.
bytes PyUnicode_AsLatin1String(object unicode)
-
-# These are the ASCII codec APIs. Only 7-bit ASCII data is
-# accepted. All other codes generate errors.
-
- # Create a Unicode object by decoding size bytes of the ASCII
- # encoded string s. Return NULL if an exception was raised by the
- # codec.
+
+# These are the ASCII codec APIs. Only 7-bit ASCII data is
+# accepted. All other codes generate errors.
+
+ # Create a Unicode object by decoding size bytes of the ASCII
+ # encoded string s. Return NULL if an exception was raised by the
+ # codec.
unicode PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
-
- # Encode the Py_UNICODE buffer of the given size using ASCII and
+
+ # Encode the Py_UNICODE buffer of the given size using ASCII and
# return a Python bytes object. Return NULL if an exception was
- # raised by the codec.
+ # raised by the codec.
bytes PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
-
- # Encode a Unicode object using ASCII and return the result as a
+
+ # Encode a Unicode object using ASCII and return the result as a
# Python bytes object. Error handling is ``strict''. Return NULL
- # if an exception was raised by the codec.
+ # if an exception was raised by the codec.
bytes PyUnicode_AsASCIIString(object o)
-
-# These are the mapping codec APIs:
-#
-# This codec is special in that it can be used to implement many
-# different codecs (and this is in fact what was done to obtain most
-# of the standard codecs included in the encodings package). The codec
-# uses mapping to encode and decode characters.
-#
-# Decoding mappings must map single string characters to single
-# Unicode characters, integers (which are then interpreted as Unicode
-# ordinals) or None (meaning "undefined mapping" and causing an
-# error).
-#
-# Encoding mappings must map single Unicode characters to single
-# string characters, integers (which are then interpreted as Latin-1
-# ordinals) or None (meaning "undefined mapping" and causing an
-# error).
-#
-# The mapping objects provided must only support the __getitem__
-# mapping interface.
-#
-# If a character lookup fails with a LookupError, the character is
-# copied as-is meaning that its ordinal value will be interpreted as
-# Unicode or Latin-1 ordinal resp. Because of this, mappings only need
-# to contain those mappings which map characters to different code
-# points.
-
- # Create a Unicode object by decoding size bytes of the encoded
- # string s using the given mapping object. Return NULL if an
- # exception was raised by the codec. If mapping is NULL latin-1
- # decoding will be done. Else it can be a dictionary mapping byte
- # or a unicode string, which is treated as a lookup table. Byte
- # values greater than the length of the string and U+FFFE
- # "characters" are treated as "undefined mapping". Changed in
- # version 2.4: Allowed unicode string as mapping argument.
- object PyUnicode_DecodeCharmap(char *s, Py_ssize_t size, object mapping, char *errors)
-
- # Encode the Py_UNICODE buffer of the given size using the given
- # mapping object and return a Python string object. Return NULL if
- # an exception was raised by the codec.
+
+# These are the mapping codec APIs:
+#
+# This codec is special in that it can be used to implement many
+# different codecs (and this is in fact what was done to obtain most
+# of the standard codecs included in the encodings package). The codec
+# uses mapping to encode and decode characters.
+#
+# Decoding mappings must map single string characters to single
+# Unicode characters, integers (which are then interpreted as Unicode
+# ordinals) or None (meaning "undefined mapping" and causing an
+# error).
+#
+# Encoding mappings must map single Unicode characters to single
+# string characters, integers (which are then interpreted as Latin-1
+# ordinals) or None (meaning "undefined mapping" and causing an
+# error).
+#
+# The mapping objects provided must only support the __getitem__
+# mapping interface.
+#
+# If a character lookup fails with a LookupError, the character is
+# copied as-is meaning that its ordinal value will be interpreted as
+# Unicode or Latin-1 ordinal resp. Because of this, mappings only need
+# to contain those mappings which map characters to different code
+# points.
+
+ # Create a Unicode object by decoding size bytes of the encoded
+ # string s using the given mapping object. Return NULL if an
+ # exception was raised by the codec. If mapping is NULL latin-1
+ # decoding will be done. Else it can be a dictionary mapping byte
+ # or a unicode string, which is treated as a lookup table. Byte
+ # values greater than the length of the string and U+FFFE
+ # "characters" are treated as "undefined mapping". Changed in
+ # version 2.4: Allowed unicode string as mapping argument.
+ object PyUnicode_DecodeCharmap(char *s, Py_ssize_t size, object mapping, char *errors)
+
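To make the lookup-table behaviour above concrete, a hypothetical Python 2-only sketch (`decode_identity` is an invented name; a 256-character table makes every byte value a defined mapping, i.e. Latin-1-like decoding):

    from cpython.unicode cimport PyUnicode_DecodeCharmap

    def decode_identity(bytes data):
        # A unicode string used as a lookup table: byte value -> code point.
        cdef unicode table = u"".join([unichr(i) for i in range(256)])
        return PyUnicode_DecodeCharmap(data, len(data), table, b"strict")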
+ # Encode the Py_UNICODE buffer of the given size using the given
+ # mapping object and return a Python string object. Return NULL if
+ # an exception was raised by the codec.
#
# Deprecated since version 3.3, will be removed in version 4.0.
- object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
-
- # Encode a Unicode object using the given mapping object and
- # return the result as a Python string object. Error handling is
- # ``strict''. Return NULL if an exception was raised by the codec.
- object PyUnicode_AsCharmapString(object o, object mapping)
-
-# The following codec API is special in that it maps Unicode to Unicode.
-
- # Translate a Py_UNICODE buffer of the given length by applying a
- # character mapping table to it and return the resulting Unicode
- # object. Return NULL when an exception was raised by the codec.
- #
- # The mapping table must map Unicode ordinal integers to Unicode
- # ordinal integers or None (causing deletion of the character).
- #
- # Mapping tables need only provide the __getitem__() interface;
- # dictionaries and sequences work well. Unmapped character
- # ordinals (ones which cause a LookupError) are left untouched and
- # are copied as-is.
+ object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
+
+ # Encode a Unicode object using the given mapping object and
+ # return the result as a Python string object. Error handling is
+ # ``strict''. Return NULL if an exception was raised by the codec.
+ object PyUnicode_AsCharmapString(object o, object mapping)
+
+# The following codec API is special in that it maps Unicode to Unicode.
+
+ # Translate a Py_UNICODE buffer of the given length by applying a
+ # character mapping table to it and return the resulting Unicode
+ # object. Return NULL when an exception was raised by the codec.
+ #
+ # The mapping table must map Unicode ordinal integers to Unicode
+ # ordinal integers or None (causing deletion of the character).
+ #
+ # Mapping tables need only provide the __getitem__() interface;
+ # dictionaries and sequences work well. Unmapped character
+ # ordinals (ones which cause a LookupError) are left untouched and
+ # are copied as-is.
#
# Deprecated since version 3.3, will be removed in version 4.0.
- object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
- object table, char *errors)
-
-# These are the MBCS codec APIs. They are currently only available on
-# Windows and use the Win32 MBCS converters to implement the
-# conversions. Note that MBCS (or DBCS) is a class of encodings, not
-# just one. The target encoding is defined by the user settings on the
-# machine running the codec.
-
- # Create a Unicode object by decoding size bytes of the MBCS
- # encoded string s. Return NULL if an exception was raised by the
- # codec.
+ object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
+ object table, char *errors)
+
+# These are the MBCS codec APIs. They are currently only available on
+# Windows and use the Win32 MBCS converters to implement the
+# conversions. Note that MBCS (or DBCS) is a class of encodings, not
+# just one. The target encoding is defined by the user settings on the
+# machine running the codec.
+
+ # Create a Unicode object by decoding size bytes of the MBCS
+ # encoded string s. Return NULL if an exception was raised by the
+ # codec.
unicode PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
-
- # If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
- # consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
- # decode trailing lead byte and the number of bytes that have been
- # decoded will be stored in consumed. New in version 2.5.
- # NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
+
+ # If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
+ # consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
+ # decode trailing lead byte and the number of bytes that have been
+ # decoded will be stored in consumed. New in version 2.5.
+ # NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
unicode PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
-
- # Encode the Py_UNICODE buffer of the given size using MBCS and
- # return a Python string object. Return NULL if an exception was
- # raised by the codec.
+
+ # Encode the Py_UNICODE buffer of the given size using MBCS and
+ # return a Python string object. Return NULL if an exception was
+ # raised by the codec.
bytes PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
-
- # Encode a Unicode object using MBCS and return the result as
- # a Python string object. Error handling is ``strict''. Return NULL
- # if an exception was raised by the codec.
+
+ # Encode a Unicode objects using MBCS and return the result as
+ # Python string object. Error handling is ``strict''. Return NULL
+ # if an exception was raised by the codec.
bytes PyUnicode_AsMBCSString(object o)
# Encode the Unicode object using the specified code page and return
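
For context, a minimal sketch of the mapping-table convention documented above, shown through the Python-level equivalent str.translate rather than the deprecated C-level call; the table contents are purely illustrative:

    # A charmap table maps ordinals to ordinals or to None (None deletes the character);
    # anything implementing __getitem__ works, and unmapped ordinals pass through unchanged.
    table = {ord('a'): ord('A'), ord('-'): None}
    assert "a-b".translate(table) == "Ab"
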
diff --git a/contrib/tools/cython/Cython/Includes/cpython/version.pxd b/contrib/tools/cython/Cython/Includes/cpython/version.pxd
index 4799b36ebe..ce31b249cf 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/version.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/version.pxd
@@ -1,32 +1,32 @@
-# Python version constants
-#
-# It's better to evaluate these at runtime (i.e. C compile time) using
-#
-# if PY_MAJOR_VERSION >= 3:
-# do_stuff_in_Py3_0_and_later()
-# if PY_VERSION_HEX >= 0x02070000:
-# do_stuff_in_Py2_7_and_later()
-#
-# than using the IF/DEF statements, which are evaluated at Cython
-# compile time. This will keep your C code portable.
-
-
-cdef extern from *:
- # the complete version, e.g. 0x010502B2 == 1.5.2b2
- int PY_VERSION_HEX
-
- # the individual sections as plain numbers
- int PY_MAJOR_VERSION
- int PY_MINOR_VERSION
- int PY_MICRO_VERSION
- int PY_RELEASE_LEVEL
- int PY_RELEASE_SERIAL
-
- # Note: PY_RELEASE_LEVEL is one of
- # 0xA (alpha)
- # 0xB (beta)
- # 0xC (release candidate)
- # 0xF (final)
-
- char PY_VERSION[]
- char PY_PATCHLEVEL_REVISION[]
+# Python version constants
+#
+# It's better to evaluate these at runtime (i.e. C compile time) using
+#
+# if PY_MAJOR_VERSION >= 3:
+# do_stuff_in_Py3_0_and_later()
+# if PY_VERSION_HEX >= 0x02070000:
+# do_stuff_in_Py2_7_and_later()
+#
+# than using the IF/DEF statements, which are evaluated at Cython
+# compile time. This will keep your C code portable.
+
+
+cdef extern from *:
+ # the complete version, e.g. 0x010502B2 == 1.5.2b2
+ int PY_VERSION_HEX
+
+ # the individual sections as plain numbers
+ int PY_MAJOR_VERSION
+ int PY_MINOR_VERSION
+ int PY_MICRO_VERSION
+ int PY_RELEASE_LEVEL
+ int PY_RELEASE_SERIAL
+
+ # Note: PY_RELEASE_LEVEL is one of
+ # 0xA (alpha)
+ # 0xB (beta)
+ # 0xC (release candidate)
+ # 0xF (final)
+
+ char PY_VERSION[]
+ char PY_PATCHLEVEL_REVISION[]
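
For context, a minimal Cython sketch of the runtime checks the comment above recommends; the function name is hypothetical:

    from cpython.version cimport PY_MAJOR_VERSION, PY_VERSION_HEX

    def describe_runtime():
        # Evaluated at C run time, so a single extension binary stays portable.
        if PY_MAJOR_VERSION >= 3:
            return "Python 3+", hex(PY_VERSION_HEX)
        return "legacy Python 2", hex(PY_VERSION_HEX)
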
diff --git a/contrib/tools/cython/Cython/Includes/cpython/weakref.pxd b/contrib/tools/cython/Cython/Includes/cpython/weakref.pxd
index c95605d017..9c4b50f564 100644
--- a/contrib/tools/cython/Cython/Includes/cpython/weakref.pxd
+++ b/contrib/tools/cython/Cython/Includes/cpython/weakref.pxd
@@ -1,42 +1,42 @@
from .object cimport PyObject
-
-cdef extern from "Python.h":
-
- bint PyWeakref_Check(object ob)
- # Return true if ob is either a reference or proxy object.
-
- bint PyWeakref_CheckRef(object ob)
- # Return true if ob is a reference object.
-
- bint PyWeakref_CheckProxy(ob)
- # Return true if *ob* is a proxy object.
-
- object PyWeakref_NewRef(object ob, object callback)
- # Return a weak reference object for the object ob. This will
- # always return a new reference, but is not guaranteed to create a
- # new object; an existing reference object may be returned. The
- # second parameter, callback, can be a callable object that
- # receives notification when ob is garbage collected; it should
- # accept a single parameter, which will be the weak reference
- # object itself. callback may also be None or NULL. If ob is not
- # a weakly-referencable object, or if callback is not callable,
- # None, or NULL, this will return NULL and raise TypeError.
-
- object PyWeakref_NewProxy(object ob, object callback)
- # Return a weak reference proxy object for the object ob. This
- # will always return a new reference, but is not guaranteed to
- # create a new object; an existing proxy object may be returned.
- # The second parameter, callback, can be a callable object that
- # receives notification when ob is garbage collected; it should
- # accept a single parameter, which will be the weak reference
- # object itself. callback may also be None or NULL. If ob is not
- # a weakly-referencable object, or if callback is not callable,
- # None, or NULL, this will return NULL and raise TypeError.
-
+
+cdef extern from "Python.h":
+
+ bint PyWeakref_Check(object ob)
+ # Return true if ob is either a reference or proxy object.
+
+ bint PyWeakref_CheckRef(object ob)
+ # Return true if ob is a reference object.
+
+ bint PyWeakref_CheckProxy(ob)
+ # Return true if *ob* is a proxy object.
+
+ object PyWeakref_NewRef(object ob, object callback)
+ # Return a weak reference object for the object ob. This will
+ # always return a new reference, but is not guaranteed to create a
+ # new object; an existing reference object may be returned. The
+ # second parameter, callback, can be a callable object that
+ # receives notification when ob is garbage collected; it should
+ # accept a single parameter, which will be the weak reference
+ # object itself. callback may also be None or NULL. If ob is not
+ # a weakly-referencable object, or if callback is not callable,
+ # None, or NULL, this will return NULL and raise TypeError.
+
+ object PyWeakref_NewProxy(object ob, object callback)
+ # Return a weak reference proxy object for the object ob. This
+ # will always return a new reference, but is not guaranteed to
+ # create a new object; an existing proxy object may be returned.
+ # The second parameter, callback, can be a callable object that
+ # receives notification when ob is garbage collected; it should
+ # accept a single parameter, which will be the weak reference
+ # object itself. callback may also be None or NULL. If ob is not
+ # a weakly-referencable object, or if callback is not callable,
+ # None, or NULL, this will return NULL and raise TypeError.
+
PyObject* PyWeakref_GetObject(object ref) except NULL
- # Return the referenced object from a weak reference, ref. If the
- # referent is no longer live, returns None.
-
- PyObject* PyWeakref_GET_OBJECT(object ref)
- # Similar to PyWeakref_GetObject, but implemented as a macro that
- # does no error checking.
+ # Return the referenced object from a weak reference, ref. If the
+ # referent is no longer live, returns None.
+
+ PyObject* PyWeakref_GET_OBJECT(object ref)
+ # Similar to PyWeakref_GetObject, but implemented as a macro that
+ # does no error checking.
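
For context, a minimal Cython sketch of the weak-reference calls declared above; the Node class and function name are hypothetical:

    from cpython.weakref cimport PyWeakref_NewRef, PyWeakref_GetObject

    class Node:
        pass

    def demo_weakref():
        obj = Node()
        ref = PyWeakref_NewRef(obj, None)           # no callback
        # PyWeakref_GetObject returns a borrowed PyObject*; the <object> cast
        # turns it into an owned reference on the Cython side.
        target = <object>PyWeakref_GetObject(ref)
        assert target is obj
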
diff --git a/contrib/tools/cython/Cython/Includes/libc/__init__.pxd b/contrib/tools/cython/Cython/Includes/libc/__init__.pxd
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Includes/libc/__init__.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/__init__.pxd
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Includes/libc/errno.pxd b/contrib/tools/cython/Cython/Includes/libc/errno.pxd
index 8f630e65cb..191d47b3dc 100644
--- a/contrib/tools/cython/Cython/Includes/libc/errno.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/errno.pxd
@@ -1,128 +1,128 @@
-# 7.5 Errors <errno.h>
-
+# 7.5 Errors <errno.h>
+
cdef extern from "<errno.h>" nogil:
- enum:
- EPERM
- ENOENT
- ESRCH
- EINTR
- EIO
- ENXIO
- E2BIG
- ENOEXEC
- EBADF
- ECHILD
- EAGAIN
- ENOMEM
- EACCES
- EFAULT
- ENOTBLK
- EBUSY
- EEXIST
- EXDEV
- ENODEV
- ENOTDIR
- EISDIR
- EINVAL
- ENFILE
- EMFILE
- ENOTTY
- ETXTBSY
- EFBIG
- ENOSPC
- ESPIPE
- EROFS
- EMLINK
- EPIPE
- EDOM
- ERANGE
- EDEADLOCK
- ENAMETOOLONG
- ENOLCK
- ENOSYS
- ENOTEMPTY
- ELOOP
- ENOMSG
- EIDRM
- ECHRNG
- EL2NSYNC
- EL3HLT
- EL3RST
- ELNRNG
- EUNATCH
- ENOCSI
- EL2HLT
- EBADE
- EBADR
- EXFULL
- ENOANO
- EBADRQC
- EBADSLT
- EBFONT
- ENOSTR
- ENODATA
+ enum:
+ EPERM
+ ENOENT
+ ESRCH
+ EINTR
+ EIO
+ ENXIO
+ E2BIG
+ ENOEXEC
+ EBADF
+ ECHILD
+ EAGAIN
+ ENOMEM
+ EACCES
+ EFAULT
+ ENOTBLK
+ EBUSY
+ EEXIST
+ EXDEV
+ ENODEV
+ ENOTDIR
+ EISDIR
+ EINVAL
+ ENFILE
+ EMFILE
+ ENOTTY
+ ETXTBSY
+ EFBIG
+ ENOSPC
+ ESPIPE
+ EROFS
+ EMLINK
+ EPIPE
+ EDOM
+ ERANGE
+ EDEADLOCK
+ ENAMETOOLONG
+ ENOLCK
+ ENOSYS
+ ENOTEMPTY
+ ELOOP
+ ENOMSG
+ EIDRM
+ ECHRNG
+ EL2NSYNC
+ EL3HLT
+ EL3RST
+ ELNRNG
+ EUNATCH
+ ENOCSI
+ EL2HLT
+ EBADE
+ EBADR
+ EXFULL
+ ENOANO
+ EBADRQC
+ EBADSLT
+ EBFONT
+ ENOSTR
+ ENODATA
ENOATTR
- ETIME
- ENOSR
- ENONET
- ENOPKG
- EREMOTE
- ENOLINK
- EADV
- ESRMNT
- ECOMM
- EPROTO
- EMULTIHOP
- EDOTDOT
- EBADMSG
- EOVERFLOW
- ENOTUNIQ
- EBADFD
- EREMCHG
- ELIBACC
- ELIBBAD
- ELIBSCN
- ELIBMAX
- ELIBEXEC
- EILSEQ
- ERESTART
- ESTRPIPE
- EUSERS
- ENOTSOCK
- EDESTADDRREQ
- EMSGSIZE
- EPROTOTYPE
- ENOPROTOOPT
- EPROTONOSUPPORT
- ESOCKTNOSUPPORT
- EOPNOTSUPP
- EPFNOSUPPORT
- EAFNOSUPPORT
- EADDRINUSE
- EADDRNOTAVAIL
- ENETDOWN
- ENETUNREACH
- ENETRESET
- ECONNABORTED
- ECONNRESET
- ENOBUFS
- EISCONN
- ENOTCONN
- ESHUTDOWN
- ETOOMANYREFS
- ETIMEDOUT
- ECONNREFUSED
- EHOSTDOWN
- EHOSTUNREACH
- EALREADY
- EINPROGRESS
- ESTALE
- EUCLEAN
- ENOTNAM
- ENAVAIL
- EISNAM
- EREMOTEIO
- EDQUOT
-
- int errno
-
+ ETIME
+ ENOSR
+ ENONET
+ ENOPKG
+ EREMOTE
+ ENOLINK
+ EADV
+ ESRMNT
+ ECOMM
+ EPROTO
+ EMULTIHOP
+ EDOTDOT
+ EBADMSG
+ EOVERFLOW
+ ENOTUNIQ
+ EBADFD
+ EREMCHG
+ ELIBACC
+ ELIBBAD
+ ELIBSCN
+ ELIBMAX
+ ELIBEXEC
+ EILSEQ
+ ERESTART
+ ESTRPIPE
+ EUSERS
+ ENOTSOCK
+ EDESTADDRREQ
+ EMSGSIZE
+ EPROTOTYPE
+ ENOPROTOOPT
+ EPROTONOSUPPORT
+ ESOCKTNOSUPPORT
+ EOPNOTSUPP
+ EPFNOSUPPORT
+ EAFNOSUPPORT
+ EADDRINUSE
+ EADDRNOTAVAIL
+ ENETDOWN
+ ENETUNREACH
+ ENETRESET
+ ECONNABORTED
+ ECONNRESET
+ ENOBUFS
+ EISCONN
+ ENOTCONN
+ ESHUTDOWN
+ ETOOMANYREFS
+ ETIMEDOUT
+ ECONNREFUSED
+ EHOSTDOWN
+ EHOSTUNREACH
+ EALREADY
+ EINPROGRESS
+ ESTALE
+ EUCLEAN
+ ENOTNAM
+ ENAVAIL
+ EISNAM
+ EREMOTEIO
+ EDQUOT
+
+ int errno
+
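
For context, a minimal Cython sketch of checking errno after a failing C call; the helper name is hypothetical:

    from libc.errno cimport errno, ENOENT
    from libc.stdio cimport FILE, fopen, fclose

    def open_or_report(bytes path):
        cdef FILE* f = fopen(path, b"r")
        if f == NULL:
            # errno is set by the failed C library call.
            return "missing" if errno == ENOENT else "failed, errno=%d" % errno
        fclose(f)
        return "ok"
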
diff --git a/contrib/tools/cython/Cython/Includes/libc/float.pxd b/contrib/tools/cython/Cython/Includes/libc/float.pxd
index d0cc602969..5e4e12d4f4 100644
--- a/contrib/tools/cython/Cython/Includes/libc/float.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/float.pxd
@@ -1,43 +1,43 @@
-# 5.2.4.2.2 Characteristics of floating types <float.h>
-
+# 5.2.4.2.2 Characteristics of floating types <float.h>
+
cdef extern from "<float.h>":
-
+
const float FLT_RADIX
-
+
const float FLT_MANT_DIG
const double DBL_MANT_DIG
const long double LDBL_MANT_DIG
-
+
const double DECIMAL_DIG
-
+
const float FLT_DIG
const double DBL_DIG
const long double LDBL_DIG
-
+
const float FLT_MIN_EXP
const double DBL_MIN_EXP
const long double LDBL_MIN_EXP
-
+
const float FLT_MIN_10_EXP
const double DBL_MIN_10_EXP
const long double LDBL_MIN_10_EXP
-
+
const float FLT_MAX_EXP
const double DBL_MAX_EXP
const long double LDBL_MAX_EXP
-
+
const float FLT_MAX_10_EXP
const double DBL_MAX_10_EXP
const long double LDBL_MAX_10_EXP
-
+
const float FLT_MAX
const double DBL_MAX
const long double LDBL_MAX
-
+
const float FLT_EPSILON
const double DBL_EPSILON
const long double LDBL_EPSILON
-
+
const float FLT_MIN
const double DBL_MIN
const long double LDBL_MIN
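
For context, a minimal Cython sketch using one of the constants above for a tolerance-based comparison; the helper name is hypothetical:

    from libc.float cimport DBL_EPSILON

    def nearly_equal(double a, double b):
        # Scale the machine epsilon by the magnitude of the operands.
        return abs(a - b) <= 4.0 * DBL_EPSILON * max(abs(a), abs(b), 1.0)
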
diff --git a/contrib/tools/cython/Cython/Includes/libc/limits.pxd b/contrib/tools/cython/Cython/Includes/libc/limits.pxd
index b585509d34..39d10a1ff9 100644
--- a/contrib/tools/cython/Cython/Includes/libc/limits.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/limits.pxd
@@ -1,28 +1,28 @@
-# 5.2.4.2.1 Sizes of integer types <limits.h>
-
+# 5.2.4.2.1 Sizes of integer types <limits.h>
+
cdef extern from "<limits.h>":
const int CHAR_BIT
const int MB_LEN_MAX
-
+
const char CHAR_MIN
const char CHAR_MAX
-
+
const signed char SCHAR_MIN
const signed char SCHAR_MAX
const unsigned char UCHAR_MAX
-
+
const short SHRT_MIN
const short SHRT_MAX
const unsigned short USHRT_MAX
-
+
const int INT_MIN
const int INT_MAX
const unsigned int UINT_MAX
-
+
const long LONG_MIN
const long LONG_MAX
const unsigned long ULONG_MAX
-
+
const long long LLONG_MIN
const long long LLONG_MAX
const unsigned long long ULLONG_MAX
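
For context, a minimal Cython sketch of a range guard built from the limits above; the helper name is hypothetical:

    from libc.limits cimport INT_MIN, INT_MAX

    def fits_in_c_int(long long value):
        # True when the value can be stored in a plain C int without overflow.
        return INT_MIN <= value <= INT_MAX
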
diff --git a/contrib/tools/cython/Cython/Includes/libc/locale.pxd b/contrib/tools/cython/Cython/Includes/libc/locale.pxd
index 255df6062e..5cbec953ef 100644
--- a/contrib/tools/cython/Cython/Includes/libc/locale.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/locale.pxd
@@ -1,46 +1,46 @@
-# 7.11 Localization <locale.h>
-
-# deprecated cimport for backwards compatibility:
-from libc.string cimport const_char
-
-
+# 7.11 Localization <locale.h>
+
+# deprecated cimport for backwards compatibility:
+from libc.string cimport const_char
+
+
cdef extern from "<locale.h>" nogil:
-
- struct lconv:
- char *decimal_point
- char *thousands_sep
- char *grouping
- char *mon_decimal_point
- char *mon_thousands_sep
- char *mon_grouping
- char *positive_sign
- char *negative_sign
- char *currency_symbol
- char frac_digits
- char p_cs_precedes
- char n_cs_precedes
- char p_sep_by_space
- char n_sep_by_space
- char p_sign_posn
- char n_sign_posn
- char *int_curr_symbol
- char int_frac_digits
- char int_p_cs_precedes
- char int_n_cs_precedes
- char int_p_sep_by_space
- char int_n_sep_by_space
- char int_p_sign_posn
- char int_n_sign_posn
-
- enum: LC_ALL
- enum: LC_COLLATE
- enum: LC_CTYPE
- enum: LC_MONETARY
- enum: LC_NUMERIC
- enum: LC_TIME
-
- # 7.11.1 Locale control
- char *setlocale (int category, const char *locale)
-
- # 7.11.2 Numeric formatting convention inquiry
- lconv *localeconv ()
+
+ struct lconv:
+ char *decimal_point
+ char *thousands_sep
+ char *grouping
+ char *mon_decimal_point
+ char *mon_thousands_sep
+ char *mon_grouping
+ char *positive_sign
+ char *negative_sign
+ char *currency_symbol
+ char frac_digits
+ char p_cs_precedes
+ char n_cs_precedes
+ char p_sep_by_space
+ char n_sep_by_space
+ char p_sign_posn
+ char n_sign_posn
+ char *int_curr_symbol
+ char int_frac_digits
+ char int_p_cs_precedes
+ char int_n_cs_precedes
+ char int_p_sep_by_space
+ char int_n_sep_by_space
+ char int_p_sign_posn
+ char int_n_sign_posn
+
+ enum: LC_ALL
+ enum: LC_COLLATE
+ enum: LC_CTYPE
+ enum: LC_MONETARY
+ enum: LC_NUMERIC
+ enum: LC_TIME
+
+ # 7.11.1 Locale control
+ char *setlocale (int category, const char *locale)
+
+ # 7.11.2 Numeric formatting convention inquiry
+ lconv *localeconv ()
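
For context, a minimal Cython sketch of the two locale calls declared above; the helper name is hypothetical:

    from libc.locale cimport LC_ALL, lconv, setlocale, localeconv

    def decimal_point():
        cdef lconv* conv
        setlocale(LC_ALL, b"")                       # adopt the environment's locale
        conv = localeconv()
        return (<bytes>conv.decimal_point).decode()  # e.g. "." or ","
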
diff --git a/contrib/tools/cython/Cython/Includes/libc/math.pxd b/contrib/tools/cython/Cython/Includes/libc/math.pxd
index 3e5ea51d61..b002670b22 100644
--- a/contrib/tools/cython/Cython/Includes/libc/math.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/math.pxd
@@ -14,95 +14,95 @@ cdef extern from "<math.h>" nogil:
const double M_2_SQRTPI
const double M_SQRT2
const double M_SQRT1_2
-
- # C99 constants
+
+ # C99 constants
const float INFINITY
const float NAN
# note: not providing "nan" and "inf" aliases here as nan() is a function in C
const double HUGE_VAL
const float HUGE_VALF
const long double HUGE_VALL
-
- double acos(double x)
- double asin(double x)
- double atan(double x)
- double atan2(double y, double x)
- double cos(double x)
- double sin(double x)
- double tan(double x)
-
- double cosh(double x)
- double sinh(double x)
- double tanh(double x)
- double acosh(double x)
- double asinh(double x)
- double atanh(double x)
-
- double hypot(double x, double y)
-
- double exp(double x)
- double exp2(double x)
- double expm1(double x)
- double log(double x)
- double logb(double x)
- double log2(double x)
- double log10(double x)
- double log1p(double x)
- int ilogb(double x)
-
- double lgamma(double x)
- double tgamma(double x)
-
- double frexp(double x, int* exponent)
- double ldexp(double x, int exponent)
-
- double modf(double x, double* iptr)
- double fmod(double x, double y)
- double remainder(double x, double y)
- double remquo(double x, double y, int *quot)
- double pow(double x, double y)
- double sqrt(double x)
- double cbrt(double x)
-
- double fabs(double x)
- double ceil(double x)
- double floor(double x)
- double trunc(double x)
- double rint(double x)
- double round(double x)
- double nearbyint(double x)
- double nextafter(double, double)
- double nexttoward(double, long double)
-
- long long llrint(double)
- long lrint(double)
- long long llround(double)
- long lround(double)
-
- double copysign(double, double)
- float copysignf(float, float)
- long double copysignl(long double, long double)
-
- double erf(double)
- float erff(float)
- long double erfl(long double)
- double erfc(double)
- float erfcf(float)
- long double erfcl(long double)
-
- double fdim(double x, double y)
+
+ double acos(double x)
+ double asin(double x)
+ double atan(double x)
+ double atan2(double y, double x)
+ double cos(double x)
+ double sin(double x)
+ double tan(double x)
+
+ double cosh(double x)
+ double sinh(double x)
+ double tanh(double x)
+ double acosh(double x)
+ double asinh(double x)
+ double atanh(double x)
+
+ double hypot(double x, double y)
+
+ double exp(double x)
+ double exp2(double x)
+ double expm1(double x)
+ double log(double x)
+ double logb(double x)
+ double log2(double x)
+ double log10(double x)
+ double log1p(double x)
+ int ilogb(double x)
+
+ double lgamma(double x)
+ double tgamma(double x)
+
+ double frexp(double x, int* exponent)
+ double ldexp(double x, int exponent)
+
+ double modf(double x, double* iptr)
+ double fmod(double x, double y)
+ double remainder(double x, double y)
+ double remquo(double x, double y, int *quot)
+ double pow(double x, double y)
+ double sqrt(double x)
+ double cbrt(double x)
+
+ double fabs(double x)
+ double ceil(double x)
+ double floor(double x)
+ double trunc(double x)
+ double rint(double x)
+ double round(double x)
+ double nearbyint(double x)
+ double nextafter(double, double)
+ double nexttoward(double, long double)
+
+ long long llrint(double)
+ long lrint(double)
+ long long llround(double)
+ long lround(double)
+
+ double copysign(double, double)
+ float copysignf(float, float)
+ long double copysignl(long double, long double)
+
+ double erf(double)
+ float erff(float)
+ long double erfl(long double)
+ double erfc(double)
+ float erfcf(float)
+ long double erfcl(long double)
+
+ double fdim(double x, double y)
double fma(double x, double y, double z)
- double fmax(double x, double y)
- double fmin(double x, double y)
- double scalbln(double x, long n)
- double scalbn(double x, int n)
-
- double nan(const char*)
+ double fmax(double x, double y)
+ double fmin(double x, double y)
+ double scalbln(double x, long n)
+ double scalbn(double x, int n)
+
+ double nan(const char*)
int isinf(long double) # -1 / 0 / 1
- bint isfinite(long double)
+ bint isfinite(long double)
bint isnan(long double)
- bint isnormal(long double)
+ bint isnormal(long double)
bint signbit(long double)
int fpclassify(long double)
const int FP_NAN
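
For context, a minimal Cython sketch of calling the math routines declared above; the helper name is hypothetical:

    from libc.math cimport sqrt, isnan

    def safe_sqrt(double x):
        if isnan(x) or x < 0:
            return float("nan")
        return sqrt(x)
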
diff --git a/contrib/tools/cython/Cython/Includes/libc/setjmp.pxd b/contrib/tools/cython/Cython/Includes/libc/setjmp.pxd
index f7e362146b..6c11a534d4 100644
--- a/contrib/tools/cython/Cython/Includes/libc/setjmp.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/setjmp.pxd
@@ -1,8 +1,8 @@
cdef extern from "<setjmp.h>" nogil:
- ctypedef struct jmp_buf:
- pass
- int setjmp(jmp_buf state)
- void longjmp(jmp_buf state, int value)
+ ctypedef struct jmp_buf:
+ pass
+ int setjmp(jmp_buf state)
+ void longjmp(jmp_buf state, int value)
ctypedef struct sigjmp_buf:
pass
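
For context, a minimal Cython sketch of the classic setjmp/longjmp bail-out pattern built on the declarations above; purely illustrative:

    from libc.setjmp cimport jmp_buf, setjmp, longjmp

    cdef int run_guarded() nogil:
        cdef jmp_buf env
        if setjmp(env):
            return -1            # execution resumes here after longjmp
        # ... C-level work that may need to abort ...
        longjmp(env, 1)          # never returns; control jumps back to setjmp
        return 0                 # unreachable
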
diff --git a/contrib/tools/cython/Cython/Includes/libc/signal.pxd b/contrib/tools/cython/Cython/Includes/libc/signal.pxd
index d1effe1f4f..5d34935543 100644
--- a/contrib/tools/cython/Cython/Includes/libc/signal.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/signal.pxd
@@ -1,18 +1,18 @@
-# 7.14 Signal handling <signal.h>
-
-ctypedef void (*sighandler_t)(int SIGNUM) nogil
-
+# 7.14 Signal handling <signal.h>
+
+ctypedef void (*sighandler_t)(int SIGNUM) nogil
+
cdef extern from "<signal.h>" nogil:
-
- ctypedef int sig_atomic_t
-
- sighandler_t SIG_DFL
- sighandler_t SIG_IGN
- sighandler_t SIG_ERR
-
- sighandler_t signal (int signum, sighandler_t action)
- int raise_"raise" (int signum)
-
+
+ ctypedef int sig_atomic_t
+
+ sighandler_t SIG_DFL
+ sighandler_t SIG_IGN
+ sighandler_t SIG_ERR
+
+ sighandler_t signal (int signum, sighandler_t action)
+ int raise_"raise" (int signum)
+
# Signals
enum:
# Program Error
diff --git a/contrib/tools/cython/Cython/Includes/libc/stddef.pxd b/contrib/tools/cython/Cython/Includes/libc/stddef.pxd
index 00227447a2..9b0f4c5fd2 100644
--- a/contrib/tools/cython/Cython/Includes/libc/stddef.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/stddef.pxd
@@ -1,9 +1,9 @@
-# 7.17 Common definitions <stddef.h>
-
+# 7.17 Common definitions <stddef.h>
+
cdef extern from "<stddef.h>":
-
- ctypedef signed int ptrdiff_t
-
- ctypedef unsigned int size_t
-
- ctypedef int wchar_t
+
+ ctypedef signed int ptrdiff_t
+
+ ctypedef unsigned int size_t
+
+ ctypedef int wchar_t
diff --git a/contrib/tools/cython/Cython/Includes/libc/stdint.pxd b/contrib/tools/cython/Cython/Includes/libc/stdint.pxd
index d7177d554f..ced3d46add 100644
--- a/contrib/tools/cython/Cython/Includes/libc/stdint.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/stdint.pxd
@@ -1,105 +1,105 @@
-# Longness only used for type promotion.
-# Actual compile time size used for conversions.
-
-# 7.18 Integer types <stdint.h>
+# Longness only used for type promotion.
+# Actual compile time size used for conversions.
+
+# 7.18 Integer types <stdint.h>
cdef extern from "<stdint.h>" nogil:
-
- # 7.18.1 Integer types
- # 7.18.1.1 Exact-width integer types
- ctypedef signed char int8_t
- ctypedef signed short int16_t
- ctypedef signed int int32_t
- ctypedef signed long int64_t
- ctypedef unsigned char uint8_t
- ctypedef unsigned short uint16_t
- ctypedef unsigned int uint32_t
+
+ # 7.18.1 Integer types
+ # 7.18.1.1 Exact-width integer types
+ ctypedef signed char int8_t
+ ctypedef signed short int16_t
+ ctypedef signed int int32_t
+ ctypedef signed long int64_t
+ ctypedef unsigned char uint8_t
+ ctypedef unsigned short uint16_t
+ ctypedef unsigned int uint32_t
ctypedef unsigned long long uint64_t
- # 7.18.1.2 Minimum-width integer types
- ctypedef signed char int_least8_t
- ctypedef signed short int_least16_t
- ctypedef signed int int_least32_t
- ctypedef signed long int_least64_t
- ctypedef unsigned char uint_least8_t
- ctypedef unsigned short uint_least16_t
- ctypedef unsigned int uint_least32_t
+ # 7.18.1.2 Minimum-width integer types
+ ctypedef signed char int_least8_t
+ ctypedef signed short int_least16_t
+ ctypedef signed int int_least32_t
+ ctypedef signed long int_least64_t
+ ctypedef unsigned char uint_least8_t
+ ctypedef unsigned short uint_least16_t
+ ctypedef unsigned int uint_least32_t
ctypedef unsigned long long uint_least64_t
- # 7.18.1.3 Fastest minimum-width integer types
- ctypedef signed char int_fast8_t
- ctypedef signed short int_fast16_t
- ctypedef signed int int_fast32_t
- ctypedef signed long int_fast64_t
- ctypedef unsigned char uint_fast8_t
- ctypedef unsigned short uint_fast16_t
- ctypedef unsigned int uint_fast32_t
+ # 7.18.1.3 Fastest minimum-width integer types
+ ctypedef signed char int_fast8_t
+ ctypedef signed short int_fast16_t
+ ctypedef signed int int_fast32_t
+ ctypedef signed long int_fast64_t
+ ctypedef unsigned char uint_fast8_t
+ ctypedef unsigned short uint_fast16_t
+ ctypedef unsigned int uint_fast32_t
ctypedef unsigned long long uint_fast64_t
- # 7.18.1.4 Integer types capable of holding object pointers
- ctypedef ssize_t intptr_t
- ctypedef size_t uintptr_t
- # 7.18.1.5 Greatest-width integer types
- ctypedef signed long long intmax_t
- ctypedef unsigned long long uintmax_t
-
- # 7.18.2 Limits of specified-width integer types
- # 7.18.2.1 Limits of exact-width integer types
- int8_t INT8_MIN
- int16_t INT16_MIN
- int32_t INT32_MIN
- int64_t INT64_MIN
- int8_t INT8_MAX
- int16_t INT16_MAX
- int32_t INT32_MAX
- int64_t INT64_MAX
- uint8_t UINT8_MAX
- uint16_t UINT16_MAX
- uint32_t UINT32_MAX
- uint64_t UINT64_MAX
- #7.18.2.2 Limits of minimum-width integer types
- int_least8_t INT_LEAST8_MIN
- int_least16_t INT_LEAST16_MIN
- int_least32_t INT_LEAST32_MIN
- int_least64_t INT_LEAST64_MIN
- int_least8_t INT_LEAST8_MAX
- int_least16_t INT_LEAST16_MAX
- int_least32_t INT_LEAST32_MAX
- int_least64_t INT_LEAST64_MAX
- uint_least8_t UINT_LEAST8_MAX
- uint_least16_t UINT_LEAST16_MAX
- uint_least32_t UINT_LEAST32_MAX
- uint_least64_t UINT_LEAST64_MAX
- #7.18.2.3 Limits of fastest minimum-width integer types
- int_fast8_t INT_FAST8_MIN
- int_fast16_t INT_FAST16_MIN
- int_fast32_t INT_FAST32_MIN
- int_fast64_t INT_FAST64_MIN
- int_fast8_t INT_FAST8_MAX
- int_fast16_t INT_FAST16_MAX
- int_fast32_t INT_FAST32_MAX
- int_fast64_t INT_FAST64_MAX
- uint_fast8_t UINT_FAST8_MAX
- uint_fast16_t UINT_FAST16_MAX
- uint_fast32_t UINT_FAST32_MAX
- uint_fast64_t UINT_FAST64_MAX
- #7.18.2.4 Limits of integer types capable of holding object pointers
- enum: INTPTR_MIN
- enum: INTPTR_MAX
- enum: UINTPTR_MAX
- # 7.18.2.5 Limits of greatest-width integer types
- enum: INTMAX_MAX
- enum: INTMAX_MIN
- enum: UINTMAX_MAX
-
- # 7.18.3 Limits of other integer types
- # ptrdiff_t
- enum: PTRDIFF_MIN
- enum: PTRDIFF_MAX
- # sig_atomic_t
- enum: SIG_ATOMIC_MIN
- enum: SIG_ATOMIC_MAX
- # size_t
- size_t SIZE_MAX
- # wchar_t
- enum: WCHAR_MIN
- enum: WCHAR_MAX
- # wint_t
- enum: WINT_MIN
- enum: WINT_MAX
+ # 7.18.1.4 Integer types capable of holding object pointers
+ ctypedef ssize_t intptr_t
+ ctypedef size_t uintptr_t
+ # 7.18.1.5 Greatest-width integer types
+ ctypedef signed long long intmax_t
+ ctypedef unsigned long long uintmax_t
+
+ # 7.18.2 Limits of specified-width integer types
+ # 7.18.2.1 Limits of exact-width integer types
+ int8_t INT8_MIN
+ int16_t INT16_MIN
+ int32_t INT32_MIN
+ int64_t INT64_MIN
+ int8_t INT8_MAX
+ int16_t INT16_MAX
+ int32_t INT32_MAX
+ int64_t INT64_MAX
+ uint8_t UINT8_MAX
+ uint16_t UINT16_MAX
+ uint32_t UINT32_MAX
+ uint64_t UINT64_MAX
+ #7.18.2.2 Limits of minimum-width integer types
+ int_least8_t INT_LEAST8_MIN
+ int_least16_t INT_LEAST16_MIN
+ int_least32_t INT_LEAST32_MIN
+ int_least64_t INT_LEAST64_MIN
+ int_least8_t INT_LEAST8_MAX
+ int_least16_t INT_LEAST16_MAX
+ int_least32_t INT_LEAST32_MAX
+ int_least64_t INT_LEAST64_MAX
+ uint_least8_t UINT_LEAST8_MAX
+ uint_least16_t UINT_LEAST16_MAX
+ uint_least32_t UINT_LEAST32_MAX
+ uint_least64_t UINT_LEAST64_MAX
+ #7.18.2.3 Limits of fastest minimum-width integer types
+ int_fast8_t INT_FAST8_MIN
+ int_fast16_t INT_FAST16_MIN
+ int_fast32_t INT_FAST32_MIN
+ int_fast64_t INT_FAST64_MIN
+ int_fast8_t INT_FAST8_MAX
+ int_fast16_t INT_FAST16_MAX
+ int_fast32_t INT_FAST32_MAX
+ int_fast64_t INT_FAST64_MAX
+ uint_fast8_t UINT_FAST8_MAX
+ uint_fast16_t UINT_FAST16_MAX
+ uint_fast32_t UINT_FAST32_MAX
+ uint_fast64_t UINT_FAST64_MAX
+ #7.18.2.4 Limits of integer types capable of holding object pointers
+ enum: INTPTR_MIN
+ enum: INTPTR_MAX
+ enum: UINTPTR_MAX
+ # 7.18.2.5 Limits of greatest-width integer types
+ enum: INTMAX_MAX
+ enum: INTMAX_MIN
+ enum: UINTMAX_MAX
+
+ # 7.18.3 Limits of other integer types
+ # ptrdiff_t
+ enum: PTRDIFF_MIN
+ enum: PTRDIFF_MAX
+ # sig_atomic_t
+ enum: SIG_ATOMIC_MIN
+ enum: SIG_ATOMIC_MAX
+ # size_t
+ size_t SIZE_MAX
+ # wchar_t
+ enum: WCHAR_MIN
+ enum: WCHAR_MAX
+ # wint_t
+ enum: WINT_MIN
+ enum: WINT_MAX
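
For context, a minimal Cython sketch using the fixed-width types declared above; the helper name is hypothetical:

    from libc.stdint cimport uint8_t, int64_t

    def byte_sum(bytes data):
        cdef int64_t total = 0
        cdef uint8_t b
        for b in data:           # each byte fits in uint8_t, the running sum in int64_t
            total += b
        return total
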
diff --git a/contrib/tools/cython/Cython/Includes/libc/stdio.pxd b/contrib/tools/cython/Cython/Includes/libc/stdio.pxd
index 03ed475cd5..1644a5a0ab 100644
--- a/contrib/tools/cython/Cython/Includes/libc/stdio.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/stdio.pxd
@@ -1,80 +1,80 @@
-# 7.19 Input/output <stdio.h>
-
-
-# deprecated cimports for backwards compatibility:
-from libc.string cimport const_char, const_void
-
-
+# 7.19 Input/output <stdio.h>
+
+
+# deprecated cimports for backwards compatibility:
+from libc.string cimport const_char, const_void
+
+
cdef extern from "<stdio.h>" nogil:
-
- ctypedef struct FILE
- cdef FILE *stdin
- cdef FILE *stdout
- cdef FILE *stderr
-
- enum: FOPEN_MAX
- enum: FILENAME_MAX
- FILE *fopen (const char *filename, const char *opentype)
- FILE *freopen (const char *filename, const char *opentype, FILE *stream)
- FILE *fdopen (int fdescriptor, const char *opentype)
- int fclose (FILE *stream)
- int remove (const char *filename)
- int rename (const char *oldname, const char *newname)
- FILE *tmpfile ()
-
- int remove (const char *pathname)
- int rename (const char *oldpath, const char *newpath)
-
- enum: _IOFBF
- enum: _IOLBF
- enum: _IONBF
- int setvbuf (FILE *stream, char *buf, int mode, size_t size)
- enum: BUFSIZ
- void setbuf (FILE *stream, char *buf)
-
- size_t fread (void *data, size_t size, size_t count, FILE *stream)
- size_t fwrite (const void *data, size_t size, size_t count, FILE *stream)
- int fflush (FILE *stream)
-
- enum: EOF
- void clearerr (FILE *stream)
- int feof (FILE *stream)
- int ferror (FILE *stream)
-
- enum: SEEK_SET
- enum: SEEK_CUR
- enum: SEEK_END
- int fseek (FILE *stream, long int offset, int whence)
- void rewind (FILE *stream)
- long int ftell (FILE *stream)
-
- ctypedef struct fpos_t
- ctypedef const fpos_t const_fpos_t "const fpos_t"
- int fgetpos (FILE *stream, fpos_t *position)
- int fsetpos (FILE *stream, const fpos_t *position)
-
- int scanf (const char *template, ...)
- int sscanf (const char *s, const char *template, ...)
- int fscanf (FILE *stream, const char *template, ...)
-
- int printf (const char *template, ...)
- int sprintf (char *s, const char *template, ...)
- int snprintf (char *s, size_t size, const char *template, ...)
- int fprintf (FILE *stream, const char *template, ...)
-
- void perror (const char *message)
-
- char *gets (char *s)
- char *fgets (char *s, int count, FILE *stream)
- int getchar ()
- int fgetc (FILE *stream)
- int getc (FILE *stream)
- int ungetc (int c, FILE *stream)
-
- int puts (const char *s)
- int fputs (const char *s, FILE *stream)
- int putchar (int c)
- int fputc (int c, FILE *stream)
- int putc (int c, FILE *stream)
-
- size_t getline(char **lineptr, size_t *n, FILE *stream)
+
+ ctypedef struct FILE
+ cdef FILE *stdin
+ cdef FILE *stdout
+ cdef FILE *stderr
+
+ enum: FOPEN_MAX
+ enum: FILENAME_MAX
+ FILE *fopen (const char *filename, const char *opentype)
+ FILE *freopen (const char *filename, const char *opentype, FILE *stream)
+ FILE *fdopen (int fdescriptor, const char *opentype)
+ int fclose (FILE *stream)
+ int remove (const char *filename)
+ int rename (const char *oldname, const char *newname)
+ FILE *tmpfile ()
+
+ int remove (const char *pathname)
+ int rename (const char *oldpath, const char *newpath)
+
+ enum: _IOFBF
+ enum: _IOLBF
+ enum: _IONBF
+ int setvbuf (FILE *stream, char *buf, int mode, size_t size)
+ enum: BUFSIZ
+ void setbuf (FILE *stream, char *buf)
+
+ size_t fread (void *data, size_t size, size_t count, FILE *stream)
+ size_t fwrite (const void *data, size_t size, size_t count, FILE *stream)
+ int fflush (FILE *stream)
+
+ enum: EOF
+ void clearerr (FILE *stream)
+ int feof (FILE *stream)
+ int ferror (FILE *stream)
+
+ enum: SEEK_SET
+ enum: SEEK_CUR
+ enum: SEEK_END
+ int fseek (FILE *stream, long int offset, int whence)
+ void rewind (FILE *stream)
+ long int ftell (FILE *stream)
+
+ ctypedef struct fpos_t
+ ctypedef const fpos_t const_fpos_t "const fpos_t"
+ int fgetpos (FILE *stream, fpos_t *position)
+ int fsetpos (FILE *stream, const fpos_t *position)
+
+ int scanf (const char *template, ...)
+ int sscanf (const char *s, const char *template, ...)
+ int fscanf (FILE *stream, const char *template, ...)
+
+ int printf (const char *template, ...)
+ int sprintf (char *s, const char *template, ...)
+ int snprintf (char *s, size_t size, const char *template, ...)
+ int fprintf (FILE *stream, const char *template, ...)
+
+ void perror (const char *message)
+
+ char *gets (char *s)
+ char *fgets (char *s, int count, FILE *stream)
+ int getchar ()
+ int fgetc (FILE *stream)
+ int getc (FILE *stream)
+ int ungetc (int c, FILE *stream)
+
+ int puts (const char *s)
+ int fputs (const char *s, FILE *stream)
+ int putchar (int c)
+ int fputc (int c, FILE *stream)
+ int putc (int c, FILE *stream)
+
+ size_t getline(char **lineptr, size_t *n, FILE *stream)
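
For context, a minimal Cython sketch of the buffered I/O calls declared above; the helper name and output path are hypothetical:

    from libc.stdio cimport FILE, fopen, fprintf, fclose

    def write_counter(bytes path, int value):
        cdef FILE* f = fopen(path, b"w")
        if f == NULL:
            raise OSError("could not open file for writing")
        fprintf(f, b"count=%d\n", value)
        fclose(f)
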
diff --git a/contrib/tools/cython/Cython/Includes/libc/stdlib.pxd b/contrib/tools/cython/Cython/Includes/libc/stdlib.pxd
index 9cb0d0320e..e6fac821c7 100644
--- a/contrib/tools/cython/Cython/Includes/libc/stdlib.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/stdlib.pxd
@@ -1,72 +1,72 @@
-# 7.20 General utilities <stdlib.h>
-
-# deprecated cimports for backwards compatibility:
-from libc.string cimport const_char, const_void
-
-
+# 7.20 General utilities <stdlib.h>
+
+# deprecated cimports for backwards compatibility:
+from libc.string cimport const_char, const_void
+
+
cdef extern from "<stdlib.h>" nogil:
-
- # 7.20.1 Numeric conversion functions
- int atoi (const char *string)
- long atol (const char *string)
- long long atoll (const char *string)
- double atof (const char *string)
- long strtol (const char *string, char **tailptr, int base)
- unsigned long int strtoul (const char *string, char **tailptr, int base)
- long long int strtoll (const char *string, char **tailptr, int base)
- unsigned long long int strtoull (const char *string, char **tailptr, int base)
- float strtof (const char *string, char **tailptr)
- double strtod (const char *string, char **tailptr)
- long double strtold (const char *string, char **tailptr)
-
- # 7.20.2 Pseudo-random sequence generation functions
- enum: RAND_MAX
- int rand ()
- void srand (unsigned int seed)
-
- # 7.20.3 Memory management functions
- void *calloc (size_t count, size_t eltsize)
- void free (void *ptr)
- void *malloc (size_t size)
- void *realloc (void *ptr, size_t newsize)
-
- # 7.20.4 Communication with the environment
- enum: EXIT_FAILURE
- enum: EXIT_SUCCESS
- void exit (int status)
- void _exit (int status)
- int atexit (void (*function) ())
- void abort ()
- char *getenv (const char *name)
- int system (const char *command)
-
- #7.20.5 Searching and sorting utilities
- void *bsearch (const void *key, const void *array,
- size_t count, size_t size,
- int (*compare)(const void *, const void *))
- void qsort (void *array, size_t count, size_t size,
- int (*compare)(const void *, const void *))
-
- # 7.20.6 Integer arithmetic functions
- int abs (int number)
- long int labs (long int number)
- long long int llabs (long long int number)
- ctypedef struct div_t:
- int quot
- int rem
- div_t div (int numerator, int denominator)
- ctypedef struct ldiv_t:
- long int quot
- long int rem
- ldiv_t ldiv (long int numerator, long int denominator)
- ctypedef struct lldiv_t:
- long long int quot
- long long int rem
- lldiv_t lldiv (long long int numerator, long long int denominator)
-
-
- # 7.20.7 Multibyte/wide character conversion functions
- # XXX TODO
-
- # 7.20.8 Multibyte/wide string conversion functions
- # XXX TODO
+
+ # 7.20.1 Numeric conversion functions
+ int atoi (const char *string)
+ long atol (const char *string)
+ long long atoll (const char *string)
+ double atof (const char *string)
+ long strtol (const char *string, char **tailptr, int base)
+ unsigned long int strtoul (const char *string, char **tailptr, int base)
+ long long int strtoll (const char *string, char **tailptr, int base)
+ unsigned long long int strtoull (const char *string, char **tailptr, int base)
+ float strtof (const char *string, char **tailptr)
+ double strtod (const char *string, char **tailptr)
+ long double strtold (const char *string, char **tailptr)
+
+ # 7.20.2 Pseudo-random sequence generation functions
+ enum: RAND_MAX
+ int rand ()
+ void srand (unsigned int seed)
+
+ # 7.20.3 Memory management functions
+ void *calloc (size_t count, size_t eltsize)
+ void free (void *ptr)
+ void *malloc (size_t size)
+ void *realloc (void *ptr, size_t newsize)
+
+ # 7.20.4 Communication with the environment
+ enum: EXIT_FAILURE
+ enum: EXIT_SUCCESS
+ void exit (int status)
+ void _exit (int status)
+ int atexit (void (*function) ())
+ void abort ()
+ char *getenv (const char *name)
+ int system (const char *command)
+
+ #7.20.5 Searching and sorting utilities
+ void *bsearch (const void *key, const void *array,
+ size_t count, size_t size,
+ int (*compare)(const void *, const void *))
+ void qsort (void *array, size_t count, size_t size,
+ int (*compare)(const void *, const void *))
+
+ # 7.20.6 Integer arithmetic functions
+ int abs (int number)
+ long int labs (long int number)
+ long long int llabs (long long int number)
+ ctypedef struct div_t:
+ int quot
+ int rem
+ div_t div (int numerator, int denominator)
+ ctypedef struct ldiv_t:
+ long int quot
+ long int rem
+ ldiv_t ldiv (long int numerator, long int denominator)
+ ctypedef struct lldiv_t:
+ long long int quot
+ long long int rem
+ lldiv_t lldiv (long long int numerator, long long int denominator)
+
+
+ # 7.20.7 Multibyte/wide character conversion functions
+ # XXX TODO
+
+ # 7.20.8 Multibyte/wide string conversion functions
+ # XXX TODO
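
For context, a minimal Cython sketch of the memory-management calls declared above; the helper name is hypothetical:

    from libc.stdlib cimport malloc, free

    def sum_first(int n):
        if n <= 0:
            return 0
        cdef int* buf = <int*>malloc(n * sizeof(int))
        cdef int i, total = 0
        if buf == NULL:
            raise MemoryError()
        try:
            for i in range(n):
                buf[i] = i
                total += buf[i]
        finally:
            free(buf)
        return total
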
diff --git a/contrib/tools/cython/Cython/Includes/libc/string.pxd b/contrib/tools/cython/Cython/Includes/libc/string.pxd
index d7248fdb6e..e6d96183f2 100644
--- a/contrib/tools/cython/Cython/Includes/libc/string.pxd
+++ b/contrib/tools/cython/Cython/Includes/libc/string.pxd
@@ -1,50 +1,50 @@
-# 7.21 String handling <string.h>
-
-cdef extern from *:
- # deprecated backwards compatibility declarations
- ctypedef const char const_char "const char"
- ctypedef const signed char const_schar "const signed char"
- ctypedef const unsigned char const_uchar "const unsigned char"
- ctypedef const void const_void "const void"
-
+# 7.21 String handling <string.h>
+
+cdef extern from *:
+ # deprecated backwards compatibility declarations
+ ctypedef const char const_char "const char"
+ ctypedef const signed char const_schar "const signed char"
+ ctypedef const unsigned char const_uchar "const unsigned char"
+ ctypedef const void const_void "const void"
+
cdef extern from "<string.h>" nogil:
-
- void *memcpy (void *pto, const void *pfrom, size_t size)
- void *memmove (void *pto, const void *pfrom, size_t size)
- void *memset (void *block, int c, size_t size)
- int memcmp (const void *a1, const void *a2, size_t size)
- void *memchr (const void *block, int c, size_t size)
-
- void *memchr (const void *block, int c, size_t size)
- void *memrchr (const void *block, int c, size_t size)
-
- size_t strlen (const char *s)
- char *strcpy (char *pto, const char *pfrom)
- char *strncpy (char *pto, const char *pfrom, size_t size)
- char *strdup (const char *s)
- char *strndup (const char *s, size_t size)
- char *strcat (char *pto, const char *pfrom)
- char *strncat (char *pto, const char *pfrom, size_t size)
-
- int strcmp (const char *s1, const char *s2)
- int strcasecmp (const char *s1, const char *s2)
- int strncmp (const char *s1, const char *s2, size_t size)
- int strncasecmp (const char *s1, const char *s2, size_t n)
-
- int strcoll (const char *s1, const char *s2)
- size_t strxfrm (char *pto, const char *pfrom, size_t size)
-
- char *strerror (int errnum)
-
- char *strchr (const char *string, int c)
- char *strrchr (const char *string, int c)
-
- char *strstr (const char *haystack, const char *needle)
- char *strcasestr (const char *haystack, const char *needle)
-
- size_t strcspn (const char *string, const char *stopset)
- size_t strspn (const char *string, const char *set)
- char * strpbrk (const char *string, const char *stopset)
-
- char *strtok (char *newstring, const char *delimiters)
- char *strsep (char **string_ptr, const char *delimiter)
+
+ void *memcpy (void *pto, const void *pfrom, size_t size)
+ void *memmove (void *pto, const void *pfrom, size_t size)
+ void *memset (void *block, int c, size_t size)
+ int memcmp (const void *a1, const void *a2, size_t size)
+ void *memchr (const void *block, int c, size_t size)
+
+ void *memchr (const void *block, int c, size_t size)
+ void *memrchr (const void *block, int c, size_t size)
+
+ size_t strlen (const char *s)
+ char *strcpy (char *pto, const char *pfrom)
+ char *strncpy (char *pto, const char *pfrom, size_t size)
+ char *strdup (const char *s)
+ char *strndup (const char *s, size_t size)
+ char *strcat (char *pto, const char *pfrom)
+ char *strncat (char *pto, const char *pfrom, size_t size)
+
+ int strcmp (const char *s1, const char *s2)
+ int strcasecmp (const char *s1, const char *s2)
+ int strncmp (const char *s1, const char *s2, size_t size)
+ int strncasecmp (const char *s1, const char *s2, size_t n)
+
+ int strcoll (const char *s1, const char *s2)
+ size_t strxfrm (char *pto, const char *pfrom, size_t size)
+
+ char *strerror (int errnum)
+
+ char *strchr (const char *string, int c)
+ char *strrchr (const char *string, int c)
+
+ char *strstr (const char *haystack, const char *needle)
+ char *strcasestr (const char *haystack, const char *needle)
+
+ size_t strcspn (const char *string, const char *stopset)
+ size_t strspn (const char *string, const char *set)
+ char * strpbrk (const char *string, const char *stopset)
+
+ char *strtok (char *newstring, const char *delimiters)
+ char *strsep (char **string_ptr, const char *delimiter)
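
For context, a minimal Cython sketch of the <string.h> helpers declared above; the helper name is hypothetical:

    from libc.string cimport memcmp

    def same_prefix(bytes a, bytes b, size_t n):
        cdef const char* pa = a
        cdef const char* pb = b
        if n > <size_t>len(a) or n > <size_t>len(b):
            return False
        return memcmp(pa, pb, n) == 0
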
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/__init__.pxd b/contrib/tools/cython/Cython/Includes/libcpp/__init__.pxd
index 8ccd1ae6d9..111ea25c2f 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/__init__.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/__init__.pxd
@@ -1,4 +1,4 @@
-cdef extern from *:
- ctypedef bint bool
+cdef extern from *:
+ ctypedef bint bool
ctypedef void* nullptr_t
nullptr_t nullptr
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/algorithm.pxd b/contrib/tools/cython/Cython/Includes/libcpp/algorithm.pxd
index e7649cf9be..ec7c3835b4 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/algorithm.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/algorithm.pxd
@@ -1,7 +1,7 @@
from libcpp cimport bool
-cdef extern from "<algorithm>" namespace "std" nogil:
+cdef extern from "<algorithm>" namespace "std" nogil:
# Sorting and searching
bool binary_search[Iter, T](Iter first, Iter last, const T& value)
bool binary_search[Iter, T, Compare](Iter first, Iter last, const T& value,
@@ -27,17 +27,17 @@ cdef extern from "<algorithm>" namespace "std" nogil:
Iter unique[Iter, BinaryPredicate](Iter first, Iter last, BinaryPredicate p)
# Binary heaps (priority queues)
- void make_heap[Iter](Iter first, Iter last)
- void make_heap[Iter, Compare](Iter first, Iter last, Compare comp)
-
- void pop_heap[Iter](Iter first, Iter last)
- void pop_heap[Iter, Compare](Iter first, Iter last, Compare comp)
-
- void push_heap[Iter](Iter first, Iter last)
- void push_heap[Iter, Compare](Iter first, Iter last, Compare comp)
-
- void sort_heap[Iter](Iter first, Iter last)
- void sort_heap[Iter, Compare](Iter first, Iter last, Compare comp)
+ void make_heap[Iter](Iter first, Iter last)
+ void make_heap[Iter, Compare](Iter first, Iter last, Compare comp)
+
+ void pop_heap[Iter](Iter first, Iter last)
+ void pop_heap[Iter, Compare](Iter first, Iter last, Compare comp)
+
+ void push_heap[Iter](Iter first, Iter last)
+ void push_heap[Iter, Compare](Iter first, Iter last, Compare comp)
+
+ void sort_heap[Iter](Iter first, Iter last)
+ void sort_heap[Iter, Compare](Iter first, Iter last, Compare comp)
# Copy
OutputIter copy[InputIter,OutputIter](InputIter,InputIter,OutputIter)
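
For context, a minimal Cython sketch of the heap routines declared above; it assumes the companion libcpp.vector declarations and a module compiled as C++, and the helper name is hypothetical:

    from libcpp.vector cimport vector
    from libcpp.algorithm cimport make_heap

    def heap_max(values):
        cdef vector[int] v = values      # Python iterable -> std::vector[int]
        if v.empty():
            raise ValueError("empty input")
        make_heap(v.begin(), v.end())    # max-heap: the largest element moves to the front
        return v.front()
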
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/cast.pxd b/contrib/tools/cython/Cython/Includes/libcpp/cast.pxd
index b0ce3b9b3d..c3a4d8978f 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/cast.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/cast.pxd
@@ -1,12 +1,12 @@
-# Defines the standard C++ cast operators.
-#
-# Due to type restrictions, these are only defined for pointer parameters,
-# however that is the only case where they are significantly more interesting
-# than the standard C cast operator which can be written "<T>(expression)" in
-# Cython.
-
+# Defines the standard C++ cast operators.
+#
+# Due to type restrictions, these are only defined for pointer parameters,
+# however that is the only case where they are significantly more interesting
+# than the standard C cast operator which can be written "<T>(expression)" in
+# Cython.
+
cdef extern from * nogil:
- cdef T dynamic_cast[T](void *) except + # nullptr may also indicate failure
- cdef T static_cast[T](void *)
- cdef T reinterpret_cast[T](void *)
- cdef T const_cast[T](void *)
+ cdef T dynamic_cast[T](void *) except + # nullptr may also indicate failure
+ cdef T static_cast[T](void *)
+ cdef T reinterpret_cast[T](void *)
+ cdef T const_cast[T](void *)
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/complex.pxd b/contrib/tools/cython/Cython/Includes/libcpp/complex.pxd
index 7ae6fd0908..c875d5e5bd 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/complex.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/complex.pxd
@@ -1,101 +1,101 @@
-# Note: add integer versions of the functions?
-
-cdef extern from "<complex>" namespace "std" nogil:
- cdef cppclass complex[T]:
- complex() except +
- complex(T, T) except +
- complex(complex[T]&) except +
- # How to make the converting constructor, i.e. convert complex[double]
- # to complex[float]?
-
- complex[T] operator+(complex[T]&)
- complex[T] operator-(complex[T]&)
- complex[T] operator+(complex[T]&, complex[T]&)
- complex[T] operator+(complex[T]&, T&)
- complex[T] operator+(T&, complex[T]&)
- complex[T] operator-(complex[T]&, complex[T]&)
- complex[T] operator-(complex[T]&, T&)
- complex[T] operator-(T&, complex[T]&)
- complex[T] operator*(complex[T]&, complex[T]&)
- complex[T] operator*(complex[T]&, T&)
- complex[T] operator*(T&, complex[T]&)
- complex[T] operator/(complex[T]&, complex[T]&)
- complex[T] operator/(complex[T]&, T&)
- complex[T] operator/(T&, complex[T]&)
-
- bint operator==(complex[T]&, complex[T]&)
- bint operator==(complex[T]&, T&)
- bint operator==(T&, complex[T]&)
- bint operator!=(complex[T]&, complex[T]&)
- bint operator!=(complex[T]&, T&)
- bint operator!=(T&, complex[T]&)
-
- # Access real part
- T real()
- void real(T)
-
- # Access imaginary part
- T imag()
- void imag(T)
-
- # Return real part
+# Note: add integer versions of the functions?
+
+cdef extern from "<complex>" namespace "std" nogil:
+ cdef cppclass complex[T]:
+ complex() except +
+ complex(T, T) except +
+ complex(complex[T]&) except +
+ # How to make the converting constructor, i.e. convert complex[double]
+ # to complex[float]?
+
+ complex[T] operator+(complex[T]&)
+ complex[T] operator-(complex[T]&)
+ complex[T] operator+(complex[T]&, complex[T]&)
+ complex[T] operator+(complex[T]&, T&)
+ complex[T] operator+(T&, complex[T]&)
+ complex[T] operator-(complex[T]&, complex[T]&)
+ complex[T] operator-(complex[T]&, T&)
+ complex[T] operator-(T&, complex[T]&)
+ complex[T] operator*(complex[T]&, complex[T]&)
+ complex[T] operator*(complex[T]&, T&)
+ complex[T] operator*(T&, complex[T]&)
+ complex[T] operator/(complex[T]&, complex[T]&)
+ complex[T] operator/(complex[T]&, T&)
+ complex[T] operator/(T&, complex[T]&)
+
+ bint operator==(complex[T]&, complex[T]&)
+ bint operator==(complex[T]&, T&)
+ bint operator==(T&, complex[T]&)
+ bint operator!=(complex[T]&, complex[T]&)
+ bint operator!=(complex[T]&, T&)
+ bint operator!=(T&, complex[T]&)
+
+ # Access real part
+ T real()
+ void real(T)
+
+ # Access imaginary part
+ T imag()
+ void imag(T)
+
+ # Return real part
T real[T](complex[T]&)
- long double real(long double)
- double real(double)
- float real(float)
-
- # Return imaginary part
+ long double real(long double)
+ double real(double)
+ float real(float)
+
+ # Return imaginary part
T imag[T](complex[T]&)
- long double imag(long double)
- double imag(double)
- float imag(float)
-
+ long double imag(long double)
+ double imag(double)
+ float imag(float)
+
T abs[T](complex[T]&)
T arg[T](complex[T]&)
- long double arg(long double)
- double arg(double)
- float arg(float)
-
+ long double arg(long double)
+ double arg(double)
+ float arg(float)
+
T norm[T](complex[T])
- long double norm(long double)
- double norm(double)
- float norm(float)
-
+ long double norm(long double)
+ double norm(double)
+ float norm(float)
+
complex[T] conj[T](complex[T]&)
- complex[long double] conj(long double)
- complex[double] conj(double)
- complex[float] conj(float)
-
+ complex[long double] conj(long double)
+ complex[double] conj(double)
+ complex[float] conj(float)
+
complex[T] proj[T](complex[T])
- complex[long double] proj(long double)
- complex[double] proj(double)
- complex[float] proj(float)
-
+ complex[long double] proj(long double)
+ complex[double] proj(double)
+ complex[float] proj(float)
+
complex[T] polar[T](T&, T&)
complex[T] ploar[T](T&)
-
+
complex[T] exp[T](complex[T]&)
complex[T] log[T](complex[T]&)
complex[T] log10[T](complex[T]&)
-
+
complex[T] pow[T](complex[T]&, complex[T]&)
complex[T] pow[T](complex[T]&, T&)
complex[T] pow[T](T&, complex[T]&)
- # There are some promotion versions too
-
+ # There are some promotion versions too
+
complex[T] sqrt[T](complex[T]&)
-
+
complex[T] sin[T](complex[T]&)
complex[T] cos[T](complex[T]&)
complex[T] tan[T](complex[T]&)
complex[T] asin[T](complex[T]&)
complex[T] acos[T](complex[T]&)
complex[T] atan[T](complex[T]&)
-
+
complex[T] sinh[T](complex[T]&)
complex[T] cosh[T](complex[T]&)
complex[T] tanh[T](complex[T]&)
-
+
complex[T] asinh[T](complex[T]&)
complex[T] acosh[T](complex[T]&)
complex[T] atanh[T](complex[T]&)
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/deque.pxd b/contrib/tools/cython/Cython/Includes/libcpp/deque.pxd
index c1374cd2f4..9e2b2291d0 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/deque.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/deque.pxd
@@ -1,4 +1,4 @@
-cdef extern from "<deque>" namespace "std" nogil:
+cdef extern from "<deque>" namespace "std" nogil:
cdef cppclass deque[T,ALLOCATOR=*]:
ctypedef T value_type
ctypedef ALLOCATOR allocator_type
@@ -9,28 +9,28 @@ cdef extern from "<deque>" namespace "std" nogil:
ctypedef size_t size_type
ctypedef ptrdiff_t difference_type
- cppclass iterator:
- T& operator*()
- iterator operator++()
- iterator operator--()
+ cppclass iterator:
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
iterator operator+(size_type)
iterator operator-(size_type)
difference_type operator-(iterator)
- bint operator==(iterator)
- bint operator!=(iterator)
+ bint operator==(iterator)
+ bint operator!=(iterator)
bint operator<(iterator)
bint operator>(iterator)
bint operator<=(iterator)
bint operator>=(iterator)
- cppclass reverse_iterator:
- T& operator*()
+ cppclass reverse_iterator:
+ T& operator*()
reverse_iterator operator++()
reverse_iterator operator--()
reverse_iterator operator+(size_type)
reverse_iterator operator-(size_type)
difference_type operator-(reverse_iterator)
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
bint operator<(reverse_iterator)
bint operator>(reverse_iterator)
bint operator<=(reverse_iterator)
@@ -39,48 +39,48 @@ cdef extern from "<deque>" namespace "std" nogil:
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- deque() except +
- deque(deque&) except +
- deque(size_t) except +
- deque(size_t, T&) except +
- #deque[input_iterator](input_iterator, input_iterator)
- T& operator[](size_t)
- #deque& operator=(deque&)
- bint operator==(deque&, deque&)
- bint operator!=(deque&, deque&)
- bint operator<(deque&, deque&)
- bint operator>(deque&, deque&)
- bint operator<=(deque&, deque&)
- bint operator>=(deque&, deque&)
- void assign(size_t, T&)
- void assign(input_iterator, input_iterator)
- T& at(size_t)
- T& back()
- iterator begin()
+ deque() except +
+ deque(deque&) except +
+ deque(size_t) except +
+ deque(size_t, T&) except +
+ #deque[input_iterator](input_iterator, input_iterator)
+ T& operator[](size_t)
+ #deque& operator=(deque&)
+ bint operator==(deque&, deque&)
+ bint operator!=(deque&, deque&)
+ bint operator<(deque&, deque&)
+ bint operator>(deque&, deque&)
+ bint operator<=(deque&, deque&)
+ bint operator>=(deque&, deque&)
+ void assign(size_t, T&)
+ void assign(input_iterator, input_iterator)
+ T& at(size_t)
+ T& back()
+ iterator begin()
const_iterator const_begin "begin"()
- void clear()
- bint empty()
- iterator end()
+ void clear()
+ bint empty()
+ iterator end()
const_iterator const_end "end"()
- iterator erase(iterator)
- iterator erase(iterator, iterator)
- T& front()
- iterator insert(iterator, T&)
- void insert(iterator, size_t, T&)
- void insert(iterator, input_iterator, input_iterator)
- size_t max_size()
- void pop_back()
- void pop_front()
- void push_back(T&)
- void push_front(T&)
- reverse_iterator rbegin()
- #const_reverse_iterator rbegin()
- reverse_iterator rend()
- #const_reverse_iterator rend()
- void resize(size_t)
- void resize(size_t, T&)
- size_t size()
- void swap(deque&)
+ iterator erase(iterator)
+ iterator erase(iterator, iterator)
+ T& front()
+ iterator insert(iterator, T&)
+ void insert(iterator, size_t, T&)
+ void insert(iterator, input_iterator, input_iterator)
+ size_t max_size()
+ void pop_back()
+ void pop_front()
+ void push_back(T&)
+ void push_front(T&)
+ reverse_iterator rbegin()
+ #const_reverse_iterator rbegin()
+ reverse_iterator rend()
+ #const_reverse_iterator rend()
+ void resize(size_t)
+ void resize(size_t, T&)
+ size_t size()
+ void swap(deque&)
# C++11 methods
void shrink_to_fit()
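
For context, a minimal Cython sketch of the deque interface declared above (module compiled as C++); the helper name is hypothetical:

    from libcpp.deque cimport deque

    def first_and_last(values):
        cdef deque[int] d
        for v in values:
            d.push_back(v)
        if d.empty():
            return None
        return d.front(), d.back()
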
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/list.pxd b/contrib/tools/cython/Cython/Includes/libcpp/list.pxd
index ebbe39d487..b5b0410ad8 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/list.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/list.pxd
@@ -1,4 +1,4 @@
-cdef extern from "<list>" namespace "std" nogil:
+cdef extern from "<list>" namespace "std" nogil:
cdef cppclass list[T,ALLOCATOR=*]:
ctypedef T value_type
ctypedef ALLOCATOR allocator_type
@@ -9,70 +9,70 @@ cdef extern from "<list>" namespace "std" nogil:
ctypedef size_t size_type
ctypedef ptrdiff_t difference_type
- cppclass iterator:
- iterator()
- iterator(iterator &)
- T& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(iterator)
- bint operator!=(iterator)
- cppclass reverse_iterator:
- reverse_iterator()
- reverse_iterator(iterator &)
- T& operator*()
- reverse_iterator operator++()
- reverse_iterator operator--()
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
+ cppclass iterator:
+ iterator()
+ iterator(iterator &)
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(iterator)
+ bint operator!=(iterator)
+ cppclass reverse_iterator:
+ reverse_iterator()
+ reverse_iterator(iterator &)
+ T& operator*()
+ reverse_iterator operator++()
+ reverse_iterator operator--()
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- list() except +
- list(list&) except +
- list(size_t, T&) except +
- #list operator=(list&)
- bint operator==(list&, list&)
- bint operator!=(list&, list&)
- bint operator<(list&, list&)
- bint operator>(list&, list&)
- bint operator<=(list&, list&)
- bint operator>=(list&, list&)
- void assign(size_t, T&)
- T& back()
- iterator begin()
+ list() except +
+ list(list&) except +
+ list(size_t, T&) except +
+ #list operator=(list&)
+ bint operator==(list&, list&)
+ bint operator!=(list&, list&)
+ bint operator<(list&, list&)
+ bint operator>(list&, list&)
+ bint operator<=(list&, list&)
+ bint operator>=(list&, list&)
+ void assign(size_t, T&)
+ T& back()
+ iterator begin()
const_iterator const_begin "begin"()
- void clear()
- bint empty()
- iterator end()
+ void clear()
+ bint empty()
+ iterator end()
const_iterator const_end "end"()
- iterator erase(iterator)
- iterator erase(iterator, iterator)
- T& front()
- iterator insert(iterator, T&)
- void insert(iterator, size_t, T&)
- size_t max_size()
- void merge(list&)
- #void merge(list&, BinPred)
- void pop_back()
- void pop_front()
- void push_back(T&)
- void push_front(T&)
- reverse_iterator rbegin()
+ iterator erase(iterator)
+ iterator erase(iterator, iterator)
+ T& front()
+ iterator insert(iterator, T&)
+ void insert(iterator, size_t, T&)
+ size_t max_size()
+ void merge(list&)
+ #void merge(list&, BinPred)
+ void pop_back()
+ void pop_front()
+ void push_back(T&)
+ void push_front(T&)
+ reverse_iterator rbegin()
const_reverse_iterator const_rbegin "rbegin"()
- void remove(T&)
- #void remove_if(UnPred)
- reverse_iterator rend()
+ void remove(T&)
+ #void remove_if(UnPred)
+ reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
- void resize(size_t, T&)
- void reverse()
- size_t size()
- void sort()
- #void sort(BinPred)
- void splice(iterator, list&)
- void splice(iterator, list&, iterator)
- void splice(iterator, list&, iterator, iterator)
- void swap(list&)
- void unique()
- #void unique(BinPred)
+ void resize(size_t, T&)
+ void reverse()
+ size_t size()
+ void sort()
+ #void sort(BinPred)
+ void splice(iterator, list&)
+ void splice(iterator, list&, iterator)
+ void splice(iterator, list&, iterator, iterator)
+ void swap(list&)
+ void unique()
+ #void unique(BinPred)
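
For context, a minimal Cython sketch of the list interface declared above (module compiled as C++); it is cimported under another name so it does not shadow Python's built-in list, and the helper name is hypothetical:

    from libcpp.list cimport list as cpp_list

    def count_distinct_sorted(values):
        cdef cpp_list[int] lst
        for v in values:
            lst.push_back(v)
        lst.sort()
        lst.unique()     # drops consecutive duplicates, so what remains is the distinct count
        return lst.size()
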
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/map.pxd b/contrib/tools/cython/Cython/Includes/libcpp/map.pxd
index 167d467848..624a7ac026 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/map.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/map.pxd
@@ -1,68 +1,68 @@
-from .utility cimport pair
-
-cdef extern from "<map>" namespace "std" nogil:
+from .utility cimport pair
+
+cdef extern from "<map>" namespace "std" nogil:
cdef cppclass map[T, U, COMPARE=*, ALLOCATOR=*]:
ctypedef T key_type
ctypedef U mapped_type
ctypedef pair[const T, U] value_type
ctypedef COMPARE key_compare
ctypedef ALLOCATOR allocator_type
- cppclass iterator:
- pair[T, U]& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(iterator)
- bint operator!=(iterator)
- cppclass reverse_iterator:
- pair[T, U]& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
+ cppclass iterator:
+ pair[T, U]& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(iterator)
+ bint operator!=(iterator)
+ cppclass reverse_iterator:
+ pair[T, U]& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- map() except +
- map(map&) except +
- #map(key_compare&)
- U& operator[](T&)
- #map& operator=(map&)
- bint operator==(map&, map&)
- bint operator!=(map&, map&)
- bint operator<(map&, map&)
- bint operator>(map&, map&)
- bint operator<=(map&, map&)
- bint operator>=(map&, map&)
+ map() except +
+ map(map&) except +
+ #map(key_compare&)
+ U& operator[](T&)
+ #map& operator=(map&)
+ bint operator==(map&, map&)
+ bint operator!=(map&, map&)
+ bint operator<(map&, map&)
+ bint operator>(map&, map&)
+ bint operator<=(map&, map&)
+ bint operator>=(map&, map&)
U& at(const T&) except +
const U& const_at "at"(const T&) except +
- iterator begin()
- const_iterator const_begin "begin" ()
- void clear()
+ iterator begin()
+ const_iterator const_begin "begin" ()
+ void clear()
size_t count(const T&)
- bint empty()
- iterator end()
- const_iterator const_end "end" ()
+ bint empty()
+ iterator end()
+ const_iterator const_end "end" ()
pair[iterator, iterator] equal_range(const T&)
- #pair[const_iterator, const_iterator] equal_range(key_type&)
- void erase(iterator)
- void erase(iterator, iterator)
+ #pair[const_iterator, const_iterator] equal_range(key_type&)
+ void erase(iterator)
+ void erase(iterator, iterator)
size_t erase(const T&)
iterator find(const T&)
const_iterator const_find "find" (const T&)
pair[iterator, bint] insert(pair[T, U]) except + # XXX pair[T,U]&
iterator insert(iterator, pair[T, U]) except + # XXX pair[T,U]&
- #void insert(input_iterator, input_iterator)
- #key_compare key_comp()
+ #void insert(input_iterator, input_iterator)
+ #key_compare key_comp()
iterator lower_bound(const T&)
const_iterator const_lower_bound "lower_bound"(const T&)
- size_t max_size()
- reverse_iterator rbegin()
+ size_t max_size()
+ reverse_iterator rbegin()
const_reverse_iterator const_rbegin "rbegin"()
- reverse_iterator rend()
+ reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
- size_t size()
- void swap(map&)
+ size_t size()
+ void swap(map&)
iterator upper_bound(const T&)
const_iterator const_upper_bound "upper_bound"(const T&)
- #value_compare value_comp()
+ #value_compare value_comp()
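
A minimal sketch, again illustrative rather than part of the commit, of the std::map declarations above: operator[] for insertion and explicit iterator traversal (keys come back in sorted order). The helper name is an assumption; keys are bytes objects.

    from cython.operator cimport dereference as deref, preincrement as inc
    from libcpp.map cimport map as cpp_map
    from libcpp.string cimport string

    def word_counts(words):
        # Hypothetical helper; 'words' is an iterable of bytes keys.
        cdef cpp_map[string, int] counts
        cdef cpp_map[string, int].iterator it
        cdef string key
        for w in words:
            key = w
            counts[key] = counts[key] + 1   # operator[] default-inserts 0
        out = {}
        it = counts.begin()
        while it != counts.end():           # sorted key order
            out[deref(it).first] = deref(it).second
            inc(it)
        return out
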
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/pair.pxd b/contrib/tools/cython/Cython/Includes/libcpp/pair.pxd
index 76a21aba91..869fe6674d 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/pair.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/pair.pxd
@@ -1 +1 @@
-from .utility cimport pair
+from .utility cimport pair
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/queue.pxd b/contrib/tools/cython/Cython/Includes/libcpp/queue.pxd
index 1b4adbbed6..578cbd9159 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/queue.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/queue.pxd
@@ -1,25 +1,25 @@
-cdef extern from "<queue>" namespace "std" nogil:
- cdef cppclass queue[T]:
- queue() except +
- queue(queue&) except +
- #queue(Container&)
- T& back()
- bint empty()
- T& front()
- void pop()
- void push(T&)
- size_t size()
+cdef extern from "<queue>" namespace "std" nogil:
+ cdef cppclass queue[T]:
+ queue() except +
+ queue(queue&) except +
+ #queue(Container&)
+ T& back()
+ bint empty()
+ T& front()
+ void pop()
+ void push(T&)
+ size_t size()
# C++11 methods
void swap(queue&)
- cdef cppclass priority_queue[T]:
- priority_queue() except +
- priority_queue(priority_queue&) except +
- #priority_queue(Container&)
- bint empty()
- void pop()
- void push(T&)
- size_t size()
- T& top()
+ cdef cppclass priority_queue[T]:
+ priority_queue() except +
+ priority_queue(priority_queue&) except +
+ #priority_queue(Container&)
+ bint empty()
+ void pop()
+ void push(T&)
+ size_t size()
+ T& top()
# C++11 methods
void swap(priority_queue&)
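
An illustrative sketch of the priority_queue declarations above (not part of the commit): a max-heap where top() always yields the largest remaining element.

    from libcpp.queue cimport priority_queue

    def k_largest(values, int k):
        # Hypothetical helper returning the k largest ints, largest first.
        cdef priority_queue[int] pq
        for v in values:
            pq.push(v)
        out = []
        while not pq.empty() and len(out) < k:
            out.append(pq.top())
            pq.pop()
        return out
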
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/set.pxd b/contrib/tools/cython/Cython/Includes/libcpp/set.pxd
index 9d9e0ca66d..1069be7466 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/set.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/set.pxd
@@ -1,61 +1,61 @@
-from .utility cimport pair
-
-cdef extern from "<set>" namespace "std" nogil:
- cdef cppclass set[T]:
+from .utility cimport pair
+
+cdef extern from "<set>" namespace "std" nogil:
+ cdef cppclass set[T]:
ctypedef T value_type
- cppclass iterator:
- T& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(iterator)
- bint operator!=(iterator)
- cppclass reverse_iterator:
- T& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
+ cppclass iterator:
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(iterator)
+ bint operator!=(iterator)
+ cppclass reverse_iterator:
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- set() except +
- set(set&) except +
- #set(key_compare&)
- #set& operator=(set&)
- bint operator==(set&, set&)
- bint operator!=(set&, set&)
- bint operator<(set&, set&)
- bint operator>(set&, set&)
- bint operator<=(set&, set&)
- bint operator>=(set&, set&)
- iterator begin()
+ set() except +
+ set(set&) except +
+ #set(key_compare&)
+ #set& operator=(set&)
+ bint operator==(set&, set&)
+ bint operator!=(set&, set&)
+ bint operator<(set&, set&)
+ bint operator>(set&, set&)
+ bint operator<=(set&, set&)
+ bint operator>=(set&, set&)
+ iterator begin()
const_iterator const_begin "begin"()
- void clear()
+ void clear()
size_t count(const T&)
- bint empty()
- iterator end()
+ bint empty()
+ iterator end()
const_iterator const_end "end"()
pair[iterator, iterator] equal_range(const T&)
- #pair[const_iterator, const_iterator] equal_range(T&)
+ #pair[const_iterator, const_iterator] equal_range(T&)
iterator erase(iterator)
iterator erase(iterator, iterator)
- size_t erase(T&)
- iterator find(T&)
+ size_t erase(T&)
+ iterator find(T&)
const_iterator const_find "find"(T&)
pair[iterator, bint] insert(const T&) except +
iterator insert(iterator, const T&) except +
void insert(iterator, iterator) except +
- #key_compare key_comp()
- iterator lower_bound(T&)
+ #key_compare key_comp()
+ iterator lower_bound(T&)
const_iterator const_lower_bound "lower_bound"(T&)
- size_t max_size()
- reverse_iterator rbegin()
+ size_t max_size()
+ reverse_iterator rbegin()
const_reverse_iterator const_rbegin "rbegin"()
- reverse_iterator rend()
+ reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
- size_t size()
- void swap(set&)
+ size_t size()
+ void swap(set&)
iterator upper_bound(const T&)
const_iterator const_upper_bound "upper_bound"(const T&)
- #value_compare value_comp()
+ #value_compare value_comp()
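
A small illustrative sketch of the std::set declarations above: insert() ignores duplicates and iteration runs in key order (the helper name is hypothetical).

    from libcpp.set cimport set as cpp_set

    def sorted_distinct(values):
        cdef cpp_set[int] s
        for v in values:
            s.insert(v)           # pair[iterator, bint]; duplicates are no-ops
        return [v for v in s]     # ascending order via begin()/end()
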
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/stack.pxd b/contrib/tools/cython/Cython/Includes/libcpp/stack.pxd
index 923823e3ec..2dc80992b7 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/stack.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/stack.pxd
@@ -1,11 +1,11 @@
-cdef extern from "<stack>" namespace "std" nogil:
- cdef cppclass stack[T]:
+cdef extern from "<stack>" namespace "std" nogil:
+ cdef cppclass stack[T]:
ctypedef T value_type
- stack() except +
- stack(stack&) except +
- #stack(Container&)
- bint empty()
- void pop()
- void push(T&)
- size_t size()
- T& top()
+ stack() except +
+ stack(stack&) except +
+ #stack(Container&)
+ bint empty()
+ void pop()
+ void push(T&)
+ size_t size()
+ T& top()
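
An illustrative sketch of the std::stack declarations above, here used for simple bracket matching (hypothetical helper, bytes input).

    from libcpp.stack cimport stack

    def balanced(bytes text):
        cdef stack[char] st
        cdef char c
        for c in text:
            if c == c'(':
                st.push(c)
            elif c == c')':
                if st.empty():
                    return False
                st.pop()
        return st.empty()
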
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/string.pxd b/contrib/tools/cython/Cython/Includes/libcpp/string.pxd
index 5f965e8d97..a894144f1f 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/string.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/string.pxd
@@ -1,13 +1,13 @@
-
-# deprecated cimport for backwards compatibility:
-from libc.string cimport const_char
-
+
+# deprecated cimport for backwards compatibility:
+from libc.string cimport const_char
+
cdef extern from "<string>" namespace "std::string" nogil:
const size_t npos
-
-cdef extern from "<string>" namespace "std" nogil:
- cdef cppclass string:
-
+
+cdef extern from "<string>" namespace "std" nogil:
+ cdef cppclass string:
+
cppclass iterator:
iterator()
char& operator*()
@@ -54,19 +54,19 @@ cdef extern from "<string>" namespace "std" nogil:
reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
- const char* c_str()
- const char* data()
- size_t size()
- size_t max_size()
- size_t length()
+ const char* c_str()
+ const char* data()
+ size_t size()
+ size_t max_size()
+ size_t length()
void resize(size_t) except +
void resize(size_t, char) except +
void shrink_to_fit() except +
- size_t capacity()
+ size_t capacity()
void reserve(size_t) except +
- void clear()
- bint empty()
-
+ void clear()
+ bint empty()
+
iterator erase(iterator first, iterator last)
iterator erase(iterator p)
iterator erase(const_iterator first, const_iterator last)
@@ -74,7 +74,7 @@ cdef extern from "<string>" namespace "std" nogil:
string& erase(size_t pos, size_t len) except +
string& erase(size_t pos) except +
string& erase() except +
-
+
char& at(size_t pos) except +
char& operator[](size_t pos)
char& front()
@@ -85,22 +85,22 @@ cdef extern from "<string>" namespace "std" nogil:
int compare(const char* s) except +
int compare(size_t pos, size_t len, const char* s) except +
int compare(size_t pos, size_t len, const char* s , size_t n) except +
-
+
string& append(const string& s) except +
string& append(const string& s, size_t subpos, size_t sublen) except +
string& append(const char* s) except +
string& append(const char* s, size_t n) except +
string& append(size_t n, char c) except +
-
+
void push_back(char c) except +
void pop_back()
-
+
string& assign(const string& s) except +
string& assign(const string& s, size_t subpos, size_t sublen) except +
string& assign(const char* s, size_t n) except +
string& assign(const char* s) except +
string& assign(size_t n, char c) except +
-
+
string& insert(size_t pos, const string& s, size_t subpos, size_t sublen) except +
string& insert(size_t pos, const string& s) except +
string& insert(size_t pos, const char* s, size_t n) except +
@@ -108,10 +108,10 @@ cdef extern from "<string>" namespace "std" nogil:
string& insert(size_t pos, size_t n, char c) except +
void insert(iterator p, size_t n, char c) except +
iterator insert(iterator p, char c) except +
-
+
size_t copy(char* s, size_t len, size_t pos) except +
size_t copy(char* s, size_t len) except +
-
+
size_t find(const string& s, size_t pos)
size_t find(const string& s)
size_t find(const char* s, size_t pos, size_t n)
@@ -119,39 +119,39 @@ cdef extern from "<string>" namespace "std" nogil:
size_t find(const char* s)
size_t find(char c, size_t pos)
size_t find(char c)
-
+
size_t rfind(const string&, size_t pos)
size_t rfind(const string&)
size_t rfind(const char* s, size_t pos, size_t n)
size_t rfind(const char* s, size_t pos)
size_t rfind(const char* s)
size_t rfind(char c, size_t pos)
- size_t rfind(char c)
-
+ size_t rfind(char c)
+
size_t find_first_of(const string&, size_t pos)
size_t find_first_of(const string&)
size_t find_first_of(const char* s, size_t pos, size_t n)
size_t find_first_of(const char* s, size_t pos)
size_t find_first_of(const char* s)
size_t find_first_of(char c, size_t pos)
- size_t find_first_of(char c)
-
+ size_t find_first_of(char c)
+
size_t find_first_not_of(const string& s, size_t pos)
size_t find_first_not_of(const string& s)
size_t find_first_not_of(const char* s, size_t pos, size_t n)
size_t find_first_not_of(const char* s, size_t pos)
size_t find_first_not_of(const char*)
size_t find_first_not_of(char c, size_t pos)
- size_t find_first_not_of(char c)
-
+ size_t find_first_not_of(char c)
+
size_t find_last_of(const string& s, size_t pos)
size_t find_last_of(const string& s)
size_t find_last_of(const char* s, size_t pos, size_t n)
size_t find_last_of(const char* s, size_t pos)
size_t find_last_of(const char* s)
size_t find_last_of(char c, size_t pos)
- size_t find_last_of(char c)
-
+ size_t find_last_of(char c)
+
size_t find_last_not_of(const string& s, size_t pos)
size_t find_last_not_of(const string& s)
size_t find_last_not_of(const char* s, size_t pos, size_t n)
@@ -159,33 +159,33 @@ cdef extern from "<string>" namespace "std" nogil:
size_t find_last_not_of(const char* s)
size_t find_last_not_of(char c, size_t pos)
size_t find_last_not_of(char c)
-
+
string substr(size_t pos, size_t len) except +
string substr(size_t pos) except +
- string substr()
-
+ string substr()
+
#string& operator= (const string&)
#string& operator= (const char*)
- #string& operator= (char)
-
+ #string& operator= (char)
+
string operator+ (const string&) except +
string operator+ (const char*) except +
-
+
bint operator==(const string&)
bint operator==(const char*)
-
+
bint operator!= (const string&)
bint operator!= (const char*)
-
+
bint operator< (const string&)
bint operator< (const char*)
-
+
bint operator> (const string&)
bint operator> (const char*)
-
+
bint operator<= (const string&)
bint operator<= (const char*)
-
+
bint operator>= (const string&)
bint operator>= (const char*)
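
A minimal sketch of the std::string declarations above; Cython coerces bytes to std::string on assignment and back to bytes on return. The helper is hypothetical and assumes a C++ build as in the earlier sketches.

    from libcpp.string cimport string

    def ellipsize(data, size_t limit):
        # 'data' is a bytes object.
        cdef string s = data
        cdef string out
        if s.size() <= limit:
            return s                    # std::string -> bytes
        out = s.substr(0, limit)        # substr(pos, len), declared above
        out.append(b"...")              # append(const char*) overload
        return out
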
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/unordered_map.pxd b/contrib/tools/cython/Cython/Includes/libcpp/unordered_map.pxd
index 370710abe0..a00fbbed28 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/unordered_map.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/unordered_map.pxd
@@ -1,69 +1,69 @@
-from .utility cimport pair
-
-cdef extern from "<unordered_map>" namespace "std" nogil:
+from .utility cimport pair
+
+cdef extern from "<unordered_map>" namespace "std" nogil:
cdef cppclass unordered_map[T, U, HASH=*, PRED=*, ALLOCATOR=*]:
ctypedef T key_type
ctypedef U mapped_type
ctypedef pair[const T, U] value_type
- cppclass iterator:
- pair[T, U]& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(iterator)
- bint operator!=(iterator)
- cppclass reverse_iterator:
- pair[T, U]& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
+ cppclass iterator:
+ pair[T, U]& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(iterator)
+ bint operator!=(iterator)
+ cppclass reverse_iterator:
+ pair[T, U]& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- unordered_map() except +
- unordered_map(unordered_map&) except +
- #unordered_map(key_compare&)
- U& operator[](T&)
- #unordered_map& operator=(unordered_map&)
- bint operator==(unordered_map&, unordered_map&)
- bint operator!=(unordered_map&, unordered_map&)
- bint operator<(unordered_map&, unordered_map&)
- bint operator>(unordered_map&, unordered_map&)
- bint operator<=(unordered_map&, unordered_map&)
- bint operator>=(unordered_map&, unordered_map&)
+ unordered_map() except +
+ unordered_map(unordered_map&) except +
+ #unordered_map(key_compare&)
+ U& operator[](T&)
+ #unordered_map& operator=(unordered_map&)
+ bint operator==(unordered_map&, unordered_map&)
+ bint operator!=(unordered_map&, unordered_map&)
+ bint operator<(unordered_map&, unordered_map&)
+ bint operator>(unordered_map&, unordered_map&)
+ bint operator<=(unordered_map&, unordered_map&)
+ bint operator>=(unordered_map&, unordered_map&)
U& at(const T&)
const U& const_at "at"(const T&)
- iterator begin()
+ iterator begin()
const_iterator const_begin "begin"()
- void clear()
- size_t count(T&)
- bint empty()
- iterator end()
+ void clear()
+ size_t count(T&)
+ bint empty()
+ iterator end()
const_iterator const_end "end"()
- pair[iterator, iterator] equal_range(T&)
+ pair[iterator, iterator] equal_range(T&)
pair[const_iterator, const_iterator] const_equal_range "equal_range"(const T&)
iterator erase(iterator)
iterator erase(iterator, iterator)
- size_t erase(T&)
- iterator find(T&)
+ size_t erase(T&)
+ iterator find(T&)
const_iterator const_find "find"(T&)
- pair[iterator, bint] insert(pair[T, U]) # XXX pair[T,U]&
- iterator insert(iterator, pair[T, U]) # XXX pair[T,U]&
+ pair[iterator, bint] insert(pair[T, U]) # XXX pair[T,U]&
+ iterator insert(iterator, pair[T, U]) # XXX pair[T,U]&
iterator insert(iterator, iterator)
- #key_compare key_comp()
- iterator lower_bound(T&)
+ #key_compare key_comp()
+ iterator lower_bound(T&)
const_iterator const_lower_bound "lower_bound"(T&)
- size_t max_size()
- reverse_iterator rbegin()
+ size_t max_size()
+ reverse_iterator rbegin()
const_reverse_iterator const_rbegin "rbegin"()
- reverse_iterator rend()
+ reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
- size_t size()
- void swap(unordered_map&)
- iterator upper_bound(T&)
+ size_t size()
+ void swap(unordered_map&)
+ iterator upper_bound(T&)
const_iterator const_upper_bound "upper_bound"(T&)
- #value_compare value_comp()
+ #value_compare value_comp()
void max_load_factor(float)
float max_load_factor()
void rehash(size_t)
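
An illustrative sketch of the unordered_map declarations above, checking count() before at() since the at() declared here carries no "except +" (hypothetical helper; keys are bytes).

    from libcpp.unordered_map cimport unordered_map
    from libcpp.string cimport string

    def position_of(names, name):
        cdef unordered_map[string, size_t] index
        cdef string key
        cdef size_t i = 0
        for n in names:
            key = n
            index[key] = i      # operator[] inserts or overwrites
            i += 1
        key = name
        if index.count(key):
            return index.at(key)
        return None
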
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/unordered_set.pxd b/contrib/tools/cython/Cython/Includes/libcpp/unordered_set.pxd
index 9379003a45..5aa2417528 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/unordered_set.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/unordered_set.pxd
@@ -1,64 +1,64 @@
-from .utility cimport pair
-
-cdef extern from "<unordered_set>" namespace "std" nogil:
+from .utility cimport pair
+
+cdef extern from "<unordered_set>" namespace "std" nogil:
cdef cppclass unordered_set[T,HASH=*,PRED=*,ALLOCATOR=*]:
ctypedef T value_type
- cppclass iterator:
- T& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(iterator)
- bint operator!=(iterator)
- cppclass reverse_iterator:
- T& operator*()
- iterator operator++()
- iterator operator--()
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
+ cppclass iterator:
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(iterator)
+ bint operator!=(iterator)
+ cppclass reverse_iterator:
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- unordered_set() except +
- unordered_set(unordered_set&) except +
- #unordered_set(key_compare&)
- #unordered_set& operator=(unordered_set&)
- bint operator==(unordered_set&, unordered_set&)
- bint operator!=(unordered_set&, unordered_set&)
- bint operator<(unordered_set&, unordered_set&)
- bint operator>(unordered_set&, unordered_set&)
- bint operator<=(unordered_set&, unordered_set&)
- bint operator>=(unordered_set&, unordered_set&)
- iterator begin()
+ unordered_set() except +
+ unordered_set(unordered_set&) except +
+ #unordered_set(key_compare&)
+ #unordered_set& operator=(unordered_set&)
+ bint operator==(unordered_set&, unordered_set&)
+ bint operator!=(unordered_set&, unordered_set&)
+ bint operator<(unordered_set&, unordered_set&)
+ bint operator>(unordered_set&, unordered_set&)
+ bint operator<=(unordered_set&, unordered_set&)
+ bint operator>=(unordered_set&, unordered_set&)
+ iterator begin()
const_iterator const_begin "begin"()
- void clear()
- size_t count(T&)
- bint empty()
- iterator end()
+ void clear()
+ size_t count(T&)
+ bint empty()
+ iterator end()
const_iterator const_end "end"()
- pair[iterator, iterator] equal_range(T&)
+ pair[iterator, iterator] equal_range(T&)
pair[const_iterator, const_iterator] const_equal_range "equal_range"(T&)
iterator erase(iterator)
iterator erase(iterator, iterator)
- size_t erase(T&)
- iterator find(T&)
+ size_t erase(T&)
+ iterator find(T&)
const_iterator const_find "find"(T&)
- pair[iterator, bint] insert(T&)
- iterator insert(iterator, T&)
- #key_compare key_comp()
+ pair[iterator, bint] insert(T&)
+ iterator insert(iterator, T&)
+ #key_compare key_comp()
iterator insert(iterator, iterator)
- iterator lower_bound(T&)
+ iterator lower_bound(T&)
const_iterator const_lower_bound "lower_bound"(T&)
- size_t max_size()
- reverse_iterator rbegin()
+ size_t max_size()
+ reverse_iterator rbegin()
const_reverse_iterator const_rbegin "rbegin"()
- reverse_iterator rend()
+ reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
- size_t size()
- void swap(unordered_set&)
- iterator upper_bound(T&)
+ size_t size()
+ void swap(unordered_set&)
+ iterator upper_bound(T&)
const_iterator const_upper_bound "upper_bound"(T&)
- #value_compare value_comp()
+ #value_compare value_comp()
void max_load_factor(float)
float max_load_factor()
void rehash(size_t)
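
A short illustrative sketch of the unordered_set declarations above, using count() for membership tests (hypothetical helper).

    from libcpp.unordered_set cimport unordered_set

    def first_duplicate(values):
        cdef unordered_set[long] seen
        for v in values:
            if seen.count(v):     # 0 or 1 for a set
                return v
            seen.insert(v)
        return None
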
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/utility.pxd b/contrib/tools/cython/Cython/Includes/libcpp/utility.pxd
index 82bd11e42f..e0df69b166 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/utility.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/utility.pxd
@@ -1,18 +1,18 @@
-cdef extern from "<utility>" namespace "std" nogil:
- cdef cppclass pair[T, U]:
+cdef extern from "<utility>" namespace "std" nogil:
+ cdef cppclass pair[T, U]:
ctypedef T first_type
ctypedef U second_type
- T first
- U second
- pair() except +
- pair(pair&) except +
- pair(T&, U&) except +
- bint operator==(pair&, pair&)
- bint operator!=(pair&, pair&)
- bint operator<(pair&, pair&)
- bint operator>(pair&, pair&)
- bint operator<=(pair&, pair&)
- bint operator>=(pair&, pair&)
+ T first
+ U second
+ pair() except +
+ pair(pair&) except +
+ pair(T&, U&) except +
+ bint operator==(pair&, pair&)
+ bint operator!=(pair&, pair&)
+ bint operator<(pair&, pair&)
+ bint operator>(pair&, pair&)
+ bint operator<=(pair&, pair&)
+ bint operator>=(pair&, pair&)
cdef extern from * namespace "cython_std" nogil:
"""
diff --git a/contrib/tools/cython/Cython/Includes/libcpp/vector.pxd b/contrib/tools/cython/Cython/Includes/libcpp/vector.pxd
index 1ec22706b2..9b007dd0c7 100644
--- a/contrib/tools/cython/Cython/Includes/libcpp/vector.pxd
+++ b/contrib/tools/cython/Cython/Includes/libcpp/vector.pxd
@@ -1,4 +1,4 @@
-cdef extern from "<vector>" namespace "std" nogil:
+cdef extern from "<vector>" namespace "std" nogil:
cdef cppclass vector[T,ALLOCATOR=*]:
ctypedef T value_type
ctypedef ALLOCATOR allocator_type
@@ -9,80 +9,80 @@ cdef extern from "<vector>" namespace "std" nogil:
ctypedef size_t size_type
ctypedef ptrdiff_t difference_type
- cppclass iterator:
- T& operator*()
- iterator operator++()
- iterator operator--()
+ cppclass iterator:
+ T& operator*()
+ iterator operator++()
+ iterator operator--()
iterator operator+(size_type)
iterator operator-(size_type)
difference_type operator-(iterator)
- bint operator==(iterator)
- bint operator!=(iterator)
- bint operator<(iterator)
- bint operator>(iterator)
- bint operator<=(iterator)
- bint operator>=(iterator)
- cppclass reverse_iterator:
- T& operator*()
+ bint operator==(iterator)
+ bint operator!=(iterator)
+ bint operator<(iterator)
+ bint operator>(iterator)
+ bint operator<=(iterator)
+ bint operator>=(iterator)
+ cppclass reverse_iterator:
+ T& operator*()
reverse_iterator operator++()
reverse_iterator operator--()
reverse_iterator operator+(size_type)
reverse_iterator operator-(size_type)
difference_type operator-(reverse_iterator)
- bint operator==(reverse_iterator)
- bint operator!=(reverse_iterator)
- bint operator<(reverse_iterator)
- bint operator>(reverse_iterator)
- bint operator<=(reverse_iterator)
- bint operator>=(reverse_iterator)
+ bint operator==(reverse_iterator)
+ bint operator!=(reverse_iterator)
+ bint operator<(reverse_iterator)
+ bint operator>(reverse_iterator)
+ bint operator<=(reverse_iterator)
+ bint operator>=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
- vector() except +
- vector(vector&) except +
+ vector() except +
+ vector(vector&) except +
vector(size_type) except +
vector(size_type, T&) except +
- #vector[input_iterator](input_iterator, input_iterator)
+ #vector[input_iterator](input_iterator, input_iterator)
T& operator[](size_type)
- #vector& operator=(vector&)
- bint operator==(vector&, vector&)
- bint operator!=(vector&, vector&)
- bint operator<(vector&, vector&)
- bint operator>(vector&, vector&)
- bint operator<=(vector&, vector&)
- bint operator>=(vector&, vector&)
+ #vector& operator=(vector&)
+ bint operator==(vector&, vector&)
+ bint operator!=(vector&, vector&)
+ bint operator<(vector&, vector&)
+ bint operator>(vector&, vector&)
+ bint operator<=(vector&, vector&)
+ bint operator>=(vector&, vector&)
void assign(size_type, const T&)
void assign[input_iterator](input_iterator, input_iterator) except +
T& at(size_type) except +
- T& back()
- iterator begin()
+ T& back()
+ iterator begin()
const_iterator const_begin "begin"()
size_type capacity()
- void clear()
- bint empty()
- iterator end()
+ void clear()
+ bint empty()
+ iterator end()
const_iterator const_end "end"()
- iterator erase(iterator)
- iterator erase(iterator, iterator)
- T& front()
+ iterator erase(iterator)
+ iterator erase(iterator, iterator)
+ T& front()
iterator insert(iterator, const T&) except +
iterator insert(iterator, size_type, const T&) except +
iterator insert[Iter](iterator, Iter, Iter) except +
size_type max_size()
- void pop_back()
+ void pop_back()
void push_back(T&) except +
- reverse_iterator rbegin()
+ reverse_iterator rbegin()
const_reverse_iterator const_rbegin "crbegin"()
- reverse_iterator rend()
+ reverse_iterator rend()
const_reverse_iterator const_rend "crend"()
void reserve(size_type)
void resize(size_type) except +
void resize(size_type, T&) except +
size_type size()
- void swap(vector&)
+ void swap(vector&)
# C++11 methods
- T* data()
+ T* data()
const T* const_data "data"()
- void shrink_to_fit()
+ void shrink_to_fit()
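
A minimal sketch of the std::vector declarations above: reserve()/push_back() and the automatic conversion of a returned vector to a Python list (hypothetical helper).

    from libcpp.vector cimport vector

    def running_sum(values):
        cdef vector[double] out
        out.reserve(len(values))
        cdef double total = 0.0
        for v in values:
            total += v
            out.push_back(total)
        return out                # vector[double] -> list of floats
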
diff --git a/contrib/tools/cython/Cython/Includes/numpy.pxd b/contrib/tools/cython/Cython/Includes/numpy.pxd
index d8296d6643..789669dac1 100644
--- a/contrib/tools/cython/Cython/Includes/numpy.pxd
+++ b/contrib/tools/cython/Cython/Includes/numpy.pxd
@@ -1,97 +1,97 @@
-# NumPy static imports for Cython
-#
-# If any of the PyArray_* functions are called, import_array must be
-# called first.
-#
+# NumPy static imports for Cython
+#
+# If any of the PyArray_* functions are called, import_array must be
+# called first.
+#
# This also defines backwards-compatibility buffer acquisition
-# code for use in Python 2.x (or Python <= 2.5 when NumPy starts
-# implementing PEP-3118 directly).
-#
-# Because of laziness, the format string of the buffer is statically
-# allocated. Increase the size if this is not enough, or submit a
-# patch to do this properly.
-#
-# Author: Dag Sverre Seljebotn
-#
-
-DEF _buffer_format_string_len = 255
-
-cimport cpython.buffer as pybuf
+# code for use in Python 2.x (or Python <= 2.5 when NumPy starts
+# implementing PEP-3118 directly).
+#
+# Because of laziness, the format string of the buffer is statically
+# allocated. Increase the size if this is not enough, or submit a
+# patch to do this properly.
+#
+# Author: Dag Sverre Seljebotn
+#
+
+DEF _buffer_format_string_len = 255
+
+cimport cpython.buffer as pybuf
from cpython.ref cimport Py_INCREF
from cpython.mem cimport PyObject_Malloc, PyObject_Free
from cpython.object cimport PyObject, PyTypeObject
-from cpython.type cimport type
-cimport libc.stdio as stdio
-
-cdef extern from "Python.h":
- ctypedef int Py_intptr_t
-
-cdef extern from "numpy/arrayobject.h":
- ctypedef Py_intptr_t npy_intp
- ctypedef size_t npy_uintp
-
- cdef enum NPY_TYPES:
- NPY_BOOL
- NPY_BYTE
- NPY_UBYTE
- NPY_SHORT
- NPY_USHORT
- NPY_INT
- NPY_UINT
- NPY_LONG
- NPY_ULONG
- NPY_LONGLONG
- NPY_ULONGLONG
- NPY_FLOAT
- NPY_DOUBLE
- NPY_LONGDOUBLE
- NPY_CFLOAT
- NPY_CDOUBLE
- NPY_CLONGDOUBLE
- NPY_OBJECT
- NPY_STRING
- NPY_UNICODE
- NPY_VOID
+from cpython.type cimport type
+cimport libc.stdio as stdio
+
+cdef extern from "Python.h":
+ ctypedef int Py_intptr_t
+
+cdef extern from "numpy/arrayobject.h":
+ ctypedef Py_intptr_t npy_intp
+ ctypedef size_t npy_uintp
+
+ cdef enum NPY_TYPES:
+ NPY_BOOL
+ NPY_BYTE
+ NPY_UBYTE
+ NPY_SHORT
+ NPY_USHORT
+ NPY_INT
+ NPY_UINT
+ NPY_LONG
+ NPY_ULONG
+ NPY_LONGLONG
+ NPY_ULONGLONG
+ NPY_FLOAT
+ NPY_DOUBLE
+ NPY_LONGDOUBLE
+ NPY_CFLOAT
+ NPY_CDOUBLE
+ NPY_CLONGDOUBLE
+ NPY_OBJECT
+ NPY_STRING
+ NPY_UNICODE
+ NPY_VOID
NPY_DATETIME
NPY_TIMEDELTA
- NPY_NTYPES
- NPY_NOTYPE
-
- NPY_INT8
- NPY_INT16
- NPY_INT32
- NPY_INT64
- NPY_INT128
- NPY_INT256
- NPY_UINT8
- NPY_UINT16
- NPY_UINT32
- NPY_UINT64
- NPY_UINT128
- NPY_UINT256
- NPY_FLOAT16
- NPY_FLOAT32
- NPY_FLOAT64
- NPY_FLOAT80
- NPY_FLOAT96
- NPY_FLOAT128
- NPY_FLOAT256
- NPY_COMPLEX32
- NPY_COMPLEX64
- NPY_COMPLEX128
- NPY_COMPLEX160
- NPY_COMPLEX192
- NPY_COMPLEX256
- NPY_COMPLEX512
-
- NPY_INTP
-
- ctypedef enum NPY_ORDER:
- NPY_ANYORDER
- NPY_CORDER
- NPY_FORTRANORDER
+ NPY_NTYPES
+ NPY_NOTYPE
+
+ NPY_INT8
+ NPY_INT16
+ NPY_INT32
+ NPY_INT64
+ NPY_INT128
+ NPY_INT256
+ NPY_UINT8
+ NPY_UINT16
+ NPY_UINT32
+ NPY_UINT64
+ NPY_UINT128
+ NPY_UINT256
+ NPY_FLOAT16
+ NPY_FLOAT32
+ NPY_FLOAT64
+ NPY_FLOAT80
+ NPY_FLOAT96
+ NPY_FLOAT128
+ NPY_FLOAT256
+ NPY_COMPLEX32
+ NPY_COMPLEX64
+ NPY_COMPLEX128
+ NPY_COMPLEX160
+ NPY_COMPLEX192
+ NPY_COMPLEX256
+ NPY_COMPLEX512
+
+ NPY_INTP
+
+ ctypedef enum NPY_ORDER:
+ NPY_ANYORDER
+ NPY_CORDER
+ NPY_FORTRANORDER
NPY_KEEPORDER
-
+
ctypedef enum NPY_CASTING:
NPY_NO_CASTING
NPY_EQUIV_CASTING
@@ -99,63 +99,63 @@ cdef extern from "numpy/arrayobject.h":
NPY_SAME_KIND_CASTING
NPY_UNSAFE_CASTING
- ctypedef enum NPY_CLIPMODE:
- NPY_CLIP
- NPY_WRAP
- NPY_RAISE
-
- ctypedef enum NPY_SCALARKIND:
- NPY_NOSCALAR,
- NPY_BOOL_SCALAR,
- NPY_INTPOS_SCALAR,
- NPY_INTNEG_SCALAR,
- NPY_FLOAT_SCALAR,
- NPY_COMPLEX_SCALAR,
- NPY_OBJECT_SCALAR
-
- ctypedef enum NPY_SORTKIND:
- NPY_QUICKSORT
- NPY_HEAPSORT
- NPY_MERGESORT
-
- ctypedef enum NPY_SEARCHSIDE:
- NPY_SEARCHLEFT
- NPY_SEARCHRIGHT
-
- enum:
+ ctypedef enum NPY_CLIPMODE:
+ NPY_CLIP
+ NPY_WRAP
+ NPY_RAISE
+
+ ctypedef enum NPY_SCALARKIND:
+ NPY_NOSCALAR,
+ NPY_BOOL_SCALAR,
+ NPY_INTPOS_SCALAR,
+ NPY_INTNEG_SCALAR,
+ NPY_FLOAT_SCALAR,
+ NPY_COMPLEX_SCALAR,
+ NPY_OBJECT_SCALAR
+
+ ctypedef enum NPY_SORTKIND:
+ NPY_QUICKSORT
+ NPY_HEAPSORT
+ NPY_MERGESORT
+
+ ctypedef enum NPY_SEARCHSIDE:
+ NPY_SEARCHLEFT
+ NPY_SEARCHRIGHT
+
+ enum:
# DEPRECATED since NumPy 1.7 ! Do not use in new code!
- NPY_C_CONTIGUOUS
- NPY_F_CONTIGUOUS
- NPY_CONTIGUOUS
- NPY_FORTRAN
- NPY_OWNDATA
- NPY_FORCECAST
- NPY_ENSURECOPY
- NPY_ENSUREARRAY
- NPY_ELEMENTSTRIDES
- NPY_ALIGNED
- NPY_NOTSWAPPED
- NPY_WRITEABLE
- NPY_UPDATEIFCOPY
- NPY_ARR_HAS_DESCR
-
- NPY_BEHAVED
- NPY_BEHAVED_NS
- NPY_CARRAY
- NPY_CARRAY_RO
- NPY_FARRAY
- NPY_FARRAY_RO
- NPY_DEFAULT
-
- NPY_IN_ARRAY
- NPY_OUT_ARRAY
- NPY_INOUT_ARRAY
- NPY_IN_FARRAY
- NPY_OUT_FARRAY
- NPY_INOUT_FARRAY
-
- NPY_UPDATE_ALL
-
+ NPY_C_CONTIGUOUS
+ NPY_F_CONTIGUOUS
+ NPY_CONTIGUOUS
+ NPY_FORTRAN
+ NPY_OWNDATA
+ NPY_FORCECAST
+ NPY_ENSURECOPY
+ NPY_ENSUREARRAY
+ NPY_ELEMENTSTRIDES
+ NPY_ALIGNED
+ NPY_NOTSWAPPED
+ NPY_WRITEABLE
+ NPY_UPDATEIFCOPY
+ NPY_ARR_HAS_DESCR
+
+ NPY_BEHAVED
+ NPY_BEHAVED_NS
+ NPY_CARRAY
+ NPY_CARRAY_RO
+ NPY_FARRAY
+ NPY_FARRAY_RO
+ NPY_DEFAULT
+
+ NPY_IN_ARRAY
+ NPY_OUT_ARRAY
+ NPY_INOUT_ARRAY
+ NPY_IN_FARRAY
+ NPY_OUT_FARRAY
+ NPY_INOUT_FARRAY
+
+ NPY_UPDATE_ALL
+
enum:
# Added in NumPy 1.7 to replace the deprecated enums above.
NPY_ARRAY_C_CONTIGUOUS
@@ -187,13 +187,13 @@ cdef extern from "numpy/arrayobject.h":
NPY_ARRAY_UPDATE_ALL
- cdef enum:
- NPY_MAXDIMS
-
- npy_intp NPY_MAX_ELSIZE
-
- ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *)
-
+ cdef enum:
+ NPY_MAXDIMS
+
+ npy_intp NPY_MAX_ELSIZE
+
+ ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *)
+
ctypedef struct PyArray_ArrayDescr:
# shape is a tuple, but Cython doesn't support "tuple shape"
# inside a non-PyObject declaration, so we have to declare it
@@ -204,7 +204,7 @@ cdef extern from "numpy/arrayobject.h":
pass
ctypedef class numpy.dtype [object PyArray_Descr, check_size ignore]:
- # Use PyDataType_* macros when possible, however there are no macros
+ # Use PyDataType_* macros when possible, however there are no macros
# for accessing some of the fields, so some are defined.
cdef PyTypeObject* typeobj
cdef char kind
@@ -216,817 +216,817 @@ cdef extern from "numpy/arrayobject.h":
# directly accessing this field.
cdef char byteorder
cdef char flags
- cdef int type_num
- cdef int itemsize "elsize"
+ cdef int type_num
+ cdef int itemsize "elsize"
cdef int alignment
cdef dict fields
- cdef tuple names
+ cdef tuple names
# Use PyDataType_HASSUBARRAY to test whether this field is
# valid (the pointer can be NULL). Most users should access
# this field via the inline helper method PyDataType_SHAPE.
cdef PyArray_ArrayDescr* subarray
-
- ctypedef extern class numpy.flatiter [object PyArrayIterObject]:
- # Use through macros
- pass
-
- ctypedef extern class numpy.broadcast [object PyArrayMultiIterObject]:
- # Use through macros
- pass
-
- ctypedef struct PyArrayObject:
- # For use in situations where ndarray can't replace PyArrayObject*,
- # like PyArrayObject**.
- pass
-
+
+ ctypedef extern class numpy.flatiter [object PyArrayIterObject]:
+ # Use through macros
+ pass
+
+ ctypedef extern class numpy.broadcast [object PyArrayMultiIterObject]:
+ # Use through macros
+ pass
+
+ ctypedef struct PyArrayObject:
+ # For use in situations where ndarray can't replace PyArrayObject*,
+ # like PyArrayObject**.
+ pass
+
ctypedef class numpy.ndarray [object PyArrayObject, check_size ignore]:
- cdef __cythonbufferdefaults__ = {"mode": "strided"}
-
- cdef:
- # Only taking a few of the most commonly used and stable fields.
- # One should use PyArray_* macros instead to access the C fields.
- char *data
- int ndim "nd"
- npy_intp *shape "dimensions"
- npy_intp *strides
+ cdef __cythonbufferdefaults__ = {"mode": "strided"}
+
+ cdef:
+ # Only taking a few of the most commonly used and stable fields.
+ # One should use PyArray_* macros instead to access the C fields.
+ char *data
+ int ndim "nd"
+ npy_intp *shape "dimensions"
+ npy_intp *strides
dtype descr # deprecated since NumPy 1.7 !
- PyObject* base
-
- # Note: This syntax (function definition in pxd files) is an
- # experimental exception made for __getbuffer__ and __releasebuffer__
- # -- the details of this may change.
- def __getbuffer__(ndarray self, Py_buffer* info, int flags):
- # This implementation of getbuffer is geared towards Cython
+ PyObject* base
+
+ # Note: This syntax (function definition in pxd files) is an
+ # experimental exception made for __getbuffer__ and __releasebuffer__
+ # -- the details of this may change.
+ def __getbuffer__(ndarray self, Py_buffer* info, int flags):
+ # This implementation of getbuffer is geared towards Cython
# requirements, and does not yet fulfill the PEP.
- # In particular strided access is always provided regardless
- # of flags
-
+ # In particular strided access is always provided regardless
+ # of flags
+
cdef int i, ndim
- cdef int endian_detector = 1
- cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
-
- ndim = PyArray_NDIM(self)
-
- if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
+ cdef int endian_detector = 1
+ cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+
+ ndim = PyArray_NDIM(self)
+
+ if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
and not PyArray_CHKFLAGS(self, NPY_ARRAY_C_CONTIGUOUS)):
- raise ValueError(u"ndarray is not C contiguous")
-
- if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ raise ValueError(u"ndarray is not C contiguous")
+
+ if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
and not PyArray_CHKFLAGS(self, NPY_ARRAY_F_CONTIGUOUS)):
- raise ValueError(u"ndarray is not Fortran contiguous")
-
- info.buf = PyArray_DATA(self)
- info.ndim = ndim
+ raise ValueError(u"ndarray is not Fortran contiguous")
+
+ info.buf = PyArray_DATA(self)
+ info.ndim = ndim
if sizeof(npy_intp) != sizeof(Py_ssize_t):
- # Allocate new buffer for strides and shape info.
- # This is allocated as one block, strides first.
+ # Allocate new buffer for strides and shape info.
+ # This is allocated as one block, strides first.
info.strides = <Py_ssize_t*>PyObject_Malloc(sizeof(Py_ssize_t) * 2 * <size_t>ndim)
- info.shape = info.strides + ndim
- for i in range(ndim):
- info.strides[i] = PyArray_STRIDES(self)[i]
- info.shape[i] = PyArray_DIMS(self)[i]
- else:
- info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
- info.shape = <Py_ssize_t*>PyArray_DIMS(self)
- info.suboffsets = NULL
- info.itemsize = PyArray_ITEMSIZE(self)
- info.readonly = not PyArray_ISWRITEABLE(self)
-
- cdef int t
- cdef char* f = NULL
+ info.shape = info.strides + ndim
+ for i in range(ndim):
+ info.strides[i] = PyArray_STRIDES(self)[i]
+ info.shape[i] = PyArray_DIMS(self)[i]
+ else:
+ info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ info.suboffsets = NULL
+ info.itemsize = PyArray_ITEMSIZE(self)
+ info.readonly = not PyArray_ISWRITEABLE(self)
+
+ cdef int t
+ cdef char* f = NULL
cdef dtype descr = <dtype>PyArray_DESCR(self)
- cdef int offset
-
+ cdef int offset
+
info.obj = self
-
+
if not PyDataType_HASFIELDS(descr):
- t = descr.type_num
- if ((descr.byteorder == c'>' and little_endian) or
- (descr.byteorder == c'<' and not little_endian)):
- raise ValueError(u"Non-native byte order not supported")
- if t == NPY_BYTE: f = "b"
- elif t == NPY_UBYTE: f = "B"
- elif t == NPY_SHORT: f = "h"
- elif t == NPY_USHORT: f = "H"
- elif t == NPY_INT: f = "i"
- elif t == NPY_UINT: f = "I"
- elif t == NPY_LONG: f = "l"
- elif t == NPY_ULONG: f = "L"
- elif t == NPY_LONGLONG: f = "q"
- elif t == NPY_ULONGLONG: f = "Q"
- elif t == NPY_FLOAT: f = "f"
- elif t == NPY_DOUBLE: f = "d"
- elif t == NPY_LONGDOUBLE: f = "g"
- elif t == NPY_CFLOAT: f = "Zf"
- elif t == NPY_CDOUBLE: f = "Zd"
- elif t == NPY_CLONGDOUBLE: f = "Zg"
- elif t == NPY_OBJECT: f = "O"
- else:
- raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- info.format = f
- return
- else:
+ t = descr.type_num
+ if ((descr.byteorder == c'>' and little_endian) or
+ (descr.byteorder == c'<' and not little_endian)):
+ raise ValueError(u"Non-native byte order not supported")
+ if t == NPY_BYTE: f = "b"
+ elif t == NPY_UBYTE: f = "B"
+ elif t == NPY_SHORT: f = "h"
+ elif t == NPY_USHORT: f = "H"
+ elif t == NPY_INT: f = "i"
+ elif t == NPY_UINT: f = "I"
+ elif t == NPY_LONG: f = "l"
+ elif t == NPY_ULONG: f = "L"
+ elif t == NPY_LONGLONG: f = "q"
+ elif t == NPY_ULONGLONG: f = "Q"
+ elif t == NPY_FLOAT: f = "f"
+ elif t == NPY_DOUBLE: f = "d"
+ elif t == NPY_LONGDOUBLE: f = "g"
+ elif t == NPY_CFLOAT: f = "Zf"
+ elif t == NPY_CDOUBLE: f = "Zd"
+ elif t == NPY_CLONGDOUBLE: f = "Zg"
+ elif t == NPY_OBJECT: f = "O"
+ else:
+ raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ info.format = f
+ return
+ else:
info.format = <char*>PyObject_Malloc(_buffer_format_string_len)
- info.format[0] = c'^' # Native data types, manual alignment
- offset = 0
- f = _util_dtypestring(descr, info.format + 1,
- info.format + _buffer_format_string_len,
- &offset)
- f[0] = c'\0' # Terminate format string
-
- def __releasebuffer__(ndarray self, Py_buffer* info):
- if PyArray_HASFIELDS(self):
+ info.format[0] = c'^' # Native data types, manual alignment
+ offset = 0
+ f = _util_dtypestring(descr, info.format + 1,
+ info.format + _buffer_format_string_len,
+ &offset)
+ f[0] = c'\0' # Terminate format string
+
+ def __releasebuffer__(ndarray self, Py_buffer* info):
+ if PyArray_HASFIELDS(self):
PyObject_Free(info.format)
- if sizeof(npy_intp) != sizeof(Py_ssize_t):
+ if sizeof(npy_intp) != sizeof(Py_ssize_t):
PyObject_Free(info.strides)
- # info.shape was stored after info.strides in the same block
-
-
+ # info.shape was stored after info.strides in the same block
+
+
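    # Illustrative aside (not part of this commit): the __getbuffer__ /
    # __releasebuffer__ pair above is what backs typed NumPy buffer access in
    # user code, e.g. a hypothetical .pyx such as:
    #
    #     cimport numpy as cnp
    #
    #     cnp.import_array()    # required before PyArray_* use, per the header comment
    #
    #     def scale(cnp.ndarray[cnp.float64_t, ndim=1] a, double factor):
    #         cdef Py_ssize_t i
    #         for i in range(a.shape[0]):
    #             a[i] = a[i] * factor    # element access through the buffer, no Python boxing
    #         return a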
ctypedef unsigned char npy_bool
-
- ctypedef signed char npy_byte
- ctypedef signed short npy_short
- ctypedef signed int npy_int
- ctypedef signed long npy_long
- ctypedef signed long long npy_longlong
-
- ctypedef unsigned char npy_ubyte
- ctypedef unsigned short npy_ushort
- ctypedef unsigned int npy_uint
- ctypedef unsigned long npy_ulong
- ctypedef unsigned long long npy_ulonglong
-
- ctypedef float npy_float
- ctypedef double npy_double
- ctypedef long double npy_longdouble
-
- ctypedef signed char npy_int8
- ctypedef signed short npy_int16
- ctypedef signed int npy_int32
- ctypedef signed long long npy_int64
- ctypedef signed long long npy_int96
- ctypedef signed long long npy_int128
-
- ctypedef unsigned char npy_uint8
- ctypedef unsigned short npy_uint16
- ctypedef unsigned int npy_uint32
- ctypedef unsigned long long npy_uint64
- ctypedef unsigned long long npy_uint96
- ctypedef unsigned long long npy_uint128
-
- ctypedef float npy_float32
- ctypedef double npy_float64
- ctypedef long double npy_float80
- ctypedef long double npy_float96
- ctypedef long double npy_float128
-
- ctypedef struct npy_cfloat:
- double real
- double imag
-
- ctypedef struct npy_cdouble:
- double real
- double imag
-
- ctypedef struct npy_clongdouble:
+
+ ctypedef signed char npy_byte
+ ctypedef signed short npy_short
+ ctypedef signed int npy_int
+ ctypedef signed long npy_long
+ ctypedef signed long long npy_longlong
+
+ ctypedef unsigned char npy_ubyte
+ ctypedef unsigned short npy_ushort
+ ctypedef unsigned int npy_uint
+ ctypedef unsigned long npy_ulong
+ ctypedef unsigned long long npy_ulonglong
+
+ ctypedef float npy_float
+ ctypedef double npy_double
+ ctypedef long double npy_longdouble
+
+ ctypedef signed char npy_int8
+ ctypedef signed short npy_int16
+ ctypedef signed int npy_int32
+ ctypedef signed long long npy_int64
+ ctypedef signed long long npy_int96
+ ctypedef signed long long npy_int128
+
+ ctypedef unsigned char npy_uint8
+ ctypedef unsigned short npy_uint16
+ ctypedef unsigned int npy_uint32
+ ctypedef unsigned long long npy_uint64
+ ctypedef unsigned long long npy_uint96
+ ctypedef unsigned long long npy_uint128
+
+ ctypedef float npy_float32
+ ctypedef double npy_float64
+ ctypedef long double npy_float80
+ ctypedef long double npy_float96
+ ctypedef long double npy_float128
+
+ ctypedef struct npy_cfloat:
+ double real
+ double imag
+
+ ctypedef struct npy_cdouble:
+ double real
+ double imag
+
+ ctypedef struct npy_clongdouble:
long double real
long double imag
-
- ctypedef struct npy_complex64:
+
+ ctypedef struct npy_complex64:
float real
float imag
-
- ctypedef struct npy_complex128:
- double real
- double imag
-
- ctypedef struct npy_complex160:
+
+ ctypedef struct npy_complex128:
+ double real
+ double imag
+
+ ctypedef struct npy_complex160:
long double real
long double imag
-
- ctypedef struct npy_complex192:
+
+ ctypedef struct npy_complex192:
long double real
long double imag
-
- ctypedef struct npy_complex256:
+
+ ctypedef struct npy_complex256:
long double real
long double imag
-
- ctypedef struct PyArray_Dims:
- npy_intp *ptr
- int len
-
+
+ ctypedef struct PyArray_Dims:
+ npy_intp *ptr
+ int len
+
int _import_array() except -1
-
- #
- # Macros from ndarrayobject.h
- #
- bint PyArray_CHKFLAGS(ndarray m, int flags)
+
+ #
+ # Macros from ndarrayobject.h
+ #
+ bint PyArray_CHKFLAGS(ndarray m, int flags)
bint PyArray_IS_C_CONTIGUOUS(ndarray arr)
bint PyArray_IS_F_CONTIGUOUS(ndarray arr)
- bint PyArray_ISCONTIGUOUS(ndarray m)
- bint PyArray_ISWRITEABLE(ndarray m)
- bint PyArray_ISALIGNED(ndarray m)
-
- int PyArray_NDIM(ndarray)
- bint PyArray_ISONESEGMENT(ndarray)
- bint PyArray_ISFORTRAN(ndarray)
- int PyArray_FORTRANIF(ndarray)
-
- void* PyArray_DATA(ndarray)
- char* PyArray_BYTES(ndarray)
- npy_intp* PyArray_DIMS(ndarray)
- npy_intp* PyArray_STRIDES(ndarray)
- npy_intp PyArray_DIM(ndarray, size_t)
- npy_intp PyArray_STRIDE(ndarray, size_t)
-
+ bint PyArray_ISCONTIGUOUS(ndarray m)
+ bint PyArray_ISWRITEABLE(ndarray m)
+ bint PyArray_ISALIGNED(ndarray m)
+
+ int PyArray_NDIM(ndarray)
+ bint PyArray_ISONESEGMENT(ndarray)
+ bint PyArray_ISFORTRAN(ndarray)
+ int PyArray_FORTRANIF(ndarray)
+
+ void* PyArray_DATA(ndarray)
+ char* PyArray_BYTES(ndarray)
+ npy_intp* PyArray_DIMS(ndarray)
+ npy_intp* PyArray_STRIDES(ndarray)
+ npy_intp PyArray_DIM(ndarray, size_t)
+ npy_intp PyArray_STRIDE(ndarray, size_t)
+
PyObject *PyArray_BASE(ndarray) # returns borrowed reference!
PyArray_Descr *PyArray_DESCR(ndarray) # returns borrowed reference to dtype!
- int PyArray_FLAGS(ndarray)
- npy_intp PyArray_ITEMSIZE(ndarray)
- int PyArray_TYPE(ndarray arr)
-
- object PyArray_GETITEM(ndarray arr, void *itemptr)
- int PyArray_SETITEM(ndarray arr, void *itemptr, object obj)
-
- bint PyTypeNum_ISBOOL(int)
- bint PyTypeNum_ISUNSIGNED(int)
- bint PyTypeNum_ISSIGNED(int)
- bint PyTypeNum_ISINTEGER(int)
- bint PyTypeNum_ISFLOAT(int)
- bint PyTypeNum_ISNUMBER(int)
- bint PyTypeNum_ISSTRING(int)
- bint PyTypeNum_ISCOMPLEX(int)
- bint PyTypeNum_ISPYTHON(int)
- bint PyTypeNum_ISFLEXIBLE(int)
- bint PyTypeNum_ISUSERDEF(int)
- bint PyTypeNum_ISEXTENDED(int)
- bint PyTypeNum_ISOBJECT(int)
-
- bint PyDataType_ISBOOL(dtype)
- bint PyDataType_ISUNSIGNED(dtype)
- bint PyDataType_ISSIGNED(dtype)
- bint PyDataType_ISINTEGER(dtype)
- bint PyDataType_ISFLOAT(dtype)
- bint PyDataType_ISNUMBER(dtype)
- bint PyDataType_ISSTRING(dtype)
- bint PyDataType_ISCOMPLEX(dtype)
- bint PyDataType_ISPYTHON(dtype)
- bint PyDataType_ISFLEXIBLE(dtype)
- bint PyDataType_ISUSERDEF(dtype)
- bint PyDataType_ISEXTENDED(dtype)
- bint PyDataType_ISOBJECT(dtype)
- bint PyDataType_HASFIELDS(dtype)
+ int PyArray_FLAGS(ndarray)
+ npy_intp PyArray_ITEMSIZE(ndarray)
+ int PyArray_TYPE(ndarray arr)
+
+ object PyArray_GETITEM(ndarray arr, void *itemptr)
+ int PyArray_SETITEM(ndarray arr, void *itemptr, object obj)
+
+ bint PyTypeNum_ISBOOL(int)
+ bint PyTypeNum_ISUNSIGNED(int)
+ bint PyTypeNum_ISSIGNED(int)
+ bint PyTypeNum_ISINTEGER(int)
+ bint PyTypeNum_ISFLOAT(int)
+ bint PyTypeNum_ISNUMBER(int)
+ bint PyTypeNum_ISSTRING(int)
+ bint PyTypeNum_ISCOMPLEX(int)
+ bint PyTypeNum_ISPYTHON(int)
+ bint PyTypeNum_ISFLEXIBLE(int)
+ bint PyTypeNum_ISUSERDEF(int)
+ bint PyTypeNum_ISEXTENDED(int)
+ bint PyTypeNum_ISOBJECT(int)
+
+ bint PyDataType_ISBOOL(dtype)
+ bint PyDataType_ISUNSIGNED(dtype)
+ bint PyDataType_ISSIGNED(dtype)
+ bint PyDataType_ISINTEGER(dtype)
+ bint PyDataType_ISFLOAT(dtype)
+ bint PyDataType_ISNUMBER(dtype)
+ bint PyDataType_ISSTRING(dtype)
+ bint PyDataType_ISCOMPLEX(dtype)
+ bint PyDataType_ISPYTHON(dtype)
+ bint PyDataType_ISFLEXIBLE(dtype)
+ bint PyDataType_ISUSERDEF(dtype)
+ bint PyDataType_ISEXTENDED(dtype)
+ bint PyDataType_ISOBJECT(dtype)
+ bint PyDataType_HASFIELDS(dtype)
bint PyDataType_HASSUBARRAY(dtype)
-
- bint PyArray_ISBOOL(ndarray)
- bint PyArray_ISUNSIGNED(ndarray)
- bint PyArray_ISSIGNED(ndarray)
- bint PyArray_ISINTEGER(ndarray)
- bint PyArray_ISFLOAT(ndarray)
- bint PyArray_ISNUMBER(ndarray)
- bint PyArray_ISSTRING(ndarray)
- bint PyArray_ISCOMPLEX(ndarray)
- bint PyArray_ISPYTHON(ndarray)
- bint PyArray_ISFLEXIBLE(ndarray)
- bint PyArray_ISUSERDEF(ndarray)
- bint PyArray_ISEXTENDED(ndarray)
- bint PyArray_ISOBJECT(ndarray)
- bint PyArray_HASFIELDS(ndarray)
-
- bint PyArray_ISVARIABLE(ndarray)
-
- bint PyArray_SAFEALIGNEDCOPY(ndarray)
+
+ bint PyArray_ISBOOL(ndarray)
+ bint PyArray_ISUNSIGNED(ndarray)
+ bint PyArray_ISSIGNED(ndarray)
+ bint PyArray_ISINTEGER(ndarray)
+ bint PyArray_ISFLOAT(ndarray)
+ bint PyArray_ISNUMBER(ndarray)
+ bint PyArray_ISSTRING(ndarray)
+ bint PyArray_ISCOMPLEX(ndarray)
+ bint PyArray_ISPYTHON(ndarray)
+ bint PyArray_ISFLEXIBLE(ndarray)
+ bint PyArray_ISUSERDEF(ndarray)
+ bint PyArray_ISEXTENDED(ndarray)
+ bint PyArray_ISOBJECT(ndarray)
+ bint PyArray_HASFIELDS(ndarray)
+
+ bint PyArray_ISVARIABLE(ndarray)
+
+ bint PyArray_SAFEALIGNEDCOPY(ndarray)
bint PyArray_ISNBO(char) # works on ndarray.byteorder
bint PyArray_IsNativeByteOrder(char) # works on ndarray.byteorder
- bint PyArray_ISNOTSWAPPED(ndarray)
- bint PyArray_ISBYTESWAPPED(ndarray)
-
- bint PyArray_FLAGSWAP(ndarray, int)
-
- bint PyArray_ISCARRAY(ndarray)
- bint PyArray_ISCARRAY_RO(ndarray)
- bint PyArray_ISFARRAY(ndarray)
- bint PyArray_ISFARRAY_RO(ndarray)
- bint PyArray_ISBEHAVED(ndarray)
- bint PyArray_ISBEHAVED_RO(ndarray)
-
-
- bint PyDataType_ISNOTSWAPPED(dtype)
- bint PyDataType_ISBYTESWAPPED(dtype)
-
- bint PyArray_DescrCheck(object)
-
- bint PyArray_Check(object)
- bint PyArray_CheckExact(object)
-
- # Cannot be supported due to out arg:
- # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&)
- # bint PyArray_HasArrayInterface(op, out)
-
-
- bint PyArray_IsZeroDim(object)
- # Cannot be supported due to ## ## in macro:
- # bint PyArray_IsScalar(object, verbatim work)
- bint PyArray_CheckScalar(object)
- bint PyArray_IsPythonNumber(object)
- bint PyArray_IsPythonScalar(object)
- bint PyArray_IsAnyScalar(object)
- bint PyArray_CheckAnyScalar(object)
- ndarray PyArray_GETCONTIGUOUS(ndarray)
- bint PyArray_SAMESHAPE(ndarray, ndarray)
- npy_intp PyArray_SIZE(ndarray)
- npy_intp PyArray_NBYTES(ndarray)
-
- object PyArray_FROM_O(object)
- object PyArray_FROM_OF(object m, int flags)
+ bint PyArray_ISNOTSWAPPED(ndarray)
+ bint PyArray_ISBYTESWAPPED(ndarray)
+
+ bint PyArray_FLAGSWAP(ndarray, int)
+
+ bint PyArray_ISCARRAY(ndarray)
+ bint PyArray_ISCARRAY_RO(ndarray)
+ bint PyArray_ISFARRAY(ndarray)
+ bint PyArray_ISFARRAY_RO(ndarray)
+ bint PyArray_ISBEHAVED(ndarray)
+ bint PyArray_ISBEHAVED_RO(ndarray)
+
+
+ bint PyDataType_ISNOTSWAPPED(dtype)
+ bint PyDataType_ISBYTESWAPPED(dtype)
+
+ bint PyArray_DescrCheck(object)
+
+ bint PyArray_Check(object)
+ bint PyArray_CheckExact(object)
+
+ # Cannot be supported due to out arg:
+ # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&)
+ # bint PyArray_HasArrayInterface(op, out)
+
+
+ bint PyArray_IsZeroDim(object)
+ # Cannot be supported due to ## ## in macro:
+ # bint PyArray_IsScalar(object, verbatim work)
+ bint PyArray_CheckScalar(object)
+ bint PyArray_IsPythonNumber(object)
+ bint PyArray_IsPythonScalar(object)
+ bint PyArray_IsAnyScalar(object)
+ bint PyArray_CheckAnyScalar(object)
+ ndarray PyArray_GETCONTIGUOUS(ndarray)
+ bint PyArray_SAMESHAPE(ndarray, ndarray)
+ npy_intp PyArray_SIZE(ndarray)
+ npy_intp PyArray_NBYTES(ndarray)
+
+ object PyArray_FROM_O(object)
+ object PyArray_FROM_OF(object m, int flags)
object PyArray_FROM_OT(object m, int type)
object PyArray_FROM_OTF(object m, int type, int flags)
- object PyArray_FROMANY(object m, int type, int min, int max, int flags)
- object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran)
- object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran)
- void PyArray_FILLWBYTE(object, int val)
- npy_intp PyArray_REFCOUNT(object)
- object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth)
- unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2)
- bint PyArray_EquivByteorders(int b1, int b2)
- object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum)
- object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data)
- #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr)
- object PyArray_ToScalar(void* data, ndarray arr)
-
- void* PyArray_GETPTR1(ndarray m, npy_intp i)
- void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j)
- void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k)
- void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l)
-
- void PyArray_XDECREF_ERR(ndarray)
- # Cannot be supported due to out arg
- # void PyArray_DESCR_REPLACE(descr)
-
-
- object PyArray_Copy(ndarray)
- object PyArray_FromObject(object op, int type, int min_depth, int max_depth)
- object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth)
- object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth)
-
- object PyArray_Cast(ndarray mp, int type_num)
- object PyArray_Take(ndarray ap, object items, int axis)
- object PyArray_Put(ndarray ap, object items, object values)
-
- void PyArray_ITER_RESET(flatiter it) nogil
- void PyArray_ITER_NEXT(flatiter it) nogil
- void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil
- void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil
- void* PyArray_ITER_DATA(flatiter it) nogil
- bint PyArray_ITER_NOTDONE(flatiter it) nogil
-
- void PyArray_MultiIter_RESET(broadcast multi) nogil
- void PyArray_MultiIter_NEXT(broadcast multi) nogil
- void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil
- void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil
- void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil
- void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil
- bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil
-
- # Functions from __multiarray_api.h
-
- # Functions taking dtype and returning object/ndarray are disabled
- # for now as they steal dtype references. I'm conservative and disable
- # more than is probably needed until it can be checked further.
- int PyArray_SetNumericOps (object)
- object PyArray_GetNumericOps ()
- int PyArray_INCREF (ndarray)
- int PyArray_XDECREF (ndarray)
- void PyArray_SetStringFunction (object, int)
- dtype PyArray_DescrFromType (int)
- object PyArray_TypeObjectFromType (int)
- char * PyArray_Zero (ndarray)
- char * PyArray_One (ndarray)
- #object PyArray_CastToType (ndarray, dtype, int)
- int PyArray_CastTo (ndarray, ndarray)
- int PyArray_CastAnyTo (ndarray, ndarray)
- int PyArray_CanCastSafely (int, int)
- npy_bool PyArray_CanCastTo (dtype, dtype)
- int PyArray_ObjectType (object, int)
- dtype PyArray_DescrFromObject (object, dtype)
- #ndarray* PyArray_ConvertToCommonType (object, int *)
- dtype PyArray_DescrFromScalar (object)
- dtype PyArray_DescrFromTypeObject (object)
- npy_intp PyArray_Size (object)
- #object PyArray_Scalar (void *, dtype, object)
- #object PyArray_FromScalar (object, dtype)
- void PyArray_ScalarAsCtype (object, void *)
- #int PyArray_CastScalarToCtype (object, void *, dtype)
- #int PyArray_CastScalarDirect (object, dtype, void *, int)
- object PyArray_ScalarFromObject (object)
- #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int)
- object PyArray_FromDims (int, int *, int)
- #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *)
- #object PyArray_FromAny (object, dtype, int, int, int, object)
- object PyArray_EnsureArray (object)
- object PyArray_EnsureAnyArray (object)
- #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *)
- #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *)
- #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp)
- #object PyArray_FromIter (object, dtype, npy_intp)
- object PyArray_Return (ndarray)
- #object PyArray_GetField (ndarray, dtype, int)
- #int PyArray_SetField (ndarray, dtype, int, object)
- object PyArray_Byteswap (ndarray, npy_bool)
- object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER)
- int PyArray_MoveInto (ndarray, ndarray)
- int PyArray_CopyInto (ndarray, ndarray)
- int PyArray_CopyAnyInto (ndarray, ndarray)
- int PyArray_CopyObject (ndarray, object)
- object PyArray_NewCopy (ndarray, NPY_ORDER)
- object PyArray_ToList (ndarray)
- object PyArray_ToString (ndarray, NPY_ORDER)
- int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *)
- int PyArray_Dump (object, object, int)
- object PyArray_Dumps (object, int)
- int PyArray_ValidType (int)
- void PyArray_UpdateFlags (ndarray, int)
- object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object)
- #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object)
- #dtype PyArray_DescrNew (dtype)
- dtype PyArray_DescrNewFromType (int)
- double PyArray_GetPriority (object, double)
- object PyArray_IterNew (object)
- object PyArray_MultiIterNew (int, ...)
-
- int PyArray_PyIntAsInt (object)
- npy_intp PyArray_PyIntAsIntp (object)
- int PyArray_Broadcast (broadcast)
- void PyArray_FillObjectArray (ndarray, object)
- int PyArray_FillWithScalar (ndarray, object)
- npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *)
- dtype PyArray_DescrNewByteorder (dtype, char)
- object PyArray_IterAllButAxis (object, int *)
- #object PyArray_CheckFromAny (object, dtype, int, int, int, object)
- #object PyArray_FromArray (ndarray, dtype, int)
- object PyArray_FromInterface (object)
- object PyArray_FromStructInterface (object)
- #object PyArray_FromArrayAttr (object, dtype, object)
- #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*)
- int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND)
- object PyArray_NewFlagsObject (object)
- npy_bool PyArray_CanCastScalar (type, type)
- #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t)
- int PyArray_RemoveSmallest (broadcast)
- int PyArray_ElementStrides (object)
- void PyArray_Item_INCREF (char *, dtype)
- void PyArray_Item_XDECREF (char *, dtype)
- object PyArray_FieldNames (object)
- object PyArray_Transpose (ndarray, PyArray_Dims *)
- object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE)
- object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE)
- object PyArray_PutMask (ndarray, object, object)
- object PyArray_Repeat (ndarray, object, int)
- object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE)
- int PyArray_Sort (ndarray, int, NPY_SORTKIND)
- object PyArray_ArgSort (ndarray, int, NPY_SORTKIND)
- object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE)
- object PyArray_ArgMax (ndarray, int, ndarray)
- object PyArray_ArgMin (ndarray, int, ndarray)
- object PyArray_Reshape (ndarray, object)
- object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER)
- object PyArray_Squeeze (ndarray)
- #object PyArray_View (ndarray, dtype, type)
- object PyArray_SwapAxes (ndarray, int, int)
- object PyArray_Max (ndarray, int, ndarray)
- object PyArray_Min (ndarray, int, ndarray)
- object PyArray_Ptp (ndarray, int, ndarray)
- object PyArray_Mean (ndarray, int, int, ndarray)
- object PyArray_Trace (ndarray, int, int, int, int, ndarray)
- object PyArray_Diagonal (ndarray, int, int, int)
- object PyArray_Clip (ndarray, object, object, ndarray)
- object PyArray_Conjugate (ndarray, ndarray)
- object PyArray_Nonzero (ndarray)
- object PyArray_Std (ndarray, int, int, ndarray, int)
- object PyArray_Sum (ndarray, int, int, ndarray)
- object PyArray_CumSum (ndarray, int, int, ndarray)
- object PyArray_Prod (ndarray, int, int, ndarray)
- object PyArray_CumProd (ndarray, int, int, ndarray)
- object PyArray_All (ndarray, int, ndarray)
- object PyArray_Any (ndarray, int, ndarray)
- object PyArray_Compress (ndarray, object, int, ndarray)
- object PyArray_Flatten (ndarray, NPY_ORDER)
- object PyArray_Ravel (ndarray, NPY_ORDER)
- npy_intp PyArray_MultiplyList (npy_intp *, int)
- int PyArray_MultiplyIntList (int *, int)
- void * PyArray_GetPtr (ndarray, npy_intp*)
- int PyArray_CompareLists (npy_intp *, npy_intp *, int)
- #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype)
- #int PyArray_As1D (object*, char **, int *, int)
- #int PyArray_As2D (object*, char ***, int *, int *, int)
- int PyArray_Free (object, void *)
- #int PyArray_Converter (object, object*)
- int PyArray_IntpFromSequence (object, npy_intp *, int)
- object PyArray_Concatenate (object, int)
- object PyArray_InnerProduct (object, object)
- object PyArray_MatrixProduct (object, object)
- object PyArray_CopyAndTranspose (object)
- object PyArray_Correlate (object, object, int)
- int PyArray_TypestrConvert (int, int)
- #int PyArray_DescrConverter (object, dtype*)
- #int PyArray_DescrConverter2 (object, dtype*)
- int PyArray_IntpConverter (object, PyArray_Dims *)
- #int PyArray_BufferConverter (object, chunk)
- int PyArray_AxisConverter (object, int *)
- int PyArray_BoolConverter (object, npy_bool *)
- int PyArray_ByteorderConverter (object, char *)
- int PyArray_OrderConverter (object, NPY_ORDER *)
- unsigned char PyArray_EquivTypes (dtype, dtype)
- #object PyArray_Zeros (int, npy_intp *, dtype, int)
- #object PyArray_Empty (int, npy_intp *, dtype, int)
- object PyArray_Where (object, object, object)
- object PyArray_Arange (double, double, double, int)
- #object PyArray_ArangeObj (object, object, object, dtype)
- int PyArray_SortkindConverter (object, NPY_SORTKIND *)
- object PyArray_LexSort (object, int)
- object PyArray_Round (ndarray, int, ndarray)
- unsigned char PyArray_EquivTypenums (int, int)
- int PyArray_RegisterDataType (dtype)
- int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *)
- int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND)
- #void PyArray_InitArrFuncs (PyArray_ArrFuncs *)
- object PyArray_IntTupleFromIntp (int, npy_intp *)
- int PyArray_TypeNumFromName (char *)
- int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *)
- #int PyArray_OutputConverter (object, ndarray*)
- object PyArray_BroadcastToShape (object, npy_intp *, int)
- void _PyArray_SigintHandler (int)
- void* _PyArray_GetSigintBuf ()
- #int PyArray_DescrAlignConverter (object, dtype*)
- #int PyArray_DescrAlignConverter2 (object, dtype*)
- int PyArray_SearchsideConverter (object, void *)
- object PyArray_CheckAxis (ndarray, int *, int)
- npy_intp PyArray_OverflowMultiplyList (npy_intp *, int)
- int PyArray_CompareString (char *, char *, size_t)
+ object PyArray_FROMANY(object m, int type, int min, int max, int flags)
+ object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran)
+ object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran)
+ void PyArray_FILLWBYTE(object, int val)
+ npy_intp PyArray_REFCOUNT(object)
+ object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth)
+ unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2)
+ bint PyArray_EquivByteorders(int b1, int b2)
+ object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum)
+ object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data)
+ #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr)
+ object PyArray_ToScalar(void* data, ndarray arr)
+
+ void* PyArray_GETPTR1(ndarray m, npy_intp i)
+ void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j)
+ void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k)
+ void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l)
+
+ void PyArray_XDECREF_ERR(ndarray)
+ # Cannot be supported due to out arg
+ # void PyArray_DESCR_REPLACE(descr)
+
+
+ object PyArray_Copy(ndarray)
+ object PyArray_FromObject(object op, int type, int min_depth, int max_depth)
+ object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth)
+ object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth)
+
+ object PyArray_Cast(ndarray mp, int type_num)
+ object PyArray_Take(ndarray ap, object items, int axis)
+ object PyArray_Put(ndarray ap, object items, object values)
+
+ void PyArray_ITER_RESET(flatiter it) nogil
+ void PyArray_ITER_NEXT(flatiter it) nogil
+ void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil
+ void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil
+ void* PyArray_ITER_DATA(flatiter it) nogil
+ bint PyArray_ITER_NOTDONE(flatiter it) nogil
+
+ void PyArray_MultiIter_RESET(broadcast multi) nogil
+ void PyArray_MultiIter_NEXT(broadcast multi) nogil
+ void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil
+ void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil
+ void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil
+ void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil
+ bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil
+
+ # Functions from __multiarray_api.h
+
+ # Functions taking dtype and returning object/ndarray are disabled
+ # for now as they steal dtype references. I'm conservative and disable
+ # more than is probably needed until it can be checked further.
+ int PyArray_SetNumericOps (object)
+ object PyArray_GetNumericOps ()
+ int PyArray_INCREF (ndarray)
+ int PyArray_XDECREF (ndarray)
+ void PyArray_SetStringFunction (object, int)
+ dtype PyArray_DescrFromType (int)
+ object PyArray_TypeObjectFromType (int)
+ char * PyArray_Zero (ndarray)
+ char * PyArray_One (ndarray)
+ #object PyArray_CastToType (ndarray, dtype, int)
+ int PyArray_CastTo (ndarray, ndarray)
+ int PyArray_CastAnyTo (ndarray, ndarray)
+ int PyArray_CanCastSafely (int, int)
+ npy_bool PyArray_CanCastTo (dtype, dtype)
+ int PyArray_ObjectType (object, int)
+ dtype PyArray_DescrFromObject (object, dtype)
+ #ndarray* PyArray_ConvertToCommonType (object, int *)
+ dtype PyArray_DescrFromScalar (object)
+ dtype PyArray_DescrFromTypeObject (object)
+ npy_intp PyArray_Size (object)
+ #object PyArray_Scalar (void *, dtype, object)
+ #object PyArray_FromScalar (object, dtype)
+ void PyArray_ScalarAsCtype (object, void *)
+ #int PyArray_CastScalarToCtype (object, void *, dtype)
+ #int PyArray_CastScalarDirect (object, dtype, void *, int)
+ object PyArray_ScalarFromObject (object)
+ #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int)
+ object PyArray_FromDims (int, int *, int)
+ #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *)
+ #object PyArray_FromAny (object, dtype, int, int, int, object)
+ object PyArray_EnsureArray (object)
+ object PyArray_EnsureAnyArray (object)
+ #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *)
+ #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *)
+ #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp)
+ #object PyArray_FromIter (object, dtype, npy_intp)
+ object PyArray_Return (ndarray)
+ #object PyArray_GetField (ndarray, dtype, int)
+ #int PyArray_SetField (ndarray, dtype, int, object)
+ object PyArray_Byteswap (ndarray, npy_bool)
+ object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER)
+ int PyArray_MoveInto (ndarray, ndarray)
+ int PyArray_CopyInto (ndarray, ndarray)
+ int PyArray_CopyAnyInto (ndarray, ndarray)
+ int PyArray_CopyObject (ndarray, object)
+ object PyArray_NewCopy (ndarray, NPY_ORDER)
+ object PyArray_ToList (ndarray)
+ object PyArray_ToString (ndarray, NPY_ORDER)
+ int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *)
+ int PyArray_Dump (object, object, int)
+ object PyArray_Dumps (object, int)
+ int PyArray_ValidType (int)
+ void PyArray_UpdateFlags (ndarray, int)
+ object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object)
+ #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object)
+ #dtype PyArray_DescrNew (dtype)
+ dtype PyArray_DescrNewFromType (int)
+ double PyArray_GetPriority (object, double)
+ object PyArray_IterNew (object)
+ object PyArray_MultiIterNew (int, ...)
+
+ int PyArray_PyIntAsInt (object)
+ npy_intp PyArray_PyIntAsIntp (object)
+ int PyArray_Broadcast (broadcast)
+ void PyArray_FillObjectArray (ndarray, object)
+ int PyArray_FillWithScalar (ndarray, object)
+ npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *)
+ dtype PyArray_DescrNewByteorder (dtype, char)
+ object PyArray_IterAllButAxis (object, int *)
+ #object PyArray_CheckFromAny (object, dtype, int, int, int, object)
+ #object PyArray_FromArray (ndarray, dtype, int)
+ object PyArray_FromInterface (object)
+ object PyArray_FromStructInterface (object)
+ #object PyArray_FromArrayAttr (object, dtype, object)
+ #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*)
+ int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND)
+ object PyArray_NewFlagsObject (object)
+ npy_bool PyArray_CanCastScalar (type, type)
+ #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t)
+ int PyArray_RemoveSmallest (broadcast)
+ int PyArray_ElementStrides (object)
+ void PyArray_Item_INCREF (char *, dtype)
+ void PyArray_Item_XDECREF (char *, dtype)
+ object PyArray_FieldNames (object)
+ object PyArray_Transpose (ndarray, PyArray_Dims *)
+ object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE)
+ object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE)
+ object PyArray_PutMask (ndarray, object, object)
+ object PyArray_Repeat (ndarray, object, int)
+ object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE)
+ int PyArray_Sort (ndarray, int, NPY_SORTKIND)
+ object PyArray_ArgSort (ndarray, int, NPY_SORTKIND)
+ object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE)
+ object PyArray_ArgMax (ndarray, int, ndarray)
+ object PyArray_ArgMin (ndarray, int, ndarray)
+ object PyArray_Reshape (ndarray, object)
+ object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER)
+ object PyArray_Squeeze (ndarray)
+ #object PyArray_View (ndarray, dtype, type)
+ object PyArray_SwapAxes (ndarray, int, int)
+ object PyArray_Max (ndarray, int, ndarray)
+ object PyArray_Min (ndarray, int, ndarray)
+ object PyArray_Ptp (ndarray, int, ndarray)
+ object PyArray_Mean (ndarray, int, int, ndarray)
+ object PyArray_Trace (ndarray, int, int, int, int, ndarray)
+ object PyArray_Diagonal (ndarray, int, int, int)
+ object PyArray_Clip (ndarray, object, object, ndarray)
+ object PyArray_Conjugate (ndarray, ndarray)
+ object PyArray_Nonzero (ndarray)
+ object PyArray_Std (ndarray, int, int, ndarray, int)
+ object PyArray_Sum (ndarray, int, int, ndarray)
+ object PyArray_CumSum (ndarray, int, int, ndarray)
+ object PyArray_Prod (ndarray, int, int, ndarray)
+ object PyArray_CumProd (ndarray, int, int, ndarray)
+ object PyArray_All (ndarray, int, ndarray)
+ object PyArray_Any (ndarray, int, ndarray)
+ object PyArray_Compress (ndarray, object, int, ndarray)
+ object PyArray_Flatten (ndarray, NPY_ORDER)
+ object PyArray_Ravel (ndarray, NPY_ORDER)
+ npy_intp PyArray_MultiplyList (npy_intp *, int)
+ int PyArray_MultiplyIntList (int *, int)
+ void * PyArray_GetPtr (ndarray, npy_intp*)
+ int PyArray_CompareLists (npy_intp *, npy_intp *, int)
+ #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype)
+ #int PyArray_As1D (object*, char **, int *, int)
+ #int PyArray_As2D (object*, char ***, int *, int *, int)
+ int PyArray_Free (object, void *)
+ #int PyArray_Converter (object, object*)
+ int PyArray_IntpFromSequence (object, npy_intp *, int)
+ object PyArray_Concatenate (object, int)
+ object PyArray_InnerProduct (object, object)
+ object PyArray_MatrixProduct (object, object)
+ object PyArray_CopyAndTranspose (object)
+ object PyArray_Correlate (object, object, int)
+ int PyArray_TypestrConvert (int, int)
+ #int PyArray_DescrConverter (object, dtype*)
+ #int PyArray_DescrConverter2 (object, dtype*)
+ int PyArray_IntpConverter (object, PyArray_Dims *)
+ #int PyArray_BufferConverter (object, chunk)
+ int PyArray_AxisConverter (object, int *)
+ int PyArray_BoolConverter (object, npy_bool *)
+ int PyArray_ByteorderConverter (object, char *)
+ int PyArray_OrderConverter (object, NPY_ORDER *)
+ unsigned char PyArray_EquivTypes (dtype, dtype)
+ #object PyArray_Zeros (int, npy_intp *, dtype, int)
+ #object PyArray_Empty (int, npy_intp *, dtype, int)
+ object PyArray_Where (object, object, object)
+ object PyArray_Arange (double, double, double, int)
+ #object PyArray_ArangeObj (object, object, object, dtype)
+ int PyArray_SortkindConverter (object, NPY_SORTKIND *)
+ object PyArray_LexSort (object, int)
+ object PyArray_Round (ndarray, int, ndarray)
+ unsigned char PyArray_EquivTypenums (int, int)
+ int PyArray_RegisterDataType (dtype)
+ int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *)
+ int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND)
+ #void PyArray_InitArrFuncs (PyArray_ArrFuncs *)
+ object PyArray_IntTupleFromIntp (int, npy_intp *)
+ int PyArray_TypeNumFromName (char *)
+ int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *)
+ #int PyArray_OutputConverter (object, ndarray*)
+ object PyArray_BroadcastToShape (object, npy_intp *, int)
+ void _PyArray_SigintHandler (int)
+ void* _PyArray_GetSigintBuf ()
+ #int PyArray_DescrAlignConverter (object, dtype*)
+ #int PyArray_DescrAlignConverter2 (object, dtype*)
+ int PyArray_SearchsideConverter (object, void *)
+ object PyArray_CheckAxis (ndarray, int *, int)
+ npy_intp PyArray_OverflowMultiplyList (npy_intp *, int)
+ int PyArray_CompareString (char *, char *, size_t)
int PyArray_SetBaseObject(ndarray, base) # NOTE: steals a reference to base! Use "set_array_base()" instead.
-
-
-# Typedefs that match the runtime dtype objects in
-# the numpy module.
-
-# The ones that are commented out need an IFDEF function
-# in Cython to enable them only on the right systems.
-
-ctypedef npy_int8 int8_t
-ctypedef npy_int16 int16_t
-ctypedef npy_int32 int32_t
-ctypedef npy_int64 int64_t
-#ctypedef npy_int96 int96_t
-#ctypedef npy_int128 int128_t
-
-ctypedef npy_uint8 uint8_t
-ctypedef npy_uint16 uint16_t
-ctypedef npy_uint32 uint32_t
-ctypedef npy_uint64 uint64_t
-#ctypedef npy_uint96 uint96_t
-#ctypedef npy_uint128 uint128_t
-
-ctypedef npy_float32 float32_t
-ctypedef npy_float64 float64_t
-#ctypedef npy_float80 float80_t
-#ctypedef npy_float128 float128_t
-
-ctypedef float complex complex64_t
-ctypedef double complex complex128_t
-
-# The int types are mapped a bit surprisingly --
-# numpy.int corresponds to 'l' and numpy.long to 'q'
-ctypedef npy_long int_t
-ctypedef npy_longlong long_t
-ctypedef npy_longlong longlong_t
-
-ctypedef npy_ulong uint_t
-ctypedef npy_ulonglong ulong_t
-ctypedef npy_ulonglong ulonglong_t
-
-ctypedef npy_intp intp_t
-ctypedef npy_uintp uintp_t
-
-ctypedef npy_double float_t
-ctypedef npy_double double_t
-ctypedef npy_longdouble longdouble_t
-
-ctypedef npy_cfloat cfloat_t
-ctypedef npy_cdouble cdouble_t
-ctypedef npy_clongdouble clongdouble_t
-
-ctypedef npy_cdouble complex_t
-
-cdef inline object PyArray_MultiIterNew1(a):
- return PyArray_MultiIterNew(1, <void*>a)
-
-cdef inline object PyArray_MultiIterNew2(a, b):
- return PyArray_MultiIterNew(2, <void*>a, <void*>b)
-
-cdef inline object PyArray_MultiIterNew3(a, b, c):
- return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
-
-cdef inline object PyArray_MultiIterNew4(a, b, c, d):
- return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
-
-cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
- return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
-
+
+
+# Typedefs that match the runtime dtype objects in
+# the numpy module.
+
+# The ones that are commented out need an IFDEF function
+# in Cython to enable them only on the right systems.
+
+ctypedef npy_int8 int8_t
+ctypedef npy_int16 int16_t
+ctypedef npy_int32 int32_t
+ctypedef npy_int64 int64_t
+#ctypedef npy_int96 int96_t
+#ctypedef npy_int128 int128_t
+
+ctypedef npy_uint8 uint8_t
+ctypedef npy_uint16 uint16_t
+ctypedef npy_uint32 uint32_t
+ctypedef npy_uint64 uint64_t
+#ctypedef npy_uint96 uint96_t
+#ctypedef npy_uint128 uint128_t
+
+ctypedef npy_float32 float32_t
+ctypedef npy_float64 float64_t
+#ctypedef npy_float80 float80_t
+#ctypedef npy_float128 float128_t
+
+ctypedef float complex complex64_t
+ctypedef double complex complex128_t
+
+# The int types are mapped a bit surprisingly --
+# numpy.int corresponds to 'l' and numpy.long to 'q'
+ctypedef npy_long int_t
+ctypedef npy_longlong long_t
+ctypedef npy_longlong longlong_t
+
+ctypedef npy_ulong uint_t
+ctypedef npy_ulonglong ulong_t
+ctypedef npy_ulonglong ulonglong_t
+
+ctypedef npy_intp intp_t
+ctypedef npy_uintp uintp_t
+
+ctypedef npy_double float_t
+ctypedef npy_double double_t
+ctypedef npy_longdouble longdouble_t
+
+ctypedef npy_cfloat cfloat_t
+ctypedef npy_cdouble cdouble_t
+ctypedef npy_clongdouble clongdouble_t
+
+ctypedef npy_cdouble complex_t
+
+cdef inline object PyArray_MultiIterNew1(a):
+ return PyArray_MultiIterNew(1, <void*>a)
+
+cdef inline object PyArray_MultiIterNew2(a, b):
+ return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+
+cdef inline object PyArray_MultiIterNew3(a, b, c):
+ return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+
+cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+
+cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+
cdef inline tuple PyDataType_SHAPE(dtype d):
if PyDataType_HASSUBARRAY(d):
return <tuple>d.subarray.shape
else:
return ()
-cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
- # Recursive utility function used in __getbuffer__ to get format
- # string. The new location in the format string is returned.
-
- cdef dtype child
- cdef int endian_detector = 1
- cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
- cdef tuple fields
-
- for childname in descr.names:
- fields = descr.fields[childname]
- child, new_offset = fields
-
+cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
+ # Recursive utility function used in __getbuffer__ to get format
+ # string. The new location in the format string is returned.
+
+ cdef dtype child
+ cdef int endian_detector = 1
+ cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+ cdef tuple fields
+
+ for childname in descr.names:
+ fields = descr.fields[childname]
+ child, new_offset = fields
+
if (end - f) - <int>(new_offset - offset[0]) < 15:
- raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
-
- if ((child.byteorder == c'>' and little_endian) or
- (child.byteorder == c'<' and not little_endian)):
- raise ValueError(u"Non-native byte order not supported")
- # One could encode it in the format string and have Cython
- # complain instead, BUT: < and > in format strings also imply
- # standardized sizes for datatypes, and we rely on native in
- # order to avoid reencoding data types based on their size.
- #
- # A proper PEP 3118 exporter for other clients than Cython
- # must deal properly with this!
-
- # Output padding bytes
- while offset[0] < new_offset:
- f[0] = 120 # "x"; pad byte
- f += 1
- offset[0] += 1
-
- offset[0] += child.itemsize
-
- if not PyDataType_HASFIELDS(child):
- t = child.type_num
- if end - f < 5:
- raise RuntimeError(u"Format string allocated too short.")
-
- # Until ticket #99 is fixed, use integers to avoid warnings
- if t == NPY_BYTE: f[0] = 98 #"b"
- elif t == NPY_UBYTE: f[0] = 66 #"B"
- elif t == NPY_SHORT: f[0] = 104 #"h"
- elif t == NPY_USHORT: f[0] = 72 #"H"
- elif t == NPY_INT: f[0] = 105 #"i"
- elif t == NPY_UINT: f[0] = 73 #"I"
- elif t == NPY_LONG: f[0] = 108 #"l"
- elif t == NPY_ULONG: f[0] = 76 #"L"
- elif t == NPY_LONGLONG: f[0] = 113 #"q"
- elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
- elif t == NPY_FLOAT: f[0] = 102 #"f"
- elif t == NPY_DOUBLE: f[0] = 100 #"d"
- elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
- elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
- elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
- elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
- elif t == NPY_OBJECT: f[0] = 79 #"O"
- else:
- raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- f += 1
- else:
- # Cython ignores struct boundary information ("T{...}"),
- # so don't output it
- f = _util_dtypestring(child, f, end, offset)
- return f
-
-
-#
-# ufunc API
-#
-
-cdef extern from "numpy/ufuncobject.h":
-
- ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
-
- ctypedef extern class numpy.ufunc [object PyUFuncObject]:
- cdef:
- int nin, nout, nargs
- int identity
- PyUFuncGenericFunction *functions
- void **data
- int ntypes
- int check_return
+ raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
+
+ if ((child.byteorder == c'>' and little_endian) or
+ (child.byteorder == c'<' and not little_endian)):
+ raise ValueError(u"Non-native byte order not supported")
+ # One could encode it in the format string and have Cython
+ # complain instead, BUT: < and > in format strings also imply
+ # standardized sizes for datatypes, and we rely on native in
+ # order to avoid reencoding data types based on their size.
+ #
+ # A proper PEP 3118 exporter for other clients than Cython
+ # must deal properly with this!
+
+ # Output padding bytes
+ while offset[0] < new_offset:
+ f[0] = 120 # "x"; pad byte
+ f += 1
+ offset[0] += 1
+
+ offset[0] += child.itemsize
+
+ if not PyDataType_HASFIELDS(child):
+ t = child.type_num
+ if end - f < 5:
+ raise RuntimeError(u"Format string allocated too short.")
+
+ # Until ticket #99 is fixed, use integers to avoid warnings
+ if t == NPY_BYTE: f[0] = 98 #"b"
+ elif t == NPY_UBYTE: f[0] = 66 #"B"
+ elif t == NPY_SHORT: f[0] = 104 #"h"
+ elif t == NPY_USHORT: f[0] = 72 #"H"
+ elif t == NPY_INT: f[0] = 105 #"i"
+ elif t == NPY_UINT: f[0] = 73 #"I"
+ elif t == NPY_LONG: f[0] = 108 #"l"
+ elif t == NPY_ULONG: f[0] = 76 #"L"
+ elif t == NPY_LONGLONG: f[0] = 113 #"q"
+ elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
+ elif t == NPY_FLOAT: f[0] = 102 #"f"
+ elif t == NPY_DOUBLE: f[0] = 100 #"d"
+ elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
+ elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
+ elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
+ elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
+ elif t == NPY_OBJECT: f[0] = 79 #"O"
+ else:
+ raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ f += 1
+ else:
+ # Cython ignores struct boundary information ("T{...}"),
+ # so don't output it
+ f = _util_dtypestring(child, f, end, offset)
+ return f
+
+
+#
+# ufunc API
+#
+
+cdef extern from "numpy/ufuncobject.h":
+
+ ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
+
+ ctypedef extern class numpy.ufunc [object PyUFuncObject]:
+ cdef:
+ int nin, nout, nargs
+ int identity
+ PyUFuncGenericFunction *functions
+ void **data
+ int ntypes
+ int check_return
char *name
char *types
- char *doc
- void *ptr
- PyObject *obj
- PyObject *userloops
-
- cdef enum:
- PyUFunc_Zero
- PyUFunc_One
- PyUFunc_None
- UFUNC_ERR_IGNORE
- UFUNC_ERR_WARN
- UFUNC_ERR_RAISE
- UFUNC_ERR_CALL
- UFUNC_ERR_PRINT
- UFUNC_ERR_LOG
- UFUNC_MASK_DIVIDEBYZERO
- UFUNC_MASK_OVERFLOW
- UFUNC_MASK_UNDERFLOW
- UFUNC_MASK_INVALID
- UFUNC_SHIFT_DIVIDEBYZERO
- UFUNC_SHIFT_OVERFLOW
- UFUNC_SHIFT_UNDERFLOW
- UFUNC_SHIFT_INVALID
- UFUNC_FPE_DIVIDEBYZERO
- UFUNC_FPE_OVERFLOW
- UFUNC_FPE_UNDERFLOW
- UFUNC_FPE_INVALID
- UFUNC_ERR_DEFAULT
- UFUNC_ERR_DEFAULT2
-
- object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *,
- void **, char *, int, int, int, int, char *, char *, int)
- int PyUFunc_RegisterLoopForType(ufunc, int,
- PyUFuncGenericFunction, int *, void *)
- int PyUFunc_GenericFunction \
- (ufunc, PyObject *, PyObject *, PyArrayObject **)
- void PyUFunc_f_f_As_d_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_d_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_f_f \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_g_g \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_F_F_As_D_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_F_F \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_D_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_G_G \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_O_O \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_ff_f_As_dd_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_ff_f \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_dd_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_gg_g \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_FF_F_As_DD_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_DD_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_FF_F \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_GG_G \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_OO_O \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_O_O_method \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_OO_O_method \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_On_Om \
- (char **, npy_intp *, npy_intp *, void *)
- int PyUFunc_GetPyValues \
- (char *, int *, int *, PyObject **)
- int PyUFunc_checkfperr \
- (int, PyObject *, int *)
- void PyUFunc_clearfperr()
- int PyUFunc_getfperr()
- int PyUFunc_handlefperr \
- (int, PyObject *, int, int *)
- int PyUFunc_ReplaceLoopBySignature \
- (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)
- object PyUFunc_FromFuncAndDataAndSignature \
- (PyUFuncGenericFunction *, void **, char *, int, int, int,
- int, char *, char *, int, char *)
-
+ char *doc
+ void *ptr
+ PyObject *obj
+ PyObject *userloops
+
+ cdef enum:
+ PyUFunc_Zero
+ PyUFunc_One
+ PyUFunc_None
+ UFUNC_ERR_IGNORE
+ UFUNC_ERR_WARN
+ UFUNC_ERR_RAISE
+ UFUNC_ERR_CALL
+ UFUNC_ERR_PRINT
+ UFUNC_ERR_LOG
+ UFUNC_MASK_DIVIDEBYZERO
+ UFUNC_MASK_OVERFLOW
+ UFUNC_MASK_UNDERFLOW
+ UFUNC_MASK_INVALID
+ UFUNC_SHIFT_DIVIDEBYZERO
+ UFUNC_SHIFT_OVERFLOW
+ UFUNC_SHIFT_UNDERFLOW
+ UFUNC_SHIFT_INVALID
+ UFUNC_FPE_DIVIDEBYZERO
+ UFUNC_FPE_OVERFLOW
+ UFUNC_FPE_UNDERFLOW
+ UFUNC_FPE_INVALID
+ UFUNC_ERR_DEFAULT
+ UFUNC_ERR_DEFAULT2
+
+ object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *,
+ void **, char *, int, int, int, int, char *, char *, int)
+ int PyUFunc_RegisterLoopForType(ufunc, int,
+ PyUFuncGenericFunction, int *, void *)
+ int PyUFunc_GenericFunction \
+ (ufunc, PyObject *, PyObject *, PyArrayObject **)
+ void PyUFunc_f_f_As_d_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_d_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_f_f \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_g_g \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_F_F_As_D_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_F_F \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_D_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_G_G \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_O_O \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_ff_f_As_dd_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_ff_f \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_dd_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_gg_g \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_FF_F_As_DD_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_DD_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_FF_F \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_GG_G \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_OO_O \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_O_O_method \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_OO_O_method \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_On_Om \
+ (char **, npy_intp *, npy_intp *, void *)
+ int PyUFunc_GetPyValues \
+ (char *, int *, int *, PyObject **)
+ int PyUFunc_checkfperr \
+ (int, PyObject *, int *)
+ void PyUFunc_clearfperr()
+ int PyUFunc_getfperr()
+ int PyUFunc_handlefperr \
+ (int, PyObject *, int, int *)
+ int PyUFunc_ReplaceLoopBySignature \
+ (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)
+ object PyUFunc_FromFuncAndDataAndSignature \
+ (PyUFuncGenericFunction *, void **, char *, int, int, int,
+ int, char *, char *, int, char *)
+
int _import_umath() except -1
-
-cdef inline void set_array_base(ndarray arr, object base):
+
+cdef inline void set_array_base(ndarray arr, object base):
Py_INCREF(base) # important to do this before stealing the reference below!
PyArray_SetBaseObject(arr, base)
-
-cdef inline object get_array_base(ndarray arr):
+
+cdef inline object get_array_base(ndarray arr):
base = PyArray_BASE(arr)
if base is NULL:
- return None
+ return None
return <object>base
# Versions of the import_* functions which are more suitable for
diff --git a/contrib/tools/cython/Cython/Includes/numpy/__init__.pxd b/contrib/tools/cython/Cython/Includes/numpy/__init__.pxd
index ba272b4bf4..15700c05ef 100644
--- a/contrib/tools/cython/Cython/Includes/numpy/__init__.pxd
+++ b/contrib/tools/cython/Cython/Includes/numpy/__init__.pxd
@@ -1,97 +1,97 @@
-# NumPy static imports for Cython
-#
-# If any of the PyArray_* functions are called, import_array must be
-# called first.
-#
+# NumPy static imports for Cython
+#
+# If any of the PyArray_* functions are called, import_array must be
+# called first.
+#
# This also defines backwards-compatibility buffer acquisition
-# code for use in Python 2.x (or Python <= 2.5 when NumPy starts
-# implementing PEP-3118 directly).
-#
-# Because of laziness, the format string of the buffer is statically
-# allocated. Increase the size if this is not enough, or submit a
-# patch to do this properly.
-#
-# Author: Dag Sverre Seljebotn
-#
-
-DEF _buffer_format_string_len = 255
-
-cimport cpython.buffer as pybuf
+# code for use in Python 2.x (or Python <= 2.5 when NumPy starts
+# implementing PEP-3118 directly).
+#
+# Because of laziness, the format string of the buffer is statically
+# allocated. Increase the size if this is not enough, or submit a
+# patch to do this properly.
+#
+# Author: Dag Sverre Seljebotn
+#
+
+DEF _buffer_format_string_len = 255
+
+cimport cpython.buffer as pybuf
from cpython.ref cimport Py_INCREF
from cpython.mem cimport PyObject_Malloc, PyObject_Free
from cpython.object cimport PyObject, PyTypeObject
-from cpython.type cimport type
-cimport libc.stdio as stdio
-
-cdef extern from "Python.h":
- ctypedef int Py_intptr_t
-
-cdef extern from "numpy/arrayobject.h":
- ctypedef Py_intptr_t npy_intp
- ctypedef size_t npy_uintp
-
- cdef enum NPY_TYPES:
- NPY_BOOL
- NPY_BYTE
- NPY_UBYTE
- NPY_SHORT
- NPY_USHORT
- NPY_INT
- NPY_UINT
- NPY_LONG
- NPY_ULONG
- NPY_LONGLONG
- NPY_ULONGLONG
- NPY_FLOAT
- NPY_DOUBLE
- NPY_LONGDOUBLE
- NPY_CFLOAT
- NPY_CDOUBLE
- NPY_CLONGDOUBLE
- NPY_OBJECT
- NPY_STRING
- NPY_UNICODE
- NPY_VOID
+from cpython.type cimport type
+cimport libc.stdio as stdio
+
+cdef extern from "Python.h":
+ ctypedef int Py_intptr_t
+
+cdef extern from "numpy/arrayobject.h":
+ ctypedef Py_intptr_t npy_intp
+ ctypedef size_t npy_uintp
+
+ cdef enum NPY_TYPES:
+ NPY_BOOL
+ NPY_BYTE
+ NPY_UBYTE
+ NPY_SHORT
+ NPY_USHORT
+ NPY_INT
+ NPY_UINT
+ NPY_LONG
+ NPY_ULONG
+ NPY_LONGLONG
+ NPY_ULONGLONG
+ NPY_FLOAT
+ NPY_DOUBLE
+ NPY_LONGDOUBLE
+ NPY_CFLOAT
+ NPY_CDOUBLE
+ NPY_CLONGDOUBLE
+ NPY_OBJECT
+ NPY_STRING
+ NPY_UNICODE
+ NPY_VOID
NPY_DATETIME
NPY_TIMEDELTA
- NPY_NTYPES
- NPY_NOTYPE
-
- NPY_INT8
- NPY_INT16
- NPY_INT32
- NPY_INT64
- NPY_INT128
- NPY_INT256
- NPY_UINT8
- NPY_UINT16
- NPY_UINT32
- NPY_UINT64
- NPY_UINT128
- NPY_UINT256
- NPY_FLOAT16
- NPY_FLOAT32
- NPY_FLOAT64
- NPY_FLOAT80
- NPY_FLOAT96
- NPY_FLOAT128
- NPY_FLOAT256
- NPY_COMPLEX32
- NPY_COMPLEX64
- NPY_COMPLEX128
- NPY_COMPLEX160
- NPY_COMPLEX192
- NPY_COMPLEX256
- NPY_COMPLEX512
-
- NPY_INTP
-
- ctypedef enum NPY_ORDER:
- NPY_ANYORDER
- NPY_CORDER
- NPY_FORTRANORDER
+ NPY_NTYPES
+ NPY_NOTYPE
+
+ NPY_INT8
+ NPY_INT16
+ NPY_INT32
+ NPY_INT64
+ NPY_INT128
+ NPY_INT256
+ NPY_UINT8
+ NPY_UINT16
+ NPY_UINT32
+ NPY_UINT64
+ NPY_UINT128
+ NPY_UINT256
+ NPY_FLOAT16
+ NPY_FLOAT32
+ NPY_FLOAT64
+ NPY_FLOAT80
+ NPY_FLOAT96
+ NPY_FLOAT128
+ NPY_FLOAT256
+ NPY_COMPLEX32
+ NPY_COMPLEX64
+ NPY_COMPLEX128
+ NPY_COMPLEX160
+ NPY_COMPLEX192
+ NPY_COMPLEX256
+ NPY_COMPLEX512
+
+ NPY_INTP
+
+ ctypedef enum NPY_ORDER:
+ NPY_ANYORDER
+ NPY_CORDER
+ NPY_FORTRANORDER
NPY_KEEPORDER
-
+
ctypedef enum NPY_CASTING:
NPY_NO_CASTING
NPY_EQUIV_CASTING
@@ -99,63 +99,63 @@ cdef extern from "numpy/arrayobject.h":
NPY_SAME_KIND_CASTING
NPY_UNSAFE_CASTING
- ctypedef enum NPY_CLIPMODE:
- NPY_CLIP
- NPY_WRAP
- NPY_RAISE
-
- ctypedef enum NPY_SCALARKIND:
- NPY_NOSCALAR,
- NPY_BOOL_SCALAR,
- NPY_INTPOS_SCALAR,
- NPY_INTNEG_SCALAR,
- NPY_FLOAT_SCALAR,
- NPY_COMPLEX_SCALAR,
- NPY_OBJECT_SCALAR
-
- ctypedef enum NPY_SORTKIND:
- NPY_QUICKSORT
- NPY_HEAPSORT
- NPY_MERGESORT
-
- ctypedef enum NPY_SEARCHSIDE:
- NPY_SEARCHLEFT
- NPY_SEARCHRIGHT
-
- enum:
+ ctypedef enum NPY_CLIPMODE:
+ NPY_CLIP
+ NPY_WRAP
+ NPY_RAISE
+
+ ctypedef enum NPY_SCALARKIND:
+ NPY_NOSCALAR,
+ NPY_BOOL_SCALAR,
+ NPY_INTPOS_SCALAR,
+ NPY_INTNEG_SCALAR,
+ NPY_FLOAT_SCALAR,
+ NPY_COMPLEX_SCALAR,
+ NPY_OBJECT_SCALAR
+
+ ctypedef enum NPY_SORTKIND:
+ NPY_QUICKSORT
+ NPY_HEAPSORT
+ NPY_MERGESORT
+
+ ctypedef enum NPY_SEARCHSIDE:
+ NPY_SEARCHLEFT
+ NPY_SEARCHRIGHT
+
+ enum:
# DEPRECATED since NumPy 1.7 ! Do not use in new code!
- NPY_C_CONTIGUOUS
- NPY_F_CONTIGUOUS
- NPY_CONTIGUOUS
- NPY_FORTRAN
- NPY_OWNDATA
- NPY_FORCECAST
- NPY_ENSURECOPY
- NPY_ENSUREARRAY
- NPY_ELEMENTSTRIDES
- NPY_ALIGNED
- NPY_NOTSWAPPED
- NPY_WRITEABLE
- NPY_UPDATEIFCOPY
- NPY_ARR_HAS_DESCR
-
- NPY_BEHAVED
- NPY_BEHAVED_NS
- NPY_CARRAY
- NPY_CARRAY_RO
- NPY_FARRAY
- NPY_FARRAY_RO
- NPY_DEFAULT
-
- NPY_IN_ARRAY
- NPY_OUT_ARRAY
- NPY_INOUT_ARRAY
- NPY_IN_FARRAY
- NPY_OUT_FARRAY
- NPY_INOUT_FARRAY
-
- NPY_UPDATE_ALL
-
+ NPY_C_CONTIGUOUS
+ NPY_F_CONTIGUOUS
+ NPY_CONTIGUOUS
+ NPY_FORTRAN
+ NPY_OWNDATA
+ NPY_FORCECAST
+ NPY_ENSURECOPY
+ NPY_ENSUREARRAY
+ NPY_ELEMENTSTRIDES
+ NPY_ALIGNED
+ NPY_NOTSWAPPED
+ NPY_WRITEABLE
+ NPY_UPDATEIFCOPY
+ NPY_ARR_HAS_DESCR
+
+ NPY_BEHAVED
+ NPY_BEHAVED_NS
+ NPY_CARRAY
+ NPY_CARRAY_RO
+ NPY_FARRAY
+ NPY_FARRAY_RO
+ NPY_DEFAULT
+
+ NPY_IN_ARRAY
+ NPY_OUT_ARRAY
+ NPY_INOUT_ARRAY
+ NPY_IN_FARRAY
+ NPY_OUT_FARRAY
+ NPY_INOUT_FARRAY
+
+ NPY_UPDATE_ALL
+
enum:
# Added in NumPy 1.7 to replace the deprecated enums above.
NPY_ARRAY_C_CONTIGUOUS
@@ -187,13 +187,13 @@ cdef extern from "numpy/arrayobject.h":
NPY_ARRAY_UPDATE_ALL
- cdef enum:
- NPY_MAXDIMS
-
- npy_intp NPY_MAX_ELSIZE
-
- ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *)
-
+ cdef enum:
+ NPY_MAXDIMS
+
+ npy_intp NPY_MAX_ELSIZE
+
+ ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *)
+
ctypedef struct PyArray_ArrayDescr:
# shape is a tuple, but Cython doesn't support "tuple shape"
# inside a non-PyObject declaration, so we have to declare it
@@ -204,7 +204,7 @@ cdef extern from "numpy/arrayobject.h":
pass
ctypedef class numpy.dtype [object PyArray_Descr, check_size ignore]:
- # Use PyDataType_* macros when possible, however there are no macros
+ # Use PyDataType_* macros when possible, however there are no macros
# for accessing some of the fields, so some are defined.
cdef PyTypeObject* typeobj
cdef char kind
@@ -216,816 +216,816 @@ cdef extern from "numpy/arrayobject.h":
# directly accessing this field.
cdef char byteorder
cdef char flags
- cdef int type_num
- cdef int itemsize "elsize"
+ cdef int type_num
+ cdef int itemsize "elsize"
cdef int alignment
cdef dict fields
- cdef tuple names
+ cdef tuple names
# Use PyDataType_HASSUBARRAY to test whether this field is
# valid (the pointer can be NULL). Most users should access
# this field via the inline helper method PyDataType_SHAPE.
cdef PyArray_ArrayDescr* subarray
-
+
ctypedef class numpy.flatiter [object PyArrayIterObject, check_size ignore]:
- # Use through macros
- pass
-
+ # Use through macros
+ pass
+
ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]:
- # Use through macros
- pass
-
- ctypedef struct PyArrayObject:
- # For use in situations where ndarray can't replace PyArrayObject*,
- # like PyArrayObject**.
- pass
-
+ # Use through macros
+ pass
+
+ ctypedef struct PyArrayObject:
+ # For use in situations where ndarray can't replace PyArrayObject*,
+ # like PyArrayObject**.
+ pass
+
ctypedef class numpy.ndarray [object PyArrayObject, check_size ignore]:
- cdef __cythonbufferdefaults__ = {"mode": "strided"}
-
- cdef:
- # Only taking a few of the most commonly used and stable fields.
- # One should use PyArray_* macros instead to access the C fields.
- char *data
- int ndim "nd"
- npy_intp *shape "dimensions"
- npy_intp *strides
+ cdef __cythonbufferdefaults__ = {"mode": "strided"}
+
+ cdef:
+ # Only taking a few of the most commonly used and stable fields.
+ # One should use PyArray_* macros instead to access the C fields.
+ char *data
+ int ndim "nd"
+ npy_intp *shape "dimensions"
+ npy_intp *strides
dtype descr # deprecated since NumPy 1.7 !
- PyObject* base
-
- # Note: This syntax (function definition in pxd files) is an
- # experimental exception made for __getbuffer__ and __releasebuffer__
- # -- the details of this may change.
- def __getbuffer__(ndarray self, Py_buffer* info, int flags):
- # This implementation of getbuffer is geared towards Cython
+ PyObject* base
+
+ # Note: This syntax (function definition in pxd files) is an
+ # experimental exception made for __getbuffer__ and __releasebuffer__
+ # -- the details of this may change.
+ def __getbuffer__(ndarray self, Py_buffer* info, int flags):
+ # This implementation of getbuffer is geared towards Cython
# requirements, and does not yet fulfill the PEP.
- # In particular strided access is always provided regardless
- # of flags
-
+ # In particular strided access is always provided regardless
+ # of flags
+
cdef int i, ndim
- cdef int endian_detector = 1
- cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
-
- ndim = PyArray_NDIM(self)
-
- if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
+ cdef int endian_detector = 1
+ cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+
+ ndim = PyArray_NDIM(self)
+
+ if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
and not PyArray_CHKFLAGS(self, NPY_ARRAY_C_CONTIGUOUS)):
- raise ValueError(u"ndarray is not C contiguous")
-
- if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ raise ValueError(u"ndarray is not C contiguous")
+
+ if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
and not PyArray_CHKFLAGS(self, NPY_ARRAY_F_CONTIGUOUS)):
- raise ValueError(u"ndarray is not Fortran contiguous")
-
- info.buf = PyArray_DATA(self)
- info.ndim = ndim
+ raise ValueError(u"ndarray is not Fortran contiguous")
+
+ info.buf = PyArray_DATA(self)
+ info.ndim = ndim
if sizeof(npy_intp) != sizeof(Py_ssize_t):
- # Allocate new buffer for strides and shape info.
- # This is allocated as one block, strides first.
+ # Allocate new buffer for strides and shape info.
+ # This is allocated as one block, strides first.
info.strides = <Py_ssize_t*>PyObject_Malloc(sizeof(Py_ssize_t) * 2 * <size_t>ndim)
- info.shape = info.strides + ndim
- for i in range(ndim):
- info.strides[i] = PyArray_STRIDES(self)[i]
- info.shape[i] = PyArray_DIMS(self)[i]
- else:
- info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
- info.shape = <Py_ssize_t*>PyArray_DIMS(self)
- info.suboffsets = NULL
- info.itemsize = PyArray_ITEMSIZE(self)
- info.readonly = not PyArray_ISWRITEABLE(self)
-
- cdef int t
- cdef char* f = NULL
+ info.shape = info.strides + ndim
+ for i in range(ndim):
+ info.strides[i] = PyArray_STRIDES(self)[i]
+ info.shape[i] = PyArray_DIMS(self)[i]
+ else:
+ info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ info.suboffsets = NULL
+ info.itemsize = PyArray_ITEMSIZE(self)
+ info.readonly = not PyArray_ISWRITEABLE(self)
+
+ cdef int t
+ cdef char* f = NULL
cdef dtype descr = <dtype>PyArray_DESCR(self)
- cdef int offset
-
+ cdef int offset
+
info.obj = self
-
+
if not PyDataType_HASFIELDS(descr):
- t = descr.type_num
- if ((descr.byteorder == c'>' and little_endian) or
- (descr.byteorder == c'<' and not little_endian)):
- raise ValueError(u"Non-native byte order not supported")
- if t == NPY_BYTE: f = "b"
- elif t == NPY_UBYTE: f = "B"
- elif t == NPY_SHORT: f = "h"
- elif t == NPY_USHORT: f = "H"
- elif t == NPY_INT: f = "i"
- elif t == NPY_UINT: f = "I"
- elif t == NPY_LONG: f = "l"
- elif t == NPY_ULONG: f = "L"
- elif t == NPY_LONGLONG: f = "q"
- elif t == NPY_ULONGLONG: f = "Q"
- elif t == NPY_FLOAT: f = "f"
- elif t == NPY_DOUBLE: f = "d"
- elif t == NPY_LONGDOUBLE: f = "g"
- elif t == NPY_CFLOAT: f = "Zf"
- elif t == NPY_CDOUBLE: f = "Zd"
- elif t == NPY_CLONGDOUBLE: f = "Zg"
- elif t == NPY_OBJECT: f = "O"
- else:
- raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- info.format = f
- return
- else:
+ t = descr.type_num
+ if ((descr.byteorder == c'>' and little_endian) or
+ (descr.byteorder == c'<' and not little_endian)):
+ raise ValueError(u"Non-native byte order not supported")
+ if t == NPY_BYTE: f = "b"
+ elif t == NPY_UBYTE: f = "B"
+ elif t == NPY_SHORT: f = "h"
+ elif t == NPY_USHORT: f = "H"
+ elif t == NPY_INT: f = "i"
+ elif t == NPY_UINT: f = "I"
+ elif t == NPY_LONG: f = "l"
+ elif t == NPY_ULONG: f = "L"
+ elif t == NPY_LONGLONG: f = "q"
+ elif t == NPY_ULONGLONG: f = "Q"
+ elif t == NPY_FLOAT: f = "f"
+ elif t == NPY_DOUBLE: f = "d"
+ elif t == NPY_LONGDOUBLE: f = "g"
+ elif t == NPY_CFLOAT: f = "Zf"
+ elif t == NPY_CDOUBLE: f = "Zd"
+ elif t == NPY_CLONGDOUBLE: f = "Zg"
+ elif t == NPY_OBJECT: f = "O"
+ else:
+ raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ info.format = f
+ return
+ else:
info.format = <char*>PyObject_Malloc(_buffer_format_string_len)
- info.format[0] = c'^' # Native data types, manual alignment
- offset = 0
- f = _util_dtypestring(descr, info.format + 1,
- info.format + _buffer_format_string_len,
- &offset)
- f[0] = c'\0' # Terminate format string
-
- def __releasebuffer__(ndarray self, Py_buffer* info):
- if PyArray_HASFIELDS(self):
+ info.format[0] = c'^' # Native data types, manual alignment
+ offset = 0
+ f = _util_dtypestring(descr, info.format + 1,
+ info.format + _buffer_format_string_len,
+ &offset)
+ f[0] = c'\0' # Terminate format string
+
+ def __releasebuffer__(ndarray self, Py_buffer* info):
+ if PyArray_HASFIELDS(self):
PyObject_Free(info.format)
- if sizeof(npy_intp) != sizeof(Py_ssize_t):
+ if sizeof(npy_intp) != sizeof(Py_ssize_t):
PyObject_Free(info.strides)
- # info.shape was stored after info.strides in the same block
-
- ctypedef unsigned char npy_bool
-
- ctypedef signed char npy_byte
- ctypedef signed short npy_short
- ctypedef signed int npy_int
- ctypedef signed long npy_long
- ctypedef signed long long npy_longlong
-
- ctypedef unsigned char npy_ubyte
- ctypedef unsigned short npy_ushort
- ctypedef unsigned int npy_uint
- ctypedef unsigned long npy_ulong
- ctypedef unsigned long long npy_ulonglong
-
- ctypedef float npy_float
- ctypedef double npy_double
- ctypedef long double npy_longdouble
-
- ctypedef signed char npy_int8
- ctypedef signed short npy_int16
- ctypedef signed int npy_int32
- ctypedef signed long long npy_int64
- ctypedef signed long long npy_int96
- ctypedef signed long long npy_int128
-
- ctypedef unsigned char npy_uint8
- ctypedef unsigned short npy_uint16
- ctypedef unsigned int npy_uint32
- ctypedef unsigned long long npy_uint64
- ctypedef unsigned long long npy_uint96
- ctypedef unsigned long long npy_uint128
-
- ctypedef float npy_float32
- ctypedef double npy_float64
- ctypedef long double npy_float80
- ctypedef long double npy_float96
- ctypedef long double npy_float128
-
- ctypedef struct npy_cfloat:
- double real
- double imag
-
- ctypedef struct npy_cdouble:
- double real
- double imag
-
- ctypedef struct npy_clongdouble:
+ # info.shape was stored after info.strides in the same block
+
+ ctypedef unsigned char npy_bool
+
+ ctypedef signed char npy_byte
+ ctypedef signed short npy_short
+ ctypedef signed int npy_int
+ ctypedef signed long npy_long
+ ctypedef signed long long npy_longlong
+
+ ctypedef unsigned char npy_ubyte
+ ctypedef unsigned short npy_ushort
+ ctypedef unsigned int npy_uint
+ ctypedef unsigned long npy_ulong
+ ctypedef unsigned long long npy_ulonglong
+
+ ctypedef float npy_float
+ ctypedef double npy_double
+ ctypedef long double npy_longdouble
+
+ ctypedef signed char npy_int8
+ ctypedef signed short npy_int16
+ ctypedef signed int npy_int32
+ ctypedef signed long long npy_int64
+ ctypedef signed long long npy_int96
+ ctypedef signed long long npy_int128
+
+ ctypedef unsigned char npy_uint8
+ ctypedef unsigned short npy_uint16
+ ctypedef unsigned int npy_uint32
+ ctypedef unsigned long long npy_uint64
+ ctypedef unsigned long long npy_uint96
+ ctypedef unsigned long long npy_uint128
+
+ ctypedef float npy_float32
+ ctypedef double npy_float64
+ ctypedef long double npy_float80
+ ctypedef long double npy_float96
+ ctypedef long double npy_float128
+
+ ctypedef struct npy_cfloat:
+ double real
+ double imag
+
+ ctypedef struct npy_cdouble:
+ double real
+ double imag
+
+ ctypedef struct npy_clongdouble:
long double real
long double imag
-
- ctypedef struct npy_complex64:
+
+ ctypedef struct npy_complex64:
float real
float imag
-
- ctypedef struct npy_complex128:
- double real
- double imag
-
- ctypedef struct npy_complex160:
+
+ ctypedef struct npy_complex128:
+ double real
+ double imag
+
+ ctypedef struct npy_complex160:
long double real
long double imag
-
- ctypedef struct npy_complex192:
+
+ ctypedef struct npy_complex192:
long double real
long double imag
-
- ctypedef struct npy_complex256:
+
+ ctypedef struct npy_complex256:
long double real
long double imag
-
- ctypedef struct PyArray_Dims:
- npy_intp *ptr
- int len
-
+
+ ctypedef struct PyArray_Dims:
+ npy_intp *ptr
+ int len
+
int _import_array() except -1
-
- #
- # Macros from ndarrayobject.h
- #
- bint PyArray_CHKFLAGS(ndarray m, int flags)
+
+ #
+ # Macros from ndarrayobject.h
+ #
+ bint PyArray_CHKFLAGS(ndarray m, int flags)
bint PyArray_IS_C_CONTIGUOUS(ndarray arr)
bint PyArray_IS_F_CONTIGUOUS(ndarray arr)
- bint PyArray_ISCONTIGUOUS(ndarray m)
- bint PyArray_ISWRITEABLE(ndarray m)
- bint PyArray_ISALIGNED(ndarray m)
-
- int PyArray_NDIM(ndarray)
- bint PyArray_ISONESEGMENT(ndarray)
- bint PyArray_ISFORTRAN(ndarray)
- int PyArray_FORTRANIF(ndarray)
-
- void* PyArray_DATA(ndarray)
- char* PyArray_BYTES(ndarray)
- npy_intp* PyArray_DIMS(ndarray)
- npy_intp* PyArray_STRIDES(ndarray)
- npy_intp PyArray_DIM(ndarray, size_t)
- npy_intp PyArray_STRIDE(ndarray, size_t)
-
+ bint PyArray_ISCONTIGUOUS(ndarray m)
+ bint PyArray_ISWRITEABLE(ndarray m)
+ bint PyArray_ISALIGNED(ndarray m)
+
+ int PyArray_NDIM(ndarray)
+ bint PyArray_ISONESEGMENT(ndarray)
+ bint PyArray_ISFORTRAN(ndarray)
+ int PyArray_FORTRANIF(ndarray)
+
+ void* PyArray_DATA(ndarray)
+ char* PyArray_BYTES(ndarray)
+ npy_intp* PyArray_DIMS(ndarray)
+ npy_intp* PyArray_STRIDES(ndarray)
+ npy_intp PyArray_DIM(ndarray, size_t)
+ npy_intp PyArray_STRIDE(ndarray, size_t)
+
PyObject *PyArray_BASE(ndarray) # returns borrowed reference!
PyArray_Descr *PyArray_DESCR(ndarray) # returns borrowed reference to dtype!
- int PyArray_FLAGS(ndarray)
- npy_intp PyArray_ITEMSIZE(ndarray)
- int PyArray_TYPE(ndarray arr)
-
- object PyArray_GETITEM(ndarray arr, void *itemptr)
- int PyArray_SETITEM(ndarray arr, void *itemptr, object obj)
-
- bint PyTypeNum_ISBOOL(int)
- bint PyTypeNum_ISUNSIGNED(int)
- bint PyTypeNum_ISSIGNED(int)
- bint PyTypeNum_ISINTEGER(int)
- bint PyTypeNum_ISFLOAT(int)
- bint PyTypeNum_ISNUMBER(int)
- bint PyTypeNum_ISSTRING(int)
- bint PyTypeNum_ISCOMPLEX(int)
- bint PyTypeNum_ISPYTHON(int)
- bint PyTypeNum_ISFLEXIBLE(int)
- bint PyTypeNum_ISUSERDEF(int)
- bint PyTypeNum_ISEXTENDED(int)
- bint PyTypeNum_ISOBJECT(int)
-
- bint PyDataType_ISBOOL(dtype)
- bint PyDataType_ISUNSIGNED(dtype)
- bint PyDataType_ISSIGNED(dtype)
- bint PyDataType_ISINTEGER(dtype)
- bint PyDataType_ISFLOAT(dtype)
- bint PyDataType_ISNUMBER(dtype)
- bint PyDataType_ISSTRING(dtype)
- bint PyDataType_ISCOMPLEX(dtype)
- bint PyDataType_ISPYTHON(dtype)
- bint PyDataType_ISFLEXIBLE(dtype)
- bint PyDataType_ISUSERDEF(dtype)
- bint PyDataType_ISEXTENDED(dtype)
- bint PyDataType_ISOBJECT(dtype)
- bint PyDataType_HASFIELDS(dtype)
+ int PyArray_FLAGS(ndarray)
+ npy_intp PyArray_ITEMSIZE(ndarray)
+ int PyArray_TYPE(ndarray arr)
+
+ object PyArray_GETITEM(ndarray arr, void *itemptr)
+ int PyArray_SETITEM(ndarray arr, void *itemptr, object obj)
+
+ bint PyTypeNum_ISBOOL(int)
+ bint PyTypeNum_ISUNSIGNED(int)
+ bint PyTypeNum_ISSIGNED(int)
+ bint PyTypeNum_ISINTEGER(int)
+ bint PyTypeNum_ISFLOAT(int)
+ bint PyTypeNum_ISNUMBER(int)
+ bint PyTypeNum_ISSTRING(int)
+ bint PyTypeNum_ISCOMPLEX(int)
+ bint PyTypeNum_ISPYTHON(int)
+ bint PyTypeNum_ISFLEXIBLE(int)
+ bint PyTypeNum_ISUSERDEF(int)
+ bint PyTypeNum_ISEXTENDED(int)
+ bint PyTypeNum_ISOBJECT(int)
+
+ bint PyDataType_ISBOOL(dtype)
+ bint PyDataType_ISUNSIGNED(dtype)
+ bint PyDataType_ISSIGNED(dtype)
+ bint PyDataType_ISINTEGER(dtype)
+ bint PyDataType_ISFLOAT(dtype)
+ bint PyDataType_ISNUMBER(dtype)
+ bint PyDataType_ISSTRING(dtype)
+ bint PyDataType_ISCOMPLEX(dtype)
+ bint PyDataType_ISPYTHON(dtype)
+ bint PyDataType_ISFLEXIBLE(dtype)
+ bint PyDataType_ISUSERDEF(dtype)
+ bint PyDataType_ISEXTENDED(dtype)
+ bint PyDataType_ISOBJECT(dtype)
+ bint PyDataType_HASFIELDS(dtype)
bint PyDataType_HASSUBARRAY(dtype)
-
- bint PyArray_ISBOOL(ndarray)
- bint PyArray_ISUNSIGNED(ndarray)
- bint PyArray_ISSIGNED(ndarray)
- bint PyArray_ISINTEGER(ndarray)
- bint PyArray_ISFLOAT(ndarray)
- bint PyArray_ISNUMBER(ndarray)
- bint PyArray_ISSTRING(ndarray)
- bint PyArray_ISCOMPLEX(ndarray)
- bint PyArray_ISPYTHON(ndarray)
- bint PyArray_ISFLEXIBLE(ndarray)
- bint PyArray_ISUSERDEF(ndarray)
- bint PyArray_ISEXTENDED(ndarray)
- bint PyArray_ISOBJECT(ndarray)
- bint PyArray_HASFIELDS(ndarray)
-
- bint PyArray_ISVARIABLE(ndarray)
-
- bint PyArray_SAFEALIGNEDCOPY(ndarray)
- bint PyArray_ISNBO(char) # works on ndarray.byteorder
- bint PyArray_IsNativeByteOrder(char) # works on ndarray.byteorder
- bint PyArray_ISNOTSWAPPED(ndarray)
- bint PyArray_ISBYTESWAPPED(ndarray)
-
- bint PyArray_FLAGSWAP(ndarray, int)
-
- bint PyArray_ISCARRAY(ndarray)
- bint PyArray_ISCARRAY_RO(ndarray)
- bint PyArray_ISFARRAY(ndarray)
- bint PyArray_ISFARRAY_RO(ndarray)
- bint PyArray_ISBEHAVED(ndarray)
- bint PyArray_ISBEHAVED_RO(ndarray)
-
-
- bint PyDataType_ISNOTSWAPPED(dtype)
- bint PyDataType_ISBYTESWAPPED(dtype)
-
- bint PyArray_DescrCheck(object)
-
- bint PyArray_Check(object)
- bint PyArray_CheckExact(object)
-
- # Cannot be supported due to out arg:
- # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&)
- # bint PyArray_HasArrayInterface(op, out)
-
-
- bint PyArray_IsZeroDim(object)
- # Cannot be supported due to ## ## in macro:
- # bint PyArray_IsScalar(object, verbatim work)
- bint PyArray_CheckScalar(object)
- bint PyArray_IsPythonNumber(object)
- bint PyArray_IsPythonScalar(object)
- bint PyArray_IsAnyScalar(object)
- bint PyArray_CheckAnyScalar(object)
- ndarray PyArray_GETCONTIGUOUS(ndarray)
- bint PyArray_SAMESHAPE(ndarray, ndarray)
- npy_intp PyArray_SIZE(ndarray)
- npy_intp PyArray_NBYTES(ndarray)
-
- object PyArray_FROM_O(object)
- object PyArray_FROM_OF(object m, int flags)
- object PyArray_FROM_OT(object m, int type)
- object PyArray_FROM_OTF(object m, int type, int flags)
- object PyArray_FROMANY(object m, int type, int min, int max, int flags)
- object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran)
- object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran)
- void PyArray_FILLWBYTE(object, int val)
- npy_intp PyArray_REFCOUNT(object)
- object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth)
- unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2)
- bint PyArray_EquivByteorders(int b1, int b2)
- object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum)
- object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data)
- #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr)
- object PyArray_ToScalar(void* data, ndarray arr)
-
- void* PyArray_GETPTR1(ndarray m, npy_intp i)
- void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j)
- void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k)
- void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l)
-
- void PyArray_XDECREF_ERR(ndarray)
- # Cannot be supported due to out arg
- # void PyArray_DESCR_REPLACE(descr)
-
-
- object PyArray_Copy(ndarray)
- object PyArray_FromObject(object op, int type, int min_depth, int max_depth)
- object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth)
- object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth)
-
- object PyArray_Cast(ndarray mp, int type_num)
- object PyArray_Take(ndarray ap, object items, int axis)
- object PyArray_Put(ndarray ap, object items, object values)
-
- void PyArray_ITER_RESET(flatiter it) nogil
- void PyArray_ITER_NEXT(flatiter it) nogil
- void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil
- void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil
- void* PyArray_ITER_DATA(flatiter it) nogil
- bint PyArray_ITER_NOTDONE(flatiter it) nogil
-
- void PyArray_MultiIter_RESET(broadcast multi) nogil
- void PyArray_MultiIter_NEXT(broadcast multi) nogil
- void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil
- void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil
- void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil
- void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil
- bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil
-
- # Functions from __multiarray_api.h
-
- # Functions taking dtype and returning object/ndarray are disabled
- # for now as they steal dtype references. I'm conservative and disable
- # more than is probably needed until it can be checked further.
- int PyArray_SetNumericOps (object)
- object PyArray_GetNumericOps ()
- int PyArray_INCREF (ndarray)
- int PyArray_XDECREF (ndarray)
- void PyArray_SetStringFunction (object, int)
- dtype PyArray_DescrFromType (int)
- object PyArray_TypeObjectFromType (int)
- char * PyArray_Zero (ndarray)
- char * PyArray_One (ndarray)
- #object PyArray_CastToType (ndarray, dtype, int)
- int PyArray_CastTo (ndarray, ndarray)
- int PyArray_CastAnyTo (ndarray, ndarray)
- int PyArray_CanCastSafely (int, int)
- npy_bool PyArray_CanCastTo (dtype, dtype)
- int PyArray_ObjectType (object, int)
- dtype PyArray_DescrFromObject (object, dtype)
- #ndarray* PyArray_ConvertToCommonType (object, int *)
- dtype PyArray_DescrFromScalar (object)
- dtype PyArray_DescrFromTypeObject (object)
- npy_intp PyArray_Size (object)
- #object PyArray_Scalar (void *, dtype, object)
- #object PyArray_FromScalar (object, dtype)
- void PyArray_ScalarAsCtype (object, void *)
- #int PyArray_CastScalarToCtype (object, void *, dtype)
- #int PyArray_CastScalarDirect (object, dtype, void *, int)
- object PyArray_ScalarFromObject (object)
- #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int)
- object PyArray_FromDims (int, int *, int)
- #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *)
- #object PyArray_FromAny (object, dtype, int, int, int, object)
- object PyArray_EnsureArray (object)
- object PyArray_EnsureAnyArray (object)
- #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *)
- #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *)
- #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp)
- #object PyArray_FromIter (object, dtype, npy_intp)
- object PyArray_Return (ndarray)
- #object PyArray_GetField (ndarray, dtype, int)
- #int PyArray_SetField (ndarray, dtype, int, object)
- object PyArray_Byteswap (ndarray, npy_bool)
- object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER)
- int PyArray_MoveInto (ndarray, ndarray)
- int PyArray_CopyInto (ndarray, ndarray)
- int PyArray_CopyAnyInto (ndarray, ndarray)
- int PyArray_CopyObject (ndarray, object)
- object PyArray_NewCopy (ndarray, NPY_ORDER)
- object PyArray_ToList (ndarray)
- object PyArray_ToString (ndarray, NPY_ORDER)
- int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *)
- int PyArray_Dump (object, object, int)
- object PyArray_Dumps (object, int)
- int PyArray_ValidType (int)
- void PyArray_UpdateFlags (ndarray, int)
- object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object)
- #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object)
- #dtype PyArray_DescrNew (dtype)
- dtype PyArray_DescrNewFromType (int)
- double PyArray_GetPriority (object, double)
- object PyArray_IterNew (object)
- object PyArray_MultiIterNew (int, ...)
-
- int PyArray_PyIntAsInt (object)
- npy_intp PyArray_PyIntAsIntp (object)
- int PyArray_Broadcast (broadcast)
- void PyArray_FillObjectArray (ndarray, object)
- int PyArray_FillWithScalar (ndarray, object)
- npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *)
- dtype PyArray_DescrNewByteorder (dtype, char)
- object PyArray_IterAllButAxis (object, int *)
- #object PyArray_CheckFromAny (object, dtype, int, int, int, object)
- #object PyArray_FromArray (ndarray, dtype, int)
- object PyArray_FromInterface (object)
- object PyArray_FromStructInterface (object)
- #object PyArray_FromArrayAttr (object, dtype, object)
- #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*)
- int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND)
- object PyArray_NewFlagsObject (object)
- npy_bool PyArray_CanCastScalar (type, type)
- #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t)
- int PyArray_RemoveSmallest (broadcast)
- int PyArray_ElementStrides (object)
- void PyArray_Item_INCREF (char *, dtype)
- void PyArray_Item_XDECREF (char *, dtype)
- object PyArray_FieldNames (object)
- object PyArray_Transpose (ndarray, PyArray_Dims *)
- object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE)
- object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE)
- object PyArray_PutMask (ndarray, object, object)
- object PyArray_Repeat (ndarray, object, int)
- object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE)
- int PyArray_Sort (ndarray, int, NPY_SORTKIND)
- object PyArray_ArgSort (ndarray, int, NPY_SORTKIND)
+
+ bint PyArray_ISBOOL(ndarray)
+ bint PyArray_ISUNSIGNED(ndarray)
+ bint PyArray_ISSIGNED(ndarray)
+ bint PyArray_ISINTEGER(ndarray)
+ bint PyArray_ISFLOAT(ndarray)
+ bint PyArray_ISNUMBER(ndarray)
+ bint PyArray_ISSTRING(ndarray)
+ bint PyArray_ISCOMPLEX(ndarray)
+ bint PyArray_ISPYTHON(ndarray)
+ bint PyArray_ISFLEXIBLE(ndarray)
+ bint PyArray_ISUSERDEF(ndarray)
+ bint PyArray_ISEXTENDED(ndarray)
+ bint PyArray_ISOBJECT(ndarray)
+ bint PyArray_HASFIELDS(ndarray)
+
+ bint PyArray_ISVARIABLE(ndarray)
+
+ bint PyArray_SAFEALIGNEDCOPY(ndarray)
+ bint PyArray_ISNBO(char) # works on ndarray.byteorder
+ bint PyArray_IsNativeByteOrder(char) # works on ndarray.byteorder
+ bint PyArray_ISNOTSWAPPED(ndarray)
+ bint PyArray_ISBYTESWAPPED(ndarray)
+
+ bint PyArray_FLAGSWAP(ndarray, int)
+
+ bint PyArray_ISCARRAY(ndarray)
+ bint PyArray_ISCARRAY_RO(ndarray)
+ bint PyArray_ISFARRAY(ndarray)
+ bint PyArray_ISFARRAY_RO(ndarray)
+ bint PyArray_ISBEHAVED(ndarray)
+ bint PyArray_ISBEHAVED_RO(ndarray)
+
+
+ bint PyDataType_ISNOTSWAPPED(dtype)
+ bint PyDataType_ISBYTESWAPPED(dtype)
+
+ bint PyArray_DescrCheck(object)
+
+ bint PyArray_Check(object)
+ bint PyArray_CheckExact(object)
+
+ # Cannot be supported due to out arg:
+ # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&)
+ # bint PyArray_HasArrayInterface(op, out)
+
+
+ bint PyArray_IsZeroDim(object)
+ # Cannot be supported due to ## ## in macro:
+ # bint PyArray_IsScalar(object, verbatim work)
+ bint PyArray_CheckScalar(object)
+ bint PyArray_IsPythonNumber(object)
+ bint PyArray_IsPythonScalar(object)
+ bint PyArray_IsAnyScalar(object)
+ bint PyArray_CheckAnyScalar(object)
+ ndarray PyArray_GETCONTIGUOUS(ndarray)
+ bint PyArray_SAMESHAPE(ndarray, ndarray)
+ npy_intp PyArray_SIZE(ndarray)
+ npy_intp PyArray_NBYTES(ndarray)
+
+ object PyArray_FROM_O(object)
+ object PyArray_FROM_OF(object m, int flags)
+ object PyArray_FROM_OT(object m, int type)
+ object PyArray_FROM_OTF(object m, int type, int flags)
+ object PyArray_FROMANY(object m, int type, int min, int max, int flags)
+ object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran)
+ object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran)
+ void PyArray_FILLWBYTE(object, int val)
+ npy_intp PyArray_REFCOUNT(object)
+ object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth)
+ unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2)
+ bint PyArray_EquivByteorders(int b1, int b2)
+ object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum)
+ object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data)
+ #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr)
+ object PyArray_ToScalar(void* data, ndarray arr)
+
+ void* PyArray_GETPTR1(ndarray m, npy_intp i)
+ void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j)
+ void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k)
+ void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l)
+
+ void PyArray_XDECREF_ERR(ndarray)
+ # Cannot be supported due to out arg
+ # void PyArray_DESCR_REPLACE(descr)
+
+
+ object PyArray_Copy(ndarray)
+ object PyArray_FromObject(object op, int type, int min_depth, int max_depth)
+ object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth)
+ object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth)
+
+ object PyArray_Cast(ndarray mp, int type_num)
+ object PyArray_Take(ndarray ap, object items, int axis)
+ object PyArray_Put(ndarray ap, object items, object values)
+
+ void PyArray_ITER_RESET(flatiter it) nogil
+ void PyArray_ITER_NEXT(flatiter it) nogil
+ void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil
+ void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil
+ void* PyArray_ITER_DATA(flatiter it) nogil
+ bint PyArray_ITER_NOTDONE(flatiter it) nogil
+
+ void PyArray_MultiIter_RESET(broadcast multi) nogil
+ void PyArray_MultiIter_NEXT(broadcast multi) nogil
+ void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil
+ void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil
+ void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil
+ void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil
+ bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil
+
+ # Functions from __multiarray_api.h
+
+ # Functions taking dtype and returning object/ndarray are disabled
+ # for now as they steal dtype references. I'm conservative and disable
+ # more than is probably needed until it can be checked further.
+ int PyArray_SetNumericOps (object)
+ object PyArray_GetNumericOps ()
+ int PyArray_INCREF (ndarray)
+ int PyArray_XDECREF (ndarray)
+ void PyArray_SetStringFunction (object, int)
+ dtype PyArray_DescrFromType (int)
+ object PyArray_TypeObjectFromType (int)
+ char * PyArray_Zero (ndarray)
+ char * PyArray_One (ndarray)
+ #object PyArray_CastToType (ndarray, dtype, int)
+ int PyArray_CastTo (ndarray, ndarray)
+ int PyArray_CastAnyTo (ndarray, ndarray)
+ int PyArray_CanCastSafely (int, int)
+ npy_bool PyArray_CanCastTo (dtype, dtype)
+ int PyArray_ObjectType (object, int)
+ dtype PyArray_DescrFromObject (object, dtype)
+ #ndarray* PyArray_ConvertToCommonType (object, int *)
+ dtype PyArray_DescrFromScalar (object)
+ dtype PyArray_DescrFromTypeObject (object)
+ npy_intp PyArray_Size (object)
+ #object PyArray_Scalar (void *, dtype, object)
+ #object PyArray_FromScalar (object, dtype)
+ void PyArray_ScalarAsCtype (object, void *)
+ #int PyArray_CastScalarToCtype (object, void *, dtype)
+ #int PyArray_CastScalarDirect (object, dtype, void *, int)
+ object PyArray_ScalarFromObject (object)
+ #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int)
+ object PyArray_FromDims (int, int *, int)
+ #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *)
+ #object PyArray_FromAny (object, dtype, int, int, int, object)
+ object PyArray_EnsureArray (object)
+ object PyArray_EnsureAnyArray (object)
+ #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *)
+ #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *)
+ #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp)
+ #object PyArray_FromIter (object, dtype, npy_intp)
+ object PyArray_Return (ndarray)
+ #object PyArray_GetField (ndarray, dtype, int)
+ #int PyArray_SetField (ndarray, dtype, int, object)
+ object PyArray_Byteswap (ndarray, npy_bool)
+ object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER)
+ int PyArray_MoveInto (ndarray, ndarray)
+ int PyArray_CopyInto (ndarray, ndarray)
+ int PyArray_CopyAnyInto (ndarray, ndarray)
+ int PyArray_CopyObject (ndarray, object)
+ object PyArray_NewCopy (ndarray, NPY_ORDER)
+ object PyArray_ToList (ndarray)
+ object PyArray_ToString (ndarray, NPY_ORDER)
+ int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *)
+ int PyArray_Dump (object, object, int)
+ object PyArray_Dumps (object, int)
+ int PyArray_ValidType (int)
+ void PyArray_UpdateFlags (ndarray, int)
+ object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object)
+ #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object)
+ #dtype PyArray_DescrNew (dtype)
+ dtype PyArray_DescrNewFromType (int)
+ double PyArray_GetPriority (object, double)
+ object PyArray_IterNew (object)
+ object PyArray_MultiIterNew (int, ...)
+
+ int PyArray_PyIntAsInt (object)
+ npy_intp PyArray_PyIntAsIntp (object)
+ int PyArray_Broadcast (broadcast)
+ void PyArray_FillObjectArray (ndarray, object)
+ int PyArray_FillWithScalar (ndarray, object)
+ npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *)
+ dtype PyArray_DescrNewByteorder (dtype, char)
+ object PyArray_IterAllButAxis (object, int *)
+ #object PyArray_CheckFromAny (object, dtype, int, int, int, object)
+ #object PyArray_FromArray (ndarray, dtype, int)
+ object PyArray_FromInterface (object)
+ object PyArray_FromStructInterface (object)
+ #object PyArray_FromArrayAttr (object, dtype, object)
+ #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*)
+ int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND)
+ object PyArray_NewFlagsObject (object)
+ npy_bool PyArray_CanCastScalar (type, type)
+ #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t)
+ int PyArray_RemoveSmallest (broadcast)
+ int PyArray_ElementStrides (object)
+ void PyArray_Item_INCREF (char *, dtype)
+ void PyArray_Item_XDECREF (char *, dtype)
+ object PyArray_FieldNames (object)
+ object PyArray_Transpose (ndarray, PyArray_Dims *)
+ object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE)
+ object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE)
+ object PyArray_PutMask (ndarray, object, object)
+ object PyArray_Repeat (ndarray, object, int)
+ object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE)
+ int PyArray_Sort (ndarray, int, NPY_SORTKIND)
+ object PyArray_ArgSort (ndarray, int, NPY_SORTKIND)
object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE, PyObject*)
- object PyArray_ArgMax (ndarray, int, ndarray)
- object PyArray_ArgMin (ndarray, int, ndarray)
- object PyArray_Reshape (ndarray, object)
- object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER)
- object PyArray_Squeeze (ndarray)
- #object PyArray_View (ndarray, dtype, type)
- object PyArray_SwapAxes (ndarray, int, int)
- object PyArray_Max (ndarray, int, ndarray)
- object PyArray_Min (ndarray, int, ndarray)
- object PyArray_Ptp (ndarray, int, ndarray)
- object PyArray_Mean (ndarray, int, int, ndarray)
- object PyArray_Trace (ndarray, int, int, int, int, ndarray)
- object PyArray_Diagonal (ndarray, int, int, int)
- object PyArray_Clip (ndarray, object, object, ndarray)
- object PyArray_Conjugate (ndarray, ndarray)
- object PyArray_Nonzero (ndarray)
- object PyArray_Std (ndarray, int, int, ndarray, int)
- object PyArray_Sum (ndarray, int, int, ndarray)
- object PyArray_CumSum (ndarray, int, int, ndarray)
- object PyArray_Prod (ndarray, int, int, ndarray)
- object PyArray_CumProd (ndarray, int, int, ndarray)
- object PyArray_All (ndarray, int, ndarray)
- object PyArray_Any (ndarray, int, ndarray)
- object PyArray_Compress (ndarray, object, int, ndarray)
- object PyArray_Flatten (ndarray, NPY_ORDER)
- object PyArray_Ravel (ndarray, NPY_ORDER)
- npy_intp PyArray_MultiplyList (npy_intp *, int)
- int PyArray_MultiplyIntList (int *, int)
- void * PyArray_GetPtr (ndarray, npy_intp*)
- int PyArray_CompareLists (npy_intp *, npy_intp *, int)
- #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype)
- #int PyArray_As1D (object*, char **, int *, int)
- #int PyArray_As2D (object*, char ***, int *, int *, int)
- int PyArray_Free (object, void *)
- #int PyArray_Converter (object, object*)
- int PyArray_IntpFromSequence (object, npy_intp *, int)
- object PyArray_Concatenate (object, int)
- object PyArray_InnerProduct (object, object)
- object PyArray_MatrixProduct (object, object)
- object PyArray_CopyAndTranspose (object)
- object PyArray_Correlate (object, object, int)
- int PyArray_TypestrConvert (int, int)
- #int PyArray_DescrConverter (object, dtype*)
- #int PyArray_DescrConverter2 (object, dtype*)
- int PyArray_IntpConverter (object, PyArray_Dims *)
- #int PyArray_BufferConverter (object, chunk)
- int PyArray_AxisConverter (object, int *)
- int PyArray_BoolConverter (object, npy_bool *)
- int PyArray_ByteorderConverter (object, char *)
- int PyArray_OrderConverter (object, NPY_ORDER *)
- unsigned char PyArray_EquivTypes (dtype, dtype)
- #object PyArray_Zeros (int, npy_intp *, dtype, int)
- #object PyArray_Empty (int, npy_intp *, dtype, int)
- object PyArray_Where (object, object, object)
- object PyArray_Arange (double, double, double, int)
- #object PyArray_ArangeObj (object, object, object, dtype)
- int PyArray_SortkindConverter (object, NPY_SORTKIND *)
- object PyArray_LexSort (object, int)
- object PyArray_Round (ndarray, int, ndarray)
- unsigned char PyArray_EquivTypenums (int, int)
- int PyArray_RegisterDataType (dtype)
- int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *)
- int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND)
- #void PyArray_InitArrFuncs (PyArray_ArrFuncs *)
- object PyArray_IntTupleFromIntp (int, npy_intp *)
- int PyArray_TypeNumFromName (char *)
- int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *)
- #int PyArray_OutputConverter (object, ndarray*)
- object PyArray_BroadcastToShape (object, npy_intp *, int)
- void _PyArray_SigintHandler (int)
- void* _PyArray_GetSigintBuf ()
- #int PyArray_DescrAlignConverter (object, dtype*)
- #int PyArray_DescrAlignConverter2 (object, dtype*)
- int PyArray_SearchsideConverter (object, void *)
- object PyArray_CheckAxis (ndarray, int *, int)
- npy_intp PyArray_OverflowMultiplyList (npy_intp *, int)
- int PyArray_CompareString (char *, char *, size_t)
+ object PyArray_ArgMax (ndarray, int, ndarray)
+ object PyArray_ArgMin (ndarray, int, ndarray)
+ object PyArray_Reshape (ndarray, object)
+ object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER)
+ object PyArray_Squeeze (ndarray)
+ #object PyArray_View (ndarray, dtype, type)
+ object PyArray_SwapAxes (ndarray, int, int)
+ object PyArray_Max (ndarray, int, ndarray)
+ object PyArray_Min (ndarray, int, ndarray)
+ object PyArray_Ptp (ndarray, int, ndarray)
+ object PyArray_Mean (ndarray, int, int, ndarray)
+ object PyArray_Trace (ndarray, int, int, int, int, ndarray)
+ object PyArray_Diagonal (ndarray, int, int, int)
+ object PyArray_Clip (ndarray, object, object, ndarray)
+ object PyArray_Conjugate (ndarray, ndarray)
+ object PyArray_Nonzero (ndarray)
+ object PyArray_Std (ndarray, int, int, ndarray, int)
+ object PyArray_Sum (ndarray, int, int, ndarray)
+ object PyArray_CumSum (ndarray, int, int, ndarray)
+ object PyArray_Prod (ndarray, int, int, ndarray)
+ object PyArray_CumProd (ndarray, int, int, ndarray)
+ object PyArray_All (ndarray, int, ndarray)
+ object PyArray_Any (ndarray, int, ndarray)
+ object PyArray_Compress (ndarray, object, int, ndarray)
+ object PyArray_Flatten (ndarray, NPY_ORDER)
+ object PyArray_Ravel (ndarray, NPY_ORDER)
+ npy_intp PyArray_MultiplyList (npy_intp *, int)
+ int PyArray_MultiplyIntList (int *, int)
+ void * PyArray_GetPtr (ndarray, npy_intp*)
+ int PyArray_CompareLists (npy_intp *, npy_intp *, int)
+ #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype)
+ #int PyArray_As1D (object*, char **, int *, int)
+ #int PyArray_As2D (object*, char ***, int *, int *, int)
+ int PyArray_Free (object, void *)
+ #int PyArray_Converter (object, object*)
+ int PyArray_IntpFromSequence (object, npy_intp *, int)
+ object PyArray_Concatenate (object, int)
+ object PyArray_InnerProduct (object, object)
+ object PyArray_MatrixProduct (object, object)
+ object PyArray_CopyAndTranspose (object)
+ object PyArray_Correlate (object, object, int)
+ int PyArray_TypestrConvert (int, int)
+ #int PyArray_DescrConverter (object, dtype*)
+ #int PyArray_DescrConverter2 (object, dtype*)
+ int PyArray_IntpConverter (object, PyArray_Dims *)
+ #int PyArray_BufferConverter (object, chunk)
+ int PyArray_AxisConverter (object, int *)
+ int PyArray_BoolConverter (object, npy_bool *)
+ int PyArray_ByteorderConverter (object, char *)
+ int PyArray_OrderConverter (object, NPY_ORDER *)
+ unsigned char PyArray_EquivTypes (dtype, dtype)
+ #object PyArray_Zeros (int, npy_intp *, dtype, int)
+ #object PyArray_Empty (int, npy_intp *, dtype, int)
+ object PyArray_Where (object, object, object)
+ object PyArray_Arange (double, double, double, int)
+ #object PyArray_ArangeObj (object, object, object, dtype)
+ int PyArray_SortkindConverter (object, NPY_SORTKIND *)
+ object PyArray_LexSort (object, int)
+ object PyArray_Round (ndarray, int, ndarray)
+ unsigned char PyArray_EquivTypenums (int, int)
+ int PyArray_RegisterDataType (dtype)
+ int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *)
+ int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND)
+ #void PyArray_InitArrFuncs (PyArray_ArrFuncs *)
+ object PyArray_IntTupleFromIntp (int, npy_intp *)
+ int PyArray_TypeNumFromName (char *)
+ int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *)
+ #int PyArray_OutputConverter (object, ndarray*)
+ object PyArray_BroadcastToShape (object, npy_intp *, int)
+ void _PyArray_SigintHandler (int)
+ void* _PyArray_GetSigintBuf ()
+ #int PyArray_DescrAlignConverter (object, dtype*)
+ #int PyArray_DescrAlignConverter2 (object, dtype*)
+ int PyArray_SearchsideConverter (object, void *)
+ object PyArray_CheckAxis (ndarray, int *, int)
+ npy_intp PyArray_OverflowMultiplyList (npy_intp *, int)
+ int PyArray_CompareString (char *, char *, size_t)
int PyArray_SetBaseObject(ndarray, base) # NOTE: steals a reference to base! Use "set_array_base()" instead.
-
-
-# Typedefs that match the runtime dtype objects in
-# the numpy module.
-
-# The ones that are commented out need an IFDEF function
-# in Cython to enable them only on the right systems.
-
-ctypedef npy_int8 int8_t
-ctypedef npy_int16 int16_t
-ctypedef npy_int32 int32_t
-ctypedef npy_int64 int64_t
-#ctypedef npy_int96 int96_t
-#ctypedef npy_int128 int128_t
-
-ctypedef npy_uint8 uint8_t
-ctypedef npy_uint16 uint16_t
-ctypedef npy_uint32 uint32_t
-ctypedef npy_uint64 uint64_t
-#ctypedef npy_uint96 uint96_t
-#ctypedef npy_uint128 uint128_t
-
-ctypedef npy_float32 float32_t
-ctypedef npy_float64 float64_t
-#ctypedef npy_float80 float80_t
-#ctypedef npy_float128 float128_t
-
-ctypedef float complex complex64_t
-ctypedef double complex complex128_t
-
-# The int types are mapped a bit surprisingly --
-# numpy.int corresponds to 'l' and numpy.long to 'q'
-ctypedef npy_long int_t
-ctypedef npy_longlong long_t
-ctypedef npy_longlong longlong_t
-
-ctypedef npy_ulong uint_t
-ctypedef npy_ulonglong ulong_t
-ctypedef npy_ulonglong ulonglong_t
-
-ctypedef npy_intp intp_t
-ctypedef npy_uintp uintp_t
-
-ctypedef npy_double float_t
-ctypedef npy_double double_t
-ctypedef npy_longdouble longdouble_t
-
-ctypedef npy_cfloat cfloat_t
-ctypedef npy_cdouble cdouble_t
-ctypedef npy_clongdouble clongdouble_t
-
-ctypedef npy_cdouble complex_t
-
-cdef inline object PyArray_MultiIterNew1(a):
- return PyArray_MultiIterNew(1, <void*>a)
-
-cdef inline object PyArray_MultiIterNew2(a, b):
- return PyArray_MultiIterNew(2, <void*>a, <void*>b)
-
-cdef inline object PyArray_MultiIterNew3(a, b, c):
- return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
-
-cdef inline object PyArray_MultiIterNew4(a, b, c, d):
- return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
-
-cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
- return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
-
+
+
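To make the reference-stealing caveat noted above (in the __multiarray_api.h section) concrete, here is a hedged sketch; it is not part of this diff, and the module name, function name and the hand-written extern declaration are assumptions for illustration only. PyArray_FromAny is one of the calls commented out above precisely because it consumes a reference to its dtype argument; if it is declared manually, an extra Py_INCREF is needed, since Cython also releases the reference it owns when the variable goes out of scope.

# steal_demo.pyx -- hypothetical sketch, assuming the NumPy headers are on the include path
cimport numpy as cnp
from cpython.ref cimport Py_INCREF

cnp.import_array()

cdef extern from "numpy/arrayobject.h":
    # declared by hand because numpy.pxd comments it out; it steals a dtype reference
    object PyArray_FromAny(object op, cnp.dtype newtype, int min_depth,
                           int max_depth, int requirements, void* context)

def as_double_array(object op):
    cdef cnp.dtype descr = cnp.PyArray_DescrFromType(cnp.NPY_DOUBLE)
    Py_INCREF(descr)   # one reference is consumed by PyArray_FromAny,
                       # the other is dropped by Cython when descr goes away
    return PyArray_FromAny(op, descr, 0, 0, 0, NULL)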
+# Typedefs that match the runtime dtype objects in
+# the numpy module.
+
+# The ones that are commented out need an IFDEF function
+# in Cython to enable them only on the right systems.
+
+ctypedef npy_int8 int8_t
+ctypedef npy_int16 int16_t
+ctypedef npy_int32 int32_t
+ctypedef npy_int64 int64_t
+#ctypedef npy_int96 int96_t
+#ctypedef npy_int128 int128_t
+
+ctypedef npy_uint8 uint8_t
+ctypedef npy_uint16 uint16_t
+ctypedef npy_uint32 uint32_t
+ctypedef npy_uint64 uint64_t
+#ctypedef npy_uint96 uint96_t
+#ctypedef npy_uint128 uint128_t
+
+ctypedef npy_float32 float32_t
+ctypedef npy_float64 float64_t
+#ctypedef npy_float80 float80_t
+#ctypedef npy_float128 float128_t
+
+ctypedef float complex complex64_t
+ctypedef double complex complex128_t
+
+# The int types are mapped a bit surprisingly --
+# numpy.int corresponds to 'l' and numpy.long to 'q'
+ctypedef npy_long int_t
+ctypedef npy_longlong long_t
+ctypedef npy_longlong longlong_t
+
+ctypedef npy_ulong uint_t
+ctypedef npy_ulonglong ulong_t
+ctypedef npy_ulonglong ulonglong_t
+
+ctypedef npy_intp intp_t
+ctypedef npy_uintp uintp_t
+
+ctypedef npy_double float_t
+ctypedef npy_double double_t
+ctypedef npy_longdouble longdouble_t
+
+ctypedef npy_cfloat cfloat_t
+ctypedef npy_cdouble cdouble_t
+ctypedef npy_clongdouble clongdouble_t
+
+ctypedef npy_cdouble complex_t
+
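A small hedged illustration of the mapping described just above (not part of this diff; the file and function names are invented): int_t is the platform C long, so a buffer typed with it accepts numpy.int_ arrays, while code that needs a guaranteed 64-bit integer should use int64_t or longlong_t instead.

# typed_buf.pyx -- hypothetical sketch
cimport numpy as cnp

cnp.import_array()

def first(cnp.ndarray[cnp.int_t, ndim=1] a):
    # matches arrays whose dtype is the platform C long (numpy.int_),
    # which is not the same as numpy.int64 on platforms where long is 32 bits
    return a[0]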
+cdef inline object PyArray_MultiIterNew1(a):
+ return PyArray_MultiIterNew(1, <void*>a)
+
+cdef inline object PyArray_MultiIterNew2(a, b):
+ return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+
+cdef inline object PyArray_MultiIterNew3(a, b, c):
+ return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+
+cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+
+cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+
cdef inline tuple PyDataType_SHAPE(dtype d):
if PyDataType_HASSUBARRAY(d):
return <tuple>d.subarray.shape
else:
return ()
-cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
- # Recursive utility function used in __getbuffer__ to get format
- # string. The new location in the format string is returned.
-
- cdef dtype child
- cdef int endian_detector = 1
- cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
- cdef tuple fields
-
- for childname in descr.names:
- fields = descr.fields[childname]
- child, new_offset = fields
-
- if (end - f) - <int>(new_offset - offset[0]) < 15:
- raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
-
- if ((child.byteorder == c'>' and little_endian) or
- (child.byteorder == c'<' and not little_endian)):
- raise ValueError(u"Non-native byte order not supported")
- # One could encode it in the format string and have Cython
- # complain instead, BUT: < and > in format strings also imply
- # standardized sizes for datatypes, and we rely on native in
- # order to avoid reencoding data types based on their size.
- #
- # A proper PEP 3118 exporter for other clients than Cython
- # must deal properly with this!
-
- # Output padding bytes
- while offset[0] < new_offset:
- f[0] = 120 # "x"; pad byte
- f += 1
- offset[0] += 1
-
- offset[0] += child.itemsize
-
- if not PyDataType_HASFIELDS(child):
- t = child.type_num
- if end - f < 5:
- raise RuntimeError(u"Format string allocated too short.")
-
- # Until ticket #99 is fixed, use integers to avoid warnings
- if t == NPY_BYTE: f[0] = 98 #"b"
- elif t == NPY_UBYTE: f[0] = 66 #"B"
- elif t == NPY_SHORT: f[0] = 104 #"h"
- elif t == NPY_USHORT: f[0] = 72 #"H"
- elif t == NPY_INT: f[0] = 105 #"i"
- elif t == NPY_UINT: f[0] = 73 #"I"
- elif t == NPY_LONG: f[0] = 108 #"l"
- elif t == NPY_ULONG: f[0] = 76 #"L"
- elif t == NPY_LONGLONG: f[0] = 113 #"q"
- elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
- elif t == NPY_FLOAT: f[0] = 102 #"f"
- elif t == NPY_DOUBLE: f[0] = 100 #"d"
- elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
- elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
- elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
- elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
- elif t == NPY_OBJECT: f[0] = 79 #"O"
- else:
- raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- f += 1
- else:
- # Cython ignores struct boundary information ("T{...}"),
- # so don't output it
- f = _util_dtypestring(child, f, end, offset)
- return f
-
-
-#
-# ufunc API
-#
-
-cdef extern from "numpy/ufuncobject.h":
-
- ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
-
+cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
+ # Recursive utility function used in __getbuffer__ to get format
+ # string. The new location in the format string is returned.
+
+ cdef dtype child
+ cdef int endian_detector = 1
+ cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+ cdef tuple fields
+
+ for childname in descr.names:
+ fields = descr.fields[childname]
+ child, new_offset = fields
+
+ if (end - f) - <int>(new_offset - offset[0]) < 15:
+ raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
+
+ if ((child.byteorder == c'>' and little_endian) or
+ (child.byteorder == c'<' and not little_endian)):
+ raise ValueError(u"Non-native byte order not supported")
+ # One could encode it in the format string and have Cython
+ # complain instead, BUT: < and > in format strings also imply
+ # standardized sizes for datatypes, and we rely on native in
+ # order to avoid reencoding data types based on their size.
+ #
+ # A proper PEP 3118 exporter for other clients than Cython
+ # must deal properly with this!
+
+ # Output padding bytes
+ while offset[0] < new_offset:
+ f[0] = 120 # "x"; pad byte
+ f += 1
+ offset[0] += 1
+
+ offset[0] += child.itemsize
+
+ if not PyDataType_HASFIELDS(child):
+ t = child.type_num
+ if end - f < 5:
+ raise RuntimeError(u"Format string allocated too short.")
+
+ # Until ticket #99 is fixed, use integers to avoid warnings
+ if t == NPY_BYTE: f[0] = 98 #"b"
+ elif t == NPY_UBYTE: f[0] = 66 #"B"
+ elif t == NPY_SHORT: f[0] = 104 #"h"
+ elif t == NPY_USHORT: f[0] = 72 #"H"
+ elif t == NPY_INT: f[0] = 105 #"i"
+ elif t == NPY_UINT: f[0] = 73 #"I"
+ elif t == NPY_LONG: f[0] = 108 #"l"
+ elif t == NPY_ULONG: f[0] = 76 #"L"
+ elif t == NPY_LONGLONG: f[0] = 113 #"q"
+ elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
+ elif t == NPY_FLOAT: f[0] = 102 #"f"
+ elif t == NPY_DOUBLE: f[0] = 100 #"d"
+ elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
+ elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
+ elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
+ elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
+ elif t == NPY_OBJECT: f[0] = 79 #"O"
+ else:
+ raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ f += 1
+ else:
+ # Cython ignores struct boundary information ("T{...}"),
+ # so don't output it
+ f = _util_dtypestring(child, f, end, offset)
+ return f
+
+
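The recursive format-string builder above is what lets structured dtypes be used with typed ndarray buffers. The following is a hedged sketch rather than anything from this diff (file name, struct name and function name are invented), and it assumes an unaligned (packed) dtype so that the C struct layout matches what _util_dtypestring describes: one format code per field, with 'x' pad bytes emitted only where field offsets leave gaps.

# rec_buf.pyx -- hypothetical sketch
import numpy as np
cimport numpy as cnp

cnp.import_array()

cdef packed struct Pair:
    cnp.int32_t a      # 32-bit integer field
    cnp.float64_t b    # double-precision field

def sum_b(cnp.ndarray[Pair, ndim=1] rec):
    # sums the 'b' field of a packed record array
    cdef double total = 0
    cdef Py_ssize_t i
    cdef Pair p
    for i in range(rec.shape[0]):
        p = rec[i]
        total += p.b
    return total

# usage sketch: sum_b(np.zeros(4, dtype=np.dtype([('a', 'i4'), ('b', 'f8')], align=False)))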
+#
+# ufunc API
+#
+
+cdef extern from "numpy/ufuncobject.h":
+
+ ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
+
ctypedef class numpy.ufunc [object PyUFuncObject, check_size ignore]:
- cdef:
- int nin, nout, nargs
- int identity
- PyUFuncGenericFunction *functions
- void **data
- int ntypes
- int check_return
- char *name
- char *types
- char *doc
- void *ptr
- PyObject *obj
- PyObject *userloops
-
- cdef enum:
- PyUFunc_Zero
- PyUFunc_One
- PyUFunc_None
- UFUNC_ERR_IGNORE
- UFUNC_ERR_WARN
- UFUNC_ERR_RAISE
- UFUNC_ERR_CALL
- UFUNC_ERR_PRINT
- UFUNC_ERR_LOG
- UFUNC_MASK_DIVIDEBYZERO
- UFUNC_MASK_OVERFLOW
- UFUNC_MASK_UNDERFLOW
- UFUNC_MASK_INVALID
- UFUNC_SHIFT_DIVIDEBYZERO
- UFUNC_SHIFT_OVERFLOW
- UFUNC_SHIFT_UNDERFLOW
- UFUNC_SHIFT_INVALID
- UFUNC_FPE_DIVIDEBYZERO
- UFUNC_FPE_OVERFLOW
- UFUNC_FPE_UNDERFLOW
- UFUNC_FPE_INVALID
- UFUNC_ERR_DEFAULT
- UFUNC_ERR_DEFAULT2
-
- object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *,
- void **, char *, int, int, int, int, char *, char *, int)
- int PyUFunc_RegisterLoopForType(ufunc, int,
- PyUFuncGenericFunction, int *, void *)
- int PyUFunc_GenericFunction \
- (ufunc, PyObject *, PyObject *, PyArrayObject **)
- void PyUFunc_f_f_As_d_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_d_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_f_f \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_g_g \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_F_F_As_D_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_F_F \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_D_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_G_G \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_O_O \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_ff_f_As_dd_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_ff_f \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_dd_d \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_gg_g \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_FF_F_As_DD_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_DD_D \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_FF_F \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_GG_G \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_OO_O \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_O_O_method \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_OO_O_method \
- (char **, npy_intp *, npy_intp *, void *)
- void PyUFunc_On_Om \
- (char **, npy_intp *, npy_intp *, void *)
- int PyUFunc_GetPyValues \
- (char *, int *, int *, PyObject **)
- int PyUFunc_checkfperr \
- (int, PyObject *, int *)
- void PyUFunc_clearfperr()
- int PyUFunc_getfperr()
- int PyUFunc_handlefperr \
- (int, PyObject *, int, int *)
- int PyUFunc_ReplaceLoopBySignature \
- (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)
- object PyUFunc_FromFuncAndDataAndSignature \
- (PyUFuncGenericFunction *, void **, char *, int, int, int,
- int, char *, char *, int, char *)
-
+ cdef:
+ int nin, nout, nargs
+ int identity
+ PyUFuncGenericFunction *functions
+ void **data
+ int ntypes
+ int check_return
+ char *name
+ char *types
+ char *doc
+ void *ptr
+ PyObject *obj
+ PyObject *userloops
+
+ cdef enum:
+ PyUFunc_Zero
+ PyUFunc_One
+ PyUFunc_None
+ UFUNC_ERR_IGNORE
+ UFUNC_ERR_WARN
+ UFUNC_ERR_RAISE
+ UFUNC_ERR_CALL
+ UFUNC_ERR_PRINT
+ UFUNC_ERR_LOG
+ UFUNC_MASK_DIVIDEBYZERO
+ UFUNC_MASK_OVERFLOW
+ UFUNC_MASK_UNDERFLOW
+ UFUNC_MASK_INVALID
+ UFUNC_SHIFT_DIVIDEBYZERO
+ UFUNC_SHIFT_OVERFLOW
+ UFUNC_SHIFT_UNDERFLOW
+ UFUNC_SHIFT_INVALID
+ UFUNC_FPE_DIVIDEBYZERO
+ UFUNC_FPE_OVERFLOW
+ UFUNC_FPE_UNDERFLOW
+ UFUNC_FPE_INVALID
+ UFUNC_ERR_DEFAULT
+ UFUNC_ERR_DEFAULT2
+
+ object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *,
+ void **, char *, int, int, int, int, char *, char *, int)
+ int PyUFunc_RegisterLoopForType(ufunc, int,
+ PyUFuncGenericFunction, int *, void *)
+ int PyUFunc_GenericFunction \
+ (ufunc, PyObject *, PyObject *, PyArrayObject **)
+ void PyUFunc_f_f_As_d_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_d_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_f_f \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_g_g \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_F_F_As_D_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_F_F \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_D_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_G_G \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_O_O \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_ff_f_As_dd_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_ff_f \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_dd_d \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_gg_g \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_FF_F_As_DD_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_DD_D \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_FF_F \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_GG_G \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_OO_O \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_O_O_method \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_OO_O_method \
+ (char **, npy_intp *, npy_intp *, void *)
+ void PyUFunc_On_Om \
+ (char **, npy_intp *, npy_intp *, void *)
+ int PyUFunc_GetPyValues \
+ (char *, int *, int *, PyObject **)
+ int PyUFunc_checkfperr \
+ (int, PyObject *, int *)
+ void PyUFunc_clearfperr()
+ int PyUFunc_getfperr()
+ int PyUFunc_handlefperr \
+ (int, PyObject *, int, int *)
+ int PyUFunc_ReplaceLoopBySignature \
+ (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)
+ object PyUFunc_FromFuncAndDataAndSignature \
+ (PyUFuncGenericFunction *, void **, char *, int, int, int,
+ int, char *, char *, int, char *)
+
int _import_umath() except -1
-
-cdef inline void set_array_base(ndarray arr, object base):
+
+cdef inline void set_array_base(ndarray arr, object base):
Py_INCREF(base) # important to do this before stealing the reference below!
PyArray_SetBaseObject(arr, base)
-
-cdef inline object get_array_base(ndarray arr):
+
+cdef inline object get_array_base(ndarray arr):
base = PyArray_BASE(arr)
if base is NULL:
- return None
+ return None
return <object>base
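The two helpers above exist because PyArray_SetBaseObject steals a reference, as the NOTE on its declaration earlier in this file warns. A hedged usage sketch (not part of this diff; the function name and the assumption of an externally owned double buffer are invented for illustration):

# wrap_buf.pyx -- hypothetical sketch
cimport numpy as cnp

cnp.import_array()

cdef cnp.ndarray wrap_double_buffer(double* data, cnp.npy_intp n, object owner):
    # build a 1-D float64 view over memory owned elsewhere, then tie its
    # lifetime to `owner`; set_array_base() performs the Py_INCREF that the
    # stolen reference requires, and get_array_base(arr) would return `owner`
    cdef cnp.ndarray arr = cnp.PyArray_SimpleNewFromData(1, &n, cnp.NPY_DOUBLE, <void*>data)
    cnp.set_array_base(arr, owner)
    return arr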
# Versions of the import_* functions which are more suitable for
diff --git a/contrib/tools/cython/Cython/Includes/numpy/math.pxd b/contrib/tools/cython/Cython/Includes/numpy/math.pxd
index ae94d56b67..c16df1c51a 100644
--- a/contrib/tools/cython/Cython/Includes/numpy/math.pxd
+++ b/contrib/tools/cython/Cython/Includes/numpy/math.pxd
@@ -1,133 +1,133 @@
-# NumPy math library
-#
-# This exports the functionality of the NumPy core math library, aka npymath,
-# which provides implementations of C99 math functions and macros for system
-# with a C89 library (such as MSVC). npymath is available with NumPy >=1.3,
-# although some functions will require later versions. The spacing function is
-# not in C99, but comes from Fortran.
-#
-# On the Cython side, the npymath functions are available without the "npy_"
-# prefix that they have in C, to make this a drop-in replacement for
-# libc.math. The same is true for the constants, where possible.
-#
-# See the NumPy documentation for linking instructions.
-#
-# Complex number support and NumPy 2.0 half-precision functions are currently
-# not exported.
-#
-# Author: Lars Buitinck
-
-cdef extern from "numpy/npy_math.h" nogil:
- # Floating-point classification
- long double NAN "NPY_NAN"
- long double INFINITY "NPY_INFINITY"
- long double PZERO "NPY_PZERO" # positive zero
- long double NZERO "NPY_NZERO" # negative zero
-
- # These four are actually macros and work on any floating-point type.
+# NumPy math library
+#
+# This exports the functionality of the NumPy core math library, aka npymath,
+# which provides implementations of C99 math functions and macros for system
+# with a C89 library (such as MSVC). npymath is available with NumPy >=1.3,
+# although some functions will require later versions. The spacing function is
+# not in C99, but comes from Fortran.
+#
+# On the Cython side, the npymath functions are available without the "npy_"
+# prefix that they have in C, to make this a drop-in replacement for
+# libc.math. The same is true for the constants, where possible.
+#
+# See the NumPy documentation for linking instructions.
+#
+# Complex number support and NumPy 2.0 half-precision functions are currently
+# not exported.
+#
+# Author: Lars Buitinck
+
+cdef extern from "numpy/npy_math.h" nogil:
+ # Floating-point classification
+ long double NAN "NPY_NAN"
+ long double INFINITY "NPY_INFINITY"
+ long double PZERO "NPY_PZERO" # positive zero
+ long double NZERO "NPY_NZERO" # negative zero
+
+ # These four are actually macros and work on any floating-point type.
int isinf "npy_isinf"(long double) # -1 / 0 / 1
- bint isfinite "npy_isfinite"(long double)
- bint isnan "npy_isnan"(long double)
- bint signbit "npy_signbit"(long double)
-
- # Math constants
- long double E "NPY_E"
- long double LOG2E "NPY_LOG2E" # ln(e) / ln(2)
- long double LOG10E "NPY_LOG10E" # ln(e) / ln(10)
- long double LOGE2 "NPY_LOGE2" # ln(2)
- long double LOGE10 "NPY_LOGE10" # ln(10)
- long double PI "NPY_PI"
- long double PI_2 "NPY_PI_2" # pi / 2
- long double PI_4 "NPY_PI_4" # pi / 4
- long double NPY_1_PI # 1 / pi; NPY_ because of ident syntax
- long double NPY_2_PI # 2 / pi
- long double EULER "NPY_EULER" # Euler constant (gamma, 0.57721)
-
- # Low-level floating point manipulation (NumPy >=1.4)
- float copysignf "npy_copysignf"(float, float)
- float nextafterf "npy_nextafterf"(float x, float y)
- float spacingf "npy_spacingf"(float x)
- double copysign "npy_copysign"(double, double)
- double nextafter "npy_nextafter"(double x, double y)
- double spacing "npy_spacing"(double x)
- long double copysignl "npy_copysignl"(long double, long double)
- long double nextafterl "npy_nextafterl"(long double x, long double y)
- long double spacingl "npy_spacingl"(long double x)
-
- # Float C99 functions
- float sinf "npy_sinf"(float x)
- float cosf "npy_cosf"(float x)
- float tanf "npy_tanf"(float x)
- float sinhf "npy_sinhf"(float x)
- float coshf "npy_coshf"(float x)
- float tanhf "npy_tanhf"(float x)
- float fabsf "npy_fabsf"(float x)
- float floorf "npy_floorf"(float x)
- float ceilf "npy_ceilf"(float x)
- float rintf "npy_rintf"(float x)
- float sqrtf "npy_sqrtf"(float x)
- float log10f "npy_log10f"(float x)
- float logf "npy_logf"(float x)
- float expf "npy_expf"(float x)
- float expm1f "npy_expm1f"(float x)
- float asinf "npy_asinf"(float x)
- float acosf "npy_acosf"(float x)
- float atanf "npy_atanf"(float x)
- float asinhf "npy_asinhf"(float x)
- float acoshf "npy_acoshf"(float x)
- float atanhf "npy_atanhf"(float x)
- float log1pf "npy_log1pf"(float x)
- float exp2f "npy_exp2f"(float x)
- float log2f "npy_log2f"(float x)
+ bint isfinite "npy_isfinite"(long double)
+ bint isnan "npy_isnan"(long double)
+ bint signbit "npy_signbit"(long double)
+
+ # Math constants
+ long double E "NPY_E"
+ long double LOG2E "NPY_LOG2E" # ln(e) / ln(2)
+ long double LOG10E "NPY_LOG10E" # ln(e) / ln(10)
+ long double LOGE2 "NPY_LOGE2" # ln(2)
+ long double LOGE10 "NPY_LOGE10" # ln(10)
+ long double PI "NPY_PI"
+ long double PI_2 "NPY_PI_2" # pi / 2
+ long double PI_4 "NPY_PI_4" # pi / 4
+ long double NPY_1_PI # 1 / pi; NPY_ because of ident syntax
+ long double NPY_2_PI # 2 / pi
+ long double EULER "NPY_EULER" # Euler constant (gamma, 0.57721)
+
+ # Low-level floating point manipulation (NumPy >=1.4)
+ float copysignf "npy_copysignf"(float, float)
+ float nextafterf "npy_nextafterf"(float x, float y)
+ float spacingf "npy_spacingf"(float x)
+ double copysign "npy_copysign"(double, double)
+ double nextafter "npy_nextafter"(double x, double y)
+ double spacing "npy_spacing"(double x)
+ long double copysignl "npy_copysignl"(long double, long double)
+ long double nextafterl "npy_nextafterl"(long double x, long double y)
+ long double spacingl "npy_spacingl"(long double x)
+
+ # Float C99 functions
+ float sinf "npy_sinf"(float x)
+ float cosf "npy_cosf"(float x)
+ float tanf "npy_tanf"(float x)
+ float sinhf "npy_sinhf"(float x)
+ float coshf "npy_coshf"(float x)
+ float tanhf "npy_tanhf"(float x)
+ float fabsf "npy_fabsf"(float x)
+ float floorf "npy_floorf"(float x)
+ float ceilf "npy_ceilf"(float x)
+ float rintf "npy_rintf"(float x)
+ float sqrtf "npy_sqrtf"(float x)
+ float log10f "npy_log10f"(float x)
+ float logf "npy_logf"(float x)
+ float expf "npy_expf"(float x)
+ float expm1f "npy_expm1f"(float x)
+ float asinf "npy_asinf"(float x)
+ float acosf "npy_acosf"(float x)
+ float atanf "npy_atanf"(float x)
+ float asinhf "npy_asinhf"(float x)
+ float acoshf "npy_acoshf"(float x)
+ float atanhf "npy_atanhf"(float x)
+ float log1pf "npy_log1pf"(float x)
+ float exp2f "npy_exp2f"(float x)
+ float log2f "npy_log2f"(float x)
float atan2f "npy_atan2f"(float x, float y)
float hypotf "npy_hypotf"(float x, float y)
float powf "npy_powf"(float x, float y)
float fmodf "npy_fmodf"(float x, float y)
float modff "npy_modff"(float x, float* y)
-
- # Long double C99 functions
- long double sinl "npy_sinl"(long double x)
- long double cosl "npy_cosl"(long double x)
- long double tanl "npy_tanl"(long double x)
- long double sinhl "npy_sinhl"(long double x)
- long double coshl "npy_coshl"(long double x)
- long double tanhl "npy_tanhl"(long double x)
- long double fabsl "npy_fabsl"(long double x)
- long double floorl "npy_floorl"(long double x)
- long double ceill "npy_ceill"(long double x)
- long double rintl "npy_rintl"(long double x)
- long double sqrtl "npy_sqrtl"(long double x)
- long double log10l "npy_log10l"(long double x)
- long double logl "npy_logl"(long double x)
- long double expl "npy_expl"(long double x)
- long double expm1l "npy_expm1l"(long double x)
- long double asinl "npy_asinl"(long double x)
- long double acosl "npy_acosl"(long double x)
- long double atanl "npy_atanl"(long double x)
- long double asinhl "npy_asinhl"(long double x)
- long double acoshl "npy_acoshl"(long double x)
- long double atanhl "npy_atanhl"(long double x)
- long double log1pl "npy_log1pl"(long double x)
- long double exp2l "npy_exp2l"(long double x)
- long double log2l "npy_log2l"(long double x)
+
+ # Long double C99 functions
+ long double sinl "npy_sinl"(long double x)
+ long double cosl "npy_cosl"(long double x)
+ long double tanl "npy_tanl"(long double x)
+ long double sinhl "npy_sinhl"(long double x)
+ long double coshl "npy_coshl"(long double x)
+ long double tanhl "npy_tanhl"(long double x)
+ long double fabsl "npy_fabsl"(long double x)
+ long double floorl "npy_floorl"(long double x)
+ long double ceill "npy_ceill"(long double x)
+ long double rintl "npy_rintl"(long double x)
+ long double sqrtl "npy_sqrtl"(long double x)
+ long double log10l "npy_log10l"(long double x)
+ long double logl "npy_logl"(long double x)
+ long double expl "npy_expl"(long double x)
+ long double expm1l "npy_expm1l"(long double x)
+ long double asinl "npy_asinl"(long double x)
+ long double acosl "npy_acosl"(long double x)
+ long double atanl "npy_atanl"(long double x)
+ long double asinhl "npy_asinhl"(long double x)
+ long double acoshl "npy_acoshl"(long double x)
+ long double atanhl "npy_atanhl"(long double x)
+ long double log1pl "npy_log1pl"(long double x)
+ long double exp2l "npy_exp2l"(long double x)
+ long double log2l "npy_log2l"(long double x)
long double atan2l "npy_atan2l"(long double x, long double y)
long double hypotl "npy_hypotl"(long double x, long double y)
long double powl "npy_powl"(long double x, long double y)
long double fmodl "npy_fmodl"(long double x, long double y)
long double modfl "npy_modfl"(long double x, long double* y)
-
- # NumPy extensions
- float deg2radf "npy_deg2radf"(float x)
- float rad2degf "npy_rad2degf"(float x)
+
+ # NumPy extensions
+ float deg2radf "npy_deg2radf"(float x)
+ float rad2degf "npy_rad2degf"(float x)
float logaddexpf "npy_logaddexpf"(float x, float y)
float logaddexp2f "npy_logaddexp2f"(float x, float y)
-
- double deg2rad "npy_deg2rad"(double x)
- double rad2deg "npy_rad2deg"(double x)
+
+ double deg2rad "npy_deg2rad"(double x)
+ double rad2deg "npy_rad2deg"(double x)
double logaddexp "npy_logaddexp"(double x, double y)
double logaddexp2 "npy_logaddexp2"(double x, double y)
-
- long double deg2radl "npy_deg2radl"(long double x)
- long double rad2degl "npy_rad2degl"(long double x)
+
+ long double deg2radl "npy_deg2radl"(long double x)
+ long double rad2degl "npy_rad2degl"(long double x)
long double logaddexpl "npy_logaddexpl"(long double x, long double y)
long double logaddexp2l "npy_logaddexp2l"(long double x, long double y)
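Since the header comment above presents npymath as a drop-in for libc.math, here is a hedged usage sketch (not part of this diff; the module and function names are invented, and the build is assumed to link against npymath as the NumPy documentation describes):

# use_npymath.pyx -- hypothetical sketch
from numpy.math cimport isnan, INFINITY, logaddexp

def safe_logaddexp(double a, double b):
    # log(exp(a) + exp(b)) computed without intermediate overflow;
    # NaN inputs are mapped to -inf purely for illustration
    if isnan(a) or isnan(b):
        return -INFINITY
    return logaddexp(a, b)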
diff --git a/contrib/tools/cython/Cython/Includes/openmp.pxd b/contrib/tools/cython/Cython/Includes/openmp.pxd
index 32c9a53ba9..30873a588b 100644
--- a/contrib/tools/cython/Cython/Includes/openmp.pxd
+++ b/contrib/tools/cython/Cython/Includes/openmp.pxd
@@ -1,51 +1,51 @@
cdef extern from "<omp.h>":
- ctypedef struct omp_lock_t:
- pass
- ctypedef struct omp_nest_lock_t:
- pass
-
- ctypedef enum omp_sched_t:
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
-
- extern void omp_set_num_threads(int) nogil
- extern int omp_get_num_threads() nogil
- extern int omp_get_max_threads() nogil
- extern int omp_get_thread_num() nogil
- extern int omp_get_num_procs() nogil
-
- extern int omp_in_parallel() nogil
-
- extern void omp_set_dynamic(int) nogil
- extern int omp_get_dynamic() nogil
-
- extern void omp_set_nested(int) nogil
- extern int omp_get_nested() nogil
-
- extern void omp_init_lock(omp_lock_t *) nogil
- extern void omp_destroy_lock(omp_lock_t *) nogil
- extern void omp_set_lock(omp_lock_t *) nogil
- extern void omp_unset_lock(omp_lock_t *) nogil
- extern int omp_test_lock(omp_lock_t *) nogil
-
- extern void omp_init_nest_lock(omp_nest_lock_t *) nogil
- extern void omp_destroy_nest_lock(omp_nest_lock_t *) nogil
- extern void omp_set_nest_lock(omp_nest_lock_t *) nogil
- extern void omp_unset_nest_lock(omp_nest_lock_t *) nogil
- extern int omp_test_nest_lock(omp_nest_lock_t *) nogil
-
- extern double omp_get_wtime() nogil
- extern double omp_get_wtick() nogil
-
- void omp_set_schedule(omp_sched_t, int) nogil
- void omp_get_schedule(omp_sched_t *, int *) nogil
- int omp_get_thread_limit() nogil
- void omp_set_max_active_levels(int) nogil
- int omp_get_max_active_levels() nogil
- int omp_get_level() nogil
- int omp_get_ancestor_thread_num(int) nogil
- int omp_get_team_size(int) nogil
- int omp_get_active_level() nogil
-
+ ctypedef struct omp_lock_t:
+ pass
+ ctypedef struct omp_nest_lock_t:
+ pass
+
+ ctypedef enum omp_sched_t:
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+
+ extern void omp_set_num_threads(int) nogil
+ extern int omp_get_num_threads() nogil
+ extern int omp_get_max_threads() nogil
+ extern int omp_get_thread_num() nogil
+ extern int omp_get_num_procs() nogil
+
+ extern int omp_in_parallel() nogil
+
+ extern void omp_set_dynamic(int) nogil
+ extern int omp_get_dynamic() nogil
+
+ extern void omp_set_nested(int) nogil
+ extern int omp_get_nested() nogil
+
+ extern void omp_init_lock(omp_lock_t *) nogil
+ extern void omp_destroy_lock(omp_lock_t *) nogil
+ extern void omp_set_lock(omp_lock_t *) nogil
+ extern void omp_unset_lock(omp_lock_t *) nogil
+ extern int omp_test_lock(omp_lock_t *) nogil
+
+ extern void omp_init_nest_lock(omp_nest_lock_t *) nogil
+ extern void omp_destroy_nest_lock(omp_nest_lock_t *) nogil
+ extern void omp_set_nest_lock(omp_nest_lock_t *) nogil
+ extern void omp_unset_nest_lock(omp_nest_lock_t *) nogil
+ extern int omp_test_nest_lock(omp_nest_lock_t *) nogil
+
+ extern double omp_get_wtime() nogil
+ extern double omp_get_wtick() nogil
+
+ void omp_set_schedule(omp_sched_t, int) nogil
+ void omp_get_schedule(omp_sched_t *, int *) nogil
+ int omp_get_thread_limit() nogil
+ void omp_set_max_active_levels(int) nogil
+ int omp_get_max_active_levels() nogil
+ int omp_get_level() nogil
+ int omp_get_ancestor_thread_num(int) nogil
+ int omp_get_team_size(int) nogil
+ int omp_get_active_level() nogil
+
diff --git a/contrib/tools/cython/Cython/Includes/posix/__init__.pxd b/contrib/tools/cython/Cython/Includes/posix/__init__.pxd
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Includes/posix/__init__.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/__init__.pxd
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Includes/posix/fcntl.pxd b/contrib/tools/cython/Cython/Includes/posix/fcntl.pxd
index 727d0be03a..9afc33a368 100644
--- a/contrib/tools/cython/Cython/Includes/posix/fcntl.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/fcntl.pxd
@@ -1,68 +1,68 @@
-# http://www.opengroup.org/onlinepubs/009695399/basedefs/fcntl.h.html
-
+# http://www.opengroup.org/onlinepubs/009695399/basedefs/fcntl.h.html
+
cdef extern from "<fcntl.h>" nogil:
-
- enum: F_DUPFD
- enum: F_GETFD
- enum: F_SETFD
- enum: F_GETFL
- enum: F_SETFL
- enum: F_GETLK
- enum: F_SETLK
- enum: F_SETLKW
- enum: F_GETOWN
- enum: F_SETOWN
-
- enum: FD_CLOEXEC
-
- enum: F_RDLCK
- enum: F_UNLCK
- enum: F_WRLCK
-
- enum: SEEK_SET
- enum: SEEK_CUR
- enum: SEEK_END
-
- enum: O_CREAT
+
+ enum: F_DUPFD
+ enum: F_GETFD
+ enum: F_SETFD
+ enum: F_GETFL
+ enum: F_SETFL
+ enum: F_GETLK
+ enum: F_SETLK
+ enum: F_SETLKW
+ enum: F_GETOWN
+ enum: F_SETOWN
+
+ enum: FD_CLOEXEC
+
+ enum: F_RDLCK
+ enum: F_UNLCK
+ enum: F_WRLCK
+
+ enum: SEEK_SET
+ enum: SEEK_CUR
+ enum: SEEK_END
+
+ enum: O_CREAT
enum: O_DIRECT
- enum: O_EXCL
- enum: O_NOCTTY
- enum: O_TRUNC
-
- enum: O_APPEND
- enum: O_DSYNC
- enum: O_NONBLOCK
- enum: O_RSYNC
- enum: O_SYNC
-
- enum: O_ACCMODE # O_RDONLY|O_WRONLY|O_RDWR
-
- enum: O_RDONLY
- enum: O_WRONLY
- enum: O_RDWR
-
- enum: S_IFMT
- enum: S_IFBLK
- enum: S_IFCHR
- enum: S_IFIFO
- enum: S_IFREG
- enum: S_IFDIR
- enum: S_IFLNK
- enum: S_IFSOCK
-
- ctypedef int mode_t
- ctypedef signed pid_t
- ctypedef signed off_t
-
- struct flock:
- short l_type
- short l_whence
- off_t l_start
- off_t l_len
- pid_t l_pid
-
- int creat(char *, mode_t)
- int fcntl(int, int, ...)
- int open(char *, int, ...)
- #int open (char *, int, mode_t)
-
+ enum: O_EXCL
+ enum: O_NOCTTY
+ enum: O_TRUNC
+
+ enum: O_APPEND
+ enum: O_DSYNC
+ enum: O_NONBLOCK
+ enum: O_RSYNC
+ enum: O_SYNC
+
+ enum: O_ACCMODE # O_RDONLY|O_WRONLY|O_RDWR
+
+ enum: O_RDONLY
+ enum: O_WRONLY
+ enum: O_RDWR
+
+ enum: S_IFMT
+ enum: S_IFBLK
+ enum: S_IFCHR
+ enum: S_IFIFO
+ enum: S_IFREG
+ enum: S_IFDIR
+ enum: S_IFLNK
+ enum: S_IFSOCK
+
+ ctypedef int mode_t
+ ctypedef signed pid_t
+ ctypedef signed off_t
+
+ struct flock:
+ short l_type
+ short l_whence
+ off_t l_start
+ off_t l_len
+ pid_t l_pid
+
+ int creat(char *, mode_t)
+ int fcntl(int, int, ...)
+ int open(char *, int, ...)
+ #int open (char *, int, mode_t)
+
diff --git a/contrib/tools/cython/Cython/Includes/posix/ioctl.pxd b/contrib/tools/cython/Cython/Includes/posix/ioctl.pxd
index 5c00702150..dacbc307f3 100644
--- a/contrib/tools/cython/Cython/Includes/posix/ioctl.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/ioctl.pxd
@@ -1,4 +1,4 @@
cdef extern from "<sys/ioctl.h>" nogil:
- enum: FIONBIO
-
- int ioctl(int fd, int request, ...)
+ enum: FIONBIO
+
+ int ioctl(int fd, int request, ...)
diff --git a/contrib/tools/cython/Cython/Includes/posix/resource.pxd b/contrib/tools/cython/Cython/Includes/posix/resource.pxd
index 6d9bac2925..9f55c6ab4e 100644
--- a/contrib/tools/cython/Cython/Includes/posix/resource.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/resource.pxd
@@ -1,55 +1,55 @@
-# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/resource.h.html
-
+# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/resource.h.html
+
from posix.time cimport timeval
-from posix.types cimport id_t
-
+from posix.types cimport id_t
+
cdef extern from "<sys/resource.h>" nogil:
-
- enum: PRIO_PROCESS
- enum: PRIO_PGRP
- enum: PRIO_USER
-
- enum: RLIM_INFINITY
- enum: RLIM_SAVED_MAX
- enum: RLIM_SAVED_CUR
-
- enum: RUSAGE_SELF
- enum: RUSAGE_CHILDREN
-
- enum: RLIMIT_CORE
- enum: RLIMIT_CPU
- enum: RLIMIT_DATA
- enum: RLIMIT_FSIZE
- enum: RLIMIT_NOFILE
- enum: RLIMIT_STACK
- enum: RLIMIT_AS
-
- ctypedef unsigned long rlim_t
-
- cdef struct rlimit:
- rlim_t rlim_cur
- rlim_t rlim_max
-
- cdef struct rusage:
- timeval ru_utime
- timeval ru_stime
- long ru_maxrss
- long ru_ixrss
- long ru_idrss
- long ru_isrss
- long ru_minflt
- long ru_majflt
- long ru_nswap
- long ru_inblock
- long ru_oublock
- long ru_msgsnd
- long ru_msgrcv
- long ru_nsignals
- long ru_nvcsw
- long ru_nivcsw
-
- int getpriority(int, id_t)
- int getrlimit(int, rlimit *)
- int getrusage(int, rusage *)
- int setpriority(int, id_t, int)
- int setrlimit(int, const rlimit *)
+
+ enum: PRIO_PROCESS
+ enum: PRIO_PGRP
+ enum: PRIO_USER
+
+ enum: RLIM_INFINITY
+ enum: RLIM_SAVED_MAX
+ enum: RLIM_SAVED_CUR
+
+ enum: RUSAGE_SELF
+ enum: RUSAGE_CHILDREN
+
+ enum: RLIMIT_CORE
+ enum: RLIMIT_CPU
+ enum: RLIMIT_DATA
+ enum: RLIMIT_FSIZE
+ enum: RLIMIT_NOFILE
+ enum: RLIMIT_STACK
+ enum: RLIMIT_AS
+
+ ctypedef unsigned long rlim_t
+
+ cdef struct rlimit:
+ rlim_t rlim_cur
+ rlim_t rlim_max
+
+ cdef struct rusage:
+ timeval ru_utime
+ timeval ru_stime
+ long ru_maxrss
+ long ru_ixrss
+ long ru_idrss
+ long ru_isrss
+ long ru_minflt
+ long ru_majflt
+ long ru_nswap
+ long ru_inblock
+ long ru_oublock
+ long ru_msgsnd
+ long ru_msgrcv
+ long ru_nsignals
+ long ru_nvcsw
+ long ru_nivcsw
+
+ int getpriority(int, id_t)
+ int getrlimit(int, rlimit *)
+ int getrusage(int, rusage *)
+ int setpriority(int, id_t, int)
+ int setrlimit(int, const rlimit *)
diff --git a/contrib/tools/cython/Cython/Includes/posix/signal.pxd b/contrib/tools/cython/Cython/Includes/posix/signal.pxd
index b0b381359e..9fe7d9c36c 100644
--- a/contrib/tools/cython/Cython/Includes/posix/signal.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/signal.pxd
@@ -1,73 +1,73 @@
-# 7.14 Signal handling <signal.h>
-
-from posix.types cimport pid_t, sigset_t, uid_t
-
+# 7.14 Signal handling <signal.h>
+
+from posix.types cimport pid_t, sigset_t, uid_t
+
cdef extern from "<signal.h>" nogil:
-
- cdef union sigval:
- int sival_int
- void *sival_ptr
-
- cdef struct sigevent:
- int sigev_notify
- int sigev_signo
- sigval sigev_value
+
+ cdef union sigval:
+ int sival_int
+ void *sival_ptr
+
+ cdef struct sigevent:
+ int sigev_notify
+ int sigev_signo
+ sigval sigev_value
void sigev_notify_function(sigval)
-
- ctypedef struct siginfo_t:
- int si_signo
- int si_code
- int si_errno
- pid_t si_pid
- uid_t si_uid
- void *si_addr
- int si_status
- long si_band
- sigval si_value
-
- cdef struct sigaction_t "sigaction":
+
+ ctypedef struct siginfo_t:
+ int si_signo
+ int si_code
+ int si_errno
+ pid_t si_pid
+ uid_t si_uid
+ void *si_addr
+ int si_status
+ long si_band
+ sigval si_value
+
+ cdef struct sigaction_t "sigaction":
void sa_handler(int)
void sa_sigaction(int, siginfo_t *, void *)
- sigset_t sa_mask
- int sa_flags
-
+ sigset_t sa_mask
+ int sa_flags
+
ctypedef struct stack_t:
void *ss_sp
int ss_flags
size_t ss_size
- enum: SA_NOCLDSTOP
- enum: SIG_BLOCK
- enum: SIG_UNBLOCK
- enum: SIG_SETMASK
- enum: SA_ONSTACK
- enum: SA_RESETHAND
- enum: SA_RESTART
- enum: SA_SIGINFO
- enum: SA_NOCLDWAIT
- enum: SA_NODEFER
- enum: SS_ONSTACK
- enum: SS_DISABLE
- enum: MINSIGSTKSZ
- enum: SIGSTKSZ
-
- enum: SIGEV_NONE
- enum: SIGEV_SIGNAL
- enum: SIGEV_THREAD
- enum: SIGEV_THREAD_ID
-
-
- int kill (pid_t, int)
- int killpg (pid_t, int)
- int sigaction (int, const sigaction_t *, sigaction_t *)
- int sigpending (sigset_t *)
- int sigprocmask (int, const sigset_t *, sigset_t *)
- int sigsuspend (const sigset_t *)
-
- int sigaddset (sigset_t *, int)
- int sigdelset (sigset_t *, int)
- int sigemptyset (sigset_t *)
- int sigfillset (sigset_t *)
+ enum: SA_NOCLDSTOP
+ enum: SIG_BLOCK
+ enum: SIG_UNBLOCK
+ enum: SIG_SETMASK
+ enum: SA_ONSTACK
+ enum: SA_RESETHAND
+ enum: SA_RESTART
+ enum: SA_SIGINFO
+ enum: SA_NOCLDWAIT
+ enum: SA_NODEFER
+ enum: SS_ONSTACK
+ enum: SS_DISABLE
+ enum: MINSIGSTKSZ
+ enum: SIGSTKSZ
+
+ enum: SIGEV_NONE
+ enum: SIGEV_SIGNAL
+ enum: SIGEV_THREAD
+ enum: SIGEV_THREAD_ID
+
+
+ int kill (pid_t, int)
+ int killpg (pid_t, int)
+ int sigaction (int, const sigaction_t *, sigaction_t *)
+ int sigpending (sigset_t *)
+ int sigprocmask (int, const sigset_t *, sigset_t *)
+ int sigsuspend (const sigset_t *)
+
+ int sigaddset (sigset_t *, int)
+ int sigdelset (sigset_t *, int)
+ int sigemptyset (sigset_t *)
+ int sigfillset (sigset_t *)
int sigismember (const sigset_t *, int)
int sigaltstack(const stack_t *, stack_t *)
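
A sketch built on the signal.pxd declarations above: blocking SIGINT for the calling thread. SIGINT itself comes from libc.signal, which is an assumption about the caller's imports rather than part of this file:

    from posix.signal cimport sigemptyset, sigaddset, sigprocmask, sigset_t, SIG_BLOCK
    from libc.signal cimport SIGINT

    def block_sigint():
        cdef sigset_t mask
        sigemptyset(&mask)
        sigaddset(&mask, SIGINT)
        # add SIGINT to the blocked set; NULL because the previous mask is not needed
        if sigprocmask(SIG_BLOCK, &mask, NULL) != 0:
            raise OSError("sigprocmask failed")
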
diff --git a/contrib/tools/cython/Cython/Includes/posix/stat.pxd b/contrib/tools/cython/Cython/Includes/posix/stat.pxd
index 7e57b1b0d4..69c2eca166 100644
--- a/contrib/tools/cython/Cython/Includes/posix/stat.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/stat.pxd
@@ -1,71 +1,71 @@
-from posix.types cimport (blkcnt_t, blksize_t, dev_t, gid_t, ino_t, mode_t,
- nlink_t, off_t, time_t, uid_t)
-
-
+from posix.types cimport (blkcnt_t, blksize_t, dev_t, gid_t, ino_t, mode_t,
+ nlink_t, off_t, time_t, uid_t)
+
+
cdef extern from "<sys/stat.h>" nogil:
- cdef struct struct_stat "stat":
- dev_t st_dev
- ino_t st_ino
- mode_t st_mode
- nlink_t st_nlink
- uid_t st_uid
- gid_t st_gid
- dev_t st_rdev
- off_t st_size
- blksize_t st_blksize
- blkcnt_t st_blocks
- time_t st_atime
- time_t st_mtime
- time_t st_ctime
-
+ cdef struct struct_stat "stat":
+ dev_t st_dev
+ ino_t st_ino
+ mode_t st_mode
+ nlink_t st_nlink
+ uid_t st_uid
+ gid_t st_gid
+ dev_t st_rdev
+ off_t st_size
+ blksize_t st_blksize
+ blkcnt_t st_blocks
+ time_t st_atime
+ time_t st_mtime
+ time_t st_ctime
+
# st_birthtime exists on *BSD and OS X.
# Under Linux, defining it here does not hurt. Compilation under Linux
# will only (and rightfully) fail when attempting to use the field.
time_t st_birthtime
-# POSIX prescribes including both <sys/stat.h> and <unistd.h> for these
+# POSIX prescribes including both <sys/stat.h> and <unistd.h> for these
cdef extern from "<unistd.h>" nogil:
- int fchmod(int, mode_t)
- int chmod(const char *, mode_t)
-
- int fstat(int, struct_stat *)
- int lstat(const char *, struct_stat *)
- int stat(const char *, struct_stat *)
-
- # Macros for st_mode
- mode_t S_ISREG(mode_t)
- mode_t S_ISDIR(mode_t)
- mode_t S_ISCHR(mode_t)
- mode_t S_ISBLK(mode_t)
- mode_t S_ISFIFO(mode_t)
- mode_t S_ISLNK(mode_t)
- mode_t S_ISSOCK(mode_t)
-
- mode_t S_IFMT
- mode_t S_IFREG
- mode_t S_IFDIR
- mode_t S_IFCHR
- mode_t S_IFBLK
- mode_t S_IFIFO
- mode_t S_IFLNK
- mode_t S_IFSOCK
-
- # Permissions
- mode_t S_ISUID
- mode_t S_ISGID
- mode_t S_ISVTX
-
- mode_t S_IRWXU
- mode_t S_IRUSR
- mode_t S_IWUSR
- mode_t S_IXUSR
-
- mode_t S_IRWXG
- mode_t S_IRGRP
- mode_t S_IWGRP
- mode_t S_IXGRP
-
- mode_t S_IRWXO
- mode_t S_IROTH
- mode_t S_IWOTH
- mode_t S_IXOTH
+ int fchmod(int, mode_t)
+ int chmod(const char *, mode_t)
+
+ int fstat(int, struct_stat *)
+ int lstat(const char *, struct_stat *)
+ int stat(const char *, struct_stat *)
+
+ # Macros for st_mode
+ mode_t S_ISREG(mode_t)
+ mode_t S_ISDIR(mode_t)
+ mode_t S_ISCHR(mode_t)
+ mode_t S_ISBLK(mode_t)
+ mode_t S_ISFIFO(mode_t)
+ mode_t S_ISLNK(mode_t)
+ mode_t S_ISSOCK(mode_t)
+
+ mode_t S_IFMT
+ mode_t S_IFREG
+ mode_t S_IFDIR
+ mode_t S_IFCHR
+ mode_t S_IFBLK
+ mode_t S_IFIFO
+ mode_t S_IFLNK
+ mode_t S_IFSOCK
+
+ # Permissions
+ mode_t S_ISUID
+ mode_t S_ISGID
+ mode_t S_ISVTX
+
+ mode_t S_IRWXU
+ mode_t S_IRUSR
+ mode_t S_IWUSR
+ mode_t S_IXUSR
+
+ mode_t S_IRWXG
+ mode_t S_IRGRP
+ mode_t S_IWGRP
+ mode_t S_IXGRP
+
+ mode_t S_IRWXO
+ mode_t S_IROTH
+ mode_t S_IWOTH
+ mode_t S_IXOTH
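
Using the stat.pxd declarations above, a minimal directory check; the path is a bytes object and error handling is reduced to returning False:

    from posix.stat cimport stat, struct_stat, S_ISDIR

    def is_directory(bytes path):
        cdef struct_stat st
        if stat(path, &st) != 0:
            return False
        # S_ISDIR is the st_mode test macro declared above
        return bool(S_ISDIR(st.st_mode))
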
diff --git a/contrib/tools/cython/Cython/Includes/posix/stdlib.pxd b/contrib/tools/cython/Cython/Includes/posix/stdlib.pxd
index 8dbf1f72dd..513de938a8 100644
--- a/contrib/tools/cython/Cython/Includes/posix/stdlib.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/stdlib.pxd
@@ -1,29 +1,29 @@
-# POSIX additions to <stdlib.h>
-# http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/stdlib.h.html
-
+# POSIX additions to <stdlib.h>
+# http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/stdlib.h.html
+
cdef extern from "<stdlib.h>" nogil:
- void _Exit(int)
- double drand48()
- double erand48(unsigned short *)
- int getsubopt(char **, char *const *, char **)
- void lcong48(unsigned short *)
- long lrand()
- char *mkdtemp(char *)
- int mkstemp(char *)
- long mrand()
- long nrand48(unsigned short *)
- int posix_memalign(void **, size_t, size_t)
- int posix_openpt(int)
- char *ptsname(int)
- int putenv(char *)
- int rand_r(unsigned *)
- long random()
- char *realpath(const char *, char *)
- unsigned short *seed48(unsigned short *)
- int setenv(const char *, const char *, int)
- void setkey(const char *)
- char *setstate(char *)
- void srand48(long)
- void srandom(unsigned)
- int unlockpt(int)
- int unsetenv(const char *)
+ void _Exit(int)
+ double drand48()
+ double erand48(unsigned short *)
+ int getsubopt(char **, char *const *, char **)
+ void lcong48(unsigned short *)
+ long lrand()
+ char *mkdtemp(char *)
+ int mkstemp(char *)
+ long mrand()
+ long nrand48(unsigned short *)
+ int posix_memalign(void **, size_t, size_t)
+ int posix_openpt(int)
+ char *ptsname(int)
+ int putenv(char *)
+ int rand_r(unsigned *)
+ long random()
+ char *realpath(const char *, char *)
+ unsigned short *seed48(unsigned short *)
+ int setenv(const char *, const char *, int)
+ void setkey(const char *)
+ char *setstate(char *)
+ void srand48(long)
+ void srandom(unsigned)
+ int unlockpt(int)
+ int unsetenv(const char *)
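
A sketch for the stdlib.pxd declarations above: resolving a path with realpath(). The 4096-byte buffer is an assumption standing in for PATH_MAX, not something this header provides:

    from posix.stdlib cimport realpath

    def resolve(bytes path):
        cdef char buf[4096]
        if realpath(path, buf) == NULL:
            raise OSError("realpath failed")
        # Cython coerces the NUL-terminated char buffer to a Python bytes object
        return buf
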
diff --git a/contrib/tools/cython/Cython/Includes/posix/time.pxd b/contrib/tools/cython/Cython/Includes/posix/time.pxd
index 0c8cd6b6ca..6bc81bfea0 100644
--- a/contrib/tools/cython/Cython/Includes/posix/time.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/time.pxd
@@ -1,63 +1,63 @@
-# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/time.h.html
-
+# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/time.h.html
+
from posix.types cimport suseconds_t, time_t, clockid_t, timer_t
-from posix.signal cimport sigevent
-
+from posix.signal cimport sigevent
+
cdef extern from "<sys/time.h>" nogil:
- enum: CLOCK_REALTIME
- enum: TIMER_ABSTIME
- enum: CLOCK_MONOTONIC
-
- # FreeBSD-specific clocks
- enum: CLOCK_UPTIME
- enum: CLOCK_UPTIME_PRECISE
- enum: CLOCK_UPTIME_FAST
- enum: CLOCK_REALTIME_PRECISE
- enum: CLOCK_REALTIME_FAST
- enum: CLOCK_MONOTONIC_PRECISE
- enum: CLOCK_MONOTONIC_FAST
- enum: CLOCK_SECOND
-
- # Linux-specific clocks
- enum: CLOCK_PROCESS_CPUTIME_ID
- enum: CLOCK_THREAD_CPUTIME_ID
- enum: CLOCK_MONOTONIC_RAW
- enum: CLOCK_REALTIME_COARSE
- enum: CLOCK_MONOTONIC_COARSE
- enum: CLOCK_BOOTTIME
- enum: CLOCK_REALTIME_ALARM
- enum: CLOCK_BOOTTIME_ALARM
-
- enum: ITIMER_REAL
- enum: ITIMER_VIRTUAL
- enum: ITIMER_PROF
-
+ enum: CLOCK_REALTIME
+ enum: TIMER_ABSTIME
+ enum: CLOCK_MONOTONIC
+
+ # FreeBSD-specific clocks
+ enum: CLOCK_UPTIME
+ enum: CLOCK_UPTIME_PRECISE
+ enum: CLOCK_UPTIME_FAST
+ enum: CLOCK_REALTIME_PRECISE
+ enum: CLOCK_REALTIME_FAST
+ enum: CLOCK_MONOTONIC_PRECISE
+ enum: CLOCK_MONOTONIC_FAST
+ enum: CLOCK_SECOND
+
+ # Linux-specific clocks
+ enum: CLOCK_PROCESS_CPUTIME_ID
+ enum: CLOCK_THREAD_CPUTIME_ID
+ enum: CLOCK_MONOTONIC_RAW
+ enum: CLOCK_REALTIME_COARSE
+ enum: CLOCK_MONOTONIC_COARSE
+ enum: CLOCK_BOOTTIME
+ enum: CLOCK_REALTIME_ALARM
+ enum: CLOCK_BOOTTIME_ALARM
+
+ enum: ITIMER_REAL
+ enum: ITIMER_VIRTUAL
+ enum: ITIMER_PROF
+
cdef struct timezone:
int tz_minuteswest
int dsttime
- cdef struct timeval:
- time_t tv_sec
- suseconds_t tv_usec
-
+ cdef struct timeval:
+ time_t tv_sec
+ suseconds_t tv_usec
+
cdef struct timespec:
time_t tv_sec
long tv_nsec
- cdef struct itimerval:
- timeval it_interval
- timeval it_value
-
- cdef struct itimerspec:
- timespec it_interval
- timespec it_value
-
+ cdef struct itimerval:
+ timeval it_interval
+ timeval it_value
+
+ cdef struct itimerspec:
+ timespec it_interval
+ timespec it_value
+
int nanosleep(const timespec *, timespec *)
-
+
int getitimer(int, itimerval *)
int gettimeofday(timeval *tp, timezone *tzp)
int setitimer(int, const itimerval *, itimerval *)
-
+
int clock_getcpuclockid(pid_t, clockid_t *)
int clock_getres(clockid_t, timespec *)
int clock_gettime(clockid_t, timespec *)
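
A sketch using the time.pxd declarations above to read a monotonic timestamp, assuming the platform provides CLOCK_MONOTONIC:

    from posix.time cimport clock_gettime, timespec, CLOCK_MONOTONIC

    def monotonic_ns():
        cdef timespec ts
        if clock_gettime(CLOCK_MONOTONIC, &ts) != 0:
            raise OSError("clock_gettime failed")
        # combine seconds and nanoseconds into a single integer
        return ts.tv_sec * 1000000000 + ts.tv_nsec
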
diff --git a/contrib/tools/cython/Cython/Includes/posix/types.pxd b/contrib/tools/cython/Cython/Includes/posix/types.pxd
index 2a5434e3bd..308f2954ee 100644
--- a/contrib/tools/cython/Cython/Includes/posix/types.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/types.pxd
@@ -11,20 +11,20 @@
# type).
cdef extern from "<sys/types.h>":
- ctypedef long blkcnt_t
- ctypedef long blksize_t
- ctypedef long clockid_t
- ctypedef long dev_t
- ctypedef long gid_t
- ctypedef long id_t
+ ctypedef long blkcnt_t
+ ctypedef long blksize_t
+ ctypedef long clockid_t
+ ctypedef long dev_t
+ ctypedef long gid_t
+ ctypedef long id_t
ctypedef unsigned long ino_t
- ctypedef long mode_t
- ctypedef long nlink_t
- ctypedef long off_t
- ctypedef long pid_t
+ ctypedef long mode_t
+ ctypedef long nlink_t
+ ctypedef long off_t
+ ctypedef long pid_t
ctypedef struct sigset_t:
pass
- ctypedef long suseconds_t
- ctypedef long time_t
- ctypedef long timer_t
- ctypedef long uid_t
+ ctypedef long suseconds_t
+ ctypedef long time_t
+ ctypedef long timer_t
+ ctypedef long uid_t
diff --git a/contrib/tools/cython/Cython/Includes/posix/unistd.pxd b/contrib/tools/cython/Cython/Includes/posix/unistd.pxd
index 7d1299c745..1afeca3854 100644
--- a/contrib/tools/cython/Cython/Includes/posix/unistd.pxd
+++ b/contrib/tools/cython/Cython/Includes/posix/unistd.pxd
@@ -1,271 +1,271 @@
-# http://www.opengroup.org/onlinepubs/009695399/basedefs/unistd.h.html
-
-from posix.types cimport gid_t, pid_t, off_t, uid_t
-
+# http://www.opengroup.org/onlinepubs/009695399/basedefs/unistd.h.html
+
+from posix.types cimport gid_t, pid_t, off_t, uid_t
+
cdef extern from "<unistd.h>" nogil:
-
- #:NULL
-
- enum: R_OK
- enum: W_OK
- enum: X_OK
- enum: F_OK
-
- enum: _CS_PATH
- enum: _CS_POSIX_V6_ILP32_OFF32_CFLAGS
- enum: _CS_POSIX_V6_ILP32_OFF32_LDFLAGS
- enum: _CS_POSIX_V6_ILP32_OFF32_LIBS
- enum: _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS
- enum: _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS
- enum: _CS_POSIX_V6_ILP32_OFFBIG_LIBS
- enum: _CS_POSIX_V6_LP64_OFF64_CFLAGS
- enum: _CS_POSIX_V6_LP64_OFF64_LDFLAGS
- enum: _CS_POSIX_V6_LP64_OFF64_LIBS
- enum: _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS
- enum: _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS
- enum: _CS_POSIX_V6_LPBIG_OFFBIG_LIBS
- enum: _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS
-
- enum: SEEK_SET
- enum: SEEK_CUR
- enum: SEEK_END
-
- enum: F_LOCK
- enum: F_TEST
- enum: F_TLOCK
- enum: F_ULOCK
-
- enum: _PC_2_SYMLINKS
- enum: _PC_ALLOC_SIZE_MIN
- enum: _PC_ASYNC_IO
- enum: _PC_CHOWN_RESTRICTED
- enum: _PC_FILESIZEBITS
- enum: _PC_LINK_MAX
- enum: _PC_MAX_CANON
- enum: _PC_MAX_INPUT
- enum: _PC_NAME_MAX
- enum: _PC_NO_TRUNC
- enum: _PC_PATH_MAX
- enum: _PC_PIPE_BUF
- enum: _PC_PRIO_IO
- enum: _PC_REC_INCR_XFER_SIZE
- enum: _PC_REC_MIN_XFER_SIZE
- enum: _PC_REC_XFER_ALIGN
- enum: _PC_SYMLINK_MAX
- enum: _PC_SYNC_IO
- enum: _PC_VDISABLE
-
- enum: _SC_2_C_BIND
- enum: _SC_2_C_DEV
- enum: _SC_2_CHAR_TERM
- enum: _SC_2_FORT_DEV
- enum: _SC_2_FORT_RUN
- enum: _SC_2_LOCALEDEF
- enum: _SC_2_PBS
- enum: _SC_2_PBS_ACCOUNTING
- enum: _SC_2_PBS_CHECKPOINT
- enum: _SC_2_PBS_LOCATE
- enum: _SC_2_PBS_MESSAGE
- enum: _SC_2_PBS_TRACK
- enum: _SC_2_SW_DEV
- enum: _SC_2_UPE
- enum: _SC_2_VERSION
- enum: _SC_ADVISORY_INFO
- enum: _SC_AIO_LISTIO_MAX
- enum: _SC_AIO_MAX
- enum: _SC_AIO_PRIO_DELTA_MAX
- enum: _SC_ARG_MAX
- enum: _SC_ASYNCHRONOUS_IO
- enum: _SC_ATEXIT_MAX
- enum: _SC_BARRIERS
- enum: _SC_BC_BASE_MAX
- enum: _SC_BC_DIM_MAX
- enum: _SC_BC_SCALE_MAX
- enum: _SC_BC_STRING_MAX
- enum: _SC_CHILD_MAX
- enum: _SC_CLK_TCK
- enum: _SC_CLOCK_SELECTION
- enum: _SC_COLL_WEIGHTS_MAX
- enum: _SC_CPUTIME
- enum: _SC_DELAYTIMER_MAX
- enum: _SC_EXPR_NEST_MAX
- enum: _SC_FSYNC
- enum: _SC_GETGR_R_SIZE_MAX
- enum: _SC_GETPW_R_SIZE_MAX
- enum: _SC_HOST_NAME_MAX
- enum: _SC_IOV_MAX
- enum: _SC_IPV6
- enum: _SC_JOB_CONTROL
- enum: _SC_LINE_MAX
- enum: _SC_LOGIN_NAME_MAX
- enum: _SC_MAPPED_FILES
- enum: _SC_MEMLOCK
- enum: _SC_MEMLOCK_RANGE
- enum: _SC_MEMORY_PROTECTION
- enum: _SC_MESSAGE_PASSING
- enum: _SC_MONOTONIC_CLOCK
- enum: _SC_MQ_OPEN_MAX
- enum: _SC_MQ_PRIO_MAX
- enum: _SC_NGROUPS_MAX
- enum: _SC_OPEN_MAX
- enum: _SC_PAGE_SIZE
- enum: _SC_PAGESIZE
- enum: _SC_PRIORITIZED_IO
- enum: _SC_PRIORITY_SCHEDULING
- enum: _SC_RAW_SOCKETS
- enum: _SC_RE_DUP_MAX
- enum: _SC_READER_WRITER_LOCKS
- enum: _SC_REALTIME_SIGNALS
- enum: _SC_REGEXP
- enum: _SC_RTSIG_MAX
- enum: _SC_SAVED_IDS
- enum: _SC_SEM_NSEMS_MAX
- enum: _SC_SEM_VALUE_MAX
- enum: _SC_SEMAPHORES
- enum: _SC_SHARED_MEMORY_OBJECTS
- enum: _SC_SHELL
- enum: _SC_SIGQUEUE_MAX
- enum: _SC_SPAWN
- enum: _SC_SPIN_LOCKS
- enum: _SC_SPORADIC_SERVER
- enum: _SC_SS_REPL_MAX
- enum: _SC_STREAM_MAX
- enum: _SC_SYMLOOP_MAX
- enum: _SC_SYNCHRONIZED_IO
- enum: _SC_THREAD_ATTR_STACKADDR
- enum: _SC_THREAD_ATTR_STACKSIZE
- enum: _SC_THREAD_CPUTIME
- enum: _SC_THREAD_DESTRUCTOR_ITERATIONS
- enum: _SC_THREAD_KEYS_MAX
- enum: _SC_THREAD_PRIO_INHERIT
- enum: _SC_THREAD_PRIO_PROTECT
- enum: _SC_THREAD_PRIORITY_SCHEDULING
- enum: _SC_THREAD_PROCESS_SHARED
- enum: _SC_THREAD_SAFE_FUNCTIONS
- enum: _SC_THREAD_SPORADIC_SERVER
- enum: _SC_THREAD_STACK_MIN
- enum: _SC_THREAD_THREADS_MAX
- enum: _SC_THREADS
- enum: _SC_TIMEOUTS
- enum: _SC_TIMER_MAX
- enum: _SC_TIMERS
- enum: _SC_TRACE
- enum: _SC_TRACE_EVENT_FILTER
- enum: _SC_TRACE_EVENT_NAME_MAX
- enum: _SC_TRACE_INHERIT
- enum: _SC_TRACE_LOG
- enum: _SC_TRACE_NAME_MAX
- enum: _SC_TRACE_SYS_MAX
- enum: _SC_TRACE_USER_EVENT_MAX
- enum: _SC_TTY_NAME_MAX
- enum: _SC_TYPED_MEMORY_OBJECTS
- enum: _SC_TZNAME_MAX
- enum: _SC_V6_ILP32_OFF32
- enum: _SC_V6_ILP32_OFFBIG
- enum: _SC_V6_LP64_OFF64
- enum: _SC_V6_LPBIG_OFFBIG
- enum: _SC_VERSION
- enum: _SC_XBS5_ILP32_OFF32
- enum: _SC_XBS5_ILP32_OFFBIG
- enum: _SC_XBS5_LP64_OFF64
- enum: _SC_XBS5_LPBIG_OFFBIG
- enum: _SC_XOPEN_CRYPT
- enum: _SC_XOPEN_ENH_I18N
- enum: _SC_XOPEN_LEGACY
- enum: _SC_XOPEN_REALTIME
- enum: _SC_XOPEN_REALTIME_THREADS
- enum: _SC_XOPEN_SHM
- enum: _SC_XOPEN_STREAMS
- enum: _SC_XOPEN_UNIX
- enum: _SC_XOPEN_VERSION
-
- enum: STDIN_FILENO #0
- enum: STDOUT_FILENO #1
- enum: STDERR_FILENO #2
-
- ctypedef unsigned useconds_t
-
- int access(const char *, int)
- unsigned alarm(unsigned)
- int chdir(const char *)
- int chown(const char *, uid_t, gid_t)
- int close(int)
- size_t confstr(int, char *, size_t)
- char *crypt(const char *, const char *)
- char *ctermid(char *)
- int dup(int)
- int dup2(int, int)
- void encrypt(char[64], int)
- int execl(const char *, const char *, ...)
- int execle(const char *, const char *, ...)
- int execlp(const char *, const char *, ...)
- int execv(const char *, char *[])
- int execve(const char *, char *[], char *[])
- int execvp(const char *, char *[])
- void _exit(int)
- int fchown(int, uid_t, gid_t)
- int fchdir(int)
- int fdatasync(int)
- pid_t fork()
- long fpathconf(int, int)
- int fsync(int)
- int ftruncate(int, off_t)
- char *getcwd(char *, size_t)
- gid_t getegid()
- uid_t geteuid()
- gid_t getgid()
- int getgroups(int, gid_t [])
- long gethostid()
- int gethostname(char *, size_t)
- char *getlogin()
- int getlogin_r(char *, size_t)
- int getopt(int, char * [], const char *)
- pid_t getpgid(pid_t)
- pid_t getpgrp()
- pid_t getpid()
- pid_t getppid()
- pid_t getsid(pid_t)
- uid_t getuid()
- char *getwd(char *)
- int isatty(int)
- int lchown(const char *, uid_t, gid_t)
- int link(const char *, const char *)
- int lockf(int, int, off_t)
- off_t lseek(int, off_t, int)
- int nice(int)
- long pathconf(char *, int)
- int pause()
- int pipe(int [2])
- ssize_t pread(int, void *, size_t, off_t)
- ssize_t pwrite(int, const void *, size_t, off_t)
- ssize_t read(int, void *, size_t)
- ssize_t readlink(const char *, char *, size_t)
- int rmdir(const char *)
- int setegid(gid_t)
- int seteuid(uid_t)
- int setgid(gid_t)
- int setpgid(pid_t, pid_t)
- pid_t setpgrp()
- int setregid(gid_t, gid_t)
- int setreuid(uid_t, uid_t)
- pid_t setsid()
- int setuid(uid_t)
- unsigned sleep(unsigned)
- void swab(const void *, void *, ssize_t)
- int symlink(const char *, const char *)
- void sync()
- long sysconf(int)
- pid_t tcgetpgrp(int)
- int tcsetpgrp(int, pid_t)
- int truncate(const char *, off_t)
- char *ttyname(int)
- int ttyname_r(int, char *, size_t)
- useconds_t ualarm(useconds_t, useconds_t)
- int unlink(const char *)
- int usleep(useconds_t)
- pid_t vfork()
- ssize_t write(int, const void *, size_t)
- char *optarg
- int optind
- int opterr
- int optopt
+
+ #:NULL
+
+ enum: R_OK
+ enum: W_OK
+ enum: X_OK
+ enum: F_OK
+
+ enum: _CS_PATH
+ enum: _CS_POSIX_V6_ILP32_OFF32_CFLAGS
+ enum: _CS_POSIX_V6_ILP32_OFF32_LDFLAGS
+ enum: _CS_POSIX_V6_ILP32_OFF32_LIBS
+ enum: _CS_POSIX_V6_ILP32_OFFBIG_CFLAGS
+ enum: _CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS
+ enum: _CS_POSIX_V6_ILP32_OFFBIG_LIBS
+ enum: _CS_POSIX_V6_LP64_OFF64_CFLAGS
+ enum: _CS_POSIX_V6_LP64_OFF64_LDFLAGS
+ enum: _CS_POSIX_V6_LP64_OFF64_LIBS
+ enum: _CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS
+ enum: _CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS
+ enum: _CS_POSIX_V6_LPBIG_OFFBIG_LIBS
+ enum: _CS_POSIX_V6_WIDTH_RESTRICTED_ENVS
+
+ enum: SEEK_SET
+ enum: SEEK_CUR
+ enum: SEEK_END
+
+ enum: F_LOCK
+ enum: F_TEST
+ enum: F_TLOCK
+ enum: F_ULOCK
+
+ enum: _PC_2_SYMLINKS
+ enum: _PC_ALLOC_SIZE_MIN
+ enum: _PC_ASYNC_IO
+ enum: _PC_CHOWN_RESTRICTED
+ enum: _PC_FILESIZEBITS
+ enum: _PC_LINK_MAX
+ enum: _PC_MAX_CANON
+ enum: _PC_MAX_INPUT
+ enum: _PC_NAME_MAX
+ enum: _PC_NO_TRUNC
+ enum: _PC_PATH_MAX
+ enum: _PC_PIPE_BUF
+ enum: _PC_PRIO_IO
+ enum: _PC_REC_INCR_XFER_SIZE
+ enum: _PC_REC_MIN_XFER_SIZE
+ enum: _PC_REC_XFER_ALIGN
+ enum: _PC_SYMLINK_MAX
+ enum: _PC_SYNC_IO
+ enum: _PC_VDISABLE
+
+ enum: _SC_2_C_BIND
+ enum: _SC_2_C_DEV
+ enum: _SC_2_CHAR_TERM
+ enum: _SC_2_FORT_DEV
+ enum: _SC_2_FORT_RUN
+ enum: _SC_2_LOCALEDEF
+ enum: _SC_2_PBS
+ enum: _SC_2_PBS_ACCOUNTING
+ enum: _SC_2_PBS_CHECKPOINT
+ enum: _SC_2_PBS_LOCATE
+ enum: _SC_2_PBS_MESSAGE
+ enum: _SC_2_PBS_TRACK
+ enum: _SC_2_SW_DEV
+ enum: _SC_2_UPE
+ enum: _SC_2_VERSION
+ enum: _SC_ADVISORY_INFO
+ enum: _SC_AIO_LISTIO_MAX
+ enum: _SC_AIO_MAX
+ enum: _SC_AIO_PRIO_DELTA_MAX
+ enum: _SC_ARG_MAX
+ enum: _SC_ASYNCHRONOUS_IO
+ enum: _SC_ATEXIT_MAX
+ enum: _SC_BARRIERS
+ enum: _SC_BC_BASE_MAX
+ enum: _SC_BC_DIM_MAX
+ enum: _SC_BC_SCALE_MAX
+ enum: _SC_BC_STRING_MAX
+ enum: _SC_CHILD_MAX
+ enum: _SC_CLK_TCK
+ enum: _SC_CLOCK_SELECTION
+ enum: _SC_COLL_WEIGHTS_MAX
+ enum: _SC_CPUTIME
+ enum: _SC_DELAYTIMER_MAX
+ enum: _SC_EXPR_NEST_MAX
+ enum: _SC_FSYNC
+ enum: _SC_GETGR_R_SIZE_MAX
+ enum: _SC_GETPW_R_SIZE_MAX
+ enum: _SC_HOST_NAME_MAX
+ enum: _SC_IOV_MAX
+ enum: _SC_IPV6
+ enum: _SC_JOB_CONTROL
+ enum: _SC_LINE_MAX
+ enum: _SC_LOGIN_NAME_MAX
+ enum: _SC_MAPPED_FILES
+ enum: _SC_MEMLOCK
+ enum: _SC_MEMLOCK_RANGE
+ enum: _SC_MEMORY_PROTECTION
+ enum: _SC_MESSAGE_PASSING
+ enum: _SC_MONOTONIC_CLOCK
+ enum: _SC_MQ_OPEN_MAX
+ enum: _SC_MQ_PRIO_MAX
+ enum: _SC_NGROUPS_MAX
+ enum: _SC_OPEN_MAX
+ enum: _SC_PAGE_SIZE
+ enum: _SC_PAGESIZE
+ enum: _SC_PRIORITIZED_IO
+ enum: _SC_PRIORITY_SCHEDULING
+ enum: _SC_RAW_SOCKETS
+ enum: _SC_RE_DUP_MAX
+ enum: _SC_READER_WRITER_LOCKS
+ enum: _SC_REALTIME_SIGNALS
+ enum: _SC_REGEXP
+ enum: _SC_RTSIG_MAX
+ enum: _SC_SAVED_IDS
+ enum: _SC_SEM_NSEMS_MAX
+ enum: _SC_SEM_VALUE_MAX
+ enum: _SC_SEMAPHORES
+ enum: _SC_SHARED_MEMORY_OBJECTS
+ enum: _SC_SHELL
+ enum: _SC_SIGQUEUE_MAX
+ enum: _SC_SPAWN
+ enum: _SC_SPIN_LOCKS
+ enum: _SC_SPORADIC_SERVER
+ enum: _SC_SS_REPL_MAX
+ enum: _SC_STREAM_MAX
+ enum: _SC_SYMLOOP_MAX
+ enum: _SC_SYNCHRONIZED_IO
+ enum: _SC_THREAD_ATTR_STACKADDR
+ enum: _SC_THREAD_ATTR_STACKSIZE
+ enum: _SC_THREAD_CPUTIME
+ enum: _SC_THREAD_DESTRUCTOR_ITERATIONS
+ enum: _SC_THREAD_KEYS_MAX
+ enum: _SC_THREAD_PRIO_INHERIT
+ enum: _SC_THREAD_PRIO_PROTECT
+ enum: _SC_THREAD_PRIORITY_SCHEDULING
+ enum: _SC_THREAD_PROCESS_SHARED
+ enum: _SC_THREAD_SAFE_FUNCTIONS
+ enum: _SC_THREAD_SPORADIC_SERVER
+ enum: _SC_THREAD_STACK_MIN
+ enum: _SC_THREAD_THREADS_MAX
+ enum: _SC_THREADS
+ enum: _SC_TIMEOUTS
+ enum: _SC_TIMER_MAX
+ enum: _SC_TIMERS
+ enum: _SC_TRACE
+ enum: _SC_TRACE_EVENT_FILTER
+ enum: _SC_TRACE_EVENT_NAME_MAX
+ enum: _SC_TRACE_INHERIT
+ enum: _SC_TRACE_LOG
+ enum: _SC_TRACE_NAME_MAX
+ enum: _SC_TRACE_SYS_MAX
+ enum: _SC_TRACE_USER_EVENT_MAX
+ enum: _SC_TTY_NAME_MAX
+ enum: _SC_TYPED_MEMORY_OBJECTS
+ enum: _SC_TZNAME_MAX
+ enum: _SC_V6_ILP32_OFF32
+ enum: _SC_V6_ILP32_OFFBIG
+ enum: _SC_V6_LP64_OFF64
+ enum: _SC_V6_LPBIG_OFFBIG
+ enum: _SC_VERSION
+ enum: _SC_XBS5_ILP32_OFF32
+ enum: _SC_XBS5_ILP32_OFFBIG
+ enum: _SC_XBS5_LP64_OFF64
+ enum: _SC_XBS5_LPBIG_OFFBIG
+ enum: _SC_XOPEN_CRYPT
+ enum: _SC_XOPEN_ENH_I18N
+ enum: _SC_XOPEN_LEGACY
+ enum: _SC_XOPEN_REALTIME
+ enum: _SC_XOPEN_REALTIME_THREADS
+ enum: _SC_XOPEN_SHM
+ enum: _SC_XOPEN_STREAMS
+ enum: _SC_XOPEN_UNIX
+ enum: _SC_XOPEN_VERSION
+
+ enum: STDIN_FILENO #0
+ enum: STDOUT_FILENO #1
+ enum: STDERR_FILENO #2
+
+ ctypedef unsigned useconds_t
+
+ int access(const char *, int)
+ unsigned alarm(unsigned)
+ int chdir(const char *)
+ int chown(const char *, uid_t, gid_t)
+ int close(int)
+ size_t confstr(int, char *, size_t)
+ char *crypt(const char *, const char *)
+ char *ctermid(char *)
+ int dup(int)
+ int dup2(int, int)
+ void encrypt(char[64], int)
+ int execl(const char *, const char *, ...)
+ int execle(const char *, const char *, ...)
+ int execlp(const char *, const char *, ...)
+ int execv(const char *, char *[])
+ int execve(const char *, char *[], char *[])
+ int execvp(const char *, char *[])
+ void _exit(int)
+ int fchown(int, uid_t, gid_t)
+ int fchdir(int)
+ int fdatasync(int)
+ pid_t fork()
+ long fpathconf(int, int)
+ int fsync(int)
+ int ftruncate(int, off_t)
+ char *getcwd(char *, size_t)
+ gid_t getegid()
+ uid_t geteuid()
+ gid_t getgid()
+ int getgroups(int, gid_t [])
+ long gethostid()
+ int gethostname(char *, size_t)
+ char *getlogin()
+ int getlogin_r(char *, size_t)
+ int getopt(int, char * [], const char *)
+ pid_t getpgid(pid_t)
+ pid_t getpgrp()
+ pid_t getpid()
+ pid_t getppid()
+ pid_t getsid(pid_t)
+ uid_t getuid()
+ char *getwd(char *)
+ int isatty(int)
+ int lchown(const char *, uid_t, gid_t)
+ int link(const char *, const char *)
+ int lockf(int, int, off_t)
+ off_t lseek(int, off_t, int)
+ int nice(int)
+ long pathconf(char *, int)
+ int pause()
+ int pipe(int [2])
+ ssize_t pread(int, void *, size_t, off_t)
+ ssize_t pwrite(int, const void *, size_t, off_t)
+ ssize_t read(int, void *, size_t)
+ ssize_t readlink(const char *, char *, size_t)
+ int rmdir(const char *)
+ int setegid(gid_t)
+ int seteuid(uid_t)
+ int setgid(gid_t)
+ int setpgid(pid_t, pid_t)
+ pid_t setpgrp()
+ int setregid(gid_t, gid_t)
+ int setreuid(uid_t, uid_t)
+ pid_t setsid()
+ int setuid(uid_t)
+ unsigned sleep(unsigned)
+ void swab(const void *, void *, ssize_t)
+ int symlink(const char *, const char *)
+ void sync()
+ long sysconf(int)
+ pid_t tcgetpgrp(int)
+ int tcsetpgrp(int, pid_t)
+ int truncate(const char *, off_t)
+ char *ttyname(int)
+ int ttyname_r(int, char *, size_t)
+ useconds_t ualarm(useconds_t, useconds_t)
+ int unlink(const char *)
+ int usleep(useconds_t)
+ pid_t vfork()
+ ssize_t write(int, const void *, size_t)
+ char *optarg
+ int optind
+ int opterr
+ int optopt
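
A sketch combining a few of the unistd.pxd declarations above; the 256-byte hostname buffer is an assumption, not a limit taken from this header:

    from posix.unistd cimport getpid, gethostname, sysconf, _SC_OPEN_MAX

    def process_summary():
        cdef char host[256]
        if gethostname(host, sizeof(host)) != 0:
            raise OSError("gethostname failed")
        return {
            "pid": getpid(),
            "hostname": host,            # coerced to bytes by Cython
            "max_open_files": sysconf(_SC_OPEN_MAX),
        }
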
diff --git a/contrib/tools/cython/Cython/Plex/Actions.pxd b/contrib/tools/cython/Cython/Plex/Actions.pxd
index 595e932181..34660a2d9b 100644
--- a/contrib/tools/cython/Cython/Plex/Actions.pxd
+++ b/contrib/tools/cython/Cython/Plex/Actions.pxd
@@ -1,25 +1,25 @@
-
-cdef class Action:
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Return(Action):
- cdef object value
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Call(Action):
- cdef object function
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Begin(Action):
- cdef object state_name
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Ignore(Action):
- cdef perform(self, token_stream, text)
-
-cdef class Text(Action):
- cdef perform(self, token_stream, text)
+
+cdef class Action:
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Return(Action):
+ cdef object value
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Call(Action):
+ cdef object function
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Begin(Action):
+ cdef object state_name
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Ignore(Action):
+ cdef perform(self, token_stream, text)
+
+cdef class Text(Action):
+ cdef perform(self, token_stream, text)
diff --git a/contrib/tools/cython/Cython/Plex/Actions.py b/contrib/tools/cython/Cython/Plex/Actions.py
index f372352443..c88176e716 100644
--- a/contrib/tools/cython/Cython/Plex/Actions.py
+++ b/contrib/tools/cython/Cython/Plex/Actions.py
@@ -1,110 +1,110 @@
# cython: auto_pickle=False
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Actions for use in token specifications
-#
-#=======================================================================
-
-class Action(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Actions for use in token specifications
+#
+#=======================================================================
+
+class Action(object):
def perform(self, token_stream, text):
pass # abstract
-
+
def same_as(self, other):
return self is other
-
-
-class Return(Action):
+
+
+class Return(Action):
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
-
+
def __init__(self, value):
self.value = value
-
+
def perform(self, token_stream, text):
return self.value
-
+
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
-
+
def __repr__(self):
return "Return(%s)" % repr(self.value)
-
-
-class Call(Action):
+
+
+class Call(Action):
"""
Internal Plex action which causes a function to be called.
"""
-
+
def __init__(self, function):
self.function = function
-
+
def perform(self, token_stream, text):
return self.function(token_stream, text)
-
+
def __repr__(self):
return "Call(%s)" % self.function.__name__
-
+
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
-
-
-class Begin(Action):
+
+
+class Begin(Action):
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
-
+
def __init__(self, state_name):
self.state_name = state_name
-
+
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
-
+
def __repr__(self):
return "Begin(%s)" % self.state_name
-
+
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
-
-
-class Ignore(Action):
+
+
+class Ignore(Action):
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
-
+
def perform(self, token_stream, text):
return None
-
+
def __repr__(self):
return "IGNORE"
-IGNORE = Ignore()
-#IGNORE.__doc__ = Ignore.__doc__
-
+IGNORE = Ignore()
+#IGNORE.__doc__ = Ignore.__doc__
+
-class Text(Action):
+class Text(Action):
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
-
+
def perform(self, token_stream, text):
return text
-
+
def __repr__(self):
return "TEXT"
-
-TEXT = Text()
-#TEXT.__doc__ = Text.__doc__
-
-
+
+TEXT = Text()
+#TEXT.__doc__ = Text.__doc__
+
+
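
The Actions classes above are normally consumed through Lexicon token specifications; a tiny stand-alone sketch of their behaviour (the token_stream argument is unused by these particular actions, so None is passed):

    from Cython.Plex.Actions import Return, Begin, IGNORE, TEXT

    KEYWORD = Return("keyword")       # perform() always yields the fixed value
    ENTER_COMMENT = Begin("comment")  # perform() switches the scanner's state

    print(IGNORE.perform(None, "   "))      # None: the matched text is dropped
    print(TEXT.perform(None, "hello"))      # "hello": the matched text is the value
    print(KEYWORD.perform(None, "class"))   # "keyword"
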
diff --git a/contrib/tools/cython/Cython/Plex/DFA.py b/contrib/tools/cython/Cython/Plex/DFA.py
index 478eddc2ce..76324621fc 100644
--- a/contrib/tools/cython/Cython/Plex/DFA.py
+++ b/contrib/tools/cython/Cython/Plex/DFA.py
@@ -1,18 +1,18 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Converting NFA to DFA
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-from . import Machines
-from .Machines import LOWEST_PRIORITY
-from .Transitions import TransitionMap
-
-
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Converting NFA to DFA
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+from . import Machines
+from .Machines import LOWEST_PRIORITY
+from .Transitions import TransitionMap
+
+
def nfa_to_dfa(old_machine, debug=None):
"""
Given a nondeterministic Machine, return a new equivalent
@@ -49,9 +49,9 @@ def nfa_to_dfa(old_machine, debug=None):
debug.write("\n===== State Mapping =====\n")
state_map.dump(debug)
return new_machine
-
-def set_epsilon_closure(state_set):
+
+def set_epsilon_closure(state_set):
"""
Given a set of states, return the union of the epsilon
closures of its member states.
@@ -61,9 +61,9 @@ def set_epsilon_closure(state_set):
for state2 in epsilon_closure(state1):
result[state2] = 1
return result
-
-def epsilon_closure(state):
+
+def epsilon_closure(state):
"""
Return the set of states reachable from the given state
by epsilon moves.
@@ -75,9 +75,9 @@ def epsilon_closure(state):
state.epsilon_closure = result
add_to_epsilon_closure(result, state)
return result
-
-def add_to_epsilon_closure(state_set, state):
+
+def add_to_epsilon_closure(state_set, state):
"""
Recursively add to |state_set| states reachable from the given state
by epsilon moves.
@@ -88,22 +88,22 @@ def add_to_epsilon_closure(state_set, state):
if state_set_2:
for state2 in state_set_2:
add_to_epsilon_closure(state_set, state2)
-
-class StateMap(object):
- """
+
+class StateMap(object):
+ """
Helper class used by nfa_to_dfa() to map back and forth between
sets of states from the old machine and states of the new machine.
- """
+ """
new_machine = None # Machine
old_to_new_dict = None # {(old_state,...) : new_state}
new_to_old_dict = None # {id(new_state) : old_state_set}
-
+
def __init__(self, new_machine):
self.new_machine = new_machine
self.old_to_new_dict = {}
self.new_to_old_dict = {}
-
+
def old_to_new(self, old_state_set):
"""
Return the state of the new machine corresponding to the
@@ -122,7 +122,7 @@ class StateMap(object):
#for old_state in old_state_set.keys():
#new_state.merge_actions(old_state)
return new_state
-
+
def highest_priority_action(self, state_set):
best_action = None
best_priority = LOWEST_PRIORITY
@@ -132,18 +132,18 @@ class StateMap(object):
best_action = state.action
best_priority = priority
return best_action
-
+
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
-
+
def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states."""
return self.new_to_old_dict[id(new_state)]
-
+
def make_key(self, state_set):
"""
Convert a set of states into a uniquified
@@ -152,7 +152,7 @@ class StateMap(object):
lst = list(state_set)
lst.sort()
return tuple(lst)
-
+
def dump(self, file):
from .Transitions import state_set_str
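
A sketch exercising epsilon_closure() from the module above on a hand-built two-state NFA; Machine and Node come from the Machines.py hunk further down, and the contents of the closure follow from link_to() adding an epsilon move:

    from Cython.Plex.Machines import Machine
    from Cython.Plex.DFA import epsilon_closure

    m = Machine()
    s1 = m.new_state()
    s2 = m.new_state()
    s1.link_to(s2)                            # epsilon move s1 -> s2
    closure = epsilon_closure(s1)
    print(sorted(s.number for s in closure))  # expected: [1, 2]
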
diff --git a/contrib/tools/cython/Cython/Plex/Errors.py b/contrib/tools/cython/Cython/Plex/Errors.py
index b375bb528b..f460100d77 100644
--- a/contrib/tools/cython/Cython/Plex/Errors.py
+++ b/contrib/tools/cython/Cython/Plex/Errors.py
@@ -1,54 +1,54 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Exception classes
-#
-#=======================================================================
-
-
-class PlexError(Exception):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Exception classes
+#
+#=======================================================================
+
+
+class PlexError(Exception):
message = ""
-
-class PlexTypeError(PlexError, TypeError):
+
+class PlexTypeError(PlexError, TypeError):
pass
-
-class PlexValueError(PlexError, ValueError):
+
+class PlexValueError(PlexError, ValueError):
pass
-
-class InvalidRegex(PlexError):
+
+class InvalidRegex(PlexError):
pass
-
-class InvalidToken(PlexError):
+
+class InvalidToken(PlexError):
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
-
-
-class InvalidScanner(PlexError):
+
+
+class InvalidScanner(PlexError):
pass
-
-class AmbiguousAction(PlexError):
+
+class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
-
+
def __init__(self):
pass
-
-class UnrecognizedInput(PlexError):
+
+class UnrecognizedInput(PlexError):
scanner = None
position = None
state_name = None
-
+
def __init__(self, scanner, state_name):
self.scanner = scanner
self.position = scanner.get_position()
self.state_name = state_name
-
+
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %r" % (
self.position + (self.state_name,)))
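
The exception classes above mostly format their own messages; a one-line sketch of InvalidToken, whose constructor prefixes the token number:

    from Cython.Plex.Errors import InvalidToken

    try:
        raise InvalidToken(3, "Pattern is not an RE instance")
    except InvalidToken as e:
        print(e)   # Token number 3: Pattern is not an RE instance
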
diff --git a/contrib/tools/cython/Cython/Plex/Lexicons.py b/contrib/tools/cython/Cython/Plex/Lexicons.py
index e163caef41..787f5854b8 100644
--- a/contrib/tools/cython/Cython/Plex/Lexicons.py
+++ b/contrib/tools/cython/Cython/Plex/Lexicons.py
@@ -1,125 +1,125 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Lexical Analyser Specification
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import types
-
-from . import Actions
-from . import DFA
-from . import Errors
-from . import Machines
-from . import Regexps
-
-# debug_flags for Lexicon constructor
-DUMP_NFA = 1
-DUMP_DFA = 2
-
-
-class State(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Lexical Analyser Specification
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
+
+from . import Actions
+from . import DFA
+from . import Errors
+from . import Machines
+from . import Regexps
+
+# debug_flags for Lexicon constructor
+DUMP_NFA = 1
+DUMP_DFA = 2
+
+
+class State(object):
"""
This class is used as part of a Plex.Lexicon specification to
introduce a user-defined state.
-
+
Constructor:
-
+
State(name, token_specifications)
"""
-
+
name = None
tokens = None
-
+
def __init__(self, name, tokens):
self.name = name
self.tokens = tokens
-
-class Lexicon(object):
+
+class Lexicon(object):
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
-
+
1) A token definition, which is a tuple:
-
+
(pattern, action)
-
+
     The |pattern| is a regular expression built using the
constructors defined in the Plex module.
-
+
The |action| is the action to be performed when this pattern
is recognised (see below).
-
+
2) A state definition:
-
+
State(name, tokens)
-
+
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
-
+
Actions
-------
-
+
     The |action| in a token specification may be one of three things:
-
+
1) A function, which is called as follows:
-
+
function(scanner, text)
-
+
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
-
+
2) One of the following special actions:
-
+
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
-
+
TEXT causes the scanned text itself to be returned as the
value of the token.
-
+
3) Any other value, which is returned as the value of the token.
-
+
States
------
-
+
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
-
+
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
-
+
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
-
+
1) Using Begin(state_name) as the action of a token.
-
+
2) Calling the begin(state_name) method of the Scanner.
-
+
To change back to the default state, use '' as the state name.
"""
-
+
machine = None # Machine
tables = None # StateTableMachine
-
+
def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if not isinstance(specifications, list):
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
-
+
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
@@ -161,7 +161,7 @@ class Lexicon(object):
self.machine = dfa
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
- try:
+ try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
@@ -182,7 +182,7 @@ class Lexicon(object):
final_state.set_action(action, priority=-token_number)
except Errors.PlexError as e:
raise e.__class__("Token number %d: %s" % (token_number, e))
-
+
def parse_token_definition(self, token_spec):
if not isinstance(token_spec, tuple):
raise Errors.InvalidToken("Token definition is not a tuple")
@@ -192,9 +192,9 @@ class Lexicon(object):
if not isinstance(pattern, Regexps.RE):
raise Errors.InvalidToken("Pattern is not an RE instance")
return (pattern, action)
-
+
def get_initial_state(self, name):
return self.machine.get_initial_state(name)
-
-
-
+
+
+
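
A sketch of the specification format described in the Lexicon docstring above. Str, Rep1, Any, AnyBut and Scanner live in Plex modules that are not part of this hunk, so their availability under Cython.Plex is an assumption here:

    from io import StringIO
    from Cython.Plex import (Lexicon, State, Str, Any, AnyBut, Rep1,
                             IGNORE, Begin, Scanner)

    lexicon = Lexicon([
        (Rep1(Any("0123456789")), 'number'),   # plain value: returned as the token value
        (Str("#"), Begin('comment')),          # switch to the 'comment' state
        (Any(" \t\n"), IGNORE),                # whitespace is dropped
        State('comment', [
            (Str("\n"), Begin('')),            # newline ends the comment state
            (AnyBut("\n"), IGNORE),
        ]),
    ])

    scanner = Scanner(lexicon, StringIO("12 # note\n34"), "<input>")
    while True:
        value, text = scanner.read()
        if value is None:                      # (None, '') marks end of input
            break
        print(value, repr(text))
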
diff --git a/contrib/tools/cython/Cython/Plex/Machines.py b/contrib/tools/cython/Cython/Plex/Machines.py
index 6ddcbd5fe9..398850976b 100644
--- a/contrib/tools/cython/Cython/Plex/Machines.py
+++ b/contrib/tools/cython/Cython/Plex/Machines.py
@@ -1,45 +1,45 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Classes for building NFAs and DFAs
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import sys
-
-from .Transitions import TransitionMap
-
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Classes for building NFAs and DFAs
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import sys
+
+from .Transitions import TransitionMap
+
try:
from sys import maxsize as maxint
except ImportError:
from sys import maxint
-
+
try:
unichr
except NameError:
unichr = chr
-
+
LOWEST_PRIORITY = -maxint
-class Machine(object):
+class Machine(object):
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
-
+
def __init__(self):
self.states = []
self.initial_states = {}
-
+
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
-
+
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
@@ -48,18 +48,18 @@ class Machine(object):
s.number = n
self.states.append(s)
return s
-
+
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
-
+
def make_initial_state(self, name, state):
self.initial_states[name] = state
-
+
def get_initial_state(self, name):
return self.initial_states[name]
-
+
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
@@ -68,36 +68,36 @@ class Machine(object):
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
-
-class Node(object):
+
+class Node(object):
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
-
+
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
-
+
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
-
+
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
-
+
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
-
+
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
@@ -105,19 +105,19 @@ class Node(object):
if priority > self.action_priority:
self.action = action
self.action_priority = priority
-
+
def get_action(self):
return self.action
-
+
def get_action_priority(self):
return self.action_priority
-
+
def is_accepting(self):
return self.action is not None
-
+
def __str__(self):
return "State %d" % self.number
-
+
def dump(self, file):
# Header
file.write(" State %d:\n" % self.number)
@@ -129,12 +129,12 @@ class Node(object):
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
-
+
def __lt__(self, other):
return self.number < other.number
-
-class FastMachine(object):
+
+class FastMachine(object):
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
@@ -142,19 +142,19 @@ class FastMachine(object):
initial_states = None # {state_name:state}
states = None # [state] where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
-
+
new_state_template = {
'': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
}
-
+
def __init__(self):
self.initial_states = {}
self.states = []
-
+
def __del__(self):
for state in self.states:
state.clear()
-
+
def new_state(self, action=None):
number = self.next_number
self.next_number = number + 1
@@ -163,10 +163,10 @@ class FastMachine(object):
result['action'] = action
self.states.append(result)
return result
-
+
def make_initial_state(self, name, state):
self.initial_states[name] = state
-
+
def add_transitions(self, state, event, new_state, maxint=maxint):
if type(event) is tuple:
code0, code1 = event
@@ -178,10 +178,10 @@ class FastMachine(object):
code0 += 1
else:
state[event] = new_state
-
+
def get_initial_state(self, name):
return self.initial_states[name]
-
+
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
@@ -189,7 +189,7 @@ class FastMachine(object):
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
-
+
def dump_state(self, state, file):
# Header
file.write(" State %d:\n" % state['number'])
@@ -199,7 +199,7 @@ class FastMachine(object):
action = state['action']
if action is not None:
file.write(" %s\n" % action)
-
+
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
@@ -228,7 +228,7 @@ class FastMachine(object):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
-
+
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
@@ -243,10 +243,10 @@ class FastMachine(object):
c2 += 1
result.append((chr(c1), chr(c2)))
return tuple(result)
-
+
def ranges_to_string(self, range_list):
return ','.join(map(self.range_to_string, range_list))
-
+
def range_to_string(self, range_tuple):
(c1, c2) = range_tuple
if c1 == c2:
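
A sketch of the low-level Machine/Node API above used directly; normally Lexicon and DFA.nfa_to_dfa() drive it, so this only builds a one-transition NFA and dumps it:

    import sys
    from Cython.Plex.Machines import Machine
    from Cython.Plex.Actions import Return

    m = Machine()
    start = m.new_initial_state('')                    # initial state for the default ('') state name
    accept = m.new_state()
    start.add_transition((ord('a'), ord('a') + 1), accept)   # code range covering just 'a'
    accept.set_action(Return("A"), priority=1)
    m.dump(sys.stdout)
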
diff --git a/contrib/tools/cython/Cython/Plex/Regexps.py b/contrib/tools/cython/Cython/Plex/Regexps.py
index 43e5fa3de9..41816c939a 100644
--- a/contrib/tools/cython/Cython/Plex/Regexps.py
+++ b/contrib/tools/cython/Cython/Plex/Regexps.py
@@ -1,576 +1,576 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Regular Expressions
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import types
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Regular Expressions
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
try:
from sys import maxsize as maxint
except ImportError:
from sys import maxint
-
-from . import Errors
-
-#
-# Constants
-#
-
-BOL = 'bol'
-EOL = 'eol'
-EOF = 'eof'
-
-nl_code = ord('\n')
-
-
-#
-# Helper functions
-#
-
-def chars_to_ranges(s):
- """
- Return a list of character codes consisting of pairs
- [code1a, code1b, code2a, code2b,...] which cover all
- the characters in |s|.
- """
- char_list = list(s)
- char_list.sort()
- i = 0
- n = len(char_list)
- result = []
- while i < n:
- code1 = ord(char_list[i])
- code2 = code1 + 1
+
+from . import Errors
+
+#
+# Constants
+#
+
+BOL = 'bol'
+EOL = 'eol'
+EOF = 'eof'
+
+nl_code = ord('\n')
+
+
+#
+# Helper functions
+#
+
+def chars_to_ranges(s):
+ """
+ Return a list of character codes consisting of pairs
+ [code1a, code1b, code2a, code2b,...] which cover all
+ the characters in |s|.
+ """
+ char_list = list(s)
+ char_list.sort()
+ i = 0
+ n = len(char_list)
+ result = []
+ while i < n:
+ code1 = ord(char_list[i])
+ code2 = code1 + 1
i += 1
- while i < n and code2 >= ord(char_list[i]):
+ while i < n and code2 >= ord(char_list[i]):
code2 += 1
i += 1
- result.append(code1)
- result.append(code2)
- return result
-
-
-def uppercase_range(code1, code2):
- """
- If the range of characters from code1 to code2-1 includes any
- lower case letters, return the corresponding upper case range.
- """
- code3 = max(code1, ord('a'))
- code4 = min(code2, ord('z') + 1)
- if code3 < code4:
- d = ord('A') - ord('a')
- return (code3 + d, code4 + d)
- else:
- return None
-
-
-def lowercase_range(code1, code2):
- """
- If the range of characters from code1 to code2-1 includes any
- upper case letters, return the corresponding lower case range.
- """
- code3 = max(code1, ord('A'))
- code4 = min(code2, ord('Z') + 1)
- if code3 < code4:
- d = ord('a') - ord('A')
- return (code3 + d, code4 + d)
- else:
- return None
-
-
-def CodeRanges(code_list):
- """
- Given a list of codes as returned by chars_to_ranges, return
- an RE which will match a character in any of the ranges.
- """
+ result.append(code1)
+ result.append(code2)
+ return result
+
+
+def uppercase_range(code1, code2):
+ """
+ If the range of characters from code1 to code2-1 includes any
+ lower case letters, return the corresponding upper case range.
+ """
+ code3 = max(code1, ord('a'))
+ code4 = min(code2, ord('z') + 1)
+ if code3 < code4:
+ d = ord('A') - ord('a')
+ return (code3 + d, code4 + d)
+ else:
+ return None
+
+
+def lowercase_range(code1, code2):
+ """
+ If the range of characters from code1 to code2-1 includes any
+ upper case letters, return the corresponding lower case range.
+ """
+ code3 = max(code1, ord('A'))
+ code4 = min(code2, ord('Z') + 1)
+ if code3 < code4:
+ d = ord('a') - ord('A')
+ return (code3 + d, code4 + d)
+ else:
+ return None
+
+
+def CodeRanges(code_list):
+ """
+ Given a list of codes as returned by chars_to_ranges, return
+ an RE which will match a character in any of the ranges.
+ """
re_list = [CodeRange(code_list[i], code_list[i + 1]) for i in range(0, len(code_list), 2)]
- return Alt(*re_list)
-
-
-def CodeRange(code1, code2):
- """
- CodeRange(code1, code2) is an RE which matches any character
- with a code |c| in the range |code1| <= |c| < |code2|.
- """
- if code1 <= nl_code < code2:
- return Alt(RawCodeRange(code1, nl_code),
+ return Alt(*re_list)
+
+
+def CodeRange(code1, code2):
+ """
+ CodeRange(code1, code2) is an RE which matches any character
+ with a code |c| in the range |code1| <= |c| < |code2|.
+ """
+ if code1 <= nl_code < code2:
+ return Alt(RawCodeRange(code1, nl_code),
RawNewline,
RawCodeRange(nl_code + 1, code2))
- else:
- return RawCodeRange(code1, code2)
-
-
-#
-# Abstract classes
-#
-
-class RE(object):
- """RE is the base class for regular expression constructors.
- The following operators are defined on REs:
-
- re1 + re2 is an RE which matches |re1| followed by |re2|
- re1 | re2 is an RE which matches either |re1| or |re2|
- """
-
+ else:
+ return RawCodeRange(code1, code2)
+
+
+#
+# Abstract classes
+#
+
+class RE(object):
+ """RE is the base class for regular expression constructors.
+ The following operators are defined on REs:
+
+ re1 + re2 is an RE which matches |re1| followed by |re2|
+ re1 | re2 is an RE which matches either |re1| or |re2|
+ """
+
nullable = 1 # True if this RE can match 0 input symbols
match_nl = 1 # True if this RE can match a string ending with '\n'
str = None # Set to a string to override the class's __str__ result
-
- def build_machine(self, machine, initial_state, final_state,
+
+ def build_machine(self, machine, initial_state, final_state,
match_bol, nocase):
- """
- This method should add states to |machine| to implement this
- RE, starting at |initial_state| and ending at |final_state|.
- If |match_bol| is true, the RE must be able to match at the
- beginning of a line. If nocase is true, upper and lower case
- letters should be treated as equivalent.
- """
- raise NotImplementedError("%s.build_machine not implemented" %
+ """
+ This method should add states to |machine| to implement this
+ RE, starting at |initial_state| and ending at |final_state|.
+ If |match_bol| is true, the RE must be able to match at the
+ beginning of a line. If nocase is true, upper and lower case
+ letters should be treated as equivalent.
+ """
+ raise NotImplementedError("%s.build_machine not implemented" %
self.__class__.__name__)
-
- def build_opt(self, m, initial_state, c):
- """
- Given a state |s| of machine |m|, return a new state
- reachable from |s| on character |c| or epsilon.
- """
- s = m.new_state()
- initial_state.link_to(s)
- initial_state.add_transition(c, s)
- return s
-
- def __add__(self, other):
- return Seq(self, other)
-
- def __or__(self, other):
- return Alt(self, other)
-
- def __str__(self):
- if self.str:
- return self.str
- else:
- return self.calc_str()
-
- def check_re(self, num, value):
- if not isinstance(value, RE):
- self.wrong_type(num, value, "Plex.RE instance")
-
- def check_string(self, num, value):
- if type(value) != type(''):
- self.wrong_type(num, value, "string")
-
- def check_char(self, num, value):
- self.check_string(num, value)
- if len(value) != 1:
- raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
+
+ def build_opt(self, m, initial_state, c):
+ """
+ Given a state |s| of machine |m|, return a new state
+ reachable from |s| on character |c| or epsilon.
+ """
+ s = m.new_state()
+ initial_state.link_to(s)
+ initial_state.add_transition(c, s)
+ return s
+
+ def __add__(self, other):
+ return Seq(self, other)
+
+ def __or__(self, other):
+ return Alt(self, other)
+
+ def __str__(self):
+ if self.str:
+ return self.str
+ else:
+ return self.calc_str()
+
+ def check_re(self, num, value):
+ if not isinstance(value, RE):
+ self.wrong_type(num, value, "Plex.RE instance")
+
+ def check_string(self, num, value):
+ if type(value) != type(''):
+ self.wrong_type(num, value, "string")
+
+ def check_char(self, num, value):
+ self.check_string(num, value)
+ if len(value) != 1:
+ raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
-
- def wrong_type(self, num, value, expected):
- if type(value) == types.InstanceType:
+
+ def wrong_type(self, num, value, expected):
+ if type(value) == types.InstanceType:
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
- else:
- got = type(value).__name__
- raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
+ else:
+ got = type(value).__name__
+ raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
-
-#
-# Primitive RE constructors
-# -------------------------
-#
-# These are the basic REs from which all others are built.
-#
-
-## class Char(RE):
-## """
-## Char(c) is an RE which matches the character |c|.
-## """
-
-## nullable = 0
-
-## def __init__(self, char):
-## self.char = char
-## self.match_nl = char == '\n'
-
-## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
-## c = self.char
-## if match_bol and c != BOL:
-## s1 = self.build_opt(m, initial_state, BOL)
-## else:
-## s1 = initial_state
-## if c == '\n' or c == EOF:
-## s1 = self.build_opt(m, s1, EOL)
-## if len(c) == 1:
-## code = ord(self.char)
-## s1.add_transition((code, code+1), final_state)
-## if nocase and is_letter_code(code):
-## code2 = other_case_code(code)
-## s1.add_transition((code2, code2+1), final_state)
-## else:
-## s1.add_transition(c, final_state)
-
-## def calc_str(self):
-## return "Char(%s)" % repr(self.char)
-
-
-def Char(c):
- """
- Char(c) is an RE which matches the character |c|.
- """
- if len(c) == 1:
- result = CodeRange(ord(c), ord(c) + 1)
- else:
- result = SpecialSymbol(c)
- result.str = "Char(%s)" % repr(c)
- return result
-
-
-class RawCodeRange(RE):
- """
- RawCodeRange(code1, code2) is a low-level RE which matches any character
- with a code |c| in the range |code1| <= |c| < |code2|, where the range
- does not include newline. For internal use only.
- """
- nullable = 0
- match_nl = 0
+
+#
+# Primitive RE constructors
+# -------------------------
+#
+# These are the basic REs from which all others are built.
+#
+
+## class Char(RE):
+## """
+## Char(c) is an RE which matches the character |c|.
+## """
+
+## nullable = 0
+
+## def __init__(self, char):
+## self.char = char
+## self.match_nl = char == '\n'
+
+## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+## c = self.char
+## if match_bol and c != BOL:
+## s1 = self.build_opt(m, initial_state, BOL)
+## else:
+## s1 = initial_state
+## if c == '\n' or c == EOF:
+## s1 = self.build_opt(m, s1, EOL)
+## if len(c) == 1:
+## code = ord(self.char)
+## s1.add_transition((code, code+1), final_state)
+## if nocase and is_letter_code(code):
+## code2 = other_case_code(code)
+## s1.add_transition((code2, code2+1), final_state)
+## else:
+## s1.add_transition(c, final_state)
+
+## def calc_str(self):
+## return "Char(%s)" % repr(self.char)
+
+
+def Char(c):
+ """
+ Char(c) is an RE which matches the character |c|.
+ """
+ if len(c) == 1:
+ result = CodeRange(ord(c), ord(c) + 1)
+ else:
+ result = SpecialSymbol(c)
+ result.str = "Char(%s)" % repr(c)
+ return result
+
+
+class RawCodeRange(RE):
+ """
+ RawCodeRange(code1, code2) is a low-level RE which matches any character
+ with a code |c| in the range |code1| <= |c| < |code2|, where the range
+ does not include newline. For internal use only.
+ """
+ nullable = 0
+ match_nl = 0
range = None # (code, code)
uppercase_range = None # (code, code) or None
lowercase_range = None # (code, code) or None
-
- def __init__(self, code1, code2):
- self.range = (code1, code2)
- self.uppercase_range = uppercase_range(code1, code2)
- self.lowercase_range = lowercase_range(code1, code2)
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- if match_bol:
- initial_state = self.build_opt(m, initial_state, BOL)
- initial_state.add_transition(self.range, final_state)
- if nocase:
- if self.uppercase_range:
- initial_state.add_transition(self.uppercase_range, final_state)
- if self.lowercase_range:
- initial_state.add_transition(self.lowercase_range, final_state)
-
- def calc_str(self):
- return "CodeRange(%d,%d)" % (self.code1, self.code2)
-
-
-class _RawNewline(RE):
- """
- RawNewline is a low-level RE which matches a newline character.
- For internal use only.
- """
- nullable = 0
- match_nl = 1
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- if match_bol:
- initial_state = self.build_opt(m, initial_state, BOL)
- s = self.build_opt(m, initial_state, EOL)
- s.add_transition((nl_code, nl_code + 1), final_state)
-
-
-RawNewline = _RawNewline()
-
-
-class SpecialSymbol(RE):
- """
- SpecialSymbol(sym) is an RE which matches the special input
- symbol |sym|, which is one of BOL, EOL or EOF.
- """
- nullable = 0
- match_nl = 0
- sym = None
-
- def __init__(self, sym):
- self.sym = sym
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- # Sequences 'bol bol' and 'bol eof' are impossible, so only need
- # to allow for bol if sym is eol
- if match_bol and self.sym == EOL:
- initial_state = self.build_opt(m, initial_state, BOL)
- initial_state.add_transition(self.sym, final_state)
-
-
-class Seq(RE):
- """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
- |re2| followed by |re3|..."""
-
- def __init__(self, *re_list):
- nullable = 1
+
+ def __init__(self, code1, code2):
+ self.range = (code1, code2)
+ self.uppercase_range = uppercase_range(code1, code2)
+ self.lowercase_range = lowercase_range(code1, code2)
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ initial_state.add_transition(self.range, final_state)
+ if nocase:
+ if self.uppercase_range:
+ initial_state.add_transition(self.uppercase_range, final_state)
+ if self.lowercase_range:
+ initial_state.add_transition(self.lowercase_range, final_state)
+
+ def calc_str(self):
+ return "CodeRange(%d,%d)" % (self.code1, self.code2)
+
+
+class _RawNewline(RE):
+ """
+ RawNewline is a low-level RE which matches a newline character.
+ For internal use only.
+ """
+ nullable = 0
+ match_nl = 1
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ s = self.build_opt(m, initial_state, EOL)
+ s.add_transition((nl_code, nl_code + 1), final_state)
+
+
+RawNewline = _RawNewline()
+
+
+class SpecialSymbol(RE):
+ """
+ SpecialSymbol(sym) is an RE which matches the special input
+ symbol |sym|, which is one of BOL, EOL or EOF.
+ """
+ nullable = 0
+ match_nl = 0
+ sym = None
+
+ def __init__(self, sym):
+ self.sym = sym
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ # Sequences 'bol bol' and 'bol eof' are impossible, so only need
+ # to allow for bol if sym is eol
+ if match_bol and self.sym == EOL:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ initial_state.add_transition(self.sym, final_state)
+
+
+class Seq(RE):
+ """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
+ |re2| followed by |re3|..."""
+
+ def __init__(self, *re_list):
+ nullable = 1
for i, re in enumerate(re_list):
- self.check_re(i, re)
- nullable = nullable and re.nullable
- self.re_list = re_list
- self.nullable = nullable
- i = len(re_list)
- match_nl = 0
- while i:
+ self.check_re(i, re)
+ nullable = nullable and re.nullable
+ self.re_list = re_list
+ self.nullable = nullable
+ i = len(re_list)
+ match_nl = 0
+ while i:
i -= 1
- re = re_list[i]
- if re.match_nl:
- match_nl = 1
- break
- if not re.nullable:
- break
- self.match_nl = match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- re_list = self.re_list
- if len(re_list) == 0:
- initial_state.link_to(final_state)
- else:
- s1 = initial_state
- n = len(re_list)
+ re = re_list[i]
+ if re.match_nl:
+ match_nl = 1
+ break
+ if not re.nullable:
+ break
+ self.match_nl = match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ re_list = self.re_list
+ if len(re_list) == 0:
+ initial_state.link_to(final_state)
+ else:
+ s1 = initial_state
+ n = len(re_list)
for i, re in enumerate(re_list):
- if i < n - 1:
- s2 = m.new_state()
- else:
- s2 = final_state
- re.build_machine(m, s1, s2, match_bol, nocase)
- s1 = s2
- match_bol = re.match_nl or (match_bol and re.nullable)
-
- def calc_str(self):
- return "Seq(%s)" % ','.join(map(str, self.re_list))
-
-
-class Alt(RE):
- """Alt(re1, re2, re3...) is an RE which matches either |re1| or
- |re2| or |re3|..."""
-
- def __init__(self, *re_list):
- self.re_list = re_list
- nullable = 0
- match_nl = 0
- nullable_res = []
- non_nullable_res = []
- i = 1
- for re in re_list:
- self.check_re(i, re)
- if re.nullable:
- nullable_res.append(re)
- nullable = 1
- else:
- non_nullable_res.append(re)
- if re.match_nl:
- match_nl = 1
+ if i < n - 1:
+ s2 = m.new_state()
+ else:
+ s2 = final_state
+ re.build_machine(m, s1, s2, match_bol, nocase)
+ s1 = s2
+ match_bol = re.match_nl or (match_bol and re.nullable)
+
+ def calc_str(self):
+ return "Seq(%s)" % ','.join(map(str, self.re_list))
+
+
+class Alt(RE):
+ """Alt(re1, re2, re3...) is an RE which matches either |re1| or
+ |re2| or |re3|..."""
+
+ def __init__(self, *re_list):
+ self.re_list = re_list
+ nullable = 0
+ match_nl = 0
+ nullable_res = []
+ non_nullable_res = []
+ i = 1
+ for re in re_list:
+ self.check_re(i, re)
+ if re.nullable:
+ nullable_res.append(re)
+ nullable = 1
+ else:
+ non_nullable_res.append(re)
+ if re.match_nl:
+ match_nl = 1
i += 1
- self.nullable_res = nullable_res
- self.non_nullable_res = non_nullable_res
- self.nullable = nullable
- self.match_nl = match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- for re in self.nullable_res:
- re.build_machine(m, initial_state, final_state, match_bol, nocase)
- if self.non_nullable_res:
- if match_bol:
- initial_state = self.build_opt(m, initial_state, BOL)
- for re in self.non_nullable_res:
- re.build_machine(m, initial_state, final_state, 0, nocase)
-
- def calc_str(self):
- return "Alt(%s)" % ','.join(map(str, self.re_list))
-
-
-class Rep1(RE):
- """Rep1(re) is an RE which matches one or more repetitions of |re|."""
-
- def __init__(self, re):
- self.check_re(1, re)
- self.re = re
- self.nullable = re.nullable
- self.match_nl = re.match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- s1 = m.new_state()
- s2 = m.new_state()
- initial_state.link_to(s1)
- self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
- s2.link_to(s1)
- s2.link_to(final_state)
-
- def calc_str(self):
- return "Rep1(%s)" % self.re
-
-
-class SwitchCase(RE):
- """
- SwitchCase(re, nocase) is an RE which matches the same strings as RE,
- but treating upper and lower case letters according to |nocase|. If
- |nocase| is true, case is ignored, otherwise it is not.
- """
- re = None
- nocase = None
-
- def __init__(self, re, nocase):
- self.re = re
- self.nocase = nocase
- self.nullable = re.nullable
- self.match_nl = re.match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- self.re.build_machine(m, initial_state, final_state, match_bol,
+ self.nullable_res = nullable_res
+ self.non_nullable_res = non_nullable_res
+ self.nullable = nullable
+ self.match_nl = match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ for re in self.nullable_res:
+ re.build_machine(m, initial_state, final_state, match_bol, nocase)
+ if self.non_nullable_res:
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ for re in self.non_nullable_res:
+ re.build_machine(m, initial_state, final_state, 0, nocase)
+
+ def calc_str(self):
+ return "Alt(%s)" % ','.join(map(str, self.re_list))
+
+
+class Rep1(RE):
+ """Rep1(re) is an RE which matches one or more repetitions of |re|."""
+
+ def __init__(self, re):
+ self.check_re(1, re)
+ self.re = re
+ self.nullable = re.nullable
+ self.match_nl = re.match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ s1 = m.new_state()
+ s2 = m.new_state()
+ initial_state.link_to(s1)
+ self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
+ s2.link_to(s1)
+ s2.link_to(final_state)
+
+ def calc_str(self):
+ return "Rep1(%s)" % self.re
+
+
+class SwitchCase(RE):
+ """
+ SwitchCase(re, nocase) is an RE which matches the same strings as RE,
+ but treating upper and lower case letters according to |nocase|. If
+ |nocase| is true, case is ignored, otherwise it is not.
+ """
+ re = None
+ nocase = None
+
+ def __init__(self, re, nocase):
+ self.re = re
+ self.nocase = nocase
+ self.nullable = re.nullable
+ self.match_nl = re.match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ self.re.build_machine(m, initial_state, final_state, match_bol,
self.nocase)
-
- def calc_str(self):
- if self.nocase:
- name = "NoCase"
- else:
- name = "Case"
- return "%s(%s)" % (name, self.re)
-
-#
-# Composite RE constructors
-# -------------------------
-#
-# These REs are defined in terms of the primitive REs.
-#
-
-Empty = Seq()
-Empty.__doc__ = \
- """
- Empty is an RE which matches the empty string.
- """
-Empty.str = "Empty"
-
-
-def Str1(s):
- """
- Str1(s) is an RE which matches the literal string |s|.
- """
- result = Seq(*tuple(map(Char, s)))
- result.str = "Str(%s)" % repr(s)
- return result
-
-
-def Str(*strs):
- """
- Str(s) is an RE which matches the literal string |s|.
- Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
- """
- if len(strs) == 1:
- return Str1(strs[0])
- else:
- result = Alt(*tuple(map(Str1, strs)))
- result.str = "Str(%s)" % ','.join(map(repr, strs))
- return result
-
-
-def Any(s):
- """
- Any(s) is an RE which matches any character in the string |s|.
- """
- #result = apply(Alt, tuple(map(Char, s)))
- result = CodeRanges(chars_to_ranges(s))
- result.str = "Any(%s)" % repr(s)
- return result
-
-
-def AnyBut(s):
- """
- AnyBut(s) is an RE which matches any character (including
- newline) which is not in the string |s|.
- """
- ranges = chars_to_ranges(s)
- ranges.insert(0, -maxint)
- ranges.append(maxint)
- result = CodeRanges(ranges)
- result.str = "AnyBut(%s)" % repr(s)
- return result
-
-
-AnyChar = AnyBut("")
-AnyChar.__doc__ = \
- """
- AnyChar is an RE which matches any single character (including a newline).
- """
-AnyChar.str = "AnyChar"
-
+
+ def calc_str(self):
+ if self.nocase:
+ name = "NoCase"
+ else:
+ name = "Case"
+ return "%s(%s)" % (name, self.re)
+
+#
+# Composite RE constructors
+# -------------------------
+#
+# These REs are defined in terms of the primitive REs.
+#
+
+Empty = Seq()
+Empty.__doc__ = \
+ """
+ Empty is an RE which matches the empty string.
+ """
+Empty.str = "Empty"
+
+
+def Str1(s):
+ """
+ Str1(s) is an RE which matches the literal string |s|.
+ """
+ result = Seq(*tuple(map(Char, s)))
+ result.str = "Str(%s)" % repr(s)
+ return result
+
+
+def Str(*strs):
+ """
+ Str(s) is an RE which matches the literal string |s|.
+ Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
+ """
+ if len(strs) == 1:
+ return Str1(strs[0])
+ else:
+ result = Alt(*tuple(map(Str1, strs)))
+ result.str = "Str(%s)" % ','.join(map(repr, strs))
+ return result
+
+
+def Any(s):
+ """
+ Any(s) is an RE which matches any character in the string |s|.
+ """
+ #result = apply(Alt, tuple(map(Char, s)))
+ result = CodeRanges(chars_to_ranges(s))
+ result.str = "Any(%s)" % repr(s)
+ return result
+
+
+def AnyBut(s):
+ """
+ AnyBut(s) is an RE which matches any character (including
+ newline) which is not in the string |s|.
+ """
+ ranges = chars_to_ranges(s)
+ ranges.insert(0, -maxint)
+ ranges.append(maxint)
+ result = CodeRanges(ranges)
+ result.str = "AnyBut(%s)" % repr(s)
+ return result
+
+
+AnyChar = AnyBut("")
+AnyChar.__doc__ = \
+ """
+ AnyChar is an RE which matches any single character (including a newline).
+ """
+AnyChar.str = "AnyChar"
+
def Range(s1, s2=None):
- """
- Range(c1, c2) is an RE which matches any single character in the range
- |c1| to |c2| inclusive.
- Range(s) where |s| is a string of even length is an RE which matches
- any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
- """
- if s2:
- result = CodeRange(ord(s1), ord(s2) + 1)
- result.str = "Range(%s,%s)" % (s1, s2)
- else:
- ranges = []
- for i in range(0, len(s1), 2):
+ """
+ Range(c1, c2) is an RE which matches any single character in the range
+ |c1| to |c2| inclusive.
+ Range(s) where |s| is a string of even length is an RE which matches
+ any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
+ """
+ if s2:
+ result = CodeRange(ord(s1), ord(s2) + 1)
+ result.str = "Range(%s,%s)" % (s1, s2)
+ else:
+ ranges = []
+ for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
- result = Alt(*ranges)
- result.str = "Range(%s)" % repr(s1)
- return result
-
-
-def Opt(re):
- """
- Opt(re) is an RE which matches either |re| or the empty string.
- """
- result = Alt(re, Empty)
- result.str = "Opt(%s)" % re
- return result
-
-
-def Rep(re):
- """
- Rep(re) is an RE which matches zero or more repetitions of |re|.
- """
- result = Opt(Rep1(re))
- result.str = "Rep(%s)" % re
- return result
-
-
-def NoCase(re):
- """
- NoCase(re) is an RE which matches the same strings as RE, but treating
- upper and lower case letters as equivalent.
- """
+ result = Alt(*ranges)
+ result.str = "Range(%s)" % repr(s1)
+ return result
+
+
+def Opt(re):
+ """
+ Opt(re) is an RE which matches either |re| or the empty string.
+ """
+ result = Alt(re, Empty)
+ result.str = "Opt(%s)" % re
+ return result
+
+
+def Rep(re):
+ """
+ Rep(re) is an RE which matches zero or more repetitions of |re|.
+ """
+ result = Opt(Rep1(re))
+ result.str = "Rep(%s)" % re
+ return result
+
+
+def NoCase(re):
+ """
+ NoCase(re) is an RE which matches the same strings as RE, but treating
+ upper and lower case letters as equivalent.
+ """
return SwitchCase(re, nocase=1)
-
-
-def Case(re):
- """
- Case(re) is an RE which matches the same strings as RE, but treating
- upper and lower case letters as distinct, i.e. it cancels the effect
- of any enclosing NoCase().
- """
+
+
+def Case(re):
+ """
+ Case(re) is an RE which matches the same strings as RE, but treating
+ upper and lower case letters as distinct, i.e. it cancels the effect
+ of any enclosing NoCase().
+ """
return SwitchCase(re, nocase=0)
-
-#
-# RE Constants
-#
-
-Bol = Char(BOL)
-Bol.__doc__ = \
- """
- Bol is an RE which matches the beginning of a line.
- """
-Bol.str = "Bol"
-
-Eol = Char(EOL)
-Eol.__doc__ = \
- """
- Eol is an RE which matches the end of a line.
- """
-Eol.str = "Eol"
-
-Eof = Char(EOF)
-Eof.__doc__ = \
- """
- Eof is an RE which matches the end of the file.
- """
-Eof.str = "Eof"
-
+
+#
+# RE Constants
+#
+
+Bol = Char(BOL)
+Bol.__doc__ = \
+ """
+ Bol is an RE which matches the beginning of a line.
+ """
+Bol.str = "Bol"
+
+Eol = Char(EOL)
+Eol.__doc__ = \
+ """
+ Eol is an RE which matches the end of a line.
+ """
+Eol.str = "Eol"
+
+Eof = Char(EOF)
+Eof.__doc__ = \
+ """
+ Eof is an RE which matches the end of the file.
+ """
+Eof.str = "Eof"
+
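For reference, a brief sketch of how the primitive and composite constructors above compose. It is illustrative only: it assumes nothing beyond the names re-exported from Cython/Plex/__init__.py (shown later in this diff), and the patterns themselves are made up for the example.

    from Cython.Plex import Str, AnyBut, Range, Rep, Rep1, Opt, NoCase

    letter = Range("azAZ")                               # any ASCII letter
    digit = Range("09")                                  # any ASCII digit
    ident = letter + Rep(letter | digit | Str("_"))      # identifier
    number = Rep1(digit) + Opt(Str(".") + Rep1(digit))   # integer or decimal
    keyword = NoCase(Str("if", "else", "while"))         # case-insensitive keywords
    comment = Str("#") + Rep(AnyBut("\n"))               # '#' to end of line

    print(ident, number, keyword, comment)               # __str__ shows the RE structure

The + and | operators come from RE.__add__ and RE.__or__ above, so these expressions build Seq and Alt nodes without naming them explicitly.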
diff --git a/contrib/tools/cython/Cython/Plex/Scanners.pxd b/contrib/tools/cython/Cython/Plex/Scanners.pxd
index e2ac99b552..6e75f55e61 100644
--- a/contrib/tools/cython/Cython/Plex/Scanners.pxd
+++ b/contrib/tools/cython/Cython/Plex/Scanners.pxd
@@ -1,50 +1,50 @@
-from __future__ import absolute_import
-
-import cython
-
-from Cython.Plex.Actions cimport Action
-
-cdef class Scanner:
-
- cdef public lexicon
- cdef public stream
- cdef public name
- cdef public unicode buffer
- cdef public Py_ssize_t buf_start_pos
- cdef public Py_ssize_t next_pos
- cdef public Py_ssize_t cur_pos
- cdef public Py_ssize_t cur_line
- cdef public Py_ssize_t cur_line_start
- cdef public Py_ssize_t start_pos
- cdef public Py_ssize_t start_line
- cdef public Py_ssize_t start_col
- cdef public text
- cdef public initial_state # int?
- cdef public state_name
- cdef public list queue
- cdef public bint trace
- cdef public cur_char
- cdef public long input_state
-
- cdef public level
-
+from __future__ import absolute_import
+
+import cython
+
+from Cython.Plex.Actions cimport Action
+
+cdef class Scanner:
+
+ cdef public lexicon
+ cdef public stream
+ cdef public name
+ cdef public unicode buffer
+ cdef public Py_ssize_t buf_start_pos
+ cdef public Py_ssize_t next_pos
+ cdef public Py_ssize_t cur_pos
+ cdef public Py_ssize_t cur_line
+ cdef public Py_ssize_t cur_line_start
+ cdef public Py_ssize_t start_pos
+ cdef public Py_ssize_t start_line
+ cdef public Py_ssize_t start_col
+ cdef public text
+ cdef public initial_state # int?
+ cdef public state_name
+ cdef public list queue
+ cdef public bint trace
+ cdef public cur_char
+ cdef public long input_state
+
+ cdef public level
+
@cython.final
- @cython.locals(input_state=long)
- cdef next_char(self)
- @cython.locals(action=Action)
+ @cython.locals(input_state=long)
+ cdef next_char(self)
+ @cython.locals(action=Action)
cpdef tuple read(self)
@cython.final
- cdef tuple scan_a_token(self)
+ cdef tuple scan_a_token(self)
##cdef tuple position(self) # used frequently by Parsing.py
-
+
@cython.final
@cython.locals(cur_pos=Py_ssize_t, cur_line=Py_ssize_t, cur_line_start=Py_ssize_t,
input_state=long, next_pos=Py_ssize_t, state=dict,
buf_start_pos=Py_ssize_t, buf_len=Py_ssize_t, buf_index=Py_ssize_t,
trace=bint, discard=Py_ssize_t, data=unicode, buffer=unicode)
- cdef run_machine_inlined(self)
-
+ cdef run_machine_inlined(self)
+
@cython.final
- cdef begin(self, state)
+ cdef begin(self, state)
@cython.final
- cdef produce(self, value, text = *)
+ cdef produce(self, value, text = *)
diff --git a/contrib/tools/cython/Cython/Plex/Scanners.py b/contrib/tools/cython/Cython/Plex/Scanners.py
index ee5fea728e..88f7e2da3b 100644
--- a/contrib/tools/cython/Cython/Plex/Scanners.py
+++ b/contrib/tools/cython/Cython/Plex/Scanners.py
@@ -1,57 +1,57 @@
# cython: auto_pickle=False
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-#
-# Scanning an input stream
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import cython
-
-cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
-
-from . import Errors
-from .Regexps import BOL, EOL, EOF
-
-NOT_FOUND = object()
-
-
-class Scanner(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#
+# Scanning an input stream
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import cython
+
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+
+from . import Errors
+from .Regexps import BOL, EOL, EOF
+
+NOT_FOUND = object()
+
+
+class Scanner(object):
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
-
+
Constructor:
-
+
Scanner(lexicon, stream, name = '')
-
+
See the docstring of the __init__ method for details.
-
+
Methods:
-
+
See the docstrings of the individual methods for more
information.
-
+
read() --> (value, text)
Reads the next lexical token from the stream.
-
+
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
-
+
begin(state_name)
Causes scanner to change state.
-
+
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
-
+
"""
-
+
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
@@ -69,22 +69,22 @@ class Scanner(object):
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
-
+
def __init__(self, lexicon, stream, name='', initial_pos=None):
"""
Scanner(lexicon, stream, name = '')
-
+
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
-
+
|stream| can be a file object or anything which implements a
compatible read() method.
-
+
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
-
+
self.buffer = u''
self.buf_start_pos = 0
self.next_pos = 0
@@ -95,7 +95,7 @@ class Scanner(object):
self.start_col = 0
self.text = None
self.state_name = None
-
+
self.lexicon = lexicon
self.stream = stream
self.name = name
@@ -109,7 +109,7 @@ class Scanner(object):
self.input_state = 1
if initial_pos is not None:
self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
-
+
def read(self):
"""
Read the next lexical token from the stream and return a
@@ -130,7 +130,7 @@ class Scanner(object):
result = queue[0]
del queue[0]
return result
-
+
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
@@ -156,7 +156,7 @@ class Scanner(object):
if self.cur_char is None or self.cur_char is EOF:
return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
-
+
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
@@ -171,7 +171,7 @@ class Scanner(object):
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
- b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+ b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
@@ -267,7 +267,7 @@ class Scanner(object):
input_state = self.input_state
if self.trace:
print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
- if input_state == 1:
+ if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
if c == u'\n':
@@ -276,24 +276,24 @@ class Scanner(object):
elif not c:
self.cur_char = EOL
self.input_state = 4
- else:
+ else:
self.cur_char = c
- elif input_state == 2:
+ elif input_state == 2:
self.cur_char = u'\n'
self.input_state = 3
- elif input_state == 3:
+ elif input_state == 3:
self.cur_line += 1
self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL
self.input_state = 1
- elif input_state == 4:
+ elif input_state == 4:
self.cur_char = EOF
self.input_state = 5
else: # input_state = 5
self.cur_char = u''
if self.trace:
print("--> [%d] %d %r" % (input_state, self.cur_pos, self.cur_char))
-
+
def position(self):
"""
Return a tuple (name, line, col) representing the location of
@@ -304,24 +304,24 @@ class Scanner(object):
(0-based).
"""
return (self.name, self.start_line, self.start_col)
-
+
def get_position(self):
"""Python accessible wrapper around position(), only for error reporting.
"""
return self.position()
-
+
def begin(self, state_name):
"""Set the current state of the scanner to the named state."""
self.initial_state = (
self.lexicon.get_initial_state(state_name))
self.state_name = state_name
-
+
def produce(self, value, text=None):
"""
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
returned in place of the scanned text.
-
+
produce() can be called more than once during a single call to an action
procedure, in which case the tokens are queued up and returned one
at a time by subsequent calls to read(), until the queue is empty,
@@ -330,7 +330,7 @@ class Scanner(object):
if text is None:
text = self.text
self.queue.append((value, text))
-
+
def eof(self):
"""
Override this method if you want something to be done at
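A minimal end-to-end sketch of the Scanner API documented above (read() and position()). It assumes the Lexicon class and the TEXT/IGNORE actions exported by the package __init__, plus a throwaway token set invented for the example; read() yields (None, '') once the input is exhausted.

    from io import StringIO
    from Cython.Plex import Lexicon, Scanner, Str, Any, Rep1, Range, TEXT, IGNORE

    lexicon = Lexicon([
        (Rep1(Range("09")),       'number'),   # plain object => returned as the token value
        (Str("+", "-", "*", "/"), TEXT),       # TEXT => return the matched text itself
        (Rep1(Any(" \t\n")),      IGNORE),     # IGNORE => skip the match
    ])

    scanner = Scanner(lexicon, StringIO(u"12 + 34\n"), name='<demo>')
    while True:
        value, text = scanner.read()
        if value is None:                      # end of input
            break
        print(scanner.position(), value, text)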
diff --git a/contrib/tools/cython/Cython/Plex/Timing.py b/contrib/tools/cython/Cython/Plex/Timing.py
index 48f482cf30..5c3692693b 100644
--- a/contrib/tools/cython/Cython/Plex/Timing.py
+++ b/contrib/tools/cython/Cython/Plex/Timing.py
@@ -1,23 +1,23 @@
-#
-# Get time in platform-dependent way
-#
-
-from __future__ import absolute_import
-
-import os
-from sys import platform, exit, stderr
-
-if platform == 'mac':
- import MacOS
- def time():
- return MacOS.GetTicks() / 60.0
- timekind = "real"
-elif hasattr(os, 'times'):
- def time():
- t = os.times()
- return t[0] + t[1]
- timekind = "cpu"
-else:
- stderr.write(
- "Don't know how to get time on platform %s\n" % repr(platform))
- exit(1)
+#
+# Get time in platform-dependent way
+#
+
+from __future__ import absolute_import
+
+import os
+from sys import platform, exit, stderr
+
+if platform == 'mac':
+ import MacOS
+ def time():
+ return MacOS.GetTicks() / 60.0
+ timekind = "real"
+elif hasattr(os, 'times'):
+ def time():
+ t = os.times()
+ return t[0] + t[1]
+ timekind = "cpu"
+else:
+ stderr.write(
+ "Don't know how to get time on platform %s\n" % repr(platform))
+ exit(1)
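Typical use of the helper above is just to sample time() around the work of interest; timekind records whether the figure is wall-clock or CPU time. A small sketch, assuming a platform where os.times() is available (so the "cpu" branch is taken):

    from Cython.Plex import Timing

    t0 = Timing.time()
    total = sum(i * i for i in range(10 ** 6))   # stand-in workload
    print("%s time: %.3f" % (Timing.timekind, Timing.time() - t0))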
diff --git a/contrib/tools/cython/Cython/Plex/Traditional.py b/contrib/tools/cython/Cython/Plex/Traditional.py
index 5c06cc23b4..ec7252daed 100644
--- a/contrib/tools/cython/Cython/Plex/Traditional.py
+++ b/contrib/tools/cython/Cython/Plex/Traditional.py
@@ -1,42 +1,42 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Traditional Regular Expression Syntax
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
-from .Errors import PlexError
-
-
-class RegexpSyntaxError(PlexError):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Traditional Regular Expression Syntax
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
+from .Errors import PlexError
+
+
+class RegexpSyntaxError(PlexError):
pass
-
-
-def re(s):
+
+
+def re(s):
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
-
-
-class REParser(object):
+
+
+class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
-
+
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
-
+
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
@@ -47,14 +47,14 @@ class REParser(object):
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
-
+
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
-
+
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
@@ -67,10 +67,10 @@ class REParser(object):
re = Opt(re)
self.next()
return re
-
+
def parse_prim(self):
"""Parse a primitive regexp."""
- c = self.get()
+ c = self.get()
if c == '.':
re = AnyBut("\n")
elif c == '^':
@@ -88,7 +88,7 @@ class REParser(object):
c = self.get()
re = Char(c)
return re
-
+
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
@@ -113,7 +113,7 @@ class REParser(object):
return AnyBut(chars)
else:
return Any(chars)
-
+
def next(self):
"""Advance to the next char."""
s = self.s
@@ -123,14 +123,14 @@ class REParser(object):
else:
self.c = ''
self.end = 1
-
+
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
return c
-
+
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
@@ -138,7 +138,7 @@ class REParser(object):
return self.s[j]
else:
return ''
-
+
def expect(self, c):
"""
Expect to find character |c| at current position.
@@ -148,11 +148,11 @@ class REParser(object):
self.next()
else:
self.error("Missing %s" % repr(c))
-
+
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
-
-
-
+
+
+
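A short sketch of the traditional-syntax front end above: re() converts a classic regexp string into the Plex constructors from Regexps, so the result can be used anywhere an RE is expected. The identifier pattern is only an example.

    from Cython.Plex.Traditional import re
    from Cython.Plex import Lexicon, TEXT

    pattern = re("[A-Za-z_][A-Za-z0-9_]*")    # charset syntax plus the '*' modifier
    print(pattern)                            # prints the composed Seq/Alt/Rep structure
    lexicon = Lexicon([(pattern, TEXT)])      # usable like any hand-built RE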
diff --git a/contrib/tools/cython/Cython/Plex/Transitions.py b/contrib/tools/cython/Cython/Plex/Transitions.py
index b7c920f467..3833817946 100644
--- a/contrib/tools/cython/Cython/Plex/Transitions.py
+++ b/contrib/tools/cython/Cython/Plex/Transitions.py
@@ -1,48 +1,48 @@
-#
+#
# Plex - Transition Maps
-#
+#
# This version represents state sets directly as dicts for speed.
-#
-
-from __future__ import absolute_import
-
+#
+
+from __future__ import absolute_import
+
try:
from sys import maxsize as maxint
except ImportError:
from sys import maxint
-
-
-class TransitionMap(object):
+
+
+class TransitionMap(object):
"""
A TransitionMap maps an input event to a set of states.
An input event is one of: a range of character codes,
the empty string (representing an epsilon move), or one
of the special symbols BOL, EOL, EOF.
-
+
For characters, this implementation compactly represents
the map by means of a list:
-
+
[code_0, states_0, code_1, states_1, code_2, states_2,
..., code_n-1, states_n-1, code_n]
-
+
where |code_i| is a character code, and |states_i| is a
set of states corresponding to characters with codes |c|
in the range |code_i| <= |c| <= |code_i+1|.
-
+
The following invariants hold:
n >= 1
code_0 == -maxint
code_n == maxint
code_i < code_i+1 for i in 0..n-1
states_0 == states_n-1
-
+
Mappings for the special events '', BOL, EOL, EOF are
kept separately in a dictionary.
"""
-
+
map = None # The list of codes and states
special = None # Mapping for special events
-
+
def __init__(self, map=None, special=None):
if not map:
map = [-maxint, {}, maxint]
@@ -51,7 +51,7 @@ class TransitionMap(object):
self.map = map
self.special = special
#self.check() ###
-
+
def add(self, event, new_state,
TupleType=tuple):
"""
@@ -67,7 +67,7 @@ class TransitionMap(object):
i += 2
else:
self.get_special(event)[new_state] = 1
-
+
def add_set(self, event, new_set,
TupleType=tuple):
"""
@@ -83,14 +83,14 @@ class TransitionMap(object):
i += 2
else:
self.get_special(event).update(new_set)
-
+
def get_epsilon(self,
none=None):
"""
Return the mapping for epsilon, or None.
"""
return self.special.get('', none)
-
+
def iteritems(self,
len=len):
"""
@@ -114,11 +114,11 @@ class TransitionMap(object):
if set:
result.append((event, set))
return iter(result)
-
+
items = iteritems
-
+
# ------------------- Private methods --------------------
-
+
def split(self, code,
len=len, maxint=maxint):
"""
@@ -149,7 +149,7 @@ class TransitionMap(object):
map[hi:hi] = [code, map[hi - 1].copy()]
#self.check() ###
return hi
-
+
def get_special(self, event):
"""
Get state set for special event, adding a new entry if necessary.
@@ -160,9 +160,9 @@ class TransitionMap(object):
set = {}
special[event] = set
return set
-
+
# --------------------- Conversion methods -----------------------
-
+
def __str__(self):
map_strs = []
map = self.map
@@ -188,15 +188,15 @@ class TransitionMap(object):
','.join(map_strs),
special_strs
)
-
+
# --------------------- Debugging methods -----------------------
-
+
def check(self):
"""Check data structure integrity."""
if not self.map[-3] < self.map[-1]:
print(self)
assert 0
-
+
def dump(self, file):
map = self.map
i = 0
@@ -229,23 +229,23 @@ class TransitionMap(object):
def dump_char(self, code):
if 0 <= code <= 255:
return repr(chr(code))
- else:
+ else:
return "chr(%d)" % code
-
+
def dump_trans(self, key, set, file):
file.write(" %s --> %s\n" % (key, self.dump_set(set)))
-
+
def dump_set(self, set):
return state_set_str(set)
-
-
-#
-# State set manipulation functions
-#
-
-#def merge_state_sets(set1, set2):
-# for state in set2.keys():
-# set1[state] = 1
-
-def state_set_str(set):
+
+
+#
+# State set manipulation functions
+#
+
+#def merge_state_sets(set1, set2):
+# for state in set2.keys():
+# set1[state] = 1
+
+def state_set_str(set):
return "[%s]" % ','.join(["S%d" % state.number for state in set])
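The docstring above describes the compact code/state-set list; the sketch below exercises it directly. FakeState is a stand-in introduced only for the example, because state_set_str() expects states carrying a .number attribute.

    from Cython.Plex.Transitions import TransitionMap

    class FakeState(object):
        def __init__(self, number):
            self.number = number

    tm = TransitionMap()
    s1, s2 = FakeState(1), FakeState(2)
    tm.add((ord('a'), ord('c') + 1), s1)   # codes 97..99 now map to {S1}
    tm.add('', s2)                         # epsilon move, stored in .special
    print(tm)                              # e.g. [-inf,[],97,[S1],100,[],inf]+{'': '[S2]'}
    print(tm.get_epsilon())                # the raw state set for the epsilon event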
diff --git a/contrib/tools/cython/Cython/Plex/__init__.py b/contrib/tools/cython/Cython/Plex/__init__.py
index d968a43ce9..81a066f782 100644
--- a/contrib/tools/cython/Cython/Plex/__init__.py
+++ b/contrib/tools/cython/Cython/Plex/__init__.py
@@ -1,39 +1,39 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-#=======================================================================
-
-"""
-The Plex module provides lexical analysers with similar capabilities
-to GNU Flex. The following classes and functions are exported;
-see the attached docstrings for more information.
-
- Scanner For scanning a character stream under the
- direction of a Lexicon.
-
- Lexicon For constructing a lexical definition
- to be used by a Scanner.
-
- Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
- Bol, Eol, Eof, Empty
-
- Regular expression constructors, for building pattern
- definitions for a Lexicon.
-
- State For defining scanner states when creating a
- Lexicon.
-
- TEXT, IGNORE, Begin
-
- Actions for associating with patterns when
- creating a Lexicon.
-"""
-
-from __future__ import absolute_import
-
-from .Actions import TEXT, IGNORE, Begin
-from .Lexicons import Lexicon, State
-from .Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
-from .Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
-from .Scanners import Scanner
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#=======================================================================
+
+"""
+The Plex module provides lexical analysers with similar capabilities
+to GNU Flex. The following classes and functions are exported;
+see the attached docstrings for more information.
+
+ Scanner For scanning a character stream under the
+ direction of a Lexicon.
+
+ Lexicon For constructing a lexical definition
+ to be used by a Scanner.
+
+ Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
+ Bol, Eol, Eof, Empty
+
+ Regular expression constructors, for building pattern
+ definitions for a Lexicon.
+
+ State For defining scanner states when creating a
+ Lexicon.
+
+ TEXT, IGNORE, Begin
+
+ Actions for associating with patterns when
+ creating a Lexicon.
+"""
+
+from __future__ import absolute_import
+
+from .Actions import TEXT, IGNORE, Begin
+from .Lexicons import Lexicon, State
+from .Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
+from .Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
+from .Scanners import Scanner
diff --git a/contrib/tools/cython/Cython/Runtime/__init__.py b/contrib/tools/cython/Cython/Runtime/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Runtime/__init__.py
+++ b/contrib/tools/cython/Cython/Runtime/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Runtime/refnanny.pyx b/contrib/tools/cython/Cython/Runtime/refnanny.pyx
index 7342f7c0ac..d4b873fe97 100644
--- a/contrib/tools/cython/Cython/Runtime/refnanny.pyx
+++ b/contrib/tools/cython/Cython/Runtime/refnanny.pyx
@@ -1,194 +1,194 @@
# cython: language_level=3, auto_pickle=False
-from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF, Py_XINCREF
-from cpython.exc cimport PyErr_Fetch, PyErr_Restore
-from cpython.pystate cimport PyThreadState_Get
-
-cimport cython
-
-loglevel = 0
-reflog = []
-
-cdef log(level, action, obj, lineno):
- if loglevel >= level:
- reflog.append((lineno, action, id(obj)))
-
-LOG_NONE, LOG_ALL = range(2)
-
-@cython.final
-cdef class Context(object):
- cdef readonly object name, filename
- cdef readonly dict refs
- cdef readonly list errors
- cdef readonly Py_ssize_t start
-
- def __cinit__(self, name, line=0, filename=None):
- self.name = name
- self.start = line
- self.filename = filename
- self.refs = {} # id -> (count, [lineno])
- self.errors = []
-
- cdef regref(self, obj, lineno, bint is_null):
- log(LOG_ALL, u'regref', u"<NULL>" if is_null else obj, lineno)
- if is_null:
+from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF, Py_XINCREF
+from cpython.exc cimport PyErr_Fetch, PyErr_Restore
+from cpython.pystate cimport PyThreadState_Get
+
+cimport cython
+
+loglevel = 0
+reflog = []
+
+cdef log(level, action, obj, lineno):
+ if loglevel >= level:
+ reflog.append((lineno, action, id(obj)))
+
+LOG_NONE, LOG_ALL = range(2)
+
+@cython.final
+cdef class Context(object):
+ cdef readonly object name, filename
+ cdef readonly dict refs
+ cdef readonly list errors
+ cdef readonly Py_ssize_t start
+
+ def __cinit__(self, name, line=0, filename=None):
+ self.name = name
+ self.start = line
+ self.filename = filename
+ self.refs = {} # id -> (count, [lineno])
+ self.errors = []
+
+ cdef regref(self, obj, lineno, bint is_null):
+ log(LOG_ALL, u'regref', u"<NULL>" if is_null else obj, lineno)
+ if is_null:
self.errors.append(f"NULL argument on line {lineno}")
- return
- id_ = id(obj)
- count, linenumbers = self.refs.get(id_, (0, []))
- self.refs[id_] = (count + 1, linenumbers)
- linenumbers.append(lineno)
-
- cdef bint delref(self, obj, lineno, bint is_null) except -1:
- # returns whether it is ok to do the decref operation
- log(LOG_ALL, u'delref', u"<NULL>" if is_null else obj, lineno)
- if is_null:
+ return
+ id_ = id(obj)
+ count, linenumbers = self.refs.get(id_, (0, []))
+ self.refs[id_] = (count + 1, linenumbers)
+ linenumbers.append(lineno)
+
+ cdef bint delref(self, obj, lineno, bint is_null) except -1:
+ # returns whether it is ok to do the decref operation
+ log(LOG_ALL, u'delref', u"<NULL>" if is_null else obj, lineno)
+ if is_null:
self.errors.append(f"NULL argument on line {lineno}")
- return False
- id_ = id(obj)
- count, linenumbers = self.refs.get(id_, (0, []))
- if count == 0:
+ return False
+ id_ = id(obj)
+ count, linenumbers = self.refs.get(id_, (0, []))
+ if count == 0:
self.errors.append(f"Too many decrefs on line {lineno}, reference acquired on lines {linenumbers!r}")
- return False
- elif count == 1:
- del self.refs[id_]
- return True
- else:
- self.refs[id_] = (count - 1, linenumbers)
- return True
-
- cdef end(self):
- if self.refs:
- msg = u"References leaked:"
- for count, linenos in self.refs.itervalues():
+ return False
+ elif count == 1:
+ del self.refs[id_]
+ return True
+ else:
+ self.refs[id_] = (count - 1, linenumbers)
+ return True
+
+ cdef end(self):
+ if self.refs:
+ msg = u"References leaked:"
+ for count, linenos in self.refs.itervalues():
msg += f"\n ({count}) acquired on lines: {u', '.join([f'{x}' for x in linenos])}"
- self.errors.append(msg)
- if self.errors:
- return u"\n".join([u'REFNANNY: '+error for error in self.errors])
- else:
- return None
-
-cdef void report_unraisable(object e=None):
- try:
- if e is None:
- import sys
- e = sys.exc_info()[1]
+ self.errors.append(msg)
+ if self.errors:
+ return u"\n".join([u'REFNANNY: '+error for error in self.errors])
+ else:
+ return None
+
+cdef void report_unraisable(object e=None):
+ try:
+ if e is None:
+ import sys
+ e = sys.exc_info()[1]
print(f"refnanny raised an exception: {e}")
- except:
- pass # We absolutely cannot exit with an exception
-
-# All Python operations must happen after any existing
-# exception has been fetched, in case we are called from
-# exception-handling code.
-
-cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except NULL:
- if Context is None:
- # Context may be None during finalize phase.
- # In that case, we don't want to be doing anything fancy
- # like caching and resetting exceptions.
- return NULL
- cdef (PyObject*) type = NULL, value = NULL, tb = NULL, result = NULL
- PyThreadState_Get()
- PyErr_Fetch(&type, &value, &tb)
- try:
- ctx = Context(funcname, lineno, filename)
- Py_INCREF(ctx)
- result = <PyObject*>ctx
- except Exception, e:
- report_unraisable(e)
- PyErr_Restore(type, value, tb)
- return result
-
-cdef void GOTREF(PyObject* ctx, PyObject* p_obj, int lineno):
- if ctx == NULL: return
- cdef (PyObject*) type = NULL, value = NULL, tb = NULL
- PyErr_Fetch(&type, &value, &tb)
- try:
- try:
- if p_obj is NULL:
- (<Context>ctx).regref(None, lineno, True)
- else:
- (<Context>ctx).regref(<object>p_obj, lineno, False)
- except:
- report_unraisable()
- except:
- # __Pyx_GetException may itself raise errors
- pass
- PyErr_Restore(type, value, tb)
-
-cdef int GIVEREF_and_report(PyObject* ctx, PyObject* p_obj, int lineno):
- if ctx == NULL: return 1
- cdef (PyObject*) type = NULL, value = NULL, tb = NULL
- cdef bint decref_ok = False
- PyErr_Fetch(&type, &value, &tb)
- try:
- try:
- if p_obj is NULL:
- decref_ok = (<Context>ctx).delref(None, lineno, True)
- else:
- decref_ok = (<Context>ctx).delref(<object>p_obj, lineno, False)
- except:
- report_unraisable()
- except:
- # __Pyx_GetException may itself raise errors
- pass
- PyErr_Restore(type, value, tb)
- return decref_ok
-
-cdef void GIVEREF(PyObject* ctx, PyObject* p_obj, int lineno):
- GIVEREF_and_report(ctx, p_obj, lineno)
-
-cdef void INCREF(PyObject* ctx, PyObject* obj, int lineno):
- Py_XINCREF(obj)
- PyThreadState_Get()
- GOTREF(ctx, obj, lineno)
-
-cdef void DECREF(PyObject* ctx, PyObject* obj, int lineno):
- if GIVEREF_and_report(ctx, obj, lineno):
- Py_XDECREF(obj)
- PyThreadState_Get()
-
-cdef void FinishContext(PyObject** ctx):
- if ctx == NULL or ctx[0] == NULL: return
- cdef (PyObject*) type = NULL, value = NULL, tb = NULL
- cdef object errors = None
- cdef Context context
- PyThreadState_Get()
- PyErr_Fetch(&type, &value, &tb)
- try:
- try:
- context = <Context>ctx[0]
- errors = context.end()
- if errors:
+ except:
+ pass # We absolutely cannot exit with an exception
+
+# All Python operations must happen after any existing
+# exception has been fetched, in case we are called from
+# exception-handling code.
+
+cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except NULL:
+ if Context is None:
+ # Context may be None during finalize phase.
+ # In that case, we don't want to be doing anything fancy
+ # like caching and resetting exceptions.
+ return NULL
+ cdef (PyObject*) type = NULL, value = NULL, tb = NULL, result = NULL
+ PyThreadState_Get()
+ PyErr_Fetch(&type, &value, &tb)
+ try:
+ ctx = Context(funcname, lineno, filename)
+ Py_INCREF(ctx)
+ result = <PyObject*>ctx
+ except Exception, e:
+ report_unraisable(e)
+ PyErr_Restore(type, value, tb)
+ return result
+
+cdef void GOTREF(PyObject* ctx, PyObject* p_obj, int lineno):
+ if ctx == NULL: return
+ cdef (PyObject*) type = NULL, value = NULL, tb = NULL
+ PyErr_Fetch(&type, &value, &tb)
+ try:
+ try:
+ if p_obj is NULL:
+ (<Context>ctx).regref(None, lineno, True)
+ else:
+ (<Context>ctx).regref(<object>p_obj, lineno, False)
+ except:
+ report_unraisable()
+ except:
+ # __Pyx_GetException may itself raise errors
+ pass
+ PyErr_Restore(type, value, tb)
+
+cdef int GIVEREF_and_report(PyObject* ctx, PyObject* p_obj, int lineno):
+ if ctx == NULL: return 1
+ cdef (PyObject*) type = NULL, value = NULL, tb = NULL
+ cdef bint decref_ok = False
+ PyErr_Fetch(&type, &value, &tb)
+ try:
+ try:
+ if p_obj is NULL:
+ decref_ok = (<Context>ctx).delref(None, lineno, True)
+ else:
+ decref_ok = (<Context>ctx).delref(<object>p_obj, lineno, False)
+ except:
+ report_unraisable()
+ except:
+ # __Pyx_GetException may itself raise errors
+ pass
+ PyErr_Restore(type, value, tb)
+ return decref_ok
+
+cdef void GIVEREF(PyObject* ctx, PyObject* p_obj, int lineno):
+ GIVEREF_and_report(ctx, p_obj, lineno)
+
+cdef void INCREF(PyObject* ctx, PyObject* obj, int lineno):
+ Py_XINCREF(obj)
+ PyThreadState_Get()
+ GOTREF(ctx, obj, lineno)
+
+cdef void DECREF(PyObject* ctx, PyObject* obj, int lineno):
+ if GIVEREF_and_report(ctx, obj, lineno):
+ Py_XDECREF(obj)
+ PyThreadState_Get()
+
+cdef void FinishContext(PyObject** ctx):
+ if ctx == NULL or ctx[0] == NULL: return
+ cdef (PyObject*) type = NULL, value = NULL, tb = NULL
+ cdef object errors = None
+ cdef Context context
+ PyThreadState_Get()
+ PyErr_Fetch(&type, &value, &tb)
+ try:
+ try:
+ context = <Context>ctx[0]
+ errors = context.end()
+ if errors:
print(f"{context.filename.decode('latin1')}: {context.name.decode('latin1')}()")
print(errors)
- context = None
- except:
- report_unraisable()
- except:
- # __Pyx_GetException may itself raise errors
- pass
- Py_XDECREF(ctx[0])
- ctx[0] = NULL
- PyErr_Restore(type, value, tb)
-
-ctypedef struct RefNannyAPIStruct:
- void (*INCREF)(PyObject*, PyObject*, int)
- void (*DECREF)(PyObject*, PyObject*, int)
- void (*GOTREF)(PyObject*, PyObject*, int)
- void (*GIVEREF)(PyObject*, PyObject*, int)
- PyObject* (*SetupContext)(char*, int, char*) except NULL
- void (*FinishContext)(PyObject**)
-
-cdef RefNannyAPIStruct api
-api.INCREF = INCREF
-api.DECREF = DECREF
-api.GOTREF = GOTREF
-api.GIVEREF = GIVEREF
-api.SetupContext = SetupContext
-api.FinishContext = FinishContext
-
-cdef extern from "Python.h":
- object PyLong_FromVoidPtr(void*)
-
-RefNannyAPI = PyLong_FromVoidPtr(<void*>&api)
+ context = None
+ except:
+ report_unraisable()
+ except:
+ # __Pyx_GetException may itself raise errors
+ pass
+ Py_XDECREF(ctx[0])
+ ctx[0] = NULL
+ PyErr_Restore(type, value, tb)
+
+ctypedef struct RefNannyAPIStruct:
+ void (*INCREF)(PyObject*, PyObject*, int)
+ void (*DECREF)(PyObject*, PyObject*, int)
+ void (*GOTREF)(PyObject*, PyObject*, int)
+ void (*GIVEREF)(PyObject*, PyObject*, int)
+ PyObject* (*SetupContext)(char*, int, char*) except NULL
+ void (*FinishContext)(PyObject**)
+
+cdef RefNannyAPIStruct api
+api.INCREF = INCREF
+api.DECREF = DECREF
+api.GOTREF = GOTREF
+api.GIVEREF = GIVEREF
+api.SetupContext = SetupContext
+api.FinishContext = FinishContext
+
+cdef extern from "Python.h":
+ object PyLong_FromVoidPtr(void*)
+
+RefNannyAPI = PyLong_FromVoidPtr(<void*>&api)
diff --git a/contrib/tools/cython/Cython/Shadow.py b/contrib/tools/cython/Cython/Shadow.py
index eeb7d56401..e7b9e4f612 100644
--- a/contrib/tools/cython/Cython/Shadow.py
+++ b/contrib/tools/cython/Cython/Shadow.py
@@ -1,123 +1,123 @@
-# cython.* namespace for pure mode.
+# cython.* namespace for pure mode.
from __future__ import absolute_import
-
+
__version__ = "0.29.27"
-
+
try:
from __builtin__ import basestring
except ImportError:
basestring = str
-# BEGIN shameless copy from Cython/minivect/minitypes.py
-
-class _ArrayType(object):
-
- is_array = True
- subtypes = ['dtype']
-
- def __init__(self, dtype, ndim, is_c_contig=False, is_f_contig=False,
- inner_contig=False, broadcasting=None):
- self.dtype = dtype
- self.ndim = ndim
- self.is_c_contig = is_c_contig
- self.is_f_contig = is_f_contig
- self.inner_contig = inner_contig or is_c_contig or is_f_contig
- self.broadcasting = broadcasting
-
- def __repr__(self):
- axes = [":"] * self.ndim
- if self.is_c_contig:
- axes[-1] = "::1"
- elif self.is_f_contig:
- axes[0] = "::1"
-
- return "%s[%s]" % (self.dtype, ", ".join(axes))
-
-
-def index_type(base_type, item):
- """
- Support array type creation by slicing, e.g. double[:, :] specifies
- a 2D strided array of doubles. The syntax is the same as for
- Cython memoryviews.
- """
- class InvalidTypeSpecification(Exception):
- pass
-
- def verify_slice(s):
- if s.start or s.stop or s.step not in (None, 1):
- raise InvalidTypeSpecification(
- "Only a step of 1 may be provided to indicate C or "
- "Fortran contiguity")
-
- if isinstance(item, tuple):
- step_idx = None
- for idx, s in enumerate(item):
- verify_slice(s)
- if s.step and (step_idx or idx not in (0, len(item) - 1)):
- raise InvalidTypeSpecification(
- "Step may only be provided once, and only in the "
- "first or last dimension.")
-
- if s.step == 1:
- step_idx = idx
-
- return _ArrayType(base_type, len(item),
- is_c_contig=step_idx == len(item) - 1,
- is_f_contig=step_idx == 0)
+# BEGIN shameless copy from Cython/minivect/minitypes.py
+
+class _ArrayType(object):
+
+ is_array = True
+ subtypes = ['dtype']
+
+ def __init__(self, dtype, ndim, is_c_contig=False, is_f_contig=False,
+ inner_contig=False, broadcasting=None):
+ self.dtype = dtype
+ self.ndim = ndim
+ self.is_c_contig = is_c_contig
+ self.is_f_contig = is_f_contig
+ self.inner_contig = inner_contig or is_c_contig or is_f_contig
+ self.broadcasting = broadcasting
+
+ def __repr__(self):
+ axes = [":"] * self.ndim
+ if self.is_c_contig:
+ axes[-1] = "::1"
+ elif self.is_f_contig:
+ axes[0] = "::1"
+
+ return "%s[%s]" % (self.dtype, ", ".join(axes))
+
+
+def index_type(base_type, item):
+ """
+ Support array type creation by slicing, e.g. double[:, :] specifies
+ a 2D strided array of doubles. The syntax is the same as for
+ Cython memoryviews.
+ """
+ class InvalidTypeSpecification(Exception):
+ pass
+
+ def verify_slice(s):
+ if s.start or s.stop or s.step not in (None, 1):
+ raise InvalidTypeSpecification(
+ "Only a step of 1 may be provided to indicate C or "
+ "Fortran contiguity")
+
+ if isinstance(item, tuple):
+ step_idx = None
+ for idx, s in enumerate(item):
+ verify_slice(s)
+ if s.step and (step_idx or idx not in (0, len(item) - 1)):
+ raise InvalidTypeSpecification(
+ "Step may only be provided once, and only in the "
+ "first or last dimension.")
+
+ if s.step == 1:
+ step_idx = idx
+
+ return _ArrayType(base_type, len(item),
+ is_c_contig=step_idx == len(item) - 1,
+ is_f_contig=step_idx == 0)
elif isinstance(item, slice):
- verify_slice(item)
- return _ArrayType(base_type, 1, is_c_contig=bool(item.step))
+ verify_slice(item)
+ return _ArrayType(base_type, 1, is_c_contig=bool(item.step))
else:
# int[8] etc.
assert int(item) == item # array size must be a plain integer
array(base_type, item)
-
-# END shameless copy
-
-
-compiled = False
-
-_Unspecified = object()
-
-# Function decorators
-
-def _empty_decorator(x):
- return x
-
-def locals(**arg_types):
- return _empty_decorator
-
-def test_assert_path_exists(*paths):
- return _empty_decorator
-
-def test_fail_if_path_exists(*paths):
- return _empty_decorator
-
-class _EmptyDecoratorAndManager(object):
- def __call__(self, x):
- return x
- def __enter__(self):
- pass
- def __exit__(self, exc_type, exc_value, traceback):
- pass
-
+
+# END shameless copy
+
+
+compiled = False
+
+_Unspecified = object()
+
+# Function decorators
+
+def _empty_decorator(x):
+ return x
+
+def locals(**arg_types):
+ return _empty_decorator
+
+def test_assert_path_exists(*paths):
+ return _empty_decorator
+
+def test_fail_if_path_exists(*paths):
+ return _empty_decorator
+
+class _EmptyDecoratorAndManager(object):
+ def __call__(self, x):
+ return x
+ def __enter__(self):
+ pass
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
+
class _Optimization(object):
pass
-cclass = ccall = cfunc = _EmptyDecoratorAndManager()
-
+cclass = ccall = cfunc = _EmptyDecoratorAndManager()
+
returns = wraparound = boundscheck = initializedcheck = nonecheck = \
embedsignature = cdivision = cdivision_warnings = \
always_allows_keywords = profile = linetrace = infer_types = \
unraisable_tracebacks = freelist = \
lambda _: _EmptyDecoratorAndManager()
-
+
exceptval = lambda _=None, check=True: _EmptyDecoratorAndManager()
overflowcheck = lambda _: _EmptyDecoratorAndManager()
optimization = _Optimization()
-
+
overflowcheck.fold = optimization.use_switch = \
optimization.unpack_method_calls = lambda arg: _EmptyDecoratorAndManager()
@@ -127,7 +127,7 @@ binding = lambda _: _empty_decorator
_cython_inline = None
-def inline(f, *args, **kwds):
+def inline(f, *args, **kwds):
if isinstance(f, basestring):
global _cython_inline
if _cython_inline is None:
@@ -136,60 +136,60 @@ def inline(f, *args, **kwds):
else:
assert len(args) == len(kwds) == 0
return f
-
-
-def compile(f):
- from Cython.Build.Inline import RuntimeCompiledFunction
- return RuntimeCompiledFunction(f)
-
-
-# Special functions
-
-def cdiv(a, b):
- q = a / b
- if q < 0:
- q += 1
+
+
+def compile(f):
+ from Cython.Build.Inline import RuntimeCompiledFunction
+ return RuntimeCompiledFunction(f)
+
+
+# Special functions
+
+def cdiv(a, b):
+ q = a / b
+ if q < 0:
+ q += 1
return q
-
-def cmod(a, b):
- r = a % b
- if (a*b) < 0:
- r -= b
- return r
-
-
-# Emulated language constructs
-
+
+def cmod(a, b):
+ r = a % b
+ if (a*b) < 0:
+ r -= b
+ return r
+
+
+# Emulated language constructs
+
def cast(type, *args, **kwargs):
kwargs.pop('typecheck', None)
assert not kwargs
- if hasattr(type, '__call__'):
- return type(*args)
- else:
- return args[0]
-
-def sizeof(arg):
- return 1
-
-def typeof(arg):
- return arg.__class__.__name__
- # return type(arg)
-
-def address(arg):
- return pointer(type(arg))([arg])
-
-def declare(type=None, value=_Unspecified, **kwds):
- if type not in (None, object) and hasattr(type, '__call__'):
- if value is not _Unspecified:
- return type(value)
- else:
- return type()
- else:
- return value
-
-class _nogil(object):
+ if hasattr(type, '__call__'):
+ return type(*args)
+ else:
+ return args[0]
+
+def sizeof(arg):
+ return 1
+
+def typeof(arg):
+ return arg.__class__.__name__
+ # return type(arg)
+
+def address(arg):
+ return pointer(type(arg))([arg])
+
+def declare(type=None, value=_Unspecified, **kwds):
+ if type not in (None, object) and hasattr(type, '__call__'):
+ if value is not _Unspecified:
+ return type(value)
+ else:
+ return type()
+ else:
+ return value
+
+class _nogil(object):
"""Support for 'with nogil' statement and @nogil decorator.
- """
+ """
def __call__(self, x):
if callable(x):
# Used as function decorator => return the function unchanged.
@@ -197,217 +197,217 @@ class _nogil(object):
# Used as conditional context manager or to create an "@nogil(True/False)" decorator => keep going.
return self
- def __enter__(self):
- pass
- def __exit__(self, exc_class, exc, tb):
- return exc_class is None
-
-nogil = _nogil()
-gil = _nogil()
-del _nogil
-
-
-# Emulated types
-
-class CythonMetaType(type):
-
- def __getitem__(type, ix):
- return array(type, ix)
-
-CythonTypeObject = CythonMetaType('CythonTypeObject', (object,), {})
-
-class CythonType(CythonTypeObject):
-
- def _pointer(self, n=1):
- for i in range(n):
- self = pointer(self)
- return self
-
-class PointerType(CythonType):
-
- def __init__(self, value=None):
- if isinstance(value, (ArrayType, PointerType)):
- self._items = [cast(self._basetype, a) for a in value._items]
- elif isinstance(value, list):
- self._items = [cast(self._basetype, a) for a in value]
- elif value is None or value == 0:
- self._items = []
- else:
- raise ValueError
-
- def __getitem__(self, ix):
- if ix < 0:
- raise IndexError("negative indexing not allowed in C")
- return self._items[ix]
-
- def __setitem__(self, ix, value):
- if ix < 0:
- raise IndexError("negative indexing not allowed in C")
- self._items[ix] = cast(self._basetype, value)
-
- def __eq__(self, value):
- if value is None and not self._items:
- return True
- elif type(self) != type(value):
- return False
- else:
- return not self._items and not value._items
-
- def __repr__(self):
- return "%s *" % (self._basetype,)
-
-class ArrayType(PointerType):
-
- def __init__(self):
- self._items = [None] * self._n
-
-
-class StructType(CythonType):
-
- def __init__(self, cast_from=_Unspecified, **data):
- if cast_from is not _Unspecified:
- # do cast
- if len(data) > 0:
- raise ValueError('Cannot accept keyword arguments when casting.')
- if type(cast_from) is not type(self):
- raise ValueError('Cannot cast from %s'%cast_from)
- for key, value in cast_from.__dict__.items():
- setattr(self, key, value)
- else:
+ def __enter__(self):
+ pass
+ def __exit__(self, exc_class, exc, tb):
+ return exc_class is None
+
+nogil = _nogil()
+gil = _nogil()
+del _nogil
+
+
+# Emulated types
+
+class CythonMetaType(type):
+
+ def __getitem__(type, ix):
+ return array(type, ix)
+
+CythonTypeObject = CythonMetaType('CythonTypeObject', (object,), {})
+
+class CythonType(CythonTypeObject):
+
+ def _pointer(self, n=1):
+ for i in range(n):
+ self = pointer(self)
+ return self
+
+class PointerType(CythonType):
+
+ def __init__(self, value=None):
+ if isinstance(value, (ArrayType, PointerType)):
+ self._items = [cast(self._basetype, a) for a in value._items]
+ elif isinstance(value, list):
+ self._items = [cast(self._basetype, a) for a in value]
+ elif value is None or value == 0:
+ self._items = []
+ else:
+ raise ValueError
+
+ def __getitem__(self, ix):
+ if ix < 0:
+ raise IndexError("negative indexing not allowed in C")
+ return self._items[ix]
+
+ def __setitem__(self, ix, value):
+ if ix < 0:
+ raise IndexError("negative indexing not allowed in C")
+ self._items[ix] = cast(self._basetype, value)
+
+ def __eq__(self, value):
+ if value is None and not self._items:
+ return True
+ elif type(self) != type(value):
+ return False
+ else:
+ return not self._items and not value._items
+
+ def __repr__(self):
+ return "%s *" % (self._basetype,)
+
+class ArrayType(PointerType):
+
+ def __init__(self):
+ self._items = [None] * self._n
+
+
+class StructType(CythonType):
+
+ def __init__(self, cast_from=_Unspecified, **data):
+ if cast_from is not _Unspecified:
+ # do cast
+ if len(data) > 0:
+ raise ValueError('Cannot accept keyword arguments when casting.')
+ if type(cast_from) is not type(self):
+ raise ValueError('Cannot cast from %s'%cast_from)
+ for key, value in cast_from.__dict__.items():
+ setattr(self, key, value)
+ else:
for key, value in data.items():
- setattr(self, key, value)
-
- def __setattr__(self, key, value):
- if key in self._members:
- self.__dict__[key] = cast(self._members[key], value)
- else:
- raise AttributeError("Struct has no member '%s'" % key)
-
-
-class UnionType(CythonType):
-
- def __init__(self, cast_from=_Unspecified, **data):
- if cast_from is not _Unspecified:
- # do type cast
- if len(data) > 0:
- raise ValueError('Cannot accept keyword arguments when casting.')
- if isinstance(cast_from, dict):
- datadict = cast_from
- elif type(cast_from) is type(self):
- datadict = cast_from.__dict__
- else:
- raise ValueError('Cannot cast from %s'%cast_from)
- else:
- datadict = data
- if len(datadict) > 1:
- raise AttributeError("Union can only store one field at a time.")
+ setattr(self, key, value)
+
+ def __setattr__(self, key, value):
+ if key in self._members:
+ self.__dict__[key] = cast(self._members[key], value)
+ else:
+ raise AttributeError("Struct has no member '%s'" % key)
+
+
+class UnionType(CythonType):
+
+ def __init__(self, cast_from=_Unspecified, **data):
+ if cast_from is not _Unspecified:
+ # do type cast
+ if len(data) > 0:
+ raise ValueError('Cannot accept keyword arguments when casting.')
+ if isinstance(cast_from, dict):
+ datadict = cast_from
+ elif type(cast_from) is type(self):
+ datadict = cast_from.__dict__
+ else:
+ raise ValueError('Cannot cast from %s'%cast_from)
+ else:
+ datadict = data
+ if len(datadict) > 1:
+ raise AttributeError("Union can only store one field at a time.")
for key, value in datadict.items():
- setattr(self, key, value)
-
- def __setattr__(self, key, value):
- if key in '__dict__':
- CythonType.__setattr__(self, key, value)
- elif key in self._members:
- self.__dict__ = {key: cast(self._members[key], value)}
- else:
- raise AttributeError("Union has no member '%s'" % key)
-
-def pointer(basetype):
- class PointerInstance(PointerType):
- _basetype = basetype
- return PointerInstance
-
-def array(basetype, n):
- class ArrayInstance(ArrayType):
- _basetype = basetype
- _n = n
- return ArrayInstance
-
-def struct(**members):
- class StructInstance(StructType):
- _members = members
- for key in members:
- setattr(StructInstance, key, None)
- return StructInstance
-
-def union(**members):
- class UnionInstance(UnionType):
- _members = members
- for key in members:
- setattr(UnionInstance, key, None)
- return UnionInstance
-
-class typedef(CythonType):
-
- def __init__(self, type, name=None):
- self._basetype = type
- self.name = name
-
- def __call__(self, *arg):
- value = cast(self._basetype, *arg)
- return value
-
- def __repr__(self):
- return self.name or str(self._basetype)
-
- __getitem__ = index_type
-
-class _FusedType(CythonType):
- pass
-
-
-def fused_type(*args):
- if not args:
- raise TypeError("Expected at least one type as argument")
-
- # Find the numeric type with biggest rank if all types are numeric
- rank = -1
- for type in args:
- if type not in (py_int, py_long, py_float, py_complex):
- break
-
- if type_ordering.index(type) > rank:
- result_type = type
- else:
- return result_type
-
- # Not a simple numeric type, return a fused type instance. The result
- # isn't really meant to be used, as we can't keep track of the context in
- # pure-mode. Casting won't do anything in this case.
- return _FusedType()
-
-
-def _specialized_from_args(signatures, args, kwargs):
- "Perhaps this should be implemented in a TreeFragment in Cython code"
- raise Exception("yet to be implemented")
-
-
-py_int = typedef(int, "int")
-try:
- py_long = typedef(long, "long")
+ setattr(self, key, value)
+
+ def __setattr__(self, key, value):
+ if key in '__dict__':
+ CythonType.__setattr__(self, key, value)
+ elif key in self._members:
+ self.__dict__ = {key: cast(self._members[key], value)}
+ else:
+ raise AttributeError("Union has no member '%s'" % key)
+
+def pointer(basetype):
+ class PointerInstance(PointerType):
+ _basetype = basetype
+ return PointerInstance
+
+def array(basetype, n):
+ class ArrayInstance(ArrayType):
+ _basetype = basetype
+ _n = n
+ return ArrayInstance
+
+def struct(**members):
+ class StructInstance(StructType):
+ _members = members
+ for key in members:
+ setattr(StructInstance, key, None)
+ return StructInstance
+
+def union(**members):
+ class UnionInstance(UnionType):
+ _members = members
+ for key in members:
+ setattr(UnionInstance, key, None)
+ return UnionInstance
+
+class typedef(CythonType):
+
+ def __init__(self, type, name=None):
+ self._basetype = type
+ self.name = name
+
+ def __call__(self, *arg):
+ value = cast(self._basetype, *arg)
+ return value
+
+ def __repr__(self):
+ return self.name or str(self._basetype)
+
+ __getitem__ = index_type
+
+class _FusedType(CythonType):
+ pass
+
+
+def fused_type(*args):
+ if not args:
+ raise TypeError("Expected at least one type as argument")
+
+ # Find the numeric type with biggest rank if all types are numeric
+ rank = -1
+ for type in args:
+ if type not in (py_int, py_long, py_float, py_complex):
+ break
+
+ if type_ordering.index(type) > rank:
+ result_type = type
+ else:
+ return result_type
+
+ # Not a simple numeric type, return a fused type instance. The result
+ # isn't really meant to be used, as we can't keep track of the context in
+ # pure-mode. Casting won't do anything in this case.
+ return _FusedType()
+
+
+def _specialized_from_args(signatures, args, kwargs):
+ "Perhaps this should be implemented in a TreeFragment in Cython code"
+ raise Exception("yet to be implemented")
+
+
+py_int = typedef(int, "int")
+try:
+ py_long = typedef(long, "long")
except NameError: # Py3
- py_long = typedef(int, "long")
-py_float = typedef(float, "float")
-py_complex = typedef(complex, "double complex")
-
-
-# Predefined types
-
-int_types = ['char', 'short', 'Py_UNICODE', 'int', 'Py_UCS4', 'long', 'longlong', 'Py_ssize_t', 'size_t']
-float_types = ['longdouble', 'double', 'float']
-complex_types = ['longdoublecomplex', 'doublecomplex', 'floatcomplex', 'complex']
+ py_long = typedef(int, "long")
+py_float = typedef(float, "float")
+py_complex = typedef(complex, "double complex")
+
+
+# Predefined types
+
+int_types = ['char', 'short', 'Py_UNICODE', 'int', 'Py_UCS4', 'long', 'longlong', 'Py_ssize_t', 'size_t']
+float_types = ['longdouble', 'double', 'float']
+complex_types = ['longdoublecomplex', 'doublecomplex', 'floatcomplex', 'complex']
other_types = ['bint', 'void', 'Py_tss_t']
-
-to_repr = {
- 'longlong': 'long long',
- 'longdouble': 'long double',
- 'longdoublecomplex': 'long double complex',
- 'doublecomplex': 'double complex',
- 'floatcomplex': 'float complex',
-}.get
-
-gs = globals()
-
+
+to_repr = {
+ 'longlong': 'long long',
+ 'longdouble': 'long double',
+ 'longdoublecomplex': 'long double complex',
+ 'doublecomplex': 'double complex',
+ 'floatcomplex': 'float complex',
+}.get
+
+gs = globals()
+
# note: cannot simply name the unicode type here as 2to3 gets in the way and replaces it by str
try:
import __builtin__ as builtins
@@ -417,58 +417,58 @@ except ImportError: # Py3
gs['unicode'] = typedef(getattr(builtins, 'unicode', str), 'unicode')
del builtins
-for name in int_types:
- reprname = to_repr(name, name)
- gs[name] = typedef(py_int, reprname)
- if name not in ('Py_UNICODE', 'Py_UCS4') and not name.endswith('size_t'):
- gs['u'+name] = typedef(py_int, "unsigned " + reprname)
- gs['s'+name] = typedef(py_int, "signed " + reprname)
-
-for name in float_types:
- gs[name] = typedef(py_float, to_repr(name, name))
-
-for name in complex_types:
- gs[name] = typedef(py_complex, to_repr(name, name))
-
-bint = typedef(bool, "bint")
+for name in int_types:
+ reprname = to_repr(name, name)
+ gs[name] = typedef(py_int, reprname)
+ if name not in ('Py_UNICODE', 'Py_UCS4') and not name.endswith('size_t'):
+ gs['u'+name] = typedef(py_int, "unsigned " + reprname)
+ gs['s'+name] = typedef(py_int, "signed " + reprname)
+
+for name in float_types:
+ gs[name] = typedef(py_float, to_repr(name, name))
+
+for name in complex_types:
+ gs[name] = typedef(py_complex, to_repr(name, name))
+
+bint = typedef(bool, "bint")
void = typedef(None, "void")
Py_tss_t = typedef(None, "Py_tss_t")
-
-for t in int_types + float_types + complex_types + other_types:
- for i in range(1, 4):
+
+for t in int_types + float_types + complex_types + other_types:
+ for i in range(1, 4):
gs["%s_%s" % ('p'*i, t)] = gs[t]._pointer(i)
-
+
NULL = gs['p_void'](0)
-
+
# looks like 'gs' has some users out there by now...
#del gs
-integral = floating = numeric = _FusedType()
-
-type_ordering = [py_int, py_long, py_float, py_complex]
-
-class CythonDotParallel(object):
- """
- The cython.parallel module.
- """
-
- __all__ = ['parallel', 'prange', 'threadid']
-
- def parallel(self, num_threads=None):
- return nogil
-
+integral = floating = numeric = _FusedType()
+
+type_ordering = [py_int, py_long, py_float, py_complex]
+
+class CythonDotParallel(object):
+ """
+ The cython.parallel module.
+ """
+
+ __all__ = ['parallel', 'prange', 'threadid']
+
+ def parallel(self, num_threads=None):
+ return nogil
+
def prange(self, start=0, stop=None, step=1, nogil=False, schedule=None, chunksize=None, num_threads=None):
- if stop is None:
- stop = start
- start = 0
- return range(start, stop, step)
-
- def threadid(self):
- return 0
-
- # def threadsavailable(self):
- # return 1
-
-import sys
-sys.modules['cython.parallel'] = CythonDotParallel()
-del sys
+ if stop is None:
+ stop = start
+ start = 0
+ return range(start, stop, step)
+
+ def threadid(self):
+ return 0
+
+ # def threadsavailable(self):
+ # return 1
+
+import sys
+sys.modules['cython.parallel'] = CythonDotParallel()
+del sys
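The hunk above restores Cython's pure-Python shim (Shadow.py), which stands in for compiled cython semantics when code runs uncompiled. A minimal usage sketch follows; it is illustrative only (not part of the diff) and assumes `Cython.Shadow` is importable, e.g. from an installed Cython or contrib/tools/cython on sys.path:

    import sys
    import Cython.Shadow as cython

    # cmod() emulates C truncated modulo on Python ints.
    assert cython.cmod(-7, 2) == -1        # plain Python: -7 % 2 == 1

    # declare()/cast()/sizeof()/typeof() degrade to ordinary Python objects.
    n = cython.declare(cython.int, 5)      # returns int(5) in pure mode
    assert cython.sizeof(n) == 1           # sizeof is stubbed to 1
    assert cython.typeof(n) == 'int'

    # Importing the module registers the sequential cython.parallel stub.
    parallel = sys.modules['cython.parallel']
    assert list(parallel.prange(3)) == [0, 1, 2]
    assert parallel.threadid() == 0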
diff --git a/contrib/tools/cython/Cython/StringIOTree.py b/contrib/tools/cython/Cython/StringIOTree.py
index 097bbc10ec..d8239efeda 100644
--- a/contrib/tools/cython/Cython/StringIOTree.py
+++ b/contrib/tools/cython/Cython/StringIOTree.py
@@ -39,70 +39,70 @@ try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
-
-
-class StringIOTree(object):
- """
- See module docs.
- """
-
- def __init__(self, stream=None):
- self.prepended_children = []
- if stream is None:
- stream = StringIO()
- self.stream = stream
- self.write = stream.write
- self.markers = []
-
- def getvalue(self):
- content = [x.getvalue() for x in self.prepended_children]
- content.append(self.stream.getvalue())
- return "".join(content)
-
- def copyto(self, target):
- """Potentially cheaper than getvalue as no string concatenation
- needs to happen."""
- for child in self.prepended_children:
- child.copyto(target)
- stream_content = self.stream.getvalue()
- if stream_content:
- target.write(stream_content)
-
- def commit(self):
- # Save what we have written until now so that the buffer
- # itself is empty -- this makes it ready for insertion
- if self.stream.tell():
- self.prepended_children.append(StringIOTree(self.stream))
- self.prepended_children[-1].markers = self.markers
- self.markers = []
- self.stream = StringIO()
- self.write = self.stream.write
-
- def insert(self, iotree):
- """
- Insert a StringIOTree (and all of its contents) at this location.
- Further writing to self appears after what is inserted.
- """
- self.commit()
- self.prepended_children.append(iotree)
-
- def insertion_point(self):
- """
- Returns a new StringIOTree, which is left behind at the current position
-        (i.e. what is written to the result will appear right before whatever is
- next written to self).
-
- Calling getvalue() or copyto() on the result will only return the
- contents written to it.
- """
- # Save what we have written until now
- # This is so that getvalue on the result doesn't include it.
- self.commit()
- # Construct the new forked object to return
- other = StringIOTree()
- self.prepended_children.append(other)
- return other
-
- def allmarkers(self):
- children = self.prepended_children
- return [m for c in children for m in c.allmarkers()] + self.markers
+
+
+class StringIOTree(object):
+ """
+ See module docs.
+ """
+
+ def __init__(self, stream=None):
+ self.prepended_children = []
+ if stream is None:
+ stream = StringIO()
+ self.stream = stream
+ self.write = stream.write
+ self.markers = []
+
+ def getvalue(self):
+ content = [x.getvalue() for x in self.prepended_children]
+ content.append(self.stream.getvalue())
+ return "".join(content)
+
+ def copyto(self, target):
+ """Potentially cheaper than getvalue as no string concatenation
+ needs to happen."""
+ for child in self.prepended_children:
+ child.copyto(target)
+ stream_content = self.stream.getvalue()
+ if stream_content:
+ target.write(stream_content)
+
+ def commit(self):
+ # Save what we have written until now so that the buffer
+ # itself is empty -- this makes it ready for insertion
+ if self.stream.tell():
+ self.prepended_children.append(StringIOTree(self.stream))
+ self.prepended_children[-1].markers = self.markers
+ self.markers = []
+ self.stream = StringIO()
+ self.write = self.stream.write
+
+ def insert(self, iotree):
+ """
+ Insert a StringIOTree (and all of its contents) at this location.
+ Further writing to self appears after what is inserted.
+ """
+ self.commit()
+ self.prepended_children.append(iotree)
+
+ def insertion_point(self):
+ """
+ Returns a new StringIOTree, which is left behind at the current position
+        (i.e. what is written to the result will appear right before whatever is
+ next written to self).
+
+ Calling getvalue() or copyto() on the result will only return the
+ contents written to it.
+ """
+ # Save what we have written until now
+ # This is so that getvalue on the result doesn't include it.
+ self.commit()
+ # Construct the new forked object to return
+ other = StringIOTree()
+ self.prepended_children.append(other)
+ return other
+
+ def allmarkers(self):
+ children = self.prepended_children
+ return [m for c in children for m in c.allmarkers()] + self.markers
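A minimal usage sketch of the StringIOTree restored above, illustrative only (not part of the diff) and assuming Cython is importable; insertion_point() lets a caller reserve a spot and fill it in later, with getvalue() stitching everything back together in order:

    from Cython.StringIOTree import StringIOTree

    buf = StringIOTree()
    buf.write('header\n')
    point = buf.insertion_point()          # reserve this spot for later writes
    buf.write('footer\n')
    point.write('body written later\n')    # lands between header and footer
    assert buf.getvalue() == 'header\nbody written later\nfooter\n'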
diff --git a/contrib/tools/cython/Cython/Tempita/__init__.py b/contrib/tools/cython/Cython/Tempita/__init__.py
index e013b48049..41a0ce3d0e 100644
--- a/contrib/tools/cython/Cython/Tempita/__init__.py
+++ b/contrib/tools/cython/Cython/Tempita/__init__.py
@@ -1,4 +1,4 @@
-# The original Tempita implements all of its templating code here.
-# Moved it to _tempita.py to make the compilation portable.
-
+# The original Tempita implements all of its templating code here.
+# Moved it to _tempita.py to make the compilation portable.
+
from ._tempita import *
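The package __init__ above just re-exports the Tempita API implemented in _tempita.py (restored further down in this diff). A minimal usage sketch, illustrative only and assuming Cython is importable; the template strings and the 'demo.tmpl' name are made up for the example:

    from Cython.Tempita import Template, sub

    assert sub('Hi {{name}}!', name='YDB') == 'Hi YDB!'

    tmpl = Template('{{for x in xs}}{{x}} {{endfor}}', name='demo.tmpl')
    assert tmpl.substitute(xs=[1, 2, 3]) == '1 2 3 '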
diff --git a/contrib/tools/cython/Cython/Tempita/_looper.py b/contrib/tools/cython/Cython/Tempita/_looper.py
index b0e3266943..4010988300 100644
--- a/contrib/tools/cython/Cython/Tempita/_looper.py
+++ b/contrib/tools/cython/Cython/Tempita/_looper.py
@@ -1,163 +1,163 @@
-"""
-Helper for looping over sequences, particularly in templates.
-
-Often in a loop in a template it's handy to know what's next up,
-previously up, if this is the first or last item in the sequence, etc.
-These can be awkward to manage in a normal Python loop, but using the
-looper you can get a better sense of the context. Use like::
-
- >>> for loop, item in looper(['a', 'b', 'c']):
- ... print loop.number, item
- ... if not loop.last:
- ... print '---'
- 1 a
- ---
- 2 b
- ---
- 3 c
-
-"""
-
-import sys
-from Cython.Tempita.compat3 import basestring_
-
-__all__ = ['looper']
-
-
-class looper(object):
- """
- Helper for looping (particularly in templates)
-
- Use this like::
-
- for loop, item in looper(seq):
- if loop.first:
- ...
- """
-
- def __init__(self, seq):
- self.seq = seq
-
- def __iter__(self):
- return looper_iter(self.seq)
-
- def __repr__(self):
- return '<%s for %r>' % (
- self.__class__.__name__, self.seq)
-
-
-class looper_iter(object):
-
- def __init__(self, seq):
- self.seq = list(seq)
- self.pos = 0
-
- def __iter__(self):
- return self
-
- def __next__(self):
- if self.pos >= len(self.seq):
- raise StopIteration
- result = loop_pos(self.seq, self.pos), self.seq[self.pos]
- self.pos += 1
- return result
-
- if sys.version < "3":
- next = __next__
-
-
-class loop_pos(object):
-
- def __init__(self, seq, pos):
- self.seq = seq
- self.pos = pos
-
- def __repr__(self):
- return '<loop pos=%r at %r>' % (
- self.seq[self.pos], self.pos)
-
- def index(self):
- return self.pos
- index = property(index)
-
- def number(self):
- return self.pos + 1
- number = property(number)
-
- def item(self):
- return self.seq[self.pos]
- item = property(item)
-
- def __next__(self):
- try:
- return self.seq[self.pos + 1]
- except IndexError:
- return None
- __next__ = property(__next__)
-
- if sys.version < "3":
- next = __next__
-
- def previous(self):
- if self.pos == 0:
- return None
- return self.seq[self.pos - 1]
- previous = property(previous)
-
- def odd(self):
- return not self.pos % 2
- odd = property(odd)
-
- def even(self):
- return self.pos % 2
- even = property(even)
-
- def first(self):
- return self.pos == 0
- first = property(first)
-
- def last(self):
- return self.pos == len(self.seq) - 1
- last = property(last)
-
- def length(self):
- return len(self.seq)
- length = property(length)
-
- def first_group(self, getter=None):
- """
- Returns true if this item is the start of a new group,
- where groups mean that some attribute has changed. The getter
- can be None (the item itself changes), an attribute name like
- ``'.attr'``, a function, or a dict key or list index.
- """
- if self.first:
- return True
- return self._compare_group(self.item, self.previous, getter)
-
- def last_group(self, getter=None):
- """
- Returns true if this item is the end of a new group,
- where groups mean that some attribute has changed. The getter
- can be None (the item itself changes), an attribute name like
- ``'.attr'``, a function, or a dict key or list index.
- """
- if self.last:
- return True
- return self._compare_group(self.item, self.__next__, getter)
-
- def _compare_group(self, item, other, getter):
- if getter is None:
- return item != other
- elif (isinstance(getter, basestring_)
- and getter.startswith('.')):
- getter = getter[1:]
- if getter.endswith('()'):
- getter = getter[:-2]
- return getattr(item, getter)() != getattr(other, getter)()
- else:
- return getattr(item, getter) != getattr(other, getter)
- elif hasattr(getter, '__call__'):
- return getter(item) != getter(other)
- else:
- return item[getter] != other[getter]
+"""
+Helper for looping over sequences, particularly in templates.
+
+Often in a loop in a template it's handy to know what's next up,
+previously up, if this is the first or last item in the sequence, etc.
+These can be awkward to manage in a normal Python loop, but using the
+looper you can get a better sense of the context. Use like::
+
+ >>> for loop, item in looper(['a', 'b', 'c']):
+ ... print loop.number, item
+ ... if not loop.last:
+ ... print '---'
+ 1 a
+ ---
+ 2 b
+ ---
+ 3 c
+
+"""
+
+import sys
+from Cython.Tempita.compat3 import basestring_
+
+__all__ = ['looper']
+
+
+class looper(object):
+ """
+ Helper for looping (particularly in templates)
+
+ Use this like::
+
+ for loop, item in looper(seq):
+ if loop.first:
+ ...
+ """
+
+ def __init__(self, seq):
+ self.seq = seq
+
+ def __iter__(self):
+ return looper_iter(self.seq)
+
+ def __repr__(self):
+ return '<%s for %r>' % (
+ self.__class__.__name__, self.seq)
+
+
+class looper_iter(object):
+
+ def __init__(self, seq):
+ self.seq = list(seq)
+ self.pos = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.pos >= len(self.seq):
+ raise StopIteration
+ result = loop_pos(self.seq, self.pos), self.seq[self.pos]
+ self.pos += 1
+ return result
+
+ if sys.version < "3":
+ next = __next__
+
+
+class loop_pos(object):
+
+ def __init__(self, seq, pos):
+ self.seq = seq
+ self.pos = pos
+
+ def __repr__(self):
+ return '<loop pos=%r at %r>' % (
+ self.seq[self.pos], self.pos)
+
+ def index(self):
+ return self.pos
+ index = property(index)
+
+ def number(self):
+ return self.pos + 1
+ number = property(number)
+
+ def item(self):
+ return self.seq[self.pos]
+ item = property(item)
+
+ def __next__(self):
+ try:
+ return self.seq[self.pos + 1]
+ except IndexError:
+ return None
+ __next__ = property(__next__)
+
+ if sys.version < "3":
+ next = __next__
+
+ def previous(self):
+ if self.pos == 0:
+ return None
+ return self.seq[self.pos - 1]
+ previous = property(previous)
+
+ def odd(self):
+ return not self.pos % 2
+ odd = property(odd)
+
+ def even(self):
+ return self.pos % 2
+ even = property(even)
+
+ def first(self):
+ return self.pos == 0
+ first = property(first)
+
+ def last(self):
+ return self.pos == len(self.seq) - 1
+ last = property(last)
+
+ def length(self):
+ return len(self.seq)
+ length = property(length)
+
+ def first_group(self, getter=None):
+ """
+ Returns true if this item is the start of a new group,
+ where groups mean that some attribute has changed. The getter
+ can be None (the item itself changes), an attribute name like
+ ``'.attr'``, a function, or a dict key or list index.
+ """
+ if self.first:
+ return True
+ return self._compare_group(self.item, self.previous, getter)
+
+ def last_group(self, getter=None):
+ """
+ Returns true if this item is the end of a new group,
+ where groups mean that some attribute has changed. The getter
+ can be None (the item itself changes), an attribute name like
+ ``'.attr'``, a function, or a dict key or list index.
+ """
+ if self.last:
+ return True
+ return self._compare_group(self.item, self.__next__, getter)
+
+ def _compare_group(self, item, other, getter):
+ if getter is None:
+ return item != other
+ elif (isinstance(getter, basestring_)
+ and getter.startswith('.')):
+ getter = getter[1:]
+ if getter.endswith('()'):
+ getter = getter[:-2]
+ return getattr(item, getter)() != getattr(other, getter)()
+ else:
+ return getattr(item, getter) != getattr(other, getter)
+ elif hasattr(getter, '__call__'):
+ return getter(item) != getter(other)
+ else:
+ return item[getter] != other[getter]
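A minimal usage sketch of the looper helper restored above, illustrative only (not part of the diff) and assuming Cython is importable; it mirrors the docstring's doctest and adds a first_group() example with a made-up dict key:

    from Cython.Tempita._looper import looper

    out = []
    for loop, item in looper(['a', 'b', 'c']):
        out.append('%d:%s' % (loop.number, item))
        if not loop.last:
            out.append(',')
    assert ''.join(out) == '1:a,2:b,3:c'

    # first_group()/last_group() flag boundaries where some attribute changes;
    # here the getter is a dict key.
    rows = [{'k': 1}, {'k': 1}, {'k': 2}]
    starts = [loop.first_group('k') for loop, row in looper(rows)]
    assert starts == [True, False, True]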
diff --git a/contrib/tools/cython/Cython/Tempita/_tempita.py b/contrib/tools/cython/Cython/Tempita/_tempita.py
index 16eb4c4eff..587f6e4841 100644
--- a/contrib/tools/cython/Cython/Tempita/_tempita.py
+++ b/contrib/tools/cython/Cython/Tempita/_tempita.py
@@ -1,1188 +1,1188 @@
-"""
-A small templating language
-
-This implements a small templating language. This language implements
-if/elif/else, for/continue/break, expressions, and blocks of Python
-code. The syntax is::
-
- {{any expression (function calls etc)}}
- {{any expression | filter}}
- {{for x in y}}...{{endfor}}
- {{if x}}x{{elif y}}y{{else}}z{{endif}}
- {{py:x=1}}
- {{py:
- def foo(bar):
- return 'baz'
- }}
- {{default var = default_value}}
- {{# comment}}
-
-You use this with the ``Template`` class or the ``sub`` shortcut.
-The ``Template`` class takes the template string and the name of
-the template (for errors) and a default namespace. Then (like
-``string.Template``) you can call the ``tmpl.substitute(**kw)``
-method to make a substitution (or ``tmpl.substitute(a_dict)``).
-
-``sub(content, **kw)`` substitutes the template immediately. You
-can use ``__name='tmpl.html'`` to set the name of the template.
-
-If there are syntax errors ``TemplateError`` will be raised.
-"""
-
+"""
+A small templating language
+
+This implements a small templating language. This language implements
+if/elif/else, for/continue/break, expressions, and blocks of Python
+code. The syntax is::
+
+ {{any expression (function calls etc)}}
+ {{any expression | filter}}
+ {{for x in y}}...{{endfor}}
+ {{if x}}x{{elif y}}y{{else}}z{{endif}}
+ {{py:x=1}}
+ {{py:
+ def foo(bar):
+ return 'baz'
+ }}
+ {{default var = default_value}}
+ {{# comment}}
+
+You use this with the ``Template`` class or the ``sub`` shortcut.
+The ``Template`` class takes the template string and the name of
+the template (for errors) and a default namespace. Then (like
+``string.Template``) you can call the ``tmpl.substitute(**kw)``
+method to make a substitution (or ``tmpl.substitute(a_dict)``).
+
+``sub(content, **kw)`` substitutes the template immediately. You
+can use ``__name='tmpl.html'`` to set the name of the template.
+
+If there are syntax errors ``TemplateError`` will be raised.
+"""
+
from __future__ import absolute_import
-import re
-import sys
-try:
- import cgi
-except ImportError:
- pass
-try:
- from urllib import quote as url_quote
-except ImportError: # Py3
- try:
- from urllib.parse import quote as url_quote
- except ImportError:
- pass
-import os
-import tokenize
+import re
+import sys
+try:
+ import cgi
+except ImportError:
+ pass
+try:
+ from urllib import quote as url_quote
+except ImportError: # Py3
+ try:
+ from urllib.parse import quote as url_quote
+ except ImportError:
+ pass
+import os
+import tokenize
from io import StringIO
-
+
from ._looper import looper
from .compat3 import bytes, unicode_, basestring_, next, is_unicode, coerce_text
-__all__ = ['TemplateError', 'Template', 'sub', 'HTMLTemplate',
- 'sub_html', 'html', 'bunch']
-
-in_re = re.compile(r'\s+in\s+')
-var_re = re.compile(r'^[a-z_][a-z0-9_]*$', re.I)
-
-
-class TemplateError(Exception):
- """Exception raised while parsing a template
- """
-
- def __init__(self, message, position, name=None):
- Exception.__init__(self, message)
- self.position = position
- self.name = name
-
- def __str__(self):
- msg = ' '.join(self.args)
- if self.position:
- msg = '%s at line %s column %s' % (
- msg, self.position[0], self.position[1])
- if self.name:
- msg += ' in %s' % self.name
- return msg
-
-
-class _TemplateContinue(Exception):
- pass
-
-
-class _TemplateBreak(Exception):
- pass
-
-
-def get_file_template(name, from_template):
- path = os.path.join(os.path.dirname(from_template.name), name)
- return from_template.__class__.from_filename(
- path, namespace=from_template.namespace,
- get_template=from_template.get_template)
-
-
-class Template(object):
-
- default_namespace = {
- 'start_braces': '{{',
- 'end_braces': '}}',
- 'looper': looper,
- }
-
- default_encoding = 'utf8'
- default_inherit = None
-
- def __init__(self, content, name=None, namespace=None, stacklevel=None,
- get_template=None, default_inherit=None, line_offset=0,
- delimeters=None):
- self.content = content
-
- # set delimeters
- if delimeters is None:
- delimeters = (self.default_namespace['start_braces'],
- self.default_namespace['end_braces'])
- else:
- #assert len(delimeters) == 2 and all([isinstance(delimeter, basestring)
- # for delimeter in delimeters])
- self.default_namespace = self.__class__.default_namespace.copy()
- self.default_namespace['start_braces'] = delimeters[0]
- self.default_namespace['end_braces'] = delimeters[1]
- self.delimeters = delimeters
-
- self._unicode = is_unicode(content)
- if name is None and stacklevel is not None:
- try:
- caller = sys._getframe(stacklevel)
- except ValueError:
- pass
- else:
- globals = caller.f_globals
- lineno = caller.f_lineno
- if '__file__' in globals:
- name = globals['__file__']
- if name.endswith('.pyc') or name.endswith('.pyo'):
- name = name[:-1]
- elif '__name__' in globals:
- name = globals['__name__']
- else:
- name = '<string>'
- if lineno:
- name += ':%s' % lineno
- self.name = name
- self._parsed = parse(content, name=name, line_offset=line_offset, delimeters=self.delimeters)
- if namespace is None:
- namespace = {}
- self.namespace = namespace
- self.get_template = get_template
- if default_inherit is not None:
- self.default_inherit = default_inherit
-
- def from_filename(cls, filename, namespace=None, encoding=None,
- default_inherit=None, get_template=get_file_template):
- f = open(filename, 'rb')
- c = f.read()
- f.close()
- if encoding:
- c = c.decode(encoding)
- return cls(content=c, name=filename, namespace=namespace,
- default_inherit=default_inherit, get_template=get_template)
-
- from_filename = classmethod(from_filename)
-
- def __repr__(self):
- return '<%s %s name=%r>' % (
- self.__class__.__name__,
- hex(id(self))[2:], self.name)
-
- def substitute(self, *args, **kw):
- if args:
- if kw:
- raise TypeError(
- "You can only give positional *or* keyword arguments")
- if len(args) > 1:
- raise TypeError(
- "You can only give one positional argument")
- if not hasattr(args[0], 'items'):
- raise TypeError(
- "If you pass in a single argument, you must pass in a dictionary-like object (with a .items() method); you gave %r"
- % (args[0],))
- kw = args[0]
- ns = kw
- ns['__template_name__'] = self.name
- if self.namespace:
- ns.update(self.namespace)
- result, defs, inherit = self._interpret(ns)
- if not inherit:
- inherit = self.default_inherit
- if inherit:
- result = self._interpret_inherit(result, defs, inherit, ns)
- return result
-
- def _interpret(self, ns):
- __traceback_hide__ = True
- parts = []
- defs = {}
- self._interpret_codes(self._parsed, ns, out=parts, defs=defs)
- if '__inherit__' in defs:
- inherit = defs.pop('__inherit__')
- else:
- inherit = None
- return ''.join(parts), defs, inherit
-
- def _interpret_inherit(self, body, defs, inherit_template, ns):
- __traceback_hide__ = True
- if not self.get_template:
- raise TemplateError(
- 'You cannot use inheritance without passing in get_template',
- position=None, name=self.name)
- templ = self.get_template(inherit_template, self)
- self_ = TemplateObject(self.name)
+__all__ = ['TemplateError', 'Template', 'sub', 'HTMLTemplate',
+ 'sub_html', 'html', 'bunch']
+
+in_re = re.compile(r'\s+in\s+')
+var_re = re.compile(r'^[a-z_][a-z0-9_]*$', re.I)
+
+
+class TemplateError(Exception):
+ """Exception raised while parsing a template
+ """
+
+ def __init__(self, message, position, name=None):
+ Exception.__init__(self, message)
+ self.position = position
+ self.name = name
+
+ def __str__(self):
+ msg = ' '.join(self.args)
+ if self.position:
+ msg = '%s at line %s column %s' % (
+ msg, self.position[0], self.position[1])
+ if self.name:
+ msg += ' in %s' % self.name
+ return msg
+
+
+class _TemplateContinue(Exception):
+ pass
+
+
+class _TemplateBreak(Exception):
+ pass
+
+
+def get_file_template(name, from_template):
+ path = os.path.join(os.path.dirname(from_template.name), name)
+ return from_template.__class__.from_filename(
+ path, namespace=from_template.namespace,
+ get_template=from_template.get_template)
+
+
+class Template(object):
+
+ default_namespace = {
+ 'start_braces': '{{',
+ 'end_braces': '}}',
+ 'looper': looper,
+ }
+
+ default_encoding = 'utf8'
+ default_inherit = None
+
+ def __init__(self, content, name=None, namespace=None, stacklevel=None,
+ get_template=None, default_inherit=None, line_offset=0,
+ delimeters=None):
+ self.content = content
+
+ # set delimeters
+ if delimeters is None:
+ delimeters = (self.default_namespace['start_braces'],
+ self.default_namespace['end_braces'])
+ else:
+ #assert len(delimeters) == 2 and all([isinstance(delimeter, basestring)
+ # for delimeter in delimeters])
+ self.default_namespace = self.__class__.default_namespace.copy()
+ self.default_namespace['start_braces'] = delimeters[0]
+ self.default_namespace['end_braces'] = delimeters[1]
+ self.delimeters = delimeters
+
+ self._unicode = is_unicode(content)
+ if name is None and stacklevel is not None:
+ try:
+ caller = sys._getframe(stacklevel)
+ except ValueError:
+ pass
+ else:
+ globals = caller.f_globals
+ lineno = caller.f_lineno
+ if '__file__' in globals:
+ name = globals['__file__']
+ if name.endswith('.pyc') or name.endswith('.pyo'):
+ name = name[:-1]
+ elif '__name__' in globals:
+ name = globals['__name__']
+ else:
+ name = '<string>'
+ if lineno:
+ name += ':%s' % lineno
+ self.name = name
+ self._parsed = parse(content, name=name, line_offset=line_offset, delimeters=self.delimeters)
+ if namespace is None:
+ namespace = {}
+ self.namespace = namespace
+ self.get_template = get_template
+ if default_inherit is not None:
+ self.default_inherit = default_inherit
+
+ def from_filename(cls, filename, namespace=None, encoding=None,
+ default_inherit=None, get_template=get_file_template):
+ f = open(filename, 'rb')
+ c = f.read()
+ f.close()
+ if encoding:
+ c = c.decode(encoding)
+ return cls(content=c, name=filename, namespace=namespace,
+ default_inherit=default_inherit, get_template=get_template)
+
+ from_filename = classmethod(from_filename)
+
+ def __repr__(self):
+ return '<%s %s name=%r>' % (
+ self.__class__.__name__,
+ hex(id(self))[2:], self.name)
+
+ def substitute(self, *args, **kw):
+ if args:
+ if kw:
+ raise TypeError(
+ "You can only give positional *or* keyword arguments")
+ if len(args) > 1:
+ raise TypeError(
+ "You can only give one positional argument")
+ if not hasattr(args[0], 'items'):
+ raise TypeError(
+ "If you pass in a single argument, you must pass in a dictionary-like object (with a .items() method); you gave %r"
+ % (args[0],))
+ kw = args[0]
+ ns = kw
+ ns['__template_name__'] = self.name
+ if self.namespace:
+ ns.update(self.namespace)
+ result, defs, inherit = self._interpret(ns)
+ if not inherit:
+ inherit = self.default_inherit
+ if inherit:
+ result = self._interpret_inherit(result, defs, inherit, ns)
+ return result
+
+ def _interpret(self, ns):
+ __traceback_hide__ = True
+ parts = []
+ defs = {}
+ self._interpret_codes(self._parsed, ns, out=parts, defs=defs)
+ if '__inherit__' in defs:
+ inherit = defs.pop('__inherit__')
+ else:
+ inherit = None
+ return ''.join(parts), defs, inherit
+
+ def _interpret_inherit(self, body, defs, inherit_template, ns):
+ __traceback_hide__ = True
+ if not self.get_template:
+ raise TemplateError(
+ 'You cannot use inheritance without passing in get_template',
+ position=None, name=self.name)
+ templ = self.get_template(inherit_template, self)
+ self_ = TemplateObject(self.name)
for name, value in defs.items():
- setattr(self_, name, value)
- self_.body = body
- ns = ns.copy()
- ns['self'] = self_
- return templ.substitute(ns)
-
- def _interpret_codes(self, codes, ns, out, defs):
- __traceback_hide__ = True
- for item in codes:
- if isinstance(item, basestring_):
- out.append(item)
- else:
- self._interpret_code(item, ns, out, defs)
-
- def _interpret_code(self, code, ns, out, defs):
- __traceback_hide__ = True
- name, pos = code[0], code[1]
- if name == 'py':
- self._exec(code[2], ns, pos)
- elif name == 'continue':
- raise _TemplateContinue()
- elif name == 'break':
- raise _TemplateBreak()
- elif name == 'for':
- vars, expr, content = code[2], code[3], code[4]
- expr = self._eval(expr, ns, pos)
- self._interpret_for(vars, expr, content, ns, out, defs)
- elif name == 'cond':
- parts = code[2:]
- self._interpret_if(parts, ns, out, defs)
- elif name == 'expr':
- parts = code[2].split('|')
- base = self._eval(parts[0], ns, pos)
- for part in parts[1:]:
- func = self._eval(part, ns, pos)
- base = func(base)
- out.append(self._repr(base, pos))
- elif name == 'default':
- var, expr = code[2], code[3]
- if var not in ns:
- result = self._eval(expr, ns, pos)
- ns[var] = result
- elif name == 'inherit':
- expr = code[2]
- value = self._eval(expr, ns, pos)
- defs['__inherit__'] = value
- elif name == 'def':
- name = code[2]
- signature = code[3]
- parts = code[4]
- ns[name] = defs[name] = TemplateDef(self, name, signature, body=parts, ns=ns,
- pos=pos)
- elif name == 'comment':
- return
- else:
- assert 0, "Unknown code: %r" % name
-
- def _interpret_for(self, vars, expr, content, ns, out, defs):
- __traceback_hide__ = True
- for item in expr:
- if len(vars) == 1:
- ns[vars[0]] = item
- else:
- if len(vars) != len(item):
- raise ValueError(
- 'Need %i items to unpack (got %i items)'
- % (len(vars), len(item)))
- for name, value in zip(vars, item):
- ns[name] = value
- try:
- self._interpret_codes(content, ns, out, defs)
- except _TemplateContinue:
- continue
- except _TemplateBreak:
- break
-
- def _interpret_if(self, parts, ns, out, defs):
- __traceback_hide__ = True
- # @@: if/else/else gets through
- for part in parts:
- assert not isinstance(part, basestring_)
- name, pos = part[0], part[1]
- if name == 'else':
- result = True
- else:
- result = self._eval(part[2], ns, pos)
- if result:
- self._interpret_codes(part[3], ns, out, defs)
- break
-
- def _eval(self, code, ns, pos):
- __traceback_hide__ = True
- try:
- try:
- value = eval(code, self.default_namespace, ns)
+ setattr(self_, name, value)
+ self_.body = body
+ ns = ns.copy()
+ ns['self'] = self_
+ return templ.substitute(ns)
+
+ def _interpret_codes(self, codes, ns, out, defs):
+ __traceback_hide__ = True
+ for item in codes:
+ if isinstance(item, basestring_):
+ out.append(item)
+ else:
+ self._interpret_code(item, ns, out, defs)
+
+ def _interpret_code(self, code, ns, out, defs):
+ __traceback_hide__ = True
+ name, pos = code[0], code[1]
+ if name == 'py':
+ self._exec(code[2], ns, pos)
+ elif name == 'continue':
+ raise _TemplateContinue()
+ elif name == 'break':
+ raise _TemplateBreak()
+ elif name == 'for':
+ vars, expr, content = code[2], code[3], code[4]
+ expr = self._eval(expr, ns, pos)
+ self._interpret_for(vars, expr, content, ns, out, defs)
+ elif name == 'cond':
+ parts = code[2:]
+ self._interpret_if(parts, ns, out, defs)
+ elif name == 'expr':
+ parts = code[2].split('|')
+ base = self._eval(parts[0], ns, pos)
+ for part in parts[1:]:
+ func = self._eval(part, ns, pos)
+ base = func(base)
+ out.append(self._repr(base, pos))
+ elif name == 'default':
+ var, expr = code[2], code[3]
+ if var not in ns:
+ result = self._eval(expr, ns, pos)
+ ns[var] = result
+ elif name == 'inherit':
+ expr = code[2]
+ value = self._eval(expr, ns, pos)
+ defs['__inherit__'] = value
+ elif name == 'def':
+ name = code[2]
+ signature = code[3]
+ parts = code[4]
+ ns[name] = defs[name] = TemplateDef(self, name, signature, body=parts, ns=ns,
+ pos=pos)
+ elif name == 'comment':
+ return
+ else:
+ assert 0, "Unknown code: %r" % name
+
+ def _interpret_for(self, vars, expr, content, ns, out, defs):
+ __traceback_hide__ = True
+ for item in expr:
+ if len(vars) == 1:
+ ns[vars[0]] = item
+ else:
+ if len(vars) != len(item):
+ raise ValueError(
+ 'Need %i items to unpack (got %i items)'
+ % (len(vars), len(item)))
+ for name, value in zip(vars, item):
+ ns[name] = value
+ try:
+ self._interpret_codes(content, ns, out, defs)
+ except _TemplateContinue:
+ continue
+ except _TemplateBreak:
+ break
+
+ def _interpret_if(self, parts, ns, out, defs):
+ __traceback_hide__ = True
+ # @@: if/else/else gets through
+ for part in parts:
+ assert not isinstance(part, basestring_)
+ name, pos = part[0], part[1]
+ if name == 'else':
+ result = True
+ else:
+ result = self._eval(part[2], ns, pos)
+ if result:
+ self._interpret_codes(part[3], ns, out, defs)
+ break
+
+ def _eval(self, code, ns, pos):
+ __traceback_hide__ = True
+ try:
+ try:
+ value = eval(code, self.default_namespace, ns)
except SyntaxError as e:
- raise SyntaxError(
- 'invalid syntax in expression: %s' % code)
- return value
+ raise SyntaxError(
+ 'invalid syntax in expression: %s' % code)
+ return value
except Exception as e:
- if getattr(e, 'args', None):
- arg0 = e.args[0]
- else:
- arg0 = coerce_text(e)
- e.args = (self._add_line_info(arg0, pos),)
+ if getattr(e, 'args', None):
+ arg0 = e.args[0]
+ else:
+ arg0 = coerce_text(e)
+ e.args = (self._add_line_info(arg0, pos),)
raise
-
- def _exec(self, code, ns, pos):
- __traceback_hide__ = True
- try:
+
+ def _exec(self, code, ns, pos):
+ __traceback_hide__ = True
+ try:
exec(code, self.default_namespace, ns)
except Exception as e:
- if e.args:
- e.args = (self._add_line_info(e.args[0], pos),)
- else:
- e.args = (self._add_line_info(None, pos),)
+ if e.args:
+ e.args = (self._add_line_info(e.args[0], pos),)
+ else:
+ e.args = (self._add_line_info(None, pos),)
raise
-
- def _repr(self, value, pos):
- __traceback_hide__ = True
- try:
- if value is None:
- return ''
- if self._unicode:
- try:
+
+ def _repr(self, value, pos):
+ __traceback_hide__ = True
+ try:
+ if value is None:
+ return ''
+ if self._unicode:
+ try:
value = unicode_(value)
- except UnicodeDecodeError:
- value = bytes(value)
- else:
- if not isinstance(value, basestring_):
- value = coerce_text(value)
- if (is_unicode(value)
- and self.default_encoding):
- value = value.encode(self.default_encoding)
+ except UnicodeDecodeError:
+ value = bytes(value)
+ else:
+ if not isinstance(value, basestring_):
+ value = coerce_text(value)
+ if (is_unicode(value)
+ and self.default_encoding):
+ value = value.encode(self.default_encoding)
except Exception as e:
- e.args = (self._add_line_info(e.args[0], pos),)
+ e.args = (self._add_line_info(e.args[0], pos),)
raise
- else:
- if self._unicode and isinstance(value, bytes):
- if not self.default_encoding:
- raise UnicodeDecodeError(
- 'Cannot decode bytes value %r into unicode '
- '(no default_encoding provided)' % value)
- try:
- value = value.decode(self.default_encoding)
+ else:
+ if self._unicode and isinstance(value, bytes):
+ if not self.default_encoding:
+ raise UnicodeDecodeError(
+ 'Cannot decode bytes value %r into unicode '
+ '(no default_encoding provided)' % value)
+ try:
+ value = value.decode(self.default_encoding)
except UnicodeDecodeError as e:
- raise UnicodeDecodeError(
- e.encoding,
- e.object,
- e.start,
- e.end,
- e.reason + ' in string %r' % value)
- elif not self._unicode and is_unicode(value):
- if not self.default_encoding:
- raise UnicodeEncodeError(
- 'Cannot encode unicode value %r into bytes '
- '(no default_encoding provided)' % value)
- value = value.encode(self.default_encoding)
- return value
-
- def _add_line_info(self, msg, pos):
- msg = "%s at line %s column %s" % (
- msg, pos[0], pos[1])
- if self.name:
- msg += " in file %s" % self.name
- return msg
-
-
-def sub(content, delimeters=None, **kw):
- name = kw.get('__name')
- tmpl = Template(content, name=name, delimeters=delimeters)
- return tmpl.substitute(kw)
-
-
-def paste_script_template_renderer(content, vars, filename=None):
- tmpl = Template(content, name=filename)
- return tmpl.substitute(vars)
-
-
-class bunch(dict):
-
- def __init__(self, **kw):
+ raise UnicodeDecodeError(
+ e.encoding,
+ e.object,
+ e.start,
+ e.end,
+ e.reason + ' in string %r' % value)
+ elif not self._unicode and is_unicode(value):
+ if not self.default_encoding:
+ raise UnicodeEncodeError(
+ 'Cannot encode unicode value %r into bytes '
+ '(no default_encoding provided)' % value)
+ value = value.encode(self.default_encoding)
+ return value
+
+ def _add_line_info(self, msg, pos):
+ msg = "%s at line %s column %s" % (
+ msg, pos[0], pos[1])
+ if self.name:
+ msg += " in file %s" % self.name
+ return msg
+
+
+def sub(content, delimeters=None, **kw):
+ name = kw.get('__name')
+ tmpl = Template(content, name=name, delimeters=delimeters)
+ return tmpl.substitute(kw)
+
+
+def paste_script_template_renderer(content, vars, filename=None):
+ tmpl = Template(content, name=filename)
+ return tmpl.substitute(vars)
+
+
+class bunch(dict):
+
+ def __init__(self, **kw):
for name, value in kw.items():
- setattr(self, name, value)
-
- def __setattr__(self, name, value):
- self[name] = value
-
- def __getattr__(self, name):
- try:
- return self[name]
- except KeyError:
- raise AttributeError(name)
-
- def __getitem__(self, key):
- if 'default' in self:
- try:
- return dict.__getitem__(self, key)
- except KeyError:
- return dict.__getitem__(self, 'default')
- else:
- return dict.__getitem__(self, key)
-
- def __repr__(self):
- return '<%s %s>' % (
- self.__class__.__name__,
+ setattr(self, name, value)
+
+ def __setattr__(self, name, value):
+ self[name] = value
+
+ def __getattr__(self, name):
+ try:
+ return self[name]
+ except KeyError:
+ raise AttributeError(name)
+
+ def __getitem__(self, key):
+ if 'default' in self:
+ try:
+ return dict.__getitem__(self, key)
+ except KeyError:
+ return dict.__getitem__(self, 'default')
+ else:
+ return dict.__getitem__(self, key)
+
+ def __repr__(self):
+ return '<%s %s>' % (
+ self.__class__.__name__,
' '.join(['%s=%r' % (k, v) for k, v in sorted(self.items())]))
-
-############################################################
-## HTML Templating
-############################################################
-
-
-class html(object):
-
- def __init__(self, value):
- self.value = value
-
- def __str__(self):
- return self.value
-
- def __html__(self):
- return self.value
-
- def __repr__(self):
- return '<%s %r>' % (
- self.__class__.__name__, self.value)
-
-
-def html_quote(value, force=True):
- if not force and hasattr(value, '__html__'):
- return value.__html__()
- if value is None:
- return ''
- if not isinstance(value, basestring_):
- value = coerce_text(value)
- if sys.version >= "3" and isinstance(value, bytes):
- value = cgi.escape(value.decode('latin1'), 1)
- value = value.encode('latin1')
- else:
- value = cgi.escape(value, 1)
- if sys.version < "3":
- if is_unicode(value):
- value = value.encode('ascii', 'xmlcharrefreplace')
- return value
-
-
-def url(v):
- v = coerce_text(v)
- if is_unicode(v):
- v = v.encode('utf8')
- return url_quote(v)
-
-
-def attr(**kw):
- parts = []
+
+############################################################
+## HTML Templating
+############################################################
+
+
+class html(object):
+
+ def __init__(self, value):
+ self.value = value
+
+ def __str__(self):
+ return self.value
+
+ def __html__(self):
+ return self.value
+
+ def __repr__(self):
+ return '<%s %r>' % (
+ self.__class__.__name__, self.value)
+
+
+def html_quote(value, force=True):
+ if not force and hasattr(value, '__html__'):
+ return value.__html__()
+ if value is None:
+ return ''
+ if not isinstance(value, basestring_):
+ value = coerce_text(value)
+ if sys.version >= "3" and isinstance(value, bytes):
+ value = cgi.escape(value.decode('latin1'), 1)
+ value = value.encode('latin1')
+ else:
+ value = cgi.escape(value, 1)
+ if sys.version < "3":
+ if is_unicode(value):
+ value = value.encode('ascii', 'xmlcharrefreplace')
+ return value
+
+
+def url(v):
+ v = coerce_text(v)
+ if is_unicode(v):
+ v = v.encode('utf8')
+ return url_quote(v)
+
+
+def attr(**kw):
+ parts = []
for name, value in sorted(kw.items()):
- if value is None:
- continue
- if name.endswith('_'):
- name = name[:-1]
- parts.append('%s="%s"' % (html_quote(name), html_quote(value)))
- return html(' '.join(parts))
-
-
-class HTMLTemplate(Template):
-
- default_namespace = Template.default_namespace.copy()
- default_namespace.update(dict(
- html=html,
- attr=attr,
- url=url,
- html_quote=html_quote,
- ))
-
- def _repr(self, value, pos):
- if hasattr(value, '__html__'):
- value = value.__html__()
- quote = False
- else:
- quote = True
- plain = Template._repr(self, value, pos)
- if quote:
- return html_quote(plain)
- else:
- return plain
-
-
-def sub_html(content, **kw):
- name = kw.get('__name')
- tmpl = HTMLTemplate(content, name=name)
- return tmpl.substitute(kw)
-
-
-class TemplateDef(object):
- def __init__(self, template, func_name, func_signature,
- body, ns, pos, bound_self=None):
- self._template = template
- self._func_name = func_name
- self._func_signature = func_signature
- self._body = body
- self._ns = ns
- self._pos = pos
- self._bound_self = bound_self
-
- def __repr__(self):
- return '<tempita function %s(%s) at %s:%s>' % (
- self._func_name, self._func_signature,
- self._template.name, self._pos)
-
- def __str__(self):
- return self()
-
- def __call__(self, *args, **kw):
- values = self._parse_signature(args, kw)
- ns = self._ns.copy()
- ns.update(values)
- if self._bound_self is not None:
- ns['self'] = self._bound_self
- out = []
- subdefs = {}
- self._template._interpret_codes(self._body, ns, out, subdefs)
- return ''.join(out)
-
- def __get__(self, obj, type=None):
- if obj is None:
- return self
- return self.__class__(
- self._template, self._func_name, self._func_signature,
- self._body, self._ns, self._pos, bound_self=obj)
-
- def _parse_signature(self, args, kw):
- values = {}
- sig_args, var_args, var_kw, defaults = self._func_signature
- extra_kw = {}
+ if value is None:
+ continue
+ if name.endswith('_'):
+ name = name[:-1]
+ parts.append('%s="%s"' % (html_quote(name), html_quote(value)))
+ return html(' '.join(parts))
+
+
+class HTMLTemplate(Template):
+
+ default_namespace = Template.default_namespace.copy()
+ default_namespace.update(dict(
+ html=html,
+ attr=attr,
+ url=url,
+ html_quote=html_quote,
+ ))
+
+ def _repr(self, value, pos):
+ if hasattr(value, '__html__'):
+ value = value.__html__()
+ quote = False
+ else:
+ quote = True
+ plain = Template._repr(self, value, pos)
+ if quote:
+ return html_quote(plain)
+ else:
+ return plain
+
+
+def sub_html(content, **kw):
+ name = kw.get('__name')
+ tmpl = HTMLTemplate(content, name=name)
+ return tmpl.substitute(kw)
+
+
+class TemplateDef(object):
+ def __init__(self, template, func_name, func_signature,
+ body, ns, pos, bound_self=None):
+ self._template = template
+ self._func_name = func_name
+ self._func_signature = func_signature
+ self._body = body
+ self._ns = ns
+ self._pos = pos
+ self._bound_self = bound_self
+
+ def __repr__(self):
+ return '<tempita function %s(%s) at %s:%s>' % (
+ self._func_name, self._func_signature,
+ self._template.name, self._pos)
+
+ def __str__(self):
+ return self()
+
+ def __call__(self, *args, **kw):
+ values = self._parse_signature(args, kw)
+ ns = self._ns.copy()
+ ns.update(values)
+ if self._bound_self is not None:
+ ns['self'] = self._bound_self
+ out = []
+ subdefs = {}
+ self._template._interpret_codes(self._body, ns, out, subdefs)
+ return ''.join(out)
+
+ def __get__(self, obj, type=None):
+ if obj is None:
+ return self
+ return self.__class__(
+ self._template, self._func_name, self._func_signature,
+ self._body, self._ns, self._pos, bound_self=obj)
+
+ def _parse_signature(self, args, kw):
+ values = {}
+ sig_args, var_args, var_kw, defaults = self._func_signature
+ extra_kw = {}
for name, value in kw.items():
- if not var_kw and name not in sig_args:
- raise TypeError(
- 'Unexpected argument %s' % name)
- if name in sig_args:
- values[sig_args] = value
- else:
- extra_kw[name] = value
- args = list(args)
- sig_args = list(sig_args)
- while args:
- while sig_args and sig_args[0] in values:
- sig_args.pop(0)
- if sig_args:
- name = sig_args.pop(0)
- values[name] = args.pop(0)
- elif var_args:
- values[var_args] = tuple(args)
- break
- else:
- raise TypeError(
- 'Extra position arguments: %s'
- % ', '.join([repr(v) for v in args]))
+ if not var_kw and name not in sig_args:
+ raise TypeError(
+ 'Unexpected argument %s' % name)
+ if name in sig_args:
+ values[sig_args] = value
+ else:
+ extra_kw[name] = value
+ args = list(args)
+ sig_args = list(sig_args)
+ while args:
+ while sig_args and sig_args[0] in values:
+ sig_args.pop(0)
+ if sig_args:
+ name = sig_args.pop(0)
+ values[name] = args.pop(0)
+ elif var_args:
+ values[var_args] = tuple(args)
+ break
+ else:
+ raise TypeError(
+ 'Extra position arguments: %s'
+ % ', '.join([repr(v) for v in args]))
for name, value_expr in defaults.items():
- if name not in values:
- values[name] = self._template._eval(
- value_expr, self._ns, self._pos)
- for name in sig_args:
- if name not in values:
- raise TypeError(
- 'Missing argument: %s' % name)
- if var_kw:
- values[var_kw] = extra_kw
- return values
-
-
-class TemplateObject(object):
-
- def __init__(self, name):
- self.__name = name
- self.get = TemplateObjectGetter(self)
-
- def __repr__(self):
- return '<%s %s>' % (self.__class__.__name__, self.__name)
-
-
-class TemplateObjectGetter(object):
-
- def __init__(self, template_obj):
- self.__template_obj = template_obj
-
- def __getattr__(self, attr):
- return getattr(self.__template_obj, attr, Empty)
-
- def __repr__(self):
- return '<%s around %r>' % (self.__class__.__name__, self.__template_obj)
-
-
-class _Empty(object):
- def __call__(self, *args, **kw):
- return self
-
- def __str__(self):
- return ''
-
- def __repr__(self):
- return 'Empty'
-
- def __unicode__(self):
- return u''
-
- def __iter__(self):
- return iter(())
-
- def __bool__(self):
- return False
-
- if sys.version < "3":
- __nonzero__ = __bool__
-
-Empty = _Empty()
-del _Empty
-
-############################################################
-## Lexing and Parsing
-############################################################
-
-
-def lex(s, name=None, trim_whitespace=True, line_offset=0, delimeters=None):
- """
- Lex a string into chunks:
-
- >>> lex('hey')
- ['hey']
- >>> lex('hey {{you}}')
- ['hey ', ('you', (1, 7))]
- >>> lex('hey {{')
- Traceback (most recent call last):
- ...
- TemplateError: No }} to finish last expression at line 1 column 7
- >>> lex('hey }}')
- Traceback (most recent call last):
- ...
- TemplateError: }} outside expression at line 1 column 7
- >>> lex('hey {{ {{')
- Traceback (most recent call last):
- ...
- TemplateError: {{ inside expression at line 1 column 10
-
- """
- if delimeters is None:
- delimeters = ( Template.default_namespace['start_braces'],
- Template.default_namespace['end_braces'] )
- in_expr = False
- chunks = []
- last = 0
- last_pos = (line_offset + 1, 1)
-
- token_re = re.compile(r'%s|%s' % (re.escape(delimeters[0]),
- re.escape(delimeters[1])))
- for match in token_re.finditer(s):
- expr = match.group(0)
- pos = find_position(s, match.end(), last, last_pos)
- if expr == delimeters[0] and in_expr:
- raise TemplateError('%s inside expression' % delimeters[0],
- position=pos,
- name=name)
- elif expr == delimeters[1] and not in_expr:
- raise TemplateError('%s outside expression' % delimeters[1],
- position=pos,
- name=name)
- if expr == delimeters[0]:
- part = s[last:match.start()]
- if part:
- chunks.append(part)
- in_expr = True
- else:
- chunks.append((s[last:match.start()], last_pos))
- in_expr = False
- last = match.end()
- last_pos = pos
- if in_expr:
- raise TemplateError('No %s to finish last expression' % delimeters[1],
- name=name, position=last_pos)
- part = s[last:]
- if part:
- chunks.append(part)
- if trim_whitespace:
- chunks = trim_lex(chunks)
- return chunks
-
-statement_re = re.compile(r'^(?:if |elif |for |def |inherit |default |py:)')
-single_statements = ['else', 'endif', 'endfor', 'enddef', 'continue', 'break']
-trail_whitespace_re = re.compile(r'\n\r?[\t ]*$')
-lead_whitespace_re = re.compile(r'^[\t ]*\n')
-
-
-def trim_lex(tokens):
- r"""
- Takes a lexed set of tokens, and removes whitespace when there is
- a directive on a line by itself:
-
- >>> tokens = lex('{{if x}}\nx\n{{endif}}\ny', trim_whitespace=False)
- >>> tokens
- [('if x', (1, 3)), '\nx\n', ('endif', (3, 3)), '\ny']
- >>> trim_lex(tokens)
- [('if x', (1, 3)), 'x\n', ('endif', (3, 3)), 'y']
- """
- last_trim = None
- for i, current in enumerate(tokens):
- if isinstance(current, basestring_):
- # we don't trim this
- continue
- item = current[0]
- if not statement_re.search(item) and item not in single_statements:
- continue
- if not i:
- prev = ''
- else:
- prev = tokens[i - 1]
- if i + 1 >= len(tokens):
- next_chunk = ''
- else:
- next_chunk = tokens[i + 1]
- if (not isinstance(next_chunk, basestring_)
- or not isinstance(prev, basestring_)):
- continue
- prev_ok = not prev or trail_whitespace_re.search(prev)
- if i == 1 and not prev.strip():
- prev_ok = True
- if last_trim is not None and last_trim + 2 == i and not prev.strip():
- prev_ok = 'last'
- if (prev_ok
- and (not next_chunk or lead_whitespace_re.search(next_chunk)
- or (i == len(tokens) - 2 and not next_chunk.strip()))):
- if prev:
- if ((i == 1 and not prev.strip())
- or prev_ok == 'last'):
- tokens[i - 1] = ''
- else:
- m = trail_whitespace_re.search(prev)
- # +1 to leave the leading \n on:
- prev = prev[:m.start() + 1]
- tokens[i - 1] = prev
- if next_chunk:
- last_trim = i
- if i == len(tokens) - 2 and not next_chunk.strip():
- tokens[i + 1] = ''
- else:
- m = lead_whitespace_re.search(next_chunk)
- next_chunk = next_chunk[m.end():]
- tokens[i + 1] = next_chunk
- return tokens
-
-
-def find_position(string, index, last_index, last_pos):
- """Given a string and index, return (line, column)"""
- lines = string.count('\n', last_index, index)
- if lines > 0:
- column = index - string.rfind('\n', last_index, index)
- else:
- column = last_pos[1] + (index - last_index)
- return (last_pos[0] + lines, column)
-
-
-def parse(s, name=None, line_offset=0, delimeters=None):
- r"""
- Parses a string into a kind of AST
-
- >>> parse('{{x}}')
- [('expr', (1, 3), 'x')]
- >>> parse('foo')
- ['foo']
- >>> parse('{{if x}}test{{endif}}')
- [('cond', (1, 3), ('if', (1, 3), 'x', ['test']))]
- >>> parse('series->{{for x in y}}x={{x}}{{endfor}}')
- ['series->', ('for', (1, 11), ('x',), 'y', ['x=', ('expr', (1, 27), 'x')])]
- >>> parse('{{for x, y in z:}}{{continue}}{{endfor}}')
- [('for', (1, 3), ('x', 'y'), 'z', [('continue', (1, 21))])]
- >>> parse('{{py:x=1}}')
- [('py', (1, 3), 'x=1')]
- >>> parse('{{if x}}a{{elif y}}b{{else}}c{{endif}}')
- [('cond', (1, 3), ('if', (1, 3), 'x', ['a']), ('elif', (1, 12), 'y', ['b']), ('else', (1, 23), None, ['c']))]
-
- Some exceptions::
-
- >>> parse('{{continue}}')
- Traceback (most recent call last):
- ...
- TemplateError: continue outside of for loop at line 1 column 3
- >>> parse('{{if x}}foo')
- Traceback (most recent call last):
- ...
- TemplateError: No {{endif}} at line 1 column 3
- >>> parse('{{else}}')
- Traceback (most recent call last):
- ...
- TemplateError: else outside of an if block at line 1 column 3
- >>> parse('{{if x}}{{for x in y}}{{endif}}{{endfor}}')
- Traceback (most recent call last):
- ...
- TemplateError: Unexpected endif at line 1 column 25
- >>> parse('{{if}}{{endif}}')
- Traceback (most recent call last):
- ...
- TemplateError: if with no expression at line 1 column 3
- >>> parse('{{for x y}}{{endfor}}')
- Traceback (most recent call last):
- ...
- TemplateError: Bad for (no "in") in 'x y' at line 1 column 3
- >>> parse('{{py:x=1\ny=2}}')
- Traceback (most recent call last):
- ...
- TemplateError: Multi-line py blocks must start with a newline at line 1 column 3
- """
- if delimeters is None:
- delimeters = ( Template.default_namespace['start_braces'],
- Template.default_namespace['end_braces'] )
- tokens = lex(s, name=name, line_offset=line_offset, delimeters=delimeters)
- result = []
- while tokens:
- next_chunk, tokens = parse_expr(tokens, name)
- result.append(next_chunk)
- return result
-
-
-def parse_expr(tokens, name, context=()):
- if isinstance(tokens[0], basestring_):
- return tokens[0], tokens[1:]
- expr, pos = tokens[0]
- expr = expr.strip()
- if expr.startswith('py:'):
- expr = expr[3:].lstrip(' \t')
- if expr.startswith('\n') or expr.startswith('\r'):
- expr = expr.lstrip('\r\n')
- if '\r' in expr:
- expr = expr.replace('\r\n', '\n')
- expr = expr.replace('\r', '')
- expr += '\n'
- else:
- if '\n' in expr:
- raise TemplateError(
- 'Multi-line py blocks must start with a newline',
- position=pos, name=name)
- return ('py', pos, expr), tokens[1:]
- elif expr in ('continue', 'break'):
- if 'for' not in context:
- raise TemplateError(
- 'continue outside of for loop',
- position=pos, name=name)
- return (expr, pos), tokens[1:]
- elif expr.startswith('if '):
- return parse_cond(tokens, name, context)
- elif (expr.startswith('elif ')
- or expr == 'else'):
- raise TemplateError(
- '%s outside of an if block' % expr.split()[0],
- position=pos, name=name)
- elif expr in ('if', 'elif', 'for'):
- raise TemplateError(
- '%s with no expression' % expr,
- position=pos, name=name)
- elif expr in ('endif', 'endfor', 'enddef'):
- raise TemplateError(
- 'Unexpected %s' % expr,
- position=pos, name=name)
- elif expr.startswith('for '):
- return parse_for(tokens, name, context)
- elif expr.startswith('default '):
- return parse_default(tokens, name, context)
- elif expr.startswith('inherit '):
- return parse_inherit(tokens, name, context)
- elif expr.startswith('def '):
- return parse_def(tokens, name, context)
- elif expr.startswith('#'):
- return ('comment', pos, tokens[0][0]), tokens[1:]
- return ('expr', pos, tokens[0][0]), tokens[1:]
-
-
-def parse_cond(tokens, name, context):
- start = tokens[0][1]
- pieces = []
- context = context + ('if',)
- while 1:
- if not tokens:
- raise TemplateError(
- 'Missing {{endif}}',
- position=start, name=name)
- if (isinstance(tokens[0], tuple)
- and tokens[0][0] == 'endif'):
- return ('cond', start) + tuple(pieces), tokens[1:]
- next_chunk, tokens = parse_one_cond(tokens, name, context)
- pieces.append(next_chunk)
-
-
-def parse_one_cond(tokens, name, context):
- (first, pos), tokens = tokens[0], tokens[1:]
- content = []
- if first.endswith(':'):
- first = first[:-1]
- if first.startswith('if '):
- part = ('if', pos, first[3:].lstrip(), content)
- elif first.startswith('elif '):
- part = ('elif', pos, first[5:].lstrip(), content)
- elif first == 'else':
- part = ('else', pos, None, content)
- else:
- assert 0, "Unexpected token %r at %s" % (first, pos)
- while 1:
- if not tokens:
- raise TemplateError(
- 'No {{endif}}',
- position=pos, name=name)
- if (isinstance(tokens[0], tuple)
- and (tokens[0][0] == 'endif'
- or tokens[0][0].startswith('elif ')
- or tokens[0][0] == 'else')):
- return part, tokens
- next_chunk, tokens = parse_expr(tokens, name, context)
- content.append(next_chunk)
-
-
-def parse_for(tokens, name, context):
- first, pos = tokens[0]
- tokens = tokens[1:]
- context = ('for',) + context
- content = []
- assert first.startswith('for ')
- if first.endswith(':'):
- first = first[:-1]
- first = first[3:].strip()
- match = in_re.search(first)
- if not match:
- raise TemplateError(
- 'Bad for (no "in") in %r' % first,
- position=pos, name=name)
- vars = first[:match.start()]
- if '(' in vars:
- raise TemplateError(
- 'You cannot have () in the variable section of a for loop (%r)'
- % vars, position=pos, name=name)
- vars = tuple([
- v.strip() for v in first[:match.start()].split(',')
- if v.strip()])
- expr = first[match.end():]
- while 1:
- if not tokens:
- raise TemplateError(
- 'No {{endfor}}',
- position=pos, name=name)
- if (isinstance(tokens[0], tuple)
- and tokens[0][0] == 'endfor'):
- return ('for', pos, vars, expr, content), tokens[1:]
- next_chunk, tokens = parse_expr(tokens, name, context)
- content.append(next_chunk)
-
-
-def parse_default(tokens, name, context):
- first, pos = tokens[0]
- assert first.startswith('default ')
- first = first.split(None, 1)[1]
- parts = first.split('=', 1)
- if len(parts) == 1:
- raise TemplateError(
- "Expression must be {{default var=value}}; no = found in %r" % first,
- position=pos, name=name)
- var = parts[0].strip()
- if ',' in var:
- raise TemplateError(
- "{{default x, y = ...}} is not supported",
- position=pos, name=name)
- if not var_re.search(var):
- raise TemplateError(
- "Not a valid variable name for {{default}}: %r"
- % var, position=pos, name=name)
- expr = parts[1].strip()
- return ('default', pos, var, expr), tokens[1:]
-
-
-def parse_inherit(tokens, name, context):
- first, pos = tokens[0]
- assert first.startswith('inherit ')
- expr = first.split(None, 1)[1]
- return ('inherit', pos, expr), tokens[1:]
-
-
-def parse_def(tokens, name, context):
- first, start = tokens[0]
- tokens = tokens[1:]
- assert first.startswith('def ')
- first = first.split(None, 1)[1]
- if first.endswith(':'):
- first = first[:-1]
- if '(' not in first:
- func_name = first
- sig = ((), None, None, {})
- elif not first.endswith(')'):
- raise TemplateError("Function definition doesn't end with ): %s" % first,
- position=start, name=name)
- else:
- first = first[:-1]
- func_name, sig_text = first.split('(', 1)
- sig = parse_signature(sig_text, name, start)
- context = context + ('def',)
- content = []
- while 1:
- if not tokens:
- raise TemplateError(
- 'Missing {{enddef}}',
- position=start, name=name)
- if (isinstance(tokens[0], tuple)
- and tokens[0][0] == 'enddef'):
- return ('def', start, func_name, sig, content), tokens[1:]
- next_chunk, tokens = parse_expr(tokens, name, context)
- content.append(next_chunk)
-
-
-def parse_signature(sig_text, name, pos):
- tokens = tokenize.generate_tokens(StringIO(sig_text).readline)
- sig_args = []
- var_arg = None
- var_kw = None
- defaults = {}
-
- def get_token(pos=False):
- try:
- tok_type, tok_string, (srow, scol), (erow, ecol), line = next(tokens)
- except StopIteration:
- return tokenize.ENDMARKER, ''
- if pos:
- return tok_type, tok_string, (srow, scol), (erow, ecol)
- else:
- return tok_type, tok_string
- while 1:
- var_arg_type = None
- tok_type, tok_string = get_token()
- if tok_type == tokenize.ENDMARKER:
- break
- if tok_type == tokenize.OP and (tok_string == '*' or tok_string == '**'):
- var_arg_type = tok_string
- tok_type, tok_string = get_token()
- if tok_type != tokenize.NAME:
- raise TemplateError('Invalid signature: (%s)' % sig_text,
- position=pos, name=name)
- var_name = tok_string
- tok_type, tok_string = get_token()
- if tok_type == tokenize.ENDMARKER or (tok_type == tokenize.OP and tok_string == ','):
- if var_arg_type == '*':
- var_arg = var_name
- elif var_arg_type == '**':
- var_kw = var_name
- else:
- sig_args.append(var_name)
- if tok_type == tokenize.ENDMARKER:
- break
- continue
- if var_arg_type is not None:
- raise TemplateError('Invalid signature: (%s)' % sig_text,
- position=pos, name=name)
- if tok_type == tokenize.OP and tok_string == '=':
- nest_type = None
- unnest_type = None
- nest_count = 0
- start_pos = end_pos = None
- parts = []
- while 1:
- tok_type, tok_string, s, e = get_token(True)
- if start_pos is None:
- start_pos = s
- end_pos = e
- if tok_type == tokenize.ENDMARKER and nest_count:
- raise TemplateError('Invalid signature: (%s)' % sig_text,
- position=pos, name=name)
- if (not nest_count and
- (tok_type == tokenize.ENDMARKER or (tok_type == tokenize.OP and tok_string == ','))):
- default_expr = isolate_expression(sig_text, start_pos, end_pos)
- defaults[var_name] = default_expr
- sig_args.append(var_name)
- break
- parts.append((tok_type, tok_string))
- if nest_count and tok_type == tokenize.OP and tok_string == nest_type:
- nest_count += 1
- elif nest_count and tok_type == tokenize.OP and tok_string == unnest_type:
- nest_count -= 1
- if not nest_count:
- nest_type = unnest_type = None
- elif not nest_count and tok_type == tokenize.OP and tok_string in ('(', '[', '{'):
- nest_type = tok_string
- nest_count = 1
- unnest_type = {'(': ')', '[': ']', '{': '}'}[nest_type]
- return sig_args, var_arg, var_kw, defaults
-
-
-def isolate_expression(string, start_pos, end_pos):
- srow, scol = start_pos
- srow -= 1
- erow, ecol = end_pos
- erow -= 1
- lines = string.splitlines(True)
- if srow == erow:
- return lines[srow][scol:ecol]
- parts = [lines[srow][scol:]]
- parts.extend(lines[srow+1:erow])
- if erow < len(lines):
- # It'll sometimes give (end_row_past_finish, 0)
- parts.append(lines[erow][:ecol])
- return ''.join(parts)
-
-_fill_command_usage = """\
-%prog [OPTIONS] TEMPLATE arg=value
-
-Use py:arg=value to set a Python value; otherwise all values are
-strings.
-"""
-
-
-def fill_command(args=None):
- import sys
- import optparse
- import pkg_resources
- import os
- if args is None:
- args = sys.argv[1:]
- dist = pkg_resources.get_distribution('Paste')
- parser = optparse.OptionParser(
- version=coerce_text(dist),
- usage=_fill_command_usage)
- parser.add_option(
- '-o', '--output',
- dest='output',
- metavar="FILENAME",
- help="File to write output to (default stdout)")
- parser.add_option(
- '--html',
- dest='use_html',
- action='store_true',
- help="Use HTML style filling (including automatic HTML quoting)")
- parser.add_option(
- '--env',
- dest='use_env',
- action='store_true',
- help="Put the environment in as top-level variables")
- options, args = parser.parse_args(args)
- if len(args) < 1:
- print('You must give a template filename')
- sys.exit(2)
- template_name = args[0]
- args = args[1:]
- vars = {}
- if options.use_env:
- vars.update(os.environ)
- for value in args:
- if '=' not in value:
- print('Bad argument: %r' % value)
- sys.exit(2)
- name, value = value.split('=', 1)
- if name.startswith('py:'):
-            name = name[3:]
- value = eval(value)
- vars[name] = value
- if template_name == '-':
- template_content = sys.stdin.read()
- template_name = '<stdin>'
- else:
- f = open(template_name, 'rb')
- template_content = f.read()
- f.close()
- if options.use_html:
- TemplateClass = HTMLTemplate
- else:
- TemplateClass = Template
- template = TemplateClass(template_content, name=template_name)
- result = template.substitute(vars)
- if options.output:
- f = open(options.output, 'wb')
- f.write(result)
- f.close()
- else:
- sys.stdout.write(result)
-
-if __name__ == '__main__':
- fill_command()
+ if name not in values:
+ values[name] = self._template._eval(
+ value_expr, self._ns, self._pos)
+ for name in sig_args:
+ if name not in values:
+ raise TypeError(
+ 'Missing argument: %s' % name)
+ if var_kw:
+ values[var_kw] = extra_kw
+ return values
+
+
+class TemplateObject(object):
+
+ def __init__(self, name):
+ self.__name = name
+ self.get = TemplateObjectGetter(self)
+
+ def __repr__(self):
+ return '<%s %s>' % (self.__class__.__name__, self.__name)
+
+
+class TemplateObjectGetter(object):
+
+ def __init__(self, template_obj):
+ self.__template_obj = template_obj
+
+ def __getattr__(self, attr):
+ return getattr(self.__template_obj, attr, Empty)
+
+ def __repr__(self):
+ return '<%s around %r>' % (self.__class__.__name__, self.__template_obj)
+
+
+class _Empty(object):
+ def __call__(self, *args, **kw):
+ return self
+
+ def __str__(self):
+ return ''
+
+ def __repr__(self):
+ return 'Empty'
+
+ def __unicode__(self):
+ return u''
+
+ def __iter__(self):
+ return iter(())
+
+ def __bool__(self):
+ return False
+
+ if sys.version < "3":
+ __nonzero__ = __bool__
+
+Empty = _Empty()
+del _Empty
+
+############################################################
+## Lexing and Parsing
+############################################################
+
+
+def lex(s, name=None, trim_whitespace=True, line_offset=0, delimeters=None):
+ """
+ Lex a string into chunks:
+
+ >>> lex('hey')
+ ['hey']
+ >>> lex('hey {{you}}')
+ ['hey ', ('you', (1, 7))]
+ >>> lex('hey {{')
+ Traceback (most recent call last):
+ ...
+ TemplateError: No }} to finish last expression at line 1 column 7
+ >>> lex('hey }}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: }} outside expression at line 1 column 7
+ >>> lex('hey {{ {{')
+ Traceback (most recent call last):
+ ...
+ TemplateError: {{ inside expression at line 1 column 10
+
+ """
+ if delimeters is None:
+ delimeters = ( Template.default_namespace['start_braces'],
+ Template.default_namespace['end_braces'] )
+ in_expr = False
+ chunks = []
+ last = 0
+ last_pos = (line_offset + 1, 1)
+
+ token_re = re.compile(r'%s|%s' % (re.escape(delimeters[0]),
+ re.escape(delimeters[1])))
+ for match in token_re.finditer(s):
+ expr = match.group(0)
+ pos = find_position(s, match.end(), last, last_pos)
+ if expr == delimeters[0] and in_expr:
+ raise TemplateError('%s inside expression' % delimeters[0],
+ position=pos,
+ name=name)
+ elif expr == delimeters[1] and not in_expr:
+ raise TemplateError('%s outside expression' % delimeters[1],
+ position=pos,
+ name=name)
+ if expr == delimeters[0]:
+ part = s[last:match.start()]
+ if part:
+ chunks.append(part)
+ in_expr = True
+ else:
+ chunks.append((s[last:match.start()], last_pos))
+ in_expr = False
+ last = match.end()
+ last_pos = pos
+ if in_expr:
+ raise TemplateError('No %s to finish last expression' % delimeters[1],
+ name=name, position=last_pos)
+ part = s[last:]
+ if part:
+ chunks.append(part)
+ if trim_whitespace:
+ chunks = trim_lex(chunks)
+ return chunks
+
+statement_re = re.compile(r'^(?:if |elif |for |def |inherit |default |py:)')
+single_statements = ['else', 'endif', 'endfor', 'enddef', 'continue', 'break']
+trail_whitespace_re = re.compile(r'\n\r?[\t ]*$')
+lead_whitespace_re = re.compile(r'^[\t ]*\n')
+
+
+def trim_lex(tokens):
+ r"""
+ Takes a lexed set of tokens, and removes whitespace when there is
+ a directive on a line by itself:
+
+ >>> tokens = lex('{{if x}}\nx\n{{endif}}\ny', trim_whitespace=False)
+ >>> tokens
+ [('if x', (1, 3)), '\nx\n', ('endif', (3, 3)), '\ny']
+ >>> trim_lex(tokens)
+ [('if x', (1, 3)), 'x\n', ('endif', (3, 3)), 'y']
+ """
+ last_trim = None
+ for i, current in enumerate(tokens):
+ if isinstance(current, basestring_):
+ # we don't trim this
+ continue
+ item = current[0]
+ if not statement_re.search(item) and item not in single_statements:
+ continue
+ if not i:
+ prev = ''
+ else:
+ prev = tokens[i - 1]
+ if i + 1 >= len(tokens):
+ next_chunk = ''
+ else:
+ next_chunk = tokens[i + 1]
+ if (not isinstance(next_chunk, basestring_)
+ or not isinstance(prev, basestring_)):
+ continue
+ prev_ok = not prev or trail_whitespace_re.search(prev)
+ if i == 1 and not prev.strip():
+ prev_ok = True
+ if last_trim is not None and last_trim + 2 == i and not prev.strip():
+ prev_ok = 'last'
+ if (prev_ok
+ and (not next_chunk or lead_whitespace_re.search(next_chunk)
+ or (i == len(tokens) - 2 and not next_chunk.strip()))):
+ if prev:
+ if ((i == 1 and not prev.strip())
+ or prev_ok == 'last'):
+ tokens[i - 1] = ''
+ else:
+ m = trail_whitespace_re.search(prev)
+ # +1 to leave the leading \n on:
+ prev = prev[:m.start() + 1]
+ tokens[i - 1] = prev
+ if next_chunk:
+ last_trim = i
+ if i == len(tokens) - 2 and not next_chunk.strip():
+ tokens[i + 1] = ''
+ else:
+ m = lead_whitespace_re.search(next_chunk)
+ next_chunk = next_chunk[m.end():]
+ tokens[i + 1] = next_chunk
+ return tokens
+
+
+def find_position(string, index, last_index, last_pos):
+ """Given a string and index, return (line, column)"""
+ lines = string.count('\n', last_index, index)
+ if lines > 0:
+ column = index - string.rfind('\n', last_index, index)
+ else:
+ column = last_pos[1] + (index - last_index)
+ return (last_pos[0] + lines, column)
+
+
+def parse(s, name=None, line_offset=0, delimeters=None):
+ r"""
+ Parses a string into a kind of AST
+
+ >>> parse('{{x}}')
+ [('expr', (1, 3), 'x')]
+ >>> parse('foo')
+ ['foo']
+ >>> parse('{{if x}}test{{endif}}')
+ [('cond', (1, 3), ('if', (1, 3), 'x', ['test']))]
+ >>> parse('series->{{for x in y}}x={{x}}{{endfor}}')
+ ['series->', ('for', (1, 11), ('x',), 'y', ['x=', ('expr', (1, 27), 'x')])]
+ >>> parse('{{for x, y in z:}}{{continue}}{{endfor}}')
+ [('for', (1, 3), ('x', 'y'), 'z', [('continue', (1, 21))])]
+ >>> parse('{{py:x=1}}')
+ [('py', (1, 3), 'x=1')]
+ >>> parse('{{if x}}a{{elif y}}b{{else}}c{{endif}}')
+ [('cond', (1, 3), ('if', (1, 3), 'x', ['a']), ('elif', (1, 12), 'y', ['b']), ('else', (1, 23), None, ['c']))]
+
+ Some exceptions::
+
+ >>> parse('{{continue}}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: continue outside of for loop at line 1 column 3
+ >>> parse('{{if x}}foo')
+ Traceback (most recent call last):
+ ...
+ TemplateError: No {{endif}} at line 1 column 3
+ >>> parse('{{else}}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: else outside of an if block at line 1 column 3
+ >>> parse('{{if x}}{{for x in y}}{{endif}}{{endfor}}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: Unexpected endif at line 1 column 25
+ >>> parse('{{if}}{{endif}}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: if with no expression at line 1 column 3
+ >>> parse('{{for x y}}{{endfor}}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: Bad for (no "in") in 'x y' at line 1 column 3
+ >>> parse('{{py:x=1\ny=2}}')
+ Traceback (most recent call last):
+ ...
+ TemplateError: Multi-line py blocks must start with a newline at line 1 column 3
+ """
+ if delimeters is None:
+ delimeters = ( Template.default_namespace['start_braces'],
+ Template.default_namespace['end_braces'] )
+ tokens = lex(s, name=name, line_offset=line_offset, delimeters=delimeters)
+ result = []
+ while tokens:
+ next_chunk, tokens = parse_expr(tokens, name)
+ result.append(next_chunk)
+ return result
+
+
+def parse_expr(tokens, name, context=()):
+ if isinstance(tokens[0], basestring_):
+ return tokens[0], tokens[1:]
+ expr, pos = tokens[0]
+ expr = expr.strip()
+ if expr.startswith('py:'):
+ expr = expr[3:].lstrip(' \t')
+ if expr.startswith('\n') or expr.startswith('\r'):
+ expr = expr.lstrip('\r\n')
+ if '\r' in expr:
+ expr = expr.replace('\r\n', '\n')
+ expr = expr.replace('\r', '')
+ expr += '\n'
+ else:
+ if '\n' in expr:
+ raise TemplateError(
+ 'Multi-line py blocks must start with a newline',
+ position=pos, name=name)
+ return ('py', pos, expr), tokens[1:]
+ elif expr in ('continue', 'break'):
+ if 'for' not in context:
+ raise TemplateError(
+ 'continue outside of for loop',
+ position=pos, name=name)
+ return (expr, pos), tokens[1:]
+ elif expr.startswith('if '):
+ return parse_cond(tokens, name, context)
+ elif (expr.startswith('elif ')
+ or expr == 'else'):
+ raise TemplateError(
+ '%s outside of an if block' % expr.split()[0],
+ position=pos, name=name)
+ elif expr in ('if', 'elif', 'for'):
+ raise TemplateError(
+ '%s with no expression' % expr,
+ position=pos, name=name)
+ elif expr in ('endif', 'endfor', 'enddef'):
+ raise TemplateError(
+ 'Unexpected %s' % expr,
+ position=pos, name=name)
+ elif expr.startswith('for '):
+ return parse_for(tokens, name, context)
+ elif expr.startswith('default '):
+ return parse_default(tokens, name, context)
+ elif expr.startswith('inherit '):
+ return parse_inherit(tokens, name, context)
+ elif expr.startswith('def '):
+ return parse_def(tokens, name, context)
+ elif expr.startswith('#'):
+ return ('comment', pos, tokens[0][0]), tokens[1:]
+ return ('expr', pos, tokens[0][0]), tokens[1:]
+
+
+def parse_cond(tokens, name, context):
+ start = tokens[0][1]
+ pieces = []
+ context = context + ('if',)
+ while 1:
+ if not tokens:
+ raise TemplateError(
+ 'Missing {{endif}}',
+ position=start, name=name)
+ if (isinstance(tokens[0], tuple)
+ and tokens[0][0] == 'endif'):
+ return ('cond', start) + tuple(pieces), tokens[1:]
+ next_chunk, tokens = parse_one_cond(tokens, name, context)
+ pieces.append(next_chunk)
+
+
+def parse_one_cond(tokens, name, context):
+ (first, pos), tokens = tokens[0], tokens[1:]
+ content = []
+ if first.endswith(':'):
+ first = first[:-1]
+ if first.startswith('if '):
+ part = ('if', pos, first[3:].lstrip(), content)
+ elif first.startswith('elif '):
+ part = ('elif', pos, first[5:].lstrip(), content)
+ elif first == 'else':
+ part = ('else', pos, None, content)
+ else:
+ assert 0, "Unexpected token %r at %s" % (first, pos)
+ while 1:
+ if not tokens:
+ raise TemplateError(
+ 'No {{endif}}',
+ position=pos, name=name)
+ if (isinstance(tokens[0], tuple)
+ and (tokens[0][0] == 'endif'
+ or tokens[0][0].startswith('elif ')
+ or tokens[0][0] == 'else')):
+ return part, tokens
+ next_chunk, tokens = parse_expr(tokens, name, context)
+ content.append(next_chunk)
+
+
+def parse_for(tokens, name, context):
+ first, pos = tokens[0]
+ tokens = tokens[1:]
+ context = ('for',) + context
+ content = []
+ assert first.startswith('for ')
+ if first.endswith(':'):
+ first = first[:-1]
+ first = first[3:].strip()
+ match = in_re.search(first)
+ if not match:
+ raise TemplateError(
+ 'Bad for (no "in") in %r' % first,
+ position=pos, name=name)
+ vars = first[:match.start()]
+ if '(' in vars:
+ raise TemplateError(
+ 'You cannot have () in the variable section of a for loop (%r)'
+ % vars, position=pos, name=name)
+ vars = tuple([
+ v.strip() for v in first[:match.start()].split(',')
+ if v.strip()])
+ expr = first[match.end():]
+ while 1:
+ if not tokens:
+ raise TemplateError(
+ 'No {{endfor}}',
+ position=pos, name=name)
+ if (isinstance(tokens[0], tuple)
+ and tokens[0][0] == 'endfor'):
+ return ('for', pos, vars, expr, content), tokens[1:]
+ next_chunk, tokens = parse_expr(tokens, name, context)
+ content.append(next_chunk)
+
+
+def parse_default(tokens, name, context):
+ first, pos = tokens[0]
+ assert first.startswith('default ')
+ first = first.split(None, 1)[1]
+ parts = first.split('=', 1)
+ if len(parts) == 1:
+ raise TemplateError(
+ "Expression must be {{default var=value}}; no = found in %r" % first,
+ position=pos, name=name)
+ var = parts[0].strip()
+ if ',' in var:
+ raise TemplateError(
+ "{{default x, y = ...}} is not supported",
+ position=pos, name=name)
+ if not var_re.search(var):
+ raise TemplateError(
+ "Not a valid variable name for {{default}}: %r"
+ % var, position=pos, name=name)
+ expr = parts[1].strip()
+ return ('default', pos, var, expr), tokens[1:]
+
+
+def parse_inherit(tokens, name, context):
+ first, pos = tokens[0]
+ assert first.startswith('inherit ')
+ expr = first.split(None, 1)[1]
+ return ('inherit', pos, expr), tokens[1:]
+
+
+def parse_def(tokens, name, context):
+ first, start = tokens[0]
+ tokens = tokens[1:]
+ assert first.startswith('def ')
+ first = first.split(None, 1)[1]
+ if first.endswith(':'):
+ first = first[:-1]
+ if '(' not in first:
+ func_name = first
+ sig = ((), None, None, {})
+ elif not first.endswith(')'):
+ raise TemplateError("Function definition doesn't end with ): %s" % first,
+ position=start, name=name)
+ else:
+ first = first[:-1]
+ func_name, sig_text = first.split('(', 1)
+ sig = parse_signature(sig_text, name, start)
+ context = context + ('def',)
+ content = []
+ while 1:
+ if not tokens:
+ raise TemplateError(
+ 'Missing {{enddef}}',
+ position=start, name=name)
+ if (isinstance(tokens[0], tuple)
+ and tokens[0][0] == 'enddef'):
+ return ('def', start, func_name, sig, content), tokens[1:]
+ next_chunk, tokens = parse_expr(tokens, name, context)
+ content.append(next_chunk)
+
+
+def parse_signature(sig_text, name, pos):
+ tokens = tokenize.generate_tokens(StringIO(sig_text).readline)
+ sig_args = []
+ var_arg = None
+ var_kw = None
+ defaults = {}
+
+ def get_token(pos=False):
+ try:
+ tok_type, tok_string, (srow, scol), (erow, ecol), line = next(tokens)
+ except StopIteration:
+ return tokenize.ENDMARKER, ''
+ if pos:
+ return tok_type, tok_string, (srow, scol), (erow, ecol)
+ else:
+ return tok_type, tok_string
+ while 1:
+ var_arg_type = None
+ tok_type, tok_string = get_token()
+ if tok_type == tokenize.ENDMARKER:
+ break
+ if tok_type == tokenize.OP and (tok_string == '*' or tok_string == '**'):
+ var_arg_type = tok_string
+ tok_type, tok_string = get_token()
+ if tok_type != tokenize.NAME:
+ raise TemplateError('Invalid signature: (%s)' % sig_text,
+ position=pos, name=name)
+ var_name = tok_string
+ tok_type, tok_string = get_token()
+ if tok_type == tokenize.ENDMARKER or (tok_type == tokenize.OP and tok_string == ','):
+ if var_arg_type == '*':
+ var_arg = var_name
+ elif var_arg_type == '**':
+ var_kw = var_name
+ else:
+ sig_args.append(var_name)
+ if tok_type == tokenize.ENDMARKER:
+ break
+ continue
+ if var_arg_type is not None:
+ raise TemplateError('Invalid signature: (%s)' % sig_text,
+ position=pos, name=name)
+ if tok_type == tokenize.OP and tok_string == '=':
+ nest_type = None
+ unnest_type = None
+ nest_count = 0
+ start_pos = end_pos = None
+ parts = []
+ while 1:
+ tok_type, tok_string, s, e = get_token(True)
+ if start_pos is None:
+ start_pos = s
+ end_pos = e
+ if tok_type == tokenize.ENDMARKER and nest_count:
+ raise TemplateError('Invalid signature: (%s)' % sig_text,
+ position=pos, name=name)
+ if (not nest_count and
+ (tok_type == tokenize.ENDMARKER or (tok_type == tokenize.OP and tok_string == ','))):
+ default_expr = isolate_expression(sig_text, start_pos, end_pos)
+ defaults[var_name] = default_expr
+ sig_args.append(var_name)
+ break
+ parts.append((tok_type, tok_string))
+ if nest_count and tok_type == tokenize.OP and tok_string == nest_type:
+ nest_count += 1
+ elif nest_count and tok_type == tokenize.OP and tok_string == unnest_type:
+ nest_count -= 1
+ if not nest_count:
+ nest_type = unnest_type = None
+ elif not nest_count and tok_type == tokenize.OP and tok_string in ('(', '[', '{'):
+ nest_type = tok_string
+ nest_count = 1
+ unnest_type = {'(': ')', '[': ']', '{': '}'}[nest_type]
+ return sig_args, var_arg, var_kw, defaults
+
+
+def isolate_expression(string, start_pos, end_pos):
+ srow, scol = start_pos
+ srow -= 1
+ erow, ecol = end_pos
+ erow -= 1
+ lines = string.splitlines(True)
+ if srow == erow:
+ return lines[srow][scol:ecol]
+ parts = [lines[srow][scol:]]
+ parts.extend(lines[srow+1:erow])
+ if erow < len(lines):
+ # It'll sometimes give (end_row_past_finish, 0)
+ parts.append(lines[erow][:ecol])
+ return ''.join(parts)
+
+_fill_command_usage = """\
+%prog [OPTIONS] TEMPLATE arg=value
+
+Use py:arg=value to set a Python value; otherwise all values are
+strings.
+"""
+
+
+def fill_command(args=None):
+ import sys
+ import optparse
+ import pkg_resources
+ import os
+ if args is None:
+ args = sys.argv[1:]
+ dist = pkg_resources.get_distribution('Paste')
+ parser = optparse.OptionParser(
+ version=coerce_text(dist),
+ usage=_fill_command_usage)
+ parser.add_option(
+ '-o', '--output',
+ dest='output',
+ metavar="FILENAME",
+ help="File to write output to (default stdout)")
+ parser.add_option(
+ '--html',
+ dest='use_html',
+ action='store_true',
+ help="Use HTML style filling (including automatic HTML quoting)")
+ parser.add_option(
+ '--env',
+ dest='use_env',
+ action='store_true',
+ help="Put the environment in as top-level variables")
+ options, args = parser.parse_args(args)
+ if len(args) < 1:
+ print('You must give a template filename')
+ sys.exit(2)
+ template_name = args[0]
+ args = args[1:]
+ vars = {}
+ if options.use_env:
+ vars.update(os.environ)
+ for value in args:
+ if '=' not in value:
+ print('Bad argument: %r' % value)
+ sys.exit(2)
+ name, value = value.split('=', 1)
+ if name.startswith('py:'):
+            name = name[3:]
+ value = eval(value)
+ vars[name] = value
+ if template_name == '-':
+ template_content = sys.stdin.read()
+ template_name = '<stdin>'
+ else:
+ f = open(template_name, 'rb')
+ template_content = f.read()
+ f.close()
+ if options.use_html:
+ TemplateClass = HTMLTemplate
+ else:
+ TemplateClass = Template
+ template = TemplateClass(template_content, name=template_name)
+ result = template.substitute(vars)
+ if options.output:
+ f = open(options.output, 'wb')
+ f.write(result)
+ f.close()
+ else:
+ sys.stdout.write(result)
+
+if __name__ == '__main__':
+ fill_command()
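A minimal usage sketch for the template machinery above, assuming the vendored
Cython.Tempita package is importable and exposes Template as its public entry point;
the call pattern mirrors what fill_command does with its parsed arguments:

    from Cython.Tempita import Template

    tmpl = Template("Hello {{name}}!{{if excited}} :){{endif}}")
    print(tmpl.substitute(name="world", excited=True))   # -> Hello world! :)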
diff --git a/contrib/tools/cython/Cython/Tempita/compat3.py b/contrib/tools/cython/Cython/Tempita/compat3.py
index 98cdb8fa36..9905530757 100644
--- a/contrib/tools/cython/Cython/Tempita/compat3.py
+++ b/contrib/tools/cython/Cython/Tempita/compat3.py
@@ -1,47 +1,47 @@
-import sys
-
+import sys
+
__all__ = ['b', 'basestring_', 'bytes', 'unicode_', 'next', 'is_unicode']
-
-if sys.version < "3":
- b = bytes = str
- basestring_ = basestring
+
+if sys.version < "3":
+ b = bytes = str
+ basestring_ = basestring
unicode_ = unicode
-else:
-
- def b(s):
- if isinstance(s, str):
- return s.encode('latin1')
- return bytes(s)
- basestring_ = (bytes, str)
- bytes = bytes
+else:
+
+ def b(s):
+ if isinstance(s, str):
+ return s.encode('latin1')
+ return bytes(s)
+ basestring_ = (bytes, str)
+ bytes = bytes
unicode_ = str
-text = str
-
-if sys.version < "3":
-
- def next(obj):
- return obj.next()
-else:
- next = next
-
-if sys.version < "3":
-
- def is_unicode(obj):
- return isinstance(obj, unicode)
-else:
-
- def is_unicode(obj):
- return isinstance(obj, str)
-
-
-def coerce_text(v):
- if not isinstance(v, basestring_):
- if sys.version < "3":
- attr = '__unicode__'
- else:
- attr = '__str__'
- if hasattr(v, attr):
- return unicode(v)
- else:
- return bytes(v)
- return v
+text = str
+
+if sys.version < "3":
+
+ def next(obj):
+ return obj.next()
+else:
+ next = next
+
+if sys.version < "3":
+
+ def is_unicode(obj):
+ return isinstance(obj, unicode)
+else:
+
+ def is_unicode(obj):
+ return isinstance(obj, str)
+
+
+def coerce_text(v):
+ if not isinstance(v, basestring_):
+ if sys.version < "3":
+ attr = '__unicode__'
+ else:
+ attr = '__str__'
+ if hasattr(v, attr):
+ return unicode(v)
+ else:
+ return bytes(v)
+ return v
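A quick illustration of the compatibility shims above, assuming the Python 3 branch
(str/bytes semantics) is the one in effect:

    from Cython.Tempita.compat3 import b, is_unicode

    assert b('abc') == b'abc'                  # str is encoded to latin-1 bytes
    assert is_unicode('abc') and not is_unicode(b'abc')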
diff --git a/contrib/tools/cython/Cython/TestUtils.py b/contrib/tools/cython/Cython/TestUtils.py
index 97bebb0006..9d6eb67fc3 100644
--- a/contrib/tools/cython/Cython/TestUtils.py
+++ b/contrib/tools/cython/Cython/TestUtils.py
@@ -1,217 +1,217 @@
from __future__ import absolute_import
-
+
import os
-import unittest
-import tempfile
-
+import unittest
+import tempfile
+
from .Compiler import Errors
from .CodeWriter import CodeWriter
from .Compiler.TreeFragment import TreeFragment, strip_common_indent
from .Compiler.Visitor import TreeVisitor, VisitorTransform
from .Compiler import TreePath
-
-
-class NodeTypeWriter(TreeVisitor):
- def __init__(self):
- super(NodeTypeWriter, self).__init__()
- self._indents = 0
- self.result = []
-
- def visit_Node(self, node):
- if not self.access_path:
- name = u"(root)"
- else:
- tip = self.access_path[-1]
- if tip[2] is not None:
- name = u"%s[%d]" % tip[1:3]
- else:
- name = tip[1]
-
- self.result.append(u" " * self._indents +
- u"%s: %s" % (name, node.__class__.__name__))
- self._indents += 1
- self.visitchildren(node)
- self._indents -= 1
-
-
-def treetypes(root):
- """Returns a string representing the tree by class names.
- There's a leading and trailing whitespace so that it can be
- compared by simple string comparison while still making test
- cases look ok."""
- w = NodeTypeWriter()
- w.visit(root)
- return u"\n".join([u""] + w.result + [u""])
-
-
-class CythonTest(unittest.TestCase):
-
- def setUp(self):
- self.listing_file = Errors.listing_file
- self.echo_file = Errors.echo_file
- Errors.listing_file = Errors.echo_file = None
-
- def tearDown(self):
- Errors.listing_file = self.listing_file
- Errors.echo_file = self.echo_file
-
- def assertLines(self, expected, result):
- "Checks that the given strings or lists of strings are equal line by line"
+
+
+class NodeTypeWriter(TreeVisitor):
+ def __init__(self):
+ super(NodeTypeWriter, self).__init__()
+ self._indents = 0
+ self.result = []
+
+ def visit_Node(self, node):
+ if not self.access_path:
+ name = u"(root)"
+ else:
+ tip = self.access_path[-1]
+ if tip[2] is not None:
+ name = u"%s[%d]" % tip[1:3]
+ else:
+ name = tip[1]
+
+ self.result.append(u" " * self._indents +
+ u"%s: %s" % (name, node.__class__.__name__))
+ self._indents += 1
+ self.visitchildren(node)
+ self._indents -= 1
+
+
+def treetypes(root):
+ """Returns a string representing the tree by class names.
+ There's a leading and trailing whitespace so that it can be
+ compared by simple string comparison while still making test
+ cases look ok."""
+ w = NodeTypeWriter()
+ w.visit(root)
+ return u"\n".join([u""] + w.result + [u""])
+
+
+class CythonTest(unittest.TestCase):
+
+ def setUp(self):
+ self.listing_file = Errors.listing_file
+ self.echo_file = Errors.echo_file
+ Errors.listing_file = Errors.echo_file = None
+
+ def tearDown(self):
+ Errors.listing_file = self.listing_file
+ Errors.echo_file = self.echo_file
+
+ def assertLines(self, expected, result):
+ "Checks that the given strings or lists of strings are equal line by line"
if not isinstance(expected, list):
expected = expected.split(u"\n")
if not isinstance(result, list):
result = result.split(u"\n")
- for idx, (expected_line, result_line) in enumerate(zip(expected, result)):
+ for idx, (expected_line, result_line) in enumerate(zip(expected, result)):
self.assertEqual(expected_line, result_line,
"Line %d:\nExp: %s\nGot: %s" % (idx, expected_line, result_line))
- self.assertEqual(len(expected), len(result),
+ self.assertEqual(len(expected), len(result),
"Unmatched lines. Got:\n%s\nExpected:\n%s" % ("\n".join(expected), u"\n".join(result)))
-
- def codeToLines(self, tree):
- writer = CodeWriter()
- writer.write(tree)
- return writer.result.lines
-
- def codeToString(self, tree):
- return "\n".join(self.codeToLines(tree))
-
- def assertCode(self, expected, result_tree):
- result_lines = self.codeToLines(result_tree)
-
- expected_lines = strip_common_indent(expected.split("\n"))
-
- for idx, (line, expected_line) in enumerate(zip(result_lines, expected_lines)):
+
+ def codeToLines(self, tree):
+ writer = CodeWriter()
+ writer.write(tree)
+ return writer.result.lines
+
+ def codeToString(self, tree):
+ return "\n".join(self.codeToLines(tree))
+
+ def assertCode(self, expected, result_tree):
+ result_lines = self.codeToLines(result_tree)
+
+ expected_lines = strip_common_indent(expected.split("\n"))
+
+ for idx, (line, expected_line) in enumerate(zip(result_lines, expected_lines)):
self.assertEqual(expected_line, line,
"Line %d:\nGot: %s\nExp: %s" % (idx, line, expected_line))
- self.assertEqual(len(result_lines), len(expected_lines),
+ self.assertEqual(len(result_lines), len(expected_lines),
"Unmatched lines. Got:\n%s\nExpected:\n%s" % ("\n".join(result_lines), expected))
-
- def assertNodeExists(self, path, result_tree):
- self.assertNotEqual(TreePath.find_first(result_tree, path), None,
- "Path '%s' not found in result tree" % path)
-
+
+ def assertNodeExists(self, path, result_tree):
+ self.assertNotEqual(TreePath.find_first(result_tree, path), None,
+ "Path '%s' not found in result tree" % path)
+
def fragment(self, code, pxds=None, pipeline=None):
- "Simply create a tree fragment using the name of the test-case in parse errors."
+ "Simply create a tree fragment using the name of the test-case in parse errors."
if pxds is None:
pxds = {}
if pipeline is None:
pipeline = []
- name = self.id()
+ name = self.id()
if name.startswith("__main__."):
name = name[len("__main__."):]
- name = name.replace(".", "_")
- return TreeFragment(code, name, pxds, pipeline=pipeline)
-
- def treetypes(self, root):
- return treetypes(root)
-
- def should_fail(self, func, exc_type=Exception):
- """Calls "func" and fails if it doesn't raise the right exception
- (any exception by default). Also returns the exception in question.
- """
- try:
- func()
- self.fail("Expected an exception of type %r" % exc_type)
+ name = name.replace(".", "_")
+ return TreeFragment(code, name, pxds, pipeline=pipeline)
+
+ def treetypes(self, root):
+ return treetypes(root)
+
+ def should_fail(self, func, exc_type=Exception):
+ """Calls "func" and fails if it doesn't raise the right exception
+ (any exception by default). Also returns the exception in question.
+ """
+ try:
+ func()
+ self.fail("Expected an exception of type %r" % exc_type)
except exc_type as e:
self.assertTrue(isinstance(e, exc_type))
- return e
-
- def should_not_fail(self, func):
- """Calls func and succeeds if and only if no exception is raised
- (i.e. converts exception raising into a failed testcase). Returns
- the return value of func."""
- try:
- return func()
+ return e
+
+ def should_not_fail(self, func):
+ """Calls func and succeeds if and only if no exception is raised
+ (i.e. converts exception raising into a failed testcase). Returns
+ the return value of func."""
+ try:
+ return func()
except Exception as exc:
self.fail(str(exc))
-
-
-class TransformTest(CythonTest):
- """
- Utility base class for transform unit tests. It is based around constructing
- test trees (either explicitly or by parsing a Cython code string); running
- the transform, serialize it using a customized Cython serializer (with
- special markup for nodes that cannot be represented in Cython),
- and do a string-comparison line-by-line of the result.
-
- To create a test case:
- - Call run_pipeline. The pipeline should at least contain the transform you
- are testing; pyx should be either a string (passed to the parser to
- create a post-parse tree) or a node representing input to pipeline.
- The result will be a transformed result.
-
- - Check that the tree is correct. If wanted, assertCode can be used, which
- takes a code string as expected, and a ModuleNode in result_tree
- (it serializes the ModuleNode to a string and compares line-by-line).
-
- All code strings are first stripped for whitespace lines and then common
- indentation.
-
- Plans: One could have a pxd dictionary parameter to run_pipeline.
- """
-
+
+
+class TransformTest(CythonTest):
+ """
+ Utility base class for transform unit tests. It is based around constructing
+ test trees (either explicitly or by parsing a Cython code string); running
+ the transform, serialize it using a customized Cython serializer (with
+ special markup for nodes that cannot be represented in Cython),
+ and do a string-comparison line-by-line of the result.
+
+ To create a test case:
+ - Call run_pipeline. The pipeline should at least contain the transform you
+ are testing; pyx should be either a string (passed to the parser to
+ create a post-parse tree) or a node representing input to pipeline.
+ The result will be a transformed result.
+
+ - Check that the tree is correct. If wanted, assertCode can be used, which
+ takes a code string as expected, and a ModuleNode in result_tree
+ (it serializes the ModuleNode to a string and compares line-by-line).
+
+ All code strings are first stripped for whitespace lines and then common
+ indentation.
+
+ Plans: One could have a pxd dictionary parameter to run_pipeline.
+ """
+
def run_pipeline(self, pipeline, pyx, pxds=None):
if pxds is None:
pxds = {}
- tree = self.fragment(pyx, pxds).root
- # Run pipeline
- for T in pipeline:
- tree = T(tree)
- return tree
-
-
-class TreeAssertVisitor(VisitorTransform):
- # actually, a TreeVisitor would be enough, but this needs to run
- # as part of the compiler pipeline
-
- def visit_CompilerDirectivesNode(self, node):
- directives = node.directives
- if 'test_assert_path_exists' in directives:
- for path in directives['test_assert_path_exists']:
- if TreePath.find_first(node, path) is None:
- Errors.error(
- node.pos,
- "Expected path '%s' not found in result tree" % path)
- if 'test_fail_if_path_exists' in directives:
- for path in directives['test_fail_if_path_exists']:
- if TreePath.find_first(node, path) is not None:
- Errors.error(
- node.pos,
- "Unexpected path '%s' found in result tree" % path)
- self.visitchildren(node)
- return node
-
- visit_Node = VisitorTransform.recurse_to_children
-
-
-def unpack_source_tree(tree_file, dir=None):
- if dir is None:
- dir = tempfile.mkdtemp()
- header = []
- cur_file = None
- f = open(tree_file)
- try:
- lines = f.readlines()
- finally:
- f.close()
- del f
- try:
- for line in lines:
- if line[:5] == '#####':
- filename = line.strip().strip('#').strip().replace('/', os.path.sep)
- path = os.path.join(dir, filename)
- if not os.path.exists(os.path.dirname(path)):
- os.makedirs(os.path.dirname(path))
- if cur_file is not None:
- f, cur_file = cur_file, None
- f.close()
- cur_file = open(path, 'w')
- elif cur_file is not None:
- cur_file.write(line)
- elif line.strip() and not line.lstrip().startswith('#'):
- if line.strip() not in ('"""', "'''"):
- header.append(line)
- finally:
- if cur_file is not None:
- cur_file.close()
- return dir, ''.join(header)
+ tree = self.fragment(pyx, pxds).root
+ # Run pipeline
+ for T in pipeline:
+ tree = T(tree)
+ return tree
+
+
+class TreeAssertVisitor(VisitorTransform):
+ # actually, a TreeVisitor would be enough, but this needs to run
+ # as part of the compiler pipeline
+
+ def visit_CompilerDirectivesNode(self, node):
+ directives = node.directives
+ if 'test_assert_path_exists' in directives:
+ for path in directives['test_assert_path_exists']:
+ if TreePath.find_first(node, path) is None:
+ Errors.error(
+ node.pos,
+ "Expected path '%s' not found in result tree" % path)
+ if 'test_fail_if_path_exists' in directives:
+ for path in directives['test_fail_if_path_exists']:
+ if TreePath.find_first(node, path) is not None:
+ Errors.error(
+ node.pos,
+ "Unexpected path '%s' found in result tree" % path)
+ self.visitchildren(node)
+ return node
+
+ visit_Node = VisitorTransform.recurse_to_children
+
+
+def unpack_source_tree(tree_file, dir=None):
+ if dir is None:
+ dir = tempfile.mkdtemp()
+ header = []
+ cur_file = None
+ f = open(tree_file)
+ try:
+ lines = f.readlines()
+ finally:
+ f.close()
+ del f
+ try:
+ for line in lines:
+ if line[:5] == '#####':
+ filename = line.strip().strip('#').strip().replace('/', os.path.sep)
+ path = os.path.join(dir, filename)
+ if not os.path.exists(os.path.dirname(path)):
+ os.makedirs(os.path.dirname(path))
+ if cur_file is not None:
+ f, cur_file = cur_file, None
+ f.close()
+ cur_file = open(path, 'w')
+ elif cur_file is not None:
+ cur_file.write(line)
+ elif line.strip() and not line.lstrip().startswith('#'):
+ if line.strip() not in ('"""', "'''"):
+ header.append(line)
+ finally:
+ if cur_file is not None:
+ cur_file.close()
+ return dir, ''.join(header)
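A hedged sketch of how TransformTest is meant to be driven, following its docstring;
the test class, method name, and code fragment here are illustrative placeholders:

    from Cython.TestUtils import TransformTest

    class TestIdentityPipeline(TransformTest):
        def test_roundtrip(self):
            # With an empty pipeline, run_pipeline just parses the fragment and
            # returns its root; assertCode then serializes it and compares lines.
            tree = self.run_pipeline([], u"def f(x):\n    pass")
            self.assertCode(u"def f(x):\n    pass", tree)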
diff --git a/contrib/tools/cython/Cython/Tests/TestCodeWriter.py b/contrib/tools/cython/Cython/Tests/TestCodeWriter.py
index 982629879e..42e457da20 100644
--- a/contrib/tools/cython/Cython/Tests/TestCodeWriter.py
+++ b/contrib/tools/cython/Cython/Tests/TestCodeWriter.py
@@ -1,82 +1,82 @@
-from Cython.TestUtils import CythonTest
-
-class TestCodeWriter(CythonTest):
- # CythonTest uses the CodeWriter heavily, so do some checking by
- # roundtripping Cython code through the test framework.
-
+from Cython.TestUtils import CythonTest
+
+class TestCodeWriter(CythonTest):
+ # CythonTest uses the CodeWriter heavily, so do some checking by
+ # roundtripping Cython code through the test framework.
+
# Note that this test is dependent upon the normal Cython parser
- # to generate the input trees to the CodeWriter. This save *a lot*
- # of time; better to spend that time writing other tests than perfecting
- # this one...
-
- # Whitespace is very significant in this process:
- # - always newline on new block (!)
- # - indent 4 spaces
- # - 1 space around every operator
-
- def t(self, codestr):
- self.assertCode(codestr, self.fragment(codestr).root)
-
- def test_print(self):
- self.t(u"""
- print x, y
- print x + y ** 2
- print x, y, z,
- """)
-
- def test_if(self):
- self.t(u"if x:\n pass")
-
- def test_ifelifelse(self):
- self.t(u"""
- if x:
- pass
- elif y:
- pass
- elif z + 34 ** 34 - 2:
- pass
- else:
- pass
- """)
-
- def test_def(self):
- self.t(u"""
- def f(x, y, z):
- pass
- def f(x = 34, y = 54, z):
- pass
- """)
-
- def test_longness_and_signedness(self):
- self.t(u"def f(unsigned long long long long long int y):\n pass")
-
- def test_signed_short(self):
- self.t(u"def f(signed short int y):\n pass")
-
- def test_typed_args(self):
- self.t(u"def f(int x, unsigned long int y):\n pass")
-
- def test_cdef_var(self):
- self.t(u"""
- cdef int hello
- cdef int hello = 4, x = 3, y, z
- """)
-
- def test_for_loop(self):
- self.t(u"""
- for x, y, z in f(g(h(34) * 2) + 23):
- print x, y, z
- else:
- print 43
- """)
-
- def test_inplace_assignment(self):
- self.t(u"x += 43")
-
- def test_attribute(self):
- self.t(u"a.x")
-
-if __name__ == "__main__":
- import unittest
- unittest.main()
-
+ # to generate the input trees to the CodeWriter. This save *a lot*
+ # of time; better to spend that time writing other tests than perfecting
+ # this one...
+
+ # Whitespace is very significant in this process:
+ # - always newline on new block (!)
+ # - indent 4 spaces
+ # - 1 space around every operator
+
+ def t(self, codestr):
+ self.assertCode(codestr, self.fragment(codestr).root)
+
+ def test_print(self):
+ self.t(u"""
+ print x, y
+ print x + y ** 2
+ print x, y, z,
+ """)
+
+ def test_if(self):
+ self.t(u"if x:\n pass")
+
+ def test_ifelifelse(self):
+ self.t(u"""
+ if x:
+ pass
+ elif y:
+ pass
+ elif z + 34 ** 34 - 2:
+ pass
+ else:
+ pass
+ """)
+
+ def test_def(self):
+ self.t(u"""
+ def f(x, y, z):
+ pass
+ def f(x = 34, y = 54, z):
+ pass
+ """)
+
+ def test_longness_and_signedness(self):
+ self.t(u"def f(unsigned long long long long long int y):\n pass")
+
+ def test_signed_short(self):
+ self.t(u"def f(signed short int y):\n pass")
+
+ def test_typed_args(self):
+ self.t(u"def f(int x, unsigned long int y):\n pass")
+
+ def test_cdef_var(self):
+ self.t(u"""
+ cdef int hello
+ cdef int hello = 4, x = 3, y, z
+ """)
+
+ def test_for_loop(self):
+ self.t(u"""
+ for x, y, z in f(g(h(34) * 2) + 23):
+ print x, y, z
+ else:
+ print 43
+ """)
+
+ def test_inplace_assignment(self):
+ self.t(u"x += 43")
+
+ def test_attribute(self):
+ self.t(u"a.x")
+
+if __name__ == "__main__":
+ import unittest
+ unittest.main()
+
diff --git a/contrib/tools/cython/Cython/Tests/TestJediTyper.py b/contrib/tools/cython/Cython/Tests/TestJediTyper.py
index 3991be731a..253adef171 100644
--- a/contrib/tools/cython/Cython/Tests/TestJediTyper.py
+++ b/contrib/tools/cython/Cython/Tests/TestJediTyper.py
@@ -1,122 +1,122 @@
-# -*- coding: utf-8 -*-
-# tag: jedi
-
-from __future__ import absolute_import
-
-import sys
-import os.path
-
-from textwrap import dedent
-from contextlib import contextmanager
-from tempfile import NamedTemporaryFile
-
-from Cython.Compiler.ParseTreeTransforms import NormalizeTree, InterpretCompilerDirectives
-from Cython.Compiler import Main, Symtab, Visitor
-from Cython.TestUtils import TransformTest
-
-TOOLS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'Tools'))
-
-
-@contextmanager
-def _tempfile(code):
- code = dedent(code)
+# -*- coding: utf-8 -*-
+# tag: jedi
+
+from __future__ import absolute_import
+
+import sys
+import os.path
+
+from textwrap import dedent
+from contextlib import contextmanager
+from tempfile import NamedTemporaryFile
+
+from Cython.Compiler.ParseTreeTransforms import NormalizeTree, InterpretCompilerDirectives
+from Cython.Compiler import Main, Symtab, Visitor
+from Cython.TestUtils import TransformTest
+
+TOOLS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'Tools'))
+
+
+@contextmanager
+def _tempfile(code):
+ code = dedent(code)
if not isinstance(code, bytes):
- code = code.encode('utf8')
-
- with NamedTemporaryFile(suffix='.py') as f:
- f.write(code)
- f.seek(0)
- yield f
-
-
-def _test_typing(code, inject=False):
- sys.path.insert(0, TOOLS_DIR)
- try:
+ code = code.encode('utf8')
+
+ with NamedTemporaryFile(suffix='.py') as f:
+ f.write(code)
+ f.seek(0)
+ yield f
+
+
+def _test_typing(code, inject=False):
+ sys.path.insert(0, TOOLS_DIR)
+ try:
import jedityper
- finally:
- sys.path.remove(TOOLS_DIR)
- lines = []
- with _tempfile(code) as f:
+ finally:
+ sys.path.remove(TOOLS_DIR)
+ lines = []
+ with _tempfile(code) as f:
types = jedityper.analyse(f.name)
- if inject:
+ if inject:
lines = jedityper.inject_types(f.name, types)
- return types, lines
-
-
-class DeclarationsFinder(Visitor.VisitorTransform):
- directives = None
-
- visit_Node = Visitor.VisitorTransform.recurse_to_children
-
- def visit_CompilerDirectivesNode(self, node):
- if not self.directives:
- self.directives = []
- self.directives.append(node)
- self.visitchildren(node)
- return node
-
-
-class TestJediTyper(TransformTest):
- def _test(self, code):
- return _test_typing(code)[0]
-
- def test_typing_global_int_loop(self):
- code = '''\
- for i in range(10):
- a = i + 1
- '''
- types = self._test(code)
- self.assertIn((None, (1, 0)), types)
- variables = types.pop((None, (1, 0)))
- self.assertFalse(types)
- self.assertEqual({'a': set(['int']), 'i': set(['int'])}, variables)
-
- def test_typing_function_int_loop(self):
- code = '''\
- def func(x):
- for i in range(x):
- a = i + 1
- return a
- '''
- types = self._test(code)
- self.assertIn(('func', (1, 0)), types)
- variables = types.pop(('func', (1, 0)))
- self.assertFalse(types)
- self.assertEqual({'a': set(['int']), 'i': set(['int'])}, variables)
-
+ return types, lines
+
+
+class DeclarationsFinder(Visitor.VisitorTransform):
+ directives = None
+
+ visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+ def visit_CompilerDirectivesNode(self, node):
+ if not self.directives:
+ self.directives = []
+ self.directives.append(node)
+ self.visitchildren(node)
+ return node
+
+
+class TestJediTyper(TransformTest):
+ def _test(self, code):
+ return _test_typing(code)[0]
+
+ def test_typing_global_int_loop(self):
+ code = '''\
+ for i in range(10):
+ a = i + 1
+ '''
+ types = self._test(code)
+ self.assertIn((None, (1, 0)), types)
+ variables = types.pop((None, (1, 0)))
+ self.assertFalse(types)
+ self.assertEqual({'a': set(['int']), 'i': set(['int'])}, variables)
+
+ def test_typing_function_int_loop(self):
+ code = '''\
+ def func(x):
+ for i in range(x):
+ a = i + 1
+ return a
+ '''
+ types = self._test(code)
+ self.assertIn(('func', (1, 0)), types)
+ variables = types.pop(('func', (1, 0)))
+ self.assertFalse(types)
+ self.assertEqual({'a': set(['int']), 'i': set(['int'])}, variables)
+
def test_conflicting_types_in_function(self):
- code = '''\
- def func(a, b):
- print(a)
- a = 1
- b += a
- a = 'abc'
- return a, str(b)
-
- print(func(1.5, 2))
- '''
- types = self._test(code)
- self.assertIn(('func', (1, 0)), types)
- variables = types.pop(('func', (1, 0)))
- self.assertFalse(types)
+ code = '''\
+ def func(a, b):
+ print(a)
+ a = 1
+ b += a
+ a = 'abc'
+ return a, str(b)
+
+ print(func(1.5, 2))
+ '''
+ types = self._test(code)
+ self.assertIn(('func', (1, 0)), types)
+ variables = types.pop(('func', (1, 0)))
+ self.assertFalse(types)
self.assertEqual({'a': set(['float', 'int', 'str']), 'b': set(['int'])}, variables)
-
- def _test_typing_function_char_loop(self):
- code = '''\
- def func(x):
- l = []
- for c in x:
- l.append(c)
- return l
-
- print(func('abcdefg'))
- '''
- types = self._test(code)
- self.assertIn(('func', (1, 0)), types)
- variables = types.pop(('func', (1, 0)))
- self.assertFalse(types)
- self.assertEqual({'a': set(['int']), 'i': set(['int'])}, variables)
-
+
+ def _test_typing_function_char_loop(self):
+ code = '''\
+ def func(x):
+ l = []
+ for c in x:
+ l.append(c)
+ return l
+
+ print(func('abcdefg'))
+ '''
+ types = self._test(code)
+ self.assertIn(('func', (1, 0)), types)
+ variables = types.pop(('func', (1, 0)))
+ self.assertFalse(types)
+ self.assertEqual({'a': set(['int']), 'i': set(['int'])}, variables)
+
def test_typing_global_list(self):
code = '''\
a = [x for x in range(10)]
@@ -136,7 +136,7 @@ class TestJediTyper(TransformTest):
a = [[], []]
b = [0]* 10 + a
c = a[0]
-
+
print(func([0]*100))
'''
types = self._test(code)
@@ -204,22 +204,22 @@ class TestJediTyper(TransformTest):
self.assertEqual({'a': set(['set']), 'c': set(['set']), 'd': set(['set']), 'x': set(['set'])}, variables)
-class TestTypeInjection(TestJediTyper):
- """
- Subtype of TestJediTyper that additionally tests type injection and compilation.
- """
- def setUp(self):
- super(TestTypeInjection, self).setUp()
- compilation_options = Main.CompilationOptions(Main.default_options)
- ctx = compilation_options.create_context()
- transform = InterpretCompilerDirectives(ctx, ctx.compiler_directives)
- transform.module_scope = Symtab.ModuleScope('__main__', None, ctx)
- self.declarations_finder = DeclarationsFinder()
- self.pipeline = [NormalizeTree(None), transform, self.declarations_finder]
-
- def _test(self, code):
- types, lines = _test_typing(code, inject=True)
- tree = self.run_pipeline(self.pipeline, ''.join(lines))
- directives = self.declarations_finder.directives
- # TODO: validate directives
- return types
+class TestTypeInjection(TestJediTyper):
+ """
+ Subtype of TestJediTyper that additionally tests type injection and compilation.
+ """
+ def setUp(self):
+ super(TestTypeInjection, self).setUp()
+ compilation_options = Main.CompilationOptions(Main.default_options)
+ ctx = compilation_options.create_context()
+ transform = InterpretCompilerDirectives(ctx, ctx.compiler_directives)
+ transform.module_scope = Symtab.ModuleScope('__main__', None, ctx)
+ self.declarations_finder = DeclarationsFinder()
+ self.pipeline = [NormalizeTree(None), transform, self.declarations_finder]
+
+ def _test(self, code):
+ types, lines = _test_typing(code, inject=True)
+ tree = self.run_pipeline(self.pipeline, ''.join(lines))
+ directives = self.declarations_finder.directives
+ # TODO: validate directives
+ return types
diff --git a/contrib/tools/cython/Cython/Tests/TestStringIOTree.py b/contrib/tools/cython/Cython/Tests/TestStringIOTree.py
index 724920085e..a15f2cd88d 100644
--- a/contrib/tools/cython/Cython/Tests/TestStringIOTree.py
+++ b/contrib/tools/cython/Cython/Tests/TestStringIOTree.py
@@ -1,67 +1,67 @@
-import unittest
-
-from Cython import StringIOTree as stringtree
-
-code = """
-cdef int spam # line 1
-
-cdef ham():
- a = 1
- b = 2
- c = 3
- d = 4
-
-def eggs():
- pass
-
-cpdef bacon():
- print spam
- print 'scotch'
- print 'tea?'
- print 'or coffee?' # line 16
-"""
-
-linemap = dict(enumerate(code.splitlines()))
-
-class TestStringIOTree(unittest.TestCase):
-
- def setUp(self):
- self.tree = stringtree.StringIOTree()
-
- def test_markers(self):
- assert not self.tree.allmarkers()
-
- def test_insertion(self):
- self.write_lines((1, 2, 3))
- line_4_to_6_insertion_point = self.tree.insertion_point()
- self.write_lines((7, 8))
- line_9_to_13_insertion_point = self.tree.insertion_point()
- self.write_lines((14, 15, 16))
-
- line_4_insertion_point = line_4_to_6_insertion_point.insertion_point()
- self.write_lines((5, 6), tree=line_4_to_6_insertion_point)
-
- line_9_to_12_insertion_point = (
- line_9_to_13_insertion_point.insertion_point())
- self.write_line(13, tree=line_9_to_13_insertion_point)
-
- self.write_line(4, tree=line_4_insertion_point)
- self.write_line(9, tree=line_9_to_12_insertion_point)
- line_10_insertion_point = line_9_to_12_insertion_point.insertion_point()
- self.write_line(11, tree=line_9_to_12_insertion_point)
- self.write_line(10, tree=line_10_insertion_point)
- self.write_line(12, tree=line_9_to_12_insertion_point)
-
+import unittest
+
+from Cython import StringIOTree as stringtree
+
+code = """
+cdef int spam # line 1
+
+cdef ham():
+ a = 1
+ b = 2
+ c = 3
+ d = 4
+
+def eggs():
+ pass
+
+cpdef bacon():
+ print spam
+ print 'scotch'
+ print 'tea?'
+ print 'or coffee?' # line 16
+"""
+
+linemap = dict(enumerate(code.splitlines()))
+
+class TestStringIOTree(unittest.TestCase):
+
+ def setUp(self):
+ self.tree = stringtree.StringIOTree()
+
+ def test_markers(self):
+ assert not self.tree.allmarkers()
+
+ def test_insertion(self):
+ self.write_lines((1, 2, 3))
+ line_4_to_6_insertion_point = self.tree.insertion_point()
+ self.write_lines((7, 8))
+ line_9_to_13_insertion_point = self.tree.insertion_point()
+ self.write_lines((14, 15, 16))
+
+ line_4_insertion_point = line_4_to_6_insertion_point.insertion_point()
+ self.write_lines((5, 6), tree=line_4_to_6_insertion_point)
+
+ line_9_to_12_insertion_point = (
+ line_9_to_13_insertion_point.insertion_point())
+ self.write_line(13, tree=line_9_to_13_insertion_point)
+
+ self.write_line(4, tree=line_4_insertion_point)
+ self.write_line(9, tree=line_9_to_12_insertion_point)
+ line_10_insertion_point = line_9_to_12_insertion_point.insertion_point()
+ self.write_line(11, tree=line_9_to_12_insertion_point)
+ self.write_line(10, tree=line_10_insertion_point)
+ self.write_line(12, tree=line_9_to_12_insertion_point)
+
self.assertEqual(self.tree.allmarkers(), list(range(1, 17)))
- self.assertEqual(code.strip(), self.tree.getvalue().strip())
-
-
- def write_lines(self, linenos, tree=None):
- for lineno in linenos:
- self.write_line(lineno, tree=tree)
-
- def write_line(self, lineno, tree=None):
- if tree is None:
- tree = self.tree
- tree.markers.append(lineno)
- tree.write(linemap[lineno] + '\n')
+ self.assertEqual(code.strip(), self.tree.getvalue().strip())
+
+
+ def write_lines(self, linenos, tree=None):
+ for lineno in linenos:
+ self.write_line(lineno, tree=tree)
+
+ def write_line(self, lineno, tree=None):
+ if tree is None:
+ tree = self.tree
+ tree.markers.append(lineno)
+ tree.write(linemap[lineno] + '\n')
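The test above exercises StringIOTree by writing lines out of order and checking that insertion points splice them back into sequence. A condensed sketch of the same behaviour, using only the calls the test itself relies on (write, insertion_point, getvalue) and assuming the module imports as it does in the test:

    from Cython import StringIOTree as stringtree

    tree = stringtree.StringIOTree()
    tree.write('line 1\n')
    gap = tree.insertion_point()      # remember a position to fill in later
    tree.write('line 3\n')
    gap.write('line 2\n')             # written after line 3, but lands before it
    assert tree.getvalue() == 'line 1\nline 2\nline 3\n'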
diff --git a/contrib/tools/cython/Cython/Tests/__init__.py b/contrib/tools/cython/Cython/Tests/__init__.py
index 4a2889e8e1..fa81adaff6 100644
--- a/contrib/tools/cython/Cython/Tests/__init__.py
+++ b/contrib/tools/cython/Cython/Tests/__init__.py
@@ -1 +1 @@
-# empty file
+# empty file
diff --git a/contrib/tools/cython/Cython/Tests/xmlrunner.py b/contrib/tools/cython/Cython/Tests/xmlrunner.py
index b86156bc08..d6838aa22e 100644
--- a/contrib/tools/cython/Cython/Tests/xmlrunner.py
+++ b/contrib/tools/cython/Cython/Tests/xmlrunner.py
@@ -1,191 +1,191 @@
-# -*- coding: utf-8 -*-
-
-"""unittest-xml-reporting is a PyUnit-based TestRunner that can export test
-results to XML files that can be consumed by a wide range of tools, such as
-build systems, IDEs and Continuous Integration servers.
-
-This module provides the XMLTestRunner class, which is heavily based on the
-default TextTestRunner. This makes the XMLTestRunner very simple to use.
-
-The script below, adapted from the unittest documentation, shows how to use
-XMLTestRunner in a very simple way. In fact, the only difference between this
-script and the original one is the last line:
-
-import random
-import unittest
-import xmlrunner
-
-class TestSequenceFunctions(unittest.TestCase):
- def setUp(self):
- self.seq = range(10)
-
- def test_shuffle(self):
- # make sure the shuffled sequence does not lose any elements
- random.shuffle(self.seq)
- self.seq.sort()
- self.assertEqual(self.seq, range(10))
-
- def test_choice(self):
- element = random.choice(self.seq)
+# -*- coding: utf-8 -*-
+
+"""unittest-xml-reporting is a PyUnit-based TestRunner that can export test
+results to XML files that can be consumed by a wide range of tools, such as
+build systems, IDEs and Continuous Integration servers.
+
+This module provides the XMLTestRunner class, which is heavily based on the
+default TextTestRunner. This makes the XMLTestRunner very simple to use.
+
+The script below, adapted from the unittest documentation, shows how to use
+XMLTestRunner in a very simple way. In fact, the only difference between this
+script and the original one is the last line:
+
+import random
+import unittest
+import xmlrunner
+
+class TestSequenceFunctions(unittest.TestCase):
+ def setUp(self):
+ self.seq = range(10)
+
+ def test_shuffle(self):
+ # make sure the shuffled sequence does not lose any elements
+ random.shuffle(self.seq)
+ self.seq.sort()
+ self.assertEqual(self.seq, range(10))
+
+ def test_choice(self):
+ element = random.choice(self.seq)
self.assertTrue(element in self.seq)
-
- def test_sample(self):
- self.assertRaises(ValueError, random.sample, self.seq, 20)
- for element in random.sample(self.seq, 5):
+
+ def test_sample(self):
+ self.assertRaises(ValueError, random.sample, self.seq, 20)
+ for element in random.sample(self.seq, 5):
self.assertTrue(element in self.seq)
-
-if __name__ == '__main__':
- unittest.main(testRunner=xmlrunner.XMLTestRunner(output='test-reports'))
-"""
-
+
+if __name__ == '__main__':
+ unittest.main(testRunner=xmlrunner.XMLTestRunner(output='test-reports'))
+"""
+
from __future__ import absolute_import
-import os
-import sys
-import time
+import os
+import sys
+import time
from unittest import TestResult, TextTestResult, TextTestRunner
-import xml.dom.minidom
+import xml.dom.minidom
try:
from StringIO import StringIO
except ImportError:
from io import StringIO # doesn't accept 'str' in Py2
-
-
-class XMLDocument(xml.dom.minidom.Document):
- def createCDATAOrText(self, data):
- if ']]>' in data:
- return self.createTextNode(data)
- return self.createCDATASection(data)
-
-
-class _TestInfo(object):
- """This class is used to keep useful information about the execution of a
- test method.
- """
-
- # Possible test outcomes
- (SUCCESS, FAILURE, ERROR) = range(3)
-
- def __init__(self, test_result, test_method, outcome=SUCCESS, err=None):
- "Create a new instance of _TestInfo."
- self.test_result = test_result
- self.test_method = test_method
- self.outcome = outcome
- self.err = err
- self.stdout = test_result.stdout and test_result.stdout.getvalue().strip() or ''
- self.stderr = test_result.stdout and test_result.stderr.getvalue().strip() or ''
-
- def get_elapsed_time(self):
- """Return the time that shows how long the test method took to
- execute.
- """
- return self.test_result.stop_time - self.test_result.start_time
-
- def get_description(self):
- "Return a text representation of the test method."
- return self.test_result.getDescription(self.test_method)
-
- def get_error_info(self):
- """Return a text representation of an exception thrown by a test
- method.
- """
- if not self.err:
- return ''
- return self.test_result._exc_info_to_string(
- self.err, self.test_method)
-
-
+
+
+class XMLDocument(xml.dom.minidom.Document):
+ def createCDATAOrText(self, data):
+ if ']]>' in data:
+ return self.createTextNode(data)
+ return self.createCDATASection(data)
+
+
+class _TestInfo(object):
+ """This class is used to keep useful information about the execution of a
+ test method.
+ """
+
+ # Possible test outcomes
+ (SUCCESS, FAILURE, ERROR) = range(3)
+
+ def __init__(self, test_result, test_method, outcome=SUCCESS, err=None):
+ "Create a new instance of _TestInfo."
+ self.test_result = test_result
+ self.test_method = test_method
+ self.outcome = outcome
+ self.err = err
+ self.stdout = test_result.stdout and test_result.stdout.getvalue().strip() or ''
+ self.stderr = test_result.stdout and test_result.stderr.getvalue().strip() or ''
+
+ def get_elapsed_time(self):
+ """Return the time that shows how long the test method took to
+ execute.
+ """
+ return self.test_result.stop_time - self.test_result.start_time
+
+ def get_description(self):
+ "Return a text representation of the test method."
+ return self.test_result.getDescription(self.test_method)
+
+ def get_error_info(self):
+ """Return a text representation of an exception thrown by a test
+ method.
+ """
+ if not self.err:
+ return ''
+ return self.test_result._exc_info_to_string(
+ self.err, self.test_method)
+
+
class _XMLTestResult(TextTestResult):
- """A test result class that can express test results in a XML report.
-
- Used by XMLTestRunner.
- """
- def __init__(self, stream=sys.stderr, descriptions=1, verbosity=1,
- elapsed_times=True):
- "Create a new instance of _XMLTestResult."
+ """A test result class that can express test results in a XML report.
+
+ Used by XMLTestRunner.
+ """
+ def __init__(self, stream=sys.stderr, descriptions=1, verbosity=1,
+ elapsed_times=True):
+ "Create a new instance of _XMLTestResult."
TextTestResult.__init__(self, stream, descriptions, verbosity)
- self.successes = []
- self.callback = None
- self.elapsed_times = elapsed_times
- self.output_patched = False
-
- def _prepare_callback(self, test_info, target_list, verbose_str,
- short_str):
- """Append a _TestInfo to the given target list and sets a callback
- method to be called by stopTest method.
- """
- target_list.append(test_info)
- def callback():
- """This callback prints the test method outcome to the stream,
- as well as the elapsed time.
- """
-
- # Ignore the elapsed times for a more reliable unit testing
- if not self.elapsed_times:
- self.start_time = self.stop_time = 0
-
- if self.showAll:
- self.stream.writeln('(%.3fs) %s' % \
- (test_info.get_elapsed_time(), verbose_str))
- elif self.dots:
- self.stream.write(short_str)
- self.callback = callback
-
- def _patch_standard_output(self):
- """Replace the stdout and stderr streams with string-based streams
- in order to capture the tests' output.
- """
- if not self.output_patched:
- (self.old_stdout, self.old_stderr) = (sys.stdout, sys.stderr)
- self.output_patched = True
- (sys.stdout, sys.stderr) = (self.stdout, self.stderr) = \
- (StringIO(), StringIO())
-
- def _restore_standard_output(self):
- "Restore the stdout and stderr streams."
- (sys.stdout, sys.stderr) = (self.old_stdout, self.old_stderr)
- self.output_patched = False
-
- def startTest(self, test):
- "Called before execute each test method."
- self._patch_standard_output()
- self.start_time = time.time()
- TestResult.startTest(self, test)
-
- if self.showAll:
- self.stream.write(' ' + self.getDescription(test))
- self.stream.write(" ... ")
-
- def stopTest(self, test):
- "Called after execute each test method."
- self._restore_standard_output()
+ self.successes = []
+ self.callback = None
+ self.elapsed_times = elapsed_times
+ self.output_patched = False
+
+ def _prepare_callback(self, test_info, target_list, verbose_str,
+ short_str):
+ """Append a _TestInfo to the given target list and sets a callback
+ method to be called by stopTest method.
+ """
+ target_list.append(test_info)
+ def callback():
+ """This callback prints the test method outcome to the stream,
+ as well as the elapsed time.
+ """
+
+ # Ignore the elapsed times for a more reliable unit testing
+ if not self.elapsed_times:
+ self.start_time = self.stop_time = 0
+
+ if self.showAll:
+ self.stream.writeln('(%.3fs) %s' % \
+ (test_info.get_elapsed_time(), verbose_str))
+ elif self.dots:
+ self.stream.write(short_str)
+ self.callback = callback
+
+ def _patch_standard_output(self):
+ """Replace the stdout and stderr streams with string-based streams
+ in order to capture the tests' output.
+ """
+ if not self.output_patched:
+ (self.old_stdout, self.old_stderr) = (sys.stdout, sys.stderr)
+ self.output_patched = True
+ (sys.stdout, sys.stderr) = (self.stdout, self.stderr) = \
+ (StringIO(), StringIO())
+
+ def _restore_standard_output(self):
+ "Restore the stdout and stderr streams."
+ (sys.stdout, sys.stderr) = (self.old_stdout, self.old_stderr)
+ self.output_patched = False
+
+ def startTest(self, test):
+ "Called before execute each test method."
+ self._patch_standard_output()
+ self.start_time = time.time()
+ TestResult.startTest(self, test)
+
+ if self.showAll:
+ self.stream.write(' ' + self.getDescription(test))
+ self.stream.write(" ... ")
+
+ def stopTest(self, test):
+ "Called after execute each test method."
+ self._restore_standard_output()
TextTestResult.stopTest(self, test)
- self.stop_time = time.time()
-
- if self.callback and callable(self.callback):
- self.callback()
- self.callback = None
-
- def addSuccess(self, test):
- "Called when a test executes successfully."
- self._prepare_callback(_TestInfo(self, test),
- self.successes, 'OK', '.')
-
- def addFailure(self, test, err):
- "Called when a test method fails."
- self._prepare_callback(_TestInfo(self, test, _TestInfo.FAILURE, err),
- self.failures, 'FAIL', 'F')
-
- def addError(self, test, err):
- "Called when a test method raises an error."
- self._prepare_callback(_TestInfo(self, test, _TestInfo.ERROR, err),
- self.errors, 'ERROR', 'E')
-
- def printErrorList(self, flavour, errors):
- "Write some information about the FAIL or ERROR to the stream."
- for test_info in errors:
- if isinstance(test_info, tuple):
- test_info, exc_info = test_info
+ self.stop_time = time.time()
+
+ if self.callback and callable(self.callback):
+ self.callback()
+ self.callback = None
+
+ def addSuccess(self, test):
+ "Called when a test executes successfully."
+ self._prepare_callback(_TestInfo(self, test),
+ self.successes, 'OK', '.')
+
+ def addFailure(self, test, err):
+ "Called when a test method fails."
+ self._prepare_callback(_TestInfo(self, test, _TestInfo.FAILURE, err),
+ self.failures, 'FAIL', 'F')
+
+ def addError(self, test, err):
+ "Called when a test method raises an error."
+ self._prepare_callback(_TestInfo(self, test, _TestInfo.ERROR, err),
+ self.errors, 'ERROR', 'E')
+
+ def printErrorList(self, flavour, errors):
+ "Write some information about the FAIL or ERROR to the stream."
+ for test_info in errors:
+ if isinstance(test_info, tuple):
+ test_info, exc_info = test_info
try:
t = test_info.get_elapsed_time()
@@ -203,195 +203,195 @@ class _XMLTestResult(TextTestResult):
except AttributeError:
err_info = str(test_info)
- self.stream.writeln(self.separator1)
+ self.stream.writeln(self.separator1)
self.stream.writeln('%s [%.3fs]: %s' % (flavour, t, descr))
- self.stream.writeln(self.separator2)
+ self.stream.writeln(self.separator2)
self.stream.writeln('%s' % err_info)
-
- def _get_info_by_testcase(self):
- """This method organizes test results by TestCase module. This
- information is used during the report generation, where a XML report
- will be generated for each TestCase.
- """
- tests_by_testcase = {}
-
- for tests in (self.successes, self.failures, self.errors):
- for test_info in tests:
+
+ def _get_info_by_testcase(self):
+ """This method organizes test results by TestCase module. This
+ information is used during the report generation, where a XML report
+ will be generated for each TestCase.
+ """
+ tests_by_testcase = {}
+
+ for tests in (self.successes, self.failures, self.errors):
+ for test_info in tests:
if not isinstance(test_info, _TestInfo):
print("Unexpected test result type: %r" % (test_info,))
continue
- testcase = type(test_info.test_method)
-
- # Ignore module name if it is '__main__'
- module = testcase.__module__ + '.'
- if module == '__main__.':
- module = ''
- testcase_name = module + testcase.__name__
-
- if testcase_name not in tests_by_testcase:
- tests_by_testcase[testcase_name] = []
- tests_by_testcase[testcase_name].append(test_info)
-
- return tests_by_testcase
-
- def _report_testsuite(suite_name, tests, xml_document):
- "Appends the testsuite section to the XML document."
- testsuite = xml_document.createElement('testsuite')
- xml_document.appendChild(testsuite)
-
- testsuite.setAttribute('name', str(suite_name))
- testsuite.setAttribute('tests', str(len(tests)))
-
- testsuite.setAttribute('time', '%.3f' %
- sum([e.get_elapsed_time() for e in tests]))
-
- failures = len([1 for e in tests if e.outcome == _TestInfo.FAILURE])
- testsuite.setAttribute('failures', str(failures))
-
- errors = len([1 for e in tests if e.outcome == _TestInfo.ERROR])
- testsuite.setAttribute('errors', str(errors))
-
- return testsuite
-
- _report_testsuite = staticmethod(_report_testsuite)
-
- def _report_testcase(suite_name, test_result, xml_testsuite, xml_document):
- "Appends a testcase section to the XML document."
- testcase = xml_document.createElement('testcase')
- xml_testsuite.appendChild(testcase)
-
- testcase.setAttribute('classname', str(suite_name))
- testcase.setAttribute('name', test_result.test_method.shortDescription()
- or getattr(test_result.test_method, '_testMethodName',
- str(test_result.test_method)))
- testcase.setAttribute('time', '%.3f' % test_result.get_elapsed_time())
-
- if (test_result.outcome != _TestInfo.SUCCESS):
- elem_name = ('failure', 'error')[test_result.outcome-1]
- failure = xml_document.createElement(elem_name)
- testcase.appendChild(failure)
-
- failure.setAttribute('type', str(test_result.err[0].__name__))
- failure.setAttribute('message', str(test_result.err[1]))
-
- error_info = test_result.get_error_info()
- failureText = xml_document.createCDATAOrText(error_info)
- failure.appendChild(failureText)
-
- _report_testcase = staticmethod(_report_testcase)
-
- def _report_output(test_runner, xml_testsuite, xml_document, stdout, stderr):
- "Appends the system-out and system-err sections to the XML document."
- systemout = xml_document.createElement('system-out')
- xml_testsuite.appendChild(systemout)
-
- systemout_text = xml_document.createCDATAOrText(stdout)
- systemout.appendChild(systemout_text)
-
- systemerr = xml_document.createElement('system-err')
- xml_testsuite.appendChild(systemerr)
-
- systemerr_text = xml_document.createCDATAOrText(stderr)
- systemerr.appendChild(systemerr_text)
-
- _report_output = staticmethod(_report_output)
-
- def generate_reports(self, test_runner):
- "Generates the XML reports to a given XMLTestRunner object."
- all_results = self._get_info_by_testcase()
-
- if type(test_runner.output) == str and not \
- os.path.exists(test_runner.output):
- os.makedirs(test_runner.output)
-
- for suite, tests in all_results.items():
- doc = XMLDocument()
-
- # Build the XML file
- testsuite = _XMLTestResult._report_testsuite(suite, tests, doc)
- stdout, stderr = [], []
- for test in tests:
- _XMLTestResult._report_testcase(suite, test, testsuite, doc)
- if test.stdout:
- stdout.extend(['*****************', test.get_description(), test.stdout])
- if test.stderr:
- stderr.extend(['*****************', test.get_description(), test.stderr])
- _XMLTestResult._report_output(test_runner, testsuite, doc,
- '\n'.join(stdout), '\n'.join(stderr))
- xml_content = doc.toprettyxml(indent='\t')
-
- if type(test_runner.output) is str:
- report_file = open('%s%sTEST-%s.xml' % \
- (test_runner.output, os.sep, suite), 'w')
- try:
- report_file.write(xml_content)
- finally:
- report_file.close()
- else:
- # Assume that test_runner.output is a stream
- test_runner.output.write(xml_content)
-
-
-class XMLTestRunner(TextTestRunner):
- """A test runner class that outputs the results in JUnit like XML files.
- """
+ testcase = type(test_info.test_method)
+
+ # Ignore module name if it is '__main__'
+ module = testcase.__module__ + '.'
+ if module == '__main__.':
+ module = ''
+ testcase_name = module + testcase.__name__
+
+ if testcase_name not in tests_by_testcase:
+ tests_by_testcase[testcase_name] = []
+ tests_by_testcase[testcase_name].append(test_info)
+
+ return tests_by_testcase
+
+ def _report_testsuite(suite_name, tests, xml_document):
+ "Appends the testsuite section to the XML document."
+ testsuite = xml_document.createElement('testsuite')
+ xml_document.appendChild(testsuite)
+
+ testsuite.setAttribute('name', str(suite_name))
+ testsuite.setAttribute('tests', str(len(tests)))
+
+ testsuite.setAttribute('time', '%.3f' %
+ sum([e.get_elapsed_time() for e in tests]))
+
+ failures = len([1 for e in tests if e.outcome == _TestInfo.FAILURE])
+ testsuite.setAttribute('failures', str(failures))
+
+ errors = len([1 for e in tests if e.outcome == _TestInfo.ERROR])
+ testsuite.setAttribute('errors', str(errors))
+
+ return testsuite
+
+ _report_testsuite = staticmethod(_report_testsuite)
+
+ def _report_testcase(suite_name, test_result, xml_testsuite, xml_document):
+ "Appends a testcase section to the XML document."
+ testcase = xml_document.createElement('testcase')
+ xml_testsuite.appendChild(testcase)
+
+ testcase.setAttribute('classname', str(suite_name))
+ testcase.setAttribute('name', test_result.test_method.shortDescription()
+ or getattr(test_result.test_method, '_testMethodName',
+ str(test_result.test_method)))
+ testcase.setAttribute('time', '%.3f' % test_result.get_elapsed_time())
+
+ if (test_result.outcome != _TestInfo.SUCCESS):
+ elem_name = ('failure', 'error')[test_result.outcome-1]
+ failure = xml_document.createElement(elem_name)
+ testcase.appendChild(failure)
+
+ failure.setAttribute('type', str(test_result.err[0].__name__))
+ failure.setAttribute('message', str(test_result.err[1]))
+
+ error_info = test_result.get_error_info()
+ failureText = xml_document.createCDATAOrText(error_info)
+ failure.appendChild(failureText)
+
+ _report_testcase = staticmethod(_report_testcase)
+
+ def _report_output(test_runner, xml_testsuite, xml_document, stdout, stderr):
+ "Appends the system-out and system-err sections to the XML document."
+ systemout = xml_document.createElement('system-out')
+ xml_testsuite.appendChild(systemout)
+
+ systemout_text = xml_document.createCDATAOrText(stdout)
+ systemout.appendChild(systemout_text)
+
+ systemerr = xml_document.createElement('system-err')
+ xml_testsuite.appendChild(systemerr)
+
+ systemerr_text = xml_document.createCDATAOrText(stderr)
+ systemerr.appendChild(systemerr_text)
+
+ _report_output = staticmethod(_report_output)
+
+ def generate_reports(self, test_runner):
+ "Generates the XML reports to a given XMLTestRunner object."
+ all_results = self._get_info_by_testcase()
+
+ if type(test_runner.output) == str and not \
+ os.path.exists(test_runner.output):
+ os.makedirs(test_runner.output)
+
+ for suite, tests in all_results.items():
+ doc = XMLDocument()
+
+ # Build the XML file
+ testsuite = _XMLTestResult._report_testsuite(suite, tests, doc)
+ stdout, stderr = [], []
+ for test in tests:
+ _XMLTestResult._report_testcase(suite, test, testsuite, doc)
+ if test.stdout:
+ stdout.extend(['*****************', test.get_description(), test.stdout])
+ if test.stderr:
+ stderr.extend(['*****************', test.get_description(), test.stderr])
+ _XMLTestResult._report_output(test_runner, testsuite, doc,
+ '\n'.join(stdout), '\n'.join(stderr))
+ xml_content = doc.toprettyxml(indent='\t')
+
+ if type(test_runner.output) is str:
+ report_file = open('%s%sTEST-%s.xml' % \
+ (test_runner.output, os.sep, suite), 'w')
+ try:
+ report_file.write(xml_content)
+ finally:
+ report_file.close()
+ else:
+ # Assume that test_runner.output is a stream
+ test_runner.output.write(xml_content)
+
+
+class XMLTestRunner(TextTestRunner):
+ """A test runner class that outputs the results in JUnit like XML files.
+ """
def __init__(self, output='.', stream=None, descriptions=True, verbose=False, elapsed_times=True):
- "Create a new instance of XMLTestRunner."
+ "Create a new instance of XMLTestRunner."
if stream is None:
stream = sys.stderr
- verbosity = (1, 2)[verbose]
- TextTestRunner.__init__(self, stream, descriptions, verbosity)
- self.output = output
- self.elapsed_times = elapsed_times
-
- def _make_result(self):
- """Create the TestResult object which will be used to store
- information about the executed tests.
- """
- return _XMLTestResult(self.stream, self.descriptions, \
- self.verbosity, self.elapsed_times)
-
- def run(self, test):
- "Run the given test case or test suite."
- # Prepare the test execution
- result = self._make_result()
-
- # Print a nice header
- self.stream.writeln()
- self.stream.writeln('Running tests...')
- self.stream.writeln(result.separator2)
-
- # Execute tests
- start_time = time.time()
- test(result)
- stop_time = time.time()
- time_taken = stop_time - start_time
-
+ verbosity = (1, 2)[verbose]
+ TextTestRunner.__init__(self, stream, descriptions, verbosity)
+ self.output = output
+ self.elapsed_times = elapsed_times
+
+ def _make_result(self):
+ """Create the TestResult object which will be used to store
+ information about the executed tests.
+ """
+ return _XMLTestResult(self.stream, self.descriptions, \
+ self.verbosity, self.elapsed_times)
+
+ def run(self, test):
+ "Run the given test case or test suite."
+ # Prepare the test execution
+ result = self._make_result()
+
+ # Print a nice header
+ self.stream.writeln()
+ self.stream.writeln('Running tests...')
+ self.stream.writeln(result.separator2)
+
+ # Execute tests
+ start_time = time.time()
+ test(result)
+ stop_time = time.time()
+ time_taken = stop_time - start_time
+
# Generate reports
self.stream.writeln()
self.stream.writeln('Generating XML reports...')
result.generate_reports(self)
- # Print results
- result.printErrors()
- self.stream.writeln(result.separator2)
- run = result.testsRun
- self.stream.writeln("Ran %d test%s in %.3fs" %
- (run, run != 1 and "s" or "", time_taken))
- self.stream.writeln()
-
- # Error traces
- if not result.wasSuccessful():
- self.stream.write("FAILED (")
- failed, errored = (len(result.failures), len(result.errors))
- if failed:
- self.stream.write("failures=%d" % failed)
- if errored:
- if failed:
- self.stream.write(", ")
- self.stream.write("errors=%d" % errored)
- self.stream.writeln(")")
- else:
- self.stream.writeln("OK")
-
- return result
+ # Print results
+ result.printErrors()
+ self.stream.writeln(result.separator2)
+ run = result.testsRun
+ self.stream.writeln("Ran %d test%s in %.3fs" %
+ (run, run != 1 and "s" or "", time_taken))
+ self.stream.writeln()
+
+ # Error traces
+ if not result.wasSuccessful():
+ self.stream.write("FAILED (")
+ failed, errored = (len(result.failures), len(result.errors))
+ if failed:
+ self.stream.write("failures=%d" % failed)
+ if errored:
+ if failed:
+ self.stream.write(", ")
+ self.stream.write("errors=%d" % errored)
+ self.stream.writeln(")")
+ else:
+ self.stream.writeln("OK")
+
+ return result
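The module docstring restored above already carries a usage script; boiled down, the pattern for producing JUnit-style XML reports is simply to hand an XMLTestRunner to unittest.main. A short sketch, assuming the module is importable as xmlrunner:

    import unittest
    import xmlrunner

    class TestExample(unittest.TestCase):
        def test_truth(self):
            self.assertTrue(True)

    if __name__ == '__main__':
        # With a string output, one TEST-<suite>.xml file is written per TestCase
        # class; a file-like object passed as output receives the XML directly.
        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='test-reports'))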
diff --git a/contrib/tools/cython/Cython/Utility/Buffer.c b/contrib/tools/cython/Cython/Utility/Buffer.c
index ca2c532868..3c7105fa35 100644
--- a/contrib/tools/cython/Cython/Utility/Buffer.c
+++ b/contrib/tools/cython/Cython/Utility/Buffer.c
@@ -1,166 +1,166 @@
-/////////////// BufferStructDeclare.proto ///////////////
-
-/* structs for buffer access */
-
-typedef struct {
- Py_ssize_t shape, strides, suboffsets;
-} __Pyx_Buf_DimInfo;
-
-typedef struct {
- size_t refcount;
- Py_buffer pybuffer;
-} __Pyx_Buffer;
-
-typedef struct {
- __Pyx_Buffer *rcbuffer;
- char *data;
- __Pyx_Buf_DimInfo diminfo[{{max_dims}}];
-} __Pyx_LocalBuf_ND;
-
-/////////////// BufferIndexError.proto ///////////////
-static void __Pyx_RaiseBufferIndexError(int axis); /*proto*/
-
-/////////////// BufferIndexError ///////////////
-static void __Pyx_RaiseBufferIndexError(int axis) {
- PyErr_Format(PyExc_IndexError,
- "Out of bounds on buffer access (axis %d)", axis);
-}
-
-/////////////// BufferIndexErrorNogil.proto ///////////////
-//@requires: BufferIndexError
-
-static void __Pyx_RaiseBufferIndexErrorNogil(int axis); /*proto*/
-
-/////////////// BufferIndexErrorNogil ///////////////
-static void __Pyx_RaiseBufferIndexErrorNogil(int axis) {
- #ifdef WITH_THREAD
- PyGILState_STATE gilstate = PyGILState_Ensure();
- #endif
- __Pyx_RaiseBufferIndexError(axis);
- #ifdef WITH_THREAD
- PyGILState_Release(gilstate);
- #endif
-}
-
-/////////////// BufferFallbackError.proto ///////////////
-static void __Pyx_RaiseBufferFallbackError(void); /*proto*/
-
-/////////////// BufferFallbackError ///////////////
-static void __Pyx_RaiseBufferFallbackError(void) {
- PyErr_SetString(PyExc_ValueError,
- "Buffer acquisition failed on assignment; and then reacquiring the old buffer failed too!");
-}
-
-/////////////// BufferFormatStructs.proto ///////////////
+/////////////// BufferStructDeclare.proto ///////////////
+
+/* structs for buffer access */
+
+typedef struct {
+ Py_ssize_t shape, strides, suboffsets;
+} __Pyx_Buf_DimInfo;
+
+typedef struct {
+ size_t refcount;
+ Py_buffer pybuffer;
+} __Pyx_Buffer;
+
+typedef struct {
+ __Pyx_Buffer *rcbuffer;
+ char *data;
+ __Pyx_Buf_DimInfo diminfo[{{max_dims}}];
+} __Pyx_LocalBuf_ND;
+
+/////////////// BufferIndexError.proto ///////////////
+static void __Pyx_RaiseBufferIndexError(int axis); /*proto*/
+
+/////////////// BufferIndexError ///////////////
+static void __Pyx_RaiseBufferIndexError(int axis) {
+ PyErr_Format(PyExc_IndexError,
+ "Out of bounds on buffer access (axis %d)", axis);
+}
+
+/////////////// BufferIndexErrorNogil.proto ///////////////
+//@requires: BufferIndexError
+
+static void __Pyx_RaiseBufferIndexErrorNogil(int axis); /*proto*/
+
+/////////////// BufferIndexErrorNogil ///////////////
+static void __Pyx_RaiseBufferIndexErrorNogil(int axis) {
+ #ifdef WITH_THREAD
+ PyGILState_STATE gilstate = PyGILState_Ensure();
+ #endif
+ __Pyx_RaiseBufferIndexError(axis);
+ #ifdef WITH_THREAD
+ PyGILState_Release(gilstate);
+ #endif
+}
+
+/////////////// BufferFallbackError.proto ///////////////
+static void __Pyx_RaiseBufferFallbackError(void); /*proto*/
+
+/////////////// BufferFallbackError ///////////////
+static void __Pyx_RaiseBufferFallbackError(void) {
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer acquisition failed on assignment; and then reacquiring the old buffer failed too!");
+}
+
+/////////////// BufferFormatStructs.proto ///////////////
//@proto_block: utility_code_proto_before_types
-
-#define IS_UNSIGNED(type) (((type) -1) > 0)
-
-/* Run-time type information about structs used with buffers */
-struct __Pyx_StructField_;
-
-#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
-
-typedef struct {
- const char* name; /* for error messages only */
- struct __Pyx_StructField_* fields;
- size_t size; /* sizeof(type) */
- size_t arraysize[8]; /* length of array in each dimension */
- int ndim;
- char typegroup; /* _R_eal, _C_omplex, Signed _I_nt, _U_nsigned int, _S_truct, _P_ointer, _O_bject, c_H_ar */
- char is_unsigned;
- int flags;
-} __Pyx_TypeInfo;
-
-typedef struct __Pyx_StructField_ {
- __Pyx_TypeInfo* type;
- const char* name;
- size_t offset;
-} __Pyx_StructField;
-
-typedef struct {
- __Pyx_StructField* field;
- size_t parent_offset;
-} __Pyx_BufFmt_StackElem;
-
-typedef struct {
- __Pyx_StructField root;
- __Pyx_BufFmt_StackElem* head;
- size_t fmt_offset;
- size_t new_count, enc_count;
- size_t struct_alignment;
- int is_complex;
- char enc_type;
- char new_packmode;
- char enc_packmode;
- char is_valid_array;
-} __Pyx_BufFmt_Context;
-
-
-/////////////// GetAndReleaseBuffer.proto ///////////////
-
-#if PY_MAJOR_VERSION < 3
- static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
- static void __Pyx_ReleaseBuffer(Py_buffer *view);
-#else
- #define __Pyx_GetBuffer PyObject_GetBuffer
- #define __Pyx_ReleaseBuffer PyBuffer_Release
-#endif
-
-/////////////// GetAndReleaseBuffer ///////////////
-
-#if PY_MAJOR_VERSION < 3
-static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) {
- if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags);
-
- {{for type_ptr, getbuffer, releasebuffer in types}}
- {{if getbuffer}}
+
+#define IS_UNSIGNED(type) (((type) -1) > 0)
+
+/* Run-time type information about structs used with buffers */
+struct __Pyx_StructField_;
+
+#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
+
+typedef struct {
+ const char* name; /* for error messages only */
+ struct __Pyx_StructField_* fields;
+ size_t size; /* sizeof(type) */
+ size_t arraysize[8]; /* length of array in each dimension */
+ int ndim;
+ char typegroup; /* _R_eal, _C_omplex, Signed _I_nt, _U_nsigned int, _S_truct, _P_ointer, _O_bject, c_H_ar */
+ char is_unsigned;
+ int flags;
+} __Pyx_TypeInfo;
+
+typedef struct __Pyx_StructField_ {
+ __Pyx_TypeInfo* type;
+ const char* name;
+ size_t offset;
+} __Pyx_StructField;
+
+typedef struct {
+ __Pyx_StructField* field;
+ size_t parent_offset;
+} __Pyx_BufFmt_StackElem;
+
+typedef struct {
+ __Pyx_StructField root;
+ __Pyx_BufFmt_StackElem* head;
+ size_t fmt_offset;
+ size_t new_count, enc_count;
+ size_t struct_alignment;
+ int is_complex;
+ char enc_type;
+ char new_packmode;
+ char enc_packmode;
+ char is_valid_array;
+} __Pyx_BufFmt_Context;
+
+
+/////////////// GetAndReleaseBuffer.proto ///////////////
+
+#if PY_MAJOR_VERSION < 3
+ static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
+ static void __Pyx_ReleaseBuffer(Py_buffer *view);
+#else
+ #define __Pyx_GetBuffer PyObject_GetBuffer
+ #define __Pyx_ReleaseBuffer PyBuffer_Release
+#endif
+
+/////////////// GetAndReleaseBuffer ///////////////
+
+#if PY_MAJOR_VERSION < 3
+static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) {
+ if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags);
+
+ {{for type_ptr, getbuffer, releasebuffer in types}}
+ {{if getbuffer}}
if (__Pyx_TypeCheck(obj, {{type_ptr}})) return {{getbuffer}}(obj, view, flags);
- {{endif}}
- {{endfor}}
-
- PyErr_Format(PyExc_TypeError, "'%.200s' does not have the buffer interface", Py_TYPE(obj)->tp_name);
- return -1;
-}
-
-static void __Pyx_ReleaseBuffer(Py_buffer *view) {
- PyObject *obj = view->obj;
- if (!obj) return;
-
- if (PyObject_CheckBuffer(obj)) {
- PyBuffer_Release(view);
- return;
- }
-
+ {{endif}}
+ {{endfor}}
+
+ PyErr_Format(PyExc_TypeError, "'%.200s' does not have the buffer interface", Py_TYPE(obj)->tp_name);
+ return -1;
+}
+
+static void __Pyx_ReleaseBuffer(Py_buffer *view) {
+ PyObject *obj = view->obj;
+ if (!obj) return;
+
+ if (PyObject_CheckBuffer(obj)) {
+ PyBuffer_Release(view);
+ return;
+ }
+
if ((0)) {}
- {{for type_ptr, getbuffer, releasebuffer in types}}
- {{if releasebuffer}}
+ {{for type_ptr, getbuffer, releasebuffer in types}}
+ {{if releasebuffer}}
else if (__Pyx_TypeCheck(obj, {{type_ptr}})) {{releasebuffer}}(obj, view);
- {{endif}}
- {{endfor}}
-
+ {{endif}}
+ {{endfor}}
+
view->obj = NULL;
- Py_DECREF(obj);
-}
-
-#endif /* PY_MAJOR_VERSION < 3 */
-
-
+ Py_DECREF(obj);
+}
+
+#endif /* PY_MAJOR_VERSION < 3 */
+
+
/////////////// BufferGetAndValidate.proto ///////////////
-
+
#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack) \
((obj == Py_None || obj == NULL) ? \
(__Pyx_ZeroBuffer(buf), 0) : \
__Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack))
-
+
static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj,
__Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack);
static void __Pyx_ZeroBuffer(Py_buffer* buf);
static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info);/*proto*/
-
+
static Py_ssize_t __Pyx_minusones[] = { {{ ", ".join(["-1"] * max_dims) }} };
static Py_ssize_t __Pyx_zeros[] = { {{ ", ".join(["0"] * max_dims) }} };
-
+
/////////////// BufferGetAndValidate ///////////////
//@requires: BufferFormatCheck
@@ -233,689 +233,689 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
__Pyx_BufFmt_StackElem* stack,
__Pyx_TypeInfo* type); /*proto*/
-
-/////////////// BufferFormatCheck ///////////////
+
+/////////////// BufferFormatCheck ///////////////
//@requires: ModuleSetupCode.c::IsLittleEndian
//@requires: BufferFormatStructs
-
-static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
- __Pyx_BufFmt_StackElem* stack,
- __Pyx_TypeInfo* type) {
- stack[0].field = &ctx->root;
- stack[0].parent_offset = 0;
- ctx->root.type = type;
- ctx->root.name = "buffer dtype";
- ctx->root.offset = 0;
- ctx->head = stack;
- ctx->head->field = &ctx->root;
- ctx->fmt_offset = 0;
- ctx->head->parent_offset = 0;
- ctx->new_packmode = '@';
- ctx->enc_packmode = '@';
- ctx->new_count = 1;
- ctx->enc_count = 0;
- ctx->enc_type = 0;
- ctx->is_complex = 0;
- ctx->is_valid_array = 0;
- ctx->struct_alignment = 0;
- while (type->typegroup == 'S') {
- ++ctx->head;
- ctx->head->field = type->fields;
- ctx->head->parent_offset = 0;
- type = type->fields->type;
- }
-}
-
-static int __Pyx_BufFmt_ParseNumber(const char** ts) {
- int count;
- const char* t = *ts;
- if (*t < '0' || *t > '9') {
- return -1;
- } else {
- count = *t++ - '0';
+
+static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
+ __Pyx_BufFmt_StackElem* stack,
+ __Pyx_TypeInfo* type) {
+ stack[0].field = &ctx->root;
+ stack[0].parent_offset = 0;
+ ctx->root.type = type;
+ ctx->root.name = "buffer dtype";
+ ctx->root.offset = 0;
+ ctx->head = stack;
+ ctx->head->field = &ctx->root;
+ ctx->fmt_offset = 0;
+ ctx->head->parent_offset = 0;
+ ctx->new_packmode = '@';
+ ctx->enc_packmode = '@';
+ ctx->new_count = 1;
+ ctx->enc_count = 0;
+ ctx->enc_type = 0;
+ ctx->is_complex = 0;
+ ctx->is_valid_array = 0;
+ ctx->struct_alignment = 0;
+ while (type->typegroup == 'S') {
+ ++ctx->head;
+ ctx->head->field = type->fields;
+ ctx->head->parent_offset = 0;
+ type = type->fields->type;
+ }
+}
+
+static int __Pyx_BufFmt_ParseNumber(const char** ts) {
+ int count;
+ const char* t = *ts;
+ if (*t < '0' || *t > '9') {
+ return -1;
+ } else {
+ count = *t++ - '0';
while (*t >= '0' && *t <= '9') {
- count *= 10;
- count += *t++ - '0';
- }
- }
- *ts = t;
- return count;
-}
-
-static int __Pyx_BufFmt_ExpectNumber(const char **ts) {
- int number = __Pyx_BufFmt_ParseNumber(ts);
- if (number == -1) /* First char was not a digit */
- PyErr_Format(PyExc_ValueError,\
- "Does not understand character buffer dtype format string ('%c')", **ts);
- return number;
-}
-
-
-static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) {
- PyErr_Format(PyExc_ValueError,
- "Unexpected format string character: '%c'", ch);
-}
-
-static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) {
- switch (ch) {
+ count *= 10;
+ count += *t++ - '0';
+ }
+ }
+ *ts = t;
+ return count;
+}
+
+static int __Pyx_BufFmt_ExpectNumber(const char **ts) {
+ int number = __Pyx_BufFmt_ParseNumber(ts);
+ if (number == -1) /* First char was not a digit */
+ PyErr_Format(PyExc_ValueError,\
+ "Does not understand character buffer dtype format string ('%c')", **ts);
+ return number;
+}
+
+
+static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) {
+ PyErr_Format(PyExc_ValueError,
+ "Unexpected format string character: '%c'", ch);
+}
+
+static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) {
+ switch (ch) {
case '?': return "'bool'";
- case 'c': return "'char'";
- case 'b': return "'signed char'";
- case 'B': return "'unsigned char'";
- case 'h': return "'short'";
- case 'H': return "'unsigned short'";
- case 'i': return "'int'";
- case 'I': return "'unsigned int'";
- case 'l': return "'long'";
- case 'L': return "'unsigned long'";
- case 'q': return "'long long'";
- case 'Q': return "'unsigned long long'";
- case 'f': return (is_complex ? "'complex float'" : "'float'");
- case 'd': return (is_complex ? "'complex double'" : "'double'");
- case 'g': return (is_complex ? "'complex long double'" : "'long double'");
- case 'T': return "a struct";
- case 'O': return "Python object";
- case 'P': return "a pointer";
- case 's': case 'p': return "a string";
- case 0: return "end";
- default: return "unparseable format string";
- }
-}
-
-static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) {
- switch (ch) {
- case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
- case 'h': case 'H': return 2;
- case 'i': case 'I': case 'l': case 'L': return 4;
- case 'q': case 'Q': return 8;
- case 'f': return (is_complex ? 8 : 4);
- case 'd': return (is_complex ? 16 : 8);
- case 'g': {
- PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g')..");
- return 0;
- }
- case 'O': case 'P': return sizeof(void*);
- default:
- __Pyx_BufFmt_RaiseUnexpectedChar(ch);
- return 0;
- }
-}
-
-static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) {
- switch (ch) {
+ case 'c': return "'char'";
+ case 'b': return "'signed char'";
+ case 'B': return "'unsigned char'";
+ case 'h': return "'short'";
+ case 'H': return "'unsigned short'";
+ case 'i': return "'int'";
+ case 'I': return "'unsigned int'";
+ case 'l': return "'long'";
+ case 'L': return "'unsigned long'";
+ case 'q': return "'long long'";
+ case 'Q': return "'unsigned long long'";
+ case 'f': return (is_complex ? "'complex float'" : "'float'");
+ case 'd': return (is_complex ? "'complex double'" : "'double'");
+ case 'g': return (is_complex ? "'complex long double'" : "'long double'");
+ case 'T': return "a struct";
+ case 'O': return "Python object";
+ case 'P': return "a pointer";
+ case 's': case 'p': return "a string";
+ case 0: return "end";
+ default: return "unparseable format string";
+ }
+}
+
+static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) {
+ switch (ch) {
case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
- case 'h': case 'H': return sizeof(short);
- case 'i': case 'I': return sizeof(int);
- case 'l': case 'L': return sizeof(long);
- #ifdef HAVE_LONG_LONG
- case 'q': case 'Q': return sizeof(PY_LONG_LONG);
- #endif
- case 'f': return sizeof(float) * (is_complex ? 2 : 1);
- case 'd': return sizeof(double) * (is_complex ? 2 : 1);
- case 'g': return sizeof(long double) * (is_complex ? 2 : 1);
- case 'O': case 'P': return sizeof(void*);
- default: {
- __Pyx_BufFmt_RaiseUnexpectedChar(ch);
- return 0;
- }
- }
-}
-
-typedef struct { char c; short x; } __Pyx_st_short;
-typedef struct { char c; int x; } __Pyx_st_int;
-typedef struct { char c; long x; } __Pyx_st_long;
-typedef struct { char c; float x; } __Pyx_st_float;
-typedef struct { char c; double x; } __Pyx_st_double;
-typedef struct { char c; long double x; } __Pyx_st_longdouble;
-typedef struct { char c; void *x; } __Pyx_st_void_p;
-#ifdef HAVE_LONG_LONG
-typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong;
-#endif
-
-static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED int is_complex) {
- switch (ch) {
- case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
- case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short);
- case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int);
- case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long);
-#ifdef HAVE_LONG_LONG
- case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG);
-#endif
- case 'f': return sizeof(__Pyx_st_float) - sizeof(float);
- case 'd': return sizeof(__Pyx_st_double) - sizeof(double);
- case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double);
- case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*);
- default:
- __Pyx_BufFmt_RaiseUnexpectedChar(ch);
- return 0;
- }
-}
-
-/* These are for computing the padding at the end of the struct to align
- on the first member of the struct. This will probably the same as above,
- but we don't have any guarantees.
- */
-typedef struct { short x; char c; } __Pyx_pad_short;
-typedef struct { int x; char c; } __Pyx_pad_int;
-typedef struct { long x; char c; } __Pyx_pad_long;
-typedef struct { float x; char c; } __Pyx_pad_float;
-typedef struct { double x; char c; } __Pyx_pad_double;
-typedef struct { long double x; char c; } __Pyx_pad_longdouble;
-typedef struct { void *x; char c; } __Pyx_pad_void_p;
-#ifdef HAVE_LONG_LONG
-typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong;
-#endif
-
-static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int is_complex) {
- switch (ch) {
- case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
- case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short);
- case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int);
- case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long);
-#ifdef HAVE_LONG_LONG
- case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG);
-#endif
- case 'f': return sizeof(__Pyx_pad_float) - sizeof(float);
- case 'd': return sizeof(__Pyx_pad_double) - sizeof(double);
- case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double);
- case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*);
- default:
- __Pyx_BufFmt_RaiseUnexpectedChar(ch);
- return 0;
- }
-}
-
-static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) {
- switch (ch) {
- case 'c':
- return 'H';
- case 'b': case 'h': case 'i':
- case 'l': case 'q': case 's': case 'p':
- return 'I';
+ case 'h': case 'H': return 2;
+ case 'i': case 'I': case 'l': case 'L': return 4;
+ case 'q': case 'Q': return 8;
+ case 'f': return (is_complex ? 8 : 4);
+ case 'd': return (is_complex ? 16 : 8);
+ case 'g': {
+ PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g')..");
+ return 0;
+ }
+ case 'O': case 'P': return sizeof(void*);
+ default:
+ __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+ return 0;
+ }
+}
+
+static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) {
+ switch (ch) {
+ case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
+ case 'h': case 'H': return sizeof(short);
+ case 'i': case 'I': return sizeof(int);
+ case 'l': case 'L': return sizeof(long);
+ #ifdef HAVE_LONG_LONG
+ case 'q': case 'Q': return sizeof(PY_LONG_LONG);
+ #endif
+ case 'f': return sizeof(float) * (is_complex ? 2 : 1);
+ case 'd': return sizeof(double) * (is_complex ? 2 : 1);
+ case 'g': return sizeof(long double) * (is_complex ? 2 : 1);
+ case 'O': case 'P': return sizeof(void*);
+ default: {
+ __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+ return 0;
+ }
+ }
+}
+
+typedef struct { char c; short x; } __Pyx_st_short;
+typedef struct { char c; int x; } __Pyx_st_int;
+typedef struct { char c; long x; } __Pyx_st_long;
+typedef struct { char c; float x; } __Pyx_st_float;
+typedef struct { char c; double x; } __Pyx_st_double;
+typedef struct { char c; long double x; } __Pyx_st_longdouble;
+typedef struct { char c; void *x; } __Pyx_st_void_p;
+#ifdef HAVE_LONG_LONG
+typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong;
+#endif
+
+static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED int is_complex) {
+ switch (ch) {
+ case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
+ case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short);
+ case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int);
+ case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long);
+#ifdef HAVE_LONG_LONG
+ case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG);
+#endif
+ case 'f': return sizeof(__Pyx_st_float) - sizeof(float);
+ case 'd': return sizeof(__Pyx_st_double) - sizeof(double);
+ case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double);
+ case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*);
+ default:
+ __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+ return 0;
+ }
+}
+
+/* These are for computing the padding at the end of the struct to align
+ on the first member of the struct. This will probably the same as above,
+ but we don't have any guarantees.
+ */
+typedef struct { short x; char c; } __Pyx_pad_short;
+typedef struct { int x; char c; } __Pyx_pad_int;
+typedef struct { long x; char c; } __Pyx_pad_long;
+typedef struct { float x; char c; } __Pyx_pad_float;
+typedef struct { double x; char c; } __Pyx_pad_double;
+typedef struct { long double x; char c; } __Pyx_pad_longdouble;
+typedef struct { void *x; char c; } __Pyx_pad_void_p;
+#ifdef HAVE_LONG_LONG
+typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong;
+#endif
+
+static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int is_complex) {
+ switch (ch) {
+ case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
+ case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short);
+ case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int);
+ case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long);
+#ifdef HAVE_LONG_LONG
+ case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG);
+#endif
+ case 'f': return sizeof(__Pyx_pad_float) - sizeof(float);
+ case 'd': return sizeof(__Pyx_pad_double) - sizeof(double);
+ case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double);
+ case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*);
+ default:
+ __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+ return 0;
+ }
+}
+
+static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) {
+ switch (ch) {
+ case 'c':
+ return 'H';
+ case 'b': case 'h': case 'i':
+ case 'l': case 'q': case 's': case 'p':
+ return 'I';
case '?': case 'B': case 'H': case 'I': case 'L': case 'Q':
- return 'U';
- case 'f': case 'd': case 'g':
- return (is_complex ? 'C' : 'R');
- case 'O':
- return 'O';
- case 'P':
- return 'P';
- default: {
- __Pyx_BufFmt_RaiseUnexpectedChar(ch);
- return 0;
- }
- }
-}
-
-
-static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) {
- if (ctx->head == NULL || ctx->head->field == &ctx->root) {
- const char* expected;
- const char* quote;
- if (ctx->head == NULL) {
- expected = "end";
- quote = "";
- } else {
- expected = ctx->head->field->type->name;
- quote = "'";
- }
- PyErr_Format(PyExc_ValueError,
- "Buffer dtype mismatch, expected %s%s%s but got %s",
- quote, expected, quote,
- __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex));
- } else {
- __Pyx_StructField* field = ctx->head->field;
- __Pyx_StructField* parent = (ctx->head - 1)->field;
- PyErr_Format(PyExc_ValueError,
- "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'",
- field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex),
- parent->type->name, field->name);
- }
-}
-
-static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
- char group;
- size_t size, offset, arraysize = 1;
-
- /* printf("processing... %s\n", ctx->head->field->type->name); */
-
- if (ctx->enc_type == 0) return 0;
-
- /* Validate array size */
- if (ctx->head->field->type->arraysize[0]) {
- int i, ndim = 0;
-
- /* handle strings ('s' and 'p') */
- if (ctx->enc_type == 's' || ctx->enc_type == 'p') {
- ctx->is_valid_array = ctx->head->field->type->ndim == 1;
- ndim = 1;
- if (ctx->enc_count != ctx->head->field->type->arraysize[0]) {
- PyErr_Format(PyExc_ValueError,
- "Expected a dimension of size %zu, got %zu",
- ctx->head->field->type->arraysize[0], ctx->enc_count);
- return -1;
- }
- }
-
- if (!ctx->is_valid_array) {
- PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d",
- ctx->head->field->type->ndim, ndim);
- return -1;
- }
- for (i = 0; i < ctx->head->field->type->ndim; i++) {
- arraysize *= ctx->head->field->type->arraysize[i];
- }
- ctx->is_valid_array = 0;
- ctx->enc_count = 1;
- }
-
- group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex);
- do {
- __Pyx_StructField* field = ctx->head->field;
- __Pyx_TypeInfo* type = field->type;
-
- if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') {
- size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex);
- } else {
- size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex);
- }
-
- if (ctx->enc_packmode == '@') {
- size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex);
- size_t align_mod_offset;
- if (align_at == 0) return -1;
- align_mod_offset = ctx->fmt_offset % align_at;
- if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset;
-
- if (ctx->struct_alignment == 0)
- ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type,
- ctx->is_complex);
- }
-
- if (type->size != size || type->typegroup != group) {
- if (type->typegroup == 'C' && type->fields != NULL) {
- /* special case -- treat as struct rather than complex number */
- size_t parent_offset = ctx->head->parent_offset + field->offset;
- ++ctx->head;
- ctx->head->field = type->fields;
- ctx->head->parent_offset = parent_offset;
- continue;
- }
-
- if ((type->typegroup == 'H' || group == 'H') && type->size == size) {
- /* special case -- chars don't care about sign */
- } else {
- __Pyx_BufFmt_RaiseExpected(ctx);
- return -1;
- }
- }
-
- offset = ctx->head->parent_offset + field->offset;
- if (ctx->fmt_offset != offset) {
- PyErr_Format(PyExc_ValueError,
- "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected",
- (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset);
- return -1;
- }
-
- ctx->fmt_offset += size;
- if (arraysize)
- ctx->fmt_offset += (arraysize - 1) * size;
-
- --ctx->enc_count; /* Consume from buffer string */
-
- /* Done checking, move to next field, pushing or popping struct stack if needed */
- while (1) {
- if (field == &ctx->root) {
- ctx->head = NULL;
- if (ctx->enc_count != 0) {
- __Pyx_BufFmt_RaiseExpected(ctx);
- return -1;
- }
- break; /* breaks both loops as ctx->enc_count == 0 */
- }
- ctx->head->field = ++field;
- if (field->type == NULL) {
- --ctx->head;
- field = ctx->head->field;
- continue;
- } else if (field->type->typegroup == 'S') {
- size_t parent_offset = ctx->head->parent_offset + field->offset;
- if (field->type->fields->type == NULL) continue; /* empty struct */
- field = field->type->fields;
- ++ctx->head;
- ctx->head->field = field;
- ctx->head->parent_offset = parent_offset;
- break;
- } else {
- break;
- }
- }
- } while (ctx->enc_count);
- ctx->enc_type = 0;
- ctx->is_complex = 0;
- return 0;
-}
-
-/* Parse an array in the format string (e.g. (1,2,3)) */
+ return 'U';
+ case 'f': case 'd': case 'g':
+ return (is_complex ? 'C' : 'R');
+ case 'O':
+ return 'O';
+ case 'P':
+ return 'P';
+ default: {
+ __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+ return 0;
+ }
+ }
+}
+
+
+static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) {
+ if (ctx->head == NULL || ctx->head->field == &ctx->root) {
+ const char* expected;
+ const char* quote;
+ if (ctx->head == NULL) {
+ expected = "end";
+ quote = "";
+ } else {
+ expected = ctx->head->field->type->name;
+ quote = "'";
+ }
+ PyErr_Format(PyExc_ValueError,
+ "Buffer dtype mismatch, expected %s%s%s but got %s",
+ quote, expected, quote,
+ __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex));
+ } else {
+ __Pyx_StructField* field = ctx->head->field;
+ __Pyx_StructField* parent = (ctx->head - 1)->field;
+ PyErr_Format(PyExc_ValueError,
+ "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'",
+ field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex),
+ parent->type->name, field->name);
+ }
+}
+
+static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
+ char group;
+ size_t size, offset, arraysize = 1;
+
+ /* printf("processing... %s\n", ctx->head->field->type->name); */
+
+ if (ctx->enc_type == 0) return 0;
+
+ /* Validate array size */
+ if (ctx->head->field->type->arraysize[0]) {
+ int i, ndim = 0;
+
+ /* handle strings ('s' and 'p') */
+ if (ctx->enc_type == 's' || ctx->enc_type == 'p') {
+ ctx->is_valid_array = ctx->head->field->type->ndim == 1;
+ ndim = 1;
+ if (ctx->enc_count != ctx->head->field->type->arraysize[0]) {
+ PyErr_Format(PyExc_ValueError,
+ "Expected a dimension of size %zu, got %zu",
+ ctx->head->field->type->arraysize[0], ctx->enc_count);
+ return -1;
+ }
+ }
+
+ if (!ctx->is_valid_array) {
+ PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d",
+ ctx->head->field->type->ndim, ndim);
+ return -1;
+ }
+ for (i = 0; i < ctx->head->field->type->ndim; i++) {
+ arraysize *= ctx->head->field->type->arraysize[i];
+ }
+ ctx->is_valid_array = 0;
+ ctx->enc_count = 1;
+ }
+
+ group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex);
+ do {
+ __Pyx_StructField* field = ctx->head->field;
+ __Pyx_TypeInfo* type = field->type;
+
+ if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') {
+ size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex);
+ } else {
+ size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex);
+ }
+
+ if (ctx->enc_packmode == '@') {
+ size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex);
+ size_t align_mod_offset;
+ if (align_at == 0) return -1;
+ align_mod_offset = ctx->fmt_offset % align_at;
+ if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset;
+
+ if (ctx->struct_alignment == 0)
+ ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type,
+ ctx->is_complex);
+ }
+
+ if (type->size != size || type->typegroup != group) {
+ if (type->typegroup == 'C' && type->fields != NULL) {
+ /* special case -- treat as struct rather than complex number */
+ size_t parent_offset = ctx->head->parent_offset + field->offset;
+ ++ctx->head;
+ ctx->head->field = type->fields;
+ ctx->head->parent_offset = parent_offset;
+ continue;
+ }
+
+ if ((type->typegroup == 'H' || group == 'H') && type->size == size) {
+ /* special case -- chars don't care about sign */
+ } else {
+ __Pyx_BufFmt_RaiseExpected(ctx);
+ return -1;
+ }
+ }
+
+ offset = ctx->head->parent_offset + field->offset;
+ if (ctx->fmt_offset != offset) {
+ PyErr_Format(PyExc_ValueError,
+ "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected",
+ (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset);
+ return -1;
+ }
+
+ ctx->fmt_offset += size;
+ if (arraysize)
+ ctx->fmt_offset += (arraysize - 1) * size;
+
+ --ctx->enc_count; /* Consume from buffer string */
+
+ /* Done checking, move to next field, pushing or popping struct stack if needed */
+ while (1) {
+ if (field == &ctx->root) {
+ ctx->head = NULL;
+ if (ctx->enc_count != 0) {
+ __Pyx_BufFmt_RaiseExpected(ctx);
+ return -1;
+ }
+ break; /* breaks both loops as ctx->enc_count == 0 */
+ }
+ ctx->head->field = ++field;
+ if (field->type == NULL) {
+ --ctx->head;
+ field = ctx->head->field;
+ continue;
+ } else if (field->type->typegroup == 'S') {
+ size_t parent_offset = ctx->head->parent_offset + field->offset;
+ if (field->type->fields->type == NULL) continue; /* empty struct */
+ field = field->type->fields;
+ ++ctx->head;
+ ctx->head->field = field;
+ ctx->head->parent_offset = parent_offset;
+ break;
+ } else {
+ break;
+ }
+ }
+ } while (ctx->enc_count);
+ ctx->enc_type = 0;
+ ctx->is_complex = 0;
+ return 0;
+}
+
+/* Parse an array in the format string (e.g. (1,2,3)) */
static PyObject *
-__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp)
-{
- const char *ts = *tsp;
+__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp)
+{
+ const char *ts = *tsp;
int i = 0, number, ndim;
- ++ts;
- if (ctx->new_count != 1) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot handle repeated arrays in format string");
- return NULL;
- }
-
- /* Process the previous element */
- if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
-
+ ++ts;
+ if (ctx->new_count != 1) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot handle repeated arrays in format string");
+ return NULL;
+ }
+
+ /* Process the previous element */
+ if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+
// store ndim now, as field advanced by __Pyx_BufFmt_ProcessTypeChunk call
ndim = ctx->head->field->type->ndim;
- /* Parse all numbers in the format string */
- while (*ts && *ts != ')') {
- // ignore space characters (not using isspace() due to C/C++ problem on MacOS-X)
- switch (*ts) {
- case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue;
- default: break; /* not a 'break' in the loop */
- }
-
- number = __Pyx_BufFmt_ExpectNumber(&ts);
- if (number == -1) return NULL;
-
- if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i])
- return PyErr_Format(PyExc_ValueError,
- "Expected a dimension of size %zu, got %d",
- ctx->head->field->type->arraysize[i], number);
-
- if (*ts != ',' && *ts != ')')
- return PyErr_Format(PyExc_ValueError,
- "Expected a comma in format string, got '%c'", *ts);
-
- if (*ts == ',') ts++;
- i++;
- }
-
- if (i != ndim)
- return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d",
- ctx->head->field->type->ndim, i);
-
- if (!*ts) {
- PyErr_SetString(PyExc_ValueError,
- "Unexpected end of format string, expected ')'");
- return NULL;
- }
-
- ctx->is_valid_array = 1;
- ctx->new_count = 1;
- *tsp = ++ts;
- return Py_None;
-}
-
-static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
- int got_Z = 0;
-
- while (1) {
- /* puts(ts); */
- switch(*ts) {
- case 0:
- if (ctx->enc_type != 0 && ctx->head == NULL) {
- __Pyx_BufFmt_RaiseExpected(ctx);
- return NULL;
- }
- if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
- if (ctx->head != NULL) {
- __Pyx_BufFmt_RaiseExpected(ctx);
- return NULL;
- }
- return ts;
- case ' ':
- case '\r':
- case '\n':
- ++ts;
- break;
- case '<':
+ /* Parse all numbers in the format string */
+ while (*ts && *ts != ')') {
+ // ignore space characters (not using isspace() due to C/C++ problem on MacOS-X)
+ switch (*ts) {
+ case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue;
+ default: break; /* not a 'break' in the loop */
+ }
+
+ number = __Pyx_BufFmt_ExpectNumber(&ts);
+ if (number == -1) return NULL;
+
+ if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i])
+ return PyErr_Format(PyExc_ValueError,
+ "Expected a dimension of size %zu, got %d",
+ ctx->head->field->type->arraysize[i], number);
+
+ if (*ts != ',' && *ts != ')')
+ return PyErr_Format(PyExc_ValueError,
+ "Expected a comma in format string, got '%c'", *ts);
+
+ if (*ts == ',') ts++;
+ i++;
+ }
+
+ if (i != ndim)
+ return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d",
+ ctx->head->field->type->ndim, i);
+
+ if (!*ts) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unexpected end of format string, expected ')'");
+ return NULL;
+ }
+
+ ctx->is_valid_array = 1;
+ ctx->new_count = 1;
+ *tsp = ++ts;
+ return Py_None;
+}
+
+static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
+ int got_Z = 0;
+
+ while (1) {
+ /* puts(ts); */
+ switch(*ts) {
+ case 0:
+ if (ctx->enc_type != 0 && ctx->head == NULL) {
+ __Pyx_BufFmt_RaiseExpected(ctx);
+ return NULL;
+ }
+ if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+ if (ctx->head != NULL) {
+ __Pyx_BufFmt_RaiseExpected(ctx);
+ return NULL;
+ }
+ return ts;
+ case ' ':
+ case '\r':
+ case '\n':
+ ++ts;
+ break;
+ case '<':
if (!__Pyx_Is_Little_Endian()) {
- PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
- return NULL;
- }
- ctx->new_packmode = '=';
- ++ts;
- break;
- case '>':
- case '!':
+ PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
+ return NULL;
+ }
+ ctx->new_packmode = '=';
+ ++ts;
+ break;
+ case '>':
+ case '!':
if (__Pyx_Is_Little_Endian()) {
- PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
- return NULL;
- }
- ctx->new_packmode = '=';
- ++ts;
- break;
- case '=':
- case '@':
- case '^':
- ctx->new_packmode = *ts++;
- break;
- case 'T': /* substruct */
- {
- const char* ts_after_sub;
- size_t i, struct_count = ctx->new_count;
- size_t struct_alignment = ctx->struct_alignment;
- ctx->new_count = 1;
- ++ts;
- if (*ts != '{') {
- PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'");
- return NULL;
- }
- if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
- ctx->enc_type = 0; /* Erase processed last struct element */
- ctx->enc_count = 0;
- ctx->struct_alignment = 0;
- ++ts;
- ts_after_sub = ts;
- for (i = 0; i != struct_count; ++i) {
- ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts);
- if (!ts_after_sub) return NULL;
- }
- ts = ts_after_sub;
- if (struct_alignment) ctx->struct_alignment = struct_alignment;
- }
- break;
- case '}': /* end of substruct; either repeat or move on */
- {
- size_t alignment = ctx->struct_alignment;
- ++ts;
- if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
- ctx->enc_type = 0; /* Erase processed last struct element */
- if (alignment && ctx->fmt_offset % alignment) {
- /* Pad struct on size of the first member */
- ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment);
- }
- }
- return ts;
- case 'x':
- if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
- ctx->fmt_offset += ctx->new_count;
- ctx->new_count = 1;
- ctx->enc_count = 0;
- ctx->enc_type = 0;
- ctx->enc_packmode = ctx->new_packmode;
- ++ts;
- break;
- case 'Z':
- got_Z = 1;
- ++ts;
- if (*ts != 'f' && *ts != 'd' && *ts != 'g') {
- __Pyx_BufFmt_RaiseUnexpectedChar('Z');
- return NULL;
- }
+ PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
+ return NULL;
+ }
+ ctx->new_packmode = '=';
+ ++ts;
+ break;
+ case '=':
+ case '@':
+ case '^':
+ ctx->new_packmode = *ts++;
+ break;
+ case 'T': /* substruct */
+ {
+ const char* ts_after_sub;
+ size_t i, struct_count = ctx->new_count;
+ size_t struct_alignment = ctx->struct_alignment;
+ ctx->new_count = 1;
+ ++ts;
+ if (*ts != '{') {
+ PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'");
+ return NULL;
+ }
+ if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+ ctx->enc_type = 0; /* Erase processed last struct element */
+ ctx->enc_count = 0;
+ ctx->struct_alignment = 0;
+ ++ts;
+ ts_after_sub = ts;
+ for (i = 0; i != struct_count; ++i) {
+ ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts);
+ if (!ts_after_sub) return NULL;
+ }
+ ts = ts_after_sub;
+ if (struct_alignment) ctx->struct_alignment = struct_alignment;
+ }
+ break;
+ case '}': /* end of substruct; either repeat or move on */
+ {
+ size_t alignment = ctx->struct_alignment;
+ ++ts;
+ if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+ ctx->enc_type = 0; /* Erase processed last struct element */
+ if (alignment && ctx->fmt_offset % alignment) {
+ /* Pad struct on size of the first member */
+ ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment);
+ }
+ }
+ return ts;
+ case 'x':
+ if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+ ctx->fmt_offset += ctx->new_count;
+ ctx->new_count = 1;
+ ctx->enc_count = 0;
+ ctx->enc_type = 0;
+ ctx->enc_packmode = ctx->new_packmode;
+ ++ts;
+ break;
+ case 'Z':
+ got_Z = 1;
+ ++ts;
+ if (*ts != 'f' && *ts != 'd' && *ts != 'g') {
+ __Pyx_BufFmt_RaiseUnexpectedChar('Z');
+ return NULL;
+ }
CYTHON_FALLTHROUGH;
case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I':
- case 'l': case 'L': case 'q': case 'Q':
- case 'f': case 'd': case 'g':
- case 'O': case 'p':
+ case 'l': case 'L': case 'q': case 'Q':
+ case 'f': case 'd': case 'g':
+ case 'O': case 'p':
if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) &&
(ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) {
- /* Continue pooling same type */
- ctx->enc_count += ctx->new_count;
- ctx->new_count = 1;
- got_Z = 0;
- ++ts;
- break;
- }
+ /* Continue pooling same type */
+ ctx->enc_count += ctx->new_count;
+ ctx->new_count = 1;
+ got_Z = 0;
+ ++ts;
+ break;
+ }
CYTHON_FALLTHROUGH;
- case 's':
- /* 's' or new type (cannot be added to current pool) */
- if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
- ctx->enc_count = ctx->new_count;
- ctx->enc_packmode = ctx->new_packmode;
- ctx->enc_type = *ts;
- ctx->is_complex = got_Z;
- ++ts;
- ctx->new_count = 1;
- got_Z = 0;
- break;
- case ':':
- ++ts;
- while(*ts != ':') ++ts;
- ++ts;
- break;
- case '(':
- if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL;
- break;
- default:
- {
- int number = __Pyx_BufFmt_ExpectNumber(&ts);
- if (number == -1) return NULL;
- ctx->new_count = (size_t)number;
- }
- }
- }
-}
-
-/////////////// TypeInfoCompare.proto ///////////////
-static int __pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b);
-
-/////////////// TypeInfoCompare ///////////////
+ case 's':
+ /* 's' or new type (cannot be added to current pool) */
+ if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+ ctx->enc_count = ctx->new_count;
+ ctx->enc_packmode = ctx->new_packmode;
+ ctx->enc_type = *ts;
+ ctx->is_complex = got_Z;
+ ++ts;
+ ctx->new_count = 1;
+ got_Z = 0;
+ break;
+ case ':':
+ ++ts;
+ while(*ts != ':') ++ts;
+ ++ts;
+ break;
+ case '(':
+ if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL;
+ break;
+ default:
+ {
+ int number = __Pyx_BufFmt_ExpectNumber(&ts);
+ if (number == -1) return NULL;
+ ctx->new_count = (size_t)number;
+ }
+ }
+ }
+}
+
+/////////////// TypeInfoCompare.proto ///////////////
+static int __pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b);
+
+/////////////// TypeInfoCompare ///////////////
//@requires: BufferFormatStructs
// See if two dtypes are equal
-static int
-__pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b)
-{
- int i;
-
- if (!a || !b)
- return 0;
-
- if (a == b)
- return 1;
-
- if (a->size != b->size || a->typegroup != b->typegroup ||
- a->is_unsigned != b->is_unsigned || a->ndim != b->ndim) {
- if (a->typegroup == 'H' || b->typegroup == 'H') {
- /* Special case for chars */
- return a->size == b->size;
- } else {
- return 0;
- }
- }
-
- if (a->ndim) {
- /* Verify multidimensional C arrays */
- for (i = 0; i < a->ndim; i++)
- if (a->arraysize[i] != b->arraysize[i])
- return 0;
- }
-
- if (a->typegroup == 'S') {
- /* Check for packed struct */
- if (a->flags != b->flags)
- return 0;
-
- /* compare all struct fields */
- if (a->fields || b->fields) {
- /* Check if both have fields */
- if (!(a->fields && b->fields))
- return 0;
-
- /* compare */
- for (i = 0; a->fields[i].type && b->fields[i].type; i++) {
- __Pyx_StructField *field_a = a->fields + i;
- __Pyx_StructField *field_b = b->fields + i;
-
- if (field_a->offset != field_b->offset ||
- !__pyx_typeinfo_cmp(field_a->type, field_b->type))
- return 0;
- }
-
- /* If all fields are processed, we have a match */
- return !a->fields[i].type && !b->fields[i].type;
- }
- }
-
- return 1;
-}
-
-
-/////////////// TypeInfoToFormat.proto ///////////////
-struct __pyx_typeinfo_string {
- char string[3];
-};
-static struct __pyx_typeinfo_string __Pyx_TypeInfoToFormat(__Pyx_TypeInfo *type);
-
-/////////////// TypeInfoToFormat ///////////////
+static int
+__pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b)
+{
+ int i;
+
+ if (!a || !b)
+ return 0;
+
+ if (a == b)
+ return 1;
+
+ if (a->size != b->size || a->typegroup != b->typegroup ||
+ a->is_unsigned != b->is_unsigned || a->ndim != b->ndim) {
+ if (a->typegroup == 'H' || b->typegroup == 'H') {
+ /* Special case for chars */
+ return a->size == b->size;
+ } else {
+ return 0;
+ }
+ }
+
+ if (a->ndim) {
+ /* Verify multidimensional C arrays */
+ for (i = 0; i < a->ndim; i++)
+ if (a->arraysize[i] != b->arraysize[i])
+ return 0;
+ }
+
+ if (a->typegroup == 'S') {
+ /* Check for packed struct */
+ if (a->flags != b->flags)
+ return 0;
+
+ /* compare all struct fields */
+ if (a->fields || b->fields) {
+ /* Check if both have fields */
+ if (!(a->fields && b->fields))
+ return 0;
+
+ /* compare */
+ for (i = 0; a->fields[i].type && b->fields[i].type; i++) {
+ __Pyx_StructField *field_a = a->fields + i;
+ __Pyx_StructField *field_b = b->fields + i;
+
+ if (field_a->offset != field_b->offset ||
+ !__pyx_typeinfo_cmp(field_a->type, field_b->type))
+ return 0;
+ }
+
+ /* If all fields are processed, we have a match */
+ return !a->fields[i].type && !b->fields[i].type;
+ }
+ }
+
+ return 1;
+}
+
+
+/////////////// TypeInfoToFormat.proto ///////////////
+struct __pyx_typeinfo_string {
+ char string[3];
+};
+static struct __pyx_typeinfo_string __Pyx_TypeInfoToFormat(__Pyx_TypeInfo *type);
+
+/////////////// TypeInfoToFormat ///////////////
//@requires: BufferFormatStructs
-
+
// See also MemoryView.pyx:BufferFormatFromTypeInfo
-static struct __pyx_typeinfo_string __Pyx_TypeInfoToFormat(__Pyx_TypeInfo *type) {
- struct __pyx_typeinfo_string result = { {0} };
- char *buf = (char *) result.string;
- size_t size = type->size;
-
- switch (type->typegroup) {
- case 'H':
- *buf = 'c';
- break;
- case 'I':
- case 'U':
- if (size == 1)
+static struct __pyx_typeinfo_string __Pyx_TypeInfoToFormat(__Pyx_TypeInfo *type) {
+ struct __pyx_typeinfo_string result = { {0} };
+ char *buf = (char *) result.string;
+ size_t size = type->size;
+
+ switch (type->typegroup) {
+ case 'H':
+ *buf = 'c';
+ break;
+ case 'I':
+ case 'U':
+ if (size == 1)
*buf = (type->is_unsigned) ? 'B' : 'b';
- else if (size == 2)
+ else if (size == 2)
*buf = (type->is_unsigned) ? 'H' : 'h';
- else if (size == 4)
+ else if (size == 4)
*buf = (type->is_unsigned) ? 'I' : 'i';
- else if (size == 8)
+ else if (size == 8)
*buf = (type->is_unsigned) ? 'Q' : 'q';
- break;
- case 'P':
- *buf = 'P';
- break;
- case 'C':
- {
- __Pyx_TypeInfo complex_type = *type;
- complex_type.typegroup = 'R';
- complex_type.size /= 2;
-
- *buf++ = 'Z';
- *buf = __Pyx_TypeInfoToFormat(&complex_type).string[0];
- break;
- }
- case 'R':
- if (size == 4)
- *buf = 'f';
- else if (size == 8)
- *buf = 'd';
- else
- *buf = 'g';
- break;
- }
-
- return result;
-}
+ break;
+ case 'P':
+ *buf = 'P';
+ break;
+ case 'C':
+ {
+ __Pyx_TypeInfo complex_type = *type;
+ complex_type.typegroup = 'R';
+ complex_type.size /= 2;
+
+ *buf++ = 'Z';
+ *buf = __Pyx_TypeInfoToFormat(&complex_type).string[0];
+ break;
+ }
+ case 'R':
+ if (size == 4)
+ *buf = 'f';
+ else if (size == 8)
+ *buf = 'd';
+ else
+ *buf = 'g';
+ break;
+ }
+
+ return result;
+}
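
The `__Pyx_TypeInfoToFormat` mapping above emits the same struct-module format characters that `__Pyx_BufFmt_CheckString` parses on the consumer side. A minimal standalone sketch of the integer branch of that table, for reference only; the helper name and test values are illustrative and not part of this diff:

```c
#include <stdio.h>
#include <stddef.h>

/* Pick a struct-module format character from an element size and signedness,
 * mirroring the 'I'/'U' branch of __Pyx_TypeInfoToFormat above. */
static char int_format_char(size_t size, int is_unsigned) {
    switch (size) {
        case 1: return is_unsigned ? 'B' : 'b';
        case 2: return is_unsigned ? 'H' : 'h';
        case 4: return is_unsigned ? 'I' : 'i';
        case 8: return is_unsigned ? 'Q' : 'q';
        default: return 0;  /* unsupported width */
    }
}

int main(void) {
    /* e.g. a signed 4-byte int maps to 'i', an unsigned 8-byte int to 'Q' */
    printf("%c %c\n", int_format_char(4, 0), int_format_char(8, 1));
    return 0;
}
```
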
diff --git a/contrib/tools/cython/Cython/Utility/Builtins.c b/contrib/tools/cython/Cython/Utility/Builtins.c
index e21a5583ac..1ffb3bcebd 100644
--- a/contrib/tools/cython/Cython/Utility/Builtins.c
+++ b/contrib/tools/cython/Cython/Utility/Builtins.c
@@ -1,180 +1,180 @@
-/*
- * Special implementations of built-in functions and methods.
- *
- * Optional optimisations for builtins are in Optimize.c.
- *
- * General object operations and protocols are in ObjectHandling.c.
- */
-
-//////////////////// Globals.proto ////////////////////
-
-static PyObject* __Pyx_Globals(void); /*proto*/
-
-//////////////////// Globals ////////////////////
-//@substitute: naming
-//@requires: ObjectHandling.c::GetAttr
-
-// This is a stub implementation until we have something more complete.
-// Currently, we only handle the most common case of a read-only dict
-// of Python names. Supporting cdef names in the module and write
-// access requires a rewrite as a dedicated class.
-
-static PyObject* __Pyx_Globals(void) {
- Py_ssize_t i;
- PyObject *names;
- PyObject *globals = $moddict_cname;
- Py_INCREF(globals);
- names = PyObject_Dir($module_cname);
- if (!names)
- goto bad;
- for (i = PyList_GET_SIZE(names)-1; i >= 0; i--) {
-#if CYTHON_COMPILING_IN_PYPY
+/*
+ * Special implementations of built-in functions and methods.
+ *
+ * Optional optimisations for builtins are in Optimize.c.
+ *
+ * General object operations and protocols are in ObjectHandling.c.
+ */
+
+//////////////////// Globals.proto ////////////////////
+
+static PyObject* __Pyx_Globals(void); /*proto*/
+
+//////////////////// Globals ////////////////////
+//@substitute: naming
+//@requires: ObjectHandling.c::GetAttr
+
+// This is a stub implementation until we have something more complete.
+// Currently, we only handle the most common case of a read-only dict
+// of Python names. Supporting cdef names in the module and write
+// access requires a rewrite as a dedicated class.
+
+static PyObject* __Pyx_Globals(void) {
+ Py_ssize_t i;
+ PyObject *names;
+ PyObject *globals = $moddict_cname;
+ Py_INCREF(globals);
+ names = PyObject_Dir($module_cname);
+ if (!names)
+ goto bad;
+ for (i = PyList_GET_SIZE(names)-1; i >= 0; i--) {
+#if CYTHON_COMPILING_IN_PYPY
PyObject* name = PySequence_ITEM(names, i);
- if (!name)
- goto bad;
-#else
- PyObject* name = PyList_GET_ITEM(names, i);
-#endif
- if (!PyDict_Contains(globals, name)) {
- PyObject* value = __Pyx_GetAttr($module_cname, name);
- if (!value) {
-#if CYTHON_COMPILING_IN_PYPY
- Py_DECREF(name);
-#endif
- goto bad;
- }
- if (PyDict_SetItem(globals, name, value) < 0) {
-#if CYTHON_COMPILING_IN_PYPY
- Py_DECREF(name);
-#endif
- Py_DECREF(value);
- goto bad;
- }
- }
-#if CYTHON_COMPILING_IN_PYPY
- Py_DECREF(name);
-#endif
- }
- Py_DECREF(names);
- return globals;
-bad:
- Py_XDECREF(names);
- Py_XDECREF(globals);
- return NULL;
-}
-
-//////////////////// PyExecGlobals.proto ////////////////////
-
-static PyObject* __Pyx_PyExecGlobals(PyObject*);
-
-//////////////////// PyExecGlobals ////////////////////
-//@requires: Globals
-//@requires: PyExec
-
-static PyObject* __Pyx_PyExecGlobals(PyObject* code) {
- PyObject* result;
- PyObject* globals = __Pyx_Globals();
- if (unlikely(!globals))
- return NULL;
- result = __Pyx_PyExec2(code, globals);
- Py_DECREF(globals);
- return result;
-}
-
-//////////////////// PyExec.proto ////////////////////
-
-static PyObject* __Pyx_PyExec3(PyObject*, PyObject*, PyObject*);
-static CYTHON_INLINE PyObject* __Pyx_PyExec2(PyObject*, PyObject*);
-
-//////////////////// PyExec ////////////////////
-//@substitute: naming
-
-static CYTHON_INLINE PyObject* __Pyx_PyExec2(PyObject* o, PyObject* globals) {
- return __Pyx_PyExec3(o, globals, NULL);
-}
-
-static PyObject* __Pyx_PyExec3(PyObject* o, PyObject* globals, PyObject* locals) {
- PyObject* result;
- PyObject* s = 0;
- char *code = 0;
-
- if (!globals || globals == Py_None) {
- globals = $moddict_cname;
- } else if (!PyDict_Check(globals)) {
- PyErr_Format(PyExc_TypeError, "exec() arg 2 must be a dict, not %.200s",
- Py_TYPE(globals)->tp_name);
- goto bad;
- }
- if (!locals || locals == Py_None) {
- locals = globals;
- }
-
+ if (!name)
+ goto bad;
+#else
+ PyObject* name = PyList_GET_ITEM(names, i);
+#endif
+ if (!PyDict_Contains(globals, name)) {
+ PyObject* value = __Pyx_GetAttr($module_cname, name);
+ if (!value) {
+#if CYTHON_COMPILING_IN_PYPY
+ Py_DECREF(name);
+#endif
+ goto bad;
+ }
+ if (PyDict_SetItem(globals, name, value) < 0) {
+#if CYTHON_COMPILING_IN_PYPY
+ Py_DECREF(name);
+#endif
+ Py_DECREF(value);
+ goto bad;
+ }
+ }
+#if CYTHON_COMPILING_IN_PYPY
+ Py_DECREF(name);
+#endif
+ }
+ Py_DECREF(names);
+ return globals;
+bad:
+ Py_XDECREF(names);
+ Py_XDECREF(globals);
+ return NULL;
+}
+
+//////////////////// PyExecGlobals.proto ////////////////////
+
+static PyObject* __Pyx_PyExecGlobals(PyObject*);
+
+//////////////////// PyExecGlobals ////////////////////
+//@requires: Globals
+//@requires: PyExec
+
+static PyObject* __Pyx_PyExecGlobals(PyObject* code) {
+ PyObject* result;
+ PyObject* globals = __Pyx_Globals();
+ if (unlikely(!globals))
+ return NULL;
+ result = __Pyx_PyExec2(code, globals);
+ Py_DECREF(globals);
+ return result;
+}
+
+//////////////////// PyExec.proto ////////////////////
+
+static PyObject* __Pyx_PyExec3(PyObject*, PyObject*, PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyExec2(PyObject*, PyObject*);
+
+//////////////////// PyExec ////////////////////
+//@substitute: naming
+
+static CYTHON_INLINE PyObject* __Pyx_PyExec2(PyObject* o, PyObject* globals) {
+ return __Pyx_PyExec3(o, globals, NULL);
+}
+
+static PyObject* __Pyx_PyExec3(PyObject* o, PyObject* globals, PyObject* locals) {
+ PyObject* result;
+ PyObject* s = 0;
+ char *code = 0;
+
+ if (!globals || globals == Py_None) {
+ globals = $moddict_cname;
+ } else if (!PyDict_Check(globals)) {
+ PyErr_Format(PyExc_TypeError, "exec() arg 2 must be a dict, not %.200s",
+ Py_TYPE(globals)->tp_name);
+ goto bad;
+ }
+ if (!locals || locals == Py_None) {
+ locals = globals;
+ }
+
if (__Pyx_PyDict_GetItemStr(globals, PYIDENT("__builtins__")) == NULL) {
- if (PyDict_SetItem(globals, PYIDENT("__builtins__"), PyEval_GetBuiltins()) < 0)
- goto bad;
- }
-
- if (PyCode_Check(o)) {
+ if (PyDict_SetItem(globals, PYIDENT("__builtins__"), PyEval_GetBuiltins()) < 0)
+ goto bad;
+ }
+
+ if (PyCode_Check(o)) {
if (__Pyx_PyCode_HasFreeVars((PyCodeObject *)o)) {
- PyErr_SetString(PyExc_TypeError,
- "code object passed to exec() may not contain free variables");
- goto bad;
- }
+ PyErr_SetString(PyExc_TypeError,
+ "code object passed to exec() may not contain free variables");
+ goto bad;
+ }
#if CYTHON_COMPILING_IN_PYPY || PY_VERSION_HEX < 0x030200B1
- result = PyEval_EvalCode((PyCodeObject *)o, globals, locals);
- #else
- result = PyEval_EvalCode(o, globals, locals);
- #endif
- } else {
- PyCompilerFlags cf;
- cf.cf_flags = 0;
+ result = PyEval_EvalCode((PyCodeObject *)o, globals, locals);
+ #else
+ result = PyEval_EvalCode(o, globals, locals);
+ #endif
+ } else {
+ PyCompilerFlags cf;
+ cf.cf_flags = 0;
#if PY_VERSION_HEX >= 0x030800A3
cf.cf_feature_version = PY_MINOR_VERSION;
#endif
- if (PyUnicode_Check(o)) {
- cf.cf_flags = PyCF_SOURCE_IS_UTF8;
- s = PyUnicode_AsUTF8String(o);
- if (!s) goto bad;
- o = s;
- #if PY_MAJOR_VERSION >= 3
- } else if (!PyBytes_Check(o)) {
- #else
- } else if (!PyString_Check(o)) {
- #endif
- PyErr_Format(PyExc_TypeError,
- "exec: arg 1 must be string, bytes or code object, got %.200s",
- Py_TYPE(o)->tp_name);
- goto bad;
- }
- #if PY_MAJOR_VERSION >= 3
- code = PyBytes_AS_STRING(o);
- #else
- code = PyString_AS_STRING(o);
- #endif
- if (PyEval_MergeCompilerFlags(&cf)) {
- result = PyRun_StringFlags(code, Py_file_input, globals, locals, &cf);
- } else {
- result = PyRun_String(code, Py_file_input, globals, locals);
- }
- Py_XDECREF(s);
- }
-
- return result;
-bad:
- Py_XDECREF(s);
- return 0;
-}
-
-//////////////////// GetAttr3.proto ////////////////////
-
-static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *, PyObject *, PyObject *); /*proto*/
-
-//////////////////// GetAttr3 ////////////////////
-//@requires: ObjectHandling.c::GetAttr
+ if (PyUnicode_Check(o)) {
+ cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+ s = PyUnicode_AsUTF8String(o);
+ if (!s) goto bad;
+ o = s;
+ #if PY_MAJOR_VERSION >= 3
+ } else if (!PyBytes_Check(o)) {
+ #else
+ } else if (!PyString_Check(o)) {
+ #endif
+ PyErr_Format(PyExc_TypeError,
+ "exec: arg 1 must be string, bytes or code object, got %.200s",
+ Py_TYPE(o)->tp_name);
+ goto bad;
+ }
+ #if PY_MAJOR_VERSION >= 3
+ code = PyBytes_AS_STRING(o);
+ #else
+ code = PyString_AS_STRING(o);
+ #endif
+ if (PyEval_MergeCompilerFlags(&cf)) {
+ result = PyRun_StringFlags(code, Py_file_input, globals, locals, &cf);
+ } else {
+ result = PyRun_String(code, Py_file_input, globals, locals);
+ }
+ Py_XDECREF(s);
+ }
+
+ return result;
+bad:
+ Py_XDECREF(s);
+ return 0;
+}
+
+//////////////////// GetAttr3.proto ////////////////////
+
+static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *, PyObject *, PyObject *); /*proto*/
+
+//////////////////// GetAttr3 ////////////////////
+//@requires: ObjectHandling.c::GetAttr
//@requires: Exceptions.c::PyThreadStateGet
//@requires: Exceptions.c::PyErrFetchRestore
//@requires: Exceptions.c::PyErrExceptionMatches
-
+
static PyObject *__Pyx_GetAttr3Default(PyObject *d) {
__Pyx_PyThreadState_declare
__Pyx_PyThreadState_assign
@@ -185,8 +185,8 @@ static PyObject *__Pyx_GetAttr3Default(PyObject *d) {
return d;
}
-static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *o, PyObject *n, PyObject *d) {
- PyObject *r = __Pyx_GetAttr(o, n);
+static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *o, PyObject *n, PyObject *d) {
+ PyObject *r = __Pyx_GetAttr(o, n);
return (likely(r)) ? r : __Pyx_GetAttr3Default(d);
}
@@ -205,37 +205,37 @@ static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) {
return -1;
}
r = __Pyx_GetAttr(o, n);
- if (unlikely(!r)) {
- PyErr_Clear();
+ if (unlikely(!r)) {
+ PyErr_Clear();
return 0;
} else {
Py_DECREF(r);
return 1;
- }
-}
-
-//////////////////// Intern.proto ////////////////////
-
-static PyObject* __Pyx_Intern(PyObject* s); /* proto */
-
-//////////////////// Intern ////////////////////
-
-static PyObject* __Pyx_Intern(PyObject* s) {
- if (!(likely(PyString_CheckExact(s)))) {
- PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "str", Py_TYPE(s)->tp_name);
- return 0;
- }
- Py_INCREF(s);
- #if PY_MAJOR_VERSION >= 3
- PyUnicode_InternInPlace(&s);
- #else
- PyString_InternInPlace(&s);
- #endif
- return s;
-}
-
-//////////////////// abs_longlong.proto ////////////////////
-
+ }
+}
+
+//////////////////// Intern.proto ////////////////////
+
+static PyObject* __Pyx_Intern(PyObject* s); /* proto */
+
+//////////////////// Intern ////////////////////
+
+static PyObject* __Pyx_Intern(PyObject* s) {
+ if (!(likely(PyString_CheckExact(s)))) {
+ PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "str", Py_TYPE(s)->tp_name);
+ return 0;
+ }
+ Py_INCREF(s);
+ #if PY_MAJOR_VERSION >= 3
+ PyUnicode_InternInPlace(&s);
+ #else
+ PyString_InternInPlace(&s);
+ #endif
+ return s;
+}
+
+//////////////////// abs_longlong.proto ////////////////////
+
static CYTHON_INLINE PY_LONG_LONG __Pyx_abs_longlong(PY_LONG_LONG x) {
#if defined (__cplusplus) && __cplusplus >= 201103L
return std::abs(x);
@@ -248,13 +248,13 @@ static CYTHON_INLINE PY_LONG_LONG __Pyx_abs_longlong(PY_LONG_LONG x) {
#elif defined (__GNUC__)
// gcc or clang on 64 bit windows.
return __builtin_llabs(x);
-#else
+#else
if (sizeof(PY_LONG_LONG) <= sizeof(Py_ssize_t))
return __Pyx_sst_abs(x);
return (x<0) ? -x : x;
-#endif
-}
-
+#endif
+}
+
//////////////////// py_abs.proto ////////////////////
@@ -294,10 +294,10 @@ static PyObject *__Pyx_PyLong_AbsNeg(PyObject *n) {
#endif
-//////////////////// pow2.proto ////////////////////
-
-#define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None)
-
+//////////////////// pow2.proto ////////////////////
+
+#define __Pyx_PyNumber_Power2(a, b) PyNumber_Power(a, b, Py_None)
+
//////////////////// object_ord.proto ////////////////////
//@requires: TypeConversion.c::UnicodeAsUCS4
@@ -342,143 +342,143 @@ static long __Pyx__PyObject_Ord(PyObject* c) {
}
-//////////////////// py_dict_keys.proto ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d); /*proto*/
-
-//////////////////// py_dict_keys ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d) {
- if (PY_MAJOR_VERSION >= 3)
+//////////////////// py_dict_keys.proto ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d); /*proto*/
+
+//////////////////// py_dict_keys ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_Keys(PyObject* d) {
+ if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "keys", d);
- else
- return PyDict_Keys(d);
-}
-
-//////////////////// py_dict_values.proto ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_Values(PyObject* d); /*proto*/
-
-//////////////////// py_dict_values ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_Values(PyObject* d) {
- if (PY_MAJOR_VERSION >= 3)
+ else
+ return PyDict_Keys(d);
+}
+
+//////////////////// py_dict_values.proto ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_Values(PyObject* d); /*proto*/
+
+//////////////////// py_dict_values ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_Values(PyObject* d) {
+ if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "values", d);
- else
- return PyDict_Values(d);
-}
-
-//////////////////// py_dict_items.proto ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_Items(PyObject* d); /*proto*/
-
-//////////////////// py_dict_items ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_Items(PyObject* d) {
- if (PY_MAJOR_VERSION >= 3)
+ else
+ return PyDict_Values(d);
+}
+
+//////////////////// py_dict_items.proto ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_Items(PyObject* d); /*proto*/
+
+//////////////////// py_dict_items ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_Items(PyObject* d) {
+ if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "items", d);
- else
- return PyDict_Items(d);
-}
-
-//////////////////// py_dict_iterkeys.proto ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_IterKeys(PyObject* d); /*proto*/
-
-//////////////////// py_dict_iterkeys ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_IterKeys(PyObject* d) {
+ else
+ return PyDict_Items(d);
+}
+
+//////////////////// py_dict_iterkeys.proto ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_IterKeys(PyObject* d); /*proto*/
+
+//////////////////// py_dict_iterkeys ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_IterKeys(PyObject* d) {
if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "keys", d);
else
return CALL_UNBOUND_METHOD(PyDict_Type, "iterkeys", d);
-}
-
-//////////////////// py_dict_itervalues.proto ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_IterValues(PyObject* d); /*proto*/
-
-//////////////////// py_dict_itervalues ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_IterValues(PyObject* d) {
+}
+
+//////////////////// py_dict_itervalues.proto ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_IterValues(PyObject* d); /*proto*/
+
+//////////////////// py_dict_itervalues ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_IterValues(PyObject* d) {
if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "values", d);
else
return CALL_UNBOUND_METHOD(PyDict_Type, "itervalues", d);
-}
-
-//////////////////// py_dict_iteritems.proto ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_IterItems(PyObject* d); /*proto*/
-
-//////////////////// py_dict_iteritems ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_IterItems(PyObject* d) {
+}
+
+//////////////////// py_dict_iteritems.proto ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_IterItems(PyObject* d); /*proto*/
+
+//////////////////// py_dict_iteritems ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_IterItems(PyObject* d) {
if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "items", d);
else
return CALL_UNBOUND_METHOD(PyDict_Type, "iteritems", d);
-}
-
-//////////////////// py_dict_viewkeys.proto ////////////////////
-
-#if PY_VERSION_HEX < 0x02070000
-#error This module uses dict views, which require Python 2.7 or later
-#endif
-static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewKeys(PyObject* d); /*proto*/
-
-//////////////////// py_dict_viewkeys ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewKeys(PyObject* d) {
+}
+
+//////////////////// py_dict_viewkeys.proto ////////////////////
+
+#if PY_VERSION_HEX < 0x02070000
+#error This module uses dict views, which require Python 2.7 or later
+#endif
+static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewKeys(PyObject* d); /*proto*/
+
+//////////////////// py_dict_viewkeys ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewKeys(PyObject* d) {
if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "keys", d);
else
return CALL_UNBOUND_METHOD(PyDict_Type, "viewkeys", d);
-}
-
-//////////////////// py_dict_viewvalues.proto ////////////////////
-
-#if PY_VERSION_HEX < 0x02070000
-#error This module uses dict views, which require Python 2.7 or later
-#endif
-static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewValues(PyObject* d); /*proto*/
-
-//////////////////// py_dict_viewvalues ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewValues(PyObject* d) {
+}
+
+//////////////////// py_dict_viewvalues.proto ////////////////////
+
+#if PY_VERSION_HEX < 0x02070000
+#error This module uses dict views, which require Python 2.7 or later
+#endif
+static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewValues(PyObject* d); /*proto*/
+
+//////////////////// py_dict_viewvalues ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewValues(PyObject* d) {
if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "values", d);
else
return CALL_UNBOUND_METHOD(PyDict_Type, "viewvalues", d);
-}
-
-//////////////////// py_dict_viewitems.proto ////////////////////
-
-#if PY_VERSION_HEX < 0x02070000
-#error This module uses dict views, which require Python 2.7 or later
-#endif
-static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewItems(PyObject* d); /*proto*/
-
-//////////////////// py_dict_viewitems ////////////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewItems(PyObject* d) {
+}
+
+//////////////////// py_dict_viewitems.proto ////////////////////
+
+#if PY_VERSION_HEX < 0x02070000
+#error This module uses dict views, which require Python 2.7 or later
+#endif
+static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewItems(PyObject* d); /*proto*/
+
+//////////////////// py_dict_viewitems ////////////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyDict_ViewItems(PyObject* d) {
if (PY_MAJOR_VERSION >= 3)
return CALL_UNBOUND_METHOD(PyDict_Type, "items", d);
else
return CALL_UNBOUND_METHOD(PyDict_Type, "viewitems", d);
-}
-
+}
+
-//////////////////// pyfrozenset_new.proto ////////////////////
+//////////////////// pyfrozenset_new.proto ////////////////////
static CYTHON_INLINE PyObject* __Pyx_PyFrozenSet_New(PyObject* it);
//////////////////// pyfrozenset_new ////////////////////
-//@substitute: naming
-
-static CYTHON_INLINE PyObject* __Pyx_PyFrozenSet_New(PyObject* it) {
- if (it) {
- PyObject* result;
+//@substitute: naming
+
+static CYTHON_INLINE PyObject* __Pyx_PyFrozenSet_New(PyObject* it) {
+ if (it) {
+ PyObject* result;
#if CYTHON_COMPILING_IN_PYPY
// PyPy currently lacks PyFrozenSet_CheckExact() and PyFrozenSet_New()
PyObject* args;
@@ -489,24 +489,24 @@ static CYTHON_INLINE PyObject* __Pyx_PyFrozenSet_New(PyObject* it) {
Py_DECREF(args);
return result;
#else
- if (PyFrozenSet_CheckExact(it)) {
- Py_INCREF(it);
- return it;
- }
- result = PyFrozenSet_New(it);
- if (unlikely(!result))
- return NULL;
+ if (PyFrozenSet_CheckExact(it)) {
+ Py_INCREF(it);
+ return it;
+ }
+ result = PyFrozenSet_New(it);
+ if (unlikely(!result))
+ return NULL;
if ((PY_VERSION_HEX >= 0x031000A1) || likely(PySet_GET_SIZE(result)))
- return result;
+ return result;
// empty frozenset is a singleton (on Python <3.10)
- // seems wasteful, but CPython does the same
- Py_DECREF(result);
+ // seems wasteful, but CPython does the same
+ Py_DECREF(result);
#endif
- }
+ }
#if CYTHON_USE_TYPE_SLOTS
- return PyFrozenSet_Type.tp_new(&PyFrozenSet_Type, $empty_tuple, NULL);
+ return PyFrozenSet_Type.tp_new(&PyFrozenSet_Type, $empty_tuple, NULL);
#else
- return PyObject_Call((PyObject*)&PyFrozenSet_Type, $empty_tuple, NULL);
+ return PyObject_Call((PyObject*)&PyFrozenSet_Type, $empty_tuple, NULL);
#endif
}
@@ -534,9 +534,9 @@ static CYTHON_INLINE int __Pyx_PySet_Update(PyObject* set, PyObject* it) {
// unusual result, fall through to set.update() call below
Py_DECREF(retval);
}
- #endif
+ #endif
retval = CALL_UNBOUND_METHOD(PySet_Type, "update", set, it);
if (unlikely(!retval)) return -1;
Py_DECREF(retval);
return 0;
-}
+}
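
For plain source strings, `__Pyx_PyExec3` above falls back to `PyRun_StringFlags` after making sure `__builtins__` is present in the globals dict. A minimal embedding sketch of that fallback path using only documented CPython calls; the source string and variable names are just examples, not taken from the diff:

```c
#include <Python.h>
#include <stdio.h>

int main(void) {
    Py_Initialize();

    PyObject *globals = PyDict_New();
    /* exec() semantics: the namespace must be able to see the builtins */
    PyDict_SetItemString(globals, "__builtins__", PyEval_GetBuiltins());

    /* Py_file_input compiles the string as a module body, like exec() does */
    PyObject *result = PyRun_String("x = 2 + 2", Py_file_input, globals, globals);
    if (!result) {
        PyErr_Print();
    } else {
        Py_DECREF(result);
        PyObject *x = PyDict_GetItemString(globals, "x");  /* borrowed reference */
        printf("x = %ld\n", PyLong_AsLong(x));
    }

    Py_DECREF(globals);
    Py_FinalizeEx();
    return 0;
}
```
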
diff --git a/contrib/tools/cython/Cython/Utility/Capsule.c b/contrib/tools/cython/Cython/Utility/Capsule.c
index 448e4a7304..cc4fe0d887 100644
--- a/contrib/tools/cython/Cython/Utility/Capsule.c
+++ b/contrib/tools/cython/Cython/Utility/Capsule.c
@@ -1,20 +1,20 @@
-//////////////// Capsule.proto ////////////////
-
-/* Todo: wrap the rest of the functionality in similar functions */
-static CYTHON_INLINE PyObject *__pyx_capsule_create(void *p, const char *sig);
-
-//////////////// Capsule ////////////////
-
-static CYTHON_INLINE PyObject *
-__pyx_capsule_create(void *p, CYTHON_UNUSED const char *sig)
-{
- PyObject *cobj;
-
-#if PY_VERSION_HEX >= 0x02070000
- cobj = PyCapsule_New(p, sig, NULL);
-#else
- cobj = PyCObject_FromVoidPtr(p, NULL);
-#endif
-
- return cobj;
-}
+//////////////// Capsule.proto ////////////////
+
+/* Todo: wrap the rest of the functionality in similar functions */
+static CYTHON_INLINE PyObject *__pyx_capsule_create(void *p, const char *sig);
+
+//////////////// Capsule ////////////////
+
+static CYTHON_INLINE PyObject *
+__pyx_capsule_create(void *p, CYTHON_UNUSED const char *sig)
+{
+ PyObject *cobj;
+
+#if PY_VERSION_HEX >= 0x02070000
+ cobj = PyCapsule_New(p, sig, NULL);
+#else
+ cobj = PyCObject_FromVoidPtr(p, NULL);
+#endif
+
+ return cobj;
+}
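
`__pyx_capsule_create` wraps a raw pointer in a `PyCapsule` (falling back to `PyCObject` only on pre-2.7 Pythons) so that one extension module can hand C-level data to another. A small sketch of the round trip, assuming a recent Python where only the capsule API is needed; the value and name are illustrative:

```c
#include <Python.h>
#include <stdio.h>

static int shared_value = 42;

int main(void) {
    Py_Initialize();

    /* Export side: wrap the pointer with a signature string, as the helper above does. */
    PyObject *capsule = PyCapsule_New(&shared_value, "int *", NULL);

    /* Import side: the same name must be passed back to recover the pointer;
     * on a name mismatch PyCapsule_GetPointer returns NULL and sets an exception. */
    int *p = (int *)PyCapsule_GetPointer(capsule, "int *");
    printf("%d\n", p ? *p : -1);

    Py_DECREF(capsule);
    Py_FinalizeEx();
    return 0;
}
```
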
diff --git a/contrib/tools/cython/Cython/Utility/CommonTypes.c b/contrib/tools/cython/Cython/Utility/CommonTypes.c
index 522f0a10e4..c2403cbf98 100644
--- a/contrib/tools/cython/Cython/Utility/CommonTypes.c
+++ b/contrib/tools/cython/Cython/Utility/CommonTypes.c
@@ -1,48 +1,48 @@
-/////////////// FetchCommonType.proto ///////////////
-
-static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);
-
-/////////////// FetchCommonType ///////////////
-
-static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) {
- PyObject* fake_module;
- PyTypeObject* cached_type = NULL;
-
- fake_module = PyImport_AddModule((char*) "_cython_" CYTHON_ABI);
- if (!fake_module) return NULL;
- Py_INCREF(fake_module);
-
- cached_type = (PyTypeObject*) PyObject_GetAttrString(fake_module, type->tp_name);
- if (cached_type) {
- if (!PyType_Check((PyObject*)cached_type)) {
- PyErr_Format(PyExc_TypeError,
- "Shared Cython type %.200s is not a type object",
- type->tp_name);
- goto bad;
- }
- if (cached_type->tp_basicsize != type->tp_basicsize) {
- PyErr_Format(PyExc_TypeError,
- "Shared Cython type %.200s has the wrong size, try recompiling",
- type->tp_name);
- goto bad;
- }
- } else {
- if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad;
- PyErr_Clear();
- if (PyType_Ready(type) < 0) goto bad;
- if (PyObject_SetAttrString(fake_module, type->tp_name, (PyObject*) type) < 0)
- goto bad;
- Py_INCREF(type);
- cached_type = type;
- }
-
-done:
- Py_DECREF(fake_module);
- // NOTE: always returns owned reference, or NULL on error
- return cached_type;
-
-bad:
- Py_XDECREF(cached_type);
- cached_type = NULL;
- goto done;
-}
+/////////////// FetchCommonType.proto ///////////////
+
+static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);
+
+/////////////// FetchCommonType ///////////////
+
+static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) {
+ PyObject* fake_module;
+ PyTypeObject* cached_type = NULL;
+
+ fake_module = PyImport_AddModule((char*) "_cython_" CYTHON_ABI);
+ if (!fake_module) return NULL;
+ Py_INCREF(fake_module);
+
+ cached_type = (PyTypeObject*) PyObject_GetAttrString(fake_module, type->tp_name);
+ if (cached_type) {
+ if (!PyType_Check((PyObject*)cached_type)) {
+ PyErr_Format(PyExc_TypeError,
+ "Shared Cython type %.200s is not a type object",
+ type->tp_name);
+ goto bad;
+ }
+ if (cached_type->tp_basicsize != type->tp_basicsize) {
+ PyErr_Format(PyExc_TypeError,
+ "Shared Cython type %.200s has the wrong size, try recompiling",
+ type->tp_name);
+ goto bad;
+ }
+ } else {
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad;
+ PyErr_Clear();
+ if (PyType_Ready(type) < 0) goto bad;
+ if (PyObject_SetAttrString(fake_module, type->tp_name, (PyObject*) type) < 0)
+ goto bad;
+ Py_INCREF(type);
+ cached_type = type;
+ }
+
+done:
+ Py_DECREF(fake_module);
+ // NOTE: always returns owned reference, or NULL on error
+ return cached_type;
+
+bad:
+ Py_XDECREF(cached_type);
+ cached_type = NULL;
+ goto done;
+}
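
`__Pyx_FetchCommonType` keeps a single instance of each internal type in a fake `"_cython_" CYTHON_ABI` module, so independently compiled modules end up sharing it. A sketch of the caller-side pattern a generated module would typically use, assuming the function as defined above; the static type object and names here are stand-ins, not code from the diff:

```c
#include <Python.h>

/* Hypothetical module-local definition of a helper type. */
static PyTypeObject __pyx_LocalHelperType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "helper_type",            /* tp_name */
    sizeof(PyObject),         /* tp_basicsize */
};

static PyTypeObject *__pyx_SharedHelperType = NULL;

/* Swap the module-local definition for the process-wide cached instance. */
static int init_shared_type(void) {
    __pyx_SharedHelperType = __Pyx_FetchCommonType(&__pyx_LocalHelperType);
    return __pyx_SharedHelperType ? 0 : -1;
}
```
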
diff --git a/contrib/tools/cython/Cython/Utility/CppConvert.pyx b/contrib/tools/cython/Cython/Utility/CppConvert.pyx
index 7b6bdf9e65..5f7859dd0e 100644
--- a/contrib/tools/cython/Cython/Utility/CppConvert.pyx
+++ b/contrib/tools/cython/Cython/Utility/CppConvert.pyx
@@ -1,212 +1,212 @@
-# TODO: Figure out how many of the pass-by-value copies the compiler can eliminate.
-
-
-#################### string.from_py ####################
-
-cdef extern from *:
- cdef cppclass string "{{type}}":
- string()
- string(char* c_str, size_t size)
+# TODO: Figure out how many of the pass-by-value copies the compiler can eliminate.
+
+
+#################### string.from_py ####################
+
+cdef extern from *:
+ cdef cppclass string "{{type}}":
+ string()
+ string(char* c_str, size_t size)
cdef const char* __Pyx_PyObject_AsStringAndSize(object, Py_ssize_t*) except NULL
-
-@cname("{{cname}}")
-cdef string {{cname}}(object o) except *:
+
+@cname("{{cname}}")
+cdef string {{cname}}(object o) except *:
cdef Py_ssize_t length = 0
cdef const char* data = __Pyx_PyObject_AsStringAndSize(o, &length)
- return string(data, length)
-
-
-#################### string.to_py ####################
-
-#cimport cython
-#from libcpp.string cimport string
-cdef extern from *:
- cdef cppclass string "{{type}}":
- char* data()
- size_t size()
-
+ return string(data, length)
+
+
+#################### string.to_py ####################
+
+#cimport cython
+#from libcpp.string cimport string
+cdef extern from *:
+ cdef cppclass string "{{type}}":
+ char* data()
+ size_t size()
+
{{for py_type in ['PyObject', 'PyUnicode', 'PyStr', 'PyBytes', 'PyByteArray']}}
cdef extern from *:
cdef object __Pyx_{{py_type}}_FromStringAndSize(const char*, size_t)
-
+
@cname("{{cname.replace("PyObject", py_type, 1)}}")
cdef inline object {{cname.replace("PyObject", py_type, 1)}}(const string& s):
return __Pyx_{{py_type}}_FromStringAndSize(s.data(), s.size())
{{endfor}}
-
-
-#################### vector.from_py ####################
-
-cdef extern from *:
- cdef cppclass vector "std::vector" [T]:
- void push_back(T&)
-
-@cname("{{cname}}")
-cdef vector[X] {{cname}}(object o) except *:
- cdef vector[X] v
- for item in o:
+
+
+#################### vector.from_py ####################
+
+cdef extern from *:
+ cdef cppclass vector "std::vector" [T]:
+ void push_back(T&)
+
+@cname("{{cname}}")
+cdef vector[X] {{cname}}(object o) except *:
+ cdef vector[X] v
+ for item in o:
v.push_back(<X>item)
- return v
-
-
-#################### vector.to_py ####################
-
-cdef extern from *:
- cdef cppclass vector "const std::vector" [T]:
- size_t size()
- T& operator[](size_t)
-
-@cname("{{cname}}")
-cdef object {{cname}}(vector[X]& v):
+ return v
+
+
+#################### vector.to_py ####################
+
+cdef extern from *:
+ cdef cppclass vector "const std::vector" [T]:
+ size_t size()
+ T& operator[](size_t)
+
+@cname("{{cname}}")
+cdef object {{cname}}(vector[X]& v):
return [v[i] for i in range(v.size())]
-
-
-#################### list.from_py ####################
-
-cdef extern from *:
- cdef cppclass cpp_list "std::list" [T]:
- void push_back(T&)
-
-@cname("{{cname}}")
-cdef cpp_list[X] {{cname}}(object o) except *:
- cdef cpp_list[X] l
- for item in o:
+
+
+#################### list.from_py ####################
+
+cdef extern from *:
+ cdef cppclass cpp_list "std::list" [T]:
+ void push_back(T&)
+
+@cname("{{cname}}")
+cdef cpp_list[X] {{cname}}(object o) except *:
+ cdef cpp_list[X] l
+ for item in o:
l.push_back(<X>item)
- return l
-
-
-#################### list.to_py ####################
-
-cimport cython
-
-cdef extern from *:
- cdef cppclass cpp_list "std::list" [T]:
- cppclass const_iterator:
- T& operator*()
- const_iterator operator++()
- bint operator!=(const_iterator)
- const_iterator begin()
- const_iterator end()
-
-@cname("{{cname}}")
-cdef object {{cname}}(const cpp_list[X]& v):
- o = []
- cdef cpp_list[X].const_iterator iter = v.begin()
- while iter != v.end():
+ return l
+
+
+#################### list.to_py ####################
+
+cimport cython
+
+cdef extern from *:
+ cdef cppclass cpp_list "std::list" [T]:
+ cppclass const_iterator:
+ T& operator*()
+ const_iterator operator++()
+ bint operator!=(const_iterator)
+ const_iterator begin()
+ const_iterator end()
+
+@cname("{{cname}}")
+cdef object {{cname}}(const cpp_list[X]& v):
+ o = []
+ cdef cpp_list[X].const_iterator iter = v.begin()
+ while iter != v.end():
o.append(cython.operator.dereference(iter))
- cython.operator.preincrement(iter)
- return o
-
-
-#################### set.from_py ####################
-
-cdef extern from *:
- cdef cppclass set "std::{{maybe_unordered}}set" [T]:
- void insert(T&)
-
-@cname("{{cname}}")
-cdef set[X] {{cname}}(object o) except *:
- cdef set[X] s
- for item in o:
+ cython.operator.preincrement(iter)
+ return o
+
+
+#################### set.from_py ####################
+
+cdef extern from *:
+ cdef cppclass set "std::{{maybe_unordered}}set" [T]:
+ void insert(T&)
+
+@cname("{{cname}}")
+cdef set[X] {{cname}}(object o) except *:
+ cdef set[X] s
+ for item in o:
s.insert(<X>item)
- return s
-
-
-#################### set.to_py ####################
-
-cimport cython
-
-cdef extern from *:
- cdef cppclass cpp_set "std::{{maybe_unordered}}set" [T]:
- cppclass const_iterator:
- T& operator*()
- const_iterator operator++()
- bint operator!=(const_iterator)
- const_iterator begin()
- const_iterator end()
-
-@cname("{{cname}}")
-cdef object {{cname}}(const cpp_set[X]& s):
- o = set()
- cdef cpp_set[X].const_iterator iter = s.begin()
- while iter != s.end():
+ return s
+
+
+#################### set.to_py ####################
+
+cimport cython
+
+cdef extern from *:
+ cdef cppclass cpp_set "std::{{maybe_unordered}}set" [T]:
+ cppclass const_iterator:
+ T& operator*()
+ const_iterator operator++()
+ bint operator!=(const_iterator)
+ const_iterator begin()
+ const_iterator end()
+
+@cname("{{cname}}")
+cdef object {{cname}}(const cpp_set[X]& s):
+ o = set()
+ cdef cpp_set[X].const_iterator iter = s.begin()
+ while iter != s.end():
o.add(cython.operator.dereference(iter))
- cython.operator.preincrement(iter)
- return o
-
-#################### pair.from_py ####################
-
-cdef extern from *:
- cdef cppclass pair "std::pair" [T, U]:
- pair()
- pair(T&, U&)
-
-@cname("{{cname}}")
-cdef pair[X,Y] {{cname}}(object o) except *:
- x, y = o
+ cython.operator.preincrement(iter)
+ return o
+
+#################### pair.from_py ####################
+
+cdef extern from *:
+ cdef cppclass pair "std::pair" [T, U]:
+ pair()
+ pair(T&, U&)
+
+@cname("{{cname}}")
+cdef pair[X,Y] {{cname}}(object o) except *:
+ x, y = o
return pair[X,Y](<X>x, <Y>y)
-
-
-#################### pair.to_py ####################
-
-cdef extern from *:
- cdef cppclass pair "std::pair" [T, U]:
- T first
- U second
-
-@cname("{{cname}}")
-cdef object {{cname}}(const pair[X,Y]& p):
+
+
+#################### pair.to_py ####################
+
+cdef extern from *:
+ cdef cppclass pair "std::pair" [T, U]:
+ T first
+ U second
+
+@cname("{{cname}}")
+cdef object {{cname}}(const pair[X,Y]& p):
return p.first, p.second
-
-
-#################### map.from_py ####################
-
-cdef extern from *:
- cdef cppclass pair "std::pair" [T, U]:
- pair(T&, U&)
- cdef cppclass map "std::{{maybe_unordered}}map" [T, U]:
- void insert(pair[T, U]&)
- cdef cppclass vector "std::vector" [T]:
- pass
-
-
-@cname("{{cname}}")
-cdef map[X,Y] {{cname}}(object o) except *:
- cdef dict d = o
- cdef map[X,Y] m
- for key, value in d.iteritems():
+
+
+#################### map.from_py ####################
+
+cdef extern from *:
+ cdef cppclass pair "std::pair" [T, U]:
+ pair(T&, U&)
+ cdef cppclass map "std::{{maybe_unordered}}map" [T, U]:
+ void insert(pair[T, U]&)
+ cdef cppclass vector "std::vector" [T]:
+ pass
+
+
+@cname("{{cname}}")
+cdef map[X,Y] {{cname}}(object o) except *:
+ cdef dict d = o
+ cdef map[X,Y] m
+ for key, value in d.iteritems():
m.insert(pair[X,Y](<X>key, <Y>value))
- return m
-
-
-#################### map.to_py ####################
-# TODO: Work out const so that this can take a const
-# reference rather than pass by value.
-
-cimport cython
-
-cdef extern from *:
- cdef cppclass map "std::{{maybe_unordered}}map" [T, U]:
- cppclass value_type:
- T first
- U second
- cppclass const_iterator:
- value_type& operator*()
- const_iterator operator++()
- bint operator!=(const_iterator)
- const_iterator begin()
- const_iterator end()
-
-@cname("{{cname}}")
-cdef object {{cname}}(const map[X,Y]& s):
- o = {}
- cdef const map[X,Y].value_type *key_value
- cdef map[X,Y].const_iterator iter = s.begin()
- while iter != s.end():
- key_value = &cython.operator.dereference(iter)
+ return m
+
+
+#################### map.to_py ####################
+# TODO: Work out const so that this can take a const
+# reference rather than pass by value.
+
+cimport cython
+
+cdef extern from *:
+ cdef cppclass map "std::{{maybe_unordered}}map" [T, U]:
+ cppclass value_type:
+ T first
+ U second
+ cppclass const_iterator:
+ value_type& operator*()
+ const_iterator operator++()
+ bint operator!=(const_iterator)
+ const_iterator begin()
+ const_iterator end()
+
+@cname("{{cname}}")
+cdef object {{cname}}(const map[X,Y]& s):
+ o = {}
+ cdef const map[X,Y].value_type *key_value
+ cdef map[X,Y].const_iterator iter = s.begin()
+ while iter != s.end():
+ key_value = &cython.operator.dereference(iter)
o[key_value.first] = key_value.second
- cython.operator.preincrement(iter)
- return o
+ cython.operator.preincrement(iter)
+ return o
#################### complex.from_py ####################
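
The `vector.from_py` template above follows a simple pattern: iterate the Python object and coerce each item into a native container, propagating any conversion error. The same pattern shown in plain C with a caller-provided array instead of `std::vector`; the function name and fixed capacity are illustrative only:

```c
#include <Python.h>

/* Coerce a Python sequence of ints into a C array; returns the count stored, or -1 on error. */
static Py_ssize_t longs_from_py(PyObject *o, long *out, Py_ssize_t max_items) {
    PyObject *seq = PySequence_Fast(o, "expected a sequence");
    if (!seq) return -1;

    Py_ssize_t n = PySequence_Fast_GET_SIZE(seq);
    if (n > max_items) n = max_items;

    for (Py_ssize_t i = 0; i < n; i++) {
        /* PySequence_Fast_GET_ITEM returns a borrowed reference, no DECREF needed */
        long v = PyLong_AsLong(PySequence_Fast_GET_ITEM(seq, i));
        if (v == -1 && PyErr_Occurred()) { Py_DECREF(seq); return -1; }
        out[i] = v;
    }
    Py_DECREF(seq);
    return n;
}
```
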
diff --git a/contrib/tools/cython/Cython/Utility/CppSupport.cpp b/contrib/tools/cython/Cython/Utility/CppSupport.cpp
index b215b924b4..b8fcff0643 100644
--- a/contrib/tools/cython/Cython/Utility/CppSupport.cpp
+++ b/contrib/tools/cython/Cython/Utility/CppSupport.cpp
@@ -1,51 +1,51 @@
-/////////////// CppExceptionConversion.proto ///////////////
-
-#ifndef __Pyx_CppExn2PyErr
-#include <new>
-#include <typeinfo>
-#include <stdexcept>
-#include <ios>
-
-static void __Pyx_CppExn2PyErr() {
- // Catch a handful of different errors here and turn them into the
- // equivalent Python errors.
- try {
- if (PyErr_Occurred())
- ; // let the latest Python exn pass through and ignore the current one
- else
- throw;
- } catch (const std::bad_alloc& exn) {
- PyErr_SetString(PyExc_MemoryError, exn.what());
- } catch (const std::bad_cast& exn) {
- PyErr_SetString(PyExc_TypeError, exn.what());
+/////////////// CppExceptionConversion.proto ///////////////
+
+#ifndef __Pyx_CppExn2PyErr
+#include <new>
+#include <typeinfo>
+#include <stdexcept>
+#include <ios>
+
+static void __Pyx_CppExn2PyErr() {
+ // Catch a handful of different errors here and turn them into the
+ // equivalent Python errors.
+ try {
+ if (PyErr_Occurred())
+ ; // let the latest Python exn pass through and ignore the current one
+ else
+ throw;
+ } catch (const std::bad_alloc& exn) {
+ PyErr_SetString(PyExc_MemoryError, exn.what());
+ } catch (const std::bad_cast& exn) {
+ PyErr_SetString(PyExc_TypeError, exn.what());
} catch (const std::bad_typeid& exn) {
PyErr_SetString(PyExc_TypeError, exn.what());
- } catch (const std::domain_error& exn) {
- PyErr_SetString(PyExc_ValueError, exn.what());
- } catch (const std::invalid_argument& exn) {
- PyErr_SetString(PyExc_ValueError, exn.what());
- } catch (const std::ios_base::failure& exn) {
- // Unfortunately, in standard C++ we have no way of distinguishing EOF
- // from other errors here; be careful with the exception mask
- PyErr_SetString(PyExc_IOError, exn.what());
- } catch (const std::out_of_range& exn) {
- // Change out_of_range to IndexError
- PyErr_SetString(PyExc_IndexError, exn.what());
- } catch (const std::overflow_error& exn) {
- PyErr_SetString(PyExc_OverflowError, exn.what());
- } catch (const std::range_error& exn) {
- PyErr_SetString(PyExc_ArithmeticError, exn.what());
- } catch (const std::underflow_error& exn) {
- PyErr_SetString(PyExc_ArithmeticError, exn.what());
- } catch (const std::exception& exn) {
- PyErr_SetString(PyExc_RuntimeError, exn.what());
- }
- catch (...)
- {
- PyErr_SetString(PyExc_RuntimeError, "Unknown exception");
- }
-}
-#endif
+ } catch (const std::domain_error& exn) {
+ PyErr_SetString(PyExc_ValueError, exn.what());
+ } catch (const std::invalid_argument& exn) {
+ PyErr_SetString(PyExc_ValueError, exn.what());
+ } catch (const std::ios_base::failure& exn) {
+ // Unfortunately, in standard C++ we have no way of distinguishing EOF
+ // from other errors here; be careful with the exception mask
+ PyErr_SetString(PyExc_IOError, exn.what());
+ } catch (const std::out_of_range& exn) {
+ // Change out_of_range to IndexError
+ PyErr_SetString(PyExc_IndexError, exn.what());
+ } catch (const std::overflow_error& exn) {
+ PyErr_SetString(PyExc_OverflowError, exn.what());
+ } catch (const std::range_error& exn) {
+ PyErr_SetString(PyExc_ArithmeticError, exn.what());
+ } catch (const std::underflow_error& exn) {
+ PyErr_SetString(PyExc_ArithmeticError, exn.what());
+ } catch (const std::exception& exn) {
+ PyErr_SetString(PyExc_RuntimeError, exn.what());
+ }
+ catch (...)
+ {
+ PyErr_SetString(PyExc_RuntimeError, "Unknown exception");
+ }
+}
+#endif
/////////////// PythranConversion.proto ///////////////
diff --git a/contrib/tools/cython/Cython/Utility/CythonFunction.c b/contrib/tools/cython/Cython/Utility/CythonFunction.c
index d36cc6b8cf..d51b308a8d 100644
--- a/contrib/tools/cython/Cython/Utility/CythonFunction.c
+++ b/contrib/tools/cython/Cython/Utility/CythonFunction.c
@@ -1,265 +1,265 @@
-
+
//////////////////// CythonFunctionShared.proto ////////////////////
-
-#define __Pyx_CyFunction_USED 1
-
-#define __Pyx_CYFUNCTION_STATICMETHOD 0x01
-#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02
-#define __Pyx_CYFUNCTION_CCLASS 0x04
-
-#define __Pyx_CyFunction_GetClosure(f) \
- (((__pyx_CyFunctionObject *) (f))->func_closure)
-#define __Pyx_CyFunction_GetClassObj(f) \
- (((__pyx_CyFunctionObject *) (f))->func_classobj)
-
-#define __Pyx_CyFunction_Defaults(type, f) \
- ((type *)(((__pyx_CyFunctionObject *) (f))->defaults))
-#define __Pyx_CyFunction_SetDefaultsGetter(f, g) \
- ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g)
-
-
-typedef struct {
- PyCFunctionObject func;
-#if PY_VERSION_HEX < 0x030500A0
- PyObject *func_weakreflist;
-#endif
- PyObject *func_dict;
- PyObject *func_name;
- PyObject *func_qualname;
- PyObject *func_doc;
- PyObject *func_globals;
- PyObject *func_code;
- PyObject *func_closure;
- // No-args super() class cell
- PyObject *func_classobj;
-
- // Dynamic default args and annotations
- void *defaults;
- int defaults_pyobjects;
+
+#define __Pyx_CyFunction_USED 1
+
+#define __Pyx_CYFUNCTION_STATICMETHOD 0x01
+#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02
+#define __Pyx_CYFUNCTION_CCLASS 0x04
+
+#define __Pyx_CyFunction_GetClosure(f) \
+ (((__pyx_CyFunctionObject *) (f))->func_closure)
+#define __Pyx_CyFunction_GetClassObj(f) \
+ (((__pyx_CyFunctionObject *) (f))->func_classobj)
+
+#define __Pyx_CyFunction_Defaults(type, f) \
+ ((type *)(((__pyx_CyFunctionObject *) (f))->defaults))
+#define __Pyx_CyFunction_SetDefaultsGetter(f, g) \
+ ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g)
+
+
+typedef struct {
+ PyCFunctionObject func;
+#if PY_VERSION_HEX < 0x030500A0
+ PyObject *func_weakreflist;
+#endif
+ PyObject *func_dict;
+ PyObject *func_name;
+ PyObject *func_qualname;
+ PyObject *func_doc;
+ PyObject *func_globals;
+ PyObject *func_code;
+ PyObject *func_closure;
+ // No-args super() class cell
+ PyObject *func_classobj;
+
+ // Dynamic default args and annotations
+ void *defaults;
+ int defaults_pyobjects;
size_t defaults_size; // used by FusedFunction for copying defaults
- int flags;
-
- // Defaults info
- PyObject *defaults_tuple; /* Const defaults tuple */
- PyObject *defaults_kwdict; /* Const kwonly defaults dict */
- PyObject *(*defaults_getter)(PyObject *);
- PyObject *func_annotations; /* function annotations dict */
-} __pyx_CyFunctionObject;
-
-static PyTypeObject *__pyx_CyFunctionType = 0;
-
+ int flags;
+
+ // Defaults info
+ PyObject *defaults_tuple; /* Const defaults tuple */
+ PyObject *defaults_kwdict; /* Const kwonly defaults dict */
+ PyObject *(*defaults_getter)(PyObject *);
+ PyObject *func_annotations; /* function annotations dict */
+} __pyx_CyFunctionObject;
+
+static PyTypeObject *__pyx_CyFunctionType = 0;
+
#define __Pyx_CyFunction_Check(obj) (__Pyx_TypeCheck(obj, __pyx_CyFunctionType))
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml,
- int flags, PyObject* qualname,
- PyObject *self,
- PyObject *module, PyObject *globals,
- PyObject* code);
-
-static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m,
- size_t size,
- int pyobjects);
-static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m,
- PyObject *tuple);
-static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m,
- PyObject *dict);
-static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
- PyObject *dict);
-
-
+ int flags, PyObject* qualname,
+ PyObject *self,
+ PyObject *module, PyObject *globals,
+ PyObject* code);
+
+static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m,
+ size_t size,
+ int pyobjects);
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m,
+ PyObject *tuple);
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m,
+ PyObject *dict);
+static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
+ PyObject *dict);
+
+
static int __pyx_CyFunction_init(void);
-
+
//////////////////// CythonFunctionShared ////////////////////
-//@substitute: naming
+//@substitute: naming
//@requires: CommonStructures.c::FetchCommonType
-////@requires: ObjectHandling.c::PyObjectGetAttrStr
-
+////@requires: ObjectHandling.c::PyObjectGetAttrStr
+
#include <structmember.h>
-static PyObject *
-__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
-{
- if (unlikely(op->func_doc == NULL)) {
- if (op->func.m_ml->ml_doc) {
-#if PY_MAJOR_VERSION >= 3
- op->func_doc = PyUnicode_FromString(op->func.m_ml->ml_doc);
-#else
- op->func_doc = PyString_FromString(op->func.m_ml->ml_doc);
-#endif
- if (unlikely(op->func_doc == NULL))
- return NULL;
- } else {
- Py_INCREF(Py_None);
- return Py_None;
- }
- }
- Py_INCREF(op->func_doc);
- return op->func_doc;
-}
-
-static int
+static PyObject *
+__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
+{
+ if (unlikely(op->func_doc == NULL)) {
+ if (op->func.m_ml->ml_doc) {
+#if PY_MAJOR_VERSION >= 3
+ op->func_doc = PyUnicode_FromString(op->func.m_ml->ml_doc);
+#else
+ op->func_doc = PyString_FromString(op->func.m_ml->ml_doc);
+#endif
+ if (unlikely(op->func_doc == NULL))
+ return NULL;
+ } else {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ }
+ Py_INCREF(op->func_doc);
+ return op->func_doc;
+}
+
+static int
__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
-{
- PyObject *tmp = op->func_doc;
- if (value == NULL) {
- // Mark as deleted
- value = Py_None;
- }
- Py_INCREF(value);
- op->func_doc = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
+{
+ PyObject *tmp = op->func_doc;
+ if (value == NULL) {
+ // Mark as deleted
+ value = Py_None;
+ }
+ Py_INCREF(value);
+ op->func_doc = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
-{
- if (unlikely(op->func_name == NULL)) {
-#if PY_MAJOR_VERSION >= 3
- op->func_name = PyUnicode_InternFromString(op->func.m_ml->ml_name);
-#else
- op->func_name = PyString_InternFromString(op->func.m_ml->ml_name);
-#endif
- if (unlikely(op->func_name == NULL))
- return NULL;
- }
- Py_INCREF(op->func_name);
- return op->func_name;
-}
-
-static int
+{
+ if (unlikely(op->func_name == NULL)) {
+#if PY_MAJOR_VERSION >= 3
+ op->func_name = PyUnicode_InternFromString(op->func.m_ml->ml_name);
+#else
+ op->func_name = PyString_InternFromString(op->func.m_ml->ml_name);
+#endif
+ if (unlikely(op->func_name == NULL))
+ return NULL;
+ }
+ Py_INCREF(op->func_name);
+ return op->func_name;
+}
+
+static int
__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
-{
- PyObject *tmp;
-
-#if PY_MAJOR_VERSION >= 3
+{
+ PyObject *tmp;
+
+#if PY_MAJOR_VERSION >= 3
if (unlikely(value == NULL || !PyUnicode_Check(value)))
-#else
+#else
if (unlikely(value == NULL || !PyString_Check(value)))
-#endif
+#endif
{
- PyErr_SetString(PyExc_TypeError,
- "__name__ must be set to a string object");
- return -1;
- }
- tmp = op->func_name;
- Py_INCREF(value);
- op->func_name = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
+ PyErr_SetString(PyExc_TypeError,
+ "__name__ must be set to a string object");
+ return -1;
+ }
+ tmp = op->func_name;
+ Py_INCREF(value);
+ op->func_name = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
-{
- Py_INCREF(op->func_qualname);
- return op->func_qualname;
-}
-
-static int
+{
+ Py_INCREF(op->func_qualname);
+ return op->func_qualname;
+}
+
+static int
__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
-{
- PyObject *tmp;
-
-#if PY_MAJOR_VERSION >= 3
+{
+ PyObject *tmp;
+
+#if PY_MAJOR_VERSION >= 3
if (unlikely(value == NULL || !PyUnicode_Check(value)))
-#else
+#else
if (unlikely(value == NULL || !PyString_Check(value)))
-#endif
+#endif
{
- PyErr_SetString(PyExc_TypeError,
- "__qualname__ must be set to a string object");
- return -1;
- }
- tmp = op->func_qualname;
- Py_INCREF(value);
- op->func_qualname = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
-__Pyx_CyFunction_get_self(__pyx_CyFunctionObject *m, CYTHON_UNUSED void *closure)
-{
- PyObject *self;
-
- self = m->func_closure;
- if (self == NULL)
- self = Py_None;
- Py_INCREF(self);
- return self;
-}
-
-static PyObject *
+ PyErr_SetString(PyExc_TypeError,
+ "__qualname__ must be set to a string object");
+ return -1;
+ }
+ tmp = op->func_qualname;
+ Py_INCREF(value);
+ op->func_qualname = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
+__Pyx_CyFunction_get_self(__pyx_CyFunctionObject *m, CYTHON_UNUSED void *closure)
+{
+ PyObject *self;
+
+ self = m->func_closure;
+ if (self == NULL)
+ self = Py_None;
+ Py_INCREF(self);
+ return self;
+}
+
+static PyObject *
__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
-{
- if (unlikely(op->func_dict == NULL)) {
- op->func_dict = PyDict_New();
- if (unlikely(op->func_dict == NULL))
- return NULL;
- }
- Py_INCREF(op->func_dict);
- return op->func_dict;
-}
-
-static int
+{
+ if (unlikely(op->func_dict == NULL)) {
+ op->func_dict = PyDict_New();
+ if (unlikely(op->func_dict == NULL))
+ return NULL;
+ }
+ Py_INCREF(op->func_dict);
+ return op->func_dict;
+}
+
+static int
__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
-{
- PyObject *tmp;
-
- if (unlikely(value == NULL)) {
- PyErr_SetString(PyExc_TypeError,
- "function's dictionary may not be deleted");
- return -1;
- }
- if (unlikely(!PyDict_Check(value))) {
- PyErr_SetString(PyExc_TypeError,
- "setting function's dictionary to a non-dict");
- return -1;
- }
- tmp = op->func_dict;
- Py_INCREF(value);
- op->func_dict = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
+{
+ PyObject *tmp;
+
+ if (unlikely(value == NULL)) {
+ PyErr_SetString(PyExc_TypeError,
+ "function's dictionary may not be deleted");
+ return -1;
+ }
+ if (unlikely(!PyDict_Check(value))) {
+ PyErr_SetString(PyExc_TypeError,
+ "setting function's dictionary to a non-dict");
+ return -1;
+ }
+ tmp = op->func_dict;
+ Py_INCREF(value);
+ op->func_dict = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
-{
- Py_INCREF(op->func_globals);
- return op->func_globals;
-}
-
-static PyObject *
+{
+ Py_INCREF(op->func_globals);
+ return op->func_globals;
+}
+
+static PyObject *
__Pyx_CyFunction_get_closure(CYTHON_UNUSED __pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
-{
- Py_INCREF(Py_None);
- return Py_None;
-}
-
-static PyObject *
+{
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
-{
- PyObject* result = (op->func_code) ? op->func_code : Py_None;
- Py_INCREF(result);
- return result;
-}
-
-static int
-__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) {
+{
+ PyObject* result = (op->func_code) ? op->func_code : Py_None;
+ Py_INCREF(result);
+ return result;
+}
+
+static int
+__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) {
int result = 0;
- PyObject *res = op->defaults_getter((PyObject *) op);
- if (unlikely(!res))
- return -1;
-
- // Cache result
+ PyObject *res = op->defaults_getter((PyObject *) op);
+ if (unlikely(!res))
+ return -1;
+
+ // Cache result
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
- op->defaults_tuple = PyTuple_GET_ITEM(res, 0);
- Py_INCREF(op->defaults_tuple);
- op->defaults_kwdict = PyTuple_GET_ITEM(res, 1);
- Py_INCREF(op->defaults_kwdict);
+ op->defaults_tuple = PyTuple_GET_ITEM(res, 0);
+ Py_INCREF(op->defaults_tuple);
+ op->defaults_kwdict = PyTuple_GET_ITEM(res, 1);
+ Py_INCREF(op->defaults_kwdict);
#else
op->defaults_tuple = PySequence_ITEM(res, 0);
if (unlikely(!op->defaults_tuple)) result = -1;
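Between the hunks: the proto section earlier in this hunk declares a small defaults API (__Pyx_CyFunction_InitDefaults, __Pyx_CyFunction_SetDefaultsTuple and friends) that generated module code calls after creating a function object. A rough usage sketch, assuming a CyFunction already created (e.g. via __Pyx_CyFunction_New); __pyx_defaults_example and set_one_default are hypothetical names:

    /* Sketch (assumption): allocate the per-function defaults storage and
     * publish one default value both in the raw slot and as __defaults__. */
    typedef struct { PyObject *arg0_default; } __pyx_defaults_example;

    static int set_one_default(PyObject *func, PyObject *value) {
        PyObject *tup;
        __pyx_defaults_example *d = (__pyx_defaults_example *)
            __Pyx_CyFunction_InitDefaults(func, sizeof(__pyx_defaults_example), 1);
        if (!d) return -1;                 /* PyErr_NoMemory() already set */
        Py_INCREF(value);
        d->arg0_default = value;           /* owned slot; released by the clear/traverse loops */
        tup = PyTuple_Pack(1, value);      /* also expose it through __defaults__ */
        if (!tup) return -1;
        __Pyx_CyFunction_SetDefaultsTuple(func, tup);   /* takes its own reference */
        Py_DECREF(tup);
        return 0;
    }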
@@ -268,254 +268,254 @@ __Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) {
if (unlikely(!op->defaults_kwdict)) result = -1;
}
#endif
- Py_DECREF(res);
+ Py_DECREF(res);
return result;
-}
-
-static int
+}
+
+static int
__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, CYTHON_UNUSED void *context) {
- PyObject* tmp;
- if (!value) {
- // del => explicit None to prevent rebuilding
- value = Py_None;
- } else if (value != Py_None && !PyTuple_Check(value)) {
- PyErr_SetString(PyExc_TypeError,
- "__defaults__ must be set to a tuple object");
- return -1;
- }
- Py_INCREF(value);
- tmp = op->defaults_tuple;
- op->defaults_tuple = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
+ PyObject* tmp;
+ if (!value) {
+ // del => explicit None to prevent rebuilding
+ value = Py_None;
+ } else if (value != Py_None && !PyTuple_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "__defaults__ must be set to a tuple object");
+ return -1;
+ }
+ Py_INCREF(value);
+ tmp = op->defaults_tuple;
+ op->defaults_tuple = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
- PyObject* result = op->defaults_tuple;
- if (unlikely(!result)) {
- if (op->defaults_getter) {
- if (__Pyx_CyFunction_init_defaults(op) < 0) return NULL;
- result = op->defaults_tuple;
- } else {
- result = Py_None;
- }
- }
- Py_INCREF(result);
- return result;
-}
-
-static int
+ PyObject* result = op->defaults_tuple;
+ if (unlikely(!result)) {
+ if (op->defaults_getter) {
+ if (__Pyx_CyFunction_init_defaults(op) < 0) return NULL;
+ result = op->defaults_tuple;
+ } else {
+ result = Py_None;
+ }
+ }
+ Py_INCREF(result);
+ return result;
+}
+
+static int
__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, CYTHON_UNUSED void *context) {
- PyObject* tmp;
- if (!value) {
- // del => explicit None to prevent rebuilding
- value = Py_None;
- } else if (value != Py_None && !PyDict_Check(value)) {
- PyErr_SetString(PyExc_TypeError,
- "__kwdefaults__ must be set to a dict object");
- return -1;
- }
- Py_INCREF(value);
- tmp = op->defaults_kwdict;
- op->defaults_kwdict = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
+ PyObject* tmp;
+ if (!value) {
+ // del => explicit None to prevent rebuilding
+ value = Py_None;
+ } else if (value != Py_None && !PyDict_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "__kwdefaults__ must be set to a dict object");
+ return -1;
+ }
+ Py_INCREF(value);
+ tmp = op->defaults_kwdict;
+ op->defaults_kwdict = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
- PyObject* result = op->defaults_kwdict;
- if (unlikely(!result)) {
- if (op->defaults_getter) {
- if (__Pyx_CyFunction_init_defaults(op) < 0) return NULL;
- result = op->defaults_kwdict;
- } else {
- result = Py_None;
- }
- }
- Py_INCREF(result);
- return result;
-}
-
-static int
+ PyObject* result = op->defaults_kwdict;
+ if (unlikely(!result)) {
+ if (op->defaults_getter) {
+ if (__Pyx_CyFunction_init_defaults(op) < 0) return NULL;
+ result = op->defaults_kwdict;
+ } else {
+ result = Py_None;
+ }
+ }
+ Py_INCREF(result);
+ return result;
+}
+
+static int
__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, CYTHON_UNUSED void *context) {
- PyObject* tmp;
- if (!value || value == Py_None) {
- value = NULL;
- } else if (!PyDict_Check(value)) {
- PyErr_SetString(PyExc_TypeError,
- "__annotations__ must be set to a dict object");
- return -1;
- }
- Py_XINCREF(value);
- tmp = op->func_annotations;
- op->func_annotations = value;
- Py_XDECREF(tmp);
- return 0;
-}
-
-static PyObject *
+ PyObject* tmp;
+ if (!value || value == Py_None) {
+ value = NULL;
+ } else if (!PyDict_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "__annotations__ must be set to a dict object");
+ return -1;
+ }
+ Py_XINCREF(value);
+ tmp = op->func_annotations;
+ op->func_annotations = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyObject *
__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
- PyObject* result = op->func_annotations;
- if (unlikely(!result)) {
- result = PyDict_New();
- if (unlikely(!result)) return NULL;
- op->func_annotations = result;
- }
- Py_INCREF(result);
- return result;
-}
-
-//#if PY_VERSION_HEX >= 0x030400C1
-//static PyObject *
+ PyObject* result = op->func_annotations;
+ if (unlikely(!result)) {
+ result = PyDict_New();
+ if (unlikely(!result)) return NULL;
+ op->func_annotations = result;
+ }
+ Py_INCREF(result);
+ return result;
+}
+
+//#if PY_VERSION_HEX >= 0x030400C1
+//static PyObject *
//__Pyx_CyFunction_get_signature(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
-// PyObject *inspect_module, *signature_class, *signature;
-// // from inspect import Signature
-// inspect_module = PyImport_ImportModuleLevelObject(PYIDENT("inspect"), NULL, NULL, NULL, 0);
-// if (unlikely(!inspect_module))
-// goto bad;
-// signature_class = __Pyx_PyObject_GetAttrStr(inspect_module, PYIDENT("Signature"));
-// Py_DECREF(inspect_module);
-// if (unlikely(!signature_class))
-// goto bad;
-// // return Signature.from_function(op)
-// signature = PyObject_CallMethodObjArgs(signature_class, PYIDENT("from_function"), op, NULL);
-// Py_DECREF(signature_class);
-// if (likely(signature))
-// return signature;
-//bad:
-// // make sure we raise an AttributeError from this property on any errors
-// if (!PyErr_ExceptionMatches(PyExc_AttributeError))
-// PyErr_SetString(PyExc_AttributeError, "failed to calculate __signature__");
-// return NULL;
-//}
-//#endif
-
-static PyGetSetDef __pyx_CyFunction_getsets[] = {
- {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
- {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
- {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
- {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
- {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0},
- {(char *) "__self__", (getter)__Pyx_CyFunction_get_self, 0, 0, 0},
- {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
- {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
- {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
- {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
- {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
- {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
- {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
- {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
- {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
- {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
- {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0},
- {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0},
-//#if PY_VERSION_HEX >= 0x030400C1
-// {(char *) "__signature__", (getter)__Pyx_CyFunction_get_signature, 0, 0, 0},
-//#endif
- {0, 0, 0, 0, 0}
-};
-
-static PyMemberDef __pyx_CyFunction_members[] = {
+// PyObject *inspect_module, *signature_class, *signature;
+// // from inspect import Signature
+// inspect_module = PyImport_ImportModuleLevelObject(PYIDENT("inspect"), NULL, NULL, NULL, 0);
+// if (unlikely(!inspect_module))
+// goto bad;
+// signature_class = __Pyx_PyObject_GetAttrStr(inspect_module, PYIDENT("Signature"));
+// Py_DECREF(inspect_module);
+// if (unlikely(!signature_class))
+// goto bad;
+// // return Signature.from_function(op)
+// signature = PyObject_CallMethodObjArgs(signature_class, PYIDENT("from_function"), op, NULL);
+// Py_DECREF(signature_class);
+// if (likely(signature))
+// return signature;
+//bad:
+// // make sure we raise an AttributeError from this property on any errors
+// if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+// PyErr_SetString(PyExc_AttributeError, "failed to calculate __signature__");
+// return NULL;
+//}
+//#endif
+
+static PyGetSetDef __pyx_CyFunction_getsets[] = {
+ {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
+ {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
+ {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+ {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+ {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0},
+ {(char *) "__self__", (getter)__Pyx_CyFunction_get_self, 0, 0, 0},
+ {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
+ {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
+ {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
+ {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
+ {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
+ {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
+ {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
+ {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
+ {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
+ {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
+ {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0},
+ {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0},
+//#if PY_VERSION_HEX >= 0x030400C1
+// {(char *) "__signature__", (getter)__Pyx_CyFunction_get_signature, 0, 0, 0},
+//#endif
+ {0, 0, 0, 0, 0}
+};
+
+static PyMemberDef __pyx_CyFunction_members[] = {
{(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), PY_WRITE_RESTRICTED, 0},
- {0, 0, 0, 0, 0}
-};
-
-static PyObject *
-__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, CYTHON_UNUSED PyObject *args)
-{
-#if PY_MAJOR_VERSION >= 3
+ {0, 0, 0, 0, 0}
+};
+
+static PyObject *
+__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, CYTHON_UNUSED PyObject *args)
+{
+#if PY_MAJOR_VERSION >= 3
Py_INCREF(m->func_qualname);
return m->func_qualname;
-#else
- return PyString_FromString(m->func.m_ml->ml_name);
-#endif
-}
-
-static PyMethodDef __pyx_CyFunction_methods[] = {
- {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0},
- {0, 0, 0, 0}
-};
-
-
-#if PY_VERSION_HEX < 0x030500A0
-#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist)
-#else
-#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func.m_weakreflist)
-#endif
-
+#else
+ return PyString_FromString(m->func.m_ml->ml_name);
+#endif
+}
+
+static PyMethodDef __pyx_CyFunction_methods[] = {
+ {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0},
+ {0, 0, 0, 0}
+};
+
+
+#if PY_VERSION_HEX < 0x030500A0
+#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist)
+#else
+#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func.m_weakreflist)
+#endif
+
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname,
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
if (unlikely(op == NULL))
- return NULL;
- op->flags = flags;
- __Pyx_CyFunction_weakreflist(op) = NULL;
- op->func.m_ml = ml;
- op->func.m_self = (PyObject *) op;
- Py_XINCREF(closure);
- op->func_closure = closure;
- Py_XINCREF(module);
- op->func.m_module = module;
- op->func_dict = NULL;
- op->func_name = NULL;
- Py_INCREF(qualname);
- op->func_qualname = qualname;
- op->func_doc = NULL;
- op->func_classobj = NULL;
- op->func_globals = globals;
- Py_INCREF(op->func_globals);
- Py_XINCREF(code);
- op->func_code = code;
- // Dynamic Default args
- op->defaults_pyobjects = 0;
+ return NULL;
+ op->flags = flags;
+ __Pyx_CyFunction_weakreflist(op) = NULL;
+ op->func.m_ml = ml;
+ op->func.m_self = (PyObject *) op;
+ Py_XINCREF(closure);
+ op->func_closure = closure;
+ Py_XINCREF(module);
+ op->func.m_module = module;
+ op->func_dict = NULL;
+ op->func_name = NULL;
+ Py_INCREF(qualname);
+ op->func_qualname = qualname;
+ op->func_doc = NULL;
+ op->func_classobj = NULL;
+ op->func_globals = globals;
+ Py_INCREF(op->func_globals);
+ Py_XINCREF(code);
+ op->func_code = code;
+ // Dynamic Default args
+ op->defaults_pyobjects = 0;
op->defaults_size = 0;
- op->defaults = NULL;
- op->defaults_tuple = NULL;
- op->defaults_kwdict = NULL;
- op->defaults_getter = NULL;
- op->func_annotations = NULL;
- return (PyObject *) op;
-}
-
-static int
-__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m)
-{
- Py_CLEAR(m->func_closure);
- Py_CLEAR(m->func.m_module);
- Py_CLEAR(m->func_dict);
- Py_CLEAR(m->func_name);
- Py_CLEAR(m->func_qualname);
- Py_CLEAR(m->func_doc);
- Py_CLEAR(m->func_globals);
- Py_CLEAR(m->func_code);
- Py_CLEAR(m->func_classobj);
- Py_CLEAR(m->defaults_tuple);
- Py_CLEAR(m->defaults_kwdict);
- Py_CLEAR(m->func_annotations);
-
- if (m->defaults) {
- PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
- int i;
-
- for (i = 0; i < m->defaults_pyobjects; i++)
- Py_XDECREF(pydefaults[i]);
-
+ op->defaults = NULL;
+ op->defaults_tuple = NULL;
+ op->defaults_kwdict = NULL;
+ op->defaults_getter = NULL;
+ op->func_annotations = NULL;
+ return (PyObject *) op;
+}
+
+static int
+__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m)
+{
+ Py_CLEAR(m->func_closure);
+ Py_CLEAR(m->func.m_module);
+ Py_CLEAR(m->func_dict);
+ Py_CLEAR(m->func_name);
+ Py_CLEAR(m->func_qualname);
+ Py_CLEAR(m->func_doc);
+ Py_CLEAR(m->func_globals);
+ Py_CLEAR(m->func_code);
+ Py_CLEAR(m->func_classobj);
+ Py_CLEAR(m->defaults_tuple);
+ Py_CLEAR(m->defaults_kwdict);
+ Py_CLEAR(m->func_annotations);
+
+ if (m->defaults) {
+ PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
+ int i;
+
+ for (i = 0; i < m->defaults_pyobjects; i++)
+ Py_XDECREF(pydefaults[i]);
+
PyObject_Free(m->defaults);
- m->defaults = NULL;
- }
-
- return 0;
-}
-
+ m->defaults = NULL;
+ }
+
+ return 0;
+}
+
static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m)
-{
- if (__Pyx_CyFunction_weakreflist(m) != NULL)
- PyObject_ClearWeakRefs((PyObject *) m);
- __Pyx_CyFunction_clear(m);
- PyObject_GC_Del(m);
-}
-
+{
+ if (__Pyx_CyFunction_weakreflist(m) != NULL)
+ PyObject_ClearWeakRefs((PyObject *) m);
+ __Pyx_CyFunction_clear(m);
+ PyObject_GC_Del(m);
+}
+
static void __Pyx_CyFunction_dealloc(PyObject *obj)
{
__pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) obj;
@@ -523,92 +523,92 @@ static void __Pyx_CyFunction_dealloc(PyObject *obj)
__Pyx__CyFunction_dealloc(m);
}
-static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg)
-{
- Py_VISIT(m->func_closure);
- Py_VISIT(m->func.m_module);
- Py_VISIT(m->func_dict);
- Py_VISIT(m->func_name);
- Py_VISIT(m->func_qualname);
- Py_VISIT(m->func_doc);
- Py_VISIT(m->func_globals);
- Py_VISIT(m->func_code);
- Py_VISIT(m->func_classobj);
- Py_VISIT(m->defaults_tuple);
- Py_VISIT(m->defaults_kwdict);
-
- if (m->defaults) {
- PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
- int i;
-
- for (i = 0; i < m->defaults_pyobjects; i++)
- Py_VISIT(pydefaults[i]);
- }
-
- return 0;
-}
-
-static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObject *type)
-{
+static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg)
+{
+ Py_VISIT(m->func_closure);
+ Py_VISIT(m->func.m_module);
+ Py_VISIT(m->func_dict);
+ Py_VISIT(m->func_name);
+ Py_VISIT(m->func_qualname);
+ Py_VISIT(m->func_doc);
+ Py_VISIT(m->func_globals);
+ Py_VISIT(m->func_code);
+ Py_VISIT(m->func_classobj);
+ Py_VISIT(m->defaults_tuple);
+ Py_VISIT(m->defaults_kwdict);
+
+ if (m->defaults) {
+ PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
+ int i;
+
+ for (i = 0; i < m->defaults_pyobjects; i++)
+ Py_VISIT(pydefaults[i]);
+ }
+
+ return 0;
+}
+
+static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObject *type)
+{
#if PY_MAJOR_VERSION < 3
- __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
-
- if (m->flags & __Pyx_CYFUNCTION_STATICMETHOD) {
- Py_INCREF(func);
- return func;
- }
-
- if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) {
- if (type == NULL)
- type = (PyObject *)(Py_TYPE(obj));
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+
+ if (m->flags & __Pyx_CYFUNCTION_STATICMETHOD) {
+ Py_INCREF(func);
+ return func;
+ }
+
+ if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) {
+ if (type == NULL)
+ type = (PyObject *)(Py_TYPE(obj));
return __Pyx_PyMethod_New(func, type, (PyObject *)(Py_TYPE(type)));
- }
-
- if (obj == Py_None)
- obj = NULL;
+ }
+
+ if (obj == Py_None)
+ obj = NULL;
#endif
return __Pyx_PyMethod_New(func, obj, type);
-}
-
-static PyObject*
-__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op)
-{
-#if PY_MAJOR_VERSION >= 3
- return PyUnicode_FromFormat("<cyfunction %U at %p>",
- op->func_qualname, (void *)op);
-#else
- return PyString_FromFormat("<cyfunction %s at %p>",
- PyString_AsString(op->func_qualname), (void *)op);
-#endif
-}
-
+}
+
+static PyObject*
+__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op)
+{
+#if PY_MAJOR_VERSION >= 3
+ return PyUnicode_FromFormat("<cyfunction %U at %p>",
+ op->func_qualname, (void *)op);
+#else
+ return PyString_FromFormat("<cyfunction %s at %p>",
+ PyString_AsString(op->func_qualname), (void *)op);
+#endif
+}
+
static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) {
// originally copied from PyCFunction_Call() in CPython's Objects/methodobject.c
- PyCFunctionObject* f = (PyCFunctionObject*)func;
+ PyCFunctionObject* f = (PyCFunctionObject*)func;
PyCFunction meth = f->m_ml->ml_meth;
- Py_ssize_t size;
-
+ Py_ssize_t size;
+
switch (f->m_ml->ml_flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) {
- case METH_VARARGS:
+ case METH_VARARGS:
if (likely(kw == NULL || PyDict_Size(kw) == 0))
- return (*meth)(self, arg);
- break;
- case METH_VARARGS | METH_KEYWORDS:
+ return (*meth)(self, arg);
+ break;
+ case METH_VARARGS | METH_KEYWORDS:
return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw);
- case METH_NOARGS:
+ case METH_NOARGS:
if (likely(kw == NULL || PyDict_Size(kw) == 0)) {
- size = PyTuple_GET_SIZE(arg);
+ size = PyTuple_GET_SIZE(arg);
if (likely(size == 0))
- return (*meth)(self, NULL);
- PyErr_Format(PyExc_TypeError,
+ return (*meth)(self, NULL);
+ PyErr_Format(PyExc_TypeError,
"%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)",
- f->m_ml->ml_name, size);
- return NULL;
- }
- break;
- case METH_O:
+ f->m_ml->ml_name, size);
+ return NULL;
+ }
+ break;
+ case METH_O:
if (likely(kw == NULL || PyDict_Size(kw) == 0)) {
- size = PyTuple_GET_SIZE(arg);
+ size = PyTuple_GET_SIZE(arg);
if (likely(size == 1)) {
PyObject *result, *arg0;
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
@@ -622,28 +622,28 @@ static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, Py
#endif
return result;
}
- PyErr_Format(PyExc_TypeError,
+ PyErr_Format(PyExc_TypeError,
"%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)",
- f->m_ml->ml_name, size);
- return NULL;
- }
- break;
- default:
- PyErr_SetString(PyExc_SystemError, "Bad call flags in "
- "__Pyx_CyFunction_Call. METH_OLDARGS is no "
- "longer supported!");
-
- return NULL;
- }
- PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
- f->m_ml->ml_name);
- return NULL;
-}
+ f->m_ml->ml_name, size);
+ return NULL;
+ }
+ break;
+ default:
+ PyErr_SetString(PyExc_SystemError, "Bad call flags in "
+ "__Pyx_CyFunction_Call. METH_OLDARGS is no "
+ "longer supported!");
+
+ return NULL;
+ }
+ PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
+ f->m_ml->ml_name);
+ return NULL;
+}
static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) {
return __Pyx_CyFunction_CallMethod(func, ((PyCFunctionObject*)func)->m_self, arg, kw);
-}
-
+}
+
static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) {
PyObject *result;
__pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func;
@@ -672,65 +672,65 @@ static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, P
return result;
}
-static PyTypeObject __pyx_CyFunctionType_type = {
- PyVarObject_HEAD_INIT(0, 0)
- "cython_function_or_method", /*tp_name*/
- sizeof(__pyx_CyFunctionObject), /*tp_basicsize*/
- 0, /*tp_itemsize*/
- (destructor) __Pyx_CyFunction_dealloc, /*tp_dealloc*/
- 0, /*tp_print*/
- 0, /*tp_getattr*/
- 0, /*tp_setattr*/
-#if PY_MAJOR_VERSION < 3
- 0, /*tp_compare*/
-#else
- 0, /*reserved*/
-#endif
- (reprfunc) __Pyx_CyFunction_repr, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- 0, /*tp_as_mapping*/
- 0, /*tp_hash*/
+static PyTypeObject __pyx_CyFunctionType_type = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "cython_function_or_method", /*tp_name*/
+ sizeof(__pyx_CyFunctionObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor) __Pyx_CyFunction_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+#else
+ 0, /*reserved*/
+#endif
+ (reprfunc) __Pyx_CyFunction_repr, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
__Pyx_CyFunction_CallAsMethod, /*tp_call*/
- 0, /*tp_str*/
- 0, /*tp_getattro*/
- 0, /*tp_setattro*/
- 0, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
- 0, /*tp_doc*/
- (traverseproc) __Pyx_CyFunction_traverse, /*tp_traverse*/
- (inquiry) __Pyx_CyFunction_clear, /*tp_clear*/
- 0, /*tp_richcompare*/
-#if PY_VERSION_HEX < 0x030500A0
- offsetof(__pyx_CyFunctionObject, func_weakreflist), /*tp_weaklistoffset*/
-#else
- offsetof(PyCFunctionObject, m_weakreflist), /*tp_weaklistoffset*/
-#endif
- 0, /*tp_iter*/
- 0, /*tp_iternext*/
- __pyx_CyFunction_methods, /*tp_methods*/
- __pyx_CyFunction_members, /*tp_members*/
- __pyx_CyFunction_getsets, /*tp_getset*/
- 0, /*tp_base*/
- 0, /*tp_dict*/
- __Pyx_CyFunction_descr_get, /*tp_descr_get*/
- 0, /*tp_descr_set*/
- offsetof(__pyx_CyFunctionObject, func_dict),/*tp_dictoffset*/
- 0, /*tp_init*/
- 0, /*tp_alloc*/
- 0, /*tp_new*/
- 0, /*tp_free*/
- 0, /*tp_is_gc*/
- 0, /*tp_bases*/
- 0, /*tp_mro*/
- 0, /*tp_cache*/
- 0, /*tp_subclasses*/
- 0, /*tp_weaklist*/
- 0, /*tp_del*/
- 0, /*tp_version_tag*/
-#if PY_VERSION_HEX >= 0x030400a1
- 0, /*tp_finalize*/
-#endif
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ 0, /*tp_doc*/
+ (traverseproc) __Pyx_CyFunction_traverse, /*tp_traverse*/
+ (inquiry) __Pyx_CyFunction_clear, /*tp_clear*/
+ 0, /*tp_richcompare*/
+#if PY_VERSION_HEX < 0x030500A0
+ offsetof(__pyx_CyFunctionObject, func_weakreflist), /*tp_weaklistoffset*/
+#else
+ offsetof(PyCFunctionObject, m_weakreflist), /*tp_weaklistoffset*/
+#endif
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_CyFunction_methods, /*tp_methods*/
+ __pyx_CyFunction_members, /*tp_members*/
+ __pyx_CyFunction_getsets, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ __Pyx_CyFunction_descr_get, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ offsetof(__pyx_CyFunctionObject, func_dict),/*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ 0, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+#if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+#endif
#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
0, /*tp_vectorcall*/
#endif
@@ -740,47 +740,47 @@ static PyTypeObject __pyx_CyFunctionType_type = {
#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000
0, /*tp_pypy_flags*/
#endif
-};
-
-
+};
+
+
static int __pyx_CyFunction_init(void) {
- __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type);
+ __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type);
if (unlikely(__pyx_CyFunctionType == NULL)) {
- return -1;
- }
- return 0;
-}
-
-static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) {
- __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
-
+ return -1;
+ }
+ return 0;
+}
+
+static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+
m->defaults = PyObject_Malloc(size);
if (unlikely(!m->defaults))
- return PyErr_NoMemory();
- memset(m->defaults, 0, size);
- m->defaults_pyobjects = pyobjects;
+ return PyErr_NoMemory();
+ memset(m->defaults, 0, size);
+ m->defaults_pyobjects = pyobjects;
m->defaults_size = size;
- return m->defaults;
-}
-
-static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) {
- __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
- m->defaults_tuple = tuple;
- Py_INCREF(tuple);
-}
-
-static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) {
- __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
- m->defaults_kwdict = dict;
- Py_INCREF(dict);
-}
-
-static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) {
- __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
- m->func_annotations = dict;
- Py_INCREF(dict);
-}
-
+ return m->defaults;
+}
+
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults_tuple = tuple;
+ Py_INCREF(tuple);
+}
+
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults_kwdict = dict;
+ Py_INCREF(dict);
+}
+
+static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) {
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->func_annotations = dict;
+ Py_INCREF(dict);
+}
+
//////////////////// CythonFunction.proto ////////////////////
@@ -806,15 +806,15 @@ static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qual
}
-//////////////////// CyFunctionClassCell.proto ////////////////////
+//////////////////// CyFunctionClassCell.proto ////////////////////
static int __Pyx_CyFunction_InitClassCell(PyObject *cyfunctions, PyObject *classobj);/*proto*/
-
-//////////////////// CyFunctionClassCell ////////////////////
+
+//////////////////// CyFunctionClassCell ////////////////////
//@requires: CythonFunctionShared
-
+
static int __Pyx_CyFunction_InitClassCell(PyObject *cyfunctions, PyObject *classobj) {
Py_ssize_t i, count = PyList_GET_SIZE(cyfunctions);
-
+
for (i = 0; i < count; i++) {
__pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *)
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
@@ -825,44 +825,44 @@ static int __Pyx_CyFunction_InitClassCell(PyObject *cyfunctions, PyObject *class
return -1;
#endif
Py_INCREF(classobj);
- m->func_classobj = classobj;
+ m->func_classobj = classobj;
#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
Py_DECREF((PyObject*)m);
#endif
- }
+ }
return 0;
-}
-
-
-//////////////////// FusedFunction.proto ////////////////////
-
-typedef struct {
- __pyx_CyFunctionObject func;
- PyObject *__signatures__;
- PyObject *type;
- PyObject *self;
-} __pyx_FusedFunctionObject;
-
+}
+
+
+//////////////////// FusedFunction.proto ////////////////////
+
+typedef struct {
+ __pyx_CyFunctionObject func;
+ PyObject *__signatures__;
+ PyObject *type;
+ PyObject *self;
+} __pyx_FusedFunctionObject;
+
static PyObject *__pyx_FusedFunction_New(PyMethodDef *ml, int flags,
PyObject *qualname, PyObject *closure,
- PyObject *module, PyObject *globals,
- PyObject *code);
-
-static int __pyx_FusedFunction_clear(__pyx_FusedFunctionObject *self);
-static PyTypeObject *__pyx_FusedFunctionType = NULL;
-static int __pyx_FusedFunction_init(void);
-
-#define __Pyx_FusedFunction_USED
-
-//////////////////// FusedFunction ////////////////////
+ PyObject *module, PyObject *globals,
+ PyObject *code);
+
+static int __pyx_FusedFunction_clear(__pyx_FusedFunctionObject *self);
+static PyTypeObject *__pyx_FusedFunctionType = NULL;
+static int __pyx_FusedFunction_init(void);
+
+#define __Pyx_FusedFunction_USED
+
+//////////////////// FusedFunction ////////////////////
//@requires: CythonFunctionShared
-
-static PyObject *
+
+static PyObject *
__pyx_FusedFunction_New(PyMethodDef *ml, int flags,
PyObject *qualname, PyObject *closure,
- PyObject *module, PyObject *globals,
- PyObject *code)
-{
+ PyObject *module, PyObject *globals,
+ PyObject *code)
+{
PyObject *op = __Pyx_CyFunction_Init(
// __pyx_CyFunctionObject is correct below since that's the cast that we want.
PyObject_GC_New(__pyx_CyFunctionObject, __pyx_FusedFunctionType),
@@ -876,8 +876,8 @@ __pyx_FusedFunction_New(PyMethodDef *ml, int flags,
PyObject_GC_Track(op);
}
return op;
-}
-
+}
+
static void
__pyx_FusedFunction_dealloc(__pyx_FusedFunctionObject *self)
{
@@ -886,56 +886,56 @@ __pyx_FusedFunction_dealloc(__pyx_FusedFunctionObject *self)
Py_CLEAR(self->type);
Py_CLEAR(self->__signatures__);
__Pyx__CyFunction_dealloc((__pyx_CyFunctionObject *) self);
-}
-
-static int
-__pyx_FusedFunction_traverse(__pyx_FusedFunctionObject *self,
- visitproc visit,
- void *arg)
-{
- Py_VISIT(self->self);
- Py_VISIT(self->type);
- Py_VISIT(self->__signatures__);
- return __Pyx_CyFunction_traverse((__pyx_CyFunctionObject *) self, visit, arg);
-}
-
-static int
-__pyx_FusedFunction_clear(__pyx_FusedFunctionObject *self)
-{
- Py_CLEAR(self->self);
- Py_CLEAR(self->type);
- Py_CLEAR(self->__signatures__);
- return __Pyx_CyFunction_clear((__pyx_CyFunctionObject *) self);
-}
-
-
-static PyObject *
-__pyx_FusedFunction_descr_get(PyObject *self, PyObject *obj, PyObject *type)
-{
- __pyx_FusedFunctionObject *func, *meth;
-
- func = (__pyx_FusedFunctionObject *) self;
-
- if (func->self || func->func.flags & __Pyx_CYFUNCTION_STATICMETHOD) {
- // Do not allow rebinding and don't do anything for static methods
- Py_INCREF(self);
- return self;
- }
-
- if (obj == Py_None)
- obj = NULL;
-
+}
+
+static int
+__pyx_FusedFunction_traverse(__pyx_FusedFunctionObject *self,
+ visitproc visit,
+ void *arg)
+{
+ Py_VISIT(self->self);
+ Py_VISIT(self->type);
+ Py_VISIT(self->__signatures__);
+ return __Pyx_CyFunction_traverse((__pyx_CyFunctionObject *) self, visit, arg);
+}
+
+static int
+__pyx_FusedFunction_clear(__pyx_FusedFunctionObject *self)
+{
+ Py_CLEAR(self->self);
+ Py_CLEAR(self->type);
+ Py_CLEAR(self->__signatures__);
+ return __Pyx_CyFunction_clear((__pyx_CyFunctionObject *) self);
+}
+
+
+static PyObject *
+__pyx_FusedFunction_descr_get(PyObject *self, PyObject *obj, PyObject *type)
+{
+ __pyx_FusedFunctionObject *func, *meth;
+
+ func = (__pyx_FusedFunctionObject *) self;
+
+ if (func->self || func->func.flags & __Pyx_CYFUNCTION_STATICMETHOD) {
+ // Do not allow rebinding and don't do anything for static methods
+ Py_INCREF(self);
+ return self;
+ }
+
+ if (obj == Py_None)
+ obj = NULL;
+
meth = (__pyx_FusedFunctionObject *) __pyx_FusedFunction_New(
- ((PyCFunctionObject *) func)->m_ml,
- ((__pyx_CyFunctionObject *) func)->flags,
- ((__pyx_CyFunctionObject *) func)->func_qualname,
- ((__pyx_CyFunctionObject *) func)->func_closure,
- ((PyCFunctionObject *) func)->m_module,
- ((__pyx_CyFunctionObject *) func)->func_globals,
- ((__pyx_CyFunctionObject *) func)->func_code);
- if (!meth)
- return NULL;
-
+ ((PyCFunctionObject *) func)->m_ml,
+ ((__pyx_CyFunctionObject *) func)->flags,
+ ((__pyx_CyFunctionObject *) func)->func_qualname,
+ ((__pyx_CyFunctionObject *) func)->func_closure,
+ ((PyCFunctionObject *) func)->m_module,
+ ((__pyx_CyFunctionObject *) func)->func_globals,
+ ((__pyx_CyFunctionObject *) func)->func_code);
+ if (!meth)
+ return NULL;
+
// defaults needs copying fully rather than just copying the pointer
// since otherwise it will be freed on destruction of meth despite
// belonging to func rather than meth
@@ -956,184 +956,184 @@ __pyx_FusedFunction_descr_get(PyObject *self, PyObject *obj, PyObject *type)
Py_XINCREF(pydefaults[i]);
}
- Py_XINCREF(func->func.func_classobj);
- meth->func.func_classobj = func->func.func_classobj;
-
- Py_XINCREF(func->__signatures__);
- meth->__signatures__ = func->__signatures__;
-
- Py_XINCREF(type);
- meth->type = type;
-
- Py_XINCREF(func->func.defaults_tuple);
- meth->func.defaults_tuple = func->func.defaults_tuple;
-
- if (func->func.flags & __Pyx_CYFUNCTION_CLASSMETHOD)
- obj = type;
-
- Py_XINCREF(obj);
- meth->self = obj;
-
- return (PyObject *) meth;
-}
-
-static PyObject *
-_obj_to_str(PyObject *obj)
-{
- if (PyType_Check(obj))
- return PyObject_GetAttr(obj, PYIDENT("__name__"));
- else
- return PyObject_Str(obj);
-}
-
-static PyObject *
-__pyx_FusedFunction_getitem(__pyx_FusedFunctionObject *self, PyObject *idx)
-{
- PyObject *signature = NULL;
- PyObject *unbound_result_func;
- PyObject *result_func = NULL;
-
- if (self->__signatures__ == NULL) {
- PyErr_SetString(PyExc_TypeError, "Function is not fused");
- return NULL;
- }
-
- if (PyTuple_Check(idx)) {
- PyObject *list = PyList_New(0);
- Py_ssize_t n = PyTuple_GET_SIZE(idx);
- PyObject *sep = NULL;
- int i;
-
+ Py_XINCREF(func->func.func_classobj);
+ meth->func.func_classobj = func->func.func_classobj;
+
+ Py_XINCREF(func->__signatures__);
+ meth->__signatures__ = func->__signatures__;
+
+ Py_XINCREF(type);
+ meth->type = type;
+
+ Py_XINCREF(func->func.defaults_tuple);
+ meth->func.defaults_tuple = func->func.defaults_tuple;
+
+ if (func->func.flags & __Pyx_CYFUNCTION_CLASSMETHOD)
+ obj = type;
+
+ Py_XINCREF(obj);
+ meth->self = obj;
+
+ return (PyObject *) meth;
+}
+
+static PyObject *
+_obj_to_str(PyObject *obj)
+{
+ if (PyType_Check(obj))
+ return PyObject_GetAttr(obj, PYIDENT("__name__"));
+ else
+ return PyObject_Str(obj);
+}
+
+static PyObject *
+__pyx_FusedFunction_getitem(__pyx_FusedFunctionObject *self, PyObject *idx)
+{
+ PyObject *signature = NULL;
+ PyObject *unbound_result_func;
+ PyObject *result_func = NULL;
+
+ if (self->__signatures__ == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Function is not fused");
+ return NULL;
+ }
+
+ if (PyTuple_Check(idx)) {
+ PyObject *list = PyList_New(0);
+ Py_ssize_t n = PyTuple_GET_SIZE(idx);
+ PyObject *sep = NULL;
+ int i;
+
if (unlikely(!list))
- return NULL;
-
- for (i = 0; i < n; i++) {
+ return NULL;
+
+ for (i = 0; i < n; i++) {
int ret;
PyObject *string;
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
- PyObject *item = PyTuple_GET_ITEM(idx, i);
+ PyObject *item = PyTuple_GET_ITEM(idx, i);
#else
PyObject *item = PySequence_ITEM(idx, i); if (unlikely(!item)) goto __pyx_err;
#endif
- string = _obj_to_str(item);
+ string = _obj_to_str(item);
#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
Py_DECREF(item);
#endif
if (unlikely(!string)) goto __pyx_err;
ret = PyList_Append(list, string);
- Py_DECREF(string);
+ Py_DECREF(string);
if (unlikely(ret < 0)) goto __pyx_err;
- }
-
- sep = PyUnicode_FromString("|");
+ }
+
+ sep = PyUnicode_FromString("|");
if (likely(sep))
- signature = PyUnicode_Join(sep, list);
-__pyx_err:
-;
- Py_DECREF(list);
- Py_XDECREF(sep);
- } else {
- signature = _obj_to_str(idx);
- }
-
- if (!signature)
- return NULL;
-
- unbound_result_func = PyObject_GetItem(self->__signatures__, signature);
-
- if (unbound_result_func) {
- if (self->self || self->type) {
- __pyx_FusedFunctionObject *unbound = (__pyx_FusedFunctionObject *) unbound_result_func;
-
- // TODO: move this to InitClassCell
- Py_CLEAR(unbound->func.func_classobj);
- Py_XINCREF(self->func.func_classobj);
- unbound->func.func_classobj = self->func.func_classobj;
-
- result_func = __pyx_FusedFunction_descr_get(unbound_result_func,
- self->self, self->type);
- } else {
- result_func = unbound_result_func;
- Py_INCREF(result_func);
- }
- }
-
- Py_DECREF(signature);
- Py_XDECREF(unbound_result_func);
-
- return result_func;
-}
-
-static PyObject *
-__pyx_FusedFunction_callfunction(PyObject *func, PyObject *args, PyObject *kw)
-{
- __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func;
- int static_specialized = (cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD &&
- !((__pyx_FusedFunctionObject *) func)->__signatures__);
-
- if (cyfunc->flags & __Pyx_CYFUNCTION_CCLASS && !static_specialized) {
+ signature = PyUnicode_Join(sep, list);
+__pyx_err:
+;
+ Py_DECREF(list);
+ Py_XDECREF(sep);
+ } else {
+ signature = _obj_to_str(idx);
+ }
+
+ if (!signature)
+ return NULL;
+
+ unbound_result_func = PyObject_GetItem(self->__signatures__, signature);
+
+ if (unbound_result_func) {
+ if (self->self || self->type) {
+ __pyx_FusedFunctionObject *unbound = (__pyx_FusedFunctionObject *) unbound_result_func;
+
+ // TODO: move this to InitClassCell
+ Py_CLEAR(unbound->func.func_classobj);
+ Py_XINCREF(self->func.func_classobj);
+ unbound->func.func_classobj = self->func.func_classobj;
+
+ result_func = __pyx_FusedFunction_descr_get(unbound_result_func,
+ self->self, self->type);
+ } else {
+ result_func = unbound_result_func;
+ Py_INCREF(result_func);
+ }
+ }
+
+ Py_DECREF(signature);
+ Py_XDECREF(unbound_result_func);
+
+ return result_func;
+}
+
+static PyObject *
+__pyx_FusedFunction_callfunction(PyObject *func, PyObject *args, PyObject *kw)
+{
+ __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func;
+ int static_specialized = (cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD &&
+ !((__pyx_FusedFunctionObject *) func)->__signatures__);
+
+ if (cyfunc->flags & __Pyx_CYFUNCTION_CCLASS && !static_specialized) {
return __Pyx_CyFunction_CallAsMethod(func, args, kw);
- } else {
+ } else {
return __Pyx_CyFunction_Call(func, args, kw);
- }
-}
-
-// Note: the 'self' from method binding is passed in via the args tuple,
-// whereas PyCFunctionObject's m_self is passed in as the first
-// argument to the C function. For extension methods we need
-// to pass 'self' as 'm_self' and not as the first element of the
-// args tuple.
-
-static PyObject *
-__pyx_FusedFunction_call(PyObject *func, PyObject *args, PyObject *kw)
-{
- __pyx_FusedFunctionObject *binding_func = (__pyx_FusedFunctionObject *) func;
- Py_ssize_t argc = PyTuple_GET_SIZE(args);
- PyObject *new_args = NULL;
- __pyx_FusedFunctionObject *new_func = NULL;
- PyObject *result = NULL;
- PyObject *self = NULL;
- int is_staticmethod = binding_func->func.flags & __Pyx_CYFUNCTION_STATICMETHOD;
- int is_classmethod = binding_func->func.flags & __Pyx_CYFUNCTION_CLASSMETHOD;
-
- if (binding_func->self) {
- // Bound method call, put 'self' in the args tuple
- Py_ssize_t i;
- new_args = PyTuple_New(argc + 1);
- if (!new_args)
- return NULL;
-
- self = binding_func->self;
+ }
+}
+
+// Note: the 'self' from method binding is passed in via the args tuple,
+// whereas PyCFunctionObject's m_self is passed in as the first
+// argument to the C function. For extension methods we need
+// to pass 'self' as 'm_self' and not as the first element of the
+// args tuple.
+
+static PyObject *
+__pyx_FusedFunction_call(PyObject *func, PyObject *args, PyObject *kw)
+{
+ __pyx_FusedFunctionObject *binding_func = (__pyx_FusedFunctionObject *) func;
+ Py_ssize_t argc = PyTuple_GET_SIZE(args);
+ PyObject *new_args = NULL;
+ __pyx_FusedFunctionObject *new_func = NULL;
+ PyObject *result = NULL;
+ PyObject *self = NULL;
+ int is_staticmethod = binding_func->func.flags & __Pyx_CYFUNCTION_STATICMETHOD;
+ int is_classmethod = binding_func->func.flags & __Pyx_CYFUNCTION_CLASSMETHOD;
+
+ if (binding_func->self) {
+ // Bound method call, put 'self' in the args tuple
+ Py_ssize_t i;
+ new_args = PyTuple_New(argc + 1);
+ if (!new_args)
+ return NULL;
+
+ self = binding_func->self;
#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
- Py_INCREF(self);
+ Py_INCREF(self);
#endif
Py_INCREF(self);
- PyTuple_SET_ITEM(new_args, 0, self);
-
- for (i = 0; i < argc; i++) {
+ PyTuple_SET_ITEM(new_args, 0, self);
+
+ for (i = 0; i < argc; i++) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
- PyObject *item = PyTuple_GET_ITEM(args, i);
- Py_INCREF(item);
+ PyObject *item = PyTuple_GET_ITEM(args, i);
+ Py_INCREF(item);
#else
PyObject *item = PySequence_ITEM(args, i); if (unlikely(!item)) goto bad;
#endif
- PyTuple_SET_ITEM(new_args, i + 1, item);
- }
-
- args = new_args;
- } else if (binding_func->type) {
- // Unbound method call
- if (argc < 1) {
- PyErr_SetString(PyExc_TypeError, "Need at least one argument, 0 given.");
- return NULL;
- }
+ PyTuple_SET_ITEM(new_args, i + 1, item);
+ }
+
+ args = new_args;
+ } else if (binding_func->type) {
+ // Unbound method call
+ if (argc < 1) {
+ PyErr_SetString(PyExc_TypeError, "Need at least one argument, 0 given.");
+ return NULL;
+ }
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
- self = PyTuple_GET_ITEM(args, 0);
+ self = PyTuple_GET_ITEM(args, 0);
#else
self = PySequence_ITEM(args, 0); if (unlikely(!self)) return NULL;
#endif
- }
-
+ }
+
if (self && !is_classmethod && !is_staticmethod) {
int is_instance = PyObject_IsInstance(self, binding_func->type);
if (unlikely(!is_instance)) {
@@ -1145,13 +1145,13 @@ __pyx_FusedFunction_call(PyObject *func, PyObject *args, PyObject *kw)
} else if (unlikely(is_instance == -1)) {
goto bad;
}
- }
+ }
#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
Py_XDECREF(self);
self = NULL;
#endif
-
- if (binding_func->__signatures__) {
+
+ if (binding_func->__signatures__) {
PyObject *tup;
if (is_staticmethod && binding_func->func.flags & __Pyx_CYFUNCTION_CCLASS) {
// FIXME: this seems wrong, but we must currently pass the signatures dict as 'self' argument
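Between the hunks: __pyx_FusedFunction_getitem, shown in full in the preceding hunk, is what makes a fused function indexable by type signature (tuple indices are joined with "|"). A rough C-level sketch of triggering it through the mapping protocol; the key "double" is an assumed signature string, the real keys are whatever Cython stored in __signatures__:

    /* Sketch (assumption): specialise a fused function from C by subscripting
     * it, which dispatches to __pyx_FusedFunction_getitem above. */
    static PyObject *specialise_by_signature(PyObject *fused_func) {
        PyObject *key = PyUnicode_FromString("double");   /* hypothetical signature key */
        PyObject *specialised;
        if (!key) return NULL;
        specialised = PyObject_GetItem(fused_func, key);  /* -> __pyx_FusedFunction_getitem */
        Py_DECREF(key);
        return specialised;                               /* NULL with exception set on error */
    }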
@@ -1168,101 +1168,101 @@ __pyx_FusedFunction_call(PyObject *func, PyObject *args, PyObject *kw)
if (unlikely(!tup)) goto bad;
new_func = (__pyx_FusedFunctionObject *) __pyx_FusedFunction_callfunction(func, tup, NULL);
}
- Py_DECREF(tup);
-
+ Py_DECREF(tup);
+
if (unlikely(!new_func))
goto bad;
-
- Py_XINCREF(binding_func->func.func_classobj);
- Py_CLEAR(new_func->func.func_classobj);
- new_func->func.func_classobj = binding_func->func.func_classobj;
-
- func = (PyObject *) new_func;
- }
-
- result = __pyx_FusedFunction_callfunction(func, args, kw);
+
+ Py_XINCREF(binding_func->func.func_classobj);
+ Py_CLEAR(new_func->func.func_classobj);
+ new_func->func.func_classobj = binding_func->func.func_classobj;
+
+ func = (PyObject *) new_func;
+ }
+
+ result = __pyx_FusedFunction_callfunction(func, args, kw);
bad:
#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
Py_XDECREF(self);
#endif
- Py_XDECREF(new_args);
- Py_XDECREF((PyObject *) new_func);
- return result;
-}
-
-static PyMemberDef __pyx_FusedFunction_members[] = {
- {(char *) "__signatures__",
- T_OBJECT,
- offsetof(__pyx_FusedFunctionObject, __signatures__),
- READONLY,
- 0},
- {0, 0, 0, 0, 0},
-};
-
-static PyMappingMethods __pyx_FusedFunction_mapping_methods = {
- 0,
- (binaryfunc) __pyx_FusedFunction_getitem,
- 0,
-};
-
-static PyTypeObject __pyx_FusedFunctionType_type = {
- PyVarObject_HEAD_INIT(0, 0)
- "fused_cython_function", /*tp_name*/
- sizeof(__pyx_FusedFunctionObject), /*tp_basicsize*/
- 0, /*tp_itemsize*/
- (destructor) __pyx_FusedFunction_dealloc, /*tp_dealloc*/
- 0, /*tp_print*/
- 0, /*tp_getattr*/
- 0, /*tp_setattr*/
-#if PY_MAJOR_VERSION < 3
- 0, /*tp_compare*/
-#else
- 0, /*reserved*/
-#endif
- 0, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- &__pyx_FusedFunction_mapping_methods, /*tp_as_mapping*/
- 0, /*tp_hash*/
- (ternaryfunc) __pyx_FusedFunction_call, /*tp_call*/
- 0, /*tp_str*/
- 0, /*tp_getattro*/
- 0, /*tp_setattro*/
- 0, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, /*tp_flags*/
- 0, /*tp_doc*/
- (traverseproc) __pyx_FusedFunction_traverse, /*tp_traverse*/
- (inquiry) __pyx_FusedFunction_clear,/*tp_clear*/
- 0, /*tp_richcompare*/
- 0, /*tp_weaklistoffset*/
- 0, /*tp_iter*/
- 0, /*tp_iternext*/
- 0, /*tp_methods*/
- __pyx_FusedFunction_members, /*tp_members*/
- // __doc__ is None for the fused function type, but we need it to be
- // a descriptor for the instance's __doc__, so rebuild descriptors in our subclass
- __pyx_CyFunction_getsets, /*tp_getset*/
+ Py_XDECREF(new_args);
+ Py_XDECREF((PyObject *) new_func);
+ return result;
+}
+
+static PyMemberDef __pyx_FusedFunction_members[] = {
+ {(char *) "__signatures__",
+ T_OBJECT,
+ offsetof(__pyx_FusedFunctionObject, __signatures__),
+ READONLY,
+ 0},
+ {0, 0, 0, 0, 0},
+};
+
+static PyMappingMethods __pyx_FusedFunction_mapping_methods = {
+ 0,
+ (binaryfunc) __pyx_FusedFunction_getitem,
+ 0,
+};
+
+static PyTypeObject __pyx_FusedFunctionType_type = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "fused_cython_function", /*tp_name*/
+ sizeof(__pyx_FusedFunctionObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor) __pyx_FusedFunction_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+#else
+ 0, /*reserved*/
+#endif
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ &__pyx_FusedFunction_mapping_methods, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ (ternaryfunc) __pyx_FusedFunction_call, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ 0, /*tp_doc*/
+ (traverseproc) __pyx_FusedFunction_traverse, /*tp_traverse*/
+ (inquiry) __pyx_FusedFunction_clear,/*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ 0, /*tp_methods*/
+ __pyx_FusedFunction_members, /*tp_members*/
+ // __doc__ is None for the fused function type, but we need it to be
+ // a descriptor for the instance's __doc__, so rebuild descriptors in our subclass
+ __pyx_CyFunction_getsets, /*tp_getset*/
// NOTE: tp_base may be changed later during module initialisation when importing CyFunction across modules.
- &__pyx_CyFunctionType_type, /*tp_base*/
- 0, /*tp_dict*/
- __pyx_FusedFunction_descr_get, /*tp_descr_get*/
- 0, /*tp_descr_set*/
- 0, /*tp_dictoffset*/
- 0, /*tp_init*/
- 0, /*tp_alloc*/
- 0, /*tp_new*/
- 0, /*tp_free*/
- 0, /*tp_is_gc*/
- 0, /*tp_bases*/
- 0, /*tp_mro*/
- 0, /*tp_cache*/
- 0, /*tp_subclasses*/
- 0, /*tp_weaklist*/
- 0, /*tp_del*/
- 0, /*tp_version_tag*/
-#if PY_VERSION_HEX >= 0x030400a1
- 0, /*tp_finalize*/
-#endif
+ &__pyx_CyFunctionType_type, /*tp_base*/
+ 0, /*tp_dict*/
+ __pyx_FusedFunction_descr_get, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ 0, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+#if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+#endif
#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)
0, /*tp_vectorcall*/
#endif
@@ -1272,66 +1272,66 @@ static PyTypeObject __pyx_FusedFunctionType_type = {
#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000
0, /*tp_pypy_flags*/
#endif
-};
-
-static int __pyx_FusedFunction_init(void) {
+};
+
+static int __pyx_FusedFunction_init(void) {
// Set base from __Pyx_FetchCommonTypeFromSpec, in case it's different from the local static value.
__pyx_FusedFunctionType_type.tp_base = __pyx_CyFunctionType;
- __pyx_FusedFunctionType = __Pyx_FetchCommonType(&__pyx_FusedFunctionType_type);
- if (__pyx_FusedFunctionType == NULL) {
- return -1;
- }
- return 0;
-}
-
-//////////////////// ClassMethod.proto ////////////////////
-
-#include "descrobject.h"
+ __pyx_FusedFunctionType = __Pyx_FetchCommonType(&__pyx_FusedFunctionType_type);
+ if (__pyx_FusedFunctionType == NULL) {
+ return -1;
+ }
+ return 0;
+}
+
+//////////////////// ClassMethod.proto ////////////////////
+
+#include "descrobject.h"
static CYTHON_UNUSED PyObject* __Pyx_Method_ClassMethod(PyObject *method); /*proto*/
-
-//////////////////// ClassMethod ////////////////////
-
-static PyObject* __Pyx_Method_ClassMethod(PyObject *method) {
+
+//////////////////// ClassMethod ////////////////////
+
+static PyObject* __Pyx_Method_ClassMethod(PyObject *method) {
#if CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM <= 0x05080000
- if (PyObject_TypeCheck(method, &PyWrapperDescr_Type)) {
- // cdef classes
- return PyClassMethod_New(method);
- }
-#else
+ if (PyObject_TypeCheck(method, &PyWrapperDescr_Type)) {
+ // cdef classes
+ return PyClassMethod_New(method);
+ }
+#else
#if CYTHON_COMPILING_IN_PYSTON || CYTHON_COMPILING_IN_PYPY
// special C-API function only in Pyston and PyPy >= 5.9
if (PyMethodDescr_Check(method))
#else
#if PY_MAJOR_VERSION == 2
// PyMethodDescr_Type is not exposed in the CPython C-API in Py2.
- static PyTypeObject *methoddescr_type = NULL;
- if (methoddescr_type == NULL) {
- PyObject *meth = PyObject_GetAttrString((PyObject*)&PyList_Type, "append");
- if (!meth) return NULL;
- methoddescr_type = Py_TYPE(meth);
- Py_DECREF(meth);
- }
+ static PyTypeObject *methoddescr_type = NULL;
+ if (methoddescr_type == NULL) {
+ PyObject *meth = PyObject_GetAttrString((PyObject*)&PyList_Type, "append");
+ if (!meth) return NULL;
+ methoddescr_type = Py_TYPE(meth);
+ Py_DECREF(meth);
+ }
#else
PyTypeObject *methoddescr_type = &PyMethodDescr_Type;
#endif
if (__Pyx_TypeCheck(method, methoddescr_type))
#endif
{
- // cdef classes
- PyMethodDescrObject *descr = (PyMethodDescrObject *)method;
- #if PY_VERSION_HEX < 0x03020000
- PyTypeObject *d_type = descr->d_type;
- #else
- PyTypeObject *d_type = descr->d_common.d_type;
- #endif
- return PyDescr_NewClassMethod(d_type, descr->d_method);
- }
-#endif
- else if (PyMethod_Check(method)) {
- // python classes
- return PyClassMethod_New(PyMethod_GET_FUNCTION(method));
- }
+ // cdef classes
+ PyMethodDescrObject *descr = (PyMethodDescrObject *)method;
+ #if PY_VERSION_HEX < 0x03020000
+ PyTypeObject *d_type = descr->d_type;
+ #else
+ PyTypeObject *d_type = descr->d_common.d_type;
+ #endif
+ return PyDescr_NewClassMethod(d_type, descr->d_method);
+ }
+#endif
+ else if (PyMethod_Check(method)) {
+ // python classes
+ return PyClassMethod_New(PyMethod_GET_FUNCTION(method));
+ }
else {
- return PyClassMethod_New(method);
- }
-}
+ return PyClassMethod_New(method);
+ }
+}
diff --git a/contrib/tools/cython/Cython/Utility/Embed.c b/contrib/tools/cython/Cython/Utility/Embed.c
index 114847b946..60da8f2330 100644
--- a/contrib/tools/cython/Cython/Utility/Embed.c
+++ b/contrib/tools/cython/Cython/Utility/Embed.c
@@ -1,45 +1,45 @@
-//////////////////// MainFunction ////////////////////
-
-#ifdef __FreeBSD__
-#include <floatingpoint.h>
-#endif
-
+//////////////////// MainFunction ////////////////////
+
+#ifdef __FreeBSD__
+#include <floatingpoint.h>
+#endif
+
#if PY_MAJOR_VERSION < 3
void Py_InitArgcArgv(int argc, char **argv);
#else
void Py_InitArgcArgv(int argc, wchar_t **argv);
#endif
-#if PY_MAJOR_VERSION < 3
-int %(main_method)s(int argc, char** argv) {
-#elif defined(WIN32) || defined(MS_WINDOWS)
-int %(wmain_method)s(int argc, wchar_t **argv) {
-#else
-static int __Pyx_main(int argc, wchar_t **argv) {
-#endif
- /* 754 requires that FP exceptions run in "no stop" mode by default,
- * and until C vendors implement C99's ways to control FP exceptions,
- * Python requires non-stop mode. Alas, some platforms enable FP
- * exceptions by default. Here we disable them.
- */
-#ifdef __FreeBSD__
- fp_except_t m;
-
- m = fpgetmask();
- fpsetmask(m & ~FP_X_OFL);
-#endif
+#if PY_MAJOR_VERSION < 3
+int %(main_method)s(int argc, char** argv) {
+#elif defined(WIN32) || defined(MS_WINDOWS)
+int %(wmain_method)s(int argc, wchar_t **argv) {
+#else
+static int __Pyx_main(int argc, wchar_t **argv) {
+#endif
+ /* 754 requires that FP exceptions run in "no stop" mode by default,
+ * and until C vendors implement C99's ways to control FP exceptions,
+ * Python requires non-stop mode. Alas, some platforms enable FP
+ * exceptions by default. Here we disable them.
+ */
+#ifdef __FreeBSD__
+ fp_except_t m;
+
+ m = fpgetmask();
+ fpsetmask(m & ~FP_X_OFL);
+#endif
if (argc && argv) {
Py_InitArgcArgv(argc, argv);
- Py_SetProgramName(argv[0]);
+ Py_SetProgramName(argv[0]);
}
- Py_Initialize();
- if (argc && argv)
- PySys_SetArgv(argc, argv);
- { /* init module '%(module_name)s' as '__main__' */
- PyObject* m = NULL;
- %(module_is_main)s = 1;
- #if PY_MAJOR_VERSION < 3
- init%(module_name)s();
+ Py_Initialize();
+ if (argc && argv)
+ PySys_SetArgv(argc, argv);
+ { /* init module '%(module_name)s' as '__main__' */
+ PyObject* m = NULL;
+ %(module_is_main)s = 1;
+ #if PY_MAJOR_VERSION < 3
+ init%(module_name)s();
#elif CYTHON_PEP489_MULTI_PHASE_INIT
m = PyInit_%(module_name)s();
if (!PyModule_Check(m)) {
@@ -54,174 +54,174 @@ static int __Pyx_main(int argc, wchar_t **argv) {
if (m) PyModule_ExecDef(m, mdef);
}
}
- #else
- m = PyInit_%(module_name)s();
- #endif
- if (PyErr_Occurred()) {
- PyErr_Print(); /* This exits with the right code if SystemExit. */
- #if PY_MAJOR_VERSION < 3
- if (Py_FlushLine()) PyErr_Clear();
- #endif
- return 1;
- }
- Py_XDECREF(m);
- }
+ #else
+ m = PyInit_%(module_name)s();
+ #endif
+ if (PyErr_Occurred()) {
+ PyErr_Print(); /* This exits with the right code if SystemExit. */
+ #if PY_MAJOR_VERSION < 3
+ if (Py_FlushLine()) PyErr_Clear();
+ #endif
+ return 1;
+ }
+ Py_XDECREF(m);
+ }
#if PY_VERSION_HEX < 0x03060000
- Py_Finalize();
+ Py_Finalize();
#else
if (Py_FinalizeEx() < 0)
return 2;
#endif
- return 0;
-}
-
-
-#if PY_MAJOR_VERSION >= 3 && !defined(WIN32) && !defined(MS_WINDOWS)
-#include <locale.h>
-
-static wchar_t*
-__Pyx_char2wchar(char* arg)
-{
- wchar_t *res;
-#ifdef HAVE_BROKEN_MBSTOWCS
- /* Some platforms have a broken implementation of
- * mbstowcs which does not count the characters that
- * would result from conversion. Use an upper bound.
- */
- size_t argsize = strlen(arg);
-#else
- size_t argsize = mbstowcs(NULL, arg, 0);
-#endif
- size_t count;
- unsigned char *in;
- wchar_t *out;
-#ifdef HAVE_MBRTOWC
- mbstate_t mbs;
-#endif
- if (argsize != (size_t)-1) {
- res = (wchar_t *)malloc((argsize+1)*sizeof(wchar_t));
- if (!res)
- goto oom;
- count = mbstowcs(res, arg, argsize+1);
- if (count != (size_t)-1) {
- wchar_t *tmp;
- /* Only use the result if it contains no
- surrogate characters. */
- for (tmp = res; *tmp != 0 &&
- (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
- ;
- if (*tmp == 0)
- return res;
- }
- free(res);
- }
- /* Conversion failed. Fall back to escaping with surrogateescape. */
-#ifdef HAVE_MBRTOWC
-    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
-
- /* Overallocate; as multi-byte characters are in the argument, the
- actual output could use less memory. */
- argsize = strlen(arg) + 1;
+ return 0;
+}
+
+
+#if PY_MAJOR_VERSION >= 3 && !defined(WIN32) && !defined(MS_WINDOWS)
+#include <locale.h>
+
+static wchar_t*
+__Pyx_char2wchar(char* arg)
+{
+ wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+ /* Some platforms have a broken implementation of
+ * mbstowcs which does not count the characters that
+ * would result from conversion. Use an upper bound.
+ */
+ size_t argsize = strlen(arg);
+#else
+ size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+ size_t count;
+ unsigned char *in;
+ wchar_t *out;
+#ifdef HAVE_MBRTOWC
+ mbstate_t mbs;
+#endif
+ if (argsize != (size_t)-1) {
+ res = (wchar_t *)malloc((argsize+1)*sizeof(wchar_t));
+ if (!res)
+ goto oom;
+ count = mbstowcs(res, arg, argsize+1);
+ if (count != (size_t)-1) {
+ wchar_t *tmp;
+ /* Only use the result if it contains no
+ surrogate characters. */
+ for (tmp = res; *tmp != 0 &&
+ (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+ ;
+ if (*tmp == 0)
+ return res;
+ }
+ free(res);
+ }
+ /* Conversion failed. Fall back to escaping with surrogateescape. */
+#ifdef HAVE_MBRTOWC
+    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+
+ /* Overallocate; as multi-byte characters are in the argument, the
+ actual output could use less memory. */
+ argsize = strlen(arg) + 1;
res = (wchar_t *)malloc(argsize*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- memset(&mbs, 0, sizeof mbs);
- while (argsize) {
- size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
- if (converted == 0)
- /* Reached end of string; null char stored. */
- break;
- if (converted == (size_t)-2) {
- /* Incomplete character. This should never happen,
- since we provide everything that we have -
- unless there is a bug in the C library, or I
- misunderstood how mbrtowc works. */
- fprintf(stderr, "unexpected mbrtowc result -2\\n");
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ memset(&mbs, 0, sizeof mbs);
+ while (argsize) {
+ size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+ if (converted == 0)
+ /* Reached end of string; null char stored. */
+ break;
+ if (converted == (size_t)-2) {
+ /* Incomplete character. This should never happen,
+ since we provide everything that we have -
+ unless there is a bug in the C library, or I
+ misunderstood how mbrtowc works. */
+ fprintf(stderr, "unexpected mbrtowc result -2\\n");
free(res);
- return NULL;
- }
- if (converted == (size_t)-1) {
- /* Conversion error. Escape as UTF-8b, and start over
- in the initial shift state. */
- *out++ = 0xdc00 + *in++;
- argsize--;
- memset(&mbs, 0, sizeof mbs);
- continue;
- }
- if (*out >= 0xd800 && *out <= 0xdfff) {
- /* Surrogate character. Escape the original
- byte sequence with surrogateescape. */
- argsize -= converted;
- while (converted--)
- *out++ = 0xdc00 + *in++;
- continue;
- }
- /* successfully converted some bytes */
- in += converted;
- argsize -= converted;
- out++;
- }
-#else
- /* Cannot use C locale for escaping; manually escape as if charset
- is ASCII (i.e. escape all bytes > 128. This will still roundtrip
- correctly in the locale's charset, which must be an ASCII superset. */
+ return NULL;
+ }
+ if (converted == (size_t)-1) {
+ /* Conversion error. Escape as UTF-8b, and start over
+ in the initial shift state. */
+ *out++ = 0xdc00 + *in++;
+ argsize--;
+ memset(&mbs, 0, sizeof mbs);
+ continue;
+ }
+ if (*out >= 0xd800 && *out <= 0xdfff) {
+ /* Surrogate character. Escape the original
+ byte sequence with surrogateescape. */
+ argsize -= converted;
+ while (converted--)
+ *out++ = 0xdc00 + *in++;
+ continue;
+ }
+ /* successfully converted some bytes */
+ in += converted;
+ argsize -= converted;
+ out++;
+ }
+#else
+ /* Cannot use C locale for escaping; manually escape as if charset
+ is ASCII (i.e. escape all bytes > 128. This will still roundtrip
+ correctly in the locale's charset, which must be an ASCII superset. */
res = (wchar_t *)malloc((strlen(arg)+1)*sizeof(wchar_t));
- if (!res) goto oom;
- in = (unsigned char*)arg;
- out = res;
- while(*in)
- if(*in < 128)
- *out++ = *in++;
- else
- *out++ = 0xdc00 + *in++;
- *out = 0;
-#endif
- return res;
-oom:
- fprintf(stderr, "out of memory\\n");
- return NULL;
-}
-
-int
-%(main_method)s(int argc, char **argv)
-{
- if (!argc) {
- return __Pyx_main(0, NULL);
- }
- else {
+ if (!res) goto oom;
+ in = (unsigned char*)arg;
+ out = res;
+ while(*in)
+ if(*in < 128)
+ *out++ = *in++;
+ else
+ *out++ = 0xdc00 + *in++;
+ *out = 0;
+#endif
+ return res;
+oom:
+ fprintf(stderr, "out of memory\\n");
+ return NULL;
+}
+
+int
+%(main_method)s(int argc, char **argv)
+{
+ if (!argc) {
+ return __Pyx_main(0, NULL);
+ }
+ else {
int i, res;
- wchar_t **argv_copy = (wchar_t **)malloc(sizeof(wchar_t*)*argc);
+ wchar_t **argv_copy = (wchar_t **)malloc(sizeof(wchar_t*)*argc);
/* We need a second copy, as Python might modify the first one. */
- wchar_t **argv_copy2 = (wchar_t **)malloc(sizeof(wchar_t*)*argc);
+ wchar_t **argv_copy2 = (wchar_t **)malloc(sizeof(wchar_t*)*argc);
char *oldloc = strdup(setlocale(LC_ALL, NULL));
if (!argv_copy || !argv_copy2 || !oldloc) {
- fprintf(stderr, "out of memory\\n");
+ fprintf(stderr, "out of memory\\n");
free(argv_copy);
free(argv_copy2);
free(oldloc);
- return 1;
- }
+ return 1;
+ }
res = 0;
- setlocale(LC_ALL, "");
- for (i = 0; i < argc; i++) {
- argv_copy2[i] = argv_copy[i] = __Pyx_char2wchar(argv[i]);
+ setlocale(LC_ALL, "");
+ for (i = 0; i < argc; i++) {
+ argv_copy2[i] = argv_copy[i] = __Pyx_char2wchar(argv[i]);
if (!argv_copy[i]) res = 1; /* failure, but continue to simplify cleanup */
- }
- setlocale(LC_ALL, oldloc);
- free(oldloc);
+ }
+ setlocale(LC_ALL, oldloc);
+ free(oldloc);
if (res == 0)
res = __Pyx_main(argc, argv_copy);
- for (i = 0; i < argc; i++) {
+ for (i = 0; i < argc; i++) {
#if PY_VERSION_HEX < 0x03050000
- free(argv_copy2[i]);
+ free(argv_copy2[i]);
#else
PyMem_RawFree(argv_copy2[i]);
#endif
- }
- free(argv_copy);
- free(argv_copy2);
- return res;
- }
-}
-#endif
+ }
+ free(argv_copy);
+ free(argv_copy2);
+ return res;
+ }
+}
+#endif
diff --git a/contrib/tools/cython/Cython/Utility/Exceptions.c b/contrib/tools/cython/Cython/Utility/Exceptions.c
index e2961f1992..b0411f6956 100644
--- a/contrib/tools/cython/Cython/Utility/Exceptions.c
+++ b/contrib/tools/cython/Cython/Utility/Exceptions.c
@@ -1,10 +1,10 @@
-// Exception raising code
-//
-// Exceptions are raised by __Pyx_Raise() and stored as plain
-// type/value/tb in PyThreadState->curexc_*. When being caught by an
-// 'except' statement, curexc_* is moved over to exc_* by
-// __Pyx_GetException()
-
+// Exception raising code
+//
+// Exceptions are raised by __Pyx_Raise() and stored as plain
+// type/value/tb in PyThreadState->curexc_*. When being caught by an
+// 'except' statement, curexc_* is moved over to exc_* by
+// __Pyx_GetException()
+
/////////////// PyThreadStateGet.proto ///////////////
//@substitute: naming
@@ -58,10 +58,10 @@ static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tsta
}
#endif
-/////////////// PyErrFetchRestore.proto ///////////////
+/////////////// PyErrFetchRestore.proto ///////////////
//@substitute: naming
//@requires: PyThreadStateGet
-
+
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
@@ -70,7 +70,7 @@ static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tsta
#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState($local_tstate_cname, type, value, tb)
static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); /*proto*/
static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); /*proto*/
-
+
#if CYTHON_COMPILING_IN_CPYTHON
#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))
#else
@@ -88,133 +88,133 @@ static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject
#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb)
#endif
-/////////////// PyErrFetchRestore ///////////////
-
+/////////////// PyErrFetchRestore ///////////////
+
#if CYTHON_FAST_THREAD_STATE
static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) {
- PyObject *tmp_type, *tmp_value, *tmp_tb;
- tmp_type = tstate->curexc_type;
- tmp_value = tstate->curexc_value;
- tmp_tb = tstate->curexc_traceback;
- tstate->curexc_type = type;
- tstate->curexc_value = value;
- tstate->curexc_traceback = tb;
- Py_XDECREF(tmp_type);
- Py_XDECREF(tmp_value);
- Py_XDECREF(tmp_tb);
-}
-
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ tmp_type = tstate->curexc_type;
+ tmp_value = tstate->curexc_value;
+ tmp_tb = tstate->curexc_traceback;
+ tstate->curexc_type = type;
+ tstate->curexc_value = value;
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+}
+
static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
- *type = tstate->curexc_type;
- *value = tstate->curexc_value;
- *tb = tstate->curexc_traceback;
- tstate->curexc_type = 0;
- tstate->curexc_value = 0;
- tstate->curexc_traceback = 0;
+ *type = tstate->curexc_type;
+ *value = tstate->curexc_value;
+ *tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
}
-#endif
-
-/////////////// RaiseException.proto ///////////////
-
-static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/
-
-/////////////// RaiseException ///////////////
-//@requires: PyErrFetchRestore
+#endif
+
+/////////////// RaiseException.proto ///////////////
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/
+
+/////////////// RaiseException ///////////////
+//@requires: PyErrFetchRestore
//@requires: PyThreadStateGet
-
-// The following function is based on do_raise() from ceval.c. There
-// are separate versions for Python2 and Python3 as exception handling
-// has changed quite a lot between the two versions.
-
-#if PY_MAJOR_VERSION < 3
-static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
- CYTHON_UNUSED PyObject *cause) {
+
+// The following function is based on do_raise() from ceval.c. There
+// are separate versions for Python2 and Python3 as exception handling
+// has changed quite a lot between the two versions.
+
+#if PY_MAJOR_VERSION < 3
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
+ CYTHON_UNUSED PyObject *cause) {
__Pyx_PyThreadState_declare
- /* 'cause' is only used in Py3 */
- Py_XINCREF(type);
- if (!value || value == Py_None)
- value = NULL;
- else
- Py_INCREF(value);
-
- if (!tb || tb == Py_None)
- tb = NULL;
- else {
- Py_INCREF(tb);
- if (!PyTraceBack_Check(tb)) {
- PyErr_SetString(PyExc_TypeError,
- "raise: arg 3 must be a traceback or None");
- goto raise_error;
- }
- }
-
- if (PyType_Check(type)) {
- /* instantiate the type now (we don't know when and how it will be caught) */
-#if CYTHON_COMPILING_IN_PYPY
- /* PyPy can't handle value == NULL */
- if (!value) {
- Py_INCREF(Py_None);
- value = Py_None;
- }
-#endif
- PyErr_NormalizeException(&type, &value, &tb);
-
- } else {
- /* Raising an instance. The value should be a dummy. */
- if (value) {
- PyErr_SetString(PyExc_TypeError,
- "instance exception may not have a separate value");
- goto raise_error;
- }
- /* Normalize to raise <class>, <instance> */
- value = type;
- type = (PyObject*) Py_TYPE(type);
- Py_INCREF(type);
- if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
- PyErr_SetString(PyExc_TypeError,
- "raise: exception class must be a subclass of BaseException");
- goto raise_error;
- }
- }
-
+ /* 'cause' is only used in Py3 */
+ Py_XINCREF(type);
+ if (!value || value == Py_None)
+ value = NULL;
+ else
+ Py_INCREF(value);
+
+ if (!tb || tb == Py_None)
+ tb = NULL;
+ else {
+ Py_INCREF(tb);
+ if (!PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto raise_error;
+ }
+ }
+
+ if (PyType_Check(type)) {
+ /* instantiate the type now (we don't know when and how it will be caught) */
+#if CYTHON_COMPILING_IN_PYPY
+ /* PyPy can't handle value == NULL */
+ if (!value) {
+ Py_INCREF(Py_None);
+ value = Py_None;
+ }
+#endif
+ PyErr_NormalizeException(&type, &value, &tb);
+
+ } else {
+ /* Raising an instance. The value should be a dummy. */
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto raise_error;
+ }
+ /* Normalize to raise <class>, <instance> */
+ value = type;
+ type = (PyObject*) Py_TYPE(type);
+ Py_INCREF(type);
+ if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto raise_error;
+ }
+ }
+
__Pyx_PyThreadState_assign
- __Pyx_ErrRestore(type, value, tb);
- return;
-raise_error:
- Py_XDECREF(value);
- Py_XDECREF(type);
- Py_XDECREF(tb);
- return;
-}
-
-#else /* Python 3+ */
-
-static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
- PyObject* owned_instance = NULL;
- if (tb == Py_None) {
- tb = 0;
- } else if (tb && !PyTraceBack_Check(tb)) {
- PyErr_SetString(PyExc_TypeError,
- "raise: arg 3 must be a traceback or None");
- goto bad;
- }
- if (value == Py_None)
- value = 0;
-
- if (PyExceptionInstance_Check(type)) {
- if (value) {
- PyErr_SetString(PyExc_TypeError,
- "instance exception may not have a separate value");
- goto bad;
- }
- value = type;
- type = (PyObject*) Py_TYPE(value);
- } else if (PyExceptionClass_Check(type)) {
- // make sure value is an exception instance of type
- PyObject *instance_class = NULL;
- if (value && PyExceptionInstance_Check(value)) {
- instance_class = (PyObject*) Py_TYPE(value);
- if (instance_class != type) {
+ __Pyx_ErrRestore(type, value, tb);
+ return;
+raise_error:
+ Py_XDECREF(value);
+ Py_XDECREF(type);
+ Py_XDECREF(tb);
+ return;
+}
+
+#else /* Python 3+ */
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+ PyObject* owned_instance = NULL;
+ if (tb == Py_None) {
+ tb = 0;
+ } else if (tb && !PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto bad;
+ }
+ if (value == Py_None)
+ value = 0;
+
+ if (PyExceptionInstance_Check(type)) {
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto bad;
+ }
+ value = type;
+ type = (PyObject*) Py_TYPE(value);
+ } else if (PyExceptionClass_Check(type)) {
+ // make sure value is an exception instance of type
+ PyObject *instance_class = NULL;
+ if (value && PyExceptionInstance_Check(value)) {
+ instance_class = (PyObject*) Py_TYPE(value);
+ if (instance_class != type) {
int is_subclass = PyObject_IsSubclass(instance_class, type);
if (!is_subclass) {
instance_class = NULL;
@@ -222,68 +222,68 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject
// error on subclass test
goto bad;
} else {
- // believe the instance
- type = instance_class;
- }
- }
- }
- if (!instance_class) {
- // instantiate the type now (we don't know when and how it will be caught)
- // assuming that 'value' is an argument to the type's constructor
- // not using PyErr_NormalizeException() to avoid ref-counting problems
- PyObject *args;
- if (!value)
- args = PyTuple_New(0);
- else if (PyTuple_Check(value)) {
- Py_INCREF(value);
- args = value;
- } else
- args = PyTuple_Pack(1, value);
- if (!args)
- goto bad;
- owned_instance = PyObject_Call(type, args, NULL);
- Py_DECREF(args);
- if (!owned_instance)
- goto bad;
- value = owned_instance;
- if (!PyExceptionInstance_Check(value)) {
- PyErr_Format(PyExc_TypeError,
- "calling %R should have returned an instance of "
- "BaseException, not %R",
- type, Py_TYPE(value));
- goto bad;
- }
- }
- } else {
- PyErr_SetString(PyExc_TypeError,
- "raise: exception class must be a subclass of BaseException");
- goto bad;
- }
-
- if (cause) {
- PyObject *fixed_cause;
- if (cause == Py_None) {
- // raise ... from None
- fixed_cause = NULL;
- } else if (PyExceptionClass_Check(cause)) {
- fixed_cause = PyObject_CallObject(cause, NULL);
- if (fixed_cause == NULL)
- goto bad;
- } else if (PyExceptionInstance_Check(cause)) {
- fixed_cause = cause;
- Py_INCREF(fixed_cause);
- } else {
- PyErr_SetString(PyExc_TypeError,
- "exception causes must derive from "
- "BaseException");
- goto bad;
- }
- PyException_SetCause(value, fixed_cause);
- }
-
- PyErr_SetObject(type, value);
-
- if (tb) {
+ // believe the instance
+ type = instance_class;
+ }
+ }
+ }
+ if (!instance_class) {
+ // instantiate the type now (we don't know when and how it will be caught)
+ // assuming that 'value' is an argument to the type's constructor
+ // not using PyErr_NormalizeException() to avoid ref-counting problems
+ PyObject *args;
+ if (!value)
+ args = PyTuple_New(0);
+ else if (PyTuple_Check(value)) {
+ Py_INCREF(value);
+ args = value;
+ } else
+ args = PyTuple_Pack(1, value);
+ if (!args)
+ goto bad;
+ owned_instance = PyObject_Call(type, args, NULL);
+ Py_DECREF(args);
+ if (!owned_instance)
+ goto bad;
+ value = owned_instance;
+ if (!PyExceptionInstance_Check(value)) {
+ PyErr_Format(PyExc_TypeError,
+ "calling %R should have returned an instance of "
+ "BaseException, not %R",
+ type, Py_TYPE(value));
+ goto bad;
+ }
+ }
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto bad;
+ }
+
+ if (cause) {
+ PyObject *fixed_cause;
+ if (cause == Py_None) {
+ // raise ... from None
+ fixed_cause = NULL;
+ } else if (PyExceptionClass_Check(cause)) {
+ fixed_cause = PyObject_CallObject(cause, NULL);
+ if (fixed_cause == NULL)
+ goto bad;
+ } else if (PyExceptionInstance_Check(cause)) {
+ fixed_cause = cause;
+ Py_INCREF(fixed_cause);
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "exception causes must derive from "
+ "BaseException");
+ goto bad;
+ }
+ PyException_SetCause(value, fixed_cause);
+ }
+
+ PyErr_SetObject(type, value);
+
+ if (tb) {
#if CYTHON_COMPILING_IN_PYPY
PyObject *tmp_type, *tmp_value, *tmp_tb;
PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb);
@@ -292,21 +292,21 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject
Py_XDECREF(tmp_tb);
#else
PyThreadState *tstate = __Pyx_PyThreadState_Current;
- PyObject* tmp_tb = tstate->curexc_traceback;
- if (tb != tmp_tb) {
- Py_INCREF(tb);
- tstate->curexc_traceback = tb;
- Py_XDECREF(tmp_tb);
- }
+ PyObject* tmp_tb = tstate->curexc_traceback;
+ if (tb != tmp_tb) {
+ Py_INCREF(tb);
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_tb);
+ }
+#endif
+ }
+
+bad:
+ Py_XDECREF(owned_instance);
+ return;
+}
#endif
- }
-
-bad:
- Py_XDECREF(owned_instance);
- return;
-}
-#endif
-
+
/////////////// GetTopmostException.proto ///////////////
@@ -332,58 +332,58 @@ __Pyx_PyErr_GetTopmostException(PyThreadState *tstate)
#endif
-/////////////// GetException.proto ///////////////
+/////////////// GetException.proto ///////////////
//@substitute: naming
//@requires: PyThreadStateGet
-
+
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_GetException(type, value, tb) __Pyx__GetException($local_tstate_cname, type, value, tb)
static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); /*proto*/
#else
-static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
#endif
-
-/////////////// GetException ///////////////
-
+
+/////////////// GetException ///////////////
+
#if CYTHON_FAST_THREAD_STATE
static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb)
#else
static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb)
#endif
{
- PyObject *local_type, *local_value, *local_tb;
+ PyObject *local_type, *local_value, *local_tb;
#if CYTHON_FAST_THREAD_STATE
- PyObject *tmp_type, *tmp_value, *tmp_tb;
- local_type = tstate->curexc_type;
- local_value = tstate->curexc_value;
- local_tb = tstate->curexc_traceback;
- tstate->curexc_type = 0;
- tstate->curexc_value = 0;
- tstate->curexc_traceback = 0;
-#else
- PyErr_Fetch(&local_type, &local_value, &local_tb);
-#endif
- PyErr_NormalizeException(&local_type, &local_value, &local_tb);
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ local_type = tstate->curexc_type;
+ local_value = tstate->curexc_value;
+ local_tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
+#else
+ PyErr_Fetch(&local_type, &local_value, &local_tb);
+#endif
+ PyErr_NormalizeException(&local_type, &local_value, &local_tb);
#if CYTHON_FAST_THREAD_STATE
- if (unlikely(tstate->curexc_type))
-#else
- if (unlikely(PyErr_Occurred()))
-#endif
- goto bad;
- #if PY_MAJOR_VERSION >= 3
- if (local_tb) {
- if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0))
- goto bad;
- }
- #endif
- // traceback may be NULL for freshly raised exceptions
- Py_XINCREF(local_tb);
- // exception state may be temporarily empty in parallel loops (race condition)
- Py_XINCREF(local_type);
- Py_XINCREF(local_value);
- *type = local_type;
- *value = local_value;
- *tb = local_tb;
+ if (unlikely(tstate->curexc_type))
+#else
+ if (unlikely(PyErr_Occurred()))
+#endif
+ goto bad;
+ #if PY_MAJOR_VERSION >= 3
+ if (local_tb) {
+ if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0))
+ goto bad;
+ }
+ #endif
+ // traceback may be NULL for freshly raised exceptions
+ Py_XINCREF(local_tb);
+ // exception state may be temporarily empty in parallel loops (race condition)
+ Py_XINCREF(local_type);
+ Py_XINCREF(local_value);
+ *type = local_type;
+ *value = local_value;
+ *tb = local_tb;
#if CYTHON_FAST_THREAD_STATE
#if CYTHON_USE_EXC_INFO_STACK
{
@@ -396,95 +396,95 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb)
exc_info->exc_traceback = local_tb;
}
#else
- tmp_type = tstate->exc_type;
- tmp_value = tstate->exc_value;
- tmp_tb = tstate->exc_traceback;
- tstate->exc_type = local_type;
- tstate->exc_value = local_value;
- tstate->exc_traceback = local_tb;
+ tmp_type = tstate->exc_type;
+ tmp_value = tstate->exc_value;
+ tmp_tb = tstate->exc_traceback;
+ tstate->exc_type = local_type;
+ tstate->exc_value = local_value;
+ tstate->exc_traceback = local_tb;
#endif
- // Make sure tstate is in a consistent state when we XDECREF
- // these objects (DECREF may run arbitrary code).
- Py_XDECREF(tmp_type);
- Py_XDECREF(tmp_value);
- Py_XDECREF(tmp_tb);
-#else
- PyErr_SetExcInfo(local_type, local_value, local_tb);
-#endif
- return 0;
-bad:
- *type = 0;
- *value = 0;
- *tb = 0;
- Py_XDECREF(local_type);
- Py_XDECREF(local_value);
- Py_XDECREF(local_tb);
- return -1;
-}
-
-/////////////// ReRaiseException.proto ///////////////
-
-static CYTHON_INLINE void __Pyx_ReraiseException(void); /*proto*/
-
+ // Make sure tstate is in a consistent state when we XDECREF
+ // these objects (DECREF may run arbitrary code).
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+#else
+ PyErr_SetExcInfo(local_type, local_value, local_tb);
+#endif
+ return 0;
+bad:
+ *type = 0;
+ *value = 0;
+ *tb = 0;
+ Py_XDECREF(local_type);
+ Py_XDECREF(local_value);
+ Py_XDECREF(local_tb);
+ return -1;
+}
+
+/////////////// ReRaiseException.proto ///////////////
+
+static CYTHON_INLINE void __Pyx_ReraiseException(void); /*proto*/
+
/////////////// ReRaiseException ///////////////
//@requires: GetTopmostException
-
-static CYTHON_INLINE void __Pyx_ReraiseException(void) {
- PyObject *type = NULL, *value = NULL, *tb = NULL;
+
+static CYTHON_INLINE void __Pyx_ReraiseException(void) {
+ PyObject *type = NULL, *value = NULL, *tb = NULL;
#if CYTHON_FAST_THREAD_STATE
- PyThreadState *tstate = PyThreadState_GET();
+ PyThreadState *tstate = PyThreadState_GET();
#if CYTHON_USE_EXC_INFO_STACK
_PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate);
type = exc_info->exc_type;
value = exc_info->exc_value;
tb = exc_info->exc_traceback;
#else
- type = tstate->exc_type;
- value = tstate->exc_value;
- tb = tstate->exc_traceback;
+ type = tstate->exc_type;
+ value = tstate->exc_value;
+ tb = tstate->exc_traceback;
#endif
-#else
- PyErr_GetExcInfo(&type, &value, &tb);
-#endif
- if (!type || type == Py_None) {
+#else
+ PyErr_GetExcInfo(&type, &value, &tb);
+#endif
+ if (!type || type == Py_None) {
#if !CYTHON_FAST_THREAD_STATE
- Py_XDECREF(type);
- Py_XDECREF(value);
- Py_XDECREF(tb);
-#endif
- // message copied from Py3
- PyErr_SetString(PyExc_RuntimeError,
- "No active exception to reraise");
- } else {
+ Py_XDECREF(type);
+ Py_XDECREF(value);
+ Py_XDECREF(tb);
+#endif
+ // message copied from Py3
+ PyErr_SetString(PyExc_RuntimeError,
+ "No active exception to reraise");
+ } else {
#if CYTHON_FAST_THREAD_STATE
- Py_INCREF(type);
- Py_XINCREF(value);
- Py_XINCREF(tb);
-
-#endif
- PyErr_Restore(type, value, tb);
- }
-}
-
-/////////////// SaveResetException.proto ///////////////
+ Py_INCREF(type);
+ Py_XINCREF(value);
+ Py_XINCREF(tb);
+
+#endif
+ PyErr_Restore(type, value, tb);
+ }
+}
+
+/////////////// SaveResetException.proto ///////////////
//@substitute: naming
//@requires: PyThreadStateGet
-
+
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave($local_tstate_cname, type, value, tb)
static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); /*proto*/
#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset($local_tstate_cname, type, value, tb)
static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); /*proto*/
-
+
#else
#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb)
#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb)
#endif
-/////////////// SaveResetException ///////////////
+/////////////// SaveResetException ///////////////
//@requires: GetTopmostException
-
+
#if CYTHON_FAST_THREAD_STATE
static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
#if CYTHON_USE_EXC_INFO_STACK
@@ -493,17 +493,17 @@ static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject *
*value = exc_info->exc_value;
*tb = exc_info->exc_traceback;
#else
- *type = tstate->exc_type;
- *value = tstate->exc_value;
- *tb = tstate->exc_traceback;
+ *type = tstate->exc_type;
+ *value = tstate->exc_value;
+ *tb = tstate->exc_traceback;
#endif
- Py_XINCREF(*type);
- Py_XINCREF(*value);
- Py_XINCREF(*tb);
-}
-
+ Py_XINCREF(*type);
+ Py_XINCREF(*value);
+ Py_XINCREF(*tb);
+}
+
static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) {
- PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
#if CYTHON_USE_EXC_INFO_STACK
_PyErr_StackItem *exc_info = tstate->exc_info;
@@ -514,35 +514,35 @@ static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject
exc_info->exc_value = value;
exc_info->exc_traceback = tb;
#else
- tmp_type = tstate->exc_type;
- tmp_value = tstate->exc_value;
- tmp_tb = tstate->exc_traceback;
- tstate->exc_type = type;
- tstate->exc_value = value;
- tstate->exc_traceback = tb;
+ tmp_type = tstate->exc_type;
+ tmp_value = tstate->exc_value;
+ tmp_tb = tstate->exc_traceback;
+ tstate->exc_type = type;
+ tstate->exc_value = value;
+ tstate->exc_traceback = tb;
#endif
- Py_XDECREF(tmp_type);
- Py_XDECREF(tmp_value);
- Py_XDECREF(tmp_tb);
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
}
-#endif
-
-/////////////// SwapException.proto ///////////////
+#endif
+
+/////////////// SwapException.proto ///////////////
//@substitute: naming
//@requires: PyThreadStateGet
-
+
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap($local_tstate_cname, type, value, tb)
static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); /*proto*/
#else
-static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
+static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
#endif
-
-/////////////// SwapException ///////////////
-
+
+/////////////// SwapException ///////////////
+
#if CYTHON_FAST_THREAD_STATE
static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
- PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
#if CYTHON_USE_EXC_INFO_STACK
_PyErr_StackItem *exc_info = tstate->exc_info;
@@ -554,13 +554,13 @@ static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject *
exc_info->exc_value = *value;
exc_info->exc_traceback = *tb;
#else
- tmp_type = tstate->exc_type;
- tmp_value = tstate->exc_value;
- tmp_tb = tstate->exc_traceback;
-
- tstate->exc_type = *type;
- tstate->exc_value = *value;
- tstate->exc_traceback = *tb;
+ tmp_type = tstate->exc_type;
+ tmp_value = tstate->exc_value;
+ tmp_tb = tstate->exc_traceback;
+
+ tstate->exc_type = *type;
+ tstate->exc_value = *value;
+ tstate->exc_traceback = *tb;
#endif
*type = tmp_type;
@@ -568,33 +568,33 @@ static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject *
*tb = tmp_tb;
}
-#else
+#else
static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) {
PyObject *tmp_type, *tmp_value, *tmp_tb;
- PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb);
- PyErr_SetExcInfo(*type, *value, *tb);
- *type = tmp_type;
- *value = tmp_value;
- *tb = tmp_tb;
-}
+ PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb);
+ PyErr_SetExcInfo(*type, *value, *tb);
+ *type = tmp_type;
+ *value = tmp_value;
+ *tb = tmp_tb;
+}
#endif
-
-/////////////// WriteUnraisableException.proto ///////////////
-
-static void __Pyx_WriteUnraisable(const char *name, int clineno,
- int lineno, const char *filename,
+
+/////////////// WriteUnraisableException.proto ///////////////
+
+static void __Pyx_WriteUnraisable(const char *name, int clineno,
+ int lineno, const char *filename,
int full_traceback, int nogil); /*proto*/
-
-/////////////// WriteUnraisableException ///////////////
-//@requires: PyErrFetchRestore
+
+/////////////// WriteUnraisableException ///////////////
+//@requires: PyErrFetchRestore
//@requires: PyThreadStateGet
-
-static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno,
- CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename,
+
+static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno,
+ CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename,
int full_traceback, CYTHON_UNUSED int nogil) {
- PyObject *old_exc, *old_val, *old_tb;
- PyObject *ctx;
+ PyObject *old_exc, *old_val, *old_tb;
+ PyObject *ctx;
__Pyx_PyThreadState_declare
#ifdef WITH_THREAD
PyGILState_STATE state;
@@ -606,32 +606,32 @@ static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno,
#endif
#endif
__Pyx_PyThreadState_assign
- __Pyx_ErrFetch(&old_exc, &old_val, &old_tb);
- if (full_traceback) {
- Py_XINCREF(old_exc);
- Py_XINCREF(old_val);
- Py_XINCREF(old_tb);
- __Pyx_ErrRestore(old_exc, old_val, old_tb);
- PyErr_PrintEx(1);
- }
- #if PY_MAJOR_VERSION < 3
- ctx = PyString_FromString(name);
- #else
- ctx = PyUnicode_FromString(name);
- #endif
- __Pyx_ErrRestore(old_exc, old_val, old_tb);
- if (!ctx) {
- PyErr_WriteUnraisable(Py_None);
- } else {
- PyErr_WriteUnraisable(ctx);
- Py_DECREF(ctx);
- }
+ __Pyx_ErrFetch(&old_exc, &old_val, &old_tb);
+ if (full_traceback) {
+ Py_XINCREF(old_exc);
+ Py_XINCREF(old_val);
+ Py_XINCREF(old_tb);
+ __Pyx_ErrRestore(old_exc, old_val, old_tb);
+ PyErr_PrintEx(1);
+ }
+ #if PY_MAJOR_VERSION < 3
+ ctx = PyString_FromString(name);
+ #else
+ ctx = PyUnicode_FromString(name);
+ #endif
+ __Pyx_ErrRestore(old_exc, old_val, old_tb);
+ if (!ctx) {
+ PyErr_WriteUnraisable(Py_None);
+ } else {
+ PyErr_WriteUnraisable(ctx);
+ Py_DECREF(ctx);
+ }
#ifdef WITH_THREAD
if (nogil)
PyGILState_Release(state);
#endif
-}
-
+}
+
/////////////// CLineInTraceback.proto ///////////////
#ifdef CYTHON_CLINE_IN_TRACEBACK /* 0 or 1 to disable/enable C line display in tracebacks at C compile time */
@@ -692,109 +692,109 @@ static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int
}
#endif
-/////////////// AddTraceback.proto ///////////////
-
-static void __Pyx_AddTraceback(const char *funcname, int c_line,
- int py_line, const char *filename); /*proto*/
-
-/////////////// AddTraceback ///////////////
-//@requires: ModuleSetupCode.c::CodeObjectCache
+/////////////// AddTraceback.proto ///////////////
+
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename); /*proto*/
+
+/////////////// AddTraceback ///////////////
+//@requires: ModuleSetupCode.c::CodeObjectCache
//@requires: CLineInTraceback
-//@substitute: naming
-
-#include "compile.h"
-#include "frameobject.h"
-#include "traceback.h"
-
-static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
- const char *funcname, int c_line,
- int py_line, const char *filename) {
+//@substitute: naming
+
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+
+static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
+ const char *funcname, int c_line,
+ int py_line, const char *filename) {
PyCodeObject *py_code = NULL;
PyObject *py_funcname = NULL;
#if PY_MAJOR_VERSION < 3
PyObject *py_srcfile = NULL;
-
- py_srcfile = PyString_FromString(filename);
+
+ py_srcfile = PyString_FromString(filename);
if (!py_srcfile) goto bad;
- #endif
+ #endif
- if (c_line) {
- #if PY_MAJOR_VERSION < 3
- py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, $cfilenm_cname, c_line);
+ if (c_line) {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, $cfilenm_cname, c_line);
if (!py_funcname) goto bad;
- #else
- py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, $cfilenm_cname, c_line);
+ #else
+ py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, $cfilenm_cname, c_line);
if (!py_funcname) goto bad;
funcname = PyUnicode_AsUTF8(py_funcname);
if (!funcname) goto bad;
- #endif
- }
- else {
- #if PY_MAJOR_VERSION < 3
- py_funcname = PyString_FromString(funcname);
+ #endif
+ }
+ else {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromString(funcname);
if (!py_funcname) goto bad;
- #endif
- }
+ #endif
+ }
#if PY_MAJOR_VERSION < 3
- py_code = __Pyx_PyCode_New(
- 0, /*int argcount,*/
- 0, /*int kwonlyargcount,*/
- 0, /*int nlocals,*/
- 0, /*int stacksize,*/
- 0, /*int flags,*/
- $empty_bytes, /*PyObject *code,*/
- $empty_tuple, /*PyObject *consts,*/
- $empty_tuple, /*PyObject *names,*/
- $empty_tuple, /*PyObject *varnames,*/
- $empty_tuple, /*PyObject *freevars,*/
- $empty_tuple, /*PyObject *cellvars,*/
- py_srcfile, /*PyObject *filename,*/
- py_funcname, /*PyObject *name,*/
- py_line, /*int firstlineno,*/
- $empty_bytes /*PyObject *lnotab*/
- );
- Py_DECREF(py_srcfile);
+ py_code = __Pyx_PyCode_New(
+ 0, /*int argcount,*/
+ 0, /*int kwonlyargcount,*/
+ 0, /*int nlocals,*/
+ 0, /*int stacksize,*/
+ 0, /*int flags,*/
+ $empty_bytes, /*PyObject *code,*/
+ $empty_tuple, /*PyObject *consts,*/
+ $empty_tuple, /*PyObject *names,*/
+ $empty_tuple, /*PyObject *varnames,*/
+ $empty_tuple, /*PyObject *freevars,*/
+ $empty_tuple, /*PyObject *cellvars,*/
+ py_srcfile, /*PyObject *filename,*/
+ py_funcname, /*PyObject *name,*/
+ py_line, /*int firstlineno,*/
+ $empty_bytes /*PyObject *lnotab*/
+ );
+ Py_DECREF(py_srcfile);
#else
py_code = PyCode_NewEmpty(filename, funcname, py_line);
#endif
Py_XDECREF(py_funcname); // XDECREF since it's only set on Py3 if cline
- return py_code;
-bad:
+ return py_code;
+bad:
Py_XDECREF(py_funcname);
#if PY_MAJOR_VERSION < 3
- Py_XDECREF(py_srcfile);
+ Py_XDECREF(py_srcfile);
#endif
- return NULL;
-}
-
-static void __Pyx_AddTraceback(const char *funcname, int c_line,
- int py_line, const char *filename) {
- PyCodeObject *py_code = 0;
- PyFrameObject *py_frame = 0;
+ return NULL;
+}
+
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = 0;
+ PyFrameObject *py_frame = 0;
PyThreadState *tstate = __Pyx_PyThreadState_Current;
-
+
if (c_line) {
c_line = __Pyx_CLineForTraceback(tstate, c_line);
}
// Negate to avoid collisions between py and c lines.
py_code = $global_code_object_cache_find(c_line ? -c_line : py_line);
- if (!py_code) {
- py_code = __Pyx_CreateCodeObjectForTraceback(
- funcname, c_line, py_line, filename);
- if (!py_code) goto bad;
+ if (!py_code) {
+ py_code = __Pyx_CreateCodeObjectForTraceback(
+ funcname, c_line, py_line, filename);
+ if (!py_code) goto bad;
$global_code_object_cache_insert(c_line ? -c_line : py_line, py_code);
- }
- py_frame = PyFrame_New(
+ }
+ py_frame = PyFrame_New(
tstate, /*PyThreadState *tstate,*/
py_code, /*PyCodeObject *code,*/
$moddict_cname, /*PyObject *globals,*/
0 /*PyObject *locals*/
- );
- if (!py_frame) goto bad;
+ );
+ if (!py_frame) goto bad;
__Pyx_PyFrame_SetLineNumber(py_frame, py_line);
- PyTraceBack_Here(py_frame);
-bad:
- Py_XDECREF(py_code);
- Py_XDECREF(py_frame);
-}
+ PyTraceBack_Here(py_frame);
+bad:
+ Py_XDECREF(py_code);
+ Py_XDECREF(py_frame);
+}
diff --git a/contrib/tools/cython/Cython/Utility/ExtensionTypes.c b/contrib/tools/cython/Cython/Utility/ExtensionTypes.c
index 7687d1150d..0d8c41dee1 100644
--- a/contrib/tools/cython/Cython/Utility/ExtensionTypes.c
+++ b/contrib/tools/cython/Cython/Utility/ExtensionTypes.c
@@ -1,5 +1,5 @@
/////////////// PyType_Ready.proto ///////////////
-
+
static int __Pyx_PyType_Ready(PyTypeObject *t);
/////////////// PyType_Ready ///////////////
@@ -119,58 +119,58 @@ static int __Pyx_PyType_Ready(PyTypeObject *t) {
return r;
}
-/////////////// CallNextTpDealloc.proto ///////////////
-
-static void __Pyx_call_next_tp_dealloc(PyObject* obj, destructor current_tp_dealloc);
-
-/////////////// CallNextTpDealloc ///////////////
-
-static void __Pyx_call_next_tp_dealloc(PyObject* obj, destructor current_tp_dealloc) {
- PyTypeObject* type = Py_TYPE(obj);
- /* try to find the first parent type that has a different tp_dealloc() function */
- while (type && type->tp_dealloc != current_tp_dealloc)
- type = type->tp_base;
- while (type && type->tp_dealloc == current_tp_dealloc)
- type = type->tp_base;
- if (type)
- type->tp_dealloc(obj);
-}
-
-/////////////// CallNextTpTraverse.proto ///////////////
-
-static int __Pyx_call_next_tp_traverse(PyObject* obj, visitproc v, void *a, traverseproc current_tp_traverse);
-
-/////////////// CallNextTpTraverse ///////////////
-
-static int __Pyx_call_next_tp_traverse(PyObject* obj, visitproc v, void *a, traverseproc current_tp_traverse) {
- PyTypeObject* type = Py_TYPE(obj);
- /* try to find the first parent type that has a different tp_traverse() function */
- while (type && type->tp_traverse != current_tp_traverse)
- type = type->tp_base;
- while (type && type->tp_traverse == current_tp_traverse)
- type = type->tp_base;
- if (type && type->tp_traverse)
- return type->tp_traverse(obj, v, a);
- // FIXME: really ignore?
- return 0;
-}
-
-/////////////// CallNextTpClear.proto ///////////////
-
-static void __Pyx_call_next_tp_clear(PyObject* obj, inquiry current_tp_dealloc);
-
-/////////////// CallNextTpClear ///////////////
-
-static void __Pyx_call_next_tp_clear(PyObject* obj, inquiry current_tp_clear) {
- PyTypeObject* type = Py_TYPE(obj);
- /* try to find the first parent type that has a different tp_clear() function */
- while (type && type->tp_clear != current_tp_clear)
- type = type->tp_base;
- while (type && type->tp_clear == current_tp_clear)
- type = type->tp_base;
- if (type && type->tp_clear)
- type->tp_clear(obj);
-}
+/////////////// CallNextTpDealloc.proto ///////////////
+
+static void __Pyx_call_next_tp_dealloc(PyObject* obj, destructor current_tp_dealloc);
+
+/////////////// CallNextTpDealloc ///////////////
+
+static void __Pyx_call_next_tp_dealloc(PyObject* obj, destructor current_tp_dealloc) {
+ PyTypeObject* type = Py_TYPE(obj);
+ /* try to find the first parent type that has a different tp_dealloc() function */
+ while (type && type->tp_dealloc != current_tp_dealloc)
+ type = type->tp_base;
+ while (type && type->tp_dealloc == current_tp_dealloc)
+ type = type->tp_base;
+ if (type)
+ type->tp_dealloc(obj);
+}
+
+/////////////// CallNextTpTraverse.proto ///////////////
+
+static int __Pyx_call_next_tp_traverse(PyObject* obj, visitproc v, void *a, traverseproc current_tp_traverse);
+
+/////////////// CallNextTpTraverse ///////////////
+
+static int __Pyx_call_next_tp_traverse(PyObject* obj, visitproc v, void *a, traverseproc current_tp_traverse) {
+ PyTypeObject* type = Py_TYPE(obj);
+ /* try to find the first parent type that has a different tp_traverse() function */
+ while (type && type->tp_traverse != current_tp_traverse)
+ type = type->tp_base;
+ while (type && type->tp_traverse == current_tp_traverse)
+ type = type->tp_base;
+ if (type && type->tp_traverse)
+ return type->tp_traverse(obj, v, a);
+ // FIXME: really ignore?
+ return 0;
+}
+
+/////////////// CallNextTpClear.proto ///////////////
+
+static void __Pyx_call_next_tp_clear(PyObject* obj, inquiry current_tp_dealloc);
+
+/////////////// CallNextTpClear ///////////////
+
+static void __Pyx_call_next_tp_clear(PyObject* obj, inquiry current_tp_clear) {
+ PyTypeObject* type = Py_TYPE(obj);
+ /* try to find the first parent type that has a different tp_clear() function */
+ while (type && type->tp_clear != current_tp_clear)
+ type = type->tp_base;
+ while (type && type->tp_clear == current_tp_clear)
+ type = type->tp_base;
+ if (type && type->tp_clear)
+ type->tp_clear(obj);
+}
/////////////// SetupReduce.proto ///////////////
diff --git a/contrib/tools/cython/Cython/Utility/FunctionArguments.c b/contrib/tools/cython/Cython/Utility/FunctionArguments.c
index ff75ad4426..8333d93666 100644
--- a/contrib/tools/cython/Cython/Utility/FunctionArguments.c
+++ b/contrib/tools/cython/Cython/Utility/FunctionArguments.c
@@ -1,109 +1,109 @@
-//////////////////// ArgTypeTest.proto ////////////////////
-
-
+//////////////////// ArgTypeTest.proto ////////////////////
+
+
#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact) \
((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 : \
__Pyx__ArgTypeTest(obj, type, name, exact))
static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); /*proto*/
-//////////////////// ArgTypeTest ////////////////////
-
+//////////////////// ArgTypeTest ////////////////////
+
static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact)
-{
- if (unlikely(!type)) {
- PyErr_SetString(PyExc_SystemError, "Missing type object");
- return 0;
- }
- else if (exact) {
- #if PY_MAJOR_VERSION == 2
+{
+ if (unlikely(!type)) {
+ PyErr_SetString(PyExc_SystemError, "Missing type object");
+ return 0;
+ }
+ else if (exact) {
+ #if PY_MAJOR_VERSION == 2
if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1;
- #endif
- }
- else {
+ #endif
+ }
+ else {
if (likely(__Pyx_TypeCheck(obj, type))) return 1;
- }
+ }
PyErr_Format(PyExc_TypeError,
"Argument '%.200s' has incorrect type (expected %.200s, got %.200s)",
name, type->tp_name, Py_TYPE(obj)->tp_name);
- return 0;
-}
-
-//////////////////// RaiseArgTupleInvalid.proto ////////////////////
-
-static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
- Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/
-
-//////////////////// RaiseArgTupleInvalid ////////////////////
-
-// __Pyx_RaiseArgtupleInvalid raises the correct exception when too
-// many or too few positional arguments were found. This handles
-// Py_ssize_t formatting correctly.
-
-static void __Pyx_RaiseArgtupleInvalid(
- const char* func_name,
- int exact,
- Py_ssize_t num_min,
- Py_ssize_t num_max,
- Py_ssize_t num_found)
-{
- Py_ssize_t num_expected;
- const char *more_or_less;
-
- if (num_found < num_min) {
- num_expected = num_min;
- more_or_less = "at least";
- } else {
- num_expected = num_max;
- more_or_less = "at most";
- }
- if (exact) {
- more_or_less = "exactly";
- }
- PyErr_Format(PyExc_TypeError,
- "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
- func_name, more_or_less, num_expected,
- (num_expected == 1) ? "" : "s", num_found);
-}
-
-
-//////////////////// RaiseKeywordRequired.proto ////////////////////
-
+ return 0;
+}
+
+//////////////////// RaiseArgTupleInvalid.proto ////////////////////
+
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+ Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/
+
+//////////////////// RaiseArgTupleInvalid ////////////////////
+
+// __Pyx_RaiseArgtupleInvalid raises the correct exception when too
+// many or too few positional arguments were found. This handles
+// Py_ssize_t formatting correctly.
+
+static void __Pyx_RaiseArgtupleInvalid(
+ const char* func_name,
+ int exact,
+ Py_ssize_t num_min,
+ Py_ssize_t num_max,
+ Py_ssize_t num_found)
+{
+ Py_ssize_t num_expected;
+ const char *more_or_less;
+
+ if (num_found < num_min) {
+ num_expected = num_min;
+ more_or_less = "at least";
+ } else {
+ num_expected = num_max;
+ more_or_less = "at most";
+ }
+ if (exact) {
+ more_or_less = "exactly";
+ }
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ func_name, more_or_less, num_expected,
+ (num_expected == 1) ? "" : "s", num_found);
+}
+
+
+//////////////////// RaiseKeywordRequired.proto ////////////////////
+
static void __Pyx_RaiseKeywordRequired(const char* func_name, PyObject* kw_name); /*proto*/
-
-//////////////////// RaiseKeywordRequired ////////////////////
-
+
+//////////////////// RaiseKeywordRequired ////////////////////
+
static void __Pyx_RaiseKeywordRequired(const char* func_name, PyObject* kw_name) {
- PyErr_Format(PyExc_TypeError,
- #if PY_MAJOR_VERSION >= 3
- "%s() needs keyword-only argument %U", func_name, kw_name);
- #else
- "%s() needs keyword-only argument %s", func_name,
- PyString_AS_STRING(kw_name));
- #endif
-}
-
-
-//////////////////// RaiseDoubleKeywords.proto ////////////////////
-
-static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /*proto*/
-
-//////////////////// RaiseDoubleKeywords ////////////////////
-
-static void __Pyx_RaiseDoubleKeywordsError(
- const char* func_name,
- PyObject* kw_name)
-{
- PyErr_Format(PyExc_TypeError,
- #if PY_MAJOR_VERSION >= 3
- "%s() got multiple values for keyword argument '%U'", func_name, kw_name);
- #else
- "%s() got multiple values for keyword argument '%s'", func_name,
- PyString_AsString(kw_name));
- #endif
-}
-
-
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION >= 3
+ "%s() needs keyword-only argument %U", func_name, kw_name);
+ #else
+ "%s() needs keyword-only argument %s", func_name,
+ PyString_AS_STRING(kw_name));
+ #endif
+}
+
+
+//////////////////// RaiseDoubleKeywords.proto ////////////////////
+
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /*proto*/
+
+//////////////////// RaiseDoubleKeywords ////////////////////
+
+static void __Pyx_RaiseDoubleKeywordsError(
+ const char* func_name,
+ PyObject* kw_name)
+{
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION >= 3
+ "%s() got multiple values for keyword argument '%U'", func_name, kw_name);
+ #else
+ "%s() got multiple values for keyword argument '%s'", func_name,
+ PyString_AsString(kw_name));
+ #endif
+}
+
+
//////////////////// RaiseMappingExpected.proto ////////////////////
static void __Pyx_RaiseMappingExpectedError(PyObject* arg); /*proto*/
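As an aside on __Pyx_RaiseArgtupleInvalid, restored earlier in this hunk: it builds the familiar "takes exactly N positional arguments (M given)" TypeError with a Py_ssize_t-safe format string. A minimal stand-alone C sketch of the same message assembly, using an invented helper name and plain snprintf instead of PyErr_Format (illustration only, not part of the diff):

    #include <stdio.h>

    /* Hypothetical helper: mirrors the restored format string, with long
     * standing in for Py_ssize_t. */
    static void format_argtuple_error(char *buf, size_t cap, const char *func_name,
                                      int exact, long num_min, long num_max,
                                      long num_found)
    {
        const char *more_or_less;
        long num_expected;

        if (num_found < num_min) {
            num_expected = num_min;
            more_or_less = "at least";
        } else {
            num_expected = num_max;
            more_or_less = "at most";
        }
        if (exact)
            more_or_less = "exactly";
        snprintf(buf, cap, "%.200s() takes %.8s %ld positional argument%.1s (%ld given)",
                 func_name, more_or_less, num_expected,
                 (num_expected == 1) ? "" : "s", num_found);
    }

    int main(void)
    {
        char msg[256];
        format_argtuple_error(msg, sizeof msg, "spam", 1, 2, 2, 3);
        puts(msg);   /* spam() takes exactly 2 positional arguments (3 given) */
        return 0;
    }

Compiled and run, it prints "spam() takes exactly 2 positional arguments (3 given)", matching the wording produced by the restored helper.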
@@ -115,186 +115,186 @@ static void __Pyx_RaiseMappingExpectedError(PyObject* arg) {
}
-//////////////////// KeywordStringCheck.proto ////////////////////
-
+//////////////////// KeywordStringCheck.proto ////////////////////
+
static int __Pyx_CheckKeywordStrings(PyObject *kwdict, const char* function_name, int kw_allowed); /*proto*/
-
-//////////////////// KeywordStringCheck ////////////////////
-
-// __Pyx_CheckKeywordStrings raises an error if non-string keywords
-// were passed to a function, or if any keywords were passed to a
-// function that does not accept them.
-
+
+//////////////////// KeywordStringCheck ////////////////////
+
+// __Pyx_CheckKeywordStrings raises an error if non-string keywords
+// were passed to a function, or if any keywords were passed to a
+// function that does not accept them.
+
static int __Pyx_CheckKeywordStrings(
- PyObject *kwdict,
- const char* function_name,
- int kw_allowed)
-{
- PyObject* key = 0;
- Py_ssize_t pos = 0;
-#if CYTHON_COMPILING_IN_PYPY
- /* PyPy appears to check keywords at call time, not at unpacking time => not much to do here */
- if (!kw_allowed && PyDict_Next(kwdict, &pos, &key, 0))
- goto invalid_keyword;
- return 1;
-#else
- while (PyDict_Next(kwdict, &pos, &key, 0)) {
- #if PY_MAJOR_VERSION < 3
+ PyObject *kwdict,
+ const char* function_name,
+ int kw_allowed)
+{
+ PyObject* key = 0;
+ Py_ssize_t pos = 0;
+#if CYTHON_COMPILING_IN_PYPY
+ /* PyPy appears to check keywords at call time, not at unpacking time => not much to do here */
+ if (!kw_allowed && PyDict_Next(kwdict, &pos, &key, 0))
+ goto invalid_keyword;
+ return 1;
+#else
+ while (PyDict_Next(kwdict, &pos, &key, 0)) {
+ #if PY_MAJOR_VERSION < 3
if (unlikely(!PyString_Check(key)))
- #endif
- if (unlikely(!PyUnicode_Check(key)))
- goto invalid_keyword_type;
- }
- if ((!kw_allowed) && unlikely(key))
- goto invalid_keyword;
- return 1;
-invalid_keyword_type:
- PyErr_Format(PyExc_TypeError,
- "%.200s() keywords must be strings", function_name);
- return 0;
-#endif
-invalid_keyword:
- PyErr_Format(PyExc_TypeError,
- #if PY_MAJOR_VERSION < 3
- "%.200s() got an unexpected keyword argument '%.200s'",
- function_name, PyString_AsString(key));
- #else
- "%s() got an unexpected keyword argument '%U'",
- function_name, key);
- #endif
- return 0;
-}
-
-
-//////////////////// ParseKeywords.proto ////////////////////
-
-static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \
- PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \
- const char* function_name); /*proto*/
-
-//////////////////// ParseKeywords ////////////////////
-//@requires: RaiseDoubleKeywords
-
-// __Pyx_ParseOptionalKeywords copies the optional/unknown keyword
-// arguments from the kwds dict into kwds2. If kwds2 is NULL, unknown
-// keywords will raise an invalid keyword error.
-//
-// Three kinds of errors are checked: 1) non-string keywords, 2)
-// unexpected keywords and 3) overlap with positional arguments.
-//
-// If num_posargs is greater than 0, it denotes the number of positional
-// arguments that were passed and that must therefore not appear
-// amongst the keywords as well.
-//
-// This method does not check for required keyword arguments.
-
-static int __Pyx_ParseOptionalKeywords(
- PyObject *kwds,
- PyObject **argnames[],
- PyObject *kwds2,
- PyObject *values[],
- Py_ssize_t num_pos_args,
- const char* function_name)
-{
- PyObject *key = 0, *value = 0;
- Py_ssize_t pos = 0;
- PyObject*** name;
- PyObject*** first_kw_arg = argnames + num_pos_args;
-
- while (PyDict_Next(kwds, &pos, &key, &value)) {
- name = first_kw_arg;
- while (*name && (**name != key)) name++;
- if (*name) {
- values[name-argnames] = value;
- continue;
- }
-
- name = first_kw_arg;
- #if PY_MAJOR_VERSION < 3
+ #endif
+ if (unlikely(!PyUnicode_Check(key)))
+ goto invalid_keyword_type;
+ }
+ if ((!kw_allowed) && unlikely(key))
+ goto invalid_keyword;
+ return 1;
+invalid_keyword_type:
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() keywords must be strings", function_name);
+ return 0;
+#endif
+invalid_keyword:
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION < 3
+ "%.200s() got an unexpected keyword argument '%.200s'",
+ function_name, PyString_AsString(key));
+ #else
+ "%s() got an unexpected keyword argument '%U'",
+ function_name, key);
+ #endif
+ return 0;
+}
+
+
+//////////////////// ParseKeywords.proto ////////////////////
+
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \
+ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \
+ const char* function_name); /*proto*/
+
+//////////////////// ParseKeywords ////////////////////
+//@requires: RaiseDoubleKeywords
+
+// __Pyx_ParseOptionalKeywords copies the optional/unknown keyword
+// arguments from the kwds dict into kwds2. If kwds2 is NULL, unknown
+// keywords will raise an invalid keyword error.
+//
+// Three kinds of errors are checked: 1) non-string keywords, 2)
+// unexpected keywords and 3) overlap with positional arguments.
+//
+// If num_posargs is greater than 0, it denotes the number of positional
+// arguments that were passed and that must therefore not appear
+// amongst the keywords as well.
+//
+// This method does not check for required keyword arguments.
+
+static int __Pyx_ParseOptionalKeywords(
+ PyObject *kwds,
+ PyObject **argnames[],
+ PyObject *kwds2,
+ PyObject *values[],
+ Py_ssize_t num_pos_args,
+ const char* function_name)
+{
+ PyObject *key = 0, *value = 0;
+ Py_ssize_t pos = 0;
+ PyObject*** name;
+ PyObject*** first_kw_arg = argnames + num_pos_args;
+
+ while (PyDict_Next(kwds, &pos, &key, &value)) {
+ name = first_kw_arg;
+ while (*name && (**name != key)) name++;
+ if (*name) {
+ values[name-argnames] = value;
+ continue;
+ }
+
+ name = first_kw_arg;
+ #if PY_MAJOR_VERSION < 3
if (likely(PyString_Check(key))) {
- while (*name) {
- if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
- && _PyString_Eq(**name, key)) {
- values[name-argnames] = value;
- break;
- }
- name++;
- }
- if (*name) continue;
- else {
- // not found after positional args, check for duplicate
- PyObject*** argname = argnames;
- while (argname != first_kw_arg) {
- if ((**argname == key) || (
- (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
- && _PyString_Eq(**argname, key))) {
- goto arg_passed_twice;
- }
- argname++;
- }
- }
- } else
- #endif
- if (likely(PyUnicode_Check(key))) {
- while (*name) {
- int cmp = (**name == key) ? 0 :
- #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+ while (*name) {
+ if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
+ && _PyString_Eq(**name, key)) {
+ values[name-argnames] = value;
+ break;
+ }
+ name++;
+ }
+ if (*name) continue;
+ else {
+ // not found after positional args, check for duplicate
+ PyObject*** argname = argnames;
+ while (argname != first_kw_arg) {
+ if ((**argname == key) || (
+ (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
+ && _PyString_Eq(**argname, key))) {
+ goto arg_passed_twice;
+ }
+ argname++;
+ }
+ }
+ } else
+ #endif
+ if (likely(PyUnicode_Check(key))) {
+ while (*name) {
+ int cmp = (**name == key) ? 0 :
+ #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
(__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
- #endif
+ #endif
// In Py2, we may need to convert the argument name from str to unicode for comparison.
- PyUnicode_Compare(**name, key);
- if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
- if (cmp == 0) {
- values[name-argnames] = value;
- break;
- }
- name++;
- }
- if (*name) continue;
- else {
- // not found after positional args, check for duplicate
- PyObject*** argname = argnames;
- while (argname != first_kw_arg) {
- int cmp = (**argname == key) ? 0 :
- #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+ PyUnicode_Compare(**name, key);
+ if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+ if (cmp == 0) {
+ values[name-argnames] = value;
+ break;
+ }
+ name++;
+ }
+ if (*name) continue;
+ else {
+ // not found after positional args, check for duplicate
+ PyObject*** argname = argnames;
+ while (argname != first_kw_arg) {
+ int cmp = (**argname == key) ? 0 :
+ #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
(__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
- #endif
- // need to convert argument name from bytes to unicode for comparison
- PyUnicode_Compare(**argname, key);
- if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
- if (cmp == 0) goto arg_passed_twice;
- argname++;
- }
- }
- } else
- goto invalid_keyword_type;
-
- if (kwds2) {
- if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
- } else {
- goto invalid_keyword;
- }
- }
- return 0;
-arg_passed_twice:
- __Pyx_RaiseDoubleKeywordsError(function_name, key);
- goto bad;
-invalid_keyword_type:
- PyErr_Format(PyExc_TypeError,
- "%.200s() keywords must be strings", function_name);
- goto bad;
-invalid_keyword:
- PyErr_Format(PyExc_TypeError,
- #if PY_MAJOR_VERSION < 3
- "%.200s() got an unexpected keyword argument '%.200s'",
- function_name, PyString_AsString(key));
- #else
- "%s() got an unexpected keyword argument '%U'",
- function_name, key);
- #endif
-bad:
- return -1;
-}
+ #endif
+ // need to convert argument name from bytes to unicode for comparison
+ PyUnicode_Compare(**argname, key);
+ if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+ if (cmp == 0) goto arg_passed_twice;
+ argname++;
+ }
+ }
+ } else
+ goto invalid_keyword_type;
+
+ if (kwds2) {
+ if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
+ } else {
+ goto invalid_keyword;
+ }
+ }
+ return 0;
+arg_passed_twice:
+ __Pyx_RaiseDoubleKeywordsError(function_name, key);
+ goto bad;
+invalid_keyword_type:
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() keywords must be strings", function_name);
+ goto bad;
+invalid_keyword:
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION < 3
+ "%.200s() got an unexpected keyword argument '%.200s'",
+ function_name, PyString_AsString(key));
+ #else
+ "%s() got an unexpected keyword argument '%U'",
+ function_name, key);
+ #endif
+bad:
+ return -1;
+}
//////////////////// MergeKeywords.proto ////////////////////
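One detail worth noting from the keyword-parsing code above: __Pyx_ParseOptionalKeywords first compares keyword names by pointer identity (interned strings from the argument-name table) and only falls back to a character comparison when the pointers differ. A hypothetical, stripped-down C sketch of that two-step lookup, with plain C strings standing in for interned PyObject* names and an invented find_kwarg helper (not part of the diff):

    #include <stdio.h>
    #include <string.h>

    /* Fast path: identical pointers mean identical interned names.
     * Slow path: fall back to comparing the characters. */
    static int find_kwarg(const char *const *argnames, const char *key)
    {
        for (int i = 0; argnames[i] != NULL; i++) {
            if (argnames[i] == key || strcmp(argnames[i], key) == 0)
                return i;      /* index of the matching formal parameter */
        }
        return -1;             /* unknown keyword: caller raises TypeError */
    }

    int main(void)
    {
        const char *const argnames[] = { "start", "stop", "step", NULL };
        printf("stop -> %d, stpo -> %d\n",
               find_kwarg(argnames, "stop"),    /* 1 */
               find_kwarg(argnames, "stpo"));   /* -1, would be rejected */
        return 0;
    }

In the real helper a miss on this table is then re-checked against the positional-argument names so that a duplicate raises through the arg_passed_twice label shown above.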
diff --git a/contrib/tools/cython/Cython/Utility/ImportExport.c b/contrib/tools/cython/Cython/Utility/ImportExport.c
index e227dd1652..532ec326f6 100644
--- a/contrib/tools/cython/Cython/Utility/ImportExport.c
+++ b/contrib/tools/cython/Cython/Utility/ImportExport.c
@@ -1,110 +1,110 @@
-/////////////// PyIdentifierFromString.proto ///////////////
-
-#if !defined(__Pyx_PyIdentifier_FromString)
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
-#else
- #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
-#endif
-#endif
-
-
-/////////////// Import.proto ///////////////
-
-static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/
-
-/////////////// Import ///////////////
-//@requires: ObjectHandling.c::PyObjectGetAttrStr
-//@substitute: naming
-
-static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
- PyObject *empty_list = 0;
- PyObject *module = 0;
- PyObject *global_dict = 0;
- PyObject *empty_dict = 0;
- PyObject *list;
+/////////////// PyIdentifierFromString.proto ///////////////
+
+#if !defined(__Pyx_PyIdentifier_FromString)
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
+#else
+ #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
+#endif
+#endif
+
+
+/////////////// Import.proto ///////////////
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/
+
+/////////////// Import ///////////////
+//@requires: ObjectHandling.c::PyObjectGetAttrStr
+//@substitute: naming
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
+ PyObject *empty_list = 0;
+ PyObject *module = 0;
+ PyObject *global_dict = 0;
+ PyObject *empty_dict = 0;
+ PyObject *list;
#if PY_MAJOR_VERSION < 3
- PyObject *py_import;
- py_import = __Pyx_PyObject_GetAttrStr($builtins_cname, PYIDENT("__import__"));
- if (!py_import)
- goto bad;
- #endif
- if (from_list)
- list = from_list;
- else {
- empty_list = PyList_New(0);
- if (!empty_list)
- goto bad;
- list = empty_list;
- }
- global_dict = PyModule_GetDict($module_cname);
- if (!global_dict)
- goto bad;
- empty_dict = PyDict_New();
- if (!empty_dict)
- goto bad;
- {
- #if PY_MAJOR_VERSION >= 3
- if (level == -1) {
+ PyObject *py_import;
+ py_import = __Pyx_PyObject_GetAttrStr($builtins_cname, PYIDENT("__import__"));
+ if (!py_import)
+ goto bad;
+ #endif
+ if (from_list)
+ list = from_list;
+ else {
+ empty_list = PyList_New(0);
+ if (!empty_list)
+ goto bad;
+ list = empty_list;
+ }
+ global_dict = PyModule_GetDict($module_cname);
+ if (!global_dict)
+ goto bad;
+ empty_dict = PyDict_New();
+ if (!empty_dict)
+ goto bad;
+ {
+ #if PY_MAJOR_VERSION >= 3
+ if (level == -1) {
// Avoid C compiler warning if strchr() evaluates to false at compile time.
if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) {
- /* try package relative import first */
- module = PyImport_ImportModuleLevelObject(
- name, global_dict, empty_dict, list, 1);
- if (!module) {
- if (!PyErr_ExceptionMatches(PyExc_ImportError))
- goto bad;
- PyErr_Clear();
- }
- }
- level = 0; /* try absolute import on failure */
- }
- #endif
- if (!module) {
+ /* try package relative import first */
+ module = PyImport_ImportModuleLevelObject(
+ name, global_dict, empty_dict, list, 1);
+ if (!module) {
+ if (!PyErr_ExceptionMatches(PyExc_ImportError))
+ goto bad;
+ PyErr_Clear();
+ }
+ }
+ level = 0; /* try absolute import on failure */
+ }
+ #endif
+ if (!module) {
#if PY_MAJOR_VERSION < 3
- PyObject *py_level = PyInt_FromLong(level);
- if (!py_level)
- goto bad;
- module = PyObject_CallFunctionObjArgs(py_import,
+ PyObject *py_level = PyInt_FromLong(level);
+ if (!py_level)
+ goto bad;
+ module = PyObject_CallFunctionObjArgs(py_import,
name, global_dict, empty_dict, list, py_level, (PyObject *)NULL);
- Py_DECREF(py_level);
- #else
- module = PyImport_ImportModuleLevelObject(
- name, global_dict, empty_dict, list, level);
- #endif
- }
- }
-bad:
+ Py_DECREF(py_level);
+ #else
+ module = PyImport_ImportModuleLevelObject(
+ name, global_dict, empty_dict, list, level);
+ #endif
+ }
+ }
+bad:
#if PY_MAJOR_VERSION < 3
- Py_XDECREF(py_import);
- #endif
- Py_XDECREF(empty_list);
- Py_XDECREF(empty_dict);
- return module;
-}
-
-
-/////////////// ImportFrom.proto ///////////////
-
-static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); /*proto*/
-
-/////////////// ImportFrom ///////////////
-//@requires: ObjectHandling.c::PyObjectGetAttrStr
-
-static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
- PyObject* value = __Pyx_PyObject_GetAttrStr(module, name);
- if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) {
- PyErr_Format(PyExc_ImportError,
- #if PY_MAJOR_VERSION < 3
- "cannot import name %.230s", PyString_AS_STRING(name));
- #else
- "cannot import name %S", name);
- #endif
- }
- return value;
-}
-
-
+ Py_XDECREF(py_import);
+ #endif
+ Py_XDECREF(empty_list);
+ Py_XDECREF(empty_dict);
+ return module;
+}
+
+
+/////////////// ImportFrom.proto ///////////////
+
+static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); /*proto*/
+
+/////////////// ImportFrom ///////////////
+//@requires: ObjectHandling.c::PyObjectGetAttrStr
+
+static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
+ PyObject* value = __Pyx_PyObject_GetAttrStr(module, name);
+ if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ PyErr_Format(PyExc_ImportError,
+ #if PY_MAJOR_VERSION < 3
+ "cannot import name %.230s", PyString_AS_STRING(name));
+ #else
+ "cannot import name %S", name);
+ #endif
+ }
+ return value;
+}
+
+
/////////////// ImportStar ///////////////
//@substitute: naming
@@ -224,95 +224,95 @@ bad:
}
-/////////////// SetPackagePathFromImportLib.proto ///////////////
-
+/////////////// SetPackagePathFromImportLib.proto ///////////////
+
// PY_VERSION_HEX >= 0x03030000
#if PY_MAJOR_VERSION >= 3 && !CYTHON_PEP489_MULTI_PHASE_INIT
-static int __Pyx_SetPackagePathFromImportLib(const char* parent_package_name, PyObject *module_name);
-#else
-#define __Pyx_SetPackagePathFromImportLib(a, b) 0
-#endif
-
-/////////////// SetPackagePathFromImportLib ///////////////
-//@requires: ObjectHandling.c::PyObjectGetAttrStr
-//@substitute: naming
-
+static int __Pyx_SetPackagePathFromImportLib(const char* parent_package_name, PyObject *module_name);
+#else
+#define __Pyx_SetPackagePathFromImportLib(a, b) 0
+#endif
+
+/////////////// SetPackagePathFromImportLib ///////////////
+//@requires: ObjectHandling.c::PyObjectGetAttrStr
+//@substitute: naming
+
// PY_VERSION_HEX >= 0x03030000
#if PY_MAJOR_VERSION >= 3 && !CYTHON_PEP489_MULTI_PHASE_INIT
-static int __Pyx_SetPackagePathFromImportLib(const char* parent_package_name, PyObject *module_name) {
- PyObject *importlib, *loader, *osmod, *ossep, *parts, *package_path;
- PyObject *path = NULL, *file_path = NULL;
- int result;
- if (parent_package_name) {
- PyObject *package = PyImport_ImportModule(parent_package_name);
- if (unlikely(!package))
- goto bad;
- path = PyObject_GetAttrString(package, "__path__");
- Py_DECREF(package);
- if (unlikely(!path) || unlikely(path == Py_None))
- goto bad;
- } else {
- path = Py_None; Py_INCREF(Py_None);
- }
- // package_path = [importlib.find_loader(module_name, path).path.rsplit(os.sep, 1)[0]]
- importlib = PyImport_ImportModule("importlib");
- if (unlikely(!importlib))
- goto bad;
- loader = PyObject_CallMethod(importlib, "find_loader", "(OO)", module_name, path);
- Py_DECREF(importlib);
- Py_DECREF(path); path = NULL;
- if (unlikely(!loader))
- goto bad;
- file_path = PyObject_GetAttrString(loader, "path");
- Py_DECREF(loader);
- if (unlikely(!file_path))
- goto bad;
-
- if (unlikely(PyObject_SetAttrString($module_cname, "__file__", file_path) < 0))
- goto bad;
-
- osmod = PyImport_ImportModule("os");
- if (unlikely(!osmod))
- goto bad;
- ossep = PyObject_GetAttrString(osmod, "sep");
- Py_DECREF(osmod);
- if (unlikely(!ossep))
- goto bad;
- parts = PyObject_CallMethod(file_path, "rsplit", "(Oi)", ossep, 1);
- Py_DECREF(file_path); file_path = NULL;
- Py_DECREF(ossep);
- if (unlikely(!parts))
- goto bad;
- package_path = Py_BuildValue("[O]", PyList_GET_ITEM(parts, 0));
- Py_DECREF(parts);
- if (unlikely(!package_path))
- goto bad;
- goto set_path;
-
-bad:
- PyErr_WriteUnraisable(module_name);
- Py_XDECREF(path);
- Py_XDECREF(file_path);
-
- // set an empty path list on failure
- PyErr_Clear();
- package_path = PyList_New(0);
- if (unlikely(!package_path))
- return -1;
-
-set_path:
- result = PyObject_SetAttrString($module_cname, "__path__", package_path);
- Py_DECREF(package_path);
- return result;
-}
-#endif
-
-
-/////////////// TypeImport.proto ///////////////
-
+static int __Pyx_SetPackagePathFromImportLib(const char* parent_package_name, PyObject *module_name) {
+ PyObject *importlib, *loader, *osmod, *ossep, *parts, *package_path;
+ PyObject *path = NULL, *file_path = NULL;
+ int result;
+ if (parent_package_name) {
+ PyObject *package = PyImport_ImportModule(parent_package_name);
+ if (unlikely(!package))
+ goto bad;
+ path = PyObject_GetAttrString(package, "__path__");
+ Py_DECREF(package);
+ if (unlikely(!path) || unlikely(path == Py_None))
+ goto bad;
+ } else {
+ path = Py_None; Py_INCREF(Py_None);
+ }
+ // package_path = [importlib.find_loader(module_name, path).path.rsplit(os.sep, 1)[0]]
+ importlib = PyImport_ImportModule("importlib");
+ if (unlikely(!importlib))
+ goto bad;
+ loader = PyObject_CallMethod(importlib, "find_loader", "(OO)", module_name, path);
+ Py_DECREF(importlib);
+ Py_DECREF(path); path = NULL;
+ if (unlikely(!loader))
+ goto bad;
+ file_path = PyObject_GetAttrString(loader, "path");
+ Py_DECREF(loader);
+ if (unlikely(!file_path))
+ goto bad;
+
+ if (unlikely(PyObject_SetAttrString($module_cname, "__file__", file_path) < 0))
+ goto bad;
+
+ osmod = PyImport_ImportModule("os");
+ if (unlikely(!osmod))
+ goto bad;
+ ossep = PyObject_GetAttrString(osmod, "sep");
+ Py_DECREF(osmod);
+ if (unlikely(!ossep))
+ goto bad;
+ parts = PyObject_CallMethod(file_path, "rsplit", "(Oi)", ossep, 1);
+ Py_DECREF(file_path); file_path = NULL;
+ Py_DECREF(ossep);
+ if (unlikely(!parts))
+ goto bad;
+ package_path = Py_BuildValue("[O]", PyList_GET_ITEM(parts, 0));
+ Py_DECREF(parts);
+ if (unlikely(!package_path))
+ goto bad;
+ goto set_path;
+
+bad:
+ PyErr_WriteUnraisable(module_name);
+ Py_XDECREF(path);
+ Py_XDECREF(file_path);
+
+ // set an empty path list on failure
+ PyErr_Clear();
+ package_path = PyList_New(0);
+ if (unlikely(!package_path))
+ return -1;
+
+set_path:
+ result = PyObject_SetAttrString($module_cname, "__path__", package_path);
+ Py_DECREF(package_path);
+ return result;
+}
+#endif
+
+
+/////////////// TypeImport.proto ///////////////
+
#ifndef __PYX_HAVE_RT_ImportType_proto
#define __PYX_HAVE_RT_ImportType_proto
-
+
enum __Pyx_ImportType_CheckSize {
__Pyx_ImportType_CheckSize_Error = 0,
__Pyx_ImportType_CheckSize_Warn = 1,
@@ -323,55 +323,55 @@ static PyTypeObject *__Pyx_ImportType(PyObject* module, const char *module_name,
#endif
-/////////////// TypeImport ///////////////
-
-#ifndef __PYX_HAVE_RT_ImportType
-#define __PYX_HAVE_RT_ImportType
+/////////////// TypeImport ///////////////
+
+#ifndef __PYX_HAVE_RT_ImportType
+#define __PYX_HAVE_RT_ImportType
static PyTypeObject *__Pyx_ImportType(PyObject *module, const char *module_name, const char *class_name,
size_t size, enum __Pyx_ImportType_CheckSize check_size)
-{
- PyObject *result = 0;
- char warning[200];
- Py_ssize_t basicsize;
-#ifdef Py_LIMITED_API
- PyObject *py_basicsize;
-#endif
-
+{
+ PyObject *result = 0;
+ char warning[200];
+ Py_ssize_t basicsize;
+#ifdef Py_LIMITED_API
+ PyObject *py_basicsize;
+#endif
+
result = PyObject_GetAttrString(module, class_name);
- if (!result)
- goto bad;
- if (!PyType_Check(result)) {
- PyErr_Format(PyExc_TypeError,
- "%.200s.%.200s is not a type object",
- module_name, class_name);
- goto bad;
- }
-#ifndef Py_LIMITED_API
- basicsize = ((PyTypeObject *)result)->tp_basicsize;
-#else
- py_basicsize = PyObject_GetAttrString(result, "__basicsize__");
- if (!py_basicsize)
- goto bad;
- basicsize = PyLong_AsSsize_t(py_basicsize);
- Py_DECREF(py_basicsize);
- py_basicsize = 0;
- if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred())
- goto bad;
-#endif
+ if (!result)
+ goto bad;
+ if (!PyType_Check(result)) {
+ PyErr_Format(PyExc_TypeError,
+ "%.200s.%.200s is not a type object",
+ module_name, class_name);
+ goto bad;
+ }
+#ifndef Py_LIMITED_API
+ basicsize = ((PyTypeObject *)result)->tp_basicsize;
+#else
+ py_basicsize = PyObject_GetAttrString(result, "__basicsize__");
+ if (!py_basicsize)
+ goto bad;
+ basicsize = PyLong_AsSsize_t(py_basicsize);
+ Py_DECREF(py_basicsize);
+ py_basicsize = 0;
+ if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred())
+ goto bad;
+#endif
if ((size_t)basicsize < size) {
PyErr_Format(PyExc_ValueError,
"%.200s.%.200s size changed, may indicate binary incompatibility. "
"Expected %zd from C header, got %zd from PyObject",
module_name, class_name, size, basicsize);
goto bad;
- }
+ }
if (check_size == __Pyx_ImportType_CheckSize_Error && (size_t)basicsize != size) {
- PyErr_Format(PyExc_ValueError,
+ PyErr_Format(PyExc_ValueError,
"%.200s.%.200s size changed, may indicate binary incompatibility. "
"Expected %zd from C header, got %zd from PyObject",
module_name, class_name, size, basicsize);
- goto bad;
- }
+ goto bad;
+ }
else if (check_size == __Pyx_ImportType_CheckSize_Warn && (size_t)basicsize > size) {
PyOS_snprintf(warning, sizeof(warning),
"%s.%s size changed, may indicate binary incompatibility. "
@@ -380,262 +380,262 @@ static PyTypeObject *__Pyx_ImportType(PyObject *module, const char *module_name,
if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad;
}
/* check_size == __Pyx_ImportType_CheckSize_Ignore does not warn nor error */
- return (PyTypeObject *)result;
-bad:
- Py_XDECREF(result);
- return NULL;
-}
-#endif
-
-/////////////// FunctionImport.proto ///////////////
-
-static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig); /*proto*/
-
-/////////////// FunctionImport ///////////////
-//@substitute: naming
-
-#ifndef __PYX_HAVE_RT_ImportFunction
-#define __PYX_HAVE_RT_ImportFunction
-static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
- PyObject *d = 0;
- PyObject *cobj = 0;
- union {
- void (*fp)(void);
- void *p;
- } tmp;
-
- d = PyObject_GetAttrString(module, (char *)"$api_name");
- if (!d)
- goto bad;
- cobj = PyDict_GetItemString(d, funcname);
- if (!cobj) {
- PyErr_Format(PyExc_ImportError,
- "%.200s does not export expected C function %.200s",
- PyModule_GetName(module), funcname);
- goto bad;
- }
-#if PY_VERSION_HEX >= 0x02070000
- if (!PyCapsule_IsValid(cobj, sig)) {
- PyErr_Format(PyExc_TypeError,
- "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
- PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
- goto bad;
- }
- tmp.p = PyCapsule_GetPointer(cobj, sig);
-#else
- {const char *desc, *s1, *s2;
- desc = (const char *)PyCObject_GetDesc(cobj);
- if (!desc)
- goto bad;
- s1 = desc; s2 = sig;
- while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
- if (*s1 != *s2) {
- PyErr_Format(PyExc_TypeError,
- "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
- PyModule_GetName(module), funcname, sig, desc);
- goto bad;
- }
- tmp.p = PyCObject_AsVoidPtr(cobj);}
-#endif
- *f = tmp.fp;
- if (!(*f))
- goto bad;
- Py_DECREF(d);
- return 0;
-bad:
- Py_XDECREF(d);
- return -1;
-}
-#endif
-
-/////////////// FunctionExport.proto ///////////////
-
-static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig); /*proto*/
-
-/////////////// FunctionExport ///////////////
-//@substitute: naming
-
-static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig) {
- PyObject *d = 0;
- PyObject *cobj = 0;
- union {
- void (*fp)(void);
- void *p;
- } tmp;
-
- d = PyObject_GetAttrString($module_cname, (char *)"$api_name");
- if (!d) {
- PyErr_Clear();
- d = PyDict_New();
- if (!d)
- goto bad;
- Py_INCREF(d);
- if (PyModule_AddObject($module_cname, (char *)"$api_name", d) < 0)
- goto bad;
- }
- tmp.fp = f;
-#if PY_VERSION_HEX >= 0x02070000
- cobj = PyCapsule_New(tmp.p, sig, 0);
-#else
- cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0);
-#endif
- if (!cobj)
- goto bad;
- if (PyDict_SetItemString(d, name, cobj) < 0)
- goto bad;
- Py_DECREF(cobj);
- Py_DECREF(d);
- return 0;
-bad:
- Py_XDECREF(cobj);
- Py_XDECREF(d);
- return -1;
-}
-
-/////////////// VoidPtrImport.proto ///////////////
-
-static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig); /*proto*/
-
-/////////////// VoidPtrImport ///////////////
-//@substitute: naming
-
-#ifndef __PYX_HAVE_RT_ImportVoidPtr
-#define __PYX_HAVE_RT_ImportVoidPtr
-static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig) {
- PyObject *d = 0;
- PyObject *cobj = 0;
-
- d = PyObject_GetAttrString(module, (char *)"$api_name");
- if (!d)
- goto bad;
- cobj = PyDict_GetItemString(d, name);
- if (!cobj) {
- PyErr_Format(PyExc_ImportError,
- "%.200s does not export expected C variable %.200s",
- PyModule_GetName(module), name);
- goto bad;
- }
-#if PY_VERSION_HEX >= 0x02070000
- if (!PyCapsule_IsValid(cobj, sig)) {
- PyErr_Format(PyExc_TypeError,
- "C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
- PyModule_GetName(module), name, sig, PyCapsule_GetName(cobj));
- goto bad;
- }
- *p = PyCapsule_GetPointer(cobj, sig);
-#else
- {const char *desc, *s1, *s2;
- desc = (const char *)PyCObject_GetDesc(cobj);
- if (!desc)
- goto bad;
- s1 = desc; s2 = sig;
- while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
- if (*s1 != *s2) {
- PyErr_Format(PyExc_TypeError,
- "C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
- PyModule_GetName(module), name, sig, desc);
- goto bad;
- }
- *p = PyCObject_AsVoidPtr(cobj);}
-#endif
- if (!(*p))
- goto bad;
- Py_DECREF(d);
- return 0;
-bad:
- Py_XDECREF(d);
- return -1;
-}
-#endif
-
-/////////////// VoidPtrExport.proto ///////////////
-
-static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig); /*proto*/
-
-/////////////// VoidPtrExport ///////////////
-//@substitute: naming
-//@requires: ObjectHandling.c::PyObjectSetAttrStr
-
-static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig) {
- PyObject *d;
- PyObject *cobj = 0;
-
- d = PyDict_GetItem($moddict_cname, PYIDENT("$api_name"));
- Py_XINCREF(d);
- if (!d) {
- d = PyDict_New();
- if (!d)
- goto bad;
- if (__Pyx_PyObject_SetAttrStr($module_cname, PYIDENT("$api_name"), d) < 0)
- goto bad;
- }
-#if PY_VERSION_HEX >= 0x02070000
- cobj = PyCapsule_New(p, sig, 0);
-#else
- cobj = PyCObject_FromVoidPtrAndDesc(p, (void *)sig, 0);
-#endif
- if (!cobj)
- goto bad;
- if (PyDict_SetItem(d, name, cobj) < 0)
- goto bad;
- Py_DECREF(cobj);
- Py_DECREF(d);
- return 0;
-bad:
- Py_XDECREF(cobj);
- Py_XDECREF(d);
- return -1;
-}
-
-
-/////////////// SetVTable.proto ///////////////
-
-static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/
-
-/////////////// SetVTable ///////////////
-
-static int __Pyx_SetVtable(PyObject *dict, void *vtable) {
-#if PY_VERSION_HEX >= 0x02070000
- PyObject *ob = PyCapsule_New(vtable, 0, 0);
-#else
- PyObject *ob = PyCObject_FromVoidPtr(vtable, 0);
-#endif
- if (!ob)
- goto bad;
- if (PyDict_SetItem(dict, PYIDENT("__pyx_vtable__"), ob) < 0)
- goto bad;
- Py_DECREF(ob);
- return 0;
-bad:
- Py_XDECREF(ob);
- return -1;
-}
-
-
-/////////////// GetVTable.proto ///////////////
-
-static void* __Pyx_GetVtable(PyObject *dict); /*proto*/
-
-/////////////// GetVTable ///////////////
-
-static void* __Pyx_GetVtable(PyObject *dict) {
- void* ptr;
- PyObject *ob = PyObject_GetItem(dict, PYIDENT("__pyx_vtable__"));
- if (!ob)
- goto bad;
-#if PY_VERSION_HEX >= 0x02070000
- ptr = PyCapsule_GetPointer(ob, 0);
-#else
- ptr = PyCObject_AsVoidPtr(ob);
-#endif
- if (!ptr && !PyErr_Occurred())
- PyErr_SetString(PyExc_RuntimeError, "invalid vtable found for imported type");
- Py_DECREF(ob);
- return ptr;
-bad:
- Py_XDECREF(ob);
- return NULL;
-}
+ return (PyTypeObject *)result;
+bad:
+ Py_XDECREF(result);
+ return NULL;
+}
+#endif
+
+/////////////// FunctionImport.proto ///////////////
+
+static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig); /*proto*/
+
+/////////////// FunctionImport ///////////////
+//@substitute: naming
+
+#ifndef __PYX_HAVE_RT_ImportFunction
+#define __PYX_HAVE_RT_ImportFunction
+static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
+ PyObject *d = 0;
+ PyObject *cobj = 0;
+ union {
+ void (*fp)(void);
+ void *p;
+ } tmp;
+
+ d = PyObject_GetAttrString(module, (char *)"$api_name");
+ if (!d)
+ goto bad;
+ cobj = PyDict_GetItemString(d, funcname);
+ if (!cobj) {
+ PyErr_Format(PyExc_ImportError,
+ "%.200s does not export expected C function %.200s",
+ PyModule_GetName(module), funcname);
+ goto bad;
+ }
+#if PY_VERSION_HEX >= 0x02070000
+ if (!PyCapsule_IsValid(cobj, sig)) {
+ PyErr_Format(PyExc_TypeError,
+ "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+ PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
+ goto bad;
+ }
+ tmp.p = PyCapsule_GetPointer(cobj, sig);
+#else
+ {const char *desc, *s1, *s2;
+ desc = (const char *)PyCObject_GetDesc(cobj);
+ if (!desc)
+ goto bad;
+ s1 = desc; s2 = sig;
+ while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
+ if (*s1 != *s2) {
+ PyErr_Format(PyExc_TypeError,
+ "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+ PyModule_GetName(module), funcname, sig, desc);
+ goto bad;
+ }
+ tmp.p = PyCObject_AsVoidPtr(cobj);}
+#endif
+ *f = tmp.fp;
+ if (!(*f))
+ goto bad;
+ Py_DECREF(d);
+ return 0;
+bad:
+ Py_XDECREF(d);
+ return -1;
+}
+#endif
+
+/////////////// FunctionExport.proto ///////////////
+
+static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig); /*proto*/
+
+/////////////// FunctionExport ///////////////
+//@substitute: naming
+
+static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig) {
+ PyObject *d = 0;
+ PyObject *cobj = 0;
+ union {
+ void (*fp)(void);
+ void *p;
+ } tmp;
+
+ d = PyObject_GetAttrString($module_cname, (char *)"$api_name");
+ if (!d) {
+ PyErr_Clear();
+ d = PyDict_New();
+ if (!d)
+ goto bad;
+ Py_INCREF(d);
+ if (PyModule_AddObject($module_cname, (char *)"$api_name", d) < 0)
+ goto bad;
+ }
+ tmp.fp = f;
+#if PY_VERSION_HEX >= 0x02070000
+ cobj = PyCapsule_New(tmp.p, sig, 0);
+#else
+ cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0);
+#endif
+ if (!cobj)
+ goto bad;
+ if (PyDict_SetItemString(d, name, cobj) < 0)
+ goto bad;
+ Py_DECREF(cobj);
+ Py_DECREF(d);
+ return 0;
+bad:
+ Py_XDECREF(cobj);
+ Py_XDECREF(d);
+ return -1;
+}
+
+/////////////// VoidPtrImport.proto ///////////////
+
+static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig); /*proto*/
+
+/////////////// VoidPtrImport ///////////////
+//@substitute: naming
+
+#ifndef __PYX_HAVE_RT_ImportVoidPtr
+#define __PYX_HAVE_RT_ImportVoidPtr
+static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig) {
+ PyObject *d = 0;
+ PyObject *cobj = 0;
+
+ d = PyObject_GetAttrString(module, (char *)"$api_name");
+ if (!d)
+ goto bad;
+ cobj = PyDict_GetItemString(d, name);
+ if (!cobj) {
+ PyErr_Format(PyExc_ImportError,
+ "%.200s does not export expected C variable %.200s",
+ PyModule_GetName(module), name);
+ goto bad;
+ }
+#if PY_VERSION_HEX >= 0x02070000
+ if (!PyCapsule_IsValid(cobj, sig)) {
+ PyErr_Format(PyExc_TypeError,
+ "C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+ PyModule_GetName(module), name, sig, PyCapsule_GetName(cobj));
+ goto bad;
+ }
+ *p = PyCapsule_GetPointer(cobj, sig);
+#else
+ {const char *desc, *s1, *s2;
+ desc = (const char *)PyCObject_GetDesc(cobj);
+ if (!desc)
+ goto bad;
+ s1 = desc; s2 = sig;
+ while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
+ if (*s1 != *s2) {
+ PyErr_Format(PyExc_TypeError,
+ "C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+ PyModule_GetName(module), name, sig, desc);
+ goto bad;
+ }
+ *p = PyCObject_AsVoidPtr(cobj);}
+#endif
+ if (!(*p))
+ goto bad;
+ Py_DECREF(d);
+ return 0;
+bad:
+ Py_XDECREF(d);
+ return -1;
+}
+#endif
+
+/////////////// VoidPtrExport.proto ///////////////
+
+static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig); /*proto*/
+
+/////////////// VoidPtrExport ///////////////
+//@substitute: naming
+//@requires: ObjectHandling.c::PyObjectSetAttrStr
+
+static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig) {
+ PyObject *d;
+ PyObject *cobj = 0;
+
+ d = PyDict_GetItem($moddict_cname, PYIDENT("$api_name"));
+ Py_XINCREF(d);
+ if (!d) {
+ d = PyDict_New();
+ if (!d)
+ goto bad;
+ if (__Pyx_PyObject_SetAttrStr($module_cname, PYIDENT("$api_name"), d) < 0)
+ goto bad;
+ }
+#if PY_VERSION_HEX >= 0x02070000
+ cobj = PyCapsule_New(p, sig, 0);
+#else
+ cobj = PyCObject_FromVoidPtrAndDesc(p, (void *)sig, 0);
+#endif
+ if (!cobj)
+ goto bad;
+ if (PyDict_SetItem(d, name, cobj) < 0)
+ goto bad;
+ Py_DECREF(cobj);
+ Py_DECREF(d);
+ return 0;
+bad:
+ Py_XDECREF(cobj);
+ Py_XDECREF(d);
+ return -1;
+}
+
+
+/////////////// SetVTable.proto ///////////////
+
+static int __Pyx_SetVtable(PyObject *dict, void *vtable); /*proto*/
+
+/////////////// SetVTable ///////////////
+
+static int __Pyx_SetVtable(PyObject *dict, void *vtable) {
+#if PY_VERSION_HEX >= 0x02070000
+ PyObject *ob = PyCapsule_New(vtable, 0, 0);
+#else
+ PyObject *ob = PyCObject_FromVoidPtr(vtable, 0);
+#endif
+ if (!ob)
+ goto bad;
+ if (PyDict_SetItem(dict, PYIDENT("__pyx_vtable__"), ob) < 0)
+ goto bad;
+ Py_DECREF(ob);
+ return 0;
+bad:
+ Py_XDECREF(ob);
+ return -1;
+}
+
+
+/////////////// GetVTable.proto ///////////////
+
+static void* __Pyx_GetVtable(PyObject *dict); /*proto*/
+
+/////////////// GetVTable ///////////////
+
+static void* __Pyx_GetVtable(PyObject *dict) {
+ void* ptr;
+ PyObject *ob = PyObject_GetItem(dict, PYIDENT("__pyx_vtable__"));
+ if (!ob)
+ goto bad;
+#if PY_VERSION_HEX >= 0x02070000
+ ptr = PyCapsule_GetPointer(ob, 0);
+#else
+ ptr = PyCObject_AsVoidPtr(ob);
+#endif
+ if (!ptr && !PyErr_Occurred())
+ PyErr_SetString(PyExc_RuntimeError, "invalid vtable found for imported type");
+ Py_DECREF(ob);
+ return ptr;
+bad:
+ Py_XDECREF(ob);
+ return NULL;
+}
/////////////// MergeVTables.proto ///////////////
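A side note on the FunctionExport/FunctionImport pair restored above: on Python 2.7 and later, the C function pointer travels inside a PyCapsule whose capsule name is the C signature string, and the importer refuses the pointer unless PyCapsule_IsValid confirms that name. A self-contained, hypothetical sketch of the same round trip inside a single process (the generated code instead stores the capsule in the exporting module's "$api_name" dict):

    #include <Python.h>
    #include <stdio.h>

    static void hello(void) { printf("hello from the exported function\n"); }

    int main(void)
    {
        /* function pointers may not round-trip through void* directly,
         * so the helpers use a union; this sketch does the same */
        union { void (*fp)(void); void *p; } in, out;
        const char *sig = "void (void)";
        PyObject *cobj;

        Py_Initialize();
        in.fp = hello;
        cobj = PyCapsule_New(in.p, sig, NULL);        /* export side */
        if (cobj && PyCapsule_IsValid(cobj, sig)) {   /* import side checks the signature */
            out.p = PyCapsule_GetPointer(cobj, sig);
            out.fp();
        }
        Py_XDECREF(cobj);
        Py_Finalize();
        return 0;
    }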
diff --git a/contrib/tools/cython/Cython/Utility/MemoryView.pyx b/contrib/tools/cython/Cython/Utility/MemoryView.pyx
index 0b4386360d..6ca5fab9ba 100644
--- a/contrib/tools/cython/Cython/Utility/MemoryView.pyx
+++ b/contrib/tools/cython/Cython/Utility/MemoryView.pyx
@@ -1,313 +1,313 @@
-#################### View.MemoryView ####################
-
-# This utility provides cython.array and cython.view.memoryview
-
+#################### View.MemoryView ####################
+
+# This utility provides cython.array and cython.view.memoryview
+
from __future__ import absolute_import
-
+
cimport cython
-# from cpython cimport ...
-cdef extern from "Python.h":
- int PyIndex_Check(object)
- object PyLong_FromVoidPtr(void *)
-
-cdef extern from "pythread.h":
- ctypedef void *PyThread_type_lock
-
- PyThread_type_lock PyThread_allocate_lock()
- void PyThread_free_lock(PyThread_type_lock)
- int PyThread_acquire_lock(PyThread_type_lock, int mode) nogil
- void PyThread_release_lock(PyThread_type_lock) nogil
-
+# from cpython cimport ...
+cdef extern from "Python.h":
+ int PyIndex_Check(object)
+ object PyLong_FromVoidPtr(void *)
+
+cdef extern from "pythread.h":
+ ctypedef void *PyThread_type_lock
+
+ PyThread_type_lock PyThread_allocate_lock()
+ void PyThread_free_lock(PyThread_type_lock)
+ int PyThread_acquire_lock(PyThread_type_lock, int mode) nogil
+ void PyThread_release_lock(PyThread_type_lock) nogil
+
cdef extern from "<string.h>":
- void *memset(void *b, int c, size_t len)
-
-cdef extern from *:
- int __Pyx_GetBuffer(object, Py_buffer *, int) except -1
- void __Pyx_ReleaseBuffer(Py_buffer *)
-
- ctypedef struct PyObject
- ctypedef Py_ssize_t Py_intptr_t
- void Py_INCREF(PyObject *)
- void Py_DECREF(PyObject *)
-
- void* PyMem_Malloc(size_t n)
- void PyMem_Free(void *p)
+ void *memset(void *b, int c, size_t len)
+
+cdef extern from *:
+ int __Pyx_GetBuffer(object, Py_buffer *, int) except -1
+ void __Pyx_ReleaseBuffer(Py_buffer *)
+
+ ctypedef struct PyObject
+ ctypedef Py_ssize_t Py_intptr_t
+ void Py_INCREF(PyObject *)
+ void Py_DECREF(PyObject *)
+
+ void* PyMem_Malloc(size_t n)
+ void PyMem_Free(void *p)
void* PyObject_Malloc(size_t n)
void PyObject_Free(void *p)
-
- cdef struct __pyx_memoryview "__pyx_memoryview_obj":
- Py_buffer view
- PyObject *obj
- __Pyx_TypeInfo *typeinfo
-
- ctypedef struct {{memviewslice_name}}:
- __pyx_memoryview *memview
- char *data
- Py_ssize_t shape[{{max_dims}}]
- Py_ssize_t strides[{{max_dims}}]
- Py_ssize_t suboffsets[{{max_dims}}]
-
- void __PYX_INC_MEMVIEW({{memviewslice_name}} *memslice, int have_gil)
- void __PYX_XDEC_MEMVIEW({{memviewslice_name}} *memslice, int have_gil)
-
- ctypedef struct __pyx_buffer "Py_buffer":
- PyObject *obj
-
- PyObject *Py_None
-
- cdef enum:
- PyBUF_C_CONTIGUOUS,
- PyBUF_F_CONTIGUOUS,
- PyBUF_ANY_CONTIGUOUS
- PyBUF_FORMAT
- PyBUF_WRITABLE
- PyBUF_STRIDES
- PyBUF_INDIRECT
+
+ cdef struct __pyx_memoryview "__pyx_memoryview_obj":
+ Py_buffer view
+ PyObject *obj
+ __Pyx_TypeInfo *typeinfo
+
+ ctypedef struct {{memviewslice_name}}:
+ __pyx_memoryview *memview
+ char *data
+ Py_ssize_t shape[{{max_dims}}]
+ Py_ssize_t strides[{{max_dims}}]
+ Py_ssize_t suboffsets[{{max_dims}}]
+
+ void __PYX_INC_MEMVIEW({{memviewslice_name}} *memslice, int have_gil)
+ void __PYX_XDEC_MEMVIEW({{memviewslice_name}} *memslice, int have_gil)
+
+ ctypedef struct __pyx_buffer "Py_buffer":
+ PyObject *obj
+
+ PyObject *Py_None
+
+ cdef enum:
+ PyBUF_C_CONTIGUOUS,
+ PyBUF_F_CONTIGUOUS,
+ PyBUF_ANY_CONTIGUOUS
+ PyBUF_FORMAT
+ PyBUF_WRITABLE
+ PyBUF_STRIDES
+ PyBUF_INDIRECT
PyBUF_ND
- PyBUF_RECORDS
+ PyBUF_RECORDS
PyBUF_RECORDS_RO
-
- ctypedef struct __Pyx_TypeInfo:
- pass
-
- cdef object capsule "__pyx_capsule_create" (void *p, char *sig)
- cdef int __pyx_array_getbuffer(PyObject *obj, Py_buffer view, int flags)
- cdef int __pyx_memoryview_getbuffer(PyObject *obj, Py_buffer view, int flags)
-
-cdef extern from *:
- ctypedef int __pyx_atomic_int
- {{memviewslice_name}} slice_copy_contig "__pyx_memoryview_copy_new_contig"(
- __Pyx_memviewslice *from_mvs,
- char *mode, int ndim,
- size_t sizeof_dtype, int contig_flag,
- bint dtype_is_object) nogil except *
- bint slice_is_contig "__pyx_memviewslice_is_contig" (
+
+ ctypedef struct __Pyx_TypeInfo:
+ pass
+
+ cdef object capsule "__pyx_capsule_create" (void *p, char *sig)
+ cdef int __pyx_array_getbuffer(PyObject *obj, Py_buffer view, int flags)
+ cdef int __pyx_memoryview_getbuffer(PyObject *obj, Py_buffer view, int flags)
+
+cdef extern from *:
+ ctypedef int __pyx_atomic_int
+ {{memviewslice_name}} slice_copy_contig "__pyx_memoryview_copy_new_contig"(
+ __Pyx_memviewslice *from_mvs,
+ char *mode, int ndim,
+ size_t sizeof_dtype, int contig_flag,
+ bint dtype_is_object) nogil except *
+ bint slice_is_contig "__pyx_memviewslice_is_contig" (
{{memviewslice_name}} mvs, char order, int ndim) nogil
- bint slices_overlap "__pyx_slices_overlap" ({{memviewslice_name}} *slice1,
- {{memviewslice_name}} *slice2,
- int ndim, size_t itemsize) nogil
-
-
+ bint slices_overlap "__pyx_slices_overlap" ({{memviewslice_name}} *slice1,
+ {{memviewslice_name}} *slice2,
+ int ndim, size_t itemsize) nogil
+
+
cdef extern from "<stdlib.h>":
- void *malloc(size_t) nogil
- void free(void *) nogil
- void *memcpy(void *dest, void *src, size_t n) nogil
-
-
-
-
-#
-### cython.array class
-#
-
-@cname("__pyx_array")
-cdef class array:
-
- cdef:
- char *data
- Py_ssize_t len
- char *format
- int ndim
- Py_ssize_t *_shape
- Py_ssize_t *_strides
- Py_ssize_t itemsize
- unicode mode # FIXME: this should have been a simple 'char'
- bytes _format
- void (*callback_free_data)(void *data)
- # cdef object _memview
- cdef bint free_data
- cdef bint dtype_is_object
-
- def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None,
- mode="c", bint allocate_buffer=True):
-
- cdef int idx
- cdef Py_ssize_t i, dim
- cdef PyObject **p
-
- self.ndim = <int> len(shape)
- self.itemsize = itemsize
-
- if not self.ndim:
- raise ValueError("Empty shape tuple for cython.array")
-
- if itemsize <= 0:
- raise ValueError("itemsize <= 0 for cython.array")
-
+ void *malloc(size_t) nogil
+ void free(void *) nogil
+ void *memcpy(void *dest, void *src, size_t n) nogil
+
+
+
+
+#
+### cython.array class
+#
+
+@cname("__pyx_array")
+cdef class array:
+
+ cdef:
+ char *data
+ Py_ssize_t len
+ char *format
+ int ndim
+ Py_ssize_t *_shape
+ Py_ssize_t *_strides
+ Py_ssize_t itemsize
+ unicode mode # FIXME: this should have been a simple 'char'
+ bytes _format
+ void (*callback_free_data)(void *data)
+ # cdef object _memview
+ cdef bint free_data
+ cdef bint dtype_is_object
+
+ def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None,
+ mode="c", bint allocate_buffer=True):
+
+ cdef int idx
+ cdef Py_ssize_t i, dim
+ cdef PyObject **p
+
+ self.ndim = <int> len(shape)
+ self.itemsize = itemsize
+
+ if not self.ndim:
+ raise ValueError("Empty shape tuple for cython.array")
+
+ if itemsize <= 0:
+ raise ValueError("itemsize <= 0 for cython.array")
+
if not isinstance(format, bytes):
format = format.encode('ASCII')
- self._format = format # keep a reference to the byte string
- self.format = self._format
-
- # use single malloc() for both shape and strides
+ self._format = format # keep a reference to the byte string
+ self.format = self._format
+
+ # use single malloc() for both shape and strides
self._shape = <Py_ssize_t *> PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2)
- self._strides = self._shape + self.ndim
-
- if not self._shape:
- raise MemoryError("unable to allocate shape and strides.")
-
- # cdef Py_ssize_t dim, stride
- for idx, dim in enumerate(shape):
- if dim <= 0:
- raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
- self._shape[idx] = dim
-
- cdef char order
- if mode == 'fortran':
- order = b'F'
- self.mode = u'fortran'
- elif mode == 'c':
- order = b'C'
- self.mode = u'c'
- else:
- raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode)
-
- self.len = fill_contig_strides_array(self._shape, self._strides,
- itemsize, self.ndim, order)
-
- self.free_data = allocate_buffer
- self.dtype_is_object = format == b'O'
- if allocate_buffer:
- # use malloc() for backwards compatibility
- # in case external code wants to change the data pointer
- self.data = <char *>malloc(self.len)
- if not self.data:
- raise MemoryError("unable to allocate array data.")
-
- if self.dtype_is_object:
- p = <PyObject **> self.data
- for i in range(self.len / itemsize):
- p[i] = Py_None
- Py_INCREF(Py_None)
-
- @cname('getbuffer')
- def __getbuffer__(self, Py_buffer *info, int flags):
- cdef int bufmode = -1
- if self.mode == u"c":
- bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
- elif self.mode == u"fortran":
- bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
- if not (flags & bufmode):
- raise ValueError("Can only create a buffer that is contiguous in memory.")
- info.buf = self.data
- info.len = self.len
- info.ndim = self.ndim
- info.shape = self._shape
- info.strides = self._strides
- info.suboffsets = NULL
- info.itemsize = self.itemsize
- info.readonly = 0
-
- if flags & PyBUF_FORMAT:
- info.format = self.format
- else:
- info.format = NULL
-
- info.obj = self
-
- __pyx_getbuffer = capsule(<void *> &__pyx_array_getbuffer, "getbuffer(obj, view, flags)")
-
- def __dealloc__(array self):
- if self.callback_free_data != NULL:
- self.callback_free_data(self.data)
- elif self.free_data:
- if self.dtype_is_object:
- refcount_objects_in_slice(self.data, self._shape,
- self._strides, self.ndim, False)
- free(self.data)
+ self._strides = self._shape + self.ndim
+
+ if not self._shape:
+ raise MemoryError("unable to allocate shape and strides.")
+
+ # cdef Py_ssize_t dim, stride
+ for idx, dim in enumerate(shape):
+ if dim <= 0:
+ raise ValueError("Invalid shape in axis %d: %d." % (idx, dim))
+ self._shape[idx] = dim
+
+ cdef char order
+ if mode == 'fortran':
+ order = b'F'
+ self.mode = u'fortran'
+ elif mode == 'c':
+ order = b'C'
+ self.mode = u'c'
+ else:
+ raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode)
+
+ self.len = fill_contig_strides_array(self._shape, self._strides,
+ itemsize, self.ndim, order)
+
+ self.free_data = allocate_buffer
+ self.dtype_is_object = format == b'O'
+ if allocate_buffer:
+ # use malloc() for backwards compatibility
+ # in case external code wants to change the data pointer
+ self.data = <char *>malloc(self.len)
+ if not self.data:
+ raise MemoryError("unable to allocate array data.")
+
+ if self.dtype_is_object:
+ p = <PyObject **> self.data
+ for i in range(self.len / itemsize):
+ p[i] = Py_None
+ Py_INCREF(Py_None)
+
+ @cname('getbuffer')
+ def __getbuffer__(self, Py_buffer *info, int flags):
+ cdef int bufmode = -1
+ if self.mode == u"c":
+ bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ elif self.mode == u"fortran":
+ bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS
+ if not (flags & bufmode):
+ raise ValueError("Can only create a buffer that is contiguous in memory.")
+ info.buf = self.data
+ info.len = self.len
+ info.ndim = self.ndim
+ info.shape = self._shape
+ info.strides = self._strides
+ info.suboffsets = NULL
+ info.itemsize = self.itemsize
+ info.readonly = 0
+
+ if flags & PyBUF_FORMAT:
+ info.format = self.format
+ else:
+ info.format = NULL
+
+ info.obj = self
+
+ __pyx_getbuffer = capsule(<void *> &__pyx_array_getbuffer, "getbuffer(obj, view, flags)")
+
+ def __dealloc__(array self):
+ if self.callback_free_data != NULL:
+ self.callback_free_data(self.data)
+ elif self.free_data:
+ if self.dtype_is_object:
+ refcount_objects_in_slice(self.data, self._shape,
+ self._strides, self.ndim, False)
+ free(self.data)
PyObject_Free(self._shape)
-
+
@property
def memview(self):
return self.get_memview()
-
+
@cname('get_memview')
cdef get_memview(self):
flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE
return memoryview(self, flags, self.dtype_is_object)
-
+
def __len__(self):
return self._shape[0]
- def __getattr__(self, attr):
- return getattr(self.memview, attr)
-
- def __getitem__(self, item):
- return self.memview[item]
-
- def __setitem__(self, item, value):
- self.memview[item] = value
-
-
-@cname("__pyx_array_new")
-cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format,
- char *mode, char *buf):
- cdef array result
-
- if buf == NULL:
- result = array(shape, itemsize, format, mode.decode('ASCII'))
- else:
- result = array(shape, itemsize, format, mode.decode('ASCII'),
- allocate_buffer=False)
- result.data = buf
-
- return result
-
-
-#
-### Memoryview constants and cython.view.memoryview class
-#
-
-# Disable generic_contiguous, as it makes trouble verifying contiguity:
-# - 'contiguous' or '::1' means the dimension is contiguous with dtype
-# - 'indirect_contiguous' means a contiguous list of pointers
-# - dtype contiguous must be contiguous in the first or last dimension
-# from the start, or from the dimension following the last indirect dimension
-#
-# e.g.
-# int[::indirect_contiguous, ::contiguous, :]
-#
-# is valid (list of pointers to 2d fortran-contiguous array), but
-#
-# int[::generic_contiguous, ::contiguous, :]
-#
-# would mean you'd have to assert dimension 0 to be indirect (and pointer contiguous) at runtime.
-# So it doesn't bring any performance benefit, and it's only confusing.
-
-@cname('__pyx_MemviewEnum')
-cdef class Enum(object):
- cdef object name
- def __init__(self, name):
- self.name = name
- def __repr__(self):
- return self.name
-
-cdef generic = Enum("<strided and direct or indirect>")
-cdef strided = Enum("<strided and direct>") # default
-cdef indirect = Enum("<strided and indirect>")
-# Disable generic_contiguous, as it is a troublemaker
-#cdef generic_contiguous = Enum("<contiguous and direct or indirect>")
-cdef contiguous = Enum("<contiguous and direct>")
-cdef indirect_contiguous = Enum("<contiguous and indirect>")
-
-# 'follow' is implied when the first or last axis is ::1
-
-
-@cname('__pyx_align_pointer')
-cdef void *align_pointer(void *memory, size_t alignment) nogil:
- "Align pointer memory on a given boundary"
- cdef Py_intptr_t aligned_p = <Py_intptr_t> memory
- cdef size_t offset
-
- with cython.cdivision(True):
- offset = aligned_p % alignment
-
- if offset > 0:
- aligned_p += alignment - offset
-
- return <void *> aligned_p
-
+ def __getattr__(self, attr):
+ return getattr(self.memview, attr)
+
+ def __getitem__(self, item):
+ return self.memview[item]
+
+ def __setitem__(self, item, value):
+ self.memview[item] = value
+
+
+@cname("__pyx_array_new")
+cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format,
+ char *mode, char *buf):
+ cdef array result
+
+ if buf == NULL:
+ result = array(shape, itemsize, format, mode.decode('ASCII'))
+ else:
+ result = array(shape, itemsize, format, mode.decode('ASCII'),
+ allocate_buffer=False)
+ result.data = buf
+
+ return result
+
+
+#
+### Memoryview constants and cython.view.memoryview class
+#
+
+# Disable generic_contiguous, as it makes trouble verifying contiguity:
+# - 'contiguous' or '::1' means the dimension is contiguous with dtype
+# - 'indirect_contiguous' means a contiguous list of pointers
+# - dtype contiguous must be contiguous in the first or last dimension
+# from the start, or from the dimension following the last indirect dimension
+#
+# e.g.
+# int[::indirect_contiguous, ::contiguous, :]
+#
+# is valid (list of pointers to 2d fortran-contiguous array), but
+#
+# int[::generic_contiguous, ::contiguous, :]
+#
+# would mean you'd have to assert dimension 0 to be indirect (and pointer contiguous) at runtime.
+# So it doesn't bring any performance benefit, and it's only confusing.
+
+@cname('__pyx_MemviewEnum')
+cdef class Enum(object):
+ cdef object name
+ def __init__(self, name):
+ self.name = name
+ def __repr__(self):
+ return self.name
+
+cdef generic = Enum("<strided and direct or indirect>")
+cdef strided = Enum("<strided and direct>") # default
+cdef indirect = Enum("<strided and indirect>")
+# Disable generic_contiguous, as it is a troublemaker
+#cdef generic_contiguous = Enum("<contiguous and direct or indirect>")
+cdef contiguous = Enum("<contiguous and direct>")
+cdef indirect_contiguous = Enum("<contiguous and indirect>")
+
+# 'follow' is implied when the first or last axis is ::1
+
+
+@cname('__pyx_align_pointer')
+cdef void *align_pointer(void *memory, size_t alignment) nogil:
+ "Align pointer memory on a given boundary"
+ cdef Py_intptr_t aligned_p = <Py_intptr_t> memory
+ cdef size_t offset
+
+ with cython.cdivision(True):
+ offset = aligned_p % alignment
+
+ if offset > 0:
+ aligned_p += alignment - offset
+
+ return <void *> aligned_p
+
# pre-allocate thread locks for reuse
## note that this could be implemented in a more beautiful way in "normal" Cython,
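The align_pointer() helper restored in the hunk above rounds an address up to the next multiple of the requested alignment; the memoryview class below uses it to align its atomic acquisition counter. The same arithmetic as a hypothetical plain-C sketch (illustration only, not part of the diff):

    #include <stdint.h>
    #include <stdio.h>

    /* Round `memory` up to the next multiple of `alignment`. */
    static void *align_pointer(void *memory, size_t alignment)
    {
        uintptr_t p = (uintptr_t)memory;
        size_t offset = p % alignment;   /* distance past the previous boundary */
        if (offset > 0)
            p += alignment - offset;     /* bump up to the next boundary */
        return (void *)p;
    }

    int main(void)
    {
        char buffer[64];
        void *raw = buffer + 1;
        void *aligned = align_pointer(raw, 16);
        printf("raw=%p aligned=%p\n", raw, aligned);
        return 0;
    }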
@@ -326,31 +326,31 @@ cdef PyThread_type_lock[THREAD_LOCKS_PREALLOCATED] __pyx_memoryview_thread_locks
]
-@cname('__pyx_memoryview')
-cdef class memoryview(object):
-
- cdef object obj
- cdef object _size
- cdef object _array_interface
- cdef PyThread_type_lock lock
- # the following array will contain a single __pyx_atomic int with
- # suitable alignment
- cdef __pyx_atomic_int acquisition_count[2]
- cdef __pyx_atomic_int *acquisition_count_aligned_p
- cdef Py_buffer view
- cdef int flags
- cdef bint dtype_is_object
- cdef __Pyx_TypeInfo *typeinfo
-
- def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False):
- self.obj = obj
- self.flags = flags
- if type(self) is memoryview or obj is not None:
- __Pyx_GetBuffer(obj, &self.view, flags)
- if <PyObject *> self.view.obj == NULL:
- (<__pyx_buffer *> &self.view).obj = Py_None
- Py_INCREF(Py_None)
-
+@cname('__pyx_memoryview')
+cdef class memoryview(object):
+
+ cdef object obj
+ cdef object _size
+ cdef object _array_interface
+ cdef PyThread_type_lock lock
+ # the following array will contain a single __pyx_atomic int with
+ # suitable alignment
+ cdef __pyx_atomic_int acquisition_count[2]
+ cdef __pyx_atomic_int *acquisition_count_aligned_p
+ cdef Py_buffer view
+ cdef int flags
+ cdef bint dtype_is_object
+ cdef __Pyx_TypeInfo *typeinfo
+
+ def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False):
+ self.obj = obj
+ self.flags = flags
+ if type(self) is memoryview or obj is not None:
+ __Pyx_GetBuffer(obj, &self.view, flags)
+ if <PyObject *> self.view.obj == NULL:
+ (<__pyx_buffer *> &self.view).obj = Py_None
+ Py_INCREF(Py_None)
+
global __pyx_memoryview_thread_locks_used
if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED:
self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
@@ -359,27 +359,27 @@ cdef class memoryview(object):
self.lock = PyThread_allocate_lock()
if self.lock is NULL:
raise MemoryError
-
- if flags & PyBUF_FORMAT:
+
+ if flags & PyBUF_FORMAT:
self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0')
- else:
- self.dtype_is_object = dtype_is_object
-
- self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer(
- <void *> &self.acquisition_count[0], sizeof(__pyx_atomic_int))
- self.typeinfo = NULL
-
- def __dealloc__(memoryview self):
- if self.obj is not None:
- __Pyx_ReleaseBuffer(&self.view)
+ else:
+ self.dtype_is_object = dtype_is_object
+
+ self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer(
+ <void *> &self.acquisition_count[0], sizeof(__pyx_atomic_int))
+ self.typeinfo = NULL
+
+ def __dealloc__(memoryview self):
+ if self.obj is not None:
+ __Pyx_ReleaseBuffer(&self.view)
elif (<__pyx_buffer *> &self.view).obj == Py_None:
# Undo the incref in __cinit__() above.
(<__pyx_buffer *> &self.view).obj = NULL
Py_DECREF(Py_None)
-
+
cdef int i
global __pyx_memoryview_thread_locks_used
- if self.lock != NULL:
+ if self.lock != NULL:
for i in range(__pyx_memoryview_thread_locks_used):
if __pyx_memoryview_thread_locks[i] is self.lock:
__pyx_memoryview_thread_locks_used -= 1
@@ -389,649 +389,649 @@ cdef class memoryview(object):
break
else:
PyThread_free_lock(self.lock)
-
- cdef char *get_item_pointer(memoryview self, object index) except NULL:
- cdef Py_ssize_t dim
- cdef char *itemp = <char *> self.view.buf
-
- for dim, idx in enumerate(index):
- itemp = pybuffer_index(&self.view, itemp, idx, dim)
-
- return itemp
-
- #@cname('__pyx_memoryview_getitem')
- def __getitem__(memoryview self, object index):
- if index is Ellipsis:
- return self
-
- have_slices, indices = _unellipsify(index, self.view.ndim)
-
- cdef char *itemp
- if have_slices:
- return memview_slice(self, indices)
- else:
- itemp = self.get_item_pointer(indices)
- return self.convert_item_to_object(itemp)
-
- def __setitem__(memoryview self, object index, object value):
+
+ cdef char *get_item_pointer(memoryview self, object index) except NULL:
+ cdef Py_ssize_t dim
+ cdef char *itemp = <char *> self.view.buf
+
+ for dim, idx in enumerate(index):
+ itemp = pybuffer_index(&self.view, itemp, idx, dim)
+
+ return itemp
+
+ #@cname('__pyx_memoryview_getitem')
+ def __getitem__(memoryview self, object index):
+ if index is Ellipsis:
+ return self
+
+ have_slices, indices = _unellipsify(index, self.view.ndim)
+
+ cdef char *itemp
+ if have_slices:
+ return memview_slice(self, indices)
+ else:
+ itemp = self.get_item_pointer(indices)
+ return self.convert_item_to_object(itemp)
+
+ def __setitem__(memoryview self, object index, object value):
if self.view.readonly:
raise TypeError("Cannot assign to read-only memoryview")
- have_slices, index = _unellipsify(index, self.view.ndim)
-
- if have_slices:
- obj = self.is_slice(value)
- if obj:
- self.setitem_slice_assignment(self[index], obj)
- else:
- self.setitem_slice_assign_scalar(self[index], value)
- else:
- self.setitem_indexed(index, value)
-
- cdef is_slice(self, obj):
- if not isinstance(obj, memoryview):
- try:
+ have_slices, index = _unellipsify(index, self.view.ndim)
+
+ if have_slices:
+ obj = self.is_slice(value)
+ if obj:
+ self.setitem_slice_assignment(self[index], obj)
+ else:
+ self.setitem_slice_assign_scalar(self[index], value)
+ else:
+ self.setitem_indexed(index, value)
+
+ cdef is_slice(self, obj):
+ if not isinstance(obj, memoryview):
+ try:
obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS,
- self.dtype_is_object)
- except TypeError:
- return None
-
- return obj
-
- cdef setitem_slice_assignment(self, dst, src):
- cdef {{memviewslice_name}} dst_slice
- cdef {{memviewslice_name}} src_slice
-
- memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],
- get_slice_from_memview(dst, &dst_slice)[0],
- src.ndim, dst.ndim, self.dtype_is_object)
-
- cdef setitem_slice_assign_scalar(self, memoryview dst, value):
- cdef int array[128]
- cdef void *tmp = NULL
- cdef void *item
-
- cdef {{memviewslice_name}} *dst_slice
- cdef {{memviewslice_name}} tmp_slice
- dst_slice = get_slice_from_memview(dst, &tmp_slice)
-
- if <size_t>self.view.itemsize > sizeof(array):
- tmp = PyMem_Malloc(self.view.itemsize)
- if tmp == NULL:
- raise MemoryError
- item = tmp
- else:
- item = <void *> array
-
- try:
- if self.dtype_is_object:
- (<PyObject **> item)[0] = <PyObject *> value
- else:
- self.assign_item_from_object(<char *> item, value)
-
- # It would be easy to support indirect dimensions, but it's easier
- # to disallow :)
- if self.view.suboffsets != NULL:
- assert_direct_dimensions(self.view.suboffsets, self.view.ndim)
- slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize,
- item, self.dtype_is_object)
- finally:
- PyMem_Free(tmp)
-
- cdef setitem_indexed(self, index, value):
- cdef char *itemp = self.get_item_pointer(index)
- self.assign_item_from_object(itemp, value)
-
- cdef convert_item_to_object(self, char *itemp):
- """Only used if instantiated manually by the user, or if Cython doesn't
- know how to convert the type"""
- import struct
- cdef bytes bytesitem
- # Do a manual and complete check here instead of this easy hack
- bytesitem = itemp[:self.view.itemsize]
- try:
- result = struct.unpack(self.view.format, bytesitem)
- except struct.error:
- raise ValueError("Unable to convert item to object")
- else:
- if len(self.view.format) == 1:
- return result[0]
- return result
-
- cdef assign_item_from_object(self, char *itemp, object value):
- """Only used if instantiated manually by the user, or if Cython doesn't
- know how to convert the type"""
- import struct
- cdef char c
- cdef bytes bytesvalue
- cdef Py_ssize_t i
-
- if isinstance(value, tuple):
- bytesvalue = struct.pack(self.view.format, *value)
- else:
- bytesvalue = struct.pack(self.view.format, value)
-
- for i, c in enumerate(bytesvalue):
- itemp[i] = c
-
- @cname('getbuffer')
- def __getbuffer__(self, Py_buffer *info, int flags):
+ self.dtype_is_object)
+ except TypeError:
+ return None
+
+ return obj
+
+ cdef setitem_slice_assignment(self, dst, src):
+ cdef {{memviewslice_name}} dst_slice
+ cdef {{memviewslice_name}} src_slice
+
+ memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0],
+ get_slice_from_memview(dst, &dst_slice)[0],
+ src.ndim, dst.ndim, self.dtype_is_object)
+
+ cdef setitem_slice_assign_scalar(self, memoryview dst, value):
+ cdef int array[128]
+ cdef void *tmp = NULL
+ cdef void *item
+
+ cdef {{memviewslice_name}} *dst_slice
+ cdef {{memviewslice_name}} tmp_slice
+ dst_slice = get_slice_from_memview(dst, &tmp_slice)
+
+ if <size_t>self.view.itemsize > sizeof(array):
+ tmp = PyMem_Malloc(self.view.itemsize)
+ if tmp == NULL:
+ raise MemoryError
+ item = tmp
+ else:
+ item = <void *> array
+
+ try:
+ if self.dtype_is_object:
+ (<PyObject **> item)[0] = <PyObject *> value
+ else:
+ self.assign_item_from_object(<char *> item, value)
+
+ # It would be easy to support indirect dimensions, but it's easier
+ # to disallow :)
+ if self.view.suboffsets != NULL:
+ assert_direct_dimensions(self.view.suboffsets, self.view.ndim)
+ slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize,
+ item, self.dtype_is_object)
+ finally:
+ PyMem_Free(tmp)
+
+ cdef setitem_indexed(self, index, value):
+ cdef char *itemp = self.get_item_pointer(index)
+ self.assign_item_from_object(itemp, value)
+
+ cdef convert_item_to_object(self, char *itemp):
+ """Only used if instantiated manually by the user, or if Cython doesn't
+ know how to convert the type"""
+ import struct
+ cdef bytes bytesitem
+ # Do a manual and complete check here instead of this easy hack
+ bytesitem = itemp[:self.view.itemsize]
+ try:
+ result = struct.unpack(self.view.format, bytesitem)
+ except struct.error:
+ raise ValueError("Unable to convert item to object")
+ else:
+ if len(self.view.format) == 1:
+ return result[0]
+ return result
+
+ cdef assign_item_from_object(self, char *itemp, object value):
+ """Only used if instantiated manually by the user, or if Cython doesn't
+ know how to convert the type"""
+ import struct
+ cdef char c
+ cdef bytes bytesvalue
+ cdef Py_ssize_t i
+
+ if isinstance(value, tuple):
+ bytesvalue = struct.pack(self.view.format, *value)
+ else:
+ bytesvalue = struct.pack(self.view.format, value)
+
+ for i, c in enumerate(bytesvalue):
+ itemp[i] = c
+
+ @cname('getbuffer')
+ def __getbuffer__(self, Py_buffer *info, int flags):
if flags & PyBUF_WRITABLE and self.view.readonly:
raise ValueError("Cannot create writable memory view from read-only memoryview")
if flags & PyBUF_ND:
- info.shape = self.view.shape
- else:
- info.shape = NULL
-
- if flags & PyBUF_STRIDES:
- info.strides = self.view.strides
- else:
- info.strides = NULL
-
- if flags & PyBUF_INDIRECT:
- info.suboffsets = self.view.suboffsets
- else:
- info.suboffsets = NULL
-
- if flags & PyBUF_FORMAT:
- info.format = self.view.format
- else:
- info.format = NULL
-
- info.buf = self.view.buf
- info.ndim = self.view.ndim
- info.itemsize = self.view.itemsize
- info.len = self.view.len
+ info.shape = self.view.shape
+ else:
+ info.shape = NULL
+
+ if flags & PyBUF_STRIDES:
+ info.strides = self.view.strides
+ else:
+ info.strides = NULL
+
+ if flags & PyBUF_INDIRECT:
+ info.suboffsets = self.view.suboffsets
+ else:
+ info.suboffsets = NULL
+
+ if flags & PyBUF_FORMAT:
+ info.format = self.view.format
+ else:
+ info.format = NULL
+
+ info.buf = self.view.buf
+ info.ndim = self.view.ndim
+ info.itemsize = self.view.itemsize
+ info.len = self.view.len
info.readonly = self.view.readonly
- info.obj = self
-
- __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")
-
+ info.obj = self
+
+ __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")
+
# Some properties that have the same semantics as in NumPy
@property
def T(self):
cdef _memoryviewslice result = memoryview_copy(self)
transpose_memslice(&result.from_slice)
return result
-
+
@property
def base(self):
return self.obj
-
+
@property
def shape(self):
return tuple([length for length in self.view.shape[:self.view.ndim]])
-
+
@property
def strides(self):
if self.view.strides == NULL:
# Note: we always ask for strides, so if this is not set it's a bug
raise ValueError("Buffer view does not expose strides")
-
+
return tuple([stride for stride in self.view.strides[:self.view.ndim]])
-
+
@property
def suboffsets(self):
if self.view.suboffsets == NULL:
return (-1,) * self.view.ndim
-
+
return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]])
-
+
@property
def ndim(self):
return self.view.ndim
-
+
@property
def itemsize(self):
return self.view.itemsize
-
+
@property
def nbytes(self):
return self.size * self.view.itemsize
-
+
@property
def size(self):
if self._size is None:
result = 1
-
+
for length in self.view.shape[:self.view.ndim]:
result *= length
-
+
self._size = result
-
+
return self._size
-
- def __len__(self):
- if self.view.ndim >= 1:
- return self.view.shape[0]
-
- return 0
-
- def __repr__(self):
- return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,
- id(self))
-
- def __str__(self):
- return "<MemoryView of %r object>" % (self.base.__class__.__name__,)
-
- # Support the same attributes as memoryview slices
- def is_c_contig(self):
- cdef {{memviewslice_name}} *mslice
- cdef {{memviewslice_name}} tmp
- mslice = get_slice_from_memview(self, &tmp)
+
+ def __len__(self):
+ if self.view.ndim >= 1:
+ return self.view.shape[0]
+
+ return 0
+
+ def __repr__(self):
+ return "<MemoryView of %r at 0x%x>" % (self.base.__class__.__name__,
+ id(self))
+
+ def __str__(self):
+ return "<MemoryView of %r object>" % (self.base.__class__.__name__,)
+
+ # Support the same attributes as memoryview slices
+ def is_c_contig(self):
+ cdef {{memviewslice_name}} *mslice
+ cdef {{memviewslice_name}} tmp
+ mslice = get_slice_from_memview(self, &tmp)
return slice_is_contig(mslice[0], 'C', self.view.ndim)
-
- def is_f_contig(self):
- cdef {{memviewslice_name}} *mslice
- cdef {{memviewslice_name}} tmp
- mslice = get_slice_from_memview(self, &tmp)
+
+ def is_f_contig(self):
+ cdef {{memviewslice_name}} *mslice
+ cdef {{memviewslice_name}} tmp
+ mslice = get_slice_from_memview(self, &tmp)
return slice_is_contig(mslice[0], 'F', self.view.ndim)
-
- def copy(self):
- cdef {{memviewslice_name}} mslice
- cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS
-
- slice_copy(self, &mslice)
- mslice = slice_copy_contig(&mslice, "c", self.view.ndim,
- self.view.itemsize,
- flags|PyBUF_C_CONTIGUOUS,
- self.dtype_is_object)
-
- return memoryview_copy_from_slice(self, &mslice)
-
- def copy_fortran(self):
- cdef {{memviewslice_name}} src, dst
- cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS
-
- slice_copy(self, &src)
- dst = slice_copy_contig(&src, "fortran", self.view.ndim,
- self.view.itemsize,
- flags|PyBUF_F_CONTIGUOUS,
- self.dtype_is_object)
-
- return memoryview_copy_from_slice(self, &dst)
-
-
-@cname('__pyx_memoryview_new')
-cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo):
- cdef memoryview result = memoryview(o, flags, dtype_is_object)
- result.typeinfo = typeinfo
- return result
-
-@cname('__pyx_memoryview_check')
-cdef inline bint memoryview_check(object o):
- return isinstance(o, memoryview)
-
-cdef tuple _unellipsify(object index, int ndim):
- """
- Replace all ellipses with full slices and fill incomplete indices with
- full slices.
- """
- if not isinstance(index, tuple):
- tup = (index,)
- else:
- tup = index
-
- result = []
- have_slices = False
- seen_ellipsis = False
- for idx, item in enumerate(tup):
- if item is Ellipsis:
- if not seen_ellipsis:
- result.extend([slice(None)] * (ndim - len(tup) + 1))
- seen_ellipsis = True
- else:
- result.append(slice(None))
- have_slices = True
- else:
- if not isinstance(item, slice) and not PyIndex_Check(item):
- raise TypeError("Cannot index with type '%s'" % type(item))
-
- have_slices = have_slices or isinstance(item, slice)
- result.append(item)
-
- nslices = ndim - len(result)
- if nslices:
- result.extend([slice(None)] * nslices)
-
- return have_slices or nslices, tuple(result)
-
-cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim):
+
+ def copy(self):
+ cdef {{memviewslice_name}} mslice
+ cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS
+
+ slice_copy(self, &mslice)
+ mslice = slice_copy_contig(&mslice, "c", self.view.ndim,
+ self.view.itemsize,
+ flags|PyBUF_C_CONTIGUOUS,
+ self.dtype_is_object)
+
+ return memoryview_copy_from_slice(self, &mslice)
+
+ def copy_fortran(self):
+ cdef {{memviewslice_name}} src, dst
+ cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS
+
+ slice_copy(self, &src)
+ dst = slice_copy_contig(&src, "fortran", self.view.ndim,
+ self.view.itemsize,
+ flags|PyBUF_F_CONTIGUOUS,
+ self.dtype_is_object)
+
+ return memoryview_copy_from_slice(self, &dst)
+
+
+@cname('__pyx_memoryview_new')
+cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo):
+ cdef memoryview result = memoryview(o, flags, dtype_is_object)
+ result.typeinfo = typeinfo
+ return result
+
+@cname('__pyx_memoryview_check')
+cdef inline bint memoryview_check(object o):
+ return isinstance(o, memoryview)
+
+cdef tuple _unellipsify(object index, int ndim):
+ """
+ Replace all ellipses with full slices and fill incomplete indices with
+ full slices.
+ """
+ if not isinstance(index, tuple):
+ tup = (index,)
+ else:
+ tup = index
+
+ result = []
+ have_slices = False
+ seen_ellipsis = False
+ for idx, item in enumerate(tup):
+ if item is Ellipsis:
+ if not seen_ellipsis:
+ result.extend([slice(None)] * (ndim - len(tup) + 1))
+ seen_ellipsis = True
+ else:
+ result.append(slice(None))
+ have_slices = True
+ else:
+ if not isinstance(item, slice) and not PyIndex_Check(item):
+ raise TypeError("Cannot index with type '%s'" % type(item))
+
+ have_slices = have_slices or isinstance(item, slice)
+ result.append(item)
+
+ nslices = ndim - len(result)
+ if nslices:
+ result.extend([slice(None)] * nslices)
+
+ return have_slices or nslices, tuple(result)
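+# For illustration (hypothetical values), indexing a 3-dimensional view:
+#     _unellipsify((Ellipsis, 0), 3) -> (True, (slice(None), slice(None), 0))
+#     _unellipsify(1, 3)             -> (2, (1, slice(None), slice(None)))
+# In the first case the ellipsis expands into the missing leading dimensions;
+# in the second, two trailing full slices are appended and the truthy nslices
+# count (2) is returned as the first element.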
+
+cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim):
for suboffset in suboffsets[:ndim]:
if suboffset >= 0:
- raise ValueError("Indirect dimensions not supported")
-
-#
-### Slicing a memoryview
-#
-
-@cname('__pyx_memview_slice')
-cdef memoryview memview_slice(memoryview memview, object indices):
- cdef int new_ndim = 0, suboffset_dim = -1, dim
- cdef bint negative_step
- cdef {{memviewslice_name}} src, dst
- cdef {{memviewslice_name}} *p_src
-
- # dst is copied by value in memoryview_fromslice -- initialize it
- # src is never copied
- memset(&dst, 0, sizeof(dst))
-
- cdef _memoryviewslice memviewsliceobj
-
- assert memview.view.ndim > 0
-
- if isinstance(memview, _memoryviewslice):
- memviewsliceobj = memview
- p_src = &memviewsliceobj.from_slice
- else:
- slice_copy(memview, &src)
- p_src = &src
-
- # Note: don't use variable src at this point
- # SubNote: we should be able to declare variables in blocks...
-
- # memoryview_fromslice() will inc our dst slice
- dst.memview = p_src.memview
- dst.data = p_src.data
-
- # Put everything in temps to avoid this bloody warning:
- # "Argument evaluation order in C function call is undefined and
- # may not be as expected"
- cdef {{memviewslice_name}} *p_dst = &dst
- cdef int *p_suboffset_dim = &suboffset_dim
- cdef Py_ssize_t start, stop, step
- cdef bint have_start, have_stop, have_step
-
- for dim, index in enumerate(indices):
- if PyIndex_Check(index):
- slice_memviewslice(
- p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
- dim, new_ndim, p_suboffset_dim,
- index, 0, 0, # start, stop, step
- 0, 0, 0, # have_{start,stop,step}
- False)
- elif index is None:
- p_dst.shape[new_ndim] = 1
- p_dst.strides[new_ndim] = 0
- p_dst.suboffsets[new_ndim] = -1
- new_ndim += 1
- else:
- start = index.start or 0
- stop = index.stop or 0
- step = index.step or 0
-
- have_start = index.start is not None
- have_stop = index.stop is not None
- have_step = index.step is not None
-
- slice_memviewslice(
- p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
- dim, new_ndim, p_suboffset_dim,
- start, stop, step,
- have_start, have_stop, have_step,
- True)
- new_ndim += 1
-
- if isinstance(memview, _memoryviewslice):
- return memoryview_fromslice(dst, new_ndim,
- memviewsliceobj.to_object_func,
- memviewsliceobj.to_dtype_func,
- memview.dtype_is_object)
- else:
- return memoryview_fromslice(dst, new_ndim, NULL, NULL,
- memview.dtype_is_object)
-
-
-#
-### Slicing in a single dimension of a memoryviewslice
-#
-
+ raise ValueError("Indirect dimensions not supported")
+
+#
+### Slicing a memoryview
+#
+
+@cname('__pyx_memview_slice')
+cdef memoryview memview_slice(memoryview memview, object indices):
+ cdef int new_ndim = 0, suboffset_dim = -1, dim
+ cdef bint negative_step
+ cdef {{memviewslice_name}} src, dst
+ cdef {{memviewslice_name}} *p_src
+
+ # dst is copied by value in memoryview_fromslice -- initialize it
+ # src is never copied
+ memset(&dst, 0, sizeof(dst))
+
+ cdef _memoryviewslice memviewsliceobj
+
+ assert memview.view.ndim > 0
+
+ if isinstance(memview, _memoryviewslice):
+ memviewsliceobj = memview
+ p_src = &memviewsliceobj.from_slice
+ else:
+ slice_copy(memview, &src)
+ p_src = &src
+
+ # Note: don't use variable src at this point
+ # SubNote: we should be able to declare variables in blocks...
+
+ # memoryview_fromslice() will inc our dst slice
+ dst.memview = p_src.memview
+ dst.data = p_src.data
+
+ # Put everything in temps to avoid this bloody warning:
+ # "Argument evaluation order in C function call is undefined and
+ # may not be as expected"
+ cdef {{memviewslice_name}} *p_dst = &dst
+ cdef int *p_suboffset_dim = &suboffset_dim
+ cdef Py_ssize_t start, stop, step
+ cdef bint have_start, have_stop, have_step
+
+ for dim, index in enumerate(indices):
+ if PyIndex_Check(index):
+ slice_memviewslice(
+ p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ dim, new_ndim, p_suboffset_dim,
+ index, 0, 0, # start, stop, step
+ 0, 0, 0, # have_{start,stop,step}
+ False)
+ elif index is None:
+ p_dst.shape[new_ndim] = 1
+ p_dst.strides[new_ndim] = 0
+ p_dst.suboffsets[new_ndim] = -1
+ new_ndim += 1
+ else:
+ start = index.start or 0
+ stop = index.stop or 0
+ step = index.step or 0
+
+ have_start = index.start is not None
+ have_stop = index.stop is not None
+ have_step = index.step is not None
+
+ slice_memviewslice(
+ p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim],
+ dim, new_ndim, p_suboffset_dim,
+ start, stop, step,
+ have_start, have_stop, have_step,
+ True)
+ new_ndim += 1
+
+ if isinstance(memview, _memoryviewslice):
+ return memoryview_fromslice(dst, new_ndim,
+ memviewsliceobj.to_object_func,
+ memviewsliceobj.to_dtype_func,
+ memview.dtype_is_object)
+ else:
+ return memoryview_fromslice(dst, new_ndim, NULL, NULL,
+ memview.dtype_is_object)
+
+
+#
+### Slicing in a single dimension of a memoryviewslice
+#
+
cdef extern from "<stdlib.h>":
- void abort() nogil
- void printf(char *s, ...) nogil
-
+ void abort() nogil
+ void printf(char *s, ...) nogil
+
cdef extern from "<stdio.h>":
- ctypedef struct FILE
- FILE *stderr
- int fputs(char *s, FILE *stream)
-
-cdef extern from "pystate.h":
- void PyThreadState_Get() nogil
-
- # These are not actually nogil, but we check for the GIL before calling them
- void PyErr_SetString(PyObject *type, char *msg) nogil
- PyObject *PyErr_Format(PyObject *exc, char *msg, ...) nogil
-
-@cname('__pyx_memoryview_slice_memviewslice')
-cdef int slice_memviewslice(
- {{memviewslice_name}} *dst,
- Py_ssize_t shape, Py_ssize_t stride, Py_ssize_t suboffset,
- int dim, int new_ndim, int *suboffset_dim,
- Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step,
- int have_start, int have_stop, int have_step,
- bint is_slice) nogil except -1:
- """
- Create a new slice dst given slice src.
-
- dim - the current src dimension (indexing will make dimensions
- disappear)
- new_ndim - the new dst dimension
- suboffset_dim - pointer to a single int initialized to -1 to keep track of
- where slicing offsets should be added
- """
-
- cdef Py_ssize_t new_shape
- cdef bint negative_step
-
- if not is_slice:
- # index is a normal integer-like index
- if start < 0:
- start += shape
- if not 0 <= start < shape:
- _err_dim(IndexError, "Index out of bounds (axis %d)", dim)
- else:
- # index is a slice
- negative_step = have_step != 0 and step < 0
-
- if have_step and step == 0:
- _err_dim(ValueError, "Step may not be zero (axis %d)", dim)
-
- # check our bounds and set defaults
- if have_start:
- if start < 0:
- start += shape
- if start < 0:
- start = 0
- elif start >= shape:
- if negative_step:
- start = shape - 1
- else:
- start = shape
- else:
- if negative_step:
- start = shape - 1
- else:
- start = 0
-
- if have_stop:
- if stop < 0:
- stop += shape
- if stop < 0:
- stop = 0
- elif stop > shape:
- stop = shape
- else:
- if negative_step:
- stop = -1
- else:
- stop = shape
-
- if not have_step:
- step = 1
-
- # len = ceil( (stop - start) / step )
- with cython.cdivision(True):
- new_shape = (stop - start) // step
-
- if (stop - start) - step * new_shape:
- new_shape += 1
-
- if new_shape < 0:
- new_shape = 0
-
- # shape/strides/suboffsets
- dst.strides[new_ndim] = stride * step
- dst.shape[new_ndim] = new_shape
- dst.suboffsets[new_ndim] = suboffset
-
- # Add the slicing or indexing offsets to the right suboffset or base data *
- if suboffset_dim[0] < 0:
- dst.data += start * stride
- else:
- dst.suboffsets[suboffset_dim[0]] += start * stride
-
- if suboffset >= 0:
- if not is_slice:
- if new_ndim == 0:
- dst.data = (<char **> dst.data)[0] + suboffset
- else:
- _err_dim(IndexError, "All dimensions preceding dimension %d "
- "must be indexed and not sliced", dim)
- else:
- suboffset_dim[0] = new_ndim
-
- return 0
-
-#
-### Index a memoryview
-#
-@cname('__pyx_pybuffer_index')
-cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index,
- Py_ssize_t dim) except NULL:
- cdef Py_ssize_t shape, stride, suboffset = -1
- cdef Py_ssize_t itemsize = view.itemsize
- cdef char *resultp
-
- if view.ndim == 0:
- shape = view.len / itemsize
- stride = itemsize
- else:
- shape = view.shape[dim]
- stride = view.strides[dim]
- if view.suboffsets != NULL:
- suboffset = view.suboffsets[dim]
-
- if index < 0:
- index += view.shape[dim]
- if index < 0:
- raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
-
- if index >= shape:
- raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
-
- resultp = bufp + index * stride
- if suboffset >= 0:
- resultp = (<char **> resultp)[0] + suboffset
-
- return resultp
-
-#
-### Transposing a memoryviewslice
-#
-@cname('__pyx_memslice_transpose')
-cdef int transpose_memslice({{memviewslice_name}} *memslice) nogil except 0:
- cdef int ndim = memslice.memview.view.ndim
-
- cdef Py_ssize_t *shape = memslice.shape
- cdef Py_ssize_t *strides = memslice.strides
-
- # reverse strides and shape
- cdef int i, j
- for i in range(ndim / 2):
- j = ndim - 1 - i
- strides[i], strides[j] = strides[j], strides[i]
- shape[i], shape[j] = shape[j], shape[i]
-
- if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0:
- _err(ValueError, "Cannot transpose memoryview with indirect dimensions")
-
- return 1
-
-#
-### Creating new memoryview objects from slices and memoryviews
-#
-@cname('__pyx_memoryviewslice')
-cdef class _memoryviewslice(memoryview):
- "Internal class for passing memoryview slices to Python"
-
- # We need this to keep our shape/strides/suboffset pointers valid
- cdef {{memviewslice_name}} from_slice
- # We need this only to print its class's name
- cdef object from_object
-
- cdef object (*to_object_func)(char *)
- cdef int (*to_dtype_func)(char *, object) except 0
-
- def __dealloc__(self):
- __PYX_XDEC_MEMVIEW(&self.from_slice, 1)
-
- cdef convert_item_to_object(self, char *itemp):
- if self.to_object_func != NULL:
- return self.to_object_func(itemp)
- else:
- return memoryview.convert_item_to_object(self, itemp)
-
- cdef assign_item_from_object(self, char *itemp, object value):
- if self.to_dtype_func != NULL:
- self.to_dtype_func(itemp, value)
- else:
- memoryview.assign_item_from_object(self, itemp, value)
-
+ ctypedef struct FILE
+ FILE *stderr
+ int fputs(char *s, FILE *stream)
+
+cdef extern from "pystate.h":
+ void PyThreadState_Get() nogil
+
+ # These are not actually nogil, but we check for the GIL before calling them
+ void PyErr_SetString(PyObject *type, char *msg) nogil
+ PyObject *PyErr_Format(PyObject *exc, char *msg, ...) nogil
+
+@cname('__pyx_memoryview_slice_memviewslice')
+cdef int slice_memviewslice(
+ {{memviewslice_name}} *dst,
+ Py_ssize_t shape, Py_ssize_t stride, Py_ssize_t suboffset,
+ int dim, int new_ndim, int *suboffset_dim,
+ Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step,
+ int have_start, int have_stop, int have_step,
+ bint is_slice) nogil except -1:
+ """
+ Create a new slice dst given slice src.
+
+ dim - the current src dimension (indexing will make dimensions
+ disappear)
+ new_ndim - the new dst dimension
+ suboffset_dim - pointer to a single int initialized to -1 to keep track of
+ where slicing offsets should be added
+ """
+
+ cdef Py_ssize_t new_shape
+ cdef bint negative_step
+
+ if not is_slice:
+ # index is a normal integer-like index
+ if start < 0:
+ start += shape
+ if not 0 <= start < shape:
+ _err_dim(IndexError, "Index out of bounds (axis %d)", dim)
+ else:
+ # index is a slice
+ negative_step = have_step != 0 and step < 0
+
+ if have_step and step == 0:
+ _err_dim(ValueError, "Step may not be zero (axis %d)", dim)
+
+ # check our bounds and set defaults
+ if have_start:
+ if start < 0:
+ start += shape
+ if start < 0:
+ start = 0
+ elif start >= shape:
+ if negative_step:
+ start = shape - 1
+ else:
+ start = shape
+ else:
+ if negative_step:
+ start = shape - 1
+ else:
+ start = 0
+
+ if have_stop:
+ if stop < 0:
+ stop += shape
+ if stop < 0:
+ stop = 0
+ elif stop > shape:
+ stop = shape
+ else:
+ if negative_step:
+ stop = -1
+ else:
+ stop = shape
+
+ if not have_step:
+ step = 1
+
+ # len = ceil( (stop - start) / step )
+ with cython.cdivision(True):
+ new_shape = (stop - start) // step
+
+ if (stop - start) - step * new_shape:
+ new_shape += 1
+
+ if new_shape < 0:
+ new_shape = 0
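+ # Worked example (illustrative values only): for shape == 5 and a slice
+ # [-3:10:2], start becomes 2, stop is clipped to 5, and new_shape is
+ # (5 - 2) // 2 == 1, bumped to 2 by the remainder check -- the selected
+ # indices being 2 and 4.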
+
+ # shape/strides/suboffsets
+ dst.strides[new_ndim] = stride * step
+ dst.shape[new_ndim] = new_shape
+ dst.suboffsets[new_ndim] = suboffset
+
+ # Add the slicing or indexing offsets to the right suboffset or base data *
+ if suboffset_dim[0] < 0:
+ dst.data += start * stride
+ else:
+ dst.suboffsets[suboffset_dim[0]] += start * stride
+
+ if suboffset >= 0:
+ if not is_slice:
+ if new_ndim == 0:
+ dst.data = (<char **> dst.data)[0] + suboffset
+ else:
+ _err_dim(IndexError, "All dimensions preceding dimension %d "
+ "must be indexed and not sliced", dim)
+ else:
+ suboffset_dim[0] = new_ndim
+
+ return 0
+
+#
+### Index a memoryview
+#
+@cname('__pyx_pybuffer_index')
+cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index,
+ Py_ssize_t dim) except NULL:
+ cdef Py_ssize_t shape, stride, suboffset = -1
+ cdef Py_ssize_t itemsize = view.itemsize
+ cdef char *resultp
+
+ if view.ndim == 0:
+ shape = view.len / itemsize
+ stride = itemsize
+ else:
+ shape = view.shape[dim]
+ stride = view.strides[dim]
+ if view.suboffsets != NULL:
+ suboffset = view.suboffsets[dim]
+
+ if index < 0:
+ index += view.shape[dim]
+ if index < 0:
+ raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+
+ if index >= shape:
+ raise IndexError("Out of bounds on buffer access (axis %d)" % dim)
+
+ resultp = bufp + index * stride
+ if suboffset >= 0:
+ resultp = (<char **> resultp)[0] + suboffset
+
+ return resultp
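+# For illustration (hypothetical values): with a 1-D buffer of shape[0] == 10,
+# strides[0] == 8 and no suboffsets, pybuffer_index(&view, bufp, -2, 0) maps
+# the negative index to 8 and returns bufp + 8 * 8; any index outside
+# [-10, 10) raises IndexError instead.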
+
+#
+### Transposing a memoryviewslice
+#
+@cname('__pyx_memslice_transpose')
+cdef int transpose_memslice({{memviewslice_name}} *memslice) nogil except 0:
+ cdef int ndim = memslice.memview.view.ndim
+
+ cdef Py_ssize_t *shape = memslice.shape
+ cdef Py_ssize_t *strides = memslice.strides
+
+ # reverse strides and shape
+ cdef int i, j
+ for i in range(ndim / 2):
+ j = ndim - 1 - i
+ strides[i], strides[j] = strides[j], strides[i]
+ shape[i], shape[j] = shape[j], shape[i]
+
+ if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0:
+ _err(ValueError, "Cannot transpose memoryview with indirect dimensions")
+
+ return 1
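+# For illustration (hypothetical values): a slice of shape (2, 3, 4) with
+# C-layout strides (96, 32, 8) leaves transpose_memslice() with shape
+# (4, 3, 2) and strides (8, 32, 96); slices with indirect (suboffset >= 0)
+# dimensions are rejected with the ValueError above.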
+
+#
+### Creating new memoryview objects from slices and memoryviews
+#
+@cname('__pyx_memoryviewslice')
+cdef class _memoryviewslice(memoryview):
+ "Internal class for passing memoryview slices to Python"
+
+ # We need this to keep our shape/strides/suboffset pointers valid
+ cdef {{memviewslice_name}} from_slice
+ # We need this only to print its class's name
+ cdef object from_object
+
+ cdef object (*to_object_func)(char *)
+ cdef int (*to_dtype_func)(char *, object) except 0
+
+ def __dealloc__(self):
+ __PYX_XDEC_MEMVIEW(&self.from_slice, 1)
+
+ cdef convert_item_to_object(self, char *itemp):
+ if self.to_object_func != NULL:
+ return self.to_object_func(itemp)
+ else:
+ return memoryview.convert_item_to_object(self, itemp)
+
+ cdef assign_item_from_object(self, char *itemp, object value):
+ if self.to_dtype_func != NULL:
+ self.to_dtype_func(itemp, value)
+ else:
+ memoryview.assign_item_from_object(self, itemp, value)
+
@property
def base(self):
return self.from_object
-
- __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")
-
-
-@cname('__pyx_memoryview_fromslice')
-cdef memoryview_fromslice({{memviewslice_name}} memviewslice,
- int ndim,
- object (*to_object_func)(char *),
- int (*to_dtype_func)(char *, object) except 0,
- bint dtype_is_object):
-
- cdef _memoryviewslice result
-
- if <PyObject *> memviewslice.memview == Py_None:
- return None
-
- # assert 0 < ndim <= memviewslice.memview.view.ndim, (
- # ndim, memviewslice.memview.view.ndim)
-
- result = _memoryviewslice(None, 0, dtype_is_object)
-
- result.from_slice = memviewslice
- __PYX_INC_MEMVIEW(&memviewslice, 1)
-
- result.from_object = (<memoryview> memviewslice.memview).base
- result.typeinfo = memviewslice.memview.typeinfo
-
- result.view = memviewslice.memview.view
- result.view.buf = <void *> memviewslice.data
- result.view.ndim = ndim
- (<__pyx_buffer *> &result.view).obj = Py_None
- Py_INCREF(Py_None)
-
+
+ __pyx_getbuffer = capsule(<void *> &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)")
+
+
+@cname('__pyx_memoryview_fromslice')
+cdef memoryview_fromslice({{memviewslice_name}} memviewslice,
+ int ndim,
+ object (*to_object_func)(char *),
+ int (*to_dtype_func)(char *, object) except 0,
+ bint dtype_is_object):
+
+ cdef _memoryviewslice result
+
+ if <PyObject *> memviewslice.memview == Py_None:
+ return None
+
+ # assert 0 < ndim <= memviewslice.memview.view.ndim, (
+ # ndim, memviewslice.memview.view.ndim)
+
+ result = _memoryviewslice(None, 0, dtype_is_object)
+
+ result.from_slice = memviewslice
+ __PYX_INC_MEMVIEW(&memviewslice, 1)
+
+ result.from_object = (<memoryview> memviewslice.memview).base
+ result.typeinfo = memviewslice.memview.typeinfo
+
+ result.view = memviewslice.memview.view
+ result.view.buf = <void *> memviewslice.data
+ result.view.ndim = ndim
+ (<__pyx_buffer *> &result.view).obj = Py_None
+ Py_INCREF(Py_None)
+
if (<memoryview>memviewslice.memview).flags & PyBUF_WRITABLE:
result.flags = PyBUF_RECORDS
else:
result.flags = PyBUF_RECORDS_RO
-
- result.view.shape = <Py_ssize_t *> result.from_slice.shape
- result.view.strides = <Py_ssize_t *> result.from_slice.strides
-
+
+ result.view.shape = <Py_ssize_t *> result.from_slice.shape
+ result.view.strides = <Py_ssize_t *> result.from_slice.strides
+
# only set suboffsets if actually used, otherwise set to NULL to improve compatibility
result.view.suboffsets = NULL
for suboffset in result.from_slice.suboffsets[:ndim]:
@@ -1039,456 +1039,456 @@ cdef memoryview_fromslice({{memviewslice_name}} memviewslice,
result.view.suboffsets = <Py_ssize_t *> result.from_slice.suboffsets
break
- result.view.len = result.view.itemsize
+ result.view.len = result.view.itemsize
for length in result.view.shape[:ndim]:
result.view.len *= length
-
- result.to_object_func = to_object_func
- result.to_dtype_func = to_dtype_func
-
- return result
-
-@cname('__pyx_memoryview_get_slice_from_memoryview')
-cdef {{memviewslice_name}} *get_slice_from_memview(memoryview memview,
+
+ result.to_object_func = to_object_func
+ result.to_dtype_func = to_dtype_func
+
+ return result
+
+@cname('__pyx_memoryview_get_slice_from_memoryview')
+cdef {{memviewslice_name}} *get_slice_from_memview(memoryview memview,
{{memviewslice_name}} *mslice) except NULL:
- cdef _memoryviewslice obj
- if isinstance(memview, _memoryviewslice):
- obj = memview
- return &obj.from_slice
- else:
- slice_copy(memview, mslice)
- return mslice
-
-@cname('__pyx_memoryview_slice_copy')
-cdef void slice_copy(memoryview memview, {{memviewslice_name}} *dst):
- cdef int dim
- cdef (Py_ssize_t*) shape, strides, suboffsets
-
- shape = memview.view.shape
- strides = memview.view.strides
- suboffsets = memview.view.suboffsets
-
- dst.memview = <__pyx_memoryview *> memview
- dst.data = <char *> memview.view.buf
-
- for dim in range(memview.view.ndim):
- dst.shape[dim] = shape[dim]
- dst.strides[dim] = strides[dim]
+ cdef _memoryviewslice obj
+ if isinstance(memview, _memoryviewslice):
+ obj = memview
+ return &obj.from_slice
+ else:
+ slice_copy(memview, mslice)
+ return mslice
+
+@cname('__pyx_memoryview_slice_copy')
+cdef void slice_copy(memoryview memview, {{memviewslice_name}} *dst):
+ cdef int dim
+ cdef (Py_ssize_t*) shape, strides, suboffsets
+
+ shape = memview.view.shape
+ strides = memview.view.strides
+ suboffsets = memview.view.suboffsets
+
+ dst.memview = <__pyx_memoryview *> memview
+ dst.data = <char *> memview.view.buf
+
+ for dim in range(memview.view.ndim):
+ dst.shape[dim] = shape[dim]
+ dst.strides[dim] = strides[dim]
dst.suboffsets[dim] = suboffsets[dim] if suboffsets else -1
-
-@cname('__pyx_memoryview_copy_object')
-cdef memoryview_copy(memoryview memview):
- "Create a new memoryview object"
- cdef {{memviewslice_name}} memviewslice
- slice_copy(memview, &memviewslice)
- return memoryview_copy_from_slice(memview, &memviewslice)
-
-@cname('__pyx_memoryview_copy_object_from_slice')
-cdef memoryview_copy_from_slice(memoryview memview, {{memviewslice_name}} *memviewslice):
- """
- Create a new memoryview object from a given memoryview object and slice.
- """
- cdef object (*to_object_func)(char *)
- cdef int (*to_dtype_func)(char *, object) except 0
-
- if isinstance(memview, _memoryviewslice):
- to_object_func = (<_memoryviewslice> memview).to_object_func
- to_dtype_func = (<_memoryviewslice> memview).to_dtype_func
- else:
- to_object_func = NULL
- to_dtype_func = NULL
-
- return memoryview_fromslice(memviewslice[0], memview.view.ndim,
- to_object_func, to_dtype_func,
- memview.dtype_is_object)
-
-
-#
-### Copy the contents of memoryview slices
-#
-cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil:
- if arg < 0:
- return -arg
- else:
- return arg
-
-@cname('__pyx_get_best_slice_order')
-cdef char get_best_order({{memviewslice_name}} *mslice, int ndim) nogil:
- """
- Figure out the best memory access order for a given slice.
- """
- cdef int i
- cdef Py_ssize_t c_stride = 0
- cdef Py_ssize_t f_stride = 0
-
- for i in range(ndim - 1, -1, -1):
- if mslice.shape[i] > 1:
- c_stride = mslice.strides[i]
- break
-
- for i in range(ndim):
- if mslice.shape[i] > 1:
- f_stride = mslice.strides[i]
- break
-
- if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride):
- return 'C'
- else:
- return 'F'
-
-@cython.cdivision(True)
-cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides,
- char *dst_data, Py_ssize_t *dst_strides,
- Py_ssize_t *src_shape, Py_ssize_t *dst_shape,
- int ndim, size_t itemsize) nogil:
- # Note: src_extent is 1 if we're broadcasting
- # dst_extent always >= src_extent as we don't do reductions
- cdef Py_ssize_t i
- cdef Py_ssize_t src_extent = src_shape[0]
- cdef Py_ssize_t dst_extent = dst_shape[0]
- cdef Py_ssize_t src_stride = src_strides[0]
- cdef Py_ssize_t dst_stride = dst_strides[0]
-
- if ndim == 1:
- if (src_stride > 0 and dst_stride > 0 and
- <size_t> src_stride == itemsize == <size_t> dst_stride):
- memcpy(dst_data, src_data, itemsize * dst_extent)
- else:
- for i in range(dst_extent):
- memcpy(dst_data, src_data, itemsize)
- src_data += src_stride
- dst_data += dst_stride
- else:
- for i in range(dst_extent):
- _copy_strided_to_strided(src_data, src_strides + 1,
- dst_data, dst_strides + 1,
- src_shape + 1, dst_shape + 1,
- ndim - 1, itemsize)
- src_data += src_stride
- dst_data += dst_stride
-
-cdef void copy_strided_to_strided({{memviewslice_name}} *src,
- {{memviewslice_name}} *dst,
- int ndim, size_t itemsize) nogil:
- _copy_strided_to_strided(src.data, src.strides, dst.data, dst.strides,
- src.shape, dst.shape, ndim, itemsize)
-
-@cname('__pyx_memoryview_slice_get_size')
-cdef Py_ssize_t slice_get_size({{memviewslice_name}} *src, int ndim) nogil:
- "Return the size of the memory occupied by the slice in number of bytes"
+
+@cname('__pyx_memoryview_copy_object')
+cdef memoryview_copy(memoryview memview):
+ "Create a new memoryview object"
+ cdef {{memviewslice_name}} memviewslice
+ slice_copy(memview, &memviewslice)
+ return memoryview_copy_from_slice(memview, &memviewslice)
+
+@cname('__pyx_memoryview_copy_object_from_slice')
+cdef memoryview_copy_from_slice(memoryview memview, {{memviewslice_name}} *memviewslice):
+ """
+ Create a new memoryview object from a given memoryview object and slice.
+ """
+ cdef object (*to_object_func)(char *)
+ cdef int (*to_dtype_func)(char *, object) except 0
+
+ if isinstance(memview, _memoryviewslice):
+ to_object_func = (<_memoryviewslice> memview).to_object_func
+ to_dtype_func = (<_memoryviewslice> memview).to_dtype_func
+ else:
+ to_object_func = NULL
+ to_dtype_func = NULL
+
+ return memoryview_fromslice(memviewslice[0], memview.view.ndim,
+ to_object_func, to_dtype_func,
+ memview.dtype_is_object)
+
+
+#
+### Copy the contents of memoryview slices
+#
+cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil:
+ if arg < 0:
+ return -arg
+ else:
+ return arg
+
+@cname('__pyx_get_best_slice_order')
+cdef char get_best_order({{memviewslice_name}} *mslice, int ndim) nogil:
+ """
+ Figure out the best memory access order for a given slice.
+ """
+ cdef int i
+ cdef Py_ssize_t c_stride = 0
+ cdef Py_ssize_t f_stride = 0
+
+ for i in range(ndim - 1, -1, -1):
+ if mslice.shape[i] > 1:
+ c_stride = mslice.strides[i]
+ break
+
+ for i in range(ndim):
+ if mslice.shape[i] > 1:
+ f_stride = mslice.strides[i]
+ break
+
+ if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride):
+ return 'C'
+ else:
+ return 'F'
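+# For illustration (hypothetical values): a 2-D slice of shape (3, 4) with
+# strides (32, 8) gives c_stride == 8 and f_stride == 32, so 'C' is returned;
+# the same shape with strides (8, 24) would give 'F'.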
+
+@cython.cdivision(True)
+cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides,
+ char *dst_data, Py_ssize_t *dst_strides,
+ Py_ssize_t *src_shape, Py_ssize_t *dst_shape,
+ int ndim, size_t itemsize) nogil:
+ # Note: src_extent is 1 if we're broadcasting
+ # dst_extent always >= src_extent as we don't do reductions
+ cdef Py_ssize_t i
+ cdef Py_ssize_t src_extent = src_shape[0]
+ cdef Py_ssize_t dst_extent = dst_shape[0]
+ cdef Py_ssize_t src_stride = src_strides[0]
+ cdef Py_ssize_t dst_stride = dst_strides[0]
+
+ if ndim == 1:
+ if (src_stride > 0 and dst_stride > 0 and
+ <size_t> src_stride == itemsize == <size_t> dst_stride):
+ memcpy(dst_data, src_data, itemsize * dst_extent)
+ else:
+ for i in range(dst_extent):
+ memcpy(dst_data, src_data, itemsize)
+ src_data += src_stride
+ dst_data += dst_stride
+ else:
+ for i in range(dst_extent):
+ _copy_strided_to_strided(src_data, src_strides + 1,
+ dst_data, dst_strides + 1,
+ src_shape + 1, dst_shape + 1,
+ ndim - 1, itemsize)
+ src_data += src_stride
+ dst_data += dst_stride
+
+cdef void copy_strided_to_strided({{memviewslice_name}} *src,
+ {{memviewslice_name}} *dst,
+ int ndim, size_t itemsize) nogil:
+ _copy_strided_to_strided(src.data, src.strides, dst.data, dst.strides,
+ src.shape, dst.shape, ndim, itemsize)
+
+@cname('__pyx_memoryview_slice_get_size')
+cdef Py_ssize_t slice_get_size({{memviewslice_name}} *src, int ndim) nogil:
+ "Return the size of the memory occupied by the slice in number of bytes"
cdef Py_ssize_t shape, size = src.memview.view.itemsize
-
+
for shape in src.shape[:ndim]:
size *= shape
-
- return size
-
-@cname('__pyx_fill_contig_strides_array')
-cdef Py_ssize_t fill_contig_strides_array(
- Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride,
- int ndim, char order) nogil:
- """
- Fill the strides array for a slice with C or F contiguous strides.
- This is like PyBuffer_FillContiguousStrides, but compatible with py < 2.6
- """
- cdef int idx
-
- if order == 'F':
- for idx in range(ndim):
- strides[idx] = stride
+
+ return size
+
+@cname('__pyx_fill_contig_strides_array')
+cdef Py_ssize_t fill_contig_strides_array(
+ Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride,
+ int ndim, char order) nogil:
+ """
+ Fill the strides array for a slice with C or F contiguous strides.
+ This is like PyBuffer_FillContiguousStrides, but compatible with py < 2.6
+ """
+ cdef int idx
+
+ if order == 'F':
+ for idx in range(ndim):
+ strides[idx] = stride
stride *= shape[idx]
- else:
- for idx in range(ndim - 1, -1, -1):
- strides[idx] = stride
+ else:
+ for idx in range(ndim - 1, -1, -1):
+ strides[idx] = stride
stride *= shape[idx]
-
- return stride
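+# For illustration (hypothetical values): with shape (2, 3), a stride
+# (itemsize) of 8 and order 'C' the strides are filled as (24, 8) and the
+# total extent in bytes, 48, is returned; with order 'F' they become (8, 16)
+# with the same return value.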
-
-@cname('__pyx_memoryview_copy_data_to_temp')
-cdef void *copy_data_to_temp({{memviewslice_name}} *src,
- {{memviewslice_name}} *tmpslice,
- char order,
- int ndim) nogil except NULL:
- """
- Copy a direct slice to temporary contiguous memory. The caller should free
- the result when done.
- """
- cdef int i
- cdef void *result
-
- cdef size_t itemsize = src.memview.view.itemsize
- cdef size_t size = slice_get_size(src, ndim)
-
- result = malloc(size)
- if not result:
- _err(MemoryError, NULL)
-
- # tmpslice[0] = src
- tmpslice.data = <char *> result
- tmpslice.memview = src.memview
- for i in range(ndim):
- tmpslice.shape[i] = src.shape[i]
- tmpslice.suboffsets[i] = -1
-
- fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize,
- ndim, order)
-
- # We need to broadcast strides again
- for i in range(ndim):
- if tmpslice.shape[i] == 1:
- tmpslice.strides[i] = 0
-
+
+ return stride
+
+@cname('__pyx_memoryview_copy_data_to_temp')
+cdef void *copy_data_to_temp({{memviewslice_name}} *src,
+ {{memviewslice_name}} *tmpslice,
+ char order,
+ int ndim) nogil except NULL:
+ """
+ Copy a direct slice to temporary contiguous memory. The caller should free
+ the result when done.
+ """
+ cdef int i
+ cdef void *result
+
+ cdef size_t itemsize = src.memview.view.itemsize
+ cdef size_t size = slice_get_size(src, ndim)
+
+ result = malloc(size)
+ if not result:
+ _err(MemoryError, NULL)
+
+ # tmpslice[0] = src
+ tmpslice.data = <char *> result
+ tmpslice.memview = src.memview
+ for i in range(ndim):
+ tmpslice.shape[i] = src.shape[i]
+ tmpslice.suboffsets[i] = -1
+
+ fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize,
+ ndim, order)
+
+ # We need to broadcast strides again
+ for i in range(ndim):
+ if tmpslice.shape[i] == 1:
+ tmpslice.strides[i] = 0
+
if slice_is_contig(src[0], order, ndim):
- memcpy(result, src.data, size)
- else:
- copy_strided_to_strided(src, tmpslice, ndim, itemsize)
-
- return result
-
-# Use 'with gil' functions and avoid 'with gil' blocks, as the code within the blocks
-# has temporaries that need the GIL to clean up
-@cname('__pyx_memoryview_err_extents')
-cdef int _err_extents(int i, Py_ssize_t extent1,
- Py_ssize_t extent2) except -1 with gil:
- raise ValueError("got differing extents in dimension %d (got %d and %d)" %
- (i, extent1, extent2))
-
-@cname('__pyx_memoryview_err_dim')
-cdef int _err_dim(object error, char *msg, int dim) except -1 with gil:
- raise error(msg.decode('ascii') % dim)
-
-@cname('__pyx_memoryview_err')
-cdef int _err(object error, char *msg) except -1 with gil:
- if msg != NULL:
- raise error(msg.decode('ascii'))
- else:
- raise error
-
-@cname('__pyx_memoryview_copy_contents')
-cdef int memoryview_copy_contents({{memviewslice_name}} src,
- {{memviewslice_name}} dst,
- int src_ndim, int dst_ndim,
- bint dtype_is_object) nogil except -1:
- """
- Copy memory from slice src to slice dst.
- Check for overlapping memory and verify the shapes.
- """
- cdef void *tmpdata = NULL
- cdef size_t itemsize = src.memview.view.itemsize
- cdef int i
- cdef char order = get_best_order(&src, src_ndim)
- cdef bint broadcasting = False
- cdef bint direct_copy = False
- cdef {{memviewslice_name}} tmp
-
- if src_ndim < dst_ndim:
- broadcast_leading(&src, src_ndim, dst_ndim)
- elif dst_ndim < src_ndim:
- broadcast_leading(&dst, dst_ndim, src_ndim)
-
- cdef int ndim = max(src_ndim, dst_ndim)
-
- for i in range(ndim):
- if src.shape[i] != dst.shape[i]:
- if src.shape[i] == 1:
- broadcasting = True
- src.strides[i] = 0
- else:
- _err_extents(i, dst.shape[i], src.shape[i])
-
- if src.suboffsets[i] >= 0:
- _err_dim(ValueError, "Dimension %d is not direct", i)
-
- if slices_overlap(&src, &dst, ndim, itemsize):
- # slices overlap, copy to temp, copy temp to dst
+ memcpy(result, src.data, size)
+ else:
+ copy_strided_to_strided(src, tmpslice, ndim, itemsize)
+
+ return result
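+# A usage sketch (mirroring the overlap handling in memoryview_copy_contents
+# below) -- the temporary block must be released by the caller with free():
+#
+#     tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)
+#     src = tmp        # keep copying from the compact temporary
+#     ...
+#     free(tmpdata)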
+
+# Use 'with gil' functions and avoid 'with gil' blocks, as the code within the blocks
+# has temporaries that need the GIL to clean up
+@cname('__pyx_memoryview_err_extents')
+cdef int _err_extents(int i, Py_ssize_t extent1,
+ Py_ssize_t extent2) except -1 with gil:
+ raise ValueError("got differing extents in dimension %d (got %d and %d)" %
+ (i, extent1, extent2))
+
+@cname('__pyx_memoryview_err_dim')
+cdef int _err_dim(object error, char *msg, int dim) except -1 with gil:
+ raise error(msg.decode('ascii') % dim)
+
+@cname('__pyx_memoryview_err')
+cdef int _err(object error, char *msg) except -1 with gil:
+ if msg != NULL:
+ raise error(msg.decode('ascii'))
+ else:
+ raise error
+
+@cname('__pyx_memoryview_copy_contents')
+cdef int memoryview_copy_contents({{memviewslice_name}} src,
+ {{memviewslice_name}} dst,
+ int src_ndim, int dst_ndim,
+ bint dtype_is_object) nogil except -1:
+ """
+ Copy memory from slice src to slice dst.
+ Check for overlapping memory and verify the shapes.
+ """
+ cdef void *tmpdata = NULL
+ cdef size_t itemsize = src.memview.view.itemsize
+ cdef int i
+ cdef char order = get_best_order(&src, src_ndim)
+ cdef bint broadcasting = False
+ cdef bint direct_copy = False
+ cdef {{memviewslice_name}} tmp
+
+ if src_ndim < dst_ndim:
+ broadcast_leading(&src, src_ndim, dst_ndim)
+ elif dst_ndim < src_ndim:
+ broadcast_leading(&dst, dst_ndim, src_ndim)
+
+ cdef int ndim = max(src_ndim, dst_ndim)
+
+ for i in range(ndim):
+ if src.shape[i] != dst.shape[i]:
+ if src.shape[i] == 1:
+ broadcasting = True
+ src.strides[i] = 0
+ else:
+ _err_extents(i, dst.shape[i], src.shape[i])
+
+ if src.suboffsets[i] >= 0:
+ _err_dim(ValueError, "Dimension %d is not direct", i)
+
+ if slices_overlap(&src, &dst, ndim, itemsize):
+ # slices overlap, copy to temp, copy temp to dst
if not slice_is_contig(src, order, ndim):
- order = get_best_order(&dst, ndim)
-
- tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)
- src = tmp
-
- if not broadcasting:
- # See if both slices have equal contiguity, in that case perform a
- # direct copy. This only works when we are not broadcasting.
+ order = get_best_order(&dst, ndim)
+
+ tmpdata = copy_data_to_temp(&src, &tmp, order, ndim)
+ src = tmp
+
+ if not broadcasting:
+ # See if both slices have equal contiguity, in that case perform a
+ # direct copy. This only works when we are not broadcasting.
if slice_is_contig(src, 'C', ndim):
direct_copy = slice_is_contig(dst, 'C', ndim)
elif slice_is_contig(src, 'F', ndim):
direct_copy = slice_is_contig(dst, 'F', ndim)
-
- if direct_copy:
- # Contiguous slices with same order
- refcount_copying(&dst, dtype_is_object, ndim, False)
- memcpy(dst.data, src.data, slice_get_size(&src, ndim))
- refcount_copying(&dst, dtype_is_object, ndim, True)
- free(tmpdata)
- return 0
-
- if order == 'F' == get_best_order(&dst, ndim):
- # see if both slices have Fortran order, transpose them to match our
- # C-style indexing order
- transpose_memslice(&src)
- transpose_memslice(&dst)
-
- refcount_copying(&dst, dtype_is_object, ndim, False)
- copy_strided_to_strided(&src, &dst, ndim, itemsize)
- refcount_copying(&dst, dtype_is_object, ndim, True)
-
- free(tmpdata)
- return 0
-
-@cname('__pyx_memoryview_broadcast_leading')
+
+ if direct_copy:
+ # Contiguous slices with same order
+ refcount_copying(&dst, dtype_is_object, ndim, False)
+ memcpy(dst.data, src.data, slice_get_size(&src, ndim))
+ refcount_copying(&dst, dtype_is_object, ndim, True)
+ free(tmpdata)
+ return 0
+
+ if order == 'F' == get_best_order(&dst, ndim):
+ # see if both slices have Fortran order, transpose them to match our
+ # C-style indexing order
+ transpose_memslice(&src)
+ transpose_memslice(&dst)
+
+ refcount_copying(&dst, dtype_is_object, ndim, False)
+ copy_strided_to_strided(&src, &dst, ndim, itemsize)
+ refcount_copying(&dst, dtype_is_object, ndim, True)
+
+ free(tmpdata)
+ return 0
+
+@cname('__pyx_memoryview_broadcast_leading')
cdef void broadcast_leading({{memviewslice_name}} *mslice,
- int ndim,
- int ndim_other) nogil:
- cdef int i
- cdef int offset = ndim_other - ndim
-
- for i in range(ndim - 1, -1, -1):
+ int ndim,
+ int ndim_other) nogil:
+ cdef int i
+ cdef int offset = ndim_other - ndim
+
+ for i in range(ndim - 1, -1, -1):
mslice.shape[i + offset] = mslice.shape[i]
mslice.strides[i + offset] = mslice.strides[i]
mslice.suboffsets[i + offset] = mslice.suboffsets[i]
-
- for i in range(offset):
+
+ for i in range(offset):
mslice.shape[i] = 1
mslice.strides[i] = mslice.strides[0]
mslice.suboffsets[i] = -1
-
-#
+
+#
### Take care of refcounting the objects in slices. Do this separately from any copying,
-### to minimize acquiring the GIL
-#
-
-@cname('__pyx_memoryview_refcount_copying')
-cdef void refcount_copying({{memviewslice_name}} *dst, bint dtype_is_object,
- int ndim, bint inc) nogil:
- # incref or decref the objects in the destination slice if the dtype is
- # object
- if dtype_is_object:
- refcount_objects_in_slice_with_gil(dst.data, dst.shape,
- dst.strides, ndim, inc)
-
-@cname('__pyx_memoryview_refcount_objects_in_slice_with_gil')
-cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape,
- Py_ssize_t *strides, int ndim,
- bint inc) with gil:
- refcount_objects_in_slice(data, shape, strides, ndim, inc)
-
-@cname('__pyx_memoryview_refcount_objects_in_slice')
-cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape,
- Py_ssize_t *strides, int ndim, bint inc):
- cdef Py_ssize_t i
-
- for i in range(shape[0]):
- if ndim == 1:
- if inc:
- Py_INCREF((<PyObject **> data)[0])
- else:
- Py_DECREF((<PyObject **> data)[0])
- else:
- refcount_objects_in_slice(data, shape + 1, strides + 1,
- ndim - 1, inc)
-
- data += strides[0]
-
-#
-### Scalar to slice assignment
-#
-@cname('__pyx_memoryview_slice_assign_scalar')
-cdef void slice_assign_scalar({{memviewslice_name}} *dst, int ndim,
- size_t itemsize, void *item,
- bint dtype_is_object) nogil:
- refcount_copying(dst, dtype_is_object, ndim, False)
- _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim,
- itemsize, item)
- refcount_copying(dst, dtype_is_object, ndim, True)
-
-
-@cname('__pyx_memoryview__slice_assign_scalar')
-cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape,
- Py_ssize_t *strides, int ndim,
- size_t itemsize, void *item) nogil:
- cdef Py_ssize_t i
- cdef Py_ssize_t stride = strides[0]
- cdef Py_ssize_t extent = shape[0]
-
- if ndim == 1:
- for i in range(extent):
- memcpy(data, item, itemsize)
- data += stride
- else:
- for i in range(extent):
- _slice_assign_scalar(data, shape + 1, strides + 1,
- ndim - 1, itemsize, item)
- data += stride
-
-
-############### BufferFormatFromTypeInfo ###############
-cdef extern from *:
- ctypedef struct __Pyx_StructField
-
- cdef enum:
- __PYX_BUF_FLAGS_PACKED_STRUCT
- __PYX_BUF_FLAGS_INTEGER_COMPLEX
-
- ctypedef struct __Pyx_TypeInfo:
- char* name
- __Pyx_StructField* fields
- size_t size
- size_t arraysize[8]
- int ndim
- char typegroup
- char is_unsigned
- int flags
-
- ctypedef struct __Pyx_StructField:
- __Pyx_TypeInfo* type
- char* name
- size_t offset
-
- ctypedef struct __Pyx_BufFmt_StackElem:
- __Pyx_StructField* field
- size_t parent_offset
-
- #ctypedef struct __Pyx_BufFmt_Context:
- # __Pyx_StructField root
- __Pyx_BufFmt_StackElem* head
-
- struct __pyx_typeinfo_string:
- char string[3]
-
- __pyx_typeinfo_string __Pyx_TypeInfoToFormat(__Pyx_TypeInfo *)
-
-
-@cname('__pyx_format_from_typeinfo')
-cdef bytes format_from_typeinfo(__Pyx_TypeInfo *type):
- cdef __Pyx_StructField *field
- cdef __pyx_typeinfo_string fmt
- cdef bytes part, result
-
- if type.typegroup == 'S':
+### to minimize acquiring the GIL
+#
+
+@cname('__pyx_memoryview_refcount_copying')
+cdef void refcount_copying({{memviewslice_name}} *dst, bint dtype_is_object,
+ int ndim, bint inc) nogil:
+ # incref or decref the objects in the destination slice if the dtype is
+ # object
+ if dtype_is_object:
+ refcount_objects_in_slice_with_gil(dst.data, dst.shape,
+ dst.strides, ndim, inc)
+
+@cname('__pyx_memoryview_refcount_objects_in_slice_with_gil')
+cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape,
+ Py_ssize_t *strides, int ndim,
+ bint inc) with gil:
+ refcount_objects_in_slice(data, shape, strides, ndim, inc)
+
+@cname('__pyx_memoryview_refcount_objects_in_slice')
+cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape,
+ Py_ssize_t *strides, int ndim, bint inc):
+ cdef Py_ssize_t i
+
+ for i in range(shape[0]):
+ if ndim == 1:
+ if inc:
+ Py_INCREF((<PyObject **> data)[0])
+ else:
+ Py_DECREF((<PyObject **> data)[0])
+ else:
+ refcount_objects_in_slice(data, shape + 1, strides + 1,
+ ndim - 1, inc)
+
+ data += strides[0]
+
+#
+### Scalar to slice assignment
+#
+@cname('__pyx_memoryview_slice_assign_scalar')
+cdef void slice_assign_scalar({{memviewslice_name}} *dst, int ndim,
+ size_t itemsize, void *item,
+ bint dtype_is_object) nogil:
+ refcount_copying(dst, dtype_is_object, ndim, False)
+ _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim,
+ itemsize, item)
+ refcount_copying(dst, dtype_is_object, ndim, True)
+
+
+@cname('__pyx_memoryview__slice_assign_scalar')
+cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape,
+ Py_ssize_t *strides, int ndim,
+ size_t itemsize, void *item) nogil:
+ cdef Py_ssize_t i
+ cdef Py_ssize_t stride = strides[0]
+ cdef Py_ssize_t extent = shape[0]
+
+ if ndim == 1:
+ for i in range(extent):
+ memcpy(data, item, itemsize)
+ data += stride
+ else:
+ for i in range(extent):
+ _slice_assign_scalar(data, shape + 1, strides + 1,
+ ndim - 1, itemsize, item)
+ data += stride
+
+
+############### BufferFormatFromTypeInfo ###############
+cdef extern from *:
+ ctypedef struct __Pyx_StructField
+
+ cdef enum:
+ __PYX_BUF_FLAGS_PACKED_STRUCT
+ __PYX_BUF_FLAGS_INTEGER_COMPLEX
+
+ ctypedef struct __Pyx_TypeInfo:
+ char* name
+ __Pyx_StructField* fields
+ size_t size
+ size_t arraysize[8]
+ int ndim
+ char typegroup
+ char is_unsigned
+ int flags
+
+ ctypedef struct __Pyx_StructField:
+ __Pyx_TypeInfo* type
+ char* name
+ size_t offset
+
+ ctypedef struct __Pyx_BufFmt_StackElem:
+ __Pyx_StructField* field
+ size_t parent_offset
+
+ #ctypedef struct __Pyx_BufFmt_Context:
+ # __Pyx_StructField root
+ __Pyx_BufFmt_StackElem* head
+
+ struct __pyx_typeinfo_string:
+ char string[3]
+
+ __pyx_typeinfo_string __Pyx_TypeInfoToFormat(__Pyx_TypeInfo *)
+
+
+@cname('__pyx_format_from_typeinfo')
+cdef bytes format_from_typeinfo(__Pyx_TypeInfo *type):
+ cdef __Pyx_StructField *field
+ cdef __pyx_typeinfo_string fmt
+ cdef bytes part, result
+
+ if type.typegroup == 'S':
assert type.fields != NULL
assert type.fields.type != NULL
-
- if type.flags & __PYX_BUF_FLAGS_PACKED_STRUCT:
- alignment = b'^'
- else:
- alignment = b''
-
- parts = [b"T{"]
- field = type.fields
-
- while field.type:
- part = format_from_typeinfo(field.type)
- parts.append(part + b':' + field.name + b':')
- field += 1
-
- result = alignment.join(parts) + b'}'
- else:
- fmt = __Pyx_TypeInfoToFormat(type)
- if type.arraysize[0]:
- extents = [unicode(type.arraysize[i]) for i in range(type.ndim)]
- result = (u"(%s)" % u','.join(extents)).encode('ascii') + fmt.string
- else:
- result = fmt.string
-
- return result
+
+ if type.flags & __PYX_BUF_FLAGS_PACKED_STRUCT:
+ alignment = b'^'
+ else:
+ alignment = b''
+
+ parts = [b"T{"]
+ field = type.fields
+
+ while field.type:
+ part = format_from_typeinfo(field.type)
+ parts.append(part + b':' + field.name + b':')
+ field += 1
+
+ result = alignment.join(parts) + b'}'
+ else:
+ fmt = __Pyx_TypeInfoToFormat(type)
+ if type.arraysize[0]:
+ extents = [unicode(type.arraysize[i]) for i in range(type.ndim)]
+ result = (u"(%s)" % u','.join(extents)).encode('ascii') + fmt.string
+ else:
+ result = fmt.string
+
+ return result
diff --git a/contrib/tools/cython/Cython/Utility/MemoryView_C.c b/contrib/tools/cython/Cython/Utility/MemoryView_C.c
index 44e6fa9d88..0a5d8ee2c2 100644
--- a/contrib/tools/cython/Cython/Utility/MemoryView_C.c
+++ b/contrib/tools/cython/Cython/Utility/MemoryView_C.c
@@ -1,352 +1,352 @@
-////////// MemviewSliceStruct.proto //////////
+////////// MemviewSliceStruct.proto //////////
//@proto_block: utility_code_proto_before_types
-
-/* memoryview slice struct */
-struct {{memview_struct_name}};
-
-typedef struct {
- struct {{memview_struct_name}} *memview;
- char *data;
- Py_ssize_t shape[{{max_dims}}];
- Py_ssize_t strides[{{max_dims}}];
- Py_ssize_t suboffsets[{{max_dims}}];
-} {{memviewslice_name}};
-
+
+/* memoryview slice struct */
+struct {{memview_struct_name}};
+
+typedef struct {
+ struct {{memview_struct_name}} *memview;
+ char *data;
+ Py_ssize_t shape[{{max_dims}}];
+ Py_ssize_t strides[{{max_dims}}];
+ Py_ssize_t suboffsets[{{max_dims}}];
+} {{memviewslice_name}};
+
// used for "len(memviewslice)"
#define __Pyx_MemoryView_Len(m) (m.shape[0])
-
-/////////// Atomics.proto /////////////
+
+/////////// Atomics.proto /////////////
//@proto_block: utility_code_proto_before_types
-
-#include <pythread.h>
-
-#ifndef CYTHON_ATOMICS
- #define CYTHON_ATOMICS 1
-#endif
-
-#define __pyx_atomic_int_type int
-// todo: Portland pgcc, maybe OS X's OSAtomicIncrement32,
-// libatomic + autotools-like distutils support? Such a pain...
-#if CYTHON_ATOMICS && __GNUC__ >= 4 && (__GNUC_MINOR__ > 1 || \
- (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL >= 2)) && \
- !defined(__i386__)
- /* gcc >= 4.1.2 */
- #define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1)
- #define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1)
-
- #ifdef __PYX_DEBUG_ATOMICS
- #warning "Using GNU atomics"
- #endif
+
+#include <pythread.h>
+
+#ifndef CYTHON_ATOMICS
+ #define CYTHON_ATOMICS 1
+#endif
+
+#define __pyx_atomic_int_type int
+// todo: Portland pgcc, maybe OS X's OSAtomicIncrement32,
+// libatomic + autotools-like distutils support? Such a pain...
+#if CYTHON_ATOMICS && __GNUC__ >= 4 && (__GNUC_MINOR__ > 1 || \
+ (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL >= 2)) && \
+ !defined(__i386__)
+ /* gcc >= 4.1.2 */
+ #define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1)
+ #define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1)
+
+ #ifdef __PYX_DEBUG_ATOMICS
+ #warning "Using GNU atomics"
+ #endif
#elif CYTHON_ATOMICS && defined(_MSC_VER) && 0
- /* msvc */
- #include <Windows.h>
+ /* msvc */
+ #include <Windows.h>
#undef __pyx_atomic_int_type
- #define __pyx_atomic_int_type LONG
- #define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value)
- #define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value)
-
- #ifdef __PYX_DEBUG_ATOMICS
+ #define __pyx_atomic_int_type LONG
+ #define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value)
+ #define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value)
+
+ #ifdef __PYX_DEBUG_ATOMICS
#pragma message ("Using MSVC atomics")
- #endif
-#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER)) && 0
- #define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value)
- #define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value)
-
- #ifdef __PYX_DEBUG_ATOMICS
- #warning "Using Intel atomics"
- #endif
-#else
- #undef CYTHON_ATOMICS
- #define CYTHON_ATOMICS 0
-
- #ifdef __PYX_DEBUG_ATOMICS
- #warning "Not using atomics"
- #endif
-#endif
-
-typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
-
-#if CYTHON_ATOMICS
- #define __pyx_add_acquisition_count(memview) \
- __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
- #define __pyx_sub_acquisition_count(memview) \
- __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
-#else
- #define __pyx_add_acquisition_count(memview) \
- __pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
- #define __pyx_sub_acquisition_count(memview) \
- __pyx_sub_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
-#endif
-
-
-/////////////// ObjectToMemviewSlice.proto ///////////////
-
+ #endif
+#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER)) && 0
+ #define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value)
+ #define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value)
+
+ #ifdef __PYX_DEBUG_ATOMICS
+ #warning "Using Intel atomics"
+ #endif
+#else
+ #undef CYTHON_ATOMICS
+ #define CYTHON_ATOMICS 0
+
+ #ifdef __PYX_DEBUG_ATOMICS
+ #warning "Not using atomics"
+ #endif
+#endif
+
+typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
+
+#if CYTHON_ATOMICS
+ #define __pyx_add_acquisition_count(memview) \
+ __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
+ #define __pyx_sub_acquisition_count(memview) \
+ __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
+#else
+ #define __pyx_add_acquisition_count(memview) \
+ __pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
+ #define __pyx_sub_acquisition_count(memview) \
+ __pyx_sub_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
+#endif
+
+
+/////////////// ObjectToMemviewSlice.proto ///////////////
+
static CYTHON_INLINE {{memviewslice_name}} {{funcname}}(PyObject *, int writable_flag);
-
-
-////////// MemviewSliceInit.proto //////////
-
-#define __Pyx_BUF_MAX_NDIMS %(BUF_MAX_NDIMS)d
-
-#define __Pyx_MEMVIEW_DIRECT 1
-#define __Pyx_MEMVIEW_PTR 2
-#define __Pyx_MEMVIEW_FULL 4
-#define __Pyx_MEMVIEW_CONTIG 8
-#define __Pyx_MEMVIEW_STRIDED 16
-#define __Pyx_MEMVIEW_FOLLOW 32
-
-#define __Pyx_IS_C_CONTIG 1
-#define __Pyx_IS_F_CONTIG 2
-
-static int __Pyx_init_memviewslice(
- struct __pyx_memoryview_obj *memview,
- int ndim,
- __Pyx_memviewslice *memviewslice,
- int memview_is_new_reference);
-
-static CYTHON_INLINE int __pyx_add_acquisition_count_locked(
- __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
-static CYTHON_INLINE int __pyx_sub_acquisition_count_locked(
- __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
-
-#define __pyx_get_slice_count_pointer(memview) (memview->acquisition_count_aligned_p)
-#define __pyx_get_slice_count(memview) (*__pyx_get_slice_count_pointer(memview))
-#define __PYX_INC_MEMVIEW(slice, have_gil) __Pyx_INC_MEMVIEW(slice, have_gil, __LINE__)
-#define __PYX_XDEC_MEMVIEW(slice, have_gil) __Pyx_XDEC_MEMVIEW(slice, have_gil, __LINE__)
-static CYTHON_INLINE void __Pyx_INC_MEMVIEW({{memviewslice_name}} *, int, int);
-static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW({{memviewslice_name}} *, int, int);
-
-
-/////////////// MemviewSliceIndex.proto ///////////////
-
-static CYTHON_INLINE char *__pyx_memviewslice_index_full(
- const char *bufp, Py_ssize_t idx, Py_ssize_t stride, Py_ssize_t suboffset);
-
-
-/////////////// ObjectToMemviewSlice ///////////////
-//@requires: MemviewSliceValidateAndInit
-
+
+
+////////// MemviewSliceInit.proto //////////
+
+#define __Pyx_BUF_MAX_NDIMS %(BUF_MAX_NDIMS)d
+
+#define __Pyx_MEMVIEW_DIRECT 1
+#define __Pyx_MEMVIEW_PTR 2
+#define __Pyx_MEMVIEW_FULL 4
+#define __Pyx_MEMVIEW_CONTIG 8
+#define __Pyx_MEMVIEW_STRIDED 16
+#define __Pyx_MEMVIEW_FOLLOW 32
+
+#define __Pyx_IS_C_CONTIG 1
+#define __Pyx_IS_F_CONTIG 2
+
+static int __Pyx_init_memviewslice(
+ struct __pyx_memoryview_obj *memview,
+ int ndim,
+ __Pyx_memviewslice *memviewslice,
+ int memview_is_new_reference);
+
+static CYTHON_INLINE int __pyx_add_acquisition_count_locked(
+ __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
+static CYTHON_INLINE int __pyx_sub_acquisition_count_locked(
+ __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
+
+#define __pyx_get_slice_count_pointer(memview) (memview->acquisition_count_aligned_p)
+#define __pyx_get_slice_count(memview) (*__pyx_get_slice_count_pointer(memview))
+#define __PYX_INC_MEMVIEW(slice, have_gil) __Pyx_INC_MEMVIEW(slice, have_gil, __LINE__)
+#define __PYX_XDEC_MEMVIEW(slice, have_gil) __Pyx_XDEC_MEMVIEW(slice, have_gil, __LINE__)
+static CYTHON_INLINE void __Pyx_INC_MEMVIEW({{memviewslice_name}} *, int, int);
+static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW({{memviewslice_name}} *, int, int);
+
+
+/////////////// MemviewSliceIndex.proto ///////////////
+
+static CYTHON_INLINE char *__pyx_memviewslice_index_full(
+ const char *bufp, Py_ssize_t idx, Py_ssize_t stride, Py_ssize_t suboffset);
+
+
+/////////////// ObjectToMemviewSlice ///////////////
+//@requires: MemviewSliceValidateAndInit
+
static CYTHON_INLINE {{memviewslice_name}} {{funcname}}(PyObject *obj, int writable_flag) {
- {{memviewslice_name}} result = {{memslice_init}};
- __Pyx_BufFmt_StackElem stack[{{struct_nesting_depth}}];
- int axes_specs[] = { {{axes_specs}} };
- int retcode;
-
- if (obj == Py_None) {
- /* We don't bother to refcount None */
- result.memview = (struct __pyx_memoryview_obj *) Py_None;
- return result;
- }
-
- retcode = __Pyx_ValidateAndInit_memviewslice(axes_specs, {{c_or_f_flag}},
+ {{memviewslice_name}} result = {{memslice_init}};
+ __Pyx_BufFmt_StackElem stack[{{struct_nesting_depth}}];
+ int axes_specs[] = { {{axes_specs}} };
+ int retcode;
+
+ if (obj == Py_None) {
+ /* We don't bother to refcount None */
+ result.memview = (struct __pyx_memoryview_obj *) Py_None;
+ return result;
+ }
+
+ retcode = __Pyx_ValidateAndInit_memviewslice(axes_specs, {{c_or_f_flag}},
{{buf_flag}} | writable_flag, {{ndim}},
- &{{dtype_typeinfo}}, stack,
- &result, obj);
-
- if (unlikely(retcode == -1))
- goto __pyx_fail;
-
- return result;
-__pyx_fail:
- result.memview = NULL;
- result.data = NULL;
- return result;
-}
-
-
-/////////////// MemviewSliceValidateAndInit.proto ///////////////
-
-static int __Pyx_ValidateAndInit_memviewslice(
- int *axes_specs,
- int c_or_f_flag,
- int buf_flags,
- int ndim,
- __Pyx_TypeInfo *dtype,
- __Pyx_BufFmt_StackElem stack[],
- __Pyx_memviewslice *memviewslice,
- PyObject *original_obj);
-
-/////////////// MemviewSliceValidateAndInit ///////////////
-//@requires: Buffer.c::TypeInfoCompare
+ &{{dtype_typeinfo}}, stack,
+ &result, obj);
+
+ if (unlikely(retcode == -1))
+ goto __pyx_fail;
+
+ return result;
+__pyx_fail:
+ result.memview = NULL;
+ result.data = NULL;
+ return result;
+}
+
+
+/////////////// MemviewSliceValidateAndInit.proto ///////////////
+
+static int __Pyx_ValidateAndInit_memviewslice(
+ int *axes_specs,
+ int c_or_f_flag,
+ int buf_flags,
+ int ndim,
+ __Pyx_TypeInfo *dtype,
+ __Pyx_BufFmt_StackElem stack[],
+ __Pyx_memviewslice *memviewslice,
+ PyObject *original_obj);
+
+/////////////// MemviewSliceValidateAndInit ///////////////
+//@requires: Buffer.c::TypeInfoCompare
//@requires: Buffer.c::BufferFormatStructs
//@requires: Buffer.c::BufferFormatCheck
-
-static int
-__pyx_check_strides(Py_buffer *buf, int dim, int ndim, int spec)
-{
- if (buf->shape[dim] <= 1)
- return 1;
-
- if (buf->strides) {
- if (spec & __Pyx_MEMVIEW_CONTIG) {
- if (spec & (__Pyx_MEMVIEW_PTR|__Pyx_MEMVIEW_FULL)) {
+
+static int
+__pyx_check_strides(Py_buffer *buf, int dim, int ndim, int spec)
+{
+ if (buf->shape[dim] <= 1)
+ return 1;
+
+ if (buf->strides) {
+ if (spec & __Pyx_MEMVIEW_CONTIG) {
+ if (spec & (__Pyx_MEMVIEW_PTR|__Pyx_MEMVIEW_FULL)) {
if (unlikely(buf->strides[dim] != sizeof(void *))) {
- PyErr_Format(PyExc_ValueError,
- "Buffer is not indirectly contiguous "
- "in dimension %d.", dim);
- goto fail;
- }
+ PyErr_Format(PyExc_ValueError,
+ "Buffer is not indirectly contiguous "
+ "in dimension %d.", dim);
+ goto fail;
+ }
} else if (unlikely(buf->strides[dim] != buf->itemsize)) {
- PyErr_SetString(PyExc_ValueError,
- "Buffer and memoryview are not contiguous "
- "in the same dimension.");
- goto fail;
- }
- }
-
- if (spec & __Pyx_MEMVIEW_FOLLOW) {
- Py_ssize_t stride = buf->strides[dim];
- if (stride < 0)
- stride = -stride;
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer and memoryview are not contiguous "
+ "in the same dimension.");
+ goto fail;
+ }
+ }
+
+ if (spec & __Pyx_MEMVIEW_FOLLOW) {
+ Py_ssize_t stride = buf->strides[dim];
+ if (stride < 0)
+ stride = -stride;
if (unlikely(stride < buf->itemsize)) {
- PyErr_SetString(PyExc_ValueError,
- "Buffer and memoryview are not contiguous "
- "in the same dimension.");
- goto fail;
- }
- }
- } else {
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer and memoryview are not contiguous "
+ "in the same dimension.");
+ goto fail;
+ }
+ }
+ } else {
if (unlikely(spec & __Pyx_MEMVIEW_CONTIG && dim != ndim - 1)) {
- PyErr_Format(PyExc_ValueError,
- "C-contiguous buffer is not contiguous in "
- "dimension %d", dim);
- goto fail;
+ PyErr_Format(PyExc_ValueError,
+ "C-contiguous buffer is not contiguous in "
+ "dimension %d", dim);
+ goto fail;
} else if (unlikely(spec & (__Pyx_MEMVIEW_PTR))) {
- PyErr_Format(PyExc_ValueError,
- "C-contiguous buffer is not indirect in "
- "dimension %d", dim);
- goto fail;
+ PyErr_Format(PyExc_ValueError,
+ "C-contiguous buffer is not indirect in "
+ "dimension %d", dim);
+ goto fail;
} else if (unlikely(buf->suboffsets)) {
- PyErr_SetString(PyExc_ValueError,
- "Buffer exposes suboffsets but no strides");
- goto fail;
- }
- }
-
- return 1;
-fail:
- return 0;
-}
-
-static int
-__pyx_check_suboffsets(Py_buffer *buf, int dim, CYTHON_UNUSED int ndim, int spec)
-{
- // Todo: without PyBUF_INDIRECT we may not have suboffset information, i.e., the
- // ptr may not be set to NULL but may be uninitialized?
- if (spec & __Pyx_MEMVIEW_DIRECT) {
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer exposes suboffsets but no strides");
+ goto fail;
+ }
+ }
+
+ return 1;
+fail:
+ return 0;
+}
+
+static int
+__pyx_check_suboffsets(Py_buffer *buf, int dim, CYTHON_UNUSED int ndim, int spec)
+{
+ // Todo: without PyBUF_INDIRECT we may not have suboffset information, i.e., the
+ // ptr may not be set to NULL but may be uninitialized?
+ if (spec & __Pyx_MEMVIEW_DIRECT) {
if (unlikely(buf->suboffsets && buf->suboffsets[dim] >= 0)) {
- PyErr_Format(PyExc_ValueError,
- "Buffer not compatible with direct access "
- "in dimension %d.", dim);
- goto fail;
- }
- }
-
- if (spec & __Pyx_MEMVIEW_PTR) {
+ PyErr_Format(PyExc_ValueError,
+ "Buffer not compatible with direct access "
+ "in dimension %d.", dim);
+ goto fail;
+ }
+ }
+
+ if (spec & __Pyx_MEMVIEW_PTR) {
if (unlikely(!buf->suboffsets || (buf->suboffsets[dim] < 0))) {
- PyErr_Format(PyExc_ValueError,
- "Buffer is not indirectly accessible "
- "in dimension %d.", dim);
- goto fail;
- }
- }
-
- return 1;
-fail:
- return 0;
-}
-
-static int
-__pyx_verify_contig(Py_buffer *buf, int ndim, int c_or_f_flag)
-{
- int i;
-
- if (c_or_f_flag & __Pyx_IS_F_CONTIG) {
- Py_ssize_t stride = 1;
- for (i = 0; i < ndim; i++) {
+ PyErr_Format(PyExc_ValueError,
+ "Buffer is not indirectly accessible "
+ "in dimension %d.", dim);
+ goto fail;
+ }
+ }
+
+ return 1;
+fail:
+ return 0;
+}
+
+static int
+__pyx_verify_contig(Py_buffer *buf, int ndim, int c_or_f_flag)
+{
+ int i;
+
+ if (c_or_f_flag & __Pyx_IS_F_CONTIG) {
+ Py_ssize_t stride = 1;
+ for (i = 0; i < ndim; i++) {
if (unlikely(stride * buf->itemsize != buf->strides[i] && buf->shape[i] > 1)) {
- PyErr_SetString(PyExc_ValueError,
- "Buffer not fortran contiguous.");
- goto fail;
- }
- stride = stride * buf->shape[i];
- }
- } else if (c_or_f_flag & __Pyx_IS_C_CONTIG) {
- Py_ssize_t stride = 1;
- for (i = ndim - 1; i >- 1; i--) {
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer not fortran contiguous.");
+ goto fail;
+ }
+ stride = stride * buf->shape[i];
+ }
+ } else if (c_or_f_flag & __Pyx_IS_C_CONTIG) {
+ Py_ssize_t stride = 1;
+ for (i = ndim - 1; i >- 1; i--) {
if (unlikely(stride * buf->itemsize != buf->strides[i] && buf->shape[i] > 1)) {
- PyErr_SetString(PyExc_ValueError,
- "Buffer not C contiguous.");
- goto fail;
- }
- stride = stride * buf->shape[i];
- }
- }
-
- return 1;
-fail:
- return 0;
-}
-
-static int __Pyx_ValidateAndInit_memviewslice(
- int *axes_specs,
- int c_or_f_flag,
- int buf_flags,
- int ndim,
- __Pyx_TypeInfo *dtype,
- __Pyx_BufFmt_StackElem stack[],
- __Pyx_memviewslice *memviewslice,
- PyObject *original_obj)
-{
- struct __pyx_memoryview_obj *memview, *new_memview;
- __Pyx_RefNannyDeclarations
- Py_buffer *buf;
- int i, spec = 0, retval = -1;
- __Pyx_BufFmt_Context ctx;
- int from_memoryview = __pyx_memoryview_check(original_obj);
-
- __Pyx_RefNannySetupContext("ValidateAndInit_memviewslice", 0);
-
- if (from_memoryview && __pyx_typeinfo_cmp(dtype, ((struct __pyx_memoryview_obj *)
- original_obj)->typeinfo)) {
- /* We have a matching dtype, skip format parsing */
- memview = (struct __pyx_memoryview_obj *) original_obj;
- new_memview = NULL;
- } else {
- memview = (struct __pyx_memoryview_obj *) __pyx_memoryview_new(
- original_obj, buf_flags, 0, dtype);
- new_memview = memview;
- if (unlikely(!memview))
- goto fail;
- }
-
- buf = &memview->view;
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer not C contiguous.");
+ goto fail;
+ }
+ stride = stride * buf->shape[i];
+ }
+ }
+
+ return 1;
+fail:
+ return 0;
+}
+
+static int __Pyx_ValidateAndInit_memviewslice(
+ int *axes_specs,
+ int c_or_f_flag,
+ int buf_flags,
+ int ndim,
+ __Pyx_TypeInfo *dtype,
+ __Pyx_BufFmt_StackElem stack[],
+ __Pyx_memviewslice *memviewslice,
+ PyObject *original_obj)
+{
+ struct __pyx_memoryview_obj *memview, *new_memview;
+ __Pyx_RefNannyDeclarations
+ Py_buffer *buf;
+ int i, spec = 0, retval = -1;
+ __Pyx_BufFmt_Context ctx;
+ int from_memoryview = __pyx_memoryview_check(original_obj);
+
+ __Pyx_RefNannySetupContext("ValidateAndInit_memviewslice", 0);
+
+ if (from_memoryview && __pyx_typeinfo_cmp(dtype, ((struct __pyx_memoryview_obj *)
+ original_obj)->typeinfo)) {
+ /* We have a matching dtype, skip format parsing */
+ memview = (struct __pyx_memoryview_obj *) original_obj;
+ new_memview = NULL;
+ } else {
+ memview = (struct __pyx_memoryview_obj *) __pyx_memoryview_new(
+ original_obj, buf_flags, 0, dtype);
+ new_memview = memview;
+ if (unlikely(!memview))
+ goto fail;
+ }
+
+ buf = &memview->view;
if (unlikely(buf->ndim != ndim)) {
- PyErr_Format(PyExc_ValueError,
- "Buffer has wrong number of dimensions (expected %d, got %d)",
- ndim, buf->ndim);
- goto fail;
- }
-
- if (new_memview) {
- __Pyx_BufFmt_Init(&ctx, stack, dtype);
+ PyErr_Format(PyExc_ValueError,
+ "Buffer has wrong number of dimensions (expected %d, got %d)",
+ ndim, buf->ndim);
+ goto fail;
+ }
+
+ if (new_memview) {
+ __Pyx_BufFmt_Init(&ctx, stack, dtype);
if (unlikely(!__Pyx_BufFmt_CheckString(&ctx, buf->format))) goto fail;
- }
-
+ }
+
if (unlikely((unsigned) buf->itemsize != dtype->size)) {
- PyErr_Format(PyExc_ValueError,
- "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "u byte%s) "
- "does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "u byte%s)",
- buf->itemsize,
- (buf->itemsize > 1) ? "s" : "",
- dtype->name,
- dtype->size,
- (dtype->size > 1) ? "s" : "");
- goto fail;
- }
-
- /* Check axes */
+ PyErr_Format(PyExc_ValueError,
+ "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "u byte%s) "
+ "does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "u byte%s)",
+ buf->itemsize,
+ (buf->itemsize > 1) ? "s" : "",
+ dtype->name,
+ dtype->size,
+ (dtype->size > 1) ? "s" : "");
+ goto fail;
+ }
+
+ /* Check axes */
if (buf->len > 0) {
// 0-sized arrays do not undergo these checks since their strides are
// irrelevant and they are always both C- and F-contiguous.
@@ -360,506 +360,506 @@ static int __Pyx_ValidateAndInit_memviewslice(
/* Check contiguity */
if (unlikely(buf->strides && !__pyx_verify_contig(buf, ndim, c_or_f_flag)))
- goto fail;
- }
-
- /* Initialize */
- if (unlikely(__Pyx_init_memviewslice(memview, ndim, memviewslice,
- new_memview != NULL) == -1)) {
- goto fail;
- }
-
- retval = 0;
- goto no_fail;
-
-fail:
- Py_XDECREF(new_memview);
- retval = -1;
-
-no_fail:
- __Pyx_RefNannyFinishContext();
- return retval;
-}
-
-
-////////// MemviewSliceInit //////////
-
-static int
-__Pyx_init_memviewslice(struct __pyx_memoryview_obj *memview,
- int ndim,
- {{memviewslice_name}} *memviewslice,
- int memview_is_new_reference)
-{
- __Pyx_RefNannyDeclarations
- int i, retval=-1;
- Py_buffer *buf = &memview->view;
- __Pyx_RefNannySetupContext("init_memviewslice", 0);
-
+ goto fail;
+ }
+
+ /* Initialize */
+ if (unlikely(__Pyx_init_memviewslice(memview, ndim, memviewslice,
+ new_memview != NULL) == -1)) {
+ goto fail;
+ }
+
+ retval = 0;
+ goto no_fail;
+
+fail:
+ Py_XDECREF(new_memview);
+ retval = -1;
+
+no_fail:
+ __Pyx_RefNannyFinishContext();
+ return retval;
+}
+
+
+////////// MemviewSliceInit //////////
+
+static int
+__Pyx_init_memviewslice(struct __pyx_memoryview_obj *memview,
+ int ndim,
+ {{memviewslice_name}} *memviewslice,
+ int memview_is_new_reference)
+{
+ __Pyx_RefNannyDeclarations
+ int i, retval=-1;
+ Py_buffer *buf = &memview->view;
+ __Pyx_RefNannySetupContext("init_memviewslice", 0);
+
if (unlikely(memviewslice->memview || memviewslice->data)) {
- PyErr_SetString(PyExc_ValueError,
- "memviewslice is already initialized!");
- goto fail;
- }
-
- if (buf->strides) {
- for (i = 0; i < ndim; i++) {
- memviewslice->strides[i] = buf->strides[i];
- }
- } else {
- Py_ssize_t stride = buf->itemsize;
- for (i = ndim - 1; i >= 0; i--) {
- memviewslice->strides[i] = stride;
- stride *= buf->shape[i];
- }
- }
-
- for (i = 0; i < ndim; i++) {
- memviewslice->shape[i] = buf->shape[i];
- if (buf->suboffsets) {
- memviewslice->suboffsets[i] = buf->suboffsets[i];
- } else {
- memviewslice->suboffsets[i] = -1;
- }
- }
-
- memviewslice->memview = memview;
- memviewslice->data = (char *)buf->buf;
- if (__pyx_add_acquisition_count(memview) == 0 && !memview_is_new_reference) {
- Py_INCREF(memview);
- }
- retval = 0;
- goto no_fail;
-
-fail:
- /* Don't decref, the memoryview may be borrowed. Let the caller do the cleanup */
- /* __Pyx_XDECREF(memviewslice->memview); */
- memviewslice->memview = 0;
- memviewslice->data = 0;
- retval = -1;
-no_fail:
- __Pyx_RefNannyFinishContext();
- return retval;
-}
-
+ PyErr_SetString(PyExc_ValueError,
+ "memviewslice is already initialized!");
+ goto fail;
+ }
+
+ if (buf->strides) {
+ for (i = 0; i < ndim; i++) {
+ memviewslice->strides[i] = buf->strides[i];
+ }
+ } else {
+ Py_ssize_t stride = buf->itemsize;
+ for (i = ndim - 1; i >= 0; i--) {
+ memviewslice->strides[i] = stride;
+ stride *= buf->shape[i];
+ }
+ }
+
+ for (i = 0; i < ndim; i++) {
+ memviewslice->shape[i] = buf->shape[i];
+ if (buf->suboffsets) {
+ memviewslice->suboffsets[i] = buf->suboffsets[i];
+ } else {
+ memviewslice->suboffsets[i] = -1;
+ }
+ }
+
+ memviewslice->memview = memview;
+ memviewslice->data = (char *)buf->buf;
+ if (__pyx_add_acquisition_count(memview) == 0 && !memview_is_new_reference) {
+ Py_INCREF(memview);
+ }
+ retval = 0;
+ goto no_fail;
+
+fail:
+ /* Don't decref, the memoryview may be borrowed. Let the caller do the cleanup */
+ /* __Pyx_XDECREF(memviewslice->memview); */
+ memviewslice->memview = 0;
+ memviewslice->data = 0;
+ retval = -1;
+no_fail:
+ __Pyx_RefNannyFinishContext();
+ return retval;
+}
+
#ifndef Py_NO_RETURN
// available since Py3.3
#define Py_NO_RETURN
#endif
-
+
static void __pyx_fatalerror(const char *fmt, ...) Py_NO_RETURN {
- va_list vargs;
- char msg[200];
-
-#ifdef HAVE_STDARG_PROTOTYPES
- va_start(vargs, fmt);
-#else
- va_start(vargs);
-#endif
+ va_list vargs;
+ char msg[200];
+
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, fmt);
+#else
+ va_start(vargs);
+#endif
vsnprintf(msg, 200, fmt, vargs);
va_end(vargs);
-
- Py_FatalError(msg);
-}
-
-static CYTHON_INLINE int
-__pyx_add_acquisition_count_locked(__pyx_atomic_int *acquisition_count,
- PyThread_type_lock lock)
-{
- int result;
- PyThread_acquire_lock(lock, 1);
- result = (*acquisition_count)++;
- PyThread_release_lock(lock);
- return result;
-}
-
-static CYTHON_INLINE int
-__pyx_sub_acquisition_count_locked(__pyx_atomic_int *acquisition_count,
- PyThread_type_lock lock)
-{
- int result;
- PyThread_acquire_lock(lock, 1);
- result = (*acquisition_count)--;
- PyThread_release_lock(lock);
- return result;
-}
-
-
-static CYTHON_INLINE void
-__Pyx_INC_MEMVIEW({{memviewslice_name}} *memslice, int have_gil, int lineno)
-{
- int first_time;
- struct {{memview_struct_name}} *memview = memslice->memview;
+
+ Py_FatalError(msg);
+}
+
+static CYTHON_INLINE int
+__pyx_add_acquisition_count_locked(__pyx_atomic_int *acquisition_count,
+ PyThread_type_lock lock)
+{
+ int result;
+ PyThread_acquire_lock(lock, 1);
+ result = (*acquisition_count)++;
+ PyThread_release_lock(lock);
+ return result;
+}
+
+static CYTHON_INLINE int
+__pyx_sub_acquisition_count_locked(__pyx_atomic_int *acquisition_count,
+ PyThread_type_lock lock)
+{
+ int result;
+ PyThread_acquire_lock(lock, 1);
+ result = (*acquisition_count)--;
+ PyThread_release_lock(lock);
+ return result;
+}
+
+
+static CYTHON_INLINE void
+__Pyx_INC_MEMVIEW({{memviewslice_name}} *memslice, int have_gil, int lineno)
+{
+ int first_time;
+ struct {{memview_struct_name}} *memview = memslice->memview;
if (unlikely(!memview || (PyObject *) memview == Py_None))
- return; /* allow uninitialized memoryview assignment */
-
+ return; /* allow uninitialized memoryview assignment */
+
if (unlikely(__pyx_get_slice_count(memview) < 0))
- __pyx_fatalerror("Acquisition count is %d (line %d)",
- __pyx_get_slice_count(memview), lineno);
-
- first_time = __pyx_add_acquisition_count(memview) == 0;
-
+ __pyx_fatalerror("Acquisition count is %d (line %d)",
+ __pyx_get_slice_count(memview), lineno);
+
+ first_time = __pyx_add_acquisition_count(memview) == 0;
+
if (unlikely(first_time)) {
- if (have_gil) {
- Py_INCREF((PyObject *) memview);
- } else {
- PyGILState_STATE _gilstate = PyGILState_Ensure();
- Py_INCREF((PyObject *) memview);
- PyGILState_Release(_gilstate);
- }
- }
-}
-
-static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW({{memviewslice_name}} *memslice,
- int have_gil, int lineno) {
- int last_time;
- struct {{memview_struct_name}} *memview = memslice->memview;
-
+ if (have_gil) {
+ Py_INCREF((PyObject *) memview);
+ } else {
+ PyGILState_STATE _gilstate = PyGILState_Ensure();
+ Py_INCREF((PyObject *) memview);
+ PyGILState_Release(_gilstate);
+ }
+ }
+}
+
+static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW({{memviewslice_name}} *memslice,
+ int have_gil, int lineno) {
+ int last_time;
+ struct {{memview_struct_name}} *memview = memslice->memview;
+
if (unlikely(!memview || (PyObject *) memview == Py_None)) {
// we do not ref-count None
- memslice->memview = NULL;
- return;
- }
-
+ memslice->memview = NULL;
+ return;
+ }
+
if (unlikely(__pyx_get_slice_count(memview) <= 0))
- __pyx_fatalerror("Acquisition count is %d (line %d)",
- __pyx_get_slice_count(memview), lineno);
-
- last_time = __pyx_sub_acquisition_count(memview) == 1;
- memslice->data = NULL;
+ __pyx_fatalerror("Acquisition count is %d (line %d)",
+ __pyx_get_slice_count(memview), lineno);
+
+ last_time = __pyx_sub_acquisition_count(memview) == 1;
+ memslice->data = NULL;
if (unlikely(last_time)) {
- if (have_gil) {
- Py_CLEAR(memslice->memview);
- } else {
- PyGILState_STATE _gilstate = PyGILState_Ensure();
- Py_CLEAR(memslice->memview);
- PyGILState_Release(_gilstate);
- }
- } else {
- memslice->memview = NULL;
- }
-}
-
-
-////////// MemviewSliceCopyTemplate.proto //////////
-
-static {{memviewslice_name}}
-__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
- const char *mode, int ndim,
- size_t sizeof_dtype, int contig_flag,
- int dtype_is_object);
-
-
-////////// MemviewSliceCopyTemplate //////////
-
-static {{memviewslice_name}}
-__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
- const char *mode, int ndim,
- size_t sizeof_dtype, int contig_flag,
- int dtype_is_object)
-{
- __Pyx_RefNannyDeclarations
- int i;
- __Pyx_memviewslice new_mvs = {{memslice_init}};
- struct __pyx_memoryview_obj *from_memview = from_mvs->memview;
- Py_buffer *buf = &from_memview->view;
- PyObject *shape_tuple = NULL;
- PyObject *temp_int = NULL;
- struct __pyx_array_obj *array_obj = NULL;
- struct __pyx_memoryview_obj *memview_obj = NULL;
-
- __Pyx_RefNannySetupContext("__pyx_memoryview_copy_new_contig", 0);
-
- for (i = 0; i < ndim; i++) {
+ if (have_gil) {
+ Py_CLEAR(memslice->memview);
+ } else {
+ PyGILState_STATE _gilstate = PyGILState_Ensure();
+ Py_CLEAR(memslice->memview);
+ PyGILState_Release(_gilstate);
+ }
+ } else {
+ memslice->memview = NULL;
+ }
+}
+
+
+////////// MemviewSliceCopyTemplate.proto //////////
+
+static {{memviewslice_name}}
+__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
+ const char *mode, int ndim,
+ size_t sizeof_dtype, int contig_flag,
+ int dtype_is_object);
+
+
+////////// MemviewSliceCopyTemplate //////////
+
+static {{memviewslice_name}}
+__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
+ const char *mode, int ndim,
+ size_t sizeof_dtype, int contig_flag,
+ int dtype_is_object)
+{
+ __Pyx_RefNannyDeclarations
+ int i;
+ __Pyx_memviewslice new_mvs = {{memslice_init}};
+ struct __pyx_memoryview_obj *from_memview = from_mvs->memview;
+ Py_buffer *buf = &from_memview->view;
+ PyObject *shape_tuple = NULL;
+ PyObject *temp_int = NULL;
+ struct __pyx_array_obj *array_obj = NULL;
+ struct __pyx_memoryview_obj *memview_obj = NULL;
+
+ __Pyx_RefNannySetupContext("__pyx_memoryview_copy_new_contig", 0);
+
+ for (i = 0; i < ndim; i++) {
if (unlikely(from_mvs->suboffsets[i] >= 0)) {
- PyErr_Format(PyExc_ValueError, "Cannot copy memoryview slice with "
- "indirect dimensions (axis %d)", i);
- goto fail;
- }
- }
-
- shape_tuple = PyTuple_New(ndim);
- if (unlikely(!shape_tuple)) {
- goto fail;
- }
- __Pyx_GOTREF(shape_tuple);
-
-
- for(i = 0; i < ndim; i++) {
- temp_int = PyInt_FromSsize_t(from_mvs->shape[i]);
- if(unlikely(!temp_int)) {
- goto fail;
- } else {
- PyTuple_SET_ITEM(shape_tuple, i, temp_int);
- temp_int = NULL;
- }
- }
-
- array_obj = __pyx_array_new(shape_tuple, sizeof_dtype, buf->format, (char *) mode, NULL);
- if (unlikely(!array_obj)) {
- goto fail;
- }
- __Pyx_GOTREF(array_obj);
-
- memview_obj = (struct __pyx_memoryview_obj *) __pyx_memoryview_new(
- (PyObject *) array_obj, contig_flag,
- dtype_is_object,
- from_mvs->memview->typeinfo);
- if (unlikely(!memview_obj))
- goto fail;
-
- /* initialize new_mvs */
- if (unlikely(__Pyx_init_memviewslice(memview_obj, ndim, &new_mvs, 1) < 0))
- goto fail;
-
- if (unlikely(__pyx_memoryview_copy_contents(*from_mvs, new_mvs, ndim, ndim,
- dtype_is_object) < 0))
- goto fail;
-
- goto no_fail;
-
-fail:
- __Pyx_XDECREF(new_mvs.memview);
- new_mvs.memview = NULL;
- new_mvs.data = NULL;
-no_fail:
- __Pyx_XDECREF(shape_tuple);
- __Pyx_XDECREF(temp_int);
- __Pyx_XDECREF(array_obj);
- __Pyx_RefNannyFinishContext();
- return new_mvs;
-}
-
-
-////////// CopyContentsUtility.proto /////////
-
-#define {{func_cname}}(slice) \
- __pyx_memoryview_copy_new_contig(&slice, "{{mode}}", {{ndim}}, \
- sizeof({{dtype_decl}}), {{contig_flag}}, \
- {{dtype_is_object}})
-
-
-////////// OverlappingSlices.proto //////////
-
-static int __pyx_slices_overlap({{memviewslice_name}} *slice1,
- {{memviewslice_name}} *slice2,
- int ndim, size_t itemsize);
-
-
-////////// OverlappingSlices //////////
-
-/* Based on numpy's core/src/multiarray/array_assign.c */
-
-/* Gets a half-open range [start, end) which contains the array data */
-static void
-__pyx_get_array_memory_extents({{memviewslice_name}} *slice,
- void **out_start, void **out_end,
- int ndim, size_t itemsize)
-{
- char *start, *end;
- int i;
-
- start = end = slice->data;
-
- for (i = 0; i < ndim; i++) {
- Py_ssize_t stride = slice->strides[i];
- Py_ssize_t extent = slice->shape[i];
-
- if (extent == 0) {
- *out_start = *out_end = start;
- return;
- } else {
- if (stride > 0)
- end += stride * (extent - 1);
- else
- start += stride * (extent - 1);
- }
- }
-
- /* Return a half-open range */
- *out_start = start;
- *out_end = end + itemsize;
-}
-
-/* Returns 1 if the arrays have overlapping data, 0 otherwise */
-static int
-__pyx_slices_overlap({{memviewslice_name}} *slice1,
- {{memviewslice_name}} *slice2,
- int ndim, size_t itemsize)
-{
- void *start1, *end1, *start2, *end2;
-
- __pyx_get_array_memory_extents(slice1, &start1, &end1, ndim, itemsize);
- __pyx_get_array_memory_extents(slice2, &start2, &end2, ndim, itemsize);
-
- return (start1 < end2) && (start2 < end1);
-}
-
-
+ PyErr_Format(PyExc_ValueError, "Cannot copy memoryview slice with "
+ "indirect dimensions (axis %d)", i);
+ goto fail;
+ }
+ }
+
+ shape_tuple = PyTuple_New(ndim);
+ if (unlikely(!shape_tuple)) {
+ goto fail;
+ }
+ __Pyx_GOTREF(shape_tuple);
+
+
+ for(i = 0; i < ndim; i++) {
+ temp_int = PyInt_FromSsize_t(from_mvs->shape[i]);
+ if(unlikely(!temp_int)) {
+ goto fail;
+ } else {
+ PyTuple_SET_ITEM(shape_tuple, i, temp_int);
+ temp_int = NULL;
+ }
+ }
+
+ array_obj = __pyx_array_new(shape_tuple, sizeof_dtype, buf->format, (char *) mode, NULL);
+ if (unlikely(!array_obj)) {
+ goto fail;
+ }
+ __Pyx_GOTREF(array_obj);
+
+ memview_obj = (struct __pyx_memoryview_obj *) __pyx_memoryview_new(
+ (PyObject *) array_obj, contig_flag,
+ dtype_is_object,
+ from_mvs->memview->typeinfo);
+ if (unlikely(!memview_obj))
+ goto fail;
+
+ /* initialize new_mvs */
+ if (unlikely(__Pyx_init_memviewslice(memview_obj, ndim, &new_mvs, 1) < 0))
+ goto fail;
+
+ if (unlikely(__pyx_memoryview_copy_contents(*from_mvs, new_mvs, ndim, ndim,
+ dtype_is_object) < 0))
+ goto fail;
+
+ goto no_fail;
+
+fail:
+ __Pyx_XDECREF(new_mvs.memview);
+ new_mvs.memview = NULL;
+ new_mvs.data = NULL;
+no_fail:
+ __Pyx_XDECREF(shape_tuple);
+ __Pyx_XDECREF(temp_int);
+ __Pyx_XDECREF(array_obj);
+ __Pyx_RefNannyFinishContext();
+ return new_mvs;
+}
+
+
+////////// CopyContentsUtility.proto /////////
+
+#define {{func_cname}}(slice) \
+ __pyx_memoryview_copy_new_contig(&slice, "{{mode}}", {{ndim}}, \
+ sizeof({{dtype_decl}}), {{contig_flag}}, \
+ {{dtype_is_object}})
+
+
+////////// OverlappingSlices.proto //////////
+
+static int __pyx_slices_overlap({{memviewslice_name}} *slice1,
+ {{memviewslice_name}} *slice2,
+ int ndim, size_t itemsize);
+
+
+////////// OverlappingSlices //////////
+
+/* Based on numpy's core/src/multiarray/array_assign.c */
+
+/* Gets a half-open range [start, end) which contains the array data */
+static void
+__pyx_get_array_memory_extents({{memviewslice_name}} *slice,
+ void **out_start, void **out_end,
+ int ndim, size_t itemsize)
+{
+ char *start, *end;
+ int i;
+
+ start = end = slice->data;
+
+ for (i = 0; i < ndim; i++) {
+ Py_ssize_t stride = slice->strides[i];
+ Py_ssize_t extent = slice->shape[i];
+
+ if (extent == 0) {
+ *out_start = *out_end = start;
+ return;
+ } else {
+ if (stride > 0)
+ end += stride * (extent - 1);
+ else
+ start += stride * (extent - 1);
+ }
+ }
+
+ /* Return a half-open range */
+ *out_start = start;
+ *out_end = end + itemsize;
+}
+
+/* Returns 1 if the arrays have overlapping data, 0 otherwise */
+static int
+__pyx_slices_overlap({{memviewslice_name}} *slice1,
+ {{memviewslice_name}} *slice2,
+ int ndim, size_t itemsize)
+{
+ void *start1, *end1, *start2, *end2;
+
+ __pyx_get_array_memory_extents(slice1, &start1, &end1, ndim, itemsize);
+ __pyx_get_array_memory_extents(slice2, &start2, &end2, ndim, itemsize);
+
+ return (start1 < end2) && (start2 < end1);
+}
+
+
////////// MemviewSliceCheckContig.proto //////////
-
+
#define __pyx_memviewslice_is_contig_{{contig_type}}{{ndim}}(slice) \
__pyx_memviewslice_is_contig(slice, '{{contig_type}}', {{ndim}})
-
-
-////////// MemviewSliceIsContig.proto //////////
-
+
+
+////////// MemviewSliceIsContig.proto //////////
+
static int __pyx_memviewslice_is_contig(const {{memviewslice_name}} mvs, char order, int ndim);/*proto*/
-
-
-////////// MemviewSliceIsContig //////////
-
-static int
+
+
+////////// MemviewSliceIsContig //////////
+
+static int
__pyx_memviewslice_is_contig(const {{memviewslice_name}} mvs, char order, int ndim)
-{
- int i, index, step, start;
+{
+ int i, index, step, start;
Py_ssize_t itemsize = mvs.memview->view.itemsize;
-
- if (order == 'F') {
- step = 1;
- start = 0;
- } else {
- step = -1;
- start = ndim - 1;
- }
-
- for (i = 0; i < ndim; i++) {
- index = start + step * i;
+
+ if (order == 'F') {
+ step = 1;
+ start = 0;
+ } else {
+ step = -1;
+ start = ndim - 1;
+ }
+
+ for (i = 0; i < ndim; i++) {
+ index = start + step * i;
if (mvs.suboffsets[index] >= 0 || mvs.strides[index] != itemsize)
- return 0;
-
+ return 0;
+
itemsize *= mvs.shape[index];
- }
-
- return 1;
-}
-
-
-/////////////// MemviewSliceIndex ///////////////
-
-static CYTHON_INLINE char *
-__pyx_memviewslice_index_full(const char *bufp, Py_ssize_t idx,
- Py_ssize_t stride, Py_ssize_t suboffset)
-{
- bufp = bufp + idx * stride;
- if (suboffset >= 0) {
- bufp = *((char **) bufp) + suboffset;
- }
- return (char *) bufp;
-}
-
-
-/////////////// MemviewDtypeToObject.proto ///////////////
-
-{{if to_py_function}}
+ }
+
+ return 1;
+}
+
+
+/////////////// MemviewSliceIndex ///////////////
+
+static CYTHON_INLINE char *
+__pyx_memviewslice_index_full(const char *bufp, Py_ssize_t idx,
+ Py_ssize_t stride, Py_ssize_t suboffset)
+{
+ bufp = bufp + idx * stride;
+ if (suboffset >= 0) {
+ bufp = *((char **) bufp) + suboffset;
+ }
+ return (char *) bufp;
+}
+
+
+/////////////// MemviewDtypeToObject.proto ///////////////
+
+{{if to_py_function}}
static CYTHON_INLINE PyObject *{{get_function}}(const char *itemp); /* proto */
-{{endif}}
-
-{{if from_py_function}}
+{{endif}}
+
+{{if from_py_function}}
static CYTHON_INLINE int {{set_function}}(const char *itemp, PyObject *obj); /* proto */
-{{endif}}
-
-/////////////// MemviewDtypeToObject ///////////////
-
-{{#__pyx_memview_<dtype_name>_to_object}}
-
-/* Convert a dtype to or from a Python object */
-
-{{if to_py_function}}
+{{endif}}
+
+/////////////// MemviewDtypeToObject ///////////////
+
+{{#__pyx_memview_<dtype_name>_to_object}}
+
+/* Convert a dtype to or from a Python object */
+
+{{if to_py_function}}
static CYTHON_INLINE PyObject *{{get_function}}(const char *itemp) {
- return (PyObject *) {{to_py_function}}(*({{dtype}} *) itemp);
-}
-{{endif}}
-
-{{if from_py_function}}
+ return (PyObject *) {{to_py_function}}(*({{dtype}} *) itemp);
+}
+{{endif}}
+
+{{if from_py_function}}
static CYTHON_INLINE int {{set_function}}(const char *itemp, PyObject *obj) {
- {{dtype}} value = {{from_py_function}}(obj);
- if ({{error_condition}})
- return 0;
- *({{dtype}} *) itemp = value;
- return 1;
-}
-{{endif}}
-
-
-/////////////// MemviewObjectToObject.proto ///////////////
-
-/* Function callbacks (for memoryview object) for dtype object */
-static PyObject *{{get_function}}(const char *itemp); /* proto */
-static int {{set_function}}(const char *itemp, PyObject *obj); /* proto */
-
-
-/////////////// MemviewObjectToObject ///////////////
-
-static PyObject *{{get_function}}(const char *itemp) {
- PyObject *result = *(PyObject **) itemp;
- Py_INCREF(result);
- return result;
-}
-
-static int {{set_function}}(const char *itemp, PyObject *obj) {
- Py_INCREF(obj);
- Py_DECREF(*(PyObject **) itemp);
- *(PyObject **) itemp = obj;
- return 1;
-}
-
-/////////// ToughSlice //////////
-
-/* Dimension is indexed with 'start:stop:step' */
-
-if (unlikely(__pyx_memoryview_slice_memviewslice(
- &{{dst}},
- {{src}}.shape[{{dim}}], {{src}}.strides[{{dim}}], {{src}}.suboffsets[{{dim}}],
- {{dim}},
- {{new_ndim}},
+ {{dtype}} value = {{from_py_function}}(obj);
+ if ({{error_condition}})
+ return 0;
+ *({{dtype}} *) itemp = value;
+ return 1;
+}
+{{endif}}
+
+
+/////////////// MemviewObjectToObject.proto ///////////////
+
+/* Function callbacks (for memoryview object) for dtype object */
+static PyObject *{{get_function}}(const char *itemp); /* proto */
+static int {{set_function}}(const char *itemp, PyObject *obj); /* proto */
+
+
+/////////////// MemviewObjectToObject ///////////////
+
+static PyObject *{{get_function}}(const char *itemp) {
+ PyObject *result = *(PyObject **) itemp;
+ Py_INCREF(result);
+ return result;
+}
+
+static int {{set_function}}(const char *itemp, PyObject *obj) {
+ Py_INCREF(obj);
+ Py_DECREF(*(PyObject **) itemp);
+ *(PyObject **) itemp = obj;
+ return 1;
+}
+
+/////////// ToughSlice //////////
+
+/* Dimension is indexed with 'start:stop:step' */
+
+if (unlikely(__pyx_memoryview_slice_memviewslice(
+ &{{dst}},
+ {{src}}.shape[{{dim}}], {{src}}.strides[{{dim}}], {{src}}.suboffsets[{{dim}}],
+ {{dim}},
+ {{new_ndim}},
&{{get_suboffset_dim()}},
- {{start}},
- {{stop}},
- {{step}},
- {{int(have_start)}},
- {{int(have_stop)}},
- {{int(have_step)}},
- 1) < 0))
-{
- {{error_goto}}
-}
-
-
-////////// SimpleSlice //////////
-
-/* Dimension is indexed with ':' only */
-
-{{dst}}.shape[{{new_ndim}}] = {{src}}.shape[{{dim}}];
-{{dst}}.strides[{{new_ndim}}] = {{src}}.strides[{{dim}}];
-
-{{if access == 'direct'}}
- {{dst}}.suboffsets[{{new_ndim}}] = -1;
-{{else}}
- {{dst}}.suboffsets[{{new_ndim}}] = {{src}}.suboffsets[{{dim}}];
- if ({{src}}.suboffsets[{{dim}}] >= 0)
+ {{start}},
+ {{stop}},
+ {{step}},
+ {{int(have_start)}},
+ {{int(have_stop)}},
+ {{int(have_step)}},
+ 1) < 0))
+{
+ {{error_goto}}
+}
+
+
+////////// SimpleSlice //////////
+
+/* Dimension is indexed with ':' only */
+
+{{dst}}.shape[{{new_ndim}}] = {{src}}.shape[{{dim}}];
+{{dst}}.strides[{{new_ndim}}] = {{src}}.strides[{{dim}}];
+
+{{if access == 'direct'}}
+ {{dst}}.suboffsets[{{new_ndim}}] = -1;
+{{else}}
+ {{dst}}.suboffsets[{{new_ndim}}] = {{src}}.suboffsets[{{dim}}];
+ if ({{src}}.suboffsets[{{dim}}] >= 0)
{{get_suboffset_dim()}} = {{new_ndim}};
-{{endif}}
-
-
-////////// SliceIndex //////////
-
-// Dimension is indexed with an integer, we could use the ToughSlice
-// approach, but this is faster
-
-{
- Py_ssize_t __pyx_tmp_idx = {{idx}};
+{{endif}}
+
+
+////////// SliceIndex //////////
+
+// Dimension is indexed with an integer; we could use the ToughSlice
+// approach, but this is faster
+
+{
+ Py_ssize_t __pyx_tmp_idx = {{idx}};
{{if wraparound or boundscheck}}
Py_ssize_t __pyx_tmp_shape = {{src}}.shape[{{dim}}];
{{endif}}
- Py_ssize_t __pyx_tmp_stride = {{src}}.strides[{{dim}}];
+ Py_ssize_t __pyx_tmp_stride = {{src}}.strides[{{dim}}];
{{if wraparound}}
if (__pyx_tmp_idx < 0)
__pyx_tmp_idx += __pyx_tmp_shape;
{{endif}}
-
+
{{if boundscheck}}
if (unlikely(!__Pyx_is_valid_index(__pyx_tmp_idx, __pyx_tmp_shape))) {
{{if not have_gil}}
@@ -867,79 +867,79 @@ if (unlikely(__pyx_memoryview_slice_memviewslice(
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();
#endif
{{endif}}
-
+
PyErr_SetString(PyExc_IndexError,
"Index out of bounds (axis {{dim}})");
-
+
{{if not have_gil}}
#ifdef WITH_THREAD
PyGILState_Release(__pyx_gilstate_save);
#endif
{{endif}}
-
+
{{error_goto}}
}
{{endif}}
-
- {{if all_dimensions_direct}}
- {{dst}}.data += __pyx_tmp_idx * __pyx_tmp_stride;
- {{else}}
+
+ {{if all_dimensions_direct}}
+ {{dst}}.data += __pyx_tmp_idx * __pyx_tmp_stride;
+ {{else}}
if ({{get_suboffset_dim()}} < 0) {
- {{dst}}.data += __pyx_tmp_idx * __pyx_tmp_stride;
-
- /* This dimension is the first dimension, or is preceded by */
- /* direct or indirect dimensions that are indexed away. */
- /* Hence suboffset_dim must be less than zero, and we can have */
- /* our data pointer refer to another block by dereferencing. */
- /* slice.data -> B -> C becomes slice.data -> C */
-
- {{if indirect}}
- {
- Py_ssize_t __pyx_tmp_suboffset = {{src}}.suboffsets[{{dim}}];
-
- {{if generic}}
- if (__pyx_tmp_suboffset >= 0)
- {{endif}}
-
- {{dst}}.data = *((char **) {{dst}}.data) + __pyx_tmp_suboffset;
- }
- {{endif}}
-
- } else {
+ {{dst}}.data += __pyx_tmp_idx * __pyx_tmp_stride;
+
+ /* This dimension is the first dimension, or is preceded by */
+ /* direct or indirect dimensions that are indexed away. */
+ /* Hence suboffset_dim must be less than zero, and we can have */
+ /* our data pointer refer to another block by dereferencing. */
+ /* slice.data -> B -> C becomes slice.data -> C */
+
+ {{if indirect}}
+ {
+ Py_ssize_t __pyx_tmp_suboffset = {{src}}.suboffsets[{{dim}}];
+
+ {{if generic}}
+ if (__pyx_tmp_suboffset >= 0)
+ {{endif}}
+
+ {{dst}}.data = *((char **) {{dst}}.data) + __pyx_tmp_suboffset;
+ }
+ {{endif}}
+
+ } else {
{{dst}}.suboffsets[{{get_suboffset_dim()}}] += __pyx_tmp_idx * __pyx_tmp_stride;
-
- /* Note: dimension can not be indirect, the compiler will have */
- /* issued an error */
- }
-
- {{endif}}
-}
-
-
-////////// FillStrided1DScalar.proto //////////
-
-static void
-__pyx_fill_slice_{{dtype_name}}({{type_decl}} *p, Py_ssize_t extent, Py_ssize_t stride,
- size_t itemsize, void *itemp);
-
-////////// FillStrided1DScalar //////////
-
-/* Fill a slice with a scalar value. The dimension is direct and strided or contiguous */
-/* This can be used as a callback for the memoryview object to efficienty assign a scalar */
-/* Currently unused */
-static void
-__pyx_fill_slice_{{dtype_name}}({{type_decl}} *p, Py_ssize_t extent, Py_ssize_t stride,
- size_t itemsize, void *itemp)
-{
- Py_ssize_t i;
- {{type_decl}} item = *(({{type_decl}} *) itemp);
- {{type_decl}} *endp;
-
- stride /= sizeof({{type_decl}});
- endp = p + stride * extent;
-
- while (p < endp) {
- *p = item;
- p += stride;
- }
-}
+
+ /* Note: dimension can not be indirect, the compiler will have */
+ /* issued an error */
+ }
+
+ {{endif}}
+}
+
+
+////////// FillStrided1DScalar.proto //////////
+
+static void
+__pyx_fill_slice_{{dtype_name}}({{type_decl}} *p, Py_ssize_t extent, Py_ssize_t stride,
+ size_t itemsize, void *itemp);
+
+////////// FillStrided1DScalar //////////
+
+/* Fill a slice with a scalar value. The dimension is direct and strided or contiguous */
+/* This can be used as a callback for the memoryview object to efficiently assign a scalar */
+/* Currently unused */
+static void
+__pyx_fill_slice_{{dtype_name}}({{type_decl}} *p, Py_ssize_t extent, Py_ssize_t stride,
+ size_t itemsize, void *itemp)
+{
+ Py_ssize_t i;
+ {{type_decl}} item = *(({{type_decl}} *) itemp);
+ {{type_decl}} *endp;
+
+ stride /= sizeof({{type_decl}});
+ endp = p + stride * extent;
+
+ while (p < endp) {
+ *p = item;
+ p += stride;
+ }
+}
diff --git a/contrib/tools/cython/Cython/Utility/ModuleSetupCode.c b/contrib/tools/cython/Cython/Utility/ModuleSetupCode.c
index b246058bd3..0c7059b354 100644
--- a/contrib/tools/cython/Cython/Utility/ModuleSetupCode.c
+++ b/contrib/tools/cython/Cython/Utility/ModuleSetupCode.c
@@ -1,5 +1,5 @@
-/////////////// CModulePreamble ///////////////
-
+/////////////// CModulePreamble ///////////////
+
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
@@ -7,33 +7,33 @@
#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
// Ignore tp_print initializer. Need for ya make -DUSE_SYSTEM_PYTHON=3.8
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
#endif
-
-#include <stddef.h> /* For offsetof */
-#ifndef offsetof
+#endif
+
+#include <stddef.h> /* For offsetof */
+#ifndef offsetof
#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
-#endif
-
-#if !defined(WIN32) && !defined(MS_WINDOWS)
- #ifndef __stdcall
- #define __stdcall
- #endif
- #ifndef __cdecl
- #define __cdecl
- #endif
- #ifndef __fastcall
- #define __fastcall
- #endif
-#endif
-
-#ifndef DL_IMPORT
- #define DL_IMPORT(t) t
-#endif
-#ifndef DL_EXPORT
- #define DL_EXPORT(t) t
-#endif
-
+#endif
+
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+ #ifndef __stdcall
+ #define __stdcall
+ #endif
+ #ifndef __cdecl
+ #define __cdecl
+ #endif
+ #ifndef __fastcall
+ #define __fastcall
+ #endif
+#endif
+
+#ifndef DL_IMPORT
+ #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+ #define DL_EXPORT(t) t
+#endif
+
// For use in DL_IMPORT/DL_EXPORT macros.
#define __PYX_COMMA ,
@@ -44,15 +44,15 @@
#endif
#endif
-#ifndef PY_LONG_LONG
- #define PY_LONG_LONG LONG_LONG
-#endif
-
-#ifndef Py_HUGE_VAL
- #define Py_HUGE_VAL HUGE_VAL
-#endif
-
-#ifdef PYPY_VERSION
+#ifndef PY_LONG_LONG
+ #define PY_LONG_LONG LONG_LONG
+#endif
+
+#ifndef Py_HUGE_VAL
+ #define Py_HUGE_VAL HUGE_VAL
+#endif
+
+#ifdef PYPY_VERSION
#define CYTHON_COMPILING_IN_PYPY 1
#define CYTHON_COMPILING_IN_PYSTON 0
#define CYTHON_COMPILING_IN_CPYTHON 0
@@ -137,7 +137,7 @@
#undef CYTHON_USE_EXC_INFO_STACK
#define CYTHON_USE_EXC_INFO_STACK 0
-#else
+#else
#define CYTHON_COMPILING_IN_PYPY 0
#define CYTHON_COMPILING_IN_PYSTON 0
#define CYTHON_COMPILING_IN_CPYTHON 1
@@ -207,8 +207,8 @@
#ifndef CYTHON_USE_EXC_INFO_STACK
#define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3)
#endif
-#endif
-
+#endif
+
#if !defined(CYTHON_FAST_PYCCALL)
#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
#endif
@@ -385,18 +385,18 @@ class __Pyx_FakeReference {
#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
#define Py_OptimizeFlag 0
-#endif
-
-#define __PYX_BUILD_PY_SSIZE_T "n"
-#define CYTHON_FORMAT_SSIZE_T "z"
-
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
- #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
- PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
- #define __Pyx_DefaultClassType PyClass_Type
-#else
- #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+#endif
+
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+ #define __Pyx_DefaultClassType PyClass_Type
+#else
+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"
#define __Pyx_DefaultClassType PyType_Type
#if PY_VERSION_HEX >= 0x030B00A1
static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int k, int l, int s, int f,
@@ -470,24 +470,24 @@ class __Pyx_FakeReference {
}
#else
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
- PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+ #define __Pyx_DefaultClassType PyType_Type
#endif
- #define __Pyx_DefaultClassType PyType_Type
-#endif
-
+
#ifndef Py_TPFLAGS_CHECKTYPES
- #define Py_TPFLAGS_CHECKTYPES 0
+ #define Py_TPFLAGS_CHECKTYPES 0
#endif
#ifndef Py_TPFLAGS_HAVE_INDEX
- #define Py_TPFLAGS_HAVE_INDEX 0
-#endif
+ #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
- #define Py_TPFLAGS_HAVE_NEWBUFFER 0
-#endif
+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
#ifndef Py_TPFLAGS_HAVE_FINALIZE
- #define Py_TPFLAGS_HAVE_FINALIZE 0
-#endif
-
+ #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+
#ifndef METH_STACKLESS
// already defined for Stackless Python (all versions) and C-Python >= 3.7
// value if defined: Stackless Python < 3.6: 0x80 else 0x100
@@ -604,24 +604,24 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#define __Pyx_PyDict_GetItemStr(dict, name) PyDict_GetItem(dict, name)
#endif
-/* new Py3.3 unicode type (PEP 393) */
-#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
- #define CYTHON_PEP393_ENABLED 1
+/* new Py3.3 unicode type (PEP 393) */
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+ #define CYTHON_PEP393_ENABLED 1
#if defined(PyUnicode_IS_READY)
- #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
- 0 : _PyUnicode_Ready((PyObject *)(op)))
+ #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
+ 0 : _PyUnicode_Ready((PyObject *)(op)))
#else
// Py3.12 / PEP-623 will remove wstr type unicode strings and all of the PyUnicode_READY() machinery.
#define __Pyx_PyUnicode_READY(op) (0)
#endif
- #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
- #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
#define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u)
- #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
- #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
- #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
+ #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
+ #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
+ #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
#if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE)
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
@@ -634,32 +634,32 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#else
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
#endif
-#else
- #define CYTHON_PEP393_ENABLED 0
+#else
+ #define CYTHON_PEP393_ENABLED 0
#define PyUnicode_1BYTE_KIND 1
#define PyUnicode_2BYTE_KIND 2
#define PyUnicode_4BYTE_KIND 4
- #define __Pyx_PyUnicode_READY(op) (0)
- #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
- #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
#define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111)
- #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
- #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
- /* (void)(k) => avoid unused variable warning due to macro: */
- #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
+ #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
+ #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
+ /* (void)(k) => avoid unused variable warning due to macro: */
+ #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
#define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch)
#define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u))
-#endif
-
-#if CYTHON_COMPILING_IN_PYPY
- #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
- #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
-#else
- #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
- #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
- PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
-#endif
-
+#endif
+
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
+#else
+ #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
+ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
+#endif
+
#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
#define PyUnicode_Contains(u, s) PySequence_Contains(u, s)
#endif
@@ -675,41 +675,41 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
// ("..." % x) must call PyNumber_Remainder() if x is a string subclass that implements "__rmod__()".
#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
-
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
-#else
- #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
-#endif
-
+
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
+#else
+ #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
+#endif
+
#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
#define PyObject_ASCII(o) PyObject_Repr(o)
#endif
-#if PY_MAJOR_VERSION >= 3
- #define PyBaseString_Type PyUnicode_Type
- #define PyStringObject PyUnicodeObject
- #define PyString_Type PyUnicode_Type
- #define PyString_Check PyUnicode_Check
- #define PyString_CheckExact PyUnicode_CheckExact
+#if PY_MAJOR_VERSION >= 3
+ #define PyBaseString_Type PyUnicode_Type
+ #define PyStringObject PyUnicodeObject
+ #define PyString_Type PyUnicode_Type
+ #define PyString_Check PyUnicode_Check
+ #define PyString_CheckExact PyUnicode_CheckExact
// PyPy3 used to define "PyObject_Unicode"
#ifndef PyObject_Unicode
#define PyObject_Unicode PyObject_Str
-#endif
-#endif
-
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
- #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
-#else
- #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
- #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
-#endif
-
-#ifndef PySet_CheckExact
- #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
-#endif
-
+#endif
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+ #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+ #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
+ #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
+#endif
+
+#ifndef PySet_CheckExact
+ #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
+#endif
+
#if PY_VERSION_HEX >= 0x030900A4
#define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
@@ -725,50 +725,50 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
// NOTE: might fail with exception => check for -1
#define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq)
#endif
-
-#if PY_MAJOR_VERSION >= 3
- #define PyIntObject PyLongObject
- #define PyInt_Type PyLong_Type
- #define PyInt_Check(op) PyLong_Check(op)
- #define PyInt_CheckExact(op) PyLong_CheckExact(op)
- #define PyInt_FromString PyLong_FromString
- #define PyInt_FromUnicode PyLong_FromUnicode
- #define PyInt_FromLong PyLong_FromLong
- #define PyInt_FromSize_t PyLong_FromSize_t
- #define PyInt_FromSsize_t PyLong_FromSsize_t
- #define PyInt_AsLong PyLong_AsLong
- #define PyInt_AS_LONG PyLong_AS_LONG
- #define PyInt_AsSsize_t PyLong_AsSsize_t
- #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
- #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
- #define PyNumber_Int PyNumber_Long
-#endif
-
-#if PY_MAJOR_VERSION >= 3
- #define PyBoolObject PyLongObject
-#endif
-
+
+#if PY_MAJOR_VERSION >= 3
+ #define PyIntObject PyLongObject
+ #define PyInt_Type PyLong_Type
+ #define PyInt_Check(op) PyLong_Check(op)
+ #define PyInt_CheckExact(op) PyLong_CheckExact(op)
+ #define PyInt_FromString PyLong_FromString
+ #define PyInt_FromUnicode PyLong_FromUnicode
+ #define PyInt_FromLong PyLong_FromLong
+ #define PyInt_FromSize_t PyLong_FromSize_t
+ #define PyInt_FromSsize_t PyLong_FromSsize_t
+ #define PyInt_AsLong PyLong_AsLong
+ #define PyInt_AS_LONG PyLong_AS_LONG
+ #define PyInt_AsSsize_t PyLong_AsSsize_t
+ #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
+ #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+ #define PyNumber_Int PyNumber_Long
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define PyBoolObject PyLongObject
+#endif
+
#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
#ifndef PyUnicode_InternFromString
#define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
#endif
#endif
-#if PY_VERSION_HEX < 0x030200A4
- typedef long Py_hash_t;
- #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+#if PY_VERSION_HEX < 0x030200A4
+ typedef long Py_hash_t;
+ #define __Pyx_PyInt_FromHash_t PyInt_FromLong
#define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t
-#else
- #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+#else
+ #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
#define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t
-#endif
-
-#if PY_MAJOR_VERSION >= 3
+#endif
+
+#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyMethod_New(func, self, klass) ((self) ? ((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func))
#else
#define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
-#endif
-
+#endif
+
// backport of PyAsyncMethods from Py3.5 to older Py3.x versions
// (mis-)using the "tp_reserved" type slot which is re-activated as "tp_as_async" in Py3.5
#if CYTHON_USE_ASYNC_SLOTS
@@ -787,9 +787,9 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
unaryfunc am_aiter;
unaryfunc am_anext;
} __Pyx_PyAsyncMethodsStruct;
-#endif
-
-
+#endif
+
+
/////////////// SmallCodeConfig.proto ///////////////
#ifndef CYTHON_SMALL_CODE
@@ -972,44 +972,44 @@ static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObj
#endif
#include <math.h>
-#ifdef NAN
-#define __PYX_NAN() ((float) NAN)
-#else
-static CYTHON_INLINE float __PYX_NAN() {
+#ifdef NAN
+#define __PYX_NAN() ((float) NAN)
+#else
+static CYTHON_INLINE float __PYX_NAN() {
// Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and
// a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is
// a quiet NaN.
- float value;
- memset(&value, 0xFF, sizeof(value));
- return value;
-}
-#endif
-
+ float value;
+ memset(&value, 0xFF, sizeof(value));
+ return value;
+}
+#endif
+
#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
#define __Pyx_truncl trunc
#else
#define __Pyx_truncl truncl
#endif
-
-/////////////// UtilityFunctionPredeclarations.proto ///////////////
-
+
+/////////////// UtilityFunctionPredeclarations.proto ///////////////
+
typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
- const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
-
-/////////////// ForceInitThreads.proto ///////////////
+ const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
+
+/////////////// ForceInitThreads.proto ///////////////
//@proto_block: utility_code_proto_before_types
-
-#ifndef __PYX_FORCE_INIT_THREADS
- #define __PYX_FORCE_INIT_THREADS 0
-#endif
-
-/////////////// InitThreads.init ///////////////
-
+
+#ifndef __PYX_FORCE_INIT_THREADS
+ #define __PYX_FORCE_INIT_THREADS 0
+#endif
+
+/////////////// InitThreads.init ///////////////
+
#if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0
-PyEval_InitThreads();
-#endif
-
+PyEval_InitThreads();
+#endif
+
/////////////// ModuleCreationPEP489 ///////////////
//@substitute: naming
@@ -1090,145 +1090,145 @@ bad:
//#endif
-/////////////// CodeObjectCache.proto ///////////////
-
-typedef struct {
+/////////////// CodeObjectCache.proto ///////////////
+
+typedef struct {
PyCodeObject* code_object;
- int code_line;
-} __Pyx_CodeObjectCacheEntry;
-
-struct __Pyx_CodeObjectCache {
- int count;
- int max_count;
- __Pyx_CodeObjectCacheEntry* entries;
-};
-
-static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
-
-static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
-static PyCodeObject *__pyx_find_code_object(int code_line);
-static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
-
-/////////////// CodeObjectCache ///////////////
-// Note that errors are simply ignored in the code below.
-// This is just a cache, if a lookup or insertion fails - so what?
-
-static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
- int start = 0, mid = 0, end = count - 1;
- if (end >= 0 && code_line > entries[end].code_line) {
- return count;
- }
- while (start < end) {
+ int code_line;
+} __Pyx_CodeObjectCacheEntry;
+
+struct __Pyx_CodeObjectCache {
+ int count;
+ int max_count;
+ __Pyx_CodeObjectCacheEntry* entries;
+};
+
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
+
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+/////////////// CodeObjectCache ///////////////
+// Note that errors are simply ignored in the code below.
+// This is just a cache, if a lookup or insertion fails - so what?
+
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
+ int start = 0, mid = 0, end = count - 1;
+ if (end >= 0 && code_line > entries[end].code_line) {
+ return count;
+ }
+ while (start < end) {
mid = start + (end - start) / 2;
- if (code_line < entries[mid].code_line) {
- end = mid;
- } else if (code_line > entries[mid].code_line) {
- start = mid + 1;
- } else {
- return mid;
- }
- }
- if (code_line <= entries[mid].code_line) {
- return mid;
- } else {
- return mid + 1;
- }
-}
-
-static PyCodeObject *__pyx_find_code_object(int code_line) {
- PyCodeObject* code_object;
- int pos;
- if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
- return NULL;
- }
- pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
- if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
- return NULL;
- }
- code_object = __pyx_code_cache.entries[pos].code_object;
- Py_INCREF(code_object);
- return code_object;
-}
-
-static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
- int pos, i;
- __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
- if (unlikely(!code_line)) {
- return;
- }
- if (unlikely(!entries)) {
- entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
- if (likely(entries)) {
- __pyx_code_cache.entries = entries;
- __pyx_code_cache.max_count = 64;
- __pyx_code_cache.count = 1;
- entries[0].code_line = code_line;
- entries[0].code_object = code_object;
- Py_INCREF(code_object);
- }
- return;
- }
- pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
- if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
- PyCodeObject* tmp = entries[pos].code_object;
- entries[pos].code_object = code_object;
- Py_DECREF(tmp);
- return;
- }
- if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
- int new_max = __pyx_code_cache.max_count + 64;
- entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
+ if (code_line < entries[mid].code_line) {
+ end = mid;
+ } else if (code_line > entries[mid].code_line) {
+ start = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+ if (code_line <= entries[mid].code_line) {
+ return mid;
+ } else {
+ return mid + 1;
+ }
+}
+
+static PyCodeObject *__pyx_find_code_object(int code_line) {
+ PyCodeObject* code_object;
+ int pos;
+ if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
+ return NULL;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
+ return NULL;
+ }
+ code_object = __pyx_code_cache.entries[pos].code_object;
+ Py_INCREF(code_object);
+ return code_object;
+}
+
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
+ int pos, i;
+ __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+ if (unlikely(!code_line)) {
+ return;
+ }
+ if (unlikely(!entries)) {
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
+ if (likely(entries)) {
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = 64;
+ __pyx_code_cache.count = 1;
+ entries[0].code_line = code_line;
+ entries[0].code_object = code_object;
+ Py_INCREF(code_object);
+ }
+ return;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
+ PyCodeObject* tmp = entries[pos].code_object;
+ entries[pos].code_object = code_object;
+ Py_DECREF(tmp);
+ return;
+ }
+ if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
+ int new_max = __pyx_code_cache.max_count + 64;
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
__pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
- if (unlikely(!entries)) {
- return;
- }
- __pyx_code_cache.entries = entries;
- __pyx_code_cache.max_count = new_max;
- }
- for (i=__pyx_code_cache.count; i>pos; i--) {
- entries[i] = entries[i-1];
- }
- entries[pos].code_line = code_line;
- entries[pos].code_object = code_object;
- __pyx_code_cache.count++;
- Py_INCREF(code_object);
-}
-
-/////////////// CodeObjectCache.cleanup ///////////////
-
- if (__pyx_code_cache.entries) {
- __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
- int i, count = __pyx_code_cache.count;
- __pyx_code_cache.count = 0;
- __pyx_code_cache.max_count = 0;
- __pyx_code_cache.entries = NULL;
- for (i=0; i<count; i++) {
- Py_DECREF(entries[i].code_object);
- }
- PyMem_Free(entries);
- }
-
-/////////////// CheckBinaryVersion.proto ///////////////
-
-static int __Pyx_check_binary_version(void);
-
-/////////////// CheckBinaryVersion ///////////////
-
-static int __Pyx_check_binary_version(void) {
- char ctversion[4], rtversion[4];
- PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
- PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
- if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
- char message[200];
- PyOS_snprintf(message, sizeof(message),
- "compiletime version %s of module '%.100s' "
- "does not match runtime version %s",
- ctversion, __Pyx_MODULE_NAME, rtversion);
- return PyErr_WarnEx(NULL, message, 1);
- }
- return 0;
-}
-
+ if (unlikely(!entries)) {
+ return;
+ }
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = new_max;
+ }
+ for (i=__pyx_code_cache.count; i>pos; i--) {
+ entries[i] = entries[i-1];
+ }
+ entries[pos].code_line = code_line;
+ entries[pos].code_object = code_object;
+ __pyx_code_cache.count++;
+ Py_INCREF(code_object);
+}
+
+/////////////// CodeObjectCache.cleanup ///////////////
+
+ if (__pyx_code_cache.entries) {
+ __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+ int i, count = __pyx_code_cache.count;
+ __pyx_code_cache.count = 0;
+ __pyx_code_cache.max_count = 0;
+ __pyx_code_cache.entries = NULL;
+ for (i=0; i<count; i++) {
+ Py_DECREF(entries[i].code_object);
+ }
+ PyMem_Free(entries);
+ }
+
+/////////////// CheckBinaryVersion.proto ///////////////
+
+static int __Pyx_check_binary_version(void);
+
+/////////////// CheckBinaryVersion ///////////////
+
+static int __Pyx_check_binary_version(void) {
+ char ctversion[4], rtversion[4];
+ PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
+ PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
+ if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
+ char message[200];
+ PyOS_snprintf(message, sizeof(message),
+ "compiletime version %s of module '%.100s' "
+ "does not match runtime version %s",
+ ctversion, __Pyx_MODULE_NAME, rtversion);
+ return PyErr_WarnEx(NULL, message, 1);
+ }
+ return 0;
+}
+
/////////////// IsLittleEndian.proto ///////////////
static CYTHON_INLINE int __Pyx_Is_Little_Endian(void);
@@ -1245,91 +1245,91 @@ static CYTHON_INLINE int __Pyx_Is_Little_Endian(void)
return S.u8[0] == 4;
}
-/////////////// Refnanny.proto ///////////////
-
-#ifndef CYTHON_REFNANNY
- #define CYTHON_REFNANNY 0
-#endif
-
-#if CYTHON_REFNANNY
- typedef struct {
- void (*INCREF)(void*, PyObject*, int);
- void (*DECREF)(void*, PyObject*, int);
- void (*GOTREF)(void*, PyObject*, int);
- void (*GIVEREF)(void*, PyObject*, int);
- void* (*SetupContext)(const char*, int, const char*);
- void (*FinishContext)(void**);
- } __Pyx_RefNannyAPIStruct;
- static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
- static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
- #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
-#ifdef WITH_THREAD
- #define __Pyx_RefNannySetupContext(name, acquire_gil) \
- if (acquire_gil) { \
- PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
- PyGILState_Release(__pyx_gilstate_save); \
- } else { \
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
- }
-#else
- #define __Pyx_RefNannySetupContext(name, acquire_gil) \
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
-#endif
- #define __Pyx_RefNannyFinishContext() \
- __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
- #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
- #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
- #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
- #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
-#else
- #define __Pyx_RefNannyDeclarations
- #define __Pyx_RefNannySetupContext(name, acquire_gil)
- #define __Pyx_RefNannyFinishContext()
- #define __Pyx_INCREF(r) Py_INCREF(r)
- #define __Pyx_DECREF(r) Py_DECREF(r)
- #define __Pyx_GOTREF(r)
- #define __Pyx_GIVEREF(r)
- #define __Pyx_XINCREF(r) Py_XINCREF(r)
- #define __Pyx_XDECREF(r) Py_XDECREF(r)
- #define __Pyx_XGOTREF(r)
- #define __Pyx_XGIVEREF(r)
-#endif /* CYTHON_REFNANNY */
-
-#define __Pyx_XDECREF_SET(r, v) do { \
- PyObject *tmp = (PyObject *) r; \
- r = v; __Pyx_XDECREF(tmp); \
- } while (0)
-#define __Pyx_DECREF_SET(r, v) do { \
- PyObject *tmp = (PyObject *) r; \
- r = v; __Pyx_DECREF(tmp); \
- } while (0)
-
-#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
-#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
-
-/////////////// Refnanny ///////////////
-
-#if CYTHON_REFNANNY
-static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
- PyObject *m = NULL, *p = NULL;
- void *r = NULL;
+/////////////// Refnanny.proto ///////////////
+
+#ifndef CYTHON_REFNANNY
+ #define CYTHON_REFNANNY 0
+#endif
+
+#if CYTHON_REFNANNY
+ typedef struct {
+ void (*INCREF)(void*, PyObject*, int);
+ void (*DECREF)(void*, PyObject*, int);
+ void (*GOTREF)(void*, PyObject*, int);
+ void (*GIVEREF)(void*, PyObject*, int);
+ void* (*SetupContext)(const char*, int, const char*);
+ void (*FinishContext)(void**);
+ } __Pyx_RefNannyAPIStruct;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
+ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD
+ #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+ if (acquire_gil) { \
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+ PyGILState_Release(__pyx_gilstate_save); \
+ } else { \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+ }
+#else
+ #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+ #define __Pyx_RefNannyFinishContext() \
+ __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+ #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
+ #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+ #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+ #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else
+ #define __Pyx_RefNannyDeclarations
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)
+ #define __Pyx_RefNannyFinishContext()
+ #define __Pyx_INCREF(r) Py_INCREF(r)
+ #define __Pyx_DECREF(r) Py_DECREF(r)
+ #define __Pyx_GOTREF(r)
+ #define __Pyx_GIVEREF(r)
+ #define __Pyx_XINCREF(r) Py_XINCREF(r)
+ #define __Pyx_XDECREF(r) Py_XDECREF(r)
+ #define __Pyx_XGOTREF(r)
+ #define __Pyx_XGIVEREF(r)
+#endif /* CYTHON_REFNANNY */
+
+#define __Pyx_XDECREF_SET(r, v) do { \
+ PyObject *tmp = (PyObject *) r; \
+ r = v; __Pyx_XDECREF(tmp); \
+ } while (0)
+#define __Pyx_DECREF_SET(r, v) do { \
+ PyObject *tmp = (PyObject *) r; \
+ r = v; __Pyx_DECREF(tmp); \
+ } while (0)
+
+#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
+#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
+
+/////////////// Refnanny ///////////////
+
+#if CYTHON_REFNANNY
+static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
+ PyObject *m = NULL, *p = NULL;
+ void *r = NULL;
m = PyImport_ImportModule(modname);
- if (!m) goto end;
+ if (!m) goto end;
p = PyObject_GetAttrString(m, "RefNannyAPI");
- if (!p) goto end;
- r = PyLong_AsVoidPtr(p);
-end:
- Py_XDECREF(p);
- Py_XDECREF(m);
- return (__Pyx_RefNannyAPIStruct *)r;
-}
-#endif /* CYTHON_REFNANNY */
-
+ if (!p) goto end;
+ r = PyLong_AsVoidPtr(p);
+end:
+ Py_XDECREF(p);
+ Py_XDECREF(m);
+ return (__Pyx_RefNannyAPIStruct *)r;
+}
+#endif /* CYTHON_REFNANNY */
+
/////////////// ImportRefnannyAPI ///////////////
@@ -1344,91 +1344,91 @@ if (!__Pyx_RefNanny) {
#endif
-/////////////// RegisterModuleCleanup.proto ///////////////
-//@substitute: naming
-
-static void ${cleanup_cname}(PyObject *self); /*proto*/
+/////////////// RegisterModuleCleanup.proto ///////////////
+//@substitute: naming
+
+static void ${cleanup_cname}(PyObject *self); /*proto*/
#if PY_MAJOR_VERSION < 3 || CYTHON_COMPILING_IN_PYPY
-static int __Pyx_RegisterCleanup(void); /*proto*/
+static int __Pyx_RegisterCleanup(void); /*proto*/
#else
#define __Pyx_RegisterCleanup() (0)
#endif
-
-/////////////// RegisterModuleCleanup ///////////////
-//@substitute: naming
-
+
+/////////////// RegisterModuleCleanup ///////////////
+//@substitute: naming
+
#if PY_MAJOR_VERSION < 3 || CYTHON_COMPILING_IN_PYPY
-static PyObject* ${cleanup_cname}_atexit(PyObject *module, CYTHON_UNUSED PyObject *unused) {
- ${cleanup_cname}(module);
- Py_INCREF(Py_None); return Py_None;
-}
-
-static int __Pyx_RegisterCleanup(void) {
- // Don't use Py_AtExit because that has a 32-call limit and is called
- // after python finalization.
- // Also, we try to prepend the cleanup function to "atexit._exithandlers"
- // in Py2 because CPython runs them last-to-first. Being run last allows
- // user exit code to run before us that may depend on the globals
- // and cached objects that we are about to clean up.
-
- static PyMethodDef cleanup_def = {
- "__cleanup", (PyCFunction)${cleanup_cname}_atexit, METH_NOARGS, 0};
-
- PyObject *cleanup_func = 0;
- PyObject *atexit = 0;
- PyObject *reg = 0;
- PyObject *args = 0;
- PyObject *res = 0;
- int ret = -1;
-
- cleanup_func = PyCFunction_New(&cleanup_def, 0);
- if (!cleanup_func)
- goto bad;
-
+static PyObject* ${cleanup_cname}_atexit(PyObject *module, CYTHON_UNUSED PyObject *unused) {
+ ${cleanup_cname}(module);
+ Py_INCREF(Py_None); return Py_None;
+}
+
+static int __Pyx_RegisterCleanup(void) {
+ // Don't use Py_AtExit because that has a 32-call limit and is called
+ // after python finalization.
+ // Also, we try to prepend the cleanup function to "atexit._exithandlers"
+ // in Py2 because CPython runs them last-to-first. Being run last allows
+ // user exit code to run before us that may depend on the globals
+ // and cached objects that we are about to clean up.
+
+ static PyMethodDef cleanup_def = {
+ "__cleanup", (PyCFunction)${cleanup_cname}_atexit, METH_NOARGS, 0};
+
+ PyObject *cleanup_func = 0;
+ PyObject *atexit = 0;
+ PyObject *reg = 0;
+ PyObject *args = 0;
+ PyObject *res = 0;
+ int ret = -1;
+
+ cleanup_func = PyCFunction_New(&cleanup_def, 0);
+ if (!cleanup_func)
+ goto bad;
+
atexit = PyImport_ImportModule("atexit");
- if (!atexit)
- goto bad;
- reg = PyObject_GetAttrString(atexit, "_exithandlers");
- if (reg && PyList_Check(reg)) {
- PyObject *a, *kw;
- a = PyTuple_New(0);
- kw = PyDict_New();
- if (!a || !kw) {
- Py_XDECREF(a);
- Py_XDECREF(kw);
- goto bad;
- }
- args = PyTuple_Pack(3, cleanup_func, a, kw);
- Py_DECREF(a);
- Py_DECREF(kw);
- if (!args)
- goto bad;
- ret = PyList_Insert(reg, 0, args);
- } else {
- if (!reg)
- PyErr_Clear();
- Py_XDECREF(reg);
- reg = PyObject_GetAttrString(atexit, "register");
- if (!reg)
- goto bad;
- args = PyTuple_Pack(1, cleanup_func);
- if (!args)
- goto bad;
- res = PyObject_CallObject(reg, args);
- if (!res)
- goto bad;
- ret = 0;
- }
-bad:
- Py_XDECREF(cleanup_func);
- Py_XDECREF(atexit);
- Py_XDECREF(reg);
- Py_XDECREF(args);
- Py_XDECREF(res);
- return ret;
-}
-#endif
+ if (!atexit)
+ goto bad;
+ reg = PyObject_GetAttrString(atexit, "_exithandlers");
+ if (reg && PyList_Check(reg)) {
+ PyObject *a, *kw;
+ a = PyTuple_New(0);
+ kw = PyDict_New();
+ if (!a || !kw) {
+ Py_XDECREF(a);
+ Py_XDECREF(kw);
+ goto bad;
+ }
+ args = PyTuple_Pack(3, cleanup_func, a, kw);
+ Py_DECREF(a);
+ Py_DECREF(kw);
+ if (!args)
+ goto bad;
+ ret = PyList_Insert(reg, 0, args);
+ } else {
+ if (!reg)
+ PyErr_Clear();
+ Py_XDECREF(reg);
+ reg = PyObject_GetAttrString(atexit, "register");
+ if (!reg)
+ goto bad;
+ args = PyTuple_Pack(1, cleanup_func);
+ if (!args)
+ goto bad;
+ res = PyObject_CallObject(reg, args);
+ if (!res)
+ goto bad;
+ ret = 0;
+ }
+bad:
+ Py_XDECREF(cleanup_func);
+ Py_XDECREF(atexit);
+ Py_XDECREF(reg);
+ Py_XDECREF(args);
+ Py_XDECREF(res);
+ return ret;
+}
+#endif
/////////////// FastGil.init ///////////////
#ifdef WITH_THREAD
diff --git a/contrib/tools/cython/Cython/Utility/ObjectHandling.c b/contrib/tools/cython/Cython/Utility/ObjectHandling.c
index 2f9b3868d8..c1b1c60bda 100644
--- a/contrib/tools/cython/Cython/Utility/ObjectHandling.c
+++ b/contrib/tools/cython/Cython/Utility/ObjectHandling.c
@@ -1,119 +1,119 @@
-/*
- * General object operations and protocol implementations,
- * including their specialisations for certain builtins.
- *
- * Optional optimisations for builtins are in Optimize.c.
- *
- * Required replacements of builtins are in Builtins.c.
- */
-
-/////////////// RaiseNoneIterError.proto ///////////////
-
-static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
-
-/////////////// RaiseNoneIterError ///////////////
-
-static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) {
- PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
-}
-
-/////////////// RaiseTooManyValuesToUnpack.proto ///////////////
-
-static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
-
-/////////////// RaiseTooManyValuesToUnpack ///////////////
-
-static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
- PyErr_Format(PyExc_ValueError,
- "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected);
-}
-
-/////////////// RaiseNeedMoreValuesToUnpack.proto ///////////////
-
-static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
-
-/////////////// RaiseNeedMoreValuesToUnpack ///////////////
-
-static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
- PyErr_Format(PyExc_ValueError,
- "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
- index, (index == 1) ? "" : "s");
-}
-
-/////////////// UnpackTupleError.proto ///////////////
-
-static void __Pyx_UnpackTupleError(PyObject *, Py_ssize_t index); /*proto*/
-
-/////////////// UnpackTupleError ///////////////
-//@requires: RaiseNoneIterError
-//@requires: RaiseNeedMoreValuesToUnpack
-//@requires: RaiseTooManyValuesToUnpack
-
-static void __Pyx_UnpackTupleError(PyObject *t, Py_ssize_t index) {
- if (t == Py_None) {
- __Pyx_RaiseNoneNotIterableError();
- } else if (PyTuple_GET_SIZE(t) < index) {
- __Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(t));
- } else {
- __Pyx_RaiseTooManyValuesError(index);
- }
-}
-
-/////////////// UnpackItemEndCheck.proto ///////////////
-
-static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected); /*proto*/
-
-/////////////// UnpackItemEndCheck ///////////////
-//@requires: RaiseTooManyValuesToUnpack
-//@requires: IterFinish
-
-static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {
- if (unlikely(retval)) {
- Py_DECREF(retval);
- __Pyx_RaiseTooManyValuesError(expected);
- return -1;
- } else {
- return __Pyx_IterFinish();
- }
- return 0;
-}
-
-/////////////// UnpackTuple2.proto ///////////////
-
+/*
+ * General object operations and protocol implementations,
+ * including their specialisations for certain builtins.
+ *
+ * Optional optimisations for builtins are in Optimize.c.
+ *
+ * Required replacements of builtins are in Builtins.c.
+ */
+
+/////////////// RaiseNoneIterError.proto ///////////////
+
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
+
+/////////////// RaiseNoneIterError ///////////////
+
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
+}
+
+/////////////// RaiseTooManyValuesToUnpack.proto ///////////////
+
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
+
+/////////////// RaiseTooManyValuesToUnpack ///////////////
+
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
+ PyErr_Format(PyExc_ValueError,
+ "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected);
+}
+
+/////////////// RaiseNeedMoreValuesToUnpack.proto ///////////////
+
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
+
+/////////////// RaiseNeedMoreValuesToUnpack ///////////////
+
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
+ PyErr_Format(PyExc_ValueError,
+ "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
+ index, (index == 1) ? "" : "s");
+}
+
+/////////////// UnpackTupleError.proto ///////////////
+
+static void __Pyx_UnpackTupleError(PyObject *, Py_ssize_t index); /*proto*/
+
+/////////////// UnpackTupleError ///////////////
+//@requires: RaiseNoneIterError
+//@requires: RaiseNeedMoreValuesToUnpack
+//@requires: RaiseTooManyValuesToUnpack
+
+static void __Pyx_UnpackTupleError(PyObject *t, Py_ssize_t index) {
+ if (t == Py_None) {
+ __Pyx_RaiseNoneNotIterableError();
+ } else if (PyTuple_GET_SIZE(t) < index) {
+ __Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(t));
+ } else {
+ __Pyx_RaiseTooManyValuesError(index);
+ }
+}
+
+/////////////// UnpackItemEndCheck.proto ///////////////
+
+static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected); /*proto*/
+
+/////////////// UnpackItemEndCheck ///////////////
+//@requires: RaiseTooManyValuesToUnpack
+//@requires: IterFinish
+
+static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {
+ if (unlikely(retval)) {
+ Py_DECREF(retval);
+ __Pyx_RaiseTooManyValuesError(expected);
+ return -1;
+ } else {
+ return __Pyx_IterFinish();
+ }
+ return 0;
+}
+
+/////////////// UnpackTuple2.proto ///////////////
+
#define __Pyx_unpack_tuple2(tuple, value1, value2, is_tuple, has_known_size, decref_tuple) \
(likely(is_tuple || PyTuple_Check(tuple)) ? \
(likely(has_known_size || PyTuple_GET_SIZE(tuple) == 2) ? \
__Pyx_unpack_tuple2_exact(tuple, value1, value2, decref_tuple) : \
(__Pyx_UnpackTupleError(tuple, 2), -1)) : \
__Pyx_unpack_tuple2_generic(tuple, value1, value2, has_known_size, decref_tuple))
-
+
static CYTHON_INLINE int __Pyx_unpack_tuple2_exact(
PyObject* tuple, PyObject** value1, PyObject** value2, int decref_tuple);
static int __Pyx_unpack_tuple2_generic(
PyObject* tuple, PyObject** value1, PyObject** value2, int has_known_size, int decref_tuple);
-/////////////// UnpackTuple2 ///////////////
-//@requires: UnpackItemEndCheck
-//@requires: UnpackTupleError
-//@requires: RaiseNeedMoreValuesToUnpack
-
+/////////////// UnpackTuple2 ///////////////
+//@requires: UnpackItemEndCheck
+//@requires: UnpackTupleError
+//@requires: RaiseNeedMoreValuesToUnpack
+
static CYTHON_INLINE int __Pyx_unpack_tuple2_exact(
PyObject* tuple, PyObject** pvalue1, PyObject** pvalue2, int decref_tuple) {
PyObject *value1 = NULL, *value2 = NULL;
-#if CYTHON_COMPILING_IN_PYPY
+#if CYTHON_COMPILING_IN_PYPY
value1 = PySequence_ITEM(tuple, 0); if (unlikely(!value1)) goto bad;
value2 = PySequence_ITEM(tuple, 1); if (unlikely(!value2)) goto bad;
-#else
+#else
value1 = PyTuple_GET_ITEM(tuple, 0); Py_INCREF(value1);
value2 = PyTuple_GET_ITEM(tuple, 1); Py_INCREF(value2);
-#endif
+#endif
if (decref_tuple) {
Py_DECREF(tuple);
- }
+ }
- *pvalue1 = value1;
- *pvalue2 = value2;
- return 0;
+ *pvalue1 = value1;
+ *pvalue2 = value2;
+ return 0;
#if CYTHON_COMPILING_IN_PYPY
bad:
Py_XDECREF(value1);
@@ -143,27 +143,27 @@ static int __Pyx_unpack_tuple2_generic(PyObject* tuple, PyObject** pvalue1, PyOb
*pvalue2 = value2;
return 0;
-unpacking_failed:
- if (!has_known_size && __Pyx_IterFinish() == 0)
- __Pyx_RaiseNeedMoreValuesError(index);
-bad:
- Py_XDECREF(iter);
- Py_XDECREF(value1);
- Py_XDECREF(value2);
- if (decref_tuple) { Py_XDECREF(tuple); }
- return -1;
-}
-
-
-/////////////// IterNext.proto ///////////////
-
-#define __Pyx_PyIter_Next(obj) __Pyx_PyIter_Next2(obj, NULL)
-static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject *, PyObject *); /*proto*/
-
-/////////////// IterNext ///////////////
+unpacking_failed:
+ if (!has_known_size && __Pyx_IterFinish() == 0)
+ __Pyx_RaiseNeedMoreValuesError(index);
+bad:
+ Py_XDECREF(iter);
+ Py_XDECREF(value1);
+ Py_XDECREF(value2);
+ if (decref_tuple) { Py_XDECREF(tuple); }
+ return -1;
+}
+
+
+/////////////// IterNext.proto ///////////////
+
+#define __Pyx_PyIter_Next(obj) __Pyx_PyIter_Next2(obj, NULL)
+static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject *, PyObject *); /*proto*/
+
+/////////////// IterNext ///////////////
//@requires: Exceptions.c::PyThreadStateGet
//@requires: Exceptions.c::PyErrFetchRestore
-
+
static PyObject *__Pyx_PyIter_Next2Default(PyObject* defval) {
PyObject* exc_type;
__Pyx_PyThreadState_declare
@@ -189,11 +189,11 @@ static void __Pyx_PyIter_Next_ErrorNoIterator(PyObject *iterator) {
"%.200s object is not an iterator", Py_TYPE(iterator)->tp_name);
}
-// originally copied from Py3's builtin_next()
-static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject* iterator, PyObject* defval) {
- PyObject* next;
+// originally copied from Py3's builtin_next()
+static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject* iterator, PyObject* defval) {
+ PyObject* next;
// We always do a quick slot check because calling PyIter_Check() is so wasteful.
- iternextfunc iternext = Py_TYPE(iterator)->tp_iternext;
+ iternextfunc iternext = Py_TYPE(iterator)->tp_iternext;
if (likely(iternext)) {
#if CYTHON_USE_TYPE_SLOTS
next = iternext(iterator);
@@ -203,19 +203,19 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject* iterator, PyObject*
if (unlikely(iternext == &_PyObject_NextNotImplemented))
return NULL;
#endif
-#else
+#else
// Since the slot was set, assume that PyIter_Next() will likely succeed, and properly fail otherwise.
// Note: PyIter_Next() crashes in CPython if "tp_iternext" is NULL.
next = PyIter_Next(iterator);
if (likely(next))
return next;
-#endif
+#endif
} else if (CYTHON_USE_TYPE_SLOTS || unlikely(!PyIter_Check(iterator))) {
// If CYTHON_USE_TYPE_SLOTS, then the slot was not set and we don't have an iterable.
// Otherwise, don't trust "tp_iternext" and rely on PyIter_Check().
__Pyx_PyIter_Next_ErrorNoIterator(iterator);
- return NULL;
- }
+ return NULL;
+ }
#if !CYTHON_USE_TYPE_SLOTS
else {
// We have an iterator with an empty "tp_iternext", but didn't call next() on it yet.
@@ -225,52 +225,52 @@ static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject* iterator, PyObject*
}
#endif
return __Pyx_PyIter_Next2Default(defval);
-}
-
-/////////////// IterFinish.proto ///////////////
-
-static CYTHON_INLINE int __Pyx_IterFinish(void); /*proto*/
-
-/////////////// IterFinish ///////////////
-
-// When PyIter_Next(iter) has returned NULL in order to signal termination,
-// this function does the right cleanup and returns 0 on success. If it
-// detects an error that occurred in the iterator, it returns -1.
-
-static CYTHON_INLINE int __Pyx_IterFinish(void) {
+}
+
+/////////////// IterFinish.proto ///////////////
+
+static CYTHON_INLINE int __Pyx_IterFinish(void); /*proto*/
+
+/////////////// IterFinish ///////////////
+
+// When PyIter_Next(iter) has returned NULL in order to signal termination,
+// this function does the right cleanup and returns 0 on success. If it
+// detects an error that occurred in the iterator, it returns -1.
+
+static CYTHON_INLINE int __Pyx_IterFinish(void) {
#if CYTHON_FAST_THREAD_STATE
PyThreadState *tstate = __Pyx_PyThreadState_Current;
- PyObject* exc_type = tstate->curexc_type;
- if (unlikely(exc_type)) {
+ PyObject* exc_type = tstate->curexc_type;
+ if (unlikely(exc_type)) {
if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) {
- PyObject *exc_value, *exc_tb;
- exc_value = tstate->curexc_value;
- exc_tb = tstate->curexc_traceback;
- tstate->curexc_type = 0;
- tstate->curexc_value = 0;
- tstate->curexc_traceback = 0;
- Py_DECREF(exc_type);
- Py_XDECREF(exc_value);
- Py_XDECREF(exc_tb);
- return 0;
- } else {
- return -1;
- }
- }
- return 0;
-#else
- if (unlikely(PyErr_Occurred())) {
- if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) {
- PyErr_Clear();
- return 0;
- } else {
- return -1;
- }
- }
- return 0;
-#endif
-}
-
+ PyObject *exc_value, *exc_tb;
+ exc_value = tstate->curexc_value;
+ exc_tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
+ Py_DECREF(exc_type);
+ Py_XDECREF(exc_value);
+ Py_XDECREF(exc_tb);
+ return 0;
+ } else {
+ return -1;
+ }
+ }
+ return 0;
+#else
+ if (unlikely(PyErr_Occurred())) {
+ if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) {
+ PyErr_Clear();
+ return 0;
+ } else {
+ return -1;
+ }
+ }
+ return 0;
+#endif
+}
+
/////////////// ObjectGetItem.proto ///////////////
@@ -316,8 +316,8 @@ static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key) {
#endif
-/////////////// DictGetItem.proto ///////////////
-
+/////////////// DictGetItem.proto ///////////////
+
#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key);/*proto*/
@@ -333,11 +333,11 @@ static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key);/*proto*/
/////////////// DictGetItem ///////////////
#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
-static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) {
- PyObject *value;
- value = PyDict_GetItemWithError(d, key);
- if (unlikely(!value)) {
- if (!PyErr_Occurred()) {
+static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) {
+ PyObject *value;
+ value = PyDict_GetItemWithError(d, key);
+ if (unlikely(!value)) {
+ if (!PyErr_Occurred()) {
if (unlikely(PyTuple_Check(key))) {
// CPython interprets tuples as separate arguments => must wrap them in another tuple.
PyObject* args = PyTuple_Pack(1, key);
@@ -349,48 +349,48 @@ static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) {
// Avoid tuple packing if possible.
PyErr_SetObject(PyExc_KeyError, key);
}
- }
- return NULL;
- }
- Py_INCREF(value);
- return value;
-}
-#endif
-
-/////////////// GetItemInt.proto ///////////////
-
-#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) : \
- (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) : \
- __Pyx_GetItemInt_Generic(o, to_py_func(i))))
-
-{{for type in ['List', 'Tuple']}}
-#define __Pyx_GetItemInt_{{type}}(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_GetItemInt_{{type}}_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
- (PyErr_SetString(PyExc_IndexError, "{{ type.lower() }} index out of range"), (PyObject*)NULL))
-
-static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ssize_t i,
- int wraparound, int boundscheck);
-{{endfor}}
-
+ }
+ return NULL;
+ }
+ Py_INCREF(value);
+ return value;
+}
+#endif
+
+/////////////// GetItemInt.proto ///////////////
+
+#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) : \
+ (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) : \
+ __Pyx_GetItemInt_Generic(o, to_py_func(i))))
+
+{{for type in ['List', 'Tuple']}}
+#define __Pyx_GetItemInt_{{type}}(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_GetItemInt_{{type}}_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
+ (PyErr_SetString(PyExc_IndexError, "{{ type.lower() }} index out of range"), (PyObject*)NULL))
+
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ssize_t i,
+ int wraparound, int boundscheck);
+{{endfor}}
+
static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j);
-static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
- int is_list, int wraparound, int boundscheck);
-
-/////////////// GetItemInt ///////////////
-
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
+ int is_list, int wraparound, int boundscheck);
+
+/////////////// GetItemInt ///////////////
+
static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
- PyObject *r;
- if (!j) return NULL;
- r = PyObject_GetItem(o, j);
- Py_DECREF(j);
- return r;
-}
-
-{{for type in ['List', 'Tuple']}}
-static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ssize_t i,
+ PyObject *r;
+ if (!j) return NULL;
+ r = PyObject_GetItem(o, j);
+ Py_DECREF(j);
+ return r;
+}
+
+{{for type in ['List', 'Tuple']}}
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ssize_t i,
CYTHON_NCP_UNUSED int wraparound,
CYTHON_NCP_UNUSED int boundscheck) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
@@ -400,658 +400,658 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ss
}
if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, Py{{type}}_GET_SIZE(o)))) {
PyObject *r = Py{{type}}_GET_ITEM(o, wrapped_i);
- Py_INCREF(r);
- return r;
- }
- return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
-#else
- return PySequence_GetItem(o, i);
-#endif
-}
-{{endfor}}
-
+ Py_INCREF(r);
+ return r;
+ }
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+#else
+ return PySequence_GetItem(o, i);
+#endif
+}
+{{endfor}}
+
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list,
CYTHON_NCP_UNUSED int wraparound,
CYTHON_NCP_UNUSED int boundscheck) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
- if (is_list || PyList_CheckExact(o)) {
- Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
+ if (is_list || PyList_CheckExact(o)) {
+ Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) {
- PyObject *r = PyList_GET_ITEM(o, n);
- Py_INCREF(r);
- return r;
- }
- }
- else if (PyTuple_CheckExact(o)) {
- Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
+ PyObject *r = PyList_GET_ITEM(o, n);
+ Py_INCREF(r);
+ return r;
+ }
+ }
+ else if (PyTuple_CheckExact(o)) {
+ Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) {
- PyObject *r = PyTuple_GET_ITEM(o, n);
- Py_INCREF(r);
- return r;
- }
- } else {
- // inlined PySequence_GetItem() + special cased length overflow
- PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
- if (likely(m && m->sq_item)) {
- if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
- Py_ssize_t l = m->sq_length(o);
- if (likely(l >= 0)) {
- i += l;
- } else {
- // if length > max(Py_ssize_t), maybe the object can wrap around itself?
+ PyObject *r = PyTuple_GET_ITEM(o, n);
+ Py_INCREF(r);
+ return r;
+ }
+ } else {
+ // inlined PySequence_GetItem() + special cased length overflow
+ PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
+ if (likely(m && m->sq_item)) {
+ if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
+ Py_ssize_t l = m->sq_length(o);
+ if (likely(l >= 0)) {
+ i += l;
+ } else {
+ // if length > max(Py_ssize_t), maybe the object can wrap around itself?
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
- return NULL;
+ return NULL;
PyErr_Clear();
- }
- }
- return m->sq_item(o, i);
- }
- }
-#else
- if (is_list || PySequence_Check(o)) {
- return PySequence_GetItem(o, i);
- }
-#endif
- return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
-}
-
-/////////////// SetItemInt.proto ///////////////
-
-#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) : \
- (is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) : \
- __Pyx_SetItemInt_Generic(o, to_py_func(i), v)))
-
+ }
+ }
+ return m->sq_item(o, i);
+ }
+ }
+#else
+ if (is_list || PySequence_Check(o)) {
+ return PySequence_GetItem(o, i);
+ }
+#endif
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+}
+
+/////////////// SetItemInt.proto ///////////////
+
+#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) : \
+ (is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) : \
+ __Pyx_SetItemInt_Generic(o, to_py_func(i), v)))
+
static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v);
-static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v,
- int is_list, int wraparound, int boundscheck);
-
-/////////////// SetItemInt ///////////////
-
+static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v,
+ int is_list, int wraparound, int boundscheck);
+
+/////////////// SetItemInt ///////////////
+
static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) {
- int r;
- if (!j) return -1;
- r = PyObject_SetItem(o, j, v);
- Py_DECREF(j);
- return r;
-}
-
+ int r;
+ if (!j) return -1;
+ r = PyObject_SetItem(o, j, v);
+ Py_DECREF(j);
+ return r;
+}
+
static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list,
CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
- if (is_list || PyList_CheckExact(o)) {
- Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? i : i + PyList_GET_SIZE(o));
+ if (is_list || PyList_CheckExact(o)) {
+ Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? i : i + PyList_GET_SIZE(o));
if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) {
- PyObject* old = PyList_GET_ITEM(o, n);
- Py_INCREF(v);
- PyList_SET_ITEM(o, n, v);
- Py_DECREF(old);
- return 1;
- }
- } else {
- // inlined PySequence_SetItem() + special cased length overflow
- PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
- if (likely(m && m->sq_ass_item)) {
- if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
- Py_ssize_t l = m->sq_length(o);
- if (likely(l >= 0)) {
- i += l;
- } else {
- // if length > max(Py_ssize_t), maybe the object can wrap around itself?
+ PyObject* old = PyList_GET_ITEM(o, n);
+ Py_INCREF(v);
+ PyList_SET_ITEM(o, n, v);
+ Py_DECREF(old);
+ return 1;
+ }
+ } else {
+ // inlined PySequence_SetItem() + special cased length overflow
+ PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
+ if (likely(m && m->sq_ass_item)) {
+ if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
+ Py_ssize_t l = m->sq_length(o);
+ if (likely(l >= 0)) {
+ i += l;
+ } else {
+ // if length > max(Py_ssize_t), maybe the object can wrap around itself?
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
- return -1;
+ return -1;
PyErr_Clear();
- }
- }
- return m->sq_ass_item(o, i, v);
- }
- }
-#else
-#if CYTHON_COMPILING_IN_PYPY
+ }
+ }
+ return m->sq_ass_item(o, i, v);
+ }
+ }
+#else
+#if CYTHON_COMPILING_IN_PYPY
if (is_list || (PySequence_Check(o) && !PyDict_Check(o)))
-#else
+#else
if (is_list || PySequence_Check(o))
-#endif
+#endif
{
- return PySequence_SetItem(o, i, v);
- }
-#endif
- return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v);
-}
-
-
-/////////////// DelItemInt.proto ///////////////
-
-#define __Pyx_DelItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_DelItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound) : \
- (is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) : \
- __Pyx_DelItem_Generic(o, to_py_func(i))))
-
+ return PySequence_SetItem(o, i, v);
+ }
+#endif
+ return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v);
+}
+
+
+/////////////// DelItemInt.proto ///////////////
+
+#define __Pyx_DelItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_DelItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound) : \
+ (is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) : \
+ __Pyx_DelItem_Generic(o, to_py_func(i))))
+
static int __Pyx_DelItem_Generic(PyObject *o, PyObject *j);
-static CYTHON_INLINE int __Pyx_DelItemInt_Fast(PyObject *o, Py_ssize_t i,
+static CYTHON_INLINE int __Pyx_DelItemInt_Fast(PyObject *o, Py_ssize_t i,
int is_list, int wraparound);
-
-/////////////// DelItemInt ///////////////
-
+
+/////////////// DelItemInt ///////////////
+
static int __Pyx_DelItem_Generic(PyObject *o, PyObject *j) {
- int r;
- if (!j) return -1;
- r = PyObject_DelItem(o, j);
- Py_DECREF(j);
- return r;
-}
-
-static CYTHON_INLINE int __Pyx_DelItemInt_Fast(PyObject *o, Py_ssize_t i,
+ int r;
+ if (!j) return -1;
+ r = PyObject_DelItem(o, j);
+ Py_DECREF(j);
+ return r;
+}
+
+static CYTHON_INLINE int __Pyx_DelItemInt_Fast(PyObject *o, Py_ssize_t i,
CYTHON_UNUSED int is_list, CYTHON_NCP_UNUSED int wraparound) {
#if !CYTHON_USE_TYPE_SLOTS
- if (is_list || PySequence_Check(o)) {
- return PySequence_DelItem(o, i);
- }
-#else
- // inlined PySequence_DelItem() + special cased length overflow
- PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
- if (likely(m && m->sq_ass_item)) {
- if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
- Py_ssize_t l = m->sq_length(o);
- if (likely(l >= 0)) {
- i += l;
- } else {
- // if length > max(Py_ssize_t), maybe the object can wrap around itself?
+ if (is_list || PySequence_Check(o)) {
+ return PySequence_DelItem(o, i);
+ }
+#else
+ // inlined PySequence_DelItem() + special cased length overflow
+ PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
+ if (likely(m && m->sq_ass_item)) {
+ if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
+ Py_ssize_t l = m->sq_length(o);
+ if (likely(l >= 0)) {
+ i += l;
+ } else {
+ // if length > max(Py_ssize_t), maybe the object can wrap around itself?
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
- return -1;
+ return -1;
PyErr_Clear();
- }
- }
- return m->sq_ass_item(o, i, (PyObject *)NULL);
- }
-#endif
- return __Pyx_DelItem_Generic(o, PyInt_FromSsize_t(i));
-}
-
-
-/////////////// SliceObject.proto ///////////////
-
-// we pass pointer addresses to show the C compiler what is NULL and what isn't
-{{if access == 'Get'}}
-static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(
- PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop,
- PyObject** py_start, PyObject** py_stop, PyObject** py_slice,
- int has_cstart, int has_cstop, int wraparound);
-{{else}}
-#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) \
- __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)
-
-// we pass pointer addresses to show the C compiler what is NULL and what isn't
-static CYTHON_INLINE int __Pyx_PyObject_SetSlice(
- PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop,
- PyObject** py_start, PyObject** py_stop, PyObject** py_slice,
- int has_cstart, int has_cstop, int wraparound);
-{{endif}}
-
-/////////////// SliceObject ///////////////
-
-{{if access == 'Get'}}
+ }
+ }
+ return m->sq_ass_item(o, i, (PyObject *)NULL);
+ }
+#endif
+ return __Pyx_DelItem_Generic(o, PyInt_FromSsize_t(i));
+}
+
+
+/////////////// SliceObject.proto ///////////////
+
+// we pass pointer addresses to show the C compiler what is NULL and what isn't
+{{if access == 'Get'}}
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(
+ PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop,
+ PyObject** py_start, PyObject** py_stop, PyObject** py_slice,
+ int has_cstart, int has_cstop, int wraparound);
+{{else}}
+#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) \
+ __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)
+
+// we pass pointer addresses to show the C compiler what is NULL and what isn't
+static CYTHON_INLINE int __Pyx_PyObject_SetSlice(
+ PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop,
+ PyObject** py_start, PyObject** py_stop, PyObject** py_slice,
+ int has_cstart, int has_cstop, int wraparound);
+{{endif}}
+
+/////////////// SliceObject ///////////////
+
+{{if access == 'Get'}}
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj,
-{{else}}
+{{else}}
static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value,
-{{endif}}
+{{endif}}
Py_ssize_t cstart, Py_ssize_t cstop,
- PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice,
- int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) {
+ PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice,
+ int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) {
#if CYTHON_USE_TYPE_SLOTS
- PyMappingMethods* mp;
-#if PY_MAJOR_VERSION < 3
- PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence;
- if (likely(ms && ms->sq_{{if access == 'Set'}}ass_{{endif}}slice)) {
- if (!has_cstart) {
- if (_py_start && (*_py_start != Py_None)) {
- cstart = __Pyx_PyIndex_AsSsize_t(*_py_start);
- if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
- } else
- cstart = 0;
- }
- if (!has_cstop) {
- if (_py_stop && (*_py_stop != Py_None)) {
- cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop);
- if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
- } else
- cstop = PY_SSIZE_T_MAX;
- }
- if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) {
- Py_ssize_t l = ms->sq_length(obj);
- if (likely(l >= 0)) {
- if (cstop < 0) {
- cstop += l;
- if (cstop < 0) cstop = 0;
- }
- if (cstart < 0) {
- cstart += l;
- if (cstart < 0) cstart = 0;
- }
- } else {
- // if length > max(Py_ssize_t), maybe the object can wrap around itself?
+ PyMappingMethods* mp;
+#if PY_MAJOR_VERSION < 3
+ PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence;
+ if (likely(ms && ms->sq_{{if access == 'Set'}}ass_{{endif}}slice)) {
+ if (!has_cstart) {
+ if (_py_start && (*_py_start != Py_None)) {
+ cstart = __Pyx_PyIndex_AsSsize_t(*_py_start);
+ if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
+ } else
+ cstart = 0;
+ }
+ if (!has_cstop) {
+ if (_py_stop && (*_py_stop != Py_None)) {
+ cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop);
+ if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
+ } else
+ cstop = PY_SSIZE_T_MAX;
+ }
+ if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) {
+ Py_ssize_t l = ms->sq_length(obj);
+ if (likely(l >= 0)) {
+ if (cstop < 0) {
+ cstop += l;
+ if (cstop < 0) cstop = 0;
+ }
+ if (cstart < 0) {
+ cstart += l;
+ if (cstart < 0) cstart = 0;
+ }
+ } else {
+ // if length > max(Py_ssize_t), maybe the object can wrap around itself?
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
- goto bad;
+ goto bad;
PyErr_Clear();
- }
- }
-{{if access == 'Get'}}
- return ms->sq_slice(obj, cstart, cstop);
-{{else}}
- return ms->sq_ass_slice(obj, cstart, cstop, value);
-{{endif}}
- }
-#endif
-
- mp = Py_TYPE(obj)->tp_as_mapping;
-{{if access == 'Get'}}
- if (likely(mp && mp->mp_subscript))
-{{else}}
- if (likely(mp && mp->mp_ass_subscript))
-{{endif}}
-#endif
- {
- {{if access == 'Get'}}PyObject*{{else}}int{{endif}} result;
- PyObject *py_slice, *py_start, *py_stop;
- if (_py_slice) {
- py_slice = *_py_slice;
- } else {
- PyObject* owned_start = NULL;
- PyObject* owned_stop = NULL;
- if (_py_start) {
- py_start = *_py_start;
- } else {
- if (has_cstart) {
- owned_start = py_start = PyInt_FromSsize_t(cstart);
- if (unlikely(!py_start)) goto bad;
- } else
- py_start = Py_None;
- }
- if (_py_stop) {
- py_stop = *_py_stop;
- } else {
- if (has_cstop) {
- owned_stop = py_stop = PyInt_FromSsize_t(cstop);
- if (unlikely(!py_stop)) {
- Py_XDECREF(owned_start);
- goto bad;
- }
- } else
- py_stop = Py_None;
- }
- py_slice = PySlice_New(py_start, py_stop, Py_None);
- Py_XDECREF(owned_start);
- Py_XDECREF(owned_stop);
- if (unlikely(!py_slice)) goto bad;
- }
+ }
+ }
+{{if access == 'Get'}}
+ return ms->sq_slice(obj, cstart, cstop);
+{{else}}
+ return ms->sq_ass_slice(obj, cstart, cstop, value);
+{{endif}}
+ }
+#endif
+
+ mp = Py_TYPE(obj)->tp_as_mapping;
+{{if access == 'Get'}}
+ if (likely(mp && mp->mp_subscript))
+{{else}}
+ if (likely(mp && mp->mp_ass_subscript))
+{{endif}}
+#endif
+ {
+ {{if access == 'Get'}}PyObject*{{else}}int{{endif}} result;
+ PyObject *py_slice, *py_start, *py_stop;
+ if (_py_slice) {
+ py_slice = *_py_slice;
+ } else {
+ PyObject* owned_start = NULL;
+ PyObject* owned_stop = NULL;
+ if (_py_start) {
+ py_start = *_py_start;
+ } else {
+ if (has_cstart) {
+ owned_start = py_start = PyInt_FromSsize_t(cstart);
+ if (unlikely(!py_start)) goto bad;
+ } else
+ py_start = Py_None;
+ }
+ if (_py_stop) {
+ py_stop = *_py_stop;
+ } else {
+ if (has_cstop) {
+ owned_stop = py_stop = PyInt_FromSsize_t(cstop);
+ if (unlikely(!py_stop)) {
+ Py_XDECREF(owned_start);
+ goto bad;
+ }
+ } else
+ py_stop = Py_None;
+ }
+ py_slice = PySlice_New(py_start, py_stop, Py_None);
+ Py_XDECREF(owned_start);
+ Py_XDECREF(owned_stop);
+ if (unlikely(!py_slice)) goto bad;
+ }
#if CYTHON_USE_TYPE_SLOTS
-{{if access == 'Get'}}
- result = mp->mp_subscript(obj, py_slice);
-#else
- result = PyObject_GetItem(obj, py_slice);
-{{else}}
- result = mp->mp_ass_subscript(obj, py_slice, value);
-#else
- result = value ? PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice);
-{{endif}}
-#endif
- if (!_py_slice) {
- Py_DECREF(py_slice);
- }
- return result;
- }
- PyErr_Format(PyExc_TypeError,
-{{if access == 'Get'}}
- "'%.200s' object is unsliceable", Py_TYPE(obj)->tp_name);
-{{else}}
- "'%.200s' object does not support slice %.10s",
- Py_TYPE(obj)->tp_name, value ? "assignment" : "deletion");
-{{endif}}
-
-bad:
- return {{if access == 'Get'}}NULL{{else}}-1{{endif}};
-}
-
-
-/////////////// SliceTupleAndList.proto ///////////////
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyList_GetSlice(PyObject* src, Py_ssize_t start, Py_ssize_t stop);
-static CYTHON_INLINE PyObject* __Pyx_PyTuple_GetSlice(PyObject* src, Py_ssize_t start, Py_ssize_t stop);
-#else
-#define __Pyx_PyList_GetSlice(seq, start, stop) PySequence_GetSlice(seq, start, stop)
-#define __Pyx_PyTuple_GetSlice(seq, start, stop) PySequence_GetSlice(seq, start, stop)
-#endif
-
-/////////////// SliceTupleAndList ///////////////
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE void __Pyx_crop_slice(Py_ssize_t* _start, Py_ssize_t* _stop, Py_ssize_t* _length) {
- Py_ssize_t start = *_start, stop = *_stop, length = *_length;
- if (start < 0) {
- start += length;
- if (start < 0)
- start = 0;
- }
-
- if (stop < 0)
- stop += length;
- else if (stop > length)
- stop = length;
-
- *_length = stop - start;
- *_start = start;
- *_stop = stop;
-}
-
-static CYTHON_INLINE void __Pyx_copy_object_array(PyObject** CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) {
- PyObject *v;
- Py_ssize_t i;
- for (i = 0; i < length; i++) {
- v = dest[i] = src[i];
- Py_INCREF(v);
- }
-}
-
-{{for type in ['List', 'Tuple']}}
-static CYTHON_INLINE PyObject* __Pyx_Py{{type}}_GetSlice(
- PyObject* src, Py_ssize_t start, Py_ssize_t stop) {
- PyObject* dest;
- Py_ssize_t length = Py{{type}}_GET_SIZE(src);
- __Pyx_crop_slice(&start, &stop, &length);
- if (unlikely(length <= 0))
- return Py{{type}}_New(0);
-
- dest = Py{{type}}_New(length);
- if (unlikely(!dest))
- return NULL;
- __Pyx_copy_object_array(
- ((Py{{type}}Object*)src)->ob_item + start,
- ((Py{{type}}Object*)dest)->ob_item,
- length);
- return dest;
-}
-{{endfor}}
-#endif
-
-
-/////////////// CalculateMetaclass.proto ///////////////
-
-static PyObject *__Pyx_CalculateMetaclass(PyTypeObject *metaclass, PyObject *bases);
-
-/////////////// CalculateMetaclass ///////////////
-
-static PyObject *__Pyx_CalculateMetaclass(PyTypeObject *metaclass, PyObject *bases) {
- Py_ssize_t i, nbases = PyTuple_GET_SIZE(bases);
- for (i=0; i < nbases; i++) {
- PyTypeObject *tmptype;
- PyObject *tmp = PyTuple_GET_ITEM(bases, i);
- tmptype = Py_TYPE(tmp);
-#if PY_MAJOR_VERSION < 3
- if (tmptype == &PyClass_Type)
- continue;
-#endif
- if (!metaclass) {
- metaclass = tmptype;
- continue;
- }
- if (PyType_IsSubtype(metaclass, tmptype))
- continue;
- if (PyType_IsSubtype(tmptype, metaclass)) {
- metaclass = tmptype;
- continue;
- }
- // else:
- PyErr_SetString(PyExc_TypeError,
- "metaclass conflict: "
- "the metaclass of a derived class "
- "must be a (non-strict) subclass "
- "of the metaclasses of all its bases");
- return NULL;
- }
- if (!metaclass) {
-#if PY_MAJOR_VERSION < 3
- metaclass = &PyClass_Type;
-#else
- metaclass = &PyType_Type;
-#endif
- }
- // make owned reference
- Py_INCREF((PyObject*) metaclass);
- return (PyObject*) metaclass;
-}
-
-
-/////////////// FindInheritedMetaclass.proto ///////////////
-
-static PyObject *__Pyx_FindInheritedMetaclass(PyObject *bases); /*proto*/
-
-/////////////// FindInheritedMetaclass ///////////////
-//@requires: PyObjectGetAttrStr
-//@requires: CalculateMetaclass
-
-static PyObject *__Pyx_FindInheritedMetaclass(PyObject *bases) {
- PyObject *metaclass;
- if (PyTuple_Check(bases) && PyTuple_GET_SIZE(bases) > 0) {
- PyTypeObject *metatype;
+{{if access == 'Get'}}
+ result = mp->mp_subscript(obj, py_slice);
+#else
+ result = PyObject_GetItem(obj, py_slice);
+{{else}}
+ result = mp->mp_ass_subscript(obj, py_slice, value);
+#else
+ result = value ? PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice);
+{{endif}}
+#endif
+ if (!_py_slice) {
+ Py_DECREF(py_slice);
+ }
+ return result;
+ }
+ PyErr_Format(PyExc_TypeError,
+{{if access == 'Get'}}
+ "'%.200s' object is unsliceable", Py_TYPE(obj)->tp_name);
+{{else}}
+ "'%.200s' object does not support slice %.10s",
+ Py_TYPE(obj)->tp_name, value ? "assignment" : "deletion");
+{{endif}}
+
+bad:
+ return {{if access == 'Get'}}NULL{{else}}-1{{endif}};
+}
+
+
+/////////////// SliceTupleAndList.proto ///////////////
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyList_GetSlice(PyObject* src, Py_ssize_t start, Py_ssize_t stop);
+static CYTHON_INLINE PyObject* __Pyx_PyTuple_GetSlice(PyObject* src, Py_ssize_t start, Py_ssize_t stop);
+#else
+#define __Pyx_PyList_GetSlice(seq, start, stop) PySequence_GetSlice(seq, start, stop)
+#define __Pyx_PyTuple_GetSlice(seq, start, stop) PySequence_GetSlice(seq, start, stop)
+#endif
+
+/////////////// SliceTupleAndList ///////////////
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE void __Pyx_crop_slice(Py_ssize_t* _start, Py_ssize_t* _stop, Py_ssize_t* _length) {
+ Py_ssize_t start = *_start, stop = *_stop, length = *_length;
+ if (start < 0) {
+ start += length;
+ if (start < 0)
+ start = 0;
+ }
+
+ if (stop < 0)
+ stop += length;
+ else if (stop > length)
+ stop = length;
+
+ *_length = stop - start;
+ *_start = start;
+ *_stop = stop;
+}
+
+static CYTHON_INLINE void __Pyx_copy_object_array(PyObject** CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) {
+ PyObject *v;
+ Py_ssize_t i;
+ for (i = 0; i < length; i++) {
+ v = dest[i] = src[i];
+ Py_INCREF(v);
+ }
+}
+
+{{for type in ['List', 'Tuple']}}
+static CYTHON_INLINE PyObject* __Pyx_Py{{type}}_GetSlice(
+ PyObject* src, Py_ssize_t start, Py_ssize_t stop) {
+ PyObject* dest;
+ Py_ssize_t length = Py{{type}}_GET_SIZE(src);
+ __Pyx_crop_slice(&start, &stop, &length);
+ if (unlikely(length <= 0))
+ return Py{{type}}_New(0);
+
+ dest = Py{{type}}_New(length);
+ if (unlikely(!dest))
+ return NULL;
+ __Pyx_copy_object_array(
+ ((Py{{type}}Object*)src)->ob_item + start,
+ ((Py{{type}}Object*)dest)->ob_item,
+ length);
+ return dest;
+}
+{{endfor}}
+#endif
+
+
+/////////////// CalculateMetaclass.proto ///////////////
+
+static PyObject *__Pyx_CalculateMetaclass(PyTypeObject *metaclass, PyObject *bases);
+
+/////////////// CalculateMetaclass ///////////////
+
+static PyObject *__Pyx_CalculateMetaclass(PyTypeObject *metaclass, PyObject *bases) {
+ Py_ssize_t i, nbases = PyTuple_GET_SIZE(bases);
+ for (i=0; i < nbases; i++) {
+ PyTypeObject *tmptype;
+ PyObject *tmp = PyTuple_GET_ITEM(bases, i);
+ tmptype = Py_TYPE(tmp);
+#if PY_MAJOR_VERSION < 3
+ if (tmptype == &PyClass_Type)
+ continue;
+#endif
+ if (!metaclass) {
+ metaclass = tmptype;
+ continue;
+ }
+ if (PyType_IsSubtype(metaclass, tmptype))
+ continue;
+ if (PyType_IsSubtype(tmptype, metaclass)) {
+ metaclass = tmptype;
+ continue;
+ }
+ // else:
+ PyErr_SetString(PyExc_TypeError,
+ "metaclass conflict: "
+ "the metaclass of a derived class "
+ "must be a (non-strict) subclass "
+ "of the metaclasses of all its bases");
+ return NULL;
+ }
+ if (!metaclass) {
+#if PY_MAJOR_VERSION < 3
+ metaclass = &PyClass_Type;
+#else
+ metaclass = &PyType_Type;
+#endif
+ }
+ // make owned reference
+ Py_INCREF((PyObject*) metaclass);
+ return (PyObject*) metaclass;
+}
+
+
+/////////////// FindInheritedMetaclass.proto ///////////////
+
+static PyObject *__Pyx_FindInheritedMetaclass(PyObject *bases); /*proto*/
+
+/////////////// FindInheritedMetaclass ///////////////
+//@requires: PyObjectGetAttrStr
+//@requires: CalculateMetaclass
+
+static PyObject *__Pyx_FindInheritedMetaclass(PyObject *bases) {
+ PyObject *metaclass;
+ if (PyTuple_Check(bases) && PyTuple_GET_SIZE(bases) > 0) {
+ PyTypeObject *metatype;
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
- PyObject *base = PyTuple_GET_ITEM(bases, 0);
+ PyObject *base = PyTuple_GET_ITEM(bases, 0);
#else
PyObject *base = PySequence_ITEM(bases, 0);
#endif
-#if PY_MAJOR_VERSION < 3
- PyObject* basetype = __Pyx_PyObject_GetAttrStr(base, PYIDENT("__class__"));
- if (basetype) {
- metatype = (PyType_Check(basetype)) ? ((PyTypeObject*) basetype) : NULL;
- } else {
- PyErr_Clear();
- metatype = Py_TYPE(base);
- basetype = (PyObject*) metatype;
- Py_INCREF(basetype);
- }
-#else
- metatype = Py_TYPE(base);
-#endif
- metaclass = __Pyx_CalculateMetaclass(metatype, bases);
+#if PY_MAJOR_VERSION < 3
+ PyObject* basetype = __Pyx_PyObject_GetAttrStr(base, PYIDENT("__class__"));
+ if (basetype) {
+ metatype = (PyType_Check(basetype)) ? ((PyTypeObject*) basetype) : NULL;
+ } else {
+ PyErr_Clear();
+ metatype = Py_TYPE(base);
+ basetype = (PyObject*) metatype;
+ Py_INCREF(basetype);
+ }
+#else
+ metatype = Py_TYPE(base);
+#endif
+ metaclass = __Pyx_CalculateMetaclass(metatype, bases);
#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
Py_DECREF(base);
#endif
-#if PY_MAJOR_VERSION < 3
- Py_DECREF(basetype);
-#endif
- } else {
- // no bases => use default metaclass
-#if PY_MAJOR_VERSION < 3
- metaclass = (PyObject *) &PyClass_Type;
-#else
- metaclass = (PyObject *) &PyType_Type;
-#endif
- Py_INCREF(metaclass);
- }
- return metaclass;
-}
-
-/////////////// Py3MetaclassGet.proto ///////////////
-
-static PyObject *__Pyx_Py3MetaclassGet(PyObject *bases, PyObject *mkw); /*proto*/
-
-/////////////// Py3MetaclassGet ///////////////
-//@requires: FindInheritedMetaclass
-//@requires: CalculateMetaclass
-
-static PyObject *__Pyx_Py3MetaclassGet(PyObject *bases, PyObject *mkw) {
+#if PY_MAJOR_VERSION < 3
+ Py_DECREF(basetype);
+#endif
+ } else {
+ // no bases => use default metaclass
+#if PY_MAJOR_VERSION < 3
+ metaclass = (PyObject *) &PyClass_Type;
+#else
+ metaclass = (PyObject *) &PyType_Type;
+#endif
+ Py_INCREF(metaclass);
+ }
+ return metaclass;
+}
+
+/////////////// Py3MetaclassGet.proto ///////////////
+
+static PyObject *__Pyx_Py3MetaclassGet(PyObject *bases, PyObject *mkw); /*proto*/
+
+/////////////// Py3MetaclassGet ///////////////
+//@requires: FindInheritedMetaclass
+//@requires: CalculateMetaclass
+
+static PyObject *__Pyx_Py3MetaclassGet(PyObject *bases, PyObject *mkw) {
PyObject *metaclass = mkw ? __Pyx_PyDict_GetItemStr(mkw, PYIDENT("metaclass")) : NULL;
- if (metaclass) {
- Py_INCREF(metaclass);
- if (PyDict_DelItem(mkw, PYIDENT("metaclass")) < 0) {
- Py_DECREF(metaclass);
- return NULL;
- }
- if (PyType_Check(metaclass)) {
- PyObject* orig = metaclass;
- metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
- Py_DECREF(orig);
- }
- return metaclass;
- }
- return __Pyx_FindInheritedMetaclass(bases);
-}
-
-/////////////// CreateClass.proto ///////////////
-
-static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name,
- PyObject *qualname, PyObject *modname); /*proto*/
-
-/////////////// CreateClass ///////////////
-//@requires: FindInheritedMetaclass
-//@requires: CalculateMetaclass
-
-static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name,
- PyObject *qualname, PyObject *modname) {
- PyObject *result;
- PyObject *metaclass;
-
- if (PyDict_SetItem(dict, PYIDENT("__module__"), modname) < 0)
- return NULL;
- if (PyDict_SetItem(dict, PYIDENT("__qualname__"), qualname) < 0)
- return NULL;
-
- /* Python2 __metaclass__ */
+ if (metaclass) {
+ Py_INCREF(metaclass);
+ if (PyDict_DelItem(mkw, PYIDENT("metaclass")) < 0) {
+ Py_DECREF(metaclass);
+ return NULL;
+ }
+ if (PyType_Check(metaclass)) {
+ PyObject* orig = metaclass;
+ metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
+ Py_DECREF(orig);
+ }
+ return metaclass;
+ }
+ return __Pyx_FindInheritedMetaclass(bases);
+}
+
+/////////////// CreateClass.proto ///////////////
+
+static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name,
+ PyObject *qualname, PyObject *modname); /*proto*/
+
+/////////////// CreateClass ///////////////
+//@requires: FindInheritedMetaclass
+//@requires: CalculateMetaclass
+
+static PyObject *__Pyx_CreateClass(PyObject *bases, PyObject *dict, PyObject *name,
+ PyObject *qualname, PyObject *modname) {
+ PyObject *result;
+ PyObject *metaclass;
+
+ if (PyDict_SetItem(dict, PYIDENT("__module__"), modname) < 0)
+ return NULL;
+ if (PyDict_SetItem(dict, PYIDENT("__qualname__"), qualname) < 0)
+ return NULL;
+
+ /* Python2 __metaclass__ */
metaclass = __Pyx_PyDict_GetItemStr(dict, PYIDENT("__metaclass__"));
- if (metaclass) {
- Py_INCREF(metaclass);
- if (PyType_Check(metaclass)) {
- PyObject* orig = metaclass;
- metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
- Py_DECREF(orig);
- }
- } else {
- metaclass = __Pyx_FindInheritedMetaclass(bases);
- }
- if (unlikely(!metaclass))
- return NULL;
- result = PyObject_CallFunctionObjArgs(metaclass, name, bases, dict, NULL);
- Py_DECREF(metaclass);
- return result;
-}
-
-/////////////// Py3ClassCreate.proto ///////////////
-
-static PyObject *__Pyx_Py3MetaclassPrepare(PyObject *metaclass, PyObject *bases, PyObject *name, PyObject *qualname,
- PyObject *mkw, PyObject *modname, PyObject *doc); /*proto*/
-static PyObject *__Pyx_Py3ClassCreate(PyObject *metaclass, PyObject *name, PyObject *bases, PyObject *dict,
- PyObject *mkw, int calculate_metaclass, int allow_py2_metaclass); /*proto*/
-
-/////////////// Py3ClassCreate ///////////////
-//@requires: PyObjectGetAttrStr
-//@requires: CalculateMetaclass
-
-static PyObject *__Pyx_Py3MetaclassPrepare(PyObject *metaclass, PyObject *bases, PyObject *name,
- PyObject *qualname, PyObject *mkw, PyObject *modname, PyObject *doc) {
- PyObject *ns;
- if (metaclass) {
- PyObject *prep = __Pyx_PyObject_GetAttrStr(metaclass, PYIDENT("__prepare__"));
- if (prep) {
- PyObject *pargs = PyTuple_Pack(2, name, bases);
- if (unlikely(!pargs)) {
- Py_DECREF(prep);
- return NULL;
- }
- ns = PyObject_Call(prep, pargs, mkw);
- Py_DECREF(prep);
- Py_DECREF(pargs);
- } else {
- if (unlikely(!PyErr_ExceptionMatches(PyExc_AttributeError)))
- return NULL;
- PyErr_Clear();
- ns = PyDict_New();
- }
- } else {
- ns = PyDict_New();
- }
-
- if (unlikely(!ns))
- return NULL;
-
- /* Required here to emulate assignment order */
- if (unlikely(PyObject_SetItem(ns, PYIDENT("__module__"), modname) < 0)) goto bad;
- if (unlikely(PyObject_SetItem(ns, PYIDENT("__qualname__"), qualname) < 0)) goto bad;
- if (unlikely(doc && PyObject_SetItem(ns, PYIDENT("__doc__"), doc) < 0)) goto bad;
- return ns;
-bad:
- Py_DECREF(ns);
- return NULL;
-}
-
-static PyObject *__Pyx_Py3ClassCreate(PyObject *metaclass, PyObject *name, PyObject *bases,
- PyObject *dict, PyObject *mkw,
- int calculate_metaclass, int allow_py2_metaclass) {
- PyObject *result, *margs;
- PyObject *owned_metaclass = NULL;
- if (allow_py2_metaclass) {
- /* honour Python2 __metaclass__ for backward compatibility */
- owned_metaclass = PyObject_GetItem(dict, PYIDENT("__metaclass__"));
- if (owned_metaclass) {
- metaclass = owned_metaclass;
- } else if (likely(PyErr_ExceptionMatches(PyExc_KeyError))) {
- PyErr_Clear();
- } else {
- return NULL;
- }
- }
- if (calculate_metaclass && (!metaclass || PyType_Check(metaclass))) {
- metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
- Py_XDECREF(owned_metaclass);
- if (unlikely(!metaclass))
- return NULL;
- owned_metaclass = metaclass;
- }
- margs = PyTuple_Pack(3, name, bases, dict);
- if (unlikely(!margs)) {
- result = NULL;
- } else {
- result = PyObject_Call(metaclass, margs, mkw);
- Py_DECREF(margs);
- }
- Py_XDECREF(owned_metaclass);
- return result;
-}
-
-/////////////// ExtTypeTest.proto ///////////////
-
-static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/
-
-/////////////// ExtTypeTest ///////////////
-
-static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) {
- if (unlikely(!type)) {
- PyErr_SetString(PyExc_SystemError, "Missing type object");
- return 0;
- }
+ if (metaclass) {
+ Py_INCREF(metaclass);
+ if (PyType_Check(metaclass)) {
+ PyObject* orig = metaclass;
+ metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
+ Py_DECREF(orig);
+ }
+ } else {
+ metaclass = __Pyx_FindInheritedMetaclass(bases);
+ }
+ if (unlikely(!metaclass))
+ return NULL;
+ result = PyObject_CallFunctionObjArgs(metaclass, name, bases, dict, NULL);
+ Py_DECREF(metaclass);
+ return result;
+}
+
+/////////////// Py3ClassCreate.proto ///////////////
+
+static PyObject *__Pyx_Py3MetaclassPrepare(PyObject *metaclass, PyObject *bases, PyObject *name, PyObject *qualname,
+ PyObject *mkw, PyObject *modname, PyObject *doc); /*proto*/
+static PyObject *__Pyx_Py3ClassCreate(PyObject *metaclass, PyObject *name, PyObject *bases, PyObject *dict,
+ PyObject *mkw, int calculate_metaclass, int allow_py2_metaclass); /*proto*/
+
+/////////////// Py3ClassCreate ///////////////
+//@requires: PyObjectGetAttrStr
+//@requires: CalculateMetaclass
+
+static PyObject *__Pyx_Py3MetaclassPrepare(PyObject *metaclass, PyObject *bases, PyObject *name,
+ PyObject *qualname, PyObject *mkw, PyObject *modname, PyObject *doc) {
+ PyObject *ns;
+ if (metaclass) {
+ PyObject *prep = __Pyx_PyObject_GetAttrStr(metaclass, PYIDENT("__prepare__"));
+ if (prep) {
+ PyObject *pargs = PyTuple_Pack(2, name, bases);
+ if (unlikely(!pargs)) {
+ Py_DECREF(prep);
+ return NULL;
+ }
+ ns = PyObject_Call(prep, pargs, mkw);
+ Py_DECREF(prep);
+ Py_DECREF(pargs);
+ } else {
+ if (unlikely(!PyErr_ExceptionMatches(PyExc_AttributeError)))
+ return NULL;
+ PyErr_Clear();
+ ns = PyDict_New();
+ }
+ } else {
+ ns = PyDict_New();
+ }
+
+ if (unlikely(!ns))
+ return NULL;
+
+ /* Required here to emulate assignment order */
+ if (unlikely(PyObject_SetItem(ns, PYIDENT("__module__"), modname) < 0)) goto bad;
+ if (unlikely(PyObject_SetItem(ns, PYIDENT("__qualname__"), qualname) < 0)) goto bad;
+ if (unlikely(doc && PyObject_SetItem(ns, PYIDENT("__doc__"), doc) < 0)) goto bad;
+ return ns;
+bad:
+ Py_DECREF(ns);
+ return NULL;
+}
+
+static PyObject *__Pyx_Py3ClassCreate(PyObject *metaclass, PyObject *name, PyObject *bases,
+ PyObject *dict, PyObject *mkw,
+ int calculate_metaclass, int allow_py2_metaclass) {
+ PyObject *result, *margs;
+ PyObject *owned_metaclass = NULL;
+ if (allow_py2_metaclass) {
+ /* honour Python2 __metaclass__ for backward compatibility */
+ owned_metaclass = PyObject_GetItem(dict, PYIDENT("__metaclass__"));
+ if (owned_metaclass) {
+ metaclass = owned_metaclass;
+ } else if (likely(PyErr_ExceptionMatches(PyExc_KeyError))) {
+ PyErr_Clear();
+ } else {
+ return NULL;
+ }
+ }
+ if (calculate_metaclass && (!metaclass || PyType_Check(metaclass))) {
+ metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
+ Py_XDECREF(owned_metaclass);
+ if (unlikely(!metaclass))
+ return NULL;
+ owned_metaclass = metaclass;
+ }
+ margs = PyTuple_Pack(3, name, bases, dict);
+ if (unlikely(!margs)) {
+ result = NULL;
+ } else {
+ result = PyObject_Call(metaclass, margs, mkw);
+ Py_DECREF(margs);
+ }
+ Py_XDECREF(owned_metaclass);
+ return result;
+}
+
+/////////////// ExtTypeTest.proto ///////////////
+
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/
+
+/////////////// ExtTypeTest ///////////////
+
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) {
+ if (unlikely(!type)) {
+ PyErr_SetString(PyExc_SystemError, "Missing type object");
+ return 0;
+ }
if (likely(__Pyx_TypeCheck(obj, type)))
- return 1;
- PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s",
- Py_TYPE(obj)->tp_name, type->tp_name);
- return 0;
-}
-
-/////////////// CallableCheck.proto ///////////////
-
+ return 1;
+ PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s",
+ Py_TYPE(obj)->tp_name, type->tp_name);
+ return 0;
+}
+
+/////////////// CallableCheck.proto ///////////////
+
#if CYTHON_USE_TYPE_SLOTS && PY_MAJOR_VERSION >= 3
#define __Pyx_PyCallable_Check(obj) (Py_TYPE(obj)->tp_call != NULL)
-#else
-#define __Pyx_PyCallable_Check(obj) PyCallable_Check(obj)
-#endif
-
-/////////////// PyDictContains.proto ///////////////
-
+#else
+#define __Pyx_PyCallable_Check(obj) PyCallable_Check(obj)
+#endif
+
+/////////////// PyDictContains.proto ///////////////
+
static CYTHON_INLINE int __Pyx_PyDict_ContainsTF(PyObject* item, PyObject* dict, int eq) {
- int result = PyDict_Contains(dict, item);
- return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
-}
-
+ int result = PyDict_Contains(dict, item);
+ return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
+}
+
/////////////// PySetContains.proto ///////////////
static CYTHON_INLINE int __Pyx_PySet_ContainsTF(PyObject* key, PyObject* set, int eq); /* proto */
@@ -1083,52 +1083,52 @@ static CYTHON_INLINE int __Pyx_PySet_ContainsTF(PyObject* key, PyObject* set, in
return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
}
-/////////////// PySequenceContains.proto ///////////////
-
+/////////////// PySequenceContains.proto ///////////////
+
static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* seq, int eq) {
- int result = PySequence_Contains(seq, item);
- return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
-}
-
-/////////////// PyBoolOrNullFromLong.proto ///////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyBoolOrNull_FromLong(long b) {
- return unlikely(b < 0) ? NULL : __Pyx_PyBool_FromLong(b);
-}
-
-/////////////// GetBuiltinName.proto ///////////////
-
-static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/
-
-/////////////// GetBuiltinName ///////////////
-//@requires: PyObjectGetAttrStr
-//@substitute: naming
-
-static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
- PyObject* result = __Pyx_PyObject_GetAttrStr($builtins_cname, name);
- if (unlikely(!result)) {
- PyErr_Format(PyExc_NameError,
-#if PY_MAJOR_VERSION >= 3
- "name '%U' is not defined", name);
-#else
- "name '%.200s' is not defined", PyString_AS_STRING(name));
-#endif
- }
- return result;
-}
-
-/////////////// GetNameInClass.proto ///////////////
-
+ int result = PySequence_Contains(seq, item);
+ return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
+}
+
+/////////////// PyBoolOrNullFromLong.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyBoolOrNull_FromLong(long b) {
+ return unlikely(b < 0) ? NULL : __Pyx_PyBool_FromLong(b);
+}
+
+/////////////// GetBuiltinName.proto ///////////////
+
+static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/
+
+/////////////// GetBuiltinName ///////////////
+//@requires: PyObjectGetAttrStr
+//@substitute: naming
+
+static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
+ PyObject* result = __Pyx_PyObject_GetAttrStr($builtins_cname, name);
+ if (unlikely(!result)) {
+ PyErr_Format(PyExc_NameError,
+#if PY_MAJOR_VERSION >= 3
+ "name '%U' is not defined", name);
+#else
+ "name '%.200s' is not defined", PyString_AS_STRING(name));
+#endif
+ }
+ return result;
+}
+
+/////////////// GetNameInClass.proto ///////////////
+
#define __Pyx_GetNameInClass(var, nmspace, name) (var) = __Pyx__GetNameInClass(nmspace, name)
static PyObject *__Pyx__GetNameInClass(PyObject *nmspace, PyObject *name); /*proto*/
-
-/////////////// GetNameInClass ///////////////
-//@requires: PyObjectGetAttrStr
-//@requires: GetModuleGlobalName
+
+/////////////// GetNameInClass ///////////////
+//@requires: PyObjectGetAttrStr
+//@requires: GetModuleGlobalName
//@requires: Exceptions.c::PyThreadStateGet
//@requires: Exceptions.c::PyErrFetchRestore
//@requires: Exceptions.c::PyErrExceptionMatches
-
+
static PyObject *__Pyx_GetGlobalNameAfterAttributeLookup(PyObject *name) {
PyObject *result;
__Pyx_PyThreadState_declare
@@ -1141,14 +1141,14 @@ static PyObject *__Pyx_GetGlobalNameAfterAttributeLookup(PyObject *name) {
}
static PyObject *__Pyx__GetNameInClass(PyObject *nmspace, PyObject *name) {
- PyObject *result;
- result = __Pyx_PyObject_GetAttrStr(nmspace, name);
+ PyObject *result;
+ result = __Pyx_PyObject_GetAttrStr(nmspace, name);
if (!result) {
result = __Pyx_GetGlobalNameAfterAttributeLookup(name);
}
- return result;
-}
-
+ return result;
+}
+
/////////////// SetNameInClass.proto ///////////////
@@ -1164,10 +1164,10 @@ static PyObject *__Pyx__GetNameInClass(PyObject *nmspace, PyObject *name) {
#endif
-/////////////// GetModuleGlobalName.proto ///////////////
+/////////////// GetModuleGlobalName.proto ///////////////
//@requires: PyDictVersioning
//@substitute: naming
-
+
#if CYTHON_USE_DICT_VERSIONS
#define __Pyx_GetModuleGlobalName(var, name) { \
static PY_UINT64_T __pyx_dict_version = 0; \
@@ -1187,19 +1187,19 @@ static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_ve
#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name)
static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); /*proto*/
#endif
-
-/////////////// GetModuleGlobalName ///////////////
-//@requires: GetBuiltinName
-//@substitute: naming
-
+
+/////////////// GetModuleGlobalName ///////////////
+//@requires: GetBuiltinName
+//@substitute: naming
+
#if CYTHON_USE_DICT_VERSIONS
static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value)
#else
static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
#endif
{
- PyObject *result;
+ PyObject *result;
#if !CYTHON_AVOID_BORROWED_REFS
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1
// Identifier names are always interned and have a pre-calculated hash value.
@@ -1211,14 +1211,14 @@ static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
return NULL;
}
#else
- result = PyDict_GetItem($moddict_cname, name);
+ result = PyDict_GetItem($moddict_cname, name);
__PYX_UPDATE_DICT_CACHE($moddict_cname, result, *dict_cached_value, *dict_version)
- if (likely(result)) {
+ if (likely(result)) {
return __Pyx_NewRef(result);
}
#endif
-#else
- result = PyObject_GetItem($moddict_cname, name);
+#else
+ result = PyObject_GetItem($moddict_cname, name);
__PYX_UPDATE_DICT_CACHE($moddict_cname, result, *dict_cached_value, *dict_version)
if (likely(result)) {
return __Pyx_NewRef(result);
@@ -1226,56 +1226,56 @@ static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
PyErr_Clear();
#endif
return __Pyx_GetBuiltinName(name);
-}
-
-//////////////////// GetAttr.proto ////////////////////
-
-static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); /*proto*/
-
-//////////////////// GetAttr ////////////////////
-//@requires: PyObjectGetAttrStr
-
-static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) {
+}
+
+//////////////////// GetAttr.proto ////////////////////
+
+static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); /*proto*/
+
+//////////////////// GetAttr ////////////////////
+//@requires: PyObjectGetAttrStr
+
+static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) {
#if CYTHON_USE_TYPE_SLOTS
-#if PY_MAJOR_VERSION >= 3
- if (likely(PyUnicode_Check(n)))
-#else
- if (likely(PyString_Check(n)))
-#endif
- return __Pyx_PyObject_GetAttrStr(o, n);
-#endif
- return PyObject_GetAttr(o, n);
-}
-
-/////////////// PyObjectLookupSpecial.proto ///////////////
-//@requires: PyObjectGetAttrStr
-
+#if PY_MAJOR_VERSION >= 3
+ if (likely(PyUnicode_Check(n)))
+#else
+ if (likely(PyString_Check(n)))
+#endif
+ return __Pyx_PyObject_GetAttrStr(o, n);
+#endif
+ return PyObject_GetAttr(o, n);
+}
+
+/////////////// PyObjectLookupSpecial.proto ///////////////
+//@requires: PyObjectGetAttrStr
+
#if CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS
-static CYTHON_INLINE PyObject* __Pyx_PyObject_LookupSpecial(PyObject* obj, PyObject* attr_name) {
- PyObject *res;
- PyTypeObject *tp = Py_TYPE(obj);
-#if PY_MAJOR_VERSION < 3
- if (unlikely(PyInstance_Check(obj)))
- return __Pyx_PyObject_GetAttrStr(obj, attr_name);
-#endif
- // adapted from CPython's special_lookup() in ceval.c
- res = _PyType_Lookup(tp, attr_name);
- if (likely(res)) {
- descrgetfunc f = Py_TYPE(res)->tp_descr_get;
- if (!f) {
- Py_INCREF(res);
- } else {
- res = f(res, obj, (PyObject *)tp);
- }
- } else {
- PyErr_SetObject(PyExc_AttributeError, attr_name);
- }
- return res;
-}
-#else
-#define __Pyx_PyObject_LookupSpecial(o,n) __Pyx_PyObject_GetAttrStr(o,n)
-#endif
-
+static CYTHON_INLINE PyObject* __Pyx_PyObject_LookupSpecial(PyObject* obj, PyObject* attr_name) {
+ PyObject *res;
+ PyTypeObject *tp = Py_TYPE(obj);
+#if PY_MAJOR_VERSION < 3
+ if (unlikely(PyInstance_Check(obj)))
+ return __Pyx_PyObject_GetAttrStr(obj, attr_name);
+#endif
+ // adapted from CPython's special_lookup() in ceval.c
+ res = _PyType_Lookup(tp, attr_name);
+ if (likely(res)) {
+ descrgetfunc f = Py_TYPE(res)->tp_descr_get;
+ if (!f) {
+ Py_INCREF(res);
+ } else {
+ res = f(res, obj, (PyObject *)tp);
+ }
+ } else {
+ PyErr_SetObject(PyExc_AttributeError, attr_name);
+ }
+ return res;
+}
+#else
+#define __Pyx_PyObject_LookupSpecial(o,n) __Pyx_PyObject_GetAttrStr(o,n)
+#endif
+
/////////////// PyObject_GenericGetAttrNoDict.proto ///////////////
@@ -1396,8 +1396,8 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, P
}
-/////////////// PyObjectGetAttrStr.proto ///////////////
-
+/////////////// PyObjectGetAttrStr.proto ///////////////
+
#if CYTHON_USE_TYPE_SLOTS
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);/*proto*/
#else
@@ -1407,21 +1407,21 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject
/////////////// PyObjectGetAttrStr ///////////////
#if CYTHON_USE_TYPE_SLOTS
-static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
- PyTypeObject* tp = Py_TYPE(obj);
- if (likely(tp->tp_getattro))
- return tp->tp_getattro(obj, attr_name);
-#if PY_MAJOR_VERSION < 3
- if (likely(tp->tp_getattr))
- return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
-#endif
- return PyObject_GetAttr(obj, attr_name);
-}
-#endif
-
-
-/////////////// PyObjectSetAttrStr.proto ///////////////
-
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
+ PyTypeObject* tp = Py_TYPE(obj);
+ if (likely(tp->tp_getattro))
+ return tp->tp_getattro(obj, attr_name);
+#if PY_MAJOR_VERSION < 3
+ if (likely(tp->tp_getattr))
+ return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
+#endif
+ return PyObject_GetAttr(obj, attr_name);
+}
+#endif
+
+
+/////////////// PyObjectSetAttrStr.proto ///////////////
+
#if CYTHON_USE_TYPE_SLOTS
#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o, n, NULL)
static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value);/*proto*/
@@ -1433,19 +1433,19 @@ static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr
/////////////// PyObjectSetAttrStr ///////////////
#if CYTHON_USE_TYPE_SLOTS
-static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) {
- PyTypeObject* tp = Py_TYPE(obj);
- if (likely(tp->tp_setattro))
- return tp->tp_setattro(obj, attr_name, value);
-#if PY_MAJOR_VERSION < 3
- if (likely(tp->tp_setattr))
- return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value);
-#endif
- return PyObject_SetAttr(obj, attr_name, value);
-}
-#endif
-
-
+static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) {
+ PyTypeObject* tp = Py_TYPE(obj);
+ if (likely(tp->tp_setattro))
+ return tp->tp_setattro(obj, attr_name, value);
+#if PY_MAJOR_VERSION < 3
+ if (likely(tp->tp_setattr))
+ return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value);
+#endif
+ return PyObject_SetAttr(obj, attr_name, value);
+}
+#endif
+
+
/////////////// PyObjectGetMethod.proto ///////////////
static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);/*proto*/
@@ -1785,16 +1785,16 @@ bad:
}
-/////////////// PyObjectCallMethod0.proto ///////////////
-
-static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); /*proto*/
-
-/////////////// PyObjectCallMethod0 ///////////////
+/////////////// PyObjectCallMethod0.proto ///////////////
+
+static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); /*proto*/
+
+/////////////// PyObjectCallMethod0 ///////////////
//@requires: PyObjectGetMethod
-//@requires: PyObjectCallOneArg
-//@requires: PyObjectCallNoArg
-
-static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
+//@requires: PyObjectCallOneArg
+//@requires: PyObjectCallNoArg
+
+static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) {
PyObject *method = NULL, *result = NULL;
int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
if (likely(is_method)) {
@@ -1803,22 +1803,22 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name
return result;
}
if (unlikely(!method)) goto bad;
- result = __Pyx_PyObject_CallNoArg(method);
- Py_DECREF(method);
-bad:
- return result;
-}
-
-
-/////////////// PyObjectCallMethod1.proto ///////////////
-
-static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); /*proto*/
-
-/////////////// PyObjectCallMethod1 ///////////////
+ result = __Pyx_PyObject_CallNoArg(method);
+ Py_DECREF(method);
+bad:
+ return result;
+}
+
+
+/////////////// PyObjectCallMethod1.proto ///////////////
+
+static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); /*proto*/
+
+/////////////// PyObjectCallMethod1 ///////////////
//@requires: PyObjectGetMethod
-//@requires: PyObjectCallOneArg
+//@requires: PyObjectCallOneArg
//@requires: PyObjectCall2Args
-
+
static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) {
// Separate function to avoid excessive inlining.
PyObject *result = __Pyx_PyObject_CallOneArg(method, arg);
@@ -1836,19 +1836,19 @@ static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name
}
if (unlikely(!method)) return NULL;
return __Pyx__PyObject_CallMethod1(method, arg);
-}
-
-
-/////////////// PyObjectCallMethod2.proto ///////////////
-
-static PyObject* __Pyx_PyObject_CallMethod2(PyObject* obj, PyObject* method_name, PyObject* arg1, PyObject* arg2); /*proto*/
-
-/////////////// PyObjectCallMethod2 ///////////////
-//@requires: PyObjectCall
+}
+
+
+/////////////// PyObjectCallMethod2.proto ///////////////
+
+static PyObject* __Pyx_PyObject_CallMethod2(PyObject* obj, PyObject* method_name, PyObject* arg1, PyObject* arg2); /*proto*/
+
+/////////////// PyObjectCallMethod2 ///////////////
+//@requires: PyObjectCall
//@requires: PyFunctionFastCall
//@requires: PyCFunctionFastCall
//@requires: PyObjectCall2Args
-
+
static PyObject* __Pyx_PyObject_Call3Args(PyObject* function, PyObject* arg1, PyObject* arg2, PyObject* arg3) {
#if CYTHON_FAST_PYCALL
if (PyFunction_Check(function)) {
@@ -1877,89 +1877,89 @@ static PyObject* __Pyx_PyObject_Call3Args(PyObject* function, PyObject* arg1, Py
return result;
}
-static PyObject* __Pyx_PyObject_CallMethod2(PyObject* obj, PyObject* method_name, PyObject* arg1, PyObject* arg2) {
+static PyObject* __Pyx_PyObject_CallMethod2(PyObject* obj, PyObject* method_name, PyObject* arg1, PyObject* arg2) {
PyObject *args, *method = NULL, *result = NULL;
int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method);
if (likely(is_method)) {
result = __Pyx_PyObject_Call3Args(method, obj, arg1, arg2);
- Py_DECREF(method);
+ Py_DECREF(method);
return result;
- }
+ }
if (unlikely(!method)) return NULL;
result = __Pyx_PyObject_Call2Args(method, arg1, arg2);
- Py_DECREF(method);
- return result;
-}
-
-
-/////////////// tp_new.proto ///////////////
-
-#define __Pyx_tp_new(type_obj, args) __Pyx_tp_new_kwargs(type_obj, args, NULL)
-static CYTHON_INLINE PyObject* __Pyx_tp_new_kwargs(PyObject* type_obj, PyObject* args, PyObject* kwargs) {
- return (PyObject*) (((PyTypeObject*)type_obj)->tp_new((PyTypeObject*)type_obj, args, kwargs));
-}
-
-
-/////////////// PyObjectCall.proto ///////////////
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/
-#else
-#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
-#endif
-
-/////////////// PyObjectCall ///////////////
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
- PyObject *result;
+ Py_DECREF(method);
+ return result;
+}
+
+
+/////////////// tp_new.proto ///////////////
+
+#define __Pyx_tp_new(type_obj, args) __Pyx_tp_new_kwargs(type_obj, args, NULL)
+static CYTHON_INLINE PyObject* __Pyx_tp_new_kwargs(PyObject* type_obj, PyObject* args, PyObject* kwargs) {
+ return (PyObject*) (((PyTypeObject*)type_obj)->tp_new((PyTypeObject*)type_obj, args, kwargs));
+}
+
+
+/////////////// PyObjectCall.proto ///////////////
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/
+#else
+#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
+#endif
+
+/////////////// PyObjectCall ///////////////
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
+ PyObject *result;
ternaryfunc call = Py_TYPE(func)->tp_call;
-
- if (unlikely(!call))
- return PyObject_Call(func, arg, kw);
- if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
- return NULL;
- result = (*call)(func, arg, kw);
- Py_LeaveRecursiveCall();
- if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
- PyErr_SetString(
- PyExc_SystemError,
- "NULL result without error in PyObject_Call");
- }
- return result;
-}
-#endif
-
-
-/////////////// PyObjectCallMethO.proto ///////////////
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); /*proto*/
-#endif
-
-/////////////// PyObjectCallMethO ///////////////
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) {
- PyObject *self, *result;
- PyCFunction cfunc;
- cfunc = PyCFunction_GET_FUNCTION(func);
- self = PyCFunction_GET_SELF(func);
-
- if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
- return NULL;
- result = cfunc(self, arg);
- Py_LeaveRecursiveCall();
- if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
- PyErr_SetString(
- PyExc_SystemError,
- "NULL result without error in PyObject_Call");
- }
- return result;
-}
-#endif
-
-
+
+ if (unlikely(!call))
+ return PyObject_Call(func, arg, kw);
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
+ return NULL;
+ result = (*call)(func, arg, kw);
+ Py_LeaveRecursiveCall();
+ if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "NULL result without error in PyObject_Call");
+ }
+ return result;
+}
+#endif
+
+
+/////////////// PyObjectCallMethO.proto ///////////////
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); /*proto*/
+#endif
+
+/////////////// PyObjectCallMethO ///////////////
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) {
+ PyObject *self, *result;
+ PyCFunction cfunc;
+ cfunc = PyCFunction_GET_FUNCTION(func);
+ self = PyCFunction_GET_SELF(func);
+
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
+ return NULL;
+ result = cfunc(self, arg);
+ Py_LeaveRecursiveCall();
+ if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "NULL result without error in PyObject_Call");
+ }
+ return result;
+}
+#endif
+
+
/////////////// PyFunctionFastCall.proto ///////////////
#if CYTHON_FAST_PYCALL
@@ -2240,124 +2240,124 @@ done:
}
-/////////////// PyObjectCallOneArg.proto ///////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); /*proto*/
-
-/////////////// PyObjectCallOneArg ///////////////
-//@requires: PyObjectCallMethO
-//@requires: PyObjectCall
+/////////////// PyObjectCallOneArg.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); /*proto*/
+
+/////////////// PyObjectCallOneArg ///////////////
+//@requires: PyObjectCallMethO
+//@requires: PyObjectCall
//@requires: PyFunctionFastCall
//@requires: PyCFunctionFastCall
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) {
- PyObject *result;
- PyObject *args = PyTuple_New(1);
- if (unlikely(!args)) return NULL;
- Py_INCREF(arg);
- PyTuple_SET_ITEM(args, 0, arg);
- result = __Pyx_PyObject_Call(func, args, NULL);
- Py_DECREF(args);
- return result;
-}
-
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+ PyObject *result;
+ PyObject *args = PyTuple_New(1);
+ if (unlikely(!args)) return NULL;
+ Py_INCREF(arg);
+ PyTuple_SET_ITEM(args, 0, arg);
+ result = __Pyx_PyObject_Call(func, args, NULL);
+ Py_DECREF(args);
+ return result;
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
#if CYTHON_FAST_PYCALL
if (PyFunction_Check(func)) {
return __Pyx_PyFunction_FastCall(func, &arg, 1);
}
#endif
- if (likely(PyCFunction_Check(func))) {
- if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) {
- // fast and simple case that we are optimising for
- return __Pyx_PyObject_CallMethO(func, arg);
+ if (likely(PyCFunction_Check(func))) {
+ if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) {
+ // fast and simple case that we are optimising for
+ return __Pyx_PyObject_CallMethO(func, arg);
#if CYTHON_FAST_PYCCALL
} else if (__Pyx_PyFastCFunction_Check(func)) {
return __Pyx_PyCFunction_FastCall(func, &arg, 1);
#endif
- }
- }
- return __Pyx__PyObject_CallOneArg(func, arg);
-}
-#else
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+ }
+ }
+ return __Pyx__PyObject_CallOneArg(func, arg);
+}
+#else
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
PyObject *result;
PyObject *args = PyTuple_Pack(1, arg);
if (unlikely(!args)) return NULL;
result = __Pyx_PyObject_Call(func, args, NULL);
Py_DECREF(args);
return result;
-}
-#endif
-
-
-/////////////// PyObjectCallNoArg.proto ///////////////
-//@requires: PyObjectCall
-//@substitute: naming
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); /*proto*/
-#else
-#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, $empty_tuple, NULL)
-#endif
-
-/////////////// PyObjectCallNoArg ///////////////
-//@requires: PyObjectCallMethO
-//@requires: PyObjectCall
+}
+#endif
+
+
+/////////////// PyObjectCallNoArg.proto ///////////////
+//@requires: PyObjectCall
+//@substitute: naming
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); /*proto*/
+#else
+#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, $empty_tuple, NULL)
+#endif
+
+/////////////// PyObjectCallNoArg ///////////////
+//@requires: PyObjectCallMethO
+//@requires: PyObjectCall
//@requires: PyFunctionFastCall
-//@substitute: naming
-
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
+//@substitute: naming
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
#if CYTHON_FAST_PYCALL
if (PyFunction_Check(func)) {
return __Pyx_PyFunction_FastCall(func, NULL, 0);
}
#endif
-#ifdef __Pyx_CyFunction_USED
+#ifdef __Pyx_CyFunction_USED
if (likely(PyCFunction_Check(func) || __Pyx_CyFunction_Check(func)))
-#else
+#else
if (likely(PyCFunction_Check(func)))
-#endif
+#endif
{
- if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) {
- // fast and simple case that we are optimising for
- return __Pyx_PyObject_CallMethO(func, NULL);
- }
- }
- return __Pyx_PyObject_Call(func, $empty_tuple, NULL);
-}
-#endif
-
-
-/////////////// MatrixMultiply.proto ///////////////
-
-#if PY_VERSION_HEX >= 0x03050000
- #define __Pyx_PyNumber_MatrixMultiply(x,y) PyNumber_MatrixMultiply(x,y)
- #define __Pyx_PyNumber_InPlaceMatrixMultiply(x,y) PyNumber_InPlaceMatrixMultiply(x,y)
-#else
-#define __Pyx_PyNumber_MatrixMultiply(x,y) __Pyx__PyNumber_MatrixMultiply(x, y, "@")
-static PyObject* __Pyx__PyNumber_MatrixMultiply(PyObject* x, PyObject* y, const char* op_name);
-static PyObject* __Pyx_PyNumber_InPlaceMatrixMultiply(PyObject* x, PyObject* y);
-#endif
-
-/////////////// MatrixMultiply ///////////////
-//@requires: PyObjectGetAttrStr
-//@requires: PyObjectCallOneArg
+ if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) {
+ // fast and simple case that we are optimising for
+ return __Pyx_PyObject_CallMethO(func, NULL);
+ }
+ }
+ return __Pyx_PyObject_Call(func, $empty_tuple, NULL);
+}
+#endif
+
+
+/////////////// MatrixMultiply.proto ///////////////
+
+#if PY_VERSION_HEX >= 0x03050000
+ #define __Pyx_PyNumber_MatrixMultiply(x,y) PyNumber_MatrixMultiply(x,y)
+ #define __Pyx_PyNumber_InPlaceMatrixMultiply(x,y) PyNumber_InPlaceMatrixMultiply(x,y)
+#else
+#define __Pyx_PyNumber_MatrixMultiply(x,y) __Pyx__PyNumber_MatrixMultiply(x, y, "@")
+static PyObject* __Pyx__PyNumber_MatrixMultiply(PyObject* x, PyObject* y, const char* op_name);
+static PyObject* __Pyx_PyNumber_InPlaceMatrixMultiply(PyObject* x, PyObject* y);
+#endif
+
+/////////////// MatrixMultiply ///////////////
+//@requires: PyObjectGetAttrStr
+//@requires: PyObjectCallOneArg
//@requires: PyFunctionFastCall
//@requires: PyCFunctionFastCall
-
-#if PY_VERSION_HEX < 0x03050000
-static PyObject* __Pyx_PyObject_CallMatrixMethod(PyObject* method, PyObject* arg) {
- // NOTE: eats the method reference
- PyObject *result = NULL;
+
+#if PY_VERSION_HEX < 0x03050000
+static PyObject* __Pyx_PyObject_CallMatrixMethod(PyObject* method, PyObject* arg) {
+ // NOTE: eats the method reference
+ PyObject *result = NULL;
#if CYTHON_UNPACK_METHODS
- if (likely(PyMethod_Check(method))) {
- PyObject *self = PyMethod_GET_SELF(method);
- if (likely(self)) {
- PyObject *args;
- PyObject *function = PyMethod_GET_FUNCTION(method);
+ if (likely(PyMethod_Check(method))) {
+ PyObject *self = PyMethod_GET_SELF(method);
+ if (likely(self)) {
+ PyObject *args;
+ PyObject *function = PyMethod_GET_FUNCTION(method);
#if CYTHON_FAST_PYCALL
if (PyFunction_Check(function)) {
PyObject *args[2] = {self, arg};
@@ -2372,69 +2372,69 @@ static PyObject* __Pyx_PyObject_CallMatrixMethod(PyObject* method, PyObject* arg
goto done;
}
#endif
- args = PyTuple_New(2);
+ args = PyTuple_New(2);
if (unlikely(!args)) goto done;
- Py_INCREF(self);
- PyTuple_SET_ITEM(args, 0, self);
- Py_INCREF(arg);
- PyTuple_SET_ITEM(args, 1, arg);
- Py_INCREF(function);
- Py_DECREF(method); method = NULL;
- result = __Pyx_PyObject_Call(function, args, NULL);
- Py_DECREF(args);
- Py_DECREF(function);
- return result;
- }
- }
-#endif
- result = __Pyx_PyObject_CallOneArg(method, arg);
+ Py_INCREF(self);
+ PyTuple_SET_ITEM(args, 0, self);
+ Py_INCREF(arg);
+ PyTuple_SET_ITEM(args, 1, arg);
+ Py_INCREF(function);
+ Py_DECREF(method); method = NULL;
+ result = __Pyx_PyObject_Call(function, args, NULL);
+ Py_DECREF(args);
+ Py_DECREF(function);
+ return result;
+ }
+ }
+#endif
+ result = __Pyx_PyObject_CallOneArg(method, arg);
done:
- Py_DECREF(method);
- return result;
-}
-
-#define __Pyx_TryMatrixMethod(x, y, py_method_name) { \
- PyObject *func = __Pyx_PyObject_GetAttrStr(x, py_method_name); \
- if (func) { \
- PyObject *result = __Pyx_PyObject_CallMatrixMethod(func, y); \
- if (result != Py_NotImplemented) \
- return result; \
- Py_DECREF(result); \
- } else { \
- if (!PyErr_ExceptionMatches(PyExc_AttributeError)) \
- return NULL; \
- PyErr_Clear(); \
- } \
-}
-
-static PyObject* __Pyx__PyNumber_MatrixMultiply(PyObject* x, PyObject* y, const char* op_name) {
- int right_is_subtype = PyObject_IsSubclass((PyObject*)Py_TYPE(y), (PyObject*)Py_TYPE(x));
+ Py_DECREF(method);
+ return result;
+}
+
+#define __Pyx_TryMatrixMethod(x, y, py_method_name) { \
+ PyObject *func = __Pyx_PyObject_GetAttrStr(x, py_method_name); \
+ if (func) { \
+ PyObject *result = __Pyx_PyObject_CallMatrixMethod(func, y); \
+ if (result != Py_NotImplemented) \
+ return result; \
+ Py_DECREF(result); \
+ } else { \
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) \
+ return NULL; \
+ PyErr_Clear(); \
+ } \
+}
+
+static PyObject* __Pyx__PyNumber_MatrixMultiply(PyObject* x, PyObject* y, const char* op_name) {
+ int right_is_subtype = PyObject_IsSubclass((PyObject*)Py_TYPE(y), (PyObject*)Py_TYPE(x));
if (unlikely(right_is_subtype == -1))
return NULL;
- if (right_is_subtype) {
- // to allow subtypes to override parent behaviour, try reversed operation first
- // see note at https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types
- __Pyx_TryMatrixMethod(y, x, PYIDENT("__rmatmul__"))
- }
- __Pyx_TryMatrixMethod(x, y, PYIDENT("__matmul__"))
- if (!right_is_subtype) {
- __Pyx_TryMatrixMethod(y, x, PYIDENT("__rmatmul__"))
- }
- PyErr_Format(PyExc_TypeError,
- "unsupported operand type(s) for %.2s: '%.100s' and '%.100s'",
- op_name,
- Py_TYPE(x)->tp_name,
- Py_TYPE(y)->tp_name);
- return NULL;
-}
-
-static PyObject* __Pyx_PyNumber_InPlaceMatrixMultiply(PyObject* x, PyObject* y) {
- __Pyx_TryMatrixMethod(x, y, PYIDENT("__imatmul__"))
- return __Pyx__PyNumber_MatrixMultiply(x, y, "@=");
-}
-
-#undef __Pyx_TryMatrixMethod
-#endif
+ if (right_is_subtype) {
+ // to allow subtypes to override parent behaviour, try reversed operation first
+ // see note at https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types
+ __Pyx_TryMatrixMethod(y, x, PYIDENT("__rmatmul__"))
+ }
+ __Pyx_TryMatrixMethod(x, y, PYIDENT("__matmul__"))
+ if (!right_is_subtype) {
+ __Pyx_TryMatrixMethod(y, x, PYIDENT("__rmatmul__"))
+ }
+ PyErr_Format(PyExc_TypeError,
+ "unsupported operand type(s) for %.2s: '%.100s' and '%.100s'",
+ op_name,
+ Py_TYPE(x)->tp_name,
+ Py_TYPE(y)->tp_name);
+ return NULL;
+}
+
+static PyObject* __Pyx_PyNumber_InPlaceMatrixMultiply(PyObject* x, PyObject* y) {
+ __Pyx_TryMatrixMethod(x, y, PYIDENT("__imatmul__"))
+ return __Pyx__PyNumber_MatrixMultiply(x, y, "@=");
+}
+
+#undef __Pyx_TryMatrixMethod
+#endif
/////////////// PyDictVersioning.proto ///////////////
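The ObjectHandling.c hunks above only restore indentation; the helper logic (slice cropping, metaclass calculation, call wrappers) is unchanged. As a rough standalone illustration of the bound-clamping rule that __Pyx_crop_slice applies before copying a list/tuple slice, a minimal sketch follows; the file name, crop_slice, and main() driver are illustrative only and are not part of this commit or of Cython itself.

/* crop_sketch.c -- illustrative only; mirrors __Pyx_crop_slice's clamping
 * of negative and oversized slice bounds, assuming the length is known. */
#include <stdio.h>
#include <stddef.h>

static void crop_slice(ptrdiff_t *start, ptrdiff_t *stop, ptrdiff_t length) {
    ptrdiff_t s = *start, e = *stop;
    if (s < 0) {              /* negative start counts from the end */
        s += length;
        if (s < 0) s = 0;
    }
    if (e < 0)                /* negative stop counts from the end */
        e += length;
    else if (e > length)      /* clamp an oversized stop to the length */
        e = length;
    *start = s;
    *stop = e;
}

int main(void) {
    ptrdiff_t start = -3, stop = 100;
    crop_slice(&start, &stop, 10);                  /* like seq[-3:100] on 10 items */
    printf("start=%td stop=%td\n", start, stop);    /* prints: start=7 stop=10 */
    return 0;
}

After cropping, the real helper also records stop - start as the slice length and the caller returns an empty list/tuple when that length is not positive, which is why the copy loop in __Pyx_copy_object_array never sees a negative count.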
diff --git a/contrib/tools/cython/Cython/Utility/Optimize.c b/contrib/tools/cython/Cython/Utility/Optimize.c
index 34b6438f33..d18c9b78ec 100644
--- a/contrib/tools/cython/Cython/Utility/Optimize.c
+++ b/contrib/tools/cython/Cython/Utility/Optimize.c
@@ -1,122 +1,122 @@
-/*
- * Optional optimisations of built-in functions and methods.
- *
- * Required replacements of builtins are in Builtins.c.
- *
- * General object operations and protocols are in ObjectHandling.c.
- */
-
-/////////////// append.proto ///////////////
-
-static CYTHON_INLINE int __Pyx_PyObject_Append(PyObject* L, PyObject* x); /*proto*/
-
-/////////////// append ///////////////
-//@requires: ListAppend
-//@requires: ObjectHandling.c::PyObjectCallMethod1
-
-static CYTHON_INLINE int __Pyx_PyObject_Append(PyObject* L, PyObject* x) {
- if (likely(PyList_CheckExact(L))) {
- if (unlikely(__Pyx_PyList_Append(L, x) < 0)) return -1;
- } else {
- PyObject* retval = __Pyx_PyObject_CallMethod1(L, PYIDENT("append"), x);
- if (unlikely(!retval))
- return -1;
- Py_DECREF(retval);
- }
- return 0;
-}
-
-/////////////// ListAppend.proto ///////////////
-
+/*
+ * Optional optimisations of built-in functions and methods.
+ *
+ * Required replacements of builtins are in Builtins.c.
+ *
+ * General object operations and protocols are in ObjectHandling.c.
+ */
+
+/////////////// append.proto ///////////////
+
+static CYTHON_INLINE int __Pyx_PyObject_Append(PyObject* L, PyObject* x); /*proto*/
+
+/////////////// append ///////////////
+//@requires: ListAppend
+//@requires: ObjectHandling.c::PyObjectCallMethod1
+
+static CYTHON_INLINE int __Pyx_PyObject_Append(PyObject* L, PyObject* x) {
+ if (likely(PyList_CheckExact(L))) {
+ if (unlikely(__Pyx_PyList_Append(L, x) < 0)) return -1;
+ } else {
+ PyObject* retval = __Pyx_PyObject_CallMethod1(L, PYIDENT("append"), x);
+ if (unlikely(!retval))
+ return -1;
+ Py_DECREF(retval);
+ }
+ return 0;
+}
+
+/////////////// ListAppend.proto ///////////////
+
#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
-static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
- PyListObject* L = (PyListObject*) list;
- Py_ssize_t len = Py_SIZE(list);
- if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
- Py_INCREF(x);
- PyList_SET_ITEM(list, len, x);
+static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
+ PyListObject* L = (PyListObject*) list;
+ Py_ssize_t len = Py_SIZE(list);
+ if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
+ Py_INCREF(x);
+ PyList_SET_ITEM(list, len, x);
__Pyx_SET_SIZE(list, len + 1);
- return 0;
- }
- return PyList_Append(list, x);
-}
-#else
-#define __Pyx_PyList_Append(L,x) PyList_Append(L,x)
-#endif
-
-/////////////// ListCompAppend.proto ///////////////
-
+ return 0;
+ }
+ return PyList_Append(list, x);
+}
+#else
+#define __Pyx_PyList_Append(L,x) PyList_Append(L,x)
+#endif
+
+/////////////// ListCompAppend.proto ///////////////
+
#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
-static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
- PyListObject* L = (PyListObject*) list;
- Py_ssize_t len = Py_SIZE(list);
- if (likely(L->allocated > len)) {
- Py_INCREF(x);
- PyList_SET_ITEM(list, len, x);
+static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
+ PyListObject* L = (PyListObject*) list;
+ Py_ssize_t len = Py_SIZE(list);
+ if (likely(L->allocated > len)) {
+ Py_INCREF(x);
+ PyList_SET_ITEM(list, len, x);
__Pyx_SET_SIZE(list, len + 1);
- return 0;
- }
- return PyList_Append(list, x);
-}
-#else
-#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
-#endif
-
-//////////////////// ListExtend.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_PyList_Extend(PyObject* L, PyObject* v) {
-#if CYTHON_COMPILING_IN_CPYTHON
- PyObject* none = _PyList_Extend((PyListObject*)L, v);
- if (unlikely(!none))
- return -1;
- Py_DECREF(none);
- return 0;
-#else
- return PyList_SetSlice(L, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, v);
-#endif
-}
-
-/////////////// pop.proto ///////////////
-
+ return 0;
+ }
+ return PyList_Append(list, x);
+}
+#else
+#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
+#endif
+
+//////////////////// ListExtend.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_PyList_Extend(PyObject* L, PyObject* v) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyObject* none = _PyList_Extend((PyListObject*)L, v);
+ if (unlikely(!none))
+ return -1;
+ Py_DECREF(none);
+ return 0;
+#else
+ return PyList_SetSlice(L, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, v);
+#endif
+}
+
+/////////////// pop.proto ///////////////
+
static CYTHON_INLINE PyObject* __Pyx__PyObject_Pop(PyObject* L); /*proto*/
-
+
#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
-static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L); /*proto*/
+static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L); /*proto*/
#define __Pyx_PyObject_Pop(L) (likely(PyList_CheckExact(L)) ? \
__Pyx_PyList_Pop(L) : __Pyx__PyObject_Pop(L))
-
+
#else
#define __Pyx_PyList_Pop(L) __Pyx__PyObject_Pop(L)
#define __Pyx_PyObject_Pop(L) __Pyx__PyObject_Pop(L)
#endif
-/////////////// pop ///////////////
-//@requires: ObjectHandling.c::PyObjectCallMethod0
-
-static CYTHON_INLINE PyObject* __Pyx__PyObject_Pop(PyObject* L) {
- if (Py_TYPE(L) == &PySet_Type) {
- return PySet_Pop(L);
- }
- return __Pyx_PyObject_CallMethod0(L, PYIDENT("pop"));
-}
-
+/////////////// pop ///////////////
+//@requires: ObjectHandling.c::PyObjectCallMethod0
+
+static CYTHON_INLINE PyObject* __Pyx__PyObject_Pop(PyObject* L) {
+ if (Py_TYPE(L) == &PySet_Type) {
+ return PySet_Pop(L);
+ }
+ return __Pyx_PyObject_CallMethod0(L, PYIDENT("pop"));
+}
+
#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
-static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L) {
- /* Check that both the size is positive and no reallocation shrinking needs to be done. */
- if (likely(PyList_GET_SIZE(L) > (((PyListObject*)L)->allocated >> 1))) {
+static CYTHON_INLINE PyObject* __Pyx_PyList_Pop(PyObject* L) {
+ /* Check that both the size is positive and no reallocation shrinking needs to be done. */
+ if (likely(PyList_GET_SIZE(L) > (((PyListObject*)L)->allocated >> 1))) {
__Pyx_SET_SIZE(L, Py_SIZE(L) - 1);
- return PyList_GET_ITEM(L, PyList_GET_SIZE(L));
- }
+ return PyList_GET_ITEM(L, PyList_GET_SIZE(L));
+ }
return CALL_UNBOUND_METHOD(PyList_Type, "pop", L);
}
-#endif
-
-
-/////////////// pop_index.proto ///////////////
-
+#endif
+
+
+/////////////// pop_index.proto ///////////////
+
static PyObject* __Pyx__PyObject_PopNewIndex(PyObject* L, PyObject* py_ix); /*proto*/
static PyObject* __Pyx__PyObject_PopIndex(PyObject* L, PyObject* py_ix); /*proto*/
-
+
#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
static PyObject* __Pyx__PyList_PopIndex(PyObject* L, PyObject* py_ix, Py_ssize_t ix); /*proto*/
@@ -127,13 +127,13 @@ static PyObject* __Pyx__PyList_PopIndex(PyObject* L, PyObject* py_ix, Py_ssize_t
__Pyx__PyObject_PopIndex(L, py_ix)))
#define __Pyx_PyList_PopIndex(L, py_ix, ix, is_signed, type, to_py_func) ( \
- __Pyx_fits_Py_ssize_t(ix, type, is_signed) ? \
+ __Pyx_fits_Py_ssize_t(ix, type, is_signed) ? \
__Pyx__PyList_PopIndex(L, py_ix, ix) : ( \
(unlikely((py_ix) == Py_None)) ? __Pyx__PyObject_PopNewIndex(L, to_py_func(ix)) : \
__Pyx__PyObject_PopIndex(L, py_ix)))
-
+
#else
-
+
#define __Pyx_PyList_PopIndex(L, py_ix, ix, is_signed, type, to_py_func) \
__Pyx_PyObject_PopIndex(L, py_ix, ix, is_signed, type, to_py_func)
@@ -142,135 +142,135 @@ static PyObject* __Pyx__PyList_PopIndex(PyObject* L, PyObject* py_ix, Py_ssize_t
__Pyx__PyObject_PopIndex(L, py_ix))
#endif
-/////////////// pop_index ///////////////
-//@requires: ObjectHandling.c::PyObjectCallMethod1
-
+/////////////// pop_index ///////////////
+//@requires: ObjectHandling.c::PyObjectCallMethod1
+
static PyObject* __Pyx__PyObject_PopNewIndex(PyObject* L, PyObject* py_ix) {
- PyObject *r;
- if (unlikely(!py_ix)) return NULL;
+ PyObject *r;
+ if (unlikely(!py_ix)) return NULL;
r = __Pyx__PyObject_PopIndex(L, py_ix);
- Py_DECREF(py_ix);
- return r;
-}
-
+ Py_DECREF(py_ix);
+ return r;
+}
+
static PyObject* __Pyx__PyObject_PopIndex(PyObject* L, PyObject* py_ix) {
return __Pyx_PyObject_CallMethod1(L, PYIDENT("pop"), py_ix);
}
#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
static PyObject* __Pyx__PyList_PopIndex(PyObject* L, PyObject* py_ix, Py_ssize_t ix) {
- Py_ssize_t size = PyList_GET_SIZE(L);
- if (likely(size > (((PyListObject*)L)->allocated >> 1))) {
- Py_ssize_t cix = ix;
- if (cix < 0) {
- cix += size;
- }
+ Py_ssize_t size = PyList_GET_SIZE(L);
+ if (likely(size > (((PyListObject*)L)->allocated >> 1))) {
+ Py_ssize_t cix = ix;
+ if (cix < 0) {
+ cix += size;
+ }
if (likely(__Pyx_is_valid_index(cix, size))) {
- PyObject* v = PyList_GET_ITEM(L, cix);
+ PyObject* v = PyList_GET_ITEM(L, cix);
__Pyx_SET_SIZE(L, Py_SIZE(L) - 1);
- size -= 1;
- memmove(&PyList_GET_ITEM(L, cix), &PyList_GET_ITEM(L, cix+1), (size_t)(size-cix)*sizeof(PyObject*));
- return v;
- }
- }
+ size -= 1;
+ memmove(&PyList_GET_ITEM(L, cix), &PyList_GET_ITEM(L, cix+1), (size_t)(size-cix)*sizeof(PyObject*));
+ return v;
+ }
+ }
if (py_ix == Py_None) {
return __Pyx__PyObject_PopNewIndex(L, PyInt_FromSsize_t(ix));
} else {
return __Pyx__PyObject_PopIndex(L, py_ix);
}
}
-#endif
-
-
-/////////////// dict_getitem_default.proto ///////////////
-
-static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value); /*proto*/
-
-/////////////// dict_getitem_default ///////////////
-
-static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) {
- PyObject* value;
+#endif
+
+
+/////////////// dict_getitem_default.proto ///////////////
+
+static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value); /*proto*/
+
+/////////////// dict_getitem_default ///////////////
+
+static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value) {
+ PyObject* value;
#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
- value = PyDict_GetItemWithError(d, key);
- if (unlikely(!value)) {
- if (unlikely(PyErr_Occurred()))
- return NULL;
- value = default_value;
- }
- Py_INCREF(value);
+ value = PyDict_GetItemWithError(d, key);
+ if (unlikely(!value)) {
+ if (unlikely(PyErr_Occurred()))
+ return NULL;
+ value = default_value;
+ }
+ Py_INCREF(value);
// avoid C compiler warning about unused utility functions
if ((1));
-#else
- if (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key)) {
- /* these presumably have safe hash functions */
- value = PyDict_GetItem(d, key);
- if (unlikely(!value)) {
- value = default_value;
- }
- Py_INCREF(value);
+#else
+ if (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key)) {
+ /* these presumably have safe hash functions */
+ value = PyDict_GetItem(d, key);
+ if (unlikely(!value)) {
+ value = default_value;
+ }
+ Py_INCREF(value);
}
#endif
else {
- if (default_value == Py_None)
+ if (default_value == Py_None)
value = CALL_UNBOUND_METHOD(PyDict_Type, "get", d, key);
else
value = CALL_UNBOUND_METHOD(PyDict_Type, "get", d, key, default_value);
- }
- return value;
-}
-
-
-/////////////// dict_setdefault.proto ///////////////
-
-static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value, int is_safe_type); /*proto*/
-
-/////////////// dict_setdefault ///////////////
-
-static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value,
- CYTHON_UNUSED int is_safe_type) {
- PyObject* value;
-#if PY_VERSION_HEX >= 0x030400A0
- // we keep the method call at the end to avoid "unused" C compiler warnings
+ }
+ return value;
+}
+
+
+/////////////// dict_setdefault.proto ///////////////
+
+static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value, int is_safe_type); /*proto*/
+
+/////////////// dict_setdefault ///////////////
+
+static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value,
+ CYTHON_UNUSED int is_safe_type) {
+ PyObject* value;
+#if PY_VERSION_HEX >= 0x030400A0
+ // we keep the method call at the end to avoid "unused" C compiler warnings
if ((1)) {
- value = PyDict_SetDefault(d, key, default_value);
- if (unlikely(!value)) return NULL;
- Py_INCREF(value);
-#else
- if (is_safe_type == 1 || (is_safe_type == -1 &&
- /* the following builtins presumably have repeatably safe and fast hash functions */
+ value = PyDict_SetDefault(d, key, default_value);
+ if (unlikely(!value)) return NULL;
+ Py_INCREF(value);
+#else
+ if (is_safe_type == 1 || (is_safe_type == -1 &&
+ /* the following builtins presumably have repeatably safe and fast hash functions */
#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
- (PyUnicode_CheckExact(key) || PyString_CheckExact(key) || PyLong_CheckExact(key)))) {
- value = PyDict_GetItemWithError(d, key);
- if (unlikely(!value)) {
- if (unlikely(PyErr_Occurred()))
- return NULL;
- if (unlikely(PyDict_SetItem(d, key, default_value) == -1))
- return NULL;
- value = default_value;
- }
- Py_INCREF(value);
-#else
- (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key) || PyLong_CheckExact(key)))) {
- value = PyDict_GetItem(d, key);
- if (unlikely(!value)) {
- if (unlikely(PyDict_SetItem(d, key, default_value) == -1))
- return NULL;
- value = default_value;
- }
- Py_INCREF(value);
-#endif
-#endif
- } else {
+ (PyUnicode_CheckExact(key) || PyString_CheckExact(key) || PyLong_CheckExact(key)))) {
+ value = PyDict_GetItemWithError(d, key);
+ if (unlikely(!value)) {
+ if (unlikely(PyErr_Occurred()))
+ return NULL;
+ if (unlikely(PyDict_SetItem(d, key, default_value) == -1))
+ return NULL;
+ value = default_value;
+ }
+ Py_INCREF(value);
+#else
+ (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key) || PyLong_CheckExact(key)))) {
+ value = PyDict_GetItem(d, key);
+ if (unlikely(!value)) {
+ if (unlikely(PyDict_SetItem(d, key, default_value) == -1))
+ return NULL;
+ value = default_value;
+ }
+ Py_INCREF(value);
+#endif
+#endif
+ } else {
value = CALL_UNBOUND_METHOD(PyDict_Type, "setdefault", d, key, default_value);
- }
- return value;
-}
-
-
-/////////////// py_dict_clear.proto ///////////////
-
-#define __Pyx_PyDict_Clear(d) (PyDict_Clear(d), 0)
-
+ }
+ return value;
+}
+
+
+/////////////// py_dict_clear.proto ///////////////
+
+#define __Pyx_PyDict_Clear(d) (PyDict_Clear(d), 0)
+
/////////////// py_dict_pop.proto ///////////////
@@ -293,27 +293,27 @@ static CYTHON_INLINE PyObject *__Pyx_PyDict_Pop(PyObject *d, PyObject *key, PyOb
}
-/////////////// dict_iter.proto ///////////////
-
-static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* dict, int is_dict, PyObject* method_name,
- Py_ssize_t* p_orig_length, int* p_is_dict);
-static CYTHON_INLINE int __Pyx_dict_iter_next(PyObject* dict_or_iter, Py_ssize_t orig_length, Py_ssize_t* ppos,
- PyObject** pkey, PyObject** pvalue, PyObject** pitem, int is_dict);
-
-/////////////// dict_iter ///////////////
-//@requires: ObjectHandling.c::UnpackTuple2
-//@requires: ObjectHandling.c::IterFinish
-//@requires: ObjectHandling.c::PyObjectCallMethod0
-
-static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* iterable, int is_dict, PyObject* method_name,
- Py_ssize_t* p_orig_length, int* p_source_is_dict) {
- is_dict = is_dict || likely(PyDict_CheckExact(iterable));
- *p_source_is_dict = is_dict;
+/////////////// dict_iter.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* dict, int is_dict, PyObject* method_name,
+ Py_ssize_t* p_orig_length, int* p_is_dict);
+static CYTHON_INLINE int __Pyx_dict_iter_next(PyObject* dict_or_iter, Py_ssize_t orig_length, Py_ssize_t* ppos,
+ PyObject** pkey, PyObject** pvalue, PyObject** pitem, int is_dict);
+
+/////////////// dict_iter ///////////////
+//@requires: ObjectHandling.c::UnpackTuple2
+//@requires: ObjectHandling.c::IterFinish
+//@requires: ObjectHandling.c::PyObjectCallMethod0
+
+static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* iterable, int is_dict, PyObject* method_name,
+ Py_ssize_t* p_orig_length, int* p_source_is_dict) {
+ is_dict = is_dict || likely(PyDict_CheckExact(iterable));
+ *p_source_is_dict = is_dict;
if (is_dict) {
-#if !CYTHON_COMPILING_IN_PYPY
- *p_orig_length = PyDict_Size(iterable);
- Py_INCREF(iterable);
- return iterable;
+#if !CYTHON_COMPILING_IN_PYPY
+ *p_orig_length = PyDict_Size(iterable);
+ Py_INCREF(iterable);
+ return iterable;
#elif PY_MAJOR_VERSION >= 3
// On PyPy3, we need to translate manually a few method names.
// This logic is not needed on CPython thanks to the fast case above.
@@ -334,93 +334,93 @@ static CYTHON_INLINE PyObject* __Pyx_dict_iterator(PyObject* iterable, int is_di
}
}
#endif
- }
- *p_orig_length = 0;
- if (method_name) {
- PyObject* iter;
- iterable = __Pyx_PyObject_CallMethod0(iterable, method_name);
- if (!iterable)
- return NULL;
-#if !CYTHON_COMPILING_IN_PYPY
- if (PyTuple_CheckExact(iterable) || PyList_CheckExact(iterable))
- return iterable;
-#endif
- iter = PyObject_GetIter(iterable);
- Py_DECREF(iterable);
- return iter;
- }
- return PyObject_GetIter(iterable);
-}
-
+ }
+ *p_orig_length = 0;
+ if (method_name) {
+ PyObject* iter;
+ iterable = __Pyx_PyObject_CallMethod0(iterable, method_name);
+ if (!iterable)
+ return NULL;
+#if !CYTHON_COMPILING_IN_PYPY
+ if (PyTuple_CheckExact(iterable) || PyList_CheckExact(iterable))
+ return iterable;
+#endif
+ iter = PyObject_GetIter(iterable);
+ Py_DECREF(iterable);
+ return iter;
+ }
+ return PyObject_GetIter(iterable);
+}
+
static CYTHON_INLINE int __Pyx_dict_iter_next(
PyObject* iter_obj, CYTHON_NCP_UNUSED Py_ssize_t orig_length, CYTHON_NCP_UNUSED Py_ssize_t* ppos,
PyObject** pkey, PyObject** pvalue, PyObject** pitem, int source_is_dict) {
- PyObject* next_item;
-#if !CYTHON_COMPILING_IN_PYPY
- if (source_is_dict) {
- PyObject *key, *value;
- if (unlikely(orig_length != PyDict_Size(iter_obj))) {
- PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration");
- return -1;
- }
- if (unlikely(!PyDict_Next(iter_obj, ppos, &key, &value))) {
- return 0;
- }
- if (pitem) {
- PyObject* tuple = PyTuple_New(2);
- if (unlikely(!tuple)) {
- return -1;
- }
- Py_INCREF(key);
- Py_INCREF(value);
- PyTuple_SET_ITEM(tuple, 0, key);
- PyTuple_SET_ITEM(tuple, 1, value);
- *pitem = tuple;
- } else {
- if (pkey) {
- Py_INCREF(key);
- *pkey = key;
- }
- if (pvalue) {
- Py_INCREF(value);
- *pvalue = value;
- }
- }
- return 1;
- } else if (PyTuple_CheckExact(iter_obj)) {
- Py_ssize_t pos = *ppos;
- if (unlikely(pos >= PyTuple_GET_SIZE(iter_obj))) return 0;
- *ppos = pos + 1;
- next_item = PyTuple_GET_ITEM(iter_obj, pos);
- Py_INCREF(next_item);
- } else if (PyList_CheckExact(iter_obj)) {
- Py_ssize_t pos = *ppos;
- if (unlikely(pos >= PyList_GET_SIZE(iter_obj))) return 0;
- *ppos = pos + 1;
- next_item = PyList_GET_ITEM(iter_obj, pos);
- Py_INCREF(next_item);
- } else
-#endif
- {
- next_item = PyIter_Next(iter_obj);
- if (unlikely(!next_item)) {
- return __Pyx_IterFinish();
- }
- }
- if (pitem) {
- *pitem = next_item;
- } else if (pkey && pvalue) {
- if (__Pyx_unpack_tuple2(next_item, pkey, pvalue, source_is_dict, source_is_dict, 1))
- return -1;
- } else if (pkey) {
- *pkey = next_item;
- } else {
- *pvalue = next_item;
- }
- return 1;
-}
-
-
+ PyObject* next_item;
+#if !CYTHON_COMPILING_IN_PYPY
+ if (source_is_dict) {
+ PyObject *key, *value;
+ if (unlikely(orig_length != PyDict_Size(iter_obj))) {
+ PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration");
+ return -1;
+ }
+ if (unlikely(!PyDict_Next(iter_obj, ppos, &key, &value))) {
+ return 0;
+ }
+ if (pitem) {
+ PyObject* tuple = PyTuple_New(2);
+ if (unlikely(!tuple)) {
+ return -1;
+ }
+ Py_INCREF(key);
+ Py_INCREF(value);
+ PyTuple_SET_ITEM(tuple, 0, key);
+ PyTuple_SET_ITEM(tuple, 1, value);
+ *pitem = tuple;
+ } else {
+ if (pkey) {
+ Py_INCREF(key);
+ *pkey = key;
+ }
+ if (pvalue) {
+ Py_INCREF(value);
+ *pvalue = value;
+ }
+ }
+ return 1;
+ } else if (PyTuple_CheckExact(iter_obj)) {
+ Py_ssize_t pos = *ppos;
+ if (unlikely(pos >= PyTuple_GET_SIZE(iter_obj))) return 0;
+ *ppos = pos + 1;
+ next_item = PyTuple_GET_ITEM(iter_obj, pos);
+ Py_INCREF(next_item);
+ } else if (PyList_CheckExact(iter_obj)) {
+ Py_ssize_t pos = *ppos;
+ if (unlikely(pos >= PyList_GET_SIZE(iter_obj))) return 0;
+ *ppos = pos + 1;
+ next_item = PyList_GET_ITEM(iter_obj, pos);
+ Py_INCREF(next_item);
+ } else
+#endif
+ {
+ next_item = PyIter_Next(iter_obj);
+ if (unlikely(!next_item)) {
+ return __Pyx_IterFinish();
+ }
+ }
+ if (pitem) {
+ *pitem = next_item;
+ } else if (pkey && pvalue) {
+ if (__Pyx_unpack_tuple2(next_item, pkey, pvalue, source_is_dict, source_is_dict, 1))
+ return -1;
+ } else if (pkey) {
+ *pkey = next_item;
+ } else {
+ *pvalue = next_item;
+ }
+ return 1;
+}
+
+
/////////////// set_iter.proto ///////////////
static CYTHON_INLINE PyObject* __Pyx_set_iterator(PyObject* iterable, int is_set,
@@ -558,105 +558,105 @@ static CYTHON_INLINE int __Pyx_PySet_Remove(PyObject *set, PyObject *key) {
}
-/////////////// unicode_iter.proto ///////////////
-
-static CYTHON_INLINE int __Pyx_init_unicode_iteration(
- PyObject* ustring, Py_ssize_t *length, void** data, int *kind); /* proto */
-
-/////////////// unicode_iter ///////////////
-
-static CYTHON_INLINE int __Pyx_init_unicode_iteration(
- PyObject* ustring, Py_ssize_t *length, void** data, int *kind) {
-#if CYTHON_PEP393_ENABLED
- if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return -1;
- *kind = PyUnicode_KIND(ustring);
- *length = PyUnicode_GET_LENGTH(ustring);
- *data = PyUnicode_DATA(ustring);
-#else
- *kind = 0;
- *length = PyUnicode_GET_SIZE(ustring);
- *data = (void*)PyUnicode_AS_UNICODE(ustring);
-#endif
- return 0;
-}
-
-/////////////// pyobject_as_double.proto ///////////////
-
-static double __Pyx__PyObject_AsDouble(PyObject* obj); /* proto */
-
-#if CYTHON_COMPILING_IN_PYPY
-#define __Pyx_PyObject_AsDouble(obj) \
-(likely(PyFloat_CheckExact(obj)) ? PyFloat_AS_DOUBLE(obj) : \
- likely(PyInt_CheckExact(obj)) ? \
- PyFloat_AsDouble(obj) : __Pyx__PyObject_AsDouble(obj))
-#else
-#define __Pyx_PyObject_AsDouble(obj) \
-((likely(PyFloat_CheckExact(obj))) ? \
- PyFloat_AS_DOUBLE(obj) : __Pyx__PyObject_AsDouble(obj))
-#endif
-
-/////////////// pyobject_as_double ///////////////
-
-static double __Pyx__PyObject_AsDouble(PyObject* obj) {
- PyObject* float_value;
+/////////////// unicode_iter.proto ///////////////
+
+static CYTHON_INLINE int __Pyx_init_unicode_iteration(
+ PyObject* ustring, Py_ssize_t *length, void** data, int *kind); /* proto */
+
+/////////////// unicode_iter ///////////////
+
+static CYTHON_INLINE int __Pyx_init_unicode_iteration(
+ PyObject* ustring, Py_ssize_t *length, void** data, int *kind) {
+#if CYTHON_PEP393_ENABLED
+ if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return -1;
+ *kind = PyUnicode_KIND(ustring);
+ *length = PyUnicode_GET_LENGTH(ustring);
+ *data = PyUnicode_DATA(ustring);
+#else
+ *kind = 0;
+ *length = PyUnicode_GET_SIZE(ustring);
+ *data = (void*)PyUnicode_AS_UNICODE(ustring);
+#endif
+ return 0;
+}
+
+/////////////// pyobject_as_double.proto ///////////////
+
+static double __Pyx__PyObject_AsDouble(PyObject* obj); /* proto */
+
+#if CYTHON_COMPILING_IN_PYPY
+#define __Pyx_PyObject_AsDouble(obj) \
+(likely(PyFloat_CheckExact(obj)) ? PyFloat_AS_DOUBLE(obj) : \
+ likely(PyInt_CheckExact(obj)) ? \
+ PyFloat_AsDouble(obj) : __Pyx__PyObject_AsDouble(obj))
+#else
+#define __Pyx_PyObject_AsDouble(obj) \
+((likely(PyFloat_CheckExact(obj))) ? \
+ PyFloat_AS_DOUBLE(obj) : __Pyx__PyObject_AsDouble(obj))
+#endif
+
+/////////////// pyobject_as_double ///////////////
+
+static double __Pyx__PyObject_AsDouble(PyObject* obj) {
+ PyObject* float_value;
#if !CYTHON_USE_TYPE_SLOTS
float_value = PyNumber_Float(obj); if ((0)) goto bad;
-#else
- PyNumberMethods *nb = Py_TYPE(obj)->tp_as_number;
- if (likely(nb) && likely(nb->nb_float)) {
- float_value = nb->nb_float(obj);
- if (likely(float_value) && unlikely(!PyFloat_Check(float_value))) {
- PyErr_Format(PyExc_TypeError,
- "__float__ returned non-float (type %.200s)",
- Py_TYPE(float_value)->tp_name);
- Py_DECREF(float_value);
- goto bad;
- }
- } else if (PyUnicode_CheckExact(obj) || PyBytes_CheckExact(obj)) {
-#if PY_MAJOR_VERSION >= 3
- float_value = PyFloat_FromString(obj);
-#else
- float_value = PyFloat_FromString(obj, 0);
-#endif
- } else {
- PyObject* args = PyTuple_New(1);
- if (unlikely(!args)) goto bad;
- PyTuple_SET_ITEM(args, 0, obj);
- float_value = PyObject_Call((PyObject*)&PyFloat_Type, args, 0);
- PyTuple_SET_ITEM(args, 0, 0);
- Py_DECREF(args);
- }
-#endif
- if (likely(float_value)) {
- double value = PyFloat_AS_DOUBLE(float_value);
- Py_DECREF(float_value);
- return value;
- }
-bad:
- return (double)-1;
-}
-
-
-/////////////// PyNumberPow2.proto ///////////////
-
-#define __Pyx_PyNumber_InPlacePowerOf2(a, b, c) __Pyx__PyNumber_PowerOf2(a, b, c, 1)
-#define __Pyx_PyNumber_PowerOf2(a, b, c) __Pyx__PyNumber_PowerOf2(a, b, c, 0)
-
-static PyObject* __Pyx__PyNumber_PowerOf2(PyObject *two, PyObject *exp, PyObject *none, int inplace); /*proto*/
-
-/////////////// PyNumberPow2 ///////////////
-
-static PyObject* __Pyx__PyNumber_PowerOf2(PyObject *two, PyObject *exp, PyObject *none, int inplace) {
-// in CPython, 1<<N is substantially faster than 2**N
-// see http://bugs.python.org/issue21420
+#else
+ PyNumberMethods *nb = Py_TYPE(obj)->tp_as_number;
+ if (likely(nb) && likely(nb->nb_float)) {
+ float_value = nb->nb_float(obj);
+ if (likely(float_value) && unlikely(!PyFloat_Check(float_value))) {
+ PyErr_Format(PyExc_TypeError,
+ "__float__ returned non-float (type %.200s)",
+ Py_TYPE(float_value)->tp_name);
+ Py_DECREF(float_value);
+ goto bad;
+ }
+ } else if (PyUnicode_CheckExact(obj) || PyBytes_CheckExact(obj)) {
+#if PY_MAJOR_VERSION >= 3
+ float_value = PyFloat_FromString(obj);
+#else
+ float_value = PyFloat_FromString(obj, 0);
+#endif
+ } else {
+ PyObject* args = PyTuple_New(1);
+ if (unlikely(!args)) goto bad;
+ PyTuple_SET_ITEM(args, 0, obj);
+ float_value = PyObject_Call((PyObject*)&PyFloat_Type, args, 0);
+ PyTuple_SET_ITEM(args, 0, 0);
+ Py_DECREF(args);
+ }
+#endif
+ if (likely(float_value)) {
+ double value = PyFloat_AS_DOUBLE(float_value);
+ Py_DECREF(float_value);
+ return value;
+ }
+bad:
+ return (double)-1;
+}
+
+
+/////////////// PyNumberPow2.proto ///////////////
+
+#define __Pyx_PyNumber_InPlacePowerOf2(a, b, c) __Pyx__PyNumber_PowerOf2(a, b, c, 1)
+#define __Pyx_PyNumber_PowerOf2(a, b, c) __Pyx__PyNumber_PowerOf2(a, b, c, 0)
+
+static PyObject* __Pyx__PyNumber_PowerOf2(PyObject *two, PyObject *exp, PyObject *none, int inplace); /*proto*/
+
+/////////////// PyNumberPow2 ///////////////
+
+static PyObject* __Pyx__PyNumber_PowerOf2(PyObject *two, PyObject *exp, PyObject *none, int inplace) {
+// in CPython, 1<<N is substantially faster than 2**N
+// see http://bugs.python.org/issue21420
#if !CYTHON_COMPILING_IN_PYPY
- Py_ssize_t shiftby;
+ Py_ssize_t shiftby;
#if PY_MAJOR_VERSION < 3
if (likely(PyInt_CheckExact(exp))) {
shiftby = PyInt_AS_LONG(exp);
} else
#endif
- if (likely(PyLong_CheckExact(exp))) {
+ if (likely(PyLong_CheckExact(exp))) {
#if CYTHON_USE_PYLONG_INTERNALS
const Py_ssize_t size = Py_SIZE(exp);
// tuned to optimise branch prediction
@@ -668,36 +668,36 @@ static PyObject* __Pyx__PyNumber_PowerOf2(PyObject *two, PyObject *exp, PyObject
goto fallback;
} else {
shiftby = PyLong_AsSsize_t(exp);
- }
- #else
- shiftby = PyLong_AsSsize_t(exp);
- #endif
- } else {
- goto fallback;
- }
- if (likely(shiftby >= 0)) {
- if ((size_t)shiftby <= sizeof(long) * 8 - 2) {
- long value = 1L << shiftby;
- return PyInt_FromLong(value);
+ }
+ #else
+ shiftby = PyLong_AsSsize_t(exp);
+ #endif
+ } else {
+ goto fallback;
+ }
+ if (likely(shiftby >= 0)) {
+ if ((size_t)shiftby <= sizeof(long) * 8 - 2) {
+ long value = 1L << shiftby;
+ return PyInt_FromLong(value);
#ifdef HAVE_LONG_LONG
} else if ((size_t)shiftby <= sizeof(unsigned PY_LONG_LONG) * 8 - 1) {
unsigned PY_LONG_LONG value = ((unsigned PY_LONG_LONG)1) << shiftby;
return PyLong_FromUnsignedLongLong(value);
#endif
- } else {
+ } else {
PyObject *result, *one = PyInt_FromLong(1L);
- if (unlikely(!one)) return NULL;
+ if (unlikely(!one)) return NULL;
result = PyNumber_Lshift(one, exp);
Py_DECREF(one);
return result;
- }
- } else if (shiftby == -1 && PyErr_Occurred()) {
- PyErr_Clear();
- }
-fallback:
-#endif
- return (inplace ? PyNumber_InPlacePower : PyNumber_Power)(two, exp, none);
-}
+ }
+ } else if (shiftby == -1 && PyErr_Occurred()) {
+ PyErr_Clear();
+ }
+fallback:
+#endif
+ return (inplace ? PyNumber_InPlacePower : PyNumber_Power)(two, exp, none);
+}
/////////////// PyIntCompare.proto ///////////////
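The __Pyx_PyList_Append fast path restored above bypasses PyList_Append only while the list still has spare capacity and its length is above half the allocation, i.e. exactly the window in which CPython's list_resize would neither grow nor shrink the buffer. The following stand-alone sketch reproduces that guard on a plain growable array; the growth factor is only an approximation of CPython's policy and every name is illustrative, not part of the patch.

/* sketch only: models the capacity guard used by the list-append fast path */
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    int *items;
    size_t size;       /* logical length  (Py_SIZE analogue)          */
    size_t allocated;  /* capacity        (PyListObject.allocated)    */
} vec;

static int vec_append_slow(vec *v, int x) {
    if (v->size == v->allocated) {               /* grow roughly like CPython: ~1.125x + 6 */
        size_t newcap = v->allocated + (v->allocated >> 3) + 6;
        int *p = realloc(v->items, newcap * sizeof *p);
        if (!p) return -1;
        v->items = p;
        v->allocated = newcap;
    }
    v->items[v->size++] = x;
    return 0;
}

static int vec_append_fast(vec *v, int x) {
    /* fast path only when no resize decision could be triggered:
     *   allocated > size    -> there is room for one more element
     *   size > allocated/2  -> the resize policy would not shrink here either */
    if (v->allocated > v->size && v->size > (v->allocated >> 1)) {
        v->items[v->size++] = x;
        return 0;
    }
    return vec_append_slow(v, x);
}

int main(void) {
    vec v = { NULL, 0, 0 };
    for (int i = 0; i < 100; ++i)
        if (vec_append_fast(&v, i) < 0) return 1;
    printf("size=%zu allocated=%zu last=%d\n", v.size, v.allocated, v.items[v.size - 1]);
    free(v.items);
    return 0;
}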
diff --git a/contrib/tools/cython/Cython/Utility/Overflow.c b/contrib/tools/cython/Cython/Utility/Overflow.c
index 5fd3aa55da..0259c58f01 100644
--- a/contrib/tools/cython/Cython/Utility/Overflow.c
+++ b/contrib/tools/cython/Cython/Utility/Overflow.c
@@ -1,269 +1,269 @@
-/*
-These functions provide integer arithmetic with integer checking. They do not
-actually raise an exception when an overflow is detected, but rather set a bit
+/*
+These functions provide integer arithmetic with integer checking. They do not
+actually raise an exception when an overflow is detected, but rather set a bit
in the overflow parameter. (This parameter may be re-used across several
-arithmetic operations, so should be or-ed rather than assigned to.)
-
-The implementation is divided into two parts, the signed and unsigned basecases,
-which is where the magic happens, and a generic template matching a specific
-type to an implementation based on its (c-compile-time) size and signedness.
-
-When possible, branching is avoided, and preference is given to speed over
-accuracy (a low rate of falsely "detected" overflows are acceptable,
-undetected overflows are not).
-
-
-TODO: Hook up checking.
-TODO: Conditionally support 128-bit with intmax_t?
-*/
-
-/////////////// Common.proto ///////////////
-
-static int __Pyx_check_twos_complement(void) {
+arithmetic operations, so should be or-ed rather than assigned to.)
+
+The implementation is divided into two parts, the signed and unsigned basecases,
+which is where the magic happens, and a generic template matching a specific
+type to an implementation based on its (c-compile-time) size and signedness.
+
+When possible, branching is avoided, and preference is given to speed over
+accuracy (a low rate of falsely "detected" overflows are acceptable,
+undetected overflows are not).
+
+
+TODO: Hook up checking.
+TODO: Conditionally support 128-bit with intmax_t?
+*/
+
+/////////////// Common.proto ///////////////
+
+static int __Pyx_check_twos_complement(void) {
if ((-1 != ~0)) {
- PyErr_SetString(PyExc_RuntimeError, "Two's complement required for overflow checks.");
- return 1;
+ PyErr_SetString(PyExc_RuntimeError, "Two's complement required for overflow checks.");
+ return 1;
} else if ((sizeof(short) == sizeof(int))) {
- PyErr_SetString(PyExc_RuntimeError, "sizeof(short) < sizeof(int) required for overflow checks.");
- return 1;
- } else {
- return 0;
- }
-}
-
+ PyErr_SetString(PyExc_RuntimeError, "sizeof(short) < sizeof(int) required for overflow checks.");
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
#define __PYX_IS_UNSIGNED(type) ((((type) -1) > 0))
#define __PYX_SIGN_BIT(type) ((((unsigned type) 1) << (sizeof(type) * 8 - 1)))
#define __PYX_HALF_MAX(type) ((((type) 1) << (sizeof(type) * 8 - 2)))
#define __PYX_MIN(type) ((__PYX_IS_UNSIGNED(type) ? (type) 0 : 0 - __PYX_HALF_MAX(type) - __PYX_HALF_MAX(type)))
#define __PYX_MAX(type) ((~__PYX_MIN(type)))
-
-#define __Pyx_add_no_overflow(a, b, overflow) ((a) + (b))
-#define __Pyx_add_const_no_overflow(a, b, overflow) ((a) + (b))
-#define __Pyx_sub_no_overflow(a, b, overflow) ((a) - (b))
-#define __Pyx_sub_const_no_overflow(a, b, overflow) ((a) - (b))
-#define __Pyx_mul_no_overflow(a, b, overflow) ((a) * (b))
-#define __Pyx_mul_const_no_overflow(a, b, overflow) ((a) * (b))
-#define __Pyx_div_no_overflow(a, b, overflow) ((a) / (b))
-#define __Pyx_div_const_no_overflow(a, b, overflow) ((a) / (b))
-
-/////////////// Common.init ///////////////
+
+#define __Pyx_add_no_overflow(a, b, overflow) ((a) + (b))
+#define __Pyx_add_const_no_overflow(a, b, overflow) ((a) + (b))
+#define __Pyx_sub_no_overflow(a, b, overflow) ((a) - (b))
+#define __Pyx_sub_const_no_overflow(a, b, overflow) ((a) - (b))
+#define __Pyx_mul_no_overflow(a, b, overflow) ((a) * (b))
+#define __Pyx_mul_const_no_overflow(a, b, overflow) ((a) * (b))
+#define __Pyx_div_no_overflow(a, b, overflow) ((a) / (b))
+#define __Pyx_div_const_no_overflow(a, b, overflow) ((a) / (b))
+
+/////////////// Common.init ///////////////
//@substitute: naming
-
+
// FIXME: Propagate the error here instead of just printing it.
if (unlikely(__Pyx_check_twos_complement())) {
PyErr_WriteUnraisable($module_cname);
}
-
-/////////////// BaseCaseUnsigned.proto ///////////////
-
-static CYTHON_INLINE {{UINT}} __Pyx_add_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
-static CYTHON_INLINE {{UINT}} __Pyx_sub_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
-static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
-static CYTHON_INLINE {{UINT}} __Pyx_div_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
-
-// Use these when b is known at compile time.
-#define __Pyx_add_const_{{NAME}}_checking_overflow __Pyx_add_{{NAME}}_checking_overflow
-#define __Pyx_sub_const_{{NAME}}_checking_overflow __Pyx_sub_{{NAME}}_checking_overflow
-static CYTHON_INLINE {{UINT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} constant, int *overflow);
-#define __Pyx_div_const_{{NAME}}_checking_overflow __Pyx_div_{{NAME}}_checking_overflow
-
-/////////////// BaseCaseUnsigned ///////////////
-
-static CYTHON_INLINE {{UINT}} __Pyx_add_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
- {{UINT}} r = a + b;
- *overflow |= r < a;
- return r;
-}
-
-static CYTHON_INLINE {{UINT}} __Pyx_sub_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
- {{UINT}} r = a - b;
- *overflow |= r > a;
- return r;
-}
-
-static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+
+/////////////// BaseCaseUnsigned.proto ///////////////
+
+static CYTHON_INLINE {{UINT}} __Pyx_add_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
+static CYTHON_INLINE {{UINT}} __Pyx_sub_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
+static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
+static CYTHON_INLINE {{UINT}} __Pyx_div_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow);
+
+// Use these when b is known at compile time.
+#define __Pyx_add_const_{{NAME}}_checking_overflow __Pyx_add_{{NAME}}_checking_overflow
+#define __Pyx_sub_const_{{NAME}}_checking_overflow __Pyx_sub_{{NAME}}_checking_overflow
+static CYTHON_INLINE {{UINT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} constant, int *overflow);
+#define __Pyx_div_const_{{NAME}}_checking_overflow __Pyx_div_{{NAME}}_checking_overflow
+
+/////////////// BaseCaseUnsigned ///////////////
+
+static CYTHON_INLINE {{UINT}} __Pyx_add_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+ {{UINT}} r = a + b;
+ *overflow |= r < a;
+ return r;
+}
+
+static CYTHON_INLINE {{UINT}} __Pyx_sub_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+ {{UINT}} r = a - b;
+ *overflow |= r > a;
+ return r;
+}
+
+static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
if ((sizeof({{UINT}}) < sizeof(unsigned long))) {
- unsigned long big_r = ((unsigned long) a) * ((unsigned long) b);
- {{UINT}} r = ({{UINT}}) big_r;
- *overflow |= big_r != r;
- return r;
+ unsigned long big_r = ((unsigned long) a) * ((unsigned long) b);
+ {{UINT}} r = ({{UINT}}) big_r;
+ *overflow |= big_r != r;
+ return r;
#ifdef HAVE_LONG_LONG
} else if ((sizeof({{UINT}}) < sizeof(unsigned PY_LONG_LONG))) {
unsigned PY_LONG_LONG big_r = ((unsigned PY_LONG_LONG) a) * ((unsigned PY_LONG_LONG) b);
- {{UINT}} r = ({{UINT}}) big_r;
- *overflow |= big_r != r;
- return r;
+ {{UINT}} r = ({{UINT}}) big_r;
+ *overflow |= big_r != r;
+ return r;
#endif
- } else {
- {{UINT}} prod = a * b;
- double dprod = ((double) a) * ((double) b);
- // Overflow results in an error of at least 2^sizeof(UINT),
- // whereas rounding represents an error on the order of 2^(sizeof(UINT)-53).
- *overflow |= fabs(dprod - prod) > (__PYX_MAX({{UINT}}) / 2);
- return prod;
- }
-}
-
-static CYTHON_INLINE {{UINT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
- if (b > 1) {
- *overflow |= a > __PYX_MAX({{UINT}}) / b;
- }
- return a * b;
-}
-
-
-static CYTHON_INLINE {{UINT}} __Pyx_div_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
- if (b == 0) {
- *overflow |= 1;
- return 0;
- }
- return a / b;
-}
-
-
-/////////////// BaseCaseSigned.proto ///////////////
-
-static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
-static CYTHON_INLINE {{INT}} __Pyx_sub_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
-static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
-static CYTHON_INLINE {{INT}} __Pyx_div_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
-
-
-// Use when b is known at compile time.
-static CYTHON_INLINE {{INT}} __Pyx_add_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
-static CYTHON_INLINE {{INT}} __Pyx_sub_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
-static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} constant, int *overflow);
-#define __Pyx_div_const_{{NAME}}_checking_overflow __Pyx_div_{{NAME}}_checking_overflow
-
-/////////////// BaseCaseSigned ///////////////
-
-static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ } else {
+ {{UINT}} prod = a * b;
+ double dprod = ((double) a) * ((double) b);
+ // Overflow results in an error of at least 2^sizeof(UINT),
+ // whereas rounding represents an error on the order of 2^(sizeof(UINT)-53).
+ *overflow |= fabs(dprod - prod) > (__PYX_MAX({{UINT}}) / 2);
+ return prod;
+ }
+}
+
+static CYTHON_INLINE {{UINT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+ if (b > 1) {
+ *overflow |= a > __PYX_MAX({{UINT}}) / b;
+ }
+ return a * b;
+}
+
+
+static CYTHON_INLINE {{UINT}} __Pyx_div_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+ if (b == 0) {
+ *overflow |= 1;
+ return 0;
+ }
+ return a / b;
+}
+
+
+/////////////// BaseCaseSigned.proto ///////////////
+
+static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+static CYTHON_INLINE {{INT}} __Pyx_sub_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+static CYTHON_INLINE {{INT}} __Pyx_div_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+
+
+// Use when b is known at compile time.
+static CYTHON_INLINE {{INT}} __Pyx_add_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+static CYTHON_INLINE {{INT}} __Pyx_sub_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} constant, int *overflow);
+#define __Pyx_div_const_{{NAME}}_checking_overflow __Pyx_div_{{NAME}}_checking_overflow
+
+/////////////// BaseCaseSigned ///////////////
+
+static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
if ((sizeof({{INT}}) < sizeof(long))) {
- long big_r = ((long) a) + ((long) b);
- {{INT}} r = ({{INT}}) big_r;
- *overflow |= big_r != r;
- return r;
+ long big_r = ((long) a) + ((long) b);
+ {{INT}} r = ({{INT}}) big_r;
+ *overflow |= big_r != r;
+ return r;
#ifdef HAVE_LONG_LONG
} else if ((sizeof({{INT}}) < sizeof(PY_LONG_LONG))) {
PY_LONG_LONG big_r = ((PY_LONG_LONG) a) + ((PY_LONG_LONG) b);
- {{INT}} r = ({{INT}}) big_r;
- *overflow |= big_r != r;
- return r;
+ {{INT}} r = ({{INT}}) big_r;
+ *overflow |= big_r != r;
+ return r;
#endif
- } else {
- // Signed overflow undefined, but unsigned overflow is well defined.
- {{INT}} r = ({{INT}}) ((unsigned {{INT}}) a + (unsigned {{INT}}) b);
- // Overflow happened if the operands have the same sign, but the result
- // has opposite sign.
- // sign(a) == sign(b) != sign(r)
- {{INT}} sign_a = __PYX_SIGN_BIT({{INT}}) & a;
- {{INT}} sign_b = __PYX_SIGN_BIT({{INT}}) & b;
- {{INT}} sign_r = __PYX_SIGN_BIT({{INT}}) & r;
- *overflow |= (sign_a == sign_b) & (sign_a != sign_r);
- return r;
- }
-}
-
-static CYTHON_INLINE {{INT}} __Pyx_add_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
- if (b > 0) {
- *overflow |= a > __PYX_MAX({{INT}}) - b;
- } else if (b < 0) {
- *overflow |= a < __PYX_MIN({{INT}}) - b;
- }
- return a + b;
-}
-
-static CYTHON_INLINE {{INT}} __Pyx_sub_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
- *overflow |= b == __PYX_MIN({{INT}});
- return __Pyx_add_{{NAME}}_checking_overflow(a, -b, overflow);
-}
-
-static CYTHON_INLINE {{INT}} __Pyx_sub_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
- *overflow |= b == __PYX_MIN({{INT}});
- return __Pyx_add_const_{{NAME}}_checking_overflow(a, -b, overflow);
-}
-
-static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ } else {
+ // Signed overflow undefined, but unsigned overflow is well defined.
+ {{INT}} r = ({{INT}}) ((unsigned {{INT}}) a + (unsigned {{INT}}) b);
+ // Overflow happened if the operands have the same sign, but the result
+ // has opposite sign.
+ // sign(a) == sign(b) != sign(r)
+ {{INT}} sign_a = __PYX_SIGN_BIT({{INT}}) & a;
+ {{INT}} sign_b = __PYX_SIGN_BIT({{INT}}) & b;
+ {{INT}} sign_r = __PYX_SIGN_BIT({{INT}}) & r;
+ *overflow |= (sign_a == sign_b) & (sign_a != sign_r);
+ return r;
+ }
+}
+
+static CYTHON_INLINE {{INT}} __Pyx_add_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ if (b > 0) {
+ *overflow |= a > __PYX_MAX({{INT}}) - b;
+ } else if (b < 0) {
+ *overflow |= a < __PYX_MIN({{INT}}) - b;
+ }
+ return a + b;
+}
+
+static CYTHON_INLINE {{INT}} __Pyx_sub_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ *overflow |= b == __PYX_MIN({{INT}});
+ return __Pyx_add_{{NAME}}_checking_overflow(a, -b, overflow);
+}
+
+static CYTHON_INLINE {{INT}} __Pyx_sub_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ *overflow |= b == __PYX_MIN({{INT}});
+ return __Pyx_add_const_{{NAME}}_checking_overflow(a, -b, overflow);
+}
+
+static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
if ((sizeof({{INT}}) < sizeof(long))) {
- long big_r = ((long) a) * ((long) b);
- {{INT}} r = ({{INT}}) big_r;
- *overflow |= big_r != r;
- return ({{INT}}) r;
+ long big_r = ((long) a) * ((long) b);
+ {{INT}} r = ({{INT}}) big_r;
+ *overflow |= big_r != r;
+ return ({{INT}}) r;
#ifdef HAVE_LONG_LONG
} else if ((sizeof({{INT}}) < sizeof(PY_LONG_LONG))) {
PY_LONG_LONG big_r = ((PY_LONG_LONG) a) * ((PY_LONG_LONG) b);
- {{INT}} r = ({{INT}}) big_r;
- *overflow |= big_r != r;
- return ({{INT}}) r;
+ {{INT}} r = ({{INT}}) big_r;
+ *overflow |= big_r != r;
+ return ({{INT}}) r;
#endif
- } else {
- {{INT}} prod = a * b;
- double dprod = ((double) a) * ((double) b);
- // Overflow results in an error of at least 2^sizeof(INT),
- // whereas rounding represents an error on the order of 2^(sizeof(INT)-53).
- *overflow |= fabs(dprod - prod) > (__PYX_MAX({{INT}}) / 2);
- return prod;
- }
-}
-
-static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
- if (b > 1) {
- *overflow |= a > __PYX_MAX({{INT}}) / b;
- *overflow |= a < __PYX_MIN({{INT}}) / b;
- } else if (b == -1) {
- *overflow |= a == __PYX_MIN({{INT}});
- } else if (b < -1) {
- *overflow |= a > __PYX_MIN({{INT}}) / b;
- *overflow |= a < __PYX_MAX({{INT}}) / b;
- }
- return a * b;
-}
-
-static CYTHON_INLINE {{INT}} __Pyx_div_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
- if (b == 0) {
- *overflow |= 1;
- return 0;
- }
- *overflow |= (a == __PYX_MIN({{INT}})) & (b == -1);
- return a / b;
-}
-
-
-/////////////// SizeCheck.init ///////////////
+ } else {
+ {{INT}} prod = a * b;
+ double dprod = ((double) a) * ((double) b);
+ // Overflow results in an error of at least 2^sizeof(INT),
+ // whereas rounding represents an error on the order of 2^(sizeof(INT)-53).
+ *overflow |= fabs(dprod - prod) > (__PYX_MAX({{INT}}) / 2);
+ return prod;
+ }
+}
+
+static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ if (b > 1) {
+ *overflow |= a > __PYX_MAX({{INT}}) / b;
+ *overflow |= a < __PYX_MIN({{INT}}) / b;
+ } else if (b == -1) {
+ *overflow |= a == __PYX_MIN({{INT}});
+ } else if (b < -1) {
+ *overflow |= a > __PYX_MIN({{INT}}) / b;
+ *overflow |= a < __PYX_MAX({{INT}}) / b;
+ }
+ return a * b;
+}
+
+static CYTHON_INLINE {{INT}} __Pyx_div_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+ if (b == 0) {
+ *overflow |= 1;
+ return 0;
+ }
+ *overflow |= (a == __PYX_MIN({{INT}})) & (b == -1);
+ return a / b;
+}
+
+
+/////////////// SizeCheck.init ///////////////
//@substitute: naming
-
+
// FIXME: Propagate the error here instead of just printing it.
if (unlikely(__Pyx_check_sane_{{NAME}}())) {
PyErr_WriteUnraisable($module_cname);
}
-
-/////////////// SizeCheck.proto ///////////////
-
-static int __Pyx_check_sane_{{NAME}}(void) {
+
+/////////////// SizeCheck.proto ///////////////
+
+static int __Pyx_check_sane_{{NAME}}(void) {
if (((sizeof({{TYPE}}) <= sizeof(int)) ||
#ifdef HAVE_LONG_LONG
(sizeof({{TYPE}}) == sizeof(PY_LONG_LONG)) ||
#endif
(sizeof({{TYPE}}) == sizeof(long)))) {
- return 0;
- } else {
- PyErr_Format(PyExc_RuntimeError, \
- "Bad size for int type %.{{max(60, len(TYPE))}}s: %d", "{{TYPE}}", (int) sizeof({{TYPE}}));
- return 1;
- }
-}
-
-
-/////////////// Binop.proto ///////////////
-
-static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow);
-
-/////////////// Binop ///////////////
-
-static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow) {
+ return 0;
+ } else {
+ PyErr_Format(PyExc_RuntimeError, \
+ "Bad size for int type %.{{max(60, len(TYPE))}}s: %d", "{{TYPE}}", (int) sizeof({{TYPE}}));
+ return 1;
+ }
+}
+
+
+/////////////// Binop.proto ///////////////
+
+static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow);
+
+/////////////// Binop ///////////////
+
+static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow) {
if ((sizeof({{TYPE}}) < sizeof(int))) {
- return __Pyx_{{BINOP}}_no_overflow(a, b, overflow);
- } else if (__PYX_IS_UNSIGNED({{TYPE}})) {
+ return __Pyx_{{BINOP}}_no_overflow(a, b, overflow);
+ } else if (__PYX_IS_UNSIGNED({{TYPE}})) {
if ((sizeof({{TYPE}}) == sizeof(unsigned int))) {
return ({{TYPE}}) __Pyx_{{BINOP}}_unsigned_int_checking_overflow(a, b, overflow);
} else if ((sizeof({{TYPE}}) == sizeof(unsigned long))) {
@@ -272,10 +272,10 @@ static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}
} else if ((sizeof({{TYPE}}) == sizeof(unsigned PY_LONG_LONG))) {
return ({{TYPE}}) __Pyx_{{BINOP}}_unsigned_long_long_checking_overflow(a, b, overflow);
#endif
- } else {
+ } else {
abort(); return 0; /* handled elsewhere */
- }
- } else {
+ }
+ } else {
if ((sizeof({{TYPE}}) == sizeof(int))) {
return ({{TYPE}}) __Pyx_{{BINOP}}_int_checking_overflow(a, b, overflow);
} else if ((sizeof({{TYPE}}) == sizeof(long))) {
@@ -284,24 +284,24 @@ static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}
} else if ((sizeof({{TYPE}}) == sizeof(PY_LONG_LONG))) {
return ({{TYPE}}) __Pyx_{{BINOP}}_long_long_checking_overflow(a, b, overflow);
#endif
- } else {
+ } else {
abort(); return 0; /* handled elsewhere */
- }
- }
-}
-
-/////////////// LeftShift.proto ///////////////
-
-static CYTHON_INLINE {{TYPE}} __Pyx_lshift_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow) {
- *overflow |=
-#if {{SIGNED}}
- (b < 0) |
-#endif
- (b > ({{TYPE}}) (8 * sizeof({{TYPE}}))) | (a > (__PYX_MAX({{TYPE}}) >> b));
- return a << b;
-}
-#define __Pyx_lshift_const_{{NAME}}_checking_overflow __Pyx_lshift_{{NAME}}_checking_overflow
-
+ }
+ }
+}
+
+/////////////// LeftShift.proto ///////////////
+
+static CYTHON_INLINE {{TYPE}} __Pyx_lshift_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow) {
+ *overflow |=
+#if {{SIGNED}}
+ (b < 0) |
+#endif
+ (b > ({{TYPE}}) (8 * sizeof({{TYPE}}))) | (a > (__PYX_MAX({{TYPE}}) >> b));
+ return a << b;
+}
+#define __Pyx_lshift_const_{{NAME}}_checking_overflow __Pyx_lshift_{{NAME}}_checking_overflow
+
/////////////// UnaryNegOverflows.proto ///////////////
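The signed base case restored above sidesteps undefined signed overflow by performing the addition in unsigned arithmetic and then comparing sign bits: overflow occurred exactly when both operands share a sign that the result does not. Below is a self-contained instance of the same check specialised to plain int; it is a sketch for illustration, not part of the patch.

/* sketch only: sign-bit overflow check as used by the signed add base case */
#include <stdio.h>
#include <limits.h>

static int add_int_checking_overflow(int a, int b, int *overflow) {
    unsigned int ua = (unsigned int)a, ub = (unsigned int)b;
    int r = (int)(ua + ub);                       /* unsigned wraparound is well defined */
    unsigned int sign_bit = 1u << (sizeof(int) * 8 - 1);
    unsigned int sign_a = (unsigned int)a & sign_bit;
    unsigned int sign_b = (unsigned int)b & sign_bit;
    unsigned int sign_r = (unsigned int)r & sign_bit;
    /* overflow iff sign(a) == sign(b) != sign(r) */
    *overflow |= (sign_a == sign_b) && (sign_a != sign_r);
    return r;
}

int main(void) {
    int overflow = 0;
    int ok  = add_int_checking_overflow(1000, 2000, &overflow);   /* leaves the flag clear */
    int bad = add_int_checking_overflow(INT_MAX, 1, &overflow);   /* sets the flag */
    printf("ok=%d bad=%d overflow=%d\n", ok, bad, overflow);
    return 0;
}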
diff --git a/contrib/tools/cython/Cython/Utility/Printing.c b/contrib/tools/cython/Cython/Utility/Printing.c
index 40768e6aff..71aa7eafe9 100644
--- a/contrib/tools/cython/Cython/Utility/Printing.c
+++ b/contrib/tools/cython/Cython/Utility/Printing.c
@@ -1,176 +1,176 @@
-////////////////////// Print.proto //////////////////////
-//@substitute: naming
-
-static int __Pyx_Print(PyObject*, PyObject *, int); /*proto*/
-#if CYTHON_COMPILING_IN_PYPY || PY_MAJOR_VERSION >= 3
-static PyObject* $print_function = 0;
-static PyObject* $print_function_kwargs = 0;
-#endif
-
-////////////////////// Print.cleanup //////////////////////
-//@substitute: naming
-
-#if CYTHON_COMPILING_IN_PYPY || PY_MAJOR_VERSION >= 3
-Py_CLEAR($print_function);
-Py_CLEAR($print_function_kwargs);
-#endif
-
-////////////////////// Print //////////////////////
-//@substitute: naming
-
-#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3
-static PyObject *__Pyx_GetStdout(void) {
- PyObject *f = PySys_GetObject((char *)"stdout");
- if (!f) {
- PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
- }
- return f;
-}
-
-static int __Pyx_Print(PyObject* f, PyObject *arg_tuple, int newline) {
- int i;
-
- if (!f) {
- if (!(f = __Pyx_GetStdout()))
- return -1;
- }
- Py_INCREF(f);
- for (i=0; i < PyTuple_GET_SIZE(arg_tuple); i++) {
- PyObject* v;
- if (PyFile_SoftSpace(f, 1)) {
- if (PyFile_WriteString(" ", f) < 0)
- goto error;
- }
- v = PyTuple_GET_ITEM(arg_tuple, i);
- if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0)
- goto error;
- if (PyString_Check(v)) {
- char *s = PyString_AsString(v);
- Py_ssize_t len = PyString_Size(v);
- if (len > 0) {
- // append soft-space if necessary (not using isspace() due to C/C++ problem on MacOS-X)
- switch (s[len-1]) {
- case ' ': break;
- case '\f': case '\r': case '\n': case '\t': case '\v':
- PyFile_SoftSpace(f, 0);
- break;
- default: break;
- }
- }
- }
- }
- if (newline) {
- if (PyFile_WriteString("\n", f) < 0)
- goto error;
- PyFile_SoftSpace(f, 0);
- }
- Py_DECREF(f);
- return 0;
-error:
- Py_DECREF(f);
- return -1;
-}
-
-#else /* Python 3 has a print function */
-
-static int __Pyx_Print(PyObject* stream, PyObject *arg_tuple, int newline) {
- PyObject* kwargs = 0;
- PyObject* result = 0;
- PyObject* end_string;
- if (unlikely(!$print_function)) {
- $print_function = PyObject_GetAttr($builtins_cname, PYIDENT("print"));
- if (!$print_function)
- return -1;
- }
- if (stream) {
- kwargs = PyDict_New();
- if (unlikely(!kwargs))
- return -1;
- if (unlikely(PyDict_SetItem(kwargs, PYIDENT("file"), stream) < 0))
- goto bad;
- if (!newline) {
- end_string = PyUnicode_FromStringAndSize(" ", 1);
- if (unlikely(!end_string))
- goto bad;
- if (PyDict_SetItem(kwargs, PYIDENT("end"), end_string) < 0) {
- Py_DECREF(end_string);
- goto bad;
- }
- Py_DECREF(end_string);
- }
- } else if (!newline) {
- if (unlikely(!$print_function_kwargs)) {
- $print_function_kwargs = PyDict_New();
- if (unlikely(!$print_function_kwargs))
- return -1;
- end_string = PyUnicode_FromStringAndSize(" ", 1);
- if (unlikely(!end_string))
- return -1;
- if (PyDict_SetItem($print_function_kwargs, PYIDENT("end"), end_string) < 0) {
- Py_DECREF(end_string);
- return -1;
- }
- Py_DECREF(end_string);
- }
- kwargs = $print_function_kwargs;
- }
- result = PyObject_Call($print_function, arg_tuple, kwargs);
- if (unlikely(kwargs) && (kwargs != $print_function_kwargs))
- Py_DECREF(kwargs);
- if (!result)
- return -1;
- Py_DECREF(result);
- return 0;
-bad:
- if (kwargs != $print_function_kwargs)
- Py_XDECREF(kwargs);
- return -1;
-}
-#endif
-
-////////////////////// PrintOne.proto //////////////////////
-//@requires: Print
-
-static int __Pyx_PrintOne(PyObject* stream, PyObject *o); /*proto*/
-
-////////////////////// PrintOne //////////////////////
-
-#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3
-
-static int __Pyx_PrintOne(PyObject* f, PyObject *o) {
- if (!f) {
- if (!(f = __Pyx_GetStdout()))
- return -1;
- }
- Py_INCREF(f);
- if (PyFile_SoftSpace(f, 0)) {
- if (PyFile_WriteString(" ", f) < 0)
- goto error;
- }
- if (PyFile_WriteObject(o, f, Py_PRINT_RAW) < 0)
- goto error;
- if (PyFile_WriteString("\n", f) < 0)
- goto error;
- Py_DECREF(f);
- return 0;
-error:
- Py_DECREF(f);
- return -1;
- /* the line below is just to avoid C compiler
- * warnings about unused functions */
- return __Pyx_Print(f, NULL, 0);
-}
-
-#else /* Python 3 has a print function */
-
-static int __Pyx_PrintOne(PyObject* stream, PyObject *o) {
- int res;
- PyObject* arg_tuple = PyTuple_Pack(1, o);
- if (unlikely(!arg_tuple))
- return -1;
- res = __Pyx_Print(stream, arg_tuple, 1);
- Py_DECREF(arg_tuple);
- return res;
-}
-
-#endif
+////////////////////// Print.proto //////////////////////
+//@substitute: naming
+
+static int __Pyx_Print(PyObject*, PyObject *, int); /*proto*/
+#if CYTHON_COMPILING_IN_PYPY || PY_MAJOR_VERSION >= 3
+static PyObject* $print_function = 0;
+static PyObject* $print_function_kwargs = 0;
+#endif
+
+////////////////////// Print.cleanup //////////////////////
+//@substitute: naming
+
+#if CYTHON_COMPILING_IN_PYPY || PY_MAJOR_VERSION >= 3
+Py_CLEAR($print_function);
+Py_CLEAR($print_function_kwargs);
+#endif
+
+////////////////////// Print //////////////////////
+//@substitute: naming
+
+#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3
+static PyObject *__Pyx_GetStdout(void) {
+ PyObject *f = PySys_GetObject((char *)"stdout");
+ if (!f) {
+ PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
+ }
+ return f;
+}
+
+static int __Pyx_Print(PyObject* f, PyObject *arg_tuple, int newline) {
+ int i;
+
+ if (!f) {
+ if (!(f = __Pyx_GetStdout()))
+ return -1;
+ }
+ Py_INCREF(f);
+ for (i=0; i < PyTuple_GET_SIZE(arg_tuple); i++) {
+ PyObject* v;
+ if (PyFile_SoftSpace(f, 1)) {
+ if (PyFile_WriteString(" ", f) < 0)
+ goto error;
+ }
+ v = PyTuple_GET_ITEM(arg_tuple, i);
+ if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0)
+ goto error;
+ if (PyString_Check(v)) {
+ char *s = PyString_AsString(v);
+ Py_ssize_t len = PyString_Size(v);
+ if (len > 0) {
+ // append soft-space if necessary (not using isspace() due to C/C++ problem on MacOS-X)
+ switch (s[len-1]) {
+ case ' ': break;
+ case '\f': case '\r': case '\n': case '\t': case '\v':
+ PyFile_SoftSpace(f, 0);
+ break;
+ default: break;
+ }
+ }
+ }
+ }
+ if (newline) {
+ if (PyFile_WriteString("\n", f) < 0)
+ goto error;
+ PyFile_SoftSpace(f, 0);
+ }
+ Py_DECREF(f);
+ return 0;
+error:
+ Py_DECREF(f);
+ return -1;
+}
+
+#else /* Python 3 has a print function */
+
+static int __Pyx_Print(PyObject* stream, PyObject *arg_tuple, int newline) {
+ PyObject* kwargs = 0;
+ PyObject* result = 0;
+ PyObject* end_string;
+ if (unlikely(!$print_function)) {
+ $print_function = PyObject_GetAttr($builtins_cname, PYIDENT("print"));
+ if (!$print_function)
+ return -1;
+ }
+ if (stream) {
+ kwargs = PyDict_New();
+ if (unlikely(!kwargs))
+ return -1;
+ if (unlikely(PyDict_SetItem(kwargs, PYIDENT("file"), stream) < 0))
+ goto bad;
+ if (!newline) {
+ end_string = PyUnicode_FromStringAndSize(" ", 1);
+ if (unlikely(!end_string))
+ goto bad;
+ if (PyDict_SetItem(kwargs, PYIDENT("end"), end_string) < 0) {
+ Py_DECREF(end_string);
+ goto bad;
+ }
+ Py_DECREF(end_string);
+ }
+ } else if (!newline) {
+ if (unlikely(!$print_function_kwargs)) {
+ $print_function_kwargs = PyDict_New();
+ if (unlikely(!$print_function_kwargs))
+ return -1;
+ end_string = PyUnicode_FromStringAndSize(" ", 1);
+ if (unlikely(!end_string))
+ return -1;
+ if (PyDict_SetItem($print_function_kwargs, PYIDENT("end"), end_string) < 0) {
+ Py_DECREF(end_string);
+ return -1;
+ }
+ Py_DECREF(end_string);
+ }
+ kwargs = $print_function_kwargs;
+ }
+ result = PyObject_Call($print_function, arg_tuple, kwargs);
+ if (unlikely(kwargs) && (kwargs != $print_function_kwargs))
+ Py_DECREF(kwargs);
+ if (!result)
+ return -1;
+ Py_DECREF(result);
+ return 0;
+bad:
+ if (kwargs != $print_function_kwargs)
+ Py_XDECREF(kwargs);
+ return -1;
+}
+#endif
+
+////////////////////// PrintOne.proto //////////////////////
+//@requires: Print
+
+static int __Pyx_PrintOne(PyObject* stream, PyObject *o); /*proto*/
+
+////////////////////// PrintOne //////////////////////
+
+#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION < 3
+
+static int __Pyx_PrintOne(PyObject* f, PyObject *o) {
+ if (!f) {
+ if (!(f = __Pyx_GetStdout()))
+ return -1;
+ }
+ Py_INCREF(f);
+ if (PyFile_SoftSpace(f, 0)) {
+ if (PyFile_WriteString(" ", f) < 0)
+ goto error;
+ }
+ if (PyFile_WriteObject(o, f, Py_PRINT_RAW) < 0)
+ goto error;
+ if (PyFile_WriteString("\n", f) < 0)
+ goto error;
+ Py_DECREF(f);
+ return 0;
+error:
+ Py_DECREF(f);
+ return -1;
+ /* the line below is just to avoid C compiler
+ * warnings about unused functions */
+ return __Pyx_Print(f, NULL, 0);
+}
+
+#else /* Python 3 has a print function */
+
+static int __Pyx_PrintOne(PyObject* stream, PyObject *o) {
+ int res;
+ PyObject* arg_tuple = PyTuple_Pack(1, o);
+ if (unlikely(!arg_tuple))
+ return -1;
+ res = __Pyx_Print(stream, arg_tuple, 1);
+ Py_DECREF(arg_tuple);
+ return res;
+}
+
+#endif
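As a rough illustration of how these print helpers are driven (the caller below is hypothetical, not part of this commit), a Python 2-style `print a, b` statement lowers to approximately:

    /* hypothetical generated caller, not from this commit */
    static int __pyx_example_print_pair(PyObject *a, PyObject *b) {
        int ret;
        PyObject *args = PyTuple_Pack(2, a, b);   /* new reference */
        if (unlikely(!args))
            return -1;
        ret = __Pyx_Print(NULL, args, 1);         /* NULL stream -> sys.stdout, 1 -> trailing newline */
        Py_DECREF(args);
        return ret;
    }

A single-argument `print o` goes through __Pyx_PrintOne(NULL, o) instead, which skips the intermediate tuple on Python 2 and builds a one-element tuple only on the Python 3 path.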
diff --git a/contrib/tools/cython/Cython/Utility/Profile.c b/contrib/tools/cython/Cython/Utility/Profile.c
index 56c5a08e1f..921eb67529 100644
--- a/contrib/tools/cython/Cython/Utility/Profile.c
+++ b/contrib/tools/cython/Cython/Utility/Profile.c
@@ -1,18 +1,18 @@
-/////////////// Profile.proto ///////////////
+/////////////// Profile.proto ///////////////
//@requires: Exceptions.c::PyErrFetchRestore
-//@substitute: naming
-
-// Note that cPython ignores PyTrace_EXCEPTION,
-// but maybe some other profilers don't.
-
-#ifndef CYTHON_PROFILE
+//@substitute: naming
+
+// Note that cPython ignores PyTrace_EXCEPTION,
+// but maybe some other profilers don't.
+
+#ifndef CYTHON_PROFILE
#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_PYSTON
#define CYTHON_PROFILE 0
#else
- #define CYTHON_PROFILE 1
-#endif
+ #define CYTHON_PROFILE 1
+#endif
#endif
-
+
#ifndef CYTHON_TRACE_NOGIL
#define CYTHON_TRACE_NOGIL 0
#else
@@ -21,37 +21,37 @@
#endif
#endif
-#ifndef CYTHON_TRACE
- #define CYTHON_TRACE 0
-#endif
-
-#if CYTHON_TRACE
- #undef CYTHON_PROFILE_REUSE_FRAME
-#endif
-
-#ifndef CYTHON_PROFILE_REUSE_FRAME
- #define CYTHON_PROFILE_REUSE_FRAME 0
-#endif
-
-#if CYTHON_PROFILE || CYTHON_TRACE
-
- #include "compile.h"
- #include "frameobject.h"
- #include "traceback.h"
-
- #if CYTHON_PROFILE_REUSE_FRAME
- #define CYTHON_FRAME_MODIFIER static
+#ifndef CYTHON_TRACE
+ #define CYTHON_TRACE 0
+#endif
+
+#if CYTHON_TRACE
+ #undef CYTHON_PROFILE_REUSE_FRAME
+#endif
+
+#ifndef CYTHON_PROFILE_REUSE_FRAME
+ #define CYTHON_PROFILE_REUSE_FRAME 0
+#endif
+
+#if CYTHON_PROFILE || CYTHON_TRACE
+
+ #include "compile.h"
+ #include "frameobject.h"
+ #include "traceback.h"
+
+ #if CYTHON_PROFILE_REUSE_FRAME
+ #define CYTHON_FRAME_MODIFIER static
#define CYTHON_FRAME_DEL(frame)
- #else
- #define CYTHON_FRAME_MODIFIER
+ #else
+ #define CYTHON_FRAME_MODIFIER
#define CYTHON_FRAME_DEL(frame) Py_CLEAR(frame)
- #endif
-
+ #endif
+
#define __Pyx_TraceDeclarations \
static PyCodeObject *$frame_code_cname = NULL; \
CYTHON_FRAME_MODIFIER PyFrameObject *$frame_cname = NULL; \
int __Pyx_use_tracing = 0;
-
+
#define __Pyx_TraceFrameInit(codeobj) \
if (codeobj) $frame_code_cname = (PyCodeObject*) codeobj;
@@ -118,7 +118,7 @@
__Pyx_use_tracing = __Pyx_TraceSetupAndCall(&$frame_code_cname, &$frame_cname, tstate, funcname, srcfile, firstlineno); \
if (unlikely(__Pyx_use_tracing < 0)) goto_error; \
} \
- }
+ }
#else
#define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil, goto_error) \
{ PyThreadState* tstate = PyThreadState_GET(); \
@@ -128,8 +128,8 @@
} \
}
#endif
-
- #define __Pyx_TraceException() \
+
+ #define __Pyx_TraceException() \
if (likely(!__Pyx_use_tracing)); else { \
PyThreadState* tstate = __Pyx_PyThreadState_Current; \
if (__Pyx_IsTracing(tstate, 0, 1)) { \
@@ -144,9 +144,9 @@
Py_DECREF(exc_info); \
} \
__Pyx_LeaveTracing(tstate); \
- } \
- }
-
+ } \
+ }
+
static void __Pyx_call_return_trace_func(PyThreadState *tstate, PyFrameObject *frame, PyObject *result) {
PyObject *type, *value, *traceback;
__Pyx_ErrFetchInState(tstate, &type, &value, &traceback);
@@ -158,8 +158,8 @@
CYTHON_FRAME_DEL(frame);
__Pyx_LeaveTracing(tstate);
__Pyx_ErrRestoreInState(tstate, type, value, traceback);
- }
-
+ }
+
#ifdef WITH_THREAD
#define __Pyx_TraceReturn(result, nogil) \
if (likely(!__Pyx_use_tracing)); else { \
@@ -190,21 +190,21 @@
}
#endif
- static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno); /*proto*/
+ static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno); /*proto*/
static int __Pyx_TraceSetupAndCall(PyCodeObject** code, PyFrameObject** frame, PyThreadState* tstate, const char *funcname, const char *srcfile, int firstlineno); /*proto*/
-
-#else
-
- #define __Pyx_TraceDeclarations
+
+#else
+
+ #define __Pyx_TraceDeclarations
#define __Pyx_TraceFrameInit(codeobj)
// mark error label as used to avoid compiler warnings
#define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil, goto_error) if ((1)); else goto_error;
- #define __Pyx_TraceException()
+ #define __Pyx_TraceException()
#define __Pyx_TraceReturn(result, nogil)
-
-#endif /* CYTHON_PROFILE */
-
-#if CYTHON_TRACE
+
+#endif /* CYTHON_PROFILE */
+
+#if CYTHON_TRACE
// see call_trace_protected() in CPython's ceval.c
static int __Pyx_call_line_trace_func(PyThreadState *tstate, PyFrameObject *frame, int lineno) {
int ret;
@@ -264,60 +264,60 @@
// XXX https://github.com/cython/cython/issues/2274 \
if (unlikely(ret)) { fprintf(stderr, "cython: line_trace_func returned %d\n", ret); } \
} \
- }
+ }
#endif
-#else
+#else
// mark error label as used to avoid compiler warnings
#define __Pyx_TraceLine(lineno, nogil, goto_error) if ((1)); else goto_error;
-#endif
-
-/////////////// Profile ///////////////
-//@substitute: naming
-
-#if CYTHON_PROFILE
-
-static int __Pyx_TraceSetupAndCall(PyCodeObject** code,
- PyFrameObject** frame,
+#endif
+
+/////////////// Profile ///////////////
+//@substitute: naming
+
+#if CYTHON_PROFILE
+
+static int __Pyx_TraceSetupAndCall(PyCodeObject** code,
+ PyFrameObject** frame,
PyThreadState* tstate,
- const char *funcname,
- const char *srcfile,
- int firstlineno) {
+ const char *funcname,
+ const char *srcfile,
+ int firstlineno) {
PyObject *type, *value, *traceback;
- int retval;
- if (*frame == NULL || !CYTHON_PROFILE_REUSE_FRAME) {
- if (*code == NULL) {
- *code = __Pyx_createFrameCodeObject(funcname, srcfile, firstlineno);
- if (*code == NULL) return 0;
- }
- *frame = PyFrame_New(
- tstate, /*PyThreadState *tstate*/
- *code, /*PyCodeObject *code*/
- $moddict_cname, /*PyObject *globals*/
- 0 /*PyObject *locals*/
- );
- if (*frame == NULL) return 0;
- if (CYTHON_TRACE && (*frame)->f_trace == NULL) {
- // this enables "f_lineno" lookup, at least in CPython ...
- Py_INCREF(Py_None);
- (*frame)->f_trace = Py_None;
- }
-#if PY_VERSION_HEX < 0x030400B1
- } else {
- (*frame)->f_tstate = tstate;
-#endif
- }
+ int retval;
+ if (*frame == NULL || !CYTHON_PROFILE_REUSE_FRAME) {
+ if (*code == NULL) {
+ *code = __Pyx_createFrameCodeObject(funcname, srcfile, firstlineno);
+ if (*code == NULL) return 0;
+ }
+ *frame = PyFrame_New(
+ tstate, /*PyThreadState *tstate*/
+ *code, /*PyCodeObject *code*/
+ $moddict_cname, /*PyObject *globals*/
+ 0 /*PyObject *locals*/
+ );
+ if (*frame == NULL) return 0;
+ if (CYTHON_TRACE && (*frame)->f_trace == NULL) {
+ // this enables "f_lineno" lookup, at least in CPython ...
+ Py_INCREF(Py_None);
+ (*frame)->f_trace = Py_None;
+ }
+#if PY_VERSION_HEX < 0x030400B1
+ } else {
+ (*frame)->f_tstate = tstate;
+#endif
+ }
__Pyx_PyFrame_SetLineNumber(*frame, firstlineno);
retval = 1;
__Pyx_EnterTracing(tstate);
__Pyx_ErrFetchInState(tstate, &type, &value, &traceback);
- #if CYTHON_TRACE
- if (tstate->c_tracefunc)
+ #if CYTHON_TRACE
+ if (tstate->c_tracefunc)
retval = tstate->c_tracefunc(tstate->c_traceobj, *frame, PyTrace_CALL, NULL) == 0;
if (retval && tstate->c_profilefunc)
- #endif
- retval = tstate->c_profilefunc(tstate->c_profileobj, *frame, PyTrace_CALL, NULL) == 0;
+ #endif
+ retval = tstate->c_profilefunc(tstate->c_profileobj, *frame, PyTrace_CALL, NULL) == 0;
__Pyx_LeaveTracing(tstate);
if (retval) {
@@ -329,9 +329,9 @@ static int __Pyx_TraceSetupAndCall(PyCodeObject** code,
Py_XDECREF(traceback);
return -1;
}
-}
-
-static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno) {
+}
+
+static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno) {
PyCodeObject *py_code = 0;
#if PY_MAJOR_VERSION >= 3
@@ -341,38 +341,38 @@ static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const cha
py_code->co_flags |= CO_OPTIMIZED | CO_NEWLOCALS;
}
#else
- PyObject *py_srcfile = 0;
- PyObject *py_funcname = 0;
-
- py_funcname = PyString_FromString(funcname);
+ PyObject *py_srcfile = 0;
+ PyObject *py_funcname = 0;
+
+ py_funcname = PyString_FromString(funcname);
if (unlikely(!py_funcname)) goto bad;
- py_srcfile = PyString_FromString(srcfile);
+ py_srcfile = PyString_FromString(srcfile);
if (unlikely(!py_srcfile)) goto bad;
-
- py_code = PyCode_New(
- 0, /*int argcount,*/
- 0, /*int nlocals,*/
- 0, /*int stacksize,*/
+
+ py_code = PyCode_New(
+ 0, /*int argcount,*/
+ 0, /*int nlocals,*/
+ 0, /*int stacksize,*/
// make CPython use a fresh dict for "f_locals" at need (see GH #1836)
CO_OPTIMIZED | CO_NEWLOCALS, /*int flags,*/
- $empty_bytes, /*PyObject *code,*/
- $empty_tuple, /*PyObject *consts,*/
- $empty_tuple, /*PyObject *names,*/
- $empty_tuple, /*PyObject *varnames,*/
- $empty_tuple, /*PyObject *freevars,*/
- $empty_tuple, /*PyObject *cellvars,*/
- py_srcfile, /*PyObject *filename,*/
- py_funcname, /*PyObject *name,*/
- firstlineno, /*int firstlineno,*/
- $empty_bytes /*PyObject *lnotab*/
- );
-
-bad:
- Py_XDECREF(py_srcfile);
- Py_XDECREF(py_funcname);
+ $empty_bytes, /*PyObject *code,*/
+ $empty_tuple, /*PyObject *consts,*/
+ $empty_tuple, /*PyObject *names,*/
+ $empty_tuple, /*PyObject *varnames,*/
+ $empty_tuple, /*PyObject *freevars,*/
+ $empty_tuple, /*PyObject *cellvars,*/
+ py_srcfile, /*PyObject *filename,*/
+ py_funcname, /*PyObject *name,*/
+ firstlineno, /*int firstlineno,*/
+ $empty_bytes /*PyObject *lnotab*/
+ );
+
+bad:
+ Py_XDECREF(py_srcfile);
+ Py_XDECREF(py_funcname);
#endif
-
- return py_code;
-}
-
-#endif /* CYTHON_PROFILE */
+
+ return py_code;
+}
+
+#endif /* CYTHON_PROFILE */
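To make the tracing protocol above concrete, a profiled function body emitted by Cython follows roughly this shape (the names, file, and line numbers below are illustrative, not actual generated output):

    /* hypothetical shape of a traced function, not from this commit */
    static PyObject *__pyx_example_traced(PyObject *self) {
        __Pyx_TraceDeclarations
        PyObject *result;
        (void)self;
        __Pyx_TraceCall("example", "example.pyx", 1, 0, goto bad);
        __Pyx_TraceLine(2, 0, goto bad);
        result = PyLong_FromLong(42);
        if (unlikely(!result)) goto bad;
        __Pyx_TraceReturn(result, 0);
        return result;
      bad:
        __Pyx_TraceException();
        return NULL;
    }

When CYTHON_PROFILE and CYTHON_TRACE are both 0, every macro used here compiles away, so the instrumentation has no runtime cost.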
diff --git a/contrib/tools/cython/Cython/Utility/StringTools.c b/contrib/tools/cython/Cython/Utility/StringTools.c
index c60333c2a3..2fdae812a0 100644
--- a/contrib/tools/cython/Cython/Utility/StringTools.c
+++ b/contrib/tools/cython/Cython/Utility/StringTools.c
@@ -1,71 +1,71 @@
-
-//////////////////// IncludeStringH.proto ////////////////////
-
-#include <string.h>
-
-//////////////////// IncludeCppStringH.proto ////////////////////
-
-#include <string>
-
-//////////////////// InitStrings.proto ////////////////////
-
-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
-
-//////////////////// InitStrings ////////////////////
-
-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
- while (t->p) {
- #if PY_MAJOR_VERSION < 3
- if (t->is_unicode) {
- *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
- } else if (t->intern) {
- *t->p = PyString_InternFromString(t->s);
- } else {
- *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
- }
- #else /* Python 3+ has unicode identifiers */
- if (t->is_unicode | t->is_str) {
- if (t->intern) {
- *t->p = PyUnicode_InternFromString(t->s);
- } else if (t->encoding) {
- *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
- } else {
- *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
- }
- } else {
- *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
- }
- #endif
- if (!*t->p)
- return -1;
+
+//////////////////// IncludeStringH.proto ////////////////////
+
+#include <string.h>
+
+//////////////////// IncludeCppStringH.proto ////////////////////
+
+#include <string>
+
+//////////////////// InitStrings.proto ////////////////////
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
+
+//////////////////// InitStrings ////////////////////
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+ while (t->p) {
+ #if PY_MAJOR_VERSION < 3
+ if (t->is_unicode) {
+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+ } else if (t->intern) {
+ *t->p = PyString_InternFromString(t->s);
+ } else {
+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+ }
+ #else /* Python 3+ has unicode identifiers */
+ if (t->is_unicode | t->is_str) {
+ if (t->intern) {
+ *t->p = PyUnicode_InternFromString(t->s);
+ } else if (t->encoding) {
+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
+ } else {
+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+ }
+ } else {
+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
+ }
+ #endif
+ if (!*t->p)
+ return -1;
// initialise cached hash value
if (PyObject_Hash(*t->p) == -1)
return -1;
- ++t;
- }
- return 0;
-}
-
-//////////////////// BytesContains.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character); /*proto*/
-
-//////////////////// BytesContains ////////////////////
+ ++t;
+ }
+ return 0;
+}
+
+//////////////////// BytesContains.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character); /*proto*/
+
+//////////////////// BytesContains ////////////////////
//@requires: IncludeStringH
-
-static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character) {
- const Py_ssize_t length = PyBytes_GET_SIZE(bytes);
- char* char_start = PyBytes_AS_STRING(bytes);
+
+static CYTHON_INLINE int __Pyx_BytesContains(PyObject* bytes, char character) {
+ const Py_ssize_t length = PyBytes_GET_SIZE(bytes);
+ char* char_start = PyBytes_AS_STRING(bytes);
return memchr(char_start, (unsigned char)character, (size_t)length) != NULL;
-}
-
-
-//////////////////// PyUCS4InUnicode.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character); /*proto*/
-
-//////////////////// PyUCS4InUnicode ////////////////////
-
+}
+
+
+//////////////////// PyUCS4InUnicode.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character); /*proto*/
+
+//////////////////// PyUCS4InUnicode ////////////////////
+
#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
#if PY_VERSION_HEX < 0x03090000
@@ -103,26 +103,26 @@ static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t
}
#endif
-static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
-#if CYTHON_PEP393_ENABLED
- const int kind = PyUnicode_KIND(unicode);
+static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) {
+#if CYTHON_PEP393_ENABLED
+ const int kind = PyUnicode_KIND(unicode);
#ifdef PyUnicode_WCHAR_KIND
if (likely(kind != PyUnicode_WCHAR_KIND))
#endif
{
- Py_ssize_t i;
- const void* udata = PyUnicode_DATA(unicode);
- const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
- for (i=0; i < length; i++) {
- if (unlikely(character == PyUnicode_READ(kind, udata, i))) return 1;
- }
- return 0;
- }
+ Py_ssize_t i;
+ const void* udata = PyUnicode_DATA(unicode);
+ const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
+ for (i=0; i < length; i++) {
+ if (unlikely(character == PyUnicode_READ(kind, udata, i))) return 1;
+ }
+ return 0;
+ }
#elif PY_VERSION_HEX >= 0x03090000
#error Cannot use "UChar in Unicode" in Python 3.9 without PEP-393 unicode strings.
#elif !defined(PyUnicode_AS_UNICODE)
#error Cannot use "UChar in Unicode" in Python < 3.9 without Py_UNICODE support.
-#endif
+#endif
#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE))
#if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2
@@ -138,20 +138,20 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch
__Pyx_PyUnicode_AS_UNICODE(unicode),
__Pyx_PyUnicode_GET_SIZE(unicode),
character);
-
- }
+
+ }
#endif
-}
-
-
-//////////////////// PyUnicodeContains.proto ////////////////////
-
+}
+
+
+//////////////////// PyUnicodeContains.proto ////////////////////
+
static CYTHON_INLINE int __Pyx_PyUnicode_ContainsTF(PyObject* substring, PyObject* text, int eq) {
- int result = PyUnicode_Contains(text, substring);
- return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
-}
-
-
+ int result = PyUnicode_Contains(text, substring);
+ return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
+}
+
+
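The two containment helpers above back the `in` operator on unicode objects: __Pyx_UnicodeContainsUCS4 handles a typed Py_UCS4 character on the left-hand side, while __Pyx_PyUnicode_ContainsTF handles a substring object and folds the `in`/`not in` truth test into one call. A hypothetical caller (not from this commit):

    /* hypothetical lowering of `sub in text and ch in text` */
    static int __pyx_example_contains(PyObject *text, PyObject *sub, Py_UCS4 ch) {
        int has_sub = __Pyx_PyUnicode_ContainsTF(sub, text, Py_EQ);   /* 1/0, -1 on error */
        if (unlikely(has_sub < 0))
            return -1;
        return has_sub && __Pyx_UnicodeContainsUCS4(text, ch);        /* 1 if found, else 0 */
    }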
//////////////////// CStringEquals.proto ////////////////////
static CYTHON_INLINE int __Pyx_StrEq(const char *, const char *); /*proto*/
@@ -164,65 +164,65 @@ static CYTHON_INLINE int __Pyx_StrEq(const char *s1, const char *s2) {
}
-//////////////////// StrEquals.proto ////////////////////
-//@requires: BytesEquals
-//@requires: UnicodeEquals
-
-#if PY_MAJOR_VERSION >= 3
-#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
-#else
-#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
-#endif
-
-
-//////////////////// UnicodeEquals.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
-
-//////////////////// UnicodeEquals ////////////////////
-//@requires: BytesEquals
-
-static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
-#if CYTHON_COMPILING_IN_PYPY
- return PyObject_RichCompareBool(s1, s2, equals);
-#else
-#if PY_MAJOR_VERSION < 3
- PyObject* owned_ref = NULL;
-#endif
- int s1_is_unicode, s2_is_unicode;
- if (s1 == s2) {
- /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
- goto return_eq;
- }
- s1_is_unicode = PyUnicode_CheckExact(s1);
- s2_is_unicode = PyUnicode_CheckExact(s2);
-#if PY_MAJOR_VERSION < 3
- if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) {
- owned_ref = PyUnicode_FromObject(s2);
- if (unlikely(!owned_ref))
- return -1;
- s2 = owned_ref;
- s2_is_unicode = 1;
- } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) {
- owned_ref = PyUnicode_FromObject(s1);
- if (unlikely(!owned_ref))
- return -1;
- s1 = owned_ref;
- s1_is_unicode = 1;
- } else if (((!s2_is_unicode) & (!s1_is_unicode))) {
- return __Pyx_PyBytes_Equals(s1, s2, equals);
- }
-#endif
- if (s1_is_unicode & s2_is_unicode) {
- Py_ssize_t length;
- int kind;
- void *data1, *data2;
- if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0))
- return -1;
- length = __Pyx_PyUnicode_GET_LENGTH(s1);
- if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) {
- goto return_ne;
- }
+//////////////////// StrEquals.proto ////////////////////
+//@requires: BytesEquals
+//@requires: UnicodeEquals
+
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
+#else
+#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
+#endif
+
+
+//////////////////// UnicodeEquals.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
+
+//////////////////// UnicodeEquals ////////////////////
+//@requires: BytesEquals
+
+static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY
+ return PyObject_RichCompareBool(s1, s2, equals);
+#else
+#if PY_MAJOR_VERSION < 3
+ PyObject* owned_ref = NULL;
+#endif
+ int s1_is_unicode, s2_is_unicode;
+ if (s1 == s2) {
+ /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
+ goto return_eq;
+ }
+ s1_is_unicode = PyUnicode_CheckExact(s1);
+ s2_is_unicode = PyUnicode_CheckExact(s2);
+#if PY_MAJOR_VERSION < 3
+ if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) {
+ owned_ref = PyUnicode_FromObject(s2);
+ if (unlikely(!owned_ref))
+ return -1;
+ s2 = owned_ref;
+ s2_is_unicode = 1;
+ } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) {
+ owned_ref = PyUnicode_FromObject(s1);
+ if (unlikely(!owned_ref))
+ return -1;
+ s1 = owned_ref;
+ s1_is_unicode = 1;
+ } else if (((!s2_is_unicode) & (!s1_is_unicode))) {
+ return __Pyx_PyBytes_Equals(s1, s2, equals);
+ }
+#endif
+ if (s1_is_unicode & s2_is_unicode) {
+ Py_ssize_t length;
+ int kind;
+ void *data1, *data2;
+ if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0))
+ return -1;
+ length = __Pyx_PyUnicode_GET_LENGTH(s1);
+ if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) {
+ goto return_ne;
+ }
#if CYTHON_USE_UNICODE_INTERNALS
{
Py_hash_t hash1, hash2;
@@ -238,81 +238,81 @@ static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int
}
}
#endif
- // len(s1) == len(s2) >= 1 (empty string is interned, and "s1 is not s2")
- kind = __Pyx_PyUnicode_KIND(s1);
- if (kind != __Pyx_PyUnicode_KIND(s2)) {
- goto return_ne;
- }
- data1 = __Pyx_PyUnicode_DATA(s1);
- data2 = __Pyx_PyUnicode_DATA(s2);
- if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) {
- goto return_ne;
- } else if (length == 1) {
- goto return_eq;
- } else {
- int result = memcmp(data1, data2, (size_t)(length * kind));
- #if PY_MAJOR_VERSION < 3
- Py_XDECREF(owned_ref);
- #endif
- return (equals == Py_EQ) ? (result == 0) : (result != 0);
- }
- } else if ((s1 == Py_None) & s2_is_unicode) {
- goto return_ne;
- } else if ((s2 == Py_None) & s1_is_unicode) {
- goto return_ne;
- } else {
- int result;
- PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+ // len(s1) == len(s2) >= 1 (empty string is interned, and "s1 is not s2")
+ kind = __Pyx_PyUnicode_KIND(s1);
+ if (kind != __Pyx_PyUnicode_KIND(s2)) {
+ goto return_ne;
+ }
+ data1 = __Pyx_PyUnicode_DATA(s1);
+ data2 = __Pyx_PyUnicode_DATA(s2);
+ if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) {
+ goto return_ne;
+ } else if (length == 1) {
+ goto return_eq;
+ } else {
+ int result = memcmp(data1, data2, (size_t)(length * kind));
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ return (equals == Py_EQ) ? (result == 0) : (result != 0);
+ }
+ } else if ((s1 == Py_None) & s2_is_unicode) {
+ goto return_ne;
+ } else if ((s2 == Py_None) & s1_is_unicode) {
+ goto return_ne;
+ } else {
+ int result;
+ PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
#if PY_MAJOR_VERSION < 3
Py_XDECREF(owned_ref);
#endif
- if (!py_result)
- return -1;
- result = __Pyx_PyObject_IsTrue(py_result);
- Py_DECREF(py_result);
- return result;
- }
-return_eq:
- #if PY_MAJOR_VERSION < 3
- Py_XDECREF(owned_ref);
- #endif
- return (equals == Py_EQ);
-return_ne:
- #if PY_MAJOR_VERSION < 3
- Py_XDECREF(owned_ref);
- #endif
- return (equals == Py_NE);
-#endif
-}
-
-
-//////////////////// BytesEquals.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
-
-//////////////////// BytesEquals ////////////////////
-//@requires: IncludeStringH
-
-static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
-#if CYTHON_COMPILING_IN_PYPY
- return PyObject_RichCompareBool(s1, s2, equals);
-#else
- if (s1 == s2) {
- /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
- return (equals == Py_EQ);
- } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
- const char *ps1, *ps2;
- Py_ssize_t length = PyBytes_GET_SIZE(s1);
- if (length != PyBytes_GET_SIZE(s2))
- return (equals == Py_NE);
- // len(s1) == len(s2) >= 1 (empty string is interned, and "s1 is not s2")
- ps1 = PyBytes_AS_STRING(s1);
- ps2 = PyBytes_AS_STRING(s2);
- if (ps1[0] != ps2[0]) {
- return (equals == Py_NE);
- } else if (length == 1) {
- return (equals == Py_EQ);
- } else {
+ if (!py_result)
+ return -1;
+ result = __Pyx_PyObject_IsTrue(py_result);
+ Py_DECREF(py_result);
+ return result;
+ }
+return_eq:
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ return (equals == Py_EQ);
+return_ne:
+ #if PY_MAJOR_VERSION < 3
+ Py_XDECREF(owned_ref);
+ #endif
+ return (equals == Py_NE);
+#endif
+}
+
+
+//////////////////// BytesEquals.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
+
+//////////////////// BytesEquals ////////////////////
+//@requires: IncludeStringH
+
+static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
+#if CYTHON_COMPILING_IN_PYPY
+ return PyObject_RichCompareBool(s1, s2, equals);
+#else
+ if (s1 == s2) {
+ /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
+ return (equals == Py_EQ);
+ } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
+ const char *ps1, *ps2;
+ Py_ssize_t length = PyBytes_GET_SIZE(s1);
+ if (length != PyBytes_GET_SIZE(s2))
+ return (equals == Py_NE);
+ // len(s1) == len(s2) >= 1 (empty string is interned, and "s1 is not s2")
+ ps1 = PyBytes_AS_STRING(s1);
+ ps2 = PyBytes_AS_STRING(s2);
+ if (ps1[0] != ps2[0]) {
+ return (equals == Py_NE);
+ } else if (length == 1) {
+ return (equals == Py_EQ);
+ } else {
int result;
#if CYTHON_USE_UNICODE_INTERNALS
Py_hash_t hash1, hash2;
@@ -323,117 +323,117 @@ static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int eq
}
#endif
result = memcmp(ps1, ps2, (size_t)length);
- return (equals == Py_EQ) ? (result == 0) : (result != 0);
- }
- } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
- return (equals == Py_NE);
- } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
- return (equals == Py_NE);
- } else {
- int result;
- PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
- if (!py_result)
- return -1;
- result = __Pyx_PyObject_IsTrue(py_result);
- Py_DECREF(py_result);
- return result;
- }
-#endif
-}
-
-//////////////////// GetItemIntByteArray.proto ////////////////////
-
-#define __Pyx_GetItemInt_ByteArray(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_GetItemInt_ByteArray_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
- (PyErr_SetString(PyExc_IndexError, "bytearray index out of range"), -1))
-
-static CYTHON_INLINE int __Pyx_GetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i,
- int wraparound, int boundscheck);
-
-//////////////////// GetItemIntByteArray ////////////////////
-
-static CYTHON_INLINE int __Pyx_GetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i,
- int wraparound, int boundscheck) {
- Py_ssize_t length;
- if (wraparound | boundscheck) {
- length = PyByteArray_GET_SIZE(string);
- if (wraparound & unlikely(i < 0)) i += length;
+ return (equals == Py_EQ) ? (result == 0) : (result != 0);
+ }
+ } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
+ return (equals == Py_NE);
+ } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
+ return (equals == Py_NE);
+ } else {
+ int result;
+ PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
+ if (!py_result)
+ return -1;
+ result = __Pyx_PyObject_IsTrue(py_result);
+ Py_DECREF(py_result);
+ return result;
+ }
+#endif
+}
+
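Both equality helpers return 1 or 0, or -1 with an exception set, mirroring PyObject_RichCompareBool. A hypothetical use (not from this commit) for the usual `a == b` lowering on str operands:

    /* hypothetical lowering of `name == expected` for str objects */
    static int __pyx_example_str_eq(PyObject *name, PyObject *expected) {
        return __Pyx_PyString_Equals(name, expected, Py_EQ);   /* pass Py_NE for `!=` */
    }

Per the StrEquals macros above, __Pyx_PyString_Equals resolves to the bytes variant on Python 2 and the unicode variant on Python 3.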
+//////////////////// GetItemIntByteArray.proto ////////////////////
+
+#define __Pyx_GetItemInt_ByteArray(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_GetItemInt_ByteArray_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
+ (PyErr_SetString(PyExc_IndexError, "bytearray index out of range"), -1))
+
+static CYTHON_INLINE int __Pyx_GetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i,
+ int wraparound, int boundscheck);
+
+//////////////////// GetItemIntByteArray ////////////////////
+
+static CYTHON_INLINE int __Pyx_GetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i,
+ int wraparound, int boundscheck) {
+ Py_ssize_t length;
+ if (wraparound | boundscheck) {
+ length = PyByteArray_GET_SIZE(string);
+ if (wraparound & unlikely(i < 0)) i += length;
if ((!boundscheck) || likely(__Pyx_is_valid_index(i, length))) {
- return (unsigned char) (PyByteArray_AS_STRING(string)[i]);
- } else {
- PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
- return -1;
- }
- } else {
- return (unsigned char) (PyByteArray_AS_STRING(string)[i]);
- }
-}
-
-
-//////////////////// SetItemIntByteArray.proto ////////////////////
-
-#define __Pyx_SetItemInt_ByteArray(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_SetItemInt_ByteArray_Fast(o, (Py_ssize_t)i, v, wraparound, boundscheck) : \
- (PyErr_SetString(PyExc_IndexError, "bytearray index out of range"), -1))
-
-static CYTHON_INLINE int __Pyx_SetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i, unsigned char v,
- int wraparound, int boundscheck);
-
-//////////////////// SetItemIntByteArray ////////////////////
-
-static CYTHON_INLINE int __Pyx_SetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i, unsigned char v,
- int wraparound, int boundscheck) {
- Py_ssize_t length;
- if (wraparound | boundscheck) {
- length = PyByteArray_GET_SIZE(string);
- if (wraparound & unlikely(i < 0)) i += length;
+ return (unsigned char) (PyByteArray_AS_STRING(string)[i]);
+ } else {
+ PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
+ return -1;
+ }
+ } else {
+ return (unsigned char) (PyByteArray_AS_STRING(string)[i]);
+ }
+}
+
+
+//////////////////// SetItemIntByteArray.proto ////////////////////
+
+#define __Pyx_SetItemInt_ByteArray(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_SetItemInt_ByteArray_Fast(o, (Py_ssize_t)i, v, wraparound, boundscheck) : \
+ (PyErr_SetString(PyExc_IndexError, "bytearray index out of range"), -1))
+
+static CYTHON_INLINE int __Pyx_SetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i, unsigned char v,
+ int wraparound, int boundscheck);
+
+//////////////////// SetItemIntByteArray ////////////////////
+
+static CYTHON_INLINE int __Pyx_SetItemInt_ByteArray_Fast(PyObject* string, Py_ssize_t i, unsigned char v,
+ int wraparound, int boundscheck) {
+ Py_ssize_t length;
+ if (wraparound | boundscheck) {
+ length = PyByteArray_GET_SIZE(string);
+ if (wraparound & unlikely(i < 0)) i += length;
if ((!boundscheck) || likely(__Pyx_is_valid_index(i, length))) {
- PyByteArray_AS_STRING(string)[i] = (char) v;
- return 0;
- } else {
- PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
- return -1;
- }
- } else {
- PyByteArray_AS_STRING(string)[i] = (char) v;
- return 0;
- }
-}
-
-
-//////////////////// GetItemIntUnicode.proto ////////////////////
-
-#define __Pyx_GetItemInt_Unicode(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
- __Pyx_GetItemInt_Unicode_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
- (PyErr_SetString(PyExc_IndexError, "string index out of range"), (Py_UCS4)-1))
-
-static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i,
- int wraparound, int boundscheck);
-
-//////////////////// GetItemIntUnicode ////////////////////
-
-static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i,
- int wraparound, int boundscheck) {
- Py_ssize_t length;
- if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return (Py_UCS4)-1;
- if (wraparound | boundscheck) {
- length = __Pyx_PyUnicode_GET_LENGTH(ustring);
- if (wraparound & unlikely(i < 0)) i += length;
+ PyByteArray_AS_STRING(string)[i] = (char) v;
+ return 0;
+ } else {
+ PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
+ return -1;
+ }
+ } else {
+ PyByteArray_AS_STRING(string)[i] = (char) v;
+ return 0;
+ }
+}
+
+
+//////////////////// GetItemIntUnicode.proto ////////////////////
+
+#define __Pyx_GetItemInt_Unicode(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
+ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
+ __Pyx_GetItemInt_Unicode_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
+ (PyErr_SetString(PyExc_IndexError, "string index out of range"), (Py_UCS4)-1))
+
+static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i,
+ int wraparound, int boundscheck);
+
+//////////////////// GetItemIntUnicode ////////////////////
+
+static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i,
+ int wraparound, int boundscheck) {
+ Py_ssize_t length;
+ if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return (Py_UCS4)-1;
+ if (wraparound | boundscheck) {
+ length = __Pyx_PyUnicode_GET_LENGTH(ustring);
+ if (wraparound & unlikely(i < 0)) i += length;
if ((!boundscheck) || likely(__Pyx_is_valid_index(i, length))) {
- return __Pyx_PyUnicode_READ_CHAR(ustring, i);
- } else {
- PyErr_SetString(PyExc_IndexError, "string index out of range");
- return (Py_UCS4)-1;
- }
- } else {
- return __Pyx_PyUnicode_READ_CHAR(ustring, i);
- }
-}
-
-
+ return __Pyx_PyUnicode_READ_CHAR(ustring, i);
+ } else {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return (Py_UCS4)-1;
+ }
+ } else {
+ return __Pyx_PyUnicode_READ_CHAR(ustring, i);
+ }
+}
+
+
/////////////// decode_c_string_utf16.proto ///////////////
static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) {
@@ -449,37 +449,37 @@ static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_s
return PyUnicode_DecodeUTF16(s, size, errors, &byteorder);
}
-/////////////// decode_cpp_string.proto ///////////////
-//@requires: IncludeCppStringH
-//@requires: decode_c_bytes
-
-static CYTHON_INLINE PyObject* __Pyx_decode_cpp_string(
+/////////////// decode_cpp_string.proto ///////////////
+//@requires: IncludeCppStringH
+//@requires: decode_c_bytes
+
+static CYTHON_INLINE PyObject* __Pyx_decode_cpp_string(
std::string_view cppstring, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
- return __Pyx_decode_c_bytes(
- cppstring.data(), cppstring.size(), start, stop, encoding, errors, decode_func);
-}
-
-/////////////// decode_c_string.proto ///////////////
-
-static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
- const char* cstring, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
-
-/////////////// decode_c_string ///////////////
-//@requires: IncludeStringH
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
+ return __Pyx_decode_c_bytes(
+ cppstring.data(), cppstring.size(), start, stop, encoding, errors, decode_func);
+}
+
+/////////////// decode_c_string.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
+ const char* cstring, Py_ssize_t start, Py_ssize_t stop,
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
+
+/////////////// decode_c_string ///////////////
+//@requires: IncludeStringH
//@requires: decode_c_string_utf16
//@substitute: naming
-
-/* duplicate code to avoid calling strlen() if start >= 0 and stop >= 0 */
-static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
- const char* cstring, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
- Py_ssize_t length;
- if (unlikely((start < 0) | (stop < 0))) {
+
+/* duplicate code to avoid calling strlen() if start >= 0 and stop >= 0 */
+static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
+ const char* cstring, Py_ssize_t start, Py_ssize_t stop,
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
+ Py_ssize_t length;
+ if (unlikely((start < 0) | (stop < 0))) {
size_t slen = strlen(cstring);
if (unlikely(slen > (size_t) PY_SSIZE_T_MAX)) {
PyErr_SetString(PyExc_OverflowError,
@@ -487,145 +487,145 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
return NULL;
}
length = (Py_ssize_t) slen;
- if (start < 0) {
- start += length;
- if (start < 0)
- start = 0;
- }
- if (stop < 0)
- stop += length;
- }
+ if (start < 0) {
+ start += length;
+ if (start < 0)
+ start = 0;
+ }
+ if (stop < 0)
+ stop += length;
+ }
if (unlikely(stop <= start))
return __Pyx_NewRef($empty_unicode);
- length = stop - start;
- cstring += start;
- if (decode_func) {
- return decode_func(cstring, length, errors);
- } else {
- return PyUnicode_Decode(cstring, length, encoding, errors);
- }
-}
-
-/////////////// decode_c_bytes.proto ///////////////
-
-static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
- const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
-
-/////////////// decode_c_bytes ///////////////
+ length = stop - start;
+ cstring += start;
+ if (decode_func) {
+ return decode_func(cstring, length, errors);
+ } else {
+ return PyUnicode_Decode(cstring, length, encoding, errors);
+ }
+}
+
+/////////////// decode_c_bytes.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
+ const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
+
+/////////////// decode_c_bytes ///////////////
//@requires: decode_c_string_utf16
//@substitute: naming
-
-static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
- const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
- if (unlikely((start < 0) | (stop < 0))) {
- if (start < 0) {
- start += length;
- if (start < 0)
- start = 0;
- }
- if (stop < 0)
- stop += length;
- }
- if (stop > length)
- stop = length;
+
+static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
+ const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
+ if (unlikely((start < 0) | (stop < 0))) {
+ if (start < 0) {
+ start += length;
+ if (start < 0)
+ start = 0;
+ }
+ if (stop < 0)
+ stop += length;
+ }
+ if (stop > length)
+ stop = length;
if (unlikely(stop <= start))
return __Pyx_NewRef($empty_unicode);
- length = stop - start;
- cstring += start;
- if (decode_func) {
- return decode_func(cstring, length, errors);
- } else {
- return PyUnicode_Decode(cstring, length, encoding, errors);
- }
-}
-
-/////////////// decode_bytes.proto ///////////////
-//@requires: decode_c_bytes
-
-static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
- PyObject* string, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
- return __Pyx_decode_c_bytes(
- PyBytes_AS_STRING(string), PyBytes_GET_SIZE(string),
- start, stop, encoding, errors, decode_func);
-}
-
-/////////////// decode_bytearray.proto ///////////////
-//@requires: decode_c_bytes
-
-static CYTHON_INLINE PyObject* __Pyx_decode_bytearray(
- PyObject* string, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
- return __Pyx_decode_c_bytes(
- PyByteArray_AS_STRING(string), PyByteArray_GET_SIZE(string),
- start, stop, encoding, errors, decode_func);
-}
-
-/////////////// PyUnicode_Substring.proto ///////////////
-
-static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
- PyObject* text, Py_ssize_t start, Py_ssize_t stop);
-
-/////////////// PyUnicode_Substring ///////////////
+ length = stop - start;
+ cstring += start;
+ if (decode_func) {
+ return decode_func(cstring, length, errors);
+ } else {
+ return PyUnicode_Decode(cstring, length, encoding, errors);
+ }
+}
+
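__Pyx_decode_c_string and __Pyx_decode_c_bytes apply Python-style slice handling (negative indices are offset by the length and an empty slice yields the empty unicode string) before decoding. A hypothetical caller (not from this commit):

    /* hypothetical lowering of buf[7:12].decode('utf-8') for a known-length C buffer */
    static PyObject *__pyx_example_decode_slice(const char *buf, Py_ssize_t len) {
        return __Pyx_decode_c_bytes(buf, len, 7, 12, NULL, NULL, PyUnicode_DecodeUTF8);
    }

Because a decode_func is supplied, the encoding argument is ignored, and a NULL errors argument means strict error handling.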
+/////////////// decode_bytes.proto ///////////////
+//@requires: decode_c_bytes
+
+static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
+ PyObject* string, Py_ssize_t start, Py_ssize_t stop,
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
+ return __Pyx_decode_c_bytes(
+ PyBytes_AS_STRING(string), PyBytes_GET_SIZE(string),
+ start, stop, encoding, errors, decode_func);
+}
+
+/////////////// decode_bytearray.proto ///////////////
+//@requires: decode_c_bytes
+
+static CYTHON_INLINE PyObject* __Pyx_decode_bytearray(
+ PyObject* string, Py_ssize_t start, Py_ssize_t stop,
+ const char* encoding, const char* errors,
+ PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) {
+ return __Pyx_decode_c_bytes(
+ PyByteArray_AS_STRING(string), PyByteArray_GET_SIZE(string),
+ start, stop, encoding, errors, decode_func);
+}
+
+/////////////// PyUnicode_Substring.proto ///////////////
+
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
+ PyObject* text, Py_ssize_t start, Py_ssize_t stop);
+
+/////////////// PyUnicode_Substring ///////////////
//@substitute: naming
-
-static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
- PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
- Py_ssize_t length;
- if (unlikely(__Pyx_PyUnicode_READY(text) == -1)) return NULL;
- length = __Pyx_PyUnicode_GET_LENGTH(text);
- if (start < 0) {
- start += length;
- if (start < 0)
- start = 0;
- }
- if (stop < 0)
+
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
+ PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
+ Py_ssize_t length;
+ if (unlikely(__Pyx_PyUnicode_READY(text) == -1)) return NULL;
+ length = __Pyx_PyUnicode_GET_LENGTH(text);
+ if (start < 0) {
+ start += length;
+ if (start < 0)
+ start = 0;
+ }
+ if (stop < 0)
stop += length;
- else if (stop > length)
- stop = length;
+ else if (stop > length)
+ stop = length;
if (stop <= start)
return __Pyx_NewRef($empty_unicode);
-#if CYTHON_PEP393_ENABLED
- return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
- PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
-#else
- return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
-#endif
-}
-
-
-/////////////// py_unicode_istitle.proto ///////////////
-
-// Py_UNICODE_ISTITLE() doesn't match unicode.istitle() as the latter
-// additionally allows characters that comply with Py_UNICODE_ISUPPER()
-
-#if PY_VERSION_HEX < 0x030200A2
-static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar)
-#else
-static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UCS4 uchar)
-#endif
-{
- return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
-}
-
-
-/////////////// unicode_tailmatch.proto ///////////////
-
+#if CYTHON_PEP393_ENABLED
+ return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
+ PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
+#else
+ return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
+#endif
+}
+
+
+/////////////// py_unicode_istitle.proto ///////////////
+
+// Py_UNICODE_ISTITLE() doesn't match unicode.istitle() as the latter
+// additionally allows characters that comply with Py_UNICODE_ISUPPER()
+
+#if PY_VERSION_HEX < 0x030200A2
+static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar)
+#else
+static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UCS4 uchar)
+#endif
+{
+ return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
+}
+
+
+/////////////// unicode_tailmatch.proto ///////////////
+
static int __Pyx_PyUnicode_Tailmatch(
PyObject* s, PyObject* substr, Py_ssize_t start, Py_ssize_t end, int direction); /*proto*/
/////////////// unicode_tailmatch ///////////////
-// Python's unicode.startswith() and unicode.endswith() support a
-// tuple of prefixes/suffixes, whereas it's much more common to
-// test for a single unicode string.
-
+// Python's unicode.startswith() and unicode.endswith() support a
+// tuple of prefixes/suffixes, whereas it's much more common to
+// test for a single unicode string.
+
static int __Pyx_PyUnicode_TailmatchTuple(PyObject* s, PyObject* substrings,
Py_ssize_t start, Py_ssize_t end, int direction) {
Py_ssize_t i, count = PyTuple_GET_SIZE(substrings);
@@ -634,16 +634,16 @@ static int __Pyx_PyUnicode_TailmatchTuple(PyObject* s, PyObject* substrings,
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substrings, i),
start, end, direction);
-#else
+#else
PyObject* sub = PySequence_ITEM(substrings, i);
if (unlikely(!sub)) return -1;
result = PyUnicode_Tailmatch(s, sub, start, end, direction);
Py_DECREF(sub);
-#endif
+#endif
if (result) {
return (int) result;
- }
- }
+ }
+ }
return 0;
}
@@ -653,11 +653,11 @@ static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
return __Pyx_PyUnicode_TailmatchTuple(s, substr, start, end, direction);
}
return (int) PyUnicode_Tailmatch(s, substr, start, end, direction);
-}
-
-
-/////////////// bytes_tailmatch.proto ///////////////
-
+}
+
+
+/////////////// bytes_tailmatch.proto ///////////////
+
static int __Pyx_PyBytes_SingleTailmatch(PyObject* self, PyObject* arg,
Py_ssize_t start, Py_ssize_t end, int direction); /*proto*/
static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr,
@@ -667,60 +667,60 @@ static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr,
static int __Pyx_PyBytes_SingleTailmatch(PyObject* self, PyObject* arg,
Py_ssize_t start, Py_ssize_t end, int direction) {
- const char* self_ptr = PyBytes_AS_STRING(self);
- Py_ssize_t self_len = PyBytes_GET_SIZE(self);
- const char* sub_ptr;
- Py_ssize_t sub_len;
- int retval;
-
- Py_buffer view;
- view.obj = NULL;
-
- if ( PyBytes_Check(arg) ) {
- sub_ptr = PyBytes_AS_STRING(arg);
- sub_len = PyBytes_GET_SIZE(arg);
- }
-#if PY_MAJOR_VERSION < 3
- // Python 2.x allows mixing unicode and str
- else if ( PyUnicode_Check(arg) ) {
+ const char* self_ptr = PyBytes_AS_STRING(self);
+ Py_ssize_t self_len = PyBytes_GET_SIZE(self);
+ const char* sub_ptr;
+ Py_ssize_t sub_len;
+ int retval;
+
+ Py_buffer view;
+ view.obj = NULL;
+
+ if ( PyBytes_Check(arg) ) {
+ sub_ptr = PyBytes_AS_STRING(arg);
+ sub_len = PyBytes_GET_SIZE(arg);
+ }
+#if PY_MAJOR_VERSION < 3
+ // Python 2.x allows mixing unicode and str
+ else if ( PyUnicode_Check(arg) ) {
return (int) PyUnicode_Tailmatch(self, arg, start, end, direction);
- }
-#endif
- else {
- if (unlikely(PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) == -1))
- return -1;
- sub_ptr = (const char*) view.buf;
- sub_len = view.len;
- }
-
- if (end > self_len)
- end = self_len;
- else if (end < 0)
- end += self_len;
- if (end < 0)
- end = 0;
- if (start < 0)
- start += self_len;
- if (start < 0)
- start = 0;
-
- if (direction > 0) {
- /* endswith */
- if (end-sub_len > start)
- start = end - sub_len;
- }
-
- if (start + sub_len <= end)
- retval = !memcmp(self_ptr+start, sub_ptr, (size_t)sub_len);
- else
- retval = 0;
-
- if (view.obj)
- PyBuffer_Release(&view);
-
- return retval;
-}
-
+ }
+#endif
+ else {
+ if (unlikely(PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) == -1))
+ return -1;
+ sub_ptr = (const char*) view.buf;
+ sub_len = view.len;
+ }
+
+ if (end > self_len)
+ end = self_len;
+ else if (end < 0)
+ end += self_len;
+ if (end < 0)
+ end = 0;
+ if (start < 0)
+ start += self_len;
+ if (start < 0)
+ start = 0;
+
+ if (direction > 0) {
+ /* endswith */
+ if (end-sub_len > start)
+ start = end - sub_len;
+ }
+
+ if (start + sub_len <= end)
+ retval = !memcmp(self_ptr+start, sub_ptr, (size_t)sub_len);
+ else
+ retval = 0;
+
+ if (view.obj)
+ PyBuffer_Release(&view);
+
+ return retval;
+}
+
static int __Pyx_PyBytes_TailmatchTuple(PyObject* self, PyObject* substrings,
Py_ssize_t start, Py_ssize_t end, int direction) {
Py_ssize_t i, count = PyTuple_GET_SIZE(substrings);
@@ -729,102 +729,102 @@ static int __Pyx_PyBytes_TailmatchTuple(PyObject* self, PyObject* substrings,
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
result = __Pyx_PyBytes_SingleTailmatch(self, PyTuple_GET_ITEM(substrings, i),
start, end, direction);
-#else
+#else
PyObject* sub = PySequence_ITEM(substrings, i);
if (unlikely(!sub)) return -1;
result = __Pyx_PyBytes_SingleTailmatch(self, sub, start, end, direction);
Py_DECREF(sub);
-#endif
+#endif
if (result) {
return result;
- }
- }
+ }
+ }
return 0;
}
-
+
static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr,
Py_ssize_t start, Py_ssize_t end, int direction) {
if (unlikely(PyTuple_Check(substr))) {
return __Pyx_PyBytes_TailmatchTuple(self, substr, start, end, direction);
}
- return __Pyx_PyBytes_SingleTailmatch(self, substr, start, end, direction);
-}
-
-
-/////////////// str_tailmatch.proto ///////////////
-
-static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
+ return __Pyx_PyBytes_SingleTailmatch(self, substr, start, end, direction);
+}
+
+
+/////////////// str_tailmatch.proto ///////////////
+
+static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction); /*proto*/
-
-/////////////// str_tailmatch ///////////////
-//@requires: bytes_tailmatch
-//@requires: unicode_tailmatch
-
-static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
- Py_ssize_t end, int direction)
-{
- // We do not use a C compiler macro here to avoid "unused function"
- // warnings for the *_Tailmatch() function that is not being used in
- // the specific CPython version. The C compiler will generate the same
- // code anyway, and will usually just remove the unused function.
- if (PY_MAJOR_VERSION < 3)
- return __Pyx_PyBytes_Tailmatch(self, arg, start, end, direction);
- else
- return __Pyx_PyUnicode_Tailmatch(self, arg, start, end, direction);
-}
-
-
-/////////////// bytes_index.proto ///////////////
-
+
+/////////////// str_tailmatch ///////////////
+//@requires: bytes_tailmatch
+//@requires: unicode_tailmatch
+
+static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
+ Py_ssize_t end, int direction)
+{
+ // We do not use a C compiler macro here to avoid "unused function"
+ // warnings for the *_Tailmatch() function that is not being used in
+ // the specific CPython version. The C compiler will generate the same
+ // code anyway, and will usually just remove the unused function.
+ if (PY_MAJOR_VERSION < 3)
+ return __Pyx_PyBytes_Tailmatch(self, arg, start, end, direction);
+ else
+ return __Pyx_PyUnicode_Tailmatch(self, arg, start, end, direction);
+}
+
+
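The tailmatch helpers funnel str.startswith and str.endswith through one entry point, with direction -1 selecting startswith and +1 selecting endswith; tuple arguments are dispatched to the *_TailmatchTuple loops above. A hypothetical caller (not from this commit):

    /* hypothetical lowering of text.endswith(suffix) over the whole string */
    static int __pyx_example_endswith(PyObject *text, PyObject *suffix) {
        return __Pyx_PyStr_Tailmatch(text, suffix, 0, PY_SSIZE_T_MAX, +1);   /* 1/0, -1 on error */
    }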
+/////////////// bytes_index.proto ///////////////
+
static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds); /*proto*/
/////////////// bytes_index ///////////////
-static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
+static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
if (index < 0)
index += PyBytes_GET_SIZE(bytes);
- if (check_bounds) {
- Py_ssize_t size = PyBytes_GET_SIZE(bytes);
+ if (check_bounds) {
+ Py_ssize_t size = PyBytes_GET_SIZE(bytes);
if (unlikely(!__Pyx_is_valid_index(index, size))) {
- PyErr_SetString(PyExc_IndexError, "string index out of range");
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
return (char) -1;
- }
- }
- return PyBytes_AS_STRING(bytes)[index];
-}
-
-
-//////////////////// StringJoin.proto ////////////////////
-
-#if PY_MAJOR_VERSION < 3
-#define __Pyx_PyString_Join __Pyx_PyBytes_Join
-#define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v))
-#else
-#define __Pyx_PyString_Join PyUnicode_Join
-#define __Pyx_PyBaseString_Join PyUnicode_Join
-#endif
-
-#if CYTHON_COMPILING_IN_CPYTHON
- #if PY_MAJOR_VERSION < 3
- #define __Pyx_PyBytes_Join _PyString_Join
- #else
- #define __Pyx_PyBytes_Join _PyBytes_Join
- #endif
-#else
-static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); /*proto*/
-#endif
-
-
-//////////////////// StringJoin ////////////////////
-
-#if !CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) {
- return PyObject_CallMethodObjArgs(sep, PYIDENT("join"), values, NULL);
-}
-#endif
-
-
+ }
+ }
+ return PyBytes_AS_STRING(bytes)[index];
+}
+
+
+//////////////////// StringJoin.proto ////////////////////
+
+#if PY_MAJOR_VERSION < 3
+#define __Pyx_PyString_Join __Pyx_PyBytes_Join
+#define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v))
+#else
+#define __Pyx_PyString_Join PyUnicode_Join
+#define __Pyx_PyBaseString_Join PyUnicode_Join
+#endif
+
+#if CYTHON_COMPILING_IN_CPYTHON
+ #if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyBytes_Join _PyString_Join
+ #else
+ #define __Pyx_PyBytes_Join _PyBytes_Join
+ #endif
+#else
+static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); /*proto*/
+#endif
+
+
+//////////////////// StringJoin ////////////////////
+
+#if !CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) {
+ return PyObject_CallMethodObjArgs(sep, PYIDENT("join"), values, NULL);
+}
+#endif
+
+
/////////////// JoinPyUnicode.proto ///////////////
static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
@@ -989,82 +989,82 @@ done_or_error:
}
-//////////////////// ByteArrayAppendObject.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyObject* value);
-
-//////////////////// ByteArrayAppendObject ////////////////////
-//@requires: ByteArrayAppend
-
-static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyObject* value) {
- Py_ssize_t ival;
-#if PY_MAJOR_VERSION < 3
- if (unlikely(PyString_Check(value))) {
- if (unlikely(PyString_GET_SIZE(value) != 1)) {
- PyErr_SetString(PyExc_ValueError, "string must be of size 1");
- return -1;
- }
- ival = (unsigned char) (PyString_AS_STRING(value)[0]);
- } else
+//////////////////// ByteArrayAppendObject.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyObject* value);
+
+//////////////////// ByteArrayAppendObject ////////////////////
+//@requires: ByteArrayAppend
+
+static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyObject* value) {
+ Py_ssize_t ival;
+#if PY_MAJOR_VERSION < 3
+ if (unlikely(PyString_Check(value))) {
+ if (unlikely(PyString_GET_SIZE(value) != 1)) {
+ PyErr_SetString(PyExc_ValueError, "string must be of size 1");
+ return -1;
+ }
+ ival = (unsigned char) (PyString_AS_STRING(value)[0]);
+ } else
+#endif
+#if CYTHON_USE_PYLONG_INTERNALS
+ if (likely(PyLong_CheckExact(value)) && likely(Py_SIZE(value) == 1 || Py_SIZE(value) == 0)) {
+ if (Py_SIZE(value) == 0) {
+ ival = 0;
+ } else {
+ ival = ((PyLongObject*)value)->ob_digit[0];
+ if (unlikely(ival > 255)) goto bad_range;
+ }
+ } else
#endif
-#if CYTHON_USE_PYLONG_INTERNALS
- if (likely(PyLong_CheckExact(value)) && likely(Py_SIZE(value) == 1 || Py_SIZE(value) == 0)) {
- if (Py_SIZE(value) == 0) {
- ival = 0;
- } else {
- ival = ((PyLongObject*)value)->ob_digit[0];
- if (unlikely(ival > 255)) goto bad_range;
- }
- } else
-#endif
- {
- // CPython calls PyNumber_Index() internally
- ival = __Pyx_PyIndex_AsSsize_t(value);
+ {
+ // CPython calls PyNumber_Index() internally
+ ival = __Pyx_PyIndex_AsSsize_t(value);
if (unlikely(!__Pyx_is_valid_index(ival, 256))) {
- if (ival == -1 && PyErr_Occurred())
- return -1;
- goto bad_range;
- }
- }
- return __Pyx_PyByteArray_Append(bytearray, ival);
-bad_range:
- PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
- return -1;
-}
-
-//////////////////// ByteArrayAppend.proto ////////////////////
-
-static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value);
-
-//////////////////// ByteArrayAppend ////////////////////
-//@requires: ObjectHandling.c::PyObjectCallMethod1
-
-static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value) {
- PyObject *pyval, *retval;
-#if CYTHON_COMPILING_IN_CPYTHON
+ if (ival == -1 && PyErr_Occurred())
+ return -1;
+ goto bad_range;
+ }
+ }
+ return __Pyx_PyByteArray_Append(bytearray, ival);
+bad_range:
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+}
+
+//////////////////// ByteArrayAppend.proto ////////////////////
+
+static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value);
+
+//////////////////// ByteArrayAppend ////////////////////
+//@requires: ObjectHandling.c::PyObjectCallMethod1
+
+static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value) {
+ PyObject *pyval, *retval;
+#if CYTHON_COMPILING_IN_CPYTHON
if (likely(__Pyx_is_valid_index(value, 256))) {
- Py_ssize_t n = Py_SIZE(bytearray);
- if (likely(n != PY_SSIZE_T_MAX)) {
- if (unlikely(PyByteArray_Resize(bytearray, n + 1) < 0))
- return -1;
- PyByteArray_AS_STRING(bytearray)[n] = value;
- return 0;
- }
- } else {
- PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
- return -1;
- }
-#endif
- pyval = PyInt_FromLong(value);
- if (unlikely(!pyval))
- return -1;
- retval = __Pyx_PyObject_CallMethod1(bytearray, PYIDENT("append"), pyval);
- Py_DECREF(pyval);
- if (unlikely(!retval))
- return -1;
- Py_DECREF(retval);
- return 0;
-}
+ Py_ssize_t n = Py_SIZE(bytearray);
+ if (likely(n != PY_SSIZE_T_MAX)) {
+ if (unlikely(PyByteArray_Resize(bytearray, n + 1) < 0))
+ return -1;
+ PyByteArray_AS_STRING(bytearray)[n] = value;
+ return 0;
+ }
+ } else {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+ }
+#endif
+ pyval = PyInt_FromLong(value);
+ if (unlikely(!pyval))
+ return -1;
+ retval = __Pyx_PyObject_CallMethod1(bytearray, PYIDENT("append"), pyval);
+ Py_DECREF(pyval);
+ if (unlikely(!retval))
+ return -1;
+ Py_DECREF(retval);
+ return 0;
+}
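
Note: the two ByteArray utilities above mirror bytearray.append() — the fast path resizes the buffer in place when the value is already a valid byte, and everything else is coerced through the index protocol before falling back to calling the append method. A minimal Python sketch of the accepted inputs (illustration only, not part of the diff):

from operator import index as as_index   # PyNumber_Index() equivalent

def bytearray_append(buf, value):
    ival = as_index(value)               # "CPython calls PyNumber_Index() internally"
    if not 0 <= ival < 256:
        raise ValueError("byte must be in range(0, 256)")
    buf.append(ival)

buf = bytearray(b"ab")
bytearray_append(buf, 99)                # buf == bytearray(b'abc')
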
//////////////////// PyObjectFormat.proto ////////////////////
diff --git a/contrib/tools/cython/Cython/Utility/TestCyUtilityLoader.pyx b/contrib/tools/cython/Cython/Utility/TestCyUtilityLoader.pyx
index 4457e4b98f..00e7a7681b 100644
--- a/contrib/tools/cython/Cython/Utility/TestCyUtilityLoader.pyx
+++ b/contrib/tools/cython/Cython/Utility/TestCyUtilityLoader.pyx
@@ -1,8 +1,8 @@
-########## TestCyUtilityLoader ##########
-#@requires: OtherUtility
-
-test {{cy_loader}} impl
-
-
-########## OtherUtility ##########
-req {{cy_loader}} impl
+########## TestCyUtilityLoader ##########
+#@requires: OtherUtility
+
+test {{cy_loader}} impl
+
+
+########## OtherUtility ##########
+req {{cy_loader}} impl
diff --git a/contrib/tools/cython/Cython/Utility/TestCythonScope.pyx b/contrib/tools/cython/Cython/Utility/TestCythonScope.pyx
index 5cd582d227..f585be2983 100644
--- a/contrib/tools/cython/Cython/Utility/TestCythonScope.pyx
+++ b/contrib/tools/cython/Cython/Utility/TestCythonScope.pyx
@@ -1,64 +1,64 @@
-########## TestClass ##########
-# These utilities are for testing purposes
-
-cdef extern from *:
- cdef object __pyx_test_dep(object)
-
-@cname('__pyx_TestClass')
-cdef class TestClass(object):
- cdef public int value
-
- def __init__(self, int value):
- self.value = value
-
- def __str__(self):
- return 'TestClass(%d)' % self.value
-
- cdef cdef_method(self, int value):
- print 'Hello from cdef_method', value
-
- cpdef cpdef_method(self, int value):
- print 'Hello from cpdef_method', value
-
- def def_method(self, int value):
- print 'Hello from def_method', value
-
- @cname('cdef_cname')
- cdef cdef_cname_method(self, int value):
- print "Hello from cdef_cname_method", value
-
- @cname('cpdef_cname')
- cpdef cpdef_cname_method(self, int value):
- print "Hello from cpdef_cname_method", value
-
- @cname('def_cname')
- def def_cname_method(self, int value):
- print "Hello from def_cname_method", value
-
-@cname('__pyx_test_call_other_cy_util')
-cdef test_call(obj):
- print 'test_call'
- __pyx_test_dep(obj)
-
-@cname('__pyx_TestClass_New')
-cdef _testclass_new(int value):
- return TestClass(value)
-
-########### TestDep ##########
-
-@cname('__pyx_test_dep')
-cdef test_dep(obj):
- print 'test_dep', obj
-
-########## TestScope ##########
-
-@cname('__pyx_testscope')
-cdef object _testscope(int value):
- return "hello from cython scope, value=%d" % value
-
-########## View.TestScope ##########
-
-@cname('__pyx_view_testscope')
-cdef object _testscope(int value):
- return "hello from cython.view scope, value=%d" % value
-
+########## TestClass ##########
+# These utilities are for testing purposes
+
+cdef extern from *:
+ cdef object __pyx_test_dep(object)
+
+@cname('__pyx_TestClass')
+cdef class TestClass(object):
+ cdef public int value
+
+ def __init__(self, int value):
+ self.value = value
+
+ def __str__(self):
+ return 'TestClass(%d)' % self.value
+
+ cdef cdef_method(self, int value):
+ print 'Hello from cdef_method', value
+
+ cpdef cpdef_method(self, int value):
+ print 'Hello from cpdef_method', value
+
+ def def_method(self, int value):
+ print 'Hello from def_method', value
+
+ @cname('cdef_cname')
+ cdef cdef_cname_method(self, int value):
+ print "Hello from cdef_cname_method", value
+
+ @cname('cpdef_cname')
+ cpdef cpdef_cname_method(self, int value):
+ print "Hello from cpdef_cname_method", value
+
+ @cname('def_cname')
+ def def_cname_method(self, int value):
+ print "Hello from def_cname_method", value
+
+@cname('__pyx_test_call_other_cy_util')
+cdef test_call(obj):
+ print 'test_call'
+ __pyx_test_dep(obj)
+
+@cname('__pyx_TestClass_New')
+cdef _testclass_new(int value):
+ return TestClass(value)
+
+########### TestDep ##########
+
+@cname('__pyx_test_dep')
+cdef test_dep(obj):
+ print 'test_dep', obj
+
+########## TestScope ##########
+
+@cname('__pyx_testscope')
+cdef object _testscope(int value):
+ return "hello from cython scope, value=%d" % value
+
+########## View.TestScope ##########
+
+@cname('__pyx_view_testscope')
+cdef object _testscope(int value):
+ return "hello from cython.view scope, value=%d" % value
+
diff --git a/contrib/tools/cython/Cython/Utility/TestUtilityLoader.c b/contrib/tools/cython/Cython/Utility/TestUtilityLoader.c
index ca277f1d3b..595305f211 100644
--- a/contrib/tools/cython/Cython/Utility/TestUtilityLoader.c
+++ b/contrib/tools/cython/Cython/Utility/TestUtilityLoader.c
@@ -1,12 +1,12 @@
-////////// TestUtilityLoader.proto //////////
-test {{loader}} prototype
-
-////////// TestUtilityLoader //////////
-//@requires: OtherUtility
-test {{loader}} impl
-
-////////// OtherUtility.proto //////////
-req {{loader}} proto
-
-////////// OtherUtility //////////
-req {{loader}} impl
+////////// TestUtilityLoader.proto //////////
+test {{loader}} prototype
+
+////////// TestUtilityLoader //////////
+//@requires: OtherUtility
+test {{loader}} impl
+
+////////// OtherUtility.proto //////////
+req {{loader}} proto
+
+////////// OtherUtility //////////
+req {{loader}} impl
diff --git a/contrib/tools/cython/Cython/Utility/TypeConversion.c b/contrib/tools/cython/Cython/Utility/TypeConversion.c
index 3ca4711b0a..7a7bf0f799 100644
--- a/contrib/tools/cython/Cython/Utility/TypeConversion.c
+++ b/contrib/tools/cython/Cython/Utility/TypeConversion.c
@@ -1,21 +1,21 @@
-/////////////// TypeConversions.proto ///////////////
-
-/* Type Conversion Predeclarations */
-
+/////////////// TypeConversions.proto ///////////////
+
+/* Type Conversion Predeclarations */
+
#define __Pyx_uchar_cast(c) ((unsigned char)c)
#define __Pyx_long_cast(x) ((long)x)
-#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
- (sizeof(type) < sizeof(Py_ssize_t)) || \
- (sizeof(type) > sizeof(Py_ssize_t) && \
- likely(v < (type)PY_SSIZE_T_MAX || \
- v == (type)PY_SSIZE_T_MAX) && \
- (!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \
- v == (type)PY_SSIZE_T_MIN))) || \
- (sizeof(type) == sizeof(Py_ssize_t) && \
- (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
- v == (type)PY_SSIZE_T_MAX))) )
-
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
+ (sizeof(type) < sizeof(Py_ssize_t)) || \
+ (sizeof(type) > sizeof(Py_ssize_t) && \
+ likely(v < (type)PY_SSIZE_T_MAX || \
+ v == (type)PY_SSIZE_T_MAX) && \
+ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \
+ v == (type)PY_SSIZE_T_MIN))) || \
+ (sizeof(type) == sizeof(Py_ssize_t) && \
+ (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
+ v == (type)PY_SSIZE_T_MAX))) )
+
static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
// Optimisation from Section 14.2 "Bounds Checking" in
// https://www.agner.org/optimize/optimizing_cpp.pdf
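
Note: __Pyx_is_valid_index() folds the two comparisons 0 <= i && i < limit into a single unsigned comparison, the trick referenced in the comment above. A small Python sketch (illustration only) that reproduces the idea by reinterpreting a signed value as unsigned:

# Reinterpreting a negative Py_ssize_t as size_t makes it huge, so one
# unsigned "< limit" test covers both bounds at once.
WIDTH = 64                      # assumed pointer width for the illustration

def is_valid_index(i, limit):
    unsigned_i = i & ((1 << WIDTH) - 1)      # behaves like (size_t)i
    return unsigned_i < limit                # single comparison

assert is_valid_index(3, 10)
assert not is_valid_index(-1, 10)            # wraps to 2**64 - 1, fails the test
assert not is_valid_index(10, 10)
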
@@ -47,21 +47,21 @@ static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
-
-#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
-#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
-#define __Pyx_PyBytes_FromString PyBytes_FromString
-#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
-static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
-
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
- #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
-#else
- #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
- #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
-#endif
-
+
+#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
+
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#else
+ #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
+#endif
+
#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s))
@@ -78,39 +78,39 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
-
+
// There used to be a Py_UNICODE_strlen() in CPython 3.x, but it is deprecated since Py3.3.
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
- const Py_UNICODE *u_end = u;
- while (*u_end++) ;
- return (size_t)(u_end - u - 1);
-}
-
-#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
-#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
-#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
-
+ const Py_UNICODE *u_end = u;
+ while (*u_end++) ;
+ return (size_t)(u_end - u - 1);
+}
+
+#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
+#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
+#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
+
#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
-
+
#define __Pyx_PySequence_Tuple(obj) \
(likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*);
-
+
#if CYTHON_ASSUME_SAFE_MACROS
-#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
-#else
-#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
-#endif
-#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
-
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#else
+#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
+#endif
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
#else
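
Note: several helpers declared in this hunk (__Pyx_PySequence_Tuple, __pyx_PyFloat_AsDouble, __Pyx_PyNumber_Int/Float) share one pattern: an exact-type check selects a cheap macro path, everything else falls back to the generic conversion. A Python sketch of that pattern (illustration only):

def as_float(x):
    # __pyx_PyFloat_AsDouble: PyFloat_AS_DOUBLE for exact floats,
    # PyFloat_AsDouble (which may call __float__) otherwise.
    return x if type(x) is float else float(x)

def as_tuple(obj):
    # __Pyx_PySequence_Tuple: reuse the object when it is already an exact tuple.
    return obj if type(obj) is tuple else tuple(obj)

print(as_float(1), as_tuple([1, 2]))   # 1.0 (1, 2)
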
@@ -118,101 +118,101 @@ static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*);
#endif
#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
-#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
-static int __Pyx_sys_getdefaultencoding_not_ascii;
-static int __Pyx_init_sys_getdefaultencoding_params(void) {
- PyObject* sys;
- PyObject* default_encoding = NULL;
- PyObject* ascii_chars_u = NULL;
- PyObject* ascii_chars_b = NULL;
- const char* default_encoding_c;
- sys = PyImport_ImportModule("sys");
- if (!sys) goto bad;
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+static int __Pyx_sys_getdefaultencoding_not_ascii;
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ PyObject* ascii_chars_u = NULL;
+ PyObject* ascii_chars_b = NULL;
+ const char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
- Py_DECREF(sys);
- if (!default_encoding) goto bad;
- default_encoding_c = PyBytes_AsString(default_encoding);
- if (!default_encoding_c) goto bad;
- if (strcmp(default_encoding_c, "ascii") == 0) {
- __Pyx_sys_getdefaultencoding_not_ascii = 0;
- } else {
- char ascii_chars[128];
- int c;
- for (c = 0; c < 128; c++) {
- ascii_chars[c] = c;
- }
- __Pyx_sys_getdefaultencoding_not_ascii = 1;
- ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
- if (!ascii_chars_u) goto bad;
- ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
- if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
- PyErr_Format(
- PyExc_ValueError,
- "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
- default_encoding_c);
- goto bad;
- }
- Py_DECREF(ascii_chars_u);
- Py_DECREF(ascii_chars_b);
- }
- Py_DECREF(default_encoding);
- return 0;
-bad:
- Py_XDECREF(default_encoding);
- Py_XDECREF(ascii_chars_u);
- Py_XDECREF(ascii_chars_b);
- return -1;
-}
-#endif
-
-#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
-#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
-#else
-#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
-
-// __PYX_DEFAULT_STRING_ENCODING is either a user provided string constant
-// or we need to look it up here
-#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
-static char* __PYX_DEFAULT_STRING_ENCODING;
-
-static int __Pyx_init_sys_getdefaultencoding_params(void) {
- PyObject* sys;
- PyObject* default_encoding = NULL;
- char* default_encoding_c;
-
- sys = PyImport_ImportModule("sys");
- if (!sys) goto bad;
- default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
- Py_DECREF(sys);
- if (!default_encoding) goto bad;
- default_encoding_c = PyBytes_AsString(default_encoding);
- if (!default_encoding_c) goto bad;
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ if (strcmp(default_encoding_c, "ascii") == 0) {
+ __Pyx_sys_getdefaultencoding_not_ascii = 0;
+ } else {
+ char ascii_chars[128];
+ int c;
+ for (c = 0; c < 128; c++) {
+ ascii_chars[c] = c;
+ }
+ __Pyx_sys_getdefaultencoding_not_ascii = 1;
+ ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
+ if (!ascii_chars_u) goto bad;
+ ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
+ if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
+ PyErr_Format(
+ PyExc_ValueError,
+ "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
+ default_encoding_c);
+ goto bad;
+ }
+ Py_DECREF(ascii_chars_u);
+ Py_DECREF(ascii_chars_b);
+ }
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ Py_XDECREF(ascii_chars_u);
+ Py_XDECREF(ascii_chars_b);
+ return -1;
+}
+#endif
+
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
+#else
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+
+// __PYX_DEFAULT_STRING_ENCODING is either a user provided string constant
+// or we need to look it up here
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+static char* __PYX_DEFAULT_STRING_ENCODING;
+
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ char* default_encoding_c;
+
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
__PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
- if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
- strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
- Py_DECREF(default_encoding);
- return 0;
-bad:
- Py_XDECREF(default_encoding);
- return -1;
-}
-#endif
-#endif
-
-/////////////// TypeConversions ///////////////
-
-/* Type Conversion Functions */
-
-static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
- return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str));
-}
-
+ if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
+ strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ return -1;
+}
+#endif
+#endif
+
+/////////////// TypeConversions ///////////////
+
+/* Type Conversion Functions */
+
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
+ return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str));
+}
+
// Py3.7 returns a "const char*" for unicode strings
static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) {
- Py_ssize_t ignore;
- return __Pyx_PyObject_AsStringAndSize(o, &ignore);
-}
-
+ Py_ssize_t ignore;
+ return __Pyx_PyObject_AsStringAndSize(o, &ignore);
+}
+
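
Note: __Pyx_init_sys_getdefaultencoding_params() above verifies, when a module is compiled with c_string_encoding=ascii, that the interpreter's default encoding can round-trip all 128 ASCII characters. A rough Python equivalent of that check (illustration only):

import sys

def default_encoding_is_ascii_superset():
    enc = sys.getdefaultencoding()
    if enc == "ascii":
        return True
    ascii_chars = bytes(range(128)).decode("ascii")
    # The C code encodes the 128 ASCII code points with the default encoding
    # and requires the result to be byte-identical to the ASCII bytes.
    try:
        return ascii_chars.encode(enc) == bytes(range(128))
    except UnicodeEncodeError:
        return False

print(default_encoding_is_ascii_superset())   # True for utf-8 or ascii defaults
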
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
#if !CYTHON_PEP393_ENABLED
static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
@@ -221,7 +221,7 @@ static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *leng
PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
if (!defenc) return NULL;
defenc_c = PyBytes_AS_STRING(defenc);
-#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
{
char* end = defenc_c + PyBytes_GET_SIZE(defenc);
char* c;
@@ -230,10 +230,10 @@ static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *leng
// raise the error
PyUnicode_AsASCIIString(o);
return NULL;
- }
- }
+ }
+ }
}
-#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
+#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
*length = PyBytes_GET_SIZE(defenc);
return defenc_c;
}
@@ -242,7 +242,7 @@ static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *leng
static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL;
-#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
if (likely(PyUnicode_IS_ASCII(o))) {
// cached for the lifetime of the object
*length = PyUnicode_GET_LENGTH(o);
@@ -252,9 +252,9 @@ static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py
PyUnicode_AsASCIIString(o);
return NULL;
}
-#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
+#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
return PyUnicode_AsUTF8AndSize(o, length);
-#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
+#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
}
#endif /* CYTHON_PEP393_ENABLED */
#endif
@@ -268,33 +268,33 @@ static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_
#endif
PyUnicode_Check(o)) {
return __Pyx_PyUnicode_AsStringAndSize(o, length);
- } else
-#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
-
+ } else
+#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
+
#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
- if (PyByteArray_Check(o)) {
- *length = PyByteArray_GET_SIZE(o);
- return PyByteArray_AS_STRING(o);
- } else
-#endif
- {
- char* result;
- int r = PyBytes_AsStringAndSize(o, &result, length);
- if (unlikely(r < 0)) {
- return NULL;
- } else {
- return result;
- }
- }
-}
-
-/* Note: __Pyx_PyObject_IsTrue is written to minimize branching. */
-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
- int is_true = x == Py_True;
- if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
- else return PyObject_IsTrue(x);
-}
-
+ if (PyByteArray_Check(o)) {
+ *length = PyByteArray_GET_SIZE(o);
+ return PyByteArray_AS_STRING(o);
+ } else
+#endif
+ {
+ char* result;
+ int r = PyBytes_AsStringAndSize(o, &result, length);
+ if (unlikely(r < 0)) {
+ return NULL;
+ } else {
+ return result;
+ }
+ }
+}
+
+/* Note: __Pyx_PyObject_IsTrue is written to minimize branching. */
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+ int is_true = x == Py_True;
+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
+ else return PyObject_IsTrue(x);
+}
+
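
Note: __Pyx_PyObject_IsTrue() above avoids a chain of branches by OR-ing the three singleton identity tests before deciding whether the generic truth protocol is needed. The same logic in Python (illustration only):

def object_is_true(x):
    # True, False and None are singletons, so identity checks settle the
    # common cases without calling __bool__/__len__.
    is_true = x is True
    if is_true or x is False or x is None:
        return is_true
    return bool(x)

print(object_is_true(True), object_is_true(None), object_is_true([1]))  # True False True
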
static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) {
int retval;
if (unlikely(!x)) return -1;
@@ -327,68 +327,68 @@ static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const
static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) {
#if CYTHON_USE_TYPE_SLOTS
- PyNumberMethods *m;
+ PyNumberMethods *m;
#endif
- const char *name = NULL;
- PyObject *res = NULL;
-#if PY_MAJOR_VERSION < 3
+ const char *name = NULL;
+ PyObject *res = NULL;
+#if PY_MAJOR_VERSION < 3
if (likely(PyInt_Check(x) || PyLong_Check(x)))
-#else
+#else
if (likely(PyLong_Check(x)))
-#endif
+#endif
return __Pyx_NewRef(x);
#if CYTHON_USE_TYPE_SLOTS
- m = Py_TYPE(x)->tp_as_number;
+ m = Py_TYPE(x)->tp_as_number;
#if PY_MAJOR_VERSION < 3
- if (m && m->nb_int) {
- name = "int";
+ if (m && m->nb_int) {
+ name = "int";
res = m->nb_int(x);
- }
- else if (m && m->nb_long) {
- name = "long";
+ }
+ else if (m && m->nb_long) {
+ name = "long";
res = m->nb_long(x);
- }
+ }
#else
if (likely(m && m->nb_int)) {
- name = "int";
+ name = "int";
res = m->nb_int(x);
- }
+ }
#endif
#else
if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) {
res = PyNumber_Int(x);
}
-#endif
+#endif
if (likely(res)) {
-#if PY_MAJOR_VERSION < 3
+#if PY_MAJOR_VERSION < 3
if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) {
-#else
+#else
if (unlikely(!PyLong_CheckExact(res))) {
-#endif
+#endif
return __Pyx_PyNumber_IntOrLongWrongResultType(res, name);
- }
- }
- else if (!PyErr_Occurred()) {
- PyErr_SetString(PyExc_TypeError,
- "an integer is required");
- }
- return res;
-}
-
+ }
+ }
+ else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_TypeError,
+ "an integer is required");
+ }
+ return res;
+}
+
{{py: from Cython.Utility import pylong_join }}
-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
- Py_ssize_t ival;
- PyObject *x;
-#if PY_MAJOR_VERSION < 3
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
+ Py_ssize_t ival;
+ PyObject *x;
+#if PY_MAJOR_VERSION < 3
if (likely(PyInt_CheckExact(b))) {
if (sizeof(Py_ssize_t) >= sizeof(long))
return PyInt_AS_LONG(b);
else
return PyInt_AsSsize_t(b);
}
-#endif
- if (likely(PyLong_CheckExact(b))) {
+#endif
+ if (likely(PyLong_CheckExact(b))) {
#if CYTHON_USE_PYLONG_INTERNALS
const digit* digits = ((PyLongObject*)b)->ob_digit;
const Py_ssize_t size = Py_SIZE(b);
@@ -410,16 +410,16 @@ static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
{{endfor}}
}
}
- #endif
- return PyLong_AsSsize_t(b);
- }
- x = PyNumber_Index(b);
- if (!x) return -1;
- ival = PyInt_AsSsize_t(x);
- Py_DECREF(x);
- return ival;
-}
-
+ #endif
+ return PyLong_AsSsize_t(b);
+ }
+ x = PyNumber_Index(b);
+ if (!x) return -1;
+ ival = PyInt_AsSsize_t(x);
+ Py_DECREF(x);
+ return ival;
+}
+
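
Note: __Pyx_PyIndex_AsSsize_t() above fast-paths exact int objects (reading the digits directly when CYTHON_USE_PYLONG_INTERNALS is available) and otherwise coerces through the index protocol, i.e. PyNumber_Index(). A Python-level sketch of the fallback semantics (illustration only):

from operator import index as as_index

def as_ssize_t(obj):
    if type(obj) is int:          # exact int: no coercion needed
        return obj
    return as_index(obj)          # PyNumber_Index(): accepts anything with __index__

class MyLen:
    def __index__(self):
        return 7

print(as_ssize_t(5), as_ssize_t(MyLen()))   # 5 7
# as_ssize_t(3.5) raises TypeError, matching the C helper's error path.
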
static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) {
if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) {
@@ -445,11 +445,11 @@ static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) {
}
-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
- return PyInt_FromSize_t(ival);
-}
-
-
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
+ return PyInt_FromSize_t(ival);
+}
+
+
/////////////// GCCDiagnostics.proto ///////////////
// GCC diagnostic pragmas were introduced in GCC 4.6
@@ -484,23 +484,23 @@ bad:
/////////////// FromPyCTupleUtility.proto ///////////////
-static {{struct_type_decl}} {{funcname}}(PyObject *);
-
+static {{struct_type_decl}} {{funcname}}(PyObject *);
+
/////////////// FromPyCTupleUtility ///////////////
-static {{struct_type_decl}} {{funcname}}(PyObject * o) {
- {{struct_type_decl}} result;
-
+static {{struct_type_decl}} {{funcname}}(PyObject * o) {
+ {{struct_type_decl}} result;
+
if (!PyTuple_Check(o) || PyTuple_GET_SIZE(o) != {{size}}) {
PyErr_Format(PyExc_TypeError, "Expected %.16s of size %d, got %.200s", "a tuple", {{size}}, Py_TYPE(o)->tp_name);
- goto bad;
- }
-
+ goto bad;
+ }
+
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
{{for ix, component in enumerate(components):}}
{{py:attr = "result.f%s" % ix}}
{{attr}} = {{component.from_py_function}}(PyTuple_GET_ITEM(o, {{ix}}));
if ({{component.error_condition(attr)}}) goto bad;
- {{endfor}}
+ {{endfor}}
#else
{
PyObject *item;
@@ -513,12 +513,12 @@ static {{struct_type_decl}} {{funcname}}(PyObject * o) {
{{endfor}}
}
#endif
-
- return result;
-bad:
- return result;
-}
-
+
+ return result;
+bad:
+ return result;
+}
+
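
Note: the FromPyCTupleUtility template above converts a Python tuple of known length into a C struct, checking the type and size first and converting each item with the component's from_py_function. A hypothetical Python sketch of the generated logic for a 2-element (int, float) ctuple (names are illustrative, not from the diff):

from collections import namedtuple

CTuple2 = namedtuple("CTuple2", "f0 f1")     # stand-in for the generated C struct

def from_py_ctuple2(o):
    if not isinstance(o, tuple) or len(o) != 2:
        raise TypeError("Expected %s of size %d, got %.200s"
                        % ("a tuple", 2, type(o).__name__))
    # one conversion per component, mirroring the {{for ix, component}} loop
    return CTuple2(int(o[0]), float(o[1]))

print(from_py_ctuple2((1, 2)))    # CTuple2(f0=1, f1=2.0)
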
/////////////// UnicodeAsUCS4.proto ///////////////
@@ -557,24 +557,24 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyUnicode_AsPy_UCS4(PyObject* x) {
}
-/////////////// ObjectAsUCS4.proto ///////////////
+/////////////// ObjectAsUCS4.proto ///////////////
//@requires: UnicodeAsUCS4
-
+
#define __Pyx_PyObject_AsPy_UCS4(x) \
(likely(PyUnicode_Check(x)) ? __Pyx_PyUnicode_AsPy_UCS4(x) : __Pyx__PyObject_AsPy_UCS4(x))
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject*);
-
-/////////////// ObjectAsUCS4 ///////////////
-
+
+/////////////// ObjectAsUCS4 ///////////////
+
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4_raise_error(long ival) {
if (ival < 0) {
- if (!PyErr_Occurred())
- PyErr_SetString(PyExc_OverflowError,
- "cannot convert negative value to Py_UCS4");
+ if (!PyErr_Occurred())
+ PyErr_SetString(PyExc_OverflowError,
+ "cannot convert negative value to Py_UCS4");
} else {
- PyErr_SetString(PyExc_OverflowError,
- "value too large to convert to Py_UCS4");
- }
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to Py_UCS4");
+ }
return (Py_UCS4)-1;
}
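
Note: __Pyx_PyObject_AsPy_UCS4() (and the ObjectAsPyUnicode helper that follows) accepts either a single-character unicode string or an integer, and rejects values outside the code-point range. A Python sketch of the accepted inputs (illustration only):

def as_ucs4(x):
    if isinstance(x, str):
        if len(x) != 1:
            raise ValueError("only single character unicode strings "
                             "can be converted to Py_UCS4")
        return ord(x)
    ival = int(x)                            # the C helper uses its int conversion
    if not 0 <= ival <= 0x10FFFF:            # 1114111, the largest code point
        raise OverflowError("value out of range for Py_UCS4")
    return ival

print(as_ucs4("€"), as_ucs4(65))   # 8364 65
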
@@ -584,46 +584,46 @@ static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject* x) {
if (unlikely(!__Pyx_is_valid_index(ival, 1114111 + 1))) {
return __Pyx__PyObject_AsPy_UCS4_raise_error(ival);
}
- return (Py_UCS4)ival;
-}
-
-
-/////////////// ObjectAsPyUnicode.proto ///////////////
-
-static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
-
-/////////////// ObjectAsPyUnicode ///////////////
-
-static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
- long ival;
- #if CYTHON_PEP393_ENABLED
- #if Py_UNICODE_SIZE > 2
- const long maxval = 1114111;
- #else
- const long maxval = 65535;
- #endif
- #else
- static long maxval = 0;
- #endif
- if (PyUnicode_Check(x)) {
- if (unlikely(__Pyx_PyUnicode_GET_LENGTH(x) != 1)) {
- PyErr_Format(PyExc_ValueError,
- "only single character unicode strings can be converted to Py_UNICODE, "
- "got length %" CYTHON_FORMAT_SSIZE_T "d", __Pyx_PyUnicode_GET_LENGTH(x));
- return (Py_UNICODE)-1;
- }
- #if CYTHON_PEP393_ENABLED
- ival = PyUnicode_READ_CHAR(x, 0);
- #else
- return PyUnicode_AS_UNICODE(x)[0];
- #endif
- } else {
- #if !CYTHON_PEP393_ENABLED
- if (unlikely(!maxval))
- maxval = (long)PyUnicode_GetMax();
- #endif
- ival = __Pyx_PyInt_As_long(x);
- }
+ return (Py_UCS4)ival;
+}
+
+
+/////////////// ObjectAsPyUnicode.proto ///////////////
+
+static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
+
+/////////////// ObjectAsPyUnicode ///////////////
+
+static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
+ long ival;
+ #if CYTHON_PEP393_ENABLED
+ #if Py_UNICODE_SIZE > 2
+ const long maxval = 1114111;
+ #else
+ const long maxval = 65535;
+ #endif
+ #else
+ static long maxval = 0;
+ #endif
+ if (PyUnicode_Check(x)) {
+ if (unlikely(__Pyx_PyUnicode_GET_LENGTH(x) != 1)) {
+ PyErr_Format(PyExc_ValueError,
+ "only single character unicode strings can be converted to Py_UNICODE, "
+ "got length %" CYTHON_FORMAT_SSIZE_T "d", __Pyx_PyUnicode_GET_LENGTH(x));
+ return (Py_UNICODE)-1;
+ }
+ #if CYTHON_PEP393_ENABLED
+ ival = PyUnicode_READ_CHAR(x, 0);
+ #else
+ return PyUnicode_AS_UNICODE(x)[0];
+ #endif
+ } else {
+ #if !CYTHON_PEP393_ENABLED
+ if (unlikely(!maxval))
+ maxval = (long)PyUnicode_GetMax();
+ #endif
+ ival = __Pyx_PyInt_As_long(x);
+ }
if (unlikely(!__Pyx_is_valid_index(ival, maxval + 1))) {
if (ival < 0) {
if (!PyErr_Occurred())
@@ -631,23 +631,23 @@ static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
"cannot convert negative value to Py_UNICODE");
return (Py_UNICODE)-1;
} else {
- PyErr_SetString(PyExc_OverflowError,
+ PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UNICODE");
}
- return (Py_UNICODE)-1;
- }
- return (Py_UNICODE)ival;
-}
-
-
-/////////////// CIntToPy.proto ///////////////
-
-static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value);
-
-/////////////// CIntToPy ///////////////
+ return (Py_UNICODE)-1;
+ }
+ return (Py_UNICODE)ival;
+}
+
+
+/////////////// CIntToPy.proto ///////////////
+
+static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value);
+
+/////////////// CIntToPy ///////////////
//@requires: GCCDiagnostics
-
-static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
+
+static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
@@ -656,35 +656,35 @@ static CYTHON_INLINE PyObject* {{TO_PY_FUNCTION}}({{TYPE}} value) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
- const int is_unsigned = neg_one > const_zero;
- if (is_unsigned) {
- if (sizeof({{TYPE}}) < sizeof(long)) {
- return PyInt_FromLong((long) value);
- } else if (sizeof({{TYPE}}) <= sizeof(unsigned long)) {
- return PyLong_FromUnsignedLong((unsigned long) value);
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof({{TYPE}}) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof({{TYPE}}) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
#ifdef HAVE_LONG_LONG
} else if (sizeof({{TYPE}}) <= sizeof(unsigned PY_LONG_LONG)) {
return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
#endif
- }
- } else {
- if (sizeof({{TYPE}}) <= sizeof(long)) {
- return PyInt_FromLong((long) value);
+ }
+ } else {
+ if (sizeof({{TYPE}}) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
#ifdef HAVE_LONG_LONG
} else if (sizeof({{TYPE}}) <= sizeof(PY_LONG_LONG)) {
return PyLong_FromLongLong((PY_LONG_LONG) value);
#endif
- }
- }
- {
- int one = 1; int little = (int)*(unsigned char *)&one;
- unsigned char *bytes = (unsigned char *)&value;
- return _PyLong_FromByteArray(bytes, sizeof({{TYPE}}),
- little, !is_unsigned);
- }
-}
-
-
+ }
+ }
+ {
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&value;
+ return _PyLong_FromByteArray(bytes, sizeof({{TYPE}}),
+ little, !is_unsigned);
+ }
+}
+
+
/////////////// CIntToDigits ///////////////
static const char DIGIT_PAIRS_10[2*10*10+1] = {
@@ -837,44 +837,44 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_FromDouble(double value) {
#endif
-/////////////// CIntFromPyVerify ///////////////
-
-// see CIntFromPy
-#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value) \
+/////////////// CIntFromPyVerify ///////////////
+
+// see CIntFromPy
+#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value) \
__PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0)
#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value) \
__PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1)
#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc) \
- { \
- func_type value = func_value; \
- if (sizeof(target_type) < sizeof(func_type)) { \
- if (unlikely(value != (func_type) (target_type) value)) { \
- func_type zero = 0; \
+ { \
+ func_type value = func_value; \
+ if (sizeof(target_type) < sizeof(func_type)) { \
+ if (unlikely(value != (func_type) (target_type) value)) { \
+ func_type zero = 0; \
if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred())) \
return (target_type) -1; \
- if (is_unsigned && unlikely(value < zero)) \
- goto raise_neg_overflow; \
- else \
- goto raise_overflow; \
- } \
- } \
- return (target_type) value; \
- }
-
-
-/////////////// CIntFromPy.proto ///////////////
-
-static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *);
-
-/////////////// CIntFromPy ///////////////
-//@requires: CIntFromPyVerify
+ if (is_unsigned && unlikely(value < zero)) \
+ goto raise_neg_overflow; \
+ else \
+ goto raise_overflow; \
+ } \
+ } \
+ return (target_type) value; \
+ }
+
+
+/////////////// CIntFromPy.proto ///////////////
+
+static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *);
+
+/////////////// CIntFromPy ///////////////
+//@requires: CIntFromPyVerify
//@requires: GCCDiagnostics
-
+
{{py: from Cython.Utility import pylong_join }}
-static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
+static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
@@ -883,25 +883,25 @@ static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
#ifdef __Pyx_HAS_GCC_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
- const int is_unsigned = neg_one > const_zero;
-#if PY_MAJOR_VERSION < 3
- if (likely(PyInt_Check(x))) {
- if (sizeof({{TYPE}}) < sizeof(long)) {
- __PYX_VERIFY_RETURN_INT({{TYPE}}, long, PyInt_AS_LONG(x))
- } else {
- long val = PyInt_AS_LONG(x);
- if (is_unsigned && unlikely(val < 0)) {
- goto raise_neg_overflow;
- }
- return ({{TYPE}}) val;
- }
- } else
-#endif
- if (likely(PyLong_Check(x))) {
- if (is_unsigned) {
+ const int is_unsigned = neg_one > const_zero;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_Check(x))) {
+ if (sizeof({{TYPE}}) < sizeof(long)) {
+ __PYX_VERIFY_RETURN_INT({{TYPE}}, long, PyInt_AS_LONG(x))
+ } else {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ goto raise_neg_overflow;
+ }
+ return ({{TYPE}}) val;
+ }
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
#if CYTHON_USE_PYLONG_INTERNALS
const digit* digits = ((PyLongObject*)x)->ob_digit;
- switch (Py_SIZE(x)) {
+ switch (Py_SIZE(x)) {
case 0: return ({{TYPE}}) 0;
case 1: __PYX_VERIFY_RETURN_INT({{TYPE}}, digit, digits[0])
{{for _size in (2, 3, 4)}}
@@ -915,12 +915,12 @@ static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
}
break;
{{endfor}}
- }
-#endif
+ }
+#endif
#if CYTHON_COMPILING_IN_CPYTHON
- if (unlikely(Py_SIZE(x) < 0)) {
- goto raise_neg_overflow;
- }
+ if (unlikely(Py_SIZE(x) < 0)) {
+ goto raise_neg_overflow;
+ }
#else
{
// misuse Py_False as a quick way to compare to a '0' int object in PyPy
@@ -931,18 +931,18 @@ static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
goto raise_neg_overflow;
}
#endif
- if (sizeof({{TYPE}}) <= sizeof(unsigned long)) {
+ if (sizeof({{TYPE}}) <= sizeof(unsigned long)) {
__PYX_VERIFY_RETURN_INT_EXC({{TYPE}}, unsigned long, PyLong_AsUnsignedLong(x))
#ifdef HAVE_LONG_LONG
} else if (sizeof({{TYPE}}) <= sizeof(unsigned PY_LONG_LONG)) {
__PYX_VERIFY_RETURN_INT_EXC({{TYPE}}, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
#endif
- }
- } else {
+ }
+ } else {
// signed
#if CYTHON_USE_PYLONG_INTERNALS
const digit* digits = ((PyLongObject*)x)->ob_digit;
- switch (Py_SIZE(x)) {
+ switch (Py_SIZE(x)) {
case 0: return ({{TYPE}}) 0;
case -1: __PYX_VERIFY_RETURN_INT({{TYPE}}, sdigit, (sdigit) (-(sdigit)digits[0]))
case 1: __PYX_VERIFY_RETURN_INT({{TYPE}}, digit, +digits[0])
@@ -959,59 +959,59 @@ static CYTHON_INLINE {{TYPE}} {{FROM_PY_FUNCTION}}(PyObject *x) {
break;
{{endfor}}
{{endfor}}
- }
-#endif
- if (sizeof({{TYPE}}) <= sizeof(long)) {
+ }
+#endif
+ if (sizeof({{TYPE}}) <= sizeof(long)) {
__PYX_VERIFY_RETURN_INT_EXC({{TYPE}}, long, PyLong_AsLong(x))
#ifdef HAVE_LONG_LONG
} else if (sizeof({{TYPE}}) <= sizeof(PY_LONG_LONG)) {
__PYX_VERIFY_RETURN_INT_EXC({{TYPE}}, PY_LONG_LONG, PyLong_AsLongLong(x))
#endif
- }
- }
- {
-#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
- PyErr_SetString(PyExc_RuntimeError,
- "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
-#else
- {{TYPE}} val;
+ }
+ }
+ {
+#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+ PyErr_SetString(PyExc_RuntimeError,
+ "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+#else
+ {{TYPE}} val;
PyObject *v = __Pyx_PyNumber_IntOrLong(x);
- #if PY_MAJOR_VERSION < 3
- if (likely(v) && !PyLong_Check(v)) {
- PyObject *tmp = v;
- v = PyNumber_Long(tmp);
- Py_DECREF(tmp);
- }
- #endif
- if (likely(v)) {
- int one = 1; int is_little = (int)*(unsigned char *)&one;
- unsigned char *bytes = (unsigned char *)&val;
- int ret = _PyLong_AsByteArray((PyLongObject *)v,
- bytes, sizeof(val),
- is_little, !is_unsigned);
- Py_DECREF(v);
- if (likely(!ret))
- return val;
- }
-#endif
- return ({{TYPE}}) -1;
- }
- } else {
- {{TYPE}} val;
+ #if PY_MAJOR_VERSION < 3
+ if (likely(v) && !PyLong_Check(v)) {
+ PyObject *tmp = v;
+ v = PyNumber_Long(tmp);
+ Py_DECREF(tmp);
+ }
+ #endif
+ if (likely(v)) {
+ int one = 1; int is_little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&val;
+ int ret = _PyLong_AsByteArray((PyLongObject *)v,
+ bytes, sizeof(val),
+ is_little, !is_unsigned);
+ Py_DECREF(v);
+ if (likely(!ret))
+ return val;
+ }
+#endif
+ return ({{TYPE}}) -1;
+ }
+ } else {
+ {{TYPE}} val;
PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
- if (!tmp) return ({{TYPE}}) -1;
- val = {{FROM_PY_FUNCTION}}(tmp);
- Py_DECREF(tmp);
- return val;
- }
-
-raise_overflow:
- PyErr_SetString(PyExc_OverflowError,
- "value too large to convert to {{TYPE}}");
- return ({{TYPE}}) -1;
-
-raise_neg_overflow:
- PyErr_SetString(PyExc_OverflowError,
- "can't convert negative value to {{TYPE}}");
- return ({{TYPE}}) -1;
-}
+ if (!tmp) return ({{TYPE}}) -1;
+ val = {{FROM_PY_FUNCTION}}(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+
+raise_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to {{TYPE}}");
+ return ({{TYPE}}) -1;
+
+raise_neg_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to {{TYPE}}");
+ return ({{TYPE}}) -1;
+}
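
Note: for integer types wider than long long, both CIntToPy and CIntFromPy above fall back to _PyLong_FromByteArray()/_PyLong_AsByteArray(), copying raw bytes in the platform's byte order and raising OverflowError when the value does not fit. Python's int.to_bytes()/int.from_bytes() expose the same idea (illustration only, assuming a 16-byte signed target type):

import sys

WIDTH = 16                       # assumed sizeof({{TYPE}}) for the illustration
BYTEORDER = sys.byteorder        # matches the little-endian probe in the C code

def round_trip_c_int(x, width=WIDTH, signed=True):
    # to_bytes plays the role of _PyLong_AsByteArray: it raises OverflowError
    # when x does not fit, like the raise_overflow label above.
    raw = x.to_bytes(width, BYTEORDER, signed=signed)
    # from_bytes plays the role of _PyLong_FromByteArray in CIntToPy.
    return int.from_bytes(raw, BYTEORDER, signed=signed)

print(round_trip_c_int(2**100))              # fits in 16 signed bytes
# round_trip_c_int(2**200) raises OverflowError.
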
diff --git a/contrib/tools/cython/Cython/Utility/arrayarray.h b/contrib/tools/cython/Cython/Utility/arrayarray.h
index fa7fa58d16..a9e4923785 100644
--- a/contrib/tools/cython/Cython/Utility/arrayarray.h
+++ b/contrib/tools/cython/Cython/Utility/arrayarray.h
@@ -1,149 +1,149 @@
-/////////////// ArrayAPI.proto ///////////////
-
-// arrayarray.h
-//
-// Artificial C-API for Python's <array.array> type,
-// used by array.pxd
-//
-// last changes: 2009-05-15 rk
-// 2012-05-02 andreasvc
-// (see revision control)
-//
-
-#ifndef _ARRAYARRAY_H
-#define _ARRAYARRAY_H
-
-// These two forward declarations are explicitly handled in the type
-// declaration code, as including them here is too late for cython-defined
-// types to use them.
-// struct arrayobject;
-// typedef struct arrayobject arrayobject;
-
-// All possible arraydescr values are defined in the vector "descriptors"
-// below. That's defined later because the appropriate get and set
-// functions aren't visible yet.
-typedef struct arraydescr {
- int typecode;
- int itemsize;
- PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
- int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
-#if PY_MAJOR_VERSION >= 3
- char *formats;
+/////////////// ArrayAPI.proto ///////////////
+
+// arrayarray.h
+//
+// Artificial C-API for Python's <array.array> type,
+// used by array.pxd
+//
+// last changes: 2009-05-15 rk
+// 2012-05-02 andreasvc
+// (see revision control)
+//
+
+#ifndef _ARRAYARRAY_H
+#define _ARRAYARRAY_H
+
+// These two forward declarations are explicitly handled in the type
+// declaration code, as including them here is too late for cython-defined
+// types to use them.
+// struct arrayobject;
+// typedef struct arrayobject arrayobject;
+
+// All possible arraydescr values are defined in the vector "descriptors"
+// below. That's defined later because the appropriate get and set
+// functions aren't visible yet.
+typedef struct arraydescr {
+ int typecode;
+ int itemsize;
+ PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
+ int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
+#if PY_MAJOR_VERSION >= 3
+ char *formats;
#endif
-} arraydescr;
-
-
-struct arrayobject {
- PyObject_HEAD
- Py_ssize_t ob_size;
- union {
- char *ob_item;
- float *as_floats;
- double *as_doubles;
- int *as_ints;
- unsigned int *as_uints;
- unsigned char *as_uchars;
- signed char *as_schars;
- char *as_chars;
- unsigned long *as_ulongs;
- long *as_longs;
+} arraydescr;
+
+
+struct arrayobject {
+ PyObject_HEAD
+ Py_ssize_t ob_size;
+ union {
+ char *ob_item;
+ float *as_floats;
+ double *as_doubles;
+ int *as_ints;
+ unsigned int *as_uints;
+ unsigned char *as_uchars;
+ signed char *as_schars;
+ char *as_chars;
+ unsigned long *as_ulongs;
+ long *as_longs;
#if PY_MAJOR_VERSION >= 3
unsigned long long *as_ulonglongs;
long long *as_longlongs;
#endif
- short *as_shorts;
- unsigned short *as_ushorts;
- Py_UNICODE *as_pyunicodes;
- void *as_voidptr;
- } data;
- Py_ssize_t allocated;
- struct arraydescr *ob_descr;
- PyObject *weakreflist; /* List of weak references */
-#if PY_MAJOR_VERSION >= 3
- int ob_exports; /* Number of exported buffers */
-#endif
-};
-
-#ifndef NO_NEWARRAY_INLINE
-// fast creation of a new array
-static CYTHON_INLINE PyObject * newarrayobject(PyTypeObject *type, Py_ssize_t size,
- struct arraydescr *descr) {
- arrayobject *op;
- size_t nbytes;
-
- if (size < 0) {
- PyErr_BadInternalCall();
- return NULL;
- }
-
- nbytes = size * descr->itemsize;
- // Check for overflow
- if (nbytes / descr->itemsize != (size_t)size) {
- return PyErr_NoMemory();
- }
- op = (arrayobject *) type->tp_alloc(type, 0);
- if (op == NULL) {
- return NULL;
- }
- op->ob_descr = descr;
- op->allocated = size;
- op->weakreflist = NULL;
+ short *as_shorts;
+ unsigned short *as_ushorts;
+ Py_UNICODE *as_pyunicodes;
+ void *as_voidptr;
+ } data;
+ Py_ssize_t allocated;
+ struct arraydescr *ob_descr;
+ PyObject *weakreflist; /* List of weak references */
+#if PY_MAJOR_VERSION >= 3
+ int ob_exports; /* Number of exported buffers */
+#endif
+};
+
+#ifndef NO_NEWARRAY_INLINE
+// fast creation of a new array
+static CYTHON_INLINE PyObject * newarrayobject(PyTypeObject *type, Py_ssize_t size,
+ struct arraydescr *descr) {
+ arrayobject *op;
+ size_t nbytes;
+
+ if (size < 0) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ nbytes = size * descr->itemsize;
+ // Check for overflow
+ if (nbytes / descr->itemsize != (size_t)size) {
+ return PyErr_NoMemory();
+ }
+ op = (arrayobject *) type->tp_alloc(type, 0);
+ if (op == NULL) {
+ return NULL;
+ }
+ op->ob_descr = descr;
+ op->allocated = size;
+ op->weakreflist = NULL;
__Pyx_SET_SIZE(op, size);
- if (size <= 0) {
- op->data.ob_item = NULL;
- }
- else {
- op->data.ob_item = PyMem_NEW(char, nbytes);
- if (op->data.ob_item == NULL) {
- Py_DECREF(op);
- return PyErr_NoMemory();
- }
- }
- return (PyObject *) op;
-}
-#else
-PyObject* newarrayobject(PyTypeObject *type, Py_ssize_t size,
- struct arraydescr *descr);
-#endif /* ifndef NO_NEWARRAY_INLINE */
-
-// fast resize (reallocation to the point)
-// not designed for filling small increments (but for fast opaque array apps)
-static CYTHON_INLINE int resize(arrayobject *self, Py_ssize_t n) {
- void *items = (void*) self->data.ob_item;
- PyMem_Resize(items, char, (size_t)(n * self->ob_descr->itemsize));
- if (items == NULL) {
- PyErr_NoMemory();
- return -1;
+ if (size <= 0) {
+ op->data.ob_item = NULL;
+ }
+ else {
+ op->data.ob_item = PyMem_NEW(char, nbytes);
+ if (op->data.ob_item == NULL) {
+ Py_DECREF(op);
+ return PyErr_NoMemory();
+ }
}
- self->data.ob_item = (char*) items;
+ return (PyObject *) op;
+}
+#else
+PyObject* newarrayobject(PyTypeObject *type, Py_ssize_t size,
+ struct arraydescr *descr);
+#endif /* ifndef NO_NEWARRAY_INLINE */
+
+// fast resize (reallocation to the point)
+// not designed for filling small increments (but for fast opaque array apps)
+static CYTHON_INLINE int resize(arrayobject *self, Py_ssize_t n) {
+ void *items = (void*) self->data.ob_item;
+ PyMem_Resize(items, char, (size_t)(n * self->ob_descr->itemsize));
+ if (items == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->data.ob_item = (char*) items;
__Pyx_SET_SIZE(self, n);
- self->allocated = n;
- return 0;
-}
-
-// suitable for small increments; over-allocation of 50%
-static CYTHON_INLINE int resize_smart(arrayobject *self, Py_ssize_t n) {
- void *items = (void*) self->data.ob_item;
- Py_ssize_t newsize;
+ self->allocated = n;
+ return 0;
+}
+
+// suitable for small increments; over-allocation of 50%
+static CYTHON_INLINE int resize_smart(arrayobject *self, Py_ssize_t n) {
+ void *items = (void*) self->data.ob_item;
+ Py_ssize_t newsize;
if (n < self->allocated && n*4 > self->allocated) {
__Pyx_SET_SIZE(self, n);
return 0;
- }
+ }
newsize = n + (n / 2) + 1;
if (newsize <= n) { /* overflow */
PyErr_NoMemory();
return -1;
}
- PyMem_Resize(items, char, (size_t)(newsize * self->ob_descr->itemsize));
- if (items == NULL) {
- PyErr_NoMemory();
- return -1;
+ PyMem_Resize(items, char, (size_t)(newsize * self->ob_descr->itemsize));
+ if (items == NULL) {
+ PyErr_NoMemory();
+ return -1;
}
- self->data.ob_item = (char*) items;
+ self->data.ob_item = (char*) items;
__Pyx_SET_SIZE(self, n);
- self->allocated = newsize;
- return 0;
-}
-
-#endif
-/* _ARRAYARRAY_H */
+ self->allocated = newsize;
+ return 0;
+}
+
+#endif
+/* _ARRAYARRAY_H */
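
Note: resize_smart() above grows the buffer by roughly 50% (newsize = n + n/2 + 1) and keeps the current allocation while the length stays above a quarter of it, so repeated appends stay amortised O(1). A Python sketch of the growth rule (illustration only):

def next_allocation(n, allocated):
    # keep the current buffer while n stays within (allocated/4, allocated)
    if n < allocated and n * 4 > allocated:
        return allocated
    # otherwise grow (or shrink) to about 1.5 * n, never less than n + 1
    return n + (n // 2) + 1

sizes, alloc = [], 0
for n in range(1, 20):
    alloc = next_allocation(n, alloc)
    sizes.append(alloc)
print(sizes)    # [2, 4, 4, 7, 7, 7, 11, 11, 11, 11, 17, ...]
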
diff --git a/contrib/tools/cython/Cython/Utils.py b/contrib/tools/cython/Cython/Utils.py
index 5a3a5bb0b6..d59d67d78b 100644
--- a/contrib/tools/cython/Cython/Utils.py
+++ b/contrib/tools/cython/Cython/Utils.py
@@ -1,8 +1,8 @@
-#
-# Cython -- Things that don't belong
-# anywhere else in particular
-#
-
+#
+# Cython -- Things that don't belong
+# anywhere else in particular
+#
+
from __future__ import absolute_import
try:
@@ -15,87 +15,87 @@ try:
except NameError:
FileNotFoundError = OSError
-import os
-import sys
-import re
-import io
-import codecs
+import os
+import sys
+import re
+import io
+import codecs
import shutil
import tempfile
-from contextlib import contextmanager
-
-modification_time = os.path.getmtime
-
+from contextlib import contextmanager
+
+modification_time = os.path.getmtime
+
_function_caches = []
def clear_function_caches():
for cache in _function_caches:
cache.clear()
-
-def cached_function(f):
- cache = {}
+
+def cached_function(f):
+ cache = {}
_function_caches.append(cache)
- uncomputed = object()
- def wrapper(*args):
- res = cache.get(args, uncomputed)
- if res is uncomputed:
- res = cache[args] = f(*args)
- return res
+ uncomputed = object()
+ def wrapper(*args):
+ res = cache.get(args, uncomputed)
+ if res is uncomputed:
+ res = cache[args] = f(*args)
+ return res
wrapper.uncached = f
- return wrapper
-
-def cached_method(f):
- cache_name = '__%s_cache' % f.__name__
- def wrapper(self, *args):
- cache = getattr(self, cache_name, None)
- if cache is None:
- cache = {}
- setattr(self, cache_name, cache)
- if args in cache:
- return cache[args]
- res = cache[args] = f(self, *args)
- return res
- return wrapper
-
-def replace_suffix(path, newsuf):
- base, _ = os.path.splitext(path)
- return base + newsuf
-
-
-def open_new_file(path):
- if os.path.exists(path):
- # Make sure to create a new file here so we can
- # safely hard link the output files.
- os.unlink(path)
-
- # we use the ISO-8859-1 encoding here because we only write pure
- # ASCII strings or (e.g. for file names) byte encoded strings as
- # Unicode, so we need a direct mapping from the first 256 Unicode
- # characters to a byte sequence, which ISO-8859-1 provides
-
- # note: can't use io.open() in Py2 as we may be writing str objects
- return codecs.open(path, "w", encoding="ISO-8859-1")
-
-
-def castrate_file(path, st):
- # Remove junk contents from an output file after a
- # failed compilation.
- # Also sets access and modification times back to
- # those specified by st (a stat struct).
- try:
- f = open_new_file(path)
- except EnvironmentError:
- pass
- else:
- f.write(
- "#error Do not use this file, it is the result of a failed Cython compilation.\n")
- f.close()
- if st:
- os.utime(path, (st.st_atime, st.st_mtime-1))
-
-def file_newer_than(path, time):
- ftime = modification_time(path)
- return ftime > time
-
+ return wrapper
+
+def cached_method(f):
+ cache_name = '__%s_cache' % f.__name__
+ def wrapper(self, *args):
+ cache = getattr(self, cache_name, None)
+ if cache is None:
+ cache = {}
+ setattr(self, cache_name, cache)
+ if args in cache:
+ return cache[args]
+ res = cache[args] = f(self, *args)
+ return res
+ return wrapper
+
+def replace_suffix(path, newsuf):
+ base, _ = os.path.splitext(path)
+ return base + newsuf
+
+
+def open_new_file(path):
+ if os.path.exists(path):
+ # Make sure to create a new file here so we can
+ # safely hard link the output files.
+ os.unlink(path)
+
+ # we use the ISO-8859-1 encoding here because we only write pure
+ # ASCII strings or (e.g. for file names) byte encoded strings as
+ # Unicode, so we need a direct mapping from the first 256 Unicode
+ # characters to a byte sequence, which ISO-8859-1 provides
+
+ # note: can't use io.open() in Py2 as we may be writing str objects
+ return codecs.open(path, "w", encoding="ISO-8859-1")
+
+
+def castrate_file(path, st):
+ # Remove junk contents from an output file after a
+ # failed compilation.
+ # Also sets access and modification times back to
+ # those specified by st (a stat struct).
+ try:
+ f = open_new_file(path)
+ except EnvironmentError:
+ pass
+ else:
+ f.write(
+ "#error Do not use this file, it is the result of a failed Cython compilation.\n")
+ f.close()
+ if st:
+ os.utime(path, (st.st_atime, st.st_mtime-1))
+
+def file_newer_than(path, time):
+ ftime = modification_time(path)
+ return ftime > time
+
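
Note: cached_function and cached_method above memoise results keyed by the positional-argument tuple (per function, respectively per instance attribute). A small self-contained usage sketch with a simplified stand-in decorator (illustration only, not part of the diff):

call_count = 0

def cached_function(f):
    # simplified stand-in for Cython.Utils.cached_function, same memoisation idea
    cache = {}
    def wrapper(*args):
        if args not in cache:
            cache[args] = f(*args)
        return cache[args]
    return wrapper

@cached_function
def slow_lookup(name):
    global call_count
    call_count += 1
    return name.upper()

slow_lookup("cython"); slow_lookup("cython")
print(call_count)     # 1 -- the second call is served from the cache
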
def safe_makedirs(path):
try:
@@ -124,61 +124,61 @@ def copy_file_to_dir_if_newer(sourcefile, destdir):
shutil.copy2(sourcefile, destfile)
-@cached_function
-def find_root_package_dir(file_path):
- dir = os.path.dirname(file_path)
- if file_path == dir:
- return dir
- elif is_package_dir(dir):
- return find_root_package_dir(dir)
- else:
- return dir
-
-@cached_function
-def check_package_dir(dir, package_names):
- for dirname in package_names:
- dir = os.path.join(dir, dirname)
- if not is_package_dir(dir):
- return None
- return dir
-
-@cached_function
-def is_package_dir(dir_path):
- for filename in ("__init__.py",
- "__init__.pyc",
- "__init__.pyx",
- "__init__.pxd"):
- path = os.path.join(dir_path, filename)
- if path_exists(path):
- return 1
-
-@cached_function
-def path_exists(path):
- # try on the filesystem first
- if os.path.exists(path):
- return True
- # figure out if a PEP 302 loader is around
- try:
- loader = __loader__
- # XXX the code below assumes a 'zipimport.zipimporter' instance
- # XXX should be easy to generalize, but too lazy right now to write it
- archive_path = getattr(loader, 'archive', None)
- if archive_path:
- normpath = os.path.normpath(path)
- if normpath.startswith(archive_path):
- arcname = normpath[len(archive_path)+1:]
- try:
- loader.get_data(arcname)
- return True
- except IOError:
- return False
- except NameError:
- pass
- return False
-
-# file name encodings
-
-def decode_filename(filename):
+@cached_function
+def find_root_package_dir(file_path):
+ dir = os.path.dirname(file_path)
+ if file_path == dir:
+ return dir
+ elif is_package_dir(dir):
+ return find_root_package_dir(dir)
+ else:
+ return dir
+
+@cached_function
+def check_package_dir(dir, package_names):
+ for dirname in package_names:
+ dir = os.path.join(dir, dirname)
+ if not is_package_dir(dir):
+ return None
+ return dir
+
+@cached_function
+def is_package_dir(dir_path):
+ for filename in ("__init__.py",
+ "__init__.pyc",
+ "__init__.pyx",
+ "__init__.pxd"):
+ path = os.path.join(dir_path, filename)
+ if path_exists(path):
+ return 1
+
+@cached_function
+def path_exists(path):
+ # try on the filesystem first
+ if os.path.exists(path):
+ return True
+ # figure out if a PEP 302 loader is around
+ try:
+ loader = __loader__
+ # XXX the code below assumes a 'zipimport.zipimporter' instance
+ # XXX should be easy to generalize, but too lazy right now to write it
+ archive_path = getattr(loader, 'archive', None)
+ if archive_path:
+ normpath = os.path.normpath(path)
+ if normpath.startswith(archive_path):
+ arcname = normpath[len(archive_path)+1:]
+ try:
+ loader.get_data(arcname)
+ return True
+ except IOError:
+ return False
+ except NameError:
+ pass
+ return False
+
+# file name encodings
+
+def decode_filename(filename):
if isinstance(filename, bytes):
try:
filename_encoding = sys.getfilesystemencoding()
@@ -187,17 +187,17 @@ def decode_filename(filename):
filename = filename.decode(filename_encoding)
except UnicodeDecodeError:
pass
- return filename
-
-# support for source file encoding detection
-
+ return filename
+
+# support for source file encoding detection
+
_match_file_encoding = re.compile(br"(\w*coding)[:=]\s*([-\w.]+)").search
-
-
-def detect_opened_file_encoding(f):
- # PEPs 263 and 3120
+
+
+def detect_opened_file_encoding(f):
+ # PEPs 263 and 3120
# Most of the time the first two lines fall in the first couple of hundred chars,
- # and this bulk read/split is much faster.
+ # and this bulk read/split is much faster.
lines = ()
start = b''
while len(lines) < 3:
@@ -211,21 +211,21 @@ def detect_opened_file_encoding(f):
return m.group(2).decode('iso8859-1')
elif len(lines) > 1:
m = _match_file_encoding(lines[1])
- if m:
+ if m:
return m.group(2).decode('iso8859-1')
- return "UTF-8"
-
-
-def skip_bom(f):
- """
- Read past a BOM at the beginning of a source file.
- This could be added to the scanner, but it's *substantially* easier
- to keep it at this level.
- """
- if f.read(1) != u'\uFEFF':
- f.seek(0)
-
-
+ return "UTF-8"
+
+
+def skip_bom(f):
+ """
+ Read past a BOM at the beginning of a source file.
+ This could be added to the scanner, but it's *substantially* easier
+ to keep it at this level.
+ """
+ if f.read(1) != u'\uFEFF':
+ f.seek(0)
+
+
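
Note: detect_opened_file_encoding() above looks for a PEP 263 coding declaration in the first two lines of the source file (reading a bulk chunk first for speed) and falls back to UTF-8, while skip_bom() consumes a leading BOM. A minimal Python sketch of the cookie lookup (illustration only, ignoring the bulk-read optimisation):

import re

# same pattern as _match_file_encoding above, applied to raw bytes
_match_file_encoding = re.compile(br"(\w*coding)[:=]\s*([-\w.]+)").search

def detect_encoding(first_two_lines):
    for line in first_two_lines[:2]:
        m = _match_file_encoding(line)
        if m:
            return m.group(2).decode("iso8859-1")
    return "UTF-8"

print(detect_encoding([b"#!/usr/bin/env python", b"# -*- coding: latin-1 -*-"]))
# -> 'latin-1'
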
def open_source_file(source_filename, encoding=None, error_handling=None):
stream = None
try:
@@ -233,157 +233,157 @@ def open_source_file(source_filename, encoding=None, error_handling=None):
# Most of the time the encoding is not specified, so try hard to open the file only once.
f = io.open(source_filename, 'rb')
encoding = detect_opened_file_encoding(f)
- f.seek(0)
+ f.seek(0)
stream = io.TextIOWrapper(f, encoding=encoding, errors=error_handling)
- else:
+ else:
stream = io.open(source_filename, encoding=encoding, errors=error_handling)
-
+
except OSError:
if os.path.exists(source_filename):
raise # File is there, but something went wrong reading from it.
# Allow source files to be in zip files etc.
- try:
- loader = __loader__
- if source_filename.startswith(loader.archive):
+ try:
+ loader = __loader__
+ if source_filename.startswith(loader.archive):
stream = open_source_from_loader(
- loader, source_filename,
+ loader, source_filename,
encoding, error_handling)
- except (NameError, AttributeError):
- pass
-
+ except (NameError, AttributeError):
+ pass
+
if stream is None:
raise FileNotFoundError(source_filename)
- skip_bom(stream)
- return stream
-
-
-def open_source_from_loader(loader,
- source_filename,
+ skip_bom(stream)
+ return stream
+
+
+def open_source_from_loader(loader,
+ source_filename,
encoding=None, error_handling=None):
- nrmpath = os.path.normpath(source_filename)
- arcname = nrmpath[len(loader.archive)+1:]
- data = loader.get_data(arcname)
- return io.TextIOWrapper(io.BytesIO(data),
- encoding=encoding,
- errors=error_handling)
-
-
-def str_to_number(value):
- # note: this expects a string as input that was accepted by the
+ nrmpath = os.path.normpath(source_filename)
+ arcname = nrmpath[len(loader.archive)+1:]
+ data = loader.get_data(arcname)
+ return io.TextIOWrapper(io.BytesIO(data),
+ encoding=encoding,
+ errors=error_handling)
+
+
+def str_to_number(value):
+ # note: this expects a string as input that was accepted by the
# parser already, with an optional "-" sign in front
is_neg = False
if value[:1] == '-':
is_neg = True
value = value[1:]
- if len(value) < 2:
- value = int(value, 0)
- elif value[0] == '0':
+ if len(value) < 2:
+ value = int(value, 0)
+ elif value[0] == '0':
literal_type = value[1] # 0'o' - 0'b' - 0'x'
if literal_type in 'xX':
- # hex notation ('0x1AF')
- value = int(value[2:], 16)
+ # hex notation ('0x1AF')
+ value = int(value[2:], 16)
elif literal_type in 'oO':
- # Py3 octal notation ('0o136')
- value = int(value[2:], 8)
+ # Py3 octal notation ('0o136')
+ value = int(value[2:], 8)
elif literal_type in 'bB':
- # Py3 binary notation ('0b101')
- value = int(value[2:], 2)
- else:
- # Py2 octal notation ('0136')
- value = int(value, 8)
- else:
- value = int(value, 0)
+ # Py3 binary notation ('0b101')
+ value = int(value[2:], 2)
+ else:
+ # Py2 octal notation ('0136')
+ value = int(value, 8)
+ else:
+ value = int(value, 0)
return -value if is_neg else value
-
-
-def long_literal(value):
- if isinstance(value, basestring):
- value = str_to_number(value)
- return not -2**31 <= value < 2**31
-
-
-@cached_function
-def get_cython_cache_dir():
+
+
+def long_literal(value):
+ if isinstance(value, basestring):
+ value = str_to_number(value)
+ return not -2**31 <= value < 2**31
+
+
+@cached_function
+def get_cython_cache_dir():
r"""
Return the base directory containing Cython's caches.
-
- Priority:
-
- 1. CYTHON_CACHE_DIR
- 2. (OS X): ~/Library/Caches/Cython
- (posix not OS X): XDG_CACHE_HOME/cython if XDG_CACHE_HOME defined
- 3. ~/.cython
-
- """
- if 'CYTHON_CACHE_DIR' in os.environ:
- return os.environ['CYTHON_CACHE_DIR']
-
- parent = None
- if os.name == 'posix':
- if sys.platform == 'darwin':
- parent = os.path.expanduser('~/Library/Caches')
- else:
- # this could fallback on ~/.cache
- parent = os.environ.get('XDG_CACHE_HOME')
-
- if parent and os.path.isdir(parent):
- return os.path.join(parent, 'cython')
-
- # last fallback: ~/.cython
- return os.path.expanduser(os.path.join('~', '.cython'))
-
-
-@contextmanager
-def captured_fd(stream=2, encoding=None):
- orig_stream = os.dup(stream) # keep copy of original stream
- try:
+
+ Priority:
+
+ 1. CYTHON_CACHE_DIR
+ 2. (OS X): ~/Library/Caches/Cython
+ (posix not OS X): XDG_CACHE_HOME/cython if XDG_CACHE_HOME defined
+ 3. ~/.cython
+
+ """
+ if 'CYTHON_CACHE_DIR' in os.environ:
+ return os.environ['CYTHON_CACHE_DIR']
+
+ parent = None
+ if os.name == 'posix':
+ if sys.platform == 'darwin':
+ parent = os.path.expanduser('~/Library/Caches')
+ else:
+ # this could fallback on ~/.cache
+ parent = os.environ.get('XDG_CACHE_HOME')
+
+ if parent and os.path.isdir(parent):
+ return os.path.join(parent, 'cython')
+
+ # last fallback: ~/.cython
+ return os.path.expanduser(os.path.join('~', '.cython'))
+
+
+@contextmanager
+def captured_fd(stream=2, encoding=None):
+ orig_stream = os.dup(stream) # keep copy of original stream
+ try:
with tempfile.TemporaryFile(mode="a+b") as temp_file:
def read_output(_output=[b'']):
if not temp_file.closed:
temp_file.seek(0)
_output[0] = temp_file.read()
return _output[0]
-
+
os.dup2(temp_file.fileno(), stream) # replace stream by copy of pipe
try:
def get_output():
result = read_output()
return result.decode(encoding) if encoding else result
-
+
yield get_output
finally:
os.dup2(orig_stream, stream) # restore original stream
read_output() # keep the output in case it's used after closing the context manager
- finally:
- os.close(orig_stream)
-
-
+ finally:
+ os.close(orig_stream)
+
+
def print_bytes(s, header_text=None, end=b'\n', file=sys.stdout, flush=True):
if header_text:
file.write(header_text) # note: text! => file.write() instead of out.write()
- file.flush()
- try:
- out = file.buffer # Py3
- except AttributeError:
- out = file # Py2
- out.write(s)
- if end:
- out.write(end)
- if flush:
- out.flush()
-
-class LazyStr:
- def __init__(self, callback):
- self.callback = callback
- def __str__(self):
- return self.callback()
- def __repr__(self):
- return self.callback()
- def __add__(self, right):
- return self.callback() + right
- def __radd__(self, left):
- return left + self.callback()
-
+ file.flush()
+ try:
+ out = file.buffer # Py3
+ except AttributeError:
+ out = file # Py2
+ out.write(s)
+ if end:
+ out.write(end)
+ if flush:
+ out.flush()
+
+class LazyStr:
+ def __init__(self, callback):
+ self.callback = callback
+ def __str__(self):
+ return self.callback()
+ def __repr__(self):
+ return self.callback()
+ def __add__(self, right):
+ return self.callback() + right
+ def __radd__(self, left):
+ return left + self.callback()
+
class OrderedSet(object):
def __init__(self, elements=()):
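
The Utils.py hunks above only move authorship whitespace, but the helpers they touch are easiest to read from a usage sketch. This is a hypothetical driver, not part of the patch; it assumes the functions live in Cython.Utils as in the upstream layout, and the values shown follow directly from the str_to_number() branches and the captured_fd() context manager reproduced above.

# Hypothetical driver script, not part of the patch.
import os
from Cython.Utils import str_to_number, captured_fd

# str_to_number() only sees literals the parser already accepted, with an
# optional leading '-'; the prefix decides the base.
assert str_to_number('0x1AF') == 431   # hex
assert str_to_number('0o136') == 94    # Py3 octal
assert str_to_number('0136') == 94     # Py2 octal (leading 0, no letter)
assert str_to_number('0b101') == 5     # binary
assert str_to_number('-42') == -42     # sign stripped before the base logic

# captured_fd() dup2()s a temporary file over an OS-level descriptor (2 is
# stderr) and yields a callable that returns whatever was written, even after
# the original descriptor has been restored.
with captured_fd(2, encoding='utf8') as get_stderr:
    os.write(2, b'compiler noise\n')
print(get_stderr())   # -> 'compiler noise\n'
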
diff --git a/contrib/tools/cython/Cython/__init__.py b/contrib/tools/cython/Cython/__init__.py
index 38b0f821bf..549246b8a3 100644
--- a/contrib/tools/cython/Cython/__init__.py
+++ b/contrib/tools/cython/Cython/__init__.py
@@ -1,12 +1,12 @@
from __future__ import absolute_import
-
+
from .Shadow import __version__
-# Void cython.* directives (for case insensitive operating systems).
+# Void cython.* directives (for case insensitive operating systems).
from .Shadow import *
-
-
-def load_ipython_extension(ip):
- """Load the extension in IPython."""
+
+
+def load_ipython_extension(ip):
+ """Load the extension in IPython."""
from .Build.IpythonMagic import CythonMagics # pylint: disable=cyclic-import
- ip.register_magics(CythonMagics)
+ ip.register_magics(CythonMagics)
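
For context, a sketch of how the hook above is normally reached. It assumes a running IPython session; the commented cell body is a made-up example, and only the extension-manager call is meant to mirror what %load_ext does.

# Inside IPython, '%load_ext Cython' imports the package and calls
# load_ipython_extension(ip); programmatically that is roughly:
from IPython import get_ipython

ip = get_ipython()                                   # the active InteractiveShell
if ip is not None:
    ip.extension_manager.load_extension('Cython')    # registers CythonMagics
    # the %%cython cell magic is now available, e.g.:
    # %%cython
    # def double_it(int n):
    #     return 2 * n
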
diff --git a/contrib/tools/cython/cygdb.py b/contrib/tools/cython/cygdb.py
index 6269011dbd..7f2d57f5d6 100755
--- a/contrib/tools/cython/cygdb.py
+++ b/contrib/tools/cython/cygdb.py
@@ -1,8 +1,8 @@
-#!/usr/bin/env python
-
-import sys
-
-from Cython.Debugger import Cygdb as cygdb
-
-if __name__ == '__main__':
- cygdb.main()
+#!/usr/bin/env python
+
+import sys
+
+from Cython.Debugger import Cygdb as cygdb
+
+if __name__ == '__main__':
+ cygdb.main()
diff --git a/contrib/tools/cython/cython.py b/contrib/tools/cython/cython.py
index aef94b40f7..f4f1486ecf 100755
--- a/contrib/tools/cython/cython.py
+++ b/contrib/tools/cython/cython.py
@@ -1,27 +1,27 @@
-#!/usr/bin/env python
-
+#!/usr/bin/env python
+
# Change content of this file to change uids for cython programs - cython 0.29.27 r0
-#
-# Cython -- Main Program, generic
-#
-
-if __name__ == '__main__':
-
- import os
- import sys
- sys.dont_write_bytecode = True
-
- # Make sure we import the right Cython
- cythonpath, _ = os.path.split(os.path.realpath(__file__))
- sys.path.insert(0, cythonpath)
-
- from Cython.Compiler.Main import main
- main(command_line = 1)
-
-else:
- # Void cython.* directives.
- from Cython.Shadow import *
- ## and bring in the __version__
- from Cython import __version__
- from Cython import load_ipython_extension
+#
+# Cython -- Main Program, generic
+#
+
+if __name__ == '__main__':
+
+ import os
+ import sys
+ sys.dont_write_bytecode = True
+
+ # Make sure we import the right Cython
+ cythonpath, _ = os.path.split(os.path.realpath(__file__))
+ sys.path.insert(0, cythonpath)
+
+ from Cython.Compiler.Main import main
+ main(command_line = 1)
+
+else:
+ # Void cython.* directives.
+ from Cython.Shadow import *
+ ## and bring in the __version__
+ from Cython import __version__
+ from Cython import load_ipython_extension
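
The front-end script above has two modes; a brief sketch of both follows. example.pyx is a hypothetical input file, and the pure-Python snippet relies only on the Cython.Shadow re-exports shown in the hunk.

# 1) Run directly, it drives the compiler:
#       $ python contrib/tools/cython/cython.py example.pyx   # emits example.c
#
# 2) Imported, it only re-exports the Cython.Shadow no-op directives, so
#    annotated code still runs under plain CPython:
import cython

@cython.locals(n=cython.int)
def double_it(n):
    return 2 * n

print(double_it(21))   # prints 42 with or without compilation
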
diff --git a/contrib/tools/protoc/ya.make b/contrib/tools/protoc/ya.make
index 76a072ee24..81244e1259 100644
--- a/contrib/tools/protoc/ya.make
+++ b/contrib/tools/protoc/ya.make
@@ -11,7 +11,7 @@ ENDIF()
IF (NOT PREBUILT)
INCLUDE(${ARCADIA_ROOT}/contrib/tools/protoc/bin/ya.make)
ENDIF()
-
+
RECURSE(
bin
plugins
diff --git a/contrib/tools/python/pyconfig.inc b/contrib/tools/python/pyconfig.inc
index b7ea0b273a..01da7d4566 100644
--- a/contrib/tools/python/pyconfig.inc
+++ b/contrib/tools/python/pyconfig.inc
@@ -1,9 +1,9 @@
-NO_COMPILER_WARNINGS()
-
+NO_COMPILER_WARNINGS()
+
ENABLE(USE_ARCADIA_PYTHON)
SET(PYTHON_DIR "contrib/tools/python")
-SET(PYTHON_SRC_DIR "${PYTHON_DIR}/src")
+SET(PYTHON_SRC_DIR "${PYTHON_DIR}/src")
SET(PYTHON_ROOT "${ARCADIA_ROOT}/${PYTHON_DIR}")
SET(PYTHON_SRC_ROOT "${ARCADIA_ROOT}/${PYTHON_SRC_DIR}")
@@ -16,33 +16,33 @@ CFLAGS(
-DUNQUOTED_VERSION=2.7
)
-IF (NOT WIN32)
+IF (NOT WIN32)
EXTRALIBS(-lutil)
- IF (LINUX)
+ IF (LINUX)
EXTRALIBS(-ldl)
CFLAGS(
-DUNQUOTED_PYTHONPATH=:plat-linux2
-DUNQUOTED_PLATFORM=linux2
)
- ELSEIF (FREEBSD)
+ ELSEIF (FREEBSD)
CFLAGS(
-DUNQUOTED_PYTHONPATH=:plat-freebsd${FREEBSD_VER}
-DUNQUOTED_PLATFORM=freebsd${FREEBSD_VER}
)
- ELSEIF (DARWIN)
- CFLAGS(
+ ELSEIF (DARWIN)
+ CFLAGS(
-DUNQUOTED_PYTHONPATH=:plat-darwin
-DUNQUOTED_PLATFORM=darwin
- )
+ )
LDFLAGS(
-framework SystemConfiguration
-framework CoreFoundation
)
- ENDIF ()
-ENDIF ()
-
-IF (MSVC)
- CFLAGS(/DXMLCALL= /DXMLIMPORT=)
-ENDIF ()
+ ENDIF ()
+ENDIF ()
+
+IF (MSVC)
+ CFLAGS(/DXMLCALL= /DXMLIMPORT=)
+ENDIF ()
diff --git a/contrib/tools/python/src/Include/pyport.h b/contrib/tools/python/src/Include/pyport.h
index 8e273b9576..ad2ff5929a 100644
--- a/contrib/tools/python/src/Include/pyport.h
+++ b/contrib/tools/python/src/Include/pyport.h
@@ -759,7 +759,7 @@ extern int fdatasync(int);
BeOS and cygwin are the only other autoconf platform requiring special
linkage handling and both of these use __declspec().
*/
-#if defined(__BEOS__)
+#if defined(__BEOS__)
# define HAVE_DECLSPEC_DLL
#endif
@@ -774,7 +774,7 @@ extern int fdatasync(int);
#endif
/* only get special linkage if built as shared or platform is Cygwin */
-#if defined(Py_ENABLE_SHARED)
+#if defined(Py_ENABLE_SHARED)
# if defined(HAVE_DECLSPEC_DLL)
# ifdef Py_BUILD_CORE
# define PyAPI_FUNC(RTYPE) __declspec(dllexport) RTYPE
diff --git a/contrib/tools/python/src/config_init.c b/contrib/tools/python/src/config_init.c
index 97a05a9893..2fa47000b8 100644
--- a/contrib/tools/python/src/config_init.c
+++ b/contrib/tools/python/src/config_init.c
@@ -58,10 +58,10 @@ extern void init_scproxy(void);
#endif
#endif
-#ifdef _CYGWIN_
-extern void init_multiprocessing(void);
-#endif
-
+#ifdef _CYGWIN_
+extern void init_multiprocessing(void);
+#endif
+
#ifdef _UNIX_
extern void init_socket(void);
extern void initcrypt(void);
@@ -82,8 +82,8 @@ extern void init_socket(void);
extern void initnt(void);
extern void initpyexpat(void);
extern void initselect(void);
-extern void initmsvcrt(void);
-extern void init_subprocess(void);
+extern void initmsvcrt(void);
+extern void init_subprocess(void);
extern void init_winreg(void);
#endif
diff --git a/contrib/tools/python/src/config_map.c b/contrib/tools/python/src/config_map.c
index 4463fc44af..6bfe125ddd 100644
--- a/contrib/tools/python/src/config_map.c
+++ b/contrib/tools/python/src/config_map.c
@@ -58,10 +58,10 @@
#endif
#endif
-#ifdef _CYGWIN_
-{"_multiprocessing", init_multiprocessing},
-#endif
-
+#ifdef _CYGWIN_
+{"_multiprocessing", init_multiprocessing},
+#endif
+
#ifdef _UNIX_
{"_socket", init_socket},
{"crypt", initcrypt},
@@ -82,8 +82,8 @@
{"nt", initnt},
{"pyexpat", initpyexpat},
{"select", initselect},
-{"msvcrt", initmsvcrt},
-{"_subprocess", init_subprocess},
+{"msvcrt", initmsvcrt},
+{"_subprocess", init_subprocess},
{"_winreg", init_winreg},
#endif
diff --git a/contrib/tools/python/ya.make b/contrib/tools/python/ya.make
index 5697d4ea77..fc95aaca64 100644
--- a/contrib/tools/python/ya.make
+++ b/contrib/tools/python/ya.make
@@ -1,17 +1,17 @@
PROGRAM(python)
OWNER(g:contrib orivej)
-
+
LICENSE(PSF-2.0)
VERSION(2.7.16)
ORIGINAL_SOURCE(https://github.com/python/cpython)
-PEERDIR(
- contrib/tools/python/libpython
+PEERDIR(
+ contrib/tools/python/libpython
contrib/tools/python/src/Modules/_sqlite
-)
+)
END()
diff --git a/contrib/tools/ragel6/cdcodegen.cpp b/contrib/tools/ragel6/cdcodegen.cpp
index 3ab82b9c20..9e784e3f58 100644
--- a/contrib/tools/ragel6/cdcodegen.cpp
+++ b/contrib/tools/ragel6/cdcodegen.cpp
@@ -654,20 +654,20 @@ string FsmCodeGen::WIDE_ALPH_TYPE()
void FsmCodeGen::STATE_IDS()
{
if ( redFsm->startState != 0 )
- STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << "};\n";
+ STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << "};\n";
if ( !noFinal )
- STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << "};\n";
+ STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << "};\n";
if ( !noError )
- STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << "};\n";
+ STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << "};\n";
out << "\n";
if ( !noEntry && entryPointNames.length() > 0 ) {
for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
- " = " << entryPointIds[en.pos()] << "};\n";
+ " = " << entryPointIds[en.pos()] << "};\n";
}
out << "\n";
}
@@ -719,7 +719,7 @@ std::ostream &CCodeGen::CLOSE_ARRAY()
std::ostream &CCodeGen::STATIC_VAR( string type, string name )
{
- out << "enum {" << name;
+ out << "enum {" << name;
return out;
}
diff --git a/contrib/tools/ragel6/javacodegen.cpp b/contrib/tools/ragel6/javacodegen.cpp
index 75f119aeb9..99fbbbcf4b 100644
--- a/contrib/tools/ragel6/javacodegen.cpp
+++ b/contrib/tools/ragel6/javacodegen.cpp
@@ -1681,8 +1681,8 @@ ostream &JavaTabCodeGen::source_error( const InputLoc &loc )
return cerr;
}
-#undef _resume
-#undef _again
-#undef _eof_trans
-#undef _test_eof
-#undef _out
+#undef _resume
+#undef _again
+#undef _eof_trans
+#undef _test_eof
+#undef _out
diff --git a/contrib/tools/ragel6/rbxgoto.cpp b/contrib/tools/ragel6/rbxgoto.cpp
index 7baab974e3..932fdf35f2 100644
--- a/contrib/tools/ragel6/rbxgoto.cpp
+++ b/contrib/tools/ragel6/rbxgoto.cpp
@@ -658,7 +658,7 @@ void RbxGotoCodeGen::BREAK( ostream &ret, int targState )
out <<
" begin\n"
" " << P() << " += 1\n"
- " "; rbxGoto(ret, "_out") << "\n"
+ " "; rbxGoto(ret, "_out") << "\n"
" end\n";
}
diff --git a/contrib/tools/ragel6/rlscan.cpp b/contrib/tools/ragel6/rlscan.cpp
index 07bfd4d099..5e2fc36645 100644
--- a/contrib/tools/ragel6/rlscan.cpp
+++ b/contrib/tools/ragel6/rlscan.cpp
@@ -159,7 +159,7 @@ tr8:
{ tok_te = p;p--;}
goto st2;
st2:
-#line 1 "NONE"
+#line 1 "NONE"
{ tok_ts = 0;}
if ( ++p == pe )
goto _test_eof2;
@@ -871,7 +871,7 @@ char **Scanner::makeIncludePathChecks( const char *thisFileName,
ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
{
char **check = pathChecks;
- ifstream *inFile = new ifstream;
+ ifstream *inFile = new ifstream;
while ( *check != 0 ) {
inFile->open( *check );
@@ -893,7 +893,7 @@ ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
}
found = -1;
- delete inFile;
+ delete inFile;
return 0;
}
@@ -1269,7 +1269,7 @@ _again:
if ( ++p == pe )
goto _test_eof;
-_resume:
+_resume:
switch ( cs )
{
tr0:
@@ -1333,11 +1333,11 @@ tr87:
{te = p;p--;{ pass( IMP_UInt, ts, te ); }}
goto st38;
tr88:
-#line 1 "NONE"
+#line 1 "NONE"
{ switch( act ) {
case 176:
{{p = ((te))-1;} pass( IMP_Define, 0, 0 ); }
- break;
+ break;
case 177:
{{p = ((te))-1;} pass( IMP_Word, ts, te ); }
break;
@@ -1398,7 +1398,7 @@ case 39:
}
goto tr82;
tr75:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
goto st40;
st40:
@@ -1668,7 +1668,7 @@ tr14:
goto st52;
tr17:
#line 716 "rlscan.rl"
- {te = p+1;{ token( IL_Literal, ts, te ); }}
+ {te = p+1;{ token( IL_Literal, ts, te ); }}
goto st52;
tr20:
#line 641 "rlscan.rl"
@@ -1760,7 +1760,7 @@ tr118:
{te = p+1;{ token( TK_NameSep, ts, te ); }}
goto st52;
tr119:
-#line 1 "NONE"
+#line 1 "NONE"
{ switch( act ) {
case 1:
{{p = ((te))-1;} token( KW_PChar ); }
@@ -2829,7 +2829,7 @@ tr186:
{te = p+1;{ token( TK_NameSep, ts, te ); }}
goto st95;
tr187:
-#line 1 "NONE"
+#line 1 "NONE"
{ switch( act ) {
case 27:
{{p = ((te))-1;} token( KW_PChar ); }
@@ -3840,11 +3840,11 @@ tr245:
goto st137;
tr246:
#line 881 "rlscan.rl"
- {te = p+1;{ token( RE_Char, '\t' ); }}
+ {te = p+1;{ token( RE_Char, '\t' ); }}
goto st137;
tr247:
#line 883 "rlscan.rl"
- {te = p+1;{ token( RE_Char, '\v' ); }}
+ {te = p+1;{ token( RE_Char, '\v' ); }}
goto st137;
st137:
#line 1 "NONE"
@@ -4026,7 +4026,7 @@ tr275:
goto st143;
tr276:
#line 940 "rlscan.rl"
- {te = p;p--;{ token( TK_Word, ts, te ); }}
+ {te = p;p--;{ token( TK_Word, ts, te ); }}
goto st143;
st143:
#line 1 "NONE"
@@ -4281,7 +4281,7 @@ tr348:
goto st146;
tr349:
#line 1063 "rlscan.rl"
- {te = p;p--;{ token( TK_Middle ); }}
+ {te = p;p--;{ token( TK_Middle ); }}
goto st146;
tr350:
#line 1052 "rlscan.rl"
@@ -4352,7 +4352,7 @@ tr366:
{te = p+1;{ token( TK_NotFinalToState ); }}
goto st146;
tr367:
-#line 1 "NONE"
+#line 1 "NONE"
{ switch( act ) {
case 88:
{{p = ((te))-1;} token( KW_Machine ); }
@@ -4461,7 +4461,7 @@ tr461:
{te = p+1;{ token( TK_BarStar ); }}
goto st146;
st146:
-#line 1 "NONE"
+#line 1 "NONE"
{ts = 0;}
if ( ++p == pe )
goto _test_eof146;
@@ -4685,7 +4685,7 @@ case 156:
goto tr333;
goto tr315;
tr290:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
goto st157;
st157:
@@ -4807,127 +4807,127 @@ case 166:
}
goto tr315;
tr297:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 1002 "rlscan.rl"
{act = 108;}
goto st167;
tr377:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 975 "rlscan.rl"
{act = 97;}
goto st167;
tr380:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 959 "rlscan.rl"
{act = 92;}
goto st167;
tr386:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 960 "rlscan.rl"
{act = 93;}
goto st167;
tr390:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 994 "rlscan.rl"
{act = 102;}
goto st167;
tr391:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 995 "rlscan.rl"
{act = 103;}
goto st167;
tr395:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 999 "rlscan.rl"
{act = 107;}
goto st167;
tr398:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 998 "rlscan.rl"
{act = 106;}
goto st167;
tr403:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 967 "rlscan.rl"
{act = 96;}
goto st167;
tr409:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 954 "rlscan.rl"
{act = 90;}
goto st167;
tr415:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 953 "rlscan.rl"
{act = 89;}
goto st167;
tr418:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 992 "rlscan.rl"
{act = 100;}
goto st167;
tr421:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 996 "rlscan.rl"
{act = 104;}
goto st167;
tr427:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 952 "rlscan.rl"
{act = 88;}
goto st167;
tr433:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 993 "rlscan.rl"
{act = 101;}
goto st167;
tr440:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 962 "rlscan.rl"
{act = 95;}
goto st167;
tr445:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 961 "rlscan.rl"
{act = 94;}
goto st167;
tr446:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 997 "rlscan.rl"
{act = 105;}
goto st167;
tr453:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 983 "rlscan.rl"
{act = 98;}
goto st167;
tr457:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 991 "rlscan.rl"
{act = 99;}
goto st167;
tr460:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
#line 955 "rlscan.rl"
{act = 91;}
@@ -6365,7 +6365,7 @@ case 251:
goto tr461;
goto tr315;
tr313:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
goto st252;
st252:
@@ -6444,12 +6444,12 @@ tr480:
{te = p;p--;{ pass( IMP_Word, ts, te ); }}
goto st253;
st253:
-#line 1 "NONE"
+#line 1 "NONE"
{ts = 0;}
if ( ++p == pe )
goto _test_eof253;
case 253:
-#line 1 "NONE"
+#line 1 "NONE"
{ts = p;}
#line 6453 "rlscan.cpp"
switch( (*p) ) {
@@ -6493,7 +6493,7 @@ case 254:
}
goto tr474;
tr467:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
goto st255;
st255:
@@ -6534,7 +6534,7 @@ case 32:
goto tr60;
goto st31;
tr468:
-#line 1 "NONE"
+#line 1 "NONE"
{te = p+1;}
goto st256;
st256:
@@ -6567,8 +6567,8 @@ case 258:
goto tr478;
goto tr477;
tr470:
-#line 1 "NONE"
- {te = p+1;}
+#line 1 "NONE"
+ {te = p+1;}
goto st259;
st259:
if ( ++p == pe )
@@ -6608,8 +6608,8 @@ case 35:
goto tr66;
goto st34;
tr471:
-#line 1 "NONE"
- {te = p+1;}
+#line 1 "NONE"
+ {te = p+1;}
goto st260;
st260:
if ( ++p == pe )
diff --git a/contrib/tools/ragel6/rlscan.h b/contrib/tools/ragel6/rlscan.h
index 0487db5293..2aa6ae3914 100644
--- a/contrib/tools/ragel6/rlscan.h
+++ b/contrib/tools/ragel6/rlscan.h
@@ -33,7 +33,7 @@
using std::istream;
using std::ostream;
-extern const char *Parser_lelNames[];
+extern const char *Parser_lelNames[];
struct Scanner
{
diff --git a/contrib/tools/ya.make b/contrib/tools/ya.make
index e8efe70901..750911c587 100644
--- a/contrib/tools/ya.make
+++ b/contrib/tools/ya.make
@@ -1,6 +1,6 @@
OWNER(g:contrib)
-RECURSE(
+RECURSE(
bdb
bison
chromaprinter
@@ -10,7 +10,7 @@ RECURSE(
flatc64
flex
flex-old
- flex-old/fl
+ flex-old/fl
fluent-bit
fusermount
gperf
@@ -54,7 +54,7 @@ RECURSE(
jdk
jdk/test
xmllint
-)
+)
IF (NOT OS_WINDOWS)
RECURSE(
diff --git a/contrib/tools/yasm/bin/ya.make b/contrib/tools/yasm/bin/ya.make
index 24baf355b2..96ca75c53e 100644
--- a/contrib/tools/yasm/bin/ya.make
+++ b/contrib/tools/yasm/bin/ya.make
@@ -6,16 +6,16 @@ OWNER(
PROGRAM(yasm)
-IF (MUSL)
- PEERDIR(contrib/libs/musl_extra)
- PEERDIR(contrib/libs/jemalloc)
- DISABLE(USE_ASMLIB)
- NO_RUNTIME()
- ENABLE(MUSL_LITE)
-ELSE()
- NO_PLATFORM()
-ENDIF()
-
+IF (MUSL)
+ PEERDIR(contrib/libs/musl_extra)
+ PEERDIR(contrib/libs/jemalloc)
+ DISABLE(USE_ASMLIB)
+ NO_RUNTIME()
+ ENABLE(MUSL_LITE)
+ELSE()
+ NO_PLATFORM()
+ENDIF()
+
NO_CLANG_COVERAGE()
NO_COMPILER_WARNINGS()
NO_UTIL()
diff --git a/contrib/tools/yasm/util.h b/contrib/tools/yasm/util.h
index ca9c48eeef..4174648a22 100644
--- a/contrib/tools/yasm/util.h
+++ b/contrib/tools/yasm/util.h
@@ -64,11 +64,11 @@
#include <strings.h>
#endif
-#if !defined(_musl_)
+#if !defined(_musl_)
#if __linux__ && __x86_64__
__asm__(".symver memcpy,memcpy@GLIBC_2.2.5");
#endif
-#endif
+#endif
#include <libyasm-stdint.h>
#include <libyasm/coretype.h>
diff --git a/contrib/ya.make b/contrib/ya.make
index 528ab124fb..4af7600532 100644
--- a/contrib/ya.make
+++ b/contrib/ya.make
@@ -1,4 +1,4 @@
-RECURSE(
+RECURSE(
clickhouse
deprecated
go
@@ -6,9 +6,9 @@ RECURSE(
libs
nginx
node_modules
- phantom/pd/ssl
+ phantom/pd/ssl
python
restricted
tests
tools
-)
+)